diff --git a/.bash_history b/.bash_history new file mode 100644 index 0000000..274811d --- /dev/null +++ b/.bash_history @@ -0,0 +1,68 @@ +sudo apt-get update +sudo apt-get upgrade +sudo apt-get dist-upgrade +stack install +sudo apt install haskell-stack +pip install kaggle +sudo apt install python3-pip +pip install kaggle +pip install pandas +kaggle datasets list -s new york +kaggle -h +kaggle +-h kaggle +help kaggle +info kaggle +help help +kaggle datasets +mkdir +mkdir --help +cd ~/.kaggle +cd ~/ +ls +ls korne +cd Kornelia Girejko +cd C:\Users\korne\.kaggle +cd C:/Users/korne/.kaggle +cd ~/.kaggle +cd ~\.kaggle +cd home +cd nela +ls +ls -r +cd .. +cd nela +ls +ls +git +git remote add origin https://git.wmi.amu.edu.pl/s478815/ium_478815.git +touch README.md +git init +git add README.md +git commit -m "first commit" +git remote add origin https://git.wmi.amu.edu.pl/s478815/ium_478815.git +git push -u origin master +touch README.md +git init +git add README.md +git commit -m "first commit" +git config --global user.email "korgir@st.amu.edu.pl" +git config --global user.name "Kornelia" +git remote add origin https://git.wmi.amu.edu.pl/s478815/ium_478815.git +git push -u origin master +sudo git install +git --help +sudo apt install git-all +sudo apt install git +git remote add origin https://git.wmi.amu.edu.pl/s478815/ium_478815.git +git push -u origin master +git commit -m "first commit" +git add README.md +git remote add origin https://git.wmi.amu.edu.pl/s478815/ium_478815.git +git push -u origin master +virtualenv environment +wsl --set-default-version 2 +dism.exe /online /enable-feature /featurename:Microsoft-Windows-Subsystem-Linux /all /norestart +docker run -d -p 80:80 docker/getting-started +docker build . +docker diff --git a/.bash_logout b/.bash_logout new file mode 100644 index 0000000..de4f5f7 --- /dev/null +++ b/.bash_logout @@ -0,0 +1,7 @@ +# ~/.bash_logout: executed by bash(1) when login shell exits. + +# when leaving the console clear the screen to increase privacy + +if [ "$SHLVL" = 1 ]; then + [ -x /usr/bin/clear_console ] && /usr/bin/clear_console -q +fi diff --git a/.bashrc b/.bashrc new file mode 100644 index 0000000..b488fcc --- /dev/null +++ b/.bashrc @@ -0,0 +1,117 @@ +# ~/.bashrc: executed by bash(1) for non-login shells. +# see /usr/share/doc/bash/examples/startup-files (in the package bash-doc) +# for examples + +# If not running interactively, don't do anything +case $- in + *i*) ;; + *) return;; +esac + +# don't put duplicate lines or lines starting with space in the history. +# See bash(1) for more options +HISTCONTROL=ignoreboth + +# append to the history file, don't overwrite it +shopt -s histappend + +# for setting history length see HISTSIZE and HISTFILESIZE in bash(1) +HISTSIZE=1000 +HISTFILESIZE=2000 + +# check the window size after each command and, if necessary, +# update the values of LINES and COLUMNS. +shopt -s checkwinsize + +# If set, the pattern "**" used in a pathname expansion context will +# match all files and zero or more directories and subdirectories. 
+#shopt -s globstar + +# make less more friendly for non-text input files, see lesspipe(1) +[ -x /usr/bin/lesspipe ] && eval "$(SHELL=/bin/sh lesspipe)" + +# set variable identifying the chroot you work in (used in the prompt below) +if [ -z "${debian_chroot:-}" ] && [ -r /etc/debian_chroot ]; then + debian_chroot=$(cat /etc/debian_chroot) +fi + +# set a fancy prompt (non-color, unless we know we "want" color) +case "$TERM" in + xterm-color|*-256color) color_prompt=yes;; +esac + +# uncomment for a colored prompt, if the terminal has the capability; turned +# off by default to not distract the user: the focus in a terminal window +# should be on the output of commands, not on the prompt +#force_color_prompt=yes + +if [ -n "$force_color_prompt" ]; then + if [ -x /usr/bin/tput ] && tput setaf 1 >&/dev/null; then + # We have color support; assume it's compliant with Ecma-48 + # (ISO/IEC-6429). (Lack of such support is extremely rare, and such + # a case would tend to support setf rather than setaf.) + color_prompt=yes + else + color_prompt= + fi +fi + +if [ "$color_prompt" = yes ]; then + PS1='${debian_chroot:+($debian_chroot)}\[\033[01;32m\]\u@\h\[\033[00m\]:\[\033[01;34m\]\w\[\033[00m\]\$ ' +else + PS1='${debian_chroot:+($debian_chroot)}\u@\h:\w\$ ' +fi +unset color_prompt force_color_prompt + +# If this is an xterm set the title to user@host:dir +case "$TERM" in +xterm*|rxvt*) + PS1="\[\e]0;${debian_chroot:+($debian_chroot)}\u@\h: \w\a\]$PS1" + ;; +*) + ;; +esac + +# enable color support of ls and also add handy aliases +if [ -x /usr/bin/dircolors ]; then + test -r ~/.dircolors && eval "$(dircolors -b ~/.dircolors)" || eval "$(dircolors -b)" + alias ls='ls --color=auto' + #alias dir='dir --color=auto' + #alias vdir='vdir --color=auto' + + alias grep='grep --color=auto' + alias fgrep='fgrep --color=auto' + alias egrep='egrep --color=auto' +fi + +# colored GCC warnings and errors +#export GCC_COLORS='error=01;31:warning=01;35:note=01;36:caret=01;32:locus=01:quote=01' + +# some more ls aliases +alias ll='ls -alF' +alias la='ls -A' +alias l='ls -CF' + +# Add an "alert" alias for long running commands. Use like so: +# sleep 10; alert +alias alert='notify-send --urgency=low -i "$([ $? = 0 ] && echo terminal || echo error)" "$(history|tail -n1|sed -e '\''s/^\s*[0-9]\+\s*//;s/[;&|]\s*alert$//'\'')"' + +# Alias definitions. +# You may want to put all your additions into a separate file like +# ~/.bash_aliases, instead of adding them here directly. +# See /usr/share/doc/bash-doc/examples in the bash-doc package. + +if [ -f ~/.bash_aliases ]; then + . ~/.bash_aliases +fi + +# enable programmable completion features (you don't need to enable +# this, if it's already enabled in /etc/bash.bashrc and /etc/profile +# sources /etc/bash.bashrc). +if ! shopt -oq posix; then + if [ -f /usr/share/bash-completion/bash_completion ]; then + . /usr/share/bash-completion/bash_completion + elif [ -f /etc/bash_completion ]; then + . 
/etc/bash_completion + fi +fi diff --git a/.cache/pip/http/1/4/1/c/9/141c978eee07ee5cf4df0e2a0b02a2cba17d60b3d64b6251604d7fed b/.cache/pip/http/1/4/1/c/9/141c978eee07ee5cf4df0e2a0b02a2cba17d60b3d64b6251604d7fed new file mode 100644 index 0000000..0ff58ae Binary files /dev/null and b/.cache/pip/http/1/4/1/c/9/141c978eee07ee5cf4df0e2a0b02a2cba17d60b3d64b6251604d7fed differ diff --git a/.cache/pip/http/1/5/8/5/6/15856d84ee205815a9da20d464181272415376d8ef0d14a6313288ea b/.cache/pip/http/1/5/8/5/6/15856d84ee205815a9da20d464181272415376d8ef0d14a6313288ea new file mode 100644 index 0000000..6cb6e9a Binary files /dev/null and b/.cache/pip/http/1/5/8/5/6/15856d84ee205815a9da20d464181272415376d8ef0d14a6313288ea differ diff --git a/.cache/pip/http/1/b/a/3/2/1ba322d0d6e49a3e5fc9dce46388803dc4b64194112f73df7f125051 b/.cache/pip/http/1/b/a/3/2/1ba322d0d6e49a3e5fc9dce46388803dc4b64194112f73df7f125051 new file mode 100644 index 0000000..b4e0278 Binary files /dev/null and b/.cache/pip/http/1/b/a/3/2/1ba322d0d6e49a3e5fc9dce46388803dc4b64194112f73df7f125051 differ diff --git a/.cache/pip/http/3/8/6/0/e/3860e4de9ae53c79d2fd61419e9049df314ccc8b640782c02c6e2e2d b/.cache/pip/http/3/8/6/0/e/3860e4de9ae53c79d2fd61419e9049df314ccc8b640782c02c6e2e2d new file mode 100644 index 0000000..02f0ef4 Binary files /dev/null and b/.cache/pip/http/3/8/6/0/e/3860e4de9ae53c79d2fd61419e9049df314ccc8b640782c02c6e2e2d differ diff --git a/.cache/pip/http/4/5/b/c/c/45bccb1b96c130cef676939981deb73cb4304783482d49eecfdffb92 b/.cache/pip/http/4/5/b/c/c/45bccb1b96c130cef676939981deb73cb4304783482d49eecfdffb92 new file mode 100644 index 0000000..927e789 Binary files /dev/null and b/.cache/pip/http/4/5/b/c/c/45bccb1b96c130cef676939981deb73cb4304783482d49eecfdffb92 differ diff --git a/.cache/pip/http/4/a/a/d/3/4aad339416e8026d1fa8b230a91e72b9dbcc20ce7675dd245fc4d95c b/.cache/pip/http/4/a/a/d/3/4aad339416e8026d1fa8b230a91e72b9dbcc20ce7675dd245fc4d95c new file mode 100644 index 0000000..4a57695 Binary files /dev/null and b/.cache/pip/http/4/a/a/d/3/4aad339416e8026d1fa8b230a91e72b9dbcc20ce7675dd245fc4d95c differ diff --git a/.cache/pip/http/6/6/e/c/7/66ec76a7b6ed4081044f5c7821af293b63c17bc2ac523ff93d5ca7d5 b/.cache/pip/http/6/6/e/c/7/66ec76a7b6ed4081044f5c7821af293b63c17bc2ac523ff93d5ca7d5 new file mode 100644 index 0000000..372d120 Binary files /dev/null and b/.cache/pip/http/6/6/e/c/7/66ec76a7b6ed4081044f5c7821af293b63c17bc2ac523ff93d5ca7d5 differ diff --git a/.cache/pip/http/7/c/c/1/7/7cc17dcc171bf45b7103ef4501f6aff66e6ce8353bf12b16bb7cf35b b/.cache/pip/http/7/c/c/1/7/7cc17dcc171bf45b7103ef4501f6aff66e6ce8353bf12b16bb7cf35b new file mode 100644 index 0000000..3e7c532 Binary files /dev/null and b/.cache/pip/http/7/c/c/1/7/7cc17dcc171bf45b7103ef4501f6aff66e6ce8353bf12b16bb7cf35b differ diff --git a/.cache/pip/http/8/7/4/b/5/874b5c5b8576ec6d3865384c0f32d6178c963155a7587d594f07215e b/.cache/pip/http/8/7/4/b/5/874b5c5b8576ec6d3865384c0f32d6178c963155a7587d594f07215e new file mode 100644 index 0000000..d797d3a Binary files /dev/null and b/.cache/pip/http/8/7/4/b/5/874b5c5b8576ec6d3865384c0f32d6178c963155a7587d594f07215e differ diff --git a/.cache/pip/http/8/9/3/a/9/893a9178010858cecf72b523725b46ddedbe6c23406a020810080ab3 b/.cache/pip/http/8/9/3/a/9/893a9178010858cecf72b523725b46ddedbe6c23406a020810080ab3 new file mode 100644 index 0000000..ce9f8c9 Binary files /dev/null and b/.cache/pip/http/8/9/3/a/9/893a9178010858cecf72b523725b46ddedbe6c23406a020810080ab3 differ diff --git 
a/.cache/pip/http/9/9/2/3/a/9923ae74b1e69ca0279bc2d162ac9c66569ba17ac2485ef5864cd329 b/.cache/pip/http/9/9/2/3/a/9923ae74b1e69ca0279bc2d162ac9c66569ba17ac2485ef5864cd329 new file mode 100644 index 0000000..81a2020 Binary files /dev/null and b/.cache/pip/http/9/9/2/3/a/9923ae74b1e69ca0279bc2d162ac9c66569ba17ac2485ef5864cd329 differ diff --git a/.cache/pip/http/b/7/a/d/c/b7adcceb9046ebbc03bbd93aaddf6426d1668ce10944de655175290c b/.cache/pip/http/b/7/a/d/c/b7adcceb9046ebbc03bbd93aaddf6426d1668ce10944de655175290c new file mode 100644 index 0000000..3f1d31d Binary files /dev/null and b/.cache/pip/http/b/7/a/d/c/b7adcceb9046ebbc03bbd93aaddf6426d1668ce10944de655175290c differ diff --git a/.cache/pip/http/c/5/d/7/1/c5d710a17cae1d2fc2c6a04c38ea4155fcef5749737e28b6c24e04b5 b/.cache/pip/http/c/5/d/7/1/c5d710a17cae1d2fc2c6a04c38ea4155fcef5749737e28b6c24e04b5 new file mode 100644 index 0000000..39862fd Binary files /dev/null and b/.cache/pip/http/c/5/d/7/1/c5d710a17cae1d2fc2c6a04c38ea4155fcef5749737e28b6c24e04b5 differ diff --git a/.cache/pip/http/c/c/1/3/3/cc133da5701a066bc3777b2575e0daa601596f19871db4608ca98b97 b/.cache/pip/http/c/c/1/3/3/cc133da5701a066bc3777b2575e0daa601596f19871db4608ca98b97 new file mode 100644 index 0000000..1d8cafe Binary files /dev/null and b/.cache/pip/http/c/c/1/3/3/cc133da5701a066bc3777b2575e0daa601596f19871db4608ca98b97 differ diff --git a/.cache/pip/http/d/f/5/1/5/df5151e792305143e81ca2f7a2c963cbc786303eea65fcc2e65ccec5 b/.cache/pip/http/d/f/5/1/5/df5151e792305143e81ca2f7a2c963cbc786303eea65fcc2e65ccec5 new file mode 100644 index 0000000..81400de Binary files /dev/null and b/.cache/pip/http/d/f/5/1/5/df5151e792305143e81ca2f7a2c963cbc786303eea65fcc2e65ccec5 differ diff --git a/.cache/pip/http/e/1/a/1/0/e1a102a58d5f97b4d2a7ffd62157f43e68a5808f81e4779f2eb95e89 b/.cache/pip/http/e/1/a/1/0/e1a102a58d5f97b4d2a7ffd62157f43e68a5808f81e4779f2eb95e89 new file mode 100644 index 0000000..e787aac Binary files /dev/null and b/.cache/pip/http/e/1/a/1/0/e1a102a58d5f97b4d2a7ffd62157f43e68a5808f81e4779f2eb95e89 differ diff --git a/.cache/pip/wheels/29/da/11/144cc25aebdaeb4931b231e25fd34b394e6a5725cbb2f50106/kaggle-1.5.12-py3-none-any.whl b/.cache/pip/wheels/29/da/11/144cc25aebdaeb4931b231e25fd34b394e6a5725cbb2f50106/kaggle-1.5.12-py3-none-any.whl new file mode 100644 index 0000000..f3d4a23 Binary files /dev/null and b/.cache/pip/wheels/29/da/11/144cc25aebdaeb4931b231e25fd34b394e6a5725cbb2f50106/kaggle-1.5.12-py3-none-any.whl differ diff --git a/.gitconfig b/.gitconfig new file mode 100644 index 0000000..05ef6c7 --- /dev/null +++ b/.gitconfig @@ -0,0 +1,3 @@ +[user] + email = korgir@st.amu.edu.pl + name = Kornelia diff --git a/.landscape/sysinfo.log b/.landscape/sysinfo.log new file mode 100644 index 0000000..e69de29 diff --git a/.local/bin/f2py b/.local/bin/f2py new file mode 100755 index 0000000..c4f7341 --- /dev/null +++ b/.local/bin/f2py @@ -0,0 +1,8 @@ +#!/usr/bin/python3 +# -*- coding: utf-8 -*- +import re +import sys +from numpy.f2py.f2py2e import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/.local/bin/f2py3 b/.local/bin/f2py3 new file mode 100755 index 0000000..c4f7341 --- /dev/null +++ b/.local/bin/f2py3 @@ -0,0 +1,8 @@ +#!/usr/bin/python3 +# -*- coding: utf-8 -*- +import re +import sys +from numpy.f2py.f2py2e import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/.local/bin/f2py3.8 b/.local/bin/f2py3.8 new file 
mode 100755 index 0000000..c4f7341 --- /dev/null +++ b/.local/bin/f2py3.8 @@ -0,0 +1,8 @@ +#!/usr/bin/python3 +# -*- coding: utf-8 -*- +import re +import sys +from numpy.f2py.f2py2e import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/.local/bin/kaggle b/.local/bin/kaggle new file mode 100755 index 0000000..547c6b6 --- /dev/null +++ b/.local/bin/kaggle @@ -0,0 +1,8 @@ +#!/usr/bin/python3 +# -*- coding: utf-8 -*- +import re +import sys +from kaggle.cli import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/.local/bin/tqdm b/.local/bin/tqdm new file mode 100755 index 0000000..bdac0c3 --- /dev/null +++ b/.local/bin/tqdm @@ -0,0 +1,8 @@ +#!/usr/bin/python3 +# -*- coding: utf-8 -*- +import re +import sys +from tqdm.cli import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/.local/lib/python3.8/site-packages/dateutil/__init__.py b/.local/lib/python3.8/site-packages/dateutil/__init__.py new file mode 100644 index 0000000..0defb82 --- /dev/null +++ b/.local/lib/python3.8/site-packages/dateutil/__init__.py @@ -0,0 +1,8 @@ +# -*- coding: utf-8 -*- +try: + from ._version import version as __version__ +except ImportError: + __version__ = 'unknown' + +__all__ = ['easter', 'parser', 'relativedelta', 'rrule', 'tz', + 'utils', 'zoneinfo'] diff --git a/.local/lib/python3.8/site-packages/dateutil/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/dateutil/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..95fa590 Binary files /dev/null and b/.local/lib/python3.8/site-packages/dateutil/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/dateutil/__pycache__/_common.cpython-38.pyc b/.local/lib/python3.8/site-packages/dateutil/__pycache__/_common.cpython-38.pyc new file mode 100644 index 0000000..5aae969 Binary files /dev/null and b/.local/lib/python3.8/site-packages/dateutil/__pycache__/_common.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/dateutil/__pycache__/_version.cpython-38.pyc b/.local/lib/python3.8/site-packages/dateutil/__pycache__/_version.cpython-38.pyc new file mode 100644 index 0000000..eb56430 Binary files /dev/null and b/.local/lib/python3.8/site-packages/dateutil/__pycache__/_version.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/dateutil/__pycache__/easter.cpython-38.pyc b/.local/lib/python3.8/site-packages/dateutil/__pycache__/easter.cpython-38.pyc new file mode 100644 index 0000000..4c85ad6 Binary files /dev/null and b/.local/lib/python3.8/site-packages/dateutil/__pycache__/easter.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/dateutil/__pycache__/relativedelta.cpython-38.pyc b/.local/lib/python3.8/site-packages/dateutil/__pycache__/relativedelta.cpython-38.pyc new file mode 100644 index 0000000..7f44037 Binary files /dev/null and b/.local/lib/python3.8/site-packages/dateutil/__pycache__/relativedelta.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/dateutil/__pycache__/rrule.cpython-38.pyc b/.local/lib/python3.8/site-packages/dateutil/__pycache__/rrule.cpython-38.pyc new file mode 100644 index 0000000..8df9ec7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/dateutil/__pycache__/rrule.cpython-38.pyc differ diff --git 
a/.local/lib/python3.8/site-packages/dateutil/__pycache__/tzwin.cpython-38.pyc b/.local/lib/python3.8/site-packages/dateutil/__pycache__/tzwin.cpython-38.pyc new file mode 100644 index 0000000..8ad62a3 Binary files /dev/null and b/.local/lib/python3.8/site-packages/dateutil/__pycache__/tzwin.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/dateutil/__pycache__/utils.cpython-38.pyc b/.local/lib/python3.8/site-packages/dateutil/__pycache__/utils.cpython-38.pyc new file mode 100644 index 0000000..10fb28b Binary files /dev/null and b/.local/lib/python3.8/site-packages/dateutil/__pycache__/utils.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/dateutil/_common.py b/.local/lib/python3.8/site-packages/dateutil/_common.py new file mode 100644 index 0000000..4eb2659 --- /dev/null +++ b/.local/lib/python3.8/site-packages/dateutil/_common.py @@ -0,0 +1,43 @@ +""" +Common code used in multiple modules. +""" + + +class weekday(object): + __slots__ = ["weekday", "n"] + + def __init__(self, weekday, n=None): + self.weekday = weekday + self.n = n + + def __call__(self, n): + if n == self.n: + return self + else: + return self.__class__(self.weekday, n) + + def __eq__(self, other): + try: + if self.weekday != other.weekday or self.n != other.n: + return False + except AttributeError: + return False + return True + + def __hash__(self): + return hash(( + self.weekday, + self.n, + )) + + def __ne__(self, other): + return not (self == other) + + def __repr__(self): + s = ("MO", "TU", "WE", "TH", "FR", "SA", "SU")[self.weekday] + if not self.n: + return s + else: + return "%s(%+d)" % (s, self.n) + +# vim:ts=4:sw=4:et diff --git a/.local/lib/python3.8/site-packages/dateutil/_version.py b/.local/lib/python3.8/site-packages/dateutil/_version.py new file mode 100644 index 0000000..b723056 --- /dev/null +++ b/.local/lib/python3.8/site-packages/dateutil/_version.py @@ -0,0 +1,5 @@ +# coding: utf-8 +# file generated by setuptools_scm +# don't change, don't track in version control +version = '2.8.2' +version_tuple = (2, 8, 2) diff --git a/.local/lib/python3.8/site-packages/dateutil/easter.py b/.local/lib/python3.8/site-packages/dateutil/easter.py new file mode 100644 index 0000000..f74d1f7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/dateutil/easter.py @@ -0,0 +1,89 @@ +# -*- coding: utf-8 -*- +""" +This module offers a generic Easter computing method for any given year, using +Western, Orthodox or Julian algorithms. +""" + +import datetime + +__all__ = ["easter", "EASTER_JULIAN", "EASTER_ORTHODOX", "EASTER_WESTERN"] + +EASTER_JULIAN = 1 +EASTER_ORTHODOX = 2 +EASTER_WESTERN = 3 + + +def easter(year, method=EASTER_WESTERN): + """ + This method was ported from the work done by GM Arts, + on top of the algorithm by Claus Tondering, which was + based in part on the algorithm of Ouding (1940), as + quoted in "Explanatory Supplement to the Astronomical + Almanac", P. Kenneth Seidelmann, editor. + + This algorithm implements three different Easter + calculation methods: + + 1. Original calculation in Julian calendar, valid in + dates after 326 AD + 2. Original method, with date converted to Gregorian + calendar, valid in years 1583 to 4099 + 3. Revised method, in Gregorian calendar, valid in + years 1583 to 4099 as well + + These methods are represented by the constants: + + * ``EASTER_JULIAN = 1`` + * ``EASTER_ORTHODOX = 2`` + * ``EASTER_WESTERN = 3`` + + The default method is method 3. 
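+
+    A short illustrative sketch (Western is the default; the Orthodox
+    result uses method 2):
+
+        >>> from dateutil.easter import easter, EASTER_ORTHODOX
+        >>> easter(2000)
+        datetime.date(2000, 4, 23)
+        >>> easter(2000, EASTER_ORTHODOX)
+        datetime.date(2000, 4, 30)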
+ + More about the algorithm may be found at: + + `GM Arts: Easter Algorithms `_ + + and + + `The Calendar FAQ: Easter `_ + + """ + + if not (1 <= method <= 3): + raise ValueError("invalid method") + + # g - Golden year - 1 + # c - Century + # h - (23 - Epact) mod 30 + # i - Number of days from March 21 to Paschal Full Moon + # j - Weekday for PFM (0=Sunday, etc) + # p - Number of days from March 21 to Sunday on or before PFM + # (-6 to 28 methods 1 & 3, to 56 for method 2) + # e - Extra days to add for method 2 (converting Julian + # date to Gregorian date) + + y = year + g = y % 19 + e = 0 + if method < 3: + # Old method + i = (19*g + 15) % 30 + j = (y + y//4 + i) % 7 + if method == 2: + # Extra dates to convert Julian to Gregorian date + e = 10 + if y > 1600: + e = e + y//100 - 16 - (y//100 - 16)//4 + else: + # New method + c = y//100 + h = (c - c//4 - (8*c + 13)//25 + 19*g + 15) % 30 + i = h - (h//28)*(1 - (h//28)*(29//(h + 1))*((21 - g)//11)) + j = (y + y//4 + i + 2 - c + c//4) % 7 + + # p can be from -6 to 56 corresponding to dates 22 March to 23 May + # (later dates apply to method 2, although 23 May never actually occurs) + p = i - j + e + d = 1 + (p + 27 + (p + 6)//40) % 31 + m = 3 + (p + 26)//30 + return datetime.date(int(y), int(m), int(d)) diff --git a/.local/lib/python3.8/site-packages/dateutil/parser/__init__.py b/.local/lib/python3.8/site-packages/dateutil/parser/__init__.py new file mode 100644 index 0000000..d174b0e --- /dev/null +++ b/.local/lib/python3.8/site-packages/dateutil/parser/__init__.py @@ -0,0 +1,61 @@ +# -*- coding: utf-8 -*- +from ._parser import parse, parser, parserinfo, ParserError +from ._parser import DEFAULTPARSER, DEFAULTTZPARSER +from ._parser import UnknownTimezoneWarning + +from ._parser import __doc__ + +from .isoparser import isoparser, isoparse + +__all__ = ['parse', 'parser', 'parserinfo', + 'isoparse', 'isoparser', + 'ParserError', + 'UnknownTimezoneWarning'] + + +### +# Deprecate portions of the private interface so that downstream code that +# is improperly relying on it is given *some* notice. 
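+# Each wrapper below keeps the wrapped object's behaviour intact and merely
+# emits a DeprecationWarning on use: functions are wrapped with
+# functools.wraps, classes with a subclass whose __init__ warns first.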
+ + +def __deprecated_private_func(f): + from functools import wraps + import warnings + + msg = ('{name} is a private function and may break without warning, ' + 'it will be moved and or renamed in future versions.') + msg = msg.format(name=f.__name__) + + @wraps(f) + def deprecated_func(*args, **kwargs): + warnings.warn(msg, DeprecationWarning) + return f(*args, **kwargs) + + return deprecated_func + +def __deprecate_private_class(c): + import warnings + + msg = ('{name} is a private class and may break without warning, ' + 'it will be moved and or renamed in future versions.') + msg = msg.format(name=c.__name__) + + class private_class(c): + __doc__ = c.__doc__ + + def __init__(self, *args, **kwargs): + warnings.warn(msg, DeprecationWarning) + super(private_class, self).__init__(*args, **kwargs) + + private_class.__name__ = c.__name__ + + return private_class + + +from ._parser import _timelex, _resultbase +from ._parser import _tzparser, _parsetz + +_timelex = __deprecate_private_class(_timelex) +_tzparser = __deprecate_private_class(_tzparser) +_resultbase = __deprecate_private_class(_resultbase) +_parsetz = __deprecated_private_func(_parsetz) diff --git a/.local/lib/python3.8/site-packages/dateutil/parser/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/dateutil/parser/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..d0adcad Binary files /dev/null and b/.local/lib/python3.8/site-packages/dateutil/parser/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/dateutil/parser/__pycache__/_parser.cpython-38.pyc b/.local/lib/python3.8/site-packages/dateutil/parser/__pycache__/_parser.cpython-38.pyc new file mode 100644 index 0000000..2e61efe Binary files /dev/null and b/.local/lib/python3.8/site-packages/dateutil/parser/__pycache__/_parser.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/dateutil/parser/__pycache__/isoparser.cpython-38.pyc b/.local/lib/python3.8/site-packages/dateutil/parser/__pycache__/isoparser.cpython-38.pyc new file mode 100644 index 0000000..c16808c Binary files /dev/null and b/.local/lib/python3.8/site-packages/dateutil/parser/__pycache__/isoparser.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/dateutil/parser/_parser.py b/.local/lib/python3.8/site-packages/dateutil/parser/_parser.py new file mode 100644 index 0000000..37d1663 --- /dev/null +++ b/.local/lib/python3.8/site-packages/dateutil/parser/_parser.py @@ -0,0 +1,1613 @@ +# -*- coding: utf-8 -*- +""" +This module offers a generic date/time string parser which is able to parse +most known formats to represent a date and/or time. + +This module attempts to be forgiving with regards to unlikely input formats, +returning a datetime object even for dates which are ambiguous. If an element +of a date/time stamp is omitted, the following rules are applied: + +- If AM or PM is left unspecified, a 24-hour clock is assumed, however, an hour + on a 12-hour clock (``0 <= hour <= 12``) *must* be specified if AM or PM is + specified. +- If a time zone is omitted, a timezone-naive datetime is returned. + +If any other elements are missing, they are taken from the +:class:`datetime.datetime` object passed to the parameter ``default``. If this +results in a day number exceeding the valid number of days per month, the +value falls back to the end of the month. 
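+
+For example, with a ``default`` whose day is the 31st, a month-only string
+clamps to that month's last day (a small sketch of the rule above):
+
+    >>> from datetime import datetime
+    >>> from dateutil.parser import parse
+    >>> parse("April", default=datetime(2021, 1, 31))
+    datetime.datetime(2021, 4, 30, 0, 0)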
+ +Additional resources about date/time string formats can be found below: + +- `A summary of the international standard date and time notation + `_ +- `W3C Date and Time Formats `_ +- `Time Formats (Planetary Rings Node) `_ +- `CPAN ParseDate module + `_ +- `Java SimpleDateFormat Class + `_ +""" +from __future__ import unicode_literals + +import datetime +import re +import string +import time +import warnings + +from calendar import monthrange +from io import StringIO + +import six +from six import integer_types, text_type + +from decimal import Decimal + +from warnings import warn + +from .. import relativedelta +from .. import tz + +__all__ = ["parse", "parserinfo", "ParserError"] + + +# TODO: pandas.core.tools.datetimes imports this explicitly. Might be worth +# making public and/or figuring out if there is something we can +# take off their plate. +class _timelex(object): + # Fractional seconds are sometimes split by a comma + _split_decimal = re.compile("([.,])") + + def __init__(self, instream): + if isinstance(instream, (bytes, bytearray)): + instream = instream.decode() + + if isinstance(instream, text_type): + instream = StringIO(instream) + elif getattr(instream, 'read', None) is None: + raise TypeError('Parser must be a string or character stream, not ' + '{itype}'.format(itype=instream.__class__.__name__)) + + self.instream = instream + self.charstack = [] + self.tokenstack = [] + self.eof = False + + def get_token(self): + """ + This function breaks the time string into lexical units (tokens), which + can be parsed by the parser. Lexical units are demarcated by changes in + the character set, so any continuous string of letters is considered + one unit, any continuous string of numbers is considered one unit. + + The main complication arises from the fact that dots ('.') can be used + both as separators (e.g. "Sep.20.2009") or decimal points (e.g. + "4:30:21.447"). As such, it is necessary to read the full context of + any dot-separated strings before breaking it into tokens; as such, this + function maintains a "token stack", for when the ambiguous context + demands that multiple tokens be parsed at once. + """ + if self.tokenstack: + return self.tokenstack.pop(0) + + seenletters = False + token = None + state = None + + while not self.eof: + # We only realize that we've reached the end of a token when we + # find a character that's not part of the current token - since + # that character may be part of the next token, it's stored in the + # charstack. + if self.charstack: + nextchar = self.charstack.pop(0) + else: + nextchar = self.instream.read(1) + while nextchar == '\x00': + nextchar = self.instream.read(1) + + if not nextchar: + self.eof = True + break + elif not state: + # First character of the token - determines if we're starting + # to parse a word, a number or something else. + token = nextchar + if self.isword(nextchar): + state = 'a' + elif self.isnum(nextchar): + state = '0' + elif self.isspace(nextchar): + token = ' ' + break # emit token + else: + break # emit token + elif state == 'a': + # If we've already started reading a word, we keep reading + # letters until we find something that's not part of a word. + seenletters = True + if self.isword(nextchar): + token += nextchar + elif nextchar == '.': + token += nextchar + state = 'a.' + else: + self.charstack.append(nextchar) + break # emit token + elif state == '0': + # If we've already started reading a number, we keep reading + # numbers until we find something that doesn't fit. 
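+                # (e.g. for "21.447" the digits accumulate here, and the dot
+                # handled below switches the lexer into decimal state '0.')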
+ if self.isnum(nextchar): + token += nextchar + elif nextchar == '.' or (nextchar == ',' and len(token) >= 2): + token += nextchar + state = '0.' + else: + self.charstack.append(nextchar) + break # emit token + elif state == 'a.': + # If we've seen some letters and a dot separator, continue + # parsing, and the tokens will be broken up later. + seenletters = True + if nextchar == '.' or self.isword(nextchar): + token += nextchar + elif self.isnum(nextchar) and token[-1] == '.': + token += nextchar + state = '0.' + else: + self.charstack.append(nextchar) + break # emit token + elif state == '0.': + # If we've seen at least one dot separator, keep going, we'll + # break up the tokens later. + if nextchar == '.' or self.isnum(nextchar): + token += nextchar + elif self.isword(nextchar) and token[-1] == '.': + token += nextchar + state = 'a.' + else: + self.charstack.append(nextchar) + break # emit token + + if (state in ('a.', '0.') and (seenletters or token.count('.') > 1 or + token[-1] in '.,')): + l = self._split_decimal.split(token) + token = l[0] + for tok in l[1:]: + if tok: + self.tokenstack.append(tok) + + if state == '0.' and token.count('.') == 0: + token = token.replace(',', '.') + + return token + + def __iter__(self): + return self + + def __next__(self): + token = self.get_token() + if token is None: + raise StopIteration + + return token + + def next(self): + return self.__next__() # Python 2.x support + + @classmethod + def split(cls, s): + return list(cls(s)) + + @classmethod + def isword(cls, nextchar): + """ Whether or not the next character is part of a word """ + return nextchar.isalpha() + + @classmethod + def isnum(cls, nextchar): + """ Whether the next character is part of a number """ + return nextchar.isdigit() + + @classmethod + def isspace(cls, nextchar): + """ Whether the next character is whitespace """ + return nextchar.isspace() + + +class _resultbase(object): + + def __init__(self): + for attr in self.__slots__: + setattr(self, attr, None) + + def _repr(self, classname): + l = [] + for attr in self.__slots__: + value = getattr(self, attr) + if value is not None: + l.append("%s=%s" % (attr, repr(value))) + return "%s(%s)" % (classname, ", ".join(l)) + + def __len__(self): + return (sum(getattr(self, attr) is not None + for attr in self.__slots__)) + + def __repr__(self): + return self._repr(self.__class__.__name__) + + +class parserinfo(object): + """ + Class which handles what inputs are accepted. Subclass this to customize + the language and acceptable values for each parameter. + + :param dayfirst: + Whether to interpret the first value in an ambiguous 3-integer date + (e.g. 01/05/09) as the day (``True``) or month (``False``). If + ``yearfirst`` is set to ``True``, this distinguishes between YDM + and YMD. Default is ``False``. + + :param yearfirst: + Whether to interpret the first value in an ambiguous 3-integer date + (e.g. 01/05/09) as the year. If ``True``, the first number is taken + to be the year, otherwise the last number is taken to be the year. + Default is ``False``. 
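+
+    A sketch of both flags applied to the ambiguous date above
+    (:func:`dateutil.parser.parse` accepts the same keywords and applies
+    them per call):
+
+        >>> from dateutil.parser import parse
+        >>> parse("01/05/09")
+        datetime.datetime(2009, 1, 5, 0, 0)
+        >>> parse("01/05/09", dayfirst=True)
+        datetime.datetime(2009, 5, 1, 0, 0)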
+ """ + + # m from a.m/p.m, t from ISO T separator + JUMP = [" ", ".", ",", ";", "-", "/", "'", + "at", "on", "and", "ad", "m", "t", "of", + "st", "nd", "rd", "th"] + + WEEKDAYS = [("Mon", "Monday"), + ("Tue", "Tuesday"), # TODO: "Tues" + ("Wed", "Wednesday"), + ("Thu", "Thursday"), # TODO: "Thurs" + ("Fri", "Friday"), + ("Sat", "Saturday"), + ("Sun", "Sunday")] + MONTHS = [("Jan", "January"), + ("Feb", "February"), # TODO: "Febr" + ("Mar", "March"), + ("Apr", "April"), + ("May", "May"), + ("Jun", "June"), + ("Jul", "July"), + ("Aug", "August"), + ("Sep", "Sept", "September"), + ("Oct", "October"), + ("Nov", "November"), + ("Dec", "December")] + HMS = [("h", "hour", "hours"), + ("m", "minute", "minutes"), + ("s", "second", "seconds")] + AMPM = [("am", "a"), + ("pm", "p")] + UTCZONE = ["UTC", "GMT", "Z", "z"] + PERTAIN = ["of"] + TZOFFSET = {} + # TODO: ERA = ["AD", "BC", "CE", "BCE", "Stardate", + # "Anno Domini", "Year of Our Lord"] + + def __init__(self, dayfirst=False, yearfirst=False): + self._jump = self._convert(self.JUMP) + self._weekdays = self._convert(self.WEEKDAYS) + self._months = self._convert(self.MONTHS) + self._hms = self._convert(self.HMS) + self._ampm = self._convert(self.AMPM) + self._utczone = self._convert(self.UTCZONE) + self._pertain = self._convert(self.PERTAIN) + + self.dayfirst = dayfirst + self.yearfirst = yearfirst + + self._year = time.localtime().tm_year + self._century = self._year // 100 * 100 + + def _convert(self, lst): + dct = {} + for i, v in enumerate(lst): + if isinstance(v, tuple): + for v in v: + dct[v.lower()] = i + else: + dct[v.lower()] = i + return dct + + def jump(self, name): + return name.lower() in self._jump + + def weekday(self, name): + try: + return self._weekdays[name.lower()] + except KeyError: + pass + return None + + def month(self, name): + try: + return self._months[name.lower()] + 1 + except KeyError: + pass + return None + + def hms(self, name): + try: + return self._hms[name.lower()] + except KeyError: + return None + + def ampm(self, name): + try: + return self._ampm[name.lower()] + except KeyError: + return None + + def pertain(self, name): + return name.lower() in self._pertain + + def utczone(self, name): + return name.lower() in self._utczone + + def tzoffset(self, name): + if name in self._utczone: + return 0 + + return self.TZOFFSET.get(name) + + def convertyear(self, year, century_specified=False): + """ + Converts two-digit years to year within [-50, 49] + range of self._year (current local time) + """ + + # Function contract is that the year is always positive + assert year >= 0 + + if year < 100 and not century_specified: + # assume current century to start + year += self._century + + if year >= self._year + 50: # if too far in future + year -= 100 + elif year < self._year - 50: # if too far in past + year += 100 + + return year + + def validate(self, res): + # move to info + if res.year is not None: + res.year = self.convertyear(res.year, res.century_specified) + + if ((res.tzoffset == 0 and not res.tzname) or + (res.tzname == 'Z' or res.tzname == 'z')): + res.tzname = "UTC" + res.tzoffset = 0 + elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname): + res.tzoffset = 0 + return True + + +class _ymd(list): + def __init__(self, *args, **kwargs): + super(self.__class__, self).__init__(*args, **kwargs) + self.century_specified = False + self.dstridx = None + self.mstridx = None + self.ystridx = None + + @property + def has_year(self): + return self.ystridx is not None + + @property + def has_month(self): + 
return self.mstridx is not None + + @property + def has_day(self): + return self.dstridx is not None + + def could_be_day(self, value): + if self.has_day: + return False + elif not self.has_month: + return 1 <= value <= 31 + elif not self.has_year: + # Be permissive, assume leap year + month = self[self.mstridx] + return 1 <= value <= monthrange(2000, month)[1] + else: + month = self[self.mstridx] + year = self[self.ystridx] + return 1 <= value <= monthrange(year, month)[1] + + def append(self, val, label=None): + if hasattr(val, '__len__'): + if val.isdigit() and len(val) > 2: + self.century_specified = True + if label not in [None, 'Y']: # pragma: no cover + raise ValueError(label) + label = 'Y' + elif val > 100: + self.century_specified = True + if label not in [None, 'Y']: # pragma: no cover + raise ValueError(label) + label = 'Y' + + super(self.__class__, self).append(int(val)) + + if label == 'M': + if self.has_month: + raise ValueError('Month is already set') + self.mstridx = len(self) - 1 + elif label == 'D': + if self.has_day: + raise ValueError('Day is already set') + self.dstridx = len(self) - 1 + elif label == 'Y': + if self.has_year: + raise ValueError('Year is already set') + self.ystridx = len(self) - 1 + + def _resolve_from_stridxs(self, strids): + """ + Try to resolve the identities of year/month/day elements using + ystridx, mstridx, and dstridx, if enough of these are specified. + """ + if len(self) == 3 and len(strids) == 2: + # we can back out the remaining stridx value + missing = [x for x in range(3) if x not in strids.values()] + key = [x for x in ['y', 'm', 'd'] if x not in strids] + assert len(missing) == len(key) == 1 + key = key[0] + val = missing[0] + strids[key] = val + + assert len(self) == len(strids) # otherwise this should not be called + out = {key: self[strids[key]] for key in strids} + return (out.get('y'), out.get('m'), out.get('d')) + + def resolve_ymd(self, yearfirst, dayfirst): + len_ymd = len(self) + year, month, day = (None, None, None) + + strids = (('y', self.ystridx), + ('m', self.mstridx), + ('d', self.dstridx)) + + strids = {key: val for key, val in strids if val is not None} + if (len(self) == len(strids) > 0 or + (len(self) == 3 and len(strids) == 2)): + return self._resolve_from_stridxs(strids) + + mstridx = self.mstridx + + if len_ymd > 3: + raise ValueError("More than three YMD values") + elif len_ymd == 1 or (mstridx is not None and len_ymd == 2): + # One member, or two members with a month string + if mstridx is not None: + month = self[mstridx] + # since mstridx is 0 or 1, self[mstridx-1] always + # looks up the other element + other = self[mstridx - 1] + else: + other = self[0] + + if len_ymd > 1 or mstridx is None: + if other > 31: + year = other + else: + day = other + + elif len_ymd == 2: + # Two members with numbers + if self[0] > 31: + # 99-01 + year, month = self + elif self[1] > 31: + # 01-99 + month, year = self + elif dayfirst and self[1] <= 12: + # 13-01 + day, month = self + else: + # 01-13 + month, day = self + + elif len_ymd == 3: + # Three members + if mstridx == 0: + if self[1] > 31: + # Apr-2003-25 + month, year, day = self + else: + month, day, year = self + elif mstridx == 1: + if self[0] > 31 or (yearfirst and self[2] <= 31): + # 99-Jan-01 + year, month, day = self + else: + # 01-Jan-01 + # Give precedence to day-first, since + # two-digit years is usually hand-written. + day, month, year = self + + elif mstridx == 2: + # WTF!? 
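+                # (month string in the last position, e.g. "01-99-Jan"
+                # or "99-01-Jan")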
+ if self[1] > 31: + # 01-99-Jan + day, year, month = self + else: + # 99-01-Jan + year, day, month = self + + else: + if (self[0] > 31 or + self.ystridx == 0 or + (yearfirst and self[1] <= 12 and self[2] <= 31)): + # 99-01-01 + if dayfirst and self[2] <= 12: + year, day, month = self + else: + year, month, day = self + elif self[0] > 12 or (dayfirst and self[1] <= 12): + # 13-01-01 + day, month, year = self + else: + # 01-13-01 + month, day, year = self + + return year, month, day + + +class parser(object): + def __init__(self, info=None): + self.info = info or parserinfo() + + def parse(self, timestr, default=None, + ignoretz=False, tzinfos=None, **kwargs): + """ + Parse the date/time string into a :class:`datetime.datetime` object. + + :param timestr: + Any date/time string using the supported formats. + + :param default: + The default datetime object, if this is a datetime object and not + ``None``, elements specified in ``timestr`` replace elements in the + default object. + + :param ignoretz: + If set ``True``, time zones in parsed strings are ignored and a + naive :class:`datetime.datetime` object is returned. + + :param tzinfos: + Additional time zone names / aliases which may be present in the + string. This argument maps time zone names (and optionally offsets + from those time zones) to time zones. This parameter can be a + dictionary with timezone aliases mapping time zone names to time + zones or a function taking two parameters (``tzname`` and + ``tzoffset``) and returning a time zone. + + The timezones to which the names are mapped can be an integer + offset from UTC in seconds or a :class:`tzinfo` object. + + .. doctest:: + :options: +NORMALIZE_WHITESPACE + + >>> from dateutil.parser import parse + >>> from dateutil.tz import gettz + >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")} + >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos) + datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200)) + >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos) + datetime.datetime(2012, 1, 19, 17, 21, + tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago')) + + This parameter is ignored if ``ignoretz`` is set. + + :param \\*\\*kwargs: + Keyword arguments as passed to ``_parse()``. + + :return: + Returns a :class:`datetime.datetime` object or, if the + ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the + first element being a :class:`datetime.datetime` object, the second + a tuple containing the fuzzy tokens. + + :raises ParserError: + Raised for invalid or unknown string format, if the provided + :class:`tzinfo` is not in a valid format, or if an invalid date + would be created. + + :raises TypeError: + Raised for non-string or character stream input. + + :raises OverflowError: + Raised if the parsed date exceeds the largest valid C integer on + your system. 
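+
+        A failure sketch (the message text is formatted by
+        :class:`ParserError`):
+
+            >>> from dateutil.parser import parse, ParserError
+            >>> try:
+            ...     parse("not a date")
+            ... except ParserError as exc:
+            ...     print(exc)
+            Unknown string format: not a date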
+ """ + + if default is None: + default = datetime.datetime.now().replace(hour=0, minute=0, + second=0, microsecond=0) + + res, skipped_tokens = self._parse(timestr, **kwargs) + + if res is None: + raise ParserError("Unknown string format: %s", timestr) + + if len(res) == 0: + raise ParserError("String does not contain a date: %s", timestr) + + try: + ret = self._build_naive(res, default) + except ValueError as e: + six.raise_from(ParserError(str(e) + ": %s", timestr), e) + + if not ignoretz: + ret = self._build_tzaware(ret, res, tzinfos) + + if kwargs.get('fuzzy_with_tokens', False): + return ret, skipped_tokens + else: + return ret + + class _result(_resultbase): + __slots__ = ["year", "month", "day", "weekday", + "hour", "minute", "second", "microsecond", + "tzname", "tzoffset", "ampm","any_unused_tokens"] + + def _parse(self, timestr, dayfirst=None, yearfirst=None, fuzzy=False, + fuzzy_with_tokens=False): + """ + Private method which performs the heavy lifting of parsing, called from + ``parse()``, which passes on its ``kwargs`` to this function. + + :param timestr: + The string to parse. + + :param dayfirst: + Whether to interpret the first value in an ambiguous 3-integer date + (e.g. 01/05/09) as the day (``True``) or month (``False``). If + ``yearfirst`` is set to ``True``, this distinguishes between YDM + and YMD. If set to ``None``, this value is retrieved from the + current :class:`parserinfo` object (which itself defaults to + ``False``). + + :param yearfirst: + Whether to interpret the first value in an ambiguous 3-integer date + (e.g. 01/05/09) as the year. If ``True``, the first number is taken + to be the year, otherwise the last number is taken to be the year. + If this is set to ``None``, the value is retrieved from the current + :class:`parserinfo` object (which itself defaults to ``False``). + + :param fuzzy: + Whether to allow fuzzy parsing, allowing for string like "Today is + January 1, 2047 at 8:21:00AM". + + :param fuzzy_with_tokens: + If ``True``, ``fuzzy`` is automatically set to True, and the parser + will return a tuple where the first element is the parsed + :class:`datetime.datetime` datetimestamp and the second element is + a tuple containing the portions of the string which were ignored: + + .. 
doctest:: + + >>> from dateutil.parser import parse + >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True) + (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at ')) + + """ + if fuzzy_with_tokens: + fuzzy = True + + info = self.info + + if dayfirst is None: + dayfirst = info.dayfirst + + if yearfirst is None: + yearfirst = info.yearfirst + + res = self._result() + l = _timelex.split(timestr) # Splits the timestr into tokens + + skipped_idxs = [] + + # year/month/day list + ymd = _ymd() + + len_l = len(l) + i = 0 + try: + while i < len_l: + + # Check if it's a number + value_repr = l[i] + try: + value = float(value_repr) + except ValueError: + value = None + + if value is not None: + # Numeric token + i = self._parse_numeric_token(l, i, info, ymd, res, fuzzy) + + # Check weekday + elif info.weekday(l[i]) is not None: + value = info.weekday(l[i]) + res.weekday = value + + # Check month name + elif info.month(l[i]) is not None: + value = info.month(l[i]) + ymd.append(value, 'M') + + if i + 1 < len_l: + if l[i + 1] in ('-', '/'): + # Jan-01[-99] + sep = l[i + 1] + ymd.append(l[i + 2]) + + if i + 3 < len_l and l[i + 3] == sep: + # Jan-01-99 + ymd.append(l[i + 4]) + i += 2 + + i += 2 + + elif (i + 4 < len_l and l[i + 1] == l[i + 3] == ' ' and + info.pertain(l[i + 2])): + # Jan of 01 + # In this case, 01 is clearly year + if l[i + 4].isdigit(): + # Convert it here to become unambiguous + value = int(l[i + 4]) + year = str(info.convertyear(value)) + ymd.append(year, 'Y') + else: + # Wrong guess + pass + # TODO: not hit in tests + i += 4 + + # Check am/pm + elif info.ampm(l[i]) is not None: + value = info.ampm(l[i]) + val_is_ampm = self._ampm_valid(res.hour, res.ampm, fuzzy) + + if val_is_ampm: + res.hour = self._adjust_ampm(res.hour, value) + res.ampm = value + + elif fuzzy: + skipped_idxs.append(i) + + # Check for a timezone name + elif self._could_be_tzname(res.hour, res.tzname, res.tzoffset, l[i]): + res.tzname = l[i] + res.tzoffset = info.tzoffset(res.tzname) + + # Check for something like GMT+3, or BRST+3. Notice + # that it doesn't mean "I am 3 hours after GMT", but + # "my time +3 is GMT". If found, we reverse the + # logic so that timezone parsing code will get it + # right. + if i + 1 < len_l and l[i + 1] in ('+', '-'): + l[i + 1] = ('+', '-')[l[i + 1] == '+'] + res.tzoffset = None + if info.utczone(res.tzname): + # With something like GMT+3, the timezone + # is *not* GMT. + res.tzname = None + + # Check for a numbered timezone + elif res.hour is not None and l[i] in ('+', '-'): + signal = (-1, 1)[l[i] == '+'] + len_li = len(l[i + 1]) + + # TODO: check that l[i + 1] is integer? + if len_li == 4: + # -0300 + hour_offset = int(l[i + 1][:2]) + min_offset = int(l[i + 1][2:]) + elif i + 2 < len_l and l[i + 2] == ':': + # -03:00 + hour_offset = int(l[i + 1]) + min_offset = int(l[i + 3]) # TODO: Check that l[i+3] is minute-like? 
+ i += 2 + elif len_li <= 2: + # -[0]3 + hour_offset = int(l[i + 1][:2]) + min_offset = 0 + else: + raise ValueError(timestr) + + res.tzoffset = signal * (hour_offset * 3600 + min_offset * 60) + + # Look for a timezone name between parenthesis + if (i + 5 < len_l and + info.jump(l[i + 2]) and l[i + 3] == '(' and + l[i + 5] == ')' and + 3 <= len(l[i + 4]) and + self._could_be_tzname(res.hour, res.tzname, + None, l[i + 4])): + # -0300 (BRST) + res.tzname = l[i + 4] + i += 4 + + i += 1 + + # Check jumps + elif not (info.jump(l[i]) or fuzzy): + raise ValueError(timestr) + + else: + skipped_idxs.append(i) + i += 1 + + # Process year/month/day + year, month, day = ymd.resolve_ymd(yearfirst, dayfirst) + + res.century_specified = ymd.century_specified + res.year = year + res.month = month + res.day = day + + except (IndexError, ValueError): + return None, None + + if not info.validate(res): + return None, None + + if fuzzy_with_tokens: + skipped_tokens = self._recombine_skipped(l, skipped_idxs) + return res, tuple(skipped_tokens) + else: + return res, None + + def _parse_numeric_token(self, tokens, idx, info, ymd, res, fuzzy): + # Token is a number + value_repr = tokens[idx] + try: + value = self._to_decimal(value_repr) + except Exception as e: + six.raise_from(ValueError('Unknown numeric token'), e) + + len_li = len(value_repr) + + len_l = len(tokens) + + if (len(ymd) == 3 and len_li in (2, 4) and + res.hour is None and + (idx + 1 >= len_l or + (tokens[idx + 1] != ':' and + info.hms(tokens[idx + 1]) is None))): + # 19990101T23[59] + s = tokens[idx] + res.hour = int(s[:2]) + + if len_li == 4: + res.minute = int(s[2:]) + + elif len_li == 6 or (len_li > 6 and tokens[idx].find('.') == 6): + # YYMMDD or HHMMSS[.ss] + s = tokens[idx] + + if not ymd and '.' not in tokens[idx]: + ymd.append(s[:2]) + ymd.append(s[2:4]) + ymd.append(s[4:]) + else: + # 19990101T235959[.59] + + # TODO: Check if res attributes already set. + res.hour = int(s[:2]) + res.minute = int(s[2:4]) + res.second, res.microsecond = self._parsems(s[4:]) + + elif len_li in (8, 12, 14): + # YYYYMMDD + s = tokens[idx] + ymd.append(s[:4], 'Y') + ymd.append(s[4:6]) + ymd.append(s[6:8]) + + if len_li > 8: + res.hour = int(s[8:10]) + res.minute = int(s[10:12]) + + if len_li > 12: + res.second = int(s[12:]) + + elif self._find_hms_idx(idx, tokens, info, allow_jump=True) is not None: + # HH[ ]h or MM[ ]m or SS[.ss][ ]s + hms_idx = self._find_hms_idx(idx, tokens, info, allow_jump=True) + (idx, hms) = self._parse_hms(idx, tokens, info, hms_idx) + if hms is not None: + # TODO: checking that hour/minute/second are not + # already set? + self._assign_hms(res, value_repr, hms) + + elif idx + 2 < len_l and tokens[idx + 1] == ':': + # HH:MM[:SS[.ss]] + res.hour = int(value) + value = self._to_decimal(tokens[idx + 2]) # TODO: try/except for this? 
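+                # (a fractional minute such as "10:36.5" yields
+                # minute 36, second 30)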
+ (res.minute, res.second) = self._parse_min_sec(value) + + if idx + 4 < len_l and tokens[idx + 3] == ':': + res.second, res.microsecond = self._parsems(tokens[idx + 4]) + + idx += 2 + + idx += 2 + + elif idx + 1 < len_l and tokens[idx + 1] in ('-', '/', '.'): + sep = tokens[idx + 1] + ymd.append(value_repr) + + if idx + 2 < len_l and not info.jump(tokens[idx + 2]): + if tokens[idx + 2].isdigit(): + # 01-01[-01] + ymd.append(tokens[idx + 2]) + else: + # 01-Jan[-01] + value = info.month(tokens[idx + 2]) + + if value is not None: + ymd.append(value, 'M') + else: + raise ValueError() + + if idx + 3 < len_l and tokens[idx + 3] == sep: + # We have three members + value = info.month(tokens[idx + 4]) + + if value is not None: + ymd.append(value, 'M') + else: + ymd.append(tokens[idx + 4]) + idx += 2 + + idx += 1 + idx += 1 + + elif idx + 1 >= len_l or info.jump(tokens[idx + 1]): + if idx + 2 < len_l and info.ampm(tokens[idx + 2]) is not None: + # 12 am + hour = int(value) + res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 2])) + idx += 1 + else: + # Year, month or day + ymd.append(value) + idx += 1 + + elif info.ampm(tokens[idx + 1]) is not None and (0 <= value < 24): + # 12am + hour = int(value) + res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 1])) + idx += 1 + + elif ymd.could_be_day(value): + ymd.append(value) + + elif not fuzzy: + raise ValueError() + + return idx + + def _find_hms_idx(self, idx, tokens, info, allow_jump): + len_l = len(tokens) + + if idx+1 < len_l and info.hms(tokens[idx+1]) is not None: + # There is an "h", "m", or "s" label following this token. We take + # assign the upcoming label to the current token. + # e.g. the "12" in 12h" + hms_idx = idx + 1 + + elif (allow_jump and idx+2 < len_l and tokens[idx+1] == ' ' and + info.hms(tokens[idx+2]) is not None): + # There is a space and then an "h", "m", or "s" label. + # e.g. the "12" in "12 h" + hms_idx = idx + 2 + + elif idx > 0 and info.hms(tokens[idx-1]) is not None: + # There is a "h", "m", or "s" preceding this token. Since neither + # of the previous cases was hit, there is no label following this + # token, so we use the previous label. + # e.g. the "04" in "12h04" + hms_idx = idx-1 + + elif (1 < idx == len_l-1 and tokens[idx-1] == ' ' and + info.hms(tokens[idx-2]) is not None): + # If we are looking at the final token, we allow for a + # backward-looking check to skip over a space. + # TODO: Are we sure this is the right condition here? + hms_idx = idx - 2 + + else: + hms_idx = None + + return hms_idx + + def _assign_hms(self, res, value_repr, hms): + # See GH issue #427, fixing float rounding + value = self._to_decimal(value_repr) + + if hms == 0: + # Hour + res.hour = int(value) + if value % 1: + res.minute = int(60*(value % 1)) + + elif hms == 1: + (res.minute, res.second) = self._parse_min_sec(value) + + elif hms == 2: + (res.second, res.microsecond) = self._parsems(value_repr) + + def _could_be_tzname(self, hour, tzname, tzoffset, token): + return (hour is not None and + tzname is None and + tzoffset is None and + len(token) <= 5 and + (all(x in string.ascii_uppercase for x in token) + or token in self.info.UTCZONE)) + + def _ampm_valid(self, hour, ampm, fuzzy): + """ + For fuzzy parsing, 'a' or 'am' (both valid English words) + may erroneously trigger the AM/PM flag. Deal with that + here. + """ + val_is_ampm = True + + # If there's already an AM/PM flag, this one isn't one. 
+ if fuzzy and ampm is not None: + val_is_ampm = False + + # If AM/PM is found and hour is not, raise a ValueError + if hour is None: + if fuzzy: + val_is_ampm = False + else: + raise ValueError('No hour specified with AM or PM flag.') + elif not 0 <= hour <= 12: + # If AM/PM is found, it's a 12 hour clock, so raise + # an error for invalid range + if fuzzy: + val_is_ampm = False + else: + raise ValueError('Invalid hour specified for 12-hour clock.') + + return val_is_ampm + + def _adjust_ampm(self, hour, ampm): + if hour < 12 and ampm == 1: + hour += 12 + elif hour == 12 and ampm == 0: + hour = 0 + return hour + + def _parse_min_sec(self, value): + # TODO: Every usage of this function sets res.second to the return + # value. Are there any cases where second will be returned as None and + # we *don't* want to set res.second = None? + minute = int(value) + second = None + + sec_remainder = value % 1 + if sec_remainder: + second = int(60 * sec_remainder) + return (minute, second) + + def _parse_hms(self, idx, tokens, info, hms_idx): + # TODO: Is this going to admit a lot of false-positives for when we + # just happen to have digits and "h", "m" or "s" characters in non-date + # text? I guess hex hashes won't have that problem, but there's plenty + # of random junk out there. + if hms_idx is None: + hms = None + new_idx = idx + elif hms_idx > idx: + hms = info.hms(tokens[hms_idx]) + new_idx = hms_idx + else: + # Looking backwards, increment one. + hms = info.hms(tokens[hms_idx]) + 1 + new_idx = idx + + return (new_idx, hms) + + # ------------------------------------------------------------------ + # Handling for individual tokens. These are kept as methods instead + # of functions for the sake of customizability via subclassing. + + def _parsems(self, value): + """Parse a I[.F] seconds value into (seconds, microseconds).""" + if "." not in value: + return int(value), 0 + else: + i, f = value.split(".") + return int(i), int(f.ljust(6, "0")[:6]) + + def _to_decimal(self, val): + try: + decimal_value = Decimal(val) + # See GH 662, edge case, infinite value should not be converted + # via `_to_decimal` + if not decimal_value.is_finite(): + raise ValueError("Converted decimal value is infinite or NaN") + except Exception as e: + msg = "Could not convert %s to decimal" % val + six.raise_from(ValueError(msg), e) + else: + return decimal_value + + # ------------------------------------------------------------------ + # Post-Parsing construction of datetime output. These are kept as + # methods instead of functions for the sake of customizability via + # subclassing. 
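+    # (for instance, a subclass may override _build_tzinfo to resolve
+    # exotic zone names, or _build_naive to change how missing fields
+    # merge with the ``default`` datetime)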
+ + def _build_tzinfo(self, tzinfos, tzname, tzoffset): + if callable(tzinfos): + tzdata = tzinfos(tzname, tzoffset) + else: + tzdata = tzinfos.get(tzname) + # handle case where tzinfo is paased an options that returns None + # eg tzinfos = {'BRST' : None} + if isinstance(tzdata, datetime.tzinfo) or tzdata is None: + tzinfo = tzdata + elif isinstance(tzdata, text_type): + tzinfo = tz.tzstr(tzdata) + elif isinstance(tzdata, integer_types): + tzinfo = tz.tzoffset(tzname, tzdata) + else: + raise TypeError("Offset must be tzinfo subclass, tz string, " + "or int offset.") + return tzinfo + + def _build_tzaware(self, naive, res, tzinfos): + if (callable(tzinfos) or (tzinfos and res.tzname in tzinfos)): + tzinfo = self._build_tzinfo(tzinfos, res.tzname, res.tzoffset) + aware = naive.replace(tzinfo=tzinfo) + aware = self._assign_tzname(aware, res.tzname) + + elif res.tzname and res.tzname in time.tzname: + aware = naive.replace(tzinfo=tz.tzlocal()) + + # Handle ambiguous local datetime + aware = self._assign_tzname(aware, res.tzname) + + # This is mostly relevant for winter GMT zones parsed in the UK + if (aware.tzname() != res.tzname and + res.tzname in self.info.UTCZONE): + aware = aware.replace(tzinfo=tz.UTC) + + elif res.tzoffset == 0: + aware = naive.replace(tzinfo=tz.UTC) + + elif res.tzoffset: + aware = naive.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset)) + + elif not res.tzname and not res.tzoffset: + # i.e. no timezone information was found. + aware = naive + + elif res.tzname: + # tz-like string was parsed but we don't know what to do + # with it + warnings.warn("tzname {tzname} identified but not understood. " + "Pass `tzinfos` argument in order to correctly " + "return a timezone-aware datetime. In a future " + "version, this will raise an " + "exception.".format(tzname=res.tzname), + category=UnknownTimezoneWarning) + aware = naive + + return aware + + def _build_naive(self, res, default): + repl = {} + for attr in ("year", "month", "day", "hour", + "minute", "second", "microsecond"): + value = getattr(res, attr) + if value is not None: + repl[attr] = value + + if 'day' not in repl: + # If the default day exceeds the last day of the month, fall back + # to the end of the month. + cyear = default.year if res.year is None else res.year + cmonth = default.month if res.month is None else res.month + cday = default.day if res.day is None else res.day + + if cday > monthrange(cyear, cmonth)[1]: + repl['day'] = monthrange(cyear, cmonth)[1] + + naive = default.replace(**repl) + + if res.weekday is not None and not res.day: + naive = naive + relativedelta.relativedelta(weekday=res.weekday) + + return naive + + def _assign_tzname(self, dt, tzname): + if dt.tzname() != tzname: + new_dt = tz.enfold(dt, fold=1) + if new_dt.tzname() == tzname: + return new_dt + + return dt + + def _recombine_skipped(self, tokens, skipped_idxs): + """ + >>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"] + >>> skipped_idxs = [0, 1, 2, 5] + >>> _recombine_skipped(tokens, skipped_idxs) + ["foo bar", "baz"] + """ + skipped_tokens = [] + for i, idx in enumerate(sorted(skipped_idxs)): + if i > 0 and idx - 1 == skipped_idxs[i - 1]: + skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx] + else: + skipped_tokens.append(tokens[idx]) + + return skipped_tokens + + +DEFAULTPARSER = parser() + + +def parse(timestr, parserinfo=None, **kwargs): + """ + + Parse a string in one of the supported formats, using the + ``parserinfo`` parameters. + + :param timestr: + A string containing a date/time stamp. 
+
+
+def parse(timestr, parserinfo=None, **kwargs):
+    """
+
+    Parse a string in one of the supported formats, using the
+    ``parserinfo`` parameters.
+
+    :param timestr:
+        A string containing a date/time stamp.
+
+    :param parserinfo:
+        A :class:`parserinfo` object containing parameters for the parser.
+        If ``None``, the default arguments to the :class:`parserinfo`
+        constructor are used.
+
+    The ``**kwargs`` parameter takes the following keyword arguments:
+
+    :param default:
+        The default datetime object; if this is a datetime object and not
+        ``None``, elements specified in ``timestr`` replace elements in the
+        default object.
+
+    :param ignoretz:
+        If set ``True``, time zones in parsed strings are ignored and a naive
+        :class:`datetime` object is returned.
+
+    :param tzinfos:
+        Additional time zone names / aliases which may be present in the
+        string. This argument maps time zone names (and optionally offsets
+        from those time zones) to time zones. This parameter can be a
+        dictionary with timezone aliases mapping time zone names to time
+        zones or a function taking two parameters (``tzname`` and
+        ``tzoffset``) and returning a time zone.
+
+        The timezones to which the names are mapped can be an integer
+        offset from UTC in seconds or a :class:`tzinfo` object.
+
+        .. doctest::
+           :options: +NORMALIZE_WHITESPACE
+
+            >>> from dateutil.parser import parse
+            >>> from dateutil.tz import gettz
+            >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")}
+            >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos)
+            datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200))
+            >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos)
+            datetime.datetime(2012, 1, 19, 17, 21,
+                              tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago'))
+
+        This parameter is ignored if ``ignoretz`` is set.
+
+    :param dayfirst:
+        Whether to interpret the first value in an ambiguous 3-integer date
+        (e.g. 01/05/09) as the day (``True``) or month (``False``). If
+        ``yearfirst`` is set to ``True``, this distinguishes between YDM and
+        YMD. If set to ``None``, this value is retrieved from the current
+        :class:`parserinfo` object (which itself defaults to ``False``).
+
+    :param yearfirst:
+        Whether to interpret the first value in an ambiguous 3-integer date
+        (e.g. 01/05/09) as the year. If ``True``, the first number is taken to
+        be the year, otherwise the last number is taken to be the year. If
+        this is set to ``None``, the value is retrieved from the current
+        :class:`parserinfo` object (which itself defaults to ``False``).
+
+    :param fuzzy:
+        Whether to allow fuzzy parsing, allowing for strings like "Today is
+        January 1, 2047 at 8:21:00AM".
+
+    :param fuzzy_with_tokens:
+        If ``True``, ``fuzzy`` is automatically set to True, and the parser
+        will return a tuple where the first element is the parsed
+        :class:`datetime.datetime` and the second element is
+        a tuple containing the portions of the string which were ignored:
+
+        .. doctest::
+
+            >>> from dateutil.parser import parse
+            >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)
+            (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at '))
+
+    :return:
+        Returns a :class:`datetime.datetime` object or, if the
+        ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the
+        first element being a :class:`datetime.datetime` object, the second
+        a tuple containing the fuzzy tokens.
+
+    :raises ParserError:
+        Raised for invalid or unknown string formats, if the provided
+        :class:`tzinfo` is not in a valid format, or if an invalid date would
+        be created.
+
+    :raises OverflowError:
+        Raised if the parsed date exceeds the largest valid C integer on
+        your system.
+ """ + if parserinfo: + return parser(parserinfo).parse(timestr, **kwargs) + else: + return DEFAULTPARSER.parse(timestr, **kwargs) + + +class _tzparser(object): + + class _result(_resultbase): + + __slots__ = ["stdabbr", "stdoffset", "dstabbr", "dstoffset", + "start", "end"] + + class _attr(_resultbase): + __slots__ = ["month", "week", "weekday", + "yday", "jyday", "day", "time"] + + def __repr__(self): + return self._repr("") + + def __init__(self): + _resultbase.__init__(self) + self.start = self._attr() + self.end = self._attr() + + def parse(self, tzstr): + res = self._result() + l = [x for x in re.split(r'([,:.]|[a-zA-Z]+|[0-9]+)',tzstr) if x] + used_idxs = list() + try: + + len_l = len(l) + + i = 0 + while i < len_l: + # BRST+3[BRDT[+2]] + j = i + while j < len_l and not [x for x in l[j] + if x in "0123456789:,-+"]: + j += 1 + if j != i: + if not res.stdabbr: + offattr = "stdoffset" + res.stdabbr = "".join(l[i:j]) + else: + offattr = "dstoffset" + res.dstabbr = "".join(l[i:j]) + + for ii in range(j): + used_idxs.append(ii) + i = j + if (i < len_l and (l[i] in ('+', '-') or l[i][0] in + "0123456789")): + if l[i] in ('+', '-'): + # Yes, that's right. See the TZ variable + # documentation. + signal = (1, -1)[l[i] == '+'] + used_idxs.append(i) + i += 1 + else: + signal = -1 + len_li = len(l[i]) + if len_li == 4: + # -0300 + setattr(res, offattr, (int(l[i][:2]) * 3600 + + int(l[i][2:]) * 60) * signal) + elif i + 1 < len_l and l[i + 1] == ':': + # -03:00 + setattr(res, offattr, + (int(l[i]) * 3600 + + int(l[i + 2]) * 60) * signal) + used_idxs.append(i) + i += 2 + elif len_li <= 2: + # -[0]3 + setattr(res, offattr, + int(l[i][:2]) * 3600 * signal) + else: + return None + used_idxs.append(i) + i += 1 + if res.dstabbr: + break + else: + break + + + if i < len_l: + for j in range(i, len_l): + if l[j] == ';': + l[j] = ',' + + assert l[i] == ',' + + i += 1 + + if i >= len_l: + pass + elif (8 <= l.count(',') <= 9 and + not [y for x in l[i:] if x != ',' + for y in x if y not in "0123456789+-"]): + # GMT0BST,3,0,30,3600,10,0,26,7200[,3600] + for x in (res.start, res.end): + x.month = int(l[i]) + used_idxs.append(i) + i += 2 + if l[i] == '-': + value = int(l[i + 1]) * -1 + used_idxs.append(i) + i += 1 + else: + value = int(l[i]) + used_idxs.append(i) + i += 2 + if value: + x.week = value + x.weekday = (int(l[i]) - 1) % 7 + else: + x.day = int(l[i]) + used_idxs.append(i) + i += 2 + x.time = int(l[i]) + used_idxs.append(i) + i += 2 + if i < len_l: + if l[i] in ('-', '+'): + signal = (-1, 1)[l[i] == "+"] + used_idxs.append(i) + i += 1 + else: + signal = 1 + used_idxs.append(i) + res.dstoffset = (res.stdoffset + int(l[i]) * signal) + + # This was a made-up format that is not in normal use + warn(('Parsed time zone "%s"' % tzstr) + + 'is in a non-standard dateutil-specific format, which ' + + 'is now deprecated; support for parsing this format ' + + 'will be removed in future versions. 
It is recommended ' + + 'that you switch to a standard format like the GNU ' + + 'TZ variable format.', tz.DeprecatedTzFormatWarning) + elif (l.count(',') == 2 and l[i:].count('/') <= 2 and + not [y for x in l[i:] if x not in (',', '/', 'J', 'M', + '.', '-', ':') + for y in x if y not in "0123456789"]): + for x in (res.start, res.end): + if l[i] == 'J': + # non-leap year day (1 based) + used_idxs.append(i) + i += 1 + x.jyday = int(l[i]) + elif l[i] == 'M': + # month[-.]week[-.]weekday + used_idxs.append(i) + i += 1 + x.month = int(l[i]) + used_idxs.append(i) + i += 1 + assert l[i] in ('-', '.') + used_idxs.append(i) + i += 1 + x.week = int(l[i]) + if x.week == 5: + x.week = -1 + used_idxs.append(i) + i += 1 + assert l[i] in ('-', '.') + used_idxs.append(i) + i += 1 + x.weekday = (int(l[i]) - 1) % 7 + else: + # year day (zero based) + x.yday = int(l[i]) + 1 + + used_idxs.append(i) + i += 1 + + if i < len_l and l[i] == '/': + used_idxs.append(i) + i += 1 + # start time + len_li = len(l[i]) + if len_li == 4: + # -0300 + x.time = (int(l[i][:2]) * 3600 + + int(l[i][2:]) * 60) + elif i + 1 < len_l and l[i + 1] == ':': + # -03:00 + x.time = int(l[i]) * 3600 + int(l[i + 2]) * 60 + used_idxs.append(i) + i += 2 + if i + 1 < len_l and l[i + 1] == ':': + used_idxs.append(i) + i += 2 + x.time += int(l[i]) + elif len_li <= 2: + # -[0]3 + x.time = (int(l[i][:2]) * 3600) + else: + return None + used_idxs.append(i) + i += 1 + + assert i == len_l or l[i] == ',' + + i += 1 + + assert i >= len_l + + except (IndexError, ValueError, AssertionError): + return None + + unused_idxs = set(range(len_l)).difference(used_idxs) + res.any_unused_tokens = not {l[n] for n in unused_idxs}.issubset({",",":"}) + return res + + +DEFAULTTZPARSER = _tzparser() + + +def _parsetz(tzstr): + return DEFAULTTZPARSER.parse(tzstr) + + +class ParserError(ValueError): + """Exception subclass used for any failure to parse a datetime string. + + This is a subclass of :py:exc:`ValueError`, and should be raised any time + earlier versions of ``dateutil`` would have raised ``ValueError``. + + .. versionadded:: 2.8.1 + """ + def __str__(self): + try: + return self.args[0] % self.args[1:] + except (TypeError, IndexError): + return super(ParserError, self).__str__() + + def __repr__(self): + args = ", ".join("'%s'" % arg for arg in self.args) + return "%s(%s)" % (self.__class__.__name__, args) + + +class UnknownTimezoneWarning(RuntimeWarning): + """Raised when the parser finds a timezone it cannot parse into a tzinfo. + + .. versionadded:: 2.7.0 + """ +# vim:ts=4:sw=4:et diff --git a/.local/lib/python3.8/site-packages/dateutil/parser/isoparser.py b/.local/lib/python3.8/site-packages/dateutil/parser/isoparser.py new file mode 100644 index 0000000..5d7bee3 --- /dev/null +++ b/.local/lib/python3.8/site-packages/dateutil/parser/isoparser.py @@ -0,0 +1,416 @@ +# -*- coding: utf-8 -*- +""" +This module offers a parser for ISO-8601 strings + +It is intended to support all valid date, time and datetime formats per the +ISO-8601 specification. 
+ +..versionadded:: 2.7.0 +""" +from datetime import datetime, timedelta, time, date +import calendar +from dateutil import tz + +from functools import wraps + +import re +import six + +__all__ = ["isoparse", "isoparser"] + + +def _takes_ascii(f): + @wraps(f) + def func(self, str_in, *args, **kwargs): + # If it's a stream, read the whole thing + str_in = getattr(str_in, 'read', lambda: str_in)() + + # If it's unicode, turn it into bytes, since ISO-8601 only covers ASCII + if isinstance(str_in, six.text_type): + # ASCII is the same in UTF-8 + try: + str_in = str_in.encode('ascii') + except UnicodeEncodeError as e: + msg = 'ISO-8601 strings should contain only ASCII characters' + six.raise_from(ValueError(msg), e) + + return f(self, str_in, *args, **kwargs) + + return func + + +class isoparser(object): + def __init__(self, sep=None): + """ + :param sep: + A single character that separates date and time portions. If + ``None``, the parser will accept any single character. + For strict ISO-8601 adherence, pass ``'T'``. + """ + if sep is not None: + if (len(sep) != 1 or ord(sep) >= 128 or sep in '0123456789'): + raise ValueError('Separator must be a single, non-numeric ' + + 'ASCII character') + + sep = sep.encode('ascii') + + self._sep = sep + + @_takes_ascii + def isoparse(self, dt_str): + """ + Parse an ISO-8601 datetime string into a :class:`datetime.datetime`. + + An ISO-8601 datetime string consists of a date portion, followed + optionally by a time portion - the date and time portions are separated + by a single character separator, which is ``T`` in the official + standard. Incomplete date formats (such as ``YYYY-MM``) may *not* be + combined with a time portion. + + Supported date formats are: + + Common: + + - ``YYYY`` + - ``YYYY-MM`` or ``YYYYMM`` + - ``YYYY-MM-DD`` or ``YYYYMMDD`` + + Uncommon: + + - ``YYYY-Www`` or ``YYYYWww`` - ISO week (day defaults to 0) + - ``YYYY-Www-D`` or ``YYYYWwwD`` - ISO week and day + + The ISO week and day numbering follows the same logic as + :func:`datetime.date.isocalendar`. + + Supported time formats are: + + - ``hh`` + - ``hh:mm`` or ``hhmm`` + - ``hh:mm:ss`` or ``hhmmss`` + - ``hh:mm:ss.ssssss`` (Up to 6 sub-second digits) + + Midnight is a special case for `hh`, as the standard supports both + 00:00 and 24:00 as a representation. The decimal separator can be + either a dot or a comma. + + + .. caution:: + + Support for fractional components other than seconds is part of the + ISO-8601 standard, but is not currently implemented in this parser. + + Supported time zone offset formats are: + + - `Z` (UTC) + - `±HH:MM` + - `±HHMM` + - `±HH` + + Offsets will be represented as :class:`dateutil.tz.tzoffset` objects, + with the exception of UTC, which will be represented as + :class:`dateutil.tz.tzutc`. Time zone offsets equivalent to UTC (such + as `+00:00`) will also be represented as :class:`dateutil.tz.tzutc`. + + :param dt_str: + A string or stream containing only an ISO-8601 datetime string + + :return: + Returns a :class:`datetime.datetime` representing the string. + Unspecified components default to their lowest value. + + .. warning:: + + As of version 2.7.0, the strictness of the parser should not be + considered a stable part of the contract. Any valid ISO-8601 string + that parses correctly with the default settings will continue to + parse correctly in future versions, but invalid strings that + currently fail (e.g. ``2017-01-01T00:00+00:00:00``) are not + guaranteed to continue failing in future versions if they encode + a valid date. + + .. 
versionadded:: 2.7.0 + """ + components, pos = self._parse_isodate(dt_str) + + if len(dt_str) > pos: + if self._sep is None or dt_str[pos:pos + 1] == self._sep: + components += self._parse_isotime(dt_str[pos + 1:]) + else: + raise ValueError('String contains unknown ISO components') + + if len(components) > 3 and components[3] == 24: + components[3] = 0 + return datetime(*components) + timedelta(days=1) + + return datetime(*components) + + @_takes_ascii + def parse_isodate(self, datestr): + """ + Parse the date portion of an ISO string. + + :param datestr: + The string portion of an ISO string, without a separator + + :return: + Returns a :class:`datetime.date` object + """ + components, pos = self._parse_isodate(datestr) + if pos < len(datestr): + raise ValueError('String contains unknown ISO ' + + 'components: {!r}'.format(datestr.decode('ascii'))) + return date(*components) + + @_takes_ascii + def parse_isotime(self, timestr): + """ + Parse the time portion of an ISO string. + + :param timestr: + The time portion of an ISO string, without a separator + + :return: + Returns a :class:`datetime.time` object + """ + components = self._parse_isotime(timestr) + if components[0] == 24: + components[0] = 0 + return time(*components) + + @_takes_ascii + def parse_tzstr(self, tzstr, zero_as_utc=True): + """ + Parse a valid ISO time zone string. + + See :func:`isoparser.isoparse` for details on supported formats. + + :param tzstr: + A string representing an ISO time zone offset + + :param zero_as_utc: + Whether to return :class:`dateutil.tz.tzutc` for zero-offset zones + + :return: + Returns :class:`dateutil.tz.tzoffset` for offsets and + :class:`dateutil.tz.tzutc` for ``Z`` and (if ``zero_as_utc`` is + specified) offsets equivalent to UTC. + """ + return self._parse_tzstr(tzstr, zero_as_utc=zero_as_utc) + + # Constants + _DATE_SEP = b'-' + _TIME_SEP = b':' + _FRACTION_REGEX = re.compile(b'[\\.,]([0-9]+)') + + def _parse_isodate(self, dt_str): + try: + return self._parse_isodate_common(dt_str) + except ValueError: + return self._parse_isodate_uncommon(dt_str) + + def _parse_isodate_common(self, dt_str): + len_str = len(dt_str) + components = [1, 1, 1] + + if len_str < 4: + raise ValueError('ISO string too short') + + # Year + components[0] = int(dt_str[0:4]) + pos = 4 + if pos >= len_str: + return components, pos + + has_sep = dt_str[pos:pos + 1] == self._DATE_SEP + if has_sep: + pos += 1 + + # Month + if len_str - pos < 2: + raise ValueError('Invalid common month') + + components[1] = int(dt_str[pos:pos + 2]) + pos += 2 + + if pos >= len_str: + if has_sep: + return components, pos + else: + raise ValueError('Invalid ISO format') + + if has_sep: + if dt_str[pos:pos + 1] != self._DATE_SEP: + raise ValueError('Invalid separator in ISO string') + pos += 1 + + # Day + if len_str - pos < 2: + raise ValueError('Invalid common day') + components[2] = int(dt_str[pos:pos + 2]) + return components, pos + 2 + + def _parse_isodate_uncommon(self, dt_str): + if len(dt_str) < 4: + raise ValueError('ISO string too short') + + # All ISO formats start with the year + year = int(dt_str[0:4]) + + has_sep = dt_str[4:5] == self._DATE_SEP + + pos = 4 + has_sep # Skip '-' if it's there + if dt_str[pos:pos + 1] == b'W': + # YYYY-?Www-?D? 
+            pos += 1
+            weekno = int(dt_str[pos:pos + 2])
+            pos += 2
+
+            dayno = 1
+            if len(dt_str) > pos:
+                if (dt_str[pos:pos + 1] == self._DATE_SEP) != has_sep:
+                    raise ValueError('Inconsistent use of dash separator')
+
+                pos += has_sep
+
+                dayno = int(dt_str[pos:pos + 1])
+                pos += 1
+
+            base_date = self._calculate_weekdate(year, weekno, dayno)
+        else:
+            # YYYYDDD or YYYY-DDD
+            if len(dt_str) - pos < 3:
+                raise ValueError('Invalid ordinal day')
+
+            ordinal_day = int(dt_str[pos:pos + 3])
+            pos += 3
+
+            if ordinal_day < 1 or ordinal_day > (365 + calendar.isleap(year)):
+                raise ValueError('Invalid ordinal day' +
+                                 ' {} for year {}'.format(ordinal_day, year))
+
+            base_date = date(year, 1, 1) + timedelta(days=ordinal_day - 1)
+
+        components = [base_date.year, base_date.month, base_date.day]
+        return components, pos
+
+    def _calculate_weekdate(self, year, week, day):
+        """
+        Calculate the date corresponding to the ISO year-week-day calendar.
+
+        This function is effectively the inverse of
+        :func:`datetime.date.isocalendar`.
+
+        :param year:
+            The year in the ISO calendar
+
+        :param week:
+            The week in the ISO calendar - range is [1, 53]
+
+        :param day:
+            The day in the ISO calendar - range is [1 (MON), 7 (SUN)]
+
+        :return:
+            Returns a :class:`datetime.date`
+        """
+        if not 0 < week < 54:
+            raise ValueError('Invalid week: {}'.format(week))
+
+        if not 0 < day < 8:     # Range is 1-7
+            raise ValueError('Invalid weekday: {}'.format(day))
+
+        # Get week 1 for the specific year:
+        jan_4 = date(year, 1, 4)   # Week 1 always has January 4th in it
+        week_1 = jan_4 - timedelta(days=jan_4.isocalendar()[2] - 1)
+
+        # Now add the specific number of weeks and days to get what we want
+        week_offset = (week - 1) * 7 + (day - 1)
+        return week_1 + timedelta(days=week_offset)
+
+    def _parse_isotime(self, timestr):
+        len_str = len(timestr)
+        components = [0, 0, 0, 0, None]
+        pos = 0
+        comp = -1
+
+        if len_str < 2:
+            raise ValueError('ISO time too short')
+
+        has_sep = False
+
+        while pos < len_str and comp < 5:
+            comp += 1
+
+            if timestr[pos:pos + 1] in b'-+Zz':
+                # Detect time zone boundary
+                components[-1] = self._parse_tzstr(timestr[pos:])
+                pos = len_str
+                break
+
+            if comp == 1 and timestr[pos:pos+1] == self._TIME_SEP:
+                has_sep = True
+                pos += 1
+            elif comp == 2 and has_sep:
+                if timestr[pos:pos+1] != self._TIME_SEP:
+                    raise ValueError('Inconsistent use of colon separator')
+                pos += 1
+
+            if comp < 3:
+                # Hour, minute, second
+                components[comp] = int(timestr[pos:pos + 2])
+                pos += 2
+
+            if comp == 3:
+                # Fraction of a second
+                frac = self._FRACTION_REGEX.match(timestr[pos:])
+                if not frac:
+                    continue
+
+                us_str = frac.group(1)[:6]  # Truncate to microseconds
+                components[comp] = int(us_str) * 10**(6 - len(us_str))
+                pos += len(frac.group())
+
+        if pos < len_str:
+            raise ValueError('Unused components in ISO string')
+
+        if components[0] == 24:
+            # Standard supports 00:00 and 24:00 as representations of midnight
+            if any(component != 0 for component in components[1:4]):
+                raise ValueError('Hour may only be 24 at 24:00:00.000')
+
+        return components
+
+    def _parse_tzstr(self, tzstr, zero_as_utc=True):
+        if tzstr == b'Z' or tzstr == b'z':
+            return tz.UTC
+
+        if len(tzstr) not in {3, 5, 6}:
+            raise ValueError('Time zone offset must be 1, 3, 5 or 6 characters')
+
+        if tzstr[0:1] == b'-':
+            mult = -1
+        elif tzstr[0:1] == b'+':
+            mult = 1
+        else:
+            raise ValueError('Time zone offset requires sign')
+
+        hours = int(tzstr[1:3])
+        if len(tzstr) == 3:
+            minutes = 0
+        else:
+            minutes = int(tzstr[(4 if tzstr[3:4] == self._TIME_SEP else 3):])
+
+        if zero_as_utc and hours == 0 and minutes == 0:
+            return tz.UTC
+        else:
+            if minutes > 59:
+                raise ValueError('Invalid minutes in time zone offset')
+
+            if hours > 23:
+                raise ValueError('Invalid hours in time zone offset')
+
+            return tz.tzoffset(None, mult * (hours * 60 + minutes) * 60)
+
+
+DEFAULT_ISOPARSER = isoparser()
+isoparse = DEFAULT_ISOPARSER.isoparse
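# ---------------------------------------------------------------------
# Illustration (not part of the vendored file): a short usage sketch of
# the isoparser API defined above; all names used are public dateutil
# ones.
from dateutil.parser import isoparse, isoparser

isoparse("2018-W13-2")                    # ISO week date (_calculate_weekdate)
isoparse("2014-04-11T00:00:00Z")          # trailing Z maps to tz.UTC
isoparser(sep="T").isoparse("2003-09-25T10:49:41")   # strict 'T' separator
# ---------------------------------------------------------------------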
diff --git a/.local/lib/python3.8/site-packages/dateutil/relativedelta.py b/.local/lib/python3.8/site-packages/dateutil/relativedelta.py
new file mode 100644
index 0000000..a9e85f7
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/dateutil/relativedelta.py
@@ -0,0 +1,599 @@
+# -*- coding: utf-8 -*-
+import datetime
+import calendar
+
+import operator
+from math import copysign
+
+from six import integer_types
+from warnings import warn
+
+from ._common import weekday
+
+MO, TU, WE, TH, FR, SA, SU = weekdays = tuple(weekday(x) for x in range(7))
+
+__all__ = ["relativedelta", "MO", "TU", "WE", "TH", "FR", "SA", "SU"]
+
+
+class relativedelta(object):
+    """
+    The relativedelta type is designed to be applied to an existing datetime and
+    can replace specific components of that datetime, or represent an interval
+    of time.
+
+    It is based on the specification of the excellent work done by M.-A. Lemburg
+    in his
+    `mx.DateTime `_ extension.
+    However, notice that this type does *NOT* implement the same algorithm as
+    his work. Do *NOT* expect it to behave like mx.DateTime's counterpart.
+
+    There are two different ways to build a relativedelta instance. The
+    first one is passing it two date/datetime classes::
+
+        relativedelta(datetime1, datetime2)
+
+    The second one is passing it any number of the following keyword arguments::
+
+        relativedelta(arg1=x,arg2=y,arg3=z...)
+
+    year, month, day, hour, minute, second, microsecond:
+        Absolute information (argument is singular); adding or subtracting a
+        relativedelta with absolute information does not perform an arithmetic
+        operation, but rather REPLACES the corresponding value in the
+        original datetime with the value(s) in relativedelta.
+
+    years, months, weeks, days, hours, minutes, seconds, microseconds:
+        Relative information, may be negative (argument is plural); adding
+        or subtracting a relativedelta with relative information performs
+        the corresponding arithmetic operation on the original datetime value
+        with the information in the relativedelta.
+
+    weekday:
+        One of the weekday instances (MO, TU, etc) available in the
+        relativedelta module. These instances may receive a parameter N,
+        specifying the Nth weekday, which could be positive or negative
+        (like MO(+1) or MO(-2)). Not specifying it is the same as specifying
+        +1. You can also use an integer, where 0=MO. This argument is always
+        relative e.g. if the calculated date is already Monday, using MO(1)
+        or MO(-1) won't change the day. To effectively make it absolute, use
+        it in combination with the day argument (e.g. day=1, MO(1) for first
+        Monday of the month).
+
+    leapdays:
+        Will add given days to the date found, if the year is a leap
+        year, and the date found is after February 28.
+
+    yearday, nlyearday:
+        Set the yearday or the non-leap year day (jump leap days).
+        These are converted to day/month/leapdays information.
+
+    There are relative and absolute forms of the keyword
+    arguments. The plural is relative, and the singular is
+    absolute.
For each argument in the order below, the absolute form + is applied first (by setting each attribute to that value) and + then the relative form (by adding the value to the attribute). + + The order of attributes considered when this relativedelta is + added to a datetime is: + + 1. Year + 2. Month + 3. Day + 4. Hours + 5. Minutes + 6. Seconds + 7. Microseconds + + Finally, weekday is applied, using the rule described above. + + For example + + >>> from datetime import datetime + >>> from dateutil.relativedelta import relativedelta, MO + >>> dt = datetime(2018, 4, 9, 13, 37, 0) + >>> delta = relativedelta(hours=25, day=1, weekday=MO(1)) + >>> dt + delta + datetime.datetime(2018, 4, 2, 14, 37) + + First, the day is set to 1 (the first of the month), then 25 hours + are added, to get to the 2nd day and 14th hour, finally the + weekday is applied, but since the 2nd is already a Monday there is + no effect. + + """ + + def __init__(self, dt1=None, dt2=None, + years=0, months=0, days=0, leapdays=0, weeks=0, + hours=0, minutes=0, seconds=0, microseconds=0, + year=None, month=None, day=None, weekday=None, + yearday=None, nlyearday=None, + hour=None, minute=None, second=None, microsecond=None): + + if dt1 and dt2: + # datetime is a subclass of date. So both must be date + if not (isinstance(dt1, datetime.date) and + isinstance(dt2, datetime.date)): + raise TypeError("relativedelta only diffs datetime/date") + + # We allow two dates, or two datetimes, so we coerce them to be + # of the same type + if (isinstance(dt1, datetime.datetime) != + isinstance(dt2, datetime.datetime)): + if not isinstance(dt1, datetime.datetime): + dt1 = datetime.datetime.fromordinal(dt1.toordinal()) + elif not isinstance(dt2, datetime.datetime): + dt2 = datetime.datetime.fromordinal(dt2.toordinal()) + + self.years = 0 + self.months = 0 + self.days = 0 + self.leapdays = 0 + self.hours = 0 + self.minutes = 0 + self.seconds = 0 + self.microseconds = 0 + self.year = None + self.month = None + self.day = None + self.weekday = None + self.hour = None + self.minute = None + self.second = None + self.microsecond = None + self._has_time = 0 + + # Get year / month delta between the two + months = (dt1.year - dt2.year) * 12 + (dt1.month - dt2.month) + self._set_months(months) + + # Remove the year/month delta so the timedelta is just well-defined + # time units (seconds, days and microseconds) + dtm = self.__radd__(dt2) + + # If we've overshot our target, make an adjustment + if dt1 < dt2: + compare = operator.gt + increment = 1 + else: + compare = operator.lt + increment = -1 + + while compare(dt1, dtm): + months += increment + self._set_months(months) + dtm = self.__radd__(dt2) + + # Get the timedelta between the "months-adjusted" date and dt1 + delta = dt1 - dtm + self.seconds = delta.seconds + delta.days * 86400 + self.microseconds = delta.microseconds + else: + # Check for non-integer values in integer-only quantities + if any(x is not None and x != int(x) for x in (years, months)): + raise ValueError("Non-integer years and months are " + "ambiguous and not currently supported.") + + # Relative information + self.years = int(years) + self.months = int(months) + self.days = days + weeks * 7 + self.leapdays = leapdays + self.hours = hours + self.minutes = minutes + self.seconds = seconds + self.microseconds = microseconds + + # Absolute information + self.year = year + self.month = month + self.day = day + self.hour = hour + self.minute = minute + self.second = second + self.microsecond = microsecond + + if any(x is not None and 
int(x) != x + for x in (year, month, day, hour, + minute, second, microsecond)): + # For now we'll deprecate floats - later it'll be an error. + warn("Non-integer value passed as absolute information. " + + "This is not a well-defined condition and will raise " + + "errors in future versions.", DeprecationWarning) + + if isinstance(weekday, integer_types): + self.weekday = weekdays[weekday] + else: + self.weekday = weekday + + yday = 0 + if nlyearday: + yday = nlyearday + elif yearday: + yday = yearday + if yearday > 59: + self.leapdays = -1 + if yday: + ydayidx = [31, 59, 90, 120, 151, 181, 212, + 243, 273, 304, 334, 366] + for idx, ydays in enumerate(ydayidx): + if yday <= ydays: + self.month = idx+1 + if idx == 0: + self.day = yday + else: + self.day = yday-ydayidx[idx-1] + break + else: + raise ValueError("invalid year day (%d)" % yday) + + self._fix() + + def _fix(self): + if abs(self.microseconds) > 999999: + s = _sign(self.microseconds) + div, mod = divmod(self.microseconds * s, 1000000) + self.microseconds = mod * s + self.seconds += div * s + if abs(self.seconds) > 59: + s = _sign(self.seconds) + div, mod = divmod(self.seconds * s, 60) + self.seconds = mod * s + self.minutes += div * s + if abs(self.minutes) > 59: + s = _sign(self.minutes) + div, mod = divmod(self.minutes * s, 60) + self.minutes = mod * s + self.hours += div * s + if abs(self.hours) > 23: + s = _sign(self.hours) + div, mod = divmod(self.hours * s, 24) + self.hours = mod * s + self.days += div * s + if abs(self.months) > 11: + s = _sign(self.months) + div, mod = divmod(self.months * s, 12) + self.months = mod * s + self.years += div * s + if (self.hours or self.minutes or self.seconds or self.microseconds + or self.hour is not None or self.minute is not None or + self.second is not None or self.microsecond is not None): + self._has_time = 1 + else: + self._has_time = 0 + + @property + def weeks(self): + return int(self.days / 7.0) + + @weeks.setter + def weeks(self, value): + self.days = self.days - (self.weeks * 7) + value * 7 + + def _set_months(self, months): + self.months = months + if abs(self.months) > 11: + s = _sign(self.months) + div, mod = divmod(self.months * s, 12) + self.months = mod * s + self.years = div * s + else: + self.years = 0 + + def normalized(self): + """ + Return a version of this object represented entirely using integer + values for the relative attributes. + + >>> relativedelta(days=1.5, hours=2).normalized() + relativedelta(days=+1, hours=+14) + + :return: + Returns a :class:`dateutil.relativedelta.relativedelta` object. 
+ """ + # Cascade remainders down (rounding each to roughly nearest microsecond) + days = int(self.days) + + hours_f = round(self.hours + 24 * (self.days - days), 11) + hours = int(hours_f) + + minutes_f = round(self.minutes + 60 * (hours_f - hours), 10) + minutes = int(minutes_f) + + seconds_f = round(self.seconds + 60 * (minutes_f - minutes), 8) + seconds = int(seconds_f) + + microseconds = round(self.microseconds + 1e6 * (seconds_f - seconds)) + + # Constructor carries overflow back up with call to _fix() + return self.__class__(years=self.years, months=self.months, + days=days, hours=hours, minutes=minutes, + seconds=seconds, microseconds=microseconds, + leapdays=self.leapdays, year=self.year, + month=self.month, day=self.day, + weekday=self.weekday, hour=self.hour, + minute=self.minute, second=self.second, + microsecond=self.microsecond) + + def __add__(self, other): + if isinstance(other, relativedelta): + return self.__class__(years=other.years + self.years, + months=other.months + self.months, + days=other.days + self.days, + hours=other.hours + self.hours, + minutes=other.minutes + self.minutes, + seconds=other.seconds + self.seconds, + microseconds=(other.microseconds + + self.microseconds), + leapdays=other.leapdays or self.leapdays, + year=(other.year if other.year is not None + else self.year), + month=(other.month if other.month is not None + else self.month), + day=(other.day if other.day is not None + else self.day), + weekday=(other.weekday if other.weekday is not None + else self.weekday), + hour=(other.hour if other.hour is not None + else self.hour), + minute=(other.minute if other.minute is not None + else self.minute), + second=(other.second if other.second is not None + else self.second), + microsecond=(other.microsecond if other.microsecond + is not None else + self.microsecond)) + if isinstance(other, datetime.timedelta): + return self.__class__(years=self.years, + months=self.months, + days=self.days + other.days, + hours=self.hours, + minutes=self.minutes, + seconds=self.seconds + other.seconds, + microseconds=self.microseconds + other.microseconds, + leapdays=self.leapdays, + year=self.year, + month=self.month, + day=self.day, + weekday=self.weekday, + hour=self.hour, + minute=self.minute, + second=self.second, + microsecond=self.microsecond) + if not isinstance(other, datetime.date): + return NotImplemented + elif self._has_time and not isinstance(other, datetime.datetime): + other = datetime.datetime.fromordinal(other.toordinal()) + year = (self.year or other.year)+self.years + month = self.month or other.month + if self.months: + assert 1 <= abs(self.months) <= 12 + month += self.months + if month > 12: + year += 1 + month -= 12 + elif month < 1: + year -= 1 + month += 12 + day = min(calendar.monthrange(year, month)[1], + self.day or other.day) + repl = {"year": year, "month": month, "day": day} + for attr in ["hour", "minute", "second", "microsecond"]: + value = getattr(self, attr) + if value is not None: + repl[attr] = value + days = self.days + if self.leapdays and month > 2 and calendar.isleap(year): + days += self.leapdays + ret = (other.replace(**repl) + + datetime.timedelta(days=days, + hours=self.hours, + minutes=self.minutes, + seconds=self.seconds, + microseconds=self.microseconds)) + if self.weekday: + weekday, nth = self.weekday.weekday, self.weekday.n or 1 + jumpdays = (abs(nth) - 1) * 7 + if nth > 0: + jumpdays += (7 - ret.weekday() + weekday) % 7 + else: + jumpdays += (ret.weekday() - weekday) % 7 + jumpdays *= -1 + ret += 
datetime.timedelta(days=jumpdays) + return ret + + def __radd__(self, other): + return self.__add__(other) + + def __rsub__(self, other): + return self.__neg__().__radd__(other) + + def __sub__(self, other): + if not isinstance(other, relativedelta): + return NotImplemented # In case the other object defines __rsub__ + return self.__class__(years=self.years - other.years, + months=self.months - other.months, + days=self.days - other.days, + hours=self.hours - other.hours, + minutes=self.minutes - other.minutes, + seconds=self.seconds - other.seconds, + microseconds=self.microseconds - other.microseconds, + leapdays=self.leapdays or other.leapdays, + year=(self.year if self.year is not None + else other.year), + month=(self.month if self.month is not None else + other.month), + day=(self.day if self.day is not None else + other.day), + weekday=(self.weekday if self.weekday is not None else + other.weekday), + hour=(self.hour if self.hour is not None else + other.hour), + minute=(self.minute if self.minute is not None else + other.minute), + second=(self.second if self.second is not None else + other.second), + microsecond=(self.microsecond if self.microsecond + is not None else + other.microsecond)) + + def __abs__(self): + return self.__class__(years=abs(self.years), + months=abs(self.months), + days=abs(self.days), + hours=abs(self.hours), + minutes=abs(self.minutes), + seconds=abs(self.seconds), + microseconds=abs(self.microseconds), + leapdays=self.leapdays, + year=self.year, + month=self.month, + day=self.day, + weekday=self.weekday, + hour=self.hour, + minute=self.minute, + second=self.second, + microsecond=self.microsecond) + + def __neg__(self): + return self.__class__(years=-self.years, + months=-self.months, + days=-self.days, + hours=-self.hours, + minutes=-self.minutes, + seconds=-self.seconds, + microseconds=-self.microseconds, + leapdays=self.leapdays, + year=self.year, + month=self.month, + day=self.day, + weekday=self.weekday, + hour=self.hour, + minute=self.minute, + second=self.second, + microsecond=self.microsecond) + + def __bool__(self): + return not (not self.years and + not self.months and + not self.days and + not self.hours and + not self.minutes and + not self.seconds and + not self.microseconds and + not self.leapdays and + self.year is None and + self.month is None and + self.day is None and + self.weekday is None and + self.hour is None and + self.minute is None and + self.second is None and + self.microsecond is None) + # Compatibility with Python 2.x + __nonzero__ = __bool__ + + def __mul__(self, other): + try: + f = float(other) + except TypeError: + return NotImplemented + + return self.__class__(years=int(self.years * f), + months=int(self.months * f), + days=int(self.days * f), + hours=int(self.hours * f), + minutes=int(self.minutes * f), + seconds=int(self.seconds * f), + microseconds=int(self.microseconds * f), + leapdays=self.leapdays, + year=self.year, + month=self.month, + day=self.day, + weekday=self.weekday, + hour=self.hour, + minute=self.minute, + second=self.second, + microsecond=self.microsecond) + + __rmul__ = __mul__ + + def __eq__(self, other): + if not isinstance(other, relativedelta): + return NotImplemented + if self.weekday or other.weekday: + if not self.weekday or not other.weekday: + return False + if self.weekday.weekday != other.weekday.weekday: + return False + n1, n2 = self.weekday.n, other.weekday.n + if n1 != n2 and not ((not n1 or n1 == 1) and (not n2 or n2 == 1)): + return False + return (self.years == other.years and + 
self.months == other.months and + self.days == other.days and + self.hours == other.hours and + self.minutes == other.minutes and + self.seconds == other.seconds and + self.microseconds == other.microseconds and + self.leapdays == other.leapdays and + self.year == other.year and + self.month == other.month and + self.day == other.day and + self.hour == other.hour and + self.minute == other.minute and + self.second == other.second and + self.microsecond == other.microsecond) + + def __hash__(self): + return hash(( + self.weekday, + self.years, + self.months, + self.days, + self.hours, + self.minutes, + self.seconds, + self.microseconds, + self.leapdays, + self.year, + self.month, + self.day, + self.hour, + self.minute, + self.second, + self.microsecond, + )) + + def __ne__(self, other): + return not self.__eq__(other) + + def __div__(self, other): + try: + reciprocal = 1 / float(other) + except TypeError: + return NotImplemented + + return self.__mul__(reciprocal) + + __truediv__ = __div__ + + def __repr__(self): + l = [] + for attr in ["years", "months", "days", "leapdays", + "hours", "minutes", "seconds", "microseconds"]: + value = getattr(self, attr) + if value: + l.append("{attr}={value:+g}".format(attr=attr, value=value)) + for attr in ["year", "month", "day", "weekday", + "hour", "minute", "second", "microsecond"]: + value = getattr(self, attr) + if value is not None: + l.append("{attr}={value}".format(attr=attr, value=repr(value))) + return "{classname}({attrs})".format(classname=self.__class__.__name__, + attrs=", ".join(l)) + + +def _sign(x): + return int(copysign(1, x)) + +# vim:ts=4:sw=4:et diff --git a/.local/lib/python3.8/site-packages/dateutil/rrule.py b/.local/lib/python3.8/site-packages/dateutil/rrule.py new file mode 100644 index 0000000..b320339 --- /dev/null +++ b/.local/lib/python3.8/site-packages/dateutil/rrule.py @@ -0,0 +1,1737 @@ +# -*- coding: utf-8 -*- +""" +The rrule module offers a small, complete, and very fast, implementation of +the recurrence rules documented in the +`iCalendar RFC `_, +including support for caching of results. +""" +import calendar +import datetime +import heapq +import itertools +import re +import sys +from functools import wraps +# For warning about deprecation of until and count +from warnings import warn + +from six import advance_iterator, integer_types + +from six.moves import _thread, range + +from ._common import weekday as weekdaybase + +try: + from math import gcd +except ImportError: + from fractions import gcd + +__all__ = ["rrule", "rruleset", "rrulestr", + "YEARLY", "MONTHLY", "WEEKLY", "DAILY", + "HOURLY", "MINUTELY", "SECONDLY", + "MO", "TU", "WE", "TH", "FR", "SA", "SU"] + +# Every mask is 7 days longer to handle cross-year weekly periods. 
+M366MASK = tuple([1]*31+[2]*29+[3]*31+[4]*30+[5]*31+[6]*30 + + [7]*31+[8]*31+[9]*30+[10]*31+[11]*30+[12]*31+[1]*7) +M365MASK = list(M366MASK) +M29, M30, M31 = list(range(1, 30)), list(range(1, 31)), list(range(1, 32)) +MDAY366MASK = tuple(M31+M29+M31+M30+M31+M30+M31+M31+M30+M31+M30+M31+M31[:7]) +MDAY365MASK = list(MDAY366MASK) +M29, M30, M31 = list(range(-29, 0)), list(range(-30, 0)), list(range(-31, 0)) +NMDAY366MASK = tuple(M31+M29+M31+M30+M31+M30+M31+M31+M30+M31+M30+M31+M31[:7]) +NMDAY365MASK = list(NMDAY366MASK) +M366RANGE = (0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366) +M365RANGE = (0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365) +WDAYMASK = [0, 1, 2, 3, 4, 5, 6]*55 +del M29, M30, M31, M365MASK[59], MDAY365MASK[59], NMDAY365MASK[31] +MDAY365MASK = tuple(MDAY365MASK) +M365MASK = tuple(M365MASK) + +FREQNAMES = ['YEARLY', 'MONTHLY', 'WEEKLY', 'DAILY', 'HOURLY', 'MINUTELY', 'SECONDLY'] + +(YEARLY, + MONTHLY, + WEEKLY, + DAILY, + HOURLY, + MINUTELY, + SECONDLY) = list(range(7)) + +# Imported on demand. +easter = None +parser = None + + +class weekday(weekdaybase): + """ + This version of weekday does not allow n = 0. + """ + def __init__(self, wkday, n=None): + if n == 0: + raise ValueError("Can't create weekday with n==0") + + super(weekday, self).__init__(wkday, n) + + +MO, TU, WE, TH, FR, SA, SU = weekdays = tuple(weekday(x) for x in range(7)) + + +def _invalidates_cache(f): + """ + Decorator for rruleset methods which may invalidate the + cached length. + """ + @wraps(f) + def inner_func(self, *args, **kwargs): + rv = f(self, *args, **kwargs) + self._invalidate_cache() + return rv + + return inner_func + + +class rrulebase(object): + def __init__(self, cache=False): + if cache: + self._cache = [] + self._cache_lock = _thread.allocate_lock() + self._invalidate_cache() + else: + self._cache = None + self._cache_complete = False + self._len = None + + def __iter__(self): + if self._cache_complete: + return iter(self._cache) + elif self._cache is None: + return self._iter() + else: + return self._iter_cached() + + def _invalidate_cache(self): + if self._cache is not None: + self._cache = [] + self._cache_complete = False + self._cache_gen = self._iter() + + if self._cache_lock.locked(): + self._cache_lock.release() + + self._len = None + + def _iter_cached(self): + i = 0 + gen = self._cache_gen + cache = self._cache + acquire = self._cache_lock.acquire + release = self._cache_lock.release + while gen: + if i == len(cache): + acquire() + if self._cache_complete: + break + try: + for j in range(10): + cache.append(advance_iterator(gen)) + except StopIteration: + self._cache_gen = gen = None + self._cache_complete = True + break + release() + yield cache[i] + i += 1 + while i < self._len: + yield cache[i] + i += 1 + + def __getitem__(self, item): + if self._cache_complete: + return self._cache[item] + elif isinstance(item, slice): + if item.step and item.step < 0: + return list(iter(self))[item] + else: + return list(itertools.islice(self, + item.start or 0, + item.stop or sys.maxsize, + item.step or 1)) + elif item >= 0: + gen = iter(self) + try: + for i in range(item+1): + res = advance_iterator(gen) + except StopIteration: + raise IndexError + return res + else: + return list(iter(self))[item] + + def __contains__(self, item): + if self._cache_complete: + return item in self._cache + else: + for i in self: + if i == item: + return True + elif i > item: + return False + return False + + # __len__() introduces a large performance penalty. 
+    def count(self):
+        """ Returns the number of recurrences in this set. It will have to go
+        through the whole recurrence, if this hasn't been done before. """
+        if self._len is None:
+            for x in self:
+                pass
+        return self._len
+
+    def before(self, dt, inc=False):
+        """ Returns the last recurrence before the given datetime instance. The
+        inc keyword defines what happens if dt is an occurrence. With
+        inc=True, if dt itself is an occurrence, it will be returned. """
+        if self._cache_complete:
+            gen = self._cache
+        else:
+            gen = self
+        last = None
+        if inc:
+            for i in gen:
+                if i > dt:
+                    break
+                last = i
+        else:
+            for i in gen:
+                if i >= dt:
+                    break
+                last = i
+        return last
+
+    def after(self, dt, inc=False):
+        """ Returns the first recurrence after the given datetime instance. The
+        inc keyword defines what happens if dt is an occurrence. With
+        inc=True, if dt itself is an occurrence, it will be returned. """
+        if self._cache_complete:
+            gen = self._cache
+        else:
+            gen = self
+        if inc:
+            for i in gen:
+                if i >= dt:
+                    return i
+        else:
+            for i in gen:
+                if i > dt:
+                    return i
+        return None
+
+    def xafter(self, dt, count=None, inc=False):
+        """
+        Generator which yields up to `count` recurrences after the given
+        datetime instance, equivalent to `after`.
+
+        :param dt:
+            The datetime at which to start generating recurrences.
+
+        :param count:
+            The maximum number of recurrences to generate. If `None` (default),
+            dates are generated until the recurrence rule is exhausted.
+
+        :param inc:
+            If `dt` is an instance of the rule and `inc` is `True`, it is
+            included in the output.
+
+        :yields: Yields a sequence of `datetime` objects.
+        """
+
+        if self._cache_complete:
+            gen = self._cache
+        else:
+            gen = self
+
+        # Select the comparison function
+        if inc:
+            comp = lambda dc, dtc: dc >= dtc
+        else:
+            comp = lambda dc, dtc: dc > dtc
+
+        # Generate dates
+        n = 0
+        for d in gen:
+            if comp(d, dt):
+                if count is not None:
+                    n += 1
+                    if n > count:
+                        break
+
+                yield d
+
+    def between(self, after, before, inc=False, count=1):
+        """ Returns all the occurrences of the rrule between after and before.
+        The inc keyword defines what happens if after and/or before are
+        themselves occurrences. With inc=True, they will be included in the
+        list, if they are found in the recurrence set. """
+        if self._cache_complete:
+            gen = self._cache
+        else:
+            gen = self
+        started = False
+        l = []
+        if inc:
+            for i in gen:
+                if i > before:
+                    break
+                elif not started:
+                    if i >= after:
+                        started = True
+                        l.append(i)
+                else:
+                    l.append(i)
+        else:
+            for i in gen:
+                if i >= before:
+                    break
+                elif not started:
+                    if i > after:
+                        started = True
+                        l.append(i)
+                else:
+                    l.append(i)
+        return l
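# ---------------------------------------------------------------------
# Illustration (not part of the vendored file): a small usage sketch of
# the rrulebase query methods defined above; all names used are public
# dateutil ones.
from datetime import datetime
from dateutil.rrule import rrule, DAILY

rule = rrule(DAILY, dtstart=datetime(2022, 1, 1), count=10)
rule.count()                                              # -> 10
rule.after(datetime(2022, 1, 3))                          # first occurrence after Jan 3
rule.between(datetime(2022, 1, 2), datetime(2022, 1, 5))  # bounds exclusive by default
list(rule.xafter(datetime(2022, 1, 3), count=2))          # generator variant of after()
# ---------------------------------------------------------------------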
+
+
+class rrule(rrulebase):
+    """
+    That's the base of the rrule operation. It accepts all the keywords
+    defined in the RFC as its constructor parameters (except byday,
+    which was renamed to byweekday) and more. The constructor prototype is::
+
+        rrule(freq)
+
+    Where freq must be one of YEARLY, MONTHLY, WEEKLY, DAILY, HOURLY, MINUTELY,
+    or SECONDLY.
+
+    .. note::
+        Per RFC section 3.3.10, recurrence instances falling on invalid dates
+        and times are ignored rather than coerced:
+
+            Recurrence rules may generate recurrence instances with an invalid
+            date (e.g., February 30) or nonexistent local time (e.g., 1:30 AM
+            on a day where the local time is moved forward by an hour at 1:00
+            AM). Such recurrence instances MUST be ignored and MUST NOT be
+            counted as part of the recurrence set.
+
+        This can lead to possibly surprising behavior when, for example, the
+        start date occurs at the end of the month:
+
+        >>> from dateutil.rrule import rrule, MONTHLY
+        >>> from datetime import datetime
+        >>> start_date = datetime(2014, 12, 31)
+        >>> list(rrule(freq=MONTHLY, count=4, dtstart=start_date))
+        ... # doctest: +NORMALIZE_WHITESPACE
+        [datetime.datetime(2014, 12, 31, 0, 0),
+         datetime.datetime(2015, 1, 31, 0, 0),
+         datetime.datetime(2015, 3, 31, 0, 0),
+         datetime.datetime(2015, 5, 31, 0, 0)]
+
+    Additionally, it supports the following keyword arguments:
+
+    :param dtstart:
+        The recurrence start. Besides being the base for the recurrence,
+        missing parameters in the final recurrence instances will also be
+        extracted from this date. If not given, datetime.now() will be used
+        instead.
+    :param interval:
+        The interval between each freq iteration. For example, when using
+        YEARLY, an interval of 2 means once every two years, but with HOURLY,
+        it means once every two hours. The default interval is 1.
+    :param wkst:
+        The week start day. Must be one of the MO, TU, WE constants, or an
+        integer, specifying the first day of the week. This will affect
+        recurrences based on weekly periods. The default week start is
+        retrieved from calendar.firstweekday(), and may be modified by
+        calendar.setfirstweekday().
+    :param count:
+        If given, this determines how many occurrences will be generated.
+
+        .. note::
+            As of version 2.5.0, the use of the keyword ``until`` in conjunction
+            with ``count`` is deprecated, to make sure ``dateutil`` is fully
+            compliant with `RFC-5545 Sec. 3.3.10 `_. Therefore, ``until`` and ``count``
+            **must not** occur in the same call to ``rrule``.
+    :param until:
+        If given, this must be a datetime instance specifying the upper-bound
+        limit of the recurrence. The last recurrence in the rule is the greatest
+        datetime that is less than or equal to the value specified in the
+        ``until`` parameter.
+
+        .. note::
+            As of version 2.5.0, the use of the keyword ``until`` in conjunction
+            with ``count`` is deprecated, to make sure ``dateutil`` is fully
+            compliant with `RFC-5545 Sec. 3.3.10 `_. Therefore, ``until`` and ``count``
+            **must not** occur in the same call to ``rrule``.
+    :param bysetpos:
+        If given, it must be either an integer, or a sequence of integers,
+        positive or negative. Each given integer will specify an occurrence
+        number, corresponding to the nth occurrence of the rule inside the
+        frequency period. For example, a bysetpos of -1 if combined with a
+        MONTHLY frequency, and a byweekday of (MO, TU, WE, TH, FR), will
+        result in the last work day of every month.
+    :param bymonth:
+        If given, it must be either an integer, or a sequence of integers,
+        meaning the months to apply the recurrence to.
+    :param bymonthday:
+        If given, it must be either an integer, or a sequence of integers,
+        meaning the month days to apply the recurrence to.
+    :param byyearday:
+        If given, it must be either an integer, or a sequence of integers,
+        meaning the year days to apply the recurrence to.
+    :param byeaster:
+        If given, it must be either an integer, or a sequence of integers,
+        positive or negative. Each integer will define an offset from the
+        Easter Sunday. Passing the offset 0 to byeaster will yield the Easter
+        Sunday itself. This is an extension to the RFC specification.
+    :param byweekno:
+        If given, it must be either an integer, or a sequence of integers,
+        meaning the week numbers to apply the recurrence to.
Week numbers + have the meaning described in ISO8601, that is, the first week of + the year is that containing at least four days of the new year. + :param byweekday: + If given, it must be either an integer (0 == MO), a sequence of + integers, one of the weekday constants (MO, TU, etc), or a sequence + of these constants. When given, these variables will define the + weekdays where the recurrence will be applied. It's also possible to + use an argument n for the weekday instances, which will mean the nth + occurrence of this weekday in the period. For example, with MONTHLY, + or with YEARLY and BYMONTH, using FR(+1) in byweekday will specify the + first friday of the month where the recurrence happens. Notice that in + the RFC documentation, this is specified as BYDAY, but was renamed to + avoid the ambiguity of that keyword. + :param byhour: + If given, it must be either an integer, or a sequence of integers, + meaning the hours to apply the recurrence to. + :param byminute: + If given, it must be either an integer, or a sequence of integers, + meaning the minutes to apply the recurrence to. + :param bysecond: + If given, it must be either an integer, or a sequence of integers, + meaning the seconds to apply the recurrence to. + :param cache: + If given, it must be a boolean value specifying to enable or disable + caching of results. If you will use the same rrule instance multiple + times, enabling caching will improve the performance considerably. + """ + def __init__(self, freq, dtstart=None, + interval=1, wkst=None, count=None, until=None, bysetpos=None, + bymonth=None, bymonthday=None, byyearday=None, byeaster=None, + byweekno=None, byweekday=None, + byhour=None, byminute=None, bysecond=None, + cache=False): + super(rrule, self).__init__(cache) + global easter + if not dtstart: + if until and until.tzinfo: + dtstart = datetime.datetime.now(tz=until.tzinfo).replace(microsecond=0) + else: + dtstart = datetime.datetime.now().replace(microsecond=0) + elif not isinstance(dtstart, datetime.datetime): + dtstart = datetime.datetime.fromordinal(dtstart.toordinal()) + else: + dtstart = dtstart.replace(microsecond=0) + self._dtstart = dtstart + self._tzinfo = dtstart.tzinfo + self._freq = freq + self._interval = interval + self._count = count + + # Cache the original byxxx rules, if they are provided, as the _byxxx + # attributes do not necessarily map to the inputs, and this can be + # a problem in generating the strings. Only store things if they've + # been supplied (the string retrieval will just use .get()) + self._original_rule = {} + + if until and not isinstance(until, datetime.datetime): + until = datetime.datetime.fromordinal(until.toordinal()) + self._until = until + + if self._dtstart and self._until: + if (self._dtstart.tzinfo is not None) != (self._until.tzinfo is not None): + # According to RFC5545 Section 3.3.10: + # https://tools.ietf.org/html/rfc5545#section-3.3.10 + # + # > If the "DTSTART" property is specified as a date with UTC + # > time or a date with local time and time zone reference, + # > then the UNTIL rule part MUST be specified as a date with + # > UTC time. + raise ValueError( + 'RRULE UNTIL values must be specified in UTC when DTSTART ' + 'is timezone-aware' + ) + + if count is not None and until: + warn("Using both 'count' and 'until' is inconsistent with RFC 5545" + " and has been deprecated in dateutil. 
Future versions will " + "raise an error.", DeprecationWarning) + + if wkst is None: + self._wkst = calendar.firstweekday() + elif isinstance(wkst, integer_types): + self._wkst = wkst + else: + self._wkst = wkst.weekday + + if bysetpos is None: + self._bysetpos = None + elif isinstance(bysetpos, integer_types): + if bysetpos == 0 or not (-366 <= bysetpos <= 366): + raise ValueError("bysetpos must be between 1 and 366, " + "or between -366 and -1") + self._bysetpos = (bysetpos,) + else: + self._bysetpos = tuple(bysetpos) + for pos in self._bysetpos: + if pos == 0 or not (-366 <= pos <= 366): + raise ValueError("bysetpos must be between 1 and 366, " + "or between -366 and -1") + + if self._bysetpos: + self._original_rule['bysetpos'] = self._bysetpos + + if (byweekno is None and byyearday is None and bymonthday is None and + byweekday is None and byeaster is None): + if freq == YEARLY: + if bymonth is None: + bymonth = dtstart.month + self._original_rule['bymonth'] = None + bymonthday = dtstart.day + self._original_rule['bymonthday'] = None + elif freq == MONTHLY: + bymonthday = dtstart.day + self._original_rule['bymonthday'] = None + elif freq == WEEKLY: + byweekday = dtstart.weekday() + self._original_rule['byweekday'] = None + + # bymonth + if bymonth is None: + self._bymonth = None + else: + if isinstance(bymonth, integer_types): + bymonth = (bymonth,) + + self._bymonth = tuple(sorted(set(bymonth))) + + if 'bymonth' not in self._original_rule: + self._original_rule['bymonth'] = self._bymonth + + # byyearday + if byyearday is None: + self._byyearday = None + else: + if isinstance(byyearday, integer_types): + byyearday = (byyearday,) + + self._byyearday = tuple(sorted(set(byyearday))) + self._original_rule['byyearday'] = self._byyearday + + # byeaster + if byeaster is not None: + if not easter: + from dateutil import easter + if isinstance(byeaster, integer_types): + self._byeaster = (byeaster,) + else: + self._byeaster = tuple(sorted(byeaster)) + + self._original_rule['byeaster'] = self._byeaster + else: + self._byeaster = None + + # bymonthday + if bymonthday is None: + self._bymonthday = () + self._bynmonthday = () + else: + if isinstance(bymonthday, integer_types): + bymonthday = (bymonthday,) + + bymonthday = set(bymonthday) # Ensure it's unique + + self._bymonthday = tuple(sorted(x for x in bymonthday if x > 0)) + self._bynmonthday = tuple(sorted(x for x in bymonthday if x < 0)) + + # Storing positive numbers first, then negative numbers + if 'bymonthday' not in self._original_rule: + self._original_rule['bymonthday'] = tuple( + itertools.chain(self._bymonthday, self._bynmonthday)) + + # byweekno + if byweekno is None: + self._byweekno = None + else: + if isinstance(byweekno, integer_types): + byweekno = (byweekno,) + + self._byweekno = tuple(sorted(set(byweekno))) + + self._original_rule['byweekno'] = self._byweekno + + # byweekday / bynweekday + if byweekday is None: + self._byweekday = None + self._bynweekday = None + else: + # If it's one of the valid non-sequence types, convert to a + # single-element sequence before the iterator that builds the + # byweekday set. 
+ if isinstance(byweekday, integer_types) or hasattr(byweekday, "n"): + byweekday = (byweekday,) + + self._byweekday = set() + self._bynweekday = set() + for wday in byweekday: + if isinstance(wday, integer_types): + self._byweekday.add(wday) + elif not wday.n or freq > MONTHLY: + self._byweekday.add(wday.weekday) + else: + self._bynweekday.add((wday.weekday, wday.n)) + + if not self._byweekday: + self._byweekday = None + elif not self._bynweekday: + self._bynweekday = None + + if self._byweekday is not None: + self._byweekday = tuple(sorted(self._byweekday)) + orig_byweekday = [weekday(x) for x in self._byweekday] + else: + orig_byweekday = () + + if self._bynweekday is not None: + self._bynweekday = tuple(sorted(self._bynweekday)) + orig_bynweekday = [weekday(*x) for x in self._bynweekday] + else: + orig_bynweekday = () + + if 'byweekday' not in self._original_rule: + self._original_rule['byweekday'] = tuple(itertools.chain( + orig_byweekday, orig_bynweekday)) + + # byhour + if byhour is None: + if freq < HOURLY: + self._byhour = {dtstart.hour} + else: + self._byhour = None + else: + if isinstance(byhour, integer_types): + byhour = (byhour,) + + if freq == HOURLY: + self._byhour = self.__construct_byset(start=dtstart.hour, + byxxx=byhour, + base=24) + else: + self._byhour = set(byhour) + + self._byhour = tuple(sorted(self._byhour)) + self._original_rule['byhour'] = self._byhour + + # byminute + if byminute is None: + if freq < MINUTELY: + self._byminute = {dtstart.minute} + else: + self._byminute = None + else: + if isinstance(byminute, integer_types): + byminute = (byminute,) + + if freq == MINUTELY: + self._byminute = self.__construct_byset(start=dtstart.minute, + byxxx=byminute, + base=60) + else: + self._byminute = set(byminute) + + self._byminute = tuple(sorted(self._byminute)) + self._original_rule['byminute'] = self._byminute + + # bysecond + if bysecond is None: + if freq < SECONDLY: + self._bysecond = ((dtstart.second,)) + else: + self._bysecond = None + else: + if isinstance(bysecond, integer_types): + bysecond = (bysecond,) + + self._bysecond = set(bysecond) + + if freq == SECONDLY: + self._bysecond = self.__construct_byset(start=dtstart.second, + byxxx=bysecond, + base=60) + else: + self._bysecond = set(bysecond) + + self._bysecond = tuple(sorted(self._bysecond)) + self._original_rule['bysecond'] = self._bysecond + + if self._freq >= HOURLY: + self._timeset = None + else: + self._timeset = [] + for hour in self._byhour: + for minute in self._byminute: + for second in self._bysecond: + self._timeset.append( + datetime.time(hour, minute, second, + tzinfo=self._tzinfo)) + self._timeset.sort() + self._timeset = tuple(self._timeset) + + def __str__(self): + """ + Output a string that would generate this RRULE if passed to rrulestr. + This is mostly compatible with RFC5545, except for the + dateutil-specific extension BYEASTER. 
+ """ + + output = [] + h, m, s = [None] * 3 + if self._dtstart: + output.append(self._dtstart.strftime('DTSTART:%Y%m%dT%H%M%S')) + h, m, s = self._dtstart.timetuple()[3:6] + + parts = ['FREQ=' + FREQNAMES[self._freq]] + if self._interval != 1: + parts.append('INTERVAL=' + str(self._interval)) + + if self._wkst: + parts.append('WKST=' + repr(weekday(self._wkst))[0:2]) + + if self._count is not None: + parts.append('COUNT=' + str(self._count)) + + if self._until: + parts.append(self._until.strftime('UNTIL=%Y%m%dT%H%M%S')) + + if self._original_rule.get('byweekday') is not None: + # The str() method on weekday objects doesn't generate + # RFC5545-compliant strings, so we should modify that. + original_rule = dict(self._original_rule) + wday_strings = [] + for wday in original_rule['byweekday']: + if wday.n: + wday_strings.append('{n:+d}{wday}'.format( + n=wday.n, + wday=repr(wday)[0:2])) + else: + wday_strings.append(repr(wday)) + + original_rule['byweekday'] = wday_strings + else: + original_rule = self._original_rule + + partfmt = '{name}={vals}' + for name, key in [('BYSETPOS', 'bysetpos'), + ('BYMONTH', 'bymonth'), + ('BYMONTHDAY', 'bymonthday'), + ('BYYEARDAY', 'byyearday'), + ('BYWEEKNO', 'byweekno'), + ('BYDAY', 'byweekday'), + ('BYHOUR', 'byhour'), + ('BYMINUTE', 'byminute'), + ('BYSECOND', 'bysecond'), + ('BYEASTER', 'byeaster')]: + value = original_rule.get(key) + if value: + parts.append(partfmt.format(name=name, vals=(','.join(str(v) + for v in value)))) + + output.append('RRULE:' + ';'.join(parts)) + return '\n'.join(output) + + def replace(self, **kwargs): + """Return new rrule with same attributes except for those attributes given new + values by whichever keyword arguments are specified.""" + new_kwargs = {"interval": self._interval, + "count": self._count, + "dtstart": self._dtstart, + "freq": self._freq, + "until": self._until, + "wkst": self._wkst, + "cache": False if self._cache is None else True } + new_kwargs.update(self._original_rule) + new_kwargs.update(kwargs) + return rrule(**new_kwargs) + + def _iter(self): + year, month, day, hour, minute, second, weekday, yearday, _ = \ + self._dtstart.timetuple() + + # Some local variables to speed things up a bit + freq = self._freq + interval = self._interval + wkst = self._wkst + until = self._until + bymonth = self._bymonth + byweekno = self._byweekno + byyearday = self._byyearday + byweekday = self._byweekday + byeaster = self._byeaster + bymonthday = self._bymonthday + bynmonthday = self._bynmonthday + bysetpos = self._bysetpos + byhour = self._byhour + byminute = self._byminute + bysecond = self._bysecond + + ii = _iterinfo(self) + ii.rebuild(year, month) + + getdayset = {YEARLY: ii.ydayset, + MONTHLY: ii.mdayset, + WEEKLY: ii.wdayset, + DAILY: ii.ddayset, + HOURLY: ii.ddayset, + MINUTELY: ii.ddayset, + SECONDLY: ii.ddayset}[freq] + + if freq < HOURLY: + timeset = self._timeset + else: + gettimeset = {HOURLY: ii.htimeset, + MINUTELY: ii.mtimeset, + SECONDLY: ii.stimeset}[freq] + if ((freq >= HOURLY and + self._byhour and hour not in self._byhour) or + (freq >= MINUTELY and + self._byminute and minute not in self._byminute) or + (freq >= SECONDLY and + self._bysecond and second not in self._bysecond)): + timeset = () + else: + timeset = gettimeset(hour, minute, second) + + total = 0 + count = self._count + while True: + # Get dayset with the right frequency + dayset, start, end = getdayset(year, month, day) + + # Do the "hard" work ;-) + filtered = False + for i in dayset[start:end]: + if ((bymonth and ii.mmask[i] not in 
bymonth) or + (byweekno and not ii.wnomask[i]) or + (byweekday and ii.wdaymask[i] not in byweekday) or + (ii.nwdaymask and not ii.nwdaymask[i]) or + (byeaster and not ii.eastermask[i]) or + ((bymonthday or bynmonthday) and + ii.mdaymask[i] not in bymonthday and + ii.nmdaymask[i] not in bynmonthday) or + (byyearday and + ((i < ii.yearlen and i+1 not in byyearday and + -ii.yearlen+i not in byyearday) or + (i >= ii.yearlen and i+1-ii.yearlen not in byyearday and + -ii.nextyearlen+i-ii.yearlen not in byyearday)))): + dayset[i] = None + filtered = True + + # Output results + if bysetpos and timeset: + poslist = [] + for pos in bysetpos: + if pos < 0: + daypos, timepos = divmod(pos, len(timeset)) + else: + daypos, timepos = divmod(pos-1, len(timeset)) + try: + i = [x for x in dayset[start:end] + if x is not None][daypos] + time = timeset[timepos] + except IndexError: + pass + else: + date = datetime.date.fromordinal(ii.yearordinal+i) + res = datetime.datetime.combine(date, time) + if res not in poslist: + poslist.append(res) + poslist.sort() + for res in poslist: + if until and res > until: + self._len = total + return + elif res >= self._dtstart: + if count is not None: + count -= 1 + if count < 0: + self._len = total + return + total += 1 + yield res + else: + for i in dayset[start:end]: + if i is not None: + date = datetime.date.fromordinal(ii.yearordinal + i) + for time in timeset: + res = datetime.datetime.combine(date, time) + if until and res > until: + self._len = total + return + elif res >= self._dtstart: + if count is not None: + count -= 1 + if count < 0: + self._len = total + return + + total += 1 + yield res + + # Handle frequency and interval + fixday = False + if freq == YEARLY: + year += interval + if year > datetime.MAXYEAR: + self._len = total + return + ii.rebuild(year, month) + elif freq == MONTHLY: + month += interval + if month > 12: + div, mod = divmod(month, 12) + month = mod + year += div + if month == 0: + month = 12 + year -= 1 + if year > datetime.MAXYEAR: + self._len = total + return + ii.rebuild(year, month) + elif freq == WEEKLY: + if wkst > weekday: + day += -(weekday+1+(6-wkst))+self._interval*7 + else: + day += -(weekday-wkst)+self._interval*7 + weekday = wkst + fixday = True + elif freq == DAILY: + day += interval + fixday = True + elif freq == HOURLY: + if filtered: + # Jump to one iteration before next day + hour += ((23-hour)//interval)*interval + + if byhour: + ndays, hour = self.__mod_distance(value=hour, + byxxx=self._byhour, + base=24) + else: + ndays, hour = divmod(hour+interval, 24) + + if ndays: + day += ndays + fixday = True + + timeset = gettimeset(hour, minute, second) + elif freq == MINUTELY: + if filtered: + # Jump to one iteration before next day + minute += ((1439-(hour*60+minute))//interval)*interval + + valid = False + rep_rate = (24*60) + for j in range(rep_rate // gcd(interval, rep_rate)): + if byminute: + nhours, minute = \ + self.__mod_distance(value=minute, + byxxx=self._byminute, + base=60) + else: + nhours, minute = divmod(minute+interval, 60) + + div, hour = divmod(hour+nhours, 24) + if div: + day += div + fixday = True + filtered = False + + if not byhour or hour in byhour: + valid = True + break + + if not valid: + raise ValueError('Invalid combination of interval and ' + + 'byhour resulting in empty rule.') + + timeset = gettimeset(hour, minute, second) + elif freq == SECONDLY: + if filtered: + # Jump to one iteration before next day + second += (((86399 - (hour * 3600 + minute * 60 + second)) + // interval) * interval) + + 
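+                # A note on the constant above: 86399 == 24*3600 - 1 is the
+                # last second of the day, so the jump lands on the final
+                # interval step inside the current (already filtered-out)
+                # day, and the loop below only has to step across the day
+                # boundary.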
rep_rate = (24 * 3600) + valid = False + for j in range(0, rep_rate // gcd(interval, rep_rate)): + if bysecond: + nminutes, second = \ + self.__mod_distance(value=second, + byxxx=self._bysecond, + base=60) + else: + nminutes, second = divmod(second+interval, 60) + + div, minute = divmod(minute+nminutes, 60) + if div: + hour += div + div, hour = divmod(hour, 24) + if div: + day += div + fixday = True + + if ((not byhour or hour in byhour) and + (not byminute or minute in byminute) and + (not bysecond or second in bysecond)): + valid = True + break + + if not valid: + raise ValueError('Invalid combination of interval, ' + + 'byhour and byminute resulting in empty' + + ' rule.') + + timeset = gettimeset(hour, minute, second) + + if fixday and day > 28: + daysinmonth = calendar.monthrange(year, month)[1] + if day > daysinmonth: + while day > daysinmonth: + day -= daysinmonth + month += 1 + if month == 13: + month = 1 + year += 1 + if year > datetime.MAXYEAR: + self._len = total + return + daysinmonth = calendar.monthrange(year, month)[1] + ii.rebuild(year, month) + + def __construct_byset(self, start, byxxx, base): + """ + If a `BYXXX` sequence is passed to the constructor at the same level as + `FREQ` (e.g. `FREQ=HOURLY,BYHOUR={2,4,7},INTERVAL=3`), there are some + specifications which cannot be reached given some starting conditions. + + This occurs whenever the interval is not coprime with the base of a + given unit and the difference between the starting position and the + ending position is not coprime with the greatest common denominator + between the interval and the base. For example, with a FREQ of hourly + starting at 17:00 and an interval of 4, the only valid values for + BYHOUR would be {21, 1, 5, 9, 13, 17}, because 4 and 24 are not + coprime. + + :param start: + Specifies the starting position. + :param byxxx: + An iterable containing the list of allowed values. + :param base: + The largest allowable value for the specified frequency (e.g. + 24 hours, 60 minutes). + + This does not preserve the type of the iterable, returning a set, since + the values should be unique and the order is irrelevant, this will + speed up later lookups. + + In the event of an empty set, raises a :exception:`ValueError`, as this + results in an empty rrule. + """ + + cset = set() + + # Support a single byxxx value. + if isinstance(byxxx, integer_types): + byxxx = (byxxx, ) + + for num in byxxx: + i_gcd = gcd(self._interval, base) + # Use divmod rather than % because we need to wrap negative nums. + if i_gcd == 1 or divmod(num - start, i_gcd)[1] == 0: + cset.add(num) + + if len(cset) == 0: + raise ValueError("Invalid rrule byxxx generates an empty set.") + + return cset + + def __mod_distance(self, value, byxxx, base): + """ + Calculates the next value in a sequence where the `FREQ` parameter is + specified along with a `BYXXX` parameter at the same "level" + (e.g. `HOURLY` specified with `BYHOUR`). + + :param value: + The old value of the component. + :param byxxx: + The `BYXXX` set, which should have been generated by + `rrule._construct_byset`, or something else which checks that a + valid rule is present. + :param base: + The largest allowable value for the specified frequency (e.g. + 24 hours, 60 minutes). + + If a valid value is not found after `base` iterations (the maximum + number before the sequence would start to repeat), this raises a + :exception:`ValueError`, as no valid values were found. 
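+
+        As an illustrative walk-through: with ``interval=4``, ``base=24``
+        and ``byxxx={5, 13}``, starting from ``value=17`` the loop visits
+        21, 1 and 5 in turn and returns ``(1, 5)``: one day of carry, with
+        the hour set to 5.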
+ + This returns a tuple of `divmod(n*interval, base)`, where `n` is the + smallest number of `interval` repetitions until the next specified + value in `byxxx` is found. + """ + accumulator = 0 + for ii in range(1, base + 1): + # Using divmod() over % to account for negative intervals + div, value = divmod(value + self._interval, base) + accumulator += div + if value in byxxx: + return (accumulator, value) + + +class _iterinfo(object): + __slots__ = ["rrule", "lastyear", "lastmonth", + "yearlen", "nextyearlen", "yearordinal", "yearweekday", + "mmask", "mrange", "mdaymask", "nmdaymask", + "wdaymask", "wnomask", "nwdaymask", "eastermask"] + + def __init__(self, rrule): + for attr in self.__slots__: + setattr(self, attr, None) + self.rrule = rrule + + def rebuild(self, year, month): + # Every mask is 7 days longer to handle cross-year weekly periods. + rr = self.rrule + if year != self.lastyear: + self.yearlen = 365 + calendar.isleap(year) + self.nextyearlen = 365 + calendar.isleap(year + 1) + firstyday = datetime.date(year, 1, 1) + self.yearordinal = firstyday.toordinal() + self.yearweekday = firstyday.weekday() + + wday = datetime.date(year, 1, 1).weekday() + if self.yearlen == 365: + self.mmask = M365MASK + self.mdaymask = MDAY365MASK + self.nmdaymask = NMDAY365MASK + self.wdaymask = WDAYMASK[wday:] + self.mrange = M365RANGE + else: + self.mmask = M366MASK + self.mdaymask = MDAY366MASK + self.nmdaymask = NMDAY366MASK + self.wdaymask = WDAYMASK[wday:] + self.mrange = M366RANGE + + if not rr._byweekno: + self.wnomask = None + else: + self.wnomask = [0]*(self.yearlen+7) + # no1wkst = firstwkst = self.wdaymask.index(rr._wkst) + no1wkst = firstwkst = (7-self.yearweekday+rr._wkst) % 7 + if no1wkst >= 4: + no1wkst = 0 + # Number of days in the year, plus the days we got + # from last year. + wyearlen = self.yearlen+(self.yearweekday-rr._wkst) % 7 + else: + # Number of days in the year, minus the days we + # left in last year. + wyearlen = self.yearlen-no1wkst + div, mod = divmod(wyearlen, 7) + numweeks = div+mod//4 + for n in rr._byweekno: + if n < 0: + n += numweeks+1 + if not (0 < n <= numweeks): + continue + if n > 1: + i = no1wkst+(n-1)*7 + if no1wkst != firstwkst: + i -= 7-firstwkst + else: + i = no1wkst + for j in range(7): + self.wnomask[i] = 1 + i += 1 + if self.wdaymask[i] == rr._wkst: + break + if 1 in rr._byweekno: + # Check week number 1 of next year as well + # TODO: Check -numweeks for next year. + i = no1wkst+numweeks*7 + if no1wkst != firstwkst: + i -= 7-firstwkst + if i < self.yearlen: + # If week starts in next year, we + # don't care about it. + for j in range(7): + self.wnomask[i] = 1 + i += 1 + if self.wdaymask[i] == rr._wkst: + break + if no1wkst: + # Check last week number of last year as + # well. If no1wkst is 0, either the year + # started on week start, or week number 1 + # got days from last year, so there are no + # days from last year's last week number in + # this year. 
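+                    # For example, 2015 began on a Thursday: with wkst=MO,
+                    # (7 - 3 + 0) % 7 == 4 >= 4, so no1wkst was reset to 0
+                    # above and week number 1 absorbed Mon 2014-12-29 through
+                    # Wed 2014-12-31 from the previous year (the ISO 8601
+                    # "week one holds at least four days" convention).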
+ if -1 not in rr._byweekno: + lyearweekday = datetime.date(year-1, 1, 1).weekday() + lno1wkst = (7-lyearweekday+rr._wkst) % 7 + lyearlen = 365+calendar.isleap(year-1) + if lno1wkst >= 4: + lno1wkst = 0 + lnumweeks = 52+(lyearlen + + (lyearweekday-rr._wkst) % 7) % 7//4 + else: + lnumweeks = 52+(self.yearlen-no1wkst) % 7//4 + else: + lnumweeks = -1 + if lnumweeks in rr._byweekno: + for i in range(no1wkst): + self.wnomask[i] = 1 + + if (rr._bynweekday and (month != self.lastmonth or + year != self.lastyear)): + ranges = [] + if rr._freq == YEARLY: + if rr._bymonth: + for month in rr._bymonth: + ranges.append(self.mrange[month-1:month+1]) + else: + ranges = [(0, self.yearlen)] + elif rr._freq == MONTHLY: + ranges = [self.mrange[month-1:month+1]] + if ranges: + # Weekly frequency won't get here, so we may not + # care about cross-year weekly periods. + self.nwdaymask = [0]*self.yearlen + for first, last in ranges: + last -= 1 + for wday, n in rr._bynweekday: + if n < 0: + i = last+(n+1)*7 + i -= (self.wdaymask[i]-wday) % 7 + else: + i = first+(n-1)*7 + i += (7-self.wdaymask[i]+wday) % 7 + if first <= i <= last: + self.nwdaymask[i] = 1 + + if rr._byeaster: + self.eastermask = [0]*(self.yearlen+7) + eyday = easter.easter(year).toordinal()-self.yearordinal + for offset in rr._byeaster: + self.eastermask[eyday+offset] = 1 + + self.lastyear = year + self.lastmonth = month + + def ydayset(self, year, month, day): + return list(range(self.yearlen)), 0, self.yearlen + + def mdayset(self, year, month, day): + dset = [None]*self.yearlen + start, end = self.mrange[month-1:month+1] + for i in range(start, end): + dset[i] = i + return dset, start, end + + def wdayset(self, year, month, day): + # We need to handle cross-year weeks here. + dset = [None]*(self.yearlen+7) + i = datetime.date(year, month, day).toordinal()-self.yearordinal + start = i + for j in range(7): + dset[i] = i + i += 1 + # if (not (0 <= i < self.yearlen) or + # self.wdaymask[i] == self.rrule._wkst): + # This will cross the year boundary, if necessary. + if self.wdaymask[i] == self.rrule._wkst: + break + return dset, start, i + + def ddayset(self, year, month, day): + dset = [None] * self.yearlen + i = datetime.date(year, month, day).toordinal() - self.yearordinal + dset[i] = i + return dset, i, i + 1 + + def htimeset(self, hour, minute, second): + tset = [] + rr = self.rrule + for minute in rr._byminute: + for second in rr._bysecond: + tset.append(datetime.time(hour, minute, second, + tzinfo=rr._tzinfo)) + tset.sort() + return tset + + def mtimeset(self, hour, minute, second): + tset = [] + rr = self.rrule + for second in rr._bysecond: + tset.append(datetime.time(hour, minute, second, tzinfo=rr._tzinfo)) + tset.sort() + return tset + + def stimeset(self, hour, minute, second): + return (datetime.time(hour, minute, second, + tzinfo=self.rrule._tzinfo),) + + +class rruleset(rrulebase): + """ The rruleset type allows more complex recurrence setups, mixing + multiple rules, dates, exclusion rules, and exclusion dates. The type + constructor takes the following keyword arguments: + + :param cache: If True, caching of results will be enabled, improving + performance of multiple queries considerably. 
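+
+    **Example** (a minimal sketch combining one rule with one exclusion
+    date):
+
+    .. doctest::
+
+       >>> from datetime import datetime
+       >>> from dateutil.rrule import rruleset, rrule, DAILY
+       >>> rset = rruleset()
+       >>> rset.rrule(rrule(DAILY, count=3, dtstart=datetime(2000, 1, 1)))
+       >>> rset.exdate(datetime(2000, 1, 2))
+       >>> list(rset)
+       [datetime.datetime(2000, 1, 1, 0, 0), datetime.datetime(2000, 1, 3, 0, 0)]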
""" + + class _genitem(object): + def __init__(self, genlist, gen): + try: + self.dt = advance_iterator(gen) + genlist.append(self) + except StopIteration: + pass + self.genlist = genlist + self.gen = gen + + def __next__(self): + try: + self.dt = advance_iterator(self.gen) + except StopIteration: + if self.genlist[0] is self: + heapq.heappop(self.genlist) + else: + self.genlist.remove(self) + heapq.heapify(self.genlist) + + next = __next__ + + def __lt__(self, other): + return self.dt < other.dt + + def __gt__(self, other): + return self.dt > other.dt + + def __eq__(self, other): + return self.dt == other.dt + + def __ne__(self, other): + return self.dt != other.dt + + def __init__(self, cache=False): + super(rruleset, self).__init__(cache) + self._rrule = [] + self._rdate = [] + self._exrule = [] + self._exdate = [] + + @_invalidates_cache + def rrule(self, rrule): + """ Include the given :py:class:`rrule` instance in the recurrence set + generation. """ + self._rrule.append(rrule) + + @_invalidates_cache + def rdate(self, rdate): + """ Include the given :py:class:`datetime` instance in the recurrence + set generation. """ + self._rdate.append(rdate) + + @_invalidates_cache + def exrule(self, exrule): + """ Include the given rrule instance in the recurrence set exclusion + list. Dates which are part of the given recurrence rules will not + be generated, even if some inclusive rrule or rdate matches them. + """ + self._exrule.append(exrule) + + @_invalidates_cache + def exdate(self, exdate): + """ Include the given datetime instance in the recurrence set + exclusion list. Dates included that way will not be generated, + even if some inclusive rrule or rdate matches them. """ + self._exdate.append(exdate) + + def _iter(self): + rlist = [] + self._rdate.sort() + self._genitem(rlist, iter(self._rdate)) + for gen in [iter(x) for x in self._rrule]: + self._genitem(rlist, gen) + exlist = [] + self._exdate.sort() + self._genitem(exlist, iter(self._exdate)) + for gen in [iter(x) for x in self._exrule]: + self._genitem(exlist, gen) + lastdt = None + total = 0 + heapq.heapify(rlist) + heapq.heapify(exlist) + while rlist: + ritem = rlist[0] + if not lastdt or lastdt != ritem.dt: + while exlist and exlist[0] < ritem: + exitem = exlist[0] + advance_iterator(exitem) + if exlist and exlist[0] is exitem: + heapq.heapreplace(exlist, exitem) + if not exlist or ritem != exlist[0]: + total += 1 + yield ritem.dt + lastdt = ritem.dt + advance_iterator(ritem) + if rlist and rlist[0] is ritem: + heapq.heapreplace(rlist, ritem) + self._len = total + + + + +class _rrulestr(object): + """ Parses a string representation of a recurrence rule or set of + recurrence rules. + + :param s: + Required, a string defining one or more recurrence rules. + + :param dtstart: + If given, used as the default recurrence start if not specified in the + rule string. + + :param cache: + If set ``True`` caching of results will be enabled, improving + performance of multiple queries considerably. + + :param unfold: + If set ``True`` indicates that a rule string is split over more + than one line and should be joined before processing. + + :param forceset: + If set ``True`` forces a :class:`dateutil.rrule.rruleset` to + be returned. + + :param compatible: + If set ``True`` forces ``unfold`` and ``forceset`` to be ``True``. + + :param ignoretz: + If set ``True``, time zones in parsed strings are ignored and a naive + :class:`datetime.datetime` object is returned. 
+ + :param tzids: + If given, a callable or mapping used to retrieve a + :class:`datetime.tzinfo` from a string representation. + Defaults to :func:`dateutil.tz.gettz`. + + :param tzinfos: + Additional time zone names / aliases which may be present in a string + representation. See :func:`dateutil.parser.parse` for more + information. + + :return: + Returns a :class:`dateutil.rrule.rruleset` or + :class:`dateutil.rrule.rrule` + """ + + _freq_map = {"YEARLY": YEARLY, + "MONTHLY": MONTHLY, + "WEEKLY": WEEKLY, + "DAILY": DAILY, + "HOURLY": HOURLY, + "MINUTELY": MINUTELY, + "SECONDLY": SECONDLY} + + _weekday_map = {"MO": 0, "TU": 1, "WE": 2, "TH": 3, + "FR": 4, "SA": 5, "SU": 6} + + def _handle_int(self, rrkwargs, name, value, **kwargs): + rrkwargs[name.lower()] = int(value) + + def _handle_int_list(self, rrkwargs, name, value, **kwargs): + rrkwargs[name.lower()] = [int(x) for x in value.split(',')] + + _handle_INTERVAL = _handle_int + _handle_COUNT = _handle_int + _handle_BYSETPOS = _handle_int_list + _handle_BYMONTH = _handle_int_list + _handle_BYMONTHDAY = _handle_int_list + _handle_BYYEARDAY = _handle_int_list + _handle_BYEASTER = _handle_int_list + _handle_BYWEEKNO = _handle_int_list + _handle_BYHOUR = _handle_int_list + _handle_BYMINUTE = _handle_int_list + _handle_BYSECOND = _handle_int_list + + def _handle_FREQ(self, rrkwargs, name, value, **kwargs): + rrkwargs["freq"] = self._freq_map[value] + + def _handle_UNTIL(self, rrkwargs, name, value, **kwargs): + global parser + if not parser: + from dateutil import parser + try: + rrkwargs["until"] = parser.parse(value, + ignoretz=kwargs.get("ignoretz"), + tzinfos=kwargs.get("tzinfos")) + except ValueError: + raise ValueError("invalid until date") + + def _handle_WKST(self, rrkwargs, name, value, **kwargs): + rrkwargs["wkst"] = self._weekday_map[value] + + def _handle_BYWEEKDAY(self, rrkwargs, name, value, **kwargs): + """ + Two ways to specify this: +1MO or MO(+1) + """ + l = [] + for wday in value.split(','): + if '(' in wday: + # If it's of the form TH(+1), etc. + splt = wday.split('(') + w = splt[0] + n = int(splt[1][:-1]) + elif len(wday): + # If it's of the form +1MO + for i in range(len(wday)): + if wday[i] not in '+-0123456789': + break + n = wday[:i] or None + w = wday[i:] + if n: + n = int(n) + else: + raise ValueError("Invalid (empty) BYDAY specification.") + + l.append(weekdays[self._weekday_map[w]](n)) + rrkwargs["byweekday"] = l + + _handle_BYDAY = _handle_BYWEEKDAY + + def _parse_rfc_rrule(self, line, + dtstart=None, + cache=False, + ignoretz=False, + tzinfos=None): + if line.find(':') != -1: + name, value = line.split(':') + if name != "RRULE": + raise ValueError("unknown parameter name") + else: + value = line + rrkwargs = {} + for pair in value.split(';'): + name, value = pair.split('=') + name = name.upper() + value = value.upper() + try: + getattr(self, "_handle_"+name)(rrkwargs, name, value, + ignoretz=ignoretz, + tzinfos=tzinfos) + except AttributeError: + raise ValueError("unknown parameter '%s'" % name) + except (KeyError, ValueError): + raise ValueError("invalid '%s': %s" % (name, value)) + return rrule(dtstart=dtstart, cache=cache, **rrkwargs) + + def _parse_date_value(self, date_value, parms, rule_tzids, + ignoretz, tzids, tzinfos): + global parser + if not parser: + from dateutil import parser + + datevals = [] + value_found = False + TZID = None + + for parm in parms: + if parm.startswith("TZID="): + try: + tzkey = rule_tzids[parm.split('TZID=')[-1]] + except KeyError: + continue + if tzids is None: + from . 
import tz + tzlookup = tz.gettz + elif callable(tzids): + tzlookup = tzids + else: + tzlookup = getattr(tzids, 'get', None) + if tzlookup is None: + msg = ('tzids must be a callable, mapping, or None, ' + 'not %s' % tzids) + raise ValueError(msg) + + TZID = tzlookup(tzkey) + continue + + # RFC 5545 3.8.2.4: The VALUE parameter is optional, but may be found + # only once. + if parm not in {"VALUE=DATE-TIME", "VALUE=DATE"}: + raise ValueError("unsupported parm: " + parm) + else: + if value_found: + msg = ("Duplicate value parameter found in: " + parm) + raise ValueError(msg) + value_found = True + + for datestr in date_value.split(','): + date = parser.parse(datestr, ignoretz=ignoretz, tzinfos=tzinfos) + if TZID is not None: + if date.tzinfo is None: + date = date.replace(tzinfo=TZID) + else: + raise ValueError('DTSTART/EXDATE specifies multiple timezone') + datevals.append(date) + + return datevals + + def _parse_rfc(self, s, + dtstart=None, + cache=False, + unfold=False, + forceset=False, + compatible=False, + ignoretz=False, + tzids=None, + tzinfos=None): + global parser + if compatible: + forceset = True + unfold = True + + TZID_NAMES = dict(map( + lambda x: (x.upper(), x), + re.findall('TZID=(?P<name>[^:]+):', s) + )) + s = s.upper() + if not s.strip(): + raise ValueError("empty string") + if unfold: + lines = s.splitlines() + i = 0 + while i < len(lines): + line = lines[i].rstrip() + if not line: + del lines[i] + elif i > 0 and line[0] == " ": + lines[i-1] += line[1:] + del lines[i] + else: + i += 1 + else: + lines = s.split() + if (not forceset and len(lines) == 1 and (s.find(':') == -1 or + s.startswith('RRULE:'))): + return self._parse_rfc_rrule(lines[0], cache=cache, + dtstart=dtstart, ignoretz=ignoretz, + tzinfos=tzinfos) + else: + rrulevals = [] + rdatevals = [] + exrulevals = [] + exdatevals = [] + for line in lines: + if not line: + continue + if line.find(':') == -1: + name = "RRULE" + value = line + else: + name, value = line.split(':', 1) + parms = name.split(';') + if not parms: + raise ValueError("empty property name") + name = parms[0] + parms = parms[1:] + if name == "RRULE": + for parm in parms: + raise ValueError("unsupported RRULE parm: "+parm) + rrulevals.append(value) + elif name == "RDATE": + for parm in parms: + if parm != "VALUE=DATE-TIME": + raise ValueError("unsupported RDATE parm: "+parm) + rdatevals.append(value) + elif name == "EXRULE": + for parm in parms: + raise ValueError("unsupported EXRULE parm: "+parm) + exrulevals.append(value) + elif name == "EXDATE": + exdatevals.extend( + self._parse_date_value(value, parms, + TZID_NAMES, ignoretz, + tzids, tzinfos) + ) + elif name == "DTSTART": + dtvals = self._parse_date_value(value, parms, TZID_NAMES, + ignoretz, tzids, tzinfos) + if len(dtvals) != 1: + raise ValueError("Multiple DTSTART values specified:" + + value) + dtstart = dtvals[0] + else: + raise ValueError("unsupported property: "+name) + if (forceset or len(rrulevals) > 1 or rdatevals + or exrulevals or exdatevals): + if not parser and (rdatevals or exdatevals): + from dateutil import parser + rset = rruleset(cache=cache) + for value in rrulevals: + rset.rrule(self._parse_rfc_rrule(value, dtstart=dtstart, + ignoretz=ignoretz, + tzinfos=tzinfos)) + for value in rdatevals: + for datestr in value.split(','): + rset.rdate(parser.parse(datestr, + ignoretz=ignoretz, + tzinfos=tzinfos)) + for value in exrulevals: + rset.exrule(self._parse_rfc_rrule(value, dtstart=dtstart, + ignoretz=ignoretz, + tzinfos=tzinfos)) + for value in exdatevals: + rset.exdate(value) + if
compatible and dtstart: + rset.rdate(dtstart) + return rset + else: + return self._parse_rfc_rrule(rrulevals[0], + dtstart=dtstart, + cache=cache, + ignoretz=ignoretz, + tzinfos=tzinfos) + + def __call__(self, s, **kwargs): + return self._parse_rfc(s, **kwargs) + + +rrulestr = _rrulestr() + +# vim:ts=4:sw=4:et diff --git a/.local/lib/python3.8/site-packages/dateutil/tz/__init__.py b/.local/lib/python3.8/site-packages/dateutil/tz/__init__.py new file mode 100644 index 0000000..af1352c --- /dev/null +++ b/.local/lib/python3.8/site-packages/dateutil/tz/__init__.py @@ -0,0 +1,12 @@ +# -*- coding: utf-8 -*- +from .tz import * +from .tz import __doc__ + +__all__ = ["tzutc", "tzoffset", "tzlocal", "tzfile", "tzrange", + "tzstr", "tzical", "tzwin", "tzwinlocal", "gettz", + "enfold", "datetime_ambiguous", "datetime_exists", + "resolve_imaginary", "UTC", "DeprecatedTzFormatWarning"] + + +class DeprecatedTzFormatWarning(Warning): + """Warning raised when time zones are parsed from deprecated formats.""" diff --git a/.local/lib/python3.8/site-packages/dateutil/tz/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/dateutil/tz/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..f3ffee4 Binary files /dev/null and b/.local/lib/python3.8/site-packages/dateutil/tz/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/dateutil/tz/__pycache__/_common.cpython-38.pyc b/.local/lib/python3.8/site-packages/dateutil/tz/__pycache__/_common.cpython-38.pyc new file mode 100644 index 0000000..b2a7cba Binary files /dev/null and b/.local/lib/python3.8/site-packages/dateutil/tz/__pycache__/_common.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/dateutil/tz/__pycache__/_factories.cpython-38.pyc b/.local/lib/python3.8/site-packages/dateutil/tz/__pycache__/_factories.cpython-38.pyc new file mode 100644 index 0000000..5b1e5c3 Binary files /dev/null and b/.local/lib/python3.8/site-packages/dateutil/tz/__pycache__/_factories.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/dateutil/tz/__pycache__/tz.cpython-38.pyc b/.local/lib/python3.8/site-packages/dateutil/tz/__pycache__/tz.cpython-38.pyc new file mode 100644 index 0000000..c2754bd Binary files /dev/null and b/.local/lib/python3.8/site-packages/dateutil/tz/__pycache__/tz.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/dateutil/tz/__pycache__/win.cpython-38.pyc b/.local/lib/python3.8/site-packages/dateutil/tz/__pycache__/win.cpython-38.pyc new file mode 100644 index 0000000..4c3c12f Binary files /dev/null and b/.local/lib/python3.8/site-packages/dateutil/tz/__pycache__/win.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/dateutil/tz/_common.py b/.local/lib/python3.8/site-packages/dateutil/tz/_common.py new file mode 100644 index 0000000..e6ac118 --- /dev/null +++ b/.local/lib/python3.8/site-packages/dateutil/tz/_common.py @@ -0,0 +1,419 @@ +from six import PY2 + +from functools import wraps + +from datetime import datetime, timedelta, tzinfo + + +ZERO = timedelta(0) + +__all__ = ['tzname_in_python2', 'enfold'] + + +def tzname_in_python2(namefunc): + """Change unicode output into bytestrings in Python 2 + + tzname() API changed in Python 3. 
It used to return bytes, but was changed + to unicode strings + """ + if PY2: + @wraps(namefunc) + def adjust_encoding(*args, **kwargs): + name = namefunc(*args, **kwargs) + if name is not None: + name = name.encode() + + return name + + return adjust_encoding + else: + return namefunc + + +# The following is adapted from Alexander Belopolsky's tz library +# https://github.com/abalkin/tz +if hasattr(datetime, 'fold'): + # This is the pre-python 3.6 fold situation + def enfold(dt, fold=1): + """ + Provides a unified interface for assigning the ``fold`` attribute to + datetimes both before and after the implementation of PEP-495. + + :param fold: + The value for the ``fold`` attribute in the returned datetime. This + should be either 0 or 1. + + :return: + Returns an object for which ``getattr(dt, 'fold', 0)`` returns + ``fold`` for all versions of Python. In versions prior to + Python 3.6, this is a ``_DatetimeWithFold`` object, which is a + subclass of :py:class:`datetime.datetime` with the ``fold`` + attribute added, if ``fold`` is 1. + + .. versionadded:: 2.6.0 + """ + return dt.replace(fold=fold) + +else: + class _DatetimeWithFold(datetime): + """ + This is a class designed to provide a PEP 495-compliant interface for + Python versions before 3.6. It is used only for dates in a fold, so + the ``fold`` attribute is fixed at ``1``. + + .. versionadded:: 2.6.0 + """ + __slots__ = () + + def replace(self, *args, **kwargs): + """ + Return a datetime with the same attributes, except for those + attributes given new values by whichever keyword arguments are + specified. Note that tzinfo=None can be specified to create a naive + datetime from an aware datetime with no conversion of date and time + data. + + This is reimplemented in ``_DatetimeWithFold`` because pypy3 will + return a ``datetime.datetime`` even if ``fold`` is unchanged. + """ + argnames = ( + 'year', 'month', 'day', 'hour', 'minute', 'second', + 'microsecond', 'tzinfo' + ) + + for arg, argname in zip(args, argnames): + if argname in kwargs: + raise TypeError('Duplicate argument: {}'.format(argname)) + + kwargs[argname] = arg + + for argname in argnames: + if argname not in kwargs: + kwargs[argname] = getattr(self, argname) + + dt_class = self.__class__ if kwargs.get('fold', 1) else datetime + + return dt_class(**kwargs) + + @property + def fold(self): + return 1 + + def enfold(dt, fold=1): + """ + Provides a unified interface for assigning the ``fold`` attribute to + datetimes both before and after the implementation of PEP-495. + + :param fold: + The value for the ``fold`` attribute in the returned datetime. This + should be either 0 or 1. + + :return: + Returns an object for which ``getattr(dt, 'fold', 0)`` returns + ``fold`` for all versions of Python. In versions prior to + Python 3.6, this is a ``_DatetimeWithFold`` object, which is a + subclass of :py:class:`datetime.datetime` with the ``fold`` + attribute added, if ``fold`` is 1. + + .. versionadded:: 2.6.0 + """ + if getattr(dt, 'fold', 0) == fold: + return dt + + args = dt.timetuple()[:6] + args += (dt.microsecond, dt.tzinfo) + + if fold: + return _DatetimeWithFold(*args) + else: + return datetime(*args) + + +def _validate_fromutc_inputs(f): + """ + The CPython version of ``fromutc`` checks that the input is a ``datetime`` + object and that ``self`` is attached as its ``tzinfo``. 
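+
+    For illustration, ``tz.UTC.fromutc`` is wrapped by this decorator:
+
+    >>> from datetime import datetime
+    >>> from dateutil import tz
+    >>> tz.UTC.fromutc(datetime(2020, 1, 1, tzinfo=tz.UTC))
+    datetime.datetime(2020, 1, 1, 0, 0, tzinfo=tzutc())
+    >>> tz.UTC.fromutc(datetime(2020, 1, 1))
+    Traceback (most recent call last):
+    ...
+    ValueError: dt.tzinfo is not self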
+ """ + @wraps(f) + def fromutc(self, dt): + if not isinstance(dt, datetime): + raise TypeError("fromutc() requires a datetime argument") + if dt.tzinfo is not self: + raise ValueError("dt.tzinfo is not self") + + return f(self, dt) + + return fromutc + + +class _tzinfo(tzinfo): + """ + Base class for all ``dateutil`` ``tzinfo`` objects. + """ + + def is_ambiguous(self, dt): + """ + Whether or not the "wall time" of a given datetime is ambiguous in this + zone. + + :param dt: + A :py:class:`datetime.datetime`, naive or time zone aware. + + + :return: + Returns ``True`` if ambiguous, ``False`` otherwise. + + .. versionadded:: 2.6.0 + """ + + dt = dt.replace(tzinfo=self) + + wall_0 = enfold(dt, fold=0) + wall_1 = enfold(dt, fold=1) + + same_offset = wall_0.utcoffset() == wall_1.utcoffset() + same_dt = wall_0.replace(tzinfo=None) == wall_1.replace(tzinfo=None) + + return same_dt and not same_offset + + def _fold_status(self, dt_utc, dt_wall): + """ + Determine the fold status of a "wall" datetime, given a representation + of the same datetime as a (naive) UTC datetime. This is calculated based + on the assumption that ``dt.utcoffset() - dt.dst()`` is constant for all + datetimes, and that this offset is the actual number of hours separating + ``dt_utc`` and ``dt_wall``. + + :param dt_utc: + Representation of the datetime as UTC + + :param dt_wall: + Representation of the datetime as "wall time". This parameter must + either have a `fold` attribute or have a fold-naive + :class:`datetime.tzinfo` attached, otherwise the calculation may + fail. + """ + if self.is_ambiguous(dt_wall): + delta_wall = dt_wall - dt_utc + _fold = int(delta_wall == (dt_utc.utcoffset() - dt_utc.dst())) + else: + _fold = 0 + + return _fold + + def _fold(self, dt): + return getattr(dt, 'fold', 0) + + def _fromutc(self, dt): + """ + Given a timezone-aware datetime in a given timezone, calculates a + timezone-aware datetime in a new timezone. + + Since this is the one time that we *know* we have an unambiguous + datetime object, we take this opportunity to determine whether the + datetime is ambiguous and in a "fold" state (e.g. if it's the first + occurrence, chronologically, of the ambiguous datetime). + + :param dt: + A timezone-aware :class:`datetime.datetime` object. + """ + + # Re-implement the algorithm from Python's datetime.py + dtoff = dt.utcoffset() + if dtoff is None: + raise ValueError("fromutc() requires a non-None utcoffset() " + "result") + + # The original datetime.py code assumes that `dst()` defaults to + # zero during ambiguous times. PEP 495 inverts this presumption, so + # for pre-PEP 495 versions of python, we need to tweak the algorithm. + dtdst = dt.dst() + if dtdst is None: + raise ValueError("fromutc() requires a non-None dst() result") + delta = dtoff - dtdst + + dt += delta + # Set fold=1 so we can default to being in the fold for + # ambiguous dates. + dtdst = enfold(dt, fold=1).dst() + if dtdst is None: + raise ValueError("fromutc(): dt.dst gave inconsistent " + "results; cannot convert") + return dt + dtdst + + @_validate_fromutc_inputs + def fromutc(self, dt): + """ + Given a timezone-aware datetime in a given timezone, calculates a + timezone-aware datetime in a new timezone. + + Since this is the one time that we *know* we have an unambiguous + datetime object, we take this opportunity to determine whether the + datetime is ambiguous and in a "fold" state (e.g. if it's the first + occurrence, chronologically, of the ambiguous datetime). 
+ + :param dt: + A timezone-aware :class:`datetime.datetime` object. + """ + dt_wall = self._fromutc(dt) + + # Calculate the fold status given the two datetimes. + _fold = self._fold_status(dt, dt_wall) + + # Set the default fold value for ambiguous dates + return enfold(dt_wall, fold=_fold) + + +class tzrangebase(_tzinfo): + """ + This is an abstract base class for time zones represented by an annual + transition into and out of DST. Child classes should implement the following + methods: + + * ``__init__(self, *args, **kwargs)`` + * ``transitions(self, year)`` - this is expected to return a tuple of + datetimes representing the DST on and off transitions in standard + time. + + A fully initialized ``tzrangebase`` subclass should also provide the + following attributes: + * ``hasdst``: Boolean whether or not the zone uses DST. + * ``_dst_offset`` / ``_std_offset``: :class:`datetime.timedelta` objects + representing the respective UTC offsets. + * ``_dst_abbr`` / ``_std_abbr``: Strings representing the timezone short + abbreviations in DST and STD, respectively. + * ``_hasdst``: Whether or not the zone has DST. + + .. versionadded:: 2.6.0 + """ + def __init__(self): + raise NotImplementedError('tzrangebase is an abstract base class') + + def utcoffset(self, dt): + isdst = self._isdst(dt) + + if isdst is None: + return None + elif isdst: + return self._dst_offset + else: + return self._std_offset + + def dst(self, dt): + isdst = self._isdst(dt) + + if isdst is None: + return None + elif isdst: + return self._dst_base_offset + else: + return ZERO + + @tzname_in_python2 + def tzname(self, dt): + if self._isdst(dt): + return self._dst_abbr + else: + return self._std_abbr + + def fromutc(self, dt): + """ Given a datetime in UTC, return local time """ + if not isinstance(dt, datetime): + raise TypeError("fromutc() requires a datetime argument") + + if dt.tzinfo is not self: + raise ValueError("dt.tzinfo is not self") + + # Get transitions - if there are none, fixed offset + transitions = self.transitions(dt.year) + if transitions is None: + return dt + self.utcoffset(dt) + + # Get the transition times in UTC + dston, dstoff = transitions + + dston -= self._std_offset + dstoff -= self._std_offset + + utc_transitions = (dston, dstoff) + dt_utc = dt.replace(tzinfo=None) + + isdst = self._naive_isdst(dt_utc, utc_transitions) + + if isdst: + dt_wall = dt + self._dst_offset + else: + dt_wall = dt + self._std_offset + + _fold = int(not isdst and self.is_ambiguous(dt_wall)) + + return enfold(dt_wall, fold=_fold) + + def is_ambiguous(self, dt): + """ + Whether or not the "wall time" of a given datetime is ambiguous in this + zone. + + :param dt: + A :py:class:`datetime.datetime`, naive or time zone aware. + + + :return: + Returns ``True`` if ambiguous, ``False`` otherwise. + + .. 
versionadded:: 2.6.0 + """ + if not self.hasdst: + return False + + start, end = self.transitions(dt.year) + + dt = dt.replace(tzinfo=None) + return (end <= dt < end + self._dst_base_offset) + + def _isdst(self, dt): + if not self.hasdst: + return False + elif dt is None: + return None + + transitions = self.transitions(dt.year) + + if transitions is None: + return False + + dt = dt.replace(tzinfo=None) + + isdst = self._naive_isdst(dt, transitions) + + # Handle ambiguous dates + if not isdst and self.is_ambiguous(dt): + return not self._fold(dt) + else: + return isdst + + def _naive_isdst(self, dt, transitions): + dston, dstoff = transitions + + dt = dt.replace(tzinfo=None) + + if dston < dstoff: + isdst = dston <= dt < dstoff + else: + isdst = not dstoff <= dt < dston + + return isdst + + @property + def _dst_base_offset(self): + return self._dst_offset - self._std_offset + + __hash__ = None + + def __ne__(self, other): + return not (self == other) + + def __repr__(self): + return "%s(...)" % self.__class__.__name__ + + __reduce__ = object.__reduce__ diff --git a/.local/lib/python3.8/site-packages/dateutil/tz/_factories.py b/.local/lib/python3.8/site-packages/dateutil/tz/_factories.py new file mode 100644 index 0000000..f8a6589 --- /dev/null +++ b/.local/lib/python3.8/site-packages/dateutil/tz/_factories.py @@ -0,0 +1,80 @@ +from datetime import timedelta +import weakref +from collections import OrderedDict + +from six.moves import _thread + + +class _TzSingleton(type): + def __init__(cls, *args, **kwargs): + cls.__instance = None + super(_TzSingleton, cls).__init__(*args, **kwargs) + + def __call__(cls): + if cls.__instance is None: + cls.__instance = super(_TzSingleton, cls).__call__() + return cls.__instance + + +class _TzFactory(type): + def instance(cls, *args, **kwargs): + """Alternate constructor that returns a fresh instance""" + return type.__call__(cls, *args, **kwargs) + + +class _TzOffsetFactory(_TzFactory): + def __init__(cls, *args, **kwargs): + cls.__instances = weakref.WeakValueDictionary() + cls.__strong_cache = OrderedDict() + cls.__strong_cache_size = 8 + + cls._cache_lock = _thread.allocate_lock() + + def __call__(cls, name, offset): + if isinstance(offset, timedelta): + key = (name, offset.total_seconds()) + else: + key = (name, offset) + + instance = cls.__instances.get(key, None) + if instance is None: + instance = cls.__instances.setdefault(key, + cls.instance(name, offset)) + + # This lock may not be necessary in Python 3. See GH issue #901 + with cls._cache_lock: + cls.__strong_cache[key] = cls.__strong_cache.pop(key, instance) + + # Remove an item if the strong cache is overpopulated + if len(cls.__strong_cache) > cls.__strong_cache_size: + cls.__strong_cache.popitem(last=False) + + return instance + + +class _TzStrFactory(_TzFactory): + def __init__(cls, *args, **kwargs): + cls.__instances = weakref.WeakValueDictionary() + cls.__strong_cache = OrderedDict() + cls.__strong_cache_size = 8 + + cls.__cache_lock = _thread.allocate_lock() + + def __call__(cls, s, posix_offset=False): + key = (s, posix_offset) + instance = cls.__instances.get(key, None) + + if instance is None: + instance = cls.__instances.setdefault(key, + cls.instance(s, posix_offset)) + + # This lock may not be necessary in Python 3. 
See GH issue #901 + with cls.__cache_lock: + cls.__strong_cache[key] = cls.__strong_cache.pop(key, instance) + + # Remove an item if the strong cache is overpopulated + if len(cls.__strong_cache) > cls.__strong_cache_size: + cls.__strong_cache.popitem(last=False) + + return instance + diff --git a/.local/lib/python3.8/site-packages/dateutil/tz/tz.py b/.local/lib/python3.8/site-packages/dateutil/tz/tz.py new file mode 100644 index 0000000..c67f56d --- /dev/null +++ b/.local/lib/python3.8/site-packages/dateutil/tz/tz.py @@ -0,0 +1,1849 @@ +# -*- coding: utf-8 -*- +""" +This module offers timezone implementations subclassing the abstract +:py:class:`datetime.tzinfo` type. There are classes to handle tzfile format +files (usually are in :file:`/etc/localtime`, :file:`/usr/share/zoneinfo`, +etc), TZ environment string (in all known formats), given ranges (with help +from relative deltas), local machine timezone, fixed offset timezone, and UTC +timezone. +""" +import datetime +import struct +import time +import sys +import os +import bisect +import weakref +from collections import OrderedDict + +import six +from six import string_types +from six.moves import _thread +from ._common import tzname_in_python2, _tzinfo +from ._common import tzrangebase, enfold +from ._common import _validate_fromutc_inputs + +from ._factories import _TzSingleton, _TzOffsetFactory +from ._factories import _TzStrFactory +try: + from .win import tzwin, tzwinlocal +except ImportError: + tzwin = tzwinlocal = None + +# For warning about rounding tzinfo +from warnings import warn + +ZERO = datetime.timedelta(0) +EPOCH = datetime.datetime.utcfromtimestamp(0) +EPOCHORDINAL = EPOCH.toordinal() + + +@six.add_metaclass(_TzSingleton) +class tzutc(datetime.tzinfo): + """ + This is a tzinfo object that represents the UTC time zone. + + **Examples:** + + .. doctest:: + + >>> from datetime import * + >>> from dateutil.tz import * + + >>> datetime.now() + datetime.datetime(2003, 9, 27, 9, 40, 1, 521290) + + >>> datetime.now(tzutc()) + datetime.datetime(2003, 9, 27, 12, 40, 12, 156379, tzinfo=tzutc()) + + >>> datetime.now(tzutc()).tzname() + 'UTC' + + .. versionchanged:: 2.7.0 + ``tzutc()`` is now a singleton, so the result of ``tzutc()`` will + always return the same object. + + .. doctest:: + + >>> from dateutil.tz import tzutc, UTC + >>> tzutc() is tzutc() + True + >>> tzutc() is UTC + True + """ + def utcoffset(self, dt): + return ZERO + + def dst(self, dt): + return ZERO + + @tzname_in_python2 + def tzname(self, dt): + return "UTC" + + def is_ambiguous(self, dt): + """ + Whether or not the "wall time" of a given datetime is ambiguous in this + zone. + + :param dt: + A :py:class:`datetime.datetime`, naive or time zone aware. + + + :return: + Returns ``True`` if ambiguous, ``False`` otherwise. + + .. versionadded:: 2.6.0 + """ + return False + + @_validate_fromutc_inputs + def fromutc(self, dt): + """ + Fast track version of fromutc() returns the original ``dt`` object for + any valid :py:class:`datetime.datetime` object. + """ + return dt + + def __eq__(self, other): + if not isinstance(other, (tzutc, tzoffset)): + return NotImplemented + + return (isinstance(other, tzutc) or + (isinstance(other, tzoffset) and other._offset == ZERO)) + + __hash__ = None + + def __ne__(self, other): + return not (self == other) + + def __repr__(self): + return "%s()" % self.__class__.__name__ + + __reduce__ = object.__reduce__ + + +#: Convenience constant providing a :class:`tzutc()` instance +#: +#: .. 
versionadded:: 2.7.0 +UTC = tzutc() + + +@six.add_metaclass(_TzOffsetFactory) +class tzoffset(datetime.tzinfo): + """ + A simple class for representing a fixed offset from UTC. + + :param name: + The timezone name, to be returned when ``tzname()`` is called. + :param offset: + The time zone offset in seconds, or (since version 2.6.0, represented + as a :py:class:`datetime.timedelta` object). + """ + def __init__(self, name, offset): + self._name = name + + try: + # Allow a timedelta + offset = offset.total_seconds() + except (TypeError, AttributeError): + pass + + self._offset = datetime.timedelta(seconds=_get_supported_offset(offset)) + + def utcoffset(self, dt): + return self._offset + + def dst(self, dt): + return ZERO + + @tzname_in_python2 + def tzname(self, dt): + return self._name + + @_validate_fromutc_inputs + def fromutc(self, dt): + return dt + self._offset + + def is_ambiguous(self, dt): + """ + Whether or not the "wall time" of a given datetime is ambiguous in this + zone. + + :param dt: + A :py:class:`datetime.datetime`, naive or time zone aware. + :return: + Returns ``True`` if ambiguous, ``False`` otherwise. + + .. versionadded:: 2.6.0 + """ + return False + + def __eq__(self, other): + if not isinstance(other, tzoffset): + return NotImplemented + + return self._offset == other._offset + + __hash__ = None + + def __ne__(self, other): + return not (self == other) + + def __repr__(self): + return "%s(%s, %s)" % (self.__class__.__name__, + repr(self._name), + int(self._offset.total_seconds())) + + __reduce__ = object.__reduce__ + + +class tzlocal(_tzinfo): + """ + A :class:`tzinfo` subclass built around the ``time`` timezone functions. + """ + def __init__(self): + super(tzlocal, self).__init__() + + self._std_offset = datetime.timedelta(seconds=-time.timezone) + if time.daylight: + self._dst_offset = datetime.timedelta(seconds=-time.altzone) + else: + self._dst_offset = self._std_offset + + self._dst_saved = self._dst_offset - self._std_offset + self._hasdst = bool(self._dst_saved) + self._tznames = tuple(time.tzname) + + def utcoffset(self, dt): + if dt is None and self._hasdst: + return None + + if self._isdst(dt): + return self._dst_offset + else: + return self._std_offset + + def dst(self, dt): + if dt is None and self._hasdst: + return None + + if self._isdst(dt): + return self._dst_offset - self._std_offset + else: + return ZERO + + @tzname_in_python2 + def tzname(self, dt): + return self._tznames[self._isdst(dt)] + + def is_ambiguous(self, dt): + """ + Whether or not the "wall time" of a given datetime is ambiguous in this + zone. + + :param dt: + A :py:class:`datetime.datetime`, naive or time zone aware. + + + :return: + Returns ``True`` if ambiguous, ``False`` otherwise. + + .. versionadded:: 2.6.0 + """ + naive_dst = self._naive_is_dst(dt) + return (not naive_dst and + (naive_dst != self._naive_is_dst(dt - self._dst_saved))) + + def _naive_is_dst(self, dt): + timestamp = _datetime_to_timestamp(dt) + return time.localtime(timestamp + time.timezone).tm_isdst + + def _isdst(self, dt, fold_naive=True): + # We can't use mktime here. It is unstable when deciding if + # the hour near to a change is DST or not. 
+ # + # timestamp = time.mktime((dt.year, dt.month, dt.day, dt.hour, + # dt.minute, dt.second, dt.weekday(), 0, -1)) + # return time.localtime(timestamp).tm_isdst + # + # The code above yields the following result: + # + # >>> import tz, datetime + # >>> t = tz.tzlocal() + # >>> datetime.datetime(2003,2,15,23,tzinfo=t).tzname() + # 'BRDT' + # >>> datetime.datetime(2003,2,16,0,tzinfo=t).tzname() + # 'BRST' + # >>> datetime.datetime(2003,2,15,23,tzinfo=t).tzname() + # 'BRST' + # >>> datetime.datetime(2003,2,15,22,tzinfo=t).tzname() + # 'BRDT' + # >>> datetime.datetime(2003,2,15,23,tzinfo=t).tzname() + # 'BRDT' + # + # Here is a more stable implementation: + # + if not self._hasdst: + return False + + # Check for ambiguous times: + dstval = self._naive_is_dst(dt) + fold = getattr(dt, 'fold', None) + + if self.is_ambiguous(dt): + if fold is not None: + return not self._fold(dt) + else: + return True + + return dstval + + def __eq__(self, other): + if isinstance(other, tzlocal): + return (self._std_offset == other._std_offset and + self._dst_offset == other._dst_offset) + elif isinstance(other, tzutc): + return (not self._hasdst and + self._tznames[0] in {'UTC', 'GMT'} and + self._std_offset == ZERO) + elif isinstance(other, tzoffset): + return (not self._hasdst and + self._tznames[0] == other._name and + self._std_offset == other._offset) + else: + return NotImplemented + + __hash__ = None + + def __ne__(self, other): + return not (self == other) + + def __repr__(self): + return "%s()" % self.__class__.__name__ + + __reduce__ = object.__reduce__ + + +class _ttinfo(object): + __slots__ = ["offset", "delta", "isdst", "abbr", + "isstd", "isgmt", "dstoffset"] + + def __init__(self): + for attr in self.__slots__: + setattr(self, attr, None) + + def __repr__(self): + l = [] + for attr in self.__slots__: + value = getattr(self, attr) + if value is not None: + l.append("%s=%s" % (attr, repr(value))) + return "%s(%s)" % (self.__class__.__name__, ", ".join(l)) + + def __eq__(self, other): + if not isinstance(other, _ttinfo): + return NotImplemented + + return (self.offset == other.offset and + self.delta == other.delta and + self.isdst == other.isdst and + self.abbr == other.abbr and + self.isstd == other.isstd and + self.isgmt == other.isgmt and + self.dstoffset == other.dstoffset) + + __hash__ = None + + def __ne__(self, other): + return not (self == other) + + def __getstate__(self): + state = {} + for name in self.__slots__: + state[name] = getattr(self, name, None) + return state + + def __setstate__(self, state): + for name in self.__slots__: + if name in state: + setattr(self, name, state[name]) + + +class _tzfile(object): + """ + Lightweight class for holding the relevant transition and time zone + information read from binary tzfiles. + """ + attrs = ['trans_list', 'trans_list_utc', 'trans_idx', 'ttinfo_list', + 'ttinfo_std', 'ttinfo_dst', 'ttinfo_before', 'ttinfo_first'] + + def __init__(self, **kwargs): + for attr in self.attrs: + setattr(self, attr, kwargs.get(attr, None)) + + +class tzfile(_tzinfo): + """ + This is a ``tzinfo`` subclass that allows one to use the ``tzfile(5)`` + format timezone files to extract current and historical zone information. + + :param fileobj: + This can be an opened file stream or a file name that the time zone + information can be read from. + + :param filename: + This is an optional parameter specifying the source of the time zone + information in the event that ``fileobj`` is a file object. 
If omitted + and ``fileobj`` is a file stream, this parameter will be set either to + ``fileobj``'s ``name`` attribute or to ``repr(fileobj)``. + + See `Sources for Time Zone and Daylight Saving Time Data + `_ for more information. + Time zone files can be compiled from the `IANA Time Zone database files + `_ with the `zic time zone compiler + `_ + + .. note:: + + Only construct a ``tzfile`` directly if you have a specific timezone + file on disk that you want to read into a Python ``tzinfo`` object. + If you want to get a ``tzfile`` representing a specific IANA zone, + (e.g. ``'America/New_York'``), you should call + :func:`dateutil.tz.gettz` with the zone identifier. + + + **Examples:** + + Using the US Eastern time zone as an example, we can see that a ``tzfile`` + provides time zone information for the standard Daylight Saving offsets: + + .. testsetup:: tzfile + + from dateutil.tz import gettz + from datetime import datetime + + .. doctest:: tzfile + + >>> NYC = gettz('America/New_York') + >>> NYC + tzfile('/usr/share/zoneinfo/America/New_York') + + >>> print(datetime(2016, 1, 3, tzinfo=NYC)) # EST + 2016-01-03 00:00:00-05:00 + + >>> print(datetime(2016, 7, 7, tzinfo=NYC)) # EDT + 2016-07-07 00:00:00-04:00 + + + The ``tzfile`` structure contains a fully history of the time zone, + so historical dates will also have the right offsets. For example, before + the adoption of the UTC standards, New York used local solar mean time: + + .. doctest:: tzfile + + >>> print(datetime(1901, 4, 12, tzinfo=NYC)) # LMT + 1901-04-12 00:00:00-04:56 + + And during World War II, New York was on "Eastern War Time", which was a + state of permanent daylight saving time: + + .. doctest:: tzfile + + >>> print(datetime(1944, 2, 7, tzinfo=NYC)) # EWT + 1944-02-07 00:00:00-04:00 + + """ + + def __init__(self, fileobj, filename=None): + super(tzfile, self).__init__() + + file_opened_here = False + if isinstance(fileobj, string_types): + self._filename = fileobj + fileobj = open(fileobj, 'rb') + file_opened_here = True + elif filename is not None: + self._filename = filename + elif hasattr(fileobj, "name"): + self._filename = fileobj.name + else: + self._filename = repr(fileobj) + + if fileobj is not None: + if not file_opened_here: + fileobj = _nullcontext(fileobj) + + with fileobj as file_stream: + tzobj = self._read_tzfile(file_stream) + + self._set_tzdata(tzobj) + + def _set_tzdata(self, tzobj): + """ Set the time zone data of this object from a _tzfile object """ + # Copy the relevant attributes over as private attributes + for attr in _tzfile.attrs: + setattr(self, '_' + attr, getattr(tzobj, attr)) + + def _read_tzfile(self, fileobj): + out = _tzfile() + + # From tzfile(5): + # + # The time zone information files used by tzset(3) + # begin with the magic characters "TZif" to identify + # them as time zone information files, followed by + # sixteen bytes reserved for future use, followed by + # six four-byte values of type long, written in a + # ``standard'' byte order (the high-order byte + # of the value is written first). + if fileobj.read(4).decode() != "TZif": + raise ValueError("magic not found") + + fileobj.read(16) + + ( + # The number of UTC/local indicators stored in the file. + ttisgmtcnt, + + # The number of standard/wall indicators stored in the file. + ttisstdcnt, + + # The number of leap seconds for which data is + # stored in the file. + leapcnt, + + # The number of "transition times" for which data + # is stored in the file. 
+ timecnt, + + # The number of "local time types" for which data + # is stored in the file (must not be zero). + typecnt, + + # The number of characters of "time zone + # abbreviation strings" stored in the file. + charcnt, + + ) = struct.unpack(">6l", fileobj.read(24)) + + # The above header is followed by tzh_timecnt four-byte + # values of type long, sorted in ascending order. + # These values are written in ``standard'' byte order. + # Each is used as a transition time (as returned by + # time(2)) at which the rules for computing local time + # change. + + if timecnt: + out.trans_list_utc = list(struct.unpack(">%dl" % timecnt, + fileobj.read(timecnt*4))) + else: + out.trans_list_utc = [] + + # Next come tzh_timecnt one-byte values of type unsigned + # char; each one tells which of the different types of + # ``local time'' types described in the file is associated + # with the same-indexed transition time. These values + # serve as indices into an array of ttinfo structures that + # appears next in the file. + + if timecnt: + out.trans_idx = struct.unpack(">%dB" % timecnt, + fileobj.read(timecnt)) + else: + out.trans_idx = [] + + # Each ttinfo structure is written as a four-byte value + # for tt_gmtoff of type long, in a standard byte + # order, followed by a one-byte value for tt_isdst + # and a one-byte value for tt_abbrind. In each + # structure, tt_gmtoff gives the number of + # seconds to be added to UTC, tt_isdst tells whether + # tm_isdst should be set by localtime(3), and + # tt_abbrind serves as an index into the array of + # time zone abbreviation characters that follow the + # ttinfo structure(s) in the file. + + ttinfo = [] + + for i in range(typecnt): + ttinfo.append(struct.unpack(">lbb", fileobj.read(6))) + + abbr = fileobj.read(charcnt).decode() + + # Then there are tzh_leapcnt pairs of four-byte + # values, written in standard byte order; the + # first value of each pair gives the time (as + # returned by time(2)) at which a leap second + # occurs; the second gives the total number of + # leap seconds to be applied after the given time. + # The pairs of values are sorted in ascending order + # by time. + + # Not used, for now (but seek for correct file position) + if leapcnt: + fileobj.seek(leapcnt * 8, os.SEEK_CUR) + + # Then there are tzh_ttisstdcnt standard/wall + # indicators, each stored as a one-byte value; + # they tell whether the transition times associated + # with local time types were specified as standard + # time or wall clock time, and are used when + # a time zone file is used in handling POSIX-style + # time zone environment variables. + + if ttisstdcnt: + isstd = struct.unpack(">%db" % ttisstdcnt, + fileobj.read(ttisstdcnt)) + + # Finally, there are tzh_ttisgmtcnt UTC/local + # indicators, each stored as a one-byte value; + # they tell whether the transition times associated + # with local time types were specified as UTC or + # local time, and are used when a time zone file + # is used in handling POSIX-style time zone envi- + # ronment variables. 
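+
+        # Note: these indicator arrays may have fewer entries than there
+        # are ttinfo structures (or be absent entirely), which is why the
+        # ttinfo construction below guards each lookup with
+        # ``ttisstdcnt > i`` / ``ttisgmtcnt > i``.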
+ + if ttisgmtcnt: + isgmt = struct.unpack(">%db" % ttisgmtcnt, + fileobj.read(ttisgmtcnt)) + + # Build ttinfo list + out.ttinfo_list = [] + for i in range(typecnt): + gmtoff, isdst, abbrind = ttinfo[i] + gmtoff = _get_supported_offset(gmtoff) + tti = _ttinfo() + tti.offset = gmtoff + tti.dstoffset = datetime.timedelta(0) + tti.delta = datetime.timedelta(seconds=gmtoff) + tti.isdst = isdst + tti.abbr = abbr[abbrind:abbr.find('\x00', abbrind)] + tti.isstd = (ttisstdcnt > i and isstd[i] != 0) + tti.isgmt = (ttisgmtcnt > i and isgmt[i] != 0) + out.ttinfo_list.append(tti) + + # Replace ttinfo indexes for ttinfo objects. + out.trans_idx = [out.ttinfo_list[idx] for idx in out.trans_idx] + + # Set standard, dst, and before ttinfos. before will be + # used when a given time is before any transitions, + # and will be set to the first non-dst ttinfo, or to + # the first dst, if all of them are dst. + out.ttinfo_std = None + out.ttinfo_dst = None + out.ttinfo_before = None + if out.ttinfo_list: + if not out.trans_list_utc: + out.ttinfo_std = out.ttinfo_first = out.ttinfo_list[0] + else: + for i in range(timecnt-1, -1, -1): + tti = out.trans_idx[i] + if not out.ttinfo_std and not tti.isdst: + out.ttinfo_std = tti + elif not out.ttinfo_dst and tti.isdst: + out.ttinfo_dst = tti + + if out.ttinfo_std and out.ttinfo_dst: + break + else: + if out.ttinfo_dst and not out.ttinfo_std: + out.ttinfo_std = out.ttinfo_dst + + for tti in out.ttinfo_list: + if not tti.isdst: + out.ttinfo_before = tti + break + else: + out.ttinfo_before = out.ttinfo_list[0] + + # Now fix transition times to become relative to wall time. + # + # I'm not sure about this. In my tests, the tz source file + # is setup to wall time, and in the binary file isstd and + # isgmt are off, so it should be in wall time. OTOH, it's + # always in gmt time. Let me know if you have comments + # about this. + lastdst = None + lastoffset = None + lastdstoffset = None + lastbaseoffset = None + out.trans_list = [] + + for i, tti in enumerate(out.trans_idx): + offset = tti.offset + dstoffset = 0 + + if lastdst is not None: + if tti.isdst: + if not lastdst: + dstoffset = offset - lastoffset + + if not dstoffset and lastdstoffset: + dstoffset = lastdstoffset + + tti.dstoffset = datetime.timedelta(seconds=dstoffset) + lastdstoffset = dstoffset + + # If a time zone changes its base offset during a DST transition, + # then you need to adjust by the previous base offset to get the + # transition time in local time. Otherwise you use the current + # base offset. Ideally, I would have some mathematical proof of + # why this is true, but I haven't really thought about it enough. + baseoffset = offset - dstoffset + adjustment = baseoffset + if (lastbaseoffset is not None and baseoffset != lastbaseoffset + and tti.isdst != lastdst): + # The base DST has changed + adjustment = lastbaseoffset + + lastdst = tti.isdst + lastoffset = offset + lastbaseoffset = baseoffset + + out.trans_list.append(out.trans_list_utc[i] + adjustment) + + out.trans_idx = tuple(out.trans_idx) + out.trans_list = tuple(out.trans_list) + out.trans_list_utc = tuple(out.trans_list_utc) + + return out + + def _find_last_transition(self, dt, in_utc=False): + # If there's no list, there are no transitions to find + if not self._trans_list: + return None + + timestamp = _datetime_to_timestamp(dt) + + # Find where the timestamp fits in the transition list - if the + # timestamp is a transition time, it's part of the "after" period. 
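+        # bisect_right returns the number of transitions at or before
+        # ``timestamp``, so after subtracting 1 below, an index of -1 means
+        # "before the first transition".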
+ trans_list = self._trans_list_utc if in_utc else self._trans_list + idx = bisect.bisect_right(trans_list, timestamp) + + # We want to know when the previous transition was, so subtract off 1 + return idx - 1 + + def _get_ttinfo(self, idx): + # For no list or after the last transition, default to _ttinfo_std + if idx is None or (idx + 1) >= len(self._trans_list): + return self._ttinfo_std + + # If there is a list and the time is before it, return _ttinfo_before + if idx < 0: + return self._ttinfo_before + + return self._trans_idx[idx] + + def _find_ttinfo(self, dt): + idx = self._resolve_ambiguous_time(dt) + + return self._get_ttinfo(idx) + + def fromutc(self, dt): + """ + The ``tzfile`` implementation of :py:func:`datetime.tzinfo.fromutc`. + + :param dt: + A :py:class:`datetime.datetime` object. + + :raises TypeError: + Raised if ``dt`` is not a :py:class:`datetime.datetime` object. + + :raises ValueError: + Raised if this is called with a ``dt`` which does not have this + ``tzinfo`` attached. + + :return: + Returns a :py:class:`datetime.datetime` object representing the + wall time in ``self``'s time zone. + """ + # These isinstance checks are in datetime.tzinfo, so we'll preserve + # them, even if we don't care about duck typing. + if not isinstance(dt, datetime.datetime): + raise TypeError("fromutc() requires a datetime argument") + + if dt.tzinfo is not self: + raise ValueError("dt.tzinfo is not self") + + # First treat UTC as wall time and get the transition we're in. + idx = self._find_last_transition(dt, in_utc=True) + tti = self._get_ttinfo(idx) + + dt_out = dt + datetime.timedelta(seconds=tti.offset) + + fold = self.is_ambiguous(dt_out, idx=idx) + + return enfold(dt_out, fold=int(fold)) + + def is_ambiguous(self, dt, idx=None): + """ + Whether or not the "wall time" of a given datetime is ambiguous in this + zone. + + :param dt: + A :py:class:`datetime.datetime`, naive or time zone aware. + + + :return: + Returns ``True`` if ambiguous, ``False`` otherwise. + + .. versionadded:: 2.6.0 + """ + if idx is None: + idx = self._find_last_transition(dt) + + # Calculate the difference in offsets from current to previous + timestamp = _datetime_to_timestamp(dt) + tti = self._get_ttinfo(idx) + + if idx is None or idx <= 0: + return False + + od = self._get_ttinfo(idx - 1).offset - tti.offset + tt = self._trans_list[idx] # Transition time + + return timestamp < tt + od + + def _resolve_ambiguous_time(self, dt): + idx = self._find_last_transition(dt) + + # If we have no transitions, return the index + _fold = self._fold(dt) + if idx is None or idx == 0: + return idx + + # If it's ambiguous and we're in a fold, shift to a different index. + idx_offset = int(not _fold and self.is_ambiguous(dt, idx)) + + return idx - idx_offset + + def utcoffset(self, dt): + if dt is None: + return None + + if not self._ttinfo_std: + return ZERO + + return self._find_ttinfo(dt).delta + + def dst(self, dt): + if dt is None: + return None + + if not self._ttinfo_dst: + return ZERO + + tti = self._find_ttinfo(dt) + + if not tti.isdst: + return ZERO + + # The documentation says that utcoffset()-dst() must + # be constant for every dt. 
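+        # The per-transition DST offset was computed once when the file was
+        # parsed (see _read_tzfile), so it is returned directly here rather
+        # than being recomputed from utcoffset().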
+        return tti.dstoffset
+
+    @tzname_in_python2
+    def tzname(self, dt):
+        if not self._ttinfo_std or dt is None:
+            return None
+        return self._find_ttinfo(dt).abbr
+
+    def __eq__(self, other):
+        if not isinstance(other, tzfile):
+            return NotImplemented
+        return (self._trans_list == other._trans_list and
+                self._trans_idx == other._trans_idx and
+                self._ttinfo_list == other._ttinfo_list)
+
+    __hash__ = None
+
+    def __ne__(self, other):
+        return not (self == other)
+
+    def __repr__(self):
+        return "%s(%s)" % (self.__class__.__name__, repr(self._filename))
+
+    def __reduce__(self):
+        return self.__reduce_ex__(None)
+
+    def __reduce_ex__(self, protocol):
+        return (self.__class__, (None, self._filename), self.__dict__)
+
+
+class tzrange(tzrangebase):
+    """
+    The ``tzrange`` object is a time zone specified by a set of offsets and
+    abbreviations, equivalent to the way the ``TZ`` variable can be specified
+    in POSIX-like systems, but using Python delta objects to specify DST
+    start, end and offsets.
+
+    :param stdabbr:
+        The abbreviation for standard time (e.g. ``'EST'``).
+
+    :param stdoffset:
+        An integer or :class:`datetime.timedelta` object or equivalent
+        specifying the base offset from UTC.
+
+        If unspecified, +00:00 is used.
+
+    :param dstabbr:
+        The abbreviation for DST / "Summer" time (e.g. ``'EDT'``).
+
+        If specified, with no other DST information, DST is assumed to occur
+        and the default behavior for ``dstoffset``, ``start`` and ``end`` is
+        used. If unspecified and no other DST information is specified, it
+        is assumed that this zone has no DST.
+
+        If this is unspecified and other DST information *is* specified,
+        DST occurs in the zone but the time zone abbreviation is left
+        unchanged.
+
+    :param dstoffset:
+        An integer or :class:`datetime.timedelta` object or equivalent
+        specifying the UTC offset during DST. If unspecified and any other DST
+        information is specified, it is assumed to be the STD offset +1 hour.
+
+    :param start:
+        A :class:`relativedelta.relativedelta` object or equivalent specifying
+        the time of day and time of year that daylight saving time starts. To
+        specify, for example, that DST starts at 2AM on the 2nd Sunday in
+        March, pass:
+
+            ``relativedelta(hours=2, month=3, day=1, weekday=SU(+2))``
+
+        If unspecified and any other DST information is specified, the default
+        value is 2 AM on the first Sunday in April.
+
+    :param end:
+        A :class:`relativedelta.relativedelta` object or equivalent
+        representing the time of day and time of year that daylight saving
+        time ends, with the same specification method as in ``start``. One
+        note is that this should point to the first time in the *standard*
+        zone, so if a transition occurs at 2AM in the DST zone and the clocks
+        are set back 1 hour to 1AM, set the ``hours`` parameter to +1.
+
+
+    **Examples:**
+
+    .. testsetup:: tzrange
+
+        from dateutil.tz import tzrange, tzstr
+
+    .. doctest:: tzrange
+
+        >>> tzstr('EST5EDT') == tzrange("EST", -18000, "EDT")
+        True
+
+        >>> from dateutil.relativedelta import *
+        >>> range1 = tzrange("EST", -18000, "EDT")
+        >>> range2 = tzrange("EST", -18000, "EDT", -14400,
+        ...                  relativedelta(hours=+2, month=4, day=1,
+        ...                                weekday=SU(+1)),
+        ...                  relativedelta(hours=+1, month=10, day=31,
+        ...                                weekday=SU(-1)))
+        >>> tzstr('EST5EDT') == range1 == range2
+        True
+
+    """
+    def __init__(self, stdabbr, stdoffset=None,
+                 dstabbr=None, dstoffset=None,
+                 start=None, end=None):
+
+        global relativedelta
+        from dateutil import relativedelta
+
+        self._std_abbr = stdabbr
+        self._dst_abbr = dstabbr
+
+        try:
+            stdoffset = stdoffset.total_seconds()
+        except (TypeError, AttributeError):
+            pass
+
+        try:
+            dstoffset = dstoffset.total_seconds()
+        except (TypeError, AttributeError):
+            pass
+
+        if stdoffset is not None:
+            self._std_offset = datetime.timedelta(seconds=stdoffset)
+        else:
+            self._std_offset = ZERO
+
+        if dstoffset is not None:
+            self._dst_offset = datetime.timedelta(seconds=dstoffset)
+        elif dstabbr and stdoffset is not None:
+            self._dst_offset = self._std_offset + datetime.timedelta(hours=+1)
+        else:
+            self._dst_offset = ZERO
+
+        if dstabbr and start is None:
+            self._start_delta = relativedelta.relativedelta(
+                hours=+2, month=4, day=1, weekday=relativedelta.SU(+1))
+        else:
+            self._start_delta = start
+
+        if dstabbr and end is None:
+            self._end_delta = relativedelta.relativedelta(
+                hours=+1, month=10, day=31, weekday=relativedelta.SU(-1))
+        else:
+            self._end_delta = end
+
+        self._dst_base_offset_ = self._dst_offset - self._std_offset
+        self.hasdst = bool(self._start_delta)
+
+    def transitions(self, year):
+        """
+        For a given year, get the DST on and off transition times, expressed
+        always on the standard time side. For zones with no transitions, this
+        function returns ``None``.
+
+        :param year:
+            The year whose transitions you would like to query.
+
+        :return:
+            Returns a :class:`tuple` of :class:`datetime.datetime` objects,
+            ``(dston, dstoff)`` for zones with an annual DST transition, or
+            ``None`` for fixed offset zones.
+        """
+        if not self.hasdst:
+            return None
+
+        base_year = datetime.datetime(year, 1, 1)
+
+        start = base_year + self._start_delta
+        end = base_year + self._end_delta
+
+        return (start, end)
+
+    def __eq__(self, other):
+        if not isinstance(other, tzrange):
+            return NotImplemented
+
+        return (self._std_abbr == other._std_abbr and
+                self._dst_abbr == other._dst_abbr and
+                self._std_offset == other._std_offset and
+                self._dst_offset == other._dst_offset and
+                self._start_delta == other._start_delta and
+                self._end_delta == other._end_delta)
+
+    @property
+    def _dst_base_offset(self):
+        return self._dst_base_offset_
+
+
+@six.add_metaclass(_TzStrFactory)
+class tzstr(tzrange):
+    """
+    ``tzstr`` objects are time zone objects specified by a time-zone string as
+    it would be passed to a ``TZ`` variable on POSIX-style systems (see
+    the `GNU C Library: TZ Variable`_ for more details).
+
+    There is one notable exception, which is that POSIX-style time zones use an
+    inverted offset format, so normally ``GMT+3`` would be parsed as an offset
+    3 hours *behind* GMT. The ``tzstr`` time zone object will parse this as an
+    offset 3 hours *ahead* of GMT. If you would like to maintain the POSIX
+    behavior, pass a ``True`` value to ``posix_offset``.
+
+    The :class:`tzrange` object provides the same functionality, but is
+    specified using :class:`relativedelta.relativedelta` objects, rather than
+    strings.
+
+    :param s:
+        A time zone string in ``TZ`` variable format. This can be a
+        :class:`bytes` (2.x: :class:`str`), :class:`str` (2.x:
+        :class:`unicode`) or a stream emitting unicode characters
+        (e.g. :class:`StringIO`).
+
+    :param posix_offset:
+        Optional. 
If set to ``True``, interpret strings such as ``GMT+3`` or + ``UTC+3`` as being 3 hours *behind* UTC rather than ahead, per the + POSIX standard. + + .. caution:: + + Prior to version 2.7.0, this function also supported time zones + in the format: + + * ``EST5EDT,4,0,6,7200,10,0,26,7200,3600`` + * ``EST5EDT,4,1,0,7200,10,-1,0,7200,3600`` + + This format is non-standard and has been deprecated; this function + will raise a :class:`DeprecatedTZFormatWarning` until + support is removed in a future version. + + .. _`GNU C Library: TZ Variable`: + https://www.gnu.org/software/libc/manual/html_node/TZ-Variable.html + """ + def __init__(self, s, posix_offset=False): + global parser + from dateutil.parser import _parser as parser + + self._s = s + + res = parser._parsetz(s) + if res is None or res.any_unused_tokens: + raise ValueError("unknown string format") + + # Here we break the compatibility with the TZ variable handling. + # GMT-3 actually *means* the timezone -3. + if res.stdabbr in ("GMT", "UTC") and not posix_offset: + res.stdoffset *= -1 + + # We must initialize it first, since _delta() needs + # _std_offset and _dst_offset set. Use False in start/end + # to avoid building it two times. + tzrange.__init__(self, res.stdabbr, res.stdoffset, + res.dstabbr, res.dstoffset, + start=False, end=False) + + if not res.dstabbr: + self._start_delta = None + self._end_delta = None + else: + self._start_delta = self._delta(res.start) + if self._start_delta: + self._end_delta = self._delta(res.end, isend=1) + + self.hasdst = bool(self._start_delta) + + def _delta(self, x, isend=0): + from dateutil import relativedelta + kwargs = {} + if x.month is not None: + kwargs["month"] = x.month + if x.weekday is not None: + kwargs["weekday"] = relativedelta.weekday(x.weekday, x.week) + if x.week > 0: + kwargs["day"] = 1 + else: + kwargs["day"] = 31 + elif x.day: + kwargs["day"] = x.day + elif x.yday is not None: + kwargs["yearday"] = x.yday + elif x.jyday is not None: + kwargs["nlyearday"] = x.jyday + if not kwargs: + # Default is to start on first sunday of april, and end + # on last sunday of october. + if not isend: + kwargs["month"] = 4 + kwargs["day"] = 1 + kwargs["weekday"] = relativedelta.SU(+1) + else: + kwargs["month"] = 10 + kwargs["day"] = 31 + kwargs["weekday"] = relativedelta.SU(-1) + if x.time is not None: + kwargs["seconds"] = x.time + else: + # Default is 2AM. + kwargs["seconds"] = 7200 + if isend: + # Convert to standard time, to follow the documented way + # of working with the extra hour. See the documentation + # of the tzinfo class. 
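+            # For example, with the default 2AM (7200s) end rule and a one
+            # hour DST base offset, the transition is expressed below as
+            # 1AM (3600s) standard time.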
+            delta = self._dst_offset - self._std_offset
+            kwargs["seconds"] -= delta.seconds + delta.days * 86400
+        return relativedelta.relativedelta(**kwargs)
+
+    def __repr__(self):
+        return "%s(%s)" % (self.__class__.__name__, repr(self._s))
+
+
+class _tzicalvtzcomp(object):
+    def __init__(self, tzoffsetfrom, tzoffsetto, isdst,
+                 tzname=None, rrule=None):
+        self.tzoffsetfrom = datetime.timedelta(seconds=tzoffsetfrom)
+        self.tzoffsetto = datetime.timedelta(seconds=tzoffsetto)
+        self.tzoffsetdiff = self.tzoffsetto - self.tzoffsetfrom
+        self.isdst = isdst
+        self.tzname = tzname
+        self.rrule = rrule
+
+
+class _tzicalvtz(_tzinfo):
+    def __init__(self, tzid, comps=[]):
+        super(_tzicalvtz, self).__init__()
+
+        self._tzid = tzid
+        self._comps = comps
+        self._cachedate = []
+        self._cachecomp = []
+        self._cache_lock = _thread.allocate_lock()
+
+    def _find_comp(self, dt):
+        if len(self._comps) == 1:
+            return self._comps[0]
+
+        dt = dt.replace(tzinfo=None)
+
+        try:
+            with self._cache_lock:
+                return self._cachecomp[self._cachedate.index(
+                    (dt, self._fold(dt)))]
+        except ValueError:
+            pass
+
+        lastcompdt = None
+        lastcomp = None
+
+        for comp in self._comps:
+            compdt = self._find_compdt(comp, dt)
+
+            if compdt and (not lastcompdt or lastcompdt < compdt):
+                lastcompdt = compdt
+                lastcomp = comp
+
+        if not lastcomp:
+            # RFC says nothing about what to do when a given
+            # time is before the first onset date. We'll look for the
+            # first standard component, or the first component, if
+            # none is found.
+            for comp in self._comps:
+                if not comp.isdst:
+                    lastcomp = comp
+                    break
+            else:
+                lastcomp = self._comps[0]
+
+        with self._cache_lock:
+            self._cachedate.insert(0, (dt, self._fold(dt)))
+            self._cachecomp.insert(0, lastcomp)
+
+            if len(self._cachedate) > 10:
+                self._cachedate.pop()
+                self._cachecomp.pop()
+
+        return lastcomp
+
+    def _find_compdt(self, comp, dt):
+        if comp.tzoffsetdiff < ZERO and self._fold(dt):
+            dt -= comp.tzoffsetdiff
+
+        compdt = comp.rrule.before(dt, inc=True)
+
+        return compdt
+
+    def utcoffset(self, dt):
+        if dt is None:
+            return None
+
+        return self._find_comp(dt).tzoffsetto
+
+    def dst(self, dt):
+        comp = self._find_comp(dt)
+        if comp.isdst:
+            return comp.tzoffsetdiff
+        else:
+            return ZERO
+
+    @tzname_in_python2
+    def tzname(self, dt):
+        return self._find_comp(dt).tzname
+
+    def __repr__(self):
+        return "<tzicalvtz %s>" % repr(self._tzid)
+
+    __reduce__ = object.__reduce__
+
+
+class tzical(object):
+    """
+    This object is designed to parse an iCalendar-style ``VTIMEZONE`` structure
+    as set out in `RFC 5545`_ Section 3.6.5 into one or more `tzinfo` objects.
+
+    :param `fileobj`:
+        A file or stream in iCalendar format, which should be UTF-8 encoded
+        with CRLF endings.
+
+    .. _`RFC 5545`: https://tools.ietf.org/html/rfc5545
+    """
+    def __init__(self, fileobj):
+        global rrule
+        from dateutil import rrule
+
+        if isinstance(fileobj, string_types):
+            self._s = fileobj
+            # ical should be encoded in UTF-8 with CRLF
+            fileobj = open(fileobj, 'r')
+        else:
+            self._s = getattr(fileobj, 'name', repr(fileobj))
+            fileobj = _nullcontext(fileobj)
+
+        self._vtz = {}
+
+        with fileobj as fobj:
+            self._parse_rfc(fobj.read())
+
+    def keys(self):
+        """
+        Retrieves the available time zones as a list.
+        """
+        return list(self._vtz.keys())
+
+    def get(self, tzid=None):
+        """
+        Retrieve a :py:class:`datetime.tzinfo` object by its ``tzid``.
+
+        :param tzid:
+            If there is exactly one time zone available, omitting ``tzid``
+            or passing :py:const:`None` returns it. Otherwise a valid
+            key (which can be retrieved from :func:`keys`) is required.
+
+        :raises ValueError:
+            Raised if ``tzid`` is not specified but there are either more
+            or fewer than one zone defined.
+
+        :returns:
+            Returns either a :py:class:`datetime.tzinfo` object representing
+            the relevant time zone or :py:const:`None` if the ``tzid`` was
+            not found.
+        """
+        if tzid is None:
+            if len(self._vtz) == 0:
+                raise ValueError("no timezones defined")
+            elif len(self._vtz) > 1:
+                raise ValueError("more than one timezone available")
+            tzid = next(iter(self._vtz))
+
+        return self._vtz.get(tzid)
+
+    def _parse_offset(self, s):
+        s = s.strip()
+        if not s:
+            raise ValueError("empty offset")
+        if s[0] in ('+', '-'):
+            signal = (-1, +1)[s[0] == '+']
+            s = s[1:]
+        else:
+            signal = +1
+        if len(s) == 4:
+            return (int(s[:2]) * 3600 + int(s[2:]) * 60) * signal
+        elif len(s) == 6:
+            return (int(s[:2]) * 3600 + int(s[2:4]) * 60 + int(s[4:])) * signal
+        else:
+            raise ValueError("invalid offset: " + s)
+
+    def _parse_rfc(self, s):
+        lines = s.splitlines()
+        if not lines:
+            raise ValueError("empty string")
+
+        # Unfold
+        i = 0
+        while i < len(lines):
+            line = lines[i].rstrip()
+            if not line:
+                del lines[i]
+            elif i > 0 and line[0] == " ":
+                lines[i-1] += line[1:]
+                del lines[i]
+            else:
+                i += 1
+
+        tzid = None
+        comps = []
+        invtz = False
+        comptype = None
+        for line in lines:
+            if not line:
+                continue
+            name, value = line.split(':', 1)
+            parms = name.split(';')
+            if not parms:
+                raise ValueError("empty property name")
+            name = parms[0].upper()
+            parms = parms[1:]
+            if invtz:
+                if name == "BEGIN":
+                    if value in ("STANDARD", "DAYLIGHT"):
+                        # Process component
+                        pass
+                    else:
+                        raise ValueError("unknown component: "+value)
+                    comptype = value
+                    founddtstart = False
+                    tzoffsetfrom = None
+                    tzoffsetto = None
+                    rrulelines = []
+                    tzname = None
+                elif name == "END":
+                    if value == "VTIMEZONE":
+                        if comptype:
+                            raise ValueError("component not closed: "+comptype)
+                        if not tzid:
+                            raise ValueError("mandatory TZID not found")
+                        if not comps:
+                            raise ValueError(
+                                "at least one component is needed")
+                        # Process vtimezone
+                        self._vtz[tzid] = _tzicalvtz(tzid, comps)
+                        invtz = False
+                    elif value == comptype:
+                        if not founddtstart:
+                            raise ValueError("mandatory DTSTART not found")
+                        if tzoffsetfrom is None:
+                            raise ValueError(
+                                "mandatory TZOFFSETFROM not found")
+                        if tzoffsetto is None:
+                            raise ValueError(
+                                "mandatory TZOFFSETTO not found")
+                        # Process component
+                        rr = None
+                        if rrulelines:
+                            rr = rrule.rrulestr("\n".join(rrulelines),
+                                                compatible=True,
+                                                ignoretz=True,
+                                                cache=True)
+                        comp = _tzicalvtzcomp(tzoffsetfrom, tzoffsetto,
+                                              (comptype == "DAYLIGHT"),
+                                              tzname, rr)
+                        comps.append(comp)
+                        comptype = None
+                    else:
+                        raise ValueError("invalid component end: "+value)
+                elif comptype:
+                    if name == "DTSTART":
+                        # DTSTART in VTIMEZONE takes a subset of valid RRULE
+                        # values under RFC 5545.
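+                        # Per RFC 5545, DTSTART in a VTIMEZONE component is
+                        # a local (naive) DATE-TIME, so an explicit
+                        # VALUE=DATE-TIME parameter is the only one accepted.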
+ for parm in parms: + if parm != 'VALUE=DATE-TIME': + msg = ('Unsupported DTSTART param in ' + + 'VTIMEZONE: ' + parm) + raise ValueError(msg) + rrulelines.append(line) + founddtstart = True + elif name in ("RRULE", "RDATE", "EXRULE", "EXDATE"): + rrulelines.append(line) + elif name == "TZOFFSETFROM": + if parms: + raise ValueError( + "unsupported %s parm: %s " % (name, parms[0])) + tzoffsetfrom = self._parse_offset(value) + elif name == "TZOFFSETTO": + if parms: + raise ValueError( + "unsupported TZOFFSETTO parm: "+parms[0]) + tzoffsetto = self._parse_offset(value) + elif name == "TZNAME": + if parms: + raise ValueError( + "unsupported TZNAME parm: "+parms[0]) + tzname = value + elif name == "COMMENT": + pass + else: + raise ValueError("unsupported property: "+name) + else: + if name == "TZID": + if parms: + raise ValueError( + "unsupported TZID parm: "+parms[0]) + tzid = value + elif name in ("TZURL", "LAST-MODIFIED", "COMMENT"): + pass + else: + raise ValueError("unsupported property: "+name) + elif name == "BEGIN" and value == "VTIMEZONE": + tzid = None + comps = [] + invtz = True + + def __repr__(self): + return "%s(%s)" % (self.__class__.__name__, repr(self._s)) + + +if sys.platform != "win32": + TZFILES = ["/etc/localtime", "localtime"] + TZPATHS = ["/usr/share/zoneinfo", + "/usr/lib/zoneinfo", + "/usr/share/lib/zoneinfo", + "/etc/zoneinfo"] +else: + TZFILES = [] + TZPATHS = [] + + +def __get_gettz(): + tzlocal_classes = (tzlocal,) + if tzwinlocal is not None: + tzlocal_classes += (tzwinlocal,) + + class GettzFunc(object): + """ + Retrieve a time zone object from a string representation + + This function is intended to retrieve the :py:class:`tzinfo` subclass + that best represents the time zone that would be used if a POSIX + `TZ variable`_ were set to the same value. + + If no argument or an empty string is passed to ``gettz``, local time + is returned: + + .. code-block:: python3 + + >>> gettz() + tzfile('/etc/localtime') + + This function is also the preferred way to map IANA tz database keys + to :class:`tzfile` objects: + + .. code-block:: python3 + + >>> gettz('Pacific/Kiritimati') + tzfile('/usr/share/zoneinfo/Pacific/Kiritimati') + + On Windows, the standard is extended to include the Windows-specific + zone names provided by the operating system: + + .. code-block:: python3 + + >>> gettz('Egypt Standard Time') + tzwin('Egypt Standard Time') + + Passing a GNU ``TZ`` style string time zone specification returns a + :class:`tzstr` object: + + .. code-block:: python3 + + >>> gettz('AEST-10AEDT-11,M10.1.0/2,M4.1.0/3') + tzstr('AEST-10AEDT-11,M10.1.0/2,M4.1.0/3') + + :param name: + A time zone name (IANA, or, on Windows, Windows keys), location of + a ``tzfile(5)`` zoneinfo file or ``TZ`` variable style time zone + specifier. An empty string, no argument or ``None`` is interpreted + as local time. + + :return: + Returns an instance of one of ``dateutil``'s :py:class:`tzinfo` + subclasses. + + .. versionchanged:: 2.7.0 + + After version 2.7.0, any two calls to ``gettz`` using the same + input strings will return the same object: + + .. code-block:: python3 + + >>> tz.gettz('America/Chicago') is tz.gettz('America/Chicago') + True + + In addition to improving performance, this ensures that + `"same zone" semantics`_ are used for datetimes in the same zone. + + + .. _`TZ variable`: + https://www.gnu.org/software/libc/manual/html_node/TZ-Variable.html + + .. 
_`"same zone" semantics`: + https://blog.ganssle.io/articles/2018/02/aware-datetime-arithmetic.html + """ + def __init__(self): + + self.__instances = weakref.WeakValueDictionary() + self.__strong_cache_size = 8 + self.__strong_cache = OrderedDict() + self._cache_lock = _thread.allocate_lock() + + def __call__(self, name=None): + with self._cache_lock: + rv = self.__instances.get(name, None) + + if rv is None: + rv = self.nocache(name=name) + if not (name is None + or isinstance(rv, tzlocal_classes) + or rv is None): + # tzlocal is slightly more complicated than the other + # time zone providers because it depends on environment + # at construction time, so don't cache that. + # + # We also cannot store weak references to None, so we + # will also not store that. + self.__instances[name] = rv + else: + # No need for strong caching, return immediately + return rv + + self.__strong_cache[name] = self.__strong_cache.pop(name, rv) + + if len(self.__strong_cache) > self.__strong_cache_size: + self.__strong_cache.popitem(last=False) + + return rv + + def set_cache_size(self, size): + with self._cache_lock: + self.__strong_cache_size = size + while len(self.__strong_cache) > size: + self.__strong_cache.popitem(last=False) + + def cache_clear(self): + with self._cache_lock: + self.__instances = weakref.WeakValueDictionary() + self.__strong_cache.clear() + + @staticmethod + def nocache(name=None): + """A non-cached version of gettz""" + tz = None + if not name: + try: + name = os.environ["TZ"] + except KeyError: + pass + if name is None or name in ("", ":"): + for filepath in TZFILES: + if not os.path.isabs(filepath): + filename = filepath + for path in TZPATHS: + filepath = os.path.join(path, filename) + if os.path.isfile(filepath): + break + else: + continue + if os.path.isfile(filepath): + try: + tz = tzfile(filepath) + break + except (IOError, OSError, ValueError): + pass + else: + tz = tzlocal() + else: + try: + if name.startswith(":"): + name = name[1:] + except TypeError as e: + if isinstance(name, bytes): + new_msg = "gettz argument should be str, not bytes" + six.raise_from(TypeError(new_msg), e) + else: + raise + if os.path.isabs(name): + if os.path.isfile(name): + tz = tzfile(name) + else: + tz = None + else: + for path in TZPATHS: + filepath = os.path.join(path, name) + if not os.path.isfile(filepath): + filepath = filepath.replace(' ', '_') + if not os.path.isfile(filepath): + continue + try: + tz = tzfile(filepath) + break + except (IOError, OSError, ValueError): + pass + else: + tz = None + if tzwin is not None: + try: + tz = tzwin(name) + except (WindowsError, UnicodeEncodeError): + # UnicodeEncodeError is for Python 2.7 compat + tz = None + + if not tz: + from dateutil.zoneinfo import get_zonefile_instance + tz = get_zonefile_instance().get(name) + + if not tz: + for c in name: + # name is not a tzstr unless it has at least + # one offset. For short values of "name", an + # explicit for loop seems to be the fastest way + # To determine if a string contains a digit + if c in "0123456789": + try: + tz = tzstr(name) + except ValueError: + pass + break + else: + if name in ("GMT", "UTC"): + tz = UTC + elif name in time.tzname: + tz = tzlocal() + return tz + + return GettzFunc() + + +gettz = __get_gettz() +del __get_gettz + + +def datetime_exists(dt, tz=None): + """ + Given a datetime and a time zone, determine whether or not a given datetime + would fall in a gap. + + :param dt: + A :class:`datetime.datetime` (whose time zone will be ignored if ``tz`` + is provided.) 
+
+    :param tz:
+        A :class:`datetime.tzinfo` with support for the ``fold`` attribute. If
+        ``None`` or not provided, the datetime's own time zone will be used.
+
+    :return:
+        Returns a boolean value indicating whether or not the "wall time"
+        exists in ``tz``.
+
+    .. versionadded:: 2.7.0
+    """
+    if tz is None:
+        if dt.tzinfo is None:
+            raise ValueError('Datetime is naive and no time zone provided.')
+        tz = dt.tzinfo
+
+    dt = dt.replace(tzinfo=None)
+
+    # This is essentially a test of whether or not the datetime can survive
+    # a round trip to UTC.
+    dt_rt = dt.replace(tzinfo=tz).astimezone(UTC).astimezone(tz)
+    dt_rt = dt_rt.replace(tzinfo=None)
+
+    return dt == dt_rt
+
+
+def datetime_ambiguous(dt, tz=None):
+    """
+    Given a datetime and a time zone, determine whether or not a given datetime
+    is ambiguous (i.e. if there are two times differentiated only by their DST
+    status).
+
+    :param dt:
+        A :class:`datetime.datetime` (whose time zone will be ignored if ``tz``
+        is provided.)
+
+    :param tz:
+        A :class:`datetime.tzinfo` with support for the ``fold`` attribute. If
+        ``None`` or not provided, the datetime's own time zone will be used.
+
+    :return:
+        Returns a boolean value indicating whether or not the "wall time" is
+        ambiguous in ``tz``.
+
+    .. versionadded:: 2.6.0
+    """
+    if tz is None:
+        if dt.tzinfo is None:
+            raise ValueError('Datetime is naive and no time zone provided.')
+
+        tz = dt.tzinfo
+
+    # If a time zone defines its own "is_ambiguous" function, we'll use that.
+    is_ambiguous_fn = getattr(tz, 'is_ambiguous', None)
+    if is_ambiguous_fn is not None:
+        try:
+            return tz.is_ambiguous(dt)
+        except Exception:
+            pass
+
+    # If it doesn't come out and tell us it's ambiguous, we'll just check if
+    # the fold attribute has any effect on this particular date and time.
+    dt = dt.replace(tzinfo=tz)
+    wall_0 = enfold(dt, fold=0)
+    wall_1 = enfold(dt, fold=1)
+
+    same_offset = wall_0.utcoffset() == wall_1.utcoffset()
+    same_dst = wall_0.dst() == wall_1.dst()
+
+    return not (same_offset and same_dst)
+
+
+def resolve_imaginary(dt):
+    """
+    Given a datetime that may be imaginary, return an existing datetime.
+
+    This function assumes that an imaginary datetime represents what the
+    wall time would be in a zone had the offset transition not occurred, so
+    it will always fall forward by the transition's change in offset.
+
+    .. doctest::
+
+        >>> from dateutil import tz
+        >>> from datetime import datetime
+        >>> NYC = tz.gettz('America/New_York')
+        >>> print(tz.resolve_imaginary(datetime(2017, 3, 12, 2, 30, tzinfo=NYC)))
+        2017-03-12 03:30:00-04:00
+
+        >>> KIR = tz.gettz('Pacific/Kiritimati')
+        >>> print(tz.resolve_imaginary(datetime(1995, 1, 1, 12, 30, tzinfo=KIR)))
+        1995-01-02 12:30:00+14:00
+
+    As a note, :func:`datetime.astimezone` is guaranteed to produce a valid,
+    existing datetime, so a round-trip to and from UTC is sufficient to get
+    an extant datetime; however, this generally "falls back" to an earlier
+    time rather than falling forward to the STD side (though no guarantees
+    are made about this behavior).
+
+    :param dt:
+        A :class:`datetime.datetime` which may or may not exist.
+
+    :return:
+        Returns an existing :class:`datetime.datetime`. If ``dt`` was not
+        imaginary, the datetime returned is guaranteed to be the same object
+        passed to the function.
+
+    .. 
versionadded:: 2.7.0 + """ + if dt.tzinfo is not None and not datetime_exists(dt): + + curr_offset = (dt + datetime.timedelta(hours=24)).utcoffset() + old_offset = (dt - datetime.timedelta(hours=24)).utcoffset() + + dt += curr_offset - old_offset + + return dt + + +def _datetime_to_timestamp(dt): + """ + Convert a :class:`datetime.datetime` object to an epoch timestamp in + seconds since January 1, 1970, ignoring the time zone. + """ + return (dt.replace(tzinfo=None) - EPOCH).total_seconds() + + +if sys.version_info >= (3, 6): + def _get_supported_offset(second_offset): + return second_offset +else: + def _get_supported_offset(second_offset): + # For python pre-3.6, round to full-minutes if that's not the case. + # Python's datetime doesn't accept sub-minute timezones. Check + # http://python.org/sf/1447945 or https://bugs.python.org/issue5288 + # for some information. + old_offset = second_offset + calculated_offset = 60 * ((second_offset + 30) // 60) + return calculated_offset + + +try: + # Python 3.7 feature + from contextlib import nullcontext as _nullcontext +except ImportError: + class _nullcontext(object): + """ + Class for wrapping contexts so that they are passed through in a + with statement. + """ + def __init__(self, context): + self.context = context + + def __enter__(self): + return self.context + + def __exit__(*args, **kwargs): + pass + +# vim:ts=4:sw=4:et diff --git a/.local/lib/python3.8/site-packages/dateutil/tz/win.py b/.local/lib/python3.8/site-packages/dateutil/tz/win.py new file mode 100644 index 0000000..cde07ba --- /dev/null +++ b/.local/lib/python3.8/site-packages/dateutil/tz/win.py @@ -0,0 +1,370 @@ +# -*- coding: utf-8 -*- +""" +This module provides an interface to the native time zone data on Windows, +including :py:class:`datetime.tzinfo` implementations. + +Attempting to import this module on a non-Windows platform will raise an +:py:obj:`ImportError`. +""" +# This code was originally contributed by Jeffrey Harris. +import datetime +import struct + +from six.moves import winreg +from six import text_type + +try: + import ctypes + from ctypes import wintypes +except ValueError: + # ValueError is raised on non-Windows systems for some horrible reason. + raise ImportError("Running tzwin on non-Windows system") + +from ._common import tzrangebase + +__all__ = ["tzwin", "tzwinlocal", "tzres"] + +ONEWEEK = datetime.timedelta(7) + +TZKEYNAMENT = r"SOFTWARE\Microsoft\Windows NT\CurrentVersion\Time Zones" +TZKEYNAME9X = r"SOFTWARE\Microsoft\Windows\CurrentVersion\Time Zones" +TZLOCALKEYNAME = r"SYSTEM\CurrentControlSet\Control\TimeZoneInformation" + + +def _settzkeyname(): + handle = winreg.ConnectRegistry(None, winreg.HKEY_LOCAL_MACHINE) + try: + winreg.OpenKey(handle, TZKEYNAMENT).Close() + TZKEYNAME = TZKEYNAMENT + except WindowsError: + TZKEYNAME = TZKEYNAME9X + handle.Close() + return TZKEYNAME + + +TZKEYNAME = _settzkeyname() + + +class tzres(object): + """ + Class for accessing ``tzres.dll``, which contains timezone name related + resources. + + .. 
versionadded:: 2.5.0 + """ + p_wchar = ctypes.POINTER(wintypes.WCHAR) # Pointer to a wide char + + def __init__(self, tzres_loc='tzres.dll'): + # Load the user32 DLL so we can load strings from tzres + user32 = ctypes.WinDLL('user32') + + # Specify the LoadStringW function + user32.LoadStringW.argtypes = (wintypes.HINSTANCE, + wintypes.UINT, + wintypes.LPWSTR, + ctypes.c_int) + + self.LoadStringW = user32.LoadStringW + self._tzres = ctypes.WinDLL(tzres_loc) + self.tzres_loc = tzres_loc + + def load_name(self, offset): + """ + Load a timezone name from a DLL offset (integer). + + >>> from dateutil.tzwin import tzres + >>> tzr = tzres() + >>> print(tzr.load_name(112)) + 'Eastern Standard Time' + + :param offset: + A positive integer value referring to a string from the tzres dll. + + .. note:: + + Offsets found in the registry are generally of the form + ``@tzres.dll,-114``. The offset in this case is 114, not -114. + + """ + resource = self.p_wchar() + lpBuffer = ctypes.cast(ctypes.byref(resource), wintypes.LPWSTR) + nchar = self.LoadStringW(self._tzres._handle, offset, lpBuffer, 0) + return resource[:nchar] + + def name_from_string(self, tzname_str): + """ + Parse strings as returned from the Windows registry into the time zone + name as defined in the registry. + + >>> from dateutil.tzwin import tzres + >>> tzr = tzres() + >>> print(tzr.name_from_string('@tzres.dll,-251')) + 'Dateline Daylight Time' + >>> print(tzr.name_from_string('Eastern Standard Time')) + 'Eastern Standard Time' + + :param tzname_str: + A timezone name string as returned from a Windows registry key. + + :return: + Returns the localized timezone string from tzres.dll if the string + is of the form `@tzres.dll,-offset`, else returns the input string. + """ + if not tzname_str.startswith('@'): + return tzname_str + + name_splt = tzname_str.split(',-') + try: + offset = int(name_splt[1]) + except: + raise ValueError("Malformed timezone string.") + + return self.load_name(offset) + + +class tzwinbase(tzrangebase): + """tzinfo class based on win32's timezones available in the registry.""" + def __init__(self): + raise NotImplementedError('tzwinbase is an abstract base class') + + def __eq__(self, other): + # Compare on all relevant dimensions, including name. + if not isinstance(other, tzwinbase): + return NotImplemented + + return (self._std_offset == other._std_offset and + self._dst_offset == other._dst_offset and + self._stddayofweek == other._stddayofweek and + self._dstdayofweek == other._dstdayofweek and + self._stdweeknumber == other._stdweeknumber and + self._dstweeknumber == other._dstweeknumber and + self._stdhour == other._stdhour and + self._dsthour == other._dsthour and + self._stdminute == other._stdminute and + self._dstminute == other._dstminute and + self._std_abbr == other._std_abbr and + self._dst_abbr == other._dst_abbr) + + @staticmethod + def list(): + """Return a list of all time zones known to the system.""" + with winreg.ConnectRegistry(None, winreg.HKEY_LOCAL_MACHINE) as handle: + with winreg.OpenKey(handle, TZKEYNAME) as tzkey: + result = [winreg.EnumKey(tzkey, i) + for i in range(winreg.QueryInfoKey(tzkey)[0])] + return result + + def display(self): + """ + Return the display name of the time zone. + """ + return self._display + + def transitions(self, year): + """ + For a given year, get the DST on and off transition times, expressed + always on the standard time side. For zones with no transitions, this + function returns ``None``. 
+ + :param year: + The year whose transitions you would like to query. + + :return: + Returns a :class:`tuple` of :class:`datetime.datetime` objects, + ``(dston, dstoff)`` for zones with an annual DST transition, or + ``None`` for fixed offset zones. + """ + + if not self.hasdst: + return None + + dston = picknthweekday(year, self._dstmonth, self._dstdayofweek, + self._dsthour, self._dstminute, + self._dstweeknumber) + + dstoff = picknthweekday(year, self._stdmonth, self._stddayofweek, + self._stdhour, self._stdminute, + self._stdweeknumber) + + # Ambiguous dates default to the STD side + dstoff -= self._dst_base_offset + + return dston, dstoff + + def _get_hasdst(self): + return self._dstmonth != 0 + + @property + def _dst_base_offset(self): + return self._dst_base_offset_ + + +class tzwin(tzwinbase): + """ + Time zone object created from the zone info in the Windows registry + + These are similar to :py:class:`dateutil.tz.tzrange` objects in that + the time zone data is provided in the format of a single offset rule + for either 0 or 2 time zone transitions per year. + + :param: name + The name of a Windows time zone key, e.g. "Eastern Standard Time". + The full list of keys can be retrieved with :func:`tzwin.list`. + """ + + def __init__(self, name): + self._name = name + + with winreg.ConnectRegistry(None, winreg.HKEY_LOCAL_MACHINE) as handle: + tzkeyname = text_type("{kn}\\{name}").format(kn=TZKEYNAME, name=name) + with winreg.OpenKey(handle, tzkeyname) as tzkey: + keydict = valuestodict(tzkey) + + self._std_abbr = keydict["Std"] + self._dst_abbr = keydict["Dlt"] + + self._display = keydict["Display"] + + # See http://ww_winreg.jsiinc.com/SUBA/tip0300/rh0398.htm + tup = struct.unpack("=3l16h", keydict["TZI"]) + stdoffset = -tup[0]-tup[1] # Bias + StandardBias * -1 + dstoffset = stdoffset-tup[2] # + DaylightBias * -1 + self._std_offset = datetime.timedelta(minutes=stdoffset) + self._dst_offset = datetime.timedelta(minutes=dstoffset) + + # for the meaning see the win32 TIME_ZONE_INFORMATION structure docs + # http://msdn.microsoft.com/en-us/library/windows/desktop/ms725481(v=vs.85).aspx + (self._stdmonth, + self._stddayofweek, # Sunday = 0 + self._stdweeknumber, # Last = 5 + self._stdhour, + self._stdminute) = tup[4:9] + + (self._dstmonth, + self._dstdayofweek, # Sunday = 0 + self._dstweeknumber, # Last = 5 + self._dsthour, + self._dstminute) = tup[12:17] + + self._dst_base_offset_ = self._dst_offset - self._std_offset + self.hasdst = self._get_hasdst() + + def __repr__(self): + return "tzwin(%s)" % repr(self._name) + + def __reduce__(self): + return (self.__class__, (self._name,)) + + +class tzwinlocal(tzwinbase): + """ + Class representing the local time zone information in the Windows registry + + While :class:`dateutil.tz.tzlocal` makes system calls (via the :mod:`time` + module) to retrieve time zone information, ``tzwinlocal`` retrieves the + rules directly from the Windows registry and creates an object like + :class:`dateutil.tz.tzwin`. + + Because Windows does not have an equivalent of :func:`time.tzset`, on + Windows, :class:`dateutil.tz.tzlocal` instances will always reflect the + time zone settings *at the time that the process was started*, meaning + changes to the machine's time zone settings during the run of a program + on Windows will **not** be reflected by :class:`dateutil.tz.tzlocal`. + Because ``tzwinlocal`` reads the registry directly, it is unaffected by + this issue. 
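+
+    As a quick illustration (assuming a machine whose Windows zone is set
+    to "Eastern Standard Time"; the output reflects whatever the local
+    settings are):
+
+    .. code-block:: python3
+
+        >>> from dateutil.tz.win import tzwinlocal
+        >>> str(tzwinlocal())
+        "tzwinlocal('Eastern Standard Time')"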
+ """ + def __init__(self): + with winreg.ConnectRegistry(None, winreg.HKEY_LOCAL_MACHINE) as handle: + with winreg.OpenKey(handle, TZLOCALKEYNAME) as tzlocalkey: + keydict = valuestodict(tzlocalkey) + + self._std_abbr = keydict["StandardName"] + self._dst_abbr = keydict["DaylightName"] + + try: + tzkeyname = text_type('{kn}\\{sn}').format(kn=TZKEYNAME, + sn=self._std_abbr) + with winreg.OpenKey(handle, tzkeyname) as tzkey: + _keydict = valuestodict(tzkey) + self._display = _keydict["Display"] + except OSError: + self._display = None + + stdoffset = -keydict["Bias"]-keydict["StandardBias"] + dstoffset = stdoffset-keydict["DaylightBias"] + + self._std_offset = datetime.timedelta(minutes=stdoffset) + self._dst_offset = datetime.timedelta(minutes=dstoffset) + + # For reasons unclear, in this particular key, the day of week has been + # moved to the END of the SYSTEMTIME structure. + tup = struct.unpack("=8h", keydict["StandardStart"]) + + (self._stdmonth, + self._stdweeknumber, # Last = 5 + self._stdhour, + self._stdminute) = tup[1:5] + + self._stddayofweek = tup[7] + + tup = struct.unpack("=8h", keydict["DaylightStart"]) + + (self._dstmonth, + self._dstweeknumber, # Last = 5 + self._dsthour, + self._dstminute) = tup[1:5] + + self._dstdayofweek = tup[7] + + self._dst_base_offset_ = self._dst_offset - self._std_offset + self.hasdst = self._get_hasdst() + + def __repr__(self): + return "tzwinlocal()" + + def __str__(self): + # str will return the standard name, not the daylight name. + return "tzwinlocal(%s)" % repr(self._std_abbr) + + def __reduce__(self): + return (self.__class__, ()) + + +def picknthweekday(year, month, dayofweek, hour, minute, whichweek): + """ dayofweek == 0 means Sunday, whichweek 5 means last instance """ + first = datetime.datetime(year, month, 1, hour, minute) + + # This will work if dayofweek is ISO weekday (1-7) or Microsoft-style (0-6), + # Because 7 % 7 = 0 + weekdayone = first.replace(day=((dayofweek - first.isoweekday()) % 7) + 1) + wd = weekdayone + ((whichweek - 1) * ONEWEEK) + if (wd.month != month): + wd -= ONEWEEK + + return wd + + +def valuestodict(key): + """Convert a registry key's values to a dictionary.""" + dout = {} + size = winreg.QueryInfoKey(key)[1] + tz_res = None + + for i in range(size): + key_name, value, dtype = winreg.EnumValue(key, i) + if dtype == winreg.REG_DWORD or dtype == winreg.REG_DWORD_LITTLE_ENDIAN: + # If it's a DWORD (32-bit integer), it's stored as unsigned - convert + # that to a proper signed integer + if value & (1 << 31): + value = value - (1 << 32) + elif dtype == winreg.REG_SZ: + # If it's a reference to the tzres DLL, load the actual string + if value.startswith('@tzres'): + tz_res = tz_res or tzres() + value = tz_res.name_from_string(value) + + value = value.rstrip('\x00') # Remove trailing nulls + + dout[key_name] = value + + return dout diff --git a/.local/lib/python3.8/site-packages/dateutil/tzwin.py b/.local/lib/python3.8/site-packages/dateutil/tzwin.py new file mode 100644 index 0000000..cebc673 --- /dev/null +++ b/.local/lib/python3.8/site-packages/dateutil/tzwin.py @@ -0,0 +1,2 @@ +# tzwin has moved to dateutil.tz.win +from .tz.win import * diff --git a/.local/lib/python3.8/site-packages/dateutil/utils.py b/.local/lib/python3.8/site-packages/dateutil/utils.py new file mode 100644 index 0000000..dd2d245 --- /dev/null +++ b/.local/lib/python3.8/site-packages/dateutil/utils.py @@ -0,0 +1,71 @@ +# -*- coding: utf-8 -*- +""" +This module offers general convenience and utility functions for dealing with +datetimes. + +.. 
versionadded:: 2.7.0 +""" +from __future__ import unicode_literals + +from datetime import datetime, time + + +def today(tzinfo=None): + """ + Returns a :py:class:`datetime` representing the current day at midnight + + :param tzinfo: + The time zone to attach (also used to determine the current day). + + :return: + A :py:class:`datetime.datetime` object representing the current day + at midnight. + """ + + dt = datetime.now(tzinfo) + return datetime.combine(dt.date(), time(0, tzinfo=tzinfo)) + + +def default_tzinfo(dt, tzinfo): + """ + Sets the ``tzinfo`` parameter on naive datetimes only + + This is useful for example when you are provided a datetime that may have + either an implicit or explicit time zone, such as when parsing a time zone + string. + + .. doctest:: + + >>> from dateutil.tz import tzoffset + >>> from dateutil.parser import parse + >>> from dateutil.utils import default_tzinfo + >>> dflt_tz = tzoffset("EST", -18000) + >>> print(default_tzinfo(parse('2014-01-01 12:30 UTC'), dflt_tz)) + 2014-01-01 12:30:00+00:00 + >>> print(default_tzinfo(parse('2014-01-01 12:30'), dflt_tz)) + 2014-01-01 12:30:00-05:00 + + :param dt: + The datetime on which to replace the time zone + + :param tzinfo: + The :py:class:`datetime.tzinfo` subclass instance to assign to + ``dt`` if (and only if) it is naive. + + :return: + Returns an aware :py:class:`datetime.datetime`. + """ + if dt.tzinfo is not None: + return dt + else: + return dt.replace(tzinfo=tzinfo) + + +def within_delta(dt1, dt2, delta): + """ + Useful for comparing two datetimes that may have a negligible difference + to be considered equal. + """ + delta = abs(delta) + difference = dt1 - dt2 + return -delta <= difference <= delta diff --git a/.local/lib/python3.8/site-packages/dateutil/zoneinfo/__init__.py b/.local/lib/python3.8/site-packages/dateutil/zoneinfo/__init__.py new file mode 100644 index 0000000..34f11ad --- /dev/null +++ b/.local/lib/python3.8/site-packages/dateutil/zoneinfo/__init__.py @@ -0,0 +1,167 @@ +# -*- coding: utf-8 -*- +import warnings +import json + +from tarfile import TarFile +from pkgutil import get_data +from io import BytesIO + +from dateutil.tz import tzfile as _tzfile + +__all__ = ["get_zonefile_instance", "gettz", "gettz_db_metadata"] + +ZONEFILENAME = "dateutil-zoneinfo.tar.gz" +METADATA_FN = 'METADATA' + + +class tzfile(_tzfile): + def __reduce__(self): + return (gettz, (self._filename,)) + + +def getzoneinfofile_stream(): + try: + return BytesIO(get_data(__name__, ZONEFILENAME)) + except IOError as e: # TODO switch to FileNotFoundError? + warnings.warn("I/O error({0}): {1}".format(e.errno, e.strerror)) + return None + + +class ZoneInfoFile(object): + def __init__(self, zonefile_stream=None): + if zonefile_stream is not None: + with TarFile.open(fileobj=zonefile_stream) as tf: + self.zones = {zf.name: tzfile(tf.extractfile(zf), filename=zf.name) + for zf in tf.getmembers() + if zf.isfile() and zf.name != METADATA_FN} + # deal with links: They'll point to their parent object. Less + # waste of memory + links = {zl.name: self.zones[zl.linkname] + for zl in tf.getmembers() if + zl.islnk() or zl.issym()} + self.zones.update(links) + try: + metadata_json = tf.extractfile(tf.getmember(METADATA_FN)) + metadata_str = metadata_json.read().decode('UTF-8') + self.metadata = json.loads(metadata_str) + except KeyError: + # no metadata in tar file + self.metadata = None + else: + self.zones = {} + self.metadata = None + + def get(self, name, default=None): + """ + Wrapper for :func:`ZoneInfoFile.zones.get`. 
This is a convenience method + for retrieving zones from the zone dictionary. + + :param name: + The name of the zone to retrieve. (Generally IANA zone names) + + :param default: + The value to return in the event of a missing key. + + .. versionadded:: 2.6.0 + + """ + return self.zones.get(name, default) + + +# The current API has gettz as a module function, although in fact it taps into +# a stateful class. So as a workaround for now, without changing the API, we +# will create a new "global" class instance the first time a user requests a +# timezone. Ugly, but adheres to the api. +# +# TODO: Remove after deprecation period. +_CLASS_ZONE_INSTANCE = [] + + +def get_zonefile_instance(new_instance=False): + """ + This is a convenience function which provides a :class:`ZoneInfoFile` + instance using the data provided by the ``dateutil`` package. By default, it + caches a single instance of the ZoneInfoFile object and returns that. + + :param new_instance: + If ``True``, a new instance of :class:`ZoneInfoFile` is instantiated and + used as the cached instance for the next call. Otherwise, new instances + are created only as necessary. + + :return: + Returns a :class:`ZoneInfoFile` object. + + .. versionadded:: 2.6 + """ + if new_instance: + zif = None + else: + zif = getattr(get_zonefile_instance, '_cached_instance', None) + + if zif is None: + zif = ZoneInfoFile(getzoneinfofile_stream()) + + get_zonefile_instance._cached_instance = zif + + return zif + + +def gettz(name): + """ + This retrieves a time zone from the local zoneinfo tarball that is packaged + with dateutil. + + :param name: + An IANA-style time zone name, as found in the zoneinfo file. + + :return: + Returns a :class:`dateutil.tz.tzfile` time zone object. + + .. warning:: + It is generally inadvisable to use this function, and it is only + provided for API compatibility with earlier versions. This is *not* + equivalent to ``dateutil.tz.gettz()``, which selects an appropriate + time zone based on the inputs, favoring system zoneinfo. This is ONLY + for accessing the dateutil-specific zoneinfo (which may be out of + date compared to the system zoneinfo). + + .. deprecated:: 2.6 + If you need to use a specific zoneinfofile over the system zoneinfo, + instantiate a :class:`dateutil.zoneinfo.ZoneInfoFile` object and call + :func:`dateutil.zoneinfo.ZoneInfoFile.get(name)` instead. + + Use :func:`get_zonefile_instance` to retrieve an instance of the + dateutil-provided zoneinfo. + """ + warnings.warn("zoneinfo.gettz() will be removed in future versions, " + "to use the dateutil-provided zoneinfo files, instantiate a " + "ZoneInfoFile object and use ZoneInfoFile.zones.get() " + "instead. See the documentation for details.", + DeprecationWarning) + + if len(_CLASS_ZONE_INSTANCE) == 0: + _CLASS_ZONE_INSTANCE.append(ZoneInfoFile(getzoneinfofile_stream())) + return _CLASS_ZONE_INSTANCE[0].zones.get(name) + + +def gettz_db_metadata(): + """ Get the zonefile metadata + + See `zonefile_metadata`_ + + :returns: + A dictionary with the database metadata + + .. deprecated:: 2.6 + See deprecation warning in :func:`zoneinfo.gettz`. To get metadata, + query the attribute ``zoneinfo.ZoneInfoFile.metadata``. + """ + warnings.warn("zoneinfo.gettz_db_metadata() will be removed in future " + "versions, to use the dateutil-provided zoneinfo files, " + "ZoneInfoFile object and query the 'metadata' attribute " + "instead. 
See the documentation for details.", + DeprecationWarning) + + if len(_CLASS_ZONE_INSTANCE) == 0: + _CLASS_ZONE_INSTANCE.append(ZoneInfoFile(getzoneinfofile_stream())) + return _CLASS_ZONE_INSTANCE[0].metadata diff --git a/.local/lib/python3.8/site-packages/dateutil/zoneinfo/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/dateutil/zoneinfo/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..77835d1 Binary files /dev/null and b/.local/lib/python3.8/site-packages/dateutil/zoneinfo/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/dateutil/zoneinfo/__pycache__/rebuild.cpython-38.pyc b/.local/lib/python3.8/site-packages/dateutil/zoneinfo/__pycache__/rebuild.cpython-38.pyc new file mode 100644 index 0000000..f30877b Binary files /dev/null and b/.local/lib/python3.8/site-packages/dateutil/zoneinfo/__pycache__/rebuild.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/dateutil/zoneinfo/dateutil-zoneinfo.tar.gz b/.local/lib/python3.8/site-packages/dateutil/zoneinfo/dateutil-zoneinfo.tar.gz new file mode 100644 index 0000000..524c48e Binary files /dev/null and b/.local/lib/python3.8/site-packages/dateutil/zoneinfo/dateutil-zoneinfo.tar.gz differ diff --git a/.local/lib/python3.8/site-packages/dateutil/zoneinfo/rebuild.py b/.local/lib/python3.8/site-packages/dateutil/zoneinfo/rebuild.py new file mode 100644 index 0000000..684c658 --- /dev/null +++ b/.local/lib/python3.8/site-packages/dateutil/zoneinfo/rebuild.py @@ -0,0 +1,75 @@ +import logging +import os +import tempfile +import shutil +import json +from subprocess import check_call, check_output +from tarfile import TarFile + +from dateutil.zoneinfo import METADATA_FN, ZONEFILENAME + + +def rebuild(filename, tag=None, format="gz", zonegroups=[], metadata=None): + """Rebuild the internal timezone info in dateutil/zoneinfo/zoneinfo*tar* + + filename is the timezone tarball from ``ftp.iana.org/tz``. + + """ + tmpdir = tempfile.mkdtemp() + zonedir = os.path.join(tmpdir, "zoneinfo") + moduledir = os.path.dirname(__file__) + try: + with TarFile.open(filename) as tf: + for name in zonegroups: + tf.extract(name, tmpdir) + filepaths = [os.path.join(tmpdir, n) for n in zonegroups] + + _run_zic(zonedir, filepaths) + + # write metadata file + with open(os.path.join(zonedir, METADATA_FN), 'w') as f: + json.dump(metadata, f, indent=4, sort_keys=True) + target = os.path.join(moduledir, ZONEFILENAME) + with TarFile.open(target, "w:%s" % format) as tf: + for entry in os.listdir(zonedir): + entrypath = os.path.join(zonedir, entry) + tf.add(entrypath, entry) + finally: + shutil.rmtree(tmpdir) + + +def _run_zic(zonedir, filepaths): + """Calls the ``zic`` compiler in a compatible way to get a "fat" binary. + + Recent versions of ``zic`` default to ``-b slim``, while older versions + don't even have the ``-b`` option (but default to "fat" binaries). The + current version of dateutil does not support Version 2+ TZif files, which + causes problems when used in conjunction with "slim" binaries, so this + function is used to ensure that we always get a "fat" binary. 
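+
+    The resulting invocation is roughly ``zic -b fat -d <zonedir> <files>``,
+    with ``-b fat`` omitted on older ``zic`` versions whose ``--help`` output
+    does not mention the ``-b`` option.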
+ """ + + try: + help_text = check_output(["zic", "--help"]) + except OSError as e: + _print_on_nosuchfile(e) + raise + + if b"-b " in help_text: + bloat_args = ["-b", "fat"] + else: + bloat_args = [] + + check_call(["zic"] + bloat_args + ["-d", zonedir] + filepaths) + + +def _print_on_nosuchfile(e): + """Print helpful troubleshooting message + + e is an exception raised by subprocess.check_call() + + """ + if e.errno == 2: + logging.error( + "Could not find zic. Perhaps you need to install " + "libc-bin or some other package that provides it, " + "or it's not in your PATH?") diff --git a/.local/lib/python3.8/site-packages/kaggle-1.5.12.dist-info/INSTALLER b/.local/lib/python3.8/site-packages/kaggle-1.5.12.dist-info/INSTALLER new file mode 100644 index 0000000..a1b589e --- /dev/null +++ b/.local/lib/python3.8/site-packages/kaggle-1.5.12.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/.local/lib/python3.8/site-packages/kaggle-1.5.12.dist-info/METADATA b/.local/lib/python3.8/site-packages/kaggle-1.5.12.dist-info/METADATA new file mode 100644 index 0000000..9c532cf --- /dev/null +++ b/.local/lib/python3.8/site-packages/kaggle-1.5.12.dist-info/METADATA @@ -0,0 +1,21 @@ +Metadata-Version: 2.1 +Name: kaggle +Version: 1.5.12 +Summary: Kaggle API +Home-page: https://github.com/Kaggle/kaggle-api +Author: Kaggle +Author-email: support@kaggle.com +License: Apache 2.0 +Keywords: Kaggle,API +Platform: UNKNOWN +Requires-Dist: certifi +Requires-Dist: python-dateutil +Requires-Dist: python-slugify +Requires-Dist: requests +Requires-Dist: six (>=1.10) +Requires-Dist: tqdm +Requires-Dist: urllib3 + +Official API for https://www.kaggle.com, accessible using a command line tool implemented in Python. Beta release - Kaggle reserves the right to modify the API functionality currently offered. 
+ + diff --git a/.local/lib/python3.8/site-packages/kaggle-1.5.12.dist-info/RECORD b/.local/lib/python3.8/site-packages/kaggle-1.5.12.dist-info/RECORD new file mode 100644 index 0000000..0bb6b88 --- /dev/null +++ b/.local/lib/python3.8/site-packages/kaggle-1.5.12.dist-info/RECORD @@ -0,0 +1,51 @@ +../../../bin/kaggle,sha256=A9di5OMstgwIa4awH66t2E8sTDDV9cPOHl9yX3rXSFs,209 +kaggle-1.5.12.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +kaggle-1.5.12.dist-info/METADATA,sha256=9XgZyIpRZCX6Pec9f5cRWCPoiZd8Ae1ziKDiXID6aJY,601 +kaggle-1.5.12.dist-info/RECORD,, +kaggle-1.5.12.dist-info/WHEEL,sha256=g4nMs7d-Xl9-xC9XovUrsDHGXt-FT0E17Yqo92DEfvY,92 +kaggle-1.5.12.dist-info/entry_points.txt,sha256=rBWEp_W_lHPkwqY06UlBKVv2PMxJTOM8OkgUK2iGJ6I,44 +kaggle-1.5.12.dist-info/top_level.txt,sha256=Y-2G5HyGxBsP1wVIKTJK5OB4qbSlC-MaA8-PjkezJzo,7 +kaggle/__init__.py,sha256=VOCRE9nkEDIMvAAVOe7xbODKjl-O0ezOFZqV0zkvVrU,799 +kaggle/__pycache__/__init__.cpython-38.pyc,, +kaggle/__pycache__/api_client.cpython-38.pyc,, +kaggle/__pycache__/cli.cpython-38.pyc,, +kaggle/__pycache__/configuration.cpython-38.pyc,, +kaggle/__pycache__/rest.cpython-38.pyc,, +kaggle/api/__init__.py,sha256=EPLRR7HTv9s0ZRMX3mNyGyV94U3ICtQnomVXG9gnO4k,742 +kaggle/api/__pycache__/__init__.cpython-38.pyc,, +kaggle/api/__pycache__/kaggle_api.cpython-38.pyc,, +kaggle/api/__pycache__/kaggle_api_extended.cpython-38.pyc,, +kaggle/api/kaggle_api.py,sha256=78MxbKuY93-2hzD3r9bTvglmZypqVofKMxwuE-hZhpY,125827 +kaggle/api/kaggle_api_extended.py,sha256=h50qo4V1gt9nK4JEO9wMhni3qKkfu_W9JkUDdNAN6BM,108002 +kaggle/api_client.py,sha256=4PXEzo6jv11lxA_zv6mUm0SWy2jZEDvIswWeSpSidok,25458 +kaggle/cli.py,sha256=G8_hCpjNsrUvFKyqrbP0h52_gP9W_BFqJ35j1wVfV58,53449 +kaggle/configuration.py,sha256=NPuxMObNIRQY44xUnhC8gJaw60uHXirdXeb8VEmVlhQ,8858 +kaggle/models/__init__.py,sha256=QhrMWG0tYILF67c9rUhTRCBy-15Ar-ouDp28ZRv6ysc,1481 +kaggle/models/__pycache__/__init__.cpython-38.pyc,, +kaggle/models/__pycache__/collaborator.cpython-38.pyc,, +kaggle/models/__pycache__/dataset_column.cpython-38.pyc,, +kaggle/models/__pycache__/dataset_new_request.cpython-38.pyc,, +kaggle/models/__pycache__/dataset_new_version_request.cpython-38.pyc,, +kaggle/models/__pycache__/dataset_update_settings_request.cpython-38.pyc,, +kaggle/models/__pycache__/dataset_upload_file.cpython-38.pyc,, +kaggle/models/__pycache__/error.cpython-38.pyc,, +kaggle/models/__pycache__/kaggle_models_extended.cpython-38.pyc,, +kaggle/models/__pycache__/kernel_push_request.cpython-38.pyc,, +kaggle/models/__pycache__/license.cpython-38.pyc,, +kaggle/models/__pycache__/result.cpython-38.pyc,, +kaggle/models/collaborator.py,sha256=RyKGMWV-Yj1GiWGKXeeD1fnzi3X6Cl1ALd4TgnXFmdA,4851 +kaggle/models/dataset_column.py,sha256=iBy1P9Q2jgFpMhCaMX7sBW5a40ogieOSRu9pY2v8t2A,7424 +kaggle/models/dataset_new_request.py,sha256=P6Uy54wpDrcsa_OVqfZzVVUR-EcQZmi4nAMp7yVFGO4,12317 +kaggle/models/dataset_new_version_request.py,sha256=1ZZMpnPFWmXjkWdLhOfaRqd7pFwCLMcRacgr3BzgDu4,10027 +kaggle/models/dataset_update_settings_request.py,sha256=boOEHQkuAx2tW8epjSvfr4cUGLIMXSqRogZuAsJSqZ4,10114 +kaggle/models/dataset_upload_file.py,sha256=-9Q9Ys66YsK9G_KauFy0roaIYiv3T08V43b8HCizZ70,5544 +kaggle/models/error.py,sha256=-IUwKPrBxktlWgsGP3WmFL-E7BA-KSCLyzNFwEWUFJ8,4342 +kaggle/models/kaggle_models_extended.py,sha256=mZ4V_kyIOPONZx8PwfUZvYMa21mkeSaagfnKmar3P8k,6180 +kaggle/models/kernel_push_request.py,sha256=C_RqbuocgqJ5dVw5mmS4G3FGTp2C57qSagn5hUNj7wg,18059 +kaggle/models/license.py,sha256=ULPu9kN4eqoX9j-wjReooXXU4Q8aIjd8rZPm2OMKcS4,4077 
+kaggle/models/result.py,sha256=E-_lV8pwAoN4fXgUUAHwUCsWQWo47_fwf7ycQJ0unLE,2895 +kaggle/rest.py,sha256=2-yrpOGjNNEMiPWuwkubNj7KU4RZJGP6N7vJjyVrVYY,13927 +kaggle/tests/__init__.py,sha256=tlLdBBlKzW2I7EkqKj5SOUBkjp3ADimepFL8my4txM8,596 +kaggle/tests/__pycache__/__init__.cpython-38.pyc,, +kaggle/tests/__pycache__/test_authenticate.cpython-38.pyc,, +kaggle/tests/test_authenticate.py,sha256=yJlMOhreo4wA5EiCO4pFydAvczAN4gz0V93MjZnmLug,1780 diff --git a/.local/lib/python3.8/site-packages/kaggle-1.5.12.dist-info/WHEEL b/.local/lib/python3.8/site-packages/kaggle-1.5.12.dist-info/WHEEL new file mode 100644 index 0000000..b552003 --- /dev/null +++ b/.local/lib/python3.8/site-packages/kaggle-1.5.12.dist-info/WHEEL @@ -0,0 +1,5 @@ +Wheel-Version: 1.0 +Generator: bdist_wheel (0.34.2) +Root-Is-Purelib: true +Tag: py3-none-any + diff --git a/.local/lib/python3.8/site-packages/kaggle-1.5.12.dist-info/entry_points.txt b/.local/lib/python3.8/site-packages/kaggle-1.5.12.dist-info/entry_points.txt new file mode 100644 index 0000000..914b6e1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/kaggle-1.5.12.dist-info/entry_points.txt @@ -0,0 +1,3 @@ +[console_scripts] +kaggle = kaggle.cli:main + diff --git a/.local/lib/python3.8/site-packages/kaggle-1.5.12.dist-info/top_level.txt b/.local/lib/python3.8/site-packages/kaggle-1.5.12.dist-info/top_level.txt new file mode 100644 index 0000000..83ef4c0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/kaggle-1.5.12.dist-info/top_level.txt @@ -0,0 +1 @@ +kaggle diff --git a/.local/lib/python3.8/site-packages/kaggle/__init__.py b/.local/lib/python3.8/site-packages/kaggle/__init__.py new file mode 100644 index 0000000..09ff6bf --- /dev/null +++ b/.local/lib/python3.8/site-packages/kaggle/__init__.py @@ -0,0 +1,23 @@ +#!/usr/bin/python +# +# Copyright 2021 Kaggle Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
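+
+# Importing this package constructs a module-level KaggleApi client and
+# authenticates it immediately (see below), so valid Kaggle credentials must
+# be available at import time.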
+ +# coding=utf-8 +from __future__ import absolute_import +from kaggle.api.kaggle_api_extended import KaggleApi +from kaggle.api_client import ApiClient + +api = KaggleApi(ApiClient()) +api.authenticate() diff --git a/.local/lib/python3.8/site-packages/kaggle/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/kaggle/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..2adb0b1 Binary files /dev/null and b/.local/lib/python3.8/site-packages/kaggle/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/kaggle/__pycache__/api_client.cpython-38.pyc b/.local/lib/python3.8/site-packages/kaggle/__pycache__/api_client.cpython-38.pyc new file mode 100644 index 0000000..69409cf Binary files /dev/null and b/.local/lib/python3.8/site-packages/kaggle/__pycache__/api_client.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/kaggle/__pycache__/cli.cpython-38.pyc b/.local/lib/python3.8/site-packages/kaggle/__pycache__/cli.cpython-38.pyc new file mode 100644 index 0000000..b8d4352 Binary files /dev/null and b/.local/lib/python3.8/site-packages/kaggle/__pycache__/cli.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/kaggle/__pycache__/configuration.cpython-38.pyc b/.local/lib/python3.8/site-packages/kaggle/__pycache__/configuration.cpython-38.pyc new file mode 100644 index 0000000..92d0f81 Binary files /dev/null and b/.local/lib/python3.8/site-packages/kaggle/__pycache__/configuration.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/kaggle/__pycache__/rest.cpython-38.pyc b/.local/lib/python3.8/site-packages/kaggle/__pycache__/rest.cpython-38.pyc new file mode 100644 index 0000000..5ea80a1 Binary files /dev/null and b/.local/lib/python3.8/site-packages/kaggle/__pycache__/rest.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/kaggle/api/__init__.py b/.local/lib/python3.8/site-packages/kaggle/api/__init__.py new file mode 100644 index 0000000..abe07fd --- /dev/null +++ b/.local/lib/python3.8/site-packages/kaggle/api/__init__.py @@ -0,0 +1,22 @@ +#!/usr/bin/python +# +# Copyright 2021 Kaggle Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import absolute_import + +# flake8: noqa + +# import apis into api package +from kaggle.api.kaggle_api_extended import KaggleApi diff --git a/.local/lib/python3.8/site-packages/kaggle/api/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/kaggle/api/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..227d8bd Binary files /dev/null and b/.local/lib/python3.8/site-packages/kaggle/api/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/kaggle/api/__pycache__/kaggle_api.cpython-38.pyc b/.local/lib/python3.8/site-packages/kaggle/api/__pycache__/kaggle_api.cpython-38.pyc new file mode 100644 index 0000000..b84d5e5 Binary files /dev/null and b/.local/lib/python3.8/site-packages/kaggle/api/__pycache__/kaggle_api.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/kaggle/api/__pycache__/kaggle_api_extended.cpython-38.pyc b/.local/lib/python3.8/site-packages/kaggle/api/__pycache__/kaggle_api_extended.cpython-38.pyc new file mode 100644 index 0000000..38924be Binary files /dev/null and b/.local/lib/python3.8/site-packages/kaggle/api/__pycache__/kaggle_api_extended.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/kaggle/api/kaggle_api.py b/.local/lib/python3.8/site-packages/kaggle/api/kaggle_api.py new file mode 100644 index 0000000..d4e4eb9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/kaggle/api/kaggle_api.py @@ -0,0 +1,2885 @@ +#!/usr/bin/python +# +# Copyright 2021 Kaggle Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# coding: utf-8 + +""" + Kaggle API + + API for kaggle.com # noqa: E501 + + OpenAPI spec version: 1 + + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + + +from __future__ import absolute_import + +import re # noqa: F401 + +# python 2 and python 3 compatibility library +import six + +from kaggle.api_client import ApiClient + + +class KaggleApi(object): + """NOTE: This class is auto generated by the swagger code generator program. + + Do not edit the class manually. + Ref: https://github.com/swagger-api/swagger-codegen + """ + + def __init__(self, api_client=None): + if api_client is None: + api_client = ApiClient() + self.api_client = api_client + + def competition_download_leaderboard(self, id, **kwargs): # noqa: E501 + """Download competition leaderboard # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.competition_download_leaderboard(id, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param str id: Competition name (required) + :return: Result + If the method is called asynchronously, + returns the request thread. 
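+
+ A synchronous usage sketch (the competition name is a hypothetical
+ placeholder; without async_req the call returns the result directly):
+ >>> result = api.competition_download_leaderboard('titanic')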
+ """ + kwargs['_return_http_data_only'] = True + if kwargs.get('async_req'): + return self.competition_download_leaderboard_with_http_info(id, **kwargs) # noqa: E501 + else: + (data) = self.competition_download_leaderboard_with_http_info(id, **kwargs) # noqa: E501 + return data + + def competition_download_leaderboard_with_http_info(self, id, **kwargs): # noqa: E501 + """Download competition leaderboard # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.competition_download_leaderboard_with_http_info(id, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param str id: Competition name (required) + :return: Result + If the method is called asynchronously, + returns the request thread. + """ + + all_params = ['id'] # noqa: E501 + all_params.append('async_req') + all_params.append('_return_http_data_only') + all_params.append('_preload_content') + all_params.append('_request_timeout') + + params = locals() + for key, val in six.iteritems(params['kwargs']): + if key not in all_params: + raise TypeError( + "Got an unexpected keyword argument '%s'" + " to method competition_download_leaderboard" % key + ) + params[key] = val + del params['kwargs'] + # verify the required parameter 'id' is set + if ('id' not in params or + params['id'] is None): + raise ValueError("Missing the required parameter `id` when calling `competition_download_leaderboard`") # noqa: E501 + + collection_formats = {} + + path_params = {} + if 'id' in params: + path_params['id'] = params['id'] # noqa: E501 + + query_params = [] + + header_params = {} + + form_params = [] + local_var_files = {} + + body_params = None + # Authentication setting + auth_settings = ['basicAuth'] # noqa: E501 + + return self.api_client.call_api( + '/competitions/{id}/leaderboard/download', 'GET', + path_params, + query_params, + header_params, + body=body_params, + post_params=form_params, + files=local_var_files, + response_type='Result', # noqa: E501 + auth_settings=auth_settings, + async_req=params.get('async_req'), + _return_http_data_only=params.get('_return_http_data_only'), + _preload_content=params.get('_preload_content', True), + _request_timeout=params.get('_request_timeout'), + collection_formats=collection_formats) + + def competition_view_leaderboard(self, id, **kwargs): # noqa: E501 + """VIew competition leaderboard # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.competition_view_leaderboard(id, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param str id: Competition name (required) + :return: Result + If the method is called asynchronously, + returns the request thread. + """ + kwargs['_return_http_data_only'] = True + if kwargs.get('async_req'): + return self.competition_view_leaderboard_with_http_info(id, **kwargs) # noqa: E501 + else: + (data) = self.competition_view_leaderboard_with_http_info(id, **kwargs) # noqa: E501 + return data + + def competition_view_leaderboard_with_http_info(self, id, **kwargs): # noqa: E501 + """VIew competition leaderboard # noqa: E501 + + This method makes a synchronous HTTP request by default. 
To make an
+ asynchronous HTTP request, please pass async_req=True
+ >>> thread = api.competition_view_leaderboard_with_http_info(id, async_req=True)
+ >>> result = thread.get()
+
+ :param async_req bool
+ :param str id: Competition name (required)
+ :return: Result
+ If the method is called asynchronously,
+ returns the request thread.
+ """
+
+ all_params = ['id'] # noqa: E501
+ all_params.append('async_req')
+ all_params.append('_return_http_data_only')
+ all_params.append('_preload_content')
+ all_params.append('_request_timeout')
+
+ params = locals()
+ for key, val in six.iteritems(params['kwargs']):
+ if key not in all_params:
+ raise TypeError(
+ "Got an unexpected keyword argument '%s'"
+ " to method competition_view_leaderboard" % key
+ )
+ params[key] = val
+ del params['kwargs']
+ # verify the required parameter 'id' is set
+ if ('id' not in params or
+ params['id'] is None):
+ raise ValueError("Missing the required parameter `id` when calling `competition_view_leaderboard`") # noqa: E501
+
+ collection_formats = {}
+
+ path_params = {}
+ if 'id' in params:
+ path_params['id'] = params['id'] # noqa: E501
+
+ query_params = []
+
+ header_params = {}
+
+ form_params = []
+ local_var_files = {}
+
+ body_params = None
+ # Authentication setting
+ auth_settings = ['basicAuth'] # noqa: E501
+
+ return self.api_client.call_api(
+ '/competitions/{id}/leaderboard/view', 'GET',
+ path_params,
+ query_params,
+ header_params,
+ body=body_params,
+ post_params=form_params,
+ files=local_var_files,
+ response_type='Result', # noqa: E501
+ auth_settings=auth_settings,
+ async_req=params.get('async_req'),
+ _return_http_data_only=params.get('_return_http_data_only'),
+ _preload_content=params.get('_preload_content', True),
+ _request_timeout=params.get('_request_timeout'),
+ collection_formats=collection_formats)
+
+ def competitions_data_download_file(self, id, file_name, **kwargs): # noqa: E501
+ """Download competition data file # noqa: E501
+
+ This method makes a synchronous HTTP request by default. To make an
+ asynchronous HTTP request, please pass async_req=True
+ >>> thread = api.competitions_data_download_file(id, file_name, async_req=True)
+ >>> result = thread.get()
+
+ :param async_req bool
+ :param str id: Competition name (required)
+ :param str file_name: File name (required)
+ :return: Result
+ If the method is called asynchronously,
+ returns the request thread.
+ """
+ kwargs['_return_http_data_only'] = True
+ if kwargs.get('async_req'):
+ return self.competitions_data_download_file_with_http_info(id, file_name, **kwargs) # noqa: E501
+ else:
+ (data) = self.competitions_data_download_file_with_http_info(id, file_name, **kwargs) # noqa: E501
+ return data
+
+ def competitions_data_download_file_with_http_info(self, id, file_name, **kwargs): # noqa: E501
+ """Download competition data file # noqa: E501
+
+ This method makes a synchronous HTTP request by default. To make an
+ asynchronous HTTP request, please pass async_req=True
+ >>> thread = api.competitions_data_download_file_with_http_info(id, file_name, async_req=True)
+ >>> result = thread.get()
+
+ :param async_req bool
+ :param str id: Competition name (required)
+ :param str file_name: File name (required)
+ :return: Result
+ If the method is called asynchronously,
+ returns the request thread. 
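+
+ A synchronous sketch (competition and file names are hypothetical
+ placeholders):
+ >>> result = api.competitions_data_download_file_with_http_info('titanic', 'train.csv')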
+ """ + + all_params = ['id', 'file_name'] # noqa: E501 + all_params.append('async_req') + all_params.append('_return_http_data_only') + all_params.append('_preload_content') + all_params.append('_request_timeout') + + params = locals() + for key, val in six.iteritems(params['kwargs']): + if key not in all_params: + raise TypeError( + "Got an unexpected keyword argument '%s'" + " to method competitions_data_download_file" % key + ) + params[key] = val + del params['kwargs'] + # verify the required parameter 'id' is set + if ('id' not in params or + params['id'] is None): + raise ValueError("Missing the required parameter `id` when calling `competitions_data_download_file`") # noqa: E501 + # verify the required parameter 'file_name' is set + if ('file_name' not in params or + params['file_name'] is None): + raise ValueError("Missing the required parameter `file_name` when calling `competitions_data_download_file`") # noqa: E501 + + collection_formats = {} + + path_params = {} + if 'id' in params: + path_params['id'] = params['id'] # noqa: E501 + if 'file_name' in params: + path_params['fileName'] = params['file_name'] # noqa: E501 + + query_params = [] + + header_params = {} + + form_params = [] + local_var_files = {} + + body_params = None + # Authentication setting + auth_settings = ['basicAuth'] # noqa: E501 + + return self.api_client.call_api( + '/competitions/data/download/{id}/{fileName}', 'GET', + path_params, + query_params, + header_params, + body=body_params, + post_params=form_params, + files=local_var_files, + response_type='Result', # noqa: E501 + auth_settings=auth_settings, + async_req=params.get('async_req'), + _return_http_data_only=params.get('_return_http_data_only'), + _preload_content=params.get('_preload_content', True), + _request_timeout=params.get('_request_timeout'), + collection_formats=collection_formats) + + def competitions_data_download_files(self, id, **kwargs): # noqa: E501 + """Download all competition data files # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.competitions_data_download_files(id, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param str id: Competition name (required) + :return: Result + If the method is called asynchronously, + returns the request thread. + """ + kwargs['_return_http_data_only'] = True + if kwargs.get('async_req'): + return self.competitions_data_download_files_with_http_info(id, **kwargs) # noqa: E501 + else: + (data) = self.competitions_data_download_files_with_http_info(id, **kwargs) # noqa: E501 + return data + + def competitions_data_download_files_with_http_info(self, id, **kwargs): # noqa: E501 + """Download all competition data files # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.competitions_data_download_files_with_http_info(id, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param str id: Competition name (required) + :return: Result + If the method is called asynchronously, + returns the request thread. 
+ """ + + all_params = ['id'] # noqa: E501 + all_params.append('async_req') + all_params.append('_return_http_data_only') + all_params.append('_preload_content') + all_params.append('_request_timeout') + + params = locals() + for key, val in six.iteritems(params['kwargs']): + if key not in all_params: + raise TypeError( + "Got an unexpected keyword argument '%s'" + " to method competitions_data_download_files" % key + ) + params[key] = val + del params['kwargs'] + # verify the required parameter 'id' is set + if ('id' not in params or + params['id'] is None): + raise ValueError("Missing the required parameter `id` when calling `competitions_data_download_files`") # noqa: E501 + + collection_formats = {} + + path_params = {} + if 'id' in params: + path_params['id'] = params['id'] # noqa: E501 + + query_params = [] + + header_params = {} + + form_params = [] + local_var_files = {} + + body_params = None + # Authentication setting + auth_settings = ['basicAuth'] # noqa: E501 + + return self.api_client.call_api( + '/competitions/data/download-all/{id}', 'GET', + path_params, + query_params, + header_params, + body=body_params, + post_params=form_params, + files=local_var_files, + response_type='Result', # noqa: E501 + auth_settings=auth_settings, + async_req=params.get('async_req'), + _return_http_data_only=params.get('_return_http_data_only'), + _preload_content=params.get('_preload_content', True), + _request_timeout=params.get('_request_timeout'), + collection_formats=collection_formats) + + def competitions_data_list_files(self, id, **kwargs): # noqa: E501 + """List competition data files # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.competitions_data_list_files(id, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param str id: Competition name (required) + :return: Result + If the method is called asynchronously, + returns the request thread. + """ + kwargs['_return_http_data_only'] = True + if kwargs.get('async_req'): + return self.competitions_data_list_files_with_http_info(id, **kwargs) # noqa: E501 + else: + (data) = self.competitions_data_list_files_with_http_info(id, **kwargs) # noqa: E501 + return data + + def competitions_data_list_files_with_http_info(self, id, **kwargs): # noqa: E501 + """List competition data files # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.competitions_data_list_files_with_http_info(id, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param str id: Competition name (required) + :return: Result + If the method is called asynchronously, + returns the request thread. 
+ """ + + all_params = ['id'] # noqa: E501 + all_params.append('async_req') + all_params.append('_return_http_data_only') + all_params.append('_preload_content') + all_params.append('_request_timeout') + + params = locals() + for key, val in six.iteritems(params['kwargs']): + if key not in all_params: + raise TypeError( + "Got an unexpected keyword argument '%s'" + " to method competitions_data_list_files" % key + ) + params[key] = val + del params['kwargs'] + # verify the required parameter 'id' is set + if ('id' not in params or + params['id'] is None): + raise ValueError("Missing the required parameter `id` when calling `competitions_data_list_files`") # noqa: E501 + + collection_formats = {} + + path_params = {} + if 'id' in params: + path_params['id'] = params['id'] # noqa: E501 + + query_params = [] + + header_params = {} + + form_params = [] + local_var_files = {} + + body_params = None + # HTTP header `Accept` + header_params['Accept'] = self.api_client.select_header_accept( + ['application/json']) # noqa: E501 + + # Authentication setting + auth_settings = ['basicAuth'] # noqa: E501 + + return self.api_client.call_api( + '/competitions/data/list/{id}', 'GET', + path_params, + query_params, + header_params, + body=body_params, + post_params=form_params, + files=local_var_files, + response_type='Result', # noqa: E501 + auth_settings=auth_settings, + async_req=params.get('async_req'), + _return_http_data_only=params.get('_return_http_data_only'), + _preload_content=params.get('_preload_content', True), + _request_timeout=params.get('_request_timeout'), + collection_formats=collection_formats) + + def competitions_list(self, **kwargs): # noqa: E501 + """List competitions # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.competitions_list(async_req=True) + >>> result = thread.get() + + :param async_req bool + :param str group: Filter competitions by a particular group + :param str category: Filter competitions by a particular category + :param str sort_by: Sort the results + :param int page: Page number + :param str search: Search terms + :return: Result + If the method is called asynchronously, + returns the request thread. + """ + kwargs['_return_http_data_only'] = True + if kwargs.get('async_req'): + return self.competitions_list_with_http_info(**kwargs) # noqa: E501 + else: + (data) = self.competitions_list_with_http_info(**kwargs) # noqa: E501 + return data + + def competitions_list_with_http_info(self, **kwargs): # noqa: E501 + """List competitions # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.competitions_list_with_http_info(async_req=True) + >>> result = thread.get() + + :param async_req bool + :param str group: Filter competitions by a particular group + :param str category: Filter competitions by a particular category + :param str sort_by: Sort the results + :param int page: Page number + :param str search: Search terms + :return: Result + If the method is called asynchronously, + returns the request thread. 
+ """ + + all_params = ['group', 'category', 'sort_by', 'page', 'search'] # noqa: E501 + all_params.append('async_req') + all_params.append('_return_http_data_only') + all_params.append('_preload_content') + all_params.append('_request_timeout') + + params = locals() + for key, val in six.iteritems(params['kwargs']): + if key not in all_params: + raise TypeError( + "Got an unexpected keyword argument '%s'" + " to method competitions_list" % key + ) + params[key] = val + del params['kwargs'] + + collection_formats = {} + + path_params = {} + + query_params = [] + if 'group' in params: + query_params.append(('group', params['group'])) # noqa: E501 + if 'category' in params: + query_params.append(('category', params['category'])) # noqa: E501 + if 'sort_by' in params: + query_params.append(('sortBy', params['sort_by'])) # noqa: E501 + if 'page' in params: + query_params.append(('page', params['page'])) # noqa: E501 + if 'search' in params: + query_params.append(('search', params['search'])) # noqa: E501 + + header_params = {} + + form_params = [] + local_var_files = {} + + body_params = None + # HTTP header `Accept` + header_params['Accept'] = self.api_client.select_header_accept( + ['application/json']) # noqa: E501 + + # Authentication setting + auth_settings = ['basicAuth'] # noqa: E501 + + return self.api_client.call_api( + '/competitions/list', 'GET', + path_params, + query_params, + header_params, + body=body_params, + post_params=form_params, + files=local_var_files, + response_type='Result', # noqa: E501 + auth_settings=auth_settings, + async_req=params.get('async_req'), + _return_http_data_only=params.get('_return_http_data_only'), + _preload_content=params.get('_preload_content', True), + _request_timeout=params.get('_request_timeout'), + collection_formats=collection_formats) + + def competitions_submissions_list(self, id, **kwargs): # noqa: E501 + """List competition submissions # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.competitions_submissions_list(id, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param str id: Competition name (required) + :param int page: Page number + :return: Result + If the method is called asynchronously, + returns the request thread. + """ + kwargs['_return_http_data_only'] = True + if kwargs.get('async_req'): + return self.competitions_submissions_list_with_http_info(id, **kwargs) # noqa: E501 + else: + (data) = self.competitions_submissions_list_with_http_info(id, **kwargs) # noqa: E501 + return data + + def competitions_submissions_list_with_http_info(self, id, **kwargs): # noqa: E501 + """List competition submissions # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.competitions_submissions_list_with_http_info(id, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param str id: Competition name (required) + :param int page: Page number + :return: Result + If the method is called asynchronously, + returns the request thread. 
+ """ + + all_params = ['id', 'page'] # noqa: E501 + all_params.append('async_req') + all_params.append('_return_http_data_only') + all_params.append('_preload_content') + all_params.append('_request_timeout') + + params = locals() + for key, val in six.iteritems(params['kwargs']): + if key not in all_params: + raise TypeError( + "Got an unexpected keyword argument '%s'" + " to method competitions_submissions_list" % key + ) + params[key] = val + del params['kwargs'] + # verify the required parameter 'id' is set + if ('id' not in params or + params['id'] is None): + raise ValueError("Missing the required parameter `id` when calling `competitions_submissions_list`") # noqa: E501 + + collection_formats = {} + + path_params = {} + if 'id' in params: + path_params['id'] = params['id'] # noqa: E501 + + query_params = [] + if 'page' in params: + query_params.append(('page', params['page'])) # noqa: E501 + + header_params = {} + + form_params = [] + local_var_files = {} + + body_params = None + # HTTP header `Accept` + header_params['Accept'] = self.api_client.select_header_accept( + ['application/json']) # noqa: E501 + + # Authentication setting + auth_settings = ['basicAuth'] # noqa: E501 + + return self.api_client.call_api( + '/competitions/submissions/list/{id}', 'GET', + path_params, + query_params, + header_params, + body=body_params, + post_params=form_params, + files=local_var_files, + response_type='Result', # noqa: E501 + auth_settings=auth_settings, + async_req=params.get('async_req'), + _return_http_data_only=params.get('_return_http_data_only'), + _preload_content=params.get('_preload_content', True), + _request_timeout=params.get('_request_timeout'), + collection_formats=collection_formats) + + def competitions_submissions_submit(self, blob_file_tokens, submission_description, id, **kwargs): # noqa: E501 + """Submit to competition # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.competitions_submissions_submit(blob_file_tokens, submission_description, id, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param str blob_file_tokens: Token identifying location of uploaded submission file (required) + :param str submission_description: Description of competition submission (required) + :param str id: Competition name (required) + :return: Result + If the method is called asynchronously, + returns the request thread. + """ + kwargs['_return_http_data_only'] = True + if kwargs.get('async_req'): + return self.competitions_submissions_submit_with_http_info(blob_file_tokens, submission_description, id, **kwargs) # noqa: E501 + else: + (data) = self.competitions_submissions_submit_with_http_info(blob_file_tokens, submission_description, id, **kwargs) # noqa: E501 + return data + + def competitions_submissions_submit_with_http_info(self, blob_file_tokens, submission_description, id, **kwargs): # noqa: E501 + """Submit to competition # noqa: E501 + + This method makes a synchronous HTTP request by default. 
To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.competitions_submissions_submit_with_http_info(blob_file_tokens, submission_description, id, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param str blob_file_tokens: Token identifying location of uploaded submission file (required) + :param str submission_description: Description of competition submission (required) + :param str id: Competition name (required) + :return: Result + If the method is called asynchronously, + returns the request thread. + """ + + all_params = ['blob_file_tokens', 'submission_description', 'id'] # noqa: E501 + all_params.append('async_req') + all_params.append('_return_http_data_only') + all_params.append('_preload_content') + all_params.append('_request_timeout') + + params = locals() + for key, val in six.iteritems(params['kwargs']): + if key not in all_params: + raise TypeError( + "Got an unexpected keyword argument '%s'" + " to method competitions_submissions_submit" % key + ) + params[key] = val + del params['kwargs'] + # verify the required parameter 'blob_file_tokens' is set + if ('blob_file_tokens' not in params or + params['blob_file_tokens'] is None): + raise ValueError("Missing the required parameter `blob_file_tokens` when calling `competitions_submissions_submit`") # noqa: E501 + # verify the required parameter 'submission_description' is set + if ('submission_description' not in params or + params['submission_description'] is None): + raise ValueError("Missing the required parameter `submission_description` when calling `competitions_submissions_submit`") # noqa: E501 + # verify the required parameter 'id' is set + if ('id' not in params or + params['id'] is None): + raise ValueError("Missing the required parameter `id` when calling `competitions_submissions_submit`") # noqa: E501 + + collection_formats = {} + + path_params = {} + if 'id' in params: + path_params['id'] = params['id'] # noqa: E501 + + query_params = [] + + header_params = {} + + form_params = [] + local_var_files = {} + if 'blob_file_tokens' in params: + form_params.append(('blobFileTokens', params['blob_file_tokens'])) # noqa: E501 + if 'submission_description' in params: + form_params.append(('submissionDescription', params['submission_description'])) # noqa: E501 + + body_params = None + # HTTP header `Accept` + header_params['Accept'] = self.api_client.select_header_accept( + ['application/json']) # noqa: E501 + + # HTTP header `Content-Type` + header_params['Content-Type'] = self.api_client.select_header_content_type( # noqa: E501 + ['multipart/form-data']) # noqa: E501 + + # Authentication setting + auth_settings = ['basicAuth'] # noqa: E501 + + return self.api_client.call_api( + '/competitions/submissions/submit/{id}', 'POST', + path_params, + query_params, + header_params, + body=body_params, + post_params=form_params, + files=local_var_files, + response_type='Result', # noqa: E501 + auth_settings=auth_settings, + async_req=params.get('async_req'), + _return_http_data_only=params.get('_return_http_data_only'), + _preload_content=params.get('_preload_content', True), + _request_timeout=params.get('_request_timeout'), + collection_formats=collection_formats) + + def competitions_submissions_upload(self, file, guid, content_length, last_modified_date_utc, **kwargs): # noqa: E501 + """Upload competition submission file # noqa: E501 + + This method makes a synchronous HTTP request by default. 
To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.competitions_submissions_upload(file, guid, content_length, last_modified_date_utc, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param file file: Competition submission file (required) + :param str guid: Location where submission should be uploaded (required) + :param int content_length: Content length of file in bytes (required) + :param int last_modified_date_utc: Last modified date of file in milliseconds since epoch in UTC (required) + :return: Result + If the method is called asynchronously, + returns the request thread. + """ + kwargs['_return_http_data_only'] = True + if kwargs.get('async_req'): + return self.competitions_submissions_upload_with_http_info(file, guid, content_length, last_modified_date_utc, **kwargs) # noqa: E501 + else: + (data) = self.competitions_submissions_upload_with_http_info(file, guid, content_length, last_modified_date_utc, **kwargs) # noqa: E501 + return data + + def competitions_submissions_upload_with_http_info(self, file, guid, content_length, last_modified_date_utc, **kwargs): # noqa: E501 + """Upload competition submission file # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.competitions_submissions_upload_with_http_info(file, guid, content_length, last_modified_date_utc, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param file file: Competition submission file (required) + :param str guid: Location where submission should be uploaded (required) + :param int content_length: Content length of file in bytes (required) + :param int last_modified_date_utc: Last modified date of file in milliseconds since epoch in UTC (required) + :return: Result + If the method is called asynchronously, + returns the request thread. 
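+
+ A synchronous sketch (all values are hypothetical; ``guid`` would come
+ from a prior competitions_submissions_url call, and the length and
+ timestamp describe the uploaded file):
+ >>> result = api.competitions_submissions_upload_with_http_info(
+ ...     open('submission.csv', 'rb'), guid, 1024, 1612137600000)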
+ """ + + all_params = ['file', 'guid', 'content_length', 'last_modified_date_utc'] # noqa: E501 + all_params.append('async_req') + all_params.append('_return_http_data_only') + all_params.append('_preload_content') + all_params.append('_request_timeout') + + params = locals() + for key, val in six.iteritems(params['kwargs']): + if key not in all_params: + raise TypeError( + "Got an unexpected keyword argument '%s'" + " to method competitions_submissions_upload" % key + ) + params[key] = val + del params['kwargs'] + # verify the required parameter 'file' is set + if ('file' not in params or + params['file'] is None): + raise ValueError("Missing the required parameter `file` when calling `competitions_submissions_upload`") # noqa: E501 + # verify the required parameter 'guid' is set + if ('guid' not in params or + params['guid'] is None): + raise ValueError("Missing the required parameter `guid` when calling `competitions_submissions_upload`") # noqa: E501 + # verify the required parameter 'content_length' is set + if ('content_length' not in params or + params['content_length'] is None): + raise ValueError("Missing the required parameter `content_length` when calling `competitions_submissions_upload`") # noqa: E501 + # verify the required parameter 'last_modified_date_utc' is set + if ('last_modified_date_utc' not in params or + params['last_modified_date_utc'] is None): + raise ValueError("Missing the required parameter `last_modified_date_utc` when calling `competitions_submissions_upload`") # noqa: E501 + + collection_formats = {} + + path_params = {} + if 'guid' in params: + path_params['guid'] = params['guid'] # noqa: E501 + if 'content_length' in params: + path_params['contentLength'] = params['content_length'] # noqa: E501 + if 'last_modified_date_utc' in params: + path_params['lastModifiedDateUtc'] = params['last_modified_date_utc'] # noqa: E501 + + query_params = [] + + header_params = {} + + form_params = [] + local_var_files = {} + if 'file' in params: + local_var_files['file'] = params['file'] # noqa: E501 + + body_params = None + # HTTP header `Accept` + header_params['Accept'] = self.api_client.select_header_accept( + ['application/json']) # noqa: E501 + + # HTTP header `Content-Type` + header_params['Content-Type'] = self.api_client.select_header_content_type( # noqa: E501 + ['multipart/form-data']) # noqa: E501 + + # Authentication setting + auth_settings = ['basicAuth'] # noqa: E501 + + return self.api_client.call_api( + '/competitions/submissions/upload/{guid}/{contentLength}/{lastModifiedDateUtc}', 'POST', + path_params, + query_params, + header_params, + body=body_params, + post_params=form_params, + files=local_var_files, + response_type='Result', # noqa: E501 + auth_settings=auth_settings, + async_req=params.get('async_req'), + _return_http_data_only=params.get('_return_http_data_only'), + _preload_content=params.get('_preload_content', True), + _request_timeout=params.get('_request_timeout'), + collection_formats=collection_formats) + + def competitions_submissions_url(self, id, content_length, last_modified_date_utc, **kwargs): # noqa: E501 + """Generate competition submission URL # noqa: E501 + + This method makes a synchronous HTTP request by default. 
To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.competitions_submissions_url(id, content_length, last_modified_date_utc, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param str id: Competition name, as it appears in the competition's URL (required) + :param int content_length: Content length of file in bytes (required) + :param int last_modified_date_utc: Last modified date of file in milliseconds since epoch in UTC (required) + :param str file_name: Competition submission file name + :return: Result + If the method is called asynchronously, + returns the request thread. + """ + kwargs['_return_http_data_only'] = True + if kwargs.get('async_req'): + return self.competitions_submissions_url_with_http_info(id, content_length, last_modified_date_utc, **kwargs) # noqa: E501 + else: + (data) = self.competitions_submissions_url_with_http_info(id, content_length, last_modified_date_utc, **kwargs) # noqa: E501 + return data + + def competitions_submissions_url_with_http_info(self, id, content_length, last_modified_date_utc, **kwargs): # noqa: E501 + """Generate competition submission URL # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.competitions_submissions_url_with_http_info(id, content_length, last_modified_date_utc, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param str id: Competition name, as it appears in the competition's URL (required) + :param int content_length: Content length of file in bytes (required) + :param int last_modified_date_utc: Last modified date of file in milliseconds since epoch in UTC (required) + :param str file_name: Competition submission file name + :return: Result + If the method is called asynchronously, + returns the request thread. 
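+
+ A synchronous sketch (hypothetical values; the length and UTC timestamp
+ in milliseconds describe the file that will be uploaded):
+ >>> result = api.competitions_submissions_url_with_http_info(
+ ...     'titanic', 1024, 1612137600000, file_name='submission.csv')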
+ """ + + all_params = ['id', 'content_length', 'last_modified_date_utc', 'file_name'] # noqa: E501 + all_params.append('async_req') + all_params.append('_return_http_data_only') + all_params.append('_preload_content') + all_params.append('_request_timeout') + + params = locals() + for key, val in six.iteritems(params['kwargs']): + if key not in all_params: + raise TypeError( + "Got an unexpected keyword argument '%s'" + " to method competitions_submissions_url" % key + ) + params[key] = val + del params['kwargs'] + # verify the required parameter 'id' is set + if ('id' not in params or + params['id'] is None): + raise ValueError("Missing the required parameter `id` when calling `competitions_submissions_url`") # noqa: E501 + # verify the required parameter 'content_length' is set + if ('content_length' not in params or + params['content_length'] is None): + raise ValueError("Missing the required parameter `content_length` when calling `competitions_submissions_url`") # noqa: E501 + # verify the required parameter 'last_modified_date_utc' is set + if ('last_modified_date_utc' not in params or + params['last_modified_date_utc'] is None): + raise ValueError("Missing the required parameter `last_modified_date_utc` when calling `competitions_submissions_url`") # noqa: E501 + + collection_formats = {} + + path_params = {} + if 'id' in params: + path_params['id'] = params['id'] # noqa: E501 + if 'content_length' in params: + path_params['contentLength'] = params['content_length'] # noqa: E501 + if 'last_modified_date_utc' in params: + path_params['lastModifiedDateUtc'] = params['last_modified_date_utc'] # noqa: E501 + + query_params = [] + + header_params = {} + + form_params = [] + local_var_files = {} + if 'file_name' in params: + form_params.append(('fileName', params['file_name'])) # noqa: E501 + + body_params = None + # HTTP header `Accept` + header_params['Accept'] = self.api_client.select_header_accept( + ['application/json']) # noqa: E501 + + # HTTP header `Content-Type` + header_params['Content-Type'] = self.api_client.select_header_content_type( # noqa: E501 + ['multipart/form-data']) # noqa: E501 + + # Authentication setting + auth_settings = ['basicAuth'] # noqa: E501 + + return self.api_client.call_api( + '/competitions/{id}/submissions/url/{contentLength}/{lastModifiedDateUtc}', 'POST', + path_params, + query_params, + header_params, + body=body_params, + post_params=form_params, + files=local_var_files, + response_type='Result', # noqa: E501 + auth_settings=auth_settings, + async_req=params.get('async_req'), + _return_http_data_only=params.get('_return_http_data_only'), + _preload_content=params.get('_preload_content', True), + _request_timeout=params.get('_request_timeout'), + collection_formats=collection_formats) + + def datasets_create_new(self, dataset_new_request, **kwargs): # noqa: E501 + """Create a new dataset # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.datasets_create_new(dataset_new_request, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param DatasetNewRequest dataset_new_request: Information for creating a new dataset (required) + :return: Result + If the method is called asynchronously, + returns the request thread. 
+ """ + kwargs['_return_http_data_only'] = True + if kwargs.get('async_req'): + return self.datasets_create_new_with_http_info(dataset_new_request, **kwargs) # noqa: E501 + else: + (data) = self.datasets_create_new_with_http_info(dataset_new_request, **kwargs) # noqa: E501 + return data + + def datasets_create_new_with_http_info(self, dataset_new_request, **kwargs): # noqa: E501 + """Create a new dataset # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.datasets_create_new_with_http_info(dataset_new_request, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param DatasetNewRequest dataset_new_request: Information for creating a new dataset (required) + :return: Result + If the method is called asynchronously, + returns the request thread. + """ + + all_params = ['dataset_new_request'] # noqa: E501 + all_params.append('async_req') + all_params.append('_return_http_data_only') + all_params.append('_preload_content') + all_params.append('_request_timeout') + + params = locals() + for key, val in six.iteritems(params['kwargs']): + if key not in all_params: + raise TypeError( + "Got an unexpected keyword argument '%s'" + " to method datasets_create_new" % key + ) + params[key] = val + del params['kwargs'] + # verify the required parameter 'dataset_new_request' is set + if ('dataset_new_request' not in params or + params['dataset_new_request'] is None): + raise ValueError("Missing the required parameter `dataset_new_request` when calling `datasets_create_new`") # noqa: E501 + + collection_formats = {} + + path_params = {} + + query_params = [] + + header_params = {} + + form_params = [] + local_var_files = {} + + body_params = None + if 'dataset_new_request' in params: + body_params = params['dataset_new_request'] + # HTTP header `Accept` + header_params['Accept'] = self.api_client.select_header_accept( + ['application/json']) # noqa: E501 + + # HTTP header `Content-Type` + header_params['Content-Type'] = self.api_client.select_header_content_type( # noqa: E501 + ['application/json']) # noqa: E501 + + # Authentication setting + auth_settings = ['basicAuth'] # noqa: E501 + + return self.api_client.call_api( + '/datasets/create/new', 'POST', + path_params, + query_params, + header_params, + body=body_params, + post_params=form_params, + files=local_var_files, + response_type='Result', # noqa: E501 + auth_settings=auth_settings, + async_req=params.get('async_req'), + _return_http_data_only=params.get('_return_http_data_only'), + _preload_content=params.get('_preload_content', True), + _request_timeout=params.get('_request_timeout'), + collection_formats=collection_formats) + + def datasets_create_version(self, owner_slug, dataset_slug, dataset_new_version_request, **kwargs): # noqa: E501 + """Create a new dataset version # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.datasets_create_version(owner_slug, dataset_slug, dataset_new_version_request, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param str owner_slug: Dataset owner (required) + :param str dataset_slug: Dataset name (required) + :param DatasetNewVersionRequest dataset_new_version_request: Information for creating a new dataset version (required) + :return: Result + If the method is called asynchronously, + returns the request thread. 
+ """ + kwargs['_return_http_data_only'] = True + if kwargs.get('async_req'): + return self.datasets_create_version_with_http_info(owner_slug, dataset_slug, dataset_new_version_request, **kwargs) # noqa: E501 + else: + (data) = self.datasets_create_version_with_http_info(owner_slug, dataset_slug, dataset_new_version_request, **kwargs) # noqa: E501 + return data + + def datasets_create_version_with_http_info(self, owner_slug, dataset_slug, dataset_new_version_request, **kwargs): # noqa: E501 + """Create a new dataset version # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.datasets_create_version_with_http_info(owner_slug, dataset_slug, dataset_new_version_request, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param str owner_slug: Dataset owner (required) + :param str dataset_slug: Dataset name (required) + :param DatasetNewVersionRequest dataset_new_version_request: Information for creating a new dataset version (required) + :return: Result + If the method is called asynchronously, + returns the request thread. + """ + + all_params = ['owner_slug', 'dataset_slug', 'dataset_new_version_request'] # noqa: E501 + all_params.append('async_req') + all_params.append('_return_http_data_only') + all_params.append('_preload_content') + all_params.append('_request_timeout') + + params = locals() + for key, val in six.iteritems(params['kwargs']): + if key not in all_params: + raise TypeError( + "Got an unexpected keyword argument '%s'" + " to method datasets_create_version" % key + ) + params[key] = val + del params['kwargs'] + # verify the required parameter 'owner_slug' is set + if ('owner_slug' not in params or + params['owner_slug'] is None): + raise ValueError("Missing the required parameter `owner_slug` when calling `datasets_create_version`") # noqa: E501 + # verify the required parameter 'dataset_slug' is set + if ('dataset_slug' not in params or + params['dataset_slug'] is None): + raise ValueError("Missing the required parameter `dataset_slug` when calling `datasets_create_version`") # noqa: E501 + # verify the required parameter 'dataset_new_version_request' is set + if ('dataset_new_version_request' not in params or + params['dataset_new_version_request'] is None): + raise ValueError("Missing the required parameter `dataset_new_version_request` when calling `datasets_create_version`") # noqa: E501 + + collection_formats = {} + + path_params = {} + if 'owner_slug' in params: + path_params['ownerSlug'] = params['owner_slug'] # noqa: E501 + if 'dataset_slug' in params: + path_params['datasetSlug'] = params['dataset_slug'] # noqa: E501 + + query_params = [] + + header_params = {} + + form_params = [] + local_var_files = {} + + body_params = None + if 'dataset_new_version_request' in params: + body_params = params['dataset_new_version_request'] + # HTTP header `Accept` + header_params['Accept'] = self.api_client.select_header_accept( + ['application/json']) # noqa: E501 + + # HTTP header `Content-Type` + header_params['Content-Type'] = self.api_client.select_header_content_type( # noqa: E501 + ['application/json']) # noqa: E501 + + # Authentication setting + auth_settings = ['basicAuth'] # noqa: E501 + + return self.api_client.call_api( + '/datasets/create/version/{ownerSlug}/{datasetSlug}', 'POST', + path_params, + query_params, + header_params, + body=body_params, + post_params=form_params, + files=local_var_files, + response_type='Result', # noqa: E501 + 
auth_settings=auth_settings, + async_req=params.get('async_req'), + _return_http_data_only=params.get('_return_http_data_only'), + _preload_content=params.get('_preload_content', True), + _request_timeout=params.get('_request_timeout'), + collection_formats=collection_formats) + + def datasets_create_version_by_id(self, id, dataset_new_version_request, **kwargs): # noqa: E501 + """Create a new dataset version by id # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.datasets_create_version_by_id(id, dataset_new_version_request, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param int id: Dataset ID (required) + :param DatasetNewVersionRequest dataset_new_version_request: Information for creating a new dataset version (required) + :return: Result + If the method is called asynchronously, + returns the request thread. + """ + kwargs['_return_http_data_only'] = True + if kwargs.get('async_req'): + return self.datasets_create_version_by_id_with_http_info(id, dataset_new_version_request, **kwargs) # noqa: E501 + else: + (data) = self.datasets_create_version_by_id_with_http_info(id, dataset_new_version_request, **kwargs) # noqa: E501 + return data + + def datasets_create_version_by_id_with_http_info(self, id, dataset_new_version_request, **kwargs): # noqa: E501 + """Create a new dataset version by id # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.datasets_create_version_by_id_with_http_info(id, dataset_new_version_request, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param int id: Dataset ID (required) + :param DatasetNewVersionRequest dataset_new_version_request: Information for creating a new dataset version (required) + :return: Result + If the method is called asynchronously, + returns the request thread. 
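+
+ A synchronous sketch (hypothetical dataset id; ``version_request`` is a
+ populated DatasetNewVersionRequest):
+ >>> result = api.datasets_create_version_by_id_with_http_info(12345, version_request)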
+ """ + + all_params = ['id', 'dataset_new_version_request'] # noqa: E501 + all_params.append('async_req') + all_params.append('_return_http_data_only') + all_params.append('_preload_content') + all_params.append('_request_timeout') + + params = locals() + for key, val in six.iteritems(params['kwargs']): + if key not in all_params: + raise TypeError( + "Got an unexpected keyword argument '%s'" + " to method datasets_create_version_by_id" % key + ) + params[key] = val + del params['kwargs'] + # verify the required parameter 'id' is set + if ('id' not in params or + params['id'] is None): + raise ValueError("Missing the required parameter `id` when calling `datasets_create_version_by_id`") # noqa: E501 + # verify the required parameter 'dataset_new_version_request' is set + if ('dataset_new_version_request' not in params or + params['dataset_new_version_request'] is None): + raise ValueError("Missing the required parameter `dataset_new_version_request` when calling `datasets_create_version_by_id`") # noqa: E501 + + collection_formats = {} + + path_params = {} + if 'id' in params: + path_params['id'] = params['id'] # noqa: E501 + + query_params = [] + + header_params = {} + + form_params = [] + local_var_files = {} + + body_params = None + if 'dataset_new_version_request' in params: + body_params = params['dataset_new_version_request'] + # HTTP header `Accept` + header_params['Accept'] = self.api_client.select_header_accept( + ['application/json']) # noqa: E501 + + # HTTP header `Content-Type` + header_params['Content-Type'] = self.api_client.select_header_content_type( # noqa: E501 + ['application/json']) # noqa: E501 + + # Authentication setting + auth_settings = ['basicAuth'] # noqa: E501 + + return self.api_client.call_api( + '/datasets/create/version/{id}', 'POST', + path_params, + query_params, + header_params, + body=body_params, + post_params=form_params, + files=local_var_files, + response_type='Result', # noqa: E501 + auth_settings=auth_settings, + async_req=params.get('async_req'), + _return_http_data_only=params.get('_return_http_data_only'), + _preload_content=params.get('_preload_content', True), + _request_timeout=params.get('_request_timeout'), + collection_formats=collection_formats) + + def datasets_download(self, owner_slug, dataset_slug, **kwargs): # noqa: E501 + """Download dataset file # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.datasets_download(owner_slug, dataset_slug, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param str owner_slug: Dataset owner (required) + :param str dataset_slug: Dataset name (required) + :param str dataset_version_number: Dataset version number + :return: Result + If the method is called asynchronously, + returns the request thread. + """ + kwargs['_return_http_data_only'] = True + if kwargs.get('async_req'): + return self.datasets_download_with_http_info(owner_slug, dataset_slug, **kwargs) # noqa: E501 + else: + (data) = self.datasets_download_with_http_info(owner_slug, dataset_slug, **kwargs) # noqa: E501 + return data + + def datasets_download_with_http_info(self, owner_slug, dataset_slug, **kwargs): # noqa: E501 + """Download dataset file # noqa: E501 + + This method makes a synchronous HTTP request by default. 
To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.datasets_download_with_http_info(owner_slug, dataset_slug, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param str owner_slug: Dataset owner (required) + :param str dataset_slug: Dataset name (required) + :param str dataset_version_number: Dataset version number + :return: Result + If the method is called asynchronously, + returns the request thread. + """ + + all_params = ['owner_slug', 'dataset_slug', 'dataset_version_number'] # noqa: E501 + all_params.append('async_req') + all_params.append('_return_http_data_only') + all_params.append('_preload_content') + all_params.append('_request_timeout') + + params = locals() + for key, val in six.iteritems(params['kwargs']): + if key not in all_params: + raise TypeError( + "Got an unexpected keyword argument '%s'" + " to method datasets_download" % key + ) + params[key] = val + del params['kwargs'] + # verify the required parameter 'owner_slug' is set + if ('owner_slug' not in params or + params['owner_slug'] is None): + raise ValueError("Missing the required parameter `owner_slug` when calling `datasets_download`") # noqa: E501 + # verify the required parameter 'dataset_slug' is set + if ('dataset_slug' not in params or + params['dataset_slug'] is None): + raise ValueError("Missing the required parameter `dataset_slug` when calling `datasets_download`") # noqa: E501 + + collection_formats = {} + + path_params = {} + if 'owner_slug' in params: + path_params['ownerSlug'] = params['owner_slug'] # noqa: E501 + if 'dataset_slug' in params: + path_params['datasetSlug'] = params['dataset_slug'] # noqa: E501 + + query_params = [] + if 'dataset_version_number' in params: + query_params.append(('datasetVersionNumber', params['dataset_version_number'])) # noqa: E501 + + header_params = {} + + form_params = [] + local_var_files = {} + + body_params = None + # HTTP header `Accept` + header_params['Accept'] = self.api_client.select_header_accept( + ['file']) # noqa: E501 + + # Authentication setting + auth_settings = ['basicAuth'] # noqa: E501 + + return self.api_client.call_api( + '/datasets/download/{ownerSlug}/{datasetSlug}', 'GET', + path_params, + query_params, + header_params, + body=body_params, + post_params=form_params, + files=local_var_files, + response_type='Result', # noqa: E501 + auth_settings=auth_settings, + async_req=params.get('async_req'), + _return_http_data_only=params.get('_return_http_data_only'), + _preload_content=params.get('_preload_content', True), + _request_timeout=params.get('_request_timeout'), + collection_formats=collection_formats) + + def datasets_download_file(self, owner_slug, dataset_slug, file_name, **kwargs): # noqa: E501 + """Download dataset file # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.datasets_download_file(owner_slug, dataset_slug, file_name, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param str owner_slug: Dataset owner (required) + :param str dataset_slug: Dataset name (required) + :param str file_name: File name (required) + :param str dataset_version_number: Dataset version number + :return: Result + If the method is called asynchronously, + returns the request thread. 
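A sketch of the download endpoints above, assuming configured credentials; 'owner', 'dataset' and 'train.csv' are placeholders. Passing _preload_content=False (one of the underscore-prefixed pass-through options every generated method accepts) should return the raw HTTP response so the bytes can be written to disk instead of being deserialized as a Result.

from kaggle.api.kaggle_api_extended import KaggleApi

api = KaggleApi()
api.authenticate()

# GET /datasets/download/{ownerSlug}/{datasetSlug}/{fileName}
response = api.datasets_download_file(
    'owner', 'dataset', 'train.csv', _preload_content=False)
with open('train.csv', 'wb') as out:
    out.write(response.data)  # raw bytes from the underlying urllib3 response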
+ """ + kwargs['_return_http_data_only'] = True + if kwargs.get('async_req'): + return self.datasets_download_file_with_http_info(owner_slug, dataset_slug, file_name, **kwargs) # noqa: E501 + else: + (data) = self.datasets_download_file_with_http_info(owner_slug, dataset_slug, file_name, **kwargs) # noqa: E501 + return data + + def datasets_download_file_with_http_info(self, owner_slug, dataset_slug, file_name, **kwargs): # noqa: E501 + """Download dataset file # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.datasets_download_file_with_http_info(owner_slug, dataset_slug, file_name, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param str owner_slug: Dataset owner (required) + :param str dataset_slug: Dataset name (required) + :param str file_name: File name (required) + :param str dataset_version_number: Dataset version number + :return: Result + If the method is called asynchronously, + returns the request thread. + """ + + all_params = ['owner_slug', 'dataset_slug', 'file_name', 'dataset_version_number'] # noqa: E501 + all_params.append('async_req') + all_params.append('_return_http_data_only') + all_params.append('_preload_content') + all_params.append('_request_timeout') + + params = locals() + for key, val in six.iteritems(params['kwargs']): + if key not in all_params: + raise TypeError( + "Got an unexpected keyword argument '%s'" + " to method datasets_download_file" % key + ) + params[key] = val + del params['kwargs'] + # verify the required parameter 'owner_slug' is set + if ('owner_slug' not in params or + params['owner_slug'] is None): + raise ValueError("Missing the required parameter `owner_slug` when calling `datasets_download_file`") # noqa: E501 + # verify the required parameter 'dataset_slug' is set + if ('dataset_slug' not in params or + params['dataset_slug'] is None): + raise ValueError("Missing the required parameter `dataset_slug` when calling `datasets_download_file`") # noqa: E501 + # verify the required parameter 'file_name' is set + if ('file_name' not in params or + params['file_name'] is None): + raise ValueError("Missing the required parameter `file_name` when calling `datasets_download_file`") # noqa: E501 + + collection_formats = {} + + path_params = {} + if 'owner_slug' in params: + path_params['ownerSlug'] = params['owner_slug'] # noqa: E501 + if 'dataset_slug' in params: + path_params['datasetSlug'] = params['dataset_slug'] # noqa: E501 + if 'file_name' in params: + path_params['fileName'] = params['file_name'] # noqa: E501 + + query_params = [] + if 'dataset_version_number' in params: + query_params.append(('datasetVersionNumber', params['dataset_version_number'])) # noqa: E501 + + header_params = {} + + form_params = [] + local_var_files = {} + + body_params = None + # HTTP header `Accept` + header_params['Accept'] = self.api_client.select_header_accept( + ['file']) # noqa: E501 + + # Authentication setting + auth_settings = ['basicAuth'] # noqa: E501 + + return self.api_client.call_api( + '/datasets/download/{ownerSlug}/{datasetSlug}/{fileName}', 'GET', + path_params, + query_params, + header_params, + body=body_params, + post_params=form_params, + files=local_var_files, + response_type='Result', # noqa: E501 + auth_settings=auth_settings, + async_req=params.get('async_req'), + _return_http_data_only=params.get('_return_http_data_only'), + _preload_content=params.get('_preload_content', True), + 
_request_timeout=params.get('_request_timeout'), + collection_formats=collection_formats) + + def datasets_list(self, **kwargs): # noqa: E501 + """List datasets # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.datasets_list(async_req=True) + >>> result = thread.get() + + :param async_req bool + :param str group: Display datasets by a particular group + :param str sort_by: Sort the results + :param str size: (DEPRECATED). Please use --max-size and --min-size to filter dataset sizes. + :param str filetype: Display datasets of a specific file type + :param str license: Display datasets with a specific license + :param str tagids: A comma separated list of tags to filter by + :param str search: Search terms + :param str user: Display datasets by a specific user or organization + :param int page: Page number + :param int max_size: Max Dataset Size (bytes) + :param int min_size: Min Dataset Size (bytes) + :return: Result + If the method is called asynchronously, + returns the request thread. + """ + kwargs['_return_http_data_only'] = True + if kwargs.get('async_req'): + return self.datasets_list_with_http_info(**kwargs) # noqa: E501 + else: + (data) = self.datasets_list_with_http_info(**kwargs) # noqa: E501 + return data + + def datasets_list_with_http_info(self, **kwargs): # noqa: E501 + """List datasets # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.datasets_list_with_http_info(async_req=True) + >>> result = thread.get() + + :param async_req bool + :param str group: Display datasets by a particular group + :param str sort_by: Sort the results + :param str size: (DEPRECATED). Please use --max-size and --min-size to filter dataset sizes. + :param str filetype: Display datasets of a specific file type + :param str license: Display datasets with a specific license + :param str tagids: A comma separated list of tags to filter by + :param str search: Search terms + :param str user: Display datasets by a specific user or organization + :param int page: Page number + :param int max_size: Max Dataset Size (bytes) + :param int min_size: Min Dataset Size (bytes) + :return: Result + If the method is called asynchronously, + returns the request thread.
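A short usage sketch for datasets_list, assuming configured credentials; the keyword names mirror the docstring above, and 'hottest' as a sort order is an assumption about the live API, not something stated in this file.

from kaggle.api.kaggle_api_extended import KaggleApi

api = KaggleApi()
api.authenticate()

# Keyword arguments become query parameters on GET /datasets/list
result = api.datasets_list(search='new york', sort_by='hottest', page=1)
print(result)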
+ """ + + all_params = ['group', 'sort_by', 'size', 'filetype', 'license', 'tagids', 'search', 'user', 'page', 'max_size', 'min_size'] # noqa: E501 + all_params.append('async_req') + all_params.append('_return_http_data_only') + all_params.append('_preload_content') + all_params.append('_request_timeout') + + params = locals() + for key, val in six.iteritems(params['kwargs']): + if key not in all_params: + raise TypeError( + "Got an unexpected keyword argument '%s'" + " to method datasets_list" % key + ) + params[key] = val + del params['kwargs'] + + collection_formats = {} + + path_params = {} + + query_params = [] + if 'group' in params: + query_params.append(('group', params['group'])) # noqa: E501 + if 'sort_by' in params: + query_params.append(('sortBy', params['sort_by'])) # noqa: E501 + if 'size' in params: + query_params.append(('size', params['size'])) # noqa: E501 + if 'filetype' in params: + query_params.append(('filetype', params['filetype'])) # noqa: E501 + if 'license' in params: + query_params.append(('license', params['license'])) # noqa: E501 + if 'tagids' in params: + query_params.append(('tagids', params['tagids'])) # noqa: E501 + if 'search' in params: + query_params.append(('search', params['search'])) # noqa: E501 + if 'user' in params: + query_params.append(('user', params['user'])) # noqa: E501 + if 'page' in params: + query_params.append(('page', params['page'])) # noqa: E501 + if 'max_size' in params: + query_params.append(('maxSize', params['max_size'])) # noqa: E501 + if 'min_size' in params: + query_params.append(('minSize', params['min_size'])) # noqa: E501 + + header_params = {} + + form_params = [] + local_var_files = {} + + body_params = None + # HTTP header `Accept` + header_params['Accept'] = self.api_client.select_header_accept( + ['application/json']) # noqa: E501 + + # Authentication setting + auth_settings = ['basicAuth'] # noqa: E501 + + return self.api_client.call_api( + '/datasets/list', 'GET', + path_params, + query_params, + header_params, + body=body_params, + post_params=form_params, + files=local_var_files, + response_type='Result', # noqa: E501 + auth_settings=auth_settings, + async_req=params.get('async_req'), + _return_http_data_only=params.get('_return_http_data_only'), + _preload_content=params.get('_preload_content', True), + _request_timeout=params.get('_request_timeout'), + collection_formats=collection_formats) + + def datasets_list_files(self, owner_slug, dataset_slug, **kwargs): # noqa: E501 + """List dataset files # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.datasets_list_files(owner_slug, dataset_slug, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param str owner_slug: Dataset owner (required) + :param str dataset_slug: Dataset name (required) + :return: Result + If the method is called asynchronously, + returns the request thread. + """ + kwargs['_return_http_data_only'] = True + if kwargs.get('async_req'): + return self.datasets_list_files_with_http_info(owner_slug, dataset_slug, **kwargs) # noqa: E501 + else: + (data) = self.datasets_list_files_with_http_info(owner_slug, dataset_slug, **kwargs) # noqa: E501 + return data + + def datasets_list_files_with_http_info(self, owner_slug, dataset_slug, **kwargs): # noqa: E501 + """List dataset files # noqa: E501 + + This method makes a synchronous HTTP request by default. 
To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.datasets_list_files_with_http_info(owner_slug, dataset_slug, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param str owner_slug: Dataset owner (required) + :param str dataset_slug: Dataset name (required) + :return: Result + If the method is called asynchronously, + returns the request thread. + """ + + all_params = ['owner_slug', 'dataset_slug'] # noqa: E501 + all_params.append('async_req') + all_params.append('_return_http_data_only') + all_params.append('_preload_content') + all_params.append('_request_timeout') + + params = locals() + for key, val in six.iteritems(params['kwargs']): + if key not in all_params: + raise TypeError( + "Got an unexpected keyword argument '%s'" + " to method datasets_list_files" % key + ) + params[key] = val + del params['kwargs'] + # verify the required parameter 'owner_slug' is set + if ('owner_slug' not in params or + params['owner_slug'] is None): + raise ValueError("Missing the required parameter `owner_slug` when calling `datasets_list_files`") # noqa: E501 + # verify the required parameter 'dataset_slug' is set + if ('dataset_slug' not in params or + params['dataset_slug'] is None): + raise ValueError("Missing the required parameter `dataset_slug` when calling `datasets_list_files`") # noqa: E501 + + collection_formats = {} + + path_params = {} + if 'owner_slug' in params: + path_params['ownerSlug'] = params['owner_slug'] # noqa: E501 + if 'dataset_slug' in params: + path_params['datasetSlug'] = params['dataset_slug'] # noqa: E501 + + query_params = [] + + header_params = {} + + form_params = [] + local_var_files = {} + + body_params = None + # HTTP header `Accept` + header_params['Accept'] = self.api_client.select_header_accept( + ['application/json']) # noqa: E501 + + # Authentication setting + auth_settings = ['basicAuth'] # noqa: E501 + + return self.api_client.call_api( + '/datasets/list/{ownerSlug}/{datasetSlug}', 'GET', + path_params, + query_params, + header_params, + body=body_params, + post_params=form_params, + files=local_var_files, + response_type='Result', # noqa: E501 + auth_settings=auth_settings, + async_req=params.get('async_req'), + _return_http_data_only=params.get('_return_http_data_only'), + _preload_content=params.get('_preload_content', True), + _request_timeout=params.get('_request_timeout'), + collection_formats=collection_formats) + + def datasets_status(self, owner_slug, dataset_slug, **kwargs): # noqa: E501 + """Get dataset creation status # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.datasets_status(owner_slug, dataset_slug, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param str owner_slug: Dataset owner (required) + :param str dataset_slug: Dataset name (required) + :return: Result + If the method is called asynchronously, + returns the request thread. + """ + kwargs['_return_http_data_only'] = True + if kwargs.get('async_req'): + return self.datasets_status_with_http_info(owner_slug, dataset_slug, **kwargs) # noqa: E501 + else: + (data) = self.datasets_status_with_http_info(owner_slug, dataset_slug, **kwargs) # noqa: E501 + return data + + def datasets_status_with_http_info(self, owner_slug, dataset_slug, **kwargs): # noqa: E501 + """Get dataset creation status # noqa: E501 + + This method makes a synchronous HTTP request by default. 
To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.datasets_status_with_http_info(owner_slug, dataset_slug, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param str owner_slug: Dataset owner (required) + :param str dataset_slug: Dataset name (required) + :return: Result + If the method is called asynchronously, + returns the request thread. + """ + + all_params = ['owner_slug', 'dataset_slug'] # noqa: E501 + all_params.append('async_req') + all_params.append('_return_http_data_only') + all_params.append('_preload_content') + all_params.append('_request_timeout') + + params = locals() + for key, val in six.iteritems(params['kwargs']): + if key not in all_params: + raise TypeError( + "Got an unexpected keyword argument '%s'" + " to method datasets_status" % key + ) + params[key] = val + del params['kwargs'] + # verify the required parameter 'owner_slug' is set + if ('owner_slug' not in params or + params['owner_slug'] is None): + raise ValueError("Missing the required parameter `owner_slug` when calling `datasets_status`") # noqa: E501 + # verify the required parameter 'dataset_slug' is set + if ('dataset_slug' not in params or + params['dataset_slug'] is None): + raise ValueError("Missing the required parameter `dataset_slug` when calling `datasets_status`") # noqa: E501 + + collection_formats = {} + + path_params = {} + if 'owner_slug' in params: + path_params['ownerSlug'] = params['owner_slug'] # noqa: E501 + if 'dataset_slug' in params: + path_params['datasetSlug'] = params['dataset_slug'] # noqa: E501 + + query_params = [] + + header_params = {} + + form_params = [] + local_var_files = {} + + body_params = None + # HTTP header `Accept` + header_params['Accept'] = self.api_client.select_header_accept( + ['application/json']) # noqa: E501 + + # Authentication setting + auth_settings = ['basicAuth'] # noqa: E501 + + return self.api_client.call_api( + '/datasets/status/{ownerSlug}/{datasetSlug}', 'GET', + path_params, + query_params, + header_params, + body=body_params, + post_params=form_params, + files=local_var_files, + response_type='Result', # noqa: E501 + auth_settings=auth_settings, + async_req=params.get('async_req'), + _return_http_data_only=params.get('_return_http_data_only'), + _preload_content=params.get('_preload_content', True), + _request_timeout=params.get('_request_timeout'), + collection_formats=collection_formats) + + def datasets_upload_file(self, file_name, content_length, last_modified_date_utc, **kwargs): # noqa: E501 + """Get URL and token to start uploading a data file # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.datasets_upload_file(file_name, content_length, last_modified_date_utc, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param str file_name: Dataset file name (required) + :param int content_length: Content length of file in bytes (required) + :param int last_modified_date_utc: Last modified date of file in milliseconds since epoch in UTC (required) + :return: Result + If the method is called asynchronously, + returns the request thread. 
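A sketch of the upload handshake documented above, assuming './data.csv' exists and credentials are configured: content_length is the size in bytes and last_modified_date_utc is milliseconds since the epoch, both derived here from the file itself.

import os

from kaggle.api.kaggle_api_extended import KaggleApi

api = KaggleApi()
api.authenticate()

path = './data.csv'
content_length = os.path.getsize(path)                 # bytes
last_modified_ms = int(os.path.getmtime(path) * 1000)  # ms since epoch, UTC
info = api.datasets_upload_file(
    os.path.basename(path), content_length, last_modified_ms)
# 'info' should carry the URL and token used to upload the actual bytes.
print(info)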
+ """ + kwargs['_return_http_data_only'] = True + if kwargs.get('async_req'): + return self.datasets_upload_file_with_http_info(file_name, content_length, last_modified_date_utc, **kwargs) # noqa: E501 + else: + (data) = self.datasets_upload_file_with_http_info(file_name, content_length, last_modified_date_utc, **kwargs) # noqa: E501 + return data + + def datasets_upload_file_with_http_info(self, file_name, content_length, last_modified_date_utc, **kwargs): # noqa: E501 + """Get URL and token to start uploading a data file # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.datasets_upload_file_with_http_info(file_name, content_length, last_modified_date_utc, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param str file_name: Dataset file name (required) + :param int content_length: Content length of file in bytes (required) + :param int last_modified_date_utc: Last modified date of file in milliseconds since epoch in UTC (required) + :return: Result + If the method is called asynchronously, + returns the request thread. + """ + + all_params = ['file_name', 'content_length', 'last_modified_date_utc'] # noqa: E501 + all_params.append('async_req') + all_params.append('_return_http_data_only') + all_params.append('_preload_content') + all_params.append('_request_timeout') + + params = locals() + for key, val in six.iteritems(params['kwargs']): + if key not in all_params: + raise TypeError( + "Got an unexpected keyword argument '%s'" + " to method datasets_upload_file" % key + ) + params[key] = val + del params['kwargs'] + # verify the required parameter 'file_name' is set + if ('file_name' not in params or + params['file_name'] is None): + raise ValueError("Missing the required parameter `file_name` when calling `datasets_upload_file`") # noqa: E501 + # verify the required parameter 'content_length' is set + if ('content_length' not in params or + params['content_length'] is None): + raise ValueError("Missing the required parameter `content_length` when calling `datasets_upload_file`") # noqa: E501 + # verify the required parameter 'last_modified_date_utc' is set + if ('last_modified_date_utc' not in params or + params['last_modified_date_utc'] is None): + raise ValueError("Missing the required parameter `last_modified_date_utc` when calling `datasets_upload_file`") # noqa: E501 + + collection_formats = {} + + path_params = {} + if 'content_length' in params: + path_params['contentLength'] = params['content_length'] # noqa: E501 + if 'last_modified_date_utc' in params: + path_params['lastModifiedDateUtc'] = params['last_modified_date_utc'] # noqa: E501 + + query_params = [] + + header_params = {} + + form_params = [] + local_var_files = {} + if 'file_name' in params: + form_params.append(('fileName', params['file_name'])) # noqa: E501 + + body_params = None + # HTTP header `Accept` + header_params['Accept'] = self.api_client.select_header_accept( + ['application/json']) # noqa: E501 + + # HTTP header `Content-Type` + header_params['Content-Type'] = self.api_client.select_header_content_type( # noqa: E501 + ['multipart/form-data']) # noqa: E501 + + # Authentication setting + auth_settings = ['basicAuth'] # noqa: E501 + + return self.api_client.call_api( + '/datasets/upload/file/{contentLength}/{lastModifiedDateUtc}', 'POST', + path_params, + query_params, + header_params, + body=body_params, + post_params=form_params, + files=local_var_files, + 
response_type='Result', # noqa: E501 + auth_settings=auth_settings, + async_req=params.get('async_req'), + _return_http_data_only=params.get('_return_http_data_only'), + _preload_content=params.get('_preload_content', True), + _request_timeout=params.get('_request_timeout'), + collection_formats=collection_formats) + + def datasets_view(self, owner_slug, dataset_slug, **kwargs): # noqa: E501 + """Show details about a dataset # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.datasets_view(owner_slug, dataset_slug, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param str owner_slug: Dataset owner (required) + :param str dataset_slug: Dataset name (required) + :return: Result + If the method is called asynchronously, + returns the request thread. + """ + kwargs['_return_http_data_only'] = True + if kwargs.get('async_req'): + return self.datasets_view_with_http_info(owner_slug, dataset_slug, **kwargs) # noqa: E501 + else: + (data) = self.datasets_view_with_http_info(owner_slug, dataset_slug, **kwargs) # noqa: E501 + return data + + def datasets_view_with_http_info(self, owner_slug, dataset_slug, **kwargs): # noqa: E501 + """Show details about a dataset # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.datasets_view_with_http_info(owner_slug, dataset_slug, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param str owner_slug: Dataset owner (required) + :param str dataset_slug: Dataset name (required) + :return: Result + If the method is called asynchronously, + returns the request thread. 
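A sketch contrasting the blocking call with the async_req=True pattern shown in every docstring in this file; 'owner'/'dataset' are placeholders and credentials are assumed to be configured.

from kaggle.api.kaggle_api_extended import KaggleApi

api = KaggleApi()
api.authenticate()

details = api.datasets_view('owner', 'dataset')  # blocks until the response arrives
thread = api.datasets_view('owner', 'dataset', async_req=True)
details_async = thread.get()  # join the worker thread and fetch the same Result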
+ """ + + all_params = ['owner_slug', 'dataset_slug'] # noqa: E501 + all_params.append('async_req') + all_params.append('_return_http_data_only') + all_params.append('_preload_content') + all_params.append('_request_timeout') + + params = locals() + for key, val in six.iteritems(params['kwargs']): + if key not in all_params: + raise TypeError( + "Got an unexpected keyword argument '%s'" + " to method datasets_view" % key + ) + params[key] = val + del params['kwargs'] + # verify the required parameter 'owner_slug' is set + if ('owner_slug' not in params or + params['owner_slug'] is None): + raise ValueError("Missing the required parameter `owner_slug` when calling `datasets_view`") # noqa: E501 + # verify the required parameter 'dataset_slug' is set + if ('dataset_slug' not in params or + params['dataset_slug'] is None): + raise ValueError("Missing the required parameter `dataset_slug` when calling `datasets_view`") # noqa: E501 + + collection_formats = {} + + path_params = {} + if 'owner_slug' in params: + path_params['ownerSlug'] = params['owner_slug'] # noqa: E501 + if 'dataset_slug' in params: + path_params['datasetSlug'] = params['dataset_slug'] # noqa: E501 + + query_params = [] + + header_params = {} + + form_params = [] + local_var_files = {} + + body_params = None + # HTTP header `Accept` + header_params['Accept'] = self.api_client.select_header_accept( + ['application/json']) # noqa: E501 + + # Authentication setting + auth_settings = ['basicAuth'] # noqa: E501 + + return self.api_client.call_api( + '/datasets/view/{ownerSlug}/{datasetSlug}', 'GET', + path_params, + query_params, + header_params, + body=body_params, + post_params=form_params, + files=local_var_files, + response_type='Result', # noqa: E501 + auth_settings=auth_settings, + async_req=params.get('async_req'), + _return_http_data_only=params.get('_return_http_data_only'), + _preload_content=params.get('_preload_content', True), + _request_timeout=params.get('_request_timeout'), + collection_formats=collection_formats) + + def kernel_output(self, user_name, kernel_slug, **kwargs): # noqa: E501 + """Download the latest output from a kernel # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.kernel_output(user_name, kernel_slug, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param str user_name: Kernel owner (required) + :param str kernel_slug: Kernel name (required) + :return: Result + If the method is called asynchronously, + returns the request thread. + """ + kwargs['_return_http_data_only'] = True + if kwargs.get('async_req'): + return self.kernel_output_with_http_info(user_name, kernel_slug, **kwargs) # noqa: E501 + else: + (data) = self.kernel_output_with_http_info(user_name, kernel_slug, **kwargs) # noqa: E501 + return data + + def kernel_output_with_http_info(self, user_name, kernel_slug, **kwargs): # noqa: E501 + """Download the latest output from a kernel # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.kernel_output_with_http_info(user_name, kernel_slug, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param str user_name: Kernel owner (required) + :param str kernel_slug: Kernel name (required) + :return: Result + If the method is called asynchronously, + returns the request thread. 
+ """ + + all_params = ['user_name', 'kernel_slug'] # noqa: E501 + all_params.append('async_req') + all_params.append('_return_http_data_only') + all_params.append('_preload_content') + all_params.append('_request_timeout') + + params = locals() + for key, val in six.iteritems(params['kwargs']): + if key not in all_params: + raise TypeError( + "Got an unexpected keyword argument '%s'" + " to method kernel_output" % key + ) + params[key] = val + del params['kwargs'] + # verify the required parameter 'user_name' is set + if ('user_name' not in params or + params['user_name'] is None): + raise ValueError("Missing the required parameter `user_name` when calling `kernel_output`") # noqa: E501 + # verify the required parameter 'kernel_slug' is set + if ('kernel_slug' not in params or + params['kernel_slug'] is None): + raise ValueError("Missing the required parameter `kernel_slug` when calling `kernel_output`") # noqa: E501 + + collection_formats = {} + + path_params = {} + + query_params = [] + if 'user_name' in params: + query_params.append(('userName', params['user_name'])) # noqa: E501 + if 'kernel_slug' in params: + query_params.append(('kernelSlug', params['kernel_slug'])) # noqa: E501 + + header_params = {} + + form_params = [] + local_var_files = {} + + body_params = None + # HTTP header `Accept` + header_params['Accept'] = self.api_client.select_header_accept( + ['application/json']) # noqa: E501 + + # Authentication setting + auth_settings = ['basicAuth'] # noqa: E501 + + return self.api_client.call_api( + '/kernels/output', 'GET', + path_params, + query_params, + header_params, + body=body_params, + post_params=form_params, + files=local_var_files, + response_type='Result', # noqa: E501 + auth_settings=auth_settings, + async_req=params.get('async_req'), + _return_http_data_only=params.get('_return_http_data_only'), + _preload_content=params.get('_preload_content', True), + _request_timeout=params.get('_request_timeout'), + collection_formats=collection_formats) + + def kernel_pull(self, user_name, kernel_slug, **kwargs): # noqa: E501 + """Pull the latest code from a kernel # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.kernel_pull(user_name, kernel_slug, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param str user_name: Kernel owner (required) + :param str kernel_slug: Kernel name (required) + :return: Result + If the method is called asynchronously, + returns the request thread. + """ + kwargs['_return_http_data_only'] = True + if kwargs.get('async_req'): + return self.kernel_pull_with_http_info(user_name, kernel_slug, **kwargs) # noqa: E501 + else: + (data) = self.kernel_pull_with_http_info(user_name, kernel_slug, **kwargs) # noqa: E501 + return data + + def kernel_pull_with_http_info(self, user_name, kernel_slug, **kwargs): # noqa: E501 + """Pull the latest code from a kernel # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.kernel_pull_with_http_info(user_name, kernel_slug, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param str user_name: Kernel owner (required) + :param str kernel_slug: Kernel name (required) + :return: Result + If the method is called asynchronously, + returns the request thread. 
+ """ + + all_params = ['user_name', 'kernel_slug'] # noqa: E501 + all_params.append('async_req') + all_params.append('_return_http_data_only') + all_params.append('_preload_content') + all_params.append('_request_timeout') + + params = locals() + for key, val in six.iteritems(params['kwargs']): + if key not in all_params: + raise TypeError( + "Got an unexpected keyword argument '%s'" + " to method kernel_pull" % key + ) + params[key] = val + del params['kwargs'] + # verify the required parameter 'user_name' is set + if ('user_name' not in params or + params['user_name'] is None): + raise ValueError("Missing the required parameter `user_name` when calling `kernel_pull`") # noqa: E501 + # verify the required parameter 'kernel_slug' is set + if ('kernel_slug' not in params or + params['kernel_slug'] is None): + raise ValueError("Missing the required parameter `kernel_slug` when calling `kernel_pull`") # noqa: E501 + + collection_formats = {} + + path_params = {} + + query_params = [] + if 'user_name' in params: + query_params.append(('userName', params['user_name'])) # noqa: E501 + if 'kernel_slug' in params: + query_params.append(('kernelSlug', params['kernel_slug'])) # noqa: E501 + + header_params = {} + + form_params = [] + local_var_files = {} + + body_params = None + # HTTP header `Accept` + header_params['Accept'] = self.api_client.select_header_accept( + ['application/json']) # noqa: E501 + + # Authentication setting + auth_settings = ['basicAuth'] # noqa: E501 + + return self.api_client.call_api( + '/kernels/pull', 'GET', + path_params, + query_params, + header_params, + body=body_params, + post_params=form_params, + files=local_var_files, + response_type='Result', # noqa: E501 + auth_settings=auth_settings, + async_req=params.get('async_req'), + _return_http_data_only=params.get('_return_http_data_only'), + _preload_content=params.get('_preload_content', True), + _request_timeout=params.get('_request_timeout'), + collection_formats=collection_formats) + + def kernel_push(self, kernel_push_request, **kwargs): # noqa: E501 + """Push a new kernel version. Can be used to create a new kernel and update an existing one. # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.kernel_push(kernel_push_request, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param KernelPushRequest kernel_push_request: Information for pushing a new kernel version (required) + :return: Result + If the method is called asynchronously, + returns the request thread. + """ + kwargs['_return_http_data_only'] = True + if kwargs.get('async_req'): + return self.kernel_push_with_http_info(kernel_push_request, **kwargs) # noqa: E501 + else: + (data) = self.kernel_push_with_http_info(kernel_push_request, **kwargs) # noqa: E501 + return data + + def kernel_push_with_http_info(self, kernel_push_request, **kwargs): # noqa: E501 + """Push a new kernel version. Can be used to create a new kernel and update an existing one. # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.kernel_push_with_http_info(kernel_push_request, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param KernelPushRequest kernel_push_request: Information for pushing a new kernel version (required) + :return: Result + If the method is called asynchronously, + returns the request thread. 
+ """ + + all_params = ['kernel_push_request'] # noqa: E501 + all_params.append('async_req') + all_params.append('_return_http_data_only') + all_params.append('_preload_content') + all_params.append('_request_timeout') + + params = locals() + for key, val in six.iteritems(params['kwargs']): + if key not in all_params: + raise TypeError( + "Got an unexpected keyword argument '%s'" + " to method kernel_push" % key + ) + params[key] = val + del params['kwargs'] + # verify the required parameter 'kernel_push_request' is set + if ('kernel_push_request' not in params or + params['kernel_push_request'] is None): + raise ValueError("Missing the required parameter `kernel_push_request` when calling `kernel_push`") # noqa: E501 + + collection_formats = {} + + path_params = {} + + query_params = [] + + header_params = {} + + form_params = [] + local_var_files = {} + + body_params = None + if 'kernel_push_request' in params: + body_params = params['kernel_push_request'] + # HTTP header `Accept` + header_params['Accept'] = self.api_client.select_header_accept( + ['application/json']) # noqa: E501 + + # HTTP header `Content-Type` + header_params['Content-Type'] = self.api_client.select_header_content_type( # noqa: E501 + ['application/json']) # noqa: E501 + + # Authentication setting + auth_settings = ['basicAuth'] # noqa: E501 + + return self.api_client.call_api( + '/kernels/push', 'POST', + path_params, + query_params, + header_params, + body=body_params, + post_params=form_params, + files=local_var_files, + response_type='Result', # noqa: E501 + auth_settings=auth_settings, + async_req=params.get('async_req'), + _return_http_data_only=params.get('_return_http_data_only'), + _preload_content=params.get('_preload_content', True), + _request_timeout=params.get('_request_timeout'), + collection_formats=collection_formats) + + def kernel_status(self, user_name, kernel_slug, **kwargs): # noqa: E501 + """Get the status of the latest kernel version # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.kernel_status(user_name, kernel_slug, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param str user_name: Kernel owner (required) + :param str kernel_slug: Kernel name (required) + :return: Result + If the method is called asynchronously, + returns the request thread. + """ + kwargs['_return_http_data_only'] = True + if kwargs.get('async_req'): + return self.kernel_status_with_http_info(user_name, kernel_slug, **kwargs) # noqa: E501 + else: + (data) = self.kernel_status_with_http_info(user_name, kernel_slug, **kwargs) # noqa: E501 + return data + + def kernel_status_with_http_info(self, user_name, kernel_slug, **kwargs): # noqa: E501 + """Get the status of the latest kernel version # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.kernel_status_with_http_info(user_name, kernel_slug, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param str user_name: Kernel owner (required) + :param str kernel_slug: Kernel name (required) + :return: Result + If the method is called asynchronously, + returns the request thread. 
+ """ + + all_params = ['user_name', 'kernel_slug'] # noqa: E501 + all_params.append('async_req') + all_params.append('_return_http_data_only') + all_params.append('_preload_content') + all_params.append('_request_timeout') + + params = locals() + for key, val in six.iteritems(params['kwargs']): + if key not in all_params: + raise TypeError( + "Got an unexpected keyword argument '%s'" + " to method kernel_status" % key + ) + params[key] = val + del params['kwargs'] + # verify the required parameter 'user_name' is set + if ('user_name' not in params or + params['user_name'] is None): + raise ValueError("Missing the required parameter `user_name` when calling `kernel_status`") # noqa: E501 + # verify the required parameter 'kernel_slug' is set + if ('kernel_slug' not in params or + params['kernel_slug'] is None): + raise ValueError("Missing the required parameter `kernel_slug` when calling `kernel_status`") # noqa: E501 + + collection_formats = {} + + path_params = {} + + query_params = [] + if 'user_name' in params: + query_params.append(('userName', params['user_name'])) # noqa: E501 + if 'kernel_slug' in params: + query_params.append(('kernelSlug', params['kernel_slug'])) # noqa: E501 + + header_params = {} + + form_params = [] + local_var_files = {} + + body_params = None + # HTTP header `Accept` + header_params['Accept'] = self.api_client.select_header_accept( + ['application/json']) # noqa: E501 + + # Authentication setting + auth_settings = ['basicAuth'] # noqa: E501 + + return self.api_client.call_api( + '/kernels/status', 'GET', + path_params, + query_params, + header_params, + body=body_params, + post_params=form_params, + files=local_var_files, + response_type='Result', # noqa: E501 + auth_settings=auth_settings, + async_req=params.get('async_req'), + _return_http_data_only=params.get('_return_http_data_only'), + _preload_content=params.get('_preload_content', True), + _request_timeout=params.get('_request_timeout'), + collection_formats=collection_formats) + + def kernels_list(self, **kwargs): # noqa: E501 + """List kernels # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.kernels_list(async_req=True) + >>> result = thread.get() + + :param async_req bool + :param int page: Page number + :param int page_size: Page size + :param str search: Search terms + :param str group: Display only your kernels + :param str user: Display kernels by a particular group + :param str language: Display kernels in a specific language + :param str kernel_type: Display kernels of a specific type + :param str output_type: Display kernels with a specific output type + :param str sort_by: Sort the results. 'relevance' only works if there is a search query + :param str dataset: Display kernels using the specified dataset + :param str competition: Display kernels using the specified competition + :param str parent_kernel: Display kernels that have forked the specified kernel + :return: Result + If the method is called asynchronously, + returns the request thread. + """ + kwargs['_return_http_data_only'] = True + if kwargs.get('async_req'): + return self.kernels_list_with_http_info(**kwargs) # noqa: E501 + else: + (data) = self.kernels_list_with_http_info(**kwargs) # noqa: E501 + return data + + def kernels_list_with_http_info(self, **kwargs): # noqa: E501 + """List kernels # noqa: E501 + + This method makes a synchronous HTTP request by default. 
To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.kernels_list_with_http_info(async_req=True) + >>> result = thread.get() + + :param async_req bool + :param int page: Page number + :param int page_size: Page size + :param str search: Search terms + :param str group: Display only your kernels + :param str user: Display kernels by a particular group + :param str language: Display kernels in a specific language + :param str kernel_type: Display kernels of a specific type + :param str output_type: Display kernels with a specific output type + :param str sort_by: Sort the results. 'relevance' only works if there is a search query + :param str dataset: Display kernels using the specified dataset + :param str competition: Display kernels using the specified competition + :param str parent_kernel: Display kernels that have forked the specified kernel + :return: Result + If the method is called asynchronously, + returns the request thread. + """ + + all_params = ['page', 'page_size', 'search', 'group', 'user', 'language', 'kernel_type', 'output_type', 'sort_by', 'dataset', 'competition', 'parent_kernel'] # noqa: E501 + all_params.append('async_req') + all_params.append('_return_http_data_only') + all_params.append('_preload_content') + all_params.append('_request_timeout') + + params = locals() + for key, val in six.iteritems(params['kwargs']): + if key not in all_params: + raise TypeError( + "Got an unexpected keyword argument '%s'" + " to method kernels_list" % key + ) + params[key] = val + del params['kwargs'] + + collection_formats = {} + + path_params = {} + + query_params = [] + if 'page' in params: + query_params.append(('page', params['page'])) # noqa: E501 + if 'page_size' in params: + query_params.append(('pageSize', params['page_size'])) # noqa: E501 + if 'search' in params: + query_params.append(('search', params['search'])) # noqa: E501 + if 'group' in params: + query_params.append(('group', params['group'])) # noqa: E501 + if 'user' in params: + query_params.append(('user', params['user'])) # noqa: E501 + if 'language' in params: + query_params.append(('language', params['language'])) # noqa: E501 + if 'kernel_type' in params: + query_params.append(('kernelType', params['kernel_type'])) # noqa: E501 + if 'output_type' in params: + query_params.append(('outputType', params['output_type'])) # noqa: E501 + if 'sort_by' in params: + query_params.append(('sortBy', params['sort_by'])) # noqa: E501 + if 'dataset' in params: + query_params.append(('dataset', params['dataset'])) # noqa: E501 + if 'competition' in params: + query_params.append(('competition', params['competition'])) # noqa: E501 + if 'parent_kernel' in params: + query_params.append(('parentKernel', params['parent_kernel'])) # noqa: E501 + + header_params = {} + + form_params = [] + local_var_files = {} + + body_params = None + # HTTP header `Accept` + header_params['Accept'] = self.api_client.select_header_accept( + ['application/json']) # noqa: E501 + + # Authentication setting + auth_settings = ['basicAuth'] # noqa: E501 + + return self.api_client.call_api( + '/kernels/list', 'GET', + path_params, + query_params, + header_params, + body=body_params, + post_params=form_params, + files=local_var_files, + response_type='Result', # noqa: E501 + auth_settings=auth_settings, + async_req=params.get('async_req'), + _return_http_data_only=params.get('_return_http_data_only'), + _preload_content=params.get('_preload_content', True), + _request_timeout=params.get('_request_timeout'), + 
collection_formats=collection_formats) + + def metadata_get(self, owner_slug, dataset_slug, **kwargs): # noqa: E501 + """Get the metadata for a dataset # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.metadata_get(owner_slug, dataset_slug, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param str owner_slug: Dataset owner (required) + :param str dataset_slug: Dataset name (required) + :return: Result + If the method is called asynchronously, + returns the request thread. + """ + kwargs['_return_http_data_only'] = True + if kwargs.get('async_req'): + return self.metadata_get_with_http_info(owner_slug, dataset_slug, **kwargs) # noqa: E501 + else: + (data) = self.metadata_get_with_http_info(owner_slug, dataset_slug, **kwargs) # noqa: E501 + return data + + def metadata_get_with_http_info(self, owner_slug, dataset_slug, **kwargs): # noqa: E501 + """Get the metadata for a dataset # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.metadata_get_with_http_info(owner_slug, dataset_slug, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param str owner_slug: Dataset owner (required) + :param str dataset_slug: Dataset name (required) + :return: Result + If the method is called asynchronously, + returns the request thread. + """ + + all_params = ['owner_slug', 'dataset_slug'] # noqa: E501 + all_params.append('async_req') + all_params.append('_return_http_data_only') + all_params.append('_preload_content') + all_params.append('_request_timeout') + + params = locals() + for key, val in six.iteritems(params['kwargs']): + if key not in all_params: + raise TypeError( + "Got an unexpected keyword argument '%s'" + " to method metadata_get" % key + ) + params[key] = val + del params['kwargs'] + # verify the required parameter 'owner_slug' is set + if ('owner_slug' not in params or + params['owner_slug'] is None): + raise ValueError("Missing the required parameter `owner_slug` when calling `metadata_get`") # noqa: E501 + # verify the required parameter 'dataset_slug' is set + if ('dataset_slug' not in params or + params['dataset_slug'] is None): + raise ValueError("Missing the required parameter `dataset_slug` when calling `metadata_get`") # noqa: E501 + + collection_formats = {} + + path_params = {} + if 'owner_slug' in params: + path_params['ownerSlug'] = params['owner_slug'] # noqa: E501 + if 'dataset_slug' in params: + path_params['datasetSlug'] = params['dataset_slug'] # noqa: E501 + + query_params = [] + + header_params = {} + + form_params = [] + local_var_files = {} + + body_params = None + # HTTP header `Accept` + header_params['Accept'] = self.api_client.select_header_accept( + ['application/json']) # noqa: E501 + + # Authentication setting + auth_settings = ['basicAuth'] # noqa: E501 + + return self.api_client.call_api( + '/datasets/metadata/{ownerSlug}/{datasetSlug}', 'GET', + path_params, + query_params, + header_params, + body=body_params, + post_params=form_params, + files=local_var_files, + response_type='Result', # noqa: E501 + auth_settings=auth_settings, + async_req=params.get('async_req'), + _return_http_data_only=params.get('_return_http_data_only'), + _preload_content=params.get('_preload_content', True), + _request_timeout=params.get('_request_timeout'), + collection_formats=collection_formats) + + def metadata_post(self, 
owner_slug, dataset_slug, settings, **kwargs): # noqa: E501 + """Update the metadata for a dataset # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.metadata_post(owner_slug, dataset_slug, settings, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param str owner_slug: Dataset owner (required) + :param str dataset_slug: Dataset name (required) + :param DatasetUpdateSettingsRequest settings: Dataset metadata to update (required) + :return: Result + If the method is called asynchronously, + returns the request thread. + """ + kwargs['_return_http_data_only'] = True + if kwargs.get('async_req'): + return self.metadata_post_with_http_info(owner_slug, dataset_slug, settings, **kwargs) # noqa: E501 + else: + (data) = self.metadata_post_with_http_info(owner_slug, dataset_slug, settings, **kwargs) # noqa: E501 + return data + + def metadata_post_with_http_info(self, owner_slug, dataset_slug, settings, **kwargs): # noqa: E501 + """Update the metadata for a dataset # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.metadata_post_with_http_info(owner_slug, dataset_slug, settings, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param str owner_slug: Dataset owner (required) + :param str dataset_slug: Dataset name (required) + :param DatasetUpdateSettingsRequest settings: Dataset metadata to update (required) + :return: Result + If the method is called asynchronously, + returns the request thread. + """ + + all_params = ['owner_slug', 'dataset_slug', 'settings'] # noqa: E501 + all_params.append('async_req') + all_params.append('_return_http_data_only') + all_params.append('_preload_content') + all_params.append('_request_timeout') + + params = locals() + for key, val in six.iteritems(params['kwargs']): + if key not in all_params: + raise TypeError( + "Got an unexpected keyword argument '%s'" + " to method metadata_post" % key + ) + params[key] = val + del params['kwargs'] + # verify the required parameter 'owner_slug' is set + if ('owner_slug' not in params or + params['owner_slug'] is None): + raise ValueError("Missing the required parameter `owner_slug` when calling `metadata_post`") # noqa: E501 + # verify the required parameter 'dataset_slug' is set + if ('dataset_slug' not in params or + params['dataset_slug'] is None): + raise ValueError("Missing the required parameter `dataset_slug` when calling `metadata_post`") # noqa: E501 + # verify the required parameter 'settings' is set + if ('settings' not in params or + params['settings'] is None): + raise ValueError("Missing the required parameter `settings` when calling `metadata_post`") # noqa: E501 + + collection_formats = {} + + path_params = {} + if 'owner_slug' in params: + path_params['ownerSlug'] = params['owner_slug'] # noqa: E501 + if 'dataset_slug' in params: + path_params['datasetSlug'] = params['dataset_slug'] # noqa: E501 + + query_params = [] + + header_params = {} + + form_params = [] + local_var_files = {} + + body_params = None + if 'settings' in params: + body_params = params['settings'] + # HTTP header `Accept` + header_params['Accept'] = self.api_client.select_header_accept( + ['application/json']) # noqa: E501 + + # HTTP header `Content-Type` + header_params['Content-Type'] = self.api_client.select_header_content_type( # noqa: E501 + ['application/json']) # noqa: E501 
+ + # Authentication setting + auth_settings = ['basicAuth'] # noqa: E501 + + return self.api_client.call_api( + '/datasets/metadata/{ownerSlug}/{datasetSlug}', 'POST', + path_params, + query_params, + header_params, + body=body_params, + post_params=form_params, + files=local_var_files, + response_type='Result', # noqa: E501 + auth_settings=auth_settings, + async_req=params.get('async_req'), + _return_http_data_only=params.get('_return_http_data_only'), + _preload_content=params.get('_preload_content', True), + _request_timeout=params.get('_request_timeout'), + collection_formats=collection_formats) diff --git a/.local/lib/python3.8/site-packages/kaggle/api/kaggle_api_extended.py b/.local/lib/python3.8/site-packages/kaggle/api/kaggle_api_extended.py new file mode 100644 index 0000000..bbd74df --- /dev/null +++ b/.local/lib/python3.8/site-packages/kaggle/api/kaggle_api_extended.py @@ -0,0 +1,2637 @@ +#!/usr/bin/python + # + # Copyright 2021 Kaggle Inc + # + # Licensed under the Apache License, Version 2.0 (the "License"); + # you may not use this file except in compliance with the License. + # You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License.
+
+# coding=utf-8
+from __future__ import print_function
+import csv
+from datetime import datetime
+import io
+import json
+import os
+from os.path import expanduser
+from os.path import isfile
+import sys
+import shutil
+import zipfile
+import tempfile
+from ..api_client import ApiClient
+from kaggle.configuration import Configuration
+from .kaggle_api import KaggleApi
+from ..models.collaborator import Collaborator
+from ..models.dataset_column import DatasetColumn
+from ..models.dataset_new_request import DatasetNewRequest
+from ..models.dataset_new_version_request import DatasetNewVersionRequest
+from ..models.dataset_update_settings_request import DatasetUpdateSettingsRequest
+from ..models.dataset_upload_file import DatasetUploadFile
+from ..models.kaggle_models_extended import Competition
+from ..models.kaggle_models_extended import Dataset
+from ..models.kaggle_models_extended import DatasetNewResponse
+from ..models.kaggle_models_extended import DatasetNewVersionResponse
+from ..models.kaggle_models_extended import File
+from ..models.kaggle_models_extended import FileUploadInfo
+from ..models.kaggle_models_extended import Kernel
+from ..models.kaggle_models_extended import KernelPushResponse
+from ..models.kaggle_models_extended import LeaderboardEntry
+from ..models.kaggle_models_extended import ListFilesResult
+from ..models.kaggle_models_extended import Metadata
+from ..models.kaggle_models_extended import Submission
+from ..models.kaggle_models_extended import SubmitResult
+from ..models.kernel_push_request import KernelPushRequest
+from ..models.license import License
+import requests
+from requests.adapters import HTTPAdapter
+from requests.packages.urllib3.util.retry import Retry
+from ..rest import ApiException
+import six
+from slugify import slugify
+from tqdm import tqdm
+
+try:
+    unicode  # Python 2
+except NameError:
+    unicode = str  # Python 3
+
+
+class KaggleApi(KaggleApi):
+    __version__ = '1.5.12'
+
+    CONFIG_NAME_PROXY = 'proxy'
+    CONFIG_NAME_COMPETITION = 'competition'
+    CONFIG_NAME_PATH = 'path'
+    CONFIG_NAME_USER = 'username'
+    CONFIG_NAME_KEY = 'key'
+    CONFIG_NAME_SSL_CA_CERT = 'ssl_ca_cert'
+
+    HEADER_API_VERSION = 'X-Kaggle-ApiVersion'
+    DATASET_METADATA_FILE = 'dataset-metadata.json'
+    OLD_DATASET_METADATA_FILE = 'datapackage.json'
+    KERNEL_METADATA_FILE = 'kernel-metadata.json'
+
+    config_dir = os.environ.get('KAGGLE_CONFIG_DIR') or os.path.join(
+        expanduser('~'), '.kaggle')
+    if not os.path.exists(config_dir):
+        os.makedirs(config_dir)
+
+    config_file = 'kaggle.json'
+    config = os.path.join(config_dir, config_file)
+    config_values = {}
+    already_printed_version_warning = False
+
+    # Kernels valid types
+    valid_push_kernel_types = ['script', 'notebook']
+    valid_push_language_types = ['python', 'r', 'rmarkdown']
+    valid_push_pinning_types = ['original', 'latest']
+    valid_list_languages = ['all', 'python', 'r', 'sqlite', 'julia']
+    valid_list_kernel_types = ['all', 'script', 'notebook']
+    valid_list_output_types = ['all', 'visualization', 'data']
+    valid_list_sort_by = [
+        'hotness', 'commentCount', 'dateCreated', 'dateRun', 'relevance',
+        'scoreAscending', 'scoreDescending', 'viewCount', 'voteCount'
+    ]
+
+    # Competitions valid types
+    valid_competition_groups = ['general', 'entered', 'inClass']
+    valid_competition_categories = [
+        'all', 'featured', 'research', 'recruitment', 'gettingStarted',
+        'masters', 'playground'
+    ]
+    valid_competition_sort_by = [
+        'grouped', 'prize', 'earliestDeadline', 'latestDeadline',
+        'numberOfTeams', 'recentlyCreated'
+    ]
+
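+    # A sketch of the credential lookup implemented by authenticate() below:
+    # KAGGLE_-prefixed environment variables are checked first, then the
+    # kaggle.json config file. The values here are placeholders:
+    #
+    #   os.environ['KAGGLE_USERNAME'] = 'example-user'
+    #   os.environ['KAGGLE_KEY'] = 'example-key'
+    #   api = KaggleApi()
+    #   api.authenticate()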
+ # Datasets valid types + valid_dataset_file_types = ['all', 'csv', 'sqlite', 'json', 'bigQuery'] + valid_dataset_license_names = ['all', 'cc', 'gpl', 'odb', 'other'] + valid_dataset_sort_bys = [ + 'hottest', 'votes', 'updated', 'active', 'published' + ] + + # Hack for https://github.com/Kaggle/kaggle-api/issues/22 / b/78194015 + if six.PY2: + reload(sys) + sys.setdefaultencoding('latin1') + + ## Authentication + + def authenticate(self): + """authenticate the user with the Kaggle API. This method will generate + a configuration, first checking the environment for credential + variables, and falling back to looking for the .kaggle/kaggle.json + configuration file. + """ + + config_data = {} + + # Step 1: try getting username/password from environment + config_data = self.read_config_environment(config_data) + + # Step 2: if credentials were not in env read in configuration file + if self.CONFIG_NAME_USER not in config_data \ + or self.CONFIG_NAME_KEY not in config_data: + if os.path.exists(self.config): + config_data = self.read_config_file(config_data) + else: + raise IOError('Could not find {}. Make sure it\'s located in' + ' {}. Or use the environment method.'.format( + self.config_file, self.config_dir)) + + # Step 3: load into configuration! + self._load_config(config_data) + + def read_config_environment(self, config_data=None, quiet=False): + """read_config_environment is the second effort to get a username + and key to authenticate to the Kaggle API. The environment keys + are equivalent to the kaggle.json file, but with "KAGGLE_" prefix + to define a unique namespace. + + Parameters + ========== + config_data: a partially loaded configuration dictionary (optional) + quiet: suppress verbose print of output (default is False) + """ + + # Add all variables that start with KAGGLE_ to config data + + if config_data is None: + config_data = {} + for key, val in os.environ.items(): + if key.startswith('KAGGLE_'): + config_key = key.replace('KAGGLE_', '', 1).lower() + config_data[config_key] = val + + return config_data + + ## Configuration + + def _load_config(self, config_data): + """the final step of the authenticate steps, where we load the values + from config_data into the Configuration object. + + Parameters + ========== + config_data: a dictionary with configuration values (keys) to read + into self.config_values + + """ + # Username and password are required. + + for item in [self.CONFIG_NAME_USER, self.CONFIG_NAME_KEY]: + if item not in config_data: + raise ValueError('Error: Missing %s in configuration.' % item) + + configuration = Configuration() + + # Add to the final configuration (required) + + configuration.username = config_data[self.CONFIG_NAME_USER] + configuration.password = config_data[self.CONFIG_NAME_KEY] + + # Proxy + + if self.CONFIG_NAME_PROXY in config_data: + configuration.proxy = config_data[self.CONFIG_NAME_PROXY] + + # Cert File + + if self.CONFIG_NAME_SSL_CA_CERT in config_data: + configuration.ssl_ca_cert = config_data[ + self.CONFIG_NAME_SSL_CA_CERT] + + # Keep config values with class instance, and load api client! + + self.config_values = config_data + + try: + self.api_client = ApiClient(configuration) + + except Exception as error: + + if 'Proxy' in type(error).__name__: + raise ValueError( + 'The specified proxy ' + + config_data[self.CONFIG_NAME_PROXY] + + ' is not valid, please check your proxy settings') + else: + raise ValueError( + 'Unauthorized: you must download an API key or export ' + 'credentials to the environment. 
Please see\n ' + + 'https://github.com/Kaggle/kaggle-api#api-credentials ' + + 'for instructions.') + + def read_config_file(self, config_data=None, quiet=False): + """read_config_file is the first effort to get a username + and key to authenticate to the Kaggle API. Since we can get the + username and password from the environment, it's not required. + + Parameters + ========== + config_data: the Configuration object to save a username and + password, if defined + quiet: suppress verbose print of output (default is False) + """ + if config_data is None: + config_data = {} + + if os.path.exists(self.config): + + try: + if os.name != 'nt': + permissions = os.stat(self.config).st_mode + if (permissions & 4) or (permissions & 32): + print( + 'Warning: Your Kaggle API key is readable by other ' + 'users on this system! To fix this, you can run ' + + '\'chmod 600 {}\''.format(self.config)) + + with open(self.config) as f: + config_data = json.load(f) + except: + pass + + else: + + # Warn the user that configuration will be reliant on environment + if not quiet: + print('No Kaggle API config file found, will use environment.') + + return config_data + + def _read_config_file(self): + """read in the configuration file, a json file defined at self.config""" + + try: + with open(self.config, 'r') as f: + config_data = json.load(f) + except FileNotFoundError: + config_data = {} + + return config_data + + def _write_config_file(self, config_data, indent=2): + """write config data to file. + + Parameters + ========== + config_data: the Configuration object to save a username and + password, if defined + indent: number of tab indentations to use when writing json + """ + with open(self.config, 'w') as f: + json.dump(config_data, f, indent=indent) + + def set_config_value(self, name, value, quiet=False): + """a client helper function to set a configuration value, meaning + reading in the configuration file (if it exists), saving a new + config value, and then writing back + + Parameters + ========== + name: the name of the value to set (key in dictionary) + value: the value to set at the key + quiet: disable verbose output if True (default is False) + """ + + config_data = self._read_config_file() + + if value is not None: + + # Update the config file with the value + config_data[name] = value + + # Update the instance with the value + self.config_values[name] = value + + # If defined by client, set and save! + self._write_config_file(config_data) + + if not quiet: + self.print_config_value(name, separator=' is now set to: ') + + def unset_config_value(self, name, quiet=False): + """unset a configuration value + Parameters + ========== + name: the name of the value to unset (remove key in dictionary) + quiet: disable verbose output if True (default is False) + """ + + config_data = self._read_config_file() + + if name in config_data: + + del config_data[name] + + self._write_config_file(config_data) + + if not quiet: + self.print_config_value(name, separator=' is now set to: ') + + def get_config_value(self, name): + """ return a config value (with key name) if it's in the config_values, + otherwise return None + + Parameters + ========== + name: the config value key to get + + """ + if name in self.config_values: + return self.config_values[name] + + def get_default_download_dir(self, *subdirs): + """ Get the download path for a file. If not defined, return default + from config. 
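+
+        A sketch (the subfolder values are illustrative):
+        >>> api.get_default_download_dir('competitions', 'some-competition')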
+
+        Parameters
+        ==========
+        subdirs: a single (or list of) subfolders under the basepath
+        """
+        # Look up value for key "path" in the config
+        path = self.get_config_value(self.CONFIG_NAME_PATH)
+
+        # If not set in config, default to present working directory
+        if path is None:
+            return os.getcwd()
+
+        return os.path.join(path, *subdirs)
+
+    def print_config_value(self, name, prefix='- ', separator=': '):
+        """print a single configuration value, based on a prefix and separator
+
+        Parameters
+        ==========
+        name: the key of the config value in self.config_values to print
+        prefix: the prefix to print
+        separator: the separator to use (default is : )
+        """
+
+        value_out = 'None'
+        if name in self.config_values and self.config_values[name] is not None:
+            value_out = self.config_values[name]
+        print(prefix + name + separator + value_out)
+
+    def print_config_values(self, prefix='- '):
+        """a wrapper to print_config_value to print all configuration values
+        Parameters
+        ==========
+        prefix: the character prefix to put before the printed config value
+            defaults to "- "
+        """
+        print('Configuration values from ' + self.config_dir)
+        self.print_config_value(self.CONFIG_NAME_USER, prefix=prefix)
+        self.print_config_value(self.CONFIG_NAME_PATH, prefix=prefix)
+        self.print_config_value(self.CONFIG_NAME_PROXY, prefix=prefix)
+        self.print_config_value(self.CONFIG_NAME_COMPETITION, prefix=prefix)
+
+    ## Competitions
+
+    def competitions_list(self,
+                          group=None,
+                          category=None,
+                          sort_by=None,
+                          page=1,
+                          search=None):
+        """ make call to list competitions, format the response, and return
+        a list of Competition instances
+
+        Parameters
+        ==========
+
+        page: the page to return (default is 1)
+        search: a search term to use (default is empty string)
+        sort_by: how to sort the result, see valid_competition_sort_by for options
+        category: category to filter result to
+        group: group to filter result to
+        """
+        if group and group not in self.valid_competition_groups:
+            raise ValueError('Invalid group specified. Valid options are ' +
+                             str(self.valid_competition_groups))
+
+        if category and category not in self.valid_competition_categories:
+            raise ValueError('Invalid category specified. Valid options are ' +
+                             str(self.valid_competition_categories))
+
+        if sort_by and sort_by not in self.valid_competition_sort_by:
+            raise ValueError('Invalid sort_by specified. Valid options are ' +
+                             str(self.valid_competition_sort_by))
+
+        competitions_list_result = self.process_response(
+            self.competitions_list_with_http_info(group=group or '',
+                                                  category=category or '',
+                                                  sort_by=sort_by or '',
+                                                  page=page,
+                                                  search=search or ''))
+        return [Competition(c) for c in competitions_list_result]
+
+    def competitions_list_cli(self,
+                              group=None,
+                              category=None,
+                              sort_by=None,
+                              page=1,
+                              search=None,
+                              csv_display=False):
+        """ a wrapper for competitions_list for the client.
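+
+        A sketch (assuming an authenticated client; the search term is
+        illustrative):
+        >>> api.competitions_list_cli(search='titanic', csv_display=True)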
+
+        Parameters
+        ==========
+        group: group to filter result to
+        category: category to filter result to
+        sort_by: how to sort the result, see valid_sort_by for options
+        page: the page to return (default is 1)
+        search: a search term to use (default is empty string)
+        csv_display: if True, print comma separated values
+        """
+        competitions = self.competitions_list(group=group,
+                                              category=category,
+                                              sort_by=sort_by,
+                                              page=page,
+                                              search=search)
+        fields = [
+            'ref', 'deadline', 'category', 'reward', 'teamCount',
+            'userHasEntered'
+        ]
+        if competitions:
+            if csv_display:
+                self.print_csv(competitions, fields)
+            else:
+                self.print_table(competitions, fields)
+        else:
+            print('No competitions found')
+
+    def competition_submit(self, file_name, message, competition, quiet=False):
+        """ submit a competition!
+
+        Parameters
+        ==========
+        file_name: the competition metadata file
+        message: the submission description
+        competition: the competition name
+        quiet: suppress verbose output (default is False)
+        """
+        if competition is None:
+            competition = self.get_config_value(self.CONFIG_NAME_COMPETITION)
+            if competition is not None and not quiet:
+                print('Using competition: ' + competition)
+
+        if competition is None:
+            raise ValueError('No competition specified')
+        else:
+            url_result = self.process_response(
+                self.competitions_submissions_url_with_http_info(
+                    id=competition,
+                    file_name=os.path.basename(file_name),
+                    content_length=os.path.getsize(file_name),
+                    last_modified_date_utc=int(os.path.getmtime(file_name))))
+
+            # Temporary while new worker is gradually turned on. 'isComplete'
+            # exists on the old DTO but not the new, so this is a hacky but
+            # easy solution to figure out which submission logic to use
+            if 'isComplete' in url_result:
+                # Old submissions path
+                url_result_list = url_result['createUrl'].split('/')
+                upload_result = self.process_response(
+                    self.competitions_submissions_upload_with_http_info(
+                        file=file_name,
+                        guid=url_result_list[-3],
+                        content_length=url_result_list[-2],
+                        last_modified_date_utc=url_result_list[-1]))
+                upload_result_token = upload_result['token']
+            else:
+                # New submissions path!
+                success = self.upload_complete(file_name,
+                                               url_result['createUrl'], quiet)
+                if not success:
+                    # Actual error is printed during upload_complete. Not
+                    # ideal but changing would not be backwards compatible
+                    return "Could not submit to competition"
+
+                upload_result_token = url_result['token']
+
+            submit_result = self.process_response(
+                self.competitions_submissions_submit_with_http_info(
+                    id=competition,
+                    blob_file_tokens=upload_result_token,
+                    submission_description=message))
+            return SubmitResult(submit_result)
+
+    def competition_submit_cli(self,
+                               file_name,
+                               message,
+                               competition,
+                               competition_opt=None,
+                               quiet=False):
+        """ submit a competition using the client. Arguments are same as for
+        competition_submit, except for extra arguments provided here.
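+
+        A sketch (file name, message, and competition are illustrative):
+        >>> api.competition_submit_cli('sub.csv', 'first try', 'titanic')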
+ Parameters + ========== + competition_opt: an alternative competition option provided by cli + """ + competition = competition or competition_opt + try: + submit_result = self.competition_submit(file_name, message, + competition, quiet) + except ApiException as e: + if e.status == 404: + print('Could not find competition - please verify that you ' + 'entered the correct competition ID and that the ' + 'competition is still accepting submissions.') + return None + else: + raise e + return submit_result + + def competition_submissions(self, competition): + """ get the list of Submission for a particular competition + + Parameters + ========== + competition: the name of the competition + """ + submissions_result = self.process_response( + self.competitions_submissions_list_with_http_info(id=competition)) + return [Submission(s) for s in submissions_result] + + def competition_submissions_cli(self, + competition=None, + competition_opt=None, + csv_display=False, + quiet=False): + """ wrapper to competition_submission, will return either json or csv + to the user. Additional parameters are listed below, see + competition_submissions for rest. + + Parameters + ========== + competition: the name of the competition. If None, look to config + competition_opt: an alternative competition option provided by cli + csv_display: if True, print comma separated values + quiet: suppress verbose output (default is False) + """ + competition = competition or competition_opt + if competition is None: + competition = self.get_config_value(self.CONFIG_NAME_COMPETITION) + if competition is not None and not quiet: + print('Using competition: ' + competition) + + if competition is None: + raise ValueError('No competition specified') + else: + submissions = self.competition_submissions(competition) + fields = [ + 'fileName', 'date', 'description', 'status', 'publicScore', + 'privateScore' + ] + if submissions: + if csv_display: + self.print_csv(submissions, fields) + else: + self.print_table(submissions, fields) + else: + print('No submissions found') + + def competition_list_files(self, competition): + """ list files for competition + Parameters + ========== + competition: the name of the competition + """ + competition_list_files_result = self.process_response( + self.competitions_data_list_files_with_http_info(id=competition)) + return [File(f) for f in competition_list_files_result] + + def competition_list_files_cli(self, + competition, + competition_opt=None, + csv_display=False, + quiet=False): + """ List files for a competition, if it exists + + Parameters + ========== + competition: the name of the competition. 
If None, look to config
+        competition_opt: an alternative competition option provided by cli
+        csv_display: if True, print comma separated values
+        quiet: suppress verbose output (default is False)
+        """
+        competition = competition or competition_opt
+        if competition is None:
+            competition = self.get_config_value(self.CONFIG_NAME_COMPETITION)
+            if competition is not None and not quiet:
+                print('Using competition: ' + competition)
+
+        if competition is None:
+            raise ValueError('No competition specified')
+        else:
+            files = self.competition_list_files(competition)
+            fields = ['name', 'size', 'creationDate']
+            if files:
+                if csv_display:
+                    self.print_csv(files, fields)
+                else:
+                    self.print_table(files, fields)
+            else:
+                print('No files found')
+
+    def competition_download_file(self,
+                                  competition,
+                                  file_name,
+                                  path=None,
+                                  force=False,
+                                  quiet=False):
+        """ download a competition file to a designated location, or use
+        a default location
+
+        Parameters
+        =========
+        competition: the name of the competition
+        file_name: the configuration file name
+        path: a path to download the file to
+        force: force the download if the file already exists (default False)
+        quiet: suppress verbose output (default is False)
+        """
+        if path is None:
+            effective_path = self.get_default_download_dir(
+                'competitions', competition)
+        else:
+            effective_path = path
+
+        response = self.process_response(
+            self.competitions_data_download_file_with_http_info(
+                id=competition, file_name=file_name, _preload_content=False))
+        url = response.retries.history[0].redirect_location.split('?')[0]
+        outfile = os.path.join(effective_path, url.split('/')[-1])
+
+        if force or self.download_needed(response, outfile, quiet):
+            self.download_file(response, outfile, quiet)
+
+    def competition_download_files(self,
+                                   competition,
+                                   path=None,
+                                   force=False,
+                                   quiet=True):
+        """ downloads all competition files.
+
+        Parameters
+        =========
+        competition: the name of the competition
+        path: a path to download the file to
+        force: force the download if the file already exists (default False)
+        quiet: suppress verbose output (default is True)
+        """
+        if path is None:
+            effective_path = self.get_default_download_dir(
+                'competitions', competition)
+        else:
+            effective_path = path
+
+        response = self.process_response(
+            self.competitions_data_download_files_with_http_info(
+                id=competition, _preload_content=False))
+        url = response.retries.history[0].redirect_location.split('?')[0]
+        outfile = os.path.join(effective_path,
+                               competition + '.' + url.split('.')[-1])
+
+        if force or self.download_needed(response, outfile, quiet):
+            self.download_file(response, outfile, quiet)
+
+    def competition_download_cli(self,
+                                 competition,
+                                 competition_opt=None,
+                                 file_name=None,
+                                 path=None,
+                                 force=False,
+                                 quiet=False):
+        """ a wrapper to competition_download_files, but first will parse input
+        from API client. Additional parameters are listed here, see
+        competition_download for remaining.
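+
+        A sketch (competition and file name are illustrative):
+        >>> api.competition_download_cli('titanic', file_name='train.csv')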
+
+        Parameters
+        =========
+        competition: the name of the competition
+        competition_opt: an alternative competition option provided by cli
+        file_name: the configuration file name
+        path: a path to download the file to
+        force: force the download if the file already exists (default False)
+        quiet: suppress verbose output (default is False)
+        """
+        competition = competition or competition_opt
+        if competition is None:
+            competition = self.get_config_value(self.CONFIG_NAME_COMPETITION)
+            if competition is not None and not quiet:
+                print('Using competition: ' + competition)
+
+        if competition is None:
+            raise ValueError('No competition specified')
+        else:
+            if file_name is None:
+                self.competition_download_files(competition, path, force,
+                                                quiet)
+            else:
+                self.competition_download_file(competition, file_name, path,
+                                               force, quiet)
+
+    def competition_leaderboard_download(self, competition, path, quiet=True):
+        """ Download competition leaderboards
+
+        Parameters
+        =========
+        competition: the name of the competition
+        path: a path to download the file to
+        quiet: suppress verbose output (default is True)
+        """
+        response = self.process_response(
+            self.competition_download_leaderboard_with_http_info(
+                competition, _preload_content=False))
+
+        if path is None:
+            effective_path = self.get_default_download_dir(
+                'competitions', competition)
+        else:
+            effective_path = path
+
+        file_name = competition + '.zip'
+        outfile = os.path.join(effective_path, file_name)
+        self.download_file(response, outfile, quiet)
+
+    def competition_leaderboard_view(self, competition):
+        """ view a leaderboard based on a competition name
+
+        Parameters
+        ==========
+        competition: the competition name to view leaderboard for
+        """
+        result = self.process_response(
+            self.competition_view_leaderboard_with_http_info(competition))
+        return [LeaderboardEntry(e) for e in result['submissions']]
+
+    def competition_leaderboard_cli(self,
+                                    competition,
+                                    competition_opt=None,
+                                    path=None,
+                                    view=False,
+                                    download=False,
+                                    csv_display=False,
+                                    quiet=False):
+        """ a wrapper for competition_leaderboard_view that will print the
+        results as a table or comma separated values
+
+        Parameters
+        ==========
+        competition: the competition name to view leaderboard for
+        competition_opt: an alternative competition option provided by cli
+        path: a path to download to, if download is True
+        view: if True, show the results in the terminal as csv or table
+        download: if True, download the entire leaderboard
+        csv_display: if True, print comma separated values instead of table
+        quiet: suppress verbose output (default is False)
+        """
+        competition = competition or competition_opt
+        if not view and not download:
+            raise ValueError('Either --show or --download must be specified')
+
+        if competition is None:
+            competition = self.get_config_value(self.CONFIG_NAME_COMPETITION)
+            if competition is not None and not quiet:
+                print('Using competition: ' + competition)
+
+        if competition is None:
+            raise ValueError('No competition specified')
+
+        if download:
+            self.competition_leaderboard_download(competition, path, quiet)
+
+        if view:
+            results = self.competition_leaderboard_view(competition)
+            fields = ['teamId', 'teamName', 'submissionDate', 'score']
+            if results:
+                if csv_display:
+                    self.print_csv(results, fields)
+                else:
+                    self.print_table(results, fields)
+            else:
+                print('No results found')
+
+    def dataset_list(self,
+                     sort_by=None,
+                     size=None,
+                     file_type=None,
+                     license_name=None,
+                     tag_ids=None,
+                     search=None,
+                     user=None,
+                     mine=False,
+                     page=1,
+ max_size=None, + min_size=None): + """ return a list of datasets! + + Parameters + ========== + sort_by: how to sort the result, see valid_dataset_sort_bys for options + size: Deprecated + file_type: the format, see valid_dataset_file_types for string options + license_name: string descriptor for license, see valid_dataset_license_names + tag_ids: tag identifiers to filter the search + search: a search term to use (default is empty string) + user: username to filter the search to + mine: boolean if True, group is changed to "my" to return personal + page: the page to return (default is 1) + max_size: the maximum size of the dataset to return (bytes) + min_size: the minimum size of the dataset to return (bytes) + """ + if sort_by and sort_by not in self.valid_dataset_sort_bys: + raise ValueError('Invalid sort by specified. Valid options are ' + + str(self.valid_dataset_sort_bys)) + + if size: + raise ValueError( + 'The --size parameter has been deprecated. ' + + 'Please use --max-size and --min-size to filter dataset sizes.' + ) + + if file_type and file_type not in self.valid_dataset_file_types: + raise ValueError( + 'Invalid file type specified. Valid options are ' + + str(self.valid_dataset_file_types)) + + if license_name and license_name not in self.valid_dataset_license_names: + raise ValueError('Invalid license specified. Valid options are ' + + str(self.valid_dataset_license_names)) + + if int(page) <= 0: + raise ValueError('Page number must be >= 1') + + if max_size and min_size: + if (int(max_size) < int(min_size)): + raise ValueError('Max Size must be max_size >= min_size') + if (max_size and int(max_size) <= 0): + raise ValueError('Max Size must be > 0') + elif (min_size and int(min_size) < 0): + raise ValueError('Min Size must be >= 0') + + group = 'public' + if mine: + group = 'my' + if user: + raise ValueError('Cannot specify both mine and a user') + if user: + group = 'user' + + datasets_list_result = self.process_response( + self.datasets_list_with_http_info(group=group, + sort_by=sort_by or 'hottest', + size=size, + filetype=file_type or 'all', + license=license_name or 'all', + tagids=tag_ids or '', + search=search or '', + user=user or '', + page=page, + max_size=max_size, + min_size=min_size)) + return [Dataset(d) for d in datasets_list_result] + + def dataset_list_cli(self, + sort_by=None, + size=None, + file_type=None, + license_name=None, + tag_ids=None, + search=None, + user=None, + mine=False, + page=1, + csv_display=False, + max_size=None, + min_size=None): + """ a wrapper to datasets_list for the client. Additional parameters + are described here, see dataset_list for others. 
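+
+        A sketch (assuming an authenticated client; the filters are
+        illustrative):
+        >>> api.dataset_list_cli(search='new york', file_type='csv')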
+ + Parameters + ========== + sort_by: how to sort the result, see valid_dataset_sort_bys for options + size: DEPRECATED + file_type: the format, see valid_dataset_file_types for string options + license_name: string descriptor for license, see valid_dataset_license_names + tag_ids: tag identifiers to filter the search + search: a search term to use (default is empty string) + user: username to filter the search to + mine: boolean if True, group is changed to "my" to return personal + page: the page to return (default is 1) + csv_display: if True, print comma separated values instead of table + max_size: the maximum size of the dataset to return (bytes) + min_size: the minimum size of the dataset to return (bytes) + """ + datasets = self.dataset_list(sort_by, size, file_type, license_name, + tag_ids, search, user, mine, page, + max_size, min_size) + fields = [ + 'ref', 'title', 'size', 'lastUpdated', 'downloadCount', + 'voteCount', 'usabilityRating' + ] + if datasets: + if csv_display: + self.print_csv(datasets, fields) + else: + self.print_table(datasets, fields) + else: + print('No datasets found') + + def dataset_view(self, dataset): + """ view metadata for a dataset. + + Parameters + ========== + dataset: the string identified of the dataset + should be in format [owner]/[dataset-name] + """ + if '/' in dataset: + self.validate_dataset_string(dataset) + dataset_urls = dataset.split('/') + owner_slug = dataset_urls[0] + dataset_slug = dataset_urls[1] + else: + owner_slug = self.get_config_value(self.CONFIG_NAME_USER) + dataset_slug = dataset + + result = self.process_response( + self.datasets_view_with_http_info(owner_slug, dataset_slug)) + return Dataset(result) + + def dataset_metadata_prep(self, dataset, path): + if dataset is None: + raise ValueError('A dataset must be specified') + if '/' in dataset: + self.validate_dataset_string(dataset) + dataset_urls = dataset.split('/') + owner_slug = dataset_urls[0] + dataset_slug = dataset_urls[1] + else: + owner_slug = self.get_config_value(self.CONFIG_NAME_USER) + dataset_slug = dataset + + if path is None: + effective_path = self.get_default_download_dir( + 'datasets', owner_slug, dataset_slug) + else: + effective_path = path + + return (owner_slug, dataset_slug, effective_path) + + def dataset_metadata_update(self, dataset, path): + (owner_slug, dataset_slug, + effective_path) = self.dataset_metadata_prep(dataset, path) + meta_file = self.get_dataset_metadata_file(effective_path) + with open(meta_file, 'r') as f: + metadata = json.load(f) + updateSettingsRequest = DatasetUpdateSettingsRequest( + title=metadata['title'], + subtitle=metadata['subtitle'], + description=metadata['description'], + is_private=metadata['isPrivate'], + licenses=[ + License(name=l['name']) for l in metadata['licenses'] + ], + keywords=metadata['keywords'], + collaborators=[ + Collaborator(username=c['username'], role=c['role']) + for c in metadata['collaborators'] + ], + data=metadata['data']) + result = self.process_response( + self.metadata_post_with_http_info(owner_slug, dataset_slug, + updateSettingsRequest)) + if (len(result['errors']) > 0): + [print(e['message']) for e in result['errors']] + exit(1) + + def dataset_metadata(self, dataset, path): + (owner_slug, dataset_slug, + effective_path) = self.dataset_metadata_prep(dataset, path) + + if not os.path.exists(effective_path): + os.makedirs(effective_path) + + result = self.process_response( + self.metadata_get_with_http_info(owner_slug, dataset_slug)) + if (result['errorMessage']): + raise 
Exception(result['errorMessage']) + + metadata = Metadata(result['info']) + + meta_file = os.path.join(effective_path, self.DATASET_METADATA_FILE) + with open(meta_file, 'w') as f: + json.dump(metadata, f, indent=2, default=lambda o: o.__dict__) + + return meta_file + + def dataset_metadata_cli(self, dataset, path, update, dataset_opt=None): + dataset = dataset or dataset_opt + if (update): + print('updating dataset metadata') + self.dataset_metadata_update(dataset, path) + print('successfully updated dataset metadata') + else: + meta_file = self.dataset_metadata(dataset, path) + print('Downloaded metadata to ' + meta_file) + + def dataset_list_files(self, dataset): + """ list files for a dataset + Parameters + ========== + dataset: the string identified of the dataset + should be in format [owner]/[dataset-name] + """ + if dataset is None: + raise ValueError('A dataset must be specified') + if '/' in dataset: + self.validate_dataset_string(dataset) + dataset_urls = dataset.split('/') + owner_slug = dataset_urls[0] + dataset_slug = dataset_urls[1] + else: + owner_slug = self.get_config_value(self.CONFIG_NAME_USER) + dataset_slug = dataset + dataset_list_files_result = self.process_response( + self.datasets_list_files_with_http_info(owner_slug=owner_slug, + dataset_slug=dataset_slug)) + return ListFilesResult(dataset_list_files_result) + + def dataset_list_files_cli(self, + dataset, + dataset_opt=None, + csv_display=False): + """ a wrapper to dataset_list_files for the client + (list files for a dataset) + Parameters + ========== + dataset: the string identified of the dataset + should be in format [owner]/[dataset-name] + dataset_opt: an alternative option to providing a dataset + csv_display: if True, print comma separated values instead of table + """ + dataset = dataset or dataset_opt + result = self.dataset_list_files(dataset) + if result: + if result.error_message: + print(result.error_message) + else: + fields = ['name', 'size', 'creationDate'] + if csv_display: + self.print_csv(result.files, fields) + else: + self.print_table(result.files, fields) + else: + print('No files found') + + def dataset_status(self, dataset): + """ call to get the status of a dataset from the API + Parameters + ========== + dataset: the string identified of the dataset + should be in format [owner]/[dataset-name] + """ + if dataset is None: + raise ValueError('A dataset must be specified') + if '/' in dataset: + self.validate_dataset_string(dataset) + dataset_urls = dataset.split('/') + owner_slug = dataset_urls[0] + dataset_slug = dataset_urls[1] + else: + owner_slug = self.get_config_value(self.CONFIG_NAME_USER) + dataset_slug = dataset + dataset_status_result = self.process_response( + self.datasets_status_with_http_info(owner_slug=owner_slug, + dataset_slug=dataset_slug)) + return dataset_status_result + + def dataset_status_cli(self, dataset, dataset_opt=None): + """ wrapper for client for dataset_status, with additional + dataset_opt to get the status of a dataset from the API + Parameters + ========== + dataset_opt: an alternative to dataset + """ + dataset = dataset or dataset_opt + return self.dataset_status(dataset) + + def dataset_download_file(self, + dataset, + file_name, + path=None, + force=False, + quiet=True): + """ download a single file for a dataset + + Parameters + ========== + dataset: the string identified of the dataset + should be in format [owner]/[dataset-name] + file_name: the dataset configuration file + path: if defined, download to this location + force: force the download if 
the file already exists (default False) + quiet: suppress verbose output (default is True) + """ + if '/' in dataset: + self.validate_dataset_string(dataset) + dataset_urls = dataset.split('/') + owner_slug = dataset_urls[0] + dataset_slug = dataset_urls[1] + else: + owner_slug = self.get_config_value(self.CONFIG_NAME_USER) + dataset_slug = dataset + + if path is None: + effective_path = self.get_default_download_dir( + 'datasets', owner_slug, dataset_slug) + else: + effective_path = path + + response = self.process_response( + self.datasets_download_file_with_http_info( + owner_slug=owner_slug, + dataset_slug=dataset_slug, + file_name=file_name, + _preload_content=False)) + url = response.retries.history[0].redirect_location.split('?')[0] + outfile = os.path.join(effective_path, url.split('/')[-1]) + if force or self.download_needed(response, outfile, quiet): + self.download_file(response, outfile, quiet) + return True + else: + return False + + def dataset_download_files(self, + dataset, + path=None, + force=False, + quiet=True, + unzip=False): + """ download all files for a dataset + + Parameters + ========== + dataset: the string identified of the dataset + should be in format [owner]/[dataset-name] + path: the path to download the dataset to + force: force the download if the file already exists (default False) + quiet: suppress verbose output (default is True) + unzip: if True, unzip files upon download (default is False) + """ + if dataset is None: + raise ValueError('A dataset must be specified') + if '/' in dataset: + self.validate_dataset_string(dataset) + dataset_urls = dataset.split('/') + owner_slug = dataset_urls[0] + dataset_slug = dataset_urls[1] + else: + owner_slug = self.get_config_value(self.CONFIG_NAME_USER) + dataset_slug = dataset + + if path is None: + effective_path = self.get_default_download_dir( + 'datasets', owner_slug, dataset_slug) + else: + effective_path = path + + response = self.process_response( + self.datasets_download_with_http_info(owner_slug=owner_slug, + dataset_slug=dataset_slug, + _preload_content=False)) + + outfile = os.path.join(effective_path, dataset_slug + '.zip') + if force or self.download_needed(response, outfile, quiet): + self.download_file(response, outfile, quiet) + downloaded = True + else: + downloaded = False + + if downloaded: + outfile = os.path.join(effective_path, dataset_slug + '.zip') + if unzip: + try: + with zipfile.ZipFile(outfile) as z: + z.extractall(effective_path) + except zipfile.BadZipFile as e: + raise ValueError( + 'Bad zip file, please report on ' + 'www.github.com/kaggle/kaggle-api', e) + + try: + os.remove(outfile) + except OSError as e: + print('Could not delete zip file, got %s' % e) + + def dataset_download_cli(self, + dataset, + dataset_opt=None, + file_name=None, + path=None, + unzip=False, + force=False, + quiet=False): + """ client wrapper for dataset_download_files and download dataset file, + either for a specific file (when file_name is provided), + or all files for a dataset (plural) + + Parameters + ========== + dataset: the string identified of the dataset + should be in format [owner]/[dataset-name] + dataset_opt: an alternative option to providing a dataset + file_name: the dataset configuration file + path: the path to download the dataset to + force: force the download if the file already exists (default False) + quiet: suppress verbose output (default is False) + unzip: if True, unzip files upon download (default is False) + path: the path to download the dataset to + """ + dataset = dataset or 
dataset_opt + if file_name is None: + self.dataset_download_files(dataset, + path=path, + unzip=unzip, + force=force, + quiet=quiet) + else: + self.dataset_download_file(dataset, + file_name, + path=path, + force=force, + quiet=quiet) + + def dataset_upload_file(self, path, quiet): + """ upload a dataset file + + Parameters + ========== + path: the complete path to upload + quiet: suppress verbose output (default is False) + """ + file_name = os.path.basename(path) + content_length = os.path.getsize(path) + last_modified_date_utc = int(os.path.getmtime(path)) + result = FileUploadInfo( + self.process_response( + self.datasets_upload_file_with_http_info( + file_name, content_length, last_modified_date_utc))) + + success = self.upload_complete(path, result.createUrl, quiet) + + if success: + return result.token + return None + + def dataset_create_version(self, + folder, + version_notes, + quiet=False, + convert_to_csv=True, + delete_old_versions=False, + dir_mode='skip'): + """ create a version of a dataset + + Parameters + ========== + folder: the folder with the dataset configuration / data files + version_notes: notes to add for the version + quiet: suppress verbose output (default is False) + convert_to_csv: on upload, if data should be converted to csv + delete_old_versions: if True, do that (default False) + dir_mode: What to do with directories: "skip" - ignore; "zip" - compress and upload + """ + if not os.path.isdir(folder): + raise ValueError('Invalid folder: ' + folder) + + meta_file = self.get_dataset_metadata_file(folder) + + # read json + with open(meta_file) as f: + meta_data = json.load(f) + ref = self.get_or_default(meta_data, 'id', None) + id_no = self.get_or_default(meta_data, 'id_no', None) + if not ref and not id_no: + raise ValueError('ID or slug must be specified in the metadata') + + subtitle = meta_data.get('subtitle') + if subtitle and (len(subtitle) < 20 or len(subtitle) > 80): + raise ValueError( + 'Subtitle length must be between 20 and 80 characters') + resources = meta_data.get('resources') + if resources: + self.validate_resources(folder, resources) + + description = meta_data.get('description') + keywords = self.get_or_default(meta_data, 'keywords', []) + + request = DatasetNewVersionRequest( + version_notes=version_notes, + subtitle=subtitle, + description=description, + files=[], + convert_to_csv=convert_to_csv, + category_ids=keywords, + delete_old_versions=delete_old_versions) + self.upload_files(request, resources, folder, quiet, dir_mode) + + if id_no: + result = DatasetNewVersionResponse( + self.process_response( + self.datasets_create_version_by_id_with_http_info( + id_no, request))) + else: + if ref == self.config_values[ + self.CONFIG_NAME_USER] + '/INSERT_SLUG_HERE': + raise ValueError( + 'Default slug detected, please change values before ' + 'uploading') + self.validate_dataset_string(ref) + ref_list = ref.split('/') + owner_slug = ref_list[0] + dataset_slug = ref_list[1] + result = DatasetNewVersionResponse( + self.process_response( + self.datasets_create_version_with_http_info( + owner_slug, dataset_slug, request))) + + return result + + def dataset_create_version_cli(self, + folder, + version_notes, + quiet=False, + convert_to_csv=True, + delete_old_versions=False, + dir_mode='skip'): + """ client wrapper for creating a version of a dataset + Parameters + ========== + folder: the folder with the dataset configuration / data files + version_notes: notes to add for the version + quiet: suppress verbose output (default is False) + 
convert_to_csv: on upload, if data should be converted to csv
+        delete_old_versions: if True, do that (default False)
+        dir_mode: What to do with directories: "skip" - ignore; "zip" - compress and upload
+        """
+        folder = folder or os.getcwd()
+        result = self.dataset_create_version(
+            folder,
+            version_notes,
+            quiet=quiet,
+            convert_to_csv=convert_to_csv,
+            delete_old_versions=delete_old_versions,
+            dir_mode=dir_mode)
+        # Guard against a missing response before dereferencing it
+        if result is None:
+            print('Dataset version creation error: See previous output')
+            return
+        if result.invalidTags:
+            print(
+                ('The following are not valid tags and could not be added to '
+                 'the dataset: ') + str(result.invalidTags))
+        if result.status.lower() == 'ok':
+            print(
+                'Dataset version is being created. Please check progress at ' +
+                result.url)
+        else:
+            print('Dataset version creation error: ' + result.error)
+
+    def dataset_initialize(self, folder):
+        """ initialize a folder with a dataset configuration (metadata) file
+
+        Parameters
+        ==========
+        folder: the folder to initialize the metadata file in
+        """
+        if not os.path.isdir(folder):
+            raise ValueError('Invalid folder: ' + folder)
+
+        ref = self.config_values[self.CONFIG_NAME_USER] + '/INSERT_SLUG_HERE'
+        licenses = []
+        default_license = {'name': 'CC0-1.0'}
+        licenses.append(default_license)
+
+        meta_data = {
+            'title': 'INSERT_TITLE_HERE',
+            'id': ref,
+            'licenses': licenses
+        }
+        meta_file = os.path.join(folder, self.DATASET_METADATA_FILE)
+        with open(meta_file, 'w') as f:
+            json.dump(meta_data, f, indent=2)
+
+        print('Data package template written to: ' + meta_file)
+        return meta_file
+
+    def dataset_initialize_cli(self, folder=None):
+        folder = folder or os.getcwd()
+        self.dataset_initialize(folder)
+
+    def dataset_create_new(self,
+                           folder,
+                           public=False,
+                           quiet=False,
+                           convert_to_csv=True,
+                           dir_mode='skip'):
+        """ create a new dataset, meaning the same as creating a version but
+        with extra metadata like license and user/owner.
+        Parameters
+        ==========
+        folder: the folder to initialize the metadata file in
+        public: should the dataset be public?
+ quiet: suppress verbose output (default is False) + convert_to_csv: if True, convert data to comma separated value + dir_mode: What to do with directories: "skip" - ignore; "zip" - compress and upload + """ + if not os.path.isdir(folder): + raise ValueError('Invalid folder: ' + folder) + + meta_file = self.get_dataset_metadata_file(folder) + + # read json + with open(meta_file) as f: + meta_data = json.load(f) + ref = self.get_or_fail(meta_data, 'id') + title = self.get_or_fail(meta_data, 'title') + licenses = self.get_or_fail(meta_data, 'licenses') + ref_list = ref.split('/') + owner_slug = ref_list[0] + dataset_slug = ref_list[1] + + # validations + if ref == self.config_values[ + self.CONFIG_NAME_USER] + '/INSERT_SLUG_HERE': + raise ValueError( + 'Default slug detected, please change values before uploading') + if title == 'INSERT_TITLE_HERE': + raise ValueError( + 'Default title detected, please change values before uploading' + ) + if len(licenses) != 1: + raise ValueError('Please specify exactly one license') + if len(dataset_slug) < 6 or len(dataset_slug) > 50: + raise ValueError( + 'The dataset slug must be between 6 and 50 characters') + if len(title) < 6 or len(title) > 50: + raise ValueError( + 'The dataset title must be between 6 and 50 characters') + resources = meta_data.get('resources') + if resources: + self.validate_resources(folder, resources) + + license_name = self.get_or_fail(licenses[0], 'name') + description = meta_data.get('description') + keywords = self.get_or_default(meta_data, 'keywords', []) + + subtitle = meta_data.get('subtitle') + if subtitle and (len(subtitle) < 20 or len(subtitle) > 80): + raise ValueError( + 'Subtitle length must be between 20 and 80 characters') + + request = DatasetNewRequest(title=title, + slug=dataset_slug, + owner_slug=owner_slug, + license_name=license_name, + subtitle=subtitle, + description=description, + files=[], + is_private=not public, + convert_to_csv=convert_to_csv, + category_ids=keywords) + resources = meta_data.get('resources') + self.upload_files(request, resources, folder, quiet, dir_mode) + result = DatasetNewResponse( + self.process_response( + self.datasets_create_new_with_http_info(request))) + + return result + + def dataset_create_new_cli(self, + folder=None, + public=False, + quiet=False, + convert_to_csv=True, + dir_mode='skip'): + """ client wrapper for creating a new dataset + Parameters + ========== + folder: the folder to initialize the metadata file in + public: should the dataset be public? + quiet: suppress verbose output (default is False) + convert_to_csv: if True, convert data to comma separated value + dir_mode: What to do with directories: "skip" - ignore; "zip" - compress and upload + """ + folder = folder or os.getcwd() + result = self.dataset_create_new(folder, public, quiet, convert_to_csv, + dir_mode) + if result.invalidTags: + print('The following are not valid tags and could not be added to ' + 'the dataset: ' + str(result.invalidTags)) + if result.status.lower() == 'ok': + if public: + print('Your public Dataset is being created. Please check ' + 'progress at ' + result.url) + else: + print('Your private Dataset is being created. 
Please check ' + 'progress at ' + result.url) + else: + print('Dataset creation error: ' + result.error) + + def download_file(self, response, outfile, quiet=True, chunk_size=1048576): + """ download a file to an output file based on a chunk size + + Parameters + ========== + response: the response to download + outfile: the output file to download to + quiet: suppress verbose output (default is True) + chunk_size: the size of the chunk to stream + """ + + outpath = os.path.dirname(outfile) + if not os.path.exists(outpath): + os.makedirs(outpath) + size = int(response.headers['Content-Length']) + size_read = 0 + if not quiet: + print('Downloading ' + os.path.basename(outfile) + ' to ' + + outpath) + with tqdm(total=size, + unit='B', + unit_scale=True, + unit_divisor=1024, + disable=quiet) as pbar: + with open(outfile, 'wb') as out: + while True: + data = response.read(chunk_size) + if not data: + break + out.write(data) + size_read = min(size, size_read + chunk_size) + pbar.update(len(data)) + if not quiet: + print('\n', end='') + + def kernels_list(self, + page=1, + page_size=20, + dataset=None, + competition=None, + parent_kernel=None, + search=None, + mine=False, + user=None, + language=None, + kernel_type=None, + output_type=None, + sort_by=None): + """ list kernels based on a set of search criteria + + Parameters + ========== + page: the page of results to return (default is 1) + page_size: results per page (default is 20) + dataset: if defined, filter to this dataset (default None) + competition: if defined, filter to this competition (default None) + parent_kernel: if defined, filter to those with specified parent + search: a custom search string to pass to the list query + mine: if true, group is specified as "my" to return personal kernels + user: filter results to a specific user + language: the programming language of the kernel + kernel_type: the type of kernel, one of valid_list_kernel_types (str) + output_type: the output type, one of valid_list_output_types (str) + sort_by: if defined, sort results by this string (valid_list_sort_by) + """ + if int(page) <= 0: + raise ValueError('Page number must be >= 1') + + page_size = int(page_size) + if page_size <= 0: + raise ValueError('Page size must be >= 1') + if page_size > 100: + page_size = 100 + + if language and language not in self.valid_list_languages: + raise ValueError('Invalid language specified. Valid options are ' + + str(self.valid_list_languages)) + + if kernel_type and kernel_type not in self.valid_list_kernel_types: + raise ValueError( + 'Invalid kernel type specified. Valid options are ' + + str(self.valid_list_kernel_types)) + + if output_type and output_type not in self.valid_list_output_types: + raise ValueError( + 'Invalid output type specified. Valid options are ' + + str(self.valid_list_output_types)) + + if sort_by and sort_by not in self.valid_list_sort_by: + raise ValueError( + 'Invalid sort by type specified. 
Valid options are ' + + str(self.valid_list_sort_by)) + + if sort_by == 'relevance' and search == '': + raise ValueError('Cannot sort by relevance without a search term.') + + self.validate_dataset_string(dataset) + self.validate_kernel_string(parent_kernel) + + group = 'everyone' + if mine: + group = 'profile' + + kernels_list_result = self.process_response( + self.kernels_list_with_http_info(page=page, + page_size=page_size, + group=group, + user=user or '', + language=language or 'all', + kernel_type=kernel_type or 'all', + output_type=output_type or 'all', + sort_by=sort_by or 'hotness', + dataset=dataset or '', + competition=competition or '', + parent_kernel=parent_kernel or '', + search=search or '')) + return [Kernel(k) for k in kernels_list_result] + + def kernels_list_cli(self, + mine=False, + page=1, + page_size=20, + search=None, + csv_display=False, + parent=None, + competition=None, + dataset=None, + user=None, + language=None, + kernel_type=None, + output_type=None, + sort_by=None): + """ client wrapper for kernels_list, see this function for arguments. + Additional arguments are provided here. + Parameters + ========== + csv_display: if True, print comma separated values instead of table + """ + kernels = self.kernels_list(page=page, + page_size=page_size, + search=search, + mine=mine, + dataset=dataset, + competition=competition, + parent_kernel=parent, + user=user, + language=language, + kernel_type=kernel_type, + output_type=output_type, + sort_by=sort_by) + fields = ['ref', 'title', 'author', 'lastRunTime', 'totalVotes'] + if kernels: + if csv_display: + self.print_csv(kernels, fields) + else: + self.print_table(kernels, fields) + else: + print('No kernels found') + + def kernels_initialize(self, folder): + """ create a new kernel in a specified folder from template, including + json metadata that grabs values from the configuration. + Parameters + ========== + folder: the path of the folder + """ + if not os.path.isdir(folder): + raise ValueError('Invalid folder: ' + folder) + + resources = [] + resource = {'path': 'INSERT_SCRIPT_PATH_HERE'} + resources.append(resource) + + username = self.get_config_value(self.CONFIG_NAME_USER) + meta_data = { + 'id': + username + '/INSERT_KERNEL_SLUG_HERE', + 'title': + 'INSERT_TITLE_HERE', + 'code_file': + 'INSERT_CODE_FILE_PATH_HERE', + 'language': + 'Pick one of: {' + + ','.join(x for x in self.valid_push_language_types) + '}', + 'kernel_type': + 'Pick one of: {' + + ','.join(x for x in self.valid_push_kernel_types) + '}', + 'is_private': + 'true', + 'enable_gpu': + 'false', + 'enable_internet': + 'true', + 'dataset_sources': [], + 'competition_sources': [], + 'kernel_sources': [], + } + meta_file = os.path.join(folder, self.KERNEL_METADATA_FILE) + with open(meta_file, 'w') as f: + json.dump(meta_data, f, indent=2) + + return meta_file + + def kernels_initialize_cli(self, folder=None): + """ client wrapper for kernels_initialize, takes same arguments but + sets default folder to be None. If None, defaults to present + working directory. + Parameters + ========== + folder: the path of the folder (None defaults to ${PWD}) + """ + folder = folder or os.getcwd() + meta_file = self.kernels_initialize(folder) + print('Kernel metadata template written to: ' + meta_file) + + def kernels_push(self, folder): + """ read the metadata file and kernel files from a notebook, validate + both, and use Kernel API to push to Kaggle if all is valid. 
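+
+        A sketch (the folder path is illustrative):
+        >>> api.kernels_push('path/to/kernel-folder')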
+ Parameters + ========== + folder: the path of the folder + """ + if not os.path.isdir(folder): + raise ValueError('Invalid folder: ' + folder) + + meta_file = os.path.join(folder, self.KERNEL_METADATA_FILE) + if not os.path.isfile(meta_file): + raise ValueError('Metadata file not found: ' + + self.KERNEL_METADATA_FILE) + + with open(meta_file) as f: + meta_data = json.load(f) + + title = self.get_or_default(meta_data, 'title', None) + if title and len(title) < 5: + raise ValueError('Title must be at least five characters') + + code_path = self.get_or_default(meta_data, 'code_file', '') + if not code_path: + raise ValueError('A source file must be specified in the metadata') + + code_file = os.path.join(folder, code_path) + if not os.path.isfile(code_file): + raise ValueError('Source file not found: ' + code_file) + + slug = meta_data.get('id') + id_no = meta_data.get('id_no') + if not slug and not id_no: + raise ValueError('ID or slug must be specified in the metadata') + if slug: + self.validate_kernel_string(slug) + if '/' in slug: + kernel_slug = slug.split('/')[1] + else: + kernel_slug = slug + if title: + as_slug = slugify(title) + if kernel_slug.lower() != as_slug: + print( + 'Your kernel title does not resolve to the specified ' + 'id. This may result in surprising behavior. We ' + 'suggest making your title something that resolves to ' + 'the specified id. See %s for more information on ' + 'how slugs are determined.' % + 'https://en.wikipedia.org/wiki/Clean_URL#Slug') + + language = self.get_or_default(meta_data, 'language', '') + if language not in self.valid_push_language_types: + raise ValueError( + 'A valid language must be specified in the metadata. Valid ' + 'options are ' + str(self.valid_push_language_types)) + + kernel_type = self.get_or_default(meta_data, 'kernel_type', '') + if kernel_type not in self.valid_push_kernel_types: + raise ValueError( + 'A valid kernel type must be specified in the metadata. 
Valid ' + 'options are ' + str(self.valid_push_kernel_types)) + + if kernel_type == 'notebook' and language == 'rmarkdown': + language = 'r' + + dataset_sources = self.get_or_default(meta_data, 'dataset_sources', []) + for source in dataset_sources: + self.validate_dataset_string(source) + + kernel_sources = self.get_or_default(meta_data, 'kernel_sources', []) + for source in kernel_sources: + self.validate_kernel_string(source) + + docker_pinning_type = self.get_or_default(meta_data, + 'docker_image_pinning_type', + None) + if (docker_pinning_type is not None + and docker_pinning_type not in self.valid_push_pinning_types): + raise ValueError( + 'If specified, the docker_image_pinning_type must be ' + 'one of ' + str(self.valid_push_pinning_types)) + + with open(code_file) as f: + script_body = f.read() + + if kernel_type == 'notebook': + json_body = json.loads(script_body) + if 'cells' in json_body: + for cell in json_body['cells']: + if 'outputs' in cell and cell['cell_type'] == 'code': + cell['outputs'] = [] + script_body = json.dumps(json_body) + + kernel_push_request = KernelPushRequest( + id=id_no, + slug=slug, + new_title=self.get_or_default(meta_data, 'title', None), + text=script_body, + language=language, + kernel_type=kernel_type, + is_private=self.get_or_default(meta_data, 'is_private', None), + enable_gpu=self.get_or_default(meta_data, 'enable_gpu', None), + enable_internet=self.get_or_default(meta_data, 'enable_internet', + None), + dataset_data_sources=dataset_sources, + competition_data_sources=self.get_or_default( + meta_data, 'competition_sources', []), + kernel_data_sources=kernel_sources, + category_ids=self.get_or_default(meta_data, 'keywords', []), + docker_image_pinning_type=docker_pinning_type) + + result = KernelPushResponse( + self.process_response( + self.kernel_push_with_http_info( + kernel_push_request=kernel_push_request))) + return result + + def kernels_push_cli(self, folder): + """ client wrapper for kernels_push, with same arguments. + """ + folder = folder or os.getcwd() + result = self.kernels_push(folder) + + if result is None: + print('Kernel push error: see previous output') + elif not result.error: + if result.invalidTags: + print( + 'The following are not valid tags and could not be added ' + 'to the kernel: ' + str(result.invalidTags)) + if result.invalidDatasetSources: + print( + 'The following are not valid dataset sources and could not ' + 'be added to the kernel: ' + + str(result.invalidDatasetSources)) + if result.invalidCompetitionSources: + print( + 'The following are not valid competition sources and could ' + 'not be added to the kernel: ' + + str(result.invalidCompetitionSources)) + if result.invalidKernelSources: + print( + 'The following are not valid kernel sources and could not ' + 'be added to the kernel: ' + + str(result.invalidKernelSources)) + + if result.versionNumber: + print('Kernel version %s successfully pushed. Please check ' + 'progress at %s' % (result.versionNumber, result.url)) + else: + # Shouldn't happen but didn't test exhaustively + print('Kernel version successfully pushed. Please check ' + 'progress at %s' % result.url) + else: + print('Kernel push error: ' + result.error) + + def kernels_pull(self, kernel, path, metadata=False, quiet=True): + """ pull a kernel, including a metadata file (if metadata is True) + and associated files to a specified path. 
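+
+        A sketch (the kernel ref is illustrative):
+        >>> api.kernels_pull('someuser/some-kernel', path='.', metadata=True)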
+ Parameters + ========== + kernel: the kernel to pull + path: the path to pull files to on the filesystem + metadata: if True, also pull metadata + quiet: suppress verbosity (default is True) + """ + existing_metadata = None + if kernel is None: + if path is None: + existing_metadata_path = os.path.join( + os.getcwd(), self.KERNEL_METADATA_FILE) + else: + existing_metadata_path = os.path.join( + path, self.KERNEL_METADATA_FILE) + if os.path.exists(existing_metadata_path): + with open(existing_metadata_path) as f: + existing_metadata = json.load(f) + kernel = existing_metadata['id'] + if 'INSERT_KERNEL_SLUG_HERE' in kernel: + raise ValueError('A kernel must be specified') + else: + print('Using kernel ' + kernel) + + if '/' in kernel: + self.validate_kernel_string(kernel) + kernel_url_list = kernel.split('/') + owner_slug = kernel_url_list[0] + kernel_slug = kernel_url_list[1] + else: + owner_slug = self.get_config_value(self.CONFIG_NAME_USER) + kernel_slug = kernel + + if path is None: + effective_path = self.get_default_download_dir( + 'kernels', owner_slug, kernel_slug) + else: + effective_path = path + + if not os.path.exists(effective_path): + os.makedirs(effective_path) + + response = self.process_response( + self.kernel_pull_with_http_info(owner_slug, kernel_slug)) + blob = response['blob'] + + if os.path.isfile(effective_path): + effective_dir = os.path.dirname(effective_path) + else: + effective_dir = effective_path + metadata_path = os.path.join(effective_dir, self.KERNEL_METADATA_FILE) + + if not os.path.isfile(effective_path): + language = blob['language'].lower() + kernel_type = blob['kernelType'].lower() + + file_name = None + if existing_metadata: + file_name = existing_metadata['code_file'] + elif os.path.isfile(metadata_path): + with open(metadata_path) as f: + file_name = json.load(f)['code_file'] + + if not file_name or file_name == "INSERT_CODE_FILE_PATH_HERE": + extension = None + if kernel_type == 'script': + if language == 'python': + extension = '.py' + elif language == 'r': + extension = '.R' + elif language == 'rmarkdown': + extension = '.Rmd' + elif language == 'sqlite': + extension = '.sql' + elif language == 'julia': + extension = '.jl' + elif kernel_type == 'notebook': + if language == 'python': + extension = '.ipynb' + elif language == 'r': + extension = '.irnb' + elif language == 'julia': + extension = '.ijlnb' + file_name = blob['slug'] + extension + + if file_name is None: + print( + 'Unknown language %s + kernel type %s - please report this ' + 'on the kaggle-api github issues' % + (language, kernel_type)) + print( + 'Saving as a python file, even though this may not be the ' + 'correct language') + file_name = 'script.py' + script_path = os.path.join(effective_path, file_name) + else: + script_path = effective_path + file_name = os.path.basename(effective_path) + + with open(script_path, 'w', encoding="utf-8") as f: + f.write(blob['source']) + + if metadata: + data = {} + + server_metadata = response['metadata'] + data['id'] = server_metadata['ref'] + data['id_no'] = server_metadata['id'] + data['title'] = server_metadata['title'] + data['code_file'] = file_name + data['language'] = server_metadata['language'] + data['kernel_type'] = server_metadata['kernelType'] + self.set_if_present(server_metadata, 'isPrivate', data, + 'is_private') + self.set_if_present(server_metadata, 'enableGpu', data, + 'enable_gpu') + self.set_if_present(server_metadata, 'enableInternet', data, + 'enable_internet') + self.set_if_present(server_metadata, 'categoryIds', data, + 
'keywords') + self.set_if_present(server_metadata, 'datasetDataSources', data, + 'dataset_sources') + self.set_if_present(server_metadata, 'kernelDataSources', data, + 'kernel_sources') + self.set_if_present(server_metadata, 'competitionDataSources', + data, 'competition_sources') + with open(metadata_path, 'w') as f: + json.dump(data, f, indent=2) + + return effective_dir + else: + return script_path + + def kernels_pull_cli(self, + kernel, + kernel_opt=None, + path=None, + metadata=False): + """ client wrapper for kernels_pull + """ + kernel = kernel or kernel_opt + effective_path = self.kernels_pull(kernel, + path=path, + metadata=metadata, + quiet=False) + if metadata: + print('Source code and metadata downloaded to ' + effective_path) + else: + print('Source code downloaded to ' + effective_path) + + def kernels_output(self, kernel, path, force=False, quiet=True): + """ retrieve output for a specified kernel + Parameters + ========== + kernel: the kernel to output + path: the path to pull files to on the filesystem + force: if output already exists, force overwrite (default False) + quiet: suppress verbosity (default is True) + """ + if kernel is None: + raise ValueError('A kernel must be specified') + if '/' in kernel: + self.validate_kernel_string(kernel) + kernel_url_list = kernel.split('/') + owner_slug = kernel_url_list[0] + kernel_slug = kernel_url_list[1] + else: + owner_slug = self.get_config_value(self.CONFIG_NAME_USER) + kernel_slug = kernel + + if path is None: + target_dir = self.get_default_download_dir('kernels', owner_slug, + kernel_slug, 'output') + else: + target_dir = path + + if not os.path.exists(target_dir): + os.makedirs(target_dir) + + if not os.path.isdir(target_dir): + raise ValueError( + 'You must specify a directory for the kernels output') + + response = self.process_response( + self.kernel_output_with_http_info(owner_slug, kernel_slug)) + outfiles = [] + for item in response['files']: + outfile = os.path.join(target_dir, item['fileName']) + outfiles.append(outfile) + download_response = requests.get(item['url']) + if force or self.download_needed(item, outfile, quiet): + os.makedirs(os.path.split(outfile)[0], exist_ok=True) + with open(outfile, 'wb') as out: + out.write(download_response.content) + if not quiet: + print('Output file downloaded to %s' % outfile) + + log = response['log'] + if log: + outfile = os.path.join(target_dir, kernel_slug + '.log') + outfiles.append(outfile) + with open(outfile, 'w') as out: + out.write(log) + if not quiet: + print('Kernel log downloaded to %s ' % outfile) + + return outfiles + + def kernels_output_cli(self, + kernel, + kernel_opt=None, + path=None, + force=False, + quiet=False): + """ client wrapper for kernels_output, with same arguments. Extra + arguments are described below, and see kernels_output for others. + Parameters + ========== + kernel_opt: option from client instead of kernel, if not defined + """ + kernel = kernel or kernel_opt + self.kernels_output(kernel, path, force, quiet) + + def kernels_status(self, kernel): + """ call to the api to get the status of a kernel. 
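+
+        Illustrative example (the slug is a placeholder): the processed
+        response behaves like a dict with 'status' and 'failureMessage'
+        keys, so a caller can do
+
+            response = api.kernels_status('username/my-kernel')
+            print(response['status'])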
+ Parameters + ========== + kernel: the kernel to get the status for + """ + if kernel is None: + raise ValueError('A kernel must be specified') + if '/' in kernel: + self.validate_kernel_string(kernel) + kernel_url_list = kernel.split('/') + owner_slug = kernel_url_list[0] + kernel_slug = kernel_url_list[1] + else: + owner_slug = self.get_config_value(self.CONFIG_NAME_USER) + kernel_slug = kernel + response = self.process_response( + self.kernel_status_with_http_info(owner_slug, kernel_slug)) + return response + + def kernels_status_cli(self, kernel, kernel_opt=None): + """ client wrapper for kernel_status + Parameters + ========== + kernel_opt: additional option from the client, if kernel not defined + """ + kernel = kernel or kernel_opt + response = self.kernels_status(kernel) + status = response['status'] + message = response['failureMessage'] + if message: + print('%s has status "%s"' % (kernel, status)) + print('Failure message: "%s"' % message) + else: + print('%s has status "%s"' % (kernel, status)) + + def download_needed(self, response, outfile, quiet=True): + """ determine if a download is needed based on timestamp. Return True + if needed (remote is newer) or False if local is newest. + Parameters + ========== + response: the response from the API + outfile: the output file to write to + quiet: suppress verbose output (default is True) + """ + try: + remote_date = datetime.strptime(response.headers['Last-Modified'], + '%a, %d %b %Y %X %Z') + if isfile(outfile): + local_date = datetime.fromtimestamp(os.path.getmtime(outfile)) + if remote_date <= local_date: + if not quiet: + print( + os.path.basename(outfile) + + ': Skipping, found more recently modified local ' + 'copy (use --force to force download)') + return False + except: + pass + return True + + def print_table(self, items, fields): + """ print a table of items, for a set of fields defined + + Parameters + ========== + items: a list of items to print + fields: a list of fields to select from items + """ + formats = [] + borders = [] + for f in fields: + length = max(len(f), + max([len(self.string(getattr(i, f))) for i in items])) + justify = '>' if isinstance(getattr( + items[0], f), int) or f == 'size' or f == 'reward' else '<' + formats.append('{:' + justify + self.string(length + 2) + '}') + borders.append('-' * length + ' ') + row_format = u''.join(formats) + headers = [f + ' ' for f in fields] + print(row_format.format(*headers)) + print(row_format.format(*borders)) + for i in items: + i_fields = [self.string(getattr(i, f)) + ' ' for f in fields] + try: + print(row_format.format(*i_fields)) + except UnicodeEncodeError: + print(row_format.format(*i_fields).encode('utf-8')) + + def print_csv(self, items, fields): + """ print a set of fields in a set of items using a csv.writer + + Parameters + ========== + items: a list of items to print + fields: a list of fields to select from items + """ + writer = csv.writer(sys.stdout) + writer.writerow(fields) + for i in items: + i_fields = [self.string(getattr(i, f)) for f in fields] + writer.writerow(i_fields) + + def string(self, item): + return item if isinstance(item, unicode) else str(item) + + def get_or_fail(self, data, key): + if key in data: + return data[key] + raise ValueError('Key ' + key + ' not found in data') + + def get_or_default(self, data, key, default): + if key in data: + return data[key] + return default + + def set_if_present(self, data, key, output, output_key): + if key in data: + output[output_key] = data[key] + + def get_dataset_metadata_file(self, 
folder): + meta_file = os.path.join(folder, self.DATASET_METADATA_FILE) + if not os.path.isfile(meta_file): + meta_file = os.path.join(folder, self.OLD_DATASET_METADATA_FILE) + if not os.path.isfile(meta_file): + raise ValueError('Metadata file not found: ' + + self.DATASET_METADATA_FILE) + return meta_file + + def process_response(self, result): + """ process a response from the API. We check the API version against + the client's to see if it's old, and give them a warning (once) + + Parameters + ========== + result: the result from the API + """ + if len(result) == 3: + data = result[0] + headers = result[2] + if self.HEADER_API_VERSION in headers: + api_version = headers[self.HEADER_API_VERSION] + if (not self.already_printed_version_warning + and not self.is_up_to_date(api_version)): + print('Warning: Looks like you\'re using an outdated API ' + 'Version, please consider updating (server ' + + api_version + ' / client ' + self.__version__ + ')') + self.already_printed_version_warning = True + return data + return result + + def is_up_to_date(self, server_version): + """ determine if a client (on the local user's machine) is up to date + with the version provided on the server. Return a boolean with True + or False + Parameters + ========== + server_version: the server version string to compare to the host + """ + client_split = self.__version__.split('.') + client_len = len(client_split) + server_split = server_version.split('.') + server_len = len(server_split) + + # Make both lists the same length + for i in range(client_len, server_len): + client_split.append('0') + for i in range(server_len, client_len): + server_split.append('0') + + for i in range(0, client_len): + if 'b' in client_split[i]: + # Using a beta version, don't check + return True + client = int(client_split[i]) + server = int(server_split[i]) + if client < server: + return False + elif server < client: + return True + + return True + + def upload_files(self, + request, + resources, + folder, + quiet=False, + dir_mode='skip'): + """ upload files in a folder + Parameters + ========== + request: the prepared request + resources: the files to upload + folder: the folder to upload from + quiet: suppress verbose output (default is False) + """ + for file_name in os.listdir(folder): + if (file_name == self.DATASET_METADATA_FILE + or file_name == self.OLD_DATASET_METADATA_FILE + or file_name == self.KERNEL_METADATA_FILE): + continue + full_path = os.path.join(folder, file_name) + + if os.path.isfile(full_path): + exitcode = self._upload_file(file_name, full_path, quiet, + request, resources) + if exitcode: + return + elif os.path.isdir(full_path): + if dir_mode in ['zip', 'tar']: + temp_dir = tempfile.mkdtemp() + try: + _, dir_name = os.path.split(full_path) + archive_path = shutil.make_archive( + os.path.join(temp_dir, dir_name), dir_mode, + full_path) + _, archive_name = os.path.split(archive_path) + exitcode = self._upload_file(archive_name, + archive_path, quiet, + request, resources) + finally: + shutil.rmtree(temp_dir) + if exitcode: + return + elif not quiet: + print("Skipping folder: " + file_name + + "; use '--dir-mode' to upload folders") + else: + if not quiet: + print('Skipping: ' + file_name) + + def _upload_file(self, file_name, full_path, quiet, request, resources): + """ Helper function to upload a single file + Parameters + ========== + file_name: name of the file to upload + full_path: path to the file to upload + request: the prepared request + resources: optional file metadata + quiet: suppress verbose 
output + :return: True - upload unsuccessful; False - upload successful + """ + + if not quiet: + print('Starting upload for file ' + file_name) + + content_length = os.path.getsize(full_path) + token = self.dataset_upload_file(full_path, quiet) + if token is None: + if not quiet: + print('Upload unsuccessful: ' + file_name) + return True + if not quiet: + print('Upload successful: ' + file_name + ' (' + + File.get_size(content_length) + ')') + upload_file = DatasetUploadFile() + upload_file.token = token + if resources: + for item in resources: + if file_name == item.get('path'): + upload_file.description = item.get('description') + if 'schema' in item: + fields = self.get_or_default(item['schema'], 'fields', + []) + processed = [] + count = 0 + for field in fields: + processed.append(self.process_column(field)) + processed[count].order = count + count += 1 + upload_file.columns = processed + request.files.append(upload_file) + return False + + def process_column(self, column): + """ process a column, check for the type, and return the processed + column + Parameters + ========== + column: a list of values in a column to be processed + """ + processed_column = DatasetColumn(name=self.get_or_fail(column, 'name'), + description=self.get_or_default( + column, 'description', '')) + if 'type' in column: + original_type = column['type'].lower() + processed_column.original_type = original_type + if (original_type == 'string' or original_type == 'date' + or original_type == 'time' or original_type == 'yearmonth' + or original_type == 'duration' + or original_type == 'geopoint' + or original_type == 'geojson'): + processed_column.type = 'string' + elif (original_type == 'numeric' or original_type == 'number' + or original_type == 'year'): + processed_column.type = 'numeric' + elif original_type == 'boolean': + processed_column.type = 'boolean' + elif original_type == 'datetime': + processed_column.type = 'datetime' + else: + # Possibly extended data type - not going to try to track those + # here. Will set the type and let the server handle it. + processed_column.type = original_type + return processed_column + + def upload_complete(self, path, url, quiet): + """ function to complete an upload to retrieve a path from a url + Parameters + ========== + path: the path for the upload that is read in + url: the url to send the POST to + quiet: suppress verbose output (default is False) + """ + file_size = os.path.getsize(path) + try: + with tqdm(total=file_size, + unit='B', + unit_scale=True, + unit_divisor=1024, + disable=quiet) as progress_bar: + with io.open(path, 'rb', buffering=0) as fp: + reader = TqdmBufferedReader(fp, progress_bar) + session = requests.Session() + retries = Retry(total=10, backoff_factor=0.5) + adapter = HTTPAdapter(max_retries=retries) + session.mount('http://', adapter) + session.mount('https://', adapter) + response = session.put(url, data=reader) + except Exception as error: + print(error) + return False + return response.status_code == 200 or response.status_code == 201 + + def validate_dataset_string(self, dataset): + """ determine if a dataset string is valid, meaning it is in the format + of {username}/{dataset-slug}. 
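+
+        Illustrative example (slugs are placeholders):
+        'username/some-dataset' passes validation, while 'some-dataset',
+        'username/' and '/some-dataset' all raise ValueError.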
+ Parameters + ========== + dataset: the dataset name to validate + """ + if dataset: + if '/' not in dataset: + raise ValueError('Dataset must be specified in the form of ' + '\'{username}/{dataset-slug}\'') + + split = dataset.split('/') + if not split[0] or not split[1]: + raise ValueError('Invalid dataset specification ' + dataset) + + def validate_kernel_string(self, kernel): + """ determine if a kernel string is valid, meaning it is in the format + of {username}/{kernel-slug}. + Parameters + ========== + kernel: the kernel name to validate + """ + if kernel: + if '/' not in kernel: + raise ValueError('Kernel must be specified in the form of ' + '\'{username}/{kernel-slug}\'') + + split = kernel.split('/') + if not split[0] or not split[1]: + raise ValueError('Kernel must be specified in the form of ' + '\'{username}/{kernel-slug}\'') + + if len(split[1]) < 5: + raise ValueError( + 'Kernel slug must be at least five characters') + + def validate_resources(self, folder, resources): + """ validate resources is a wrapper to validate the existence of files + and that there are no duplicates for a folder and set of resources. + + Parameters + ========== + folder: the folder to validate + resources: one or more resources to validate within the folder + """ + self.validate_files_exist(folder, resources) + self.validate_no_duplicate_paths(resources) + + def validate_files_exist(self, folder, resources): + """ ensure that one or more resource files exist in a folder + + Parameters + ========== + folder: the folder to validate + resources: one or more resources to validate within the folder + """ + for item in resources: + file_name = item.get('path') + full_path = os.path.join(folder, file_name) + if not os.path.isfile(full_path): + raise ValueError('%s does not exist' % full_path) + + def validate_no_duplicate_paths(self, resources): + """ ensure that the user has not provided duplicate paths in + a list of resources. + + Parameters + ========== + resources: one or more resources to validate not duplicated + """ + paths = set() + for item in resources: + file_name = item.get('path') + if file_name in paths: + raise ValueError( + '%s path was specified more than once in the metadata' % + file_name) + paths.add(file_name) + + def convert_to_dataset_file_metadata(self, file_data, path): + """ convert a set of file_data to a metadata file at path + + Parameters + ========== + file_data: a dictionary of file data to write to file + path: the path to write the metadata to + """ + as_metadata = { + 'path': os.path.join(path, file_data['name']), + 'description': file_data['description'] + } + + schema = {} + fields = [] + for column in file_data['columns']: + field = { + 'name': column['name'], + 'title': column['description'], + 'type': column['type'] + } + fields.append(field) + schema['fields'] = fields + as_metadata['schema'] = schema + + return as_metadata + + +class TqdmBufferedReader(io.BufferedReader): + def __init__(self, raw, progress_bar): + """ helper class to implement an io.BufferedReader + Parameters + ========== + raw: bytes data to pass to the buffered reader + progress_bar: a progress bar to initialize the reader + """ + io.BufferedReader.__init__(self, raw) + self.progress_bar = progress_bar + + def read(self, *args, **kwargs): + """ read the buffer, passing named and non named arguments to the + io.BufferedReader function. 
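+
+        Illustrative example: wrapping an open file as
+        TqdmBufferedReader(fp, progress_bar) makes each read() call
+        advance the progress bar by the number of bytes returned.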
+ """ + buf = io.BufferedReader.read(self, *args, **kwargs) + self.increment(len(buf)) + return buf + + def increment(self, length): + """ increment the reader by some length + + Parameters + ========== + length: bytes to increment the reader by + """ + self.progress_bar.update(length) diff --git a/.local/lib/python3.8/site-packages/kaggle/api_client.py b/.local/lib/python3.8/site-packages/kaggle/api_client.py new file mode 100644 index 0000000..a740ce6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/kaggle/api_client.py @@ -0,0 +1,633 @@ +#!/usr/bin/python +# +# Copyright 2021 Kaggle Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# coding: utf-8 +""" + Kaggle API + + API for kaggle.com # noqa: E501 + + OpenAPI spec version: 1 + + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + +from __future__ import absolute_import + +import datetime +import json +import mimetypes +from multiprocessing.pool import ThreadPool +import os +import re +import tempfile + +# python 2 and python 3 compatibility library +import six +from six.moves.urllib.parse import quote + +from kaggle.configuration import Configuration +import kaggle.models +from kaggle import rest + + +class ApiClient(object): + """Generic API client for Swagger client library builds. + + Swagger generic API client. This client handles the client- + server communication, and is invariant across implementations. Specifics of + the methods and models for each application are generated from the Swagger + templates. + + NOTE: This class is auto generated by the swagger code generator program. + Ref: https://github.com/swagger-api/swagger-codegen + Do not edit the class manually. + + :param configuration: .Configuration object for this client + :param header_name: a header to pass when making calls to the API. + :param header_value: a header value to pass when making calls to + the API. + :param cookie: a cookie to include in the header when making calls + to the API + """ + + PRIMITIVE_TYPES = (float, bool, bytes, six.text_type) + six.integer_types + NATIVE_TYPES_MAPPING = { + 'int': int, + 'long': int if six.PY3 else long, # noqa: F821 + 'float': float, + 'str': str, + 'bool': bool, + 'date': datetime.date, + 'datetime': datetime.datetime, + 'object': object, + } + + def __init__(self, configuration=None, header_name=None, header_value=None, + cookie=None): + if configuration is None: + configuration = Configuration() + self.configuration = configuration + + self.pool = ThreadPool() + self.rest_client = rest.RESTClientObject(configuration) + self.default_headers = {} + if header_name is not None: + self.default_headers[header_name] = header_value + self.cookie = cookie + # Set default User-Agent. 
+ self.user_agent = 'Swagger-Codegen/1/python' + + @property + def user_agent(self): + """User agent for this API client""" + return self.default_headers['User-Agent'] + + @user_agent.setter + def user_agent(self, value): + self.default_headers['User-Agent'] = value + + def set_default_header(self, header_name, header_value): + self.default_headers[header_name] = header_value + + def __call_api( + self, resource_path, method, path_params=None, + query_params=None, header_params=None, body=None, post_params=None, + files=None, response_type=None, auth_settings=None, + _return_http_data_only=None, collection_formats=None, + _preload_content=True, _request_timeout=None): + + config = self.configuration + + # header parameters + header_params = header_params or {} + header_params.update(self.default_headers) + if self.cookie: + header_params['Cookie'] = self.cookie + if header_params: + header_params = self.sanitize_for_serialization(header_params) + header_params = dict(self.parameters_to_tuples(header_params, + collection_formats)) + + # path parameters + if path_params: + path_params = self.sanitize_for_serialization(path_params) + path_params = self.parameters_to_tuples(path_params, + collection_formats) + for k, v in path_params: + # specified safe chars, encode everything + resource_path = resource_path.replace( + '{%s}' % k, + quote(str(v), safe=config.safe_chars_for_path_param) + ) + + # query parameters + if query_params: + query_params = self.sanitize_for_serialization(query_params) + query_params = self.parameters_to_tuples(query_params, + collection_formats) + + # post parameters + if post_params or files: + post_params = self.prepare_post_parameters(post_params, files) + post_params = self.sanitize_for_serialization(post_params) + post_params = self.parameters_to_tuples(post_params, + collection_formats) + + # auth setting + self.update_params_for_auth(header_params, query_params, auth_settings) + + # body + if body: + body = self.sanitize_for_serialization(body) + + # request url + url = self.configuration.host + resource_path + + # perform request and return response + response_data = self.request( + method, url, query_params=query_params, headers=header_params, + post_params=post_params, body=body, + _preload_content=_preload_content, + _request_timeout=_request_timeout) + + self.last_response = response_data + + return_data = response_data + if _preload_content: + # deserialize response data + if response_type: + return_data = self.deserialize(response_data, response_type) + else: + return_data = None + + if _return_http_data_only: + return (return_data) + else: + return (return_data, response_data.status, + response_data.getheaders()) + + def sanitize_for_serialization(self, obj): + """Builds a JSON POST object. + + If obj is None, return None. + If obj is str, int, long, float, bool, return directly. + If obj is datetime.datetime, datetime.date + convert to string in iso8601 format. + If obj is list, sanitize each element in the list. + If obj is dict, return the dict. + If obj is swagger model, return the properties dict. + + :param obj: The data to serialize. + :return: The serialized form of data. 
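+
+        Illustrative example: datetime.date(2021, 1, 1) becomes
+        '2021-01-01', and lists and dicts are sanitized element by element.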
+ """ + if obj is None: + return None + elif isinstance(obj, self.PRIMITIVE_TYPES): + return obj + elif isinstance(obj, list): + return [self.sanitize_for_serialization(sub_obj) + for sub_obj in obj] + elif isinstance(obj, tuple): + return tuple(self.sanitize_for_serialization(sub_obj) + for sub_obj in obj) + elif isinstance(obj, (datetime.datetime, datetime.date)): + return obj.isoformat() + + if isinstance(obj, dict): + obj_dict = obj + else: + # Convert model obj to dict except + # attributes `swagger_types`, `attribute_map` + # and attributes which value is not None. + # Convert attribute name to json key in + # model definition for request. + obj_dict = {obj.attribute_map[attr]: getattr(obj, attr) + for attr, _ in six.iteritems(obj.swagger_types) + if getattr(obj, attr) is not None} + + return {key: self.sanitize_for_serialization(val) + for key, val in six.iteritems(obj_dict)} + + def deserialize(self, response, response_type): + """Deserializes response into an object. + + :param response: RESTResponse object to be deserialized. + :param response_type: class literal for + deserialized object, or string of class name. + + :return: deserialized object. + """ + # handle file downloading + # save response body into a tmp file and return the instance + if response_type == "file": + return self.__deserialize_file(response) + + # fetch data from response object + try: + data = json.loads(response.data) + except ValueError: + data = response.data + + return self.__deserialize(data, response_type) + + def __deserialize(self, data, klass): + """Deserializes dict, list, str into an object. + + :param data: dict, list or str. + :param klass: class literal, or string of class name. + + :return: object. + """ + if data is None: + return None + + if type(klass) == str: + if klass.startswith('list['): + sub_kls = re.match('list\[(.*)\]', klass).group(1) + return [self.__deserialize(sub_data, sub_kls) + for sub_data in data] + + if klass.startswith('dict('): + sub_kls = re.match('dict\(([^,]*), (.*)\)', klass).group(2) + return {k: self.__deserialize(v, sub_kls) + for k, v in six.iteritems(data)} + + # convert str to class + if klass in self.NATIVE_TYPES_MAPPING: + klass = self.NATIVE_TYPES_MAPPING[klass] + else: + klass = getattr(kaggle.models, klass) + + if klass in self.PRIMITIVE_TYPES: + return self.__deserialize_primitive(data, klass) + elif klass == object: + return self.__deserialize_object(data) + elif klass == datetime.date: + return self.__deserialize_date(data) + elif klass == datetime.datetime: + return self.__deserialize_datatime(data) + else: + return self.__deserialize_model(data, klass) + + def call_api(self, resource_path, method, + path_params=None, query_params=None, header_params=None, + body=None, post_params=None, files=None, + response_type=None, auth_settings=None, async_req=None, + _return_http_data_only=None, collection_formats=None, + _preload_content=True, _request_timeout=None): + """Makes the HTTP request (synchronous) and returns deserialized data. + + To make an async request, set the async_req parameter. + + :param resource_path: Path to method endpoint. + :param method: Method to call. + :param path_params: Path parameters in the url. + :param query_params: Query parameters in the url. + :param header_params: Header parameters to be + placed in the request header. + :param body: Request body. + :param post_params dict: Request post form parameters, + for `application/x-www-form-urlencoded`, `multipart/form-data`. 
+ :param auth_settings list: Auth Settings names for the request. + :param response: Response data type. + :param files dict: key -> filename, value -> filepath, + for `multipart/form-data`. + :param async_req bool: execute request asynchronously + :param _return_http_data_only: response data without head status code + and headers + :param collection_formats: dict of collection formats for path, query, + header, and post parameters. + :param _preload_content: if False, the urllib3.HTTPResponse object will + be returned without reading/decoding response + data. Default is True. + :param _request_timeout: timeout setting for this request. If one + number provided, it will be total request + timeout. It can also be a pair (tuple) of + (connection, read) timeouts. + :return: + If async_req parameter is True, + the request will be called asynchronously. + The method will return the request thread. + If parameter async_req is False or missing, + then the method will return the response directly. + """ + if not async_req: + return self.__call_api(resource_path, method, + path_params, query_params, header_params, + body, post_params, files, + response_type, auth_settings, + _return_http_data_only, collection_formats, + _preload_content, _request_timeout) + else: + thread = self.pool.apply_async(self.__call_api, (resource_path, + method, path_params, query_params, + header_params, body, + post_params, files, + response_type, auth_settings, + _return_http_data_only, + collection_formats, + _preload_content, _request_timeout)) + return thread + + def request(self, method, url, query_params=None, headers=None, + post_params=None, body=None, _preload_content=True, + _request_timeout=None): + """Makes the HTTP request using RESTClient.""" + if method == "GET": + return self.rest_client.GET(url, + query_params=query_params, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + headers=headers) + elif method == "HEAD": + return self.rest_client.HEAD(url, + query_params=query_params, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + headers=headers) + elif method == "OPTIONS": + return self.rest_client.OPTIONS(url, + query_params=query_params, + headers=headers, + post_params=post_params, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + body=body) + elif method == "POST": + return self.rest_client.POST(url, + query_params=query_params, + headers=headers, + post_params=post_params, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + body=body) + elif method == "PUT": + return self.rest_client.PUT(url, + query_params=query_params, + headers=headers, + post_params=post_params, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + body=body) + elif method == "PATCH": + return self.rest_client.PATCH(url, + query_params=query_params, + headers=headers, + post_params=post_params, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + body=body) + elif method == "DELETE": + return self.rest_client.DELETE(url, + query_params=query_params, + headers=headers, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + body=body) + else: + raise ValueError( + "http method must be `GET`, `HEAD`, `OPTIONS`," + " `POST`, `PATCH`, `PUT` or `DELETE`." + ) + + def parameters_to_tuples(self, params, collection_formats): + """Get parameters as list of tuples, formatting collections. 
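+
+        Illustrative example: {'ids': [1, 2]} with collection_formats
+        {'ids': 'csv'} yields [('ids', '1,2')]; with {'ids': 'multi'} it
+        yields [('ids', 1), ('ids', 2)].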
+ + :param params: Parameters as dict or list of two-tuples + :param dict collection_formats: Parameter collection formats + :return: Parameters as list of tuples, collections formatted + """ + new_params = [] + if collection_formats is None: + collection_formats = {} + for k, v in six.iteritems(params) if isinstance(params, dict) else params: # noqa: E501 + if k in collection_formats: + collection_format = collection_formats[k] + if collection_format == 'multi': + new_params.extend((k, value) for value in v) + else: + if collection_format == 'ssv': + delimiter = ' ' + elif collection_format == 'tsv': + delimiter = '\t' + elif collection_format == 'pipes': + delimiter = '|' + else: # csv is the default + delimiter = ',' + new_params.append( + (k, delimiter.join(str(value) for value in v))) + else: + new_params.append((k, v)) + return new_params + + def prepare_post_parameters(self, post_params=None, files=None): + """Builds form parameters. + + :param post_params: Normal form parameters. + :param files: File parameters. + :return: Form parameters with files. + """ + params = [] + + if post_params: + params = post_params + + if files: + for k, v in six.iteritems(files): + if not v: + continue + file_names = v if type(v) is list else [v] + for n in file_names: + with open(n, 'rb') as f: + filename = os.path.basename(f.name) + filedata = f.read() + mimetype = (mimetypes.guess_type(filename)[0] or + 'application/octet-stream') + params.append( + tuple([k, tuple([filename, filedata, mimetype])])) + + return params + + def select_header_accept(self, accepts): + """Returns `Accept` based on an array of accepts provided. + + :param accepts: List of headers. + :return: Accept (e.g. application/json). + """ + if not accepts: + return + + accepts = [x.lower() for x in accepts] + + if 'application/json' in accepts: + return 'application/json' + else: + return ', '.join(accepts) + + def select_header_content_type(self, content_types): + """Returns `Content-Type` based on an array of content_types provided. + + :param content_types: List of content-types. + :return: Content-Type (e.g. application/json). + """ + if not content_types: + return 'application/json' + + content_types = [x.lower() for x in content_types] + + if 'application/json' in content_types or '*/*' in content_types: + return 'application/json' + else: + return content_types[0] + + def update_params_for_auth(self, headers, querys, auth_settings): + """Updates header and query params based on authentication setting. + + :param headers: Header parameters dict to be updated. + :param querys: Query parameters tuple list to be updated. + :param auth_settings: Authentication setting identifiers list. + """ + if not auth_settings: + return + + for auth in auth_settings: + auth_setting = self.configuration.auth_settings().get(auth) + if auth_setting: + if not auth_setting['value']: + continue + elif auth_setting['in'] == 'header': + headers[auth_setting['key']] = auth_setting['value'] + elif auth_setting['in'] == 'query': + querys.append((auth_setting['key'], auth_setting['value'])) + else: + raise ValueError( + 'Authentication token must be in `query` or `header`' + ) + + def __deserialize_file(self, response): + """Deserializes body to file + + Saves response body into a file in a temporary folder, + using the filename from the `Content-Disposition` header if provided. + + :param response: RESTResponse. + :return: file path. 
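+
+        Illustrative example: a response carrying 'Content-Disposition:
+        attachment; filename="data.csv"' is written to data.csv inside the
+        configured temp folder and that path is returned.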
+ """ + fd, path = tempfile.mkstemp(dir=self.configuration.temp_folder_path) + os.close(fd) + os.remove(path) + + content_disposition = response.getheader("Content-Disposition") + if content_disposition: + filename = re.search(r'filename=[\'"]?([^\'"\s]+)[\'"]?', + content_disposition).group(1) + path = os.path.join(os.path.dirname(path), filename) + + with open(path, "wb") as f: + f.write(response.data) + + return path + + def __deserialize_primitive(self, data, klass): + """Deserializes string to primitive type. + + :param data: str. + :param klass: class literal. + + :return: int, long, float, str, bool. + """ + try: + return klass(data) + except UnicodeEncodeError: + return six.text_type(data) + except TypeError: + return data + + def __deserialize_object(self, value): + """Return a original value. + + :return: object. + """ + return value + + def __deserialize_date(self, string): + """Deserializes string to date. + + :param string: str. + :return: date. + """ + try: + from dateutil.parser import parse + return parse(string).date() + except ImportError: + return string + except ValueError: + raise rest.ApiException( + status=0, + reason="Failed to parse `{0}` as date object".format(string) + ) + + def __deserialize_datatime(self, string): + """Deserializes string to datetime. + + The string should be in iso8601 datetime format. + + :param string: str. + :return: datetime. + """ + try: + from dateutil.parser import parse + return parse(string) + except ImportError: + return string + except ValueError: + raise rest.ApiException( + status=0, + reason=( + "Failed to parse `{0}` as datetime object" + .format(string) + ) + ) + + def __deserialize_model(self, data, klass): + """Deserializes list or dict to model. + + :param data: dict, list. + :param klass: class literal. + :return: model object. + """ + + if not klass.swagger_types and not hasattr(klass, + 'get_real_child_model'): + return data + + kwargs = {} + if klass.swagger_types is not None: + for attr, attr_type in six.iteritems(klass.swagger_types): + if (data is not None and + klass.attribute_map[attr] in data and + isinstance(data, (list, dict))): + value = data[klass.attribute_map[attr]] + kwargs[attr] = self.__deserialize(value, attr_type) + + instance = klass(**kwargs) + + if hasattr(instance, 'get_real_child_model'): + klass_name = instance.get_real_child_model(data) + if klass_name: + instance = self.__deserialize(data, klass_name) + return instance diff --git a/.local/lib/python3.8/site-packages/kaggle/cli.py b/.local/lib/python3.8/site-packages/kaggle/cli.py new file mode 100644 index 0000000..65cac79 --- /dev/null +++ b/.local/lib/python3.8/site-packages/kaggle/cli.py @@ -0,0 +1,1095 @@ +#!/usr/bin/python +# +# Copyright 2021 Kaggle Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#!/usr/bin/python +# +# Copyright 2019 Kaggle Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# coding=utf-8 +from __future__ import print_function +import argparse +from kaggle import api +from kaggle import KaggleApi +from .rest import ApiException +import six + + +def main(): + parser = argparse.ArgumentParser( + formatter_class=argparse.RawTextHelpFormatter) + + parser.add_argument('-v', + '--version', + action='version', + version='Kaggle API ' + KaggleApi.__version__) + + subparsers = parser.add_subparsers(title='commands', + help=Help.kaggle, + dest='command') + subparsers.required = True + subparsers.choices = Help.kaggle_choices + parse_competitions(subparsers) + parse_datasets(subparsers) + parse_kernels(subparsers) + parse_config(subparsers) + args = parser.parse_args() + command_args = {} + command_args.update(vars(args)) + del command_args['func'] + del command_args['command'] + error = False + try: + out = args.func(**command_args) + except ApiException as e: + print(str(e.status) + ' - ' + e.reason) + out = None + error = True + except ValueError as e: + print(e) + out = None + error = True + except KeyboardInterrupt: + print('User cancelled operation') + out = None + if out is not None: + print(out, end='') + + # This is so that scripts that pick up on error codes can tell when there was a failure + if error: + exit(1) + + +def parse_competitions(subparsers): + if six.PY2: + parser_competitions = subparsers.add_parser( + 'competitions', + formatter_class=argparse.RawTextHelpFormatter, + help=Help.group_competitions) + else: + parser_competitions = subparsers.add_parser( + 'competitions', + formatter_class=argparse.RawTextHelpFormatter, + help=Help.group_competitions, + aliases=['c']) + subparsers_competitions = parser_competitions.add_subparsers( + title='commands', dest='command') + subparsers_competitions.required = True + subparsers_competitions.choices = Help.competitions_choices + + # Competitions list + parser_competitions_list = subparsers_competitions.add_parser( + 'list', + formatter_class=argparse.RawTextHelpFormatter, + help=Help.command_competitions_list) + parser_competitions_list_optional = parser_competitions_list._action_groups.pop( + ) + parser_competitions_list_optional.add_argument( + '--group', + dest='group', + required=False, + help=Help.param_competition_group) + parser_competitions_list_optional.add_argument( + '--category', + dest='category', + required=False, + help=Help.param_competition_category) + parser_competitions_list_optional.add_argument( + '--sort-by', + dest='sort_by', + required=False, + help=Help.param_competition_sort_by) + parser_competitions_list_optional.add_argument('-p', + '--page', + dest='page', + default=1, + required=False, + help=Help.param_page) + parser_competitions_list_optional.add_argument('-s', + '--search', + dest='search', + required=False, + help=Help.param_search) + parser_competitions_list_optional.add_argument('-v', + '--csv', + dest='csv_display', + action='store_true', + help=Help.param_csv) + parser_competitions_list._action_groups.append( + parser_competitions_list_optional) + parser_competitions_list.set_defaults(func=api.competitions_list_cli) + + # Competitions list files + 
parser_competitions_files = subparsers_competitions.add_parser( + 'files', + formatter_class=argparse.RawTextHelpFormatter, + help=Help.command_competitions_files) + parser_competitions_files_optional = parser_competitions_files._action_groups.pop( + ) + parser_competitions_files_optional.add_argument( + 'competition', nargs='?', default=None, help=Help.param_competition) + parser_competitions_files_optional.add_argument('-c', + '--competition', + dest='competition_opt', + required=False, + help=argparse.SUPPRESS) + parser_competitions_files_optional.add_argument('-v', + '--csv', + dest='csv_display', + action='store_true', + help=Help.param_csv) + parser_competitions_files_optional.add_argument('-q', + '--quiet', + dest='quiet', + action='store_true', + help=Help.param_quiet) + parser_competitions_files._action_groups.append( + parser_competitions_files_optional) + parser_competitions_files.set_defaults(func=api.competition_list_files_cli) + + # Competitions download + parser_competitions_download = subparsers_competitions.add_parser( + 'download', + formatter_class=argparse.RawTextHelpFormatter, + help=Help.command_competitions_download) + parser_competitions_download_optional = parser_competitions_download._action_groups.pop( + ) + parser_competitions_download_optional.add_argument( + 'competition', nargs='?', default=None, help=Help.param_competition) + parser_competitions_download_optional.add_argument('-c', + '--competition', + dest='competition_opt', + required=False, + help=argparse.SUPPRESS) + parser_competitions_download_optional.add_argument( + '-f', + '--file', + dest='file_name', + required=False, + help=Help.param_competition_file) + parser_competitions_download_optional.add_argument( + '-p', + '--path', + dest='path', + required=False, + help=Help.param_downfolder) + parser_competitions_download_optional.add_argument('-w', + '--wp', + dest='path', + action='store_const', + const='.', + required=False, + help=Help.param_wp) + parser_competitions_download_optional.add_argument('-o', + '--force', + dest='force', + action='store_true', + help=Help.param_force) + parser_competitions_download_optional.add_argument('-q', + '--quiet', + dest='quiet', + action='store_true', + help=Help.param_quiet) + parser_competitions_download._action_groups.append( + parser_competitions_download_optional) + parser_competitions_download.set_defaults( + func=api.competition_download_cli) + + # Competitions submit + parser_competitions_submit = subparsers_competitions.add_parser( + 'submit', + formatter_class=argparse.RawTextHelpFormatter, + help=Help.command_competitions_submit) + parser_competitions_submit_optional = parser_competitions_submit._action_groups.pop( + ) + parser_competitions_submit_required = parser_competitions_submit.add_argument_group( + 'required arguments') + parser_competitions_submit_optional.add_argument( + 'competition', nargs='?', default=None, help=Help.param_competition) + parser_competitions_submit_optional.add_argument('-c', + '--competition', + dest='competition_opt', + required=False, + help=argparse.SUPPRESS) + parser_competitions_submit_required.add_argument('-f', + '--file', + dest='file_name', + required=True, + help=Help.param_upfile) + parser_competitions_submit_required.add_argument( + '-m', + '--message', + dest='message', + required=True, + help=Help.param_competition_message) + parser_competitions_submit_optional.add_argument('-q', + '--quiet', + dest='quiet', + action='store_true', + help=Help.param_quiet) + parser_competitions_submit._action_groups.append( 
+ parser_competitions_submit_optional) + parser_competitions_submit.set_defaults(func=api.competition_submit_cli) + + # Competitions list submissions + parser_competitions_submissions = subparsers_competitions.add_parser( + 'submissions', + formatter_class=argparse.RawTextHelpFormatter, + help=Help.command_competitions_submissions) + parser_competitions_submissions_optional = parser_competitions_submissions._action_groups.pop( + ) + parser_competitions_submissions_optional.add_argument( + 'competition', nargs='?', default=None, help=Help.param_competition) + parser_competitions_submissions_optional.add_argument( + '-c', + '--competition', + dest='competition_opt', + required=False, + help=argparse.SUPPRESS) + parser_competitions_submissions_optional.add_argument('-v', + '--csv', + dest='csv_display', + action='store_true', + help=Help.param_csv) + parser_competitions_submissions_optional.add_argument( + '-q', + '--quiet', + dest='quiet', + action='store_true', + help=Help.param_quiet) + parser_competitions_submissions._action_groups.append( + parser_competitions_submissions_optional) + parser_competitions_submissions.set_defaults( + func=api.competition_submissions_cli) + + # Competitions leaderboard + parser_competitions_leaderboard = subparsers_competitions.add_parser( + 'leaderboard', + formatter_class=argparse.RawTextHelpFormatter, + help=Help.command_competitions_leaderboard) + parser_competitions_leaderboard_optional = parser_competitions_leaderboard._action_groups.pop( + ) + parser_competitions_leaderboard_optional.add_argument( + 'competition', nargs='?', default=None, help=Help.param_competition) + parser_competitions_leaderboard_optional.add_argument( + '-c', + '--competition', + dest='competition_opt', + required=False, + help=argparse.SUPPRESS) + parser_competitions_leaderboard_optional.add_argument( + '-s', + '--show', + dest='view', + action='store_true', + help=Help.param_competition_leaderboard_view) + parser_competitions_leaderboard_optional.add_argument( + '-d', + '--download', + dest='download', + action='store_true', + help=Help.param_competition_leaderboard_download) + parser_competitions_leaderboard_optional.add_argument( + '-p', '--path', dest='path', help=Help.param_downfolder) + parser_competitions_leaderboard_optional.add_argument('-v', + '--csv', + dest='csv_display', + action='store_true', + help=Help.param_csv) + parser_competitions_leaderboard_optional.add_argument( + '-q', + '--quiet', + dest='quiet', + action='store_true', + help=Help.param_quiet) + parser_competitions_leaderboard._action_groups.append( + parser_competitions_leaderboard_optional) + parser_competitions_leaderboard.set_defaults( + func=api.competition_leaderboard_cli) + + +def parse_datasets(subparsers): + if six.PY2: + parser_datasets = subparsers.add_parser( + 'datasets', + formatter_class=argparse.RawTextHelpFormatter, + help=Help.group_datasets) + else: + parser_datasets = subparsers.add_parser( + 'datasets', + formatter_class=argparse.RawTextHelpFormatter, + help=Help.group_datasets, + aliases=['d']) + subparsers_datasets = parser_datasets.add_subparsers(title='commands', + dest='command') + subparsers_datasets.required = True + subparsers_datasets.choices = Help.datasets_choices + + # Datasets list + parser_datasets_list = subparsers_datasets.add_parser( + 'list', + formatter_class=argparse.RawTextHelpFormatter, + help=Help.command_datasets_list) + parser_datasets_list_optional = parser_datasets_list._action_groups.pop() + parser_datasets_list.add_argument('--sort-by', + dest='sort_by', 
+ required=False, + help=Help.param_dataset_sort_by) + parser_datasets_list.add_argument('--size', + dest='size', + required=False, + help=Help.param_dataset_size) + parser_datasets_list.add_argument('--file-type', + dest='file_type', + required=False, + help=Help.param_dataset_file_type) + parser_datasets_list.add_argument('--license', + dest='license_name', + required=False, + help=Help.param_dataset_license) + parser_datasets_list.add_argument('--tags', + dest='tag_ids', + required=False, + help=Help.param_dataset_tags) + parser_datasets_list.add_argument('-s', + '--search', + dest='search', + required=False, + help=Help.param_search) + parser_datasets_list.add_argument('-m', + '--mine', + dest='mine', + action='store_true', + help=Help.param_mine) + parser_datasets_list.add_argument('--user', + dest='user', + required=False, + help=Help.param_dataset_user) + parser_datasets_list.add_argument('-p', + '--page', + dest='page', + default=1, + required=False, + help=Help.param_page) + parser_datasets_list.add_argument('-v', + '--csv', + dest='csv_display', + action='store_true', + help=Help.param_csv) + parser_datasets_list.add_argument('--max-size', + dest='max_size', + required=False, + help=Help.param_dataset_maxsize) + parser_datasets_list.add_argument('--min-size', + dest='min_size', + required=False, + help=Help.param_dataset_minsize) + parser_datasets_list._action_groups.append(parser_datasets_list_optional) + parser_datasets_list.set_defaults(func=api.dataset_list_cli) + + # Datasets file list + parser_datasets_files = subparsers_datasets.add_parser( + 'files', + formatter_class=argparse.RawTextHelpFormatter, + help=Help.command_datasets_files) + parser_datasets_files_optional = parser_datasets_files._action_groups.pop() + parser_datasets_files_optional.add_argument('dataset', + nargs='?', + default=None, + help=Help.param_dataset) + parser_datasets_files_optional.add_argument('-d', + '--dataset', + dest='dataset', + required=False, + help=argparse.SUPPRESS) + parser_datasets_files_optional.add_argument('-v', + '--csv', + dest='csv_display', + action='store_true', + help=Help.param_csv) + parser_datasets_files._action_groups.append(parser_datasets_files_optional) + parser_datasets_files.set_defaults(func=api.dataset_list_files_cli) + + # Datasets download + parser_datasets_download = subparsers_datasets.add_parser( + 'download', + formatter_class=argparse.RawTextHelpFormatter, + help=Help.command_datasets_download) + parser_datasets_download_optional = parser_datasets_download._action_groups.pop( + ) + parser_datasets_download_optional.add_argument('dataset', + nargs='?', + help=Help.param_dataset) + parser_datasets_download_optional.add_argument('-d', + '--dataset', + dest='dataset_opt', + required=False, + help=argparse.SUPPRESS) + parser_datasets_download_optional.add_argument( + '-f', + '--file', + dest='file_name', + required=False, + help=Help.param_dataset_file) + parser_datasets_download_optional.add_argument('-p', + '--path', + dest='path', + required=False, + help=Help.param_downfolder) + parser_datasets_download_optional.add_argument('-w', + '--wp', + dest='path', + action='store_const', + const='.', + required=False, + help=Help.param_wp) + parser_datasets_download_optional.add_argument('--unzip', + dest='unzip', + action='store_true', + help=Help.param_unzip) + parser_datasets_download_optional.add_argument('-o', + '--force', + dest='force', + action='store_true', + help=Help.param_force) + parser_datasets_download_optional.add_argument('-q', + '--quiet', + 
dest='quiet', + action='store_true', + help=Help.param_quiet) + parser_datasets_download._action_groups.append( + parser_datasets_download_optional) + parser_datasets_download.set_defaults(func=api.dataset_download_cli) + + # Datasets create + parser_datasets_create = subparsers_datasets.add_parser( + 'create', + formatter_class=argparse.RawTextHelpFormatter, + help=Help.command_datasets_new) + parser_datasets_create_optional = parser_datasets_create._action_groups.pop( + ) + parser_datasets_create_optional.add_argument( + '-p', + '--path', + dest='folder', + required=False, + help=Help.param_dataset_upfile) + parser_datasets_create_optional.add_argument('-u', + '--public', + dest='public', + action='store_true', + help=Help.param_public) + parser_datasets_create_optional.add_argument('-q', + '--quiet', + dest='quiet', + action='store_true', + help=Help.param_quiet) + parser_datasets_create_optional.add_argument('-t', + '--keep-tabular', + dest='convert_to_csv', + action='store_false', + help=Help.param_keep_tabular) + parser_datasets_create_optional.add_argument( + '-r', + '--dir-mode', + dest='dir_mode', + choices=['skip', 'zip', 'tar'], + default='skip', + help=Help.param_dir_mode) + parser_datasets_create._action_groups.append( + parser_datasets_create_optional) + parser_datasets_create.set_defaults(func=api.dataset_create_new_cli) + + # Datasets update + parser_datasets_version = subparsers_datasets.add_parser( + 'version', + formatter_class=argparse.RawTextHelpFormatter, + help=Help.command_datasets_new_version) + parser_datasets_version_optional = parser_datasets_version._action_groups.pop( + ) + parser_datasets_version_required = parser_datasets_version.add_argument_group( + 'required arguments') + parser_datasets_version_required.add_argument( + '-m', + '--message', + dest='version_notes', + required=True, + help=Help.param_dataset_version_notes) + parser_datasets_version_optional.add_argument( + '-p', + '--path', + dest='folder', + required=False, + help=Help.param_dataset_upfile) + parser_datasets_version_optional.add_argument('-q', + '--quiet', + dest='quiet', + action='store_true', + help=Help.param_quiet) + parser_datasets_version_optional.add_argument('-t', + '--keep-tabular', + dest='convert_to_csv', + action='store_false', + help=Help.param_keep_tabular) + parser_datasets_version_optional.add_argument( + '-r', + '--dir-mode', + dest='dir_mode', + choices=['skip', 'zip', 'tar'], + default='skip', + help=Help.param_dir_mode) + parser_datasets_version_optional.add_argument( + '-d', + '--delete-old-versions', + dest='delete_old_versions', + action='store_true', + help=Help.param_delete_old_version) + parser_datasets_version._action_groups.append( + parser_datasets_version_optional) + parser_datasets_version.set_defaults(func=api.dataset_create_version_cli) + + # Datasets init + parser_datasets_init = subparsers_datasets.add_parser( + 'init', + formatter_class=argparse.RawTextHelpFormatter, + help=Help.command_datasets_init) + parser_datasets_init_optional = parser_datasets_init._action_groups.pop() + parser_datasets_init_optional.add_argument('-p', + '--path', + dest='folder', + required=False, + help=Help.param_dataset_upfile) + parser_datasets_init._action_groups.append(parser_datasets_init_optional) + parser_datasets_init.set_defaults(func=api.dataset_initialize_cli) + + # Datasets metadata + parser_datasets_metadata = subparsers_datasets.add_parser( + 'metadata', + formatter_class=argparse.RawTextHelpFormatter, + help=Help.command_datasets_metadata) + 
parser_datasets_metadata_optional = parser_datasets_metadata._action_groups.pop( + ) + parser_datasets_metadata_optional.add_argument('dataset', + nargs='?', + default=None, + help=Help.param_dataset) + parser_datasets_metadata_optional.add_argument('-d', + '--dataset', + dest='dataset', + required=False, + help=argparse.SUPPRESS) + parser_datasets_metadata_optional.add_argument( + '--update', + dest='update', + action='store_true', + help=Help.param_dataset_metadata_update) + parser_datasets_metadata_optional.add_argument( + '-p', '--path', dest='path', help=Help.param_dataset_metadata_dir) + parser_datasets_metadata._action_groups.append( + parser_datasets_metadata_optional) + parser_datasets_metadata.set_defaults(func=api.dataset_metadata_cli) + + # Datasets status + parser_datasets_status = subparsers_datasets.add_parser( + 'status', + formatter_class=argparse.RawTextHelpFormatter, + help=Help.command_datasets_status) + parser_datasets_status_optional = parser_datasets_status._action_groups.pop( + ) + parser_datasets_status_optional.add_argument('dataset', + nargs='?', + default=None, + help=Help.param_dataset) + parser_datasets_status_optional.add_argument('-d', + '--dataset', + dest='dataset', + required=False, + help=argparse.SUPPRESS) + parser_datasets_status._action_groups.append( + parser_datasets_status_optional) + parser_datasets_status.set_defaults(func=api.dataset_status_cli) + + +def parse_kernels(subparsers): + if six.PY2: + parser_kernels = subparsers.add_parser( + 'kernels', + formatter_class=argparse.RawTextHelpFormatter, + help=Help.group_kernels) + else: + parser_kernels = subparsers.add_parser( + 'kernels', + formatter_class=argparse.RawTextHelpFormatter, + help=Help.group_kernels, + aliases=['k']) + subparsers_kernels = parser_kernels.add_subparsers(title='commands', + dest='command') + subparsers_kernels.required = True + subparsers_kernels.choices = Help.kernels_choices + + # Kernels list/search + parser_kernels_list = subparsers_kernels.add_parser( + 'list', + formatter_class=argparse.RawTextHelpFormatter, + help=Help.command_kernels_list) + parser_kernels_list_optional = parser_kernels_list._action_groups.pop() + parser_kernels_list_optional.add_argument('-m', + '--mine', + dest='mine', + action='store_true', + help=Help.param_mine) + parser_kernels_list_optional.add_argument('-p', + '--page', + dest='page', + default=1, + help=Help.param_page) + parser_kernels_list_optional.add_argument('--page-size', + dest='page_size', + default=20, + help=Help.param_page_size) + parser_kernels_list_optional.add_argument('-s', + '--search', + dest='search', + help=Help.param_search) + parser_kernels_list_optional.add_argument('-v', + '--csv', + dest='csv_display', + action='store_true', + help=Help.param_csv) + parser_kernels_list_optional.add_argument('--parent', + dest='parent', + required=False, + help=Help.param_kernel_parent) + parser_kernels_list_optional.add_argument( + '--competition', + dest='competition', + required=False, + help=Help.param_kernel_competition) + parser_kernels_list_optional.add_argument('--dataset', + dest='dataset', + required=False, + help=Help.param_kernel_dataset) + parser_kernels_list_optional.add_argument('--user', + dest='user', + required=False, + help=Help.param_kernel_user) + parser_kernels_list_optional.add_argument('--language', + dest='language', + required=False, + help=Help.param_kernel_language) + parser_kernels_list_optional.add_argument('--kernel-type', + dest='kernel_type', + required=False, + help=Help.param_kernel_type) + 
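+ # Annotation (editorial, hedged): given the set_defaults(func=...) pattern used throughout this file, the parsed argparse Namespace is forwarded to the handler, so each 'dest' above (mine, page, search, kernel_type, ...) is expected to line up with a keyword parameter of api.kernels_list_cli.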
parser_kernels_list_optional.add_argument( + '--output-type', + dest='output_type', + required=False, + help=Help.param_kernel_output_type) + parser_kernels_list_optional.add_argument('--sort-by', + dest='sort_by', + required=False, + help=Help.param_kernel_sort_by) + parser_kernels_list._action_groups.append(parser_kernels_list_optional) + parser_kernels_list.set_defaults(func=api.kernels_list_cli) + + # Kernels init + parser_kernels_init = subparsers_kernels.add_parser( + 'init', + formatter_class=argparse.RawTextHelpFormatter, + help=Help.command_kernels_init) + parser_kernels_init_optional = parser_kernels_init._action_groups.pop() + parser_kernels_init_optional.add_argument('-p', + '--path', + dest='folder', + required=False, + help=Help.param_kernel_upfile) + parser_kernels_init._action_groups.append(parser_kernels_init_optional) + parser_kernels_init.set_defaults(func=api.kernels_initialize_cli) + + # Kernels push + parser_kernels_push = subparsers_kernels.add_parser( + 'push', + formatter_class=argparse.RawTextHelpFormatter, + help=Help.command_kernels_push) + parser_kernels_push_optional = parser_kernels_push._action_groups.pop() + parser_kernels_push_optional.add_argument('-p', + '--path', + dest='folder', + required=False, + help=Help.param_kernel_upfile) + parser_kernels_push._action_groups.append(parser_kernels_push_optional) + parser_kernels_push.set_defaults(func=api.kernels_push_cli) + + # Kernels pull + parser_kernels_pull = subparsers_kernels.add_parser( + 'pull', + formatter_class=argparse.RawTextHelpFormatter, + help=Help.command_kernels_pull) + parser_kernels_pull_optional = parser_kernels_pull._action_groups.pop() + parser_kernels_pull_optional.add_argument('kernel', + nargs='?', + default=None, + help=Help.param_kernel) + parser_kernels_pull_optional.add_argument('-k', + '--kernel', + dest='kernel', + required=False, + help=argparse.SUPPRESS) + parser_kernels_pull_optional.add_argument('-p', + '--path', + dest='path', + required=False, + help=Help.param_downfolder) + parser_kernels_pull_optional.add_argument('-w', + '--wp', + dest='path', + action='store_const', + const='.', + required=False, + help=Help.param_wp) + parser_kernels_pull_optional.add_argument( + '-m', + '--metadata', + dest='metadata', + action='store_true', + help=Help.param_kernel_pull_metadata) + parser_kernels_pull._action_groups.append(parser_kernels_pull_optional) + parser_kernels_pull.set_defaults(func=api.kernels_pull_cli) + + # Kernels output + parser_kernels_output = subparsers_kernels.add_parser( + 'output', + formatter_class=argparse.RawTextHelpFormatter, + help=Help.command_kernels_output) + parser_kernels_output_optional = parser_kernels_output._action_groups.pop() + parser_kernels_output_optional.add_argument('kernel', + nargs='?', + default=None, + help=Help.param_kernel) + parser_kernels_output_optional.add_argument('-k', + '--kernel', + dest='kernel', + required=False, + help=argparse.SUPPRESS) + parser_kernels_output_optional.add_argument('-p', + '--path', + dest='path', + required=False, + help=Help.param_downfolder) + parser_kernels_output_optional.add_argument('-w', + '--wp', + dest='path', + action='store_const', + const='.', + required=False, + help=Help.param_wp) + parser_kernels_output_optional.add_argument('-o', + '--force', + dest='force', + action='store_true', + required=False, + help=Help.param_force) + parser_kernels_output_optional.add_argument('-q', + '--quiet', + dest='quiet', + action='store_true', + required=False, + help=Help.param_quiet) + 
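+ # Annotation (illustrative, not upstream code; 'someuser/some-kernel' is a placeholder): with the flags defined above, a typical run is "kaggle kernels output someuser/some-kernel -p ./out", or "kaggle kernels output someuser/some-kernel -w -o -q" to drop results in the working directory, overwriting stale files quietly.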
parser_kernels_output._action_groups.append(parser_kernels_output_optional) + parser_kernels_output.set_defaults(func=api.kernels_output_cli) + + # Kernels status + parser_kernels_status = subparsers_kernels.add_parser( + 'status', + formatter_class=argparse.RawTextHelpFormatter, + help=Help.command_kernels_status) + parser_kernels_status_optional = parser_kernels_status._action_groups.pop() + parser_kernels_status_optional.add_argument('kernel', + nargs='?', + default=None, + help=Help.param_kernel) + parser_kernels_status_optional.add_argument('-k', + '--kernel', + dest='kernel', + required=False, + help=argparse.SUPPRESS) + parser_kernels_status._action_groups.append(parser_kernels_status_optional) + parser_kernels_status.set_defaults(func=api.kernels_status_cli) + + +def parse_config(subparsers): + parser_config = subparsers.add_parser( + 'config', + formatter_class=argparse.RawTextHelpFormatter, + help=Help.group_config) + subparsers_config = parser_config.add_subparsers(title='commands', + dest='command') + subparsers_config.required = True + subparsers_config.choices = Help.config_choices + + parser_config_view = subparsers_config.add_parser( + 'view', + formatter_class=argparse.RawTextHelpFormatter, + help=Help.command_config_view) + parser_config_view.set_defaults(func=api.print_config_values) + + parser_config_set = subparsers_config.add_parser( + 'set', + formatter_class=argparse.RawTextHelpFormatter, + help=Help.command_config_set) + parser_config_set._action_groups.pop() + parser_config_set_required = parser_config_set.add_argument_group( + 'required arguments') + parser_config_set_required.add_argument('-n', + '--name', + dest='name', + required=True, + help=Help.param_config_name) + parser_config_set_required.add_argument('-v', + '--value', + dest='value', + required=True, + help=Help.param_config_value) + parser_config_set.set_defaults(func=api.set_config_value) + + parser_config_unset = subparsers_config.add_parser( + 'unset', + formatter_class=argparse.RawTextHelpFormatter, + help=Help.command_config_unset) + parser_config_unset._action_groups.pop() + parser_config_unset_required = parser_config_unset.add_argument_group( + 'required arguments') + parser_config_unset_required.add_argument('-n', + '--name', + dest='name', + required=True, + help=Help.param_config_name) + parser_config_unset.set_defaults(func=api.unset_config_value) + + +class Help(object): + kaggle_choices = [ + 'competitions', 'c', 'datasets', 'd', 'kernels', 'k', 'config' + ] + competitions_choices = [ + 'list', 'files', 'download', 'submit', 'submissions', 'leaderboard' + ] + datasets_choices = [ + 'list', 'files', 'download', 'create', 'version', 'init', 'metadata', + 'status' + ] + kernels_choices = ['list', 'init', 'push', 'pull', 'output', 'status'] + config_choices = ['view', 'set', 'unset'] + + kaggle = 'Use one of:\ncompetitions {' + ', '.join( + competitions_choices) + '}\ndatasets {' + ', '.join( + datasets_choices) + '}\nkernels {' + ', '.join( + kernels_choices) + '}\nconfig {' + ', '.join(config_choices) + '}' + + group_competitions = 'Commands related to Kaggle competitions' + group_datasets = 'Commands related to Kaggle datasets' + group_kernels = 'Commands related to Kaggle kernels' + group_config = 'Configuration settings' + + # Competitions commands + command_competitions_list = 'List available competitions' + command_competitions_files = 'List competition files' + command_competitions_download = 'Download competition files' + command_competitions_submit = 'Make a new competition submission' + command_competitions_submissions = 'Show your competition submissions'
+ command_competitions_leaderboard = 'Get competition leaderboard information' + + # Datasets commands + command_datasets_list = 'List available datasets' + command_datasets_files = 'List dataset files' + command_datasets_download = 'Download dataset files' + command_datasets_new = 'Create a new dataset' + command_datasets_new_version = 'Create a new dataset version' + command_datasets_init = 'Initialize metadata file for dataset creation' + command_datasets_metadata = 'Download metadata about a dataset' + command_datasets_status = 'Get the creation status for a dataset' + + # Kernels commands + command_kernels_list = ( + 'List available kernels. By default, shows 20 results sorted by ' + 'hotness') + command_kernels_init = 'Initialize metadata file for a kernel' + command_kernels_push = 'Push new code to a kernel and run the kernel' + command_kernels_pull = 'Pull down code from a kernel' + command_kernels_output = 'Get data output from the latest kernel run' + command_kernels_status = 'Display the status of the latest kernel run' + + # Config commands + command_config_path = ( + 'Set folder where competition or dataset files will be ' + 'downloaded') + command_config_proxy = 'Set proxy server' + command_config_competition = 'Set default competition' + command_config_view = 'View current config values' + command_config_set = 'Set a configuration value' + command_config_unset = 'Clear a configuration value' + + # General params + param_downfolder = ( + 'Folder where file(s) will be downloaded, defaults to current working ' + 'directory') + param_wp = 'Download files to current working path' + param_proxy = 'Proxy for HTTP requests' + param_quiet = ( + 'Suppress printing information about the upload/download progress') + param_public = 'Create publicly (default is private)' + param_keep_tabular = ( + 'Do not convert tabular files to CSV (default is to convert)') + param_dir_mode = ( + 'What to do with directories: "skip" - ignore; "zip" - compressed upload; "tar" - ' + 'uncompressed upload') + param_delete_old_version = 'Delete old versions of this dataset' + param_force = ( + 'Skip check whether local version of file is up to date, force' + ' file download') + param_upfile = 'File for upload (full path)' + param_csv = 'Print results in CSV format (if not set print in table format)' + param_page = 'Page number for results paging. Page size is 20 by default' + param_page_size = ( + 'Number of items to show on a page. Default size is 20, ' + 'max is 100') + param_search = 'Term(s) to search for' + param_mine = 'Display only my items' + param_unzip = ( + 'Unzip the downloaded file. Will delete the zip file when completed.') + + # Competitions params + param_competition = ( + 'Competition URL suffix (use "kaggle competitions list" ' + 'to show options)\nIf empty, the default competition ' + 'will be used (use "kaggle config set competition")') + param_competition_nonempty = ( + 'Competition URL suffix (use "kaggle competitions list" to show ' + 'options)') + param_competition_leaderboard_view = 'Show the top of the leaderboard' + param_competition_leaderboard_download = 'Download entire leaderboard' + param_competition_file = ( + 'File name, all files downloaded if not provided\n(use "kaggle ' + 'competitions files -c <competition>" to show options)') + param_competition_message = 'Message describing this submission' + param_competition_group = ( + 'Search for competitions in a specific group. Default is \'general\'. ' + 'Valid options are \'general\', \'entered\', and \'inClass\'') + param_competition_category = ( + 'Search for competitions of a specific category. Default is \'all\'. ' + 'Valid options are \'all\', \'featured\', \'research\', ' + '\'recruitment\', \'gettingStarted\', \'masters\', and \'playground\'') + param_competition_sort_by = ( + 'Sort list results. Default is \'latestDeadline\'. Valid options are ' + '\'grouped\', \'prize\', \'earliestDeadline\', \'latestDeadline\', ' + '\'numberOfTeams\', and \'recentlyCreated\'')
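+ # Annotation (illustrative, not upstream code): the option vocabularies documented above translate directly into list filters, e.g. "kaggle competitions list --category featured --sort-by prize" or "kaggle competitions list --group entered".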
+ + # Datasets params + param_dataset = ( + 'Dataset URL suffix in format <owner>/<dataset-name> (use ' + '"kaggle datasets list" to show options)') + param_dataset_file = ( + 'File name, all files downloaded if not provided\n(use ' + '"kaggle datasets files -d <dataset>" to show options)') + param_dataset_version_notes = 'Message describing the new version' + param_dataset_upfile = ( + 'Folder for upload, containing data files and a ' + 'special datasets-metadata.json file ' + '(https://github.com/Kaggle/kaggle-api/wiki/Dataset-Metadata). ' + 'Defaults to current working directory') + param_dataset_sort_by = ( + 'Sort list results. Default is \'hottest\'. Valid options are ' + '\'hottest\', \'votes\', \'updated\', and \'active\'') + param_dataset_size = ( + 'DEPRECATED. Please use --max-size and --min-size to filter dataset sizes.' + ) + param_dataset_file_type = ( + 'Search for datasets with a specific file type. Default is \'all\'. ' + 'Valid options are \'all\', \'csv\', \'sqlite\', \'json\', and ' + '\'bigQuery\'. Please note that bigQuery datasets cannot be downloaded' + ) + param_dataset_license = ( + 'Search for datasets with a specific license. Default is \'all\'. ' + 'Valid options are \'all\', \'cc\', \'gpl\', \'odb\', and \'other\'') + param_dataset_tags = ( + 'Search for datasets that have specific tags. Tag list should be ' + 'comma separated') + param_dataset_user = ( + 'Find public datasets owned by a specific user or organization') + param_dataset_metadata_dir = ( + 'Location to download dataset metadata to. Defaults to current working ' + 'directory') + param_dataset_metadata_update = ('A flag to indicate whether the dataset ' + 'metadata should be updated.') + param_dataset_maxsize = 'Specify the maximum size of the dataset to return (bytes)' + param_dataset_minsize = 'Specify the minimum size of the dataset to return (bytes)' + + # Kernels params + param_kernel = ( + 'Kernel URL suffix in format <owner>/<kernel-name> (use "kaggle ' + 'kernels list" to show options)') + param_kernel_init = ( + 'Create a metadata file for an existing kernel URL suffix in format ' + '<owner>/<kernel-name> (use "kaggle kernels list" to show options)') + param_kernel_upfile = ( + 'Folder for upload, containing data files and a ' + 'special kernel-metadata.json file ' + '(https://github.com/Kaggle/kaggle-api/wiki/Kernel-Metadata). ' + 'Defaults to current working directory') + param_kernel_parent = 'Find children of the specified parent kernel' + param_kernel_competition = 'Find kernels for a given competition slug' + param_kernel_dataset = ('Find kernels for a given dataset slug. Format is ' + '{username/dataset-slug}') + param_kernel_user = 'Find kernels created by a given username' + # TODO(b/129357583): Pull these from the same spot as the api impl + param_kernel_language = ( + 'Specify the language the kernel is written in. Default is \'all\'. ' + 'Valid options are \'all\', \'python\', \'r\', \'sqlite\', and ' + '\'julia\'') + param_kernel_type = ( + 'Specify the type of kernel. Default is \'all\'. 
Valid ' + 'options are \'all\', \'script\', and \'notebook\'') + param_kernel_output_type = ( + 'Search for specific kernel output types. ' + 'Default is \'all\'. Valid options are \'all\', ' + '\'visualizations\', and \'data\'') + param_kernel_sort_by = ( + 'Sort list results. Default is \'hotness\'. Valid ' + 'options are \'hotness\', \'commentCount\', ' + '\'dateCreated\', \'dateRun\', \'relevance\', ' + '\'scoreAscending\', \'scoreDescending\', ' + '\'viewCount\', and \'voteCount\'. \'relevance\' ' + 'is only applicable if a search term is specified.') + param_kernel_pull_metadata = 'Generate metadata when pulling kernel' + + # Config params + param_config_name = ('Name of the configuration parameter\n(one of ' + 'competition, path, proxy)') + param_config_value = ( + ('Value of the configuration parameter, valid values ' + 'depending on name\n- competition: ') + param_competition_nonempty + + '\n- path: ' + param_downfolder + '\n- proxy: ' + param_proxy) diff --git a/.local/lib/python3.8/site-packages/kaggle/configuration.py b/.local/lib/python3.8/site-packages/kaggle/configuration.py new file mode 100644 index 0000000..b9851c7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/kaggle/configuration.py @@ -0,0 +1,263 @@ +#!/usr/bin/python +# +# Copyright 2021 Kaggle Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# coding: utf-8 + +""" + Kaggle API + + API for kaggle.com # noqa: E501 + + OpenAPI spec version: 1 + + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + + +from __future__ import absolute_import + +import copy +import logging +import multiprocessing +import sys +import urllib3 + +import six +from six.moves import http_client as httplib + + +class TypeWithDefault(type): + def __init__(cls, name, bases, dct): + super(TypeWithDefault, cls).__init__(name, bases, dct) + cls._default = None + + def __call__(cls): + if cls._default is None: + cls._default = type.__call__(cls) + return copy.copy(cls._default) + + def set_default(cls, default): + cls._default = copy.copy(default) + + +class Configuration(six.with_metaclass(TypeWithDefault, object)): + """NOTE: This class is auto generated by the swagger code generator program. + + Ref: https://github.com/swagger-api/swagger-codegen + Do not edit the class manually. + """ + + def __init__(self): + """Constructor""" + # Default Base url + self.host = "https://www.kaggle.com/api/v1" + # Temp file folder for downloading files + self.temp_folder_path = None + + # Authentication Settings + # dict to store API key(s) + self.api_key = {} + # dict to store API prefix (e.g. 
Bearer) + self.api_key_prefix = {} + # Username for HTTP basic authentication + self.username = "" + # Password for HTTP basic authentication + self.password = "" + + # Logging Settings + self.logger = {} + self.logger["package_logger"] = logging.getLogger("kaggle") + self.logger["urllib3_logger"] = logging.getLogger("urllib3") + # Log format + self.logger_format = '%(asctime)s %(levelname)s %(message)s' + # Log stream handler + self.logger_stream_handler = None + # Log file handler + self.logger_file_handler = None + # Debug file location + self.logger_file = None + # Debug switch + self.debug = False + + # SSL/TLS verification + # Set this to false to skip verifying SSL certificate when calling API + # from https server. + self.verify_ssl = True + # Set this to customize the certificate file to verify the peer. + self.ssl_ca_cert = None + # client certificate file + self.cert_file = None + # client key file + self.key_file = None + # Set this to True/False to enable/disable SSL hostname verification. + self.assert_hostname = None + + # urllib3 connection pool's maximum number of connections saved + # per pool. urllib3 uses 1 connection as default value, but this is + # not the best value when you are making a lot of possibly parallel + # requests to the same host, which is often the case here. + # cpu_count * 5 is used as default value to increase performance. + self.connection_pool_maxsize = multiprocessing.cpu_count() * 5 + + # Proxy URL + self.proxy = None + # Safe chars for path_param + self.safe_chars_for_path_param = '' + + @property + def logger_file(self): + """The logger file. + + If the logger_file is None, then add stream handler and remove file + handler. Otherwise, add file handler and remove stream handler. + + :param value: The logger_file path. + :type: str + """ + return self.__logger_file + + @logger_file.setter + def logger_file(self, value): + """The logger file. + + If the logger_file is None, then add stream handler and remove file + handler. Otherwise, add file handler and remove stream handler. + + :param value: The logger_file path. + :type: str + """ + self.__logger_file = value + if self.__logger_file: + # If set logging file, + # then add file handler and remove stream handler. + self.logger_file_handler = logging.FileHandler(self.__logger_file) + self.logger_file_handler.setFormatter(self.logger_formatter) + for _, logger in six.iteritems(self.logger): + logger.addHandler(self.logger_file_handler) + if self.logger_stream_handler: + logger.removeHandler(self.logger_stream_handler) + else: + # If not set logging file, + # then add stream handler and remove file handler. + self.logger_stream_handler = logging.StreamHandler() + self.logger_stream_handler.setFormatter(self.logger_formatter) + for _, logger in six.iteritems(self.logger): + logger.addHandler(self.logger_stream_handler) + if self.logger_file_handler: + logger.removeHandler(self.logger_file_handler) + + @property + def debug(self): + """Debug status + + :param value: The debug status, True or False. + :type: bool + """ + return self.__debug + + @debug.setter + def debug(self, value): + """Debug status + + :param value: The debug status, True or False. 
+ :type: bool + """ + self.__debug = value + if self.__debug: + # if debug status is True, turn on debug logging + for _, logger in six.iteritems(self.logger): + logger.setLevel(logging.DEBUG) + # turn on httplib debug + httplib.HTTPConnection.debuglevel = 1 + else: + # if debug status is False, turn off debug logging, + # setting log level to default `logging.WARNING` + for _, logger in six.iteritems(self.logger): + logger.setLevel(logging.WARNING) + # turn off httplib debug + httplib.HTTPConnection.debuglevel = 0 + + @property + def logger_format(self): + """The logger format. + + The logger_formatter will be updated when sets logger_format. + + :param value: The format string. + :type: str + """ + return self.__logger_format + + @logger_format.setter + def logger_format(self, value): + """The logger format. + + The logger_formatter will be updated when sets logger_format. + + :param value: The format string. + :type: str + """ + self.__logger_format = value + self.logger_formatter = logging.Formatter(self.__logger_format) + + def get_api_key_with_prefix(self, identifier): + """Gets API key (with prefix if set). + + :param identifier: The identifier of apiKey. + :return: The token for api key authentication. + """ + if (self.api_key.get(identifier) and + self.api_key_prefix.get(identifier)): + return self.api_key_prefix[identifier] + ' ' + self.api_key[identifier] # noqa: E501 + elif self.api_key.get(identifier): + return self.api_key[identifier] + + def get_basic_auth_token(self): + """Gets HTTP basic authentication header (string). + + :return: The token for basic HTTP authentication. + """ + return urllib3.util.make_headers( + basic_auth=self.username + ':' + self.password + ).get('authorization') + + def auth_settings(self): + """Gets Auth Settings dict for api client. + + :return: The Auth Settings information dict. + """ + return { + 'basicAuth': + { + 'type': 'basic', + 'in': 'header', + 'key': 'Authorization', + 'value': self.get_basic_auth_token() + }, + + } + + def to_debug_report(self): + """Gets the essential information for debugging. + + :return: The report for debugging. + """ + return "Python SDK Debug Report:\n"\ + "OS: {env}\n"\ + "Python Version: {pyversion}\n"\ + "Version of the API: 1\n"\ + "SDK Package Version: 1".\ + format(env=sys.platform, pyversion=sys.version) diff --git a/.local/lib/python3.8/site-packages/kaggle/models/__init__.py b/.local/lib/python3.8/site-packages/kaggle/models/__init__.py new file mode 100644 index 0000000..15b44a6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/kaggle/models/__init__.py @@ -0,0 +1,43 @@ +#!/usr/bin/python +# +# Copyright 2021 Kaggle Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# coding: utf-8 + +# flake8: noqa +""" + Kaggle API + + API for kaggle.com # noqa: E501 + + OpenAPI spec version: 1 + + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + + +from __future__ import absolute_import + +# import models into model package +from kaggle.models.collaborator import Collaborator +from kaggle.models.dataset_column import DatasetColumn +from kaggle.models.dataset_new_request import DatasetNewRequest +from kaggle.models.dataset_new_version_request import DatasetNewVersionRequest +from kaggle.models.dataset_update_settings_request import DatasetUpdateSettingsRequest +from kaggle.models.dataset_upload_file import DatasetUploadFile +from kaggle.models.error import Error +from kaggle.models.kernel_push_request import KernelPushRequest +from kaggle.models.license import License +from kaggle.models.result import Result diff --git a/.local/lib/python3.8/site-packages/kaggle/models/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/kaggle/models/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..9e2d051 Binary files /dev/null and b/.local/lib/python3.8/site-packages/kaggle/models/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/kaggle/models/__pycache__/collaborator.cpython-38.pyc b/.local/lib/python3.8/site-packages/kaggle/models/__pycache__/collaborator.cpython-38.pyc new file mode 100644 index 0000000..10219bf Binary files /dev/null and b/.local/lib/python3.8/site-packages/kaggle/models/__pycache__/collaborator.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/kaggle/models/__pycache__/dataset_column.cpython-38.pyc b/.local/lib/python3.8/site-packages/kaggle/models/__pycache__/dataset_column.cpython-38.pyc new file mode 100644 index 0000000..7baf361 Binary files /dev/null and b/.local/lib/python3.8/site-packages/kaggle/models/__pycache__/dataset_column.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/kaggle/models/__pycache__/dataset_new_request.cpython-38.pyc b/.local/lib/python3.8/site-packages/kaggle/models/__pycache__/dataset_new_request.cpython-38.pyc new file mode 100644 index 0000000..776409f Binary files /dev/null and b/.local/lib/python3.8/site-packages/kaggle/models/__pycache__/dataset_new_request.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/kaggle/models/__pycache__/dataset_new_version_request.cpython-38.pyc b/.local/lib/python3.8/site-packages/kaggle/models/__pycache__/dataset_new_version_request.cpython-38.pyc new file mode 100644 index 0000000..85baab4 Binary files /dev/null and b/.local/lib/python3.8/site-packages/kaggle/models/__pycache__/dataset_new_version_request.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/kaggle/models/__pycache__/dataset_update_settings_request.cpython-38.pyc b/.local/lib/python3.8/site-packages/kaggle/models/__pycache__/dataset_update_settings_request.cpython-38.pyc new file mode 100644 index 0000000..885971b Binary files /dev/null and b/.local/lib/python3.8/site-packages/kaggle/models/__pycache__/dataset_update_settings_request.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/kaggle/models/__pycache__/dataset_upload_file.cpython-38.pyc b/.local/lib/python3.8/site-packages/kaggle/models/__pycache__/dataset_upload_file.cpython-38.pyc new file mode 100644 index 0000000..32001f0 Binary files /dev/null and b/.local/lib/python3.8/site-packages/kaggle/models/__pycache__/dataset_upload_file.cpython-38.pyc differ diff --git 
a/.local/lib/python3.8/site-packages/kaggle/models/__pycache__/error.cpython-38.pyc b/.local/lib/python3.8/site-packages/kaggle/models/__pycache__/error.cpython-38.pyc new file mode 100644 index 0000000..87b05ec Binary files /dev/null and b/.local/lib/python3.8/site-packages/kaggle/models/__pycache__/error.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/kaggle/models/__pycache__/kaggle_models_extended.cpython-38.pyc b/.local/lib/python3.8/site-packages/kaggle/models/__pycache__/kaggle_models_extended.cpython-38.pyc new file mode 100644 index 0000000..1a18b65 Binary files /dev/null and b/.local/lib/python3.8/site-packages/kaggle/models/__pycache__/kaggle_models_extended.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/kaggle/models/__pycache__/kernel_push_request.cpython-38.pyc b/.local/lib/python3.8/site-packages/kaggle/models/__pycache__/kernel_push_request.cpython-38.pyc new file mode 100644 index 0000000..7e480c1 Binary files /dev/null and b/.local/lib/python3.8/site-packages/kaggle/models/__pycache__/kernel_push_request.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/kaggle/models/__pycache__/license.cpython-38.pyc b/.local/lib/python3.8/site-packages/kaggle/models/__pycache__/license.cpython-38.pyc new file mode 100644 index 0000000..5567173 Binary files /dev/null and b/.local/lib/python3.8/site-packages/kaggle/models/__pycache__/license.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/kaggle/models/__pycache__/result.cpython-38.pyc b/.local/lib/python3.8/site-packages/kaggle/models/__pycache__/result.cpython-38.pyc new file mode 100644 index 0000000..7d8eca6 Binary files /dev/null and b/.local/lib/python3.8/site-packages/kaggle/models/__pycache__/result.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/kaggle/models/collaborator.py b/.local/lib/python3.8/site-packages/kaggle/models/collaborator.py new file mode 100644 index 0000000..993a752 --- /dev/null +++ b/.local/lib/python3.8/site-packages/kaggle/models/collaborator.py @@ -0,0 +1,166 @@ +#!/usr/bin/python +# +# Copyright 2021 Kaggle Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# coding: utf-8 + +""" + Kaggle API + + API for kaggle.com # noqa: E501 + + OpenAPI spec version: 1 + + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + + +import pprint +import re # noqa: F401 + +import six + + +class Collaborator(object): + """NOTE: This class is auto generated by the swagger code generator program. + + Do not edit the class manually. + """ + + """ + Attributes: + swagger_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. 
+ """ + swagger_types = { + 'username': 'str', + 'role': 'str' + } + + attribute_map = { + 'username': 'username', + 'role': 'role' + } + + def __init__(self, username=None, role=None): # noqa: E501 + """Collaborator - a model defined in Swagger""" # noqa: E501 + + self._username = None + self._role = None + self.discriminator = None + + self.username = username + self.role = role + + @property + def username(self): + """Gets the username of this Collaborator. # noqa: E501 + + Username of the collaborator # noqa: E501 + + :return: The username of this Collaborator. # noqa: E501 + :rtype: str + """ + return self._username + + @username.setter + def username(self, username): + """Sets the username of this Collaborator. + + Username of the collaborator # noqa: E501 + + :param username: The username of this Collaborator. # noqa: E501 + :type: str + """ + if username is None: + raise ValueError("Invalid value for `username`, must not be `None`") # noqa: E501 + + self._username = username + + @property + def role(self): + """Gets the role of this Collaborator. # noqa: E501 + + Role of the collaborator # noqa: E501 + + :return: The role of this Collaborator. # noqa: E501 + :rtype: str + """ + return self._role + + @role.setter + def role(self, role): + """Sets the role of this Collaborator. + + Role of the collaborator # noqa: E501 + + :param role: The role of this Collaborator. # noqa: E501 + :type: str + """ + if role is None: + raise ValueError("Invalid value for `role`, must not be `None`") # noqa: E501 + allowed_values = ["reader", "writer"] # noqa: E501 + if role not in allowed_values: + raise ValueError( + "Invalid value for `role` ({0}), must be one of {1}" # noqa: E501 + .format(role, allowed_values) + ) + + self._role = role + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.swagger_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, Collaborator): + return False + + return self.__dict__ == other.__dict__ + + def __ne__(self, other): + """Returns true if both objects are not equal""" + return not self == other diff --git a/.local/lib/python3.8/site-packages/kaggle/models/dataset_column.py b/.local/lib/python3.8/site-packages/kaggle/models/dataset_column.py new file mode 100644 index 0000000..8f60041 --- /dev/null +++ b/.local/lib/python3.8/site-packages/kaggle/models/dataset_column.py @@ -0,0 +1,242 @@ +#!/usr/bin/python +# +# Copyright 2021 Kaggle Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# coding: utf-8 + +""" + Kaggle API + + API for kaggle.com # noqa: E501 + + OpenAPI spec version: 1 + + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + + +import pprint +import re # noqa: F401 + +import six + + +class DatasetColumn(object): + """NOTE: This class is auto generated by the swagger code generator program. + + Do not edit the class manually. + """ + + """ + Attributes: + swagger_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. + """ + swagger_types = { + 'order': 'float', + 'name': 'str', + 'type': 'str', + 'original_type': 'str', + 'description': 'str' + } + + attribute_map = { + 'order': 'order', + 'name': 'name', + 'type': 'type', + 'original_type': 'originalType', + 'description': 'description' + } + + def __init__(self, order=None, name=None, type=None, original_type=None, description=None): # noqa: E501 + """DatasetColumn - a model defined in Swagger""" # noqa: E501 + + self._order = None + self._name = None + self._type = None + self._original_type = None + self._description = None + self.discriminator = None + + if order is not None: + self.order = order + if name is not None: + self.name = name + if type is not None: + self.type = type + if original_type is not None: + self.original_type = original_type + if description is not None: + self.description = description + + @property + def order(self): + """Gets the order of this DatasetColumn. # noqa: E501 + + The order that the column comes in, 0-based. (The first column is 0, second is 1, etc.) # noqa: E501 + + :return: The order of this DatasetColumn. # noqa: E501 + :rtype: float + """ + return self._order + + @order.setter + def order(self, order): + """Sets the order of this DatasetColumn. + + The order that the column comes in, 0-based. (The first column is 0, second is 1, etc.) # noqa: E501 + + :param order: The order of this DatasetColumn. # noqa: E501 + :type: float + """ + + self._order = order + + @property + def name(self): + """Gets the name of this DatasetColumn. # noqa: E501 + + The column name # noqa: E501 + + :return: The name of this DatasetColumn. # noqa: E501 + :rtype: str + """ + return self._name + + @name.setter + def name(self, name): + """Sets the name of this DatasetColumn. + + The column name # noqa: E501 + + :param name: The name of this DatasetColumn. # noqa: E501 + :type: str + """ + + self._name = name + + @property + def type(self): + """Gets the type of this DatasetColumn. # noqa: E501 + + The type of all of the fields in the column. Please see the data types on https://github.com/Kaggle/kaggle-api/wiki/Dataset-Metadata # noqa: E501 + + :return: The type of this DatasetColumn. # noqa: E501 + :rtype: str + """ + return self._type + + @type.setter + def type(self, type): + """Sets the type of this DatasetColumn. + + The type of all of the fields in the column. Please see the data types on https://github.com/Kaggle/kaggle-api/wiki/Dataset-Metadata # noqa: E501 + + :param type: The type of this DatasetColumn. 
# noqa: E501 + :type: str + """ + + self._type = type + + @property + def original_type(self): + """Gets the original_type of this DatasetColumn. # noqa: E501 + + Used to store the original type of the column, which will be converted to Kaggle's types. For example, an `originalType` of `\"integer\"` would convert to a `type` of `\"numeric\"` # noqa: E501 + + :return: The original_type of this DatasetColumn. # noqa: E501 + :rtype: str + """ + return self._original_type + + @original_type.setter + def original_type(self, original_type): + """Sets the original_type of this DatasetColumn. + + Used to store the original type of the column, which will be converted to Kaggle's types. For example, an `originalType` of `\"integer\"` would convert to a `type` of `\"numeric\"` # noqa: E501 + + :param original_type: The original_type of this DatasetColumn. # noqa: E501 + :type: str + """ + + self._original_type = original_type + + @property + def description(self): + """Gets the description of this DatasetColumn. # noqa: E501 + + The description of the column # noqa: E501 + + :return: The description of this DatasetColumn. # noqa: E501 + :rtype: str + """ + return self._description + + @description.setter + def description(self, description): + """Sets the description of this DatasetColumn. + + The description of the column # noqa: E501 + + :param description: The description of this DatasetColumn. # noqa: E501 + :type: str + """ + + self._description = description + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.swagger_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, DatasetColumn): + return False + + return self.__dict__ == other.__dict__ + + def __ne__(self, other): + """Returns true if both objects are not equal""" + return not self == other diff --git a/.local/lib/python3.8/site-packages/kaggle/models/dataset_new_request.py b/.local/lib/python3.8/site-packages/kaggle/models/dataset_new_request.py new file mode 100644 index 0000000..09af3d7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/kaggle/models/dataset_new_request.py @@ -0,0 +1,392 @@ +#!/usr/bin/python +# +# Copyright 2021 Kaggle Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# coding: utf-8 + +""" + Kaggle API + + API for kaggle.com # noqa: E501 + + OpenAPI spec version: 1 + + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + + +import pprint +import re # noqa: F401 + +import six + +from kaggle.models.dataset_upload_file import DatasetUploadFile # noqa: F401,E501 + + +class DatasetNewRequest(object): + """NOTE: This class is auto generated by the swagger code generator program. + + Do not edit the class manually. + """ + + """ + Attributes: + swagger_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. + """ + swagger_types = { + 'title': 'str', + 'slug': 'str', + 'owner_slug': 'str', + 'license_name': 'str', + 'subtitle': 'str', + 'description': 'str', + 'files': 'list[DatasetUploadFile]', + 'is_private': 'bool', + 'convert_to_csv': 'bool', + 'category_ids': 'list[str]' + } + + attribute_map = { + 'title': 'title', + 'slug': 'slug', + 'owner_slug': 'ownerSlug', + 'license_name': 'licenseName', + 'subtitle': 'subtitle', + 'description': 'description', + 'files': 'files', + 'is_private': 'isPrivate', + 'convert_to_csv': 'convertToCsv', + 'category_ids': 'categoryIds' + } + + def __init__(self, title=None, slug=None, owner_slug=None, license_name='unknown', subtitle=None, description='', files=None, is_private=True, convert_to_csv=True, category_ids=None): # noqa: E501 + """DatasetNewRequest - a model defined in Swagger""" # noqa: E501 + + self._title = None + self._slug = None + self._owner_slug = None + self._license_name = None + self._subtitle = None + self._description = None + self._files = None + self._is_private = None + self._convert_to_csv = None + self._category_ids = None + self.discriminator = None + + self.title = title + if slug is not None: + self.slug = slug + if owner_slug is not None: + self.owner_slug = owner_slug + if license_name is not None: + self.license_name = license_name + if subtitle is not None: + self.subtitle = subtitle + if description is not None: + self.description = description + self.files = files + if is_private is not None: + self.is_private = is_private + if convert_to_csv is not None: + self.convert_to_csv = convert_to_csv + if category_ids is not None: + self.category_ids = category_ids + + @property + def title(self): + """Gets the title of this DatasetNewRequest. # noqa: E501 + + The title of the new dataset # noqa: E501 + + :return: The title of this DatasetNewRequest. # noqa: E501 + :rtype: str + """ + return self._title + + @title.setter + def title(self, title): + """Sets the title of this DatasetNewRequest. + + The title of the new dataset # noqa: E501 + + :param title: The title of this DatasetNewRequest. # noqa: E501 + :type: str + """ + if title is None: + raise ValueError("Invalid value for `title`, must not be `None`") # noqa: E501 + + self._title = title + + @property + def slug(self): + """Gets the slug of this DatasetNewRequest. # noqa: E501 + + The slug that the dataset should be created with # noqa: E501 + + :return: The slug of this DatasetNewRequest. # noqa: E501 + :rtype: str + """ + return self._slug + + @slug.setter + def slug(self, slug): + """Sets the slug of this DatasetNewRequest. + + The slug that the dataset should be created with # noqa: E501 + + :param slug: The slug of this DatasetNewRequest. # noqa: E501 + :type: str + """ + + self._slug = slug + + @property + def owner_slug(self): + """Gets the owner_slug of this DatasetNewRequest. 
# noqa: E501 + + The owner's username # noqa: E501 + + :return: The owner_slug of this DatasetNewRequest. # noqa: E501 + :rtype: str + """ + return self._owner_slug + + @owner_slug.setter + def owner_slug(self, owner_slug): + """Sets the owner_slug of this DatasetNewRequest. + + The owner's username # noqa: E501 + + :param owner_slug: The owner_slug of this DatasetNewRequest. # noqa: E501 + :type: str + """ + + self._owner_slug = owner_slug + + @property + def license_name(self): + """Gets the license_name of this DatasetNewRequest. # noqa: E501 + + The license that should be associated with the dataset # noqa: E501 + + :return: The license_name of this DatasetNewRequest. # noqa: E501 + :rtype: str + """ + return self._license_name + + @license_name.setter + def license_name(self, license_name): + """Sets the license_name of this DatasetNewRequest. + + The license that should be associated with the dataset # noqa: E501 + + :param license_name: The license_name of this DatasetNewRequest. # noqa: E501 + :type: str + """ + allowed_values = ["CC0-1.0", "CC-BY-SA-4.0", "GPL-2.0", "ODbL-1.0", "CC-BY-NC-SA-4.0", "unknown", "DbCL-1.0", "CC-BY-SA-3.0", "copyright-authors", "other", "reddit-api", "world-bank"] # noqa: E501 + if license_name not in allowed_values: + raise ValueError( + "Invalid value for `license_name` ({0}), must be one of {1}" # noqa: E501 + .format(license_name, allowed_values) + ) + + self._license_name = license_name + + @property + def subtitle(self): + """Gets the subtitle of this DatasetNewRequest. # noqa: E501 + + The subtitle to be set on the dataset # noqa: E501 + + :return: The subtitle of this DatasetNewRequest. # noqa: E501 + :rtype: str + """ + return self._subtitle + + @subtitle.setter + def subtitle(self, subtitle): + """Sets the subtitle of this DatasetNewRequest. + + The subtitle to be set on the dataset # noqa: E501 + + :param subtitle: The subtitle of this DatasetNewRequest. # noqa: E501 + :type: str + """ + + self._subtitle = subtitle + + @property + def description(self): + """Gets the description of this DatasetNewRequest. # noqa: E501 + + The description to be set on the dataset # noqa: E501 + + :return: The description of this DatasetNewRequest. # noqa: E501 + :rtype: str + """ + return self._description + + @description.setter + def description(self, description): + """Sets the description of this DatasetNewRequest. + + The description to be set on the dataset # noqa: E501 + + :param description: The description of this DatasetNewRequest. # noqa: E501 + :type: str + """ + + self._description = description + + @property + def files(self): + """Gets the files of this DatasetNewRequest. # noqa: E501 + + A list of files that should be associated with the dataset # noqa: E501 + + :return: The files of this DatasetNewRequest. # noqa: E501 + :rtype: list[DatasetUploadFile] + """ + return self._files + + @files.setter + def files(self, files): + """Sets the files of this DatasetNewRequest. + + A list of files that should be associated with the dataset # noqa: E501 + + :param files: The files of this DatasetNewRequest. # noqa: E501 + :type: list[DatasetUploadFile] + """ + if files is None: + raise ValueError("Invalid value for `files`, must not be `None`") # noqa: E501 + + self._files = files + + @property + def is_private(self): + """Gets the is_private of this DatasetNewRequest. # noqa: E501 + + Whether or not the dataset should be private # noqa: E501 + + :return: The is_private of this DatasetNewRequest. 
# noqa: E501 + :rtype: bool + """ + return self._is_private + + @is_private.setter + def is_private(self, is_private): + """Sets the is_private of this DatasetNewRequest. + + Whether or not the dataset should be private # noqa: E501 + + :param is_private: The is_private of this DatasetNewRequest. # noqa: E501 + :type: bool + """ + + self._is_private = is_private + + @property + def convert_to_csv(self): + """Gets the convert_to_csv of this DatasetNewRequest. # noqa: E501 + + Whether or not a tabular dataset should be converted to csv # noqa: E501 + + :return: The convert_to_csv of this DatasetNewRequest. # noqa: E501 + :rtype: bool + """ + return self._convert_to_csv + + @convert_to_csv.setter + def convert_to_csv(self, convert_to_csv): + """Sets the convert_to_csv of this DatasetNewRequest. + + Whether or not a tabular dataset should be converted to csv # noqa: E501 + + :param convert_to_csv: The convert_to_csv of this DatasetNewRequest. # noqa: E501 + :type: bool + """ + + self._convert_to_csv = convert_to_csv + + @property + def category_ids(self): + """Gets the category_ids of this DatasetNewRequest. # noqa: E501 + + A list of tag IDs to associate with the dataset # noqa: E501 + + :return: The category_ids of this DatasetNewRequest. # noqa: E501 + :rtype: list[str] + """ + return self._category_ids + + @category_ids.setter + def category_ids(self, category_ids): + """Sets the category_ids of this DatasetNewRequest. + + A list of tag IDs to associate with the dataset # noqa: E501 + + :param category_ids: The category_ids of this DatasetNewRequest. # noqa: E501 + :type: list[str] + """ + + self._category_ids = category_ids + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.swagger_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, DatasetNewRequest): + return False + + return self.__dict__ == other.__dict__ + + def __ne__(self, other): + """Returns true if both objects are not equal""" + return not self == other diff --git a/.local/lib/python3.8/site-packages/kaggle/models/dataset_new_version_request.py b/.local/lib/python3.8/site-packages/kaggle/models/dataset_new_version_request.py new file mode 100644 index 0000000..6b1a53a --- /dev/null +++ b/.local/lib/python3.8/site-packages/kaggle/models/dataset_new_version_request.py @@ -0,0 +1,302 @@ +#!/usr/bin/python +# +# Copyright 2021 Kaggle Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# coding: utf-8 + +""" + Kaggle API + + API for kaggle.com # noqa: E501 + + OpenAPI spec version: 1 + + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + + +import pprint +import re # noqa: F401 + +import six + +from kaggle.models.dataset_upload_file import DatasetUploadFile # noqa: F401,E501 + + +class DatasetNewVersionRequest(object): + """NOTE: This class is auto generated by the swagger code generator program. + + Do not edit the class manually. + """ + + """ + Attributes: + swagger_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. + """ + swagger_types = { + 'version_notes': 'str', + 'subtitle': 'str', + 'description': 'str', + 'files': 'list[DatasetUploadFile]', + 'convert_to_csv': 'bool', + 'category_ids': 'list[str]', + 'delete_old_versions': 'bool' + } + + attribute_map = { + 'version_notes': 'versionNotes', + 'subtitle': 'subtitle', + 'description': 'description', + 'files': 'files', + 'convert_to_csv': 'convertToCsv', + 'category_ids': 'categoryIds', + 'delete_old_versions': 'deleteOldVersions' + } + + def __init__(self, version_notes=None, subtitle=None, description=None, files=None, convert_to_csv=True, category_ids=None, delete_old_versions=False): # noqa: E501 + """DatasetNewVersionRequest - a model defined in Swagger""" # noqa: E501 + + self._version_notes = None + self._subtitle = None + self._description = None + self._files = None + self._convert_to_csv = None + self._category_ids = None + self._delete_old_versions = None + self.discriminator = None + + self.version_notes = version_notes + if subtitle is not None: + self.subtitle = subtitle + if description is not None: + self.description = description + self.files = files + if convert_to_csv is not None: + self.convert_to_csv = convert_to_csv + if category_ids is not None: + self.category_ids = category_ids + if delete_old_versions is not None: + self.delete_old_versions = delete_old_versions + + @property + def version_notes(self): + """Gets the version_notes of this DatasetNewVersionRequest. # noqa: E501 + + The version notes for the new dataset version # noqa: E501 + + :return: The version_notes of this DatasetNewVersionRequest. # noqa: E501 + :rtype: str + """ + return self._version_notes + + @version_notes.setter + def version_notes(self, version_notes): + """Sets the version_notes of this DatasetNewVersionRequest. + + The version notes for the new dataset version # noqa: E501 + + :param version_notes: The version_notes of this DatasetNewVersionRequest. # noqa: E501 + :type: str + """ + if version_notes is None: + raise ValueError("Invalid value for `version_notes`, must not be `None`") # noqa: E501 + + self._version_notes = version_notes + + @property + def subtitle(self): + """Gets the subtitle of this DatasetNewVersionRequest. # noqa: E501 + + The subtitle to set on the dataset # noqa: E501 + + :return: The subtitle of this DatasetNewVersionRequest. 
# noqa: E501 + :rtype: str + """ + return self._subtitle + + @subtitle.setter + def subtitle(self, subtitle): + """Sets the subtitle of this DatasetNewVersionRequest. + + The subtitle to set on the dataset # noqa: E501 + + :param subtitle: The subtitle of this DatasetNewVersionRequest. # noqa: E501 + :type: str + """ + + self._subtitle = subtitle + + @property + def description(self): + """Gets the description of this DatasetNewVersionRequest. # noqa: E501 + + The description to set on the dataset # noqa: E501 + + :return: The description of this DatasetNewVersionRequest. # noqa: E501 + :rtype: str + """ + return self._description + + @description.setter + def description(self, description): + """Sets the description of this DatasetNewVersionRequest. + + The description to set on the dataset # noqa: E501 + + :param description: The description of this DatasetNewVersionRequest. # noqa: E501 + :type: str + """ + + self._description = description + + @property + def files(self): + """Gets the files of this DatasetNewVersionRequest. # noqa: E501 + + A list of files that should be associated with the dataset # noqa: E501 + + :return: The files of this DatasetNewVersionRequest. # noqa: E501 + :rtype: list[DatasetUploadFile] + """ + return self._files + + @files.setter + def files(self, files): + """Sets the files of this DatasetNewVersionRequest. + + A list of files that should be associated with the dataset # noqa: E501 + + :param files: The files of this DatasetNewVersionRequest. # noqa: E501 + :type: list[DatasetUploadFile] + """ + if files is None: + raise ValueError("Invalid value for `files`, must not be `None`") # noqa: E501 + + self._files = files + + @property + def convert_to_csv(self): + """Gets the convert_to_csv of this DatasetNewVersionRequest. # noqa: E501 + + Whether or not a tabular dataset should be converted to csv # noqa: E501 + + :return: The convert_to_csv of this DatasetNewVersionRequest. # noqa: E501 + :rtype: bool + """ + return self._convert_to_csv + + @convert_to_csv.setter + def convert_to_csv(self, convert_to_csv): + """Sets the convert_to_csv of this DatasetNewVersionRequest. + + Whether or not a tabular dataset should be converted to csv # noqa: E501 + + :param convert_to_csv: The convert_to_csv of this DatasetNewVersionRequest. # noqa: E501 + :type: bool + """ + + self._convert_to_csv = convert_to_csv + + @property + def category_ids(self): + """Gets the category_ids of this DatasetNewVersionRequest. # noqa: E501 + + A list of tag IDs to associate with the dataset # noqa: E501 + + :return: The category_ids of this DatasetNewVersionRequest. # noqa: E501 + :rtype: list[str] + """ + return self._category_ids + + @category_ids.setter + def category_ids(self, category_ids): + """Sets the category_ids of this DatasetNewVersionRequest. + + A list of tag IDs to associate with the dataset # noqa: E501 + + :param category_ids: The category_ids of this DatasetNewVersionRequest. # noqa: E501 + :type: list[str] + """ + + self._category_ids = category_ids + + @property + def delete_old_versions(self): + """Gets the delete_old_versions of this DatasetNewVersionRequest. # noqa: E501 + + Whether or not all previous versions of the dataset should be deleted upon creating the new version # noqa: E501 + + :return: The delete_old_versions of this DatasetNewVersionRequest.
# noqa: E501 + :rtype: bool + """ + return self._delete_old_versions + + @delete_old_versions.setter + def delete_old_versions(self, delete_old_versions): + """Sets the delete_old_versions of this DatasetNewVersionRequest. + + Whether or not all previous versions of the dataset should be deleted upon creating the new version # noqa: E501 + + :param delete_old_versions: The delete_old_versions of this DatasetNewVersionRequest. # noqa: E501 + :type: bool + """ + + self._delete_old_versions = delete_old_versions + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.swagger_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, DatasetNewVersionRequest): + return False + + return self.__dict__ == other.__dict__ + + def __ne__(self, other): + """Returns true if both objects are not equal""" + return not self == other diff --git a/.local/lib/python3.8/site-packages/kaggle/models/dataset_update_settings_request.py b/.local/lib/python3.8/site-packages/kaggle/models/dataset_update_settings_request.py new file mode 100644 index 0000000..18e7818 --- /dev/null +++ b/.local/lib/python3.8/site-packages/kaggle/models/dataset_update_settings_request.py @@ -0,0 +1,326 @@ +#!/usr/bin/python +# +# Copyright 2021 Kaggle Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# coding: utf-8 + +""" + Kaggle API + + API for kaggle.com # noqa: E501 + + OpenAPI spec version: 1 + + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + + +import pprint +import re # noqa: F401 + +import six + + +class DatasetUpdateSettingsRequest(object): + """NOTE: This class is auto generated by the swagger code generator program. + + Do not edit the class manually. + """ + + """ + Attributes: + swagger_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. 
+ """ + swagger_types = { + 'title': 'str', + 'subtitle': 'str', + 'description': 'str', + 'is_private': 'bool', + 'licenses': 'list[object]', + 'keywords': 'list[str]', + 'collaborators': 'list[object]', + 'data': 'list[object]' + } + + attribute_map = { + 'title': 'title', + 'subtitle': 'subtitle', + 'description': 'description', + 'is_private': 'isPrivate', + 'licenses': 'licenses', + 'keywords': 'keywords', + 'collaborators': 'collaborators', + 'data': 'data' + } + + def __init__(self, title=None, subtitle=None, description=None, is_private=None, licenses=None, keywords=None, collaborators=None, data=None):  # noqa: E501 + """DatasetUpdateSettingsRequest - a model defined in Swagger"""  # noqa: E501 + + self._title = None + self._subtitle = None + self._description = None + self._is_private = None + self._licenses = None + self._keywords = None + self._collaborators = None + self._data = None + self.discriminator = None + + if title is not None: + self.title = title + if subtitle is not None: + self.subtitle = subtitle + if description is not None: + self.description = description + if is_private is not None: + self.is_private = is_private + if licenses is not None: + self.licenses = licenses + if keywords is not None: + self.keywords = keywords + if collaborators is not None: + self.collaborators = collaborators + if data is not None: + self.data = data + + @property + def title(self): + """Gets the title of this DatasetUpdateSettingsRequest. # noqa: E501 + + Title of the dataset # noqa: E501 + + :return: The title of this DatasetUpdateSettingsRequest. # noqa: E501 + :rtype: str + """ + return self._title + + @title.setter + def title(self, title): + """Sets the title of this DatasetUpdateSettingsRequest. + + Title of the dataset # noqa: E501 + + :param title: The title of this DatasetUpdateSettingsRequest. # noqa: E501 + :type: str + """ + + self._title = title + + @property + def subtitle(self): + """Gets the subtitle of this DatasetUpdateSettingsRequest. # noqa: E501 + + Subtitle of the dataset # noqa: E501 + + :return: The subtitle of this DatasetUpdateSettingsRequest. # noqa: E501 + :rtype: str + """ + return self._subtitle + + @subtitle.setter + def subtitle(self, subtitle): + """Sets the subtitle of this DatasetUpdateSettingsRequest. + + Subtitle of the dataset # noqa: E501 + + :param subtitle: The subtitle of this DatasetUpdateSettingsRequest. # noqa: E501 + :type: str + """ + + self._subtitle = subtitle + + @property + def description(self): + """Gets the description of this DatasetUpdateSettingsRequest. # noqa: E501 + + Description of the dataset # noqa: E501 + + :return: The description of this DatasetUpdateSettingsRequest. # noqa: E501 + :rtype: str + """ + return self._description + + @description.setter + def description(self, description): + """Sets the description of this DatasetUpdateSettingsRequest. + + Description of the dataset # noqa: E501 + + :param description: The description of this DatasetUpdateSettingsRequest. # noqa: E501 + :type: str + """ + + self._description = description + + @property + def is_private(self): + """Gets the is_private of this DatasetUpdateSettingsRequest. # noqa: E501 + + Whether or not the dataset should be private # noqa: E501 + + :return: The is_private of this DatasetUpdateSettingsRequest. # noqa: E501 + :rtype: bool + """ + return self._is_private + + @is_private.setter + def is_private(self, is_private): + """Sets the is_private of this DatasetUpdateSettingsRequest.
+ + Whether or not the dataset should be private # noqa: E501 + + :param is_private: The is_private of this DatasetUpdateSettingsRequest. # noqa: E501 + :type: bool + """ + + self._is_private = is_private + + @property + def licenses(self): + """Gets the licenses of this DatasetUpdateSettingsRequest. # noqa: E501 + + A list of licenses that apply to this dataset # noqa: E501 + + :return: The licenses of this DatasetUpdateSettingsRequest. # noqa: E501 + :rtype: list[object] + """ + return self._licenses + + @licenses.setter + def licenses(self, licenses): + """Sets the licenses of this DatasetUpdateSettingsRequest. + + A list of licenses that apply to this dataset # noqa: E501 + + :param licenses: The licenses of this DatasetUpdateSettingsRequest. # noqa: E501 + :type: list[object] + """ + + self._licenses = licenses + + @property + def keywords(self): + """Gets the keywords of this DatasetUpdateSettingsRequest. # noqa: E501 + + A list of keywords that apply to this dataset # noqa: E501 + + :return: The keywords of this DatasetUpdateSettingsRequest. # noqa: E501 + :rtype: list[str] + """ + return self._keywords + + @keywords.setter + def keywords(self, keywords): + """Sets the keywords of this DatasetUpdateSettingsRequest. + + A list of keywords that apply to this dataset # noqa: E501 + + :param keywords: The keywords of this DatasetUpdateSettingsRequest. # noqa: E501 + :type: list[str] + """ + + self._keywords = keywords + + @property + def collaborators(self): + """Gets the collaborators of this DatasetUpdateSettingsRequest. # noqa: E501 + + A list of collaborators that may read or edit this dataset # noqa: E501 + + :return: The collaborators of this DatasetUpdateSettingsRequest. # noqa: E501 + :rtype: list[object] + """ + return self._collaborators + + @collaborators.setter + def collaborators(self, collaborators): + """Sets the collaborators of this DatasetUpdateSettingsRequest. + + A list of collaborators that may read or edit this dataset # noqa: E501 + + :param collaborators: The collaborators of this DatasetUpdateSettingsRequest. # noqa: E501 + :type: list[object] + """ + + self._collaborators = collaborators + + @property + def data(self): + """Gets the data of this DatasetUpdateSettingsRequest. # noqa: E501 + + A list containing metadata for each file in the dataset # noqa: E501 + + :return: The data of this DatasetUpdateSettingsRequest. # noqa: E501 + :rtype: list[object] + """ + return self._data + + @data.setter + def data(self, data): + """Sets the data of this DatasetUpdateSettingsRequest. + + A list containing metadata for each file in the dataset # noqa: E501 + + :param data: The data of this DatasetUpdateSettingsRequest. 
# noqa: E501 + :type: list[object] + """ + + self._data = data + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.swagger_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, DatasetUpdateSettingsRequest): + return False + + return self.__dict__ == other.__dict__ + + def __ne__(self, other): + """Returns true if both objects are not equal""" + return not self == other diff --git a/.local/lib/python3.8/site-packages/kaggle/models/dataset_upload_file.py b/.local/lib/python3.8/site-packages/kaggle/models/dataset_upload_file.py new file mode 100644 index 0000000..730b861 --- /dev/null +++ b/.local/lib/python3.8/site-packages/kaggle/models/dataset_upload_file.py @@ -0,0 +1,188 @@ +#!/usr/bin/python +# +# Copyright 2021 Kaggle Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# coding: utf-8 + +""" + Kaggle API + + API for kaggle.com # noqa: E501 + + OpenAPI spec version: 1 + + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + + +import pprint +import re # noqa: F401 + +import six + +from kaggle.models.dataset_column import DatasetColumn # noqa: F401,E501 + + +class DatasetUploadFile(object): + """NOTE: This class is auto generated by the swagger code generator program. + + Do not edit the class manually. + """ + + """ + Attributes: + swagger_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. + """ + swagger_types = { + 'token': 'str', + 'description': 'str', + 'columns': 'list[DatasetColumn]' + } + + attribute_map = { + 'token': 'token', + 'description': 'description', + 'columns': 'columns' + } + + def __init__(self, token=None, description=None, columns=None): # noqa: E501 + """DatasetUploadFile - a model defined in Swagger""" # noqa: E501 + + self._token = None + self._description = None + self._columns = None + self.discriminator = None + + if token is not None: + self.token = token + if description is not None: + self.description = description + if columns is not None: + self.columns = columns + + @property + def token(self): + """Gets the token of this DatasetUploadFile. 
# noqa: E501 + + A token referencing a specific file upload that can be used across requests # noqa: E501 + + :return: The token of this DatasetUploadFile. # noqa: E501 + :rtype: str + """ + return self._token + + @token.setter + def token(self, token): + """Sets the token of this DatasetUploadFile. + + A token referencing a specific file upload that can be used across requests # noqa: E501 + + :param token: The token of this DatasetUploadFile. # noqa: E501 + :type: str + """ + + self._token = token + + @property + def description(self): + """Gets the description of this DatasetUploadFile. # noqa: E501 + + The file description # noqa: E501 + + :return: The description of this DatasetUploadFile. # noqa: E501 + :rtype: str + """ + return self._description + + @description.setter + def description(self, description): + """Sets the description of this DatasetUploadFile. + + The file description # noqa: E501 + + :param description: The description of this DatasetUploadFile. # noqa: E501 + :type: str + """ + + self._description = description + + @property + def columns(self): + """Gets the columns of this DatasetUploadFile. # noqa: E501 + + A list of dataset column metadata # noqa: E501 + + :return: The columns of this DatasetUploadFile. # noqa: E501 + :rtype: list[DatasetColumn] + """ + return self._columns + + @columns.setter + def columns(self, columns): + """Sets the columns of this DatasetUploadFile. + + A list of dataset column metadata # noqa: E501 + + :param columns: The columns of this DatasetUploadFile. # noqa: E501 + :type: list[DatasetColumn] + """ + + self._columns = columns + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.swagger_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, DatasetUploadFile): + return False + + return self.__dict__ == other.__dict__ + + def __ne__(self, other): + """Returns true if both objects are not equal""" + return not self == other diff --git a/.local/lib/python3.8/site-packages/kaggle/models/error.py b/.local/lib/python3.8/site-packages/kaggle/models/error.py new file mode 100644 index 0000000..9c76556 --- /dev/null +++ b/.local/lib/python3.8/site-packages/kaggle/models/error.py @@ -0,0 +1,158 @@ +#!/usr/bin/python +# +# Copyright 2021 Kaggle Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
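# ---------------------------------------------------------------------------
# Editor's note: a minimal usage sketch for the DatasetUploadFile model above
# (illustrative only; the token and description values are made-up
# placeholders, and this snippet is not part of the generated sources):
#
#   from kaggle.models.dataset_upload_file import DatasetUploadFile
#
#   upload = DatasetUploadFile(token="example-upload-token",
#                              description="raw CSV export")
#   upload.to_dict()
#   # -> {'token': 'example-upload-token',
#   #     'description': 'raw CSV export', 'columns': None}
# ---------------------------------------------------------------------------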
+ +# coding: utf-8 + +""" + Kaggle API + + API for kaggle.com # noqa: E501 + + OpenAPI spec version: 1 + + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + + +import pprint +import re # noqa: F401 + +import six + + +class Error(object): + """NOTE: This class is auto generated by the swagger code generator program. + + Do not edit the class manually. + """ + + """ + Attributes: + swagger_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. + """ + swagger_types = { + 'code': 'int', + 'message': 'str' + } + + attribute_map = { + 'code': 'code', + 'message': 'message' + } + + def __init__(self, code=None, message=None): # noqa: E501 + """Error - a model defined in Swagger""" # noqa: E501 + + self._code = None + self._message = None + self.discriminator = None + + if code is not None: + self.code = code + if message is not None: + self.message = message + + @property + def code(self): + """Gets the code of this Error. # noqa: E501 + + The server error code returned # noqa: E501 + + :return: The code of this Error. # noqa: E501 + :rtype: int + """ + return self._code + + @code.setter + def code(self, code): + """Sets the code of this Error. + + The server error code returned # noqa: E501 + + :param code: The code of this Error. # noqa: E501 + :type: int + """ + + self._code = code + + @property + def message(self): + """Gets the message of this Error. # noqa: E501 + + The error message generated by the server # noqa: E501 + + :return: The message of this Error. # noqa: E501 + :rtype: str + """ + return self._message + + @message.setter + def message(self, message): + """Sets the message of this Error. + + The error message generated by the server # noqa: E501 + + :param message: The message of this Error. # noqa: E501 + :type: str + """ + + self._message = message + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.swagger_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, Error): + return False + + return self.__dict__ == other.__dict__ + + def __ne__(self, other): + """Returns true if both objects are not equal""" + return not self == other diff --git a/.local/lib/python3.8/site-packages/kaggle/models/kaggle_models_extended.py b/.local/lib/python3.8/site-packages/kaggle/models/kaggle_models_extended.py new file mode 100644 index 0000000..5702f91 --- /dev/null +++ b/.local/lib/python3.8/site-packages/kaggle/models/kaggle_models_extended.py @@ -0,0 +1,210 @@ +#!/usr/bin/python +# +# Copyright 2021 Kaggle Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#!/usr/bin/python +# +# Copyright 2019 Kaggle Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# coding=utf-8 +import os +from datetime import datetime + + +class Competition(object): + def __init__(self, init_dict): + parsed_dict = {k: parse(v) for k, v in init_dict.items()} + self.__dict__.update(parsed_dict) + self.tags = [Tag(t) for t in self.tags] + + def __repr__(self): + return self.ref + + +class SubmitResult(object): + def __init__(self, init_dict): + parsed_dict = {k: parse(v) for k, v in init_dict.items()} + self.__dict__.update(parsed_dict) + + def __repr__(self): + return self.message + + +class Submission(object): + def __init__(self, init_dict): + parsed_dict = {k: parse(v) for k, v in init_dict.items()} + self.__dict__.update(parsed_dict) + if self.totalBytes is None: + self.size = None + else: + self.size = File.get_size(self.totalBytes) + + def __repr__(self): + return str(self.ref) + + +class LeaderboardEntry(object): + def __init__(self, init_dict): + parsed_dict = {k: parse(v) for k, v in init_dict.items()} + self.__dict__.update(parsed_dict) + + def __repr__(self): + return self.teamId + + +class Dataset(object): + def __init__(self, init_dict): + parsed_dict = {k: parse(v) for k, v in init_dict.items()} + self.__dict__.update(parsed_dict) + self.tags = [Tag(t) for t in self.tags] + self.files = [File(f) for f in self.files] + self.versions = [DatasetVersion(v) for v in self.versions] + self.size = File.get_size(self.totalBytes) + + def __repr__(self): + return self.ref + + +class Metadata(object): + def __init__(self, init_info): + parsed_info = {k: parse(v) for k, v in init_info.items()} + # backwards compatibility + self.id = parsed_info["ownerUser"] + "/" + parsed_info['datasetSlug'] + self.id_no = parsed_info['datasetId'] + self.__dict__.update(parsed_info) + + def __repr__(self): + return str(self.datasetId) + + +class DatasetVersion(object): + def __init__(self, init_dict): + parsed_dict = {k: parse(v) for k, v in init_dict.items()} + self.__dict__.update(parsed_dict) + + def __repr__(self): + return str(self.versionNumber) + + +class File(object): + def __init__(self, init_dict): + parsed_dict = {k: parse(v) for k, v in init_dict.items()} + self.__dict__.update(parsed_dict) + self.size = File.get_size(self.totalBytes) + + def __repr__(self): + return self.ref + + @staticmethod + def get_size(size, precision=0): + suffixes = ['B', 'KB', 'MB', 'GB', 'TB'] + suffix_index = 0 + while size >= 1024 and suffix_index < 4: + suffix_index += 1 + size /= 1024.0 + return '%.*f%s' % (precision, size, suffixes[suffix_index]) + + +class Tag(object): + def 
__init__(self, init_dict): + parsed_dict = {k: parse(v) for k, v in init_dict.items()} + self.__dict__.update(parsed_dict) + + def __repr__(self): + return self.ref + + +class FileUploadInfo(object): + def __init__(self, init_dict): + parsed_dict = {k: parse(v) for k, v in init_dict.items()} + self.__dict__.update(parsed_dict) + + def __repr__(self): + return self.token + + +class DatasetNewVersionResponse(object): + def __init__(self, init_dict): + parsed_dict = {k: parse(v) for k, v in init_dict.items()} + self.__dict__.update(parsed_dict) + + def __repr__(self): + return self.url + + +class DatasetNewResponse(object): + def __init__(self, init_dict): + parsed_dict = {k: parse(v) for k, v in init_dict.items()} + self.__dict__.update(parsed_dict) + + def __repr__(self): + return self.url + + +class ListFilesResult(object): + def __init__(self, init_dict): + self.error_message = init_dict['errorMessage'] + files = init_dict['datasetFiles'] + if files: + self.files = [File(f) for f in files] + else: + self.files = {} + + def __repr__(self): + return self.error_message + + +class Kernel: + def __init__(self, init_dict): + parsed_dict = {k: parse(v) for k, v in init_dict.items()} + self.__dict__.update(parsed_dict) + + def __repr__(self): + return self.title + + +class KernelPushResponse(object): + def __init__(self, init_dict): + parsed_dict = {k: parse(v) for k, v in init_dict.items()} + self.__dict__.update(parsed_dict) + + def __repr__(self): + return self.newUrl + + +def parse(string): + time_formats = [ + '%Y-%m-%dT%H:%M:%S', '%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%dT%H:%M:%S.%f', + '%Y-%m-%dT%H:%M:%S.%fZ' + ] + for t in time_formats: + try: + result = datetime.strptime(string[:26], t).replace(microsecond=0) + return result + except: + pass + return string diff --git a/.local/lib/python3.8/site-packages/kaggle/models/kernel_push_request.py b/.local/lib/python3.8/site-packages/kaggle/models/kernel_push_request.py new file mode 100644 index 0000000..736cb6f --- /dev/null +++ b/.local/lib/python3.8/site-packages/kaggle/models/kernel_push_request.py @@ -0,0 +1,515 @@ +#!/usr/bin/python +# +# Copyright 2021 Kaggle Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# coding: utf-8 + +""" + Kaggle API + + API for kaggle.com # noqa: E501 + + OpenAPI spec version: 1 + + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + + +import pprint +import re # noqa: F401 + +import six + + +class KernelPushRequest(object): + """NOTE: This class is auto generated by the swagger code generator program. + + Do not edit the class manually. + """ + + """ + Attributes: + swagger_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. 
+ """ + swagger_types = { + 'id': 'int', + 'slug': 'str', + 'new_title': 'str', + 'text': 'str', + 'language': 'str', + 'kernel_type': 'str', + 'is_private': 'bool', + 'enable_gpu': 'bool', + 'enable_internet': 'bool', + 'dataset_data_sources': 'list[str]', + 'competition_data_sources': 'list[str]', + 'kernel_data_sources': 'list[str]', + 'category_ids': 'list[str]', + 'docker_image_pinning_type': 'str' + } + + attribute_map = { + 'id': 'id', + 'slug': 'slug', + 'new_title': 'newTitle', + 'text': 'text', + 'language': 'language', + 'kernel_type': 'kernelType', + 'is_private': 'isPrivate', + 'enable_gpu': 'enableGpu', + 'enable_internet': 'enableInternet', + 'dataset_data_sources': 'datasetDataSources', + 'competition_data_sources': 'competitionDataSources', + 'kernel_data_sources': 'kernelDataSources', + 'category_ids': 'categoryIds', + 'docker_image_pinning_type': 'dockerImagePinningType' + } + + def __init__(self, id=None, slug=None, new_title=None, text=None, language=None, kernel_type=None, is_private=None, enable_gpu=None, enable_internet=None, dataset_data_sources=None, competition_data_sources=None, kernel_data_sources=None, category_ids=None, docker_image_pinning_type=None): # noqa: E501 + """KernelPushRequest - a model defined in Swagger""" # noqa: E501 + + self._id = None + self._slug = None + self._new_title = None + self._text = None + self._language = None + self._kernel_type = None + self._is_private = None + self._enable_gpu = None + self._enable_internet = None + self._dataset_data_sources = None + self._competition_data_sources = None + self._kernel_data_sources = None + self._category_ids = None + self._docker_image_pinning_type = None + self.discriminator = None + + if id is not None: + self.id = id + if slug is not None: + self.slug = slug + if new_title is not None: + self.new_title = new_title + self.text = text + self.language = language + self.kernel_type = kernel_type + if is_private is not None: + self.is_private = is_private + if enable_gpu is not None: + self.enable_gpu = enable_gpu + if enable_internet is not None: + self.enable_internet = enable_internet + if dataset_data_sources is not None: + self.dataset_data_sources = dataset_data_sources + if competition_data_sources is not None: + self.competition_data_sources = competition_data_sources + if kernel_data_sources is not None: + self.kernel_data_sources = kernel_data_sources + if category_ids is not None: + self.category_ids = category_ids + if docker_image_pinning_type is not None: + self.docker_image_pinning_type = docker_image_pinning_type + + @property + def id(self): + """Gets the id of this KernelPushRequest. # noqa: E501 + + The kernel's ID number. One of `id` and `slug` are required. If both are specified, `id` will be preferred # noqa: E501 + + :return: The id of this KernelPushRequest. # noqa: E501 + :rtype: int + """ + return self._id + + @id.setter + def id(self, id): + """Sets the id of this KernelPushRequest. + + The kernel's ID number. One of `id` and `slug` are required. If both are specified, `id` will be preferred # noqa: E501 + + :param id: The id of this KernelPushRequest. # noqa: E501 + :type: int + """ + + self._id = id + + @property + def slug(self): + """Gets the slug of this KernelPushRequest. # noqa: E501 + + The full slug of the kernel to push to, in the format `USERNAME/KERNEL-SLUG`. The kernel slug must be the title lowercased with dashes (`-`) replacing spaces. One of `id` and `slug` are required. 
If both are specified, `id` will be preferred # noqa: E501 + + :return: The slug of this KernelPushRequest. # noqa: E501 + :rtype: str + """ + return self._slug + + @slug.setter + def slug(self, slug): + """Sets the slug of this KernelPushRequest. + + The full slug of the kernel to push to, in the format `USERNAME/KERNEL-SLUG`. The kernel slug must be the title lowercased with dashes (`-`) replacing spaces. One of `id` and `slug` are required. If both are specified, `id` will be preferred # noqa: E501 + + :param slug: The slug of this KernelPushRequest. # noqa: E501 + :type: str + """ + + self._slug = slug + + @property + def new_title(self): + """Gets the new_title of this KernelPushRequest. # noqa: E501 + + The title to be set on the kernel # noqa: E501 + + :return: The new_title of this KernelPushRequest. # noqa: E501 + :rtype: str + """ + return self._new_title + + @new_title.setter + def new_title(self, new_title): + """Sets the new_title of this KernelPushRequest. + + The title to be set on the kernel # noqa: E501 + + :param new_title: The new_title of this KernelPushRequest. # noqa: E501 + :type: str + """ + + self._new_title = new_title + + @property + def text(self): + """Gets the text of this KernelPushRequest. # noqa: E501 + + The kernel's source code # noqa: E501 + + :return: The text of this KernelPushRequest. # noqa: E501 + :rtype: str + """ + return self._text + + @text.setter + def text(self, text): + """Sets the text of this KernelPushRequest. + + The kernel's source code # noqa: E501 + + :param text: The text of this KernelPushRequest. # noqa: E501 + :type: str + """ + if text is None: + raise ValueError("Invalid value for `text`, must not be `None`") # noqa: E501 + + self._text = text + + @property + def language(self): + """Gets the language of this KernelPushRequest. # noqa: E501 + + The language that the kernel is written in # noqa: E501 + + :return: The language of this KernelPushRequest. # noqa: E501 + :rtype: str + """ + return self._language + + @language.setter + def language(self, language): + """Sets the language of this KernelPushRequest. + + The language that the kernel is written in # noqa: E501 + + :param language: The language of this KernelPushRequest. # noqa: E501 + :type: str + """ + if language is None: + raise ValueError("Invalid value for `language`, must not be `None`") # noqa: E501 + allowed_values = ["python", "r", "rmarkdown"] # noqa: E501 + if language not in allowed_values: + raise ValueError( + "Invalid value for `language` ({0}), must be one of {1}" # noqa: E501 + .format(language, allowed_values) + ) + + self._language = language + + @property + def kernel_type(self): + """Gets the kernel_type of this KernelPushRequest. # noqa: E501 + + The type of kernel. Cannot be changed once the kernel has been created # noqa: E501 + + :return: The kernel_type of this KernelPushRequest. # noqa: E501 + :rtype: str + """ + return self._kernel_type + + @kernel_type.setter + def kernel_type(self, kernel_type): + """Sets the kernel_type of this KernelPushRequest. + + The type of kernel. Cannot be changed once the kernel has been created # noqa: E501 + + :param kernel_type: The kernel_type of this KernelPushRequest. 
# noqa: E501 + :type: str + """ + if kernel_type is None: + raise ValueError("Invalid value for `kernel_type`, must not be `None`") # noqa: E501 + allowed_values = ["script", "notebook"] # noqa: E501 + if kernel_type not in allowed_values: + raise ValueError( + "Invalid value for `kernel_type` ({0}), must be one of {1}" # noqa: E501 + .format(kernel_type, allowed_values) + ) + + self._kernel_type = kernel_type + + @property + def is_private(self): + """Gets the is_private of this KernelPushRequest. # noqa: E501 + + Whether or not the kernel should be private # noqa: E501 + + :return: The is_private of this KernelPushRequest. # noqa: E501 + :rtype: bool + """ + return self._is_private + + @is_private.setter + def is_private(self, is_private): + """Sets the is_private of this KernelPushRequest. + + Whether or not the kernel should be private # noqa: E501 + + :param is_private: The is_private of this KernelPushRequest. # noqa: E501 + :type: bool + """ + + self._is_private = is_private + + @property + def enable_gpu(self): + """Gets the enable_gpu of this KernelPushRequest. # noqa: E501 + + Whether or not the kernel should run on a GPU # noqa: E501 + + :return: The enable_gpu of this KernelPushRequest. # noqa: E501 + :rtype: bool + """ + return self._enable_gpu + + @enable_gpu.setter + def enable_gpu(self, enable_gpu): + """Sets the enable_gpu of this KernelPushRequest. + + Whether or not the kernel should run on a GPU # noqa: E501 + + :param enable_gpu: The enable_gpu of this KernelPushRequest. # noqa: E501 + :type: bool + """ + + self._enable_gpu = enable_gpu + + @property + def enable_internet(self): + """Gets the enable_internet of this KernelPushRequest. # noqa: E501 + + Whether or not the kernel should be able to access the internet # noqa: E501 + + :return: The enable_internet of this KernelPushRequest. # noqa: E501 + :rtype: bool + """ + return self._enable_internet + + @enable_internet.setter + def enable_internet(self, enable_internet): + """Sets the enable_internet of this KernelPushRequest. + + Whether or not the kernel should be able to access the internet # noqa: E501 + + :param enable_internet: The enable_internet of this KernelPushRequest. # noqa: E501 + :type: bool + """ + + self._enable_internet = enable_internet + + @property + def dataset_data_sources(self): + """Gets the dataset_data_sources of this KernelPushRequest. # noqa: E501 + + A list of dataset data sources that the kernel should use. Each dataset is specified as `USERNAME/DATASET-SLUG` # noqa: E501 + + :return: The dataset_data_sources of this KernelPushRequest. # noqa: E501 + :rtype: list[str] + """ + return self._dataset_data_sources + + @dataset_data_sources.setter + def dataset_data_sources(self, dataset_data_sources): + """Sets the dataset_data_sources of this KernelPushRequest. + + A list of dataset data sources that the kernel should use. Each dataset is specified as `USERNAME/DATASET-SLUG` # noqa: E501 + + :param dataset_data_sources: The dataset_data_sources of this KernelPushRequest. # noqa: E501 + :type: list[str] + """ + + self._dataset_data_sources = dataset_data_sources + + @property + def competition_data_sources(self): + """Gets the competition_data_sources of this KernelPushRequest. # noqa: E501 + + A list of competition data sources that the kernel should use # noqa: E501 + + :return: The competition_data_sources of this KernelPushRequest. 
# noqa: E501 + :rtype: list[str] + """ + return self._competition_data_sources + + @competition_data_sources.setter + def competition_data_sources(self, competition_data_sources): + """Sets the competition_data_sources of this KernelPushRequest. + + A list of competition data sources that the kernel should use # noqa: E501 + + :param competition_data_sources: The competition_data_sources of this KernelPushRequest. # noqa: E501 + :type: list[str] + """ + + self._competition_data_sources = competition_data_sources + + @property + def kernel_data_sources(self): + """Gets the kernel_data_sources of this KernelPushRequest. # noqa: E501 + + A list of kernel data sources that the kernel should use. Each kernel is specified as `USERNAME/KERNEL-SLUG` # noqa: E501 + + :return: The kernel_data_sources of this KernelPushRequest. # noqa: E501 + :rtype: list[str] + """ + return self._kernel_data_sources + + @kernel_data_sources.setter + def kernel_data_sources(self, kernel_data_sources): + """Sets the kernel_data_sources of this KernelPushRequest. + + A list of kernel data sources that the kernel should use. Each kernel is specified as `USERNAME/KERNEL-SLUG` # noqa: E501 + + :param kernel_data_sources: The kernel_data_sources of this KernelPushRequest. # noqa: E501 + :type: list[str] + """ + + self._kernel_data_sources = kernel_data_sources + + @property + def category_ids(self): + """Gets the category_ids of this KernelPushRequest. # noqa: E501 + + A list of tag IDs to associate with the kernel # noqa: E501 + + :return: The category_ids of this KernelPushRequest. # noqa: E501 + :rtype: list[str] + """ + return self._category_ids + + @category_ids.setter + def category_ids(self, category_ids): + """Sets the category_ids of this KernelPushRequest. + + A list of tag IDs to associate with the kernel # noqa: E501 + + :param category_ids: The category_ids of this KernelPushRequest. # noqa: E501 + :type: list[str] + """ + + self._category_ids = category_ids + + @property + def docker_image_pinning_type(self): + """Gets the docker_image_pinning_type of this KernelPushRequest. # noqa: E501 + + Which docker image to use for executing new versions going forward. # noqa: E501 + + :return: The docker_image_pinning_type of this KernelPushRequest. # noqa: E501 + :rtype: str + """ + return self._docker_image_pinning_type + + @docker_image_pinning_type.setter + def docker_image_pinning_type(self, docker_image_pinning_type): + """Sets the docker_image_pinning_type of this KernelPushRequest.
# noqa: E501 + :type: str + """ + allowed_values = ["original", "latest"] # noqa: E501 + if docker_image_pinning_type not in allowed_values: + raise ValueError( + "Invalid value for `docker_image_pinning_type` ({0}), must be one of {1}" # noqa: E501 + .format(docker_image_pinning_type, allowed_values) + ) + + self._docker_image_pinning_type = docker_image_pinning_type + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.swagger_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, KernelPushRequest): + return False + + return self.__dict__ == other.__dict__ + + def __ne__(self, other): + """Returns true if both objects are not equal""" + return not self == other diff --git a/.local/lib/python3.8/site-packages/kaggle/models/license.py b/.local/lib/python3.8/site-packages/kaggle/models/license.py new file mode 100644 index 0000000..3811488 --- /dev/null +++ b/.local/lib/python3.8/site-packages/kaggle/models/license.py @@ -0,0 +1,137 @@ +#!/usr/bin/python +# +# Copyright 2021 Kaggle Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# coding: utf-8 + +""" + Kaggle API + + API for kaggle.com # noqa: E501 + + OpenAPI spec version: 1 + + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + + +import pprint +import re # noqa: F401 + +import six + + +class License(object): + """NOTE: This class is auto generated by the swagger code generator program. + + Do not edit the class manually. + """ + + """ + Attributes: + swagger_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. + """ + swagger_types = { + 'name': 'str' + } + + attribute_map = { + 'name': 'name' + } + + def __init__(self, name=None): # noqa: E501 + """License - a model defined in Swagger""" # noqa: E501 + + self._name = None + self.discriminator = None + + self.name = name + + @property + def name(self): + """Gets the name of this License. # noqa: E501 + + Name of the license # noqa: E501 + + :return: The name of this License. # noqa: E501 + :rtype: str + """ + return self._name + + @name.setter + def name(self, name): + """Sets the name of this License. + + Name of the license # noqa: E501 + + :param name: The name of this License. 
# noqa: E501 + :type: str + """ + if name is None: + raise ValueError("Invalid value for `name`, must not be `None`") # noqa: E501 + allowed_values = ["CC0-1.0", "CC-BY-SA-4.0", "GPL-2.0", "ODbL-1.0", "CC-BY-NC-SA-4.0", "unknown", "DbCL-1.0", "CC-BY-SA-3.0", "copyright-authors", "other", "reddit-api", "world-bank"] # noqa: E501 + if name not in allowed_values: + raise ValueError( + "Invalid value for `name` ({0}), must be one of {1}" # noqa: E501 + .format(name, allowed_values) + ) + + self._name = name + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.swagger_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, License): + return False + + return self.__dict__ == other.__dict__ + + def __ne__(self, other): + """Returns true if both objects are not equal""" + return not self == other diff --git a/.local/lib/python3.8/site-packages/kaggle/models/result.py b/.local/lib/python3.8/site-packages/kaggle/models/result.py new file mode 100644 index 0000000..461b8c7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/kaggle/models/result.py @@ -0,0 +1,100 @@ +#!/usr/bin/python +# +# Copyright 2021 Kaggle Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# coding: utf-8 + +""" + Kaggle API + + API for kaggle.com # noqa: E501 + + OpenAPI spec version: 1 + + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + + +import pprint +import re # noqa: F401 + +import six + + +class Result(object): + """NOTE: This class is auto generated by the swagger code generator program. + + Do not edit the class manually. + """ + + """ + Attributes: + swagger_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. 
+ """ + swagger_types = { + } + + attribute_map = { + } + + def __init__(self): # noqa: E501 + """Result - a model defined in Swagger""" # noqa: E501 + self.discriminator = None + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.swagger_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, Result): + return False + + return self.__dict__ == other.__dict__ + + def __ne__(self, other): + """Returns true if both objects are not equal""" + return not self == other diff --git a/.local/lib/python3.8/site-packages/kaggle/rest.py b/.local/lib/python3.8/site-packages/kaggle/rest.py new file mode 100644 index 0000000..16d85f5 --- /dev/null +++ b/.local/lib/python3.8/site-packages/kaggle/rest.py @@ -0,0 +1,336 @@ +#!/usr/bin/python +# +# Copyright 2021 Kaggle Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
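# ---------------------------------------------------------------------------
# Editor's note (illustrative sketch, not part of the generated sources):
# the License model defined earlier validates `name` against its
# allowed_values whitelist, so an unsupported identifier fails fast on the
# client rather than in an API round trip:
#
#   from kaggle.models.license import License
#
#   License(name="CC0-1.0")   # accepted, value is in allowed_values
#   License(name="MIT")       # raises ValueError: Invalid value for `name`
# ---------------------------------------------------------------------------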
+ +# coding: utf-8 + +""" + Kaggle API + + API for kaggle.com # noqa: E501 + + OpenAPI spec version: 1 + + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + + +from __future__ import absolute_import + +import io +import json +import logging +import re +import ssl + +import certifi +# python 2 and python 3 compatibility library +import six +from six.moves.urllib.parse import urlencode + +try: + import urllib3 +except ImportError: + raise ImportError('Swagger python client requires urllib3.') + + +logger = logging.getLogger(__name__) + + +class RESTResponse(io.IOBase): + + def __init__(self, resp): + self.urllib3_response = resp + self.status = resp.status + self.reason = resp.reason + self.data = resp.data + + def getheaders(self): + """Returns a dictionary of the response headers.""" + return self.urllib3_response.getheaders() + + def getheader(self, name, default=None): + """Returns a given response header.""" + return self.urllib3_response.getheader(name, default) + + +class RESTClientObject(object): + + def __init__(self, configuration, pools_size=4, maxsize=None): + # urllib3.PoolManager will pass all kw parameters to connectionpool + # https://github.com/shazow/urllib3/blob/f9409436f83aeb79fbaf090181cd81b784f1b8ce/urllib3/poolmanager.py#L75 # noqa: E501 + # https://github.com/shazow/urllib3/blob/f9409436f83aeb79fbaf090181cd81b784f1b8ce/urllib3/connectionpool.py#L680 # noqa: E501 + # maxsize is the number of requests to host that are allowed in parallel # noqa: E501 + # Custom SSL certificates and client certificates: http://urllib3.readthedocs.io/en/latest/advanced-usage.html # noqa: E501 + + # cert_reqs + if configuration.verify_ssl: + cert_reqs = ssl.CERT_REQUIRED + else: + cert_reqs = ssl.CERT_NONE + + # ca_certs + if configuration.ssl_ca_cert: + ca_certs = configuration.ssl_ca_cert + else: + # if not set certificate file, use Mozilla's root certificates. + ca_certs = certifi.where() + + addition_pool_args = {} + if configuration.assert_hostname is not None: + addition_pool_args['assert_hostname'] = configuration.assert_hostname # noqa: E501 + + if maxsize is None: + if configuration.connection_pool_maxsize is not None: + maxsize = configuration.connection_pool_maxsize + else: + maxsize = 4 + + # https pool manager + if configuration.proxy: + self.pool_manager = urllib3.ProxyManager( + num_pools=pools_size, + maxsize=maxsize, + cert_reqs=cert_reqs, + ca_certs=ca_certs, + cert_file=configuration.cert_file, + key_file=configuration.key_file, + proxy_url=configuration.proxy, + **addition_pool_args + ) + else: + self.pool_manager = urllib3.PoolManager( + num_pools=pools_size, + maxsize=maxsize, + cert_reqs=cert_reqs, + ca_certs=ca_certs, + cert_file=configuration.cert_file, + key_file=configuration.key_file, + **addition_pool_args + ) + + def request(self, method, url, query_params=None, headers=None, + body=None, post_params=None, _preload_content=True, + _request_timeout=None): + """Perform requests. + + :param method: http request method + :param url: http request url + :param query_params: query parameters in the url + :param headers: http request headers + :param body: request json body, for `application/json` + :param post_params: request post parameters, + `application/x-www-form-urlencoded` + and `multipart/form-data` + :param _preload_content: if False, the urllib3.HTTPResponse object will + be returned without reading/decoding response + data. Default is True. + :param _request_timeout: timeout setting for this request. 
If one + number provided, it will be total request + timeout. It can also be a pair (tuple) of + (connection, read) timeouts. + """ + method = method.upper() + assert method in ['GET', 'HEAD', 'DELETE', 'POST', 'PUT', + 'PATCH', 'OPTIONS'] + + if post_params and body: + raise ValueError( + "body parameter cannot be used with post_params parameter." + ) + + post_params = post_params or {} + headers = headers or {} + + timeout = None + if _request_timeout: + if isinstance(_request_timeout, (int, ) if six.PY3 else (int, long)): # noqa: E501,F821 + timeout = urllib3.Timeout(total=_request_timeout) + elif (isinstance(_request_timeout, tuple) and + len(_request_timeout) == 2): + timeout = urllib3.Timeout( + connect=_request_timeout[0], read=_request_timeout[1]) + + try: + # For `POST`, `PUT`, `PATCH`, `OPTIONS`, `DELETE` + if method in ['POST', 'PUT', 'PATCH', 'OPTIONS', 'DELETE']: + if query_params: + url += '?' + urlencode(query_params) + if re.search('json', headers['Content-Type'], re.IGNORECASE): + request_body = None + if body is not None: + request_body = json.dumps(body) + r = self.pool_manager.request( + method, url, + body=request_body, + preload_content=_preload_content, + timeout=timeout, + headers=headers) + elif headers['Content-Type'] == 'application/x-www-form-urlencoded': # noqa: E501 + r = self.pool_manager.request( + method, url, + fields=post_params, + encode_multipart=False, + preload_content=_preload_content, + timeout=timeout, + headers=headers) + elif headers['Content-Type'] == 'multipart/form-data': + # must del headers['Content-Type'], or the correct + # Content-Type which generated by urllib3 will be + # overwritten. + del headers['Content-Type'] + r = self.pool_manager.request( + method, url, + fields=post_params, + encode_multipart=True, + preload_content=_preload_content, + timeout=timeout, + headers=headers) + # Pass a `string` parameter directly in the body to support + # other content types than Json when `body` argument is + # provided in serialized form + elif isinstance(body, str): + request_body = body + r = self.pool_manager.request( + method, url, + body=request_body, + preload_content=_preload_content, + timeout=timeout, + headers=headers) + else: + # Cannot generate the request from given parameters + msg = """Cannot prepare a request message for provided + arguments. Please check that your arguments match + declared content type.""" + raise ApiException(status=0, reason=msg) + # For `GET`, `HEAD` + else: + r = self.pool_manager.request(method, url, + fields=query_params, + preload_content=_preload_content, + timeout=timeout, + headers=headers) + except urllib3.exceptions.SSLError as e: + msg = "{0}\n{1}".format(type(e).__name__, str(e)) + raise ApiException(status=0, reason=msg) + + if _preload_content: + r = RESTResponse(r) + + # In the python 3, the response.data is bytes. + # we need to decode it to string. 
+ if six.PY3: + r.data = r.data.decode('utf8') + + # log response body + logger.debug("response body: %s", r.data) + + if not 200 <= r.status <= 299: + raise ApiException(http_resp=r) + + return r + + def GET(self, url, headers=None, query_params=None, _preload_content=True, + _request_timeout=None): + return self.request("GET", url, + headers=headers, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + query_params=query_params) + + def HEAD(self, url, headers=None, query_params=None, _preload_content=True, + _request_timeout=None): + return self.request("HEAD", url, + headers=headers, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + query_params=query_params) + + def OPTIONS(self, url, headers=None, query_params=None, post_params=None, + body=None, _preload_content=True, _request_timeout=None): + return self.request("OPTIONS", url, + headers=headers, + query_params=query_params, + post_params=post_params, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + body=body) + + def DELETE(self, url, headers=None, query_params=None, body=None, + _preload_content=True, _request_timeout=None): + return self.request("DELETE", url, + headers=headers, + query_params=query_params, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + body=body) + + def POST(self, url, headers=None, query_params=None, post_params=None, + body=None, _preload_content=True, _request_timeout=None): + return self.request("POST", url, + headers=headers, + query_params=query_params, + post_params=post_params, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + body=body) + + def PUT(self, url, headers=None, query_params=None, post_params=None, + body=None, _preload_content=True, _request_timeout=None): + return self.request("PUT", url, + headers=headers, + query_params=query_params, + post_params=post_params, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + body=body) + + def PATCH(self, url, headers=None, query_params=None, post_params=None, + body=None, _preload_content=True, _request_timeout=None): + return self.request("PATCH", url, + headers=headers, + query_params=query_params, + post_params=post_params, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + body=body) + + +class ApiException(Exception): + + def __init__(self, status=None, reason=None, http_resp=None): + if http_resp: + self.status = http_resp.status + self.reason = http_resp.reason + self.body = http_resp.data + self.headers = http_resp.getheaders() + else: + self.status = status + self.reason = reason + self.body = None + self.headers = None + + def __str__(self): + """Custom error messages for exception""" + error_message = "({0})\n"\ + "Reason: {1}\n".format(self.status, self.reason) + if self.headers: + error_message += "HTTP response headers: {0}\n".format( + self.headers) + + if self.body: + error_message += "HTTP response body: {0}\n".format(self.body) + + return error_message diff --git a/.local/lib/python3.8/site-packages/kaggle/tests/__init__.py b/.local/lib/python3.8/site-packages/kaggle/tests/__init__.py new file mode 100644 index 0000000..7b6fad7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/kaggle/tests/__init__.py @@ -0,0 +1,16 @@ +#!/usr/bin/python +# +# Copyright 2021 Kaggle Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/.local/lib/python3.8/site-packages/kaggle/tests/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/kaggle/tests/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..c8009d2 Binary files /dev/null and b/.local/lib/python3.8/site-packages/kaggle/tests/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/kaggle/tests/__pycache__/test_authenticate.cpython-38.pyc b/.local/lib/python3.8/site-packages/kaggle/tests/__pycache__/test_authenticate.cpython-38.pyc new file mode 100644 index 0000000..dad6572 Binary files /dev/null and b/.local/lib/python3.8/site-packages/kaggle/tests/__pycache__/test_authenticate.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/kaggle/tests/test_authenticate.py b/.local/lib/python3.8/site-packages/kaggle/tests/test_authenticate.py new file mode 100644 index 0000000..a01ca79 --- /dev/null +++ b/.local/lib/python3.8/site-packages/kaggle/tests/test_authenticate.py @@ -0,0 +1,58 @@ +#!/usr/bin/python +# +# Copyright 2021 Kaggle Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
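+
+# Illustrative sketch (comments only, not executed): the tests below drive
+# authentication purely through environment variables, roughly equivalent to:
+#
+#   import os
+#   from kaggle.api.kaggle_api_extended import KaggleApi
+#   os.environ['KAGGLE_USERNAME'] = 'dinosaur'
+#   os.environ['KAGGLE_KEY'] = 'xxxxxxxxxxxx'
+#   api = KaggleApi()
+#   api.authenticate()  # picks the credentials up from the environment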
+
+from kaggle.api.kaggle_api_extended import KaggleApi
+
+# python -m unittest tests.test_authenticate
+
+import os
+import unittest
+
+
+class TestAuthenticate(unittest.TestCase):
+    def setUp(self):
+        print("setup class:%s" % self)
+
+    def tearDown(self):
+        print("teardown class:%s" % self)
+
+    # Environment
+
+    def test_environment_variables(self):
+        os.environ['KAGGLE_USERNAME'] = 'dinosaur'
+        os.environ['KAGGLE_KEY'] = 'xxxxxxxxxxxx'
+        api = KaggleApi()
+
+        # We haven't authenticated yet
+        self.assertTrue("key" not in api.config_values)
+        self.assertTrue("username" not in api.config_values)
+        api.authenticate()
+
+        # Should be set from the environment
+        self.assertEqual(api.config_values['key'], 'xxxxxxxxxxxx')
+        self.assertEqual(api.config_values['username'], 'dinosaur')
+
+    # Configuration Actions
+
+    def test_config_actions(self):
+        api = KaggleApi()
+
+        self.assertTrue(api.config_dir.endswith('.kaggle'))
+        self.assertEqual(api.get_config_value('doesntexist'), None)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/.local/lib/python3.8/site-packages/numpy-1.22.3.dist-info/INSTALLER b/.local/lib/python3.8/site-packages/numpy-1.22.3.dist-info/INSTALLER
new file mode 100644
index 0000000..a1b589e
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy-1.22.3.dist-info/INSTALLER
@@ -0,0 +1 @@
+pip
diff --git a/.local/lib/python3.8/site-packages/numpy-1.22.3.dist-info/LICENSE.txt b/.local/lib/python3.8/site-packages/numpy-1.22.3.dist-info/LICENSE.txt
new file mode 100644
index 0000000..7d18210
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy-1.22.3.dist-info/LICENSE.txt
@@ -0,0 +1,910 @@
+Copyright (c) 2005-2022, NumPy Developers.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above
+      copyright notice, this list of conditions and the following
+      disclaimer in the documentation and/or other materials provided
+      with the distribution.
+
+    * Neither the name of the NumPy Developers nor the names of any
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+----
+
+This binary distribution of NumPy also bundles the following software:
+
+
+Name: OpenBLAS
+Files: .libs/libopenb*.so
+Description: bundled as a dynamically linked library
+Availability: https://github.com/xianyi/OpenBLAS/
+License: 3-clause BSD
+  Copyright (c) 2011-2014, The OpenBLAS Project
+  All rights reserved.
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + 3. Neither the name of the OpenBLAS project nor the names of + its contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE + USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Name: LAPACK +Files: .libs/libopenb*.so +Description: bundled in OpenBLAS +Availability: https://github.com/xianyi/OpenBLAS/ +License 3-clause BSD + Copyright (c) 1992-2013 The University of Tennessee and The University + of Tennessee Research Foundation. All rights + reserved. + Copyright (c) 2000-2013 The University of California Berkeley. All + rights reserved. + Copyright (c) 2006-2013 The University of Colorado Denver. All rights + reserved. + + $COPYRIGHT$ + + Additional copyrights may follow + + $HEADER$ + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer listed + in this license in the documentation and/or other materials + provided with the distribution. + + - Neither the name of the copyright holders nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + The copyright holders provide no reassurances that the source code + provided does not infringe any patent, copyright, or any other + intellectual property rights of third parties. The copyright holders + disclaim any liability to any recipient for claims brought against + recipient by any third party for infringement of that parties + intellectual property rights. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+Name: GCC runtime library
+Files: .libs/libgfortran*.so
+Description: dynamically linked to files compiled with gcc
+Availability: https://gcc.gnu.org/viewcvs/gcc/
+License: GPLv3 + runtime exception
+  Copyright (C) 2002-2017 Free Software Foundation, Inc.
+
+  Libgfortran is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 3, or (at your option)
+  any later version.
+
+  Libgfortran is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+  GNU General Public License for more details.
+
+  Under Section 7 of GPL version 3, you are granted additional
+  permissions described in the GCC Runtime Library Exception, version
+  3.1, as published by the Free Software Foundation.
+
+  You should have received a copy of the GNU General Public License and
+  a copy of the GCC Runtime Library Exception along with this program;
+  see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+  <http://www.gnu.org/licenses/>.
+
+----
+
+Full text of license texts referred to above follows (that they are
+listed below does not necessarily imply the conditions apply to the
+present binary release):
+
+----
+
+GCC RUNTIME LIBRARY EXCEPTION
+
+Version 3.1, 31 March 2009
+
+Copyright (C) 2009 Free Software Foundation, Inc. <http://fsf.org/>
+
+Everyone is permitted to copy and distribute verbatim copies of this
+license document, but changing it is not allowed.
+
+This GCC Runtime Library Exception ("Exception") is an additional
+permission under section 7 of the GNU General Public License, version
+3 ("GPLv3"). It applies to a given file (the "Runtime Library") that
+bears a notice placed by the copyright holder of the file stating that
+the file is governed by GPLv3 along with this Exception.
+
+When you use GCC to compile a program, GCC may combine portions of
+certain GCC header files and runtime libraries with the compiled
+program. The purpose of this Exception is to allow compilation of
+non-GPL (including proprietary) programs to use, in this way, the
+header files and runtime libraries covered by this Exception.
+
+0. Definitions.
+
+A file is an "Independent Module" if it either requires the Runtime
+Library for execution after a Compilation Process, or makes use of an
+interface provided by the Runtime Library, but is not otherwise based
+on the Runtime Library.
+
+"GCC" means a version of the GNU Compiler Collection, with or without
+modifications, governed by version 3 (or a specified later version) of
+the GNU General Public License (GPL) with the option of using any
+subsequent versions published by the FSF.
+
+"GPL-compatible Software" is software whose conditions of propagation,
+modification and use would permit combination with GCC in accord with
+the license of GCC.
+
+"Target Code" refers to output from any compiler for a real or virtual
+target processor architecture, in executable form or suitable for
+input to an assembler, loader, linker and/or execution
+phase. Notwithstanding that, Target Code does not include data in any
+format that is used as a compiler intermediate representation, or used
+for producing a compiler intermediate representation.
+
+The "Compilation Process" transforms code entirely represented in
+non-intermediate languages designed for human-written code, and/or in
+Java Virtual Machine byte code, into Target Code. Thus, for example,
+use of source code generators and preprocessors need not be considered
+part of the Compilation Process, since the Compilation Process can be
+understood as starting with the output of the generators or
+preprocessors.
+
+A Compilation Process is "Eligible" if it is done using GCC, alone or
+with other GPL-compatible software, or if it is done without using any
+work based on GCC. For example, using non-GPL-compatible Software to
+optimize any GCC intermediate representations would not qualify as an
+Eligible Compilation Process.
+
+1. Grant of Additional Permission.
+
+You have permission to propagate a work of Target Code formed by
+combining the Runtime Library with Independent Modules, even if such
+propagation would otherwise violate the terms of GPLv3, provided that
+all Target Code was generated by Eligible Compilation Processes. You
+may then convey such a combination under terms of your choice,
+consistent with the licensing of the Independent Modules.
+
+2. No Weakening of GCC Copyleft.
+
+The availability of this Exception does not imply any general
+presumption that third-party software is unaffected by the copyleft
+requirements of the license of GCC.
+
+----
+
+                    GNU GENERAL PUBLIC LICENSE
+                       Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+  The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works. By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users. We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors. You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+  To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights. Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received.
You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. 
+ + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. 
You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. 
+ + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. 
In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. 
If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). 
To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. 
+ + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. 
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+  If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+    <program>  Copyright (C) <year>  <name of author>
+    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<http://www.gnu.org/licenses/>.
+
+  The GNU General Public License does not permit incorporating your program
+into proprietary programs. If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License. But first, please read
+<http://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/.local/lib/python3.8/site-packages/numpy-1.22.3.dist-info/LICENSES_bundled.txt b/.local/lib/python3.8/site-packages/numpy-1.22.3.dist-info/LICENSES_bundled.txt
new file mode 100644
index 0000000..26c7a78
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy-1.22.3.dist-info/LICENSES_bundled.txt
@@ -0,0 +1,22 @@
+The NumPy repository and source distributions bundle several libraries that are
+compatibly licensed. We list these here.
+ +Name: lapack-lite +Files: numpy/linalg/lapack_lite/* +License: BSD-3-Clause + For details, see numpy/linalg/lapack_lite/LICENSE.txt + +Name: tempita +Files: tools/npy_tempita/* +License: MIT + For details, see tools/npy_tempita/license.txt + +Name: dragon4 +Files: numpy/core/src/multiarray/dragon4.c +License: MIT + For license text, see numpy/core/src/multiarray/dragon4.c + +Name: libdivide +Files: numpy/core/include/numpy/libdivide/* +License: Zlib + For license text, see numpy/core/include/numpy/libdivide/LICENSE.txt diff --git a/.local/lib/python3.8/site-packages/numpy-1.22.3.dist-info/METADATA b/.local/lib/python3.8/site-packages/numpy-1.22.3.dist-info/METADATA new file mode 100644 index 0000000..d570a38 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy-1.22.3.dist-info/METADATA @@ -0,0 +1,56 @@ +Metadata-Version: 2.1 +Name: numpy +Version: 1.22.3 +Summary: NumPy is the fundamental package for array computing with Python. +Home-page: https://www.numpy.org +Author: Travis E. Oliphant et al. +Maintainer: NumPy Developers +Maintainer-email: numpy-discussion@python.org +License: BSD +Download-URL: https://pypi.python.org/pypi/numpy +Project-URL: Bug Tracker, https://github.com/numpy/numpy/issues +Project-URL: Documentation, https://numpy.org/doc/1.22 +Project-URL: Source Code, https://github.com/numpy/numpy +Platform: Windows +Platform: Linux +Platform: Solaris +Platform: Mac OS-X +Platform: Unix +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Science/Research +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: BSD License +Classifier: Programming Language :: C +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3 :: Only +Classifier: Programming Language :: Python :: Implementation :: CPython +Classifier: Topic :: Software Development +Classifier: Topic :: Scientific/Engineering +Classifier: Typing :: Typed +Classifier: Operating System :: Microsoft :: Windows +Classifier: Operating System :: POSIX +Classifier: Operating System :: Unix +Classifier: Operating System :: MacOS +Requires-Python: >=3.8 + +It provides: + +- a powerful N-dimensional array object +- sophisticated (broadcasting) functions +- tools for integrating C/C++ and Fortran code +- useful linear algebra, Fourier transform, and random number capabilities +- and much more + +Besides its obvious scientific uses, NumPy can also be used as an efficient +multi-dimensional container of generic data. Arbitrary data-types can be +defined. This allows NumPy to seamlessly and speedily integrate with a wide +variety of databases. + +All NumPy wheels distributed on PyPI are BSD licensed. 
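+
+A minimal sketch of the array object and broadcasting mentioned above
+(illustrative only, not part of the upstream package metadata):
+
+    import numpy as np
+
+    a = np.arange(6).reshape(3, 2)  # a 3x2 N-dimensional array
+    b = np.array([10.0, 20.0])      # shape (2,), broadcast across each row
+    print(a + b)                    # elementwise sum via broadcasting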
+ + + diff --git a/.local/lib/python3.8/site-packages/numpy-1.22.3.dist-info/RECORD b/.local/lib/python3.8/site-packages/numpy-1.22.3.dist-info/RECORD new file mode 100644 index 0000000..d6e4beb --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy-1.22.3.dist-info/RECORD @@ -0,0 +1,1281 @@ +../../../bin/f2py,sha256=OrHyBlcMFecJHT1QercQjr-jd3-UYsOjdOfi_wMRKxE,216 +../../../bin/f2py3,sha256=OrHyBlcMFecJHT1QercQjr-jd3-UYsOjdOfi_wMRKxE,216 +../../../bin/f2py3.8,sha256=OrHyBlcMFecJHT1QercQjr-jd3-UYsOjdOfi_wMRKxE,216 +numpy-1.22.3.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +numpy-1.22.3.dist-info/LICENSE.txt,sha256=ATdhUdRkXvut-KobrcV16_CzXbH65FRWBNVF9cmzL94,45692 +numpy-1.22.3.dist-info/LICENSES_bundled.txt,sha256=VyI3ubJn_nVdgqKOIT9pF2vVavoWKlpkGzegbjAHKNE,634 +numpy-1.22.3.dist-info/METADATA,sha256=lyuiwxAgB8hBsLO-L9rCqpO-k12ohSOk8YLMGVA6Sx4,2044 +numpy-1.22.3.dist-info/RECORD,, +numpy-1.22.3.dist-info/WHEEL,sha256=-ijGDuALlPxm3HbhKntps0QzHsi-DPlXqgerYTTJkFE,148 +numpy-1.22.3.dist-info/entry_points.txt,sha256=htcmoe70QiXKEQ4GFRWK0154LnY85bD8-ga3BjqwgJ0,150 +numpy-1.22.3.dist-info/top_level.txt,sha256=4J9lbBMLnAiyxatxh8iRKV5Entd_6-oqbO7pzJjMsPw,6 +numpy.libs/libgfortran-040039e1.so.5.0.0,sha256=R6s7aClbCjzomQpEjef6sRq928Fg-IlZcsqapxLPhtA,2686064 +numpy.libs/libopenblas64_p-r0-2f7c42d4.3.18.so,sha256=adAv74uvbvkHHtk_S6KnkuD-ydm1cmGHpaiA698fQfI,32889152 +numpy.libs/libquadmath-96973f99.so.0.0.0,sha256=l82oXdtRY-Labh7bTh1rVXgzqZpA7aB5rjflA5Rltl0,247608 +numpy/LICENSE.txt,sha256=ATdhUdRkXvut-KobrcV16_CzXbH65FRWBNVF9cmzL94,45692 +numpy/__config__.py,sha256=49s2ch4aadZ1I3IngFbCComBXWWHwtwT080ZqhHaRSc,5291 +numpy/__init__.cython-30.pxd,sha256=cLPmeF01XBH1NkFxsCkvDHexKV4MD1_qT8Lc0LVqrMA,36238 +numpy/__init__.pxd,sha256=YaJdc2H7uEybO8u9atqKzyS6qZJILO48f4yi1LzZA_I,34606 +numpy/__init__.py,sha256=NfLkFmXmFxSJxmjLu-TlZmvbE_5oz9c31tsCk15ObFs,15320 +numpy/__init__.pyi,sha256=ceoQH55CMKTQv51vRoMDtKR6DpqMGi7P6wNv2dPPqcE,148804 +numpy/__pycache__/__config__.cpython-38.pyc,, +numpy/__pycache__/__init__.cpython-38.pyc,, +numpy/__pycache__/_distributor_init.cpython-38.pyc,, +numpy/__pycache__/_globals.cpython-38.pyc,, +numpy/__pycache__/_pytesttester.cpython-38.pyc,, +numpy/__pycache__/_version.cpython-38.pyc,, +numpy/__pycache__/conftest.cpython-38.pyc,, +numpy/__pycache__/ctypeslib.cpython-38.pyc,, +numpy/__pycache__/dual.cpython-38.pyc,, +numpy/__pycache__/matlib.cpython-38.pyc,, +numpy/__pycache__/setup.cpython-38.pyc,, +numpy/__pycache__/version.cpython-38.pyc,, +numpy/_distributor_init.py,sha256=IgPkSK3H9bgjFeUfWuXhjKrgetQl5ztUW-rTyjGHK3c,331 +numpy/_globals.py,sha256=oqgE-es0c8OxUG_HPHAv4xZp3h2Al3roRSkfCz_0B-0,4011 +numpy/_pytesttester.py,sha256=u2ESexAVeDbwRMIrmrnLXvJFTTyCTW02ZBL_pKKDTQg,6676 +numpy/_pytesttester.pyi,sha256=1-gYNaF9V_UF0IBrrQMeer4q4GVIBouN30xOGipPKDk,468 +numpy/_version.py,sha256=aZoVh1GaBXtEiftx5QP4WLO5PIacVCvtPoqQWUZ_I5g,498 +numpy/array_api/__init__.py,sha256=XvTzhp2_rt1tEPc4IPERZFp-Ij3_fTL6UES8qk-4VmY,10163 +numpy/array_api/__pycache__/__init__.cpython-38.pyc,, +numpy/array_api/__pycache__/_array_object.cpython-38.pyc,, +numpy/array_api/__pycache__/_constants.cpython-38.pyc,, +numpy/array_api/__pycache__/_creation_functions.cpython-38.pyc,, +numpy/array_api/__pycache__/_data_type_functions.cpython-38.pyc,, +numpy/array_api/__pycache__/_dtypes.cpython-38.pyc,, +numpy/array_api/__pycache__/_elementwise_functions.cpython-38.pyc,, +numpy/array_api/__pycache__/_manipulation_functions.cpython-38.pyc,, 
+numpy/array_api/__pycache__/_searching_functions.cpython-38.pyc,, +numpy/array_api/__pycache__/_set_functions.cpython-38.pyc,, +numpy/array_api/__pycache__/_sorting_functions.cpython-38.pyc,, +numpy/array_api/__pycache__/_statistical_functions.cpython-38.pyc,, +numpy/array_api/__pycache__/_typing.cpython-38.pyc,, +numpy/array_api/__pycache__/_utility_functions.cpython-38.pyc,, +numpy/array_api/__pycache__/linalg.cpython-38.pyc,, +numpy/array_api/__pycache__/setup.cpython-38.pyc,, +numpy/array_api/_array_object.py,sha256=8Orynv7pKrQFWxjWWP30GBS2bjh7v0zKND3GVLFZvQ0,41328 +numpy/array_api/_constants.py,sha256=Z6bImx0puhVuCwc5dPXvZ6TZt46exiuuX1TubSyQ62E,66 +numpy/array_api/_creation_functions.py,sha256=Zyuek7Q_v-yxkizQezeEtzdn8CZ27UNTqG7p1FHpdLo,10051 +numpy/array_api/_data_type_functions.py,sha256=4wD4jwEaWxTaOAGnMhOLd6A06UrzWh3m_h0ku9EReDk,3799 +numpy/array_api/_dtypes.py,sha256=WEy1XztJIbSI9kosFqesv_Fzb_bHwH_nOlrVFlhbyx0,3707 +numpy/array_api/_elementwise_functions.py,sha256=arizOQ2iM3d_bOFpk_rwwCXZM3Zyhx9_fnyCki6Z8eY,24772 +numpy/array_api/_manipulation_functions.py,sha256=EqLf8ZILAV1Se6CttZ-DD271V1-PgjDK5Ae_TD-NmqQ,2944 +numpy/array_api/_searching_functions.py,sha256=M75BEj5b94Z5cya6_OzbgvhR9GGIDGdJ8eyydJHuWPQ,1457 +numpy/array_api/_set_functions.py,sha256=80INpLVP-4bunDXUxmxI3dFW-guu_7rgWlAwuNzdsZE,2848 +numpy/array_api/_sorting_functions.py,sha256=oBsyVwnVLBJjJRLZi4Rpe9awesepZjAdvjTHJ_Jy50o,1627 +numpy/array_api/_statistical_functions.py,sha256=AxiYkz-lW1k8uFaQfCDXJrXiDzG6Qhju-hrm5A9IFSE,3378 +numpy/array_api/_typing.py,sha256=HN0Ao-n6m1XAuIFNkGEUQ56REPmPJ-lKSAernfUxoDc,1376 +numpy/array_api/_utility_functions.py,sha256=HwycylbPAgRVz4nZvjvwqN3mQnJbqKA-NRMaAvIP-CE,824 +numpy/array_api/linalg.py,sha256=bVqPGzw0TCeMZ692Qp3hGR9gJeIkU5EAdeoVR0Cd-Zs,16317 +numpy/array_api/setup.py,sha256=Wx6qD7GU_APiqKolYPO0OHv4eHGYrjPZmDAgjWhOEhM,341 +numpy/array_api/tests/__init__.py,sha256=t_2GZ3lKcsu4ec4GMKPUDYaeMUJyDquBlQAcPgj7kFE,282 +numpy/array_api/tests/__pycache__/__init__.cpython-38.pyc,, +numpy/array_api/tests/__pycache__/test_array_object.cpython-38.pyc,, +numpy/array_api/tests/__pycache__/test_creation_functions.cpython-38.pyc,, +numpy/array_api/tests/__pycache__/test_elementwise_functions.cpython-38.pyc,, +numpy/array_api/tests/__pycache__/test_set_functions.cpython-38.pyc,, +numpy/array_api/tests/__pycache__/test_sorting_functions.cpython-38.pyc,, +numpy/array_api/tests/test_array_object.py,sha256=MKQup6gCAThZeuM3VPQtWmMlZQn4aHXZChnsu-sUT8M,14345 +numpy/array_api/tests/test_creation_functions.py,sha256=s3A1COWmXIAJdhzd8v7VtL-jbiSspskTqwYy0BTpmpw,5023 +numpy/array_api/tests/test_elementwise_functions.py,sha256=_DEB5r6VCDVhUos7VDaFzCId2_M5y7KoyK-HTGybdkc,3619 +numpy/array_api/tests/test_set_functions.py,sha256=D016G7v3ko49bND5sVERP8IqQXZiwr-2yrKbBPJ-oqg,546 +numpy/array_api/tests/test_sorting_functions.py,sha256=INPiYnuGBcsmWtYqdTTX3ENHmM4iUx4zs9KdwDaSmdA,602 +numpy/compat/__init__.py,sha256=vDaDPwY2qBEKixsIfjwPjyKeWiyWg0kPJdgcSxua8R8,432 +numpy/compat/__pycache__/__init__.cpython-38.pyc,, +numpy/compat/__pycache__/_inspect.cpython-38.pyc,, +numpy/compat/__pycache__/py3k.cpython-38.pyc,, +numpy/compat/__pycache__/setup.cpython-38.pyc,, +numpy/compat/_inspect.py,sha256=8Ma7QBRwfSWKeK1ShJpFNc7CDhE6fkIE_wr1FxrG1A8,7447 +numpy/compat/py3k.py,sha256=jbMNbn_cV3JDWDKJfYO78g6lCXkg-_95mgQJlh4qP80,3607 +numpy/compat/setup.py,sha256=36X1kF0C_NVROXfJ7w3SQeBm5AIDBuJbM5qT7cvSDgU,335 +numpy/compat/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 
+numpy/compat/tests/__pycache__/__init__.cpython-38.pyc,, +numpy/compat/tests/__pycache__/test_compat.cpython-38.pyc,, +numpy/compat/tests/test_compat.py,sha256=IB6xDjzHwWRmxKHwnFbESXul2ms2XjT6Hj-Dywdu2oM,476 +numpy/conftest.py,sha256=3lYJfyLGDyx-cyCq2Ox6GX6xw09nn97K4x2WSU-RAn0,4032 +numpy/core/__init__.py,sha256=6ezCGVC4TuvLlyYtBt6fXg0U-UEubw2XmzTqv5T-MOg,5660 +numpy/core/__init__.pyi,sha256=xtd9OFYza-ZG3jyEJrlzRPT-SkVoB_qYmVCe6FxRks0,126 +numpy/core/__pycache__/__init__.cpython-38.pyc,, +numpy/core/__pycache__/_add_newdocs.cpython-38.pyc,, +numpy/core/__pycache__/_add_newdocs_scalars.cpython-38.pyc,, +numpy/core/__pycache__/_asarray.cpython-38.pyc,, +numpy/core/__pycache__/_dtype.cpython-38.pyc,, +numpy/core/__pycache__/_dtype_ctypes.cpython-38.pyc,, +numpy/core/__pycache__/_exceptions.cpython-38.pyc,, +numpy/core/__pycache__/_internal.cpython-38.pyc,, +numpy/core/__pycache__/_machar.cpython-38.pyc,, +numpy/core/__pycache__/_methods.cpython-38.pyc,, +numpy/core/__pycache__/_string_helpers.cpython-38.pyc,, +numpy/core/__pycache__/_type_aliases.cpython-38.pyc,, +numpy/core/__pycache__/_ufunc_config.cpython-38.pyc,, +numpy/core/__pycache__/arrayprint.cpython-38.pyc,, +numpy/core/__pycache__/cversions.cpython-38.pyc,, +numpy/core/__pycache__/defchararray.cpython-38.pyc,, +numpy/core/__pycache__/einsumfunc.cpython-38.pyc,, +numpy/core/__pycache__/fromnumeric.cpython-38.pyc,, +numpy/core/__pycache__/function_base.cpython-38.pyc,, +numpy/core/__pycache__/generate_numpy_api.cpython-38.pyc,, +numpy/core/__pycache__/getlimits.cpython-38.pyc,, +numpy/core/__pycache__/memmap.cpython-38.pyc,, +numpy/core/__pycache__/multiarray.cpython-38.pyc,, +numpy/core/__pycache__/numeric.cpython-38.pyc,, +numpy/core/__pycache__/numerictypes.cpython-38.pyc,, +numpy/core/__pycache__/overrides.cpython-38.pyc,, +numpy/core/__pycache__/records.cpython-38.pyc,, +numpy/core/__pycache__/setup.cpython-38.pyc,, +numpy/core/__pycache__/setup_common.cpython-38.pyc,, +numpy/core/__pycache__/shape_base.cpython-38.pyc,, +numpy/core/__pycache__/umath.cpython-38.pyc,, +numpy/core/__pycache__/umath_tests.cpython-38.pyc,, +numpy/core/_add_newdocs.py,sha256=dQX5TU9R2F4foGP_lckdE3QYVkZcwFtRZ0dg2_cc0m0,197612 +numpy/core/_add_newdocs_scalars.py,sha256=xxU2hhMm4tFux-UNIKLMKHFU7bMelOCRAJ2lCX3bahI,10232 +numpy/core/_asarray.py,sha256=vdQAHeC_3IUhoWP-kZq5YdjWIZV_DahOyEfByzVdTfc,4175 +numpy/core/_asarray.pyi,sha256=Hg9i-t5BYGWgQlOFdC66iVk-wSOkcKDiHGHUDxCnkhA,1003 +numpy/core/_dtype.py,sha256=sqz6_fBORUsxLIn6BBs30l2EezXACP9941bG6J89oXA,10076 +numpy/core/_dtype_ctypes.py,sha256=Vug4i7xKhznK2tdIjmn4ebclClpaCJwSZUlvEoYl0Eg,3673 +numpy/core/_exceptions.py,sha256=foa2dy_Nu3j6Pp_EiRwN6V_O7KOGugSk2XzzduRfbUg,8344 +numpy/core/_internal.py,sha256=nu9TACdNaNGWgda2RTmWA_9JFAeBoEy0g7mv8fKK51w,26416 +numpy/core/_internal.pyi,sha256=8GggxiQB2TOAQTk1eOqU1zL3F7OaNn-WnsbQgz_MvwU,1051 +numpy/core/_machar.py,sha256=pzTBKBdlAducRDuYrrqkYtlVtNtFNSfFNERVGmGLUHo,11486 +numpy/core/_methods.py,sha256=2Zp9L3nc0ec_LBGhq4C_CSqGLOb2HowIUAsgCnX1qF8,10865 +numpy/core/_multiarray_tests.cpython-38-x86_64-linux-gnu.so,sha256=nvd1FR85L3OS4-Z67DJuTj6i0GC_CuOqBL_-guACGEc,186368 +numpy/core/_multiarray_umath.cpython-38-x86_64-linux-gnu.so,sha256=rVpWWF418zuwaDW7C_JJcy1Mssxkjq8GmqEfJWzbkiw,5316256 +numpy/core/_operand_flag_tests.cpython-38-x86_64-linux-gnu.so,sha256=RDq63Mv82pNVQA8agOSpPEGJYL6ZtFG8jq2Ulh4wg7c,17072 +numpy/core/_rational_tests.cpython-38-x86_64-linux-gnu.so,sha256=SDBOfIp4Pdn00izhkwjhPM7ia0K1wgkqDLTEFvFeT5U,59816 
+numpy/core/_simd.cpython-38-x86_64-linux-gnu.so,sha256=Hq_rr_d4yoobePpVqQ0ku0a_rvwwVimuJ31Lx2fAHas,2011112 +numpy/core/_string_helpers.py,sha256=NGGGhaFdU5eGiUAj3GTIBoOgWs4r9aTNlsE2r9NgX6Q,2855 +numpy/core/_struct_ufunc_tests.cpython-38-x86_64-linux-gnu.so,sha256=CfkOdIqwpHeOcFqmJWE8JJSKTh-B3Ew6C7aTzRPBXyU,17176 +numpy/core/_type_aliases.py,sha256=6fQ6Wrf3Bww0kv58nh5Iq0NXbGr4xsuIzEba7KB_kVc,7272 +numpy/core/_type_aliases.pyi,sha256=Rt9X2qksBilonPnUM7lGWDpNZBQPRFkivgzEctmxUzQ,405 +numpy/core/_ufunc_config.py,sha256=lgyCN1I_6rdDpYC0Dl0LfJGFijFvMs77CLcgD86MYLI,13387 +numpy/core/_ufunc_config.pyi,sha256=riVvQJw-aQVvHq0CLiDOrToCLyIKpmPCFHThX0mMVkA,1078 +numpy/core/_umath_tests.cpython-38-x86_64-linux-gnu.so,sha256=zAE4YGiMwrRUkaBXq-VOdylFDRPWF2fpmTgk03S58y4,41824 +numpy/core/arrayprint.py,sha256=2ulEKXNysC2RAp2vIk7A1XLLKDKe-9Yd02Ix69wN_XM,62839 +numpy/core/arrayprint.pyi,sha256=DQsFhPuDZPfAxXazTEr8y3mOtc2jxEqOqBRoKpK15_c,4570 +numpy/core/cversions.py,sha256=H_iNIpx9-hY1cQNxqjT2d_5SXZhJbMo_caq4_q6LB7I,347 +numpy/core/defchararray.py,sha256=ynAFiJONpssYUb1hTF1rvDoD1zSDi_r6gXc1lDBF_PE,69776 +numpy/core/defchararray.pyi,sha256=yUv65tuo2MfNunb1qVTuP18nxSjaX5Z9wjwlxsgVA6Q,9225 +numpy/core/einsumfunc.py,sha256=X5XxzBMRx-itAeJ07A0rbRTWSwXc7WLXb6ZA1-5AaGA,51445 +numpy/core/einsumfunc.pyi,sha256=j6J0Ok0VpB18NXcbUOs7cuDDdY8riF84C6rv38urbCg,3636 +numpy/core/fromnumeric.py,sha256=Ig2My66-Egehlfzyf7xgoL_bOhk8NmtoB2pV4Wg17Mo,124187 +numpy/core/fromnumeric.pyi,sha256=c03Z27qB70ANuGplWlH1_ozF4muk3Ogg4WZ6U2A8DIE,8362 +numpy/core/function_base.py,sha256=0aknBH-SwxLjc7f-3hmhxv0HxqTln9Ixa86Z9tOxLgc,19019 +numpy/core/function_base.pyi,sha256=jwbaz_e-UYxzIuHC1UdvZhiu7cEDOWUfPNhV9jF8dBw,1562 +numpy/core/generate_numpy_api.py,sha256=kRyp-DBDU461R7-Hpd8EIG6WtcHwbXHbGjEsDZ5xDjg,7007 +numpy/core/getlimits.py,sha256=kAZb79mM59lvGvqLHYbZxkSlHJGANDFM9lX49kSF2Iw,24067 +numpy/core/getlimits.pyi,sha256=ijGslHQqXn40WJ06Y4hZPFchhQ6f1CpKq_abvobCvOE,107 +numpy/core/include/numpy/.doxyfile,sha256=goOZR7LcINt3snuBGH8Me-_aVTGm5f4u4wEaSpjM_W4,58 +numpy/core/include/numpy/__multiarray_api.h,sha256=wn0msbTb3rO7RjnUzK0r03xL76UdKWbyoebwaT2jW4Y,62482 +numpy/core/include/numpy/__ufunc_api.h,sha256=kOT0I320xLGHnuE8XSaomEiCmFaup8fTOTied514XiM,12614 +numpy/core/include/numpy/_neighborhood_iterator_imp.h,sha256=A6YPpQihUwscZYxz0oODrBjRSYZdJM2dG0vcdtJtY64,1877 +numpy/core/include/numpy/_numpyconfig.h,sha256=yU6IgJgn77qU0zG552mNZGwO7siSXqOMTRwCtvaa0BQ,1010 +numpy/core/include/numpy/arrayobject.h,sha256=-BlWQ7kfVbzCqzHn0qaeMe0_08AbwliuG98XWG57lT8,282 +numpy/core/include/numpy/arrayscalars.h,sha256=h6MKjFHzguTZ5CQd7HATAVWHoZ7mkC-86R1uMHeLT04,3818 +numpy/core/include/numpy/experimental_dtype_api.h,sha256=kkoZDhoPc7Dqj-fJeU7AmLTRjC4lFBPYW_Nypi_NcXs,14267 +numpy/core/include/numpy/halffloat.h,sha256=TRZfXgipa-dFppX2uNgkrjrPli-1BfJtadWjAembJ4s,1959 +numpy/core/include/numpy/libdivide/LICENSE.txt,sha256=-8U59H0M-DvGE3gID7hz1cFGMBJsrL_nVANcOSbapew,1018 +numpy/core/include/numpy/libdivide/libdivide.h,sha256=ew9MNhPQd1LsCZiWiFmj9IZ7yOnA3HKOXffDeR9X1jw,80138 +numpy/core/include/numpy/multiarray_api.txt,sha256=lY8Kd9_HipU14vLBLhdTpGp3P6-ZSa-ncTrcIF6bWhQ,57383 +numpy/core/include/numpy/ndarrayobject.h,sha256=lVwn_Fc1gkR8a_tKURJCdqUJ7ldBQywhQPzPkqpXxUs,10755 +numpy/core/include/numpy/ndarraytypes.h,sha256=B_q3rfoMBUdZeZVusRYR5hNqwqjPmKiTLJlRRRS0b5c,69644 +numpy/core/include/numpy/noprefix.h,sha256=CQqvpmMzQ4k70fx9n9YlUkSydmoL2knvD0i7zn0Rguo,6874 +numpy/core/include/numpy/npy_1_7_deprecated_api.h,sha256=yNiQoquPT6t5Iut16gkOnlK547rWQO3f1rr13E-BDiE,4377 
+numpy/core/include/numpy/npy_3kcompat.h,sha256=SYTEjvTjwM7hz8z-Q2fmf9bSoFN8SWjrLFOjEYLEERA,15904 +numpy/core/include/numpy/npy_common.h,sha256=nu6fCGxNpbNFtIJCbRivp8X5xHXasXKPvjLddw3jSAM,38918 +numpy/core/include/numpy/npy_cpu.h,sha256=s5wuxe4hDuxffIlhstEd5k5bZtNdrGWwjUo0haegE0E,4603 +numpy/core/include/numpy/npy_endian.h,sha256=we7X9fPeWzNpo_YTh09MPGDwdE0Rw_WDM4c9y4nBj5I,2786 +numpy/core/include/numpy/npy_interrupt.h,sha256=DQZIxi6FycLXD8drdHn2SSmLoRhIpo6osvPv13vowUA,1948 +numpy/core/include/numpy/npy_math.h,sha256=IHi_QUlePyw-GgFSSrM6EeJnnkGZMT1u9fT9t_qOuV4,21370 +numpy/core/include/numpy/npy_no_deprecated_api.h,sha256=0yZrJcQEJ6MCHJInQk5TP9_qZ4t7EfBuoLOJ34IlJd4,678 +numpy/core/include/numpy/npy_os.h,sha256=XnsUD9ZwSL-xJ1eCSwpdJWeLuez0wO9lHB8dHZHJb1k,907 +numpy/core/include/numpy/numpyconfig.h,sha256=63aQkEtEYTHhJjYyncJwppcQYmJZ2ns_HGdpnx1oms4,2244 +numpy/core/include/numpy/old_defines.h,sha256=xuYQDDlMywu0Zsqm57hkgGwLsOFx6IvxzN2eiNF-gJY,6405 +numpy/core/include/numpy/oldnumeric.h,sha256=laP57STtFtJSs_fjRRDlrk34hqfqCwxP-BFEytR6lTM,899 +numpy/core/include/numpy/random/bitgen.h,sha256=49AwKOR552r-NkhuSOF1usb_URiMSRMvD22JF5pKIng,488 +numpy/core/include/numpy/random/distributions.h,sha256=IY50AHhyNU21Il84gVGiVYdXlzKlTzHM8TswS73TsG8,9894 +numpy/core/include/numpy/ufunc_api.txt,sha256=CMYRT5Wl36VufbEnjRR_yLg1vf0R3ydcNODyJY3WPCw,7198 +numpy/core/include/numpy/ufuncobject.h,sha256=PDpFN5loASykuAgmCEt2t38yt5RiNP54tDyQB33yT98,11895 +numpy/core/include/numpy/utils.h,sha256=Hnef025Bf7AUwvNhi46-24Q-JkqCwO5viYLzWiXNrwU,1187 +numpy/core/lib/libnpymath.a,sha256=Dd2OjGXBczNPAJTkd-noGsL3As6gLo1z6qZSiGybNHE,474844 +numpy/core/lib/npy-pkg-config/mlib.ini,sha256=_LsWV1eStNqwhdiYPa2538GL46dnfVwT4MrI1zbsoFw,147 +numpy/core/lib/npy-pkg-config/npymath.ini,sha256=kamUNrYKAmXqQa8BcNv7D5sLqHh6bnChM0_5rZCsTfY,360 +numpy/core/memmap.py,sha256=tIDygBke18_QnYw2mZNw5pBilnDhydFoJS8alv8KmP8,11688 +numpy/core/memmap.pyi,sha256=Zb9fS_mQOUeyNSuk8aguSr3YmVAyj_Gbu3yXklAeHhA,80 +numpy/core/multiarray.py,sha256=JRCNNhj5LBLAzDeRVsllVsne-QhBlTLlguYDobaFN3I,55414 +numpy/core/multiarray.pyi,sha256=2Renq6KDbEobiYUVzTZ3MHLajS9WesCc-bKC3cJ2Ies,24301 +numpy/core/numeric.py,sha256=vbEP9_m_L9gpZS5K2wuDBoeXWWEydyWmjoA3pFj1Fu4,76968 +numpy/core/numeric.pyi,sha256=694wONW4lBcbA73p7vPpPCD6wd7jreWHrt8h6Px6rAI,13590 +numpy/core/numerictypes.py,sha256=jrE3e1cXM1b3545iIi9TAIPgMyg-i8ShWRes9v6h-qQ,17276 +numpy/core/numerictypes.pyi,sha256=XrS5a9FI-jlkcxKDBWSTJwFxKd5cEZ6j6kIQ93uRhLM,3557 +numpy/core/overrides.py,sha256=Tii8y3txyTcUHEU9EOWuK3_t-87gGfZZGkYvO38YaME,7287 +numpy/core/records.py,sha256=kIsUns_m8BQ40HTmB39S2ogY5WRphF0EK_PXSzwZ2tY,37545 +numpy/core/records.pyi,sha256=qORS6FxhpazM3T30II6m2RAaQmCUizzLzpX8n_GO_Ec,5693 +numpy/core/setup.py,sha256=s2xoqKQF94kQLTF4OH6Z5wZkVFwyA9zO6E1t2hY8b8w,48401 +numpy/core/setup_common.py,sha256=0KVHbEezNR5ERRRuxS5-qhDg2OJArtHFxxv7JTqFFcM,19739 +numpy/core/shape_base.py,sha256=UywGdxNfoeuCdddBwRGdJzgf3fBnP7V3ZTwHDM7kSYY,29001 +numpy/core/shape_base.pyi,sha256=c8ZdfXduLcZXWsMdHKG0cg7iuVbu-32KdsbQf7HTDRU,1821 +numpy/core/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +numpy/core/tests/__pycache__/__init__.cpython-38.pyc,, +numpy/core/tests/__pycache__/_locales.cpython-38.pyc,, +numpy/core/tests/__pycache__/test__exceptions.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_abc.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_api.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_argparse.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_array_coercion.cpython-38.pyc,, 
+numpy/core/tests/__pycache__/test_arraymethod.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_arrayprint.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_casting_unittests.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_conversion_utils.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_cpu_dispatcher.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_cpu_features.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_custom_dtypes.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_cython.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_datetime.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_defchararray.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_deprecations.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_dlpack.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_dtype.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_einsum.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_errstate.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_extint128.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_function_base.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_getlimits.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_half.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_hashtable.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_indexerrors.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_indexing.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_item_selection.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_limited_api.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_longdouble.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_machar.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_mem_overlap.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_mem_policy.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_memmap.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_multiarray.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_nditer.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_numeric.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_numerictypes.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_overrides.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_print.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_protocols.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_records.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_regression.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_scalar_ctors.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_scalar_methods.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_scalarbuffer.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_scalarinherit.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_scalarmath.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_scalarprint.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_shape_base.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_simd.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_simd_module.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_ufunc.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_umath.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_umath_accuracy.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_umath_complex.cpython-38.pyc,, +numpy/core/tests/__pycache__/test_unicode.cpython-38.pyc,, +numpy/core/tests/_locales.py,sha256=R2tNBiBzY6h7zHqexpGsuM1QNd_2CR-JfqEXc4pFXYI,2192 +numpy/core/tests/data/astype_copy.pkl,sha256=lWSzCcvzRB_wpuRGj92spGIw-rNPFcd9hwJaRVvfWdk,716 +numpy/core/tests/data/generate_umath_validation_data.cpp,sha256=cIdhwkUie7Un36z6lBX0CfvTHFJOk6UTDHG1qr9z8dI,5843 
+numpy/core/tests/data/recarray_from_file.fits,sha256=NA0kliz31FlLnYxv3ppzeruONqNYkuEvts5wzXEeIc4,8640 +numpy/core/tests/data/umath-validation-set-README.txt,sha256=pxWwOaGGahaRd-AlAidDfocLyrAiDp0whf5hC7hYwqM,967 +numpy/core/tests/data/umath-validation-set-arccos.csv,sha256=W_aL99bjzVjlVyd5omfDUORag8jHzx6uctedPVZgOHQ,61365 +numpy/core/tests/data/umath-validation-set-arccosh.csv,sha256=Uko_d0kDXr1YlN-6Ii-fQQxUvbXAhRfC7Un4gJ23GJk,61365 +numpy/core/tests/data/umath-validation-set-arcsin.csv,sha256=15Aenze4WD2a2dF2aOBXpv9B7u3wwAeUVJdEm4TjOkQ,61339 +numpy/core/tests/data/umath-validation-set-arcsinh.csv,sha256=uDwx4PStpfV21IaPF8pmzQpul6i72g7zDwlfcynWaVQ,60289 +numpy/core/tests/data/umath-validation-set-arctan.csv,sha256=mw5tYze_BMs6ugGEZfg5mcXoInGYdn7fvSCYSUi9Bqw,60305 +numpy/core/tests/data/umath-validation-set-arctanh.csv,sha256=95l4Uu5RmZajljabfqlv5U34RVrifCMhhkop6iLeNBo,61339 +numpy/core/tests/data/umath-validation-set-cbrt.csv,sha256=v855MTZih-fZp_GuEDst2qaIsxU4a7vlAbeIJy2xKpc,60846 +numpy/core/tests/data/umath-validation-set-cos.csv,sha256=yxd7BNaicgyEysvp8Tx2qEM-S4zVJA0f0ZXR_VG4y1E,59122 +numpy/core/tests/data/umath-validation-set-cosh.csv,sha256=FGCNeUSUTAeASsb_j18iRSsCxXLxmzF-_C7tq1elVrQ,60869 +numpy/core/tests/data/umath-validation-set-exp.csv,sha256=rUAWIbvyeKh9rPfp2n0Zq7AKq_nvHpgbgzLjAllhsek,17491 +numpy/core/tests/data/umath-validation-set-exp2.csv,sha256=f1b05MRXPOXihC9M-yi52udKBzVXalhbTuIcqoDAk-g,58624 +numpy/core/tests/data/umath-validation-set-expm1.csv,sha256=_ghc1xiUECNsBGrKCFUAy2lvu01_lkpeYJN0zDtCYWk,60299 +numpy/core/tests/data/umath-validation-set-log.csv,sha256=ynzbVbKxFzxWFwxHnxX7Fpm-va09oI3oK1_lTe19g4w,11692 +numpy/core/tests/data/umath-validation-set-log10.csv,sha256=RJgpruL16FVPgUT3-3xW4eppS_tn6o5yEW79KnITn48,68922 +numpy/core/tests/data/umath-validation-set-log1p.csv,sha256=IZZI-hi55HGCOvBat3vSBVha_8Nt-5alf2fqz6QeTG0,60303 +numpy/core/tests/data/umath-validation-set-log2.csv,sha256=HL2rOCsrEi378rNrbsXHPqlWlEGkXQq8R4e63YeTksU,68917 +numpy/core/tests/data/umath-validation-set-sin.csv,sha256=5I2h5CI8Hv6L9o2mh6x5pttnL23VhhY1h5LA8cjIWTg,58611 +numpy/core/tests/data/umath-validation-set-sinh.csv,sha256=CYiibE8aX7MQnBatl__5k_PWc_9vHUifwS-sFZzzKk0,60293 +numpy/core/tests/data/umath-validation-set-tan.csv,sha256=Oq7gxMvblRVBrQ23kMxc8iT0bHnCWKg9EE4ZqzbJbOA,60299 +numpy/core/tests/data/umath-validation-set-tanh.csv,sha256=iolZF_MOyWRgYSa-SsD4df5mnyFK18zrICI740SWoTc,60299 +numpy/core/tests/examples/cython/__pycache__/setup.cpython-38.pyc,, +numpy/core/tests/examples/cython/checks.pyx,sha256=yQYhpWZLnJ5GCqCnsSvT1UOmCL0CGsFwBDeBnjm2f9U,588 +numpy/core/tests/examples/cython/setup.py,sha256=aAR-TvQabUabnCzuB6UdWdmRXaaPfIG7MzTIfMF-0tk,496 +numpy/core/tests/examples/limited_api/__pycache__/setup.cpython-38.pyc,, +numpy/core/tests/examples/limited_api/limited_api.c,sha256=mncE8TjjXmYpkwli433G0jB2zGQO_5NqWmGKdzRJZug,344 +numpy/core/tests/examples/limited_api/setup.py,sha256=p2w7F1ardi_GRXSrnNIR8W1oeH_pgmw_1P2wS0A2I6M,435 +numpy/core/tests/test__exceptions.py,sha256=QqxQSLXboPXEVwHz-TyE2JeIl_TC-rPugzfo25nbcns,2846 +numpy/core/tests/test_abc.py,sha256=k4zkrHjoRr4TXiVTCu34B8ZyRP5XJ7N8tPKKdPudOS0,2328 +numpy/core/tests/test_api.py,sha256=HFUTpabHFgPXLW4EKmD3wk7pCxNOaCfkA7XT3aj4uNg,23450 +numpy/core/tests/test_argparse.py,sha256=rkbQloOqMiIGIdUsaZb20oHyBO3y9JFqSYr7FawdtDk,1977 +numpy/core/tests/test_array_coercion.py,sha256=fSeZ2GAK5L7CkrgwLlYGMPmSeUTMi-ipffzH9xW9fIk,28522 +numpy/core/tests/test_arraymethod.py,sha256=mYTChMoUZ3SsWHFPAuf7lHP8u3r7_CYPQMlBOnP1qoM,3567 
+numpy/core/tests/test_arrayprint.py,sha256=y28q842bjQfmMCpFtxDn3mxwBd3bRmCEhVRTDWlAmHE,37152 +numpy/core/tests/test_casting_unittests.py,sha256=fD8-1sxIVhyapLVCz554RDGObneawSLpBAGPrGnpBes,29434 +numpy/core/tests/test_conversion_utils.py,sha256=6db716kfa8F4JNdJmqD6SVmpYMqFtZpD1Wj9ayVzjWY,6411 +numpy/core/tests/test_cpu_dispatcher.py,sha256=JmYHyBedXNu8pPgGCegP4it6UXY0sifW4EjdM9C0f8M,1521 +numpy/core/tests/test_cpu_features.py,sha256=H5QuSM3mEglWUlHPrtpBK9EL_rsDRj4U0_yeh8y2H_o,6844 +numpy/core/tests/test_custom_dtypes.py,sha256=JdvtCZNfL_KuZprE5tnZIRHmUNodCISYM7BRKOQ5QZQ,7677 +numpy/core/tests/test_cython.py,sha256=a9qLl99j4BtGNJd-Dr8P49oPf94M9AKKnhSAGM-GTk0,3539 +numpy/core/tests/test_datetime.py,sha256=IfB4nICPFOyv3aQiSGdmOeJt3Sc1f4SxR3eLUW8YC7U,114963 +numpy/core/tests/test_defchararray.py,sha256=Lz7lE3-wzmdWK0wQp-uAInKsmda9IIhOixFq3AiTvq8,24583 +numpy/core/tests/test_deprecations.py,sha256=fkOBLym1C9ik1ef3aaChalbIANiji731MXf_j6TtWeY,47719 +numpy/core/tests/test_dlpack.py,sha256=FAWAMFUn9lHTUdmXRbi48u-045rn20i18B6ogObQlV0,3512 +numpy/core/tests/test_dtype.py,sha256=OvhjpavnlQF43ZDW7z2lqcRZFHK5IAaY6BEOAJYwmAE,63101 +numpy/core/tests/test_einsum.py,sha256=Qm8QPBWkkuhPi2InUpVzuIm4Q9fCJPD-YqnaQUxBS9Y,49051 +numpy/core/tests/test_errstate.py,sha256=19uzW3PSyQiQD-sGnaIcVle3h7P2LbzxCEZKt3tS678,2066 +numpy/core/tests/test_extint128.py,sha256=gCZfAwPOb-F1TLsEEeDI0amQYwHk-60-OXi0ccZrrZ8,5643 +numpy/core/tests/test_function_base.py,sha256=MyX8TSOh251NfC6M8LHFpc-ooVj_jfP-V6l00F5wb7M,14411 +numpy/core/tests/test_getlimits.py,sha256=RpbyjCEOII-7zLOBryg64E9M23T_dH8wJ15sdIyv_yw,5207 +numpy/core/tests/test_half.py,sha256=H91ZNjGPENPQNuDuXH0GnJlX2CG5z-KA0bA52ee177k,23816 +numpy/core/tests/test_hashtable.py,sha256=ZV8HL8NkDnoQZfnje7BP0fyIp4fSFqjKsQc40PaTggc,1011 +numpy/core/tests/test_indexerrors.py,sha256=kN9xLl6FVTzmI7fumn_cuZ3k0omXnTetgtCnPY44cvw,5130 +numpy/core/tests/test_indexing.py,sha256=S12n93u7YONz-z5WppRcaoG72rydv-MzXz7eZp60ei0,53972 +numpy/core/tests/test_item_selection.py,sha256=DGJNZRtyOxXoyY67EFwUqmLhQ04v0Kqy_QAOpEL42M0,3579 +numpy/core/tests/test_limited_api.py,sha256=jobirG_gOrL2QwMCQToCgR5eo-U5UJ0Pr3k4S_SHJAA,1073 +numpy/core/tests/test_longdouble.py,sha256=nXQKxT10J4cQW27r3vXFAsrfuKryyWJzCZWTxMVS9WM,13042 +numpy/core/tests/test_machar.py,sha256=_5_TDUVtAJvJI5jBfEFKpCZtAfKCsCFt7tXlWSkWzzc,1067 +numpy/core/tests/test_mem_overlap.py,sha256=_oscEzTKRFVoV9Yl-XX1-NFu4a6ThX_fCEZa90R4TKs,29084 +numpy/core/tests/test_mem_policy.py,sha256=Om8dZ3YV-g_YJtbyJObqg2wNUo2dV0vkvwPJEw4Rhz4,15869 +numpy/core/tests/test_memmap.py,sha256=Szhfp-XCO-IFZIvfL4-h9hTxjnl7rg9c9vsiehmEEck,7469 +numpy/core/tests/test_multiarray.py,sha256=jqXR99NjJ9V3BLdmQZBS4r4R8GbmFTuPFcMOcU29ouQ,348086 +numpy/core/tests/test_nditer.py,sha256=bNAR5fFsO9Xq9EApFlZ1JvMaZ_bNt-xNXKie21AuAO0,127977 +numpy/core/tests/test_numeric.py,sha256=P6l9B0fo42CAGyFZ8StA0Y570kac7j_3hEJecNEi1Gk,134548 +numpy/core/tests/test_numerictypes.py,sha256=NjWqk0mprYrn8J3yfPBKNNSZ0nprKaCpB1TrlzSkFgg,20846 +numpy/core/tests/test_overrides.py,sha256=VHkY5bOFTk7xAkzYYybjATL6DUBQAPWx5154OZByzxU,20135 +numpy/core/tests/test_print.py,sha256=URYIP2BX0uTkkCH_-YwYp6hhyYENaV4sarPaD2rRbZc,6737 +numpy/core/tests/test_protocols.py,sha256=fEXE9K9s22oiVWkX92BY-g00-uXCK-HxjZhZxxYAKFc,1168 +numpy/core/tests/test_records.py,sha256=Ffznf9-_C9qAvBhcE8_sdbMP8oWUFv5PC6ory0LC2rg,20262 +numpy/core/tests/test_regression.py,sha256=cj3l_vNzc6j1fS9J3Hdr484sWkfxH7txBTDH9_wAUpo,91282 +numpy/core/tests/test_scalar_ctors.py,sha256=J0PGDHD6a6dCW1Wg4q-_C5FEccXcYgqLNTnh7AC1iB8,3688 
+numpy/core/tests/test_scalar_methods.py,sha256=WtWMkWSsa8Ou0CCc5IXnhDgR1sEEc57gDYygnJXJBx8,7495 +numpy/core/tests/test_scalarbuffer.py,sha256=OEMZ9IkD1qhUqHOWkl5uzMcVs00WM9GV2xJInMP-HGs,5637 +numpy/core/tests/test_scalarinherit.py,sha256=BSxN2JQYrxxp6tbysaj56m5HvLvCdfSv_X7GC0ab7bg,2381 +numpy/core/tests/test_scalarmath.py,sha256=hu2qfXqC0Ajwu5wgJHgGkEz9_bvWwR4ommpvSoDZY_o,33087 +numpy/core/tests/test_scalarprint.py,sha256=l2kCaWzEHGCWE8JnYEsY8mwtw2RpTqt0tdoCC-dzkB8,18622 +numpy/core/tests/test_shape_base.py,sha256=Xq9XrA55m-qhqi02GuEpn_uINsfGJh7sCnuMulvM9eU,27248 +numpy/core/tests/test_simd.py,sha256=cJHS9ddc21vtIaEASxpPXb4ZeLKkWra-36XWtbuyJeg,36705 +numpy/core/tests/test_simd_module.py,sha256=EBJbHm9GchVD_hkQvVQfC-A0GZrIUX-zZEx8jiZUQe8,3758 +numpy/core/tests/test_ufunc.py,sha256=FKKTfn4lBZnJ-8A8DzoecZygJksXUCWiAbQp97zdnBw,105367 +numpy/core/tests/test_umath.py,sha256=7B5eFQ93JK64z1OKPgoSibI4sHcHUJlh_sa0FQymlr8,150890 +numpy/core/tests/test_umath_accuracy.py,sha256=gqVVoJGjSspz7C4isc6RqlllKvuq6irE1pqVegrCBys,3051 +numpy/core/tests/test_umath_complex.py,sha256=Jn4bfX2QsXvKYyDXLo7S4-TT9GkLMrQOX-1IwDLyV2I,23215 +numpy/core/tests/test_unicode.py,sha256=bBqQCQk_UZATySI4HdiWTgfenmgQCm5ynLc6SLvKiiw,12553 +numpy/core/umath.py,sha256=JbT_SxnZ_3MEmjOI9UtX3CcAzX5Q-4RDlnnhDAEJ5Vo,2040 +numpy/core/umath_tests.py,sha256=TIzaDfrEHHgSc2J5kxFEibq8MOPhwSuyOZOUBsZNVSM,389 +numpy/ctypeslib.py,sha256=KvLC_5-5izpOr7l2tiKoybBYgNGv4Cz-EPF16BIiObg,17505 +numpy/ctypeslib.pyi,sha256=dsYF1xvX-xsgiVcH8asm2CXx9Xo47SDPQxVrB1N2gV8,8240 +numpy/distutils/__config__.py,sha256=49s2ch4aadZ1I3IngFbCComBXWWHwtwT080ZqhHaRSc,5291 +numpy/distutils/__init__.py,sha256=KGgmy0CLo0kUE2DqWyiAotDMWyrB7jahrHK2pT5CKDk,1559 +numpy/distutils/__init__.pyi,sha256=D8LRE6BNOmuBGO-oakJGnjT9UJTk9zSR5rxMfZzlX64,119 +numpy/distutils/__pycache__/__config__.cpython-38.pyc,, +numpy/distutils/__pycache__/__init__.cpython-38.pyc,, +numpy/distutils/__pycache__/_shell_utils.cpython-38.pyc,, +numpy/distutils/__pycache__/armccompiler.cpython-38.pyc,, +numpy/distutils/__pycache__/ccompiler.cpython-38.pyc,, +numpy/distutils/__pycache__/ccompiler_opt.cpython-38.pyc,, +numpy/distutils/__pycache__/conv_template.cpython-38.pyc,, +numpy/distutils/__pycache__/core.cpython-38.pyc,, +numpy/distutils/__pycache__/cpuinfo.cpython-38.pyc,, +numpy/distutils/__pycache__/exec_command.cpython-38.pyc,, +numpy/distutils/__pycache__/extension.cpython-38.pyc,, +numpy/distutils/__pycache__/from_template.cpython-38.pyc,, +numpy/distutils/__pycache__/intelccompiler.cpython-38.pyc,, +numpy/distutils/__pycache__/lib2def.cpython-38.pyc,, +numpy/distutils/__pycache__/line_endings.cpython-38.pyc,, +numpy/distutils/__pycache__/log.cpython-38.pyc,, +numpy/distutils/__pycache__/mingw32ccompiler.cpython-38.pyc,, +numpy/distutils/__pycache__/misc_util.cpython-38.pyc,, +numpy/distutils/__pycache__/msvc9compiler.cpython-38.pyc,, +numpy/distutils/__pycache__/msvccompiler.cpython-38.pyc,, +numpy/distutils/__pycache__/npy_pkg_config.cpython-38.pyc,, +numpy/distutils/__pycache__/numpy_distribution.cpython-38.pyc,, +numpy/distutils/__pycache__/pathccompiler.cpython-38.pyc,, +numpy/distutils/__pycache__/setup.cpython-38.pyc,, +numpy/distutils/__pycache__/system_info.cpython-38.pyc,, +numpy/distutils/__pycache__/unixccompiler.cpython-38.pyc,, +numpy/distutils/_shell_utils.py,sha256=kMLOIoimB7PdFRgoVxCIyCFsIl1pP3d0hkm_s3E9XdA,2613 +numpy/distutils/armccompiler.py,sha256=SW12IT7WCB3CLMG_U2K1WDbtYozwFF-DtMDArmpOTkc,1043 
+numpy/distutils/ccompiler.py,sha256=Mve315sNqqFR5QS1FnPXwdoj7SmVxWxRuIztnlg8K_k,27691 +numpy/distutils/ccompiler_opt.py,sha256=X-42JDzwz_fmW0TkXosYfBJMoI49ReSHuK-T0qvKxQs,96840 +numpy/distutils/checks/cpu_asimd.c,sha256=1H5MJ9RlKII09G7Ajdo0vGgjfou0SkNNwSJbazKrviQ,704 +numpy/distutils/checks/cpu_asimddp.c,sha256=Sl0_cXoqKLsf9mYF4p228KrC8YKZZLznLwW0FMq3Omw,380 +numpy/distutils/checks/cpu_asimdfhm.c,sha256=sfEkry38tgBFVzyNXOBG_F8Xh0XzoJ0-ycRjlRZdW8A,431 +numpy/distutils/checks/cpu_asimdhp.c,sha256=MaooZyDGO1LTU7RQSG_0hwqL-NxbTNwM0xSRcVauhig,329 +numpy/distutils/checks/cpu_avx.c,sha256=LuZW8o93VZZi7cYEP30dvKWTm7Mw1TLmCt5UaXDxCJg,779 +numpy/distutils/checks/cpu_avx2.c,sha256=jlDlea393op0JOiMJgmmPyKmyAXztLcObPOp9F9FaS0,749 +numpy/distutils/checks/cpu_avx512_clx.c,sha256=P-YHjj2XE4SithBkPwDgShOxGWnVSNUXg72h8O3kpbs,842 +numpy/distutils/checks/cpu_avx512_cnl.c,sha256=f_c2Z0xwAKTJeK3RYMIp1dgXYV8QyeOxUgKkMht4qko,948 +numpy/distutils/checks/cpu_avx512_icl.c,sha256=isI35-gm7Hqn2Qink5hP1XHWlh52a5vwKhEdW_CRviE,1004 +numpy/distutils/checks/cpu_avx512_knl.c,sha256=Veq4zNRDDqABV1dPyYdpyPzqZnEBrRsPsTZU1ebPi_Y,956 +numpy/distutils/checks/cpu_avx512_knm.c,sha256=eszPGr3XC9Js7mQUB0gFxlrNjQwfucQFz_UwFyNLjes,1132 +numpy/distutils/checks/cpu_avx512_skx.c,sha256=59VD8ebEJJHLlbY-4dakZV34bmq_lr9mBKz8BAcsdYc,1010 +numpy/distutils/checks/cpu_avx512cd.c,sha256=Qfh5FJUv9ZWd_P5zxkvYYIkvqsPptgaDuKkeX_F8vyA,759 +numpy/distutils/checks/cpu_avx512f.c,sha256=d97NRcbJhqpvURnw7zyG0TOuEijKXvU0g4qOTWHbwxY,755 +numpy/distutils/checks/cpu_f16c.c,sha256=nzZzpUc8AfTtw-INR3KOxcjx9pyzVUM8OhsrdH2dO_w,868 +numpy/distutils/checks/cpu_fma3.c,sha256=YN6IDwuZALJHVVmpQ2tj-14HI_PcxH_giV8-XjzlmkU,817 +numpy/distutils/checks/cpu_fma4.c,sha256=qKdgTNNFg-n8vSB1Txco60HBLCcOi1aH23gZOX7yKqs,301 +numpy/distutils/checks/cpu_neon.c,sha256=CRi7PYp3rlUOADBO3jC6n48S6NIh1NeDH8GZ8J2nWpw,372 +numpy/distutils/checks/cpu_neon_fp16.c,sha256=OHO5dv9ziEARtX2w2VPDoBHzUrN7DoPFElte09R9K9c,251 +numpy/distutils/checks/cpu_neon_vfpv4.c,sha256=AzQwBN23hMKjT8qi9CHJPQwVUmYpSascJ_T6jY7BmBw,493 +numpy/distutils/checks/cpu_popcnt.c,sha256=vRcXHVw2j1F9I_07eIZ_xzDX3fd3mqgiQXL1w3pULJk,1049 +numpy/distutils/checks/cpu_sse.c,sha256=6MHITtC76UpSR9uh0SiURpnkpPkLzT5tbrcXT4xBFxo,686 +numpy/distutils/checks/cpu_sse2.c,sha256=yUZzdjDtBS-vYlhfP-pEzj3m0UPmgZs-hA99TZAEACU,697 +numpy/distutils/checks/cpu_sse3.c,sha256=j5XRHumUuccgN9XPZyjWUUqkq8Nu8XCSWmvUhmJTJ08,689 +numpy/distutils/checks/cpu_sse41.c,sha256=y_k81P-1b-Hx8OeRVDE9V1O9JakS0zPvlFKJ3VbSmEw,675 +numpy/distutils/checks/cpu_sse42.c,sha256=3PXucdI2mII-txO7zFN99TlVveT_QUAETTGvRk-_hYw,692 +numpy/distutils/checks/cpu_ssse3.c,sha256=X6VWxIXMRpdSCBsHPXvot3yTZ4d5yK9Bi1ScQP3WC-Q,705 +numpy/distutils/checks/cpu_vsx.c,sha256=FVmR4iliKjcihzMCwloR1F2JYwSZK9P4f_hvIRLHSDQ,478 +numpy/distutils/checks/cpu_vsx2.c,sha256=yESs25Rt5ztb5-stuYbu3TbiyJKmllMpMLu01GOAHqE,263 +numpy/distutils/checks/cpu_vsx3.c,sha256=omC50tbEZNigsKMFPtE3zGRlIS2VuDTm3vZ9TBZWo4U,250 +numpy/distutils/checks/cpu_xop.c,sha256=7uabsGeqvmVJQvuSEjs8-Sm8kpmvl6uZ9YHMF5h2opQ,234 +numpy/distutils/checks/extra_avx512bw_mask.c,sha256=pVPOhcu80yJVnIhOcHHXOlZ2proJ1MUf0XgccqhPoNk,636 +numpy/distutils/checks/extra_avx512dq_mask.c,sha256=nMfIvepISGFDexPrMYl5LWtdmt6Uy9TKPzF4BVayw2I,504 +numpy/distutils/checks/extra_avx512f_reduce.c,sha256=_NfbtfSAkm_A67umjR1oEb9yRnBL5EnTA76fvQIuNVk,1595 +numpy/distutils/checks/extra_vsx_asm.c,sha256=BngiMVS9nyr22z6zMrOrHLeCloe_5luXhf5T5mYucgI,945 +numpy/distutils/checks/test_flags.c,sha256=uAIbhfAhyGe4nTdK_mZmoCefj9P0TGHNF9AUv_Cdx5A,16 
+numpy/distutils/command/__init__.py,sha256=fW49zUB3syMFsKpf1oRBO0h8tmnTwRP3zUPrsB0R22M,1032 +numpy/distutils/command/__pycache__/__init__.cpython-38.pyc,, +numpy/distutils/command/__pycache__/autodist.cpython-38.pyc,, +numpy/distutils/command/__pycache__/bdist_rpm.cpython-38.pyc,, +numpy/distutils/command/__pycache__/build.cpython-38.pyc,, +numpy/distutils/command/__pycache__/build_clib.cpython-38.pyc,, +numpy/distutils/command/__pycache__/build_ext.cpython-38.pyc,, +numpy/distutils/command/__pycache__/build_py.cpython-38.pyc,, +numpy/distutils/command/__pycache__/build_scripts.cpython-38.pyc,, +numpy/distutils/command/__pycache__/build_src.cpython-38.pyc,, +numpy/distutils/command/__pycache__/config.cpython-38.pyc,, +numpy/distutils/command/__pycache__/config_compiler.cpython-38.pyc,, +numpy/distutils/command/__pycache__/develop.cpython-38.pyc,, +numpy/distutils/command/__pycache__/egg_info.cpython-38.pyc,, +numpy/distutils/command/__pycache__/install.cpython-38.pyc,, +numpy/distutils/command/__pycache__/install_clib.cpython-38.pyc,, +numpy/distutils/command/__pycache__/install_data.cpython-38.pyc,, +numpy/distutils/command/__pycache__/install_headers.cpython-38.pyc,, +numpy/distutils/command/__pycache__/sdist.cpython-38.pyc,, +numpy/distutils/command/autodist.py,sha256=8KWwr5mnjX20UpY4ITRDx-PreApyh9M7B92IwsEtTsQ,3718 +numpy/distutils/command/bdist_rpm.py,sha256=-tkZupIJr_jLqeX7xbRhE8-COXHRI0GoRpAKchVte54,709 +numpy/distutils/command/build.py,sha256=6klC3va_oXRglJ00Emv3rt5xTgk0T-Kk1HLEOHKDbNw,2566 +numpy/distutils/command/build_clib.py,sha256=qbxvwF5n8kfGSsL7H5BS7zguUYZss0X3kct8U20JQio,19235 +numpy/distutils/command/build_ext.py,sha256=WRGHAn2Z6NuV-DhS5_7L8KXjMOWmm8bRnR3KVYtT61E,31877 +numpy/distutils/command/build_py.py,sha256=XiLZ2d_tmCE8uG5VAU5OK2zlzQayBfeY4l8FFEltbig,1144 +numpy/distutils/command/build_scripts.py,sha256=P2ytmZb3UpwfmbMXkFB2iMQk15tNUCynzMATllmp-Gs,1665 +numpy/distutils/command/build_src.py,sha256=Lw3JsmSdi_DpWCtCxU4X5iYC6da5vkOhXzCrnlrgS98,31172 +numpy/distutils/command/config.py,sha256=v-nHMY-3gL7Q6PkM4K2XDVZM_GyUrAjpjNtsRT5-Y68,20724 +numpy/distutils/command/config_compiler.py,sha256=Cp9RTpW72gg8XC_3-9dCTlLYr352pBfBRZA8YBWvOoY,4369 +numpy/distutils/command/develop.py,sha256=9SbbnFnVbSJVZxTFoV9pwlOcM1D30GnOWm2QonQDvHI,575 +numpy/distutils/command/egg_info.py,sha256=i-Zk4sftK5cMQVQ2jqSxTMpVI-gYyXN16-p5TvmjURc,921 +numpy/distutils/command/install.py,sha256=wZfVabAMw-Y3tiAKcXTxC-So65RHo3zCy1ucoTmyECw,3078 +numpy/distutils/command/install_clib.py,sha256=1xv0_lPVu3g16GgICjjlh7T8zQ6PSlevCuq8Bocx5YM,1399 +numpy/distutils/command/install_data.py,sha256=Y59EBG61MWP_5C8XJvSCVfzYpMNVNVcH_Z6c0qgr9KA,848 +numpy/distutils/command/install_headers.py,sha256=tVpOGqkmh8AA_tam0K0SeCd4kvZj3UqSOjWKm6Kz4jY,919 +numpy/distutils/command/sdist.py,sha256=8Tsju1RwXNbPyQcjv8GRMFveFQqYlbNdSZh2X1OV-VU,733 +numpy/distutils/conv_template.py,sha256=F-4vkkfAjCb-fN79WYrXX3BMHMoiQO-W2u09q12OPuI,9536 +numpy/distutils/core.py,sha256=QBJNJdIE0a9Rr4lo-3QnmEaWyVV068l6HbVPdJ75iZg,8173 +numpy/distutils/cpuinfo.py,sha256=XuNhsx_-tyrui_AOgn10yfZ9p4YBM68vW2_bGmKj07I,22639 +numpy/distutils/exec_command.py,sha256=GDk5wRli-tV2ZLycbshnrGLcBgokvZmFMJntFwwT2l0,10343 +numpy/distutils/extension.py,sha256=YgeB8e2fVc2l_1etuRBv0P8c1NULOz4SaudHgsVBc30,3568 +numpy/distutils/fcompiler/__init__.py,sha256=m6ibfNuckW6cmHzpLgi7SJ8nLAyuOG5kV8IwtjbNyBU,40153 +numpy/distutils/fcompiler/__pycache__/__init__.cpython-38.pyc,, +numpy/distutils/fcompiler/__pycache__/absoft.cpython-38.pyc,, 
+numpy/distutils/fcompiler/__pycache__/arm.cpython-38.pyc,, +numpy/distutils/fcompiler/__pycache__/compaq.cpython-38.pyc,, +numpy/distutils/fcompiler/__pycache__/environment.cpython-38.pyc,, +numpy/distutils/fcompiler/__pycache__/fujitsu.cpython-38.pyc,, +numpy/distutils/fcompiler/__pycache__/g95.cpython-38.pyc,, +numpy/distutils/fcompiler/__pycache__/gnu.cpython-38.pyc,, +numpy/distutils/fcompiler/__pycache__/hpux.cpython-38.pyc,, +numpy/distutils/fcompiler/__pycache__/ibm.cpython-38.pyc,, +numpy/distutils/fcompiler/__pycache__/intel.cpython-38.pyc,, +numpy/distutils/fcompiler/__pycache__/lahey.cpython-38.pyc,, +numpy/distutils/fcompiler/__pycache__/mips.cpython-38.pyc,, +numpy/distutils/fcompiler/__pycache__/nag.cpython-38.pyc,, +numpy/distutils/fcompiler/__pycache__/none.cpython-38.pyc,, +numpy/distutils/fcompiler/__pycache__/nv.cpython-38.pyc,, +numpy/distutils/fcompiler/__pycache__/pathf95.cpython-38.pyc,, +numpy/distutils/fcompiler/__pycache__/pg.cpython-38.pyc,, +numpy/distutils/fcompiler/__pycache__/sun.cpython-38.pyc,, +numpy/distutils/fcompiler/__pycache__/vast.cpython-38.pyc,, +numpy/distutils/fcompiler/absoft.py,sha256=NJxm5Qpiv9F1Z5VClIXwXKGmJoCCEDSZmZthJQZS2Rs,5499 +numpy/distutils/fcompiler/arm.py,sha256=LTfIzpwMEsaAhQ7jtT-ROPontrCTd259-p_ZxEJLIT8,2235 +numpy/distutils/fcompiler/compaq.py,sha256=sjU2GKHJGuChtRb_MhnouMqvkIOQflmowFE6ErCWZhE,3903 +numpy/distutils/fcompiler/environment.py,sha256=DOD2FtKDk6O9k6U0h9UKWQ-65wU8z1tSPn3gUlRwCso,3080 +numpy/distutils/fcompiler/fujitsu.py,sha256=yK3wdHoF5qq25UcnIM6FzTXsJGJxdfKa_f__t04Ne7M,1333 +numpy/distutils/fcompiler/g95.py,sha256=FH4uww6re50OUT_BfdoWSLCDUqk8LvmQ2_j5RhF5nLQ,1330 +numpy/distutils/fcompiler/gnu.py,sha256=A1XVs8t-bccJZou7W1M1SwpRmdXBYKDQDNf4y96w6Es,20263 +numpy/distutils/fcompiler/hpux.py,sha256=gloUjWGo7MgJmukorDq7ZxDnnUKXx-C6AQfryQshVM4,1353 +numpy/distutils/fcompiler/ibm.py,sha256=W7r6W7kvGPBuWsrv-VsDHEUTq7bS73l4dVIU1ipm-IA,3539 +numpy/distutils/fcompiler/intel.py,sha256=lUQsavh2t-Jt5WusYYLAEyXjXHt8PHW5lKaxjvO-qwA,6546 +numpy/distutils/fcompiler/lahey.py,sha256=U63KMfN8zDAd_jnvMkS2N-dvP4UiSRB9Ces290qLNXw,1327 +numpy/distutils/fcompiler/mips.py,sha256=LAwT0DY5yqlYh20hNMYR1-OKu8A9GNw-TbUfI8pvglM,1714 +numpy/distutils/fcompiler/nag.py,sha256=9pQCMUlwjRVHGKwZxvwd4bW5p-9v7VXcflELEImHg1g,2777 +numpy/distutils/fcompiler/none.py,sha256=6RX2X-mV1HuhJZnVfQmDmLVhIUWseIT4P5wf3rdLq9Y,758 +numpy/distutils/fcompiler/nv.py,sha256=LGBQY417zibQ-fnPis5rNtP_I1Qk9OlhEFOnPvmwXHI,1560 +numpy/distutils/fcompiler/pathf95.py,sha256=MiHVar6-beUEYVEpqXORIX4f8G29I47D36kreltdfoQ,1061 +numpy/distutils/fcompiler/pg.py,sha256=NOB1stzrjvQMZS7bIPTgWTcAFe3cjNveA5-SztUZqD0,3568 +numpy/distutils/fcompiler/sun.py,sha256=mfS3RTj9uYT6K9Ikp8RjmsEPIWAtUTzMhX9sGjEyF6I,1577 +numpy/distutils/fcompiler/vast.py,sha256=Xuxa4sNraUPcQmt45SogAfN0kDHFb6C73uNZNmX3RBE,1667 +numpy/distutils/from_template.py,sha256=hpoFQortsLZdMSr_fJILzXzrIwFlZoFjsDSo6jNtvWs,7913 +numpy/distutils/intelccompiler.py,sha256=N_pvWjlLORdlH34cs97oU4LBNr_s9r5ddsmme7XEvs4,4234 +numpy/distutils/lib2def.py,sha256=NlfwSfYfUkW3bY4fRvvSgjUF7c6Gs99B6GoRsOrH6gI,3644 +numpy/distutils/line_endings.py,sha256=a8ZZECrPRffsbs0UygeR47_fOUlZppnx-QPssrIXtB0,2032 +numpy/distutils/log.py,sha256=m8caNBwPhIG7YTnD9iq9jjc6_yJOeU9FHuau2CSulds,2879 +numpy/distutils/mingw/gfortran_vs2003_hack.c,sha256=cbsN3Lk9Hkwzr9c-yOP2xEBg1_ml1X7nwAMDWxGjzc8,77 +numpy/distutils/mingw32ccompiler.py,sha256=J_jOJ5Bl14FVEgb1jUIKGQBFL_mrWs4YSHSvgtmxvqU,22467 
+numpy/distutils/misc_util.py,sha256=jO3C5uI-4lZJGDuzdDbT3FMcWLKVRa77ordK6N_QVtk,89768 +numpy/distutils/msvc9compiler.py,sha256=FCtP7g34AVuMIaqQlH8AV1ZBdIUXbk5G7eBeeTSr1zE,2192 +numpy/distutils/msvccompiler.py,sha256=tvTGpdK41L6yJe5W_o_TOFZV3p5IXtjSxmnVFaR52uU,1928 +numpy/distutils/npy_pkg_config.py,sha256=fIFyWLTqRySO3hn-0i0FNdHeblRN_hDv-wc68-sa3hQ,12972 +numpy/distutils/numpy_distribution.py,sha256=10Urolg1aDAG0EHYfcvObzOgqRV0ARh2GhDklEg4vS0,634 +numpy/distutils/pathccompiler.py,sha256=KnJEA5H4cXg7SLrMjwWtidD24VSvOdu72d17votiY9E,713 +numpy/distutils/setup.py,sha256=l9ke_Bws431UdBfysaq7ZeGtZ8dix76oh9Huq5qqbkU,634 +numpy/distutils/system_info.py,sha256=9jHSv4qtDjVbSqiy9eKmRpSKY7WRwzKL7AEcNctxY6g,111028 +numpy/distutils/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +numpy/distutils/tests/__pycache__/__init__.cpython-38.pyc,, +numpy/distutils/tests/__pycache__/test_build_ext.cpython-38.pyc,, +numpy/distutils/tests/__pycache__/test_ccompiler_opt.cpython-38.pyc,, +numpy/distutils/tests/__pycache__/test_ccompiler_opt_conf.cpython-38.pyc,, +numpy/distutils/tests/__pycache__/test_exec_command.cpython-38.pyc,, +numpy/distutils/tests/__pycache__/test_fcompiler.cpython-38.pyc,, +numpy/distutils/tests/__pycache__/test_fcompiler_gnu.cpython-38.pyc,, +numpy/distutils/tests/__pycache__/test_fcompiler_intel.cpython-38.pyc,, +numpy/distutils/tests/__pycache__/test_fcompiler_nagfor.cpython-38.pyc,, +numpy/distutils/tests/__pycache__/test_from_template.cpython-38.pyc,, +numpy/distutils/tests/__pycache__/test_log.cpython-38.pyc,, +numpy/distutils/tests/__pycache__/test_mingw32ccompiler.cpython-38.pyc,, +numpy/distutils/tests/__pycache__/test_misc_util.cpython-38.pyc,, +numpy/distutils/tests/__pycache__/test_npy_pkg_config.cpython-38.pyc,, +numpy/distutils/tests/__pycache__/test_shell_utils.cpython-38.pyc,, +numpy/distutils/tests/__pycache__/test_system_info.cpython-38.pyc,, +numpy/distutils/tests/test_build_ext.py,sha256=qX-j6UBmpigiwwZ9RMMkTcHXBrEB0QXYfejDlM2J-zc,2664 +numpy/distutils/tests/test_ccompiler_opt.py,sha256=cRdiRoj0sKtloi6Kifa_O_cqX7VBmpqnvXaX9knr6T4,28007 +numpy/distutils/tests/test_ccompiler_opt_conf.py,sha256=vwUllmhwwgP6fyMqGUbIfDfUf2cLpQW4vudI6oWyuco,6345 +numpy/distutils/tests/test_exec_command.py,sha256=b2Vv5zCRnkH72vhQQRSeC3zWqJPdDBa1gcOzTyWnJ_g,7301 +numpy/distutils/tests/test_fcompiler.py,sha256=mJXezTXDUbduhCwVGAfABHpEARWhnj8hLW9EOU3rn84,1277 +numpy/distutils/tests/test_fcompiler_gnu.py,sha256=nmfaFCVzbViIOQ2-MjgXt-bN8Uj674hCgiwr5Iol-_U,2136 +numpy/distutils/tests/test_fcompiler_intel.py,sha256=mxkfFD2rNfg8nn1pp_413S0uCdYXydPWBcz9ilgGkA0,1058 +numpy/distutils/tests/test_fcompiler_nagfor.py,sha256=CKEjik7YVfSJGL4abuctkmlkIUhAhv-x2aUcXiTR9b0,1102 +numpy/distutils/tests/test_from_template.py,sha256=SDYoe0XUpAayyEQDq7ZhrvEEz7U9upJDLYzhcdoVifc,1103 +numpy/distutils/tests/test_log.py,sha256=dtUR8nMgRAYAEpDbFkNff71pIdo--sXKL9ZhthcZbBs,762 +numpy/distutils/tests/test_mingw32ccompiler.py,sha256=rMC8-IyBOiuZVfAoklV_KnD9qVeB_hFVvb5dStxfk08,1609 +numpy/distutils/tests/test_misc_util.py,sha256=Qs96vTr8GZSyVCWuamzcNlVMRa15vt0Y-T2yZSUm_QA,3218 +numpy/distutils/tests/test_npy_pkg_config.py,sha256=apGrmViPcXoPCEOgDthJgL13C9N0qQMs392QjZDxJd4,2557 +numpy/distutils/tests/test_shell_utils.py,sha256=OqwJrX9DsBjm4kXq2cBaioXle8FXEB70Ka8hk9sqfho,1954 +numpy/distutils/tests/test_system_info.py,sha256=3q3rJdD1dUUSsDXOXxqSU-Af-wwt56KnkmIQl4pLzvk,10996 +numpy/distutils/unixccompiler.py,sha256=fN4-LH6JJp44SLE7JkdG2kKQlK4LC8zuUpVC-RtmJ-U,5426 
+numpy/doc/__init__.py,sha256=OYmE-F6x0CD05PCDY2MiW1HLlwB6i9vhDpk-a3r4lHY,508 +numpy/doc/__pycache__/__init__.cpython-38.pyc,, +numpy/doc/__pycache__/constants.cpython-38.pyc,, +numpy/doc/__pycache__/ufuncs.cpython-38.pyc,, +numpy/doc/constants.py,sha256=PlXoj7b4A8Aa9nADbg83uzTBRJaX8dvJmEdbn4FDPPo,9155 +numpy/doc/ufuncs.py,sha256=u9bbNmVClSJvykPk1Jwd9ZYeD9M7CaKdQyoWt3qCfQw,5360 +numpy/dual.py,sha256=aWZFc7rFVWLoWs9M4-nhf_S131RvsLRUdZrMIag_AzU,2214 +numpy/f2py/__init__.py,sha256=q5q91jWxOvK4JfnskvWeI7aoo9NKhS8uqkICHtU-ch4,5794 +numpy/f2py/__init__.pyi,sha256=dH7s0H52QyZ-4YdSGUc90VqrH4YmHZT98AIWI79-xkE,1092 +numpy/f2py/__main__.py,sha256=6i2jVH2fPriV1aocTY_dUFvWK18qa-zjpnISA-OpF3w,130 +numpy/f2py/__pycache__/__init__.cpython-38.pyc,, +numpy/f2py/__pycache__/__main__.cpython-38.pyc,, +numpy/f2py/__pycache__/__version__.cpython-38.pyc,, +numpy/f2py/__pycache__/auxfuncs.cpython-38.pyc,, +numpy/f2py/__pycache__/capi_maps.cpython-38.pyc,, +numpy/f2py/__pycache__/cb_rules.cpython-38.pyc,, +numpy/f2py/__pycache__/cfuncs.cpython-38.pyc,, +numpy/f2py/__pycache__/common_rules.cpython-38.pyc,, +numpy/f2py/__pycache__/crackfortran.cpython-38.pyc,, +numpy/f2py/__pycache__/diagnose.cpython-38.pyc,, +numpy/f2py/__pycache__/f2py2e.cpython-38.pyc,, +numpy/f2py/__pycache__/f2py_testing.cpython-38.pyc,, +numpy/f2py/__pycache__/f90mod_rules.cpython-38.pyc,, +numpy/f2py/__pycache__/func2subr.cpython-38.pyc,, +numpy/f2py/__pycache__/rules.cpython-38.pyc,, +numpy/f2py/__pycache__/setup.cpython-38.pyc,, +numpy/f2py/__pycache__/symbolic.cpython-38.pyc,, +numpy/f2py/__pycache__/use_rules.cpython-38.pyc,, +numpy/f2py/__version__.py,sha256=7HHdjR82FCBmftwMRyrlhcEj-8mGQb6oCH-wlUPH4Nw,34 +numpy/f2py/auxfuncs.py,sha256=Arny8GrF89fqHngjxTPAKv9FoESZKC4eOJoF5JXm0Tk,21779 +numpy/f2py/capi_maps.py,sha256=r1I8t32K96DCG_cjFH2goIHUHWpS1Vx1Z4B4ijLMiSU,31309 +numpy/f2py/cb_rules.py,sha256=xXiiuGAmyJz-1KIOhkf5oGd4O2TUhqPPSL1L5Jexmw4,24854 +numpy/f2py/cfuncs.py,sha256=UKqFbp7UTu6MNIInHeH19DYdbjjQR3OcWjFixhYeJwY,49482 +numpy/f2py/common_rules.py,sha256=IWfRQfhlSCziDn-3hGoIpIHGQ7yC-EaC0WKx9YrXs6I,4925 +numpy/f2py/crackfortran.py,sha256=2bXlUx9gyZDMGbRbIsJU9OBc6SQeq7y2iO37CW9ncJU,129776 +numpy/f2py/diagnose.py,sha256=W59xcrGVf-1ed9FkQgCxgW0d9a2lENaxHUpYVAZ2w00,5230 +numpy/f2py/f2py2e.py,sha256=wezMdBVYWuhA9AmfHIFOobXj7qxtOLJO_0r5SQ_TOyw,24238 +numpy/f2py/f2py_testing.py,sha256=1K5kdU9EFi-K6yTU6wzxr7m5Prk3AOS3SJJzrE6MjDo,1457 +numpy/f2py/f90mod_rules.py,sha256=oLZtJ_T1QNCra3CACW8oULmQioYU4bOVchWtgQp1zL8,9811 +numpy/f2py/func2subr.py,sha256=7Gg17NbMA8TooCEv-4_XBnfkg6Jjt1TAYkFIl0y--e4,9355 +numpy/f2py/rules.py,sha256=bClwU50K0JoEQ6b7VpK0l_M_wauLwZE6lLhmoglZwVg,60367 +numpy/f2py/setup.py,sha256=9QD603_UEa1HAuJHtoNY2twKwDKoepk72ikyKQpwhjM,2335 +numpy/f2py/src/fortranobject.c,sha256=0UeS33f19zFtrZP4liHUTSqfh2pYW3ZtloLf-9epEjM,37535 +numpy/f2py/src/fortranobject.h,sha256=aF7ACqS_fdrqj4sWi2aYB_Nsf9mcVU6jPGy-G-0Z54M,4384 +numpy/f2py/symbolic.py,sha256=3ERySC3mYO8-NfAfTDmMOd0yfPuYs8kceqxTXKFbpZA,53004 +numpy/f2py/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +numpy/f2py/tests/__pycache__/__init__.cpython-38.pyc,, +numpy/f2py/tests/__pycache__/test_abstract_interface.cpython-38.pyc,, +numpy/f2py/tests/__pycache__/test_array_from_pyobj.cpython-38.pyc,, +numpy/f2py/tests/__pycache__/test_assumed_shape.cpython-38.pyc,, +numpy/f2py/tests/__pycache__/test_block_docstring.cpython-38.pyc,, +numpy/f2py/tests/__pycache__/test_callback.cpython-38.pyc,, +numpy/f2py/tests/__pycache__/test_common.cpython-38.pyc,, 
+numpy/f2py/tests/__pycache__/test_compile_function.cpython-38.pyc,, +numpy/f2py/tests/__pycache__/test_crackfortran.cpython-38.pyc,, +numpy/f2py/tests/__pycache__/test_kind.cpython-38.pyc,, +numpy/f2py/tests/__pycache__/test_mixed.cpython-38.pyc,, +numpy/f2py/tests/__pycache__/test_module_doc.cpython-38.pyc,, +numpy/f2py/tests/__pycache__/test_parameter.cpython-38.pyc,, +numpy/f2py/tests/__pycache__/test_quoted_character.cpython-38.pyc,, +numpy/f2py/tests/__pycache__/test_regression.cpython-38.pyc,, +numpy/f2py/tests/__pycache__/test_return_character.cpython-38.pyc,, +numpy/f2py/tests/__pycache__/test_return_complex.cpython-38.pyc,, +numpy/f2py/tests/__pycache__/test_return_integer.cpython-38.pyc,, +numpy/f2py/tests/__pycache__/test_return_logical.cpython-38.pyc,, +numpy/f2py/tests/__pycache__/test_return_real.cpython-38.pyc,, +numpy/f2py/tests/__pycache__/test_semicolon_split.cpython-38.pyc,, +numpy/f2py/tests/__pycache__/test_size.cpython-38.pyc,, +numpy/f2py/tests/__pycache__/test_string.cpython-38.pyc,, +numpy/f2py/tests/__pycache__/test_symbolic.cpython-38.pyc,, +numpy/f2py/tests/__pycache__/util.cpython-38.pyc,, +numpy/f2py/tests/src/array_from_pyobj/wrapmodule.c,sha256=C9xBs5eJsBKsQnzfREeVLu1Zil0ag62-uNe_uqhe5Ug,7296 +numpy/f2py/tests/src/assumed_shape/.f2py_f2cmap,sha256=But9r9m4iL7EGq_haMW8IiQ4VivH0TgUozxX4pPvdpE,29 +numpy/f2py/tests/src/assumed_shape/foo_free.f90,sha256=oBwbGSlbr9MkFyhVO2aldjc01dr9GHrMrSiRQek8U64,460 +numpy/f2py/tests/src/assumed_shape/foo_mod.f90,sha256=rfzw3QdI-eaDSl-hslCgGpd5tHftJOVhXvb21Y9Gf6M,499 +numpy/f2py/tests/src/assumed_shape/foo_use.f90,sha256=rmT9k4jP9Ru1PLcGqepw9Jc6P9XNXM0axY7o4hi9lUw,269 +numpy/f2py/tests/src/assumed_shape/precision.f90,sha256=r08JeTVmTTExA-hYZ6HzaxVwBn1GMbPAuuwBhBDtJUk,130 +numpy/f2py/tests/src/common/block.f,sha256=GQ0Pd-VMX3H3a-__f2SuosSdwNXHpBqoGnQDjf8aG9g,224 +numpy/f2py/tests/src/kind/foo.f90,sha256=zIHpw1KdkWbTzbXb73hPbCg4N2Htj3XL8DIwM7seXpo,347 +numpy/f2py/tests/src/mixed/foo.f,sha256=90zmbSHloY1XQYcPb8B5d9bv9mCZx8Z8AMTtgDwJDz8,85 +numpy/f2py/tests/src/mixed/foo_fixed.f90,sha256=pxKuPzxF3Kn5khyFq9ayCsQiolxB3SaNtcWaK5j6Rv4,179 +numpy/f2py/tests/src/mixed/foo_free.f90,sha256=fIQ71wrBc00JUAVUj_r3QF9SdeNniBiMw6Ly7CGgPWU,139 +numpy/f2py/tests/src/module_data/mod.mod,sha256=EkjrU7NTZrOH68yKrz6C_eyJMSFSxGgC2yMQT9Zscek,412 +numpy/f2py/tests/src/module_data/module_data_docstring.f90,sha256=tDZ3fUlazLL8ThJm3VwNGJ75QIlLcW70NnMFv-JA4W0,224 +numpy/f2py/tests/src/parameter/constant_both.f90,sha256=-bBf2eqHb-uFxgo6Q7iAtVUUQzrGFqzhHDNaxwSICfQ,1939 +numpy/f2py/tests/src/parameter/constant_compound.f90,sha256=re7pfzcuaquiOia53UT7qNNrTYu2euGKOF4IhoLmT6g,469 +numpy/f2py/tests/src/parameter/constant_integer.f90,sha256=nEmMLitKoSAG7gBBEQLWumogN-KS3DBZOAZJWcSDnFw,612 +numpy/f2py/tests/src/parameter/constant_non_compound.f90,sha256=IcxESVLKJUZ1k9uYKoSb8Hfm9-O_4rVnlkiUU2diy8Q,609 +numpy/f2py/tests/src/parameter/constant_real.f90,sha256=quNbDsM1Ts2rN4WtPO67S9Xi_8l2cXabWRO00CPQSSQ,610 +numpy/f2py/tests/src/regression/inout.f90,sha256=CpHpgMrf0bqA1W3Ozo3vInDz0RP904S7LkpdAH6ODck,277 +numpy/f2py/tests/src/size/foo.f90,sha256=IlFAQazwBRr3zyT7v36-tV0-fXtB1d7WFp6S1JVMstg,815 +numpy/f2py/tests/src/string/char.f90,sha256=ihr_BH9lY7eXcQpHHDQhFoKcbu7VMOX5QP2Tlr7xlaM,618 +numpy/f2py/tests/test_abstract_interface.py,sha256=NUW9mZn8z6iYjkIqk5Cgjv8bz3Z8K47_nUooelRIE2I,1817 +numpy/f2py/tests/test_array_from_pyobj.py,sha256=vm2yEyLXk9_8bIkkKAUGBzkM_mDvTo1js5fE9ZqChDA,22810 
+numpy/f2py/tests/test_assumed_shape.py,sha256=rANgW9P3tpEUdw67qeCOWRK9PzwjWu2MyFViSCqW-kI,1562 +numpy/f2py/tests/test_block_docstring.py,sha256=dBurBjOp4IXoSJIbCD0klgEv3zz_DpfZE8xz-WW2skQ,627 +numpy/f2py/tests/test_callback.py,sha256=62DzB-uBOztIgZmhWEU9CTtIzs7EZLiW9vo6LK12IEA,8181 +numpy/f2py/tests/test_common.py,sha256=kxqAUUE0wfDfp8BuKeUcG207uFRsQBLlWiJIhcxQVog,802 +numpy/f2py/tests/test_compile_function.py,sha256=sNqLV4-OCMMFSeDhrbt_cQyEGGaqZqsrgVo0CThW_hM,4309 +numpy/f2py/tests/test_crackfortran.py,sha256=tfWJlWOfpFRkSizXVCCpk0SCiN3CkTCmKJHqREyIgW8,9559 +numpy/f2py/tests/test_kind.py,sha256=OfC4q2V_e8vTPSf_zekzl29_MPmx_Hfv9R8rKQvvLYA,1012 +numpy/f2py/tests/test_mixed.py,sha256=xj399eAqSDD6KuArLjJz4NJgpPkZnJfk0OL3CAEDOlY,911 +numpy/f2py/tests/test_module_doc.py,sha256=sVeHZFZ-R88a1Oh33Uzi_12Wp68in41THWeN6nuiIsw,950 +numpy/f2py/tests/test_parameter.py,sha256=mbIxGbiHdBL_YfW7ODD4TKZgF2gSg_GmT64wl-Y4-yI,3910 +numpy/f2py/tests/test_quoted_character.py,sha256=vNTSGaKM5GcQCLPDxbsUzynnSWetMrYJdJseA3sl6iM,927 +numpy/f2py/tests/test_regression.py,sha256=1yxgvpfhXoex6SYp78b4EkFSqnh_pOmPTiMPFjgidL8,1810 +numpy/f2py/tests/test_return_character.py,sha256=pYC08Th80SvQzMs6sI526mAePfdUmZjZUPq_Ory6v6M,3907 +numpy/f2py/tests/test_return_complex.py,sha256=-7tB3Ifc-fjxI7cbxLOYI9NEqGFXtXzDbMS74aCuAbE,4615 +numpy/f2py/tests/test_return_integer.py,sha256=0h_QAxMr55NNv-_my6hnBOvMOX9gpQYmiEIayWLebuQ,4576 +numpy/f2py/tests/test_return_logical.py,sha256=iyllQcXmD54akl1bUrCLP7CP1Vz7o9bMC6eIV_5zoEk,4843 +numpy/f2py/tests/test_return_real.py,sha256=EXyuK1v0cRp1azCrozlCJ-8y3iDUC8j8xo8Eik7bgWE,5402 +numpy/f2py/tests/test_semicolon_split.py,sha256=ZG3CzQGXrODUd9wFeKL4pSKJtWm5KwyEujaHWsTugeI,1514 +numpy/f2py/tests/test_size.py,sha256=kcOYJtLtYA11GK37JCGJcAz1Raz55hBLsSzR0yVXHFU,1286 +numpy/f2py/tests/test_string.py,sha256=3ihJ6MuTDA4J40DLXN4sWgn9n3rwFAVMB7TOcyDb5LY,4483 +numpy/f2py/tests/test_symbolic.py,sha256=7xvXTfdtraPGXHjYOwZ8s4nyQRuTU-A2yheT553TKcQ,18025 +numpy/f2py/tests/util.py,sha256=hhfPRrb-gHcSGrOFKg2piYMi4lrefLRz7r4CD97Ampg,9477 +numpy/f2py/use_rules.py,sha256=5t6X17rF6y42SwuUYe1LtNihJJEIgCU7f9jPqKphfgA,3587 +numpy/fft/__init__.py,sha256=HqjmF6s_dh0Ri4UZzUDtOKbNUyfAfJAWew3e3EL_KUk,8175 +numpy/fft/__init__.pyi,sha256=fnAMT5YCpXpIVe9uprRHWVK8wcaNZBqTTlaewQ8NiJ4,580 +numpy/fft/__pycache__/__init__.cpython-38.pyc,, +numpy/fft/__pycache__/_pocketfft.cpython-38.pyc,, +numpy/fft/__pycache__/helper.cpython-38.pyc,, +numpy/fft/__pycache__/setup.cpython-38.pyc,, +numpy/fft/_pocketfft.py,sha256=Xkm8wcP4JyBNMbp0ZoHIWhNDlgliX24RzrDuo29uRks,52897 +numpy/fft/_pocketfft.pyi,sha256=avze-KjXiaJp-wCwuYuSCO1QaSkmTw9ab_pSgUIIIEo,2366 +numpy/fft/_pocketfft_internal.cpython-38-x86_64-linux-gnu.so,sha256=EW5WrP0muIQb-wb1bRpLBHt9ZwUbqMAhXyBdscpQl1o,97032 +numpy/fft/helper.py,sha256=aNj1AcLvtfoX26RiLOwcR-k2QSMuBZkGj2Fu0CeFPJs,6154 +numpy/fft/helper.pyi,sha256=AxFre5L8q3SFQ1Fixrt_zFc7A0HFLMPiEKoikNiFLvk,1260 +numpy/fft/setup.py,sha256=OJPeJK4PuEtWRw_yVTZj4dKxfu2y-w3ZtQ6EUaQjQyk,728 +numpy/fft/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +numpy/fft/tests/__pycache__/__init__.cpython-38.pyc,, +numpy/fft/tests/__pycache__/test_helper.cpython-38.pyc,, +numpy/fft/tests/__pycache__/test_pocketfft.cpython-38.pyc,, +numpy/fft/tests/test_helper.py,sha256=whgeaQ8PzFf3B1wkbXobGZ5sF4WxPp4gf1UPUVZest8,6148 +numpy/fft/tests/test_pocketfft.py,sha256=lG4NLMbHid5gAXe4HxpayUK08T5wNJnBOwbhW4vY5Lw,12827 +numpy/lib/__init__.py,sha256=FbFTdqaeZW-euF0vebpg1jE3MYEjB8tS9eoYIRtnUHg,1779 
+numpy/lib/__init__.pyi,sha256=JEoPUgBcf5p5VfV_0jC6BTa-9J_EYZqeLawja_ayNvE,5589 +numpy/lib/__pycache__/__init__.cpython-38.pyc,, +numpy/lib/__pycache__/_datasource.cpython-38.pyc,, +numpy/lib/__pycache__/_iotools.cpython-38.pyc,, +numpy/lib/__pycache__/_version.cpython-38.pyc,, +numpy/lib/__pycache__/arraypad.cpython-38.pyc,, +numpy/lib/__pycache__/arraysetops.cpython-38.pyc,, +numpy/lib/__pycache__/arrayterator.cpython-38.pyc,, +numpy/lib/__pycache__/format.cpython-38.pyc,, +numpy/lib/__pycache__/function_base.cpython-38.pyc,, +numpy/lib/__pycache__/histograms.cpython-38.pyc,, +numpy/lib/__pycache__/index_tricks.cpython-38.pyc,, +numpy/lib/__pycache__/mixins.cpython-38.pyc,, +numpy/lib/__pycache__/nanfunctions.cpython-38.pyc,, +numpy/lib/__pycache__/npyio.cpython-38.pyc,, +numpy/lib/__pycache__/polynomial.cpython-38.pyc,, +numpy/lib/__pycache__/recfunctions.cpython-38.pyc,, +numpy/lib/__pycache__/scimath.cpython-38.pyc,, +numpy/lib/__pycache__/setup.cpython-38.pyc,, +numpy/lib/__pycache__/shape_base.cpython-38.pyc,, +numpy/lib/__pycache__/stride_tricks.cpython-38.pyc,, +numpy/lib/__pycache__/twodim_base.cpython-38.pyc,, +numpy/lib/__pycache__/type_check.cpython-38.pyc,, +numpy/lib/__pycache__/ufunclike.cpython-38.pyc,, +numpy/lib/__pycache__/user_array.cpython-38.pyc,, +numpy/lib/__pycache__/utils.cpython-38.pyc,, +numpy/lib/_datasource.py,sha256=D9sEaxgd2FJyj-dWwroaJPAeW399KIY2cZnXPwQdkTI,22634 +numpy/lib/_iotools.py,sha256=OL6MmiWabT1_g30YhGjYrNFuBoNSyDCHqiDgXuu_qFQ,30873 +numpy/lib/_version.py,sha256=6vK7czNSB_KrWx2rZJzJ1pyOc73Q07hAgfLB5ItUCnU,4855 +numpy/lib/_version.pyi,sha256=oHxx3k74f6WB1WJpMcUXc9mp3PYZRaKUHJzc1Xop-z8,701 +numpy/lib/arraypad.py,sha256=PoLrImHYFt_V17_r0lDfDlYViZHOPbLEiqtomFAdlxU,31226 +numpy/lib/arraypad.pyi,sha256=1r-iwWqPWiccncdkRVorxQKaE3PNjVhnMnKZRUCpFY8,1870 +numpy/lib/arraysetops.py,sha256=vptiGIIKUBfX0WplpnBEeBwIVKJ3X2GwOiYOjOTAAlM,26476 +numpy/lib/arraysetops.pyi,sha256=Wya5MvLzz4rn-hl0J-7FT7zVEQEKsvt2pUCesqKhhIA,7931 +numpy/lib/arrayterator.py,sha256=BQ97S00zvfURUZfes0GZo-5hydYNRuvwX1I1bLzeRik,7063 +numpy/lib/arrayterator.pyi,sha256=iC6YJXZKFxrwB9Bx5MJGeatNz9YNaU-O5h5x6xcR7PU,1544 +numpy/lib/format.py,sha256=fZN382Oe0eT_3nz85mdU5QTLCqfVsNTJxHqz2V3yskA,31432 +numpy/lib/format.pyi,sha256=ZMn7LYcxMMW7e93cKJaNjxwhB1lDdy5MCGnMljtEXhc,759 +numpy/lib/function_base.py,sha256=q1458clxom5gQ9su0Bf2FUM64oft0Dwzm3I778-LTRA,181564 +numpy/lib/function_base.pyi,sha256=O5YygtOqifsrSmmiFK6MBQmey38JM1Ac83C0EO5-T54,16530 +numpy/lib/histograms.py,sha256=SiXrfkdGqOyjRoLVIebs5i-1HbsiI29XtjS9OFto0EY,40215 +numpy/lib/histograms.pyi,sha256=wlMCbWAWC-6btF3d65fGVh3c2P2fDWSY9XozVaVhN8g,1047 +numpy/lib/index_tricks.py,sha256=OnILAuE50th6IzW3_2VeUeuH-V2WXIATXjy3MXClkqw,30580 +numpy/lib/index_tricks.pyi,sha256=HH5n1XZahUWjJbjHpF1skIcVaO6Iw15ffq3jlLlyY2k,4261 +numpy/lib/mixins.py,sha256=awJWn-wXkA6fg3TaPvMtXzUWaJVXKAn8fOt3XLPJ690,7052 +numpy/lib/mixins.pyi,sha256=Gu-HDVTxQFnVJ2wm03w9aJ09mAlBo-NZJF9jQZYejsw,2153 +numpy/lib/nanfunctions.py,sha256=JiTcA_4XAv_5SxQTTHsuNFUbMSMcYR37Uscd8SFCNLI,65675 +numpy/lib/nanfunctions.pyi,sha256=wI-0gxsmXQk3Zu03Pqx1_Z_IZWa5nqVgOPURUGjVCM4,631 +numpy/lib/npyio.py,sha256=bluvqCZ87p7SeDXdZNYgXTXxiXAMnuE12TTj4Gkh18w,89888 +numpy/lib/npyio.pyi,sha256=pDl8DSMcJ_gZ3KxT1cj3iHMMWlrKeXoQ01rooxZ-pBQ,7202 +numpy/lib/polynomial.py,sha256=7h-w0C704f9jB05jEy_7PMpGbkbvedf083Ns4LA5Ql0,44142 +numpy/lib/polynomial.pyi,sha256=rsfzwpj0vZ_Qc0Fv7WyaCYI6ikQ4ReP8QQtNrHYAYkA,6978 +numpy/lib/recfunctions.py,sha256=WK8xuqXIrHPX2EQ2YrbMHTkOmpRzNm0W8GftxNRHtkQ,56537 
+numpy/lib/scimath.py,sha256=IPCxZlEazdloTDYYv64uv7Fo2-HPf7atq7CIyAR7nwU,14786 +numpy/lib/scimath.pyi,sha256=CP4ztgUrmM9TE5Uh98EnlumtnCwCgaRfG1prIYp1eGE,212 +numpy/lib/setup.py,sha256=0K5NJKuvKvNEWp-EX7j0ODi3ZQQgIMHobzSFJq3G7yM,405 +numpy/lib/shape_base.py,sha256=kOvw_bH3Z1OqNpm1OPNVgZhae-HBmwKz4pdYgYdEXlQ,38371 +numpy/lib/shape_base.pyi,sha256=gEb6c9XdFUzcn3QcEN1Kpgjeft3-6alZMCbLu--4zkY,5195 +numpy/lib/stride_tricks.py,sha256=rltd6YH2E-eNBC7y8zTA-PRE99ZdPYzrQH7_r8LhwaQ,17910 +numpy/lib/stride_tricks.pyi,sha256=e-mcoJ5dQXRwH6_kvuj_5PaiqqqmLYzVxzXIh-BJ8Cg,1834 +numpy/lib/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +numpy/lib/tests/__pycache__/__init__.cpython-38.pyc,, +numpy/lib/tests/__pycache__/test__datasource.cpython-38.pyc,, +numpy/lib/tests/__pycache__/test__iotools.cpython-38.pyc,, +numpy/lib/tests/__pycache__/test__version.cpython-38.pyc,, +numpy/lib/tests/__pycache__/test_arraypad.cpython-38.pyc,, +numpy/lib/tests/__pycache__/test_arraysetops.cpython-38.pyc,, +numpy/lib/tests/__pycache__/test_arrayterator.cpython-38.pyc,, +numpy/lib/tests/__pycache__/test_financial_expired.cpython-38.pyc,, +numpy/lib/tests/__pycache__/test_format.cpython-38.pyc,, +numpy/lib/tests/__pycache__/test_function_base.cpython-38.pyc,, +numpy/lib/tests/__pycache__/test_histograms.cpython-38.pyc,, +numpy/lib/tests/__pycache__/test_index_tricks.cpython-38.pyc,, +numpy/lib/tests/__pycache__/test_io.cpython-38.pyc,, +numpy/lib/tests/__pycache__/test_mixins.cpython-38.pyc,, +numpy/lib/tests/__pycache__/test_nanfunctions.cpython-38.pyc,, +numpy/lib/tests/__pycache__/test_packbits.cpython-38.pyc,, +numpy/lib/tests/__pycache__/test_polynomial.cpython-38.pyc,, +numpy/lib/tests/__pycache__/test_recfunctions.cpython-38.pyc,, +numpy/lib/tests/__pycache__/test_regression.cpython-38.pyc,, +numpy/lib/tests/__pycache__/test_shape_base.cpython-38.pyc,, +numpy/lib/tests/__pycache__/test_stride_tricks.cpython-38.pyc,, +numpy/lib/tests/__pycache__/test_twodim_base.cpython-38.pyc,, +numpy/lib/tests/__pycache__/test_type_check.cpython-38.pyc,, +numpy/lib/tests/__pycache__/test_ufunclike.cpython-38.pyc,, +numpy/lib/tests/__pycache__/test_utils.cpython-38.pyc,, +numpy/lib/tests/data/py2-objarr.npy,sha256=F4cyUC-_TB9QSFLAo2c7c44rC6NUYIgrfGx9PqWPSKk,258 +numpy/lib/tests/data/py2-objarr.npz,sha256=xo13HBT0FbFZ2qvZz0LWGDb3SuQASSaXh7rKfVcJjx4,366 +numpy/lib/tests/data/py3-objarr.npy,sha256=pTTVh8ezp-lwAK3fkgvdKU8Arp5NMKznVD-M6Ex_uA0,341 +numpy/lib/tests/data/py3-objarr.npz,sha256=qQR0gS57e9ta16d_vCQjaaKM74gPdlwCPkp55P-qrdw,449 +numpy/lib/tests/data/python3.npy,sha256=X0ad3hAaLGXig9LtSHAo-BgOvLlFfPYMnZuVIxRmj-0,96 +numpy/lib/tests/data/win64python2.npy,sha256=agOcgHVYFJrV-nrRJDbGnUnF4ZTPYXuSeF-Mtg7GMpc,96 +numpy/lib/tests/test__datasource.py,sha256=HLhDpqI36qIWXdLmY9pPXpZSZdRccMKZ-h2U5jl62gE,10487 +numpy/lib/tests/test__iotools.py,sha256=HerCqvDE07JxjFQlWEfpZO7lC9z0Sbr3z20GSutoCPs,13743 +numpy/lib/tests/test__version.py,sha256=aO3YgkAohLsLzCNQ7vjIwdpFUMz0cPLbcuuxIkjuN74,1999 +numpy/lib/tests/test_arraypad.py,sha256=2Kucf5lR10QmZXt7s4YdWvMXLQDfZi9JI21TNYyp4a0,54283 +numpy/lib/tests/test_arraysetops.py,sha256=i6KnuCrobU4re9K2Pe4ikRVuHlWmwoLcDjBYjWwBOv0,28429 +numpy/lib/tests/test_arrayterator.py,sha256=AYs2SwV5ankgwnvKI9RSO1jZck118nu3SyZ4ngzZNso,1291 +numpy/lib/tests/test_financial_expired.py,sha256=llvcsFc-Ixm8esrmx45l463nWbp_PXogWSwKHfPTniA,358 +numpy/lib/tests/test_format.py,sha256=4_KYDAldWl3BsdmWtpEXovA-Z8VDVyScEWS5meCkHFs,38207 
+numpy/lib/tests/test_function_base.py,sha256=2iSN7Jyw3j88-whiV3Q1Ue7PCkPl6k5jiBr56NZjKGU,144353 +numpy/lib/tests/test_histograms.py,sha256=HFCgnxK_UsiHyq1gSZPNYvDorMou2LDXDXndpe9589o,33672 +numpy/lib/tests/test_index_tricks.py,sha256=hufhkDwdubN-6wk0vghkE5vE9TC7Kc_gmw50b-r7sgU,18955 +numpy/lib/tests/test_io.py,sha256=yNMJVSmO4_iTzDVAvCiws3kiL18WryCrPGHc-6ddP4I,103325 +numpy/lib/tests/test_mixins.py,sha256=Wivwz3XBWsEozGzrzsyyvL3qAuE14t1BHk2LPm9Z9Zc,7030 +numpy/lib/tests/test_nanfunctions.py,sha256=dHUvkPoKr9pEqqpMQ-W4mtYPaUqNdyBEl7GRBZYUSzg,44591 +numpy/lib/tests/test_packbits.py,sha256=OWGAd5g5GG0gl7WHqNfwkZ7G-2rrtLt2sI854PG4nnw,17546 +numpy/lib/tests/test_polynomial.py,sha256=URouxJpr8FQ5hiKybqhtOcLA7e-3hj4kWzjLBROByyA,11395 +numpy/lib/tests/test_recfunctions.py,sha256=qQjRHtICZwOP4wv5R9iJq1i4GpB3LkOxyYJiz0rZU34,41155 +numpy/lib/tests/test_regression.py,sha256=KzGFkhTcvEG97mymoOQ2hP2CEr2nPZou0Ztf4-WaXCs,8257 +numpy/lib/tests/test_shape_base.py,sha256=7Yt9gRluZMcl9yWm29KJ-mWllWEgARhh9ouGOKx21Mk,24455 +numpy/lib/tests/test_stride_tricks.py,sha256=wprpWWH5eq07DY7rzG0WDv5fMtLxzRQz6fm6TZWlScQ,22849 +numpy/lib/tests/test_twodim_base.py,sha256=1A5rI38bQZgQnibY6wnmSHn06pbU3Cz4MfZSR5WE_ss,18678 +numpy/lib/tests/test_type_check.py,sha256=akjNP3V7IGIdvoA73cxrx6XdaNRTaUaKdAR-XPYm9tw,15119 +numpy/lib/tests/test_ufunclike.py,sha256=8umwt73iT_zIbk20MxQSoK5LWD6Bvv9gBUkaPXiRNEw,3278 +numpy/lib/tests/test_utils.py,sha256=XJH5jKw9VvX0iYeIZKwmAWSXKB_kxMU2xGDLet3i3Kc,4560 +numpy/lib/twodim_base.py,sha256=qXWLrVd7gXSXrlH4Whf-lbWrZP5vFF3xe7NcM0C3-gQ,31339 +numpy/lib/twodim_base.pyi,sha256=LnzRiNwOGgTEEwe41jmAP706jhN49xfZnyFFWFJIwPk,5608 +numpy/lib/type_check.py,sha256=WM3QgzS7dgSOMFbx0R8RyXdWW8-ajF93Z-5e-Pv-UxM,20778 +numpy/lib/type_check.pyi,sha256=C81O5zyhznHcmgkVUPgSojWUk5pg7l59j68lWgQfLMY,5710 +numpy/lib/ufunclike.py,sha256=8WVNpHtt5kOkPYDhvlmHql-Swg5zV0f09bJwnVJNpm0,8031 +numpy/lib/ufunclike.pyi,sha256=FnLkam3fm_OvMpNPIzB6eLvx-TpUTl32itoTjVFOG7Y,1311 +numpy/lib/user_array.py,sha256=LE958--CMkBI2r3l1SQxmCHdCSw6HY6-RhWCnduzGA4,7721 +numpy/lib/utils.py,sha256=lRv7ogb14LMGX3RTvWvXKo4tEHYwA4ROpOlG99B0XAM,33150 +numpy/lib/utils.pyi,sha256=NMIrLyitXTNXTpWGx66wb4ekoqHOk1LDPsauR0nPcYo,2426 +numpy/linalg/__init__.py,sha256=mpdlEXWtTvpF7In776ONLwp6RIyo4U_GLPT1L1eIJnw,1813 +numpy/linalg/__init__.pyi,sha256=Qy5ZhwirDfNd4zyZl7Jj1KCGmMv1MRr5FNA34z-T5FA,650 +numpy/linalg/__pycache__/__init__.cpython-38.pyc,, +numpy/linalg/__pycache__/linalg.cpython-38.pyc,, +numpy/linalg/__pycache__/setup.cpython-38.pyc,, +numpy/linalg/_umath_linalg.cpython-38-x86_64-linux-gnu.so,sha256=1DMWXMCsMhCvlSCzbRHy7ARkkYE0f327w4QoTyoaDmc,237984 +numpy/linalg/lapack_lite.cpython-38-x86_64-linux-gnu.so,sha256=I_Djtrdh5F-M307pmkKB1e2OIIF0ZO5j6JGQ0uYMoZc,29992 +numpy/linalg/linalg.py,sha256=fF1_psCTouLOznRdOjZi-2U34EYen9sbwRXiCWRJTSY,89523 +numpy/linalg/linalg.pyi,sha256=nymkydaV630i174Kzc50RDiEyN71_6j_Gj0fxUxMSLg,7437 +numpy/linalg/setup.py,sha256=1MeTJH_z66LCHsTEqLxWvVKirxk2ngmRpWzt6sDceAg,2821 +numpy/linalg/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +numpy/linalg/tests/__pycache__/__init__.cpython-38.pyc,, +numpy/linalg/tests/__pycache__/test_deprecations.cpython-38.pyc,, +numpy/linalg/tests/__pycache__/test_linalg.cpython-38.pyc,, +numpy/linalg/tests/__pycache__/test_regression.cpython-38.pyc,, +numpy/linalg/tests/test_deprecations.py,sha256=9p_SRmtxj2zc1doY9Ie3dyy5JzWy-tCQWFoajcAJUmM,640 +numpy/linalg/tests/test_linalg.py,sha256=Y-jmQyii8UvjB1_e7fCIFd-niZSjqHBUuO4RBYIJjAQ,76816 
+numpy/linalg/tests/test_regression.py,sha256=wfJx2G8SeApiIta80xN2hzot-wvXSw-TF8TXXIYZwyE,5597 +numpy/ma/__init__.py,sha256=dgP0WdnOpph28Fd6UiqoyDKhfrct0H6QWqbCcETsk6M,1404 +numpy/ma/__init__.pyi,sha256=wM_EEAmnsY2iVfiRaGo3yb5TES0OepP767E4J-2BCiI,6083 +numpy/ma/__pycache__/__init__.cpython-38.pyc,, +numpy/ma/__pycache__/bench.cpython-38.pyc,, +numpy/ma/__pycache__/core.cpython-38.pyc,, +numpy/ma/__pycache__/extras.cpython-38.pyc,, +numpy/ma/__pycache__/mrecords.cpython-38.pyc,, +numpy/ma/__pycache__/setup.cpython-38.pyc,, +numpy/ma/__pycache__/testutils.cpython-38.pyc,, +numpy/ma/__pycache__/timer_comparison.cpython-38.pyc,, +numpy/ma/bench.py,sha256=tSsmn5f59rOx6e7UGU3kyAedPhJG2sp-8LZHeZPla6g,4859 +numpy/ma/core.py,sha256=BT_6ZvKasqFE3eNjNuiLUoFE7meF6aNSiQK9CZqUze4,267253 +numpy/ma/core.pyi,sha256=j2zTrB0JlsvpPGPcvlGPzlizhQqWugzGbm72qqU2a5U,14160 +numpy/ma/extras.py,sha256=gGIV7ZVQOtX9aUoq07r8oIqcnG4faJrVS70s541UjYs,58239 +numpy/ma/extras.pyi,sha256=6K9yvcqpsh39bjhYqqd8KILTC-ycLk6sRMSbVC6a-lw,2598 +numpy/ma/mrecords.py,sha256=degd6dLaDEvEWNHmvSnUZXos1csIzaqjR_jAutm8JfI,27232 +numpy/ma/mrecords.pyi,sha256=jAlPIqI8jbDfC2KkDB1F_5zZmN6gfloa7lV8XWDxcrI,1941 +numpy/ma/setup.py,sha256=MqmMicr_xHkAGoG-T7NJ4YdUZIJLO4ZFp6AmEJDlyhw,418 +numpy/ma/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +numpy/ma/tests/__pycache__/__init__.cpython-38.pyc,, +numpy/ma/tests/__pycache__/test_core.cpython-38.pyc,, +numpy/ma/tests/__pycache__/test_deprecations.cpython-38.pyc,, +numpy/ma/tests/__pycache__/test_extras.cpython-38.pyc,, +numpy/ma/tests/__pycache__/test_mrecords.cpython-38.pyc,, +numpy/ma/tests/__pycache__/test_old_ma.cpython-38.pyc,, +numpy/ma/tests/__pycache__/test_regression.cpython-38.pyc,, +numpy/ma/tests/__pycache__/test_subclassing.cpython-38.pyc,, +numpy/ma/tests/test_core.py,sha256=uF62WE5XhAkDr4yDZfy-YdfSIWSznARYnQ9cx_ef4CU,205483 +numpy/ma/tests/test_deprecations.py,sha256=ylBZx-5tGNimMk4xqUl0R2YUL5DX297XzSdL46U4wRk,2777 +numpy/ma/tests/test_extras.py,sha256=xfSzwBS3Wc2D25JgCdcKJZ91BsaXj0KQmFx5m-yTrls,67795 +numpy/ma/tests/test_mrecords.py,sha256=3IbTkbbDJhHfbkf-oWgpATghrwnIKA3XMtVFtnx257w,19883 +numpy/ma/tests/test_old_ma.py,sha256=wkZM5NnvkJCDropQT5dYbL14qweNv6e1aLptQzV4xbY,32758 +numpy/ma/tests/test_regression.py,sha256=Gr5p91SxeKK3jA3Kl7OGKOLdg40nSSHaMSjeVqIuvMM,3079 +numpy/ma/tests/test_subclassing.py,sha256=Q8biShKsBpzJpYW0Wpc-OL5vsgtZHnwNAsr9FuGB5n4,14286 +numpy/ma/testutils.py,sha256=KOD9yEe6rUTZ_nMDcZyXphBP1vpfV1vzvbmIhVaJrsg,10239 +numpy/ma/timer_comparison.py,sha256=pIGSZG-qYYYlRWSTgzPlyCAINbGKhXrZrDZBBjiM080,15658 +numpy/matlib.py,sha256=0tYjeI3dLL0-zpuavuiXkdnewfnhQ_3Pxsz-CI1hl98,10365 +numpy/matrixlib/__init__.py,sha256=L4GDL_3Z8Tf-s8v5hgFbnCzCMNSzvnZydENoSZBkWI4,218 +numpy/matrixlib/__init__.pyi,sha256=Mpe2A1pTTrU7HGYap8KpFZiHa4JGa4R2gDcBX7GNYfk,277 +numpy/matrixlib/__pycache__/__init__.cpython-38.pyc,, +numpy/matrixlib/__pycache__/defmatrix.cpython-38.pyc,, +numpy/matrixlib/__pycache__/setup.cpython-38.pyc,, +numpy/matrixlib/defmatrix.py,sha256=K9ecSTjVAETfgpCLPEALKn5CvT-QynDDFQvMn_3gmv0,30667 +numpy/matrixlib/defmatrix.pyi,sha256=pE8gqsXZnnmfxJj_XML_wSXPrn94pDMRodRqcR60UrU,429 +numpy/matrixlib/setup.py,sha256=1r7JRkSM4HyVorgtjoKJGWLcOcPO3wmvivpeEsVtAEg,426 +numpy/matrixlib/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +numpy/matrixlib/tests/__pycache__/__init__.cpython-38.pyc,, +numpy/matrixlib/tests/__pycache__/test_defmatrix.cpython-38.pyc,, +numpy/matrixlib/tests/__pycache__/test_interaction.cpython-38.pyc,, 
+numpy/matrixlib/tests/__pycache__/test_masked_matrix.cpython-38.pyc,, +numpy/matrixlib/tests/__pycache__/test_matrix_linalg.cpython-38.pyc,, +numpy/matrixlib/tests/__pycache__/test_multiarray.cpython-38.pyc,, +numpy/matrixlib/tests/__pycache__/test_numeric.cpython-38.pyc,, +numpy/matrixlib/tests/__pycache__/test_regression.cpython-38.pyc,, +numpy/matrixlib/tests/test_defmatrix.py,sha256=8E_-y7VD2vsq1y8CcI8km37pp5qcAtkciO16xqf2UIs,14982 +numpy/matrixlib/tests/test_interaction.py,sha256=PpjmgjEKighDXvt38labKE6L7f2jP74UEmp3JRb_iOY,11875 +numpy/matrixlib/tests/test_masked_matrix.py,sha256=RyL5DfLJoNUe-ZgQndMvpP8Jp_XKlreyhWjR1sfU-9A,8925 +numpy/matrixlib/tests/test_matrix_linalg.py,sha256=ObbSUXU4R2pWajH__xAdizADrU2kBKDDCxkDV-oVBXc,2059 +numpy/matrixlib/tests/test_multiarray.py,sha256=jB3XCBmAtcqf-Wb9PwBW6uIykPpMPthuXLJ0giTKzZE,554 +numpy/matrixlib/tests/test_numeric.py,sha256=MP70qUwgshTtThKZaZDp7_6U-Z66NIV1geVhasGXejQ,441 +numpy/matrixlib/tests/test_regression.py,sha256=8sHDtO8Zi8p3a1eQKEWxtCmKrXmHoD3qxlIokg2AIAU,927 +numpy/polynomial/__init__.py,sha256=8jrPKniYrbrE59nmLBbH2WLxNLeMkZ-vQuLa3Hu9Apg,6788 +numpy/polynomial/__init__.pyi,sha256=N9juq6-CIoPQYj0yvkoZUgQI3K-CMBi74-7hunOMbFY,726 +numpy/polynomial/__pycache__/__init__.cpython-38.pyc,, +numpy/polynomial/__pycache__/_polybase.cpython-38.pyc,, +numpy/polynomial/__pycache__/chebyshev.cpython-38.pyc,, +numpy/polynomial/__pycache__/hermite.cpython-38.pyc,, +numpy/polynomial/__pycache__/hermite_e.cpython-38.pyc,, +numpy/polynomial/__pycache__/laguerre.cpython-38.pyc,, +numpy/polynomial/__pycache__/legendre.cpython-38.pyc,, +numpy/polynomial/__pycache__/polynomial.cpython-38.pyc,, +numpy/polynomial/__pycache__/polyutils.cpython-38.pyc,, +numpy/polynomial/__pycache__/setup.cpython-38.pyc,, +numpy/polynomial/_polybase.py,sha256=n_5hGWdm0M-848FOc4Nm5ZXITrxgRLEoccfJxqbZT1U,36485 +numpy/polynomial/_polybase.pyi,sha256=igkkGznfNyQ3o_9CblNxp4sp10BOuqUxKONs0_D3qqg,2253 +numpy/polynomial/chebyshev.py,sha256=W3tx0zn1TCV9yjW7bmsKwUnV4C5m65vdvxvIr6UdE94,62495 +numpy/polynomial/chebyshev.pyi,sha256=ItHTUfbs2flT1ohIuLMX_ay8_ztheS4zgSZuslIGe_0,1393 +numpy/polynomial/hermite.py,sha256=NRk-PbDO69erGxov5-Kogx510TApYVN8s6tldN2EoSc,52243 +numpy/polynomial/hermite.pyi,sha256=uBbITtQXp1eSrlLVfx2UvZwMa3q37stME-VfI4rEDo0,1223 +numpy/polynomial/hermite_e.py,sha256=9gSngNFbr3U39lUEU8lxwfLMxi5K5E7c64b2Sjz8rLY,52366 +numpy/polynomial/hermite_e.pyi,sha256=VRuGIdPEZd_o22SKaqbTm8PBQdEig4JTL8WttofWJ5o,1244 +numpy/polynomial/laguerre.py,sha256=yjSKPCoWMu9s8OQ-QitiA3ATBS_STXCP5LZNkusLrSw,50573 +numpy/polynomial/laguerre.pyi,sha256=VB3H5VijGgXRw8CkC7QCP_fxfYHWg8mRc5sqHNMvdfU,1184 +numpy/polynomial/legendre.py,sha256=8rf4TmjHMHXgqsEsVmLMTbbp9qjSxfW_U_rdCQvuAv8,51270 +numpy/polynomial/legendre.pyi,sha256=fU8Awg2jiynXlykrQfDYuu8lfmjvw3wDquOdvyV1hTA,1184 +numpy/polynomial/polynomial.py,sha256=fcTMcHPjGxWGMMj7yRja-kfSL5kHTSbnOU9ZEaIuGjA,48689 +numpy/polynomial/polynomial.pyi,sha256=6_sIcbIXKCAfwenfdYjuLFr80mOWbIofd6GfAa3hPgQ,1138 +numpy/polynomial/polyutils.py,sha256=xyHL_X5yxum8z1wCN3HA5J48WMgpxm0zmQ5bqg_iq80,22105 +numpy/polynomial/polyutils.pyi,sha256=vLdXONDuSzbj9GmtB5X4fpbAVW1PwQ84PVKa0Z7498s,252 +numpy/polynomial/setup.py,sha256=dXQfzVUMP9OcB6iKv5yo1GLEwFB3gJ48phIgo4N-eM0,373 +numpy/polynomial/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +numpy/polynomial/tests/__pycache__/__init__.cpython-38.pyc,, +numpy/polynomial/tests/__pycache__/test_chebyshev.cpython-38.pyc,, +numpy/polynomial/tests/__pycache__/test_classes.cpython-38.pyc,, 
+numpy/polynomial/tests/__pycache__/test_hermite.cpython-38.pyc,, +numpy/polynomial/tests/__pycache__/test_hermite_e.cpython-38.pyc,, +numpy/polynomial/tests/__pycache__/test_laguerre.cpython-38.pyc,, +numpy/polynomial/tests/__pycache__/test_legendre.cpython-38.pyc,, +numpy/polynomial/tests/__pycache__/test_polynomial.cpython-38.pyc,, +numpy/polynomial/tests/__pycache__/test_polyutils.cpython-38.pyc,, +numpy/polynomial/tests/__pycache__/test_printing.cpython-38.pyc,, +numpy/polynomial/tests/test_chebyshev.py,sha256=6tMsFP1h7K8Zf72mNOta6Tv52_fVTlXknseuffj080c,20522 +numpy/polynomial/tests/test_classes.py,sha256=DFyY2IQBj3r2GZkvbRIeZO2EEY466xbuwc4PShAl4Sw,18331 +numpy/polynomial/tests/test_hermite.py,sha256=N9b2dx2UWPyja5v02dSoWYPnKvb6H-Ozgtrx-xjWz2k,18577 +numpy/polynomial/tests/test_hermite_e.py,sha256=_A3ohAWS4HXrQG06S8L47dImdZGTwYosCXnoyw7L45o,18911 +numpy/polynomial/tests/test_laguerre.py,sha256=BZOgs49VBXOFBepHopxuEDkIROHEvFBfWe4X73UZhn8,17511 +numpy/polynomial/tests/test_legendre.py,sha256=b_bblHs0F_BWw9ESuSq52ZsLKcQKFR5eqPf_SppWFqo,18673 +numpy/polynomial/tests/test_polynomial.py,sha256=OjoRocjhPfEd1tOW6J5LwUan3SdWTLwzHOzLU9umcI8,20238 +numpy/polynomial/tests/test_polyutils.py,sha256=IxkbVfpcBqe5lOZluHFUPbLATLu1rwVg7ghLASpfYrY,3579 +numpy/polynomial/tests/test_printing.py,sha256=Txe4Ac_P5w07avL_2Wr7niF7U8yPBqSLJkgMpGa3Va0,15786 +numpy/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +numpy/random/__init__.pxd,sha256=9JbnX540aJNSothGs-7e23ozhilG6U8tINOUEp08M_k,431 +numpy/random/__init__.py,sha256=81Thnexg5umN5WZwD5TRyzNc2Yp-d14B6UC7NBgVKh8,7506 +numpy/random/__init__.pyi,sha256=5y-OD2GzF2d0dEdmhHk_PkXrXWAz8sL1D3pxzkYORPE,2080 +numpy/random/__pycache__/__init__.cpython-38.pyc,, +numpy/random/__pycache__/_pickle.cpython-38.pyc,, +numpy/random/__pycache__/setup.cpython-38.pyc,, +numpy/random/_bounded_integers.cpython-38-x86_64-linux-gnu.so,sha256=1O2ibLyU12UlLc3z8L2IAcKLjRLiFCkfTOLuspu-6y4,441960 +numpy/random/_bounded_integers.pxd,sha256=hcoucPH5hkFEM2nm12zYO-5O_Rt8RujEXT5YWuAzl1Q,1669 +numpy/random/_common.cpython-38-x86_64-linux-gnu.so,sha256=Oj_vVes-0MKVJBfUONxlQUEdWrZQG327VyMN45L2_uA,311320 +numpy/random/_common.pxd,sha256=zHhnwWHPXwShPQoqgJtZ948HzOWnGAObx7OmtbPgLV0,4747 +numpy/random/_examples/cffi/__pycache__/extending.cpython-38.pyc,, +numpy/random/_examples/cffi/__pycache__/parse.cpython-38.pyc,, +numpy/random/_examples/cffi/extending.py,sha256=xSla3zWqxi6Hj48EvnYfD3WHfE189VvC4XsKu4_T_Iw,880 +numpy/random/_examples/cffi/parse.py,sha256=o41aw7pB_nA8RfLDUlaP0DNDO_bDo__B0XM5pGLxtY0,1829 +numpy/random/_examples/cython/__pycache__/setup.cpython-38.pyc,, +numpy/random/_examples/cython/extending.pyx,sha256=iUZ9tceTZ2nynXtkzAe0UOnZfWN8FCZcawT2lKwZkGk,2293 +numpy/random/_examples/cython/extending_distributions.pyx,sha256=oazFVWeemfE0eDzax7r7MMHNL1_Yofws2m-c_KT2Hbo,3870 +numpy/random/_examples/cython/setup.py,sha256=mLckw_C9kipWCVT4Eq5nkpZEUB0E3Hfom8rbjl7q1dE,1448 +numpy/random/_examples/numba/__pycache__/extending.cpython-38.pyc,, +numpy/random/_examples/numba/__pycache__/extending_distributions.cpython-38.pyc,, +numpy/random/_examples/numba/extending.py,sha256=Ipyzel_h5iU_DMJ_vnXUgQC38uMDMn7adUpWSeEQLFE,1957 +numpy/random/_examples/numba/extending_distributions.py,sha256=Jnr9aWkHyIWygNbdae32GVURK-5T9BTGhuExRpvve98,2034 +numpy/random/_generator.cpython-38-x86_64-linux-gnu.so,sha256=IrJfrGDcf-IB61KzXN67JeoCTHx36v7tNoub0qAoodU,1014648 +numpy/random/_generator.pyi,sha256=R7GhWySKx2yOsiYlki0w9AhNEWrDN2pXsd5RMawZ8Aw,22119 
+numpy/random/_mt19937.cpython-38-x86_64-linux-gnu.so,sha256=8IeVQe1tOZCk_gDbCf-kj12GGxuR-yr4ihdpaBa_Z1U,124512 +numpy/random/_mt19937.pyi,sha256=JlAv5yYV3ZP6kKgTt5R2AJ5KoI1TzMxr1r6422cwRC0,735 +numpy/random/_pcg64.cpython-38-x86_64-linux-gnu.so,sha256=bpGV9ostYR4UeF8CwMc_UQIpTgH3vM2_96j-i8Oa7ic,126928 +numpy/random/_pcg64.pyi,sha256=iqyMXiGoCSDWF9wb61hLdH_QbjDZ0sVtYWvchTghBAA,1107 +numpy/random/_philox.cpython-38-x86_64-linux-gnu.so,sha256=aZVP-Q-EyWgZL43COa_TS21GH-y5psz6BZ9-tXJoccI,106048 +numpy/random/_philox.pyi,sha256=pVFGOaw9VvUfWvePE1FN1c0KQXxe3HA01xE-l8l045s,1001 +numpy/random/_pickle.py,sha256=-edKbAJgOUXo4hZvmRSgFwHK6H2-AD6wdfFHUOzqrkg,2302 +numpy/random/_sfc64.cpython-38-x86_64-linux-gnu.so,sha256=y-tn-SXbwd5BWn4X9szuHNbH--iAzLBUTQA8KudHuPQ,72112 +numpy/random/_sfc64.pyi,sha256=AFftCHCBBKmQloff26-XhR5H8w1Wkoh2eT78NfUeL2U,720 +numpy/random/bit_generator.cpython-38-x86_64-linux-gnu.so,sha256=VwsuocI9SwWjm9UO3sGYGTJBBDuC6r9jw-A0WMzFT9M,227440 +numpy/random/bit_generator.pxd,sha256=lArpIXSgTwVnJMYc4XX0NGxegXq3h_QsUDK6qeZKbNc,1007 +numpy/random/bit_generator.pyi,sha256=J0wNjBJNBvNnCPsp8dAv57LvG7XCO1kiU0PgcZQomn0,3484 +numpy/random/c_distributions.pxd,sha256=FrGMrxWXGD8tc4HUWnzLpzUU8codNR-alXk1hjIAjmI,6033 +numpy/random/lib/libnpyrandom.a,sha256=cS3ie0I-EglUV0A89gCgAd79943D_mYKseb0nhcEa7o,274042 +numpy/random/mtrand.cpython-38-x86_64-linux-gnu.so,sha256=qBA4RPQQ9cdOMKZwIh91TnRXoTxXDEfOemLL01eESMY,823616 +numpy/random/mtrand.pyi,sha256=HArHeqCi9gTFue0VTvLIc7BDx7B9zgHtzBZ05ZEk-k8,20004 +numpy/random/setup.py,sha256=hk_fSx_snEpEENqYWIvBUPcsJ4oC-GxsRGtbwhwBFyg,6654 +numpy/random/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +numpy/random/tests/__pycache__/__init__.cpython-38.pyc,, +numpy/random/tests/__pycache__/test_direct.cpython-38.pyc,, +numpy/random/tests/__pycache__/test_extending.cpython-38.pyc,, +numpy/random/tests/__pycache__/test_generator_mt19937.cpython-38.pyc,, +numpy/random/tests/__pycache__/test_generator_mt19937_regressions.cpython-38.pyc,, +numpy/random/tests/__pycache__/test_random.cpython-38.pyc,, +numpy/random/tests/__pycache__/test_randomstate.cpython-38.pyc,, +numpy/random/tests/__pycache__/test_randomstate_regression.cpython-38.pyc,, +numpy/random/tests/__pycache__/test_regression.cpython-38.pyc,, +numpy/random/tests/__pycache__/test_seed_sequence.cpython-38.pyc,, +numpy/random/tests/__pycache__/test_smoke.cpython-38.pyc,, +numpy/random/tests/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +numpy/random/tests/data/__pycache__/__init__.cpython-38.pyc,, +numpy/random/tests/data/mt19937-testset-1.csv,sha256=Xkef402AVB-eZgYQkVtoxERHkxffCA9Jyt_oMbtJGwY,15844 +numpy/random/tests/data/mt19937-testset-2.csv,sha256=nsBEQNnff-aFjHYK4thjvUK4xSXDSfv5aTbcE59pOkE,15825 +numpy/random/tests/data/pcg64-testset-1.csv,sha256=xB00DpknGUTTCxDr9L6aNo9Hs-sfzEMbUSS4t11TTfE,23839 +numpy/random/tests/data/pcg64-testset-2.csv,sha256=NTdzTKvG2U7_WyU_IoQUtMzU3kEvDH39CgnR6VzhTkw,23845 +numpy/random/tests/data/pcg64dxsm-testset-1.csv,sha256=vNSUT-gXS_oEw_awR3O30ziVO4seNPUv1UIZ01SfVnI,23833 +numpy/random/tests/data/pcg64dxsm-testset-2.csv,sha256=uylS8PU2AIKZ185OC04RBr_OePweGRtvn-dE4YN0yYA,23839 +numpy/random/tests/data/philox-testset-1.csv,sha256=SedRaIy5zFadmk71nKrGxCFZ6BwKz8g1A9-OZp3IkkY,23852 +numpy/random/tests/data/philox-testset-2.csv,sha256=dWECt-sbfvaSiK8-Ygp5AqyjoN5i26VEOrXqg01rk3g,23838 +numpy/random/tests/data/sfc64-testset-1.csv,sha256=iHs6iX6KR8bxGwKk-3tedAdMPz6ZW8slDSUECkAqC8Q,23840 
+numpy/random/tests/data/sfc64-testset-2.csv,sha256=FIDIDFCaPZfWUSxsJMAe58hPNmMrU27kCd9FhCEYt_k,23833 +numpy/random/tests/test_direct.py,sha256=ROlEv_OqQQ2-vtypmqNtg-wzLXNuNe8YIQaMgBQfhLs,16429 +numpy/random/tests/test_extending.py,sha256=I9PCw6sWQg7FKsasX6FJjdDxk3AG4YlmBmzUKAJB7IQ,3493 +numpy/random/tests/test_generator_mt19937.py,sha256=J5-XEfsiU-i7fT8_oA5df2K0uUTBYVGqh1K0hRNknYo,112500 +numpy/random/tests/test_generator_mt19937_regressions.py,sha256=SAdHm3KlGjdh0IKyOF8TQd1YS-P59nSkIwKOe9SuT0U,5639 +numpy/random/tests/test_random.py,sha256=ZdL0iL99rP7U47IsXNkWM8LRWSHzkhgOWVmY3QjxzKE,69760 +numpy/random/tests/test_randomstate.py,sha256=vHRYMUDrR192QhQyVN8g0g5Wbv5igzP56d9TZ3WkNhM,81516 +numpy/random/tests/test_randomstate_regression.py,sha256=VucYWIjA7sAquWsalvZMnfkmYLM1O6ysyWnLl931-lA,7917 +numpy/random/tests/test_regression.py,sha256=trntK51UvajOVELiluEO85l64CKSw5nvBSc5SqYyr9w,5439 +numpy/random/tests/test_seed_sequence.py,sha256=GNRJ4jyzrtfolOND3gUWamnbvK6-b_p1bBK_RIG0sfU,3311 +numpy/random/tests/test_smoke.py,sha256=jjNz0aEGD1_oQl9a9UWt6Mz_298alG7KryLT1pgHljw,28183 +numpy/setup.py,sha256=IEOgR0gw6rSbPd2E4gt8H6v7fiZKsh3JQbXx6VJTYmY,1022 +numpy/testing/__init__.py,sha256=G8Qlx2bRgbnTCVFGoo8kR_OlsebpcTaJHeYUnHY4sZ8,650 +numpy/testing/__init__.pyi,sha256=pqyHh-qTMMDoFq5Oe6-z9yNGUs98NFklnHo2HXRZhMo,1828 +numpy/testing/__pycache__/__init__.cpython-38.pyc,, +numpy/testing/__pycache__/print_coercion_tables.cpython-38.pyc,, +numpy/testing/__pycache__/setup.cpython-38.pyc,, +numpy/testing/__pycache__/utils.cpython-38.pyc,, +numpy/testing/_private/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +numpy/testing/_private/__pycache__/__init__.cpython-38.pyc,, +numpy/testing/_private/__pycache__/decorators.cpython-38.pyc,, +numpy/testing/_private/__pycache__/extbuild.cpython-38.pyc,, +numpy/testing/_private/__pycache__/noseclasses.cpython-38.pyc,, +numpy/testing/_private/__pycache__/nosetester.cpython-38.pyc,, +numpy/testing/_private/__pycache__/parameterized.cpython-38.pyc,, +numpy/testing/_private/__pycache__/utils.cpython-38.pyc,, +numpy/testing/_private/decorators.py,sha256=amFUfIH86_F8qvD-jqky59GYVtqMLs7uSxZ6euHblh8,11401 +numpy/testing/_private/extbuild.py,sha256=k7-HViYQcUnzM9LTGfotDwjqsrfjcidWL6Zmkg9_J8E,7816 +numpy/testing/_private/noseclasses.py,sha256=0wuRHsQVkz1c5bX1F0v2C4QEJWdhCuAdVOwo8uOefP8,14516 +numpy/testing/_private/nosetester.py,sha256=wKjN3dagwDInzGdeN6wO9JnLe6IoTxGqF_idSL0qpCQ,19435 +numpy/testing/_private/parameterized.py,sha256=37gdWTVRN6Jd7na44NeHvj5erwjX67-V_14uFGPgoIM,16161 +numpy/testing/_private/utils.py,sha256=iVJHGbs5Z74Lm7bpHuc6Ob1QQRBzYhodfTxfMjIgP0Q,85282 +numpy/testing/_private/utils.pyi,sha256=fcSuU7xDIFV3j9LzsSn7oY24mC1fihPk0mbNbhcX0Aw,9906 +numpy/testing/print_coercion_tables.py,sha256=rZHlR2szrjbTq5zdBBBlbxYS9H0Gr2nyo5vNIK2aceM,6168 +numpy/testing/setup.py,sha256=GPKAtTTBRsNW4kmR7NjP6mmBR_GTdpaTvkTm10_VcLg,709 +numpy/testing/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +numpy/testing/tests/__pycache__/__init__.cpython-38.pyc,, +numpy/testing/tests/__pycache__/test_doctesting.cpython-38.pyc,, +numpy/testing/tests/__pycache__/test_utils.cpython-38.pyc,, +numpy/testing/tests/test_doctesting.py,sha256=84GCZsWBQ3gqKrRI5NzmH_PmFHMShaVpZ4m0b_T1qNA,1347 +numpy/testing/tests/test_utils.py,sha256=Ja4DWzSE3FWOZaLqEDNV_aJqpBkQagZHNilrCsm7Qlw,55645 +numpy/testing/utils.py,sha256=B77PkK2qIw8R8tKk09OpWGVQV2O_P77fu4wt5QGOXSM,1255 +numpy/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 
+numpy/tests/__pycache__/__init__.cpython-38.pyc,, +numpy/tests/__pycache__/test__all__.cpython-38.pyc,, +numpy/tests/__pycache__/test_ctypeslib.cpython-38.pyc,, +numpy/tests/__pycache__/test_matlib.cpython-38.pyc,, +numpy/tests/__pycache__/test_numpy_version.cpython-38.pyc,, +numpy/tests/__pycache__/test_public_api.cpython-38.pyc,, +numpy/tests/__pycache__/test_reloading.cpython-38.pyc,, +numpy/tests/__pycache__/test_scripts.cpython-38.pyc,, +numpy/tests/__pycache__/test_warnings.cpython-38.pyc,, +numpy/tests/test__all__.py,sha256=L3mCnYPTpzAgNfedVuq9g7xPWbc0c1Pot94k9jZ9NpI,221 +numpy/tests/test_ctypeslib.py,sha256=uAckGtgfFsaRGhD6ySPD5YR77US98TIUwR8DVS1bQvU,12290 +numpy/tests/test_matlib.py,sha256=gwhIXrJJo9DiecaGLCHLJBjhx2nVGl6yHq80AOUQSRM,1852 +numpy/tests/test_numpy_version.py,sha256=87imE8sJR6w16YYiTAfHxBSq7IFLnmgNpL6DUgqjKTs,1575 +numpy/tests/test_public_api.py,sha256=PjhqnF3iOpDyPt6IVIzUFAX-iD7Tc9kwSTP-tlJjeKQ,15888 +numpy/tests/test_reloading.py,sha256=bjyPiRx3AMPWcu-ExgbrlgmX4DxRAswwU_ci_Rl3hBk,2244 +numpy/tests/test_scripts.py,sha256=mmuhhijxr9PmiEph72ahLNnsoJJ_VNnt9r54-QE67ts,1573 +numpy/tests/test_warnings.py,sha256=b7x4zdms9sNJkO9FJ-LTzYI4BWhbeLGy2oFMC6Z85ig,2280 +numpy/typing/__init__.py,sha256=3s4hv0rgJxG3Za8w-N6J6snFTZwxacYPSdd8ydDZka8,10654 +numpy/typing/__pycache__/__init__.cpython-38.pyc,, +numpy/typing/__pycache__/_add_docstring.cpython-38.pyc,, +numpy/typing/__pycache__/_array_like.cpython-38.pyc,, +numpy/typing/__pycache__/_char_codes.cpython-38.pyc,, +numpy/typing/__pycache__/_dtype_like.cpython-38.pyc,, +numpy/typing/__pycache__/_extended_precision.cpython-38.pyc,, +numpy/typing/__pycache__/_generic_alias.cpython-38.pyc,, +numpy/typing/__pycache__/_nbit.cpython-38.pyc,, +numpy/typing/__pycache__/_nested_sequence.cpython-38.pyc,, +numpy/typing/__pycache__/_scalars.cpython-38.pyc,, +numpy/typing/__pycache__/_shape.cpython-38.pyc,, +numpy/typing/__pycache__/mypy_plugin.cpython-38.pyc,, +numpy/typing/__pycache__/setup.cpython-38.pyc,, +numpy/typing/_add_docstring.py,sha256=fviZAxCBB5w1OY5PnnP9Hj2Da7krEs8ixpj8yk_0aLM,3925 +numpy/typing/_array_like.py,sha256=nlfUYdypi55khrX0ekMeybmLoog3Q8TSz0Sz0ao-TyA,3147 +numpy/typing/_callable.pyi,sha256=8IKPxvl0CO1GHJO7Ro261okWq6gczNq5L0N93m-fNCo,10780 +numpy/typing/_char_codes.py,sha256=S8Fuv864HsteXqpuMVxy5ORGxGrz8M6W4pLqg36WwpI,5907 +numpy/typing/_dtype_like.py,sha256=PL-z1vCtV8OOwLeC9OPEKQ3PAZUGgPwDi7x0Yxci8rs,5368 +numpy/typing/_extended_precision.py,sha256=PfQZUM8xlS7lFXPVQbJJX1quiMKobeJJP4S9TvtyTrE,1111 +numpy/typing/_generic_alias.py,sha256=E0RqpE8ROI49WsWbWpStJRxkVa-B9IGzmcPVAyGOaFE,6408 +numpy/typing/_nbit.py,sha256=-EQOShHpB3r30b4RVEcruQRTcTaFAZwtqCJ4BsvpEzA,345 +numpy/typing/_nested_sequence.py,sha256=mL05EO9uFC4ir4YlODBmidgeRSJ2dQkxJ-5EiplmVrI,2654 +numpy/typing/_scalars.py,sha256=CRoiNAYZqELubR2hbplA5qom-k4kUpWLNX4GgX8Hli0,957 +numpy/typing/_shape.py,sha256=tsMDZ41GBVM9PiGa3GpGV2gEWxP57c7HqmPMWKrkCQo,191 +numpy/typing/_ufunc.pyi,sha256=mCAMV6eaEzhLoDsIdkAMsa8VP7PfcXoAPVesbI4lORA,11327 +numpy/typing/mypy_plugin.py,sha256=layNtKrAtwIvSQttpcryjv933s-JjJ04ZNqxdc73CQU,6475 +numpy/typing/setup.py,sha256=OZt9ln8rNHMROBi6CDnkIMo2X2tySHdPsk5--OmmJ78,409 +numpy/typing/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +numpy/typing/tests/__pycache__/__init__.cpython-38.pyc,, +numpy/typing/tests/__pycache__/test_generic_alias.cpython-38.pyc,, +numpy/typing/tests/__pycache__/test_isfile.cpython-38.pyc,, +numpy/typing/tests/__pycache__/test_runtime.cpython-38.pyc,, 
+numpy/typing/tests/__pycache__/test_typing.cpython-38.pyc,, +numpy/typing/tests/data/fail/arithmetic.pyi,sha256=TiIx0lHV5_jcp1qf2wUMr1UBw3r7WBI7thX-cSYLt1I,3856 +numpy/typing/tests/data/fail/array_constructors.pyi,sha256=isyzhELM9K490TCOFsLoKa2OIMTMNPZBje5DD4C0i7M,1018 +numpy/typing/tests/data/fail/array_like.pyi,sha256=YIxl8igSMHGLQlhBFjZTNVXmymbDnDovY91-cRBhsK4,454 +numpy/typing/tests/data/fail/array_pad.pyi,sha256=57oK0Yp53rtKjjIrRFYLcxa-IfIGhtI-bEem7ggJKwI,132 +numpy/typing/tests/data/fail/arrayprint.pyi,sha256=Pbx1_bHtuohCQ6oisGnLI9DehnMn5FQCFKVBxSiolw8,522 +numpy/typing/tests/data/fail/arrayterator.pyi,sha256=FoU4ahHkJZ67dwWXer5FXLjjjesKKg-w2Jq1X1bHymA,480 +numpy/typing/tests/data/fail/bitwise_ops.pyi,sha256=GN9dVqk4_HFXn7zbRrHzJq_UGRFBccoYVUG1UuE7bXs,515 +numpy/typing/tests/data/fail/char.pyi,sha256=-vgN6EmfQ8VaA4SOZ5Ol9u4-Z7Q5I7G78LmaxZOuZ90,2615 +numpy/typing/tests/data/fail/chararray.pyi,sha256=jrNryZFpr8nxG2IHb9e0x3ranpvJpBy_RDex-WpT5rU,2296 +numpy/typing/tests/data/fail/comparisons.pyi,sha256=U4neWzwwtxG6QXsKlNGJuKXHBtwzYBQOa47_7SKF5Wg,888 +numpy/typing/tests/data/fail/constants.pyi,sha256=YSqNbXdhbdMmYbs7ntH0FCKbnm8IFeqsDlZBqcU43iw,286 +numpy/typing/tests/data/fail/datasource.pyi,sha256=PRT2hixR-mVxr2UILvHa99Dr54EF2h3snJXE-v3rWcc,395 +numpy/typing/tests/data/fail/dtype.pyi,sha256=OAGABqdXNB8gClJFEGMckoycuZcIasMaAlS2RkiKROI,334 +numpy/typing/tests/data/fail/einsumfunc.pyi,sha256=XJrNgP5gcdotaCN17J2fofwAlKWvgVkis9LHWQeKOeA,743 +numpy/typing/tests/data/fail/false_positives.pyi,sha256=Q61qMsSsNCtmO0EMRxHj5Z7RYTyrELVpkzfJY5eK8Z0,366 +numpy/typing/tests/data/fail/flatiter.pyi,sha256=cgQGVetcYlurJial06dZPbKF_bBvzDnM5zticSJFhcQ,842 +numpy/typing/tests/data/fail/fromnumeric.pyi,sha256=sjpHFTQvuzDDOS_1ibs8swer1mFjPVN7KFxGdV0xNZs,5992 +numpy/typing/tests/data/fail/histograms.pyi,sha256=8Hx8zJ113UtHPc_yi-BM3xUqtmeUS3r3IFYvci9quQA,424 +numpy/typing/tests/data/fail/index_tricks.pyi,sha256=TqbVTZLqpWiWjVPr0S1jCp25uhlEW-CPOdwA3zIoo_o,533 +numpy/typing/tests/data/fail/lib_function_base.pyi,sha256=6y9T773CBLX-jUry1sCQGVuKVKM2wMuQ56Ni5V5j4Dw,2081 +numpy/typing/tests/data/fail/lib_polynomial.pyi,sha256=siSXSVM0FqwwdHnYrYksOrnMrgdK_zdZ9xGVQ67iMNw,913 +numpy/typing/tests/data/fail/lib_utils.pyi,sha256=VFpE6_DisvlDByyp1PiNPJEe5IcZp8cH0FlAJyoZipo,276 +numpy/typing/tests/data/fail/lib_version.pyi,sha256=7-ZJDZwDcB-wzpMN8TeYtZAgaqc7xnQ8Dnx2ISiX2Ts,158 +numpy/typing/tests/data/fail/linalg.pyi,sha256=yDd05aK1dI37RPt3pD2eJYo4dZFaT2yB1PEu3K0y9Tg,1322 +numpy/typing/tests/data/fail/memmap.pyi,sha256=HSTCQYNuW1Y6X1Woj361pN4rusSPs4oDCXywqk20yUo,159 +numpy/typing/tests/data/fail/modules.pyi,sha256=biKdya7jl9ptCNorql1AFHoOtXU9bdTFodlIUQtbwUY,652 +numpy/typing/tests/data/fail/multiarray.pyi,sha256=aNXwsCGNnAhVAJ8rKyQG1BenlT43konfurZVbRail8s,1715 +numpy/typing/tests/data/fail/ndarray.pyi,sha256=YnjXy16RHs_esKelMjB07865CQ7gLyQnXhnitq5Kv5c,405 +numpy/typing/tests/data/fail/ndarray_misc.pyi,sha256=ykOy904ORIxV-wzhaIFBFcHiWSAVys7Imr2LTdK1q1s,1312 +numpy/typing/tests/data/fail/nditer.pyi,sha256=w7emjnOxnf3NcvLktNLlke6Cuivn2gU3sVmGCfbG6rw,325 +numpy/typing/tests/data/fail/nested_sequence.pyi,sha256=_SW-W07MCMDhQD3U_aDHZB0NP7QA0Ty0BSlfusidKJ8,420 +numpy/typing/tests/data/fail/npyio.pyi,sha256=nUAt8mHO_ZzKsdo_G9mjJfaBCdrba_Q3KiyJwzMiHys,780 +numpy/typing/tests/data/fail/numerictypes.pyi,sha256=S_AIFg4DsJ1sX3EcCAh7ZlsAkpCsycZ_tU92Sj1DWyY,341 +numpy/typing/tests/data/fail/random.pyi,sha256=SfNNG3K9SQ61HM_vWlr6xy8951xnr3HwXbaPkK8w1S4,2836 
+numpy/typing/tests/data/fail/rec.pyi,sha256=Ws3TyesnoQjt7Q0wwtpShRDJmZCs2jjP17buFMomVGA,704 +numpy/typing/tests/data/fail/scalars.pyi,sha256=StglCm8lfzTZbp0Va_abLFXE48Pa63e58PLwWT6rX04,2955 +numpy/typing/tests/data/fail/shape_base.pyi,sha256=Y_f4buHtX2Q2ZA4kaDTyR8LErlPXTzCB_-jBoScGh_Q,152 +numpy/typing/tests/data/fail/stride_tricks.pyi,sha256=IjA0Xrnx0lG3m07d1Hjbhtyo1Te5cXgjgr5fLUo4LYQ,315 +numpy/typing/tests/data/fail/testing.pyi,sha256=ypCotDyrCQdF4HUNke7AqSV3uX_KpUGYQJ1WzkJfJCg,1222 +numpy/typing/tests/data/fail/twodim_base.pyi,sha256=wTk8ctb--MjAyTaAZyqAuwn63kV5HOcrc7HkNwqifWk,905 +numpy/typing/tests/data/fail/type_check.pyi,sha256=CIyI0j0Buxv0QgCvNG2urjaKpoIZ-ZNawC2m6NzGlbo,379 +numpy/typing/tests/data/fail/ufunc_config.pyi,sha256=ukA0xwfJHLoGfoOIpWIN-91wj-DG8oaIjYbO72ymjg4,733 +numpy/typing/tests/data/fail/ufunclike.pyi,sha256=ZJyZN2aQqQyyDc2VA4xIPzuhg2cZuNPNGPlE9igxZkw,685 +numpy/typing/tests/data/fail/ufuncs.pyi,sha256=YaDTL7QLmGSUxE6JVMzpOlZTjHWrgbOo0UIlkX-6ZQk,1347 +numpy/typing/tests/data/fail/warnings_and_errors.pyi,sha256=PrbYDFI7IGN3Gf0OPBkVfefzQs4AXHwDQ495pvrX3RY,174 +numpy/typing/tests/data/misc/extended_precision.pyi,sha256=XJE_Qjaou-yzhww8K_3ypc4WTPudW0b3g0-T82aB8QE,347 +numpy/typing/tests/data/mypy.ini,sha256=JIakONhNdxet8PX6ETuCY2YSkYx-vvUoVNbB40xZ0zE,140 +numpy/typing/tests/data/pass/__pycache__/arithmetic.cpython-38.pyc,, +numpy/typing/tests/data/pass/__pycache__/array_constructors.cpython-38.pyc,, +numpy/typing/tests/data/pass/__pycache__/array_like.cpython-38.pyc,, +numpy/typing/tests/data/pass/__pycache__/arrayprint.cpython-38.pyc,, +numpy/typing/tests/data/pass/__pycache__/arrayterator.cpython-38.pyc,, +numpy/typing/tests/data/pass/__pycache__/bitwise_ops.cpython-38.pyc,, +numpy/typing/tests/data/pass/__pycache__/comparisons.cpython-38.pyc,, +numpy/typing/tests/data/pass/__pycache__/dtype.cpython-38.pyc,, +numpy/typing/tests/data/pass/__pycache__/einsumfunc.cpython-38.pyc,, +numpy/typing/tests/data/pass/__pycache__/flatiter.cpython-38.pyc,, +numpy/typing/tests/data/pass/__pycache__/fromnumeric.cpython-38.pyc,, +numpy/typing/tests/data/pass/__pycache__/index_tricks.cpython-38.pyc,, +numpy/typing/tests/data/pass/__pycache__/lib_utils.cpython-38.pyc,, +numpy/typing/tests/data/pass/__pycache__/lib_version.cpython-38.pyc,, +numpy/typing/tests/data/pass/__pycache__/literal.cpython-38.pyc,, +numpy/typing/tests/data/pass/__pycache__/mod.cpython-38.pyc,, +numpy/typing/tests/data/pass/__pycache__/modules.cpython-38.pyc,, +numpy/typing/tests/data/pass/__pycache__/multiarray.cpython-38.pyc,, +numpy/typing/tests/data/pass/__pycache__/ndarray_conversion.cpython-38.pyc,, +numpy/typing/tests/data/pass/__pycache__/ndarray_misc.cpython-38.pyc,, +numpy/typing/tests/data/pass/__pycache__/ndarray_shape_manipulation.cpython-38.pyc,, +numpy/typing/tests/data/pass/__pycache__/numeric.cpython-38.pyc,, +numpy/typing/tests/data/pass/__pycache__/numerictypes.cpython-38.pyc,, +numpy/typing/tests/data/pass/__pycache__/random.cpython-38.pyc,, +numpy/typing/tests/data/pass/__pycache__/scalars.cpython-38.pyc,, +numpy/typing/tests/data/pass/__pycache__/simple.cpython-38.pyc,, +numpy/typing/tests/data/pass/__pycache__/simple_py3.cpython-38.pyc,, +numpy/typing/tests/data/pass/__pycache__/ufunc_config.cpython-38.pyc,, +numpy/typing/tests/data/pass/__pycache__/ufunclike.cpython-38.pyc,, +numpy/typing/tests/data/pass/__pycache__/ufuncs.cpython-38.pyc,, +numpy/typing/tests/data/pass/__pycache__/warnings_and_errors.cpython-38.pyc,, 
+numpy/typing/tests/data/pass/arithmetic.py,sha256=e-SkpBu7w-x6wfRwwWWwq1fMW-MCoadE-IN4MspDeuE,7299 +numpy/typing/tests/data/pass/array_constructors.py,sha256=cXfYnFPoe2tu1UfGXOZF2IbfuX_4THGOppskOcjDnP8,2458 +numpy/typing/tests/data/pass/array_like.py,sha256=6yTU4Lts7vfTqg0iDmEGNZNW2j7HKdqCAQL4uo6n76w,887 +numpy/typing/tests/data/pass/arrayprint.py,sha256=y_KkuLz1uM7pv53qfq7GQOuud4LoXE3apK1wtARdVyM,766 +numpy/typing/tests/data/pass/arrayterator.py,sha256=FqcpKdUQBQ0FazHFxr9MsLEZG-jnJVGKWZX2owRr4DQ,393 +numpy/typing/tests/data/pass/bitwise_ops.py,sha256=UnmxVr9HwI8ifdrutGm_u3EZU4iOOPQhrOku7hTaH0c,970 +numpy/typing/tests/data/pass/comparisons.py,sha256=nTE-fvraLK6xTZcP4uPV02wOShzYKWDaoapx35AeDOY,2992 +numpy/typing/tests/data/pass/dtype.py,sha256=lI_5QlB7_20TyC1Urxr0No9MbrhpBHQxs-F_v7UyPmo,1073 +numpy/typing/tests/data/pass/einsumfunc.py,sha256=eXj5L5MWPtQHgrHPsJ36qqrmBHqct9UoujjJCvHnF1k,1370 +numpy/typing/tests/data/pass/flatiter.py,sha256=0BnbuLMBC7MQlprNZ0QhNSscfYwPhEhXOhWoyiRACWU,174 +numpy/typing/tests/data/pass/fromnumeric.py,sha256=Xd_nJVVDoONdztUX8ddgo7EXJ2FD8AX51MO_Yujnmog,3742 +numpy/typing/tests/data/pass/index_tricks.py,sha256=oaFD9vY01_RI5OkrXt-xTk1n_dd-SpuPp-eZ58XR3c8,1492 +numpy/typing/tests/data/pass/lib_utils.py,sha256=SEJRuh7J7JHHlOVctf-zQ9zRwVpCvZ_HvAXHvDrjxw0,420 +numpy/typing/tests/data/pass/lib_version.py,sha256=HnuGOx7tQA_bcxFIJ3dRoMAR0fockxg4lGqQ4g7LGIw,299 +numpy/typing/tests/data/pass/literal.py,sha256=10nLzgoY5VxQKO0VR8RC12fgRAzHIOXGuLaj2DWFZBM,1299 +numpy/typing/tests/data/pass/mod.py,sha256=HB9aK4_wGJbc44tomaoroNy0foIL5cI9KIjknvMTbkk,1578 +numpy/typing/tests/data/pass/modules.py,sha256=f-6R2TbqrLX7F8A3Z9VgSyPAlOwvOy1zZyvUgivD250,595 +numpy/typing/tests/data/pass/multiarray.py,sha256=MxHax6l94yqlTVZleAqG77ILEbW6wU5osPcHzxJ85ns,1331 +numpy/typing/tests/data/pass/ndarray_conversion.py,sha256=yPgzXG6paY1uF_z-QyHYrcmrZvhX7qtvTUh7ANLseCA,1626 +numpy/typing/tests/data/pass/ndarray_misc.py,sha256=Rtb4uJ3oDGo8ke3H0qbKLLIPZ2_3xsNgxLsfHYWVo3k,2716 +numpy/typing/tests/data/pass/ndarray_shape_manipulation.py,sha256=37eYwMNqMLwanIW9-63hrokacnSz2K_qtPUlkdpsTjo,640 +numpy/typing/tests/data/pass/numeric.py,sha256=IsjGnWztX531RewuVi_vAwoA6LD28tFn-0n6S71nAd4,1478 +numpy/typing/tests/data/pass/numerictypes.py,sha256=fdpGtUNDOxjc_ZdezBiiW6ohBYErrw7cC9v644kOHfI,973 +numpy/typing/tests/data/pass/random.py,sha256=brm8o9S86imBigbOMxThXMePZuIzQjrOIdcLWM9N2hc,61823 +numpy/typing/tests/data/pass/scalars.py,sha256=PttbwlHTYjBFXTNgwe4Zm9dJDLSjjEGbDMwvR9ByXSM,3507 +numpy/typing/tests/data/pass/simple.py,sha256=-IShzi7VPGPdZz7Wv_UXyYGhHnKigJ2ISXE6cINJ120,2689 +numpy/typing/tests/data/pass/simple_py3.py,sha256=HuLrc5aphThQkLjU2_19KgGFaXwKOfSzXe0p2xMm8ZI,96 +numpy/typing/tests/data/pass/ufunc_config.py,sha256=b0nWLyq0V2H4mKZXrNM0kT-GM-x8ig9tv3al50_YAWM,1120 +numpy/typing/tests/data/pass/ufunclike.py,sha256=Gve6cJ2AT3TAwOjUOQQDIUnqsRCGYq70_tv_sgODiiA,1039 +numpy/typing/tests/data/pass/ufuncs.py,sha256=xGuKuqPetUTS4io5YDHaki5nbYRu-wC29SGU32tzVIg,462 +numpy/typing/tests/data/pass/warnings_and_errors.py,sha256=Pcg-QWfY4PAhTKyehae8q6LhtbUABxa2Ye63-3h1f4w,150 +numpy/typing/tests/data/reveal/arithmetic.pyi,sha256=Z8cs7fQ3ERTPEv9xEEpGwufHdV-EK23QIrjv5ItuguA,20688 +numpy/typing/tests/data/reveal/array_constructors.pyi,sha256=sXAT4X94J90jdfTWsnlWKaSLf1235PKchoE7mQghrQY,10338 +numpy/typing/tests/data/reveal/arraypad.pyi,sha256=lLRHSbOTdsr8Do_fjn2V5kXOzHN-whMOP6aGunLKPFI,680 +numpy/typing/tests/data/reveal/arrayprint.pyi,sha256=sKa0cWK058LZNB5RfIeqxaxZzj6JtjvsMCdgCWKYjJc,659 
+numpy/typing/tests/data/reveal/arraysetops.pyi,sha256=cKxJCQKVLOAXRDi0XGc-pFvHqzYfJW3w3O5JwD3bfW0,4671 +numpy/typing/tests/data/reveal/arrayterator.pyi,sha256=sh5Z34Jr3_qXXIHc-A_kvVrMkAHk2IXdOyZwukeobrM,1123 +numpy/typing/tests/data/reveal/bitwise_ops.pyi,sha256=t7nz7G8SNpGw_8Hdmpwl68pBH1fGHS6JVQAa5e8dd2s,3607 +numpy/typing/tests/data/reveal/char.pyi,sha256=DzEHH5D3YUBIfyPbDfIwvsMtcDuGgMZQozO3yH3WEKM,8038 +numpy/typing/tests/data/reveal/chararray.pyi,sha256=vMxKwfBn15GWjaaiaDLRc3RQHYo6Czb-usLtXJ74Apk,6195 +numpy/typing/tests/data/reveal/comparisons.pyi,sha256=MJ4UdmIzzgW-vBs2Vj0RCyqurgIEtdjAqcoal3VuX-w,7746 +numpy/typing/tests/data/reveal/constants.pyi,sha256=RFPG2UQfP6aRLtDLmL72iz2tqfkLC0GX0uIPUQPLfi8,1940 +numpy/typing/tests/data/reveal/ctypeslib.pyi,sha256=tgvlw5te1vcaf5WeBXBC_ja7i1nH54FD_ViOyrhHMEs,5106 +numpy/typing/tests/data/reveal/datasource.pyi,sha256=melhwwltIDET_oudcv-skR8jKrGeVF5UOZVqfSKjXBw,557 +numpy/typing/tests/data/reveal/dtype.pyi,sha256=4VRYtaQjLCZkTYuBhls5XK-eMAKygVyqmpjL3yi6Tvo,2693 +numpy/typing/tests/data/reveal/einsumfunc.pyi,sha256=O5YOQs9Otd5Udy5Xut2EIdEy6538PDuGXHZgBvHawwY,1893 +numpy/typing/tests/data/reveal/false_positives.pyi,sha256=oQx8mhWsl0V7Zg7d49ZmFGiUV9azVdGPbrXgk1Faj70,349 +numpy/typing/tests/data/reveal/fft.pyi,sha256=T5w3JYzXQhZdfYk7oGXr9fZf1T9kOqaHS2e0DSAOB4I,1852 +numpy/typing/tests/data/reveal/flatiter.pyi,sha256=doegm39nnY--1ZpsO61QmWWEZWEOFEfsmSXKVYxYEUc,825 +numpy/typing/tests/data/reveal/fromnumeric.pyi,sha256=4tuHyKooneSZUNcCsL8JvHhh-ziLSJbxoMXGU_-hSdA,9681 +numpy/typing/tests/data/reveal/getlimits.pyi,sha256=XCjHClh1U8G-KawZAif3B4vD60e83YTEie1cR9oFRKw,1547 +numpy/typing/tests/data/reveal/histograms.pyi,sha256=9xBXQvL5n670oencXNwqNveDPH8dXHKK9djEvnQ0d7Q,1391 +numpy/typing/tests/data/reveal/index_tricks.pyi,sha256=l-ZuWfWe_llVOLl4pUmBn5NWXGLve_jlh6jUj4_5q6U,3555 +numpy/typing/tests/data/reveal/lib_function_base.pyi,sha256=eYc65sc8sCQ7a-UOitDppSIqzThzMhR-tgTMM02DueY,9208 +numpy/typing/tests/data/reveal/lib_polynomial.pyi,sha256=r3WFzw-MOAyl2DeNIKrAPOTjBBjrTJHJYtE7chiAbyw,6353 +numpy/typing/tests/data/reveal/lib_utils.pyi,sha256=2SESkewf-uje-a009lcRKInfIjTPTSta4c6iujpX65w,923 +numpy/typing/tests/data/reveal/lib_version.pyi,sha256=SEo2pRemac1XnmhNDOKEEM0oHf8E0BTJR4rqJekI3fI,605 +numpy/typing/tests/data/reveal/linalg.pyi,sha256=QPfMGXSOSoIT-kGPV0PysdJxVr7Qfklr9zM7R8hy_N8,6389 +numpy/typing/tests/data/reveal/matrix.pyi,sha256=QiJFt5JxiM_KTmzSs3k9jUcjNssdMGBjD1RrbP4nEdM,3033 +numpy/typing/tests/data/reveal/memmap.pyi,sha256=R9HkOJQRbeOFxE4_XtPnYLX1EdoKnWkoBO0tx1ycdNc,690 +numpy/typing/tests/data/reveal/mod.pyi,sha256=a-d4dlG69pP_2YmQ-H63S7gfgAr_EzqzZlVLXrCAam8,5989 +numpy/typing/tests/data/reveal/modules.pyi,sha256=xarouCu6De5qF9dFqrGZafxZ85p7npV958o-uYIxG0E,1910 +numpy/typing/tests/data/reveal/multiarray.pyi,sha256=9kdUTQZP145jdcAyulsKPHpDeptX9v6fnC1HSRsC3YI,5621 +numpy/typing/tests/data/reveal/nbit_base_example.pyi,sha256=7kOuNXehsUek82F0knKkLNGINmReabCv6e_U_Tl1AM8,477 +numpy/typing/tests/data/reveal/ndarray_conversion.pyi,sha256=wzVZpYgNbyeHmMWZd_k0wwbiXRrOMy-_0YL-uVeiJCw,1913 +numpy/typing/tests/data/reveal/ndarray_misc.pyi,sha256=rCQKiD4bNKE8m4qqJah4tW9MWlEuwZHjX_Uls9T8C18,7468 +numpy/typing/tests/data/reveal/ndarray_shape_manipulation.pyi,sha256=Ywz-fsGOQBfzs62Z2hwLlStZJk1-XqU12EhF1XV-80M,904 +numpy/typing/tests/data/reveal/nditer.pyi,sha256=Lg28cl0LUT4rshMLORr0ZqUWjxLS_GhLcEPs3RjVIbI,2066 +numpy/typing/tests/data/reveal/nested_sequence.pyi,sha256=HWehuDY1Kwdp0EreFcZcHvpSjzidsOWEy8OD1xb8y4A,622 
+numpy/typing/tests/data/reveal/npyio.pyi,sha256=SYq4HmyJFqdQ1LCjXKhYtd8l5aMjE8lAobH00W-Kogs,4467 +numpy/typing/tests/data/reveal/numeric.pyi,sha256=4uu8InwB-8do_PXyR2hS70yLjApYcfbeXzVpixlpFf8,6811 +numpy/typing/tests/data/reveal/numerictypes.pyi,sha256=ogr_PjsgnwrywJu7A-MeTvlmrfQOPRdqMM4QcoL6GDY,1786 +numpy/typing/tests/data/reveal/random.pyi,sha256=zDxOrZLVh5RoLY4irfAk3tjCpDDsMQEH01UjlWhNdUQ,129394 +numpy/typing/tests/data/reveal/rec.pyi,sha256=w0ps-m5C5LR71ToXRl78D7brcYEyLMmx06U_n85I4h8,3324 +numpy/typing/tests/data/reveal/scalars.pyi,sha256=4uXG4aWbbE7J8_caSkdzlyz9WBEAzJAqNgFnmaZ0QwE,5694 +numpy/typing/tests/data/reveal/shape_base.pyi,sha256=HI717719it-Rbhdfcu0UCqL1lv-nJT9rqCuc-5LBTrQ,2637 +numpy/typing/tests/data/reveal/stride_tricks.pyi,sha256=B65XWe66LxfbzKIrN8QjDBTo6GWoHNwoiX5Kh6eKaaw,1565 +numpy/typing/tests/data/reveal/testing.pyi,sha256=4c5QPgfak8SPVSwKnC2wHMF_q7FaBo6lc_Mo-mYy6tw,8800 +numpy/typing/tests/data/reveal/twodim_base.pyi,sha256=GaTs3aYy18KRQc885pWak7YLN8ET5L9xsx7hp9K1ciY,3333 +numpy/typing/tests/data/reveal/type_check.pyi,sha256=Au0by1OrkzUS80Z93Hopw-trmDKMDj5-Ykekq5edvgs,3025 +numpy/typing/tests/data/reveal/ufunc_config.pyi,sha256=178x2bi3cwqRvGtfMWVdgy1zngKWtYPKq3iRfNicbHo,1304 +numpy/typing/tests/data/reveal/ufunclike.pyi,sha256=DDjDt0JV7_HKbTs15jvKpYx1OVe7KfBANrEoLhmuyG0,1325 +numpy/typing/tests/data/reveal/ufuncs.pyi,sha256=iau_uG1ombTNB2nhpUgdGnY3Pxx6mUj667tRXfLOvEI,2919 +numpy/typing/tests/data/reveal/version.pyi,sha256=T2ECJgJDcQDd5f71eJmnm_QqmXWYtBnlT5xomJqxAmI,313 +numpy/typing/tests/data/reveal/warnings_and_errors.pyi,sha256=pOpT0HrI1lhpHNcwKD0zN2dFtBPTWvyebomFJycBFaE,445 +numpy/typing/tests/test_generic_alias.py,sha256=kkzVvpG7SJkslTBs3qoDFgND7rkshqrJEV0oltBwDKM,5390 +numpy/typing/tests/test_isfile.py,sha256=xF6aWRj0ciBAPowsGx3fMUwRAryz8lxGDFiJPp-IGxc,812 +numpy/typing/tests/test_runtime.py,sha256=mTXwGVlgO5pflOgDdqfZYeCl6HIRtpWIzTyrdTHYgwM,2451 +numpy/typing/tests/test_typing.py,sha256=p6ELx8oGbnLd9kbya5WK60zV374DVlZC18iIOYta_O8,15262 +numpy/version.py,sha256=7WfmOFcKsQVxivXN6g0qZS_-D2PO9GisAfROkq5VlA8,475 diff --git a/.local/lib/python3.8/site-packages/numpy-1.22.3.dist-info/WHEEL b/.local/lib/python3.8/site-packages/numpy-1.22.3.dist-info/WHEEL new file mode 100644 index 0000000..3a48d34 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy-1.22.3.dist-info/WHEEL @@ -0,0 +1,6 @@ +Wheel-Version: 1.0 +Generator: bdist_wheel (0.37.1) +Root-Is-Purelib: false +Tag: cp38-cp38-manylinux_2_17_x86_64 +Tag: cp38-cp38-manylinux2014_x86_64 + diff --git a/.local/lib/python3.8/site-packages/numpy-1.22.3.dist-info/entry_points.txt b/.local/lib/python3.8/site-packages/numpy-1.22.3.dist-info/entry_points.txt new file mode 100644 index 0000000..81c3618 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy-1.22.3.dist-info/entry_points.txt @@ -0,0 +1,8 @@ +[array_api] +numpy = numpy.array_api + +[console_scripts] +f2py = numpy.f2py.f2py2e:main +f2py3 = numpy.f2py.f2py2e:main +f2py3.8 = numpy.f2py.f2py2e:main + diff --git a/.local/lib/python3.8/site-packages/numpy-1.22.3.dist-info/top_level.txt b/.local/lib/python3.8/site-packages/numpy-1.22.3.dist-info/top_level.txt new file mode 100644 index 0000000..24ce15a --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy-1.22.3.dist-info/top_level.txt @@ -0,0 +1 @@ +numpy diff --git a/.local/lib/python3.8/site-packages/numpy.libs/libgfortran-040039e1.so.5.0.0 b/.local/lib/python3.8/site-packages/numpy.libs/libgfortran-040039e1.so.5.0.0 new file mode 100755 index 0000000..9e3c75b Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/numpy.libs/libgfortran-040039e1.so.5.0.0 differ diff --git a/.local/lib/python3.8/site-packages/numpy.libs/libopenblas64_p-r0-2f7c42d4.3.18.so b/.local/lib/python3.8/site-packages/numpy.libs/libopenblas64_p-r0-2f7c42d4.3.18.so new file mode 100755 index 0000000..41cf598 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy.libs/libopenblas64_p-r0-2f7c42d4.3.18.so differ diff --git a/.local/lib/python3.8/site-packages/numpy.libs/libquadmath-96973f99.so.0.0.0 b/.local/lib/python3.8/site-packages/numpy.libs/libquadmath-96973f99.so.0.0.0 new file mode 100755 index 0000000..a1bfa40 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy.libs/libquadmath-96973f99.so.0.0.0 differ diff --git a/.local/lib/python3.8/site-packages/numpy/LICENSE.txt b/.local/lib/python3.8/site-packages/numpy/LICENSE.txt new file mode 100644 index 0000000..7d18210 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/LICENSE.txt @@ -0,0 +1,910 @@ +Copyright (c) 2005-2022, NumPy Developers. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the NumPy Developers nor the names of any + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +---- + +This binary distribution of NumPy also bundles the following software: + + +Name: OpenBLAS +Files: .libs/libopenb*.so +Description: bundled as a dynamically linked library +Availability: https://github.com/xianyi/OpenBLAS/ +License: 3-clause BSD + Copyright (c) 2011-2014, The OpenBLAS Project + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + 3. 
Neither the name of the OpenBLAS project nor the names of + its contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE + USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Name: LAPACK +Files: .libs/libopenb*.so +Description: bundled in OpenBLAS +Availability: https://github.com/xianyi/OpenBLAS/ +License 3-clause BSD + Copyright (c) 1992-2013 The University of Tennessee and The University + of Tennessee Research Foundation. All rights + reserved. + Copyright (c) 2000-2013 The University of California Berkeley. All + rights reserved. + Copyright (c) 2006-2013 The University of Colorado Denver. All rights + reserved. + + $COPYRIGHT$ + + Additional copyrights may follow + + $HEADER$ + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer listed + in this license in the documentation and/or other materials + provided with the distribution. + + - Neither the name of the copyright holders nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + The copyright holders provide no reassurances that the source code + provided does not infringe any patent, copyright, or any other + intellectual property rights of third parties. The copyright holders + disclaim any liability to any recipient for claims brought against + recipient by any third party for infringement of that parties + intellectual property rights. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + +Name: GCC runtime library +Files: .libs/libgfortran*.so +Description: dynamically linked to files compiled with gcc +Availability: https://gcc.gnu.org/viewcvs/gcc/ +License: GPLv3 + runtime exception + Copyright (C) 2002-2017 Free Software Foundation, Inc. + + Libgfortran is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgfortran is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. + +---- + +Full text of license texts referred to above follows (that they are +listed below does not necessarily imply the conditions apply to the +present binary release): + +---- + +GCC RUNTIME LIBRARY EXCEPTION + +Version 3.1, 31 March 2009 + +Copyright (C) 2009 Free Software Foundation, Inc. <http://fsf.org/> + +Everyone is permitted to copy and distribute verbatim copies of this +license document, but changing it is not allowed. + +This GCC Runtime Library Exception ("Exception") is an additional +permission under section 7 of the GNU General Public License, version +3 ("GPLv3"). It applies to a given file (the "Runtime Library") that +bears a notice placed by the copyright holder of the file stating that +the file is governed by GPLv3 along with this Exception. + +When you use GCC to compile a program, GCC may combine portions of +certain GCC header files and runtime libraries with the compiled +program. The purpose of this Exception is to allow compilation of +non-GPL (including proprietary) programs to use, in this way, the +header files and runtime libraries covered by this Exception. + +0. Definitions. + +A file is an "Independent Module" if it either requires the Runtime +Library for execution after a Compilation Process, or makes use of an +interface provided by the Runtime Library, but is not otherwise based +on the Runtime Library. + +"GCC" means a version of the GNU Compiler Collection, with or without +modifications, governed by version 3 (or a specified later version) of +the GNU General Public License (GPL) with the option of using any +subsequent versions published by the FSF. + +"GPL-compatible Software" is software whose conditions of propagation, +modification and use would permit combination with GCC in accord with +the license of GCC. + +"Target Code" refers to output from any compiler for a real or virtual +target processor architecture, in executable form or suitable for +input to an assembler, loader, linker and/or execution +phase. Notwithstanding that, Target Code does not include data in any +format that is used as a compiler intermediate representation, or used +for producing a compiler intermediate representation. + +The "Compilation Process" transforms code entirely represented in +non-intermediate languages designed for human-written code, and/or in +Java Virtual Machine byte code, into Target Code. 
Thus, for example, +use of source code generators and preprocessors need not be considered +part of the Compilation Process, since the Compilation Process can be +understood as starting with the output of the generators or +preprocessors. + +A Compilation Process is "Eligible" if it is done using GCC, alone or +with other GPL-compatible software, or if it is done without using any +work based on GCC. For example, using non-GPL-compatible Software to +optimize any GCC intermediate representations would not qualify as an +Eligible Compilation Process. + +1. Grant of Additional Permission. + +You have permission to propagate a work of Target Code formed by +combining the Runtime Library with Independent Modules, even if such +propagation would otherwise violate the terms of GPLv3, provided that +all Target Code was generated by Eligible Compilation Processes. You +may then convey such a combination under terms of your choice, +consistent with the licensing of the Independent Modules. + +2. No Weakening of GCC Copyleft. + +The availability of this Exception does not imply any general +presumption that third-party software is unaffected by the copyleft +requirements of the license of GCC. + +---- + + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/> + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. 
For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. 
+ + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 
+ + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. 
+ + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. 
+ + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. 
If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). 
To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. 
+ + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. 
+ + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + <program> Copyright (C) <year> <name of author> + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<https://www.gnu.org/licenses/>. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<https://www.gnu.org/licenses/why-not-lgpl.html>. diff --git a/.local/lib/python3.8/site-packages/numpy/__config__.py b/.local/lib/python3.8/site-packages/numpy/__config__.py new file mode 100644 index 0000000..7b3f196 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/__config__.py @@ -0,0 +1,119 @@ +# This file is generated by numpy's setup.py +# It contains system_info results at the time of building this package.
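+#
+# A minimal usage sketch (an illustrative addition, not part of the generated
+# file): this module is exposed publicly as ``numpy.show_config()``, which is
+# the ``show()`` function defined below, while ``get_info()`` looks up the
+# ``*_info`` dicts that follow.
+#
+#     import numpy.__config__ as npconfig
+#     npconfig.show()                              # print every *_info block
+#     blas = npconfig.get_info("blas_ilp64_opt")   # one of the dicts below, or {}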
+__all__ = ["get_info","show"] + + +import os +import sys + +extra_dll_dir = os.path.join(os.path.dirname(__file__), '.libs') + +if sys.platform == 'win32' and os.path.isdir(extra_dll_dir): + if sys.version_info >= (3, 8): + os.add_dll_directory(extra_dll_dir) + else: + os.environ.setdefault('PATH', '') + os.environ['PATH'] += os.pathsep + extra_dll_dir + +openblas64__info={'libraries': ['openblas64_', 'openblas64_'], 'library_dirs': ['/usr/local/lib'], 'language': 'c', 'define_macros': [('HAVE_CBLAS', None), ('BLAS_SYMBOL_SUFFIX', '64_'), ('HAVE_BLAS_ILP64', None)], 'runtime_library_dirs': ['/usr/local/lib']} +blas_ilp64_opt_info={'libraries': ['openblas64_', 'openblas64_'], 'library_dirs': ['/usr/local/lib'], 'language': 'c', 'define_macros': [('HAVE_CBLAS', None), ('BLAS_SYMBOL_SUFFIX', '64_'), ('HAVE_BLAS_ILP64', None)], 'runtime_library_dirs': ['/usr/local/lib']} +openblas64__lapack_info={'libraries': ['openblas64_', 'openblas64_'], 'library_dirs': ['/usr/local/lib'], 'language': 'c', 'define_macros': [('HAVE_CBLAS', None), ('BLAS_SYMBOL_SUFFIX', '64_'), ('HAVE_BLAS_ILP64', None), ('HAVE_LAPACKE', None)], 'runtime_library_dirs': ['/usr/local/lib']} +lapack_ilp64_opt_info={'libraries': ['openblas64_', 'openblas64_'], 'library_dirs': ['/usr/local/lib'], 'language': 'c', 'define_macros': [('HAVE_CBLAS', None), ('BLAS_SYMBOL_SUFFIX', '64_'), ('HAVE_BLAS_ILP64', None), ('HAVE_LAPACKE', None)], 'runtime_library_dirs': ['/usr/local/lib']} + +def get_info(name): + g = globals() + return g.get(name, g.get(name + "_info", {})) + +def show(): + """ + Show libraries in the system on which NumPy was built. + + Print information about various resources (libraries, library + directories, include directories, etc.) in the system on which + NumPy was built. + + See Also + -------- + get_include : Returns the directory containing NumPy C + header files. + + Notes + ----- + 1. Classes specifying the information to be printed are defined + in the `numpy.distutils.system_info` module. + + Information may include: + + * ``language``: language used to write the libraries (mostly + C or f77) + * ``libraries``: names of libraries found in the system + * ``library_dirs``: directories containing the libraries + * ``include_dirs``: directories containing library header files + * ``src_dirs``: directories containing library source files + * ``define_macros``: preprocessor macros used by + ``distutils.setup`` + * ``baseline``: minimum CPU features required + * ``found``: dispatched features supported in the system + * ``not found``: dispatched features that are not supported + in the system + + 2. NumPy BLAS/LAPACK Installation Notes + + Installing a numpy wheel (``pip install numpy`` or force it + via ``pip install numpy --only-binary :numpy: numpy``) includes + an OpenBLAS implementation of the BLAS and LAPACK linear algebra + APIs. In this case, ``library_dirs`` reports the original build + time configuration as compiled with gcc/gfortran; at run time + the OpenBLAS library is in + ``site-packages/numpy.libs/`` (linux), or + ``site-packages/numpy/.dylibs/`` (macOS), or + ``site-packages/numpy/.libs/`` (windows). + + Installing numpy from source + (``pip install numpy --no-binary numpy``) searches for BLAS and + LAPACK dynamic link libraries at build time as influenced by + environment variables NPY_BLAS_LIBS, NPY_CBLAS_LIBS, and + NPY_LAPACK_LIBS; or NPY_BLAS_ORDER and NPY_LAPACK_ORDER; + or the optional file ``~/.numpy-site.cfg``. 
+ NumPy remembers those locations and expects to load the same + libraries at run-time. + In NumPy 1.21+ on macOS, 'accelerate' (Apple's Accelerate BLAS + library) is in the default build-time search order after + 'openblas'. + + Examples + -------- + >>> import numpy as np + >>> np.show_config() + blas_opt_info: + language = c + define_macros = [('HAVE_CBLAS', None)] + libraries = ['openblas', 'openblas'] + library_dirs = ['/usr/local/lib'] + """ + from numpy.core._multiarray_umath import ( + __cpu_features__, __cpu_baseline__, __cpu_dispatch__ + ) + for name,info_dict in globals().items(): + if name[0] == "_" or type(info_dict) is not type({}): continue + print(name + ":") + if not info_dict: + print(" NOT AVAILABLE") + for k,v in info_dict.items(): + v = str(v) + if k == "sources" and len(v) > 200: + v = v[:60] + " ...\n... " + v[-60:] + print(" %s = %s" % (k,v)) + + features_found, features_not_found = [], [] + for feature in __cpu_dispatch__: + if __cpu_features__[feature]: + features_found.append(feature) + else: + features_not_found.append(feature) + + print("Supported SIMD extensions in this NumPy install:") + print(" baseline = %s" % (','.join(__cpu_baseline__))) + print(" found = %s" % (','.join(features_found))) + print(" not found = %s" % (','.join(features_not_found))) + diff --git a/.local/lib/python3.8/site-packages/numpy/__init__.cython-30.pxd b/.local/lib/python3.8/site-packages/numpy/__init__.cython-30.pxd new file mode 100644 index 0000000..42a46d0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/__init__.cython-30.pxd @@ -0,0 +1,1053 @@ +# NumPy static imports for Cython >= 3.0 +# +# If any of the PyArray_* functions are called, import_array must be +# called first. This is done automatically by Cython 3.0+ if a call +# is not detected inside of the module. +# +# Author: Dag Sverre Seljebotn +# + +from cpython.ref cimport Py_INCREF +from cpython.object cimport PyObject, PyTypeObject, PyObject_TypeCheck +cimport libc.stdio as stdio + + +cdef extern from *: + # Leave a marker that the NumPy declarations came from NumPy itself and not from Cython. 
+ # See https://github.com/cython/cython/issues/3573 + """ + /* Using NumPy API declarations from "numpy/__init__.cython-30.pxd" */ + """ + + +cdef extern from "Python.h": + ctypedef Py_ssize_t Py_intptr_t + +cdef extern from "numpy/arrayobject.h": + ctypedef Py_intptr_t npy_intp + ctypedef size_t npy_uintp + + cdef enum NPY_TYPES: + NPY_BOOL + NPY_BYTE + NPY_UBYTE + NPY_SHORT + NPY_USHORT + NPY_INT + NPY_UINT + NPY_LONG + NPY_ULONG + NPY_LONGLONG + NPY_ULONGLONG + NPY_FLOAT + NPY_DOUBLE + NPY_LONGDOUBLE + NPY_CFLOAT + NPY_CDOUBLE + NPY_CLONGDOUBLE + NPY_OBJECT + NPY_STRING + NPY_UNICODE + NPY_VOID + NPY_DATETIME + NPY_TIMEDELTA + NPY_NTYPES + NPY_NOTYPE + + NPY_INT8 + NPY_INT16 + NPY_INT32 + NPY_INT64 + NPY_INT128 + NPY_INT256 + NPY_UINT8 + NPY_UINT16 + NPY_UINT32 + NPY_UINT64 + NPY_UINT128 + NPY_UINT256 + NPY_FLOAT16 + NPY_FLOAT32 + NPY_FLOAT64 + NPY_FLOAT80 + NPY_FLOAT96 + NPY_FLOAT128 + NPY_FLOAT256 + NPY_COMPLEX32 + NPY_COMPLEX64 + NPY_COMPLEX128 + NPY_COMPLEX160 + NPY_COMPLEX192 + NPY_COMPLEX256 + NPY_COMPLEX512 + + NPY_INTP + + ctypedef enum NPY_ORDER: + NPY_ANYORDER + NPY_CORDER + NPY_FORTRANORDER + NPY_KEEPORDER + + ctypedef enum NPY_CASTING: + NPY_NO_CASTING + NPY_EQUIV_CASTING + NPY_SAFE_CASTING + NPY_SAME_KIND_CASTING + NPY_UNSAFE_CASTING + + ctypedef enum NPY_CLIPMODE: + NPY_CLIP + NPY_WRAP + NPY_RAISE + + ctypedef enum NPY_SCALARKIND: + NPY_NOSCALAR, + NPY_BOOL_SCALAR, + NPY_INTPOS_SCALAR, + NPY_INTNEG_SCALAR, + NPY_FLOAT_SCALAR, + NPY_COMPLEX_SCALAR, + NPY_OBJECT_SCALAR + + ctypedef enum NPY_SORTKIND: + NPY_QUICKSORT + NPY_HEAPSORT + NPY_MERGESORT + + ctypedef enum NPY_SEARCHSIDE: + NPY_SEARCHLEFT + NPY_SEARCHRIGHT + + enum: + # DEPRECATED since NumPy 1.7 ! Do not use in new code! + NPY_C_CONTIGUOUS + NPY_F_CONTIGUOUS + NPY_CONTIGUOUS + NPY_FORTRAN + NPY_OWNDATA + NPY_FORCECAST + NPY_ENSURECOPY + NPY_ENSUREARRAY + NPY_ELEMENTSTRIDES + NPY_ALIGNED + NPY_NOTSWAPPED + NPY_WRITEABLE + NPY_UPDATEIFCOPY + NPY_ARR_HAS_DESCR + + NPY_BEHAVED + NPY_BEHAVED_NS + NPY_CARRAY + NPY_CARRAY_RO + NPY_FARRAY + NPY_FARRAY_RO + NPY_DEFAULT + + NPY_IN_ARRAY + NPY_OUT_ARRAY + NPY_INOUT_ARRAY + NPY_IN_FARRAY + NPY_OUT_FARRAY + NPY_INOUT_FARRAY + + NPY_UPDATE_ALL + + enum: + # Added in NumPy 1.7 to replace the deprecated enums above. + NPY_ARRAY_C_CONTIGUOUS + NPY_ARRAY_F_CONTIGUOUS + NPY_ARRAY_OWNDATA + NPY_ARRAY_FORCECAST + NPY_ARRAY_ENSURECOPY + NPY_ARRAY_ENSUREARRAY + NPY_ARRAY_ELEMENTSTRIDES + NPY_ARRAY_ALIGNED + NPY_ARRAY_NOTSWAPPED + NPY_ARRAY_WRITEABLE + NPY_ARRAY_UPDATEIFCOPY + + NPY_ARRAY_BEHAVED + NPY_ARRAY_BEHAVED_NS + NPY_ARRAY_CARRAY + NPY_ARRAY_CARRAY_RO + NPY_ARRAY_FARRAY + NPY_ARRAY_FARRAY_RO + NPY_ARRAY_DEFAULT + + NPY_ARRAY_IN_ARRAY + NPY_ARRAY_OUT_ARRAY + NPY_ARRAY_INOUT_ARRAY + NPY_ARRAY_IN_FARRAY + NPY_ARRAY_OUT_FARRAY + NPY_ARRAY_INOUT_FARRAY + + NPY_ARRAY_UPDATE_ALL + + cdef enum: + NPY_MAXDIMS + + npy_intp NPY_MAX_ELSIZE + + ctypedef void (*PyArray_VectorUnaryFunc)(void *, void *, npy_intp, void *, void *) + + ctypedef struct PyArray_ArrayDescr: + # shape is a tuple, but Cython doesn't support "tuple shape" + # inside a non-PyObject declaration, so we have to declare it + # as just a PyObject*. + PyObject* shape + + ctypedef struct PyArray_Descr: + pass + + ctypedef class numpy.dtype [object PyArray_Descr, check_size ignore]: + # Use PyDataType_* macros when possible, however there are no macros + # for accessing some of the fields, so some are defined. + cdef PyTypeObject* typeobj + cdef char kind + cdef char type + # Numpy sometimes mutates this without warning (e.g. 
it'll + # sometimes change "|" to "<" in shared dtype objects on + # little-endian machines). If this matters to you, use + # PyArray_IsNativeByteOrder(dtype.byteorder) instead of + # directly accessing this field. + cdef char byteorder + cdef char flags + cdef int type_num + cdef int itemsize "elsize" + cdef int alignment + cdef object fields + cdef tuple names + # Use PyDataType_HASSUBARRAY to test whether this field is + # valid (the pointer can be NULL). Most users should access + # this field via the inline helper method PyDataType_SHAPE. + cdef PyArray_ArrayDescr* subarray + + ctypedef class numpy.flatiter [object PyArrayIterObject, check_size ignore]: + # Use through macros + pass + + ctypedef class numpy.broadcast [object PyArrayMultiIterObject, check_size ignore]: + # Use through macros + pass + + ctypedef struct PyArrayObject: + # For use in situations where ndarray can't replace PyArrayObject*, + # like PyArrayObject**. + pass + + ctypedef class numpy.ndarray [object PyArrayObject, check_size ignore]: + cdef __cythonbufferdefaults__ = {"mode": "strided"} + + # NOTE: no field declarations since direct access is deprecated since NumPy 1.7 + # Instead, we use properties that map to the corresponding C-API functions. + + @property + cdef inline PyObject* base(self) nogil: + """Returns a borrowed reference to the object owning the data/memory. + """ + return PyArray_BASE(self) + + @property + cdef inline dtype descr(self): + """Returns an owned reference to the dtype of the array. + """ + return PyArray_DESCR(self) + + @property + cdef inline int ndim(self) nogil: + """Returns the number of dimensions in the array. + """ + return PyArray_NDIM(self) + + @property + cdef inline npy_intp *shape(self) nogil: + """Returns a pointer to the dimensions/shape of the array. + The number of elements matches the number of dimensions of the array (ndim). + Can return NULL for 0-dimensional arrays. + """ + return PyArray_DIMS(self) + + @property + cdef inline npy_intp *strides(self) nogil: + """Returns a pointer to the strides of the array. + The number of elements matches the number of dimensions of the array (ndim). + """ + return PyArray_STRIDES(self) + + @property + cdef inline npy_intp size(self) nogil: + """Returns the total size (in number of elements) of the array. + """ + return PyArray_SIZE(self) + + @property + cdef inline char* data(self) nogil: + """The pointer to the data buffer as a char*. + This is provided for legacy reasons to avoid direct struct field access. + For new code that needs this access, you probably want to cast the result + of `PyArray_DATA()` instead, which returns a 'void*'. 
+ """ + return PyArray_BYTES(self) + + ctypedef unsigned char npy_bool + + ctypedef signed char npy_byte + ctypedef signed short npy_short + ctypedef signed int npy_int + ctypedef signed long npy_long + ctypedef signed long long npy_longlong + + ctypedef unsigned char npy_ubyte + ctypedef unsigned short npy_ushort + ctypedef unsigned int npy_uint + ctypedef unsigned long npy_ulong + ctypedef unsigned long long npy_ulonglong + + ctypedef float npy_float + ctypedef double npy_double + ctypedef long double npy_longdouble + + ctypedef signed char npy_int8 + ctypedef signed short npy_int16 + ctypedef signed int npy_int32 + ctypedef signed long long npy_int64 + ctypedef signed long long npy_int96 + ctypedef signed long long npy_int128 + + ctypedef unsigned char npy_uint8 + ctypedef unsigned short npy_uint16 + ctypedef unsigned int npy_uint32 + ctypedef unsigned long long npy_uint64 + ctypedef unsigned long long npy_uint96 + ctypedef unsigned long long npy_uint128 + + ctypedef float npy_float32 + ctypedef double npy_float64 + ctypedef long double npy_float80 + ctypedef long double npy_float96 + ctypedef long double npy_float128 + + ctypedef struct npy_cfloat: + float real + float imag + + ctypedef struct npy_cdouble: + double real + double imag + + ctypedef struct npy_clongdouble: + long double real + long double imag + + ctypedef struct npy_complex64: + float real + float imag + + ctypedef struct npy_complex128: + double real + double imag + + ctypedef struct npy_complex160: + long double real + long double imag + + ctypedef struct npy_complex192: + long double real + long double imag + + ctypedef struct npy_complex256: + long double real + long double imag + + ctypedef struct PyArray_Dims: + npy_intp *ptr + int len + + int _import_array() except -1 + # A second definition so _import_array isn't marked as used when we use it here. + # Do not use - subject to change any time. + int __pyx_import_array "_import_array"() except -1 + + # + # Macros from ndarrayobject.h + # + bint PyArray_CHKFLAGS(ndarray m, int flags) nogil + bint PyArray_IS_C_CONTIGUOUS(ndarray arr) nogil + bint PyArray_IS_F_CONTIGUOUS(ndarray arr) nogil + bint PyArray_ISCONTIGUOUS(ndarray m) nogil + bint PyArray_ISWRITEABLE(ndarray m) nogil + bint PyArray_ISALIGNED(ndarray m) nogil + + int PyArray_NDIM(ndarray) nogil + bint PyArray_ISONESEGMENT(ndarray) nogil + bint PyArray_ISFORTRAN(ndarray) nogil + int PyArray_FORTRANIF(ndarray) nogil + + void* PyArray_DATA(ndarray) nogil + char* PyArray_BYTES(ndarray) nogil + + npy_intp* PyArray_DIMS(ndarray) nogil + npy_intp* PyArray_STRIDES(ndarray) nogil + npy_intp PyArray_DIM(ndarray, size_t) nogil + npy_intp PyArray_STRIDE(ndarray, size_t) nogil + + PyObject *PyArray_BASE(ndarray) nogil # returns borrowed reference! + PyArray_Descr *PyArray_DESCR(ndarray) nogil # returns borrowed reference to dtype! + PyArray_Descr *PyArray_DTYPE(ndarray) nogil # returns borrowed reference to dtype! NP 1.7+ alias for descr. 
+ int PyArray_FLAGS(ndarray) nogil + void PyArray_CLEARFLAGS(ndarray, int flags) nogil # Added in NumPy 1.7 + void PyArray_ENABLEFLAGS(ndarray, int flags) nogil # Added in NumPy 1.7 + npy_intp PyArray_ITEMSIZE(ndarray) nogil + int PyArray_TYPE(ndarray arr) nogil + + object PyArray_GETITEM(ndarray arr, void *itemptr) + int PyArray_SETITEM(ndarray arr, void *itemptr, object obj) + + bint PyTypeNum_ISBOOL(int) nogil + bint PyTypeNum_ISUNSIGNED(int) nogil + bint PyTypeNum_ISSIGNED(int) nogil + bint PyTypeNum_ISINTEGER(int) nogil + bint PyTypeNum_ISFLOAT(int) nogil + bint PyTypeNum_ISNUMBER(int) nogil + bint PyTypeNum_ISSTRING(int) nogil + bint PyTypeNum_ISCOMPLEX(int) nogil + bint PyTypeNum_ISPYTHON(int) nogil + bint PyTypeNum_ISFLEXIBLE(int) nogil + bint PyTypeNum_ISUSERDEF(int) nogil + bint PyTypeNum_ISEXTENDED(int) nogil + bint PyTypeNum_ISOBJECT(int) nogil + + bint PyDataType_ISBOOL(dtype) nogil + bint PyDataType_ISUNSIGNED(dtype) nogil + bint PyDataType_ISSIGNED(dtype) nogil + bint PyDataType_ISINTEGER(dtype) nogil + bint PyDataType_ISFLOAT(dtype) nogil + bint PyDataType_ISNUMBER(dtype) nogil + bint PyDataType_ISSTRING(dtype) nogil + bint PyDataType_ISCOMPLEX(dtype) nogil + bint PyDataType_ISPYTHON(dtype) nogil + bint PyDataType_ISFLEXIBLE(dtype) nogil + bint PyDataType_ISUSERDEF(dtype) nogil + bint PyDataType_ISEXTENDED(dtype) nogil + bint PyDataType_ISOBJECT(dtype) nogil + bint PyDataType_HASFIELDS(dtype) nogil + bint PyDataType_HASSUBARRAY(dtype) nogil + + bint PyArray_ISBOOL(ndarray) nogil + bint PyArray_ISUNSIGNED(ndarray) nogil + bint PyArray_ISSIGNED(ndarray) nogil + bint PyArray_ISINTEGER(ndarray) nogil + bint PyArray_ISFLOAT(ndarray) nogil + bint PyArray_ISNUMBER(ndarray) nogil + bint PyArray_ISSTRING(ndarray) nogil + bint PyArray_ISCOMPLEX(ndarray) nogil + bint PyArray_ISPYTHON(ndarray) nogil + bint PyArray_ISFLEXIBLE(ndarray) nogil + bint PyArray_ISUSERDEF(ndarray) nogil + bint PyArray_ISEXTENDED(ndarray) nogil + bint PyArray_ISOBJECT(ndarray) nogil + bint PyArray_HASFIELDS(ndarray) nogil + + bint PyArray_ISVARIABLE(ndarray) nogil + + bint PyArray_SAFEALIGNEDCOPY(ndarray) nogil + bint PyArray_ISNBO(char) nogil # works on ndarray.byteorder + bint PyArray_IsNativeByteOrder(char) nogil # works on ndarray.byteorder + bint PyArray_ISNOTSWAPPED(ndarray) nogil + bint PyArray_ISBYTESWAPPED(ndarray) nogil + + bint PyArray_FLAGSWAP(ndarray, int) nogil + + bint PyArray_ISCARRAY(ndarray) nogil + bint PyArray_ISCARRAY_RO(ndarray) nogil + bint PyArray_ISFARRAY(ndarray) nogil + bint PyArray_ISFARRAY_RO(ndarray) nogil + bint PyArray_ISBEHAVED(ndarray) nogil + bint PyArray_ISBEHAVED_RO(ndarray) nogil + + + bint PyDataType_ISNOTSWAPPED(dtype) nogil + bint PyDataType_ISBYTESWAPPED(dtype) nogil + + bint PyArray_DescrCheck(object) + + bint PyArray_Check(object) + bint PyArray_CheckExact(object) + + # Cannot be supported due to out arg: + # bint PyArray_HasArrayInterfaceType(object, dtype, object, object&) + # bint PyArray_HasArrayInterface(op, out) + + + bint PyArray_IsZeroDim(object) + # Cannot be supported due to ## ## in macro: + # bint PyArray_IsScalar(object, verbatim work) + bint PyArray_CheckScalar(object) + bint PyArray_IsPythonNumber(object) + bint PyArray_IsPythonScalar(object) + bint PyArray_IsAnyScalar(object) + bint PyArray_CheckAnyScalar(object) + + ndarray PyArray_GETCONTIGUOUS(ndarray) + bint PyArray_SAMESHAPE(ndarray, ndarray) nogil + npy_intp PyArray_SIZE(ndarray) nogil + npy_intp PyArray_NBYTES(ndarray) nogil + + object PyArray_FROM_O(object) + object PyArray_FROM_OF(object m, 
int flags) + object PyArray_FROM_OT(object m, int type) + object PyArray_FROM_OTF(object m, int type, int flags) + object PyArray_FROMANY(object m, int type, int min, int max, int flags) + object PyArray_ZEROS(int nd, npy_intp* dims, int type, int fortran) + object PyArray_EMPTY(int nd, npy_intp* dims, int type, int fortran) + void PyArray_FILLWBYTE(object, int val) + npy_intp PyArray_REFCOUNT(object) + object PyArray_ContiguousFromAny(op, int, int min_depth, int max_depth) + unsigned char PyArray_EquivArrTypes(ndarray a1, ndarray a2) + bint PyArray_EquivByteorders(int b1, int b2) nogil + object PyArray_SimpleNew(int nd, npy_intp* dims, int typenum) + object PyArray_SimpleNewFromData(int nd, npy_intp* dims, int typenum, void* data) + #object PyArray_SimpleNewFromDescr(int nd, npy_intp* dims, dtype descr) + object PyArray_ToScalar(void* data, ndarray arr) + + void* PyArray_GETPTR1(ndarray m, npy_intp i) nogil + void* PyArray_GETPTR2(ndarray m, npy_intp i, npy_intp j) nogil + void* PyArray_GETPTR3(ndarray m, npy_intp i, npy_intp j, npy_intp k) nogil + void* PyArray_GETPTR4(ndarray m, npy_intp i, npy_intp j, npy_intp k, npy_intp l) nogil + + void PyArray_XDECREF_ERR(ndarray) + # Cannot be supported due to out arg + # void PyArray_DESCR_REPLACE(descr) + + + object PyArray_Copy(ndarray) + object PyArray_FromObject(object op, int type, int min_depth, int max_depth) + object PyArray_ContiguousFromObject(object op, int type, int min_depth, int max_depth) + object PyArray_CopyFromObject(object op, int type, int min_depth, int max_depth) + + object PyArray_Cast(ndarray mp, int type_num) + object PyArray_Take(ndarray ap, object items, int axis) + object PyArray_Put(ndarray ap, object items, object values) + + void PyArray_ITER_RESET(flatiter it) nogil + void PyArray_ITER_NEXT(flatiter it) nogil + void PyArray_ITER_GOTO(flatiter it, npy_intp* destination) nogil + void PyArray_ITER_GOTO1D(flatiter it, npy_intp ind) nogil + void* PyArray_ITER_DATA(flatiter it) nogil + bint PyArray_ITER_NOTDONE(flatiter it) nogil + + void PyArray_MultiIter_RESET(broadcast multi) nogil + void PyArray_MultiIter_NEXT(broadcast multi) nogil + void PyArray_MultiIter_GOTO(broadcast multi, npy_intp dest) nogil + void PyArray_MultiIter_GOTO1D(broadcast multi, npy_intp ind) nogil + void* PyArray_MultiIter_DATA(broadcast multi, npy_intp i) nogil + void PyArray_MultiIter_NEXTi(broadcast multi, npy_intp i) nogil + bint PyArray_MultiIter_NOTDONE(broadcast multi) nogil + + # Functions from __multiarray_api.h + + # Functions taking dtype and returning object/ndarray are disabled + # for now as they steal dtype references. I'm conservative and disable + # more than is probably needed until it can be checked further. 
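+    # For example, PyArray_DescrFromType below is safe as declared: it takes
+    # a type number (such as NPY_FLOAT64) and returns a dtype, with Cython
+    # managing the returned reference.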
+ int PyArray_SetNumericOps (object) + object PyArray_GetNumericOps () + int PyArray_INCREF (ndarray) + int PyArray_XDECREF (ndarray) + void PyArray_SetStringFunction (object, int) + dtype PyArray_DescrFromType (int) + object PyArray_TypeObjectFromType (int) + char * PyArray_Zero (ndarray) + char * PyArray_One (ndarray) + #object PyArray_CastToType (ndarray, dtype, int) + int PyArray_CastTo (ndarray, ndarray) + int PyArray_CastAnyTo (ndarray, ndarray) + int PyArray_CanCastSafely (int, int) + npy_bool PyArray_CanCastTo (dtype, dtype) + int PyArray_ObjectType (object, int) + dtype PyArray_DescrFromObject (object, dtype) + #ndarray* PyArray_ConvertToCommonType (object, int *) + dtype PyArray_DescrFromScalar (object) + dtype PyArray_DescrFromTypeObject (object) + npy_intp PyArray_Size (object) + #object PyArray_Scalar (void *, dtype, object) + #object PyArray_FromScalar (object, dtype) + void PyArray_ScalarAsCtype (object, void *) + #int PyArray_CastScalarToCtype (object, void *, dtype) + #int PyArray_CastScalarDirect (object, dtype, void *, int) + object PyArray_ScalarFromObject (object) + #PyArray_VectorUnaryFunc * PyArray_GetCastFunc (dtype, int) + object PyArray_FromDims (int, int *, int) + #object PyArray_FromDimsAndDataAndDescr (int, int *, dtype, char *) + #object PyArray_FromAny (object, dtype, int, int, int, object) + object PyArray_EnsureArray (object) + object PyArray_EnsureAnyArray (object) + #object PyArray_FromFile (stdio.FILE *, dtype, npy_intp, char *) + #object PyArray_FromString (char *, npy_intp, dtype, npy_intp, char *) + #object PyArray_FromBuffer (object, dtype, npy_intp, npy_intp) + #object PyArray_FromIter (object, dtype, npy_intp) + object PyArray_Return (ndarray) + #object PyArray_GetField (ndarray, dtype, int) + #int PyArray_SetField (ndarray, dtype, int, object) + object PyArray_Byteswap (ndarray, npy_bool) + object PyArray_Resize (ndarray, PyArray_Dims *, int, NPY_ORDER) + int PyArray_MoveInto (ndarray, ndarray) + int PyArray_CopyInto (ndarray, ndarray) + int PyArray_CopyAnyInto (ndarray, ndarray) + int PyArray_CopyObject (ndarray, object) + object PyArray_NewCopy (ndarray, NPY_ORDER) + object PyArray_ToList (ndarray) + object PyArray_ToString (ndarray, NPY_ORDER) + int PyArray_ToFile (ndarray, stdio.FILE *, char *, char *) + int PyArray_Dump (object, object, int) + object PyArray_Dumps (object, int) + int PyArray_ValidType (int) + void PyArray_UpdateFlags (ndarray, int) + object PyArray_New (type, int, npy_intp *, int, npy_intp *, void *, int, int, object) + #object PyArray_NewFromDescr (type, dtype, int, npy_intp *, npy_intp *, void *, int, object) + #dtype PyArray_DescrNew (dtype) + dtype PyArray_DescrNewFromType (int) + double PyArray_GetPriority (object, double) + object PyArray_IterNew (object) + object PyArray_MultiIterNew (int, ...) 
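+    # (PyArray_MultiIterNew is variadic; the inline helpers
+    # PyArray_MultiIterNew1 ... PyArray_MultiIterNew5 defined later in this
+    # file wrap the common arities.)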
+ + int PyArray_PyIntAsInt (object) + npy_intp PyArray_PyIntAsIntp (object) + int PyArray_Broadcast (broadcast) + void PyArray_FillObjectArray (ndarray, object) + int PyArray_FillWithScalar (ndarray, object) + npy_bool PyArray_CheckStrides (int, int, npy_intp, npy_intp, npy_intp *, npy_intp *) + dtype PyArray_DescrNewByteorder (dtype, char) + object PyArray_IterAllButAxis (object, int *) + #object PyArray_CheckFromAny (object, dtype, int, int, int, object) + #object PyArray_FromArray (ndarray, dtype, int) + object PyArray_FromInterface (object) + object PyArray_FromStructInterface (object) + #object PyArray_FromArrayAttr (object, dtype, object) + #NPY_SCALARKIND PyArray_ScalarKind (int, ndarray*) + int PyArray_CanCoerceScalar (int, int, NPY_SCALARKIND) + object PyArray_NewFlagsObject (object) + npy_bool PyArray_CanCastScalar (type, type) + #int PyArray_CompareUCS4 (npy_ucs4 *, npy_ucs4 *, register size_t) + int PyArray_RemoveSmallest (broadcast) + int PyArray_ElementStrides (object) + void PyArray_Item_INCREF (char *, dtype) + void PyArray_Item_XDECREF (char *, dtype) + object PyArray_FieldNames (object) + object PyArray_Transpose (ndarray, PyArray_Dims *) + object PyArray_TakeFrom (ndarray, object, int, ndarray, NPY_CLIPMODE) + object PyArray_PutTo (ndarray, object, object, NPY_CLIPMODE) + object PyArray_PutMask (ndarray, object, object) + object PyArray_Repeat (ndarray, object, int) + object PyArray_Choose (ndarray, object, ndarray, NPY_CLIPMODE) + int PyArray_Sort (ndarray, int, NPY_SORTKIND) + object PyArray_ArgSort (ndarray, int, NPY_SORTKIND) + object PyArray_SearchSorted (ndarray, object, NPY_SEARCHSIDE, PyObject *) + object PyArray_ArgMax (ndarray, int, ndarray) + object PyArray_ArgMin (ndarray, int, ndarray) + object PyArray_Reshape (ndarray, object) + object PyArray_Newshape (ndarray, PyArray_Dims *, NPY_ORDER) + object PyArray_Squeeze (ndarray) + #object PyArray_View (ndarray, dtype, type) + object PyArray_SwapAxes (ndarray, int, int) + object PyArray_Max (ndarray, int, ndarray) + object PyArray_Min (ndarray, int, ndarray) + object PyArray_Ptp (ndarray, int, ndarray) + object PyArray_Mean (ndarray, int, int, ndarray) + object PyArray_Trace (ndarray, int, int, int, int, ndarray) + object PyArray_Diagonal (ndarray, int, int, int) + object PyArray_Clip (ndarray, object, object, ndarray) + object PyArray_Conjugate (ndarray, ndarray) + object PyArray_Nonzero (ndarray) + object PyArray_Std (ndarray, int, int, ndarray, int) + object PyArray_Sum (ndarray, int, int, ndarray) + object PyArray_CumSum (ndarray, int, int, ndarray) + object PyArray_Prod (ndarray, int, int, ndarray) + object PyArray_CumProd (ndarray, int, int, ndarray) + object PyArray_All (ndarray, int, ndarray) + object PyArray_Any (ndarray, int, ndarray) + object PyArray_Compress (ndarray, object, int, ndarray) + object PyArray_Flatten (ndarray, NPY_ORDER) + object PyArray_Ravel (ndarray, NPY_ORDER) + npy_intp PyArray_MultiplyList (npy_intp *, int) + int PyArray_MultiplyIntList (int *, int) + void * PyArray_GetPtr (ndarray, npy_intp*) + int PyArray_CompareLists (npy_intp *, npy_intp *, int) + #int PyArray_AsCArray (object*, void *, npy_intp *, int, dtype) + #int PyArray_As1D (object*, char **, int *, int) + #int PyArray_As2D (object*, char ***, int *, int *, int) + int PyArray_Free (object, void *) + #int PyArray_Converter (object, object*) + int PyArray_IntpFromSequence (object, npy_intp *, int) + object PyArray_Concatenate (object, int) + object PyArray_InnerProduct (object, object) + object PyArray_MatrixProduct (object, 
object) + object PyArray_CopyAndTranspose (object) + object PyArray_Correlate (object, object, int) + int PyArray_TypestrConvert (int, int) + #int PyArray_DescrConverter (object, dtype*) + #int PyArray_DescrConverter2 (object, dtype*) + int PyArray_IntpConverter (object, PyArray_Dims *) + #int PyArray_BufferConverter (object, chunk) + int PyArray_AxisConverter (object, int *) + int PyArray_BoolConverter (object, npy_bool *) + int PyArray_ByteorderConverter (object, char *) + int PyArray_OrderConverter (object, NPY_ORDER *) + unsigned char PyArray_EquivTypes (dtype, dtype) + #object PyArray_Zeros (int, npy_intp *, dtype, int) + #object PyArray_Empty (int, npy_intp *, dtype, int) + object PyArray_Where (object, object, object) + object PyArray_Arange (double, double, double, int) + #object PyArray_ArangeObj (object, object, object, dtype) + int PyArray_SortkindConverter (object, NPY_SORTKIND *) + object PyArray_LexSort (object, int) + object PyArray_Round (ndarray, int, ndarray) + unsigned char PyArray_EquivTypenums (int, int) + int PyArray_RegisterDataType (dtype) + int PyArray_RegisterCastFunc (dtype, int, PyArray_VectorUnaryFunc *) + int PyArray_RegisterCanCast (dtype, int, NPY_SCALARKIND) + #void PyArray_InitArrFuncs (PyArray_ArrFuncs *) + object PyArray_IntTupleFromIntp (int, npy_intp *) + int PyArray_TypeNumFromName (char *) + int PyArray_ClipmodeConverter (object, NPY_CLIPMODE *) + #int PyArray_OutputConverter (object, ndarray*) + object PyArray_BroadcastToShape (object, npy_intp *, int) + void _PyArray_SigintHandler (int) + void* _PyArray_GetSigintBuf () + #int PyArray_DescrAlignConverter (object, dtype*) + #int PyArray_DescrAlignConverter2 (object, dtype*) + int PyArray_SearchsideConverter (object, void *) + object PyArray_CheckAxis (ndarray, int *, int) + npy_intp PyArray_OverflowMultiplyList (npy_intp *, int) + int PyArray_CompareString (char *, char *, size_t) + int PyArray_SetBaseObject(ndarray, base) # NOTE: steals a reference to base! Use "set_array_base()" instead. + + +# Typedefs that matches the runtime dtype objects in +# the numpy module. + +# The ones that are commented out needs an IFDEF function +# in Cython to enable them only on the right systems. 
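The conditional typedefs flagged in the comment above translate into scalar types that exist only on some platforms at runtime. A minimal probe of that conditionality (plain Python against the public NumPy API, not part of the vendored file):

    import numpy as np

    # Extended-precision aliases exist only where the platform provides
    # the underlying C type, mirroring the commented-out ctypedefs.
    for name in ("float96", "float128", "complex256"):
        print(name, "available" if hasattr(np, name) else "unavailable")

    # The unconditional aliases map one-to-one onto fixed-size C types.
    assert np.dtype(np.int64).itemsize == 8
    assert np.dtype(np.float32).itemsize == 4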
+ +ctypedef npy_int8 int8_t +ctypedef npy_int16 int16_t +ctypedef npy_int32 int32_t +ctypedef npy_int64 int64_t +#ctypedef npy_int96 int96_t +#ctypedef npy_int128 int128_t + +ctypedef npy_uint8 uint8_t +ctypedef npy_uint16 uint16_t +ctypedef npy_uint32 uint32_t +ctypedef npy_uint64 uint64_t +#ctypedef npy_uint96 uint96_t +#ctypedef npy_uint128 uint128_t + +ctypedef npy_float32 float32_t +ctypedef npy_float64 float64_t +#ctypedef npy_float80 float80_t +#ctypedef npy_float128 float128_t + +ctypedef float complex complex64_t +ctypedef double complex complex128_t + +# The int types are mapped a bit surprising -- +# numpy.int corresponds to 'l' and numpy.long to 'q' +ctypedef npy_long int_t +ctypedef npy_longlong long_t +ctypedef npy_longlong longlong_t + +ctypedef npy_ulong uint_t +ctypedef npy_ulonglong ulong_t +ctypedef npy_ulonglong ulonglong_t + +ctypedef npy_intp intp_t +ctypedef npy_uintp uintp_t + +ctypedef npy_double float_t +ctypedef npy_double double_t +ctypedef npy_longdouble longdouble_t + +ctypedef npy_cfloat cfloat_t +ctypedef npy_cdouble cdouble_t +ctypedef npy_clongdouble clongdouble_t + +ctypedef npy_cdouble complex_t + +cdef inline object PyArray_MultiIterNew1(a): + return PyArray_MultiIterNew(1, a) + +cdef inline object PyArray_MultiIterNew2(a, b): + return PyArray_MultiIterNew(2, a, b) + +cdef inline object PyArray_MultiIterNew3(a, b, c): + return PyArray_MultiIterNew(3, a, b, c) + +cdef inline object PyArray_MultiIterNew4(a, b, c, d): + return PyArray_MultiIterNew(4, a, b, c, d) + +cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): + return PyArray_MultiIterNew(5, a, b, c, d, e) + +cdef inline tuple PyDataType_SHAPE(dtype d): + if PyDataType_HASSUBARRAY(d): + return d.subarray.shape + else: + return () + + +cdef extern from "numpy/ndarrayobject.h": + PyTypeObject PyTimedeltaArrType_Type + PyTypeObject PyDatetimeArrType_Type + ctypedef int64_t npy_timedelta + ctypedef int64_t npy_datetime + +cdef extern from "numpy/ndarraytypes.h": + ctypedef struct PyArray_DatetimeMetaData: + NPY_DATETIMEUNIT base + int64_t num + +cdef extern from "numpy/arrayscalars.h": + + # abstract types + ctypedef class numpy.generic [object PyObject]: + pass + ctypedef class numpy.number [object PyObject]: + pass + ctypedef class numpy.integer [object PyObject]: + pass + ctypedef class numpy.signedinteger [object PyObject]: + pass + ctypedef class numpy.unsignedinteger [object PyObject]: + pass + ctypedef class numpy.inexact [object PyObject]: + pass + ctypedef class numpy.floating [object PyObject]: + pass + ctypedef class numpy.complexfloating [object PyObject]: + pass + ctypedef class numpy.flexible [object PyObject]: + pass + ctypedef class numpy.character [object PyObject]: + pass + + ctypedef struct PyDatetimeScalarObject: + # PyObject_HEAD + npy_datetime obval + PyArray_DatetimeMetaData obmeta + + ctypedef struct PyTimedeltaScalarObject: + # PyObject_HEAD + npy_timedelta obval + PyArray_DatetimeMetaData obmeta + + ctypedef enum NPY_DATETIMEUNIT: + NPY_FR_Y + NPY_FR_M + NPY_FR_W + NPY_FR_D + NPY_FR_B + NPY_FR_h + NPY_FR_m + NPY_FR_s + NPY_FR_ms + NPY_FR_us + NPY_FR_ns + NPY_FR_ps + NPY_FR_fs + NPY_FR_as + + +# +# ufunc API +# + +cdef extern from "numpy/ufuncobject.h": + + ctypedef void (*PyUFuncGenericFunction) (char **, npy_intp *, npy_intp *, void *) + + ctypedef class numpy.ufunc [object PyUFuncObject, check_size ignore]: + cdef: + int nin, nout, nargs + int identity + PyUFuncGenericFunction *functions + void **data + int ntypes + int check_return + char *name + char *types + char *doc 
+ void *ptr + PyObject *obj + PyObject *userloops + + cdef enum: + PyUFunc_Zero + PyUFunc_One + PyUFunc_None + UFUNC_ERR_IGNORE + UFUNC_ERR_WARN + UFUNC_ERR_RAISE + UFUNC_ERR_CALL + UFUNC_ERR_PRINT + UFUNC_ERR_LOG + UFUNC_MASK_DIVIDEBYZERO + UFUNC_MASK_OVERFLOW + UFUNC_MASK_UNDERFLOW + UFUNC_MASK_INVALID + UFUNC_SHIFT_DIVIDEBYZERO + UFUNC_SHIFT_OVERFLOW + UFUNC_SHIFT_UNDERFLOW + UFUNC_SHIFT_INVALID + UFUNC_FPE_DIVIDEBYZERO + UFUNC_FPE_OVERFLOW + UFUNC_FPE_UNDERFLOW + UFUNC_FPE_INVALID + UFUNC_ERR_DEFAULT + UFUNC_ERR_DEFAULT2 + + object PyUFunc_FromFuncAndData(PyUFuncGenericFunction *, + void **, char *, int, int, int, int, char *, char *, int) + int PyUFunc_RegisterLoopForType(ufunc, int, + PyUFuncGenericFunction, int *, void *) + void PyUFunc_f_f_As_d_d \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_d_d \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_f_f \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_g_g \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_F_F_As_D_D \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_F_F \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_D_D \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_G_G \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_O_O \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_ff_f_As_dd_d \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_ff_f \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_dd_d \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_gg_g \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_FF_F_As_DD_D \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_DD_D \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_FF_F \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_GG_G \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_OO_O \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_O_O_method \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_OO_O_method \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_On_Om \ + (char **, npy_intp *, npy_intp *, void *) + int PyUFunc_GetPyValues \ + (char *, int *, int *, PyObject **) + int PyUFunc_checkfperr \ + (int, PyObject *, int *) + void PyUFunc_clearfperr() + int PyUFunc_getfperr() + int PyUFunc_handlefperr \ + (int, PyObject *, int, int *) + int PyUFunc_ReplaceLoopBySignature \ + (ufunc, PyUFuncGenericFunction, int *, PyUFuncGenericFunction *) + object PyUFunc_FromFuncAndDataAndSignature \ + (PyUFuncGenericFunction *, void **, char *, int, int, int, + int, char *, char *, int, char *) + + int _import_umath() except -1 + +cdef inline void set_array_base(ndarray arr, object base): + Py_INCREF(base) # important to do this before stealing the reference below! + PyArray_SetBaseObject(arr, base) + +cdef inline object get_array_base(ndarray arr): + base = PyArray_BASE(arr) + if base is NULL: + return None + return base + +# Versions of the import_* functions which are more suitable for +# Cython code. 
+cdef inline int import_array() except -1: + try: + __pyx_import_array() + except Exception: + raise ImportError("numpy.core.multiarray failed to import") + +cdef inline int import_umath() except -1: + try: + _import_umath() + except Exception: + raise ImportError("numpy.core.umath failed to import") + +cdef inline int import_ufunc() except -1: + try: + _import_umath() + except Exception: + raise ImportError("numpy.core.umath failed to import") + + +cdef inline bint is_timedelta64_object(object obj): + """ + Cython equivalent of `isinstance(obj, np.timedelta64)` + + Parameters + ---------- + obj : object + + Returns + ------- + bool + """ + return PyObject_TypeCheck(obj, &PyTimedeltaArrType_Type) + + +cdef inline bint is_datetime64_object(object obj): + """ + Cython equivalent of `isinstance(obj, np.datetime64)` + + Parameters + ---------- + obj : object + + Returns + ------- + bool + """ + return PyObject_TypeCheck(obj, &PyDatetimeArrType_Type) + + +cdef inline npy_datetime get_datetime64_value(object obj) nogil: + """ + returns the int64 value underlying scalar numpy datetime64 object + + Note that to interpret this as a datetime, the corresponding unit is + also needed. That can be found using `get_datetime64_unit`. + """ + return (obj).obval + + +cdef inline npy_timedelta get_timedelta64_value(object obj) nogil: + """ + returns the int64 value underlying scalar numpy timedelta64 object + """ + return (obj).obval + + +cdef inline NPY_DATETIMEUNIT get_datetime64_unit(object obj) nogil: + """ + returns the unit part of the dtype for a numpy datetime64 object. + """ + return (obj).obmeta.base diff --git a/.local/lib/python3.8/site-packages/numpy/__init__.pxd b/.local/lib/python3.8/site-packages/numpy/__init__.pxd new file mode 100644 index 0000000..97f3da2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/__init__.pxd @@ -0,0 +1,1018 @@ +# NumPy static imports for Cython < 3.0 +# +# If any of the PyArray_* functions are called, import_array must be +# called first. 
+# +# Author: Dag Sverre Seljebotn +# + +DEF _buffer_format_string_len = 255 + +cimport cpython.buffer as pybuf +from cpython.ref cimport Py_INCREF +from cpython.mem cimport PyObject_Malloc, PyObject_Free +from cpython.object cimport PyObject, PyTypeObject +from cpython.buffer cimport PyObject_GetBuffer +from cpython.type cimport type +cimport libc.stdio as stdio + +cdef extern from "Python.h": + ctypedef int Py_intptr_t + bint PyObject_TypeCheck(object obj, PyTypeObject* type) + +cdef extern from "numpy/arrayobject.h": + ctypedef Py_intptr_t npy_intp + ctypedef size_t npy_uintp + + cdef enum NPY_TYPES: + NPY_BOOL + NPY_BYTE + NPY_UBYTE + NPY_SHORT + NPY_USHORT + NPY_INT + NPY_UINT + NPY_LONG + NPY_ULONG + NPY_LONGLONG + NPY_ULONGLONG + NPY_FLOAT + NPY_DOUBLE + NPY_LONGDOUBLE + NPY_CFLOAT + NPY_CDOUBLE + NPY_CLONGDOUBLE + NPY_OBJECT + NPY_STRING + NPY_UNICODE + NPY_VOID + NPY_DATETIME + NPY_TIMEDELTA + NPY_NTYPES + NPY_NOTYPE + + NPY_INT8 + NPY_INT16 + NPY_INT32 + NPY_INT64 + NPY_INT128 + NPY_INT256 + NPY_UINT8 + NPY_UINT16 + NPY_UINT32 + NPY_UINT64 + NPY_UINT128 + NPY_UINT256 + NPY_FLOAT16 + NPY_FLOAT32 + NPY_FLOAT64 + NPY_FLOAT80 + NPY_FLOAT96 + NPY_FLOAT128 + NPY_FLOAT256 + NPY_COMPLEX32 + NPY_COMPLEX64 + NPY_COMPLEX128 + NPY_COMPLEX160 + NPY_COMPLEX192 + NPY_COMPLEX256 + NPY_COMPLEX512 + + NPY_INTP + + ctypedef enum NPY_ORDER: + NPY_ANYORDER + NPY_CORDER + NPY_FORTRANORDER + NPY_KEEPORDER + + ctypedef enum NPY_CASTING: + NPY_NO_CASTING + NPY_EQUIV_CASTING + NPY_SAFE_CASTING + NPY_SAME_KIND_CASTING + NPY_UNSAFE_CASTING + + ctypedef enum NPY_CLIPMODE: + NPY_CLIP + NPY_WRAP + NPY_RAISE + + ctypedef enum NPY_SCALARKIND: + NPY_NOSCALAR, + NPY_BOOL_SCALAR, + NPY_INTPOS_SCALAR, + NPY_INTNEG_SCALAR, + NPY_FLOAT_SCALAR, + NPY_COMPLEX_SCALAR, + NPY_OBJECT_SCALAR + + ctypedef enum NPY_SORTKIND: + NPY_QUICKSORT + NPY_HEAPSORT + NPY_MERGESORT + + ctypedef enum NPY_SEARCHSIDE: + NPY_SEARCHLEFT + NPY_SEARCHRIGHT + + enum: + # DEPRECATED since NumPy 1.7 ! Do not use in new code! + NPY_C_CONTIGUOUS + NPY_F_CONTIGUOUS + NPY_CONTIGUOUS + NPY_FORTRAN + NPY_OWNDATA + NPY_FORCECAST + NPY_ENSURECOPY + NPY_ENSUREARRAY + NPY_ELEMENTSTRIDES + NPY_ALIGNED + NPY_NOTSWAPPED + NPY_WRITEABLE + NPY_UPDATEIFCOPY + NPY_ARR_HAS_DESCR + + NPY_BEHAVED + NPY_BEHAVED_NS + NPY_CARRAY + NPY_CARRAY_RO + NPY_FARRAY + NPY_FARRAY_RO + NPY_DEFAULT + + NPY_IN_ARRAY + NPY_OUT_ARRAY + NPY_INOUT_ARRAY + NPY_IN_FARRAY + NPY_OUT_FARRAY + NPY_INOUT_FARRAY + + NPY_UPDATE_ALL + + enum: + # Added in NumPy 1.7 to replace the deprecated enums above. + NPY_ARRAY_C_CONTIGUOUS + NPY_ARRAY_F_CONTIGUOUS + NPY_ARRAY_OWNDATA + NPY_ARRAY_FORCECAST + NPY_ARRAY_ENSURECOPY + NPY_ARRAY_ENSUREARRAY + NPY_ARRAY_ELEMENTSTRIDES + NPY_ARRAY_ALIGNED + NPY_ARRAY_NOTSWAPPED + NPY_ARRAY_WRITEABLE + NPY_ARRAY_UPDATEIFCOPY + + NPY_ARRAY_BEHAVED + NPY_ARRAY_BEHAVED_NS + NPY_ARRAY_CARRAY + NPY_ARRAY_CARRAY_RO + NPY_ARRAY_FARRAY + NPY_ARRAY_FARRAY_RO + NPY_ARRAY_DEFAULT + + NPY_ARRAY_IN_ARRAY + NPY_ARRAY_OUT_ARRAY + NPY_ARRAY_INOUT_ARRAY + NPY_ARRAY_IN_FARRAY + NPY_ARRAY_OUT_FARRAY + NPY_ARRAY_INOUT_FARRAY + + NPY_ARRAY_UPDATE_ALL + + cdef enum: + NPY_MAXDIMS + + npy_intp NPY_MAX_ELSIZE + + ctypedef void (*PyArray_VectorUnaryFunc)(void *, void *, npy_intp, void *, void *) + + ctypedef struct PyArray_ArrayDescr: + # shape is a tuple, but Cython doesn't support "tuple shape" + # inside a non-PyObject declaration, so we have to declare it + # as just a PyObject*. 
+ PyObject* shape + + ctypedef struct PyArray_Descr: + pass + + ctypedef class numpy.dtype [object PyArray_Descr, check_size ignore]: + # Use PyDataType_* macros when possible, however there are no macros + # for accessing some of the fields, so some are defined. + cdef PyTypeObject* typeobj + cdef char kind + cdef char type + # Numpy sometimes mutates this without warning (e.g. it'll + # sometimes change "|" to "<" in shared dtype objects on + # little-endian machines). If this matters to you, use + # PyArray_IsNativeByteOrder(dtype.byteorder) instead of + # directly accessing this field. + cdef char byteorder + cdef char flags + cdef int type_num + cdef int itemsize "elsize" + cdef int alignment + cdef object fields + cdef tuple names + # Use PyDataType_HASSUBARRAY to test whether this field is + # valid (the pointer can be NULL). Most users should access + # this field via the inline helper method PyDataType_SHAPE. + cdef PyArray_ArrayDescr* subarray + + ctypedef class numpy.flatiter [object PyArrayIterObject, check_size ignore]: + # Use through macros + pass + + ctypedef class numpy.broadcast [object PyArrayMultiIterObject, check_size ignore]: + cdef int numiter + cdef npy_intp size, index + cdef int nd + cdef npy_intp *dimensions + cdef void **iters + + ctypedef struct PyArrayObject: + # For use in situations where ndarray can't replace PyArrayObject*, + # like PyArrayObject**. + pass + + ctypedef class numpy.ndarray [object PyArrayObject, check_size ignore]: + cdef __cythonbufferdefaults__ = {"mode": "strided"} + + cdef: + # Only taking a few of the most commonly used and stable fields. + # One should use PyArray_* macros instead to access the C fields. + char *data + int ndim "nd" + npy_intp *shape "dimensions" + npy_intp *strides + dtype descr # deprecated since NumPy 1.7 ! + PyObject* base # NOT PUBLIC, DO NOT USE ! 
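As the comment above says, the struct fields are best reached through the PyArray_* macros; from Python they surface as ordinary ndarray attributes. A small illustration using only public NumPy API:

    import numpy as np

    a = np.arange(12, dtype=np.int32).reshape(3, 4)
    v = a[1:, ::2]              # a strided view into a

    print(a.ndim)               # the "nd" field
    print(a.shape)              # the "dimensions" field
    print(a.strides)            # the "strides" field
    print(v.base is a)          # True: "base" keeps the owning array alive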
+ + + + ctypedef unsigned char npy_bool + + ctypedef signed char npy_byte + ctypedef signed short npy_short + ctypedef signed int npy_int + ctypedef signed long npy_long + ctypedef signed long long npy_longlong + + ctypedef unsigned char npy_ubyte + ctypedef unsigned short npy_ushort + ctypedef unsigned int npy_uint + ctypedef unsigned long npy_ulong + ctypedef unsigned long long npy_ulonglong + + ctypedef float npy_float + ctypedef double npy_double + ctypedef long double npy_longdouble + + ctypedef signed char npy_int8 + ctypedef signed short npy_int16 + ctypedef signed int npy_int32 + ctypedef signed long long npy_int64 + ctypedef signed long long npy_int96 + ctypedef signed long long npy_int128 + + ctypedef unsigned char npy_uint8 + ctypedef unsigned short npy_uint16 + ctypedef unsigned int npy_uint32 + ctypedef unsigned long long npy_uint64 + ctypedef unsigned long long npy_uint96 + ctypedef unsigned long long npy_uint128 + + ctypedef float npy_float32 + ctypedef double npy_float64 + ctypedef long double npy_float80 + ctypedef long double npy_float96 + ctypedef long double npy_float128 + + ctypedef struct npy_cfloat: + float real + float imag + + ctypedef struct npy_cdouble: + double real + double imag + + ctypedef struct npy_clongdouble: + long double real + long double imag + + ctypedef struct npy_complex64: + float real + float imag + + ctypedef struct npy_complex128: + double real + double imag + + ctypedef struct npy_complex160: + long double real + long double imag + + ctypedef struct npy_complex192: + long double real + long double imag + + ctypedef struct npy_complex256: + long double real + long double imag + + ctypedef struct PyArray_Dims: + npy_intp *ptr + int len + + int _import_array() except -1 + # A second definition so _import_array isn't marked as used when we use it here. + # Do not use - subject to change any time. + int __pyx_import_array "_import_array"() except -1 + + # + # Macros from ndarrayobject.h + # + bint PyArray_CHKFLAGS(ndarray m, int flags) nogil + bint PyArray_IS_C_CONTIGUOUS(ndarray arr) nogil + bint PyArray_IS_F_CONTIGUOUS(ndarray arr) nogil + bint PyArray_ISCONTIGUOUS(ndarray m) nogil + bint PyArray_ISWRITEABLE(ndarray m) nogil + bint PyArray_ISALIGNED(ndarray m) nogil + + int PyArray_NDIM(ndarray) nogil + bint PyArray_ISONESEGMENT(ndarray) nogil + bint PyArray_ISFORTRAN(ndarray) nogil + int PyArray_FORTRANIF(ndarray) nogil + + void* PyArray_DATA(ndarray) nogil + char* PyArray_BYTES(ndarray) nogil + + npy_intp* PyArray_DIMS(ndarray) nogil + npy_intp* PyArray_STRIDES(ndarray) nogil + npy_intp PyArray_DIM(ndarray, size_t) nogil + npy_intp PyArray_STRIDE(ndarray, size_t) nogil + + PyObject *PyArray_BASE(ndarray) nogil # returns borrowed reference! + PyArray_Descr *PyArray_DESCR(ndarray) nogil # returns borrowed reference to dtype! 
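PyArray_DESCR above returns a borrowed reference to the array's dtype, and the descriptor fields declared earlier (kind, "elsize", byteorder) are visible from Python as dtype attributes. A quick look, hedged because byteorder may be normalized by NumPy:

    import numpy as np

    dt = np.dtype('<f8')
    print(dt.kind, dt.char)     # 'f' 'd'
    print(dt.itemsize)          # the "elsize" field -> 8
    print(dt.byteorder)         # '<', '>', '=' or '|'; may be normalized
    print(dt.isnative)          # PyArray_IsNativeByteOrder on this field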
+ int PyArray_FLAGS(ndarray) nogil + npy_intp PyArray_ITEMSIZE(ndarray) nogil + int PyArray_TYPE(ndarray arr) nogil + + object PyArray_GETITEM(ndarray arr, void *itemptr) + int PyArray_SETITEM(ndarray arr, void *itemptr, object obj) + + bint PyTypeNum_ISBOOL(int) nogil + bint PyTypeNum_ISUNSIGNED(int) nogil + bint PyTypeNum_ISSIGNED(int) nogil + bint PyTypeNum_ISINTEGER(int) nogil + bint PyTypeNum_ISFLOAT(int) nogil + bint PyTypeNum_ISNUMBER(int) nogil + bint PyTypeNum_ISSTRING(int) nogil + bint PyTypeNum_ISCOMPLEX(int) nogil + bint PyTypeNum_ISPYTHON(int) nogil + bint PyTypeNum_ISFLEXIBLE(int) nogil + bint PyTypeNum_ISUSERDEF(int) nogil + bint PyTypeNum_ISEXTENDED(int) nogil + bint PyTypeNum_ISOBJECT(int) nogil + + bint PyDataType_ISBOOL(dtype) nogil + bint PyDataType_ISUNSIGNED(dtype) nogil + bint PyDataType_ISSIGNED(dtype) nogil + bint PyDataType_ISINTEGER(dtype) nogil + bint PyDataType_ISFLOAT(dtype) nogil + bint PyDataType_ISNUMBER(dtype) nogil + bint PyDataType_ISSTRING(dtype) nogil + bint PyDataType_ISCOMPLEX(dtype) nogil + bint PyDataType_ISPYTHON(dtype) nogil + bint PyDataType_ISFLEXIBLE(dtype) nogil + bint PyDataType_ISUSERDEF(dtype) nogil + bint PyDataType_ISEXTENDED(dtype) nogil + bint PyDataType_ISOBJECT(dtype) nogil + bint PyDataType_HASFIELDS(dtype) nogil + bint PyDataType_HASSUBARRAY(dtype) nogil + + bint PyArray_ISBOOL(ndarray) nogil + bint PyArray_ISUNSIGNED(ndarray) nogil + bint PyArray_ISSIGNED(ndarray) nogil + bint PyArray_ISINTEGER(ndarray) nogil + bint PyArray_ISFLOAT(ndarray) nogil + bint PyArray_ISNUMBER(ndarray) nogil + bint PyArray_ISSTRING(ndarray) nogil + bint PyArray_ISCOMPLEX(ndarray) nogil + bint PyArray_ISPYTHON(ndarray) nogil + bint PyArray_ISFLEXIBLE(ndarray) nogil + bint PyArray_ISUSERDEF(ndarray) nogil + bint PyArray_ISEXTENDED(ndarray) nogil + bint PyArray_ISOBJECT(ndarray) nogil + bint PyArray_HASFIELDS(ndarray) nogil + + bint PyArray_ISVARIABLE(ndarray) nogil + + bint PyArray_SAFEALIGNEDCOPY(ndarray) nogil + bint PyArray_ISNBO(char) nogil # works on ndarray.byteorder + bint PyArray_IsNativeByteOrder(char) nogil # works on ndarray.byteorder + bint PyArray_ISNOTSWAPPED(ndarray) nogil + bint PyArray_ISBYTESWAPPED(ndarray) nogil + + bint PyArray_FLAGSWAP(ndarray, int) nogil + + bint PyArray_ISCARRAY(ndarray) nogil + bint PyArray_ISCARRAY_RO(ndarray) nogil + bint PyArray_ISFARRAY(ndarray) nogil + bint PyArray_ISFARRAY_RO(ndarray) nogil + bint PyArray_ISBEHAVED(ndarray) nogil + bint PyArray_ISBEHAVED_RO(ndarray) nogil + + + bint PyDataType_ISNOTSWAPPED(dtype) nogil + bint PyDataType_ISBYTESWAPPED(dtype) nogil + + bint PyArray_DescrCheck(object) + + bint PyArray_Check(object) + bint PyArray_CheckExact(object) + + # Cannot be supported due to out arg: + # bint PyArray_HasArrayInterfaceType(object, dtype, object, object&) + # bint PyArray_HasArrayInterface(op, out) + + + bint PyArray_IsZeroDim(object) + # Cannot be supported due to ## ## in macro: + # bint PyArray_IsScalar(object, verbatim work) + bint PyArray_CheckScalar(object) + bint PyArray_IsPythonNumber(object) + bint PyArray_IsPythonScalar(object) + bint PyArray_IsAnyScalar(object) + bint PyArray_CheckAnyScalar(object) + + ndarray PyArray_GETCONTIGUOUS(ndarray) + bint PyArray_SAMESHAPE(ndarray, ndarray) nogil + npy_intp PyArray_SIZE(ndarray) nogil + npy_intp PyArray_NBYTES(ndarray) nogil + + object PyArray_FROM_O(object) + object PyArray_FROM_OF(object m, int flags) + object PyArray_FROM_OT(object m, int type) + object PyArray_FROM_OTF(object m, int type, int flags) + object PyArray_FROMANY(object m, 
int type, int min, int max, int flags) + object PyArray_ZEROS(int nd, npy_intp* dims, int type, int fortran) + object PyArray_EMPTY(int nd, npy_intp* dims, int type, int fortran) + void PyArray_FILLWBYTE(object, int val) + npy_intp PyArray_REFCOUNT(object) + object PyArray_ContiguousFromAny(op, int, int min_depth, int max_depth) + unsigned char PyArray_EquivArrTypes(ndarray a1, ndarray a2) + bint PyArray_EquivByteorders(int b1, int b2) nogil + object PyArray_SimpleNew(int nd, npy_intp* dims, int typenum) + object PyArray_SimpleNewFromData(int nd, npy_intp* dims, int typenum, void* data) + #object PyArray_SimpleNewFromDescr(int nd, npy_intp* dims, dtype descr) + object PyArray_ToScalar(void* data, ndarray arr) + + void* PyArray_GETPTR1(ndarray m, npy_intp i) nogil + void* PyArray_GETPTR2(ndarray m, npy_intp i, npy_intp j) nogil + void* PyArray_GETPTR3(ndarray m, npy_intp i, npy_intp j, npy_intp k) nogil + void* PyArray_GETPTR4(ndarray m, npy_intp i, npy_intp j, npy_intp k, npy_intp l) nogil + + void PyArray_XDECREF_ERR(ndarray) + # Cannot be supported due to out arg + # void PyArray_DESCR_REPLACE(descr) + + + object PyArray_Copy(ndarray) + object PyArray_FromObject(object op, int type, int min_depth, int max_depth) + object PyArray_ContiguousFromObject(object op, int type, int min_depth, int max_depth) + object PyArray_CopyFromObject(object op, int type, int min_depth, int max_depth) + + object PyArray_Cast(ndarray mp, int type_num) + object PyArray_Take(ndarray ap, object items, int axis) + object PyArray_Put(ndarray ap, object items, object values) + + void PyArray_ITER_RESET(flatiter it) nogil + void PyArray_ITER_NEXT(flatiter it) nogil + void PyArray_ITER_GOTO(flatiter it, npy_intp* destination) nogil + void PyArray_ITER_GOTO1D(flatiter it, npy_intp ind) nogil + void* PyArray_ITER_DATA(flatiter it) nogil + bint PyArray_ITER_NOTDONE(flatiter it) nogil + + void PyArray_MultiIter_RESET(broadcast multi) nogil + void PyArray_MultiIter_NEXT(broadcast multi) nogil + void PyArray_MultiIter_GOTO(broadcast multi, npy_intp dest) nogil + void PyArray_MultiIter_GOTO1D(broadcast multi, npy_intp ind) nogil + void* PyArray_MultiIter_DATA(broadcast multi, npy_intp i) nogil + void PyArray_MultiIter_NEXTi(broadcast multi, npy_intp i) nogil + bint PyArray_MultiIter_NOTDONE(broadcast multi) nogil + + # Functions from __multiarray_api.h + + # Functions taking dtype and returning object/ndarray are disabled + # for now as they steal dtype references. I'm conservative and disable + # more than is probably needed until it can be checked further. 
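The caution in the comment above is about ownership: the disabled constructors consume ("steal") a reference to the dtype they are given. The Python-level factories manage that reference themselves, which is why they remain the safe path; a sketch:

    import numpy as np

    # np.zeros / np.empty cover the disabled PyArray_Zeros / PyArray_Empty
    # without any reference-counting hazard for the caller.
    dt = np.dtype(np.float32)
    z = np.zeros((2, 3), dtype=dt)
    e = np.empty_like(z)
    print(z.dtype, e.shape)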
+ int PyArray_SetNumericOps (object) + object PyArray_GetNumericOps () + int PyArray_INCREF (ndarray) + int PyArray_XDECREF (ndarray) + void PyArray_SetStringFunction (object, int) + dtype PyArray_DescrFromType (int) + object PyArray_TypeObjectFromType (int) + char * PyArray_Zero (ndarray) + char * PyArray_One (ndarray) + #object PyArray_CastToType (ndarray, dtype, int) + int PyArray_CastTo (ndarray, ndarray) + int PyArray_CastAnyTo (ndarray, ndarray) + int PyArray_CanCastSafely (int, int) + npy_bool PyArray_CanCastTo (dtype, dtype) + int PyArray_ObjectType (object, int) + dtype PyArray_DescrFromObject (object, dtype) + #ndarray* PyArray_ConvertToCommonType (object, int *) + dtype PyArray_DescrFromScalar (object) + dtype PyArray_DescrFromTypeObject (object) + npy_intp PyArray_Size (object) + #object PyArray_Scalar (void *, dtype, object) + #object PyArray_FromScalar (object, dtype) + void PyArray_ScalarAsCtype (object, void *) + #int PyArray_CastScalarToCtype (object, void *, dtype) + #int PyArray_CastScalarDirect (object, dtype, void *, int) + object PyArray_ScalarFromObject (object) + #PyArray_VectorUnaryFunc * PyArray_GetCastFunc (dtype, int) + object PyArray_FromDims (int, int *, int) + #object PyArray_FromDimsAndDataAndDescr (int, int *, dtype, char *) + #object PyArray_FromAny (object, dtype, int, int, int, object) + object PyArray_EnsureArray (object) + object PyArray_EnsureAnyArray (object) + #object PyArray_FromFile (stdio.FILE *, dtype, npy_intp, char *) + #object PyArray_FromString (char *, npy_intp, dtype, npy_intp, char *) + #object PyArray_FromBuffer (object, dtype, npy_intp, npy_intp) + #object PyArray_FromIter (object, dtype, npy_intp) + object PyArray_Return (ndarray) + #object PyArray_GetField (ndarray, dtype, int) + #int PyArray_SetField (ndarray, dtype, int, object) + object PyArray_Byteswap (ndarray, npy_bool) + object PyArray_Resize (ndarray, PyArray_Dims *, int, NPY_ORDER) + int PyArray_MoveInto (ndarray, ndarray) + int PyArray_CopyInto (ndarray, ndarray) + int PyArray_CopyAnyInto (ndarray, ndarray) + int PyArray_CopyObject (ndarray, object) + object PyArray_NewCopy (ndarray, NPY_ORDER) + object PyArray_ToList (ndarray) + object PyArray_ToString (ndarray, NPY_ORDER) + int PyArray_ToFile (ndarray, stdio.FILE *, char *, char *) + int PyArray_Dump (object, object, int) + object PyArray_Dumps (object, int) + int PyArray_ValidType (int) + void PyArray_UpdateFlags (ndarray, int) + object PyArray_New (type, int, npy_intp *, int, npy_intp *, void *, int, int, object) + #object PyArray_NewFromDescr (type, dtype, int, npy_intp *, npy_intp *, void *, int, object) + #dtype PyArray_DescrNew (dtype) + dtype PyArray_DescrNewFromType (int) + double PyArray_GetPriority (object, double) + object PyArray_IterNew (object) + object PyArray_MultiIterNew (int, ...) 
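PyArray_MultiIterNew, together with the broadcast class declared earlier and the MultiIterNew1..5 inline helpers further down, backs NumPy's broadcasting iterator. Its Python-level face is np.broadcast; a short demonstration:

    import numpy as np

    b = np.broadcast(np.arange(3).reshape(3, 1), np.arange(4))
    print(b.shape, b.size)      # (3, 4) 12
    print(b.nd)                 # the "nd" field of PyArrayMultiIterObject
    for x, y in b:              # walks both operands in lockstep
        pass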
+ + int PyArray_PyIntAsInt (object) + npy_intp PyArray_PyIntAsIntp (object) + int PyArray_Broadcast (broadcast) + void PyArray_FillObjectArray (ndarray, object) + int PyArray_FillWithScalar (ndarray, object) + npy_bool PyArray_CheckStrides (int, int, npy_intp, npy_intp, npy_intp *, npy_intp *) + dtype PyArray_DescrNewByteorder (dtype, char) + object PyArray_IterAllButAxis (object, int *) + #object PyArray_CheckFromAny (object, dtype, int, int, int, object) + #object PyArray_FromArray (ndarray, dtype, int) + object PyArray_FromInterface (object) + object PyArray_FromStructInterface (object) + #object PyArray_FromArrayAttr (object, dtype, object) + #NPY_SCALARKIND PyArray_ScalarKind (int, ndarray*) + int PyArray_CanCoerceScalar (int, int, NPY_SCALARKIND) + object PyArray_NewFlagsObject (object) + npy_bool PyArray_CanCastScalar (type, type) + #int PyArray_CompareUCS4 (npy_ucs4 *, npy_ucs4 *, register size_t) + int PyArray_RemoveSmallest (broadcast) + int PyArray_ElementStrides (object) + void PyArray_Item_INCREF (char *, dtype) + void PyArray_Item_XDECREF (char *, dtype) + object PyArray_FieldNames (object) + object PyArray_Transpose (ndarray, PyArray_Dims *) + object PyArray_TakeFrom (ndarray, object, int, ndarray, NPY_CLIPMODE) + object PyArray_PutTo (ndarray, object, object, NPY_CLIPMODE) + object PyArray_PutMask (ndarray, object, object) + object PyArray_Repeat (ndarray, object, int) + object PyArray_Choose (ndarray, object, ndarray, NPY_CLIPMODE) + int PyArray_Sort (ndarray, int, NPY_SORTKIND) + object PyArray_ArgSort (ndarray, int, NPY_SORTKIND) + object PyArray_SearchSorted (ndarray, object, NPY_SEARCHSIDE, PyObject *) + object PyArray_ArgMax (ndarray, int, ndarray) + object PyArray_ArgMin (ndarray, int, ndarray) + object PyArray_Reshape (ndarray, object) + object PyArray_Newshape (ndarray, PyArray_Dims *, NPY_ORDER) + object PyArray_Squeeze (ndarray) + #object PyArray_View (ndarray, dtype, type) + object PyArray_SwapAxes (ndarray, int, int) + object PyArray_Max (ndarray, int, ndarray) + object PyArray_Min (ndarray, int, ndarray) + object PyArray_Ptp (ndarray, int, ndarray) + object PyArray_Mean (ndarray, int, int, ndarray) + object PyArray_Trace (ndarray, int, int, int, int, ndarray) + object PyArray_Diagonal (ndarray, int, int, int) + object PyArray_Clip (ndarray, object, object, ndarray) + object PyArray_Conjugate (ndarray, ndarray) + object PyArray_Nonzero (ndarray) + object PyArray_Std (ndarray, int, int, ndarray, int) + object PyArray_Sum (ndarray, int, int, ndarray) + object PyArray_CumSum (ndarray, int, int, ndarray) + object PyArray_Prod (ndarray, int, int, ndarray) + object PyArray_CumProd (ndarray, int, int, ndarray) + object PyArray_All (ndarray, int, ndarray) + object PyArray_Any (ndarray, int, ndarray) + object PyArray_Compress (ndarray, object, int, ndarray) + object PyArray_Flatten (ndarray, NPY_ORDER) + object PyArray_Ravel (ndarray, NPY_ORDER) + npy_intp PyArray_MultiplyList (npy_intp *, int) + int PyArray_MultiplyIntList (int *, int) + void * PyArray_GetPtr (ndarray, npy_intp*) + int PyArray_CompareLists (npy_intp *, npy_intp *, int) + #int PyArray_AsCArray (object*, void *, npy_intp *, int, dtype) + #int PyArray_As1D (object*, char **, int *, int) + #int PyArray_As2D (object*, char ***, int *, int *, int) + int PyArray_Free (object, void *) + #int PyArray_Converter (object, object*) + int PyArray_IntpFromSequence (object, npy_intp *, int) + object PyArray_Concatenate (object, int) + object PyArray_InnerProduct (object, object) + object PyArray_MatrixProduct (object, 
object) + object PyArray_CopyAndTranspose (object) + object PyArray_Correlate (object, object, int) + int PyArray_TypestrConvert (int, int) + #int PyArray_DescrConverter (object, dtype*) + #int PyArray_DescrConverter2 (object, dtype*) + int PyArray_IntpConverter (object, PyArray_Dims *) + #int PyArray_BufferConverter (object, chunk) + int PyArray_AxisConverter (object, int *) + int PyArray_BoolConverter (object, npy_bool *) + int PyArray_ByteorderConverter (object, char *) + int PyArray_OrderConverter (object, NPY_ORDER *) + unsigned char PyArray_EquivTypes (dtype, dtype) + #object PyArray_Zeros (int, npy_intp *, dtype, int) + #object PyArray_Empty (int, npy_intp *, dtype, int) + object PyArray_Where (object, object, object) + object PyArray_Arange (double, double, double, int) + #object PyArray_ArangeObj (object, object, object, dtype) + int PyArray_SortkindConverter (object, NPY_SORTKIND *) + object PyArray_LexSort (object, int) + object PyArray_Round (ndarray, int, ndarray) + unsigned char PyArray_EquivTypenums (int, int) + int PyArray_RegisterDataType (dtype) + int PyArray_RegisterCastFunc (dtype, int, PyArray_VectorUnaryFunc *) + int PyArray_RegisterCanCast (dtype, int, NPY_SCALARKIND) + #void PyArray_InitArrFuncs (PyArray_ArrFuncs *) + object PyArray_IntTupleFromIntp (int, npy_intp *) + int PyArray_TypeNumFromName (char *) + int PyArray_ClipmodeConverter (object, NPY_CLIPMODE *) + #int PyArray_OutputConverter (object, ndarray*) + object PyArray_BroadcastToShape (object, npy_intp *, int) + void _PyArray_SigintHandler (int) + void* _PyArray_GetSigintBuf () + #int PyArray_DescrAlignConverter (object, dtype*) + #int PyArray_DescrAlignConverter2 (object, dtype*) + int PyArray_SearchsideConverter (object, void *) + object PyArray_CheckAxis (ndarray, int *, int) + npy_intp PyArray_OverflowMultiplyList (npy_intp *, int) + int PyArray_CompareString (char *, char *, size_t) + int PyArray_SetBaseObject(ndarray, base) # NOTE: steals a reference to base! Use "set_array_base()" instead. + + +# Typedefs that matches the runtime dtype objects in +# the numpy module. + +# The ones that are commented out needs an IFDEF function +# in Cython to enable them only on the right systems. 
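Several predicates in the list above (PyArray_CanCastSafely, PyArray_EquivTypes, PyArray_CanCoerceScalar) have direct Python-level counterparts, which is the easiest way to sanity-check their semantics without C-level code:

    import numpy as np

    print(np.can_cast(np.int32, np.float64))       # safe cast -> True
    print(np.can_cast(np.float64, np.int32))       # lossy -> False
    print(np.can_cast(np.float64, np.int32, casting='unsafe'))
    print(np.promote_types(np.int16, np.uint16))   # -> int32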
+ +ctypedef npy_int8 int8_t +ctypedef npy_int16 int16_t +ctypedef npy_int32 int32_t +ctypedef npy_int64 int64_t +#ctypedef npy_int96 int96_t +#ctypedef npy_int128 int128_t + +ctypedef npy_uint8 uint8_t +ctypedef npy_uint16 uint16_t +ctypedef npy_uint32 uint32_t +ctypedef npy_uint64 uint64_t +#ctypedef npy_uint96 uint96_t +#ctypedef npy_uint128 uint128_t + +ctypedef npy_float32 float32_t +ctypedef npy_float64 float64_t +#ctypedef npy_float80 float80_t +#ctypedef npy_float128 float128_t + +ctypedef float complex complex64_t +ctypedef double complex complex128_t + +# The int types are mapped a bit surprising -- +# numpy.int corresponds to 'l' and numpy.long to 'q' +ctypedef npy_long int_t +ctypedef npy_longlong long_t +ctypedef npy_longlong longlong_t + +ctypedef npy_ulong uint_t +ctypedef npy_ulonglong ulong_t +ctypedef npy_ulonglong ulonglong_t + +ctypedef npy_intp intp_t +ctypedef npy_uintp uintp_t + +ctypedef npy_double float_t +ctypedef npy_double double_t +ctypedef npy_longdouble longdouble_t + +ctypedef npy_cfloat cfloat_t +ctypedef npy_cdouble cdouble_t +ctypedef npy_clongdouble clongdouble_t + +ctypedef npy_cdouble complex_t + +cdef inline object PyArray_MultiIterNew1(a): + return PyArray_MultiIterNew(1, a) + +cdef inline object PyArray_MultiIterNew2(a, b): + return PyArray_MultiIterNew(2, a, b) + +cdef inline object PyArray_MultiIterNew3(a, b, c): + return PyArray_MultiIterNew(3, a, b, c) + +cdef inline object PyArray_MultiIterNew4(a, b, c, d): + return PyArray_MultiIterNew(4, a, b, c, d) + +cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): + return PyArray_MultiIterNew(5, a, b, c, d, e) + +cdef inline tuple PyDataType_SHAPE(dtype d): + if PyDataType_HASSUBARRAY(d): + return d.subarray.shape + else: + return () + + +cdef extern from "numpy/ndarrayobject.h": + PyTypeObject PyTimedeltaArrType_Type + PyTypeObject PyDatetimeArrType_Type + ctypedef int64_t npy_timedelta + ctypedef int64_t npy_datetime + +cdef extern from "numpy/ndarraytypes.h": + ctypedef struct PyArray_DatetimeMetaData: + NPY_DATETIMEUNIT base + int64_t num + +cdef extern from "numpy/arrayscalars.h": + + # abstract types + ctypedef class numpy.generic [object PyObject]: + pass + ctypedef class numpy.number [object PyObject]: + pass + ctypedef class numpy.integer [object PyObject]: + pass + ctypedef class numpy.signedinteger [object PyObject]: + pass + ctypedef class numpy.unsignedinteger [object PyObject]: + pass + ctypedef class numpy.inexact [object PyObject]: + pass + ctypedef class numpy.floating [object PyObject]: + pass + ctypedef class numpy.complexfloating [object PyObject]: + pass + ctypedef class numpy.flexible [object PyObject]: + pass + ctypedef class numpy.character [object PyObject]: + pass + + ctypedef struct PyDatetimeScalarObject: + # PyObject_HEAD + npy_datetime obval + PyArray_DatetimeMetaData obmeta + + ctypedef struct PyTimedeltaScalarObject: + # PyObject_HEAD + npy_timedelta obval + PyArray_DatetimeMetaData obmeta + + ctypedef enum NPY_DATETIMEUNIT: + NPY_FR_Y + NPY_FR_M + NPY_FR_W + NPY_FR_D + NPY_FR_B + NPY_FR_h + NPY_FR_m + NPY_FR_s + NPY_FR_ms + NPY_FR_us + NPY_FR_ns + NPY_FR_ps + NPY_FR_fs + NPY_FR_as + + +# +# ufunc API +# + +cdef extern from "numpy/ufuncobject.h": + + ctypedef void (*PyUFuncGenericFunction) (char **, npy_intp *, npy_intp *, void *) + + ctypedef class numpy.ufunc [object PyUFuncObject, check_size ignore]: + cdef: + int nin, nout, nargs + int identity + PyUFuncGenericFunction *functions + void **data + int ntypes + int check_return + char *name + char *types + char *doc 
+ void *ptr + PyObject *obj + PyObject *userloops + + cdef enum: + PyUFunc_Zero + PyUFunc_One + PyUFunc_None + UFUNC_ERR_IGNORE + UFUNC_ERR_WARN + UFUNC_ERR_RAISE + UFUNC_ERR_CALL + UFUNC_ERR_PRINT + UFUNC_ERR_LOG + UFUNC_MASK_DIVIDEBYZERO + UFUNC_MASK_OVERFLOW + UFUNC_MASK_UNDERFLOW + UFUNC_MASK_INVALID + UFUNC_SHIFT_DIVIDEBYZERO + UFUNC_SHIFT_OVERFLOW + UFUNC_SHIFT_UNDERFLOW + UFUNC_SHIFT_INVALID + UFUNC_FPE_DIVIDEBYZERO + UFUNC_FPE_OVERFLOW + UFUNC_FPE_UNDERFLOW + UFUNC_FPE_INVALID + UFUNC_ERR_DEFAULT + UFUNC_ERR_DEFAULT2 + + object PyUFunc_FromFuncAndData(PyUFuncGenericFunction *, + void **, char *, int, int, int, int, char *, char *, int) + int PyUFunc_RegisterLoopForType(ufunc, int, + PyUFuncGenericFunction, int *, void *) + void PyUFunc_f_f_As_d_d \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_d_d \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_f_f \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_g_g \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_F_F_As_D_D \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_F_F \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_D_D \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_G_G \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_O_O \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_ff_f_As_dd_d \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_ff_f \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_dd_d \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_gg_g \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_FF_F_As_DD_D \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_DD_D \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_FF_F \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_GG_G \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_OO_O \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_O_O_method \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_OO_O_method \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_On_Om \ + (char **, npy_intp *, npy_intp *, void *) + int PyUFunc_GetPyValues \ + (char *, int *, int *, PyObject **) + int PyUFunc_checkfperr \ + (int, PyObject *, int *) + void PyUFunc_clearfperr() + int PyUFunc_getfperr() + int PyUFunc_handlefperr \ + (int, PyObject *, int, int *) + int PyUFunc_ReplaceLoopBySignature \ + (ufunc, PyUFuncGenericFunction, int *, PyUFuncGenericFunction *) + object PyUFunc_FromFuncAndDataAndSignature \ + (PyUFuncGenericFunction *, void **, char *, int, int, int, + int, char *, char *, int, char *) + + int _import_umath() except -1 + +cdef inline void set_array_base(ndarray arr, object base): + Py_INCREF(base) # important to do this before stealing the reference below! + PyArray_SetBaseObject(arr, base) + +cdef inline object get_array_base(ndarray arr): + base = PyArray_BASE(arr) + if base is NULL: + return None + return base + +# Versions of the import_* functions which are more suitable for +# Cython code. 
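The ufunc struct fields declared above (nin, nout, nargs, ntypes, types, identity) are exposed as attributes of ufunc objects at runtime, so they can be inspected directly from Python:

    import numpy as np

    print(np.add.nin, np.add.nout, np.add.nargs)   # 2 1 3
    print(np.add.ntypes)          # number of registered inner loops
    print(np.add.types[:3])       # loop signatures, e.g. '??->?'
    print(np.add.identity)        # PyUFunc_Zero -> 0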
+cdef inline int import_array() except -1: + try: + __pyx_import_array() + except Exception: + raise ImportError("numpy.core.multiarray failed to import") + +cdef inline int import_umath() except -1: + try: + _import_umath() + except Exception: + raise ImportError("numpy.core.umath failed to import") + +cdef inline int import_ufunc() except -1: + try: + _import_umath() + except Exception: + raise ImportError("numpy.core.umath failed to import") + +cdef extern from *: + # Leave a marker that the NumPy declarations came from this file + # See https://github.com/cython/cython/issues/3573 + """ + /* NumPy API declarations from "numpy/__init__.pxd" */ + """ + + +cdef inline bint is_timedelta64_object(object obj): + """ + Cython equivalent of `isinstance(obj, np.timedelta64)` + + Parameters + ---------- + obj : object + + Returns + ------- + bool + """ + return PyObject_TypeCheck(obj, &PyTimedeltaArrType_Type) + + +cdef inline bint is_datetime64_object(object obj): + """ + Cython equivalent of `isinstance(obj, np.datetime64)` + + Parameters + ---------- + obj : object + + Returns + ------- + bool + """ + return PyObject_TypeCheck(obj, &PyDatetimeArrType_Type) + + +cdef inline npy_datetime get_datetime64_value(object obj) nogil: + """ + returns the int64 value underlying scalar numpy datetime64 object + + Note that to interpret this as a datetime, the corresponding unit is + also needed. That can be found using `get_datetime64_unit`. + """ + return (obj).obval + + +cdef inline npy_timedelta get_timedelta64_value(object obj) nogil: + """ + returns the int64 value underlying scalar numpy timedelta64 object + """ + return (obj).obval + + +cdef inline NPY_DATETIMEUNIT get_datetime64_unit(object obj) nogil: + """ + returns the unit part of the dtype for a numpy datetime64 object. + """ + return (obj).obmeta.base diff --git a/.local/lib/python3.8/site-packages/numpy/__init__.py b/.local/lib/python3.8/site-packages/numpy/__init__.py new file mode 100644 index 0000000..e8d1820 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/__init__.py @@ -0,0 +1,418 @@ +""" +NumPy +===== + +Provides + 1. An array object of arbitrary homogeneous items + 2. Fast mathematical operations over arrays + 3. Linear Algebra, Fourier Transforms, Random Number Generation + +How to use the documentation +---------------------------- +Documentation is available in two forms: docstrings provided +with the code, and a loose standing reference guide, available from +`the NumPy homepage `_. + +We recommend exploring the docstrings using +`IPython `_, an advanced Python shell with +TAB-completion and introspection capabilities. See below for further +instructions. + +The docstring examples assume that `numpy` has been imported as `np`:: + + >>> import numpy as np + +Code snippets are indicated by three greater-than signs:: + + >>> x = 42 + >>> x = x + 1 + +Use the built-in ``help`` function to view a function's docstring:: + + >>> help(np.sort) + ... # doctest: +SKIP + +For some objects, ``np.info(obj)`` may provide additional help. This is +particularly true if you see the line "Help on ufunc object:" at the top +of the help() page. Ufuncs are implemented in C, not Python, for speed. +The native Python help() does not know how to view their help, but our +np.info() function does. + +To search for documents containing a keyword, do:: + + >>> np.lookfor('keyword') + ... 
# doctest: +SKIP + +General-purpose documents like a glossary and help on the basic concepts +of numpy are available under the ``doc`` sub-module:: + + >>> from numpy import doc + >>> help(doc) + ... # doctest: +SKIP + +Available subpackages +--------------------- +doc + Topical documentation on broadcasting, indexing, etc. +lib + Basic functions used by several sub-packages. +random + Core Random Tools +linalg + Core Linear Algebra Tools +fft + Core FFT routines +polynomial + Polynomial tools +testing + NumPy testing tools +f2py + Fortran to Python Interface Generator. +distutils + Enhancements to distutils with support for + Fortran compilers support and more. + +Utilities +--------- +test + Run numpy unittests +show_config + Show numpy build configuration +dual + Overwrite certain functions with high-performance SciPy tools. + Note: `numpy.dual` is deprecated. Use the functions from NumPy or Scipy + directly instead of importing them from `numpy.dual`. +matlib + Make everything matrices. +__version__ + NumPy version string + +Viewing documentation using IPython +----------------------------------- +Start IPython with the NumPy profile (``ipython -p numpy``), which will +import `numpy` under the alias `np`. Then, use the ``cpaste`` command to +paste examples into the shell. To see which functions are available in +`numpy`, type ``np.`` (where ```` refers to the TAB key), or use +``np.*cos*?`` (where ```` refers to the ENTER key) to narrow +down the list. To view the docstring for a function, use +``np.cos?`` (to view the docstring) and ``np.cos??`` (to view +the source code). + +Copies vs. in-place operation +----------------------------- +Most of the functions in `numpy` return a copy of the array argument +(e.g., `np.sort`). In-place versions of these functions are often +available as array methods, i.e. ``x = np.array([1,2,3]); x.sort()``. +Exceptions to this rule are documented. + +""" +import sys +import warnings + +from ._globals import ( + ModuleDeprecationWarning, VisibleDeprecationWarning, + _NoValue, _CopyMode +) + +# We first need to detect if we're being called as part of the numpy setup +# procedure itself in a reliable manner. +try: + __NUMPY_SETUP__ +except NameError: + __NUMPY_SETUP__ = False + +if __NUMPY_SETUP__: + sys.stderr.write('Running from numpy source directory.\n') +else: + try: + from numpy.__config__ import show as show_config + except ImportError as e: + msg = """Error importing numpy: you should not try to import numpy from + its source directory; please exit the numpy source tree, and relaunch + your python interpreter from there.""" + raise ImportError(msg) from e + + __all__ = ['ModuleDeprecationWarning', + 'VisibleDeprecationWarning'] + + # mapping of {name: (value, deprecation_msg)} + __deprecated_attrs__ = {} + + # Allow distributors to run custom init code + from . import _distributor_init + + from . import core + from .core import * + from . import compat + from . import lib + # NOTE: to be revisited following future namespace cleanup. + # See gh-14454 and gh-15672 for discussion. + from .lib import * + + from . import linalg + from . import fft + from . import polynomial + from . import random + from . import ctypeslib + from . import ma + from . import matrixlib as _mat + from .matrixlib import * + + # Deprecations introduced in NumPy 1.20.0, 2020-06-06 + import builtins as _builtins + + _msg = ( + "`np.{n}` is a deprecated alias for the builtin `{n}`. " + "To silence this warning, use `{n}` by itself. 
Doing this will not " + "modify any behavior and is safe. {extended_msg}\n" + "Deprecated in NumPy 1.20; for more details and guidance: " + "https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations") + + _specific_msg = ( + "If you specifically wanted the numpy scalar type, use `np.{}` here.") + + _int_extended_msg = ( + "When replacing `np.{}`, you may wish to use e.g. `np.int64` " + "or `np.int32` to specify the precision. If you wish to review " + "your current use, check the release note link for " + "additional information.") + + _type_info = [ + ("object", ""), # The NumPy scalar only exists by name. + ("bool", _specific_msg.format("bool_")), + ("float", _specific_msg.format("float64")), + ("complex", _specific_msg.format("complex128")), + ("str", _specific_msg.format("str_")), + ("int", _int_extended_msg.format("int"))] + + __deprecated_attrs__.update({ + n: (getattr(_builtins, n), _msg.format(n=n, extended_msg=extended_msg)) + for n, extended_msg in _type_info + }) + + # Numpy 1.20.0, 2020-10-19 + __deprecated_attrs__["typeDict"] = ( + core.numerictypes.typeDict, + "`np.typeDict` is a deprecated alias for `np.sctypeDict`." + ) + + # NumPy 1.22, 2021-10-20 + __deprecated_attrs__["MachAr"] = ( + core._machar.MachAr, + "`np.MachAr` is deprecated (NumPy 1.22)." + ) + + _msg = ( + "`np.{n}` is a deprecated alias for `np.compat.{n}`. " + "To silence this warning, use `np.compat.{n}` by itself. " + "In the likely event your code does not need to work on Python 2 " + "you can use the builtin `{n2}` for which `np.compat.{n}` is itself " + "an alias. Doing this will not modify any behaviour and is safe. " + "{extended_msg}\n" + "Deprecated in NumPy 1.20; for more details and guidance: " + "https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations") + + __deprecated_attrs__["long"] = ( + getattr(compat, "long"), + _msg.format(n="long", n2="int", + extended_msg=_int_extended_msg.format("long"))) + + __deprecated_attrs__["unicode"] = ( + getattr(compat, "unicode"), + _msg.format(n="unicode", n2="str", + extended_msg=_specific_msg.format("str_"))) + + del _msg, _specific_msg, _int_extended_msg, _type_info, _builtins + + from .core import round, abs, max, min + # now that numpy modules are imported, can initialize limits + core.getlimits._register_known_types() + + __all__.extend(['__version__', 'show_config']) + __all__.extend(core.__all__) + __all__.extend(_mat.__all__) + __all__.extend(lib.__all__) + __all__.extend(['linalg', 'fft', 'random', 'ctypeslib', 'ma']) + + # Remove one of the two occurrences of `issubdtype`, which is exposed as + # both `numpy.core.issubdtype` and `numpy.lib.issubdtype`. + __all__.remove('issubdtype') + + # These are exported by np.core, but are replaced by the builtins below + # remove them to ensure that we don't end up with `np.long == np.int_`, + # which would be a breaking change. + del long, unicode + __all__.remove('long') + __all__.remove('unicode') + + # Remove things that are in the numpy.lib but not in the numpy namespace + # Note that there is a test (numpy/tests/test_public_api.py:test_numpy_namespace) + # that prevents adding more things to the main namespace by accident. + # The list below will grow until the `from .lib import *` fixme above is + # taken care of + __all__.remove('Arrayterator') + del Arrayterator + + # These names were removed in NumPy 1.20. For at least one release, + # attempts to access these names in the numpy namespace will trigger + # a warning, and calling the function will raise an exception. 
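The removal mechanism described in the comment above is plain PEP 562 (module-level __getattr__). A stripped-down, hypothetical rendition of the same pattern follows; the module name and message are illustrative, not NumPy's actual code:

    # expired_demo.py -- hypothetical module, for illustration only
    import warnings

    _expired = {"fv": "use the numpy-financial package instead"}

    def __getattr__(attr):
        if attr in _expired:
            msg = f"{attr} was removed: {_expired[attr]}"
            warnings.warn(msg, DeprecationWarning, stacklevel=2)
            def _stub(*args, **kwds):
                raise RuntimeError(msg)
            return _stub
        raise AttributeError(f"module {__name__!r} has no attribute {attr!r}")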
+ _financial_names = ['fv', 'ipmt', 'irr', 'mirr', 'nper', 'npv', 'pmt', + 'ppmt', 'pv', 'rate'] + __expired_functions__ = { + name: (f'In accordance with NEP 32, the function {name} was removed ' + 'from NumPy version 1.20. A replacement for this function ' + 'is available in the numpy_financial library: ' + 'https://pypi.org/project/numpy-financial') + for name in _financial_names} + + # Filter out Cython harmless warnings + warnings.filterwarnings("ignore", message="numpy.dtype size changed") + warnings.filterwarnings("ignore", message="numpy.ufunc size changed") + warnings.filterwarnings("ignore", message="numpy.ndarray size changed") + + # oldnumeric and numarray were removed in 1.9. In case some packages import + # but do not use them, we define them here for backward compatibility. + oldnumeric = 'removed' + numarray = 'removed' + + def __getattr__(attr): + # Warn for expired attributes, and return a dummy function + # that always raises an exception. + try: + msg = __expired_functions__[attr] + except KeyError: + pass + else: + warnings.warn(msg, DeprecationWarning, stacklevel=2) + + def _expired(*args, **kwds): + raise RuntimeError(msg) + + return _expired + + # Emit warnings for deprecated attributes + try: + val, msg = __deprecated_attrs__[attr] + except KeyError: + pass + else: + warnings.warn(msg, DeprecationWarning, stacklevel=2) + return val + + # Importing Tester requires importing all of UnitTest which is not a + # cheap import Since it is mainly used in test suits, we lazy import it + # here to save on the order of 10 ms of import time for most users + # + # The previous way Tester was imported also had a side effect of adding + # the full `numpy.testing` namespace + if attr == 'testing': + import numpy.testing as testing + return testing + elif attr == 'Tester': + from .testing import Tester + return Tester + + raise AttributeError("module {!r} has no attribute " + "{!r}".format(__name__, attr)) + + def __dir__(): + return list(globals().keys() | {'Tester', 'testing'}) + + # Pytest testing + from numpy._pytesttester import PytestTester + test = PytestTester(__name__) + del PytestTester + + def _sanity_check(): + """ + Quick sanity checks for common bugs caused by environment. + There are some cases e.g. with wrong BLAS ABI that cause wrong + results under specific runtime conditions that are not necessarily + achieved during test suite runs, and it is useful to catch those early. + + See https://github.com/numpy/numpy/issues/8577 and other + similar bug reports. + + """ + try: + x = ones(2, dtype=float32) + if not abs(x.dot(x) - 2.0) < 1e-5: + raise AssertionError() + except AssertionError: + msg = ("The current Numpy installation ({!r}) fails to " + "pass simple sanity checks. This can be caused for example " + "by incorrect BLAS library being linked in, or by mixing " + "package managers (pip, conda, apt, ...). Search closed " + "numpy issues for similar problems.") + raise RuntimeError(msg.format(__file__)) from None + + _sanity_check() + del _sanity_check + + def _mac_os_check(): + """ + Quick Sanity check for Mac OS look for accelerate build bugs. 
+    Testing numpy polyfit calls init_dgelsd(LAPACK)
+    """
+    try:
+        c = array([3., 2., 1.])
+        x = linspace(0, 2, 5)
+        y = polyval(c, x)
+        _ = polyfit(x, y, 2, cov=True)
+    except ValueError:
+        pass
+
+    import sys
+    if sys.platform == "darwin":
+        with warnings.catch_warnings(record=True) as w:
+            _mac_os_check()
+            # Throw a runtime error if the test failed; check for the
+            # warning and build the error message.
+            error_message = ""
+            if len(w) > 0:
+                error_message = "{}: {}".format(w[-1].category.__name__, str(w[-1].message))
+                msg = (
+                    "Polyfit sanity test emitted a warning, most likely due "
+                    "to using a buggy Accelerate backend."
+                    "\nIf you compiled yourself, more information is available at:"
+                    "\nhttps://numpy.org/doc/stable/user/building.html#accelerated-blas-lapack-libraries"
+                    "\nOtherwise report this to the vendor "
+                    "that provided NumPy.\n{}\n".format(error_message))
+                raise RuntimeError(msg)
+    del _mac_os_check
+
+    # We usually use madvise hugepages support, but on some old kernels it
+    # is slow and thus better avoided.
+    # Specifically kernel version 4.6 had a bug fix which probably fixed this:
+    # https://github.com/torvalds/linux/commit/7cf91a98e607c2f935dbcc177d70011e95b8faff
+    import os
+    use_hugepage = os.environ.get("NUMPY_MADVISE_HUGEPAGE", None)
+    if sys.platform == "linux" and use_hugepage is None:
+        # If there is an issue with parsing the kernel version,
+        # set use_hugepage to 0. Using LooseVersion would handle the
+        # kernel version parsing more robustly, but it is avoided since
+        # it would increase the import time. See: #16679 for related discussion.
+        try:
+            use_hugepage = 1
+            kernel_version = os.uname().release.split(".")[:2]
+            kernel_version = tuple(int(v) for v in kernel_version)
+            if kernel_version < (4, 6):
+                use_hugepage = 0
+        except ValueError:
+            use_hugepage = 0
+    elif use_hugepage is None:
+        # This is not Linux, so it should not matter, just enable anyway
+        use_hugepage = 1
+    else:
+        use_hugepage = int(use_hugepage)
+
+    # Note that this will currently only make a difference on Linux
+    core.multiarray._set_madvise_hugepage(use_hugepage)
+
+    # Give a warning if NumPy is reloaded or imported on a sub-interpreter.
+    # We do this from Python, since the C-module may not be reloaded and
+    # it is tidier to organize this here.
+ core.multiarray._multiarray_umath._reload_guard() + + +# get the version using versioneer +from .version import __version__, git_revision as __git_version__ diff --git a/.local/lib/python3.8/site-packages/numpy/__init__.pyi b/.local/lib/python3.8/site-packages/numpy/__init__.pyi new file mode 100644 index 0000000..55a1305 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/__init__.pyi @@ -0,0 +1,4379 @@ +import builtins +import os +import sys +import mmap +import ctypes as ct +import array as _array +import datetime as dt +import enum +from abc import abstractmethod +from types import TracebackType, MappingProxyType +from contextlib import ContextDecorator + +if sys.version_info >= (3, 9): + from types import GenericAlias + +from numpy._pytesttester import PytestTester +from numpy.core._internal import _ctypes + +from numpy.typing import ( + # Arrays + ArrayLike, + NDArray, + _SupportsArray, + _NestedSequence, + _FiniteNestedSequence, + _SupportsArray, + _ArrayLikeBool_co, + _ArrayLikeUInt_co, + _ArrayLikeInt_co, + _ArrayLikeFloat_co, + _ArrayLikeComplex_co, + _ArrayLikeNumber_co, + _ArrayLikeTD64_co, + _ArrayLikeDT64_co, + _ArrayLikeObject_co, + _ArrayLikeStr_co, + _ArrayLikeBytes_co, + + # DTypes + DTypeLike, + _SupportsDType, + _VoidDTypeLike, + + # Shapes + _Shape, + _ShapeLike, + + # Scalars + _CharLike_co, + _BoolLike_co, + _IntLike_co, + _FloatLike_co, + _ComplexLike_co, + _TD64Like_co, + _NumberLike_co, + _ScalarLike_co, + + # `number` precision + NBitBase, + _256Bit, + _128Bit, + _96Bit, + _80Bit, + _64Bit, + _32Bit, + _16Bit, + _8Bit, + _NBitByte, + _NBitShort, + _NBitIntC, + _NBitIntP, + _NBitInt, + _NBitLongLong, + _NBitHalf, + _NBitSingle, + _NBitDouble, + _NBitLongDouble, + + # Character codes + _BoolCodes, + _UInt8Codes, + _UInt16Codes, + _UInt32Codes, + _UInt64Codes, + _Int8Codes, + _Int16Codes, + _Int32Codes, + _Int64Codes, + _Float16Codes, + _Float32Codes, + _Float64Codes, + _Complex64Codes, + _Complex128Codes, + _ByteCodes, + _ShortCodes, + _IntCCodes, + _IntPCodes, + _IntCodes, + _LongLongCodes, + _UByteCodes, + _UShortCodes, + _UIntCCodes, + _UIntPCodes, + _UIntCodes, + _ULongLongCodes, + _HalfCodes, + _SingleCodes, + _DoubleCodes, + _LongDoubleCodes, + _CSingleCodes, + _CDoubleCodes, + _CLongDoubleCodes, + _DT64Codes, + _TD64Codes, + _StrCodes, + _BytesCodes, + _VoidCodes, + _ObjectCodes, + + # Ufuncs + _UFunc_Nin1_Nout1, + _UFunc_Nin2_Nout1, + _UFunc_Nin1_Nout2, + _UFunc_Nin2_Nout2, + _GUFunc_Nin2_Nout1, +) + +from numpy.typing._callable import ( + _BoolOp, + _BoolBitOp, + _BoolSub, + _BoolTrueDiv, + _BoolMod, + _BoolDivMod, + _TD64Div, + _IntTrueDiv, + _UnsignedIntOp, + _UnsignedIntBitOp, + _UnsignedIntMod, + _UnsignedIntDivMod, + _SignedIntOp, + _SignedIntBitOp, + _SignedIntMod, + _SignedIntDivMod, + _FloatOp, + _FloatMod, + _FloatDivMod, + _ComplexOp, + _NumberOp, + _ComparisonOp, +) + +# NOTE: Numpy's mypy plugin is used for removing the types unavailable +# to the specific platform +from numpy.typing._extended_precision import ( + uint128 as uint128, + uint256 as uint256, + int128 as int128, + int256 as int256, + float80 as float80, + float96 as float96, + float128 as float128, + float256 as float256, + complex160 as complex160, + complex192 as complex192, + complex256 as complex256, + complex512 as complex512, +) + +from typing import ( + Literal as L, + Any, + ByteString, + Callable, + Container, + Callable, + Dict, + Generic, + IO, + Iterable, + Iterator, + List, + Mapping, + NoReturn, + Optional, + overload, + Sequence, + Sized, + 
SupportsComplex, + SupportsFloat, + SupportsInt, + Text, + Tuple, + Type, + TypeVar, + Union, + Protocol, + SupportsIndex, + Final, + final, + ClassVar, + Set, +) + +# Ensures that the stubs are picked up +from numpy import ( + ctypeslib as ctypeslib, + fft as fft, + lib as lib, + linalg as linalg, + ma as ma, + matrixlib as matrixlib, + polynomial as polynomial, + random as random, + testing as testing, + version as version, +) + +from numpy.core import defchararray, records +char = defchararray +rec = records + +from numpy.core.function_base import ( + linspace as linspace, + logspace as logspace, + geomspace as geomspace, +) + +from numpy.core.fromnumeric import ( + take as take, + reshape as reshape, + choose as choose, + repeat as repeat, + put as put, + swapaxes as swapaxes, + transpose as transpose, + partition as partition, + argpartition as argpartition, + sort as sort, + argsort as argsort, + argmax as argmax, + argmin as argmin, + searchsorted as searchsorted, + resize as resize, + squeeze as squeeze, + diagonal as diagonal, + trace as trace, + ravel as ravel, + nonzero as nonzero, + shape as shape, + compress as compress, + clip as clip, + sum as sum, + all as all, + any as any, + cumsum as cumsum, + ptp as ptp, + amax as amax, + amin as amin, + prod as prod, + cumprod as cumprod, + ndim as ndim, + size as size, + around as around, + mean as mean, + std as std, + var as var, +) + +from numpy.core._asarray import ( + require as require, +) + +from numpy.core._type_aliases import ( + sctypes as sctypes, + sctypeDict as sctypeDict, +) + +from numpy.core._ufunc_config import ( + seterr as seterr, + geterr as geterr, + setbufsize as setbufsize, + getbufsize as getbufsize, + seterrcall as seterrcall, + geterrcall as geterrcall, + _ErrKind, + _ErrFunc, + _ErrDictOptional, +) + +from numpy.core.arrayprint import ( + set_printoptions as set_printoptions, + get_printoptions as get_printoptions, + array2string as array2string, + format_float_scientific as format_float_scientific, + format_float_positional as format_float_positional, + array_repr as array_repr, + array_str as array_str, + set_string_function as set_string_function, + printoptions as printoptions, +) + +from numpy.core.einsumfunc import ( + einsum as einsum, + einsum_path as einsum_path, +) + +from numpy.core.multiarray import ( + ALLOW_THREADS as ALLOW_THREADS, + BUFSIZE as BUFSIZE, + CLIP as CLIP, + MAXDIMS as MAXDIMS, + MAY_SHARE_BOUNDS as MAY_SHARE_BOUNDS, + MAY_SHARE_EXACT as MAY_SHARE_EXACT, + RAISE as RAISE, + WRAP as WRAP, + tracemalloc_domain as tracemalloc_domain, + array as array, + empty_like as empty_like, + empty as empty, + zeros as zeros, + concatenate as concatenate, + inner as inner, + where as where, + lexsort as lexsort, + can_cast as can_cast, + min_scalar_type as min_scalar_type, + result_type as result_type, + dot as dot, + vdot as vdot, + bincount as bincount, + copyto as copyto, + putmask as putmask, + packbits as packbits, + unpackbits as unpackbits, + shares_memory as shares_memory, + may_share_memory as may_share_memory, + asarray as asarray, + asanyarray as asanyarray, + ascontiguousarray as ascontiguousarray, + asfortranarray as asfortranarray, + arange as arange, + busday_count as busday_count, + busday_offset as busday_offset, + compare_chararrays as compare_chararrays, + datetime_as_string as datetime_as_string, + datetime_data as datetime_data, + frombuffer as frombuffer, + fromfile as fromfile, + fromiter as fromiter, + is_busday as is_busday, + promote_types as promote_types, + seterrobj 
as seterrobj, + geterrobj as geterrobj, + fromstring as fromstring, + frompyfunc as frompyfunc, + nested_iters as nested_iters, + flagsobj, +) + +from numpy.core.numeric import ( + zeros_like as zeros_like, + ones as ones, + ones_like as ones_like, + full as full, + full_like as full_like, + count_nonzero as count_nonzero, + isfortran as isfortran, + argwhere as argwhere, + flatnonzero as flatnonzero, + correlate as correlate, + convolve as convolve, + outer as outer, + tensordot as tensordot, + roll as roll, + rollaxis as rollaxis, + moveaxis as moveaxis, + cross as cross, + indices as indices, + fromfunction as fromfunction, + isscalar as isscalar, + binary_repr as binary_repr, + base_repr as base_repr, + identity as identity, + allclose as allclose, + isclose as isclose, + array_equal as array_equal, + array_equiv as array_equiv, +) + +from numpy.core.numerictypes import ( + maximum_sctype as maximum_sctype, + issctype as issctype, + obj2sctype as obj2sctype, + issubclass_ as issubclass_, + issubsctype as issubsctype, + issubdtype as issubdtype, + sctype2char as sctype2char, + find_common_type as find_common_type, + nbytes as nbytes, + cast as cast, + ScalarType as ScalarType, + typecodes as typecodes, +) + +from numpy.core.shape_base import ( + atleast_1d as atleast_1d, + atleast_2d as atleast_2d, + atleast_3d as atleast_3d, + block as block, + hstack as hstack, + stack as stack, + vstack as vstack, +) + +from numpy.lib import ( + emath as emath, +) + +from numpy.lib.arraypad import ( + pad as pad, +) + +from numpy.lib.arraysetops import ( + ediff1d as ediff1d, + intersect1d as intersect1d, + setxor1d as setxor1d, + union1d as union1d, + setdiff1d as setdiff1d, + unique as unique, + in1d as in1d, + isin as isin, +) + +from numpy.lib.arrayterator import ( + Arrayterator as Arrayterator, +) + +from numpy.lib.function_base import ( + select as select, + piecewise as piecewise, + trim_zeros as trim_zeros, + copy as copy, + iterable as iterable, + percentile as percentile, + diff as diff, + gradient as gradient, + angle as angle, + unwrap as unwrap, + sort_complex as sort_complex, + disp as disp, + flip as flip, + rot90 as rot90, + extract as extract, + place as place, + asarray_chkfinite as asarray_chkfinite, + average as average, + bincount as bincount, + digitize as digitize, + cov as cov, + corrcoef as corrcoef, + msort as msort, + median as median, + sinc as sinc, + hamming as hamming, + hanning as hanning, + bartlett as bartlett, + blackman as blackman, + kaiser as kaiser, + trapz as trapz, + i0 as i0, + add_newdoc as add_newdoc, + add_docstring as add_docstring, + meshgrid as meshgrid, + delete as delete, + insert as insert, + append as append, + interp as interp, + add_newdoc_ufunc as add_newdoc_ufunc, + quantile as quantile, +) + +from numpy.lib.histograms import ( + histogram_bin_edges as histogram_bin_edges, + histogram as histogram, + histogramdd as histogramdd, +) + +from numpy.lib.index_tricks import ( + ravel_multi_index as ravel_multi_index, + unravel_index as unravel_index, + mgrid as mgrid, + ogrid as ogrid, + r_ as r_, + c_ as c_, + s_ as s_, + index_exp as index_exp, + ix_ as ix_, + fill_diagonal as fill_diagonal, + diag_indices as diag_indices, + diag_indices_from as diag_indices_from, +) + +from numpy.lib.nanfunctions import ( + nansum as nansum, + nanmax as nanmax, + nanmin as nanmin, + nanargmax as nanargmax, + nanargmin as nanargmin, + nanmean as nanmean, + nanmedian as nanmedian, + nanpercentile as nanpercentile, + nanvar as nanvar, + nanstd as nanstd, + nanprod as 
nanprod, + nancumsum as nancumsum, + nancumprod as nancumprod, + nanquantile as nanquantile, +) + +from numpy.lib.npyio import ( + savetxt as savetxt, + loadtxt as loadtxt, + genfromtxt as genfromtxt, + recfromtxt as recfromtxt, + recfromcsv as recfromcsv, + load as load, + save as save, + savez as savez, + savez_compressed as savez_compressed, + packbits as packbits, + unpackbits as unpackbits, + fromregex as fromregex, +) + +from numpy.lib.polynomial import ( + poly as poly, + roots as roots, + polyint as polyint, + polyder as polyder, + polyadd as polyadd, + polysub as polysub, + polymul as polymul, + polydiv as polydiv, + polyval as polyval, + polyfit as polyfit, +) + +from numpy.lib.shape_base import ( + column_stack as column_stack, + row_stack as row_stack, + dstack as dstack, + array_split as array_split, + split as split, + hsplit as hsplit, + vsplit as vsplit, + dsplit as dsplit, + apply_over_axes as apply_over_axes, + expand_dims as expand_dims, + apply_along_axis as apply_along_axis, + kron as kron, + tile as tile, + get_array_wrap as get_array_wrap, + take_along_axis as take_along_axis, + put_along_axis as put_along_axis, +) + +from numpy.lib.stride_tricks import ( + broadcast_to as broadcast_to, + broadcast_arrays as broadcast_arrays, + broadcast_shapes as broadcast_shapes, +) + +from numpy.lib.twodim_base import ( + diag as diag, + diagflat as diagflat, + eye as eye, + fliplr as fliplr, + flipud as flipud, + tri as tri, + triu as triu, + tril as tril, + vander as vander, + histogram2d as histogram2d, + mask_indices as mask_indices, + tril_indices as tril_indices, + tril_indices_from as tril_indices_from, + triu_indices as triu_indices, + triu_indices_from as triu_indices_from, +) + +from numpy.lib.type_check import ( + mintypecode as mintypecode, + asfarray as asfarray, + real as real, + imag as imag, + iscomplex as iscomplex, + isreal as isreal, + iscomplexobj as iscomplexobj, + isrealobj as isrealobj, + nan_to_num as nan_to_num, + real_if_close as real_if_close, + typename as typename, + common_type as common_type, +) + +from numpy.lib.ufunclike import ( + fix as fix, + isposinf as isposinf, + isneginf as isneginf, +) + +from numpy.lib.utils import ( + issubclass_ as issubclass_, + issubsctype as issubsctype, + issubdtype as issubdtype, + deprecate as deprecate, + deprecate_with_doc as deprecate_with_doc, + get_include as get_include, + info as info, + source as source, + who as who, + lookfor as lookfor, + byte_bounds as byte_bounds, + safe_eval as safe_eval, +) + +from numpy.matrixlib import ( + asmatrix as asmatrix, + mat as mat, + bmat as bmat, +) + +_AnyStr_contra = TypeVar("_AnyStr_contra", str, bytes, contravariant=True) + +# Protocol for representing file-like-objects accepted +# by `ndarray.tofile` and `fromfile` +class _IOProtocol(Protocol): + def flush(self) -> object: ... + def fileno(self) -> int: ... + def tell(self) -> SupportsIndex: ... + def seek(self, offset: int, whence: int, /) -> object: ... + +# NOTE: `seek`, `write` and `flush` are technically only required +# for `readwrite`/`write` modes +class _MemMapIOProtocol(Protocol): + def flush(self) -> object: ... + def fileno(self) -> SupportsIndex: ... + def tell(self) -> int: ... + def seek(self, offset: int, whence: int, /) -> object: ... + def write(self, s: bytes, /) -> object: ... + @property + def read(self) -> object: ... + +class _SupportsWrite(Protocol[_AnyStr_contra]): + def write(self, s: _AnyStr_contra, /) -> object: ... 
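The three protocol classes above are structural: `ndarray.tofile`, `memmap`, and `ndarray.dump` accept any object that defines the listed methods, with no inheritance required. A minimal sketch of what satisfies `_SupportsWrite[bytes]` at runtime, assuming the stub's declaration matches the runtime (`BytesSink` is a hypothetical class used purely for illustration):

import numpy as np

class BytesSink:
    """Satisfies _SupportsWrite[bytes] simply by defining write()."""
    def __init__(self):
        self.chunks = []

    def write(self, s):
        # Collect the pickled bytes that ndarray.dump() emits.
        self.chunks.append(s)

sink = BytesSink()
np.array([1, 2, 3]).dump(sink)          # dump() only needs a .write(bytes)
print(len(b"".join(sink.chunks)) > 0)   # True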
+ +__all__: List[str] +__path__: List[str] +__version__: str +__git_version__: str +test: PytestTester + +# TODO: Move placeholders to their respective module once +# their annotations are properly implemented +# +# Placeholders for classes + +# Some of these are aliases; others are wrappers with an identical signature +round = around +round_ = around +max = amax +min = amin +product = prod +cumproduct = cumprod +sometrue = any +alltrue = all + +def show_config() -> None: ... + +_NdArraySubClass = TypeVar("_NdArraySubClass", bound=ndarray) +_DTypeScalar_co = TypeVar("_DTypeScalar_co", covariant=True, bound=generic) +_ByteOrder = L["S", "<", ">", "=", "|", "L", "B", "N", "I"] + +class dtype(Generic[_DTypeScalar_co]): + names: None | Tuple[builtins.str, ...] + # Overload for subclass of generic + @overload + def __new__( + cls, + dtype: Type[_DTypeScalar_co], + align: bool = ..., + copy: bool = ..., + ) -> dtype[_DTypeScalar_co]: ... + # Overloads for string aliases, Python types, and some assorted + # other special cases. Order is sometimes important because of the + # subtype relationships + # + # bool < int < float < complex < object + # + # so we have to make sure the overloads for the narrowest type is + # first. + # Builtin types + @overload + def __new__(cls, dtype: Type[bool], align: bool = ..., copy: bool = ...) -> dtype[bool_]: ... + @overload + def __new__(cls, dtype: Type[int], align: bool = ..., copy: bool = ...) -> dtype[int_]: ... + @overload + def __new__(cls, dtype: None | Type[float], align: bool = ..., copy: bool = ...) -> dtype[float_]: ... + @overload + def __new__(cls, dtype: Type[complex], align: bool = ..., copy: bool = ...) -> dtype[complex_]: ... + @overload + def __new__(cls, dtype: Type[builtins.str], align: bool = ..., copy: bool = ...) -> dtype[str_]: ... + @overload + def __new__(cls, dtype: Type[bytes], align: bool = ..., copy: bool = ...) -> dtype[bytes_]: ... + + # `unsignedinteger` string-based representations and ctypes + @overload + def __new__(cls, dtype: _UInt8Codes | Type[ct.c_uint8], align: bool = ..., copy: bool = ...) -> dtype[uint8]: ... + @overload + def __new__(cls, dtype: _UInt16Codes | Type[ct.c_uint16], align: bool = ..., copy: bool = ...) -> dtype[uint16]: ... + @overload + def __new__(cls, dtype: _UInt32Codes | Type[ct.c_uint32], align: bool = ..., copy: bool = ...) -> dtype[uint32]: ... + @overload + def __new__(cls, dtype: _UInt64Codes | Type[ct.c_uint64], align: bool = ..., copy: bool = ...) -> dtype[uint64]: ... + @overload + def __new__(cls, dtype: _UByteCodes | Type[ct.c_ubyte], align: bool = ..., copy: bool = ...) -> dtype[ubyte]: ... + @overload + def __new__(cls, dtype: _UShortCodes | Type[ct.c_ushort], align: bool = ..., copy: bool = ...) -> dtype[ushort]: ... + @overload + def __new__(cls, dtype: _UIntCCodes | Type[ct.c_uint], align: bool = ..., copy: bool = ...) -> dtype[uintc]: ... + + # NOTE: We're assuming here that `uint_ptr_t == size_t`, + # an assumption that does not hold in rare cases (same for `ssize_t`) + @overload + def __new__(cls, dtype: _UIntPCodes | Type[ct.c_void_p] | Type[ct.c_size_t], align: bool = ..., copy: bool = ...) -> dtype[uintp]: ... + @overload + def __new__(cls, dtype: _UIntCodes | Type[ct.c_ulong], align: bool = ..., copy: bool = ...) -> dtype[uint]: ... + @overload + def __new__(cls, dtype: _ULongLongCodes | Type[ct.c_ulonglong], align: bool = ..., copy: bool = ...) -> dtype[ulonglong]: ... 
+ + # `signedinteger` string-based representations and ctypes + @overload + def __new__(cls, dtype: _Int8Codes | Type[ct.c_int8], align: bool = ..., copy: bool = ...) -> dtype[int8]: ... + @overload + def __new__(cls, dtype: _Int16Codes | Type[ct.c_int16], align: bool = ..., copy: bool = ...) -> dtype[int16]: ... + @overload + def __new__(cls, dtype: _Int32Codes | Type[ct.c_int32], align: bool = ..., copy: bool = ...) -> dtype[int32]: ... + @overload + def __new__(cls, dtype: _Int64Codes | Type[ct.c_int64], align: bool = ..., copy: bool = ...) -> dtype[int64]: ... + @overload + def __new__(cls, dtype: _ByteCodes | Type[ct.c_byte], align: bool = ..., copy: bool = ...) -> dtype[byte]: ... + @overload + def __new__(cls, dtype: _ShortCodes | Type[ct.c_short], align: bool = ..., copy: bool = ...) -> dtype[short]: ... + @overload + def __new__(cls, dtype: _IntCCodes | Type[ct.c_int], align: bool = ..., copy: bool = ...) -> dtype[intc]: ... + @overload + def __new__(cls, dtype: _IntPCodes | Type[ct.c_ssize_t], align: bool = ..., copy: bool = ...) -> dtype[intp]: ... + @overload + def __new__(cls, dtype: _IntCodes | Type[ct.c_long], align: bool = ..., copy: bool = ...) -> dtype[int_]: ... + @overload + def __new__(cls, dtype: _LongLongCodes | Type[ct.c_longlong], align: bool = ..., copy: bool = ...) -> dtype[longlong]: ... + + # `floating` string-based representations and ctypes + @overload + def __new__(cls, dtype: _Float16Codes, align: bool = ..., copy: bool = ...) -> dtype[float16]: ... + @overload + def __new__(cls, dtype: _Float32Codes, align: bool = ..., copy: bool = ...) -> dtype[float32]: ... + @overload + def __new__(cls, dtype: _Float64Codes, align: bool = ..., copy: bool = ...) -> dtype[float64]: ... + @overload + def __new__(cls, dtype: _HalfCodes, align: bool = ..., copy: bool = ...) -> dtype[half]: ... + @overload + def __new__(cls, dtype: _SingleCodes | Type[ct.c_float], align: bool = ..., copy: bool = ...) -> dtype[single]: ... + @overload + def __new__(cls, dtype: _DoubleCodes | Type[ct.c_double], align: bool = ..., copy: bool = ...) -> dtype[double]: ... + @overload + def __new__(cls, dtype: _LongDoubleCodes | Type[ct.c_longdouble], align: bool = ..., copy: bool = ...) -> dtype[longdouble]: ... + + # `complexfloating` string-based representations + @overload + def __new__(cls, dtype: _Complex64Codes, align: bool = ..., copy: bool = ...) -> dtype[complex64]: ... + @overload + def __new__(cls, dtype: _Complex128Codes, align: bool = ..., copy: bool = ...) -> dtype[complex128]: ... + @overload + def __new__(cls, dtype: _CSingleCodes, align: bool = ..., copy: bool = ...) -> dtype[csingle]: ... + @overload + def __new__(cls, dtype: _CDoubleCodes, align: bool = ..., copy: bool = ...) -> dtype[cdouble]: ... + @overload + def __new__(cls, dtype: _CLongDoubleCodes, align: bool = ..., copy: bool = ...) -> dtype[clongdouble]: ... + + # Miscellaneous string-based representations and ctypes + @overload + def __new__(cls, dtype: _BoolCodes | Type[ct.c_bool], align: bool = ..., copy: bool = ...) -> dtype[bool_]: ... + @overload + def __new__(cls, dtype: _TD64Codes, align: bool = ..., copy: bool = ...) -> dtype[timedelta64]: ... + @overload + def __new__(cls, dtype: _DT64Codes, align: bool = ..., copy: bool = ...) -> dtype[datetime64]: ... + @overload + def __new__(cls, dtype: _StrCodes, align: bool = ..., copy: bool = ...) -> dtype[str_]: ... + @overload + def __new__(cls, dtype: _BytesCodes | Type[ct.c_char], align: bool = ..., copy: bool = ...) -> dtype[bytes_]: ... 
+ @overload + def __new__(cls, dtype: _VoidCodes, align: bool = ..., copy: bool = ...) -> dtype[void]: ... + @overload + def __new__(cls, dtype: _ObjectCodes | Type[ct.py_object], align: bool = ..., copy: bool = ...) -> dtype[object_]: ... + + # dtype of a dtype is the same dtype + @overload + def __new__( + cls, + dtype: dtype[_DTypeScalar_co], + align: bool = ..., + copy: bool = ..., + ) -> dtype[_DTypeScalar_co]: ... + @overload + def __new__( + cls, + dtype: _SupportsDType[dtype[_DTypeScalar_co]], + align: bool = ..., + copy: bool = ..., + ) -> dtype[_DTypeScalar_co]: ... + # Handle strings that can't be expressed as literals; i.e. s1, s2, ... + @overload + def __new__( + cls, + dtype: builtins.str, + align: bool = ..., + copy: bool = ..., + ) -> dtype[Any]: ... + # Catchall overload for void-likes + @overload + def __new__( + cls, + dtype: _VoidDTypeLike, + align: bool = ..., + copy: bool = ..., + ) -> dtype[void]: ... + # Catchall overload for object-likes + @overload + def __new__( + cls, + dtype: Type[object], + align: bool = ..., + copy: bool = ..., + ) -> dtype[object_]: ... + + if sys.version_info >= (3, 9): + def __class_getitem__(self, item: Any) -> GenericAlias: ... + + @overload + def __getitem__(self: dtype[void], key: List[builtins.str]) -> dtype[void]: ... + @overload + def __getitem__(self: dtype[void], key: builtins.str | SupportsIndex) -> dtype[Any]: ... + + # NOTE: In the future 1-based multiplications will also yield `flexible` dtypes + @overload + def __mul__(self: _DType, value: L[1]) -> _DType: ... + @overload + def __mul__(self: _FlexDType, value: SupportsIndex) -> _FlexDType: ... + @overload + def __mul__(self, value: SupportsIndex) -> dtype[void]: ... + + # NOTE: `__rmul__` seems to be broken when used in combination with + # literals as of mypy 0.902. Set the return-type to `dtype[Any]` for + # now for non-flexible dtypes. + @overload + def __rmul__(self: _FlexDType, value: SupportsIndex) -> _FlexDType: ... + @overload + def __rmul__(self, value: SupportsIndex) -> dtype[Any]: ... + + def __gt__(self, other: DTypeLike) -> bool: ... + def __ge__(self, other: DTypeLike) -> bool: ... + def __lt__(self, other: DTypeLike) -> bool: ... + def __le__(self, other: DTypeLike) -> bool: ... + + # Explicitly defined `__eq__` and `__ne__` to get around mypy's + # `strict_equality` option; even though their signatures are + # identical to their `object`-based counterpart + def __eq__(self, other: Any) -> bool: ... + def __ne__(self, other: Any) -> bool: ... + + @property + def alignment(self) -> int: ... + @property + def base(self) -> dtype[Any]: ... + @property + def byteorder(self) -> builtins.str: ... + @property + def char(self) -> builtins.str: ... + @property + def descr(self) -> List[Tuple[builtins.str, builtins.str] | Tuple[builtins.str, builtins.str, _Shape]]: ... + @property + def fields( + self, + ) -> None | MappingProxyType[builtins.str, Tuple[dtype[Any], int] | Tuple[dtype[Any], int, Any]]: ... + @property + def flags(self) -> int: ... + @property + def hasobject(self) -> bool: ... + @property + def isbuiltin(self) -> int: ... + @property + def isnative(self) -> bool: ... + @property + def isalignedstruct(self) -> bool: ... + @property + def itemsize(self) -> int: ... + @property + def kind(self) -> builtins.str: ... + @property + def metadata(self) -> None | MappingProxyType[builtins.str, Any]: ... + @property + def name(self) -> builtins.str: ... + @property + def num(self) -> int: ... + @property + def shape(self) -> _Shape: ... 
+ @property + def ndim(self) -> int: ... + @property + def subdtype(self) -> None | Tuple[dtype[Any], _Shape]: ... + def newbyteorder(self: _DType, __new_order: _ByteOrder = ...) -> _DType: ... + @property + def str(self) -> builtins.str: ... + @property + def type(self) -> Type[_DTypeScalar_co]: ... + +_ArrayLikeInt = Union[ + int, + integer, + Sequence[Union[int, integer]], + Sequence[Sequence[Any]], # TODO: wait for support for recursive types + ndarray +] + +_FlatIterSelf = TypeVar("_FlatIterSelf", bound=flatiter) + +class flatiter(Generic[_NdArraySubClass]): + @property + def base(self) -> _NdArraySubClass: ... + @property + def coords(self) -> _Shape: ... + @property + def index(self) -> int: ... + def copy(self) -> _NdArraySubClass: ... + def __iter__(self: _FlatIterSelf) -> _FlatIterSelf: ... + def __next__(self: flatiter[ndarray[Any, dtype[_ScalarType]]]) -> _ScalarType: ... + def __len__(self) -> int: ... + @overload + def __getitem__( + self: flatiter[ndarray[Any, dtype[_ScalarType]]], + key: int | integer | tuple[int | integer], + ) -> _ScalarType: ... + @overload + def __getitem__( + self, + key: _ArrayLikeInt | slice | ellipsis | tuple[_ArrayLikeInt | slice | ellipsis], + ) -> _NdArraySubClass: ... + # TODO: `__setitem__` operates via `unsafe` casting rules, and can + # thus accept any type accepted by the relevant underlying `np.generic` + # constructor. + # This means that `value` must in reality be a supertype of `npt.ArrayLike`. + def __setitem__( + self, + key: _ArrayLikeInt | slice | ellipsis | tuple[_ArrayLikeInt | slice | ellipsis], + value: Any, + ) -> None: ... + @overload + def __array__(self: flatiter[ndarray[Any, _DType]], dtype: None = ..., /) -> ndarray[Any, _DType]: ... + @overload + def __array__(self, dtype: _DType, /) -> ndarray[Any, _DType]: ... + +_OrderKACF = Optional[L["K", "A", "C", "F"]] +_OrderACF = Optional[L["A", "C", "F"]] +_OrderCF = Optional[L["C", "F"]] + +_ModeKind = L["raise", "wrap", "clip"] +_PartitionKind = L["introselect"] +_SortKind = L["quicksort", "mergesort", "heapsort", "stable"] +_SortSide = L["left", "right"] + +_ArraySelf = TypeVar("_ArraySelf", bound=_ArrayOrScalarCommon) + +class _ArrayOrScalarCommon: + @property + def T(self: _ArraySelf) -> _ArraySelf: ... + @property + def data(self) -> memoryview: ... + @property + def flags(self) -> flagsobj: ... + @property + def itemsize(self) -> int: ... + @property + def nbytes(self) -> int: ... + def __bool__(self) -> bool: ... + def __bytes__(self) -> bytes: ... + def __str__(self) -> str: ... + def __repr__(self) -> str: ... + def __copy__(self: _ArraySelf) -> _ArraySelf: ... + def __deepcopy__(self: _ArraySelf, memo: None | Dict[int, Any], /) -> _ArraySelf: ... + + # TODO: How to deal with the non-commutative nature of `==` and `!=`? + # xref numpy/numpy#17368 + def __eq__(self, other: Any) -> Any: ... + def __ne__(self, other: Any) -> Any: ... + def copy(self: _ArraySelf, order: _OrderKACF = ...) -> _ArraySelf: ... + def dump(self, file: str | bytes | os.PathLike[str] | os.PathLike[bytes] | _SupportsWrite[bytes]) -> None: ... + def dumps(self) -> bytes: ... + def tobytes(self, order: _OrderKACF = ...) -> bytes: ... + # NOTE: `tostring()` is deprecated and therefore excluded + # def tostring(self, order=...): ... + def tofile( + self, + fid: str | bytes | os.PathLike[str] | os.PathLike[bytes] | _IOProtocol, + sep: str = ..., + format: str = ..., + ) -> None: ... + # generics and 0d arrays return builtin scalars + def tolist(self) -> Any: ... 
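As the comment above `tolist` notes, generics and 0d arrays come back as builtin scalars rather than lists, which is why the return type can only be `Any`. A quick illustration of the runtime behaviour the annotation describes:

import numpy as np

print(np.array([[1, 2], [3, 4]]).tolist())  # [[1, 2], [3, 4]] (nested lists)
print(np.array(3).tolist())                 # 3   -- 0d array -> builtin int
print(np.float64(2.5).tolist())             # 2.5 -- generic  -> builtin float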
+ + @property + def __array_interface__(self) -> Dict[str, Any]: ... + @property + def __array_priority__(self) -> float: ... + @property + def __array_struct__(self) -> Any: ... # builtins.PyCapsule + def __setstate__(self, state: Tuple[ + SupportsIndex, # version + _ShapeLike, # Shape + _DType_co, # DType + bool, # F-continuous + bytes | List[Any], # Data + ], /) -> None: ... + # a `bool_` is returned when `keepdims=True` and `self` is a 0d array + + @overload + def all( + self, + axis: None = ..., + out: None = ..., + keepdims: L[False] = ..., + *, + where: _ArrayLikeBool_co = ..., + ) -> bool_: ... + @overload + def all( + self, + axis: Optional[_ShapeLike] = ..., + out: None = ..., + keepdims: bool = ..., + *, + where: _ArrayLikeBool_co = ..., + ) -> Any: ... + @overload + def all( + self, + axis: Optional[_ShapeLike] = ..., + out: _NdArraySubClass = ..., + keepdims: bool = ..., + *, + where: _ArrayLikeBool_co = ..., + ) -> _NdArraySubClass: ... + + @overload + def any( + self, + axis: None = ..., + out: None = ..., + keepdims: L[False] = ..., + *, + where: _ArrayLikeBool_co = ..., + ) -> bool_: ... + @overload + def any( + self, + axis: Optional[_ShapeLike] = ..., + out: None = ..., + keepdims: bool = ..., + *, + where: _ArrayLikeBool_co = ..., + ) -> Any: ... + @overload + def any( + self, + axis: Optional[_ShapeLike] = ..., + out: _NdArraySubClass = ..., + keepdims: bool = ..., + *, + where: _ArrayLikeBool_co = ..., + ) -> _NdArraySubClass: ... + + @overload + def argmax( + self, + axis: None = ..., + out: None = ..., + *, + keepdims: L[False] = ..., + ) -> intp: ... + @overload + def argmax( + self, + axis: _ShapeLike = ..., + out: None = ..., + *, + keepdims: bool = ..., + ) -> Any: ... + @overload + def argmax( + self, + axis: Optional[_ShapeLike] = ..., + out: _NdArraySubClass = ..., + *, + keepdims: bool = ..., + ) -> _NdArraySubClass: ... + + @overload + def argmin( + self, + axis: None = ..., + out: None = ..., + *, + keepdims: L[False] = ..., + ) -> intp: ... + @overload + def argmin( + self, + axis: _ShapeLike = ..., + out: None = ..., + *, + keepdims: bool = ..., + ) -> Any: ... + @overload + def argmin( + self, + axis: Optional[_ShapeLike] = ..., + out: _NdArraySubClass = ..., + *, + keepdims: bool = ..., + ) -> _NdArraySubClass: ... + + def argsort( + self, + axis: Optional[SupportsIndex] = ..., + kind: Optional[_SortKind] = ..., + order: Union[None, str, Sequence[str]] = ..., + ) -> ndarray: ... + + @overload + def choose( + self, + choices: ArrayLike, + out: None = ..., + mode: _ModeKind = ..., + ) -> ndarray: ... + @overload + def choose( + self, + choices: ArrayLike, + out: _NdArraySubClass = ..., + mode: _ModeKind = ..., + ) -> _NdArraySubClass: ... + + @overload + def clip( + self, + min: ArrayLike = ..., + max: Optional[ArrayLike] = ..., + out: None = ..., + **kwargs: Any, + ) -> ndarray: ... + @overload + def clip( + self, + min: None = ..., + max: ArrayLike = ..., + out: None = ..., + **kwargs: Any, + ) -> ndarray: ... + @overload + def clip( + self, + min: ArrayLike = ..., + max: Optional[ArrayLike] = ..., + out: _NdArraySubClass = ..., + **kwargs: Any, + ) -> _NdArraySubClass: ... + @overload + def clip( + self, + min: None = ..., + max: ArrayLike = ..., + out: _NdArraySubClass = ..., + **kwargs: Any, + ) -> _NdArraySubClass: ... + + @overload + def compress( + self, + a: ArrayLike, + axis: Optional[SupportsIndex] = ..., + out: None = ..., + ) -> ndarray: ... 
+ @overload + def compress( + self, + a: ArrayLike, + axis: Optional[SupportsIndex] = ..., + out: _NdArraySubClass = ..., + ) -> _NdArraySubClass: ... + + def conj(self: _ArraySelf) -> _ArraySelf: ... + + def conjugate(self: _ArraySelf) -> _ArraySelf: ... + + @overload + def cumprod( + self, + axis: Optional[SupportsIndex] = ..., + dtype: DTypeLike = ..., + out: None = ..., + ) -> ndarray: ... + @overload + def cumprod( + self, + axis: Optional[SupportsIndex] = ..., + dtype: DTypeLike = ..., + out: _NdArraySubClass = ..., + ) -> _NdArraySubClass: ... + + @overload + def cumsum( + self, + axis: Optional[SupportsIndex] = ..., + dtype: DTypeLike = ..., + out: None = ..., + ) -> ndarray: ... + @overload + def cumsum( + self, + axis: Optional[SupportsIndex] = ..., + dtype: DTypeLike = ..., + out: _NdArraySubClass = ..., + ) -> _NdArraySubClass: ... + + @overload + def max( + self, + axis: Optional[_ShapeLike] = ..., + out: None = ..., + keepdims: bool = ..., + initial: _NumberLike_co = ..., + where: _ArrayLikeBool_co = ..., + ) -> Any: ... + @overload + def max( + self, + axis: Optional[_ShapeLike] = ..., + out: _NdArraySubClass = ..., + keepdims: bool = ..., + initial: _NumberLike_co = ..., + where: _ArrayLikeBool_co = ..., + ) -> _NdArraySubClass: ... + + @overload + def mean( + self, + axis: Optional[_ShapeLike] = ..., + dtype: DTypeLike = ..., + out: None = ..., + keepdims: bool = ..., + *, + where: _ArrayLikeBool_co = ..., + ) -> Any: ... + @overload + def mean( + self, + axis: Optional[_ShapeLike] = ..., + dtype: DTypeLike = ..., + out: _NdArraySubClass = ..., + keepdims: bool = ..., + *, + where: _ArrayLikeBool_co = ..., + ) -> _NdArraySubClass: ... + + @overload + def min( + self, + axis: Optional[_ShapeLike] = ..., + out: None = ..., + keepdims: bool = ..., + initial: _NumberLike_co = ..., + where: _ArrayLikeBool_co = ..., + ) -> Any: ... + @overload + def min( + self, + axis: Optional[_ShapeLike] = ..., + out: _NdArraySubClass = ..., + keepdims: bool = ..., + initial: _NumberLike_co = ..., + where: _ArrayLikeBool_co = ..., + ) -> _NdArraySubClass: ... + + def newbyteorder( + self: _ArraySelf, + __new_order: _ByteOrder = ..., + ) -> _ArraySelf: ... + + @overload + def prod( + self, + axis: Optional[_ShapeLike] = ..., + dtype: DTypeLike = ..., + out: None = ..., + keepdims: bool = ..., + initial: _NumberLike_co = ..., + where: _ArrayLikeBool_co = ..., + ) -> Any: ... + @overload + def prod( + self, + axis: Optional[_ShapeLike] = ..., + dtype: DTypeLike = ..., + out: _NdArraySubClass = ..., + keepdims: bool = ..., + initial: _NumberLike_co = ..., + where: _ArrayLikeBool_co = ..., + ) -> _NdArraySubClass: ... + + @overload + def ptp( + self, + axis: Optional[_ShapeLike] = ..., + out: None = ..., + keepdims: bool = ..., + ) -> Any: ... + @overload + def ptp( + self, + axis: Optional[_ShapeLike] = ..., + out: _NdArraySubClass = ..., + keepdims: bool = ..., + ) -> _NdArraySubClass: ... + + @overload + def round( + self: _ArraySelf, + decimals: SupportsIndex = ..., + out: None = ..., + ) -> _ArraySelf: ... + @overload + def round( + self, + decimals: SupportsIndex = ..., + out: _NdArraySubClass = ..., + ) -> _NdArraySubClass: ... + + @overload + def std( + self, + axis: Optional[_ShapeLike] = ..., + dtype: DTypeLike = ..., + out: None = ..., + ddof: int = ..., + keepdims: bool = ..., + *, + where: _ArrayLikeBool_co = ..., + ) -> Any: ... 
+    @overload
+    def std(
+        self,
+        axis: Optional[_ShapeLike] = ...,
+        dtype: DTypeLike = ...,
+        out: _NdArraySubClass = ...,
+        ddof: int = ...,
+        keepdims: bool = ...,
+        *,
+        where: _ArrayLikeBool_co = ...,
+    ) -> _NdArraySubClass: ...
+
+    @overload
+    def sum(
+        self,
+        axis: Optional[_ShapeLike] = ...,
+        dtype: DTypeLike = ...,
+        out: None = ...,
+        keepdims: bool = ...,
+        initial: _NumberLike_co = ...,
+        where: _ArrayLikeBool_co = ...,
+    ) -> Any: ...
+    @overload
+    def sum(
+        self,
+        axis: Optional[_ShapeLike] = ...,
+        dtype: DTypeLike = ...,
+        out: _NdArraySubClass = ...,
+        keepdims: bool = ...,
+        initial: _NumberLike_co = ...,
+        where: _ArrayLikeBool_co = ...,
+    ) -> _NdArraySubClass: ...
+
+    @overload
+    def var(
+        self,
+        axis: Optional[_ShapeLike] = ...,
+        dtype: DTypeLike = ...,
+        out: None = ...,
+        ddof: int = ...,
+        keepdims: bool = ...,
+        *,
+        where: _ArrayLikeBool_co = ...,
+    ) -> Any: ...
+    @overload
+    def var(
+        self,
+        axis: Optional[_ShapeLike] = ...,
+        dtype: DTypeLike = ...,
+        out: _NdArraySubClass = ...,
+        ddof: int = ...,
+        keepdims: bool = ...,
+        *,
+        where: _ArrayLikeBool_co = ...,
+    ) -> _NdArraySubClass: ...
+
+_DType = TypeVar("_DType", bound=dtype[Any])
+_DType_co = TypeVar("_DType_co", covariant=True, bound=dtype[Any])
+_FlexDType = TypeVar("_FlexDType", bound=dtype[flexible])
+
+# TODO: Set the `bound` to something more suitable once we
+# have proper shape support
+_ShapeType = TypeVar("_ShapeType", bound=Any)
+_ShapeType2 = TypeVar("_ShapeType2", bound=Any)
+_NumberType = TypeVar("_NumberType", bound=number[Any])
+
+# There is currently no exhaustive way to type the buffer protocol,
+# as it is implemented exclusively in the C API (python/typing#593)
+_SupportsBuffer = Union[
+    bytes,
+    bytearray,
+    memoryview,
+    _array.array[Any],
+    mmap.mmap,
+    NDArray[Any],
+    generic,
+]
+
+_T = TypeVar("_T")
+_T_co = TypeVar("_T_co", covariant=True)
+_T_contra = TypeVar("_T_contra", contravariant=True)
+_2Tuple = Tuple[_T, _T]
+_CastingKind = L["no", "equiv", "safe", "same_kind", "unsafe"]
+
+_DTypeLike = Union[
+    dtype[_ScalarType],
+    Type[_ScalarType],
+    _SupportsDType[dtype[_ScalarType]],
+]
+
+_ArrayUInt_co = NDArray[Union[bool_, unsignedinteger[Any]]]
+_ArrayInt_co = NDArray[Union[bool_, integer[Any]]]
+_ArrayFloat_co = NDArray[Union[bool_, integer[Any], floating[Any]]]
+_ArrayComplex_co = NDArray[Union[bool_, integer[Any], floating[Any], complexfloating[Any, Any]]]
+_ArrayNumber_co = NDArray[Union[bool_, number[Any]]]
+_ArrayTD64_co = NDArray[Union[bool_, integer[Any], timedelta64]]
+
+# Introduce an alias for `dtype` to avoid naming conflicts.
+_dtype = dtype
+
+# `builtins.PyCapsule` unfortunately lacks annotations at the moment;
+# use `Any` as a stopgap measure
+_PyCapsule = Any
+
+class _SupportsItem(Protocol[_T_co]):
+    def item(self, args: Any, /) -> _T_co: ...
+
+class _SupportsReal(Protocol[_T_co]):
+    @property
+    def real(self) -> _T_co: ...
+
+class _SupportsImag(Protocol[_T_co]):
+    @property
+    def imag(self) -> _T_co: ...
+
+class ndarray(_ArrayOrScalarCommon, Generic[_ShapeType, _DType_co]):
+    @property
+    def base(self) -> Optional[ndarray]: ...
+    @property
+    def ndim(self) -> int: ...
+    @property
+    def size(self) -> int: ...
+    @property
+    def real(
+        self: ndarray[_ShapeType, dtype[_SupportsReal[_ScalarType]]],  # type: ignore[type-var]
+    ) -> ndarray[_ShapeType, _dtype[_ScalarType]]: ...
+    @real.setter
+    def real(self, value: ArrayLike) -> None: ...
+ @property + def imag( + self: ndarray[_ShapeType, dtype[_SupportsImag[_ScalarType]]], # type: ignore[type-var] + ) -> ndarray[_ShapeType, _dtype[_ScalarType]]: ... + @imag.setter + def imag(self, value: ArrayLike) -> None: ... + def __new__( + cls: Type[_ArraySelf], + shape: _ShapeLike, + dtype: DTypeLike = ..., + buffer: None | _SupportsBuffer = ..., + offset: SupportsIndex = ..., + strides: None | _ShapeLike = ..., + order: _OrderKACF = ..., + ) -> _ArraySelf: ... + + if sys.version_info >= (3, 9): + def __class_getitem__(self, item: Any) -> GenericAlias: ... + + @overload + def __array__(self, dtype: None = ..., /) -> ndarray[Any, _DType_co]: ... + @overload + def __array__(self, dtype: _DType, /) -> ndarray[Any, _DType]: ... + + def __array_ufunc__( + self, + ufunc: ufunc, + method: L["__call__", "reduce", "reduceat", "accumulate", "outer", "inner"], + *inputs: Any, + **kwargs: Any, + ) -> Any: ... + + def __array_function__( + self, + func: Callable[..., Any], + types: Iterable[type], + args: Iterable[Any], + kwargs: Mapping[str, Any], + ) -> Any: ... + + __array_finalize__: Any + + def __array_wrap__( + self, + array: ndarray[_ShapeType2, _DType], + context: None | Tuple[ufunc, Tuple[Any, ...], int] = ..., + /, + ) -> ndarray[_ShapeType2, _DType]: ... + + def __array_prepare__( + self, + array: ndarray[_ShapeType2, _DType], + context: None | Tuple[ufunc, Tuple[Any, ...], int] = ..., + /, + ) -> ndarray[_ShapeType2, _DType]: ... + + @overload + def __getitem__(self, key: Union[ + SupportsIndex, + _ArrayLikeInt_co, + Tuple[SupportsIndex | _ArrayLikeInt_co, ...], + ]) -> Any: ... + @overload + def __getitem__(self, key: Union[ + None, + slice, + ellipsis, + SupportsIndex, + _ArrayLikeInt_co, + Tuple[None | slice | ellipsis | _ArrayLikeInt_co | SupportsIndex, ...], + ]) -> ndarray[Any, _DType_co]: ... + @overload + def __getitem__(self: NDArray[void], key: str) -> NDArray[Any]: ... + @overload + def __getitem__(self: NDArray[void], key: list[str]) -> ndarray[_ShapeType, _dtype[void]]: ... + + @property + def ctypes(self) -> _ctypes[int]: ... + @property + def shape(self) -> _Shape: ... + @shape.setter + def shape(self, value: _ShapeLike) -> None: ... + @property + def strides(self) -> _Shape: ... + @strides.setter + def strides(self, value: _ShapeLike) -> None: ... + def byteswap(self: _ArraySelf, inplace: bool = ...) -> _ArraySelf: ... + def fill(self, value: Any) -> None: ... + @property + def flat(self: _NdArraySubClass) -> flatiter[_NdArraySubClass]: ... + + # Use the same output type as that of the underlying `generic` + @overload + def item( + self: ndarray[Any, _dtype[_SupportsItem[_T]]], # type: ignore[type-var] + *args: SupportsIndex, + ) -> _T: ... + @overload + def item( + self: ndarray[Any, _dtype[_SupportsItem[_T]]], # type: ignore[type-var] + args: Tuple[SupportsIndex, ...], + /, + ) -> _T: ... + + @overload + def itemset(self, value: Any, /) -> None: ... + @overload + def itemset(self, item: _ShapeLike, value: Any, /) -> None: ... + + @overload + def resize(self, new_shape: _ShapeLike, /, *, refcheck: bool = ...) -> None: ... + @overload + def resize(self, *new_shape: SupportsIndex, refcheck: bool = ...) -> None: ... + + def setflags( + self, write: bool = ..., align: bool = ..., uic: bool = ... + ) -> None: ... + + def squeeze( + self, + axis: Union[SupportsIndex, Tuple[SupportsIndex, ...]] = ..., + ) -> ndarray[Any, _DType_co]: ... + + def swapaxes( + self, + axis1: SupportsIndex, + axis2: SupportsIndex, + ) -> ndarray[Any, _DType_co]: ... 
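The two `item` overloads above mirror the runtime calling conventions: indices may be passed separately or as a single tuple, and the result is the builtin scalar that the `_SupportsItem` protocol extracts from the dtype. A short illustration, not part of the stub:

import numpy as np

a = np.arange(6, dtype=np.int64).reshape(2, 3)
x = a.item(1, 2)      # separate indices -> 5
y = a.item((0, 1))    # single tuple     -> 1
print(type(x), x, y)  # <class 'int'> 5 1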
+ + @overload + def transpose(self: _ArraySelf, axes: _ShapeLike, /) -> _ArraySelf: ... + @overload + def transpose(self: _ArraySelf, *axes: SupportsIndex) -> _ArraySelf: ... + + def argpartition( + self, + kth: _ArrayLikeInt_co, + axis: Optional[SupportsIndex] = ..., + kind: _PartitionKind = ..., + order: Union[None, str, Sequence[str]] = ..., + ) -> ndarray[Any, _dtype[intp]]: ... + + def diagonal( + self, + offset: SupportsIndex = ..., + axis1: SupportsIndex = ..., + axis2: SupportsIndex = ..., + ) -> ndarray[Any, _DType_co]: ... + + # 1D + 1D returns a scalar; + # all other with at least 1 non-0D array return an ndarray. + @overload + def dot(self, b: _ScalarLike_co, out: None = ...) -> ndarray: ... + @overload + def dot(self, b: ArrayLike, out: None = ...) -> Any: ... # type: ignore[misc] + @overload + def dot(self, b: ArrayLike, out: _NdArraySubClass) -> _NdArraySubClass: ... + + # `nonzero()` is deprecated for 0d arrays/generics + def nonzero(self) -> Tuple[ndarray[Any, _dtype[intp]], ...]: ... + + def partition( + self, + kth: _ArrayLikeInt_co, + axis: SupportsIndex = ..., + kind: _PartitionKind = ..., + order: Union[None, str, Sequence[str]] = ..., + ) -> None: ... + + # `put` is technically available to `generic`, + # but is pointless as `generic`s are immutable + def put( + self, + ind: _ArrayLikeInt_co, + v: ArrayLike, + mode: _ModeKind = ..., + ) -> None: ... + + @overload + def searchsorted( # type: ignore[misc] + self, # >= 1D array + v: _ScalarLike_co, # 0D array-like + side: _SortSide = ..., + sorter: Optional[_ArrayLikeInt_co] = ..., + ) -> intp: ... + @overload + def searchsorted( + self, # >= 1D array + v: ArrayLike, + side: _SortSide = ..., + sorter: Optional[_ArrayLikeInt_co] = ..., + ) -> ndarray[Any, _dtype[intp]]: ... + + def setfield( + self, + val: ArrayLike, + dtype: DTypeLike, + offset: SupportsIndex = ..., + ) -> None: ... + + def sort( + self, + axis: SupportsIndex = ..., + kind: Optional[_SortKind] = ..., + order: Union[None, str, Sequence[str]] = ..., + ) -> None: ... + + @overload + def trace( + self, # >= 2D array + offset: SupportsIndex = ..., + axis1: SupportsIndex = ..., + axis2: SupportsIndex = ..., + dtype: DTypeLike = ..., + out: None = ..., + ) -> Any: ... + @overload + def trace( + self, # >= 2D array + offset: SupportsIndex = ..., + axis1: SupportsIndex = ..., + axis2: SupportsIndex = ..., + dtype: DTypeLike = ..., + out: _NdArraySubClass = ..., + ) -> _NdArraySubClass: ... + + @overload + def take( # type: ignore[misc] + self: ndarray[Any, _dtype[_ScalarType]], + indices: _IntLike_co, + axis: Optional[SupportsIndex] = ..., + out: None = ..., + mode: _ModeKind = ..., + ) -> _ScalarType: ... + @overload + def take( # type: ignore[misc] + self, + indices: _ArrayLikeInt_co, + axis: Optional[SupportsIndex] = ..., + out: None = ..., + mode: _ModeKind = ..., + ) -> ndarray[Any, _DType_co]: ... + @overload + def take( + self, + indices: _ArrayLikeInt_co, + axis: Optional[SupportsIndex] = ..., + out: _NdArraySubClass = ..., + mode: _ModeKind = ..., + ) -> _NdArraySubClass: ... + + def repeat( + self, + repeats: _ArrayLikeInt_co, + axis: Optional[SupportsIndex] = ..., + ) -> ndarray[Any, _DType_co]: ... + + def flatten( + self, + order: _OrderKACF = ..., + ) -> ndarray[Any, _DType_co]: ... + + def ravel( + self, + order: _OrderKACF = ..., + ) -> ndarray[Any, _DType_co]: ... + + @overload + def reshape( + self, shape: _ShapeLike, /, *, order: _OrderACF = ... + ) -> ndarray[Any, _DType_co]: ... 
+ @overload + def reshape( + self, *shape: SupportsIndex, order: _OrderACF = ... + ) -> ndarray[Any, _DType_co]: ... + + @overload + def astype( + self, + dtype: _DTypeLike[_ScalarType], + order: _OrderKACF = ..., + casting: _CastingKind = ..., + subok: bool = ..., + copy: bool | _CopyMode = ..., + ) -> NDArray[_ScalarType]: ... + @overload + def astype( + self, + dtype: DTypeLike, + order: _OrderKACF = ..., + casting: _CastingKind = ..., + subok: bool = ..., + copy: bool | _CopyMode = ..., + ) -> NDArray[Any]: ... + + @overload + def view(self: _ArraySelf) -> _ArraySelf: ... + @overload + def view(self, type: Type[_NdArraySubClass]) -> _NdArraySubClass: ... + @overload + def view(self, dtype: _DTypeLike[_ScalarType]) -> NDArray[_ScalarType]: ... + @overload + def view(self, dtype: DTypeLike) -> NDArray[Any]: ... + @overload + def view( + self, + dtype: DTypeLike, + type: Type[_NdArraySubClass], + ) -> _NdArraySubClass: ... + + @overload + def getfield( + self, + dtype: _DTypeLike[_ScalarType], + offset: SupportsIndex = ... + ) -> NDArray[_ScalarType]: ... + @overload + def getfield( + self, + dtype: DTypeLike, + offset: SupportsIndex = ... + ) -> NDArray[Any]: ... + + # Dispatch to the underlying `generic` via protocols + def __int__( + self: ndarray[Any, _dtype[SupportsInt]], # type: ignore[type-var] + ) -> int: ... + + def __float__( + self: ndarray[Any, _dtype[SupportsFloat]], # type: ignore[type-var] + ) -> float: ... + + def __complex__( + self: ndarray[Any, _dtype[SupportsComplex]], # type: ignore[type-var] + ) -> complex: ... + + def __index__( + self: ndarray[Any, _dtype[SupportsIndex]], # type: ignore[type-var] + ) -> int: ... + + def __len__(self) -> int: ... + def __setitem__(self, key, value): ... + def __iter__(self) -> Any: ... + def __contains__(self, key) -> bool: ... + + # The last overload is for catching recursive objects whose + # nesting is too deep. + # The first overload is for catching `bytes` (as they are a subtype of + # `Sequence[int]`) and `str`. As `str` is a recursive sequence of + # strings, it will pass through the final overload otherwise + + @overload + def __lt__(self: _ArrayNumber_co, other: _ArrayLikeNumber_co) -> NDArray[bool_]: ... + @overload + def __lt__(self: _ArrayTD64_co, other: _ArrayLikeTD64_co) -> NDArray[bool_]: ... + @overload + def __lt__(self: NDArray[datetime64], other: _ArrayLikeDT64_co) -> NDArray[bool_]: ... + @overload + def __lt__(self: NDArray[object_], other: Any) -> NDArray[bool_]: ... + @overload + def __lt__(self: NDArray[Any], other: _ArrayLikeObject_co) -> NDArray[bool_]: ... + + @overload + def __le__(self: _ArrayNumber_co, other: _ArrayLikeNumber_co) -> NDArray[bool_]: ... + @overload + def __le__(self: _ArrayTD64_co, other: _ArrayLikeTD64_co) -> NDArray[bool_]: ... + @overload + def __le__(self: NDArray[datetime64], other: _ArrayLikeDT64_co) -> NDArray[bool_]: ... + @overload + def __le__(self: NDArray[object_], other: Any) -> NDArray[bool_]: ... + @overload + def __le__(self: NDArray[Any], other: _ArrayLikeObject_co) -> NDArray[bool_]: ... + + @overload + def __gt__(self: _ArrayNumber_co, other: _ArrayLikeNumber_co) -> NDArray[bool_]: ... + @overload + def __gt__(self: _ArrayTD64_co, other: _ArrayLikeTD64_co) -> NDArray[bool_]: ... + @overload + def __gt__(self: NDArray[datetime64], other: _ArrayLikeDT64_co) -> NDArray[bool_]: ... + @overload + def __gt__(self: NDArray[object_], other: Any) -> NDArray[bool_]: ... + @overload + def __gt__(self: NDArray[Any], other: _ArrayLikeObject_co) -> NDArray[bool_]: ... 
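Each comparison operator gets the same overload ladder, and every branch resolves to a boolean array. A small runtime check of what these annotations promise, for illustration only:

import numpy as np

a = np.array([1, 2, 3])
m = a > 2
print(m, m.dtype)  # [False False  True] bool

# datetime64 comparisons are covered by a dedicated overload as well:
d = np.array(["2021-01-01"], dtype="datetime64[D]")
print(d < np.datetime64("2022-01-01"))  # [ True]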
+ + @overload + def __ge__(self: _ArrayNumber_co, other: _ArrayLikeNumber_co) -> NDArray[bool_]: ... + @overload + def __ge__(self: _ArrayTD64_co, other: _ArrayLikeTD64_co) -> NDArray[bool_]: ... + @overload + def __ge__(self: NDArray[datetime64], other: _ArrayLikeDT64_co) -> NDArray[bool_]: ... + @overload + def __ge__(self: NDArray[object_], other: Any) -> NDArray[bool_]: ... + @overload + def __ge__(self: NDArray[Any], other: _ArrayLikeObject_co) -> NDArray[bool_]: ... + + # Unary ops + @overload + def __abs__(self: NDArray[bool_]) -> NDArray[bool_]: ... + @overload + def __abs__(self: NDArray[complexfloating[_NBit1, _NBit1]]) -> NDArray[floating[_NBit1]]: ... + @overload + def __abs__(self: NDArray[_NumberType]) -> NDArray[_NumberType]: ... + @overload + def __abs__(self: NDArray[timedelta64]) -> NDArray[timedelta64]: ... + @overload + def __abs__(self: NDArray[object_]) -> Any: ... + + @overload + def __invert__(self: NDArray[bool_]) -> NDArray[bool_]: ... + @overload + def __invert__(self: NDArray[_IntType]) -> NDArray[_IntType]: ... + @overload + def __invert__(self: NDArray[object_]) -> Any: ... + + @overload + def __pos__(self: NDArray[_NumberType]) -> NDArray[_NumberType]: ... + @overload + def __pos__(self: NDArray[timedelta64]) -> NDArray[timedelta64]: ... + @overload + def __pos__(self: NDArray[object_]) -> Any: ... + + @overload + def __neg__(self: NDArray[_NumberType]) -> NDArray[_NumberType]: ... + @overload + def __neg__(self: NDArray[timedelta64]) -> NDArray[timedelta64]: ... + @overload + def __neg__(self: NDArray[object_]) -> Any: ... + + # Binary ops + # NOTE: `ndarray` does not implement `__imatmul__` + @overload + def __matmul__(self: NDArray[bool_], other: _ArrayLikeBool_co) -> NDArray[bool_]: ... # type: ignore[misc] + @overload + def __matmul__(self: _ArrayUInt_co, other: _ArrayLikeUInt_co) -> NDArray[unsignedinteger[Any]]: ... # type: ignore[misc] + @overload + def __matmul__(self: _ArrayInt_co, other: _ArrayLikeInt_co) -> NDArray[signedinteger[Any]]: ... # type: ignore[misc] + @overload + def __matmul__(self: _ArrayFloat_co, other: _ArrayLikeFloat_co) -> NDArray[floating[Any]]: ... # type: ignore[misc] + @overload + def __matmul__(self: _ArrayComplex_co, other: _ArrayLikeComplex_co) -> NDArray[complexfloating[Any, Any]]: ... + @overload + def __matmul__(self: NDArray[object_], other: Any) -> Any: ... + @overload + def __matmul__(self: NDArray[Any], other: _ArrayLikeObject_co) -> Any: ... + + @overload + def __rmatmul__(self: NDArray[bool_], other: _ArrayLikeBool_co) -> NDArray[bool_]: ... # type: ignore[misc] + @overload + def __rmatmul__(self: _ArrayUInt_co, other: _ArrayLikeUInt_co) -> NDArray[unsignedinteger[Any]]: ... # type: ignore[misc] + @overload + def __rmatmul__(self: _ArrayInt_co, other: _ArrayLikeInt_co) -> NDArray[signedinteger[Any]]: ... # type: ignore[misc] + @overload + def __rmatmul__(self: _ArrayFloat_co, other: _ArrayLikeFloat_co) -> NDArray[floating[Any]]: ... # type: ignore[misc] + @overload + def __rmatmul__(self: _ArrayComplex_co, other: _ArrayLikeComplex_co) -> NDArray[complexfloating[Any, Any]]: ... + @overload + def __rmatmul__(self: NDArray[object_], other: Any) -> Any: ... + @overload + def __rmatmul__(self: NDArray[Any], other: _ArrayLikeObject_co) -> Any: ... + + @overload + def __mod__(self: NDArray[bool_], other: _ArrayLikeBool_co) -> NDArray[int8]: ... # type: ignore[misc] + @overload + def __mod__(self: _ArrayUInt_co, other: _ArrayLikeUInt_co) -> NDArray[unsignedinteger[Any]]: ... 
# type: ignore[misc] + @overload + def __mod__(self: _ArrayInt_co, other: _ArrayLikeInt_co) -> NDArray[signedinteger[Any]]: ... # type: ignore[misc] + @overload + def __mod__(self: _ArrayFloat_co, other: _ArrayLikeFloat_co) -> NDArray[floating[Any]]: ... # type: ignore[misc] + @overload + def __mod__(self: _ArrayTD64_co, other: _SupportsArray[_dtype[timedelta64]] | _NestedSequence[_SupportsArray[_dtype[timedelta64]]]) -> NDArray[timedelta64]: ... + @overload + def __mod__(self: NDArray[object_], other: Any) -> Any: ... + @overload + def __mod__(self: NDArray[Any], other: _ArrayLikeObject_co) -> Any: ... + + @overload + def __rmod__(self: NDArray[bool_], other: _ArrayLikeBool_co) -> NDArray[int8]: ... # type: ignore[misc] + @overload + def __rmod__(self: _ArrayUInt_co, other: _ArrayLikeUInt_co) -> NDArray[unsignedinteger[Any]]: ... # type: ignore[misc] + @overload + def __rmod__(self: _ArrayInt_co, other: _ArrayLikeInt_co) -> NDArray[signedinteger[Any]]: ... # type: ignore[misc] + @overload + def __rmod__(self: _ArrayFloat_co, other: _ArrayLikeFloat_co) -> NDArray[floating[Any]]: ... # type: ignore[misc] + @overload + def __rmod__(self: _ArrayTD64_co, other: _SupportsArray[_dtype[timedelta64]] | _NestedSequence[_SupportsArray[_dtype[timedelta64]]]) -> NDArray[timedelta64]: ... + @overload + def __rmod__(self: NDArray[object_], other: Any) -> Any: ... + @overload + def __rmod__(self: NDArray[Any], other: _ArrayLikeObject_co) -> Any: ... + + @overload + def __divmod__(self: NDArray[bool_], other: _ArrayLikeBool_co) -> _2Tuple[NDArray[int8]]: ... # type: ignore[misc] + @overload + def __divmod__(self: _ArrayUInt_co, other: _ArrayLikeUInt_co) -> _2Tuple[NDArray[unsignedinteger[Any]]]: ... # type: ignore[misc] + @overload + def __divmod__(self: _ArrayInt_co, other: _ArrayLikeInt_co) -> _2Tuple[NDArray[signedinteger[Any]]]: ... # type: ignore[misc] + @overload + def __divmod__(self: _ArrayFloat_co, other: _ArrayLikeFloat_co) -> _2Tuple[NDArray[floating[Any]]]: ... # type: ignore[misc] + @overload + def __divmod__(self: _ArrayTD64_co, other: _SupportsArray[_dtype[timedelta64]] | _NestedSequence[_SupportsArray[_dtype[timedelta64]]]) -> Tuple[NDArray[int64], NDArray[timedelta64]]: ... + + @overload + def __rdivmod__(self: NDArray[bool_], other: _ArrayLikeBool_co) -> _2Tuple[NDArray[int8]]: ... # type: ignore[misc] + @overload + def __rdivmod__(self: _ArrayUInt_co, other: _ArrayLikeUInt_co) -> _2Tuple[NDArray[unsignedinteger[Any]]]: ... # type: ignore[misc] + @overload + def __rdivmod__(self: _ArrayInt_co, other: _ArrayLikeInt_co) -> _2Tuple[NDArray[signedinteger[Any]]]: ... # type: ignore[misc] + @overload + def __rdivmod__(self: _ArrayFloat_co, other: _ArrayLikeFloat_co) -> _2Tuple[NDArray[floating[Any]]]: ... # type: ignore[misc] + @overload + def __rdivmod__(self: _ArrayTD64_co, other: _SupportsArray[_dtype[timedelta64]] | _NestedSequence[_SupportsArray[_dtype[timedelta64]]]) -> Tuple[NDArray[int64], NDArray[timedelta64]]: ... + + @overload + def __add__(self: NDArray[bool_], other: _ArrayLikeBool_co) -> NDArray[bool_]: ... # type: ignore[misc] + @overload + def __add__(self: _ArrayUInt_co, other: _ArrayLikeUInt_co) -> NDArray[unsignedinteger[Any]]: ... # type: ignore[misc] + @overload + def __add__(self: _ArrayInt_co, other: _ArrayLikeInt_co) -> NDArray[signedinteger[Any]]: ... # type: ignore[misc] + @overload + def __add__(self: _ArrayFloat_co, other: _ArrayLikeFloat_co) -> NDArray[floating[Any]]: ... 
# type: ignore[misc] + @overload + def __add__(self: _ArrayComplex_co, other: _ArrayLikeComplex_co) -> NDArray[complexfloating[Any, Any]]: ... # type: ignore[misc] + @overload + def __add__(self: _ArrayTD64_co, other: _ArrayLikeTD64_co) -> NDArray[timedelta64]: ... # type: ignore[misc] + @overload + def __add__(self: _ArrayTD64_co, other: _ArrayLikeDT64_co) -> NDArray[datetime64]: ... + @overload + def __add__(self: NDArray[datetime64], other: _ArrayLikeTD64_co) -> NDArray[datetime64]: ... + @overload + def __add__(self: NDArray[object_], other: Any) -> Any: ... + @overload + def __add__(self: NDArray[Any], other: _ArrayLikeObject_co) -> Any: ... + + @overload + def __radd__(self: NDArray[bool_], other: _ArrayLikeBool_co) -> NDArray[bool_]: ... # type: ignore[misc] + @overload + def __radd__(self: _ArrayUInt_co, other: _ArrayLikeUInt_co) -> NDArray[unsignedinteger[Any]]: ... # type: ignore[misc] + @overload + def __radd__(self: _ArrayInt_co, other: _ArrayLikeInt_co) -> NDArray[signedinteger[Any]]: ... # type: ignore[misc] + @overload + def __radd__(self: _ArrayFloat_co, other: _ArrayLikeFloat_co) -> NDArray[floating[Any]]: ... # type: ignore[misc] + @overload + def __radd__(self: _ArrayComplex_co, other: _ArrayLikeComplex_co) -> NDArray[complexfloating[Any, Any]]: ... # type: ignore[misc] + @overload + def __radd__(self: _ArrayTD64_co, other: _ArrayLikeTD64_co) -> NDArray[timedelta64]: ... # type: ignore[misc] + @overload + def __radd__(self: _ArrayTD64_co, other: _ArrayLikeDT64_co) -> NDArray[datetime64]: ... + @overload + def __radd__(self: NDArray[datetime64], other: _ArrayLikeTD64_co) -> NDArray[datetime64]: ... + @overload + def __radd__(self: NDArray[object_], other: Any) -> Any: ... + @overload + def __radd__(self: NDArray[Any], other: _ArrayLikeObject_co) -> Any: ... + + @overload + def __sub__(self: NDArray[bool_], other: _ArrayLikeBool_co) -> NoReturn: ... + @overload + def __sub__(self: _ArrayUInt_co, other: _ArrayLikeUInt_co) -> NDArray[unsignedinteger[Any]]: ... # type: ignore[misc] + @overload + def __sub__(self: _ArrayInt_co, other: _ArrayLikeInt_co) -> NDArray[signedinteger[Any]]: ... # type: ignore[misc] + @overload + def __sub__(self: _ArrayFloat_co, other: _ArrayLikeFloat_co) -> NDArray[floating[Any]]: ... # type: ignore[misc] + @overload + def __sub__(self: _ArrayComplex_co, other: _ArrayLikeComplex_co) -> NDArray[complexfloating[Any, Any]]: ... # type: ignore[misc] + @overload + def __sub__(self: _ArrayTD64_co, other: _ArrayLikeTD64_co) -> NDArray[timedelta64]: ... # type: ignore[misc] + @overload + def __sub__(self: NDArray[datetime64], other: _ArrayLikeTD64_co) -> NDArray[datetime64]: ... + @overload + def __sub__(self: NDArray[datetime64], other: _ArrayLikeDT64_co) -> NDArray[timedelta64]: ... + @overload + def __sub__(self: NDArray[object_], other: Any) -> Any: ... + @overload + def __sub__(self: NDArray[Any], other: _ArrayLikeObject_co) -> Any: ... + + @overload + def __rsub__(self: NDArray[bool_], other: _ArrayLikeBool_co) -> NoReturn: ... + @overload + def __rsub__(self: _ArrayUInt_co, other: _ArrayLikeUInt_co) -> NDArray[unsignedinteger[Any]]: ... # type: ignore[misc] + @overload + def __rsub__(self: _ArrayInt_co, other: _ArrayLikeInt_co) -> NDArray[signedinteger[Any]]: ... # type: ignore[misc] + @overload + def __rsub__(self: _ArrayFloat_co, other: _ArrayLikeFloat_co) -> NDArray[floating[Any]]: ... # type: ignore[misc] + @overload + def __rsub__(self: _ArrayComplex_co, other: _ArrayLikeComplex_co) -> NDArray[complexfloating[Any, Any]]: ... 
# type: ignore[misc] + @overload + def __rsub__(self: _ArrayTD64_co, other: _ArrayLikeTD64_co) -> NDArray[timedelta64]: ... # type: ignore[misc] + @overload + def __rsub__(self: _ArrayTD64_co, other: _ArrayLikeDT64_co) -> NDArray[datetime64]: ... # type: ignore[misc] + @overload + def __rsub__(self: NDArray[datetime64], other: _ArrayLikeDT64_co) -> NDArray[timedelta64]: ... + @overload + def __rsub__(self: NDArray[object_], other: Any) -> Any: ... + @overload + def __rsub__(self: NDArray[Any], other: _ArrayLikeObject_co) -> Any: ... + + @overload + def __mul__(self: NDArray[bool_], other: _ArrayLikeBool_co) -> NDArray[bool_]: ... # type: ignore[misc] + @overload + def __mul__(self: _ArrayUInt_co, other: _ArrayLikeUInt_co) -> NDArray[unsignedinteger[Any]]: ... # type: ignore[misc] + @overload + def __mul__(self: _ArrayInt_co, other: _ArrayLikeInt_co) -> NDArray[signedinteger[Any]]: ... # type: ignore[misc] + @overload + def __mul__(self: _ArrayFloat_co, other: _ArrayLikeFloat_co) -> NDArray[floating[Any]]: ... # type: ignore[misc] + @overload + def __mul__(self: _ArrayComplex_co, other: _ArrayLikeComplex_co) -> NDArray[complexfloating[Any, Any]]: ... # type: ignore[misc] + @overload + def __mul__(self: _ArrayTD64_co, other: _ArrayLikeFloat_co) -> NDArray[timedelta64]: ... + @overload + def __mul__(self: _ArrayFloat_co, other: _ArrayLikeTD64_co) -> NDArray[timedelta64]: ... + @overload + def __mul__(self: NDArray[object_], other: Any) -> Any: ... + @overload + def __mul__(self: NDArray[Any], other: _ArrayLikeObject_co) -> Any: ... + + @overload + def __rmul__(self: NDArray[bool_], other: _ArrayLikeBool_co) -> NDArray[bool_]: ... # type: ignore[misc] + @overload + def __rmul__(self: _ArrayUInt_co, other: _ArrayLikeUInt_co) -> NDArray[unsignedinteger[Any]]: ... # type: ignore[misc] + @overload + def __rmul__(self: _ArrayInt_co, other: _ArrayLikeInt_co) -> NDArray[signedinteger[Any]]: ... # type: ignore[misc] + @overload + def __rmul__(self: _ArrayFloat_co, other: _ArrayLikeFloat_co) -> NDArray[floating[Any]]: ... # type: ignore[misc] + @overload + def __rmul__(self: _ArrayComplex_co, other: _ArrayLikeComplex_co) -> NDArray[complexfloating[Any, Any]]: ... # type: ignore[misc] + @overload + def __rmul__(self: _ArrayTD64_co, other: _ArrayLikeFloat_co) -> NDArray[timedelta64]: ... + @overload + def __rmul__(self: _ArrayFloat_co, other: _ArrayLikeTD64_co) -> NDArray[timedelta64]: ... + @overload + def __rmul__(self: NDArray[object_], other: Any) -> Any: ... + @overload + def __rmul__(self: NDArray[Any], other: _ArrayLikeObject_co) -> Any: ... + + @overload + def __floordiv__(self: NDArray[bool_], other: _ArrayLikeBool_co) -> NDArray[int8]: ... # type: ignore[misc] + @overload + def __floordiv__(self: _ArrayUInt_co, other: _ArrayLikeUInt_co) -> NDArray[unsignedinteger[Any]]: ... # type: ignore[misc] + @overload + def __floordiv__(self: _ArrayInt_co, other: _ArrayLikeInt_co) -> NDArray[signedinteger[Any]]: ... # type: ignore[misc] + @overload + def __floordiv__(self: _ArrayFloat_co, other: _ArrayLikeFloat_co) -> NDArray[floating[Any]]: ... # type: ignore[misc] + @overload + def __floordiv__(self: NDArray[timedelta64], other: _SupportsArray[_dtype[timedelta64]] | _NestedSequence[_SupportsArray[_dtype[timedelta64]]]) -> NDArray[int64]: ... + @overload + def __floordiv__(self: NDArray[timedelta64], other: _ArrayLikeBool_co) -> NoReturn: ... + @overload + def __floordiv__(self: NDArray[timedelta64], other: _ArrayLikeFloat_co) -> NDArray[timedelta64]: ... 
+ @overload + def __floordiv__(self: NDArray[object_], other: Any) -> Any: ... + @overload + def __floordiv__(self: NDArray[Any], other: _ArrayLikeObject_co) -> Any: ... + + @overload + def __rfloordiv__(self: NDArray[bool_], other: _ArrayLikeBool_co) -> NDArray[int8]: ... # type: ignore[misc] + @overload + def __rfloordiv__(self: _ArrayUInt_co, other: _ArrayLikeUInt_co) -> NDArray[unsignedinteger[Any]]: ... # type: ignore[misc] + @overload + def __rfloordiv__(self: _ArrayInt_co, other: _ArrayLikeInt_co) -> NDArray[signedinteger[Any]]: ... # type: ignore[misc] + @overload + def __rfloordiv__(self: _ArrayFloat_co, other: _ArrayLikeFloat_co) -> NDArray[floating[Any]]: ... # type: ignore[misc] + @overload + def __rfloordiv__(self: NDArray[timedelta64], other: _SupportsArray[_dtype[timedelta64]] | _NestedSequence[_SupportsArray[_dtype[timedelta64]]]) -> NDArray[int64]: ... + @overload + def __rfloordiv__(self: NDArray[bool_], other: _ArrayLikeTD64_co) -> NoReturn: ... + @overload + def __rfloordiv__(self: _ArrayFloat_co, other: _ArrayLikeTD64_co) -> NDArray[timedelta64]: ... + @overload + def __rfloordiv__(self: NDArray[object_], other: Any) -> Any: ... + @overload + def __rfloordiv__(self: NDArray[Any], other: _ArrayLikeObject_co) -> Any: ... + + @overload + def __pow__(self: NDArray[bool_], other: _ArrayLikeBool_co) -> NDArray[int8]: ... # type: ignore[misc] + @overload + def __pow__(self: _ArrayUInt_co, other: _ArrayLikeUInt_co) -> NDArray[unsignedinteger[Any]]: ... # type: ignore[misc] + @overload + def __pow__(self: _ArrayInt_co, other: _ArrayLikeInt_co) -> NDArray[signedinteger[Any]]: ... # type: ignore[misc] + @overload + def __pow__(self: _ArrayFloat_co, other: _ArrayLikeFloat_co) -> NDArray[floating[Any]]: ... # type: ignore[misc] + @overload + def __pow__(self: _ArrayComplex_co, other: _ArrayLikeComplex_co) -> NDArray[complexfloating[Any, Any]]: ... + @overload + def __pow__(self: NDArray[object_], other: Any) -> Any: ... + @overload + def __pow__(self: NDArray[Any], other: _ArrayLikeObject_co) -> Any: ... + + @overload + def __rpow__(self: NDArray[bool_], other: _ArrayLikeBool_co) -> NDArray[int8]: ... # type: ignore[misc] + @overload + def __rpow__(self: _ArrayUInt_co, other: _ArrayLikeUInt_co) -> NDArray[unsignedinteger[Any]]: ... # type: ignore[misc] + @overload + def __rpow__(self: _ArrayInt_co, other: _ArrayLikeInt_co) -> NDArray[signedinteger[Any]]: ... # type: ignore[misc] + @overload + def __rpow__(self: _ArrayFloat_co, other: _ArrayLikeFloat_co) -> NDArray[floating[Any]]: ... # type: ignore[misc] + @overload + def __rpow__(self: _ArrayComplex_co, other: _ArrayLikeComplex_co) -> NDArray[complexfloating[Any, Any]]: ... + @overload + def __rpow__(self: NDArray[object_], other: Any) -> Any: ... + @overload + def __rpow__(self: NDArray[Any], other: _ArrayLikeObject_co) -> Any: ... + + @overload + def __truediv__(self: _ArrayInt_co, other: _ArrayInt_co) -> NDArray[float64]: ... # type: ignore[misc] + @overload + def __truediv__(self: _ArrayFloat_co, other: _ArrayLikeFloat_co) -> NDArray[floating[Any]]: ... # type: ignore[misc] + @overload + def __truediv__(self: _ArrayComplex_co, other: _ArrayLikeComplex_co) -> NDArray[complexfloating[Any, Any]]: ... # type: ignore[misc] + @overload + def __truediv__(self: NDArray[timedelta64], other: _SupportsArray[_dtype[timedelta64]] | _NestedSequence[_SupportsArray[_dtype[timedelta64]]]) -> NDArray[float64]: ... + @overload + def __truediv__(self: NDArray[timedelta64], other: _ArrayLikeBool_co) -> NoReturn: ... 
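+    # NOTE (editorial illustration, not part of the upstream stubs): the
+    # division overloads above encode that integer true division yields
+    # float64, while timedelta64 floor division depends on the divisor:
+    #
+    # >>> (np.array([1, 2]) / np.array([2, 2])).dtype
+    # dtype('float64')
+    # >>> np.timedelta64(7, "s") // np.timedelta64(2, "s")
+    # 3
+    # >>> np.timedelta64(7, "s") // 2
+    # numpy.timedelta64(3,'s')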
+ @overload + def __truediv__(self: NDArray[timedelta64], other: _ArrayLikeFloat_co) -> NDArray[timedelta64]: ... + @overload + def __truediv__(self: NDArray[object_], other: Any) -> Any: ... + @overload + def __truediv__(self: NDArray[Any], other: _ArrayLikeObject_co) -> Any: ... + + @overload + def __rtruediv__(self: _ArrayInt_co, other: _ArrayInt_co) -> NDArray[float64]: ... # type: ignore[misc] + @overload + def __rtruediv__(self: _ArrayFloat_co, other: _ArrayLikeFloat_co) -> NDArray[floating[Any]]: ... # type: ignore[misc] + @overload + def __rtruediv__(self: _ArrayComplex_co, other: _ArrayLikeComplex_co) -> NDArray[complexfloating[Any, Any]]: ... # type: ignore[misc] + @overload + def __rtruediv__(self: NDArray[timedelta64], other: _SupportsArray[_dtype[timedelta64]] | _NestedSequence[_SupportsArray[_dtype[timedelta64]]]) -> NDArray[float64]: ... + @overload + def __rtruediv__(self: NDArray[bool_], other: _ArrayLikeTD64_co) -> NoReturn: ... + @overload + def __rtruediv__(self: _ArrayFloat_co, other: _ArrayLikeTD64_co) -> NDArray[timedelta64]: ... + @overload + def __rtruediv__(self: NDArray[object_], other: Any) -> Any: ... + @overload + def __rtruediv__(self: NDArray[Any], other: _ArrayLikeObject_co) -> Any: ... + + @overload + def __lshift__(self: NDArray[bool_], other: _ArrayLikeBool_co) -> NDArray[int8]: ... # type: ignore[misc] + @overload + def __lshift__(self: _ArrayUInt_co, other: _ArrayLikeUInt_co) -> NDArray[unsignedinteger[Any]]: ... # type: ignore[misc] + @overload + def __lshift__(self: _ArrayInt_co, other: _ArrayLikeInt_co) -> NDArray[signedinteger[Any]]: ... + @overload + def __lshift__(self: NDArray[object_], other: Any) -> Any: ... + @overload + def __lshift__(self: NDArray[Any], other: _ArrayLikeObject_co) -> Any: ... + + @overload + def __rlshift__(self: NDArray[bool_], other: _ArrayLikeBool_co) -> NDArray[int8]: ... # type: ignore[misc] + @overload + def __rlshift__(self: _ArrayUInt_co, other: _ArrayLikeUInt_co) -> NDArray[unsignedinteger[Any]]: ... # type: ignore[misc] + @overload + def __rlshift__(self: _ArrayInt_co, other: _ArrayLikeInt_co) -> NDArray[signedinteger[Any]]: ... + @overload + def __rlshift__(self: NDArray[object_], other: Any) -> Any: ... + @overload + def __rlshift__(self: NDArray[Any], other: _ArrayLikeObject_co) -> Any: ... + + @overload + def __rshift__(self: NDArray[bool_], other: _ArrayLikeBool_co) -> NDArray[int8]: ... # type: ignore[misc] + @overload + def __rshift__(self: _ArrayUInt_co, other: _ArrayLikeUInt_co) -> NDArray[unsignedinteger[Any]]: ... # type: ignore[misc] + @overload + def __rshift__(self: _ArrayInt_co, other: _ArrayLikeInt_co) -> NDArray[signedinteger[Any]]: ... + @overload + def __rshift__(self: NDArray[object_], other: Any) -> Any: ... + @overload + def __rshift__(self: NDArray[Any], other: _ArrayLikeObject_co) -> Any: ... + + @overload + def __rrshift__(self: NDArray[bool_], other: _ArrayLikeBool_co) -> NDArray[int8]: ... # type: ignore[misc] + @overload + def __rrshift__(self: _ArrayUInt_co, other: _ArrayLikeUInt_co) -> NDArray[unsignedinteger[Any]]: ... # type: ignore[misc] + @overload + def __rrshift__(self: _ArrayInt_co, other: _ArrayLikeInt_co) -> NDArray[signedinteger[Any]]: ... + @overload + def __rrshift__(self: NDArray[object_], other: Any) -> Any: ... + @overload + def __rrshift__(self: NDArray[Any], other: _ArrayLikeObject_co) -> Any: ... + + @overload + def __and__(self: NDArray[bool_], other: _ArrayLikeBool_co) -> NDArray[bool_]: ... 
# type: ignore[misc] + @overload + def __and__(self: _ArrayUInt_co, other: _ArrayLikeUInt_co) -> NDArray[unsignedinteger[Any]]: ... # type: ignore[misc] + @overload + def __and__(self: _ArrayInt_co, other: _ArrayLikeInt_co) -> NDArray[signedinteger[Any]]: ... + @overload + def __and__(self: NDArray[object_], other: Any) -> Any: ... + @overload + def __and__(self: NDArray[Any], other: _ArrayLikeObject_co) -> Any: ... + + @overload + def __rand__(self: NDArray[bool_], other: _ArrayLikeBool_co) -> NDArray[bool_]: ... # type: ignore[misc] + @overload + def __rand__(self: _ArrayUInt_co, other: _ArrayLikeUInt_co) -> NDArray[unsignedinteger[Any]]: ... # type: ignore[misc] + @overload + def __rand__(self: _ArrayInt_co, other: _ArrayLikeInt_co) -> NDArray[signedinteger[Any]]: ... + @overload + def __rand__(self: NDArray[object_], other: Any) -> Any: ... + @overload + def __rand__(self: NDArray[Any], other: _ArrayLikeObject_co) -> Any: ... + + @overload + def __xor__(self: NDArray[bool_], other: _ArrayLikeBool_co) -> NDArray[bool_]: ... # type: ignore[misc] + @overload + def __xor__(self: _ArrayUInt_co, other: _ArrayLikeUInt_co) -> NDArray[unsignedinteger[Any]]: ... # type: ignore[misc] + @overload + def __xor__(self: _ArrayInt_co, other: _ArrayLikeInt_co) -> NDArray[signedinteger[Any]]: ... + @overload + def __xor__(self: NDArray[object_], other: Any) -> Any: ... + @overload + def __xor__(self: NDArray[Any], other: _ArrayLikeObject_co) -> Any: ... + + @overload + def __rxor__(self: NDArray[bool_], other: _ArrayLikeBool_co) -> NDArray[bool_]: ... # type: ignore[misc] + @overload + def __rxor__(self: _ArrayUInt_co, other: _ArrayLikeUInt_co) -> NDArray[unsignedinteger[Any]]: ... # type: ignore[misc] + @overload + def __rxor__(self: _ArrayInt_co, other: _ArrayLikeInt_co) -> NDArray[signedinteger[Any]]: ... + @overload + def __rxor__(self: NDArray[object_], other: Any) -> Any: ... + @overload + def __rxor__(self: NDArray[Any], other: _ArrayLikeObject_co) -> Any: ... + + @overload + def __or__(self: NDArray[bool_], other: _ArrayLikeBool_co) -> NDArray[bool_]: ... # type: ignore[misc] + @overload + def __or__(self: _ArrayUInt_co, other: _ArrayLikeUInt_co) -> NDArray[unsignedinteger[Any]]: ... # type: ignore[misc] + @overload + def __or__(self: _ArrayInt_co, other: _ArrayLikeInt_co) -> NDArray[signedinteger[Any]]: ... + @overload + def __or__(self: NDArray[object_], other: Any) -> Any: ... + @overload + def __or__(self: NDArray[Any], other: _ArrayLikeObject_co) -> Any: ... + + @overload + def __ror__(self: NDArray[bool_], other: _ArrayLikeBool_co) -> NDArray[bool_]: ... # type: ignore[misc] + @overload + def __ror__(self: _ArrayUInt_co, other: _ArrayLikeUInt_co) -> NDArray[unsignedinteger[Any]]: ... # type: ignore[misc] + @overload + def __ror__(self: _ArrayInt_co, other: _ArrayLikeInt_co) -> NDArray[signedinteger[Any]]: ... + @overload + def __ror__(self: NDArray[object_], other: Any) -> Any: ... + @overload + def __ror__(self: NDArray[Any], other: _ArrayLikeObject_co) -> Any: ... + + # `np.generic` does not support inplace operations + @overload + def __iadd__(self: NDArray[bool_], other: _ArrayLikeBool_co) -> NDArray[bool_]: ... + @overload + def __iadd__(self: NDArray[unsignedinteger[_NBit1]], other: _ArrayLikeUInt_co) -> NDArray[unsignedinteger[_NBit1]]: ... + @overload + def __iadd__(self: NDArray[signedinteger[_NBit1]], other: _ArrayLikeInt_co) -> NDArray[signedinteger[_NBit1]]: ... 
+ @overload + def __iadd__(self: NDArray[floating[_NBit1]], other: _ArrayLikeFloat_co) -> NDArray[floating[_NBit1]]: ... + @overload + def __iadd__(self: NDArray[complexfloating[_NBit1, _NBit1]], other: _ArrayLikeComplex_co) -> NDArray[complexfloating[_NBit1, _NBit1]]: ... + @overload + def __iadd__(self: NDArray[timedelta64], other: _ArrayLikeTD64_co) -> NDArray[timedelta64]: ... + @overload + def __iadd__(self: NDArray[datetime64], other: _ArrayLikeTD64_co) -> NDArray[datetime64]: ... + @overload + def __iadd__(self: NDArray[object_], other: Any) -> NDArray[object_]: ... + + @overload + def __isub__(self: NDArray[unsignedinteger[_NBit1]], other: _ArrayLikeUInt_co) -> NDArray[unsignedinteger[_NBit1]]: ... + @overload + def __isub__(self: NDArray[signedinteger[_NBit1]], other: _ArrayLikeInt_co) -> NDArray[signedinteger[_NBit1]]: ... + @overload + def __isub__(self: NDArray[floating[_NBit1]], other: _ArrayLikeFloat_co) -> NDArray[floating[_NBit1]]: ... + @overload + def __isub__(self: NDArray[complexfloating[_NBit1, _NBit1]], other: _ArrayLikeComplex_co) -> NDArray[complexfloating[_NBit1, _NBit1]]: ... + @overload + def __isub__(self: NDArray[timedelta64], other: _ArrayLikeTD64_co) -> NDArray[timedelta64]: ... + @overload + def __isub__(self: NDArray[datetime64], other: _ArrayLikeTD64_co) -> NDArray[datetime64]: ... + @overload + def __isub__(self: NDArray[object_], other: Any) -> NDArray[object_]: ... + + @overload + def __imul__(self: NDArray[bool_], other: _ArrayLikeBool_co) -> NDArray[bool_]: ... + @overload + def __imul__(self: NDArray[unsignedinteger[_NBit1]], other: _ArrayLikeUInt_co) -> NDArray[unsignedinteger[_NBit1]]: ... + @overload + def __imul__(self: NDArray[signedinteger[_NBit1]], other: _ArrayLikeInt_co) -> NDArray[signedinteger[_NBit1]]: ... + @overload + def __imul__(self: NDArray[floating[_NBit1]], other: _ArrayLikeFloat_co) -> NDArray[floating[_NBit1]]: ... + @overload + def __imul__(self: NDArray[complexfloating[_NBit1, _NBit1]], other: _ArrayLikeComplex_co) -> NDArray[complexfloating[_NBit1, _NBit1]]: ... + @overload + def __imul__(self: NDArray[timedelta64], other: _ArrayLikeFloat_co) -> NDArray[timedelta64]: ... + @overload + def __imul__(self: NDArray[object_], other: Any) -> NDArray[object_]: ... + + @overload + def __itruediv__(self: NDArray[floating[_NBit1]], other: _ArrayLikeFloat_co) -> NDArray[floating[_NBit1]]: ... + @overload + def __itruediv__(self: NDArray[complexfloating[_NBit1, _NBit1]], other: _ArrayLikeComplex_co) -> NDArray[complexfloating[_NBit1, _NBit1]]: ... + @overload + def __itruediv__(self: NDArray[timedelta64], other: _ArrayLikeBool_co) -> NoReturn: ... + @overload + def __itruediv__(self: NDArray[timedelta64], other: _ArrayLikeInt_co) -> NDArray[timedelta64]: ... + @overload + def __itruediv__(self: NDArray[object_], other: Any) -> NDArray[object_]: ... + + @overload + def __ifloordiv__(self: NDArray[unsignedinteger[_NBit1]], other: _ArrayLikeUInt_co) -> NDArray[unsignedinteger[_NBit1]]: ... + @overload + def __ifloordiv__(self: NDArray[signedinteger[_NBit1]], other: _ArrayLikeInt_co) -> NDArray[signedinteger[_NBit1]]: ... + @overload + def __ifloordiv__(self: NDArray[floating[_NBit1]], other: _ArrayLikeFloat_co) -> NDArray[floating[_NBit1]]: ... + @overload + def __ifloordiv__(self: NDArray[complexfloating[_NBit1, _NBit1]], other: _ArrayLikeComplex_co) -> NDArray[complexfloating[_NBit1, _NBit1]]: ... + @overload + def __ifloordiv__(self: NDArray[timedelta64], other: _ArrayLikeBool_co) -> NoReturn: ... 
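+    # NOTE (editorial illustration, not part of the upstream stubs): the
+    # in-place overloads deliberately preserve `_NBit1`, mirroring the
+    # "same_kind" casting rule NumPy applies to in-place ufuncs:
+    #
+    # >>> a = np.zeros(3, dtype=np.float32)
+    # >>> a += np.float64(1.0)  # same-kind cast: dtype stays float32
+    # >>> b = np.zeros(3, dtype=np.int32)
+    # >>> b += 0.5  # -> TypeError at runtime (float64 -> int32 is unsafe)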
+ @overload + def __ifloordiv__(self: NDArray[timedelta64], other: _ArrayLikeInt_co) -> NDArray[timedelta64]: ... + @overload + def __ifloordiv__(self: NDArray[object_], other: Any) -> NDArray[object_]: ... + + @overload + def __ipow__(self: NDArray[unsignedinteger[_NBit1]], other: _ArrayLikeUInt_co) -> NDArray[unsignedinteger[_NBit1]]: ... + @overload + def __ipow__(self: NDArray[signedinteger[_NBit1]], other: _ArrayLikeInt_co) -> NDArray[signedinteger[_NBit1]]: ... + @overload + def __ipow__(self: NDArray[floating[_NBit1]], other: _ArrayLikeFloat_co) -> NDArray[floating[_NBit1]]: ... + @overload + def __ipow__(self: NDArray[complexfloating[_NBit1, _NBit1]], other: _ArrayLikeComplex_co) -> NDArray[complexfloating[_NBit1, _NBit1]]: ... + @overload + def __ipow__(self: NDArray[object_], other: Any) -> NDArray[object_]: ... + + @overload + def __imod__(self: NDArray[unsignedinteger[_NBit1]], other: _ArrayLikeUInt_co) -> NDArray[unsignedinteger[_NBit1]]: ... + @overload + def __imod__(self: NDArray[signedinteger[_NBit1]], other: _ArrayLikeInt_co) -> NDArray[signedinteger[_NBit1]]: ... + @overload + def __imod__(self: NDArray[floating[_NBit1]], other: _ArrayLikeFloat_co) -> NDArray[floating[_NBit1]]: ... + @overload + def __imod__(self: NDArray[timedelta64], other: _SupportsArray[_dtype[timedelta64]] | _NestedSequence[_SupportsArray[_dtype[timedelta64]]]) -> NDArray[timedelta64]: ... + @overload + def __imod__(self: NDArray[object_], other: Any) -> NDArray[object_]: ... + + @overload + def __ilshift__(self: NDArray[unsignedinteger[_NBit1]], other: _ArrayLikeUInt_co) -> NDArray[unsignedinteger[_NBit1]]: ... + @overload + def __ilshift__(self: NDArray[signedinteger[_NBit1]], other: _ArrayLikeInt_co) -> NDArray[signedinteger[_NBit1]]: ... + @overload + def __ilshift__(self: NDArray[object_], other: Any) -> NDArray[object_]: ... + + @overload + def __irshift__(self: NDArray[unsignedinteger[_NBit1]], other: _ArrayLikeUInt_co) -> NDArray[unsignedinteger[_NBit1]]: ... + @overload + def __irshift__(self: NDArray[signedinteger[_NBit1]], other: _ArrayLikeInt_co) -> NDArray[signedinteger[_NBit1]]: ... + @overload + def __irshift__(self: NDArray[object_], other: Any) -> NDArray[object_]: ... + + @overload + def __iand__(self: NDArray[bool_], other: _ArrayLikeBool_co) -> NDArray[bool_]: ... + @overload + def __iand__(self: NDArray[unsignedinteger[_NBit1]], other: _ArrayLikeUInt_co) -> NDArray[unsignedinteger[_NBit1]]: ... + @overload + def __iand__(self: NDArray[signedinteger[_NBit1]], other: _ArrayLikeInt_co) -> NDArray[signedinteger[_NBit1]]: ... + @overload + def __iand__(self: NDArray[object_], other: Any) -> NDArray[object_]: ... + + @overload + def __ixor__(self: NDArray[bool_], other: _ArrayLikeBool_co) -> NDArray[bool_]: ... + @overload + def __ixor__(self: NDArray[unsignedinteger[_NBit1]], other: _ArrayLikeUInt_co) -> NDArray[unsignedinteger[_NBit1]]: ... + @overload + def __ixor__(self: NDArray[signedinteger[_NBit1]], other: _ArrayLikeInt_co) -> NDArray[signedinteger[_NBit1]]: ... + @overload + def __ixor__(self: NDArray[object_], other: Any) -> NDArray[object_]: ... + + @overload + def __ior__(self: NDArray[bool_], other: _ArrayLikeBool_co) -> NDArray[bool_]: ... + @overload + def __ior__(self: NDArray[unsignedinteger[_NBit1]], other: _ArrayLikeUInt_co) -> NDArray[unsignedinteger[_NBit1]]: ... + @overload + def __ior__(self: NDArray[signedinteger[_NBit1]], other: _ArrayLikeInt_co) -> NDArray[signedinteger[_NBit1]]: ... 
+ @overload + def __ior__(self: NDArray[object_], other: Any) -> NDArray[object_]: ... + + def __dlpack__(self: NDArray[number[Any]], *, stream: None = ...) -> _PyCapsule: ... + def __dlpack_device__(self) -> Tuple[int, L[0]]: ... + + # Keep `dtype` at the bottom to avoid name conflicts with `np.dtype` + @property + def dtype(self) -> _DType_co: ... + +# NOTE: while `np.generic` is not technically an instance of `ABCMeta`, +# the `@abstractmethod` decorator is herein used to (forcefully) deny +# the creation of `np.generic` instances. +# The `# type: ignore` comments are necessary to silence mypy errors regarding +# the missing `ABCMeta` metaclass. + +# See https://github.com/numpy/numpy-stubs/pull/80 for more details. + +_ScalarType = TypeVar("_ScalarType", bound=generic) +_NBit1 = TypeVar("_NBit1", bound=NBitBase) +_NBit2 = TypeVar("_NBit2", bound=NBitBase) + +class generic(_ArrayOrScalarCommon): + @abstractmethod + def __init__(self, *args: Any, **kwargs: Any) -> None: ... + @overload + def __array__(self: _ScalarType, dtype: None = ..., /) -> ndarray[Any, _dtype[_ScalarType]]: ... + @overload + def __array__(self, dtype: _DType, /) -> ndarray[Any, _DType]: ... + @property + def base(self) -> None: ... + @property + def ndim(self) -> L[0]: ... + @property + def size(self) -> L[1]: ... + @property + def shape(self) -> Tuple[()]: ... + @property + def strides(self) -> Tuple[()]: ... + def byteswap(self: _ScalarType, inplace: L[False] = ...) -> _ScalarType: ... + @property + def flat(self: _ScalarType) -> flatiter[ndarray[Any, _dtype[_ScalarType]]]: ... + + @overload + def astype( + self, + dtype: _DTypeLike[_ScalarType], + order: _OrderKACF = ..., + casting: _CastingKind = ..., + subok: bool = ..., + copy: bool | _CopyMode = ..., + ) -> _ScalarType: ... + @overload + def astype( + self, + dtype: DTypeLike, + order: _OrderKACF = ..., + casting: _CastingKind = ..., + subok: bool = ..., + copy: bool | _CopyMode = ..., + ) -> Any: ... + + # NOTE: `view` will perform a 0D->scalar cast, + # thus the array `type` is irrelevant to the output type + @overload + def view( + self: _ScalarType, + type: Type[ndarray[Any, Any]] = ..., + ) -> _ScalarType: ... + @overload + def view( + self, + dtype: _DTypeLike[_ScalarType], + type: Type[ndarray[Any, Any]] = ..., + ) -> _ScalarType: ... + @overload + def view( + self, + dtype: DTypeLike, + type: Type[ndarray[Any, Any]] = ..., + ) -> Any: ... + + @overload + def getfield( + self, + dtype: _DTypeLike[_ScalarType], + offset: SupportsIndex = ... + ) -> _ScalarType: ... + @overload + def getfield( + self, + dtype: DTypeLike, + offset: SupportsIndex = ... + ) -> Any: ... + + def item( + self, args: L[0] | Tuple[()] | Tuple[L[0]] = ..., /, + ) -> Any: ... + + @overload + def take( # type: ignore[misc] + self: _ScalarType, + indices: _IntLike_co, + axis: Optional[SupportsIndex] = ..., + out: None = ..., + mode: _ModeKind = ..., + ) -> _ScalarType: ... + @overload + def take( # type: ignore[misc] + self: _ScalarType, + indices: _ArrayLikeInt_co, + axis: Optional[SupportsIndex] = ..., + out: None = ..., + mode: _ModeKind = ..., + ) -> ndarray[Any, _dtype[_ScalarType]]: ... + @overload + def take( + self, + indices: _ArrayLikeInt_co, + axis: Optional[SupportsIndex] = ..., + out: _NdArraySubClass = ..., + mode: _ModeKind = ..., + ) -> _NdArraySubClass: ... + + def repeat( + self: _ScalarType, + repeats: _ArrayLikeInt_co, + axis: Optional[SupportsIndex] = ..., + ) -> ndarray[Any, _dtype[_ScalarType]]: ... 
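+    # NOTE (editorial illustration, not part of the upstream stubs): the
+    # zero-dimensional properties above match runtime scalar behaviour,
+    # and the @abstractmethod trick means `np.generic` cannot be created:
+    #
+    # >>> np.float64(1.5).ndim, np.float64(1.5).shape, np.float64(1.5).size
+    # (0, (), 1)
+    # >>> np.generic()  # -> TypeError at runtime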
+ + def flatten( + self: _ScalarType, + order: _OrderKACF = ..., + ) -> ndarray[Any, _dtype[_ScalarType]]: ... + + def ravel( + self: _ScalarType, + order: _OrderKACF = ..., + ) -> ndarray[Any, _dtype[_ScalarType]]: ... + + @overload + def reshape( + self: _ScalarType, shape: _ShapeLike, /, *, order: _OrderACF = ... + ) -> ndarray[Any, _dtype[_ScalarType]]: ... + @overload + def reshape( + self: _ScalarType, *shape: SupportsIndex, order: _OrderACF = ... + ) -> ndarray[Any, _dtype[_ScalarType]]: ... + + def squeeze( + self: _ScalarType, axis: Union[L[0], Tuple[()]] = ... + ) -> _ScalarType: ... + def transpose(self: _ScalarType, axes: Tuple[()] = ..., /) -> _ScalarType: ... + # Keep `dtype` at the bottom to avoid name conflicts with `np.dtype` + @property + def dtype(self: _ScalarType) -> _dtype[_ScalarType]: ... + +class number(generic, Generic[_NBit1]): # type: ignore + @property + def real(self: _ArraySelf) -> _ArraySelf: ... + @property + def imag(self: _ArraySelf) -> _ArraySelf: ... + if sys.version_info >= (3, 9): + def __class_getitem__(self, item: Any) -> GenericAlias: ... + def __int__(self) -> int: ... + def __float__(self) -> float: ... + def __complex__(self) -> complex: ... + def __neg__(self: _ArraySelf) -> _ArraySelf: ... + def __pos__(self: _ArraySelf) -> _ArraySelf: ... + def __abs__(self: _ArraySelf) -> _ArraySelf: ... + # Ensure that objects annotated as `number` support arithmetic operations + __add__: _NumberOp + __radd__: _NumberOp + __sub__: _NumberOp + __rsub__: _NumberOp + __mul__: _NumberOp + __rmul__: _NumberOp + __floordiv__: _NumberOp + __rfloordiv__: _NumberOp + __pow__: _NumberOp + __rpow__: _NumberOp + __truediv__: _NumberOp + __rtruediv__: _NumberOp + __lt__: _ComparisonOp[_NumberLike_co, _ArrayLikeNumber_co] + __le__: _ComparisonOp[_NumberLike_co, _ArrayLikeNumber_co] + __gt__: _ComparisonOp[_NumberLike_co, _ArrayLikeNumber_co] + __ge__: _ComparisonOp[_NumberLike_co, _ArrayLikeNumber_co] + +class bool_(generic): + def __init__(self, value: object = ..., /) -> None: ... + def item( + self, args: L[0] | Tuple[()] | Tuple[L[0]] = ..., /, + ) -> bool: ... + def tolist(self) -> bool: ... + @property + def real(self: _ArraySelf) -> _ArraySelf: ... + @property + def imag(self: _ArraySelf) -> _ArraySelf: ... + def __int__(self) -> int: ... + def __float__(self) -> float: ... + def __complex__(self) -> complex: ... + def __abs__(self: _ArraySelf) -> _ArraySelf: ... + __add__: _BoolOp[bool_] + __radd__: _BoolOp[bool_] + __sub__: _BoolSub + __rsub__: _BoolSub + __mul__: _BoolOp[bool_] + __rmul__: _BoolOp[bool_] + __floordiv__: _BoolOp[int8] + __rfloordiv__: _BoolOp[int8] + __pow__: _BoolOp[int8] + __rpow__: _BoolOp[int8] + __truediv__: _BoolTrueDiv + __rtruediv__: _BoolTrueDiv + def __invert__(self) -> bool_: ... + __lshift__: _BoolBitOp[int8] + __rlshift__: _BoolBitOp[int8] + __rshift__: _BoolBitOp[int8] + __rrshift__: _BoolBitOp[int8] + __and__: _BoolBitOp[bool_] + __rand__: _BoolBitOp[bool_] + __xor__: _BoolBitOp[bool_] + __rxor__: _BoolBitOp[bool_] + __or__: _BoolBitOp[bool_] + __ror__: _BoolBitOp[bool_] + __mod__: _BoolMod + __rmod__: _BoolMod + __divmod__: _BoolDivMod + __rdivmod__: _BoolDivMod + __lt__: _ComparisonOp[_NumberLike_co, _ArrayLikeNumber_co] + __le__: _ComparisonOp[_NumberLike_co, _ArrayLikeNumber_co] + __gt__: _ComparisonOp[_NumberLike_co, _ArrayLikeNumber_co] + __ge__: _ComparisonOp[_NumberLike_co, _ArrayLikeNumber_co] + +bool8 = bool_ + +class object_(generic): + def __init__(self, value: object = ..., /) -> None: ... 
+ @property + def real(self: _ArraySelf) -> _ArraySelf: ... + @property + def imag(self: _ArraySelf) -> _ArraySelf: ... + # The 3 protocols below may or may not raise, + # depending on the underlying object + def __int__(self) -> int: ... + def __float__(self) -> float: ... + def __complex__(self) -> complex: ... + +object0 = object_ + +# The `datetime64` constructor requires an object with the three attributes below, +# and thus supports datetime duck typing +class _DatetimeScalar(Protocol): + @property + def day(self) -> int: ... + @property + def month(self) -> int: ... + @property + def year(self) -> int: ... + +# TODO: `item`/`tolist` returns either `dt.date`, `dt.datetime` or `int` +# depending on the unit +class datetime64(generic): + @overload + def __init__( + self, + value: None | datetime64 | _CharLike_co | _DatetimeScalar = ..., + format: _CharLike_co | Tuple[_CharLike_co, _IntLike_co] = ..., + /, + ) -> None: ... + @overload + def __init__( + self, + value: int, + format: _CharLike_co | Tuple[_CharLike_co, _IntLike_co], + /, + ) -> None: ... + def __add__(self, other: _TD64Like_co) -> datetime64: ... + def __radd__(self, other: _TD64Like_co) -> datetime64: ... + @overload + def __sub__(self, other: datetime64) -> timedelta64: ... + @overload + def __sub__(self, other: _TD64Like_co) -> datetime64: ... + def __rsub__(self, other: datetime64) -> timedelta64: ... + __lt__: _ComparisonOp[datetime64, _ArrayLikeDT64_co] + __le__: _ComparisonOp[datetime64, _ArrayLikeDT64_co] + __gt__: _ComparisonOp[datetime64, _ArrayLikeDT64_co] + __ge__: _ComparisonOp[datetime64, _ArrayLikeDT64_co] + +_IntValue = Union[SupportsInt, _CharLike_co, SupportsIndex] +_FloatValue = Union[None, _CharLike_co, SupportsFloat, SupportsIndex] +_ComplexValue = Union[ + None, + _CharLike_co, + SupportsFloat, + SupportsComplex, + SupportsIndex, + complex, # `complex` is not a subtype of `SupportsComplex` +] + +class integer(number[_NBit1]): # type: ignore + @property + def numerator(self: _ScalarType) -> _ScalarType: ... + @property + def denominator(self) -> L[1]: ... + @overload + def __round__(self, ndigits: None = ...) -> int: ... + @overload + def __round__(self: _ScalarType, ndigits: SupportsIndex) -> _ScalarType: ... + + # NOTE: `__index__` is technically defined in the bottom-most + # sub-classes (`int64`, `uint32`, etc) + def item( + self, args: L[0] | Tuple[()] | Tuple[L[0]] = ..., /, + ) -> int: ... + def tolist(self) -> int: ... + def is_integer(self) -> L[True]: ... + def bit_count(self: _ScalarType) -> int: ... + def __index__(self) -> int: ... + __truediv__: _IntTrueDiv[_NBit1] + __rtruediv__: _IntTrueDiv[_NBit1] + def __mod__(self, value: _IntLike_co) -> integer: ... + def __rmod__(self, value: _IntLike_co) -> integer: ... + def __invert__(self: _IntType) -> _IntType: ... + # Ensure that objects annotated as `integer` support bit-wise operations + def __lshift__(self, other: _IntLike_co) -> integer: ... + def __rlshift__(self, other: _IntLike_co) -> integer: ... + def __rshift__(self, other: _IntLike_co) -> integer: ... + def __rrshift__(self, other: _IntLike_co) -> integer: ... + def __and__(self, other: _IntLike_co) -> integer: ... + def __rand__(self, other: _IntLike_co) -> integer: ... + def __or__(self, other: _IntLike_co) -> integer: ... + def __ror__(self, other: _IntLike_co) -> integer: ... + def __xor__(self, other: _IntLike_co) -> integer: ... + def __rxor__(self, other: _IntLike_co) -> integer: ...
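+# NOTE (editorial illustration, not part of the upstream stubs): the
+# `_DatetimeScalar` protocol above means any object with integer `day`,
+# `month` and `year` attributes, such as `datetime.date`, is accepted by
+# the `datetime64` constructor:
+#
+# >>> import datetime as dt
+# >>> np.datetime64(dt.date(2022, 1, 17))
+# numpy.datetime64('2022-01-17')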
+ +class signedinteger(integer[_NBit1]): + def __init__(self, value: _IntValue = ..., /) -> None: ... + __add__: _SignedIntOp[_NBit1] + __radd__: _SignedIntOp[_NBit1] + __sub__: _SignedIntOp[_NBit1] + __rsub__: _SignedIntOp[_NBit1] + __mul__: _SignedIntOp[_NBit1] + __rmul__: _SignedIntOp[_NBit1] + __floordiv__: _SignedIntOp[_NBit1] + __rfloordiv__: _SignedIntOp[_NBit1] + __pow__: _SignedIntOp[_NBit1] + __rpow__: _SignedIntOp[_NBit1] + __lshift__: _SignedIntBitOp[_NBit1] + __rlshift__: _SignedIntBitOp[_NBit1] + __rshift__: _SignedIntBitOp[_NBit1] + __rrshift__: _SignedIntBitOp[_NBit1] + __and__: _SignedIntBitOp[_NBit1] + __rand__: _SignedIntBitOp[_NBit1] + __xor__: _SignedIntBitOp[_NBit1] + __rxor__: _SignedIntBitOp[_NBit1] + __or__: _SignedIntBitOp[_NBit1] + __ror__: _SignedIntBitOp[_NBit1] + __mod__: _SignedIntMod[_NBit1] + __rmod__: _SignedIntMod[_NBit1] + __divmod__: _SignedIntDivMod[_NBit1] + __rdivmod__: _SignedIntDivMod[_NBit1] + +int8 = signedinteger[_8Bit] +int16 = signedinteger[_16Bit] +int32 = signedinteger[_32Bit] +int64 = signedinteger[_64Bit] + +byte = signedinteger[_NBitByte] +short = signedinteger[_NBitShort] +intc = signedinteger[_NBitIntC] +intp = signedinteger[_NBitIntP] +int0 = signedinteger[_NBitIntP] +int_ = signedinteger[_NBitInt] +longlong = signedinteger[_NBitLongLong] + +# TODO: `item`/`tolist` returns either `dt.timedelta` or `int` +# depending on the unit +class timedelta64(generic): + def __init__( + self, + value: None | int | _CharLike_co | dt.timedelta | timedelta64 = ..., + format: _CharLike_co | Tuple[_CharLike_co, _IntLike_co] = ..., + /, + ) -> None: ... + @property + def numerator(self: _ScalarType) -> _ScalarType: ... + @property + def denominator(self) -> L[1]: ... + + # NOTE: Only a limited number of units support conversion + # to builtin scalar types: `Y`, `M`, `ns`, `ps`, `fs`, `as` + def __int__(self) -> int: ... + def __float__(self) -> float: ... + def __complex__(self) -> complex: ... + def __neg__(self: _ArraySelf) -> _ArraySelf: ... + def __pos__(self: _ArraySelf) -> _ArraySelf: ... + def __abs__(self: _ArraySelf) -> _ArraySelf: ... + def __add__(self, other: _TD64Like_co) -> timedelta64: ... + def __radd__(self, other: _TD64Like_co) -> timedelta64: ... + def __sub__(self, other: _TD64Like_co) -> timedelta64: ... + def __rsub__(self, other: _TD64Like_co) -> timedelta64: ... + def __mul__(self, other: _FloatLike_co) -> timedelta64: ... + def __rmul__(self, other: _FloatLike_co) -> timedelta64: ... + __truediv__: _TD64Div[float64] + __floordiv__: _TD64Div[int64] + def __rtruediv__(self, other: timedelta64) -> float64: ... + def __rfloordiv__(self, other: timedelta64) -> int64: ... + def __mod__(self, other: timedelta64) -> timedelta64: ... + def __rmod__(self, other: timedelta64) -> timedelta64: ... + def __divmod__(self, other: timedelta64) -> Tuple[int64, timedelta64]: ... + def __rdivmod__(self, other: timedelta64) -> Tuple[int64, timedelta64]: ... + __lt__: _ComparisonOp[_TD64Like_co, _ArrayLikeTD64_co] + __le__: _ComparisonOp[_TD64Like_co, _ArrayLikeTD64_co] + __gt__: _ComparisonOp[_TD64Like_co, _ArrayLikeTD64_co] + __ge__: _ComparisonOp[_TD64Like_co, _ArrayLikeTD64_co] + +class unsignedinteger(integer[_NBit1]): + # NOTE: `uint64 + signedinteger -> float64` + def __init__(self, value: _IntValue = ..., /) -> None: ... 
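+    # NOTE (editorial illustration, not part of the upstream stubs): the
+    # NOTE above refers to NumPy's promotion rule for mixing uint64 with
+    # signed integers, which has to fall back to float64:
+    #
+    # >>> type(np.uint64(1) + np.int64(1))
+    # <class 'numpy.float64'>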
+ __add__: _UnsignedIntOp[_NBit1] + __radd__: _UnsignedIntOp[_NBit1] + __sub__: _UnsignedIntOp[_NBit1] + __rsub__: _UnsignedIntOp[_NBit1] + __mul__: _UnsignedIntOp[_NBit1] + __rmul__: _UnsignedIntOp[_NBit1] + __floordiv__: _UnsignedIntOp[_NBit1] + __rfloordiv__: _UnsignedIntOp[_NBit1] + __pow__: _UnsignedIntOp[_NBit1] + __rpow__: _UnsignedIntOp[_NBit1] + __lshift__: _UnsignedIntBitOp[_NBit1] + __rlshift__: _UnsignedIntBitOp[_NBit1] + __rshift__: _UnsignedIntBitOp[_NBit1] + __rrshift__: _UnsignedIntBitOp[_NBit1] + __and__: _UnsignedIntBitOp[_NBit1] + __rand__: _UnsignedIntBitOp[_NBit1] + __xor__: _UnsignedIntBitOp[_NBit1] + __rxor__: _UnsignedIntBitOp[_NBit1] + __or__: _UnsignedIntBitOp[_NBit1] + __ror__: _UnsignedIntBitOp[_NBit1] + __mod__: _UnsignedIntMod[_NBit1] + __rmod__: _UnsignedIntMod[_NBit1] + __divmod__: _UnsignedIntDivMod[_NBit1] + __rdivmod__: _UnsignedIntDivMod[_NBit1] + +uint8 = unsignedinteger[_8Bit] +uint16 = unsignedinteger[_16Bit] +uint32 = unsignedinteger[_32Bit] +uint64 = unsignedinteger[_64Bit] + +ubyte = unsignedinteger[_NBitByte] +ushort = unsignedinteger[_NBitShort] +uintc = unsignedinteger[_NBitIntC] +uintp = unsignedinteger[_NBitIntP] +uint0 = unsignedinteger[_NBitIntP] +uint = unsignedinteger[_NBitInt] +ulonglong = unsignedinteger[_NBitLongLong] + +class inexact(number[_NBit1]): # type: ignore + def __getnewargs__(self: inexact[_64Bit]) -> Tuple[float, ...]: ... + +_IntType = TypeVar("_IntType", bound=integer) +_FloatType = TypeVar('_FloatType', bound=floating) + +class floating(inexact[_NBit1]): + def __init__(self, value: _FloatValue = ..., /) -> None: ... + def item( + self, args: L[0] | Tuple[()] | Tuple[L[0]] = ..., + /, + ) -> float: ... + def tolist(self) -> float: ... + def is_integer(self) -> bool: ... + def hex(self: float64) -> str: ... + @classmethod + def fromhex(cls: Type[float64], string: str, /) -> float64: ... + def as_integer_ratio(self) -> Tuple[int, int]: ... + if sys.version_info >= (3, 9): + def __ceil__(self: float64) -> int: ... + def __floor__(self: float64) -> int: ... + def __trunc__(self: float64) -> int: ... + def __getnewargs__(self: float64) -> Tuple[float]: ... + def __getformat__(self: float64, typestr: L["double", "float"], /) -> str: ... + @overload + def __round__(self, ndigits: None = ...) -> int: ... + @overload + def __round__(self: _ScalarType, ndigits: SupportsIndex) -> _ScalarType: ... + __add__: _FloatOp[_NBit1] + __radd__: _FloatOp[_NBit1] + __sub__: _FloatOp[_NBit1] + __rsub__: _FloatOp[_NBit1] + __mul__: _FloatOp[_NBit1] + __rmul__: _FloatOp[_NBit1] + __truediv__: _FloatOp[_NBit1] + __rtruediv__: _FloatOp[_NBit1] + __floordiv__: _FloatOp[_NBit1] + __rfloordiv__: _FloatOp[_NBit1] + __pow__: _FloatOp[_NBit1] + __rpow__: _FloatOp[_NBit1] + __mod__: _FloatMod[_NBit1] + __rmod__: _FloatMod[_NBit1] + __divmod__: _FloatDivMod[_NBit1] + __rdivmod__: _FloatDivMod[_NBit1] + +float16 = floating[_16Bit] +float32 = floating[_32Bit] +float64 = floating[_64Bit] + +half = floating[_NBitHalf] +single = floating[_NBitSingle] +double = floating[_NBitDouble] +float_ = floating[_NBitDouble] +longdouble = floating[_NBitLongDouble] +longfloat = floating[_NBitLongDouble] + +# The main reason for `complexfloating` having two typevars is cosmetic. +# It is used to clarify why `complex128`'s precision is `_64Bit`, the latter +# describing the two 64 bit floats representing its real and imaginary components + +class complexfloating(inexact[_NBit1], Generic[_NBit1, _NBit2]): + def __init__(self, value: _ComplexValue = ..., /) -> None: ...
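+    # NOTE (editorial illustration, not part of the upstream stubs): each
+    # typevar tracks the precision of one float component, e.g. a
+    # complex64 decomposes into float32 parts:
+    #
+    # >>> type(np.complex64(3 + 4j).real), type(abs(np.complex64(3 + 4j)))
+    # (<class 'numpy.float32'>, <class 'numpy.float32'>)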
+ def item( + self, args: L[0] | Tuple[()] | Tuple[L[0]] = ..., /, + ) -> complex: ... + def tolist(self) -> complex: ... + @property + def real(self) -> floating[_NBit1]: ... # type: ignore[override] + @property + def imag(self) -> floating[_NBit2]: ... # type: ignore[override] + def __abs__(self) -> floating[_NBit1]: ... # type: ignore[override] + def __getnewargs__(self: complex128) -> Tuple[float, float]: ... + # NOTE: Deprecated + # def __round__(self, ndigits=...): ... + __add__: _ComplexOp[_NBit1] + __radd__: _ComplexOp[_NBit1] + __sub__: _ComplexOp[_NBit1] + __rsub__: _ComplexOp[_NBit1] + __mul__: _ComplexOp[_NBit1] + __rmul__: _ComplexOp[_NBit1] + __truediv__: _ComplexOp[_NBit1] + __rtruediv__: _ComplexOp[_NBit1] + __pow__: _ComplexOp[_NBit1] + __rpow__: _ComplexOp[_NBit1] + +complex64 = complexfloating[_32Bit, _32Bit] +complex128 = complexfloating[_64Bit, _64Bit] + +csingle = complexfloating[_NBitSingle, _NBitSingle] +singlecomplex = complexfloating[_NBitSingle, _NBitSingle] +cdouble = complexfloating[_NBitDouble, _NBitDouble] +complex_ = complexfloating[_NBitDouble, _NBitDouble] +cfloat = complexfloating[_NBitDouble, _NBitDouble] +clongdouble = complexfloating[_NBitLongDouble, _NBitLongDouble] +clongfloat = complexfloating[_NBitLongDouble, _NBitLongDouble] +longcomplex = complexfloating[_NBitLongDouble, _NBitLongDouble] + +class flexible(generic): ... # type: ignore + +# TODO: `item`/`tolist` returns either `bytes` or `tuple` +# depending on whether or not it's used as an opaque bytes sequence +# or a structure +class void(flexible): + def __init__(self, value: _IntLike_co | bytes, /) -> None: ... + @property + def real(self: _ArraySelf) -> _ArraySelf: ... + @property + def imag(self: _ArraySelf) -> _ArraySelf: ... + def setfield( + self, val: ArrayLike, dtype: DTypeLike, offset: int = ... + ) -> None: ... + @overload + def __getitem__(self, key: str | SupportsIndex) -> Any: ... + @overload + def __getitem__(self, key: list[str]) -> void: ... + def __setitem__( + self, + key: str | List[str] | SupportsIndex, + value: ArrayLike, + ) -> None: ... + +void0 = void + +class character(flexible): # type: ignore + def __int__(self) -> int: ... + def __float__(self) -> float: ... + +# NOTE: Most `np.bytes_` / `np.str_` methods return their +# builtin `bytes` / `str` counterpart + +class bytes_(character, bytes): + @overload + def __init__(self, value: object = ..., /) -> None: ... + @overload + def __init__( + self, value: str, /, encoding: str = ..., errors: str = ... + ) -> None: ... + def item( + self, args: L[0] | Tuple[()] | Tuple[L[0]] = ..., /, + ) -> bytes: ... + def tolist(self) -> bytes: ... + +string_ = bytes_ +bytes0 = bytes_ + +class str_(character, str): + @overload + def __init__(self, value: object = ..., /) -> None: ... + @overload + def __init__( + self, value: bytes, /, encoding: str = ..., errors: str = ... + ) -> None: ... + def item( + self, args: L[0] | Tuple[()] | Tuple[L[0]] = ..., /, + ) -> str: ... + def tolist(self) -> str: ... 
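+# NOTE (editorial illustration, not part of the upstream stubs): because
+# `str_` and `bytes_` subclass the builtins, inherited methods return the
+# builtin types, as the NOTE above describes:
+#
+# >>> type(np.str_("abc").upper())
+# <class 'str'>
+# >>> type(np.bytes_(b"abc") + b"d")
+# <class 'bytes'>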
+ +unicode_ = str_ +str0 = str_ + +# +# Constants +# + +Inf: Final[float] +Infinity: Final[float] +NAN: Final[float] +NINF: Final[float] +NZERO: Final[float] +NaN: Final[float] +PINF: Final[float] +PZERO: Final[float] +e: Final[float] +euler_gamma: Final[float] +inf: Final[float] +infty: Final[float] +nan: Final[float] +pi: Final[float] + +CLIP: L[0] +WRAP: L[1] +RAISE: L[2] + +ERR_IGNORE: L[0] +ERR_WARN: L[1] +ERR_RAISE: L[2] +ERR_CALL: L[3] +ERR_PRINT: L[4] +ERR_LOG: L[5] +ERR_DEFAULT: L[521] + +SHIFT_DIVIDEBYZERO: L[0] +SHIFT_OVERFLOW: L[3] +SHIFT_UNDERFLOW: L[6] +SHIFT_INVALID: L[9] + +FPE_DIVIDEBYZERO: L[1] +FPE_OVERFLOW: L[2] +FPE_UNDERFLOW: L[4] +FPE_INVALID: L[8] + +FLOATING_POINT_SUPPORT: L[1] +UFUNC_BUFSIZE_DEFAULT = BUFSIZE + +little_endian: Final[bool] +True_: Final[bool_] +False_: Final[bool_] + +UFUNC_PYVALS_NAME: L["UFUNC_PYVALS"] + +newaxis: None + +# See `npt._ufunc` for more concrete nin-/nout-specific stubs +class ufunc: + @property + def __name__(self) -> str: ... + @property + def __doc__(self) -> str: ... + __call__: Callable[..., Any] + @property + def nin(self) -> int: ... + @property + def nout(self) -> int: ... + @property + def nargs(self) -> int: ... + @property + def ntypes(self) -> int: ... + @property + def types(self) -> List[str]: ... + # Broad return type because it has to encompass things like + # + # >>> np.logical_and.identity is True + # True + # >>> np.add.identity is 0 + # True + # >>> np.sin.identity is None + # True + # + # and any user-defined ufuncs. + @property + def identity(self) -> Any: ... + # This is None for ufuncs and a string for gufuncs. + @property + def signature(self) -> Optional[str]: ... + # The next four methods will always exist, but they will just + # raise a ValueError for ufuncs that don't accept two input + # arguments and return one output argument. Because of that we + # can't type them very precisely. + reduce: Any + accumulate: Any + reduceat: Any + outer: Any + # Similarly `at` won't be defined for ufuncs that return multiple + # outputs, so we can't type it very precisely.
+ at: Any + +# Parameters: `__name__`, `ntypes` and `identity` +absolute: _UFunc_Nin1_Nout1[L['absolute'], L[20], None] +add: _UFunc_Nin2_Nout1[L['add'], L[22], L[0]] +arccos: _UFunc_Nin1_Nout1[L['arccos'], L[8], None] +arccosh: _UFunc_Nin1_Nout1[L['arccosh'], L[8], None] +arcsin: _UFunc_Nin1_Nout1[L['arcsin'], L[8], None] +arcsinh: _UFunc_Nin1_Nout1[L['arcsinh'], L[8], None] +arctan2: _UFunc_Nin2_Nout1[L['arctan2'], L[5], None] +arctan: _UFunc_Nin1_Nout1[L['arctan'], L[8], None] +arctanh: _UFunc_Nin1_Nout1[L['arctanh'], L[8], None] +bitwise_and: _UFunc_Nin2_Nout1[L['bitwise_and'], L[12], L[-1]] +bitwise_not: _UFunc_Nin1_Nout1[L['invert'], L[12], None] +bitwise_or: _UFunc_Nin2_Nout1[L['bitwise_or'], L[12], L[0]] +bitwise_xor: _UFunc_Nin2_Nout1[L['bitwise_xor'], L[12], L[0]] +cbrt: _UFunc_Nin1_Nout1[L['cbrt'], L[5], None] +ceil: _UFunc_Nin1_Nout1[L['ceil'], L[7], None] +conj: _UFunc_Nin1_Nout1[L['conjugate'], L[18], None] +conjugate: _UFunc_Nin1_Nout1[L['conjugate'], L[18], None] +copysign: _UFunc_Nin2_Nout1[L['copysign'], L[4], None] +cos: _UFunc_Nin1_Nout1[L['cos'], L[9], None] +cosh: _UFunc_Nin1_Nout1[L['cosh'], L[8], None] +deg2rad: _UFunc_Nin1_Nout1[L['deg2rad'], L[5], None] +degrees: _UFunc_Nin1_Nout1[L['degrees'], L[5], None] +divide: _UFunc_Nin2_Nout1[L['true_divide'], L[11], None] +divmod: _UFunc_Nin2_Nout2[L['divmod'], L[15], None] +equal: _UFunc_Nin2_Nout1[L['equal'], L[23], None] +exp2: _UFunc_Nin1_Nout1[L['exp2'], L[8], None] +exp: _UFunc_Nin1_Nout1[L['exp'], L[10], None] +expm1: _UFunc_Nin1_Nout1[L['expm1'], L[8], None] +fabs: _UFunc_Nin1_Nout1[L['fabs'], L[5], None] +float_power: _UFunc_Nin2_Nout1[L['float_power'], L[4], None] +floor: _UFunc_Nin1_Nout1[L['floor'], L[7], None] +floor_divide: _UFunc_Nin2_Nout1[L['floor_divide'], L[21], None] +fmax: _UFunc_Nin2_Nout1[L['fmax'], L[21], None] +fmin: _UFunc_Nin2_Nout1[L['fmin'], L[21], None] +fmod: _UFunc_Nin2_Nout1[L['fmod'], L[15], None] +frexp: _UFunc_Nin1_Nout2[L['frexp'], L[4], None] +gcd: _UFunc_Nin2_Nout1[L['gcd'], L[11], L[0]] +greater: _UFunc_Nin2_Nout1[L['greater'], L[23], None] +greater_equal: _UFunc_Nin2_Nout1[L['greater_equal'], L[23], None] +heaviside: _UFunc_Nin2_Nout1[L['heaviside'], L[4], None] +hypot: _UFunc_Nin2_Nout1[L['hypot'], L[5], L[0]] +invert: _UFunc_Nin1_Nout1[L['invert'], L[12], None] +isfinite: _UFunc_Nin1_Nout1[L['isfinite'], L[20], None] +isinf: _UFunc_Nin1_Nout1[L['isinf'], L[20], None] +isnan: _UFunc_Nin1_Nout1[L['isnan'], L[20], None] +isnat: _UFunc_Nin1_Nout1[L['isnat'], L[2], None] +lcm: _UFunc_Nin2_Nout1[L['lcm'], L[11], None] +ldexp: _UFunc_Nin2_Nout1[L['ldexp'], L[8], None] +left_shift: _UFunc_Nin2_Nout1[L['left_shift'], L[11], None] +less: _UFunc_Nin2_Nout1[L['less'], L[23], None] +less_equal: _UFunc_Nin2_Nout1[L['less_equal'], L[23], None] +log10: _UFunc_Nin1_Nout1[L['log10'], L[8], None] +log1p: _UFunc_Nin1_Nout1[L['log1p'], L[8], None] +log2: _UFunc_Nin1_Nout1[L['log2'], L[8], None] +log: _UFunc_Nin1_Nout1[L['log'], L[10], None] +logaddexp2: _UFunc_Nin2_Nout1[L['logaddexp2'], L[4], float] +logaddexp: _UFunc_Nin2_Nout1[L['logaddexp'], L[4], float] +logical_and: _UFunc_Nin2_Nout1[L['logical_and'], L[20], L[True]] +logical_not: _UFunc_Nin1_Nout1[L['logical_not'], L[20], None] +logical_or: _UFunc_Nin2_Nout1[L['logical_or'], L[20], L[False]] +logical_xor: _UFunc_Nin2_Nout1[L['logical_xor'], L[19], L[False]] +matmul: _GUFunc_Nin2_Nout1[L['matmul'], L[19], None] +maximum: _UFunc_Nin2_Nout1[L['maximum'], L[21], None] +minimum: _UFunc_Nin2_Nout1[L['minimum'], L[21], None] +mod: 
_UFunc_Nin2_Nout1[L['remainder'], L[16], None] +modf: _UFunc_Nin1_Nout2[L['modf'], L[4], None] +multiply: _UFunc_Nin2_Nout1[L['multiply'], L[23], L[1]] +negative: _UFunc_Nin1_Nout1[L['negative'], L[19], None] +nextafter: _UFunc_Nin2_Nout1[L['nextafter'], L[4], None] +not_equal: _UFunc_Nin2_Nout1[L['not_equal'], L[23], None] +positive: _UFunc_Nin1_Nout1[L['positive'], L[19], None] +power: _UFunc_Nin2_Nout1[L['power'], L[18], None] +rad2deg: _UFunc_Nin1_Nout1[L['rad2deg'], L[5], None] +radians: _UFunc_Nin1_Nout1[L['radians'], L[5], None] +reciprocal: _UFunc_Nin1_Nout1[L['reciprocal'], L[18], None] +remainder: _UFunc_Nin2_Nout1[L['remainder'], L[16], None] +right_shift: _UFunc_Nin2_Nout1[L['right_shift'], L[11], None] +rint: _UFunc_Nin1_Nout1[L['rint'], L[10], None] +sign: _UFunc_Nin1_Nout1[L['sign'], L[19], None] +signbit: _UFunc_Nin1_Nout1[L['signbit'], L[4], None] +sin: _UFunc_Nin1_Nout1[L['sin'], L[9], None] +sinh: _UFunc_Nin1_Nout1[L['sinh'], L[8], None] +spacing: _UFunc_Nin1_Nout1[L['spacing'], L[4], None] +sqrt: _UFunc_Nin1_Nout1[L['sqrt'], L[10], None] +square: _UFunc_Nin1_Nout1[L['square'], L[18], None] +subtract: _UFunc_Nin2_Nout1[L['subtract'], L[21], None] +tan: _UFunc_Nin1_Nout1[L['tan'], L[8], None] +tanh: _UFunc_Nin1_Nout1[L['tanh'], L[8], None] +true_divide: _UFunc_Nin2_Nout1[L['true_divide'], L[11], None] +trunc: _UFunc_Nin1_Nout1[L['trunc'], L[7], None] + +abs = absolute + +class _CopyMode(enum.Enum): + ALWAYS: L[True] + IF_NEEDED: L[False] + NEVER: L[2] + +# Warnings +class ModuleDeprecationWarning(DeprecationWarning): ... +class VisibleDeprecationWarning(UserWarning): ... +class ComplexWarning(RuntimeWarning): ... +class RankWarning(UserWarning): ... + +# Errors +class TooHardError(RuntimeError): ... + +class AxisError(ValueError, IndexError): + axis: None | int + ndim: None | int + @overload + def __init__(self, axis: str, ndim: None = ..., msg_prefix: None = ...) -> None: ... + @overload + def __init__(self, axis: int, ndim: int, msg_prefix: None | str = ...) -> None: ... + +_CallType = TypeVar("_CallType", bound=Union[_ErrFunc, _SupportsWrite[str]]) + +class errstate(Generic[_CallType], ContextDecorator): + call: _CallType + kwargs: _ErrDictOptional + + # Expand `**kwargs` into explicit keyword-only arguments + def __init__( + self, + *, + call: _CallType = ..., + all: Optional[_ErrKind] = ..., + divide: Optional[_ErrKind] = ..., + over: Optional[_ErrKind] = ..., + under: Optional[_ErrKind] = ..., + invalid: Optional[_ErrKind] = ..., + ) -> None: ... + def __enter__(self) -> None: ... + def __exit__( + self, + exc_type: Optional[Type[BaseException]], + exc_value: Optional[BaseException], + traceback: Optional[TracebackType], + /, + ) -> None: ... + +class ndenumerate(Generic[_ScalarType]): + iter: flatiter[NDArray[_ScalarType]] + @overload + def __new__( + cls, arr: _FiniteNestedSequence[_SupportsArray[dtype[_ScalarType]]], + ) -> ndenumerate[_ScalarType]: ... + @overload + def __new__(cls, arr: str | _NestedSequence[str]) -> ndenumerate[str_]: ... + @overload + def __new__(cls, arr: bytes | _NestedSequence[bytes]) -> ndenumerate[bytes_]: ... + @overload + def __new__(cls, arr: bool | _NestedSequence[bool]) -> ndenumerate[bool_]: ... + @overload + def __new__(cls, arr: int | _NestedSequence[int]) -> ndenumerate[int_]: ... + @overload + def __new__(cls, arr: float | _NestedSequence[float]) -> ndenumerate[float_]: ... + @overload + def __new__(cls, arr: complex | _NestedSequence[complex]) -> ndenumerate[complex_]: ... 
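+    # NOTE (editorial illustration, not part of the upstream stubs): the
+    # overloads above map builtin element types onto NumPy scalar types,
+    # so iteration yields index/scalar pairs:
+    #
+    # >>> list(np.ndenumerate([[1, 2], [3, 4]]))
+    # [((0, 0), 1), ((0, 1), 2), ((1, 0), 3), ((1, 1), 4)]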
+ def __next__(self: ndenumerate[_ScalarType]) -> Tuple[_Shape, _ScalarType]: ... + def __iter__(self: _T) -> _T: ... + +class ndindex: + @overload + def __init__(self, shape: tuple[SupportsIndex, ...], /) -> None: ... + @overload + def __init__(self, *shape: SupportsIndex) -> None: ... + def __iter__(self: _T) -> _T: ... + def __next__(self) -> _Shape: ... + +class DataSource: + def __init__( + self, + destpath: Union[None, str, os.PathLike[str]] = ..., + ) -> None: ... + def __del__(self) -> None: ... + def abspath(self, path: str) -> str: ... + def exists(self, path: str) -> bool: ... + + # Whether the file-object is opened in string or bytes mode (by default) + # depends on the file-extension of `path` + def open( + self, + path: str, + mode: str = ..., + encoding: Optional[str] = ..., + newline: Optional[str] = ..., + ) -> IO[Any]: ... + +# TODO: The type of each `__next__` and `iters` return-type depends +# on the length and dtype of `args`; we can't describe this behavior yet +# as we lack variadics (PEP 646). +class broadcast: + def __new__(cls, *args: ArrayLike) -> broadcast: ... + @property + def index(self) -> int: ... + @property + def iters(self) -> Tuple[flatiter[Any], ...]: ... + @property + def nd(self) -> int: ... + @property + def ndim(self) -> int: ... + @property + def numiter(self) -> int: ... + @property + def shape(self) -> _Shape: ... + @property + def size(self) -> int: ... + def __next__(self) -> Tuple[Any, ...]: ... + def __iter__(self: _T) -> _T: ... + def reset(self) -> None: ... + +class busdaycalendar: + def __new__( + cls, + weekmask: ArrayLike = ..., + holidays: ArrayLike | dt.date | _NestedSequence[dt.date] = ..., + ) -> busdaycalendar: ... + @property + def weekmask(self) -> NDArray[bool_]: ... + @property + def holidays(self) -> NDArray[datetime64]: ... + +class finfo(Generic[_FloatType]): + dtype: dtype[_FloatType] + bits: int + eps: _FloatType + epsneg: _FloatType + iexp: int + machep: int + max: _FloatType + maxexp: int + min: _FloatType + minexp: int + negep: int + nexp: int + nmant: int + precision: int + resolution: _FloatType + smallest_subnormal: _FloatType + @property + def smallest_normal(self) -> _FloatType: ... + @property + def tiny(self) -> _FloatType: ... + @overload + def __new__( + cls, dtype: inexact[_NBit1] | _DTypeLike[inexact[_NBit1]] + ) -> finfo[floating[_NBit1]]: ... + @overload + def __new__( + cls, dtype: complex | float | Type[complex] | Type[float] + ) -> finfo[float_]: ... + @overload + def __new__( + cls, dtype: str + ) -> finfo[floating[Any]]: ... + +class iinfo(Generic[_IntType]): + dtype: dtype[_IntType] + kind: str + bits: int + key: str + @property + def min(self) -> int: ... + @property + def max(self) -> int: ... + + @overload + def __new__(cls, dtype: _IntType | _DTypeLike[_IntType]) -> iinfo[_IntType]: ... + @overload + def __new__(cls, dtype: int | Type[int]) -> iinfo[int_]: ... + @overload + def __new__(cls, dtype: str) -> iinfo[Any]: ... + +class format_parser: + dtype: dtype[void] + def __init__( + self, + formats: DTypeLike, + names: None | str | Sequence[str], + titles: None | str | Sequence[str], + aligned: bool = ..., + byteorder: None | _ByteOrder = ..., + ) -> None: ... 
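+# NOTE (editorial illustration, not part of the upstream stubs): a minimal
+# `format_parser` call, showing why `names` and `titles` also accept None
+# (output shown for a little-endian platform):
+#
+# >>> np.format_parser(["i4", "f8"], ["id", "x"], None).dtype
+# dtype([('id', '<i4'), ('x', '<f8')])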
+ +class recarray(ndarray[_ShapeType, _DType_co]): + # NOTE: While not strictly mandatory, we're demanding here that arguments + # for the `format_parser`- and `dtype`-based dtype constructors are + # mutually exclusive + @overload + def __new__( + subtype, + shape: _ShapeLike, + dtype: None = ..., + buf: None | _SupportsBuffer = ..., + offset: SupportsIndex = ..., + strides: None | _ShapeLike = ..., + *, + formats: DTypeLike, + names: None | str | Sequence[str] = ..., + titles: None | str | Sequence[str] = ..., + byteorder: None | _ByteOrder = ..., + aligned: bool = ..., + order: _OrderKACF = ..., + ) -> recarray[Any, dtype[record]]: ... + @overload + def __new__( + subtype, + shape: _ShapeLike, + dtype: DTypeLike, + buf: None | _SupportsBuffer = ..., + offset: SupportsIndex = ..., + strides: None | _ShapeLike = ..., + formats: None = ..., + names: None = ..., + titles: None = ..., + byteorder: None = ..., + aligned: L[False] = ..., + order: _OrderKACF = ..., + ) -> recarray[Any, dtype[Any]]: ... + def __array_finalize__(self, obj: object) -> None: ... + def __getattribute__(self, attr: str) -> Any: ... + def __setattr__(self, attr: str, val: ArrayLike) -> None: ... + @overload + def __getitem__(self, indx: Union[ + SupportsIndex, + _ArrayLikeInt_co, + Tuple[SupportsIndex | _ArrayLikeInt_co, ...], + ]) -> Any: ... + @overload + def __getitem__(self: recarray[Any, dtype[void]], indx: Union[ + None, + slice, + ellipsis, + SupportsIndex, + _ArrayLikeInt_co, + Tuple[None | slice | ellipsis | _ArrayLikeInt_co | SupportsIndex, ...], + ]) -> recarray[Any, _DType_co]: ... + @overload + def __getitem__(self, indx: Union[ + None, + slice, + ellipsis, + SupportsIndex, + _ArrayLikeInt_co, + Tuple[None | slice | ellipsis | _ArrayLikeInt_co | SupportsIndex, ...], + ]) -> ndarray[Any, _DType_co]: ... + @overload + def __getitem__(self, indx: str) -> NDArray[Any]: ... + @overload + def __getitem__(self, indx: list[str]) -> recarray[_ShapeType, dtype[record]]: ... + @overload + def field(self, attr: int | str, val: None = ...) -> Any: ... + @overload + def field(self, attr: int | str, val: ArrayLike) -> None: ... + +class record(void): + def __getattribute__(self, attr: str) -> Any: ... + def __setattr__(self, attr: str, val: ArrayLike) -> None: ... + def pprint(self) -> str: ... + @overload + def __getitem__(self, key: str | SupportsIndex) -> Any: ... + @overload + def __getitem__(self, key: list[str]) -> record: ... + +_NDIterFlagsKind = L[ + "buffered", + "c_index", + "copy_if_overlap", + "common_dtype", + "delay_bufalloc", + "external_loop", + "f_index", + "grow_inner", "growinner", + "multi_index", + "ranged", + "refs_ok", + "reduce_ok", + "zerosize_ok", +] + +_NDIterOpFlagsKind = L[ + "aligned", + "allocate", + "arraymask", + "copy", + "config", + "nbo", + "no_subtype", + "no_broadcast", + "overlap_assume_elementwise", + "readonly", + "readwrite", + "updateifcopy", + "virtual", + "writeonly", + "writemasked" +] + +@final +class nditer: + def __new__( + cls, + op: ArrayLike | Sequence[ArrayLike], + flags: None | Sequence[_NDIterFlagsKind] = ..., + op_flags: None | Sequence[Sequence[_NDIterOpFlagsKind]] = ..., + op_dtypes: DTypeLike | Sequence[DTypeLike] = ..., + order: _OrderKACF = ..., + casting: _CastingKind = ..., + op_axes: None | Sequence[Sequence[SupportsIndex]] = ..., + itershape: None | _ShapeLike = ..., + buffersize: SupportsIndex = ..., + ) -> nditer: ... + def __enter__(self) -> nditer: ... 
+ def __exit__( + self, + exc_type: None | Type[BaseException], + exc_value: None | BaseException, + traceback: None | TracebackType, + ) -> None: ... + def __iter__(self) -> nditer: ... + def __next__(self) -> Tuple[NDArray[Any], ...]: ... + def __len__(self) -> int: ... + def __copy__(self) -> nditer: ... + @overload + def __getitem__(self, index: SupportsIndex) -> NDArray[Any]: ... + @overload + def __getitem__(self, index: slice) -> Tuple[NDArray[Any], ...]: ... + def __setitem__(self, index: slice | SupportsIndex, value: ArrayLike) -> None: ... + def close(self) -> None: ... + def copy(self) -> nditer: ... + def debug_print(self) -> None: ... + def enable_external_loop(self) -> None: ... + def iternext(self) -> bool: ... + def remove_axis(self, i: SupportsIndex, /) -> None: ... + def remove_multi_index(self) -> None: ... + def reset(self) -> None: ... + @property + def dtypes(self) -> Tuple[dtype[Any], ...]: ... + @property + def finished(self) -> bool: ... + @property + def has_delayed_bufalloc(self) -> bool: ... + @property + def has_index(self) -> bool: ... + @property + def has_multi_index(self) -> bool: ... + @property + def index(self) -> int: ... + @property + def iterationneedsapi(self) -> bool: ... + @property + def iterindex(self) -> int: ... + @property + def iterrange(self) -> Tuple[int, ...]: ... + @property + def itersize(self) -> int: ... + @property + def itviews(self) -> Tuple[NDArray[Any], ...]: ... + @property + def multi_index(self) -> Tuple[int, ...]: ... + @property + def ndim(self) -> int: ... + @property + def nop(self) -> int: ... + @property + def operands(self) -> Tuple[NDArray[Any], ...]: ... + @property + def shape(self) -> Tuple[int, ...]: ... + @property + def value(self) -> Tuple[NDArray[Any], ...]: ... + +_MemMapModeKind = L[ + "readonly", "r", + "copyonwrite", "c", + "readwrite", "r+", + "write", "w+", +] + +class memmap(ndarray[_ShapeType, _DType_co]): + __array_priority__: ClassVar[float] + filename: str | None + offset: int + mode: str + @overload + def __new__( + subtype, + filename: str | bytes | os.PathLike[str] | os.PathLike[bytes] | _MemMapIOProtocol, + dtype: Type[uint8] = ..., + mode: _MemMapModeKind = ..., + offset: int = ..., + shape: None | int | Tuple[int, ...] = ..., + order: _OrderKACF = ..., + ) -> memmap[Any, dtype[uint8]]: ... + @overload + def __new__( + subtype, + filename: str | bytes | os.PathLike[str] | os.PathLike[bytes] | _MemMapIOProtocol, + dtype: _DTypeLike[_ScalarType], + mode: _MemMapModeKind = ..., + offset: int = ..., + shape: None | int | Tuple[int, ...] = ..., + order: _OrderKACF = ..., + ) -> memmap[Any, dtype[_ScalarType]]: ... + @overload + def __new__( + subtype, + filename: str | bytes | os.PathLike[str] | os.PathLike[bytes] | _MemMapIOProtocol, + dtype: DTypeLike, + mode: _MemMapModeKind = ..., + offset: int = ..., + shape: None | int | Tuple[int, ...] = ..., + order: _OrderKACF = ..., + ) -> memmap[Any, dtype[Any]]: ... + def __array_finalize__(self, obj: memmap[Any, Any]) -> None: ... + def __array_wrap__( + self, + array: memmap[_ShapeType, _DType_co], + context: None | Tuple[ufunc, Tuple[Any, ...], int] = ..., + ) -> Any: ... + def flush(self) -> None: ... + +# TODO: Add a mypy plugin for managing functions whose output type is dependent +# on the literal value of some sort of signature (e.g.
`einsum` and `vectorize`) +class vectorize: + pyfunc: Callable[..., Any] + cache: bool + signature: None | str + otypes: None | str + excluded: Set[int | str] + __doc__: None | str + def __init__( + self, + pyfunc: Callable[..., Any], + otypes: None | str | Iterable[DTypeLike] = ..., + doc: None | str = ..., + excluded: None | Iterable[int | str] = ..., + cache: bool = ..., + signature: None | str = ..., + ) -> None: ... + def __call__(self, *args: Any, **kwargs: Any) -> Any: ... + +class poly1d: + @property + def variable(self) -> str: ... + @property + def order(self) -> int: ... + @property + def o(self) -> int: ... + @property + def roots(self) -> NDArray[Any]: ... + @property + def r(self) -> NDArray[Any]: ... + + @property + def coeffs(self) -> NDArray[Any]: ... + @coeffs.setter + def coeffs(self, value: NDArray[Any]) -> None: ... + + @property + def c(self) -> NDArray[Any]: ... + @c.setter + def c(self, value: NDArray[Any]) -> None: ... + + @property + def coef(self) -> NDArray[Any]: ... + @coef.setter + def coef(self, value: NDArray[Any]) -> None: ... + + @property + def coefficients(self) -> NDArray[Any]: ... + @coefficients.setter + def coefficients(self, value: NDArray[Any]) -> None: ... + + __hash__: None # type: ignore + + @overload + def __array__(self, t: None = ...) -> NDArray[Any]: ... + @overload + def __array__(self, t: _DType) -> ndarray[Any, _DType]: ... + + @overload + def __call__(self, val: _ScalarLike_co) -> Any: ... + @overload + def __call__(self, val: poly1d) -> poly1d: ... + @overload + def __call__(self, val: ArrayLike) -> NDArray[Any]: ... + + def __init__( + self, + c_or_r: ArrayLike, + r: bool = ..., + variable: None | str = ..., + ) -> None: ... + def __len__(self) -> int: ... + def __neg__(self) -> poly1d: ... + def __pos__(self) -> poly1d: ... + def __mul__(self, other: ArrayLike) -> poly1d: ... + def __rmul__(self, other: ArrayLike) -> poly1d: ... + def __add__(self, other: ArrayLike) -> poly1d: ... + def __radd__(self, other: ArrayLike) -> poly1d: ... + def __pow__(self, val: _FloatLike_co) -> poly1d: ... # Integral floats are accepted + def __sub__(self, other: ArrayLike) -> poly1d: ... + def __rsub__(self, other: ArrayLike) -> poly1d: ... + def __div__(self, other: ArrayLike) -> poly1d: ... + def __truediv__(self, other: ArrayLike) -> poly1d: ... + def __rdiv__(self, other: ArrayLike) -> poly1d: ... + def __rtruediv__(self, other: ArrayLike) -> poly1d: ... + def __getitem__(self, val: int) -> Any: ... + def __setitem__(self, key: int, val: Any) -> None: ... + def __iter__(self) -> Iterator[Any]: ... + def deriv(self, m: SupportsInt | SupportsIndex = ...) -> poly1d: ... + def integ( + self, + m: SupportsInt | SupportsIndex = ..., + k: None | _ArrayLikeComplex_co | _ArrayLikeObject_co = ..., + ) -> poly1d: ... + +class matrix(ndarray[_ShapeType, _DType_co]): + __array_priority__: ClassVar[float] + def __new__( + subtype, + data: ArrayLike, + dtype: DTypeLike = ..., + copy: bool = ..., + ) -> matrix[Any, Any]: ... + def __array_finalize__(self, obj: NDArray[Any]) -> None: ... + + @overload + def __getitem__(self, key: Union[ + SupportsIndex, + _ArrayLikeInt_co, + Tuple[SupportsIndex | _ArrayLikeInt_co, ...], + ]) -> Any: ... + @overload + def __getitem__(self, key: Union[ + None, + slice, + ellipsis, + SupportsIndex, + _ArrayLikeInt_co, + Tuple[None | slice | ellipsis | _ArrayLikeInt_co | SupportsIndex, ...], + ]) -> matrix[Any, _DType_co]: ... + @overload + def __getitem__(self: NDArray[void], key: str) -> matrix[Any, dtype[Any]]: ... 
+ @overload + def __getitem__(self: NDArray[void], key: list[str]) -> matrix[_ShapeType, dtype[void]]: ... + + def __mul__(self, other: ArrayLike) -> matrix[Any, Any]: ... + def __rmul__(self, other: ArrayLike) -> matrix[Any, Any]: ... + def __imul__(self, other: ArrayLike) -> matrix[_ShapeType, _DType_co]: ... + def __pow__(self, other: ArrayLike) -> matrix[Any, Any]: ... + def __ipow__(self, other: ArrayLike) -> matrix[_ShapeType, _DType_co]: ... + + @overload + def sum(self, axis: None = ..., dtype: DTypeLike = ..., out: None = ...) -> Any: ... + @overload + def sum(self, axis: _ShapeLike, dtype: DTypeLike = ..., out: None = ...) -> matrix[Any, Any]: ... + @overload + def sum(self, axis: None | _ShapeLike = ..., dtype: DTypeLike = ..., out: _NdArraySubClass = ...) -> _NdArraySubClass: ... + + @overload + def mean(self, axis: None = ..., dtype: DTypeLike = ..., out: None = ...) -> Any: ... + @overload + def mean(self, axis: _ShapeLike, dtype: DTypeLike = ..., out: None = ...) -> matrix[Any, Any]: ... + @overload + def mean(self, axis: None | _ShapeLike = ..., dtype: DTypeLike = ..., out: _NdArraySubClass = ...) -> _NdArraySubClass: ... + + @overload + def std(self, axis: None = ..., dtype: DTypeLike = ..., out: None = ..., ddof: float = ...) -> Any: ... + @overload + def std(self, axis: _ShapeLike, dtype: DTypeLike = ..., out: None = ..., ddof: float = ...) -> matrix[Any, Any]: ... + @overload + def std(self, axis: None | _ShapeLike = ..., dtype: DTypeLike = ..., out: _NdArraySubClass = ..., ddof: float = ...) -> _NdArraySubClass: ... + + @overload + def var(self, axis: None = ..., dtype: DTypeLike = ..., out: None = ..., ddof: float = ...) -> Any: ... + @overload + def var(self, axis: _ShapeLike, dtype: DTypeLike = ..., out: None = ..., ddof: float = ...) -> matrix[Any, Any]: ... + @overload + def var(self, axis: None | _ShapeLike = ..., dtype: DTypeLike = ..., out: _NdArraySubClass = ..., ddof: float = ...) -> _NdArraySubClass: ... + + @overload + def prod(self, axis: None = ..., dtype: DTypeLike = ..., out: None = ...) -> Any: ... + @overload + def prod(self, axis: _ShapeLike, dtype: DTypeLike = ..., out: None = ...) -> matrix[Any, Any]: ... + @overload + def prod(self, axis: None | _ShapeLike = ..., dtype: DTypeLike = ..., out: _NdArraySubClass = ...) -> _NdArraySubClass: ... + + @overload + def any(self, axis: None = ..., out: None = ...) -> bool_: ... + @overload + def any(self, axis: _ShapeLike, out: None = ...) -> matrix[Any, dtype[bool_]]: ... + @overload + def any(self, axis: None | _ShapeLike = ..., out: _NdArraySubClass = ...) -> _NdArraySubClass: ... + + @overload + def all(self, axis: None = ..., out: None = ...) -> bool_: ... + @overload + def all(self, axis: _ShapeLike, out: None = ...) -> matrix[Any, dtype[bool_]]: ... + @overload + def all(self, axis: None | _ShapeLike = ..., out: _NdArraySubClass = ...) -> _NdArraySubClass: ... + + @overload + def max(self: NDArray[_ScalarType], axis: None = ..., out: None = ...) -> _ScalarType: ... + @overload + def max(self, axis: _ShapeLike, out: None = ...) -> matrix[Any, _DType_co]: ... + @overload + def max(self, axis: None | _ShapeLike = ..., out: _NdArraySubClass = ...) -> _NdArraySubClass: ... + + @overload + def min(self: NDArray[_ScalarType], axis: None = ..., out: None = ...) -> _ScalarType: ... + @overload + def min(self, axis: _ShapeLike, out: None = ...) -> matrix[Any, _DType_co]: ... + @overload + def min(self, axis: None | _ShapeLike = ..., out: _NdArraySubClass = ...) -> _NdArraySubClass: ... 
+ + @overload + def argmax(self: NDArray[_ScalarType], axis: None = ..., out: None = ...) -> intp: ... + @overload + def argmax(self, axis: _ShapeLike, out: None = ...) -> matrix[Any, dtype[intp]]: ... + @overload + def argmax(self, axis: None | _ShapeLike = ..., out: _NdArraySubClass = ...) -> _NdArraySubClass: ... + + @overload + def argmin(self: NDArray[_ScalarType], axis: None = ..., out: None = ...) -> intp: ... + @overload + def argmin(self, axis: _ShapeLike, out: None = ...) -> matrix[Any, dtype[intp]]: ... + @overload + def argmin(self, axis: None | _ShapeLike = ..., out: _NdArraySubClass = ...) -> _NdArraySubClass: ... + + @overload + def ptp(self: NDArray[_ScalarType], axis: None = ..., out: None = ...) -> _ScalarType: ... + @overload + def ptp(self, axis: _ShapeLike, out: None = ...) -> matrix[Any, _DType_co]: ... + @overload + def ptp(self, axis: None | _ShapeLike = ..., out: _NdArraySubClass = ...) -> _NdArraySubClass: ... + + def squeeze(self, axis: None | _ShapeLike = ...) -> matrix[Any, _DType_co]: ... + def tolist(self: matrix[Any, dtype[_SupportsItem[_T]]]) -> List[List[_T]]: ... # type: ignore[typevar] + def ravel(self, order: _OrderKACF = ...) -> matrix[Any, _DType_co]: ... + def flatten(self, order: _OrderKACF = ...) -> matrix[Any, _DType_co]: ... + + @property + def T(self) -> matrix[Any, _DType_co]: ... + @property + def I(self) -> matrix[Any, Any]: ... + @property + def A(self) -> ndarray[_ShapeType, _DType_co]: ... + @property + def A1(self) -> ndarray[Any, _DType_co]: ... + @property + def H(self) -> matrix[Any, _DType_co]: ... + def getT(self) -> matrix[Any, _DType_co]: ... + def getI(self) -> matrix[Any, Any]: ... + def getA(self) -> ndarray[_ShapeType, _DType_co]: ... + def getA1(self) -> ndarray[Any, _DType_co]: ... + def getH(self) -> matrix[Any, _DType_co]: ... + +_CharType = TypeVar("_CharType", str_, bytes_) +_CharDType = TypeVar("_CharDType", dtype[str_], dtype[bytes_]) +_CharArray = chararray[Any, dtype[_CharType]] + +class chararray(ndarray[_ShapeType, _CharDType]): + @overload + def __new__( + subtype, + shape: _ShapeLike, + itemsize: SupportsIndex | SupportsInt = ..., + unicode: L[False] = ..., + buffer: _SupportsBuffer = ..., + offset: SupportsIndex = ..., + strides: _ShapeLike = ..., + order: _OrderKACF = ..., + ) -> chararray[Any, dtype[bytes_]]: ... + @overload + def __new__( + subtype, + shape: _ShapeLike, + itemsize: SupportsIndex | SupportsInt = ..., + unicode: L[True] = ..., + buffer: _SupportsBuffer = ..., + offset: SupportsIndex = ..., + strides: _ShapeLike = ..., + order: _OrderKACF = ..., + ) -> chararray[Any, dtype[str_]]: ... + + def __array_finalize__(self, obj: NDArray[str_ | bytes_]) -> None: ... + def __mul__(self, other: _ArrayLikeInt_co) -> chararray[Any, _CharDType]: ... + def __rmul__(self, other: _ArrayLikeInt_co) -> chararray[Any, _CharDType]: ... + def __mod__(self, i: Any) -> chararray[Any, _CharDType]: ... + + @overload + def __eq__( + self: _CharArray[str_], + other: _ArrayLikeStr_co, + ) -> NDArray[bool_]: ... + @overload + def __eq__( + self: _CharArray[bytes_], + other: _ArrayLikeBytes_co, + ) -> NDArray[bool_]: ... + + @overload + def __ne__( + self: _CharArray[str_], + other: _ArrayLikeStr_co, + ) -> NDArray[bool_]: ... + @overload + def __ne__( + self: _CharArray[bytes_], + other: _ArrayLikeBytes_co, + ) -> NDArray[bool_]: ... + + @overload + def __ge__( + self: _CharArray[str_], + other: _ArrayLikeStr_co, + ) -> NDArray[bool_]: ... 
+ @overload + def __ge__( + self: _CharArray[bytes_], + other: _ArrayLikeBytes_co, + ) -> NDArray[bool_]: ... + + @overload + def __le__( + self: _CharArray[str_], + other: _ArrayLikeStr_co, + ) -> NDArray[bool_]: ... + @overload + def __le__( + self: _CharArray[bytes_], + other: _ArrayLikeBytes_co, + ) -> NDArray[bool_]: ... + + @overload + def __gt__( + self: _CharArray[str_], + other: _ArrayLikeStr_co, + ) -> NDArray[bool_]: ... + @overload + def __gt__( + self: _CharArray[bytes_], + other: _ArrayLikeBytes_co, + ) -> NDArray[bool_]: ... + + @overload + def __lt__( + self: _CharArray[str_], + other: _ArrayLikeStr_co, + ) -> NDArray[bool_]: ... + @overload + def __lt__( + self: _CharArray[bytes_], + other: _ArrayLikeBytes_co, + ) -> NDArray[bool_]: ... + + @overload + def __add__( + self: _CharArray[str_], + other: _ArrayLikeStr_co, + ) -> _CharArray[str_]: ... + @overload + def __add__( + self: _CharArray[bytes_], + other: _ArrayLikeBytes_co, + ) -> _CharArray[bytes_]: ... + + @overload + def __radd__( + self: _CharArray[str_], + other: _ArrayLikeStr_co, + ) -> _CharArray[str_]: ... + @overload + def __radd__( + self: _CharArray[bytes_], + other: _ArrayLikeBytes_co, + ) -> _CharArray[bytes_]: ... + + @overload + def center( + self: _CharArray[str_], + width: _ArrayLikeInt_co, + fillchar: _ArrayLikeStr_co = ..., + ) -> _CharArray[str_]: ... + @overload + def center( + self: _CharArray[bytes_], + width: _ArrayLikeInt_co, + fillchar: _ArrayLikeBytes_co = ..., + ) -> _CharArray[bytes_]: ... + + @overload + def count( + self: _CharArray[str_], + sub: _ArrayLikeStr_co, + start: _ArrayLikeInt_co = ..., + end: None | _ArrayLikeInt_co = ..., + ) -> NDArray[int_]: ... + @overload + def count( + self: _CharArray[bytes_], + sub: _ArrayLikeBytes_co, + start: _ArrayLikeInt_co = ..., + end: None | _ArrayLikeInt_co = ..., + ) -> NDArray[int_]: ... + + def decode( + self: _CharArray[bytes_], + encoding: None | str = ..., + errors: None | str = ..., + ) -> _CharArray[str_]: ... + + def encode( + self: _CharArray[str_], + encoding: None | str = ..., + errors: None | str = ..., + ) -> _CharArray[bytes_]: ... + + @overload + def endswith( + self: _CharArray[str_], + suffix: _ArrayLikeStr_co, + start: _ArrayLikeInt_co = ..., + end: None | _ArrayLikeInt_co = ..., + ) -> NDArray[bool_]: ... + @overload + def endswith( + self: _CharArray[bytes_], + suffix: _ArrayLikeBytes_co, + start: _ArrayLikeInt_co = ..., + end: None | _ArrayLikeInt_co = ..., + ) -> NDArray[bool_]: ... + + def expandtabs( + self, + tabsize: _ArrayLikeInt_co = ..., + ) -> chararray[Any, _CharDType]: ... + + @overload + def find( + self: _CharArray[str_], + sub: _ArrayLikeStr_co, + start: _ArrayLikeInt_co = ..., + end: None | _ArrayLikeInt_co = ..., + ) -> NDArray[int_]: ... + @overload + def find( + self: _CharArray[bytes_], + sub: _ArrayLikeBytes_co, + start: _ArrayLikeInt_co = ..., + end: None | _ArrayLikeInt_co = ..., + ) -> NDArray[int_]: ... + + @overload + def index( + self: _CharArray[str_], + sub: _ArrayLikeStr_co, + start: _ArrayLikeInt_co = ..., + end: None | _ArrayLikeInt_co = ..., + ) -> NDArray[int_]: ... + @overload + def index( + self: _CharArray[bytes_], + sub: _ArrayLikeBytes_co, + start: _ArrayLikeInt_co = ..., + end: None | _ArrayLikeInt_co = ..., + ) -> NDArray[int_]: ... + + @overload + def join( + self: _CharArray[str_], + seq: _ArrayLikeStr_co, + ) -> _CharArray[str_]: ... + @overload + def join( + self: _CharArray[bytes_], + seq: _ArrayLikeBytes_co, + ) -> _CharArray[bytes_]: ... 
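+ # Illustration (not part of the NumPy stubs): the str_/bytes_ overload
+ # pairs in this class keep each character kind closed under its methods,
+ # e.g.
+ #
+ #     ca = np.char.array(["a", "b"])   # a chararray of str_
+ #     ca + ca                          # still a str_ chararray per __add__
+ #
+ # while mixing str_ and bytes_ operands is rejected at type-check time.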
+ + @overload + def ljust( + self: _CharArray[str_], + width: _ArrayLikeInt_co, + fillchar: _ArrayLikeStr_co = ..., + ) -> _CharArray[str_]: ... + @overload + def ljust( + self: _CharArray[bytes_], + width: _ArrayLikeInt_co, + fillchar: _ArrayLikeBytes_co = ..., + ) -> _CharArray[bytes_]: ... + + @overload + def lstrip( + self: _CharArray[str_], + chars: None | _ArrayLikeStr_co = ..., + ) -> _CharArray[str_]: ... + @overload + def lstrip( + self: _CharArray[bytes_], + chars: None | _ArrayLikeBytes_co = ..., + ) -> _CharArray[bytes_]: ... + + @overload + def partition( + self: _CharArray[str_], + sep: _ArrayLikeStr_co, + ) -> _CharArray[str_]: ... + @overload + def partition( + self: _CharArray[bytes_], + sep: _ArrayLikeBytes_co, + ) -> _CharArray[bytes_]: ... + + @overload + def replace( + self: _CharArray[str_], + old: _ArrayLikeStr_co, + new: _ArrayLikeStr_co, + count: None | _ArrayLikeInt_co = ..., + ) -> _CharArray[str_]: ... + @overload + def replace( + self: _CharArray[bytes_], + old: _ArrayLikeBytes_co, + new: _ArrayLikeBytes_co, + count: None | _ArrayLikeInt_co = ..., + ) -> _CharArray[bytes_]: ... + + @overload + def rfind( + self: _CharArray[str_], + sub: _ArrayLikeStr_co, + start: _ArrayLikeInt_co = ..., + end: None | _ArrayLikeInt_co = ..., + ) -> NDArray[int_]: ... + @overload + def rfind( + self: _CharArray[bytes_], + sub: _ArrayLikeBytes_co, + start: _ArrayLikeInt_co = ..., + end: None | _ArrayLikeInt_co = ..., + ) -> NDArray[int_]: ... + + @overload + def rindex( + self: _CharArray[str_], + sub: _ArrayLikeStr_co, + start: _ArrayLikeInt_co = ..., + end: None | _ArrayLikeInt_co = ..., + ) -> NDArray[int_]: ... + @overload + def rindex( + self: _CharArray[bytes_], + sub: _ArrayLikeBytes_co, + start: _ArrayLikeInt_co = ..., + end: None | _ArrayLikeInt_co = ..., + ) -> NDArray[int_]: ... + + @overload + def rjust( + self: _CharArray[str_], + width: _ArrayLikeInt_co, + fillchar: _ArrayLikeStr_co = ..., + ) -> _CharArray[str_]: ... + @overload + def rjust( + self: _CharArray[bytes_], + width: _ArrayLikeInt_co, + fillchar: _ArrayLikeBytes_co = ..., + ) -> _CharArray[bytes_]: ... + + @overload + def rpartition( + self: _CharArray[str_], + sep: _ArrayLikeStr_co, + ) -> _CharArray[str_]: ... + @overload + def rpartition( + self: _CharArray[bytes_], + sep: _ArrayLikeBytes_co, + ) -> _CharArray[bytes_]: ... + + @overload + def rsplit( + self: _CharArray[str_], + sep: None | _ArrayLikeStr_co = ..., + maxsplit: None | _ArrayLikeInt_co = ..., + ) -> NDArray[object_]: ... + @overload + def rsplit( + self: _CharArray[bytes_], + sep: None | _ArrayLikeBytes_co = ..., + maxsplit: None | _ArrayLikeInt_co = ..., + ) -> NDArray[object_]: ... + + @overload + def rstrip( + self: _CharArray[str_], + chars: None | _ArrayLikeStr_co = ..., + ) -> _CharArray[str_]: ... + @overload + def rstrip( + self: _CharArray[bytes_], + chars: None | _ArrayLikeBytes_co = ..., + ) -> _CharArray[bytes_]: ... + + @overload + def split( + self: _CharArray[str_], + sep: None | _ArrayLikeStr_co = ..., + maxsplit: None | _ArrayLikeInt_co = ..., + ) -> NDArray[object_]: ... + @overload + def split( + self: _CharArray[bytes_], + sep: None | _ArrayLikeBytes_co = ..., + maxsplit: None | _ArrayLikeInt_co = ..., + ) -> NDArray[object_]: ... + + def splitlines(self, keepends: None | _ArrayLikeBool_co = ...) -> NDArray[object_]: ... + + @overload + def startswith( + self: _CharArray[str_], + prefix: _ArrayLikeStr_co, + start: _ArrayLikeInt_co = ..., + end: None | _ArrayLikeInt_co = ..., + ) -> NDArray[bool_]: ... 
+ @overload + def startswith( + self: _CharArray[bytes_], + prefix: _ArrayLikeBytes_co, + start: _ArrayLikeInt_co = ..., + end: None | _ArrayLikeInt_co = ..., + ) -> NDArray[bool_]: ... + + @overload + def strip( + self: _CharArray[str_], + chars: None | _ArrayLikeStr_co = ..., + ) -> _CharArray[str_]: ... + @overload + def strip( + self: _CharArray[bytes_], + chars: None | _ArrayLikeBytes_co = ..., + ) -> _CharArray[bytes_]: ... + + @overload + def translate( + self: _CharArray[str_], + table: _ArrayLikeStr_co, + deletechars: None | _ArrayLikeStr_co = ..., + ) -> _CharArray[str_]: ... + @overload + def translate( + self: _CharArray[bytes_], + table: _ArrayLikeBytes_co, + deletechars: None | _ArrayLikeBytes_co = ..., + ) -> _CharArray[bytes_]: ... + + def zfill(self, width: _ArrayLikeInt_co) -> chararray[Any, _CharDType]: ... + def capitalize(self) -> chararray[_ShapeType, _CharDType]: ... + def title(self) -> chararray[_ShapeType, _CharDType]: ... + def swapcase(self) -> chararray[_ShapeType, _CharDType]: ... + def lower(self) -> chararray[_ShapeType, _CharDType]: ... + def upper(self) -> chararray[_ShapeType, _CharDType]: ... + def isalnum(self) -> ndarray[_ShapeType, dtype[bool_]]: ... + def isalpha(self) -> ndarray[_ShapeType, dtype[bool_]]: ... + def isdigit(self) -> ndarray[_ShapeType, dtype[bool_]]: ... + def islower(self) -> ndarray[_ShapeType, dtype[bool_]]: ... + def isspace(self) -> ndarray[_ShapeType, dtype[bool_]]: ... + def istitle(self) -> ndarray[_ShapeType, dtype[bool_]]: ... + def isupper(self) -> ndarray[_ShapeType, dtype[bool_]]: ... + def isnumeric(self) -> ndarray[_ShapeType, dtype[bool_]]: ... + def isdecimal(self) -> ndarray[_ShapeType, dtype[bool_]]: ... + +# NOTE: Deprecated +# class MachAr: ... + +class _SupportsDLPack(Protocol[_T_contra]): + def __dlpack__(self, *, stream: None | _T_contra = ...) -> _PyCapsule: ... + +def _from_dlpack(__obj: _SupportsDLPack[None]) -> NDArray[Any]: ... 
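An illustrative sketch, not part of this diff: the `memmap` overloads above default the dtype to `uint8` and carry an explicit scalar type through to the result type; the file name below is invented for the example.

    import numpy as np

    # typed as memmap[Any, dtype[float32]] under the stubs above
    mm = np.memmap("demo.dat", dtype=np.float32, mode="w+", shape=(3, 4))
    mm[:] = 1.0
    mm.flush()  # flush() persists the buffer to disk, matching the stub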
diff --git a/.local/lib/python3.8/site-packages/numpy/__pycache__/__config__.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/__pycache__/__config__.cpython-38.pyc new file mode 100644 index 0000000..ce30af4 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/__pycache__/__config__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..bfd389d Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/__pycache__/_distributor_init.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/__pycache__/_distributor_init.cpython-38.pyc new file mode 100644 index 0000000..e76fa14 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/__pycache__/_distributor_init.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/__pycache__/_globals.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/__pycache__/_globals.cpython-38.pyc new file mode 100644 index 0000000..3881408 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/__pycache__/_globals.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/__pycache__/_pytesttester.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/__pycache__/_pytesttester.cpython-38.pyc new file mode 100644 index 0000000..db6200d Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/__pycache__/_pytesttester.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/__pycache__/_version.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/__pycache__/_version.cpython-38.pyc new file mode 100644 index 0000000..cb2099e Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/__pycache__/_version.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/__pycache__/conftest.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/__pycache__/conftest.cpython-38.pyc new file mode 100644 index 0000000..121e7bd Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/__pycache__/conftest.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/__pycache__/ctypeslib.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/__pycache__/ctypeslib.cpython-38.pyc new file mode 100644 index 0000000..867e286 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/__pycache__/ctypeslib.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/__pycache__/dual.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/__pycache__/dual.cpython-38.pyc new file mode 100644 index 0000000..7b33d72 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/__pycache__/dual.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/__pycache__/matlib.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/__pycache__/matlib.cpython-38.pyc new file mode 100644 index 0000000..5fc34cb Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/__pycache__/matlib.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/__pycache__/setup.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/__pycache__/setup.cpython-38.pyc new file mode 100644 index 0000000..75fdb14 Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/numpy/__pycache__/setup.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/__pycache__/version.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/__pycache__/version.cpython-38.pyc new file mode 100644 index 0000000..8f23a87 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/__pycache__/version.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/_distributor_init.py b/.local/lib/python3.8/site-packages/numpy/_distributor_init.py new file mode 100644 index 0000000..d893ba3 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/_distributor_init.py @@ -0,0 +1,10 @@ +""" Distributor init file + +Distributors: you can add custom code here to support particular distributions +of numpy. + +For example, this is a good place to put any checks for hardware requirements. + +The numpy standard source distribution will not put code in this file, so you +can safely replace this file with your own version. +""" diff --git a/.local/lib/python3.8/site-packages/numpy/_globals.py b/.local/lib/python3.8/site-packages/numpy/_globals.py new file mode 100644 index 0000000..c888747 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/_globals.py @@ -0,0 +1,129 @@ +""" +Module defining global singleton classes. + +This module raises a RuntimeError if an attempt to reload it is made. In that +way the identities of the classes defined here are fixed and will remain so +even if numpy itself is reloaded. In particular, a function like the following +will still work correctly after numpy is reloaded:: + + def foo(arg=np._NoValue): + if arg is np._NoValue: + ... + +That was not the case when the singleton classes were defined in the numpy +``__init__.py`` file. See gh-7844 for a discussion of the reload problem that +motivated this module. + +""" +import enum + +__ALL__ = [ + 'ModuleDeprecationWarning', 'VisibleDeprecationWarning', + '_NoValue', '_CopyMode' + ] + + +# Disallow reloading this module so as to preserve the identities of the +# classes defined here. +if '_is_loaded' in globals(): + raise RuntimeError('Reloading numpy._globals is not allowed') +_is_loaded = True + + +class ModuleDeprecationWarning(DeprecationWarning): + """Module deprecation warning. + + The nose tester turns ordinary Deprecation warnings into test failures. + That makes it hard to deprecate whole modules, because they get + imported by default. So this is a special Deprecation warning that the + nose tester will let pass without making tests fail. + + """ + + +ModuleDeprecationWarning.__module__ = 'numpy' + + +class VisibleDeprecationWarning(UserWarning): + """Visible deprecation warning. + + By default, python will not show deprecation warnings, so this class + can be used when a very visible warning is helpful, for example because + the usage is most likely a user bug. + + """ + + +VisibleDeprecationWarning.__module__ = 'numpy' + + +class _NoValueType: + """Special keyword value. + + The instance of this class may be used as the default value assigned to a + keyword if no other obvious default (e.g., `None`) is suitable, + + Common reasons for using this keyword are: + + - A new keyword is added to a function, and that function forwards its + inputs to another function or method which can be defined outside of + NumPy. 
For example, ``np.std(x)`` calls ``x.std``, so when a ``keepdims`` + keyword was added that could only be forwarded if the user explicitly + specified ``keepdims``; downstream array libraries may not have added + the same keyword, so adding ``x.std(..., keepdims=keepdims)`` + unconditionally could have broken previously working code. + - A keyword is being deprecated, and a deprecation warning must only be + emitted when the keyword is used. + + """ + __instance = None + def __new__(cls): + # ensure that only one instance exists + if not cls.__instance: + cls.__instance = super().__new__(cls) + return cls.__instance + + # needed for Python 2 to preserve identity through a pickle + def __reduce__(self): + return (self.__class__, ()) + + def __repr__(self): + return "<no value>" + + +_NoValue = _NoValueType() + + +class _CopyMode(enum.Enum): + """ + An enumeration for the copy modes supported + by numpy.copy() and numpy.array(). The following three modes are supported: + + - ALWAYS: This means that a deep copy of the input + array will always be taken. + - IF_NEEDED: This means that a deep copy of the input + array will be taken only if necessary. + - NEVER: This means that the deep copy will never be taken. + If a copy cannot be avoided then a `ValueError` will be + raised. + + Note that the buffer-protocol could in theory do copies. NumPy currently + assumes an object exporting the buffer protocol will never do this. + """ + + ALWAYS = True + IF_NEEDED = False + NEVER = 2 + + def __bool__(self): + # For backwards compatibility + if self == _CopyMode.ALWAYS: + return True + + if self == _CopyMode.IF_NEEDED: + return False + + raise ValueError(f"{self} is neither True nor False.") + + +_CopyMode.__module__ = 'numpy' diff --git a/.local/lib/python3.8/site-packages/numpy/_pytesttester.py b/.local/lib/python3.8/site-packages/numpy/_pytesttester.py new file mode 100644 index 0000000..8decb9d --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/_pytesttester.py @@ -0,0 +1,208 @@ +""" +Pytest test running. + +This module implements the ``test()`` function for NumPy modules. The usual +boilerplate for doing that is to put the following in the module +``__init__.py`` file:: + + from numpy._pytesttester import PytestTester + test = PytestTester(__name__) + del PytestTester + + +Warnings filtering and other runtime settings should be dealt with in the +``pytest.ini`` file in the numpy repo root. The behavior of the test depends on +whether or not that file is found, as follows: + +* ``pytest.ini`` is present (develop mode) + All warnings except those explicitly filtered out are raised as errors. +* ``pytest.ini`` is absent (release mode) + DeprecationWarnings and PendingDeprecationWarnings are ignored, other + warnings are passed through. + +In practice, tests run from the numpy repo are run in develop mode. That +includes the standard ``python runtests.py`` invocation. + +This module is imported by every numpy subpackage, so it lies at the top level +to simplify circular import issues. For the same reason, it contains no numpy +imports at module scope, instead importing numpy within function calls.
+""" +import sys +import os + +__all__ = ['PytestTester'] + + + +def _show_numpy_info(): + import numpy as np + + print("NumPy version %s" % np.__version__) + relaxed_strides = np.ones((10, 1), order="C").flags.f_contiguous + print("NumPy relaxed strides checking option:", relaxed_strides) + info = np.lib.utils._opt_info() + print("NumPy CPU features: ", (info if info else 'nothing enabled')) + + + +class PytestTester: + """ + Pytest test runner. + + A test function is typically added to a package's __init__.py like so:: + + from numpy._pytesttester import PytestTester + test = PytestTester(__name__).test + del PytestTester + + Calling this test function finds and runs all tests associated with the + module and all its sub-modules. + + Attributes + ---------- + module_name : str + Full path to the package to test. + + Parameters + ---------- + module_name : module name + The name of the module to test. + + Notes + ----- + Unlike the previous ``nose``-based implementation, this class is not + publicly exposed as it performs some ``numpy``-specific warning + suppression. + + """ + def __init__(self, module_name): + self.module_name = module_name + + def __call__(self, label='fast', verbose=1, extra_argv=None, + doctests=False, coverage=False, durations=-1, tests=None): + """ + Run tests for module using pytest. + + Parameters + ---------- + label : {'fast', 'full'}, optional + Identifies the tests to run. When set to 'fast', tests decorated + with `pytest.mark.slow` are skipped, when 'full', the slow marker + is ignored. + verbose : int, optional + Verbosity value for test outputs, in the range 1-3. Default is 1. + extra_argv : list, optional + List with any extra arguments to pass to pytests. + doctests : bool, optional + .. note:: Not supported + coverage : bool, optional + If True, report coverage of NumPy code. Default is False. + Requires installation of (pip) pytest-cov. + durations : int, optional + If < 0, do nothing, If 0, report time of all tests, if > 0, + report the time of the slowest `timer` tests. Default is -1. + tests : test or list of tests + Tests to be executed with pytest '--pyargs' + + Returns + ------- + result : bool + Return True on success, false otherwise. + + Notes + ----- + Each NumPy module exposes `test` in its namespace to run all tests for + it. For example, to run all tests for numpy.lib: + + >>> np.lib.test() #doctest: +SKIP + + Examples + -------- + >>> result = np.lib.test() #doctest: +SKIP + ... + 1023 passed, 2 skipped, 6 deselected, 1 xfailed in 10.39 seconds + >>> result + True + + """ + import pytest + import warnings + + module = sys.modules[self.module_name] + module_path = os.path.abspath(module.__path__[0]) + + # setup the pytest arguments + pytest_args = ["-l"] + + # offset verbosity. The "-q" cancels a "-v". + pytest_args += ["-q"] + + with warnings.catch_warnings(): + warnings.simplefilter("always") + # Filter out distutils cpu warnings (could be localized to + # distutils tests). ASV has problems with top level import, + # so fetch module for suppression here. + from numpy.distutils import cpuinfo + + with warnings.catch_warnings(record=True): + # Ignore the warning from importing the array_api submodule. This + # warning is done on import, so it would break pytest collection, + # but importing it early here prevents the warning from being + # issued when it imported again. + import numpy.array_api + + # Filter out annoying import messages. Want these in both develop and + # release mode. 
+ pytest_args += [ + "-W ignore:Not importing directory", + "-W ignore:numpy.dtype size changed", + "-W ignore:numpy.ufunc size changed", + "-W ignore::UserWarning:cpuinfo", + ] + + # When testing matrices, ignore their PendingDeprecationWarnings + pytest_args += [ + "-W ignore:the matrix subclass is not", + "-W ignore:Importing from numpy.matlib is", + ] + + if doctests: + raise ValueError("Doctests not supported") + + if extra_argv: + pytest_args += list(extra_argv) + + if verbose > 1: + pytest_args += ["-" + "v"*(verbose - 1)] + + if coverage: + pytest_args += ["--cov=" + module_path] + + if label == "fast": + # not importing at the top level to avoid circular import of module + from numpy.testing import IS_PYPY + if IS_PYPY: + pytest_args += ["-m", "not slow and not slow_pypy"] + else: + pytest_args += ["-m", "not slow"] + + elif label != "full": + pytest_args += ["-m", label] + + if durations >= 0: + pytest_args += ["--durations=%s" % durations] + + if tests is None: + tests = [self.module_name] + + pytest_args += ["--pyargs"] + list(tests) + + # run tests. + _show_numpy_info() + + try: + code = pytest.main(pytest_args) + except SystemExit as exc: + code = exc.code + + return code == 0 diff --git a/.local/lib/python3.8/site-packages/numpy/_pytesttester.pyi b/.local/lib/python3.8/site-packages/numpy/_pytesttester.pyi new file mode 100644 index 0000000..0be64b3 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/_pytesttester.pyi @@ -0,0 +1,17 @@ +from typing import List, Iterable, Literal as L + +__all__: List[str] + +class PytestTester: + module_name: str + def __init__(self, module_name: str) -> None: ... + def __call__( + self, + label: L["fast", "full"] = ..., + verbose: int = ..., + extra_argv: None | Iterable[str] = ..., + doctests: L[False] = ..., + coverage: bool = ..., + durations: int = ..., + tests: None | Iterable[str] = ..., + ) -> bool: ... diff --git a/.local/lib/python3.8/site-packages/numpy/_version.py b/.local/lib/python3.8/site-packages/numpy/_version.py new file mode 100644 index 0000000..4272f36 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/_version.py @@ -0,0 +1,21 @@ + +# This file was generated by 'versioneer.py' (0.19) from +# revision-control system data, or from the parent directory name of an +# unpacked source archive. Distribution tarballs contain a pre-generated copy +# of this file. + +import json + +version_json = ''' +{ + "date": "2022-03-07T12:27:54-0700", + "dirty": false, + "error": null, + "full-revisionid": "7d4349e332fcba2bc3f266267421531b3ec5d3e6", + "version": "1.22.3" +} +''' # END VERSION_JSON + + +def get_versions(): + return json.loads(version_json) diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/__init__.py b/.local/lib/python3.8/site-packages/numpy/array_api/__init__.py new file mode 100644 index 0000000..bbe2fdc --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/array_api/__init__.py @@ -0,0 +1,375 @@ +""" +A NumPy sub-namespace that conforms to the Python array API standard. + +This submodule accompanies NEP 47, which proposes its inclusion in NumPy. It +is still considered experimental, and will issue a warning when imported. + +This is a proof-of-concept namespace that wraps the corresponding NumPy +functions to give a conforming implementation of the Python array API standard +(https://data-apis.github.io/array-api/latest/). The standard is currently in +an RFC phase and comments on it are both welcome and encouraged. 
Comments +should be made either at https://github.com/data-apis/array-api or at +https://github.com/data-apis/consortium-feedback/discussions. + +NumPy already follows the proposed spec for the most part, so this module +serves mostly as a thin wrapper around it. However, NumPy also implements a +lot of behavior that is not included in the spec, so this serves as a +restricted subset of the API. Only those functions that are part of the spec +are included in this namespace, and all functions are given with the exact +signature given in the spec, including the use of position-only arguments, and +omitting any extra keyword arguments implemented by NumPy but not part of the +spec. The behavior of some functions is also modified from the NumPy behavior +to conform to the standard. Note that the underlying array object itself is +wrapped in a wrapper Array() class, but is otherwise unchanged. This submodule +is implemented in pure Python with no C extensions. + +The array API spec is designed as a "minimal API subset" and explicitly allows +libraries to include behaviors not specified by it. But users of this module +that intend to write portable code should be aware that only those behaviors +that are listed in the spec are guaranteed to be implemented across libraries. +Consequently, the NumPy implementation was chosen to be both conforming and +minimal, so that users can use this implementation of the array API namespace +and be sure that behaviors that it defines will be available in conforming +namespaces from other libraries. + +A few notes about the current state of this submodule: + +- There is a test suite that tests modules against the array API standard at + https://github.com/data-apis/array-api-tests. The test suite is still a work + in progress, but the existing tests pass on this module, with a few + exceptions: + + - DLPack support (see https://github.com/data-apis/array-api/pull/106) is + not included here, as it requires a full implementation in NumPy proper + first. + + The test suite is not yet complete, and even the tests that exist are not + guaranteed to give a comprehensive coverage of the spec. Therefore, when + reviewing and using this submodule, you should refer to the standard + documents themselves. There are some tests in numpy.array_api.tests, but + they primarily focus on things that are not tested by the official array API + test suite. + +- There is a custom array object, numpy.array_api.Array, which is returned by + all functions in this module. All functions in the array API namespace + implicitly assume that they will only receive this object as input. The only + way to create instances of this object is to use one of the array creation + functions. It does not have a public constructor on the object itself. The + object is a small wrapper class around numpy.ndarray. The main purpose of it + is to restrict the namespace of the array object to only those dtypes and + only those methods that are required by the spec, as well as to limit/change + certain behavior that differs in the spec. In particular: + + - The array API namespace does not have scalar objects, only 0-D arrays. + Operations on Array that would create a scalar in NumPy create a 0-D + array. + + - Indexing: Only a subset of indices supported by NumPy are required by the + spec. The Array object restricts indexing to only allow those types of + indices that are required by the spec. See the docstring of the + numpy.array_api.Array._validate_indices helper function for more + information. 
+ + - Type promotion: Some type promotion rules are different in the spec. In + particular, the spec does not have any value-based casting. The spec also + does not require cross-kind casting, like integer -> floating-point. Only + those promotions that are explicitly required by the array API + specification are allowed in this module. See NEP 47 for more info. + + - Functions do not automatically call asarray() on their input, and will not + work if the input type is not Array. The exception is array creation + functions, and Python operators on the Array object, which accept Python + scalars of the same type as the array dtype. + +- All functions include type annotations, corresponding to those given in the + spec (see _typing.py for definitions of some custom types). These do not + currently fully pass mypy due to some limitations in mypy. + +- Dtype objects are just the NumPy dtype objects, e.g., float64 = + np.dtype('float64'). The spec does not require any behavior on these dtype + objects other than that they be accessible by name and be comparable by + equality, but it was considered too much extra complexity to create custom + objects to represent dtypes. + +- All places where the implementations in this submodule are known to deviate + from their corresponding functions in NumPy are marked with "# Note:" + comments. + +Still TODO in this module are: + +- DLPack support for numpy.ndarray is still in progress. See + https://github.com/numpy/numpy/pull/19083. + +- The copy=False keyword argument to asarray() is not yet implemented. This + requires support in numpy.asarray() first. + +- Some functions are not yet fully tested in the array API test suite, and may + require updates that are not yet known until the tests are written. + +- The spec is still in an RFC phase and may still have minor updates, which + will need to be reflected here. + +- Complex number support in array API spec is planned but not yet finalized, + as are the fft extension and certain linear algebra functions such as eig + that require complex dtypes. + +""" + +import warnings + +warnings.warn( + "The numpy.array_api submodule is still experimental. 
See NEP 47.", stacklevel=2 +) + +__all__ = [] + +from ._constants import e, inf, nan, pi + +__all__ += ["e", "inf", "nan", "pi"] + +from ._creation_functions import ( + asarray, + arange, + empty, + empty_like, + eye, + from_dlpack, + full, + full_like, + linspace, + meshgrid, + ones, + ones_like, + tril, + triu, + zeros, + zeros_like, +) + +__all__ += [ + "asarray", + "arange", + "empty", + "empty_like", + "eye", + "from_dlpack", + "full", + "full_like", + "linspace", + "meshgrid", + "ones", + "ones_like", + "tril", + "triu", + "zeros", + "zeros_like", +] + +from ._data_type_functions import ( + astype, + broadcast_arrays, + broadcast_to, + can_cast, + finfo, + iinfo, + result_type, +) + +__all__ += [ + "astype", + "broadcast_arrays", + "broadcast_to", + "can_cast", + "finfo", + "iinfo", + "result_type", +] + +from ._dtypes import ( + int8, + int16, + int32, + int64, + uint8, + uint16, + uint32, + uint64, + float32, + float64, + bool, +) + +__all__ += [ + "int8", + "int16", + "int32", + "int64", + "uint8", + "uint16", + "uint32", + "uint64", + "float32", + "float64", + "bool", +] + +from ._elementwise_functions import ( + abs, + acos, + acosh, + add, + asin, + asinh, + atan, + atan2, + atanh, + bitwise_and, + bitwise_left_shift, + bitwise_invert, + bitwise_or, + bitwise_right_shift, + bitwise_xor, + ceil, + cos, + cosh, + divide, + equal, + exp, + expm1, + floor, + floor_divide, + greater, + greater_equal, + isfinite, + isinf, + isnan, + less, + less_equal, + log, + log1p, + log2, + log10, + logaddexp, + logical_and, + logical_not, + logical_or, + logical_xor, + multiply, + negative, + not_equal, + positive, + pow, + remainder, + round, + sign, + sin, + sinh, + square, + sqrt, + subtract, + tan, + tanh, + trunc, +) + +__all__ += [ + "abs", + "acos", + "acosh", + "add", + "asin", + "asinh", + "atan", + "atan2", + "atanh", + "bitwise_and", + "bitwise_left_shift", + "bitwise_invert", + "bitwise_or", + "bitwise_right_shift", + "bitwise_xor", + "ceil", + "cos", + "cosh", + "divide", + "equal", + "exp", + "expm1", + "floor", + "floor_divide", + "greater", + "greater_equal", + "isfinite", + "isinf", + "isnan", + "less", + "less_equal", + "log", + "log1p", + "log2", + "log10", + "logaddexp", + "logical_and", + "logical_not", + "logical_or", + "logical_xor", + "multiply", + "negative", + "not_equal", + "positive", + "pow", + "remainder", + "round", + "sign", + "sin", + "sinh", + "square", + "sqrt", + "subtract", + "tan", + "tanh", + "trunc", +] + +# linalg is an extension in the array API spec, which is a sub-namespace. Only +# a subset of functions in it are imported into the top-level namespace. +from . 
import linalg + +__all__ += ["linalg"] + +from .linalg import matmul, tensordot, matrix_transpose, vecdot + +__all__ += ["matmul", "tensordot", "matrix_transpose", "vecdot"] + +from ._manipulation_functions import ( + concat, + expand_dims, + flip, + permute_dims, + reshape, + roll, + squeeze, + stack, +) + +__all__ += ["concat", "expand_dims", "flip", "permute_dims", "reshape", "roll", "squeeze", "stack"] + +from ._searching_functions import argmax, argmin, nonzero, where + +__all__ += ["argmax", "argmin", "nonzero", "where"] + +from ._set_functions import unique_all, unique_counts, unique_inverse, unique_values + +__all__ += ["unique_all", "unique_counts", "unique_inverse", "unique_values"] + +from ._sorting_functions import argsort, sort + +__all__ += ["argsort", "sort"] + +from ._statistical_functions import max, mean, min, prod, std, sum, var + +__all__ += ["max", "mean", "min", "prod", "std", "sum", "var"] + +from ._utility_functions import all, any + +__all__ += ["all", "any"] diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..f0bd17d Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/_array_object.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/_array_object.cpython-38.pyc new file mode 100644 index 0000000..1a55a6d Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/_array_object.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/_constants.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/_constants.cpython-38.pyc new file mode 100644 index 0000000..21af8dd Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/_constants.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/_creation_functions.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/_creation_functions.cpython-38.pyc new file mode 100644 index 0000000..87a1827 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/_creation_functions.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/_data_type_functions.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/_data_type_functions.cpython-38.pyc new file mode 100644 index 0000000..376252c Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/_data_type_functions.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/_dtypes.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/_dtypes.cpython-38.pyc new file mode 100644 index 0000000..10bc553 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/_dtypes.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/_elementwise_functions.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/_elementwise_functions.cpython-38.pyc new file mode 100644 index 0000000..532e8c2 Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/_elementwise_functions.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/_manipulation_functions.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/_manipulation_functions.cpython-38.pyc new file mode 100644 index 0000000..e462500 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/_manipulation_functions.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/_searching_functions.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/_searching_functions.cpython-38.pyc new file mode 100644 index 0000000..5590338 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/_searching_functions.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/_set_functions.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/_set_functions.cpython-38.pyc new file mode 100644 index 0000000..f60f148 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/_set_functions.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/_sorting_functions.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/_sorting_functions.cpython-38.pyc new file mode 100644 index 0000000..ea8ac2e Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/_sorting_functions.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/_statistical_functions.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/_statistical_functions.cpython-38.pyc new file mode 100644 index 0000000..b2b272b Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/_statistical_functions.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/_typing.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/_typing.cpython-38.pyc new file mode 100644 index 0000000..7a35f18 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/_typing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/_utility_functions.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/_utility_functions.cpython-38.pyc new file mode 100644 index 0000000..496ba1c Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/_utility_functions.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/linalg.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/linalg.cpython-38.pyc new file mode 100644 index 0000000..3b9a114 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/linalg.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/setup.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/setup.cpython-38.pyc new file mode 100644 index 0000000..f591f04 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/array_api/__pycache__/setup.cpython-38.pyc differ diff --git 
a/.local/lib/python3.8/site-packages/numpy/array_api/_array_object.py b/.local/lib/python3.8/site-packages/numpy/array_api/_array_object.py new file mode 100644 index 0000000..00ffe71 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/array_api/_array_object.py @@ -0,0 +1,1086 @@ +""" +Wrapper class around the ndarray object for the array API standard. + +The array API standard defines some behaviors differently than ndarray, in +particular, type promotion rules are different (the standard has no +value-based casting). The standard also specifies a more limited subset of +array methods and functionalities than are implemented on ndarray. Since the +goal of the array_api namespace is to be a minimal implementation of the array +API standard, we need to define a separate wrapper class for the array_api +namespace. + +The standard-compliant class is only a wrapper class. It is *not* a subclass +of ndarray. +""" + +from __future__ import annotations + +import operator +from enum import IntEnum +from ._creation_functions import asarray +from ._dtypes import ( + _all_dtypes, + _boolean_dtypes, + _integer_dtypes, + _integer_or_boolean_dtypes, + _floating_dtypes, + _numeric_dtypes, + _result_type, + _dtype_categories, +) + +from typing import TYPE_CHECKING, Optional, Tuple, Union, Any +import types + +if TYPE_CHECKING: + from ._typing import Any, PyCapsule, Device, Dtype + import numpy.typing as npt + +import numpy as np + +from numpy import array_api + + +class Array: + """ + n-d array object for the array API namespace. + + See the docstring of :py:obj:`np.ndarray <numpy.ndarray>` for more + information. + + This is a wrapper around numpy.ndarray that restricts the usage to only + those things that are required by the array API namespace. Note that + attributes on this object that start with a single underscore are not part + of the API specification and should only be used internally. This object + should not be constructed directly. Rather, use one of the creation + functions, such as asarray(). + + """ + + # Use a custom constructor instead of __init__, as manually initializing + # this class is not supported API. + @classmethod + def _new(cls, x, /): + """ + This is a private method for initializing the array API Array + object. + + Functions outside of the array_api submodule should not use this + method. Use one of the creation functions instead, such as + ``asarray``. + + """ + obj = super().__new__(cls) + # Note: The spec does not have array scalars, only 0-D arrays. + if isinstance(x, np.generic): + # Convert the array scalar to a 0-D array + x = np.asarray(x) + if x.dtype not in _all_dtypes: + raise TypeError( + f"The array_api namespace does not support the dtype '{x.dtype}'" + ) + obj._array = x + return obj + + # Prevent Array() from working + def __new__(cls, *args, **kwargs): + raise TypeError( + "The array_api Array object should not be instantiated directly. Use an array creation function, such as asarray(), instead." + ) + + # These functions are not required by the spec, but are implemented for + # the sake of usability. + + def __str__(self: Array, /) -> str: + """ + Performs the operation __str__. + """ + return self._array.__str__().replace("array", "Array") + + def __repr__(self: Array, /) -> str: + """ + Performs the operation __repr__.
+ """ + suffix = f", dtype={self.dtype.name})" + if 0 in self.shape: + prefix = "empty(" + mid = str(self.shape) + else: + prefix = "Array(" + mid = np.array2string(self._array, separator=', ', prefix=prefix, suffix=suffix) + return prefix + mid + suffix + + # This function is not required by the spec, but we implement it here for + # convenience so that np.asarray(np.array_api.Array) will work. + def __array__(self, dtype: None | np.dtype[Any] = None) -> npt.NDArray[Any]: + """ + Warning: this method is NOT part of the array API spec. Implementers + of other libraries need not include it, and users should not assume it + will be present in other implementations. + + """ + return np.asarray(self._array, dtype=dtype) + + # These are various helper functions to make the array behavior match the + # spec in places where it either deviates from or is more strict than + # NumPy behavior + + def _check_allowed_dtypes(self, other, dtype_category, op): + """ + Helper function for operators to only allow specific input dtypes + + Use like + + other = self._check_allowed_dtypes(other, 'numeric', '__add__') + if other is NotImplemented: + return other + """ + + if self.dtype not in _dtype_categories[dtype_category]: + raise TypeError(f"Only {dtype_category} dtypes are allowed in {op}") + if isinstance(other, (int, float, bool)): + other = self._promote_scalar(other) + elif isinstance(other, Array): + if other.dtype not in _dtype_categories[dtype_category]: + raise TypeError(f"Only {dtype_category} dtypes are allowed in {op}") + else: + return NotImplemented + + # This will raise TypeError for type combinations that are not allowed + # to promote in the spec (even if the NumPy array operator would + # promote them). + res_dtype = _result_type(self.dtype, other.dtype) + if op.startswith("__i"): + # Note: NumPy will allow in-place operators in some cases where + # the type promoted operator does not match the left-hand side + # operand. For example, + + # >>> a = np.array(1, dtype=np.int8) + # >>> a += np.array(1, dtype=np.int16) + + # The spec explicitly disallows this. + if res_dtype != self.dtype: + raise TypeError( + f"Cannot perform {op} with dtypes {self.dtype} and {other.dtype}" + ) + + return other + + # Helper function to match the type promotion rules in the spec + def _promote_scalar(self, scalar): + """ + Returns a promoted version of a Python scalar appropriate for use with + operations on self. + + This may raise an OverflowError in cases where the scalar is an + integer that is too large to fit in a NumPy integer dtype, or + TypeError when the scalar type is incompatible with the dtype of self. + """ + if isinstance(scalar, bool): + if self.dtype not in _boolean_dtypes: + raise TypeError( + "Python bool scalars can only be promoted with bool arrays" + ) + elif isinstance(scalar, int): + if self.dtype in _boolean_dtypes: + raise TypeError( + "Python int scalars cannot be promoted with bool arrays" + ) + elif isinstance(scalar, float): + if self.dtype not in _floating_dtypes: + raise TypeError( + "Python float scalars can only be promoted with floating-point arrays." + ) + else: + raise TypeError("'scalar' must be a Python scalar") + + # Note: the spec only specifies integer-dtype/int promotion + # behavior for integers within the bounds of the integer dtype. + # Outside of those bounds we use the default NumPy behavior (either + # cast or raise OverflowError). 
+        return Array._new(np.array(scalar, self.dtype))
+
+    @staticmethod
+    def _normalize_two_args(x1, x2):
+        """
+        Normalize inputs to two-arg functions to fix type promotion rules
+
+        NumPy deviates from the spec type promotion rules in cases where one
+        argument is 0-dimensional and the other is not. For example:
+
+        >>> import numpy as np
+        >>> a = np.array([1.0], dtype=np.float32)
+        >>> b = np.array(1.0, dtype=np.float64)
+        >>> np.add(a, b)  # The spec says this should be float64
+        array([2.], dtype=float32)
+
+        To fix this, we add a dimension to the 0-dimensional array before
+        passing it through. This works because a dimension would be added
+        anyway by broadcasting, so the resulting shape is the same, but it
+        prevents NumPy from skipping the dtype promotion.
+        """
+        # Another option would be to use signature=(x1.dtype, x2.dtype, None),
+        # but that only works for ufuncs, so we would have to call the ufuncs
+        # directly in the operator methods. One should also note that this
+        # sort of trick wouldn't work for functions like searchsorted, which
+        # don't do normal broadcasting, but there aren't any functions like
+        # that in the array API namespace.
+        if x1.ndim == 0 and x2.ndim != 0:
+            # The _array[None] workaround was chosen because it is relatively
+            # performant. broadcast_to(x1._array, x2.shape) is much slower. We
+            # could also manually type promote x2, but that is more complicated
+            # and about the same performance as this.
+            x1 = Array._new(x1._array[None])
+        elif x2.ndim == 0 and x1.ndim != 0:
+            x2 = Array._new(x2._array[None])
+        return (x1, x2)
+
+    # Note: A large fraction of allowed indices are disallowed here (see the
+    # docstring below)
+    @staticmethod
+    def _validate_index(key, shape):
+        """
+        Validate an index according to the array API.
+
+        The array API specification only requires a subset of indices that are
+        supported by NumPy. This function will reject any index that is
+        allowed by NumPy but not required by the array API specification. We
+        always raise ``IndexError`` on such indices (the spec does not require
+        any specific behavior on them, but this makes the NumPy array API
+        namespace a minimal implementation of the spec). See
+        https://data-apis.org/array-api/latest/API_specification/indexing.html
+        for the full list of required indexing behavior.
+
+        This function either raises IndexError if the index ``key`` is
+        invalid, or returns a new key to be used in place of ``key`` in
+        indexing. It only raises ``IndexError`` on indices that are not
+        already rejected by NumPy, as NumPy will already raise the appropriate
+        error on such indices. ``shape`` may be None, in which case only
+        cases that are independent of the array shape are checked.
+
+        The array API specification imposes the following restrictions, which
+        NumPy itself does not:
+
+        - Indices must not rely on an implicit ellipsis at the end. That is,
+          every axis of an array must be explicitly indexed or an ellipsis
+          included.
+
+        - The start and stop of a slice may not be out of bounds. In
+          particular, for a slice ``i:j:k`` on an axis of size ``n``, only
+          the following are allowed:
+
+          - ``i`` or ``j`` omitted (``None``).
+          - ``-n <= i <= max(0, n - 1)``.
+          - For ``k > 0`` or ``k`` omitted (``None``), ``-n <= j <= n``.
+          - For ``k < 0``, ``-n - 1 <= j <= max(0, n - 1)``.
+
+        - Boolean array indices are not allowed as part of a larger tuple
+          index.
+
+        - Integer array indices are not allowed (with the exception of 0-D
+          arrays, which are treated the same as scalars).
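+
+        For example (illustrative, with ``a`` a 2-D array; NumPy itself
+        would accept all of these)::
+
+            a[0]          # IndexError: axis 1 is not indexed
+            a[0, ...]     # allowed: the ellipsis covers axis 1
+            a[:, [0, 1]]  # IndexError: integer array indices are rejected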
+ + Additionally, it should be noted that indices that would return a + scalar in NumPy will return a 0-D array. Array scalars are not allowed + in the specification, only 0-D arrays. This is done in the + ``Array._new`` constructor, not this function. + + """ + if isinstance(key, slice): + if shape is None: + return key + if shape == (): + return key + if len(shape) > 1: + raise IndexError( + "Multidimensional arrays must include an index for every axis or use an ellipsis" + ) + size = shape[0] + # Ensure invalid slice entries are passed through. + if key.start is not None: + try: + operator.index(key.start) + except TypeError: + return key + if not (-size <= key.start <= size): + raise IndexError( + "Slices with out-of-bounds start are not allowed in the array API namespace" + ) + if key.stop is not None: + try: + operator.index(key.stop) + except TypeError: + return key + step = 1 if key.step is None else key.step + if (step > 0 and not (-size <= key.stop <= size) + or step < 0 and not (-size - 1 <= key.stop <= max(0, size - 1))): + raise IndexError("Slices with out-of-bounds stop are not allowed in the array API namespace") + return key + + elif isinstance(key, tuple): + key = tuple(Array._validate_index(idx, None) for idx in key) + + for idx in key: + if ( + isinstance(idx, np.ndarray) + and idx.dtype in _boolean_dtypes + or isinstance(idx, (bool, np.bool_)) + ): + if len(key) == 1: + return key + raise IndexError( + "Boolean array indices combined with other indices are not allowed in the array API namespace" + ) + if isinstance(idx, tuple): + raise IndexError( + "Nested tuple indices are not allowed in the array API namespace" + ) + + if shape is None: + return key + n_ellipsis = key.count(...) + if n_ellipsis > 1: + return key + ellipsis_i = key.index(...) if n_ellipsis else len(key) + + for idx, size in list(zip(key[:ellipsis_i], shape)) + list( + zip(key[:ellipsis_i:-1], shape[:ellipsis_i:-1]) + ): + Array._validate_index(idx, (size,)) + if n_ellipsis == 0 and len(key) < len(shape): + raise IndexError( + "Multidimensional arrays must include an index for every axis or use an ellipsis" + ) + return key + elif isinstance(key, bool): + return key + elif isinstance(key, Array): + if key.dtype in _integer_dtypes: + if key.ndim != 0: + raise IndexError( + "Non-zero dimensional integer array indices are not allowed in the array API namespace" + ) + return key._array + elif key is Ellipsis: + return key + elif key is None: + raise IndexError( + "newaxis indices are not allowed in the array API namespace" + ) + try: + key = operator.index(key) + if shape is not None and len(shape) > 1: + raise IndexError( + "Multidimensional arrays must include an index for every axis or use an ellipsis" + ) + return key + except TypeError: + # Note: This also omits boolean arrays that are not already in + # Array() form, like a list of booleans. + raise IndexError( + "Only integers, slices (`:`), ellipsis (`...`), and boolean arrays are valid indices in the array API namespace" + ) + + # Everything below this line is required by the spec. + + def __abs__(self: Array, /) -> Array: + """ + Performs the operation __abs__. + """ + if self.dtype not in _numeric_dtypes: + raise TypeError("Only numeric dtypes are allowed in __abs__") + res = self._array.__abs__() + return self.__class__._new(res) + + def __add__(self: Array, other: Union[int, float, Array], /) -> Array: + """ + Performs the operation __add__. 
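+
+        Illustrative (``xp`` denotes ``numpy.array_api``)::
+
+            >>> xp.asarray([1, 2]) + xp.asarray([10, 20])
+            Array([11, 22], dtype=int64)
+            >>> xp.asarray([1, 2]) + xp.asarray(True)
+            TypeError: Only numeric dtypes are allowed in __add__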
+ """ + other = self._check_allowed_dtypes(other, "numeric", "__add__") + if other is NotImplemented: + return other + self, other = self._normalize_two_args(self, other) + res = self._array.__add__(other._array) + return self.__class__._new(res) + + def __and__(self: Array, other: Union[int, bool, Array], /) -> Array: + """ + Performs the operation __and__. + """ + other = self._check_allowed_dtypes(other, "integer or boolean", "__and__") + if other is NotImplemented: + return other + self, other = self._normalize_two_args(self, other) + res = self._array.__and__(other._array) + return self.__class__._new(res) + + def __array_namespace__( + self: Array, /, *, api_version: Optional[str] = None + ) -> types.ModuleType: + if api_version is not None and not api_version.startswith("2021."): + raise ValueError(f"Unrecognized array API version: {api_version!r}") + return array_api + + def __bool__(self: Array, /) -> bool: + """ + Performs the operation __bool__. + """ + # Note: This is an error here. + if self._array.ndim != 0: + raise TypeError("bool is only allowed on arrays with 0 dimensions") + if self.dtype not in _boolean_dtypes: + raise ValueError("bool is only allowed on boolean arrays") + res = self._array.__bool__() + return res + + def __dlpack__(self: Array, /, *, stream: None = None) -> PyCapsule: + """ + Performs the operation __dlpack__. + """ + return self._array.__dlpack__(stream=stream) + + def __dlpack_device__(self: Array, /) -> Tuple[IntEnum, int]: + """ + Performs the operation __dlpack_device__. + """ + # Note: device support is required for this + return self._array.__dlpack_device__() + + def __eq__(self: Array, other: Union[int, float, bool, Array], /) -> Array: + """ + Performs the operation __eq__. + """ + # Even though "all" dtypes are allowed, we still require them to be + # promotable with each other. + other = self._check_allowed_dtypes(other, "all", "__eq__") + if other is NotImplemented: + return other + self, other = self._normalize_two_args(self, other) + res = self._array.__eq__(other._array) + return self.__class__._new(res) + + def __float__(self: Array, /) -> float: + """ + Performs the operation __float__. + """ + # Note: This is an error here. + if self._array.ndim != 0: + raise TypeError("float is only allowed on arrays with 0 dimensions") + if self.dtype not in _floating_dtypes: + raise ValueError("float is only allowed on floating-point arrays") + res = self._array.__float__() + return res + + def __floordiv__(self: Array, other: Union[int, float, Array], /) -> Array: + """ + Performs the operation __floordiv__. + """ + other = self._check_allowed_dtypes(other, "numeric", "__floordiv__") + if other is NotImplemented: + return other + self, other = self._normalize_two_args(self, other) + res = self._array.__floordiv__(other._array) + return self.__class__._new(res) + + def __ge__(self: Array, other: Union[int, float, Array], /) -> Array: + """ + Performs the operation __ge__. + """ + other = self._check_allowed_dtypes(other, "numeric", "__ge__") + if other is NotImplemented: + return other + self, other = self._normalize_two_args(self, other) + res = self._array.__ge__(other._array) + return self.__class__._new(res) + + def __getitem__( + self: Array, + key: Union[ + int, slice, ellipsis, Tuple[Union[int, slice, ellipsis], ...], Array + ], + /, + ) -> Array: + """ + Performs the operation __getitem__. + """ + # Note: Only indices required by the spec are allowed. 
See the + # docstring of _validate_index + key = self._validate_index(key, self.shape) + res = self._array.__getitem__(key) + return self._new(res) + + def __gt__(self: Array, other: Union[int, float, Array], /) -> Array: + """ + Performs the operation __gt__. + """ + other = self._check_allowed_dtypes(other, "numeric", "__gt__") + if other is NotImplemented: + return other + self, other = self._normalize_two_args(self, other) + res = self._array.__gt__(other._array) + return self.__class__._new(res) + + def __int__(self: Array, /) -> int: + """ + Performs the operation __int__. + """ + # Note: This is an error here. + if self._array.ndim != 0: + raise TypeError("int is only allowed on arrays with 0 dimensions") + if self.dtype not in _integer_dtypes: + raise ValueError("int is only allowed on integer arrays") + res = self._array.__int__() + return res + + def __index__(self: Array, /) -> int: + """ + Performs the operation __index__. + """ + res = self._array.__index__() + return res + + def __invert__(self: Array, /) -> Array: + """ + Performs the operation __invert__. + """ + if self.dtype not in _integer_or_boolean_dtypes: + raise TypeError("Only integer or boolean dtypes are allowed in __invert__") + res = self._array.__invert__() + return self.__class__._new(res) + + def __le__(self: Array, other: Union[int, float, Array], /) -> Array: + """ + Performs the operation __le__. + """ + other = self._check_allowed_dtypes(other, "numeric", "__le__") + if other is NotImplemented: + return other + self, other = self._normalize_two_args(self, other) + res = self._array.__le__(other._array) + return self.__class__._new(res) + + def __lshift__(self: Array, other: Union[int, Array], /) -> Array: + """ + Performs the operation __lshift__. + """ + other = self._check_allowed_dtypes(other, "integer", "__lshift__") + if other is NotImplemented: + return other + self, other = self._normalize_two_args(self, other) + res = self._array.__lshift__(other._array) + return self.__class__._new(res) + + def __lt__(self: Array, other: Union[int, float, Array], /) -> Array: + """ + Performs the operation __lt__. + """ + other = self._check_allowed_dtypes(other, "numeric", "__lt__") + if other is NotImplemented: + return other + self, other = self._normalize_two_args(self, other) + res = self._array.__lt__(other._array) + return self.__class__._new(res) + + def __matmul__(self: Array, other: Array, /) -> Array: + """ + Performs the operation __matmul__. + """ + # matmul is not defined for scalars, but without this, we may get + # the wrong error message from asarray. + other = self._check_allowed_dtypes(other, "numeric", "__matmul__") + if other is NotImplemented: + return other + res = self._array.__matmul__(other._array) + return self.__class__._new(res) + + def __mod__(self: Array, other: Union[int, float, Array], /) -> Array: + """ + Performs the operation __mod__. + """ + other = self._check_allowed_dtypes(other, "numeric", "__mod__") + if other is NotImplemented: + return other + self, other = self._normalize_two_args(self, other) + res = self._array.__mod__(other._array) + return self.__class__._new(res) + + def __mul__(self: Array, other: Union[int, float, Array], /) -> Array: + """ + Performs the operation __mul__. 
+ """ + other = self._check_allowed_dtypes(other, "numeric", "__mul__") + if other is NotImplemented: + return other + self, other = self._normalize_two_args(self, other) + res = self._array.__mul__(other._array) + return self.__class__._new(res) + + def __ne__(self: Array, other: Union[int, float, bool, Array], /) -> Array: + """ + Performs the operation __ne__. + """ + other = self._check_allowed_dtypes(other, "all", "__ne__") + if other is NotImplemented: + return other + self, other = self._normalize_two_args(self, other) + res = self._array.__ne__(other._array) + return self.__class__._new(res) + + def __neg__(self: Array, /) -> Array: + """ + Performs the operation __neg__. + """ + if self.dtype not in _numeric_dtypes: + raise TypeError("Only numeric dtypes are allowed in __neg__") + res = self._array.__neg__() + return self.__class__._new(res) + + def __or__(self: Array, other: Union[int, bool, Array], /) -> Array: + """ + Performs the operation __or__. + """ + other = self._check_allowed_dtypes(other, "integer or boolean", "__or__") + if other is NotImplemented: + return other + self, other = self._normalize_two_args(self, other) + res = self._array.__or__(other._array) + return self.__class__._new(res) + + def __pos__(self: Array, /) -> Array: + """ + Performs the operation __pos__. + """ + if self.dtype not in _numeric_dtypes: + raise TypeError("Only numeric dtypes are allowed in __pos__") + res = self._array.__pos__() + return self.__class__._new(res) + + def __pow__(self: Array, other: Union[int, float, Array], /) -> Array: + """ + Performs the operation __pow__. + """ + from ._elementwise_functions import pow + + other = self._check_allowed_dtypes(other, "numeric", "__pow__") + if other is NotImplemented: + return other + # Note: NumPy's __pow__ does not follow type promotion rules for 0-d + # arrays, so we use pow() here instead. + return pow(self, other) + + def __rshift__(self: Array, other: Union[int, Array], /) -> Array: + """ + Performs the operation __rshift__. + """ + other = self._check_allowed_dtypes(other, "integer", "__rshift__") + if other is NotImplemented: + return other + self, other = self._normalize_two_args(self, other) + res = self._array.__rshift__(other._array) + return self.__class__._new(res) + + def __setitem__( + self, + key: Union[ + int, slice, ellipsis, Tuple[Union[int, slice, ellipsis], ...], Array + ], + value: Union[int, float, bool, Array], + /, + ) -> None: + """ + Performs the operation __setitem__. + """ + # Note: Only indices required by the spec are allowed. See the + # docstring of _validate_index + key = self._validate_index(key, self.shape) + self._array.__setitem__(key, asarray(value)._array) + + def __sub__(self: Array, other: Union[int, float, Array], /) -> Array: + """ + Performs the operation __sub__. + """ + other = self._check_allowed_dtypes(other, "numeric", "__sub__") + if other is NotImplemented: + return other + self, other = self._normalize_two_args(self, other) + res = self._array.__sub__(other._array) + return self.__class__._new(res) + + # PEP 484 requires int to be a subtype of float, but __truediv__ should + # not accept int. + def __truediv__(self: Array, other: Union[float, Array], /) -> Array: + """ + Performs the operation __truediv__. 
+ """ + other = self._check_allowed_dtypes(other, "floating-point", "__truediv__") + if other is NotImplemented: + return other + self, other = self._normalize_two_args(self, other) + res = self._array.__truediv__(other._array) + return self.__class__._new(res) + + def __xor__(self: Array, other: Union[int, bool, Array], /) -> Array: + """ + Performs the operation __xor__. + """ + other = self._check_allowed_dtypes(other, "integer or boolean", "__xor__") + if other is NotImplemented: + return other + self, other = self._normalize_two_args(self, other) + res = self._array.__xor__(other._array) + return self.__class__._new(res) + + def __iadd__(self: Array, other: Union[int, float, Array], /) -> Array: + """ + Performs the operation __iadd__. + """ + other = self._check_allowed_dtypes(other, "numeric", "__iadd__") + if other is NotImplemented: + return other + self._array.__iadd__(other._array) + return self + + def __radd__(self: Array, other: Union[int, float, Array], /) -> Array: + """ + Performs the operation __radd__. + """ + other = self._check_allowed_dtypes(other, "numeric", "__radd__") + if other is NotImplemented: + return other + self, other = self._normalize_two_args(self, other) + res = self._array.__radd__(other._array) + return self.__class__._new(res) + + def __iand__(self: Array, other: Union[int, bool, Array], /) -> Array: + """ + Performs the operation __iand__. + """ + other = self._check_allowed_dtypes(other, "integer or boolean", "__iand__") + if other is NotImplemented: + return other + self._array.__iand__(other._array) + return self + + def __rand__(self: Array, other: Union[int, bool, Array], /) -> Array: + """ + Performs the operation __rand__. + """ + other = self._check_allowed_dtypes(other, "integer or boolean", "__rand__") + if other is NotImplemented: + return other + self, other = self._normalize_two_args(self, other) + res = self._array.__rand__(other._array) + return self.__class__._new(res) + + def __ifloordiv__(self: Array, other: Union[int, float, Array], /) -> Array: + """ + Performs the operation __ifloordiv__. + """ + other = self._check_allowed_dtypes(other, "numeric", "__ifloordiv__") + if other is NotImplemented: + return other + self._array.__ifloordiv__(other._array) + return self + + def __rfloordiv__(self: Array, other: Union[int, float, Array], /) -> Array: + """ + Performs the operation __rfloordiv__. + """ + other = self._check_allowed_dtypes(other, "numeric", "__rfloordiv__") + if other is NotImplemented: + return other + self, other = self._normalize_two_args(self, other) + res = self._array.__rfloordiv__(other._array) + return self.__class__._new(res) + + def __ilshift__(self: Array, other: Union[int, Array], /) -> Array: + """ + Performs the operation __ilshift__. + """ + other = self._check_allowed_dtypes(other, "integer", "__ilshift__") + if other is NotImplemented: + return other + self._array.__ilshift__(other._array) + return self + + def __rlshift__(self: Array, other: Union[int, Array], /) -> Array: + """ + Performs the operation __rlshift__. + """ + other = self._check_allowed_dtypes(other, "integer", "__rlshift__") + if other is NotImplemented: + return other + self, other = self._normalize_two_args(self, other) + res = self._array.__rlshift__(other._array) + return self.__class__._new(res) + + def __imatmul__(self: Array, other: Array, /) -> Array: + """ + Performs the operation __imatmul__. + """ + # Note: NumPy does not implement __imatmul__. 
+ + # matmul is not defined for scalars, but without this, we may get + # the wrong error message from asarray. + other = self._check_allowed_dtypes(other, "numeric", "__imatmul__") + if other is NotImplemented: + return other + + # __imatmul__ can only be allowed when it would not change the shape + # of self. + other_shape = other.shape + if self.shape == () or other_shape == (): + raise ValueError("@= requires at least one dimension") + if len(other_shape) == 1 or other_shape[-1] != other_shape[-2]: + raise ValueError("@= cannot change the shape of the input array") + self._array[:] = self._array.__matmul__(other._array) + return self + + def __rmatmul__(self: Array, other: Array, /) -> Array: + """ + Performs the operation __rmatmul__. + """ + # matmul is not defined for scalars, but without this, we may get + # the wrong error message from asarray. + other = self._check_allowed_dtypes(other, "numeric", "__rmatmul__") + if other is NotImplemented: + return other + res = self._array.__rmatmul__(other._array) + return self.__class__._new(res) + + def __imod__(self: Array, other: Union[int, float, Array], /) -> Array: + """ + Performs the operation __imod__. + """ + other = self._check_allowed_dtypes(other, "numeric", "__imod__") + if other is NotImplemented: + return other + self._array.__imod__(other._array) + return self + + def __rmod__(self: Array, other: Union[int, float, Array], /) -> Array: + """ + Performs the operation __rmod__. + """ + other = self._check_allowed_dtypes(other, "numeric", "__rmod__") + if other is NotImplemented: + return other + self, other = self._normalize_two_args(self, other) + res = self._array.__rmod__(other._array) + return self.__class__._new(res) + + def __imul__(self: Array, other: Union[int, float, Array], /) -> Array: + """ + Performs the operation __imul__. + """ + other = self._check_allowed_dtypes(other, "numeric", "__imul__") + if other is NotImplemented: + return other + self._array.__imul__(other._array) + return self + + def __rmul__(self: Array, other: Union[int, float, Array], /) -> Array: + """ + Performs the operation __rmul__. + """ + other = self._check_allowed_dtypes(other, "numeric", "__rmul__") + if other is NotImplemented: + return other + self, other = self._normalize_two_args(self, other) + res = self._array.__rmul__(other._array) + return self.__class__._new(res) + + def __ior__(self: Array, other: Union[int, bool, Array], /) -> Array: + """ + Performs the operation __ior__. + """ + other = self._check_allowed_dtypes(other, "integer or boolean", "__ior__") + if other is NotImplemented: + return other + self._array.__ior__(other._array) + return self + + def __ror__(self: Array, other: Union[int, bool, Array], /) -> Array: + """ + Performs the operation __ror__. + """ + other = self._check_allowed_dtypes(other, "integer or boolean", "__ror__") + if other is NotImplemented: + return other + self, other = self._normalize_two_args(self, other) + res = self._array.__ror__(other._array) + return self.__class__._new(res) + + def __ipow__(self: Array, other: Union[int, float, Array], /) -> Array: + """ + Performs the operation __ipow__. + """ + other = self._check_allowed_dtypes(other, "numeric", "__ipow__") + if other is NotImplemented: + return other + self._array.__ipow__(other._array) + return self + + def __rpow__(self: Array, other: Union[int, float, Array], /) -> Array: + """ + Performs the operation __rpow__. 
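+
+        Illustrative (the reflected operator, reached from a Python scalar
+        on the left; ``xp`` denotes ``numpy.array_api``)::
+
+            >>> 2.0 ** xp.asarray(3.0)
+            Array(8., dtype=float64)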
+ """ + from ._elementwise_functions import pow + + other = self._check_allowed_dtypes(other, "numeric", "__rpow__") + if other is NotImplemented: + return other + # Note: NumPy's __pow__ does not follow the spec type promotion rules + # for 0-d arrays, so we use pow() here instead. + return pow(other, self) + + def __irshift__(self: Array, other: Union[int, Array], /) -> Array: + """ + Performs the operation __irshift__. + """ + other = self._check_allowed_dtypes(other, "integer", "__irshift__") + if other is NotImplemented: + return other + self._array.__irshift__(other._array) + return self + + def __rrshift__(self: Array, other: Union[int, Array], /) -> Array: + """ + Performs the operation __rrshift__. + """ + other = self._check_allowed_dtypes(other, "integer", "__rrshift__") + if other is NotImplemented: + return other + self, other = self._normalize_two_args(self, other) + res = self._array.__rrshift__(other._array) + return self.__class__._new(res) + + def __isub__(self: Array, other: Union[int, float, Array], /) -> Array: + """ + Performs the operation __isub__. + """ + other = self._check_allowed_dtypes(other, "numeric", "__isub__") + if other is NotImplemented: + return other + self._array.__isub__(other._array) + return self + + def __rsub__(self: Array, other: Union[int, float, Array], /) -> Array: + """ + Performs the operation __rsub__. + """ + other = self._check_allowed_dtypes(other, "numeric", "__rsub__") + if other is NotImplemented: + return other + self, other = self._normalize_two_args(self, other) + res = self._array.__rsub__(other._array) + return self.__class__._new(res) + + def __itruediv__(self: Array, other: Union[float, Array], /) -> Array: + """ + Performs the operation __itruediv__. + """ + other = self._check_allowed_dtypes(other, "floating-point", "__itruediv__") + if other is NotImplemented: + return other + self._array.__itruediv__(other._array) + return self + + def __rtruediv__(self: Array, other: Union[float, Array], /) -> Array: + """ + Performs the operation __rtruediv__. + """ + other = self._check_allowed_dtypes(other, "floating-point", "__rtruediv__") + if other is NotImplemented: + return other + self, other = self._normalize_two_args(self, other) + res = self._array.__rtruediv__(other._array) + return self.__class__._new(res) + + def __ixor__(self: Array, other: Union[int, bool, Array], /) -> Array: + """ + Performs the operation __ixor__. + """ + other = self._check_allowed_dtypes(other, "integer or boolean", "__ixor__") + if other is NotImplemented: + return other + self._array.__ixor__(other._array) + return self + + def __rxor__(self: Array, other: Union[int, bool, Array], /) -> Array: + """ + Performs the operation __rxor__. + """ + other = self._check_allowed_dtypes(other, "integer or boolean", "__rxor__") + if other is NotImplemented: + return other + self, other = self._normalize_two_args(self, other) + res = self._array.__rxor__(other._array) + return self.__class__._new(res) + + def to_device(self: Array, device: Device, /, stream: None = None) -> Array: + if stream is not None: + raise ValueError("The stream argument to to_device() is not supported") + if device == 'cpu': + return self + raise ValueError(f"Unsupported device {device!r}") + + @property + def dtype(self) -> Dtype: + """ + Array API compatible wrapper for :py:meth:`np.ndarray.dtype `. + + See its docstring for more information. 
+ """ + return self._array.dtype + + @property + def device(self) -> Device: + return "cpu" + + # Note: mT is new in array API spec (see matrix_transpose) + @property + def mT(self) -> Array: + from .linalg import matrix_transpose + return matrix_transpose(self) + + @property + def ndim(self) -> int: + """ + Array API compatible wrapper for :py:meth:`np.ndarray.ndim `. + + See its docstring for more information. + """ + return self._array.ndim + + @property + def shape(self) -> Tuple[int, ...]: + """ + Array API compatible wrapper for :py:meth:`np.ndarray.shape `. + + See its docstring for more information. + """ + return self._array.shape + + @property + def size(self) -> int: + """ + Array API compatible wrapper for :py:meth:`np.ndarray.size `. + + See its docstring for more information. + """ + return self._array.size + + @property + def T(self) -> Array: + """ + Array API compatible wrapper for :py:meth:`np.ndarray.T `. + + See its docstring for more information. + """ + # Note: T only works on 2-dimensional arrays. See the corresponding + # note in the specification: + # https://data-apis.org/array-api/latest/API_specification/array_object.html#t + if self.ndim != 2: + raise ValueError("x.T requires x to have 2 dimensions. Use x.mT to transpose stacks of matrices and permute_dims() to permute dimensions.") + return self.__class__._new(self._array.T) diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/_constants.py b/.local/lib/python3.8/site-packages/numpy/array_api/_constants.py new file mode 100644 index 0000000..9541941 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/array_api/_constants.py @@ -0,0 +1,6 @@ +import numpy as np + +e = np.e +inf = np.inf +nan = np.nan +pi = np.pi diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/_creation_functions.py b/.local/lib/python3.8/site-packages/numpy/array_api/_creation_functions.py new file mode 100644 index 0000000..741498f --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/array_api/_creation_functions.py @@ -0,0 +1,351 @@ +from __future__ import annotations + + +from typing import TYPE_CHECKING, List, Optional, Tuple, Union + +if TYPE_CHECKING: + from ._typing import ( + Array, + Device, + Dtype, + NestedSequence, + SupportsBufferProtocol, + ) + from collections.abc import Sequence +from ._dtypes import _all_dtypes + +import numpy as np + + +def _check_valid_dtype(dtype): + # Note: Only spelling dtypes as the dtype objects is supported. + + # We use this instead of "dtype in _all_dtypes" because the dtype objects + # define equality with the sorts of things we want to disallow. + for d in (None,) + _all_dtypes: + if dtype is d: + return + raise ValueError("dtype must be one of the supported dtypes") + + +def asarray( + obj: Union[ + Array, + bool, + int, + float, + NestedSequence[bool | int | float], + SupportsBufferProtocol, + ], + /, + *, + dtype: Optional[Dtype] = None, + device: Optional[Device] = None, + copy: Optional[Union[bool, np._CopyMode]] = None, +) -> Array: + """ + Array API compatible wrapper for :py:func:`np.asarray `. + + See its docstring for more information. 
+ """ + # _array_object imports in this file are inside the functions to avoid + # circular imports + from ._array_object import Array + + _check_valid_dtype(dtype) + if device not in ["cpu", None]: + raise ValueError(f"Unsupported device {device!r}") + if copy in (False, np._CopyMode.IF_NEEDED): + # Note: copy=False is not yet implemented in np.asarray + raise NotImplementedError("copy=False is not yet implemented") + if isinstance(obj, Array): + if dtype is not None and obj.dtype != dtype: + copy = True + if copy in (True, np._CopyMode.ALWAYS): + return Array._new(np.array(obj._array, copy=True, dtype=dtype)) + return obj + if dtype is None and isinstance(obj, int) and (obj > 2 ** 64 or obj < -(2 ** 63)): + # Give a better error message in this case. NumPy would convert this + # to an object array. TODO: This won't handle large integers in lists. + raise OverflowError("Integer out of bounds for array dtypes") + res = np.asarray(obj, dtype=dtype) + return Array._new(res) + + +def arange( + start: Union[int, float], + /, + stop: Optional[Union[int, float]] = None, + step: Union[int, float] = 1, + *, + dtype: Optional[Dtype] = None, + device: Optional[Device] = None, +) -> Array: + """ + Array API compatible wrapper for :py:func:`np.arange `. + + See its docstring for more information. + """ + from ._array_object import Array + + _check_valid_dtype(dtype) + if device not in ["cpu", None]: + raise ValueError(f"Unsupported device {device!r}") + return Array._new(np.arange(start, stop=stop, step=step, dtype=dtype)) + + +def empty( + shape: Union[int, Tuple[int, ...]], + *, + dtype: Optional[Dtype] = None, + device: Optional[Device] = None, +) -> Array: + """ + Array API compatible wrapper for :py:func:`np.empty `. + + See its docstring for more information. + """ + from ._array_object import Array + + _check_valid_dtype(dtype) + if device not in ["cpu", None]: + raise ValueError(f"Unsupported device {device!r}") + return Array._new(np.empty(shape, dtype=dtype)) + + +def empty_like( + x: Array, /, *, dtype: Optional[Dtype] = None, device: Optional[Device] = None +) -> Array: + """ + Array API compatible wrapper for :py:func:`np.empty_like `. + + See its docstring for more information. + """ + from ._array_object import Array + + _check_valid_dtype(dtype) + if device not in ["cpu", None]: + raise ValueError(f"Unsupported device {device!r}") + return Array._new(np.empty_like(x._array, dtype=dtype)) + + +def eye( + n_rows: int, + n_cols: Optional[int] = None, + /, + *, + k: int = 0, + dtype: Optional[Dtype] = None, + device: Optional[Device] = None, +) -> Array: + """ + Array API compatible wrapper for :py:func:`np.eye `. + + See its docstring for more information. + """ + from ._array_object import Array + + _check_valid_dtype(dtype) + if device not in ["cpu", None]: + raise ValueError(f"Unsupported device {device!r}") + return Array._new(np.eye(n_rows, M=n_cols, k=k, dtype=dtype)) + + +def from_dlpack(x: object, /) -> Array: + from ._array_object import Array + + return Array._new(np._from_dlpack(x)) + + +def full( + shape: Union[int, Tuple[int, ...]], + fill_value: Union[int, float], + *, + dtype: Optional[Dtype] = None, + device: Optional[Device] = None, +) -> Array: + """ + Array API compatible wrapper for :py:func:`np.full `. + + See its docstring for more information. 
+ """ + from ._array_object import Array + + _check_valid_dtype(dtype) + if device not in ["cpu", None]: + raise ValueError(f"Unsupported device {device!r}") + if isinstance(fill_value, Array) and fill_value.ndim == 0: + fill_value = fill_value._array + res = np.full(shape, fill_value, dtype=dtype) + if res.dtype not in _all_dtypes: + # This will happen if the fill value is not something that NumPy + # coerces to one of the acceptable dtypes. + raise TypeError("Invalid input to full") + return Array._new(res) + + +def full_like( + x: Array, + /, + fill_value: Union[int, float], + *, + dtype: Optional[Dtype] = None, + device: Optional[Device] = None, +) -> Array: + """ + Array API compatible wrapper for :py:func:`np.full_like `. + + See its docstring for more information. + """ + from ._array_object import Array + + _check_valid_dtype(dtype) + if device not in ["cpu", None]: + raise ValueError(f"Unsupported device {device!r}") + res = np.full_like(x._array, fill_value, dtype=dtype) + if res.dtype not in _all_dtypes: + # This will happen if the fill value is not something that NumPy + # coerces to one of the acceptable dtypes. + raise TypeError("Invalid input to full_like") + return Array._new(res) + + +def linspace( + start: Union[int, float], + stop: Union[int, float], + /, + num: int, + *, + dtype: Optional[Dtype] = None, + device: Optional[Device] = None, + endpoint: bool = True, +) -> Array: + """ + Array API compatible wrapper for :py:func:`np.linspace `. + + See its docstring for more information. + """ + from ._array_object import Array + + _check_valid_dtype(dtype) + if device not in ["cpu", None]: + raise ValueError(f"Unsupported device {device!r}") + return Array._new(np.linspace(start, stop, num, dtype=dtype, endpoint=endpoint)) + + +def meshgrid(*arrays: Array, indexing: str = "xy") -> List[Array]: + """ + Array API compatible wrapper for :py:func:`np.meshgrid `. + + See its docstring for more information. + """ + from ._array_object import Array + + # Note: unlike np.meshgrid, only inputs with all the same dtype are + # allowed + + if len({a.dtype for a in arrays}) > 1: + raise ValueError("meshgrid inputs must all have the same dtype") + + return [ + Array._new(array) + for array in np.meshgrid(*[a._array for a in arrays], indexing=indexing) + ] + + +def ones( + shape: Union[int, Tuple[int, ...]], + *, + dtype: Optional[Dtype] = None, + device: Optional[Device] = None, +) -> Array: + """ + Array API compatible wrapper for :py:func:`np.ones `. + + See its docstring for more information. + """ + from ._array_object import Array + + _check_valid_dtype(dtype) + if device not in ["cpu", None]: + raise ValueError(f"Unsupported device {device!r}") + return Array._new(np.ones(shape, dtype=dtype)) + + +def ones_like( + x: Array, /, *, dtype: Optional[Dtype] = None, device: Optional[Device] = None +) -> Array: + """ + Array API compatible wrapper for :py:func:`np.ones_like `. + + See its docstring for more information. + """ + from ._array_object import Array + + _check_valid_dtype(dtype) + if device not in ["cpu", None]: + raise ValueError(f"Unsupported device {device!r}") + return Array._new(np.ones_like(x._array, dtype=dtype)) + + +def tril(x: Array, /, *, k: int = 0) -> Array: + """ + Array API compatible wrapper for :py:func:`np.tril `. + + See its docstring for more information. 
+ """ + from ._array_object import Array + + if x.ndim < 2: + # Note: Unlike np.tril, x must be at least 2-D + raise ValueError("x must be at least 2-dimensional for tril") + return Array._new(np.tril(x._array, k=k)) + + +def triu(x: Array, /, *, k: int = 0) -> Array: + """ + Array API compatible wrapper for :py:func:`np.triu `. + + See its docstring for more information. + """ + from ._array_object import Array + + if x.ndim < 2: + # Note: Unlike np.triu, x must be at least 2-D + raise ValueError("x must be at least 2-dimensional for triu") + return Array._new(np.triu(x._array, k=k)) + + +def zeros( + shape: Union[int, Tuple[int, ...]], + *, + dtype: Optional[Dtype] = None, + device: Optional[Device] = None, +) -> Array: + """ + Array API compatible wrapper for :py:func:`np.zeros `. + + See its docstring for more information. + """ + from ._array_object import Array + + _check_valid_dtype(dtype) + if device not in ["cpu", None]: + raise ValueError(f"Unsupported device {device!r}") + return Array._new(np.zeros(shape, dtype=dtype)) + + +def zeros_like( + x: Array, /, *, dtype: Optional[Dtype] = None, device: Optional[Device] = None +) -> Array: + """ + Array API compatible wrapper for :py:func:`np.zeros_like `. + + See its docstring for more information. + """ + from ._array_object import Array + + _check_valid_dtype(dtype) + if device not in ["cpu", None]: + raise ValueError(f"Unsupported device {device!r}") + return Array._new(np.zeros_like(x._array, dtype=dtype)) diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/_data_type_functions.py b/.local/lib/python3.8/site-packages/numpy/array_api/_data_type_functions.py new file mode 100644 index 0000000..e4d6db6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/array_api/_data_type_functions.py @@ -0,0 +1,134 @@ +from __future__ import annotations + +from ._array_object import Array +from ._dtypes import _all_dtypes, _result_type + +from dataclasses import dataclass +from typing import TYPE_CHECKING, List, Tuple, Union + +if TYPE_CHECKING: + from ._typing import Dtype + from collections.abc import Sequence + +import numpy as np + + +# Note: astype is a function, not an array method as in NumPy. +def astype(x: Array, dtype: Dtype, /, *, copy: bool = True) -> Array: + if not copy and dtype == x.dtype: + return x + return Array._new(x._array.astype(dtype=dtype, copy=copy)) + + +def broadcast_arrays(*arrays: Array) -> List[Array]: + """ + Array API compatible wrapper for :py:func:`np.broadcast_arrays `. + + See its docstring for more information. + """ + from ._array_object import Array + + return [ + Array._new(array) for array in np.broadcast_arrays(*[a._array for a in arrays]) + ] + + +def broadcast_to(x: Array, /, shape: Tuple[int, ...]) -> Array: + """ + Array API compatible wrapper for :py:func:`np.broadcast_to `. + + See its docstring for more information. + """ + from ._array_object import Array + + return Array._new(np.broadcast_to(x._array, shape)) + + +def can_cast(from_: Union[Dtype, Array], to: Dtype, /) -> bool: + """ + Array API compatible wrapper for :py:func:`np.can_cast `. + + See its docstring for more information. + """ + from ._array_object import Array + + if isinstance(from_, Array): + from_ = from_._array + return np.can_cast(from_, to) + + +# These are internal objects for the return types of finfo and iinfo, since +# the NumPy versions contain extra data that isn't part of the spec. 
+@dataclass +class finfo_object: + bits: int + # Note: The types of the float data here are float, whereas in NumPy they + # are scalars of the corresponding float dtype. + eps: float + max: float + min: float + smallest_normal: float + + +@dataclass +class iinfo_object: + bits: int + max: int + min: int + + +def finfo(type: Union[Dtype, Array], /) -> finfo_object: + """ + Array API compatible wrapper for :py:func:`np.finfo `. + + See its docstring for more information. + """ + fi = np.finfo(type) + # Note: The types of the float data here are float, whereas in NumPy they + # are scalars of the corresponding float dtype. + return finfo_object( + fi.bits, + float(fi.eps), + float(fi.max), + float(fi.min), + float(fi.smallest_normal), + ) + + +def iinfo(type: Union[Dtype, Array], /) -> iinfo_object: + """ + Array API compatible wrapper for :py:func:`np.iinfo `. + + See its docstring for more information. + """ + ii = np.iinfo(type) + return iinfo_object(ii.bits, ii.max, ii.min) + + +def result_type(*arrays_and_dtypes: Union[Array, Dtype]) -> Dtype: + """ + Array API compatible wrapper for :py:func:`np.result_type `. + + See its docstring for more information. + """ + # Note: we use a custom implementation that gives only the type promotions + # required by the spec rather than using np.result_type. NumPy implements + # too many extra type promotions like int64 + uint64 -> float64, and does + # value-based casting on scalar arrays. + A = [] + for a in arrays_and_dtypes: + if isinstance(a, Array): + a = a.dtype + elif isinstance(a, np.ndarray) or a not in _all_dtypes: + raise TypeError("result_type() inputs must be array_api arrays or dtypes") + A.append(a) + + if len(A) == 0: + raise ValueError("at least one array or dtype is required") + elif len(A) == 1: + return A[0] + else: + t = A[0] + for t2 in A[1:]: + t = _result_type(t, t2) + return t diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/_dtypes.py b/.local/lib/python3.8/site-packages/numpy/array_api/_dtypes.py new file mode 100644 index 0000000..476d619 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/array_api/_dtypes.py @@ -0,0 +1,143 @@ +import numpy as np + +# Note: we use dtype objects instead of dtype classes. The spec does not +# require any behavior on dtypes other than equality. +int8 = np.dtype("int8") +int16 = np.dtype("int16") +int32 = np.dtype("int32") +int64 = np.dtype("int64") +uint8 = np.dtype("uint8") +uint16 = np.dtype("uint16") +uint32 = np.dtype("uint32") +uint64 = np.dtype("uint64") +float32 = np.dtype("float32") +float64 = np.dtype("float64") +# Note: This name is changed +bool = np.dtype("bool") + +_all_dtypes = ( + int8, + int16, + int32, + int64, + uint8, + uint16, + uint32, + uint64, + float32, + float64, + bool, +) +_boolean_dtypes = (bool,) +_floating_dtypes = (float32, float64) +_integer_dtypes = (int8, int16, int32, int64, uint8, uint16, uint32, uint64) +_integer_or_boolean_dtypes = ( + bool, + int8, + int16, + int32, + int64, + uint8, + uint16, + uint32, + uint64, +) +_numeric_dtypes = ( + float32, + float64, + int8, + int16, + int32, + int64, + uint8, + uint16, + uint32, + uint64, +) + +_dtype_categories = { + "all": _all_dtypes, + "numeric": _numeric_dtypes, + "integer": _integer_dtypes, + "integer or boolean": _integer_or_boolean_dtypes, + "boolean": _boolean_dtypes, + "floating-point": _floating_dtypes, +} + + +# Note: the spec defines a restricted type promotion table compared to NumPy. 
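+# (Illustrative: under this table int8 + uint8 promotes to int16, while a
+# pair such as (int8, float32) is simply absent, so _result_type below
+# raises TypeError for it.)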
+# In particular, cross-kind promotions like integer + float or boolean + +# integer are not allowed, even for functions that accept both kinds. +# Additionally, NumPy promotes signed integer + uint64 to float64, but this +# promotion is not allowed here. To be clear, Python scalar int objects are +# allowed to promote to floating-point dtypes, but only in array operators +# (see Array._promote_scalar) method in _array_object.py. +_promotion_table = { + (int8, int8): int8, + (int8, int16): int16, + (int8, int32): int32, + (int8, int64): int64, + (int16, int8): int16, + (int16, int16): int16, + (int16, int32): int32, + (int16, int64): int64, + (int32, int8): int32, + (int32, int16): int32, + (int32, int32): int32, + (int32, int64): int64, + (int64, int8): int64, + (int64, int16): int64, + (int64, int32): int64, + (int64, int64): int64, + (uint8, uint8): uint8, + (uint8, uint16): uint16, + (uint8, uint32): uint32, + (uint8, uint64): uint64, + (uint16, uint8): uint16, + (uint16, uint16): uint16, + (uint16, uint32): uint32, + (uint16, uint64): uint64, + (uint32, uint8): uint32, + (uint32, uint16): uint32, + (uint32, uint32): uint32, + (uint32, uint64): uint64, + (uint64, uint8): uint64, + (uint64, uint16): uint64, + (uint64, uint32): uint64, + (uint64, uint64): uint64, + (int8, uint8): int16, + (int8, uint16): int32, + (int8, uint32): int64, + (int16, uint8): int16, + (int16, uint16): int32, + (int16, uint32): int64, + (int32, uint8): int32, + (int32, uint16): int32, + (int32, uint32): int64, + (int64, uint8): int64, + (int64, uint16): int64, + (int64, uint32): int64, + (uint8, int8): int16, + (uint16, int8): int32, + (uint32, int8): int64, + (uint8, int16): int16, + (uint16, int16): int32, + (uint32, int16): int64, + (uint8, int32): int32, + (uint16, int32): int32, + (uint32, int32): int64, + (uint8, int64): int64, + (uint16, int64): int64, + (uint32, int64): int64, + (float32, float32): float32, + (float32, float64): float64, + (float64, float32): float64, + (float64, float64): float64, + (bool, bool): bool, +} + + +def _result_type(type1, type2): + if (type1, type2) in _promotion_table: + return _promotion_table[type1, type2] + raise TypeError(f"{type1} and {type2} cannot be type promoted together") diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/_elementwise_functions.py b/.local/lib/python3.8/site-packages/numpy/array_api/_elementwise_functions.py new file mode 100644 index 0000000..c758a09 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/array_api/_elementwise_functions.py @@ -0,0 +1,729 @@ +from __future__ import annotations + +from ._dtypes import ( + _boolean_dtypes, + _floating_dtypes, + _integer_dtypes, + _integer_or_boolean_dtypes, + _numeric_dtypes, + _result_type, +) +from ._array_object import Array + +import numpy as np + + +def abs(x: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.abs `. + + See its docstring for more information. + """ + if x.dtype not in _numeric_dtypes: + raise TypeError("Only numeric dtypes are allowed in abs") + return Array._new(np.abs(x._array)) + + +# Note: the function name is different here +def acos(x: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.arccos `. + + See its docstring for more information. 
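+
+    Illustrative (the spec name ``acos`` wraps ``np.arccos``; ``xp``
+    denotes ``numpy.array_api``)::
+
+        >>> xp.acos(xp.asarray(1.0))
+        Array(0., dtype=float64)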
+ """ + if x.dtype not in _floating_dtypes: + raise TypeError("Only floating-point dtypes are allowed in acos") + return Array._new(np.arccos(x._array)) + + +# Note: the function name is different here +def acosh(x: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.arccosh `. + + See its docstring for more information. + """ + if x.dtype not in _floating_dtypes: + raise TypeError("Only floating-point dtypes are allowed in acosh") + return Array._new(np.arccosh(x._array)) + + +def add(x1: Array, x2: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.add `. + + See its docstring for more information. + """ + if x1.dtype not in _numeric_dtypes or x2.dtype not in _numeric_dtypes: + raise TypeError("Only numeric dtypes are allowed in add") + # Call result type here just to raise on disallowed type combinations + _result_type(x1.dtype, x2.dtype) + x1, x2 = Array._normalize_two_args(x1, x2) + return Array._new(np.add(x1._array, x2._array)) + + +# Note: the function name is different here +def asin(x: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.arcsin `. + + See its docstring for more information. + """ + if x.dtype not in _floating_dtypes: + raise TypeError("Only floating-point dtypes are allowed in asin") + return Array._new(np.arcsin(x._array)) + + +# Note: the function name is different here +def asinh(x: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.arcsinh `. + + See its docstring for more information. + """ + if x.dtype not in _floating_dtypes: + raise TypeError("Only floating-point dtypes are allowed in asinh") + return Array._new(np.arcsinh(x._array)) + + +# Note: the function name is different here +def atan(x: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.arctan `. + + See its docstring for more information. + """ + if x.dtype not in _floating_dtypes: + raise TypeError("Only floating-point dtypes are allowed in atan") + return Array._new(np.arctan(x._array)) + + +# Note: the function name is different here +def atan2(x1: Array, x2: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.arctan2 `. + + See its docstring for more information. + """ + if x1.dtype not in _floating_dtypes or x2.dtype not in _floating_dtypes: + raise TypeError("Only floating-point dtypes are allowed in atan2") + # Call result type here just to raise on disallowed type combinations + _result_type(x1.dtype, x2.dtype) + x1, x2 = Array._normalize_two_args(x1, x2) + return Array._new(np.arctan2(x1._array, x2._array)) + + +# Note: the function name is different here +def atanh(x: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.arctanh `. + + See its docstring for more information. + """ + if x.dtype not in _floating_dtypes: + raise TypeError("Only floating-point dtypes are allowed in atanh") + return Array._new(np.arctanh(x._array)) + + +def bitwise_and(x1: Array, x2: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.bitwise_and `. + + See its docstring for more information. 
+ """ + if ( + x1.dtype not in _integer_or_boolean_dtypes + or x2.dtype not in _integer_or_boolean_dtypes + ): + raise TypeError("Only integer or boolean dtypes are allowed in bitwise_and") + # Call result type here just to raise on disallowed type combinations + _result_type(x1.dtype, x2.dtype) + x1, x2 = Array._normalize_two_args(x1, x2) + return Array._new(np.bitwise_and(x1._array, x2._array)) + + +# Note: the function name is different here +def bitwise_left_shift(x1: Array, x2: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.left_shift `. + + See its docstring for more information. + """ + if x1.dtype not in _integer_dtypes or x2.dtype not in _integer_dtypes: + raise TypeError("Only integer dtypes are allowed in bitwise_left_shift") + # Call result type here just to raise on disallowed type combinations + _result_type(x1.dtype, x2.dtype) + x1, x2 = Array._normalize_two_args(x1, x2) + # Note: bitwise_left_shift is only defined for x2 nonnegative. + if np.any(x2._array < 0): + raise ValueError("bitwise_left_shift(x1, x2) is only defined for x2 >= 0") + return Array._new(np.left_shift(x1._array, x2._array)) + + +# Note: the function name is different here +def bitwise_invert(x: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.invert `. + + See its docstring for more information. + """ + if x.dtype not in _integer_or_boolean_dtypes: + raise TypeError("Only integer or boolean dtypes are allowed in bitwise_invert") + return Array._new(np.invert(x._array)) + + +def bitwise_or(x1: Array, x2: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.bitwise_or `. + + See its docstring for more information. + """ + if ( + x1.dtype not in _integer_or_boolean_dtypes + or x2.dtype not in _integer_or_boolean_dtypes + ): + raise TypeError("Only integer or boolean dtypes are allowed in bitwise_or") + # Call result type here just to raise on disallowed type combinations + _result_type(x1.dtype, x2.dtype) + x1, x2 = Array._normalize_two_args(x1, x2) + return Array._new(np.bitwise_or(x1._array, x2._array)) + + +# Note: the function name is different here +def bitwise_right_shift(x1: Array, x2: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.right_shift `. + + See its docstring for more information. + """ + if x1.dtype not in _integer_dtypes or x2.dtype not in _integer_dtypes: + raise TypeError("Only integer dtypes are allowed in bitwise_right_shift") + # Call result type here just to raise on disallowed type combinations + _result_type(x1.dtype, x2.dtype) + x1, x2 = Array._normalize_two_args(x1, x2) + # Note: bitwise_right_shift is only defined for x2 nonnegative. + if np.any(x2._array < 0): + raise ValueError("bitwise_right_shift(x1, x2) is only defined for x2 >= 0") + return Array._new(np.right_shift(x1._array, x2._array)) + + +def bitwise_xor(x1: Array, x2: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.bitwise_xor `. + + See its docstring for more information. + """ + if ( + x1.dtype not in _integer_or_boolean_dtypes + or x2.dtype not in _integer_or_boolean_dtypes + ): + raise TypeError("Only integer or boolean dtypes are allowed in bitwise_xor") + # Call result type here just to raise on disallowed type combinations + _result_type(x1.dtype, x2.dtype) + x1, x2 = Array._normalize_two_args(x1, x2) + return Array._new(np.bitwise_xor(x1._array, x2._array)) + + +def ceil(x: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.ceil `. 
+ + See its docstring for more information. + """ + if x.dtype not in _numeric_dtypes: + raise TypeError("Only numeric dtypes are allowed in ceil") + if x.dtype in _integer_dtypes: + # Note: The return dtype of ceil is the same as the input + return x + return Array._new(np.ceil(x._array)) + + +def cos(x: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.cos `. + + See its docstring for more information. + """ + if x.dtype not in _floating_dtypes: + raise TypeError("Only floating-point dtypes are allowed in cos") + return Array._new(np.cos(x._array)) + + +def cosh(x: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.cosh `. + + See its docstring for more information. + """ + if x.dtype not in _floating_dtypes: + raise TypeError("Only floating-point dtypes are allowed in cosh") + return Array._new(np.cosh(x._array)) + + +def divide(x1: Array, x2: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.divide `. + + See its docstring for more information. + """ + if x1.dtype not in _floating_dtypes or x2.dtype not in _floating_dtypes: + raise TypeError("Only floating-point dtypes are allowed in divide") + # Call result type here just to raise on disallowed type combinations + _result_type(x1.dtype, x2.dtype) + x1, x2 = Array._normalize_two_args(x1, x2) + return Array._new(np.divide(x1._array, x2._array)) + + +def equal(x1: Array, x2: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.equal `. + + See its docstring for more information. + """ + # Call result type here just to raise on disallowed type combinations + _result_type(x1.dtype, x2.dtype) + x1, x2 = Array._normalize_two_args(x1, x2) + return Array._new(np.equal(x1._array, x2._array)) + + +def exp(x: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.exp `. + + See its docstring for more information. + """ + if x.dtype not in _floating_dtypes: + raise TypeError("Only floating-point dtypes are allowed in exp") + return Array._new(np.exp(x._array)) + + +def expm1(x: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.expm1 `. + + See its docstring for more information. + """ + if x.dtype not in _floating_dtypes: + raise TypeError("Only floating-point dtypes are allowed in expm1") + return Array._new(np.expm1(x._array)) + + +def floor(x: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.floor `. + + See its docstring for more information. + """ + if x.dtype not in _numeric_dtypes: + raise TypeError("Only numeric dtypes are allowed in floor") + if x.dtype in _integer_dtypes: + # Note: The return dtype of floor is the same as the input + return x + return Array._new(np.floor(x._array)) + + +def floor_divide(x1: Array, x2: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.floor_divide `. + + See its docstring for more information. + """ + if x1.dtype not in _numeric_dtypes or x2.dtype not in _numeric_dtypes: + raise TypeError("Only numeric dtypes are allowed in floor_divide") + # Call result type here just to raise on disallowed type combinations + _result_type(x1.dtype, x2.dtype) + x1, x2 = Array._normalize_two_args(x1, x2) + return Array._new(np.floor_divide(x1._array, x2._array)) + + +def greater(x1: Array, x2: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.greater `. + + See its docstring for more information. 
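+
+    Illustrative (comparisons always produce boolean arrays; ``xp`` denotes
+    ``numpy.array_api``)::
+
+        >>> xp.greater(xp.asarray([1, 3]), xp.asarray([2, 2]))
+        Array([False,  True], dtype=bool)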
+ """ + if x1.dtype not in _numeric_dtypes or x2.dtype not in _numeric_dtypes: + raise TypeError("Only numeric dtypes are allowed in greater") + # Call result type here just to raise on disallowed type combinations + _result_type(x1.dtype, x2.dtype) + x1, x2 = Array._normalize_two_args(x1, x2) + return Array._new(np.greater(x1._array, x2._array)) + + +def greater_equal(x1: Array, x2: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.greater_equal `. + + See its docstring for more information. + """ + if x1.dtype not in _numeric_dtypes or x2.dtype not in _numeric_dtypes: + raise TypeError("Only numeric dtypes are allowed in greater_equal") + # Call result type here just to raise on disallowed type combinations + _result_type(x1.dtype, x2.dtype) + x1, x2 = Array._normalize_two_args(x1, x2) + return Array._new(np.greater_equal(x1._array, x2._array)) + + +def isfinite(x: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.isfinite `. + + See its docstring for more information. + """ + if x.dtype not in _numeric_dtypes: + raise TypeError("Only numeric dtypes are allowed in isfinite") + return Array._new(np.isfinite(x._array)) + + +def isinf(x: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.isinf `. + + See its docstring for more information. + """ + if x.dtype not in _numeric_dtypes: + raise TypeError("Only numeric dtypes are allowed in isinf") + return Array._new(np.isinf(x._array)) + + +def isnan(x: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.isnan `. + + See its docstring for more information. + """ + if x.dtype not in _numeric_dtypes: + raise TypeError("Only numeric dtypes are allowed in isnan") + return Array._new(np.isnan(x._array)) + + +def less(x1: Array, x2: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.less `. + + See its docstring for more information. + """ + if x1.dtype not in _numeric_dtypes or x2.dtype not in _numeric_dtypes: + raise TypeError("Only numeric dtypes are allowed in less") + # Call result type here just to raise on disallowed type combinations + _result_type(x1.dtype, x2.dtype) + x1, x2 = Array._normalize_two_args(x1, x2) + return Array._new(np.less(x1._array, x2._array)) + + +def less_equal(x1: Array, x2: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.less_equal `. + + See its docstring for more information. + """ + if x1.dtype not in _numeric_dtypes or x2.dtype not in _numeric_dtypes: + raise TypeError("Only numeric dtypes are allowed in less_equal") + # Call result type here just to raise on disallowed type combinations + _result_type(x1.dtype, x2.dtype) + x1, x2 = Array._normalize_two_args(x1, x2) + return Array._new(np.less_equal(x1._array, x2._array)) + + +def log(x: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.log `. + + See its docstring for more information. + """ + if x.dtype not in _floating_dtypes: + raise TypeError("Only floating-point dtypes are allowed in log") + return Array._new(np.log(x._array)) + + +def log1p(x: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.log1p `. + + See its docstring for more information. + """ + if x.dtype not in _floating_dtypes: + raise TypeError("Only floating-point dtypes are allowed in log1p") + return Array._new(np.log1p(x._array)) + + +def log2(x: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.log2 `. + + See its docstring for more information. 
+ """ + if x.dtype not in _floating_dtypes: + raise TypeError("Only floating-point dtypes are allowed in log2") + return Array._new(np.log2(x._array)) + + +def log10(x: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.log10 `. + + See its docstring for more information. + """ + if x.dtype not in _floating_dtypes: + raise TypeError("Only floating-point dtypes are allowed in log10") + return Array._new(np.log10(x._array)) + + +def logaddexp(x1: Array, x2: Array) -> Array: + """ + Array API compatible wrapper for :py:func:`np.logaddexp `. + + See its docstring for more information. + """ + if x1.dtype not in _floating_dtypes or x2.dtype not in _floating_dtypes: + raise TypeError("Only floating-point dtypes are allowed in logaddexp") + # Call result type here just to raise on disallowed type combinations + _result_type(x1.dtype, x2.dtype) + x1, x2 = Array._normalize_two_args(x1, x2) + return Array._new(np.logaddexp(x1._array, x2._array)) + + +def logical_and(x1: Array, x2: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.logical_and `. + + See its docstring for more information. + """ + if x1.dtype not in _boolean_dtypes or x2.dtype not in _boolean_dtypes: + raise TypeError("Only boolean dtypes are allowed in logical_and") + # Call result type here just to raise on disallowed type combinations + _result_type(x1.dtype, x2.dtype) + x1, x2 = Array._normalize_two_args(x1, x2) + return Array._new(np.logical_and(x1._array, x2._array)) + + +def logical_not(x: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.logical_not `. + + See its docstring for more information. + """ + if x.dtype not in _boolean_dtypes: + raise TypeError("Only boolean dtypes are allowed in logical_not") + return Array._new(np.logical_not(x._array)) + + +def logical_or(x1: Array, x2: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.logical_or `. + + See its docstring for more information. + """ + if x1.dtype not in _boolean_dtypes or x2.dtype not in _boolean_dtypes: + raise TypeError("Only boolean dtypes are allowed in logical_or") + # Call result type here just to raise on disallowed type combinations + _result_type(x1.dtype, x2.dtype) + x1, x2 = Array._normalize_two_args(x1, x2) + return Array._new(np.logical_or(x1._array, x2._array)) + + +def logical_xor(x1: Array, x2: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.logical_xor `. + + See its docstring for more information. + """ + if x1.dtype not in _boolean_dtypes or x2.dtype not in _boolean_dtypes: + raise TypeError("Only boolean dtypes are allowed in logical_xor") + # Call result type here just to raise on disallowed type combinations + _result_type(x1.dtype, x2.dtype) + x1, x2 = Array._normalize_two_args(x1, x2) + return Array._new(np.logical_xor(x1._array, x2._array)) + + +def multiply(x1: Array, x2: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.multiply `. + + See its docstring for more information. + """ + if x1.dtype not in _numeric_dtypes or x2.dtype not in _numeric_dtypes: + raise TypeError("Only numeric dtypes are allowed in multiply") + # Call result type here just to raise on disallowed type combinations + _result_type(x1.dtype, x2.dtype) + x1, x2 = Array._normalize_two_args(x1, x2) + return Array._new(np.multiply(x1._array, x2._array)) + + +def negative(x: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.negative `. + + See its docstring for more information. 
+ """ + if x.dtype not in _numeric_dtypes: + raise TypeError("Only numeric dtypes are allowed in negative") + return Array._new(np.negative(x._array)) + + +def not_equal(x1: Array, x2: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.not_equal `. + + See its docstring for more information. + """ + # Call result type here just to raise on disallowed type combinations + _result_type(x1.dtype, x2.dtype) + x1, x2 = Array._normalize_two_args(x1, x2) + return Array._new(np.not_equal(x1._array, x2._array)) + + +def positive(x: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.positive `. + + See its docstring for more information. + """ + if x.dtype not in _numeric_dtypes: + raise TypeError("Only numeric dtypes are allowed in positive") + return Array._new(np.positive(x._array)) + + +# Note: the function name is different here +def pow(x1: Array, x2: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.power `. + + See its docstring for more information. + """ + if x1.dtype not in _numeric_dtypes or x2.dtype not in _numeric_dtypes: + raise TypeError("Only numeric dtypes are allowed in pow") + # Call result type here just to raise on disallowed type combinations + _result_type(x1.dtype, x2.dtype) + x1, x2 = Array._normalize_two_args(x1, x2) + return Array._new(np.power(x1._array, x2._array)) + + +def remainder(x1: Array, x2: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.remainder `. + + See its docstring for more information. + """ + if x1.dtype not in _numeric_dtypes or x2.dtype not in _numeric_dtypes: + raise TypeError("Only numeric dtypes are allowed in remainder") + # Call result type here just to raise on disallowed type combinations + _result_type(x1.dtype, x2.dtype) + x1, x2 = Array._normalize_two_args(x1, x2) + return Array._new(np.remainder(x1._array, x2._array)) + + +def round(x: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.round `. + + See its docstring for more information. + """ + if x.dtype not in _numeric_dtypes: + raise TypeError("Only numeric dtypes are allowed in round") + return Array._new(np.round(x._array)) + + +def sign(x: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.sign `. + + See its docstring for more information. + """ + if x.dtype not in _numeric_dtypes: + raise TypeError("Only numeric dtypes are allowed in sign") + return Array._new(np.sign(x._array)) + + +def sin(x: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.sin `. + + See its docstring for more information. + """ + if x.dtype not in _floating_dtypes: + raise TypeError("Only floating-point dtypes are allowed in sin") + return Array._new(np.sin(x._array)) + + +def sinh(x: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.sinh `. + + See its docstring for more information. + """ + if x.dtype not in _floating_dtypes: + raise TypeError("Only floating-point dtypes are allowed in sinh") + return Array._new(np.sinh(x._array)) + + +def square(x: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.square `. + + See its docstring for more information. + """ + if x.dtype not in _numeric_dtypes: + raise TypeError("Only numeric dtypes are allowed in square") + return Array._new(np.square(x._array)) + + +def sqrt(x: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.sqrt `. + + See its docstring for more information. 
+ """ + if x.dtype not in _floating_dtypes: + raise TypeError("Only floating-point dtypes are allowed in sqrt") + return Array._new(np.sqrt(x._array)) + + +def subtract(x1: Array, x2: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.subtract `. + + See its docstring for more information. + """ + if x1.dtype not in _numeric_dtypes or x2.dtype not in _numeric_dtypes: + raise TypeError("Only numeric dtypes are allowed in subtract") + # Call result type here just to raise on disallowed type combinations + _result_type(x1.dtype, x2.dtype) + x1, x2 = Array._normalize_two_args(x1, x2) + return Array._new(np.subtract(x1._array, x2._array)) + + +def tan(x: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.tan `. + + See its docstring for more information. + """ + if x.dtype not in _floating_dtypes: + raise TypeError("Only floating-point dtypes are allowed in tan") + return Array._new(np.tan(x._array)) + + +def tanh(x: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.tanh `. + + See its docstring for more information. + """ + if x.dtype not in _floating_dtypes: + raise TypeError("Only floating-point dtypes are allowed in tanh") + return Array._new(np.tanh(x._array)) + + +def trunc(x: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.trunc `. + + See its docstring for more information. + """ + if x.dtype not in _numeric_dtypes: + raise TypeError("Only numeric dtypes are allowed in trunc") + if x.dtype in _integer_dtypes: + # Note: The return dtype of trunc is the same as the input + return x + return Array._new(np.trunc(x._array)) diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/_manipulation_functions.py b/.local/lib/python3.8/site-packages/numpy/array_api/_manipulation_functions.py new file mode 100644 index 0000000..4f2114f --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/array_api/_manipulation_functions.py @@ -0,0 +1,97 @@ +from __future__ import annotations + +from ._array_object import Array +from ._data_type_functions import result_type + +from typing import List, Optional, Tuple, Union + +import numpy as np + +# Note: the function name is different here +def concat( + arrays: Union[Tuple[Array, ...], List[Array]], /, *, axis: Optional[int] = 0 +) -> Array: + """ + Array API compatible wrapper for :py:func:`np.concatenate `. + + See its docstring for more information. + """ + # Note: Casting rules here are different from the np.concatenate default + # (no for scalars with axis=None, no cross-kind casting) + dtype = result_type(*arrays) + arrays = tuple(a._array for a in arrays) + return Array._new(np.concatenate(arrays, axis=axis, dtype=dtype)) + + +def expand_dims(x: Array, /, *, axis: int) -> Array: + """ + Array API compatible wrapper for :py:func:`np.expand_dims `. + + See its docstring for more information. + """ + return Array._new(np.expand_dims(x._array, axis)) + + +def flip(x: Array, /, *, axis: Optional[Union[int, Tuple[int, ...]]] = None) -> Array: + """ + Array API compatible wrapper for :py:func:`np.flip `. + + See its docstring for more information. + """ + return Array._new(np.flip(x._array, axis=axis)) + + +# Note: The function name is different here (see also matrix_transpose). +# Unlike transpose(), the axes argument is required. +def permute_dims(x: Array, /, axes: Tuple[int, ...]) -> Array: + """ + Array API compatible wrapper for :py:func:`np.transpose `. + + See its docstring for more information. 
+ """ + return Array._new(np.transpose(x._array, axes)) + + +def reshape(x: Array, /, shape: Tuple[int, ...]) -> Array: + """ + Array API compatible wrapper for :py:func:`np.reshape `. + + See its docstring for more information. + """ + return Array._new(np.reshape(x._array, shape)) + + +def roll( + x: Array, + /, + shift: Union[int, Tuple[int, ...]], + *, + axis: Optional[Union[int, Tuple[int, ...]]] = None, +) -> Array: + """ + Array API compatible wrapper for :py:func:`np.roll `. + + See its docstring for more information. + """ + return Array._new(np.roll(x._array, shift, axis=axis)) + + +def squeeze(x: Array, /, axis: Union[int, Tuple[int, ...]]) -> Array: + """ + Array API compatible wrapper for :py:func:`np.squeeze `. + + See its docstring for more information. + """ + return Array._new(np.squeeze(x._array, axis=axis)) + + +def stack(arrays: Union[Tuple[Array, ...], List[Array]], /, *, axis: int = 0) -> Array: + """ + Array API compatible wrapper for :py:func:`np.stack `. + + See its docstring for more information. + """ + # Call result type here just to raise on disallowed type combinations + result_type(*arrays) + arrays = tuple(a._array for a in arrays) + return Array._new(np.stack(arrays, axis=axis)) diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/_searching_functions.py b/.local/lib/python3.8/site-packages/numpy/array_api/_searching_functions.py new file mode 100644 index 0000000..40f5a4d --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/array_api/_searching_functions.py @@ -0,0 +1,47 @@ +from __future__ import annotations + +from ._array_object import Array +from ._dtypes import _result_type + +from typing import Optional, Tuple + +import numpy as np + + +def argmax(x: Array, /, *, axis: Optional[int] = None, keepdims: bool = False) -> Array: + """ + Array API compatible wrapper for :py:func:`np.argmax `. + + See its docstring for more information. + """ + return Array._new(np.asarray(np.argmax(x._array, axis=axis, keepdims=keepdims))) + + +def argmin(x: Array, /, *, axis: Optional[int] = None, keepdims: bool = False) -> Array: + """ + Array API compatible wrapper for :py:func:`np.argmin `. + + See its docstring for more information. + """ + return Array._new(np.asarray(np.argmin(x._array, axis=axis, keepdims=keepdims))) + + +def nonzero(x: Array, /) -> Tuple[Array, ...]: + """ + Array API compatible wrapper for :py:func:`np.nonzero `. + + See its docstring for more information. + """ + return tuple(Array._new(i) for i in np.nonzero(x._array)) + + +def where(condition: Array, x1: Array, x2: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.where `. + + See its docstring for more information. + """ + # Call result type here just to raise on disallowed type combinations + _result_type(x1.dtype, x2.dtype) + x1, x2 = Array._normalize_two_args(x1, x2) + return Array._new(np.where(condition._array, x1._array, x2._array)) diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/_set_functions.py b/.local/lib/python3.8/site-packages/numpy/array_api/_set_functions.py new file mode 100644 index 0000000..db9370f --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/array_api/_set_functions.py @@ -0,0 +1,102 @@ +from __future__ import annotations + +from ._array_object import Array + +from typing import NamedTuple + +import numpy as np + +# Note: np.unique() is split into four functions in the array API: +# unique_all, unique_counts, unique_inverse, and unique_values (this is done +# to remove polymorphic return types). 
+ +# Note: The various unique() functions are supposed to return multiple NaNs. +# This does not match the NumPy behavior, however, this is currently left as a +# TODO in this implementation as this behavior may be reverted in np.unique(). +# See https://github.com/numpy/numpy/issues/20326. + +# Note: The functions here return a namedtuple (np.unique() returns a normal +# tuple). + +class UniqueAllResult(NamedTuple): + values: Array + indices: Array + inverse_indices: Array + counts: Array + + +class UniqueCountsResult(NamedTuple): + values: Array + counts: Array + + +class UniqueInverseResult(NamedTuple): + values: Array + inverse_indices: Array + + +def unique_all(x: Array, /) -> UniqueAllResult: + """ + Array API compatible wrapper for :py:func:`np.unique `. + + See its docstring for more information. + """ + values, indices, inverse_indices, counts = np.unique( + x._array, + return_counts=True, + return_index=True, + return_inverse=True, + ) + # np.unique() flattens inverse indices, but they need to share x's shape + # See https://github.com/numpy/numpy/issues/20638 + inverse_indices = inverse_indices.reshape(x.shape) + return UniqueAllResult( + Array._new(values), + Array._new(indices), + Array._new(inverse_indices), + Array._new(counts), + ) + + +def unique_counts(x: Array, /) -> UniqueCountsResult: + res = np.unique( + x._array, + return_counts=True, + return_index=False, + return_inverse=False, + ) + + return UniqueCountsResult(*[Array._new(i) for i in res]) + + +def unique_inverse(x: Array, /) -> UniqueInverseResult: + """ + Array API compatible wrapper for :py:func:`np.unique `. + + See its docstring for more information. + """ + values, inverse_indices = np.unique( + x._array, + return_counts=False, + return_index=False, + return_inverse=True, + ) + # np.unique() flattens inverse indices, but they need to share x's shape + # See https://github.com/numpy/numpy/issues/20638 + inverse_indices = inverse_indices.reshape(x.shape) + return UniqueInverseResult(Array._new(values), Array._new(inverse_indices)) + + +def unique_values(x: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.unique `. + + See its docstring for more information. + """ + res = np.unique( + x._array, + return_counts=False, + return_index=False, + return_inverse=False, + ) + return Array._new(res) diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/_sorting_functions.py b/.local/lib/python3.8/site-packages/numpy/array_api/_sorting_functions.py new file mode 100644 index 0000000..b2a1187 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/array_api/_sorting_functions.py @@ -0,0 +1,48 @@ +from __future__ import annotations + +from ._array_object import Array + +import numpy as np + + +def argsort( + x: Array, /, *, axis: int = -1, descending: bool = False, stable: bool = True +) -> Array: + """ + Array API compatible wrapper for :py:func:`np.argsort `. + + See its docstring for more information. + """ + # Note: this keyword argument is different, and the default is different. + kind = "stable" if stable else "quicksort" + if not descending: + res = np.argsort(x._array, axis=axis, kind=kind) + else: + # As NumPy has no native descending sort, we imitate it here. Note that + # simply flipping the results of np.argsort(x._array, ...) would not + # respect the relative order like it would in native descending sorts. 
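+        # Editor's worked example (not from the NumPy source): for
+        # x = [1, 1, 2], a stable descending argsort should return
+        # [2, 0, 1] (the tied 1s keep their original order), but simply
+        # flipping np.argsort(x) gives [2, 1, 0].  Flipping x first,
+        # argsorting, flipping back, and then mapping i -> max_i - i
+        # (below) yields the correct [2, 0, 1].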
+ res = np.flip( + np.argsort(np.flip(x._array, axis=axis), axis=axis, kind=kind), + axis=axis, + ) + # Rely on flip()/argsort() to validate axis + normalised_axis = axis if axis >= 0 else x.ndim + axis + max_i = x.shape[normalised_axis] - 1 + res = max_i - res + return Array._new(res) + + +def sort( + x: Array, /, *, axis: int = -1, descending: bool = False, stable: bool = True +) -> Array: + """ + Array API compatible wrapper for :py:func:`np.sort `. + + See its docstring for more information. + """ + # Note: this keyword argument is different, and the default is different. + kind = "stable" if stable else "quicksort" + res = np.sort(x._array, axis=axis, kind=kind) + if descending: + res = np.flip(res, axis=axis) + return Array._new(res) diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/_statistical_functions.py b/.local/lib/python3.8/site-packages/numpy/array_api/_statistical_functions.py new file mode 100644 index 0000000..5bc831a --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/array_api/_statistical_functions.py @@ -0,0 +1,115 @@ +from __future__ import annotations + +from ._dtypes import ( + _floating_dtypes, + _numeric_dtypes, +) +from ._array_object import Array +from ._creation_functions import asarray +from ._dtypes import float32, float64 + +from typing import TYPE_CHECKING, Optional, Tuple, Union + +if TYPE_CHECKING: + from ._typing import Dtype + +import numpy as np + + +def max( + x: Array, + /, + *, + axis: Optional[Union[int, Tuple[int, ...]]] = None, + keepdims: bool = False, +) -> Array: + if x.dtype not in _numeric_dtypes: + raise TypeError("Only numeric dtypes are allowed in max") + return Array._new(np.max(x._array, axis=axis, keepdims=keepdims)) + + +def mean( + x: Array, + /, + *, + axis: Optional[Union[int, Tuple[int, ...]]] = None, + keepdims: bool = False, +) -> Array: + if x.dtype not in _floating_dtypes: + raise TypeError("Only floating-point dtypes are allowed in mean") + return Array._new(np.mean(x._array, axis=axis, keepdims=keepdims)) + + +def min( + x: Array, + /, + *, + axis: Optional[Union[int, Tuple[int, ...]]] = None, + keepdims: bool = False, +) -> Array: + if x.dtype not in _numeric_dtypes: + raise TypeError("Only numeric dtypes are allowed in min") + return Array._new(np.min(x._array, axis=axis, keepdims=keepdims)) + + +def prod( + x: Array, + /, + *, + axis: Optional[Union[int, Tuple[int, ...]]] = None, + dtype: Optional[Dtype] = None, + keepdims: bool = False, +) -> Array: + if x.dtype not in _numeric_dtypes: + raise TypeError("Only numeric dtypes are allowed in prod") + # Note: sum() and prod() always upcast float32 to float64 for dtype=None + # We need to do so here before computing the product to avoid overflow + if dtype is None and x.dtype == float32: + dtype = float64 + return Array._new(np.prod(x._array, dtype=dtype, axis=axis, keepdims=keepdims)) + + +def std( + x: Array, + /, + *, + axis: Optional[Union[int, Tuple[int, ...]]] = None, + correction: Union[int, float] = 0.0, + keepdims: bool = False, +) -> Array: + # Note: the keyword argument correction is different here + if x.dtype not in _floating_dtypes: + raise TypeError("Only floating-point dtypes are allowed in std") + return Array._new(np.std(x._array, axis=axis, ddof=correction, keepdims=keepdims)) + + +def sum( + x: Array, + /, + *, + axis: Optional[Union[int, Tuple[int, ...]]] = None, + dtype: Optional[Dtype] = None, + keepdims: bool = False, +) -> Array: + if x.dtype not in _numeric_dtypes: + raise TypeError("Only numeric dtypes are allowed in sum") + # 
Note: sum() and prod() always upcast integers to (u)int64 and float32 to + # float64 for dtype=None. `np.sum` does that too for integers, but not for + # float32, so we need to special-case it here + if dtype is None and x.dtype == float32: + dtype = float64 + return Array._new(np.sum(x._array, axis=axis, dtype=dtype, keepdims=keepdims)) + + +def var( + x: Array, + /, + *, + axis: Optional[Union[int, Tuple[int, ...]]] = None, + correction: Union[int, float] = 0.0, + keepdims: bool = False, +) -> Array: + # Note: the keyword argument correction is different here + if x.dtype not in _floating_dtypes: + raise TypeError("Only floating-point dtypes are allowed in var") + return Array._new(np.var(x._array, axis=axis, ddof=correction, keepdims=keepdims)) diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/_typing.py b/.local/lib/python3.8/site-packages/numpy/array_api/_typing.py new file mode 100644 index 0000000..dfa87b3 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/array_api/_typing.py @@ -0,0 +1,74 @@ +""" +This file defines the types for type annotations. + +These names aren't part of the module namespace, but they are used in the +annotations in the function signatures. The functions in the module are only +valid for inputs that match the given type annotations. +""" + +from __future__ import annotations + +__all__ = [ + "Array", + "Device", + "Dtype", + "SupportsDLPack", + "SupportsBufferProtocol", + "PyCapsule", +] + +import sys +from typing import ( + Any, + Literal, + Sequence, + Type, + Union, + TYPE_CHECKING, + TypeVar, + Protocol, +) + +from ._array_object import Array +from numpy import ( + dtype, + int8, + int16, + int32, + int64, + uint8, + uint16, + uint32, + uint64, + float32, + float64, +) + +_T_co = TypeVar("_T_co", covariant=True) + +class NestedSequence(Protocol[_T_co]): + def __getitem__(self, key: int, /) -> _T_co | NestedSequence[_T_co]: ... + def __len__(self, /) -> int: ... + +Device = Literal["cpu"] +if TYPE_CHECKING or sys.version_info >= (3, 9): + Dtype = dtype[Union[ + int8, + int16, + int32, + int64, + uint8, + uint16, + uint32, + uint64, + float32, + float64, + ]] +else: + Dtype = dtype + +SupportsBufferProtocol = Any +PyCapsule = Any + +class SupportsDLPack(Protocol): + def __dlpack__(self, /, *, stream: None = ...) -> PyCapsule: ... diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/_utility_functions.py b/.local/lib/python3.8/site-packages/numpy/array_api/_utility_functions.py new file mode 100644 index 0000000..5ecb4bd --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/array_api/_utility_functions.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from ._array_object import Array + +from typing import Optional, Tuple, Union + +import numpy as np + + +def all( + x: Array, + /, + *, + axis: Optional[Union[int, Tuple[int, ...]]] = None, + keepdims: bool = False, +) -> Array: + """ + Array API compatible wrapper for :py:func:`np.all `. + + See its docstring for more information. + """ + return Array._new(np.asarray(np.all(x._array, axis=axis, keepdims=keepdims))) + + +def any( + x: Array, + /, + *, + axis: Optional[Union[int, Tuple[int, ...]]] = None, + keepdims: bool = False, +) -> Array: + """ + Array API compatible wrapper for :py:func:`np.any `. + + See its docstring for more information. 
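+
+    For example (editor's sketch; ``xp`` is this namespace):
+
+    >>> bool(xp.any(xp.asarray([False, True])))
+    True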
+ """ + return Array._new(np.asarray(np.any(x._array, axis=axis, keepdims=keepdims))) diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/linalg.py b/.local/lib/python3.8/site-packages/numpy/array_api/linalg.py new file mode 100644 index 0000000..8d7ba65 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/array_api/linalg.py @@ -0,0 +1,408 @@ +from __future__ import annotations + +from ._dtypes import _floating_dtypes, _numeric_dtypes +from ._array_object import Array + +from typing import TYPE_CHECKING +if TYPE_CHECKING: + from ._typing import Literal, Optional, Sequence, Tuple, Union + +from typing import NamedTuple + +import numpy.linalg +import numpy as np + +class EighResult(NamedTuple): + eigenvalues: Array + eigenvectors: Array + +class QRResult(NamedTuple): + Q: Array + R: Array + +class SlogdetResult(NamedTuple): + sign: Array + logabsdet: Array + +class SVDResult(NamedTuple): + U: Array + S: Array + Vh: Array + +# Note: the inclusion of the upper keyword is different from +# np.linalg.cholesky, which does not have it. +def cholesky(x: Array, /, *, upper: bool = False) -> Array: + """ + Array API compatible wrapper for :py:func:`np.linalg.cholesky `. + + See its docstring for more information. + """ + # Note: the restriction to floating-point dtypes only is different from + # np.linalg.cholesky. + if x.dtype not in _floating_dtypes: + raise TypeError('Only floating-point dtypes are allowed in cholesky') + L = np.linalg.cholesky(x._array) + if upper: + return Array._new(L).mT + return Array._new(L) + +# Note: cross is the numpy top-level namespace, not np.linalg +def cross(x1: Array, x2: Array, /, *, axis: int = -1) -> Array: + """ + Array API compatible wrapper for :py:func:`np.cross `. + + See its docstring for more information. + """ + if x1.dtype not in _numeric_dtypes or x2.dtype not in _numeric_dtypes: + raise TypeError('Only numeric dtypes are allowed in cross') + # Note: this is different from np.cross(), which broadcasts + if x1.shape != x2.shape: + raise ValueError('x1 and x2 must have the same shape') + if x1.ndim == 0: + raise ValueError('cross() requires arrays of dimension at least 1') + # Note: this is different from np.cross(), which allows dimension 2 + if x1.shape[axis] != 3: + raise ValueError('cross() dimension must equal 3') + return Array._new(np.cross(x1._array, x2._array, axis=axis)) + +def det(x: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.linalg.det `. + + See its docstring for more information. + """ + # Note: the restriction to floating-point dtypes only is different from + # np.linalg.det. + if x.dtype not in _floating_dtypes: + raise TypeError('Only floating-point dtypes are allowed in det') + return Array._new(np.linalg.det(x._array)) + +# Note: diagonal is the numpy top-level namespace, not np.linalg +def diagonal(x: Array, /, *, offset: int = 0) -> Array: + """ + Array API compatible wrapper for :py:func:`np.diagonal `. + + See its docstring for more information. + """ + # Note: diagonal always operates on the last two axes, whereas np.diagonal + # operates on the first two axes by default + return Array._new(np.diagonal(x._array, offset=offset, axis1=-2, axis2=-1)) + + +# Note: the keyword argument name upper is different from np.linalg.eigh +def eigh(x: Array, /) -> EighResult: + """ + Array API compatible wrapper for :py:func:`np.linalg.eigh `. + + See its docstring for more information. + """ + # Note: the restriction to floating-point dtypes only is different from + # np.linalg.eigh. 
+ if x.dtype not in _floating_dtypes: + raise TypeError('Only floating-point dtypes are allowed in eigh') + + # Note: the return type here is a namedtuple, which is different from + # np.eigh, which only returns a tuple. + return EighResult(*map(Array._new, np.linalg.eigh(x._array))) + + +# Note: the keyword argument name upper is different from np.linalg.eigvalsh +def eigvalsh(x: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.linalg.eigvalsh `. + + See its docstring for more information. + """ + # Note: the restriction to floating-point dtypes only is different from + # np.linalg.eigvalsh. + if x.dtype not in _floating_dtypes: + raise TypeError('Only floating-point dtypes are allowed in eigvalsh') + + return Array._new(np.linalg.eigvalsh(x._array)) + +def inv(x: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.linalg.inv `. + + See its docstring for more information. + """ + # Note: the restriction to floating-point dtypes only is different from + # np.linalg.inv. + if x.dtype not in _floating_dtypes: + raise TypeError('Only floating-point dtypes are allowed in inv') + + return Array._new(np.linalg.inv(x._array)) + + +# Note: matmul is the numpy top-level namespace but not in np.linalg +def matmul(x1: Array, x2: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.matmul `. + + See its docstring for more information. + """ + # Note: the restriction to numeric dtypes only is different from + # np.matmul. + if x1.dtype not in _numeric_dtypes or x2.dtype not in _numeric_dtypes: + raise TypeError('Only numeric dtypes are allowed in matmul') + + return Array._new(np.matmul(x1._array, x2._array)) + + +# Note: the name here is different from norm(). The array API norm is split +# into matrix_norm and vector_norm(). + +# The type for ord should be Optional[Union[int, float, Literal[np.inf, +# -np.inf, 'fro', 'nuc']]], but Literal does not support floating-point +# literals. +def matrix_norm(x: Array, /, *, keepdims: bool = False, ord: Optional[Union[int, float, Literal['fro', 'nuc']]] = 'fro') -> Array: + """ + Array API compatible wrapper for :py:func:`np.linalg.norm `. + + See its docstring for more information. + """ + # Note: the restriction to floating-point dtypes only is different from + # np.linalg.norm. + if x.dtype not in _floating_dtypes: + raise TypeError('Only floating-point dtypes are allowed in matrix_norm') + + return Array._new(np.linalg.norm(x._array, axis=(-2, -1), keepdims=keepdims, ord=ord)) + + +def matrix_power(x: Array, n: int, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.matrix_power `. + + See its docstring for more information. + """ + # Note: the restriction to floating-point dtypes only is different from + # np.linalg.matrix_power. + if x.dtype not in _floating_dtypes: + raise TypeError('Only floating-point dtypes are allowed for the first argument of matrix_power') + + # np.matrix_power already checks if n is an integer + return Array._new(np.linalg.matrix_power(x._array, n)) + +# Note: the keyword argument name rtol is different from np.linalg.matrix_rank +def matrix_rank(x: Array, /, *, rtol: Optional[Union[float, Array]] = None) -> Array: + """ + Array API compatible wrapper for :py:func:`np.matrix_rank `. + + See its docstring for more information. + """ + # Note: this is different from np.linalg.matrix_rank, which supports 1 + # dimensional arrays. + if x.ndim < 2: + raise np.linalg.LinAlgError("1-dimensional array given. 
Array must be at least two-dimensional") + S = np.linalg.svd(x._array, compute_uv=False) + if rtol is None: + tol = S.max(axis=-1, keepdims=True) * max(x.shape[-2:]) * np.finfo(S.dtype).eps + else: + if isinstance(rtol, Array): + rtol = rtol._array + # Note: this is different from np.linalg.matrix_rank, which does not multiply + # the tolerance by the largest singular value. + tol = S.max(axis=-1, keepdims=True)*np.asarray(rtol)[..., np.newaxis] + return Array._new(np.count_nonzero(S > tol, axis=-1)) + + +# Note: this function is new in the array API spec. Unlike transpose, it only +# transposes the last two axes. +def matrix_transpose(x: Array, /) -> Array: + if x.ndim < 2: + raise ValueError("x must be at least 2-dimensional for matrix_transpose") + return Array._new(np.swapaxes(x._array, -1, -2)) + +# Note: outer is the numpy top-level namespace, not np.linalg +def outer(x1: Array, x2: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.outer `. + + See its docstring for more information. + """ + # Note: the restriction to numeric dtypes only is different from + # np.outer. + if x1.dtype not in _numeric_dtypes or x2.dtype not in _numeric_dtypes: + raise TypeError('Only numeric dtypes are allowed in outer') + + # Note: the restriction to only 1-dim arrays is different from np.outer + if x1.ndim != 1 or x2.ndim != 1: + raise ValueError('The input arrays to outer must be 1-dimensional') + + return Array._new(np.outer(x1._array, x2._array)) + +# Note: the keyword argument name rtol is different from np.linalg.pinv +def pinv(x: Array, /, *, rtol: Optional[Union[float, Array]] = None) -> Array: + """ + Array API compatible wrapper for :py:func:`np.linalg.pinv `. + + See its docstring for more information. + """ + # Note: the restriction to floating-point dtypes only is different from + # np.linalg.pinv. + if x.dtype not in _floating_dtypes: + raise TypeError('Only floating-point dtypes are allowed in pinv') + + # Note: this is different from np.linalg.pinv, which does not multiply the + # default tolerance by max(M, N). + if rtol is None: + rtol = max(x.shape[-2:]) * np.finfo(x.dtype).eps + return Array._new(np.linalg.pinv(x._array, rcond=rtol)) + +def qr(x: Array, /, *, mode: Literal['reduced', 'complete'] = 'reduced') -> QRResult: + """ + Array API compatible wrapper for :py:func:`np.linalg.qr `. + + See its docstring for more information. + """ + # Note: the restriction to floating-point dtypes only is different from + # np.linalg.qr. + if x.dtype not in _floating_dtypes: + raise TypeError('Only floating-point dtypes are allowed in qr') + + # Note: the return type here is a namedtuple, which is different from + # np.linalg.qr, which only returns a tuple. + return QRResult(*map(Array._new, np.linalg.qr(x._array, mode=mode))) + +def slogdet(x: Array, /) -> SlogdetResult: + """ + Array API compatible wrapper for :py:func:`np.linalg.slogdet `. + + See its docstring for more information. + """ + # Note: the restriction to floating-point dtypes only is different from + # np.linalg.slogdet. + if x.dtype not in _floating_dtypes: + raise TypeError('Only floating-point dtypes are allowed in slogdet') + + # Note: the return type here is a namedtuple, which is different from + # np.linalg.slogdet, which only returns a tuple. + return SlogdetResult(*map(Array._new, np.linalg.slogdet(x._array))) + +# Note: unlike np.linalg.solve, the array API solve() only accepts x2 as a +# vector when it is exactly 1-dimensional. All other cases treat x2 as a stack +# of matrices. 
The np.linalg.solve behavior of allowing stacks of both +# matrices and vectors is ambiguous c.f. +# https://github.com/numpy/numpy/issues/15349 and +# https://github.com/data-apis/array-api/issues/285. + +# To workaround this, the below is the code from np.linalg.solve except +# only calling solve1 in the exactly 1D case. +def _solve(a, b): + from ..linalg.linalg import (_makearray, _assert_stacked_2d, + _assert_stacked_square, _commonType, + isComplexType, get_linalg_error_extobj, + _raise_linalgerror_singular) + from ..linalg import _umath_linalg + + a, _ = _makearray(a) + _assert_stacked_2d(a) + _assert_stacked_square(a) + b, wrap = _makearray(b) + t, result_t = _commonType(a, b) + + # This part is different from np.linalg.solve + if b.ndim == 1: + gufunc = _umath_linalg.solve1 + else: + gufunc = _umath_linalg.solve + + # This does nothing currently but is left in because it will be relevant + # when complex dtype support is added to the spec in 2022. + signature = 'DD->D' if isComplexType(t) else 'dd->d' + extobj = get_linalg_error_extobj(_raise_linalgerror_singular) + r = gufunc(a, b, signature=signature, extobj=extobj) + + return wrap(r.astype(result_t, copy=False)) + +def solve(x1: Array, x2: Array, /) -> Array: + """ + Array API compatible wrapper for :py:func:`np.linalg.solve `. + + See its docstring for more information. + """ + # Note: the restriction to floating-point dtypes only is different from + # np.linalg.solve. + if x1.dtype not in _floating_dtypes or x2.dtype not in _floating_dtypes: + raise TypeError('Only floating-point dtypes are allowed in solve') + + return Array._new(_solve(x1._array, x2._array)) + +def svd(x: Array, /, *, full_matrices: bool = True) -> SVDResult: + """ + Array API compatible wrapper for :py:func:`np.linalg.svd `. + + See its docstring for more information. + """ + # Note: the restriction to floating-point dtypes only is different from + # np.linalg.svd. + if x.dtype not in _floating_dtypes: + raise TypeError('Only floating-point dtypes are allowed in svd') + + # Note: the return type here is a namedtuple, which is different from + # np.svd, which only returns a tuple. + return SVDResult(*map(Array._new, np.linalg.svd(x._array, full_matrices=full_matrices))) + +# Note: svdvals is not in NumPy (but it is in SciPy). It is equivalent to +# np.linalg.svd(compute_uv=False). +def svdvals(x: Array, /) -> Union[Array, Tuple[Array, ...]]: + return Array._new(np.linalg.svd(x._array, compute_uv=False)) + +# Note: tensordot is the numpy top-level namespace but not in np.linalg + +# Note: axes must be a tuple, unlike np.tensordot where it can be an array or array-like. +def tensordot(x1: Array, x2: Array, /, *, axes: Union[int, Tuple[Sequence[int], Sequence[int]]] = 2) -> Array: + # Note: the restriction to numeric dtypes only is different from + # np.tensordot. + if x1.dtype not in _numeric_dtypes or x2.dtype not in _numeric_dtypes: + raise TypeError('Only numeric dtypes are allowed in tensordot') + + return Array._new(np.tensordot(x1._array, x2._array, axes=axes)) + +# Note: trace is the numpy top-level namespace, not np.linalg +def trace(x: Array, /, *, offset: int = 0) -> Array: + """ + Array API compatible wrapper for :py:func:`np.trace `. + + See its docstring for more information. 
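+
+    Illustrative doctest (editor's addition; assumes ``import
+    numpy.array_api as xp`` and that this submodule is exposed as
+    ``xp.linalg``):
+
+    >>> float(xp.linalg.trace(xp.eye(3)))
+    3.0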
+ """ + # Note: trace always operates on the last two axes, whereas np.trace + # operates on the first two axes by default + return Array._new(np.asarray(np.trace(x._array, offset=offset, axis1=-2, axis2=-1))) + +# Note: vecdot is not in NumPy +def vecdot(x1: Array, x2: Array, /, *, axis: int = -1) -> Array: + return tensordot(x1, x2, axes=((axis,), (axis,))) + + +# Note: the name here is different from norm(). The array API norm is split +# into matrix_norm and vector_norm(). + +# The type for ord should be Optional[Union[int, float, Literal[np.inf, +# -np.inf]]] but Literal does not support floating-point literals. +def vector_norm(x: Array, /, *, axis: Optional[Union[int, Tuple[int, int]]] = None, keepdims: bool = False, ord: Optional[Union[int, float]] = 2) -> Array: + """ + Array API compatible wrapper for :py:func:`np.linalg.norm `. + + See its docstring for more information. + """ + # Note: the restriction to floating-point dtypes only is different from + # np.linalg.norm. + if x.dtype not in _floating_dtypes: + raise TypeError('Only floating-point dtypes are allowed in norm') + + a = x._array + if axis is None: + a = a.flatten() + axis = 0 + elif isinstance(axis, tuple): + # Note: The axis argument supports any number of axes, whereas norm() + # only supports a single axis for vector norm. + rest = tuple(i for i in range(a.ndim) if i not in axis) + newshape = axis + rest + a = np.transpose(a, newshape).reshape((np.prod([a.shape[i] for i in axis]), *[a.shape[i] for i in rest])) + axis = 0 + return Array._new(np.linalg.norm(a, axis=axis, keepdims=keepdims, ord=ord)) + + +__all__ = ['cholesky', 'cross', 'det', 'diagonal', 'eigh', 'eigvalsh', 'inv', 'matmul', 'matrix_norm', 'matrix_power', 'matrix_rank', 'matrix_transpose', 'outer', 'pinv', 'qr', 'slogdet', 'solve', 'svd', 'svdvals', 'tensordot', 'trace', 'vecdot', 'vector_norm'] diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/setup.py b/.local/lib/python3.8/site-packages/numpy/array_api/setup.py new file mode 100644 index 0000000..c8bc291 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/array_api/setup.py @@ -0,0 +1,12 @@ +def configuration(parent_package="", top_path=None): + from numpy.distutils.misc_util import Configuration + + config = Configuration("array_api", parent_package, top_path) + config.add_subpackage("tests") + return config + + +if __name__ == "__main__": + from numpy.distutils.core import setup + + setup(configuration=configuration) diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/tests/__init__.py b/.local/lib/python3.8/site-packages/numpy/array_api/tests/__init__.py new file mode 100644 index 0000000..536062e --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/array_api/tests/__init__.py @@ -0,0 +1,7 @@ +""" +Tests for the array API namespace. + +Note, full compliance with the array API can be tested with the official array API test +suite https://github.com/data-apis/array-api-tests. This test suite primarily +focuses on those things that are not tested by the official test suite. 
+""" diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/tests/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/array_api/tests/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..967a2e9 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/array_api/tests/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/tests/__pycache__/test_array_object.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/array_api/tests/__pycache__/test_array_object.cpython-38.pyc new file mode 100644 index 0000000..2d4f94a Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/array_api/tests/__pycache__/test_array_object.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/tests/__pycache__/test_creation_functions.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/array_api/tests/__pycache__/test_creation_functions.cpython-38.pyc new file mode 100644 index 0000000..9c3937c Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/array_api/tests/__pycache__/test_creation_functions.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/tests/__pycache__/test_elementwise_functions.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/array_api/tests/__pycache__/test_elementwise_functions.cpython-38.pyc new file mode 100644 index 0000000..306f01d Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/array_api/tests/__pycache__/test_elementwise_functions.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/tests/__pycache__/test_set_functions.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/array_api/tests/__pycache__/test_set_functions.cpython-38.pyc new file mode 100644 index 0000000..b33cfbd Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/array_api/tests/__pycache__/test_set_functions.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/tests/__pycache__/test_sorting_functions.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/array_api/tests/__pycache__/test_sorting_functions.cpython-38.pyc new file mode 100644 index 0000000..708c63e Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/array_api/tests/__pycache__/test_sorting_functions.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/tests/test_array_object.py b/.local/lib/python3.8/site-packages/numpy/array_api/tests/test_array_object.py new file mode 100644 index 0000000..1fe1dfd --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/array_api/tests/test_array_object.py @@ -0,0 +1,324 @@ +import operator + +from numpy.testing import assert_raises +import numpy as np + +from .. import ones, asarray, result_type, all, equal +from .._array_object import Array +from .._dtypes import ( + _all_dtypes, + _boolean_dtypes, + _floating_dtypes, + _integer_dtypes, + _integer_or_boolean_dtypes, + _numeric_dtypes, + int8, + int16, + int32, + int64, + uint64, +) + + +def test_validate_index(): + # The indexing tests in the official array API test suite test that the + # array object correctly handles the subset of indices that are required + # by the spec. But the NumPy array API implementation specifically + # disallows any index not required by the spec, via Array._validate_index. 
+ # This test focuses on testing that non-valid indices are correctly + # rejected. See + # https://data-apis.org/array-api/latest/API_specification/indexing.html + # and the docstring of Array._validate_index for the exact indexing + # behavior that should be allowed. This does not test indices that are + # already invalid in NumPy itself because Array will generally just pass + # such indices directly to the underlying np.ndarray. + + a = ones((3, 4)) + + # Out of bounds slices are not allowed + assert_raises(IndexError, lambda: a[:4]) + assert_raises(IndexError, lambda: a[:-4]) + assert_raises(IndexError, lambda: a[:3:-1]) + assert_raises(IndexError, lambda: a[:-5:-1]) + assert_raises(IndexError, lambda: a[4:]) + assert_raises(IndexError, lambda: a[-4:]) + assert_raises(IndexError, lambda: a[4::-1]) + assert_raises(IndexError, lambda: a[-4::-1]) + + assert_raises(IndexError, lambda: a[...,:5]) + assert_raises(IndexError, lambda: a[...,:-5]) + assert_raises(IndexError, lambda: a[...,:5:-1]) + assert_raises(IndexError, lambda: a[...,:-6:-1]) + assert_raises(IndexError, lambda: a[...,5:]) + assert_raises(IndexError, lambda: a[...,-5:]) + assert_raises(IndexError, lambda: a[...,5::-1]) + assert_raises(IndexError, lambda: a[...,-5::-1]) + + # Boolean indices cannot be part of a larger tuple index + assert_raises(IndexError, lambda: a[a[:,0]==1,0]) + assert_raises(IndexError, lambda: a[a[:,0]==1,...]) + assert_raises(IndexError, lambda: a[..., a[0]==1]) + assert_raises(IndexError, lambda: a[[True, True, True]]) + assert_raises(IndexError, lambda: a[(True, True, True),]) + + # Integer array indices are not allowed (except for 0-D) + idx = asarray([[0, 1]]) + assert_raises(IndexError, lambda: a[idx]) + assert_raises(IndexError, lambda: a[idx,]) + assert_raises(IndexError, lambda: a[[0, 1]]) + assert_raises(IndexError, lambda: a[(0, 1), (0, 1)]) + assert_raises(IndexError, lambda: a[[0, 1]]) + assert_raises(IndexError, lambda: a[np.array([[0, 1]])]) + + # np.newaxis is not allowed + assert_raises(IndexError, lambda: a[None]) + assert_raises(IndexError, lambda: a[None, ...]) + assert_raises(IndexError, lambda: a[..., None]) + + # Multiaxis indices must contain exactly as many indices as dimensions + assert_raises(IndexError, lambda: a[()]) + assert_raises(IndexError, lambda: a[0,]) + assert_raises(IndexError, lambda: a[0]) + assert_raises(IndexError, lambda: a[:]) + +def test_operators(): + # For every operator, we test that it works for the required type + # combinations and raises TypeError otherwise + binary_op_dtypes = { + "__add__": "numeric", + "__and__": "integer_or_boolean", + "__eq__": "all", + "__floordiv__": "numeric", + "__ge__": "numeric", + "__gt__": "numeric", + "__le__": "numeric", + "__lshift__": "integer", + "__lt__": "numeric", + "__mod__": "numeric", + "__mul__": "numeric", + "__ne__": "all", + "__or__": "integer_or_boolean", + "__pow__": "numeric", + "__rshift__": "integer", + "__sub__": "numeric", + "__truediv__": "floating", + "__xor__": "integer_or_boolean", + } + + # Recompute each time because of in-place ops + def _array_vals(): + for d in _integer_dtypes: + yield asarray(1, dtype=d) + for d in _boolean_dtypes: + yield asarray(False, dtype=d) + for d in _floating_dtypes: + yield asarray(1.0, dtype=d) + + for op, dtypes in binary_op_dtypes.items(): + ops = [op] + if op not in ["__eq__", "__ne__", "__le__", "__ge__", "__lt__", "__gt__"]: + rop = "__r" + op[2:] + iop = "__i" + op[2:] + ops += [rop, iop] + for s in [1, 1.0, False]: + for _op in ops: + for a in 
_array_vals(): + # Test array op scalar. From the spec, the following combinations + # are supported: + + # - Python bool for a bool array dtype, + # - a Python int within the bounds of the given dtype for integer array dtypes, + # - a Python int or float for floating-point array dtypes + + # We do not do bounds checking for int scalars, but rather use the default + # NumPy behavior for casting in that case. + + if ((dtypes == "all" + or dtypes == "numeric" and a.dtype in _numeric_dtypes + or dtypes == "integer" and a.dtype in _integer_dtypes + or dtypes == "integer_or_boolean" and a.dtype in _integer_or_boolean_dtypes + or dtypes == "boolean" and a.dtype in _boolean_dtypes + or dtypes == "floating" and a.dtype in _floating_dtypes + ) + # bool is a subtype of int, which is why we avoid + # isinstance here. + and (a.dtype in _boolean_dtypes and type(s) == bool + or a.dtype in _integer_dtypes and type(s) == int + or a.dtype in _floating_dtypes and type(s) in [float, int] + )): + # Only test for no error + getattr(a, _op)(s) + else: + assert_raises(TypeError, lambda: getattr(a, _op)(s)) + + # Test array op array. + for _op in ops: + for x in _array_vals(): + for y in _array_vals(): + # See the promotion table in NEP 47 or the array + # API spec page on type promotion. Mixed kind + # promotion is not defined. + if (x.dtype == uint64 and y.dtype in [int8, int16, int32, int64] + or y.dtype == uint64 and x.dtype in [int8, int16, int32, int64] + or x.dtype in _integer_dtypes and y.dtype not in _integer_dtypes + or y.dtype in _integer_dtypes and x.dtype not in _integer_dtypes + or x.dtype in _boolean_dtypes and y.dtype not in _boolean_dtypes + or y.dtype in _boolean_dtypes and x.dtype not in _boolean_dtypes + or x.dtype in _floating_dtypes and y.dtype not in _floating_dtypes + or y.dtype in _floating_dtypes and x.dtype not in _floating_dtypes + ): + assert_raises(TypeError, lambda: getattr(x, _op)(y)) + # Ensure in-place operators only promote to the same dtype as the left operand. + elif ( + _op.startswith("__i") + and result_type(x.dtype, y.dtype) != x.dtype + ): + assert_raises(TypeError, lambda: getattr(x, _op)(y)) + # Ensure only those dtypes that are required for every operator are allowed. 
+ elif (dtypes == "all" and (x.dtype in _boolean_dtypes and y.dtype in _boolean_dtypes + or x.dtype in _numeric_dtypes and y.dtype in _numeric_dtypes) + or (dtypes == "numeric" and x.dtype in _numeric_dtypes and y.dtype in _numeric_dtypes) + or dtypes == "integer" and x.dtype in _integer_dtypes and y.dtype in _numeric_dtypes + or dtypes == "integer_or_boolean" and (x.dtype in _integer_dtypes and y.dtype in _integer_dtypes + or x.dtype in _boolean_dtypes and y.dtype in _boolean_dtypes) + or dtypes == "boolean" and x.dtype in _boolean_dtypes and y.dtype in _boolean_dtypes + or dtypes == "floating" and x.dtype in _floating_dtypes and y.dtype in _floating_dtypes + ): + getattr(x, _op)(y) + else: + assert_raises(TypeError, lambda: getattr(x, _op)(y)) + + unary_op_dtypes = { + "__abs__": "numeric", + "__invert__": "integer_or_boolean", + "__neg__": "numeric", + "__pos__": "numeric", + } + for op, dtypes in unary_op_dtypes.items(): + for a in _array_vals(): + if ( + dtypes == "numeric" + and a.dtype in _numeric_dtypes + or dtypes == "integer_or_boolean" + and a.dtype in _integer_or_boolean_dtypes + ): + # Only test for no error + getattr(a, op)() + else: + assert_raises(TypeError, lambda: getattr(a, op)()) + + # Finally, matmul() must be tested separately, because it works a bit + # different from the other operations. + def _matmul_array_vals(): + for a in _array_vals(): + yield a + for d in _all_dtypes: + yield ones((3, 4), dtype=d) + yield ones((4, 2), dtype=d) + yield ones((4, 4), dtype=d) + + # Scalars always error + for _op in ["__matmul__", "__rmatmul__", "__imatmul__"]: + for s in [1, 1.0, False]: + for a in _matmul_array_vals(): + if (type(s) in [float, int] and a.dtype in _floating_dtypes + or type(s) == int and a.dtype in _integer_dtypes): + # Type promotion is valid, but @ is not allowed on 0-D + # inputs, so the error is a ValueError + assert_raises(ValueError, lambda: getattr(a, _op)(s)) + else: + assert_raises(TypeError, lambda: getattr(a, _op)(s)) + + for x in _matmul_array_vals(): + for y in _matmul_array_vals(): + if (x.dtype == uint64 and y.dtype in [int8, int16, int32, int64] + or y.dtype == uint64 and x.dtype in [int8, int16, int32, int64] + or x.dtype in _integer_dtypes and y.dtype not in _integer_dtypes + or y.dtype in _integer_dtypes and x.dtype not in _integer_dtypes + or x.dtype in _floating_dtypes and y.dtype not in _floating_dtypes + or y.dtype in _floating_dtypes and x.dtype not in _floating_dtypes + or x.dtype in _boolean_dtypes + or y.dtype in _boolean_dtypes + ): + assert_raises(TypeError, lambda: x.__matmul__(y)) + assert_raises(TypeError, lambda: y.__rmatmul__(x)) + assert_raises(TypeError, lambda: x.__imatmul__(y)) + elif x.shape == () or y.shape == () or x.shape[1] != y.shape[0]: + assert_raises(ValueError, lambda: x.__matmul__(y)) + assert_raises(ValueError, lambda: y.__rmatmul__(x)) + if result_type(x.dtype, y.dtype) != x.dtype: + assert_raises(TypeError, lambda: x.__imatmul__(y)) + else: + assert_raises(ValueError, lambda: x.__imatmul__(y)) + else: + x.__matmul__(y) + y.__rmatmul__(x) + if result_type(x.dtype, y.dtype) != x.dtype: + assert_raises(TypeError, lambda: x.__imatmul__(y)) + elif y.shape[0] != y.shape[1]: + # This one fails because x @ y has a different shape from x + assert_raises(ValueError, lambda: x.__imatmul__(y)) + else: + x.__imatmul__(y) + + +def test_python_scalar_construtors(): + b = asarray(False) + i = asarray(0) + f = asarray(0.0) + + assert bool(b) == False + assert int(i) == 0 + assert float(f) == 0.0 + assert operator.index(i) == 0 
+ + # bool/int/float should only be allowed on 0-D arrays. + assert_raises(TypeError, lambda: bool(asarray([False]))) + assert_raises(TypeError, lambda: int(asarray([0]))) + assert_raises(TypeError, lambda: float(asarray([0.0]))) + assert_raises(TypeError, lambda: operator.index(asarray([0]))) + + # bool/int/float should only be allowed on arrays of the corresponding + # dtype + assert_raises(ValueError, lambda: bool(i)) + assert_raises(ValueError, lambda: bool(f)) + + assert_raises(ValueError, lambda: int(b)) + assert_raises(ValueError, lambda: int(f)) + + assert_raises(ValueError, lambda: float(b)) + assert_raises(ValueError, lambda: float(i)) + + assert_raises(TypeError, lambda: operator.index(b)) + assert_raises(TypeError, lambda: operator.index(f)) + + +def test_device_property(): + a = ones((3, 4)) + assert a.device == 'cpu' + + assert all(equal(a.to_device('cpu'), a)) + assert_raises(ValueError, lambda: a.to_device('gpu')) + + assert all(equal(asarray(a, device='cpu'), a)) + assert_raises(ValueError, lambda: asarray(a, device='gpu')) + +def test_array_properties(): + a = ones((1, 2, 3)) + b = ones((2, 3)) + assert_raises(ValueError, lambda: a.T) + + assert isinstance(b.T, Array) + assert b.T.shape == (3, 2) + + assert isinstance(a.mT, Array) + assert a.mT.shape == (1, 3, 2) + assert isinstance(b.mT, Array) + assert b.mT.shape == (3, 2) + +def test___array__(): + a = ones((2, 3), dtype=int16) + assert np.asarray(a) is a._array + b = np.asarray(a, dtype=np.float64) + assert np.all(np.equal(b, np.ones((2, 3), dtype=np.float64))) + assert b.dtype == np.float64 diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/tests/test_creation_functions.py b/.local/lib/python3.8/site-packages/numpy/array_api/tests/test_creation_functions.py new file mode 100644 index 0000000..be9eaa3 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/array_api/tests/test_creation_functions.py @@ -0,0 +1,142 @@ +from numpy.testing import assert_raises +import numpy as np + +from .. 
import all +from .._creation_functions import ( + asarray, + arange, + empty, + empty_like, + eye, + full, + full_like, + linspace, + meshgrid, + ones, + ones_like, + zeros, + zeros_like, +) +from .._dtypes import float32, float64 +from .._array_object import Array + + +def test_asarray_errors(): + # Test various protections against incorrect usage + assert_raises(TypeError, lambda: Array([1])) + assert_raises(TypeError, lambda: asarray(["a"])) + assert_raises(ValueError, lambda: asarray([1.0], dtype=np.float16)) + assert_raises(OverflowError, lambda: asarray(2**100)) + # Preferably this would be OverflowError + # assert_raises(OverflowError, lambda: asarray([2**100])) + assert_raises(TypeError, lambda: asarray([2**100])) + asarray([1], device="cpu") # Doesn't error + assert_raises(ValueError, lambda: asarray([1], device="gpu")) + + assert_raises(ValueError, lambda: asarray([1], dtype=int)) + assert_raises(ValueError, lambda: asarray([1], dtype="i")) + + +def test_asarray_copy(): + a = asarray([1]) + b = asarray(a, copy=True) + a[0] = 0 + assert all(b[0] == 1) + assert all(a[0] == 0) + a = asarray([1]) + b = asarray(a, copy=np._CopyMode.ALWAYS) + a[0] = 0 + assert all(b[0] == 1) + assert all(a[0] == 0) + a = asarray([1]) + b = asarray(a, copy=np._CopyMode.NEVER) + a[0] = 0 + assert all(b[0] == 0) + assert_raises(NotImplementedError, lambda: asarray(a, copy=False)) + assert_raises(NotImplementedError, + lambda: asarray(a, copy=np._CopyMode.IF_NEEDED)) + + +def test_arange_errors(): + arange(1, device="cpu") # Doesn't error + assert_raises(ValueError, lambda: arange(1, device="gpu")) + assert_raises(ValueError, lambda: arange(1, dtype=int)) + assert_raises(ValueError, lambda: arange(1, dtype="i")) + + +def test_empty_errors(): + empty((1,), device="cpu") # Doesn't error + assert_raises(ValueError, lambda: empty((1,), device="gpu")) + assert_raises(ValueError, lambda: empty((1,), dtype=int)) + assert_raises(ValueError, lambda: empty((1,), dtype="i")) + + +def test_empty_like_errors(): + empty_like(asarray(1), device="cpu") # Doesn't error + assert_raises(ValueError, lambda: empty_like(asarray(1), device="gpu")) + assert_raises(ValueError, lambda: empty_like(asarray(1), dtype=int)) + assert_raises(ValueError, lambda: empty_like(asarray(1), dtype="i")) + + +def test_eye_errors(): + eye(1, device="cpu") # Doesn't error + assert_raises(ValueError, lambda: eye(1, device="gpu")) + assert_raises(ValueError, lambda: eye(1, dtype=int)) + assert_raises(ValueError, lambda: eye(1, dtype="i")) + + +def test_full_errors(): + full((1,), 0, device="cpu") # Doesn't error + assert_raises(ValueError, lambda: full((1,), 0, device="gpu")) + assert_raises(ValueError, lambda: full((1,), 0, dtype=int)) + assert_raises(ValueError, lambda: full((1,), 0, dtype="i")) + + +def test_full_like_errors(): + full_like(asarray(1), 0, device="cpu") # Doesn't error + assert_raises(ValueError, lambda: full_like(asarray(1), 0, device="gpu")) + assert_raises(ValueError, lambda: full_like(asarray(1), 0, dtype=int)) + assert_raises(ValueError, lambda: full_like(asarray(1), 0, dtype="i")) + + +def test_linspace_errors(): + linspace(0, 1, 10, device="cpu") # Doesn't error + assert_raises(ValueError, lambda: linspace(0, 1, 10, device="gpu")) + assert_raises(ValueError, lambda: linspace(0, 1, 10, dtype=float)) + assert_raises(ValueError, lambda: linspace(0, 1, 10, dtype="f")) + + +def test_ones_errors(): + ones((1,), device="cpu") # Doesn't error + assert_raises(ValueError, lambda: ones((1,), device="gpu")) + assert_raises(ValueError, 
lambda: ones((1,), dtype=int)) + assert_raises(ValueError, lambda: ones((1,), dtype="i")) + + +def test_ones_like_errors(): + ones_like(asarray(1), device="cpu") # Doesn't error + assert_raises(ValueError, lambda: ones_like(asarray(1), device="gpu")) + assert_raises(ValueError, lambda: ones_like(asarray(1), dtype=int)) + assert_raises(ValueError, lambda: ones_like(asarray(1), dtype="i")) + + +def test_zeros_errors(): + zeros((1,), device="cpu") # Doesn't error + assert_raises(ValueError, lambda: zeros((1,), device="gpu")) + assert_raises(ValueError, lambda: zeros((1,), dtype=int)) + assert_raises(ValueError, lambda: zeros((1,), dtype="i")) + + +def test_zeros_like_errors(): + zeros_like(asarray(1), device="cpu") # Doesn't error + assert_raises(ValueError, lambda: zeros_like(asarray(1), device="gpu")) + assert_raises(ValueError, lambda: zeros_like(asarray(1), dtype=int)) + assert_raises(ValueError, lambda: zeros_like(asarray(1), dtype="i")) + +def test_meshgrid_dtype_errors(): + # Doesn't raise + meshgrid() + meshgrid(asarray([1.], dtype=float32)) + meshgrid(asarray([1.], dtype=float32), asarray([1.], dtype=float32)) + + assert_raises(ValueError, lambda: meshgrid(asarray([1.], dtype=float32), asarray([1.], dtype=float64))) diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/tests/test_elementwise_functions.py b/.local/lib/python3.8/site-packages/numpy/array_api/tests/test_elementwise_functions.py new file mode 100644 index 0000000..b2fb44e --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/array_api/tests/test_elementwise_functions.py @@ -0,0 +1,111 @@ +from inspect import getfullargspec + +from numpy.testing import assert_raises + +from .. import asarray, _elementwise_functions +from .._elementwise_functions import bitwise_left_shift, bitwise_right_shift +from .._dtypes import ( + _dtype_categories, + _boolean_dtypes, + _floating_dtypes, + _integer_dtypes, +) + + +def nargs(func): + return len(getfullargspec(func).args) + + +def test_function_types(): + # Test that every function accepts only the required input types. We only + # test the negative cases here (error). The positive cases are tested in + # the array API test suite. 
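# A hedged sketch of the rule exercised below, via the public namespace
# (comments only; the TypeError behaviour is exactly what this test asserts):
#
#     from numpy import array_api as xp
#     xp.sin(xp.asarray(1.0))   # fine: sin accepts floating-point dtypes
#     xp.sin(xp.asarray(1))     # TypeError: sin is floating-point only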
+ + elementwise_function_input_types = { + "abs": "numeric", + "acos": "floating-point", + "acosh": "floating-point", + "add": "numeric", + "asin": "floating-point", + "asinh": "floating-point", + "atan": "floating-point", + "atan2": "floating-point", + "atanh": "floating-point", + "bitwise_and": "integer or boolean", + "bitwise_invert": "integer or boolean", + "bitwise_left_shift": "integer", + "bitwise_or": "integer or boolean", + "bitwise_right_shift": "integer", + "bitwise_xor": "integer or boolean", + "ceil": "numeric", + "cos": "floating-point", + "cosh": "floating-point", + "divide": "floating-point", + "equal": "all", + "exp": "floating-point", + "expm1": "floating-point", + "floor": "numeric", + "floor_divide": "numeric", + "greater": "numeric", + "greater_equal": "numeric", + "isfinite": "numeric", + "isinf": "numeric", + "isnan": "numeric", + "less": "numeric", + "less_equal": "numeric", + "log": "floating-point", + "logaddexp": "floating-point", + "log10": "floating-point", + "log1p": "floating-point", + "log2": "floating-point", + "logical_and": "boolean", + "logical_not": "boolean", + "logical_or": "boolean", + "logical_xor": "boolean", + "multiply": "numeric", + "negative": "numeric", + "not_equal": "all", + "positive": "numeric", + "pow": "numeric", + "remainder": "numeric", + "round": "numeric", + "sign": "numeric", + "sin": "floating-point", + "sinh": "floating-point", + "sqrt": "floating-point", + "square": "numeric", + "subtract": "numeric", + "tan": "floating-point", + "tanh": "floating-point", + "trunc": "numeric", + } + + def _array_vals(): + for d in _integer_dtypes: + yield asarray(1, dtype=d) + for d in _boolean_dtypes: + yield asarray(False, dtype=d) + for d in _floating_dtypes: + yield asarray(1.0, dtype=d) + + for x in _array_vals(): + for func_name, types in elementwise_function_input_types.items(): + dtypes = _dtype_categories[types] + func = getattr(_elementwise_functions, func_name) + if nargs(func) == 2: + for y in _array_vals(): + if x.dtype not in dtypes or y.dtype not in dtypes: + assert_raises(TypeError, lambda: func(x, y)) + else: + if x.dtype not in dtypes: + assert_raises(TypeError, lambda: func(x)) + + +def test_bitwise_shift_error(): + # bitwise shift functions should raise when the second argument is negative + assert_raises( + ValueError, lambda: bitwise_left_shift(asarray([1, 1]), asarray([1, -1])) + ) + assert_raises( + ValueError, lambda: bitwise_right_shift(asarray([1, 1]), asarray([1, -1])) + ) diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/tests/test_set_functions.py b/.local/lib/python3.8/site-packages/numpy/array_api/tests/test_set_functions.py new file mode 100644 index 0000000..b8eb65d --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/array_api/tests/test_set_functions.py @@ -0,0 +1,19 @@ +import pytest +from hypothesis import given +from hypothesis.extra.array_api import make_strategies_namespace + +from numpy import array_api as xp + +xps = make_strategies_namespace(xp) + + +@pytest.mark.parametrize("func", [xp.unique_all, xp.unique_inverse]) +@given(xps.arrays(dtype=xps.scalar_dtypes(), shape=xps.array_shapes())) +def test_inverse_indices_shape(func, x): + """ + Inverse indices share shape of input array + + See https://github.com/numpy/numpy/issues/20638 + """ + out = func(x) + assert out.inverse_indices.shape == x.shape diff --git a/.local/lib/python3.8/site-packages/numpy/array_api/tests/test_sorting_functions.py b/.local/lib/python3.8/site-packages/numpy/array_api/tests/test_sorting_functions.py new 
file mode 100644 index 0000000..9848bbf --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/array_api/tests/test_sorting_functions.py @@ -0,0 +1,23 @@ +import pytest + +from numpy import array_api as xp + + +@pytest.mark.parametrize( + "obj, axis, expected", + [ + ([0, 0], -1, [0, 1]), + ([0, 1, 0], -1, [1, 0, 2]), + ([[0, 1], [1, 1]], 0, [[1, 0], [0, 1]]), + ([[0, 1], [1, 1]], 1, [[1, 0], [0, 1]]), + ], +) +def test_stable_desc_argsort(obj, axis, expected): + """ + Indices respect relative order of a descending stable-sort + + See https://github.com/numpy/numpy/issues/20778 + """ + x = xp.asarray(obj) + out = xp.argsort(x, axis=axis, stable=True, descending=True) + assert xp.all(out == xp.asarray(expected)) diff --git a/.local/lib/python3.8/site-packages/numpy/compat/__init__.py b/.local/lib/python3.8/site-packages/numpy/compat/__init__.py new file mode 100644 index 0000000..afee621 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/compat/__init__.py @@ -0,0 +1,18 @@ +""" +Compatibility module. + +This module contains duplicated code from Python itself or 3rd party +extensions, which may be included for the following reasons: + + * compatibility + * we may only need a small subset of the copied library/module + +""" +from . import _inspect +from . import py3k +from ._inspect import getargspec, formatargspec +from .py3k import * + +__all__ = [] +__all__.extend(_inspect.__all__) +__all__.extend(py3k.__all__) diff --git a/.local/lib/python3.8/site-packages/numpy/compat/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/compat/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..f84a223 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/compat/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/compat/__pycache__/_inspect.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/compat/__pycache__/_inspect.cpython-38.pyc new file mode 100644 index 0000000..8cb5724 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/compat/__pycache__/_inspect.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/compat/__pycache__/py3k.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/compat/__pycache__/py3k.cpython-38.pyc new file mode 100644 index 0000000..229da97 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/compat/__pycache__/py3k.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/compat/__pycache__/setup.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/compat/__pycache__/setup.cpython-38.pyc new file mode 100644 index 0000000..7bb3e57 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/compat/__pycache__/setup.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/compat/_inspect.py b/.local/lib/python3.8/site-packages/numpy/compat/_inspect.py new file mode 100644 index 0000000..9a874a7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/compat/_inspect.py @@ -0,0 +1,191 @@ +"""Subset of inspect module from upstream python + +We use this instead of upstream because upstream inspect is slow to import, and +significantly contributes to numpy import times. Importing this copy has almost +no overhead. + +""" +import types + +__all__ = ['getargspec', 'formatargspec'] + +# ----------------------------------------------------------- type-checking +def ismethod(object): + """Return true if the object is an instance method. 
+ + Instance method objects provide these attributes: + __doc__ documentation string + __name__ name with which this method was defined + im_class class object in which this method belongs + im_func function object containing implementation of method + im_self instance to which this method is bound, or None + + """ + return isinstance(object, types.MethodType) + +def isfunction(object): + """Return true if the object is a user-defined function. + + Function objects provide these attributes: + __doc__ documentation string + __name__ name with which this function was defined + func_code code object containing compiled function bytecode + func_defaults tuple of any default values for arguments + func_doc (same as __doc__) + func_globals global namespace in which this function was defined + func_name (same as __name__) + + """ + return isinstance(object, types.FunctionType) + +def iscode(object): + """Return true if the object is a code object. + + Code objects provide these attributes: + co_argcount number of arguments (not including * or ** args) + co_code string of raw compiled bytecode + co_consts tuple of constants used in the bytecode + co_filename name of file in which this code object was created + co_firstlineno number of first line in Python source code + co_flags bitmap: 1=optimized | 2=newlocals | 4=*arg | 8=**arg + co_lnotab encoded mapping of line numbers to bytecode indices + co_name name with which this code object was defined + co_names tuple of names of local variables + co_nlocals number of local variables + co_stacksize virtual machine stack space required + co_varnames tuple of names of arguments and local variables + + """ + return isinstance(object, types.CodeType) + +# ------------------------------------------------ argument list extraction +# These constants are from Python's compile.h. +CO_OPTIMIZED, CO_NEWLOCALS, CO_VARARGS, CO_VARKEYWORDS = 1, 2, 4, 8 + +def getargs(co): + """Get information about the arguments accepted by a code object. + + Three things are returned: (args, varargs, varkw), where 'args' is + a list of argument names (possibly containing nested lists), and + 'varargs' and 'varkw' are the names of the * and ** arguments or None. + + """ + + if not iscode(co): + raise TypeError('arg is not a code object') + + nargs = co.co_argcount + names = co.co_varnames + args = list(names[:nargs]) + + # The following acrobatics are for anonymous (tuple) arguments. + # Which we do not need to support, so remove to avoid importing + # the dis module. + for i in range(nargs): + if args[i][:1] in ['', '.']: + raise TypeError("tuple function arguments are not supported") + varargs = None + if co.co_flags & CO_VARARGS: + varargs = co.co_varnames[nargs] + nargs = nargs + 1 + varkw = None + if co.co_flags & CO_VARKEYWORDS: + varkw = co.co_varnames[nargs] + return args, varargs, varkw + +def getargspec(func): + """Get the names and default values of a function's arguments. + + A tuple of four things is returned: (args, varargs, varkw, defaults). + 'args' is a list of the argument names (it may contain nested lists). + 'varargs' and 'varkw' are the names of the * and ** arguments or None. + 'defaults' is an n-tuple of the default values of the last n arguments. 
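    For example, a brief sketch (output as under CPython)::

        >>> def f(a, b=1, *args, **kw): pass
        >>> getargspec(f)
        (['a', 'b'], 'args', 'kw', (1,))
        >>> formatargspec(*getargspec(f))
        '(a, b=1, *args, **kw)'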
+ + """ + + if ismethod(func): + func = func.__func__ + if not isfunction(func): + raise TypeError('arg is not a Python function') + args, varargs, varkw = getargs(func.__code__) + return args, varargs, varkw, func.__defaults__ + +def getargvalues(frame): + """Get information about arguments passed into a particular frame. + + A tuple of four things is returned: (args, varargs, varkw, locals). + 'args' is a list of the argument names (it may contain nested lists). + 'varargs' and 'varkw' are the names of the * and ** arguments or None. + 'locals' is the locals dictionary of the given frame. + + """ + args, varargs, varkw = getargs(frame.f_code) + return args, varargs, varkw, frame.f_locals + +def joinseq(seq): + if len(seq) == 1: + return '(' + seq[0] + ',)' + else: + return '(' + ', '.join(seq) + ')' + +def strseq(object, convert, join=joinseq): + """Recursively walk a sequence, stringifying each element. + + """ + if type(object) in [list, tuple]: + return join([strseq(_o, convert, join) for _o in object]) + else: + return convert(object) + +def formatargspec(args, varargs=None, varkw=None, defaults=None, + formatarg=str, + formatvarargs=lambda name: '*' + name, + formatvarkw=lambda name: '**' + name, + formatvalue=lambda value: '=' + repr(value), + join=joinseq): + """Format an argument spec from the 4 values returned by getargspec. + + The first four arguments are (args, varargs, varkw, defaults). The + other four arguments are the corresponding optional formatting functions + that are called to turn names and values into strings. The ninth + argument is an optional function to format the sequence of arguments. + + """ + specs = [] + if defaults: + firstdefault = len(args) - len(defaults) + for i in range(len(args)): + spec = strseq(args[i], formatarg, join) + if defaults and i >= firstdefault: + spec = spec + formatvalue(defaults[i - firstdefault]) + specs.append(spec) + if varargs is not None: + specs.append(formatvarargs(varargs)) + if varkw is not None: + specs.append(formatvarkw(varkw)) + return '(' + ', '.join(specs) + ')' + +def formatargvalues(args, varargs, varkw, locals, + formatarg=str, + formatvarargs=lambda name: '*' + name, + formatvarkw=lambda name: '**' + name, + formatvalue=lambda value: '=' + repr(value), + join=joinseq): + """Format an argument spec from the 4 values returned by getargvalues. + + The first four arguments are (args, varargs, varkw, locals). The + next four arguments are the corresponding optional formatting functions + that are called to turn names and values into strings. The ninth + argument is an optional function to format the sequence of arguments. + + """ + def convert(name, locals=locals, + formatarg=formatarg, formatvalue=formatvalue): + return formatarg(name) + formatvalue(locals[name]) + specs = [strseq(arg, convert, join) for arg in args] + + if varargs: + specs.append(formatvarargs(varargs) + formatvalue(locals[varargs])) + if varkw: + specs.append(formatvarkw(varkw) + formatvalue(locals[varkw])) + return '(' + ', '.join(specs) + ')' diff --git a/.local/lib/python3.8/site-packages/numpy/compat/py3k.py b/.local/lib/python3.8/site-packages/numpy/compat/py3k.py new file mode 100644 index 0000000..3d10bb9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/compat/py3k.py @@ -0,0 +1,137 @@ +""" +Python 3.X compatibility tools. + +While this file was originally intended for Python 2 -> 3 transition, +it is now used to create a compatibility layer between different +minor versions of Python 3. 
+ +While the active version of numpy may not support a given version of python, we +allow downstream libraries to continue to use these shims for forward +compatibility with numpy while they transition their code to newer versions of +Python. +""" +__all__ = ['bytes', 'asbytes', 'isfileobj', 'getexception', 'strchar', + 'unicode', 'asunicode', 'asbytes_nested', 'asunicode_nested', + 'asstr', 'open_latin1', 'long', 'basestring', 'sixu', + 'integer_types', 'is_pathlib_path', 'npy_load_module', 'Path', + 'pickle', 'contextlib_nullcontext', 'os_fspath', 'os_PathLike'] + +import sys +import os +from pathlib import Path +import io +try: + import pickle5 as pickle +except ImportError: + import pickle + +long = int +integer_types = (int,) +basestring = str +unicode = str +bytes = bytes + +def asunicode(s): + if isinstance(s, bytes): + return s.decode('latin1') + return str(s) + +def asbytes(s): + if isinstance(s, bytes): + return s + return str(s).encode('latin1') + +def asstr(s): + if isinstance(s, bytes): + return s.decode('latin1') + return str(s) + +def isfileobj(f): + return isinstance(f, (io.FileIO, io.BufferedReader, io.BufferedWriter)) + +def open_latin1(filename, mode='r'): + return open(filename, mode=mode, encoding='iso-8859-1') + +def sixu(s): + return s + +strchar = 'U' + +def getexception(): + return sys.exc_info()[1] + +def asbytes_nested(x): + if hasattr(x, '__iter__') and not isinstance(x, (bytes, unicode)): + return [asbytes_nested(y) for y in x] + else: + return asbytes(x) + +def asunicode_nested(x): + if hasattr(x, '__iter__') and not isinstance(x, (bytes, unicode)): + return [asunicode_nested(y) for y in x] + else: + return asunicode(x) + +def is_pathlib_path(obj): + """ + Check whether obj is a `pathlib.Path` object. + + Prefer using ``isinstance(obj, os.PathLike)`` instead of this function. + """ + return isinstance(obj, Path) + +# from Python 3.7 +class contextlib_nullcontext: + """Context manager that does no additional processing. + + Used as a stand-in for a normal context manager, when a particular + block of code is only sometimes used with a normal context manager: + + cm = optional_cm if condition else nullcontext() + with cm: + # Perform operation, using optional_cm if condition is True + + .. note:: + Prefer using `contextlib.nullcontext` instead of this context manager. + """ + + def __init__(self, enter_result=None): + self.enter_result = enter_result + + def __enter__(self): + return self.enter_result + + def __exit__(self, *excinfo): + pass + + +def npy_load_module(name, fn, info=None): + """ + Load a module. Uses ``load_module`` which will be deprecated in python + 3.12. An alternative that uses ``exec_module`` is in + numpy.distutils.misc_util.exec_mod_from_location + + .. versionadded:: 1.11.2 + + Parameters + ---------- + name : str + Full module name. + fn : str + Path to module file. + info : tuple, optional + Only here for backward compatibility with Python 2.*. 
+ + Returns + ------- + mod : module + + """ + # Explicitly lazy import this to avoid paying the cost + # of importing importlib at startup + from importlib.machinery import SourceFileLoader + return SourceFileLoader(name, fn).load_module() + + +os_fspath = os.fspath +os_PathLike = os.PathLike diff --git a/.local/lib/python3.8/site-packages/numpy/compat/setup.py b/.local/lib/python3.8/site-packages/numpy/compat/setup.py new file mode 100644 index 0000000..c1b34a2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/compat/setup.py @@ -0,0 +1,10 @@ +def configuration(parent_package='',top_path=None): + from numpy.distutils.misc_util import Configuration + + config = Configuration('compat', parent_package, top_path) + config.add_subpackage('tests') + return config + +if __name__ == '__main__': + from numpy.distutils.core import setup + setup(configuration=configuration) diff --git a/.local/lib/python3.8/site-packages/numpy/compat/tests/__init__.py b/.local/lib/python3.8/site-packages/numpy/compat/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/numpy/compat/tests/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/compat/tests/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..f0fa78a Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/compat/tests/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/compat/tests/__pycache__/test_compat.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/compat/tests/__pycache__/test_compat.cpython-38.pyc new file mode 100644 index 0000000..34ffb0f Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/compat/tests/__pycache__/test_compat.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/compat/tests/test_compat.py b/.local/lib/python3.8/site-packages/numpy/compat/tests/test_compat.py new file mode 100644 index 0000000..2b8acba --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/compat/tests/test_compat.py @@ -0,0 +1,19 @@ +from os.path import join + +from numpy.compat import isfileobj +from numpy.testing import assert_ +from numpy.testing import tempdir + + +def test_isfileobj(): + with tempdir(prefix="numpy_test_compat_") as folder: + filename = join(folder, 'a.bin') + + with open(filename, 'wb') as f: + assert_(isfileobj(f)) + + with open(filename, 'ab') as f: + assert_(isfileobj(f)) + + with open(filename, 'rb') as f: + assert_(isfileobj(f)) diff --git a/.local/lib/python3.8/site-packages/numpy/conftest.py b/.local/lib/python3.8/site-packages/numpy/conftest.py new file mode 100644 index 0000000..fd5fdd7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/conftest.py @@ -0,0 +1,119 @@ +""" +Pytest configuration and fixtures for the Numpy test suite. +""" +import os +import tempfile + +import hypothesis +import pytest +import numpy + +from numpy.core._multiarray_tests import get_fpu_mode + + +_old_fpu_mode = None +_collect_results = {} + +# Use a known and persistent tmpdir for hypothesis' caches, which +# can be automatically cleared by the OS or user. 
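# A minimal standalone sketch of the hypothesis APIs used below; the "ci"
# profile name is made up for illustration, but the calls themselves are
# public hypothesis APIs:
#
#     import hypothesis
#     hypothesis.settings.register_profile("ci", deadline=None, print_blob=True)
#     hypothesis.settings.load_profile("ci")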
+hypothesis.configuration.set_hypothesis_home_dir( + os.path.join(tempfile.gettempdir(), ".hypothesis") +) + +# We register two custom profiles for Numpy - for details see +# https://hypothesis.readthedocs.io/en/latest/settings.html +# The first is designed for our own CI runs; the latter also +# forces determinism and is designed for use via np.test() +hypothesis.settings.register_profile( + name="numpy-profile", deadline=None, print_blob=True, +) +hypothesis.settings.register_profile( + name="np.test() profile", + deadline=None, print_blob=True, database=None, derandomize=True, + suppress_health_check=hypothesis.HealthCheck.all(), +) +# Note that the default profile is chosen based on the presence +# of pytest.ini, but can be overridden by passing the +# --hypothesis-profile=NAME argument to pytest. +_pytest_ini = os.path.join(os.path.dirname(__file__), "..", "pytest.ini") +hypothesis.settings.load_profile( + "numpy-profile" if os.path.isfile(_pytest_ini) else "np.test() profile" +) + + +def pytest_configure(config): + config.addinivalue_line("markers", + "valgrind_error: Tests that are known to error under valgrind.") + config.addinivalue_line("markers", + "leaks_references: Tests that are known to leak references.") + config.addinivalue_line("markers", + "slow: Tests that are very slow.") + config.addinivalue_line("markers", + "slow_pypy: Tests that are very slow on pypy.") + + +def pytest_addoption(parser): + parser.addoption("--available-memory", action="store", default=None, + help=("Set amount of memory available for running the " + "test suite. This can result in tests requiring " + "especially large amounts of memory to be skipped. " + "Equivalent to setting environment variable " + "NPY_AVAILABLE_MEM. Default: determined " + "automatically.")) + + +def pytest_sessionstart(session): + available_mem = session.config.getoption('available_memory') + if available_mem is not None: + os.environ['NPY_AVAILABLE_MEM'] = available_mem + + +# FIXME: remove this hook when yield tests are gone. +@pytest.hookimpl() +def pytest_itemcollected(item): + """ + Check FPU precision mode was not changed during test collection. + + The clumsy way we do it here is mainly necessary because numpy + still uses yield tests, which can execute code at test collection + time. + """ + global _old_fpu_mode + + mode = get_fpu_mode() + + if _old_fpu_mode is None: + _old_fpu_mode = mode + elif mode != _old_fpu_mode: + _collect_results[item] = (_old_fpu_mode, mode) + _old_fpu_mode = mode + + +@pytest.fixture(scope="function", autouse=True) +def check_fpu_mode(request): + """ + Check FPU precision mode was not changed during the test.
+ """ + old_mode = get_fpu_mode() + yield + new_mode = get_fpu_mode() + + if old_mode != new_mode: + raise AssertionError("FPU precision mode changed from {0:#x} to {1:#x}" + " during the test".format(old_mode, new_mode)) + + collect_result = _collect_results.get(request.node) + if collect_result is not None: + old_mode, new_mode = collect_result + raise AssertionError("FPU precision mode changed from {0:#x} to {1:#x}" + " when collecting the test".format(old_mode, + new_mode)) + + +@pytest.fixture(autouse=True) +def add_np(doctest_namespace): + doctest_namespace['np'] = numpy + +@pytest.fixture(autouse=True) +def env_setup(monkeypatch): + monkeypatch.setenv('PYTHONHASHSEED', '0') diff --git a/.local/lib/python3.8/site-packages/numpy/core/__init__.py b/.local/lib/python3.8/site-packages/numpy/core/__init__.py new file mode 100644 index 0000000..b89e27f --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/__init__.py @@ -0,0 +1,176 @@ +""" +Contains the core of NumPy: ndarray, ufuncs, dtypes, etc. + +Please note that this module is private. All functions and objects +are available in the main ``numpy`` namespace - use that instead. + +""" + +from numpy.version import version as __version__ + +import os +import warnings + +# disables OpenBLAS affinity setting of the main thread that limits +# python threads or processes to one core +env_added = [] +for envkey in ['OPENBLAS_MAIN_FREE', 'GOTOBLAS_MAIN_FREE']: + if envkey not in os.environ: + os.environ[envkey] = '1' + env_added.append(envkey) + +try: + from . import multiarray +except ImportError as exc: + import sys + msg = """ + +IMPORTANT: PLEASE READ THIS FOR ADVICE ON HOW TO SOLVE THIS ISSUE! + +Importing the numpy C-extensions failed. This error can happen for +many reasons, often due to issues with your setup or how NumPy was +installed. + +We have compiled some common reasons and troubleshooting tips at: + + https://numpy.org/devdocs/user/troubleshooting-importerror.html + +Please note and check the following: + + * The Python version is: Python%d.%d from "%s" + * The NumPy version is: "%s" + +and make sure that they are the versions you expect. +Please carefully study the documentation linked above for further help. + +Original error was: %s +""" % (sys.version_info[0], sys.version_info[1], sys.executable, + __version__, exc) + raise ImportError(msg) +finally: + for envkey in env_added: + del os.environ[envkey] +del envkey +del env_added +del os + +from . import umath + +# Check that multiarray,umath are pure python modules wrapping +# _multiarray_umath and not either of the old c-extension modules +if not (hasattr(multiarray, '_multiarray_umath') and + hasattr(umath, '_multiarray_umath')): + import sys + path = sys.modules['numpy'].__path__ + msg = ("Something is wrong with the numpy installation. " + "While importing we detected an older version of " + "numpy in {}. One method of fixing this is to repeatedly uninstall " + "numpy until none is found, then reinstall this version.") + raise ImportError(msg.format(path)) + +from . import numerictypes as nt +multiarray.set_typeDict(nt.sctypeDict) +from . import numeric +from .numeric import * +from . import fromnumeric +from .fromnumeric import * +from . import defchararray as char +from . import records as rec +from .records import record, recarray, format_parser +from .memmap import * +from .defchararray import chararray +from . import function_base +from .function_base import * +from . import _machar +from ._machar import * +from . 
import getlimits +from .getlimits import * +from . import shape_base +from .shape_base import * +from . import einsumfunc +from .einsumfunc import * +del nt + +from .fromnumeric import amax as max, amin as min, round_ as round +from .numeric import absolute as abs + +# do this after everything else, to minimize the chance of this misleadingly +# appearing in an import-time traceback +from . import _add_newdocs +from . import _add_newdocs_scalars +# add these for module-freeze analysis (like PyInstaller) +from . import _dtype_ctypes +from . import _internal +from . import _dtype +from . import _methods + +__all__ = ['char', 'rec', 'memmap'] +__all__ += numeric.__all__ +__all__ += ['record', 'recarray', 'format_parser'] +__all__ += ['chararray'] +__all__ += function_base.__all__ +__all__ += getlimits.__all__ +__all__ += shape_base.__all__ +__all__ += einsumfunc.__all__ + +# We used to use `np.core._ufunc_reconstruct` to unpickle. This is unnecessary, +# but old pickles saved before 1.20 will be using it, and there is no reason +# to break loading them. +def _ufunc_reconstruct(module, name): + # The `fromlist` kwarg is required to ensure that `mod` points to the + # inner-most module rather than the parent package when module name is + # nested. This makes it possible to pickle non-toplevel ufuncs such as + # scipy.special.expit for instance. + mod = __import__(module, fromlist=[name]) + return getattr(mod, name) + + +def _ufunc_reduce(func): + # Report the `__name__`. pickle will try to find the module. Note that + # pickle supports for this `__name__` to be a `__qualname__`. It may + # make sense to add a `__qualname__` to ufuncs, to allow this more + # explicitly (Numba has ufuncs as attributes). + # See also: https://github.com/dask/distributed/issues/3450 + return func.__name__ + + +def _DType_reconstruct(scalar_type): + # This is a work-around to pickle type(np.dtype(np.float64)), etc. + # and it should eventually be replaced with a better solution, e.g. when + # DTypes become HeapTypes. + return type(dtype(scalar_type)) + + +def _DType_reduce(DType): + # To pickle a DType without having to add top-level names, pickle the + # scalar type for now (and assume that reconstruction will be possible). + if DType is dtype: + return "dtype" # must pickle `np.dtype` as a singleton. 
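    # Illustrative round trip for the ufunc and DType pickle hooks in this
    # module (comments only; assumes a regular NumPy install):
    #
    #     import pickle, numpy as np
    #     pickle.loads(pickle.dumps(np.add)) is np.add   # True: pickled by name
    #     DT = type(np.dtype(np.float64))
    #     pickle.loads(pickle.dumps(DT)) is DT           # True: via the scalar type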
+ scalar_type = DType.type # pickle the scalar type for reconstruction + return _DType_reconstruct, (scalar_type,) + + +def __getattr__(name): + # Deprecated 2021-10-20, NumPy 1.22 + if name == "machar": + warnings.warn( + "The `np.core.machar` module is deprecated (NumPy 1.22)", + DeprecationWarning, stacklevel=2, + ) + return _machar + raise AttributeError(f"Module {__name__!r} has no attribute {name!r}") + + +import copyreg + +copyreg.pickle(ufunc, _ufunc_reduce) +copyreg.pickle(type(dtype), _DType_reduce, _DType_reconstruct) + +# Unclutter namespace (must keep _*_reconstruct for unpickling) +del copyreg +del _ufunc_reduce +del _DType_reduce + +from numpy._pytesttester import PytestTester +test = PytestTester(__name__) +del PytestTester diff --git a/.local/lib/python3.8/site-packages/numpy/core/__init__.pyi b/.local/lib/python3.8/site-packages/numpy/core/__init__.pyi new file mode 100644 index 0000000..4c7a42b --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/__init__.pyi @@ -0,0 +1,2 @@ +# NOTE: The `np.core` namespace is deliberately kept empty due to it +# being private (despite the lack of leading underscore) diff --git a/.local/lib/python3.8/site-packages/numpy/core/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..a12d3c4 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/__pycache__/_add_newdocs.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/_add_newdocs.cpython-38.pyc new file mode 100644 index 0000000..ae132f3 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/_add_newdocs.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/__pycache__/_add_newdocs_scalars.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/_add_newdocs_scalars.cpython-38.pyc new file mode 100644 index 0000000..e3dbf34 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/_add_newdocs_scalars.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/__pycache__/_asarray.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/_asarray.cpython-38.pyc new file mode 100644 index 0000000..f86a311 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/_asarray.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/__pycache__/_dtype.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/_dtype.cpython-38.pyc new file mode 100644 index 0000000..7347ec2 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/_dtype.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/__pycache__/_dtype_ctypes.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/_dtype_ctypes.cpython-38.pyc new file mode 100644 index 0000000..9bdb664 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/_dtype_ctypes.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/__pycache__/_exceptions.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/_exceptions.cpython-38.pyc new file mode 100644 index 0000000..7159a4a Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/_exceptions.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/__pycache__/_internal.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/_internal.cpython-38.pyc new file mode 100644 index 0000000..2be68f9 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/_internal.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/__pycache__/_machar.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/_machar.cpython-38.pyc new file mode 100644 index 0000000..029f324 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/_machar.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/__pycache__/_methods.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/_methods.cpython-38.pyc new file mode 100644 index 0000000..e89846f Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/_methods.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/__pycache__/_string_helpers.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/_string_helpers.cpython-38.pyc new file mode 100644 index 0000000..b1008c2 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/_string_helpers.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/__pycache__/_type_aliases.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/_type_aliases.cpython-38.pyc new file mode 100644 index 0000000..fdcf98c Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/_type_aliases.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/__pycache__/_ufunc_config.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/_ufunc_config.cpython-38.pyc new file mode 100644 index 0000000..b8d042c Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/_ufunc_config.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/__pycache__/arrayprint.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/arrayprint.cpython-38.pyc new file mode 100644 index 0000000..48440f4 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/arrayprint.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/__pycache__/cversions.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/cversions.cpython-38.pyc new file mode 100644 index 0000000..01742ca Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/cversions.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/__pycache__/defchararray.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/defchararray.cpython-38.pyc new file mode 100644 index 0000000..e3a7665 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/defchararray.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/__pycache__/einsumfunc.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/einsumfunc.cpython-38.pyc new file mode 100644 index 0000000..f7797ae Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/einsumfunc.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/__pycache__/fromnumeric.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/fromnumeric.cpython-38.pyc new file mode 100644 index 0000000..eaf94df Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/fromnumeric.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/__pycache__/function_base.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/function_base.cpython-38.pyc new file mode 100644 index 0000000..c14df97 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/function_base.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/__pycache__/generate_numpy_api.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/generate_numpy_api.cpython-38.pyc new file mode 100644 index 0000000..28af97d Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/generate_numpy_api.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/__pycache__/getlimits.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/getlimits.cpython-38.pyc new file mode 100644 index 0000000..9690416 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/getlimits.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/__pycache__/memmap.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/memmap.cpython-38.pyc new file mode 100644 index 0000000..6c4496c Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/memmap.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/__pycache__/multiarray.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/multiarray.cpython-38.pyc new file mode 100644 index 0000000..4422768 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/multiarray.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/__pycache__/numeric.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/numeric.cpython-38.pyc new file mode 100644 index 0000000..9932bd4 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/numeric.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/__pycache__/numerictypes.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/numerictypes.cpython-38.pyc new file mode 100644 index 0000000..54e719e Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/numerictypes.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/__pycache__/overrides.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/overrides.cpython-38.pyc new file mode 100644 index 0000000..0e7d32a Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/overrides.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/__pycache__/records.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/records.cpython-38.pyc new file mode 100644 index 0000000..d634e47 Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/records.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/__pycache__/setup.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/setup.cpython-38.pyc new file mode 100644 index 0000000..cff883c Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/setup.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/__pycache__/setup_common.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/setup_common.cpython-38.pyc new file mode 100644 index 0000000..4769a70 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/setup_common.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/__pycache__/shape_base.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/shape_base.cpython-38.pyc new file mode 100644 index 0000000..3580046 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/shape_base.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/__pycache__/umath.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/umath.cpython-38.pyc new file mode 100644 index 0000000..223b040 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/umath.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/__pycache__/umath_tests.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/umath_tests.cpython-38.pyc new file mode 100644 index 0000000..70e70c9 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/__pycache__/umath_tests.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/_add_newdocs.py b/.local/lib/python3.8/site-packages/numpy/core/_add_newdocs.py new file mode 100644 index 0000000..078c589 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/_add_newdocs.py @@ -0,0 +1,6735 @@ +""" +This is only meant to add docs to objects defined in C-extension modules. +The purpose is to allow easier editing of the docstrings without +requiring a re-compile. + +NOTE: Many of the methods of ndarray have corresponding functions. + If you update these docstrings, please keep also the ones in + core/fromnumeric.py, core/defmatrix.py up-to-date. + +""" + +from numpy.core.function_base import add_newdoc +from numpy.core.overrides import array_function_like_doc + +############################################################################### +# +# flatiter +# +# flatiter needs a toplevel description +# +############################################################################### + +add_newdoc('numpy.core', 'flatiter', + """ + Flat iterator object to iterate over arrays. + + A `flatiter` iterator is returned by ``x.flat`` for any array `x`. + It allows iterating over the array as if it were a 1-D array, + either in a for-loop or by calling its `next` method. + + Iteration is done in row-major, C-style order (the last + index varying the fastest). The iterator can also be indexed using + basic slicing or advanced indexing. + + See Also + -------- + ndarray.flat : Return a flat iterator over an array. + ndarray.flatten : Returns a flattened copy of an array. + + Notes + ----- + A `flatiter` iterator can not be constructed directly from Python code + by calling the `flatiter` constructor. 
+ + Examples + -------- + >>> x = np.arange(6).reshape(2, 3) + >>> fl = x.flat + >>> type(fl) + <class 'numpy.flatiter'> + >>> for item in fl: + ... print(item) + ... + 0 + 1 + 2 + 3 + 4 + 5 + + >>> fl[2:4] + array([2, 3]) + + """) + +# flatiter attributes + +add_newdoc('numpy.core', 'flatiter', ('base', + """ + A reference to the array that is iterated over. + + Examples + -------- + >>> x = np.arange(5) + >>> fl = x.flat + >>> fl.base is x + True + + """)) + + + +add_newdoc('numpy.core', 'flatiter', ('coords', + """ + An N-dimensional tuple of current coordinates. + + Examples + -------- + >>> x = np.arange(6).reshape(2, 3) + >>> fl = x.flat + >>> fl.coords + (0, 0) + >>> next(fl) + 0 + >>> fl.coords + (0, 1) + + """)) + + + +add_newdoc('numpy.core', 'flatiter', ('index', + """ + Current flat index into the array. + + Examples + -------- + >>> x = np.arange(6).reshape(2, 3) + >>> fl = x.flat + >>> fl.index + 0 + >>> next(fl) + 0 + >>> fl.index + 1 + + """)) + +# flatiter functions + +add_newdoc('numpy.core', 'flatiter', ('__array__', + """__array__(type=None) Get array from iterator + + """)) + + +add_newdoc('numpy.core', 'flatiter', ('copy', + """ + copy() + + Get a copy of the iterator as a 1-D array. + + Examples + -------- + >>> x = np.arange(6).reshape(2, 3) + >>> x + array([[0, 1, 2], + [3, 4, 5]]) + >>> fl = x.flat + >>> fl.copy() + array([0, 1, 2, 3, 4, 5]) + + """)) + + +############################################################################### +# +# nditer +# +############################################################################### + +add_newdoc('numpy.core', 'nditer', + """ + nditer(op, flags=None, op_flags=None, op_dtypes=None, order='K', casting='safe', op_axes=None, itershape=None, buffersize=0) + + Efficient multi-dimensional iterator object to iterate over arrays. + To get started using this object, see the + :ref:`introductory guide to array iteration <arrays.nditer>`. + + Parameters + ---------- + op : ndarray or sequence of array_like + The array(s) to iterate over. + + flags : sequence of str, optional + Flags to control the behavior of the iterator. + + * ``buffered`` enables buffering when required. + * ``c_index`` causes a C-order index to be tracked. + * ``f_index`` causes a Fortran-order index to be tracked. + * ``multi_index`` causes a multi-index, or a tuple of indices + with one per iteration dimension, to be tracked. + * ``common_dtype`` causes all the operands to be converted to + a common data type, with copying or buffering as necessary. + * ``copy_if_overlap`` causes the iterator to determine if read + operands have overlap with write operands, and make temporary + copies as necessary to avoid overlap. False positives (needless + copying) are possible in some cases. + * ``delay_bufalloc`` delays allocation of the buffers until + a reset() call is made. Allows ``allocate`` operands to + be initialized before their values are copied into the buffers. + * ``external_loop`` causes the ``values`` given to be + one-dimensional arrays with multiple values instead of + zero-dimensional arrays. + * ``grow_inner`` allows the ``value`` array sizes to be made + larger than the buffer size when both ``buffered`` and + ``external_loop`` are used. + * ``ranged`` allows the iterator to be restricted to a sub-range + of the iterindex values. + * ``refs_ok`` enables iteration of reference types, such as + object arrays. + * ``reduce_ok`` enables iteration of ``readwrite`` operands + which are broadcasted, also known as reduction operands. + * ``zerosize_ok`` allows `itersize` to be zero.
+ op_flags : list of list of str, optional + This is a list of flags for each operand. At minimum, one of + ``readonly``, ``readwrite``, or ``writeonly`` must be specified. + + * ``readonly`` indicates the operand will only be read from. + * ``readwrite`` indicates the operand will be read from and written to. + * ``writeonly`` indicates the operand will only be written to. + * ``no_broadcast`` prevents the operand from being broadcasted. + * ``contig`` forces the operand data to be contiguous. + * ``aligned`` forces the operand data to be aligned. + * ``nbo`` forces the operand data to be in native byte order. + * ``copy`` allows a temporary read-only copy if required. + * ``updateifcopy`` allows a temporary read-write copy if required. + * ``allocate`` causes the array to be allocated if it is None + in the ``op`` parameter. + * ``no_subtype`` prevents an ``allocate`` operand from using a subtype. + * ``arraymask`` indicates that this operand is the mask to use + for selecting elements when writing to operands with the + 'writemasked' flag set. The iterator does not enforce this, + but when writing from a buffer back to the array, it only + copies those elements indicated by this mask. + * ``writemasked`` indicates that only elements where the chosen + ``arraymask`` operand is True will be written to. + * ``overlap_assume_elementwise`` can be used to mark operands that are + accessed only in the iterator order, to allow less conservative + copying when ``copy_if_overlap`` is present. + op_dtypes : dtype or tuple of dtype(s), optional + The required data type(s) of the operands. If copying or buffering + is enabled, the data will be converted to/from their original types. + order : {'C', 'F', 'A', 'K'}, optional + Controls the iteration order. 'C' means C order, 'F' means + Fortran order, 'A' means 'F' order if all the arrays are Fortran + contiguous, 'C' order otherwise, and 'K' means as close to the + order the array elements appear in memory as possible. This also + affects the element memory order of ``allocate`` operands, as they + are allocated to be compatible with iteration order. + Default is 'K'. + casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional + Controls what kind of data casting may occur when making a copy + or buffering. Setting this to 'unsafe' is not recommended, + as it can adversely affect accumulations. + + * 'no' means the data types should not be cast at all. + * 'equiv' means only byte-order changes are allowed. + * 'safe' means only casts which can preserve values are allowed. + * 'same_kind' means only safe casts or casts within a kind, + like float64 to float32, are allowed. + * 'unsafe' means any data conversions may be done. + op_axes : list of list of ints, optional + If provided, is a list of ints or None for each operand. + The list of axes for an operand is a mapping from the dimensions + of the iterator to the dimensions of the operand. A value of + -1 can be placed for entries, causing that dimension to be + treated as `newaxis`. + itershape : tuple of ints, optional + The desired shape of the iterator. This allows ``allocate`` operands + with a dimension mapped by op_axes not corresponding to a dimension + of a different operand to get a value not equal to 1 for that + dimension. + buffersize : int, optional + When buffering is enabled, controls the size of the temporary + buffers. Set to 0 for the default value. + + Attributes + ---------- + dtypes : tuple of dtype(s) + The data types of the values provided in `value`.
+        This may be different from the operand data types if buffering is
+        enabled. Valid only before the iterator is closed.
+    finished : bool
+        Whether the iteration over the operands is finished or not.
+    has_delayed_bufalloc : bool
+        If True, the iterator was created with the ``delay_bufalloc`` flag,
+        and no reset() function was called on it yet.
+    has_index : bool
+        If True, the iterator was created with either the ``c_index`` or
+        the ``f_index`` flag, and the property `index` can be used to
+        retrieve it.
+    has_multi_index : bool
+        If True, the iterator was created with the ``multi_index`` flag,
+        and the property `multi_index` can be used to retrieve it.
+    index
+        When the ``c_index`` or ``f_index`` flag was used, this property
+        provides access to the index. Raises a ValueError if accessed
+        and ``has_index`` is False.
+    iterationneedsapi : bool
+        Whether iteration requires access to the Python API, for example
+        if one of the operands is an object array.
+    iterindex : int
+        An index which matches the order of iteration.
+    itersize : int
+        Size of the iterator.
+    itviews
+        Structured view(s) of `operands` in memory, matching the reordered
+        and optimized iterator access pattern. Valid only before the iterator
+        is closed.
+    multi_index
+        When the ``multi_index`` flag was used, this property
+        provides access to the index. Raises a ValueError if accessed
+        and ``has_multi_index`` is False.
+    ndim : int
+        The dimensions of the iterator.
+    nop : int
+        The number of iterator operands.
+    operands : tuple of operand(s)
+        The array(s) to be iterated over. Valid only before the iterator is
+        closed.
+    shape : tuple of ints
+        Shape tuple, the shape of the iterator.
+    value
+        Value of ``operands`` at current iteration. Normally, this is a
+        tuple of array scalars, but if the flag ``external_loop`` is used,
+        it is a tuple of one dimensional arrays.
+
+    Notes
+    -----
+    `nditer` supersedes `flatiter`. The iterator implementation behind
+    `nditer` is also exposed by the NumPy C API.
+
+    The Python exposure supplies two iteration interfaces, one which follows
+    the Python iterator protocol, and another which mirrors the C-style
+    do-while pattern. The native Python approach is better in most cases, but
+    if you need the coordinates or index of an iterator, use the C-style
+    pattern.
+
+    Examples
+    --------
+    Here is how we might write an ``iter_add`` function, using the
+    Python iterator protocol:
+
+    >>> def iter_add_py(x, y, out=None):
+    ...     addop = np.add
+    ...     it = np.nditer([x, y, out], [],
+    ...                 [['readonly'], ['readonly'], ['writeonly','allocate']])
+    ...     with it:
+    ...         for (a, b, c) in it:
+    ...             addop(a, b, out=c)
+    ...         return it.operands[2]
+
+    Here is the same function, but following the C-style pattern:
+
+    >>> def iter_add(x, y, out=None):
+    ...     addop = np.add
+    ...     it = np.nditer([x, y, out], [],
+    ...                 [['readonly'], ['readonly'], ['writeonly','allocate']])
+    ...     with it:
+    ...         while not it.finished:
+    ...             addop(it[0], it[1], out=it[2])
+    ...             it.iternext()
+    ...         return it.operands[2]
+
+    Here is an example outer product function:
+
+    >>> def outer_it(x, y, out=None):
+    ...     mulop = np.multiply
+    ...     it = np.nditer([x, y, out], ['external_loop'],
+    ...             [['readonly'], ['readonly'], ['writeonly', 'allocate']],
+    ...             op_axes=[list(range(x.ndim)) + [-1] * y.ndim,
+    ...                      [-1] * x.ndim + list(range(y.ndim)),
+    ...                      None])
+    ...     with it:
+    ...         for (a, b, c) in it:
+    ...             mulop(a, b, out=c)
+    ...         return it.operands[2]
+
+    >>> a = np.arange(2)+1
+    >>> b = np.arange(3)+1
+    >>> outer_it(a,b)
+    array([[1, 2, 3],
+           [2, 4, 6]])
+
+    Here is an example function which operates like a "lambda" ufunc:
+
+    >>> def luf(lambdaexpr, *args, **kwargs):
+    ...     '''luf(lambdaexpr, op1, ..., opn, out=None, order='K', casting='safe', buffersize=0)'''
+    ...     nargs = len(args)
+    ...     op = (kwargs.get('out',None),) + args
+    ...     it = np.nditer(op, ['buffered','external_loop'],
+    ...             [['writeonly','allocate','no_broadcast']] +
+    ...                             [['readonly','nbo','aligned']]*nargs,
+    ...             order=kwargs.get('order','K'),
+    ...             casting=kwargs.get('casting','safe'),
+    ...             buffersize=kwargs.get('buffersize',0))
+    ...     while not it.finished:
+    ...         it[0] = lambdaexpr(*it[1:])
+    ...         it.iternext()
+    ...     return it.operands[0]
+
+    >>> a = np.arange(5)
+    >>> b = np.ones(5)
+    >>> luf(lambda i,j:i*i + j/2, a, b)
+    array([  0.5,   1.5,   4.5,   9.5,  16.5])
+
+    If operand flags `"writeonly"` or `"readwrite"` are used the
+    operands may be views into the original data with the
+    `WRITEBACKIFCOPY` flag. In this case `nditer` must be used as a
+    context manager or the `nditer.close` method must be called before
+    using the result. The temporary data will be written back to the
+    original data when the `__exit__` function is called but not before:
+
+    >>> a = np.arange(6, dtype='i4')[::-2]
+    >>> with np.nditer(a, [],
+    ...        [['writeonly', 'updateifcopy']],
+    ...        casting='unsafe',
+    ...        op_dtypes=[np.dtype('f4')]) as i:
+    ...    x = i.operands[0]
+    ...    x[:] = [-1, -2, -3]
+    ...    # a still unchanged here
+    >>> a, x
+    (array([-1, -2, -3], dtype=int32), array([-1., -2., -3.], dtype=float32))
+
+    It is important to note that once the iterator is exited, dangling
+    references (like `x` in the example) may or may not share data with
+    the original data `a`. If writeback semantics were active, i.e. if
+    `x.base.flags.writebackifcopy` is `True`, then exiting the iterator
+    will sever the connection between `x` and `a`, writing to `x` will
+    no longer write to `a`. If writeback semantics are not active, then
+    `x.data` will still point at some part of `a.data`, and writing to
+    one will affect the other.
+
+    Context management and the `close` method appeared in version 1.15.0.
+
+    """)
+
+# nditer methods
+
+add_newdoc('numpy.core', 'nditer', ('copy',
+    """
+    copy()
+
+    Get a copy of the iterator in its current state.
+
+    Examples
+    --------
+    >>> x = np.arange(10)
+    >>> y = x + 1
+    >>> it = np.nditer([x, y])
+    >>> next(it)
+    (array(0), array(1))
+    >>> it2 = it.copy()
+    >>> next(it2)
+    (array(1), array(2))
+
+    """))
+
+add_newdoc('numpy.core', 'nditer', ('operands',
+    """
+    operands[`Slice`]
+
+    The array(s) to be iterated over. Valid only before the iterator is closed.
+
+    """))
+
+add_newdoc('numpy.core', 'nditer', ('debug_print',
+    """
+    debug_print()
+
+    Print the current state of the `nditer` instance and debug info to stdout.
+
+    """))
+
+add_newdoc('numpy.core', 'nditer', ('enable_external_loop',
+    """
+    enable_external_loop()
+
+    When the "external_loop" was not used during construction, but
+    is desired, this modifies the iterator to behave as if the flag
+    was specified.
+
+    """))
+
+add_newdoc('numpy.core', 'nditer', ('iternext',
+    """
+    iternext()
+
+    Check whether iterations are left, and perform a single internal iteration
+    without returning the result. Used in the C-style do-while pattern.
+    For an example, see `nditer`.
+
+    Returns
+    -------
+    iternext : bool
+        Whether or not there are iterations left.
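+
+    Examples
+    --------
+    A minimal sketch of the do-while pattern (the array and the running
+    total here are arbitrary):
+
+    >>> it = np.nditer(np.arange(3))  # any small array works
+    >>> total = 0
+    >>> while not it.finished:
+    ...     total += int(it[0])
+    ...     _ = it.iternext()
+    >>> total
+    3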
+ + """)) + +add_newdoc('numpy.core', 'nditer', ('remove_axis', + """ + remove_axis(i, /) + + Removes axis `i` from the iterator. Requires that the flag "multi_index" + be enabled. + + """)) + +add_newdoc('numpy.core', 'nditer', ('remove_multi_index', + """ + remove_multi_index() + + When the "multi_index" flag was specified, this removes it, allowing + the internal iteration structure to be optimized further. + + """)) + +add_newdoc('numpy.core', 'nditer', ('reset', + """ + reset() + + Reset the iterator to its initial state. + + """)) + +add_newdoc('numpy.core', 'nested_iters', + """ + nested_iters(op, axes, flags=None, op_flags=None, op_dtypes=None, \ + order="K", casting="safe", buffersize=0) + + Create nditers for use in nested loops + + Create a tuple of `nditer` objects which iterate in nested loops over + different axes of the op argument. The first iterator is used in the + outermost loop, the last in the innermost loop. Advancing one will change + the subsequent iterators to point at its new element. + + Parameters + ---------- + op : ndarray or sequence of array_like + The array(s) to iterate over. + + axes : list of list of int + Each item is used as an "op_axes" argument to an nditer + + flags, op_flags, op_dtypes, order, casting, buffersize (optional) + See `nditer` parameters of the same name + + Returns + ------- + iters : tuple of nditer + An nditer for each item in `axes`, outermost first + + See Also + -------- + nditer + + Examples + -------- + + Basic usage. Note how y is the "flattened" version of + [a[:, 0, :], a[:, 1, 0], a[:, 2, :]] since we specified + the first iter's axes as [1] + + >>> a = np.arange(12).reshape(2, 3, 2) + >>> i, j = np.nested_iters(a, [[1], [0, 2]], flags=["multi_index"]) + >>> for x in i: + ... print(i.multi_index) + ... for y in j: + ... print('', j.multi_index, y) + (0,) + (0, 0) 0 + (0, 1) 1 + (1, 0) 6 + (1, 1) 7 + (1,) + (0, 0) 2 + (0, 1) 3 + (1, 0) 8 + (1, 1) 9 + (2,) + (0, 0) 4 + (0, 1) 5 + (1, 0) 10 + (1, 1) 11 + + """) + +add_newdoc('numpy.core', 'nditer', ('close', + """ + close() + + Resolve all writeback semantics in writeable operands. + + .. versionadded:: 1.15.0 + + See Also + -------- + + :ref:`nditer-context-manager` + + """)) + + +############################################################################### +# +# broadcast +# +############################################################################### + +add_newdoc('numpy.core', 'broadcast', + """ + Produce an object that mimics broadcasting. + + Parameters + ---------- + in1, in2, ... : array_like + Input parameters. + + Returns + ------- + b : broadcast object + Broadcast the input parameters against one another, and + return an object that encapsulates the result. + Amongst others, it has ``shape`` and ``nd`` properties, and + may be used as an iterator. 
+ + See Also + -------- + broadcast_arrays + broadcast_to + broadcast_shapes + + Examples + -------- + + Manually adding two vectors, using broadcasting: + + >>> x = np.array([[1], [2], [3]]) + >>> y = np.array([4, 5, 6]) + >>> b = np.broadcast(x, y) + + >>> out = np.empty(b.shape) + >>> out.flat = [u+v for (u,v) in b] + >>> out + array([[5., 6., 7.], + [6., 7., 8.], + [7., 8., 9.]]) + + Compare against built-in broadcasting: + + >>> x + y + array([[5, 6, 7], + [6, 7, 8], + [7, 8, 9]]) + + """) + +# attributes + +add_newdoc('numpy.core', 'broadcast', ('index', + """ + current index in broadcasted result + + Examples + -------- + >>> x = np.array([[1], [2], [3]]) + >>> y = np.array([4, 5, 6]) + >>> b = np.broadcast(x, y) + >>> b.index + 0 + >>> next(b), next(b), next(b) + ((1, 4), (1, 5), (1, 6)) + >>> b.index + 3 + + """)) + +add_newdoc('numpy.core', 'broadcast', ('iters', + """ + tuple of iterators along ``self``'s "components." + + Returns a tuple of `numpy.flatiter` objects, one for each "component" + of ``self``. + + See Also + -------- + numpy.flatiter + + Examples + -------- + >>> x = np.array([1, 2, 3]) + >>> y = np.array([[4], [5], [6]]) + >>> b = np.broadcast(x, y) + >>> row, col = b.iters + >>> next(row), next(col) + (1, 4) + + """)) + +add_newdoc('numpy.core', 'broadcast', ('ndim', + """ + Number of dimensions of broadcasted result. Alias for `nd`. + + .. versionadded:: 1.12.0 + + Examples + -------- + >>> x = np.array([1, 2, 3]) + >>> y = np.array([[4], [5], [6]]) + >>> b = np.broadcast(x, y) + >>> b.ndim + 2 + + """)) + +add_newdoc('numpy.core', 'broadcast', ('nd', + """ + Number of dimensions of broadcasted result. For code intended for NumPy + 1.12.0 and later the more consistent `ndim` is preferred. + + Examples + -------- + >>> x = np.array([1, 2, 3]) + >>> y = np.array([[4], [5], [6]]) + >>> b = np.broadcast(x, y) + >>> b.nd + 2 + + """)) + +add_newdoc('numpy.core', 'broadcast', ('numiter', + """ + Number of iterators possessed by the broadcasted result. + + Examples + -------- + >>> x = np.array([1, 2, 3]) + >>> y = np.array([[4], [5], [6]]) + >>> b = np.broadcast(x, y) + >>> b.numiter + 2 + + """)) + +add_newdoc('numpy.core', 'broadcast', ('shape', + """ + Shape of broadcasted result. + + Examples + -------- + >>> x = np.array([1, 2, 3]) + >>> y = np.array([[4], [5], [6]]) + >>> b = np.broadcast(x, y) + >>> b.shape + (3, 3) + + """)) + +add_newdoc('numpy.core', 'broadcast', ('size', + """ + Total size of broadcasted result. + + Examples + -------- + >>> x = np.array([1, 2, 3]) + >>> y = np.array([[4], [5], [6]]) + >>> b = np.broadcast(x, y) + >>> b.size + 9 + + """)) + +add_newdoc('numpy.core', 'broadcast', ('reset', + """ + reset() + + Reset the broadcasted result's iterator(s). + + Parameters + ---------- + None + + Returns + ------- + None + + Examples + -------- + >>> x = np.array([1, 2, 3]) + >>> y = np.array([[4], [5], [6]]) + >>> b = np.broadcast(x, y) + >>> b.index + 0 + >>> next(b), next(b), next(b) + ((1, 4), (2, 4), (3, 4)) + >>> b.index + 3 + >>> b.reset() + >>> b.index + 0 + + """)) + +############################################################################### +# +# numpy functions +# +############################################################################### + +add_newdoc('numpy.core.multiarray', 'array', + """ + array(object, dtype=None, *, copy=True, order='K', subok=False, ndmin=0, + like=None) + + Create an array. 
+ + Parameters + ---------- + object : array_like + An array, any object exposing the array interface, an object whose + __array__ method returns an array, or any (nested) sequence. + If object is a scalar, a 0-dimensional array containing object is + returned. + dtype : data-type, optional + The desired data-type for the array. If not given, then the type will + be determined as the minimum type required to hold the objects in the + sequence. + copy : bool, optional + If true (default), then the object is copied. Otherwise, a copy will + only be made if __array__ returns a copy, if obj is a nested sequence, + or if a copy is needed to satisfy any of the other requirements + (`dtype`, `order`, etc.). + order : {'K', 'A', 'C', 'F'}, optional + Specify the memory layout of the array. If object is not an array, the + newly created array will be in C order (row major) unless 'F' is + specified, in which case it will be in Fortran order (column major). + If object is an array the following holds. + + ===== ========= =================================================== + order no copy copy=True + ===== ========= =================================================== + 'K' unchanged F & C order preserved, otherwise most similar order + 'A' unchanged F order if input is F and not C, otherwise C order + 'C' C order C order + 'F' F order F order + ===== ========= =================================================== + + When ``copy=False`` and a copy is made for other reasons, the result is + the same as if ``copy=True``, with some exceptions for 'A', see the + Notes section. The default order is 'K'. + subok : bool, optional + If True, then sub-classes will be passed-through, otherwise + the returned array will be forced to be a base-class array (default). + ndmin : int, optional + Specifies the minimum number of dimensions that the resulting + array should have. Ones will be pre-pended to the shape as + needed to meet this requirement. + ${ARRAY_FUNCTION_LIKE} + + .. versionadded:: 1.20.0 + + Returns + ------- + out : ndarray + An array object satisfying the specified requirements. + + See Also + -------- + empty_like : Return an empty array with shape and type of input. + ones_like : Return an array of ones with shape and type of input. + zeros_like : Return an array of zeros with shape and type of input. + full_like : Return a new array with shape of input filled with value. + empty : Return a new uninitialized array. + ones : Return a new array setting values to one. + zeros : Return a new array setting values to zero. + full : Return a new array of given shape filled with value. + + + Notes + ----- + When order is 'A' and `object` is an array in neither 'C' nor 'F' order, + and a copy is forced by a change in dtype, then the order of the result is + not necessarily 'C' as expected. This is likely a bug. 
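+
+    As a minimal sketch of the 'A' row in the table above (the array
+    ``a`` here is arbitrary):
+
+    >>> a = np.ones((2, 3), order='F')  # an F-ordered, non-C-ordered input
+    >>> np.array(a, order='A').flags['F_CONTIGUOUS']
+    True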
+
+    Examples
+    --------
+    >>> np.array([1, 2, 3])
+    array([1, 2, 3])
+
+    Upcasting:
+
+    >>> np.array([1, 2, 3.0])
+    array([ 1.,  2.,  3.])
+
+    More than one dimension:
+
+    >>> np.array([[1, 2], [3, 4]])
+    array([[1, 2],
+           [3, 4]])
+
+    Minimum dimensions 2:
+
+    >>> np.array([1, 2, 3], ndmin=2)
+    array([[1, 2, 3]])
+
+    Type provided:
+
+    >>> np.array([1, 2, 3], dtype=complex)
+    array([ 1.+0.j,  2.+0.j,  3.+0.j])
+
+    Data-type consisting of more than one element:
+
+    >>> x = np.array([(1,2),(3,4)], dtype=[('a','<i4'), ('b','<i4')])
+    >>> x['a']
+    array([1, 3])
+
+    Creating an array from sub-classes:
+
+    >>> np.array(np.mat('1 2; 3 4'))
+    array([[1, 2],
+           [3, 4]])
+
+    >>> np.array(np.mat('1 2; 3 4'), subok=True)
+    matrix([[1, 2],
+            [3, 4]])
+
+    """.replace(
+    "${ARRAY_FUNCTION_LIKE}",
+    array_function_like_doc,
+))
+
+add_newdoc('numpy.core.multiarray', 'asarray',
+    """
+    asarray(a, dtype=None, order=None, *, like=None)
+
+    Convert the input to an array.
+
+    Parameters
+    ----------
+    a : array_like
+        Input data, in any form that can be converted to an array. This
+        includes lists, lists of tuples, tuples, tuples of tuples, tuples
+        of lists and ndarrays.
+    dtype : data-type, optional
+        By default, the data-type is inferred from the input data.
+    order : {'C', 'F', 'A', 'K'}, optional
+        Memory layout. 'A' and 'K' depend on the order of input array a.
+        'C' row-major (C-style),
+        'F' column-major (Fortran-style) memory representation.
+        'A' (any) means 'F' if `a` is Fortran contiguous, 'C' otherwise
+        'K' (keep) preserve input order
+        Defaults to 'K'.
+    ${ARRAY_FUNCTION_LIKE}
+
+        .. versionadded:: 1.20.0
+
+    Returns
+    -------
+    out : ndarray
+        Array interpretation of `a`. No copy is performed if the input
+        is already an ndarray with matching dtype and order. If `a` is a
+        subclass of ndarray, a base class ndarray is returned.
+
+    See Also
+    --------
+    asanyarray : Similar function which passes through subclasses.
+    ascontiguousarray : Convert input to a contiguous array.
+    asfarray : Convert input to a floating point ndarray.
+    asfortranarray : Convert input to an ndarray with column-major
+                     memory order.
+    asarray_chkfinite : Similar function which checks input for NaNs and Infs.
+    fromiter : Create an array from an iterator.
+    fromfunction : Construct an array by executing a function on grid
+                   positions.
+
+    Examples
+    --------
+    Convert a list into an array:
+
+    >>> a = [1, 2]
+    >>> np.asarray(a)
+    array([1, 2])
+
+    Existing arrays are not copied:
+
+    >>> a = np.array([1, 2])
+    >>> np.asarray(a) is a
+    True
+
+    If `dtype` is set, array is copied only if dtype does not match:
+
+    >>> a = np.array([1, 2], dtype=np.float32)
+    >>> np.asarray(a, dtype=np.float32) is a
+    True
+    >>> np.asarray(a, dtype=np.float64) is a
+    False
+
+    Contrary to `asanyarray`, ndarray subclasses are not passed through:
+
+    >>> issubclass(np.recarray, np.ndarray)
+    True
+    >>> a = np.array([(1.0, 2), (3.0, 4)], dtype='f4,i4').view(np.recarray)
+    >>> np.asarray(a) is a
+    False
+    >>> np.asanyarray(a) is a
+    True
+
+    """.replace(
+    "${ARRAY_FUNCTION_LIKE}",
+    array_function_like_doc,
+))
+
+add_newdoc('numpy.core.multiarray', 'asanyarray',
+    """
+    asanyarray(a, dtype=None, order=None, *, like=None)
+
+    Convert the input to an ndarray, but pass ndarray subclasses through.
+
+    Parameters
+    ----------
+    a : array_like
+        Input data, in any form that can be converted to an array. This
+        includes scalars, lists, lists of tuples, tuples, tuples of tuples,
+        tuples of lists, and ndarrays.
+ dtype : data-type, optional + By default, the data-type is inferred from the input data. + order : {'C', 'F', 'A', 'K'}, optional + Memory layout. 'A' and 'K' depend on the order of input array a. + 'C' row-major (C-style), + 'F' column-major (Fortran-style) memory representation. + 'A' (any) means 'F' if `a` is Fortran contiguous, 'C' otherwise + 'K' (keep) preserve input order + Defaults to 'C'. + ${ARRAY_FUNCTION_LIKE} + + .. versionadded:: 1.20.0 + + Returns + ------- + out : ndarray or an ndarray subclass + Array interpretation of `a`. If `a` is an ndarray or a subclass + of ndarray, it is returned as-is and no copy is performed. + + See Also + -------- + asarray : Similar function which always returns ndarrays. + ascontiguousarray : Convert input to a contiguous array. + asfarray : Convert input to a floating point ndarray. + asfortranarray : Convert input to an ndarray with column-major + memory order. + asarray_chkfinite : Similar function which checks input for NaNs and + Infs. + fromiter : Create an array from an iterator. + fromfunction : Construct an array by executing a function on grid + positions. + + Examples + -------- + Convert a list into an array: + + >>> a = [1, 2] + >>> np.asanyarray(a) + array([1, 2]) + + Instances of `ndarray` subclasses are passed through as-is: + + >>> a = np.array([(1.0, 2), (3.0, 4)], dtype='f4,i4').view(np.recarray) + >>> np.asanyarray(a) is a + True + + """.replace( + "${ARRAY_FUNCTION_LIKE}", + array_function_like_doc, + )) + +add_newdoc('numpy.core.multiarray', 'ascontiguousarray', + """ + ascontiguousarray(a, dtype=None, *, like=None) + + Return a contiguous array (ndim >= 1) in memory (C order). + + Parameters + ---------- + a : array_like + Input array. + dtype : str or dtype object, optional + Data-type of returned array. + ${ARRAY_FUNCTION_LIKE} + + .. versionadded:: 1.20.0 + + Returns + ------- + out : ndarray + Contiguous array of same shape and content as `a`, with type `dtype` + if specified. + + See Also + -------- + asfortranarray : Convert input to an ndarray with column-major + memory order. + require : Return an ndarray that satisfies requirements. + ndarray.flags : Information about the memory layout of the array. + + Examples + -------- + >>> x = np.arange(6).reshape(2,3) + >>> np.ascontiguousarray(x, dtype=np.float32) + array([[0., 1., 2.], + [3., 4., 5.]], dtype=float32) + >>> x.flags['C_CONTIGUOUS'] + True + + Note: This function returns an array with at least one-dimension (1-d) + so it will not preserve 0-d arrays. + + """.replace( + "${ARRAY_FUNCTION_LIKE}", + array_function_like_doc, + )) + +add_newdoc('numpy.core.multiarray', 'asfortranarray', + """ + asfortranarray(a, dtype=None, *, like=None) + + Return an array (ndim >= 1) laid out in Fortran order in memory. + + Parameters + ---------- + a : array_like + Input array. + dtype : str or dtype object, optional + By default, the data-type is inferred from the input data. + ${ARRAY_FUNCTION_LIKE} + + .. versionadded:: 1.20.0 + + Returns + ------- + out : ndarray + The input `a` in Fortran, or column-major, order. + + See Also + -------- + ascontiguousarray : Convert input to a contiguous (C order) array. + asanyarray : Convert input to an ndarray with either row or + column-major memory order. + require : Return an ndarray that satisfies requirements. + ndarray.flags : Information about the memory layout of the array. 
+ + Examples + -------- + >>> x = np.arange(6).reshape(2,3) + >>> y = np.asfortranarray(x) + >>> x.flags['F_CONTIGUOUS'] + False + >>> y.flags['F_CONTIGUOUS'] + True + + Note: This function returns an array with at least one-dimension (1-d) + so it will not preserve 0-d arrays. + + """.replace( + "${ARRAY_FUNCTION_LIKE}", + array_function_like_doc, + )) + +add_newdoc('numpy.core.multiarray', 'empty', + """ + empty(shape, dtype=float, order='C', *, like=None) + + Return a new array of given shape and type, without initializing entries. + + Parameters + ---------- + shape : int or tuple of int + Shape of the empty array, e.g., ``(2, 3)`` or ``2``. + dtype : data-type, optional + Desired output data-type for the array, e.g, `numpy.int8`. Default is + `numpy.float64`. + order : {'C', 'F'}, optional, default: 'C' + Whether to store multi-dimensional data in row-major + (C-style) or column-major (Fortran-style) order in + memory. + ${ARRAY_FUNCTION_LIKE} + + .. versionadded:: 1.20.0 + + Returns + ------- + out : ndarray + Array of uninitialized (arbitrary) data of the given shape, dtype, and + order. Object arrays will be initialized to None. + + See Also + -------- + empty_like : Return an empty array with shape and type of input. + ones : Return a new array setting values to one. + zeros : Return a new array setting values to zero. + full : Return a new array of given shape filled with value. + + + Notes + ----- + `empty`, unlike `zeros`, does not set the array values to zero, + and may therefore be marginally faster. On the other hand, it requires + the user to manually set all the values in the array, and should be + used with caution. + + Examples + -------- + >>> np.empty([2, 2]) + array([[ -9.74499359e+001, 6.69583040e-309], + [ 2.13182611e-314, 3.06959433e-309]]) #uninitialized + + >>> np.empty([2, 2], dtype=int) + array([[-1073741821, -1067949133], + [ 496041986, 19249760]]) #uninitialized + + """.replace( + "${ARRAY_FUNCTION_LIKE}", + array_function_like_doc, + )) + +add_newdoc('numpy.core.multiarray', 'scalar', + """ + scalar(dtype, obj) + + Return a new scalar array of the given type initialized with obj. + + This function is meant mainly for pickle support. `dtype` must be a + valid data-type descriptor. If `dtype` corresponds to an object + descriptor, then `obj` can be any object, otherwise `obj` must be a + string. If `obj` is not given, it will be interpreted as None for object + type and as zeros for all other types. + + """) + +add_newdoc('numpy.core.multiarray', 'zeros', + """ + zeros(shape, dtype=float, order='C', *, like=None) + + Return a new array of given shape and type, filled with zeros. + + Parameters + ---------- + shape : int or tuple of ints + Shape of the new array, e.g., ``(2, 3)`` or ``2``. + dtype : data-type, optional + The desired data-type for the array, e.g., `numpy.int8`. Default is + `numpy.float64`. + order : {'C', 'F'}, optional, default: 'C' + Whether to store multi-dimensional data in row-major + (C-style) or column-major (Fortran-style) order in + memory. + ${ARRAY_FUNCTION_LIKE} + + .. versionadded:: 1.20.0 + + Returns + ------- + out : ndarray + Array of zeros with the given shape, dtype, and order. + + See Also + -------- + zeros_like : Return an array of zeros with shape and type of input. + empty : Return a new uninitialized array. + ones : Return a new array setting values to one. + full : Return a new array of given shape filled with value. 
+
+    Examples
+    --------
+    >>> np.zeros(5)
+    array([ 0.,  0.,  0.,  0.,  0.])
+
+    >>> np.zeros((5,), dtype=int)
+    array([0, 0, 0, 0, 0])
+
+    >>> np.zeros((2, 1))
+    array([[ 0.],
+           [ 0.]])
+
+    >>> s = (2,2)
+    >>> np.zeros(s)
+    array([[ 0.,  0.],
+           [ 0.,  0.]])
+
+    >>> np.zeros((2,), dtype=[('x', 'i4'), ('y', 'i4')]) # custom dtype
+    array([(0, 0), (0, 0)],
+          dtype=[('x', '<i4'), ('y', '<i4')])
+
+    """.replace(
+    "${ARRAY_FUNCTION_LIKE}",
+    array_function_like_doc,
+))
+
+add_newdoc('numpy.core.multiarray', 'fromstring',
+    """
+    fromstring(string, dtype=float, count=-1, *, sep, like=None)
+
+    A new 1-D array initialized from text data in a string.
+
+    Parameters
+    ----------
+    string : str
+        A string containing the data.
+    dtype : data-type, optional
+        The data type of the array; default: float.
+    count : int, optional
+        Read this number of `dtype` elements from the data. If this is
+        negative (the default), the count will be determined from the
+        length of the data.
+    sep : str, optional
+        The string separating numbers in the data; extra whitespace between
+        elements is also ignored.
+
+        .. deprecated:: 1.14
+            Passing ``sep=''``, the default, is deprecated since it will
+            trigger the deprecated binary mode of this function. This mode
+            interprets `string` as binary bytes, rather than ASCII text with
+            decimal numbers, an operation which is better spelt
+            ``frombuffer(string, dtype, count)``.
+    ${ARRAY_FUNCTION_LIKE}
+
+        .. versionadded:: 1.20.0
+
+    Returns
+    -------
+    arr : ndarray
+        The constructed array.
+
+    Raises
+    ------
+    ValueError
+        If the string is not the correct size to satisfy the requested
+        `dtype` and `count`.
+
+    See Also
+    --------
+    frombuffer, fromfile, fromiter
+
+    Examples
+    --------
+    >>> np.fromstring('1 2', dtype=int, sep=' ')
+    array([1, 2])
+    >>> np.fromstring('1, 2', dtype=int, sep=',')
+    array([1, 2])
+
+    """.replace(
+    "${ARRAY_FUNCTION_LIKE}",
+    array_function_like_doc,
+))
+
+add_newdoc('numpy.core.multiarray', 'compare_chararrays',
+    """
+    compare_chararrays(a1, a2, cmp, rstrip)
+
+    Performs element-wise comparison of two string arrays using the
+    comparison operator specified by `cmp`.
+
+    Parameters
+    ----------
+    a1, a2 : array_like
+        Arrays to be compared.
+    cmp : {"<", "<=", "==", ">=", ">", "!="}
+        Type of comparison.
+    rstrip : Boolean
+        If True, the spaces at the end of Strings are removed before the
+        comparison.
+
+    Returns
+    -------
+    out : ndarray
+        The output array of type Boolean with the same shape as `a1` and `a2`.
+
+    Raises
+    ------
+    ValueError
+        If `cmp` is not valid.
+    TypeError
+        If at least one of `a1` or `a2` is a non-string array.
+
+    Examples
+    --------
+    >>> a = np.array(["a", "b", "cde"])
+    >>> b = np.array(["a", "a", "dec"])
+    >>> np.compare_chararrays(a, b, ">", True)
+    array([False,  True, False])
+
+    """)
+
+add_newdoc('numpy.core.multiarray', 'fromiter',
+    """
+    fromiter(iter, dtype, count=-1, *, like=None)
+
+    Create a new 1-dimensional array from an iterable object.
+
+    Parameters
+    ----------
+    iter : iterable object
+        An iterable object providing data for the array.
+    dtype : data-type
+        The data-type of the returned array.
+    count : int, optional
+        The number of items to read from *iterable*. The default is -1,
+        which means all data is read.
+    ${ARRAY_FUNCTION_LIKE}
+
+        .. versionadded:: 1.20.0
+
+    Returns
+    -------
+    out : ndarray
+        The output array.
+
+    Notes
+    -----
+    Specify `count` to improve performance. It allows ``fromiter`` to
+    pre-allocate the output array, instead of resizing it on demand.
+
+    Examples
+    --------
+    >>> iterable = (x*x for x in range(5))
+    >>> np.fromiter(iterable, float)
+    array([  0.,   1.,   4.,   9.,  16.])
+
+    """.replace(
+    "${ARRAY_FUNCTION_LIKE}",
+    array_function_like_doc,
+))
+
+add_newdoc('numpy.core.multiarray', 'fromfile',
+    """
+    fromfile(file, dtype=float, count=-1, sep='', offset=0, *, like=None)
+
+    Construct an array from data in a text or binary file.
+
+    A highly efficient way of reading binary data with a known data-type,
+    as well as parsing simply formatted text files. Data written using the
+    `tofile` method can be read using this function.
+
+    Parameters
+    ----------
+    file : file or str or Path
+        Open file object or filename.
+
+        .. versionchanged:: 1.17.0
+            `pathlib.Path` objects are now accepted.
+
+    dtype : data-type
+        Data type of the returned array.
+        For binary files, it is used to determine the size and byte-order
+        of the items in the file.
+        Most builtin numeric types are supported and extension types may be
+        supported.
+
+        .. versionadded:: 1.18.0
+            Complex dtypes.
+
+    count : int
+        Number of items to read. ``-1`` means all items (i.e., the complete
+        file).
+    sep : str
+        Separator between items if file is a text file.
+        Empty ("") separator means the file should be treated as binary.
+        Spaces (" ") in the separator match zero or more whitespace
+        characters.
+        A separator consisting only of spaces must match at least one
+        whitespace.
+    offset : int
+        The offset (in bytes) from the file's current position. Defaults to 0.
+        Only permitted for binary files.
+
+        .. versionadded:: 1.17.0
+    ${ARRAY_FUNCTION_LIKE}
+
+        .. versionadded:: 1.20.0
+
+    See also
+    --------
+    load, save
+    ndarray.tofile
+    loadtxt : More flexible way of loading data from a text file.
+
+    Notes
+    -----
+    Do not rely on the combination of `tofile` and `fromfile` for
+    data storage, as the binary files generated are not platform
+    independent. In particular, no byte-order or data-type information is
+    saved. Data can be stored in the platform independent ``.npy`` format
+    using `save` and `load` instead.
+
+    Examples
+    --------
+    Construct an ndarray:
+
+    >>> dt = np.dtype([('time', [('min', np.int64), ('sec', np.int64)]),
+    ...                ('temp', float)])
+    >>> x = np.zeros((1,), dtype=dt)
+    >>> x['time']['min'] = 10; x['temp'] = 98.25
+    >>> x
+    array([((10, 0), 98.25)],
+          dtype=[('time', [('min', '<i8'), ('sec', '<i8')]), ('temp', '<f8')])
+
+    Save the raw data to disk:
+
+    >>> import tempfile
+    >>> fname = tempfile.mkstemp()[1]
+    >>> x.tofile(fname)
+
+    Read the raw data from disk:
+
+    >>> np.fromfile(fname, dtype=dt)
+    array([((10, 0), 98.25)],
+          dtype=[('time', [('min', '<i8'), ('sec', '<i8')]), ('temp', '<f8')])
+
+    The recommended way to store and load data:
+
+    >>> np.save(fname, x)
+    >>> np.load(fname + '.npy')
+    array([((10, 0), 98.25)],
+          dtype=[('time', [('min', '<i8'), ('sec', '<i8')]), ('temp', '<f8')])
+
+    """.replace(
+    "${ARRAY_FUNCTION_LIKE}",
+    array_function_like_doc,
+))
+
+add_newdoc('numpy.core.multiarray', 'frombuffer',
+    """
+    frombuffer(buffer, dtype=float, count=-1, offset=0, *, like=None)
+
+    Interpret a buffer as a 1-dimensional array.
+
+    Parameters
+    ----------
+    buffer : buffer_like
+        An object that exposes the buffer interface.
+    dtype : data-type, optional
+        Data-type of the returned array; default: float.
+    count : int, optional
+        Number of items to read. ``-1`` means all data in the buffer.
+    offset : int, optional
+        Start reading the buffer from this offset (in bytes); default: 0.
+    ${ARRAY_FUNCTION_LIKE}
+
+        .. versionadded:: 1.20.0
+
+    Notes
+    -----
+    If the buffer has data that is not in machine byte-order, this should
+    be specified as part of the data-type, e.g.::
+
+      >>> dt = np.dtype(int)
+      >>> dt = dt.newbyteorder('>')
+      >>> np.frombuffer(buf, dtype=dt) # doctest: +SKIP
+
+    The data of the resulting array will not be byteswapped, but will be
+    interpreted correctly.
+
+    Examples
+    --------
+    >>> s = b'hello world'
+    >>> np.frombuffer(s, dtype='S1', count=5, offset=6)
+    array([b'w', b'o', b'r', b'l', b'd'], dtype='|S1')
+
+    >>> np.frombuffer(b'\\x01\\x02', dtype=np.uint8)
+    array([1, 2], dtype=uint8)
+    >>> np.frombuffer(b'\\x01\\x02\\x03\\x04\\x05', dtype=np.uint8, count=3)
+    array([1, 2, 3], dtype=uint8)
+
+    """.replace(
+    "${ARRAY_FUNCTION_LIKE}",
+    array_function_like_doc,
+))
+
+add_newdoc('numpy.core.multiarray', '_from_dlpack',
+    """
+    _from_dlpack(x, /)
+
+    Create a NumPy array from an object implementing the ``__dlpack__``
+    protocol.
+
+    See Also
+    --------
+    `Array API documentation
+    <https://data-apis.org/array-api/latest/design_topics/data_interchange.html#syntax-for-data-interchange-with-dlpack>`_
+    """)
+
+add_newdoc('numpy.core', 'fastCopyAndTranspose',
+    """_fastCopyAndTranspose(a)""")
+
+add_newdoc('numpy.core.multiarray', 'correlate',
+    """cross_correlate(a,v, mode=0)""")
+
+add_newdoc('numpy.core.multiarray', 'arange',
+    """
+    arange([start,] stop[, step,], dtype=None, *, like=None)
+
+    Return evenly spaced values within a given interval.
+
+    Values are generated within the half-open interval ``[start, stop)``
+    (in other words, the interval including `start` but excluding `stop`).
+    For integer arguments the function is equivalent to the Python built-in
+    `range` function, but returns an ndarray rather than a list.
+
+    When using a non-integer step, such as 0.1, it is often better to use
+    `numpy.linspace`. See the warnings section below for more information.
+
+    Parameters
+    ----------
+    start : integer or real, optional
+        Start of interval. The interval includes this value. The default
+        start value is 0.
+    stop : integer or real
+        End of interval. The interval does not include this value, except
+        in some cases where `step` is not an integer and floating point
+        round-off affects the length of `out`.
+    step : integer or real, optional
+        Spacing between values. For any output `out`, this is the distance
+        between two adjacent values, ``out[i+1] - out[i]``. The default
+        step size is 1.
If `step` is specified as a position argument, + `start` must also be given. + dtype : dtype + The type of the output array. If `dtype` is not given, infer the data + type from the other input arguments. + ${ARRAY_FUNCTION_LIKE} + + .. versionadded:: 1.20.0 + + Returns + ------- + arange : ndarray + Array of evenly spaced values. + + For floating point arguments, the length of the result is + ``ceil((stop - start)/step)``. Because of floating point overflow, + this rule may result in the last element of `out` being greater + than `stop`. + + Warnings + -------- + The length of the output might not be numerically stable. + + Another stability issue is due to the internal implementation of + `numpy.arange`. + The actual step value used to populate the array is + ``dtype(start + step) - dtype(start)`` and not `step`. Precision loss + can occur here, due to casting or due to using floating points when + `start` is much larger than `step`. This can lead to unexpected + behaviour. For example:: + + >>> np.arange(0, 5, 0.5, dtype=int) + array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) + >>> np.arange(-3, 3, 0.5, dtype=int) + array([-3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8]) + + In such cases, the use of `numpy.linspace` should be preferred. + + See Also + -------- + numpy.linspace : Evenly spaced numbers with careful handling of endpoints. + numpy.ogrid: Arrays of evenly spaced numbers in N-dimensions. + numpy.mgrid: Grid-shaped arrays of evenly spaced numbers in N-dimensions. + + Examples + -------- + >>> np.arange(3) + array([0, 1, 2]) + >>> np.arange(3.0) + array([ 0., 1., 2.]) + >>> np.arange(3,7) + array([3, 4, 5, 6]) + >>> np.arange(3,7,2) + array([3, 5]) + + """.replace( + "${ARRAY_FUNCTION_LIKE}", + array_function_like_doc, + )) + +add_newdoc('numpy.core.multiarray', '_get_ndarray_c_version', + """_get_ndarray_c_version() + + Return the compile time NPY_VERSION (formerly called NDARRAY_VERSION) number. + + """) + +add_newdoc('numpy.core.multiarray', '_reconstruct', + """_reconstruct(subtype, shape, dtype) + + Construct an empty array. Used by Pickles. + + """) + + +add_newdoc('numpy.core.multiarray', 'set_string_function', + """ + set_string_function(f, repr=1) + + Internal method to set a function to be used when pretty printing arrays. + + """) + +add_newdoc('numpy.core.multiarray', 'set_numeric_ops', + """ + set_numeric_ops(op1=func1, op2=func2, ...) + + Set numerical operators for array objects. + + .. deprecated:: 1.16 + + For the general case, use :c:func:`PyUFunc_ReplaceLoopBySignature`. + For ndarray subclasses, define the ``__array_ufunc__`` method and + override the relevant ufunc. + + Parameters + ---------- + op1, op2, ... : callable + Each ``op = func`` pair describes an operator to be replaced. + For example, ``add = lambda x, y: np.add(x, y) % 5`` would replace + addition by modulus 5 addition. + + Returns + ------- + saved_ops : list of callables + A list of all operators, stored before making replacements. + + Notes + ----- + .. WARNING:: + Use with care! Incorrect usage may lead to memory errors. + + A function replacing an operator cannot make use of that operator. + For example, when replacing add, you may not use ``+``. Instead, + directly call ufuncs. + + Examples + -------- + >>> def add_mod5(x, y): + ... return np.add(x, y) % 5 + ... 
+
+    >>> old_funcs = np.set_numeric_ops(add=add_mod5)
+
+    >>> x = np.arange(12).reshape((3, 4))
+    >>> x + x
+    array([[0, 2, 4, 1],
+           [3, 0, 2, 4],
+           [1, 3, 0, 2]])
+
+    >>> ignore = np.set_numeric_ops(**old_funcs) # restore operators
+
+    """)
+
+add_newdoc('numpy.core.multiarray', 'promote_types',
+    """
+    promote_types(type1, type2)
+
+    Returns the data type with the smallest size and smallest scalar
+    kind to which both ``type1`` and ``type2`` may be safely cast.
+    The returned data type is always in native byte order.
+
+    This function is symmetric, but rarely associative.
+
+    Parameters
+    ----------
+    type1 : dtype or dtype specifier
+        First data type.
+    type2 : dtype or dtype specifier
+        Second data type.
+
+    Returns
+    -------
+    out : dtype
+        The promoted data type.
+
+    Notes
+    -----
+    .. versionadded:: 1.6.0
+
+    Starting in NumPy 1.9, promote_types function now returns a valid string
+    length when given an integer or float dtype as one argument and a string
+    dtype as another argument. Previously it always returned the input string
+    dtype, even if it wasn't long enough to store the max integer/float value
+    converted to a string.
+
+    See Also
+    --------
+    result_type, dtype, can_cast
+
+    Examples
+    --------
+    >>> np.promote_types('f4', 'f8')
+    dtype('float64')
+
+    >>> np.promote_types('i8', 'f4')
+    dtype('float64')
+
+    >>> np.promote_types('>i8', '<c8')
+    dtype('complex128')
+
+    >>> np.promote_types('i4', 'S8')
+    dtype('S11')
+
+    An example of a non-associative case:
+
+    >>> p = np.promote_types
+    >>> p('S', p('i1', 'u1'))
+    dtype('S6')
+    >>> p(p('S', 'i1'), 'u1')
+    dtype('S4')
+
+    """)
+
+add_newdoc('numpy.core.multiarray', 'c_einsum',
+    """
+    c_einsum(subscripts, *operands, out=None, dtype=None, order='K',
+           casting='safe')
+
+    *This documentation shadows that of the native python implementation
+    of the `einsum` function, except all references and examples related
+    to the `optimize` argument (v 0.12.0) have been removed.*
+
+    Evaluates the Einstein summation convention on the operands.
+
+    Using the Einstein summation convention, many common multi-dimensional,
+    linear algebraic array operations can be represented in a simple fashion.
+    In *implicit* mode `einsum` computes these values.
+
+    In *explicit* mode, `einsum` provides further flexibility to compute
+    other array operations that might not be considered classical Einstein
+    summation operations, by disabling, or forcing summation over specified
+    subscript labels.
+
+    See the notes and examples for clarification.
+
+    Parameters
+    ----------
+    subscripts : str
+        Specifies the subscripts for summation as comma separated list of
+        subscript labels. An implicit (classical Einstein summation)
+        calculation is performed unless the explicit indicator '->' is
+        included as well as subscript labels of the precise output form.
+    operands : list of array_like
+        These are the arrays for the operation.
+    out : ndarray, optional
+        If provided, the calculation is done into this array.
+    dtype : {data-type, None}, optional
+        If provided, forces the calculation to use the data type specified.
+        Note that you may have to also give a more liberal `casting`
+        parameter to allow the conversions. Default is None.
+    order : {'C', 'F', 'A', 'K'}, optional
+        Controls the memory layout of the output. 'C' means it should
+        be C contiguous. 'F' means it should be Fortran contiguous,
+        'A' means it should be 'F' if the inputs are all 'F', 'C' otherwise.
+        'K' means it should be as close to the layout of the inputs as
+        is possible, including arbitrarily permuted axes.
+        Default is 'K'.
+    casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
+        Controls what kind of data casting may occur. Setting this to
+        'unsafe' is not recommended, as it can adversely affect accumulations.
+
+        * 'no' means the data types should not be cast at all.
+        * 'equiv' means only byte-order changes are allowed.
+        * 'safe' means only casts which can preserve values are allowed.
+        * 'same_kind' means only safe casts or casts within a kind,
+          like float64 to float32, are allowed.
+        * 'unsafe' means any data conversions may be done.
+
+        Default is 'safe'.
+    optimize : {False, True, 'greedy', 'optimal'}, optional
+        Controls if intermediate optimization should occur. No optimization
+        will occur if False and True will default to the 'greedy' algorithm.
+        Also accepts an explicit contraction list from the ``np.einsum_path``
+        function. See ``np.einsum_path`` for more details. Defaults to False.
+
+    Returns
+    -------
+    output : ndarray
+        The calculation based on the Einstein summation convention.
+
+    See Also
+    --------
+    einsum_path, dot, inner, outer, tensordot, linalg.multi_dot
+
+    Notes
+    -----
+    .. versionadded:: 1.6.0
+
+    The Einstein summation convention can be used to compute
+    many multi-dimensional, linear algebraic array operations. `einsum`
+    provides a succinct way of representing these.
+
+    A non-exhaustive list of these operations,
+    which can be computed by `einsum`, is shown below along with examples:
+
+    * Trace of an array, :py:func:`numpy.trace`.
+    * Return a diagonal, :py:func:`numpy.diag`.
+    * Array axis summations, :py:func:`numpy.sum`.
+    * Transpositions and permutations, :py:func:`numpy.transpose`.
+    * Matrix multiplication and dot product, :py:func:`numpy.matmul` :py:func:`numpy.dot`.
+    * Vector inner and outer products, :py:func:`numpy.inner` :py:func:`numpy.outer`.
+    * Broadcasting, element-wise and scalar multiplication, :py:func:`numpy.multiply`.
+    * Tensor contractions, :py:func:`numpy.tensordot`.
+    * Chained array operations, in efficient calculation order, :py:func:`numpy.einsum_path`.
+
+    The subscripts string is a comma-separated list of subscript labels,
+    where each label refers to a dimension of the corresponding operand.
+    Whenever a label is repeated it is summed, so ``np.einsum('i,i', a, b)``
+    is equivalent to :py:func:`np.inner(a,b) <numpy.inner>`. If a label
+    appears only once, it is not summed, so ``np.einsum('i', a)`` produces a
+    view of ``a`` with no changes. A further example ``np.einsum('ij,jk', a, b)``
+    describes traditional matrix multiplication and is equivalent to
+    :py:func:`np.matmul(a,b) <numpy.matmul>`. Repeated subscript labels in one
+    operand take the diagonal. For example, ``np.einsum('ii', a)`` is equivalent
+    to :py:func:`np.trace(a) <numpy.trace>`.
+
+    In *implicit mode*, the chosen subscripts are important
+    since the axes of the output are reordered alphabetically. This
+    means that ``np.einsum('ij', a)`` doesn't affect a 2D array, while
+    ``np.einsum('ji', a)`` takes its transpose. Additionally,
+    ``np.einsum('ij,jk', a, b)`` returns a matrix multiplication, while,
+    ``np.einsum('ij,jh', a, b)`` returns the transpose of the
+    multiplication since subscript 'h' precedes subscript 'i'.
+
+    In *explicit mode* the output can be directly controlled by
+    specifying output subscript labels. This requires the
+    identifier '->' as well as the list of output subscript labels.
+    This feature increases the flexibility of the function since
+    summing can be disabled or forced when required.
+    The call ``np.einsum('i->', a)`` is like
+    :py:func:`np.sum(a, axis=-1) <numpy.sum>`, and ``np.einsum('ii->i', a)``
+    is like :py:func:`np.diag(a) <numpy.diag>`.
+    The difference is that `einsum` does not allow broadcasting by default.
+    Additionally ``np.einsum('ij,jh->ih', a, b)`` directly specifies the
+    order of the output subscript labels and therefore returns matrix
+    multiplication, unlike the example above in implicit mode.
+
+    To enable and control broadcasting, use an ellipsis. Default
+    NumPy-style broadcasting is done by adding an ellipsis
+    to the left of each term, like ``np.einsum('...ii->...i', a)``.
+    To take the trace along the first and last axes,
+    you can do ``np.einsum('i...i', a)``, or to do a matrix-matrix
+    product with the left-most indices instead of rightmost, one can do
+    ``np.einsum('ij...,jk...->ik...', a, b)``.
+
+    When there is only one operand, no axes are summed, and no output
+    parameter is provided, a view into the operand is returned instead
+    of a new array. Thus, taking the diagonal as ``np.einsum('ii->i', a)``
+    produces a view (changed in version 1.10.0).
+
+    `einsum` also provides an alternative way to provide the subscripts
+    and operands as ``einsum(op0, sublist0, op1, sublist1, ..., [sublistout])``.
+    If the output shape is not provided in this format `einsum` will be
+    calculated in implicit mode, otherwise it will be performed explicitly.
+    The examples below have corresponding `einsum` calls with the two
+    parameter methods.
+
+    .. versionadded:: 1.10.0
+
+    Views returned from einsum are now writeable whenever the input array
+    is writeable. For example, ``np.einsum('ijk...->kji...', a)`` will now
+    have the same effect as :py:func:`np.swapaxes(a, 0, 2) <numpy.swapaxes>`
+    and ``np.einsum('ii->i', a)`` will return a writeable view of the diagonal
+    of a 2D array.
+ + Examples + -------- + >>> a = np.arange(25).reshape(5,5) + >>> b = np.arange(5) + >>> c = np.arange(6).reshape(2,3) + + Trace of a matrix: + + >>> np.einsum('ii', a) + 60 + >>> np.einsum(a, [0,0]) + 60 + >>> np.trace(a) + 60 + + Extract the diagonal (requires explicit form): + + >>> np.einsum('ii->i', a) + array([ 0, 6, 12, 18, 24]) + >>> np.einsum(a, [0,0], [0]) + array([ 0, 6, 12, 18, 24]) + >>> np.diag(a) + array([ 0, 6, 12, 18, 24]) + + Sum over an axis (requires explicit form): + + >>> np.einsum('ij->i', a) + array([ 10, 35, 60, 85, 110]) + >>> np.einsum(a, [0,1], [0]) + array([ 10, 35, 60, 85, 110]) + >>> np.sum(a, axis=1) + array([ 10, 35, 60, 85, 110]) + + For higher dimensional arrays summing a single axis can be done with ellipsis: + + >>> np.einsum('...j->...', a) + array([ 10, 35, 60, 85, 110]) + >>> np.einsum(a, [Ellipsis,1], [Ellipsis]) + array([ 10, 35, 60, 85, 110]) + + Compute a matrix transpose, or reorder any number of axes: + + >>> np.einsum('ji', c) + array([[0, 3], + [1, 4], + [2, 5]]) + >>> np.einsum('ij->ji', c) + array([[0, 3], + [1, 4], + [2, 5]]) + >>> np.einsum(c, [1,0]) + array([[0, 3], + [1, 4], + [2, 5]]) + >>> np.transpose(c) + array([[0, 3], + [1, 4], + [2, 5]]) + + Vector inner products: + + >>> np.einsum('i,i', b, b) + 30 + >>> np.einsum(b, [0], b, [0]) + 30 + >>> np.inner(b,b) + 30 + + Matrix vector multiplication: + + >>> np.einsum('ij,j', a, b) + array([ 30, 80, 130, 180, 230]) + >>> np.einsum(a, [0,1], b, [1]) + array([ 30, 80, 130, 180, 230]) + >>> np.dot(a, b) + array([ 30, 80, 130, 180, 230]) + >>> np.einsum('...j,j', a, b) + array([ 30, 80, 130, 180, 230]) + + Broadcasting and scalar multiplication: + + >>> np.einsum('..., ...', 3, c) + array([[ 0, 3, 6], + [ 9, 12, 15]]) + >>> np.einsum(',ij', 3, c) + array([[ 0, 3, 6], + [ 9, 12, 15]]) + >>> np.einsum(3, [Ellipsis], c, [Ellipsis]) + array([[ 0, 3, 6], + [ 9, 12, 15]]) + >>> np.multiply(3, c) + array([[ 0, 3, 6], + [ 9, 12, 15]]) + + Vector outer product: + + >>> np.einsum('i,j', np.arange(2)+1, b) + array([[0, 1, 2, 3, 4], + [0, 2, 4, 6, 8]]) + >>> np.einsum(np.arange(2)+1, [0], b, [1]) + array([[0, 1, 2, 3, 4], + [0, 2, 4, 6, 8]]) + >>> np.outer(np.arange(2)+1, b) + array([[0, 1, 2, 3, 4], + [0, 2, 4, 6, 8]]) + + Tensor contraction: + + >>> a = np.arange(60.).reshape(3,4,5) + >>> b = np.arange(24.).reshape(4,3,2) + >>> np.einsum('ijk,jil->kl', a, b) + array([[ 4400., 4730.], + [ 4532., 4874.], + [ 4664., 5018.], + [ 4796., 5162.], + [ 4928., 5306.]]) + >>> np.einsum(a, [0,1,2], b, [1,0,3], [2,3]) + array([[ 4400., 4730.], + [ 4532., 4874.], + [ 4664., 5018.], + [ 4796., 5162.], + [ 4928., 5306.]]) + >>> np.tensordot(a,b, axes=([1,0],[0,1])) + array([[ 4400., 4730.], + [ 4532., 4874.], + [ 4664., 5018.], + [ 4796., 5162.], + [ 4928., 5306.]]) + + Writeable returned arrays (since version 1.10.0): + + >>> a = np.zeros((3, 3)) + >>> np.einsum('ii->i', a)[:] = 1 + >>> a + array([[ 1., 0., 0.], + [ 0., 1., 0.], + [ 0., 0., 1.]]) + + Example of ellipsis use: + + >>> a = np.arange(6).reshape((3,2)) + >>> b = np.arange(12).reshape((4,3)) + >>> np.einsum('ki,jk->ij', a, b) + array([[10, 28, 46, 64], + [13, 40, 67, 94]]) + >>> np.einsum('ki,...k->i...', a, b) + array([[10, 28, 46, 64], + [13, 40, 67, 94]]) + >>> np.einsum('k...,jk', a, b) + array([[10, 28, 46, 64], + [13, 40, 67, 94]]) + + """) + + +############################################################################## +# +# Documentation for ndarray attributes and methods +# 
+##############################################################################
+
+
+##############################################################################
+#
+# ndarray object
+#
+##############################################################################
+
+
+add_newdoc('numpy.core.multiarray', 'ndarray',
+    """
+    ndarray(shape, dtype=float, buffer=None, offset=0,
+            strides=None, order=None)
+
+    An array object represents a multidimensional, homogeneous array
+    of fixed-size items. An associated data-type object describes the
+    format of each element in the array (its byte-order, how many bytes it
+    occupies in memory, whether it is an integer, a floating point number,
+    or something else, etc.)
+
+    Arrays should be constructed using `array`, `zeros` or `empty` (refer
+    to the See Also section below). The parameters given here refer to
+    a low-level method (`ndarray(...)`) for instantiating an array.
+
+    For more information, refer to the `numpy` module and examine the
+    methods and attributes of an array.
+
+    Parameters
+    ----------
+    (for the __new__ method; see Notes below)
+
+    shape : tuple of ints
+        Shape of created array.
+    dtype : data-type, optional
+        Any object that can be interpreted as a numpy data type.
+    buffer : object exposing buffer interface, optional
+        Used to fill the array with data.
+    offset : int, optional
+        Offset of array data in buffer.
+    strides : tuple of ints, optional
+        Strides of data in memory.
+    order : {'C', 'F'}, optional
+        Row-major (C-style) or column-major (Fortran-style) order.
+
+    Attributes
+    ----------
+    T : ndarray
+        Transpose of the array.
+    data : buffer
+        The array's elements, in memory.
+    dtype : dtype object
+        Describes the format of the elements in the array.
+    flags : dict
+        Dictionary containing information related to memory use, e.g.,
+        'C_CONTIGUOUS', 'OWNDATA', 'WRITEABLE', etc.
+    flat : numpy.flatiter object
+        Flattened version of the array as an iterator. The iterator
+        allows assignments, e.g., ``x.flat = 3`` (See `ndarray.flat` for
+        assignment examples; TODO).
+    imag : ndarray
+        Imaginary part of the array.
+    real : ndarray
+        Real part of the array.
+    size : int
+        Number of elements in the array.
+    itemsize : int
+        The memory use of each array element in bytes.
+    nbytes : int
+        The total number of bytes required to store the array data,
+        i.e., ``itemsize * size``.
+    ndim : int
+        The array's number of dimensions.
+    shape : tuple of ints
+        Shape of the array.
+    strides : tuple of ints
+        The step-size required to move from one element to the next in
+        memory. For example, a contiguous ``(3, 4)`` array of type
+        ``int16`` in C-order has strides ``(8, 2)``. This implies that
+        to move from element to element in memory requires jumps of 2 bytes.
+        To move from row-to-row, one needs to jump 8 bytes at a time
+        (``2 * 4``).
+    ctypes : ctypes object
+        Class containing properties of the array needed for interaction
+        with ctypes.
+    base : ndarray
+        If the array is a view into another array, that array is its `base`
+        (unless that array is also a view). The `base` array is where the
+        array data is actually stored.
+
+    See Also
+    --------
+    array : Construct an array.
+    zeros : Create an array, each element of which is zero.
+    empty : Create an array, but leave its allocated memory unchanged (i.e.,
+            it contains "garbage").
+    dtype : Create a data-type.
+    numpy.typing.NDArray : An ndarray alias :term:`generic <generic type>`
+                           w.r.t. its `dtype.type <numpy.dtype.type>`.
+
+    Notes
+    -----
+    There are two modes of creating an array using ``__new__``:
+
+    1.
If `buffer` is None, then only `shape`, `dtype`, and `order` + are used. + 2. If `buffer` is an object exposing the buffer interface, then + all keywords are interpreted. + + No ``__init__`` method is needed because the array is fully initialized + after the ``__new__`` method. + + Examples + -------- + These examples illustrate the low-level `ndarray` constructor. Refer + to the `See Also` section above for easier ways of constructing an + ndarray. + + First mode, `buffer` is None: + + >>> np.ndarray(shape=(2,2), dtype=float, order='F') + array([[0.0e+000, 0.0e+000], # random + [ nan, 2.5e-323]]) + + Second mode: + + >>> np.ndarray((2,), buffer=np.array([1,2,3]), + ... offset=np.int_().itemsize, + ... dtype=int) # offset = 1*itemsize, i.e. skip first element + array([2, 3]) + + """) + + +############################################################################## +# +# ndarray attributes +# +############################################################################## + + +add_newdoc('numpy.core.multiarray', 'ndarray', ('__array_interface__', + """Array protocol: Python side.""")) + + +add_newdoc('numpy.core.multiarray', 'ndarray', ('__array_finalize__', + """None.""")) + + +add_newdoc('numpy.core.multiarray', 'ndarray', ('__array_priority__', + """Array priority.""")) + + +add_newdoc('numpy.core.multiarray', 'ndarray', ('__array_struct__', + """Array protocol: C-struct side.""")) + +add_newdoc('numpy.core.multiarray', 'ndarray', ('__dlpack__', + """a.__dlpack__(*, stream=None) + + DLPack Protocol: Part of the Array API.""")) + +add_newdoc('numpy.core.multiarray', 'ndarray', ('__dlpack_device__', + """a.__dlpack_device__() + + DLPack Protocol: Part of the Array API.""")) + +add_newdoc('numpy.core.multiarray', 'ndarray', ('base', + """ + Base object if memory is from some other object. + + Examples + -------- + The base of an array that owns its memory is None: + + >>> x = np.array([1,2,3,4]) + >>> x.base is None + True + + Slicing creates a view, whose memory is shared with x: + + >>> y = x[2:] + >>> y.base is x + True + + """)) + + +add_newdoc('numpy.core.multiarray', 'ndarray', ('ctypes', + """ + An object to simplify the interaction of the array with the ctypes + module. + + This attribute creates an object that makes it easier to use arrays + when calling shared libraries with the ctypes module. The returned + object has, among others, data, shape, and strides attributes (see + Notes below) which themselves return ctypes objects that can be used + as arguments to a shared library. + + Parameters + ---------- + None + + Returns + ------- + c : Python object + Possessing attributes data, shape, strides, etc. + + See Also + -------- + numpy.ctypeslib + + Notes + ----- + Below are the public attributes of this object which were documented + in "Guide to NumPy" (we have omitted undocumented public attributes, + as well as documented private attributes): + + .. autoattribute:: numpy.core._internal._ctypes.data + :noindex: + + .. autoattribute:: numpy.core._internal._ctypes.shape + :noindex: + + .. autoattribute:: numpy.core._internal._ctypes.strides + :noindex: + + .. automethod:: numpy.core._internal._ctypes.data_as + :noindex: + + .. automethod:: numpy.core._internal._ctypes.shape_as + :noindex: + + .. automethod:: numpy.core._internal._ctypes.strides_as + :noindex: + + If the ctypes module is not available, then the ctypes attribute + of array objects still returns something useful, but ctypes objects + are not returned and errors may be raised instead. 
+    In particular, the object will still have the ``_as_parameter_``
+    attribute which will return an integer equal to the data attribute.
+
+    Examples
+    --------
+    >>> import ctypes
+    >>> x = np.array([[0, 1], [2, 3]], dtype=np.int32)
+    >>> x
+    array([[0, 1],
+           [2, 3]], dtype=int32)
+    >>> x.ctypes.data
+    31962608 # may vary
+    >>> x.ctypes.data_as(ctypes.POINTER(ctypes.c_uint32))
+    <__main__.LP_c_uint object at 0x7ff2fc1fc200> # may vary
+    >>> x.ctypes.data_as(ctypes.POINTER(ctypes.c_uint32)).contents
+    c_uint(0)
+    >>> x.ctypes.data_as(ctypes.POINTER(ctypes.c_uint64)).contents
+    c_ulong(4294967296)
+    >>> x.ctypes.shape
+    <numpy.core._internal.c_long_Array_2 object at 0x7ff2fc1fce60> # may vary
+    >>> x.ctypes.strides
+    <numpy.core._internal.c_long_Array_2 object at 0x7ff2fc1ff320> # may vary
+
+    """))
+
+
+add_newdoc('numpy.core.multiarray', 'ndarray', ('data',
+    """Python buffer object pointing to the start of the array's data."""))
+
+
+add_newdoc('numpy.core.multiarray', 'ndarray', ('dtype',
+    """
+    Data-type of the array's elements.
+
+    Parameters
+    ----------
+    None
+
+    Returns
+    -------
+    d : numpy dtype object
+
+    See Also
+    --------
+    numpy.dtype
+
+    Examples
+    --------
+    >>> x
+    array([[0, 1],
+           [2, 3]])
+    >>> x.dtype
+    dtype('int32')
+    >>> type(x.dtype)
+    <class 'numpy.dtype'>
+
+    """))
+
+
+add_newdoc('numpy.core.multiarray', 'ndarray', ('imag',
+    """
+    The imaginary part of the array.
+
+    Examples
+    --------
+    >>> x = np.sqrt([1+0j, 0+1j])
+    >>> x.imag
+    array([ 0.        ,  0.70710678])
+    >>> x.imag.dtype
+    dtype('float64')
+
+    """))
+
+
+add_newdoc('numpy.core.multiarray', 'ndarray', ('itemsize',
+    """
+    Length of one array element in bytes.
+
+    Examples
+    --------
+    >>> x = np.array([1,2,3], dtype=np.float64)
+    >>> x.itemsize
+    8
+    >>> x = np.array([1,2,3], dtype=np.complex128)
+    >>> x.itemsize
+    16
+
+    """))
+
+
+add_newdoc('numpy.core.multiarray', 'ndarray', ('flags',
+    """
+    Information about the memory layout of the array.
+
+    Attributes
+    ----------
+    C_CONTIGUOUS (C)
+        The data is in a single, C-style contiguous segment.
+    F_CONTIGUOUS (F)
+        The data is in a single, Fortran-style contiguous segment.
+    OWNDATA (O)
+        The array owns the memory it uses or borrows it from another object.
+    WRITEABLE (W)
+        The data area can be written to. Setting this to False locks
+        the data, making it read-only. A view (slice, etc.) inherits WRITEABLE
+        from its base array at creation time, but a view of a writeable
+        array may be subsequently locked while the base array remains
+        writeable. (The opposite is not true, in that a view of a locked
+        array may not be made writeable. However, currently, locking a base
+        object does not lock any views that already reference it, so under
+        that circumstance it is possible to alter the contents of a locked
+        array via a previously created writeable view onto it.) Attempting
+        to change a non-writeable array raises a RuntimeError exception.
+    ALIGNED (A)
+        The data and all elements are aligned appropriately for the hardware.
+    WRITEBACKIFCOPY (X)
+        This array is a copy of some other array. The C-API function
+        PyArray_ResolveWritebackIfCopy must be called before deallocating
+        this array, at which point the base array will be updated with the
+        contents of this array.
+    UPDATEIFCOPY (U)
+        (Deprecated, use WRITEBACKIFCOPY) This array is a copy of some other
+        array. When this array is deallocated, the base array will be updated
+        with the contents of this array.
+    FNC
+        F_CONTIGUOUS and not C_CONTIGUOUS.
+    FORC
+        F_CONTIGUOUS or C_CONTIGUOUS (one-segment test).
+    BEHAVED (B)
+        ALIGNED and WRITEABLE.
+    CARRAY (CA)
+        BEHAVED and C_CONTIGUOUS.
+    FARRAY (FA)
+        BEHAVED and F_CONTIGUOUS and not C_CONTIGUOUS.
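+
+    For example, a minimal sketch of reading and locking flags (the array
+    here is arbitrary):
+
+    >>> a = np.arange(4)  # any array works
+    >>> a.flags['C_CONTIGUOUS']
+    True
+    >>> a.flags.writeable = False
+    >>> a.flags.writeable
+    False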
+
+ Notes
+ -----
+ The `flags` object can be accessed dictionary-like (as in ``a.flags['WRITEABLE']``),
+ or by using lowercased attribute names (as in ``a.flags.writeable``). Short flag
+ names are only supported in dictionary access.
+
+ Only the WRITEBACKIFCOPY, UPDATEIFCOPY, WRITEABLE, and ALIGNED flags can be
+ changed by the user, via direct assignment to the attribute or dictionary
+ entry, or by calling `ndarray.setflags`.
+
+ The array flags cannot be set arbitrarily:
+
+ - UPDATEIFCOPY can only be set ``False``.
+ - WRITEBACKIFCOPY can only be set ``False``.
+ - ALIGNED can only be set ``True`` if the data is truly aligned.
+ - WRITEABLE can only be set ``True`` if the array owns its own memory
+ or the ultimate owner of the memory exposes a writeable buffer
+ interface or is a string.
+
+ Arrays can be both C-style and Fortran-style contiguous simultaneously.
+ This is clear for 1-dimensional arrays, but can also be true for higher
+ dimensional arrays.
+
+ Even for contiguous arrays a stride for a given dimension
+ ``arr.strides[dim]`` may be *arbitrary* if ``arr.shape[dim] == 1``
+ or the array has no elements.
+ It does *not* generally hold that ``self.strides[-1] == self.itemsize``
+ for C-style contiguous arrays, or that ``self.strides[0] == self.itemsize``
+ for Fortran-style contiguous arrays.
+ """))
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('flat',
+ """
+ A 1-D iterator over the array.
+
+ This is a `numpy.flatiter` instance, which acts similarly to, but is not
+ a subclass of, Python's built-in iterator object.
+
+ See Also
+ --------
+ flatten : Return a copy of the array collapsed into one dimension.
+
+ flatiter
+
+ Examples
+ --------
+ >>> x = np.arange(1, 7).reshape(2, 3)
+ >>> x
+ array([[1, 2, 3],
+ [4, 5, 6]])
+ >>> x.flat[3]
+ 4
+ >>> x.T
+ array([[1, 4],
+ [2, 5],
+ [3, 6]])
+ >>> x.T.flat[3]
+ 5
+ >>> type(x.flat)
+ <class 'numpy.flatiter'>
+
+ An assignment example:
+
+ >>> x.flat = 3; x
+ array([[3, 3, 3],
+ [3, 3, 3]])
+ >>> x.flat[[1,4]] = 1; x
+ array([[3, 1, 3],
+ [3, 1, 3]])
+
+ """))
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('nbytes',
+ """
+ Total bytes consumed by the elements of the array.
+
+ Notes
+ -----
+ Does not include memory consumed by non-element attributes of the
+ array object.
+
+ Examples
+ --------
+ >>> x = np.zeros((3,5,2), dtype=np.complex128)
+ >>> x.nbytes
+ 480
+ >>> np.prod(x.shape) * x.itemsize
+ 480
+
+ """))
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('ndim',
+ """
+ Number of array dimensions.
+
+ Examples
+ --------
+ >>> x = np.array([1, 2, 3])
+ >>> x.ndim
+ 1
+ >>> y = np.zeros((2, 3, 4))
+ >>> y.ndim
+ 3
+
+ """))
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('real',
+ """
+ The real part of the array.
+
+ Examples
+ --------
+ >>> x = np.sqrt([1+0j, 0+1j])
+ >>> x.real
+ array([ 1. , 0.70710678])
+ >>> x.real.dtype
+ dtype('float64')
+
+ See Also
+ --------
+ numpy.real : equivalent function
+
+ """))
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('shape',
+ """
+ Tuple of array dimensions.
+
+ The shape property is usually used to get the current shape of an array,
+ but may also be used to reshape the array in-place by assigning a tuple of
+ array dimensions to it. As with `numpy.reshape`, one of the new shape
+ dimensions can be -1, in which case its value is inferred from the size of
+ the array and the remaining dimensions. Reshaping an array in-place will
+ fail if a copy is required.
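+
+ For example, a dimension given as -1 is inferred from the remaining ones:
+
+ >>> z = np.zeros((2, 3, 4))
+ >>> z.shape = (-1, 8)
+ >>> z.shape
+ (3, 8)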
+
+ Examples
+ --------
+ >>> x = np.array([1, 2, 3, 4])
+ >>> x.shape
+ (4,)
+ >>> y = np.zeros((2, 3, 4))
+ >>> y.shape
+ (2, 3, 4)
+ >>> y.shape = (3, 8)
+ >>> y
+ array([[ 0., 0., 0., 0., 0., 0., 0., 0.],
+ [ 0., 0., 0., 0., 0., 0., 0., 0.],
+ [ 0., 0., 0., 0., 0., 0., 0., 0.]])
+ >>> y.shape = (3, 6)
+ Traceback (most recent call last):
+ File "<stdin>", line 1, in <module>
+ ValueError: total size of new array must be unchanged
+ >>> np.zeros((4,2))[::2].shape = (-1,)
+ Traceback (most recent call last):
+ File "<stdin>", line 1, in <module>
+ AttributeError: Incompatible shape for in-place modification. Use
+ `.reshape()` to make a copy with the desired shape.
+
+ See Also
+ --------
+ numpy.reshape : similar function
+ ndarray.reshape : similar method
+
+ """))
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('size',
+ """
+ Number of elements in the array.
+
+ Equal to ``np.prod(a.shape)``, i.e., the product of the array's
+ dimensions.
+
+ Notes
+ -----
+ `a.size` returns a standard arbitrary precision Python integer. This
+ may not be the case with other methods of obtaining the same value
+ (like the suggested ``np.prod(a.shape)``, which returns an instance
+ of ``np.int_``), and may be relevant if the value is used further in
+ calculations that may overflow a fixed size integer type.
+
+ Examples
+ --------
+ >>> x = np.zeros((3, 5, 2), dtype=np.complex128)
+ >>> x.size
+ 30
+ >>> np.prod(x.shape)
+ 30
+
+ """))
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('strides',
+ """
+ Tuple of bytes to step in each dimension when traversing an array.
+
+ The byte offset of element ``(i[0], i[1], ..., i[n])`` in an array `a`
+ is::
+
+ offset = sum(np.array(i) * a.strides)
+
+ A more detailed explanation of strides can be found in the
+ "ndarray.rst" file in the NumPy reference guide.
+
+ Notes
+ -----
+ Imagine an array of 32-bit integers (each 4 bytes)::
+
+ x = np.array([[0, 1, 2, 3, 4],
+ [5, 6, 7, 8, 9]], dtype=np.int32)
+
+ This array is stored in memory as 40 bytes, one after the other
+ (known as a contiguous block of memory). The strides of an array tell
+ us how many bytes we have to skip in memory to move to the next position
+ along a certain axis. For example, we have to skip 4 bytes (1 value) to
+ move to the next column, but 20 bytes (5 values) to get to the same
+ position in the next row. As such, the strides for the array `x` will be
+ ``(20, 4)``.
+
+ See Also
+ --------
+ numpy.lib.stride_tricks.as_strided
+
+ Examples
+ --------
+ >>> y = np.reshape(np.arange(2*3*4), (2,3,4))
+ >>> y
+ array([[[ 0, 1, 2, 3],
+ [ 4, 5, 6, 7],
+ [ 8, 9, 10, 11]],
+ [[12, 13, 14, 15],
+ [16, 17, 18, 19],
+ [20, 21, 22, 23]]])
+ >>> y.strides
+ (48, 16, 4)
+ >>> y[1,1,1]
+ 17
+ >>> offset=sum(y.strides * np.array((1,1,1)))
+ >>> offset/y.itemsize
+ 17
+
+ >>> x = np.reshape(np.arange(5*6*7*8), (5,6,7,8)).transpose(2,3,1,0)
+ >>> x.strides
+ (32, 4, 224, 1344)
+ >>> i = np.array([3,5,2,2])
+ >>> offset = sum(i * x.strides)
+ >>> x[3,5,2,2]
+ 813
+ >>> offset / x.itemsize
+ 813
+
+ """))
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('T',
+ """
+ The transposed array.
+
+ Same as ``self.transpose()``.
+
+ Examples
+ --------
+ >>> x = np.array([[1.,2.],[3.,4.]])
+ >>> x
+ array([[ 1., 2.],
+ [ 3., 4.]])
+ >>> x.T
+ array([[ 1., 3.],
+ [ 2., 4.]])
+ >>> x = np.array([1.,2.,3.,4.])
+ >>> x
+ array([ 1., 2., 3., 4.])
+ >>> x.T
+ array([ 1., 2., 3., 4.])
+
+ See Also
+ --------
+ transpose
+
+ """))
+
+
+ ##############################################################################
+ #
+ # ndarray methods
+ #
+ ##############################################################################
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('__array__',
+ """ a.__array__([dtype], /) -> reference if type unchanged, copy otherwise.
+
+ Returns either a new reference to self if dtype is not given or a new array
+ of provided data type if dtype is different from the current dtype of the
+ array.
+
+ """))
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('__array_prepare__',
+ """a.__array_prepare__(array[, context], /)
+
+ Returns a view of `array` with the same type as self.
+
+ """))
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('__array_wrap__',
+ """a.__array_wrap__(array[, context], /)
+
+ Returns a view of `array` with the same type as self.
+
+ """))
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('__copy__',
+ """a.__copy__()
+
+ Used if :func:`copy.copy` is called on an array. Returns a copy of the array.
+
+ Equivalent to ``a.copy(order='K')``.
+
+ """))
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('__class_getitem__',
+ """a.__class_getitem__(item, /)
+
+ Return a parametrized wrapper around the `~numpy.ndarray` type.
+
+ .. versionadded:: 1.22
+
+ Returns
+ -------
+ alias : types.GenericAlias
+ A parametrized `~numpy.ndarray` type.
+
+ Examples
+ --------
+ >>> from typing import Any
+ >>> import numpy as np
+
+ >>> np.ndarray[Any, np.dtype[Any]]
+ numpy.ndarray[typing.Any, numpy.dtype[typing.Any]]
+
+ Notes
+ -----
+ This method is only available for Python 3.9 and later.
+
+ See Also
+ --------
+ :pep:`585` : Type hinting generics in standard collections.
+ numpy.typing.NDArray : An ndarray alias :term:`generic <generic type>`
+ w.r.t. its `dtype.type <numpy.dtype.type>`.
+
+ """))
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('__deepcopy__',
+ """a.__deepcopy__(memo, /) -> Deep copy of array.
+
+ Used if :func:`copy.deepcopy` is called on an array.
+
+ """))
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('__reduce__',
+ """a.__reduce__()
+
+ For pickling.
+
+ """))
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('__setstate__',
+ """a.__setstate__(state, /)
+
+ For unpickling.
+
+ The `state` argument must be a sequence that contains the following
+ elements:
+
+ Parameters
+ ----------
+ version : int
+ optional pickle version. If omitted defaults to 0.
+ shape : tuple
+ dtype : data-type
+ isFortran : bool
+ rawdata : string or list
+ a binary string with the data (or a list if 'a' is an object array)
+
+ """))
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('all',
+ """
+ a.all(axis=None, out=None, keepdims=False, *, where=True)
+
+ Returns True if all elements evaluate to True.
+
+ Refer to `numpy.all` for full documentation.
+
+ See Also
+ --------
+ numpy.all : equivalent function
+
+ """))
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('any',
+ """
+ a.any(axis=None, out=None, keepdims=False, *, where=True)
+
+ Returns True if any of the elements of `a` evaluate to True.
+
+ Refer to `numpy.any` for full documentation.
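+
+ For example:
+
+ >>> a = np.array([[True, False], [False, False]])
+ >>> a.any()
+ True
+ >>> a.any(axis=0)
+ array([ True, False])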
+ + See Also + -------- + numpy.any : equivalent function + + """)) + + +add_newdoc('numpy.core.multiarray', 'ndarray', ('argmax', + """ + a.argmax(axis=None, out=None) + + Return indices of the maximum values along the given axis. + + Refer to `numpy.argmax` for full documentation. + + See Also + -------- + numpy.argmax : equivalent function + + """)) + + +add_newdoc('numpy.core.multiarray', 'ndarray', ('argmin', + """ + a.argmin(axis=None, out=None) + + Return indices of the minimum values along the given axis. + + Refer to `numpy.argmin` for detailed documentation. + + See Also + -------- + numpy.argmin : equivalent function + + """)) + + +add_newdoc('numpy.core.multiarray', 'ndarray', ('argsort', + """ + a.argsort(axis=-1, kind=None, order=None) + + Returns the indices that would sort this array. + + Refer to `numpy.argsort` for full documentation. + + See Also + -------- + numpy.argsort : equivalent function + + """)) + + +add_newdoc('numpy.core.multiarray', 'ndarray', ('argpartition', + """ + a.argpartition(kth, axis=-1, kind='introselect', order=None) + + Returns the indices that would partition this array. + + Refer to `numpy.argpartition` for full documentation. + + .. versionadded:: 1.8.0 + + See Also + -------- + numpy.argpartition : equivalent function + + """)) + + +add_newdoc('numpy.core.multiarray', 'ndarray', ('astype', + """ + a.astype(dtype, order='K', casting='unsafe', subok=True, copy=True) + + Copy of the array, cast to a specified type. + + Parameters + ---------- + dtype : str or dtype + Typecode or data-type to which the array is cast. + order : {'C', 'F', 'A', 'K'}, optional + Controls the memory layout order of the result. + 'C' means C order, 'F' means Fortran order, 'A' + means 'F' order if all the arrays are Fortran contiguous, + 'C' order otherwise, and 'K' means as close to the + order the array elements appear in memory as possible. + Default is 'K'. + casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional + Controls what kind of data casting may occur. Defaults to 'unsafe' + for backwards compatibility. + + * 'no' means the data types should not be cast at all. + * 'equiv' means only byte-order changes are allowed. + * 'safe' means only casts which can preserve values are allowed. + * 'same_kind' means only safe casts or casts within a kind, + like float64 to float32, are allowed. + * 'unsafe' means any data conversions may be done. + subok : bool, optional + If True, then sub-classes will be passed-through (default), otherwise + the returned array will be forced to be a base-class array. + copy : bool, optional + By default, astype always returns a newly allocated array. If this + is set to false, and the `dtype`, `order`, and `subok` + requirements are satisfied, the input array is returned instead + of a copy. + + Returns + ------- + arr_t : ndarray + Unless `copy` is False and the other conditions for returning the input + array are satisfied (see description for `copy` input parameter), `arr_t` + is a new array of the same shape as the input array, with dtype, order + given by `dtype`, `order`. + + Notes + ----- + .. versionchanged:: 1.17.0 + Casting between a simple data type and a structured one is possible only + for "unsafe" casting. Casting to multiple fields is allowed, but + casting from multiple fields is not. + + .. versionchanged:: 1.9.0 + Casting from numeric to string types in 'safe' casting mode requires + that the string dtype length is long enough to store the max + integer/float value converted. 
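+
+ For example, 'same_kind' casting permits reducing precision within the
+ same kind, which 'safe' casting would reject:
+
+ >>> x = np.array([1.5, 2.5])
+ >>> x.astype(np.float32, casting='same_kind')
+ array([1.5, 2.5], dtype=float32)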
+ + Raises + ------ + ComplexWarning + When casting from complex to float or int. To avoid this, + one should use ``a.real.astype(t)``. + + Examples + -------- + >>> x = np.array([1, 2, 2.5]) + >>> x + array([1. , 2. , 2.5]) + + >>> x.astype(int) + array([1, 2, 2]) + + """)) + + +add_newdoc('numpy.core.multiarray', 'ndarray', ('byteswap', + """ + a.byteswap(inplace=False) + + Swap the bytes of the array elements + + Toggle between low-endian and big-endian data representation by + returning a byteswapped array, optionally swapped in-place. + Arrays of byte-strings are not swapped. The real and imaginary + parts of a complex number are swapped individually. + + Parameters + ---------- + inplace : bool, optional + If ``True``, swap bytes in-place, default is ``False``. + + Returns + ------- + out : ndarray + The byteswapped array. If `inplace` is ``True``, this is + a view to self. + + Examples + -------- + >>> A = np.array([1, 256, 8755], dtype=np.int16) + >>> list(map(hex, A)) + ['0x1', '0x100', '0x2233'] + >>> A.byteswap(inplace=True) + array([ 256, 1, 13090], dtype=int16) + >>> list(map(hex, A)) + ['0x100', '0x1', '0x3322'] + + Arrays of byte-strings are not swapped + + >>> A = np.array([b'ceg', b'fac']) + >>> A.byteswap() + array([b'ceg', b'fac'], dtype='|S3') + + ``A.newbyteorder().byteswap()`` produces an array with the same values + but different representation in memory + + >>> A = np.array([1, 2, 3]) + >>> A.view(np.uint8) + array([1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, + 0, 0], dtype=uint8) + >>> A.newbyteorder().byteswap(inplace=True) + array([1, 2, 3]) + >>> A.view(np.uint8) + array([0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, + 0, 3], dtype=uint8) + + """)) + + +add_newdoc('numpy.core.multiarray', 'ndarray', ('choose', + """ + a.choose(choices, out=None, mode='raise') + + Use an index array to construct a new array from a set of choices. + + Refer to `numpy.choose` for full documentation. + + See Also + -------- + numpy.choose : equivalent function + + """)) + + +add_newdoc('numpy.core.multiarray', 'ndarray', ('clip', + """ + a.clip(min=None, max=None, out=None, **kwargs) + + Return an array whose values are limited to ``[min, max]``. + One of max or min must be given. + + Refer to `numpy.clip` for full documentation. + + See Also + -------- + numpy.clip : equivalent function + + """)) + + +add_newdoc('numpy.core.multiarray', 'ndarray', ('compress', + """ + a.compress(condition, axis=None, out=None) + + Return selected slices of this array along given axis. + + Refer to `numpy.compress` for full documentation. + + See Also + -------- + numpy.compress : equivalent function + + """)) + + +add_newdoc('numpy.core.multiarray', 'ndarray', ('conj', + """ + a.conj() + + Complex-conjugate all elements. + + Refer to `numpy.conjugate` for full documentation. + + See Also + -------- + numpy.conjugate : equivalent function + + """)) + + +add_newdoc('numpy.core.multiarray', 'ndarray', ('conjugate', + """ + a.conjugate() + + Return the complex conjugate, element-wise. + + Refer to `numpy.conjugate` for full documentation. + + See Also + -------- + numpy.conjugate : equivalent function + + """)) + + +add_newdoc('numpy.core.multiarray', 'ndarray', ('copy', + """ + a.copy(order='C') + + Return a copy of the array. + + Parameters + ---------- + order : {'C', 'F', 'A', 'K'}, optional + Controls the memory layout of the copy. 'C' means C-order, + 'F' means F-order, 'A' means 'F' if `a` is Fortran contiguous, + 'C' otherwise. 
'K' means match the layout of `a` as closely + as possible. (Note that this function and :func:`numpy.copy` are very + similar but have different default values for their order= + arguments, and this function always passes sub-classes through.) + + See also + -------- + numpy.copy : Similar function with different default behavior + numpy.copyto + + Notes + ----- + This function is the preferred method for creating an array copy. The + function :func:`numpy.copy` is similar, but it defaults to using order 'K', + and will not pass sub-classes through by default. + + Examples + -------- + >>> x = np.array([[1,2,3],[4,5,6]], order='F') + + >>> y = x.copy() + + >>> x.fill(0) + + >>> x + array([[0, 0, 0], + [0, 0, 0]]) + + >>> y + array([[1, 2, 3], + [4, 5, 6]]) + + >>> y.flags['C_CONTIGUOUS'] + True + + """)) + + +add_newdoc('numpy.core.multiarray', 'ndarray', ('cumprod', + """ + a.cumprod(axis=None, dtype=None, out=None) + + Return the cumulative product of the elements along the given axis. + + Refer to `numpy.cumprod` for full documentation. + + See Also + -------- + numpy.cumprod : equivalent function + + """)) + + +add_newdoc('numpy.core.multiarray', 'ndarray', ('cumsum', + """ + a.cumsum(axis=None, dtype=None, out=None) + + Return the cumulative sum of the elements along the given axis. + + Refer to `numpy.cumsum` for full documentation. + + See Also + -------- + numpy.cumsum : equivalent function + + """)) + + +add_newdoc('numpy.core.multiarray', 'ndarray', ('diagonal', + """ + a.diagonal(offset=0, axis1=0, axis2=1) + + Return specified diagonals. In NumPy 1.9 the returned array is a + read-only view instead of a copy as in previous NumPy versions. In + a future version the read-only restriction will be removed. + + Refer to :func:`numpy.diagonal` for full documentation. + + See Also + -------- + numpy.diagonal : equivalent function + + """)) + + +add_newdoc('numpy.core.multiarray', 'ndarray', ('dot')) + + +add_newdoc('numpy.core.multiarray', 'ndarray', ('dump', + """a.dump(file) + + Dump a pickle of the array to the specified file. + The array can be read back with pickle.load or numpy.load. + + Parameters + ---------- + file : str or Path + A string naming the dump file. + + .. versionchanged:: 1.17.0 + `pathlib.Path` objects are now accepted. + + """)) + + +add_newdoc('numpy.core.multiarray', 'ndarray', ('dumps', + """ + a.dumps() + + Returns the pickle of the array as a string. + pickle.loads will convert the string back to an array. + + Parameters + ---------- + None + + """)) + + +add_newdoc('numpy.core.multiarray', 'ndarray', ('fill', + """ + a.fill(value) + + Fill the array with a scalar value. + + Parameters + ---------- + value : scalar + All elements of `a` will be assigned this value. + + Examples + -------- + >>> a = np.array([1, 2]) + >>> a.fill(0) + >>> a + array([0, 0]) + >>> a = np.empty(2) + >>> a.fill(1) + >>> a + array([1., 1.]) + + """)) + + +add_newdoc('numpy.core.multiarray', 'ndarray', ('flatten', + """ + a.flatten(order='C') + + Return a copy of the array collapsed into one dimension. + + Parameters + ---------- + order : {'C', 'F', 'A', 'K'}, optional + 'C' means to flatten in row-major (C-style) order. + 'F' means to flatten in column-major (Fortran- + style) order. 'A' means to flatten in column-major + order if `a` is Fortran *contiguous* in memory, + row-major order otherwise. 'K' means to flatten + `a` in the order the elements occur in memory. + The default is 'C'. 
+
+ Returns
+ -------
+ y : ndarray
+ A copy of the input array, flattened to one dimension.
+
+ See Also
+ --------
+ ravel : Return a flattened array.
+ flat : A 1-D flat iterator over the array.
+
+ Examples
+ --------
+ >>> a = np.array([[1,2], [3,4]])
+ >>> a.flatten()
+ array([1, 2, 3, 4])
+ >>> a.flatten('F')
+ array([1, 3, 2, 4])
+
+ """))
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('getfield',
+ """
+ a.getfield(dtype, offset=0)
+
+ Returns a field of the given array as a certain type.
+
+ A field is a view of the array data with a given data-type. The values in
+ the view are determined by the given type and the offset into the current
+ array in bytes. The offset needs to be such that the view dtype fits in the
+ array dtype; for example an array of dtype complex128 has 16-byte elements.
+ If taking a view with a 32-bit integer (4 bytes), the offset needs to be
+ between 0 and 12 bytes.
+
+ Parameters
+ ----------
+ dtype : str or dtype
+ The data type of the view. The dtype size of the view cannot be larger
+ than that of the array itself.
+ offset : int
+ Number of bytes to skip before beginning the element view.
+
+ Examples
+ --------
+ >>> x = np.diag([1.+1.j]*2)
+ >>> x[1, 1] = 2 + 4.j
+ >>> x
+ array([[1.+1.j, 0.+0.j],
+ [0.+0.j, 2.+4.j]])
+ >>> x.getfield(np.float64)
+ array([[1., 0.],
+ [0., 2.]])
+
+ By choosing an offset of 8 bytes we can select the complex part of the
+ array for our view:
+
+ >>> x.getfield(np.float64, offset=8)
+ array([[1., 0.],
+ [0., 4.]])
+
+ """))
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('item',
+ """
+ a.item(*args)
+
+ Copy an element of an array to a standard Python scalar and return it.
+
+ Parameters
+ ----------
+ \\*args : Arguments (variable number and type)
+
+ * none: in this case, the method only works for arrays
+ with one element (`a.size == 1`), which element is
+ copied into a standard Python scalar object and returned.
+
+ * int_type: this argument is interpreted as a flat index into
+ the array, specifying which element to copy and return.
+
+ * tuple of int_types: functions as does a single int_type argument,
+ except that the argument is interpreted as an nd-index into the
+ array.
+
+ Returns
+ -------
+ z : Standard Python scalar object
+ A copy of the specified element of the array as a suitable
+ Python scalar
+
+ Notes
+ -----
+ When the data type of `a` is longdouble or clongdouble, item() returns
+ a scalar array object because there is no available Python scalar that
+ would not lose information. Void arrays return a buffer object for item(),
+ unless fields are defined, in which case a tuple is returned.
+
+ `item` is very similar to a[args], except, instead of an array scalar,
+ a standard Python scalar is returned. This can be useful for speeding up
+ access to elements of the array and doing arithmetic on elements of the
+ array using Python's optimized math.
+
+ Examples
+ --------
+ >>> np.random.seed(123)
+ >>> x = np.random.randint(9, size=(3, 3))
+ >>> x
+ array([[2, 2, 6],
+ [1, 3, 6],
+ [1, 0, 1]])
+ >>> x.item(3)
+ 1
+ >>> x.item(7)
+ 0
+ >>> x.item((0, 1))
+ 2
+ >>> x.item((2, 2))
+ 1
+
+ """))
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('itemset',
+ """
+ a.itemset(*args)
+
+ Insert scalar into an array (scalar is cast to array's dtype, if possible)
+
+ There must be at least 1 argument, and the last argument is defined
+ as *item*. Then, ``a.itemset(*args)`` is equivalent to but faster
+ than ``a[args] = item``.
The item should be a scalar value and `args`
+ must select a single item in the array `a`.
+
+ Parameters
+ ----------
+ \\*args : Arguments
+ If one argument: a scalar, only used in case `a` is of size 1.
+ If two arguments: the last argument is the value to be set
+ and must be a scalar, the first argument specifies a single array
+ element location. It is either an int or a tuple.
+
+ Notes
+ -----
+ Compared to indexing syntax, `itemset` provides some speed increase
+ for placing a scalar into a particular location in an `ndarray`,
+ if you must do this. However, generally this is discouraged:
+ among other problems, it complicates the appearance of the code.
+ Also, when using `itemset` (and `item`) inside a loop, be sure
+ to assign the methods to a local variable to avoid the attribute
+ look-up at each loop iteration.
+
+ Examples
+ --------
+ >>> np.random.seed(123)
+ >>> x = np.random.randint(9, size=(3, 3))
+ >>> x
+ array([[2, 2, 6],
+ [1, 3, 6],
+ [1, 0, 1]])
+ >>> x.itemset(4, 0)
+ >>> x.itemset((2, 2), 9)
+ >>> x
+ array([[2, 2, 6],
+ [1, 0, 6],
+ [1, 0, 9]])
+
+ """))
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('max',
+ """
+ a.max(axis=None, out=None, keepdims=False, initial=<no value>, where=True)
+
+ Return the maximum along a given axis.
+
+ Refer to `numpy.amax` for full documentation.
+
+ See Also
+ --------
+ numpy.amax : equivalent function
+
+ """))
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('mean',
+ """
+ a.mean(axis=None, dtype=None, out=None, keepdims=False, *, where=True)
+
+ Returns the average of the array elements along given axis.
+
+ Refer to `numpy.mean` for full documentation.
+
+ See Also
+ --------
+ numpy.mean : equivalent function
+
+ """))
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('min',
+ """
+ a.min(axis=None, out=None, keepdims=False, initial=<no value>, where=True)
+
+ Return the minimum along a given axis.
+
+ Refer to `numpy.amin` for full documentation.
+
+ See Also
+ --------
+ numpy.amin : equivalent function
+
+ """))
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('newbyteorder',
+ """
+ arr.newbyteorder(new_order='S', /)
+
+ Return the array with the same data viewed with a different byte order.
+
+ Equivalent to::
+
+ arr.view(arr.dtype.newbyteorder(new_order))
+
+ Changes are also made in all fields and sub-arrays of the array data
+ type.
+
+ Parameters
+ ----------
+ new_order : string, optional
+ Byte order to force; a value from the byte order specifications
+ below. `new_order` codes can be any of:
+
+ * 'S' - swap dtype from current to opposite endian
+ * {'<', 'little'} - little endian
+ * {'>', 'big'} - big endian
+ * {'=', 'native'} - native order, equivalent to `sys.byteorder`
+ * {'|', 'I'} - ignore (no change to byte order)
+
+ The default value ('S') results in swapping the current
+ byte order.
+
+ Returns
+ -------
+ new_arr : array
+ New array object with the dtype reflecting given change to the
+ byte order.
+
+ """))
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('nonzero',
+ """
+ a.nonzero()
+
+ Return the indices of the elements that are non-zero.
+
+ Refer to `numpy.nonzero` for full documentation.
+
+ See Also
+ --------
+ numpy.nonzero : equivalent function
+
+ """))
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('prod',
+ """
+ a.prod(axis=None, dtype=None, out=None, keepdims=False, initial=1, where=True)
+
+ Return the product of the array elements over the given axis.
+
+ Refer to `numpy.prod` for full documentation.
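+
+ For example:
+
+ >>> a = np.array([[1., 2.], [3., 4.]])
+ >>> a.prod()
+ 24.0
+ >>> a.prod(axis=0)
+ array([3., 8.])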
+ + See Also + -------- + numpy.prod : equivalent function + + """)) + + +add_newdoc('numpy.core.multiarray', 'ndarray', ('ptp', + """ + a.ptp(axis=None, out=None, keepdims=False) + + Peak to peak (maximum - minimum) value along a given axis. + + Refer to `numpy.ptp` for full documentation. + + See Also + -------- + numpy.ptp : equivalent function + + """)) + + +add_newdoc('numpy.core.multiarray', 'ndarray', ('put', + """ + a.put(indices, values, mode='raise') + + Set ``a.flat[n] = values[n]`` for all `n` in indices. + + Refer to `numpy.put` for full documentation. + + See Also + -------- + numpy.put : equivalent function + + """)) + + +add_newdoc('numpy.core.multiarray', 'ndarray', ('ravel', + """ + a.ravel([order]) + + Return a flattened array. + + Refer to `numpy.ravel` for full documentation. + + See Also + -------- + numpy.ravel : equivalent function + + ndarray.flat : a flat iterator on the array. + + """)) + + +add_newdoc('numpy.core.multiarray', 'ndarray', ('repeat', + """ + a.repeat(repeats, axis=None) + + Repeat elements of an array. + + Refer to `numpy.repeat` for full documentation. + + See Also + -------- + numpy.repeat : equivalent function + + """)) + + +add_newdoc('numpy.core.multiarray', 'ndarray', ('reshape', + """ + a.reshape(shape, order='C') + + Returns an array containing the same data with a new shape. + + Refer to `numpy.reshape` for full documentation. + + See Also + -------- + numpy.reshape : equivalent function + + Notes + ----- + Unlike the free function `numpy.reshape`, this method on `ndarray` allows + the elements of the shape parameter to be passed in as separate arguments. + For example, ``a.reshape(10, 11)`` is equivalent to + ``a.reshape((10, 11))``. + + """)) + + +add_newdoc('numpy.core.multiarray', 'ndarray', ('resize', + """ + a.resize(new_shape, refcheck=True) + + Change shape and size of array in-place. + + Parameters + ---------- + new_shape : tuple of ints, or `n` ints + Shape of resized array. + refcheck : bool, optional + If False, reference count will not be checked. Default is True. + + Returns + ------- + None + + Raises + ------ + ValueError + If `a` does not own its own data or references or views to it exist, + and the data memory must be changed. + PyPy only: will always raise if the data memory must be changed, since + there is no reliable way to determine if references or views to it + exist. + + SystemError + If the `order` keyword argument is specified. This behaviour is a + bug in NumPy. + + See Also + -------- + resize : Return a new array with the specified shape. + + Notes + ----- + This reallocates space for the data area if necessary. + + Only contiguous arrays (data elements consecutive in memory) can be + resized. + + The purpose of the reference count check is to make sure you + do not use this array as a buffer for another Python object and then + reallocate the memory. However, reference counts can increase in + other ways so if you are sure that you have not shared the memory + for this array with another Python object, then you may safely set + `refcheck` to False. 
+ + Examples + -------- + Shrinking an array: array is flattened (in the order that the data are + stored in memory), resized, and reshaped: + + >>> a = np.array([[0, 1], [2, 3]], order='C') + >>> a.resize((2, 1)) + >>> a + array([[0], + [1]]) + + >>> a = np.array([[0, 1], [2, 3]], order='F') + >>> a.resize((2, 1)) + >>> a + array([[0], + [2]]) + + Enlarging an array: as above, but missing entries are filled with zeros: + + >>> b = np.array([[0, 1], [2, 3]]) + >>> b.resize(2, 3) # new_shape parameter doesn't have to be a tuple + >>> b + array([[0, 1, 2], + [3, 0, 0]]) + + Referencing an array prevents resizing... + + >>> c = a + >>> a.resize((1, 1)) + Traceback (most recent call last): + ... + ValueError: cannot resize an array that references or is referenced ... + + Unless `refcheck` is False: + + >>> a.resize((1, 1), refcheck=False) + >>> a + array([[0]]) + >>> c + array([[0]]) + + """)) + + +add_newdoc('numpy.core.multiarray', 'ndarray', ('round', + """ + a.round(decimals=0, out=None) + + Return `a` with each element rounded to the given number of decimals. + + Refer to `numpy.around` for full documentation. + + See Also + -------- + numpy.around : equivalent function + + """)) + + +add_newdoc('numpy.core.multiarray', 'ndarray', ('searchsorted', + """ + a.searchsorted(v, side='left', sorter=None) + + Find indices where elements of v should be inserted in a to maintain order. + + For full documentation, see `numpy.searchsorted` + + See Also + -------- + numpy.searchsorted : equivalent function + + """)) + + +add_newdoc('numpy.core.multiarray', 'ndarray', ('setfield', + """ + a.setfield(val, dtype, offset=0) + + Put a value into a specified place in a field defined by a data-type. + + Place `val` into `a`'s field defined by `dtype` and beginning `offset` + bytes into the field. + + Parameters + ---------- + val : object + Value to be placed in field. + dtype : dtype object + Data-type of the field in which to place `val`. + offset : int, optional + The number of bytes into the field at which to place `val`. + + Returns + ------- + None + + See Also + -------- + getfield + + Examples + -------- + >>> x = np.eye(3) + >>> x.getfield(np.float64) + array([[1., 0., 0.], + [0., 1., 0.], + [0., 0., 1.]]) + >>> x.setfield(3, np.int32) + >>> x.getfield(np.int32) + array([[3, 3, 3], + [3, 3, 3], + [3, 3, 3]], dtype=int32) + >>> x + array([[1.0e+000, 1.5e-323, 1.5e-323], + [1.5e-323, 1.0e+000, 1.5e-323], + [1.5e-323, 1.5e-323, 1.0e+000]]) + >>> x.setfield(np.eye(3), np.int32) + >>> x + array([[1., 0., 0.], + [0., 1., 0.], + [0., 0., 1.]]) + + """)) + + +add_newdoc('numpy.core.multiarray', 'ndarray', ('setflags', + """ + a.setflags(write=None, align=None, uic=None) + + Set array flags WRITEABLE, ALIGNED, (WRITEBACKIFCOPY and UPDATEIFCOPY), + respectively. + + These Boolean-valued flags affect how numpy interprets the memory + area used by `a` (see Notes below). The ALIGNED flag can only + be set to True if the data is actually aligned according to the type. + The WRITEBACKIFCOPY and (deprecated) UPDATEIFCOPY flags can never be set + to True. The flag WRITEABLE can only be set to True if the array owns its + own memory, or the ultimate owner of the memory exposes a writeable buffer + interface, or is a string. (The exception for string is made so that + unpickling can be done without copying memory.) + + Parameters + ---------- + write : bool, optional + Describes whether or not `a` can be written to. + align : bool, optional + Describes whether or not `a` is aligned properly for its type. 
+ uic : bool, optional
+ Describes whether or not `a` is a copy of another "base" array.
+
+ Notes
+ -----
+ Array flags provide information about how the memory area used
+ for the array is to be interpreted. There are 7 Boolean flags
+ in use, only four of which can be changed by the user:
+ WRITEBACKIFCOPY, UPDATEIFCOPY, WRITEABLE, and ALIGNED.
+
+ WRITEABLE (W) the data area can be written to;
+
+ ALIGNED (A) the data and strides are aligned appropriately for the hardware
+ (as determined by the compiler);
+
+ UPDATEIFCOPY (U) (deprecated), replaced by WRITEBACKIFCOPY;
+
+ WRITEBACKIFCOPY (X) this array is a copy of some other array (referenced
+ by .base). When the C-API function PyArray_ResolveWritebackIfCopy is
+ called, the base array will be updated with the contents of this array.
+
+ All flags can be accessed using the single (upper case) letter as well
+ as the full name.
+
+ Examples
+ --------
+ >>> y = np.array([[3, 1, 7],
+ ... [2, 0, 0],
+ ... [8, 5, 9]])
+ >>> y
+ array([[3, 1, 7],
+ [2, 0, 0],
+ [8, 5, 9]])
+ >>> y.flags
+ C_CONTIGUOUS : True
+ F_CONTIGUOUS : False
+ OWNDATA : True
+ WRITEABLE : True
+ ALIGNED : True
+ WRITEBACKIFCOPY : False
+ UPDATEIFCOPY : False
+ >>> y.setflags(write=0, align=0)
+ >>> y.flags
+ C_CONTIGUOUS : True
+ F_CONTIGUOUS : False
+ OWNDATA : True
+ WRITEABLE : False
+ ALIGNED : False
+ WRITEBACKIFCOPY : False
+ UPDATEIFCOPY : False
+ >>> y.setflags(uic=1)
+ Traceback (most recent call last):
+ File "<stdin>", line 1, in <module>
+ ValueError: cannot set WRITEBACKIFCOPY flag to True
+
+ """))
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('sort',
+ """
+ a.sort(axis=-1, kind=None, order=None)
+
+ Sort an array in-place. Refer to `numpy.sort` for full documentation.
+
+ Parameters
+ ----------
+ axis : int, optional
+ Axis along which to sort. Default is -1, which means sort along the
+ last axis.
+ kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, optional
+ Sorting algorithm. The default is 'quicksort'. Note that both 'stable'
+ and 'mergesort' use timsort under the covers and, in general, the
+ actual implementation will vary with datatype. The 'mergesort' option
+ is retained for backwards compatibility.
+
+ .. versionchanged:: 1.15.0
+ The 'stable' option was added.
+
+ order : str or list of str, optional
+ When `a` is an array with fields defined, this argument specifies
+ which fields to compare first, second, etc. A single field can
+ be specified as a string, and not all fields need be specified,
+ but unspecified fields will still be used, in the order in which
+ they come up in the dtype, to break ties.
+
+ See Also
+ --------
+ numpy.sort : Return a sorted copy of an array.
+ numpy.argsort : Indirect sort.
+ numpy.lexsort : Indirect stable sort on multiple keys.
+ numpy.searchsorted : Find elements in sorted array.
+ numpy.partition: Partial sort.
+
+ Notes
+ -----
+ See `numpy.sort` for notes on the different sorting algorithms.
+
+ Examples
+ --------
+ >>> a = np.array([[1,4], [3,1]])
+ >>> a.sort(axis=1)
+ >>> a
+ array([[1, 4],
+ [1, 3]])
+ >>> a.sort(axis=0)
+ >>> a
+ array([[1, 3],
+ [1, 4]])
+
+ Use the `order` keyword to specify a field to use when sorting a
+ structured array:
+
+ >>> a = np.array([('a', 2), ('c', 1)], dtype=[('x', 'S1'), ('y', int)])
+ >>> a.sort(order='y')
+ >>> a
+ array([(b'c', 1), (b'a', 2)],
+ dtype=[('x', 'S1'), ('y', '<i8')])
+
+ """))
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('partition',
+ """
+ a.partition(kth, axis=-1, kind='introselect', order=None)
+
+ Rearranges the elements in the array in such a way that the value of the
+ element in kth position is in the position it would be in a sorted array.
+ All elements smaller than the kth element are moved before this element and
+ all equal or greater are moved behind it. The ordering of the elements in
+ the two partitions is undefined.
+
+ .. versionadded:: 1.8.0
+
+ Parameters
+ ----------
+ kth : int or sequence of ints
+ Element index to partition by. The kth element value will be in its
+ final sorted position and all smaller elements will be moved before it
+ and all equal or greater elements behind it. The order of all elements
+ in the partitions is undefined. If provided with a sequence of kth it
+ will partition all elements indexed by kth of them into their sorted
+ position at once.
+ axis : int, optional
+ Axis along which to sort. Default is -1, which means sort along the
+ last axis.
+ kind : {'introselect'}, optional
+ Selection algorithm. Default is 'introselect'.
+ order : str or list of str, optional
+ When `a` is an array with fields defined, this argument specifies
+ which fields to compare first, second, etc. A single field can
+ be specified as a string, and not all fields need to be specified,
+ but unspecified fields will still be used, in the order in which
+ they come up in the dtype, to break ties.
+
+ See Also
+ --------
+ numpy.partition : Return a partitioned copy of an array.
+ argpartition : Indirect partition.
+ sort : Full sort.
+
+ Notes
+ -----
+ See ``np.partition`` for notes on the different algorithms.
+
+ Examples
+ --------
+ >>> a = np.array([3, 4, 2, 1])
+ >>> a.partition(3)
+ >>> a
+ array([2, 1, 3, 4])
+
+ >>> a.partition((1, 3))
+ >>> a
+ array([1, 2, 3, 4])
+ """))
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('squeeze',
+ """
+ a.squeeze(axis=None)
+
+ Remove axes of length one from `a`.
+
+ Refer to `numpy.squeeze` for full documentation.
+
+ See Also
+ --------
+ numpy.squeeze : equivalent function
+
+ """))
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('std',
+ """
+ a.std(axis=None, dtype=None, out=None, ddof=0, keepdims=False, *, where=True)
+
+ Returns the standard deviation of the array elements along given axis.
+
+ Refer to `numpy.std` for full documentation.
+
+ See Also
+ --------
+ numpy.std : equivalent function
+
+ """))
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('sum',
+ """
+ a.sum(axis=None, dtype=None, out=None, keepdims=False, initial=0, where=True)
+
+ Return the sum of the array elements over the given axis.
+
+ Refer to `numpy.sum` for full documentation.
+
+ See Also
+ --------
+ numpy.sum : equivalent function
+
+ """))
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('swapaxes',
+ """
+ a.swapaxes(axis1, axis2)
+
+ Return a view of the array with `axis1` and `axis2` interchanged.
+
+ Refer to `numpy.swapaxes` for full documentation.
+
+ See Also
+ --------
+ numpy.swapaxes : equivalent function
+
+ """))
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('take',
+ """
+ a.take(indices, axis=None, out=None, mode='raise')
+
+ Return an array formed from the elements of `a` at the given indices.
+
+ Refer to `numpy.take` for full documentation.
+
+ See Also
+ --------
+ numpy.take : equivalent function
+
+ """))
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('tofile',
+ """
+ a.tofile(fid, sep="", format="%s")
+
+ Write array to a file as text or binary (default).
+
+ Data is always written in 'C' order, independent of the order of `a`.
+ The data produced by this method can be recovered using the function
+ fromfile().
+
+ Parameters
+ ----------
+ fid : file or str or Path
+ An open file object, or a string containing a filename.
+
+ .. versionchanged:: 1.17.0
+ `pathlib.Path` objects are now accepted.
+
+ sep : str
+ Separator between array items for text output.
+ If "" (empty), a binary file is written, equivalent to
+ ``file.write(a.tobytes())``.
+ format : str
+ Format string for text file output.
+ Each entry in the array is formatted to text by first converting
+ it to the closest Python type, and then using "format" % item.
+
+ Notes
+ -----
+ This is a convenience function for quick storage of array data.
+ Information on endianness and precision is lost, so this method is not a
+ good choice for files intended to archive data or transport data between
+ machines with different endianness. Some of these problems can be overcome
+ by outputting the data as text files, at the expense of speed and file
+ size.
+
+ When fid is a file object, array contents are directly written to the
+ file, bypassing the file object's ``write`` method.
As a result, tofile
+ cannot be used with file objects supporting compression (e.g., GzipFile)
+ or file-like objects that do not support ``fileno()`` (e.g., BytesIO).
+
+ """))
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('tolist',
+ """
+ a.tolist()
+
+ Return the array as an ``a.ndim``-levels deep nested list of Python scalars.
+
+ Return a copy of the array data as a (nested) Python list.
+ Data items are converted to the nearest compatible builtin Python type, via
+ the `~numpy.ndarray.item` function.
+
+ If ``a.ndim`` is 0, then since the depth of the nested list is 0, it will
+ not be a list at all, but a simple Python scalar.
+
+ Parameters
+ ----------
+ none
+
+ Returns
+ -------
+ y : object, or list of object, or list of list of object, or ...
+ The possibly nested list of array elements.
+
+ Notes
+ -----
+ The array may be recreated via ``a = np.array(a.tolist())``, although this
+ may sometimes lose precision.
+
+ Examples
+ --------
+ For a 1D array, ``a.tolist()`` is almost the same as ``list(a)``,
+ except that ``tolist`` changes numpy scalars to Python scalars:
+
+ >>> a = np.uint32([1, 2])
+ >>> a_list = list(a)
+ >>> a_list
+ [1, 2]
+ >>> type(a_list[0])
+ <class 'numpy.uint32'>
+ >>> a_tolist = a.tolist()
+ >>> a_tolist
+ [1, 2]
+ >>> type(a_tolist[0])
+ <class 'int'>
+
+ Additionally, for a 2D array, ``tolist`` applies recursively:
+
+ >>> a = np.array([[1, 2], [3, 4]])
+ >>> list(a)
+ [array([1, 2]), array([3, 4])]
+ >>> a.tolist()
+ [[1, 2], [3, 4]]
+
+ The base case for this recursion is a 0D array:
+
+ >>> a = np.array(1)
+ >>> list(a)
+ Traceback (most recent call last):
+ ...
+ TypeError: iteration over a 0-d array
+ >>> a.tolist()
+ 1
+ """))
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('tobytes', """
+ a.tobytes(order='C')
+
+ Construct Python bytes containing the raw data bytes in the array.
+
+ Constructs Python bytes showing a copy of the raw contents of
+ data memory. The bytes object is produced in C-order by default.
+ This behavior is controlled by the ``order`` parameter.
+
+ .. versionadded:: 1.9.0
+
+ Parameters
+ ----------
+ order : {'C', 'F', 'A'}, optional
+ Controls the memory layout of the bytes object. 'C' means C-order,
+ 'F' means F-order, 'A' (short for *Any*) means 'F' if `a` is
+ Fortran contiguous, 'C' otherwise. Default is 'C'.
+
+ Returns
+ -------
+ s : bytes
+ Python bytes exhibiting a copy of `a`'s raw data.
+
+ Examples
+ --------
+ >>> x = np.array([[0, 1], [2, 3]], dtype='<u2')
+ >>> x.tobytes()
+ b'\\x00\\x00\\x01\\x00\\x02\\x00\\x03\\x00'
+ >>> x.tobytes('C') == x.tobytes()
+ True
+ >>> x.tobytes('F')
+ b'\\x00\\x00\\x02\\x00\\x01\\x00\\x03\\x00'
+
+ """))
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('tostring', r"""
+ a.tostring(order='C')
+
+ A compatibility alias for `tobytes`, with exactly the same behavior.
+
+ Despite its name, it returns `bytes` not `str`\ s.
+
+ .. deprecated:: 1.19.0
+ """))
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('trace',
+ """
+ a.trace(offset=0, axis1=0, axis2=1, dtype=None, out=None)
+
+ Return the sum along diagonals of the array.
+
+ Refer to `numpy.trace` for full documentation.
+
+ See Also
+ --------
+ numpy.trace : equivalent function
+
+ """))
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('transpose',
+ """
+ a.transpose(*axes)
+
+ Returns a view of the array with axes transposed.
+
+ For a 1-D array this has no effect, as a transposed vector is simply the
+ same vector. To convert a 1-D array into a 2D column vector, an additional
+ dimension must be added.
`np.atleast_2d(a).T` achieves this, as does
+ `a[:, np.newaxis]`.
+ For a 2-D array, this is a standard matrix transpose.
+ For an n-D array, if axes are given, their order indicates how the
+ axes are permuted (see Examples). If axes are not provided and
+ ``a.shape = (i[0], i[1], ... i[n-2], i[n-1])``, then
+ ``a.transpose().shape = (i[n-1], i[n-2], ... i[1], i[0])``.
+
+ Parameters
+ ----------
+ axes : None, tuple of ints, or `n` ints
+
+ * None or no argument: reverses the order of the axes.
+
+ * tuple of ints: `i` in the `j`-th place in the tuple means `a`'s
+ `i`-th axis becomes `a.transpose()`'s `j`-th axis.
+
+ * `n` ints: same as an n-tuple of the same ints (this form is
+ intended simply as a "convenience" alternative to the tuple form)
+
+ Returns
+ -------
+ out : ndarray
+ View of `a`, with axes suitably permuted.
+
+ See Also
+ --------
+ transpose : Equivalent function
+ ndarray.T : Array property returning the array transposed.
+ ndarray.reshape : Give a new shape to an array without changing its data.
+
+ Examples
+ --------
+ >>> a = np.array([[1, 2], [3, 4]])
+ >>> a
+ array([[1, 2],
+ [3, 4]])
+ >>> a.transpose()
+ array([[1, 3],
+ [2, 4]])
+ >>> a.transpose((1, 0))
+ array([[1, 3],
+ [2, 4]])
+ >>> a.transpose(1, 0)
+ array([[1, 3],
+ [2, 4]])
+
+ """))
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('var',
+ """
+ a.var(axis=None, dtype=None, out=None, ddof=0, keepdims=False, *, where=True)
+
+ Returns the variance of the array elements, along given axis.
+
+ Refer to `numpy.var` for full documentation.
+
+ See Also
+ --------
+ numpy.var : equivalent function
+
+ """))
+
+
+ add_newdoc('numpy.core.multiarray', 'ndarray', ('view',
+ """
+ a.view([dtype][, type])
+
+ New view of array with the same data.
+
+ .. note::
+ Passing None for ``dtype`` is different from omitting the parameter,
+ since the former invokes ``dtype(None)`` which is an alias for
+ ``dtype('float_')``.
+
+ Parameters
+ ----------
+ dtype : data-type or ndarray sub-class, optional
+ Data-type descriptor of the returned view, e.g., float32 or int16.
+ Omitting it results in the view having the same data-type as `a`.
+ This argument can also be specified as an ndarray sub-class, which
+ then specifies the type of the returned object (this is equivalent to
+ setting the ``type`` parameter).
+ type : Python type, optional
+ Type of the returned view, e.g., ndarray or matrix. Again, omission
+ of the parameter results in type preservation.
+
+ Notes
+ -----
+ ``a.view()`` is used two different ways:
+
+ ``a.view(some_dtype)`` or ``a.view(dtype=some_dtype)`` constructs a view
+ of the array's memory with a different data-type. This can cause a
+ reinterpretation of the bytes of memory.
+
+ ``a.view(ndarray_subclass)`` or ``a.view(type=ndarray_subclass)`` just
+ returns an instance of `ndarray_subclass` that looks at the same array
+ (same shape, dtype, etc.) This does not cause a reinterpretation of the
+ memory.
+
+ For ``a.view(some_dtype)``, if ``some_dtype`` has a different number of
+ bytes per entry than the previous dtype (for example, converting a
+ regular array to a structured array), then the behavior of the view
+ cannot be predicted just from the superficial appearance of ``a`` (shown
+ by ``print(a)``). It also depends on exactly how ``a`` is stored in
+ memory. Therefore if ``a`` is C-ordered versus fortran-ordered, versus
+ defined as a slice or transpose, etc., the view may give different
+ results.
+
+
+ Examples
+ --------
+ >>> x = np.array([(1, 2)], dtype=[('a', np.int8), ('b', np.int8)])
+
+ Viewing array data using a different type and dtype:
+
+ >>> y = x.view(dtype=np.int16, type=np.matrix)
+ >>> y
+ matrix([[513]], dtype=int16)
+ >>> print(type(y))
+ <class 'numpy.matrix'>
+
+ Creating a view on a structured array so it can be used in calculations
+
+ >>> x = np.array([(1, 2),(3,4)], dtype=[('a', np.int8), ('b', np.int8)])
+ >>> xv = x.view(dtype=np.int8).reshape(-1,2)
+ >>> xv
+ array([[1, 2],
+ [3, 4]], dtype=int8)
+ >>> xv.mean(0)
+ array([2., 3.])
+
+ Making changes to the view changes the underlying array
+
+ >>> xv[0,1] = 20
+ >>> x
+ array([(1, 20), (3, 4)], dtype=[('a', 'i1'), ('b', 'i1')])
+
+ Using a view to convert an array to a recarray:
+
+ >>> z = x.view(np.recarray)
+ >>> z.a
+ array([1, 3], dtype=int8)
+
+ Views share data:
+
+ >>> x[0] = (9, 10)
+ >>> z[0]
+ (9, 10)
+
+ Views that change the dtype size (bytes per entry) should normally be
+ avoided on arrays defined by slices, transposes, fortran-ordering, etc.:
+
+ >>> x = np.array([[1,2,3],[4,5,6]], dtype=np.int16)
+ >>> y = x[:, 0:2]
+ >>> y
+ array([[1, 2],
+ [4, 5]], dtype=int16)
+ >>> y.view(dtype=[('width', np.int16), ('length', np.int16)])
+ Traceback (most recent call last):
+ ...
+ ValueError: To change to a dtype of a different size, the array must be C-contiguous
+ >>> z = y.copy()
+ >>> z.view(dtype=[('width', np.int16), ('length', np.int16)])
+ array([[(1, 2)],
+ [(4, 5)]], dtype=[('width', '<i2'), ('length', '<i2')])
+
+ """))
+
+
+ ##############################################################################
+ #
+ # umath functions
+ #
+ ##############################################################################
+
+ add_newdoc('numpy.core.umath', 'frompyfunc',
+ """
+ frompyfunc(func, /, nin, nout, *[, identity])
+
+ Takes an arbitrary Python function and returns a NumPy ufunc.
+
+ Can be used, for example, to add broadcasting to a built-in Python
+ function (see Examples section).
+
+ Parameters
+ ----------
+ func : Python function object
+ An arbitrary Python function.
+ nin : int
+ The number of input arguments.
+ nout : int
+ The number of objects returned by `func`.
+ identity : object, optional
+ The value to use for the `~numpy.ufunc.identity` attribute of the
+ resulting object.
+
+ Returns
+ -------
+ out : ufunc
+ Returns a NumPy universal function (``ufunc``) object.
+
+ See Also
+ --------
+ vectorize : Evaluates pyfunc over input arrays using broadcasting rules of numpy.
+
+ Notes
+ -----
+ The returned ufunc always returns PyObject arrays.
+
+ Examples
+ --------
+ Use frompyfunc to add broadcasting to the Python function ``oct``:
+
+ >>> oct_array = np.frompyfunc(oct, 1, 1)
+ >>> oct_array(np.array((10, 30, 100)))
+ array(['0o12', '0o36', '0o144'], dtype=object)
+ >>> np.array((oct(10), oct(30), oct(100))) # for comparison
+ array(['0o12', '0o36', '0o144'], dtype='<U5')
+
+ """)
+
+ add_newdoc('numpy.core.umath', 'geterrobj',
+ """
+ geterrobj()
+
+ Return the current object that defines floating-point error handling.
+
+ The error object contains all information that defines the error handling
+ behavior in NumPy. `geterrobj` is used internally by the other
+ functions that get and set error handling behavior (`geterr`, `seterr`,
+ `geterrcall`, `seterrcall`).
+
+ Returns
+ -------
+ errobj : list
+ The error object, a list containing three elements:
+ [internal numpy buffer size, error mask, error callback function].
+ See `seterrobj` for how the error mask encodes the treatment of the
+ four floating-point errors.
+
+ See Also
+ --------
+ seterrobj, seterr, geterr, seterrcall, geterrcall
+ getbufsize, setbufsize
+
+ Notes
+ -----
+ For complete documentation of the types of floating-point exceptions and
+ treatment options, see `seterr`.
+
+ Examples
+ --------
+ >>> np.geterrobj() # first get the defaults
+ [8192, 521, None]
+
+ >>> def err_handler(type, flag):
+ ... print("Floating point error (%s), with flag %s" % (type, flag))
+ ...
+ >>> old_bufsize = np.setbufsize(20000)
+ >>> old_err = np.seterr(divide='raise')
+ >>> old_handler = np.seterrcall(err_handler)
+ >>> np.geterrobj()
+ [8192, 521, <function err_handler at 0x91dcaac>]
+
+ >>> old_err = np.seterr(all='ignore')
+ >>> np.base_repr(np.geterrobj()[1], 8)
+ '0'
+ >>> old_err = np.seterr(divide='warn', over='log', under='call',
+ ... invalid='print')
+ >>> np.base_repr(np.geterrobj()[1], 8)
+ '4351'
+
+ """)
+
+ add_newdoc('numpy.core.umath', 'seterrobj',
+ """
+ seterrobj(errobj, /)
+
+ Set the object that defines floating-point error handling.
+
+ The error object contains all information that defines the error handling
+ behavior in NumPy. `seterrobj` is used internally by the other
+ functions that set error handling behavior (`seterr`, `seterrcall`).
+
+ Parameters
+ ----------
+ errobj : list
+ The error object, a list containing three elements:
+ [internal numpy buffer size, error mask, error callback function].
+
+ The error mask is a single integer that holds the treatment information
+ on all four floating point errors. The information for each error type
+ is contained in three bits of the integer. If we print it in base 8, we
+ can see what treatment is set for "invalid", "under", "over", and
+ "divide" (in that order). The printed string can be interpreted with
+
+ * 0 : 'ignore'
+ * 1 : 'warn'
+ * 2 : 'raise'
+ * 3 : 'call'
+ * 4 : 'print'
+ * 5 : 'log'
+
+ See Also
+ --------
+ geterrobj, seterr, geterr, seterrcall, geterrcall
+ getbufsize, setbufsize
+
+ Notes
+ -----
+ For complete documentation of the types of floating-point exceptions and
+ treatment options, see `seterr`.
+
+ Examples
+ --------
+ >>> old_errobj = np.geterrobj() # first get the defaults
+ >>> old_errobj
+ [8192, 521, None]
+
+ >>> def err_handler(type, flag):
+ ... print("Floating point error (%s), with flag %s" % (type, flag))
+ ...
+ >>> new_errobj = [20000, 12, err_handler]
+ >>> np.seterrobj(new_errobj)
+ >>> np.base_repr(12, 8) # int for divide=4 ('print') and over=1 ('warn')
+ '14'
+ >>> np.geterr()
+ {'over': 'warn', 'divide': 'print', 'invalid': 'ignore', 'under': 'ignore'}
+ >>> np.geterrcall() is err_handler
+ True
+
+ """)
+
+
+ ##############################################################################
+ #
+ # compiled_base functions
+ #
+ ##############################################################################
+
+ add_newdoc('numpy.core.multiarray', 'add_docstring',
+ """
+ add_docstring(obj, docstring)
+
+ Add a docstring to a built-in obj if possible.
+ If the obj already has a docstring, raise a RuntimeError.
+ If this routine does not know how to add a docstring to the object,
+ raise a TypeError.
+ """)
+
+ add_newdoc('numpy.core.umath', '_add_newdoc_ufunc',
+ """
+ add_ufunc_docstring(ufunc, new_docstring)
+
+ Replace the docstring for a ufunc with new_docstring.
+ This method will only work if the current docstring for
+ the ufunc is NULL. (At the C level, i.e. when ufunc->doc is NULL.)
+
+ Parameters
+ ----------
+ ufunc : numpy.ufunc
+ A ufunc whose current doc is NULL.
+ new_docstring : string
+ The new docstring for the ufunc.
+
+ Notes
+ -----
+ This method allocates memory for new_docstring on
+ the heap. Technically this creates a memory leak, since this
+ memory will not be reclaimed until the end of the program
+ even if the ufunc itself is removed. However this will only
+ be a problem if the user is repeatedly creating ufuncs with
+ no documentation, adding documentation via add_newdoc_ufunc,
+ and then throwing away the ufunc.
+ """)
+
+ add_newdoc('numpy.core.multiarray', 'get_handler_name',
+ """
+ get_handler_name(a: ndarray) -> str,None
+
+ Return the name of the memory handler used by `a`. If not provided, return
+ the name of the memory handler that will be used to allocate data for the
+ next `ndarray` in this context. May return None if `a` does not own its
+ memory, in which case you can traverse ``a.base`` for a memory handler.
+ """)
+
+ add_newdoc('numpy.core.multiarray', 'get_handler_version',
+ """
+ get_handler_version(a: ndarray) -> int,None
+
+ Return the version of the memory handler used by `a`. If not provided,
+ return the version of the memory handler that will be used to allocate data
+ for the next `ndarray` in this context. May return None if `a` does not own
+ its memory, in which case you can traverse ``a.base`` for a memory handler.
+ """)
+
+ add_newdoc('numpy.core.multiarray', '_set_madvise_hugepage',
+ """
+ _set_madvise_hugepage(enabled: bool) -> bool
+
+ Set or unset use of ``madvise (2)`` MADV_HUGEPAGE support when
+ allocating the array data. Returns the previously set value.
+ See `global_state` for more information.
+ """)
+
+ add_newdoc('numpy.core._multiarray_tests', 'format_float_OSprintf_g',
+ """
+ format_float_OSprintf_g(val, precision)
+
+ Print a floating point scalar using the system's printf function,
+ equivalent to:
+
+ printf("%.*g", precision, val);
+
+ for half/float/double, or replacing 'g' by 'Lg' for longdouble. This
+ method is designed to help cross-validate the format_float_* methods.
+
+ Parameters
+ ----------
+ val : python float or numpy floating scalar
+ Value to format.
+
+ precision : non-negative integer, optional
+ Precision given to printf.
+
+ Returns
+ -------
+ rep : string
+ The string representation of the floating point value
+
+ See Also
+ --------
+ format_float_scientific
+ format_float_positional
+ """)
+
+
+ ##############################################################################
+ #
+ # Documentation for ufunc attributes and methods
+ #
+ ##############################################################################
+
+
+ ##############################################################################
+ #
+ # ufunc object
+ #
+ ##############################################################################
+
+ add_newdoc('numpy.core', 'ufunc',
+ """
+ Functions that operate element by element on whole arrays.
+
+ To see the documentation for a specific ufunc, use `info`. For
+ example, ``np.info(np.sin)``. Because ufuncs are written in C
+ (for speed) and linked into Python with NumPy's ufunc facility,
+ Python's help() function finds this page whenever help() is called
+ on a ufunc.
+
+ A detailed explanation of ufuncs can be found in the docs for :ref:`ufuncs`.
+
+ **Calling ufuncs:** ``op(*x[, out], where=True, **kwargs)``
+
+ Apply `op` to the arguments `*x` elementwise, broadcasting the arguments.
+
+ The broadcasting rules are:
+
+ * Dimensions of length 1 may be prepended to either array.
+ * Arrays may be repeated along dimensions of length 1.
+
+ Parameters
+ ----------
+ *x : array_like
+ Input arrays.
+ out : ndarray, None, or tuple of ndarray and None, optional
+ Alternate array object(s) in which to put the result; if provided, it
+ must have a shape that the inputs broadcast to. A tuple of arrays
+ (possible only as a keyword argument) must have length equal to the
+ number of outputs; use None for uninitialized outputs to be
+ allocated by the ufunc.
+ where : array_like, optional
+ This condition is broadcast over the input. At locations where the
+ condition is True, the `out` array will be set to the ufunc result.
+ Elsewhere, the `out` array will retain its original value.
+ Note that if an uninitialized `out` array is created via the default
+ ``out=None``, locations within it where the condition is False will
+ remain uninitialized.
+ **kwargs
+ For other keyword-only arguments, see the :ref:`ufunc docs <ufuncs.kwargs>`.
+
+ Returns
+ -------
+ r : ndarray or tuple of ndarray
+ `r` will have the shape that the arrays in `x` broadcast to; if `out` is
+ provided, it will be returned. If not, `r` will be allocated and
+ may contain uninitialized values. If the function has more than one
+ output, then the result will be a tuple of arrays.
+
+ """)
+
+
+ ##############################################################################
+ #
+ # ufunc attributes
+ #
+ ##############################################################################
+
+ add_newdoc('numpy.core', 'ufunc', ('identity',
+ """
+ The identity value.
+
+ Data attribute containing the identity element for the ufunc, if it has one.
+ If it does not, the attribute value is None.
+
+ Examples
+ --------
+ >>> np.add.identity
+ 0
+ >>> np.multiply.identity
+ 1
+ >>> np.power.identity
+ 1
+ >>> print(np.exp.identity)
+ None
+ """))
+
+ add_newdoc('numpy.core', 'ufunc', ('nargs',
+ """
+ The number of arguments.
+
+ Data attribute containing the number of arguments the ufunc takes, including
+ optional ones.
+
+ Notes
+ -----
+ Typically this value will be one more than what you might expect because all
+ ufuncs take the optional "out" argument.
+ + Examples + -------- + >>> np.add.nargs + 3 + >>> np.multiply.nargs + 3 + >>> np.power.nargs + 3 + >>> np.exp.nargs + 2 + """)) + +add_newdoc('numpy.core', 'ufunc', ('nin', + """ + The number of inputs. + + Data attribute containing the number of arguments the ufunc treats as input. + + Examples + -------- + >>> np.add.nin + 2 + >>> np.multiply.nin + 2 + >>> np.power.nin + 2 + >>> np.exp.nin + 1 + """)) + +add_newdoc('numpy.core', 'ufunc', ('nout', + """ + The number of outputs. + + Data attribute containing the number of arguments the ufunc treats as output. + + Notes + ----- + Since all ufuncs can take output arguments, this will always be (at least) 1. + + Examples + -------- + >>> np.add.nout + 1 + >>> np.multiply.nout + 1 + >>> np.power.nout + 1 + >>> np.exp.nout + 1 + + """)) + +add_newdoc('numpy.core', 'ufunc', ('ntypes', + """ + The number of types. + + The number of numerical NumPy types - of which there are 18 total - on which + the ufunc can operate. + + See Also + -------- + numpy.ufunc.types + + Examples + -------- + >>> np.add.ntypes + 18 + >>> np.multiply.ntypes + 18 + >>> np.power.ntypes + 17 + >>> np.exp.ntypes + 7 + >>> np.remainder.ntypes + 14 + + """)) + +add_newdoc('numpy.core', 'ufunc', ('types', + """ + Returns a list with types grouped input->output. + + Data attribute listing the data-type "Domain-Range" groupings the ufunc can + deliver. The data-types are given using the character codes. + + See Also + -------- + numpy.ufunc.ntypes + + Examples + -------- + >>> np.add.types + ['??->?', 'bb->b', 'BB->B', 'hh->h', 'HH->H', 'ii->i', 'II->I', 'll->l', + 'LL->L', 'qq->q', 'QQ->Q', 'ff->f', 'dd->d', 'gg->g', 'FF->F', 'DD->D', + 'GG->G', 'OO->O'] + + >>> np.multiply.types + ['??->?', 'bb->b', 'BB->B', 'hh->h', 'HH->H', 'ii->i', 'II->I', 'll->l', + 'LL->L', 'qq->q', 'QQ->Q', 'ff->f', 'dd->d', 'gg->g', 'FF->F', 'DD->D', + 'GG->G', 'OO->O'] + + >>> np.power.types + ['bb->b', 'BB->B', 'hh->h', 'HH->H', 'ii->i', 'II->I', 'll->l', 'LL->L', + 'qq->q', 'QQ->Q', 'ff->f', 'dd->d', 'gg->g', 'FF->F', 'DD->D', 'GG->G', + 'OO->O'] + + >>> np.exp.types + ['f->f', 'd->d', 'g->g', 'F->F', 'D->D', 'G->G', 'O->O'] + + >>> np.remainder.types + ['bb->b', 'BB->B', 'hh->h', 'HH->H', 'ii->i', 'II->I', 'll->l', 'LL->L', + 'qq->q', 'QQ->Q', 'ff->f', 'dd->d', 'gg->g', 'OO->O'] + + """)) + +add_newdoc('numpy.core', 'ufunc', ('signature', + """ + Definition of the core elements a generalized ufunc operates on. + + The signature determines how the dimensions of each input/output array + are split into core and loop dimensions: + + 1. Each dimension in the signature is matched to a dimension of the + corresponding passed-in array, starting from the end of the shape tuple. + 2. Core dimensions assigned to the same label in the signature must have + exactly matching sizes, no broadcasting is performed. + 3. The core dimensions are removed from all inputs and the remaining + dimensions are broadcast together, defining the loop dimensions. + + Notes + ----- + Generalized ufuncs are used internally in many linalg functions, and in + the testing suite; the examples below are taken from these. + For ufuncs that operate on scalars, the signature is None, which is + equivalent to '()' for every argument. 
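+
+    For instance, with signature ``(m,n),(n,p)->(m,p)``, inputs of shape
+    ``(3, 2, 4)`` and ``(4, 5)`` produce an output of shape ``(3, 2, 5)``:
+    the trailing ``(2, 4)`` and ``(4, 5)`` are the core dimensions, and the
+    leading ``3`` is a broadcast loop dimension.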
+
+    Examples
+    --------
+    >>> np.core.umath_tests.matrix_multiply.signature
+    '(m,n),(n,p)->(m,p)'
+    >>> np.linalg._umath_linalg.det.signature
+    '(m,m)->()'
+    >>> np.add.signature is None
+    True  # equivalent to '(),()->()'
+    """))
+
+##############################################################################
+#
+# ufunc methods
+#
+##############################################################################
+
+add_newdoc('numpy.core', 'ufunc', ('reduce',
+    """
+    reduce(array, axis=0, dtype=None, out=None, keepdims=False, initial=<no value>, where=True)
+
+    Reduces `array`'s dimension by one, by applying ufunc along one axis.
+
+    Let :math:`array.shape = (N_0, ..., N_i, ..., N_{M-1})`. Then
+    :math:`ufunc.reduce(array, axis=i)[k_0, ..,k_{i-1}, k_{i+1}, .., k_{M-1}]` =
+    the result of iterating `j` over :math:`range(N_i)`, cumulatively applying
+    ufunc to each :math:`array[k_0, ..,k_{i-1}, j, k_{i+1}, .., k_{M-1}]`.
+    For a one-dimensional array, reduce produces results equivalent to:
+    ::
+
+     r = op.identity # op = ufunc
+     for i in range(len(A)):
+       r = op(r, A[i])
+     return r
+
+    For example, add.reduce() is equivalent to sum().
+
+    Parameters
+    ----------
+    array : array_like
+        The array to act on.
+    axis : None or int or tuple of ints, optional
+        Axis or axes along which a reduction is performed.
+        The default (`axis` = 0) is to perform a reduction over the first
+        dimension of the input array. `axis` may be negative, in
+        which case it counts from the last to the first axis.
+
+        .. versionadded:: 1.7.0
+
+        If this is None, a reduction is performed over all the axes.
+        If this is a tuple of ints, a reduction is performed on multiple
+        axes, instead of a single axis or all the axes as before.
+
+        For operations which are either not commutative or not associative,
+        doing a reduction over multiple axes is not well-defined. The
+        ufuncs do not currently raise an exception in this case, but will
+        likely do so in the future.
+    dtype : data-type code, optional
+        The type used to represent the intermediate results. Defaults
+        to the data-type of the output array if this is provided, or
+        the data-type of the input array if no output array is provided.
+    out : ndarray, None, or tuple of ndarray and None, optional
+        A location into which the result is stored. If not provided or None,
+        a freshly-allocated array is returned. For consistency with
+        ``ufunc.__call__``, if given as a keyword, this may be wrapped in a
+        1-element tuple.
+
+        .. versionchanged:: 1.13.0
+           Tuples are allowed for keyword argument.
+    keepdims : bool, optional
+        If this is set to True, the axes which are reduced are left
+        in the result as dimensions with size one. With this option,
+        the result will broadcast correctly against the original `array`.
+
+        .. versionadded:: 1.7.0
+    initial : scalar, optional
+        The value with which to start the reduction.
+        If the ufunc has no identity or the dtype is object, this defaults
+        to None - otherwise it defaults to ufunc.identity.
+        If ``None`` is given, the first element of the reduction is used,
+        and an error is thrown if the reduction is empty.
+
+        .. versionadded:: 1.15.0
+
+    where : array_like of bool, optional
+        A boolean array which is broadcasted to match the dimensions
+        of `array`, and selects elements to include in the reduction. Note
+        that for ufuncs like ``minimum`` that do not have an identity
+        defined, one has to pass in also ``initial``.
+
+        .. versionadded:: 1.17.0
+
+    Returns
+    -------
+    r : ndarray
+        The reduced array. If `out` was supplied, `r` is a reference to it.
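+
+    Notes
+    -----
+    For a ufunc with an identity, reducing an empty array (without
+    ``initial``) returns that identity, e.g. ``np.add.reduce([])`` is ``0.0``.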
+
+    Examples
+    --------
+    >>> np.multiply.reduce([2,3,5])
+    30
+
+    A multi-dimensional array example:
+
+    >>> X = np.arange(8).reshape((2,2,2))
+    >>> X
+    array([[[0, 1],
+            [2, 3]],
+           [[4, 5],
+            [6, 7]]])
+    >>> np.add.reduce(X, 0)
+    array([[ 4,  6],
+           [ 8, 10]])
+    >>> np.add.reduce(X) # confirm: default axis value is 0
+    array([[ 4,  6],
+           [ 8, 10]])
+    >>> np.add.reduce(X, 1)
+    array([[ 2,  4],
+           [10, 12]])
+    >>> np.add.reduce(X, 2)
+    array([[ 1,  5],
+           [ 9, 13]])
+
+    You can use the ``initial`` keyword argument to initialize the reduction
+    with a different value, and ``where`` to select specific elements to include:
+
+    >>> np.add.reduce([10], initial=5)
+    15
+    >>> np.add.reduce(np.ones((2, 2, 2)), axis=(0, 2), initial=10)
+    array([14., 14.])
+    >>> a = np.array([10., np.nan, 10])
+    >>> np.add.reduce(a, where=~np.isnan(a))
+    20.0
+
+    Allows reductions of empty arrays where they would normally fail, i.e.
+    for ufuncs without an identity.
+
+    >>> np.minimum.reduce([], initial=np.inf)
+    inf
+    >>> np.minimum.reduce([[1., 2.], [3., 4.]], initial=10., where=[True, False])
+    array([ 1., 10.])
+    >>> np.minimum.reduce([])
+    Traceback (most recent call last):
+        ...
+    ValueError: zero-size array to reduction operation minimum which has no identity
+    """))
+
+add_newdoc('numpy.core', 'ufunc', ('accumulate',
+    """
+    accumulate(array, axis=0, dtype=None, out=None)
+
+    Accumulate the result of applying the operator to all elements.
+
+    For a one-dimensional array, accumulate produces results equivalent to::
+
+      r = np.empty(len(A))
+      t = op.identity        # op = the ufunc being applied to A's elements
+      for i in range(len(A)):
+          t = op(t, A[i])
+          r[i] = t
+      return r
+
+    For example, add.accumulate() is equivalent to np.cumsum().
+
+    For a multi-dimensional array, accumulate is applied along only one
+    axis (axis zero by default; see Examples below) so repeated use is
+    necessary if one wants to accumulate over multiple axes.
+
+    Parameters
+    ----------
+    array : array_like
+        The array to act on.
+    axis : int, optional
+        The axis along which to apply the accumulation; default is zero.
+    dtype : data-type code, optional
+        The data-type used to represent the intermediate results. Defaults
+        to the data-type of the output array if such is provided, or the
+        data-type of the input array if no output array is provided.
+    out : ndarray, None, or tuple of ndarray and None, optional
+        A location into which the result is stored. If not provided or None,
+        a freshly-allocated array is returned. For consistency with
+        ``ufunc.__call__``, if given as a keyword, this may be wrapped in a
+        1-element tuple.
+
+        .. versionchanged:: 1.13.0
+           Tuples are allowed for keyword argument.
+
+    Returns
+    -------
+    r : ndarray
+        The accumulated values. If `out` was supplied, `r` is a reference to
+        `out`.
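+
+    Notes
+    -----
+    The output has the same shape as `array`; element ``i`` along `axis`
+    holds the reduction of the first ``i + 1`` input elements, so the final
+    element matches ``ufunc.reduce(array, axis=axis)``.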
+ + Examples + -------- + 1-D array examples: + + >>> np.add.accumulate([2, 3, 5]) + array([ 2, 5, 10]) + >>> np.multiply.accumulate([2, 3, 5]) + array([ 2, 6, 30]) + + 2-D array examples: + + >>> I = np.eye(2) + >>> I + array([[1., 0.], + [0., 1.]]) + + Accumulate along axis 0 (rows), down columns: + + >>> np.add.accumulate(I, 0) + array([[1., 0.], + [1., 1.]]) + >>> np.add.accumulate(I) # no axis specified = axis zero + array([[1., 0.], + [1., 1.]]) + + Accumulate along axis 1 (columns), through rows: + + >>> np.add.accumulate(I, 1) + array([[1., 1.], + [0., 1.]]) + + """)) + +add_newdoc('numpy.core', 'ufunc', ('reduceat', + """ + reduceat(array, indices, axis=0, dtype=None, out=None) + + Performs a (local) reduce with specified slices over a single axis. + + For i in ``range(len(indices))``, `reduceat` computes + ``ufunc.reduce(array[indices[i]:indices[i+1]])``, which becomes the i-th + generalized "row" parallel to `axis` in the final result (i.e., in a + 2-D array, for example, if `axis = 0`, it becomes the i-th row, but if + `axis = 1`, it becomes the i-th column). There are three exceptions to this: + + * when ``i = len(indices) - 1`` (so for the last index), + ``indices[i+1] = array.shape[axis]``. + * if ``indices[i] >= indices[i + 1]``, the i-th generalized "row" is + simply ``array[indices[i]]``. + * if ``indices[i] >= len(array)`` or ``indices[i] < 0``, an error is raised. + + The shape of the output depends on the size of `indices`, and may be + larger than `array` (this happens if ``len(indices) > array.shape[axis]``). + + Parameters + ---------- + array : array_like + The array to act on. + indices : array_like + Paired indices, comma separated (not colon), specifying slices to + reduce. + axis : int, optional + The axis along which to apply the reduceat. + dtype : data-type code, optional + The type used to represent the intermediate results. Defaults + to the data type of the output array if this is provided, or + the data type of the input array if no output array is provided. + out : ndarray, None, or tuple of ndarray and None, optional + A location into which the result is stored. If not provided or None, + a freshly-allocated array is returned. For consistency with + ``ufunc.__call__``, if given as a keyword, this may be wrapped in a + 1-element tuple. + + .. versionchanged:: 1.13.0 + Tuples are allowed for keyword argument. + + Returns + ------- + r : ndarray + The reduced values. If `out` was supplied, `r` is a reference to + `out`. + + Notes + ----- + A descriptive example: + + If `array` is 1-D, the function `ufunc.accumulate(array)` is the same as + ``ufunc.reduceat(array, indices)[::2]`` where `indices` is + ``range(len(array) - 1)`` with a zero placed + in every other element: + ``indices = zeros(2 * len(array) - 1)``, + ``indices[1::2] = range(1, len(array))``. + + Don't be fooled by this attribute's name: `reduceat(array)` is not + necessarily smaller than `array`. 
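+
+    For a 1-D `array` with more than two elements, for example,
+    ``np.add.reduceat(array, [0, 2])`` computes
+    ``[array[0:2].sum(), array[2:].sum()]``.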
+ + Examples + -------- + To take the running sum of four successive values: + + >>> np.add.reduceat(np.arange(8),[0,4, 1,5, 2,6, 3,7])[::2] + array([ 6, 10, 14, 18]) + + A 2-D example: + + >>> x = np.linspace(0, 15, 16).reshape(4,4) + >>> x + array([[ 0., 1., 2., 3.], + [ 4., 5., 6., 7.], + [ 8., 9., 10., 11.], + [12., 13., 14., 15.]]) + + :: + + # reduce such that the result has the following five rows: + # [row1 + row2 + row3] + # [row4] + # [row2] + # [row3] + # [row1 + row2 + row3 + row4] + + >>> np.add.reduceat(x, [0, 3, 1, 2, 0]) + array([[12., 15., 18., 21.], + [12., 13., 14., 15.], + [ 4., 5., 6., 7.], + [ 8., 9., 10., 11.], + [24., 28., 32., 36.]]) + + :: + + # reduce such that result has the following two columns: + # [col1 * col2 * col3, col4] + + >>> np.multiply.reduceat(x, [0, 3], 1) + array([[ 0., 3.], + [ 120., 7.], + [ 720., 11.], + [2184., 15.]]) + + """)) + +add_newdoc('numpy.core', 'ufunc', ('outer', + r""" + outer(A, B, /, **kwargs) + + Apply the ufunc `op` to all pairs (a, b) with a in `A` and b in `B`. + + Let ``M = A.ndim``, ``N = B.ndim``. Then the result, `C`, of + ``op.outer(A, B)`` is an array of dimension M + N such that: + + .. math:: C[i_0, ..., i_{M-1}, j_0, ..., j_{N-1}] = + op(A[i_0, ..., i_{M-1}], B[j_0, ..., j_{N-1}]) + + For `A` and `B` one-dimensional, this is equivalent to:: + + r = empty(len(A),len(B)) + for i in range(len(A)): + for j in range(len(B)): + r[i,j] = op(A[i], B[j]) # op = ufunc in question + + Parameters + ---------- + A : array_like + First array + B : array_like + Second array + kwargs : any + Arguments to pass on to the ufunc. Typically `dtype` or `out`. + See `ufunc` for a comprehensive overview of all available arguments. + + Returns + ------- + r : ndarray + Output array + + See Also + -------- + numpy.outer : A less powerful version of ``np.multiply.outer`` + that `ravel`\ s all inputs to 1D. This exists + primarily for compatibility with old code. + + tensordot : ``np.tensordot(a, b, axes=((), ()))`` and + ``np.multiply.outer(a, b)`` behave same for all + dimensions of a and b. + + Examples + -------- + >>> np.multiply.outer([1, 2, 3], [4, 5, 6]) + array([[ 4, 5, 6], + [ 8, 10, 12], + [12, 15, 18]]) + + A multi-dimensional example: + + >>> A = np.array([[1, 2, 3], [4, 5, 6]]) + >>> A.shape + (2, 3) + >>> B = np.array([[1, 2, 3, 4]]) + >>> B.shape + (1, 4) + >>> C = np.multiply.outer(A, B) + >>> C.shape; C + (2, 3, 1, 4) + array([[[[ 1, 2, 3, 4]], + [[ 2, 4, 6, 8]], + [[ 3, 6, 9, 12]]], + [[[ 4, 8, 12, 16]], + [[ 5, 10, 15, 20]], + [[ 6, 12, 18, 24]]]]) + + """)) + +add_newdoc('numpy.core', 'ufunc', ('at', + """ + at(a, indices, b=None, /) + + Performs unbuffered in place operation on operand 'a' for elements + specified by 'indices'. For addition ufunc, this method is equivalent to + ``a[indices] += b``, except that results are accumulated for elements that + are indexed more than once. For example, ``a[[0,0]] += 1`` will only + increment the first element once because of buffering, whereas + ``add.at(a, [0,0], 1)`` will increment the first element twice. + + .. versionadded:: 1.8.0 + + Parameters + ---------- + a : array_like + The array to perform in place operation on. + indices : array_like or tuple + Array like index object or slice object for indexing into first + operand. If first operand has multiple dimensions, indices can be a + tuple of array like index objects or slice objects. + b : array_like + Second operand for ufuncs requiring two operands. 
Operand must be
+        broadcastable over first operand after indexing or slicing.
+
+    Examples
+    --------
+    Set items 0 and 1 to their negative values:
+
+    >>> a = np.array([1, 2, 3, 4])
+    >>> np.negative.at(a, [0, 1])
+    >>> a
+    array([-1, -2,  3,  4])
+
+    Increment items 0 and 1, and increment item 2 twice:
+
+    >>> a = np.array([1, 2, 3, 4])
+    >>> np.add.at(a, [0, 1, 2, 2], 1)
+    >>> a
+    array([2, 3, 5, 4])
+
+    Add items 0 and 1 in first array to second array,
+    and store results in first array:
+
+    >>> a = np.array([1, 2, 3, 4])
+    >>> b = np.array([1, 2])
+    >>> np.add.at(a, [0, 1], b)
+    >>> a
+    array([2, 4, 3, 4])
+
+    """))
+
+##############################################################################
+#
+# Documentation for dtype attributes and methods
+#
+##############################################################################
+
+##############################################################################
+#
+# dtype object
+#
+##############################################################################
+
+add_newdoc('numpy.core.multiarray', 'dtype',
+    """
+    dtype(dtype, align=False, copy=False)
+
+    Create a data type object.
+
+    A numpy array is homogeneous, and contains elements described by a
+    dtype object. A dtype object can be constructed from different
+    combinations of fundamental numeric types.
+
+    Parameters
+    ----------
+    dtype
+        Object to be converted to a data type object.
+    align : bool, optional
+        Add padding to the fields to match what a C compiler would output
+        for a similar C-struct. Can be ``True`` only if `obj` is a dictionary
+        or a comma-separated string. If a struct dtype is being created,
+        this also sets a sticky alignment flag ``isalignedstruct``.
+    copy : bool, optional
+        Make a new copy of the data-type object. If ``False``, the result
+        may just be a reference to a built-in data-type object.
+
+    See also
+    --------
+    result_type
+
+    Examples
+    --------
+    Using array-scalar type:
+
+    >>> np.dtype(np.int16)
+    dtype('int16')
+
+    Structured type, one field name 'f1', containing int16:
+
+    >>> np.dtype([('f1', np.int16)])
+    dtype([('f1', '<i2')])
+
+    Structured type, one field named 'f1', in itself containing a structured
+    type with one field:
+
+    >>> np.dtype([('f1', [('f1', np.int16)])])
+    dtype([('f1', [('f1', '<i2')])])
+
+    Structured type, two fields: the first field contains an unsigned int, the
+    second an int32:
+
+    >>> np.dtype([('f1', np.uint64), ('f2', np.int32)])
+    dtype([('f1', '<u8'), ('f2', '<i4')])
+
+    Using array-protocol type strings:
+
+    >>> np.dtype([('a','f8'),('b','S10')])
+    dtype([('a', '<f8'), ('b', 'S10')])
+
+    Using comma-separated field formats. The shape is (2,3):
+
+    >>> np.dtype("i4, (2,3)f8")
+    dtype([('f0', '<i4'), ('f1', '<f8', (2, 3))])
+
+    Using tuples. ``int`` is a fixed type, 3 the field's shape. ``void``
+    is a flexible type, here of size 10:
+
+    >>> np.dtype([('hello',(np.int64,3)),('world',np.void,10)])
+    dtype([('hello', '<i8', (3,)), ('world', 'V10')])
+
+    Subdivide ``int16`` into 2 ``int8``'s, called x and y. 0 and 1 are
+    the offsets in bytes:
+
+    >>> np.dtype((np.int16, {'x':(np.int8,0), 'y':(np.int8,1)}))
+    dtype((numpy.int16, [('x', 'i1'), ('y', 'i1')]))
+
+    Using dictionaries. Two fields named 'gender' and 'age':
+
+    >>> np.dtype({'names':['gender','age'], 'formats':['S1',np.uint8]})
+    dtype([('gender', 'S1'), ('age', 'u1')])
+
+    Offsets in bytes, here 0 and 25:
+
+    >>> np.dtype({'surname':('S25',0),'age':(np.uint8,25)})
+    dtype([('surname', 'S25'), ('age', 'u1')])
+
+    """)
+
+##############################################################################
+#
+# dtype attributes
+#
+##############################################################################
+
+add_newdoc('numpy.core.multiarray', 'dtype', ('alignment',
+    """
+    The required alignment (bytes) of this data-type according to the compiler.
+
+    More information is available in the C-API section of the manual.
+
+    Examples
+    --------
+
+    >>> x = np.dtype('i4')
+    >>> x.alignment
+    4
+
+    >>> x = np.dtype(float)
+    >>> x.alignment
+    8
+
+    """))
+
+add_newdoc('numpy.core.multiarray', 'dtype', ('byteorder',
+    """
+    A character indicating the byte-order of this data-type object.
+
+    One of:
+
+    ===  ==============
+    '='  native
+    '<'  little-endian
+    '>'  big-endian
+    '|'  not applicable
+    ===  ==============
+
+    All built-in data-type objects have byteorder either '=' or '|'.
+
+    Examples
+    --------
+
+    >>> dt = np.dtype('i2')
+    >>> dt.byteorder
+    '='
+    >>> # endian is not relevant for 8 bit numbers
+    >>> np.dtype('i1').byteorder
+    '|'
+    >>> # or ASCII strings
+    >>> np.dtype('S2').byteorder
+    '|'
+    >>> # Even if specific code is given, and it is native
+    >>> # '=' is the byteorder
+    >>> import sys
+    >>> sys_is_le = sys.byteorder == 'little'
+    >>> native_code = sys_is_le and '<' or '>'
+    >>> swapped_code = sys_is_le and '>' or '<'
+    >>> dt = np.dtype(native_code + 'i2')
+    >>> dt.byteorder
+    '='
+    >>> # Swapped code shows up as itself
+    >>> dt = np.dtype(swapped_code + 'i2')
+    >>> dt.byteorder == swapped_code
+    True
+
+    """))
+
+add_newdoc('numpy.core.multiarray', 'dtype', ('char',
+    """A unique character code for each of the 21 different built-in types.
+
+    Examples
+    --------
+
+    >>> x = np.dtype(float)
+    >>> x.char
+    'd'
+
+    """))
+
+add_newdoc('numpy.core.multiarray', 'dtype', ('descr',
+    """
+    `__array_interface__` description of the data-type.
+
+    The format is that required by the 'descr' key in the
+    `__array_interface__` attribute.
+
+    Warning: This attribute exists specifically for `__array_interface__`,
+    and passing it directly to `np.dtype` will not accurately reconstruct
+    some dtypes (e.g., scalar and subarray dtypes).
+
+    Examples
+    --------
+
+    >>> x = np.dtype(float)
+    >>> x.descr
+    [('', '<f8')]
+
+    >>> dt = np.dtype([('name', np.str_, 16), ('grades', np.float64, (2,))])
+    >>> dt.descr
+    [('name', '<U16'), ('grades', '<f8', (2,))]
+
+    """))
+
+add_newdoc('numpy.core.multiarray', 'dtype', ('fields',
+    """
+    Dictionary of named fields defined for this data type, or ``None``.
+
+    The dictionary is indexed by keys that are the names of the fields.
+    Each entry in the dictionary has a tuple entry of the form
+    ``(dtype, offset[, title])``. The offset is limited to C int, which
+    is signed and usually 32 bits. If present, the optional title can be
+    any object (if it is a string or unicode then it will also be a key
+    in the fields dictionary, otherwise it's meta-data). Notice also that
+    the first two elements of the tuple can be passed directly as
+    arguments to the ``ndarray.getfield`` and ``ndarray.setfield`` methods.
+
+    See Also
+    --------
+    ndarray.getfield, ndarray.setfield
+
+    Examples
+    --------
+
+    >>> dt = np.dtype([('name', np.str_, 16), ('grades', np.float64, (2,))])
+    >>> print(dt.fields)
+    {'grades': (dtype(('float64',(2,))), 16), 'name': (dtype('|S16'), 0)}
+
+    """))
+
+add_newdoc('numpy.core.multiarray', 'dtype', ('flags',
+    """
+    Bit-flags describing how this data type is to be interpreted.
+
+    Bit-masks are in `numpy.core.multiarray` as the constants
+    `ITEM_HASOBJECT`, `LIST_PICKLE`, `ITEM_IS_POINTER`, `NEEDS_INIT`,
+    `NEEDS_PYAPI`, `USE_GETITEM`, `USE_SETITEM`. A full explanation
+    of these flags is in C-API documentation; they are largely useful
+    for user-defined data-types.
+
+    The following example demonstrates that operations on this particular
+    dtype require the Python C-API.
+
+    Examples
+    --------
+
+    >>> x = np.dtype([('a', np.int32, 8), ('b', np.float64, 6)])
+    >>> x.flags
+    16
+    >>> np.core.multiarray.NEEDS_PYAPI
+    16
+
+    """))
+
+add_newdoc('numpy.core.multiarray', 'dtype', ('hasobject',
+    """
+    Boolean indicating whether this dtype contains any reference-counted
+    objects in any fields or sub-dtypes.
+
+    Recall that what is actually in the ndarray memory representing
+    the Python object is the memory address of that object (a pointer).
+    Special handling may be required, and this attribute is useful for
+    distinguishing data types that may contain arbitrary Python objects
+    and data-types that won't.
+
+    """))
+
+add_newdoc('numpy.core.multiarray', 'dtype', ('isbuiltin',
+    """
+    Integer indicating how this dtype relates to the built-in dtypes.
+
+    Read-only.
+
+    =  ========================================================================
+    0  if this is a structured array type, with fields
+    1  if this is a dtype compiled into numpy (such as ints, floats etc)
+    2  if the dtype is for a user-defined numpy type
+       A user-defined type uses the numpy C-API machinery to extend
+       numpy to handle a new array type. 
See + :ref:`user.user-defined-data-types` in the NumPy manual. + = ======================================================================== + + Examples + -------- + >>> dt = np.dtype('i2') + >>> dt.isbuiltin + 1 + >>> dt = np.dtype('f8') + >>> dt.isbuiltin + 1 + >>> dt = np.dtype([('field1', 'f8')]) + >>> dt.isbuiltin + 0 + + """)) + +add_newdoc('numpy.core.multiarray', 'dtype', ('isnative', + """ + Boolean indicating whether the byte order of this dtype is native + to the platform. + + """)) + +add_newdoc('numpy.core.multiarray', 'dtype', ('isalignedstruct', + """ + Boolean indicating whether the dtype is a struct which maintains + field alignment. This flag is sticky, so when combining multiple + structs together, it is preserved and produces new dtypes which + are also aligned. + + """)) + +add_newdoc('numpy.core.multiarray', 'dtype', ('itemsize', + """ + The element size of this data-type object. + + For 18 of the 21 types this number is fixed by the data-type. + For the flexible data-types, this number can be anything. + + Examples + -------- + + >>> arr = np.array([[1, 2], [3, 4]]) + >>> arr.dtype + dtype('int64') + >>> arr.itemsize + 8 + + >>> dt = np.dtype([('name', np.str_, 16), ('grades', np.float64, (2,))]) + >>> dt.itemsize + 80 + + """)) + +add_newdoc('numpy.core.multiarray', 'dtype', ('kind', + """ + A character code (one of 'biufcmMOSUV') identifying the general kind of data. + + = ====================== + b boolean + i signed integer + u unsigned integer + f floating-point + c complex floating-point + m timedelta + M datetime + O object + S (byte-)string + U Unicode + V void + = ====================== + + Examples + -------- + + >>> dt = np.dtype('i4') + >>> dt.kind + 'i' + >>> dt = np.dtype('f8') + >>> dt.kind + 'f' + >>> dt = np.dtype([('field1', 'f8')]) + >>> dt.kind + 'V' + + """)) + +add_newdoc('numpy.core.multiarray', 'dtype', ('metadata', + """ + Either ``None`` or a readonly dictionary of metadata (mappingproxy). + + The metadata field can be set using any dictionary at data-type + creation. NumPy currently has no uniform approach to propagating + metadata; although some array operations preserve it, there is no + guarantee that others will. + + .. warning:: + + Although used in certain projects, this feature was long undocumented + and is not well supported. Some aspects of metadata propagation + are expected to change in the future. + + Examples + -------- + + >>> dt = np.dtype(float, metadata={"key": "value"}) + >>> dt.metadata["key"] + 'value' + >>> arr = np.array([1, 2, 3], dtype=dt) + >>> arr.dtype.metadata + mappingproxy({'key': 'value'}) + + Adding arrays with identical datatypes currently preserves the metadata: + + >>> (arr + arr).dtype.metadata + mappingproxy({'key': 'value'}) + + But if the arrays have different dtype metadata, the metadata may be + dropped: + + >>> dt2 = np.dtype(float, metadata={"key2": "value2"}) + >>> arr2 = np.array([3, 2, 1], dtype=dt2) + >>> (arr + arr2).dtype.metadata is None + True # The metadata field is cleared so None is returned + """)) + +add_newdoc('numpy.core.multiarray', 'dtype', ('name', + """ + A bit-width name for this data-type. + + Un-sized flexible data-type objects do not have this attribute. + + Examples + -------- + + >>> x = np.dtype(float) + >>> x.name + 'float64' + >>> x = np.dtype([('a', np.int32, 8), ('b', np.float64, 6)]) + >>> x.name + 'void640' + + """)) + +add_newdoc('numpy.core.multiarray', 'dtype', ('names', + """ + Ordered list of field names, or ``None`` if there are no fields. 
+
+    The names are ordered according to increasing byte offset. This can be
+    used, for example, to walk through all of the named fields in offset order.
+
+    Examples
+    --------
+    >>> dt = np.dtype([('name', np.str_, 16), ('grades', np.float64, (2,))])
+    >>> dt.names
+    ('name', 'grades')
+
+    """))
+
+add_newdoc('numpy.core.multiarray', 'dtype', ('num',
+    """
+    A unique number for each of the 21 different built-in types.
+
+    These are roughly ordered from least-to-most precision.
+
+    Examples
+    --------
+
+    >>> dt = np.dtype(str)
+    >>> dt.num
+    19
+
+    >>> dt = np.dtype(float)
+    >>> dt.num
+    12
+
+    """))
+
+add_newdoc('numpy.core.multiarray', 'dtype', ('shape',
+    """
+    Shape tuple of the sub-array if this data type describes a sub-array,
+    and ``()`` otherwise.
+
+    Examples
+    --------
+
+    >>> dt = np.dtype(('i4', 4))
+    >>> dt.shape
+    (4,)
+
+    >>> dt = np.dtype(('i4', (2, 3)))
+    >>> dt.shape
+    (2, 3)
+
+    """))
+
+add_newdoc('numpy.core.multiarray', 'dtype', ('ndim',
+    """
+    Number of dimensions of the sub-array if this data type describes a
+    sub-array, and ``0`` otherwise.
+
+    .. versionadded:: 1.13.0
+
+    Examples
+    --------
+    >>> x = np.dtype(float)
+    >>> x.ndim
+    0
+
+    >>> x = np.dtype((float, 8))
+    >>> x.ndim
+    1
+
+    >>> x = np.dtype(('i4', (3, 4)))
+    >>> x.ndim
+    2
+
+    """))
+
+add_newdoc('numpy.core.multiarray', 'dtype', ('str',
+    """The array-protocol typestring of this data-type object."""))
+
+add_newdoc('numpy.core.multiarray', 'dtype', ('subdtype',
+    """
+    Tuple ``(item_dtype, shape)`` if this `dtype` describes a sub-array, and
+    None otherwise.
+
+    The *shape* is the fixed shape of the sub-array described by this
+    data type, and *item_dtype* the data type of the array.
+
+    If a field whose dtype object has this attribute is retrieved,
+    then the extra dimensions implied by *shape* are tacked on to
+    the end of the retrieved array.
+
+    See Also
+    --------
+    dtype.base
+
+    Examples
+    --------
+    >>> x = np.dtype('8f')
+    >>> x.subdtype
+    (dtype('float32'), (8,))
+
+    >>> x = np.dtype('i2')
+    >>> x.subdtype
+    >>>
+
+    """))
+
+add_newdoc('numpy.core.multiarray', 'dtype', ('base',
+    """
+    Returns dtype for the base element of the subarrays,
+    regardless of their dimension or shape.
+
+    See Also
+    --------
+    dtype.subdtype
+
+    Examples
+    --------
+    >>> x = np.dtype('8f')
+    >>> x.base
+    dtype('float32')
+
+    >>> x = np.dtype('i2')
+    >>> x.base
+    dtype('int16')
+
+    """))
+
+add_newdoc('numpy.core.multiarray', 'dtype', ('type',
+    """The type object used to instantiate a scalar of this data-type."""))
+
+##############################################################################
+#
+# dtype methods
+#
+##############################################################################
+
+add_newdoc('numpy.core.multiarray', 'dtype', ('newbyteorder',
+    """
+    newbyteorder(new_order='S', /)
+
+    Return a new dtype with a different byte order.
+
+    Changes are also made in all fields and sub-arrays of the data type.
+
+    Parameters
+    ----------
+    new_order : string, optional
+        Byte order to force; a value from the byte order specifications
+        below. The default value ('S') results in swapping the current
+        byte order. `new_order` codes can be any of:
+
+        * 'S' - swap dtype from current to opposite endian
+        * {'<', 'little'} - little endian
+        * {'>', 'big'} - big endian
+        * {'=', 'native'} - native order
+        * {'|', 'I'} - ignore (no change to byte order)
+
+    Returns
+    -------
+    new_dtype : dtype
+        New dtype object with the given change to the byte order.
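+
+    See Also
+    --------
+    generic.newbyteorder : Equivalent method on array scalars.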
+
+    Notes
+    -----
+    Changes are also made in all fields and sub-arrays of the data type.
+
+    Examples
+    --------
+    >>> import sys
+    >>> sys_is_le = sys.byteorder == 'little'
+    >>> native_code = sys_is_le and '<' or '>'
+    >>> swapped_code = sys_is_le and '>' or '<'
+    >>> native_dt = np.dtype(native_code+'i2')
+    >>> swapped_dt = np.dtype(swapped_code+'i2')
+    >>> native_dt.newbyteorder('S') == swapped_dt
+    True
+    >>> native_dt.newbyteorder() == swapped_dt
+    True
+    >>> native_dt == swapped_dt.newbyteorder('S')
+    True
+    >>> native_dt == swapped_dt.newbyteorder('=')
+    True
+    >>> native_dt == swapped_dt.newbyteorder('N')
+    True
+    >>> native_dt == native_dt.newbyteorder('|')
+    True
+    >>> np.dtype('<i2') == native_dt.newbyteorder('<')
+    True
+    >>> np.dtype('<i2') == native_dt.newbyteorder('L')
+    True
+    >>> np.dtype('>i2') == native_dt.newbyteorder('>')
+    True
+    >>> np.dtype('>i2') == native_dt.newbyteorder('B')
+    True
+
+    """))
+
+add_newdoc('numpy.core.multiarray', 'dtype', ('__class_getitem__',
+    """
+    __class_getitem__(item, /)
+
+    Return a parametrized wrapper around the `~numpy.dtype` type.
+
+    .. versionadded:: 1.22
+
+    Returns
+    -------
+    alias : types.GenericAlias
+        A parametrized `~numpy.dtype` type.
+
+    Examples
+    --------
+    >>> import numpy as np
+
+    >>> np.dtype[np.int64]
+    numpy.dtype[numpy.int64]
+
+    Notes
+    -----
+    This method is only available for python 3.9 and later.
+
+    See Also
+    --------
+    :pep:`585` : Type hinting generics in standard collections.
+
+    """))
+
+add_newdoc('numpy.core.multiarray', 'dtype', ('__ge__',
+    """
+    __ge__(value, /)
+
+    Return ``self >= value``.
+
+    Equivalent to ``np.can_cast(value, self, casting="safe")``.
+
+    See Also
+    --------
+    can_cast : Returns True if cast between data types can occur according to
+               the casting rule.
+
+    """))
+
+add_newdoc('numpy.core.multiarray', 'dtype', ('__le__',
+    """
+    __le__(value, /)
+
+    Return ``self <= value``.
+
+    Equivalent to ``np.can_cast(self, value, casting="safe")``.
+
+    See Also
+    --------
+    can_cast : Returns True if cast between data types can occur according to
+               the casting rule.
+
+    """))
+
+add_newdoc('numpy.core.multiarray', 'dtype', ('__gt__',
+    """
+    __gt__(value, /)
+
+    Return ``self > value``.
+
+    Equivalent to
+    ``self != value and np.can_cast(value, self, casting="safe")``.
+
+    See Also
+    --------
+    can_cast : Returns True if cast between data types can occur according to
+               the casting rule.
+
+    """))
+
+add_newdoc('numpy.core.multiarray', 'dtype', ('__lt__',
+    """
+    __lt__(value, /)
+
+    Return ``self < value``.
+
+    Equivalent to
+    ``self != value and np.can_cast(self, value, casting="safe")``.
+
+    See Also
+    --------
+    can_cast : Returns True if cast between data types can occur according to
+               the casting rule.
+
+    """))
+
+##############################################################################
+#
+# Datetime-related Methods
+#
+##############################################################################
+
+add_newdoc('numpy.core.multiarray', 'busdaycalendar',
+    """
+    busdaycalendar(weekmask='1111100', holidays=None)
+
+    A business day calendar object that efficiently stores information
+    defining valid days for the busday family of functions.
+
+    The default valid days are Monday through Friday ("business days").
+    A busdaycalendar object can be specified with any set of weekly
+    valid days, plus an optional set of "holiday" dates that will
+    always be invalid.
+
+    Once a busdaycalendar object is created, the weekmask and holidays
+    cannot be modified.
+
+    .. versionadded:: 1.7.0
+
+    Parameters
+    ----------
+    weekmask : str or array_like of bool, optional
+        A seven-element array indicating which of Monday through Sunday are
+        valid days. May be specified as a length-seven list or array, like
+        [1,1,1,1,1,0,0]; a length-seven string, like '1111100'; or a string
+        like "Mon Tue Wed Thu Fri", made up of 3-character abbreviations for
+        weekdays, optionally separated by white space. Valid abbreviations
+        are: Mon Tue Wed Thu Fri Sat Sun
+    holidays : array_like of datetime64[D], optional
+        An array of dates to consider as invalid dates, no matter which
+        weekday they fall upon. Holiday dates may be specified in any
+        order, and NaT (not-a-time) dates are ignored. This list is
+        saved in a normalized form that is suited for fast calculations
+        of valid days.
+
+    Returns
+    -------
+    out : busdaycalendar
+        A business day calendar object containing the specified
+        weekmask and holidays values.
+
+    See Also
+    --------
+    is_busday : Returns a boolean array indicating valid days.
+    busday_offset : Applies an offset counted in valid days.
+    busday_count : Counts how many valid days are in a half-open date range.
+
+    Attributes
+    ----------
+    Note: once a busdaycalendar object is created, you cannot modify the
+    weekmask or holidays. The attributes return copies of internal data.
+    weekmask : (copy) seven-element array of bool
+    holidays : (copy) sorted array of datetime64[D]
+
+    Examples
+    --------
+    >>> # Some important days in July
+    ... bdd = np.busdaycalendar(
+    ...             holidays=['2011-07-01', '2011-07-04', '2011-07-17'])
+    >>> # Default is Monday to Friday weekdays
+    ... bdd.weekmask
+    array([ True,  True,  True,  True,  True, False, False])
+    >>> # Any holidays already on the weekend are removed
+    ... bdd.holidays
+    array(['2011-07-01', '2011-07-04'], dtype='datetime64[D]')
+    """)
+
+add_newdoc('numpy.core.multiarray', 'busdaycalendar', ('weekmask',
+    """A copy of the seven-element boolean mask indicating valid days."""))
+
+add_newdoc('numpy.core.multiarray', 'busdaycalendar', ('holidays',
+    """A copy of the holiday array indicating additional invalid days."""))
+
+add_newdoc('numpy.core.multiarray', 'normalize_axis_index',
+    """
+    normalize_axis_index(axis, ndim, msg_prefix=None)
+
+    Normalizes an axis index, `axis`, such that it is a valid positive index
+    into the shape of an array with `ndim` dimensions. Raises an AxisError
+    with an appropriate message if this is not possible.
+
+    Used internally by all axis-checking logic.
+
+    .. versionadded:: 1.13.0
+
+    Parameters
+    ----------
+    axis : int
+        The un-normalized index of the axis. Can be negative.
+    ndim : int
+        The number of dimensions of the array that `axis` should be
+        normalized against.
+    msg_prefix : str
+        A prefix to put before the message, typically the name of the
+        argument.
+
+    Returns
+    -------
+    normalized_axis : int
+        The normalized axis index, such that `0 <= normalized_axis < ndim`
+
+    Raises
+    ------
+    AxisError
+        If the axis index is invalid, when `-ndim <= axis < ndim` is false.
+
+    Examples
+    --------
+    >>> normalize_axis_index(0, ndim=3)
+    0
+    >>> normalize_axis_index(1, ndim=3)
+    1
+    >>> normalize_axis_index(-1, ndim=3)
+    2
+
+    >>> normalize_axis_index(3, ndim=3)
+    Traceback (most recent call last):
+    ...
+    AxisError: axis 3 is out of bounds for array of dimension 3
+    >>> normalize_axis_index(-4, ndim=3, msg_prefix='axes_arg')
+    Traceback (most recent call last):
+    ...
+    AxisError: axes_arg: axis -4 is out of bounds for array of dimension 3
+    """)
+
+add_newdoc('numpy.core.multiarray', 'datetime_data',
+    """
+    datetime_data(dtype, /)
+
+    Get information about the step size of a date or time type.
+
+    The returned tuple can be passed as the second argument of
+    `numpy.datetime64` and `numpy.timedelta64`.
+
+    Parameters
+    ----------
+    dtype : dtype
+        The dtype object, which must be a `datetime64` or `timedelta64` type.
+
+    Returns
+    -------
+    unit : str
+        The :ref:`datetime unit <arrays.dtypes.dateunits>` on which this
+        dtype is based.
+    count : int
+        The number of base units in a step.
+
+    Examples
+    --------
+    >>> dt_25s = np.dtype('timedelta64[25s]')
+    >>> np.datetime_data(dt_25s)
+    ('s', 25)
+    >>> np.array(10, dt_25s).astype('timedelta64[s]')
+    array(250, dtype='timedelta64[s]')
+
+    The result can be used to construct a datetime that uses the same units
+    as a timedelta
+
+    >>> np.datetime64('2010', np.datetime_data(dt_25s))
+    numpy.datetime64('2010-01-01T00:00:00','25s')
+    """)
+
+
+##############################################################################
+#
+# Documentation for `generic` attributes and methods
+#
+##############################################################################
+
+add_newdoc('numpy.core.numerictypes', 'generic',
+    """
+    Base class for numpy scalar types.
+
+    Class from which most (all?) numpy scalar types are derived. For
+    consistency, exposes the same API as `ndarray`, despite many
+    consequent attributes being either "get-only," or completely irrelevant.
+    This is the class from which it is strongly suggested users should derive
+    custom scalar types.
+
+    """)
+
+# Attributes
+
+def refer_to_array_attribute(attr, method=True):
+    docstring = """
+    Scalar {} identical to the corresponding array attribute.
+
+    Please see `ndarray.{}`.
+ """ + + return attr, docstring.format("method" if method else "attribute", attr) + + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('T', method=False)) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('base', method=False)) + +add_newdoc('numpy.core.numerictypes', 'generic', ('data', + """Pointer to start of data.""")) + +add_newdoc('numpy.core.numerictypes', 'generic', ('dtype', + """Get array data-descriptor.""")) + +add_newdoc('numpy.core.numerictypes', 'generic', ('flags', + """The integer value of flags.""")) + +add_newdoc('numpy.core.numerictypes', 'generic', ('flat', + """A 1-D view of the scalar.""")) + +add_newdoc('numpy.core.numerictypes', 'generic', ('imag', + """The imaginary part of the scalar.""")) + +add_newdoc('numpy.core.numerictypes', 'generic', ('itemsize', + """The length of one element in bytes.""")) + +add_newdoc('numpy.core.numerictypes', 'generic', ('nbytes', + """The length of the scalar in bytes.""")) + +add_newdoc('numpy.core.numerictypes', 'generic', ('ndim', + """The number of array dimensions.""")) + +add_newdoc('numpy.core.numerictypes', 'generic', ('real', + """The real part of the scalar.""")) + +add_newdoc('numpy.core.numerictypes', 'generic', ('shape', + """Tuple of array dimensions.""")) + +add_newdoc('numpy.core.numerictypes', 'generic', ('size', + """The number of elements in the gentype.""")) + +add_newdoc('numpy.core.numerictypes', 'generic', ('strides', + """Tuple of bytes steps in each dimension.""")) + +# Methods + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('all')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('any')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('argmax')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('argmin')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('argsort')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('astype')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('byteswap')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('choose')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('clip')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('compress')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('conjugate')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('copy')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('cumprod')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('cumsum')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('diagonal')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('dump')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('dumps')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('fill')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('flatten')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('getfield')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('item')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('itemset')) + 
+add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('max')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('mean')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('min')) + +add_newdoc('numpy.core.numerictypes', 'generic', ('newbyteorder', + """ + newbyteorder(new_order='S', /) + + Return a new `dtype` with a different byte order. + + Changes are also made in all fields and sub-arrays of the data type. + + The `new_order` code can be any from the following: + + * 'S' - swap dtype from current to opposite endian + * {'<', 'little'} - little endian + * {'>', 'big'} - big endian + * {'=', 'native'} - native order + * {'|', 'I'} - ignore (no change to byte order) + + Parameters + ---------- + new_order : str, optional + Byte order to force; a value from the byte order specifications + above. The default value ('S') results in swapping the current + byte order. + + + Returns + ------- + new_dtype : dtype + New `dtype` object with the given change to the byte order. + + """)) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('nonzero')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('prod')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('ptp')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('put')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('ravel')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('repeat')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('reshape')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('resize')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('round')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('searchsorted')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('setfield')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('setflags')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('sort')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('squeeze')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('std')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('sum')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('swapaxes')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('take')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('tofile')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('tolist')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('tostring')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('trace')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('transpose')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('var')) + +add_newdoc('numpy.core.numerictypes', 'generic', + refer_to_array_attribute('view')) + +add_newdoc('numpy.core.numerictypes', 'number', ('__class_getitem__', + """ + __class_getitem__(item, /) + + Return a parametrized wrapper around the `~numpy.number` type. + + .. 
versionadded:: 1.22 + + Returns + ------- + alias : types.GenericAlias + A parametrized `~numpy.number` type. + + Examples + -------- + >>> from typing import Any + >>> import numpy as np + + >>> np.signedinteger[Any] + numpy.signedinteger[typing.Any] + + Notes + ----- + This method is only available for python 3.9 and later. + + See Also + -------- + :pep:`585` : Type hinting generics in standard collections. + + """)) + +############################################################################## +# +# Documentation for scalar type abstract base classes in type hierarchy +# +############################################################################## + + +add_newdoc('numpy.core.numerictypes', 'number', + """ + Abstract base class of all numeric scalar types. + + """) + +add_newdoc('numpy.core.numerictypes', 'integer', + """ + Abstract base class of all integer scalar types. + + """) + +add_newdoc('numpy.core.numerictypes', 'signedinteger', + """ + Abstract base class of all signed integer scalar types. + + """) + +add_newdoc('numpy.core.numerictypes', 'unsignedinteger', + """ + Abstract base class of all unsigned integer scalar types. + + """) + +add_newdoc('numpy.core.numerictypes', 'inexact', + """ + Abstract base class of all numeric scalar types with a (potentially) + inexact representation of the values in its range, such as + floating-point numbers. + + """) + +add_newdoc('numpy.core.numerictypes', 'floating', + """ + Abstract base class of all floating-point scalar types. + + """) + +add_newdoc('numpy.core.numerictypes', 'complexfloating', + """ + Abstract base class of all complex number scalar types that are made up of + floating-point numbers. + + """) + +add_newdoc('numpy.core.numerictypes', 'flexible', + """ + Abstract base class of all scalar types without predefined length. + The actual size of these types depends on the specific `np.dtype` + instantiation. + + """) + +add_newdoc('numpy.core.numerictypes', 'character', + """ + Abstract base class of all character string scalar types. + + """) diff --git a/.local/lib/python3.8/site-packages/numpy/core/_add_newdocs_scalars.py b/.local/lib/python3.8/site-packages/numpy/core/_add_newdocs_scalars.py new file mode 100644 index 0000000..94859a9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/_add_newdocs_scalars.py @@ -0,0 +1,311 @@ +""" +This file is separate from ``_add_newdocs.py`` so that it can be mocked out by +our sphinx ``conf.py`` during doc builds, where we want to avoid showing +platform-dependent information. 
+""" +from numpy.core import dtype +from numpy.core import numerictypes as _numerictypes +from numpy.core.function_base import add_newdoc +import platform + +############################################################################## +# +# Documentation for concrete scalar classes +# +############################################################################## + +def numeric_type_aliases(aliases): + def type_aliases_gen(): + for alias, doc in aliases: + try: + alias_type = getattr(_numerictypes, alias) + except AttributeError: + # The set of aliases that actually exist varies between platforms + pass + else: + yield (alias_type, alias, doc) + return list(type_aliases_gen()) + + +possible_aliases = numeric_type_aliases([ + ('int8', '8-bit signed integer (``-128`` to ``127``)'), + ('int16', '16-bit signed integer (``-32_768`` to ``32_767``)'), + ('int32', '32-bit signed integer (``-2_147_483_648`` to ``2_147_483_647``)'), + ('int64', '64-bit signed integer (``-9_223_372_036_854_775_808`` to ``9_223_372_036_854_775_807``)'), + ('intp', 'Signed integer large enough to fit pointer, compatible with C ``intptr_t``'), + ('uint8', '8-bit unsigned integer (``0`` to ``255``)'), + ('uint16', '16-bit unsigned integer (``0`` to ``65_535``)'), + ('uint32', '32-bit unsigned integer (``0`` to ``4_294_967_295``)'), + ('uint64', '64-bit unsigned integer (``0`` to ``18_446_744_073_709_551_615``)'), + ('uintp', 'Unsigned integer large enough to fit pointer, compatible with C ``uintptr_t``'), + ('float16', '16-bit-precision floating-point number type: sign bit, 5 bits exponent, 10 bits mantissa'), + ('float32', '32-bit-precision floating-point number type: sign bit, 8 bits exponent, 23 bits mantissa'), + ('float64', '64-bit precision floating-point number type: sign bit, 11 bits exponent, 52 bits mantissa'), + ('float96', '96-bit extended-precision floating-point number type'), + ('float128', '128-bit extended-precision floating-point number type'), + ('complex64', 'Complex number type composed of 2 32-bit-precision floating-point numbers'), + ('complex128', 'Complex number type composed of 2 64-bit-precision floating-point numbers'), + ('complex192', 'Complex number type composed of 2 96-bit extended-precision floating-point numbers'), + ('complex256', 'Complex number type composed of 2 128-bit extended-precision floating-point numbers'), + ]) + + + + +def add_newdoc_for_scalar_type(obj, fixed_aliases, doc): + # note: `:field: value` is rST syntax which renders as field lists. + o = getattr(_numerictypes, obj) + + character_code = dtype(o).char + canonical_name_doc = "" if obj == o.__name__ else ":Canonical name: `numpy.{}`\n ".format(obj) + alias_doc = ''.join(":Alias: `numpy.{}`\n ".format(alias) for alias in fixed_aliases) + alias_doc += ''.join(":Alias on this platform ({} {}): `numpy.{}`: {}.\n ".format(platform.system(), platform.machine(), alias, doc) + for (alias_type, alias, doc) in possible_aliases if alias_type is o) + docstring = """ + {doc} + + :Character code: ``'{character_code}'`` + {canonical_name_doc}{alias_doc} + """.format(doc=doc.strip(), character_code=character_code, + canonical_name_doc=canonical_name_doc, alias_doc=alias_doc) + + add_newdoc('numpy.core.numerictypes', obj, docstring) + + +add_newdoc_for_scalar_type('bool_', ['bool8'], + """ + Boolean type (True or False), stored as a byte. + + .. warning:: + + The :class:`bool_` type is not a subclass of the :class:`int_` type + (the :class:`bool_` is not even a number type). 
This is different + than Python's default implementation of :class:`bool` as a + sub-class of :class:`int`. + """) + +add_newdoc_for_scalar_type('byte', [], + """ + Signed integer type, compatible with C ``char``. + """) + +add_newdoc_for_scalar_type('short', [], + """ + Signed integer type, compatible with C ``short``. + """) + +add_newdoc_for_scalar_type('intc', [], + """ + Signed integer type, compatible with C ``int``. + """) + +add_newdoc_for_scalar_type('int_', [], + """ + Signed integer type, compatible with Python `int` and C ``long``. + """) + +add_newdoc_for_scalar_type('longlong', [], + """ + Signed integer type, compatible with C ``long long``. + """) + +add_newdoc_for_scalar_type('ubyte', [], + """ + Unsigned integer type, compatible with C ``unsigned char``. + """) + +add_newdoc_for_scalar_type('ushort', [], + """ + Unsigned integer type, compatible with C ``unsigned short``. + """) + +add_newdoc_for_scalar_type('uintc', [], + """ + Unsigned integer type, compatible with C ``unsigned int``. + """) + +add_newdoc_for_scalar_type('uint', [], + """ + Unsigned integer type, compatible with C ``unsigned long``. + """) + +add_newdoc_for_scalar_type('ulonglong', [], + """ + Signed integer type, compatible with C ``unsigned long long``. + """) + +add_newdoc_for_scalar_type('half', [], + """ + Half-precision floating-point number type. + """) + +add_newdoc_for_scalar_type('single', [], + """ + Single-precision floating-point number type, compatible with C ``float``. + """) + +add_newdoc_for_scalar_type('double', ['float_'], + """ + Double-precision floating-point number type, compatible with Python `float` + and C ``double``. + """) + +add_newdoc_for_scalar_type('longdouble', ['longfloat'], + """ + Extended-precision floating-point number type, compatible with C + ``long double`` but not necessarily with IEEE 754 quadruple-precision. + """) + +add_newdoc_for_scalar_type('csingle', ['singlecomplex'], + """ + Complex number type composed of two single-precision floating-point + numbers. + """) + +add_newdoc_for_scalar_type('cdouble', ['cfloat', 'complex_'], + """ + Complex number type composed of two double-precision floating-point + numbers, compatible with Python `complex`. + """) + +add_newdoc_for_scalar_type('clongdouble', ['clongfloat', 'longcomplex'], + """ + Complex number type composed of two extended-precision floating-point + numbers. + """) + +add_newdoc_for_scalar_type('object_', [], + """ + Any Python object. + """) + +add_newdoc_for_scalar_type('str_', ['unicode_'], + r""" + A unicode string. + + When used in arrays, this type strips trailing null codepoints. + + Unlike the builtin `str`, this supports the :ref:`python:bufferobjects`, exposing its + contents as UCS4: + + >>> m = memoryview(np.str_("abc")) + >>> m.format + '3w' + >>> m.tobytes() + b'a\x00\x00\x00b\x00\x00\x00c\x00\x00\x00' + """) + +add_newdoc_for_scalar_type('bytes_', ['string_'], + r""" + A byte string. + + When used in arrays, this type strips trailing null bytes. + """) + +add_newdoc_for_scalar_type('void', [], + r""" + Either an opaque sequence of bytes, or a structure. + + >>> np.void(b'abcd') + void(b'\x61\x62\x63\x64') + + Structured `void` scalars can only be constructed via extraction from :ref:`structured_arrays`: + + >>> arr = np.array((1, 2), dtype=[('x', np.int8), ('y', np.int8)]) + >>> arr[()] + (1, 2) # looks like a tuple, but is `np.void` + """) + +add_newdoc_for_scalar_type('datetime64', [], + """ + If created from a 64-bit integer, it represents an offset from + ``1970-01-01T00:00:00``. 
+ If created from string, the string can be in ISO 8601 date + or datetime format. + + >>> np.datetime64(10, 'Y') + numpy.datetime64('1980') + >>> np.datetime64('1980', 'Y') + numpy.datetime64('1980') + >>> np.datetime64(10, 'D') + numpy.datetime64('1970-01-11') + + See :ref:`arrays.datetime` for more information. + """) + +add_newdoc_for_scalar_type('timedelta64', [], + """ + A timedelta stored as a 64-bit integer. + + See :ref:`arrays.datetime` for more information. + """) + +add_newdoc('numpy.core.numerictypes', "integer", ('is_integer', + """ + integer.is_integer() -> bool + + Return ``True`` if the number is finite with integral value. + + .. versionadded:: 1.22 + + Examples + -------- + >>> np.int64(-2).is_integer() + True + >>> np.uint32(5).is_integer() + True + """)) + +# TODO: work out how to put this on the base class, np.floating +for float_name in ('half', 'single', 'double', 'longdouble'): + add_newdoc('numpy.core.numerictypes', float_name, ('as_integer_ratio', + """ + {ftype}.as_integer_ratio() -> (int, int) + + Return a pair of integers, whose ratio is exactly equal to the original + floating point number, and with a positive denominator. + Raise `OverflowError` on infinities and a `ValueError` on NaNs. + + >>> np.{ftype}(10.0).as_integer_ratio() + (10, 1) + >>> np.{ftype}(0.0).as_integer_ratio() + (0, 1) + >>> np.{ftype}(-.25).as_integer_ratio() + (-1, 4) + """.format(ftype=float_name))) + + add_newdoc('numpy.core.numerictypes', float_name, ('is_integer', + f""" + {float_name}.is_integer() -> bool + + Return ``True`` if the floating point number is finite with integral + value, and ``False`` otherwise. + + .. versionadded:: 1.22 + + Examples + -------- + >>> np.{float_name}(-2.0).is_integer() + True + >>> np.{float_name}(3.2).is_integer() + False + """)) + +for int_name in ('int8', 'uint8', 'int16', 'uint16', 'int32', 'uint32', + 'int64', 'uint64', 'int64', 'uint64', 'int64', 'uint64'): + # Add negative examples for signed cases by checking typecode + add_newdoc('numpy.core.numerictypes', int_name, ('bit_count', + f""" + {int_name}.bit_count() -> int + + Computes the number of 1-bits in the absolute value of the input. + Analogous to the builtin `int.bit_count` or ``popcount`` in C++. + + Examples + -------- + >>> np.{int_name}(127).bit_count() + 7""" + + (f""" + >>> np.{int_name}(-127).bit_count() + 7 + """ if dtype(int_name).char.islower() else ""))) diff --git a/.local/lib/python3.8/site-packages/numpy/core/_asarray.py b/.local/lib/python3.8/site-packages/numpy/core/_asarray.py new file mode 100644 index 0000000..ecb4e7c --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/_asarray.py @@ -0,0 +1,140 @@ +""" +Functions in the ``as*array`` family that promote array-likes into arrays. + +`require` fits this category despite its name not matching this pattern. +""" +from .overrides import ( + array_function_dispatch, + set_array_function_like_doc, + set_module, +) +from .multiarray import array, asanyarray + + +__all__ = ["require"] + + + +def _require_dispatcher(a, dtype=None, requirements=None, *, like=None): + return (like,) + + +@set_array_function_like_doc +@set_module('numpy') +def require(a, dtype=None, requirements=None, *, like=None): + """ + Return an ndarray of the provided type that satisfies requirements. + + This function is useful to be sure that an array with the correct flags + is returned for passing to compiled code (perhaps through ctypes). 
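+
+    For example, ``np.require(x, np.float32, ['C', 'A', 'W'])`` returns a
+    C-contiguous, aligned, writeable float32 array, copying `x` only when
+    needed to satisfy those requirements.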
diff --git a/.local/lib/python3.8/site-packages/numpy/core/_asarray.py b/.local/lib/python3.8/site-packages/numpy/core/_asarray.py
new file mode 100644
index 0000000..ecb4e7c
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/core/_asarray.py
@@ -0,0 +1,140 @@
+"""
+Functions in the ``as*array`` family that promote array-likes into arrays.
+
+`require` fits this category despite its name not matching this pattern.
+"""
+from .overrides import (
+    array_function_dispatch,
+    set_array_function_like_doc,
+    set_module,
+)
+from .multiarray import array, asanyarray
+
+
+__all__ = ["require"]
+
+
+def _require_dispatcher(a, dtype=None, requirements=None, *, like=None):
+    return (like,)
+
+
+@set_array_function_like_doc
+@set_module('numpy')
+def require(a, dtype=None, requirements=None, *, like=None):
+    """
+    Return an ndarray of the provided type that satisfies requirements.
+
+    This function is useful to be sure that an array with the correct flags
+    is returned for passing to compiled code (perhaps through ctypes).
+
+    Parameters
+    ----------
+    a : array_like
+       The object to be converted to a type-and-requirement-satisfying array.
+    dtype : data-type
+       The required data-type. If None preserve the current dtype. If your
+       application requires the data to be in native byteorder, include
+       a byteorder specification as a part of the dtype specification.
+    requirements : str or list of str
+       The requirements list can be any of the following
+
+       * 'F_CONTIGUOUS' ('F') - ensure a Fortran-contiguous array
+       * 'C_CONTIGUOUS' ('C') - ensure a C-contiguous array
+       * 'ALIGNED' ('A') - ensure a data-type aligned array
+       * 'WRITEABLE' ('W') - ensure a writable array
+       * 'OWNDATA' ('O') - ensure an array that owns its own data
+       * 'ENSUREARRAY' ('E') - ensure a base array, instead of a subclass
+    ${ARRAY_FUNCTION_LIKE}
+
+        .. versionadded:: 1.20.0
+
+    Returns
+    -------
+    out : ndarray
+        Array with specified requirements and type if given.
+
+    See Also
+    --------
+    asarray : Convert input to an ndarray.
+    asanyarray : Convert to an ndarray, but pass through ndarray subclasses.
+    ascontiguousarray : Convert input to a contiguous array.
+    asfortranarray : Convert input to an ndarray with column-major
+                     memory order.
+    ndarray.flags : Information about the memory layout of the array.
+
+    Notes
+    -----
+    The returned array will be guaranteed to have the listed requirements
+    by making a copy if needed.
+
+    Examples
+    --------
+    >>> x = np.arange(6).reshape(2,3)
+    >>> x.flags
+      C_CONTIGUOUS : True
+      F_CONTIGUOUS : False
+      OWNDATA : False
+      WRITEABLE : True
+      ALIGNED : True
+      WRITEBACKIFCOPY : False
+      UPDATEIFCOPY : False
+
+    >>> y = np.require(x, dtype=np.float32, requirements=['A', 'O', 'W', 'F'])
+    >>> y.flags
+      C_CONTIGUOUS : False
+      F_CONTIGUOUS : True
+      OWNDATA : True
+      WRITEABLE : True
+      ALIGNED : True
+      WRITEBACKIFCOPY : False
+      UPDATEIFCOPY : False
+
+    """
+    if like is not None:
+        return _require_with_like(
+            a,
+            dtype=dtype,
+            requirements=requirements,
+            like=like,
+        )
+
+    possible_flags = {'C': 'C', 'C_CONTIGUOUS': 'C', 'CONTIGUOUS': 'C',
+                      'F': 'F', 'F_CONTIGUOUS': 'F', 'FORTRAN': 'F',
+                      'A': 'A', 'ALIGNED': 'A',
+                      'W': 'W', 'WRITEABLE': 'W',
+                      'O': 'O', 'OWNDATA': 'O',
+                      'E': 'E', 'ENSUREARRAY': 'E'}
+    if not requirements:
+        return asanyarray(a, dtype=dtype)
+    else:
+        requirements = {possible_flags[x.upper()] for x in requirements}
+
+    if 'E' in requirements:
+        requirements.remove('E')
+        subok = False
+    else:
+        subok = True
+
+    order = 'A'
+    if requirements >= {'C', 'F'}:
+        raise ValueError('Cannot specify both "C" and "F" order')
+    elif 'F' in requirements:
+        order = 'F'
+        requirements.remove('F')
+    elif 'C' in requirements:
+        order = 'C'
+        requirements.remove('C')
+
+    arr = array(a, dtype=dtype, order=order, copy=False, subok=subok)
+
+    for prop in requirements:
+        if not arr.flags[prop]:
+            arr = arr.copy(order)
+            break
+    return arr
+
+
+_require_with_like = array_function_dispatch(
+    _require_dispatcher
+)(require)
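+# Illustrative sketch (editorial addition, not upstream code): a hedged usage
+# example for `require`, forcing a Fortran-ordered, writeable, aligned copy of
+# the kind one might hand to compiled code. The helper name is hypothetical.
+def _example_require_fortran():
+    import numpy as np
+    x = np.arange(6).reshape(2, 3)  # C-contiguous by default
+    y = np.require(x, dtype=np.float32, requirements=['A', 'W', 'F'])
+    assert y.flags['F_CONTIGUOUS'] and not y.flags['C_CONTIGUOUS']
+    assert y.dtype == np.float32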
"WRITEABLE", + "O", "OWNDATA" +] +_E = Literal["E", "ENSUREARRAY"] +_RequirementsWithE = Union[_Requirements, _E] + +@overload +def require( + a: _ArrayType, + dtype: None = ..., + requirements: Union[None, _Requirements, Iterable[_Requirements]] = ..., + *, + like: ArrayLike = ... +) -> _ArrayType: ... +@overload +def require( + a: object, + dtype: DTypeLike = ..., + requirements: Union[_E, Iterable[_RequirementsWithE]] = ..., + *, + like: ArrayLike = ... +) -> ndarray: ... +@overload +def require( + a: object, + dtype: DTypeLike = ..., + requirements: Union[None, _Requirements, Iterable[_Requirements]] = ..., + *, + like: ArrayLike = ... +) -> ndarray: ... diff --git a/.local/lib/python3.8/site-packages/numpy/core/_dtype.py b/.local/lib/python3.8/site-packages/numpy/core/_dtype.py new file mode 100644 index 0000000..c3a22b1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/_dtype.py @@ -0,0 +1,349 @@ +""" +A place for code to be called from the implementation of np.dtype + +String handling is much easier to do correctly in python. +""" +import numpy as np + + +_kind_to_stem = { + 'u': 'uint', + 'i': 'int', + 'c': 'complex', + 'f': 'float', + 'b': 'bool', + 'V': 'void', + 'O': 'object', + 'M': 'datetime', + 'm': 'timedelta', + 'S': 'bytes', + 'U': 'str', +} + + +def _kind_name(dtype): + try: + return _kind_to_stem[dtype.kind] + except KeyError as e: + raise RuntimeError( + "internal dtype error, unknown kind {!r}" + .format(dtype.kind) + ) from None + + +def __str__(dtype): + if dtype.fields is not None: + return _struct_str(dtype, include_align=True) + elif dtype.subdtype: + return _subarray_str(dtype) + elif issubclass(dtype.type, np.flexible) or not dtype.isnative: + return dtype.str + else: + return dtype.name + + +def __repr__(dtype): + arg_str = _construction_repr(dtype, include_align=False) + if dtype.isalignedstruct: + arg_str = arg_str + ", align=True" + return "dtype({})".format(arg_str) + + +def _unpack_field(dtype, offset, title=None): + """ + Helper function to normalize the items in dtype.fields. + + Call as: + + dtype, offset, title = _unpack_field(*dtype.fields[name]) + """ + return dtype, offset, title + + +def _isunsized(dtype): + # PyDataType_ISUNSIZED + return dtype.itemsize == 0 + + +def _construction_repr(dtype, include_align=False, short=False): + """ + Creates a string repr of the dtype, excluding the 'dtype()' part + surrounding the object. This object may be a string, a list, or + a dict depending on the nature of the dtype. This + is the object passed as the first parameter to the dtype + constructor, and if no additional constructor parameters are + given, will reproduce the exact memory layout. + + Parameters + ---------- + short : bool + If true, this creates a shorter repr using 'kind' and 'itemsize', instead + of the longer type name. + + include_align : bool + If true, this includes the 'align=True' parameter + inside the struct dtype construction dict when needed. Use this flag + if you want a proper repr string without the 'dtype()' part around it. + + If false, this does not preserve the + 'align=True' parameter or sticky NPY_ALIGNED_STRUCT flag for + struct arrays like the regular repr does, because the 'align' + flag is not part of first dtype constructor parameter. This + mode is intended for a full 'repr', where the 'align=True' is + provided as the second parameter. 
+ """ + if dtype.fields is not None: + return _struct_str(dtype, include_align=include_align) + elif dtype.subdtype: + return _subarray_str(dtype) + else: + return _scalar_str(dtype, short=short) + + +def _scalar_str(dtype, short): + byteorder = _byte_order_str(dtype) + + if dtype.type == np.bool_: + if short: + return "'?'" + else: + return "'bool'" + + elif dtype.type == np.object_: + # The object reference may be different sizes on different + # platforms, so it should never include the itemsize here. + return "'O'" + + elif dtype.type == np.string_: + if _isunsized(dtype): + return "'S'" + else: + return "'S%d'" % dtype.itemsize + + elif dtype.type == np.unicode_: + if _isunsized(dtype): + return "'%sU'" % byteorder + else: + return "'%sU%d'" % (byteorder, dtype.itemsize / 4) + + # unlike the other types, subclasses of void are preserved - but + # historically the repr does not actually reveal the subclass + elif issubclass(dtype.type, np.void): + if _isunsized(dtype): + return "'V'" + else: + return "'V%d'" % dtype.itemsize + + elif dtype.type == np.datetime64: + return "'%sM8%s'" % (byteorder, _datetime_metadata_str(dtype)) + + elif dtype.type == np.timedelta64: + return "'%sm8%s'" % (byteorder, _datetime_metadata_str(dtype)) + + elif np.issubdtype(dtype, np.number): + # Short repr with endianness, like '' """ + # hack to obtain the native and swapped byte order characters + swapped = np.dtype(int).newbyteorder('S') + native = swapped.newbyteorder('S') + + byteorder = dtype.byteorder + if byteorder == '=': + return native.byteorder + if byteorder == 'S': + # TODO: this path can never be reached + return swapped.byteorder + elif byteorder == '|': + return '' + else: + return byteorder + + +def _datetime_metadata_str(dtype): + # TODO: this duplicates the C metastr_to_unicode functionality + unit, count = np.datetime_data(dtype) + if unit == 'generic': + return '' + elif count == 1: + return '[{}]'.format(unit) + else: + return '[{}{}]'.format(count, unit) + + +def _struct_dict_str(dtype, includealignedflag): + # unpack the fields dictionary into ls + names = dtype.names + fld_dtypes = [] + offsets = [] + titles = [] + for name in names: + fld_dtype, offset, title = _unpack_field(*dtype.fields[name]) + fld_dtypes.append(fld_dtype) + offsets.append(offset) + titles.append(title) + + # Build up a string to make the dictionary + + if np.core.arrayprint._get_legacy_print_mode() <= 121: + colon = ":" + fieldsep = "," + else: + colon = ": " + fieldsep = ", " + + # First, the names + ret = "{'names'%s[" % colon + ret += fieldsep.join(repr(name) for name in names) + + # Second, the formats + ret += "], 'formats'%s[" % colon + ret += fieldsep.join( + _construction_repr(fld_dtype, short=True) for fld_dtype in fld_dtypes) + + # Third, the offsets + ret += "], 'offsets'%s[" % colon + ret += fieldsep.join("%d" % offset for offset in offsets) + + # Fourth, the titles + if any(title is not None for title in titles): + ret += "], 'titles'%s[" % colon + ret += fieldsep.join(repr(title) for title in titles) + + # Fifth, the itemsize + ret += "], 'itemsize'%s%d" % (colon, dtype.itemsize) + + if (includealignedflag and dtype.isalignedstruct): + # Finally, the aligned flag + ret += ", 'aligned'%sTrue}" % colon + else: + ret += "}" + + return ret + + +def _is_packed(dtype): + """ + Checks whether the structured data type in 'dtype' + has a simple layout, where all the fields are in order, + and follow each other with no alignment padding. 
+ + When this returns true, the dtype can be reconstructed + from a list of the field names and dtypes with no additional + dtype parameters. + + Duplicates the C `is_dtype_struct_simple_unaligned_layout` function. + """ + total_offset = 0 + for name in dtype.names: + fld_dtype, fld_offset, title = _unpack_field(*dtype.fields[name]) + if fld_offset != total_offset: + return False + total_offset += fld_dtype.itemsize + if total_offset != dtype.itemsize: + return False + return True + + +def _struct_list_str(dtype): + items = [] + for name in dtype.names: + fld_dtype, fld_offset, title = _unpack_field(*dtype.fields[name]) + + item = "(" + if title is not None: + item += "({!r}, {!r}), ".format(title, name) + else: + item += "{!r}, ".format(name) + # Special case subarray handling here + if fld_dtype.subdtype is not None: + base, shape = fld_dtype.subdtype + item += "{}, {}".format( + _construction_repr(base, short=True), + shape + ) + else: + item += _construction_repr(fld_dtype, short=True) + + item += ")" + items.append(item) + + return "[" + ", ".join(items) + "]" + + +def _struct_str(dtype, include_align): + # The list str representation can't include the 'align=' flag, + # so if it is requested and the struct has the aligned flag set, + # we must use the dict str instead. + if not (include_align and dtype.isalignedstruct) and _is_packed(dtype): + sub = _struct_list_str(dtype) + + else: + sub = _struct_dict_str(dtype, include_align) + + # If the data type isn't the default, void, show it + if dtype.type != np.void: + return "({t.__module__}.{t.__name__}, {f})".format(t=dtype.type, f=sub) + else: + return sub + + +def _subarray_str(dtype): + base, shape = dtype.subdtype + return "({}, {})".format( + _construction_repr(base, short=True), + shape + ) + + +def _name_includes_bit_suffix(dtype): + if dtype.type == np.object_: + # pointer size varies by system, best to omit it + return False + elif dtype.type == np.bool_: + # implied + return False + elif np.issubdtype(dtype, np.flexible) and _isunsized(dtype): + # unspecified + return False + else: + return True + + +def _name_get(dtype): + # provides dtype.name.__get__, documented as returning a "bit name" + + if dtype.isbuiltin == 2: + # user dtypes don't promise to do anything special + return dtype.type.__name__ + + if issubclass(dtype.type, np.void): + # historically, void subclasses preserve their name, eg `record64` + name = dtype.type.__name__ + else: + name = _kind_name(dtype) + + # append bit counts + if _name_includes_bit_suffix(dtype): + name += "{}".format(dtype.itemsize * 8) + + # append metadata to datetimes + if dtype.type in (np.datetime64, np.timedelta64): + name += _datetime_metadata_str(dtype) + + return name diff --git a/.local/lib/python3.8/site-packages/numpy/core/_dtype_ctypes.py b/.local/lib/python3.8/site-packages/numpy/core/_dtype_ctypes.py new file mode 100644 index 0000000..6d7cbb2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/_dtype_ctypes.py @@ -0,0 +1,117 @@ +""" +Conversion from ctypes to dtype. 
+ +In an ideal world, we could achieve this through the PEP3118 buffer protocol, +something like:: + + def dtype_from_ctypes_type(t): + # needed to ensure that the shape of `t` is within memoryview.format + class DummyStruct(ctypes.Structure): + _fields_ = [('a', t)] + + # empty to avoid memory allocation + ctype_0 = (DummyStruct * 0)() + mv = memoryview(ctype_0) + + # convert the struct, and slice back out the field + return _dtype_from_pep3118(mv.format)['a'] + +Unfortunately, this fails because: + +* ctypes cannot handle length-0 arrays with PEP3118 (bpo-32782) +* PEP3118 cannot represent unions, but both numpy and ctypes can +* ctypes cannot handle big-endian structs with PEP3118 (bpo-32780) +""" + +# We delay-import ctypes for distributions that do not include it. +# While this module is not used unless the user passes in ctypes +# members, it is eagerly imported from numpy/core/__init__.py. +import numpy as np + + +def _from_ctypes_array(t): + return np.dtype((dtype_from_ctypes_type(t._type_), (t._length_,))) + + +def _from_ctypes_structure(t): + for item in t._fields_: + if len(item) > 2: + raise TypeError( + "ctypes bitfields have no dtype equivalent") + + if hasattr(t, "_pack_"): + import ctypes + formats = [] + offsets = [] + names = [] + current_offset = 0 + for fname, ftyp in t._fields_: + names.append(fname) + formats.append(dtype_from_ctypes_type(ftyp)) + # Each type has a default offset, this is platform dependent for some types. + effective_pack = min(t._pack_, ctypes.alignment(ftyp)) + current_offset = ((current_offset + effective_pack - 1) // effective_pack) * effective_pack + offsets.append(current_offset) + current_offset += ctypes.sizeof(ftyp) + + return np.dtype(dict( + formats=formats, + offsets=offsets, + names=names, + itemsize=ctypes.sizeof(t))) + else: + fields = [] + for fname, ftyp in t._fields_: + fields.append((fname, dtype_from_ctypes_type(ftyp))) + + # by default, ctypes structs are aligned + return np.dtype(fields, align=True) + + +def _from_ctypes_scalar(t): + """ + Return the dtype type with endianness included if it's the case + """ + if getattr(t, '__ctype_be__', None) is t: + return np.dtype('>' + t._type_) + elif getattr(t, '__ctype_le__', None) is t: + return np.dtype('<' + t._type_) + else: + return np.dtype(t._type_) + + +def _from_ctypes_union(t): + import ctypes + formats = [] + offsets = [] + names = [] + for fname, ftyp in t._fields_: + names.append(fname) + formats.append(dtype_from_ctypes_type(ftyp)) + offsets.append(0) # Union fields are offset to 0 + + return np.dtype(dict( + formats=formats, + offsets=offsets, + names=names, + itemsize=ctypes.sizeof(t))) + + +def dtype_from_ctypes_type(t): + """ + Construct a dtype object from a ctypes type + """ + import _ctypes + if issubclass(t, _ctypes.Array): + return _from_ctypes_array(t) + elif issubclass(t, _ctypes._Pointer): + raise TypeError("ctypes pointers have no dtype equivalent") + elif issubclass(t, _ctypes.Structure): + return _from_ctypes_structure(t) + elif issubclass(t, _ctypes.Union): + return _from_ctypes_union(t) + elif isinstance(getattr(t, '_type_', None), str): + return _from_ctypes_scalar(t) + else: + raise NotImplementedError( + "Unknown ctypes type {}".format(t.__name__)) diff --git a/.local/lib/python3.8/site-packages/numpy/core/_exceptions.py b/.local/lib/python3.8/site-packages/numpy/core/_exceptions.py new file mode 100644 index 0000000..3cd8042 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/_exceptions.py @@ -0,0 +1,271 @@ +""" +Various richly-typed 
exceptions that also help us deal with string formatting
+in Python, where it's easier.
+
+By putting the formatting in `__str__`, we also avoid paying the cost for
+users who silence the exceptions.
+"""
+from numpy.core.overrides import set_module
+
+def _unpack_tuple(tup):
+    if len(tup) == 1:
+        return tup[0]
+    else:
+        return tup
+
+
+def _display_as_base(cls):
+    """
+    A decorator that makes an exception class look like its base.
+
+    We use this to hide subclasses that are implementation details - the user
+    should catch the base type, which is what the traceback will show them.
+
+    Classes decorated with this decorator are subject to removal without a
+    deprecation warning.
+    """
+    assert issubclass(cls, Exception)
+    cls.__name__ = cls.__base__.__name__
+    return cls
+
+
+class UFuncTypeError(TypeError):
+    """ Base class for all ufunc exceptions """
+    def __init__(self, ufunc):
+        self.ufunc = ufunc
+
+
+@_display_as_base
+class _UFuncBinaryResolutionError(UFuncTypeError):
+    """ Thrown when a binary resolution fails """
+    def __init__(self, ufunc, dtypes):
+        super().__init__(ufunc)
+        self.dtypes = tuple(dtypes)
+        assert len(self.dtypes) == 2
+
+    def __str__(self):
+        return (
+            "ufunc {!r} cannot use operands with types {!r} and {!r}"
+        ).format(
+            self.ufunc.__name__, *self.dtypes
+        )
+
+
+@_display_as_base
+class _UFuncNoLoopError(UFuncTypeError):
+    """ Thrown when a ufunc loop cannot be found """
+    def __init__(self, ufunc, dtypes):
+        super().__init__(ufunc)
+        self.dtypes = tuple(dtypes)
+
+    def __str__(self):
+        return (
+            "ufunc {!r} did not contain a loop with signature matching types "
+            "{!r} -> {!r}"
+        ).format(
+            self.ufunc.__name__,
+            _unpack_tuple(self.dtypes[:self.ufunc.nin]),
+            _unpack_tuple(self.dtypes[self.ufunc.nin:])
+        )
+
+
+@_display_as_base
+class _UFuncCastingError(UFuncTypeError):
+    def __init__(self, ufunc, casting, from_, to):
+        super().__init__(ufunc)
+        self.casting = casting
+        self.from_ = from_
+        self.to = to
+
+
+@_display_as_base
+class _UFuncInputCastingError(_UFuncCastingError):
+    """ Thrown when a ufunc input cannot be casted """
+    def __init__(self, ufunc, casting, from_, to, i):
+        super().__init__(ufunc, casting, from_, to)
+        self.in_i = i
+
+    def __str__(self):
+        # only show the number if more than one input exists
+        i_str = "{} ".format(self.in_i) if self.ufunc.nin != 1 else ""
+        return (
+            "Cannot cast ufunc {!r} input {}from {!r} to {!r} with casting "
+            "rule {!r}"
+        ).format(
+            self.ufunc.__name__, i_str, self.from_, self.to, self.casting
+        )
+
+
+@_display_as_base
+class _UFuncOutputCastingError(_UFuncCastingError):
+    """ Thrown when a ufunc output cannot be casted """
+    def __init__(self, ufunc, casting, from_, to, i):
+        super().__init__(ufunc, casting, from_, to)
+        self.out_i = i
+
+    def __str__(self):
+        # only show the number if more than one output exists
+        i_str = "{} ".format(self.out_i) if self.ufunc.nout != 1 else ""
+        return (
+            "Cannot cast ufunc {!r} output {}from {!r} to {!r} with casting "
+            "rule {!r}"
+        ).format(
+            self.ufunc.__name__, i_str, self.from_, self.to, self.casting
+        )
+
+
+# Exception used in shares_memory()
+@set_module('numpy')
+class TooHardError(RuntimeError):
+    pass
+
+
+@set_module('numpy')
+class AxisError(ValueError, IndexError):
+    """Axis supplied was invalid.
+
+    This is raised whenever an ``axis`` parameter is specified that is larger
+    than the number of array dimensions.
+ For compatibility with code written against older numpy versions, which + raised a mixture of `ValueError` and `IndexError` for this situation, this + exception subclasses both to ensure that ``except ValueError`` and + ``except IndexError`` statements continue to catch `AxisError`. + + .. versionadded:: 1.13 + + Parameters + ---------- + axis : int or str + The out of bounds axis or a custom exception message. + If an axis is provided, then `ndim` should be specified as well. + ndim : int, optional + The number of array dimensions. + msg_prefix : str, optional + A prefix for the exception message. + + Attributes + ---------- + axis : int, optional + The out of bounds axis or ``None`` if a custom exception + message was provided. This should be the axis as passed by + the user, before any normalization to resolve negative indices. + + .. versionadded:: 1.22 + ndim : int, optional + The number of array dimensions or ``None`` if a custom exception + message was provided. + + .. versionadded:: 1.22 + + + Examples + -------- + >>> array_1d = np.arange(10) + >>> np.cumsum(array_1d, axis=1) + Traceback (most recent call last): + ... + numpy.AxisError: axis 1 is out of bounds for array of dimension 1 + + Negative axes are preserved: + + >>> np.cumsum(array_1d, axis=-2) + Traceback (most recent call last): + ... + numpy.AxisError: axis -2 is out of bounds for array of dimension 1 + + The class constructor generally takes the axis and arrays' + dimensionality as arguments: + + >>> print(np.AxisError(2, 1, msg_prefix='error')) + error: axis 2 is out of bounds for array of dimension 1 + + Alternatively, a custom exception message can be passed: + + >>> print(np.AxisError('Custom error message')) + Custom error message + + """ + + __slots__ = ("axis", "ndim", "_msg") + + def __init__(self, axis, ndim=None, msg_prefix=None): + if ndim is msg_prefix is None: + # single-argument form: directly set the error message + self._msg = axis + self.axis = None + self.ndim = None + else: + self._msg = msg_prefix + self.axis = axis + self.ndim = ndim + + def __str__(self): + axis = self.axis + ndim = self.ndim + + if axis is ndim is None: + return self._msg + else: + msg = f"axis {axis} is out of bounds for array of dimension {ndim}" + if self._msg is not None: + msg = f"{self._msg}: {msg}" + return msg + + +@_display_as_base +class _ArrayMemoryError(MemoryError): + """ Thrown when an array cannot be allocated""" + def __init__(self, shape, dtype): + self.shape = shape + self.dtype = dtype + + @property + def _total_size(self): + num_bytes = self.dtype.itemsize + for dim in self.shape: + num_bytes *= dim + return num_bytes + + @staticmethod + def _size_to_string(num_bytes): + """ Convert a number of bytes into a binary size string """ + + # https://en.wikipedia.org/wiki/Binary_prefix + LOG2_STEP = 10 + STEP = 1024 + units = ['bytes', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB'] + + unit_i = max(num_bytes.bit_length() - 1, 1) // LOG2_STEP + unit_val = 1 << (unit_i * LOG2_STEP) + n_units = num_bytes / unit_val + del unit_val + + # ensure we pick a unit that is correct after rounding + if round(n_units) == STEP: + unit_i += 1 + n_units /= STEP + + # deal with sizes so large that we don't have units for them + if unit_i >= len(units): + new_unit_i = len(units) - 1 + n_units *= 1 << ((unit_i - new_unit_i) * LOG2_STEP) + unit_i = new_unit_i + + unit_name = units[unit_i] + # format with a sensible number of digits + if unit_i == 0: + # no decimal point on bytes + return '{:.0f} {}'.format(n_units, unit_name) + elif 
round(n_units) < 1000:
+            # 3 significant figures, if none are dropped to the left of the .
+            return '{:#.3g} {}'.format(n_units, unit_name)
+        else:
+            # just give all the digits otherwise
+            return '{:#.0f} {}'.format(n_units, unit_name)
+
+    def __str__(self):
+        size_str = self._size_to_string(self._total_size)
+        return (
+            "Unable to allocate {} for an array with shape {} and data type {}"
+            .format(size_str, self.shape, self.dtype)
+        )
diff --git a/.local/lib/python3.8/site-packages/numpy/core/_internal.py b/.local/lib/python3.8/site-packages/numpy/core/_internal.py
new file mode 100644
index 0000000..8942955
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/core/_internal.py
@@ -0,0 +1,878 @@
+"""
+A place for internal code
+
+Some things are more easily handled in Python.
+
+"""
+import ast
+import re
+import sys
+import platform
+import warnings
+
+from .multiarray import dtype, array, ndarray
+try:
+    import ctypes
+except ImportError:
+    ctypes = None
+
+IS_PYPY = platform.python_implementation() == 'PyPy'
+
+if sys.byteorder == 'little':
+    _nbo = '<'
+else:
+    _nbo = '>'
+
+def _makenames_list(adict, align):
+    allfields = []
+
+    for fname, obj in adict.items():
+        n = len(obj)
+        if not isinstance(obj, tuple) or n not in (2, 3):
+            raise ValueError("entry not a 2- or 3- tuple")
+        if n > 2 and obj[2] == fname:
+            continue
+        num = int(obj[1])
+        if num < 0:
+            raise ValueError("invalid offset.")
+        format = dtype(obj[0], align=align)
+        if n > 2:
+            title = obj[2]
+        else:
+            title = None
+        allfields.append((fname, format, num, title))
+    # sort by offsets
+    allfields.sort(key=lambda x: x[2])
+    names = [x[0] for x in allfields]
+    formats = [x[1] for x in allfields]
+    offsets = [x[2] for x in allfields]
+    titles = [x[3] for x in allfields]
+
+    return names, formats, offsets, titles
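+# Illustrative sketch (editorial addition, not upstream code): the fields-dict
+# descriptor form that `_makenames_list` above (and `_usefields` below)
+# normalize, where each key maps to a (format, offset[, title]) tuple. The
+# helper name is hypothetical.
+def _example_fields_dict_dtype():
+    import numpy as np
+    dt = np.dtype({'x': ('<i4', 0), 'y': ('<f8', 4)})
+    assert dt.names == ('x', 'y')  # sorted by offset
+    assert dt.fields['y'][1] == 4  # fields map to (dtype, offset) pairs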
+# Called in PyArray_DescrConverter function when
+# a dictionary without "names" and "formats"
+# fields is used as a data-type descriptor.
+def _usefields(adict, align):
+    try:
+        names = adict[-1]
+    except KeyError:
+        names = None
+    if names is None:
+        names, formats, offsets, titles = _makenames_list(adict, align)
+    else:
+        formats = []
+        offsets = []
+        titles = []
+        for name in names:
+            res = adict[name]
+            formats.append(res[0])
+            offsets.append(res[1])
+            if len(res) > 2:
+                titles.append(res[2])
+            else:
+                titles.append(None)
+
+    return dtype({"names": names,
+                  "formats": formats,
+                  "offsets": offsets,
+                  "titles": titles}, align)
+
+
+# construct an array_protocol descriptor list
+# from the fields attribute of a descriptor
+# This calls itself recursively but should eventually hit
+# a descriptor that has no fields and then return
+# a simple typestring
+
+def _array_descr(descriptor):
+    fields = descriptor.fields
+    if fields is None:
+        subdtype = descriptor.subdtype
+        if subdtype is None:
+            if descriptor.metadata is None:
+                return descriptor.str
+            else:
+                new = descriptor.metadata.copy()
+                if new:
+                    return (descriptor.str, new)
+                else:
+                    return descriptor.str
+        else:
+            return (_array_descr(subdtype[0]), subdtype[1])
+
+    names = descriptor.names
+    ordered_fields = [fields[x] + (x,) for x in names]
+    result = []
+    offset = 0
+    for field in ordered_fields:
+        if field[1] > offset:
+            num = field[1] - offset
+            result.append(('', f'|V{num}'))
+            offset += num
+        elif field[1] < offset:
+            raise ValueError(
+                "dtype.descr is not defined for types with overlapping or "
+                "out-of-order fields")
+        if len(field) > 3:
+            name = (field[2], field[3])
+        else:
+            name = field[2]
+        if field[0].subdtype:
+            tup = (name, _array_descr(field[0].subdtype[0]),
+                   field[0].subdtype[1])
+        else:
+            tup = (name, _array_descr(field[0]))
+        offset += field[0].itemsize
+        result.append(tup)
+
+    if descriptor.itemsize > offset:
+        num = descriptor.itemsize - offset
+        result.append(('', f'|V{num}'))
+
+    return result
+
+# Build a new array from the information in a pickle.
+# Note that the name numpy.core._internal._reconstruct is embedded in
+# pickles of ndarrays made with NumPy before release 1.0
+# so don't remove the name here, or you'll
+# break backward compatibility.
+def _reconstruct(subtype, shape, dtype):
+    return ndarray.__new__(subtype, shape, dtype)
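+# Illustrative sketch (editorial addition, not upstream code): the kind of
+# comma-separated format string that `format_re` and `_commastring` below
+# parse, reached through the public np.dtype constructor. The helper name is
+# hypothetical.
+def _example_commastring_dtype():
+    import numpy as np
+    dt = np.dtype('3int8, float32, (2, 3)float64')
+    assert dt.names == ('f0', 'f1', 'f2')
+    assert dt['f0'].shape == (3,)  # the '3' repeat becomes a subarray
+    assert dt['f2'].shape == (2, 3)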
+# format_re was originally from numarray by J. Todd Miller
+
+format_re = re.compile(r'(?P<order1>[<>|=]?)'
+                       r'(?P<repeats> *[(]?[ ,0-9]*[)]? *)'
+                       r'(?P<order2>[<>|=]?)'
+                       r'(?P<dtype>[A-Za-z0-9.?]*(?:\[[a-zA-Z0-9,.]+\])?)')
+sep_re = re.compile(r'\s*,\s*')
+space_re = re.compile(r'\s+$')
+
+# astr is a string (perhaps comma separated)
+
+_convorder = {'=': _nbo}
+
+def _commastring(astr):
+    startindex = 0
+    result = []
+    while startindex < len(astr):
+        mo = format_re.match(astr, pos=startindex)
+        try:
+            (order1, repeats, order2, dtype) = mo.groups()
+        except (TypeError, AttributeError):
+            raise ValueError(
+                f'format number {len(result)+1} of "{astr}" is not recognized'
+                ) from None
+        startindex = mo.end()
+        # Separator or ending padding
+        if startindex < len(astr):
+            if space_re.match(astr, pos=startindex):
+                startindex = len(astr)
+            else:
+                mo = sep_re.match(astr, pos=startindex)
+                if not mo:
+                    raise ValueError(
+                        'format number %d of "%s" is not recognized' %
+                        (len(result)+1, astr))
+                startindex = mo.end()
+
+        if order2 == '':
+            order = order1
+        elif order1 == '':
+            order = order2
+        else:
+            order1 = _convorder.get(order1, order1)
+            order2 = _convorder.get(order2, order2)
+            if (order1 != order2):
+                raise ValueError(
+                    'inconsistent byte-order specification %s and %s' %
+                    (order1, order2))
+            order = order1
+
+        if order in ('|', '=', _nbo):
+            order = ''
+        dtype = order + dtype
+        if (repeats == ''):
+            newitem = dtype
+        else:
+            newitem = (dtype, ast.literal_eval(repeats))
+        result.append(newitem)
+
+    return result
+
+class dummy_ctype:
+    def __init__(self, cls):
+        self._cls = cls
+    def __mul__(self, other):
+        return self
+    def __call__(self, *other):
+        return self._cls(other)
+    def __eq__(self, other):
+        return self._cls == other._cls
+    def __ne__(self, other):
+        return self._cls != other._cls
+
+def _getintp_ctype():
+    val = _getintp_ctype.cache
+    if val is not None:
+        return val
+    if ctypes is None:
+        import numpy as np
+        val = dummy_ctype(np.intp)
+    else:
+        char = dtype('p').char
+        if char == 'i':
+            val = ctypes.c_int
+        elif char == 'l':
+            val = ctypes.c_long
+        elif char == 'q':
+            val = ctypes.c_longlong
+        else:
+            val = ctypes.c_long
+    _getintp_ctype.cache = val
+    return val
+_getintp_ctype.cache = None
+
+# Used for .ctypes attribute of ndarray
+
+class _missing_ctypes:
+    def cast(self, num, obj):
+        return num.value
+
+    class c_void_p:
+        def __init__(self, ptr):
+            self.value = ptr
+
+
+class _ctypes:
+    def __init__(self, array, ptr=None):
+        self._arr = array
+
+        if ctypes:
+            self._ctypes = ctypes
+            self._data = self._ctypes.c_void_p(ptr)
+        else:
+            # fake a pointer-like object that holds onto the reference
+            self._ctypes = _missing_ctypes()
+            self._data = self._ctypes.c_void_p(ptr)
+            self._data._objects = array
+
+        if self._arr.ndim == 0:
+            self._zerod = True
+        else:
+            self._zerod = False
+
+    def data_as(self, obj):
+        """
+        Return the data pointer cast to a particular c-types object.
+        For example, calling ``self._as_parameter_`` is equivalent to
+        ``self.data_as(ctypes.c_void_p)``. Perhaps you want to use the data as a
+        pointer to a ctypes array of floating-point data:
+        ``self.data_as(ctypes.POINTER(ctypes.c_double))``.
+
+        The returned pointer will keep a reference to the array.
+        """
+        # _ctypes.cast function causes a circular reference of self._data in
+        # self._data._objects. Attributes of self._data cannot be released
+        # until gc.collect is called. Make a copy of the pointer first then let
+        # it hold the array reference.
This is a workaround to circumvent the + # CPython bug https://bugs.python.org/issue12836 + ptr = self._ctypes.cast(self._data, obj) + ptr._arr = self._arr + return ptr + + def shape_as(self, obj): + """ + Return the shape tuple as an array of some other c-types + type. For example: ``self.shape_as(ctypes.c_short)``. + """ + if self._zerod: + return None + return (obj*self._arr.ndim)(*self._arr.shape) + + def strides_as(self, obj): + """ + Return the strides tuple as an array of some other + c-types type. For example: ``self.strides_as(ctypes.c_longlong)``. + """ + if self._zerod: + return None + return (obj*self._arr.ndim)(*self._arr.strides) + + @property + def data(self): + """ + A pointer to the memory area of the array as a Python integer. + This memory area may contain data that is not aligned, or not in correct + byte-order. The memory area may not even be writeable. The array + flags and data-type of this array should be respected when passing this + attribute to arbitrary C-code to avoid trouble that can include Python + crashing. User Beware! The value of this attribute is exactly the same + as ``self._array_interface_['data'][0]``. + + Note that unlike ``data_as``, a reference will not be kept to the array: + code like ``ctypes.c_void_p((a + b).ctypes.data)`` will result in a + pointer to a deallocated array, and should be spelt + ``(a + b).ctypes.data_as(ctypes.c_void_p)`` + """ + return self._data.value + + @property + def shape(self): + """ + (c_intp*self.ndim): A ctypes array of length self.ndim where + the basetype is the C-integer corresponding to ``dtype('p')`` on this + platform (see `~numpy.ctypeslib.c_intp`). This base-type could be + `ctypes.c_int`, `ctypes.c_long`, or `ctypes.c_longlong` depending on + the platform. The ctypes array contains the shape of + the underlying array. + """ + return self.shape_as(_getintp_ctype()) + + @property + def strides(self): + """ + (c_intp*self.ndim): A ctypes array of length self.ndim where + the basetype is the same as for the shape attribute. This ctypes array + contains the strides information from the underlying array. This strides + information is important for showing how many bytes must be jumped to + get to the next element in the array. + """ + return self.strides_as(_getintp_ctype()) + + @property + def _as_parameter_(self): + """ + Overrides the ctypes semi-magic method + + Enables `c_func(some_array.ctypes)` + """ + return self.data_as(ctypes.c_void_p) + + # Numpy 1.21.0, 2021-05-18 + + def get_data(self): + """Deprecated getter for the `_ctypes.data` property. + + .. deprecated:: 1.21 + """ + warnings.warn('"get_data" is deprecated. Use "data" instead', + DeprecationWarning, stacklevel=2) + return self.data + + def get_shape(self): + """Deprecated getter for the `_ctypes.shape` property. + + .. deprecated:: 1.21 + """ + warnings.warn('"get_shape" is deprecated. Use "shape" instead', + DeprecationWarning, stacklevel=2) + return self.shape + + def get_strides(self): + """Deprecated getter for the `_ctypes.strides` property. + + .. deprecated:: 1.21 + """ + warnings.warn('"get_strides" is deprecated. Use "strides" instead', + DeprecationWarning, stacklevel=2) + return self.strides + + def get_as_parameter(self): + """Deprecated getter for the `_ctypes._as_parameter_` property. + + .. deprecated:: 1.21 + """ + warnings.warn( + '"get_as_parameter" is deprecated. 
Use "_as_parameter_" instead', + DeprecationWarning, stacklevel=2, + ) + return self._as_parameter_ + + +def _newnames(datatype, order): + """ + Given a datatype and an order object, return a new names tuple, with the + order indicated + """ + oldnames = datatype.names + nameslist = list(oldnames) + if isinstance(order, str): + order = [order] + seen = set() + if isinstance(order, (list, tuple)): + for name in order: + try: + nameslist.remove(name) + except ValueError: + if name in seen: + raise ValueError(f"duplicate field name: {name}") from None + else: + raise ValueError(f"unknown field name: {name}") from None + seen.add(name) + return tuple(list(order) + nameslist) + raise ValueError(f"unsupported order value: {order}") + +def _copy_fields(ary): + """Return copy of structured array with padding between fields removed. + + Parameters + ---------- + ary : ndarray + Structured array from which to remove padding bytes + + Returns + ------- + ary_copy : ndarray + Copy of ary with padding bytes removed + """ + dt = ary.dtype + copy_dtype = {'names': dt.names, + 'formats': [dt.fields[name][0] for name in dt.names]} + return array(ary, dtype=copy_dtype, copy=True) + +def _getfield_is_safe(oldtype, newtype, offset): + """ Checks safety of getfield for object arrays. + + As in _view_is_safe, we need to check that memory containing objects is not + reinterpreted as a non-object datatype and vice versa. + + Parameters + ---------- + oldtype : data-type + Data type of the original ndarray. + newtype : data-type + Data type of the field being accessed by ndarray.getfield + offset : int + Offset of the field being accessed by ndarray.getfield + + Raises + ------ + TypeError + If the field access is invalid + + """ + if newtype.hasobject or oldtype.hasobject: + if offset == 0 and newtype == oldtype: + return + if oldtype.names is not None: + for name in oldtype.names: + if (oldtype.fields[name][1] == offset and + oldtype.fields[name][0] == newtype): + return + raise TypeError("Cannot get/set field of an object array") + return + +def _view_is_safe(oldtype, newtype): + """ Checks safety of a view involving object arrays, for example when + doing:: + + np.zeros(10, dtype=oldtype).view(newtype) + + Parameters + ---------- + oldtype : data-type + Data type of original ndarray + newtype : data-type + Data type of the view + + Raises + ------ + TypeError + If the new type is incompatible with the old type. + + """ + + # if the types are equivalent, there is no problem. 
+ # for example: dtype((np.record, 'i4,i4')) == dtype((np.void, 'i4,i4')) + if oldtype == newtype: + return + + if newtype.hasobject or oldtype.hasobject: + raise TypeError("Cannot change data-type for object array.") + return + +# Given a string containing a PEP 3118 format specifier, +# construct a NumPy dtype + +_pep3118_native_map = { + '?': '?', + 'c': 'S1', + 'b': 'b', + 'B': 'B', + 'h': 'h', + 'H': 'H', + 'i': 'i', + 'I': 'I', + 'l': 'l', + 'L': 'L', + 'q': 'q', + 'Q': 'Q', + 'e': 'e', + 'f': 'f', + 'd': 'd', + 'g': 'g', + 'Zf': 'F', + 'Zd': 'D', + 'Zg': 'G', + 's': 'S', + 'w': 'U', + 'O': 'O', + 'x': 'V', # padding +} +_pep3118_native_typechars = ''.join(_pep3118_native_map.keys()) + +_pep3118_standard_map = { + '?': '?', + 'c': 'S1', + 'b': 'b', + 'B': 'B', + 'h': 'i2', + 'H': 'u2', + 'i': 'i4', + 'I': 'u4', + 'l': 'i4', + 'L': 'u4', + 'q': 'i8', + 'Q': 'u8', + 'e': 'f2', + 'f': 'f', + 'd': 'd', + 'Zf': 'F', + 'Zd': 'D', + 's': 'S', + 'w': 'U', + 'O': 'O', + 'x': 'V', # padding +} +_pep3118_standard_typechars = ''.join(_pep3118_standard_map.keys()) + +_pep3118_unsupported_map = { + 'u': 'UCS-2 strings', + '&': 'pointers', + 't': 'bitfields', + 'X': 'function pointers', +} + +class _Stream: + def __init__(self, s): + self.s = s + self.byteorder = '@' + + def advance(self, n): + res = self.s[:n] + self.s = self.s[n:] + return res + + def consume(self, c): + if self.s[:len(c)] == c: + self.advance(len(c)) + return True + return False + + def consume_until(self, c): + if callable(c): + i = 0 + while i < len(self.s) and not c(self.s[i]): + i = i + 1 + return self.advance(i) + else: + i = self.s.index(c) + res = self.advance(i) + self.advance(len(c)) + return res + + @property + def next(self): + return self.s[0] + + def __bool__(self): + return bool(self.s) + + +def _dtype_from_pep3118(spec): + stream = _Stream(spec) + dtype, align = __dtype_from_pep3118(stream, is_subdtype=False) + return dtype + +def __dtype_from_pep3118(stream, is_subdtype): + field_spec = dict( + names=[], + formats=[], + offsets=[], + itemsize=0 + ) + offset = 0 + common_alignment = 1 + is_padding = False + + # Parse spec + while stream: + value = None + + # End of structure, bail out to upper level + if stream.consume('}'): + break + + # Sub-arrays (1) + shape = None + if stream.consume('('): + shape = stream.consume_until(')') + shape = tuple(map(int, shape.split(','))) + + # Byte order + if stream.next in ('@', '=', '<', '>', '^', '!'): + byteorder = stream.advance(1) + if byteorder == '!': + byteorder = '>' + stream.byteorder = byteorder + + # Byte order characters also control native vs. 
standard type sizes + if stream.byteorder in ('@', '^'): + type_map = _pep3118_native_map + type_map_chars = _pep3118_native_typechars + else: + type_map = _pep3118_standard_map + type_map_chars = _pep3118_standard_typechars + + # Item sizes + itemsize_str = stream.consume_until(lambda c: not c.isdigit()) + if itemsize_str: + itemsize = int(itemsize_str) + else: + itemsize = 1 + + # Data types + is_padding = False + + if stream.consume('T{'): + value, align = __dtype_from_pep3118( + stream, is_subdtype=True) + elif stream.next in type_map_chars: + if stream.next == 'Z': + typechar = stream.advance(2) + else: + typechar = stream.advance(1) + + is_padding = (typechar == 'x') + dtypechar = type_map[typechar] + if dtypechar in 'USV': + dtypechar += '%d' % itemsize + itemsize = 1 + numpy_byteorder = {'@': '=', '^': '='}.get( + stream.byteorder, stream.byteorder) + value = dtype(numpy_byteorder + dtypechar) + align = value.alignment + elif stream.next in _pep3118_unsupported_map: + desc = _pep3118_unsupported_map[stream.next] + raise NotImplementedError( + "Unrepresentable PEP 3118 data type {!r} ({})" + .format(stream.next, desc)) + else: + raise ValueError("Unknown PEP 3118 data type specifier %r" % stream.s) + + # + # Native alignment may require padding + # + # Here we assume that the presence of a '@' character implicitly implies + # that the start of the array is *already* aligned. + # + extra_offset = 0 + if stream.byteorder == '@': + start_padding = (-offset) % align + intra_padding = (-value.itemsize) % align + + offset += start_padding + + if intra_padding != 0: + if itemsize > 1 or (shape is not None and _prod(shape) > 1): + # Inject internal padding to the end of the sub-item + value = _add_trailing_padding(value, intra_padding) + else: + # We can postpone the injection of internal padding, + # as the item appears at most once + extra_offset += intra_padding + + # Update common alignment + common_alignment = _lcm(align, common_alignment) + + # Convert itemsize to sub-array + if itemsize != 1: + value = dtype((value, (itemsize,))) + + # Sub-arrays (2) + if shape is not None: + value = dtype((value, shape)) + + # Field name + if stream.consume(':'): + name = stream.consume_until(':') + else: + name = None + + if not (is_padding and name is None): + if name is not None and name in field_spec['names']: + raise RuntimeError(f"Duplicate field name '{name}' in PEP3118 format") + field_spec['names'].append(name) + field_spec['formats'].append(value) + field_spec['offsets'].append(offset) + + offset += value.itemsize + offset += extra_offset + + field_spec['itemsize'] = offset + + # extra final padding for aligned types + if stream.byteorder == '@': + field_spec['itemsize'] += (-offset) % common_alignment + + # Check if this was a simple 1-item type, and unwrap it + if (field_spec['names'] == [None] + and field_spec['offsets'][0] == 0 + and field_spec['itemsize'] == field_spec['formats'][0].itemsize + and not is_subdtype): + ret = field_spec['formats'][0] + else: + _fix_names(field_spec) + ret = dtype(field_spec) + + # Finished + return ret, common_alignment + +def _fix_names(field_spec): + """ Replace names which are None with the next unused f%d name """ + names = field_spec['names'] + for i, name in enumerate(names): + if name is not None: + continue + + j = 0 + while True: + name = f'f{j}' + if name not in names: + break + j = j + 1 + names[i] = name + +def _add_trailing_padding(value, padding): + """Inject the specified number of padding bytes at the end of a dtype""" + if value.fields 
is None:
+        field_spec = dict(
+            names=['f0'],
+            formats=[value],
+            offsets=[0],
+            itemsize=value.itemsize
+        )
+    else:
+        fields = value.fields
+        names = value.names
+        field_spec = dict(
+            names=names,
+            formats=[fields[name][0] for name in names],
+            offsets=[fields[name][1] for name in names],
+            itemsize=value.itemsize
+        )
+
+    field_spec['itemsize'] += padding
+    return dtype(field_spec)
+
+def _prod(a):
+    p = 1
+    for x in a:
+        p *= x
+    return p
+
+def _gcd(a, b):
+    """Calculate the greatest common divisor of a and b"""
+    while b:
+        a, b = b, a % b
+    return a
+
+def _lcm(a, b):
+    return a // _gcd(a, b) * b
+
+def array_ufunc_errmsg_formatter(dummy, ufunc, method, *inputs, **kwargs):
+    """ Format the error message for when __array_ufunc__ gives up. """
+    args_string = ', '.join(['{!r}'.format(arg) for arg in inputs] +
+                            ['{}={!r}'.format(k, v)
+                             for k, v in kwargs.items()])
+    args = inputs + kwargs.get('out', ())
+    types_string = ', '.join(repr(type(arg).__name__) for arg in args)
+    return ('operand type(s) all returned NotImplemented from '
+            '__array_ufunc__({!r}, {!r}, {}): {}'
+            .format(ufunc, method, args_string, types_string))
+
+
+def array_function_errmsg_formatter(public_api, types):
+    """ Format the error message for when __array_function__ gives up. """
+    func_name = '{}.{}'.format(public_api.__module__, public_api.__name__)
+    return ("no implementation found for '{}' on types that implement "
+            '__array_function__: {}'.format(func_name, list(types)))
+
+
+def _ufunc_doc_signature_formatter(ufunc):
+    """
+    Builds a signature string which resembles PEP 457
+
+    This is used to construct the first line of the docstring
+    """
+
+    # input arguments are simple
+    if ufunc.nin == 1:
+        in_args = 'x'
+    else:
+        in_args = ', '.join(f'x{i+1}' for i in range(ufunc.nin))
+
+    # output arguments are both keyword or positional
+    if ufunc.nout == 0:
+        out_args = ', /, out=()'
+    elif ufunc.nout == 1:
+        out_args = ', /, out=None'
+    else:
+        out_args = '[, {positional}], / [, out={default}]'.format(
+            positional=', '.join(
+                'out{}'.format(i+1) for i in range(ufunc.nout)),
+            default=repr((None,)*ufunc.nout)
+        )
+
+    # keyword only args depend on whether this is a gufunc
+    kwargs = (
+        ", casting='same_kind'"
+        ", order='K'"
+        ", dtype=None"
+        ", subok=True"
+    )
+
+    # NOTE: gufuncs may or may not support the `axis` parameter
+    if ufunc.signature is None:
+        kwargs = f", where=True{kwargs}[, signature, extobj]"
+    else:
+        kwargs += "[, signature, extobj, axes, axis]"
+
+    # join all the parts together
+    return '{name}({in_args}{out_args}, *{kwargs})'.format(
+        name=ufunc.__name__,
+        in_args=in_args,
+        out_args=out_args,
+        kwargs=kwargs
+    )
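+# Illustrative sketch (editorial addition, not upstream code): what
+# `_ufunc_doc_signature_formatter` above produces for a familiar binary
+# ufunc. The helper name is hypothetical; numpy is imported lazily.
+def _example_ufunc_signature():
+    import numpy as np
+    sig = _ufunc_doc_signature_formatter(np.add)
+    assert sig == ("add(x1, x2, /, out=None, *, where=True, "
+                   "casting='same_kind', order='K', dtype=None, subok=True"
+                   "[, signature, extobj])")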
+def npy_ctypes_check(cls):
+    # determine if a class comes from ctypes, in order to work around
+    # a bug in the buffer protocol for those objects, bpo-10746
+    try:
+        # ctypes class are new-style, so have an __mro__. This probably fails
+        # for ctypes classes with multiple inheritance.
+        if IS_PYPY:
+            # (..., _ctypes.basics._CData, Bufferable, object)
+            ctype_base = cls.__mro__[-3]
+        else:
+            # (..., _ctypes._CData, object)
+            ctype_base = cls.__mro__[-2]
+        # right now, they're part of the _ctypes module
+        return '_ctypes' in ctype_base.__module__
+    except Exception:
+        return False
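+# Illustrative sketch (editorial addition, not upstream code): the public face
+# of the `_ctypes` class defined above, reached via `ndarray.ctypes`. The
+# helper name is hypothetical.
+def _example_ctypes_attribute():
+    import ctypes
+    import numpy as np
+    a = np.zeros((2, 3), dtype=np.float64)
+    # shape exposed as a ctypes array of a chosen integer type
+    assert tuple(a.ctypes.shape_as(ctypes.c_longlong)) == (2, 3)
+    # a pointer that keeps the array alive, suitable for C callouts
+    ptr = a.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
+    assert ptr[0] == 0.0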
diff --git a/.local/lib/python3.8/site-packages/numpy/core/_internal.pyi b/.local/lib/python3.8/site-packages/numpy/core/_internal.pyi
new file mode 100644
index 0000000..f4bfd77
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/core/_internal.pyi
@@ -0,0 +1,30 @@
+from typing import Any, TypeVar, Type, overload, Optional, Generic
+import ctypes as ct
+
+from numpy import ndarray
+from numpy.ctypeslib import c_intp
+
+_CastT = TypeVar("_CastT", bound=ct._CanCastTo)  # Copied from `ctypes.cast`
+_CT = TypeVar("_CT", bound=ct._CData)
+_PT = TypeVar("_PT", bound=Optional[int])
+
+# TODO: Let the likes of `shape_as` and `strides_as` return `None`
+# for 0D arrays once we've got shape-support
+
+class _ctypes(Generic[_PT]):
+    @overload
+    def __new__(cls, array: ndarray[Any, Any], ptr: None = ...) -> _ctypes[None]: ...
+    @overload
+    def __new__(cls, array: ndarray[Any, Any], ptr: _PT) -> _ctypes[_PT]: ...
+    @property
+    def data(self) -> _PT: ...
+    @property
+    def shape(self) -> ct.Array[c_intp]: ...
+    @property
+    def strides(self) -> ct.Array[c_intp]: ...
+    @property
+    def _as_parameter_(self) -> ct.c_void_p: ...
+
+    def data_as(self, obj: Type[_CastT]) -> _CastT: ...
+    def shape_as(self, obj: Type[_CT]) -> ct.Array[_CT]: ...
+    def strides_as(self, obj: Type[_CT]) -> ct.Array[_CT]: ...
diff --git a/.local/lib/python3.8/site-packages/numpy/core/_machar.py b/.local/lib/python3.8/site-packages/numpy/core/_machar.py
new file mode 100644
index 0000000..ace19a4
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/core/_machar.py
@@ -0,0 +1,355 @@
+"""
+Machine arithmetic - determine the parameters of the
+floating-point arithmetic system
+
+Author: Pearu Peterson, September 2003
+
+"""
+__all__ = ['MachAr']
+
+from numpy.core.fromnumeric import any
+from numpy.core._ufunc_config import errstate
+from numpy.core.overrides import set_module
+
+# Need to speed this up...especially for longfloat
+
+# Deprecated 2021-10-20, NumPy 1.22
+@set_module('numpy')
+class MachAr:
+    """
+    Diagnosing machine parameters.
+
+    Attributes
+    ----------
+    ibeta : int
+        Radix in which numbers are represented.
+    it : int
+        Number of base-`ibeta` digits in the floating point mantissa M.
+    machep : int
+        Exponent of the smallest (most negative) power of `ibeta` that,
+        added to 1.0, gives something different from 1.0
+    eps : float
+        Floating-point number ``beta**machep`` (floating point precision)
+    negep : int
+        Exponent of the smallest power of `ibeta` that, subtracted
+        from 1.0, gives something different from 1.0.
+    epsneg : float
+        Floating-point number ``beta**negep``.
+    iexp : int
+        Number of bits in the exponent (including its sign and bias).
+    minexp : int
+        Smallest (most negative) power of `ibeta` consistent with there
+        being no leading zeros in the mantissa.
+    xmin : float
+        Floating-point number ``beta**minexp`` (the smallest [in
+        magnitude] positive floating point number with full precision).
+    maxexp : int
+        Smallest (positive) power of `ibeta` that causes overflow.
+    xmax : float
+        ``(1-epsneg) * beta**maxexp`` (the largest [in magnitude]
+        usable floating value).
+    irnd : int
+        In ``range(6)``, information on what kind of rounding is done
+        in addition, and on how underflow is handled.
+    ngrd : int
+        Number of 'guard digits' used when truncating the product
+        of two mantissas to fit the representation.
+    epsilon : float
+        Same as `eps`.
+    tiny : float
+        An alias for `smallest_normal`, kept for backwards compatibility.
+    huge : float
+        Same as `xmax`.
+    precision : float
+        ``- int(-log10(eps))``
+    resolution : float
+        ``- 10**(-precision)``
+    smallest_normal : float
+        The smallest positive floating point number with 1 as leading bit in
+        the mantissa following IEEE-754. Same as `xmin`.
+    smallest_subnormal : float
+        The smallest positive floating point number with 0 as leading bit in
+        the mantissa following IEEE-754.
+
+    Parameters
+    ----------
+    float_conv : function, optional
+        Function that converts an integer or integer array to a float
+        or float array. Default is `float`.
+    int_conv : function, optional
+        Function that converts a float or float array to an integer or
+        integer array. Default is `int`.
+    float_to_float : function, optional
+        Function that converts a float array to float. Default is `float`.
+        Note that this does not seem to do anything useful in the current
+        implementation.
+    float_to_str : function, optional
+        Function that converts a single float to a string. Default is
+        ``lambda v:'%24.16e' %v``.
+    title : str, optional
+        Title that is printed in the string representation of `MachAr`.
+
+    See Also
+    --------
+    finfo : Machine limits for floating point types.
+    iinfo : Machine limits for integer types.
+
+    References
+    ----------
+    .. [1] Press, Teukolsky, Vetterling and Flannery,
+           "Numerical Recipes in C++," 2nd ed,
+           Cambridge University Press, 2002, p. 31.
+
+    """
+
+    def __init__(self, float_conv=float, int_conv=int,
+                 float_to_float=float,
+                 float_to_str=lambda v: '%24.16e' % v,
+                 title='Python floating point number'):
+        """
+
+        float_conv - convert integer to float (array)
+        int_conv - convert float (array) to integer
+        float_to_float - convert float array to float
+        float_to_str - convert array float to str
+        title - description of used floating point numbers
+
+        """
+        # We ignore all errors here because we are purposely triggering
+        # underflow to detect the properties of the running arch.
+        with errstate(under='ignore'):
+            self._do_init(float_conv, int_conv, float_to_float, float_to_str, title)
+
+    def _do_init(self, float_conv, int_conv, float_to_float, float_to_str, title):
+        max_iterN = 10000
+        msg = "Did not converge after %d tries with %s"
+        one = float_conv(1)
+        two = one + one
+        zero = one - one
+
+        # Do we really need to do this?  Aren't they 2 and 2.0?
+ # Determine ibeta and beta + a = one + for _ in range(max_iterN): + a = a + a + temp = a + one + temp1 = temp - a + if any(temp1 - one != zero): + break + else: + raise RuntimeError(msg % (_, one.dtype)) + b = one + for _ in range(max_iterN): + b = b + b + temp = a + b + itemp = int_conv(temp-a) + if any(itemp != 0): + break + else: + raise RuntimeError(msg % (_, one.dtype)) + ibeta = itemp + beta = float_conv(ibeta) + + # Determine it and irnd + it = -1 + b = one + for _ in range(max_iterN): + it = it + 1 + b = b * beta + temp = b + one + temp1 = temp - b + if any(temp1 - one != zero): + break + else: + raise RuntimeError(msg % (_, one.dtype)) + + betah = beta / two + a = one + for _ in range(max_iterN): + a = a + a + temp = a + one + temp1 = temp - a + if any(temp1 - one != zero): + break + else: + raise RuntimeError(msg % (_, one.dtype)) + temp = a + betah + irnd = 0 + if any(temp-a != zero): + irnd = 1 + tempa = a + beta + temp = tempa + betah + if irnd == 0 and any(temp-tempa != zero): + irnd = 2 + + # Determine negep and epsneg + negep = it + 3 + betain = one / beta + a = one + for i in range(negep): + a = a * betain + b = a + for _ in range(max_iterN): + temp = one - a + if any(temp-one != zero): + break + a = a * beta + negep = negep - 1 + # Prevent infinite loop on PPC with gcc 4.0: + if negep < 0: + raise RuntimeError("could not determine machine tolerance " + "for 'negep', locals() -> %s" % (locals())) + else: + raise RuntimeError(msg % (_, one.dtype)) + negep = -negep + epsneg = a + + # Determine machep and eps + machep = - it - 3 + a = b + + for _ in range(max_iterN): + temp = one + a + if any(temp-one != zero): + break + a = a * beta + machep = machep + 1 + else: + raise RuntimeError(msg % (_, one.dtype)) + eps = a + + # Determine ngrd + ngrd = 0 + temp = one + eps + if irnd == 0 and any(temp*one - one != zero): + ngrd = 1 + + # Determine iexp + i = 0 + k = 1 + z = betain + t = one + eps + nxres = 0 + for _ in range(max_iterN): + y = z + z = y*y + a = z*one # Check here for underflow + temp = z*t + if any(a+a == zero) or any(abs(z) >= y): + break + temp1 = temp * betain + if any(temp1*beta == z): + break + i = i + 1 + k = k + k + else: + raise RuntimeError(msg % (_, one.dtype)) + if ibeta != 10: + iexp = i + 1 + mx = k + k + else: + iexp = 2 + iz = ibeta + while k >= iz: + iz = iz * ibeta + iexp = iexp + 1 + mx = iz + iz - 1 + + # Determine minexp and xmin + for _ in range(max_iterN): + xmin = y + y = y * betain + a = y * one + temp = y * t + if any((a + a) != zero) and any(abs(y) < xmin): + k = k + 1 + temp1 = temp * betain + if any(temp1*beta == y) and any(temp != y): + nxres = 3 + xmin = y + break + else: + break + else: + raise RuntimeError(msg % (_, one.dtype)) + minexp = -k + + # Determine maxexp, xmax + if mx <= k + k - 3 and ibeta != 10: + mx = mx + mx + iexp = iexp + 1 + maxexp = mx + minexp + irnd = irnd + nxres + if irnd >= 2: + maxexp = maxexp - 2 + i = maxexp + minexp + if ibeta == 2 and not i: + maxexp = maxexp - 1 + if i > 20: + maxexp = maxexp - 1 + if any(a != y): + maxexp = maxexp - 2 + xmax = one - epsneg + if any(xmax*one != xmax): + xmax = one - beta*epsneg + xmax = xmax / (xmin*beta*beta*beta) + i = maxexp + minexp + 3 + for j in range(i): + if ibeta == 2: + xmax = xmax + xmax + else: + xmax = xmax * beta + + smallest_subnormal = abs(xmin / beta ** (it)) + + self.ibeta = ibeta + self.it = it + self.negep = negep + self.epsneg = float_to_float(epsneg) + self._str_epsneg = float_to_str(epsneg) + self.machep = machep + self.eps = float_to_float(eps) + 
self._str_eps = float_to_str(eps) + self.ngrd = ngrd + self.iexp = iexp + self.minexp = minexp + self.xmin = float_to_float(xmin) + self._str_xmin = float_to_str(xmin) + self.maxexp = maxexp + self.xmax = float_to_float(xmax) + self._str_xmax = float_to_str(xmax) + self.irnd = irnd + + self.title = title + # Commonly used parameters + self.epsilon = self.eps + self.tiny = self.xmin + self.huge = self.xmax + self.smallest_normal = self.xmin + self.smallest_subnormal = float_to_float(smallest_subnormal) + + import math + self.precision = int(-math.log10(float_to_float(self.eps))) + ten = two + two + two + two + two + resolution = ten ** (-self.precision) + self.resolution = float_to_float(resolution) + self._str_resolution = float_to_str(resolution) + + def __str__(self): + fmt = ( + 'Machine parameters for %(title)s\n' + '---------------------------------------------------------------------\n' + 'ibeta=%(ibeta)s it=%(it)s iexp=%(iexp)s ngrd=%(ngrd)s irnd=%(irnd)s\n' + 'machep=%(machep)s eps=%(_str_eps)s (beta**machep == epsilon)\n' + 'negep =%(negep)s epsneg=%(_str_epsneg)s (beta**epsneg)\n' + 'minexp=%(minexp)s xmin=%(_str_xmin)s (beta**minexp == tiny)\n' + 'maxexp=%(maxexp)s xmax=%(_str_xmax)s ((1-epsneg)*beta**maxexp == huge)\n' + 'smallest_normal=%(smallest_normal)s ' + 'smallest_subnormal=%(smallest_subnormal)s\n' + '---------------------------------------------------------------------\n' + ) + return fmt % self.__dict__ + + +if __name__ == '__main__': + print(MachAr()) diff --git a/.local/lib/python3.8/site-packages/numpy/core/_methods.py b/.local/lib/python3.8/site-packages/numpy/core/_methods.py new file mode 100644 index 0000000..a239e2c --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/_methods.py @@ -0,0 +1,292 @@ +""" +Array methods which are called by both the C-code for the method +and the Python code for the NumPy-namespace function + +""" +import warnings +from contextlib import nullcontext + +from numpy.core import multiarray as mu +from numpy.core import umath as um +from numpy.core.multiarray import asanyarray +from numpy.core import numerictypes as nt +from numpy.core import _exceptions +from numpy._globals import _NoValue +from numpy.compat import pickle, os_fspath + +# save those O(100) nanoseconds! 
+umr_maximum = um.maximum.reduce +umr_minimum = um.minimum.reduce +umr_sum = um.add.reduce +umr_prod = um.multiply.reduce +umr_any = um.logical_or.reduce +umr_all = um.logical_and.reduce + +# Complex types to -> (2,)float view for fast-path computation in _var() +_complex_to_float = { + nt.dtype(nt.csingle) : nt.dtype(nt.single), + nt.dtype(nt.cdouble) : nt.dtype(nt.double), +} +# Special case for windows: ensure double takes precedence +if nt.dtype(nt.longdouble) != nt.dtype(nt.double): + _complex_to_float.update({ + nt.dtype(nt.clongdouble) : nt.dtype(nt.longdouble), + }) + +# avoid keyword arguments to speed up parsing, saves about 15%-20% for very +# small reductions +def _amax(a, axis=None, out=None, keepdims=False, + initial=_NoValue, where=True): + return umr_maximum(a, axis, None, out, keepdims, initial, where) + +def _amin(a, axis=None, out=None, keepdims=False, + initial=_NoValue, where=True): + return umr_minimum(a, axis, None, out, keepdims, initial, where) + +def _sum(a, axis=None, dtype=None, out=None, keepdims=False, + initial=_NoValue, where=True): + return umr_sum(a, axis, dtype, out, keepdims, initial, where) + +def _prod(a, axis=None, dtype=None, out=None, keepdims=False, + initial=_NoValue, where=True): + return umr_prod(a, axis, dtype, out, keepdims, initial, where) + +def _any(a, axis=None, dtype=None, out=None, keepdims=False, *, where=True): + # Parsing keyword arguments is currently fairly slow, so avoid it for now + if where is True: + return umr_any(a, axis, dtype, out, keepdims) + return umr_any(a, axis, dtype, out, keepdims, where=where) + +def _all(a, axis=None, dtype=None, out=None, keepdims=False, *, where=True): + # Parsing keyword arguments is currently fairly slow, so avoid it for now + if where is True: + return umr_all(a, axis, dtype, out, keepdims) + return umr_all(a, axis, dtype, out, keepdims, where=where) + +def _count_reduce_items(arr, axis, keepdims=False, where=True): + # fast-path for the default case + if where is True: + # no boolean mask given, calculate items according to axis + if axis is None: + axis = tuple(range(arr.ndim)) + elif not isinstance(axis, tuple): + axis = (axis,) + items = nt.intp(1) + for ax in axis: + items *= arr.shape[mu.normalize_axis_index(ax, arr.ndim)] + else: + # TODO: Optimize case when `where` is broadcast along a non-reduction + # axis and full sum is more excessive than needed. + + # guarded to protect circular imports + from numpy.lib.stride_tricks import broadcast_to + # count True values in (potentially broadcasted) boolean mask + items = umr_sum(broadcast_to(where, arr.shape), axis, nt.intp, None, + keepdims) + return items + +# Numpy 1.17.0, 2019-02-24 +# Various clip behavior deprecations, marked with _clip_dep as a prefix. + +def _clip_dep_is_scalar_nan(a): + # guarded to protect circular imports + from numpy.core.fromnumeric import ndim + if ndim(a) != 0: + return False + try: + return um.isnan(a) + except TypeError: + return False + +def _clip_dep_is_byte_swapped(a): + if isinstance(a, mu.ndarray): + return not a.dtype.isnative + return False + +def _clip_dep_invoke_with_casting(ufunc, *args, out=None, casting=None, **kwargs): + # normal path + if casting is not None: + return ufunc(*args, out=out, casting=casting, **kwargs) + + # try to deal with broken casting rules + try: + return ufunc(*args, out=out, **kwargs) + except _exceptions._UFuncOutputCastingError as e: + # Numpy 1.17.0, 2019-02-24 + warnings.warn( + "Converting the output of clip from {!r} to {!r} is deprecated. 
" + "Pass `casting=\"unsafe\"` explicitly to silence this warning, or " + "correct the type of the variables.".format(e.from_, e.to), + DeprecationWarning, + stacklevel=2 + ) + return ufunc(*args, out=out, casting="unsafe", **kwargs) + +def _clip(a, min=None, max=None, out=None, *, casting=None, **kwargs): + if min is None and max is None: + raise ValueError("One of max or min must be given") + + # Numpy 1.17.0, 2019-02-24 + # This deprecation probably incurs a substantial slowdown for small arrays, + # it will be good to get rid of it. + if not _clip_dep_is_byte_swapped(a) and not _clip_dep_is_byte_swapped(out): + using_deprecated_nan = False + if _clip_dep_is_scalar_nan(min): + min = -float('inf') + using_deprecated_nan = True + if _clip_dep_is_scalar_nan(max): + max = float('inf') + using_deprecated_nan = True + if using_deprecated_nan: + warnings.warn( + "Passing `np.nan` to mean no clipping in np.clip has always " + "been unreliable, and is now deprecated. " + "In future, this will always return nan, like it already does " + "when min or max are arrays that contain nan. " + "To skip a bound, pass either None or an np.inf of an " + "appropriate sign.", + DeprecationWarning, + stacklevel=2 + ) + + if min is None: + return _clip_dep_invoke_with_casting( + um.minimum, a, max, out=out, casting=casting, **kwargs) + elif max is None: + return _clip_dep_invoke_with_casting( + um.maximum, a, min, out=out, casting=casting, **kwargs) + else: + return _clip_dep_invoke_with_casting( + um.clip, a, min, max, out=out, casting=casting, **kwargs) + +def _mean(a, axis=None, dtype=None, out=None, keepdims=False, *, where=True): + arr = asanyarray(a) + + is_float16_result = False + + rcount = _count_reduce_items(arr, axis, keepdims=keepdims, where=where) + if rcount == 0 if where is True else umr_any(rcount == 0, axis=None): + warnings.warn("Mean of empty slice.", RuntimeWarning, stacklevel=2) + + # Cast bool, unsigned int, and int to float64 by default + if dtype is None: + if issubclass(arr.dtype.type, (nt.integer, nt.bool_)): + dtype = mu.dtype('f8') + elif issubclass(arr.dtype.type, nt.float16): + dtype = mu.dtype('f4') + is_float16_result = True + + ret = umr_sum(arr, axis, dtype, out, keepdims, where=where) + if isinstance(ret, mu.ndarray): + ret = um.true_divide( + ret, rcount, out=ret, casting='unsafe', subok=False) + if is_float16_result and out is None: + ret = arr.dtype.type(ret) + elif hasattr(ret, 'dtype'): + if is_float16_result: + ret = arr.dtype.type(ret / rcount) + else: + ret = ret.dtype.type(ret / rcount) + else: + ret = ret / rcount + + return ret + +def _var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False, *, + where=True): + arr = asanyarray(a) + + rcount = _count_reduce_items(arr, axis, keepdims=keepdims, where=where) + # Make this warning show up on top. + if ddof >= rcount if where is True else umr_any(ddof >= rcount, axis=None): + warnings.warn("Degrees of freedom <= 0 for slice", RuntimeWarning, + stacklevel=2) + + # Cast bool, unsigned int, and int to float64 by default + if dtype is None and issubclass(arr.dtype.type, (nt.integer, nt.bool_)): + dtype = mu.dtype('f8') + + # Compute the mean. + # Note that if dtype is not of inexact type then arraymean will + # not be either. + arrmean = umr_sum(arr, axis, dtype, keepdims=True, where=where) + # The shape of rcount has to match arrmean to not change the shape of out + # in broadcasting. Otherwise, it cannot be stored back to arrmean. 
+ if rcount.ndim == 0: + # fast-path for default case when where is True + div = rcount + else: + # matching rcount to arrmean when where is specified as array + div = rcount.reshape(arrmean.shape) + if isinstance(arrmean, mu.ndarray): + arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe', + subok=False) + elif hasattr(arrmean, "dtype"): + arrmean = arrmean.dtype.type(arrmean / rcount) + else: + arrmean = arrmean / rcount + + # Compute sum of squared deviations from mean + # Note that x may not be inexact and that we need it to be an array, + # not a scalar. + x = asanyarray(arr - arrmean) + + if issubclass(arr.dtype.type, (nt.floating, nt.integer)): + x = um.multiply(x, x, out=x) + # Fast-paths for built-in complex types + elif x.dtype in _complex_to_float: + xv = x.view(dtype=(_complex_to_float[x.dtype], (2,))) + um.multiply(xv, xv, out=xv) + x = um.add(xv[..., 0], xv[..., 1], out=x.real).real + # Most general case; includes handling object arrays containing imaginary + # numbers and complex types with non-native byteorder + else: + x = um.multiply(x, um.conjugate(x), out=x).real + + ret = umr_sum(x, axis, dtype, out, keepdims=keepdims, where=where) + + # Compute degrees of freedom and make sure it is not negative. + rcount = um.maximum(rcount - ddof, 0) + + # divide by degrees of freedom + if isinstance(ret, mu.ndarray): + ret = um.true_divide( + ret, rcount, out=ret, casting='unsafe', subok=False) + elif hasattr(ret, 'dtype'): + ret = ret.dtype.type(ret / rcount) + else: + ret = ret / rcount + + return ret + +def _std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False, *, + where=True): + ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof, + keepdims=keepdims, where=where) + + if isinstance(ret, mu.ndarray): + ret = um.sqrt(ret, out=ret) + elif hasattr(ret, 'dtype'): + ret = ret.dtype.type(um.sqrt(ret)) + else: + ret = um.sqrt(ret) + + return ret + +def _ptp(a, axis=None, out=None, keepdims=False): + return um.subtract( + umr_maximum(a, axis, None, out, keepdims), + umr_minimum(a, axis, None, None, keepdims), + out + ) + +def _dump(self, file, protocol=2): + if hasattr(file, 'write'): + ctx = nullcontext(file) + else: + ctx = open(os_fspath(file), "wb") + with ctx as f: + pickle.dump(self, f, protocol=protocol) + +def _dumps(self, protocol=2): + return pickle.dumps(self, protocol=protocol) diff --git a/.local/lib/python3.8/site-packages/numpy/core/_multiarray_tests.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/numpy/core/_multiarray_tests.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..f67681c Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/_multiarray_tests.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/_multiarray_umath.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/numpy/core/_multiarray_umath.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..44a2561 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/_multiarray_umath.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/_operand_flag_tests.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/numpy/core/_operand_flag_tests.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..6a476c3 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/_operand_flag_tests.cpython-38-x86_64-linux-gnu.so differ diff --git 
a/.local/lib/python3.8/site-packages/numpy/core/_rational_tests.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/numpy/core/_rational_tests.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..e434181 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/_rational_tests.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/_simd.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/numpy/core/_simd.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..5068b5a Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/_simd.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/_string_helpers.py b/.local/lib/python3.8/site-packages/numpy/core/_string_helpers.py new file mode 100644 index 0000000..45e6a73 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/_string_helpers.py @@ -0,0 +1,100 @@ +""" +String-handling utilities to avoid locale-dependence. + +Used primarily to generate type name aliases. +""" +# "import string" is costly to import! +# Construct the translation tables directly +# "A" = chr(65), "a" = chr(97) +_all_chars = [chr(_m) for _m in range(256)] +_ascii_upper = _all_chars[65:65+26] +_ascii_lower = _all_chars[97:97+26] +LOWER_TABLE = "".join(_all_chars[:65] + _ascii_lower + _all_chars[65+26:]) +UPPER_TABLE = "".join(_all_chars[:97] + _ascii_upper + _all_chars[97+26:]) + + +def english_lower(s): + """ Apply English case rules to convert ASCII strings to all lower case. + + This is an internal utility function to replace calls to str.lower() such + that we can avoid changing behavior with changing locales. In particular, + Turkish has distinct dotted and dotless variants of the Latin letter "I" in + both lowercase and uppercase. Thus, "I".lower() != "i" in a "tr" locale. + + Parameters + ---------- + s : str + + Returns + ------- + lowered : str + + Examples + -------- + >>> from numpy.core.numerictypes import english_lower + >>> english_lower('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_') + 'abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz0123456789_' + >>> english_lower('') + '' + """ + lowered = s.translate(LOWER_TABLE) + return lowered + + +def english_upper(s): + """ Apply English case rules to convert ASCII strings to all upper case. + + This is an internal utility function to replace calls to str.upper() such + that we can avoid changing behavior with changing locales. In particular, + Turkish has distinct dotted and dotless variants of the Latin letter "I" in + both lowercase and uppercase. Thus, "i".upper() != "I" in a "tr" locale. + + Parameters + ---------- + s : str + + Returns + ------- + uppered : str + + Examples + -------- + >>> from numpy.core.numerictypes import english_upper + >>> english_upper('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_') + 'ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_' + >>> english_upper('') + '' + """ + uppered = s.translate(UPPER_TABLE) + return uppered + + +def english_capitalize(s): + """ Apply English case rules to convert the first character of an ASCII + string to upper case. + + This is an internal utility function to replace calls to str.capitalize() + such that we can avoid changing behavior with changing locales. 
+ + Parameters + ---------- + s : str + + Returns + ------- + capitalized : str + + Examples + -------- + >>> from numpy.core.numerictypes import english_capitalize + >>> english_capitalize('int8') + 'Int8' + >>> english_capitalize('Int8') + 'Int8' + >>> english_capitalize('') + '' + """ + if s: + return english_upper(s[0]) + s[1:] + else: + return s diff --git a/.local/lib/python3.8/site-packages/numpy/core/_struct_ufunc_tests.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/numpy/core/_struct_ufunc_tests.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..d854c0e Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/_struct_ufunc_tests.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/_type_aliases.py b/.local/lib/python3.8/site-packages/numpy/core/_type_aliases.py new file mode 100644 index 0000000..3765a0d --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/_type_aliases.py @@ -0,0 +1,235 @@ +""" +Due to compatibility, numpy has a very large number of different naming +conventions for the scalar types (those subclassing from `numpy.generic`). +This file produces a convoluted set of dictionaries mapping names to types, +and sometimes other mappings too. + +.. data:: allTypes + A dictionary of names to types that will be exposed as attributes through + ``np.core.numerictypes.*`` + +.. data:: sctypeDict + Similar to `allTypes`, but maps a broader set of aliases to their types. + +.. data:: sctypes + A dictionary keyed by a "type group" string, providing a list of types + under that group. + +""" + +from numpy.compat import unicode +from numpy.core._string_helpers import english_lower +from numpy.core.multiarray import typeinfo, dtype +from numpy.core._dtype import _kind_name + + +sctypeDict = {} # Contains all leaf-node scalar types with aliases +allTypes = {} # Collect the types we will add to the module + + +# separate the actual type info from the abstract base classes +_abstract_types = {} +_concrete_typeinfo = {} +for k, v in typeinfo.items(): + # make all the keys lowercase too + k = english_lower(k) + if isinstance(v, type): + _abstract_types[k] = v + else: + _concrete_typeinfo[k] = v + +_concrete_types = {v.type for k, v in _concrete_typeinfo.items()} + + +def _bits_of(obj): + try: + info = next(v for v in _concrete_typeinfo.values() if v.type is obj) + except StopIteration: + if obj in _abstract_types.values(): + msg = "Cannot count the bits of an abstract type" + raise ValueError(msg) from None + + # some third-party type - make a best-guess + return dtype(obj).itemsize * 8 + else: + return info.bits + + +def bitname(obj): + """Return a bit-width name for a given type object""" + bits = _bits_of(obj) + dt = dtype(obj) + char = dt.kind + base = _kind_name(dt) + + if base == 'object': + bits = 0 + + if bits != 0: + char = "%s%d" % (char, bits // 8) + + return base, bits, char + + +def _add_types(): + for name, info in _concrete_typeinfo.items(): + # define C-name and insert typenum and typechar references also + allTypes[name] = info.type + sctypeDict[name] = info.type + sctypeDict[info.char] = info.type + sctypeDict[info.num] = info.type + + for name, cls in _abstract_types.items(): + allTypes[name] = cls +_add_types() + +# This is the priority order used to assign the bit-sized NPY_INTxx names, which +# must match the order in npy_common.h in order for NPY_INTxx and np.intxx to be +# consistent. 
+# If two C types have the same size, then the earliest one in this list is used +# as the sized name. +_int_ctypes = ['long', 'longlong', 'int', 'short', 'byte'] +_uint_ctypes = list('u' + t for t in _int_ctypes) + +def _add_aliases(): + for name, info in _concrete_typeinfo.items(): + # these are handled by _add_integer_aliases + if name in _int_ctypes or name in _uint_ctypes: + continue + + # insert bit-width version for this class (if relevant) + base, bit, char = bitname(info.type) + + myname = "%s%d" % (base, bit) + + # ensure that (c)longdouble does not overwrite the aliases assigned to + # (c)double + if name in ('longdouble', 'clongdouble') and myname in allTypes: + continue + + allTypes[myname] = info.type + + # add mapping for both the bit name and the numarray name + sctypeDict[myname] = info.type + + # add forward, reverse, and string mapping to numarray + sctypeDict[char] = info.type + + +_add_aliases() + +def _add_integer_aliases(): + seen_bits = set() + for i_ctype, u_ctype in zip(_int_ctypes, _uint_ctypes): + i_info = _concrete_typeinfo[i_ctype] + u_info = _concrete_typeinfo[u_ctype] + bits = i_info.bits # same for both + + for info, charname, intname in [ + (i_info,'i%d' % (bits//8,), 'int%d' % bits), + (u_info,'u%d' % (bits//8,), 'uint%d' % bits)]: + if bits not in seen_bits: + # sometimes two different types have the same number of bits + # if so, the one iterated over first takes precedence + allTypes[intname] = info.type + sctypeDict[intname] = info.type + sctypeDict[charname] = info.type + + seen_bits.add(bits) + +_add_integer_aliases() + +# We use these later +void = allTypes['void'] + +# +# Rework the Python names (so that float and complex and int are consistent +# with Python usage) +# +def _set_up_aliases(): + type_pairs = [('complex_', 'cdouble'), + ('int0', 'intp'), + ('uint0', 'uintp'), + ('single', 'float'), + ('csingle', 'cfloat'), + ('singlecomplex', 'cfloat'), + ('float_', 'double'), + ('intc', 'int'), + ('uintc', 'uint'), + ('int_', 'long'), + ('uint', 'ulong'), + ('cfloat', 'cdouble'), + ('longfloat', 'longdouble'), + ('clongfloat', 'clongdouble'), + ('longcomplex', 'clongdouble'), + ('bool_', 'bool'), + ('bytes_', 'string'), + ('string_', 'string'), + ('str_', 'unicode'), + ('unicode_', 'unicode'), + ('object_', 'object')] + for alias, t in type_pairs: + allTypes[alias] = allTypes[t] + sctypeDict[alias] = sctypeDict[t] + # Remove aliases overriding python types and modules + to_remove = ['ulong', 'object', 'int', 'float', + 'complex', 'bool', 'string', 'datetime', 'timedelta', + 'bytes', 'str'] + + for t in to_remove: + try: + del allTypes[t] + del sctypeDict[t] + except KeyError: + pass +_set_up_aliases() + + +sctypes = {'int': [], + 'uint':[], + 'float':[], + 'complex':[], + 'others':[bool, object, bytes, unicode, void]} + +def _add_array_type(typename, bits): + try: + t = allTypes['%s%d' % (typename, bits)] + except KeyError: + pass + else: + sctypes[typename].append(t) + +def _set_array_types(): + ibytes = [1, 2, 4, 8, 16, 32, 64] + fbytes = [2, 4, 8, 10, 12, 16, 32, 64] + for bytes in ibytes: + bits = 8*bytes + _add_array_type('int', bits) + _add_array_type('uint', bits) + for bytes in fbytes: + bits = 8*bytes + _add_array_type('float', bits) + _add_array_type('complex', 2*bits) + _gi = dtype('p') + if _gi.type not in sctypes['int']: + indx = 0 + sz = _gi.itemsize + _lst = sctypes['int'] + while (indx < len(_lst) and sz >= _lst[indx](0).itemsize): + indx += 1 + sctypes['int'].insert(indx, _gi.type) + sctypes['uint'].insert(indx, dtype('P').type) 
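+# (Editor's sketch, not part of the upstream file: after the call below,
+# ``np.sctypes`` is fully populated. On a typical 64-bit Linux build --
+# exact contents vary by platform:
+#
+#     >>> import numpy as np
+#     >>> np.sctypes['int']
+#     [<class 'numpy.int8'>, <class 'numpy.int16'>, <class 'numpy.int32'>, <class 'numpy.int64'>]
+#     >>> np.sctypes['others']
+#     [<class 'bool'>, <class 'object'>, <class 'bytes'>, <class 'str'>, <class 'numpy.void'>]
+# )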
+_set_array_types() + + +# Add additional strings to the sctypeDict +_toadd = ['int', 'float', 'complex', 'bool', 'object', + 'str', 'bytes', ('a', 'bytes_')] + +for name in _toadd: + if isinstance(name, tuple): + sctypeDict[name[0]] = allTypes[name[1]] + else: + sctypeDict[name] = allTypes['%s_' % name] + +del _toadd, name diff --git a/.local/lib/python3.8/site-packages/numpy/core/_type_aliases.pyi b/.local/lib/python3.8/site-packages/numpy/core/_type_aliases.pyi new file mode 100644 index 0000000..c10d072 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/_type_aliases.pyi @@ -0,0 +1,13 @@ +from typing import Dict, Union, Type, List, TypedDict + +from numpy import generic, signedinteger, unsignedinteger, floating, complexfloating + +class _SCTypes(TypedDict): + int: List[Type[signedinteger]] + uint: List[Type[unsignedinteger]] + float: List[Type[floating]] + complex: List[Type[complexfloating]] + others: List[type] + +sctypeDict: Dict[Union[int, str], Type[generic]] +sctypes: _SCTypes diff --git a/.local/lib/python3.8/site-packages/numpy/core/_ufunc_config.py b/.local/lib/python3.8/site-packages/numpy/core/_ufunc_config.py new file mode 100644 index 0000000..b40e744 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/_ufunc_config.py @@ -0,0 +1,446 @@ +""" +Functions for changing global ufunc configuration + +This provides helpers which wrap `umath.geterrobj` and `umath.seterrobj` +""" +import collections.abc +import contextlib + +from .overrides import set_module +from .umath import ( + UFUNC_BUFSIZE_DEFAULT, + ERR_IGNORE, ERR_WARN, ERR_RAISE, ERR_CALL, ERR_PRINT, ERR_LOG, ERR_DEFAULT, + SHIFT_DIVIDEBYZERO, SHIFT_OVERFLOW, SHIFT_UNDERFLOW, SHIFT_INVALID, +) +from . import umath + +__all__ = [ + "seterr", "geterr", "setbufsize", "getbufsize", "seterrcall", "geterrcall", + "errstate", +] + +_errdict = {"ignore": ERR_IGNORE, + "warn": ERR_WARN, + "raise": ERR_RAISE, + "call": ERR_CALL, + "print": ERR_PRINT, + "log": ERR_LOG} + +_errdict_rev = {value: key for key, value in _errdict.items()} + + +@set_module('numpy') +def seterr(all=None, divide=None, over=None, under=None, invalid=None): + """ + Set how floating-point errors are handled. + + Note that operations on integer scalar types (such as `int16`) are + handled like floating point, and are affected by these settings. + + Parameters + ---------- + all : {'ignore', 'warn', 'raise', 'call', 'print', 'log'}, optional + Set treatment for all types of floating-point errors at once: + + - ignore: Take no action when the exception occurs. + - warn: Print a `RuntimeWarning` (via the Python `warnings` module). + - raise: Raise a `FloatingPointError`. + - call: Call a function specified using the `seterrcall` function. + - print: Print a warning directly to ``stdout``. + - log: Record error in a Log object specified by `seterrcall`. + + The default is not to change the current behavior. + divide : {'ignore', 'warn', 'raise', 'call', 'print', 'log'}, optional + Treatment for division by zero. + over : {'ignore', 'warn', 'raise', 'call', 'print', 'log'}, optional + Treatment for floating-point overflow. + under : {'ignore', 'warn', 'raise', 'call', 'print', 'log'}, optional + Treatment for floating-point underflow. + invalid : {'ignore', 'warn', 'raise', 'call', 'print', 'log'}, optional + Treatment for invalid floating-point operation. + + Returns + ------- + old_settings : dict + Dictionary containing the old settings. + + See also + -------- + seterrcall : Set a callback function for the 'call' mode. 
+    geterr, geterrcall, errstate
+
+    Notes
+    -----
+    The floating-point exceptions are defined in the IEEE 754 standard [1]_:
+
+    - Division by zero: infinite result obtained from finite numbers.
+    - Overflow: result too large to be expressed.
+    - Underflow: result so close to zero that some precision
+      was lost.
+    - Invalid operation: result is not an expressible number, typically
+      indicates that a NaN was produced.
+
+    .. [1] https://en.wikipedia.org/wiki/IEEE_754
+
+    Examples
+    --------
+    >>> old_settings = np.seterr(all='ignore')  #seterr to known value
+    >>> np.seterr(over='raise')
+    {'divide': 'ignore', 'over': 'ignore', 'under': 'ignore', 'invalid': 'ignore'}
+    >>> np.seterr(**old_settings)  # reset to default
+    {'divide': 'ignore', 'over': 'raise', 'under': 'ignore', 'invalid': 'ignore'}
+
+    >>> np.int16(32000) * np.int16(3)
+    30464
+    >>> old_settings = np.seterr(all='warn', over='raise')
+    >>> np.int16(32000) * np.int16(3)
+    Traceback (most recent call last):
+      File "<stdin>", line 1, in <module>
+    FloatingPointError: overflow encountered in short_scalars
+
+    >>> old_settings = np.seterr(all='print')
+    >>> np.geterr()
+    {'divide': 'print', 'over': 'print', 'under': 'print', 'invalid': 'print'}
+    >>> np.int16(32000) * np.int16(3)
+    30464
+
+    """
+
+    pyvals = umath.geterrobj()
+    old = geterr()
+
+    if divide is None:
+        divide = all or old['divide']
+    if over is None:
+        over = all or old['over']
+    if under is None:
+        under = all or old['under']
+    if invalid is None:
+        invalid = all or old['invalid']
+
+    maskvalue = ((_errdict[divide] << SHIFT_DIVIDEBYZERO) +
+                 (_errdict[over] << SHIFT_OVERFLOW) +
+                 (_errdict[under] << SHIFT_UNDERFLOW) +
+                 (_errdict[invalid] << SHIFT_INVALID))
+
+    pyvals[1] = maskvalue
+    umath.seterrobj(pyvals)
+    return old
+
+
+@set_module('numpy')
+def geterr():
+    """
+    Get the current way of handling floating-point errors.
+
+    Returns
+    -------
+    res : dict
+        A dictionary with keys "divide", "over", "under", and "invalid",
+        whose values are from the strings "ignore", "print", "log", "warn",
+        "raise", and "call". The keys represent possible floating-point
+        exceptions, and the values define how these exceptions are handled.
+
+    See Also
+    --------
+    geterrcall, seterr, seterrcall
+
+    Notes
+    -----
+    For complete documentation of the types of floating-point exceptions and
+    treatment options, see `seterr`.
+
+    Examples
+    --------
+    >>> np.geterr()
+    {'divide': 'warn', 'over': 'warn', 'under': 'ignore', 'invalid': 'warn'}
+    >>> np.arange(3.) / np.arange(3.)
+    array([nan, 1., 1.])
+
+    >>> oldsettings = np.seterr(all='warn', over='raise')
+    >>> np.geterr()
+    {'divide': 'warn', 'over': 'raise', 'under': 'warn', 'invalid': 'warn'}
+    >>> np.arange(3.) / np.arange(3.)
+    array([nan, 1., 1.])
+
+    """
+    maskvalue = umath.geterrobj()[1]
+    mask = 7
+    res = {}
+    val = (maskvalue >> SHIFT_DIVIDEBYZERO) & mask
+    res['divide'] = _errdict_rev[val]
+    val = (maskvalue >> SHIFT_OVERFLOW) & mask
+    res['over'] = _errdict_rev[val]
+    val = (maskvalue >> SHIFT_UNDERFLOW) & mask
+    res['under'] = _errdict_rev[val]
+    val = (maskvalue >> SHIFT_INVALID) & mask
+    res['invalid'] = _errdict_rev[val]
+    return res
+
+
+@set_module('numpy')
+def setbufsize(size):
+    """
+    Set the size of the buffer used in ufuncs.
+
+    Parameters
+    ----------
+    size : int
+        Size of buffer.
+
+    """
+    if size > 10e6:
+        raise ValueError("Buffer size, %s, is too big." % size)
+    if size < 5:
+        raise ValueError("Buffer size, %s, is too small." % size)
+    if size % 16 != 0:
+        raise ValueError("Buffer size, %s, is not a multiple of 16."
% size) + + pyvals = umath.geterrobj() + old = getbufsize() + pyvals[0] = size + umath.seterrobj(pyvals) + return old + + +@set_module('numpy') +def getbufsize(): + """ + Return the size of the buffer used in ufuncs. + + Returns + ------- + getbufsize : int + Size of ufunc buffer in bytes. + + """ + return umath.geterrobj()[0] + + +@set_module('numpy') +def seterrcall(func): + """ + Set the floating-point error callback function or log object. + + There are two ways to capture floating-point error messages. The first + is to set the error-handler to 'call', using `seterr`. Then, set + the function to call using this function. + + The second is to set the error-handler to 'log', using `seterr`. + Floating-point errors then trigger a call to the 'write' method of + the provided object. + + Parameters + ---------- + func : callable f(err, flag) or object with write method + Function to call upon floating-point errors ('call'-mode) or + object whose 'write' method is used to log such message ('log'-mode). + + The call function takes two arguments. The first is a string describing + the type of error (such as "divide by zero", "overflow", "underflow", + or "invalid value"), and the second is the status flag. The flag is a + byte, whose four least-significant bits indicate the type of error, one + of "divide", "over", "under", "invalid":: + + [0 0 0 0 divide over under invalid] + + In other words, ``flags = divide + 2*over + 4*under + 8*invalid``. + + If an object is provided, its write method should take one argument, + a string. + + Returns + ------- + h : callable, log instance or None + The old error handler. + + See Also + -------- + seterr, geterr, geterrcall + + Examples + -------- + Callback upon error: + + >>> def err_handler(type, flag): + ... print("Floating point error (%s), with flag %s" % (type, flag)) + ... + + >>> saved_handler = np.seterrcall(err_handler) + >>> save_err = np.seterr(all='call') + + >>> np.array([1, 2, 3]) / 0.0 + Floating point error (divide by zero), with flag 1 + array([inf, inf, inf]) + + >>> np.seterrcall(saved_handler) + + >>> np.seterr(**save_err) + {'divide': 'call', 'over': 'call', 'under': 'call', 'invalid': 'call'} + + Log error message: + + >>> class Log: + ... def write(self, msg): + ... print("LOG: %s" % msg) + ... + + >>> log = Log() + >>> saved_handler = np.seterrcall(log) + >>> save_err = np.seterr(all='log') + + >>> np.array([1, 2, 3]) / 0.0 + LOG: Warning: divide by zero encountered in true_divide + array([inf, inf, inf]) + + >>> np.seterrcall(saved_handler) + + >>> np.seterr(**save_err) + {'divide': 'log', 'over': 'log', 'under': 'log', 'invalid': 'log'} + + """ + if func is not None and not isinstance(func, collections.abc.Callable): + if (not hasattr(func, 'write') or + not isinstance(func.write, collections.abc.Callable)): + raise ValueError("Only callable can be used as callback") + pyvals = umath.geterrobj() + old = geterrcall() + pyvals[2] = func + umath.seterrobj(pyvals) + return old + + +@set_module('numpy') +def geterrcall(): + """ + Return the current callback function used on floating-point errors. + + When the error handling for a floating-point error (one of "divide", + "over", "under", or "invalid") is set to 'call' or 'log', the function + that is called or the log instance that is written to is returned by + `geterrcall`. This function or log instance has been set with + `seterrcall`. + + Returns + ------- + errobj : callable, log instance or None + The current error handler. 
If no handler was set through `seterrcall`, + ``None`` is returned. + + See Also + -------- + seterrcall, seterr, geterr + + Notes + ----- + For complete documentation of the types of floating-point exceptions and + treatment options, see `seterr`. + + Examples + -------- + >>> np.geterrcall() # we did not yet set a handler, returns None + + >>> oldsettings = np.seterr(all='call') + >>> def err_handler(type, flag): + ... print("Floating point error (%s), with flag %s" % (type, flag)) + >>> oldhandler = np.seterrcall(err_handler) + >>> np.array([1, 2, 3]) / 0.0 + Floating point error (divide by zero), with flag 1 + array([inf, inf, inf]) + + >>> cur_handler = np.geterrcall() + >>> cur_handler is err_handler + True + + """ + return umath.geterrobj()[2] + + +class _unspecified: + pass + + +_Unspecified = _unspecified() + + +@set_module('numpy') +class errstate(contextlib.ContextDecorator): + """ + errstate(**kwargs) + + Context manager for floating-point error handling. + + Using an instance of `errstate` as a context manager allows statements in + that context to execute with a known error handling behavior. Upon entering + the context the error handling is set with `seterr` and `seterrcall`, and + upon exiting it is reset to what it was before. + + .. versionchanged:: 1.17.0 + `errstate` is also usable as a function decorator, saving + a level of indentation if an entire function is wrapped. + See :py:class:`contextlib.ContextDecorator` for more information. + + Parameters + ---------- + kwargs : {divide, over, under, invalid} + Keyword arguments. The valid keywords are the possible floating-point + exceptions. Each keyword should have a string value that defines the + treatment for the particular error. Possible values are + {'ignore', 'warn', 'raise', 'call', 'print', 'log'}. + + See Also + -------- + seterr, geterr, seterrcall, geterrcall + + Notes + ----- + For complete documentation of the types of floating-point exceptions and + treatment options, see `seterr`. + + Examples + -------- + >>> olderr = np.seterr(all='ignore') # Set error handling to known state. + + >>> np.arange(3) / 0. + array([nan, inf, inf]) + >>> with np.errstate(divide='warn'): + ... np.arange(3) / 0. + array([nan, inf, inf]) + + >>> np.sqrt(-1) + nan + >>> with np.errstate(invalid='raise'): + ... 
np.sqrt(-1)
+    Traceback (most recent call last):
+      File "<stdin>", line 2, in <module>
+    FloatingPointError: invalid value encountered in sqrt
+
+    Outside the context the error handling behavior has not changed:
+
+    >>> np.geterr()
+    {'divide': 'ignore', 'over': 'ignore', 'under': 'ignore', 'invalid': 'ignore'}
+
+    """
+
+    def __init__(self, *, call=_Unspecified, **kwargs):
+        self.call = call
+        self.kwargs = kwargs
+
+    def __enter__(self):
+        self.oldstate = seterr(**self.kwargs)
+        if self.call is not _Unspecified:
+            self.oldcall = seterrcall(self.call)
+
+    def __exit__(self, *exc_info):
+        seterr(**self.oldstate)
+        if self.call is not _Unspecified:
+            seterrcall(self.oldcall)
+
+
+def _setdef():
+    defval = [UFUNC_BUFSIZE_DEFAULT, ERR_DEFAULT, None]
+    umath.seterrobj(defval)
+
+
+# set the default values
+_setdef()
diff --git a/.local/lib/python3.8/site-packages/numpy/core/_ufunc_config.pyi b/.local/lib/python3.8/site-packages/numpy/core/_ufunc_config.pyi
new file mode 100644
index 0000000..cd7129b
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/core/_ufunc_config.pyi
@@ -0,0 +1,36 @@
+from typing import Optional, Union, Callable, Any, Literal, TypedDict
+
+from numpy import _SupportsWrite
+
+_ErrKind = Literal["ignore", "warn", "raise", "call", "print", "log"]
+_ErrFunc = Callable[[str, int], Any]
+
+class _ErrDict(TypedDict):
+    divide: _ErrKind
+    over: _ErrKind
+    under: _ErrKind
+    invalid: _ErrKind
+
+class _ErrDictOptional(TypedDict, total=False):
+    all: Optional[_ErrKind]
+    divide: Optional[_ErrKind]
+    over: Optional[_ErrKind]
+    under: Optional[_ErrKind]
+    invalid: Optional[_ErrKind]
+
+def seterr(
+    all: Optional[_ErrKind] = ...,
+    divide: Optional[_ErrKind] = ...,
+    over: Optional[_ErrKind] = ...,
+    under: Optional[_ErrKind] = ...,
+    invalid: Optional[_ErrKind] = ...,
+) -> _ErrDict: ...
+def geterr() -> _ErrDict: ...
+def setbufsize(size: int) -> int: ...
+def getbufsize() -> int: ...
+def seterrcall(
+    func: Union[None, _ErrFunc, _SupportsWrite[str]]
+) -> Union[None, _ErrFunc, _SupportsWrite[str]]: ...
+def geterrcall() -> Union[None, _ErrFunc, _SupportsWrite[str]]: ...
+
+# See `numpy/__init__.pyi` for the `errstate` class
diff --git a/.local/lib/python3.8/site-packages/numpy/core/_umath_tests.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/numpy/core/_umath_tests.cpython-38-x86_64-linux-gnu.so
new file mode 100755
index 0000000..1d0bfc1
Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/_umath_tests.cpython-38-x86_64-linux-gnu.so differ
diff --git a/.local/lib/python3.8/site-packages/numpy/core/arrayprint.py b/.local/lib/python3.8/site-packages/numpy/core/arrayprint.py
new file mode 100644
index 0000000..d7e9bf7
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/core/arrayprint.py
@@ -0,0 +1,1705 @@
+"""Array printing function
+
+$Id: arrayprint.py,v 1.9 2005/09/13 13:58:44 teoliphant Exp $
+
+"""
+__all__ = ["array2string", "array_str", "array_repr", "set_string_function",
+           "set_printoptions", "get_printoptions", "printoptions",
+           "format_float_positional", "format_float_scientific"]
+__docformat__ = 'restructuredtext'
+
+#
+# Written by Konrad Hinsen
+# last revision: 1996-3-13
+# modified by Jim Hugunin 1997-3-3 for repr's and str's (and other details)
+# and by Perry Greenfield 2000-4-1 for numarray
+# and by Travis Oliphant 2005-8-22 for numpy
+
+
+# Note: Both scalartypes.c.src and arrayprint.py implement strs for numpy
+# scalars but for different purposes.
scalartypes.c.src has str/reprs for when +# the scalar is printed on its own, while arrayprint.py has strs for when +# scalars are printed inside an ndarray. Only the latter strs are currently +# user-customizable. + +import functools +import numbers +import sys +try: + from _thread import get_ident +except ImportError: + from _dummy_thread import get_ident + +import numpy as np +from . import numerictypes as _nt +from .umath import absolute, isinf, isfinite, isnat +from . import multiarray +from .multiarray import (array, dragon4_positional, dragon4_scientific, + datetime_as_string, datetime_data, ndarray, + set_legacy_print_mode) +from .fromnumeric import any +from .numeric import concatenate, asarray, errstate +from .numerictypes import (longlong, intc, int_, float_, complex_, bool_, + flexible) +from .overrides import array_function_dispatch, set_module +import operator +import warnings +import contextlib + +_format_options = { + 'edgeitems': 3, # repr N leading and trailing items of each dimension + 'threshold': 1000, # total items > triggers array summarization + 'floatmode': 'maxprec', + 'precision': 8, # precision of floating point representations + 'suppress': False, # suppress printing small floating values in exp format + 'linewidth': 75, + 'nanstr': 'nan', + 'infstr': 'inf', + 'sign': '-', + 'formatter': None, + # Internally stored as an int to simplify comparisons; converted from/to + # str/False on the way in/out. + 'legacy': sys.maxsize} + +def _make_options_dict(precision=None, threshold=None, edgeitems=None, + linewidth=None, suppress=None, nanstr=None, infstr=None, + sign=None, formatter=None, floatmode=None, legacy=None): + """ + Make a dictionary out of the non-None arguments, plus conversion of + *legacy* and sanity checks. + """ + + options = {k: v for k, v in locals().items() if v is not None} + + if suppress is not None: + options['suppress'] = bool(suppress) + + modes = ['fixed', 'unique', 'maxprec', 'maxprec_equal'] + if floatmode not in modes + [None]: + raise ValueError("floatmode option must be one of " + + ", ".join('"{}"'.format(m) for m in modes)) + + if sign not in [None, '-', '+', ' ']: + raise ValueError("sign option must be one of ' ', '+', or '-'") + + if legacy == False: + options['legacy'] = sys.maxsize + elif legacy == '1.13': + options['legacy'] = 113 + elif legacy == '1.21': + options['legacy'] = 121 + elif legacy is None: + pass # OK, do nothing. + else: + warnings.warn( + "legacy printing option can currently only be '1.13', '1.21', or " + "`False`", stacklevel=3) + + if threshold is not None: + # forbid the bad threshold arg suggested by stack overflow, gh-12351 + if not isinstance(threshold, numbers.Number): + raise TypeError("threshold must be numeric") + if np.isnan(threshold): + raise ValueError("threshold must be non-NAN, try " + "sys.maxsize for untruncated representation") + + if precision is not None: + # forbid the bad precision arg as suggested by issue #18254 + try: + options['precision'] = operator.index(precision) + except TypeError as e: + raise TypeError('precision must be an integer') from e + + return options + + +@set_module('numpy') +def set_printoptions(precision=None, threshold=None, edgeitems=None, + linewidth=None, suppress=None, nanstr=None, infstr=None, + formatter=None, sign=None, floatmode=None, *, legacy=None): + """ + Set printing options. + + These options determine the way floating point numbers, arrays and + other NumPy objects are displayed. 
+ + Parameters + ---------- + precision : int or None, optional + Number of digits of precision for floating point output (default 8). + May be None if `floatmode` is not `fixed`, to print as many digits as + necessary to uniquely specify the value. + threshold : int, optional + Total number of array elements which trigger summarization + rather than full repr (default 1000). + To always use the full repr without summarization, pass `sys.maxsize`. + edgeitems : int, optional + Number of array items in summary at beginning and end of + each dimension (default 3). + linewidth : int, optional + The number of characters per line for the purpose of inserting + line breaks (default 75). + suppress : bool, optional + If True, always print floating point numbers using fixed point + notation, in which case numbers equal to zero in the current precision + will print as zero. If False, then scientific notation is used when + absolute value of the smallest number is < 1e-4 or the ratio of the + maximum absolute value to the minimum is > 1e3. The default is False. + nanstr : str, optional + String representation of floating point not-a-number (default nan). + infstr : str, optional + String representation of floating point infinity (default inf). + sign : string, either '-', '+', or ' ', optional + Controls printing of the sign of floating-point types. If '+', always + print the sign of positive values. If ' ', always prints a space + (whitespace character) in the sign position of positive values. If + '-', omit the sign character of positive values. (default '-') + formatter : dict of callables, optional + If not None, the keys should indicate the type(s) that the respective + formatting function applies to. Callables should return a string. + Types that are not specified (by their corresponding keys) are handled + by the default formatters. Individual types for which a formatter + can be set are: + + - 'bool' + - 'int' + - 'timedelta' : a `numpy.timedelta64` + - 'datetime' : a `numpy.datetime64` + - 'float' + - 'longfloat' : 128-bit floats + - 'complexfloat' + - 'longcomplexfloat' : composed of two 128-bit floats + - 'numpystr' : types `numpy.string_` and `numpy.unicode_` + - 'object' : `np.object_` arrays + + Other keys that can be used to set a group of types at once are: + + - 'all' : sets all types + - 'int_kind' : sets 'int' + - 'float_kind' : sets 'float' and 'longfloat' + - 'complex_kind' : sets 'complexfloat' and 'longcomplexfloat' + - 'str_kind' : sets 'numpystr' + floatmode : str, optional + Controls the interpretation of the `precision` option for + floating-point types. Can take the following values + (default maxprec_equal): + + * 'fixed': Always print exactly `precision` fractional digits, + even if this would print more or fewer digits than + necessary to specify the value uniquely. + * 'unique': Print the minimum number of fractional digits necessary + to represent each value uniquely. Different elements may + have a different number of digits. The value of the + `precision` option is ignored. + * 'maxprec': Print at most `precision` fractional digits, but if + an element can be uniquely represented with fewer digits + only print it with that many. + * 'maxprec_equal': Print at most `precision` fractional digits, + but if every element in the array can be uniquely + represented with an equal number of fewer digits, use that + many digits for all elements. + legacy : string or `False`, optional + If set to the string `'1.13'` enables 1.13 legacy printing mode. 
This + approximates numpy 1.13 print output by including a space in the sign + position of floats and different behavior for 0d arrays. This also + enables 1.21 legacy printing mode (described below). + + If set to the string `'1.21'` enables 1.21 legacy printing mode. This + approximates numpy 1.21 print output of complex structured dtypes + by not inserting spaces after commas that separate fields and after + colons. + + If set to `False`, disables legacy mode. + + Unrecognized strings will be ignored with a warning for forward + compatibility. + + .. versionadded:: 1.14.0 + .. versionchanged:: 1.22.0 + + See Also + -------- + get_printoptions, printoptions, set_string_function, array2string + + Notes + ----- + `formatter` is always reset with a call to `set_printoptions`. + + Use `printoptions` as a context manager to set the values temporarily. + + Examples + -------- + Floating point precision can be set: + + >>> np.set_printoptions(precision=4) + >>> np.array([1.123456789]) + [1.1235] + + Long arrays can be summarised: + + >>> np.set_printoptions(threshold=5) + >>> np.arange(10) + array([0, 1, 2, ..., 7, 8, 9]) + + Small results can be suppressed: + + >>> eps = np.finfo(float).eps + >>> x = np.arange(4.) + >>> x**2 - (x + eps)**2 + array([-4.9304e-32, -4.4409e-16, 0.0000e+00, 0.0000e+00]) + >>> np.set_printoptions(suppress=True) + >>> x**2 - (x + eps)**2 + array([-0., -0., 0., 0.]) + + A custom formatter can be used to display array elements as desired: + + >>> np.set_printoptions(formatter={'all':lambda x: 'int: '+str(-x)}) + >>> x = np.arange(3) + >>> x + array([int: 0, int: -1, int: -2]) + >>> np.set_printoptions() # formatter gets reset + >>> x + array([0, 1, 2]) + + To put back the default options, you can use: + + >>> np.set_printoptions(edgeitems=3, infstr='inf', + ... linewidth=75, nanstr='nan', precision=8, + ... suppress=False, threshold=1000, formatter=None) + + Also to temporarily override options, use `printoptions` as a context manager: + + >>> with np.printoptions(precision=2, suppress=True, threshold=5): + ... np.linspace(0, 10, 10) + array([ 0. , 1.11, 2.22, ..., 7.78, 8.89, 10. ]) + + """ + opt = _make_options_dict(precision, threshold, edgeitems, linewidth, + suppress, nanstr, infstr, sign, formatter, + floatmode, legacy) + # formatter is always reset + opt['formatter'] = formatter + _format_options.update(opt) + + # set the C variable for legacy mode + if _format_options['legacy'] == 113: + set_legacy_print_mode(113) + # reset the sign option in legacy mode to avoid confusion + _format_options['sign'] = '-' + elif _format_options['legacy'] == 121: + set_legacy_print_mode(121) + elif _format_options['legacy'] == sys.maxsize: + set_legacy_print_mode(0) + + +@set_module('numpy') +def get_printoptions(): + """ + Return the current print options. + + Returns + ------- + print_opts : dict + Dictionary of current print options with keys + + - precision : int + - threshold : int + - edgeitems : int + - linewidth : int + - suppress : bool + - nanstr : str + - infstr : str + - formatter : dict of callables + - sign : str + + For a full description of these options, see `set_printoptions`. 
+ + See Also + -------- + set_printoptions, printoptions, set_string_function + + """ + opts = _format_options.copy() + opts['legacy'] = { + 113: '1.13', 121: '1.21', sys.maxsize: False, + }[opts['legacy']] + return opts + + +def _get_legacy_print_mode(): + """Return the legacy print mode as an int.""" + return _format_options['legacy'] + + +@set_module('numpy') +@contextlib.contextmanager +def printoptions(*args, **kwargs): + """Context manager for setting print options. + + Set print options for the scope of the `with` block, and restore the old + options at the end. See `set_printoptions` for the full description of + available options. + + Examples + -------- + + >>> from numpy.testing import assert_equal + >>> with np.printoptions(precision=2): + ... np.array([2.0]) / 3 + array([0.67]) + + The `as`-clause of the `with`-statement gives the current print options: + + >>> with np.printoptions(precision=2) as opts: + ... assert_equal(opts, np.get_printoptions()) + + See Also + -------- + set_printoptions, get_printoptions + + """ + opts = np.get_printoptions() + try: + np.set_printoptions(*args, **kwargs) + yield np.get_printoptions() + finally: + np.set_printoptions(**opts) + + +def _leading_trailing(a, edgeitems, index=()): + """ + Keep only the N-D corners (leading and trailing edges) of an array. + + Should be passed a base-class ndarray, since it makes no guarantees about + preserving subclasses. + """ + axis = len(index) + if axis == a.ndim: + return a[index] + + if a.shape[axis] > 2*edgeitems: + return concatenate(( + _leading_trailing(a, edgeitems, index + np.index_exp[ :edgeitems]), + _leading_trailing(a, edgeitems, index + np.index_exp[-edgeitems:]) + ), axis=axis) + else: + return _leading_trailing(a, edgeitems, index + np.index_exp[:]) + + +def _object_format(o): + """ Object arrays containing lists should be printed unambiguously """ + if type(o) is list: + fmt = 'list({!r})' + else: + fmt = '{!r}' + return fmt.format(o) + +def repr_format(x): + return repr(x) + +def str_format(x): + return str(x) + +def _get_formatdict(data, *, precision, floatmode, suppress, sign, legacy, + formatter, **kwargs): + # note: extra arguments in kwargs are ignored + + # wrapped in lambdas to avoid taking a code path with the wrong type of data + formatdict = { + 'bool': lambda: BoolFormat(data), + 'int': lambda: IntegerFormat(data), + 'float': lambda: FloatingFormat( + data, precision, floatmode, suppress, sign, legacy=legacy), + 'longfloat': lambda: FloatingFormat( + data, precision, floatmode, suppress, sign, legacy=legacy), + 'complexfloat': lambda: ComplexFloatingFormat( + data, precision, floatmode, suppress, sign, legacy=legacy), + 'longcomplexfloat': lambda: ComplexFloatingFormat( + data, precision, floatmode, suppress, sign, legacy=legacy), + 'datetime': lambda: DatetimeFormat(data, legacy=legacy), + 'timedelta': lambda: TimedeltaFormat(data), + 'object': lambda: _object_format, + 'void': lambda: str_format, + 'numpystr': lambda: repr_format} + + # we need to wrap values in `formatter` in a lambda, so that the interface + # is the same as the above values. 
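+    # (Editor's note, added for this write-up: the wrapper is needed because a
+    # bare ``lambda: formatter[key]`` in the loops below would late-bind
+    # ``key`` and leave every entry pointing at the last formatter. Compare:
+    #
+    #     >>> fns = [lambda: k for k in (1, 2)]
+    #     >>> [f() for f in fns]
+    #     [2, 2]
+    #     >>> fns = [(lambda v: (lambda: v))(k) for k in (1, 2)]
+    #     >>> [f() for f in fns]
+    #     [1, 2]
+    # )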
+ def indirect(x): + return lambda: x + + if formatter is not None: + fkeys = [k for k in formatter.keys() if formatter[k] is not None] + if 'all' in fkeys: + for key in formatdict.keys(): + formatdict[key] = indirect(formatter['all']) + if 'int_kind' in fkeys: + for key in ['int']: + formatdict[key] = indirect(formatter['int_kind']) + if 'float_kind' in fkeys: + for key in ['float', 'longfloat']: + formatdict[key] = indirect(formatter['float_kind']) + if 'complex_kind' in fkeys: + for key in ['complexfloat', 'longcomplexfloat']: + formatdict[key] = indirect(formatter['complex_kind']) + if 'str_kind' in fkeys: + formatdict['numpystr'] = indirect(formatter['str_kind']) + for key in formatdict.keys(): + if key in fkeys: + formatdict[key] = indirect(formatter[key]) + + return formatdict + +def _get_format_function(data, **options): + """ + find the right formatting function for the dtype_ + """ + dtype_ = data.dtype + dtypeobj = dtype_.type + formatdict = _get_formatdict(data, **options) + if dtypeobj is None: + return formatdict["numpystr"]() + elif issubclass(dtypeobj, _nt.bool_): + return formatdict['bool']() + elif issubclass(dtypeobj, _nt.integer): + if issubclass(dtypeobj, _nt.timedelta64): + return formatdict['timedelta']() + else: + return formatdict['int']() + elif issubclass(dtypeobj, _nt.floating): + if issubclass(dtypeobj, _nt.longfloat): + return formatdict['longfloat']() + else: + return formatdict['float']() + elif issubclass(dtypeobj, _nt.complexfloating): + if issubclass(dtypeobj, _nt.clongfloat): + return formatdict['longcomplexfloat']() + else: + return formatdict['complexfloat']() + elif issubclass(dtypeobj, (_nt.unicode_, _nt.string_)): + return formatdict['numpystr']() + elif issubclass(dtypeobj, _nt.datetime64): + return formatdict['datetime']() + elif issubclass(dtypeobj, _nt.object_): + return formatdict['object']() + elif issubclass(dtypeobj, _nt.void): + if dtype_.names is not None: + return StructuredVoidFormat.from_data(data, **options) + else: + return formatdict['void']() + else: + return formatdict['numpystr']() + + +def _recursive_guard(fillvalue='...'): + """ + Like the python 3.2 reprlib.recursive_repr, but forwards *args and **kwargs + + Decorates a function such that if it calls itself with the same first + argument, it returns `fillvalue` instead of recursing. + + Largely copied from reprlib.recursive_repr + """ + + def decorating_function(f): + repr_running = set() + + @functools.wraps(f) + def wrapper(self, *args, **kwargs): + key = id(self), get_ident() + if key in repr_running: + return fillvalue + repr_running.add(key) + try: + return f(self, *args, **kwargs) + finally: + repr_running.discard(key) + + return wrapper + + return decorating_function + + +# gracefully handle recursive calls, when object arrays contain themselves +@_recursive_guard() +def _array2string(a, options, separator=' ', prefix=""): + # The formatter __init__s in _get_format_function cannot deal with + # subclasses yet, and we also need to avoid recursion issues in + # _formatArray with subclasses which return 0d arrays in place of scalars + data = asarray(a) + if a.shape == (): + a = data + + if a.size > options['threshold']: + summary_insert = "..." 
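+        # (Editor's note, added for this write-up: the call below keeps only
+        # the ``edgeitems``-sized corners of each dimension, so with the
+        # default ``edgeitems=3`` a length-10000 vector is cut to its first
+        # and last 3 entries before formatting, with ``summary_insert``
+        # spliced in between.)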
+ data = _leading_trailing(data, options['edgeitems']) + else: + summary_insert = "" + + # find the right formatting function for the array + format_function = _get_format_function(data, **options) + + # skip over "[" + next_line_prefix = " " + # skip over array( + next_line_prefix += " "*len(prefix) + + lst = _formatArray(a, format_function, options['linewidth'], + next_line_prefix, separator, options['edgeitems'], + summary_insert, options['legacy']) + return lst + + +def _array2string_dispatcher( + a, max_line_width=None, precision=None, + suppress_small=None, separator=None, prefix=None, + style=None, formatter=None, threshold=None, + edgeitems=None, sign=None, floatmode=None, suffix=None, + *, legacy=None): + return (a,) + + +@array_function_dispatch(_array2string_dispatcher, module='numpy') +def array2string(a, max_line_width=None, precision=None, + suppress_small=None, separator=' ', prefix="", + style=np._NoValue, formatter=None, threshold=None, + edgeitems=None, sign=None, floatmode=None, suffix="", + *, legacy=None): + """ + Return a string representation of an array. + + Parameters + ---------- + a : ndarray + Input array. + max_line_width : int, optional + Inserts newlines if text is longer than `max_line_width`. + Defaults to ``numpy.get_printoptions()['linewidth']``. + precision : int or None, optional + Floating point precision. + Defaults to ``numpy.get_printoptions()['precision']``. + suppress_small : bool, optional + Represent numbers "very close" to zero as zero; default is False. + Very close is defined by precision: if the precision is 8, e.g., + numbers smaller (in absolute value) than 5e-9 are represented as + zero. + Defaults to ``numpy.get_printoptions()['suppress']``. + separator : str, optional + Inserted between elements. + prefix : str, optional + suffix : str, optional + The length of the prefix and suffix strings are used to respectively + align and wrap the output. An array is typically printed as:: + + prefix + array2string(a) + suffix + + The output is left-padded by the length of the prefix string, and + wrapping is forced at the column ``max_line_width - len(suffix)``. + It should be noted that the content of prefix and suffix strings are + not included in the output. + style : _NoValue, optional + Has no effect, do not use. + + .. deprecated:: 1.14.0 + formatter : dict of callables, optional + If not None, the keys should indicate the type(s) that the respective + formatting function applies to. Callables should return a string. + Types that are not specified (by their corresponding keys) are handled + by the default formatters. Individual types for which a formatter + can be set are: + + - 'bool' + - 'int' + - 'timedelta' : a `numpy.timedelta64` + - 'datetime' : a `numpy.datetime64` + - 'float' + - 'longfloat' : 128-bit floats + - 'complexfloat' + - 'longcomplexfloat' : composed of two 128-bit floats + - 'void' : type `numpy.void` + - 'numpystr' : types `numpy.string_` and `numpy.unicode_` + + Other keys that can be used to set a group of types at once are: + + - 'all' : sets all types + - 'int_kind' : sets 'int' + - 'float_kind' : sets 'float' and 'longfloat' + - 'complex_kind' : sets 'complexfloat' and 'longcomplexfloat' + - 'str_kind' : sets 'numpystr' + threshold : int, optional + Total number of array elements which trigger summarization + rather than full repr. + Defaults to ``numpy.get_printoptions()['threshold']``. + edgeitems : int, optional + Number of array items in summary at beginning and end of + each dimension. 
+ Defaults to ``numpy.get_printoptions()['edgeitems']``. + sign : string, either '-', '+', or ' ', optional + Controls printing of the sign of floating-point types. If '+', always + print the sign of positive values. If ' ', always prints a space + (whitespace character) in the sign position of positive values. If + '-', omit the sign character of positive values. + Defaults to ``numpy.get_printoptions()['sign']``. + floatmode : str, optional + Controls the interpretation of the `precision` option for + floating-point types. + Defaults to ``numpy.get_printoptions()['floatmode']``. + Can take the following values: + + - 'fixed': Always print exactly `precision` fractional digits, + even if this would print more or fewer digits than + necessary to specify the value uniquely. + - 'unique': Print the minimum number of fractional digits necessary + to represent each value uniquely. Different elements may + have a different number of digits. The value of the + `precision` option is ignored. + - 'maxprec': Print at most `precision` fractional digits, but if + an element can be uniquely represented with fewer digits + only print it with that many. + - 'maxprec_equal': Print at most `precision` fractional digits, + but if every element in the array can be uniquely + represented with an equal number of fewer digits, use that + many digits for all elements. + legacy : string or `False`, optional + If set to the string `'1.13'` enables 1.13 legacy printing mode. This + approximates numpy 1.13 print output by including a space in the sign + position of floats and different behavior for 0d arrays. If set to + `False`, disables legacy mode. Unrecognized strings will be ignored + with a warning for forward compatibility. + + .. versionadded:: 1.14.0 + + Returns + ------- + array_str : str + String representation of the array. + + Raises + ------ + TypeError + if a callable in `formatter` does not return a string. + + See Also + -------- + array_str, array_repr, set_printoptions, get_printoptions + + Notes + ----- + If a formatter is specified for a certain type, the `precision` keyword is + ignored for that type. + + This is a very flexible function; `array_repr` and `array_str` are using + `array2string` internally so keywords with the same name should work + identically in all three functions. + + Examples + -------- + >>> x = np.array([1e-16,1,2,3]) + >>> np.array2string(x, precision=2, separator=',', + ... suppress_small=True) + '[0.,1.,2.,3.]' + + >>> x = np.arange(3.) 
+ >>> np.array2string(x, formatter={'float_kind':lambda x: "%.2f" % x}) + '[0.00 1.00 2.00]' + + >>> x = np.arange(3) + >>> np.array2string(x, formatter={'int':lambda x: hex(x)}) + '[0x0 0x1 0x2]' + + """ + + overrides = _make_options_dict(precision, threshold, edgeitems, + max_line_width, suppress_small, None, None, + sign, formatter, floatmode, legacy) + options = _format_options.copy() + options.update(overrides) + + if options['legacy'] <= 113: + if style is np._NoValue: + style = repr + + if a.shape == () and a.dtype.names is None: + return style(a.item()) + elif style is not np._NoValue: + # Deprecation 11-9-2017 v1.14 + warnings.warn("'style' argument is deprecated and no longer functional" + " except in 1.13 'legacy' mode", + DeprecationWarning, stacklevel=3) + + if options['legacy'] > 113: + options['linewidth'] -= len(suffix) + + # treat as a null array if any of shape elements == 0 + if a.size == 0: + return "[]" + + return _array2string(a, options, separator, prefix) + + +def _extendLine(s, line, word, line_width, next_line_prefix, legacy): + needs_wrap = len(line) + len(word) > line_width + if legacy > 113: + # don't wrap lines if it won't help + if len(line) <= len(next_line_prefix): + needs_wrap = False + + if needs_wrap: + s += line.rstrip() + "\n" + line = next_line_prefix + line += word + return s, line + + +def _extendLine_pretty(s, line, word, line_width, next_line_prefix, legacy): + """ + Extends line with nicely formatted (possibly multi-line) string ``word``. + """ + words = word.splitlines() + if len(words) == 1 or legacy <= 113: + return _extendLine(s, line, word, line_width, next_line_prefix, legacy) + + max_word_length = max(len(word) for word in words) + if (len(line) + max_word_length > line_width and + len(line) > len(next_line_prefix)): + s += line.rstrip() + '\n' + line = next_line_prefix + words[0] + indent = next_line_prefix + else: + indent = len(line)*' ' + line += words[0] + + for word in words[1::]: + s += line.rstrip() + '\n' + line = indent + word + + suffix_length = max_word_length - len(words[-1]) + line += suffix_length*' ' + + return s, line + +def _formatArray(a, format_function, line_width, next_line_prefix, + separator, edge_items, summary_insert, legacy): + """formatArray is designed for two modes of operation: + + 1. Full output + + 2. Summarized output + + """ + def recurser(index, hanging_indent, curr_width): + """ + By using this local function, we don't need to recurse with all the + arguments. 
Since this function is not created recursively, the cost is + not significant + """ + axis = len(index) + axes_left = a.ndim - axis + + if axes_left == 0: + return format_function(a[index]) + + # when recursing, add a space to align with the [ added, and reduce the + # length of the line by 1 + next_hanging_indent = hanging_indent + ' ' + if legacy <= 113: + next_width = curr_width + else: + next_width = curr_width - len(']') + + a_len = a.shape[axis] + show_summary = summary_insert and 2*edge_items < a_len + if show_summary: + leading_items = edge_items + trailing_items = edge_items + else: + leading_items = 0 + trailing_items = a_len + + # stringify the array with the hanging indent on the first line too + s = '' + + # last axis (rows) - wrap elements if they would not fit on one line + if axes_left == 1: + # the length up until the beginning of the separator / bracket + if legacy <= 113: + elem_width = curr_width - len(separator.rstrip()) + else: + elem_width = curr_width - max(len(separator.rstrip()), len(']')) + + line = hanging_indent + for i in range(leading_items): + word = recurser(index + (i,), next_hanging_indent, next_width) + s, line = _extendLine_pretty( + s, line, word, elem_width, hanging_indent, legacy) + line += separator + + if show_summary: + s, line = _extendLine( + s, line, summary_insert, elem_width, hanging_indent, legacy) + if legacy <= 113: + line += ", " + else: + line += separator + + for i in range(trailing_items, 1, -1): + word = recurser(index + (-i,), next_hanging_indent, next_width) + s, line = _extendLine_pretty( + s, line, word, elem_width, hanging_indent, legacy) + line += separator + + if legacy <= 113: + # width of the separator is not considered on 1.13 + elem_width = curr_width + word = recurser(index + (-1,), next_hanging_indent, next_width) + s, line = _extendLine_pretty( + s, line, word, elem_width, hanging_indent, legacy) + + s += line + + # other axes - insert newlines between rows + else: + s = '' + line_sep = separator.rstrip() + '\n'*(axes_left - 1) + + for i in range(leading_items): + nested = recurser(index + (i,), next_hanging_indent, next_width) + s += hanging_indent + nested + line_sep + + if show_summary: + if legacy <= 113: + # trailing space, fixed nbr of newlines, and fixed separator + s += hanging_indent + summary_insert + ", \n" + else: + s += hanging_indent + summary_insert + line_sep + + for i in range(trailing_items, 1, -1): + nested = recurser(index + (-i,), next_hanging_indent, + next_width) + s += hanging_indent + nested + line_sep + + nested = recurser(index + (-1,), next_hanging_indent, next_width) + s += hanging_indent + nested + + # remove the hanging indent, and wrap in [] + s = '[' + s[len(hanging_indent):] + ']' + return s + + try: + # invoke the recursive part with an initial index and prefix + return recurser(index=(), + hanging_indent=next_line_prefix, + curr_width=line_width) + finally: + # recursive closures have a cyclic reference to themselves, which + # requires gc to collect (gh-10620). 
To avoid this problem, for + # performance and PyPy friendliness, we break the cycle: + recurser = None + +def _none_or_positive_arg(x, name): + if x is None: + return -1 + if x < 0: + raise ValueError("{} must be >= 0".format(name)) + return x + +class FloatingFormat: + """ Formatter for subtypes of np.floating """ + def __init__(self, data, precision, floatmode, suppress_small, sign=False, + *, legacy=None): + # for backcompatibility, accept bools + if isinstance(sign, bool): + sign = '+' if sign else '-' + + self._legacy = legacy + if self._legacy <= 113: + # when not 0d, legacy does not support '-' + if data.shape != () and sign == '-': + sign = ' ' + + self.floatmode = floatmode + if floatmode == 'unique': + self.precision = None + else: + self.precision = precision + + self.precision = _none_or_positive_arg(self.precision, 'precision') + + self.suppress_small = suppress_small + self.sign = sign + self.exp_format = False + self.large_exponent = False + + self.fillFormat(data) + + def fillFormat(self, data): + # only the finite values are used to compute the number of digits + finite_vals = data[isfinite(data)] + + # choose exponential mode based on the non-zero finite values: + abs_non_zero = absolute(finite_vals[finite_vals != 0]) + if len(abs_non_zero) != 0: + max_val = np.max(abs_non_zero) + min_val = np.min(abs_non_zero) + with errstate(over='ignore'): # division can overflow + if max_val >= 1.e8 or (not self.suppress_small and + (min_val < 0.0001 or max_val/min_val > 1000.)): + self.exp_format = True + + # do a first pass of printing all the numbers, to determine sizes + if len(finite_vals) == 0: + self.pad_left = 0 + self.pad_right = 0 + self.trim = '.' + self.exp_size = -1 + self.unique = True + self.min_digits = None + elif self.exp_format: + trim, unique = '.', True + if self.floatmode == 'fixed' or self._legacy <= 113: + trim, unique = 'k', False + strs = (dragon4_scientific(x, precision=self.precision, + unique=unique, trim=trim, sign=self.sign == '+') + for x in finite_vals) + frac_strs, _, exp_strs = zip(*(s.partition('e') for s in strs)) + int_part, frac_part = zip(*(s.split('.') for s in frac_strs)) + self.exp_size = max(len(s) for s in exp_strs) - 1 + + self.trim = 'k' + self.precision = max(len(s) for s in frac_part) + self.min_digits = self.precision + self.unique = unique + + # for back-compat with np 1.13, use 2 spaces & sign and full prec + if self._legacy <= 113: + self.pad_left = 3 + else: + # this should be only 1 or 2. Can be calculated from sign. + self.pad_left = max(len(s) for s in int_part) + # pad_right is only needed for nan length calculation + self.pad_right = self.exp_size + 2 + self.precision + else: + trim, unique = '.', True + if self.floatmode == 'fixed': + trim, unique = 'k', False + strs = (dragon4_positional(x, precision=self.precision, + fractional=True, + unique=unique, trim=trim, + sign=self.sign == '+') + for x in finite_vals) + int_part, frac_part = zip(*(s.split('.') for s in strs)) + if self._legacy <= 113: + self.pad_left = 1 + max(len(s.lstrip('-+')) for s in int_part) + else: + self.pad_left = max(len(s) for s in int_part) + self.pad_right = max(len(s) for s in frac_part) + self.exp_size = -1 + self.unique = unique + + if self.floatmode in ['fixed', 'maxprec_equal']: + self.precision = self.min_digits = self.pad_right + self.trim = 'k' + else: + self.trim = '.' 
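+                # 'unique' / 'maxprec' modes: trailing zeros are trimmed and
+                # no minimum digit count is imposed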
+ self.min_digits = 0 + + if self._legacy > 113: + # account for sign = ' ' by adding one to pad_left + if self.sign == ' ' and not any(np.signbit(finite_vals)): + self.pad_left += 1 + + # if there are non-finite values, may need to increase pad_left + if data.size != finite_vals.size: + neginf = self.sign != '-' or any(data[isinf(data)] < 0) + nanlen = len(_format_options['nanstr']) + inflen = len(_format_options['infstr']) + neginf + offset = self.pad_right + 1 # +1 for decimal pt + self.pad_left = max(self.pad_left, nanlen - offset, inflen - offset) + + def __call__(self, x): + if not np.isfinite(x): + with errstate(invalid='ignore'): + if np.isnan(x): + sign = '+' if self.sign == '+' else '' + ret = sign + _format_options['nanstr'] + else: # isinf + sign = '-' if x < 0 else '+' if self.sign == '+' else '' + ret = sign + _format_options['infstr'] + return ' '*(self.pad_left + self.pad_right + 1 - len(ret)) + ret + + if self.exp_format: + return dragon4_scientific(x, + precision=self.precision, + min_digits=self.min_digits, + unique=self.unique, + trim=self.trim, + sign=self.sign == '+', + pad_left=self.pad_left, + exp_digits=self.exp_size) + else: + return dragon4_positional(x, + precision=self.precision, + min_digits=self.min_digits, + unique=self.unique, + fractional=True, + trim=self.trim, + sign=self.sign == '+', + pad_left=self.pad_left, + pad_right=self.pad_right) + + +@set_module('numpy') +def format_float_scientific(x, precision=None, unique=True, trim='k', + sign=False, pad_left=None, exp_digits=None, + min_digits=None): + """ + Format a floating-point scalar as a decimal string in scientific notation. + + Provides control over rounding, trimming and padding. Uses and assumes + IEEE unbiased rounding. Uses the "Dragon4" algorithm. + + Parameters + ---------- + x : python float or numpy floating scalar + Value to format. + precision : non-negative integer or None, optional + Maximum number of digits to print. May be None if `unique` is + `True`, but must be an integer if unique is `False`. + unique : boolean, optional + If `True`, use a digit-generation strategy which gives the shortest + representation which uniquely identifies the floating-point number from + other values of the same type, by judicious rounding. If `precision` + is given fewer digits than necessary can be printed. If `min_digits` + is given more can be printed, in which cases the last digit is rounded + with unbiased rounding. + If `False`, digits are generated as if printing an infinite-precision + value and stopping after `precision` digits, rounding the remaining + value with unbiased rounding + trim : one of 'k', '.', '0', '-', optional + Controls post-processing trimming of trailing digits, as follows: + + * 'k' : keep trailing zeros, keep decimal point (no trimming) + * '.' : trim all trailing zeros, leave decimal point + * '0' : trim all but the zero before the decimal point. Insert the + zero if it is missing. + * '-' : trim trailing zeros and any trailing decimal point + sign : boolean, optional + Whether to show the sign for positive values. + pad_left : non-negative integer, optional + Pad the left side of the string with whitespace until at least that + many characters are to the left of the decimal point. + exp_digits : non-negative integer, optional + Pad the exponent with zeros until it contains at least this many digits. + If omitted, the exponent will be at least 2 digits. + min_digits : non-negative integer or None, optional + Minimum number of digits to print. 
This only has an effect for
+        `unique=True`. In that case more digits than necessary to uniquely
+        identify the value may be printed and rounded unbiased.
+
+        .. versionadded:: 1.21.0
+
+    Returns
+    -------
+    rep : string
+        The string representation of the floating point value
+
+    See Also
+    --------
+    format_float_positional
+
+    Examples
+    --------
+    >>> np.format_float_scientific(np.float32(np.pi))
+    '3.1415927e+00'
+    >>> s = np.float32(1.23e24)
+    >>> np.format_float_scientific(s, unique=False, precision=15)
+    '1.230000071797338e+24'
+    >>> np.format_float_scientific(s, exp_digits=4)
+    '1.23e+0024'
+    """
+    precision = _none_or_positive_arg(precision, 'precision')
+    pad_left = _none_or_positive_arg(pad_left, 'pad_left')
+    exp_digits = _none_or_positive_arg(exp_digits, 'exp_digits')
+    min_digits = _none_or_positive_arg(min_digits, 'min_digits')
+    if min_digits > 0 and precision > 0 and min_digits > precision:
+        raise ValueError("min_digits must be less than or equal to precision")
+    return dragon4_scientific(x, precision=precision, unique=unique,
+                              trim=trim, sign=sign, pad_left=pad_left,
+                              exp_digits=exp_digits, min_digits=min_digits)
+
+
+@set_module('numpy')
+def format_float_positional(x, precision=None, unique=True,
+                            fractional=True, trim='k', sign=False,
+                            pad_left=None, pad_right=None, min_digits=None):
+    """
+    Format a floating-point scalar as a decimal string in positional notation.
+
+    Provides control over rounding, trimming and padding. Uses and assumes
+    IEEE unbiased rounding. Uses the "Dragon4" algorithm.
+
+    Parameters
+    ----------
+    x : python float or numpy floating scalar
+        Value to format.
+    precision : non-negative integer or None, optional
+        Maximum number of digits to print. May be None if `unique` is
+        `True`, but must be an integer if unique is `False`.
+    unique : boolean, optional
+        If `True`, use a digit-generation strategy which gives the shortest
+        representation which uniquely identifies the floating-point number from
+        other values of the same type, by judicious rounding. If `precision`
+        is given fewer digits than necessary can be printed, or if `min_digits`
+        is given more can be printed, in which cases the last digit is rounded
+        with unbiased rounding.
+        If `False`, digits are generated as if printing an infinite-precision
+        value and stopping after `precision` digits, rounding the remaining
+        value with unbiased rounding.
+    fractional : boolean, optional
+        If `True`, the cutoffs of `precision` and `min_digits` refer to the
+        total number of digits after the decimal point, including leading
+        zeros.
+        If `False`, `precision` and `min_digits` refer to the total number of
+        significant digits, before or after the decimal point, ignoring leading
+        zeros.
+    trim : one of 'k', '.', '0', '-', optional
+        Controls post-processing trimming of trailing digits, as follows:
+
+        * 'k' : keep trailing zeros, keep decimal point (no trimming)
+        * '.' : trim all trailing zeros, leave decimal point
+        * '0' : trim all but the zero before the decimal point. Insert the
+          zero if it is missing.
+        * '-' : trim trailing zeros and any trailing decimal point
+    sign : boolean, optional
+        Whether to show the sign for positive values.
+    pad_left : non-negative integer, optional
+        Pad the left side of the string with whitespace until at least that
+        many characters are to the left of the decimal point.
+    pad_right : non-negative integer, optional
+        Pad the right side of the string with whitespace until at least that
+        many characters are to the right of the decimal point.
+    min_digits : non-negative integer or None, optional
+        Minimum number of digits to print. Only has an effect if `unique=True`
+        in which case additional digits past those necessary to uniquely
+        identify the value may be printed, rounding the last additional digit.
+
+        .. versionadded:: 1.21.0
+
+    Returns
+    -------
+    rep : string
+        The string representation of the floating point value
+
+    See Also
+    --------
+    format_float_scientific
+
+    Examples
+    --------
+    >>> np.format_float_positional(np.float32(np.pi))
+    '3.1415927'
+    >>> np.format_float_positional(np.float16(np.pi))
+    '3.14'
+    >>> np.format_float_positional(np.float16(0.3))
+    '0.3'
+    >>> np.format_float_positional(np.float16(0.3), unique=False, precision=10)
+    '0.3000488281'
+    """
+    precision = _none_or_positive_arg(precision, 'precision')
+    pad_left = _none_or_positive_arg(pad_left, 'pad_left')
+    pad_right = _none_or_positive_arg(pad_right, 'pad_right')
+    min_digits = _none_or_positive_arg(min_digits, 'min_digits')
+    if not fractional and precision == 0:
+        raise ValueError("precision must be greater than 0 if "
+                         "fractional=False")
+    if min_digits > 0 and precision > 0 and min_digits > precision:
+        raise ValueError("min_digits must be less than or equal to precision")
+    return dragon4_positional(x, precision=precision, unique=unique,
+                              fractional=fractional, trim=trim,
+                              sign=sign, pad_left=pad_left,
+                              pad_right=pad_right, min_digits=min_digits)
+
+
+class IntegerFormat:
+    def __init__(self, data):
+        if data.size > 0:
+            max_str_len = max(len(str(np.max(data))),
+                              len(str(np.min(data))))
+        else:
+            max_str_len = 0
+        self.format = '%{}d'.format(max_str_len)
+
+    def __call__(self, x):
+        return self.format % x
+
+
+class BoolFormat:
+    def __init__(self, data, **kwargs):
+        # add an extra space so " True" and "False" have the same length and
+        # array elements align nicely when printed, except in 0d arrays
+        self.truestr = ' True' if data.shape != () else 'True'
+
+    def __call__(self, x):
+        return self.truestr if x else "False"
+
+
+class ComplexFloatingFormat:
+    """ Formatter for subtypes of np.complexfloating """
+    def __init__(self, x, precision, floatmode, suppress_small,
+                 sign=False, *, legacy=None):
+        # for backcompatibility, accept bools
+        if isinstance(sign, bool):
+            sign = '+' if sign else '-'
+
+        floatmode_real = floatmode_imag = floatmode
+        if legacy <= 113:
+            floatmode_real = 'maxprec_equal'
+            floatmode_imag = 'maxprec'
+
+        self.real_format = FloatingFormat(
+            x.real, precision, floatmode_real, suppress_small,
+            sign=sign, legacy=legacy
+        )
+        self.imag_format = FloatingFormat(
+            x.imag, precision, floatmode_imag, suppress_small,
+            sign='+', legacy=legacy
+        )
+
+    def __call__(self, x):
+        r = self.real_format(x.real)
+        i = self.imag_format(x.imag)
+
+        # add the 'j' before the terminal whitespace in i
+        sp = len(i.rstrip())
+        i = i[:sp] + 'j' + i[sp:]
+
+        return r + i
+
+
+class _TimelikeFormat:
+    def __init__(self, data):
+        non_nat = data[~isnat(data)]
+        if len(non_nat) > 0:
+            # Max str length of non-NaT elements
+            max_str_len = max(len(self._format_non_nat(np.max(non_nat))),
+                              len(self._format_non_nat(np.min(non_nat))))
+        else:
+            max_str_len = 0
+        if len(non_nat) < data.size:
+            # data contains a NaT
+            max_str_len = max(max_str_len, 5)
+        self._format = '%{}s'.format(max_str_len)
+        self._nat = "'NaT'".rjust(max_str_len)
+
+    def _format_non_nat(self, x):
+        # override in subclass
+        raise NotImplementedError
+
+    def __call__(self, x):
+        if isnat(x):
+            return self._nat
+        else:
+            return self._format % self._format_non_nat(x)
+
+
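+# Illustration of the width logic above (a hedged example, values chosen
+# for demonstration): for np.array(['2020-01-01', 'NaT'], dtype='datetime64[D]')
+# the widest non-NaT element formats as "'2020-01-01'" (12 characters), so
+# the NaT entry is rendered as "'NaT'".rjust(12) and both entries line up
+# column-wise in the repr.
+
+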
+class DatetimeFormat(_TimelikeFormat): + def __init__(self, x, unit=None, timezone=None, casting='same_kind', + legacy=False): + # Get the unit from the dtype + if unit is None: + if x.dtype.kind == 'M': + unit = datetime_data(x.dtype)[0] + else: + unit = 's' + + if timezone is None: + timezone = 'naive' + self.timezone = timezone + self.unit = unit + self.casting = casting + self.legacy = legacy + + # must be called after the above are configured + super().__init__(x) + + def __call__(self, x): + if self.legacy <= 113: + return self._format_non_nat(x) + return super().__call__(x) + + def _format_non_nat(self, x): + return "'%s'" % datetime_as_string(x, + unit=self.unit, + timezone=self.timezone, + casting=self.casting) + + +class TimedeltaFormat(_TimelikeFormat): + def _format_non_nat(self, x): + return str(x.astype('i8')) + + +class SubArrayFormat: + def __init__(self, format_function): + self.format_function = format_function + + def __call__(self, arr): + if arr.ndim <= 1: + return "[" + ", ".join(self.format_function(a) for a in arr) + "]" + return "[" + ", ".join(self.__call__(a) for a in arr) + "]" + + +class StructuredVoidFormat: + """ + Formatter for structured np.void objects. + + This does not work on structured alias types like np.dtype(('i4', 'i2,i2')), + as alias scalars lose their field information, and the implementation + relies upon np.void.__getitem__. + """ + def __init__(self, format_functions): + self.format_functions = format_functions + + @classmethod + def from_data(cls, data, **options): + """ + This is a second way to initialize StructuredVoidFormat, using the raw data + as input. Added to avoid changing the signature of __init__. + """ + format_functions = [] + for field_name in data.dtype.names: + format_function = _get_format_function(data[field_name], **options) + if data.dtype[field_name].shape != (): + format_function = SubArrayFormat(format_function) + format_functions.append(format_function) + return cls(format_functions) + + def __call__(self, x): + str_fields = [ + format_function(field) + for field, format_function in zip(x, self.format_functions) + ] + if len(str_fields) == 1: + return "({},)".format(str_fields[0]) + else: + return "({})".format(", ".join(str_fields)) + + +def _void_scalar_repr(x): + """ + Implements the repr for structured-void scalars. It is called from the + scalartypes.c.src code, and is placed here because it uses the elementwise + formatters defined above. + """ + return StructuredVoidFormat.from_data(array(x), **_format_options)(x) + + +_typelessdata = [int_, float_, complex_, bool_] +if issubclass(intc, int): + _typelessdata.append(intc) +if issubclass(longlong, int): + _typelessdata.append(longlong) + + +def dtype_is_implied(dtype): + """ + Determine if the given dtype is implied by the representation of its values. + + Parameters + ---------- + dtype : dtype + Data type + + Returns + ------- + implied : bool + True if the dtype is implied by the representation of its values. 
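+
+    Notes
+    -----
+    Outside of the legacy-1.13 special case for booleans and the structured
+    (named-field) check below, this reduces to membership in `_typelessdata`
+    above: the platform-default int, float, complex and bool scalar types
+    that `np.array` infers without an explicit dtype argument.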
+ + Examples + -------- + >>> np.core.arrayprint.dtype_is_implied(int) + True + >>> np.array([1, 2, 3], int) + array([1, 2, 3]) + >>> np.core.arrayprint.dtype_is_implied(np.int8) + False + >>> np.array([1, 2, 3], np.int8) + array([1, 2, 3], dtype=int8) + """ + dtype = np.dtype(dtype) + if _format_options['legacy'] <= 113 and dtype.type == bool_: + return False + + # not just void types can be structured, and names are not part of the repr + if dtype.names is not None: + return False + + return dtype.type in _typelessdata + + +def dtype_short_repr(dtype): + """ + Convert a dtype to a short form which evaluates to the same dtype. + + The intent is roughly that the following holds + + >>> from numpy import * + >>> dt = np.int64([1, 2]).dtype + >>> assert eval(dtype_short_repr(dt)) == dt + """ + if type(dtype).__repr__ != np.dtype.__repr__: + # TODO: Custom repr for user DTypes, logic should likely move. + return repr(dtype) + if dtype.names is not None: + # structured dtypes give a list or tuple repr + return str(dtype) + elif issubclass(dtype.type, flexible): + # handle these separately so they don't give garbage like str256 + return "'%s'" % str(dtype) + + typename = dtype.name + # quote typenames which can't be represented as python variable names + if typename and not (typename[0].isalpha() and typename.isalnum()): + typename = repr(typename) + + return typename + + +def _array_repr_implementation( + arr, max_line_width=None, precision=None, suppress_small=None, + array2string=array2string): + """Internal version of array_repr() that allows overriding array2string.""" + if max_line_width is None: + max_line_width = _format_options['linewidth'] + + if type(arr) is not ndarray: + class_name = type(arr).__name__ + else: + class_name = "array" + + skipdtype = dtype_is_implied(arr.dtype) and arr.size > 0 + + prefix = class_name + "(" + suffix = ")" if skipdtype else "," + + if (_format_options['legacy'] <= 113 and + arr.shape == () and not arr.dtype.names): + lst = repr(arr.item()) + elif arr.size > 0 or arr.shape == (0,): + lst = array2string(arr, max_line_width, precision, suppress_small, + ', ', prefix, suffix=suffix) + else: # show zero-length shape unless it is (0,) + lst = "[], shape=%s" % (repr(arr.shape),) + + arr_str = prefix + lst + suffix + + if skipdtype: + return arr_str + + dtype_str = "dtype={})".format(dtype_short_repr(arr.dtype)) + + # compute whether we should put dtype on a new line: Do so if adding the + # dtype would extend the last line past max_line_width. + # Note: This line gives the correct result even when rfind returns -1. + last_line_len = len(arr_str) - (arr_str.rfind('\n') + 1) + spacer = " " + if _format_options['legacy'] <= 113: + if issubclass(arr.dtype.type, flexible): + spacer = '\n' + ' '*len(class_name + "(") + elif last_line_len + len(dtype_str) + 1 > max_line_width: + spacer = '\n' + ' '*len(class_name + "(") + + return arr_str + spacer + dtype_str + + +def _array_repr_dispatcher( + arr, max_line_width=None, precision=None, suppress_small=None): + return (arr,) + + +@array_function_dispatch(_array_repr_dispatcher, module='numpy') +def array_repr(arr, max_line_width=None, precision=None, suppress_small=None): + """ + Return the string representation of an array. + + Parameters + ---------- + arr : ndarray + Input array. + max_line_width : int, optional + Inserts newlines if text is longer than `max_line_width`. + Defaults to ``numpy.get_printoptions()['linewidth']``. + precision : int, optional + Floating point precision. 
+ Defaults to ``numpy.get_printoptions()['precision']``. + suppress_small : bool, optional + Represent numbers "very close" to zero as zero; default is False. + Very close is defined by precision: if the precision is 8, e.g., + numbers smaller (in absolute value) than 5e-9 are represented as + zero. + Defaults to ``numpy.get_printoptions()['suppress']``. + + Returns + ------- + string : str + The string representation of an array. + + See Also + -------- + array_str, array2string, set_printoptions + + Examples + -------- + >>> np.array_repr(np.array([1,2])) + 'array([1, 2])' + >>> np.array_repr(np.ma.array([0.])) + 'MaskedArray([0.])' + >>> np.array_repr(np.array([], np.int32)) + 'array([], dtype=int32)' + + >>> x = np.array([1e-6, 4e-7, 2, 3]) + >>> np.array_repr(x, precision=6, suppress_small=True) + 'array([0.000001, 0. , 2. , 3. ])' + + """ + return _array_repr_implementation( + arr, max_line_width, precision, suppress_small) + + +@_recursive_guard() +def _guarded_repr_or_str(v): + if isinstance(v, bytes): + return repr(v) + return str(v) + + +def _array_str_implementation( + a, max_line_width=None, precision=None, suppress_small=None, + array2string=array2string): + """Internal version of array_str() that allows overriding array2string.""" + if (_format_options['legacy'] <= 113 and + a.shape == () and not a.dtype.names): + return str(a.item()) + + # the str of 0d arrays is a special case: It should appear like a scalar, + # so floats are not truncated by `precision`, and strings are not wrapped + # in quotes. So we return the str of the scalar value. + if a.shape == (): + # obtain a scalar and call str on it, avoiding problems for subclasses + # for which indexing with () returns a 0d instead of a scalar by using + # ndarray's getindex. Also guard against recursive 0d object arrays. + return _guarded_repr_or_str(np.ndarray.__getitem__(a, ())) + + return array2string(a, max_line_width, precision, suppress_small, ' ', "") + + +def _array_str_dispatcher( + a, max_line_width=None, precision=None, suppress_small=None): + return (a,) + + +@array_function_dispatch(_array_str_dispatcher, module='numpy') +def array_str(a, max_line_width=None, precision=None, suppress_small=None): + """ + Return a string representation of the data in an array. + + The data in the array is returned as a single string. This function is + similar to `array_repr`, the difference being that `array_repr` also + returns information on the kind of array and its data type. + + Parameters + ---------- + a : ndarray + Input array. + max_line_width : int, optional + Inserts newlines if text is longer than `max_line_width`. + Defaults to ``numpy.get_printoptions()['linewidth']``. + precision : int, optional + Floating point precision. + Defaults to ``numpy.get_printoptions()['precision']``. + suppress_small : bool, optional + Represent numbers "very close" to zero as zero; default is False. + Very close is defined by precision: if the precision is 8, e.g., + numbers smaller (in absolute value) than 5e-9 are represented as + zero. + Defaults to ``numpy.get_printoptions()['suppress']``. 
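+
+    Returns
+    -------
+    out : str
+        String representation of the data in `a`.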
+ + See Also + -------- + array2string, array_repr, set_printoptions + + Examples + -------- + >>> np.array_str(np.arange(3)) + '[0 1 2]' + + """ + return _array_str_implementation( + a, max_line_width, precision, suppress_small) + + +# needed if __array_function__ is disabled +_array2string_impl = getattr(array2string, '__wrapped__', array2string) +_default_array_str = functools.partial(_array_str_implementation, + array2string=_array2string_impl) +_default_array_repr = functools.partial(_array_repr_implementation, + array2string=_array2string_impl) + + +def set_string_function(f, repr=True): + """ + Set a Python function to be used when pretty printing arrays. + + Parameters + ---------- + f : function or None + Function to be used to pretty print arrays. The function should expect + a single array argument and return a string of the representation of + the array. If None, the function is reset to the default NumPy function + to print arrays. + repr : bool, optional + If True (default), the function for pretty printing (``__repr__``) + is set, if False the function that returns the default string + representation (``__str__``) is set. + + See Also + -------- + set_printoptions, get_printoptions + + Examples + -------- + >>> def pprint(arr): + ... return 'HA! - What are you going to do now?' + ... + >>> np.set_string_function(pprint) + >>> a = np.arange(10) + >>> a + HA! - What are you going to do now? + >>> _ = a + >>> # [0 1 2 3 4 5 6 7 8 9] + + We can reset the function to the default: + + >>> np.set_string_function(None) + >>> a + array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) + + `repr` affects either pretty printing or normal string representation. + Note that ``__repr__`` is still affected by setting ``__str__`` + because the width of each array element in the returned string becomes + equal to the length of the result of ``__str__()``. 
+
+    >>> x = np.arange(4)
+    >>> np.set_string_function(lambda x:'random', repr=False)
+    >>> x.__str__()
+    'random'
+    >>> x.__repr__()
+    'array([0, 1, 2, 3])'
+
+    """
+    if f is None:
+        if repr:
+            return multiarray.set_string_function(_default_array_repr, 1)
+        else:
+            return multiarray.set_string_function(_default_array_str, 0)
+    else:
+        return multiarray.set_string_function(f, repr)
diff --git a/.local/lib/python3.8/site-packages/numpy/core/arrayprint.pyi b/.local/lib/python3.8/site-packages/numpy/core/arrayprint.pyi
new file mode 100644
index 0000000..0d33820
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/core/arrayprint.pyi
@@ -0,0 +1,141 @@
+from types import TracebackType
+from typing import Any, Optional, Callable, Union, Type, Literal, TypedDict, SupportsIndex
+
+# Using a private class is by no means ideal, but it is simply a consequence
+# of a `contextlib.context` returning an instance of aforementioned class
+from contextlib import _GeneratorContextManager
+
+from numpy import (
+    ndarray,
+    generic,
+    bool_,
+    integer,
+    timedelta64,
+    datetime64,
+    floating,
+    complexfloating,
+    void,
+    str_,
+    bytes_,
+    longdouble,
+    clongdouble,
+)
+from numpy.typing import ArrayLike, _CharLike_co, _FloatLike_co
+
+_FloatMode = Literal["fixed", "unique", "maxprec", "maxprec_equal"]
+
+class _FormatDict(TypedDict, total=False):
+    bool: Callable[[bool_], str]
+    int: Callable[[integer[Any]], str]
+    timedelta: Callable[[timedelta64], str]
+    datetime: Callable[[datetime64], str]
+    float: Callable[[floating[Any]], str]
+    longfloat: Callable[[longdouble], str]
+    complexfloat: Callable[[complexfloating[Any, Any]], str]
+    longcomplexfloat: Callable[[clongdouble], str]
+    void: Callable[[void], str]
+    numpystr: Callable[[_CharLike_co], str]
+    object: Callable[[object], str]
+    all: Callable[[object], str]
+    int_kind: Callable[[integer[Any]], str]
+    float_kind: Callable[[floating[Any]], str]
+    complex_kind: Callable[[complexfloating[Any, Any]], str]
+    str_kind: Callable[[_CharLike_co], str]
+
+class _FormatOptions(TypedDict):
+    precision: int
+    threshold: int
+    edgeitems: int
+    linewidth: int
+    suppress: bool
+    nanstr: str
+    infstr: str
+    formatter: Optional[_FormatDict]
+    sign: Literal["-", "+", " "]
+    floatmode: _FloatMode
+    legacy: Literal[False, "1.13", "1.21"]
+
+def set_printoptions(
+    precision: Optional[SupportsIndex] = ...,
+    threshold: Optional[int] = ...,
+    edgeitems: Optional[int] = ...,
+    linewidth: Optional[int] = ...,
+    suppress: Optional[bool] = ...,
+    nanstr: Optional[str] = ...,
+    infstr: Optional[str] = ...,
+    formatter: Optional[_FormatDict] = ...,
+    sign: Optional[Literal["-", "+", " "]] = ...,
+    floatmode: Optional[_FloatMode] = ...,
+    *,
+    legacy: Optional[Literal[False, "1.13", "1.21"]] = ...
+) -> None: ...
+def get_printoptions() -> _FormatOptions: ...
+def array2string(
+    a: ndarray[Any, Any],
+    max_line_width: Optional[int] = ...,
+    precision: Optional[SupportsIndex] = ...,
+    suppress_small: Optional[bool] = ...,
+    separator: str = ...,
+    prefix: str = ...,
+    # NOTE: With the `style` argument being deprecated,
+    # all arguments between `formatter` and `suffix` are de facto
+    # keyword-only arguments
+    *,
+    formatter: Optional[_FormatDict] = ...,
+    threshold: Optional[int] = ...,
+    edgeitems: Optional[int] = ...,
+    sign: Optional[Literal["-", "+", " "]] = ...,
+    floatmode: Optional[_FloatMode] = ...,
+    suffix: str = ...,
+    legacy: Optional[Literal[False, "1.13", "1.21"]] = ...,
+) -> str: ...
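+# Illustrative note: because `_FormatDict` is a `total=False` TypedDict, any
+# subset of its keys is valid, so a dict such as
+# `{"float_kind": lambda x: f"{x:.2f}"}` type-checks as the `formatter`
+# argument above.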
+def format_float_scientific(
+    x: _FloatLike_co,
+    precision: Optional[int] = ...,
+    unique: bool = ...,
+    trim: Literal["k", ".", "0", "-"] = ...,
+    sign: bool = ...,
+    pad_left: Optional[int] = ...,
+    exp_digits: Optional[int] = ...,
+    min_digits: Optional[int] = ...,
+) -> str: ...
+def format_float_positional(
+    x: _FloatLike_co,
+    precision: Optional[int] = ...,
+    unique: bool = ...,
+    fractional: bool = ...,
+    trim: Literal["k", ".", "0", "-"] = ...,
+    sign: bool = ...,
+    pad_left: Optional[int] = ...,
+    pad_right: Optional[int] = ...,
+    min_digits: Optional[int] = ...,
+) -> str: ...
+def array_repr(
+    arr: ndarray[Any, Any],
+    max_line_width: Optional[int] = ...,
+    precision: Optional[SupportsIndex] = ...,
+    suppress_small: Optional[bool] = ...,
+) -> str: ...
+def array_str(
+    a: ndarray[Any, Any],
+    max_line_width: Optional[int] = ...,
+    precision: Optional[SupportsIndex] = ...,
+    suppress_small: Optional[bool] = ...,
+) -> str: ...
+def set_string_function(
+    f: Optional[Callable[[ndarray[Any, Any]], str]], repr: bool = ...
+) -> None: ...
+def printoptions(
+    precision: Optional[SupportsIndex] = ...,
+    threshold: Optional[int] = ...,
+    edgeitems: Optional[int] = ...,
+    linewidth: Optional[int] = ...,
+    suppress: Optional[bool] = ...,
+    nanstr: Optional[str] = ...,
+    infstr: Optional[str] = ...,
+    formatter: Optional[_FormatDict] = ...,
+    sign: Optional[Literal["-", "+", " "]] = ...,
+    floatmode: Optional[_FloatMode] = ...,
+    *,
+    legacy: Optional[Literal[False, "1.13", "1.21"]] = ...
+) -> _GeneratorContextManager[_FormatOptions]: ...
diff --git a/.local/lib/python3.8/site-packages/numpy/core/cversions.py b/.local/lib/python3.8/site-packages/numpy/core/cversions.py
new file mode 100644
index 0000000..00159c3
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/core/cversions.py
@@ -0,0 +1,13 @@
+"""Simple script to compute the api hash of the current API.
+
+The API hash is defined by numpy_api_order and ufunc_api_order.
+
+"""
+from os.path import dirname
+
+from code_generators.genapi import fullapi_hash
+from code_generators.numpy_api import full_api
+
+if __name__ == '__main__':
+    curdir = dirname(__file__)
+    print(fullapi_hash(full_api))
diff --git a/.local/lib/python3.8/site-packages/numpy/core/defchararray.py b/.local/lib/python3.8/site-packages/numpy/core/defchararray.py
new file mode 100644
index 0000000..3521e77
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/core/defchararray.py
@@ -0,0 +1,2796 @@
+"""
+This module contains a set of functions for vectorized string
+operations and methods.
+
+.. note::
+   The `chararray` class exists for backwards compatibility with
+   Numarray; it is not recommended for new development. Starting from numpy
+   1.4, if one needs arrays of strings, it is recommended to use arrays of
+   `dtype` `object_`, `string_` or `unicode_`, and use the free functions
+   in the `numpy.char` module for fast vectorized string operations.
+
+Some methods will only be available if the corresponding string method is
+available in your version of Python.
+
+The preferred alias for `defchararray` is `numpy.char`.
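+
+For example, ``np.char.upper(np.array(['a', 'b']))`` applies ``str.upper``
+element-wise and returns ``array(['A', 'B'], dtype='<U1')``.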
+ +""" +import functools +from .numerictypes import ( + string_, unicode_, integer, int_, object_, bool_, character) +from .numeric import ndarray, compare_chararrays +from .numeric import array as narray +from numpy.core.multiarray import _vec_string +from numpy.core.overrides import set_module +from numpy.core import overrides +from numpy.compat import asbytes +import numpy + +__all__ = [ + 'equal', 'not_equal', 'greater_equal', 'less_equal', + 'greater', 'less', 'str_len', 'add', 'multiply', 'mod', 'capitalize', + 'center', 'count', 'decode', 'encode', 'endswith', 'expandtabs', + 'find', 'index', 'isalnum', 'isalpha', 'isdigit', 'islower', 'isspace', + 'istitle', 'isupper', 'join', 'ljust', 'lower', 'lstrip', 'partition', + 'replace', 'rfind', 'rindex', 'rjust', 'rpartition', 'rsplit', + 'rstrip', 'split', 'splitlines', 'startswith', 'strip', 'swapcase', + 'title', 'translate', 'upper', 'zfill', 'isnumeric', 'isdecimal', + 'array', 'asarray' + ] + + +_globalvar = 0 + +array_function_dispatch = functools.partial( + overrides.array_function_dispatch, module='numpy.char') + + +def _use_unicode(*args): + """ + Helper function for determining the output type of some string + operations. + + For an operation on two ndarrays, if at least one is unicode, the + result should be unicode. + """ + for x in args: + if (isinstance(x, str) or + issubclass(numpy.asarray(x).dtype.type, unicode_)): + return unicode_ + return string_ + +def _to_string_or_unicode_array(result): + """ + Helper function to cast a result back into a string or unicode array + if an object array must be used as an intermediary. + """ + return numpy.asarray(result.tolist()) + +def _clean_args(*args): + """ + Helper function for delegating arguments to Python string + functions. + + Many of the Python string operations that have optional arguments + do not use 'None' to indicate a default value. In these cases, + we need to remove all None arguments, and those following them. + """ + newargs = [] + for chk in args: + if chk is None: + break + newargs.append(chk) + return newargs + +def _get_num_chars(a): + """ + Helper function that returns the number of characters per field in + a string or unicode array. This is to abstract out the fact that + for a unicode array this is itemsize / 4. + """ + if issubclass(a.dtype.type, unicode_): + return a.itemsize // 4 + return a.itemsize + + +def _binary_op_dispatcher(x1, x2): + return (x1, x2) + + +@array_function_dispatch(_binary_op_dispatcher) +def equal(x1, x2): + """ + Return (x1 == x2) element-wise. + + Unlike `numpy.equal`, this comparison is performed by first + stripping whitespace characters from the end of the string. This + behavior is provided for backward-compatibility with numarray. + + Parameters + ---------- + x1, x2 : array_like of str or unicode + Input arrays of the same shape. + + Returns + ------- + out : ndarray + Output array of bools. + + See Also + -------- + not_equal, greater_equal, less_equal, greater, less + """ + return compare_chararrays(x1, x2, '==', True) + + +@array_function_dispatch(_binary_op_dispatcher) +def not_equal(x1, x2): + """ + Return (x1 != x2) element-wise. + + Unlike `numpy.not_equal`, this comparison is performed by first + stripping whitespace characters from the end of the string. This + behavior is provided for backward-compatibility with numarray. + + Parameters + ---------- + x1, x2 : array_like of str or unicode + Input arrays of the same shape. + + Returns + ------- + out : ndarray + Output array of bools. 
+ + See Also + -------- + equal, greater_equal, less_equal, greater, less + """ + return compare_chararrays(x1, x2, '!=', True) + + +@array_function_dispatch(_binary_op_dispatcher) +def greater_equal(x1, x2): + """ + Return (x1 >= x2) element-wise. + + Unlike `numpy.greater_equal`, this comparison is performed by + first stripping whitespace characters from the end of the string. + This behavior is provided for backward-compatibility with + numarray. + + Parameters + ---------- + x1, x2 : array_like of str or unicode + Input arrays of the same shape. + + Returns + ------- + out : ndarray + Output array of bools. + + See Also + -------- + equal, not_equal, less_equal, greater, less + """ + return compare_chararrays(x1, x2, '>=', True) + + +@array_function_dispatch(_binary_op_dispatcher) +def less_equal(x1, x2): + """ + Return (x1 <= x2) element-wise. + + Unlike `numpy.less_equal`, this comparison is performed by first + stripping whitespace characters from the end of the string. This + behavior is provided for backward-compatibility with numarray. + + Parameters + ---------- + x1, x2 : array_like of str or unicode + Input arrays of the same shape. + + Returns + ------- + out : ndarray + Output array of bools. + + See Also + -------- + equal, not_equal, greater_equal, greater, less + """ + return compare_chararrays(x1, x2, '<=', True) + + +@array_function_dispatch(_binary_op_dispatcher) +def greater(x1, x2): + """ + Return (x1 > x2) element-wise. + + Unlike `numpy.greater`, this comparison is performed by first + stripping whitespace characters from the end of the string. This + behavior is provided for backward-compatibility with numarray. + + Parameters + ---------- + x1, x2 : array_like of str or unicode + Input arrays of the same shape. + + Returns + ------- + out : ndarray + Output array of bools. + + See Also + -------- + equal, not_equal, greater_equal, less_equal, less + """ + return compare_chararrays(x1, x2, '>', True) + + +@array_function_dispatch(_binary_op_dispatcher) +def less(x1, x2): + """ + Return (x1 < x2) element-wise. + + Unlike `numpy.greater`, this comparison is performed by first + stripping whitespace characters from the end of the string. This + behavior is provided for backward-compatibility with numarray. + + Parameters + ---------- + x1, x2 : array_like of str or unicode + Input arrays of the same shape. + + Returns + ------- + out : ndarray + Output array of bools. + + See Also + -------- + equal, not_equal, greater_equal, less_equal, greater + """ + return compare_chararrays(x1, x2, '<', True) + + +def _unary_op_dispatcher(a): + return (a,) + + +@array_function_dispatch(_unary_op_dispatcher) +def str_len(a): + """ + Return len(a) element-wise. + + Parameters + ---------- + a : array_like of str or unicode + + Returns + ------- + out : ndarray + Output array of integers + + See Also + -------- + builtins.len + """ + # Note: __len__, etc. currently return ints, which are not C-integers. + # Generally intp would be expected for lengths, although int is sufficient + # due to the dtype itemsize limitation. + return _vec_string(a, int_, '__len__') + + +@array_function_dispatch(_binary_op_dispatcher) +def add(x1, x2): + """ + Return element-wise string concatenation for two arrays of str or unicode. + + Arrays `x1` and `x2` must have the same shape. + + Parameters + ---------- + x1 : array_like of str or unicode + Input array. + x2 : array_like of str or unicode + Input array. 
+ + Returns + ------- + add : ndarray + Output array of `string_` or `unicode_`, depending on input types + of the same shape as `x1` and `x2`. + + """ + arr1 = numpy.asarray(x1) + arr2 = numpy.asarray(x2) + out_size = _get_num_chars(arr1) + _get_num_chars(arr2) + dtype = _use_unicode(arr1, arr2) + return _vec_string(arr1, (dtype, out_size), '__add__', (arr2,)) + + +def _multiply_dispatcher(a, i): + return (a,) + + +@array_function_dispatch(_multiply_dispatcher) +def multiply(a, i): + """ + Return (a * i), that is string multiple concatenation, + element-wise. + + Values in `i` of less than 0 are treated as 0 (which yields an + empty string). + + Parameters + ---------- + a : array_like of str or unicode + + i : array_like of ints + + Returns + ------- + out : ndarray + Output array of str or unicode, depending on input types + + """ + a_arr = numpy.asarray(a) + i_arr = numpy.asarray(i) + if not issubclass(i_arr.dtype.type, integer): + raise ValueError("Can only multiply by integers") + out_size = _get_num_chars(a_arr) * max(int(i_arr.max()), 0) + return _vec_string( + a_arr, (a_arr.dtype.type, out_size), '__mul__', (i_arr,)) + + +def _mod_dispatcher(a, values): + return (a, values) + + +@array_function_dispatch(_mod_dispatcher) +def mod(a, values): + """ + Return (a % i), that is pre-Python 2.6 string formatting + (interpolation), element-wise for a pair of array_likes of str + or unicode. + + Parameters + ---------- + a : array_like of str or unicode + + values : array_like of values + These values will be element-wise interpolated into the string. + + Returns + ------- + out : ndarray + Output array of str or unicode, depending on input types + + See Also + -------- + str.__mod__ + + """ + return _to_string_or_unicode_array( + _vec_string(a, object_, '__mod__', (values,))) + + +@array_function_dispatch(_unary_op_dispatcher) +def capitalize(a): + """ + Return a copy of `a` with only the first character of each element + capitalized. + + Calls `str.capitalize` element-wise. + + For 8-bit strings, this method is locale-dependent. + + Parameters + ---------- + a : array_like of str or unicode + Input array of strings to capitalize. + + Returns + ------- + out : ndarray + Output array of str or unicode, depending on input + types + + See Also + -------- + str.capitalize + + Examples + -------- + >>> c = np.array(['a1b2','1b2a','b2a1','2a1b'],'S4'); c + array(['a1b2', '1b2a', 'b2a1', '2a1b'], + dtype='|S4') + >>> np.char.capitalize(c) + array(['A1b2', '1b2a', 'B2a1', '2a1b'], + dtype='|S4') + + """ + a_arr = numpy.asarray(a) + return _vec_string(a_arr, a_arr.dtype, 'capitalize') + + +def _center_dispatcher(a, width, fillchar=None): + return (a,) + + +@array_function_dispatch(_center_dispatcher) +def center(a, width, fillchar=' '): + """ + Return a copy of `a` with its elements centered in a string of + length `width`. + + Calls `str.center` element-wise. + + Parameters + ---------- + a : array_like of str or unicode + + width : int + The length of the resulting strings + fillchar : str or unicode, optional + The padding character to use (default is space). 
+
+    Returns
+    -------
+    out : ndarray
+        Output array of str or unicode, depending on input types
+
+    See Also
+    --------
+    str.center
+
+    """
+    a_arr = numpy.asarray(a)
+    width_arr = numpy.asarray(width)
+    size = int(numpy.max(width_arr.flat))
+    if numpy.issubdtype(a_arr.dtype, numpy.string_):
+        fillchar = asbytes(fillchar)
+    return _vec_string(
+        a_arr, (a_arr.dtype.type, size), 'center', (width_arr, fillchar))
+
+
+def _count_dispatcher(a, sub, start=None, end=None):
+    return (a,)
+
+
+@array_function_dispatch(_count_dispatcher)
+def count(a, sub, start=0, end=None):
+    """
+    Returns an array with the number of non-overlapping occurrences of
+    substring `sub` in the range [`start`, `end`].
+
+    Calls `str.count` element-wise.
+
+    Parameters
+    ----------
+    a : array_like of str or unicode
+
+    sub : str or unicode
+        The substring to search for.
+
+    start, end : int, optional
+        Optional arguments `start` and `end` are interpreted as slice
+        notation to specify the range in which to count.
+
+    Returns
+    -------
+    out : ndarray
+        Output array of ints.
+
+    See Also
+    --------
+    str.count
+
+    Examples
+    --------
+    >>> c = np.array(['aAaAaA', '  aA  ', 'abBABba'])
+    >>> c
+    array(['aAaAaA', '  aA  ', 'abBABba'], dtype='<U7')
+    >>> np.char.count(c, 'A')
+    array([3, 1, 1])
+    >>> np.char.count(c, 'aA')
+    array([3, 1, 0])
+    >>> np.char.count(c, 'A', start=1, end=4)
+    array([2, 1, 1])
+    >>> np.char.count(c, 'A', start=1, end=3)
+    array([1, 0, 0])
+
+    """
+    return _vec_string(a, int_, 'count', [sub, start] + _clean_args(end))
+
+
+def _code_dispatcher(a, encoding=None, errors=None):
+    return (a,)
+
+
+@array_function_dispatch(_code_dispatcher)
+def decode(a, encoding=None, errors=None):
+    """
+    Calls `str.decode` element-wise.
+
+    The set of available codecs comes from the Python standard library,
+    and may be extended at runtime. For more information, see the
+    :mod:`codecs` module.
+
+    Parameters
+    ----------
+    a : array_like of str or unicode
+
+    encoding : str, optional
+        The name of an encoding
+
+    errors : str, optional
+        Specifies how to handle encoding errors
+
+    Returns
+    -------
+    out : ndarray
+
+    See Also
+    --------
+    str.decode
+
+    Notes
+    -----
+    The type of the result will depend on the encoding specified.
+
+    Examples
+    --------
+    >>> c = np.array(['aAaAaA', '  aA  ', 'abBABba'])
+    >>> c
+    array(['aAaAaA', '  aA  ', 'abBABba'], dtype='<U7')
+    >>> np.char.encode(c, encoding='cp037')
+    array(['\\x81\\xc1\\x81\\xc1\\x81\\xc1', '@@\\x81\\xc1@@',
+        '\\x81\\x82\\xc2\\xc1\\xc2\\x82\\x81'],
+        dtype='|S7')
+
+    """
+    return _to_string_or_unicode_array(
+        _vec_string(a, object_, 'decode', _clean_args(encoding, errors)))
+
+
+@array_function_dispatch(_code_dispatcher)
+def encode(a, encoding=None, errors=None):
+    """
+    Calls `str.encode` element-wise.
+
+    The set of available codecs comes from the Python standard library,
+    and may be extended at runtime. For more information, see the codecs
+    module.
+
+    Parameters
+    ----------
+    a : array_like of str or unicode
+
+    encoding : str, optional
+        The name of an encoding
+
+    errors : str, optional
+        Specifies how to handle encoding errors
+
+    Returns
+    -------
+    out : ndarray
+
+    See Also
+    --------
+    str.encode
+
+    Notes
+    -----
+    The type of the result will depend on the encoding specified.
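+
+    Examples
+    --------
+    Encoding to ASCII bytes (illustrative):
+
+    >>> np.char.encode(np.array(['abc', 'de']), encoding='ascii')
+    array([b'abc', b'de'], dtype='|S3')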
+ + """ + return _to_string_or_unicode_array( + _vec_string(a, object_, 'encode', _clean_args(encoding, errors))) + + +def _endswith_dispatcher(a, suffix, start=None, end=None): + return (a,) + + +@array_function_dispatch(_endswith_dispatcher) +def endswith(a, suffix, start=0, end=None): + """ + Returns a boolean array which is `True` where the string element + in `a` ends with `suffix`, otherwise `False`. + + Calls `str.endswith` element-wise. + + Parameters + ---------- + a : array_like of str or unicode + + suffix : str + + start, end : int, optional + With optional `start`, test beginning at that position. With + optional `end`, stop comparing at that position. + + Returns + ------- + out : ndarray + Outputs an array of bools. + + See Also + -------- + str.endswith + + Examples + -------- + >>> s = np.array(['foo', 'bar']) + >>> s[0] = 'foo' + >>> s[1] = 'bar' + >>> s + array(['foo', 'bar'], dtype='>> np.char.endswith(s, 'ar') + array([False, True]) + >>> np.char.endswith(s, 'a', start=1, end=2) + array([False, True]) + + """ + return _vec_string( + a, bool_, 'endswith', [suffix, start] + _clean_args(end)) + + +def _expandtabs_dispatcher(a, tabsize=None): + return (a,) + + +@array_function_dispatch(_expandtabs_dispatcher) +def expandtabs(a, tabsize=8): + """ + Return a copy of each string element where all tab characters are + replaced by one or more spaces. + + Calls `str.expandtabs` element-wise. + + Return a copy of each string element where all tab characters are + replaced by one or more spaces, depending on the current column + and the given `tabsize`. The column number is reset to zero after + each newline occurring in the string. This doesn't understand other + non-printing characters or escape sequences. + + Parameters + ---------- + a : array_like of str or unicode + Input array + tabsize : int, optional + Replace tabs with `tabsize` number of spaces. If not given defaults + to 8 spaces. + + Returns + ------- + out : ndarray + Output array of str or unicode, depending on input type + + See Also + -------- + str.expandtabs + + """ + return _to_string_or_unicode_array( + _vec_string(a, object_, 'expandtabs', (tabsize,))) + + +@array_function_dispatch(_count_dispatcher) +def find(a, sub, start=0, end=None): + """ + For each element, return the lowest index in the string where + substring `sub` is found. + + Calls `str.find` element-wise. + + For each element, return the lowest index in the string where + substring `sub` is found, such that `sub` is contained in the + range [`start`, `end`]. + + Parameters + ---------- + a : array_like of str or unicode + + sub : str or unicode + + start, end : int, optional + Optional arguments `start` and `end` are interpreted as in + slice notation. + + Returns + ------- + out : ndarray or int + Output array of ints. Returns -1 if `sub` is not found. + + See Also + -------- + str.find + + """ + return _vec_string( + a, int_, 'find', [sub, start] + _clean_args(end)) + + +@array_function_dispatch(_count_dispatcher) +def index(a, sub, start=0, end=None): + """ + Like `find`, but raises `ValueError` when the substring is not found. + + Calls `str.index` element-wise. + + Parameters + ---------- + a : array_like of str or unicode + + sub : str or unicode + + start, end : int, optional + + Returns + ------- + out : ndarray + Output array of ints. Returns -1 if `sub` is not found. 
+ + See Also + -------- + find, str.find + + """ + return _vec_string( + a, int_, 'index', [sub, start] + _clean_args(end)) + + +@array_function_dispatch(_unary_op_dispatcher) +def isalnum(a): + """ + Returns true for each element if all characters in the string are + alphanumeric and there is at least one character, false otherwise. + + Calls `str.isalnum` element-wise. + + For 8-bit strings, this method is locale-dependent. + + Parameters + ---------- + a : array_like of str or unicode + + Returns + ------- + out : ndarray + Output array of str or unicode, depending on input type + + See Also + -------- + str.isalnum + """ + return _vec_string(a, bool_, 'isalnum') + + +@array_function_dispatch(_unary_op_dispatcher) +def isalpha(a): + """ + Returns true for each element if all characters in the string are + alphabetic and there is at least one character, false otherwise. + + Calls `str.isalpha` element-wise. + + For 8-bit strings, this method is locale-dependent. + + Parameters + ---------- + a : array_like of str or unicode + + Returns + ------- + out : ndarray + Output array of bools + + See Also + -------- + str.isalpha + """ + return _vec_string(a, bool_, 'isalpha') + + +@array_function_dispatch(_unary_op_dispatcher) +def isdigit(a): + """ + Returns true for each element if all characters in the string are + digits and there is at least one character, false otherwise. + + Calls `str.isdigit` element-wise. + + For 8-bit strings, this method is locale-dependent. + + Parameters + ---------- + a : array_like of str or unicode + + Returns + ------- + out : ndarray + Output array of bools + + See Also + -------- + str.isdigit + """ + return _vec_string(a, bool_, 'isdigit') + + +@array_function_dispatch(_unary_op_dispatcher) +def islower(a): + """ + Returns true for each element if all cased characters in the + string are lowercase and there is at least one cased character, + false otherwise. + + Calls `str.islower` element-wise. + + For 8-bit strings, this method is locale-dependent. + + Parameters + ---------- + a : array_like of str or unicode + + Returns + ------- + out : ndarray + Output array of bools + + See Also + -------- + str.islower + """ + return _vec_string(a, bool_, 'islower') + + +@array_function_dispatch(_unary_op_dispatcher) +def isspace(a): + """ + Returns true for each element if there are only whitespace + characters in the string and there is at least one character, + false otherwise. + + Calls `str.isspace` element-wise. + + For 8-bit strings, this method is locale-dependent. + + Parameters + ---------- + a : array_like of str or unicode + + Returns + ------- + out : ndarray + Output array of bools + + See Also + -------- + str.isspace + """ + return _vec_string(a, bool_, 'isspace') + + +@array_function_dispatch(_unary_op_dispatcher) +def istitle(a): + """ + Returns true for each element if the element is a titlecased + string and there is at least one character, false otherwise. + + Call `str.istitle` element-wise. + + For 8-bit strings, this method is locale-dependent. + + Parameters + ---------- + a : array_like of str or unicode + + Returns + ------- + out : ndarray + Output array of bools + + See Also + -------- + str.istitle + """ + return _vec_string(a, bool_, 'istitle') + + +@array_function_dispatch(_unary_op_dispatcher) +def isupper(a): + """ + Returns true for each element if all cased characters in the + string are uppercase and there is at least one character, false + otherwise. + + Call `str.isupper` element-wise. 
+
+    For 8-bit strings, this method is locale-dependent.
+
+    Parameters
+    ----------
+    a : array_like of str or unicode
+
+    Returns
+    -------
+    out : ndarray
+        Output array of bools
+
+    See Also
+    --------
+    str.isupper
+    """
+    return _vec_string(a, bool_, 'isupper')
+
+
+def _join_dispatcher(sep, seq):
+    return (sep, seq)
+
+
+@array_function_dispatch(_join_dispatcher)
+def join(sep, seq):
+    """
+    Return a string which is the concatenation of the strings in the
+    sequence `seq`.
+
+    Calls `str.join` element-wise.
+
+    Parameters
+    ----------
+    sep : array_like of str or unicode
+    seq : array_like of str or unicode
+
+    Returns
+    -------
+    out : ndarray
+        Output array of str or unicode, depending on input types
+
+    See Also
+    --------
+    str.join
+    """
+    return _to_string_or_unicode_array(
+        _vec_string(sep, object_, 'join', (seq,)))
+
+
+def _just_dispatcher(a, width, fillchar=None):
+    return (a,)
+
+
+@array_function_dispatch(_just_dispatcher)
+def ljust(a, width, fillchar=' '):
+    """
+    Return an array with the elements of `a` left-justified in a
+    string of length `width`.
+
+    Calls `str.ljust` element-wise.
+
+    Parameters
+    ----------
+    a : array_like of str or unicode
+
+    width : int
+        The length of the resulting strings
+    fillchar : str or unicode, optional
+        The character to use for padding
+
+    Returns
+    -------
+    out : ndarray
+        Output array of str or unicode, depending on input type
+
+    See Also
+    --------
+    str.ljust
+
+    """
+    a_arr = numpy.asarray(a)
+    width_arr = numpy.asarray(width)
+    size = int(numpy.max(width_arr.flat))
+    if numpy.issubdtype(a_arr.dtype, numpy.string_):
+        fillchar = asbytes(fillchar)
+    return _vec_string(
+        a_arr, (a_arr.dtype.type, size), 'ljust', (width_arr, fillchar))
+
+
+@array_function_dispatch(_unary_op_dispatcher)
+def lower(a):
+    """
+    Return an array with the elements converted to lowercase.
+
+    Calls `str.lower` element-wise.
+
+    For 8-bit strings, this method is locale-dependent.
+
+    Parameters
+    ----------
+    a : array_like, {str, unicode}
+        Input array.
+
+    Returns
+    -------
+    out : ndarray, {str, unicode}
+        Output array of str or unicode, depending on input type
+
+    See Also
+    --------
+    str.lower
+
+    Examples
+    --------
+    >>> c = np.array(['A1B C', '1BCA', 'BCA1']); c
+    array(['A1B C', '1BCA', 'BCA1'], dtype='<U5')
+    >>> np.char.lower(c)
+    array(['a1b c', '1bca', 'bca1'], dtype='<U5')
+
+    """
+    a_arr = numpy.asarray(a)
+    return _vec_string(a_arr, a_arr.dtype, 'lower')
+
+
+def _strip_dispatcher(a, chars=None):
+    return (a,)
+
+
+@array_function_dispatch(_strip_dispatcher)
+def lstrip(a, chars=None):
+    """
+    For each element in `a`, return a copy with the leading characters
+    removed.
+
+    Calls `str.lstrip` element-wise.
+
+    Parameters
+    ----------
+    a : array-like of str or unicode
+
+    chars : str or unicode, optional
+        The `chars` argument is a string specifying the set of
+        characters to be removed. If omitted or None, the `chars`
+        argument defaults to removing whitespace. The `chars` argument
+        is not a prefix; rather, all combinations of its values are
+        stripped.
+
+    Returns
+    -------
+    out : ndarray
+        Output array of str or unicode, depending on input type
+
+    See Also
+    --------
+    str.lstrip
+
+    Examples
+    --------
+    >>> c = np.array(['aAaAaA', '  aA  ', 'abBABba'])
+    >>> c
+    array(['aAaAaA', '  aA  ', 'abBABba'], dtype='<U7')
+    >>> np.char.lstrip(c, 'a')
+    array(['AaAaA', '  aA  ', 'bBABba'], dtype='<U7')
+    >>> np.char.lstrip(c, 'A') # leaves c unchanged
+    array(['aAaAaA', '  aA  ', 'abBABba'], dtype='<U7')
+    >>> (np.char.lstrip(c, ' ') == np.char.lstrip(c, '')).all()
+    ... # XXX: is this a regression? This used to return True
+    ... # np.char.lstrip(c,'') does not modify c at all.
+    False
+    >>> (np.char.lstrip(c, ' ') == np.char.lstrip(c, None)).all()
+    True
+
+    """
+    a_arr = numpy.asarray(a)
+    return _vec_string(a_arr, a_arr.dtype, 'lstrip', (chars,))
+
+
+def _partition_dispatcher(a, sep):
+    return (a,)
+
+
+@array_function_dispatch(_partition_dispatcher)
+def partition(a, sep):
+    """
+    Partition each element in `a` around `sep`.
+
+    Calls `str.partition` element-wise.
+
+    For each element in `a`, split the element as the first
+    occurrence of `sep`, and return 3 strings containing the part
+    before the separator, the separator itself, and the part after
+    the separator. If the separator is not found, return 3 strings
+    containing the string itself, followed by two empty strings.
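+
+    For example, ``np.char.partition(np.array(['what is what']), ' is ')``
+    gives ``array([['what', ' is ', 'what']], dtype='<U4')``.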
+ + Parameters + ---------- + a : array_like, {str, unicode} + Input array + sep : {str, unicode} + Separator to split each string element in `a`. + + Returns + ------- + out : ndarray, {str, unicode} + Output array of str or unicode, depending on input type. + The output array will have an extra dimension with 3 + elements per input element. + + See Also + -------- + str.partition + + """ + return _to_string_or_unicode_array( + _vec_string(a, object_, 'partition', (sep,))) + + +def _replace_dispatcher(a, old, new, count=None): + return (a,) + + +@array_function_dispatch(_replace_dispatcher) +def replace(a, old, new, count=None): + """ + For each element in `a`, return a copy of the string with all + occurrences of substring `old` replaced by `new`. + + Calls `str.replace` element-wise. + + Parameters + ---------- + a : array-like of str or unicode + + old, new : str or unicode + + count : int, optional + If the optional argument `count` is given, only the first + `count` occurrences are replaced. + + Returns + ------- + out : ndarray + Output array of str or unicode, depending on input type + + See Also + -------- + str.replace + + """ + return _to_string_or_unicode_array( + _vec_string( + a, object_, 'replace', [old, new] + _clean_args(count))) + + +@array_function_dispatch(_count_dispatcher) +def rfind(a, sub, start=0, end=None): + """ + For each element in `a`, return the highest index in the string + where substring `sub` is found, such that `sub` is contained + within [`start`, `end`]. + + Calls `str.rfind` element-wise. + + Parameters + ---------- + a : array-like of str or unicode + + sub : str or unicode + + start, end : int, optional + Optional arguments `start` and `end` are interpreted as in + slice notation. + + Returns + ------- + out : ndarray + Output array of ints. Return -1 on failure. + + See Also + -------- + str.rfind + + """ + return _vec_string( + a, int_, 'rfind', [sub, start] + _clean_args(end)) + + +@array_function_dispatch(_count_dispatcher) +def rindex(a, sub, start=0, end=None): + """ + Like `rfind`, but raises `ValueError` when the substring `sub` is + not found. + + Calls `str.rindex` element-wise. + + Parameters + ---------- + a : array-like of str or unicode + + sub : str or unicode + + start, end : int, optional + + Returns + ------- + out : ndarray + Output array of ints. + + See Also + -------- + rfind, str.rindex + + """ + return _vec_string( + a, int_, 'rindex', [sub, start] + _clean_args(end)) + + +@array_function_dispatch(_just_dispatcher) +def rjust(a, width, fillchar=' '): + """ + Return an array with the elements of `a` right-justified in a + string of length `width`. + + Calls `str.rjust` element-wise. + + Parameters + ---------- + a : array_like of str or unicode + + width : int + The length of the resulting strings + fillchar : str or unicode, optional + The character to use for padding + + Returns + ------- + out : ndarray + Output array of str or unicode, depending on input type + + See Also + -------- + str.rjust + + """ + a_arr = numpy.asarray(a) + width_arr = numpy.asarray(width) + size = int(numpy.max(width_arr.flat)) + if numpy.issubdtype(a_arr.dtype, numpy.string_): + fillchar = asbytes(fillchar) + return _vec_string( + a_arr, (a_arr.dtype.type, size), 'rjust', (width_arr, fillchar)) + + +@array_function_dispatch(_partition_dispatcher) +def rpartition(a, sep): + """ + Partition (split) each element around the right-most separator. + + Calls `str.rpartition` element-wise. 
+ + For each element in `a`, split the element as the last + occurrence of `sep`, and return 3 strings containing the part + before the separator, the separator itself, and the part after + the separator. If the separator is not found, return 3 strings + containing the string itself, followed by two empty strings. + + Parameters + ---------- + a : array_like of str or unicode + Input array + sep : str or unicode + Right-most separator to split each element in array. + + Returns + ------- + out : ndarray + Output array of string or unicode, depending on input + type. The output array will have an extra dimension with + 3 elements per input element. + + See Also + -------- + str.rpartition + + """ + return _to_string_or_unicode_array( + _vec_string(a, object_, 'rpartition', (sep,))) + + +def _split_dispatcher(a, sep=None, maxsplit=None): + return (a,) + + +@array_function_dispatch(_split_dispatcher) +def rsplit(a, sep=None, maxsplit=None): + """ + For each element in `a`, return a list of the words in the + string, using `sep` as the delimiter string. + + Calls `str.rsplit` element-wise. + + Except for splitting from the right, `rsplit` + behaves like `split`. + + Parameters + ---------- + a : array_like of str or unicode + + sep : str or unicode, optional + If `sep` is not specified or None, any whitespace string + is a separator. + maxsplit : int, optional + If `maxsplit` is given, at most `maxsplit` splits are done, + the rightmost ones. + + Returns + ------- + out : ndarray + Array of list objects + + See Also + -------- + str.rsplit, split + + """ + # This will return an array of lists of different sizes, so we + # leave it as an object array + return _vec_string( + a, object_, 'rsplit', [sep] + _clean_args(maxsplit)) + + +def _strip_dispatcher(a, chars=None): + return (a,) + + +@array_function_dispatch(_strip_dispatcher) +def rstrip(a, chars=None): + """ + For each element in `a`, return a copy with the trailing + characters removed. + + Calls `str.rstrip` element-wise. + + Parameters + ---------- + a : array-like of str or unicode + + chars : str or unicode, optional + The `chars` argument is a string specifying the set of + characters to be removed. If omitted or None, the `chars` + argument defaults to removing whitespace. The `chars` argument + is not a suffix; rather, all combinations of its values are + stripped. + + Returns + ------- + out : ndarray + Output array of str or unicode, depending on input type + + See Also + -------- + str.rstrip + + Examples + -------- + >>> c = np.array(['aAaAaA', 'abBABba'], dtype='S7'); c + array(['aAaAaA', 'abBABba'], + dtype='|S7') + >>> np.char.rstrip(c, b'a') + array(['aAaAaA', 'abBABb'], + dtype='|S7') + >>> np.char.rstrip(c, b'A') + array(['aAaAa', 'abBABba'], + dtype='|S7') + + """ + a_arr = numpy.asarray(a) + return _vec_string(a_arr, a_arr.dtype, 'rstrip', (chars,)) + + +@array_function_dispatch(_split_dispatcher) +def split(a, sep=None, maxsplit=None): + """ + For each element in `a`, return a list of the words in the + string, using `sep` as the delimiter string. + + Calls `str.split` element-wise. + + Parameters + ---------- + a : array_like of str or unicode + + sep : str or unicode, optional + If `sep` is not specified or None, any whitespace string is a + separator. + + maxsplit : int, optional + If `maxsplit` is given, at most `maxsplit` splits are done. 
+
+    Returns
+    -------
+    out : ndarray
+        Array of list objects
+
+    See Also
+    --------
+    str.split, rsplit
+
+    """
+    # This will return an array of lists of different sizes, so we
+    # leave it as an object array
+    return _vec_string(
+        a, object_, 'split', [sep] + _clean_args(maxsplit))
+
+
+def _splitlines_dispatcher(a, keepends=None):
+    return (a,)
+
+
+@array_function_dispatch(_splitlines_dispatcher)
+def splitlines(a, keepends=None):
+    """
+    For each element in `a`, return a list of the lines in the
+    element, breaking at line boundaries.
+
+    Calls `str.splitlines` element-wise.
+
+    Parameters
+    ----------
+    a : array_like of str or unicode
+
+    keepends : bool, optional
+        Line breaks are not included in the resulting list unless
+        keepends is given and true.
+
+    Returns
+    -------
+    out : ndarray
+        Array of list objects
+
+    See Also
+    --------
+    str.splitlines
+
+    """
+    return _vec_string(
+        a, object_, 'splitlines', _clean_args(keepends))
+
+
+def _startswith_dispatcher(a, prefix, start=None, end=None):
+    return (a,)
+
+
+@array_function_dispatch(_startswith_dispatcher)
+def startswith(a, prefix, start=0, end=None):
+    """
+    Returns a boolean array which is `True` where the string element
+    in `a` starts with `prefix`, otherwise `False`.
+
+    Calls `str.startswith` element-wise.
+
+    Parameters
+    ----------
+    a : array_like of str or unicode
+
+    prefix : str
+
+    start, end : int, optional
+        With optional `start`, test beginning at that position. With
+        optional `end`, stop comparing at that position.
+
+    Returns
+    -------
+    out : ndarray
+        Array of booleans
+
+    See Also
+    --------
+    str.startswith
+
+    """
+    return _vec_string(
+        a, bool_, 'startswith', [prefix, start] + _clean_args(end))
+
+
+@array_function_dispatch(_strip_dispatcher)
+def strip(a, chars=None):
+    """
+    For each element in `a`, return a copy with the leading and
+    trailing characters removed.
+
+    Calls `str.strip` element-wise.
+
+    Parameters
+    ----------
+    a : array-like of str or unicode
+
+    chars : str or unicode, optional
+        The `chars` argument is a string specifying the set of
+        characters to be removed. If omitted or None, the `chars`
+        argument defaults to removing whitespace. The `chars` argument
+        is not a prefix or suffix; rather, all combinations of its
+        values are stripped.
+
+    Returns
+    -------
+    out : ndarray
+        Output array of str or unicode, depending on input type
+
+    See Also
+    --------
+    str.strip
+
+    Examples
+    --------
+    >>> c = np.array(['aAaAaA', '  aA  ', 'abBABba'])
+    >>> c
+    array(['aAaAaA', '  aA  ', 'abBABba'], dtype='<U7')
+    >>> np.char.strip(c)
+    array(['aAaAaA', 'aA', 'abBABba'], dtype='<U7')
+    >>> np.char.strip(c, 'a') # 'a' unstripped from c[1] because whitespace leads
+    array(['AaAaA', '  aA  ', 'bBABb'], dtype='<U7')
+    >>> np.char.strip(c, 'A') # 'A' unstripped from c[1] because (unprinted) ws trails
+    array(['aAaAa', '  aA  ', 'abBABba'], dtype='<U7')
+
+    """
+    a_arr = numpy.asarray(a)
+    return _vec_string(a_arr, a_arr.dtype, 'strip', (chars,))
+
+
+@array_function_dispatch(_unary_op_dispatcher)
+def swapcase(a):
+    """
+    Return element-wise a copy of the string with
+    uppercase characters converted to lowercase and vice versa.
+
+    Calls `str.swapcase` element-wise.
+
+    For 8-bit strings, this method is locale-dependent.
+
+    Parameters
+    ----------
+    a : array_like, {str, unicode}
+        Input array.
+
+    Returns
+    -------
+    out : ndarray, {str, unicode}
+        Output array of str or unicode, depending on input type
+
+    See Also
+    --------
+    str.swapcase
+
+    Examples
+    --------
+    >>> c=np.array(['a1B c','1b Ca','b Ca1','cA1b'],'S5'); c
+    array(['a1B c', '1b Ca', 'b Ca1', 'cA1b'],
+        dtype='|S5')
+    >>> np.char.swapcase(c)
+    array(['A1b C', '1B cA', 'B cA1', 'Ca1B'],
+        dtype='|S5')
+
+    """
+    a_arr = numpy.asarray(a)
+    return _vec_string(a_arr, a_arr.dtype, 'swapcase')
+
+
+@array_function_dispatch(_unary_op_dispatcher)
+def title(a):
+    """
+    Return element-wise title cased version of string or unicode.
+
+    Title case words start with uppercase characters, all remaining cased
+    characters are lowercase.
+
+    Calls `str.title` element-wise.
+
+    For 8-bit strings, this method is locale-dependent.
+
+    Parameters
+    ----------
+    a : array_like, {str, unicode}
+        Input array.
+
+    Returns
+    -------
+    out : ndarray
+        Output array of str or unicode, depending on input type
+
+    See Also
+    --------
+    str.title
+
+    Examples
+    --------
+    >>> c=np.array(['a1b c','1b ca','b ca1','ca1b'],'S5'); c
+    array(['a1b c', '1b ca', 'b ca1', 'ca1b'],
+        dtype='|S5')
+    >>> np.char.title(c)
+    array(['A1B C', '1B Ca', 'B Ca1', 'Ca1B'],
+        dtype='|S5')
+
+    """
+    a_arr = numpy.asarray(a)
+    return _vec_string(a_arr, a_arr.dtype, 'title')
+
+
+def _translate_dispatcher(a, table, deletechars=None):
+    return (a,)
+
+
+@array_function_dispatch(_translate_dispatcher)
+def translate(a, table, deletechars=None):
+    """
+    For each element in `a`, return a copy of the string where all
+    characters occurring in the optional argument `deletechars` are
+    removed, and the remaining characters have been mapped through the
+    given translation table.
+
+    Calls `str.translate` element-wise.
+
+    Parameters
+    ----------
+    a : array-like of str or unicode
+
+    table : str of length 256
+
+    deletechars : str
+
+    Returns
+    -------
+    out : ndarray
+        Output array of str or unicode, depending on input type
+
+    See Also
+    --------
+    str.translate
+
+    """
+    a_arr = numpy.asarray(a)
+    if issubclass(a_arr.dtype.type, unicode_):
+        return _vec_string(
+            a_arr, a_arr.dtype, 'translate', (table,))
+    else:
+        return _vec_string(
+            a_arr, a_arr.dtype, 'translate', [table] + _clean_args(deletechars))
+
+
+@array_function_dispatch(_unary_op_dispatcher)
+def upper(a):
+    """
+    Return an array with the elements converted to uppercase.
+
+    Calls `str.upper` element-wise.
+
+    For 8-bit strings, this method is locale-dependent.
+
+    Parameters
+    ----------
+    a : array_like, {str, unicode}
+        Input array.
+
+    Returns
+    -------
+    out : ndarray, {str, unicode}
+        Output array of str or unicode, depending on input type
+
+    See Also
+    --------
+    str.upper
+
+    Examples
+    --------
+    >>> c = np.array(['a1b c', '1bca', 'bca1']); c
+    array(['a1b c', '1bca', 'bca1'], dtype='<U5')
+    >>> np.char.upper(c)
+    array(['A1B C', '1BCA', 'BCA1'], dtype='<U5')
+
+    """
+    a_arr = numpy.asarray(a)
+    return _vec_string(a_arr, a_arr.dtype, 'upper')
+
+
+def _zfill_dispatcher(a, width):
+    return (a,)
+
+
+@array_function_dispatch(_zfill_dispatcher)
+def zfill(a, width):
+    """
+    Return the numeric string left-filled with zeros in a string of
+    length `width`.
+
+    Calls `str.zfill` element-wise.
+
+    Parameters
+    ----------
+    a : array_like, {str, unicode}
+        Input array.
+    width : int
+        Width of string to left-fill elements in `a`.
+
+    Returns
+    -------
+    out : ndarray, {str, unicode}
+        Output array of str or unicode, depending on input type
+
+    See Also
+    --------
+    str.zfill
+
+    """
+    a_arr = numpy.asarray(a)
+    width_arr = numpy.asarray(width)
+    size = int(numpy.max(width_arr.flat))
+    return _vec_string(
+        a_arr, (a_arr.dtype.type, size), 'zfill', (width_arr,))
+
+
+@array_function_dispatch(_unary_op_dispatcher)
+def isnumeric(a):
+    """
+    For each element, return True if there are only numeric
+    characters in the element.
+
+    Calls `unicode.isnumeric` element-wise.
+
+    Numeric characters include digit characters, and all characters
+    that have the Unicode numeric value property.
+
+    Parameters
+    ----------
+    a : array_like, unicode
+        Input array.
+
+    Returns
+    -------
+    out : ndarray, bool
+        Array of booleans of same shape as `a`.
+
+    See Also
+    --------
+    unicode.isnumeric
+
+    """
+    if _use_unicode(a) != unicode_:
+        raise TypeError("isnumeric is only available for Unicode strings and arrays")
+    return _vec_string(a, bool_, 'isnumeric')
+
+
+@array_function_dispatch(_unary_op_dispatcher)
+def isdecimal(a):
+    """
+    For each element, return True if there are only decimal
+    characters in the element.
+
+    Calls `unicode.isdecimal` element-wise.
+
+    Decimal characters include digit characters, and all characters
+    that can be used to form decimal-radix numbers.
+
+    Parameters
+    ----------
+    a : array_like, unicode
+        Input array.
+
+    Returns
+    -------
+    out : ndarray, bool
+        Array of booleans of same shape as `a`.
+
+    See Also
+    --------
+    unicode.isdecimal
+
+    """
+    if _use_unicode(a) != unicode_:
+        raise TypeError("isdecimal is only available for Unicode strings and arrays")
+    return _vec_string(a, bool_, 'isdecimal')
+
+
+@set_module('numpy')
+class chararray(ndarray):
+    """
+    chararray(shape, itemsize=1, unicode=False, buffer=None, offset=0,
+              strides=None, order=None)
+
+    Provides a convenient view on arrays of string and unicode values.
+
+    .. note::
+       The `chararray` class exists for backwards compatibility with
+       Numarray, it is not recommended for new development. Starting from
+       numpy 1.4, if one needs arrays of strings, it is recommended to use
+       arrays of `dtype` `object_`, `string_` or `unicode_`, and use the
+       free functions in the `numpy.char` module for fast vectorized
+       string operations.
+
+    Versus a regular NumPy array of type `str` or `unicode`, this
+    class adds the following functionality:
+
+      1) values automatically have whitespace removed from the end
+         when indexed
+
+      2) comparison operators automatically remove whitespace from the
+         end when comparing values
+
+      3) vectorized string operations are provided as methods
+         (e.g. `endswith`) and infix operators (e.g. ``"+", "*", "%"``)
+
+    chararrays should be created using `numpy.char.array` or
+    `numpy.char.asarray`, rather than this constructor directly.
+
+    This constructor creates the array, using `buffer` (with `offset`
+    and `strides`) if it is not ``None``. If `buffer` is ``None``, then
+    constructs a new array with `strides` in "C order", unless both
+    ``len(shape) >= 2`` and ``order='F'``, in which case `strides`
+    is in "Fortran order".
+
+    Methods
+    -------
+    astype
+    argsort
+    copy
+    count
+    decode
+    dump
+    dumps
+    encode
+    endswith
+    expandtabs
+    fill
+    find
+    flatten
+    getfield
+    index
+    isalnum
+    isalpha
+    isdecimal
+    isdigit
+    islower
+    isnumeric
+    isspace
+    istitle
+    isupper
+    item
+    join
+    ljust
+    lower
+    lstrip
+    nonzero
+    put
+    ravel
+    repeat
+    replace
+    reshape
+    resize
+    rfind
+    rindex
+    rjust
+    rsplit
+    rstrip
+    searchsorted
+    setfield
+    setflags
+    sort
+    split
+    splitlines
+    squeeze
+    startswith
+    strip
+    swapaxes
+    swapcase
+    take
+    title
+    tofile
+    tolist
+    tostring
+    translate
+    transpose
+    upper
+    view
+    zfill
+
+    Parameters
+    ----------
+    shape : tuple
+        Shape of the array.
+    itemsize : int, optional
+        Length of each array element, in number of characters. Default is 1.
+    unicode : bool, optional
+        Are the array elements of type unicode (True) or string (False).
+        Default is False.
+    buffer : object exposing the buffer interface or str, optional
+        Object containing the start of the array data. Default is None,
+        in which case a new array is created.
+    offset : int, optional
+        Byte offset from the start of `buffer` at which the array data
+        begins. Default is 0. Needs to be >=0.
+    strides : array_like of ints, optional
+        Strides for the array (see `ndarray.strides` for full description).
+        Default is None.
+ order : {'C', 'F'}, optional + The order in which the array data is stored in memory: 'C' -> + "row major" order (the default), 'F' -> "column major" + (Fortran) order. + + Examples + -------- + >>> charar = np.chararray((3, 3)) + >>> charar[:] = 'a' + >>> charar + chararray([[b'a', b'a', b'a'], + [b'a', b'a', b'a'], + [b'a', b'a', b'a']], dtype='|S1') + + >>> charar = np.chararray(charar.shape, itemsize=5) + >>> charar[:] = 'abc' + >>> charar + chararray([[b'abc', b'abc', b'abc'], + [b'abc', b'abc', b'abc'], + [b'abc', b'abc', b'abc']], dtype='|S5') + + """ + def __new__(subtype, shape, itemsize=1, unicode=False, buffer=None, + offset=0, strides=None, order='C'): + global _globalvar + + if unicode: + dtype = unicode_ + else: + dtype = string_ + + # force itemsize to be a Python int, since using NumPy integer + # types results in itemsize.itemsize being used as the size of + # strings in the new array. + itemsize = int(itemsize) + + if isinstance(buffer, str): + # unicode objects do not have the buffer interface + filler = buffer + buffer = None + else: + filler = None + + _globalvar = 1 + if buffer is None: + self = ndarray.__new__(subtype, shape, (dtype, itemsize), + order=order) + else: + self = ndarray.__new__(subtype, shape, (dtype, itemsize), + buffer=buffer, + offset=offset, strides=strides, + order=order) + if filler is not None: + self[...] = filler + _globalvar = 0 + return self + + def __array_finalize__(self, obj): + # The b is a special case because it is used for reconstructing. + if not _globalvar and self.dtype.char not in 'SUbc': + raise ValueError("Can only create a chararray from string data.") + + def __getitem__(self, obj): + val = ndarray.__getitem__(self, obj) + + if isinstance(val, character): + temp = val.rstrip() + if len(temp) == 0: + val = '' + else: + val = temp + + return val + + # IMPLEMENTATION NOTE: Most of the methods of this class are + # direct delegations to the free functions in this module. + # However, those that return an array of strings should instead + # return a chararray, so some extra wrapping is required. + + def __eq__(self, other): + """ + Return (self == other) element-wise. + + See Also + -------- + equal + """ + return equal(self, other) + + def __ne__(self, other): + """ + Return (self != other) element-wise. + + See Also + -------- + not_equal + """ + return not_equal(self, other) + + def __ge__(self, other): + """ + Return (self >= other) element-wise. + + See Also + -------- + greater_equal + """ + return greater_equal(self, other) + + def __le__(self, other): + """ + Return (self <= other) element-wise. + + See Also + -------- + less_equal + """ + return less_equal(self, other) + + def __gt__(self, other): + """ + Return (self > other) element-wise. + + See Also + -------- + greater + """ + return greater(self, other) + + def __lt__(self, other): + """ + Return (self < other) element-wise. + + See Also + -------- + less + """ + return less(self, other) + + def __add__(self, other): + """ + Return (self + other), that is string concatenation, + element-wise for a pair of array_likes of str or unicode. + + See Also + -------- + add + """ + return asarray(add(self, other)) + + def __radd__(self, other): + """ + Return (other + self), that is string concatenation, + element-wise for a pair of array_likes of `string_` or `unicode_`. + + See Also + -------- + add + """ + return asarray(add(numpy.asarray(other), self)) + + def __mul__(self, i): + """ + Return (self * i), that is string multiple concatenation, + element-wise. 
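+
+        For example, if an element of ``self`` is ``'ab'``, the
+        corresponding element of ``self * 3`` is ``'ababab'``.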
+ + See Also + -------- + multiply + """ + return asarray(multiply(self, i)) + + def __rmul__(self, i): + """ + Return (self * i), that is string multiple concatenation, + element-wise. + + See Also + -------- + multiply + """ + return asarray(multiply(self, i)) + + def __mod__(self, i): + """ + Return (self % i), that is pre-Python 2.6 string formatting + (interpolation), element-wise for a pair of array_likes of `string_` + or `unicode_`. + + See Also + -------- + mod + """ + return asarray(mod(self, i)) + + def __rmod__(self, other): + return NotImplemented + + def argsort(self, axis=-1, kind=None, order=None): + """ + Return the indices that sort the array lexicographically. + + For full documentation see `numpy.argsort`, for which this method is + in fact merely a "thin wrapper." + + Examples + -------- + >>> c = np.array(['a1b c', '1b ca', 'b ca1', 'Ca1b'], 'S5') + >>> c = c.view(np.chararray); c + chararray(['a1b c', '1b ca', 'b ca1', 'Ca1b'], + dtype='|S5') + >>> c[c.argsort()] + chararray(['1b ca', 'Ca1b', 'a1b c', 'b ca1'], + dtype='|S5') + + """ + return self.__array__().argsort(axis, kind, order) + argsort.__doc__ = ndarray.argsort.__doc__ + + def capitalize(self): + """ + Return a copy of `self` with only the first character of each element + capitalized. + + See Also + -------- + char.capitalize + + """ + return asarray(capitalize(self)) + + def center(self, width, fillchar=' '): + """ + Return a copy of `self` with its elements centered in a + string of length `width`. + + See Also + -------- + center + """ + return asarray(center(self, width, fillchar)) + + def count(self, sub, start=0, end=None): + """ + Returns an array with the number of non-overlapping occurrences of + substring `sub` in the range [`start`, `end`]. + + See Also + -------- + char.count + + """ + return count(self, sub, start, end) + + def decode(self, encoding=None, errors=None): + """ + Calls `str.decode` element-wise. + + See Also + -------- + char.decode + + """ + return decode(self, encoding, errors) + + def encode(self, encoding=None, errors=None): + """ + Calls `str.encode` element-wise. + + See Also + -------- + char.encode + + """ + return encode(self, encoding, errors) + + def endswith(self, suffix, start=0, end=None): + """ + Returns a boolean array which is `True` where the string element + in `self` ends with `suffix`, otherwise `False`. + + See Also + -------- + char.endswith + + """ + return endswith(self, suffix, start, end) + + def expandtabs(self, tabsize=8): + """ + Return a copy of each string element where all tab characters are + replaced by one or more spaces. + + See Also + -------- + char.expandtabs + + """ + return asarray(expandtabs(self, tabsize)) + + def find(self, sub, start=0, end=None): + """ + For each element, return the lowest index in the string where + substring `sub` is found. + + See Also + -------- + char.find + + """ + return find(self, sub, start, end) + + def index(self, sub, start=0, end=None): + """ + Like `find`, but raises `ValueError` when the substring is not found. + + See Also + -------- + char.index + + """ + return index(self, sub, start, end) + + def isalnum(self): + """ + Returns true for each element if all characters in the string + are alphanumeric and there is at least one character, false + otherwise. + + See Also + -------- + char.isalnum + + """ + return isalnum(self) + + def isalpha(self): + """ + Returns true for each element if all characters in the string + are alphabetic and there is at least one character, false + otherwise. 
+ + See Also + -------- + char.isalpha + + """ + return isalpha(self) + + def isdigit(self): + """ + Returns true for each element if all characters in the string are + digits and there is at least one character, false otherwise. + + See Also + -------- + char.isdigit + + """ + return isdigit(self) + + def islower(self): + """ + Returns true for each element if all cased characters in the + string are lowercase and there is at least one cased character, + false otherwise. + + See Also + -------- + char.islower + + """ + return islower(self) + + def isspace(self): + """ + Returns true for each element if there are only whitespace + characters in the string and there is at least one character, + false otherwise. + + See Also + -------- + char.isspace + + """ + return isspace(self) + + def istitle(self): + """ + Returns true for each element if the element is a titlecased + string and there is at least one character, false otherwise. + + See Also + -------- + char.istitle + + """ + return istitle(self) + + def isupper(self): + """ + Returns true for each element if all cased characters in the + string are uppercase and there is at least one character, false + otherwise. + + See Also + -------- + char.isupper + + """ + return isupper(self) + + def join(self, seq): + """ + Return a string which is the concatenation of the strings in the + sequence `seq`. + + See Also + -------- + char.join + + """ + return join(self, seq) + + def ljust(self, width, fillchar=' '): + """ + Return an array with the elements of `self` left-justified in a + string of length `width`. + + See Also + -------- + char.ljust + + """ + return asarray(ljust(self, width, fillchar)) + + def lower(self): + """ + Return an array with the elements of `self` converted to + lowercase. + + See Also + -------- + char.lower + + """ + return asarray(lower(self)) + + def lstrip(self, chars=None): + """ + For each element in `self`, return a copy with the leading characters + removed. + + See Also + -------- + char.lstrip + + """ + return asarray(lstrip(self, chars)) + + def partition(self, sep): + """ + Partition each element in `self` around `sep`. + + See Also + -------- + partition + """ + return asarray(partition(self, sep)) + + def replace(self, old, new, count=None): + """ + For each element in `self`, return a copy of the string with all + occurrences of substring `old` replaced by `new`. + + See Also + -------- + char.replace + + """ + return asarray(replace(self, old, new, count)) + + def rfind(self, sub, start=0, end=None): + """ + For each element in `self`, return the highest index in the string + where substring `sub` is found, such that `sub` is contained + within [`start`, `end`]. + + See Also + -------- + char.rfind + + """ + return rfind(self, sub, start, end) + + def rindex(self, sub, start=0, end=None): + """ + Like `rfind`, but raises `ValueError` when the substring `sub` is + not found. + + See Also + -------- + char.rindex + + """ + return rindex(self, sub, start, end) + + def rjust(self, width, fillchar=' '): + """ + Return an array with the elements of `self` + right-justified in a string of length `width`. + + See Also + -------- + char.rjust + + """ + return asarray(rjust(self, width, fillchar)) + + def rpartition(self, sep): + """ + Partition each element in `self` around `sep`. 
+ + See Also + -------- + rpartition + """ + return asarray(rpartition(self, sep)) + + def rsplit(self, sep=None, maxsplit=None): + """ + For each element in `self`, return a list of the words in + the string, using `sep` as the delimiter string. + + See Also + -------- + char.rsplit + + """ + return rsplit(self, sep, maxsplit) + + def rstrip(self, chars=None): + """ + For each element in `self`, return a copy with the trailing + characters removed. + + See Also + -------- + char.rstrip + + """ + return asarray(rstrip(self, chars)) + + def split(self, sep=None, maxsplit=None): + """ + For each element in `self`, return a list of the words in the + string, using `sep` as the delimiter string. + + See Also + -------- + char.split + + """ + return split(self, sep, maxsplit) + + def splitlines(self, keepends=None): + """ + For each element in `self`, return a list of the lines in the + element, breaking at line boundaries. + + See Also + -------- + char.splitlines + + """ + return splitlines(self, keepends) + + def startswith(self, prefix, start=0, end=None): + """ + Returns a boolean array which is `True` where the string element + in `self` starts with `prefix`, otherwise `False`. + + See Also + -------- + char.startswith + + """ + return startswith(self, prefix, start, end) + + def strip(self, chars=None): + """ + For each element in `self`, return a copy with the leading and + trailing characters removed. + + See Also + -------- + char.strip + + """ + return asarray(strip(self, chars)) + + def swapcase(self): + """ + For each element in `self`, return a copy of the string with + uppercase characters converted to lowercase and vice versa. + + See Also + -------- + char.swapcase + + """ + return asarray(swapcase(self)) + + def title(self): + """ + For each element in `self`, return a titlecased version of the + string: words start with uppercase characters, all remaining cased + characters are lowercase. + + See Also + -------- + char.title + + """ + return asarray(title(self)) + + def translate(self, table, deletechars=None): + """ + For each element in `self`, return a copy of the string where + all characters occurring in the optional argument + `deletechars` are removed, and the remaining characters have + been mapped through the given translation table. + + See Also + -------- + char.translate + + """ + return asarray(translate(self, table, deletechars)) + + def upper(self): + """ + Return an array with the elements of `self` converted to + uppercase. + + See Also + -------- + char.upper + + """ + return asarray(upper(self)) + + def zfill(self, width): + """ + Return the numeric string left-filled with zeros in a string of + length `width`. + + See Also + -------- + char.zfill + + """ + return asarray(zfill(self, width)) + + def isnumeric(self): + """ + For each element in `self`, return True if there are only + numeric characters in the element. + + See Also + -------- + char.isnumeric + + """ + return isnumeric(self) + + def isdecimal(self): + """ + For each element in `self`, return True if there are only + decimal characters in the element. + + See Also + -------- + char.isdecimal + + """ + return isdecimal(self) + + +@set_module("numpy.char") +def array(obj, itemsize=None, copy=True, unicode=None, order=None): + """ + Create a `chararray`. + + .. note:: + This class is provided for numarray backward-compatibility. 
+ New code (not concerned with numarray compatibility) should use + arrays of type `string_` or `unicode_` and use the free functions + in :mod:`numpy.char ` for fast + vectorized string operations instead. + + Versus a regular NumPy array of type `str` or `unicode`, this + class adds the following functionality: + + 1) values automatically have whitespace removed from the end + when indexed + + 2) comparison operators automatically remove whitespace from the + end when comparing values + + 3) vectorized string operations are provided as methods + (e.g. `str.endswith`) and infix operators (e.g. ``+, *, %``) + + Parameters + ---------- + obj : array of str or unicode-like + + itemsize : int, optional + `itemsize` is the number of characters per scalar in the + resulting array. If `itemsize` is None, and `obj` is an + object array or a Python list, the `itemsize` will be + automatically determined. If `itemsize` is provided and `obj` + is of type str or unicode, then the `obj` string will be + chunked into `itemsize` pieces. + + copy : bool, optional + If true (default), then the object is copied. Otherwise, a copy + will only be made if __array__ returns a copy, if obj is a + nested sequence, or if a copy is needed to satisfy any of the other + requirements (`itemsize`, unicode, `order`, etc.). + + unicode : bool, optional + When true, the resulting `chararray` can contain Unicode + characters, when false only 8-bit characters. If unicode is + None and `obj` is one of the following: + + - a `chararray`, + - an ndarray of type `str` or `unicode` + - a Python str or unicode object, + + then the unicode setting of the output array will be + automatically determined. + + order : {'C', 'F', 'A'}, optional + Specify the order of the array. If order is 'C' (default), then the + array will be in C-contiguous order (last-index varies the + fastest). If order is 'F', then the returned array + will be in Fortran-contiguous order (first-index varies the + fastest). If order is 'A', then the returned array may + be in any order (either C-, Fortran-contiguous, or even + discontiguous). + """ + if isinstance(obj, (bytes, str)): + if unicode is None: + if isinstance(obj, str): + unicode = True + else: + unicode = False + + if itemsize is None: + itemsize = len(obj) + shape = len(obj) // itemsize + + return chararray(shape, itemsize=itemsize, unicode=unicode, + buffer=obj, order=order) + + if isinstance(obj, (list, tuple)): + obj = numpy.asarray(obj) + + if isinstance(obj, ndarray) and issubclass(obj.dtype.type, character): + # If we just have a vanilla chararray, create a chararray + # view around it. 
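+        # (The view itself is cheap; if a different itemsize or a
+        # str/bytes kind change is requested below, the astype() call
+        # performs the actual copy.)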
+ if not isinstance(obj, chararray): + obj = obj.view(chararray) + + if itemsize is None: + itemsize = obj.itemsize + # itemsize is in 8-bit chars, so for Unicode, we need + # to divide by the size of a single Unicode character, + # which for NumPy is always 4 + if issubclass(obj.dtype.type, unicode_): + itemsize //= 4 + + if unicode is None: + if issubclass(obj.dtype.type, unicode_): + unicode = True + else: + unicode = False + + if unicode: + dtype = unicode_ + else: + dtype = string_ + + if order is not None: + obj = numpy.asarray(obj, order=order) + if (copy or + (itemsize != obj.itemsize) or + (not unicode and isinstance(obj, unicode_)) or + (unicode and isinstance(obj, string_))): + obj = obj.astype((dtype, int(itemsize))) + return obj + + if isinstance(obj, ndarray) and issubclass(obj.dtype.type, object): + if itemsize is None: + # Since no itemsize was specified, convert the input array to + # a list so the ndarray constructor will automatically + # determine the itemsize for us. + obj = obj.tolist() + # Fall through to the default case + + if unicode: + dtype = unicode_ + else: + dtype = string_ + + if itemsize is None: + val = narray(obj, dtype=dtype, order=order, subok=True) + else: + val = narray(obj, dtype=(dtype, itemsize), order=order, subok=True) + return val.view(chararray) + + +@set_module("numpy.char") +def asarray(obj, itemsize=None, unicode=None, order=None): + """ + Convert the input to a `chararray`, copying the data only if + necessary. + + Versus a regular NumPy array of type `str` or `unicode`, this + class adds the following functionality: + + 1) values automatically have whitespace removed from the end + when indexed + + 2) comparison operators automatically remove whitespace from the + end when comparing values + + 3) vectorized string operations are provided as methods + (e.g. `str.endswith`) and infix operators (e.g. ``+``, ``*``,``%``) + + Parameters + ---------- + obj : array of str or unicode-like + + itemsize : int, optional + `itemsize` is the number of characters per scalar in the + resulting array. If `itemsize` is None, and `obj` is an + object array or a Python list, the `itemsize` will be + automatically determined. If `itemsize` is provided and `obj` + is of type str or unicode, then the `obj` string will be + chunked into `itemsize` pieces. + + unicode : bool, optional + When true, the resulting `chararray` can contain Unicode + characters, when false only 8-bit characters. If unicode is + None and `obj` is one of the following: + + - a `chararray`, + - an ndarray of type `str` or 'unicode` + - a Python str or unicode object, + + then the unicode setting of the output array will be + automatically determined. + + order : {'C', 'F'}, optional + Specify the order of the array. If order is 'C' (default), then the + array will be in C-contiguous order (last-index varies the + fastest). If order is 'F', then the returned array + will be in Fortran-contiguous order (first-index varies the + fastest). 
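+
+    Examples
+    --------
+    Wrapping an existing ``str`` array (the dtype shown assumes a
+    little-endian build):
+
+    >>> np.char.asarray(np.array(['a', 'bb']))
+    chararray(['a', 'bb'], dtype='<U2')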
+ """ + return array(obj, itemsize, copy=False, + unicode=unicode, order=order) diff --git a/.local/lib/python3.8/site-packages/numpy/core/defchararray.pyi b/.local/lib/python3.8/site-packages/numpy/core/defchararray.pyi new file mode 100644 index 0000000..28d247b --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/defchararray.pyi @@ -0,0 +1,422 @@ +from typing import ( + Literal as L, + overload, + TypeVar, + Any, + List, +) + +from numpy import ( + chararray as chararray, + dtype, + str_, + bytes_, + int_, + bool_, + object_, + _OrderKACF, +) + +from numpy.typing import ( + NDArray, + _ArrayLikeStr_co as U_co, + _ArrayLikeBytes_co as S_co, + _ArrayLikeInt_co as i_co, + _ArrayLikeBool_co as b_co, +) + +from numpy.core.multiarray import compare_chararrays as compare_chararrays + +_SCT = TypeVar("_SCT", str_, bytes_) +_CharArray = chararray[Any, dtype[_SCT]] + +__all__: List[str] + +# Comparison +@overload +def equal(x1: U_co, x2: U_co) -> NDArray[bool_]: ... +@overload +def equal(x1: S_co, x2: S_co) -> NDArray[bool_]: ... + +@overload +def not_equal(x1: U_co, x2: U_co) -> NDArray[bool_]: ... +@overload +def not_equal(x1: S_co, x2: S_co) -> NDArray[bool_]: ... + +@overload +def greater_equal(x1: U_co, x2: U_co) -> NDArray[bool_]: ... +@overload +def greater_equal(x1: S_co, x2: S_co) -> NDArray[bool_]: ... + +@overload +def less_equal(x1: U_co, x2: U_co) -> NDArray[bool_]: ... +@overload +def less_equal(x1: S_co, x2: S_co) -> NDArray[bool_]: ... + +@overload +def greater(x1: U_co, x2: U_co) -> NDArray[bool_]: ... +@overload +def greater(x1: S_co, x2: S_co) -> NDArray[bool_]: ... + +@overload +def less(x1: U_co, x2: U_co) -> NDArray[bool_]: ... +@overload +def less(x1: S_co, x2: S_co) -> NDArray[bool_]: ... + +# String operations +@overload +def add(x1: U_co, x2: U_co) -> NDArray[str_]: ... +@overload +def add(x1: S_co, x2: S_co) -> NDArray[bytes_]: ... + +@overload +def multiply(a: U_co, i: i_co) -> NDArray[str_]: ... +@overload +def multiply(a: S_co, i: i_co) -> NDArray[bytes_]: ... + +@overload +def mod(a: U_co, value: Any) -> NDArray[str_]: ... +@overload +def mod(a: S_co, value: Any) -> NDArray[bytes_]: ... + +@overload +def capitalize(a: U_co) -> NDArray[str_]: ... +@overload +def capitalize(a: S_co) -> NDArray[bytes_]: ... + +@overload +def center(a: U_co, width: i_co, fillchar: U_co = ...) -> NDArray[str_]: ... +@overload +def center(a: S_co, width: i_co, fillchar: S_co = ...) -> NDArray[bytes_]: ... + +def decode( + a: S_co, + encoding: None | str = ..., + errors: None | str = ..., +) -> NDArray[str_]: ... + +def encode( + a: U_co, + encoding: None | str = ..., + errors: None | str = ..., +) -> NDArray[bytes_]: ... + +@overload +def expandtabs(a: U_co, tabsize: i_co = ...) -> NDArray[str_]: ... +@overload +def expandtabs(a: S_co, tabsize: i_co = ...) -> NDArray[bytes_]: ... + +@overload +def join(sep: U_co, seq: U_co) -> NDArray[str_]: ... +@overload +def join(sep: S_co, seq: S_co) -> NDArray[bytes_]: ... + +@overload +def ljust(a: U_co, width: i_co, fillchar: U_co = ...) -> NDArray[str_]: ... +@overload +def ljust(a: S_co, width: i_co, fillchar: S_co = ...) -> NDArray[bytes_]: ... + +@overload +def lower(a: U_co) -> NDArray[str_]: ... +@overload +def lower(a: S_co) -> NDArray[bytes_]: ... + +@overload +def lstrip(a: U_co, chars: None | U_co = ...) -> NDArray[str_]: ... +@overload +def lstrip(a: S_co, chars: None | S_co = ...) -> NDArray[bytes_]: ... + +@overload +def partition(a: U_co, sep: U_co) -> NDArray[str_]: ... 
+@overload +def partition(a: S_co, sep: S_co) -> NDArray[bytes_]: ... + +@overload +def replace( + a: U_co, + old: U_co, + new: U_co, + count: None | i_co = ..., +) -> NDArray[str_]: ... +@overload +def replace( + a: S_co, + old: S_co, + new: S_co, + count: None | i_co = ..., +) -> NDArray[bytes_]: ... + +@overload +def rjust( + a: U_co, + width: i_co, + fillchar: U_co = ..., +) -> NDArray[str_]: ... +@overload +def rjust( + a: S_co, + width: i_co, + fillchar: S_co = ..., +) -> NDArray[bytes_]: ... + +@overload +def rpartition(a: U_co, sep: U_co) -> NDArray[str_]: ... +@overload +def rpartition(a: S_co, sep: S_co) -> NDArray[bytes_]: ... + +@overload +def rsplit( + a: U_co, + sep: None | U_co = ..., + maxsplit: None | i_co = ..., +) -> NDArray[object_]: ... +@overload +def rsplit( + a: S_co, + sep: None | S_co = ..., + maxsplit: None | i_co = ..., +) -> NDArray[object_]: ... + +@overload +def rstrip(a: U_co, chars: None | U_co = ...) -> NDArray[str_]: ... +@overload +def rstrip(a: S_co, chars: None | S_co = ...) -> NDArray[bytes_]: ... + +@overload +def split( + a: U_co, + sep: None | U_co = ..., + maxsplit: None | i_co = ..., +) -> NDArray[object_]: ... +@overload +def split( + a: S_co, + sep: None | S_co = ..., + maxsplit: None | i_co = ..., +) -> NDArray[object_]: ... + +@overload +def splitlines(a: U_co, keepends: None | b_co = ...) -> NDArray[object_]: ... +@overload +def splitlines(a: S_co, keepends: None | b_co = ...) -> NDArray[object_]: ... + +@overload +def strip(a: U_co, chars: None | U_co = ...) -> NDArray[str_]: ... +@overload +def strip(a: S_co, chars: None | S_co = ...) -> NDArray[bytes_]: ... + +@overload +def swapcase(a: U_co) -> NDArray[str_]: ... +@overload +def swapcase(a: S_co) -> NDArray[bytes_]: ... + +@overload +def title(a: U_co) -> NDArray[str_]: ... +@overload +def title(a: S_co) -> NDArray[bytes_]: ... + +@overload +def translate( + a: U_co, + table: U_co, + deletechars: None | U_co = ..., +) -> NDArray[str_]: ... +@overload +def translate( + a: S_co, + table: S_co, + deletechars: None | S_co = ..., +) -> NDArray[bytes_]: ... + +@overload +def upper(a: U_co) -> NDArray[str_]: ... +@overload +def upper(a: S_co) -> NDArray[bytes_]: ... + +@overload +def zfill(a: U_co, width: i_co) -> NDArray[str_]: ... +@overload +def zfill(a: S_co, width: i_co) -> NDArray[bytes_]: ... + +# String information +@overload +def count( + a: U_co, + sub: U_co, + start: i_co = ..., + end: None | i_co = ..., +) -> NDArray[int_]: ... +@overload +def count( + a: S_co, + sub: S_co, + start: i_co = ..., + end: None | i_co = ..., +) -> NDArray[int_]: ... + +@overload +def endswith( + a: U_co, + suffix: U_co, + start: i_co = ..., + end: None | i_co = ..., +) -> NDArray[bool_]: ... +@overload +def endswith( + a: S_co, + suffix: S_co, + start: i_co = ..., + end: None | i_co = ..., +) -> NDArray[bool_]: ... + +@overload +def find( + a: U_co, + sub: U_co, + start: i_co = ..., + end: None | i_co = ..., +) -> NDArray[int_]: ... +@overload +def find( + a: S_co, + sub: S_co, + start: i_co = ..., + end: None | i_co = ..., +) -> NDArray[int_]: ... + +@overload +def index( + a: U_co, + sub: U_co, + start: i_co = ..., + end: None | i_co = ..., +) -> NDArray[int_]: ... +@overload +def index( + a: S_co, + sub: S_co, + start: i_co = ..., + end: None | i_co = ..., +) -> NDArray[int_]: ... + +def isalpha(a: U_co | S_co) -> NDArray[bool_]: ... +def isalnum(a: U_co | S_co) -> NDArray[bool_]: ... +def isdecimal(a: U_co | S_co) -> NDArray[bool_]: ... +def isdigit(a: U_co | S_co) -> NDArray[bool_]: ... 
+def islower(a: U_co | S_co) -> NDArray[bool_]: ... +def isnumeric(a: U_co | S_co) -> NDArray[bool_]: ... +def isspace(a: U_co | S_co) -> NDArray[bool_]: ... +def istitle(a: U_co | S_co) -> NDArray[bool_]: ... +def isupper(a: U_co | S_co) -> NDArray[bool_]: ... + +@overload +def rfind( + a: U_co, + sub: U_co, + start: i_co = ..., + end: None | i_co = ..., +) -> NDArray[int_]: ... +@overload +def rfind( + a: S_co, + sub: S_co, + start: i_co = ..., + end: None | i_co = ..., +) -> NDArray[int_]: ... + +@overload +def rindex( + a: U_co, + sub: U_co, + start: i_co = ..., + end: None | i_co = ..., +) -> NDArray[int_]: ... +@overload +def rindex( + a: S_co, + sub: S_co, + start: i_co = ..., + end: None | i_co = ..., +) -> NDArray[int_]: ... + +@overload +def startswith( + a: U_co, + prefix: U_co, + start: i_co = ..., + end: None | i_co = ..., +) -> NDArray[bool_]: ... +@overload +def startswith( + a: S_co, + prefix: S_co, + start: i_co = ..., + end: None | i_co = ..., +) -> NDArray[bool_]: ... + +def str_len(A: U_co | S_co) -> NDArray[int_]: ... + +# Overload 1 and 2: str- or bytes-based array-likes +# overload 3: arbitrary object with unicode=False (-> bytes_) +# overload 4: arbitrary object with unicode=True (-> str_) +@overload +def array( + obj: U_co, + itemsize: None | int = ..., + copy: bool = ..., + unicode: L[False] = ..., + order: _OrderKACF = ..., +) -> _CharArray[str_]: ... +@overload +def array( + obj: S_co, + itemsize: None | int = ..., + copy: bool = ..., + unicode: L[False] = ..., + order: _OrderKACF = ..., +) -> _CharArray[bytes_]: ... +@overload +def array( + obj: object, + itemsize: None | int = ..., + copy: bool = ..., + unicode: L[False] = ..., + order: _OrderKACF = ..., +) -> _CharArray[bytes_]: ... +@overload +def array( + obj: object, + itemsize: None | int = ..., + copy: bool = ..., + unicode: L[True] = ..., + order: _OrderKACF = ..., +) -> _CharArray[str_]: ... + +@overload +def asarray( + obj: U_co, + itemsize: None | int = ..., + unicode: L[False] = ..., + order: _OrderKACF = ..., +) -> _CharArray[str_]: ... +@overload +def asarray( + obj: S_co, + itemsize: None | int = ..., + unicode: L[False] = ..., + order: _OrderKACF = ..., +) -> _CharArray[bytes_]: ... +@overload +def asarray( + obj: object, + itemsize: None | int = ..., + unicode: L[False] = ..., + order: _OrderKACF = ..., +) -> _CharArray[bytes_]: ... +@overload +def asarray( + obj: object, + itemsize: None | int = ..., + unicode: L[True] = ..., + order: _OrderKACF = ..., +) -> _CharArray[str_]: ... diff --git a/.local/lib/python3.8/site-packages/numpy/core/einsumfunc.py b/.local/lib/python3.8/site-packages/numpy/core/einsumfunc.py new file mode 100644 index 0000000..c78d3db --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/einsumfunc.py @@ -0,0 +1,1431 @@ +""" +Implementation of optimized einsum. + +""" +import itertools +import operator + +from numpy.core.multiarray import c_einsum +from numpy.core.numeric import asanyarray, tensordot +from numpy.core.overrides import array_function_dispatch + +__all__ = ['einsum', 'einsum_path'] + +einsum_symbols = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' +einsum_symbols_set = set(einsum_symbols) + + +def _flop_count(idx_contraction, inner, num_terms, size_dictionary): + """ + Computes the number of FLOPS in the contraction. + + Parameters + ---------- + idx_contraction : iterable + The indices involved in the contraction + inner : bool + Does this contraction require an inner product? 
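+        An inner product adds one extra pass over the output, so it
+        increments the operation factor by one.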
+ num_terms : int + The number of terms in a contraction + size_dictionary : dict + The size of each of the indices in idx_contraction + + Returns + ------- + flop_count : int + The total number of FLOPS required for the contraction. + + Examples + -------- + + >>> _flop_count('abc', False, 1, {'a': 2, 'b':3, 'c':5}) + 30 + + >>> _flop_count('abc', True, 2, {'a': 2, 'b':3, 'c':5}) + 60 + + """ + + overall_size = _compute_size_by_dict(idx_contraction, size_dictionary) + op_factor = max(1, num_terms - 1) + if inner: + op_factor += 1 + + return overall_size * op_factor + +def _compute_size_by_dict(indices, idx_dict): + """ + Computes the product of the elements in indices based on the dictionary + idx_dict. + + Parameters + ---------- + indices : iterable + Indices to base the product on. + idx_dict : dictionary + Dictionary of index sizes + + Returns + ------- + ret : int + The resulting product. + + Examples + -------- + >>> _compute_size_by_dict('abbc', {'a': 2, 'b':3, 'c':5}) + 90 + + """ + ret = 1 + for i in indices: + ret *= idx_dict[i] + return ret + + +def _find_contraction(positions, input_sets, output_set): + """ + Finds the contraction for a given set of input and output sets. + + Parameters + ---------- + positions : iterable + Integer positions of terms used in the contraction. + input_sets : list + List of sets that represent the lhs side of the einsum subscript + output_set : set + Set that represents the rhs side of the overall einsum subscript + + Returns + ------- + new_result : set + The indices of the resulting contraction + remaining : list + List of sets that have not been contracted, the new set is appended to + the end of this list + idx_removed : set + Indices removed from the entire contraction + idx_contraction : set + The indices used in the current contraction + + Examples + -------- + + # A simple dot product test case + >>> pos = (0, 1) + >>> isets = [set('ab'), set('bc')] + >>> oset = set('ac') + >>> _find_contraction(pos, isets, oset) + ({'a', 'c'}, [{'a', 'c'}], {'b'}, {'a', 'b', 'c'}) + + # A more complex case with additional terms in the contraction + >>> pos = (0, 2) + >>> isets = [set('abd'), set('ac'), set('bdc')] + >>> oset = set('ac') + >>> _find_contraction(pos, isets, oset) + ({'a', 'c'}, [{'a', 'c'}, {'a', 'c'}], {'b', 'd'}, {'a', 'b', 'c', 'd'}) + """ + + idx_contract = set() + idx_remain = output_set.copy() + remaining = [] + for ind, value in enumerate(input_sets): + if ind in positions: + idx_contract |= value + else: + remaining.append(value) + idx_remain |= value + + new_result = idx_remain & idx_contract + idx_removed = (idx_contract - new_result) + remaining.append(new_result) + + return (new_result, remaining, idx_removed, idx_contract) + + +def _optimal_path(input_sets, output_set, idx_dict, memory_limit): + """ + Computes all possible pair contractions, sieves the results based + on ``memory_limit`` and returns the lowest cost path. This algorithm + scales factorial with respect to the elements in the list ``input_sets``. + + Parameters + ---------- + input_sets : list + List of sets that represent the lhs side of the einsum subscript + output_set : set + Set that represents the rhs side of the overall einsum subscript + idx_dict : dictionary + Dictionary of index sizes + memory_limit : int + The maximum number of elements in a temporary array + + Returns + ------- + path : list + The optimal contraction order within the memory limit constraint. 
+ + Examples + -------- + >>> isets = [set('abd'), set('ac'), set('bdc')] + >>> oset = set() + >>> idx_sizes = {'a': 1, 'b':2, 'c':3, 'd':4} + >>> _optimal_path(isets, oset, idx_sizes, 5000) + [(0, 2), (0, 1)] + """ + + full_results = [(0, [], input_sets)] + for iteration in range(len(input_sets) - 1): + iter_results = [] + + # Compute all unique pairs + for curr in full_results: + cost, positions, remaining = curr + for con in itertools.combinations(range(len(input_sets) - iteration), 2): + + # Find the contraction + cont = _find_contraction(con, remaining, output_set) + new_result, new_input_sets, idx_removed, idx_contract = cont + + # Sieve the results based on memory_limit + new_size = _compute_size_by_dict(new_result, idx_dict) + if new_size > memory_limit: + continue + + # Build (total_cost, positions, indices_remaining) + total_cost = cost + _flop_count(idx_contract, idx_removed, len(con), idx_dict) + new_pos = positions + [con] + iter_results.append((total_cost, new_pos, new_input_sets)) + + # Update combinatorial list, if we did not find anything return best + # path + remaining contractions + if iter_results: + full_results = iter_results + else: + path = min(full_results, key=lambda x: x[0])[1] + path += [tuple(range(len(input_sets) - iteration))] + return path + + # If we have not found anything return single einsum contraction + if len(full_results) == 0: + return [tuple(range(len(input_sets)))] + + path = min(full_results, key=lambda x: x[0])[1] + return path + +def _parse_possible_contraction(positions, input_sets, output_set, idx_dict, memory_limit, path_cost, naive_cost): + """Compute the cost (removed size + flops) and resultant indices for + performing the contraction specified by ``positions``. + + Parameters + ---------- + positions : tuple of int + The locations of the proposed tensors to contract. + input_sets : list of sets + The indices found on each tensors. + output_set : set + The output indices of the expression. + idx_dict : dict + Mapping of each index to its size. + memory_limit : int + The total allowed size for an intermediary tensor. + path_cost : int + The contraction cost so far. + naive_cost : int + The cost of the unoptimized expression. + + Returns + ------- + cost : (int, int) + A tuple containing the size of any indices removed, and the flop cost. + positions : tuple of int + The locations of the proposed tensors to contract. + new_input_sets : list of sets + The resulting new list of indices if this proposed contraction is performed. 
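+
+    Examples
+    --------
+    A hand-checkable sketch: contracting ``'ab,bc->ac'`` with sizes
+    ``a=2, b=3, c=4`` removes ``b`` and costs ``2*3*4*2 = 48`` flops,
+    while the freed input size is ``6 + 12 - 8 = 10``:
+
+    >>> isets = [set('ab'), set('bc')]
+    >>> _parse_possible_contraction((0, 1), isets, set('ac'),
+    ...     {'a': 2, 'b': 3, 'c': 4}, 1000, 0, 100)
+    [(-10, 48), (0, 1), [{'a', 'c'}]]  # may vary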
+ + """ + + # Find the contraction + contract = _find_contraction(positions, input_sets, output_set) + idx_result, new_input_sets, idx_removed, idx_contract = contract + + # Sieve the results based on memory_limit + new_size = _compute_size_by_dict(idx_result, idx_dict) + if new_size > memory_limit: + return None + + # Build sort tuple + old_sizes = (_compute_size_by_dict(input_sets[p], idx_dict) for p in positions) + removed_size = sum(old_sizes) - new_size + + # NB: removed_size used to be just the size of any removed indices i.e.: + # helpers.compute_size_by_dict(idx_removed, idx_dict) + cost = _flop_count(idx_contract, idx_removed, len(positions), idx_dict) + sort = (-removed_size, cost) + + # Sieve based on total cost as well + if (path_cost + cost) > naive_cost: + return None + + # Add contraction to possible choices + return [sort, positions, new_input_sets] + + +def _update_other_results(results, best): + """Update the positions and provisional input_sets of ``results`` based on + performing the contraction result ``best``. Remove any involving the tensors + contracted. + + Parameters + ---------- + results : list + List of contraction results produced by ``_parse_possible_contraction``. + best : list + The best contraction of ``results`` i.e. the one that will be performed. + + Returns + ------- + mod_results : list + The list of modified results, updated with outcome of ``best`` contraction. + """ + + best_con = best[1] + bx, by = best_con + mod_results = [] + + for cost, (x, y), con_sets in results: + + # Ignore results involving tensors just contracted + if x in best_con or y in best_con: + continue + + # Update the input_sets + del con_sets[by - int(by > x) - int(by > y)] + del con_sets[bx - int(bx > x) - int(bx > y)] + con_sets.insert(-1, best[2][-1]) + + # Update the position indices + mod_con = x - int(x > bx) - int(x > by), y - int(y > bx) - int(y > by) + mod_results.append((cost, mod_con, con_sets)) + + return mod_results + +def _greedy_path(input_sets, output_set, idx_dict, memory_limit): + """ + Finds the path by contracting the best pair until the input list is + exhausted. The best pair is found by minimizing the tuple + ``(-prod(indices_removed), cost)``. What this amounts to is prioritizing + matrix multiplication or inner product operations, then Hadamard like + operations, and finally outer operations. Outer products are limited by + ``memory_limit``. This algorithm scales cubically with respect to the + number of elements in the list ``input_sets``. + + Parameters + ---------- + input_sets : list + List of sets that represent the lhs side of the einsum subscript + output_set : set + Set that represents the rhs side of the overall einsum subscript + idx_dict : dictionary + Dictionary of index sizes + memory_limit : int + The maximum number of elements in a temporary array + + Returns + ------- + path : list + The greedy contraction order within the memory limit constraint. 
+ + Examples + -------- + >>> isets = [set('abd'), set('ac'), set('bdc')] + >>> oset = set() + >>> idx_sizes = {'a': 1, 'b':2, 'c':3, 'd':4} + >>> _greedy_path(isets, oset, idx_sizes, 5000) + [(0, 2), (0, 1)] + """ + + # Handle trivial cases that leaked through + if len(input_sets) == 1: + return [(0,)] + elif len(input_sets) == 2: + return [(0, 1)] + + # Build up a naive cost + contract = _find_contraction(range(len(input_sets)), input_sets, output_set) + idx_result, new_input_sets, idx_removed, idx_contract = contract + naive_cost = _flop_count(idx_contract, idx_removed, len(input_sets), idx_dict) + + # Initially iterate over all pairs + comb_iter = itertools.combinations(range(len(input_sets)), 2) + known_contractions = [] + + path_cost = 0 + path = [] + + for iteration in range(len(input_sets) - 1): + + # Iterate over all pairs on first step, only previously found pairs on subsequent steps + for positions in comb_iter: + + # Always initially ignore outer products + if input_sets[positions[0]].isdisjoint(input_sets[positions[1]]): + continue + + result = _parse_possible_contraction(positions, input_sets, output_set, idx_dict, memory_limit, path_cost, + naive_cost) + if result is not None: + known_contractions.append(result) + + # If we do not have a inner contraction, rescan pairs including outer products + if len(known_contractions) == 0: + + # Then check the outer products + for positions in itertools.combinations(range(len(input_sets)), 2): + result = _parse_possible_contraction(positions, input_sets, output_set, idx_dict, memory_limit, + path_cost, naive_cost) + if result is not None: + known_contractions.append(result) + + # If we still did not find any remaining contractions, default back to einsum like behavior + if len(known_contractions) == 0: + path.append(tuple(range(len(input_sets)))) + break + + # Sort based on first index + best = min(known_contractions, key=lambda x: x[0]) + + # Now propagate as many unused contractions as possible to next iteration + known_contractions = _update_other_results(known_contractions, best) + + # Next iteration only compute contractions with the new tensor + # All other contractions have been accounted for + input_sets = best[2] + new_tensor_pos = len(input_sets) - 1 + comb_iter = ((i, new_tensor_pos) for i in range(new_tensor_pos)) + + # Update path and total cost + path.append(best[1]) + path_cost += best[0][1] + + return path + + +def _can_dot(inputs, result, idx_removed): + """ + Checks if we can use BLAS (np.tensordot) call and its beneficial to do so. + + Parameters + ---------- + inputs : list of str + Specifies the subscripts for summation. + result : str + Resulting summation. + idx_removed : set + Indices that are removed in the summation + + + Returns + ------- + type : bool + Returns true if BLAS should and can be used, else False + + Notes + ----- + If the operations is BLAS level 1 or 2 and is not already aligned + we default back to einsum as the memory movement to copy is more + costly than the operation itself. 
+ + + Examples + -------- + + # Standard GEMM operation + >>> _can_dot(['ij', 'jk'], 'ik', set('j')) + True + + # Can use the standard BLAS, but requires odd data movement + >>> _can_dot(['ijj', 'jk'], 'ik', set('j')) + False + + # DDOT where the memory is not aligned + >>> _can_dot(['ijk', 'ikj'], '', set('ijk')) + False + + """ + + # All `dot` calls remove indices + if len(idx_removed) == 0: + return False + + # BLAS can only handle two operands + if len(inputs) != 2: + return False + + input_left, input_right = inputs + + for c in set(input_left + input_right): + # can't deal with repeated indices on same input or more than 2 total + nl, nr = input_left.count(c), input_right.count(c) + if (nl > 1) or (nr > 1) or (nl + nr > 2): + return False + + # can't do implicit summation or dimension collapse e.g. + # "ab,bc->c" (implicitly sum over 'a') + # "ab,ca->ca" (take diagonal of 'a') + if nl + nr - 1 == int(c in result): + return False + + # Build a few temporaries + set_left = set(input_left) + set_right = set(input_right) + keep_left = set_left - idx_removed + keep_right = set_right - idx_removed + rs = len(idx_removed) + + # At this point we are a DOT, GEMV, or GEMM operation + + # Handle inner products + + # DDOT with aligned data + if input_left == input_right: + return True + + # DDOT without aligned data (better to use einsum) + if set_left == set_right: + return False + + # Handle the 4 possible (aligned) GEMV or GEMM cases + + # GEMM or GEMV no transpose + if input_left[-rs:] == input_right[:rs]: + return True + + # GEMM or GEMV transpose both + if input_left[:rs] == input_right[-rs:]: + return True + + # GEMM or GEMV transpose right + if input_left[-rs:] == input_right[-rs:]: + return True + + # GEMM or GEMV transpose left + if input_left[:rs] == input_right[:rs]: + return True + + # Einsum is faster than GEMV if we have to copy data + if not keep_left or not keep_right: + return False + + # We are a matrix-matrix product, but we need to copy data + return True + + +def _parse_einsum_input(operands): + """ + A reproduction of einsum c side einsum parsing in python. + + Returns + ------- + input_strings : str + Parsed input strings + output_string : str + Parsed output string + operands : list of array_like + The operands to use in the numpy contraction + + Examples + -------- + The operand list is simplified to reduce printing: + + >>> np.random.seed(123) + >>> a = np.random.rand(4, 4) + >>> b = np.random.rand(4, 4, 4) + >>> _parse_einsum_input(('...a,...a->...', a, b)) + ('za,xza', 'xz', [a, b]) # may vary + + >>> _parse_einsum_input((a, [Ellipsis, 0], b, [Ellipsis, 0])) + ('za,xza', 'xz', [a, b]) # may vary + """ + + if len(operands) == 0: + raise ValueError("No input operands") + + if isinstance(operands[0], str): + subscripts = operands[0].replace(" ", "") + operands = [asanyarray(v) for v in operands[1:]] + + # Ensure all characters are valid + for s in subscripts: + if s in '.,->': + continue + if s not in einsum_symbols: + raise ValueError("Character %s is not a valid symbol." % s) + + else: + tmp_operands = list(operands) + operand_list = [] + subscript_list = [] + for p in range(len(operands) // 2): + operand_list.append(tmp_operands.pop(0)) + subscript_list.append(tmp_operands.pop(0)) + + output_list = tmp_operands[-1] if len(tmp_operands) else None + operands = [asanyarray(v) for v in operand_list] + subscripts = "" + last = len(subscript_list) - 1 + for num, sub in enumerate(subscript_list): + for s in sub: + if s is Ellipsis: + subscripts += "..." 
+ else: + try: + s = operator.index(s) + except TypeError as e: + raise TypeError("For this input type lists must contain " + "either int or Ellipsis") from e + subscripts += einsum_symbols[s] + if num != last: + subscripts += "," + + if output_list is not None: + subscripts += "->" + for s in output_list: + if s is Ellipsis: + subscripts += "..." + else: + try: + s = operator.index(s) + except TypeError as e: + raise TypeError("For this input type lists must contain " + "either int or Ellipsis") from e + subscripts += einsum_symbols[s] + # Check for proper "->" + if ("-" in subscripts) or (">" in subscripts): + invalid = (subscripts.count("-") > 1) or (subscripts.count(">") > 1) + if invalid or (subscripts.count("->") != 1): + raise ValueError("Subscripts can only contain one '->'.") + + # Parse ellipses + if "." in subscripts: + used = subscripts.replace(".", "").replace(",", "").replace("->", "") + unused = list(einsum_symbols_set - set(used)) + ellipse_inds = "".join(unused) + longest = 0 + + if "->" in subscripts: + input_tmp, output_sub = subscripts.split("->") + split_subscripts = input_tmp.split(",") + out_sub = True + else: + split_subscripts = subscripts.split(',') + out_sub = False + + for num, sub in enumerate(split_subscripts): + if "." in sub: + if (sub.count(".") != 3) or (sub.count("...") != 1): + raise ValueError("Invalid Ellipses.") + + # Take into account numerical values + if operands[num].shape == (): + ellipse_count = 0 + else: + ellipse_count = max(operands[num].ndim, 1) + ellipse_count -= (len(sub) - 3) + + if ellipse_count > longest: + longest = ellipse_count + + if ellipse_count < 0: + raise ValueError("Ellipses lengths do not match.") + elif ellipse_count == 0: + split_subscripts[num] = sub.replace('...', '') + else: + rep_inds = ellipse_inds[-ellipse_count:] + split_subscripts[num] = sub.replace('...', rep_inds) + + subscripts = ",".join(split_subscripts) + if longest == 0: + out_ellipse = "" + else: + out_ellipse = ellipse_inds[-longest:] + + if out_sub: + subscripts += "->" + output_sub.replace("...", out_ellipse) + else: + # Special care for outputless ellipses + output_subscript = "" + tmp_subscripts = subscripts.replace(",", "") + for s in sorted(set(tmp_subscripts)): + if s not in (einsum_symbols): + raise ValueError("Character %s is not a valid symbol." % s) + if tmp_subscripts.count(s) == 1: + output_subscript += s + normal_inds = ''.join(sorted(set(output_subscript) - + set(out_ellipse))) + + subscripts += "->" + out_ellipse + normal_inds + + # Build output string if does not exist + if "->" in subscripts: + input_subscripts, output_subscript = subscripts.split("->") + else: + input_subscripts = subscripts + # Build output subscripts + tmp_subscripts = subscripts.replace(",", "") + output_subscript = "" + for s in sorted(set(tmp_subscripts)): + if s not in einsum_symbols: + raise ValueError("Character %s is not a valid symbol." 
                                 % s)
+            if tmp_subscripts.count(s) == 1:
+                output_subscript += s
+
+    # Make sure output subscripts are in the input
+    for char in output_subscript:
+        if char not in input_subscripts:
+            raise ValueError("Output character %s did not appear in the input"
+                             % char)
+
+    # Make sure the number of operands is equal to the number of subscript terms
+    if len(input_subscripts.split(',')) != len(operands):
+        raise ValueError("Number of einsum subscripts must be equal to the "
+                         "number of operands.")
+
+    return (input_subscripts, output_subscript, operands)
+
+
+def _einsum_path_dispatcher(*operands, optimize=None, einsum_call=None):
+    # NOTE: technically, we should only dispatch on array-like arguments, not
+    # subscripts (given as strings). But separating operands into
+    # arrays/subscripts is a little tricky/slow (given einsum's two supported
+    # signatures), so as a practical shortcut we dispatch on everything.
+    # Strings will be ignored for dispatching since they don't define
+    # __array_function__.
+    return operands
+
+
+@array_function_dispatch(_einsum_path_dispatcher, module='numpy')
+def einsum_path(*operands, optimize='greedy', einsum_call=False):
+    """
+    einsum_path(subscripts, *operands, optimize='greedy')
+
+    Evaluates the lowest cost contraction order for an einsum expression by
+    considering the creation of intermediate arrays.
+
+    Parameters
+    ----------
+    subscripts : str
+        Specifies the subscripts for summation.
+    *operands : list of array_like
+        These are the arrays for the operation.
+    optimize : {bool, list, tuple, 'greedy', 'optimal'}
+        Choose the type of path. If a tuple is provided, the second argument is
+        assumed to be the maximum intermediate size created. If only a single
+        argument is provided the largest input or output array size is used
+        as a maximum intermediate size.
+
+        * if a list is given that starts with ``einsum_path``, uses this as the
+          contraction path
+        * if False no optimization is taken
+        * if True defaults to the 'greedy' algorithm
+        * 'optimal' An algorithm that combinatorially explores all possible
+          ways of contracting the listed tensors and chooses the least costly
+          path. Scales exponentially with the number of terms in the
+          contraction.
+        * 'greedy' An algorithm that chooses the best pair contraction
+          at each step. Effectively, this algorithm searches the largest inner,
+          Hadamard, and then outer products at each step. Scales cubically with
+          the number of terms in the contraction. Equivalent to the 'optimal'
+          path for most contractions.
+
+        Default is 'greedy'.
+
+    Returns
+    -------
+    path : list of tuples
+        A list representation of the einsum path.
+    string_repr : str
+        A printable representation of the einsum path.
+
+    Notes
+    -----
+    The resulting path indicates which terms of the input contraction should be
+    contracted first; the result of this contraction is then appended to the
+    end of the contraction list. This list can then be iterated over until all
+    intermediate contractions are complete.
+
+    See Also
+    --------
+    einsum, linalg.multi_dot
+
+    Examples
+    --------
+
+    We can begin with a chain dot example. In this case, it is optimal to
+    contract the ``b`` and ``c`` tensors first as represented by the first
+    element of the path ``(1, 2)``. The resulting tensor is added to the end
+    of the contraction and the remaining contraction ``(0, 1)`` is then
+    completed.
+ + >>> np.random.seed(123) + >>> a = np.random.rand(2, 2) + >>> b = np.random.rand(2, 5) + >>> c = np.random.rand(5, 2) + >>> path_info = np.einsum_path('ij,jk,kl->il', a, b, c, optimize='greedy') + >>> print(path_info[0]) + ['einsum_path', (1, 2), (0, 1)] + >>> print(path_info[1]) + Complete contraction: ij,jk,kl->il # may vary + Naive scaling: 4 + Optimized scaling: 3 + Naive FLOP count: 1.600e+02 + Optimized FLOP count: 5.600e+01 + Theoretical speedup: 2.857 + Largest intermediate: 4.000e+00 elements + ------------------------------------------------------------------------- + scaling current remaining + ------------------------------------------------------------------------- + 3 kl,jk->jl ij,jl->il + 3 jl,ij->il il->il + + + A more complex index transformation example. + + >>> I = np.random.rand(10, 10, 10, 10) + >>> C = np.random.rand(10, 10) + >>> path_info = np.einsum_path('ea,fb,abcd,gc,hd->efgh', C, C, I, C, C, + ... optimize='greedy') + + >>> print(path_info[0]) + ['einsum_path', (0, 2), (0, 3), (0, 2), (0, 1)] + >>> print(path_info[1]) + Complete contraction: ea,fb,abcd,gc,hd->efgh # may vary + Naive scaling: 8 + Optimized scaling: 5 + Naive FLOP count: 8.000e+08 + Optimized FLOP count: 8.000e+05 + Theoretical speedup: 1000.000 + Largest intermediate: 1.000e+04 elements + -------------------------------------------------------------------------- + scaling current remaining + -------------------------------------------------------------------------- + 5 abcd,ea->bcde fb,gc,hd,bcde->efgh + 5 bcde,fb->cdef gc,hd,cdef->efgh + 5 cdef,gc->defg hd,defg->efgh + 5 defg,hd->efgh efgh->efgh + """ + + # Figure out what the path really is + path_type = optimize + if path_type is True: + path_type = 'greedy' + if path_type is None: + path_type = False + + memory_limit = None + + # No optimization or a named path algorithm + if (path_type is False) or isinstance(path_type, str): + pass + + # Given an explicit path + elif len(path_type) and (path_type[0] == 'einsum_path'): + pass + + # Path tuple with memory limit + elif ((len(path_type) == 2) and isinstance(path_type[0], str) and + isinstance(path_type[1], (int, float))): + memory_limit = int(path_type[1]) + path_type = path_type[0] + + else: + raise TypeError("Did not understand the path: %s" % str(path_type)) + + # Hidden option, only einsum should call this + einsum_call_arg = einsum_call + + # Python side parsing + input_subscripts, output_subscript, operands = _parse_einsum_input(operands) + + # Build a few useful list and sets + input_list = input_subscripts.split(',') + input_sets = [set(x) for x in input_list] + output_set = set(output_subscript) + indices = set(input_subscripts.replace(',', '')) + + # Get length of each unique dimension and ensure all dimensions are correct + dimension_dict = {} + broadcast_indices = [[] for x in range(len(input_list))] + for tnum, term in enumerate(input_list): + sh = operands[tnum].shape + if len(sh) != len(term): + raise ValueError("Einstein sum subscript %s does not contain the " + "correct number of indices for operand %d." 
+                         % (input_subscripts[tnum], tnum))
+        for cnum, char in enumerate(term):
+            dim = sh[cnum]
+
+            # Build out broadcast indices
+            if dim == 1:
+                broadcast_indices[tnum].append(char)
+
+            if char in dimension_dict.keys():
+                # For broadcasting cases we always want the largest dim size
+                if dimension_dict[char] == 1:
+                    dimension_dict[char] = dim
+                elif dim not in (1, dimension_dict[char]):
+                    raise ValueError("Size of label '%s' for operand %d (%d) "
+                                     "does not match previous terms (%d)."
+                                     % (char, tnum, dimension_dict[char], dim))
+            else:
+                dimension_dict[char] = dim
+
+    # Convert broadcast inds to sets
+    broadcast_indices = [set(x) for x in broadcast_indices]
+
+    # Compute size of each input array plus the output array
+    size_list = [_compute_size_by_dict(term, dimension_dict)
+                 for term in input_list + [output_subscript]]
+    max_size = max(size_list)
+
+    if memory_limit is None:
+        memory_arg = max_size
+    else:
+        memory_arg = memory_limit
+
+    # Compute naive cost
+    # This isn't quite right, need to look into exactly how einsum does this
+    inner_product = (sum(len(x) for x in input_sets) - len(indices)) > 0
+    naive_cost = _flop_count(indices, inner_product, len(input_list), dimension_dict)
+
+    # Compute the path
+    if (path_type is False) or (len(input_list) in [1, 2]) or (indices == output_set):
+        # Nothing to be optimized, leave it to einsum
+        path = [tuple(range(len(input_list)))]
+    elif path_type == "greedy":
+        path = _greedy_path(input_sets, output_set, dimension_dict, memory_arg)
+    elif path_type == "optimal":
+        path = _optimal_path(input_sets, output_set, dimension_dict, memory_arg)
+    elif path_type[0] == 'einsum_path':
+        path = path_type[1:]
+    else:
+        raise KeyError("Path name %s not found" % path_type)
+
+    cost_list, scale_list, size_list, contraction_list = [], [], [], []
+
+    # Build contraction tuple (positions, gemm, einsum_str, remaining)
+    for cnum, contract_inds in enumerate(path):
+        # Make sure we remove inds from right to left
+        contract_inds = tuple(sorted(list(contract_inds), reverse=True))
+
+        contract = _find_contraction(contract_inds, input_sets, output_set)
+        out_inds, input_sets, idx_removed, idx_contract = contract
+
+        cost = _flop_count(idx_contract, idx_removed, len(contract_inds), dimension_dict)
+        cost_list.append(cost)
+        scale_list.append(len(idx_contract))
+        size_list.append(_compute_size_by_dict(out_inds, dimension_dict))
+
+        bcast = set()
+        tmp_inputs = []
+        for x in contract_inds:
+            tmp_inputs.append(input_list.pop(x))
+            bcast |= broadcast_indices.pop(x)
+
+        new_bcast_inds = bcast - idx_removed
+
+        # If we're broadcasting, nix blas
+        if not len(idx_removed & bcast):
+            do_blas = _can_dot(tmp_inputs, out_inds, idx_removed)
+        else:
+            do_blas = False
+
+        # Last contraction
+        if (cnum - len(path)) == -1:
+            idx_result = output_subscript
+        else:
+            sort_result = [(dimension_dict[ind], ind) for ind in out_inds]
+            idx_result = "".join([x[1] for x in sorted(sort_result)])
+
+        input_list.append(idx_result)
+        broadcast_indices.append(new_bcast_inds)
+        einsum_str = ",".join(tmp_inputs) + "->" + idx_result
+
+        contraction = (contract_inds, idx_removed, einsum_str, input_list[:], do_blas)
+        contraction_list.append(contraction)
+
+    opt_cost = sum(cost_list) + 1
+
+    if einsum_call_arg:
+        return (operands, contraction_list)
+
+    # Return the path along with a nice string representation
+    overall_contraction = input_subscripts + "->" + output_subscript
+    header = ("scaling", "current", "remaining")
+
+    speedup = naive_cost / opt_cost
+    max_i = max(size_list)
+
+    path_print = "  Complete contraction:  %s\n" % overall_contraction
+    path_print += "         Naive scaling:  %d\n" % len(indices)
+    path_print += "     Optimized scaling:  %d\n" % max(scale_list)
+    path_print += "      Naive FLOP count:  %.3e\n" % naive_cost
+    path_print += "  Optimized FLOP count:  %.3e\n" % opt_cost
+    path_print += "   Theoretical speedup:  %3.3f\n" % speedup
+    path_print += "  Largest intermediate:  %.3e elements\n" % max_i
+    path_print += "-" * 74 + "\n"
+    path_print += "%6s %24s %40s\n" % header
+    path_print += "-" * 74
+
+    for n, contraction in enumerate(contraction_list):
+        inds, idx_rm, einsum_str, remaining, blas = contraction
+        remaining_str = ",".join(remaining) + "->" + output_subscript
+        path_run = (scale_list[n], einsum_str, remaining_str)
+        path_print += "\n%4d    %24s %40s" % path_run
+
+    path = ['einsum_path'] + path
+    return (path, path_print)
+
+
+def _einsum_dispatcher(*operands, out=None, optimize=None, **kwargs):
+    # Arguably we dispatch on more arguments than we really should; see note in
+    # _einsum_path_dispatcher for why.
+    yield from operands
+    yield out
+
+
+# Rewrite einsum to handle different cases
+@array_function_dispatch(_einsum_dispatcher, module='numpy')
+def einsum(*operands, out=None, optimize=False, **kwargs):
+    """
+    einsum(subscripts, *operands, out=None, dtype=None, order='K',
+           casting='safe', optimize=False)
+
+    Evaluates the Einstein summation convention on the operands.
+
+    Using the Einstein summation convention, many common multi-dimensional,
+    linear algebraic array operations can be represented in a simple fashion.
+    In *implicit* mode `einsum` computes these values.
+
+    In *explicit* mode, `einsum` provides further flexibility to compute
+    other array operations that might not be considered classical Einstein
+    summation operations, by disabling, or forcing summation over specified
+    subscript labels.
+
+    See the notes and examples for clarification.
+
+    Parameters
+    ----------
+    subscripts : str
+        Specifies the subscripts for summation as comma separated list of
+        subscript labels. An implicit (classical Einstein summation)
+        calculation is performed unless the explicit indicator '->' is
+        included as well as subscript labels of the precise output form.
+    operands : list of array_like
+        These are the arrays for the operation.
+    out : ndarray, optional
+        If provided, the calculation is done into this array.
+    dtype : {data-type, None}, optional
+        If provided, forces the calculation to use the data type specified.
+        Note that you may have to also give a more liberal `casting`
+        parameter to allow the conversions. Default is None.
+    order : {'C', 'F', 'A', 'K'}, optional
+        Controls the memory layout of the output. 'C' means it should
+        be C contiguous. 'F' means it should be Fortran contiguous,
+        'A' means it should be 'F' if the inputs are all 'F', 'C' otherwise.
+        'K' means it should be as close to the layout of the inputs as
+        is possible, including arbitrarily permuted axes.
+        Default is 'K'.
+    casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
+        Controls what kind of data casting may occur. Setting this to
+        'unsafe' is not recommended, as it can adversely affect accumulations.
+
+        * 'no' means the data types should not be cast at all.
+        * 'equiv' means only byte-order changes are allowed.
+        * 'safe' means only casts which can preserve values are allowed.
+        * 'same_kind' means only safe casts or casts within a kind,
+          like float64 to float32, are allowed.
+        * 'unsafe' means any data conversions may be done.
+
+        Default is 'safe'.
+ optimize : {False, True, 'greedy', 'optimal'}, optional + Controls if intermediate optimization should occur. No optimization + will occur if False and True will default to the 'greedy' algorithm. + Also accepts an explicit contraction list from the ``np.einsum_path`` + function. See ``np.einsum_path`` for more details. Defaults to False. + + Returns + ------- + output : ndarray + The calculation based on the Einstein summation convention. + + See Also + -------- + einsum_path, dot, inner, outer, tensordot, linalg.multi_dot + einops : + similar verbose interface is provided by + `einops `_ package to cover + additional operations: transpose, reshape/flatten, repeat/tile, + squeeze/unsqueeze and reductions. + opt_einsum : + `opt_einsum `_ + optimizes contraction order for einsum-like expressions + in backend-agnostic manner. + + Notes + ----- + .. versionadded:: 1.6.0 + + The Einstein summation convention can be used to compute + many multi-dimensional, linear algebraic array operations. `einsum` + provides a succinct way of representing these. + + A non-exhaustive list of these operations, + which can be computed by `einsum`, is shown below along with examples: + + * Trace of an array, :py:func:`numpy.trace`. + * Return a diagonal, :py:func:`numpy.diag`. + * Array axis summations, :py:func:`numpy.sum`. + * Transpositions and permutations, :py:func:`numpy.transpose`. + * Matrix multiplication and dot product, :py:func:`numpy.matmul` :py:func:`numpy.dot`. + * Vector inner and outer products, :py:func:`numpy.inner` :py:func:`numpy.outer`. + * Broadcasting, element-wise and scalar multiplication, :py:func:`numpy.multiply`. + * Tensor contractions, :py:func:`numpy.tensordot`. + * Chained array operations, in efficient calculation order, :py:func:`numpy.einsum_path`. + + The subscripts string is a comma-separated list of subscript labels, + where each label refers to a dimension of the corresponding operand. + Whenever a label is repeated it is summed, so ``np.einsum('i,i', a, b)`` + is equivalent to :py:func:`np.inner(a,b) `. If a label + appears only once, it is not summed, so ``np.einsum('i', a)`` produces a + view of ``a`` with no changes. A further example ``np.einsum('ij,jk', a, b)`` + describes traditional matrix multiplication and is equivalent to + :py:func:`np.matmul(a,b) `. Repeated subscript labels in one + operand take the diagonal. For example, ``np.einsum('ii', a)`` is equivalent + to :py:func:`np.trace(a) `. + + In *implicit mode*, the chosen subscripts are important + since the axes of the output are reordered alphabetically. This + means that ``np.einsum('ij', a)`` doesn't affect a 2D array, while + ``np.einsum('ji', a)`` takes its transpose. Additionally, + ``np.einsum('ij,jk', a, b)`` returns a matrix multiplication, while, + ``np.einsum('ij,jh', a, b)`` returns the transpose of the + multiplication since subscript 'h' precedes subscript 'i'. + + In *explicit mode* the output can be directly controlled by + specifying output subscript labels. This requires the + identifier '->' as well as the list of output subscript labels. + This feature increases the flexibility of the function since + summing can be disabled or forced when required. The call + ``np.einsum('i->', a)`` is like :py:func:`np.sum(a, axis=-1) `, + and ``np.einsum('ii->i', a)`` is like :py:func:`np.diag(a) `. + The difference is that `einsum` does not allow broadcasting by default. 
+ Additionally ``np.einsum('ij,jh->ih', a, b)`` directly specifies the + order of the output subscript labels and therefore returns matrix + multiplication, unlike the example above in implicit mode. + + To enable and control broadcasting, use an ellipsis. Default + NumPy-style broadcasting is done by adding an ellipsis + to the left of each term, like ``np.einsum('...ii->...i', a)``. + To take the trace along the first and last axes, + you can do ``np.einsum('i...i', a)``, or to do a matrix-matrix + product with the left-most indices instead of rightmost, one can do + ``np.einsum('ij...,jk...->ik...', a, b)``. + + When there is only one operand, no axes are summed, and no output + parameter is provided, a view into the operand is returned instead + of a new array. Thus, taking the diagonal as ``np.einsum('ii->i', a)`` + produces a view (changed in version 1.10.0). + + `einsum` also provides an alternative way to provide the subscripts + and operands as ``einsum(op0, sublist0, op1, sublist1, ..., [sublistout])``. + If the output shape is not provided in this format `einsum` will be + calculated in implicit mode, otherwise it will be performed explicitly. + The examples below have corresponding `einsum` calls with the two + parameter methods. + + .. versionadded:: 1.10.0 + + Views returned from einsum are now writeable whenever the input array + is writeable. For example, ``np.einsum('ijk...->kji...', a)`` will now + have the same effect as :py:func:`np.swapaxes(a, 0, 2) ` + and ``np.einsum('ii->i', a)`` will return a writeable view of the diagonal + of a 2D array. + + .. versionadded:: 1.12.0 + + Added the ``optimize`` argument which will optimize the contraction order + of an einsum expression. For a contraction with three or more operands this + can greatly increase the computational efficiency at the cost of a larger + memory footprint during computation. + + Typically a 'greedy' algorithm is applied which empirical tests have shown + returns the optimal path in the majority of cases. In some cases 'optimal' + will return the superlative path through a more expensive, exhaustive search. + For iterative calculations it may be advisable to calculate the optimal path + once and reuse that path by supplying it as an argument. An example is given + below. + + See :py:func:`numpy.einsum_path` for more details. 
+ + Examples + -------- + >>> a = np.arange(25).reshape(5,5) + >>> b = np.arange(5) + >>> c = np.arange(6).reshape(2,3) + + Trace of a matrix: + + >>> np.einsum('ii', a) + 60 + >>> np.einsum(a, [0,0]) + 60 + >>> np.trace(a) + 60 + + Extract the diagonal (requires explicit form): + + >>> np.einsum('ii->i', a) + array([ 0, 6, 12, 18, 24]) + >>> np.einsum(a, [0,0], [0]) + array([ 0, 6, 12, 18, 24]) + >>> np.diag(a) + array([ 0, 6, 12, 18, 24]) + + Sum over an axis (requires explicit form): + + >>> np.einsum('ij->i', a) + array([ 10, 35, 60, 85, 110]) + >>> np.einsum(a, [0,1], [0]) + array([ 10, 35, 60, 85, 110]) + >>> np.sum(a, axis=1) + array([ 10, 35, 60, 85, 110]) + + For higher dimensional arrays summing a single axis can be done with ellipsis: + + >>> np.einsum('...j->...', a) + array([ 10, 35, 60, 85, 110]) + >>> np.einsum(a, [Ellipsis,1], [Ellipsis]) + array([ 10, 35, 60, 85, 110]) + + Compute a matrix transpose, or reorder any number of axes: + + >>> np.einsum('ji', c) + array([[0, 3], + [1, 4], + [2, 5]]) + >>> np.einsum('ij->ji', c) + array([[0, 3], + [1, 4], + [2, 5]]) + >>> np.einsum(c, [1,0]) + array([[0, 3], + [1, 4], + [2, 5]]) + >>> np.transpose(c) + array([[0, 3], + [1, 4], + [2, 5]]) + + Vector inner products: + + >>> np.einsum('i,i', b, b) + 30 + >>> np.einsum(b, [0], b, [0]) + 30 + >>> np.inner(b,b) + 30 + + Matrix vector multiplication: + + >>> np.einsum('ij,j', a, b) + array([ 30, 80, 130, 180, 230]) + >>> np.einsum(a, [0,1], b, [1]) + array([ 30, 80, 130, 180, 230]) + >>> np.dot(a, b) + array([ 30, 80, 130, 180, 230]) + >>> np.einsum('...j,j', a, b) + array([ 30, 80, 130, 180, 230]) + + Broadcasting and scalar multiplication: + + >>> np.einsum('..., ...', 3, c) + array([[ 0, 3, 6], + [ 9, 12, 15]]) + >>> np.einsum(',ij', 3, c) + array([[ 0, 3, 6], + [ 9, 12, 15]]) + >>> np.einsum(3, [Ellipsis], c, [Ellipsis]) + array([[ 0, 3, 6], + [ 9, 12, 15]]) + >>> np.multiply(3, c) + array([[ 0, 3, 6], + [ 9, 12, 15]]) + + Vector outer product: + + >>> np.einsum('i,j', np.arange(2)+1, b) + array([[0, 1, 2, 3, 4], + [0, 2, 4, 6, 8]]) + >>> np.einsum(np.arange(2)+1, [0], b, [1]) + array([[0, 1, 2, 3, 4], + [0, 2, 4, 6, 8]]) + >>> np.outer(np.arange(2)+1, b) + array([[0, 1, 2, 3, 4], + [0, 2, 4, 6, 8]]) + + Tensor contraction: + + >>> a = np.arange(60.).reshape(3,4,5) + >>> b = np.arange(24.).reshape(4,3,2) + >>> np.einsum('ijk,jil->kl', a, b) + array([[4400., 4730.], + [4532., 4874.], + [4664., 5018.], + [4796., 5162.], + [4928., 5306.]]) + >>> np.einsum(a, [0,1,2], b, [1,0,3], [2,3]) + array([[4400., 4730.], + [4532., 4874.], + [4664., 5018.], + [4796., 5162.], + [4928., 5306.]]) + >>> np.tensordot(a,b, axes=([1,0],[0,1])) + array([[4400., 4730.], + [4532., 4874.], + [4664., 5018.], + [4796., 5162.], + [4928., 5306.]]) + + Writeable returned arrays (since version 1.10.0): + + >>> a = np.zeros((3, 3)) + >>> np.einsum('ii->i', a)[:] = 1 + >>> a + array([[1., 0., 0.], + [0., 1., 0.], + [0., 0., 1.]]) + + Example of ellipsis use: + + >>> a = np.arange(6).reshape((3,2)) + >>> b = np.arange(12).reshape((4,3)) + >>> np.einsum('ki,jk->ij', a, b) + array([[10, 28, 46, 64], + [13, 40, 67, 94]]) + >>> np.einsum('ki,...k->i...', a, b) + array([[10, 28, 46, 64], + [13, 40, 67, 94]]) + >>> np.einsum('k...,jk', a, b) + array([[10, 28, 46, 64], + [13, 40, 67, 94]]) + + Chained array operations. 
For more complicated contractions, speed ups + might be achieved by repeatedly computing a 'greedy' path or pre-computing the + 'optimal' path and repeatedly applying it, using an + `einsum_path` insertion (since version 1.12.0). Performance improvements can be + particularly significant with larger arrays: + + >>> a = np.ones(64).reshape(2,4,8) + + Basic `einsum`: ~1520ms (benchmarked on 3.1GHz Intel i5.) + + >>> for iteration in range(500): + ... _ = np.einsum('ijk,ilm,njm,nlk,abc->',a,a,a,a,a) + + Sub-optimal `einsum` (due to repeated path calculation time): ~330ms + + >>> for iteration in range(500): + ... _ = np.einsum('ijk,ilm,njm,nlk,abc->',a,a,a,a,a, optimize='optimal') + + Greedy `einsum` (faster optimal path approximation): ~160ms + + >>> for iteration in range(500): + ... _ = np.einsum('ijk,ilm,njm,nlk,abc->',a,a,a,a,a, optimize='greedy') + + Optimal `einsum` (best usage pattern in some use cases): ~110ms + + >>> path = np.einsum_path('ijk,ilm,njm,nlk,abc->',a,a,a,a,a, optimize='optimal')[0] + >>> for iteration in range(500): + ... _ = np.einsum('ijk,ilm,njm,nlk,abc->',a,a,a,a,a, optimize=path) + + """ + # Special handling if out is specified + specified_out = out is not None + + # If no optimization, run pure einsum + if optimize is False: + if specified_out: + kwargs['out'] = out + return c_einsum(*operands, **kwargs) + + # Check the kwargs to avoid a more cryptic error later, without having to + # repeat default values here + valid_einsum_kwargs = ['dtype', 'order', 'casting'] + unknown_kwargs = [k for (k, v) in kwargs.items() if + k not in valid_einsum_kwargs] + if len(unknown_kwargs): + raise TypeError("Did not understand the following kwargs: %s" + % unknown_kwargs) + + # Build the contraction list and operand + operands, contraction_list = einsum_path(*operands, optimize=optimize, + einsum_call=True) + + # Handle order kwarg for output array, c_einsum allows mixed case + output_order = kwargs.pop('order', 'K') + if output_order.upper() == 'A': + if all(arr.flags.f_contiguous for arr in operands): + output_order = 'F' + else: + output_order = 'C' + + # Start contraction loop + for num, contraction in enumerate(contraction_list): + inds, idx_rm, einsum_str, remaining, blas = contraction + tmp_operands = [operands.pop(x) for x in inds] + + # Do we need to deal with the output? + handle_out = specified_out and ((num + 1) == len(contraction_list)) + + # Call tensordot if still possible + if blas: + # Checks have already been handled + input_str, results_index = einsum_str.split('->') + input_left, input_right = input_str.split(',') + + tensor_result = input_left + input_right + for s in idx_rm: + tensor_result = tensor_result.replace(s, "") + + # Find indices to contract over + left_pos, right_pos = [], [] + for s in sorted(idx_rm): + left_pos.append(input_left.find(s)) + right_pos.append(input_right.find(s)) + + # Contract! 
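+            # Illustrative trace (not from the original source): for a step
+            # like einsum_str = 'ij,jk->ik' with idx_rm = {'j'}, the code
+            # above computes tensor_result = 'ik', left_pos = [1] and
+            # right_pos = [0], so the tensordot below is the plain matrix
+            # product of the two tmp_operands.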
+ new_view = tensordot(*tmp_operands, axes=(tuple(left_pos), tuple(right_pos))) + + # Build a new view if needed + if (tensor_result != results_index) or handle_out: + if handle_out: + kwargs["out"] = out + new_view = c_einsum(tensor_result + '->' + results_index, new_view, **kwargs) + + # Call einsum + else: + # If out was specified + if handle_out: + kwargs["out"] = out + + # Do the contraction + new_view = c_einsum(einsum_str, *tmp_operands, **kwargs) + + # Append new items and dereference what we can + operands.append(new_view) + del tmp_operands, new_view + + if specified_out: + return out + else: + return asanyarray(operands[0], order=output_order) diff --git a/.local/lib/python3.8/site-packages/numpy/core/einsumfunc.pyi b/.local/lib/python3.8/site-packages/numpy/core/einsumfunc.pyi new file mode 100644 index 0000000..aabb04c --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/einsumfunc.pyi @@ -0,0 +1,145 @@ +from typing import List, TypeVar, Optional, Any, overload, Union, Tuple, Sequence, Literal + +from numpy import ( + ndarray, + dtype, + bool_, + unsignedinteger, + signedinteger, + floating, + complexfloating, + number, + _OrderKACF, +) +from numpy.typing import ( + _ArrayLikeBool_co, + _ArrayLikeUInt_co, + _ArrayLikeInt_co, + _ArrayLikeFloat_co, + _ArrayLikeComplex_co, + _DTypeLikeBool, + _DTypeLikeUInt, + _DTypeLikeInt, + _DTypeLikeFloat, + _DTypeLikeComplex, + _DTypeLikeComplex_co, +) + +_ArrayType = TypeVar( + "_ArrayType", + bound=ndarray[Any, dtype[Union[bool_, number[Any]]]], +) + +_OptimizeKind = Union[ + None, bool, Literal["greedy", "optimal"], Sequence[Any] +] +_CastingSafe = Literal["no", "equiv", "safe", "same_kind"] +_CastingUnsafe = Literal["unsafe"] + +__all__: List[str] + +# TODO: Properly handle the `casting`-based combinatorics +# TODO: We need to evaluate the content `__subscripts` in order +# to identify whether or an array or scalar is returned. At a cursory +# glance this seems like something that can quite easily be done with +# a mypy plugin. +# Something like `is_scalar = bool(__subscripts.partition("->")[-1])` +@overload +def einsum( + subscripts: str, + /, + *operands: _ArrayLikeBool_co, + out: None = ..., + dtype: Optional[_DTypeLikeBool] = ..., + order: _OrderKACF = ..., + casting: _CastingSafe = ..., + optimize: _OptimizeKind = ..., +) -> Any: ... +@overload +def einsum( + subscripts: str, + /, + *operands: _ArrayLikeUInt_co, + out: None = ..., + dtype: Optional[_DTypeLikeUInt] = ..., + order: _OrderKACF = ..., + casting: _CastingSafe = ..., + optimize: _OptimizeKind = ..., +) -> Any: ... +@overload +def einsum( + subscripts: str, + /, + *operands: _ArrayLikeInt_co, + out: None = ..., + dtype: Optional[_DTypeLikeInt] = ..., + order: _OrderKACF = ..., + casting: _CastingSafe = ..., + optimize: _OptimizeKind = ..., +) -> Any: ... +@overload +def einsum( + subscripts: str, + /, + *operands: _ArrayLikeFloat_co, + out: None = ..., + dtype: Optional[_DTypeLikeFloat] = ..., + order: _OrderKACF = ..., + casting: _CastingSafe = ..., + optimize: _OptimizeKind = ..., +) -> Any: ... +@overload +def einsum( + subscripts: str, + /, + *operands: _ArrayLikeComplex_co, + out: None = ..., + dtype: Optional[_DTypeLikeComplex] = ..., + order: _OrderKACF = ..., + casting: _CastingSafe = ..., + optimize: _OptimizeKind = ..., +) -> Any: ... 
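+# NOTE (illustrative comment, added for clarity): the overloads above pair
+# each operand kind with its matching `dtype` kind and restrict `casting`
+# to `_CastingSafe`; the overloads that follow cover `casting="unsafe"`
+# (a required keyword there) and the `out=`-returning variants, e.g.
+#   np.einsum("ij->i", a, casting="unsafe")  # matched via _CastingUnsafe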
+@overload +def einsum( + subscripts: str, + /, + *operands: Any, + casting: _CastingUnsafe, + dtype: Optional[_DTypeLikeComplex_co] = ..., + out: None = ..., + order: _OrderKACF = ..., + optimize: _OptimizeKind = ..., +) -> Any: ... +@overload +def einsum( + subscripts: str, + /, + *operands: _ArrayLikeComplex_co, + out: _ArrayType, + dtype: Optional[_DTypeLikeComplex_co] = ..., + order: _OrderKACF = ..., + casting: _CastingSafe = ..., + optimize: _OptimizeKind = ..., +) -> _ArrayType: ... +@overload +def einsum( + subscripts: str, + /, + *operands: Any, + out: _ArrayType, + casting: _CastingUnsafe, + dtype: Optional[_DTypeLikeComplex_co] = ..., + order: _OrderKACF = ..., + optimize: _OptimizeKind = ..., +) -> _ArrayType: ... + +# NOTE: `einsum_call` is a hidden kwarg unavailable for public use. +# It is therefore excluded from the signatures below. +# NOTE: In practice the list consists of a `str` (first element) +# and a variable number of integer tuples. +def einsum_path( + subscripts: str, + /, + *operands: _ArrayLikeComplex_co, + optimize: _OptimizeKind = ..., +) -> Tuple[List[Any], str]: ... diff --git a/.local/lib/python3.8/site-packages/numpy/core/fromnumeric.py b/.local/lib/python3.8/site-packages/numpy/core/fromnumeric.py new file mode 100644 index 0000000..3242124 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/fromnumeric.py @@ -0,0 +1,3823 @@ +"""Module containing non-deprecated functions borrowed from Numeric. + +""" +import functools +import types +import warnings + +import numpy as np +from . import multiarray as mu +from . import overrides +from . import umath as um +from . import numerictypes as nt +from .multiarray import asarray, array, asanyarray, concatenate +from . import _methods + +_dt_ = nt.sctype2char + +# functions that are methods +__all__ = [ + 'alen', 'all', 'alltrue', 'amax', 'amin', 'any', 'argmax', + 'argmin', 'argpartition', 'argsort', 'around', 'choose', 'clip', + 'compress', 'cumprod', 'cumproduct', 'cumsum', 'diagonal', 'mean', + 'ndim', 'nonzero', 'partition', 'prod', 'product', 'ptp', 'put', + 'ravel', 'repeat', 'reshape', 'resize', 'round_', + 'searchsorted', 'shape', 'size', 'sometrue', 'sort', 'squeeze', + 'std', 'sum', 'swapaxes', 'take', 'trace', 'transpose', 'var', +] + +_gentype = types.GeneratorType +# save away Python sum +_sum_ = sum + +array_function_dispatch = functools.partial( + overrides.array_function_dispatch, module='numpy') + + +# functions that are now methods +def _wrapit(obj, method, *args, **kwds): + try: + wrap = obj.__array_wrap__ + except AttributeError: + wrap = None + result = getattr(asarray(obj), method)(*args, **kwds) + if wrap: + if not isinstance(result, mu.ndarray): + result = asarray(result) + result = wrap(result) + return result + + +def _wrapfunc(obj, method, *args, **kwds): + bound = getattr(obj, method, None) + if bound is None: + return _wrapit(obj, method, *args, **kwds) + + try: + return bound(*args, **kwds) + except TypeError: + # A TypeError occurs if the object does have such a method in its + # class, but its signature is not identical to that of NumPy's. This + # situation has occurred in the case of a downstream library like + # 'pandas'. + # + # Call _wrapit from within the except clause to ensure a potential + # exception has a traceback chain. 
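+        #
+        # For instance (illustrative, not exhaustive): an object whose
+        # ``take`` method does not accept the ``mode`` keyword would raise
+        # TypeError for ``np.take(obj, idx, mode='clip')`` and is retried
+        # below after conversion with ``asarray``.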
+        return _wrapit(obj, method, *args, **kwds)
+
+
+def _wrapreduction(obj, ufunc, method, axis, dtype, out, **kwargs):
+    passkwargs = {k: v for k, v in kwargs.items()
+                  if v is not np._NoValue}
+
+    if type(obj) is not mu.ndarray:
+        try:
+            reduction = getattr(obj, method)
+        except AttributeError:
+            pass
+        else:
+            # This branch is needed for reductions like any which don't
+            # support a dtype.
+            if dtype is not None:
+                return reduction(axis=axis, dtype=dtype, out=out, **passkwargs)
+            else:
+                return reduction(axis=axis, out=out, **passkwargs)
+
+    return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
+
+
+def _take_dispatcher(a, indices, axis=None, out=None, mode=None):
+    return (a, out)
+
+
+@array_function_dispatch(_take_dispatcher)
+def take(a, indices, axis=None, out=None, mode='raise'):
+    """
+    Take elements from an array along an axis.
+
+    When axis is not None, this function does the same thing as "fancy"
+    indexing (indexing arrays using arrays); however, it can be easier to use
+    if you need elements along a given axis. A call such as
+    ``np.take(arr, indices, axis=3)`` is equivalent to
+    ``arr[:,:,:,indices,...]``.
+
+    Explained without fancy indexing, this is equivalent to the following use
+    of `ndindex`, which sets each of ``ii``, ``jj``, and ``kk`` to a tuple of
+    indices::
+
+        Ni, Nk = a.shape[:axis], a.shape[axis+1:]
+        Nj = indices.shape
+        for ii in ndindex(Ni):
+            for jj in ndindex(Nj):
+                for kk in ndindex(Nk):
+                    out[ii + jj + kk] = a[ii + (indices[jj],) + kk]
+
+    Parameters
+    ----------
+    a : array_like (Ni..., M, Nk...)
+        The source array.
+    indices : array_like (Nj...)
+        The indices of the values to extract.
+
+        .. versionadded:: 1.8.0
+
+        Also allow scalars for indices.
+    axis : int, optional
+        The axis over which to select values. By default, the flattened
+        input array is used.
+    out : ndarray, optional (Ni..., Nj..., Nk...)
+        If provided, the result will be placed in this array. It should
+        be of the appropriate shape and dtype. Note that `out` is always
+        buffered if `mode='raise'`; use other modes for better performance.
+    mode : {'raise', 'wrap', 'clip'}, optional
+        Specifies how out-of-bounds indices will behave.
+
+        * 'raise' -- raise an error (default)
+        * 'wrap' -- wrap around
+        * 'clip' -- clip to the range
+
+        'clip' mode means that all indices that are too large are replaced
+        by the index that addresses the last element along that axis. Note
+        that this disables indexing with negative numbers.
+
+    Returns
+    -------
+    out : ndarray (Ni..., Nj..., Nk...)
+        The returned array has the same type as `a`.
+
+    See Also
+    --------
+    compress : Take elements using a boolean mask
+    ndarray.take : equivalent method
+    take_along_axis : Take elements by matching the array and the index arrays
+
+    Notes
+    -----
+
+    By eliminating the inner loop in the description above, and using `s_` to
+    build simple slice objects, `take` can be expressed in terms of applying
+    fancy indexing to each 1-d slice::
+
+        Ni, Nk = a.shape[:axis], a.shape[axis+1:]
+        for ii in ndindex(Ni):
+            for kk in ndindex(Nk):
+                out[ii + s_[...,] + kk] = a[ii + s_[:,] + kk][indices]
+
+    For this reason, it is equivalent to (but faster than) the following use
+    of `apply_along_axis`::
+
+        out = np.apply_along_axis(lambda a_1d: a_1d[indices], axis, a)
+
+    Examples
+    --------
+    >>> a = [4, 3, 5, 7, 6, 8]
+    >>> indices = [0, 1, 4]
+    >>> np.take(a, indices)
+    array([4, 3, 6])
+
+    In this example if `a` is an ndarray, "fancy" indexing can be used.
+ + >>> a = np.array(a) + >>> a[indices] + array([4, 3, 6]) + + If `indices` is not one dimensional, the output also has these dimensions. + + >>> np.take(a, [[0, 1], [2, 3]]) + array([[4, 3], + [5, 7]]) + """ + return _wrapfunc(a, 'take', indices, axis=axis, out=out, mode=mode) + + +def _reshape_dispatcher(a, newshape, order=None): + return (a,) + + +# not deprecated --- copy if necessary, view otherwise +@array_function_dispatch(_reshape_dispatcher) +def reshape(a, newshape, order='C'): + """ + Gives a new shape to an array without changing its data. + + Parameters + ---------- + a : array_like + Array to be reshaped. + newshape : int or tuple of ints + The new shape should be compatible with the original shape. If + an integer, then the result will be a 1-D array of that length. + One shape dimension can be -1. In this case, the value is + inferred from the length of the array and remaining dimensions. + order : {'C', 'F', 'A'}, optional + Read the elements of `a` using this index order, and place the + elements into the reshaped array using this index order. 'C' + means to read / write the elements using C-like index order, + with the last axis index changing fastest, back to the first + axis index changing slowest. 'F' means to read / write the + elements using Fortran-like index order, with the first index + changing fastest, and the last index changing slowest. Note that + the 'C' and 'F' options take no account of the memory layout of + the underlying array, and only refer to the order of indexing. + 'A' means to read / write the elements in Fortran-like index + order if `a` is Fortran *contiguous* in memory, C-like order + otherwise. + + Returns + ------- + reshaped_array : ndarray + This will be a new view object if possible; otherwise, it will + be a copy. Note there is no guarantee of the *memory layout* (C- or + Fortran- contiguous) of the returned array. + + See Also + -------- + ndarray.reshape : Equivalent method. + + Notes + ----- + It is not always possible to change the shape of an array without + copying the data. If you want an error to be raised when the data is copied, + you should assign the new shape to the shape attribute of the array:: + + >>> a = np.zeros((10, 2)) + + # A transpose makes the array non-contiguous + >>> b = a.T + + # Taking a view makes it possible to modify the shape without modifying + # the initial object. + >>> c = b.view() + >>> c.shape = (20) + Traceback (most recent call last): + ... + AttributeError: Incompatible shape for in-place modification. Use + `.reshape()` to make a copy with the desired shape. + + The `order` keyword gives the index ordering both for *fetching* the values + from `a`, and then *placing* the values into the output array. + For example, let's say you have an array: + + >>> a = np.arange(6).reshape((3, 2)) + >>> a + array([[0, 1], + [2, 3], + [4, 5]]) + + You can think of reshaping as first raveling the array (using the given + index order), then inserting the elements from the raveled array into the + new array using the same kind of index ordering as was used for the + raveling. 
+ + >>> np.reshape(a, (2, 3)) # C-like index ordering + array([[0, 1, 2], + [3, 4, 5]]) + >>> np.reshape(np.ravel(a), (2, 3)) # equivalent to C ravel then C reshape + array([[0, 1, 2], + [3, 4, 5]]) + >>> np.reshape(a, (2, 3), order='F') # Fortran-like index ordering + array([[0, 4, 3], + [2, 1, 5]]) + >>> np.reshape(np.ravel(a, order='F'), (2, 3), order='F') + array([[0, 4, 3], + [2, 1, 5]]) + + Examples + -------- + >>> a = np.array([[1,2,3], [4,5,6]]) + >>> np.reshape(a, 6) + array([1, 2, 3, 4, 5, 6]) + >>> np.reshape(a, 6, order='F') + array([1, 4, 2, 5, 3, 6]) + + >>> np.reshape(a, (3,-1)) # the unspecified value is inferred to be 2 + array([[1, 2], + [3, 4], + [5, 6]]) + """ + return _wrapfunc(a, 'reshape', newshape, order=order) + + +def _choose_dispatcher(a, choices, out=None, mode=None): + yield a + yield from choices + yield out + + +@array_function_dispatch(_choose_dispatcher) +def choose(a, choices, out=None, mode='raise'): + """ + Construct an array from an index array and a list of arrays to choose from. + + First of all, if confused or uncertain, definitely look at the Examples - + in its full generality, this function is less simple than it might + seem from the following code description (below ndi = + `numpy.lib.index_tricks`): + + ``np.choose(a,c) == np.array([c[a[I]][I] for I in ndi.ndindex(a.shape)])``. + + But this omits some subtleties. Here is a fully general summary: + + Given an "index" array (`a`) of integers and a sequence of ``n`` arrays + (`choices`), `a` and each choice array are first broadcast, as necessary, + to arrays of a common shape; calling these *Ba* and *Bchoices[i], i = + 0,...,n-1* we have that, necessarily, ``Ba.shape == Bchoices[i].shape`` + for each ``i``. Then, a new array with shape ``Ba.shape`` is created as + follows: + + * if ``mode='raise'`` (the default), then, first of all, each element of + ``a`` (and thus ``Ba``) must be in the range ``[0, n-1]``; now, suppose + that ``i`` (in that range) is the value at the ``(j0, j1, ..., jm)`` + position in ``Ba`` - then the value at the same position in the new array + is the value in ``Bchoices[i]`` at that same position; + + * if ``mode='wrap'``, values in `a` (and thus `Ba`) may be any (signed) + integer; modular arithmetic is used to map integers outside the range + `[0, n-1]` back into that range; and then the new array is constructed + as above; + + * if ``mode='clip'``, values in `a` (and thus ``Ba``) may be any (signed) + integer; negative integers are mapped to 0; values greater than ``n-1`` + are mapped to ``n-1``; and then the new array is constructed as above. + + Parameters + ---------- + a : int array + This array must contain integers in ``[0, n-1]``, where ``n`` is the + number of choices, unless ``mode=wrap`` or ``mode=clip``, in which + cases any integers are permissible. + choices : sequence of arrays + Choice arrays. `a` and all of the choices must be broadcastable to the + same shape. If `choices` is itself an array (not recommended), then + its outermost dimension (i.e., the one corresponding to + ``choices.shape[0]``) is taken as defining the "sequence". + out : array, optional + If provided, the result will be inserted into this array. It should + be of the appropriate shape and dtype. Note that `out` is always + buffered if ``mode='raise'``; use other modes for better performance. 
+ mode : {'raise' (default), 'wrap', 'clip'}, optional + Specifies how indices outside ``[0, n-1]`` will be treated: + + * 'raise' : an exception is raised + * 'wrap' : value becomes value mod ``n`` + * 'clip' : values < 0 are mapped to 0, values > n-1 are mapped to n-1 + + Returns + ------- + merged_array : array + The merged result. + + Raises + ------ + ValueError: shape mismatch + If `a` and each choice array are not all broadcastable to the same + shape. + + See Also + -------- + ndarray.choose : equivalent method + numpy.take_along_axis : Preferable if `choices` is an array + + Notes + ----- + To reduce the chance of misinterpretation, even though the following + "abuse" is nominally supported, `choices` should neither be, nor be + thought of as, a single array, i.e., the outermost sequence-like container + should be either a list or a tuple. + + Examples + -------- + + >>> choices = [[0, 1, 2, 3], [10, 11, 12, 13], + ... [20, 21, 22, 23], [30, 31, 32, 33]] + >>> np.choose([2, 3, 1, 0], choices + ... # the first element of the result will be the first element of the + ... # third (2+1) "array" in choices, namely, 20; the second element + ... # will be the second element of the fourth (3+1) choice array, i.e., + ... # 31, etc. + ... ) + array([20, 31, 12, 3]) + >>> np.choose([2, 4, 1, 0], choices, mode='clip') # 4 goes to 3 (4-1) + array([20, 31, 12, 3]) + >>> # because there are 4 choice arrays + >>> np.choose([2, 4, 1, 0], choices, mode='wrap') # 4 goes to (4 mod 4) + array([20, 1, 12, 3]) + >>> # i.e., 0 + + A couple examples illustrating how choose broadcasts: + + >>> a = [[1, 0, 1], [0, 1, 0], [1, 0, 1]] + >>> choices = [-10, 10] + >>> np.choose(a, choices) + array([[ 10, -10, 10], + [-10, 10, -10], + [ 10, -10, 10]]) + + >>> # With thanks to Anne Archibald + >>> a = np.array([0, 1]).reshape((2,1,1)) + >>> c1 = np.array([1, 2, 3]).reshape((1,3,1)) + >>> c2 = np.array([-1, -2, -3, -4, -5]).reshape((1,1,5)) + >>> np.choose(a, (c1, c2)) # result is 2x3x5, res[0,:,:]=c1, res[1,:,:]=c2 + array([[[ 1, 1, 1, 1, 1], + [ 2, 2, 2, 2, 2], + [ 3, 3, 3, 3, 3]], + [[-1, -2, -3, -4, -5], + [-1, -2, -3, -4, -5], + [-1, -2, -3, -4, -5]]]) + + """ + return _wrapfunc(a, 'choose', choices, out=out, mode=mode) + + +def _repeat_dispatcher(a, repeats, axis=None): + return (a,) + + +@array_function_dispatch(_repeat_dispatcher) +def repeat(a, repeats, axis=None): + """ + Repeat elements of an array. + + Parameters + ---------- + a : array_like + Input array. + repeats : int or array of ints + The number of repetitions for each element. `repeats` is broadcasted + to fit the shape of the given axis. + axis : int, optional + The axis along which to repeat values. By default, use the + flattened input array, and return a flat output array. + + Returns + ------- + repeated_array : ndarray + Output array which has the same shape as `a`, except along + the given axis. + + See Also + -------- + tile : Tile an array. + unique : Find the unique elements of an array. 
+ + Examples + -------- + >>> np.repeat(3, 4) + array([3, 3, 3, 3]) + >>> x = np.array([[1,2],[3,4]]) + >>> np.repeat(x, 2) + array([1, 1, 2, 2, 3, 3, 4, 4]) + >>> np.repeat(x, 3, axis=1) + array([[1, 1, 1, 2, 2, 2], + [3, 3, 3, 4, 4, 4]]) + >>> np.repeat(x, [1, 2], axis=0) + array([[1, 2], + [3, 4], + [3, 4]]) + + """ + return _wrapfunc(a, 'repeat', repeats, axis=axis) + + +def _put_dispatcher(a, ind, v, mode=None): + return (a, ind, v) + + +@array_function_dispatch(_put_dispatcher) +def put(a, ind, v, mode='raise'): + """ + Replaces specified elements of an array with given values. + + The indexing works on the flattened target array. `put` is roughly + equivalent to: + + :: + + a.flat[ind] = v + + Parameters + ---------- + a : ndarray + Target array. + ind : array_like + Target indices, interpreted as integers. + v : array_like + Values to place in `a` at target indices. If `v` is shorter than + `ind` it will be repeated as necessary. + mode : {'raise', 'wrap', 'clip'}, optional + Specifies how out-of-bounds indices will behave. + + * 'raise' -- raise an error (default) + * 'wrap' -- wrap around + * 'clip' -- clip to the range + + 'clip' mode means that all indices that are too large are replaced + by the index that addresses the last element along that axis. Note + that this disables indexing with negative numbers. In 'raise' mode, + if an exception occurs the target array may still be modified. + + See Also + -------- + putmask, place + put_along_axis : Put elements by matching the array and the index arrays + + Examples + -------- + >>> a = np.arange(5) + >>> np.put(a, [0, 2], [-44, -55]) + >>> a + array([-44, 1, -55, 3, 4]) + + >>> a = np.arange(5) + >>> np.put(a, 22, -5, mode='clip') + >>> a + array([ 0, 1, 2, 3, -5]) + + """ + try: + put = a.put + except AttributeError as e: + raise TypeError("argument 1 must be numpy.ndarray, " + "not {name}".format(name=type(a).__name__)) from e + + return put(ind, v, mode=mode) + + +def _swapaxes_dispatcher(a, axis1, axis2): + return (a,) + + +@array_function_dispatch(_swapaxes_dispatcher) +def swapaxes(a, axis1, axis2): + """ + Interchange two axes of an array. + + Parameters + ---------- + a : array_like + Input array. + axis1 : int + First axis. + axis2 : int + Second axis. + + Returns + ------- + a_swapped : ndarray + For NumPy >= 1.10.0, if `a` is an ndarray, then a view of `a` is + returned; otherwise a new array is created. For earlier NumPy + versions a view of `a` is returned only if the order of the + axes is changed, otherwise the input array is returned. + + Examples + -------- + >>> x = np.array([[1,2,3]]) + >>> np.swapaxes(x,0,1) + array([[1], + [2], + [3]]) + + >>> x = np.array([[[0,1],[2,3]],[[4,5],[6,7]]]) + >>> x + array([[[0, 1], + [2, 3]], + [[4, 5], + [6, 7]]]) + + >>> np.swapaxes(x,0,2) + array([[[0, 4], + [2, 6]], + [[1, 5], + [3, 7]]]) + + """ + return _wrapfunc(a, 'swapaxes', axis1, axis2) + + +def _transpose_dispatcher(a, axes=None): + return (a,) + + +@array_function_dispatch(_transpose_dispatcher) +def transpose(a, axes=None): + """ + Reverse or permute the axes of an array; returns the modified array. + + For an array a with two axes, transpose(a) gives the matrix transpose. + + Refer to `numpy.ndarray.transpose` for full documentation. + + Parameters + ---------- + a : array_like + Input array. + axes : tuple or list of ints, optional + If specified, it must be a tuple or list which contains a permutation of + [0,1,..,N-1] where N is the number of axes of a. 
The i'th axis of the + returned array will correspond to the axis numbered ``axes[i]`` of the + input. If not specified, defaults to ``range(a.ndim)[::-1]``, which + reverses the order of the axes. + + Returns + ------- + p : ndarray + `a` with its axes permuted. A view is returned whenever + possible. + + See Also + -------- + ndarray.transpose : Equivalent method + moveaxis + argsort + + Notes + ----- + Use `transpose(a, argsort(axes))` to invert the transposition of tensors + when using the `axes` keyword argument. + + Transposing a 1-D array returns an unchanged view of the original array. + + Examples + -------- + >>> x = np.arange(4).reshape((2,2)) + >>> x + array([[0, 1], + [2, 3]]) + + >>> np.transpose(x) + array([[0, 2], + [1, 3]]) + + >>> x = np.ones((1, 2, 3)) + >>> np.transpose(x, (1, 0, 2)).shape + (2, 1, 3) + + >>> x = np.ones((2, 3, 4, 5)) + >>> np.transpose(x).shape + (5, 4, 3, 2) + + """ + return _wrapfunc(a, 'transpose', axes) + + +def _partition_dispatcher(a, kth, axis=None, kind=None, order=None): + return (a,) + + +@array_function_dispatch(_partition_dispatcher) +def partition(a, kth, axis=-1, kind='introselect', order=None): + """ + Return a partitioned copy of an array. + + Creates a copy of the array with its elements rearranged in such a + way that the value of the element in k-th position is in the + position it would be in a sorted array. All elements smaller than + the k-th element are moved before this element and all equal or + greater are moved behind it. The ordering of the elements in the two + partitions is undefined. + + .. versionadded:: 1.8.0 + + Parameters + ---------- + a : array_like + Array to be sorted. + kth : int or sequence of ints + Element index to partition by. The k-th value of the element + will be in its final sorted position and all smaller elements + will be moved before it and all equal or greater elements behind + it. The order of all elements in the partitions is undefined. If + provided with a sequence of k-th it will partition all elements + indexed by k-th of them into their sorted position at once. + + .. deprecated:: 1.22.0 + Passing booleans as index is deprecated. + axis : int or None, optional + Axis along which to sort. If None, the array is flattened before + sorting. The default is -1, which sorts along the last axis. + kind : {'introselect'}, optional + Selection algorithm. Default is 'introselect'. + order : str or list of str, optional + When `a` is an array with fields defined, this argument + specifies which fields to compare first, second, etc. A single + field can be specified as a string. Not all fields need be + specified, but unspecified fields will still be used, in the + order in which they come up in the dtype, to break ties. + + Returns + ------- + partitioned_array : ndarray + Array of the same type and shape as `a`. + + See Also + -------- + ndarray.partition : Method to sort an array in-place. + argpartition : Indirect partition. + sort : Full sorting + + Notes + ----- + The various selection algorithms are characterized by their average + speed, worst case performance, work space size, and whether they are + stable. A stable sort keeps items with the same key in the same + relative order. 
The available algorithms have the following properties:
+
+    ================= ======= ============= ============ =======
+       kind            speed   worst case    work space  stable
+    ================= ======= ============= ============ =======
+    'introselect'        1        O(n)           0         no
+    ================= ======= ============= ============ =======
+
+    All the partition algorithms make temporary copies of the data when
+    partitioning along any but the last axis. Consequently,
+    partitioning along the last axis is faster and uses less space than
+    partitioning along any other axis.
+
+    The sort order for complex numbers is lexicographic. If both the
+    real and imaginary parts are non-nan then the order is determined by
+    the real parts except when they are equal, in which case the order
+    is determined by the imaginary parts.
+
+    Examples
+    --------
+    >>> a = np.array([3, 4, 2, 1])
+    >>> np.partition(a, 3)
+    array([2, 1, 3, 4])
+
+    >>> np.partition(a, (1, 3))
+    array([1, 2, 3, 4])
+
+    """
+    if axis is None:
+        # flatten returns (1, N) for np.matrix, so always use the last axis
+        a = asanyarray(a).flatten()
+        axis = -1
+    else:
+        a = asanyarray(a).copy(order="K")
+    a.partition(kth, axis=axis, kind=kind, order=order)
+    return a
+
+
+def _argpartition_dispatcher(a, kth, axis=None, kind=None, order=None):
+    return (a,)
+
+
+@array_function_dispatch(_argpartition_dispatcher)
+def argpartition(a, kth, axis=-1, kind='introselect', order=None):
+    """
+    Perform an indirect partition along the given axis using the
+    algorithm specified by the `kind` keyword. It returns an array of
+    indices of the same shape as `a` that index data along the given
+    axis in partitioned order.
+
+    .. versionadded:: 1.8.0
+
+    Parameters
+    ----------
+    a : array_like
+        Array to sort.
+    kth : int or sequence of ints
+        Element index to partition by. The k-th element will be in its
+        final sorted position and all smaller elements will be moved
+        before it and all larger elements behind it. The order of all
+        elements in the partitions is undefined. If provided with a
+        sequence of k-th values, all of them will be partitioned into
+        their sorted positions at once.
+
+        .. deprecated:: 1.22.0
+            Passing booleans as index is deprecated.
+    axis : int or None, optional
+        Axis along which to sort. The default is -1 (the last axis). If
+        None, the flattened array is used.
+    kind : {'introselect'}, optional
+        Selection algorithm. Default is 'introselect'.
+    order : str or list of str, optional
+        When `a` is an array with fields defined, this argument
+        specifies which fields to compare first, second, etc. A single
+        field can be specified as a string, and not all fields need be
+        specified, but unspecified fields will still be used, in the
+        order in which they come up in the dtype, to break ties.
+
+    Returns
+    -------
+    index_array : ndarray, int
+        Array of indices that partition `a` along the specified axis.
+        If `a` is one-dimensional, ``a[index_array]`` yields a partitioned `a`.
+        More generally, ``np.take_along_axis(a, index_array, axis=axis)``
+        always yields the partitioned `a`, irrespective of dimensionality.
+
+    See Also
+    --------
+    partition : Describes partition algorithms used.
+    ndarray.partition : Inplace partition.
+    argsort : Full indirect sort.
+    take_along_axis : Apply ``index_array`` from argpartition
+                      to an array as if by calling partition.
+
+    Notes
+    -----
+    See `partition` for notes on the different selection algorithms.
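+
+    A common use is to extract the ``k`` smallest values without a full
+    sort. Because the ordering inside each partition is unspecified, the
+    selected values are sorted below to make the output deterministic (an
+    illustrative sketch, not the only way to spell it):
+
+    >>> x = np.array([3, 4, 2, 1])
+    >>> k = 2
+    >>> np.sort(x[np.argpartition(x, k)[:k]])
+    array([1, 2])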
+ + Examples + -------- + One dimensional array: + + >>> x = np.array([3, 4, 2, 1]) + >>> x[np.argpartition(x, 3)] + array([2, 1, 3, 4]) + >>> x[np.argpartition(x, (1, 3))] + array([1, 2, 3, 4]) + + >>> x = [3, 4, 2, 1] + >>> np.array(x)[np.argpartition(x, 3)] + array([2, 1, 3, 4]) + + Multi-dimensional array: + + >>> x = np.array([[3, 4, 2], [1, 3, 1]]) + >>> index_array = np.argpartition(x, kth=1, axis=-1) + >>> np.take_along_axis(x, index_array, axis=-1) # same as np.partition(x, kth=1) + array([[2, 3, 4], + [1, 1, 3]]) + + """ + return _wrapfunc(a, 'argpartition', kth, axis=axis, kind=kind, order=order) + + +def _sort_dispatcher(a, axis=None, kind=None, order=None): + return (a,) + + +@array_function_dispatch(_sort_dispatcher) +def sort(a, axis=-1, kind=None, order=None): + """ + Return a sorted copy of an array. + + Parameters + ---------- + a : array_like + Array to be sorted. + axis : int or None, optional + Axis along which to sort. If None, the array is flattened before + sorting. The default is -1, which sorts along the last axis. + kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, optional + Sorting algorithm. The default is 'quicksort'. Note that both 'stable' + and 'mergesort' use timsort or radix sort under the covers and, in general, + the actual implementation will vary with data type. The 'mergesort' option + is retained for backwards compatibility. + + .. versionchanged:: 1.15.0. + The 'stable' option was added. + + order : str or list of str, optional + When `a` is an array with fields defined, this argument specifies + which fields to compare first, second, etc. A single field can + be specified as a string, and not all fields need be specified, + but unspecified fields will still be used, in the order in which + they come up in the dtype, to break ties. + + Returns + ------- + sorted_array : ndarray + Array of the same type and shape as `a`. + + See Also + -------- + ndarray.sort : Method to sort an array in-place. + argsort : Indirect sort. + lexsort : Indirect stable sort on multiple keys. + searchsorted : Find elements in a sorted array. + partition : Partial sort. + + Notes + ----- + The various sorting algorithms are characterized by their average speed, + worst case performance, work space size, and whether they are stable. A + stable sort keeps items with the same key in the same relative + order. The four algorithms implemented in NumPy have the following + properties: + + =========== ======= ============= ============ ======== + kind speed worst case work space stable + =========== ======= ============= ============ ======== + 'quicksort' 1 O(n^2) 0 no + 'heapsort' 3 O(n*log(n)) 0 no + 'mergesort' 2 O(n*log(n)) ~n/2 yes + 'timsort' 2 O(n*log(n)) ~n/2 yes + =========== ======= ============= ============ ======== + + .. note:: The datatype determines which of 'mergesort' or 'timsort' + is actually used, even if 'mergesort' is specified. User selection + at a finer scale is not currently available. + + All the sort algorithms make temporary copies of the data when + sorting along any but the last axis. Consequently, sorting along + the last axis is faster and uses less space than sorting along + any other axis. + + The sort order for complex numbers is lexicographic. If both the real + and imaginary parts are non-nan then the order is determined by the + real parts except when they are equal, in which case the order is + determined by the imaginary parts. 
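+
+    For instance, values with equal real parts fall back to the imaginary
+    part (a small illustration of the rule above):
+
+    >>> np.sort(np.array([1+2j, 1+1j, 2+0j]))
+    array([1.+1.j, 1.+2.j, 2.+0.j])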
+
+    Previous to numpy 1.4.0 sorting real and complex arrays containing nan
+    values led to undefined behaviour. In numpy versions >= 1.4.0 nan
+    values are sorted to the end. The extended sort order is:
+
+      * Real: [R, nan]
+      * Complex: [R + Rj, R + nanj, nan + Rj, nan + nanj]
+
+    where R is a non-nan real value. Complex values with the same nan
+    placements are sorted according to the non-nan part if it exists.
+    Non-nan values are sorted as before.
+
+    .. versionadded:: 1.12.0
+
+    quicksort has been changed to
+    `introsort <https://en.wikipedia.org/wiki/Introsort>`_.
+    When sorting does not make enough progress it switches to
+    `heapsort <https://en.wikipedia.org/wiki/Heapsort>`_.
+    This implementation makes quicksort O(n*log(n)) in the worst case.
+
+    'stable' automatically chooses the best stable sorting algorithm
+    for the data type being sorted.
+    It, along with 'mergesort' is currently mapped to
+    `timsort <https://en.wikipedia.org/wiki/Timsort>`_
+    or `radix sort <https://en.wikipedia.org/wiki/Radix_sort>`_
+    depending on the data type.
+    API forward compatibility currently limits the
+    ability to select the implementation and it is hardwired for the
+    different data types.
+
+    .. versionadded:: 1.17.0
+
+    Timsort is added for better performance on already or nearly
+    sorted data. On random data timsort is almost identical to
+    mergesort. It is now used for stable sort while quicksort is still the
+    default sort if none is chosen. For timsort details, refer to
+    `CPython listsort.txt
+    <https://github.com/python/cpython/blob/3.7/Objects/listsort.txt>`_.
+    'mergesort' and 'stable' are mapped to radix sort for integer data
+    types. Radix sort is an O(n) sort instead of O(n log n).
+
+    .. versionchanged:: 1.18.0
+
+    NaT now sorts to the end of arrays for consistency with NaN.
+
+    Examples
+    --------
+    >>> a = np.array([[1,4],[3,1]])
+    >>> np.sort(a)                # sort along the last axis
+    array([[1, 4],
+           [1, 3]])
+    >>> np.sort(a, axis=None)     # sort the flattened array
+    array([1, 1, 3, 4])
+    >>> np.sort(a, axis=0)        # sort along the first axis
+    array([[1, 1],
+           [3, 4]])
+
+    Use the `order` keyword to specify a field to use when sorting a
+    structured array:
+
+    >>> dtype = [('name', 'S10'), ('height', float), ('age', int)]
+    >>> values = [('Arthur', 1.8, 41), ('Lancelot', 1.9, 38),
+    ...           ('Galahad', 1.7, 38)]
+    >>> a = np.array(values, dtype=dtype)       # create a structured array
+    >>> np.sort(a, order='height')                        # doctest: +SKIP
+    array([('Galahad', 1.7, 38), ('Arthur', 1.8, 41),
+           ('Lancelot', 1.8999999999999999, 38)],
+          dtype=[('name', '|S10'), ('height', '<f8'), ('age', '<i4')])
+
+    Sort by age, then height if ages are equal:
+
+    >>> np.sort(a, order=['age', 'height'])               # doctest: +SKIP
+    array([('Galahad', 1.7, 38), ('Lancelot', 1.8999999999999999, 38),
+           ('Arthur', 1.8, 41)],
+          dtype=[('name', '|S10'), ('height', '<f8'), ('age', '<i4')])
+
+    """
+    if axis is None:
+        # flatten returns (1, N) for np.matrix, so always use the last axis
+        a = asanyarray(a).flatten()
+        axis = -1
+    else:
+        a = asanyarray(a).copy(order="K")
+    a.sort(axis=axis, kind=kind, order=order)
+    return a
+
+
+def _argsort_dispatcher(a, axis=None, kind=None, order=None):
+    return (a,)
+
+
+@array_function_dispatch(_argsort_dispatcher)
+def argsort(a, axis=-1, kind=None, order=None):
+    """
+    Returns the indices that would sort an array.
+
+    Perform an indirect sort along the given axis using the algorithm
+    specified by the `kind` keyword. It returns an array of indices of
+    the same shape as `a` that index data along the given axis in sorted
+    order.
+
+    Parameters
+    ----------
+    a : array_like
+        Array to sort.
+    axis : int or None, optional
+        Axis along which to sort. The default is -1 (the last axis). If
+        None, the flattened array is used.
+    kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, optional
+        Sorting algorithm. The default is 'quicksort'. Note that both
+        'stable' and 'mergesort' use timsort under the covers and, in
+        general, the actual implementation will vary with data type. The
+        'mergesort' option is retained for backwards compatibility.
+
+        .. versionchanged:: 1.15.0
+           The 'stable' option was added.
+    order : str or list of str, optional
+        When `a` is an array with fields defined, this argument specifies
+        which fields to compare first, second, etc. A single field can
+        be specified as a string, and not all fields need be specified,
+        but unspecified fields will still be used, in the order in which
+        they come up in the dtype, to break ties.
+
+    Returns
+    -------
+    index_array : ndarray, int
+        Array of indices that sort `a` along the specified `axis`.
+        If `a` is one-dimensional, ``a[index_array]`` yields a sorted `a`.
+        More generally, ``np.take_along_axis(a, index_array, axis=axis)``
+        always yields the sorted `a`, irrespective of dimensionality.
+
+    See Also
+    --------
+    sort : Describes sorting algorithms used.
+    lexsort : Indirect stable sort with multiple keys.
+    ndarray.sort : Inplace sort.
+    argpartition : Indirect partial sort.
+    take_along_axis : Apply ``index_array`` from argsort
+                      to an array as if by calling sort.
+
+    Notes
+    -----
+    See `sort` for notes on the different sorting algorithms.
+
+    As of NumPy 1.4.0 `argsort` works with real/complex arrays containing
+    nan values. The enhanced sort order is documented in `sort`.
+
+    Examples
+    --------
+    One dimensional array:
+
+    >>> x = np.array([3, 1, 2])
+    >>> np.argsort(x)
+    array([1, 2, 0])
+
+    Two-dimensional array:
+
+    >>> x = np.array([[0, 3], [2, 2]])
+    >>> x
+    array([[0, 3],
+           [2, 2]])
+
+    >>> ind = np.argsort(x, axis=0)  # sorts along first axis (down)
+    >>> ind
+    array([[0, 1],
+           [1, 0]])
+    >>> np.take_along_axis(x, ind, axis=0)  # same as np.sort(x, axis=0)
+    array([[0, 2],
+           [2, 3]])
+
+    >>> ind = np.argsort(x, axis=1)  # sorts along last axis (across)
+    >>> ind
+    array([[0, 1],
+           [0, 1]])
+    >>> np.take_along_axis(x, ind, axis=1)  # same as np.sort(x, axis=1)
+    array([[0, 3],
+           [2, 2]])
+
+    Indices of the sorted elements of an N-dimensional array:
+
+    >>> ind = np.unravel_index(np.argsort(x, axis=None), x.shape)
+    >>> ind
+    (array([0, 1, 1, 0]), array([0, 0, 1, 1]))
+    >>> x[ind]  # same as np.sort(x, axis=None)
+    array([0, 2, 2, 3])
+
+    Sorting with keys:
+
+    >>> x = np.array([(1, 0), (0, 1)], dtype=[('x', '<i4'), ('y', '<i4')])
+    >>> x
+    array([(1, 0), (0, 1)],
+          dtype=[('x', '<i4'), ('y', '<i4')])
+
+    >>> np.argsort(x, order=('x','y'))
+    array([1, 0])
+
+    >>> np.argsort(x, order=('y','x'))
+    array([0, 1])
+
+    """
+    return _wrapfunc(a, 'argsort', axis=axis, kind=kind, order=order)
+
+
+def _argmax_dispatcher(a, axis=None, out=None, *, keepdims=np._NoValue):
+    return (a, out)
+
+
+@array_function_dispatch(_argmax_dispatcher)
+def argmax(a, axis=None, out=None, *, keepdims=np._NoValue):
+    """
+    Returns the indices of the maximum values along an axis.
+
+    Parameters
+    ----------
+    a : array_like
+        Input array.
+    axis : int, optional
+        By default, the index is into the flattened array, otherwise
+        along the specified axis.
+    out : array, optional
+        If provided, the result will be inserted into this array. It should
+        be of the appropriate shape and dtype.
+    keepdims : bool, optional
+        If this is set to True, the axes which are reduced are left
+        in the result as dimensions with size one. With this option,
+        the result will broadcast correctly against the array.
+
+        .. versionadded:: 1.22.0
+
+    Returns
+    -------
+    index_array : ndarray of ints
+        Array of indices into the array. It has the same shape as `a`,
+        with the dimension along `axis` removed. If `keepdims` is set
+        to True, the dimension along `axis` is kept with size one, so
+        the result has the same number of dimensions as `a`.
+
+    See Also
+    --------
+    ndarray.argmax, argmin
+    amax : The maximum value along a given axis.
+    unravel_index : Convert a flat index into an index tuple.
+    take_along_axis : Apply ``np.expand_dims(index_array, axis)``
+                      from argmax to an array as if by calling max.
+
+    Notes
+    -----
+    In case of multiple occurrences of the maximum values, the indices
+    corresponding to the first occurrence are returned.
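+
+    `argmax` reduces over a single axis. To locate the maximum over
+    several trailing axes at once, one workable sketch is to flatten
+    those axes first and unravel the flat index afterwards:
+
+    >>> a = np.arange(24).reshape(2, 3, 4)
+    >>> flat = a.reshape(a.shape[0], -1).argmax(axis=1)
+    >>> np.unravel_index(flat, a.shape[1:])
+    (array([2, 2]), array([3, 3]))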
+
+    Examples
+    --------
+    >>> a = np.arange(6).reshape(2,3) + 10
+    >>> a
+    array([[10, 11, 12],
+           [13, 14, 15]])
+    >>> np.argmax(a)
+    5
+    >>> np.argmax(a, axis=0)
+    array([1, 1, 1])
+    >>> np.argmax(a, axis=1)
+    array([2, 2])
+
+    Indexes of the maximal elements of an N-dimensional array:
+
+    >>> ind = np.unravel_index(np.argmax(a, axis=None), a.shape)
+    >>> ind
+    (1, 2)
+    >>> a[ind]
+    15
+
+    >>> b = np.arange(6)
+    >>> b[1] = 5
+    >>> b
+    array([0, 5, 2, 3, 4, 5])
+    >>> np.argmax(b)  # Only the first occurrence is returned.
+    1
+
+    >>> x = np.array([[4,2,3], [1,0,3]])
+    >>> index_array = np.argmax(x, axis=-1)
+    >>> # Same as np.amax(x, axis=-1, keepdims=True)
+    >>> np.take_along_axis(x, np.expand_dims(index_array, axis=-1), axis=-1)
+    array([[4],
+           [3]])
+    >>> # Same as np.amax(x, axis=-1)
+    >>> np.take_along_axis(x, np.expand_dims(index_array, axis=-1), axis=-1).squeeze(axis=-1)
+    array([4, 3])
+
+    Setting `keepdims` to `True`,
+
+    >>> x = np.arange(24).reshape((2, 3, 4))
+    >>> res = np.argmax(x, axis=1, keepdims=True)
+    >>> res.shape
+    (2, 1, 4)
+    """
+    kwds = {'keepdims': keepdims} if keepdims is not np._NoValue else {}
+    return _wrapfunc(a, 'argmax', axis=axis, out=out, **kwds)
+
+
+def _argmin_dispatcher(a, axis=None, out=None, *, keepdims=np._NoValue):
+    return (a, out)
+
+
+@array_function_dispatch(_argmin_dispatcher)
+def argmin(a, axis=None, out=None, *, keepdims=np._NoValue):
+    """
+    Returns the indices of the minimum values along an axis.
+
+    Parameters
+    ----------
+    a : array_like
+        Input array.
+    axis : int, optional
+        By default, the index is into the flattened array, otherwise
+        along the specified axis.
+    out : array, optional
+        If provided, the result will be inserted into this array. It should
+        be of the appropriate shape and dtype.
+    keepdims : bool, optional
+        If this is set to True, the axes which are reduced are left
+        in the result as dimensions with size one. With this option,
+        the result will broadcast correctly against the array.
+
+        .. versionadded:: 1.22.0
+
+    Returns
+    -------
+    index_array : ndarray of ints
+        Array of indices into the array. It has the same shape as `a`,
+        with the dimension along `axis` removed. If `keepdims` is set
+        to True, the dimension along `axis` is kept with size one, so
+        the result has the same number of dimensions as `a`.
+
+    See Also
+    --------
+    ndarray.argmin, argmax
+    amin : The minimum value along a given axis.
+    unravel_index : Convert a flat index into an index tuple.
+    take_along_axis : Apply ``np.expand_dims(index_array, axis)``
+                      from argmin to an array as if by calling min.
+
+    Notes
+    -----
+    In case of multiple occurrences of the minimum values, the indices
+    corresponding to the first occurrence are returned.
+
+    Examples
+    --------
+    >>> a = np.arange(6).reshape(2,3) + 10
+    >>> a
+    array([[10, 11, 12],
+           [13, 14, 15]])
+    >>> np.argmin(a)
+    0
+    >>> np.argmin(a, axis=0)
+    array([0, 0, 0])
+    >>> np.argmin(a, axis=1)
+    array([0, 0])
+
+    Indices of the minimum elements of an N-dimensional array:
+
+    >>> ind = np.unravel_index(np.argmin(a, axis=None), a.shape)
+    >>> ind
+    (0, 0)
+    >>> a[ind]
+    10
+
+    >>> b = np.arange(6) + 10
+    >>> b[4] = 10
+    >>> b
+    array([10, 11, 12, 13, 10, 15])
+    >>> np.argmin(b)  # Only the first occurrence is returned.
+    0
+
+    >>> x = np.array([[4,2,3], [1,0,3]])
+    >>> index_array = np.argmin(x, axis=-1)
+    >>> # Same as np.amin(x, axis=-1, keepdims=True)
+    >>> np.take_along_axis(x, np.expand_dims(index_array, axis=-1), axis=-1)
+    array([[2],
+           [0]])
+    >>> # Same as np.amin(x, axis=-1)
+    >>> np.take_along_axis(x, np.expand_dims(index_array, axis=-1), axis=-1).squeeze(axis=-1)
+    array([2, 0])
+
+    Setting `keepdims` to `True`,
+
+    >>> x = np.arange(24).reshape((2, 3, 4))
+    >>> res = np.argmin(x, axis=1, keepdims=True)
+    >>> res.shape
+    (2, 1, 4)
+    """
+    kwds = {'keepdims': keepdims} if keepdims is not np._NoValue else {}
+    return _wrapfunc(a, 'argmin', axis=axis, out=out, **kwds)
+
+
+def _searchsorted_dispatcher(a, v, side=None, sorter=None):
+    return (a, v, sorter)
+
+
+@array_function_dispatch(_searchsorted_dispatcher)
+def searchsorted(a, v, side='left', sorter=None):
+    """
+    Find indices where elements should be inserted to maintain order.
+
+    Find the indices into a sorted array `a` such that, if the
+    corresponding elements in `v` were inserted before the indices, the
+    order of `a` would be preserved.
+
+    Assuming that `a` is sorted:
+
+    ======  ============================
+    `side`  returned index `i` satisfies
+    ======  ============================
+    left    ``a[i-1] < v <= a[i]``
+    right   ``a[i-1] <= v < a[i]``
+    ======  ============================
+
+    Parameters
+    ----------
+    a : 1-D array_like
+        Input array. If `sorter` is None, then it must be sorted in
+        ascending order, otherwise `sorter` must be an array of indices
+        that sort it.
+    v : array_like
+        Values to insert into `a`.
+    side : {'left', 'right'}, optional
+        If 'left', the index of the first suitable location found is given.
+        If 'right', return the last such index. If there is no suitable
+        index, return either 0 or N (where N is the length of `a`).
+    sorter : 1-D array_like, optional
+        Optional array of integer indices that sort array `a` into ascending
+        order. They are typically the result of argsort.
+
+        .. versionadded:: 1.7.0
+
+    Returns
+    -------
+    indices : int or array of ints
+        Array of insertion points with the same shape as `v`,
+        or an integer if `v` is a scalar.
+
+    See Also
+    --------
+    sort : Return a sorted copy of an array.
+    histogram : Produce histogram from 1-D data.
+
+    Notes
+    -----
+    Binary search is used to find the required insertion points.
+
+    As of NumPy 1.4.0 `searchsorted` works with real/complex arrays containing
+    `nan` values. The enhanced sort order is documented in `sort`.
+
+    This function uses the same algorithm as the built-in Python
+    `bisect.bisect_left` (``side='left'``) and `bisect.bisect_right`
+    (``side='right'``) functions, which is also vectorized in the `v`
+    argument.
+
+    Examples
+    --------
+    >>> np.searchsorted([1,2,3,4,5], 3)
+    2
+    >>> np.searchsorted([1,2,3,4,5], 3, side='right')
+    3
+    >>> np.searchsorted([1,2,3,4,5], [-10, 10, 2, 3])
+    array([0, 5, 1, 2])
+
+    """
+    return _wrapfunc(a, 'searchsorted', v, side=side, sorter=sorter)
+
+
+def _resize_dispatcher(a, new_shape):
+    return (a,)
+
+
+@array_function_dispatch(_resize_dispatcher)
+def resize(a, new_shape):
+    """
+    Return a new array with the specified shape.
+
+    If the new array is larger than the original array, then the new
+    array is filled with repeated copies of `a`. Note that this behavior
+    is different from a.resize(new_shape) which fills with zeros instead
+    of repeated copies of `a`.
+
+    Parameters
+    ----------
+    a : array_like
+        Array to be resized.
+
+    new_shape : int or tuple of int
+        Shape of resized array.
+ + Returns + ------- + reshaped_array : ndarray + The new array is formed from the data in the old array, repeated + if necessary to fill out the required number of elements. The + data are repeated iterating over the array in C-order. + + See Also + -------- + numpy.reshape : Reshape an array without changing the total size. + numpy.pad : Enlarge and pad an array. + numpy.repeat : Repeat elements of an array. + ndarray.resize : resize an array in-place. + + Notes + ----- + When the total size of the array does not change `~numpy.reshape` should + be used. In most other cases either indexing (to reduce the size) + or padding (to increase the size) may be a more appropriate solution. + + Warning: This functionality does **not** consider axes separately, + i.e. it does not apply interpolation/extrapolation. + It fills the return array with the required number of elements, iterating + over `a` in C-order, disregarding axes (and cycling back from the start if + the new shape is larger). This functionality is therefore not suitable to + resize images, or data where each axis represents a separate and distinct + entity. + + Examples + -------- + >>> a=np.array([[0,1],[2,3]]) + >>> np.resize(a,(2,3)) + array([[0, 1, 2], + [3, 0, 1]]) + >>> np.resize(a,(1,4)) + array([[0, 1, 2, 3]]) + >>> np.resize(a,(2,4)) + array([[0, 1, 2, 3], + [0, 1, 2, 3]]) + + """ + if isinstance(new_shape, (int, nt.integer)): + new_shape = (new_shape,) + + a = ravel(a) + + new_size = 1 + for dim_length in new_shape: + new_size *= dim_length + if dim_length < 0: + raise ValueError('all elements of `new_shape` must be non-negative') + + if a.size == 0 or new_size == 0: + # First case must zero fill. The second would have repeats == 0. + return np.zeros_like(a, shape=new_shape) + + repeats = -(-new_size // a.size) # ceil division + a = concatenate((a,) * repeats)[:new_size] + + return reshape(a, new_shape) + + +def _squeeze_dispatcher(a, axis=None): + return (a,) + + +@array_function_dispatch(_squeeze_dispatcher) +def squeeze(a, axis=None): + """ + Remove axes of length one from `a`. + + Parameters + ---------- + a : array_like + Input data. + axis : None or int or tuple of ints, optional + .. versionadded:: 1.7.0 + + Selects a subset of the entries of length one in the + shape. If an axis is selected with shape entry greater than + one, an error is raised. + + Returns + ------- + squeezed : ndarray + The input array, but with all or a subset of the + dimensions of length 1 removed. This is always `a` itself + or a view into `a`. Note that if all axes are squeezed, + the result is a 0d array and not a scalar. + + Raises + ------ + ValueError + If `axis` is not None, and an axis being squeezed is not of length 1 + + See Also + -------- + expand_dims : The inverse operation, adding entries of length one + reshape : Insert, remove, and combine dimensions, and resize existing ones + + Examples + -------- + >>> x = np.array([[[0], [1], [2]]]) + >>> x.shape + (1, 3, 1) + >>> np.squeeze(x).shape + (3,) + >>> np.squeeze(x, axis=0).shape + (3, 1) + >>> np.squeeze(x, axis=1).shape + Traceback (most recent call last): + ... 
+ ValueError: cannot select an axis to squeeze out which has size not equal to one + >>> np.squeeze(x, axis=2).shape + (1, 3) + >>> x = np.array([[1234]]) + >>> x.shape + (1, 1) + >>> np.squeeze(x) + array(1234) # 0d array + >>> np.squeeze(x).shape + () + >>> np.squeeze(x)[()] + 1234 + + """ + try: + squeeze = a.squeeze + except AttributeError: + return _wrapit(a, 'squeeze', axis=axis) + if axis is None: + return squeeze() + else: + return squeeze(axis=axis) + + +def _diagonal_dispatcher(a, offset=None, axis1=None, axis2=None): + return (a,) + + +@array_function_dispatch(_diagonal_dispatcher) +def diagonal(a, offset=0, axis1=0, axis2=1): + """ + Return specified diagonals. + + If `a` is 2-D, returns the diagonal of `a` with the given offset, + i.e., the collection of elements of the form ``a[i, i+offset]``. If + `a` has more than two dimensions, then the axes specified by `axis1` + and `axis2` are used to determine the 2-D sub-array whose diagonal is + returned. The shape of the resulting array can be determined by + removing `axis1` and `axis2` and appending an index to the right equal + to the size of the resulting diagonals. + + In versions of NumPy prior to 1.7, this function always returned a new, + independent array containing a copy of the values in the diagonal. + + In NumPy 1.7 and 1.8, it continues to return a copy of the diagonal, + but depending on this fact is deprecated. Writing to the resulting + array continues to work as it used to, but a FutureWarning is issued. + + Starting in NumPy 1.9 it returns a read-only view on the original array. + Attempting to write to the resulting array will produce an error. + + In some future release, it will return a read/write view and writing to + the returned array will alter your original array. The returned array + will have the same type as the input array. + + If you don't write to the array returned by this function, then you can + just ignore all of the above. + + If you depend on the current behavior, then we suggest copying the + returned array explicitly, i.e., use ``np.diagonal(a).copy()`` instead + of just ``np.diagonal(a)``. This will work with both past and future + versions of NumPy. + + Parameters + ---------- + a : array_like + Array from which the diagonals are taken. + offset : int, optional + Offset of the diagonal from the main diagonal. Can be positive or + negative. Defaults to main diagonal (0). + axis1 : int, optional + Axis to be used as the first axis of the 2-D sub-arrays from which + the diagonals should be taken. Defaults to first axis (0). + axis2 : int, optional + Axis to be used as the second axis of the 2-D sub-arrays from + which the diagonals should be taken. Defaults to second axis (1). + + Returns + ------- + array_of_diagonals : ndarray + If `a` is 2-D, then a 1-D array containing the diagonal and of the + same type as `a` is returned unless `a` is a `matrix`, in which case + a 1-D array rather than a (2-D) `matrix` is returned in order to + maintain backward compatibility. + + If ``a.ndim > 2``, then the dimensions specified by `axis1` and `axis2` + are removed, and a new axis inserted at the end corresponding to the + diagonal. + + Raises + ------ + ValueError + If the dimension of `a` is less than 2. + + See Also + -------- + diag : MATLAB work-a-like for 1-D and 2-D arrays. + diagflat : Create diagonal arrays. + trace : Sum along diagonals. 
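+
+    Notes
+    -----
+    The read-only behaviour described above can be checked directly; a
+    minimal illustration:
+
+    >>> a = np.arange(4).reshape(2, 2)
+    >>> np.diagonal(a).flags.writeable
+    False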
+ + Examples + -------- + >>> a = np.arange(4).reshape(2,2) + >>> a + array([[0, 1], + [2, 3]]) + >>> a.diagonal() + array([0, 3]) + >>> a.diagonal(1) + array([1]) + + A 3-D example: + + >>> a = np.arange(8).reshape(2,2,2); a + array([[[0, 1], + [2, 3]], + [[4, 5], + [6, 7]]]) + >>> a.diagonal(0, # Main diagonals of two arrays created by skipping + ... 0, # across the outer(left)-most axis last and + ... 1) # the "middle" (row) axis first. + array([[0, 6], + [1, 7]]) + + The sub-arrays whose main diagonals we just obtained; note that each + corresponds to fixing the right-most (column) axis, and that the + diagonals are "packed" in rows. + + >>> a[:,:,0] # main diagonal is [0 6] + array([[0, 2], + [4, 6]]) + >>> a[:,:,1] # main diagonal is [1 7] + array([[1, 3], + [5, 7]]) + + The anti-diagonal can be obtained by reversing the order of elements + using either `numpy.flipud` or `numpy.fliplr`. + + >>> a = np.arange(9).reshape(3, 3) + >>> a + array([[0, 1, 2], + [3, 4, 5], + [6, 7, 8]]) + >>> np.fliplr(a).diagonal() # Horizontal flip + array([2, 4, 6]) + >>> np.flipud(a).diagonal() # Vertical flip + array([6, 4, 2]) + + Note that the order in which the diagonal is retrieved varies depending + on the flip function. + """ + if isinstance(a, np.matrix): + # Make diagonal of matrix 1-D to preserve backward compatibility. + return asarray(a).diagonal(offset=offset, axis1=axis1, axis2=axis2) + else: + return asanyarray(a).diagonal(offset=offset, axis1=axis1, axis2=axis2) + + +def _trace_dispatcher( + a, offset=None, axis1=None, axis2=None, dtype=None, out=None): + return (a, out) + + +@array_function_dispatch(_trace_dispatcher) +def trace(a, offset=0, axis1=0, axis2=1, dtype=None, out=None): + """ + Return the sum along diagonals of the array. + + If `a` is 2-D, the sum along its diagonal with the given offset + is returned, i.e., the sum of elements ``a[i,i+offset]`` for all i. + + If `a` has more than two dimensions, then the axes specified by axis1 and + axis2 are used to determine the 2-D sub-arrays whose traces are returned. + The shape of the resulting array is the same as that of `a` with `axis1` + and `axis2` removed. + + Parameters + ---------- + a : array_like + Input array, from which the diagonals are taken. + offset : int, optional + Offset of the diagonal from the main diagonal. Can be both positive + and negative. Defaults to 0. + axis1, axis2 : int, optional + Axes to be used as the first and second axis of the 2-D sub-arrays + from which the diagonals should be taken. Defaults are the first two + axes of `a`. + dtype : dtype, optional + Determines the data-type of the returned array and of the accumulator + where the elements are summed. If dtype has the value None and `a` is + of integer type of precision less than the default integer + precision, then the default integer precision is used. Otherwise, + the precision is the same as that of `a`. + out : ndarray, optional + Array into which the output is placed. Its type is preserved and + it must be of the right shape to hold the output. + + Returns + ------- + sum_along_diagonals : ndarray + If `a` is 2-D, the sum along the diagonal is returned. If `a` has + larger dimensions, then an array of sums along diagonals is returned. 
+ + See Also + -------- + diag, diagonal, diagflat + + Examples + -------- + >>> np.trace(np.eye(3)) + 3.0 + >>> a = np.arange(8).reshape((2,2,2)) + >>> np.trace(a) + array([6, 8]) + + >>> a = np.arange(24).reshape((2,2,2,3)) + >>> np.trace(a).shape + (2, 3) + + """ + if isinstance(a, np.matrix): + # Get trace of matrix via an array to preserve backward compatibility. + return asarray(a).trace(offset=offset, axis1=axis1, axis2=axis2, dtype=dtype, out=out) + else: + return asanyarray(a).trace(offset=offset, axis1=axis1, axis2=axis2, dtype=dtype, out=out) + + +def _ravel_dispatcher(a, order=None): + return (a,) + + +@array_function_dispatch(_ravel_dispatcher) +def ravel(a, order='C'): + """Return a contiguous flattened array. + + A 1-D array, containing the elements of the input, is returned. A copy is + made only if needed. + + As of NumPy 1.10, the returned array will have the same type as the input + array. (for example, a masked array will be returned for a masked array + input) + + Parameters + ---------- + a : array_like + Input array. The elements in `a` are read in the order specified by + `order`, and packed as a 1-D array. + order : {'C','F', 'A', 'K'}, optional + + The elements of `a` are read using this index order. 'C' means + to index the elements in row-major, C-style order, + with the last axis index changing fastest, back to the first + axis index changing slowest. 'F' means to index the elements + in column-major, Fortran-style order, with the + first index changing fastest, and the last index changing + slowest. Note that the 'C' and 'F' options take no account of + the memory layout of the underlying array, and only refer to + the order of axis indexing. 'A' means to read the elements in + Fortran-like index order if `a` is Fortran *contiguous* in + memory, C-like order otherwise. 'K' means to read the + elements in the order they occur in memory, except for + reversing the data when strides are negative. By default, 'C' + index order is used. + + Returns + ------- + y : array_like + y is an array of the same subtype as `a`, with shape ``(a.size,)``. + Note that matrices are special cased for backward compatibility, if `a` + is a matrix, then y is a 1-D ndarray. + + See Also + -------- + ndarray.flat : 1-D iterator over an array. + ndarray.flatten : 1-D array copy of the elements of an array + in row-major order. + ndarray.reshape : Change the shape of an array without changing its data. + + Notes + ----- + In row-major, C-style order, in two dimensions, the row index + varies the slowest, and the column index the quickest. This can + be generalized to multiple dimensions, where row-major order + implies that the index along the first axis varies slowest, and + the index along the last quickest. The opposite holds for + column-major, Fortran-style index ordering. + + When a view is desired in as many cases as possible, ``arr.reshape(-1)`` + may be preferable. + + Examples + -------- + It is equivalent to ``reshape(-1, order=order)``. 
+ + >>> x = np.array([[1, 2, 3], [4, 5, 6]]) + >>> np.ravel(x) + array([1, 2, 3, 4, 5, 6]) + + >>> x.reshape(-1) + array([1, 2, 3, 4, 5, 6]) + + >>> np.ravel(x, order='F') + array([1, 4, 2, 5, 3, 6]) + + When ``order`` is 'A', it will preserve the array's 'C' or 'F' ordering: + + >>> np.ravel(x.T) + array([1, 4, 2, 5, 3, 6]) + >>> np.ravel(x.T, order='A') + array([1, 2, 3, 4, 5, 6]) + + When ``order`` is 'K', it will preserve orderings that are neither 'C' + nor 'F', but won't reverse axes: + + >>> a = np.arange(3)[::-1]; a + array([2, 1, 0]) + >>> a.ravel(order='C') + array([2, 1, 0]) + >>> a.ravel(order='K') + array([2, 1, 0]) + + >>> a = np.arange(12).reshape(2,3,2).swapaxes(1,2); a + array([[[ 0, 2, 4], + [ 1, 3, 5]], + [[ 6, 8, 10], + [ 7, 9, 11]]]) + >>> a.ravel(order='C') + array([ 0, 2, 4, 1, 3, 5, 6, 8, 10, 7, 9, 11]) + >>> a.ravel(order='K') + array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]) + + """ + if isinstance(a, np.matrix): + return asarray(a).ravel(order=order) + else: + return asanyarray(a).ravel(order=order) + + +def _nonzero_dispatcher(a): + return (a,) + + +@array_function_dispatch(_nonzero_dispatcher) +def nonzero(a): + """ + Return the indices of the elements that are non-zero. + + Returns a tuple of arrays, one for each dimension of `a`, + containing the indices of the non-zero elements in that + dimension. The values in `a` are always tested and returned in + row-major, C-style order. + + To group the indices by element, rather than dimension, use `argwhere`, + which returns a row for each non-zero element. + + .. note:: + + When called on a zero-d array or scalar, ``nonzero(a)`` is treated + as ``nonzero(atleast_1d(a))``. + + .. deprecated:: 1.17.0 + + Use `atleast_1d` explicitly if this behavior is deliberate. + + Parameters + ---------- + a : array_like + Input array. + + Returns + ------- + tuple_of_arrays : tuple + Indices of elements that are non-zero. + + See Also + -------- + flatnonzero : + Return indices that are non-zero in the flattened version of the input + array. + ndarray.nonzero : + Equivalent ndarray method. + count_nonzero : + Counts the number of non-zero elements in the input array. + + Notes + ----- + While the nonzero values can be obtained with ``a[nonzero(a)]``, it is + recommended to use ``x[x.astype(bool)]`` or ``x[x != 0]`` instead, which + will correctly handle 0-d arrays. + + Examples + -------- + >>> x = np.array([[3, 0, 0], [0, 4, 0], [5, 6, 0]]) + >>> x + array([[3, 0, 0], + [0, 4, 0], + [5, 6, 0]]) + >>> np.nonzero(x) + (array([0, 1, 2, 2]), array([0, 1, 0, 1])) + + >>> x[np.nonzero(x)] + array([3, 4, 5, 6]) + >>> np.transpose(np.nonzero(x)) + array([[0, 0], + [1, 1], + [2, 0], + [2, 1]]) + + A common use for ``nonzero`` is to find the indices of an array, where + a condition is True. Given an array `a`, the condition `a` > 3 is a + boolean array and since False is interpreted as 0, np.nonzero(a > 3) + yields the indices of the `a` where the condition is true. + + >>> a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + >>> a > 3 + array([[False, False, False], + [ True, True, True], + [ True, True, True]]) + >>> np.nonzero(a > 3) + (array([1, 1, 1, 2, 2, 2]), array([0, 1, 2, 0, 1, 2])) + + Using this result to index `a` is equivalent to using the mask directly: + + >>> a[np.nonzero(a > 3)] + array([4, 5, 6, 7, 8, 9]) + >>> a[a > 3] # prefer this spelling + array([4, 5, 6, 7, 8, 9]) + + ``nonzero`` can also be called as a method of the array. 
+ + >>> (a > 3).nonzero() + (array([1, 1, 1, 2, 2, 2]), array([0, 1, 2, 0, 1, 2])) + + """ + return _wrapfunc(a, 'nonzero') + + +def _shape_dispatcher(a): + return (a,) + + +@array_function_dispatch(_shape_dispatcher) +def shape(a): + """ + Return the shape of an array. + + Parameters + ---------- + a : array_like + Input array. + + Returns + ------- + shape : tuple of ints + The elements of the shape tuple give the lengths of the + corresponding array dimensions. + + See Also + -------- + len + ndarray.shape : Equivalent array method. + + Examples + -------- + >>> np.shape(np.eye(3)) + (3, 3) + >>> np.shape([[1, 2]]) + (1, 2) + >>> np.shape([0]) + (1,) + >>> np.shape(0) + () + + >>> a = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')]) + >>> np.shape(a) + (2,) + >>> a.shape + (2,) + + """ + try: + result = a.shape + except AttributeError: + result = asarray(a).shape + return result + + +def _compress_dispatcher(condition, a, axis=None, out=None): + return (condition, a, out) + + +@array_function_dispatch(_compress_dispatcher) +def compress(condition, a, axis=None, out=None): + """ + Return selected slices of an array along given axis. + + When working along a given axis, a slice along that axis is returned in + `output` for each index where `condition` evaluates to True. When + working on a 1-D array, `compress` is equivalent to `extract`. + + Parameters + ---------- + condition : 1-D array of bools + Array that selects which entries to return. If len(condition) + is less than the size of `a` along the given axis, then output is + truncated to the length of the condition array. + a : array_like + Array from which to extract a part. + axis : int, optional + Axis along which to take slices. If None (default), work on the + flattened array. + out : ndarray, optional + Output array. Its type is preserved and it must be of the right + shape to hold the output. + + Returns + ------- + compressed_array : ndarray + A copy of `a` without the slices along axis for which `condition` + is false. + + See Also + -------- + take, choose, diag, diagonal, select + ndarray.compress : Equivalent method in ndarray + extract : Equivalent method when working on 1-D arrays + :ref:`ufuncs-output-type` + + Examples + -------- + >>> a = np.array([[1, 2], [3, 4], [5, 6]]) + >>> a + array([[1, 2], + [3, 4], + [5, 6]]) + >>> np.compress([0, 1], a, axis=0) + array([[3, 4]]) + >>> np.compress([False, True, True], a, axis=0) + array([[3, 4], + [5, 6]]) + >>> np.compress([False, True], a, axis=1) + array([[2], + [4], + [6]]) + + Working on the flattened array does not return slices along an axis but + selects elements. + + >>> np.compress([False, True], a) + array([2]) + + """ + return _wrapfunc(a, 'compress', condition, axis=axis, out=out) + + +def _clip_dispatcher(a, a_min, a_max, out=None, **kwargs): + return (a, a_min, a_max) + + +@array_function_dispatch(_clip_dispatcher) +def clip(a, a_min, a_max, out=None, **kwargs): + """ + Clip (limit) the values in an array. + + Given an interval, values outside the interval are clipped to + the interval edges. For example, if an interval of ``[0, 1]`` + is specified, values smaller than 0 become 0, and values larger + than 1 become 1. + + Equivalent to but faster than ``np.minimum(a_max, np.maximum(a, a_min))``. + + No check is performed to ensure ``a_min < a_max``. + + Parameters + ---------- + a : array_like + Array containing elements to clip. + a_min, a_max : array_like or None + Minimum and maximum value. 
If ``None``, clipping is not performed on
+        the corresponding edge. Only one of `a_min` and `a_max` may be
+        ``None``. Both are broadcast against `a`.
+    out : ndarray, optional
+        The results will be placed in this array. It may be the input
+        array for in-place clipping. `out` must be of the right shape
+        to hold the output. Its type is preserved.
+    **kwargs
+        For other keyword-only arguments, see the
+        :ref:`ufunc docs <ufuncs.kwargs>`.
+
+        .. versionadded:: 1.17.0
+
+    Returns
+    -------
+    clipped_array : ndarray
+        An array with the elements of `a`, but where values
+        < `a_min` are replaced with `a_min`, and those > `a_max`
+        with `a_max`.
+
+    See Also
+    --------
+    :ref:`ufuncs-output-type`
+
+    Notes
+    -----
+    When `a_min` is greater than `a_max`, `clip` returns an
+    array in which all values are equal to `a_max`,
+    as shown in the second example.
+
+    Examples
+    --------
+    >>> a = np.arange(10)
+    >>> a
+    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
+    >>> np.clip(a, 1, 8)
+    array([1, 1, 2, 3, 4, 5, 6, 7, 8, 8])
+    >>> np.clip(a, 8, 1)
+    array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
+    >>> np.clip(a, 3, 6, out=a)
+    array([3, 3, 3, 3, 4, 5, 6, 6, 6, 6])
+    >>> a
+    array([3, 3, 3, 3, 4, 5, 6, 6, 6, 6])
+    >>> a = np.arange(10)
+    >>> a
+    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
+    >>> np.clip(a, [3, 4, 1, 1, 1, 4, 4, 4, 4, 4], 8)
+    array([3, 4, 2, 3, 4, 5, 6, 7, 8, 8])
+
+    """
+    return _wrapfunc(a, 'clip', a_min, a_max, out=out, **kwargs)
+
+
+def _sum_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None,
+                    initial=None, where=None):
+    return (a, out)
+
+
+@array_function_dispatch(_sum_dispatcher)
+def sum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue,
+        initial=np._NoValue, where=np._NoValue):
+    """
+    Sum of array elements over a given axis.
+
+    Parameters
+    ----------
+    a : array_like
+        Elements to sum.
+    axis : None or int or tuple of ints, optional
+        Axis or axes along which a sum is performed. The default,
+        axis=None, will sum all of the elements of the input array. If
+        axis is negative it counts from the last to the first axis.
+
+        .. versionadded:: 1.7.0
+
+        If axis is a tuple of ints, a sum is performed on all of the axes
+        specified in the tuple instead of a single axis or all the axes as
+        before.
+    dtype : dtype, optional
+        The type of the returned array and of the accumulator in which the
+        elements are summed. The dtype of `a` is used by default unless `a`
+        has an integer dtype of less precision than the default platform
+        integer. In that case, if `a` is signed then the platform integer
+        is used while if `a` is unsigned then an unsigned integer of the
+        same precision as the platform integer is used.
+    out : ndarray, optional
+        Alternative output array in which to place the result. It must have
+        the same shape as the expected output, but the type of the output
+        values will be cast if necessary.
+    keepdims : bool, optional
+        If this is set to True, the axes which are reduced are left
+        in the result as dimensions with size one. With this option,
+        the result will broadcast correctly against the input array.
+
+        If the default value is passed, then `keepdims` will not be
+        passed through to the `sum` method of sub-classes of
+        `ndarray`, however any non-default value will be. If the
+        sub-class' method does not implement `keepdims` any
+        exceptions will be raised.
+    initial : scalar, optional
+        Starting value for the sum. See `~numpy.ufunc.reduce` for details.
+
+        .. versionadded:: 1.15.0
+
+    where : array_like of bool, optional
+        Elements to include in the sum.
See `~numpy.ufunc.reduce` for details. + + .. versionadded:: 1.17.0 + + Returns + ------- + sum_along_axis : ndarray + An array with the same shape as `a`, with the specified + axis removed. If `a` is a 0-d array, or if `axis` is None, a scalar + is returned. If an output array is specified, a reference to + `out` is returned. + + See Also + -------- + ndarray.sum : Equivalent method. + + add.reduce : Equivalent functionality of `add`. + + cumsum : Cumulative sum of array elements. + + trapz : Integration of array values using the composite trapezoidal rule. + + mean, average + + Notes + ----- + Arithmetic is modular when using integer types, and no error is + raised on overflow. + + The sum of an empty array is the neutral element 0: + + >>> np.sum([]) + 0.0 + + For floating point numbers the numerical precision of sum (and + ``np.add.reduce``) is in general limited by directly adding each number + individually to the result causing rounding errors in every step. + However, often numpy will use a numerically better approach (partial + pairwise summation) leading to improved precision in many use-cases. + This improved precision is always provided when no ``axis`` is given. + When ``axis`` is given, it will depend on which axis is summed. + Technically, to provide the best speed possible, the improved precision + is only used when the summation is along the fast axis in memory. + Note that the exact precision may vary depending on other parameters. + In contrast to NumPy, Python's ``math.fsum`` function uses a slower but + more precise approach to summation. + Especially when summing a large number of lower precision floating point + numbers, such as ``float32``, numerical errors can become significant. + In such cases it can be advisable to use `dtype="float64"` to use a higher + precision for the output. + + Examples + -------- + >>> np.sum([0.5, 1.5]) + 2.0 + >>> np.sum([0.5, 0.7, 0.2, 1.5], dtype=np.int32) + 1 + >>> np.sum([[0, 1], [0, 5]]) + 6 + >>> np.sum([[0, 1], [0, 5]], axis=0) + array([0, 6]) + >>> np.sum([[0, 1], [0, 5]], axis=1) + array([1, 5]) + >>> np.sum([[0, 1], [np.nan, 5]], where=[False, True], axis=1) + array([1., 5.]) + + If the accumulator is too small, overflow occurs: + + >>> np.ones(128, dtype=np.int8).sum(dtype=np.int8) + -128 + + You can also start the sum with a value other than zero: + + >>> np.sum([10], initial=5) + 15 + """ + if isinstance(a, _gentype): + # 2018-02-25, 1.15.0 + warnings.warn( + "Calling np.sum(generator) is deprecated, and in the future will give a different result. " + "Use np.sum(np.fromiter(generator)) or the python sum builtin instead.", + DeprecationWarning, stacklevel=3) + + res = _sum_(a) + if out is not None: + out[...] = res + return out + return res + + return _wrapreduction(a, np.add, 'sum', axis, dtype, out, keepdims=keepdims, + initial=initial, where=where) + + +def _any_dispatcher(a, axis=None, out=None, keepdims=None, *, + where=np._NoValue): + return (a, where, out) + + +@array_function_dispatch(_any_dispatcher) +def any(a, axis=None, out=None, keepdims=np._NoValue, *, where=np._NoValue): + """ + Test whether any array element along a given axis evaluates to True. + + Returns single boolean unless `axis` is not ``None`` + + Parameters + ---------- + a : array_like + Input array or object that can be converted to an array. + axis : None or int or tuple of ints, optional + Axis or axes along which a logical OR reduction is performed. 
+ The default (``axis=None``) is to perform a logical OR over all + the dimensions of the input array. `axis` may be negative, in + which case it counts from the last to the first axis. + + .. versionadded:: 1.7.0 + + If this is a tuple of ints, a reduction is performed on multiple + axes, instead of a single axis or all the axes as before. + out : ndarray, optional + Alternate output array in which to place the result. It must have + the same shape as the expected output and its type is preserved + (e.g., if it is of type float, then it will remain so, returning + 1.0 for True and 0.0 for False, regardless of the type of `a`). + See :ref:`ufuncs-output-type` for more details. + + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the input array. + + If the default value is passed, then `keepdims` will not be + passed through to the `any` method of sub-classes of + `ndarray`, however any non-default value will be. If the + sub-class' method does not implement `keepdims` any + exceptions will be raised. + + where : array_like of bool, optional + Elements to include in checking for any `True` values. + See `~numpy.ufunc.reduce` for details. + + .. versionadded:: 1.20.0 + + Returns + ------- + any : bool or ndarray + A new boolean or `ndarray` is returned unless `out` is specified, + in which case a reference to `out` is returned. + + See Also + -------- + ndarray.any : equivalent method + + all : Test whether all elements along a given axis evaluate to True. + + Notes + ----- + Not a Number (NaN), positive infinity and negative infinity evaluate + to `True` because these are not equal to zero. + + Examples + -------- + >>> np.any([[True, False], [True, True]]) + True + + >>> np.any([[True, False], [False, False]], axis=0) + array([ True, False]) + + >>> np.any([-1, 0, 5]) + True + + >>> np.any(np.nan) + True + + >>> np.any([[True, False], [False, False]], where=[[False], [True]]) + False + + >>> o=np.array(False) + >>> z=np.any([-1, 4, 5], out=o) + >>> z, o + (array(True), array(True)) + >>> # Check now that z is a reference to o + >>> z is o + True + >>> id(z), id(o) # identity of z and o # doctest: +SKIP + (191614240, 191614240) + + """ + return _wrapreduction(a, np.logical_or, 'any', axis, None, out, + keepdims=keepdims, where=where) + + +def _all_dispatcher(a, axis=None, out=None, keepdims=None, *, + where=None): + return (a, where, out) + + +@array_function_dispatch(_all_dispatcher) +def all(a, axis=None, out=None, keepdims=np._NoValue, *, where=np._NoValue): + """ + Test whether all array elements along a given axis evaluate to True. + + Parameters + ---------- + a : array_like + Input array or object that can be converted to an array. + axis : None or int or tuple of ints, optional + Axis or axes along which a logical AND reduction is performed. + The default (``axis=None``) is to perform a logical AND over all + the dimensions of the input array. `axis` may be negative, in + which case it counts from the last to the first axis. + + .. versionadded:: 1.7.0 + + If this is a tuple of ints, a reduction is performed on multiple + axes, instead of a single axis or all the axes as before. + out : ndarray, optional + Alternate output array in which to place the result. + It must have the same shape as the expected output and its + type is preserved (e.g., if ``dtype(out)`` is float, the result + will consist of 0.0's and 1.0's). 
See :ref:`ufuncs-output-type` for more + details. + + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the input array. + + If the default value is passed, then `keepdims` will not be + passed through to the `all` method of sub-classes of + `ndarray`, however any non-default value will be. If the + sub-class' method does not implement `keepdims` any + exceptions will be raised. + + where : array_like of bool, optional + Elements to include in checking for all `True` values. + See `~numpy.ufunc.reduce` for details. + + .. versionadded:: 1.20.0 + + Returns + ------- + all : ndarray, bool + A new boolean or array is returned unless `out` is specified, + in which case a reference to `out` is returned. + + See Also + -------- + ndarray.all : equivalent method + + any : Test whether any element along a given axis evaluates to True. + + Notes + ----- + Not a Number (NaN), positive infinity and negative infinity + evaluate to `True` because these are not equal to zero. + + Examples + -------- + >>> np.all([[True,False],[True,True]]) + False + + >>> np.all([[True,False],[True,True]], axis=0) + array([ True, False]) + + >>> np.all([-1, 4, 5]) + True + + >>> np.all([1.0, np.nan]) + True + + >>> np.all([[True, True], [False, True]], where=[[True], [False]]) + True + + >>> o=np.array(False) + >>> z=np.all([-1, 4, 5], out=o) + >>> id(z), id(o), z + (28293632, 28293632, array(True)) # may vary + + """ + return _wrapreduction(a, np.logical_and, 'all', axis, None, out, + keepdims=keepdims, where=where) + + +def _cumsum_dispatcher(a, axis=None, dtype=None, out=None): + return (a, out) + + +@array_function_dispatch(_cumsum_dispatcher) +def cumsum(a, axis=None, dtype=None, out=None): + """ + Return the cumulative sum of the elements along a given axis. + + Parameters + ---------- + a : array_like + Input array. + axis : int, optional + Axis along which the cumulative sum is computed. The default + (None) is to compute the cumsum over the flattened array. + dtype : dtype, optional + Type of the returned array and of the accumulator in which the + elements are summed. If `dtype` is not specified, it defaults + to the dtype of `a`, unless `a` has an integer dtype with a + precision less than that of the default platform integer. In + that case, the default platform integer is used. + out : ndarray, optional + Alternative output array in which to place the result. It must + have the same shape and buffer length as the expected output + but the type will be cast if necessary. See :ref:`ufuncs-output-type` for + more details. + + Returns + ------- + cumsum_along_axis : ndarray. + A new array holding the result is returned unless `out` is + specified, in which case a reference to `out` is returned. The + result has the same size as `a`, and the same shape as `a` if + `axis` is not None or `a` is a 1-d array. + + See Also + -------- + sum : Sum array elements. + trapz : Integration of array values using the composite trapezoidal rule. + diff : Calculate the n-th discrete difference along given axis. + + Notes + ----- + Arithmetic is modular when using integer types, and no error is + raised on overflow. + + ``cumsum(a)[-1]`` may not be equal to ``sum(a)`` for floating-point + values since ``sum`` may use a pairwise summation routine, reducing + the roundoff-error. See `sum` for more information. 
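+
+    A running mean is one common derived quantity: divide the cumulative
+    sum by the count of elements seen so far (a small sketch, not a
+    dedicated API):
+
+    >>> a = np.array([1., 2., 3., 4.])
+    >>> np.cumsum(a) / np.arange(1, a.size + 1)
+    array([1. , 1.5, 2. , 2.5])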
+ + Examples + -------- + >>> a = np.array([[1,2,3], [4,5,6]]) + >>> a + array([[1, 2, 3], + [4, 5, 6]]) + >>> np.cumsum(a) + array([ 1, 3, 6, 10, 15, 21]) + >>> np.cumsum(a, dtype=float) # specifies type of output value(s) + array([ 1., 3., 6., 10., 15., 21.]) + + >>> np.cumsum(a,axis=0) # sum over rows for each of the 3 columns + array([[1, 2, 3], + [5, 7, 9]]) + >>> np.cumsum(a,axis=1) # sum over columns for each of the 2 rows + array([[ 1, 3, 6], + [ 4, 9, 15]]) + + ``cumsum(b)[-1]`` may not be equal to ``sum(b)`` + + >>> b = np.array([1, 2e-9, 3e-9] * 1000000) + >>> b.cumsum()[-1] + 1000000.0050045159 + >>> b.sum() + 1000000.0050000029 + + """ + return _wrapfunc(a, 'cumsum', axis=axis, dtype=dtype, out=out) + + +def _ptp_dispatcher(a, axis=None, out=None, keepdims=None): + return (a, out) + + +@array_function_dispatch(_ptp_dispatcher) +def ptp(a, axis=None, out=None, keepdims=np._NoValue): + """ + Range of values (maximum - minimum) along an axis. + + The name of the function comes from the acronym for 'peak to peak'. + + .. warning:: + `ptp` preserves the data type of the array. This means the + return value for an input of signed integers with n bits + (e.g. `np.int8`, `np.int16`, etc) is also a signed integer + with n bits. In that case, peak-to-peak values greater than + ``2**(n-1)-1`` will be returned as negative values. An example + with a work-around is shown below. + + Parameters + ---------- + a : array_like + Input values. + axis : None or int or tuple of ints, optional + Axis along which to find the peaks. By default, flatten the + array. `axis` may be negative, in + which case it counts from the last to the first axis. + + .. versionadded:: 1.15.0 + + If this is a tuple of ints, a reduction is performed on multiple + axes, instead of a single axis or all the axes as before. + out : array_like + Alternative output array in which to place the result. It must + have the same shape and buffer length as the expected output, + but the type of the output values will be cast if necessary. + + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the input array. + + If the default value is passed, then `keepdims` will not be + passed through to the `ptp` method of sub-classes of + `ndarray`, however any non-default value will be. If the + sub-class' method does not implement `keepdims` any + exceptions will be raised. + + Returns + ------- + ptp : ndarray + A new array holding the result, unless `out` was + specified, in which case a reference to `out` is returned. + + Examples + -------- + >>> x = np.array([[4, 9, 2, 10], + ... [6, 9, 7, 12]]) + + >>> np.ptp(x, axis=1) + array([8, 6]) + + >>> np.ptp(x, axis=0) + array([2, 0, 5, 2]) + + >>> np.ptp(x) + 10 + + This example shows that a negative value can be returned when + the input is an array of signed integers. + + >>> y = np.array([[1, 127], + ... [0, 127], + ... [-1, 127], + ... 
[-2, 127]], dtype=np.int8) + >>> np.ptp(y, axis=1) + array([ 126, 127, -128, -127], dtype=int8) + + A work-around is to use the `view()` method to view the result as + unsigned integers with the same bit width: + + >>> np.ptp(y, axis=1).view(np.uint8) + array([126, 127, 128, 129], dtype=uint8) + + """ + kwargs = {} + if keepdims is not np._NoValue: + kwargs['keepdims'] = keepdims + if type(a) is not mu.ndarray: + try: + ptp = a.ptp + except AttributeError: + pass + else: + return ptp(axis=axis, out=out, **kwargs) + return _methods._ptp(a, axis=axis, out=out, **kwargs) + + +def _amax_dispatcher(a, axis=None, out=None, keepdims=None, initial=None, + where=None): + return (a, out) + + +@array_function_dispatch(_amax_dispatcher) +def amax(a, axis=None, out=None, keepdims=np._NoValue, initial=np._NoValue, + where=np._NoValue): + """ + Return the maximum of an array or maximum along an axis. + + Parameters + ---------- + a : array_like + Input data. + axis : None or int or tuple of ints, optional + Axis or axes along which to operate. By default, flattened input is + used. + + .. versionadded:: 1.7.0 + + If this is a tuple of ints, the maximum is selected over multiple axes, + instead of a single axis or all the axes as before. + out : ndarray, optional + Alternative output array in which to place the result. Must + be of the same shape and buffer length as the expected output. + See :ref:`ufuncs-output-type` for more details. + + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the input array. + + If the default value is passed, then `keepdims` will not be + passed through to the `amax` method of sub-classes of + `ndarray`, however any non-default value will be. If the + sub-class' method does not implement `keepdims` any + exceptions will be raised. + + initial : scalar, optional + The minimum value of an output element. Must be present to allow + computation on empty slice. See `~numpy.ufunc.reduce` for details. + + .. versionadded:: 1.15.0 + + where : array_like of bool, optional + Elements to compare for the maximum. See `~numpy.ufunc.reduce` + for details. + + .. versionadded:: 1.17.0 + + Returns + ------- + amax : ndarray or scalar + Maximum of `a`. If `axis` is None, the result is a scalar value. + If `axis` is given, the result is an array of dimension + ``a.ndim - 1``. + + See Also + -------- + amin : + The minimum value of an array along a given axis, propagating any NaNs. + nanmax : + The maximum value of an array along a given axis, ignoring any NaNs. + maximum : + Element-wise maximum of two arrays, propagating any NaNs. + fmax : + Element-wise maximum of two arrays, ignoring any NaNs. + argmax : + Return the indices of the maximum values. + + nanmin, minimum, fmin + + Notes + ----- + NaN values are propagated, that is if at least one item is NaN, the + corresponding max value will be NaN as well. To ignore NaN values + (MATLAB behavior), please use nanmax. + + Don't use `amax` for element-wise comparison of 2 arrays; when + ``a.shape[0]`` is 2, ``maximum(a[0], a[1])`` is faster than + ``amax(a, axis=0)``. 
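+
+    Both spellings give the same values; the element-wise form simply
+    avoids the reduction machinery:
+
+    >>> a = np.array([[1, 4], [3, 2]])
+    >>> np.maximum(a[0], a[1])
+    array([3, 4])
+    >>> np.amax(a, axis=0)
+    array([3, 4])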
+ + Examples + -------- + >>> a = np.arange(4).reshape((2,2)) + >>> a + array([[0, 1], + [2, 3]]) + >>> np.amax(a) # Maximum of the flattened array + 3 + >>> np.amax(a, axis=0) # Maxima along the first axis + array([2, 3]) + >>> np.amax(a, axis=1) # Maxima along the second axis + array([1, 3]) + >>> np.amax(a, where=[False, True], initial=-1, axis=0) + array([-1, 3]) + >>> b = np.arange(5, dtype=float) + >>> b[2] = np.NaN + >>> np.amax(b) + nan + >>> np.amax(b, where=~np.isnan(b), initial=-1) + 4.0 + >>> np.nanmax(b) + 4.0 + + You can use an initial value to compute the maximum of an empty slice, or + to initialize it to a different value: + + >>> np.amax([[-50], [10]], axis=-1, initial=0) + array([ 0, 10]) + + Notice that the initial value is used as one of the elements for which the + maximum is determined, unlike for the default argument Python's max + function, which is only used for empty iterables. + + >>> np.amax([5], initial=6) + 6 + >>> max([5], default=6) + 5 + """ + return _wrapreduction(a, np.maximum, 'max', axis, None, out, + keepdims=keepdims, initial=initial, where=where) + + +def _amin_dispatcher(a, axis=None, out=None, keepdims=None, initial=None, + where=None): + return (a, out) + + +@array_function_dispatch(_amin_dispatcher) +def amin(a, axis=None, out=None, keepdims=np._NoValue, initial=np._NoValue, + where=np._NoValue): + """ + Return the minimum of an array or minimum along an axis. + + Parameters + ---------- + a : array_like + Input data. + axis : None or int or tuple of ints, optional + Axis or axes along which to operate. By default, flattened input is + used. + + .. versionadded:: 1.7.0 + + If this is a tuple of ints, the minimum is selected over multiple axes, + instead of a single axis or all the axes as before. + out : ndarray, optional + Alternative output array in which to place the result. Must + be of the same shape and buffer length as the expected output. + See :ref:`ufuncs-output-type` for more details. + + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the input array. + + If the default value is passed, then `keepdims` will not be + passed through to the `amin` method of sub-classes of + `ndarray`, however any non-default value will be. If the + sub-class' method does not implement `keepdims` any + exceptions will be raised. + + initial : scalar, optional + The maximum value of an output element. Must be present to allow + computation on empty slice. See `~numpy.ufunc.reduce` for details. + + .. versionadded:: 1.15.0 + + where : array_like of bool, optional + Elements to compare for the minimum. See `~numpy.ufunc.reduce` + for details. + + .. versionadded:: 1.17.0 + + Returns + ------- + amin : ndarray or scalar + Minimum of `a`. If `axis` is None, the result is a scalar value. + If `axis` is given, the result is an array of dimension + ``a.ndim - 1``. + + See Also + -------- + amax : + The maximum value of an array along a given axis, propagating any NaNs. + nanmin : + The minimum value of an array along a given axis, ignoring any NaNs. + minimum : + Element-wise minimum of two arrays, propagating any NaNs. + fmin : + Element-wise minimum of two arrays, ignoring any NaNs. + argmin : + Return the indices of the minimum values. + + nanmax, maximum, fmax + + Notes + ----- + NaN values are propagated, that is if at least one item is NaN, the + corresponding min value will be NaN as well. 
To ignore NaN values
+    (MATLAB behavior), please use nanmin.
+
+    Don't use `amin` for element-wise comparison of 2 arrays; when
+    ``a.shape[0]`` is 2, ``minimum(a[0], a[1])`` is faster than
+    ``amin(a, axis=0)``.
+
+    Examples
+    --------
+    >>> a = np.arange(4).reshape((2,2))
+    >>> a
+    array([[0, 1],
+           [2, 3]])
+    >>> np.amin(a)           # Minimum of the flattened array
+    0
+    >>> np.amin(a, axis=0)   # Minima along the first axis
+    array([0, 1])
+    >>> np.amin(a, axis=1)   # Minima along the second axis
+    array([0, 2])
+    >>> np.amin(a, where=[False, True], initial=10, axis=0)
+    array([10,  1])
+
+    >>> b = np.arange(5, dtype=float)
+    >>> b[2] = np.NaN
+    >>> np.amin(b)
+    nan
+    >>> np.amin(b, where=~np.isnan(b), initial=10)
+    0.0
+    >>> np.nanmin(b)
+    0.0
+
+    >>> np.amin([[-50], [10]], axis=-1, initial=0)
+    array([-50,   0])
+
+    Notice that the initial value is used as one of the elements for which
+    the minimum is determined, unlike the ``default`` argument of Python's
+    ``min`` function, which is used only for empty iterables:
+
+    >>> np.amin([6], initial=5)
+    5
+    >>> min([6], default=5)
+    6
+    """
+    return _wrapreduction(a, np.minimum, 'min', axis, None, out,
+                          keepdims=keepdims, initial=initial, where=where)
+
+
+def _alen_dispatcher(a):
+    return (a,)
+
+
+@array_function_dispatch(_alen_dispatcher)
+def alen(a):
+    """
+    Return the length of the first dimension of the input array.
+
+    .. deprecated:: 1.18
+       `numpy.alen` is deprecated, use `len` instead.
+
+    Parameters
+    ----------
+    a : array_like
+        Input array.
+
+    Returns
+    -------
+    alen : int
+        Length of the first dimension of `a`.
+
+    See Also
+    --------
+    shape, size
+
+    Examples
+    --------
+    >>> a = np.zeros((7,4,5))
+    >>> a.shape[0]
+    7
+    >>> np.alen(a)
+    7
+
+    """
+    # NumPy 1.18.0, 2019-08-02
+    warnings.warn(
+        "`np.alen` is deprecated, use `len` instead",
+        DeprecationWarning, stacklevel=2)
+    try:
+        return len(a)
+    except TypeError:
+        return len(array(a, ndmin=1))
+
+
+def _prod_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None,
+                     initial=None, where=None):
+    return (a, out)
+
+
+@array_function_dispatch(_prod_dispatcher)
+def prod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue,
+         initial=np._NoValue, where=np._NoValue):
+    """
+    Return the product of array elements over a given axis.
+
+    Parameters
+    ----------
+    a : array_like
+        Input data.
+    axis : None or int or tuple of ints, optional
+        Axis or axes along which a product is performed.  The default,
+        axis=None, will calculate the product of all the elements in the
+        input array.  If axis is negative it counts from the last to the
+        first axis.
+
+        .. versionadded:: 1.7.0
+
+        If axis is a tuple of ints, a product is performed on all of the
+        axes specified in the tuple instead of a single axis or all the
+        axes as before.
+    dtype : dtype, optional
+        The type of the returned array, as well as of the accumulator in
+        which the elements are multiplied.  The dtype of `a` is used by
+        default unless `a` has an integer dtype of less precision than the
+        default platform integer.  In that case, if `a` is signed then the
+        platform integer is used while if `a` is unsigned then an unsigned
+        integer of the same precision as the platform integer is used.
+    out : ndarray, optional
+        Alternative output array in which to place the result. It must
+        have the same shape as the expected output, but the type of the
+        output values will be cast if necessary.
+ keepdims : bool, optional + If this is set to True, the axes which are reduced are left in the + result as dimensions with size one. With this option, the result + will broadcast correctly against the input array. + + If the default value is passed, then `keepdims` will not be + passed through to the `prod` method of sub-classes of + `ndarray`, however any non-default value will be. If the + sub-class' method does not implement `keepdims` any + exceptions will be raised. + initial : scalar, optional + The starting value for this product. See `~numpy.ufunc.reduce` for details. + + .. versionadded:: 1.15.0 + + where : array_like of bool, optional + Elements to include in the product. See `~numpy.ufunc.reduce` for details. + + .. versionadded:: 1.17.0 + + Returns + ------- + product_along_axis : ndarray, see `dtype` parameter above. + An array shaped as `a` but with the specified axis removed. + Returns a reference to `out` if specified. + + See Also + -------- + ndarray.prod : equivalent method + :ref:`ufuncs-output-type` + + Notes + ----- + Arithmetic is modular when using integer types, and no error is + raised on overflow. That means that, on a 32-bit platform: + + >>> x = np.array([536870910, 536870910, 536870910, 536870910]) + >>> np.prod(x) + 16 # may vary + + The product of an empty array is the neutral element 1: + + >>> np.prod([]) + 1.0 + + Examples + -------- + By default, calculate the product of all elements: + + >>> np.prod([1.,2.]) + 2.0 + + Even when the input array is two-dimensional: + + >>> np.prod([[1.,2.],[3.,4.]]) + 24.0 + + But we can also specify the axis over which to multiply: + + >>> np.prod([[1.,2.],[3.,4.]], axis=1) + array([ 2., 12.]) + + Or select specific elements to include: + + >>> np.prod([1., np.nan, 3.], where=[True, False, True]) + 3.0 + + If the type of `x` is unsigned, then the output type is + the unsigned platform integer: + + >>> x = np.array([1, 2, 3], dtype=np.uint8) + >>> np.prod(x).dtype == np.uint + True + + If `x` is of a signed integer type, then the output type + is the default platform integer: + + >>> x = np.array([1, 2, 3], dtype=np.int8) + >>> np.prod(x).dtype == int + True + + You can also start the product with a value other than one: + + >>> np.prod([1, 2], initial=5) + 10 + """ + return _wrapreduction(a, np.multiply, 'prod', axis, dtype, out, + keepdims=keepdims, initial=initial, where=where) + + +def _cumprod_dispatcher(a, axis=None, dtype=None, out=None): + return (a, out) + + +@array_function_dispatch(_cumprod_dispatcher) +def cumprod(a, axis=None, dtype=None, out=None): + """ + Return the cumulative product of elements along a given axis. + + Parameters + ---------- + a : array_like + Input array. + axis : int, optional + Axis along which the cumulative product is computed. By default + the input is flattened. + dtype : dtype, optional + Type of the returned array, as well as of the accumulator in which + the elements are multiplied. If *dtype* is not specified, it + defaults to the dtype of `a`, unless `a` has an integer dtype with + a precision less than that of the default platform integer. In + that case, the default platform integer is used instead. + out : ndarray, optional + Alternative output array in which to place the result. It must + have the same shape and buffer length as the expected output + but the type of the resulting values will be cast if necessary. 
+ + Returns + ------- + cumprod : ndarray + A new array holding the result is returned unless `out` is + specified, in which case a reference to out is returned. + + See Also + -------- + :ref:`ufuncs-output-type` + + Notes + ----- + Arithmetic is modular when using integer types, and no error is + raised on overflow. + + Examples + -------- + >>> a = np.array([1,2,3]) + >>> np.cumprod(a) # intermediate results 1, 1*2 + ... # total product 1*2*3 = 6 + array([1, 2, 6]) + >>> a = np.array([[1, 2, 3], [4, 5, 6]]) + >>> np.cumprod(a, dtype=float) # specify type of output + array([ 1., 2., 6., 24., 120., 720.]) + + The cumulative product for each column (i.e., over the rows) of `a`: + + >>> np.cumprod(a, axis=0) + array([[ 1, 2, 3], + [ 4, 10, 18]]) + + The cumulative product for each row (i.e. over the columns) of `a`: + + >>> np.cumprod(a,axis=1) + array([[ 1, 2, 6], + [ 4, 20, 120]]) + + """ + return _wrapfunc(a, 'cumprod', axis=axis, dtype=dtype, out=out) + + +def _ndim_dispatcher(a): + return (a,) + + +@array_function_dispatch(_ndim_dispatcher) +def ndim(a): + """ + Return the number of dimensions of an array. + + Parameters + ---------- + a : array_like + Input array. If it is not already an ndarray, a conversion is + attempted. + + Returns + ------- + number_of_dimensions : int + The number of dimensions in `a`. Scalars are zero-dimensional. + + See Also + -------- + ndarray.ndim : equivalent method + shape : dimensions of array + ndarray.shape : dimensions of array + + Examples + -------- + >>> np.ndim([[1,2,3],[4,5,6]]) + 2 + >>> np.ndim(np.array([[1,2,3],[4,5,6]])) + 2 + >>> np.ndim(1) + 0 + + """ + try: + return a.ndim + except AttributeError: + return asarray(a).ndim + + +def _size_dispatcher(a, axis=None): + return (a,) + + +@array_function_dispatch(_size_dispatcher) +def size(a, axis=None): + """ + Return the number of elements along a given axis. + + Parameters + ---------- + a : array_like + Input data. + axis : int, optional + Axis along which the elements are counted. By default, give + the total number of elements. + + Returns + ------- + element_count : int + Number of elements along the specified axis. + + See Also + -------- + shape : dimensions of array + ndarray.shape : dimensions of array + ndarray.size : number of elements in array + + Examples + -------- + >>> a = np.array([[1,2,3],[4,5,6]]) + >>> np.size(a) + 6 + >>> np.size(a,1) + 3 + >>> np.size(a,0) + 2 + + """ + if axis is None: + try: + return a.size + except AttributeError: + return asarray(a).size + else: + try: + return a.shape[axis] + except AttributeError: + return asarray(a).shape[axis] + + +def _around_dispatcher(a, decimals=None, out=None): + return (a, out) + + +@array_function_dispatch(_around_dispatcher) +def around(a, decimals=0, out=None): + """ + Evenly round to the given number of decimals. + + Parameters + ---------- + a : array_like + Input data. + decimals : int, optional + Number of decimal places to round to (default: 0). If + decimals is negative, it specifies the number of positions to + the left of the decimal point. + out : ndarray, optional + Alternative output array in which to place the result. It must have + the same shape as the expected output, but the type of the output + values will be cast if necessary. See :ref:`ufuncs-output-type` for more + details. + + Returns + ------- + rounded_array : ndarray + An array of the same type as `a`, containing the rounded values. + Unless `out` was specified, a new array is created. A reference to + the result is returned. 
+ + The real and imaginary parts of complex numbers are rounded + separately. The result of rounding a float is a float. + + See Also + -------- + ndarray.round : equivalent method + + ceil, fix, floor, rint, trunc + + + Notes + ----- + For values exactly halfway between rounded decimal values, NumPy + rounds to the nearest even value. Thus 1.5 and 2.5 round to 2.0, + -0.5 and 0.5 round to 0.0, etc. + + ``np.around`` uses a fast but sometimes inexact algorithm to round + floating-point datatypes. For positive `decimals` it is equivalent to + ``np.true_divide(np.rint(a * 10**decimals), 10**decimals)``, which has + error due to the inexact representation of decimal fractions in the IEEE + floating point standard [1]_ and errors introduced when scaling by powers + of ten. For instance, note the extra "1" in the following: + + >>> np.round(56294995342131.5, 3) + 56294995342131.51 + + If your goal is to print such values with a fixed number of decimals, it is + preferable to use numpy's float printing routines to limit the number of + printed decimals: + + >>> np.format_float_positional(56294995342131.5, precision=3) + '56294995342131.5' + + The float printing routines use an accurate but much more computationally + demanding algorithm to compute the number of digits after the decimal + point. + + Alternatively, Python's builtin `round` function uses a more accurate + but slower algorithm for 64-bit floating point values: + + >>> round(56294995342131.5, 3) + 56294995342131.5 + >>> np.round(16.055, 2), round(16.055, 2) # equals 16.0549999999999997 + (16.06, 16.05) + + + References + ---------- + .. [1] "Lecture Notes on the Status of IEEE 754", William Kahan, + https://people.eecs.berkeley.edu/~wkahan/ieee754status/IEEE754.PDF + + Examples + -------- + >>> np.around([0.37, 1.64]) + array([0., 2.]) + >>> np.around([0.37, 1.64], decimals=1) + array([0.4, 1.6]) + >>> np.around([.5, 1.5, 2.5, 3.5, 4.5]) # rounds to nearest even value + array([0., 2., 2., 4., 4.]) + >>> np.around([1,2,3,11], decimals=1) # ndarray of ints is returned + array([ 1, 2, 3, 11]) + >>> np.around([1,2,3,11], decimals=-1) + array([ 0, 0, 0, 10]) + + """ + return _wrapfunc(a, 'round', decimals=decimals, out=out) + + +def _mean_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None, *, + where=None): + return (a, where, out) + + +@array_function_dispatch(_mean_dispatcher) +def mean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue, *, + where=np._NoValue): + """ + Compute the arithmetic mean along the specified axis. + + Returns the average of the array elements. The average is taken over + the flattened array by default, otherwise over the specified axis. + `float64` intermediate and return values are used for integer inputs. + + Parameters + ---------- + a : array_like + Array containing numbers whose mean is desired. If `a` is not an + array, a conversion is attempted. + axis : None or int or tuple of ints, optional + Axis or axes along which the means are computed. The default is to + compute the mean of the flattened array. + + .. versionadded:: 1.7.0 + + If this is a tuple of ints, a mean is performed over multiple axes, + instead of a single axis or all the axes as before. + dtype : data-type, optional + Type to use in computing the mean. For integer inputs, the default + is `float64`; for floating point inputs, it is the same as the + input dtype. + out : ndarray, optional + Alternate output array in which to place the result. 
The default + is ``None``; if provided, it must have the same shape as the + expected output, but the type will be cast if necessary. + See :ref:`ufuncs-output-type` for more details. + + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the input array. + + If the default value is passed, then `keepdims` will not be + passed through to the `mean` method of sub-classes of + `ndarray`, however any non-default value will be. If the + sub-class' method does not implement `keepdims` any + exceptions will be raised. + + where : array_like of bool, optional + Elements to include in the mean. See `~numpy.ufunc.reduce` for details. + + .. versionadded:: 1.20.0 + + Returns + ------- + m : ndarray, see dtype parameter above + If `out=None`, returns a new array containing the mean values, + otherwise a reference to the output array is returned. + + See Also + -------- + average : Weighted average + std, var, nanmean, nanstd, nanvar + + Notes + ----- + The arithmetic mean is the sum of the elements along the axis divided + by the number of elements. + + Note that for floating-point input, the mean is computed using the + same precision the input has. Depending on the input data, this can + cause the results to be inaccurate, especially for `float32` (see + example below). Specifying a higher-precision accumulator using the + `dtype` keyword can alleviate this issue. + + By default, `float16` results are computed using `float32` intermediates + for extra precision. + + Examples + -------- + >>> a = np.array([[1, 2], [3, 4]]) + >>> np.mean(a) + 2.5 + >>> np.mean(a, axis=0) + array([2., 3.]) + >>> np.mean(a, axis=1) + array([1.5, 3.5]) + + In single precision, `mean` can be inaccurate: + + >>> a = np.zeros((2, 512*512), dtype=np.float32) + >>> a[0, :] = 1.0 + >>> a[1, :] = 0.1 + >>> np.mean(a) + 0.54999924 + + Computing the mean in float64 is more accurate: + + >>> np.mean(a, dtype=np.float64) + 0.55000000074505806 # may vary + + Specifying a where argument: + >>> a = np.array([[5, 9, 13], [14, 10, 12], [11, 15, 19]]) + >>> np.mean(a) + 12.0 + >>> np.mean(a, where=[[True], [False], [False]]) + 9.0 + + """ + kwargs = {} + if keepdims is not np._NoValue: + kwargs['keepdims'] = keepdims + if where is not np._NoValue: + kwargs['where'] = where + if type(a) is not mu.ndarray: + try: + mean = a.mean + except AttributeError: + pass + else: + return mean(axis=axis, dtype=dtype, out=out, **kwargs) + + return _methods._mean(a, axis=axis, dtype=dtype, + out=out, **kwargs) + + +def _std_dispatcher(a, axis=None, dtype=None, out=None, ddof=None, + keepdims=None, *, where=None): + return (a, where, out) + + +@array_function_dispatch(_std_dispatcher) +def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue, *, + where=np._NoValue): + """ + Compute the standard deviation along the specified axis. + + Returns the standard deviation, a measure of the spread of a distribution, + of the array elements. The standard deviation is computed for the + flattened array by default, otherwise over the specified axis. + + Parameters + ---------- + a : array_like + Calculate the standard deviation of these values. + axis : None or int or tuple of ints, optional + Axis or axes along which the standard deviation is computed. The + default is to compute the standard deviation of the flattened array. + + .. 
versionadded:: 1.7.0 + + If this is a tuple of ints, a standard deviation is performed over + multiple axes, instead of a single axis or all the axes as before. + dtype : dtype, optional + Type to use in computing the standard deviation. For arrays of + integer type the default is float64, for arrays of float types it is + the same as the array type. + out : ndarray, optional + Alternative output array in which to place the result. It must have + the same shape as the expected output but the type (of the calculated + values) will be cast if necessary. + ddof : int, optional + Means Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements. + By default `ddof` is zero. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the input array. + + If the default value is passed, then `keepdims` will not be + passed through to the `std` method of sub-classes of + `ndarray`, however any non-default value will be. If the + sub-class' method does not implement `keepdims` any + exceptions will be raised. + + where : array_like of bool, optional + Elements to include in the standard deviation. + See `~numpy.ufunc.reduce` for details. + + .. versionadded:: 1.20.0 + + Returns + ------- + standard_deviation : ndarray, see dtype parameter above. + If `out` is None, return a new array containing the standard deviation, + otherwise return a reference to the output array. + + See Also + -------- + var, mean, nanmean, nanstd, nanvar + :ref:`ufuncs-output-type` + + Notes + ----- + The standard deviation is the square root of the average of the squared + deviations from the mean, i.e., ``std = sqrt(mean(x))``, where + ``x = abs(a - a.mean())**2``. + + The average squared deviation is typically calculated as ``x.sum() / N``, + where ``N = len(x)``. If, however, `ddof` is specified, the divisor + ``N - ddof`` is used instead. In standard statistical practice, ``ddof=1`` + provides an unbiased estimator of the variance of the infinite population. + ``ddof=0`` provides a maximum likelihood estimate of the variance for + normally distributed variables. The standard deviation computed in this + function is the square root of the estimated variance, so even with + ``ddof=1``, it will not be an unbiased estimate of the standard deviation + per se. + + Note that, for complex numbers, `std` takes the absolute + value before squaring, so that the result is always real and nonnegative. + + For floating-point input, the *std* is computed using the same + precision the input has. Depending on the input data, this can cause + the results to be inaccurate, especially for float32 (see example below). + Specifying a higher-accuracy accumulator using the `dtype` keyword can + alleviate this issue. 
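+
+    As a concrete sketch of the ``ddof`` divisor described above (the
+    sample values are arbitrary), four elements give ``N = 4``, so
+    ``ddof=0`` divides by 4 and ``ddof=1`` divides by 3:
+
+    >>> s = np.array([1., 2., 3., 4.])
+    >>> np.std(s)            # sqrt(5.0 / 4)
+    1.1180339887498949 # may vary
+    >>> np.std(s, ddof=1)    # sqrt(5.0 / 3)
+    1.2909944487358056 # may vary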
+ + Examples + -------- + >>> a = np.array([[1, 2], [3, 4]]) + >>> np.std(a) + 1.1180339887498949 # may vary + >>> np.std(a, axis=0) + array([1., 1.]) + >>> np.std(a, axis=1) + array([0.5, 0.5]) + + In single precision, std() can be inaccurate: + + >>> a = np.zeros((2, 512*512), dtype=np.float32) + >>> a[0, :] = 1.0 + >>> a[1, :] = 0.1 + >>> np.std(a) + 0.45000005 + + Computing the standard deviation in float64 is more accurate: + + >>> np.std(a, dtype=np.float64) + 0.44999999925494177 # may vary + + Specifying a where argument: + + >>> a = np.array([[14, 8, 11, 10], [7, 9, 10, 11], [10, 15, 5, 10]]) + >>> np.std(a) + 2.614064523559687 # may vary + >>> np.std(a, where=[[True], [True], [False]]) + 2.0 + + """ + kwargs = {} + if keepdims is not np._NoValue: + kwargs['keepdims'] = keepdims + if where is not np._NoValue: + kwargs['where'] = where + if type(a) is not mu.ndarray: + try: + std = a.std + except AttributeError: + pass + else: + return std(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs) + + return _methods._std(a, axis=axis, dtype=dtype, out=out, ddof=ddof, + **kwargs) + + +def _var_dispatcher(a, axis=None, dtype=None, out=None, ddof=None, + keepdims=None, *, where=None): + return (a, where, out) + + +@array_function_dispatch(_var_dispatcher) +def var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue, *, + where=np._NoValue): + """ + Compute the variance along the specified axis. + + Returns the variance of the array elements, a measure of the spread of a + distribution. The variance is computed for the flattened array by + default, otherwise over the specified axis. + + Parameters + ---------- + a : array_like + Array containing numbers whose variance is desired. If `a` is not an + array, a conversion is attempted. + axis : None or int or tuple of ints, optional + Axis or axes along which the variance is computed. The default is to + compute the variance of the flattened array. + + .. versionadded:: 1.7.0 + + If this is a tuple of ints, a variance is performed over multiple axes, + instead of a single axis or all the axes as before. + dtype : data-type, optional + Type to use in computing the variance. For arrays of integer type + the default is `float64`; for arrays of float types it is the same as + the array type. + out : ndarray, optional + Alternate output array in which to place the result. It must have + the same shape as the expected output, but the type is cast if + necessary. + ddof : int, optional + "Delta Degrees of Freedom": the divisor used in the calculation is + ``N - ddof``, where ``N`` represents the number of elements. By + default `ddof` is zero. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the input array. + + If the default value is passed, then `keepdims` will not be + passed through to the `var` method of sub-classes of + `ndarray`, however any non-default value will be. If the + sub-class' method does not implement `keepdims` any + exceptions will be raised. + + where : array_like of bool, optional + Elements to include in the variance. See `~numpy.ufunc.reduce` for + details. + + .. versionadded:: 1.20.0 + + Returns + ------- + variance : ndarray, see dtype parameter above + If ``out=None``, returns a new array containing the variance; + otherwise, a reference to the output array is returned. 
+ + See Also + -------- + std, mean, nanmean, nanstd, nanvar + :ref:`ufuncs-output-type` + + Notes + ----- + The variance is the average of the squared deviations from the mean, + i.e., ``var = mean(x)``, where ``x = abs(a - a.mean())**2``. + + The mean is typically calculated as ``x.sum() / N``, where ``N = len(x)``. + If, however, `ddof` is specified, the divisor ``N - ddof`` is used + instead. In standard statistical practice, ``ddof=1`` provides an + unbiased estimator of the variance of a hypothetical infinite population. + ``ddof=0`` provides a maximum likelihood estimate of the variance for + normally distributed variables. + + Note that for complex numbers, the absolute value is taken before + squaring, so that the result is always real and nonnegative. + + For floating-point input, the variance is computed using the same + precision the input has. Depending on the input data, this can cause + the results to be inaccurate, especially for `float32` (see example + below). Specifying a higher-accuracy accumulator using the ``dtype`` + keyword can alleviate this issue. + + Examples + -------- + >>> a = np.array([[1, 2], [3, 4]]) + >>> np.var(a) + 1.25 + >>> np.var(a, axis=0) + array([1., 1.]) + >>> np.var(a, axis=1) + array([0.25, 0.25]) + + In single precision, var() can be inaccurate: + + >>> a = np.zeros((2, 512*512), dtype=np.float32) + >>> a[0, :] = 1.0 + >>> a[1, :] = 0.1 + >>> np.var(a) + 0.20250003 + + Computing the variance in float64 is more accurate: + + >>> np.var(a, dtype=np.float64) + 0.20249999932944759 # may vary + >>> ((1-0.55)**2 + (0.1-0.55)**2)/2 + 0.2025 + + Specifying a where argument: + + >>> a = np.array([[14, 8, 11, 10], [7, 9, 10, 11], [10, 15, 5, 10]]) + >>> np.var(a) + 6.833333333333333 # may vary + >>> np.var(a, where=[[True], [True], [False]]) + 4.0 + + """ + kwargs = {} + if keepdims is not np._NoValue: + kwargs['keepdims'] = keepdims + if where is not np._NoValue: + kwargs['where'] = where + + if type(a) is not mu.ndarray: + try: + var = a.var + + except AttributeError: + pass + else: + return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs) + + return _methods._var(a, axis=axis, dtype=dtype, out=out, ddof=ddof, + **kwargs) + + +# Aliases of other functions. These have their own definitions only so that +# they can have unique docstrings. + +@array_function_dispatch(_around_dispatcher) +def round_(a, decimals=0, out=None): + """ + Round an array to the given number of decimals. + + See Also + -------- + around : equivalent function; see for details. + """ + return around(a, decimals=decimals, out=out) + + +@array_function_dispatch(_prod_dispatcher, verify=False) +def product(*args, **kwargs): + """ + Return the product of array elements over a given axis. + + See Also + -------- + prod : equivalent function; see for details. + """ + return prod(*args, **kwargs) + + +@array_function_dispatch(_cumprod_dispatcher, verify=False) +def cumproduct(*args, **kwargs): + """ + Return the cumulative product over the given axis. + + See Also + -------- + cumprod : equivalent function; see for details. + """ + return cumprod(*args, **kwargs) + + +@array_function_dispatch(_any_dispatcher, verify=False) +def sometrue(*args, **kwargs): + """ + Check whether some values are true. + + Refer to `any` for full documentation. + + See Also + -------- + any : equivalent function; see for details. 
+ """ + return any(*args, **kwargs) + + +@array_function_dispatch(_all_dispatcher, verify=False) +def alltrue(*args, **kwargs): + """ + Check if all elements of input array are true. + + See Also + -------- + numpy.all : Equivalent function; see for details. + """ + return all(*args, **kwargs) diff --git a/.local/lib/python3.8/site-packages/numpy/core/fromnumeric.pyi b/.local/lib/python3.8/site-packages/numpy/core/fromnumeric.pyi new file mode 100644 index 0000000..4a5e505 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/fromnumeric.pyi @@ -0,0 +1,377 @@ +import datetime as dt +from typing import Optional, Union, Sequence, Tuple, Any, overload, TypeVar, Literal + +from numpy import ( + ndarray, + number, + integer, + intp, + bool_, + generic, + _OrderKACF, + _OrderACF, + _ModeKind, + _PartitionKind, + _SortKind, + _SortSide, +) +from numpy.typing import ( + DTypeLike, + ArrayLike, + _ShapeLike, + _Shape, + _ArrayLikeBool_co, + _ArrayLikeInt_co, + _NumberLike_co, +) + +# Various annotations for scalars + +# While dt.datetime and dt.timedelta are not technically part of NumPy, +# they are one of the rare few builtin scalars which serve as valid return types. +# See https://github.com/numpy/numpy-stubs/pull/67#discussion_r412604113. +_ScalarNumpy = Union[generic, dt.datetime, dt.timedelta] +_ScalarBuiltin = Union[str, bytes, dt.date, dt.timedelta, bool, int, float, complex] +_Scalar = Union[_ScalarBuiltin, _ScalarNumpy] + +# Integers and booleans can generally be used interchangeably +_ScalarGeneric = TypeVar("_ScalarGeneric", bound=generic) + +_Number = TypeVar("_Number", bound=number) + +# The signature of take() follows a common theme with its overloads: +# 1. A generic comes in; the same generic comes out +# 2. A scalar comes in; a generic comes out +# 3. An array-like object comes in; some keyword ensures that a generic comes out +# 4. An array-like object comes in; an ndarray or generic comes out +def take( + a: ArrayLike, + indices: _ArrayLikeInt_co, + axis: Optional[int] = ..., + out: Optional[ndarray] = ..., + mode: _ModeKind = ..., +) -> Any: ... + +def reshape( + a: ArrayLike, + newshape: _ShapeLike, + order: _OrderACF = ..., +) -> ndarray: ... + +def choose( + a: _ArrayLikeInt_co, + choices: ArrayLike, + out: Optional[ndarray] = ..., + mode: _ModeKind = ..., +) -> Any: ... + +def repeat( + a: ArrayLike, + repeats: _ArrayLikeInt_co, + axis: Optional[int] = ..., +) -> ndarray: ... + +def put( + a: ndarray, + ind: _ArrayLikeInt_co, + v: ArrayLike, + mode: _ModeKind = ..., +) -> None: ... + +def swapaxes( + a: ArrayLike, + axis1: int, + axis2: int, +) -> ndarray: ... + +def transpose( + a: ArrayLike, + axes: Union[None, Sequence[int], ndarray] = ... +) -> ndarray: ... + +def partition( + a: ArrayLike, + kth: _ArrayLikeInt_co, + axis: Optional[int] = ..., + kind: _PartitionKind = ..., + order: Union[None, str, Sequence[str]] = ..., +) -> ndarray: ... + +def argpartition( + a: ArrayLike, + kth: _ArrayLikeInt_co, + axis: Optional[int] = ..., + kind: _PartitionKind = ..., + order: Union[None, str, Sequence[str]] = ..., +) -> Any: ... + +def sort( + a: ArrayLike, + axis: Optional[int] = ..., + kind: Optional[_SortKind] = ..., + order: Union[None, str, Sequence[str]] = ..., +) -> ndarray: ... + +def argsort( + a: ArrayLike, + axis: Optional[int] = ..., + kind: Optional[_SortKind] = ..., + order: Union[None, str, Sequence[str]] = ..., +) -> ndarray: ... 
+ +@overload +def argmax( + a: ArrayLike, + axis: None = ..., + out: Optional[ndarray] = ..., + *, + keepdims: Literal[False] = ..., +) -> intp: ... +@overload +def argmax( + a: ArrayLike, + axis: Optional[int] = ..., + out: Optional[ndarray] = ..., + *, + keepdims: bool = ..., +) -> Any: ... + +@overload +def argmin( + a: ArrayLike, + axis: None = ..., + out: Optional[ndarray] = ..., + *, + keepdims: Literal[False] = ..., +) -> intp: ... +@overload +def argmin( + a: ArrayLike, + axis: Optional[int] = ..., + out: Optional[ndarray] = ..., + *, + keepdims: bool = ..., +) -> Any: ... + +@overload +def searchsorted( + a: ArrayLike, + v: _Scalar, + side: _SortSide = ..., + sorter: Optional[_ArrayLikeInt_co] = ..., # 1D int array +) -> intp: ... +@overload +def searchsorted( + a: ArrayLike, + v: ArrayLike, + side: _SortSide = ..., + sorter: Optional[_ArrayLikeInt_co] = ..., # 1D int array +) -> ndarray: ... + +def resize( + a: ArrayLike, + new_shape: _ShapeLike, +) -> ndarray: ... + +@overload +def squeeze( + a: _ScalarGeneric, + axis: Optional[_ShapeLike] = ..., +) -> _ScalarGeneric: ... +@overload +def squeeze( + a: ArrayLike, + axis: Optional[_ShapeLike] = ..., +) -> ndarray: ... + +def diagonal( + a: ArrayLike, + offset: int = ..., + axis1: int = ..., + axis2: int = ..., # >= 2D array +) -> ndarray: ... + +def trace( + a: ArrayLike, # >= 2D array + offset: int = ..., + axis1: int = ..., + axis2: int = ..., + dtype: DTypeLike = ..., + out: Optional[ndarray] = ..., +) -> Any: ... + +def ravel(a: ArrayLike, order: _OrderKACF = ...) -> ndarray: ... + +def nonzero(a: ArrayLike) -> Tuple[ndarray, ...]: ... + +def shape(a: ArrayLike) -> _Shape: ... + +def compress( + condition: ArrayLike, # 1D bool array + a: ArrayLike, + axis: Optional[int] = ..., + out: Optional[ndarray] = ..., +) -> ndarray: ... + +@overload +def clip( + a: ArrayLike, + a_min: ArrayLike, + a_max: Optional[ArrayLike], + out: Optional[ndarray] = ..., + **kwargs: Any, +) -> Any: ... +@overload +def clip( + a: ArrayLike, + a_min: None, + a_max: ArrayLike, + out: Optional[ndarray] = ..., + **kwargs: Any, +) -> Any: ... + +def sum( + a: ArrayLike, + axis: _ShapeLike = ..., + dtype: DTypeLike = ..., + out: Optional[ndarray] = ..., + keepdims: bool = ..., + initial: _NumberLike_co = ..., + where: _ArrayLikeBool_co = ..., +) -> Any: ... + +@overload +def all( + a: ArrayLike, + axis: None = ..., + out: None = ..., + keepdims: Literal[False] = ..., + *, + where: _ArrayLikeBool_co = ..., +) -> bool_: ... +@overload +def all( + a: ArrayLike, + axis: Optional[_ShapeLike] = ..., + out: Optional[ndarray] = ..., + keepdims: bool = ..., + *, + where: _ArrayLikeBool_co = ..., +) -> Any: ... + +@overload +def any( + a: ArrayLike, + axis: None = ..., + out: None = ..., + keepdims: Literal[False] = ..., + *, + where: _ArrayLikeBool_co = ..., +) -> bool_: ... +@overload +def any( + a: ArrayLike, + axis: Optional[_ShapeLike] = ..., + out: Optional[ndarray] = ..., + keepdims: bool = ..., + *, + where: _ArrayLikeBool_co = ..., +) -> Any: ... + +def cumsum( + a: ArrayLike, + axis: Optional[int] = ..., + dtype: DTypeLike = ..., + out: Optional[ndarray] = ..., +) -> ndarray: ... + +def ptp( + a: ArrayLike, + axis: Optional[_ShapeLike] = ..., + out: Optional[ndarray] = ..., + keepdims: bool = ..., +) -> Any: ... + +def amax( + a: ArrayLike, + axis: Optional[_ShapeLike] = ..., + out: Optional[ndarray] = ..., + keepdims: bool = ..., + initial: _NumberLike_co = ..., + where: _ArrayLikeBool_co = ..., +) -> Any: ... 
+ +def amin( + a: ArrayLike, + axis: Optional[_ShapeLike] = ..., + out: Optional[ndarray] = ..., + keepdims: bool = ..., + initial: _NumberLike_co = ..., + where: _ArrayLikeBool_co = ..., +) -> Any: ... + +# TODO: `np.prod()``: For object arrays `initial` does not necessarily +# have to be a numerical scalar. +# The only requirement is that it is compatible +# with the `.__mul__()` method(s) of the passed array's elements. + +# Note that the same situation holds for all wrappers around +# `np.ufunc.reduce`, e.g. `np.sum()` (`.__add__()`). +def prod( + a: ArrayLike, + axis: Optional[_ShapeLike] = ..., + dtype: DTypeLike = ..., + out: Optional[ndarray] = ..., + keepdims: bool = ..., + initial: _NumberLike_co = ..., + where: _ArrayLikeBool_co = ..., +) -> Any: ... + +def cumprod( + a: ArrayLike, + axis: Optional[int] = ..., + dtype: DTypeLike = ..., + out: Optional[ndarray] = ..., +) -> ndarray: ... + +def ndim(a: ArrayLike) -> int: ... + +def size(a: ArrayLike, axis: Optional[int] = ...) -> int: ... + +def around( + a: ArrayLike, + decimals: int = ..., + out: Optional[ndarray] = ..., +) -> Any: ... + +def mean( + a: ArrayLike, + axis: Optional[_ShapeLike] = ..., + dtype: DTypeLike = ..., + out: Optional[ndarray] = ..., + keepdims: bool = ..., + *, + where: _ArrayLikeBool_co = ..., +) -> Any: ... + +def std( + a: ArrayLike, + axis: Optional[_ShapeLike] = ..., + dtype: DTypeLike = ..., + out: Optional[ndarray] = ..., + ddof: int = ..., + keepdims: bool = ..., + *, + where: _ArrayLikeBool_co = ..., +) -> Any: ... + +def var( + a: ArrayLike, + axis: Optional[_ShapeLike] = ..., + dtype: DTypeLike = ..., + out: Optional[ndarray] = ..., + ddof: int = ..., + keepdims: bool = ..., + *, + where: _ArrayLikeBool_co = ..., +) -> Any: ... diff --git a/.local/lib/python3.8/site-packages/numpy/core/function_base.py b/.local/lib/python3.8/site-packages/numpy/core/function_base.py new file mode 100644 index 0000000..e940ac2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/function_base.py @@ -0,0 +1,529 @@ +import functools +import warnings +import operator +import types + +from . import numeric as _nx +from .numeric import result_type, NaN, asanyarray, ndim +from numpy.core.multiarray import add_docstring +from numpy.core import overrides + +__all__ = ['logspace', 'linspace', 'geomspace'] + + +array_function_dispatch = functools.partial( + overrides.array_function_dispatch, module='numpy') + + +def _linspace_dispatcher(start, stop, num=None, endpoint=None, retstep=None, + dtype=None, axis=None): + return (start, stop) + + +@array_function_dispatch(_linspace_dispatcher) +def linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None, + axis=0): + """ + Return evenly spaced numbers over a specified interval. + + Returns `num` evenly spaced samples, calculated over the + interval [`start`, `stop`]. + + The endpoint of the interval can optionally be excluded. + + .. versionchanged:: 1.16.0 + Non-scalar `start` and `stop` are now supported. + + .. versionchanged:: 1.20.0 + Values are rounded towards ``-inf`` instead of ``0`` when an + integer ``dtype`` is specified. The old behavior can + still be obtained with ``np.linspace(start, stop, num).astype(int)`` + + Parameters + ---------- + start : array_like + The starting value of the sequence. + stop : array_like + The end value of the sequence, unless `endpoint` is set to False. + In that case, the sequence consists of all but the last of ``num + 1`` + evenly spaced samples, so that `stop` is excluded. 
Note that the step + size changes when `endpoint` is False. + num : int, optional + Number of samples to generate. Default is 50. Must be non-negative. + endpoint : bool, optional + If True, `stop` is the last sample. Otherwise, it is not included. + Default is True. + retstep : bool, optional + If True, return (`samples`, `step`), where `step` is the spacing + between samples. + dtype : dtype, optional + The type of the output array. If `dtype` is not given, the data type + is inferred from `start` and `stop`. The inferred dtype will never be + an integer; `float` is chosen even if the arguments would produce an + array of integers. + + .. versionadded:: 1.9.0 + + axis : int, optional + The axis in the result to store the samples. Relevant only if start + or stop are array-like. By default (0), the samples will be along a + new axis inserted at the beginning. Use -1 to get an axis at the end. + + .. versionadded:: 1.16.0 + + Returns + ------- + samples : ndarray + There are `num` equally spaced samples in the closed interval + ``[start, stop]`` or the half-open interval ``[start, stop)`` + (depending on whether `endpoint` is True or False). + step : float, optional + Only returned if `retstep` is True + + Size of spacing between samples. + + + See Also + -------- + arange : Similar to `linspace`, but uses a step size (instead of the + number of samples). + geomspace : Similar to `linspace`, but with numbers spaced evenly on a log + scale (a geometric progression). + logspace : Similar to `geomspace`, but with the end points specified as + logarithms. + + Examples + -------- + >>> np.linspace(2.0, 3.0, num=5) + array([2. , 2.25, 2.5 , 2.75, 3. ]) + >>> np.linspace(2.0, 3.0, num=5, endpoint=False) + array([2. , 2.2, 2.4, 2.6, 2.8]) + >>> np.linspace(2.0, 3.0, num=5, retstep=True) + (array([2. , 2.25, 2.5 , 2.75, 3. ]), 0.25) + + Graphical illustration: + + >>> import matplotlib.pyplot as plt + >>> N = 8 + >>> y = np.zeros(N) + >>> x1 = np.linspace(0, 10, N, endpoint=True) + >>> x2 = np.linspace(0, 10, N, endpoint=False) + >>> plt.plot(x1, y, 'o') + [] + >>> plt.plot(x2, y + 0.5, 'o') + [] + >>> plt.ylim([-0.5, 1]) + (-0.5, 1) + >>> plt.show() + + """ + num = operator.index(num) + if num < 0: + raise ValueError("Number of samples, %s, must be non-negative." % num) + div = (num - 1) if endpoint else num + + # Convert float/complex array scalars to float, gh-3504 + # and make sure one can use variables that have an __array_interface__, gh-6634 + start = asanyarray(start) * 1.0 + stop = asanyarray(stop) * 1.0 + + dt = result_type(start, stop, float(num)) + if dtype is None: + dtype = dt + + delta = stop - start + y = _nx.arange(0, num, dtype=dt).reshape((-1,) + (1,) * ndim(delta)) + # In-place multiplication y *= delta/div is faster, but prevents the multiplicant + # from overriding what class is produced, and thus prevents, e.g. use of Quantities, + # see gh-7142. Hence, we multiply in place only for standard scalar types. + _mult_inplace = _nx.isscalar(delta) + if div > 0: + step = delta / div + if _nx.any(step == 0): + # Special handling for denormal numbers, gh-5437 + y /= div + if _mult_inplace: + y *= delta + else: + y = y * delta + else: + if _mult_inplace: + y *= step + else: + y = y * step + else: + # sequences with 0 items or 1 item with endpoint=True (i.e. div <= 0) + # have an undefined step + step = NaN + # Multiply with delta to allow possible override of output class. 
+ y = y * delta + + y += start + + if endpoint and num > 1: + y[-1] = stop + + if axis != 0: + y = _nx.moveaxis(y, 0, axis) + + if _nx.issubdtype(dtype, _nx.integer): + _nx.floor(y, out=y) + + if retstep: + return y.astype(dtype, copy=False), step + else: + return y.astype(dtype, copy=False) + + +def _logspace_dispatcher(start, stop, num=None, endpoint=None, base=None, + dtype=None, axis=None): + return (start, stop) + + +@array_function_dispatch(_logspace_dispatcher) +def logspace(start, stop, num=50, endpoint=True, base=10.0, dtype=None, + axis=0): + """ + Return numbers spaced evenly on a log scale. + + In linear space, the sequence starts at ``base ** start`` + (`base` to the power of `start`) and ends with ``base ** stop`` + (see `endpoint` below). + + .. versionchanged:: 1.16.0 + Non-scalar `start` and `stop` are now supported. + + Parameters + ---------- + start : array_like + ``base ** start`` is the starting value of the sequence. + stop : array_like + ``base ** stop`` is the final value of the sequence, unless `endpoint` + is False. In that case, ``num + 1`` values are spaced over the + interval in log-space, of which all but the last (a sequence of + length `num`) are returned. + num : integer, optional + Number of samples to generate. Default is 50. + endpoint : boolean, optional + If true, `stop` is the last sample. Otherwise, it is not included. + Default is True. + base : array_like, optional + The base of the log space. The step size between the elements in + ``ln(samples) / ln(base)`` (or ``log_base(samples)``) is uniform. + Default is 10.0. + dtype : dtype + The type of the output array. If `dtype` is not given, the data type + is inferred from `start` and `stop`. The inferred type will never be + an integer; `float` is chosen even if the arguments would produce an + array of integers. + axis : int, optional + The axis in the result to store the samples. Relevant only if start + or stop are array-like. By default (0), the samples will be along a + new axis inserted at the beginning. Use -1 to get an axis at the end. + + .. versionadded:: 1.16.0 + + + Returns + ------- + samples : ndarray + `num` samples, equally spaced on a log scale. + + See Also + -------- + arange : Similar to linspace, with the step size specified instead of the + number of samples. Note that, when used with a float endpoint, the + endpoint may or may not be included. + linspace : Similar to logspace, but with the samples uniformly distributed + in linear space, instead of log space. + geomspace : Similar to logspace, but with endpoints specified directly. + + Notes + ----- + Logspace is equivalent to the code + + >>> y = np.linspace(start, stop, num=num, endpoint=endpoint) + ... # doctest: +SKIP + >>> power(base, y).astype(dtype) + ... # doctest: +SKIP + + Examples + -------- + >>> np.logspace(2.0, 3.0, num=4) + array([ 100. , 215.443469 , 464.15888336, 1000. ]) + >>> np.logspace(2.0, 3.0, num=4, endpoint=False) + array([100. , 177.827941 , 316.22776602, 562.34132519]) + >>> np.logspace(2.0, 3.0, num=4, base=2.0) + array([4. , 5.0396842 , 6.34960421, 8. 
]) + + Graphical illustration: + + >>> import matplotlib.pyplot as plt + >>> N = 10 + >>> x1 = np.logspace(0.1, 1, N, endpoint=True) + >>> x2 = np.logspace(0.1, 1, N, endpoint=False) + >>> y = np.zeros(N) + >>> plt.plot(x1, y, 'o') + [] + >>> plt.plot(x2, y + 0.5, 'o') + [] + >>> plt.ylim([-0.5, 1]) + (-0.5, 1) + >>> plt.show() + + """ + y = linspace(start, stop, num=num, endpoint=endpoint, axis=axis) + if dtype is None: + return _nx.power(base, y) + return _nx.power(base, y).astype(dtype, copy=False) + + +def _geomspace_dispatcher(start, stop, num=None, endpoint=None, dtype=None, + axis=None): + return (start, stop) + + +@array_function_dispatch(_geomspace_dispatcher) +def geomspace(start, stop, num=50, endpoint=True, dtype=None, axis=0): + """ + Return numbers spaced evenly on a log scale (a geometric progression). + + This is similar to `logspace`, but with endpoints specified directly. + Each output sample is a constant multiple of the previous. + + .. versionchanged:: 1.16.0 + Non-scalar `start` and `stop` are now supported. + + Parameters + ---------- + start : array_like + The starting value of the sequence. + stop : array_like + The final value of the sequence, unless `endpoint` is False. + In that case, ``num + 1`` values are spaced over the + interval in log-space, of which all but the last (a sequence of + length `num`) are returned. + num : integer, optional + Number of samples to generate. Default is 50. + endpoint : boolean, optional + If true, `stop` is the last sample. Otherwise, it is not included. + Default is True. + dtype : dtype + The type of the output array. If `dtype` is not given, the data type + is inferred from `start` and `stop`. The inferred dtype will never be + an integer; `float` is chosen even if the arguments would produce an + array of integers. + axis : int, optional + The axis in the result to store the samples. Relevant only if start + or stop are array-like. By default (0), the samples will be along a + new axis inserted at the beginning. Use -1 to get an axis at the end. + + .. versionadded:: 1.16.0 + + Returns + ------- + samples : ndarray + `num` samples, equally spaced on a log scale. + + See Also + -------- + logspace : Similar to geomspace, but with endpoints specified using log + and base. + linspace : Similar to geomspace, but with arithmetic instead of geometric + progression. + arange : Similar to linspace, with the step size specified instead of the + number of samples. + + Notes + ----- + If the inputs or dtype are complex, the output will follow a logarithmic + spiral in the complex plane. (There are an infinite number of spirals + passing through two points; the output will follow the shortest such path.) + + Examples + -------- + >>> np.geomspace(1, 1000, num=4) + array([ 1., 10., 100., 1000.]) + >>> np.geomspace(1, 1000, num=3, endpoint=False) + array([ 1., 10., 100.]) + >>> np.geomspace(1, 1000, num=4, endpoint=False) + array([ 1. 
, 5.62341325, 31.6227766 , 177.827941 ]) + >>> np.geomspace(1, 256, num=9) + array([ 1., 2., 4., 8., 16., 32., 64., 128., 256.]) + + Note that the above may not produce exact integers: + + >>> np.geomspace(1, 256, num=9, dtype=int) + array([ 1, 2, 4, 7, 16, 32, 63, 127, 256]) + >>> np.around(np.geomspace(1, 256, num=9)).astype(int) + array([ 1, 2, 4, 8, 16, 32, 64, 128, 256]) + + Negative, decreasing, and complex inputs are allowed: + + >>> np.geomspace(1000, 1, num=4) + array([1000., 100., 10., 1.]) + >>> np.geomspace(-1000, -1, num=4) + array([-1000., -100., -10., -1.]) + >>> np.geomspace(1j, 1000j, num=4) # Straight line + array([0. +1.j, 0. +10.j, 0. +100.j, 0.+1000.j]) + >>> np.geomspace(-1+0j, 1+0j, num=5) # Circle + array([-1.00000000e+00+1.22464680e-16j, -7.07106781e-01+7.07106781e-01j, + 6.12323400e-17+1.00000000e+00j, 7.07106781e-01+7.07106781e-01j, + 1.00000000e+00+0.00000000e+00j]) + + Graphical illustration of `endpoint` parameter: + + >>> import matplotlib.pyplot as plt + >>> N = 10 + >>> y = np.zeros(N) + >>> plt.semilogx(np.geomspace(1, 1000, N, endpoint=True), y + 1, 'o') + [] + >>> plt.semilogx(np.geomspace(1, 1000, N, endpoint=False), y + 2, 'o') + [] + >>> plt.axis([0.5, 2000, 0, 3]) + [0.5, 2000, 0, 3] + >>> plt.grid(True, color='0.7', linestyle='-', which='both', axis='both') + >>> plt.show() + + """ + start = asanyarray(start) + stop = asanyarray(stop) + if _nx.any(start == 0) or _nx.any(stop == 0): + raise ValueError('Geometric sequence cannot include zero') + + dt = result_type(start, stop, float(num), _nx.zeros((), dtype)) + if dtype is None: + dtype = dt + else: + # complex to dtype('complex128'), for instance + dtype = _nx.dtype(dtype) + + # Promote both arguments to the same dtype in case, for instance, one is + # complex and another is negative and log would produce NaN otherwise. + # Copy since we may change things in-place further down. + start = start.astype(dt, copy=True) + stop = stop.astype(dt, copy=True) + + out_sign = _nx.ones(_nx.broadcast(start, stop).shape, dt) + # Avoid negligible real or imaginary parts in output by rotating to + # positive real, calculating, then undoing rotation + if _nx.issubdtype(dt, _nx.complexfloating): + all_imag = (start.real == 0.) & (stop.real == 0.) + if _nx.any(all_imag): + start[all_imag] = start[all_imag].imag + stop[all_imag] = stop[all_imag].imag + out_sign[all_imag] = 1j + + both_negative = (_nx.sign(start) == -1) & (_nx.sign(stop) == -1) + if _nx.any(both_negative): + _nx.negative(start, out=start, where=both_negative) + _nx.negative(stop, out=stop, where=both_negative) + _nx.negative(out_sign, out=out_sign, where=both_negative) + + log_start = _nx.log10(start) + log_stop = _nx.log10(stop) + result = logspace(log_start, log_stop, num=num, + endpoint=endpoint, base=10.0, dtype=dtype) + + # Make sure the endpoints match the start and stop arguments. This is + # necessary because np.exp(np.log(x)) is not necessarily equal to x. + if num > 0: + result[0] = start + if num > 1 and endpoint: + result[-1] = stop + + result = out_sign * result + + if axis != 0: + result = _nx.moveaxis(result, 0, axis) + + return result.astype(dtype, copy=False) + + +def _needs_add_docstring(obj): + """ + Returns true if the only way to set the docstring of `obj` from python is + via add_docstring. + + This function errs on the side of being overly conservative. 
+ """ + Py_TPFLAGS_HEAPTYPE = 1 << 9 + + if isinstance(obj, (types.FunctionType, types.MethodType, property)): + return False + + if isinstance(obj, type) and obj.__flags__ & Py_TPFLAGS_HEAPTYPE: + return False + + return True + + +def _add_docstring(obj, doc, warn_on_python): + if warn_on_python and not _needs_add_docstring(obj): + warnings.warn( + "add_newdoc was used on a pure-python object {}. " + "Prefer to attach it directly to the source." + .format(obj), + UserWarning, + stacklevel=3) + try: + add_docstring(obj, doc) + except Exception: + pass + + +def add_newdoc(place, obj, doc, warn_on_python=True): + """ + Add documentation to an existing object, typically one defined in C + + The purpose is to allow easier editing of the docstrings without requiring + a re-compile. This exists primarily for internal use within numpy itself. + + Parameters + ---------- + place : str + The absolute name of the module to import from + obj : str + The name of the object to add documentation to, typically a class or + function name + doc : {str, Tuple[str, str], List[Tuple[str, str]]} + If a string, the documentation to apply to `obj` + + If a tuple, then the first element is interpreted as an attribute of + `obj` and the second as the docstring to apply - ``(method, docstring)`` + + If a list, then each element of the list should be a tuple of length + two - ``[(method1, docstring1), (method2, docstring2), ...]`` + warn_on_python : bool + If True, the default, emit `UserWarning` if this is used to attach + documentation to a pure-python object. + + Notes + ----- + This routine never raises an error if the docstring can't be written, but + will raise an error if the object being documented does not exist. + + This routine cannot modify read-only docstrings, as appear + in new-style classes or built-in functions. Because this + routine never raises an error the caller must check manually + that the docstrings were changed. + + Since this function grabs the ``char *`` from a c-level str object and puts + it into the ``tp_doc`` slot of the type of `obj`, it violates a number of + C-API best-practices, by: + + - modifying a `PyTypeObject` after calling `PyType_Ready` + - calling `Py_INCREF` on the str and losing the reference, so the str + will never be released + + If possible it should be avoided. 
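+
+    Examples
+    --------
+    A sketch of the tuple form described above (the target names are
+    illustrative, so the call is skipped under doctest):
+
+    >>> add_newdoc('numpy.core.multiarray', 'ndarray',  # doctest: +SKIP
+    ...            ('transpose', 'Returns a view with the axes transposed.'))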
+ """ + new = getattr(__import__(place, globals(), {}, [obj]), obj) + if isinstance(doc, str): + _add_docstring(new, doc.strip(), warn_on_python) + elif isinstance(doc, tuple): + attr, docstring = doc + _add_docstring(getattr(new, attr), docstring.strip(), warn_on_python) + elif isinstance(doc, list): + for attr, docstring in doc: + _add_docstring(getattr(new, attr), docstring.strip(), warn_on_python) diff --git a/.local/lib/python3.8/site-packages/numpy/core/function_base.pyi b/.local/lib/python3.8/site-packages/numpy/core/function_base.pyi new file mode 100644 index 0000000..68d3b3a --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/function_base.pyi @@ -0,0 +1,60 @@ +from typing import overload, Tuple, Union, Sequence, Any, SupportsIndex, Literal, List + +from numpy import ndarray +from numpy.typing import ArrayLike, DTypeLike, _SupportsArray, _NumberLike_co + +# TODO: wait for support for recursive types +_ArrayLikeNested = Sequence[Sequence[Any]] +_ArrayLikeNumber = Union[ + _NumberLike_co, Sequence[_NumberLike_co], ndarray, _SupportsArray, _ArrayLikeNested +] + +__all__: List[str] + +@overload +def linspace( + start: _ArrayLikeNumber, + stop: _ArrayLikeNumber, + num: SupportsIndex = ..., + endpoint: bool = ..., + retstep: Literal[False] = ..., + dtype: DTypeLike = ..., + axis: SupportsIndex = ..., +) -> ndarray: ... +@overload +def linspace( + start: _ArrayLikeNumber, + stop: _ArrayLikeNumber, + num: SupportsIndex = ..., + endpoint: bool = ..., + retstep: Literal[True] = ..., + dtype: DTypeLike = ..., + axis: SupportsIndex = ..., +) -> Tuple[ndarray, Any]: ... + +def logspace( + start: _ArrayLikeNumber, + stop: _ArrayLikeNumber, + num: SupportsIndex = ..., + endpoint: bool = ..., + base: _ArrayLikeNumber = ..., + dtype: DTypeLike = ..., + axis: SupportsIndex = ..., +) -> ndarray: ... + +def geomspace( + start: _ArrayLikeNumber, + stop: _ArrayLikeNumber, + num: SupportsIndex = ..., + endpoint: bool = ..., + dtype: DTypeLike = ..., + axis: SupportsIndex = ..., +) -> ndarray: ... + +# Re-exported to `np.lib.function_base` +def add_newdoc( + place: str, + obj: str, + doc: str | Tuple[str, str] | List[Tuple[str, str]], + warn_on_python: bool = ..., +) -> None: ... 
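+
+# An editorial sketch, assuming a static type checker such as mypy: the
+# ``retstep`` literal selects between the two ``linspace`` overloads above.
+#
+#     reveal_type(linspace(0.0, 1.0, num=5))                # -> ndarray
+#     reveal_type(linspace(0.0, 1.0, num=5, retstep=True))  # -> Tuple[ndarray, Any]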
diff --git a/.local/lib/python3.8/site-packages/numpy/core/generate_numpy_api.py b/.local/lib/python3.8/site-packages/numpy/core/generate_numpy_api.py new file mode 100644 index 0000000..3797596 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/generate_numpy_api.py @@ -0,0 +1,236 @@ +import os +import genapi + +from genapi import \ + TypeApi, GlobalVarApi, FunctionApi, BoolValuesApi + +import numpy_api + +# use annotated api when running under cpychecker +h_template = r""" +#if defined(_MULTIARRAYMODULE) || defined(WITH_CPYCHECKER_STEALS_REFERENCE_TO_ARG_ATTRIBUTE) + +typedef struct { + PyObject_HEAD + npy_bool obval; +} PyBoolScalarObject; + +extern NPY_NO_EXPORT PyTypeObject PyArrayMapIter_Type; +extern NPY_NO_EXPORT PyTypeObject PyArrayNeighborhoodIter_Type; +extern NPY_NO_EXPORT PyBoolScalarObject _PyArrayScalar_BoolValues[2]; + +%s + +#else + +#if defined(PY_ARRAY_UNIQUE_SYMBOL) +#define PyArray_API PY_ARRAY_UNIQUE_SYMBOL +#endif + +#if defined(NO_IMPORT) || defined(NO_IMPORT_ARRAY) +extern void **PyArray_API; +#else +#if defined(PY_ARRAY_UNIQUE_SYMBOL) +void **PyArray_API; +#else +static void **PyArray_API=NULL; +#endif +#endif + +%s + +#if !defined(NO_IMPORT_ARRAY) && !defined(NO_IMPORT) +static int +_import_array(void) +{ + int st; + PyObject *numpy = PyImport_ImportModule("numpy.core._multiarray_umath"); + PyObject *c_api = NULL; + + if (numpy == NULL) { + return -1; + } + c_api = PyObject_GetAttrString(numpy, "_ARRAY_API"); + Py_DECREF(numpy); + if (c_api == NULL) { + PyErr_SetString(PyExc_AttributeError, "_ARRAY_API not found"); + return -1; + } + + if (!PyCapsule_CheckExact(c_api)) { + PyErr_SetString(PyExc_RuntimeError, "_ARRAY_API is not PyCapsule object"); + Py_DECREF(c_api); + return -1; + } + PyArray_API = (void **)PyCapsule_GetPointer(c_api, NULL); + Py_DECREF(c_api); + if (PyArray_API == NULL) { + PyErr_SetString(PyExc_RuntimeError, "_ARRAY_API is NULL pointer"); + return -1; + } + + /* Perform runtime check of C API version */ + if (NPY_VERSION != PyArray_GetNDArrayCVersion()) { + PyErr_Format(PyExc_RuntimeError, "module compiled against "\ + "ABI version 0x%%x but this version of numpy is 0x%%x", \ + (int) NPY_VERSION, (int) PyArray_GetNDArrayCVersion()); + return -1; + } + if (NPY_FEATURE_VERSION > PyArray_GetNDArrayCFeatureVersion()) { + PyErr_Format(PyExc_RuntimeError, "module compiled against "\ + "API version 0x%%x but this version of numpy is 0x%%x", \ + (int) NPY_FEATURE_VERSION, (int) PyArray_GetNDArrayCFeatureVersion()); + return -1; + } + + /* + * Perform runtime check of endianness and check it matches the one set by + * the headers (npy_endian.h) as a safeguard + */ + st = PyArray_GetEndianness(); + if (st == NPY_CPU_UNKNOWN_ENDIAN) { + PyErr_Format(PyExc_RuntimeError, "FATAL: module compiled as unknown endian"); + return -1; + } +#if NPY_BYTE_ORDER == NPY_BIG_ENDIAN + if (st != NPY_CPU_BIG) { + PyErr_Format(PyExc_RuntimeError, "FATAL: module compiled as "\ + "big endian, but detected different endianness at runtime"); + return -1; + } +#elif NPY_BYTE_ORDER == NPY_LITTLE_ENDIAN + if (st != NPY_CPU_LITTLE) { + PyErr_Format(PyExc_RuntimeError, "FATAL: module compiled as "\ + "little endian, but detected different endianness at runtime"); + return -1; + } +#endif + + return 0; +} + +#define import_array() {if (_import_array() < 0) {PyErr_Print(); PyErr_SetString(PyExc_ImportError, "numpy.core.multiarray failed to import"); return NULL; } } + +#define import_array1(ret) {if (_import_array() < 0) {PyErr_Print(); 
PyErr_SetString(PyExc_ImportError, "numpy.core.multiarray failed to import"); return ret; } } + +#define import_array2(msg, ret) {if (_import_array() < 0) {PyErr_Print(); PyErr_SetString(PyExc_ImportError, msg); return ret; } } + +#endif + +#endif +""" + + +c_template = r""" +/* These pointers will be stored in the C-object for use in other + extension modules +*/ + +void *PyArray_API[] = { +%s +}; +""" + +c_api_header = """ +=========== +NumPy C-API +=========== +""" + +def generate_api(output_dir, force=False): + basename = 'multiarray_api' + + h_file = os.path.join(output_dir, '__%s.h' % basename) + c_file = os.path.join(output_dir, '__%s.c' % basename) + d_file = os.path.join(output_dir, '%s.txt' % basename) + targets = (h_file, c_file, d_file) + + sources = numpy_api.multiarray_api + + if (not force and not genapi.should_rebuild(targets, [numpy_api.__file__, __file__])): + return targets + else: + do_generate_api(targets, sources) + + return targets + +def do_generate_api(targets, sources): + header_file = targets[0] + c_file = targets[1] + doc_file = targets[2] + + global_vars = sources[0] + scalar_bool_values = sources[1] + types_api = sources[2] + multiarray_funcs = sources[3] + + multiarray_api = sources[:] + + module_list = [] + extension_list = [] + init_list = [] + + # Check multiarray api indexes + multiarray_api_index = genapi.merge_api_dicts(multiarray_api) + genapi.check_api_dict(multiarray_api_index) + + numpyapi_list = genapi.get_api_functions('NUMPY_API', + multiarray_funcs) + + # Create dict name -> *Api instance + api_name = 'PyArray_API' + multiarray_api_dict = {} + for f in numpyapi_list: + name = f.name + index = multiarray_funcs[name][0] + annotations = multiarray_funcs[name][1:] + multiarray_api_dict[f.name] = FunctionApi(f.name, index, annotations, + f.return_type, + f.args, api_name) + + for name, val in global_vars.items(): + index, type = val + multiarray_api_dict[name] = GlobalVarApi(name, index, type, api_name) + + for name, val in scalar_bool_values.items(): + index = val[0] + multiarray_api_dict[name] = BoolValuesApi(name, index, api_name) + + for name, val in types_api.items(): + index = val[0] + internal_type = None if len(val) == 1 else val[1] + multiarray_api_dict[name] = TypeApi( + name, index, 'PyTypeObject', api_name, internal_type) + + if len(multiarray_api_dict) != len(multiarray_api_index): + keys_dict = set(multiarray_api_dict.keys()) + keys_index = set(multiarray_api_index.keys()) + raise AssertionError( + "Multiarray API size mismatch - " + "index has extra keys {}, dict has extra keys {}" + .format(keys_index - keys_dict, keys_dict - keys_index) + ) + + extension_list = [] + for name, index in genapi.order_dict(multiarray_api_index): + api_item = multiarray_api_dict[name] + extension_list.append(api_item.define_from_array_api_string()) + init_list.append(api_item.array_api_define()) + module_list.append(api_item.internal_define()) + + # Write to header + s = h_template % ('\n'.join(module_list), '\n'.join(extension_list)) + genapi.write_file(header_file, s) + + # Write to c-code + s = c_template % ',\n'.join(init_list) + genapi.write_file(c_file, s) + + # write to documentation + s = c_api_header + for func in numpyapi_list: + s += func.to_ReST() + s += '\n\n' + genapi.write_file(doc_file, s) + + return targets diff --git a/.local/lib/python3.8/site-packages/numpy/core/getlimits.py b/.local/lib/python3.8/site-packages/numpy/core/getlimits.py new file mode 100644 index 0000000..ab4a4d2 --- /dev/null +++ 
b/.local/lib/python3.8/site-packages/numpy/core/getlimits.py @@ -0,0 +1,697 @@ +"""Machine limits for Float32 and Float64 and (long double) if available... + +""" +__all__ = ['finfo', 'iinfo'] + +import warnings + +from ._machar import MachAr +from .overrides import set_module +from . import numeric +from . import numerictypes as ntypes +from .numeric import array, inf, NaN +from .umath import log10, exp2, nextafter, isnan + + +def _fr0(a): + """fix rank-0 --> rank-1""" + if a.ndim == 0: + a = a.copy() + a.shape = (1,) + return a + + +def _fr1(a): + """fix rank > 0 --> rank-0""" + if a.size == 1: + a = a.copy() + a.shape = () + return a + + +class MachArLike: + """ Object to simulate MachAr instance """ + def __init__(self, ftype, *, eps, epsneg, huge, tiny, + ibeta, smallest_subnormal=None, **kwargs): + self.params = _MACHAR_PARAMS[ftype] + self.ftype = ftype + self.title = self.params['title'] + # Parameter types same as for discovered MachAr object. + if not smallest_subnormal: + self._smallest_subnormal = nextafter( + self.ftype(0), self.ftype(1), dtype=self.ftype) + else: + self._smallest_subnormal = smallest_subnormal + self.epsilon = self.eps = self._float_to_float(eps) + self.epsneg = self._float_to_float(epsneg) + self.xmax = self.huge = self._float_to_float(huge) + self.xmin = self._float_to_float(tiny) + self.smallest_normal = self.tiny = self._float_to_float(tiny) + self.ibeta = self.params['itype'](ibeta) + self.__dict__.update(kwargs) + self.precision = int(-log10(self.eps)) + self.resolution = self._float_to_float( + self._float_conv(10) ** (-self.precision)) + self._str_eps = self._float_to_str(self.eps) + self._str_epsneg = self._float_to_str(self.epsneg) + self._str_xmin = self._float_to_str(self.xmin) + self._str_xmax = self._float_to_str(self.xmax) + self._str_resolution = self._float_to_str(self.resolution) + self._str_smallest_normal = self._float_to_str(self.xmin) + + @property + def smallest_subnormal(self): + """Return the value for the smallest subnormal. + + Returns + ------- + smallest_subnormal : float + value for the smallest subnormal. + + Warns + ----- + UserWarning + If the calculated value for the smallest subnormal is zero. + """ + # Check that the calculated value is not zero, in case it raises a + # warning. + value = self._smallest_subnormal + if self.ftype(0) == value: + warnings.warn( + 'The value of the smallest subnormal for {} type ' + 'is zero.'.format(self.ftype), UserWarning, stacklevel=2) + + return self._float_to_float(value) + + @property + def _str_smallest_subnormal(self): + """Return the string representation of the smallest subnormal.""" + return self._float_to_str(self.smallest_subnormal) + + def _float_to_float(self, value): + """Converts float to float. + + Parameters + ---------- + value : float + value to be converted. + """ + return _fr1(self._float_conv(value)) + + def _float_conv(self, value): + """Converts float to conv. + + Parameters + ---------- + value : float + value to be converted. + """ + return array([value], self.ftype) + + def _float_to_str(self, value): + """Converts float to str. + + Parameters + ---------- + value : float + value to be converted. 
+ """ + return self.params['fmt'] % array(_fr0(value)[0], self.ftype) + + +_convert_to_float = { + ntypes.csingle: ntypes.single, + ntypes.complex_: ntypes.float_, + ntypes.clongfloat: ntypes.longfloat + } + +# Parameters for creating MachAr / MachAr-like objects +_title_fmt = 'numpy {} precision floating point number' +_MACHAR_PARAMS = { + ntypes.double: dict( + itype = ntypes.int64, + fmt = '%24.16e', + title = _title_fmt.format('double')), + ntypes.single: dict( + itype = ntypes.int32, + fmt = '%15.7e', + title = _title_fmt.format('single')), + ntypes.longdouble: dict( + itype = ntypes.longlong, + fmt = '%s', + title = _title_fmt.format('long double')), + ntypes.half: dict( + itype = ntypes.int16, + fmt = '%12.5e', + title = _title_fmt.format('half'))} + +# Key to identify the floating point type. Key is result of +# ftype('-0.1').newbyteorder('<').tobytes() +# See: +# https://perl5.git.perl.org/perl.git/blob/3118d7d684b56cbeb702af874f4326683c45f045:/Configure +_KNOWN_TYPES = {} +def _register_type(machar, bytepat): + _KNOWN_TYPES[bytepat] = machar +_float_ma = {} + + +def _register_known_types(): + # Known parameters for float16 + # See docstring of MachAr class for description of parameters. + f16 = ntypes.float16 + float16_ma = MachArLike(f16, + machep=-10, + negep=-11, + minexp=-14, + maxexp=16, + it=10, + iexp=5, + ibeta=2, + irnd=5, + ngrd=0, + eps=exp2(f16(-10)), + epsneg=exp2(f16(-11)), + huge=f16(65504), + tiny=f16(2 ** -14)) + _register_type(float16_ma, b'f\xae') + _float_ma[16] = float16_ma + + # Known parameters for float32 + f32 = ntypes.float32 + float32_ma = MachArLike(f32, + machep=-23, + negep=-24, + minexp=-126, + maxexp=128, + it=23, + iexp=8, + ibeta=2, + irnd=5, + ngrd=0, + eps=exp2(f32(-23)), + epsneg=exp2(f32(-24)), + huge=f32((1 - 2 ** -24) * 2**128), + tiny=exp2(f32(-126))) + _register_type(float32_ma, b'\xcd\xcc\xcc\xbd') + _float_ma[32] = float32_ma + + # Known parameters for float64 + f64 = ntypes.float64 + epsneg_f64 = 2.0 ** -53.0 + tiny_f64 = 2.0 ** -1022.0 + float64_ma = MachArLike(f64, + machep=-52, + negep=-53, + minexp=-1022, + maxexp=1024, + it=52, + iexp=11, + ibeta=2, + irnd=5, + ngrd=0, + eps=2.0 ** -52.0, + epsneg=epsneg_f64, + huge=(1.0 - epsneg_f64) / tiny_f64 * f64(4), + tiny=tiny_f64) + _register_type(float64_ma, b'\x9a\x99\x99\x99\x99\x99\xb9\xbf') + _float_ma[64] = float64_ma + + # Known parameters for IEEE 754 128-bit binary float + ld = ntypes.longdouble + epsneg_f128 = exp2(ld(-113)) + tiny_f128 = exp2(ld(-16382)) + # Ignore runtime error when this is not f128 + with numeric.errstate(all='ignore'): + huge_f128 = (ld(1) - epsneg_f128) / tiny_f128 * ld(4) + float128_ma = MachArLike(ld, + machep=-112, + negep=-113, + minexp=-16382, + maxexp=16384, + it=112, + iexp=15, + ibeta=2, + irnd=5, + ngrd=0, + eps=exp2(ld(-112)), + epsneg=epsneg_f128, + huge=huge_f128, + tiny=tiny_f128) + # IEEE 754 128-bit binary float + _register_type(float128_ma, + b'\x9a\x99\x99\x99\x99\x99\x99\x99\x99\x99\x99\x99\x99\x99\xfb\xbf') + _register_type(float128_ma, + b'\x9a\x99\x99\x99\x99\x99\x99\x99\x99\x99\x99\x99\x99\x99\xfb\xbf') + _float_ma[128] = float128_ma + + # Known parameters for float80 (Intel 80-bit extended precision) + epsneg_f80 = exp2(ld(-64)) + tiny_f80 = exp2(ld(-16382)) + # Ignore runtime error when this is not f80 + with numeric.errstate(all='ignore'): + huge_f80 = (ld(1) - epsneg_f80) / tiny_f80 * ld(4) + float80_ma = MachArLike(ld, + machep=-63, + negep=-64, + minexp=-16382, + maxexp=16384, + it=63, + iexp=15, + ibeta=2, + irnd=5, + ngrd=0, + 
eps=exp2(ld(-63)), + epsneg=epsneg_f80, + huge=huge_f80, + tiny=tiny_f80) + # float80, first 10 bytes containing actual storage + _register_type(float80_ma, b'\xcd\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xfb\xbf') + _float_ma[80] = float80_ma + + # Guessed / known parameters for double double; see: + # https://en.wikipedia.org/wiki/Quadruple-precision_floating-point_format#Double-double_arithmetic + # These numbers have the same exponent range as float64, but extended number of + # digits in the significand. + huge_dd = nextafter(ld(inf), ld(0), dtype=ld) + # As the smallest_normal in double double is so hard to calculate we set + # it to NaN. + smallest_normal_dd = NaN + # Leave the same value for the smallest subnormal as double + smallest_subnormal_dd = ld(nextafter(0., 1.)) + float_dd_ma = MachArLike(ld, + machep=-105, + negep=-106, + minexp=-1022, + maxexp=1024, + it=105, + iexp=11, + ibeta=2, + irnd=5, + ngrd=0, + eps=exp2(ld(-105)), + epsneg=exp2(ld(-106)), + huge=huge_dd, + tiny=smallest_normal_dd, + smallest_subnormal=smallest_subnormal_dd) + # double double; low, high order (e.g. PPC 64) + _register_type(float_dd_ma, + b'\x9a\x99\x99\x99\x99\x99Y<\x9a\x99\x99\x99\x99\x99\xb9\xbf') + # double double; high, low order (e.g. PPC 64 le) + _register_type(float_dd_ma, + b'\x9a\x99\x99\x99\x99\x99\xb9\xbf\x9a\x99\x99\x99\x99\x99Y<') + _float_ma['dd'] = float_dd_ma + + +def _get_machar(ftype): + """ Get MachAr instance or MachAr-like instance + + Get parameters for floating point type, by first trying signatures of + various known floating point types, then, if none match, attempting to + identify parameters by analysis. + + Parameters + ---------- + ftype : class + Numpy floating point type class (e.g. ``np.float64``) + + Returns + ------- + ma_like : instance of :class:`MachAr` or :class:`MachArLike` + Object giving floating point parameters for `ftype`. + + Warns + ----- + UserWarning + If the binary signature of the float type is not in the dictionary of + known float types. + """ + params = _MACHAR_PARAMS.get(ftype) + if params is None: + raise ValueError(repr(ftype)) + # Detect known / suspected types + key = ftype('-0.1').newbyteorder('<').tobytes() + ma_like = None + if ftype == ntypes.longdouble: + # Could be 80 bit == 10 byte extended precision, where last bytes can + # be random garbage. + # Comparing first 10 bytes to pattern first to avoid branching on the + # random garbage. + ma_like = _KNOWN_TYPES.get(key[:10]) + if ma_like is None: + ma_like = _KNOWN_TYPES.get(key) + if ma_like is not None: + return ma_like + # Fall back to parameter discovery + warnings.warn( + 'Signature {} for {} does not match any known type: ' + 'falling back to type probe function'.format(key, ftype), + UserWarning, stacklevel=2) + return _discovered_machar(ftype) + + +def _discovered_machar(ftype): + """ Create MachAr instance with found information on float types + """ + params = _MACHAR_PARAMS[ftype] + return MachAr(lambda v: array([v], ftype), + lambda v:_fr0(v.astype(params['itype']))[0], + lambda v:array(_fr0(v)[0], ftype), + lambda v: params['fmt'] % array(_fr0(v)[0], ftype), + params['title']) + + +@set_module('numpy') +class finfo: + """ + finfo(dtype) + + Machine limits for floating point types. + + Attributes + ---------- + bits : int + The number of bits occupied by the type. + eps : float + The difference between 1.0 and the next smallest representable float + larger than 1.0. For example, for 64-bit binary floats in the IEEE-754 + standard, ``eps = 2**-52``, approximately 2.22e-16. 
+ epsneg : float + The difference between 1.0 and the next smallest representable float + less than 1.0. For example, for 64-bit binary floats in the IEEE-754 + standard, ``epsneg = 2**-53``, approximately 1.11e-16. + iexp : int + The number of bits in the exponent portion of the floating point + representation. + machar : MachAr + The object which calculated these parameters and holds more + detailed information. + + .. deprecated:: 1.22 + machep : int + The exponent that yields `eps`. + max : floating point number of the appropriate type + The largest representable number. + maxexp : int + The smallest positive power of the base (2) that causes overflow. + min : floating point number of the appropriate type + The smallest representable number, typically ``-max``. + minexp : int + The most negative power of the base (2) consistent with there + being no leading 0's in the mantissa. + negep : int + The exponent that yields `epsneg`. + nexp : int + The number of bits in the exponent including its sign and bias. + nmant : int + The number of bits in the mantissa. + precision : int + The approximate number of decimal digits to which this kind of + float is precise. + resolution : floating point number of the appropriate type + The approximate decimal resolution of this type, i.e., + ``10**-precision``. + tiny : float + An alias for `smallest_normal`, kept for backwards compatibility. + smallest_normal : float + The smallest positive floating point number with 1 as leading bit in + the mantissa following IEEE-754 (see Notes). + smallest_subnormal : float + The smallest positive floating point number with 0 as leading bit in + the mantissa following IEEE-754. + + Parameters + ---------- + dtype : float, dtype, or instance + Kind of floating point data-type about which to get information. + + See Also + -------- + MachAr : The implementation of the tests that produce this information. + iinfo : The equivalent for integer data types. + spacing : The distance between a value and the nearest adjacent number + nextafter : The next floating point value after x1 towards x2 + + Notes + ----- + For developers of NumPy: do not instantiate this at the module level. + The initial calculation of these parameters is expensive and negatively + impacts import times. These objects are cached, so calling ``finfo()`` + repeatedly inside your functions is not a problem. + + Note that ``smallest_normal`` is not actually the smallest positive + representable value in a NumPy floating point type. As in the IEEE-754 + standard [1]_, NumPy floating point types make use of subnormal numbers to + fill the gap between 0 and ``smallest_normal``. However, subnormal numbers + may have significantly reduced precision [2]_. + + References + ---------- + .. [1] IEEE Standard for Floating-Point Arithmetic, IEEE Std 754-2008, + pp.1-70, 2008, http://www.doi.org/10.1109/IEEESTD.2008.4610935 + .. 
[2] Wikipedia, "Denormal Numbers", + https://en.wikipedia.org/wiki/Denormal_number + """ + + _finfo_cache = {} + + def __new__(cls, dtype): + try: + dtype = numeric.dtype(dtype) + except TypeError: + # In case a float instance was given + dtype = numeric.dtype(type(dtype)) + + obj = cls._finfo_cache.get(dtype, None) + if obj is not None: + return obj + dtypes = [dtype] + newdtype = numeric.obj2sctype(dtype) + if newdtype is not dtype: + dtypes.append(newdtype) + dtype = newdtype + if not issubclass(dtype, numeric.inexact): + raise ValueError("data type %r not inexact" % (dtype)) + obj = cls._finfo_cache.get(dtype, None) + if obj is not None: + return obj + if not issubclass(dtype, numeric.floating): + newdtype = _convert_to_float[dtype] + if newdtype is not dtype: + dtypes.append(newdtype) + dtype = newdtype + obj = cls._finfo_cache.get(dtype, None) + if obj is not None: + return obj + obj = object.__new__(cls)._init(dtype) + for dt in dtypes: + cls._finfo_cache[dt] = obj + return obj + + def _init(self, dtype): + self.dtype = numeric.dtype(dtype) + machar = _get_machar(dtype) + + for word in ['precision', 'iexp', + 'maxexp', 'minexp', 'negep', + 'machep']: + setattr(self, word, getattr(machar, word)) + for word in ['resolution', 'epsneg', 'smallest_subnormal']: + setattr(self, word, getattr(machar, word).flat[0]) + self.bits = self.dtype.itemsize * 8 + self.max = machar.huge.flat[0] + self.min = -self.max + self.eps = machar.eps.flat[0] + self.nexp = machar.iexp + self.nmant = machar.it + self._machar = machar + self._str_tiny = machar._str_xmin.strip() + self._str_max = machar._str_xmax.strip() + self._str_epsneg = machar._str_epsneg.strip() + self._str_eps = machar._str_eps.strip() + self._str_resolution = machar._str_resolution.strip() + self._str_smallest_normal = machar._str_smallest_normal.strip() + self._str_smallest_subnormal = machar._str_smallest_subnormal.strip() + return self + + def __str__(self): + fmt = ( + 'Machine parameters for %(dtype)s\n' + '---------------------------------------------------------------\n' + 'precision = %(precision)3s resolution = %(_str_resolution)s\n' + 'machep = %(machep)6s eps = %(_str_eps)s\n' + 'negep = %(negep)6s epsneg = %(_str_epsneg)s\n' + 'minexp = %(minexp)6s tiny = %(_str_tiny)s\n' + 'maxexp = %(maxexp)6s max = %(_str_max)s\n' + 'nexp = %(nexp)6s min = -max\n' + 'smallest_normal = %(_str_smallest_normal)s ' + 'smallest_subnormal = %(_str_smallest_subnormal)s\n' + '---------------------------------------------------------------\n' + ) + return fmt % self.__dict__ + + def __repr__(self): + c = self.__class__.__name__ + d = self.__dict__.copy() + d['klass'] = c + return (("%(klass)s(resolution=%(resolution)s, min=-%(_str_max)s," + " max=%(_str_max)s, dtype=%(dtype)s)") % d) + + @property + def smallest_normal(self): + """Return the value for the smallest normal. + + Returns + ------- + smallest_normal : float + Value for the smallest normal. + + Warns + ----- + UserWarning + If the calculated value for the smallest normal is requested for + double-double. + """ + # This check is necessary because the value for smallest_normal is + # platform dependent for longdouble types. + if isnan(self._machar.smallest_normal.flat[0]): + warnings.warn( + 'The value of smallest normal is undefined for double double', + UserWarning, stacklevel=2) + return self._machar.smallest_normal.flat[0] + + @property + def tiny(self): + """Return the value for tiny, alias of smallest_normal. 
+ + Returns + ------- + tiny : float + Value for the smallest normal, alias of smallest_normal. + + Warns + ----- + UserWarning + If the calculated value for the smallest normal is requested for + double-double. + """ + return self.smallest_normal + + @property + def machar(self): + """The object which calculated these parameters and holds more + detailed information. + + .. deprecated:: 1.22 + """ + # Deprecated 2021-10-27, NumPy 1.22 + warnings.warn( + "`finfo.machar` is deprecated (NumPy 1.22)", + DeprecationWarning, stacklevel=2, + ) + return self._machar + + +@set_module('numpy') +class iinfo: + """ + iinfo(type) + + Machine limits for integer types. + + Attributes + ---------- + bits : int + The number of bits occupied by the type. + min : int + The smallest integer expressible by the type. + max : int + The largest integer expressible by the type. + + Parameters + ---------- + int_type : integer type, dtype, or instance + The kind of integer data type to get information about. + + See Also + -------- + finfo : The equivalent for floating point data types. + + Examples + -------- + With types: + + >>> ii16 = np.iinfo(np.int16) + >>> ii16.min + -32768 + >>> ii16.max + 32767 + >>> ii32 = np.iinfo(np.int32) + >>> ii32.min + -2147483648 + >>> ii32.max + 2147483647 + + With instances: + + >>> ii32 = np.iinfo(np.int32(10)) + >>> ii32.min + -2147483648 + >>> ii32.max + 2147483647 + + """ + + _min_vals = {} + _max_vals = {} + + def __init__(self, int_type): + try: + self.dtype = numeric.dtype(int_type) + except TypeError: + self.dtype = numeric.dtype(type(int_type)) + self.kind = self.dtype.kind + self.bits = self.dtype.itemsize * 8 + self.key = "%s%d" % (self.kind, self.bits) + if self.kind not in 'iu': + raise ValueError("Invalid integer data type %r." 
% (self.kind,)) + + @property + def min(self): + """Minimum value of given dtype.""" + if self.kind == 'u': + return 0 + else: + try: + val = iinfo._min_vals[self.key] + except KeyError: + val = int(-(1 << (self.bits-1))) + iinfo._min_vals[self.key] = val + return val + + @property + def max(self): + """Maximum value of given dtype.""" + try: + val = iinfo._max_vals[self.key] + except KeyError: + if self.kind == 'u': + val = int((1 << self.bits) - 1) + else: + val = int((1 << (self.bits-1)) - 1) + iinfo._max_vals[self.key] = val + return val + + def __str__(self): + """String representation.""" + fmt = ( + 'Machine parameters for %(dtype)s\n' + '---------------------------------------------------------------\n' + 'min = %(min)s\n' + 'max = %(max)s\n' + '---------------------------------------------------------------\n' + ) + return fmt % {'dtype': self.dtype, 'min': self.min, 'max': self.max} + + def __repr__(self): + return "%s(min=%s, max=%s, dtype=%s)" % (self.__class__.__name__, + self.min, self.max, self.dtype) diff --git a/.local/lib/python3.8/site-packages/numpy/core/getlimits.pyi b/.local/lib/python3.8/site-packages/numpy/core/getlimits.pyi new file mode 100644 index 0000000..66d0629 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/getlimits.pyi @@ -0,0 +1,8 @@ +from typing import List + +from numpy import ( + finfo as finfo, + iinfo as iinfo, +) + +__all__: List[str] diff --git a/.local/lib/python3.8/site-packages/numpy/core/include/numpy/.doxyfile b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/.doxyfile new file mode 100644 index 0000000..ed2aeff --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/.doxyfile @@ -0,0 +1,2 @@ +INCLUDE_PATH += @CUR_DIR +PREDEFINED += NPY_INTERNAL_BUILD diff --git a/.local/lib/python3.8/site-packages/numpy/core/include/numpy/__multiarray_api.h b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/__multiarray_api.h new file mode 100644 index 0000000..94ffaf8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/__multiarray_api.h @@ -0,0 +1,1553 @@ + +#if defined(_MULTIARRAYMODULE) || defined(WITH_CPYCHECKER_STEALS_REFERENCE_TO_ARG_ATTRIBUTE) + +typedef struct { + PyObject_HEAD + npy_bool obval; +} PyBoolScalarObject; + +extern NPY_NO_EXPORT PyTypeObject PyArrayMapIter_Type; +extern NPY_NO_EXPORT PyTypeObject PyArrayNeighborhoodIter_Type; +extern NPY_NO_EXPORT PyBoolScalarObject _PyArrayScalar_BoolValues[2]; + +NPY_NO_EXPORT unsigned int PyArray_GetNDArrayCVersion \ + (void); +extern NPY_NO_EXPORT PyTypeObject PyBigArray_Type; + +extern NPY_NO_EXPORT PyTypeObject PyArray_Type; + +extern NPY_NO_EXPORT PyArray_DTypeMeta PyArrayDescr_TypeFull; +#define PyArrayDescr_Type (*(PyTypeObject *)(&PyArrayDescr_TypeFull)) + +extern NPY_NO_EXPORT PyTypeObject PyArrayFlags_Type; + +extern NPY_NO_EXPORT PyTypeObject PyArrayIter_Type; + +extern NPY_NO_EXPORT PyTypeObject PyArrayMultiIter_Type; + +extern NPY_NO_EXPORT int NPY_NUMUSERTYPES; + +extern NPY_NO_EXPORT PyTypeObject PyBoolArrType_Type; + +extern NPY_NO_EXPORT PyBoolScalarObject _PyArrayScalar_BoolValues[2]; + +extern NPY_NO_EXPORT PyTypeObject PyGenericArrType_Type; + +extern NPY_NO_EXPORT PyTypeObject PyNumberArrType_Type; + +extern NPY_NO_EXPORT PyTypeObject PyIntegerArrType_Type; + +extern NPY_NO_EXPORT PyTypeObject PySignedIntegerArrType_Type; + +extern NPY_NO_EXPORT PyTypeObject PyUnsignedIntegerArrType_Type; + +extern NPY_NO_EXPORT PyTypeObject PyInexactArrType_Type; + +extern NPY_NO_EXPORT PyTypeObject 
PyFloatingArrType_Type; + +extern NPY_NO_EXPORT PyTypeObject PyComplexFloatingArrType_Type; + +extern NPY_NO_EXPORT PyTypeObject PyFlexibleArrType_Type; + +extern NPY_NO_EXPORT PyTypeObject PyCharacterArrType_Type; + +extern NPY_NO_EXPORT PyTypeObject PyByteArrType_Type; + +extern NPY_NO_EXPORT PyTypeObject PyShortArrType_Type; + +extern NPY_NO_EXPORT PyTypeObject PyIntArrType_Type; + +extern NPY_NO_EXPORT PyTypeObject PyLongArrType_Type; + +extern NPY_NO_EXPORT PyTypeObject PyLongLongArrType_Type; + +extern NPY_NO_EXPORT PyTypeObject PyUByteArrType_Type; + +extern NPY_NO_EXPORT PyTypeObject PyUShortArrType_Type; + +extern NPY_NO_EXPORT PyTypeObject PyUIntArrType_Type; + +extern NPY_NO_EXPORT PyTypeObject PyULongArrType_Type; + +extern NPY_NO_EXPORT PyTypeObject PyULongLongArrType_Type; + +extern NPY_NO_EXPORT PyTypeObject PyFloatArrType_Type; + +extern NPY_NO_EXPORT PyTypeObject PyDoubleArrType_Type; + +extern NPY_NO_EXPORT PyTypeObject PyLongDoubleArrType_Type; + +extern NPY_NO_EXPORT PyTypeObject PyCFloatArrType_Type; + +extern NPY_NO_EXPORT PyTypeObject PyCDoubleArrType_Type; + +extern NPY_NO_EXPORT PyTypeObject PyCLongDoubleArrType_Type; + +extern NPY_NO_EXPORT PyTypeObject PyObjectArrType_Type; + +extern NPY_NO_EXPORT PyTypeObject PyStringArrType_Type; + +extern NPY_NO_EXPORT PyTypeObject PyUnicodeArrType_Type; + +extern NPY_NO_EXPORT PyTypeObject PyVoidArrType_Type; + +NPY_NO_EXPORT int PyArray_SetNumericOps \ + (PyObject *); +NPY_NO_EXPORT PyObject * PyArray_GetNumericOps \ + (void); +NPY_NO_EXPORT int PyArray_INCREF \ + (PyArrayObject *); +NPY_NO_EXPORT int PyArray_XDECREF \ + (PyArrayObject *); +NPY_NO_EXPORT void PyArray_SetStringFunction \ + (PyObject *, int); +NPY_NO_EXPORT PyArray_Descr * PyArray_DescrFromType \ + (int); +NPY_NO_EXPORT PyObject * PyArray_TypeObjectFromType \ + (int); +NPY_NO_EXPORT char * PyArray_Zero \ + (PyArrayObject *); +NPY_NO_EXPORT char * PyArray_One \ + (PyArrayObject *); +NPY_NO_EXPORT NPY_STEALS_REF_TO_ARG(2) NPY_GCC_NONNULL(2) PyObject * PyArray_CastToType \ + (PyArrayObject *, PyArray_Descr *, int); +NPY_NO_EXPORT int PyArray_CastTo \ + (PyArrayObject *, PyArrayObject *); +NPY_NO_EXPORT int PyArray_CastAnyTo \ + (PyArrayObject *, PyArrayObject *); +NPY_NO_EXPORT int PyArray_CanCastSafely \ + (int, int); +NPY_NO_EXPORT npy_bool PyArray_CanCastTo \ + (PyArray_Descr *, PyArray_Descr *); +NPY_NO_EXPORT int PyArray_ObjectType \ + (PyObject *, int); +NPY_NO_EXPORT PyArray_Descr * PyArray_DescrFromObject \ + (PyObject *, PyArray_Descr *); +NPY_NO_EXPORT PyArrayObject ** PyArray_ConvertToCommonType \ + (PyObject *, int *); +NPY_NO_EXPORT PyArray_Descr * PyArray_DescrFromScalar \ + (PyObject *); +NPY_NO_EXPORT PyArray_Descr * PyArray_DescrFromTypeObject \ + (PyObject *); +NPY_NO_EXPORT npy_intp PyArray_Size \ + (PyObject *); +NPY_NO_EXPORT PyObject * PyArray_Scalar \ + (void *, PyArray_Descr *, PyObject *); +NPY_NO_EXPORT NPY_STEALS_REF_TO_ARG(2) PyObject * PyArray_FromScalar \ + (PyObject *, PyArray_Descr *); +NPY_NO_EXPORT void PyArray_ScalarAsCtype \ + (PyObject *, void *); +NPY_NO_EXPORT int PyArray_CastScalarToCtype \ + (PyObject *, void *, PyArray_Descr *); +NPY_NO_EXPORT int PyArray_CastScalarDirect \ + (PyObject *, PyArray_Descr *, void *, int); +NPY_NO_EXPORT PyObject * PyArray_ScalarFromObject \ + (PyObject *); +NPY_NO_EXPORT PyArray_VectorUnaryFunc * PyArray_GetCastFunc \ + (PyArray_Descr *, int); +NPY_NO_EXPORT PyObject * PyArray_FromDims \ + (int NPY_UNUSED(nd), int *NPY_UNUSED(d), int NPY_UNUSED(type)); +NPY_NO_EXPORT NPY_STEALS_REF_TO_ARG(3) 
PyObject * PyArray_FromDimsAndDataAndDescr \ + (int NPY_UNUSED(nd), int *NPY_UNUSED(d), PyArray_Descr *, char *NPY_UNUSED(data)); +NPY_NO_EXPORT NPY_STEALS_REF_TO_ARG(2) PyObject * PyArray_FromAny \ + (PyObject *, PyArray_Descr *, int, int, int, PyObject *); +NPY_NO_EXPORT NPY_STEALS_REF_TO_ARG(1) PyObject * PyArray_EnsureArray \ + (PyObject *); +NPY_NO_EXPORT NPY_STEALS_REF_TO_ARG(1) PyObject * PyArray_EnsureAnyArray \ + (PyObject *); +NPY_NO_EXPORT PyObject * PyArray_FromFile \ + (FILE *, PyArray_Descr *, npy_intp, char *); +NPY_NO_EXPORT PyObject * PyArray_FromString \ + (char *, npy_intp, PyArray_Descr *, npy_intp, char *); +NPY_NO_EXPORT PyObject * PyArray_FromBuffer \ + (PyObject *, PyArray_Descr *, npy_intp, npy_intp); +NPY_NO_EXPORT NPY_STEALS_REF_TO_ARG(2) PyObject * PyArray_FromIter \ + (PyObject *, PyArray_Descr *, npy_intp); +NPY_NO_EXPORT NPY_STEALS_REF_TO_ARG(1) PyObject * PyArray_Return \ + (PyArrayObject *); +NPY_NO_EXPORT NPY_STEALS_REF_TO_ARG(2) NPY_GCC_NONNULL(2) PyObject * PyArray_GetField \ + (PyArrayObject *, PyArray_Descr *, int); +NPY_NO_EXPORT NPY_STEALS_REF_TO_ARG(2) NPY_GCC_NONNULL(2) int PyArray_SetField \ + (PyArrayObject *, PyArray_Descr *, int, PyObject *); +NPY_NO_EXPORT PyObject * PyArray_Byteswap \ + (PyArrayObject *, npy_bool); +NPY_NO_EXPORT PyObject * PyArray_Resize \ + (PyArrayObject *, PyArray_Dims *, int, NPY_ORDER NPY_UNUSED(order)); +NPY_NO_EXPORT int PyArray_MoveInto \ + (PyArrayObject *, PyArrayObject *); +NPY_NO_EXPORT int PyArray_CopyInto \ + (PyArrayObject *, PyArrayObject *); +NPY_NO_EXPORT int PyArray_CopyAnyInto \ + (PyArrayObject *, PyArrayObject *); +NPY_NO_EXPORT int PyArray_CopyObject \ + (PyArrayObject *, PyObject *); +NPY_NO_EXPORT NPY_GCC_NONNULL(1) PyObject * PyArray_NewCopy \ + (PyArrayObject *, NPY_ORDER); +NPY_NO_EXPORT PyObject * PyArray_ToList \ + (PyArrayObject *); +NPY_NO_EXPORT PyObject * PyArray_ToString \ + (PyArrayObject *, NPY_ORDER); +NPY_NO_EXPORT int PyArray_ToFile \ + (PyArrayObject *, FILE *, char *, char *); +NPY_NO_EXPORT int PyArray_Dump \ + (PyObject *, PyObject *, int); +NPY_NO_EXPORT PyObject * PyArray_Dumps \ + (PyObject *, int); +NPY_NO_EXPORT int PyArray_ValidType \ + (int); +NPY_NO_EXPORT void PyArray_UpdateFlags \ + (PyArrayObject *, int); +NPY_NO_EXPORT NPY_GCC_NONNULL(1) PyObject * PyArray_New \ + (PyTypeObject *, int, npy_intp const *, int, npy_intp const *, void *, int, int, PyObject *); +NPY_NO_EXPORT NPY_STEALS_REF_TO_ARG(2) NPY_GCC_NONNULL(1) NPY_GCC_NONNULL(2) PyObject * PyArray_NewFromDescr \ + (PyTypeObject *, PyArray_Descr *, int, npy_intp const *, npy_intp const *, void *, int, PyObject *); +NPY_NO_EXPORT PyArray_Descr * PyArray_DescrNew \ + (PyArray_Descr *); +NPY_NO_EXPORT PyArray_Descr * PyArray_DescrNewFromType \ + (int); +NPY_NO_EXPORT double PyArray_GetPriority \ + (PyObject *, double); +NPY_NO_EXPORT PyObject * PyArray_IterNew \ + (PyObject *); +NPY_NO_EXPORT PyObject* PyArray_MultiIterNew \ + (int, ...); +NPY_NO_EXPORT int PyArray_PyIntAsInt \ + (PyObject *); +NPY_NO_EXPORT npy_intp PyArray_PyIntAsIntp \ + (PyObject *); +NPY_NO_EXPORT int PyArray_Broadcast \ + (PyArrayMultiIterObject *); +NPY_NO_EXPORT void PyArray_FillObjectArray \ + (PyArrayObject *, PyObject *); +NPY_NO_EXPORT int PyArray_FillWithScalar \ + (PyArrayObject *, PyObject *); +NPY_NO_EXPORT npy_bool PyArray_CheckStrides \ + (int, int, npy_intp, npy_intp, npy_intp const *, npy_intp const *); +NPY_NO_EXPORT PyArray_Descr * PyArray_DescrNewByteorder \ + (PyArray_Descr *, char); +NPY_NO_EXPORT PyObject * 
PyArray_IterAllButAxis \ + (PyObject *, int *); +NPY_NO_EXPORT NPY_STEALS_REF_TO_ARG(2) PyObject * PyArray_CheckFromAny \ + (PyObject *, PyArray_Descr *, int, int, int, PyObject *); +NPY_NO_EXPORT NPY_STEALS_REF_TO_ARG(2) PyObject * PyArray_FromArray \ + (PyArrayObject *, PyArray_Descr *, int); +NPY_NO_EXPORT PyObject * PyArray_FromInterface \ + (PyObject *); +NPY_NO_EXPORT PyObject * PyArray_FromStructInterface \ + (PyObject *); +NPY_NO_EXPORT PyObject * PyArray_FromArrayAttr \ + (PyObject *, PyArray_Descr *, PyObject *); +NPY_NO_EXPORT NPY_SCALARKIND PyArray_ScalarKind \ + (int, PyArrayObject **); +NPY_NO_EXPORT int PyArray_CanCoerceScalar \ + (int, int, NPY_SCALARKIND); +NPY_NO_EXPORT PyObject * PyArray_NewFlagsObject \ + (PyObject *); +NPY_NO_EXPORT npy_bool PyArray_CanCastScalar \ + (PyTypeObject *, PyTypeObject *); +NPY_NO_EXPORT int PyArray_CompareUCS4 \ + (npy_ucs4 const *, npy_ucs4 const *, size_t); +NPY_NO_EXPORT int PyArray_RemoveSmallest \ + (PyArrayMultiIterObject *); +NPY_NO_EXPORT int PyArray_ElementStrides \ + (PyObject *); +NPY_NO_EXPORT void PyArray_Item_INCREF \ + (char *, PyArray_Descr *); +NPY_NO_EXPORT void PyArray_Item_XDECREF \ + (char *, PyArray_Descr *); +NPY_NO_EXPORT PyObject * PyArray_FieldNames \ + (PyObject *); +NPY_NO_EXPORT PyObject * PyArray_Transpose \ + (PyArrayObject *, PyArray_Dims *); +NPY_NO_EXPORT PyObject * PyArray_TakeFrom \ + (PyArrayObject *, PyObject *, int, PyArrayObject *, NPY_CLIPMODE); +NPY_NO_EXPORT PyObject * PyArray_PutTo \ + (PyArrayObject *, PyObject*, PyObject *, NPY_CLIPMODE); +NPY_NO_EXPORT PyObject * PyArray_PutMask \ + (PyArrayObject *, PyObject*, PyObject*); +NPY_NO_EXPORT PyObject * PyArray_Repeat \ + (PyArrayObject *, PyObject *, int); +NPY_NO_EXPORT PyObject * PyArray_Choose \ + (PyArrayObject *, PyObject *, PyArrayObject *, NPY_CLIPMODE); +NPY_NO_EXPORT int PyArray_Sort \ + (PyArrayObject *, int, NPY_SORTKIND); +NPY_NO_EXPORT PyObject * PyArray_ArgSort \ + (PyArrayObject *, int, NPY_SORTKIND); +NPY_NO_EXPORT PyObject * PyArray_SearchSorted \ + (PyArrayObject *, PyObject *, NPY_SEARCHSIDE, PyObject *); +NPY_NO_EXPORT PyObject * PyArray_ArgMax \ + (PyArrayObject *, int, PyArrayObject *); +NPY_NO_EXPORT PyObject * PyArray_ArgMin \ + (PyArrayObject *, int, PyArrayObject *); +NPY_NO_EXPORT PyObject * PyArray_Reshape \ + (PyArrayObject *, PyObject *); +NPY_NO_EXPORT PyObject * PyArray_Newshape \ + (PyArrayObject *, PyArray_Dims *, NPY_ORDER); +NPY_NO_EXPORT PyObject * PyArray_Squeeze \ + (PyArrayObject *); +NPY_NO_EXPORT NPY_STEALS_REF_TO_ARG(2) PyObject * PyArray_View \ + (PyArrayObject *, PyArray_Descr *, PyTypeObject *); +NPY_NO_EXPORT PyObject * PyArray_SwapAxes \ + (PyArrayObject *, int, int); +NPY_NO_EXPORT PyObject * PyArray_Max \ + (PyArrayObject *, int, PyArrayObject *); +NPY_NO_EXPORT PyObject * PyArray_Min \ + (PyArrayObject *, int, PyArrayObject *); +NPY_NO_EXPORT PyObject * PyArray_Ptp \ + (PyArrayObject *, int, PyArrayObject *); +NPY_NO_EXPORT PyObject * PyArray_Mean \ + (PyArrayObject *, int, int, PyArrayObject *); +NPY_NO_EXPORT PyObject * PyArray_Trace \ + (PyArrayObject *, int, int, int, int, PyArrayObject *); +NPY_NO_EXPORT PyObject * PyArray_Diagonal \ + (PyArrayObject *, int, int, int); +NPY_NO_EXPORT PyObject * PyArray_Clip \ + (PyArrayObject *, PyObject *, PyObject *, PyArrayObject *); +NPY_NO_EXPORT PyObject * PyArray_Conjugate \ + (PyArrayObject *, PyArrayObject *); +NPY_NO_EXPORT PyObject * PyArray_Nonzero \ + (PyArrayObject *); +NPY_NO_EXPORT PyObject * PyArray_Std \ + (PyArrayObject *, int, int, 
PyArrayObject *, int); +NPY_NO_EXPORT PyObject * PyArray_Sum \ + (PyArrayObject *, int, int, PyArrayObject *); +NPY_NO_EXPORT PyObject * PyArray_CumSum \ + (PyArrayObject *, int, int, PyArrayObject *); +NPY_NO_EXPORT PyObject * PyArray_Prod \ + (PyArrayObject *, int, int, PyArrayObject *); +NPY_NO_EXPORT PyObject * PyArray_CumProd \ + (PyArrayObject *, int, int, PyArrayObject *); +NPY_NO_EXPORT PyObject * PyArray_All \ + (PyArrayObject *, int, PyArrayObject *); +NPY_NO_EXPORT PyObject * PyArray_Any \ + (PyArrayObject *, int, PyArrayObject *); +NPY_NO_EXPORT PyObject * PyArray_Compress \ + (PyArrayObject *, PyObject *, int, PyArrayObject *); +NPY_NO_EXPORT PyObject * PyArray_Flatten \ + (PyArrayObject *, NPY_ORDER); +NPY_NO_EXPORT PyObject * PyArray_Ravel \ + (PyArrayObject *, NPY_ORDER); +NPY_NO_EXPORT npy_intp PyArray_MultiplyList \ + (npy_intp const *, int); +NPY_NO_EXPORT int PyArray_MultiplyIntList \ + (int const *, int); +NPY_NO_EXPORT void * PyArray_GetPtr \ + (PyArrayObject *, npy_intp const*); +NPY_NO_EXPORT int PyArray_CompareLists \ + (npy_intp const *, npy_intp const *, int); +NPY_NO_EXPORT NPY_STEALS_REF_TO_ARG(5) int PyArray_AsCArray \ + (PyObject **, void *, npy_intp *, int, PyArray_Descr*); +NPY_NO_EXPORT int PyArray_As1D \ + (PyObject **NPY_UNUSED(op), char **NPY_UNUSED(ptr), int *NPY_UNUSED(d1), int NPY_UNUSED(typecode)); +NPY_NO_EXPORT int PyArray_As2D \ + (PyObject **NPY_UNUSED(op), char ***NPY_UNUSED(ptr), int *NPY_UNUSED(d1), int *NPY_UNUSED(d2), int NPY_UNUSED(typecode)); +NPY_NO_EXPORT int PyArray_Free \ + (PyObject *, void *); +NPY_NO_EXPORT int PyArray_Converter \ + (PyObject *, PyObject **); +NPY_NO_EXPORT int PyArray_IntpFromSequence \ + (PyObject *, npy_intp *, int); +NPY_NO_EXPORT PyObject * PyArray_Concatenate \ + (PyObject *, int); +NPY_NO_EXPORT PyObject * PyArray_InnerProduct \ + (PyObject *, PyObject *); +NPY_NO_EXPORT PyObject * PyArray_MatrixProduct \ + (PyObject *, PyObject *); +NPY_NO_EXPORT PyObject * PyArray_CopyAndTranspose \ + (PyObject *); +NPY_NO_EXPORT PyObject * PyArray_Correlate \ + (PyObject *, PyObject *, int); +NPY_NO_EXPORT int PyArray_TypestrConvert \ + (int, int); +NPY_NO_EXPORT int PyArray_DescrConverter \ + (PyObject *, PyArray_Descr **); +NPY_NO_EXPORT int PyArray_DescrConverter2 \ + (PyObject *, PyArray_Descr **); +NPY_NO_EXPORT int PyArray_IntpConverter \ + (PyObject *, PyArray_Dims *); +NPY_NO_EXPORT int PyArray_BufferConverter \ + (PyObject *, PyArray_Chunk *); +NPY_NO_EXPORT int PyArray_AxisConverter \ + (PyObject *, int *); +NPY_NO_EXPORT int PyArray_BoolConverter \ + (PyObject *, npy_bool *); +NPY_NO_EXPORT int PyArray_ByteorderConverter \ + (PyObject *, char *); +NPY_NO_EXPORT int PyArray_OrderConverter \ + (PyObject *, NPY_ORDER *); +NPY_NO_EXPORT unsigned char PyArray_EquivTypes \ + (PyArray_Descr *, PyArray_Descr *); +NPY_NO_EXPORT NPY_STEALS_REF_TO_ARG(3) PyObject * PyArray_Zeros \ + (int, npy_intp const *, PyArray_Descr *, int); +NPY_NO_EXPORT NPY_STEALS_REF_TO_ARG(3) PyObject * PyArray_Empty \ + (int, npy_intp const *, PyArray_Descr *, int); +NPY_NO_EXPORT PyObject * PyArray_Where \ + (PyObject *, PyObject *, PyObject *); +NPY_NO_EXPORT PyObject * PyArray_Arange \ + (double, double, double, int); +NPY_NO_EXPORT PyObject * PyArray_ArangeObj \ + (PyObject *, PyObject *, PyObject *, PyArray_Descr *); +NPY_NO_EXPORT int PyArray_SortkindConverter \ + (PyObject *, NPY_SORTKIND *); +NPY_NO_EXPORT PyObject * PyArray_LexSort \ + (PyObject *, int); +NPY_NO_EXPORT PyObject * PyArray_Round \ + (PyArrayObject *, int, PyArrayObject 
*); +NPY_NO_EXPORT unsigned char PyArray_EquivTypenums \ + (int, int); +NPY_NO_EXPORT int PyArray_RegisterDataType \ + (PyArray_Descr *); +NPY_NO_EXPORT int PyArray_RegisterCastFunc \ + (PyArray_Descr *, int, PyArray_VectorUnaryFunc *); +NPY_NO_EXPORT int PyArray_RegisterCanCast \ + (PyArray_Descr *, int, NPY_SCALARKIND); +NPY_NO_EXPORT void PyArray_InitArrFuncs \ + (PyArray_ArrFuncs *); +NPY_NO_EXPORT PyObject * PyArray_IntTupleFromIntp \ + (int, npy_intp const *); +NPY_NO_EXPORT int PyArray_TypeNumFromName \ + (char const *); +NPY_NO_EXPORT int PyArray_ClipmodeConverter \ + (PyObject *, NPY_CLIPMODE *); +NPY_NO_EXPORT int PyArray_OutputConverter \ + (PyObject *, PyArrayObject **); +NPY_NO_EXPORT PyObject * PyArray_BroadcastToShape \ + (PyObject *, npy_intp *, int); +NPY_NO_EXPORT void _PyArray_SigintHandler \ + (int); +NPY_NO_EXPORT void* _PyArray_GetSigintBuf \ + (void); +NPY_NO_EXPORT int PyArray_DescrAlignConverter \ + (PyObject *, PyArray_Descr **); +NPY_NO_EXPORT int PyArray_DescrAlignConverter2 \ + (PyObject *, PyArray_Descr **); +NPY_NO_EXPORT int PyArray_SearchsideConverter \ + (PyObject *, void *); +NPY_NO_EXPORT PyObject * PyArray_CheckAxis \ + (PyArrayObject *, int *, int); +NPY_NO_EXPORT npy_intp PyArray_OverflowMultiplyList \ + (npy_intp const *, int); +NPY_NO_EXPORT int PyArray_CompareString \ + (const char *, const char *, size_t); +NPY_NO_EXPORT PyObject* PyArray_MultiIterFromObjects \ + (PyObject **, int, int, ...); +NPY_NO_EXPORT int PyArray_GetEndianness \ + (void); +NPY_NO_EXPORT unsigned int PyArray_GetNDArrayCFeatureVersion \ + (void); +NPY_NO_EXPORT PyObject * PyArray_Correlate2 \ + (PyObject *, PyObject *, int); +NPY_NO_EXPORT PyObject* PyArray_NeighborhoodIterNew \ + (PyArrayIterObject *, const npy_intp *, int, PyArrayObject*); +extern NPY_NO_EXPORT PyTypeObject PyTimeIntegerArrType_Type; + +extern NPY_NO_EXPORT PyTypeObject PyDatetimeArrType_Type; + +extern NPY_NO_EXPORT PyTypeObject PyTimedeltaArrType_Type; + +extern NPY_NO_EXPORT PyTypeObject PyHalfArrType_Type; + +extern NPY_NO_EXPORT PyTypeObject NpyIter_Type; + +NPY_NO_EXPORT void PyArray_SetDatetimeParseFunction \ + (PyObject *NPY_UNUSED(op)); +NPY_NO_EXPORT void PyArray_DatetimeToDatetimeStruct \ + (npy_datetime NPY_UNUSED(val), NPY_DATETIMEUNIT NPY_UNUSED(fr), npy_datetimestruct *); +NPY_NO_EXPORT void PyArray_TimedeltaToTimedeltaStruct \ + (npy_timedelta NPY_UNUSED(val), NPY_DATETIMEUNIT NPY_UNUSED(fr), npy_timedeltastruct *); +NPY_NO_EXPORT npy_datetime PyArray_DatetimeStructToDatetime \ + (NPY_DATETIMEUNIT NPY_UNUSED(fr), npy_datetimestruct *NPY_UNUSED(d)); +NPY_NO_EXPORT npy_datetime PyArray_TimedeltaStructToTimedelta \ + (NPY_DATETIMEUNIT NPY_UNUSED(fr), npy_timedeltastruct *NPY_UNUSED(d)); +NPY_NO_EXPORT NpyIter * NpyIter_New \ + (PyArrayObject *, npy_uint32, NPY_ORDER, NPY_CASTING, PyArray_Descr*); +NPY_NO_EXPORT NpyIter * NpyIter_MultiNew \ + (int, PyArrayObject **, npy_uint32, NPY_ORDER, NPY_CASTING, npy_uint32 *, PyArray_Descr **); +NPY_NO_EXPORT NpyIter * NpyIter_AdvancedNew \ + (int, PyArrayObject **, npy_uint32, NPY_ORDER, NPY_CASTING, npy_uint32 *, PyArray_Descr **, int, int **, npy_intp *, npy_intp); +NPY_NO_EXPORT NpyIter * NpyIter_Copy \ + (NpyIter *); +NPY_NO_EXPORT int NpyIter_Deallocate \ + (NpyIter *); +NPY_NO_EXPORT npy_bool NpyIter_HasDelayedBufAlloc \ + (NpyIter *); +NPY_NO_EXPORT npy_bool NpyIter_HasExternalLoop \ + (NpyIter *); +NPY_NO_EXPORT int NpyIter_EnableExternalLoop \ + (NpyIter *); +NPY_NO_EXPORT npy_intp * NpyIter_GetInnerStrideArray \ + (NpyIter *); +NPY_NO_EXPORT 
npy_intp * NpyIter_GetInnerLoopSizePtr \ + (NpyIter *); +NPY_NO_EXPORT int NpyIter_Reset \ + (NpyIter *, char **); +NPY_NO_EXPORT int NpyIter_ResetBasePointers \ + (NpyIter *, char **, char **); +NPY_NO_EXPORT int NpyIter_ResetToIterIndexRange \ + (NpyIter *, npy_intp, npy_intp, char **); +NPY_NO_EXPORT int NpyIter_GetNDim \ + (NpyIter *); +NPY_NO_EXPORT int NpyIter_GetNOp \ + (NpyIter *); +NPY_NO_EXPORT NpyIter_IterNextFunc * NpyIter_GetIterNext \ + (NpyIter *, char **); +NPY_NO_EXPORT npy_intp NpyIter_GetIterSize \ + (NpyIter *); +NPY_NO_EXPORT void NpyIter_GetIterIndexRange \ + (NpyIter *, npy_intp *, npy_intp *); +NPY_NO_EXPORT npy_intp NpyIter_GetIterIndex \ + (NpyIter *); +NPY_NO_EXPORT int NpyIter_GotoIterIndex \ + (NpyIter *, npy_intp); +NPY_NO_EXPORT npy_bool NpyIter_HasMultiIndex \ + (NpyIter *); +NPY_NO_EXPORT int NpyIter_GetShape \ + (NpyIter *, npy_intp *); +NPY_NO_EXPORT NpyIter_GetMultiIndexFunc * NpyIter_GetGetMultiIndex \ + (NpyIter *, char **); +NPY_NO_EXPORT int NpyIter_GotoMultiIndex \ + (NpyIter *, npy_intp const *); +NPY_NO_EXPORT int NpyIter_RemoveMultiIndex \ + (NpyIter *); +NPY_NO_EXPORT npy_bool NpyIter_HasIndex \ + (NpyIter *); +NPY_NO_EXPORT npy_bool NpyIter_IsBuffered \ + (NpyIter *); +NPY_NO_EXPORT npy_bool NpyIter_IsGrowInner \ + (NpyIter *); +NPY_NO_EXPORT npy_intp NpyIter_GetBufferSize \ + (NpyIter *); +NPY_NO_EXPORT npy_intp * NpyIter_GetIndexPtr \ + (NpyIter *); +NPY_NO_EXPORT int NpyIter_GotoIndex \ + (NpyIter *, npy_intp); +NPY_NO_EXPORT char ** NpyIter_GetDataPtrArray \ + (NpyIter *); +NPY_NO_EXPORT PyArray_Descr ** NpyIter_GetDescrArray \ + (NpyIter *); +NPY_NO_EXPORT PyArrayObject ** NpyIter_GetOperandArray \ + (NpyIter *); +NPY_NO_EXPORT PyArrayObject * NpyIter_GetIterView \ + (NpyIter *, npy_intp); +NPY_NO_EXPORT void NpyIter_GetReadFlags \ + (NpyIter *, char *); +NPY_NO_EXPORT void NpyIter_GetWriteFlags \ + (NpyIter *, char *); +NPY_NO_EXPORT void NpyIter_DebugPrint \ + (NpyIter *); +NPY_NO_EXPORT npy_bool NpyIter_IterationNeedsAPI \ + (NpyIter *); +NPY_NO_EXPORT void NpyIter_GetInnerFixedStrideArray \ + (NpyIter *, npy_intp *); +NPY_NO_EXPORT int NpyIter_RemoveAxis \ + (NpyIter *, int); +NPY_NO_EXPORT npy_intp * NpyIter_GetAxisStrideArray \ + (NpyIter *, int); +NPY_NO_EXPORT npy_bool NpyIter_RequiresBuffering \ + (NpyIter *); +NPY_NO_EXPORT char ** NpyIter_GetInitialDataPtrArray \ + (NpyIter *); +NPY_NO_EXPORT int NpyIter_CreateCompatibleStrides \ + (NpyIter *, npy_intp, npy_intp *); +NPY_NO_EXPORT int PyArray_CastingConverter \ + (PyObject *, NPY_CASTING *); +NPY_NO_EXPORT npy_intp PyArray_CountNonzero \ + (PyArrayObject *); +NPY_NO_EXPORT PyArray_Descr * PyArray_PromoteTypes \ + (PyArray_Descr *, PyArray_Descr *); +NPY_NO_EXPORT PyArray_Descr * PyArray_MinScalarType \ + (PyArrayObject *); +NPY_NO_EXPORT PyArray_Descr * PyArray_ResultType \ + (npy_intp, PyArrayObject *arrs[], npy_intp, PyArray_Descr *descrs[]); +NPY_NO_EXPORT npy_bool PyArray_CanCastArrayTo \ + (PyArrayObject *, PyArray_Descr *, NPY_CASTING); +NPY_NO_EXPORT npy_bool PyArray_CanCastTypeTo \ + (PyArray_Descr *, PyArray_Descr *, NPY_CASTING); +NPY_NO_EXPORT PyArrayObject * PyArray_EinsteinSum \ + (char *, npy_intp, PyArrayObject **, PyArray_Descr *, NPY_ORDER, NPY_CASTING, PyArrayObject *); +NPY_NO_EXPORT NPY_STEALS_REF_TO_ARG(3) NPY_GCC_NONNULL(1) PyObject * PyArray_NewLikeArray \ + (PyArrayObject *, NPY_ORDER, PyArray_Descr *, int); +NPY_NO_EXPORT int PyArray_GetArrayParamsFromObject \ + (PyObject *NPY_UNUSED(op), PyArray_Descr *NPY_UNUSED(requested_dtype), npy_bool 
NPY_UNUSED(writeable), PyArray_Descr **NPY_UNUSED(out_dtype), int *NPY_UNUSED(out_ndim), npy_intp *NPY_UNUSED(out_dims), PyArrayObject **NPY_UNUSED(out_arr), PyObject *NPY_UNUSED(context)); +NPY_NO_EXPORT int PyArray_ConvertClipmodeSequence \ + (PyObject *, NPY_CLIPMODE *, int); +NPY_NO_EXPORT PyObject * PyArray_MatrixProduct2 \ + (PyObject *, PyObject *, PyArrayObject*); +NPY_NO_EXPORT npy_bool NpyIter_IsFirstVisit \ + (NpyIter *, int); +NPY_NO_EXPORT NPY_STEALS_REF_TO_ARG(2) int PyArray_SetBaseObject \ + (PyArrayObject *, PyObject *); +NPY_NO_EXPORT void PyArray_CreateSortedStridePerm \ + (int, npy_intp const *, npy_stride_sort_item *); +NPY_NO_EXPORT void PyArray_RemoveAxesInPlace \ + (PyArrayObject *, const npy_bool *); +NPY_NO_EXPORT void PyArray_DebugPrint \ + (PyArrayObject *); +NPY_NO_EXPORT int PyArray_FailUnlessWriteable \ + (PyArrayObject *, const char *); +NPY_NO_EXPORT NPY_STEALS_REF_TO_ARG(2) int PyArray_SetUpdateIfCopyBase \ + (PyArrayObject *, PyArrayObject *); +NPY_NO_EXPORT void * PyDataMem_NEW \ + (size_t); +NPY_NO_EXPORT void PyDataMem_FREE \ + (void *); +NPY_NO_EXPORT void * PyDataMem_RENEW \ + (void *, size_t); +NPY_NO_EXPORT PyDataMem_EventHookFunc * PyDataMem_SetEventHook \ + (PyDataMem_EventHookFunc *, void *, void **); +extern NPY_NO_EXPORT NPY_CASTING NPY_DEFAULT_ASSIGN_CASTING; + +NPY_NO_EXPORT void PyArray_MapIterSwapAxes \ + (PyArrayMapIterObject *, PyArrayObject **, int); +NPY_NO_EXPORT PyObject * PyArray_MapIterArray \ + (PyArrayObject *, PyObject *); +NPY_NO_EXPORT void PyArray_MapIterNext \ + (PyArrayMapIterObject *); +NPY_NO_EXPORT int PyArray_Partition \ + (PyArrayObject *, PyArrayObject *, int, NPY_SELECTKIND); +NPY_NO_EXPORT PyObject * PyArray_ArgPartition \ + (PyArrayObject *, PyArrayObject *, int, NPY_SELECTKIND); +NPY_NO_EXPORT int PyArray_SelectkindConverter \ + (PyObject *, NPY_SELECTKIND *); +NPY_NO_EXPORT void * PyDataMem_NEW_ZEROED \ + (size_t, size_t); +NPY_NO_EXPORT NPY_GCC_NONNULL(1) int PyArray_CheckAnyScalarExact \ + (PyObject *); +NPY_NO_EXPORT PyObject * PyArray_MapIterArrayCopyIfOverlap \ + (PyArrayObject *, PyObject *, int, PyArrayObject *); +NPY_NO_EXPORT int PyArray_ResolveWritebackIfCopy \ + (PyArrayObject *); +NPY_NO_EXPORT int PyArray_SetWritebackIfCopyBase \ + (PyArrayObject *, PyArrayObject *); +NPY_NO_EXPORT PyObject * PyDataMem_SetHandler \ + (PyObject *); +NPY_NO_EXPORT PyObject * PyDataMem_GetHandler \ + (void); +extern NPY_NO_EXPORT PyObject* PyDataMem_DefaultHandler; + + +#else + +#if defined(PY_ARRAY_UNIQUE_SYMBOL) +#define PyArray_API PY_ARRAY_UNIQUE_SYMBOL +#endif + +#if defined(NO_IMPORT) || defined(NO_IMPORT_ARRAY) +extern void **PyArray_API; +#else +#if defined(PY_ARRAY_UNIQUE_SYMBOL) +void **PyArray_API; +#else +static void **PyArray_API=NULL; +#endif +#endif + +#define PyArray_GetNDArrayCVersion \ + (*(unsigned int (*)(void)) \ + PyArray_API[0]) +#define PyBigArray_Type (*(PyTypeObject *)PyArray_API[1]) +#define PyArray_Type (*(PyTypeObject *)PyArray_API[2]) +#define PyArrayDescr_Type (*(PyTypeObject *)PyArray_API[3]) +#define PyArrayFlags_Type (*(PyTypeObject *)PyArray_API[4]) +#define PyArrayIter_Type (*(PyTypeObject *)PyArray_API[5]) +#define PyArrayMultiIter_Type (*(PyTypeObject *)PyArray_API[6]) +#define NPY_NUMUSERTYPES (*(int *)PyArray_API[7]) +#define PyBoolArrType_Type (*(PyTypeObject *)PyArray_API[8]) +#define _PyArrayScalar_BoolValues ((PyBoolScalarObject *)PyArray_API[9]) +#define PyGenericArrType_Type (*(PyTypeObject *)PyArray_API[10]) +#define PyNumberArrType_Type (*(PyTypeObject *)PyArray_API[11]) 
+#define PyIntegerArrType_Type (*(PyTypeObject *)PyArray_API[12]) +#define PySignedIntegerArrType_Type (*(PyTypeObject *)PyArray_API[13]) +#define PyUnsignedIntegerArrType_Type (*(PyTypeObject *)PyArray_API[14]) +#define PyInexactArrType_Type (*(PyTypeObject *)PyArray_API[15]) +#define PyFloatingArrType_Type (*(PyTypeObject *)PyArray_API[16]) +#define PyComplexFloatingArrType_Type (*(PyTypeObject *)PyArray_API[17]) +#define PyFlexibleArrType_Type (*(PyTypeObject *)PyArray_API[18]) +#define PyCharacterArrType_Type (*(PyTypeObject *)PyArray_API[19]) +#define PyByteArrType_Type (*(PyTypeObject *)PyArray_API[20]) +#define PyShortArrType_Type (*(PyTypeObject *)PyArray_API[21]) +#define PyIntArrType_Type (*(PyTypeObject *)PyArray_API[22]) +#define PyLongArrType_Type (*(PyTypeObject *)PyArray_API[23]) +#define PyLongLongArrType_Type (*(PyTypeObject *)PyArray_API[24]) +#define PyUByteArrType_Type (*(PyTypeObject *)PyArray_API[25]) +#define PyUShortArrType_Type (*(PyTypeObject *)PyArray_API[26]) +#define PyUIntArrType_Type (*(PyTypeObject *)PyArray_API[27]) +#define PyULongArrType_Type (*(PyTypeObject *)PyArray_API[28]) +#define PyULongLongArrType_Type (*(PyTypeObject *)PyArray_API[29]) +#define PyFloatArrType_Type (*(PyTypeObject *)PyArray_API[30]) +#define PyDoubleArrType_Type (*(PyTypeObject *)PyArray_API[31]) +#define PyLongDoubleArrType_Type (*(PyTypeObject *)PyArray_API[32]) +#define PyCFloatArrType_Type (*(PyTypeObject *)PyArray_API[33]) +#define PyCDoubleArrType_Type (*(PyTypeObject *)PyArray_API[34]) +#define PyCLongDoubleArrType_Type (*(PyTypeObject *)PyArray_API[35]) +#define PyObjectArrType_Type (*(PyTypeObject *)PyArray_API[36]) +#define PyStringArrType_Type (*(PyTypeObject *)PyArray_API[37]) +#define PyUnicodeArrType_Type (*(PyTypeObject *)PyArray_API[38]) +#define PyVoidArrType_Type (*(PyTypeObject *)PyArray_API[39]) +#define PyArray_SetNumericOps \ + (*(int (*)(PyObject *)) \ + PyArray_API[40]) +#define PyArray_GetNumericOps \ + (*(PyObject * (*)(void)) \ + PyArray_API[41]) +#define PyArray_INCREF \ + (*(int (*)(PyArrayObject *)) \ + PyArray_API[42]) +#define PyArray_XDECREF \ + (*(int (*)(PyArrayObject *)) \ + PyArray_API[43]) +#define PyArray_SetStringFunction \ + (*(void (*)(PyObject *, int)) \ + PyArray_API[44]) +#define PyArray_DescrFromType \ + (*(PyArray_Descr * (*)(int)) \ + PyArray_API[45]) +#define PyArray_TypeObjectFromType \ + (*(PyObject * (*)(int)) \ + PyArray_API[46]) +#define PyArray_Zero \ + (*(char * (*)(PyArrayObject *)) \ + PyArray_API[47]) +#define PyArray_One \ + (*(char * (*)(PyArrayObject *)) \ + PyArray_API[48]) +#define PyArray_CastToType \ + (*(PyObject * (*)(PyArrayObject *, PyArray_Descr *, int)) \ + PyArray_API[49]) +#define PyArray_CastTo \ + (*(int (*)(PyArrayObject *, PyArrayObject *)) \ + PyArray_API[50]) +#define PyArray_CastAnyTo \ + (*(int (*)(PyArrayObject *, PyArrayObject *)) \ + PyArray_API[51]) +#define PyArray_CanCastSafely \ + (*(int (*)(int, int)) \ + PyArray_API[52]) +#define PyArray_CanCastTo \ + (*(npy_bool (*)(PyArray_Descr *, PyArray_Descr *)) \ + PyArray_API[53]) +#define PyArray_ObjectType \ + (*(int (*)(PyObject *, int)) \ + PyArray_API[54]) +#define PyArray_DescrFromObject \ + (*(PyArray_Descr * (*)(PyObject *, PyArray_Descr *)) \ + PyArray_API[55]) +#define PyArray_ConvertToCommonType \ + (*(PyArrayObject ** (*)(PyObject *, int *)) \ + PyArray_API[56]) +#define PyArray_DescrFromScalar \ + (*(PyArray_Descr * (*)(PyObject *)) \ + PyArray_API[57]) +#define PyArray_DescrFromTypeObject \ + (*(PyArray_Descr * (*)(PyObject *)) \ + 
PyArray_API[58]) +#define PyArray_Size \ + (*(npy_intp (*)(PyObject *)) \ + PyArray_API[59]) +#define PyArray_Scalar \ + (*(PyObject * (*)(void *, PyArray_Descr *, PyObject *)) \ + PyArray_API[60]) +#define PyArray_FromScalar \ + (*(PyObject * (*)(PyObject *, PyArray_Descr *)) \ + PyArray_API[61]) +#define PyArray_ScalarAsCtype \ + (*(void (*)(PyObject *, void *)) \ + PyArray_API[62]) +#define PyArray_CastScalarToCtype \ + (*(int (*)(PyObject *, void *, PyArray_Descr *)) \ + PyArray_API[63]) +#define PyArray_CastScalarDirect \ + (*(int (*)(PyObject *, PyArray_Descr *, void *, int)) \ + PyArray_API[64]) +#define PyArray_ScalarFromObject \ + (*(PyObject * (*)(PyObject *)) \ + PyArray_API[65]) +#define PyArray_GetCastFunc \ + (*(PyArray_VectorUnaryFunc * (*)(PyArray_Descr *, int)) \ + PyArray_API[66]) +#define PyArray_FromDims \ + (*(PyObject * (*)(int NPY_UNUSED(nd), int *NPY_UNUSED(d), int NPY_UNUSED(type))) \ + PyArray_API[67]) +#define PyArray_FromDimsAndDataAndDescr \ + (*(PyObject * (*)(int NPY_UNUSED(nd), int *NPY_UNUSED(d), PyArray_Descr *, char *NPY_UNUSED(data))) \ + PyArray_API[68]) +#define PyArray_FromAny \ + (*(PyObject * (*)(PyObject *, PyArray_Descr *, int, int, int, PyObject *)) \ + PyArray_API[69]) +#define PyArray_EnsureArray \ + (*(PyObject * (*)(PyObject *)) \ + PyArray_API[70]) +#define PyArray_EnsureAnyArray \ + (*(PyObject * (*)(PyObject *)) \ + PyArray_API[71]) +#define PyArray_FromFile \ + (*(PyObject * (*)(FILE *, PyArray_Descr *, npy_intp, char *)) \ + PyArray_API[72]) +#define PyArray_FromString \ + (*(PyObject * (*)(char *, npy_intp, PyArray_Descr *, npy_intp, char *)) \ + PyArray_API[73]) +#define PyArray_FromBuffer \ + (*(PyObject * (*)(PyObject *, PyArray_Descr *, npy_intp, npy_intp)) \ + PyArray_API[74]) +#define PyArray_FromIter \ + (*(PyObject * (*)(PyObject *, PyArray_Descr *, npy_intp)) \ + PyArray_API[75]) +#define PyArray_Return \ + (*(PyObject * (*)(PyArrayObject *)) \ + PyArray_API[76]) +#define PyArray_GetField \ + (*(PyObject * (*)(PyArrayObject *, PyArray_Descr *, int)) \ + PyArray_API[77]) +#define PyArray_SetField \ + (*(int (*)(PyArrayObject *, PyArray_Descr *, int, PyObject *)) \ + PyArray_API[78]) +#define PyArray_Byteswap \ + (*(PyObject * (*)(PyArrayObject *, npy_bool)) \ + PyArray_API[79]) +#define PyArray_Resize \ + (*(PyObject * (*)(PyArrayObject *, PyArray_Dims *, int, NPY_ORDER NPY_UNUSED(order))) \ + PyArray_API[80]) +#define PyArray_MoveInto \ + (*(int (*)(PyArrayObject *, PyArrayObject *)) \ + PyArray_API[81]) +#define PyArray_CopyInto \ + (*(int (*)(PyArrayObject *, PyArrayObject *)) \ + PyArray_API[82]) +#define PyArray_CopyAnyInto \ + (*(int (*)(PyArrayObject *, PyArrayObject *)) \ + PyArray_API[83]) +#define PyArray_CopyObject \ + (*(int (*)(PyArrayObject *, PyObject *)) \ + PyArray_API[84]) +#define PyArray_NewCopy \ + (*(PyObject * (*)(PyArrayObject *, NPY_ORDER)) \ + PyArray_API[85]) +#define PyArray_ToList \ + (*(PyObject * (*)(PyArrayObject *)) \ + PyArray_API[86]) +#define PyArray_ToString \ + (*(PyObject * (*)(PyArrayObject *, NPY_ORDER)) \ + PyArray_API[87]) +#define PyArray_ToFile \ + (*(int (*)(PyArrayObject *, FILE *, char *, char *)) \ + PyArray_API[88]) +#define PyArray_Dump \ + (*(int (*)(PyObject *, PyObject *, int)) \ + PyArray_API[89]) +#define PyArray_Dumps \ + (*(PyObject * (*)(PyObject *, int)) \ + PyArray_API[90]) +#define PyArray_ValidType \ + (*(int (*)(int)) \ + PyArray_API[91]) +#define PyArray_UpdateFlags \ + (*(void (*)(PyArrayObject *, int)) \ + PyArray_API[92]) +#define PyArray_New \ + (*(PyObject * 
(*)(PyTypeObject *, int, npy_intp const *, int, npy_intp const *, void *, int, int, PyObject *)) \ + PyArray_API[93]) +#define PyArray_NewFromDescr \ + (*(PyObject * (*)(PyTypeObject *, PyArray_Descr *, int, npy_intp const *, npy_intp const *, void *, int, PyObject *)) \ + PyArray_API[94]) +#define PyArray_DescrNew \ + (*(PyArray_Descr * (*)(PyArray_Descr *)) \ + PyArray_API[95]) +#define PyArray_DescrNewFromType \ + (*(PyArray_Descr * (*)(int)) \ + PyArray_API[96]) +#define PyArray_GetPriority \ + (*(double (*)(PyObject *, double)) \ + PyArray_API[97]) +#define PyArray_IterNew \ + (*(PyObject * (*)(PyObject *)) \ + PyArray_API[98]) +#define PyArray_MultiIterNew \ + (*(PyObject* (*)(int, ...)) \ + PyArray_API[99]) +#define PyArray_PyIntAsInt \ + (*(int (*)(PyObject *)) \ + PyArray_API[100]) +#define PyArray_PyIntAsIntp \ + (*(npy_intp (*)(PyObject *)) \ + PyArray_API[101]) +#define PyArray_Broadcast \ + (*(int (*)(PyArrayMultiIterObject *)) \ + PyArray_API[102]) +#define PyArray_FillObjectArray \ + (*(void (*)(PyArrayObject *, PyObject *)) \ + PyArray_API[103]) +#define PyArray_FillWithScalar \ + (*(int (*)(PyArrayObject *, PyObject *)) \ + PyArray_API[104]) +#define PyArray_CheckStrides \ + (*(npy_bool (*)(int, int, npy_intp, npy_intp, npy_intp const *, npy_intp const *)) \ + PyArray_API[105]) +#define PyArray_DescrNewByteorder \ + (*(PyArray_Descr * (*)(PyArray_Descr *, char)) \ + PyArray_API[106]) +#define PyArray_IterAllButAxis \ + (*(PyObject * (*)(PyObject *, int *)) \ + PyArray_API[107]) +#define PyArray_CheckFromAny \ + (*(PyObject * (*)(PyObject *, PyArray_Descr *, int, int, int, PyObject *)) \ + PyArray_API[108]) +#define PyArray_FromArray \ + (*(PyObject * (*)(PyArrayObject *, PyArray_Descr *, int)) \ + PyArray_API[109]) +#define PyArray_FromInterface \ + (*(PyObject * (*)(PyObject *)) \ + PyArray_API[110]) +#define PyArray_FromStructInterface \ + (*(PyObject * (*)(PyObject *)) \ + PyArray_API[111]) +#define PyArray_FromArrayAttr \ + (*(PyObject * (*)(PyObject *, PyArray_Descr *, PyObject *)) \ + PyArray_API[112]) +#define PyArray_ScalarKind \ + (*(NPY_SCALARKIND (*)(int, PyArrayObject **)) \ + PyArray_API[113]) +#define PyArray_CanCoerceScalar \ + (*(int (*)(int, int, NPY_SCALARKIND)) \ + PyArray_API[114]) +#define PyArray_NewFlagsObject \ + (*(PyObject * (*)(PyObject *)) \ + PyArray_API[115]) +#define PyArray_CanCastScalar \ + (*(npy_bool (*)(PyTypeObject *, PyTypeObject *)) \ + PyArray_API[116]) +#define PyArray_CompareUCS4 \ + (*(int (*)(npy_ucs4 const *, npy_ucs4 const *, size_t)) \ + PyArray_API[117]) +#define PyArray_RemoveSmallest \ + (*(int (*)(PyArrayMultiIterObject *)) \ + PyArray_API[118]) +#define PyArray_ElementStrides \ + (*(int (*)(PyObject *)) \ + PyArray_API[119]) +#define PyArray_Item_INCREF \ + (*(void (*)(char *, PyArray_Descr *)) \ + PyArray_API[120]) +#define PyArray_Item_XDECREF \ + (*(void (*)(char *, PyArray_Descr *)) \ + PyArray_API[121]) +#define PyArray_FieldNames \ + (*(PyObject * (*)(PyObject *)) \ + PyArray_API[122]) +#define PyArray_Transpose \ + (*(PyObject * (*)(PyArrayObject *, PyArray_Dims *)) \ + PyArray_API[123]) +#define PyArray_TakeFrom \ + (*(PyObject * (*)(PyArrayObject *, PyObject *, int, PyArrayObject *, NPY_CLIPMODE)) \ + PyArray_API[124]) +#define PyArray_PutTo \ + (*(PyObject * (*)(PyArrayObject *, PyObject*, PyObject *, NPY_CLIPMODE)) \ + PyArray_API[125]) +#define PyArray_PutMask \ + (*(PyObject * (*)(PyArrayObject *, PyObject*, PyObject*)) \ + PyArray_API[126]) +#define PyArray_Repeat \ + (*(PyObject * (*)(PyArrayObject *, 
PyObject *, int)) \ + PyArray_API[127]) +#define PyArray_Choose \ + (*(PyObject * (*)(PyArrayObject *, PyObject *, PyArrayObject *, NPY_CLIPMODE)) \ + PyArray_API[128]) +#define PyArray_Sort \ + (*(int (*)(PyArrayObject *, int, NPY_SORTKIND)) \ + PyArray_API[129]) +#define PyArray_ArgSort \ + (*(PyObject * (*)(PyArrayObject *, int, NPY_SORTKIND)) \ + PyArray_API[130]) +#define PyArray_SearchSorted \ + (*(PyObject * (*)(PyArrayObject *, PyObject *, NPY_SEARCHSIDE, PyObject *)) \ + PyArray_API[131]) +#define PyArray_ArgMax \ + (*(PyObject * (*)(PyArrayObject *, int, PyArrayObject *)) \ + PyArray_API[132]) +#define PyArray_ArgMin \ + (*(PyObject * (*)(PyArrayObject *, int, PyArrayObject *)) \ + PyArray_API[133]) +#define PyArray_Reshape \ + (*(PyObject * (*)(PyArrayObject *, PyObject *)) \ + PyArray_API[134]) +#define PyArray_Newshape \ + (*(PyObject * (*)(PyArrayObject *, PyArray_Dims *, NPY_ORDER)) \ + PyArray_API[135]) +#define PyArray_Squeeze \ + (*(PyObject * (*)(PyArrayObject *)) \ + PyArray_API[136]) +#define PyArray_View \ + (*(PyObject * (*)(PyArrayObject *, PyArray_Descr *, PyTypeObject *)) \ + PyArray_API[137]) +#define PyArray_SwapAxes \ + (*(PyObject * (*)(PyArrayObject *, int, int)) \ + PyArray_API[138]) +#define PyArray_Max \ + (*(PyObject * (*)(PyArrayObject *, int, PyArrayObject *)) \ + PyArray_API[139]) +#define PyArray_Min \ + (*(PyObject * (*)(PyArrayObject *, int, PyArrayObject *)) \ + PyArray_API[140]) +#define PyArray_Ptp \ + (*(PyObject * (*)(PyArrayObject *, int, PyArrayObject *)) \ + PyArray_API[141]) +#define PyArray_Mean \ + (*(PyObject * (*)(PyArrayObject *, int, int, PyArrayObject *)) \ + PyArray_API[142]) +#define PyArray_Trace \ + (*(PyObject * (*)(PyArrayObject *, int, int, int, int, PyArrayObject *)) \ + PyArray_API[143]) +#define PyArray_Diagonal \ + (*(PyObject * (*)(PyArrayObject *, int, int, int)) \ + PyArray_API[144]) +#define PyArray_Clip \ + (*(PyObject * (*)(PyArrayObject *, PyObject *, PyObject *, PyArrayObject *)) \ + PyArray_API[145]) +#define PyArray_Conjugate \ + (*(PyObject * (*)(PyArrayObject *, PyArrayObject *)) \ + PyArray_API[146]) +#define PyArray_Nonzero \ + (*(PyObject * (*)(PyArrayObject *)) \ + PyArray_API[147]) +#define PyArray_Std \ + (*(PyObject * (*)(PyArrayObject *, int, int, PyArrayObject *, int)) \ + PyArray_API[148]) +#define PyArray_Sum \ + (*(PyObject * (*)(PyArrayObject *, int, int, PyArrayObject *)) \ + PyArray_API[149]) +#define PyArray_CumSum \ + (*(PyObject * (*)(PyArrayObject *, int, int, PyArrayObject *)) \ + PyArray_API[150]) +#define PyArray_Prod \ + (*(PyObject * (*)(PyArrayObject *, int, int, PyArrayObject *)) \ + PyArray_API[151]) +#define PyArray_CumProd \ + (*(PyObject * (*)(PyArrayObject *, int, int, PyArrayObject *)) \ + PyArray_API[152]) +#define PyArray_All \ + (*(PyObject * (*)(PyArrayObject *, int, PyArrayObject *)) \ + PyArray_API[153]) +#define PyArray_Any \ + (*(PyObject * (*)(PyArrayObject *, int, PyArrayObject *)) \ + PyArray_API[154]) +#define PyArray_Compress \ + (*(PyObject * (*)(PyArrayObject *, PyObject *, int, PyArrayObject *)) \ + PyArray_API[155]) +#define PyArray_Flatten \ + (*(PyObject * (*)(PyArrayObject *, NPY_ORDER)) \ + PyArray_API[156]) +#define PyArray_Ravel \ + (*(PyObject * (*)(PyArrayObject *, NPY_ORDER)) \ + PyArray_API[157]) +#define PyArray_MultiplyList \ + (*(npy_intp (*)(npy_intp const *, int)) \ + PyArray_API[158]) +#define PyArray_MultiplyIntList \ + (*(int (*)(int const *, int)) \ + PyArray_API[159]) +#define PyArray_GetPtr \ + (*(void * (*)(PyArrayObject *, npy_intp const*)) 
\ + PyArray_API[160]) +#define PyArray_CompareLists \ + (*(int (*)(npy_intp const *, npy_intp const *, int)) \ + PyArray_API[161]) +#define PyArray_AsCArray \ + (*(int (*)(PyObject **, void *, npy_intp *, int, PyArray_Descr*)) \ + PyArray_API[162]) +#define PyArray_As1D \ + (*(int (*)(PyObject **NPY_UNUSED(op), char **NPY_UNUSED(ptr), int *NPY_UNUSED(d1), int NPY_UNUSED(typecode))) \ + PyArray_API[163]) +#define PyArray_As2D \ + (*(int (*)(PyObject **NPY_UNUSED(op), char ***NPY_UNUSED(ptr), int *NPY_UNUSED(d1), int *NPY_UNUSED(d2), int NPY_UNUSED(typecode))) \ + PyArray_API[164]) +#define PyArray_Free \ + (*(int (*)(PyObject *, void *)) \ + PyArray_API[165]) +#define PyArray_Converter \ + (*(int (*)(PyObject *, PyObject **)) \ + PyArray_API[166]) +#define PyArray_IntpFromSequence \ + (*(int (*)(PyObject *, npy_intp *, int)) \ + PyArray_API[167]) +#define PyArray_Concatenate \ + (*(PyObject * (*)(PyObject *, int)) \ + PyArray_API[168]) +#define PyArray_InnerProduct \ + (*(PyObject * (*)(PyObject *, PyObject *)) \ + PyArray_API[169]) +#define PyArray_MatrixProduct \ + (*(PyObject * (*)(PyObject *, PyObject *)) \ + PyArray_API[170]) +#define PyArray_CopyAndTranspose \ + (*(PyObject * (*)(PyObject *)) \ + PyArray_API[171]) +#define PyArray_Correlate \ + (*(PyObject * (*)(PyObject *, PyObject *, int)) \ + PyArray_API[172]) +#define PyArray_TypestrConvert \ + (*(int (*)(int, int)) \ + PyArray_API[173]) +#define PyArray_DescrConverter \ + (*(int (*)(PyObject *, PyArray_Descr **)) \ + PyArray_API[174]) +#define PyArray_DescrConverter2 \ + (*(int (*)(PyObject *, PyArray_Descr **)) \ + PyArray_API[175]) +#define PyArray_IntpConverter \ + (*(int (*)(PyObject *, PyArray_Dims *)) \ + PyArray_API[176]) +#define PyArray_BufferConverter \ + (*(int (*)(PyObject *, PyArray_Chunk *)) \ + PyArray_API[177]) +#define PyArray_AxisConverter \ + (*(int (*)(PyObject *, int *)) \ + PyArray_API[178]) +#define PyArray_BoolConverter \ + (*(int (*)(PyObject *, npy_bool *)) \ + PyArray_API[179]) +#define PyArray_ByteorderConverter \ + (*(int (*)(PyObject *, char *)) \ + PyArray_API[180]) +#define PyArray_OrderConverter \ + (*(int (*)(PyObject *, NPY_ORDER *)) \ + PyArray_API[181]) +#define PyArray_EquivTypes \ + (*(unsigned char (*)(PyArray_Descr *, PyArray_Descr *)) \ + PyArray_API[182]) +#define PyArray_Zeros \ + (*(PyObject * (*)(int, npy_intp const *, PyArray_Descr *, int)) \ + PyArray_API[183]) +#define PyArray_Empty \ + (*(PyObject * (*)(int, npy_intp const *, PyArray_Descr *, int)) \ + PyArray_API[184]) +#define PyArray_Where \ + (*(PyObject * (*)(PyObject *, PyObject *, PyObject *)) \ + PyArray_API[185]) +#define PyArray_Arange \ + (*(PyObject * (*)(double, double, double, int)) \ + PyArray_API[186]) +#define PyArray_ArangeObj \ + (*(PyObject * (*)(PyObject *, PyObject *, PyObject *, PyArray_Descr *)) \ + PyArray_API[187]) +#define PyArray_SortkindConverter \ + (*(int (*)(PyObject *, NPY_SORTKIND *)) \ + PyArray_API[188]) +#define PyArray_LexSort \ + (*(PyObject * (*)(PyObject *, int)) \ + PyArray_API[189]) +#define PyArray_Round \ + (*(PyObject * (*)(PyArrayObject *, int, PyArrayObject *)) \ + PyArray_API[190]) +#define PyArray_EquivTypenums \ + (*(unsigned char (*)(int, int)) \ + PyArray_API[191]) +#define PyArray_RegisterDataType \ + (*(int (*)(PyArray_Descr *)) \ + PyArray_API[192]) +#define PyArray_RegisterCastFunc \ + (*(int (*)(PyArray_Descr *, int, PyArray_VectorUnaryFunc *)) \ + PyArray_API[193]) +#define PyArray_RegisterCanCast \ + (*(int (*)(PyArray_Descr *, int, NPY_SCALARKIND)) \ + PyArray_API[194]) 
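Editor's note: the three entries just defined (`PyArray_RegisterDataType`, `PyArray_RegisterCastFunc`, `PyArray_RegisterCanCast`, slots 192-194) form the legacy user-dtype registration path. A hedged sketch of how they fit together; `quad_descr` and `quad_to_double` are hypothetical, and a real descriptor would first be filled in with a complete `PyArray_ArrFuncs`:

static void
quad_to_double(void *from, void *to, npy_intp n, void *fromarr, void *toarr)
{
    /* element-wise conversion from the custom type to double would go here */
}

static int
register_quad_casts(PyArray_Descr *quad_descr)
{
    /* hand the descriptor to NumPy; returns the assigned type number or -1 */
    int typenum = PyArray_RegisterDataType(quad_descr);
    if (typenum < 0) {
        return -1;
    }
    /* install the conversion routine for quad -> double casts... */
    if (PyArray_RegisterCastFunc(quad_descr, NPY_DOUBLE, quad_to_double) < 0) {
        return -1;
    }
    /* ...and declare the cast safe so that np.can_cast() reports it */
    return PyArray_RegisterCanCast(quad_descr, NPY_DOUBLE, NPY_NOSCALAR);
}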
+#define PyArray_InitArrFuncs \ + (*(void (*)(PyArray_ArrFuncs *)) \ + PyArray_API[195]) +#define PyArray_IntTupleFromIntp \ + (*(PyObject * (*)(int, npy_intp const *)) \ + PyArray_API[196]) +#define PyArray_TypeNumFromName \ + (*(int (*)(char const *)) \ + PyArray_API[197]) +#define PyArray_ClipmodeConverter \ + (*(int (*)(PyObject *, NPY_CLIPMODE *)) \ + PyArray_API[198]) +#define PyArray_OutputConverter \ + (*(int (*)(PyObject *, PyArrayObject **)) \ + PyArray_API[199]) +#define PyArray_BroadcastToShape \ + (*(PyObject * (*)(PyObject *, npy_intp *, int)) \ + PyArray_API[200]) +#define _PyArray_SigintHandler \ + (*(void (*)(int)) \ + PyArray_API[201]) +#define _PyArray_GetSigintBuf \ + (*(void* (*)(void)) \ + PyArray_API[202]) +#define PyArray_DescrAlignConverter \ + (*(int (*)(PyObject *, PyArray_Descr **)) \ + PyArray_API[203]) +#define PyArray_DescrAlignConverter2 \ + (*(int (*)(PyObject *, PyArray_Descr **)) \ + PyArray_API[204]) +#define PyArray_SearchsideConverter \ + (*(int (*)(PyObject *, void *)) \ + PyArray_API[205]) +#define PyArray_CheckAxis \ + (*(PyObject * (*)(PyArrayObject *, int *, int)) \ + PyArray_API[206]) +#define PyArray_OverflowMultiplyList \ + (*(npy_intp (*)(npy_intp const *, int)) \ + PyArray_API[207]) +#define PyArray_CompareString \ + (*(int (*)(const char *, const char *, size_t)) \ + PyArray_API[208]) +#define PyArray_MultiIterFromObjects \ + (*(PyObject* (*)(PyObject **, int, int, ...)) \ + PyArray_API[209]) +#define PyArray_GetEndianness \ + (*(int (*)(void)) \ + PyArray_API[210]) +#define PyArray_GetNDArrayCFeatureVersion \ + (*(unsigned int (*)(void)) \ + PyArray_API[211]) +#define PyArray_Correlate2 \ + (*(PyObject * (*)(PyObject *, PyObject *, int)) \ + PyArray_API[212]) +#define PyArray_NeighborhoodIterNew \ + (*(PyObject* (*)(PyArrayIterObject *, const npy_intp *, int, PyArrayObject*)) \ + PyArray_API[213]) +#define PyTimeIntegerArrType_Type (*(PyTypeObject *)PyArray_API[214]) +#define PyDatetimeArrType_Type (*(PyTypeObject *)PyArray_API[215]) +#define PyTimedeltaArrType_Type (*(PyTypeObject *)PyArray_API[216]) +#define PyHalfArrType_Type (*(PyTypeObject *)PyArray_API[217]) +#define NpyIter_Type (*(PyTypeObject *)PyArray_API[218]) +#define PyArray_SetDatetimeParseFunction \ + (*(void (*)(PyObject *NPY_UNUSED(op))) \ + PyArray_API[219]) +#define PyArray_DatetimeToDatetimeStruct \ + (*(void (*)(npy_datetime NPY_UNUSED(val), NPY_DATETIMEUNIT NPY_UNUSED(fr), npy_datetimestruct *)) \ + PyArray_API[220]) +#define PyArray_TimedeltaToTimedeltaStruct \ + (*(void (*)(npy_timedelta NPY_UNUSED(val), NPY_DATETIMEUNIT NPY_UNUSED(fr), npy_timedeltastruct *)) \ + PyArray_API[221]) +#define PyArray_DatetimeStructToDatetime \ + (*(npy_datetime (*)(NPY_DATETIMEUNIT NPY_UNUSED(fr), npy_datetimestruct *NPY_UNUSED(d))) \ + PyArray_API[222]) +#define PyArray_TimedeltaStructToTimedelta \ + (*(npy_datetime (*)(NPY_DATETIMEUNIT NPY_UNUSED(fr), npy_timedeltastruct *NPY_UNUSED(d))) \ + PyArray_API[223]) +#define NpyIter_New \ + (*(NpyIter * (*)(PyArrayObject *, npy_uint32, NPY_ORDER, NPY_CASTING, PyArray_Descr*)) \ + PyArray_API[224]) +#define NpyIter_MultiNew \ + (*(NpyIter * (*)(int, PyArrayObject **, npy_uint32, NPY_ORDER, NPY_CASTING, npy_uint32 *, PyArray_Descr **)) \ + PyArray_API[225]) +#define NpyIter_AdvancedNew \ + (*(NpyIter * (*)(int, PyArrayObject **, npy_uint32, NPY_ORDER, NPY_CASTING, npy_uint32 *, PyArray_Descr **, int, int **, npy_intp *, npy_intp)) \ + PyArray_API[226]) +#define NpyIter_Copy \ + (*(NpyIter * (*)(NpyIter *)) \ + PyArray_API[227]) +#define 
NpyIter_Deallocate \ + (*(int (*)(NpyIter *)) \ + PyArray_API[228]) +#define NpyIter_HasDelayedBufAlloc \ + (*(npy_bool (*)(NpyIter *)) \ + PyArray_API[229]) +#define NpyIter_HasExternalLoop \ + (*(npy_bool (*)(NpyIter *)) \ + PyArray_API[230]) +#define NpyIter_EnableExternalLoop \ + (*(int (*)(NpyIter *)) \ + PyArray_API[231]) +#define NpyIter_GetInnerStrideArray \ + (*(npy_intp * (*)(NpyIter *)) \ + PyArray_API[232]) +#define NpyIter_GetInnerLoopSizePtr \ + (*(npy_intp * (*)(NpyIter *)) \ + PyArray_API[233]) +#define NpyIter_Reset \ + (*(int (*)(NpyIter *, char **)) \ + PyArray_API[234]) +#define NpyIter_ResetBasePointers \ + (*(int (*)(NpyIter *, char **, char **)) \ + PyArray_API[235]) +#define NpyIter_ResetToIterIndexRange \ + (*(int (*)(NpyIter *, npy_intp, npy_intp, char **)) \ + PyArray_API[236]) +#define NpyIter_GetNDim \ + (*(int (*)(NpyIter *)) \ + PyArray_API[237]) +#define NpyIter_GetNOp \ + (*(int (*)(NpyIter *)) \ + PyArray_API[238]) +#define NpyIter_GetIterNext \ + (*(NpyIter_IterNextFunc * (*)(NpyIter *, char **)) \ + PyArray_API[239]) +#define NpyIter_GetIterSize \ + (*(npy_intp (*)(NpyIter *)) \ + PyArray_API[240]) +#define NpyIter_GetIterIndexRange \ + (*(void (*)(NpyIter *, npy_intp *, npy_intp *)) \ + PyArray_API[241]) +#define NpyIter_GetIterIndex \ + (*(npy_intp (*)(NpyIter *)) \ + PyArray_API[242]) +#define NpyIter_GotoIterIndex \ + (*(int (*)(NpyIter *, npy_intp)) \ + PyArray_API[243]) +#define NpyIter_HasMultiIndex \ + (*(npy_bool (*)(NpyIter *)) \ + PyArray_API[244]) +#define NpyIter_GetShape \ + (*(int (*)(NpyIter *, npy_intp *)) \ + PyArray_API[245]) +#define NpyIter_GetGetMultiIndex \ + (*(NpyIter_GetMultiIndexFunc * (*)(NpyIter *, char **)) \ + PyArray_API[246]) +#define NpyIter_GotoMultiIndex \ + (*(int (*)(NpyIter *, npy_intp const *)) \ + PyArray_API[247]) +#define NpyIter_RemoveMultiIndex \ + (*(int (*)(NpyIter *)) \ + PyArray_API[248]) +#define NpyIter_HasIndex \ + (*(npy_bool (*)(NpyIter *)) \ + PyArray_API[249]) +#define NpyIter_IsBuffered \ + (*(npy_bool (*)(NpyIter *)) \ + PyArray_API[250]) +#define NpyIter_IsGrowInner \ + (*(npy_bool (*)(NpyIter *)) \ + PyArray_API[251]) +#define NpyIter_GetBufferSize \ + (*(npy_intp (*)(NpyIter *)) \ + PyArray_API[252]) +#define NpyIter_GetIndexPtr \ + (*(npy_intp * (*)(NpyIter *)) \ + PyArray_API[253]) +#define NpyIter_GotoIndex \ + (*(int (*)(NpyIter *, npy_intp)) \ + PyArray_API[254]) +#define NpyIter_GetDataPtrArray \ + (*(char ** (*)(NpyIter *)) \ + PyArray_API[255]) +#define NpyIter_GetDescrArray \ + (*(PyArray_Descr ** (*)(NpyIter *)) \ + PyArray_API[256]) +#define NpyIter_GetOperandArray \ + (*(PyArrayObject ** (*)(NpyIter *)) \ + PyArray_API[257]) +#define NpyIter_GetIterView \ + (*(PyArrayObject * (*)(NpyIter *, npy_intp)) \ + PyArray_API[258]) +#define NpyIter_GetReadFlags \ + (*(void (*)(NpyIter *, char *)) \ + PyArray_API[259]) +#define NpyIter_GetWriteFlags \ + (*(void (*)(NpyIter *, char *)) \ + PyArray_API[260]) +#define NpyIter_DebugPrint \ + (*(void (*)(NpyIter *)) \ + PyArray_API[261]) +#define NpyIter_IterationNeedsAPI \ + (*(npy_bool (*)(NpyIter *)) \ + PyArray_API[262]) +#define NpyIter_GetInnerFixedStrideArray \ + (*(void (*)(NpyIter *, npy_intp *)) \ + PyArray_API[263]) +#define NpyIter_RemoveAxis \ + (*(int (*)(NpyIter *, int)) \ + PyArray_API[264]) +#define NpyIter_GetAxisStrideArray \ + (*(npy_intp * (*)(NpyIter *, int)) \ + PyArray_API[265]) +#define NpyIter_RequiresBuffering \ + (*(npy_bool (*)(NpyIter *)) \ + PyArray_API[266]) +#define NpyIter_GetInitialDataPtrArray \ + (*(char ** 
(*)(NpyIter *)) \ + PyArray_API[267]) +#define NpyIter_CreateCompatibleStrides \ + (*(int (*)(NpyIter *, npy_intp, npy_intp *)) \ + PyArray_API[268]) +#define PyArray_CastingConverter \ + (*(int (*)(PyObject *, NPY_CASTING *)) \ + PyArray_API[269]) +#define PyArray_CountNonzero \ + (*(npy_intp (*)(PyArrayObject *)) \ + PyArray_API[270]) +#define PyArray_PromoteTypes \ + (*(PyArray_Descr * (*)(PyArray_Descr *, PyArray_Descr *)) \ + PyArray_API[271]) +#define PyArray_MinScalarType \ + (*(PyArray_Descr * (*)(PyArrayObject *)) \ + PyArray_API[272]) +#define PyArray_ResultType \ + (*(PyArray_Descr * (*)(npy_intp, PyArrayObject *arrs[], npy_intp, PyArray_Descr *descrs[])) \ + PyArray_API[273]) +#define PyArray_CanCastArrayTo \ + (*(npy_bool (*)(PyArrayObject *, PyArray_Descr *, NPY_CASTING)) \ + PyArray_API[274]) +#define PyArray_CanCastTypeTo \ + (*(npy_bool (*)(PyArray_Descr *, PyArray_Descr *, NPY_CASTING)) \ + PyArray_API[275]) +#define PyArray_EinsteinSum \ + (*(PyArrayObject * (*)(char *, npy_intp, PyArrayObject **, PyArray_Descr *, NPY_ORDER, NPY_CASTING, PyArrayObject *)) \ + PyArray_API[276]) +#define PyArray_NewLikeArray \ + (*(PyObject * (*)(PyArrayObject *, NPY_ORDER, PyArray_Descr *, int)) \ + PyArray_API[277]) +#define PyArray_GetArrayParamsFromObject \ + (*(int (*)(PyObject *NPY_UNUSED(op), PyArray_Descr *NPY_UNUSED(requested_dtype), npy_bool NPY_UNUSED(writeable), PyArray_Descr **NPY_UNUSED(out_dtype), int *NPY_UNUSED(out_ndim), npy_intp *NPY_UNUSED(out_dims), PyArrayObject **NPY_UNUSED(out_arr), PyObject *NPY_UNUSED(context))) \ + PyArray_API[278]) +#define PyArray_ConvertClipmodeSequence \ + (*(int (*)(PyObject *, NPY_CLIPMODE *, int)) \ + PyArray_API[279]) +#define PyArray_MatrixProduct2 \ + (*(PyObject * (*)(PyObject *, PyObject *, PyArrayObject*)) \ + PyArray_API[280]) +#define NpyIter_IsFirstVisit \ + (*(npy_bool (*)(NpyIter *, int)) \ + PyArray_API[281]) +#define PyArray_SetBaseObject \ + (*(int (*)(PyArrayObject *, PyObject *)) \ + PyArray_API[282]) +#define PyArray_CreateSortedStridePerm \ + (*(void (*)(int, npy_intp const *, npy_stride_sort_item *)) \ + PyArray_API[283]) +#define PyArray_RemoveAxesInPlace \ + (*(void (*)(PyArrayObject *, const npy_bool *)) \ + PyArray_API[284]) +#define PyArray_DebugPrint \ + (*(void (*)(PyArrayObject *)) \ + PyArray_API[285]) +#define PyArray_FailUnlessWriteable \ + (*(int (*)(PyArrayObject *, const char *)) \ + PyArray_API[286]) +#define PyArray_SetUpdateIfCopyBase \ + (*(int (*)(PyArrayObject *, PyArrayObject *)) \ + PyArray_API[287]) +#define PyDataMem_NEW \ + (*(void * (*)(size_t)) \ + PyArray_API[288]) +#define PyDataMem_FREE \ + (*(void (*)(void *)) \ + PyArray_API[289]) +#define PyDataMem_RENEW \ + (*(void * (*)(void *, size_t)) \ + PyArray_API[290]) +#define PyDataMem_SetEventHook \ + (*(PyDataMem_EventHookFunc * (*)(PyDataMem_EventHookFunc *, void *, void **)) \ + PyArray_API[291]) +#define NPY_DEFAULT_ASSIGN_CASTING (*(NPY_CASTING *)PyArray_API[292]) +#define PyArray_MapIterSwapAxes \ + (*(void (*)(PyArrayMapIterObject *, PyArrayObject **, int)) \ + PyArray_API[293]) +#define PyArray_MapIterArray \ + (*(PyObject * (*)(PyArrayObject *, PyObject *)) \ + PyArray_API[294]) +#define PyArray_MapIterNext \ + (*(void (*)(PyArrayMapIterObject *)) \ + PyArray_API[295]) +#define PyArray_Partition \ + (*(int (*)(PyArrayObject *, PyArrayObject *, int, NPY_SELECTKIND)) \ + PyArray_API[296]) +#define PyArray_ArgPartition \ + (*(PyObject * (*)(PyArrayObject *, PyArrayObject *, int, NPY_SELECTKIND)) \ + PyArray_API[297]) +#define 
PyArray_SelectkindConverter \ + (*(int (*)(PyObject *, NPY_SELECTKIND *)) \ + PyArray_API[298]) +#define PyDataMem_NEW_ZEROED \ + (*(void * (*)(size_t, size_t)) \ + PyArray_API[299]) +#define PyArray_CheckAnyScalarExact \ + (*(int (*)(PyObject *)) \ + PyArray_API[300]) +#define PyArray_MapIterArrayCopyIfOverlap \ + (*(PyObject * (*)(PyArrayObject *, PyObject *, int, PyArrayObject *)) \ + PyArray_API[301]) +#define PyArray_ResolveWritebackIfCopy \ + (*(int (*)(PyArrayObject *)) \ + PyArray_API[302]) +#define PyArray_SetWritebackIfCopyBase \ + (*(int (*)(PyArrayObject *, PyArrayObject *)) \ + PyArray_API[303]) +#define PyDataMem_SetHandler \ + (*(PyObject * (*)(PyObject *)) \ + PyArray_API[304]) +#define PyDataMem_GetHandler \ + (*(PyObject * (*)(void)) \ + PyArray_API[305]) +#define PyDataMem_DefaultHandler (*(PyObject* *)PyArray_API[306]) + +#if !defined(NO_IMPORT_ARRAY) && !defined(NO_IMPORT) +static int +_import_array(void) +{ + int st; + PyObject *numpy = PyImport_ImportModule("numpy.core._multiarray_umath"); + PyObject *c_api = NULL; + + if (numpy == NULL) { + return -1; + } + c_api = PyObject_GetAttrString(numpy, "_ARRAY_API"); + Py_DECREF(numpy); + if (c_api == NULL) { + PyErr_SetString(PyExc_AttributeError, "_ARRAY_API not found"); + return -1; + } + + if (!PyCapsule_CheckExact(c_api)) { + PyErr_SetString(PyExc_RuntimeError, "_ARRAY_API is not PyCapsule object"); + Py_DECREF(c_api); + return -1; + } + PyArray_API = (void **)PyCapsule_GetPointer(c_api, NULL); + Py_DECREF(c_api); + if (PyArray_API == NULL) { + PyErr_SetString(PyExc_RuntimeError, "_ARRAY_API is NULL pointer"); + return -1; + } + + /* Perform runtime check of C API version */ + if (NPY_VERSION != PyArray_GetNDArrayCVersion()) { + PyErr_Format(PyExc_RuntimeError, "module compiled against "\ + "ABI version 0x%x but this version of numpy is 0x%x", \ + (int) NPY_VERSION, (int) PyArray_GetNDArrayCVersion()); + return -1; + } + if (NPY_FEATURE_VERSION > PyArray_GetNDArrayCFeatureVersion()) { + PyErr_Format(PyExc_RuntimeError, "module compiled against "\ + "API version 0x%x but this version of numpy is 0x%x", \ + (int) NPY_FEATURE_VERSION, (int) PyArray_GetNDArrayCFeatureVersion()); + return -1; + } + + /* + * Perform runtime check of endianness and check it matches the one set by + * the headers (npy_endian.h) as a safeguard + */ + st = PyArray_GetEndianness(); + if (st == NPY_CPU_UNKNOWN_ENDIAN) { + PyErr_Format(PyExc_RuntimeError, "FATAL: module compiled as unknown endian"); + return -1; + } +#if NPY_BYTE_ORDER == NPY_BIG_ENDIAN + if (st != NPY_CPU_BIG) { + PyErr_Format(PyExc_RuntimeError, "FATAL: module compiled as "\ + "big endian, but detected different endianness at runtime"); + return -1; + } +#elif NPY_BYTE_ORDER == NPY_LITTLE_ENDIAN + if (st != NPY_CPU_LITTLE) { + PyErr_Format(PyExc_RuntimeError, "FATAL: module compiled as "\ + "little endian, but detected different endianness at runtime"); + return -1; + } +#endif + + return 0; +} + +#define import_array() {if (_import_array() < 0) {PyErr_Print(); PyErr_SetString(PyExc_ImportError, "numpy.core.multiarray failed to import"); return NULL; } } + +#define import_array1(ret) {if (_import_array() < 0) {PyErr_Print(); PyErr_SetString(PyExc_ImportError, "numpy.core.multiarray failed to import"); return ret; } } + +#define import_array2(msg, ret) {if (_import_array() < 0) {PyErr_Print(); PyErr_SetString(PyExc_ImportError, msg); return ret; } } + +#endif + +#endif diff --git a/.local/lib/python3.8/site-packages/numpy/core/include/numpy/__ufunc_api.h 
b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/__ufunc_api.h new file mode 100644 index 0000000..5b5a9f4 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/__ufunc_api.h @@ -0,0 +1,311 @@ + +#ifdef _UMATHMODULE + +extern NPY_NO_EXPORT PyTypeObject PyUFunc_Type; + +extern NPY_NO_EXPORT PyTypeObject PyUFunc_Type; + +NPY_NO_EXPORT PyObject * PyUFunc_FromFuncAndData \ + (PyUFuncGenericFunction *, void **, char *, int, int, int, int, const char *, const char *, int); +NPY_NO_EXPORT int PyUFunc_RegisterLoopForType \ + (PyUFuncObject *, int, PyUFuncGenericFunction, const int *, void *); +NPY_NO_EXPORT int PyUFunc_GenericFunction \ + (PyUFuncObject *NPY_UNUSED(ufunc), PyObject *NPY_UNUSED(args), PyObject *NPY_UNUSED(kwds), PyArrayObject **NPY_UNUSED(op)); +NPY_NO_EXPORT void PyUFunc_f_f_As_d_d \ + (char **, npy_intp const *, npy_intp const *, void *); +NPY_NO_EXPORT void PyUFunc_d_d \ + (char **, npy_intp const *, npy_intp const *, void *); +NPY_NO_EXPORT void PyUFunc_f_f \ + (char **, npy_intp const *, npy_intp const *, void *); +NPY_NO_EXPORT void PyUFunc_g_g \ + (char **, npy_intp const *, npy_intp const *, void *); +NPY_NO_EXPORT void PyUFunc_F_F_As_D_D \ + (char **, npy_intp const *, npy_intp const *, void *); +NPY_NO_EXPORT void PyUFunc_F_F \ + (char **, npy_intp const *, npy_intp const *, void *); +NPY_NO_EXPORT void PyUFunc_D_D \ + (char **, npy_intp const *, npy_intp const *, void *); +NPY_NO_EXPORT void PyUFunc_G_G \ + (char **, npy_intp const *, npy_intp const *, void *); +NPY_NO_EXPORT void PyUFunc_O_O \ + (char **, npy_intp const *, npy_intp const *, void *); +NPY_NO_EXPORT void PyUFunc_ff_f_As_dd_d \ + (char **, npy_intp const *, npy_intp const *, void *); +NPY_NO_EXPORT void PyUFunc_ff_f \ + (char **, npy_intp const *, npy_intp const *, void *); +NPY_NO_EXPORT void PyUFunc_dd_d \ + (char **, npy_intp const *, npy_intp const *, void *); +NPY_NO_EXPORT void PyUFunc_gg_g \ + (char **, npy_intp const *, npy_intp const *, void *); +NPY_NO_EXPORT void PyUFunc_FF_F_As_DD_D \ + (char **, npy_intp const *, npy_intp const *, void *); +NPY_NO_EXPORT void PyUFunc_DD_D \ + (char **, npy_intp const *, npy_intp const *, void *); +NPY_NO_EXPORT void PyUFunc_FF_F \ + (char **, npy_intp const *, npy_intp const *, void *); +NPY_NO_EXPORT void PyUFunc_GG_G \ + (char **, npy_intp const *, npy_intp const *, void *); +NPY_NO_EXPORT void PyUFunc_OO_O \ + (char **, npy_intp const *, npy_intp const *, void *); +NPY_NO_EXPORT void PyUFunc_O_O_method \ + (char **, npy_intp const *, npy_intp const *, void *); +NPY_NO_EXPORT void PyUFunc_OO_O_method \ + (char **, npy_intp const *, npy_intp const *, void *); +NPY_NO_EXPORT void PyUFunc_On_Om \ + (char **, npy_intp const *, npy_intp const *, void *); +NPY_NO_EXPORT int PyUFunc_GetPyValues \ + (char *, int *, int *, PyObject **); +NPY_NO_EXPORT int PyUFunc_checkfperr \ + (int, PyObject *, int *); +NPY_NO_EXPORT void PyUFunc_clearfperr \ + (void); +NPY_NO_EXPORT int PyUFunc_getfperr \ + (void); +NPY_NO_EXPORT int PyUFunc_handlefperr \ + (int, PyObject *, int, int *); +NPY_NO_EXPORT int PyUFunc_ReplaceLoopBySignature \ + (PyUFuncObject *, PyUFuncGenericFunction, const int *, PyUFuncGenericFunction *); +NPY_NO_EXPORT PyObject * PyUFunc_FromFuncAndDataAndSignature \ + (PyUFuncGenericFunction *, void **, char *, int, int, int, int, const char *, const char *, int, const char *); +NPY_NO_EXPORT int PyUFunc_SetUsesArraysAsData \ + (void **NPY_UNUSED(data), size_t NPY_UNUSED(i)); +NPY_NO_EXPORT void PyUFunc_e_e \ + (char **, 
npy_intp const *, npy_intp const *, void *); +NPY_NO_EXPORT void PyUFunc_e_e_As_f_f \ + (char **, npy_intp const *, npy_intp const *, void *); +NPY_NO_EXPORT void PyUFunc_e_e_As_d_d \ + (char **, npy_intp const *, npy_intp const *, void *); +NPY_NO_EXPORT void PyUFunc_ee_e \ + (char **, npy_intp const *, npy_intp const *, void *); +NPY_NO_EXPORT void PyUFunc_ee_e_As_ff_f \ + (char **, npy_intp const *, npy_intp const *, void *); +NPY_NO_EXPORT void PyUFunc_ee_e_As_dd_d \ + (char **, npy_intp const *, npy_intp const *, void *); +NPY_NO_EXPORT int PyUFunc_DefaultTypeResolver \ + (PyUFuncObject *, NPY_CASTING, PyArrayObject **, PyObject *, PyArray_Descr **); +NPY_NO_EXPORT int PyUFunc_ValidateCasting \ + (PyUFuncObject *, NPY_CASTING, PyArrayObject **, PyArray_Descr **); +NPY_NO_EXPORT int PyUFunc_RegisterLoopForDescr \ + (PyUFuncObject *, PyArray_Descr *, PyUFuncGenericFunction, PyArray_Descr **, void *); +NPY_NO_EXPORT PyObject * PyUFunc_FromFuncAndDataAndSignatureAndIdentity \ + (PyUFuncGenericFunction *, void **, char *, int, int, int, int, const char *, const char *, const int, const char *, PyObject *); + +#else + +#if defined(PY_UFUNC_UNIQUE_SYMBOL) +#define PyUFunc_API PY_UFUNC_UNIQUE_SYMBOL +#endif + +#if defined(NO_IMPORT) || defined(NO_IMPORT_UFUNC) +extern void **PyUFunc_API; +#else +#if defined(PY_UFUNC_UNIQUE_SYMBOL) +void **PyUFunc_API; +#else +static void **PyUFunc_API=NULL; +#endif +#endif + +#define PyUFunc_Type (*(PyTypeObject *)PyUFunc_API[0]) +#define PyUFunc_FromFuncAndData \ + (*(PyObject * (*)(PyUFuncGenericFunction *, void **, char *, int, int, int, int, const char *, const char *, int)) \ + PyUFunc_API[1]) +#define PyUFunc_RegisterLoopForType \ + (*(int (*)(PyUFuncObject *, int, PyUFuncGenericFunction, const int *, void *)) \ + PyUFunc_API[2]) +#define PyUFunc_GenericFunction \ + (*(int (*)(PyUFuncObject *NPY_UNUSED(ufunc), PyObject *NPY_UNUSED(args), PyObject *NPY_UNUSED(kwds), PyArrayObject **NPY_UNUSED(op))) \ + PyUFunc_API[3]) +#define PyUFunc_f_f_As_d_d \ + (*(void (*)(char **, npy_intp const *, npy_intp const *, void *)) \ + PyUFunc_API[4]) +#define PyUFunc_d_d \ + (*(void (*)(char **, npy_intp const *, npy_intp const *, void *)) \ + PyUFunc_API[5]) +#define PyUFunc_f_f \ + (*(void (*)(char **, npy_intp const *, npy_intp const *, void *)) \ + PyUFunc_API[6]) +#define PyUFunc_g_g \ + (*(void (*)(char **, npy_intp const *, npy_intp const *, void *)) \ + PyUFunc_API[7]) +#define PyUFunc_F_F_As_D_D \ + (*(void (*)(char **, npy_intp const *, npy_intp const *, void *)) \ + PyUFunc_API[8]) +#define PyUFunc_F_F \ + (*(void (*)(char **, npy_intp const *, npy_intp const *, void *)) \ + PyUFunc_API[9]) +#define PyUFunc_D_D \ + (*(void (*)(char **, npy_intp const *, npy_intp const *, void *)) \ + PyUFunc_API[10]) +#define PyUFunc_G_G \ + (*(void (*)(char **, npy_intp const *, npy_intp const *, void *)) \ + PyUFunc_API[11]) +#define PyUFunc_O_O \ + (*(void (*)(char **, npy_intp const *, npy_intp const *, void *)) \ + PyUFunc_API[12]) +#define PyUFunc_ff_f_As_dd_d \ + (*(void (*)(char **, npy_intp const *, npy_intp const *, void *)) \ + PyUFunc_API[13]) +#define PyUFunc_ff_f \ + (*(void (*)(char **, npy_intp const *, npy_intp const *, void *)) \ + PyUFunc_API[14]) +#define PyUFunc_dd_d \ + (*(void (*)(char **, npy_intp const *, npy_intp const *, void *)) \ + PyUFunc_API[15]) +#define PyUFunc_gg_g \ + (*(void (*)(char **, npy_intp const *, npy_intp const *, void *)) \ + PyUFunc_API[16]) +#define PyUFunc_FF_F_As_DD_D \ + (*(void (*)(char **, npy_intp const *, npy_intp const *, 
void *)) \ + PyUFunc_API[17]) +#define PyUFunc_DD_D \ + (*(void (*)(char **, npy_intp const *, npy_intp const *, void *)) \ + PyUFunc_API[18]) +#define PyUFunc_FF_F \ + (*(void (*)(char **, npy_intp const *, npy_intp const *, void *)) \ + PyUFunc_API[19]) +#define PyUFunc_GG_G \ + (*(void (*)(char **, npy_intp const *, npy_intp const *, void *)) \ + PyUFunc_API[20]) +#define PyUFunc_OO_O \ + (*(void (*)(char **, npy_intp const *, npy_intp const *, void *)) \ + PyUFunc_API[21]) +#define PyUFunc_O_O_method \ + (*(void (*)(char **, npy_intp const *, npy_intp const *, void *)) \ + PyUFunc_API[22]) +#define PyUFunc_OO_O_method \ + (*(void (*)(char **, npy_intp const *, npy_intp const *, void *)) \ + PyUFunc_API[23]) +#define PyUFunc_On_Om \ + (*(void (*)(char **, npy_intp const *, npy_intp const *, void *)) \ + PyUFunc_API[24]) +#define PyUFunc_GetPyValues \ + (*(int (*)(char *, int *, int *, PyObject **)) \ + PyUFunc_API[25]) +#define PyUFunc_checkfperr \ + (*(int (*)(int, PyObject *, int *)) \ + PyUFunc_API[26]) +#define PyUFunc_clearfperr \ + (*(void (*)(void)) \ + PyUFunc_API[27]) +#define PyUFunc_getfperr \ + (*(int (*)(void)) \ + PyUFunc_API[28]) +#define PyUFunc_handlefperr \ + (*(int (*)(int, PyObject *, int, int *)) \ + PyUFunc_API[29]) +#define PyUFunc_ReplaceLoopBySignature \ + (*(int (*)(PyUFuncObject *, PyUFuncGenericFunction, const int *, PyUFuncGenericFunction *)) \ + PyUFunc_API[30]) +#define PyUFunc_FromFuncAndDataAndSignature \ + (*(PyObject * (*)(PyUFuncGenericFunction *, void **, char *, int, int, int, int, const char *, const char *, int, const char *)) \ + PyUFunc_API[31]) +#define PyUFunc_SetUsesArraysAsData \ + (*(int (*)(void **NPY_UNUSED(data), size_t NPY_UNUSED(i))) \ + PyUFunc_API[32]) +#define PyUFunc_e_e \ + (*(void (*)(char **, npy_intp const *, npy_intp const *, void *)) \ + PyUFunc_API[33]) +#define PyUFunc_e_e_As_f_f \ + (*(void (*)(char **, npy_intp const *, npy_intp const *, void *)) \ + PyUFunc_API[34]) +#define PyUFunc_e_e_As_d_d \ + (*(void (*)(char **, npy_intp const *, npy_intp const *, void *)) \ + PyUFunc_API[35]) +#define PyUFunc_ee_e \ + (*(void (*)(char **, npy_intp const *, npy_intp const *, void *)) \ + PyUFunc_API[36]) +#define PyUFunc_ee_e_As_ff_f \ + (*(void (*)(char **, npy_intp const *, npy_intp const *, void *)) \ + PyUFunc_API[37]) +#define PyUFunc_ee_e_As_dd_d \ + (*(void (*)(char **, npy_intp const *, npy_intp const *, void *)) \ + PyUFunc_API[38]) +#define PyUFunc_DefaultTypeResolver \ + (*(int (*)(PyUFuncObject *, NPY_CASTING, PyArrayObject **, PyObject *, PyArray_Descr **)) \ + PyUFunc_API[39]) +#define PyUFunc_ValidateCasting \ + (*(int (*)(PyUFuncObject *, NPY_CASTING, PyArrayObject **, PyArray_Descr **)) \ + PyUFunc_API[40]) +#define PyUFunc_RegisterLoopForDescr \ + (*(int (*)(PyUFuncObject *, PyArray_Descr *, PyUFuncGenericFunction, PyArray_Descr **, void *)) \ + PyUFunc_API[41]) +#define PyUFunc_FromFuncAndDataAndSignatureAndIdentity \ + (*(PyObject * (*)(PyUFuncGenericFunction *, void **, char *, int, int, int, int, const char *, const char *, const int, const char *, PyObject *)) \ + PyUFunc_API[42]) + +static NPY_INLINE int +_import_umath(void) +{ + PyObject *numpy = PyImport_ImportModule("numpy.core._multiarray_umath"); + PyObject *c_api = NULL; + + if (numpy == NULL) { + PyErr_SetString(PyExc_ImportError, + "numpy.core._multiarray_umath failed to import"); + return -1; + } + c_api = PyObject_GetAttrString(numpy, "_UFUNC_API"); + Py_DECREF(numpy); + if (c_api == NULL) { + PyErr_SetString(PyExc_AttributeError, "_UFUNC_API not 
found"; + return -1; + } + + if (!PyCapsule_CheckExact(c_api)) { + PyErr_SetString(PyExc_RuntimeError, "_UFUNC_API is not PyCapsule object"); + Py_DECREF(c_api); + return -1; + } + PyUFunc_API = (void **)PyCapsule_GetPointer(c_api, NULL); + Py_DECREF(c_api); + if (PyUFunc_API == NULL) { + PyErr_SetString(PyExc_RuntimeError, "_UFUNC_API is NULL pointer"); + return -1; + } + return 0; +} + +#define import_umath() \ + do {\ + UFUNC_NOFPE\ + if (_import_umath() < 0) {\ + PyErr_Print();\ + PyErr_SetString(PyExc_ImportError,\ + "numpy.core.umath failed to import");\ + return NULL;\ + }\ + } while(0) + +#define import_umath1(ret) \ + do {\ + UFUNC_NOFPE\ + if (_import_umath() < 0) {\ + PyErr_Print();\ + PyErr_SetString(PyExc_ImportError,\ + "numpy.core.umath failed to import");\ + return ret;\ + }\ + } while(0) + +#define import_umath2(ret, msg) \ + do {\ + UFUNC_NOFPE\ + if (_import_umath() < 0) {\ + PyErr_Print();\ + PyErr_SetString(PyExc_ImportError, msg);\ + return ret;\ + }\ + } while(0) + +#define import_ufunc() \ + do {\ + UFUNC_NOFPE\ + if (_import_umath() < 0) {\ + PyErr_Print();\ + PyErr_SetString(PyExc_ImportError,\ + "numpy.core.umath failed to import");\ + }\ + } while(0) + +#endif diff --git a/.local/lib/python3.8/site-packages/numpy/core/include/numpy/_neighborhood_iterator_imp.h b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/_neighborhood_iterator_imp.h new file mode 100644 index 0000000..07e2363 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/_neighborhood_iterator_imp.h @@ -0,0 +1,90 @@ +#ifndef NUMPY_CORE_INCLUDE_NUMPY__NEIGHBORHOOD_IMP_H_ +#error You should not include this header directly +#endif +/* + * Private API (here for inline) + */ +static NPY_INLINE int +_PyArrayNeighborhoodIter_IncrCoord(PyArrayNeighborhoodIterObject* iter); + +/* + * Update to the next item of the iterator + * + * Note: this simply increments the coordinates vector, last dimension + * incremented first, i.e., for dimension 3 + * ... + * -1, -1, -1 + * -1, -1, 0 + * -1, -1, 1 + * .... + * -1, 0, -1 + * -1, 0, 0 + * .... + * 0, -1, -1 + * 0, -1, 0 + * ....
+ */ +#define _UPDATE_COORD_ITER(c) \ + wb = iter->coordinates[c] < iter->bounds[c][1]; \ + if (wb) { \ + iter->coordinates[c] += 1; \ + return 0; \ + } \ + else { \ + iter->coordinates[c] = iter->bounds[c][0]; \ + } + +static NPY_INLINE int +_PyArrayNeighborhoodIter_IncrCoord(PyArrayNeighborhoodIterObject* iter) +{ + npy_intp i, wb; + + for (i = iter->nd - 1; i >= 0; --i) { + _UPDATE_COORD_ITER(i) + } + + return 0; +} + +/* + * Version optimized for 2d arrays, manual loop unrolling + */ +static NPY_INLINE int +_PyArrayNeighborhoodIter_IncrCoord2D(PyArrayNeighborhoodIterObject* iter) +{ + npy_intp wb; + + _UPDATE_COORD_ITER(1) + _UPDATE_COORD_ITER(0) + + return 0; +} +#undef _UPDATE_COORD_ITER + +/* + * Advance to the next neighbour + */ +static NPY_INLINE int +PyArrayNeighborhoodIter_Next(PyArrayNeighborhoodIterObject* iter) +{ + _PyArrayNeighborhoodIter_IncrCoord (iter); + iter->dataptr = iter->translate((PyArrayIterObject*)iter, iter->coordinates); + + return 0; +} + +/* + * Reset functions + */ +static NPY_INLINE int +PyArrayNeighborhoodIter_Reset(PyArrayNeighborhoodIterObject* iter) +{ + npy_intp i; + + for (i = 0; i < iter->nd; ++i) { + iter->coordinates[i] = iter->bounds[i][0]; + } + iter->dataptr = iter->translate((PyArrayIterObject*)iter, iter->coordinates); + + return 0; +} diff --git a/.local/lib/python3.8/site-packages/numpy/core/include/numpy/_numpyconfig.h b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/_numpyconfig.h new file mode 100644 index 0000000..a94934a --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/_numpyconfig.h @@ -0,0 +1,32 @@ +#define NPY_HAVE_ENDIAN_H 1 +#define NPY_SIZEOF_SHORT SIZEOF_SHORT +#define NPY_SIZEOF_INT SIZEOF_INT +#define NPY_SIZEOF_LONG SIZEOF_LONG +#define NPY_SIZEOF_FLOAT 4 +#define NPY_SIZEOF_COMPLEX_FLOAT 8 +#define NPY_SIZEOF_DOUBLE 8 +#define NPY_SIZEOF_COMPLEX_DOUBLE 16 +#define NPY_SIZEOF_LONGDOUBLE 16 +#define NPY_SIZEOF_COMPLEX_LONGDOUBLE 32 +#define NPY_SIZEOF_PY_INTPTR_T 8 +#define NPY_SIZEOF_OFF_T 8 +#define NPY_SIZEOF_PY_LONG_LONG 8 +#define NPY_SIZEOF_LONGLONG 8 +#define NPY_NO_SMP 0 +#define NPY_HAVE_DECL_ISNAN +#define NPY_HAVE_DECL_ISINF +#define NPY_HAVE_DECL_ISFINITE +#define NPY_HAVE_DECL_SIGNBIT +#define NPY_USE_C99_COMPLEX 1 +#define NPY_HAVE_COMPLEX_DOUBLE 1 +#define NPY_HAVE_COMPLEX_FLOAT 1 +#define NPY_HAVE_COMPLEX_LONG_DOUBLE 1 +#define NPY_RELAXED_STRIDES_CHECKING 1 +#define NPY_USE_C99_FORMATS 1 +#define NPY_VISIBILITY_HIDDEN __attribute__((visibility("hidden"))) +#define NPY_ABI_VERSION 0x01000009 +#define NPY_API_VERSION 0x0000000F + +#ifndef __STDC_FORMAT_MACROS +#define __STDC_FORMAT_MACROS 1 +#endif diff --git a/.local/lib/python3.8/site-packages/numpy/core/include/numpy/arrayobject.h b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/arrayobject.h new file mode 100644 index 0000000..da47bb0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/arrayobject.h @@ -0,0 +1,12 @@ +#ifndef NUMPY_CORE_INCLUDE_NUMPY_ARRAYOBJECT_H_ +#define NUMPY_CORE_INCLUDE_NUMPY_ARRAYOBJECT_H_ +#define Py_ARRAYOBJECT_H + +#include "ndarrayobject.h" +#include "npy_interrupt.h" + +#ifdef NPY_NO_PREFIX +#include "noprefix.h" +#endif + +#endif /* NUMPY_CORE_INCLUDE_NUMPY_ARRAYOBJECT_H_ */ diff --git a/.local/lib/python3.8/site-packages/numpy/core/include/numpy/arrayscalars.h b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/arrayscalars.h new file mode 100644 index 0000000..a20a680 --- /dev/null +++ 
b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/arrayscalars.h @@ -0,0 +1,182 @@ +#ifndef NUMPY_CORE_INCLUDE_NUMPY_ARRAYSCALARS_H_ +#define NUMPY_CORE_INCLUDE_NUMPY_ARRAYSCALARS_H_ + +#ifndef _MULTIARRAYMODULE +typedef struct { + PyObject_HEAD + npy_bool obval; +} PyBoolScalarObject; +#endif + + +typedef struct { + PyObject_HEAD + signed char obval; +} PyByteScalarObject; + + +typedef struct { + PyObject_HEAD + short obval; +} PyShortScalarObject; + + +typedef struct { + PyObject_HEAD + int obval; +} PyIntScalarObject; + + +typedef struct { + PyObject_HEAD + long obval; +} PyLongScalarObject; + + +typedef struct { + PyObject_HEAD + npy_longlong obval; +} PyLongLongScalarObject; + + +typedef struct { + PyObject_HEAD + unsigned char obval; +} PyUByteScalarObject; + + +typedef struct { + PyObject_HEAD + unsigned short obval; +} PyUShortScalarObject; + + +typedef struct { + PyObject_HEAD + unsigned int obval; +} PyUIntScalarObject; + + +typedef struct { + PyObject_HEAD + unsigned long obval; +} PyULongScalarObject; + + +typedef struct { + PyObject_HEAD + npy_ulonglong obval; +} PyULongLongScalarObject; + + +typedef struct { + PyObject_HEAD + npy_half obval; +} PyHalfScalarObject; + + +typedef struct { + PyObject_HEAD + float obval; +} PyFloatScalarObject; + + +typedef struct { + PyObject_HEAD + double obval; +} PyDoubleScalarObject; + + +typedef struct { + PyObject_HEAD + npy_longdouble obval; +} PyLongDoubleScalarObject; + + +typedef struct { + PyObject_HEAD + npy_cfloat obval; +} PyCFloatScalarObject; + + +typedef struct { + PyObject_HEAD + npy_cdouble obval; +} PyCDoubleScalarObject; + + +typedef struct { + PyObject_HEAD + npy_clongdouble obval; +} PyCLongDoubleScalarObject; + + +typedef struct { + PyObject_HEAD + PyObject * obval; +} PyObjectScalarObject; + +typedef struct { + PyObject_HEAD + npy_datetime obval; + PyArray_DatetimeMetaData obmeta; +} PyDatetimeScalarObject; + +typedef struct { + PyObject_HEAD + npy_timedelta obval; + PyArray_DatetimeMetaData obmeta; +} PyTimedeltaScalarObject; + + +typedef struct { + PyObject_HEAD + char obval; +} PyScalarObject; + +#define PyStringScalarObject PyBytesObject +typedef struct { + /* note that the PyObject_HEAD macro lives right here */ + PyUnicodeObject base; + Py_UCS4 *obval; + char *buffer_fmt; +} PyUnicodeScalarObject; + + +typedef struct { + PyObject_VAR_HEAD + char *obval; + PyArray_Descr *descr; + int flags; + PyObject *base; + void *_buffer_info; /* private buffer info, tagged to allow warning */ +} PyVoidScalarObject; + +/* Macros + PyScalarObject + PyArrType_Type + are defined in ndarrayobject.h +*/ + +#define PyArrayScalar_False ((PyObject *)(&(_PyArrayScalar_BoolValues[0]))) +#define PyArrayScalar_True ((PyObject *)(&(_PyArrayScalar_BoolValues[1]))) +#define PyArrayScalar_FromLong(i) \ + ((PyObject *)(&(_PyArrayScalar_BoolValues[((i)!=0)]))) +#define PyArrayScalar_RETURN_BOOL_FROM_LONG(i) \ + return Py_INCREF(PyArrayScalar_FromLong(i)), \ + PyArrayScalar_FromLong(i) +#define PyArrayScalar_RETURN_FALSE \ + return Py_INCREF(PyArrayScalar_False), \ + PyArrayScalar_False +#define PyArrayScalar_RETURN_TRUE \ + return Py_INCREF(PyArrayScalar_True), \ + PyArrayScalar_True + +#define PyArrayScalar_New(cls) \ + Py##cls##ArrType_Type.tp_alloc(&Py##cls##ArrType_Type, 0) +#define PyArrayScalar_VAL(obj, cls) \ + ((Py##cls##ScalarObject *)obj)->obval +#define PyArrayScalar_ASSIGN(obj, cls, val) \ + PyArrayScalar_VAL(obj, cls) = val + +#endif /* NUMPY_CORE_INCLUDE_NUMPY_ARRAYSCALARS_H_ */ diff --git 
a/.local/lib/python3.8/site-packages/numpy/core/include/numpy/experimental_dtype_api.h b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/experimental_dtype_api.h new file mode 100644 index 0000000..effa66b --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/experimental_dtype_api.h @@ -0,0 +1,386 @@ +/* + * This header exports the new experimental DType API as proposed in + * NEPs 41 to 43. For background, please check these NEPs. Otherwise, + * this header also serves as documentation for the time being. + * + * Please do not hesitate to contact @seberg with questions. This is + * developed together with https://github.com/seberg/experimental_user_dtypes + * and those interested in experimenting are encouraged to contribute there. + * + * To use the functions defined in the header, call:: + * + * if (import_experimental_dtype_api(version) < 0) { + * return NULL; + * } + * + * in your module init. (A version mismatch will be reported; just update + * to the correct one, and this will alert you to possible changes.) + * + * The following lists the main symbols currently exported. Please do not + * hesitate to ask for help or clarification: + * + * - PyUFunc_AddLoopFromSpec: + * + * Register a new loop for a ufunc. This uses the `PyArrayMethod_Spec` + * which must be filled in (see in-line comments). + * + * - PyUFunc_AddPromoter: + * + * Register a new promoter for a ufunc. A promoter is a function stored + * in a PyCapsule (see in-line comments). It is passed the operation and + * requested DType signatures and can mutate them to attempt a new search + * for a matching loop/promoter. + * I.e. for Numba a promoter could even add the desired loop. + * + * - PyArrayInitDTypeMeta_FromSpec: + * + * Initialize a new DType. It must currently be a static Python C type + * that is declared as `PyArray_DTypeMeta` and not `PyTypeObject`. + * Further, it must subclass `np.dtype` and set its type to + * `PyArrayDTypeMeta_Type` (before calling `PyType_Ready()`). + * + * - PyArray_CommonDType: + * + * Find the common-dtype ("promotion") for two DType classes. Similar + * to `np.result_type`, but works on the classes and not instances. + * + * - PyArray_PromoteDTypeSequence: + * + * Same as CommonDType, but works with an arbitrary number of DTypes. + * This function is smarter and can often return successful and unambiguous + * results when `common_dtype(common_dtype(dt1, dt2), dt3)` would + * depend on the operation order or fail. Nevertheless, DTypes should + * aim to ensure that their common-dtype implementation is associative + * and commutative! (Mainly, unsigned and signed integers are not.) + * + * For guaranteed consistent results DTypes must implement common-DType + * "transitively". If A promotes B and B promotes C, then A must generally + * also promote C; where "promotes" means implements the promotion. + * (There are some exceptions for abstract DTypes.) + * + * WARNING + * ======= + * + * By using this header, you understand that this is a fully experimental + * exposure. Details are expected to change, and some options may have no + * effect. (Please contact @seberg if you have questions!) + * If the exposure stops working, please file a bug report with NumPy. + * Further, a DType created using this API/header should still be expected + * to be incompatible with some functionality inside and outside of NumPy. + * In this case crashes must be expected. Please report any such problems + * so that they can be fixed before final exposure. 
+ * Furthermore, expect missing checks for programming errors which the final + * API is expected to have. + * + * Symbols with a leading underscore are likely to not be included in the + * first public version; if these are central to your use-case, please let + * us know, so that we can reconsider. + * + * "Array-like" consumer API not yet under consideration + * ====================================================== + * + * The new DType API is designed in a way to make it potentially useful for + * alternative "array-like" implementations. This will require careful + * exposure of details and functions and is not part of this experimental API. + */ + +#ifndef NUMPY_CORE_INCLUDE_NUMPY_EXPERIMENTAL_DTYPE_API_H_ +#define NUMPY_CORE_INCLUDE_NUMPY_EXPERIMENTAL_DTYPE_API_H_ + +#include <Python.h> +#include "ndarraytypes.h" + + +/* + * Just a hack so I don't forget importing as much myself; I spent way too + * much time noticing it the first time around :). + */ +static void +__not_imported(void) +{ + printf("*****\nCritical error, dtype API not imported\n*****\n"); +} +static void *__uninitialized_table[] = { + &__not_imported, &__not_imported, &__not_imported, &__not_imported, + &__not_imported, &__not_imported, &__not_imported, &__not_imported}; + + +static void **__experimental_dtype_api_table = __uninitialized_table; + + +/* + * DTypeMeta struct, the content may be made fully opaque (except the size). + * We may also move everything into a single `void *dt_slots`. + */ +typedef struct { + PyHeapTypeObject super; + PyArray_Descr *singleton; + int type_num; + PyTypeObject *scalar_type; + npy_uint64 flags; + void *dt_slots; + void *reserved[3]; +} PyArray_DTypeMeta; + + +/* + * ****************************************************** + * ArrayMethod API (Casting and UFuncs) + * ****************************************************** + */ +/* + * NOTE: Expected changes: + * * invert logic of floating point error flag + * * probably split runtime and general flags into two + * * should possibly not use an enum for typedef for more stable ABI? + */ +typedef enum { + /* Flag for whether the GIL is required */ + NPY_METH_REQUIRES_PYAPI = 1 << 1, + /* + * Some functions cannot set floating point error flags; this flag + * gives us the option (not requirement) to skip floating point error + * setup/check. No function should set error flags and ignore them + * since it would interfere with chaining operations (e.g. casting). + */ + NPY_METH_NO_FLOATINGPOINT_ERRORS = 1 << 2, + /* Whether the method supports unaligned access (not runtime) */ + NPY_METH_SUPPORTS_UNALIGNED = 1 << 3, + + /* All flags which can change at runtime */ + NPY_METH_RUNTIME_FLAGS = ( + NPY_METH_REQUIRES_PYAPI | + NPY_METH_NO_FLOATINGPOINT_ERRORS), +} NPY_ARRAYMETHOD_FLAGS; + + +/* + * The main object for creating a new ArrayMethod. We use the typical `slots` + * mechanism used by the Python limited API (see below for the slot defs). + */ +typedef struct { + const char *name; + int nin, nout; + NPY_CASTING casting; + NPY_ARRAYMETHOD_FLAGS flags; + PyObject **dtypes; /* array of DType class objects */ + PyType_Slot *slots; +} PyArrayMethod_Spec; + + +typedef PyObject *_ufunc_addloop_fromspec_func( + PyObject *ufunc, PyArrayMethod_Spec *spec); +/* + * The main ufunc registration function. This adds a new implementation/loop + * to a ufunc. It replaces `PyUFunc_RegisterLoopForType`. 
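+ *
+ * (Editor's sketch, not part of the upstream header: a minimal idea of how
+ * a spec might be filled in. `UnitDType`, the loop, and the `ufunc` object
+ * are hypothetical names; storage is assumed to be a plain C double.)
+ *
+ *     static int
+ *     unit_multiply_loop(PyArrayMethod_Context *context, char *const *data,
+ *             const npy_intp *dimensions, const npy_intp *strides,
+ *             NpyAuxData *auxdata)
+ *     {
+ *         npy_intp i, N = dimensions[0];
+ *         char *in1 = data[0], *in2 = data[1], *out = data[2];
+ *         for (i = 0; i < N; i++) {
+ *             *(double *)out = *(double *)in1 * *(double *)in2;
+ *             in1 += strides[0]; in2 += strides[1]; out += strides[2];
+ *         }
+ *         return 0;
+ *     }
+ *
+ *     PyType_Slot slots[] = {{NPY_METH_strided_loop, &unit_multiply_loop},
+ *                            {0, NULL}};
+ *     PyObject *op_dtypes[] = {(PyObject *)&UnitDType, (PyObject *)&UnitDType,
+ *                              (PyObject *)&UnitDType};
+ *     PyArrayMethod_Spec spec = {
+ *         .name = "unit_multiply", .nin = 2, .nout = 1,
+ *         .casting = NPY_NO_CASTING, .flags = 0,
+ *         .dtypes = op_dtypes, .slots = slots,
+ *     };
+ *     if (PyUFunc_AddLoopFromSpec(ufunc, &spec) == NULL) {
+ *         return -1;
+ *     }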
+ */ +#define PyUFunc_AddLoopFromSpec \ + (*(_ufunc_addloop_fromspec_func *)(__experimental_dtype_api_table[0])) + + +/* + * Type of the C promoter function, which must be wrapped into a + * PyCapsule with name "numpy._ufunc_promoter". + * + * Note that currently the output dtypes are always NULL unless they are + * also part of the signature. This is an implementation detail and could + * change in the future. However, in general promoters should not have a + * need for output dtypes. + * (There are potential use-cases, but these are currently unsupported.) + */ +typedef int promoter_function(PyObject *ufunc, + PyArray_DTypeMeta *op_dtypes[], PyArray_DTypeMeta *signature[], + PyArray_DTypeMeta *new_op_dtypes[]); + +/* + * Function to register a promoter. + * + * @param ufunc The ufunc object to register the promoter with. + * @param DType_tuple A Python tuple containing DTypes or None matching the + * number of inputs and outputs of the ufunc. + * @param promoter A PyCapsule with name "numpy._ufunc_promoter" containing + * a pointer to a `promoter_function`. + */ +typedef int _ufunc_addpromoter_func( + PyObject *ufunc, PyObject *DType_tuple, PyObject *promoter); +#define PyUFunc_AddPromoter \ + (*(_ufunc_addpromoter_func *)(__experimental_dtype_api_table[1])) + +/* + * In addition to the normal casting levels, NPY_CAST_IS_VIEW indicates + * that no cast operation is necessary at all (although a copy usually will be). + * + * NOTE: The most likely modification here is to add an additional + * `view_offset` output to resolve_descriptors. If set, it would + * indicate both that it is a view and what offset to use. This means that + * e.g. `arr.imag` could be implemented by an ArrayMethod. + */ +#define NPY_CAST_IS_VIEW _NPY_CAST_IS_VIEW + +/* + * The resolve descriptors function must be able to handle NULL values for + * all output (but not input) `given_descrs` and fill `loop_descrs`. + * Return -1 on error or 0 if the operation is not possible without an error + * set. (This may still be in flux.) + * Otherwise it must return the "casting safety"; for normal functions, this is + * almost always "safe" (or even "equivalent"?). + * + * `resolve_descriptors` is optional if all output DTypes are non-parametric. + */ +#define NPY_METH_resolve_descriptors 1 +typedef NPY_CASTING (resolve_descriptors_function)( + /* "method" is currently opaque (necessary e.g. to wrap Python) */ + PyObject *method, + /* DTypes the method was created for */ + PyObject **dtypes, + /* Input descriptors (instances). Outputs may be NULL. */ + PyArray_Descr **given_descrs, + /* Exact loop descriptors to use, must not hold references on error */ + PyArray_Descr **loop_descrs); + +/* NOT public yet: Signature needs adapting as external API. */ +#define _NPY_METH_get_loop 2 + +/* + * Current public API to define fast inner-loops. You must provide a + * strided loop. If this is a cast between two "versions" of the same dtype + * you must also provide an unaligned strided loop. + * Other loops are useful to optimize the very common contiguous case. + * + * NOTE: As of now, NumPy will NOT use unaligned loops in ufuncs! + */ +#define NPY_METH_strided_loop 3 +#define NPY_METH_contiguous_loop 4 +#define NPY_METH_unaligned_strided_loop 5 +#define NPY_METH_unaligned_contiguous_loop 6 + + +typedef struct { + PyObject *caller; /* E.g. the original ufunc, may be NULL */ + PyObject *method; /* The method "self". 
+typedef struct {
+    PyObject *caller;  /* E.g. the original ufunc, may be NULL */
+    PyObject *method;  /* The method "self". Currently an opaque object */
+
+    /* Operand descriptors, filled in by resolve_descriptors */
+    PyArray_Descr **descriptors;
+    /* Structure may grow (this is harmless for DType authors) */
+} PyArrayMethod_Context;
+
+typedef int (PyArrayMethod_StridedLoop)(PyArrayMethod_Context *context,
+        char *const *data, const npy_intp *dimensions, const npy_intp *strides,
+        NpyAuxData *transferdata);
+
+
+/*
+ * ****************************
+ *          DTYPE API
+ * ****************************
+ */
+
+#define NPY_DT_ABSTRACT 1 << 1
+#define NPY_DT_PARAMETRIC 1 << 2
+
+#define NPY_DT_discover_descr_from_pyobject 1
+#define _NPY_DT_is_known_scalar_type 2
+#define NPY_DT_default_descr 3
+#define NPY_DT_common_dtype 4
+#define NPY_DT_common_instance 5
+#define NPY_DT_setitem 6
+#define NPY_DT_getitem 7
+
+
+// TODO: These slots probably still need some thought, and/or a way to "grow"?
+typedef struct {
+    PyTypeObject *typeobj;  /* type of python scalar or NULL */
+    int flags;              /* flags, including parametric and abstract */
+    /* NULL terminated cast definitions. Use NULL for the newly created DType */
+    PyArrayMethod_Spec **casts;
+    PyType_Slot *slots;
+    /* Baseclass or NULL (will always subclass `np.dtype`) */
+    PyTypeObject *baseclass;
+} PyArrayDTypeMeta_Spec;
+
+
+#define PyArrayDTypeMeta_Type \
+    (*(PyTypeObject *)__experimental_dtype_api_table[2])
+typedef int __dtypemeta_fromspec(
+        PyArray_DTypeMeta *DType, PyArrayDTypeMeta_Spec *dtype_spec);
+/*
+ * Finalize creation of a DTypeMeta. You must ensure that the DTypeMeta is
+ * a proper subclass. The DTypeMeta object has additional fields compared to
+ * a normal PyTypeObject!
+ * The only (easy) creation of a new DType is to create a static Type which
+ * inherits `PyArray_DescrType`, sets its type to `PyArrayDTypeMeta_Type` and
+ * uses `PyArray_DTypeMeta` defined above as the C-structure.
+ */
+#define PyArrayInitDTypeMeta_FromSpec \
+    ((__dtypemeta_fromspec *)(__experimental_dtype_api_table[3]))
+
+
+/*
+ * *************************************
+ *          WORKING WITH DTYPES
+ * *************************************
+ */
+
+typedef PyArray_DTypeMeta *__common_dtype(
+        PyArray_DTypeMeta *DType1, PyArray_DTypeMeta *DType2);
+#define PyArray_CommonDType \
+    ((__common_dtype *)(__experimental_dtype_api_table[4]))
+
+
+typedef PyArray_DTypeMeta *__promote_dtype_sequence(
+        npy_intp num, PyArray_DTypeMeta *DTypes[]);
+#define PyArray_PromoteDTypeSequence \
+    ((__promote_dtype_sequence *)(__experimental_dtype_api_table[5]))
+
+
+/*
+ * ********************************
+ *         Initialization
+ * ********************************
+ *
+ * Import the experimental API; the version must match the one defined in
+ * the header to ensure changes are taken into account. NumPy will further
+ * runtime-check this.
+ * You must call this function to use the symbols defined in this file.
+ */
+#define __EXPERIMENTAL_DTYPE_VERSION 2
+
+static int
+import_experimental_dtype_api(int version)
+{
+    if (version != __EXPERIMENTAL_DTYPE_VERSION) {
+        PyErr_Format(PyExc_RuntimeError,
+                "DType API version %d did not match header version %d. Please "
+                "update the import statement and check for API changes.",
+                version, __EXPERIMENTAL_DTYPE_VERSION);
+        return -1;
+    }
+    if (__experimental_dtype_api_table != __uninitialized_table) {
+        /* already imported. */
+        return 0;
+    }
+
+    PyObject *multiarray = PyImport_ImportModule("numpy.core._multiarray_umath");
+    if (multiarray == NULL) {
+        return -1;
+    }
+
+    PyObject *api = PyObject_CallMethod(multiarray,
+        "_get_experimental_dtype_api", "i", version);
+    Py_DECREF(multiarray);
+    if (api == NULL) {
+        return -1;
+    }
+    __experimental_dtype_api_table = PyCapsule_GetPointer(api,
+        "experimental_dtype_api_table");
+    Py_DECREF(api);
+
+    if (__experimental_dtype_api_table == NULL) {
+        __experimental_dtype_api_table = __uninitialized_table;
+        return -1;
+    }
+    return 0;
+}
+
+#endif  /* NUMPY_CORE_INCLUDE_NUMPY_EXPERIMENTAL_DTYPE_API_H_ */
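For reference, a minimal (hypothetical) module-initialization sketch; the module name `mymodule` is a placeholder, and the version passed must equal `__EXPERIMENTAL_DTYPE_VERSION` above:

static struct PyModuleDef mymodule_def = {
    PyModuleDef_HEAD_INIT, "mymodule", NULL, -1, NULL,
};

PyMODINIT_FUNC
PyInit_mymodule(void)
{
    /* Must run before any other symbol from this header is used. */
    if (import_experimental_dtype_api(2) < 0) {
        return NULL;  /* exception already set by the import helper */
    }
    return PyModule_Create(&mymodule_def);
}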
diff --git a/.local/lib/python3.8/site-packages/numpy/core/include/numpy/halffloat.h b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/halffloat.h
new file mode 100644
index 0000000..9504016
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/halffloat.h
@@ -0,0 +1,70 @@
+#ifndef NUMPY_CORE_INCLUDE_NUMPY_HALFFLOAT_H_
+#define NUMPY_CORE_INCLUDE_NUMPY_HALFFLOAT_H_
+
+#include <Python.h>
+#include <numpy/npy_math.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Half-precision routines
+ */
+
+/* Conversions */
+float npy_half_to_float(npy_half h);
+double npy_half_to_double(npy_half h);
+npy_half npy_float_to_half(float f);
+npy_half npy_double_to_half(double d);
+/* Comparisons */
+int npy_half_eq(npy_half h1, npy_half h2);
+int npy_half_ne(npy_half h1, npy_half h2);
+int npy_half_le(npy_half h1, npy_half h2);
+int npy_half_lt(npy_half h1, npy_half h2);
+int npy_half_ge(npy_half h1, npy_half h2);
+int npy_half_gt(npy_half h1, npy_half h2);
+/* faster *_nonan variants for when you know h1 and h2 are not NaN */
+int npy_half_eq_nonan(npy_half h1, npy_half h2);
+int npy_half_lt_nonan(npy_half h1, npy_half h2);
+int npy_half_le_nonan(npy_half h1, npy_half h2);
+/* Miscellaneous functions */
+int npy_half_iszero(npy_half h);
+int npy_half_isnan(npy_half h);
+int npy_half_isinf(npy_half h);
+int npy_half_isfinite(npy_half h);
+int npy_half_signbit(npy_half h);
+npy_half npy_half_copysign(npy_half x, npy_half y);
+npy_half npy_half_spacing(npy_half h);
+npy_half npy_half_nextafter(npy_half x, npy_half y);
+npy_half npy_half_divmod(npy_half x, npy_half y, npy_half *modulus);
+
+/*
+ * Half-precision constants
+ */
+
+#define NPY_HALF_ZERO   (0x0000u)
+#define NPY_HALF_PZERO  (0x0000u)
+#define NPY_HALF_NZERO  (0x8000u)
+#define NPY_HALF_ONE    (0x3c00u)
+#define NPY_HALF_NEGONE (0xbc00u)
+#define NPY_HALF_PINF   (0x7c00u)
+#define NPY_HALF_NINF   (0xfc00u)
+#define NPY_HALF_NAN    (0x7e00u)
+
+#define NPY_MAX_HALF    (0x7bffu)
+
+/*
+ * Bit-level conversions
+ */
+
+npy_uint16 npy_floatbits_to_halfbits(npy_uint32 f);
+npy_uint16 npy_doublebits_to_halfbits(npy_uint64 d);
+npy_uint32 npy_halfbits_to_floatbits(npy_uint16 h);
+npy_uint64 npy_halfbits_to_doublebits(npy_uint16 h);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* NUMPY_CORE_INCLUDE_NUMPY_HALFFLOAT_H_ */
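A small usage sketch of the routines above (illustrative only; the commented values follow from half precision's 10-bit mantissa):

#include <stdio.h>

static void
half_demo(void)
{
    npy_half h = npy_double_to_half(0.1);   /* nearest half, bits 0x2e66 */
    double back = npy_half_to_double(h);    /* 0.0999755859375 */
    printf("0.1 as half: 0x%04x -> %.13f\n", (unsigned)h, back);

    printf("NAN is nan: %d\n", npy_half_isnan(NPY_HALF_NAN));             /* 1 */
    printf("PINF > ONE: %d\n", npy_half_gt(NPY_HALF_PINF, NPY_HALF_ONE)); /* 1 */
}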
diff --git a/.local/lib/python3.8/site-packages/numpy/core/include/numpy/libdivide/LICENSE.txt b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/libdivide/LICENSE.txt
new file mode 100644
index 0000000..d72a7c3
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/libdivide/LICENSE.txt
@@ -0,0 +1,21 @@
+  zlib License
+  ------------
+
+  Copyright (C) 2010 - 2019 ridiculous_fish,
+  Copyright (C) 2016 - 2019 Kim Walisch,
+
+  This software is provided 'as-is', without any express or implied
+  warranty. In no event will the authors be held liable for any damages
+  arising from the use of this software.
+
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+
+  1. The origin of this software must not be misrepresented; you must not
+     claim that you wrote the original software. If you use this software
+     in a product, an acknowledgment in the product documentation would be
+     appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+     misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
diff --git a/.local/lib/python3.8/site-packages/numpy/core/include/numpy/libdivide/libdivide.h b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/libdivide/libdivide.h
new file mode 100644
index 0000000..f4eb803
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/libdivide/libdivide.h
@@ -0,0 +1,2079 @@
+// libdivide.h - Optimized integer division
+// https://libdivide.com
+//
+// Copyright (C) 2010 - 2019 ridiculous_fish,
+// Copyright (C) 2016 - 2019 Kim Walisch,
+//
+// libdivide is dual-licensed under the Boost or zlib licenses.
+// You may use libdivide under the terms of either of these.
+// See LICENSE.txt for more details.
+
+#ifndef NUMPY_CORE_INCLUDE_NUMPY_LIBDIVIDE_LIBDIVIDE_H_
+#define NUMPY_CORE_INCLUDE_NUMPY_LIBDIVIDE_LIBDIVIDE_H_
+
+#define LIBDIVIDE_VERSION "3.0"
+#define LIBDIVIDE_VERSION_MAJOR 3
+#define LIBDIVIDE_VERSION_MINOR 0
+
+#include <stdint.h>
+
+#if defined(__cplusplus)
+    #include <cstdlib>
+    #include <cstdio>
+    #include <type_traits>
+#else
+    #include <stdlib.h>
+    #include <stdio.h>
+#endif
+
+#if defined(LIBDIVIDE_AVX512)
+    #include <immintrin.h>
+#elif defined(LIBDIVIDE_AVX2)
+    #include <immintrin.h>
+#elif defined(LIBDIVIDE_SSE2)
+    #include <emmintrin.h>
+#endif
+
+#if defined(_MSC_VER)
+    #include <intrin.h>
+    // disable warning C4146: unary minus operator applied
+    // to unsigned type, result still unsigned
+    #pragma warning(disable: 4146)
+    #define LIBDIVIDE_VC
+#endif
+
+#if !defined(__has_builtin)
+    #define __has_builtin(x) 0
+#endif
+
+#if defined(__SIZEOF_INT128__)
+    #define HAS_INT128_T
+    // clang-cl on Windows does not yet support 128-bit division
+    #if !(defined(__clang__) && defined(LIBDIVIDE_VC))
+        #define HAS_INT128_DIV
+    #endif
+#endif
+
+#if defined(__x86_64__) || defined(_M_X64)
+    #define LIBDIVIDE_X86_64
+#endif
+
+#if defined(__i386__)
+    #define LIBDIVIDE_i386
+#endif
+
+#if defined(__GNUC__) || defined(__clang__)
+    #define LIBDIVIDE_GCC_STYLE_ASM
+#endif
+
+#if defined(__cplusplus) || defined(LIBDIVIDE_VC)
+    #define LIBDIVIDE_FUNCTION __FUNCTION__
+#else
+    #define LIBDIVIDE_FUNCTION __func__
+#endif
+
+#define LIBDIVIDE_ERROR(msg) \
+    do { \
+        fprintf(stderr, "libdivide.h:%d: %s(): Error: %s\n", \
+            __LINE__, LIBDIVIDE_FUNCTION, msg); \
+        abort(); \
+    } while (0)
+
+#if defined(LIBDIVIDE_ASSERTIONS_ON)
+    #define LIBDIVIDE_ASSERT(x) \
+        do { \
+            if (!(x)) { \
+                fprintf(stderr, "libdivide.h:%d: %s(): Assertion failed: %s\n", \
+                    __LINE__, LIBDIVIDE_FUNCTION, #x); \
+                abort(); \
+            } \
+        } while (0)
+#else
+    #define LIBDIVIDE_ASSERT(x)
+#endif
+
+#ifdef __cplusplus
+namespace libdivide {
+#endif
+
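Before the implementation itself, a sketch of the intended usage pattern (the function `divide_buffer` is a hypothetical caller; it relies on the structs and functions declared below): generate a divider once, then reuse it for many divisions.

/* Hedged sketch: repeated division by a divisor known only at runtime. */
static void
divide_buffer(uint32_t *data, size_t n, uint32_t d)
{
    struct libdivide_u32_t divider = libdivide_u32_gen(d);  /* expensive, do once */
    for (size_t i = 0; i < n; i++) {
        data[i] = libdivide_u32_do(data[i], &divider);      /* cheap multiply+shift */
    }
}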
+// pack divider structs to prevent compilers from padding.
+// This reduces memory usage by up to 43% when using a large
+// array of libdivide dividers and improves performance
+// by up to 10% because of reduced memory bandwidth.
+#pragma pack(push, 1)
+
+struct libdivide_u32_t {
+    uint32_t magic;
+    uint8_t more;
+};
+
+struct libdivide_s32_t {
+    int32_t magic;
+    uint8_t more;
+};
+
+struct libdivide_u64_t {
+    uint64_t magic;
+    uint8_t more;
+};
+
+struct libdivide_s64_t {
+    int64_t magic;
+    uint8_t more;
+};
+
+struct libdivide_u32_branchfree_t {
+    uint32_t magic;
+    uint8_t more;
+};
+
+struct libdivide_s32_branchfree_t {
+    int32_t magic;
+    uint8_t more;
+};
+
+struct libdivide_u64_branchfree_t {
+    uint64_t magic;
+    uint8_t more;
+};
+
+struct libdivide_s64_branchfree_t {
+    int64_t magic;
+    uint8_t more;
+};
+
+#pragma pack(pop)
+
+// Explanation of the "more" field:
+//
+// * Bits 0-5 is the shift value (for shift path or mult path).
+// * Bit 6 is the add indicator for mult path.
+// * Bit 7 is set if the divisor is negative. We use bit 7 as the negative
+//   divisor indicator so that we can efficiently use sign extension to
+//   create a bitmask with all bits set to 1 (if the divisor is negative)
+//   or 0 (if the divisor is positive).
+//
+// u32: [0-4] shift value
+//      [5] ignored
+//      [6] add indicator
+//      magic number of 0 indicates shift path
+//
+// s32: [0-4] shift value
+//      [5] ignored
+//      [6] add indicator
+//      [7] indicates negative divisor
+//      magic number of 0 indicates shift path
+//
+// u64: [0-5] shift value
+//      [6] add indicator
+//      magic number of 0 indicates shift path
+//
+// s64: [0-5] shift value
+//      [6] add indicator
+//      [7] indicates negative divisor
+//      magic number of 0 indicates shift path
+//
+// In the s32 and s64 branchfull strategy, the magic number is negated
+// according to whether the divisor is negated. In the branchfree strategy,
+// it is not negated.
+
+enum {
+    LIBDIVIDE_32_SHIFT_MASK = 0x1F,
+    LIBDIVIDE_64_SHIFT_MASK = 0x3F,
+    LIBDIVIDE_ADD_MARKER = 0x40,
+    LIBDIVIDE_NEGATIVE_DIVISOR = 0x80
+};
+
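One worked example, computed from the u32 gen algorithm below and shown purely as an illustration of the encoding:

/*
 * Worked example (illustrative): for d = 7, libdivide_u32_gen() produces
 * magic = 0x24924925 and more = LIBDIVIDE_ADD_MARKER | 2 (mult path with
 * add indicator, shift = 2). libdivide_u32_do() then evaluates, e.g. for
 * numer = 100:
 *     q = mullhi(0x24924925, 100) = 14    // high 32 bits of 64-bit product
 *     t = ((100 - 14) >> 1) + 14 = 57     // add-indicator correction
 *     t >> 2 = 14                         // == 100 / 7
 */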
+static inline struct libdivide_s32_t libdivide_s32_gen(int32_t d);
+static inline struct libdivide_u32_t libdivide_u32_gen(uint32_t d);
+static inline struct libdivide_s64_t libdivide_s64_gen(int64_t d);
+static inline struct libdivide_u64_t libdivide_u64_gen(uint64_t d);
+
+static inline struct libdivide_s32_branchfree_t libdivide_s32_branchfree_gen(int32_t d);
+static inline struct libdivide_u32_branchfree_t libdivide_u32_branchfree_gen(uint32_t d);
+static inline struct libdivide_s64_branchfree_t libdivide_s64_branchfree_gen(int64_t d);
+static inline struct libdivide_u64_branchfree_t libdivide_u64_branchfree_gen(uint64_t d);
+
+static inline int32_t libdivide_s32_do(int32_t numer, const struct libdivide_s32_t *denom);
+static inline uint32_t libdivide_u32_do(uint32_t numer, const struct libdivide_u32_t *denom);
+static inline int64_t libdivide_s64_do(int64_t numer, const struct libdivide_s64_t *denom);
+static inline uint64_t libdivide_u64_do(uint64_t numer, const struct libdivide_u64_t *denom);
+
+static inline int32_t libdivide_s32_branchfree_do(int32_t numer, const struct libdivide_s32_branchfree_t *denom);
+static inline uint32_t libdivide_u32_branchfree_do(uint32_t numer, const struct libdivide_u32_branchfree_t *denom);
+static inline int64_t libdivide_s64_branchfree_do(int64_t numer, const struct libdivide_s64_branchfree_t *denom);
+static inline uint64_t libdivide_u64_branchfree_do(uint64_t numer, const struct libdivide_u64_branchfree_t *denom);
+
+static inline int32_t libdivide_s32_recover(const struct libdivide_s32_t *denom);
+static inline uint32_t libdivide_u32_recover(const struct libdivide_u32_t *denom);
+static inline int64_t libdivide_s64_recover(const struct libdivide_s64_t *denom);
+static inline uint64_t libdivide_u64_recover(const struct libdivide_u64_t *denom);
+
+static inline int32_t libdivide_s32_branchfree_recover(const struct libdivide_s32_branchfree_t *denom);
+static inline uint32_t libdivide_u32_branchfree_recover(const struct libdivide_u32_branchfree_t *denom);
+static inline int64_t libdivide_s64_branchfree_recover(const struct libdivide_s64_branchfree_t *denom);
+static inline uint64_t libdivide_u64_branchfree_recover(const struct libdivide_u64_branchfree_t *denom);
+
+//////// Internal Utility Functions
+
+static inline uint32_t libdivide_mullhi_u32(uint32_t x, uint32_t y) {
+    uint64_t xl = x, yl = y;
+    uint64_t rl = xl * yl;
+    return (uint32_t)(rl >> 32);
+}
+
+static inline int32_t libdivide_mullhi_s32(int32_t x, int32_t y) {
+    int64_t xl = x, yl = y;
+    int64_t rl = xl * yl;
+    // needs to be arithmetic shift
+    return (int32_t)(rl >> 32);
+}
+
+static inline uint64_t libdivide_mullhi_u64(uint64_t x, uint64_t y) {
+#if defined(LIBDIVIDE_VC) && \
+    defined(LIBDIVIDE_X86_64)
+    return __umulh(x, y);
+#elif defined(HAS_INT128_T)
+    __uint128_t xl = x, yl = y;
+    __uint128_t rl = xl * yl;
+    return (uint64_t)(rl >> 64);
+#else
+    // full 128 bits are x0 * y0 + (x0 * y1 << 32) + (x1 * y0 << 32) + (x1 * y1 << 64)
+    uint32_t mask = 0xFFFFFFFF;
+    uint32_t x0 = (uint32_t)(x & mask);
+    uint32_t x1 = (uint32_t)(x >> 32);
+    uint32_t y0 = (uint32_t)(y & mask);
+    uint32_t y1 = (uint32_t)(y >> 32);
+    uint32_t x0y0_hi = libdivide_mullhi_u32(x0, y0);
+    uint64_t x0y1 = x0 * (uint64_t)y1;
+    uint64_t x1y0 = x1 * (uint64_t)y0;
+    uint64_t x1y1 = x1 * (uint64_t)y1;
+    uint64_t temp = x1y0 + x0y0_hi;
+    uint64_t temp_lo = temp & mask;
+    uint64_t temp_hi = temp >> 32;
+
+    return x1y1 + temp_hi + ((temp_lo + x0y1) >> 32);
+#endif
+}
+
+static inline int64_t libdivide_mullhi_s64(int64_t x, int64_t y) {
+#if defined(LIBDIVIDE_VC) && \
+    defined(LIBDIVIDE_X86_64)
+    return __mulh(x, y);
+#elif defined(HAS_INT128_T)
+    __int128_t xl = x, yl = y;
+    __int128_t rl = xl * yl;
+    return (int64_t)(rl >> 64);
+#else
+    // full 128 bits are x0 * y0 + (x0 * y1 << 32) + (x1 * y0 << 32) + (x1 * y1 << 64)
+    uint32_t mask = 0xFFFFFFFF;
+    uint32_t x0 = (uint32_t)(x & mask);
+    uint32_t y0 = (uint32_t)(y & mask);
+    int32_t x1 = (int32_t)(x >> 32);
+    int32_t y1 = (int32_t)(y >> 32);
+    uint32_t x0y0_hi = libdivide_mullhi_u32(x0, y0);
+    int64_t t = x1 * (int64_t)y0 + x0y0_hi;
+    int64_t w1 = x0 * (int64_t)y1 + (t & mask);
+
+    return x1 * (int64_t)y1 + (t >> 32) + (w1 >> 32);
+#endif
+}
+
+static inline int32_t libdivide_count_leading_zeros32(uint32_t val) {
+#if defined(__GNUC__) || \
+    __has_builtin(__builtin_clz)
+    // Fast way to count leading zeros
+    return __builtin_clz(val);
+#elif defined(LIBDIVIDE_VC)
+    unsigned long result;
+    if (_BitScanReverse(&result, val)) {
+        return 31 - result;
+    }
+    return 0;
+#else
+    if (val == 0)
+        return 32;
+    int32_t result = 8;
+    uint32_t hi = 0xFFU << 24;
+    while ((val & hi) == 0) {
+        hi >>= 8;
+        result += 8;
+    }
+    while (val & hi) {
+        result -= 1;
+        hi <<= 1;
+    }
+    return result;
+#endif
+}
+
+static inline int32_t libdivide_count_leading_zeros64(uint64_t val) {
+#if defined(__GNUC__) || \
+    __has_builtin(__builtin_clzll)
+    // Fast way to count leading zeros
+    return __builtin_clzll(val);
+#elif defined(LIBDIVIDE_VC) && defined(_WIN64)
+    unsigned long result;
+    if (_BitScanReverse64(&result, val)) {
+        return 63 - result;
+    }
+    return 0;
+#else
+    uint32_t hi = val >> 32;
+    uint32_t lo = val & 0xFFFFFFFF;
+    if (hi != 0) return libdivide_count_leading_zeros32(hi);
+
return 32 + libdivide_count_leading_zeros32(lo); +#endif +} + +// libdivide_64_div_32_to_32: divides a 64-bit uint {u1, u0} by a 32-bit +// uint {v}. The result must fit in 32 bits. +// Returns the quotient directly and the remainder in *r +static inline uint32_t libdivide_64_div_32_to_32(uint32_t u1, uint32_t u0, uint32_t v, uint32_t *r) { +#if (defined(LIBDIVIDE_i386) || defined(LIBDIVIDE_X86_64)) && \ + defined(LIBDIVIDE_GCC_STYLE_ASM) + uint32_t result; + __asm__("divl %[v]" + : "=a"(result), "=d"(*r) + : [v] "r"(v), "a"(u0), "d"(u1) + ); + return result; +#else + uint64_t n = ((uint64_t)u1 << 32) | u0; + uint32_t result = (uint32_t)(n / v); + *r = (uint32_t)(n - result * (uint64_t)v); + return result; +#endif +} + +// libdivide_128_div_64_to_64: divides a 128-bit uint {u1, u0} by a 64-bit +// uint {v}. The result must fit in 64 bits. +// Returns the quotient directly and the remainder in *r +static uint64_t libdivide_128_div_64_to_64(uint64_t u1, uint64_t u0, uint64_t v, uint64_t *r) { +#if defined(LIBDIVIDE_X86_64) && \ + defined(LIBDIVIDE_GCC_STYLE_ASM) + uint64_t result; + __asm__("divq %[v]" + : "=a"(result), "=d"(*r) + : [v] "r"(v), "a"(u0), "d"(u1) + ); + return result; +#elif defined(HAS_INT128_T) && \ + defined(HAS_INT128_DIV) + __uint128_t n = ((__uint128_t)u1 << 64) | u0; + uint64_t result = (uint64_t)(n / v); + *r = (uint64_t)(n - result * (__uint128_t)v); + return result; +#else + // Code taken from Hacker's Delight: + // http://www.hackersdelight.org/HDcode/divlu.c. + // License permits inclusion here per: + // http://www.hackersdelight.org/permissions.htm + + const uint64_t b = (1ULL << 32); // Number base (32 bits) + uint64_t un1, un0; // Norm. dividend LSD's + uint64_t vn1, vn0; // Norm. divisor digits + uint64_t q1, q0; // Quotient digits + uint64_t un64, un21, un10; // Dividend digit pairs + uint64_t rhat; // A remainder + int32_t s; // Shift amount for norm + + // If overflow, set rem. to an impossible value, + // and return the largest possible quotient + if (u1 >= v) { + *r = (uint64_t) -1; + return (uint64_t) -1; + } + + // count leading zeros + s = libdivide_count_leading_zeros64(v); + if (s > 0) { + // Normalize divisor + v = v << s; + un64 = (u1 << s) | (u0 >> (64 - s)); + un10 = u0 << s; // Shift dividend left + } else { + // Avoid undefined behavior of (u0 >> 64). + // The behavior is undefined if the right operand is + // negative, or greater than or equal to the length + // in bits of the promoted left operand. 
+ un64 = u1; + un10 = u0; + } + + // Break divisor up into two 32-bit digits + vn1 = v >> 32; + vn0 = v & 0xFFFFFFFF; + + // Break right half of dividend into two digits + un1 = un10 >> 32; + un0 = un10 & 0xFFFFFFFF; + + // Compute the first quotient digit, q1 + q1 = un64 / vn1; + rhat = un64 - q1 * vn1; + + while (q1 >= b || q1 * vn0 > b * rhat + un1) { + q1 = q1 - 1; + rhat = rhat + vn1; + if (rhat >= b) + break; + } + + // Multiply and subtract + un21 = un64 * b + un1 - q1 * v; + + // Compute the second quotient digit + q0 = un21 / vn1; + rhat = un21 - q0 * vn1; + + while (q0 >= b || q0 * vn0 > b * rhat + un0) { + q0 = q0 - 1; + rhat = rhat + vn1; + if (rhat >= b) + break; + } + + *r = (un21 * b + un0 - q0 * v) >> s; + return q1 * b + q0; +#endif +} + +// Bitshift a u128 in place, left (signed_shift > 0) or right (signed_shift < 0) +static inline void libdivide_u128_shift(uint64_t *u1, uint64_t *u0, int32_t signed_shift) { + if (signed_shift > 0) { + uint32_t shift = signed_shift; + *u1 <<= shift; + *u1 |= *u0 >> (64 - shift); + *u0 <<= shift; + } + else if (signed_shift < 0) { + uint32_t shift = -signed_shift; + *u0 >>= shift; + *u0 |= *u1 << (64 - shift); + *u1 >>= shift; + } +} + +// Computes a 128 / 128 -> 64 bit division, with a 128 bit remainder. +static uint64_t libdivide_128_div_128_to_64(uint64_t u_hi, uint64_t u_lo, uint64_t v_hi, uint64_t v_lo, uint64_t *r_hi, uint64_t *r_lo) { +#if defined(HAS_INT128_T) && \ + defined(HAS_INT128_DIV) + __uint128_t ufull = u_hi; + __uint128_t vfull = v_hi; + ufull = (ufull << 64) | u_lo; + vfull = (vfull << 64) | v_lo; + uint64_t res = (uint64_t)(ufull / vfull); + __uint128_t remainder = ufull - (vfull * res); + *r_lo = (uint64_t)remainder; + *r_hi = (uint64_t)(remainder >> 64); + return res; +#else + // Adapted from "Unsigned Doubleword Division" in Hacker's Delight + // We want to compute u / v + typedef struct { uint64_t hi; uint64_t lo; } u128_t; + u128_t u = {u_hi, u_lo}; + u128_t v = {v_hi, v_lo}; + + if (v.hi == 0) { + // divisor v is a 64 bit value, so we just need one 128/64 division + // Note that we are simpler than Hacker's Delight here, because we know + // the quotient fits in 64 bits whereas Hacker's Delight demands a full + // 128 bit quotient + *r_hi = 0; + return libdivide_128_div_64_to_64(u.hi, u.lo, v.lo, r_lo); + } + // Here v >= 2**64 + // We know that v.hi != 0, so count leading zeros is OK + // We have 0 <= n <= 63 + uint32_t n = libdivide_count_leading_zeros64(v.hi); + + // Normalize the divisor so its MSB is 1 + u128_t v1t = v; + libdivide_u128_shift(&v1t.hi, &v1t.lo, n); + uint64_t v1 = v1t.hi; // i.e. v1 = v1t >> 64 + + // To ensure no overflow + u128_t u1 = u; + libdivide_u128_shift(&u1.hi, &u1.lo, -1); + + // Get quotient from divide unsigned insn. + uint64_t rem_ignored; + uint64_t q1 = libdivide_128_div_64_to_64(u1.hi, u1.lo, v1, &rem_ignored); + + // Undo normalization and division of u by 2. + u128_t q0 = {0, q1}; + libdivide_u128_shift(&q0.hi, &q0.lo, n); + libdivide_u128_shift(&q0.hi, &q0.lo, -63); + + // Make q0 correct or too small by 1 + // Equivalent to `if (q0 != 0) q0 = q0 - 1;` + if (q0.hi != 0 || q0.lo != 0) { + q0.hi -= (q0.lo == 0); // borrow + q0.lo -= 1; + } + + // Now q0 is correct. + // Compute q0 * v as q0v + // = (q0.hi << 64 + q0.lo) * (v.hi << 64 + v.lo) + // = (q0.hi * v.hi << 128) + (q0.hi * v.lo << 64) + + // (q0.lo * v.hi << 64) + q0.lo * v.lo) + // Each term is 128 bit + // High half of full product (upper 128 bits!) 
are dropped + u128_t q0v = {0, 0}; + q0v.hi = q0.hi*v.lo + q0.lo*v.hi + libdivide_mullhi_u64(q0.lo, v.lo); + q0v.lo = q0.lo*v.lo; + + // Compute u - q0v as u_q0v + // This is the remainder + u128_t u_q0v = u; + u_q0v.hi -= q0v.hi + (u.lo < q0v.lo); // second term is borrow + u_q0v.lo -= q0v.lo; + + // Check if u_q0v >= v + // This checks if our remainder is larger than the divisor + if ((u_q0v.hi > v.hi) || + (u_q0v.hi == v.hi && u_q0v.lo >= v.lo)) { + // Increment q0 + q0.lo += 1; + q0.hi += (q0.lo == 0); // carry + + // Subtract v from remainder + u_q0v.hi -= v.hi + (u_q0v.lo < v.lo); + u_q0v.lo -= v.lo; + } + + *r_hi = u_q0v.hi; + *r_lo = u_q0v.lo; + + LIBDIVIDE_ASSERT(q0.hi == 0); + return q0.lo; +#endif +} + +////////// UINT32 + +static inline struct libdivide_u32_t libdivide_internal_u32_gen(uint32_t d, int branchfree) { + if (d == 0) { + LIBDIVIDE_ERROR("divider must be != 0"); + } + + struct libdivide_u32_t result; + uint32_t floor_log_2_d = 31 - libdivide_count_leading_zeros32(d); + + // Power of 2 + if ((d & (d - 1)) == 0) { + // We need to subtract 1 from the shift value in case of an unsigned + // branchfree divider because there is a hardcoded right shift by 1 + // in its division algorithm. Because of this we also need to add back + // 1 in its recovery algorithm. + result.magic = 0; + result.more = (uint8_t)(floor_log_2_d - (branchfree != 0)); + } else { + uint8_t more; + uint32_t rem, proposed_m; + proposed_m = libdivide_64_div_32_to_32(1U << floor_log_2_d, 0, d, &rem); + + LIBDIVIDE_ASSERT(rem > 0 && rem < d); + const uint32_t e = d - rem; + + // This power works if e < 2**floor_log_2_d. + if (!branchfree && (e < (1U << floor_log_2_d))) { + // This power works + more = floor_log_2_d; + } else { + // We have to use the general 33-bit algorithm. We need to compute + // (2**power) / d. However, we already have (2**(power-1))/d and + // its remainder. By doubling both, and then correcting the + // remainder, we can compute the larger division. + // don't care about overflow here - in fact, we expect it + proposed_m += proposed_m; + const uint32_t twice_rem = rem + rem; + if (twice_rem >= d || twice_rem < rem) proposed_m += 1; + more = floor_log_2_d | LIBDIVIDE_ADD_MARKER; + } + result.magic = 1 + proposed_m; + result.more = more; + // result.more's shift should in general be ceil_log_2_d. But if we + // used the smaller power, we subtract one from the shift because we're + // using the smaller power. If we're using the larger power, we + // subtract one from the shift because it's taken care of by the add + // indicator. So floor_log_2_d happens to be correct in both cases. + } + return result; +} + +struct libdivide_u32_t libdivide_u32_gen(uint32_t d) { + return libdivide_internal_u32_gen(d, 0); +} + +struct libdivide_u32_branchfree_t libdivide_u32_branchfree_gen(uint32_t d) { + if (d == 1) { + LIBDIVIDE_ERROR("branchfree divider must be != 1"); + } + struct libdivide_u32_t tmp = libdivide_internal_u32_gen(d, 1); + struct libdivide_u32_branchfree_t ret = {tmp.magic, (uint8_t)(tmp.more & LIBDIVIDE_32_SHIFT_MASK)}; + return ret; +} + +uint32_t libdivide_u32_do(uint32_t numer, const struct libdivide_u32_t *denom) { + uint8_t more = denom->more; + if (!denom->magic) { + return numer >> more; + } + else { + uint32_t q = libdivide_mullhi_u32(denom->magic, numer); + if (more & LIBDIVIDE_ADD_MARKER) { + uint32_t t = ((numer - q) >> 1) + q; + return t >> (more & LIBDIVIDE_32_SHIFT_MASK); + } + else { + // All upper bits are 0, + // don't need to mask them off. 
+            return q >> more;
+        }
+    }
+}
+
+uint32_t libdivide_u32_branchfree_do(uint32_t numer, const struct libdivide_u32_branchfree_t *denom) {
+    uint32_t q = libdivide_mullhi_u32(denom->magic, numer);
+    uint32_t t = ((numer - q) >> 1) + q;
+    return t >> denom->more;
+}
+
+uint32_t libdivide_u32_recover(const struct libdivide_u32_t *denom) {
+    uint8_t more = denom->more;
+    uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK;
+
+    if (!denom->magic) {
+        return 1U << shift;
+    } else if (!(more & LIBDIVIDE_ADD_MARKER)) {
+        // We compute q = n/d = n*m / 2^(32 + shift)
+        // Therefore we have d = 2^(32 + shift) / m
+        // We need to ceil it.
+        // We know d is not a power of 2, so m is not a power of 2,
+        // so we can just add 1 to the floor
+        uint32_t hi_dividend = 1U << shift;
+        uint32_t rem_ignored;
+        return 1 + libdivide_64_div_32_to_32(hi_dividend, 0, denom->magic, &rem_ignored);
+    } else {
+        // Here we wish to compute d = 2^(32+shift+1)/(m+2^32).
+        // Notice (m + 2^32) is a 33 bit number. Use 64 bit division for now
+        // Also note that shift may be as high as 31, so shift + 1 will
+        // overflow. So we have to compute it as 2^(32+shift)/(m+2^32), and
+        // then double the quotient and remainder.
+        uint64_t half_n = 1ULL << (32 + shift);
+        uint64_t d = (1ULL << 32) | denom->magic;
+        // Note that the quotient is guaranteed <= 32 bits, but the remainder
+        // may need 33!
+        uint32_t half_q = (uint32_t)(half_n / d);
+        uint64_t rem = half_n % d;
+        // We computed 2^(32+shift)/(m+2^32)
+        // Need to double it, and then add 1 to the quotient if doubling the
+        // remainder would increase the quotient.
+        // Note that rem<<1 cannot overflow, since rem < d and d is 33 bits
+        uint32_t full_q = half_q + half_q + ((rem<<1) >= d);
+
+        // We rounded down in gen (hence +1)
+        return full_q + 1;
+    }
+}
+
+uint32_t libdivide_u32_branchfree_recover(const struct libdivide_u32_branchfree_t *denom) {
+    uint8_t more = denom->more;
+    uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK;
+
+    if (!denom->magic) {
+        return 1U << (shift + 1);
+    } else {
+        // Here we wish to compute d = 2^(32+shift+1)/(m+2^32).
+        // Notice (m + 2^32) is a 33 bit number. Use 64 bit division for now
+        // Also note that shift may be as high as 31, so shift + 1 will
+        // overflow. So we have to compute it as 2^(32+shift)/(m+2^32), and
+        // then double the quotient and remainder.
+        uint64_t half_n = 1ULL << (32 + shift);
+        uint64_t d = (1ULL << 32) | denom->magic;
+        // Note that the quotient is guaranteed <= 32 bits, but the remainder
+        // may need 33!
+        uint32_t half_q = (uint32_t)(half_n / d);
+        uint64_t rem = half_n % d;
+        // We computed 2^(32+shift)/(m+2^32)
+        // Need to double it, and then add 1 to the quotient if doubling the
+        // remainder would increase the quotient.
+        // Note that rem<<1 cannot overflow, since rem < d and d is 33 bits
+        uint32_t full_q = half_q + half_q + ((rem<<1) >= d);
+
+        // We rounded down in gen (hence +1)
+        return full_q + 1;
+    }
+}
+
+/////////// UINT64
+
+static inline struct libdivide_u64_t libdivide_internal_u64_gen(uint64_t d, int branchfree) {
+    if (d == 0) {
+        LIBDIVIDE_ERROR("divider must be != 0");
+    }
+
+    struct libdivide_u64_t result;
+    uint32_t floor_log_2_d = 63 - libdivide_count_leading_zeros64(d);
+
+    // Power of 2
+    if ((d & (d - 1)) == 0) {
+        // We need to subtract 1 from the shift value in case of an unsigned
+        // branchfree divider because there is a hardcoded right shift by 1
+        // in its division algorithm. Because of this we also need to add back
+        // 1 in its recovery algorithm.
+ result.magic = 0; + result.more = (uint8_t)(floor_log_2_d - (branchfree != 0)); + } else { + uint64_t proposed_m, rem; + uint8_t more; + // (1 << (64 + floor_log_2_d)) / d + proposed_m = libdivide_128_div_64_to_64(1ULL << floor_log_2_d, 0, d, &rem); + + LIBDIVIDE_ASSERT(rem > 0 && rem < d); + const uint64_t e = d - rem; + + // This power works if e < 2**floor_log_2_d. + if (!branchfree && e < (1ULL << floor_log_2_d)) { + // This power works + more = floor_log_2_d; + } else { + // We have to use the general 65-bit algorithm. We need to compute + // (2**power) / d. However, we already have (2**(power-1))/d and + // its remainder. By doubling both, and then correcting the + // remainder, we can compute the larger division. + // don't care about overflow here - in fact, we expect it + proposed_m += proposed_m; + const uint64_t twice_rem = rem + rem; + if (twice_rem >= d || twice_rem < rem) proposed_m += 1; + more = floor_log_2_d | LIBDIVIDE_ADD_MARKER; + } + result.magic = 1 + proposed_m; + result.more = more; + // result.more's shift should in general be ceil_log_2_d. But if we + // used the smaller power, we subtract one from the shift because we're + // using the smaller power. If we're using the larger power, we + // subtract one from the shift because it's taken care of by the add + // indicator. So floor_log_2_d happens to be correct in both cases, + // which is why we do it outside of the if statement. + } + return result; +} + +struct libdivide_u64_t libdivide_u64_gen(uint64_t d) { + return libdivide_internal_u64_gen(d, 0); +} + +struct libdivide_u64_branchfree_t libdivide_u64_branchfree_gen(uint64_t d) { + if (d == 1) { + LIBDIVIDE_ERROR("branchfree divider must be != 1"); + } + struct libdivide_u64_t tmp = libdivide_internal_u64_gen(d, 1); + struct libdivide_u64_branchfree_t ret = {tmp.magic, (uint8_t)(tmp.more & LIBDIVIDE_64_SHIFT_MASK)}; + return ret; +} + +uint64_t libdivide_u64_do(uint64_t numer, const struct libdivide_u64_t *denom) { + uint8_t more = denom->more; + if (!denom->magic) { + return numer >> more; + } + else { + uint64_t q = libdivide_mullhi_u64(denom->magic, numer); + if (more & LIBDIVIDE_ADD_MARKER) { + uint64_t t = ((numer - q) >> 1) + q; + return t >> (more & LIBDIVIDE_64_SHIFT_MASK); + } + else { + // All upper bits are 0, + // don't need to mask them off. + return q >> more; + } + } +} + +uint64_t libdivide_u64_branchfree_do(uint64_t numer, const struct libdivide_u64_branchfree_t *denom) { + uint64_t q = libdivide_mullhi_u64(denom->magic, numer); + uint64_t t = ((numer - q) >> 1) + q; + return t >> denom->more; +} + +uint64_t libdivide_u64_recover(const struct libdivide_u64_t *denom) { + uint8_t more = denom->more; + uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; + + if (!denom->magic) { + return 1ULL << shift; + } else if (!(more & LIBDIVIDE_ADD_MARKER)) { + // We compute q = n/d = n*m / 2^(64 + shift) + // Therefore we have d = 2^(64 + shift) / m + // We need to ceil it. + // We know d is not a power of 2, so m is not a power of 2, + // so we can just add 1 to the floor + uint64_t hi_dividend = 1ULL << shift; + uint64_t rem_ignored; + return 1 + libdivide_128_div_64_to_64(hi_dividend, 0, denom->magic, &rem_ignored); + } else { + // Here we wish to compute d = 2^(64+shift+1)/(m+2^64). + // Notice (m + 2^64) is a 65 bit number. This gets hairy. See + // libdivide_u32_recover for more on what we do here. 
+ // TODO: do something better than 128 bit math + + // Full n is a (potentially) 129 bit value + // half_n is a 128 bit value + // Compute the hi half of half_n. Low half is 0. + uint64_t half_n_hi = 1ULL << shift, half_n_lo = 0; + // d is a 65 bit value. The high bit is always set to 1. + const uint64_t d_hi = 1, d_lo = denom->magic; + // Note that the quotient is guaranteed <= 64 bits, + // but the remainder may need 65! + uint64_t r_hi, r_lo; + uint64_t half_q = libdivide_128_div_128_to_64(half_n_hi, half_n_lo, d_hi, d_lo, &r_hi, &r_lo); + // We computed 2^(64+shift)/(m+2^64) + // Double the remainder ('dr') and check if that is larger than d + // Note that d is a 65 bit value, so r1 is small and so r1 + r1 + // cannot overflow + uint64_t dr_lo = r_lo + r_lo; + uint64_t dr_hi = r_hi + r_hi + (dr_lo < r_lo); // last term is carry + int dr_exceeds_d = (dr_hi > d_hi) || (dr_hi == d_hi && dr_lo >= d_lo); + uint64_t full_q = half_q + half_q + (dr_exceeds_d ? 1 : 0); + return full_q + 1; + } +} + +uint64_t libdivide_u64_branchfree_recover(const struct libdivide_u64_branchfree_t *denom) { + uint8_t more = denom->more; + uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; + + if (!denom->magic) { + return 1ULL << (shift + 1); + } else { + // Here we wish to compute d = 2^(64+shift+1)/(m+2^64). + // Notice (m + 2^64) is a 65 bit number. This gets hairy. See + // libdivide_u32_recover for more on what we do here. + // TODO: do something better than 128 bit math + + // Full n is a (potentially) 129 bit value + // half_n is a 128 bit value + // Compute the hi half of half_n. Low half is 0. + uint64_t half_n_hi = 1ULL << shift, half_n_lo = 0; + // d is a 65 bit value. The high bit is always set to 1. + const uint64_t d_hi = 1, d_lo = denom->magic; + // Note that the quotient is guaranteed <= 64 bits, + // but the remainder may need 65! + uint64_t r_hi, r_lo; + uint64_t half_q = libdivide_128_div_128_to_64(half_n_hi, half_n_lo, d_hi, d_lo, &r_hi, &r_lo); + // We computed 2^(64+shift)/(m+2^64) + // Double the remainder ('dr') and check if that is larger than d + // Note that d is a 65 bit value, so r1 is small and so r1 + r1 + // cannot overflow + uint64_t dr_lo = r_lo + r_lo; + uint64_t dr_hi = r_hi + r_hi + (dr_lo < r_lo); // last term is carry + int dr_exceeds_d = (dr_hi > d_hi) || (dr_hi == d_hi && dr_lo >= d_lo); + uint64_t full_q = half_q + half_q + (dr_exceeds_d ? 1 : 0); + return full_q + 1; + } +} + +/////////// SINT32 + +static inline struct libdivide_s32_t libdivide_internal_s32_gen(int32_t d, int branchfree) { + if (d == 0) { + LIBDIVIDE_ERROR("divider must be != 0"); + } + + struct libdivide_s32_t result; + + // If d is a power of 2, or negative a power of 2, we have to use a shift. + // This is especially important because the magic algorithm fails for -1. + // To check if d is a power of 2 or its inverse, it suffices to check + // whether its absolute value has exactly one bit set. This works even for + // INT_MIN, because abs(INT_MIN) == INT_MIN, and INT_MIN has one bit set + // and is a power of 2. + uint32_t ud = (uint32_t)d; + uint32_t absD = (d < 0) ? -ud : ud; + uint32_t floor_log_2_d = 31 - libdivide_count_leading_zeros32(absD); + // check if exactly one bit is set, + // don't care if absD is 0 since that's divide by zero + if ((absD & (absD - 1)) == 0) { + // Branchfree and normal paths are exactly the same + result.magic = 0; + result.more = floor_log_2_d | (d < 0 ? 
LIBDIVIDE_NEGATIVE_DIVISOR : 0);
+    } else {
+        LIBDIVIDE_ASSERT(floor_log_2_d >= 1);
+
+        uint8_t more;
+        // the dividend here is 2**(floor_log_2_d + 31), so the low 32 bit word
+        // is 0 and the high word is 2**(floor_log_2_d - 1)
+        uint32_t rem, proposed_m;
+        proposed_m = libdivide_64_div_32_to_32(1U << (floor_log_2_d - 1), 0, absD, &rem);
+        const uint32_t e = absD - rem;
+
+        // We are going to start with a power of floor_log_2_d - 1.
+        // This works if e < 2**floor_log_2_d.
+        if (!branchfree && e < (1U << floor_log_2_d)) {
+            // This power works
+            more = floor_log_2_d - 1;
+        } else {
+            // We need to go one higher. This should not make proposed_m
+            // overflow, but it will make it negative when interpreted as an
+            // int32_t.
+            proposed_m += proposed_m;
+            const uint32_t twice_rem = rem + rem;
+            if (twice_rem >= absD || twice_rem < rem) proposed_m += 1;
+            more = floor_log_2_d | LIBDIVIDE_ADD_MARKER;
+        }
+
+        proposed_m += 1;
+        int32_t magic = (int32_t)proposed_m;
+
+        // Mark if we are negative. Note we only negate the magic number in the
+        // branchfull case.
+        if (d < 0) {
+            more |= LIBDIVIDE_NEGATIVE_DIVISOR;
+            if (!branchfree) {
+                magic = -magic;
+            }
+        }
+
+        result.more = more;
+        result.magic = magic;
+    }
+    return result;
+}
+
+struct libdivide_s32_t libdivide_s32_gen(int32_t d) {
+    return libdivide_internal_s32_gen(d, 0);
+}
+
+struct libdivide_s32_branchfree_t libdivide_s32_branchfree_gen(int32_t d) {
+    struct libdivide_s32_t tmp = libdivide_internal_s32_gen(d, 1);
+    struct libdivide_s32_branchfree_t result = {tmp.magic, tmp.more};
+    return result;
+}
+
+int32_t libdivide_s32_do(int32_t numer, const struct libdivide_s32_t *denom) {
+    uint8_t more = denom->more;
+    uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK;
+
+    if (!denom->magic) {
+        uint32_t sign = (int8_t)more >> 7;
+        uint32_t mask = (1U << shift) - 1;
+        uint32_t uq = numer + ((numer >> 31) & mask);
+        int32_t q = (int32_t)uq;
+        q >>= shift;
+        q = (q ^ sign) - sign;
+        return q;
+    } else {
+        uint32_t uq = (uint32_t)libdivide_mullhi_s32(denom->magic, numer);
+        if (more & LIBDIVIDE_ADD_MARKER) {
+            // must be arithmetic shift and then sign extend
+            int32_t sign = (int8_t)more >> 7;
+            // q += (more < 0 ? 
-numer : numer) + // cast required to avoid UB + uq += ((uint32_t)numer ^ sign) - sign; + } + int32_t q = (int32_t)uq; + q >>= shift; + q += (q < 0); + return q; + } +} + +int32_t libdivide_s32_branchfree_do(int32_t numer, const struct libdivide_s32_branchfree_t *denom) { + uint8_t more = denom->more; + uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; + // must be arithmetic shift and then sign extend + int32_t sign = (int8_t)more >> 7; + int32_t magic = denom->magic; + int32_t q = libdivide_mullhi_s32(magic, numer); + q += numer; + + // If q is non-negative, we have nothing to do + // If q is negative, we want to add either (2**shift)-1 if d is a power of + // 2, or (2**shift) if it is not a power of 2 + uint32_t is_power_of_2 = (magic == 0); + uint32_t q_sign = (uint32_t)(q >> 31); + q += q_sign & ((1U << shift) - is_power_of_2); + + // Now arithmetic right shift + q >>= shift; + // Negate if needed + q = (q ^ sign) - sign; + + return q; +} + +int32_t libdivide_s32_recover(const struct libdivide_s32_t *denom) { + uint8_t more = denom->more; + uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; + if (!denom->magic) { + uint32_t absD = 1U << shift; + if (more & LIBDIVIDE_NEGATIVE_DIVISOR) { + absD = -absD; + } + return (int32_t)absD; + } else { + // Unsigned math is much easier + // We negate the magic number only in the branchfull case, and we don't + // know which case we're in. However we have enough information to + // determine the correct sign of the magic number. The divisor was + // negative if LIBDIVIDE_NEGATIVE_DIVISOR is set. If ADD_MARKER is set, + // the magic number's sign is opposite that of the divisor. + // We want to compute the positive magic number. + int negative_divisor = (more & LIBDIVIDE_NEGATIVE_DIVISOR); + int magic_was_negated = (more & LIBDIVIDE_ADD_MARKER) + ? denom->magic > 0 : denom->magic < 0; + + // Handle the power of 2 case (including branchfree) + if (denom->magic == 0) { + int32_t result = 1U << shift; + return negative_divisor ? -result : result; + } + + uint32_t d = (uint32_t)(magic_was_negated ? -denom->magic : denom->magic); + uint64_t n = 1ULL << (32 + shift); // this shift cannot exceed 30 + uint32_t q = (uint32_t)(n / d); + int32_t result = (int32_t)q; + result += 1; + return negative_divisor ? -result : result; + } +} + +int32_t libdivide_s32_branchfree_recover(const struct libdivide_s32_branchfree_t *denom) { + return libdivide_s32_recover((const struct libdivide_s32_t *)denom); +} + +///////////// SINT64 + +static inline struct libdivide_s64_t libdivide_internal_s64_gen(int64_t d, int branchfree) { + if (d == 0) { + LIBDIVIDE_ERROR("divider must be != 0"); + } + + struct libdivide_s64_t result; + + // If d is a power of 2, or negative a power of 2, we have to use a shift. + // This is especially important because the magic algorithm fails for -1. + // To check if d is a power of 2 or its inverse, it suffices to check + // whether its absolute value has exactly one bit set. This works even for + // INT_MIN, because abs(INT_MIN) == INT_MIN, and INT_MIN has one bit set + // and is a power of 2. + uint64_t ud = (uint64_t)d; + uint64_t absD = (d < 0) ? -ud : ud; + uint32_t floor_log_2_d = 63 - libdivide_count_leading_zeros64(absD); + // check if exactly one bit is set, + // don't care if absD is 0 since that's divide by zero + if ((absD & (absD - 1)) == 0) { + // Branchfree and non-branchfree cases are the same + result.magic = 0; + result.more = floor_log_2_d | (d < 0 ? 
LIBDIVIDE_NEGATIVE_DIVISOR : 0);
+    } else {
+        // the dividend here is 2**(floor_log_2_d + 63), so the low 64 bit word
+        // is 0 and the high word is 2**(floor_log_2_d - 1)
+        uint8_t more;
+        uint64_t rem, proposed_m;
+        proposed_m = libdivide_128_div_64_to_64(1ULL << (floor_log_2_d - 1), 0, absD, &rem);
+        const uint64_t e = absD - rem;
+
+        // We are going to start with a power of floor_log_2_d - 1.
+        // This works if e < 2**floor_log_2_d.
+        if (!branchfree && e < (1ULL << floor_log_2_d)) {
+            // This power works
+            more = floor_log_2_d - 1;
+        } else {
+            // We need to go one higher. This should not make proposed_m
+            // overflow, but it will make it negative when interpreted as an
+            // int64_t.
+            proposed_m += proposed_m;
+            const uint64_t twice_rem = rem + rem;
+            if (twice_rem >= absD || twice_rem < rem) proposed_m += 1;
+            // note that we only set the LIBDIVIDE_NEGATIVE_DIVISOR bit if we
+            // also set ADD_MARKER; this is an annoying optimization that
+            // enables algorithm #4 to avoid the mask. However we always set it
+            // in the branchfree case
+            more = floor_log_2_d | LIBDIVIDE_ADD_MARKER;
+        }
+        proposed_m += 1;
+        int64_t magic = (int64_t)proposed_m;
+
+        // Mark if we are negative
+        if (d < 0) {
+            more |= LIBDIVIDE_NEGATIVE_DIVISOR;
+            if (!branchfree) {
+                magic = -magic;
+            }
+        }
+
+        result.more = more;
+        result.magic = magic;
+    }
+    return result;
+}
+
+struct libdivide_s64_t libdivide_s64_gen(int64_t d) {
+    return libdivide_internal_s64_gen(d, 0);
+}
+
+struct libdivide_s64_branchfree_t libdivide_s64_branchfree_gen(int64_t d) {
+    struct libdivide_s64_t tmp = libdivide_internal_s64_gen(d, 1);
+    struct libdivide_s64_branchfree_t ret = {tmp.magic, tmp.more};
+    return ret;
+}
+
+int64_t libdivide_s64_do(int64_t numer, const struct libdivide_s64_t *denom) {
+    uint8_t more = denom->more;
+    uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK;
+
+    if (!denom->magic) { // shift path
+        uint64_t mask = (1ULL << shift) - 1;
+        uint64_t uq = numer + ((numer >> 63) & mask);
+        int64_t q = (int64_t)uq;
+        q >>= shift;
+        // must be arithmetic shift and then sign-extend
+        int64_t sign = (int8_t)more >> 7;
+        q = (q ^ sign) - sign;
+        return q;
+    } else {
+        uint64_t uq = (uint64_t)libdivide_mullhi_s64(denom->magic, numer);
+        if (more & LIBDIVIDE_ADD_MARKER) {
+            // must be arithmetic shift and then sign extend
+            int64_t sign = (int8_t)more >> 7;
+            // q += (more < 0 ? -numer : numer)
+            // cast required to avoid UB
+            uq += ((uint64_t)numer ^ sign) - sign;
+        }
+        int64_t q = (int64_t)uq;
+        q >>= shift;
+        q += (q < 0);
+        return q;
+    }
+}
+
+int64_t libdivide_s64_branchfree_do(int64_t numer, const struct libdivide_s64_branchfree_t *denom) {
+    uint8_t more = denom->more;
+    uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK;
+    // must be arithmetic shift and then sign extend
+    int64_t sign = (int8_t)more >> 7;
+    int64_t magic = denom->magic;
+    int64_t q = libdivide_mullhi_s64(magic, numer);
+    q += numer;
+
+    // If q is non-negative, we have nothing to do.
+    // If q is negative, we want to add either (2**shift)-1 if d is a power of
+    // 2, or (2**shift) if it is not a power of 2.
+ uint64_t is_power_of_2 = (magic == 0); + uint64_t q_sign = (uint64_t)(q >> 63); + q += q_sign & ((1ULL << shift) - is_power_of_2); + + // Arithmetic right shift + q >>= shift; + // Negate if needed + q = (q ^ sign) - sign; + + return q; +} + +int64_t libdivide_s64_recover(const struct libdivide_s64_t *denom) { + uint8_t more = denom->more; + uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; + if (denom->magic == 0) { // shift path + uint64_t absD = 1ULL << shift; + if (more & LIBDIVIDE_NEGATIVE_DIVISOR) { + absD = -absD; + } + return (int64_t)absD; + } else { + // Unsigned math is much easier + int negative_divisor = (more & LIBDIVIDE_NEGATIVE_DIVISOR); + int magic_was_negated = (more & LIBDIVIDE_ADD_MARKER) + ? denom->magic > 0 : denom->magic < 0; + + uint64_t d = (uint64_t)(magic_was_negated ? -denom->magic : denom->magic); + uint64_t n_hi = 1ULL << shift, n_lo = 0; + uint64_t rem_ignored; + uint64_t q = libdivide_128_div_64_to_64(n_hi, n_lo, d, &rem_ignored); + int64_t result = (int64_t)(q + 1); + if (negative_divisor) { + result = -result; + } + return result; + } +} + +int64_t libdivide_s64_branchfree_recover(const struct libdivide_s64_branchfree_t *denom) { + return libdivide_s64_recover((const struct libdivide_s64_t *)denom); +} + +#if defined(LIBDIVIDE_AVX512) + +static inline __m512i libdivide_u32_do_vector(__m512i numers, const struct libdivide_u32_t *denom); +static inline __m512i libdivide_s32_do_vector(__m512i numers, const struct libdivide_s32_t *denom); +static inline __m512i libdivide_u64_do_vector(__m512i numers, const struct libdivide_u64_t *denom); +static inline __m512i libdivide_s64_do_vector(__m512i numers, const struct libdivide_s64_t *denom); + +static inline __m512i libdivide_u32_branchfree_do_vector(__m512i numers, const struct libdivide_u32_branchfree_t *denom); +static inline __m512i libdivide_s32_branchfree_do_vector(__m512i numers, const struct libdivide_s32_branchfree_t *denom); +static inline __m512i libdivide_u64_branchfree_do_vector(__m512i numers, const struct libdivide_u64_branchfree_t *denom); +static inline __m512i libdivide_s64_branchfree_do_vector(__m512i numers, const struct libdivide_s64_branchfree_t *denom); + +//////// Internal Utility Functions + +static inline __m512i libdivide_s64_signbits(__m512i v) {; + return _mm512_srai_epi64(v, 63); +} + +static inline __m512i libdivide_s64_shift_right_vector(__m512i v, int amt) { + return _mm512_srai_epi64(v, amt); +} + +// Here, b is assumed to contain one 32-bit value repeated. +static inline __m512i libdivide_mullhi_u32_vector(__m512i a, __m512i b) { + __m512i hi_product_0Z2Z = _mm512_srli_epi64(_mm512_mul_epu32(a, b), 32); + __m512i a1X3X = _mm512_srli_epi64(a, 32); + __m512i mask = _mm512_set_epi32(-1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0); + __m512i hi_product_Z1Z3 = _mm512_and_si512(_mm512_mul_epu32(a1X3X, b), mask); + return _mm512_or_si512(hi_product_0Z2Z, hi_product_Z1Z3); +} + +// b is one 32-bit value repeated. +static inline __m512i libdivide_mullhi_s32_vector(__m512i a, __m512i b) { + __m512i hi_product_0Z2Z = _mm512_srli_epi64(_mm512_mul_epi32(a, b), 32); + __m512i a1X3X = _mm512_srli_epi64(a, 32); + __m512i mask = _mm512_set_epi32(-1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0); + __m512i hi_product_Z1Z3 = _mm512_and_si512(_mm512_mul_epi32(a1X3X, b), mask); + return _mm512_or_si512(hi_product_0Z2Z, hi_product_Z1Z3); +} + +// Here, y is assumed to contain one 64-bit value repeated. 
+// https://stackoverflow.com/a/28827013 +static inline __m512i libdivide_mullhi_u64_vector(__m512i x, __m512i y) { + __m512i lomask = _mm512_set1_epi64(0xffffffff); + __m512i xh = _mm512_shuffle_epi32(x, (_MM_PERM_ENUM) 0xB1); + __m512i yh = _mm512_shuffle_epi32(y, (_MM_PERM_ENUM) 0xB1); + __m512i w0 = _mm512_mul_epu32(x, y); + __m512i w1 = _mm512_mul_epu32(x, yh); + __m512i w2 = _mm512_mul_epu32(xh, y); + __m512i w3 = _mm512_mul_epu32(xh, yh); + __m512i w0h = _mm512_srli_epi64(w0, 32); + __m512i s1 = _mm512_add_epi64(w1, w0h); + __m512i s1l = _mm512_and_si512(s1, lomask); + __m512i s1h = _mm512_srli_epi64(s1, 32); + __m512i s2 = _mm512_add_epi64(w2, s1l); + __m512i s2h = _mm512_srli_epi64(s2, 32); + __m512i hi = _mm512_add_epi64(w3, s1h); + hi = _mm512_add_epi64(hi, s2h); + + return hi; +} + +// y is one 64-bit value repeated. +static inline __m512i libdivide_mullhi_s64_vector(__m512i x, __m512i y) { + __m512i p = libdivide_mullhi_u64_vector(x, y); + __m512i t1 = _mm512_and_si512(libdivide_s64_signbits(x), y); + __m512i t2 = _mm512_and_si512(libdivide_s64_signbits(y), x); + p = _mm512_sub_epi64(p, t1); + p = _mm512_sub_epi64(p, t2); + return p; +} + +////////// UINT32 + +__m512i libdivide_u32_do_vector(__m512i numers, const struct libdivide_u32_t *denom) { + uint8_t more = denom->more; + if (!denom->magic) { + return _mm512_srli_epi32(numers, more); + } + else { + __m512i q = libdivide_mullhi_u32_vector(numers, _mm512_set1_epi32(denom->magic)); + if (more & LIBDIVIDE_ADD_MARKER) { + // uint32_t t = ((numer - q) >> 1) + q; + // return t >> denom->shift; + uint32_t shift = more & LIBDIVIDE_32_SHIFT_MASK; + __m512i t = _mm512_add_epi32(_mm512_srli_epi32(_mm512_sub_epi32(numers, q), 1), q); + return _mm512_srli_epi32(t, shift); + } + else { + return _mm512_srli_epi32(q, more); + } + } +} + +__m512i libdivide_u32_branchfree_do_vector(__m512i numers, const struct libdivide_u32_branchfree_t *denom) { + __m512i q = libdivide_mullhi_u32_vector(numers, _mm512_set1_epi32(denom->magic)); + __m512i t = _mm512_add_epi32(_mm512_srli_epi32(_mm512_sub_epi32(numers, q), 1), q); + return _mm512_srli_epi32(t, denom->more); +} + +////////// UINT64 + +__m512i libdivide_u64_do_vector(__m512i numers, const struct libdivide_u64_t *denom) { + uint8_t more = denom->more; + if (!denom->magic) { + return _mm512_srli_epi64(numers, more); + } + else { + __m512i q = libdivide_mullhi_u64_vector(numers, _mm512_set1_epi64(denom->magic)); + if (more & LIBDIVIDE_ADD_MARKER) { + // uint32_t t = ((numer - q) >> 1) + q; + // return t >> denom->shift; + uint32_t shift = more & LIBDIVIDE_64_SHIFT_MASK; + __m512i t = _mm512_add_epi64(_mm512_srli_epi64(_mm512_sub_epi64(numers, q), 1), q); + return _mm512_srli_epi64(t, shift); + } + else { + return _mm512_srli_epi64(q, more); + } + } +} + +__m512i libdivide_u64_branchfree_do_vector(__m512i numers, const struct libdivide_u64_branchfree_t *denom) { + __m512i q = libdivide_mullhi_u64_vector(numers, _mm512_set1_epi64(denom->magic)); + __m512i t = _mm512_add_epi64(_mm512_srli_epi64(_mm512_sub_epi64(numers, q), 1), q); + return _mm512_srli_epi64(t, denom->more); +} + +////////// SINT32 + +__m512i libdivide_s32_do_vector(__m512i numers, const struct libdivide_s32_t *denom) { + uint8_t more = denom->more; + if (!denom->magic) { + uint32_t shift = more & LIBDIVIDE_32_SHIFT_MASK; + uint32_t mask = (1U << shift) - 1; + __m512i roundToZeroTweak = _mm512_set1_epi32(mask); + // q = numer + ((numer >> 31) & roundToZeroTweak); + __m512i q = _mm512_add_epi32(numers, 
_mm512_and_si512(_mm512_srai_epi32(numers, 31), roundToZeroTweak)); + q = _mm512_srai_epi32(q, shift); + __m512i sign = _mm512_set1_epi32((int8_t)more >> 7); + // q = (q ^ sign) - sign; + q = _mm512_sub_epi32(_mm512_xor_si512(q, sign), sign); + return q; + } + else { + __m512i q = libdivide_mullhi_s32_vector(numers, _mm512_set1_epi32(denom->magic)); + if (more & LIBDIVIDE_ADD_MARKER) { + // must be arithmetic shift + __m512i sign = _mm512_set1_epi32((int8_t)more >> 7); + // q += ((numer ^ sign) - sign); + q = _mm512_add_epi32(q, _mm512_sub_epi32(_mm512_xor_si512(numers, sign), sign)); + } + // q >>= shift + q = _mm512_srai_epi32(q, more & LIBDIVIDE_32_SHIFT_MASK); + q = _mm512_add_epi32(q, _mm512_srli_epi32(q, 31)); // q += (q < 0) + return q; + } +} + +__m512i libdivide_s32_branchfree_do_vector(__m512i numers, const struct libdivide_s32_branchfree_t *denom) { + int32_t magic = denom->magic; + uint8_t more = denom->more; + uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; + // must be arithmetic shift + __m512i sign = _mm512_set1_epi32((int8_t)more >> 7); + __m512i q = libdivide_mullhi_s32_vector(numers, _mm512_set1_epi32(magic)); + q = _mm512_add_epi32(q, numers); // q += numers + + // If q is non-negative, we have nothing to do + // If q is negative, we want to add either (2**shift)-1 if d is + // a power of 2, or (2**shift) if it is not a power of 2 + uint32_t is_power_of_2 = (magic == 0); + __m512i q_sign = _mm512_srai_epi32(q, 31); // q_sign = q >> 31 + __m512i mask = _mm512_set1_epi32((1U << shift) - is_power_of_2); + q = _mm512_add_epi32(q, _mm512_and_si512(q_sign, mask)); // q = q + (q_sign & mask) + q = _mm512_srai_epi32(q, shift); // q >>= shift + q = _mm512_sub_epi32(_mm512_xor_si512(q, sign), sign); // q = (q ^ sign) - sign + return q; +} + +////////// SINT64 + +__m512i libdivide_s64_do_vector(__m512i numers, const struct libdivide_s64_t *denom) { + uint8_t more = denom->more; + int64_t magic = denom->magic; + if (magic == 0) { // shift path + uint32_t shift = more & LIBDIVIDE_64_SHIFT_MASK; + uint64_t mask = (1ULL << shift) - 1; + __m512i roundToZeroTweak = _mm512_set1_epi64(mask); + // q = numer + ((numer >> 63) & roundToZeroTweak); + __m512i q = _mm512_add_epi64(numers, _mm512_and_si512(libdivide_s64_signbits(numers), roundToZeroTweak)); + q = libdivide_s64_shift_right_vector(q, shift); + __m512i sign = _mm512_set1_epi32((int8_t)more >> 7); + // q = (q ^ sign) - sign; + q = _mm512_sub_epi64(_mm512_xor_si512(q, sign), sign); + return q; + } + else { + __m512i q = libdivide_mullhi_s64_vector(numers, _mm512_set1_epi64(magic)); + if (more & LIBDIVIDE_ADD_MARKER) { + // must be arithmetic shift + __m512i sign = _mm512_set1_epi32((int8_t)more >> 7); + // q += ((numer ^ sign) - sign); + q = _mm512_add_epi64(q, _mm512_sub_epi64(_mm512_xor_si512(numers, sign), sign)); + } + // q >>= denom->mult_path.shift + q = libdivide_s64_shift_right_vector(q, more & LIBDIVIDE_64_SHIFT_MASK); + q = _mm512_add_epi64(q, _mm512_srli_epi64(q, 63)); // q += (q < 0) + return q; + } +} + +__m512i libdivide_s64_branchfree_do_vector(__m512i numers, const struct libdivide_s64_branchfree_t *denom) { + int64_t magic = denom->magic; + uint8_t more = denom->more; + uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; + // must be arithmetic shift + __m512i sign = _mm512_set1_epi32((int8_t)more >> 7); + + // libdivide_mullhi_s64(numers, magic); + __m512i q = libdivide_mullhi_s64_vector(numers, _mm512_set1_epi64(magic)); + q = _mm512_add_epi64(q, numers); // q += numers + + // If q is non-negative, we have nothing to 
do. + // If q is negative, we want to add either (2**shift)-1 if d is + // a power of 2, or (2**shift) if it is not a power of 2. + uint32_t is_power_of_2 = (magic == 0); + __m512i q_sign = libdivide_s64_signbits(q); // q_sign = q >> 63 + __m512i mask = _mm512_set1_epi64((1ULL << shift) - is_power_of_2); + q = _mm512_add_epi64(q, _mm512_and_si512(q_sign, mask)); // q = q + (q_sign & mask) + q = libdivide_s64_shift_right_vector(q, shift); // q >>= shift + q = _mm512_sub_epi64(_mm512_xor_si512(q, sign), sign); // q = (q ^ sign) - sign + return q; +} + +#elif defined(LIBDIVIDE_AVX2) + +static inline __m256i libdivide_u32_do_vector(__m256i numers, const struct libdivide_u32_t *denom); +static inline __m256i libdivide_s32_do_vector(__m256i numers, const struct libdivide_s32_t *denom); +static inline __m256i libdivide_u64_do_vector(__m256i numers, const struct libdivide_u64_t *denom); +static inline __m256i libdivide_s64_do_vector(__m256i numers, const struct libdivide_s64_t *denom); + +static inline __m256i libdivide_u32_branchfree_do_vector(__m256i numers, const struct libdivide_u32_branchfree_t *denom); +static inline __m256i libdivide_s32_branchfree_do_vector(__m256i numers, const struct libdivide_s32_branchfree_t *denom); +static inline __m256i libdivide_u64_branchfree_do_vector(__m256i numers, const struct libdivide_u64_branchfree_t *denom); +static inline __m256i libdivide_s64_branchfree_do_vector(__m256i numers, const struct libdivide_s64_branchfree_t *denom); + +//////// Internal Utility Functions + +// Implementation of _mm256_srai_epi64(v, 63) (from AVX512). +static inline __m256i libdivide_s64_signbits(__m256i v) { + __m256i hiBitsDuped = _mm256_shuffle_epi32(v, _MM_SHUFFLE(3, 3, 1, 1)); + __m256i signBits = _mm256_srai_epi32(hiBitsDuped, 31); + return signBits; +} + +// Implementation of _mm256_srai_epi64 (from AVX512). +static inline __m256i libdivide_s64_shift_right_vector(__m256i v, int amt) { + const int b = 64 - amt; + __m256i m = _mm256_set1_epi64x(1ULL << (b - 1)); + __m256i x = _mm256_srli_epi64(v, amt); + __m256i result = _mm256_sub_epi64(_mm256_xor_si256(x, m), m); + return result; +} + +// Here, b is assumed to contain one 32-bit value repeated. +static inline __m256i libdivide_mullhi_u32_vector(__m256i a, __m256i b) { + __m256i hi_product_0Z2Z = _mm256_srli_epi64(_mm256_mul_epu32(a, b), 32); + __m256i a1X3X = _mm256_srli_epi64(a, 32); + __m256i mask = _mm256_set_epi32(-1, 0, -1, 0, -1, 0, -1, 0); + __m256i hi_product_Z1Z3 = _mm256_and_si256(_mm256_mul_epu32(a1X3X, b), mask); + return _mm256_or_si256(hi_product_0Z2Z, hi_product_Z1Z3); +} + +// b is one 32-bit value repeated. +static inline __m256i libdivide_mullhi_s32_vector(__m256i a, __m256i b) { + __m256i hi_product_0Z2Z = _mm256_srli_epi64(_mm256_mul_epi32(a, b), 32); + __m256i a1X3X = _mm256_srli_epi64(a, 32); + __m256i mask = _mm256_set_epi32(-1, 0, -1, 0, -1, 0, -1, 0); + __m256i hi_product_Z1Z3 = _mm256_and_si256(_mm256_mul_epi32(a1X3X, b), mask); + return _mm256_or_si256(hi_product_0Z2Z, hi_product_Z1Z3); +} + +// Here, y is assumed to contain one 64-bit value repeated. 
+// https://stackoverflow.com/a/28827013
+static inline __m256i libdivide_mullhi_u64_vector(__m256i x, __m256i y) {
+    __m256i lomask = _mm256_set1_epi64x(0xffffffff);
+    __m256i xh = _mm256_shuffle_epi32(x, 0xB1);        // x0l, x0h, x1l, x1h
+    __m256i yh = _mm256_shuffle_epi32(y, 0xB1);        // y0l, y0h, y1l, y1h
+    __m256i w0 = _mm256_mul_epu32(x, y);               // x0l*y0l, x1l*y1l
+    __m256i w1 = _mm256_mul_epu32(x, yh);              // x0l*y0h, x1l*y1h
+    __m256i w2 = _mm256_mul_epu32(xh, y);              // x0h*y0l, x1h*y1l
+    __m256i w3 = _mm256_mul_epu32(xh, yh);             // x0h*y0h, x1h*y1h
+    __m256i w0h = _mm256_srli_epi64(w0, 32);
+    __m256i s1 = _mm256_add_epi64(w1, w0h);
+    __m256i s1l = _mm256_and_si256(s1, lomask);
+    __m256i s1h = _mm256_srli_epi64(s1, 32);
+    __m256i s2 = _mm256_add_epi64(w2, s1l);
+    __m256i s2h = _mm256_srli_epi64(s2, 32);
+    __m256i hi = _mm256_add_epi64(w3, s1h);
+    hi = _mm256_add_epi64(hi, s2h);
+
+    return hi;
+}
+
+// y is one 64-bit value repeated.
+static inline __m256i libdivide_mullhi_s64_vector(__m256i x, __m256i y) {
+    __m256i p = libdivide_mullhi_u64_vector(x, y);
+    __m256i t1 = _mm256_and_si256(libdivide_s64_signbits(x), y);
+    __m256i t2 = _mm256_and_si256(libdivide_s64_signbits(y), x);
+    p = _mm256_sub_epi64(p, t1);
+    p = _mm256_sub_epi64(p, t2);
+    return p;
+}
+
+////////// UINT32
+
+__m256i libdivide_u32_do_vector(__m256i numers, const struct libdivide_u32_t *denom) {
+    uint8_t more = denom->more;
+    if (!denom->magic) {
+        return _mm256_srli_epi32(numers, more);
+    }
+    else {
+        __m256i q = libdivide_mullhi_u32_vector(numers, _mm256_set1_epi32(denom->magic));
+        if (more & LIBDIVIDE_ADD_MARKER) {
+            // uint32_t t = ((numer - q) >> 1) + q;
+            // return t >> denom->shift;
+            uint32_t shift = more & LIBDIVIDE_32_SHIFT_MASK;
+            __m256i t = _mm256_add_epi32(_mm256_srli_epi32(_mm256_sub_epi32(numers, q), 1), q);
+            return _mm256_srli_epi32(t, shift);
+        }
+        else {
+            return _mm256_srli_epi32(q, more);
+        }
+    }
+}
+
+__m256i libdivide_u32_branchfree_do_vector(__m256i numers, const struct libdivide_u32_branchfree_t *denom) {
+    __m256i q = libdivide_mullhi_u32_vector(numers, _mm256_set1_epi32(denom->magic));
+    __m256i t = _mm256_add_epi32(_mm256_srli_epi32(_mm256_sub_epi32(numers, q), 1), q);
+    return _mm256_srli_epi32(t, denom->more);
+}
+
+////////// UINT64
+
+__m256i libdivide_u64_do_vector(__m256i numers, const struct libdivide_u64_t *denom) {
+    uint8_t more = denom->more;
+    if (!denom->magic) {
+        return _mm256_srli_epi64(numers, more);
+    }
+    else {
+        __m256i q = libdivide_mullhi_u64_vector(numers, _mm256_set1_epi64x(denom->magic));
+        if (more & LIBDIVIDE_ADD_MARKER) {
+            // uint32_t t = ((numer - q) >> 1) + q;
+            // return t >> denom->shift;
+            uint32_t shift = more & LIBDIVIDE_64_SHIFT_MASK;
+            __m256i t = _mm256_add_epi64(_mm256_srli_epi64(_mm256_sub_epi64(numers, q), 1), q);
+            return _mm256_srli_epi64(t, shift);
+        }
+        else {
+            return _mm256_srli_epi64(q, more);
+        }
+    }
+}
+
+__m256i libdivide_u64_branchfree_do_vector(__m256i numers, const struct libdivide_u64_branchfree_t *denom) {
+    __m256i q = libdivide_mullhi_u64_vector(numers, _mm256_set1_epi64x(denom->magic));
+    __m256i t = _mm256_add_epi64(_mm256_srli_epi64(_mm256_sub_epi64(numers, q), 1), q);
+    return _mm256_srli_epi64(t, denom->more);
+}
+
+////////// SINT32
+
+__m256i libdivide_s32_do_vector(__m256i numers, const struct libdivide_s32_t *denom) {
+    uint8_t more = denom->more;
+    if (!denom->magic) {
+        uint32_t shift = more & LIBDIVIDE_32_SHIFT_MASK;
+        uint32_t mask = (1U << shift) - 1;
+        __m256i roundToZeroTweak = _mm256_set1_epi32(mask);
+        // q = numer + ((numer >>
31) & roundToZeroTweak); + __m256i q = _mm256_add_epi32(numers, _mm256_and_si256(_mm256_srai_epi32(numers, 31), roundToZeroTweak)); + q = _mm256_srai_epi32(q, shift); + __m256i sign = _mm256_set1_epi32((int8_t)more >> 7); + // q = (q ^ sign) - sign; + q = _mm256_sub_epi32(_mm256_xor_si256(q, sign), sign); + return q; + } + else { + __m256i q = libdivide_mullhi_s32_vector(numers, _mm256_set1_epi32(denom->magic)); + if (more & LIBDIVIDE_ADD_MARKER) { + // must be arithmetic shift + __m256i sign = _mm256_set1_epi32((int8_t)more >> 7); + // q += ((numer ^ sign) - sign); + q = _mm256_add_epi32(q, _mm256_sub_epi32(_mm256_xor_si256(numers, sign), sign)); + } + // q >>= shift + q = _mm256_srai_epi32(q, more & LIBDIVIDE_32_SHIFT_MASK); + q = _mm256_add_epi32(q, _mm256_srli_epi32(q, 31)); // q += (q < 0) + return q; + } +} + +__m256i libdivide_s32_branchfree_do_vector(__m256i numers, const struct libdivide_s32_branchfree_t *denom) { + int32_t magic = denom->magic; + uint8_t more = denom->more; + uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; + // must be arithmetic shift + __m256i sign = _mm256_set1_epi32((int8_t)more >> 7); + __m256i q = libdivide_mullhi_s32_vector(numers, _mm256_set1_epi32(magic)); + q = _mm256_add_epi32(q, numers); // q += numers + + // If q is non-negative, we have nothing to do + // If q is negative, we want to add either (2**shift)-1 if d is + // a power of 2, or (2**shift) if it is not a power of 2 + uint32_t is_power_of_2 = (magic == 0); + __m256i q_sign = _mm256_srai_epi32(q, 31); // q_sign = q >> 31 + __m256i mask = _mm256_set1_epi32((1U << shift) - is_power_of_2); + q = _mm256_add_epi32(q, _mm256_and_si256(q_sign, mask)); // q = q + (q_sign & mask) + q = _mm256_srai_epi32(q, shift); // q >>= shift + q = _mm256_sub_epi32(_mm256_xor_si256(q, sign), sign); // q = (q ^ sign) - sign + return q; +} + +////////// SINT64 + +__m256i libdivide_s64_do_vector(__m256i numers, const struct libdivide_s64_t *denom) { + uint8_t more = denom->more; + int64_t magic = denom->magic; + if (magic == 0) { // shift path + uint32_t shift = more & LIBDIVIDE_64_SHIFT_MASK; + uint64_t mask = (1ULL << shift) - 1; + __m256i roundToZeroTweak = _mm256_set1_epi64x(mask); + // q = numer + ((numer >> 63) & roundToZeroTweak); + __m256i q = _mm256_add_epi64(numers, _mm256_and_si256(libdivide_s64_signbits(numers), roundToZeroTweak)); + q = libdivide_s64_shift_right_vector(q, shift); + __m256i sign = _mm256_set1_epi32((int8_t)more >> 7); + // q = (q ^ sign) - sign; + q = _mm256_sub_epi64(_mm256_xor_si256(q, sign), sign); + return q; + } + else { + __m256i q = libdivide_mullhi_s64_vector(numers, _mm256_set1_epi64x(magic)); + if (more & LIBDIVIDE_ADD_MARKER) { + // must be arithmetic shift + __m256i sign = _mm256_set1_epi32((int8_t)more >> 7); + // q += ((numer ^ sign) - sign); + q = _mm256_add_epi64(q, _mm256_sub_epi64(_mm256_xor_si256(numers, sign), sign)); + } + // q >>= denom->mult_path.shift + q = libdivide_s64_shift_right_vector(q, more & LIBDIVIDE_64_SHIFT_MASK); + q = _mm256_add_epi64(q, _mm256_srli_epi64(q, 63)); // q += (q < 0) + return q; + } +} + +__m256i libdivide_s64_branchfree_do_vector(__m256i numers, const struct libdivide_s64_branchfree_t *denom) { + int64_t magic = denom->magic; + uint8_t more = denom->more; + uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; + // must be arithmetic shift + __m256i sign = _mm256_set1_epi32((int8_t)more >> 7); + + // libdivide_mullhi_s64(numers, magic); + __m256i q = libdivide_mullhi_s64_vector(numers, _mm256_set1_epi64x(magic)); + q = _mm256_add_epi64(q, 
numers); // q += numers + + // If q is non-negative, we have nothing to do. + // If q is negative, we want to add either (2**shift)-1 if d is + // a power of 2, or (2**shift) if it is not a power of 2. + uint32_t is_power_of_2 = (magic == 0); + __m256i q_sign = libdivide_s64_signbits(q); // q_sign = q >> 63 + __m256i mask = _mm256_set1_epi64x((1ULL << shift) - is_power_of_2); + q = _mm256_add_epi64(q, _mm256_and_si256(q_sign, mask)); // q = q + (q_sign & mask) + q = libdivide_s64_shift_right_vector(q, shift); // q >>= shift + q = _mm256_sub_epi64(_mm256_xor_si256(q, sign), sign); // q = (q ^ sign) - sign + return q; +} + +#elif defined(LIBDIVIDE_SSE2) + +static inline __m128i libdivide_u32_do_vector(__m128i numers, const struct libdivide_u32_t *denom); +static inline __m128i libdivide_s32_do_vector(__m128i numers, const struct libdivide_s32_t *denom); +static inline __m128i libdivide_u64_do_vector(__m128i numers, const struct libdivide_u64_t *denom); +static inline __m128i libdivide_s64_do_vector(__m128i numers, const struct libdivide_s64_t *denom); + +static inline __m128i libdivide_u32_branchfree_do_vector(__m128i numers, const struct libdivide_u32_branchfree_t *denom); +static inline __m128i libdivide_s32_branchfree_do_vector(__m128i numers, const struct libdivide_s32_branchfree_t *denom); +static inline __m128i libdivide_u64_branchfree_do_vector(__m128i numers, const struct libdivide_u64_branchfree_t *denom); +static inline __m128i libdivide_s64_branchfree_do_vector(__m128i numers, const struct libdivide_s64_branchfree_t *denom); + +//////// Internal Utility Functions + +// Implementation of _mm_srai_epi64(v, 63) (from AVX512). +static inline __m128i libdivide_s64_signbits(__m128i v) { + __m128i hiBitsDuped = _mm_shuffle_epi32(v, _MM_SHUFFLE(3, 3, 1, 1)); + __m128i signBits = _mm_srai_epi32(hiBitsDuped, 31); + return signBits; +} + +// Implementation of _mm_srai_epi64 (from AVX512). +static inline __m128i libdivide_s64_shift_right_vector(__m128i v, int amt) { + const int b = 64 - amt; + __m128i m = _mm_set1_epi64x(1ULL << (b - 1)); + __m128i x = _mm_srli_epi64(v, amt); + __m128i result = _mm_sub_epi64(_mm_xor_si128(x, m), m); + return result; +} + +// Here, b is assumed to contain one 32-bit value repeated. +static inline __m128i libdivide_mullhi_u32_vector(__m128i a, __m128i b) { + __m128i hi_product_0Z2Z = _mm_srli_epi64(_mm_mul_epu32(a, b), 32); + __m128i a1X3X = _mm_srli_epi64(a, 32); + __m128i mask = _mm_set_epi32(-1, 0, -1, 0); + __m128i hi_product_Z1Z3 = _mm_and_si128(_mm_mul_epu32(a1X3X, b), mask); + return _mm_or_si128(hi_product_0Z2Z, hi_product_Z1Z3); +} + +// SSE2 does not have a signed multiplication instruction, but we can convert +// unsigned to signed pretty efficiently. Again, b is just a 32 bit value +// repeated four times. +static inline __m128i libdivide_mullhi_s32_vector(__m128i a, __m128i b) { + __m128i p = libdivide_mullhi_u32_vector(a, b); + // t1 = (a >> 31) & y, arithmetic shift + __m128i t1 = _mm_and_si128(_mm_srai_epi32(a, 31), b); + __m128i t2 = _mm_and_si128(_mm_srai_epi32(b, 31), a); + p = _mm_sub_epi32(p, t1); + p = _mm_sub_epi32(p, t2); + return p; +} + +// Here, y is assumed to contain one 64-bit value repeated. 
+// https://stackoverflow.com/a/28827013
+static inline __m128i libdivide_mullhi_u64_vector(__m128i x, __m128i y) {
+    __m128i lomask = _mm_set1_epi64x(0xffffffff);
+    __m128i xh = _mm_shuffle_epi32(x, 0xB1);       // x0l, x0h, x1l, x1h
+    __m128i yh = _mm_shuffle_epi32(y, 0xB1);       // y0l, y0h, y1l, y1h
+    __m128i w0 = _mm_mul_epu32(x, y);              // x0l*y0l, x1l*y1l
+    __m128i w1 = _mm_mul_epu32(x, yh);             // x0l*y0h, x1l*y1h
+    __m128i w2 = _mm_mul_epu32(xh, y);             // x0h*y0l, x1h*y1l
+    __m128i w3 = _mm_mul_epu32(xh, yh);            // x0h*y0h, x1h*y1h
+    __m128i w0h = _mm_srli_epi64(w0, 32);
+    __m128i s1 = _mm_add_epi64(w1, w0h);
+    __m128i s1l = _mm_and_si128(s1, lomask);
+    __m128i s1h = _mm_srli_epi64(s1, 32);
+    __m128i s2 = _mm_add_epi64(w2, s1l);
+    __m128i s2h = _mm_srli_epi64(s2, 32);
+    __m128i hi = _mm_add_epi64(w3, s1h);
+    hi = _mm_add_epi64(hi, s2h);
+
+    return hi;
+}
+
+// y is one 64-bit value repeated.
+static inline __m128i libdivide_mullhi_s64_vector(__m128i x, __m128i y) {
+    __m128i p = libdivide_mullhi_u64_vector(x, y);
+    __m128i t1 = _mm_and_si128(libdivide_s64_signbits(x), y);
+    __m128i t2 = _mm_and_si128(libdivide_s64_signbits(y), x);
+    p = _mm_sub_epi64(p, t1);
+    p = _mm_sub_epi64(p, t2);
+    return p;
+}
+
+////////// UINT32
+
+__m128i libdivide_u32_do_vector(__m128i numers, const struct libdivide_u32_t *denom) {
+    uint8_t more = denom->more;
+    if (!denom->magic) {
+        return _mm_srli_epi32(numers, more);
+    }
+    else {
+        __m128i q = libdivide_mullhi_u32_vector(numers, _mm_set1_epi32(denom->magic));
+        if (more & LIBDIVIDE_ADD_MARKER) {
+            // uint32_t t = ((numer - q) >> 1) + q;
+            // return t >> denom->shift;
+            uint32_t shift = more & LIBDIVIDE_32_SHIFT_MASK;
+            __m128i t = _mm_add_epi32(_mm_srli_epi32(_mm_sub_epi32(numers, q), 1), q);
+            return _mm_srli_epi32(t, shift);
+        }
+        else {
+            return _mm_srli_epi32(q, more);
+        }
+    }
+}
+
+__m128i libdivide_u32_branchfree_do_vector(__m128i numers, const struct libdivide_u32_branchfree_t *denom) {
+    __m128i q = libdivide_mullhi_u32_vector(numers, _mm_set1_epi32(denom->magic));
+    __m128i t = _mm_add_epi32(_mm_srli_epi32(_mm_sub_epi32(numers, q), 1), q);
+    return _mm_srli_epi32(t, denom->more);
+}
+
+////////// UINT64
+
+__m128i libdivide_u64_do_vector(__m128i numers, const struct libdivide_u64_t *denom) {
+    uint8_t more = denom->more;
+    if (!denom->magic) {
+        return _mm_srli_epi64(numers, more);
+    }
+    else {
+        __m128i q = libdivide_mullhi_u64_vector(numers, _mm_set1_epi64x(denom->magic));
+        if (more & LIBDIVIDE_ADD_MARKER) {
+            // uint32_t t = ((numer - q) >> 1) + q;
+            // return t >> denom->shift;
+            uint32_t shift = more & LIBDIVIDE_64_SHIFT_MASK;
+            __m128i t = _mm_add_epi64(_mm_srli_epi64(_mm_sub_epi64(numers, q), 1), q);
+            return _mm_srli_epi64(t, shift);
+        }
+        else {
+            return _mm_srli_epi64(q, more);
+        }
+    }
+}
+
+__m128i libdivide_u64_branchfree_do_vector(__m128i numers, const struct libdivide_u64_branchfree_t *denom) {
+    __m128i q = libdivide_mullhi_u64_vector(numers, _mm_set1_epi64x(denom->magic));
+    __m128i t = _mm_add_epi64(_mm_srli_epi64(_mm_sub_epi64(numers, q), 1), q);
+    return _mm_srli_epi64(t, denom->more);
+}
+
+////////// SINT32
+
+__m128i libdivide_s32_do_vector(__m128i numers, const struct libdivide_s32_t *denom) {
+    uint8_t more = denom->more;
+    if (!denom->magic) {
+        uint32_t shift = more & LIBDIVIDE_32_SHIFT_MASK;
+        uint32_t mask = (1U << shift) - 1;
+        __m128i roundToZeroTweak = _mm_set1_epi32(mask);
+        // q = numer + ((numer >> 31) & roundToZeroTweak);
+        __m128i q = _mm_add_epi32(numers, _mm_and_si128(_mm_srai_epi32(numers, 31), roundToZeroTweak));
+        q =
_mm_srai_epi32(q, shift); + __m128i sign = _mm_set1_epi32((int8_t)more >> 7); + // q = (q ^ sign) - sign; + q = _mm_sub_epi32(_mm_xor_si128(q, sign), sign); + return q; + } + else { + __m128i q = libdivide_mullhi_s32_vector(numers, _mm_set1_epi32(denom->magic)); + if (more & LIBDIVIDE_ADD_MARKER) { + // must be arithmetic shift + __m128i sign = _mm_set1_epi32((int8_t)more >> 7); + // q += ((numer ^ sign) - sign); + q = _mm_add_epi32(q, _mm_sub_epi32(_mm_xor_si128(numers, sign), sign)); + } + // q >>= shift + q = _mm_srai_epi32(q, more & LIBDIVIDE_32_SHIFT_MASK); + q = _mm_add_epi32(q, _mm_srli_epi32(q, 31)); // q += (q < 0) + return q; + } +} + +__m128i libdivide_s32_branchfree_do_vector(__m128i numers, const struct libdivide_s32_branchfree_t *denom) { + int32_t magic = denom->magic; + uint8_t more = denom->more; + uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; + // must be arithmetic shift + __m128i sign = _mm_set1_epi32((int8_t)more >> 7); + __m128i q = libdivide_mullhi_s32_vector(numers, _mm_set1_epi32(magic)); + q = _mm_add_epi32(q, numers); // q += numers + + // If q is non-negative, we have nothing to do + // If q is negative, we want to add either (2**shift)-1 if d is + // a power of 2, or (2**shift) if it is not a power of 2 + uint32_t is_power_of_2 = (magic == 0); + __m128i q_sign = _mm_srai_epi32(q, 31); // q_sign = q >> 31 + __m128i mask = _mm_set1_epi32((1U << shift) - is_power_of_2); + q = _mm_add_epi32(q, _mm_and_si128(q_sign, mask)); // q = q + (q_sign & mask) + q = _mm_srai_epi32(q, shift); // q >>= shift + q = _mm_sub_epi32(_mm_xor_si128(q, sign), sign); // q = (q ^ sign) - sign + return q; +} + +////////// SINT64 + +__m128i libdivide_s64_do_vector(__m128i numers, const struct libdivide_s64_t *denom) { + uint8_t more = denom->more; + int64_t magic = denom->magic; + if (magic == 0) { // shift path + uint32_t shift = more & LIBDIVIDE_64_SHIFT_MASK; + uint64_t mask = (1ULL << shift) - 1; + __m128i roundToZeroTweak = _mm_set1_epi64x(mask); + // q = numer + ((numer >> 63) & roundToZeroTweak); + __m128i q = _mm_add_epi64(numers, _mm_and_si128(libdivide_s64_signbits(numers), roundToZeroTweak)); + q = libdivide_s64_shift_right_vector(q, shift); + __m128i sign = _mm_set1_epi32((int8_t)more >> 7); + // q = (q ^ sign) - sign; + q = _mm_sub_epi64(_mm_xor_si128(q, sign), sign); + return q; + } + else { + __m128i q = libdivide_mullhi_s64_vector(numers, _mm_set1_epi64x(magic)); + if (more & LIBDIVIDE_ADD_MARKER) { + // must be arithmetic shift + __m128i sign = _mm_set1_epi32((int8_t)more >> 7); + // q += ((numer ^ sign) - sign); + q = _mm_add_epi64(q, _mm_sub_epi64(_mm_xor_si128(numers, sign), sign)); + } + // q >>= denom->mult_path.shift + q = libdivide_s64_shift_right_vector(q, more & LIBDIVIDE_64_SHIFT_MASK); + q = _mm_add_epi64(q, _mm_srli_epi64(q, 63)); // q += (q < 0) + return q; + } +} + +__m128i libdivide_s64_branchfree_do_vector(__m128i numers, const struct libdivide_s64_branchfree_t *denom) { + int64_t magic = denom->magic; + uint8_t more = denom->more; + uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; + // must be arithmetic shift + __m128i sign = _mm_set1_epi32((int8_t)more >> 7); + + // libdivide_mullhi_s64(numers, magic); + __m128i q = libdivide_mullhi_s64_vector(numers, _mm_set1_epi64x(magic)); + q = _mm_add_epi64(q, numers); // q += numers + + // If q is non-negative, we have nothing to do. + // If q is negative, we want to add either (2**shift)-1 if d is + // a power of 2, or (2**shift) if it is not a power of 2. 
+    uint32_t is_power_of_2 = (magic == 0);
+    __m128i q_sign = libdivide_s64_signbits(q); // q_sign = q >> 63
+    __m128i mask = _mm_set1_epi64x((1ULL << shift) - is_power_of_2);
+    q = _mm_add_epi64(q, _mm_and_si128(q_sign, mask)); // q = q + (q_sign & mask)
+    q = libdivide_s64_shift_right_vector(q, shift); // q >>= shift
+    q = _mm_sub_epi64(_mm_xor_si128(q, sign), sign); // q = (q ^ sign) - sign
+    return q;
+}
+
+#endif
+
+/////////// C++ stuff
+
+#ifdef __cplusplus
+
+// The C++ divider class is templated on both an integer type
+// (like uint64_t) and an algorithm type.
+// * BRANCHFULL is the default algorithm type.
+// * BRANCHFREE is the branchfree algorithm type.
+enum {
+    BRANCHFULL,
+    BRANCHFREE
+};
+
+#if defined(LIBDIVIDE_AVX512)
+    #define LIBDIVIDE_VECTOR_TYPE __m512i
+#elif defined(LIBDIVIDE_AVX2)
+    #define LIBDIVIDE_VECTOR_TYPE __m256i
+#elif defined(LIBDIVIDE_SSE2)
+    #define LIBDIVIDE_VECTOR_TYPE __m128i
+#endif
+
+#if !defined(LIBDIVIDE_VECTOR_TYPE)
+    #define LIBDIVIDE_DIVIDE_VECTOR(ALGO)
+#else
+    #define LIBDIVIDE_DIVIDE_VECTOR(ALGO) \
+        LIBDIVIDE_VECTOR_TYPE divide(LIBDIVIDE_VECTOR_TYPE n) const { \
+            return libdivide_##ALGO##_do_vector(n, &denom); \
+        }
+#endif
+
+// The DISPATCHER_GEN() macro generates C++ methods (for the given integer
+// and algorithm types) that redirect to libdivide's C API.
+#define DISPATCHER_GEN(T, ALGO) \
+    libdivide_##ALGO##_t denom; \
+    dispatcher() { } \
+    dispatcher(T d) \
+        : denom(libdivide_##ALGO##_gen(d)) \
+    { } \
+    T divide(T n) const { \
+        return libdivide_##ALGO##_do(n, &denom); \
+    } \
+    LIBDIVIDE_DIVIDE_VECTOR(ALGO) \
+    T recover() const { \
+        return libdivide_##ALGO##_recover(&denom); \
+    }
+
+// The dispatcher selects a specific division algorithm for a given
+// type and ALGO using partial template specialization.
+template<bool IS_INTEGRAL, bool IS_SIGNED, int SIZEOF, int ALGO> struct dispatcher { };
+
+template<> struct dispatcher<true, true, sizeof(int32_t), BRANCHFULL> { DISPATCHER_GEN(int32_t, s32) };
+template<> struct dispatcher<true, true, sizeof(int32_t), BRANCHFREE> { DISPATCHER_GEN(int32_t, s32_branchfree) };
+template<> struct dispatcher<true, false, sizeof(uint32_t), BRANCHFULL> { DISPATCHER_GEN(uint32_t, u32) };
+template<> struct dispatcher<true, false, sizeof(uint32_t), BRANCHFREE> { DISPATCHER_GEN(uint32_t, u32_branchfree) };
+template<> struct dispatcher<true, true, sizeof(int64_t), BRANCHFULL> { DISPATCHER_GEN(int64_t, s64) };
+template<> struct dispatcher<true, true, sizeof(int64_t), BRANCHFREE> { DISPATCHER_GEN(int64_t, s64_branchfree) };
+template<> struct dispatcher<true, false, sizeof(uint64_t), BRANCHFULL> { DISPATCHER_GEN(uint64_t, u64) };
+template<> struct dispatcher<true, false, sizeof(uint64_t), BRANCHFREE> { DISPATCHER_GEN(uint64_t, u64_branchfree) };
+
+// This is the main divider class for use by the user (C++ API).
+// The actual division algorithm is selected using the dispatcher struct
+// based on the integer and algorithm template parameters.
+template<typename T, int ALGO = BRANCHFULL>
+class divider {
+public:
+    // We leave the default constructor empty so that creating
+    // an array of dividers and then initializing them
+    // later doesn't slow us down.
+    divider() { }
+
+    // Constructor that takes the divisor as a parameter
+    divider(T d) : div(d) { }
+
+    // Divides n by the divisor
+    T divide(T n) const {
+        return div.divide(n);
+    }
+
+    // Recovers the divisor, returns the value that was
+    // used to initialize this divider object.
+    T recover() const {
+        return div.recover();
+    }
+
+    bool operator==(const divider<T, ALGO>& other) const {
+        return div.denom.magic == other.div.denom.magic &&
+               div.denom.more == other.div.denom.more;
+    }
+
+    bool operator!=(const divider<T, ALGO>& other) const {
+        return !(*this == other);
+    }
+
+#if defined(LIBDIVIDE_VECTOR_TYPE)
+    // Treats the vector as packed integer values with the same type as
+    // the divider (e.g. s32, u32, s64, u64) and divides each of
+    // them by the divider, returning the packed quotients.
+    LIBDIVIDE_VECTOR_TYPE divide(LIBDIVIDE_VECTOR_TYPE n) const {
+        return div.divide(n);
+    }
+#endif
+
+private:
+    // Storage for the actual divisor
+    dispatcher<std::is_integral<T>::value,
+               std::is_signed<T>::value, sizeof(T), ALGO> div;
+};
+
+// Overload of operator / for scalar division
+template<typename T, int ALGO>
+T operator/(T n, const divider<T, ALGO>& div) {
+    return div.divide(n);
+}
+
+// Overload of operator /= for scalar division
+template<typename T, int ALGO>
+T& operator/=(T& n, const divider<T, ALGO>& div) {
+    n = div.divide(n);
+    return n;
+}
+
+#if defined(LIBDIVIDE_VECTOR_TYPE)
+    // Overload of operator / for vector division
+    template<typename T, int ALGO>
+    LIBDIVIDE_VECTOR_TYPE operator/(LIBDIVIDE_VECTOR_TYPE n, const divider<T, ALGO>& div) {
+        return div.divide(n);
+    }
+    // Overload of operator /= for vector division
+    template<typename T, int ALGO>
+    LIBDIVIDE_VECTOR_TYPE& operator/=(LIBDIVIDE_VECTOR_TYPE& n, const divider<T, ALGO>& div) {
+        n = div.divide(n);
+        return n;
+    }
+#endif
+
+// libdivide::branchfree_divider
+template <typename T>
+using branchfree_divider = divider<T, BRANCHFREE>;
+
+} // namespace libdivide
+
+#endif // __cplusplus
+
+#endif // NUMPY_CORE_INCLUDE_NUMPY_LIBDIVIDE_LIBDIVIDE_H_
diff --git a/.local/lib/python3.8/site-packages/numpy/core/include/numpy/multiarray_api.txt b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/multiarray_api.txt
new file mode 100644
index 0000000..2e07cb0
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/multiarray_api.txt
@@ -0,0 +1,2502 @@
+
+===========
+NumPy C-API
+===========
+::
+
+  unsigned int
+  PyArray_GetNDArrayCVersion(void )
+
+
+Included at the very start so it is not auto-grabbed and thus not labeled.
+
+::
+
+  int
+  PyArray_SetNumericOps(PyObject *dict)
+
+Set internal structure with number functions that all arrays will use.
+
+::
+
+  PyObject *
+  PyArray_GetNumericOps(void )
+
+Get dictionary showing number functions that all arrays will use.
+
+::
+
+  int
+  PyArray_INCREF(PyArrayObject *mp)
+
+For object arrays, increment all internal references.
+
+::
+
+  int
+  PyArray_XDECREF(PyArrayObject *mp)
+
+Decrement all internal references for object arrays.
+(or arrays with object fields)
+
+::
+
+  void
+  PyArray_SetStringFunction(PyObject *op, int repr)
+
+Set the array print function to be a Python function.
+
+::
+
+  PyArray_Descr *
+  PyArray_DescrFromType(int type)
+
+Get the PyArray_Descr structure for a type.
+
+::
+
+  PyObject *
+  PyArray_TypeObjectFromType(int type)
+
+Get a typeobject from a type-number -- can return NULL.
+
+New reference
+
+::
+
+  char *
+  PyArray_Zero(PyArrayObject *arr)
+
+Get pointer to zero of correct type for array.
+
+::
+
+  char *
+  PyArray_One(PyArrayObject *arr)
+
+Get pointer to one of correct type for array.
+
+::
+
+  PyObject *
+  PyArray_CastToType(PyArrayObject *arr, PyArray_Descr *dtype, int
+                     is_f_order)
+
+For backward compatibility
+
+Cast an array using typecode structure.
+steals reference to dtype --- cannot be NULL
+
+This function always makes a copy of arr, even if the dtype
+doesn't change.
+
+::
+
+  int
+  PyArray_CastTo(PyArrayObject *out, PyArrayObject *mp)
+
+Cast to an already created array.
+
+::
+
+  int
+  PyArray_CastAnyTo(PyArrayObject *out, PyArrayObject *mp)
+
+Cast to an already created array. Arrays don't have to be "broadcastable"
+Only requirement is they have the same number of elements.
+
+::
+
+  int
+  PyArray_CanCastSafely(int fromtype, int totype)
+
+Check the type coercion rules.
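+
+For illustration (an editorial sketch, not part of the generated listing),
+the check above behaves as follows, assuming Python is initialized and
+import_array() has been called::
+
+  /* Safe: every int32 value is representable as int64 -> returns nonzero. */
+  int ok = PyArray_CanCastSafely(NPY_INT32, NPY_INT64);
+  /* Lossy: int64 -> int32 can overflow -> returns 0. */
+  int bad = PyArray_CanCastSafely(NPY_INT64, NPY_INT32);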
+
+::
+
+  npy_bool
+  PyArray_CanCastTo(PyArray_Descr *from, PyArray_Descr *to)
+
+leaves reference count alone --- cannot be NULL
+
+PyArray_CanCastTypeTo is equivalent to this, but adds a 'casting'
+parameter.
+
+::
+
+  int
+  PyArray_ObjectType(PyObject *op, int minimum_type)
+
+Return the typecode of the array a Python object would be converted to
+
+Returns the type number the result should have, or NPY_NOTYPE on error.
+
+::
+
+  PyArray_Descr *
+  PyArray_DescrFromObject(PyObject *op, PyArray_Descr *mintype)
+
+new reference -- accepts NULL for mintype
+
+::
+
+  PyArrayObject **
+  PyArray_ConvertToCommonType(PyObject *op, int *retn)
+
+
+This function is only used in one place within NumPy and should
+generally be avoided. It is provided mainly for backward compatibility.
+
+The user of the function has to free the returned array with PyDataMem_FREE.
+
+::
+
+  PyArray_Descr *
+  PyArray_DescrFromScalar(PyObject *sc)
+
+Return descr object from array scalar.
+
+New reference
+
+::
+
+  PyArray_Descr *
+  PyArray_DescrFromTypeObject(PyObject *type)
+
+
+::
+
+  npy_intp
+  PyArray_Size(PyObject *op)
+
+Compute the size of an array (in number of items)
+
+::
+
+  PyObject *
+  PyArray_Scalar(void *data, PyArray_Descr *descr, PyObject *base)
+
+Get scalar-equivalent to a region of memory described by a descriptor.
+
+::
+
+  PyObject *
+  PyArray_FromScalar(PyObject *scalar, PyArray_Descr *outcode)
+
+Get 0-dim array from scalar
+
+0-dim array from array-scalar object
+always contains a copy of the data
+unless outcode is NULL, it is of void type and the referrer does
+not own it either.
+
+steals reference to outcode
+
+::
+
+  void
+  PyArray_ScalarAsCtype(PyObject *scalar, void *ctypeptr)
+
+Convert to c-type
+
+No error checking is performed -- ctypeptr must be the same type as the
+scalar. In the case of a flexible type, the data is not copied; instead,
+ctypeptr is expected to be a pointer to a pointer.
+
+::
+
+  int
+  PyArray_CastScalarToCtype(PyObject *scalar, void
+                            *ctypeptr, PyArray_Descr *outcode)
+
+Cast Scalar to c-type
+
+The output buffer must be large enough to receive the value, even for
+flexible types. This is different from ScalarAsCtype, where only a
+reference is returned for flexible types.
+
+This may not work right on narrow builds for NumPy unicode scalars.
+
+::
+
+  int
+  PyArray_CastScalarDirect(PyObject *scalar, PyArray_Descr
+                           *indescr, void *ctypeptr, int outtype)
+
+Cast Scalar to c-type
+
+::
+
+  PyObject *
+  PyArray_ScalarFromObject(PyObject *object)
+
+Get an Array Scalar From a Python Object
+
+Returns NULL if unsuccessful, but the error is only set if another error
+occurred. Currently only Numeric-like objects are supported.
+
+::
+
+  PyArray_VectorUnaryFunc *
+  PyArray_GetCastFunc(PyArray_Descr *descr, int type_num)
+
+Get a cast function to cast from the input descriptor to the
+output type_number (must be a registered data-type).
+Returns NULL if unsuccessful.
+
+::
+
+  PyObject *
+  PyArray_FromDims(int NPY_UNUSED(nd) , int *NPY_UNUSED(d) , int
+                   NPY_UNUSED(type) )
+
+Deprecated, use PyArray_SimpleNew instead.
+
+::
+
+  PyObject *
+  PyArray_FromDimsAndDataAndDescr(int NPY_UNUSED(nd) , int
+                                  *NPY_UNUSED(d) , PyArray_Descr
+                                  *descr, char *NPY_UNUSED(data) )
+
+Deprecated, use PyArray_NewFromDescr instead.
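+
+For illustration (an editorial sketch, not part of the generated listing):
+since the two entries above are deprecated, new code would use the
+suggested replacements, e.g.::
+
+  npy_intp dims[2] = {3, 4};
+  /* Replacement for PyArray_FromDims: new, uninitialized 3x4 int64 array.
+     Assumes Python is initialized and import_array() has been called. */
+  PyObject *arr = PyArray_SimpleNew(2, dims, NPY_INT64);
+  if (arr == NULL) {
+      /* allocation failed; a Python exception is set */
+  }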
+
+::
+
+  PyObject *
+  PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int
+                  min_depth, int max_depth, int flags, PyObject
+                  *context)
+
+Does not check for NPY_ARRAY_ENSURECOPY and NPY_ARRAY_NOTSWAPPED in flags
+Steals a reference to newtype --- which can be NULL
+
+::
+
+  PyObject *
+  PyArray_EnsureArray(PyObject *op)
+
+This is a quick wrapper around
+PyArray_FromAny(op, NULL, 0, 0, NPY_ARRAY_ENSUREARRAY, NULL)
+that special cases Arrays and PyArray_Scalars up front.
+It *steals a reference* to the object.
+It also guarantees that the result is PyArray_Type.
+Because it decrefs op if any conversion needs to take place,
+it can be used like PyArray_EnsureArray(some_function(...)).
+
+::
+
+  PyObject *
+  PyArray_EnsureAnyArray(PyObject *op)
+
+
+::
+
+  PyObject *
+  PyArray_FromFile(FILE *fp, PyArray_Descr *dtype, npy_intp num, char
+                   *sep)
+
+
+Given a ``FILE *`` pointer ``fp``, and a ``PyArray_Descr``, return an
+array corresponding to the data encoded in that file.
+
+The reference to `dtype` is stolen (it is possible that the passed in
+dtype is not held on to).
+
+The number of elements to read is given as ``num``; if it is < 0, then
+as many as possible are read.
+
+If ``sep`` is NULL or empty, then binary data is assumed, else
+text data, with ``sep`` as the separator between elements. Whitespace in
+the separator matches any length of whitespace in the text, and a match
+for whitespace around the separator is added.
+
+For memory-mapped files, use the buffer interface. No more data than
+necessary is read by this routine.
+
+::
+
+  PyObject *
+  PyArray_FromString(char *data, npy_intp slen, PyArray_Descr
+                     *dtype, npy_intp num, char *sep)
+
+
+Given a pointer to a string ``data``, a string length ``slen``, and
+a ``PyArray_Descr``, return an array corresponding to the data
+encoded in that string.
+
+If the dtype is NULL, the default array type is used (double).
+If non-null, the reference is stolen.
+
+If ``slen`` is < 0, then the end of string is used for text data.
+It is an error for ``slen`` to be < 0 for binary data (since embedded NULLs
+would be the norm).
+
+The number of elements to read is given as ``num``; if it is < 0, then
+as many as possible are read.
+
+If ``sep`` is NULL or empty, then binary data is assumed, else
+text data, with ``sep`` as the separator between elements. Whitespace in
+the separator matches any length of whitespace in the text, and a match
+for whitespace around the separator is added.
+
+::
+
+  PyObject *
+  PyArray_FromBuffer(PyObject *buf, PyArray_Descr *type, npy_intp
+                     count, npy_intp offset)
+
+
+::
+
+  PyObject *
+  PyArray_FromIter(PyObject *obj, PyArray_Descr *dtype, npy_intp count)
+
+
+steals a reference to dtype (which cannot be NULL)
+
+::
+
+  PyObject *
+  PyArray_Return(PyArrayObject *mp)
+
+
+Return either an array or the appropriate Python object if the array
+is 0d and matches a Python type.
+steals reference to mp + +:: + + PyObject * + PyArray_GetField(PyArrayObject *self, PyArray_Descr *typed, int + offset) + +Get a subset of bytes from each element of the array +steals reference to typed, must not be NULL + +:: + + int + PyArray_SetField(PyArrayObject *self, PyArray_Descr *dtype, int + offset, PyObject *val) + +Set a subset of bytes from each element of the array +steals reference to dtype, must not be NULL + +:: + + PyObject * + PyArray_Byteswap(PyArrayObject *self, npy_bool inplace) + + +:: + + PyObject * + PyArray_Resize(PyArrayObject *self, PyArray_Dims *newshape, int + refcheck, NPY_ORDER NPY_UNUSED(order) ) + +Resize (reallocate data). Only works if nothing else is referencing this +array and it is contiguous. If refcheck is 0, then the reference count is +not checked and assumed to be 1. You still must own this data and have no +weak-references and no base object. + +:: + + int + PyArray_MoveInto(PyArrayObject *dst, PyArrayObject *src) + +Move the memory of one array into another, allowing for overlapping data. + +Returns 0 on success, negative on failure. + +:: + + int + PyArray_CopyInto(PyArrayObject *dst, PyArrayObject *src) + +Copy an Array into another array. +Broadcast to the destination shape if necessary. + +Returns 0 on success, -1 on failure. + +:: + + int + PyArray_CopyAnyInto(PyArrayObject *dst, PyArrayObject *src) + +Copy an Array into another array -- memory must not overlap +Does not require src and dest to have "broadcastable" shapes +(only the same number of elements). + +TODO: For NumPy 2.0, this could accept an order parameter which +only allows NPY_CORDER and NPY_FORDER. Could also rename +this to CopyAsFlat to make the name more intuitive. + +Returns 0 on success, -1 on error. + +:: + + int + PyArray_CopyObject(PyArrayObject *dest, PyObject *src_object) + + +:: + + PyObject * + PyArray_NewCopy(PyArrayObject *obj, NPY_ORDER order) + +Copy an array. + +:: + + PyObject * + PyArray_ToList(PyArrayObject *self) + +To List + +:: + + PyObject * + PyArray_ToString(PyArrayObject *self, NPY_ORDER order) + + +:: + + int + PyArray_ToFile(PyArrayObject *self, FILE *fp, char *sep, char *format) + +To File + +:: + + int + PyArray_Dump(PyObject *self, PyObject *file, int protocol) + + +:: + + PyObject * + PyArray_Dumps(PyObject *self, int protocol) + + +:: + + int + PyArray_ValidType(int type) + +Is the typenum valid? + +:: + + void + PyArray_UpdateFlags(PyArrayObject *ret, int flagmask) + +Update Several Flags at once. + +:: + + PyObject * + PyArray_New(PyTypeObject *subtype, int nd, npy_intp const *dims, int + type_num, npy_intp const *strides, void *data, int + itemsize, int flags, PyObject *obj) + +Generic new array creation routine. + +:: + + PyObject * + PyArray_NewFromDescr(PyTypeObject *subtype, PyArray_Descr *descr, int + nd, npy_intp const *dims, npy_intp const + *strides, void *data, int flags, PyObject *obj) + +Generic new array creation routine. + +steals a reference to descr. On failure or when dtype->subarray is +true, dtype will be decrefed. + +:: + + PyArray_Descr * + PyArray_DescrNew(PyArray_Descr *base) + +base cannot be NULL + +:: + + PyArray_Descr * + PyArray_DescrNewFromType(int type_num) + + +:: + + double + PyArray_GetPriority(PyObject *obj, double default_) + +Get Priority from object + +:: + + PyObject * + PyArray_IterNew(PyObject *obj) + +Get Iterator. + +:: + + PyObject* + PyArray_MultiIterNew(int n, ... 
)
+
+Get MultiIterator.
+
+::
+
+  int
+  PyArray_PyIntAsInt(PyObject *o)
+
+
+::
+
+  npy_intp
+  PyArray_PyIntAsIntp(PyObject *o)
+
+
+::
+
+  int
+  PyArray_Broadcast(PyArrayMultiIterObject *mit)
+
+
+::
+
+  void
+  PyArray_FillObjectArray(PyArrayObject *arr, PyObject *obj)
+
+Assumes contiguous
+
+::
+
+  int
+  PyArray_FillWithScalar(PyArrayObject *arr, PyObject *obj)
+
+
+::
+
+  npy_bool
+  PyArray_CheckStrides(int elsize, int nd, npy_intp numbytes, npy_intp
+                       offset, npy_intp const *dims, npy_intp const
+                       *newstrides)
+
+
+::
+
+  PyArray_Descr *
+  PyArray_DescrNewByteorder(PyArray_Descr *self, char newendian)
+
+
+returns a copy of the PyArray_Descr structure with the byteorder
+altered:
+no arguments: The byteorder is swapped (in all subfields as well)
+single argument: The byteorder is forced to the given state
+(in all subfields as well)
+
+Valid states: ('big', '>'), ('little', '<'), ('native', '=')
+
+If a descr structure with '|' is encountered, its own
+byte-order is not changed but any fields are.
+
+Deep byteorder change of a data-type descriptor
+Leaves reference count of self unchanged --- does not DECREF self
+
+::
+
+  PyObject *
+  PyArray_IterAllButAxis(PyObject *obj, int *inaxis)
+
+Get Iterator that iterates over all but one axis (don't use this with
+PyArray_ITER_GOTO1D). The axis will be over-written if negative
+with the axis having the smallest stride.
+
+::
+
+  PyObject *
+  PyArray_CheckFromAny(PyObject *op, PyArray_Descr *descr, int
+                       min_depth, int max_depth, int requires, PyObject
+                       *context)
+
+steals a reference to descr -- accepts NULL
+
+::
+
+  PyObject *
+  PyArray_FromArray(PyArrayObject *arr, PyArray_Descr *newtype, int
+                    flags)
+
+steals reference to newtype --- accepts NULL
+
+::
+
+  PyObject *
+  PyArray_FromInterface(PyObject *origin)
+
+
+::
+
+  PyObject *
+  PyArray_FromStructInterface(PyObject *input)
+
+
+::
+
+  PyObject *
+  PyArray_FromArrayAttr(PyObject *op, PyArray_Descr *typecode, PyObject
+                        *context)
+
+
+::
+
+  NPY_SCALARKIND
+  PyArray_ScalarKind(int typenum, PyArrayObject **arr)
+
+ScalarKind
+
+Returns the scalar kind of a type number, with an
+optional tweak based on the scalar value itself.
+If no scalar is provided, it returns INTPOS_SCALAR
+for both signed and unsigned integers, otherwise
+it checks the sign of any signed integer to choose
+INTNEG_SCALAR when appropriate.
+
+::
+
+  int
+  PyArray_CanCoerceScalar(int thistype, int neededtype, NPY_SCALARKIND
+                          scalar)
+
+
+Determines whether the data type 'thistype', with
+scalar kind 'scalar', can be coerced into 'neededtype'.
+
+::
+
+  PyObject *
+  PyArray_NewFlagsObject(PyObject *obj)
+
+
+Get New ArrayFlagsObject
+
+::
+
+  npy_bool
+  PyArray_CanCastScalar(PyTypeObject *from, PyTypeObject *to)
+
+See if array scalars can be cast.
+
+TODO: For NumPy 2.0, add a NPY_CASTING parameter.
+
+::
+
+  int
+  PyArray_CompareUCS4(npy_ucs4 const *s1, npy_ucs4 const *s2, size_t
+                      len)
+
+
+::
+
+  int
+  PyArray_RemoveSmallest(PyArrayMultiIterObject *multi)
+
+Adjusts previously broadcasted iterators so that the axis with
+the smallest sum of iterator strides is not iterated over.
+Returns dimension which is smallest in the range [0,multi->nd).
+A -1 is returned if multi->nd == 0.
+
+don't use with PyArray_ITER_GOTO1D because factors are not adjusted
+
+::
+
+  int
+  PyArray_ElementStrides(PyObject *obj)
+
+
+::
+
+  void
+  PyArray_Item_INCREF(char *data, PyArray_Descr *descr)
+
+XINCREF all objects in a single array item.
This is complicated for
+structured datatypes where the position of objects needs to be extracted.
+The function is executed recursively for each nested field or subarray dtype,
+such as `np.dtype([("field1", "O"), ("field2", "f,O", (3,2))])`
+
+::
+
+  void
+  PyArray_Item_XDECREF(char *data, PyArray_Descr *descr)
+
+
+XDECREF all objects in a single array item. This is complicated for
+structured datatypes where the position of objects needs to be extracted.
+The function is executed recursively for each nested field or subarray dtype,
+such as `np.dtype([("field1", "O"), ("field2", "f,O", (3,2))])`
+
+::
+
+  PyObject *
+  PyArray_FieldNames(PyObject *fields)
+
+Return the tuple of ordered field names from a dictionary.
+
+::
+
+  PyObject *
+  PyArray_Transpose(PyArrayObject *ap, PyArray_Dims *permute)
+
+Return Transpose.
+
+::
+
+  PyObject *
+  PyArray_TakeFrom(PyArrayObject *self0, PyObject *indices0, int
+                   axis, PyArrayObject *out, NPY_CLIPMODE clipmode)
+
+Take
+
+::
+
+  PyObject *
+  PyArray_PutTo(PyArrayObject *self, PyObject *values0, PyObject
+                *indices0, NPY_CLIPMODE clipmode)
+
+Put values into an array
+
+::
+
+  PyObject *
+  PyArray_PutMask(PyArrayObject *self, PyObject *values0, PyObject *mask0)
+
+Put values into an array according to a mask.
+
+::
+
+  PyObject *
+  PyArray_Repeat(PyArrayObject *aop, PyObject *op, int axis)
+
+Repeat the array.
+
+::
+
+  PyObject *
+  PyArray_Choose(PyArrayObject *ip, PyObject *op, PyArrayObject
+                 *out, NPY_CLIPMODE clipmode)
+
+
+::
+
+  int
+  PyArray_Sort(PyArrayObject *op, int axis, NPY_SORTKIND which)
+
+Sort an array in-place
+
+::
+
+  PyObject *
+  PyArray_ArgSort(PyArrayObject *op, int axis, NPY_SORTKIND which)
+
+ArgSort an array
+
+::
+
+  PyObject *
+  PyArray_SearchSorted(PyArrayObject *op1, PyObject *op2, NPY_SEARCHSIDE
+                       side, PyObject *perm)
+
+
+Search the sorted array op1 for the location of the items in op2. The
+result is an array of indexes, one for each element in op2, such that if
+the item were to be inserted in op1 just before that index the array
+would still be in sorted order.
+
+Parameters
+----------
+op1 : PyArrayObject *
+Array to be searched, must be 1-D.
+op2 : PyObject *
+Array of items whose insertion indexes in op1 are wanted
+side : {NPY_SEARCHLEFT, NPY_SEARCHRIGHT}
+If NPY_SEARCHLEFT, return first valid insertion indexes
+If NPY_SEARCHRIGHT, return last valid insertion indexes
+perm : PyObject *
+Permutation array that sorts op1 (optional)
+
+Returns
+-------
+ret : PyObject *
+New reference to npy_intp array containing indexes where items in op2
+could be validly inserted into op1. NULL on error.
+
+Notes
+-----
+Binary search is used to find the indexes.
+
+::
+
+  PyObject *
+  PyArray_ArgMax(PyArrayObject *op, int axis, PyArrayObject *out)
+
+ArgMax
+
+::
+
+  PyObject *
+  PyArray_ArgMin(PyArrayObject *op, int axis, PyArrayObject *out)
+
+ArgMin
+
+::
+
+  PyObject *
+  PyArray_Reshape(PyArrayObject *self, PyObject *shape)
+
+Reshape
+
+::
+
+  PyObject *
+  PyArray_Newshape(PyArrayObject *self, PyArray_Dims *newdims, NPY_ORDER
+                   order)
+
+New shape for an array
+
+::
+
+  PyObject *
+  PyArray_Squeeze(PyArrayObject *self)
+
+
+Return a new view of the array object with all of its unit-length
+dimensions squeezed out if needed, otherwise
+return the same array.
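+
+For illustration (an editorial sketch, not part of the generated listing),
+the sort/search entries above combine as follows for a one-dimensional
+``arr`` (variable names are hypothetical; error handling abbreviated)::
+
+  /* Sort arr in place, then find insertion points for `keys`. */
+  if (PyArray_Sort(arr, -1, NPY_QUICKSORT) < 0) {
+      return NULL;
+  }
+  PyObject *idx = PyArray_SearchSorted(arr, keys, NPY_SEARCHLEFT, NULL);
+  if (idx == NULL) {
+      return NULL;
+  }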
+ +:: + + PyObject * + PyArray_View(PyArrayObject *self, PyArray_Descr *type, PyTypeObject + *pytype) + +View +steals a reference to type -- accepts NULL + +:: + + PyObject * + PyArray_SwapAxes(PyArrayObject *ap, int a1, int a2) + +SwapAxes + +:: + + PyObject * + PyArray_Max(PyArrayObject *ap, int axis, PyArrayObject *out) + +Max + +:: + + PyObject * + PyArray_Min(PyArrayObject *ap, int axis, PyArrayObject *out) + +Min + +:: + + PyObject * + PyArray_Ptp(PyArrayObject *ap, int axis, PyArrayObject *out) + +Ptp + +:: + + PyObject * + PyArray_Mean(PyArrayObject *self, int axis, int rtype, PyArrayObject + *out) + +Mean + +:: + + PyObject * + PyArray_Trace(PyArrayObject *self, int offset, int axis1, int + axis2, int rtype, PyArrayObject *out) + +Trace + +:: + + PyObject * + PyArray_Diagonal(PyArrayObject *self, int offset, int axis1, int + axis2) + +Diagonal + +In NumPy versions prior to 1.7, this function always returned a copy of +the diagonal array. In 1.7, the code has been updated to compute a view +onto 'self', but it still copies this array before returning, as well as +setting the internal WARN_ON_WRITE flag. In a future version, it will +simply return a view onto self. + +:: + + PyObject * + PyArray_Clip(PyArrayObject *self, PyObject *min, PyObject + *max, PyArrayObject *out) + +Clip + +:: + + PyObject * + PyArray_Conjugate(PyArrayObject *self, PyArrayObject *out) + +Conjugate + +:: + + PyObject * + PyArray_Nonzero(PyArrayObject *self) + +Nonzero + +TODO: In NumPy 2.0, should make the iteration order a parameter. + +:: + + PyObject * + PyArray_Std(PyArrayObject *self, int axis, int rtype, PyArrayObject + *out, int variance) + +Set variance to 1 to by-pass square-root calculation and return variance +Std + +:: + + PyObject * + PyArray_Sum(PyArrayObject *self, int axis, int rtype, PyArrayObject + *out) + +Sum + +:: + + PyObject * + PyArray_CumSum(PyArrayObject *self, int axis, int rtype, PyArrayObject + *out) + +CumSum + +:: + + PyObject * + PyArray_Prod(PyArrayObject *self, int axis, int rtype, PyArrayObject + *out) + +Prod + +:: + + PyObject * + PyArray_CumProd(PyArrayObject *self, int axis, int + rtype, PyArrayObject *out) + +CumProd + +:: + + PyObject * + PyArray_All(PyArrayObject *self, int axis, PyArrayObject *out) + +All + +:: + + PyObject * + PyArray_Any(PyArrayObject *self, int axis, PyArrayObject *out) + +Any + +:: + + PyObject * + PyArray_Compress(PyArrayObject *self, PyObject *condition, int + axis, PyArrayObject *out) + +Compress + +:: + + PyObject * + PyArray_Flatten(PyArrayObject *a, NPY_ORDER order) + +Flatten + +:: + + PyObject * + PyArray_Ravel(PyArrayObject *arr, NPY_ORDER order) + +Ravel +Returns a contiguous array + +:: + + npy_intp + PyArray_MultiplyList(npy_intp const *l1, int n) + +Multiply a List + +:: + + int + PyArray_MultiplyIntList(int const *l1, int n) + +Multiply a List of ints + +:: + + void * + PyArray_GetPtr(PyArrayObject *obj, npy_intp const*ind) + +Produce a pointer into array + +:: + + int + PyArray_CompareLists(npy_intp const *l1, npy_intp const *l2, int n) + +Compare Lists + +:: + + int + PyArray_AsCArray(PyObject **op, void *ptr, npy_intp *dims, int + nd, PyArray_Descr*typedescr) + +Simulate a C-array +steals a reference to typedescr -- can be NULL + +:: + + int + PyArray_As1D(PyObject **NPY_UNUSED(op) , char **NPY_UNUSED(ptr) , int + *NPY_UNUSED(d1) , int NPY_UNUSED(typecode) ) + +Convert to a 1D C-array + +:: + + int + PyArray_As2D(PyObject **NPY_UNUSED(op) , char ***NPY_UNUSED(ptr) , int + *NPY_UNUSED(d1) , int *NPY_UNUSED(d2) , int + 
NPY_UNUSED(typecode) )
+
+Convert to a 2D C-array
+
+::
+
+  int
+  PyArray_Free(PyObject *op, void *ptr)
+
+Free pointers created if As2D is called
+
+::
+
+  int
+  PyArray_Converter(PyObject *object, PyObject **address)
+
+
+Useful to pass as converter function for O& processing in PyArg_ParseTuple.
+
+This conversion function can be used with the "O&" argument for
+PyArg_ParseTuple. It will immediately return an object of array type
+or will convert any other object to an NPY_ARRAY_CARRAY.
+
+If you use PyArray_Converter, you must DECREF the array when finished
+as you get a new reference to it.
+
+::
+
+  int
+  PyArray_IntpFromSequence(PyObject *seq, npy_intp *vals, int maxvals)
+
+PyArray_IntpFromSequence
+Returns the number of integers converted or -1 if an error occurred.
+vals must be large enough to hold maxvals
+
+::
+
+  PyObject *
+  PyArray_Concatenate(PyObject *op, int axis)
+
+Concatenate
+
+Concatenate an arbitrary Python sequence into an array.
+op is a python object supporting the sequence interface.
+Its elements will be concatenated together to form a single
+multidimensional array. If axis is NPY_MAXDIMS or bigger, then
+each sequence object will be flattened before concatenation.
+
+::
+
+  PyObject *
+  PyArray_InnerProduct(PyObject *op1, PyObject *op2)
+
+Numeric.innerproduct(a,v)
+
+::
+
+  PyObject *
+  PyArray_MatrixProduct(PyObject *op1, PyObject *op2)
+
+Numeric.matrixproduct(a,v)
+just like inner product but does the swapaxes stuff on the fly
+
+::
+
+  PyObject *
+  PyArray_CopyAndTranspose(PyObject *op)
+
+Copy and Transpose
+
+Could deprecate this function, as there isn't a speed benefit over
+calling Transpose and then Copy.
+
+::
+
+  PyObject *
+  PyArray_Correlate(PyObject *op1, PyObject *op2, int mode)
+
+Numeric.correlate(a1,a2,mode)
+
+::
+
+  int
+  PyArray_TypestrConvert(int itemsize, int gentype)
+
+Typestr converter
+
+::
+
+  int
+  PyArray_DescrConverter(PyObject *obj, PyArray_Descr **at)
+
+Get typenum from an object -- None goes to NPY_DEFAULT_TYPE
+This function takes a Python object representing a type and converts it
+to the correct PyArray_Descr * structure to describe the type.
+
+Many objects can be used to represent a data-type which in NumPy is
+quite a flexible concept.
+
+This is the central code that converts Python objects to
+Type-descriptor objects that are used throughout numpy.
+
+Returns a new reference in *at, but the returned value should not be
+modified as it may be one of the canonical immutable objects or
+a reference to the input obj.
+
+::
+
+  int
+  PyArray_DescrConverter2(PyObject *obj, PyArray_Descr **at)
+
+Get typenum from an object -- None goes to NULL
+
+::
+
+  int
+  PyArray_IntpConverter(PyObject *obj, PyArray_Dims *seq)
+
+Get intp chunk from sequence
+
+This function takes a Python sequence object and allocates and
+fills in an intp array with the converted values.
+
+Remember to free the pointer seq.ptr when done using
+PyDimMem_FREE(seq.ptr).
+
+::
+
+  int
+  PyArray_BufferConverter(PyObject *obj, PyArray_Chunk *buf)
+
+Get buffer chunk from object
+
+this function takes a Python object which exposes the (single-segment)
+buffer interface and returns a pointer to the data segment
+
+You should increment the reference count of buf->base by one
+if you will hang on to a reference
+
+You only get a borrowed reference to the object. Do not free the
+memory...
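+
+For illustration (an editorial sketch, not part of the generated listing),
+the converter functions above are typically used through PyArg_ParseTuple's
+"O&" code; ``my_func`` is a hypothetical extension function::
+
+  static PyObject *my_func(PyObject *self, PyObject *args)
+  {
+      PyObject *arr = NULL;
+      /* "O&" routes the argument through PyArray_Converter. */
+      if (!PyArg_ParseTuple(args, "O&", PyArray_Converter, &arr)) {
+          return NULL;
+      }
+      /* ... use arr ... */
+      Py_DECREF(arr);  /* PyArray_Converter handed back a new reference */
+      Py_RETURN_NONE;
+  }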
+ +:: + + int + PyArray_AxisConverter(PyObject *obj, int *axis) + +Get axis from an object (possibly None) -- a converter function, + +See also PyArray_ConvertMultiAxis, which also handles a tuple of axes. + +:: + + int + PyArray_BoolConverter(PyObject *object, npy_bool *val) + +Convert an object to true / false + +:: + + int + PyArray_ByteorderConverter(PyObject *obj, char *endian) + +Convert object to endian + +:: + + int + PyArray_OrderConverter(PyObject *object, NPY_ORDER *val) + +Convert an object to FORTRAN / C / ANY / KEEP + +:: + + unsigned char + PyArray_EquivTypes(PyArray_Descr *type1, PyArray_Descr *type2) + + +This function returns true if the two typecodes are +equivalent (same basic kind and same itemsize). + +:: + + PyObject * + PyArray_Zeros(int nd, npy_intp const *dims, PyArray_Descr *type, int + is_f_order) + +Zeros + +steals a reference to type. On failure or when dtype->subarray is +true, dtype will be decrefed. +accepts NULL type + +:: + + PyObject * + PyArray_Empty(int nd, npy_intp const *dims, PyArray_Descr *type, int + is_f_order) + +Empty + +accepts NULL type +steals a reference to type + +:: + + PyObject * + PyArray_Where(PyObject *condition, PyObject *x, PyObject *y) + +Where + +:: + + PyObject * + PyArray_Arange(double start, double stop, double step, int type_num) + +Arange, + +:: + + PyObject * + PyArray_ArangeObj(PyObject *start, PyObject *stop, PyObject + *step, PyArray_Descr *dtype) + + +ArangeObj, + +this doesn't change the references + +:: + + int + PyArray_SortkindConverter(PyObject *obj, NPY_SORTKIND *sortkind) + +Convert object to sort kind + +:: + + PyObject * + PyArray_LexSort(PyObject *sort_keys, int axis) + +LexSort an array providing indices that will sort a collection of arrays +lexicographically. The first key is sorted on first, followed by the second key +-- requires that arg"merge"sort is available for each sort_key + +Returns an index array that shows the indexes for the lexicographic sort along +the given axis. + +:: + + PyObject * + PyArray_Round(PyArrayObject *a, int decimals, PyArrayObject *out) + +Round + +:: + + unsigned char + PyArray_EquivTypenums(int typenum1, int typenum2) + + +:: + + int + PyArray_RegisterDataType(PyArray_Descr *descr) + +Register Data type +Does not change the reference count of descr + +:: + + int + PyArray_RegisterCastFunc(PyArray_Descr *descr, int + totype, PyArray_VectorUnaryFunc *castfunc) + +Register Casting Function +Replaces any function currently stored. 
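+
+For illustration (an editorial sketch, not part of the generated listing),
+note the reference semantics of the creation functions above: ``Zeros``
+and ``Empty`` steal the descr reference, so no Py_DECREF of the dtype is
+needed afterwards::
+
+  npy_intp dims[1] = {5};
+  PyArray_Descr *dt = PyArray_DescrFromType(NPY_FLOAT64);
+  /* dt's reference is stolen by PyArray_Zeros, even on failure. */
+  PyObject *z = PyArray_Zeros(1, dims, dt, 0 /* C order */);
+  if (z == NULL) {
+      return NULL;
+  }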
+
+::
+
+  int
+  PyArray_RegisterCanCast(PyArray_Descr *descr, int
+                          totype, NPY_SCALARKIND scalar)
+
+Register a type number indicating that a descriptor can be cast
+to it safely
+
+::
+
+  void
+  PyArray_InitArrFuncs(PyArray_ArrFuncs *f)
+
+Initialize arrfuncs to NULL
+
+::
+
+  PyObject *
+  PyArray_IntTupleFromIntp(int len, npy_intp const *vals)
+
+PyArray_IntTupleFromIntp
+
+::
+
+  int
+  PyArray_TypeNumFromName(char const *str)
+
+
+::
+
+  int
+  PyArray_ClipmodeConverter(PyObject *object, NPY_CLIPMODE *val)
+
+Convert an object to NPY_RAISE / NPY_CLIP / NPY_WRAP
+
+::
+
+  int
+  PyArray_OutputConverter(PyObject *object, PyArrayObject **address)
+
+Useful to pass as converter function for O& processing in
+PyArg_ParseTuple for output arrays
+
+::
+
+  PyObject *
+  PyArray_BroadcastToShape(PyObject *obj, npy_intp *dims, int nd)
+
+Get Iterator broadcast to a particular shape
+
+::
+
+  void
+  _PyArray_SigintHandler(int signum)
+
+
+::
+
+  void*
+  _PyArray_GetSigintBuf(void )
+
+
+::
+
+  int
+  PyArray_DescrAlignConverter(PyObject *obj, PyArray_Descr **at)
+
+
+Get type-descriptor from an object forcing alignment if possible
+None goes to DEFAULT type.
+
+any object with the .fields attribute and/or .itemsize attribute (if the
+.fields attribute does not give the total size -- i.e. a partial record
+naming). If itemsize is given it must be >= size computed from fields
+
+The .fields attribute must return a convertible dictionary if present.
+Result inherits from NPY_VOID.
+
+::
+
+  int
+  PyArray_DescrAlignConverter2(PyObject *obj, PyArray_Descr **at)
+
+
+Get type-descriptor from an object forcing alignment if possible
+None goes to NULL.
+
+::
+
+  int
+  PyArray_SearchsideConverter(PyObject *obj, void *addr)
+
+Convert object to searchsorted side
+
+::
+
+  PyObject *
+  PyArray_CheckAxis(PyArrayObject *arr, int *axis, int flags)
+
+PyArray_CheckAxis
+
+check that axis is valid
+convert 0-d arrays to 1-d arrays
+
+::
+
+  npy_intp
+  PyArray_OverflowMultiplyList(npy_intp const *l1, int n)
+
+Multiply a List of Non-negative numbers with overflow detection.
+
+::
+
+  int
+  PyArray_CompareString(const char *s1, const char *s2, size_t len)
+
+
+::
+
+  PyObject*
+  PyArray_MultiIterFromObjects(PyObject **mps, int n, int nadd, ... )
+
+Get MultiIterator from array of Python objects and any additional arrays.
+
+PyObject **mps - array of PyObjects
+int n - number of PyObjects in the array
+int nadd - number of additional arrays to include in the iterator.
+
+Returns a multi-iterator object.
+
+::
+
+  int
+  PyArray_GetEndianness(void )
+
+
+::
+
+  unsigned int
+  PyArray_GetNDArrayCFeatureVersion(void )
+
+Returns the built-in (at compilation time) C API version
+
+::
+
+  PyObject *
+  PyArray_Correlate2(PyObject *op1, PyObject *op2, int mode)
+
+correlate(a1,a2,mode)
+
+This function computes the usual correlation (correlate(a1, a2) !=
+correlate(a2, a1)), and conjugates the second argument for complex inputs.
+
+::
+
+  PyObject*
+  PyArray_NeighborhoodIterNew(PyArrayIterObject *x, const npy_intp
+                              *bounds, int mode, PyArrayObject *fill)
+
+A Neighborhood Iterator object.
+
+::
+
+  void
+  PyArray_SetDatetimeParseFunction(PyObject *NPY_UNUSED(op) )
+
+This function is scheduled to be removed.
+
+TO BE REMOVED - NOT USED INTERNALLY.
+
+::
+
+  void
+  PyArray_DatetimeToDatetimeStruct(npy_datetime NPY_UNUSED(val)
+                                   , NPY_DATETIMEUNIT NPY_UNUSED(fr)
+                                   , npy_datetimestruct *result)
+
+Fill the datetime struct from the value and resolution unit.
+
+TO BE REMOVED - NOT USED INTERNALLY.
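+
+For context (an editorial note, not part of the generated listing): the
+feature-version query above pairs with PyArray_GetNDArrayCVersion
+(documented at the top of this file) in the checks that ``import_array()``
+performs when an extension module loads; roughly::
+
+  if (NPY_VERSION != PyArray_GetNDArrayCVersion()) {
+      /* ABI mismatch between the compiled extension and the running numpy */
+  }
+  if (NPY_FEATURE_VERSION > PyArray_GetNDArrayCFeatureVersion()) {
+      /* extension was built against a newer C API than is available */
+  }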
+ +:: + + void + PyArray_TimedeltaToTimedeltaStruct(npy_timedelta NPY_UNUSED(val) + , NPY_DATETIMEUNIT NPY_UNUSED(fr) + , npy_timedeltastruct *result) + +Fill the timedelta struct from the timedelta value and resolution unit. + +TO BE REMOVED - NOT USED INTERNALLY. + +:: + + npy_datetime + PyArray_DatetimeStructToDatetime(NPY_DATETIMEUNIT NPY_UNUSED(fr) + , npy_datetimestruct *NPY_UNUSED(d) ) + +Create a datetime value from a filled datetime struct and resolution unit. + +TO BE REMOVED - NOT USED INTERNALLY. + +:: + + npy_datetime + PyArray_TimedeltaStructToTimedelta(NPY_DATETIMEUNIT NPY_UNUSED(fr) + , npy_timedeltastruct + *NPY_UNUSED(d) ) + +Create a timedelta value from a filled timedelta struct and resolution unit. + +TO BE REMOVED - NOT USED INTERNALLY. + +:: + + NpyIter * + NpyIter_New(PyArrayObject *op, npy_uint32 flags, NPY_ORDER + order, NPY_CASTING casting, PyArray_Descr*dtype) + +Allocate a new iterator for one array object. + +:: + + NpyIter * + NpyIter_MultiNew(int nop, PyArrayObject **op_in, npy_uint32 + flags, NPY_ORDER order, NPY_CASTING + casting, npy_uint32 *op_flags, PyArray_Descr + **op_request_dtypes) + +Allocate a new iterator for more than one array object, using +standard NumPy broadcasting rules and the default buffer size. + +:: + + NpyIter * + NpyIter_AdvancedNew(int nop, PyArrayObject **op_in, npy_uint32 + flags, NPY_ORDER order, NPY_CASTING + casting, npy_uint32 *op_flags, PyArray_Descr + **op_request_dtypes, int oa_ndim, int + **op_axes, npy_intp *itershape, npy_intp + buffersize) + +Allocate a new iterator for multiple array objects, and advanced +options for controlling the broadcasting, shape, and buffer size. + +:: + + NpyIter * + NpyIter_Copy(NpyIter *iter) + +Makes a copy of the iterator + +:: + + int + NpyIter_Deallocate(NpyIter *iter) + +Deallocate an iterator. + +To correctly work when an error is in progress, we have to check +`PyErr_Occurred()`. This is necessary when buffers are not finalized +or WritebackIfCopy is used. We could avoid that check by exposing a new +function which is passed in whether or not a Python error is already set. + +:: + + npy_bool + NpyIter_HasDelayedBufAlloc(NpyIter *iter) + +Whether the buffer allocation is being delayed + +:: + + npy_bool + NpyIter_HasExternalLoop(NpyIter *iter) + +Whether the iterator handles the inner loop + +:: + + int + NpyIter_EnableExternalLoop(NpyIter *iter) + +Removes the inner loop handling (so HasExternalLoop returns true) + +:: + + npy_intp * + NpyIter_GetInnerStrideArray(NpyIter *iter) + +Get the array of strides for the inner loop (when HasExternalLoop is true) + +This function may be safely called without holding the Python GIL. + +:: + + npy_intp * + NpyIter_GetInnerLoopSizePtr(NpyIter *iter) + +Get a pointer to the size of the inner loop (when HasExternalLoop is true) + +This function may be safely called without holding the Python GIL. + +:: + + int + NpyIter_Reset(NpyIter *iter, char **errmsg) + +Resets the iterator to its initial state + +The use of errmsg is discouraged, it cannot be guaranteed that the GIL +will not be grabbed on casting errors even when this is passed. + +If errmsg is non-NULL, it should point to a variable which will +receive the error message, and no Python exception will be set. +This is so that the function can be called from code not holding +the GIL. Note that cast errors may still lead to the GIL being +grabbed temporarily. 
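+
+For illustration (an editorial sketch, not part of the generated listing),
+the errmsg protocol described above looks like this in use::
+
+  char *errmsg = NULL;
+  if (NpyIter_Reset(iter, &errmsg) != NPY_SUCCEED) {
+      /* errmsg now points at a message; no Python exception was set,
+         so this pattern is usable from code not holding the GIL. */
+  }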
+ +:: + + int + NpyIter_ResetBasePointers(NpyIter *iter, char **baseptrs, char + **errmsg) + +Resets the iterator to its initial state, with new base data pointers. +This function requires great caution. + +If errmsg is non-NULL, it should point to a variable which will +receive the error message, and no Python exception will be set. +This is so that the function can be called from code not holding +the GIL. Note that cast errors may still lead to the GIL being +grabbed temporarily. + +:: + + int + NpyIter_ResetToIterIndexRange(NpyIter *iter, npy_intp istart, npy_intp + iend, char **errmsg) + +Resets the iterator to a new iterator index range + +If errmsg is non-NULL, it should point to a variable which will +receive the error message, and no Python exception will be set. +This is so that the function can be called from code not holding +the GIL. Note that cast errors may still lead to the GIL being +grabbed temporarily. + +:: + + int + NpyIter_GetNDim(NpyIter *iter) + +Gets the number of dimensions being iterated + +:: + + int + NpyIter_GetNOp(NpyIter *iter) + +Gets the number of operands being iterated + +:: + + NpyIter_IterNextFunc * + NpyIter_GetIterNext(NpyIter *iter, char **errmsg) + +Compute the specialized iteration function for an iterator + +If errmsg is non-NULL, it should point to a variable which will +receive the error message, and no Python exception will be set. +This is so that the function can be called from code not holding +the GIL. + +:: + + npy_intp + NpyIter_GetIterSize(NpyIter *iter) + +Gets the number of elements being iterated + +:: + + void + NpyIter_GetIterIndexRange(NpyIter *iter, npy_intp *istart, npy_intp + *iend) + +Gets the range of iteration indices being iterated + +:: + + npy_intp + NpyIter_GetIterIndex(NpyIter *iter) + +Gets the current iteration index + +:: + + int + NpyIter_GotoIterIndex(NpyIter *iter, npy_intp iterindex) + +Sets the iterator position to the specified iterindex, +which matches the iteration order of the iterator. + +Returns NPY_SUCCEED on success, NPY_FAIL on failure. + +:: + + npy_bool + NpyIter_HasMultiIndex(NpyIter *iter) + +Whether the iterator is tracking a multi-index + +:: + + int + NpyIter_GetShape(NpyIter *iter, npy_intp *outshape) + +Gets the broadcast shape if a multi-index is being tracked by the iterator, +otherwise gets the shape of the iteration as Fortran-order +(fastest-changing index first). + +The reason Fortran-order is returned when a multi-index +is not enabled is that this is providing a direct view into how +the iterator traverses the n-dimensional space. The iterator organizes +its memory from fastest index to slowest index, and when +a multi-index is enabled, it uses a permutation to recover the original +order. + +Returns NPY_SUCCEED or NPY_FAIL. + +:: + + NpyIter_GetMultiIndexFunc * + NpyIter_GetGetMultiIndex(NpyIter *iter, char **errmsg) + +Compute a specialized get_multi_index function for the iterator + +If errmsg is non-NULL, it should point to a variable which will +receive the error message, and no Python exception will be set. +This is so that the function can be called from code not holding +the GIL. + +:: + + int + NpyIter_GotoMultiIndex(NpyIter *iter, npy_intp const *multi_index) + +Sets the iterator to the specified multi-index, which must have the +correct number of entries for 'ndim'. It is only valid +when NPY_ITER_MULTI_INDEX was passed to the constructor. This operation +fails if the multi-index is out of bounds. + +Returns NPY_SUCCEED on success, NPY_FAIL on failure. 
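+
+One use of the ranged-reset functions above is splitting an iteration
+into independent chunks, e.g. one per worker thread. A rough sketch,
+assuming 'iter' was constructed with the NPY_ITER_RANGED flag
+(documented with the other iterator flags further below) and that the
+enclosing helper returns int:
+
+::
+
+  npy_intp size = NpyIter_GetIterSize(iter);
+  NpyIter *second = NpyIter_Copy(iter);   /* one iterator per worker */
+  if (second == NULL) {
+      return -1;
+  }
+  /* Give each iterator half of the iteration index range */
+  if (NpyIter_ResetToIterIndexRange(iter, 0, size/2, NULL) != NPY_SUCCEED ||
+      NpyIter_ResetToIterIndexRange(second, size/2, size, NULL)
+              != NPY_SUCCEED) {
+      NpyIter_Deallocate(second);
+      return -1;
+  }
+  /* ... run the usual iternext() loop on each iterator ... */
+  NpyIter_Deallocate(second);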
+ +:: + + int + NpyIter_RemoveMultiIndex(NpyIter *iter) + +Removes multi-index support from an iterator. + +Returns NPY_SUCCEED or NPY_FAIL. + +:: + + npy_bool + NpyIter_HasIndex(NpyIter *iter) + +Whether the iterator is tracking an index + +:: + + npy_bool + NpyIter_IsBuffered(NpyIter *iter) + +Whether the iterator is buffered + +:: + + npy_bool + NpyIter_IsGrowInner(NpyIter *iter) + +Whether the inner loop can grow if buffering is unneeded + +:: + + npy_intp + NpyIter_GetBufferSize(NpyIter *iter) + +Gets the size of the buffer, or 0 if buffering is not enabled + +:: + + npy_intp * + NpyIter_GetIndexPtr(NpyIter *iter) + +Get a pointer to the index, if it is being tracked + +:: + + int + NpyIter_GotoIndex(NpyIter *iter, npy_intp flat_index) + +If the iterator is tracking an index, sets the iterator +to the specified index. + +Returns NPY_SUCCEED on success, NPY_FAIL on failure. + +:: + + char ** + NpyIter_GetDataPtrArray(NpyIter *iter) + +Get the array of data pointers (1 per object being iterated) + +This function may be safely called without holding the Python GIL. + +:: + + PyArray_Descr ** + NpyIter_GetDescrArray(NpyIter *iter) + +Get the array of data type pointers (1 per object being iterated) + +:: + + PyArrayObject ** + NpyIter_GetOperandArray(NpyIter *iter) + +Get the array of objects being iterated + +:: + + PyArrayObject * + NpyIter_GetIterView(NpyIter *iter, npy_intp i) + +Returns a view to the i-th object with the iterator's internal axes + +:: + + void + NpyIter_GetReadFlags(NpyIter *iter, char *outreadflags) + +Gets an array of read flags (1 per object being iterated) + +:: + + void + NpyIter_GetWriteFlags(NpyIter *iter, char *outwriteflags) + +Gets an array of write flags (1 per object being iterated) + +:: + + void + NpyIter_DebugPrint(NpyIter *iter) + +For debugging + +:: + + npy_bool + NpyIter_IterationNeedsAPI(NpyIter *iter) + +Whether the iteration loop, and in particular the iternext() +function, needs API access. If this is true, the GIL must +be retained while iterating. + +:: + + void + NpyIter_GetInnerFixedStrideArray(NpyIter *iter, npy_intp *out_strides) + +Get an array of strides which are fixed. Any strides which may +change during iteration receive the value NPY_MAX_INTP. Once +the iterator is ready to iterate, call this to get the strides +which will always be fixed in the inner loop, then choose optimized +inner loop functions which take advantage of those fixed strides. + +This function may be safely called without holding the Python GIL. + +:: + + int + NpyIter_RemoveAxis(NpyIter *iter, int axis) + +Removes an axis from iteration. This requires that NPY_ITER_MULTI_INDEX +was set for iterator creation, and does not work if buffering is +enabled. This function also resets the iterator to its initial state. + +Returns NPY_SUCCEED or NPY_FAIL. + +:: + + npy_intp * + NpyIter_GetAxisStrideArray(NpyIter *iter, int axis) + +Gets the array of strides for the specified axis. +If the iterator is tracking a multi-index, gets the strides +for the axis specified, otherwise gets the strides for +the iteration axis as Fortran order (fastest-changing axis first). + +Returns NULL if an error occurs. + +:: + + npy_bool + NpyIter_RequiresBuffering(NpyIter *iter) + +Whether the iteration could be done with no buffering. + +:: + + char ** + NpyIter_GetInitialDataPtrArray(NpyIter *iter) + +Get the array of data pointers (1 per object being iterated), +directly into the arrays (never pointing to a buffer), for starting +unbuffered iteration. 
This always returns the addresses for the
+iterator position as reset to iterator index 0.
+
+These pointers are different from the pointers accepted by
+NpyIter_ResetBasePointers, because the direction along some
+axes may have been reversed, requiring base offsets.
+
+This function may be safely called without holding the Python GIL.
+
+::
+
+  int
+  NpyIter_CreateCompatibleStrides(NpyIter *iter, npy_intp
+                                  itemsize, npy_intp *outstrides)
+
+Builds a set of strides which are the same as the strides of an
+output array created using the NPY_ITER_ALLOCATE flag, where NULL
+was passed for op_axes. This is for data packed contiguously,
+but not necessarily in C or Fortran order. This should be used
+together with NpyIter_GetShape and NpyIter_GetNDim.
+
+A use case for this function is to match the shape and layout of
+the iterator and tack on one or more dimensions. For example,
+in order to generate a vector per input value for a numerical gradient,
+you pass in ndim*itemsize for itemsize, then add another dimension to
+the end with size ndim and stride itemsize. To do the Hessian matrix,
+you do the same thing but add two dimensions, or take advantage of
+the symmetry and pack it into 1 dimension with a particular encoding.
+
+This function may only be called if the iterator is tracking a multi-index
+and if NPY_ITER_DONT_NEGATE_STRIDES was used to prevent an axis from
+being iterated in reverse order.
+
+If an array is created with this method, simply adding 'itemsize'
+for each iteration will traverse the new array matching the
+iterator.
+
+Returns NPY_SUCCEED or NPY_FAIL.
+
+::
+
+  int
+  PyArray_CastingConverter(PyObject *obj, NPY_CASTING *casting)
+
+Convert any Python object, *obj*, to an NPY_CASTING enum.
+
+::
+
+  npy_intp
+  PyArray_CountNonzero(PyArrayObject *self)
+
+Counts the number of non-zero elements in the array.
+
+Returns -1 on error.
+
+::
+
+  PyArray_Descr *
+  PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2)
+
+Produces the smallest size and lowest kind type to which both
+input types can be cast.
+
+::
+
+  PyArray_Descr *
+  PyArray_MinScalarType(PyArrayObject *arr)
+
+If arr is a scalar (has 0 dimensions) with a built-in number data type,
+finds the smallest type size/kind which can still represent its data.
+Otherwise, returns the array's data type.
+
+
+::
+
+  PyArray_Descr *
+  PyArray_ResultType(npy_intp narrs, PyArrayObject *arrs[] , npy_intp
+                     ndtypes, PyArray_Descr *descrs[] )
+
+
+Produces the result type of a bunch of inputs, using the same rules
+as `np.result_type`.
+
+NOTE: This function is expected to go through a transitional period or
+to change behaviour. DTypes should always be strictly enforced for
+0-D arrays, while "weak DTypes" will be used to represent Python
+integers, floats, and complex in all cases.
+(Within this function, these are currently flagged on the array
+object to work through `np.result_type`; this may change.)
+
+Until a time where this transition is complete, we probably cannot
+add new "weak DTypes" or allow users to create their own.
+
+::
+
+  npy_bool
+  PyArray_CanCastArrayTo(PyArrayObject *arr, PyArray_Descr
+                         *to, NPY_CASTING casting)
+
+Returns 1 if the array object may be cast to the given data type using
+the casting rule, 0 otherwise. This differs from PyArray_CanCastTo in
+that it handles scalar arrays (0 dimensions) specially, by checking
+their value.
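+
+A short sketch combining the casting helpers above: find a common
+descriptor for two arrays 'a' and 'b' (both assumed to be
+PyArrayObject *) with PyArray_PromoteTypes, then verify with
+PyArray_CanCastArrayTo that both can be safely cast to it:
+
+::
+
+  /* PyArray_PromoteTypes returns a new reference (or NULL on error) */
+  PyArray_Descr *common = PyArray_PromoteTypes(PyArray_DESCR(a),
+                                               PyArray_DESCR(b));
+  if (common == NULL) {
+      return -1;
+  }
+  if (!PyArray_CanCastArrayTo(a, common, NPY_SAFE_CASTING) ||
+      !PyArray_CanCastArrayTo(b, common, NPY_SAFE_CASTING)) {
+      Py_DECREF(common);
+      PyErr_SetString(PyExc_TypeError, "no safe common type");
+      return -1;
+  }
+  /* ... use 'common', then Py_DECREF(common) when done ... */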
+ +:: + + npy_bool + PyArray_CanCastTypeTo(PyArray_Descr *from, PyArray_Descr + *to, NPY_CASTING casting) + +Returns true if data of type 'from' may be cast to data of type +'to' according to the rule 'casting'. + +:: + + PyArrayObject * + PyArray_EinsteinSum(char *subscripts, npy_intp nop, PyArrayObject + **op_in, PyArray_Descr *dtype, NPY_ORDER + order, NPY_CASTING casting, PyArrayObject *out) + +This function provides summation of array elements according to +the Einstein summation convention. For example: +- trace(a) -> einsum("ii", a) +- transpose(a) -> einsum("ji", a) +- multiply(a,b) -> einsum(",", a, b) +- inner(a,b) -> einsum("i,i", a, b) +- outer(a,b) -> einsum("i,j", a, b) +- matvec(a,b) -> einsum("ij,j", a, b) +- matmat(a,b) -> einsum("ij,jk", a, b) + +subscripts: The string of subscripts for einstein summation. +nop: The number of operands +op_in: The array of operands +dtype: Either NULL, or the data type to force the calculation as. +order: The order for the calculation/the output axes. +casting: What kind of casts should be permitted. +out: Either NULL, or an array into which the output should be placed. + +By default, the labels get placed in alphabetical order +at the end of the output. So, if c = einsum("i,j", a, b) +then c[i,j] == a[i]*b[j], but if c = einsum("j,i", a, b) +then c[i,j] = a[j]*b[i]. + +Alternatively, you can control the output order or prevent +an axis from being summed/force an axis to be summed by providing +indices for the output. This allows us to turn 'trace' into +'diag', for example. +- diag(a) -> einsum("ii->i", a) +- sum(a, axis=0) -> einsum("i...->", a) + +Subscripts at the beginning and end may be specified by +putting an ellipsis "..." in the middle. For example, +the function einsum("i...i", a) takes the diagonal of +the first and last dimensions of the operand, and +einsum("ij...,jk...->ik...") takes the matrix product using +the first two indices of each operand instead of the last two. + +When there is only one operand, no axes being summed, and +no output parameter, this function returns a view +into the operand instead of making a copy. + +:: + + PyObject * + PyArray_NewLikeArray(PyArrayObject *prototype, NPY_ORDER + order, PyArray_Descr *dtype, int subok) + +Creates a new array with the same shape as the provided one, +with possible memory layout order and data type changes. + +prototype - The array the new one should be like. +order - NPY_CORDER - C-contiguous result. +NPY_FORTRANORDER - Fortran-contiguous result. +NPY_ANYORDER - Fortran if prototype is Fortran, C otherwise. +NPY_KEEPORDER - Keeps the axis ordering of prototype. +dtype - If not NULL, overrides the data type of the result. +subok - If 1, use the prototype's array subtype, otherwise +always create a base-class array. + +NOTE: If dtype is not NULL, steals the dtype reference. On failure or when +dtype->subarray is true, dtype will be decrefed. + +:: + + int + PyArray_GetArrayParamsFromObject(PyObject *NPY_UNUSED(op) + , PyArray_Descr + *NPY_UNUSED(requested_dtype) + , npy_bool NPY_UNUSED(writeable) + , PyArray_Descr + **NPY_UNUSED(out_dtype) , int + *NPY_UNUSED(out_ndim) , npy_intp + *NPY_UNUSED(out_dims) , PyArrayObject + **NPY_UNUSED(out_arr) , PyObject + *NPY_UNUSED(context) ) + + +:: + + int + PyArray_ConvertClipmodeSequence(PyObject *object, NPY_CLIPMODE + *modes, int n) + +Convert an object to an array of n NPY_CLIPMODE values. +This is intended to be used in functions where a different mode +could be applied to each axis, like in ravel_multi_index. 
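+
+For instance, the matmat case from the PyArray_EinsteinSum table above
+could be driven from C roughly like this ('a' and 'b' are assumed to be
+PyArrayObject *):
+
+::
+
+  PyArrayObject *ops[2] = {a, b};
+  PyArrayObject *c = PyArray_EinsteinSum((char *)"ij,jk", 2, ops,
+                                         NULL,           /* no forced dtype */
+                                         NPY_KEEPORDER,
+                                         NPY_SAFE_CASTING,
+                                         NULL);          /* allocate output */
+  if (c == NULL) {
+      return -1;
+  }
+  /* ... use 'c', then Py_DECREF(c) when done ... */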
+
+::
+
+  PyObject *
+  PyArray_MatrixProduct2(PyObject *op1, PyObject
+                         *op2, PyArrayObject*out)
+
+Numeric.matrixproduct2(a,v,out)
+Just like the inner product, but does the swapaxes step on the fly.
+
+::
+
+  npy_bool
+  NpyIter_IsFirstVisit(NpyIter *iter, int iop)
+
+Checks whether the elements of the specified reduction operand
+which the iterator points at are being seen
+for the first time. The function returns
+a reasonable answer for reduction operands and when buffering is
+disabled. The answer may be incorrect for buffered non-reduction
+operands.
+
+This function is intended to be used in EXTERNAL_LOOP mode only,
+and will produce some wrong answers when that mode is not enabled.
+
+If this function returns true, the caller should also
+check the inner loop stride of the operand, because if
+that stride is 0, then only the first element of the innermost
+external loop is being visited for the first time.
+
+WARNING: For performance reasons, 'iop' is not bounds-checked,
+it is not confirmed that 'iop' is actually a reduction
+operand, and it is not confirmed that EXTERNAL_LOOP
+mode is enabled. These checks are the responsibility of
+the caller, and should be done outside of any inner loops.
+
+::
+
+  int
+  PyArray_SetBaseObject(PyArrayObject *arr, PyObject *obj)
+
+Sets the 'base' attribute of the array. This steals a reference
+to 'obj'.
+
+Returns 0 on success, -1 on failure.
+
+::
+
+  void
+  PyArray_CreateSortedStridePerm(int ndim, npy_intp const
+                                 *strides, npy_stride_sort_item
+                                 *out_strideperm)
+
+
+This function populates the first ndim elements of out_strideperm
+with the strides sorted in descending order by their absolute values.
+For example, the stride array (4, -2, 12) becomes
+[(2, 12), (0, 4), (1, -2)].
+
+::
+
+  void
+  PyArray_RemoveAxesInPlace(PyArrayObject *arr, const npy_bool *flags)
+
+
+Removes the axes flagged as True from the array,
+modifying it in place. If an axis flagged for removal
+has a shape entry bigger than one, this effectively selects
+index zero for that axis.
+
+WARNING: If an axis flagged for removal has a shape equal to zero,
+the array will point to invalid memory. The caller must
+validate this!
+If an axis flagged for removal has a shape larger than one,
+the aligned flag (and in the future the contiguous flags)
+may need an explicit update.
+(check also NPY_RELAXED_STRIDES_CHECKING)
+
+For example, this can be used to remove the reduction axes
+from a reduction result once its computation is complete.
+
+::
+
+  void
+  PyArray_DebugPrint(PyArrayObject *obj)
+
+Prints the raw data of the ndarray in a form useful for debugging
+low-level C issues.
+
+::
+
+  int
+  PyArray_FailUnlessWriteable(PyArrayObject *obj, const char *name)
+
+
+This function does nothing if obj is writeable, and raises an exception
+(and returns -1) if obj is not writeable. It may also do other
+house-keeping, such as issuing warnings on arrays which are transitioning
+to become views. Always call this function at some point before writing to
+an array.
+
+'name' is a name for the array, used to give better error
+messages. Something like "assignment destination", "output array", or even
+just "array".
+
+::
+
+  int
+  PyArray_SetUpdateIfCopyBase(PyArrayObject *arr, PyArrayObject *base)
+
+
+Precondition: 'arr' is a copy of 'base' (though possibly with different
+strides, ordering, etc.). This function sets the UPDATEIFCOPY flag and the
+->base pointer on 'arr', so that when 'arr' is destructed, it will copy any
+changes back to 'base'.
DEPRECATED, use PyArray_SetWritebackIfCopyBase.
+
+Steals a reference to 'base'.
+
+Returns 0 on success, -1 on failure.
+
+::
+
+  void *
+  PyDataMem_NEW(size_t size)
+
+Allocates memory for array data.
+
+::
+
+  void
+  PyDataMem_FREE(void *ptr)
+
+Free memory for array data.
+
+::
+
+  void *
+  PyDataMem_RENEW(void *ptr, size_t size)
+
+Reallocate/resize memory for array data.
+
+::
+
+  PyDataMem_EventHookFunc *
+  PyDataMem_SetEventHook(PyDataMem_EventHookFunc *newhook, void
+                         *user_data, void **old_data)
+
+Sets the allocation event hook for numpy array data.
+Takes a PyDataMem_EventHookFunc *, which has the signature:
+void hook(void *old, void *new, size_t size, void *user_data).
+Also takes a void *user_data, and void **old_data.
+
+Returns a pointer to the previous hook or NULL. If old_data is
+non-NULL, the previous user_data pointer will be copied to it.
+
+If not NULL, hook will be called at the end of each PyDataMem_NEW/FREE/RENEW:
+result = PyDataMem_NEW(size) -> (*hook)(NULL, result, size, user_data)
+PyDataMem_FREE(ptr) -> (*hook)(ptr, NULL, 0, user_data)
+result = PyDataMem_RENEW(ptr, size) -> (*hook)(ptr, result, size, user_data)
+
+When the hook is called, the GIL will be held by the calling
+thread. The hook should be written to be reentrant if it performs
+operations that might cause new allocation events (such as the
+creation/destruction of numpy objects, or creating/destroying Python
+objects which might cause a gc).
+
+::
+
+  void
+  PyArray_MapIterSwapAxes(PyArrayMapIterObject *mit, PyArrayObject
+                          **ret, int getmap)
+
+
+Swap the axes to or from their inserted form. MapIter always puts the
+advanced (array) indices first in the iteration. But if they are
+consecutive, it will insert/transpose them back before returning.
+This is stored as `mit->consec != 0` (the place where they are inserted).
+For assignments, the opposite happens: the values to be assigned are
+transposed (getmap=1 instead of getmap=0). `getmap=0` and `getmap=1`
+undo the other operation.
+
+::
+
+  PyObject *
+  PyArray_MapIterArray(PyArrayObject *a, PyObject *index)
+
+
+Use advanced indexing to iterate an array.
+
+::
+
+  void
+  PyArray_MapIterNext(PyArrayMapIterObject *mit)
+
+This function needs to update the state of the map iterator
+and point mit->dataptr to the memory location of the next object.
+
+Note that this function never handles an extra operand but provides
+compatibility for an old (exposed) API.
+
+::
+
+  int
+  PyArray_Partition(PyArrayObject *op, PyArrayObject *ktharray, int
+                    axis, NPY_SELECTKIND which)
+
+Partition an array in-place
+
+::
+
+  PyObject *
+  PyArray_ArgPartition(PyArrayObject *op, PyArrayObject *ktharray, int
+                       axis, NPY_SELECTKIND which)
+
+ArgPartition an array
+
+::
+
+  int
+  PyArray_SelectkindConverter(PyObject *obj, NPY_SELECTKIND *selectkind)
+
+Convert object to select kind
+
+::
+
+  void *
+  PyDataMem_NEW_ZEROED(size_t nmemb, size_t size)
+
+Allocates zeroed memory for array data.
+
+::
+
+  int
+  PyArray_CheckAnyScalarExact(PyObject *obj)
+
+Return true if an object is exactly a numpy scalar.
+
+::
+
+  PyObject *
+  PyArray_MapIterArrayCopyIfOverlap(PyArrayObject *a, PyObject
+                                    *index, int
+                                    copy_if_overlap, PyArrayObject
+                                    *extra_op)
+
+
+Same as PyArray_MapIterArray, but:
+
+If copy_if_overlap != 0, check if `a` has memory overlap with any of the
+arrays in `index` and with `extra_op`. If yes, make copies as appropriate
+to avoid problems if `a` is modified during the iteration.
+`iter->array` may contain a copied array (UPDATEIFCOPY/WRITEBACKIFCOPY set).
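+
+A minimal sketch of an event hook matching the signature documented for
+PyDataMem_SetEventHook above; the names 'counting_hook', 'alloc_events'
+and 'install_hook' are made up for the illustration:
+
+::
+
+  static size_t alloc_events = 0;
+
+  static void
+  counting_hook(void *oldptr, void *newptr, size_t size, void *user_data)
+  {
+      /* oldptr == NULL: NEW; newptr == NULL: FREE; both set: RENEW */
+      (void)oldptr; (void)newptr; (void)size; (void)user_data;
+      alloc_events++;
+  }
+
+  static int
+  install_hook(void)
+  {
+      void *old_data = NULL;
+      PyDataMem_EventHookFunc *old_hook =
+              PyDataMem_SetEventHook(counting_hook, NULL, &old_data);
+      (void)old_hook;   /* could be saved and chained to, if desired */
+      return 0;
+  }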
+
+::
+
+  int
+  PyArray_ResolveWritebackIfCopy(PyArrayObject *self)
+
+
+If WRITEBACKIFCOPY and self has data, reset the base WRITEABLE flag,
+copy the local data to base, release the local data, and set flags
+appropriately. Return 0 if not relevant, 1 if success, < 0 on failure
+
+::
+
+  int
+  PyArray_SetWritebackIfCopyBase(PyArrayObject *arr, PyArrayObject
+                                 *base)
+
+
+Precondition: 'arr' is a copy of 'base' (though possibly with different
+strides, ordering, etc.). This function sets the WRITEBACKIFCOPY flag and the
+->base pointer on 'arr'; call PyArray_ResolveWritebackIfCopy to copy any
+changes back to 'base' before deallocating the array.
+
+Steals a reference to 'base'.
+
+Returns 0 on success, -1 on failure.
+
+::
+
+  PyObject *
+  PyDataMem_SetHandler(PyObject *handler)
+
+Set a new allocation policy. If the input value is NULL, will reset
+the policy to the default. Return the previous policy, or
+return NULL if an error has occurred. We wrap the user-provided
+functions so they will still call the python and numpy
+memory management callback hooks.
+
+::
+
+  PyObject *
+  PyDataMem_GetHandler()
+
+Return the policy that will be used to allocate data
+for the next PyArrayObject. On failure, return NULL.
+
diff --git a/.local/lib/python3.8/site-packages/numpy/core/include/numpy/ndarrayobject.h b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/ndarrayobject.h
new file mode 100644
index 0000000..2eb9514
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/ndarrayobject.h
@@ -0,0 +1,268 @@
+/*
+ * DON'T INCLUDE THIS DIRECTLY.
+ */
+#ifndef NUMPY_CORE_INCLUDE_NUMPY_NDARRAYOBJECT_H_
+#define NUMPY_CORE_INCLUDE_NUMPY_NDARRAYOBJECT_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <Python.h>
+#include "ndarraytypes.h"
+
+/* Includes the "function" C-API -- these are all stored in a
+   list of pointers --- one for each file
+   The two lists are concatenated into one in multiarray.
+ + They are available as import_array() +*/ + +#include "__multiarray_api.h" + + +/* C-API that requires previous API to be defined */ + +#define PyArray_DescrCheck(op) PyObject_TypeCheck(op, &PyArrayDescr_Type) + +#define PyArray_Check(op) PyObject_TypeCheck(op, &PyArray_Type) +#define PyArray_CheckExact(op) (((PyObject*)(op))->ob_type == &PyArray_Type) + +#define PyArray_HasArrayInterfaceType(op, type, context, out) \ + ((((out)=PyArray_FromStructInterface(op)) != Py_NotImplemented) || \ + (((out)=PyArray_FromInterface(op)) != Py_NotImplemented) || \ + (((out)=PyArray_FromArrayAttr(op, type, context)) != \ + Py_NotImplemented)) + +#define PyArray_HasArrayInterface(op, out) \ + PyArray_HasArrayInterfaceType(op, NULL, NULL, out) + +#define PyArray_IsZeroDim(op) (PyArray_Check(op) && \ + (PyArray_NDIM((PyArrayObject *)op) == 0)) + +#define PyArray_IsScalar(obj, cls) \ + (PyObject_TypeCheck(obj, &Py##cls##ArrType_Type)) + +#define PyArray_CheckScalar(m) (PyArray_IsScalar(m, Generic) || \ + PyArray_IsZeroDim(m)) +#define PyArray_IsPythonNumber(obj) \ + (PyFloat_Check(obj) || PyComplex_Check(obj) || \ + PyLong_Check(obj) || PyBool_Check(obj)) +#define PyArray_IsIntegerScalar(obj) (PyLong_Check(obj) \ + || PyArray_IsScalar((obj), Integer)) +#define PyArray_IsPythonScalar(obj) \ + (PyArray_IsPythonNumber(obj) || PyBytes_Check(obj) || \ + PyUnicode_Check(obj)) + +#define PyArray_IsAnyScalar(obj) \ + (PyArray_IsScalar(obj, Generic) || PyArray_IsPythonScalar(obj)) + +#define PyArray_CheckAnyScalar(obj) (PyArray_IsPythonScalar(obj) || \ + PyArray_CheckScalar(obj)) + + +#define PyArray_GETCONTIGUOUS(m) (PyArray_ISCONTIGUOUS(m) ? \ + Py_INCREF(m), (m) : \ + (PyArrayObject *)(PyArray_Copy(m))) + +#define PyArray_SAMESHAPE(a1,a2) ((PyArray_NDIM(a1) == PyArray_NDIM(a2)) && \ + PyArray_CompareLists(PyArray_DIMS(a1), \ + PyArray_DIMS(a2), \ + PyArray_NDIM(a1))) + +#define PyArray_SIZE(m) PyArray_MultiplyList(PyArray_DIMS(m), PyArray_NDIM(m)) +#define PyArray_NBYTES(m) (PyArray_ITEMSIZE(m) * PyArray_SIZE(m)) +#define PyArray_FROM_O(m) PyArray_FromAny(m, NULL, 0, 0, 0, NULL) + +#define PyArray_FROM_OF(m,flags) PyArray_CheckFromAny(m, NULL, 0, 0, flags, \ + NULL) + +#define PyArray_FROM_OT(m,type) PyArray_FromAny(m, \ + PyArray_DescrFromType(type), 0, 0, 0, NULL) + +#define PyArray_FROM_OTF(m, type, flags) \ + PyArray_FromAny(m, PyArray_DescrFromType(type), 0, 0, \ + (((flags) & NPY_ARRAY_ENSURECOPY) ? \ + ((flags) | NPY_ARRAY_DEFAULT) : (flags)), NULL) + +#define PyArray_FROMANY(m, type, min, max, flags) \ + PyArray_FromAny(m, PyArray_DescrFromType(type), min, max, \ + (((flags) & NPY_ARRAY_ENSURECOPY) ? 
\ + (flags) | NPY_ARRAY_DEFAULT : (flags)), NULL) + +#define PyArray_ZEROS(m, dims, type, is_f_order) \ + PyArray_Zeros(m, dims, PyArray_DescrFromType(type), is_f_order) + +#define PyArray_EMPTY(m, dims, type, is_f_order) \ + PyArray_Empty(m, dims, PyArray_DescrFromType(type), is_f_order) + +#define PyArray_FILLWBYTE(obj, val) memset(PyArray_DATA(obj), val, \ + PyArray_NBYTES(obj)) +#ifndef PYPY_VERSION +#define PyArray_REFCOUNT(obj) (((PyObject *)(obj))->ob_refcnt) +#define NPY_REFCOUNT PyArray_REFCOUNT +#endif +#define NPY_MAX_ELSIZE (2 * NPY_SIZEOF_LONGDOUBLE) + +#define PyArray_ContiguousFromAny(op, type, min_depth, max_depth) \ + PyArray_FromAny(op, PyArray_DescrFromType(type), min_depth, \ + max_depth, NPY_ARRAY_DEFAULT, NULL) + +#define PyArray_EquivArrTypes(a1, a2) \ + PyArray_EquivTypes(PyArray_DESCR(a1), PyArray_DESCR(a2)) + +#define PyArray_EquivByteorders(b1, b2) \ + (((b1) == (b2)) || (PyArray_ISNBO(b1) == PyArray_ISNBO(b2))) + +#define PyArray_SimpleNew(nd, dims, typenum) \ + PyArray_New(&PyArray_Type, nd, dims, typenum, NULL, NULL, 0, 0, NULL) + +#define PyArray_SimpleNewFromData(nd, dims, typenum, data) \ + PyArray_New(&PyArray_Type, nd, dims, typenum, NULL, \ + data, 0, NPY_ARRAY_CARRAY, NULL) + +#define PyArray_SimpleNewFromDescr(nd, dims, descr) \ + PyArray_NewFromDescr(&PyArray_Type, descr, nd, dims, \ + NULL, NULL, 0, NULL) + +#define PyArray_ToScalar(data, arr) \ + PyArray_Scalar(data, PyArray_DESCR(arr), (PyObject *)arr) + + +/* These might be faster without the dereferencing of obj + going on inside -- of course an optimizing compiler should + inline the constants inside a for loop making it a moot point +*/ + +#define PyArray_GETPTR1(obj, i) ((void *)(PyArray_BYTES(obj) + \ + (i)*PyArray_STRIDES(obj)[0])) + +#define PyArray_GETPTR2(obj, i, j) ((void *)(PyArray_BYTES(obj) + \ + (i)*PyArray_STRIDES(obj)[0] + \ + (j)*PyArray_STRIDES(obj)[1])) + +#define PyArray_GETPTR3(obj, i, j, k) ((void *)(PyArray_BYTES(obj) + \ + (i)*PyArray_STRIDES(obj)[0] + \ + (j)*PyArray_STRIDES(obj)[1] + \ + (k)*PyArray_STRIDES(obj)[2])) + +#define PyArray_GETPTR4(obj, i, j, k, l) ((void *)(PyArray_BYTES(obj) + \ + (i)*PyArray_STRIDES(obj)[0] + \ + (j)*PyArray_STRIDES(obj)[1] + \ + (k)*PyArray_STRIDES(obj)[2] + \ + (l)*PyArray_STRIDES(obj)[3])) + +/* Move to arrayobject.c once PyArray_XDECREF_ERR is removed */ +static NPY_INLINE void +PyArray_DiscardWritebackIfCopy(PyArrayObject *arr) +{ + PyArrayObject_fields *fa = (PyArrayObject_fields *)arr; + if (fa && fa->base) { + if ((fa->flags & NPY_ARRAY_UPDATEIFCOPY) || + (fa->flags & NPY_ARRAY_WRITEBACKIFCOPY)) { + PyArray_ENABLEFLAGS((PyArrayObject*)fa->base, NPY_ARRAY_WRITEABLE); + Py_DECREF(fa->base); + fa->base = NULL; + PyArray_CLEARFLAGS(arr, NPY_ARRAY_WRITEBACKIFCOPY); + PyArray_CLEARFLAGS(arr, NPY_ARRAY_UPDATEIFCOPY); + } + } +} + +#define PyArray_DESCR_REPLACE(descr) do { \ + PyArray_Descr *_new_; \ + _new_ = PyArray_DescrNew(descr); \ + Py_XDECREF(descr); \ + descr = _new_; \ + } while(0) + +/* Copy should always return contiguous array */ +#define PyArray_Copy(obj) PyArray_NewCopy(obj, NPY_CORDER) + +#define PyArray_FromObject(op, type, min_depth, max_depth) \ + PyArray_FromAny(op, PyArray_DescrFromType(type), min_depth, \ + max_depth, NPY_ARRAY_BEHAVED | \ + NPY_ARRAY_ENSUREARRAY, NULL) + +#define PyArray_ContiguousFromObject(op, type, min_depth, max_depth) \ + PyArray_FromAny(op, PyArray_DescrFromType(type), min_depth, \ + max_depth, NPY_ARRAY_DEFAULT | \ + NPY_ARRAY_ENSUREARRAY, NULL) + +#define PyArray_CopyFromObject(op, type, 
min_depth, max_depth) \ + PyArray_FromAny(op, PyArray_DescrFromType(type), min_depth, \ + max_depth, NPY_ARRAY_ENSURECOPY | \ + NPY_ARRAY_DEFAULT | \ + NPY_ARRAY_ENSUREARRAY, NULL) + +#define PyArray_Cast(mp, type_num) \ + PyArray_CastToType(mp, PyArray_DescrFromType(type_num), 0) + +#define PyArray_Take(ap, items, axis) \ + PyArray_TakeFrom(ap, items, axis, NULL, NPY_RAISE) + +#define PyArray_Put(ap, items, values) \ + PyArray_PutTo(ap, items, values, NPY_RAISE) + +/* Compatibility with old Numeric stuff -- don't use in new code */ + +#define PyArray_FromDimsAndData(nd, d, type, data) \ + PyArray_FromDimsAndDataAndDescr(nd, d, PyArray_DescrFromType(type), \ + data) + + +/* + Check to see if this key in the dictionary is the "title" + entry of the tuple (i.e. a duplicate dictionary entry in the fields + dict). +*/ + +static NPY_INLINE int +NPY_TITLE_KEY_check(PyObject *key, PyObject *value) +{ + PyObject *title; + if (PyTuple_Size(value) != 3) { + return 0; + } + title = PyTuple_GetItem(value, 2); + if (key == title) { + return 1; + } +#ifdef PYPY_VERSION + /* + * On PyPy, dictionary keys do not always preserve object identity. + * Fall back to comparison by value. + */ + if (PyUnicode_Check(title) && PyUnicode_Check(key)) { + return PyUnicode_Compare(title, key) == 0 ? 1 : 0; + } +#endif + return 0; +} + +/* Macro, for backward compat with "if NPY_TITLE_KEY(key, value) { ..." */ +#define NPY_TITLE_KEY(key, value) (NPY_TITLE_KEY_check((key), (value))) + +#define DEPRECATE(msg) PyErr_WarnEx(PyExc_DeprecationWarning,msg,1) +#define DEPRECATE_FUTUREWARNING(msg) PyErr_WarnEx(PyExc_FutureWarning,msg,1) + +#if !defined(NPY_NO_DEPRECATED_API) || \ + (NPY_NO_DEPRECATED_API < NPY_1_14_API_VERSION) +static NPY_INLINE void +PyArray_XDECREF_ERR(PyArrayObject *arr) +{ + /* 2017-Nov-10 1.14 */ + DEPRECATE("PyArray_XDECREF_ERR is deprecated, call " + "PyArray_DiscardWritebackIfCopy then Py_XDECREF instead"); + PyArray_DiscardWritebackIfCopy(arr); + Py_XDECREF(arr); +} +#endif + + +#ifdef __cplusplus +} +#endif + + +#endif /* NUMPY_CORE_INCLUDE_NUMPY_NDARRAYOBJECT_H_ */ diff --git a/.local/lib/python3.8/site-packages/numpy/core/include/numpy/ndarraytypes.h b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/ndarraytypes.h new file mode 100644 index 0000000..6240adc --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/ndarraytypes.h @@ -0,0 +1,1976 @@ +#ifndef NUMPY_CORE_INCLUDE_NUMPY_NDARRAYTYPES_H_ +#define NUMPY_CORE_INCLUDE_NUMPY_NDARRAYTYPES_H_ + +#include "npy_common.h" +#include "npy_endian.h" +#include "npy_cpu.h" +#include "utils.h" + +#define NPY_NO_EXPORT NPY_VISIBILITY_HIDDEN + +/* Only use thread if configured in config and python supports it */ +#if defined WITH_THREAD && !NPY_NO_SMP + #define NPY_ALLOW_THREADS 1 +#else + #define NPY_ALLOW_THREADS 0 +#endif + +#ifndef __has_extension +#define __has_extension(x) 0 +#endif + +#if !defined(_NPY_NO_DEPRECATIONS) && \ + ((defined(__GNUC__)&& __GNUC__ >= 6) || \ + __has_extension(attribute_deprecated_with_message)) +#define NPY_ATTR_DEPRECATE(text) __attribute__ ((deprecated (text))) +#else +#define NPY_ATTR_DEPRECATE(text) +#endif + +/* + * There are several places in the code where an array of dimensions + * is allocated statically. This is the size of that static + * allocation. 
+ * + * The array creation itself could have arbitrary dimensions but all + * the places where static allocation is used would need to be changed + * to dynamic (including inside of several structures) + */ + +#define NPY_MAXDIMS 32 +#define NPY_MAXARGS 32 + +/* Used for Converter Functions "O&" code in ParseTuple */ +#define NPY_FAIL 0 +#define NPY_SUCCEED 1 + +/* + * Binary compatibility version number. This number is increased + * whenever the C-API is changed such that binary compatibility is + * broken, i.e. whenever a recompile of extension modules is needed. + */ +#define NPY_VERSION NPY_ABI_VERSION + +/* + * Minor API version. This number is increased whenever a change is + * made to the C-API -- whether it breaks binary compatibility or not. + * Some changes, such as adding a function pointer to the end of the + * function table, can be made without breaking binary compatibility. + * In this case, only the NPY_FEATURE_VERSION (*not* NPY_VERSION) + * would be increased. Whenever binary compatibility is broken, both + * NPY_VERSION and NPY_FEATURE_VERSION should be increased. + */ +#define NPY_FEATURE_VERSION NPY_API_VERSION + +enum NPY_TYPES { NPY_BOOL=0, + NPY_BYTE, NPY_UBYTE, + NPY_SHORT, NPY_USHORT, + NPY_INT, NPY_UINT, + NPY_LONG, NPY_ULONG, + NPY_LONGLONG, NPY_ULONGLONG, + NPY_FLOAT, NPY_DOUBLE, NPY_LONGDOUBLE, + NPY_CFLOAT, NPY_CDOUBLE, NPY_CLONGDOUBLE, + NPY_OBJECT=17, + NPY_STRING, NPY_UNICODE, + NPY_VOID, + /* + * New 1.6 types appended, may be integrated + * into the above in 2.0. + */ + NPY_DATETIME, NPY_TIMEDELTA, NPY_HALF, + + NPY_NTYPES, + NPY_NOTYPE, + NPY_CHAR NPY_ATTR_DEPRECATE("Use NPY_STRING"), + NPY_USERDEF=256, /* leave room for characters */ + + /* The number of types not including the new 1.6 types */ + NPY_NTYPES_ABI_COMPATIBLE=21 +}; +#ifdef _MSC_VER +#pragma deprecated(NPY_CHAR) +#endif + +/* basetype array priority */ +#define NPY_PRIORITY 0.0 + +/* default subtype priority */ +#define NPY_SUBTYPE_PRIORITY 1.0 + +/* default scalar priority */ +#define NPY_SCALAR_PRIORITY -1000000.0 + +/* How many floating point types are there (excluding half) */ +#define NPY_NUM_FLOATTYPE 3 + +/* + * These characters correspond to the array type and the struct + * module + */ + +enum NPY_TYPECHAR { + NPY_BOOLLTR = '?', + NPY_BYTELTR = 'b', + NPY_UBYTELTR = 'B', + NPY_SHORTLTR = 'h', + NPY_USHORTLTR = 'H', + NPY_INTLTR = 'i', + NPY_UINTLTR = 'I', + NPY_LONGLTR = 'l', + NPY_ULONGLTR = 'L', + NPY_LONGLONGLTR = 'q', + NPY_ULONGLONGLTR = 'Q', + NPY_HALFLTR = 'e', + NPY_FLOATLTR = 'f', + NPY_DOUBLELTR = 'd', + NPY_LONGDOUBLELTR = 'g', + NPY_CFLOATLTR = 'F', + NPY_CDOUBLELTR = 'D', + NPY_CLONGDOUBLELTR = 'G', + NPY_OBJECTLTR = 'O', + NPY_STRINGLTR = 'S', + NPY_STRINGLTR2 = 'a', + NPY_UNICODELTR = 'U', + NPY_VOIDLTR = 'V', + NPY_DATETIMELTR = 'M', + NPY_TIMEDELTALTR = 'm', + NPY_CHARLTR = 'c', + + /* + * No Descriptor, just a define -- this let's + * Python users specify an array of integers + * large enough to hold a pointer on the + * platform + */ + NPY_INTPLTR = 'p', + NPY_UINTPLTR = 'P', + + /* + * These are for dtype 'kinds', not dtype 'typecodes' + * as the above are for. + */ + NPY_GENBOOLLTR ='b', + NPY_SIGNEDLTR = 'i', + NPY_UNSIGNEDLTR = 'u', + NPY_FLOATINGLTR = 'f', + NPY_COMPLEXLTR = 'c' +}; + +/* + * Changing this may break Numpy API compatibility + * due to changing offsets in PyArray_ArrFuncs, so be + * careful. Here we have reused the mergesort slot for + * any kind of stable sort, the actual implementation will + * depend on the data type. 
+ */ +typedef enum { + NPY_QUICKSORT=0, + NPY_HEAPSORT=1, + NPY_MERGESORT=2, + NPY_STABLESORT=2, +} NPY_SORTKIND; +#define NPY_NSORTS (NPY_STABLESORT + 1) + + +typedef enum { + NPY_INTROSELECT=0 +} NPY_SELECTKIND; +#define NPY_NSELECTS (NPY_INTROSELECT + 1) + + +typedef enum { + NPY_SEARCHLEFT=0, + NPY_SEARCHRIGHT=1 +} NPY_SEARCHSIDE; +#define NPY_NSEARCHSIDES (NPY_SEARCHRIGHT + 1) + + +typedef enum { + NPY_NOSCALAR=-1, + NPY_BOOL_SCALAR, + NPY_INTPOS_SCALAR, + NPY_INTNEG_SCALAR, + NPY_FLOAT_SCALAR, + NPY_COMPLEX_SCALAR, + NPY_OBJECT_SCALAR +} NPY_SCALARKIND; +#define NPY_NSCALARKINDS (NPY_OBJECT_SCALAR + 1) + +/* For specifying array memory layout or iteration order */ +typedef enum { + /* Fortran order if inputs are all Fortran, C otherwise */ + NPY_ANYORDER=-1, + /* C order */ + NPY_CORDER=0, + /* Fortran order */ + NPY_FORTRANORDER=1, + /* An order as close to the inputs as possible */ + NPY_KEEPORDER=2 +} NPY_ORDER; + +/* For specifying allowed casting in operations which support it */ +typedef enum { + _NPY_ERROR_OCCURRED_IN_CAST = -1, + /* Only allow identical types */ + NPY_NO_CASTING=0, + /* Allow identical and byte swapped types */ + NPY_EQUIV_CASTING=1, + /* Only allow safe casts */ + NPY_SAFE_CASTING=2, + /* Allow safe casts or casts within the same kind */ + NPY_SAME_KIND_CASTING=3, + /* Allow any casts */ + NPY_UNSAFE_CASTING=4, + /* + * Flag to allow signalling that a cast is a view, this flag is not + * valid when requesting a cast of specific safety. + * _NPY_CAST_IS_VIEW|NPY_EQUIV_CASTING means the same as NPY_NO_CASTING. + */ + // TODO-DTYPES: Needs to be documented. + _NPY_CAST_IS_VIEW = 1 << 16, +} NPY_CASTING; + +typedef enum { + NPY_CLIP=0, + NPY_WRAP=1, + NPY_RAISE=2 +} NPY_CLIPMODE; + +typedef enum { + NPY_VALID=0, + NPY_SAME=1, + NPY_FULL=2 +} NPY_CORRELATEMODE; + +/* The special not-a-time (NaT) value */ +#define NPY_DATETIME_NAT NPY_MIN_INT64 + +/* + * Upper bound on the length of a DATETIME ISO 8601 string + * YEAR: 21 (64-bit year) + * MONTH: 3 + * DAY: 3 + * HOURS: 3 + * MINUTES: 3 + * SECONDS: 3 + * ATTOSECONDS: 1 + 3*6 + * TIMEZONE: 5 + * NULL TERMINATOR: 1 + */ +#define NPY_DATETIME_MAX_ISO8601_STRLEN (21 + 3*5 + 1 + 3*6 + 6 + 1) + +/* The FR in the unit names stands for frequency */ +typedef enum { + /* Force signed enum type, must be -1 for code compatibility */ + NPY_FR_ERROR = -1, /* error or undetermined */ + + /* Start of valid units */ + NPY_FR_Y = 0, /* Years */ + NPY_FR_M = 1, /* Months */ + NPY_FR_W = 2, /* Weeks */ + /* Gap where 1.6 NPY_FR_B (value 3) was */ + NPY_FR_D = 4, /* Days */ + NPY_FR_h = 5, /* hours */ + NPY_FR_m = 6, /* minutes */ + NPY_FR_s = 7, /* seconds */ + NPY_FR_ms = 8, /* milliseconds */ + NPY_FR_us = 9, /* microseconds */ + NPY_FR_ns = 10, /* nanoseconds */ + NPY_FR_ps = 11, /* picoseconds */ + NPY_FR_fs = 12, /* femtoseconds */ + NPY_FR_as = 13, /* attoseconds */ + NPY_FR_GENERIC = 14 /* unbound units, can convert to anything */ +} NPY_DATETIMEUNIT; + +/* + * NOTE: With the NPY_FR_B gap for 1.6 ABI compatibility, NPY_DATETIME_NUMUNITS + * is technically one more than the actual number of units. + */ +#define NPY_DATETIME_NUMUNITS (NPY_FR_GENERIC + 1) +#define NPY_DATETIME_DEFAULTUNIT NPY_FR_GENERIC + +/* + * Business day conventions for mapping invalid business + * days to valid business days. + */ +typedef enum { + /* Go forward in time to the following business day. */ + NPY_BUSDAY_FORWARD, + NPY_BUSDAY_FOLLOWING = NPY_BUSDAY_FORWARD, + /* Go backward in time to the preceding business day. 
*/ + NPY_BUSDAY_BACKWARD, + NPY_BUSDAY_PRECEDING = NPY_BUSDAY_BACKWARD, + /* + * Go forward in time to the following business day, unless it + * crosses a month boundary, in which case go backward + */ + NPY_BUSDAY_MODIFIEDFOLLOWING, + /* + * Go backward in time to the preceding business day, unless it + * crosses a month boundary, in which case go forward. + */ + NPY_BUSDAY_MODIFIEDPRECEDING, + /* Produce a NaT for non-business days. */ + NPY_BUSDAY_NAT, + /* Raise an exception for non-business days. */ + NPY_BUSDAY_RAISE +} NPY_BUSDAY_ROLL; + +/************************************************************ + * NumPy Auxiliary Data for inner loops, sort functions, etc. + ************************************************************/ + +/* + * When creating an auxiliary data struct, this should always appear + * as the first member, like this: + * + * typedef struct { + * NpyAuxData base; + * double constant; + * } constant_multiplier_aux_data; + */ +typedef struct NpyAuxData_tag NpyAuxData; + +/* Function pointers for freeing or cloning auxiliary data */ +typedef void (NpyAuxData_FreeFunc) (NpyAuxData *); +typedef NpyAuxData *(NpyAuxData_CloneFunc) (NpyAuxData *); + +struct NpyAuxData_tag { + NpyAuxData_FreeFunc *free; + NpyAuxData_CloneFunc *clone; + /* To allow for a bit of expansion without breaking the ABI */ + void *reserved[2]; +}; + +/* Macros to use for freeing and cloning auxiliary data */ +#define NPY_AUXDATA_FREE(auxdata) \ + do { \ + if ((auxdata) != NULL) { \ + (auxdata)->free(auxdata); \ + } \ + } while(0) +#define NPY_AUXDATA_CLONE(auxdata) \ + ((auxdata)->clone(auxdata)) + +#define NPY_ERR(str) fprintf(stderr, #str); fflush(stderr); +#define NPY_ERR2(str) fprintf(stderr, str); fflush(stderr); + +/* +* Macros to define how array, and dimension/strides data is +* allocated. These should be made private +*/ + +#define NPY_USE_PYMEM 1 + + +#if NPY_USE_PYMEM == 1 +/* use the Raw versions which are safe to call with the GIL released */ +#define PyArray_malloc PyMem_RawMalloc +#define PyArray_free PyMem_RawFree +#define PyArray_realloc PyMem_RawRealloc +#else +#define PyArray_malloc malloc +#define PyArray_free free +#define PyArray_realloc realloc +#endif + +/* Dimensions and strides */ +#define PyDimMem_NEW(size) \ + ((npy_intp *)PyArray_malloc(size*sizeof(npy_intp))) + +#define PyDimMem_FREE(ptr) PyArray_free(ptr) + +#define PyDimMem_RENEW(ptr,size) \ + ((npy_intp *)PyArray_realloc(ptr,size*sizeof(npy_intp))) + +/* forward declaration */ +struct _PyArray_Descr; + +/* These must deal with unaligned and swapped data if necessary */ +typedef PyObject * (PyArray_GetItemFunc) (void *, void *); +typedef int (PyArray_SetItemFunc)(PyObject *, void *, void *); + +typedef void (PyArray_CopySwapNFunc)(void *, npy_intp, void *, npy_intp, + npy_intp, int, void *); + +typedef void (PyArray_CopySwapFunc)(void *, void *, int, void *); +typedef npy_bool (PyArray_NonzeroFunc)(void *, void *); + + +/* + * These assume aligned and notswapped data -- a buffer will be used + * before or contiguous data will be obtained + */ + +typedef int (PyArray_CompareFunc)(const void *, const void *, void *); +typedef int (PyArray_ArgFunc)(void*, npy_intp, npy_intp*, void *); + +typedef void (PyArray_DotFunc)(void *, npy_intp, void *, npy_intp, void *, + npy_intp, void *); + +typedef void (PyArray_VectorUnaryFunc)(void *, void *, npy_intp, void *, + void *); + +/* + * XXX the ignore argument should be removed next time the API version + * is bumped. It used to be the separator. 
+ */ +typedef int (PyArray_ScanFunc)(FILE *fp, void *dptr, + char *ignore, struct _PyArray_Descr *); +typedef int (PyArray_FromStrFunc)(char *s, void *dptr, char **endptr, + struct _PyArray_Descr *); + +typedef int (PyArray_FillFunc)(void *, npy_intp, void *); + +typedef int (PyArray_SortFunc)(void *, npy_intp, void *); +typedef int (PyArray_ArgSortFunc)(void *, npy_intp *, npy_intp, void *); +typedef int (PyArray_PartitionFunc)(void *, npy_intp, npy_intp, + npy_intp *, npy_intp *, + void *); +typedef int (PyArray_ArgPartitionFunc)(void *, npy_intp *, npy_intp, npy_intp, + npy_intp *, npy_intp *, + void *); + +typedef int (PyArray_FillWithScalarFunc)(void *, npy_intp, void *, void *); + +typedef int (PyArray_ScalarKindFunc)(void *); + +typedef void (PyArray_FastClipFunc)(void *in, npy_intp n_in, void *min, + void *max, void *out); +typedef void (PyArray_FastPutmaskFunc)(void *in, void *mask, npy_intp n_in, + void *values, npy_intp nv); +typedef int (PyArray_FastTakeFunc)(void *dest, void *src, npy_intp *indarray, + npy_intp nindarray, npy_intp n_outer, + npy_intp m_middle, npy_intp nelem, + NPY_CLIPMODE clipmode); + +typedef struct { + npy_intp *ptr; + int len; +} PyArray_Dims; + +typedef struct { + /* + * Functions to cast to most other standard types + * Can have some NULL entries. The types + * DATETIME, TIMEDELTA, and HALF go into the castdict + * even though they are built-in. + */ + PyArray_VectorUnaryFunc *cast[NPY_NTYPES_ABI_COMPATIBLE]; + + /* The next four functions *cannot* be NULL */ + + /* + * Functions to get and set items with standard Python types + * -- not array scalars + */ + PyArray_GetItemFunc *getitem; + PyArray_SetItemFunc *setitem; + + /* + * Copy and/or swap data. Memory areas may not overlap + * Use memmove first if they might + */ + PyArray_CopySwapNFunc *copyswapn; + PyArray_CopySwapFunc *copyswap; + + /* + * Function to compare items + * Can be NULL + */ + PyArray_CompareFunc *compare; + + /* + * Function to select largest + * Can be NULL + */ + PyArray_ArgFunc *argmax; + + /* + * Function to compute dot product + * Can be NULL + */ + PyArray_DotFunc *dotfunc; + + /* + * Function to scan an ASCII file and + * place a single value plus possible separator + * Can be NULL + */ + PyArray_ScanFunc *scanfunc; + + /* + * Function to read a single value from a string + * and adjust the pointer; Can be NULL + */ + PyArray_FromStrFunc *fromstr; + + /* + * Function to determine if data is zero or not + * If NULL a default version is + * used at Registration time. + */ + PyArray_NonzeroFunc *nonzero; + + /* + * Used for arange. Should return 0 on success + * and -1 on failure. + * Can be NULL. + */ + PyArray_FillFunc *fill; + + /* + * Function to fill arrays with scalar values + * Can be NULL + */ + PyArray_FillWithScalarFunc *fillwithscalar; + + /* + * Sorting functions + * Can be NULL + */ + PyArray_SortFunc *sort[NPY_NSORTS]; + PyArray_ArgSortFunc *argsort[NPY_NSORTS]; + + /* + * Dictionary of additional casting functions + * PyArray_VectorUnaryFuncs + * which can be populated to support casting + * to other registered types. Can be NULL + */ + PyObject *castdict; + + /* + * Functions useful for generalizing + * the casting rules. 
+ * Can be NULL; + */ + PyArray_ScalarKindFunc *scalarkind; + int **cancastscalarkindto; + int *cancastto; + + PyArray_FastClipFunc *fastclip; + PyArray_FastPutmaskFunc *fastputmask; + PyArray_FastTakeFunc *fasttake; + + /* + * Function to select smallest + * Can be NULL + */ + PyArray_ArgFunc *argmin; + +} PyArray_ArrFuncs; + +/* The item must be reference counted when it is inserted or extracted. */ +#define NPY_ITEM_REFCOUNT 0x01 +/* Same as needing REFCOUNT */ +#define NPY_ITEM_HASOBJECT 0x01 +/* Convert to list for pickling */ +#define NPY_LIST_PICKLE 0x02 +/* The item is a POINTER */ +#define NPY_ITEM_IS_POINTER 0x04 +/* memory needs to be initialized for this data-type */ +#define NPY_NEEDS_INIT 0x08 +/* operations need Python C-API so don't give-up thread. */ +#define NPY_NEEDS_PYAPI 0x10 +/* Use f.getitem when extracting elements of this data-type */ +#define NPY_USE_GETITEM 0x20 +/* Use f.setitem when setting creating 0-d array from this data-type.*/ +#define NPY_USE_SETITEM 0x40 +/* A sticky flag specifically for structured arrays */ +#define NPY_ALIGNED_STRUCT 0x80 + +/* + *These are inherited for global data-type if any data-types in the + * field have them + */ +#define NPY_FROM_FIELDS (NPY_NEEDS_INIT | NPY_LIST_PICKLE | \ + NPY_ITEM_REFCOUNT | NPY_NEEDS_PYAPI) + +#define NPY_OBJECT_DTYPE_FLAGS (NPY_LIST_PICKLE | NPY_USE_GETITEM | \ + NPY_ITEM_IS_POINTER | NPY_ITEM_REFCOUNT | \ + NPY_NEEDS_INIT | NPY_NEEDS_PYAPI) + +#define PyDataType_FLAGCHK(dtype, flag) \ + (((dtype)->flags & (flag)) == (flag)) + +#define PyDataType_REFCHK(dtype) \ + PyDataType_FLAGCHK(dtype, NPY_ITEM_REFCOUNT) + +typedef struct _PyArray_Descr { + PyObject_HEAD + /* + * the type object representing an + * instance of this type -- should not + * be two type_numbers with the same type + * object. + */ + PyTypeObject *typeobj; + /* kind for this type */ + char kind; + /* unique-character representing this type */ + char type; + /* + * '>' (big), '<' (little), '|' + * (not-applicable), or '=' (native). + */ + char byteorder; + /* flags describing data type */ + char flags; + /* number representing this type */ + int type_num; + /* element size (itemsize) for this type */ + int elsize; + /* alignment needed for this type */ + int alignment; + /* + * Non-NULL if this type is + * is an array (C-contiguous) + * of some other type + */ + struct _arr_descr *subarray; + /* + * The fields dictionary for this type + * For statically defined descr this + * is always Py_None + */ + PyObject *fields; + /* + * An ordered tuple of field names or NULL + * if no fields are defined + */ + PyObject *names; + /* + * a table of functions specific for each + * basic data descriptor + */ + PyArray_ArrFuncs *f; + /* Metadata about this dtype */ + PyObject *metadata; + /* + * Metadata specific to the C implementation + * of the particular dtype. This was added + * for NumPy 1.7.0. + */ + NpyAuxData *c_metadata; + /* Cached hash value (-1 if not yet computed). + * This was added for NumPy 2.0.0. + */ + npy_hash_t hash; +} PyArray_Descr; + +typedef struct _arr_descr { + PyArray_Descr *base; + PyObject *shape; /* a tuple */ +} PyArray_ArrayDescr; + +/* + * Memory handler structure for array data. 
+ */ +/* The declaration of free differs from PyMemAllocatorEx */ +typedef struct { + void *ctx; + void* (*malloc) (void *ctx, size_t size); + void* (*calloc) (void *ctx, size_t nelem, size_t elsize); + void* (*realloc) (void *ctx, void *ptr, size_t new_size); + void (*free) (void *ctx, void *ptr, size_t size); + /* + * This is the end of the version=1 struct. Only add new fields after + * this line + */ +} PyDataMemAllocator; + +typedef struct { + char name[127]; /* multiple of 64 to keep the struct aligned */ + uint8_t version; /* currently 1 */ + PyDataMemAllocator allocator; +} PyDataMem_Handler; + + +/* + * The main array object structure. + * + * It has been recommended to use the inline functions defined below + * (PyArray_DATA and friends) to access fields here for a number of + * releases. Direct access to the members themselves is deprecated. + * To ensure that your code does not use deprecated access, + * #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION + * (or NPY_1_8_API_VERSION or higher as required). + */ +/* This struct will be moved to a private header in a future release */ +typedef struct tagPyArrayObject_fields { + PyObject_HEAD + /* Pointer to the raw data buffer */ + char *data; + /* The number of dimensions, also called 'ndim' */ + int nd; + /* The size in each dimension, also called 'shape' */ + npy_intp *dimensions; + /* + * Number of bytes to jump to get to the + * next element in each dimension + */ + npy_intp *strides; + /* + * This object is decref'd upon + * deletion of array. Except in the + * case of WRITEBACKIFCOPY which has + * special handling. + * + * For views it points to the original + * array, collapsed so no chains of + * views occur. + * + * For creation from buffer object it + * points to an object that should be + * decref'd on deletion + * + * For WRITEBACKIFCOPY flag this is an + * array to-be-updated upon calling + * PyArray_ResolveWritebackIfCopy + */ + PyObject *base; + /* Pointer to type structure */ + PyArray_Descr *descr; + /* Flags describing array -- see below */ + int flags; + /* For weak references */ + PyObject *weakreflist; + void *_buffer_info; /* private buffer info, tagged to allow warning */ + /* + * For malloc/calloc/realloc/free per object + */ + PyObject *mem_handler; +} PyArrayObject_fields; + +/* + * To hide the implementation details, we only expose + * the Python struct HEAD. + */ +#if !defined(NPY_NO_DEPRECATED_API) || \ + (NPY_NO_DEPRECATED_API < NPY_1_7_API_VERSION) +/* + * Can't put this in npy_deprecated_api.h like the others. + * PyArrayObject field access is deprecated as of NumPy 1.7. + */ +typedef PyArrayObject_fields PyArrayObject; +#else +typedef struct tagPyArrayObject { + PyObject_HEAD +} PyArrayObject; +#endif + +/* + * Removed 2020-Nov-25, NumPy 1.20 + * #define NPY_SIZEOF_PYARRAYOBJECT (sizeof(PyArrayObject_fields)) + * + * The above macro was removed as it gave a false sense of a stable ABI + * with respect to the structures size. If you require a runtime constant, + * you can use `PyArray_Type.tp_basicsize` instead. Otherwise, please + * see the PyArrayObject documentation or ask the NumPy developers for + * information on how to correctly replace the macro in a way that is + * compatible with multiple NumPy versions. 
+ */ + + +/* Array Flags Object */ +typedef struct PyArrayFlagsObject { + PyObject_HEAD + PyObject *arr; + int flags; +} PyArrayFlagsObject; + +/* Mirrors buffer object to ptr */ + +typedef struct { + PyObject_HEAD + PyObject *base; + void *ptr; + npy_intp len; + int flags; +} PyArray_Chunk; + +typedef struct { + NPY_DATETIMEUNIT base; + int num; +} PyArray_DatetimeMetaData; + +typedef struct { + NpyAuxData base; + PyArray_DatetimeMetaData meta; +} PyArray_DatetimeDTypeMetaData; + +/* + * This structure contains an exploded view of a date-time value. + * NaT is represented by year == NPY_DATETIME_NAT. + */ +typedef struct { + npy_int64 year; + npy_int32 month, day, hour, min, sec, us, ps, as; +} npy_datetimestruct; + +/* This is not used internally. */ +typedef struct { + npy_int64 day; + npy_int32 sec, us, ps, as; +} npy_timedeltastruct; + +typedef int (PyArray_FinalizeFunc)(PyArrayObject *, PyObject *); + +/* + * Means c-style contiguous (last index varies the fastest). The data + * elements right after each other. + * + * This flag may be requested in constructor functions. + * This flag may be tested for in PyArray_FLAGS(arr). + */ +#define NPY_ARRAY_C_CONTIGUOUS 0x0001 + +/* + * Set if array is a contiguous Fortran array: the first index varies + * the fastest in memory (strides array is reverse of C-contiguous + * array) + * + * This flag may be requested in constructor functions. + * This flag may be tested for in PyArray_FLAGS(arr). + */ +#define NPY_ARRAY_F_CONTIGUOUS 0x0002 + +/* + * Note: all 0-d arrays are C_CONTIGUOUS and F_CONTIGUOUS. If a + * 1-d array is C_CONTIGUOUS it is also F_CONTIGUOUS. Arrays with + * more then one dimension can be C_CONTIGUOUS and F_CONTIGUOUS + * at the same time if they have either zero or one element. + * If NPY_RELAXED_STRIDES_CHECKING is set, a higher dimensional + * array is always C_CONTIGUOUS and F_CONTIGUOUS if it has zero elements + * and the array is contiguous if ndarray.squeeze() is contiguous. + * I.e. dimensions for which `ndarray.shape[dimension] == 1` are + * ignored. + */ + +/* + * If set, the array owns the data: it will be free'd when the array + * is deleted. + * + * This flag may be tested for in PyArray_FLAGS(arr). + */ +#define NPY_ARRAY_OWNDATA 0x0004 + +/* + * An array never has the next four set; they're only used as parameter + * flags to the various FromAny functions + * + * This flag may be requested in constructor functions. + */ + +/* Cause a cast to occur regardless of whether or not it is safe. */ +#define NPY_ARRAY_FORCECAST 0x0010 + +/* + * Always copy the array. Returned arrays are always CONTIGUOUS, + * ALIGNED, and WRITEABLE. See also: NPY_ARRAY_ENSURENOCOPY = 0x4000. + * + * This flag may be requested in constructor functions. + */ +#define NPY_ARRAY_ENSURECOPY 0x0020 + +/* + * Make sure the returned array is a base-class ndarray + * + * This flag may be requested in constructor functions. + */ +#define NPY_ARRAY_ENSUREARRAY 0x0040 + +#if defined(NPY_INTERNAL_BUILD) && NPY_INTERNAL_BUILD + /* + * Dual use of the ENSUREARRAY flag, to indicate that this was converted + * from a python float, int, or complex. + * An array using this flag must be a temporary array that can never + * leave the C internals of NumPy. Even if it does, ENSUREARRAY is + * absolutely safe to abuse, since it already is a base class array :). + */ + #define _NPY_ARRAY_WAS_PYSCALAR 0x0040 +#endif /* NPY_INTERNAL_BUILD */ + +/* + * Make sure that the strides are in units of the element size Needed + * for some operations with record-arrays. 
+ * + * This flag may be requested in constructor functions. + */ +#define NPY_ARRAY_ELEMENTSTRIDES 0x0080 + +/* + * Array data is aligned on the appropriate memory address for the type + * stored according to how the compiler would align things (e.g., an + * array of integers (4 bytes each) starts on a memory address that's + * a multiple of 4) + * + * This flag may be requested in constructor functions. + * This flag may be tested for in PyArray_FLAGS(arr). + */ +#define NPY_ARRAY_ALIGNED 0x0100 + +/* + * Array data has the native endianness + * + * This flag may be requested in constructor functions. + */ +#define NPY_ARRAY_NOTSWAPPED 0x0200 + +/* + * Array data is writeable + * + * This flag may be requested in constructor functions. + * This flag may be tested for in PyArray_FLAGS(arr). + */ +#define NPY_ARRAY_WRITEABLE 0x0400 + +/* + * If this flag is set, then base contains a pointer to an array of + * the same size that should be updated with the current contents of + * this array when PyArray_ResolveWritebackIfCopy is called. + * + * This flag may be requested in constructor functions. + * This flag may be tested for in PyArray_FLAGS(arr). + */ +#define NPY_ARRAY_UPDATEIFCOPY 0x1000 /* Deprecated in 1.14 */ +#define NPY_ARRAY_WRITEBACKIFCOPY 0x2000 + +/* + * No copy may be made while converting from an object/array (result is a view) + * + * This flag may be requested in constructor functions. + */ +#define NPY_ARRAY_ENSURENOCOPY 0x4000 + +/* + * NOTE: there are also internal flags defined in multiarray/arrayobject.h, + * which start at bit 31 and work down. + */ + +#define NPY_ARRAY_BEHAVED (NPY_ARRAY_ALIGNED | \ + NPY_ARRAY_WRITEABLE) +#define NPY_ARRAY_BEHAVED_NS (NPY_ARRAY_ALIGNED | \ + NPY_ARRAY_WRITEABLE | \ + NPY_ARRAY_NOTSWAPPED) +#define NPY_ARRAY_CARRAY (NPY_ARRAY_C_CONTIGUOUS | \ + NPY_ARRAY_BEHAVED) +#define NPY_ARRAY_CARRAY_RO (NPY_ARRAY_C_CONTIGUOUS | \ + NPY_ARRAY_ALIGNED) +#define NPY_ARRAY_FARRAY (NPY_ARRAY_F_CONTIGUOUS | \ + NPY_ARRAY_BEHAVED) +#define NPY_ARRAY_FARRAY_RO (NPY_ARRAY_F_CONTIGUOUS | \ + NPY_ARRAY_ALIGNED) +#define NPY_ARRAY_DEFAULT (NPY_ARRAY_CARRAY) +#define NPY_ARRAY_IN_ARRAY (NPY_ARRAY_CARRAY_RO) +#define NPY_ARRAY_OUT_ARRAY (NPY_ARRAY_CARRAY) +#define NPY_ARRAY_INOUT_ARRAY (NPY_ARRAY_CARRAY | \ + NPY_ARRAY_UPDATEIFCOPY) +#define NPY_ARRAY_INOUT_ARRAY2 (NPY_ARRAY_CARRAY | \ + NPY_ARRAY_WRITEBACKIFCOPY) +#define NPY_ARRAY_IN_FARRAY (NPY_ARRAY_FARRAY_RO) +#define NPY_ARRAY_OUT_FARRAY (NPY_ARRAY_FARRAY) +#define NPY_ARRAY_INOUT_FARRAY (NPY_ARRAY_FARRAY | \ + NPY_ARRAY_UPDATEIFCOPY) +#define NPY_ARRAY_INOUT_FARRAY2 (NPY_ARRAY_FARRAY | \ + NPY_ARRAY_WRITEBACKIFCOPY) + +#define NPY_ARRAY_UPDATE_ALL (NPY_ARRAY_C_CONTIGUOUS | \ + NPY_ARRAY_F_CONTIGUOUS | \ + NPY_ARRAY_ALIGNED) + +/* This flag is for the array interface, not PyArrayObject */ +#define NPY_ARR_HAS_DESCR 0x0800 + + + + +/* + * Size of internal buffers used for alignment Make BUFSIZE a multiple + * of sizeof(npy_cdouble) -- usually 16 so that ufunc buffers are aligned + */ +#define NPY_MIN_BUFSIZE ((int)sizeof(npy_cdouble)) +#define NPY_MAX_BUFSIZE (((int)sizeof(npy_cdouble))*1000000) +#define NPY_BUFSIZE 8192 +/* buffer stress test size: */ +/*#define NPY_BUFSIZE 17*/ + +#define PyArray_MAX(a,b) (((a)>(b))?(a):(b)) +#define PyArray_MIN(a,b) (((a)<(b))?(a):(b)) +#define PyArray_CLT(p,q) ((((p).real==(q).real) ? ((p).imag < (q).imag) : \ + ((p).real < (q).real))) +#define PyArray_CGT(p,q) ((((p).real==(q).real) ? 
((p).imag > (q).imag) : \ + ((p).real > (q).real))) +#define PyArray_CLE(p,q) ((((p).real==(q).real) ? ((p).imag <= (q).imag) : \ + ((p).real <= (q).real))) +#define PyArray_CGE(p,q) ((((p).real==(q).real) ? ((p).imag >= (q).imag) : \ + ((p).real >= (q).real))) +#define PyArray_CEQ(p,q) (((p).real==(q).real) && ((p).imag == (q).imag)) +#define PyArray_CNE(p,q) (((p).real!=(q).real) || ((p).imag != (q).imag)) + +/* + * C API: consists of Macros and functions. The MACROS are defined + * here. + */ + + +#define PyArray_ISCONTIGUOUS(m) PyArray_CHKFLAGS((m), NPY_ARRAY_C_CONTIGUOUS) +#define PyArray_ISWRITEABLE(m) PyArray_CHKFLAGS((m), NPY_ARRAY_WRITEABLE) +#define PyArray_ISALIGNED(m) PyArray_CHKFLAGS((m), NPY_ARRAY_ALIGNED) + +#define PyArray_IS_C_CONTIGUOUS(m) PyArray_CHKFLAGS((m), NPY_ARRAY_C_CONTIGUOUS) +#define PyArray_IS_F_CONTIGUOUS(m) PyArray_CHKFLAGS((m), NPY_ARRAY_F_CONTIGUOUS) + +/* the variable is used in some places, so always define it */ +#define NPY_BEGIN_THREADS_DEF PyThreadState *_save=NULL; +#if NPY_ALLOW_THREADS +#define NPY_BEGIN_ALLOW_THREADS Py_BEGIN_ALLOW_THREADS +#define NPY_END_ALLOW_THREADS Py_END_ALLOW_THREADS +#define NPY_BEGIN_THREADS do {_save = PyEval_SaveThread();} while (0); +#define NPY_END_THREADS do { if (_save) \ + { PyEval_RestoreThread(_save); _save = NULL;} } while (0); +#define NPY_BEGIN_THREADS_THRESHOLDED(loop_size) do { if ((loop_size) > 500) \ + { _save = PyEval_SaveThread();} } while (0); + +#define NPY_BEGIN_THREADS_DESCR(dtype) \ + do {if (!(PyDataType_FLAGCHK((dtype), NPY_NEEDS_PYAPI))) \ + NPY_BEGIN_THREADS;} while (0); + +#define NPY_END_THREADS_DESCR(dtype) \ + do {if (!(PyDataType_FLAGCHK((dtype), NPY_NEEDS_PYAPI))) \ + NPY_END_THREADS; } while (0); + +#define NPY_ALLOW_C_API_DEF PyGILState_STATE __save__; +#define NPY_ALLOW_C_API do {__save__ = PyGILState_Ensure();} while (0); +#define NPY_DISABLE_C_API do {PyGILState_Release(__save__);} while (0); +#else +#define NPY_BEGIN_ALLOW_THREADS +#define NPY_END_ALLOW_THREADS +#define NPY_BEGIN_THREADS +#define NPY_END_THREADS +#define NPY_BEGIN_THREADS_THRESHOLDED(loop_size) +#define NPY_BEGIN_THREADS_DESCR(dtype) +#define NPY_END_THREADS_DESCR(dtype) +#define NPY_ALLOW_C_API_DEF +#define NPY_ALLOW_C_API +#define NPY_DISABLE_C_API +#endif + +/********************************** + * The nditer object, added in 1.6 + **********************************/ + +/* The actual structure of the iterator is an internal detail */ +typedef struct NpyIter_InternalOnly NpyIter; + +/* Iterator function pointers that may be specialized */ +typedef int (NpyIter_IterNextFunc)(NpyIter *iter); +typedef void (NpyIter_GetMultiIndexFunc)(NpyIter *iter, + npy_intp *outcoords); + +/*** Global flags that may be passed to the iterator constructors ***/ + +/* Track an index representing C order */ +#define NPY_ITER_C_INDEX 0x00000001 +/* Track an index representing Fortran order */ +#define NPY_ITER_F_INDEX 0x00000002 +/* Track a multi-index */ +#define NPY_ITER_MULTI_INDEX 0x00000004 +/* User code external to the iterator does the 1-dimensional innermost loop */ +#define NPY_ITER_EXTERNAL_LOOP 0x00000008 +/* Convert all the operands to a common data type */ +#define NPY_ITER_COMMON_DTYPE 0x00000010 +/* Operands may hold references, requiring API access during iteration */ +#define NPY_ITER_REFS_OK 0x00000020 +/* Zero-sized operands should be permitted, iteration checks IterSize for 0 */ +#define NPY_ITER_ZEROSIZE_OK 0x00000040 +/* Permits reductions (size-0 stride with dimension size > 1) */ +#define NPY_ITER_REDUCE_OK 
0x00000080
+/* Enables sub-range iteration */
+#define NPY_ITER_RANGED                     0x00000100
+/* Enables buffering */
+#define NPY_ITER_BUFFERED                   0x00000200
+/* When buffering is enabled, grows the inner loop if possible */
+#define NPY_ITER_GROWINNER                  0x00000400
+/* Delay allocation of buffers until first Reset* call */
+#define NPY_ITER_DELAY_BUFALLOC             0x00000800
+/* When NPY_KEEPORDER is specified, disable reversing negative-stride axes */
+#define NPY_ITER_DONT_NEGATE_STRIDES        0x00001000
+/*
+ * If output operands overlap with other operands (based on heuristics that
+ * have false positives but no false negatives), make temporary copies to
+ * eliminate overlap.
+ */
+#define NPY_ITER_COPY_IF_OVERLAP            0x00002000
+
+/*** Per-operand flags that may be passed to the iterator constructors ***/
+
+/* The operand will be read from and written to */
+#define NPY_ITER_READWRITE                  0x00010000
+/* The operand will only be read from */
+#define NPY_ITER_READONLY                   0x00020000
+/* The operand will only be written to */
+#define NPY_ITER_WRITEONLY                  0x00040000
+/* The operand's data must be in native byte order */
+#define NPY_ITER_NBO                        0x00080000
+/* The operand's data must be aligned */
+#define NPY_ITER_ALIGNED                    0x00100000
+/* The operand's data must be contiguous (within the inner loop) */
+#define NPY_ITER_CONTIG                     0x00200000
+/* The operand may be copied to satisfy requirements */
+#define NPY_ITER_COPY                       0x00400000
+/* The operand may be copied with WRITEBACKIFCOPY to satisfy requirements */
+#define NPY_ITER_UPDATEIFCOPY               0x00800000
+/* Allocate the operand if it is NULL */
+#define NPY_ITER_ALLOCATE                   0x01000000
+/* If an operand is allocated, don't use any subtype */
+#define NPY_ITER_NO_SUBTYPE                 0x02000000
+/* This is a virtual array slot, operand is NULL but temporary data is there */
+#define NPY_ITER_VIRTUAL                    0x04000000
+/* Require that the dimension match the iterator dimensions exactly */
+#define NPY_ITER_NO_BROADCAST               0x08000000
+/* A mask is being used on this array, affects buffer -> array copy */
+#define NPY_ITER_WRITEMASKED                0x10000000
+/* This array is the mask for all WRITEMASKED operands */
+#define NPY_ITER_ARRAYMASK                  0x20000000
+/* Assume iterator order data access for COPY_IF_OVERLAP */
+#define NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE 0x40000000
+
+#define NPY_ITER_GLOBAL_FLAGS               0x0000ffff
+#define NPY_ITER_PER_OP_FLAGS               0xffff0000
+
+
+/*****************************
+ * Basic iterator object
+ *****************************/
+
+/* FWD declaration */
+typedef struct PyArrayIterObject_tag PyArrayIterObject;
+
+/*
+ * type of the function which translates a set of coordinates to a
+ * pointer to the data
+ */
+typedef char* (*npy_iter_get_dataptr_t)(
+        PyArrayIterObject* iter, const npy_intp*);
+
+struct PyArrayIterObject_tag {
+        PyObject_HEAD
+        int               nd_m1;            /* number of dimensions - 1 */
+        npy_intp          index, size;
+        npy_intp          coordinates[NPY_MAXDIMS];/* N-dimensional loop */
+        npy_intp          dims_m1[NPY_MAXDIMS];    /* ao->dimensions - 1 */
+        npy_intp          strides[NPY_MAXDIMS];    /* ao->strides or fake */
+        npy_intp          backstrides[NPY_MAXDIMS];/* how far to jump back */
+        npy_intp          factors[NPY_MAXDIMS];    /* shape factors */
+        PyArrayObject     *ao;
+        char              *dataptr;        /* pointer to current item*/
+        npy_bool          contiguous;
+
+        npy_intp          bounds[NPY_MAXDIMS][2];
+        npy_intp          limits[NPY_MAXDIMS][2];
+        npy_intp          limits_sizes[NPY_MAXDIMS];
+        npy_iter_get_dataptr_t translate;
+} ;
+
+
+/* Iterator API */
+#define PyArrayIter_Check(op) PyObject_TypeCheck((op), &PyArrayIter_Type)
+
+#define _PyAIT(it) ((PyArrayIterObject *)(it))
+#define PyArray_ITER_RESET(it) do
{ \ + _PyAIT(it)->index = 0; \ + _PyAIT(it)->dataptr = PyArray_BYTES(_PyAIT(it)->ao); \ + memset(_PyAIT(it)->coordinates, 0, \ + (_PyAIT(it)->nd_m1+1)*sizeof(npy_intp)); \ +} while (0) + +#define _PyArray_ITER_NEXT1(it) do { \ + (it)->dataptr += _PyAIT(it)->strides[0]; \ + (it)->coordinates[0]++; \ +} while (0) + +#define _PyArray_ITER_NEXT2(it) do { \ + if ((it)->coordinates[1] < (it)->dims_m1[1]) { \ + (it)->coordinates[1]++; \ + (it)->dataptr += (it)->strides[1]; \ + } \ + else { \ + (it)->coordinates[1] = 0; \ + (it)->coordinates[0]++; \ + (it)->dataptr += (it)->strides[0] - \ + (it)->backstrides[1]; \ + } \ +} while (0) + +#define PyArray_ITER_NEXT(it) do { \ + _PyAIT(it)->index++; \ + if (_PyAIT(it)->nd_m1 == 0) { \ + _PyArray_ITER_NEXT1(_PyAIT(it)); \ + } \ + else if (_PyAIT(it)->contiguous) \ + _PyAIT(it)->dataptr += PyArray_DESCR(_PyAIT(it)->ao)->elsize; \ + else if (_PyAIT(it)->nd_m1 == 1) { \ + _PyArray_ITER_NEXT2(_PyAIT(it)); \ + } \ + else { \ + int __npy_i; \ + for (__npy_i=_PyAIT(it)->nd_m1; __npy_i >= 0; __npy_i--) { \ + if (_PyAIT(it)->coordinates[__npy_i] < \ + _PyAIT(it)->dims_m1[__npy_i]) { \ + _PyAIT(it)->coordinates[__npy_i]++; \ + _PyAIT(it)->dataptr += \ + _PyAIT(it)->strides[__npy_i]; \ + break; \ + } \ + else { \ + _PyAIT(it)->coordinates[__npy_i] = 0; \ + _PyAIT(it)->dataptr -= \ + _PyAIT(it)->backstrides[__npy_i]; \ + } \ + } \ + } \ +} while (0) + +#define PyArray_ITER_GOTO(it, destination) do { \ + int __npy_i; \ + _PyAIT(it)->index = 0; \ + _PyAIT(it)->dataptr = PyArray_BYTES(_PyAIT(it)->ao); \ + for (__npy_i = _PyAIT(it)->nd_m1; __npy_i>=0; __npy_i--) { \ + if (destination[__npy_i] < 0) { \ + destination[__npy_i] += \ + _PyAIT(it)->dims_m1[__npy_i]+1; \ + } \ + _PyAIT(it)->dataptr += destination[__npy_i] * \ + _PyAIT(it)->strides[__npy_i]; \ + _PyAIT(it)->coordinates[__npy_i] = \ + destination[__npy_i]; \ + _PyAIT(it)->index += destination[__npy_i] * \ + ( __npy_i==_PyAIT(it)->nd_m1 ? 1 : \ + _PyAIT(it)->dims_m1[__npy_i+1]+1) ; \ + } \ +} while (0) + +#define PyArray_ITER_GOTO1D(it, ind) do { \ + int __npy_i; \ + npy_intp __npy_ind = (npy_intp)(ind); \ + if (__npy_ind < 0) __npy_ind += _PyAIT(it)->size; \ + _PyAIT(it)->index = __npy_ind; \ + if (_PyAIT(it)->nd_m1 == 0) { \ + _PyAIT(it)->dataptr = PyArray_BYTES(_PyAIT(it)->ao) + \ + __npy_ind * _PyAIT(it)->strides[0]; \ + } \ + else if (_PyAIT(it)->contiguous) \ + _PyAIT(it)->dataptr = PyArray_BYTES(_PyAIT(it)->ao) + \ + __npy_ind * PyArray_DESCR(_PyAIT(it)->ao)->elsize; \ + else { \ + _PyAIT(it)->dataptr = PyArray_BYTES(_PyAIT(it)->ao); \ + for (__npy_i = 0; __npy_i<=_PyAIT(it)->nd_m1; \ + __npy_i++) { \ + _PyAIT(it)->coordinates[__npy_i] = \ + (__npy_ind / _PyAIT(it)->factors[__npy_i]); \ + _PyAIT(it)->dataptr += \ + (__npy_ind / _PyAIT(it)->factors[__npy_i]) \ + * _PyAIT(it)->strides[__npy_i]; \ + __npy_ind %= _PyAIT(it)->factors[__npy_i]; \ + } \ + } \ +} while (0) + +#define PyArray_ITER_DATA(it) ((void *)(_PyAIT(it)->dataptr)) + +#define PyArray_ITER_NOTDONE(it) (_PyAIT(it)->index < _PyAIT(it)->size) + + +/* + * Any object passed to PyArray_Broadcast must be binary compatible + * with this structure. 
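+ *
+ * (Editorial sketch, assuming the documented C API: such an object is
+ * returned by PyArray_MultiIterNew, and the macros below drive a
+ * broadcast loop over all operands, e.g.
+ *
+ *     PyArrayMultiIterObject *mit =
+ *         (PyArrayMultiIterObject *)PyArray_MultiIterNew(2, op1, op2);
+ *     while (PyArray_MultiIter_NOTDONE(mit)) {
+ *         void *p0 = PyArray_MultiIter_DATA(mit, 0);
+ *         void *p1 = PyArray_MultiIter_DATA(mit, 1);
+ *         (process one broadcast element through p0 and p1 here)
+ *         PyArray_MultiIter_NEXT(mit);
+ *     }
+ *
+ * with op1 and op2 hypothetical array objects.)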
+ */
+
+typedef struct {
+        PyObject_HEAD
+        int                  numiter;                 /* number of iters */
+        npy_intp             size;                    /* broadcasted size */
+        npy_intp             index;                   /* current index */
+        int                  nd;                      /* number of dims */
+        npy_intp             dimensions[NPY_MAXDIMS]; /* dimensions */
+        PyArrayIterObject    *iters[NPY_MAXARGS];     /* iterators */
+} PyArrayMultiIterObject;
+
+#define _PyMIT(m) ((PyArrayMultiIterObject *)(m))
+#define PyArray_MultiIter_RESET(multi) do {                                  \
+        int __npy_mi;                                                        \
+        _PyMIT(multi)->index = 0;                                            \
+        for (__npy_mi=0; __npy_mi < _PyMIT(multi)->numiter; __npy_mi++) {    \
+                PyArray_ITER_RESET(_PyMIT(multi)->iters[__npy_mi]);          \
+        }                                                                    \
+} while (0)
+
+#define PyArray_MultiIter_NEXT(multi) do {                                   \
+        int __npy_mi;                                                        \
+        _PyMIT(multi)->index++;                                              \
+        for (__npy_mi=0; __npy_mi < _PyMIT(multi)->numiter; __npy_mi++) {    \
+                PyArray_ITER_NEXT(_PyMIT(multi)->iters[__npy_mi]);           \
+        }                                                                    \
+} while (0)
+
+#define PyArray_MultiIter_GOTO(multi, dest) do {                             \
+        int __npy_mi;                                                        \
+        for (__npy_mi=0; __npy_mi < _PyMIT(multi)->numiter; __npy_mi++) {    \
+                PyArray_ITER_GOTO(_PyMIT(multi)->iters[__npy_mi], dest);     \
+        }                                                                    \
+        _PyMIT(multi)->index = _PyMIT(multi)->iters[0]->index;               \
+} while (0)
+
+#define PyArray_MultiIter_GOTO1D(multi, ind) do {                            \
+        int __npy_mi;                                                        \
+        for (__npy_mi=0; __npy_mi < _PyMIT(multi)->numiter; __npy_mi++) {    \
+                PyArray_ITER_GOTO1D(_PyMIT(multi)->iters[__npy_mi], ind);    \
+        }                                                                    \
+        _PyMIT(multi)->index = _PyMIT(multi)->iters[0]->index;               \
+} while (0)
+
+#define PyArray_MultiIter_DATA(multi, i)                \
+        ((void *)(_PyMIT(multi)->iters[i]->dataptr))
+
+#define PyArray_MultiIter_NEXTi(multi, i)               \
+        PyArray_ITER_NEXT(_PyMIT(multi)->iters[i])
+
+#define PyArray_MultiIter_NOTDONE(multi)                \
+        (_PyMIT(multi)->index < _PyMIT(multi)->size)
+
+/*
+ * Store the information needed for fancy-indexing over an array. The
+ * fields are slightly unordered to keep consec, dataptr and subspace
+ * where they were originally.
+ */
+typedef struct {
+        PyObject_HEAD
+        /*
+         * Multi-iterator portion --- needs to be present in this
+         * order to work with PyArray_Broadcast
+         */
+
+        int                   numiter;                 /* number of index-array
+                                                          iterators */
+        npy_intp              size;                    /* size of broadcasted
+                                                          result */
+        npy_intp              index;                   /* current index */
+        int                   nd;                      /* number of dims */
+        npy_intp              dimensions[NPY_MAXDIMS]; /* dimensions */
+        NpyIter               *outer;                  /* index objects
+                                                          iterator */
+        void                  *unused[NPY_MAXDIMS - 2];
+        PyArrayObject         *array;
+        /* Flat iterator for the indexed array. Kept solely for compatibility. */
+        PyArrayIterObject     *ait;
+
+        /*
+         * Subspace array. For binary compatibility (was an iterator,
+         * but only the check for NULL should be used).
+         */
+        PyArrayObject         *subspace;
+
+        /*
+         * if subspace iteration, then this is the array of axes in
+         * the underlying array represented by the index objects
+         */
+        int                   iteraxes[NPY_MAXDIMS];
+        npy_intp              fancy_strides[NPY_MAXDIMS];
+
+        /* pointer when all fancy indices are 0 */
+        char                  *baseoffset;
+
+        /*
+         * after binding consec denotes at which axis the fancy axes
+         * are inserted.
+         */
+        int                   consec;
+        char                  *dataptr;
+
+        int                   nd_fancy;
+        npy_intp              fancy_dims[NPY_MAXDIMS];
+
+        /* Whether the iterator (any of the iterators) requires API */
+        int                   needs_api;
+
+        /*
+         * Extra op information.
+         */
+        PyArrayObject         *extra_op;
+        PyArray_Descr         *extra_op_dtype;         /* desired dtype */
+        npy_uint32            *extra_op_flags;         /* Iterator flags */
+
+        NpyIter               *extra_op_iter;
+        NpyIter_IterNextFunc  *extra_op_next;
+        char                  **extra_op_ptrs;
+
+        /*
+         * Information about the iteration state.
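+         * (Editorial note: the three fields below are the outer
+         * fancy-index iterator's iternext function, its data pointers,
+         * and its strides.)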
+         */
+        NpyIter_IterNextFunc  *outer_next;
+        char                  **outer_ptrs;
+        npy_intp              *outer_strides;
+
+        /*
+         * Information about the subspace iterator.
+         */
+        NpyIter               *subspace_iter;
+        NpyIter_IterNextFunc  *subspace_next;
+        char                  **subspace_ptrs;
+        npy_intp              *subspace_strides;
+
+        /* Count for the external loop (whichever it is) for API iteration */
+        npy_intp              iter_count;
+
+} PyArrayMapIterObject;
+
+enum {
+    NPY_NEIGHBORHOOD_ITER_ZERO_PADDING,
+    NPY_NEIGHBORHOOD_ITER_ONE_PADDING,
+    NPY_NEIGHBORHOOD_ITER_CONSTANT_PADDING,
+    NPY_NEIGHBORHOOD_ITER_CIRCULAR_PADDING,
+    NPY_NEIGHBORHOOD_ITER_MIRROR_PADDING
+};
+
+typedef struct {
+    PyObject_HEAD
+
+    /*
+     * PyArrayIterObject part: keep this in this exact order
+     */
+    int               nd_m1;            /* number of dimensions - 1 */
+    npy_intp          index, size;
+    npy_intp          coordinates[NPY_MAXDIMS];/* N-dimensional loop */
+    npy_intp          dims_m1[NPY_MAXDIMS];    /* ao->dimensions - 1 */
+    npy_intp          strides[NPY_MAXDIMS];    /* ao->strides or fake */
+    npy_intp          backstrides[NPY_MAXDIMS];/* how far to jump back */
+    npy_intp          factors[NPY_MAXDIMS];    /* shape factors */
+    PyArrayObject     *ao;
+    char              *dataptr;        /* pointer to current item*/
+    npy_bool          contiguous;
+
+    npy_intp          bounds[NPY_MAXDIMS][2];
+    npy_intp          limits[NPY_MAXDIMS][2];
+    npy_intp          limits_sizes[NPY_MAXDIMS];
+    npy_iter_get_dataptr_t translate;
+
+    /*
+     * New members
+     */
+    npy_intp nd;
+
+    /* dimensions holds the shape of the array */
+    npy_intp dimensions[NPY_MAXDIMS];
+
+    /*
+     * Neighborhood point coordinates are computed relative to the
+     * point currently pointed to by _internal_iter
+     */
+    PyArrayIterObject* _internal_iter;
+    /*
+     * To keep a reference to the representation of the constant value
+     * for constant padding
+     */
+    char* constant;
+
+    int mode;
+} PyArrayNeighborhoodIterObject;
+
+/*
+ * Neighborhood iterator API
+ */
+
+/* General: those work for any mode */
+static NPY_INLINE int
+PyArrayNeighborhoodIter_Reset(PyArrayNeighborhoodIterObject* iter);
+static NPY_INLINE int
+PyArrayNeighborhoodIter_Next(PyArrayNeighborhoodIterObject* iter);
+#if 0
+static NPY_INLINE int
+PyArrayNeighborhoodIter_Next2D(PyArrayNeighborhoodIterObject* iter);
+#endif
+
+/*
+ * Include inline implementations - functions defined there are not
+ * considered public API
+ */
+#define NUMPY_CORE_INCLUDE_NUMPY__NEIGHBORHOOD_IMP_H_
+#include "_neighborhood_iterator_imp.h"
+#undef NUMPY_CORE_INCLUDE_NUMPY__NEIGHBORHOOD_IMP_H_
+
+
+
+/* The default array type */
+#define NPY_DEFAULT_TYPE NPY_DOUBLE
+
+/*
+ * All sorts of useful ways to look into a PyArrayObject. It is recommended
+ * to use PyArrayObject * objects instead of always casting from PyObject *,
+ * for improved type checking.
+ *
+ * In many cases here the macro versions of the accessors are deprecated,
+ * but can't be immediately changed to inline functions because the
+ * preexisting macros accept PyObject * and do automatic casts. Inline
+ * functions accepting PyArrayObject * provide for some compile-time
+ * checking of correctness when working with these objects in C.
+ */
+
+#define PyArray_ISONESEGMENT(m) (PyArray_CHKFLAGS(m, NPY_ARRAY_C_CONTIGUOUS) || \
+                                 PyArray_CHKFLAGS(m, NPY_ARRAY_F_CONTIGUOUS))
+
+#define PyArray_ISFORTRAN(m) (PyArray_CHKFLAGS(m, NPY_ARRAY_F_CONTIGUOUS) && \
+                              (!PyArray_CHKFLAGS(m, NPY_ARRAY_C_CONTIGUOUS)))
+
+#define PyArray_FORTRAN_IF(m) ((PyArray_CHKFLAGS(m, NPY_ARRAY_F_CONTIGUOUS) ?
\
+                               NPY_ARRAY_F_CONTIGUOUS : 0))
+
+#if (defined(NPY_NO_DEPRECATED_API) && (NPY_1_7_API_VERSION <= NPY_NO_DEPRECATED_API))
+/*
+ * Changing access macros into functions, to allow for future hiding
+ * of the internal memory layout. This later hiding will allow the 2.x series
+ * to change the internal representation of arrays without affecting
+ * ABI compatibility.
+ */
+
+static NPY_INLINE int
+PyArray_NDIM(const PyArrayObject *arr)
+{
+    return ((PyArrayObject_fields *)arr)->nd;
+}
+
+static NPY_INLINE void *
+PyArray_DATA(PyArrayObject *arr)
+{
+    return ((PyArrayObject_fields *)arr)->data;
+}
+
+static NPY_INLINE char *
+PyArray_BYTES(PyArrayObject *arr)
+{
+    return ((PyArrayObject_fields *)arr)->data;
+}
+
+static NPY_INLINE npy_intp *
+PyArray_DIMS(PyArrayObject *arr)
+{
+    return ((PyArrayObject_fields *)arr)->dimensions;
+}
+
+static NPY_INLINE npy_intp *
+PyArray_STRIDES(PyArrayObject *arr)
+{
+    return ((PyArrayObject_fields *)arr)->strides;
+}
+
+static NPY_INLINE npy_intp
+PyArray_DIM(const PyArrayObject *arr, int idim)
+{
+    return ((PyArrayObject_fields *)arr)->dimensions[idim];
+}
+
+static NPY_INLINE npy_intp
+PyArray_STRIDE(const PyArrayObject *arr, int istride)
+{
+    return ((PyArrayObject_fields *)arr)->strides[istride];
+}
+
+static NPY_INLINE NPY_RETURNS_BORROWED_REF PyObject *
+PyArray_BASE(PyArrayObject *arr)
+{
+    return ((PyArrayObject_fields *)arr)->base;
+}
+
+static NPY_INLINE NPY_RETURNS_BORROWED_REF PyArray_Descr *
+PyArray_DESCR(PyArrayObject *arr)
+{
+    return ((PyArrayObject_fields *)arr)->descr;
+}
+
+static NPY_INLINE int
+PyArray_FLAGS(const PyArrayObject *arr)
+{
+    return ((PyArrayObject_fields *)arr)->flags;
+}
+
+static NPY_INLINE npy_intp
+PyArray_ITEMSIZE(const PyArrayObject *arr)
+{
+    return ((PyArrayObject_fields *)arr)->descr->elsize;
+}
+
+static NPY_INLINE int
+PyArray_TYPE(const PyArrayObject *arr)
+{
+    return ((PyArrayObject_fields *)arr)->descr->type_num;
+}
+
+static NPY_INLINE int
+PyArray_CHKFLAGS(const PyArrayObject *arr, int flags)
+{
+    return (PyArray_FLAGS(arr) & flags) == flags;
+}
+
+static NPY_INLINE PyObject *
+PyArray_GETITEM(const PyArrayObject *arr, const char *itemptr)
+{
+    return ((PyArrayObject_fields *)arr)->descr->f->getitem(
+                                        (void *)itemptr, (PyArrayObject *)arr);
+}
+
+/*
+ * SETITEM should only be used if it is known that the value is a scalar
+ * and of a type understood by the array's dtype.
+ * Use `PyArray_Pack` if the value may be of a different dtype.
+ */
+static NPY_INLINE int
+PyArray_SETITEM(PyArrayObject *arr, char *itemptr, PyObject *v)
+{
+    return ((PyArrayObject_fields *)arr)->descr->f->setitem(v, itemptr, arr);
+}
+
+#else
+
+/* These macros are deprecated as of NumPy 1.7.
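+ * (Editorial note: the inline-function branch above is selected by
+ * defining, before any NumPy header is included,
+ *
+ *     #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
+ *
+ * which is the standard way to opt out of the deprecated macros below.)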
*/ +#define PyArray_NDIM(obj) (((PyArrayObject_fields *)(obj))->nd) +#define PyArray_BYTES(obj) (((PyArrayObject_fields *)(obj))->data) +#define PyArray_DATA(obj) ((void *)((PyArrayObject_fields *)(obj))->data) +#define PyArray_DIMS(obj) (((PyArrayObject_fields *)(obj))->dimensions) +#define PyArray_STRIDES(obj) (((PyArrayObject_fields *)(obj))->strides) +#define PyArray_DIM(obj,n) (PyArray_DIMS(obj)[n]) +#define PyArray_STRIDE(obj,n) (PyArray_STRIDES(obj)[n]) +#define PyArray_BASE(obj) (((PyArrayObject_fields *)(obj))->base) +#define PyArray_DESCR(obj) (((PyArrayObject_fields *)(obj))->descr) +#define PyArray_FLAGS(obj) (((PyArrayObject_fields *)(obj))->flags) +#define PyArray_CHKFLAGS(m, FLAGS) \ + ((((PyArrayObject_fields *)(m))->flags & (FLAGS)) == (FLAGS)) +#define PyArray_ITEMSIZE(obj) \ + (((PyArrayObject_fields *)(obj))->descr->elsize) +#define PyArray_TYPE(obj) \ + (((PyArrayObject_fields *)(obj))->descr->type_num) +#define PyArray_GETITEM(obj,itemptr) \ + PyArray_DESCR(obj)->f->getitem((char *)(itemptr), \ + (PyArrayObject *)(obj)) + +#define PyArray_SETITEM(obj,itemptr,v) \ + PyArray_DESCR(obj)->f->setitem((PyObject *)(v), \ + (char *)(itemptr), \ + (PyArrayObject *)(obj)) +#endif + +static NPY_INLINE PyArray_Descr * +PyArray_DTYPE(PyArrayObject *arr) +{ + return ((PyArrayObject_fields *)arr)->descr; +} + +static NPY_INLINE npy_intp * +PyArray_SHAPE(PyArrayObject *arr) +{ + return ((PyArrayObject_fields *)arr)->dimensions; +} + +/* + * Enables the specified array flags. Does no checking, + * assumes you know what you're doing. + */ +static NPY_INLINE void +PyArray_ENABLEFLAGS(PyArrayObject *arr, int flags) +{ + ((PyArrayObject_fields *)arr)->flags |= flags; +} + +/* + * Clears the specified array flags. Does no checking, + * assumes you know what you're doing. 
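+ *
+ * (Editorial sketch: a typical pairing with PyArray_ENABLEFLAGS above,
+ * e.g. after manually rearranging a hypothetical array arr's strides:
+ *
+ *     PyArray_CLEARFLAGS(arr, NPY_ARRAY_C_CONTIGUOUS);
+ *     PyArray_ENABLEFLAGS(arr, NPY_ARRAY_F_CONTIGUOUS);
+ *
+ * both compile to plain flag-bit updates with no validation.)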
+ */ +static NPY_INLINE void +PyArray_CLEARFLAGS(PyArrayObject *arr, int flags) +{ + ((PyArrayObject_fields *)arr)->flags &= ~flags; +} + +static NPY_INLINE NPY_RETURNS_BORROWED_REF PyObject * +PyArray_HANDLER(PyArrayObject *arr) +{ + return ((PyArrayObject_fields *)arr)->mem_handler; +} + +#define PyTypeNum_ISBOOL(type) ((type) == NPY_BOOL) + +#define PyTypeNum_ISUNSIGNED(type) (((type) == NPY_UBYTE) || \ + ((type) == NPY_USHORT) || \ + ((type) == NPY_UINT) || \ + ((type) == NPY_ULONG) || \ + ((type) == NPY_ULONGLONG)) + +#define PyTypeNum_ISSIGNED(type) (((type) == NPY_BYTE) || \ + ((type) == NPY_SHORT) || \ + ((type) == NPY_INT) || \ + ((type) == NPY_LONG) || \ + ((type) == NPY_LONGLONG)) + +#define PyTypeNum_ISINTEGER(type) (((type) >= NPY_BYTE) && \ + ((type) <= NPY_ULONGLONG)) + +#define PyTypeNum_ISFLOAT(type) ((((type) >= NPY_FLOAT) && \ + ((type) <= NPY_LONGDOUBLE)) || \ + ((type) == NPY_HALF)) + +#define PyTypeNum_ISNUMBER(type) (((type) <= NPY_CLONGDOUBLE) || \ + ((type) == NPY_HALF)) + +#define PyTypeNum_ISSTRING(type) (((type) == NPY_STRING) || \ + ((type) == NPY_UNICODE)) + +#define PyTypeNum_ISCOMPLEX(type) (((type) >= NPY_CFLOAT) && \ + ((type) <= NPY_CLONGDOUBLE)) + +#define PyTypeNum_ISPYTHON(type) (((type) == NPY_LONG) || \ + ((type) == NPY_DOUBLE) || \ + ((type) == NPY_CDOUBLE) || \ + ((type) == NPY_BOOL) || \ + ((type) == NPY_OBJECT )) + +#define PyTypeNum_ISFLEXIBLE(type) (((type) >=NPY_STRING) && \ + ((type) <=NPY_VOID)) + +#define PyTypeNum_ISDATETIME(type) (((type) >=NPY_DATETIME) && \ + ((type) <=NPY_TIMEDELTA)) + +#define PyTypeNum_ISUSERDEF(type) (((type) >= NPY_USERDEF) && \ + ((type) < NPY_USERDEF+ \ + NPY_NUMUSERTYPES)) + +#define PyTypeNum_ISEXTENDED(type) (PyTypeNum_ISFLEXIBLE(type) || \ + PyTypeNum_ISUSERDEF(type)) + +#define PyTypeNum_ISOBJECT(type) ((type) == NPY_OBJECT) + + +#define PyDataType_ISBOOL(obj) PyTypeNum_ISBOOL(((PyArray_Descr*)(obj))->type_num) +#define PyDataType_ISUNSIGNED(obj) PyTypeNum_ISUNSIGNED(((PyArray_Descr*)(obj))->type_num) +#define PyDataType_ISSIGNED(obj) PyTypeNum_ISSIGNED(((PyArray_Descr*)(obj))->type_num) +#define PyDataType_ISINTEGER(obj) PyTypeNum_ISINTEGER(((PyArray_Descr*)(obj))->type_num ) +#define PyDataType_ISFLOAT(obj) PyTypeNum_ISFLOAT(((PyArray_Descr*)(obj))->type_num) +#define PyDataType_ISNUMBER(obj) PyTypeNum_ISNUMBER(((PyArray_Descr*)(obj))->type_num) +#define PyDataType_ISSTRING(obj) PyTypeNum_ISSTRING(((PyArray_Descr*)(obj))->type_num) +#define PyDataType_ISCOMPLEX(obj) PyTypeNum_ISCOMPLEX(((PyArray_Descr*)(obj))->type_num) +#define PyDataType_ISPYTHON(obj) PyTypeNum_ISPYTHON(((PyArray_Descr*)(obj))->type_num) +#define PyDataType_ISFLEXIBLE(obj) PyTypeNum_ISFLEXIBLE(((PyArray_Descr*)(obj))->type_num) +#define PyDataType_ISDATETIME(obj) PyTypeNum_ISDATETIME(((PyArray_Descr*)(obj))->type_num) +#define PyDataType_ISUSERDEF(obj) PyTypeNum_ISUSERDEF(((PyArray_Descr*)(obj))->type_num) +#define PyDataType_ISEXTENDED(obj) PyTypeNum_ISEXTENDED(((PyArray_Descr*)(obj))->type_num) +#define PyDataType_ISOBJECT(obj) PyTypeNum_ISOBJECT(((PyArray_Descr*)(obj))->type_num) +#define PyDataType_HASFIELDS(obj) (((PyArray_Descr *)(obj))->names != NULL) +#define PyDataType_HASSUBARRAY(dtype) ((dtype)->subarray != NULL) +#define PyDataType_ISUNSIZED(dtype) ((dtype)->elsize == 0 && \ + !PyDataType_HASFIELDS(dtype)) +#define PyDataType_MAKEUNSIZED(dtype) ((dtype)->elsize = 0) + +#define PyArray_ISBOOL(obj) PyTypeNum_ISBOOL(PyArray_TYPE(obj)) +#define PyArray_ISUNSIGNED(obj) PyTypeNum_ISUNSIGNED(PyArray_TYPE(obj)) +#define 
PyArray_ISSIGNED(obj) PyTypeNum_ISSIGNED(PyArray_TYPE(obj))
+#define PyArray_ISINTEGER(obj) PyTypeNum_ISINTEGER(PyArray_TYPE(obj))
+#define PyArray_ISFLOAT(obj) PyTypeNum_ISFLOAT(PyArray_TYPE(obj))
+#define PyArray_ISNUMBER(obj) PyTypeNum_ISNUMBER(PyArray_TYPE(obj))
+#define PyArray_ISSTRING(obj) PyTypeNum_ISSTRING(PyArray_TYPE(obj))
+#define PyArray_ISCOMPLEX(obj) PyTypeNum_ISCOMPLEX(PyArray_TYPE(obj))
+#define PyArray_ISPYTHON(obj) PyTypeNum_ISPYTHON(PyArray_TYPE(obj))
+#define PyArray_ISFLEXIBLE(obj) PyTypeNum_ISFLEXIBLE(PyArray_TYPE(obj))
+#define PyArray_ISDATETIME(obj) PyTypeNum_ISDATETIME(PyArray_TYPE(obj))
+#define PyArray_ISUSERDEF(obj) PyTypeNum_ISUSERDEF(PyArray_TYPE(obj))
+#define PyArray_ISEXTENDED(obj) PyTypeNum_ISEXTENDED(PyArray_TYPE(obj))
+#define PyArray_ISOBJECT(obj) PyTypeNum_ISOBJECT(PyArray_TYPE(obj))
+#define PyArray_HASFIELDS(obj) PyDataType_HASFIELDS(PyArray_DESCR(obj))
+
+ /*
+  * FIXME: This should check for a flag on the data-type that
+  * states whether or not it is variable length, because the
+  * ISFLEXIBLE check is hard-coded to the built-in data-types.
+  */
+#define PyArray_ISVARIABLE(obj) PyTypeNum_ISFLEXIBLE(PyArray_TYPE(obj))
+
+#define PyArray_SAFEALIGNEDCOPY(obj) (PyArray_ISALIGNED(obj) && !PyArray_ISVARIABLE(obj))
+
+
+#define NPY_LITTLE '<'
+#define NPY_BIG '>'
+#define NPY_NATIVE '='
+#define NPY_SWAP 's'
+#define NPY_IGNORE '|'
+
+#if NPY_BYTE_ORDER == NPY_BIG_ENDIAN
+#define NPY_NATBYTE NPY_BIG
+#define NPY_OPPBYTE NPY_LITTLE
+#else
+#define NPY_NATBYTE NPY_LITTLE
+#define NPY_OPPBYTE NPY_BIG
+#endif
+
+#define PyArray_ISNBO(arg) ((arg) != NPY_OPPBYTE)
+#define PyArray_IsNativeByteOrder PyArray_ISNBO
+#define PyArray_ISNOTSWAPPED(m) PyArray_ISNBO(PyArray_DESCR(m)->byteorder)
+#define PyArray_ISBYTESWAPPED(m) (!PyArray_ISNOTSWAPPED(m))
+
+#define PyArray_FLAGSWAP(m, flags) (PyArray_CHKFLAGS(m, flags) && \
+                                    PyArray_ISNOTSWAPPED(m))
+
+#define PyArray_ISCARRAY(m) PyArray_FLAGSWAP(m, NPY_ARRAY_CARRAY)
+#define PyArray_ISCARRAY_RO(m) PyArray_FLAGSWAP(m, NPY_ARRAY_CARRAY_RO)
+#define PyArray_ISFARRAY(m) PyArray_FLAGSWAP(m, NPY_ARRAY_FARRAY)
+#define PyArray_ISFARRAY_RO(m) PyArray_FLAGSWAP(m, NPY_ARRAY_FARRAY_RO)
+#define PyArray_ISBEHAVED(m) PyArray_FLAGSWAP(m, NPY_ARRAY_BEHAVED)
+#define PyArray_ISBEHAVED_RO(m) PyArray_FLAGSWAP(m, NPY_ARRAY_ALIGNED)
+
+
+#define PyDataType_ISNOTSWAPPED(d) PyArray_ISNBO(((PyArray_Descr *)(d))->byteorder)
+#define PyDataType_ISBYTESWAPPED(d) (!PyDataType_ISNOTSWAPPED(d))
+
+/************************************************************
+ * A struct used by PyArray_CreateSortedStridePerm, new in 1.7.
+ ************************************************************/
+
+typedef struct {
+    npy_intp perm, stride;
+} npy_stride_sort_item;
+
+/************************************************************
+ * This is the form of the struct that's stored in the
+ * PyCapsule returned by an array's __array_struct__ attribute. See
+ * https://docs.scipy.org/doc/numpy/reference/arrays.interface.html for the full
+ * documentation.
+ ************************************************************/
+typedef struct {
+    int two;              /*
+                           * contains the integer 2 as a sanity
+                           * check
+                           */
+
+    int nd;               /* number of dimensions */
+
+    char typekind;        /*
+                           * kind in array --- character code of
+                           * typestr
+                           */
+
+    int itemsize;         /* size of each element */
+
+    int flags;            /*
+                           * how the data should be interpreted. Valid
+                           * flags are CONTIGUOUS (1), F_CONTIGUOUS (2),
+                           * ALIGNED (0x100), NOTSWAPPED (0x200), and
+                           * WRITEABLE (0x400).
ARR_HAS_DESCR (0x800)
+                           * states that arrdescr field is present in
+                           * structure
+                           */
+
+    npy_intp *shape;      /*
+                           * A length-nd array of shape
+                           * information
+                           */
+
+    npy_intp *strides;    /* A length-nd array of stride information */
+
+    void *data;           /* A pointer to the first element of the array */
+
+    PyObject *descr;      /*
+                           * A list of fields or NULL (ignored if flags
+                           * does not have ARR_HAS_DESCR flag set)
+                           */
+} PyArrayInterface;
+
+/*
+ * This is a function for hooking into the PyDataMem_NEW/FREE/RENEW functions.
+ * See the documentation for PyDataMem_SetEventHook.
+ */
+typedef void (PyDataMem_EventHookFunc)(void *inp, void *outp, size_t size,
+                                       void *user_data);
+
+
+/*
+ * PyArray_DTypeMeta related definitions.
+ *
+ * As of now, this API is preliminary and will be extended as necessary.
+ */
+#if defined(NPY_INTERNAL_BUILD) && NPY_INTERNAL_BUILD
+    /*
+     * The structures defined in this block are currently considered
+     * private API and may change without warning!
+     * Part of this (at least the size) is expected to be public API without
+     * further modifications.
+     */
+    /* TODO: Make this definition public in the API, as soon as it's settled */
+    NPY_NO_EXPORT extern PyTypeObject PyArrayDTypeMeta_Type;
+
+    /*
+     * While NumPy DTypes would not need to be heap types, the plan is to
+     * make DTypes available in Python, at which point they will be heap types.
+     * Since we also wish to add fields to the DType class, this looks like
+     * a typical instance definition, but with PyHeapTypeObject instead of
+     * only the PyObject_HEAD.
+     * This must only be exposed with very careful consideration, since
+     * it is a fairly complex construct which may be better to allow
+     * refactoring of.
+     */
+    typedef struct {
+        PyHeapTypeObject super;
+
+        /*
+         * Most DTypes will have a singleton default instance, for the
+         * parametric legacy DTypes (bytes, string, void, datetime) this
+         * may be a pointer to the *prototype* instance?
+         */
+        PyArray_Descr *singleton;
+        /* Copy of the legacy DTypes type number, usually invalid. */
+        int type_num;
+
+        /* The type object of the scalar instances (may be NULL?) */
+        PyTypeObject *scalar_type;
+        /*
+         * DType flags to signal legacy, parametric, or
+         * abstract. But plenty of space for additional information/flags.
+         */
+        npy_uint64 flags;
+
+        /*
+         * Use indirection in order to allow a fixed size for this struct.
+         * A stable ABI size makes creating a static DType less painful
+         * while also ensuring flexibility for all opaque API (with one
+         * indirection due to the pointer lookup).
+         */
+        void *dt_slots;
+        void *reserved[3];
+    } PyArray_DTypeMeta;
+
+#endif  /* NPY_INTERNAL_BUILD */
+
+
+/*
+ * Use the keyword NPY_DEPRECATED_INCLUDES to ensure that the header files
+ * npy_*_*_deprecated_api.h are only included from here and nowhere else.
+ */
+#ifdef NPY_DEPRECATED_INCLUDES
+#error "Do not use the reserved keyword NPY_DEPRECATED_INCLUDES."
+#endif
+#define NPY_DEPRECATED_INCLUDES
+#if !defined(NPY_NO_DEPRECATED_API) || \
+    (NPY_NO_DEPRECATED_API < NPY_1_7_API_VERSION)
+#include "npy_1_7_deprecated_api.h"
+#endif
+/*
+ * There is no file npy_1_8_deprecated_api.h since there are no additional
+ * deprecated API features in NumPy 1.8.
+ *
+ * Note to maintainers: insert code like the following in future NumPy
+ * versions.
+ *
+ * #if !defined(NPY_NO_DEPRECATED_API) || \
+ *     (NPY_NO_DEPRECATED_API < NPY_1_9_API_VERSION)
+ * #include "npy_1_9_deprecated_api.h"
+ * #endif
+ */
+#undef NPY_DEPRECATED_INCLUDES
+
+#endif  /* NUMPY_CORE_INCLUDE_NUMPY_NDARRAYTYPES_H_ */
diff --git a/.local/lib/python3.8/site-packages/numpy/core/include/numpy/noprefix.h b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/noprefix.h
new file mode 100644
index 0000000..2c0ce14
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/noprefix.h
@@ -0,0 +1,212 @@
+#ifndef NUMPY_CORE_INCLUDE_NUMPY_NOPREFIX_H_
+#define NUMPY_CORE_INCLUDE_NUMPY_NOPREFIX_H_
+
+/*
+ * You can directly include noprefix.h as a backward
+ * compatibility measure
+ */
+#ifndef NPY_NO_PREFIX
+#include "ndarrayobject.h"
+#include "npy_interrupt.h"
+#endif
+
+#define SIGSETJMP   NPY_SIGSETJMP
+#define SIGLONGJMP  NPY_SIGLONGJMP
+#define SIGJMP_BUF  NPY_SIGJMP_BUF
+
+#define MAX_DIMS NPY_MAXDIMS
+
+#define longlong    npy_longlong
+#define ulonglong   npy_ulonglong
+#define Bool        npy_bool
+#define longdouble  npy_longdouble
+#define byte        npy_byte
+
+#ifndef _BSD_SOURCE
+#define ushort      npy_ushort
+#define uint        npy_uint
+#define ulong       npy_ulong
+#endif
+
+#define ubyte       npy_ubyte
+#define ushort      npy_ushort
+#define uint        npy_uint
+#define ulong       npy_ulong
+#define cfloat      npy_cfloat
+#define cdouble     npy_cdouble
+#define clongdouble npy_clongdouble
+#define Int8        npy_int8
+#define UInt8       npy_uint8
+#define Int16       npy_int16
+#define UInt16      npy_uint16
+#define Int32       npy_int32
+#define UInt32      npy_uint32
+#define Int64       npy_int64
+#define UInt64      npy_uint64
+#define Int128      npy_int128
+#define UInt128     npy_uint128
+#define Int256      npy_int256
+#define UInt256     npy_uint256
+#define Float16     npy_float16
+#define Complex32   npy_complex32
+#define Float32     npy_float32
+#define Complex64   npy_complex64
+#define Float64     npy_float64
+#define Complex128  npy_complex128
+#define Float80     npy_float80
+#define Complex160  npy_complex160
+#define Float96     npy_float96
+#define Complex192  npy_complex192
+#define Float128    npy_float128
+#define Complex256  npy_complex256
+#define intp        npy_intp
+#define uintp       npy_uintp
+#define datetime    npy_datetime
+#define timedelta   npy_timedelta
+
+#define SIZEOF_LONGLONG         NPY_SIZEOF_LONGLONG
+#define SIZEOF_INTP             NPY_SIZEOF_INTP
+#define SIZEOF_UINTP            NPY_SIZEOF_UINTP
+#define SIZEOF_HALF             NPY_SIZEOF_HALF
+#define SIZEOF_LONGDOUBLE       NPY_SIZEOF_LONGDOUBLE
+#define SIZEOF_DATETIME         NPY_SIZEOF_DATETIME
+#define SIZEOF_TIMEDELTA        NPY_SIZEOF_TIMEDELTA
+
+#define LONGLONG_FMT NPY_LONGLONG_FMT
+#define ULONGLONG_FMT NPY_ULONGLONG_FMT
+#define LONGLONG_SUFFIX NPY_LONGLONG_SUFFIX
+#define ULONGLONG_SUFFIX NPY_ULONGLONG_SUFFIX
+
+#define MAX_INT8 127
+#define MIN_INT8 -128
+#define MAX_UINT8 255
+#define MAX_INT16 32767
+#define MIN_INT16 -32768
+#define MAX_UINT16 65535
+#define MAX_INT32 2147483647
+#define MIN_INT32 (-MAX_INT32 - 1)
+#define MAX_UINT32 4294967295U
+#define MAX_INT64 LONGLONG_SUFFIX(9223372036854775807)
+#define MIN_INT64 (-MAX_INT64 - LONGLONG_SUFFIX(1))
+#define MAX_UINT64 ULONGLONG_SUFFIX(18446744073709551615)
+#define MAX_INT128 LONGLONG_SUFFIX(170141183460469231731687303715884105727)
+#define MIN_INT128 (-MAX_INT128 - LONGLONG_SUFFIX(1))
+#define MAX_UINT128 ULONGLONG_SUFFIX(340282366920938463463374607431768211455)
+#define MAX_INT256 LONGLONG_SUFFIX(57896044618658097711785492504343953926634992332820282019728792003956564819967)
+#define MIN_INT256 (-MAX_INT256 - LONGLONG_SUFFIX(1))
+#define MAX_UINT256
ULONGLONG_SUFFIX(115792089237316195423570985008687907853269984665640564039457584007913129639935) + +#define MAX_BYTE NPY_MAX_BYTE +#define MIN_BYTE NPY_MIN_BYTE +#define MAX_UBYTE NPY_MAX_UBYTE +#define MAX_SHORT NPY_MAX_SHORT +#define MIN_SHORT NPY_MIN_SHORT +#define MAX_USHORT NPY_MAX_USHORT +#define MAX_INT NPY_MAX_INT +#define MIN_INT NPY_MIN_INT +#define MAX_UINT NPY_MAX_UINT +#define MAX_LONG NPY_MAX_LONG +#define MIN_LONG NPY_MIN_LONG +#define MAX_ULONG NPY_MAX_ULONG +#define MAX_LONGLONG NPY_MAX_LONGLONG +#define MIN_LONGLONG NPY_MIN_LONGLONG +#define MAX_ULONGLONG NPY_MAX_ULONGLONG +#define MIN_DATETIME NPY_MIN_DATETIME +#define MAX_DATETIME NPY_MAX_DATETIME +#define MIN_TIMEDELTA NPY_MIN_TIMEDELTA +#define MAX_TIMEDELTA NPY_MAX_TIMEDELTA + +#define BITSOF_BOOL NPY_BITSOF_BOOL +#define BITSOF_CHAR NPY_BITSOF_CHAR +#define BITSOF_SHORT NPY_BITSOF_SHORT +#define BITSOF_INT NPY_BITSOF_INT +#define BITSOF_LONG NPY_BITSOF_LONG +#define BITSOF_LONGLONG NPY_BITSOF_LONGLONG +#define BITSOF_HALF NPY_BITSOF_HALF +#define BITSOF_FLOAT NPY_BITSOF_FLOAT +#define BITSOF_DOUBLE NPY_BITSOF_DOUBLE +#define BITSOF_LONGDOUBLE NPY_BITSOF_LONGDOUBLE +#define BITSOF_DATETIME NPY_BITSOF_DATETIME +#define BITSOF_TIMEDELTA NPY_BITSOF_TIMEDELTA + +#define _pya_malloc PyArray_malloc +#define _pya_free PyArray_free +#define _pya_realloc PyArray_realloc + +#define BEGIN_THREADS_DEF NPY_BEGIN_THREADS_DEF +#define BEGIN_THREADS NPY_BEGIN_THREADS +#define END_THREADS NPY_END_THREADS +#define ALLOW_C_API_DEF NPY_ALLOW_C_API_DEF +#define ALLOW_C_API NPY_ALLOW_C_API +#define DISABLE_C_API NPY_DISABLE_C_API + +#define PY_FAIL NPY_FAIL +#define PY_SUCCEED NPY_SUCCEED + +#ifndef TRUE +#define TRUE NPY_TRUE +#endif + +#ifndef FALSE +#define FALSE NPY_FALSE +#endif + +#define LONGDOUBLE_FMT NPY_LONGDOUBLE_FMT + +#define CONTIGUOUS NPY_CONTIGUOUS +#define C_CONTIGUOUS NPY_C_CONTIGUOUS +#define FORTRAN NPY_FORTRAN +#define F_CONTIGUOUS NPY_F_CONTIGUOUS +#define OWNDATA NPY_OWNDATA +#define FORCECAST NPY_FORCECAST +#define ENSURECOPY NPY_ENSURECOPY +#define ENSUREARRAY NPY_ENSUREARRAY +#define ELEMENTSTRIDES NPY_ELEMENTSTRIDES +#define ALIGNED NPY_ALIGNED +#define NOTSWAPPED NPY_NOTSWAPPED +#define WRITEABLE NPY_WRITEABLE +#define UPDATEIFCOPY NPY_UPDATEIFCOPY +#define WRITEBACKIFCOPY NPY_ARRAY_WRITEBACKIFCOPY +#define ARR_HAS_DESCR NPY_ARR_HAS_DESCR +#define BEHAVED NPY_BEHAVED +#define BEHAVED_NS NPY_BEHAVED_NS +#define CARRAY NPY_CARRAY +#define CARRAY_RO NPY_CARRAY_RO +#define FARRAY NPY_FARRAY +#define FARRAY_RO NPY_FARRAY_RO +#define DEFAULT NPY_DEFAULT +#define IN_ARRAY NPY_IN_ARRAY +#define OUT_ARRAY NPY_OUT_ARRAY +#define INOUT_ARRAY NPY_INOUT_ARRAY +#define IN_FARRAY NPY_IN_FARRAY +#define OUT_FARRAY NPY_OUT_FARRAY +#define INOUT_FARRAY NPY_INOUT_FARRAY +#define UPDATE_ALL NPY_UPDATE_ALL + +#define OWN_DATA NPY_OWNDATA +#define BEHAVED_FLAGS NPY_BEHAVED +#define BEHAVED_FLAGS_NS NPY_BEHAVED_NS +#define CARRAY_FLAGS_RO NPY_CARRAY_RO +#define CARRAY_FLAGS NPY_CARRAY +#define FARRAY_FLAGS NPY_FARRAY +#define FARRAY_FLAGS_RO NPY_FARRAY_RO +#define DEFAULT_FLAGS NPY_DEFAULT +#define UPDATE_ALL_FLAGS NPY_UPDATE_ALL_FLAGS + +#ifndef MIN +#define MIN PyArray_MIN +#endif +#ifndef MAX +#define MAX PyArray_MAX +#endif +#define MAX_INTP NPY_MAX_INTP +#define MIN_INTP NPY_MIN_INTP +#define MAX_UINTP NPY_MAX_UINTP +#define INTP_FMT NPY_INTP_FMT + +#ifndef PYPY_VERSION +#define REFCOUNT PyArray_REFCOUNT +#define MAX_ELSIZE NPY_MAX_ELSIZE +#endif + +#endif /* NUMPY_CORE_INCLUDE_NUMPY_NOPREFIX_H_ */ diff --git 
a/.local/lib/python3.8/site-packages/numpy/core/include/numpy/npy_1_7_deprecated_api.h b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/npy_1_7_deprecated_api.h new file mode 100644 index 0000000..4fd4015 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/npy_1_7_deprecated_api.h @@ -0,0 +1,125 @@ +#ifndef NPY_DEPRECATED_INCLUDES +#error "Should never include npy_*_*_deprecated_api directly." +#endif + +#ifndef NUMPY_CORE_INCLUDE_NUMPY_NPY_1_7_DEPRECATED_API_H_ +#define NUMPY_CORE_INCLUDE_NUMPY_NPY_1_7_DEPRECATED_API_H_ + +/* Emit a warning if the user did not specifically request the old API */ +#ifndef NPY_NO_DEPRECATED_API +#if defined(_WIN32) +#define _WARN___STR2__(x) #x +#define _WARN___STR1__(x) _WARN___STR2__(x) +#define _WARN___LOC__ __FILE__ "(" _WARN___STR1__(__LINE__) ") : Warning Msg: " +#pragma message(_WARN___LOC__"Using deprecated NumPy API, disable it with " \ + "#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION") +#else +#warning "Using deprecated NumPy API, disable it with " \ + "#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION" +#endif +#endif + +/* + * This header exists to collect all dangerous/deprecated NumPy API + * as of NumPy 1.7. + * + * This is an attempt to remove bad API, the proliferation of macros, + * and namespace pollution currently produced by the NumPy headers. + */ + +/* These array flags are deprecated as of NumPy 1.7 */ +#define NPY_CONTIGUOUS NPY_ARRAY_C_CONTIGUOUS +#define NPY_FORTRAN NPY_ARRAY_F_CONTIGUOUS + +/* + * The consistent NPY_ARRAY_* names which don't pollute the NPY_* + * namespace were added in NumPy 1.7. + * + * These versions of the carray flags are deprecated, but + * probably should only be removed after two releases instead of one. + */ +#define NPY_C_CONTIGUOUS NPY_ARRAY_C_CONTIGUOUS +#define NPY_F_CONTIGUOUS NPY_ARRAY_F_CONTIGUOUS +#define NPY_OWNDATA NPY_ARRAY_OWNDATA +#define NPY_FORCECAST NPY_ARRAY_FORCECAST +#define NPY_ENSURECOPY NPY_ARRAY_ENSURECOPY +#define NPY_ENSUREARRAY NPY_ARRAY_ENSUREARRAY +#define NPY_ELEMENTSTRIDES NPY_ARRAY_ELEMENTSTRIDES +#define NPY_ALIGNED NPY_ARRAY_ALIGNED +#define NPY_NOTSWAPPED NPY_ARRAY_NOTSWAPPED +#define NPY_WRITEABLE NPY_ARRAY_WRITEABLE +#define NPY_UPDATEIFCOPY NPY_ARRAY_UPDATEIFCOPY +#define NPY_BEHAVED NPY_ARRAY_BEHAVED +#define NPY_BEHAVED_NS NPY_ARRAY_BEHAVED_NS +#define NPY_CARRAY NPY_ARRAY_CARRAY +#define NPY_CARRAY_RO NPY_ARRAY_CARRAY_RO +#define NPY_FARRAY NPY_ARRAY_FARRAY +#define NPY_FARRAY_RO NPY_ARRAY_FARRAY_RO +#define NPY_DEFAULT NPY_ARRAY_DEFAULT +#define NPY_IN_ARRAY NPY_ARRAY_IN_ARRAY +#define NPY_OUT_ARRAY NPY_ARRAY_OUT_ARRAY +#define NPY_INOUT_ARRAY NPY_ARRAY_INOUT_ARRAY +#define NPY_IN_FARRAY NPY_ARRAY_IN_FARRAY +#define NPY_OUT_FARRAY NPY_ARRAY_OUT_FARRAY +#define NPY_INOUT_FARRAY NPY_ARRAY_INOUT_FARRAY +#define NPY_UPDATE_ALL NPY_ARRAY_UPDATE_ALL + +/* This way of accessing the default type is deprecated as of NumPy 1.7 */ +#define PyArray_DEFAULT NPY_DEFAULT_TYPE + +/* These DATETIME bits aren't used internally */ +#define PyDataType_GetDatetimeMetaData(descr) \ + ((descr->metadata == NULL) ? NULL : \ + ((PyArray_DatetimeMetaData *)(PyCapsule_GetPointer( \ + PyDict_GetItemString( \ + descr->metadata, NPY_METADATA_DTSTR), NULL)))) + +/* + * Deprecated as of NumPy 1.7, this kind of shortcut doesn't + * belong in the public API. + */ +#define NPY_AO PyArrayObject + +/* + * Deprecated as of NumPy 1.7, an all-lowercase macro doesn't + * belong in the public API. 
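+ * (Editorial note: the define below simply renames the bare identifier
+ * to a trailing-underscore spelling, keeping old sources compiling.)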
+ */
+#define fortran fortran_
+
+/*
+ * Deprecated as of NumPy 1.7, as it is a namespace-polluting
+ * macro.
+ */
+#define FORTRAN_IF PyArray_FORTRAN_IF
+
+/* Deprecated as of NumPy 1.7, datetime64 uses c_metadata instead */
+#define NPY_METADATA_DTSTR "__timeunit__"
+
+/*
+ * Deprecated as of NumPy 1.7.
+ * The reasoning:
+ *  - These are for datetime, but there's no datetime "namespace".
+ *  - They just turn NPY_STR_<x> into "<x>", which is just
+ *    making something simple be indirected.
+ */
+#define NPY_STR_Y "Y"
+#define NPY_STR_M "M"
+#define NPY_STR_W "W"
+#define NPY_STR_D "D"
+#define NPY_STR_h "h"
+#define NPY_STR_m "m"
+#define NPY_STR_s "s"
+#define NPY_STR_ms "ms"
+#define NPY_STR_us "us"
+#define NPY_STR_ns "ns"
+#define NPY_STR_ps "ps"
+#define NPY_STR_fs "fs"
+#define NPY_STR_as "as"
+
+/*
+ * The macros in old_defines.h are deprecated as of NumPy 1.7 and will be
+ * removed in the next major release.
+ */
+#include "old_defines.h"
+
+#endif  /* NUMPY_CORE_INCLUDE_NUMPY_NPY_1_7_DEPRECATED_API_H_ */
diff --git a/.local/lib/python3.8/site-packages/numpy/core/include/numpy/npy_3kcompat.h b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/npy_3kcompat.h
new file mode 100644
index 0000000..22c103e
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/npy_3kcompat.h
@@ -0,0 +1,595 @@
+/*
+ * This is a convenience header file providing compatibility utilities
+ * for supporting Python 2 and Python 3 in the same code base.
+ *
+ * If you want to use this for your own projects, it's recommended to make a
+ * copy of it. Although the stuff below is unlikely to change, we don't provide
+ * strong backwards compatibility guarantees at the moment.
+ */
+
+#ifndef NUMPY_CORE_INCLUDE_NUMPY_NPY_3KCOMPAT_H_
+#define NUMPY_CORE_INCLUDE_NUMPY_NPY_3KCOMPAT_H_
+
+#include <Python.h>
+#include <stdio.h>
+
+#ifndef NPY_PY3K
+#define NPY_PY3K 1
+#endif
+
+#include "numpy/npy_common.h"
+#include "numpy/ndarrayobject.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * PyInt -> PyLong
+ */
+
+
+/*
+ * This is a renamed copy of the Python non-limited API function _PyLong_AsInt. It is
+ * included here because it is missing from the PyPy API. It completes the PyLong_As*
+ * group of functions and can be useful in replacing PyInt_Check.
+ */
+static NPY_INLINE int
+Npy__PyLong_AsInt(PyObject *obj)
+{
+    int overflow;
+    long result = PyLong_AsLongAndOverflow(obj, &overflow);
+
+    /* INT_MAX and INT_MIN are defined in Python.h */
+    if (overflow || result > INT_MAX || result < INT_MIN) {
+        /* XXX: could be cute and give a different
+           message for overflow == -1 */
+        PyErr_SetString(PyExc_OverflowError,
+                "Python int too large to convert to C int");
+        return -1;
+    }
+    return (int)result;
+}
+
+
+#if defined(NPY_PY3K)
+/* Return True only if the long fits in a C long */
+static NPY_INLINE int PyInt_Check(PyObject *op) {
+    int overflow = 0;
+    if (!PyLong_Check(op)) {
+        return 0;
+    }
+    PyLong_AsLongAndOverflow(op, &overflow);
+    return (overflow == 0);
+}
+
+
+#define PyInt_FromLong PyLong_FromLong
+#define PyInt_AsLong PyLong_AsLong
+#define PyInt_AS_LONG PyLong_AsLong
+#define PyInt_AsSsize_t PyLong_AsSsize_t
+#define PyNumber_Int PyNumber_Long
+
+/* NOTE:
+ *
+ * Since the PyLong type is very different from the fixed-range PyInt,
+ * we don't define PyInt_Type -> PyLong_Type.
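+ *
+ * (Editorial sketch: with the shims above, legacy code such as
+ *
+ *     if (PyInt_Check(op)) {
+ *         long v = PyInt_AS_LONG(op);
+ *     }
+ *
+ * keeps compiling on Python 3, where PyInt_Check matches only ints that
+ * fit in a C long; op is a hypothetical PyObject *.)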
+ */ +#endif /* NPY_PY3K */ + +/* Py3 changes PySlice_GetIndicesEx' first argument's type to PyObject* */ +#ifdef NPY_PY3K +# define NpySlice_GetIndicesEx PySlice_GetIndicesEx +#else +# define NpySlice_GetIndicesEx(op, nop, start, end, step, slicelength) \ + PySlice_GetIndicesEx((PySliceObject *)op, nop, start, end, step, slicelength) +#endif + +#if PY_VERSION_HEX < 0x030900a4 + /* Introduced in https://github.com/python/cpython/commit/d2ec81a8c99796b51fb8c49b77a7fe369863226f */ + #define Py_SET_TYPE(obj, type) ((Py_TYPE(obj) = (type)), (void)0) + /* Introduced in https://github.com/python/cpython/commit/b10dc3e7a11fcdb97e285882eba6da92594f90f9 */ + #define Py_SET_SIZE(obj, size) ((Py_SIZE(obj) = (size)), (void)0) + /* Introduced in https://github.com/python/cpython/commit/c86a11221df7e37da389f9c6ce6e47ea22dc44ff */ + #define Py_SET_REFCNT(obj, refcnt) ((Py_REFCNT(obj) = (refcnt)), (void)0) +#endif + + +#define Npy_EnterRecursiveCall(x) Py_EnterRecursiveCall(x) + +/* Py_SETREF was added in 3.5.2, and only if Py_LIMITED_API is absent */ +#if PY_VERSION_HEX < 0x03050200 + #define Py_SETREF(op, op2) \ + do { \ + PyObject *_py_tmp = (PyObject *)(op); \ + (op) = (op2); \ + Py_DECREF(_py_tmp); \ + } while (0) +#endif + +/* introduced in https://github.com/python/cpython/commit/a24107b04c1277e3c1105f98aff5bfa3a98b33a0 */ +#if PY_VERSION_HEX < 0x030800A3 + static NPY_INLINE PyObject * + _PyDict_GetItemStringWithError(PyObject *v, const char *key) + { + PyObject *kv, *rv; + kv = PyUnicode_FromString(key); + if (kv == NULL) { + return NULL; + } + rv = PyDict_GetItemWithError(v, kv); + Py_DECREF(kv); + return rv; + } +#endif + +/* + * PyString -> PyBytes + */ + +#if defined(NPY_PY3K) + +#define PyString_Type PyBytes_Type +#define PyString_Check PyBytes_Check +#define PyStringObject PyBytesObject +#define PyString_FromString PyBytes_FromString +#define PyString_FromStringAndSize PyBytes_FromStringAndSize +#define PyString_AS_STRING PyBytes_AS_STRING +#define PyString_AsStringAndSize PyBytes_AsStringAndSize +#define PyString_FromFormat PyBytes_FromFormat +#define PyString_Concat PyBytes_Concat +#define PyString_ConcatAndDel PyBytes_ConcatAndDel +#define PyString_AsString PyBytes_AsString +#define PyString_GET_SIZE PyBytes_GET_SIZE +#define PyString_Size PyBytes_Size + +#define PyUString_Type PyUnicode_Type +#define PyUString_Check PyUnicode_Check +#define PyUStringObject PyUnicodeObject +#define PyUString_FromString PyUnicode_FromString +#define PyUString_FromStringAndSize PyUnicode_FromStringAndSize +#define PyUString_FromFormat PyUnicode_FromFormat +#define PyUString_Concat PyUnicode_Concat2 +#define PyUString_ConcatAndDel PyUnicode_ConcatAndDel +#define PyUString_GET_SIZE PyUnicode_GET_SIZE +#define PyUString_Size PyUnicode_Size +#define PyUString_InternFromString PyUnicode_InternFromString +#define PyUString_Format PyUnicode_Format + +#define PyBaseString_Check(obj) (PyUnicode_Check(obj)) + +#else + +#define PyBytes_Type PyString_Type +#define PyBytes_Check PyString_Check +#define PyBytesObject PyStringObject +#define PyBytes_FromString PyString_FromString +#define PyBytes_FromStringAndSize PyString_FromStringAndSize +#define PyBytes_AS_STRING PyString_AS_STRING +#define PyBytes_AsStringAndSize PyString_AsStringAndSize +#define PyBytes_FromFormat PyString_FromFormat +#define PyBytes_Concat PyString_Concat +#define PyBytes_ConcatAndDel PyString_ConcatAndDel +#define PyBytes_AsString PyString_AsString +#define PyBytes_GET_SIZE PyString_GET_SIZE +#define PyBytes_Size PyString_Size + +#define 
PyUString_Type PyString_Type +#define PyUString_Check PyString_Check +#define PyUStringObject PyStringObject +#define PyUString_FromString PyString_FromString +#define PyUString_FromStringAndSize PyString_FromStringAndSize +#define PyUString_FromFormat PyString_FromFormat +#define PyUString_Concat PyString_Concat +#define PyUString_ConcatAndDel PyString_ConcatAndDel +#define PyUString_GET_SIZE PyString_GET_SIZE +#define PyUString_Size PyString_Size +#define PyUString_InternFromString PyString_InternFromString +#define PyUString_Format PyString_Format + +#define PyBaseString_Check(obj) (PyBytes_Check(obj) || PyUnicode_Check(obj)) + +#endif /* NPY_PY3K */ + + +static NPY_INLINE void +PyUnicode_ConcatAndDel(PyObject **left, PyObject *right) +{ + Py_SETREF(*left, PyUnicode_Concat(*left, right)); + Py_DECREF(right); +} + +static NPY_INLINE void +PyUnicode_Concat2(PyObject **left, PyObject *right) +{ + Py_SETREF(*left, PyUnicode_Concat(*left, right)); +} + +/* + * PyFile_* compatibility + */ + +/* + * Get a FILE* handle to the file represented by the Python object + */ +static NPY_INLINE FILE* +npy_PyFile_Dup2(PyObject *file, char *mode, npy_off_t *orig_pos) +{ + int fd, fd2, unbuf; + Py_ssize_t fd2_tmp; + PyObject *ret, *os, *io, *io_raw; + npy_off_t pos; + FILE *handle; + + /* For Python 2 PyFileObject, use PyFile_AsFile */ +#if !defined(NPY_PY3K) + if (PyFile_Check(file)) { + return PyFile_AsFile(file); + } +#endif + + /* Flush first to ensure things end up in the file in the correct order */ + ret = PyObject_CallMethod(file, "flush", ""); + if (ret == NULL) { + return NULL; + } + Py_DECREF(ret); + fd = PyObject_AsFileDescriptor(file); + if (fd == -1) { + return NULL; + } + + /* + * The handle needs to be dup'd because we have to call fclose + * at the end + */ + os = PyImport_ImportModule("os"); + if (os == NULL) { + return NULL; + } + ret = PyObject_CallMethod(os, "dup", "i", fd); + Py_DECREF(os); + if (ret == NULL) { + return NULL; + } + fd2_tmp = PyNumber_AsSsize_t(ret, PyExc_IOError); + Py_DECREF(ret); + if (fd2_tmp == -1 && PyErr_Occurred()) { + return NULL; + } + if (fd2_tmp < INT_MIN || fd2_tmp > INT_MAX) { + PyErr_SetString(PyExc_IOError, + "Getting an 'int' from os.dup() failed"); + return NULL; + } + fd2 = (int)fd2_tmp; + + /* Convert to FILE* handle */ +#ifdef _WIN32 + handle = _fdopen(fd2, mode); +#else + handle = fdopen(fd2, mode); +#endif + if (handle == NULL) { + PyErr_SetString(PyExc_IOError, + "Getting a FILE* from a Python file object failed"); + return NULL; + } + + /* Record the original raw file handle position */ + *orig_pos = npy_ftell(handle); + if (*orig_pos == -1) { + /* The io module is needed to determine if buffering is used */ + io = PyImport_ImportModule("io"); + if (io == NULL) { + fclose(handle); + return NULL; + } + /* File object instances of RawIOBase are unbuffered */ + io_raw = PyObject_GetAttrString(io, "RawIOBase"); + Py_DECREF(io); + if (io_raw == NULL) { + fclose(handle); + return NULL; + } + unbuf = PyObject_IsInstance(file, io_raw); + Py_DECREF(io_raw); + if (unbuf == 1) { + /* Succeed if the IO is unbuffered */ + return handle; + } + else { + PyErr_SetString(PyExc_IOError, "obtaining file position failed"); + fclose(handle); + return NULL; + } + } + + /* Seek raw handle to the Python-side position */ + ret = PyObject_CallMethod(file, "tell", ""); + if (ret == NULL) { + fclose(handle); + return NULL; + } + pos = PyLong_AsLongLong(ret); + Py_DECREF(ret); + if (PyErr_Occurred()) { + fclose(handle); + return NULL; + } + if (npy_fseek(handle, pos, 
SEEK_SET) == -1) { + PyErr_SetString(PyExc_IOError, "seeking file failed"); + fclose(handle); + return NULL; + } + return handle; +} + +/* + * Close the dup-ed file handle, and seek the Python one to the current position + */ +static NPY_INLINE int +npy_PyFile_DupClose2(PyObject *file, FILE* handle, npy_off_t orig_pos) +{ + int fd, unbuf; + PyObject *ret, *io, *io_raw; + npy_off_t position; + + /* For Python 2 PyFileObject, do nothing */ +#if !defined(NPY_PY3K) + if (PyFile_Check(file)) { + return 0; + } +#endif + + position = npy_ftell(handle); + + /* Close the FILE* handle */ + fclose(handle); + + /* + * Restore original file handle position, in order to not confuse + * Python-side data structures + */ + fd = PyObject_AsFileDescriptor(file); + if (fd == -1) { + return -1; + } + + if (npy_lseek(fd, orig_pos, SEEK_SET) == -1) { + + /* The io module is needed to determine if buffering is used */ + io = PyImport_ImportModule("io"); + if (io == NULL) { + return -1; + } + /* File object instances of RawIOBase are unbuffered */ + io_raw = PyObject_GetAttrString(io, "RawIOBase"); + Py_DECREF(io); + if (io_raw == NULL) { + return -1; + } + unbuf = PyObject_IsInstance(file, io_raw); + Py_DECREF(io_raw); + if (unbuf == 1) { + /* Succeed if the IO is unbuffered */ + return 0; + } + else { + PyErr_SetString(PyExc_IOError, "seeking file failed"); + return -1; + } + } + + if (position == -1) { + PyErr_SetString(PyExc_IOError, "obtaining file position failed"); + return -1; + } + + /* Seek Python-side handle to the FILE* handle position */ + ret = PyObject_CallMethod(file, "seek", NPY_OFF_T_PYFMT "i", position, 0); + if (ret == NULL) { + return -1; + } + Py_DECREF(ret); + return 0; +} + +static NPY_INLINE int +npy_PyFile_Check(PyObject *file) +{ + int fd; + /* For Python 2, check if it is a PyFileObject */ +#if !defined(NPY_PY3K) + if (PyFile_Check(file)) { + return 1; + } +#endif + fd = PyObject_AsFileDescriptor(file); + if (fd == -1) { + PyErr_Clear(); + return 0; + } + return 1; +} + +static NPY_INLINE PyObject* +npy_PyFile_OpenFile(PyObject *filename, const char *mode) +{ + PyObject *open; + open = PyDict_GetItemString(PyEval_GetBuiltins(), "open"); + if (open == NULL) { + return NULL; + } + return PyObject_CallFunction(open, "Os", filename, mode); +} + +static NPY_INLINE int +npy_PyFile_CloseFile(PyObject *file) +{ + PyObject *ret; + + ret = PyObject_CallMethod(file, "close", NULL); + if (ret == NULL) { + return -1; + } + Py_DECREF(ret); + return 0; +} + + +/* This is a copy of _PyErr_ChainExceptions + */ +static NPY_INLINE void +npy_PyErr_ChainExceptions(PyObject *exc, PyObject *val, PyObject *tb) +{ + if (exc == NULL) + return; + + if (PyErr_Occurred()) { + /* only py3 supports this anyway */ + #ifdef NPY_PY3K + PyObject *exc2, *val2, *tb2; + PyErr_Fetch(&exc2, &val2, &tb2); + PyErr_NormalizeException(&exc, &val, &tb); + if (tb != NULL) { + PyException_SetTraceback(val, tb); + Py_DECREF(tb); + } + Py_DECREF(exc); + PyErr_NormalizeException(&exc2, &val2, &tb2); + PyException_SetContext(val2, val); + PyErr_Restore(exc2, val2, tb2); + #endif + } + else { + PyErr_Restore(exc, val, tb); + } +} + + +/* This is a copy of _PyErr_ChainExceptions, with: + * - a minimal implementation for python 2 + * - __cause__ used instead of __context__ + */ +static NPY_INLINE void +npy_PyErr_ChainExceptionsCause(PyObject *exc, PyObject *val, PyObject *tb) +{ + if (exc == NULL) + return; + + if (PyErr_Occurred()) { + /* only py3 supports this anyway */ + #ifdef NPY_PY3K + PyObject *exc2, *val2, *tb2; + 
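+        /*
+         * (Editorial note: the block below fetches the exception that is
+         * already set, normalizes both, and chains the incoming one in as
+         * the new exception's __cause__.)
+         */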
PyErr_Fetch(&exc2, &val2, &tb2);
+        PyErr_NormalizeException(&exc, &val, &tb);
+        if (tb != NULL) {
+            PyException_SetTraceback(val, tb);
+            Py_DECREF(tb);
+        }
+        Py_DECREF(exc);
+        PyErr_NormalizeException(&exc2, &val2, &tb2);
+        PyException_SetCause(val2, val);
+        PyErr_Restore(exc2, val2, tb2);
+    #endif
+    }
+    else {
+        PyErr_Restore(exc, val, tb);
+    }
+}
+
+/*
+ * PyObject_Cmp
+ */
+#if defined(NPY_PY3K)
+static NPY_INLINE int
+PyObject_Cmp(PyObject *i1, PyObject *i2, int *cmp)
+{
+    int v;
+    v = PyObject_RichCompareBool(i1, i2, Py_LT);
+    if (v == 1) {
+        *cmp = -1;
+        return 1;
+    }
+    else if (v == -1) {
+        return -1;
+    }
+
+    v = PyObject_RichCompareBool(i1, i2, Py_GT);
+    if (v == 1) {
+        *cmp = 1;
+        return 1;
+    }
+    else if (v == -1) {
+        return -1;
+    }
+
+    v = PyObject_RichCompareBool(i1, i2, Py_EQ);
+    if (v == 1) {
+        *cmp = 0;
+        return 1;
+    }
+    else {
+        *cmp = 0;
+        return -1;
+    }
+}
+#endif
+
+/*
+ * PyCObject functions adapted to PyCapsules.
+ *
+ * The main job here is to get rid of the improved error handling
+ * of PyCapsules. It's a shame...
+ */
+static NPY_INLINE PyObject *
+NpyCapsule_FromVoidPtr(void *ptr, void (*dtor)(PyObject *))
+{
+    PyObject *ret = PyCapsule_New(ptr, NULL, dtor);
+    if (ret == NULL) {
+        PyErr_Clear();
+    }
+    return ret;
+}
+
+static NPY_INLINE PyObject *
+NpyCapsule_FromVoidPtrAndDesc(void *ptr, void* context, void (*dtor)(PyObject *))
+{
+    PyObject *ret = NpyCapsule_FromVoidPtr(ptr, dtor);
+    if (ret != NULL && PyCapsule_SetContext(ret, context) != 0) {
+        PyErr_Clear();
+        Py_DECREF(ret);
+        ret = NULL;
+    }
+    return ret;
+}
+
+static NPY_INLINE void *
+NpyCapsule_AsVoidPtr(PyObject *obj)
+{
+    void *ret = PyCapsule_GetPointer(obj, NULL);
+    if (ret == NULL) {
+        PyErr_Clear();
+    }
+    return ret;
+}
+
+static NPY_INLINE void *
+NpyCapsule_GetDesc(PyObject *obj)
+{
+    return PyCapsule_GetContext(obj);
+}
+
+static NPY_INLINE int
+NpyCapsule_Check(PyObject *ptr)
+{
+    return PyCapsule_CheckExact(ptr);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif  /* NUMPY_CORE_INCLUDE_NUMPY_NPY_3KCOMPAT_H_ */
diff --git a/.local/lib/python3.8/site-packages/numpy/core/include/numpy/npy_common.h b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/npy_common.h
new file mode 100644
index 0000000..1d6234e
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/npy_common.h
@@ -0,0 +1,1121 @@
+#ifndef NUMPY_CORE_INCLUDE_NUMPY_NPY_COMMON_H_
+#define NUMPY_CORE_INCLUDE_NUMPY_NPY_COMMON_H_
+
+/* need Python.h for npy_intp, npy_uintp */
+#include <Python.h>
+
+/* numpyconfig.h is auto-generated */
+#include "numpyconfig.h"
+#ifdef HAVE_NPY_CONFIG_H
+#include <npy_config.h>
+#endif
+
+/*
+ * using static inline modifiers when defining npy_math functions
+ * allows the compiler to make optimizations when possible
+ */
+#ifndef NPY_INLINE_MATH
+#if defined(NPY_INTERNAL_BUILD) && NPY_INTERNAL_BUILD
+    #define NPY_INLINE_MATH 1
+#else
+    #define NPY_INLINE_MATH 0
+#endif
+#endif
+
+/*
+ * gcc does not unroll even with -O3
+ * use with care, unrolling on modern cpus rarely speeds things up
+ */
+#ifdef HAVE_ATTRIBUTE_OPTIMIZE_UNROLL_LOOPS
+#define NPY_GCC_UNROLL_LOOPS \
+    __attribute__((optimize("unroll-loops")))
+#else
+#define NPY_GCC_UNROLL_LOOPS
+#endif
+
+/* highest gcc optimization level, enables the autovectorizer */
+#ifdef HAVE_ATTRIBUTE_OPTIMIZE_OPT_3
+#define NPY_GCC_OPT_3 __attribute__((optimize("O3")))
+#else
+#define NPY_GCC_OPT_3
+#endif
+
+/* compile target attributes */
+#if defined HAVE_ATTRIBUTE_TARGET_AVX && defined HAVE_LINK_AVX
+#define NPY_GCC_TARGET_AVX __attribute__((target("avx")))
+#else +#define NPY_GCC_TARGET_AVX +#endif + +#if defined HAVE_ATTRIBUTE_TARGET_AVX2_WITH_INTRINSICS +#define HAVE_ATTRIBUTE_TARGET_FMA +#define NPY_GCC_TARGET_FMA __attribute__((target("avx2,fma"))) +#endif + +#if defined HAVE_ATTRIBUTE_TARGET_AVX2 && defined HAVE_LINK_AVX2 +#define NPY_GCC_TARGET_AVX2 __attribute__((target("avx2"))) +#else +#define NPY_GCC_TARGET_AVX2 +#endif + +#if defined HAVE_ATTRIBUTE_TARGET_AVX512F && defined HAVE_LINK_AVX512F +#define NPY_GCC_TARGET_AVX512F __attribute__((target("avx512f"))) +#elif defined HAVE_ATTRIBUTE_TARGET_AVX512F_WITH_INTRINSICS +#define NPY_GCC_TARGET_AVX512F __attribute__((target("avx512f"))) +#else +#define NPY_GCC_TARGET_AVX512F +#endif + +#if defined HAVE_ATTRIBUTE_TARGET_AVX512_SKX && defined HAVE_LINK_AVX512_SKX +#define NPY_GCC_TARGET_AVX512_SKX __attribute__((target("avx512f,avx512dq,avx512vl,avx512bw,avx512cd"))) +#elif defined HAVE_ATTRIBUTE_TARGET_AVX512_SKX_WITH_INTRINSICS +#define NPY_GCC_TARGET_AVX512_SKX __attribute__((target("avx512f,avx512dq,avx512vl,avx512bw,avx512cd"))) +#else +#define NPY_GCC_TARGET_AVX512_SKX +#endif +/* + * mark an argument (starting from 1) that must not be NULL and is not checked + * DO NOT USE IF FUNCTION CHECKS FOR NULL!! the compiler will remove the check + */ +#ifdef HAVE_ATTRIBUTE_NONNULL +#define NPY_GCC_NONNULL(n) __attribute__((nonnull(n))) +#else +#define NPY_GCC_NONNULL(n) +#endif + +#if defined HAVE_XMMINTRIN_H && defined HAVE__MM_LOAD_PS +#define NPY_HAVE_SSE_INTRINSICS +#endif + +#if defined HAVE_EMMINTRIN_H && defined HAVE__MM_LOAD_PD +#define NPY_HAVE_SSE2_INTRINSICS +#endif + +#if defined HAVE_IMMINTRIN_H && defined HAVE_LINK_AVX2 +#define NPY_HAVE_AVX2_INTRINSICS +#endif + +#if defined HAVE_IMMINTRIN_H && defined HAVE_LINK_AVX512F +#define NPY_HAVE_AVX512F_INTRINSICS +#endif +/* + * give a hint to the compiler which branch is more likely or unlikely + * to occur, e.g. rare error cases: + * + * if (NPY_UNLIKELY(failure == 0)) + * return NULL; + * + * the double !! is to cast the expression (e.g. NULL) to a boolean required by + * the intrinsic + */ +#ifdef HAVE___BUILTIN_EXPECT +#define NPY_LIKELY(x) __builtin_expect(!!(x), 1) +#define NPY_UNLIKELY(x) __builtin_expect(!!(x), 0) +#else +#define NPY_LIKELY(x) (x) +#define NPY_UNLIKELY(x) (x) +#endif + +#ifdef HAVE___BUILTIN_PREFETCH +/* unlike _mm_prefetch also works on non-x86 */ +#define NPY_PREFETCH(x, rw, loc) __builtin_prefetch((x), (rw), (loc)) +#else +#ifdef HAVE__MM_PREFETCH +/* _MM_HINT_ET[01] (rw = 1) unsupported, only available in gcc >= 4.9 */ +#define NPY_PREFETCH(x, rw, loc) _mm_prefetch((x), loc == 0 ? _MM_HINT_NTA : \ + (loc == 1 ? _MM_HINT_T2 : \ + (loc == 2 ? _MM_HINT_T1 : \ + (loc == 3 ? 
_MM_HINT_T0 : -1)))) +#else +#define NPY_PREFETCH(x, rw, loc) +#endif +#endif + +#if defined(_MSC_VER) + #define NPY_INLINE __inline +#elif defined(__GNUC__) + #if defined(__STRICT_ANSI__) + #define NPY_INLINE __inline__ + #else + #define NPY_INLINE inline + #endif +#else + #define NPY_INLINE +#endif + +#ifdef _MSC_VER + #define NPY_FINLINE static __forceinline +#elif defined(__GNUC__) + #define NPY_FINLINE static NPY_INLINE __attribute__((always_inline)) +#else + #define NPY_FINLINE static +#endif + +#ifdef HAVE___THREAD + #define NPY_TLS __thread +#else + #ifdef HAVE___DECLSPEC_THREAD_ + #define NPY_TLS __declspec(thread) + #else + #define NPY_TLS + #endif +#endif + +#ifdef WITH_CPYCHECKER_RETURNS_BORROWED_REF_ATTRIBUTE + #define NPY_RETURNS_BORROWED_REF \ + __attribute__((cpychecker_returns_borrowed_ref)) +#else + #define NPY_RETURNS_BORROWED_REF +#endif + +#ifdef WITH_CPYCHECKER_STEALS_REFERENCE_TO_ARG_ATTRIBUTE + #define NPY_STEALS_REF_TO_ARG(n) \ + __attribute__((cpychecker_steals_reference_to_arg(n))) +#else + #define NPY_STEALS_REF_TO_ARG(n) +#endif + +/* 64 bit file position support, also on win-amd64. Ticket #1660 */ +#if defined(_MSC_VER) && defined(_WIN64) && (_MSC_VER > 1400) || \ + defined(__MINGW32__) || defined(__MINGW64__) + #include <io.h> + + #define npy_fseek _fseeki64 + #define npy_ftell _ftelli64 + #define npy_lseek _lseeki64 + #define npy_off_t npy_int64 + + #if NPY_SIZEOF_INT == 8 + #define NPY_OFF_T_PYFMT "i" + #elif NPY_SIZEOF_LONG == 8 + #define NPY_OFF_T_PYFMT "l" + #elif NPY_SIZEOF_LONGLONG == 8 + #define NPY_OFF_T_PYFMT "L" + #else + #error Unsupported size for type off_t + #endif +#else +#ifdef HAVE_FSEEKO + #define npy_fseek fseeko +#else + #define npy_fseek fseek +#endif +#ifdef HAVE_FTELLO + #define npy_ftell ftello +#else + #define npy_ftell ftell +#endif + #include <sys/types.h> + #define npy_lseek lseek + #define npy_off_t off_t + + #if NPY_SIZEOF_OFF_T == NPY_SIZEOF_SHORT + #define NPY_OFF_T_PYFMT "h" + #elif NPY_SIZEOF_OFF_T == NPY_SIZEOF_INT + #define NPY_OFF_T_PYFMT "i" + #elif NPY_SIZEOF_OFF_T == NPY_SIZEOF_LONG + #define NPY_OFF_T_PYFMT "l" + #elif NPY_SIZEOF_OFF_T == NPY_SIZEOF_LONGLONG + #define NPY_OFF_T_PYFMT "L" + #else + #error Unsupported size for type off_t + #endif +#endif + +/* enums for detected endianness */ +enum { + NPY_CPU_UNKNOWN_ENDIAN, + NPY_CPU_LITTLE, + NPY_CPU_BIG +}; + +/* + * This is to typedef npy_intp to the appropriate pointer size for this + * platform. Py_intptr_t, Py_uintptr_t are defined in pyport.h. + */ +typedef Py_intptr_t npy_intp; +typedef Py_uintptr_t npy_uintp; + +/* + * Define sizes that were not defined in numpyconfig.h. + */ +#define NPY_SIZEOF_CHAR 1 +#define NPY_SIZEOF_BYTE 1 +#define NPY_SIZEOF_DATETIME 8 +#define NPY_SIZEOF_TIMEDELTA 8 +#define NPY_SIZEOF_INTP NPY_SIZEOF_PY_INTPTR_T +#define NPY_SIZEOF_UINTP NPY_SIZEOF_PY_INTPTR_T +#define NPY_SIZEOF_HALF 2 +#define NPY_SIZEOF_CFLOAT NPY_SIZEOF_COMPLEX_FLOAT +#define NPY_SIZEOF_CDOUBLE NPY_SIZEOF_COMPLEX_DOUBLE +#define NPY_SIZEOF_CLONGDOUBLE NPY_SIZEOF_COMPLEX_LONGDOUBLE + +#ifdef constchar +#undef constchar +#endif + +#define NPY_SSIZE_T_PYFMT "n" +#define constchar char + +/* NPY_INTP_FMT Note: + * Unlike the other NPY_*_FMT macros, which are used with PyOS_snprintf, + * NPY_INTP_FMT is used with PyErr_Format and PyUnicode_FromFormat. Those + * functions use different formatting codes that are portably specified + * according to the Python documentation. See issue gh-2388.
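+ * A hypothetical usage sketch (the exception type and message are illustrative, not from this header): + *     PyErr_Format(PyExc_IndexError, + *                  "index %" NPY_INTP_FMT " is out of bounds", (npy_intp)i);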
+ */ +#if NPY_SIZEOF_PY_INTPTR_T == NPY_SIZEOF_INT + #define NPY_INTP NPY_INT + #define NPY_UINTP NPY_UINT + #define PyIntpArrType_Type PyIntArrType_Type + #define PyUIntpArrType_Type PyUIntArrType_Type + #define NPY_MAX_INTP NPY_MAX_INT + #define NPY_MIN_INTP NPY_MIN_INT + #define NPY_MAX_UINTP NPY_MAX_UINT + #define NPY_INTP_FMT "d" +#elif NPY_SIZEOF_PY_INTPTR_T == NPY_SIZEOF_LONG + #define NPY_INTP NPY_LONG + #define NPY_UINTP NPY_ULONG + #define PyIntpArrType_Type PyLongArrType_Type + #define PyUIntpArrType_Type PyULongArrType_Type + #define NPY_MAX_INTP NPY_MAX_LONG + #define NPY_MIN_INTP NPY_MIN_LONG + #define NPY_MAX_UINTP NPY_MAX_ULONG + #define NPY_INTP_FMT "ld" +#elif defined(PY_LONG_LONG) && (NPY_SIZEOF_PY_INTPTR_T == NPY_SIZEOF_LONGLONG) + #define NPY_INTP NPY_LONGLONG + #define NPY_UINTP NPY_ULONGLONG + #define PyIntpArrType_Type PyLongLongArrType_Type + #define PyUIntpArrType_Type PyULongLongArrType_Type + #define NPY_MAX_INTP NPY_MAX_LONGLONG + #define NPY_MIN_INTP NPY_MIN_LONGLONG + #define NPY_MAX_UINTP NPY_MAX_ULONGLONG + #define NPY_INTP_FMT "lld" +#endif + +/* + * We can only use C99 formats for npy_intp if it is the same as + * intp_t, hence the condition on HAVE_UINTPTR_T + */ +#if (NPY_USE_C99_FORMATS) == 1 \ + && (defined HAVE_UINTPTR_T) \ + && (defined HAVE_INTTYPES_H) + #include <inttypes.h> + #undef NPY_INTP_FMT + #define NPY_INTP_FMT PRIdPTR +#endif + + +/* + * Some platforms don't define bool, long long, or long double. + * Handle that here. + */ +#define NPY_BYTE_FMT "hhd" +#define NPY_UBYTE_FMT "hhu" +#define NPY_SHORT_FMT "hd" +#define NPY_USHORT_FMT "hu" +#define NPY_INT_FMT "d" +#define NPY_UINT_FMT "u" +#define NPY_LONG_FMT "ld" +#define NPY_ULONG_FMT "lu" +#define NPY_HALF_FMT "g" +#define NPY_FLOAT_FMT "g" +#define NPY_DOUBLE_FMT "g" + + +#ifdef PY_LONG_LONG +typedef PY_LONG_LONG npy_longlong; +typedef unsigned PY_LONG_LONG npy_ulonglong; +# ifdef _MSC_VER +# define NPY_LONGLONG_FMT "I64d" +# define NPY_ULONGLONG_FMT "I64u" +# else +# define NPY_LONGLONG_FMT "lld" +# define NPY_ULONGLONG_FMT "llu" +# endif +# ifdef _MSC_VER +# define NPY_LONGLONG_SUFFIX(x) (x##i64) +# define NPY_ULONGLONG_SUFFIX(x) (x##Ui64) +# else +# define NPY_LONGLONG_SUFFIX(x) (x##LL) +# define NPY_ULONGLONG_SUFFIX(x) (x##ULL) +# endif +#else +typedef long npy_longlong; +typedef unsigned long npy_ulonglong; +# define NPY_LONGLONG_SUFFIX(x) (x##L) +# define NPY_ULONGLONG_SUFFIX(x) (x##UL) +#endif + + +typedef unsigned char npy_bool; +#define NPY_FALSE 0 +#define NPY_TRUE 1 +/* + * `NPY_SIZEOF_LONGDOUBLE` isn't usually equal to sizeof(long double). + * In certain cases, it may be forced to be equal to sizeof(double) + * even against the compiler implementation and the same goes for + * `complex long double`. + * + * Therefore, avoid `long double`, use `npy_longdouble` instead, + * and when it comes to standard math functions make sure to use + * the double version when `NPY_SIZEOF_LONGDOUBLE` == `NPY_SIZEOF_DOUBLE`. + * For example: + * npy_longdouble *ptr, x; + * #if NPY_SIZEOF_LONGDOUBLE == NPY_SIZEOF_DOUBLE + * npy_longdouble r = modf(x, ptr); + * #else + * npy_longdouble r = modfl(x, ptr); + * #endif + * + * See https://github.com/numpy/numpy/issues/20348 + */ +#if NPY_SIZEOF_LONGDOUBLE == NPY_SIZEOF_DOUBLE + #define NPY_LONGDOUBLE_FMT "g" + typedef double npy_longdouble; +#else + #define NPY_LONGDOUBLE_FMT "Lg" + typedef long double npy_longdouble; +#endif + +#ifndef Py_USING_UNICODE +#error Must use Python with unicode enabled.
+#endif + + +typedef signed char npy_byte; +typedef unsigned char npy_ubyte; +typedef unsigned short npy_ushort; +typedef unsigned int npy_uint; +typedef unsigned long npy_ulong; + +/* These are for completeness */ +typedef char npy_char; +typedef short npy_short; +typedef int npy_int; +typedef long npy_long; +typedef float npy_float; +typedef double npy_double; + +typedef Py_hash_t npy_hash_t; +#define NPY_SIZEOF_HASH_T NPY_SIZEOF_INTP + +/* + * Disabling C99 complex usage: a lot of C code in numpy/scipy rely on being + * able to do .real/.imag. Will have to convert code first. + */ +#if 0 +#if defined(NPY_USE_C99_COMPLEX) && defined(NPY_HAVE_COMPLEX_DOUBLE) +typedef complex npy_cdouble; +#else +typedef struct { double real, imag; } npy_cdouble; +#endif + +#if defined(NPY_USE_C99_COMPLEX) && defined(NPY_HAVE_COMPLEX_FLOAT) +typedef complex float npy_cfloat; +#else +typedef struct { float real, imag; } npy_cfloat; +#endif + +#if defined(NPY_USE_C99_COMPLEX) && defined(NPY_HAVE_COMPLEX_LONG_DOUBLE) +typedef complex long double npy_clongdouble; +#else +typedef struct {npy_longdouble real, imag;} npy_clongdouble; +#endif +#endif +#if NPY_SIZEOF_COMPLEX_DOUBLE != 2 * NPY_SIZEOF_DOUBLE +#error npy_cdouble definition is not compatible with C99 complex definition ! \ + Please contact NumPy maintainers and give detailed information about your \ + compiler and platform +#endif +typedef struct { double real, imag; } npy_cdouble; + +#if NPY_SIZEOF_COMPLEX_FLOAT != 2 * NPY_SIZEOF_FLOAT +#error npy_cfloat definition is not compatible with C99 complex definition ! \ + Please contact NumPy maintainers and give detailed information about your \ + compiler and platform +#endif +typedef struct { float real, imag; } npy_cfloat; + +#if NPY_SIZEOF_COMPLEX_LONGDOUBLE != 2 * NPY_SIZEOF_LONGDOUBLE +#error npy_clongdouble definition is not compatible with C99 complex definition ! \ + Please contact NumPy maintainers and give detailed information about your \ + compiler and platform +#endif +typedef struct { npy_longdouble real, imag; } npy_clongdouble; + +/* + * numarray-style bit-width typedefs + */ +#define NPY_MAX_INT8 127 +#define NPY_MIN_INT8 -128 +#define NPY_MAX_UINT8 255 +#define NPY_MAX_INT16 32767 +#define NPY_MIN_INT16 -32768 +#define NPY_MAX_UINT16 65535 +#define NPY_MAX_INT32 2147483647 +#define NPY_MIN_INT32 (-NPY_MAX_INT32 - 1) +#define NPY_MAX_UINT32 4294967295U +#define NPY_MAX_INT64 NPY_LONGLONG_SUFFIX(9223372036854775807) +#define NPY_MIN_INT64 (-NPY_MAX_INT64 - NPY_LONGLONG_SUFFIX(1)) +#define NPY_MAX_UINT64 NPY_ULONGLONG_SUFFIX(18446744073709551615) +#define NPY_MAX_INT128 NPY_LONGLONG_SUFFIX(85070591730234615865843651857942052864) +#define NPY_MIN_INT128 (-NPY_MAX_INT128 - NPY_LONGLONG_SUFFIX(1)) +#define NPY_MAX_UINT128 NPY_ULONGLONG_SUFFIX(170141183460469231731687303715884105728) +#define NPY_MAX_INT256 NPY_LONGLONG_SUFFIX(57896044618658097711785492504343953926634992332820282019728792003956564819967) +#define NPY_MIN_INT256 (-NPY_MAX_INT256 - NPY_LONGLONG_SUFFIX(1)) +#define NPY_MAX_UINT256 NPY_ULONGLONG_SUFFIX(115792089237316195423570985008687907853269984665640564039457584007913129639935) +#define NPY_MIN_DATETIME NPY_MIN_INT64 +#define NPY_MAX_DATETIME NPY_MAX_INT64 +#define NPY_MIN_TIMEDELTA NPY_MIN_INT64 +#define NPY_MAX_TIMEDELTA NPY_MAX_INT64 + + /* Need to find the number of bits for each type and + make definitions accordingly. + + C states that sizeof(char) == 1 by definition + + So, just using the sizeof keyword won't help. 
+ + It also looks like Python itself uses sizeof(char) quite a + bit, which by definition should be 1 all the time. + + Idea: Make use of CHAR_BIT, which should tell us how many + BITS per CHARACTER + */ + + /* Include platform definitions -- These are in the C89/90 standard */ +#include <limits.h> +#define NPY_MAX_BYTE SCHAR_MAX +#define NPY_MIN_BYTE SCHAR_MIN +#define NPY_MAX_UBYTE UCHAR_MAX +#define NPY_MAX_SHORT SHRT_MAX +#define NPY_MIN_SHORT SHRT_MIN +#define NPY_MAX_USHORT USHRT_MAX +#define NPY_MAX_INT INT_MAX +#ifndef INT_MIN +#define INT_MIN (-INT_MAX - 1) +#endif +#define NPY_MIN_INT INT_MIN +#define NPY_MAX_UINT UINT_MAX +#define NPY_MAX_LONG LONG_MAX +#define NPY_MIN_LONG LONG_MIN +#define NPY_MAX_ULONG ULONG_MAX + +#define NPY_BITSOF_BOOL (sizeof(npy_bool) * CHAR_BIT) +#define NPY_BITSOF_CHAR CHAR_BIT +#define NPY_BITSOF_BYTE (NPY_SIZEOF_BYTE * CHAR_BIT) +#define NPY_BITSOF_SHORT (NPY_SIZEOF_SHORT * CHAR_BIT) +#define NPY_BITSOF_INT (NPY_SIZEOF_INT * CHAR_BIT) +#define NPY_BITSOF_LONG (NPY_SIZEOF_LONG * CHAR_BIT) +#define NPY_BITSOF_LONGLONG (NPY_SIZEOF_LONGLONG * CHAR_BIT) +#define NPY_BITSOF_INTP (NPY_SIZEOF_INTP * CHAR_BIT) +#define NPY_BITSOF_HALF (NPY_SIZEOF_HALF * CHAR_BIT) +#define NPY_BITSOF_FLOAT (NPY_SIZEOF_FLOAT * CHAR_BIT) +#define NPY_BITSOF_DOUBLE (NPY_SIZEOF_DOUBLE * CHAR_BIT) +#define NPY_BITSOF_LONGDOUBLE (NPY_SIZEOF_LONGDOUBLE * CHAR_BIT) +#define NPY_BITSOF_CFLOAT (NPY_SIZEOF_CFLOAT * CHAR_BIT) +#define NPY_BITSOF_CDOUBLE (NPY_SIZEOF_CDOUBLE * CHAR_BIT) +#define NPY_BITSOF_CLONGDOUBLE (NPY_SIZEOF_CLONGDOUBLE * CHAR_BIT) +#define NPY_BITSOF_DATETIME (NPY_SIZEOF_DATETIME * CHAR_BIT) +#define NPY_BITSOF_TIMEDELTA (NPY_SIZEOF_TIMEDELTA * CHAR_BIT) + +#if NPY_BITSOF_LONG == 8 +#define NPY_INT8 NPY_LONG +#define NPY_UINT8 NPY_ULONG + typedef long npy_int8; + typedef unsigned long npy_uint8; +#define PyInt8ScalarObject PyLongScalarObject +#define PyInt8ArrType_Type PyLongArrType_Type +#define PyUInt8ScalarObject PyULongScalarObject +#define PyUInt8ArrType_Type PyULongArrType_Type +#define NPY_INT8_FMT NPY_LONG_FMT +#define NPY_UINT8_FMT NPY_ULONG_FMT +#elif NPY_BITSOF_LONG == 16 +#define NPY_INT16 NPY_LONG +#define NPY_UINT16 NPY_ULONG + typedef long npy_int16; + typedef unsigned long npy_uint16; +#define PyInt16ScalarObject PyLongScalarObject +#define PyInt16ArrType_Type PyLongArrType_Type +#define PyUInt16ScalarObject PyULongScalarObject +#define PyUInt16ArrType_Type PyULongArrType_Type +#define NPY_INT16_FMT NPY_LONG_FMT +#define NPY_UINT16_FMT NPY_ULONG_FMT +#elif NPY_BITSOF_LONG == 32 +#define NPY_INT32 NPY_LONG +#define NPY_UINT32 NPY_ULONG + typedef long npy_int32; + typedef unsigned long npy_uint32; + typedef unsigned long npy_ucs4; +#define PyInt32ScalarObject PyLongScalarObject +#define PyInt32ArrType_Type PyLongArrType_Type +#define PyUInt32ScalarObject PyULongScalarObject +#define PyUInt32ArrType_Type PyULongArrType_Type +#define NPY_INT32_FMT NPY_LONG_FMT +#define NPY_UINT32_FMT NPY_ULONG_FMT +#elif NPY_BITSOF_LONG == 64 +#define NPY_INT64 NPY_LONG +#define NPY_UINT64 NPY_ULONG + typedef long npy_int64; + typedef unsigned long npy_uint64; +#define PyInt64ScalarObject PyLongScalarObject +#define PyInt64ArrType_Type PyLongArrType_Type +#define PyUInt64ScalarObject PyULongScalarObject +#define PyUInt64ArrType_Type PyULongArrType_Type +#define NPY_INT64_FMT NPY_LONG_FMT +#define NPY_UINT64_FMT NPY_ULONG_FMT +#define MyPyLong_FromInt64 PyLong_FromLong +#define MyPyLong_AsInt64 PyLong_AsLong +#elif NPY_BITSOF_LONG == 128 +#define NPY_INT128 NPY_LONG +#define
NPY_UINT128 NPY_ULONG + typedef long npy_int128; + typedef unsigned long npy_uint128; +#define PyInt128ScalarObject PyLongScalarObject +#define PyInt128ArrType_Type PyLongArrType_Type +#define PyUInt128ScalarObject PyULongScalarObject +#define PyUInt128ArrType_Type PyULongArrType_Type +#define NPY_INT128_FMT NPY_LONG_FMT +#define NPY_UINT128_FMT NPY_ULONG_FMT +#endif + +#if NPY_BITSOF_LONGLONG == 8 +# ifndef NPY_INT8 +# define NPY_INT8 NPY_LONGLONG +# define NPY_UINT8 NPY_ULONGLONG + typedef npy_longlong npy_int8; + typedef npy_ulonglong npy_uint8; +# define PyInt8ScalarObject PyLongLongScalarObject +# define PyInt8ArrType_Type PyLongLongArrType_Type +# define PyUInt8ScalarObject PyULongLongScalarObject +# define PyUInt8ArrType_Type PyULongLongArrType_Type +#define NPY_INT8_FMT NPY_LONGLONG_FMT +#define NPY_UINT8_FMT NPY_ULONGLONG_FMT +# endif +# define NPY_MAX_LONGLONG NPY_MAX_INT8 +# define NPY_MIN_LONGLONG NPY_MIN_INT8 +# define NPY_MAX_ULONGLONG NPY_MAX_UINT8 +#elif NPY_BITSOF_LONGLONG == 16 +# ifndef NPY_INT16 +# define NPY_INT16 NPY_LONGLONG +# define NPY_UINT16 NPY_ULONGLONG + typedef npy_longlong npy_int16; + typedef npy_ulonglong npy_uint16; +# define PyInt16ScalarObject PyLongLongScalarObject +# define PyInt16ArrType_Type PyLongLongArrType_Type +# define PyUInt16ScalarObject PyULongLongScalarObject +# define PyUInt16ArrType_Type PyULongLongArrType_Type +#define NPY_INT16_FMT NPY_LONGLONG_FMT +#define NPY_UINT16_FMT NPY_ULONGLONG_FMT +# endif +# define NPY_MAX_LONGLONG NPY_MAX_INT16 +# define NPY_MIN_LONGLONG NPY_MIN_INT16 +# define NPY_MAX_ULONGLONG NPY_MAX_UINT16 +#elif NPY_BITSOF_LONGLONG == 32 +# ifndef NPY_INT32 +# define NPY_INT32 NPY_LONGLONG +# define NPY_UINT32 NPY_ULONGLONG + typedef npy_longlong npy_int32; + typedef npy_ulonglong npy_uint32; + typedef npy_ulonglong npy_ucs4; +# define PyInt32ScalarObject PyLongLongScalarObject +# define PyInt32ArrType_Type PyLongLongArrType_Type +# define PyUInt32ScalarObject PyULongLongScalarObject +# define PyUInt32ArrType_Type PyULongLongArrType_Type +#define NPY_INT32_FMT NPY_LONGLONG_FMT +#define NPY_UINT32_FMT NPY_ULONGLONG_FMT +# endif +# define NPY_MAX_LONGLONG NPY_MAX_INT32 +# define NPY_MIN_LONGLONG NPY_MIN_INT32 +# define NPY_MAX_ULONGLONG NPY_MAX_UINT32 +#elif NPY_BITSOF_LONGLONG == 64 +# ifndef NPY_INT64 +# define NPY_INT64 NPY_LONGLONG +# define NPY_UINT64 NPY_ULONGLONG + typedef npy_longlong npy_int64; + typedef npy_ulonglong npy_uint64; +# define PyInt64ScalarObject PyLongLongScalarObject +# define PyInt64ArrType_Type PyLongLongArrType_Type +# define PyUInt64ScalarObject PyULongLongScalarObject +# define PyUInt64ArrType_Type PyULongLongArrType_Type +#define NPY_INT64_FMT NPY_LONGLONG_FMT +#define NPY_UINT64_FMT NPY_ULONGLONG_FMT +# define MyPyLong_FromInt64 PyLong_FromLongLong +# define MyPyLong_AsInt64 PyLong_AsLongLong +# endif +# define NPY_MAX_LONGLONG NPY_MAX_INT64 +# define NPY_MIN_LONGLONG NPY_MIN_INT64 +# define NPY_MAX_ULONGLONG NPY_MAX_UINT64 +#elif NPY_BITSOF_LONGLONG == 128 +# ifndef NPY_INT128 +# define NPY_INT128 NPY_LONGLONG +# define NPY_UINT128 NPY_ULONGLONG + typedef npy_longlong npy_int128; + typedef npy_ulonglong npy_uint128; +# define PyInt128ScalarObject PyLongLongScalarObject +# define PyInt128ArrType_Type PyLongLongArrType_Type +# define PyUInt128ScalarObject PyULongLongScalarObject +# define PyUInt128ArrType_Type PyULongLongArrType_Type +#define NPY_INT128_FMT NPY_LONGLONG_FMT +#define NPY_UINT128_FMT NPY_ULONGLONG_FMT +# endif +# define NPY_MAX_LONGLONG NPY_MAX_INT128 +# define NPY_MIN_LONGLONG 
NPY_MIN_INT128 +# define NPY_MAX_ULONGLONG NPY_MAX_UINT128 +#elif NPY_BITSOF_LONGLONG == 256 +# define NPY_INT256 NPY_LONGLONG +# define NPY_UINT256 NPY_ULONGLONG + typedef npy_longlong npy_int256; + typedef npy_ulonglong npy_uint256; +# define PyInt256ScalarObject PyLongLongScalarObject +# define PyInt256ArrType_Type PyLongLongArrType_Type +# define PyUInt256ScalarObject PyULongLongScalarObject +# define PyUInt256ArrType_Type PyULongLongArrType_Type +#define NPY_INT256_FMT NPY_LONGLONG_FMT +#define NPY_UINT256_FMT NPY_ULONGLONG_FMT +# define NPY_MAX_LONGLONG NPY_MAX_INT256 +# define NPY_MIN_LONGLONG NPY_MIN_INT256 +# define NPY_MAX_ULONGLONG NPY_MAX_UINT256 +#endif + +#if NPY_BITSOF_INT == 8 +#ifndef NPY_INT8 +#define NPY_INT8 NPY_INT +#define NPY_UINT8 NPY_UINT + typedef int npy_int8; + typedef unsigned int npy_uint8; +# define PyInt8ScalarObject PyIntScalarObject +# define PyInt8ArrType_Type PyIntArrType_Type +# define PyUInt8ScalarObject PyUIntScalarObject +# define PyUInt8ArrType_Type PyUIntArrType_Type +#define NPY_INT8_FMT NPY_INT_FMT +#define NPY_UINT8_FMT NPY_UINT_FMT +#endif +#elif NPY_BITSOF_INT == 16 +#ifndef NPY_INT16 +#define NPY_INT16 NPY_INT +#define NPY_UINT16 NPY_UINT + typedef int npy_int16; + typedef unsigned int npy_uint16; +# define PyInt16ScalarObject PyIntScalarObject +# define PyInt16ArrType_Type PyIntArrType_Type +# define PyUInt16ScalarObject PyIntUScalarObject +# define PyUInt16ArrType_Type PyIntUArrType_Type +#define NPY_INT16_FMT NPY_INT_FMT +#define NPY_UINT16_FMT NPY_UINT_FMT +#endif +#elif NPY_BITSOF_INT == 32 +#ifndef NPY_INT32 +#define NPY_INT32 NPY_INT +#define NPY_UINT32 NPY_UINT + typedef int npy_int32; + typedef unsigned int npy_uint32; + typedef unsigned int npy_ucs4; +# define PyInt32ScalarObject PyIntScalarObject +# define PyInt32ArrType_Type PyIntArrType_Type +# define PyUInt32ScalarObject PyUIntScalarObject +# define PyUInt32ArrType_Type PyUIntArrType_Type +#define NPY_INT32_FMT NPY_INT_FMT +#define NPY_UINT32_FMT NPY_UINT_FMT +#endif +#elif NPY_BITSOF_INT == 64 +#ifndef NPY_INT64 +#define NPY_INT64 NPY_INT +#define NPY_UINT64 NPY_UINT + typedef int npy_int64; + typedef unsigned int npy_uint64; +# define PyInt64ScalarObject PyIntScalarObject +# define PyInt64ArrType_Type PyIntArrType_Type +# define PyUInt64ScalarObject PyUIntScalarObject +# define PyUInt64ArrType_Type PyUIntArrType_Type +#define NPY_INT64_FMT NPY_INT_FMT +#define NPY_UINT64_FMT NPY_UINT_FMT +# define MyPyLong_FromInt64 PyLong_FromLong +# define MyPyLong_AsInt64 PyLong_AsLong +#endif +#elif NPY_BITSOF_INT == 128 +#ifndef NPY_INT128 +#define NPY_INT128 NPY_INT +#define NPY_UINT128 NPY_UINT + typedef int npy_int128; + typedef unsigned int npy_uint128; +# define PyInt128ScalarObject PyIntScalarObject +# define PyInt128ArrType_Type PyIntArrType_Type +# define PyUInt128ScalarObject PyUIntScalarObject +# define PyUInt128ArrType_Type PyUIntArrType_Type +#define NPY_INT128_FMT NPY_INT_FMT +#define NPY_UINT128_FMT NPY_UINT_FMT +#endif +#endif + +#if NPY_BITSOF_SHORT == 8 +#ifndef NPY_INT8 +#define NPY_INT8 NPY_SHORT +#define NPY_UINT8 NPY_USHORT + typedef short npy_int8; + typedef unsigned short npy_uint8; +# define PyInt8ScalarObject PyShortScalarObject +# define PyInt8ArrType_Type PyShortArrType_Type +# define PyUInt8ScalarObject PyUShortScalarObject +# define PyUInt8ArrType_Type PyUShortArrType_Type +#define NPY_INT8_FMT NPY_SHORT_FMT +#define NPY_UINT8_FMT NPY_USHORT_FMT +#endif +#elif NPY_BITSOF_SHORT == 16 +#ifndef NPY_INT16 +#define NPY_INT16 NPY_SHORT +#define NPY_UINT16 NPY_USHORT 
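+ /* Illustrative note: on a typical LP64 Linux/x86-64 build, neither long, long long, nor int is 16 bits wide, so it is this NPY_BITSOF_SHORT == 16 branch that makes npy_int16 a C 'short'. */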
+ typedef short npy_int16; + typedef unsigned short npy_uint16; +# define PyInt16ScalarObject PyShortScalarObject +# define PyInt16ArrType_Type PyShortArrType_Type +# define PyUInt16ScalarObject PyUShortScalarObject +# define PyUInt16ArrType_Type PyUShortArrType_Type +#define NPY_INT16_FMT NPY_SHORT_FMT +#define NPY_UINT16_FMT NPY_USHORT_FMT +#endif +#elif NPY_BITSOF_SHORT == 32 +#ifndef NPY_INT32 +#define NPY_INT32 NPY_SHORT +#define NPY_UINT32 NPY_USHORT + typedef short npy_int32; + typedef unsigned short npy_uint32; + typedef unsigned short npy_ucs4; +# define PyInt32ScalarObject PyShortScalarObject +# define PyInt32ArrType_Type PyShortArrType_Type +# define PyUInt32ScalarObject PyUShortScalarObject +# define PyUInt32ArrType_Type PyUShortArrType_Type +#define NPY_INT32_FMT NPY_SHORT_FMT +#define NPY_UINT32_FMT NPY_USHORT_FMT +#endif +#elif NPY_BITSOF_SHORT == 64 +#ifndef NPY_INT64 +#define NPY_INT64 NPY_SHORT +#define NPY_UINT64 NPY_USHORT + typedef short npy_int64; + typedef unsigned short npy_uint64; +# define PyInt64ScalarObject PyShortScalarObject +# define PyInt64ArrType_Type PyShortArrType_Type +# define PyUInt64ScalarObject PyUShortScalarObject +# define PyUInt64ArrType_Type PyUShortArrType_Type +#define NPY_INT64_FMT NPY_SHORT_FMT +#define NPY_UINT64_FMT NPY_USHORT_FMT +# define MyPyLong_FromInt64 PyLong_FromLong +# define MyPyLong_AsInt64 PyLong_AsLong +#endif +#elif NPY_BITSOF_SHORT == 128 +#ifndef NPY_INT128 +#define NPY_INT128 NPY_SHORT +#define NPY_UINT128 NPY_USHORT + typedef short npy_int128; + typedef unsigned short npy_uint128; +# define PyInt128ScalarObject PyShortScalarObject +# define PyInt128ArrType_Type PyShortArrType_Type +# define PyUInt128ScalarObject PyUShortScalarObject +# define PyUInt128ArrType_Type PyUShortArrType_Type +#define NPY_INT128_FMT NPY_SHORT_FMT +#define NPY_UINT128_FMT NPY_USHORT_FMT +#endif +#endif + + +#if NPY_BITSOF_CHAR == 8 +#ifndef NPY_INT8 +#define NPY_INT8 NPY_BYTE +#define NPY_UINT8 NPY_UBYTE + typedef signed char npy_int8; + typedef unsigned char npy_uint8; +# define PyInt8ScalarObject PyByteScalarObject +# define PyInt8ArrType_Type PyByteArrType_Type +# define PyUInt8ScalarObject PyUByteScalarObject +# define PyUInt8ArrType_Type PyUByteArrType_Type +#define NPY_INT8_FMT NPY_BYTE_FMT +#define NPY_UINT8_FMT NPY_UBYTE_FMT +#endif +#elif NPY_BITSOF_CHAR == 16 +#ifndef NPY_INT16 +#define NPY_INT16 NPY_BYTE +#define NPY_UINT16 NPY_UBYTE + typedef signed char npy_int16; + typedef unsigned char npy_uint16; +# define PyInt16ScalarObject PyByteScalarObject +# define PyInt16ArrType_Type PyByteArrType_Type +# define PyUInt16ScalarObject PyUByteScalarObject +# define PyUInt16ArrType_Type PyUByteArrType_Type +#define NPY_INT16_FMT NPY_BYTE_FMT +#define NPY_UINT16_FMT NPY_UBYTE_FMT +#endif +#elif NPY_BITSOF_CHAR == 32 +#ifndef NPY_INT32 +#define NPY_INT32 NPY_BYTE +#define NPY_UINT32 NPY_UBYTE + typedef signed char npy_int32; + typedef unsigned char npy_uint32; + typedef unsigned char npy_ucs4; +# define PyInt32ScalarObject PyByteScalarObject +# define PyInt32ArrType_Type PyByteArrType_Type +# define PyUInt32ScalarObject PyUByteScalarObject +# define PyUInt32ArrType_Type PyUByteArrType_Type +#define NPY_INT32_FMT NPY_BYTE_FMT +#define NPY_UINT32_FMT NPY_UBYTE_FMT +#endif +#elif NPY_BITSOF_CHAR == 64 +#ifndef NPY_INT64 +#define NPY_INT64 NPY_BYTE +#define NPY_UINT64 NPY_UBYTE + typedef signed char npy_int64; + typedef unsigned char npy_uint64; +# define PyInt64ScalarObject PyByteScalarObject +# define PyInt64ArrType_Type PyByteArrType_Type +# define 
PyUInt64ScalarObject PyUByteScalarObject +# define PyUInt64ArrType_Type PyUByteArrType_Type +#define NPY_INT64_FMT NPY_BYTE_FMT +#define NPY_UINT64_FMT NPY_UBYTE_FMT +# define MyPyLong_FromInt64 PyLong_FromLong +# define MyPyLong_AsInt64 PyLong_AsLong +#endif +#elif NPY_BITSOF_CHAR == 128 +#ifndef NPY_INT128 +#define NPY_INT128 NPY_BYTE +#define NPY_UINT128 NPY_UBYTE + typedef signed char npy_int128; + typedef unsigned char npy_uint128; +# define PyInt128ScalarObject PyByteScalarObject +# define PyInt128ArrType_Type PyByteArrType_Type +# define PyUInt128ScalarObject PyUByteScalarObject +# define PyUInt128ArrType_Type PyUByteArrType_Type +#define NPY_INT128_FMT NPY_BYTE_FMT +#define NPY_UINT128_FMT NPY_UBYTE_FMT +#endif +#endif + + + +#if NPY_BITSOF_DOUBLE == 32 +#ifndef NPY_FLOAT32 +#define NPY_FLOAT32 NPY_DOUBLE +#define NPY_COMPLEX64 NPY_CDOUBLE + typedef double npy_float32; + typedef npy_cdouble npy_complex64; +# define PyFloat32ScalarObject PyDoubleScalarObject +# define PyComplex64ScalarObject PyCDoubleScalarObject +# define PyFloat32ArrType_Type PyDoubleArrType_Type +# define PyComplex64ArrType_Type PyCDoubleArrType_Type +#define NPY_FLOAT32_FMT NPY_DOUBLE_FMT +#define NPY_COMPLEX64_FMT NPY_CDOUBLE_FMT +#endif +#elif NPY_BITSOF_DOUBLE == 64 +#ifndef NPY_FLOAT64 +#define NPY_FLOAT64 NPY_DOUBLE +#define NPY_COMPLEX128 NPY_CDOUBLE + typedef double npy_float64; + typedef npy_cdouble npy_complex128; +# define PyFloat64ScalarObject PyDoubleScalarObject +# define PyComplex128ScalarObject PyCDoubleScalarObject +# define PyFloat64ArrType_Type PyDoubleArrType_Type +# define PyComplex128ArrType_Type PyCDoubleArrType_Type +#define NPY_FLOAT64_FMT NPY_DOUBLE_FMT +#define NPY_COMPLEX128_FMT NPY_CDOUBLE_FMT +#endif +#elif NPY_BITSOF_DOUBLE == 80 +#ifndef NPY_FLOAT80 +#define NPY_FLOAT80 NPY_DOUBLE +#define NPY_COMPLEX160 NPY_CDOUBLE + typedef double npy_float80; + typedef npy_cdouble npy_complex160; +# define PyFloat80ScalarObject PyDoubleScalarObject +# define PyComplex160ScalarObject PyCDoubleScalarObject +# define PyFloat80ArrType_Type PyDoubleArrType_Type +# define PyComplex160ArrType_Type PyCDoubleArrType_Type +#define NPY_FLOAT80_FMT NPY_DOUBLE_FMT +#define NPY_COMPLEX160_FMT NPY_CDOUBLE_FMT +#endif +#elif NPY_BITSOF_DOUBLE == 96 +#ifndef NPY_FLOAT96 +#define NPY_FLOAT96 NPY_DOUBLE +#define NPY_COMPLEX192 NPY_CDOUBLE + typedef double npy_float96; + typedef npy_cdouble npy_complex192; +# define PyFloat96ScalarObject PyDoubleScalarObject +# define PyComplex192ScalarObject PyCDoubleScalarObject +# define PyFloat96ArrType_Type PyDoubleArrType_Type +# define PyComplex192ArrType_Type PyCDoubleArrType_Type +#define NPY_FLOAT96_FMT NPY_DOUBLE_FMT +#define NPY_COMPLEX192_FMT NPY_CDOUBLE_FMT +#endif +#elif NPY_BITSOF_DOUBLE == 128 +#ifndef NPY_FLOAT128 +#define NPY_FLOAT128 NPY_DOUBLE +#define NPY_COMPLEX256 NPY_CDOUBLE + typedef double npy_float128; + typedef npy_cdouble npy_complex256; +# define PyFloat128ScalarObject PyDoubleScalarObject +# define PyComplex256ScalarObject PyCDoubleScalarObject +# define PyFloat128ArrType_Type PyDoubleArrType_Type +# define PyComplex256ArrType_Type PyCDoubleArrType_Type +#define NPY_FLOAT128_FMT NPY_DOUBLE_FMT +#define NPY_COMPLEX256_FMT NPY_CDOUBLE_FMT +#endif +#endif + + + +#if NPY_BITSOF_FLOAT == 32 +#ifndef NPY_FLOAT32 +#define NPY_FLOAT32 NPY_FLOAT +#define NPY_COMPLEX64 NPY_CFLOAT + typedef float npy_float32; + typedef npy_cfloat npy_complex64; +# define PyFloat32ScalarObject PyFloatScalarObject +# define PyComplex64ScalarObject PyCFloatScalarObject +# define 
PyFloat32ArrType_Type PyFloatArrType_Type +# define PyComplex64ArrType_Type PyCFloatArrType_Type +#define NPY_FLOAT32_FMT NPY_FLOAT_FMT +#define NPY_COMPLEX64_FMT NPY_CFLOAT_FMT +#endif +#elif NPY_BITSOF_FLOAT == 64 +#ifndef NPY_FLOAT64 +#define NPY_FLOAT64 NPY_FLOAT +#define NPY_COMPLEX128 NPY_CFLOAT + typedef float npy_float64; + typedef npy_cfloat npy_complex128; +# define PyFloat64ScalarObject PyFloatScalarObject +# define PyComplex128ScalarObject PyCFloatScalarObject +# define PyFloat64ArrType_Type PyFloatArrType_Type +# define PyComplex128ArrType_Type PyCFloatArrType_Type +#define NPY_FLOAT64_FMT NPY_FLOAT_FMT +#define NPY_COMPLEX128_FMT NPY_CFLOAT_FMT +#endif +#elif NPY_BITSOF_FLOAT == 80 +#ifndef NPY_FLOAT80 +#define NPY_FLOAT80 NPY_FLOAT +#define NPY_COMPLEX160 NPY_CFLOAT + typedef float npy_float80; + typedef npy_cfloat npy_complex160; +# define PyFloat80ScalarObject PyFloatScalarObject +# define PyComplex160ScalarObject PyCFloatScalarObject +# define PyFloat80ArrType_Type PyFloatArrType_Type +# define PyComplex160ArrType_Type PyCFloatArrType_Type +#define NPY_FLOAT80_FMT NPY_FLOAT_FMT +#define NPY_COMPLEX160_FMT NPY_CFLOAT_FMT +#endif +#elif NPY_BITSOF_FLOAT == 96 +#ifndef NPY_FLOAT96 +#define NPY_FLOAT96 NPY_FLOAT +#define NPY_COMPLEX192 NPY_CFLOAT + typedef float npy_float96; + typedef npy_cfloat npy_complex192; +# define PyFloat96ScalarObject PyFloatScalarObject +# define PyComplex192ScalarObject PyCFloatScalarObject +# define PyFloat96ArrType_Type PyFloatArrType_Type +# define PyComplex192ArrType_Type PyCFloatArrType_Type +#define NPY_FLOAT96_FMT NPY_FLOAT_FMT +#define NPY_COMPLEX192_FMT NPY_CFLOAT_FMT +#endif +#elif NPY_BITSOF_FLOAT == 128 +#ifndef NPY_FLOAT128 +#define NPY_FLOAT128 NPY_FLOAT +#define NPY_COMPLEX256 NPY_CFLOAT + typedef float npy_float128; + typedef npy_cfloat npy_complex256; +# define PyFloat128ScalarObject PyFloatScalarObject +# define PyComplex256ScalarObject PyCFloatScalarObject +# define PyFloat128ArrType_Type PyFloatArrType_Type +# define PyComplex256ArrType_Type PyCFloatArrType_Type +#define NPY_FLOAT128_FMT NPY_FLOAT_FMT +#define NPY_COMPLEX256_FMT NPY_CFLOAT_FMT +#endif +#endif + +/* half/float16 isn't a floating-point type in C */ +#define NPY_FLOAT16 NPY_HALF +typedef npy_uint16 npy_half; +typedef npy_half npy_float16; + +#if NPY_BITSOF_LONGDOUBLE == 32 +#ifndef NPY_FLOAT32 +#define NPY_FLOAT32 NPY_LONGDOUBLE +#define NPY_COMPLEX64 NPY_CLONGDOUBLE + typedef npy_longdouble npy_float32; + typedef npy_clongdouble npy_complex64; +# define PyFloat32ScalarObject PyLongDoubleScalarObject +# define PyComplex64ScalarObject PyCLongDoubleScalarObject +# define PyFloat32ArrType_Type PyLongDoubleArrType_Type +# define PyComplex64ArrType_Type PyCLongDoubleArrType_Type +#define NPY_FLOAT32_FMT NPY_LONGDOUBLE_FMT +#define NPY_COMPLEX64_FMT NPY_CLONGDOUBLE_FMT +#endif +#elif NPY_BITSOF_LONGDOUBLE == 64 +#ifndef NPY_FLOAT64 +#define NPY_FLOAT64 NPY_LONGDOUBLE +#define NPY_COMPLEX128 NPY_CLONGDOUBLE + typedef npy_longdouble npy_float64; + typedef npy_clongdouble npy_complex128; +# define PyFloat64ScalarObject PyLongDoubleScalarObject +# define PyComplex128ScalarObject PyCLongDoubleScalarObject +# define PyFloat64ArrType_Type PyLongDoubleArrType_Type +# define PyComplex128ArrType_Type PyCLongDoubleArrType_Type +#define NPY_FLOAT64_FMT NPY_LONGDOUBLE_FMT +#define NPY_COMPLEX128_FMT NPY_CLONGDOUBLE_FMT +#endif +#elif NPY_BITSOF_LONGDOUBLE == 80 +#ifndef NPY_FLOAT80 +#define NPY_FLOAT80 NPY_LONGDOUBLE +#define NPY_COMPLEX160 NPY_CLONGDOUBLE + typedef npy_longdouble 
npy_float80; + typedef npy_clongdouble npy_complex160; +# define PyFloat80ScalarObject PyLongDoubleScalarObject +# define PyComplex160ScalarObject PyCLongDoubleScalarObject +# define PyFloat80ArrType_Type PyLongDoubleArrType_Type +# define PyComplex160ArrType_Type PyCLongDoubleArrType_Type +#define NPY_FLOAT80_FMT NPY_LONGDOUBLE_FMT +#define NPY_COMPLEX160_FMT NPY_CLONGDOUBLE_FMT +#endif +#elif NPY_BITSOF_LONGDOUBLE == 96 +#ifndef NPY_FLOAT96 +#define NPY_FLOAT96 NPY_LONGDOUBLE +#define NPY_COMPLEX192 NPY_CLONGDOUBLE + typedef npy_longdouble npy_float96; + typedef npy_clongdouble npy_complex192; +# define PyFloat96ScalarObject PyLongDoubleScalarObject +# define PyComplex192ScalarObject PyCLongDoubleScalarObject +# define PyFloat96ArrType_Type PyLongDoubleArrType_Type +# define PyComplex192ArrType_Type PyCLongDoubleArrType_Type +#define NPY_FLOAT96_FMT NPY_LONGDOUBLE_FMT +#define NPY_COMPLEX192_FMT NPY_CLONGDOUBLE_FMT +#endif +#elif NPY_BITSOF_LONGDOUBLE == 128 +#ifndef NPY_FLOAT128 +#define NPY_FLOAT128 NPY_LONGDOUBLE +#define NPY_COMPLEX256 NPY_CLONGDOUBLE + typedef npy_longdouble npy_float128; + typedef npy_clongdouble npy_complex256; +# define PyFloat128ScalarObject PyLongDoubleScalarObject +# define PyComplex256ScalarObject PyCLongDoubleScalarObject +# define PyFloat128ArrType_Type PyLongDoubleArrType_Type +# define PyComplex256ArrType_Type PyCLongDoubleArrType_Type +#define NPY_FLOAT128_FMT NPY_LONGDOUBLE_FMT +#define NPY_COMPLEX256_FMT NPY_CLONGDOUBLE_FMT +#endif +#elif NPY_BITSOF_LONGDOUBLE == 256 +#define NPY_FLOAT256 NPY_LONGDOUBLE +#define NPY_COMPLEX512 NPY_CLONGDOUBLE + typedef npy_longdouble npy_float256; + typedef npy_clongdouble npy_complex512; +# define PyFloat256ScalarObject PyLongDoubleScalarObject +# define PyComplex512ScalarObject PyCLongDoubleScalarObject +# define PyFloat256ArrType_Type PyLongDoubleArrType_Type +# define PyComplex512ArrType_Type PyCLongDoubleArrType_Type +#define NPY_FLOAT256_FMT NPY_LONGDOUBLE_FMT +#define NPY_COMPLEX512_FMT NPY_CLONGDOUBLE_FMT +#endif + +/* datetime typedefs */ +typedef npy_int64 npy_timedelta; +typedef npy_int64 npy_datetime; +#define NPY_DATETIME_FMT NPY_INT64_FMT +#define NPY_TIMEDELTA_FMT NPY_INT64_FMT + +/* End of typedefs for numarray style bit-width names */ + +#endif /* NUMPY_CORE_INCLUDE_NUMPY_NPY_COMMON_H_ */ diff --git a/.local/lib/python3.8/site-packages/numpy/core/include/numpy/npy_cpu.h b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/npy_cpu.h new file mode 100644 index 0000000..78d229e --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/npy_cpu.h @@ -0,0 +1,129 @@ +/* + * This set (target) cpu specific macros: + * - Possible values: + * NPY_CPU_X86 + * NPY_CPU_AMD64 + * NPY_CPU_PPC + * NPY_CPU_PPC64 + * NPY_CPU_PPC64LE + * NPY_CPU_SPARC + * NPY_CPU_S390 + * NPY_CPU_IA64 + * NPY_CPU_HPPA + * NPY_CPU_ALPHA + * NPY_CPU_ARMEL + * NPY_CPU_ARMEB + * NPY_CPU_SH_LE + * NPY_CPU_SH_BE + * NPY_CPU_ARCEL + * NPY_CPU_ARCEB + * NPY_CPU_RISCV64 + * NPY_CPU_LOONGARCH + * NPY_CPU_WASM + */ +#ifndef NUMPY_CORE_INCLUDE_NUMPY_NPY_CPU_H_ +#define NUMPY_CORE_INCLUDE_NUMPY_NPY_CPU_H_ + +#include "numpyconfig.h" + +#if defined( __i386__ ) || defined(i386) || defined(_M_IX86) + /* + * __i386__ is defined by gcc and Intel compiler on Linux, + * _M_IX86 by VS compiler, + * i386 by Sun compilers on opensolaris at least + */ + #define NPY_CPU_X86 +#elif defined(__x86_64__) || defined(__amd64__) || defined(__x86_64) || defined(_M_AMD64) + /* + * both __x86_64__ and __amd64__ are defined by gcc + * __x86_64 
defined by sun compiler on opensolaris at least + * _M_AMD64 defined by MS compiler + */ + #define NPY_CPU_AMD64 +#elif defined(__powerpc64__) && defined(__LITTLE_ENDIAN__) + #define NPY_CPU_PPC64LE +#elif defined(__powerpc64__) && defined(__BIG_ENDIAN__) + #define NPY_CPU_PPC64 +#elif defined(__ppc__) || defined(__powerpc__) || defined(_ARCH_PPC) + /* + * __ppc__ is defined by gcc, I remember having seen __powerpc__ once, + * but can't find it ATM + * _ARCH_PPC is used by at least gcc on AIX + * As __powerpc__ and _ARCH_PPC are also defined by PPC64 check + * for those specifically first before defaulting to ppc + */ + #define NPY_CPU_PPC +#elif defined(__sparc__) || defined(__sparc) + /* __sparc__ is defined by gcc and Forte (e.g. Sun) compilers */ + #define NPY_CPU_SPARC +#elif defined(__s390__) + #define NPY_CPU_S390 +#elif defined(__ia64) + #define NPY_CPU_IA64 +#elif defined(__hppa) + #define NPY_CPU_HPPA +#elif defined(__alpha__) + #define NPY_CPU_ALPHA +#elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM64) + /* _M_ARM64 is defined in MSVC for ARM64 compilation on Windows */ + #if defined(__ARMEB__) || defined(__AARCH64EB__) + #if defined(__ARM_32BIT_STATE) + #define NPY_CPU_ARMEB_AARCH32 + #elif defined(__ARM_64BIT_STATE) + #define NPY_CPU_ARMEB_AARCH64 + #else + #define NPY_CPU_ARMEB + #endif + #elif defined(__ARMEL__) || defined(__AARCH64EL__) || defined(_M_ARM64) + #if defined(__ARM_32BIT_STATE) + #define NPY_CPU_ARMEL_AARCH32 + #elif defined(__ARM_64BIT_STATE) || defined(_M_ARM64) + #define NPY_CPU_ARMEL_AARCH64 + #else + #define NPY_CPU_ARMEL + #endif + #else + # error Unknown ARM CPU, please report this to numpy maintainers with \ + information about your platform (OS, CPU and compiler) + #endif +#elif defined(__sh__) && defined(__LITTLE_ENDIAN__) + #define NPY_CPU_SH_LE +#elif defined(__sh__) && defined(__BIG_ENDIAN__) + #define NPY_CPU_SH_BE +#elif defined(__MIPSEL__) + #define NPY_CPU_MIPSEL +#elif defined(__MIPSEB__) + #define NPY_CPU_MIPSEB +#elif defined(__or1k__) + #define NPY_CPU_OR1K +#elif defined(__mc68000__) + #define NPY_CPU_M68K +#elif defined(__arc__) && defined(__LITTLE_ENDIAN__) + #define NPY_CPU_ARCEL +#elif defined(__arc__) && defined(__BIG_ENDIAN__) + #define NPY_CPU_ARCEB +#elif defined(__riscv) && defined(__riscv_xlen) && __riscv_xlen == 64 + #define NPY_CPU_RISCV64 +#elif defined(__loongarch__) + #define NPY_CPU_LOONGARCH +#elif defined(__EMSCRIPTEN__) + /* __EMSCRIPTEN__ is defined by emscripten: an LLVM-to-Web compiler */ + #define NPY_CPU_WASM +#else + #error Unknown CPU, please report this to numpy maintainers with \ + information about your platform (OS, CPU and compiler) +#endif + +/* + * Except for the following architectures, memory access is limited to the natural + * alignment of data types otherwise it may lead to bus error or performance regression. + * For more details about unaligned access, see https://www.kernel.org/doc/Documentation/unaligned-memory-access.txt. 
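+ * On platforms where NPY_ALIGNMENT_REQUIRED ends up 1 (SPARC, for example), a + * common workaround is to memcpy() unaligned bytes into a suitably aligned + * local variable before using them.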
+*/ +#if defined(NPY_CPU_X86) || defined(NPY_CPU_AMD64) || defined(__aarch64__) || defined(__powerpc64__) + #define NPY_ALIGNMENT_REQUIRED 0 +#endif +#ifndef NPY_ALIGNMENT_REQUIRED + #define NPY_ALIGNMENT_REQUIRED 1 +#endif + +#endif /* NUMPY_CORE_INCLUDE_NUMPY_NPY_CPU_H_ */ diff --git a/.local/lib/python3.8/site-packages/numpy/core/include/numpy/npy_endian.h b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/npy_endian.h new file mode 100644 index 0000000..5e58a7f --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/npy_endian.h @@ -0,0 +1,77 @@ +#ifndef NUMPY_CORE_INCLUDE_NUMPY_NPY_ENDIAN_H_ +#define NUMPY_CORE_INCLUDE_NUMPY_NPY_ENDIAN_H_ + +/* + * NPY_BYTE_ORDER is set to the same value as BYTE_ORDER set by glibc in + * endian.h + */ + +#if defined(NPY_HAVE_ENDIAN_H) || defined(NPY_HAVE_SYS_ENDIAN_H) + /* Use endian.h if available */ + + #if defined(NPY_HAVE_ENDIAN_H) + #include <endian.h> + #elif defined(NPY_HAVE_SYS_ENDIAN_H) + #include <sys/endian.h> + #endif + + #if defined(BYTE_ORDER) && defined(BIG_ENDIAN) && defined(LITTLE_ENDIAN) + #define NPY_BYTE_ORDER BYTE_ORDER + #define NPY_LITTLE_ENDIAN LITTLE_ENDIAN + #define NPY_BIG_ENDIAN BIG_ENDIAN + #elif defined(_BYTE_ORDER) && defined(_BIG_ENDIAN) && defined(_LITTLE_ENDIAN) + #define NPY_BYTE_ORDER _BYTE_ORDER + #define NPY_LITTLE_ENDIAN _LITTLE_ENDIAN + #define NPY_BIG_ENDIAN _BIG_ENDIAN + #elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && defined(__LITTLE_ENDIAN) + #define NPY_BYTE_ORDER __BYTE_ORDER + #define NPY_LITTLE_ENDIAN __LITTLE_ENDIAN + #define NPY_BIG_ENDIAN __BIG_ENDIAN + #endif +#endif + +#ifndef NPY_BYTE_ORDER + /* Set endianness info using target CPU */ + #include "npy_cpu.h" + + #define NPY_LITTLE_ENDIAN 1234 + #define NPY_BIG_ENDIAN 4321 + + #if defined(NPY_CPU_X86) \ + || defined(NPY_CPU_AMD64) \ + || defined(NPY_CPU_IA64) \ + || defined(NPY_CPU_ALPHA) \ + || defined(NPY_CPU_ARMEL) \ + || defined(NPY_CPU_ARMEL_AARCH32) \ + || defined(NPY_CPU_ARMEL_AARCH64) \ + || defined(NPY_CPU_SH_LE) \ + || defined(NPY_CPU_MIPSEL) \ + || defined(NPY_CPU_PPC64LE) \ + || defined(NPY_CPU_ARCEL) \ + || defined(NPY_CPU_RISCV64) \ + || defined(NPY_CPU_LOONGARCH) \ + || defined(NPY_CPU_WASM) + #define NPY_BYTE_ORDER NPY_LITTLE_ENDIAN + + #elif defined(NPY_CPU_PPC) \ + || defined(NPY_CPU_SPARC) \ + || defined(NPY_CPU_S390) \ + || defined(NPY_CPU_HPPA) \ + || defined(NPY_CPU_PPC64) \ + || defined(NPY_CPU_ARMEB) \ + || defined(NPY_CPU_ARMEB_AARCH32) \ + || defined(NPY_CPU_ARMEB_AARCH64) \ + || defined(NPY_CPU_SH_BE) \ + || defined(NPY_CPU_MIPSEB) \ + || defined(NPY_CPU_OR1K) \ + || defined(NPY_CPU_M68K) \ + || defined(NPY_CPU_ARCEB) + #define NPY_BYTE_ORDER NPY_BIG_ENDIAN + + #else + #error Unknown CPU: cannot set endianness + #endif + +#endif + +#endif /* NUMPY_CORE_INCLUDE_NUMPY_NPY_ENDIAN_H_ */ diff --git a/.local/lib/python3.8/site-packages/numpy/core/include/numpy/npy_interrupt.h b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/npy_interrupt.h new file mode 100644 index 0000000..69a0374 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/npy_interrupt.h @@ -0,0 +1,56 @@ +/* + * This API is only provided because it is part of publicly exported + * headers. Its use is considered DEPRECATED, and it will be removed + * eventually. + * (This includes the _PyArray_SigintHandler and _PyArray_GetSigintBuf + * functions, which are, however, public API, and not headers.)
+ * + * Instead of using these non-threadsafe macros, consider periodically + * calling `PyErr_CheckSignals()` or `PyOS_InterruptOccurred()`; either will work. + * Both of these require holding the GIL, although cpython could add a + * version of `PyOS_InterruptOccurred()` which does not. Such a version + * actually exists as private API in Python 3.10, and was backported to 3.9 and 3.8 + * (see also https://bugs.python.org/issue41037 and + * https://github.com/python/cpython/pull/20599). + */ + +#ifndef NUMPY_CORE_INCLUDE_NUMPY_NPY_INTERRUPT_H_ +#define NUMPY_CORE_INCLUDE_NUMPY_NPY_INTERRUPT_H_ + +#ifndef NPY_NO_SIGNAL + +#include <setjmp.h> +#include <signal.h> + +#ifndef sigsetjmp + +#define NPY_SIGSETJMP(arg1, arg2) setjmp(arg1) +#define NPY_SIGLONGJMP(arg1, arg2) longjmp(arg1, arg2) +#define NPY_SIGJMP_BUF jmp_buf + +#else + +#define NPY_SIGSETJMP(arg1, arg2) sigsetjmp(arg1, arg2) +#define NPY_SIGLONGJMP(arg1, arg2) siglongjmp(arg1, arg2) +#define NPY_SIGJMP_BUF sigjmp_buf + +#endif + +# define NPY_SIGINT_ON { \ + PyOS_sighandler_t _npy_sig_save; \ + _npy_sig_save = PyOS_setsig(SIGINT, _PyArray_SigintHandler); \ + if (NPY_SIGSETJMP(*((NPY_SIGJMP_BUF *)_PyArray_GetSigintBuf()), \ + 1) == 0) { \ + +# define NPY_SIGINT_OFF } \ + PyOS_setsig(SIGINT, _npy_sig_save); \ + } + +#else /* NPY_NO_SIGNAL */ + +#define NPY_SIGINT_ON +#define NPY_SIGINT_OFF + +#endif /* NPY_NO_SIGNAL */ + +#endif /* NUMPY_CORE_INCLUDE_NUMPY_NPY_INTERRUPT_H_ */ diff --git a/.local/lib/python3.8/site-packages/numpy/core/include/numpy/npy_math.h b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/npy_math.h new file mode 100644 index 0000000..bead0dc --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/npy_math.h @@ -0,0 +1,599 @@ +#ifndef NUMPY_CORE_INCLUDE_NUMPY_NPY_MATH_H_ +#define NUMPY_CORE_INCLUDE_NUMPY_NPY_MATH_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <numpy/npy_common.h> + +#include <math.h> +#ifdef __SUNPRO_CC +#include <sunmath.h> +#endif + +/* By adding static inline specifiers to npy_math function definitions when + appropriate, the compiler is given the opportunity to optimize */ +#if NPY_INLINE_MATH +#define NPY_INPLACE NPY_INLINE static +#else +#define NPY_INPLACE +#endif + + +/* + * NAN and INFINITY like macros (same behavior as glibc for NAN, same as C99 + * for INFINITY) + * + * XXX: I should test whether INFINITY and NAN are available on the platform + */ +NPY_INLINE static float __npy_inff(void) +{ + const union { npy_uint32 __i; float __f;} __bint = {0x7f800000UL}; + return __bint.__f; +} + +NPY_INLINE static float __npy_nanf(void) +{ + const union { npy_uint32 __i; float __f;} __bint = {0x7fc00000UL}; + return __bint.__f; +} + +NPY_INLINE static float __npy_pzerof(void) +{ + const union { npy_uint32 __i; float __f;} __bint = {0x00000000UL}; + return __bint.__f; +} + +NPY_INLINE static float __npy_nzerof(void) +{ + const union { npy_uint32 __i; float __f;} __bint = {0x80000000UL}; + return __bint.__f; +} + +#define NPY_INFINITYF __npy_inff() +#define NPY_NANF __npy_nanf() +#define NPY_PZEROF __npy_pzerof() +#define NPY_NZEROF __npy_nzerof() + +#define NPY_INFINITY ((npy_double)NPY_INFINITYF) +#define NPY_NAN ((npy_double)NPY_NANF) +#define NPY_PZERO ((npy_double)NPY_PZEROF) +#define NPY_NZERO ((npy_double)NPY_NZEROF) + +#define NPY_INFINITYL ((npy_longdouble)NPY_INFINITYF) +#define NPY_NANL ((npy_longdouble)NPY_NANF) +#define NPY_PZEROL ((npy_longdouble)NPY_PZEROF) +#define NPY_NZEROL ((npy_longdouble)NPY_NZEROF) + +/* + * Useful constants + */ +#define NPY_E 2.718281828459045235360287471352662498 /* e */ +#define
NPY_LOG2E 1.442695040888963407359924681001892137 /* log_2 e */ +#define NPY_LOG10E 0.434294481903251827651128918916605082 /* log_10 e */ +#define NPY_LOGE2 0.693147180559945309417232121458176568 /* log_e 2 */ +#define NPY_LOGE10 2.302585092994045684017991454684364208 /* log_e 10 */ +#define NPY_PI 3.141592653589793238462643383279502884 /* pi */ +#define NPY_PI_2 1.570796326794896619231321691639751442 /* pi/2 */ +#define NPY_PI_4 0.785398163397448309615660845819875721 /* pi/4 */ +#define NPY_1_PI 0.318309886183790671537767526745028724 /* 1/pi */ +#define NPY_2_PI 0.636619772367581343075535053490057448 /* 2/pi */ +#define NPY_EULER 0.577215664901532860606512090082402431 /* Euler constant */ +#define NPY_SQRT2 1.414213562373095048801688724209698079 /* sqrt(2) */ +#define NPY_SQRT1_2 0.707106781186547524400844362104849039 /* 1/sqrt(2) */ + +#define NPY_Ef 2.718281828459045235360287471352662498F /* e */ +#define NPY_LOG2Ef 1.442695040888963407359924681001892137F /* log_2 e */ +#define NPY_LOG10Ef 0.434294481903251827651128918916605082F /* log_10 e */ +#define NPY_LOGE2f 0.693147180559945309417232121458176568F /* log_e 2 */ +#define NPY_LOGE10f 2.302585092994045684017991454684364208F /* log_e 10 */ +#define NPY_PIf 3.141592653589793238462643383279502884F /* pi */ +#define NPY_PI_2f 1.570796326794896619231321691639751442F /* pi/2 */ +#define NPY_PI_4f 0.785398163397448309615660845819875721F /* pi/4 */ +#define NPY_1_PIf 0.318309886183790671537767526745028724F /* 1/pi */ +#define NPY_2_PIf 0.636619772367581343075535053490057448F /* 2/pi */ +#define NPY_EULERf 0.577215664901532860606512090082402431F /* Euler constant */ +#define NPY_SQRT2f 1.414213562373095048801688724209698079F /* sqrt(2) */ +#define NPY_SQRT1_2f 0.707106781186547524400844362104849039F /* 1/sqrt(2) */ + +#define NPY_El 2.718281828459045235360287471352662498L /* e */ +#define NPY_LOG2El 1.442695040888963407359924681001892137L /* log_2 e */ +#define NPY_LOG10El 0.434294481903251827651128918916605082L /* log_10 e */ +#define NPY_LOGE2l 0.693147180559945309417232121458176568L /* log_e 2 */ +#define NPY_LOGE10l 2.302585092994045684017991454684364208L /* log_e 10 */ +#define NPY_PIl 3.141592653589793238462643383279502884L /* pi */ +#define NPY_PI_2l 1.570796326794896619231321691639751442L /* pi/2 */ +#define NPY_PI_4l 0.785398163397448309615660845819875721L /* pi/4 */ +#define NPY_1_PIl 0.318309886183790671537767526745028724L /* 1/pi */ +#define NPY_2_PIl 0.636619772367581343075535053490057448L /* 2/pi */ +#define NPY_EULERl 0.577215664901532860606512090082402431L /* Euler constant */ +#define NPY_SQRT2l 1.414213562373095048801688724209698079L /* sqrt(2) */ +#define NPY_SQRT1_2l 0.707106781186547524400844362104849039L /* 1/sqrt(2) */ + +/* + * Integer functions. 
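+ * For example, npy_gcd(12, 18) == 6 and npy_lcm(4, 6) == 12; the sized and + * unsigned variants below behave the same at their respective widths.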
+ */ +NPY_INPLACE npy_uint npy_gcdu(npy_uint a, npy_uint b); +NPY_INPLACE npy_uint npy_lcmu(npy_uint a, npy_uint b); +NPY_INPLACE npy_ulong npy_gcdul(npy_ulong a, npy_ulong b); +NPY_INPLACE npy_ulong npy_lcmul(npy_ulong a, npy_ulong b); +NPY_INPLACE npy_ulonglong npy_gcdull(npy_ulonglong a, npy_ulonglong b); +NPY_INPLACE npy_ulonglong npy_lcmull(npy_ulonglong a, npy_ulonglong b); + +NPY_INPLACE npy_int npy_gcd(npy_int a, npy_int b); +NPY_INPLACE npy_int npy_lcm(npy_int a, npy_int b); +NPY_INPLACE npy_long npy_gcdl(npy_long a, npy_long b); +NPY_INPLACE npy_long npy_lcml(npy_long a, npy_long b); +NPY_INPLACE npy_longlong npy_gcdll(npy_longlong a, npy_longlong b); +NPY_INPLACE npy_longlong npy_lcmll(npy_longlong a, npy_longlong b); + +NPY_INPLACE npy_ubyte npy_rshiftuhh(npy_ubyte a, npy_ubyte b); +NPY_INPLACE npy_ubyte npy_lshiftuhh(npy_ubyte a, npy_ubyte b); +NPY_INPLACE npy_ushort npy_rshiftuh(npy_ushort a, npy_ushort b); +NPY_INPLACE npy_ushort npy_lshiftuh(npy_ushort a, npy_ushort b); +NPY_INPLACE npy_uint npy_rshiftu(npy_uint a, npy_uint b); +NPY_INPLACE npy_uint npy_lshiftu(npy_uint a, npy_uint b); +NPY_INPLACE npy_ulong npy_rshiftul(npy_ulong a, npy_ulong b); +NPY_INPLACE npy_ulong npy_lshiftul(npy_ulong a, npy_ulong b); +NPY_INPLACE npy_ulonglong npy_rshiftull(npy_ulonglong a, npy_ulonglong b); +NPY_INPLACE npy_ulonglong npy_lshiftull(npy_ulonglong a, npy_ulonglong b); + +NPY_INPLACE npy_byte npy_rshifthh(npy_byte a, npy_byte b); +NPY_INPLACE npy_byte npy_lshifthh(npy_byte a, npy_byte b); +NPY_INPLACE npy_short npy_rshifth(npy_short a, npy_short b); +NPY_INPLACE npy_short npy_lshifth(npy_short a, npy_short b); +NPY_INPLACE npy_int npy_rshift(npy_int a, npy_int b); +NPY_INPLACE npy_int npy_lshift(npy_int a, npy_int b); +NPY_INPLACE npy_long npy_rshiftl(npy_long a, npy_long b); +NPY_INPLACE npy_long npy_lshiftl(npy_long a, npy_long b); +NPY_INPLACE npy_longlong npy_rshiftll(npy_longlong a, npy_longlong b); +NPY_INPLACE npy_longlong npy_lshiftll(npy_longlong a, npy_longlong b); + +NPY_INPLACE uint8_t npy_popcountuhh(npy_ubyte a); +NPY_INPLACE uint8_t npy_popcountuh(npy_ushort a); +NPY_INPLACE uint8_t npy_popcountu(npy_uint a); +NPY_INPLACE uint8_t npy_popcountul(npy_ulong a); +NPY_INPLACE uint8_t npy_popcountull(npy_ulonglong a); +NPY_INPLACE uint8_t npy_popcounthh(npy_byte a); +NPY_INPLACE uint8_t npy_popcounth(npy_short a); +NPY_INPLACE uint8_t npy_popcount(npy_int a); +NPY_INPLACE uint8_t npy_popcountl(npy_long a); +NPY_INPLACE uint8_t npy_popcountll(npy_longlong a); + +/* + * C99 double math funcs + */ +NPY_INPLACE double npy_sin(double x); +NPY_INPLACE double npy_cos(double x); +NPY_INPLACE double npy_tan(double x); +NPY_INPLACE double npy_sinh(double x); +NPY_INPLACE double npy_cosh(double x); +NPY_INPLACE double npy_tanh(double x); + +NPY_INPLACE double npy_asin(double x); +NPY_INPLACE double npy_acos(double x); +NPY_INPLACE double npy_atan(double x); + +NPY_INPLACE double npy_log(double x); +NPY_INPLACE double npy_log10(double x); +NPY_INPLACE double npy_exp(double x); +NPY_INPLACE double npy_sqrt(double x); +NPY_INPLACE double npy_cbrt(double x); + +NPY_INPLACE double npy_fabs(double x); +NPY_INPLACE double npy_ceil(double x); +NPY_INPLACE double npy_fmod(double x, double y); +NPY_INPLACE double npy_floor(double x); + +NPY_INPLACE double npy_expm1(double x); +NPY_INPLACE double npy_log1p(double x); +NPY_INPLACE double npy_hypot(double x, double y); +NPY_INPLACE double npy_acosh(double x); +NPY_INPLACE double npy_asinh(double xx); +NPY_INPLACE double npy_atanh(double x); 
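+/* Illustrative example: these wrappers track their C99 counterparts, so + * npy_hypot(3.0, 4.0) returns 5.0 and npy_fmod(5.5, 2.0) returns 1.5. */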
+NPY_INPLACE double npy_rint(double x); +NPY_INPLACE double npy_trunc(double x); +NPY_INPLACE double npy_exp2(double x); +NPY_INPLACE double npy_log2(double x); + +NPY_INPLACE double npy_atan2(double x, double y); +NPY_INPLACE double npy_pow(double x, double y); +NPY_INPLACE double npy_modf(double x, double* y); +NPY_INPLACE double npy_frexp(double x, int* y); +NPY_INPLACE double npy_ldexp(double n, int y); + +NPY_INPLACE double npy_copysign(double x, double y); +double npy_nextafter(double x, double y); +double npy_spacing(double x); + +/* + * IEEE 754 fpu handling. Those are guaranteed to be macros + */ + +/* use builtins to avoid function calls in tight loops + * only available if npy_config.h is available (= numpys own build) */ +#ifdef HAVE___BUILTIN_ISNAN + #define npy_isnan(x) __builtin_isnan(x) +#else + #ifndef NPY_HAVE_DECL_ISNAN + #define npy_isnan(x) ((x) != (x)) + #else + #if defined(_MSC_VER) && (_MSC_VER < 1900) + #define npy_isnan(x) _isnan((x)) + #else + #define npy_isnan(x) isnan(x) + #endif + #endif +#endif + + +/* only available if npy_config.h is available (= numpys own build) */ +#ifdef HAVE___BUILTIN_ISFINITE + #define npy_isfinite(x) __builtin_isfinite(x) +#else + #ifndef NPY_HAVE_DECL_ISFINITE + #ifdef _MSC_VER + #define npy_isfinite(x) _finite((x)) + #else + #define npy_isfinite(x) !npy_isnan((x) + (-x)) + #endif + #else + #define npy_isfinite(x) isfinite((x)) + #endif +#endif + +/* only available if npy_config.h is available (= numpys own build) */ +#ifdef HAVE___BUILTIN_ISINF + #define npy_isinf(x) __builtin_isinf(x) +#else + #ifndef NPY_HAVE_DECL_ISINF + #define npy_isinf(x) (!npy_isfinite(x) && !npy_isnan(x)) + #else + #if defined(_MSC_VER) && (_MSC_VER < 1900) + #define npy_isinf(x) (!_finite((x)) && !_isnan((x))) + #else + #define npy_isinf(x) isinf((x)) + #endif + #endif +#endif + +#ifndef NPY_HAVE_DECL_SIGNBIT + int _npy_signbit_f(float x); + int _npy_signbit_d(double x); + int _npy_signbit_ld(long double x); + #define npy_signbit(x) \ + (sizeof (x) == sizeof (long double) ? _npy_signbit_ld (x) \ + : sizeof (x) == sizeof (double) ? 
_npy_signbit_d (x) \ + : _npy_signbit_f (x)) +#else + #define npy_signbit(x) signbit((x)) +#endif + +/* + * float C99 math functions + */ +NPY_INPLACE float npy_sinf(float x); +NPY_INPLACE float npy_cosf(float x); +NPY_INPLACE float npy_tanf(float x); +NPY_INPLACE float npy_sinhf(float x); +NPY_INPLACE float npy_coshf(float x); +NPY_INPLACE float npy_tanhf(float x); +NPY_INPLACE float npy_fabsf(float x); +NPY_INPLACE float npy_floorf(float x); +NPY_INPLACE float npy_ceilf(float x); +NPY_INPLACE float npy_rintf(float x); +NPY_INPLACE float npy_truncf(float x); +NPY_INPLACE float npy_sqrtf(float x); +NPY_INPLACE float npy_cbrtf(float x); +NPY_INPLACE float npy_log10f(float x); +NPY_INPLACE float npy_logf(float x); +NPY_INPLACE float npy_expf(float x); +NPY_INPLACE float npy_expm1f(float x); +NPY_INPLACE float npy_asinf(float x); +NPY_INPLACE float npy_acosf(float x); +NPY_INPLACE float npy_atanf(float x); +NPY_INPLACE float npy_asinhf(float x); +NPY_INPLACE float npy_acoshf(float x); +NPY_INPLACE float npy_atanhf(float x); +NPY_INPLACE float npy_log1pf(float x); +NPY_INPLACE float npy_exp2f(float x); +NPY_INPLACE float npy_log2f(float x); + +NPY_INPLACE float npy_atan2f(float x, float y); +NPY_INPLACE float npy_hypotf(float x, float y); +NPY_INPLACE float npy_powf(float x, float y); +NPY_INPLACE float npy_fmodf(float x, float y); + +NPY_INPLACE float npy_modff(float x, float* y); +NPY_INPLACE float npy_frexpf(float x, int* y); +NPY_INPLACE float npy_ldexpf(float x, int y); + +NPY_INPLACE float npy_copysignf(float x, float y); +float npy_nextafterf(float x, float y); +float npy_spacingf(float x); + +/* + * long double C99 math functions + */ +NPY_INPLACE npy_longdouble npy_sinl(npy_longdouble x); +NPY_INPLACE npy_longdouble npy_cosl(npy_longdouble x); +NPY_INPLACE npy_longdouble npy_tanl(npy_longdouble x); +NPY_INPLACE npy_longdouble npy_sinhl(npy_longdouble x); +NPY_INPLACE npy_longdouble npy_coshl(npy_longdouble x); +NPY_INPLACE npy_longdouble npy_tanhl(npy_longdouble x); +NPY_INPLACE npy_longdouble npy_fabsl(npy_longdouble x); +NPY_INPLACE npy_longdouble npy_floorl(npy_longdouble x); +NPY_INPLACE npy_longdouble npy_ceill(npy_longdouble x); +NPY_INPLACE npy_longdouble npy_rintl(npy_longdouble x); +NPY_INPLACE npy_longdouble npy_truncl(npy_longdouble x); +NPY_INPLACE npy_longdouble npy_sqrtl(npy_longdouble x); +NPY_INPLACE npy_longdouble npy_cbrtl(npy_longdouble x); +NPY_INPLACE npy_longdouble npy_log10l(npy_longdouble x); +NPY_INPLACE npy_longdouble npy_logl(npy_longdouble x); +NPY_INPLACE npy_longdouble npy_expl(npy_longdouble x); +NPY_INPLACE npy_longdouble npy_expm1l(npy_longdouble x); +NPY_INPLACE npy_longdouble npy_asinl(npy_longdouble x); +NPY_INPLACE npy_longdouble npy_acosl(npy_longdouble x); +NPY_INPLACE npy_longdouble npy_atanl(npy_longdouble x); +NPY_INPLACE npy_longdouble npy_asinhl(npy_longdouble x); +NPY_INPLACE npy_longdouble npy_acoshl(npy_longdouble x); +NPY_INPLACE npy_longdouble npy_atanhl(npy_longdouble x); +NPY_INPLACE npy_longdouble npy_log1pl(npy_longdouble x); +NPY_INPLACE npy_longdouble npy_exp2l(npy_longdouble x); +NPY_INPLACE npy_longdouble npy_log2l(npy_longdouble x); + +NPY_INPLACE npy_longdouble npy_atan2l(npy_longdouble x, npy_longdouble y); +NPY_INPLACE npy_longdouble npy_hypotl(npy_longdouble x, npy_longdouble y); +NPY_INPLACE npy_longdouble npy_powl(npy_longdouble x, npy_longdouble y); +NPY_INPLACE npy_longdouble npy_fmodl(npy_longdouble x, npy_longdouble y); + +NPY_INPLACE npy_longdouble npy_modfl(npy_longdouble x, npy_longdouble* y); +NPY_INPLACE 
npy_longdouble npy_frexpl(npy_longdouble x, int* y);
+NPY_INPLACE npy_longdouble npy_ldexpl(npy_longdouble x, int y);
+
+NPY_INPLACE npy_longdouble npy_copysignl(npy_longdouble x, npy_longdouble y);
+npy_longdouble npy_nextafterl(npy_longdouble x, npy_longdouble y);
+npy_longdouble npy_spacingl(npy_longdouble x);
+
+/*
+ * Non-standard functions
+ */
+NPY_INPLACE double npy_deg2rad(double x);
+NPY_INPLACE double npy_rad2deg(double x);
+NPY_INPLACE double npy_logaddexp(double x, double y);
+NPY_INPLACE double npy_logaddexp2(double x, double y);
+NPY_INPLACE double npy_divmod(double x, double y, double *modulus);
+NPY_INPLACE double npy_heaviside(double x, double h0);
+
+NPY_INPLACE float npy_deg2radf(float x);
+NPY_INPLACE float npy_rad2degf(float x);
+NPY_INPLACE float npy_logaddexpf(float x, float y);
+NPY_INPLACE float npy_logaddexp2f(float x, float y);
+NPY_INPLACE float npy_divmodf(float x, float y, float *modulus);
+NPY_INPLACE float npy_heavisidef(float x, float h0);
+
+NPY_INPLACE npy_longdouble npy_deg2radl(npy_longdouble x);
+NPY_INPLACE npy_longdouble npy_rad2degl(npy_longdouble x);
+NPY_INPLACE npy_longdouble npy_logaddexpl(npy_longdouble x, npy_longdouble y);
+NPY_INPLACE npy_longdouble npy_logaddexp2l(npy_longdouble x, npy_longdouble y);
+NPY_INPLACE npy_longdouble npy_divmodl(npy_longdouble x, npy_longdouble y,
+                                       npy_longdouble *modulus);
+NPY_INPLACE npy_longdouble npy_heavisidel(npy_longdouble x, npy_longdouble h0);
+
+#define npy_degrees npy_rad2deg
+#define npy_degreesf npy_rad2degf
+#define npy_degreesl npy_rad2degl
+
+#define npy_radians npy_deg2rad
+#define npy_radiansf npy_deg2radf
+#define npy_radiansl npy_deg2radl
+
+/*
+ * Complex declarations
+ */
+
+/*
+ * C99 specifies that complex numbers have the same representation as
+ * an array of two elements, where the first element is the real part
+ * and the second element is the imaginary part.
+ */
+#define __NPY_CPACK_IMP(x, y, type, ctype) \
+    union {                                \
+        ctype z;                           \
+        type a[2];                         \
+    } z1;                                  \
+                                           \
+    z1.a[0] = (x);                         \
+    z1.a[1] = (y);                         \
+                                           \
+    return z1.z;
+
+static NPY_INLINE npy_cdouble npy_cpack(double x, double y)
+{
+    __NPY_CPACK_IMP(x, y, double, npy_cdouble);
+}
+
+static NPY_INLINE npy_cfloat npy_cpackf(float x, float y)
+{
+    __NPY_CPACK_IMP(x, y, float, npy_cfloat);
+}
+
+static NPY_INLINE npy_clongdouble npy_cpackl(npy_longdouble x, npy_longdouble y)
+{
+    __NPY_CPACK_IMP(x, y, npy_longdouble, npy_clongdouble);
+}
+#undef __NPY_CPACK_IMP
+
+/*
+ * Same remark as above, but in the other direction: extract first/second
+ * member of complex number, assuming a C99-compatible representation
+ *
+ * These are defined as static inline functions, so a reasonable compiler
+ * will most likely compile them to one or two instructions (on CISC at
+ * least)
+ */
+#define __NPY_CEXTRACT_IMP(z, index, type, ctype) \
+    union {                                       \
+        ctype z;                                  \
+        type a[2];                                \
+    } __z_repr;                                   \
+    __z_repr.z = z;                               \
+                                                  \
+    return __z_repr.a[index];
+
+static NPY_INLINE double npy_creal(npy_cdouble z)
+{
+    __NPY_CEXTRACT_IMP(z, 0, double, npy_cdouble);
+}
+
+static NPY_INLINE double npy_cimag(npy_cdouble z)
+{
+    __NPY_CEXTRACT_IMP(z, 1, double, npy_cdouble);
+}
+
+static NPY_INLINE float npy_crealf(npy_cfloat z)
+{
+    __NPY_CEXTRACT_IMP(z, 0, float, npy_cfloat);
+}
+
+static NPY_INLINE float npy_cimagf(npy_cfloat z)
+{
+    __NPY_CEXTRACT_IMP(z, 1, float, npy_cfloat);
+}
+
+static NPY_INLINE npy_longdouble npy_creall(npy_clongdouble z)
+{
+    __NPY_CEXTRACT_IMP(z, 0, npy_longdouble, npy_clongdouble);
+}
+
+static NPY_INLINE npy_longdouble npy_cimagl(npy_clongdouble z)
+{
+    __NPY_CEXTRACT_IMP(z, 1, npy_longdouble, npy_clongdouble);
+}
+#undef __NPY_CEXTRACT_IMP
+
+/*
+ * Double precision complex functions
+ */
+double npy_cabs(npy_cdouble z);
+double npy_carg(npy_cdouble z);
+
+npy_cdouble npy_cexp(npy_cdouble z);
+npy_cdouble npy_clog(npy_cdouble z);
+npy_cdouble npy_cpow(npy_cdouble x, npy_cdouble y);
+
+npy_cdouble npy_csqrt(npy_cdouble z);
+
+npy_cdouble npy_ccos(npy_cdouble z);
+npy_cdouble npy_csin(npy_cdouble z);
+npy_cdouble npy_ctan(npy_cdouble z);
+
+npy_cdouble npy_ccosh(npy_cdouble z);
+npy_cdouble npy_csinh(npy_cdouble z);
+npy_cdouble npy_ctanh(npy_cdouble z);
+
+npy_cdouble npy_cacos(npy_cdouble z);
+npy_cdouble npy_casin(npy_cdouble z);
+npy_cdouble npy_catan(npy_cdouble z);
+
+npy_cdouble npy_cacosh(npy_cdouble z);
+npy_cdouble npy_casinh(npy_cdouble z);
+npy_cdouble npy_catanh(npy_cdouble z);
+
+/*
+ * Single precision complex functions
+ */
+float npy_cabsf(npy_cfloat z);
+float npy_cargf(npy_cfloat z);
+
+npy_cfloat npy_cexpf(npy_cfloat z);
+npy_cfloat npy_clogf(npy_cfloat z);
+npy_cfloat npy_cpowf(npy_cfloat x, npy_cfloat y);
+
+npy_cfloat npy_csqrtf(npy_cfloat z);
+
+npy_cfloat npy_ccosf(npy_cfloat z);
+npy_cfloat npy_csinf(npy_cfloat z);
+npy_cfloat npy_ctanf(npy_cfloat z);
+
+npy_cfloat npy_ccoshf(npy_cfloat z);
+npy_cfloat npy_csinhf(npy_cfloat z);
+npy_cfloat npy_ctanhf(npy_cfloat z);
+
+npy_cfloat npy_cacosf(npy_cfloat z);
+npy_cfloat npy_casinf(npy_cfloat z);
+npy_cfloat npy_catanf(npy_cfloat z);
+
+npy_cfloat npy_cacoshf(npy_cfloat z);
+npy_cfloat npy_casinhf(npy_cfloat z);
+npy_cfloat npy_catanhf(npy_cfloat z);
+
+
+/*
+ * Extended precision complex functions
+ */
+npy_longdouble npy_cabsl(npy_clongdouble z);
+npy_longdouble npy_cargl(npy_clongdouble z);
+
+npy_clongdouble npy_cexpl(npy_clongdouble z);
+npy_clongdouble npy_clogl(npy_clongdouble z);
+npy_clongdouble npy_cpowl(npy_clongdouble x,
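+                          npy_clongdouble y);
+
+npy_clongdouble npy_csqrtl(npy_clongdouble z);
+
+npy_clongdouble npy_ccosl(npy_clongdouble z);
+npy_clongdouble npy_csinl(npy_clongdouble z);
+npy_clongdouble npy_ctanl(npy_clongdouble z);
+
+npy_clongdouble npy_ccoshl(npy_clongdouble z);
+npy_clongdouble npy_csinhl(npy_clongdouble z);
+npy_clongdouble npy_ctanhl(npy_clongdouble z);
+
+npy_clongdouble npy_cacosl(npy_clongdouble z);
+npy_clongdouble npy_casinl(npy_clongdouble z);
+npy_clongdouble npy_catanl(npy_clongdouble z);
+
+npy_clongdouble npy_cacoshl(npy_clongdouble z);
+npy_clongdouble npy_casinhl(npy_clongdouble z);
+npy_clongdouble npy_catanhl(npy_clongdouble z);
+
+/*
+ * [Editor's sketch, not part of the upstream header] How the pack/extract
+ * helpers defined above fit together: values are built with npy_cpack* and
+ * taken apart with npy_creal* and npy_cimag*. `conjugate` is a hypothetical
+ * name; the block is wrapped in `#if 0` so the header itself is unchanged.
+ */
+#if 0
+static npy_cdouble
+conjugate(npy_cdouble z)
+{
+    /* rebuild the value from its real part and the negated imaginary part */
+    return npy_cpack(npy_creal(z), -npy_cimag(z));
+}
+#endif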
+
+
+/*
+ * Functions that set the floating point error
+ * status word.
+ */
+
+/*
+ * platform-dependent code translates floating point
+ * status to an integer sum of these values
+ */
+#define NPY_FPE_DIVIDEBYZERO  1
+#define NPY_FPE_OVERFLOW      2
+#define NPY_FPE_UNDERFLOW     4
+#define NPY_FPE_INVALID       8
+
+int npy_clear_floatstatus_barrier(char*);
+int npy_get_floatstatus_barrier(char*);
+/*
+ * Use caution with these: clang and gcc 8.1 are known to reorder calls
+ * to this form of the function, which can defeat the check. The _barrier
+ * form of the call is preferable, where the argument is
+ * (char*)&local_variable
+ */
+int npy_clear_floatstatus(void);
+int npy_get_floatstatus(void);
+
+void npy_set_floatstatus_divbyzero(void);
+void npy_set_floatstatus_overflow(void);
+void npy_set_floatstatus_underflow(void);
+void npy_set_floatstatus_invalid(void);
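+
+/*
+ * [Editor's sketch, not part of the upstream header] Typical use of the
+ * _barrier variants described above: pass the address of a local variable
+ * so the compiler cannot move the status query across the computation it
+ * is meant to observe. `checked_divide` is a hypothetical name; the block
+ * is wrapped in `#if 0` so the header itself is unchanged.
+ */
+#if 0
+static double
+checked_divide(double a, double b)
+{
+    double r;
+    npy_clear_floatstatus_barrier((char*)&r);
+    r = a / b;
+    if (npy_get_floatstatus_barrier((char*)&r) & NPY_FPE_DIVIDEBYZERO) {
+        /* handle or report the division by zero here */
+    }
+    return r;
+}
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#if NPY_INLINE_MATH
+#include "npy_math_internal.h"
+#endif
+
+#endif  /* NUMPY_CORE_INCLUDE_NUMPY_NPY_MATH_H_ */
diff --git a/.local/lib/python3.8/site-packages/numpy/core/include/numpy/npy_no_deprecated_api.h b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/npy_no_deprecated_api.h
new file mode 100644
index 0000000..39658c0
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/npy_no_deprecated_api.h
@@ -0,0 +1,20 @@
+/*
+ * This include file is provided for inclusion in Cython *.pyd files where
+ * one would like to define the NPY_NO_DEPRECATED_API macro. It can be
+ * included by
+ *
+ * cdef extern from "npy_no_deprecated_api.h": pass
+ *
+ */
+#ifndef NPY_NO_DEPRECATED_API
+
+/* put this check here since there may be multiple includes in C extensions. */
+#if defined(NUMPY_CORE_INCLUDE_NUMPY_NDARRAYTYPES_H_) || \
+    defined(NUMPY_CORE_INCLUDE_NUMPY_NPY_DEPRECATED_API_H) || \
+    defined(NUMPY_CORE_INCLUDE_NUMPY_OLD_DEFINES_H_)
+#error "npy_no_deprecated_api.h" must be first among numpy includes.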
+#else +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#endif + +#endif /* NPY_NO_DEPRECATED_API */ diff --git a/.local/lib/python3.8/site-packages/numpy/core/include/numpy/npy_os.h b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/npy_os.h new file mode 100644 index 0000000..efa0e40 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/npy_os.h @@ -0,0 +1,30 @@ +#ifndef NUMPY_CORE_INCLUDE_NUMPY_NPY_OS_H_ +#define NUMPY_CORE_INCLUDE_NUMPY_NPY_OS_H_ + +#if defined(linux) || defined(__linux) || defined(__linux__) + #define NPY_OS_LINUX +#elif defined(__FreeBSD__) || defined(__NetBSD__) || \ + defined(__OpenBSD__) || defined(__DragonFly__) + #define NPY_OS_BSD + #ifdef __FreeBSD__ + #define NPY_OS_FREEBSD + #elif defined(__NetBSD__) + #define NPY_OS_NETBSD + #elif defined(__OpenBSD__) + #define NPY_OS_OPENBSD + #elif defined(__DragonFly__) + #define NPY_OS_DRAGONFLY + #endif +#elif defined(sun) || defined(__sun) + #define NPY_OS_SOLARIS +#elif defined(__CYGWIN__) + #define NPY_OS_CYGWIN +#elif defined(_WIN32) || defined(__WIN32__) || defined(WIN32) + #define NPY_OS_WIN32 +#elif defined(__APPLE__) + #define NPY_OS_DARWIN +#else + #define NPY_OS_UNKNOWN +#endif + +#endif /* NUMPY_CORE_INCLUDE_NUMPY_NPY_OS_H_ */ diff --git a/.local/lib/python3.8/site-packages/numpy/core/include/numpy/numpyconfig.h b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/numpyconfig.h new file mode 100644 index 0000000..e4c17f7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/numpyconfig.h @@ -0,0 +1,67 @@ +#ifndef NUMPY_CORE_INCLUDE_NUMPY_NPY_NUMPYCONFIG_H_ +#define NUMPY_CORE_INCLUDE_NUMPY_NPY_NUMPYCONFIG_H_ + +#include "_numpyconfig.h" + +/* + * On Mac OS X, because there is only one configuration stage for all the archs + * in universal builds, any macro which depends on the arch needs to be + * hardcoded + */ +#ifdef __APPLE__ + #undef NPY_SIZEOF_LONG + #undef NPY_SIZEOF_PY_INTPTR_T + + #ifdef __LP64__ + #define NPY_SIZEOF_LONG 8 + #define NPY_SIZEOF_PY_INTPTR_T 8 + #else + #define NPY_SIZEOF_LONG 4 + #define NPY_SIZEOF_PY_INTPTR_T 4 + #endif + + #undef NPY_SIZEOF_LONGDOUBLE + #undef NPY_SIZEOF_COMPLEX_LONGDOUBLE + + #if defined(__arm64__) + #define NPY_SIZEOF_LONGDOUBLE 8 + #define NPY_SIZEOF_COMPLEX_LONGDOUBLE 16 + #elif defined(__x86_64) + #define NPY_SIZEOF_LONGDOUBLE 16 + #define NPY_SIZEOF_COMPLEX_LONGDOUBLE 32 + #elif defined (__i386) + #define NPY_SIZEOF_LONGDOUBLE 12 + #define NPY_SIZEOF_COMPLEX_LONGDOUBLE 24 + #elif defined(__ppc__) || defined (__ppc64__) + #define NPY_SIZEOF_LONGDOUBLE 16 + #define NPY_SIZEOF_COMPLEX_LONGDOUBLE 32 + #else + #error "unknown architecture" + #endif +#endif + +/** + * To help with the NPY_NO_DEPRECATED_API macro, we include API version + * numbers for specific versions of NumPy. 
To exclude all API that was + * deprecated as of 1.7, add the following before #including any NumPy + * headers: + * #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION + */ +#define NPY_1_7_API_VERSION 0x00000007 +#define NPY_1_8_API_VERSION 0x00000008 +#define NPY_1_9_API_VERSION 0x00000008 +#define NPY_1_10_API_VERSION 0x00000008 +#define NPY_1_11_API_VERSION 0x00000008 +#define NPY_1_12_API_VERSION 0x00000008 +#define NPY_1_13_API_VERSION 0x00000008 +#define NPY_1_14_API_VERSION 0x00000008 +#define NPY_1_15_API_VERSION 0x00000008 +#define NPY_1_16_API_VERSION 0x00000008 +#define NPY_1_17_API_VERSION 0x00000008 +#define NPY_1_18_API_VERSION 0x00000008 +#define NPY_1_19_API_VERSION 0x00000008 +#define NPY_1_20_API_VERSION 0x0000000e +#define NPY_1_21_API_VERSION 0x0000000e +#define NPY_1_22_API_VERSION 0x0000000f + +#endif /* NUMPY_CORE_INCLUDE_NUMPY_NPY_NUMPYCONFIG_H_ */ diff --git a/.local/lib/python3.8/site-packages/numpy/core/include/numpy/old_defines.h b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/old_defines.h new file mode 100644 index 0000000..b3fa677 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/old_defines.h @@ -0,0 +1,187 @@ +/* This header is deprecated as of NumPy 1.7 */ +#ifndef NUMPY_CORE_INCLUDE_NUMPY_OLD_DEFINES_H_ +#define NUMPY_CORE_INCLUDE_NUMPY_OLD_DEFINES_H_ + +#if defined(NPY_NO_DEPRECATED_API) && NPY_NO_DEPRECATED_API >= NPY_1_7_API_VERSION +#error The header "old_defines.h" is deprecated as of NumPy 1.7. +#endif + +#define NDARRAY_VERSION NPY_VERSION + +#define PyArray_MIN_BUFSIZE NPY_MIN_BUFSIZE +#define PyArray_MAX_BUFSIZE NPY_MAX_BUFSIZE +#define PyArray_BUFSIZE NPY_BUFSIZE + +#define PyArray_PRIORITY NPY_PRIORITY +#define PyArray_SUBTYPE_PRIORITY NPY_PRIORITY +#define PyArray_NUM_FLOATTYPE NPY_NUM_FLOATTYPE + +#define NPY_MAX PyArray_MAX +#define NPY_MIN PyArray_MIN + +#define PyArray_TYPES NPY_TYPES +#define PyArray_BOOL NPY_BOOL +#define PyArray_BYTE NPY_BYTE +#define PyArray_UBYTE NPY_UBYTE +#define PyArray_SHORT NPY_SHORT +#define PyArray_USHORT NPY_USHORT +#define PyArray_INT NPY_INT +#define PyArray_UINT NPY_UINT +#define PyArray_LONG NPY_LONG +#define PyArray_ULONG NPY_ULONG +#define PyArray_LONGLONG NPY_LONGLONG +#define PyArray_ULONGLONG NPY_ULONGLONG +#define PyArray_HALF NPY_HALF +#define PyArray_FLOAT NPY_FLOAT +#define PyArray_DOUBLE NPY_DOUBLE +#define PyArray_LONGDOUBLE NPY_LONGDOUBLE +#define PyArray_CFLOAT NPY_CFLOAT +#define PyArray_CDOUBLE NPY_CDOUBLE +#define PyArray_CLONGDOUBLE NPY_CLONGDOUBLE +#define PyArray_OBJECT NPY_OBJECT +#define PyArray_STRING NPY_STRING +#define PyArray_UNICODE NPY_UNICODE +#define PyArray_VOID NPY_VOID +#define PyArray_DATETIME NPY_DATETIME +#define PyArray_TIMEDELTA NPY_TIMEDELTA +#define PyArray_NTYPES NPY_NTYPES +#define PyArray_NOTYPE NPY_NOTYPE +#define PyArray_CHAR NPY_CHAR +#define PyArray_USERDEF NPY_USERDEF +#define PyArray_NUMUSERTYPES NPY_NUMUSERTYPES + +#define PyArray_INTP NPY_INTP +#define PyArray_UINTP NPY_UINTP + +#define PyArray_INT8 NPY_INT8 +#define PyArray_UINT8 NPY_UINT8 +#define PyArray_INT16 NPY_INT16 +#define PyArray_UINT16 NPY_UINT16 +#define PyArray_INT32 NPY_INT32 +#define PyArray_UINT32 NPY_UINT32 + +#ifdef NPY_INT64 +#define PyArray_INT64 NPY_INT64 +#define PyArray_UINT64 NPY_UINT64 +#endif + +#ifdef NPY_INT128 +#define PyArray_INT128 NPY_INT128 +#define PyArray_UINT128 NPY_UINT128 +#endif + +#ifdef NPY_FLOAT16 +#define PyArray_FLOAT16 NPY_FLOAT16 +#define PyArray_COMPLEX32 NPY_COMPLEX32 +#endif + +#ifdef NPY_FLOAT80 +#define 
PyArray_FLOAT80 NPY_FLOAT80 +#define PyArray_COMPLEX160 NPY_COMPLEX160 +#endif + +#ifdef NPY_FLOAT96 +#define PyArray_FLOAT96 NPY_FLOAT96 +#define PyArray_COMPLEX192 NPY_COMPLEX192 +#endif + +#ifdef NPY_FLOAT128 +#define PyArray_FLOAT128 NPY_FLOAT128 +#define PyArray_COMPLEX256 NPY_COMPLEX256 +#endif + +#define PyArray_FLOAT32 NPY_FLOAT32 +#define PyArray_COMPLEX64 NPY_COMPLEX64 +#define PyArray_FLOAT64 NPY_FLOAT64 +#define PyArray_COMPLEX128 NPY_COMPLEX128 + + +#define PyArray_TYPECHAR NPY_TYPECHAR +#define PyArray_BOOLLTR NPY_BOOLLTR +#define PyArray_BYTELTR NPY_BYTELTR +#define PyArray_UBYTELTR NPY_UBYTELTR +#define PyArray_SHORTLTR NPY_SHORTLTR +#define PyArray_USHORTLTR NPY_USHORTLTR +#define PyArray_INTLTR NPY_INTLTR +#define PyArray_UINTLTR NPY_UINTLTR +#define PyArray_LONGLTR NPY_LONGLTR +#define PyArray_ULONGLTR NPY_ULONGLTR +#define PyArray_LONGLONGLTR NPY_LONGLONGLTR +#define PyArray_ULONGLONGLTR NPY_ULONGLONGLTR +#define PyArray_HALFLTR NPY_HALFLTR +#define PyArray_FLOATLTR NPY_FLOATLTR +#define PyArray_DOUBLELTR NPY_DOUBLELTR +#define PyArray_LONGDOUBLELTR NPY_LONGDOUBLELTR +#define PyArray_CFLOATLTR NPY_CFLOATLTR +#define PyArray_CDOUBLELTR NPY_CDOUBLELTR +#define PyArray_CLONGDOUBLELTR NPY_CLONGDOUBLELTR +#define PyArray_OBJECTLTR NPY_OBJECTLTR +#define PyArray_STRINGLTR NPY_STRINGLTR +#define PyArray_STRINGLTR2 NPY_STRINGLTR2 +#define PyArray_UNICODELTR NPY_UNICODELTR +#define PyArray_VOIDLTR NPY_VOIDLTR +#define PyArray_DATETIMELTR NPY_DATETIMELTR +#define PyArray_TIMEDELTALTR NPY_TIMEDELTALTR +#define PyArray_CHARLTR NPY_CHARLTR +#define PyArray_INTPLTR NPY_INTPLTR +#define PyArray_UINTPLTR NPY_UINTPLTR +#define PyArray_GENBOOLLTR NPY_GENBOOLLTR +#define PyArray_SIGNEDLTR NPY_SIGNEDLTR +#define PyArray_UNSIGNEDLTR NPY_UNSIGNEDLTR +#define PyArray_FLOATINGLTR NPY_FLOATINGLTR +#define PyArray_COMPLEXLTR NPY_COMPLEXLTR + +#define PyArray_QUICKSORT NPY_QUICKSORT +#define PyArray_HEAPSORT NPY_HEAPSORT +#define PyArray_MERGESORT NPY_MERGESORT +#define PyArray_SORTKIND NPY_SORTKIND +#define PyArray_NSORTS NPY_NSORTS + +#define PyArray_NOSCALAR NPY_NOSCALAR +#define PyArray_BOOL_SCALAR NPY_BOOL_SCALAR +#define PyArray_INTPOS_SCALAR NPY_INTPOS_SCALAR +#define PyArray_INTNEG_SCALAR NPY_INTNEG_SCALAR +#define PyArray_FLOAT_SCALAR NPY_FLOAT_SCALAR +#define PyArray_COMPLEX_SCALAR NPY_COMPLEX_SCALAR +#define PyArray_OBJECT_SCALAR NPY_OBJECT_SCALAR +#define PyArray_SCALARKIND NPY_SCALARKIND +#define PyArray_NSCALARKINDS NPY_NSCALARKINDS + +#define PyArray_ANYORDER NPY_ANYORDER +#define PyArray_CORDER NPY_CORDER +#define PyArray_FORTRANORDER NPY_FORTRANORDER +#define PyArray_ORDER NPY_ORDER + +#define PyDescr_ISBOOL PyDataType_ISBOOL +#define PyDescr_ISUNSIGNED PyDataType_ISUNSIGNED +#define PyDescr_ISSIGNED PyDataType_ISSIGNED +#define PyDescr_ISINTEGER PyDataType_ISINTEGER +#define PyDescr_ISFLOAT PyDataType_ISFLOAT +#define PyDescr_ISNUMBER PyDataType_ISNUMBER +#define PyDescr_ISSTRING PyDataType_ISSTRING +#define PyDescr_ISCOMPLEX PyDataType_ISCOMPLEX +#define PyDescr_ISPYTHON PyDataType_ISPYTHON +#define PyDescr_ISFLEXIBLE PyDataType_ISFLEXIBLE +#define PyDescr_ISUSERDEF PyDataType_ISUSERDEF +#define PyDescr_ISEXTENDED PyDataType_ISEXTENDED +#define PyDescr_ISOBJECT PyDataType_ISOBJECT +#define PyDescr_HASFIELDS PyDataType_HASFIELDS + +#define PyArray_LITTLE NPY_LITTLE +#define PyArray_BIG NPY_BIG +#define PyArray_NATIVE NPY_NATIVE +#define PyArray_SWAP NPY_SWAP +#define PyArray_IGNORE NPY_IGNORE + +#define PyArray_NATBYTE NPY_NATBYTE +#define PyArray_OPPBYTE NPY_OPPBYTE + +#define 
PyArray_MAX_ELSIZE NPY_MAX_ELSIZE
+
+#define PyArray_USE_PYMEM NPY_USE_PYMEM
+
+#define PyArray_RemoveLargest PyArray_RemoveSmallest
+
+#define PyArray_UCS4 npy_ucs4
+
+#endif  /* NUMPY_CORE_INCLUDE_NUMPY_OLD_DEFINES_H_ */
diff --git a/.local/lib/python3.8/site-packages/numpy/core/include/numpy/oldnumeric.h b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/oldnumeric.h
new file mode 100644
index 0000000..6604e8d
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/oldnumeric.h
@@ -0,0 +1,32 @@
+#ifndef NUMPY_CORE_INCLUDE_NUMPY_OLDNUMERIC_H_
+#define NUMPY_CORE_INCLUDE_NUMPY_OLDNUMERIC_H_
+
+/* FIXME -- this file can be deleted? */
+
+#include "arrayobject.h"
+
+#ifndef PYPY_VERSION
+#ifndef REFCOUNT
+#  define REFCOUNT NPY_REFCOUNT
+#  define MAX_ELSIZE 16
+#endif
+#endif
+
+#define PyArray_UNSIGNED_TYPES
+#define PyArray_SBYTE NPY_BYTE
+#define PyArray_CopyArray PyArray_CopyInto
+#define _PyArray_multiply_list PyArray_MultiplyIntList
+#define PyArray_ISSPACESAVER(m) NPY_FALSE
+#define PyScalarArray_Check PyArray_CheckScalar
+
+#define CONTIGUOUS NPY_CONTIGUOUS
+#define OWN_DIMENSIONS 0
+#define OWN_STRIDES 0
+#define OWN_DATA NPY_OWNDATA
+#define SAVESPACE 0
+#define SAVESPACEBIT 0
+
+#undef import_array
+#define import_array() { if (_import_array() < 0) {PyErr_Print(); PyErr_SetString(PyExc_ImportError, "numpy.core.multiarray failed to import"); } }
+
+#endif  /* NUMPY_CORE_INCLUDE_NUMPY_OLDNUMERIC_H_ */
diff --git a/.local/lib/python3.8/site-packages/numpy/core/include/numpy/random/bitgen.h b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/random/bitgen.h
new file mode 100644
index 0000000..162dd5c
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/random/bitgen.h
@@ -0,0 +1,20 @@
+#ifndef NUMPY_CORE_INCLUDE_NUMPY_RANDOM_BITGEN_H_
+#define NUMPY_CORE_INCLUDE_NUMPY_RANDOM_BITGEN_H_
+
+#pragma once
+#include <stddef.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+/* Must match the declaration in numpy/random/<any>.pxd */
+
+typedef struct bitgen {
+  void *state;
+  uint64_t (*next_uint64)(void *st);
+  uint32_t (*next_uint32)(void *st);
+  double (*next_double)(void *st);
+  uint64_t (*next_raw)(void *st);
+} bitgen_t;
+
+
+#endif  /* NUMPY_CORE_INCLUDE_NUMPY_RANDOM_BITGEN_H_ */
diff --git a/.local/lib/python3.8/site-packages/numpy/core/include/numpy/random/distributions.h b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/random/distributions.h
new file mode 100644
index 0000000..dacf778
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/random/distributions.h
@@ -0,0 +1,209 @@
+#ifndef NUMPY_CORE_INCLUDE_NUMPY_RANDOM_DISTRIBUTIONS_H_
+#define NUMPY_CORE_INCLUDE_NUMPY_RANDOM_DISTRIBUTIONS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <Python.h>
+#include "numpy/npy_common.h"
+#include <stddef.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "numpy/npy_math.h"
+#include "numpy/random/bitgen.h"
+
+/*
+ * RAND_INT_TYPE is used to share integer generators with RandomState which
+ * used long in place of int64_t. If changing a distribution that uses
+ * RAND_INT_TYPE, then the original unmodified copy must be retained for
+ * use in RandomState by copying to the legacy distributions source file.
+ */
+#ifdef NP_RANDOM_LEGACY
+#define RAND_INT_TYPE long
+#define RAND_INT_MAX LONG_MAX
+#else
+#define RAND_INT_TYPE int64_t
+#define RAND_INT_MAX INT64_MAX
+#endif
+
+#if defined(_MSC_VER) || defined(__CYGWIN__)
+#define DECLDIR __declspec(dllexport)
+#else
+#define DECLDIR extern
+#endif
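+
+/*
+ * [Editor's sketch, not part of the upstream header] Every generator
+ * declared below draws its bits from a `bitgen_t *` (see bitgen.h above),
+ * for example:
+ *
+ *     bitgen_t *bg = ...;  // usually obtained from a NumPy BitGenerator
+ *     double u = random_standard_uniform(bg);
+ *     double g = random_gamma(bg, 2.0, 1.0);
+ *
+ * On the Python side the pointer is normally fetched from a BitGenerator's
+ * capsule rather than constructed by hand.
+ */
+
+#ifndef MIN
+#define MIN(x, y) (((x) < (y)) ? 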
x : y) +#define MAX(x, y) (((x) > (y)) ? x : y) +#endif + +#ifndef M_PI +#define M_PI 3.14159265358979323846264338328 +#endif + +typedef struct s_binomial_t { + int has_binomial; /* !=0: following parameters initialized for binomial */ + double psave; + RAND_INT_TYPE nsave; + double r; + double q; + double fm; + RAND_INT_TYPE m; + double p1; + double xm; + double xl; + double xr; + double c; + double laml; + double lamr; + double p2; + double p3; + double p4; +} binomial_t; + +DECLDIR float random_standard_uniform_f(bitgen_t *bitgen_state); +DECLDIR double random_standard_uniform(bitgen_t *bitgen_state); +DECLDIR void random_standard_uniform_fill(bitgen_t *, npy_intp, double *); +DECLDIR void random_standard_uniform_fill_f(bitgen_t *, npy_intp, float *); + +DECLDIR int64_t random_positive_int64(bitgen_t *bitgen_state); +DECLDIR int32_t random_positive_int32(bitgen_t *bitgen_state); +DECLDIR int64_t random_positive_int(bitgen_t *bitgen_state); +DECLDIR uint64_t random_uint(bitgen_t *bitgen_state); + +DECLDIR double random_standard_exponential(bitgen_t *bitgen_state); +DECLDIR float random_standard_exponential_f(bitgen_t *bitgen_state); +DECLDIR void random_standard_exponential_fill(bitgen_t *, npy_intp, double *); +DECLDIR void random_standard_exponential_fill_f(bitgen_t *, npy_intp, float *); +DECLDIR void random_standard_exponential_inv_fill(bitgen_t *, npy_intp, double *); +DECLDIR void random_standard_exponential_inv_fill_f(bitgen_t *, npy_intp, float *); + +DECLDIR double random_standard_normal(bitgen_t *bitgen_state); +DECLDIR float random_standard_normal_f(bitgen_t *bitgen_state); +DECLDIR void random_standard_normal_fill(bitgen_t *, npy_intp, double *); +DECLDIR void random_standard_normal_fill_f(bitgen_t *, npy_intp, float *); +DECLDIR double random_standard_gamma(bitgen_t *bitgen_state, double shape); +DECLDIR float random_standard_gamma_f(bitgen_t *bitgen_state, float shape); + +DECLDIR double random_normal(bitgen_t *bitgen_state, double loc, double scale); + +DECLDIR double random_gamma(bitgen_t *bitgen_state, double shape, double scale); +DECLDIR float random_gamma_f(bitgen_t *bitgen_state, float shape, float scale); + +DECLDIR double random_exponential(bitgen_t *bitgen_state, double scale); +DECLDIR double random_uniform(bitgen_t *bitgen_state, double lower, double range); +DECLDIR double random_beta(bitgen_t *bitgen_state, double a, double b); +DECLDIR double random_chisquare(bitgen_t *bitgen_state, double df); +DECLDIR double random_f(bitgen_t *bitgen_state, double dfnum, double dfden); +DECLDIR double random_standard_cauchy(bitgen_t *bitgen_state); +DECLDIR double random_pareto(bitgen_t *bitgen_state, double a); +DECLDIR double random_weibull(bitgen_t *bitgen_state, double a); +DECLDIR double random_power(bitgen_t *bitgen_state, double a); +DECLDIR double random_laplace(bitgen_t *bitgen_state, double loc, double scale); +DECLDIR double random_gumbel(bitgen_t *bitgen_state, double loc, double scale); +DECLDIR double random_logistic(bitgen_t *bitgen_state, double loc, double scale); +DECLDIR double random_lognormal(bitgen_t *bitgen_state, double mean, double sigma); +DECLDIR double random_rayleigh(bitgen_t *bitgen_state, double mode); +DECLDIR double random_standard_t(bitgen_t *bitgen_state, double df); +DECLDIR double random_noncentral_chisquare(bitgen_t *bitgen_state, double df, + double nonc); +DECLDIR double random_noncentral_f(bitgen_t *bitgen_state, double dfnum, + double dfden, double nonc); +DECLDIR double random_wald(bitgen_t *bitgen_state, double mean, double 
scale); +DECLDIR double random_vonmises(bitgen_t *bitgen_state, double mu, double kappa); +DECLDIR double random_triangular(bitgen_t *bitgen_state, double left, double mode, + double right); + +DECLDIR RAND_INT_TYPE random_poisson(bitgen_t *bitgen_state, double lam); +DECLDIR RAND_INT_TYPE random_negative_binomial(bitgen_t *bitgen_state, double n, + double p); + +DECLDIR int64_t random_binomial(bitgen_t *bitgen_state, double p, + int64_t n, binomial_t *binomial); + +DECLDIR int64_t random_logseries(bitgen_t *bitgen_state, double p); +DECLDIR int64_t random_geometric(bitgen_t *bitgen_state, double p); +DECLDIR RAND_INT_TYPE random_geometric_search(bitgen_t *bitgen_state, double p); +DECLDIR RAND_INT_TYPE random_zipf(bitgen_t *bitgen_state, double a); +DECLDIR int64_t random_hypergeometric(bitgen_t *bitgen_state, + int64_t good, int64_t bad, int64_t sample); +DECLDIR uint64_t random_interval(bitgen_t *bitgen_state, uint64_t max); + +/* Generate random uint64 numbers in closed interval [off, off + rng]. */ +DECLDIR uint64_t random_bounded_uint64(bitgen_t *bitgen_state, uint64_t off, + uint64_t rng, uint64_t mask, + bool use_masked); + +/* Generate random uint32 numbers in closed interval [off, off + rng]. */ +DECLDIR uint32_t random_buffered_bounded_uint32(bitgen_t *bitgen_state, + uint32_t off, uint32_t rng, + uint32_t mask, bool use_masked, + int *bcnt, uint32_t *buf); +DECLDIR uint16_t random_buffered_bounded_uint16(bitgen_t *bitgen_state, + uint16_t off, uint16_t rng, + uint16_t mask, bool use_masked, + int *bcnt, uint32_t *buf); +DECLDIR uint8_t random_buffered_bounded_uint8(bitgen_t *bitgen_state, uint8_t off, + uint8_t rng, uint8_t mask, + bool use_masked, int *bcnt, + uint32_t *buf); +DECLDIR npy_bool random_buffered_bounded_bool(bitgen_t *bitgen_state, npy_bool off, + npy_bool rng, npy_bool mask, + bool use_masked, int *bcnt, + uint32_t *buf); + +DECLDIR void random_bounded_uint64_fill(bitgen_t *bitgen_state, uint64_t off, + uint64_t rng, npy_intp cnt, + bool use_masked, uint64_t *out); +DECLDIR void random_bounded_uint32_fill(bitgen_t *bitgen_state, uint32_t off, + uint32_t rng, npy_intp cnt, + bool use_masked, uint32_t *out); +DECLDIR void random_bounded_uint16_fill(bitgen_t *bitgen_state, uint16_t off, + uint16_t rng, npy_intp cnt, + bool use_masked, uint16_t *out); +DECLDIR void random_bounded_uint8_fill(bitgen_t *bitgen_state, uint8_t off, + uint8_t rng, npy_intp cnt, + bool use_masked, uint8_t *out); +DECLDIR void random_bounded_bool_fill(bitgen_t *bitgen_state, npy_bool off, + npy_bool rng, npy_intp cnt, + bool use_masked, npy_bool *out); + +DECLDIR void random_multinomial(bitgen_t *bitgen_state, RAND_INT_TYPE n, RAND_INT_TYPE *mnix, + double *pix, npy_intp d, binomial_t *binomial); + +/* multivariate hypergeometric, "count" method */ +DECLDIR int random_multivariate_hypergeometric_count(bitgen_t *bitgen_state, + int64_t total, + size_t num_colors, int64_t *colors, + int64_t nsample, + size_t num_variates, int64_t *variates); + +/* multivariate hypergeometric, "marginals" method */ +DECLDIR void random_multivariate_hypergeometric_marginals(bitgen_t *bitgen_state, + int64_t total, + size_t num_colors, int64_t *colors, + int64_t nsample, + size_t num_variates, int64_t *variates); + +/* Common to legacy-distributions.c and distributions.c but not exported */ + +RAND_INT_TYPE random_binomial_btpe(bitgen_t *bitgen_state, + RAND_INT_TYPE n, + double p, + binomial_t *binomial); +RAND_INT_TYPE random_binomial_inversion(bitgen_t *bitgen_state, + RAND_INT_TYPE n, + double p, + 
binomial_t *binomial); +double random_loggam(double x); +static NPY_INLINE double next_double(bitgen_t *bitgen_state) { + return bitgen_state->next_double(bitgen_state->state); +} + +#ifdef __cplusplus +} +#endif + +#endif /* NUMPY_CORE_INCLUDE_NUMPY_RANDOM_DISTRIBUTIONS_H_ */ diff --git a/.local/lib/python3.8/site-packages/numpy/core/include/numpy/ufunc_api.txt b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/ufunc_api.txt new file mode 100644 index 0000000..20a8a75 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/ufunc_api.txt @@ -0,0 +1,335 @@ + +================= +NumPy Ufunc C-API +================= +:: + + PyObject * + PyUFunc_FromFuncAndData(PyUFuncGenericFunction *func, void + **data, char *types, int ntypes, int nin, int + nout, int identity, const char *name, const + char *doc, int unused) + + +:: + + int + PyUFunc_RegisterLoopForType(PyUFuncObject *ufunc, int + usertype, PyUFuncGenericFunction + function, const int *arg_types, void + *data) + + +:: + + int + PyUFunc_GenericFunction(PyUFuncObject *NPY_UNUSED(ufunc) , PyObject + *NPY_UNUSED(args) , PyObject *NPY_UNUSED(kwds) + , PyArrayObject **NPY_UNUSED(op) ) + + +:: + + void + PyUFunc_f_f_As_d_d(char **args, npy_intp const *dimensions, npy_intp + const *steps, void *func) + + +:: + + void + PyUFunc_d_d(char **args, npy_intp const *dimensions, npy_intp const + *steps, void *func) + + +:: + + void + PyUFunc_f_f(char **args, npy_intp const *dimensions, npy_intp const + *steps, void *func) + + +:: + + void + PyUFunc_g_g(char **args, npy_intp const *dimensions, npy_intp const + *steps, void *func) + + +:: + + void + PyUFunc_F_F_As_D_D(char **args, npy_intp const *dimensions, npy_intp + const *steps, void *func) + + +:: + + void + PyUFunc_F_F(char **args, npy_intp const *dimensions, npy_intp const + *steps, void *func) + + +:: + + void + PyUFunc_D_D(char **args, npy_intp const *dimensions, npy_intp const + *steps, void *func) + + +:: + + void + PyUFunc_G_G(char **args, npy_intp const *dimensions, npy_intp const + *steps, void *func) + + +:: + + void + PyUFunc_O_O(char **args, npy_intp const *dimensions, npy_intp const + *steps, void *func) + + +:: + + void + PyUFunc_ff_f_As_dd_d(char **args, npy_intp const *dimensions, npy_intp + const *steps, void *func) + + +:: + + void + PyUFunc_ff_f(char **args, npy_intp const *dimensions, npy_intp const + *steps, void *func) + + +:: + + void + PyUFunc_dd_d(char **args, npy_intp const *dimensions, npy_intp const + *steps, void *func) + + +:: + + void + PyUFunc_gg_g(char **args, npy_intp const *dimensions, npy_intp const + *steps, void *func) + + +:: + + void + PyUFunc_FF_F_As_DD_D(char **args, npy_intp const *dimensions, npy_intp + const *steps, void *func) + + +:: + + void + PyUFunc_DD_D(char **args, npy_intp const *dimensions, npy_intp const + *steps, void *func) + + +:: + + void + PyUFunc_FF_F(char **args, npy_intp const *dimensions, npy_intp const + *steps, void *func) + + +:: + + void + PyUFunc_GG_G(char **args, npy_intp const *dimensions, npy_intp const + *steps, void *func) + + +:: + + void + PyUFunc_OO_O(char **args, npy_intp const *dimensions, npy_intp const + *steps, void *func) + + +:: + + void + PyUFunc_O_O_method(char **args, npy_intp const *dimensions, npy_intp + const *steps, void *func) + + +:: + + void + PyUFunc_OO_O_method(char **args, npy_intp const *dimensions, npy_intp + const *steps, void *func) + + +:: + + void + PyUFunc_On_Om(char **args, npy_intp const *dimensions, npy_intp const + *steps, void *func) + + +:: + + int + 
PyUFunc_GetPyValues(char *name, int *bufsize, int *errmask, PyObject
+                      **errobj)
+
+
+On return, if errobj is populated with a non-NULL value, the caller
+owns a new reference to errobj.
+
+::
+
+  int
+  PyUFunc_checkfperr(int errmask, PyObject *errobj, int *first)
+
+
+::
+
+  void
+  PyUFunc_clearfperr()
+
+
+::
+
+  int
+  PyUFunc_getfperr(void )
+
+
+::
+
+  int
+  PyUFunc_handlefperr(int errmask, PyObject *errobj, int retstatus, int
+                      *first)
+
+
+::
+
+  int
+  PyUFunc_ReplaceLoopBySignature(PyUFuncObject
+                                 *func, PyUFuncGenericFunction
+                                 newfunc, const int
+                                 *signature, PyUFuncGenericFunction
+                                 *oldfunc)
+
+
+::
+
+  PyObject *
+  PyUFunc_FromFuncAndDataAndSignature(PyUFuncGenericFunction *func, void
+                                      **data, char *types, int
+                                      ntypes, int nin, int nout, int
+                                      identity, const char *name, const
+                                      char *doc, int unused, const char
+                                      *signature)
+
+
+::
+
+  int
+  PyUFunc_SetUsesArraysAsData(void **NPY_UNUSED(data) , size_t
+                              NPY_UNUSED(i) )
+
+
+::
+
+  void
+  PyUFunc_e_e(char **args, npy_intp const *dimensions, npy_intp const
+              *steps, void *func)
+
+
+::
+
+  void
+  PyUFunc_e_e_As_f_f(char **args, npy_intp const *dimensions, npy_intp
+                     const *steps, void *func)
+
+
+::
+
+  void
+  PyUFunc_e_e_As_d_d(char **args, npy_intp const *dimensions, npy_intp
+                     const *steps, void *func)
+
+
+::
+
+  void
+  PyUFunc_ee_e(char **args, npy_intp const *dimensions, npy_intp const
+               *steps, void *func)
+
+
+::
+
+  void
+  PyUFunc_ee_e_As_ff_f(char **args, npy_intp const *dimensions, npy_intp
+                       const *steps, void *func)
+
+
+::
+
+  void
+  PyUFunc_ee_e_As_dd_d(char **args, npy_intp const *dimensions, npy_intp
+                       const *steps, void *func)
+
+
+::
+
+  int
+  PyUFunc_DefaultTypeResolver(PyUFuncObject *ufunc, NPY_CASTING
+                              casting, PyArrayObject
+                              **operands, PyObject
+                              *type_tup, PyArray_Descr **out_dtypes)
+
+
+This function applies the default type resolution rules
+for the provided ufunc.
+
+Returns 0 on success, -1 on error.
+
+::
+
+  int
+  PyUFunc_ValidateCasting(PyUFuncObject *ufunc, NPY_CASTING
+                          casting, PyArrayObject
+                          **operands, PyArray_Descr **dtypes)
+
+
+Validates that the input operands can be cast to
+the input types, and the output types can be cast to
+the output operands where provided.
+
+Returns 0 on success, -1 (with exception raised) on validation failure.
+
+::
+
+  int
+  PyUFunc_RegisterLoopForDescr(PyUFuncObject *ufunc, PyArray_Descr
+                               *user_dtype, PyUFuncGenericFunction
+                               function, PyArray_Descr
+                               **arg_dtypes, void *data)
+
+
+::
+
+  PyObject *
+  PyUFunc_FromFuncAndDataAndSignatureAndIdentity(PyUFuncGenericFunction
+                                                 *func, void
+                                                 **data, char
+                                                 *types, int ntypes, int
+                                                 nin, int nout, int
+                                                 identity, const char
+                                                 *name, const char
+                                                 *doc, const int
+                                                 unused, const char
+                                                 *signature, PyObject
+                                                 *identity_value)
+
+
diff --git a/.local/lib/python3.8/site-packages/numpy/core/include/numpy/ufuncobject.h b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/ufuncobject.h
new file mode 100644
index 0000000..bb06331
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/ufuncobject.h
@@ -0,0 +1,357 @@
+#ifndef NUMPY_CORE_INCLUDE_NUMPY_UFUNCOBJECT_H_
+#define NUMPY_CORE_INCLUDE_NUMPY_UFUNCOBJECT_H_
+
+#include <numpy/npy_math.h>
+#include <numpy/npy_common.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * The legacy generic inner loop for a standard element-wise or
+ * generalized ufunc.
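+ */
+typedef void (*PyUFuncGenericFunction)
+            (char **args,
+             npy_intp const *dimensions,
+             npy_intp const *strides,
+             void *innerloopdata);
+
+/*
+ * [Editor's sketch, not part of the upstream header] A typical 1-d inner
+ * loop with this signature, for a unary double -> double operation
+ * (`double_square` is a hypothetical name):
+ *
+ *     static void
+ *     double_square(char **args, npy_intp const *dimensions,
+ *                   npy_intp const *strides, void *innerloopdata)
+ *     {
+ *         npy_intp i, n = dimensions[0];
+ *         char *in = args[0], *out = args[1];
+ *         for (i = 0; i < n; i++, in += strides[0], out += strides[1]) {
+ *             *(double *)out = *(double *)in * *(double *)in;
+ *         }
+ *     }
+ */
+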
+/*
+ * The most generic one-dimensional inner loop for
+ * a masked standard element-wise ufunc. "Masked" here means that it skips
+ * doing calculations on any items for which the maskptr array has a true
+ * value.
+ */
+typedef void (PyUFunc_MaskedStridedInnerLoopFunc)(
+                char **dataptrs, npy_intp *strides,
+                char *maskptr, npy_intp mask_stride,
+                npy_intp count,
+                NpyAuxData *innerloopdata);
+
+/* Forward declaration for the type resolver and loop selector typedefs */
+struct _tagPyUFuncObject;
+
+/*
+ * Given the operands for calling a ufunc, should determine the
+ * calculation input and output data types and return an inner loop function.
+ * This function should validate that the casting rule is being followed,
+ * and fail if it is not.
+ *
+ * For backwards compatibility, the regular type resolution function does not
+ * support auxiliary data with object semantics. The type resolution call
+ * which returns a masked generic function returns a standard NpyAuxData
+ * object, for which the NPY_AUXDATA_FREE and NPY_AUXDATA_CLONE macros
+ * work.
+ *
+ * ufunc:             The ufunc object.
+ * casting:           The 'casting' parameter provided to the ufunc.
+ * operands:          An array of length (ufunc->nin + ufunc->nout),
+ *                    with the output parameters possibly NULL.
+ * type_tup:          Either NULL, or the type_tup passed to the ufunc.
+ * out_dtypes:        An array which should be populated with new
+ *                    references to (ufunc->nin + ufunc->nout) new
+ *                    dtypes, one for each input and output. These
+ *                    dtypes should all be in native-endian format.
+ *
+ * Should return 0 on success, -1 on failure (with exception set),
+ * or -2 if Py_NotImplemented should be returned.
+ */
+typedef int (PyUFunc_TypeResolutionFunc)(
+                struct _tagPyUFuncObject *ufunc,
+                NPY_CASTING casting,
+                PyArrayObject **operands,
+                PyObject *type_tup,
+                PyArray_Descr **out_dtypes);
+
+/*
+ * Legacy loop selector. (This should NOT normally be used and we can expect
+ * that only the `PyUFunc_DefaultLegacyInnerLoopSelector` is ever set).
+ * However, unlike the masked version, it probably still works.
+ *
+ * ufunc:             The ufunc object.
+ * dtypes:            An array which has been populated with dtypes,
+ *                    in most cases by the type resolution function
+ *                    for the same ufunc.
+ * out_innerloop:     Should be populated with the correct ufunc inner
+ *                    loop for the given type.
+ * out_innerloopdata: Should be populated with the void* data to
+ *                    be passed into the out_innerloop function.
+ * out_needs_api:     If the inner loop needs to use the Python API,
+ *                    should set this to 1, otherwise should leave
+ *                    this untouched.
+ */
+typedef int (PyUFunc_LegacyInnerLoopSelectionFunc)(
+                struct _tagPyUFuncObject *ufunc,
+                PyArray_Descr **dtypes,
+                PyUFuncGenericFunction *out_innerloop,
+                void **out_innerloopdata,
+                int *out_needs_api);
+
+
+typedef struct _tagPyUFuncObject {
+        PyObject_HEAD
+        /*
+         * nin:   Number of inputs
+         * nout:  Number of outputs
+         * nargs: Always nin + nout (Why is it stored?)
+         */
+        int nin, nout, nargs;
+
+        /*
+         * Identity for reduction, any of PyUFunc_One, PyUFunc_Zero,
+         * PyUFunc_MinusOne, PyUFunc_None, PyUFunc_ReorderableNone,
+         * PyUFunc_IdentityValue.
+         */
+        int identity;
+
+        /* Array of one-dimensional core loops */
+        PyUFuncGenericFunction *functions;
+        /* Array of funcdata that gets passed into the functions */
+        void **data;
+        /* The number of elements in 'functions' and 'data' */
+        int ntypes;
+
+        /* Used to be unused field 'check_return' */
+        int reserved1;
+
+        /* The name of the ufunc */
+        const char *name;
+
+        /* Array of type numbers, of size ('nargs' * 'ntypes') */
+        char *types;
+
+        /* Documentation string */
+        const char *doc;
+
+        void *ptr;
+        PyObject *obj;
+        PyObject *userloops;
+
+        /* generalized ufunc parameters */
+
+        /* 0 for scalar ufunc; 1 for generalized ufunc */
+        int core_enabled;
+        /* number of distinct dimension names in signature */
+        int core_num_dim_ix;
+
+        /*
+         * dimension indices of input/output argument k are stored in
+         * core_dim_ixs[core_offsets[k]..core_offsets[k]+core_num_dims[k]-1]
+         */
+
+        /* numbers of core dimensions of each argument */
+        int *core_num_dims;
+        /*
+         * dimension indices in a flattened form; indices
+         * are in the range of [0,core_num_dim_ix)
+         */
+        int *core_dim_ixs;
+        /*
+         * positions of 1st core dimensions of each
+         * argument in core_dim_ixs, equivalent to cumsum(core_num_dims)
+         */
+        int *core_offsets;
+        /* signature string for printing purposes */
+        char *core_signature;
+
+        /*
+         * A function which resolves the types and fills an array
+         * with the dtypes for the inputs and outputs.
+         */
+        PyUFunc_TypeResolutionFunc *type_resolver;
+        /*
+         * A function which returns an inner loop written for
+         * NumPy 1.6 and earlier ufuncs. This is for backwards
+         * compatibility, and may be NULL if inner_loop_selector
+         * is specified.
+         */
+        PyUFunc_LegacyInnerLoopSelectionFunc *legacy_inner_loop_selector;
+        /*
+         * This was blocked off to be the "new" inner loop selector in 1.7,
+         * but this was never implemented. (This is also why the above
+         * selector is called the "legacy" selector.)
+         */
+        #ifndef Py_LIMITED_API
+        vectorcallfunc vectorcall;
+        #else
+        void *vectorcall;
+        #endif
+
+        /* Was previously the `PyUFunc_MaskedInnerLoopSelectionFunc` */
+        void *_always_null_previously_masked_innerloop_selector;
+
+        /*
+         * List of flags for each operand when ufunc is called by nditer object.
+         * These flags will be used in addition to the default flags for each
+         * operand set by nditer object.
+         */
+        npy_uint32 *op_flags;
+
+        /*
+         * List of global flags used when ufunc is called by nditer object.
+         * These flags will be used in addition to the default global flags
+         * set by nditer object.
+         */
+        npy_uint32 iter_flags;
+
+        /* New in NPY_API_VERSION 0x0000000D and above */
+
+        /*
+         * for each core_num_dim_ix distinct dimension names,
+         * the possible "frozen" size (-1 if not frozen).
+         */
+        npy_intp *core_dim_sizes;
+
+        /*
+         * for each distinct core dimension, a set of UFUNC_CORE_DIM* flags
+         */
+        npy_uint32 *core_dim_flags;
+
+        /* Identity for reduction, when identity == PyUFunc_IdentityValue */
+        PyObject *identity_value;
+
+        /* New in NPY_API_VERSION 0x0000000F and above */
+
+        /* New private fields related to dispatching */
+        void *_dispatch_cache;
+        /* A PyListObject of `(tuple of DTypes, ArrayMethod/Promoter)` */
+        PyObject *_loops;
+} PyUFuncObject;
+
+#include "arrayobject.h"
+/* Generalized ufunc; 0x0001 reserved for possible use as CORE_ENABLED */
+/* the core dimension's size will be determined by the operands.
*/ +#define UFUNC_CORE_DIM_SIZE_INFERRED 0x0002 +/* the core dimension may be absent */ +#define UFUNC_CORE_DIM_CAN_IGNORE 0x0004 +/* flags inferred during execution */ +#define UFUNC_CORE_DIM_MISSING 0x00040000 + +#define UFUNC_ERR_IGNORE 0 +#define UFUNC_ERR_WARN 1 +#define UFUNC_ERR_RAISE 2 +#define UFUNC_ERR_CALL 3 +#define UFUNC_ERR_PRINT 4 +#define UFUNC_ERR_LOG 5 + + /* Python side integer mask */ + +#define UFUNC_MASK_DIVIDEBYZERO 0x07 +#define UFUNC_MASK_OVERFLOW 0x3f +#define UFUNC_MASK_UNDERFLOW 0x1ff +#define UFUNC_MASK_INVALID 0xfff + +#define UFUNC_SHIFT_DIVIDEBYZERO 0 +#define UFUNC_SHIFT_OVERFLOW 3 +#define UFUNC_SHIFT_UNDERFLOW 6 +#define UFUNC_SHIFT_INVALID 9 + + +#define UFUNC_OBJ_ISOBJECT 1 +#define UFUNC_OBJ_NEEDS_API 2 + + /* Default user error mode */ +#define UFUNC_ERR_DEFAULT \ + (UFUNC_ERR_WARN << UFUNC_SHIFT_DIVIDEBYZERO) + \ + (UFUNC_ERR_WARN << UFUNC_SHIFT_OVERFLOW) + \ + (UFUNC_ERR_WARN << UFUNC_SHIFT_INVALID) + +#if NPY_ALLOW_THREADS +#define NPY_LOOP_BEGIN_THREADS do {if (!(loop->obj & UFUNC_OBJ_NEEDS_API)) _save = PyEval_SaveThread();} while (0); +#define NPY_LOOP_END_THREADS do {if (!(loop->obj & UFUNC_OBJ_NEEDS_API)) PyEval_RestoreThread(_save);} while (0); +#else +#define NPY_LOOP_BEGIN_THREADS +#define NPY_LOOP_END_THREADS +#endif + +/* + * UFunc has unit of 0, and the order of operations can be reordered + * This case allows reduction with multiple axes at once. + */ +#define PyUFunc_Zero 0 +/* + * UFunc has unit of 1, and the order of operations can be reordered + * This case allows reduction with multiple axes at once. + */ +#define PyUFunc_One 1 +/* + * UFunc has unit of -1, and the order of operations can be reordered + * This case allows reduction with multiple axes at once. Intended for + * bitwise_and reduction. + */ +#define PyUFunc_MinusOne 2 +/* + * UFunc has no unit, and the order of operations cannot be reordered. + * This case does not allow reduction with multiple axes at once. + */ +#define PyUFunc_None -1 +/* + * UFunc has no unit, and the order of operations can be reordered + * This case allows reduction with multiple axes at once. + */ +#define PyUFunc_ReorderableNone -2 +/* + * UFunc unit is an identity_value, and the order of operations can be reordered + * This case allows reduction with multiple axes at once. + */ +#define PyUFunc_IdentityValue -3 + + +#define UFUNC_REDUCE 0 +#define UFUNC_ACCUMULATE 1 +#define UFUNC_REDUCEAT 2 +#define UFUNC_OUTER 3 + + +typedef struct { + int nin; + int nout; + PyObject *callable; +} PyUFunc_PyFuncData; + +/* A linked-list of function information for + user-defined 1-d loops. + */ +typedef struct _loop1d_info { + PyUFuncGenericFunction func; + void *data; + int *arg_types; + struct _loop1d_info *next; + int nargs; + PyArray_Descr **arg_dtypes; +} PyUFunc_Loop1d; + + +#include "__ufunc_api.h" + +#define UFUNC_PYVALS_NAME "UFUNC_PYVALS" + +/* + * THESE MACROS ARE DEPRECATED. + * Use npy_set_floatstatus_* in the npymath library. 
+ */
+#define UFUNC_FPE_DIVIDEBYZERO  NPY_FPE_DIVIDEBYZERO
+#define UFUNC_FPE_OVERFLOW      NPY_FPE_OVERFLOW
+#define UFUNC_FPE_UNDERFLOW     NPY_FPE_UNDERFLOW
+#define UFUNC_FPE_INVALID       NPY_FPE_INVALID
+
+#define generate_divbyzero_error() npy_set_floatstatus_divbyzero()
+#define generate_overflow_error() npy_set_floatstatus_overflow()
+
+        /* Make sure it gets defined if it isn't already */
+#ifndef UFUNC_NOFPE
+/* Clear the floating point exception default of Borland C++ */
+#if defined(__BORLANDC__)
+#define UFUNC_NOFPE _control87(MCW_EM, MCW_EM);
+#else
+#define UFUNC_NOFPE
+#endif
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  /* NUMPY_CORE_INCLUDE_NUMPY_UFUNCOBJECT_H_ */
diff --git a/.local/lib/python3.8/site-packages/numpy/core/include/numpy/utils.h b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/utils.h
new file mode 100644
index 0000000..e2b57f9
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/core/include/numpy/utils.h
@@ -0,0 +1,37 @@
+#ifndef NUMPY_CORE_INCLUDE_NUMPY_UTILS_H_
+#define NUMPY_CORE_INCLUDE_NUMPY_UTILS_H_
+
+#ifndef __COMP_NPY_UNUSED
+    #if defined(__GNUC__)
+        #define __COMP_NPY_UNUSED __attribute__ ((__unused__))
+    #elif defined(__ICC)
+        #define __COMP_NPY_UNUSED __attribute__ ((__unused__))
+    #elif defined(__clang__)
+        #define __COMP_NPY_UNUSED __attribute__ ((unused))
+    #else
+        #define __COMP_NPY_UNUSED
+    #endif
+#endif
+
+#if defined(__GNUC__) || defined(__ICC) || defined(__clang__)
+    #define NPY_DECL_ALIGNED(x) __attribute__ ((aligned (x)))
+#elif defined(_MSC_VER)
+    #define NPY_DECL_ALIGNED(x) __declspec(align(x))
+#else
+    #define NPY_DECL_ALIGNED(x)
+#endif
+
+/* Use this to tag a variable as not used. It will remove unused variable
+ * warnings on supported platforms (see __COMP_NPY_UNUSED) and mangle the
+ * variable to avoid accidental use */
+#define NPY_UNUSED(x) (__NPY_UNUSED_TAGGED ## x) __COMP_NPY_UNUSED
+#define NPY_EXPAND(x) x
+
+#define NPY_STRINGIFY(x) #x
+#define NPY_TOSTRING(x) NPY_STRINGIFY(x)
+
+#define NPY_CAT__(a, b) a ## b
+#define NPY_CAT_(a, b) NPY_CAT__(a, b)
+#define NPY_CAT(a, b) NPY_CAT_(a, b)
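+
+/*
+ * [Editor's sketch, not part of the upstream header] NPY_UNUSED in practice:
+ *
+ *     static int
+ *     callback(PyObject *NPY_UNUSED(self), PyObject *args);
+ *
+ * The parameter is renamed to __NPY_UNUSED_TAGGEDself and tagged with the
+ * compiler's unused attribute, so any accidental use of it fails to compile.
+ */
+
+#endif  /* NUMPY_CORE_INCLUDE_NUMPY_UTILS_H_ */
diff --git a/.local/lib/python3.8/site-packages/numpy/core/lib/libnpymath.a b/.local/lib/python3.8/site-packages/numpy/core/lib/libnpymath.a
new file mode 100644
index 0000000..de83790
Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/lib/libnpymath.a differ
diff --git a/.local/lib/python3.8/site-packages/numpy/core/lib/npy-pkg-config/mlib.ini b/.local/lib/python3.8/site-packages/numpy/core/lib/npy-pkg-config/mlib.ini
new file mode 100644
index 0000000..5840f5e
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/core/lib/npy-pkg-config/mlib.ini
@@ -0,0 +1,12 @@
+[meta]
+Name = mlib
+Description = Math library used with this version of numpy
+Version = 1.0
+
+[default]
+Libs=-lm
+Cflags=
+
+[msvc]
+Libs=m.lib
+Cflags=
diff --git a/.local/lib/python3.8/site-packages/numpy/core/lib/npy-pkg-config/npymath.ini b/.local/lib/python3.8/site-packages/numpy/core/lib/npy-pkg-config/npymath.ini
new file mode 100644
index 0000000..3e465ad
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/core/lib/npy-pkg-config/npymath.ini
@@ -0,0 +1,20 @@
+[meta]
+Name=npymath
+Description=Portable, core math library implementing C99 standard
+Version=0.1
+
+[variables]
+pkgname=numpy.core
+prefix=${pkgdir}
+libdir=${prefix}/lib
+includedir=${prefix}/include
+
+[default]
+Libs=-L${libdir} -lnpymath
+Cflags=-I${includedir}
+Requires=mlib
+
+[msvc]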
+Libs=/LIBPATH:${libdir} npymath.lib
+Cflags=/INCLUDE:${includedir}
+Requires=mlib
diff --git a/.local/lib/python3.8/site-packages/numpy/core/memmap.py b/.local/lib/python3.8/site-packages/numpy/core/memmap.py
new file mode 100644
index 0000000..b0d9cb3
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/core/memmap.py
@@ -0,0 +1,337 @@
+from contextlib import nullcontext
+
+import numpy as np
+from .numeric import uint8, ndarray, dtype
+from numpy.compat import os_fspath, is_pathlib_path
+from numpy.core.overrides import set_module
+
+__all__ = ['memmap']
+
+dtypedescr = dtype
+valid_filemodes = ["r", "c", "r+", "w+"]
+writeable_filemodes = ["r+", "w+"]
+
+mode_equivalents = {
+    "readonly":"r",
+    "copyonwrite":"c",
+    "readwrite":"r+",
+    "write":"w+"
+    }
+
+
+@set_module('numpy')
+class memmap(ndarray):
+    """Create a memory-map to an array stored in a *binary* file on disk.
+
+    Memory-mapped files are used for accessing small segments of large files
+    on disk, without reading the entire file into memory. NumPy's
+    memmaps are array-like objects. This differs from Python's ``mmap``
+    module, which uses file-like objects.
+
+    This subclass of ndarray has some unpleasant interactions with
+    some operations, because it doesn't quite fit properly as a subclass.
+    An alternative to using this subclass is to create the ``mmap``
+    object yourself, then create an ndarray with ndarray.__new__ directly,
+    passing the object created in its 'buffer=' parameter.
+
+    This class may at some point be turned into a factory function
+    which returns a view into an mmap buffer.
+
+    Flush the memmap instance to write the changes to the file. Currently there
+    is no API to close the underlying ``mmap``. It is tricky to ensure the
+    resource is actually closed, since it may be shared between different
+    memmap instances.
+
+
+    Parameters
+    ----------
+    filename : str, file-like object, or pathlib.Path instance
+        The file name or file object to be used as the array data buffer.
+    dtype : data-type, optional
+        The data-type used to interpret the file contents.
+        Default is `uint8`.
+    mode : {'r+', 'r', 'w+', 'c'}, optional
+        The file is opened in this mode:
+
+        +------+-------------------------------------------------------------+
+        | 'r'  | Open existing file for reading only.                        |
+        +------+-------------------------------------------------------------+
+        | 'r+' | Open existing file for reading and writing.                 |
+        +------+-------------------------------------------------------------+
+        | 'w+' | Create or overwrite existing file for reading and writing.  |
+        +------+-------------------------------------------------------------+
+        | 'c'  | Copy-on-write: assignments affect data in memory, but       |
+        |      | changes are not saved to disk. The file on disk is          |
+        |      | read-only.                                                  |
+        +------+-------------------------------------------------------------+
+
+        Default is 'r+'.
+    offset : int, optional
+        In the file, array data starts at this offset. Since `offset` is
+        measured in bytes, it should normally be a multiple of the byte-size
+        of `dtype`. When ``mode != 'r'``, even positive offsets beyond end of
+        file are valid; The file will be extended to accommodate the
+        additional data. By default, ``memmap`` will start at the beginning of
+        the file, even if ``filename`` is a file pointer ``fp`` and
+        ``fp.tell() != 0``.
+    shape : tuple, optional
+        The desired shape of the array. If ``mode == 'r'`` and the number
+        of remaining bytes after `offset` is not a multiple of the byte-size
+        of `dtype`, you must specify `shape`.
By default, the returned array + will be 1-D with the number of elements determined by file size + and data-type. + order : {'C', 'F'}, optional + Specify the order of the ndarray memory layout: + :term:`row-major`, C-style or :term:`column-major`, + Fortran-style. This only has an effect if the shape is + greater than 1-D. The default order is 'C'. + + Attributes + ---------- + filename : str or pathlib.Path instance + Path to the mapped file. + offset : int + Offset position in the file. + mode : str + File mode. + + Methods + ------- + flush + Flush any changes in memory to file on disk. + When you delete a memmap object, flush is called first to write + changes to disk. + + + See also + -------- + lib.format.open_memmap : Create or load a memory-mapped ``.npy`` file. + + Notes + ----- + The memmap object can be used anywhere an ndarray is accepted. + Given a memmap ``fp``, ``isinstance(fp, numpy.ndarray)`` returns + ``True``. + + Memory-mapped files cannot be larger than 2GB on 32-bit systems. + + When a memmap causes a file to be created or extended beyond its + current size in the filesystem, the contents of the new part are + unspecified. On systems with POSIX filesystem semantics, the extended + part will be filled with zero bytes. + + Examples + -------- + >>> data = np.arange(12, dtype='float32') + >>> data.resize((3,4)) + + This example uses a temporary file so that doctest doesn't write + files to your directory. You would use a 'normal' filename. + + >>> from tempfile import mkdtemp + >>> import os.path as path + >>> filename = path.join(mkdtemp(), 'newfile.dat') + + Create a memmap with dtype and shape that matches our data: + + >>> fp = np.memmap(filename, dtype='float32', mode='w+', shape=(3,4)) + >>> fp + memmap([[0., 0., 0., 0.], + [0., 0., 0., 0.], + [0., 0., 0., 0.]], dtype=float32) + + Write data to memmap array: + + >>> fp[:] = data[:] + >>> fp + memmap([[ 0., 1., 2., 3.], + [ 4., 5., 6., 7.], + [ 8., 9., 10., 11.]], dtype=float32) + + >>> fp.filename == path.abspath(filename) + True + + Flushes memory changes to disk in order to read them back + + >>> fp.flush() + + Load the memmap and verify data was stored: + + >>> newfp = np.memmap(filename, dtype='float32', mode='r', shape=(3,4)) + >>> newfp + memmap([[ 0., 1., 2., 3.], + [ 4., 5., 6., 7.], + [ 8., 9., 10., 11.]], dtype=float32) + + Read-only memmap: + + >>> fpr = np.memmap(filename, dtype='float32', mode='r', shape=(3,4)) + >>> fpr.flags.writeable + False + + Copy-on-write memmap: + + >>> fpc = np.memmap(filename, dtype='float32', mode='c', shape=(3,4)) + >>> fpc.flags.writeable + True + + It's possible to assign to copy-on-write array, but values are only + written into the memory copy of the array, and not written to disk: + + >>> fpc + memmap([[ 0., 1., 2., 3.], + [ 4., 5., 6., 7.], + [ 8., 9., 10., 11.]], dtype=float32) + >>> fpc[0,:] = 0 + >>> fpc + memmap([[ 0., 0., 0., 0.], + [ 4., 5., 6., 7.], + [ 8., 9., 10., 11.]], dtype=float32) + + File on disk is unchanged: + + >>> fpr + memmap([[ 0., 1., 2., 3.], + [ 4., 5., 6., 7.], + [ 8., 9., 10., 11.]], dtype=float32) + + Offset into a memmap: + + >>> fpo = np.memmap(filename, dtype='float32', mode='r', offset=16) + >>> fpo + memmap([ 4., 5., 6., 7., 8., 9., 10., 11.], dtype=float32) + + """ + + __array_priority__ = -100.0 + + def __new__(subtype, filename, dtype=uint8, mode='r+', offset=0, + shape=None, order='C'): + # Import here to minimize 'import numpy' overhead + import mmap + import os.path + try: + mode = mode_equivalents[mode] + except KeyError as e: + 
if mode not in valid_filemodes: + raise ValueError( + "mode must be one of {!r} (got {!r})" + .format(valid_filemodes + list(mode_equivalents.keys()), mode) + ) from None + + if mode == 'w+' and shape is None: + raise ValueError("shape must be given") + + if hasattr(filename, 'read'): + f_ctx = nullcontext(filename) + else: + f_ctx = open(os_fspath(filename), ('r' if mode == 'c' else mode)+'b') + + with f_ctx as fid: + fid.seek(0, 2) + flen = fid.tell() + descr = dtypedescr(dtype) + _dbytes = descr.itemsize + + if shape is None: + bytes = flen - offset + if bytes % _dbytes: + raise ValueError("Size of available data is not a " + "multiple of the data-type size.") + size = bytes // _dbytes + shape = (size,) + else: + if not isinstance(shape, tuple): + shape = (shape,) + size = np.intp(1) # avoid default choice of np.int_, which might overflow + for k in shape: + size *= k + + bytes = int(offset + size*_dbytes) + + if mode in ('w+', 'r+') and flen < bytes: + fid.seek(bytes - 1, 0) + fid.write(b'\0') + fid.flush() + + if mode == 'c': + acc = mmap.ACCESS_COPY + elif mode == 'r': + acc = mmap.ACCESS_READ + else: + acc = mmap.ACCESS_WRITE + + start = offset - offset % mmap.ALLOCATIONGRANULARITY + bytes -= start + array_offset = offset - start + mm = mmap.mmap(fid.fileno(), bytes, access=acc, offset=start) + + self = ndarray.__new__(subtype, shape, dtype=descr, buffer=mm, + offset=array_offset, order=order) + self._mmap = mm + self.offset = offset + self.mode = mode + + if is_pathlib_path(filename): + # special case - if we were constructed with a pathlib.path, + # then filename is a path object, not a string + self.filename = filename.resolve() + elif hasattr(fid, "name") and isinstance(fid.name, str): + # py3 returns int for TemporaryFile().name + self.filename = os.path.abspath(fid.name) + # same as memmap copies (e.g. memmap + 1) + else: + self.filename = None + + return self + + def __array_finalize__(self, obj): + if hasattr(obj, '_mmap') and np.may_share_memory(self, obj): + self._mmap = obj._mmap + self.filename = obj.filename + self.offset = obj.offset + self.mode = obj.mode + else: + self._mmap = None + self.filename = None + self.offset = None + self.mode = None + + def flush(self): + """ + Write any changes in the array to the file on disk. + + For further information, see `memmap`. + + Parameters + ---------- + None + + See Also + -------- + memmap + + """ + if self.base is not None and hasattr(self.base, 'flush'): + self.base.flush() + + def __array_wrap__(self, arr, context=None): + arr = super().__array_wrap__(arr, context) + + # Return a memmap if a memmap was given as the output of the + # ufunc. Leave the arr class unchanged if self is not a memmap + # to keep original memmap subclasses behavior + if self is arr or type(self) is not memmap: + return arr + # Return scalar instead of 0d memmap, e.g. 
for np.sum with + # axis=None + if arr.shape == (): + return arr[()] + # Return ndarray otherwise + return arr.view(np.ndarray) + + def __getitem__(self, index): + res = super().__getitem__(index) + if type(res) is memmap and res._mmap is None: + return res.view(type=ndarray) + return res diff --git a/.local/lib/python3.8/site-packages/numpy/core/memmap.pyi b/.local/lib/python3.8/site-packages/numpy/core/memmap.pyi new file mode 100644 index 0000000..ba595bf --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/memmap.pyi @@ -0,0 +1,5 @@ +from typing import List + +from numpy import memmap as memmap + +__all__: List[str] diff --git a/.local/lib/python3.8/site-packages/numpy/core/multiarray.py b/.local/lib/python3.8/site-packages/numpy/core/multiarray.py new file mode 100644 index 0000000..f88d759 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/multiarray.py @@ -0,0 +1,1690 @@ +""" +Create the numpy.core.multiarray namespace for backward compatibility. In v1.16 +the multiarray and umath c-extension modules were merged into a single +_multiarray_umath extension module. So we replicate the old namespace +by importing from the extension module. + +""" + +import functools +from . import overrides +from . import _multiarray_umath +from ._multiarray_umath import * # noqa: F403 +# These imports are needed for backward compatibility, +# do not change them. issue gh-15518 +# _get_ndarray_c_version is semi-public, on purpose not added to __all__ +from ._multiarray_umath import ( + _fastCopyAndTranspose, _flagdict, _from_dlpack, _insert, _reconstruct, + _vec_string, _ARRAY_API, _monotonicity, _get_ndarray_c_version, + _set_madvise_hugepage, + ) + +__all__ = [ + '_ARRAY_API', 'ALLOW_THREADS', 'BUFSIZE', 'CLIP', 'DATETIMEUNITS', + 'ITEM_HASOBJECT', 'ITEM_IS_POINTER', 'LIST_PICKLE', 'MAXDIMS', + 'MAY_SHARE_BOUNDS', 'MAY_SHARE_EXACT', 'NEEDS_INIT', 'NEEDS_PYAPI', + 'RAISE', 'USE_GETITEM', 'USE_SETITEM', 'WRAP', '_fastCopyAndTranspose', + '_flagdict', '_from_dlpack', '_insert', '_reconstruct', '_vec_string', + '_monotonicity', 'add_docstring', 'arange', 'array', 'asarray', + 'asanyarray', 'ascontiguousarray', 'asfortranarray', 'bincount', + 'broadcast', 'busday_count', 'busday_offset', 'busdaycalendar', 'can_cast', + 'compare_chararrays', 'concatenate', 'copyto', 'correlate', 'correlate2', + 'count_nonzero', 'c_einsum', 'datetime_as_string', 'datetime_data', + 'dot', 'dragon4_positional', 'dragon4_scientific', 'dtype', + 'empty', 'empty_like', 'error', 'flagsobj', 'flatiter', 'format_longfloat', + 'frombuffer', 'fromfile', 'fromiter', 'fromstring', + 'get_handler_name', 'get_handler_version', 'inner', 'interp', + 'interp_complex', 'is_busday', 'lexsort', 'matmul', 'may_share_memory', + 'min_scalar_type', 'ndarray', 'nditer', 'nested_iters', + 'normalize_axis_index', 'packbits', 'promote_types', 'putmask', + 'ravel_multi_index', 'result_type', 'scalar', 'set_datetimeparse_function', + 'set_legacy_print_mode', 'set_numeric_ops', 'set_string_function', + 'set_typeDict', 'shares_memory', 'tracemalloc_domain', 'typeinfo', + 'unpackbits', 'unravel_index', 'vdot', 'where', 'zeros'] + +# For backward compatibility, make sure pickle imports these functions from here +_reconstruct.__module__ = 'numpy.core.multiarray' +scalar.__module__ = 'numpy.core.multiarray' + + +_from_dlpack.__module__ = 'numpy' +arange.__module__ = 'numpy' +array.__module__ = 'numpy' +asarray.__module__ = 'numpy' +asanyarray.__module__ = 'numpy' +ascontiguousarray.__module__ = 'numpy' +asfortranarray.__module__ = 
'numpy' +datetime_data.__module__ = 'numpy' +empty.__module__ = 'numpy' +frombuffer.__module__ = 'numpy' +fromfile.__module__ = 'numpy' +fromiter.__module__ = 'numpy' +frompyfunc.__module__ = 'numpy' +fromstring.__module__ = 'numpy' +geterrobj.__module__ = 'numpy' +may_share_memory.__module__ = 'numpy' +nested_iters.__module__ = 'numpy' +promote_types.__module__ = 'numpy' +set_numeric_ops.__module__ = 'numpy' +seterrobj.__module__ = 'numpy' +zeros.__module__ = 'numpy' + + +# We can't verify dispatcher signatures because NumPy's C functions don't +# support introspection. +array_function_from_c_func_and_dispatcher = functools.partial( + overrides.array_function_from_dispatcher, + module='numpy', docs_from_dispatcher=True, verify=False) + + +@array_function_from_c_func_and_dispatcher(_multiarray_umath.empty_like) +def empty_like(prototype, dtype=None, order=None, subok=None, shape=None): + """ + empty_like(prototype, dtype=None, order='K', subok=True, shape=None) + + Return a new array with the same shape and type as a given array. + + Parameters + ---------- + prototype : array_like + The shape and data-type of `prototype` define these same attributes + of the returned array. + dtype : data-type, optional + Overrides the data type of the result. + + .. versionadded:: 1.6.0 + order : {'C', 'F', 'A', or 'K'}, optional + Overrides the memory layout of the result. 'C' means C-order, + 'F' means F-order, 'A' means 'F' if `prototype` is Fortran + contiguous, 'C' otherwise. 'K' means match the layout of `prototype` + as closely as possible. + + .. versionadded:: 1.6.0 + subok : bool, optional. + If True, then the newly created array will use the sub-class + type of `prototype`, otherwise it will be a base-class array. Defaults + to True. + shape : int or sequence of ints, optional. + Overrides the shape of the result. If order='K' and the number of + dimensions is unchanged, will try to keep order, otherwise, + order='C' is implied. + + .. versionadded:: 1.17.0 + + Returns + ------- + out : ndarray + Array of uninitialized (arbitrary) data with the same + shape and type as `prototype`. + + See Also + -------- + ones_like : Return an array of ones with shape and type of input. + zeros_like : Return an array of zeros with shape and type of input. + full_like : Return a new array with shape of input filled with value. + empty : Return a new uninitialized array. + + Notes + ----- + This function does *not* initialize the returned array; to do that use + `zeros_like` or `ones_like` instead. It may be marginally faster than + the functions that do set the array values. + + Examples + -------- + >>> a = ([1,2,3], [4,5,6]) # a is array-like + >>> np.empty_like(a) + array([[-1073741821, -1073741821, 3], # uninitialized + [ 0, 0, -1073741821]]) + >>> a = np.array([[1., 2., 3.],[4.,5.,6.]]) + >>> np.empty_like(a) + array([[ -2.00000715e+000, 1.48219694e-323, -2.00000572e+000], # uninitialized + [ 4.38791518e-305, -2.00000715e+000, 4.17269252e-309]]) + + """ + return (prototype,) + + +@array_function_from_c_func_and_dispatcher(_multiarray_umath.concatenate) +def concatenate(arrays, axis=None, out=None, *, dtype=None, casting=None): + """ + concatenate((a1, a2, ...), axis=0, out=None, dtype=None, casting="same_kind") + + Join a sequence of arrays along an existing axis. + + Parameters + ---------- + a1, a2, ... : sequence of array_like + The arrays must have the same shape, except in the dimension + corresponding to `axis` (the first, by default). 
+ axis : int, optional + The axis along which the arrays will be joined. If axis is None, + arrays are flattened before use. Default is 0. + out : ndarray, optional + If provided, the destination to place the result. The shape must be + correct, matching that of what concatenate would have returned if no + out argument were specified. + dtype : str or dtype + If provided, the destination array will have this dtype. Cannot be + provided together with `out`. + + .. versionadded:: 1.20.0 + + casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional + Controls what kind of data casting may occur. Defaults to 'same_kind'. + + .. versionadded:: 1.20.0 + + Returns + ------- + res : ndarray + The concatenated array. + + See Also + -------- + ma.concatenate : Concatenate function that preserves input masks. + array_split : Split an array into multiple sub-arrays of equal or + near-equal size. + split : Split array into a list of multiple sub-arrays of equal size. + hsplit : Split array into multiple sub-arrays horizontally (column wise). + vsplit : Split array into multiple sub-arrays vertically (row wise). + dsplit : Split array into multiple sub-arrays along the 3rd axis (depth). + stack : Stack a sequence of arrays along a new axis. + block : Assemble arrays from blocks. + hstack : Stack arrays in sequence horizontally (column wise). + vstack : Stack arrays in sequence vertically (row wise). + dstack : Stack arrays in sequence depth wise (along third dimension). + column_stack : Stack 1-D arrays as columns into a 2-D array. + + Notes + ----- + When one or more of the arrays to be concatenated is a MaskedArray, + this function will return a MaskedArray object instead of an ndarray, + but the input masks are *not* preserved. In cases where a MaskedArray + is expected as input, use the ma.concatenate function from the masked + array module instead. + + Examples + -------- + >>> a = np.array([[1, 2], [3, 4]]) + >>> b = np.array([[5, 6]]) + >>> np.concatenate((a, b), axis=0) + array([[1, 2], + [3, 4], + [5, 6]]) + >>> np.concatenate((a, b.T), axis=1) + array([[1, 2, 5], + [3, 4, 6]]) + >>> np.concatenate((a, b), axis=None) + array([1, 2, 3, 4, 5, 6]) + + This function will not preserve masking of MaskedArray inputs. + + >>> a = np.ma.arange(3) + >>> a[1] = np.ma.masked + >>> b = np.arange(2, 5) + >>> a + masked_array(data=[0, --, 2], + mask=[False, True, False], + fill_value=999999) + >>> b + array([2, 3, 4]) + >>> np.concatenate([a, b]) + masked_array(data=[0, 1, 2, 2, 3, 4], + mask=False, + fill_value=999999) + >>> np.ma.concatenate([a, b]) + masked_array(data=[0, --, 2, 2, 3, 4], + mask=[False, True, False, False, False, False], + fill_value=999999) + + """ + if out is not None: + # optimize for the typical case where only arrays is provided + arrays = list(arrays) + arrays.append(out) + return arrays + + +@array_function_from_c_func_and_dispatcher(_multiarray_umath.inner) +def inner(a, b): + """ + inner(a, b, /) + + Inner product of two arrays. + + Ordinary inner product of vectors for 1-D arrays (without complex + conjugation), in higher dimensions a sum product over the last axes. + + Parameters + ---------- + a, b : array_like + If `a` and `b` are nonscalar, their last dimensions must match. + + Returns + ------- + out : ndarray + If `a` and `b` are both + scalars or both 1-D arrays then a scalar is returned; otherwise + an array is returned. 
+ ``out.shape = (*a.shape[:-1], *b.shape[:-1])`` + + Raises + ------ + ValueError + If both `a` and `b` are nonscalar and their last dimensions have + different sizes. + + See Also + -------- + tensordot : Sum products over arbitrary axes. + dot : Generalised matrix product, using second last dimension of `b`. + einsum : Einstein summation convention. + + Notes + ----- + For vectors (1-D arrays) it computes the ordinary inner-product:: + + np.inner(a, b) = sum(a[:]*b[:]) + + More generally, if `ndim(a) = r > 0` and `ndim(b) = s > 0`:: + + np.inner(a, b) = np.tensordot(a, b, axes=(-1,-1)) + + or explicitly:: + + np.inner(a, b)[i0,...,ir-2,j0,...,js-2] + = sum(a[i0,...,ir-2,:]*b[j0,...,js-2,:]) + + In addition `a` or `b` may be scalars, in which case:: + + np.inner(a,b) = a*b + + Examples + -------- + Ordinary inner product for vectors: + + >>> a = np.array([1,2,3]) + >>> b = np.array([0,1,0]) + >>> np.inner(a, b) + 2 + + Some multidimensional examples: + + >>> a = np.arange(24).reshape((2,3,4)) + >>> b = np.arange(4) + >>> c = np.inner(a, b) + >>> c.shape + (2, 3) + >>> c + array([[ 14, 38, 62], + [ 86, 110, 134]]) + + >>> a = np.arange(2).reshape((1,1,2)) + >>> b = np.arange(6).reshape((3,2)) + >>> c = np.inner(a, b) + >>> c.shape + (1, 1, 3) + >>> c + array([[[1, 3, 5]]]) + + An example where `b` is a scalar: + + >>> np.inner(np.eye(2), 7) + array([[7., 0.], + [0., 7.]]) + + """ + return (a, b) + + +@array_function_from_c_func_and_dispatcher(_multiarray_umath.where) +def where(condition, x=None, y=None): + """ + where(condition, [x, y], /) + + Return elements chosen from `x` or `y` depending on `condition`. + + .. note:: + When only `condition` is provided, this function is a shorthand for + ``np.asarray(condition).nonzero()``. Using `nonzero` directly should be + preferred, as it behaves correctly for subclasses. The rest of this + documentation covers only the case where all three arguments are + provided. + + Parameters + ---------- + condition : array_like, bool + Where True, yield `x`, otherwise yield `y`. + x, y : array_like + Values from which to choose. `x`, `y` and `condition` need to be + broadcastable to some shape. + + Returns + ------- + out : ndarray + An array with elements from `x` where `condition` is True, and elements + from `y` elsewhere. + + See Also + -------- + choose + nonzero : The function that is called when x and y are omitted + + Notes + ----- + If all the arrays are 1-D, `where` is equivalent to:: + + [xv if c else yv + for c, xv, yv in zip(condition, x, y)] + + Examples + -------- + >>> a = np.arange(10) + >>> a + array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) + >>> np.where(a < 5, a, 10*a) + array([ 0, 1, 2, 3, 4, 50, 60, 70, 80, 90]) + + This can be used on multidimensional arrays too: + + >>> np.where([[True, False], [True, True]], + ... [[1, 2], [3, 4]], + ... [[9, 8], [7, 6]]) + array([[1, 8], + [3, 4]]) + + The shapes of x, y, and the condition are broadcast together: + + >>> x, y = np.ogrid[:3, :4] + >>> np.where(x < y, x, 10 + y) # both x and 10+y are broadcast + array([[10, 0, 0, 0], + [10, 11, 1, 1], + [10, 11, 12, 2]]) + + >>> a = np.array([[0, 1, 2], + ... [0, 2, 4], + ... [0, 3, 6]]) + >>> np.where(a < 4, a, -1) # -1 is broadcast + array([[ 0, 1, 2], + [ 0, 2, -1], + [ 0, 3, -1]]) + """ + return (condition, x, y) + + +@array_function_from_c_func_and_dispatcher(_multiarray_umath.lexsort) +def lexsort(keys, axis=None): + """ + lexsort(keys, axis=-1) + + Perform an indirect stable sort using a sequence of keys. 
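+
+    (Illustrative aside added in editing, not upstream text: the *last*
+    key passed is the primary sort key.)
+
+    >>> np.lexsort((['b', 'a'], ['x', 'x']))  # ties on 'x', then 'a' < 'b'
+    array([1, 0])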
+ + Given multiple sorting keys, which can be interpreted as columns in a + spreadsheet, lexsort returns an array of integer indices that describes + the sort order by multiple columns. The last key in the sequence is used + for the primary sort order, the second-to-last key for the secondary sort + order, and so on. The keys argument must be a sequence of objects that + can be converted to arrays of the same shape. If a 2D array is provided + for the keys argument, its rows are interpreted as the sorting keys and + sorting is according to the last row, second last row etc. + + Parameters + ---------- + keys : (k, N) array or tuple containing k (N,)-shaped sequences + The `k` different "columns" to be sorted. The last column (or row if + `keys` is a 2D array) is the primary sort key. + axis : int, optional + Axis to be indirectly sorted. By default, sort over the last axis. + + Returns + ------- + indices : (N,) ndarray of ints + Array of indices that sort the keys along the specified axis. + + See Also + -------- + argsort : Indirect sort. + ndarray.sort : In-place sort. + sort : Return a sorted copy of an array. + + Examples + -------- + Sort names: first by surname, then by name. + + >>> surnames = ('Hertz', 'Galilei', 'Hertz') + >>> first_names = ('Heinrich', 'Galileo', 'Gustav') + >>> ind = np.lexsort((first_names, surnames)) + >>> ind + array([1, 2, 0]) + + >>> [surnames[i] + ", " + first_names[i] for i in ind] + ['Galilei, Galileo', 'Hertz, Gustav', 'Hertz, Heinrich'] + + Sort two columns of numbers: + + >>> a = [1,5,1,4,3,4,4] # First column + >>> b = [9,4,0,4,0,2,1] # Second column + >>> ind = np.lexsort((b,a)) # Sort by a, then by b + >>> ind + array([2, 0, 4, 6, 5, 3, 1]) + + >>> [(a[i],b[i]) for i in ind] + [(1, 0), (1, 9), (3, 0), (4, 1), (4, 2), (4, 4), (5, 4)] + + Note that sorting is first according to the elements of ``a``. + Secondary sorting is according to the elements of ``b``. + + A normal ``argsort`` would have yielded: + + >>> [(a[i],b[i]) for i in np.argsort(a)] + [(1, 9), (1, 0), (3, 0), (4, 4), (4, 2), (4, 1), (5, 4)] + + Structured arrays are sorted lexically by ``argsort``: + + >>> x = np.array([(1,9), (5,4), (1,0), (4,4), (3,0), (4,2), (4,1)], + ... dtype=np.dtype([('x', int), ('y', int)])) + + >>> np.argsort(x) # or np.argsort(x, order=('x', 'y')) + array([2, 0, 4, 6, 5, 3, 1]) + + """ + if isinstance(keys, tuple): + return keys + else: + return (keys,) + + +@array_function_from_c_func_and_dispatcher(_multiarray_umath.can_cast) +def can_cast(from_, to, casting=None): + """ + can_cast(from_, to, casting='safe') + + Returns True if cast between data types can occur according to the + casting rule. If from is a scalar or array scalar, also returns + True if the scalar value can be cast without overflow or truncation + to an integer. + + Parameters + ---------- + from_ : dtype, dtype specifier, scalar, or array + Data type, scalar, or array to cast from. + to : dtype or dtype specifier + Data type to cast to. + casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional + Controls what kind of data casting may occur. + + * 'no' means the data types should not be cast at all. + * 'equiv' means only byte-order changes are allowed. + * 'safe' means only casts which can preserve values are allowed. + * 'same_kind' means only safe casts or casts within a kind, + like float64 to float32, are allowed. + * 'unsafe' means any data conversions may be done. + + Returns + ------- + out : bool + True if cast can occur according to the casting rule. 
+
+    Notes
+    -----
+    .. versionchanged:: 1.17.0
+       Casting between a simple data type and a structured one is possible only
+       for "unsafe" casting. Casting to multiple fields is allowed, but
+       casting from multiple fields is not.
+
+    .. versionchanged:: 1.9.0
+       Casting from numeric to string types in 'safe' casting mode requires
+       that the string dtype length is long enough to store the maximum
+       integer/float value converted.
+
+    See also
+    --------
+    dtype, result_type
+
+    Examples
+    --------
+    Basic examples
+
+    >>> np.can_cast(np.int32, np.int64)
+    True
+    >>> np.can_cast(np.float64, complex)
+    True
+    >>> np.can_cast(complex, float)
+    False
+
+    >>> np.can_cast('i8', 'f8')
+    True
+    >>> np.can_cast('i8', 'f4')
+    False
+    >>> np.can_cast('i4', 'S4')
+    False
+
+    Casting scalars
+
+    >>> np.can_cast(100, 'i1')
+    True
+    >>> np.can_cast(150, 'i1')
+    False
+    >>> np.can_cast(150, 'u1')
+    True
+
+    >>> np.can_cast(3.5e100, np.float32)
+    False
+    >>> np.can_cast(1000.0, np.float32)
+    True
+
+    Array scalar checks the value, array does not
+
+    >>> np.can_cast(np.array(1000.0), np.float32)
+    True
+    >>> np.can_cast(np.array([1000.0]), np.float32)
+    False
+
+    Using the casting rules
+
+    >>> np.can_cast('i8', 'i8', 'no')
+    True
+    >>> np.can_cast('<i8', '>i8', 'no')
+    False
+
+    >>> np.can_cast('<i8', '>i8', 'equiv')
+    True
+    >>> np.can_cast('<i4', '>i8', 'equiv')
+    False
+
+    >>> np.can_cast('<i4', '>i8', 'safe')
+    True
+    >>> np.can_cast('<i8', '>i4', 'safe')
+    False
+
+    >>> np.can_cast('<i8', '>i4', 'same_kind')
+    True
+    >>> np.can_cast('<i8', '>u4', 'same_kind')
+    False
+
+    >>> np.can_cast('<i8', '>u4', 'unsafe')
+    True
+
+    """
+    return (from_,)
+
+
+@array_function_from_c_func_and_dispatcher(_multiarray_umath.min_scalar_type)
+def min_scalar_type(a):
+    """
+    min_scalar_type(a, /)
+
+    For scalar ``a``, returns the data type with the smallest size
+    and smallest scalar kind which can hold its value. For non-scalar
+    array ``a``, returns the vector's dtype unmodified.
+
+    Floating point values are not demoted to integers,
+    and complex values are not demoted to floats.
+
+    Parameters
+    ----------
+    a : scalar or array_like
+        The value whose minimal data type is to be found.
+
+    Returns
+    -------
+    out : dtype
+        The minimal data type.
+
+    Notes
+    -----
+    .. versionadded:: 1.6.0
+
+    See Also
+    --------
+    result_type, promote_types, dtype, can_cast
+
+    Examples
+    --------
+    >>> np.min_scalar_type(10)
+    dtype('uint8')
+
+    >>> np.min_scalar_type(-260)
+    dtype('int16')
+
+    >>> np.min_scalar_type(3.1)
+    dtype('float16')
+
+    >>> np.min_scalar_type(1e50)
+    dtype('float64')
+
+    >>> np.min_scalar_type(np.arange(4,dtype='f8'))
+    dtype('float64')
+
+    """
+    return (a,)
+
+
+@array_function_from_c_func_and_dispatcher(_multiarray_umath.result_type)
+def result_type(*arrays_and_dtypes):
+    """
+    result_type(*arrays_and_dtypes)
+
+    Returns the type that results from applying the NumPy
+    type promotion rules to the arguments.
+
+    Type promotion in NumPy works similarly to the rules in languages
+    like C++, with some slight differences. When both scalars and
+    arrays are used, the array's type takes precedence and the actual value
+    of the scalar is taken into account.
+
+    For example, calculating 3*a, where a is an array of 32-bit floats,
+    intuitively should result in a 32-bit float output. If the 3 is a
+    32-bit integer, the NumPy rules indicate it can't convert losslessly
+    into a 32-bit float, so a 64-bit float should be the result type.
+    By examining the value of the constant, '3', we see that it fits in
+    an 8-bit integer, which can be cast losslessly into the 32-bit float.
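+
+    For instance (an illustrative doctest added in editing, not upstream
+    text):
+
+    >>> np.result_type(3, np.arange(3, dtype='f4'))
+    dtype('float32')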
+ + Parameters + ---------- + arrays_and_dtypes : list of arrays and dtypes + The operands of some operation whose result type is needed. + + Returns + ------- + out : dtype + The result type. + + See also + -------- + dtype, promote_types, min_scalar_type, can_cast + + Notes + ----- + .. versionadded:: 1.6.0 + + The specific algorithm used is as follows. + + Categories are determined by first checking which of boolean, + integer (int/uint), or floating point (float/complex) the maximum + kind of all the arrays and the scalars are. + + If there are only scalars or the maximum category of the scalars + is higher than the maximum category of the arrays, + the data types are combined with :func:`promote_types` + to produce the return value. + + Otherwise, `min_scalar_type` is called on each array, and + the resulting data types are all combined with :func:`promote_types` + to produce the return value. + + The set of int values is not a subset of the uint values for types + with the same number of bits, something not reflected in + :func:`min_scalar_type`, but handled as a special case in `result_type`. + + Examples + -------- + >>> np.result_type(3, np.arange(7, dtype='i1')) + dtype('int8') + + >>> np.result_type('i4', 'c8') + dtype('complex128') + + >>> np.result_type(3.0, -2) + dtype('float64') + + """ + return arrays_and_dtypes + + +@array_function_from_c_func_and_dispatcher(_multiarray_umath.dot) +def dot(a, b, out=None): + """ + dot(a, b, out=None) + + Dot product of two arrays. Specifically, + + - If both `a` and `b` are 1-D arrays, it is inner product of vectors + (without complex conjugation). + + - If both `a` and `b` are 2-D arrays, it is matrix multiplication, + but using :func:`matmul` or ``a @ b`` is preferred. + + - If either `a` or `b` is 0-D (scalar), it is equivalent to :func:`multiply` + and using ``numpy.multiply(a, b)`` or ``a * b`` is preferred. + + - If `a` is an N-D array and `b` is a 1-D array, it is a sum product over + the last axis of `a` and `b`. + + - If `a` is an N-D array and `b` is an M-D array (where ``M>=2``), it is a + sum product over the last axis of `a` and the second-to-last axis of `b`:: + + dot(a, b)[i,j,k,m] = sum(a[i,j,:] * b[k,:,m]) + + Parameters + ---------- + a : array_like + First argument. + b : array_like + Second argument. + out : ndarray, optional + Output argument. This must have the exact kind that would be returned + if it was not used. In particular, it must have the right type, must be + C-contiguous, and its dtype must be the dtype that would be returned + for `dot(a,b)`. This is a performance feature. Therefore, if these + conditions are not met, an exception is raised, instead of attempting + to be flexible. + + Returns + ------- + output : ndarray + Returns the dot product of `a` and `b`. If `a` and `b` are both + scalars or both 1-D arrays then a scalar is returned; otherwise + an array is returned. + If `out` is given, then it is returned. + + Raises + ------ + ValueError + If the last dimension of `a` is not the same size as + the second-to-last dimension of `b`. + + See Also + -------- + vdot : Complex-conjugating dot product. + tensordot : Sum products over arbitrary axes. + einsum : Einstein summation convention. + matmul : '@' operator as method with out parameter. + linalg.multi_dot : Chained dot product. 
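+
+    Notes
+    -----
+    (Editorial addition, not upstream text.) For 2-D arrays, ``np.dot``
+    agrees with the ``@`` operator:
+
+    >>> A = np.array([[1, 2], [3, 4]])
+    >>> B = np.array([[5, 6], [7, 8]])
+    >>> np.array_equal(np.dot(A, B), A @ B)
+    True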
+ + Examples + -------- + >>> np.dot(3, 4) + 12 + + Neither argument is complex-conjugated: + + >>> np.dot([2j, 3j], [2j, 3j]) + (-13+0j) + + For 2-D arrays it is the matrix product: + + >>> a = [[1, 0], [0, 1]] + >>> b = [[4, 1], [2, 2]] + >>> np.dot(a, b) + array([[4, 1], + [2, 2]]) + + >>> a = np.arange(3*4*5*6).reshape((3,4,5,6)) + >>> b = np.arange(3*4*5*6)[::-1].reshape((5,4,6,3)) + >>> np.dot(a, b)[2,3,2,1,2,2] + 499128 + >>> sum(a[2,3,2,:] * b[1,2,:,2]) + 499128 + + """ + return (a, b, out) + + +@array_function_from_c_func_and_dispatcher(_multiarray_umath.vdot) +def vdot(a, b): + """ + vdot(a, b, /) + + Return the dot product of two vectors. + + The vdot(`a`, `b`) function handles complex numbers differently than + dot(`a`, `b`). If the first argument is complex the complex conjugate + of the first argument is used for the calculation of the dot product. + + Note that `vdot` handles multidimensional arrays differently than `dot`: + it does *not* perform a matrix product, but flattens input arguments + to 1-D vectors first. Consequently, it should only be used for vectors. + + Parameters + ---------- + a : array_like + If `a` is complex the complex conjugate is taken before calculation + of the dot product. + b : array_like + Second argument to the dot product. + + Returns + ------- + output : ndarray + Dot product of `a` and `b`. Can be an int, float, or + complex depending on the types of `a` and `b`. + + See Also + -------- + dot : Return the dot product without using the complex conjugate of the + first argument. + + Examples + -------- + >>> a = np.array([1+2j,3+4j]) + >>> b = np.array([5+6j,7+8j]) + >>> np.vdot(a, b) + (70-8j) + >>> np.vdot(b, a) + (70+8j) + + Note that higher-dimensional arrays are flattened! + + >>> a = np.array([[1, 4], [5, 6]]) + >>> b = np.array([[4, 1], [2, 2]]) + >>> np.vdot(a, b) + 30 + >>> np.vdot(b, a) + 30 + >>> 1*4 + 4*1 + 5*2 + 6*2 + 30 + + """ + return (a, b) + + +@array_function_from_c_func_and_dispatcher(_multiarray_umath.bincount) +def bincount(x, weights=None, minlength=None): + """ + bincount(x, /, weights=None, minlength=0) + + Count number of occurrences of each value in array of non-negative ints. + + The number of bins (of size 1) is one larger than the largest value in + `x`. If `minlength` is specified, there will be at least this number + of bins in the output array (though it will be longer if necessary, + depending on the contents of `x`). + Each bin gives the number of occurrences of its index value in `x`. + If `weights` is specified the input array is weighted by it, i.e. if a + value ``n`` is found at position ``i``, ``out[n] += weight[i]`` instead + of ``out[n] += 1``. + + Parameters + ---------- + x : array_like, 1 dimension, nonnegative ints + Input array. + weights : array_like, optional + Weights, array of the same shape as `x`. + minlength : int, optional + A minimum number of bins for the output array. + + .. versionadded:: 1.6.0 + + Returns + ------- + out : ndarray of ints + The result of binning the input array. + The length of `out` is equal to ``np.amax(x)+1``. + + Raises + ------ + ValueError + If the input is not 1-dimensional, or contains elements with negative + values, or if `minlength` is negative. + TypeError + If the type of the input is float or complex. 
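+
+    (Illustrative aside added in editing, not upstream text: `minlength`
+    pads the result with zeros when the largest value in `x` is small.)
+
+    >>> np.bincount(np.array([0, 1, 1]), minlength=5)
+    array([1, 2, 0, 0, 0])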
+ + See Also + -------- + histogram, digitize, unique + + Examples + -------- + >>> np.bincount(np.arange(5)) + array([1, 1, 1, 1, 1]) + >>> np.bincount(np.array([0, 1, 1, 3, 2, 1, 7])) + array([1, 3, 1, 1, 0, 0, 0, 1]) + + >>> x = np.array([0, 1, 1, 3, 2, 1, 7, 23]) + >>> np.bincount(x).size == np.amax(x)+1 + True + + The input array needs to be of integer dtype, otherwise a + TypeError is raised: + + >>> np.bincount(np.arange(5, dtype=float)) + Traceback (most recent call last): + ... + TypeError: Cannot cast array data from dtype('float64') to dtype('int64') + according to the rule 'safe' + + A possible use of ``bincount`` is to perform sums over + variable-size chunks of an array, using the ``weights`` keyword. + + >>> w = np.array([0.3, 0.5, 0.2, 0.7, 1., -0.6]) # weights + >>> x = np.array([0, 1, 1, 2, 2, 2]) + >>> np.bincount(x, weights=w) + array([ 0.3, 0.7, 1.1]) + + """ + return (x, weights) + + +@array_function_from_c_func_and_dispatcher(_multiarray_umath.ravel_multi_index) +def ravel_multi_index(multi_index, dims, mode=None, order=None): + """ + ravel_multi_index(multi_index, dims, mode='raise', order='C') + + Converts a tuple of index arrays into an array of flat + indices, applying boundary modes to the multi-index. + + Parameters + ---------- + multi_index : tuple of array_like + A tuple of integer arrays, one array for each dimension. + dims : tuple of ints + The shape of array into which the indices from ``multi_index`` apply. + mode : {'raise', 'wrap', 'clip'}, optional + Specifies how out-of-bounds indices are handled. Can specify + either one mode or a tuple of modes, one mode per index. + + * 'raise' -- raise an error (default) + * 'wrap' -- wrap around + * 'clip' -- clip to the range + + In 'clip' mode, a negative index which would normally + wrap will clip to 0 instead. + order : {'C', 'F'}, optional + Determines whether the multi-index should be viewed as + indexing in row-major (C-style) or column-major + (Fortran-style) order. + + Returns + ------- + raveled_indices : ndarray + An array of indices into the flattened version of an array + of dimensions ``dims``. + + See Also + -------- + unravel_index + + Notes + ----- + .. versionadded:: 1.6.0 + + Examples + -------- + >>> arr = np.array([[3,6,6],[4,5,1]]) + >>> np.ravel_multi_index(arr, (7,6)) + array([22, 41, 37]) + >>> np.ravel_multi_index(arr, (7,6), order='F') + array([31, 41, 13]) + >>> np.ravel_multi_index(arr, (4,6), mode='clip') + array([22, 23, 19]) + >>> np.ravel_multi_index(arr, (4,4), mode=('clip','wrap')) + array([12, 13, 13]) + + >>> np.ravel_multi_index((3,1,4,1), (6,7,8,9)) + 1621 + """ + return multi_index + + +@array_function_from_c_func_and_dispatcher(_multiarray_umath.unravel_index) +def unravel_index(indices, shape=None, order=None): + """ + unravel_index(indices, shape, order='C') + + Converts a flat index or array of flat indices into a tuple + of coordinate arrays. + + Parameters + ---------- + indices : array_like + An integer array whose elements are indices into the flattened + version of an array of dimensions ``shape``. Before version 1.6.0, + this function accepted just one index value. + shape : tuple of ints + The shape of the array to use for unraveling ``indices``. + + .. versionchanged:: 1.16.0 + Renamed from ``dims`` to ``shape``. + + order : {'C', 'F'}, optional + Determines whether the indices should be viewed as indexing in + row-major (C-style) or column-major (Fortran-style) order. + + .. 
versionadded:: 1.6.0 + + Returns + ------- + unraveled_coords : tuple of ndarray + Each array in the tuple has the same shape as the ``indices`` + array. + + See Also + -------- + ravel_multi_index + + Examples + -------- + >>> np.unravel_index([22, 41, 37], (7,6)) + (array([3, 6, 6]), array([4, 5, 1])) + >>> np.unravel_index([31, 41, 13], (7,6), order='F') + (array([3, 6, 6]), array([4, 5, 1])) + + >>> np.unravel_index(1621, (6,7,8,9)) + (3, 1, 4, 1) + + """ + return (indices,) + + +@array_function_from_c_func_and_dispatcher(_multiarray_umath.copyto) +def copyto(dst, src, casting=None, where=None): + """ + copyto(dst, src, casting='same_kind', where=True) + + Copies values from one array to another, broadcasting as necessary. + + Raises a TypeError if the `casting` rule is violated, and if + `where` is provided, it selects which elements to copy. + + .. versionadded:: 1.7.0 + + Parameters + ---------- + dst : ndarray + The array into which values are copied. + src : array_like + The array from which values are copied. + casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional + Controls what kind of data casting may occur when copying. + + * 'no' means the data types should not be cast at all. + * 'equiv' means only byte-order changes are allowed. + * 'safe' means only casts which can preserve values are allowed. + * 'same_kind' means only safe casts or casts within a kind, + like float64 to float32, are allowed. + * 'unsafe' means any data conversions may be done. + where : array_like of bool, optional + A boolean array which is broadcasted to match the dimensions + of `dst`, and selects elements to copy from `src` to `dst` + wherever it contains the value True. + """ + return (dst, src, where) + + +@array_function_from_c_func_and_dispatcher(_multiarray_umath.putmask) +def putmask(a, mask, values): + """ + putmask(a, mask, values) + + Changes elements of an array based on conditional and input values. + + Sets ``a.flat[n] = values[n]`` for each n where ``mask.flat[n]==True``. + + If `values` is not the same size as `a` and `mask` then it will repeat. + This gives behavior different from ``a[mask] = values``. + + Parameters + ---------- + a : ndarray + Target array. + mask : array_like + Boolean mask array. It has to be the same shape as `a`. + values : array_like + Values to put into `a` where `mask` is True. If `values` is smaller + than `a` it will be repeated. + + See Also + -------- + place, put, take, copyto + + Examples + -------- + >>> x = np.arange(6).reshape(2, 3) + >>> np.putmask(x, x>2, x**2) + >>> x + array([[ 0, 1, 2], + [ 9, 16, 25]]) + + If `values` is smaller than `a` it is repeated: + + >>> x = np.arange(5) + >>> np.putmask(x, x>1, [-33, -44]) + >>> x + array([ 0, 1, -33, -44, -33]) + + """ + return (a, mask, values) + + +@array_function_from_c_func_and_dispatcher(_multiarray_umath.packbits) +def packbits(a, axis=None, bitorder='big'): + """ + packbits(a, /, axis=None, bitorder='big') + + Packs the elements of a binary-valued array into bits in a uint8 array. + + The result is padded to full bytes by inserting zero bits at the end. + + Parameters + ---------- + a : array_like + An array of integers or booleans whose elements should be packed to + bits. + axis : int, optional + The dimension over which bit-packing is done. + ``None`` implies packing the flattened array. + bitorder : {'big', 'little'}, optional + The order of the input bits. 
'big' will mimic bin(val), + ``[0, 0, 0, 0, 0, 0, 1, 1] => 3 = 0b00000011``, 'little' will + reverse the order so ``[1, 1, 0, 0, 0, 0, 0, 0] => 3``. + Defaults to 'big'. + + .. versionadded:: 1.17.0 + + Returns + ------- + packed : ndarray + Array of type uint8 whose elements represent bits corresponding to the + logical (0 or nonzero) value of the input elements. The shape of + `packed` has the same number of dimensions as the input (unless `axis` + is None, in which case the output is 1-D). + + See Also + -------- + unpackbits: Unpacks elements of a uint8 array into a binary-valued output + array. + + Examples + -------- + >>> a = np.array([[[1,0,1], + ... [0,1,0]], + ... [[1,1,0], + ... [0,0,1]]]) + >>> b = np.packbits(a, axis=-1) + >>> b + array([[[160], + [ 64]], + [[192], + [ 32]]], dtype=uint8) + + Note that in binary 160 = 1010 0000, 64 = 0100 0000, 192 = 1100 0000, + and 32 = 0010 0000. + + """ + return (a,) + + +@array_function_from_c_func_and_dispatcher(_multiarray_umath.unpackbits) +def unpackbits(a, axis=None, count=None, bitorder='big'): + """ + unpackbits(a, /, axis=None, count=None, bitorder='big') + + Unpacks elements of a uint8 array into a binary-valued output array. + + Each element of `a` represents a bit-field that should be unpacked + into a binary-valued output array. The shape of the output array is + either 1-D (if `axis` is ``None``) or the same shape as the input + array with unpacking done along the axis specified. + + Parameters + ---------- + a : ndarray, uint8 type + Input array. + axis : int, optional + The dimension over which bit-unpacking is done. + ``None`` implies unpacking the flattened array. + count : int or None, optional + The number of elements to unpack along `axis`, provided as a way + of undoing the effect of packing a size that is not a multiple + of eight. A non-negative number means to only unpack `count` + bits. A negative number means to trim off that many bits from + the end. ``None`` means to unpack the entire array (the + default). Counts larger than the available number of bits will + add zero padding to the output. Negative counts must not + exceed the available number of bits. + + .. versionadded:: 1.17.0 + + bitorder : {'big', 'little'}, optional + The order of the returned bits. 'big' will mimic bin(val), + ``3 = 0b00000011 => [0, 0, 0, 0, 0, 0, 1, 1]``, 'little' will reverse + the order to ``[1, 1, 0, 0, 0, 0, 0, 0]``. + Defaults to 'big'. + + .. versionadded:: 1.17.0 + + Returns + ------- + unpacked : ndarray, uint8 type + The elements are binary-valued (0 or 1). + + See Also + -------- + packbits : Packs the elements of a binary-valued array into bits in + a uint8 array. 
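+
+    Notes
+    -----
+    (Editorial addition, not upstream text.) ``unpackbits`` inverts
+    ``packbits`` provided ``count`` is used to drop the zero padding that
+    packing adds:
+
+    >>> bits = np.array([1, 0, 1], dtype=np.uint8)
+    >>> np.array_equal(bits, np.unpackbits(np.packbits(bits), count=3))
+    True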
+ + Examples + -------- + >>> a = np.array([[2], [7], [23]], dtype=np.uint8) + >>> a + array([[ 2], + [ 7], + [23]], dtype=uint8) + >>> b = np.unpackbits(a, axis=1) + >>> b + array([[0, 0, 0, 0, 0, 0, 1, 0], + [0, 0, 0, 0, 0, 1, 1, 1], + [0, 0, 0, 1, 0, 1, 1, 1]], dtype=uint8) + >>> c = np.unpackbits(a, axis=1, count=-3) + >>> c + array([[0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 1, 0]], dtype=uint8) + + >>> p = np.packbits(b, axis=0) + >>> np.unpackbits(p, axis=0) + array([[0, 0, 0, 0, 0, 0, 1, 0], + [0, 0, 0, 0, 0, 1, 1, 1], + [0, 0, 0, 1, 0, 1, 1, 1], + [0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0]], dtype=uint8) + >>> np.array_equal(b, np.unpackbits(p, axis=0, count=b.shape[0])) + True + + """ + return (a,) + + +@array_function_from_c_func_and_dispatcher(_multiarray_umath.shares_memory) +def shares_memory(a, b, max_work=None): + """ + shares_memory(a, b, /, max_work=None) + + Determine if two arrays share memory. + + .. warning:: + + This function can be exponentially slow for some inputs, unless + `max_work` is set to a finite number or ``MAY_SHARE_BOUNDS``. + If in doubt, use `numpy.may_share_memory` instead. + + Parameters + ---------- + a, b : ndarray + Input arrays + max_work : int, optional + Effort to spend on solving the overlap problem (maximum number + of candidate solutions to consider). The following special + values are recognized: + + max_work=MAY_SHARE_EXACT (default) + The problem is solved exactly. In this case, the function returns + True only if there is an element shared between the arrays. Finding + the exact solution may take extremely long in some cases. + max_work=MAY_SHARE_BOUNDS + Only the memory bounds of a and b are checked. + + Raises + ------ + numpy.TooHardError + Exceeded max_work. + + Returns + ------- + out : bool + + See Also + -------- + may_share_memory + + Examples + -------- + >>> x = np.array([1, 2, 3, 4]) + >>> np.shares_memory(x, np.array([5, 6, 7])) + False + >>> np.shares_memory(x[::2], x) + True + >>> np.shares_memory(x[::2], x[1::2]) + False + + Checking whether two arrays share memory is NP-complete, and + runtime may increase exponentially in the number of + dimensions. Hence, `max_work` should generally be set to a finite + number, as it is possible to construct examples that take + extremely long to run: + + >>> from numpy.lib.stride_tricks import as_strided + >>> x = np.zeros([192163377], dtype=np.int8) + >>> x1 = as_strided(x, strides=(36674, 61119, 85569), shape=(1049, 1049, 1049)) + >>> x2 = as_strided(x[64023025:], strides=(12223, 12224, 1), shape=(1049, 1049, 1)) + >>> np.shares_memory(x1, x2, max_work=1000) + Traceback (most recent call last): + ... + numpy.TooHardError: Exceeded max_work + + Running ``np.shares_memory(x1, x2)`` without `max_work` set takes + around 1 minute for this case. It is possible to find problems + that take still significantly longer. + + """ + return (a, b) + + +@array_function_from_c_func_and_dispatcher(_multiarray_umath.may_share_memory) +def may_share_memory(a, b, max_work=None): + """ + may_share_memory(a, b, /, max_work=None) + + Determine if two arrays might share memory + + A return of True does not necessarily mean that the two arrays + share any element. It just means that they *might*. + + Only the memory bounds of a and b are checked by default. + + Parameters + ---------- + a, b : ndarray + Input arrays + max_work : int, optional + Effort to spend on solving the overlap problem. 
See + `shares_memory` for details. Default for ``may_share_memory`` + is to do a bounds check. + + Returns + ------- + out : bool + + See Also + -------- + shares_memory + + Examples + -------- + >>> np.may_share_memory(np.array([1,2]), np.array([5,8,9])) + False + >>> x = np.zeros([3, 4]) + >>> np.may_share_memory(x[:,0], x[:,1]) + True + + """ + return (a, b) + + +@array_function_from_c_func_and_dispatcher(_multiarray_umath.is_busday) +def is_busday(dates, weekmask=None, holidays=None, busdaycal=None, out=None): + """ + is_busday(dates, weekmask='1111100', holidays=None, busdaycal=None, out=None) + + Calculates which of the given dates are valid days, and which are not. + + .. versionadded:: 1.7.0 + + Parameters + ---------- + dates : array_like of datetime64[D] + The array of dates to process. + weekmask : str or array_like of bool, optional + A seven-element array indicating which of Monday through Sunday are + valid days. May be specified as a length-seven list or array, like + [1,1,1,1,1,0,0]; a length-seven string, like '1111100'; or a string + like "Mon Tue Wed Thu Fri", made up of 3-character abbreviations for + weekdays, optionally separated by white space. Valid abbreviations + are: Mon Tue Wed Thu Fri Sat Sun + holidays : array_like of datetime64[D], optional + An array of dates to consider as invalid dates. They may be + specified in any order, and NaT (not-a-time) dates are ignored. + This list is saved in a normalized form that is suited for + fast calculations of valid days. + busdaycal : busdaycalendar, optional + A `busdaycalendar` object which specifies the valid days. If this + parameter is provided, neither weekmask nor holidays may be + provided. + out : array of bool, optional + If provided, this array is filled with the result. + + Returns + ------- + out : array of bool + An array with the same shape as ``dates``, containing True for + each valid day, and False for each invalid day. + + See Also + -------- + busdaycalendar : An object that specifies a custom set of valid days. + busday_offset : Applies an offset counted in valid days. + busday_count : Counts how many valid days are in a half-open date range. + + Examples + -------- + >>> # The weekdays are Friday, Saturday, and Monday + ... np.is_busday(['2011-07-01', '2011-07-02', '2011-07-18'], + ... holidays=['2011-07-01', '2011-07-04', '2011-07-17']) + array([False, False, True]) + """ + return (dates, weekmask, holidays, out) + + +@array_function_from_c_func_and_dispatcher(_multiarray_umath.busday_offset) +def busday_offset(dates, offsets, roll=None, weekmask=None, holidays=None, + busdaycal=None, out=None): + """ + busday_offset(dates, offsets, roll='raise', weekmask='1111100', holidays=None, busdaycal=None, out=None) + + First adjusts the date to fall on a valid day according to + the ``roll`` rule, then applies offsets to the given dates + counted in valid days. + + .. versionadded:: 1.7.0 + + Parameters + ---------- + dates : array_like of datetime64[D] + The array of dates to process. + offsets : array_like of int + The array of offsets, which is broadcast with ``dates``. + roll : {'raise', 'nat', 'forward', 'following', 'backward', 'preceding', 'modifiedfollowing', 'modifiedpreceding'}, optional + How to treat dates that do not fall on a valid day. The default + is 'raise'. + + * 'raise' means to raise an exception for an invalid day. + * 'nat' means to return a NaT (not-a-time) for an invalid day. + * 'forward' and 'following' mean to take the first valid day + later in time. 
+ * 'backward' and 'preceding' mean to take the first valid day + earlier in time. + * 'modifiedfollowing' means to take the first valid day + later in time unless it is across a Month boundary, in which + case to take the first valid day earlier in time. + * 'modifiedpreceding' means to take the first valid day + earlier in time unless it is across a Month boundary, in which + case to take the first valid day later in time. + weekmask : str or array_like of bool, optional + A seven-element array indicating which of Monday through Sunday are + valid days. May be specified as a length-seven list or array, like + [1,1,1,1,1,0,0]; a length-seven string, like '1111100'; or a string + like "Mon Tue Wed Thu Fri", made up of 3-character abbreviations for + weekdays, optionally separated by white space. Valid abbreviations + are: Mon Tue Wed Thu Fri Sat Sun + holidays : array_like of datetime64[D], optional + An array of dates to consider as invalid dates. They may be + specified in any order, and NaT (not-a-time) dates are ignored. + This list is saved in a normalized form that is suited for + fast calculations of valid days. + busdaycal : busdaycalendar, optional + A `busdaycalendar` object which specifies the valid days. If this + parameter is provided, neither weekmask nor holidays may be + provided. + out : array of datetime64[D], optional + If provided, this array is filled with the result. + + Returns + ------- + out : array of datetime64[D] + An array with a shape from broadcasting ``dates`` and ``offsets`` + together, containing the dates with offsets applied. + + See Also + -------- + busdaycalendar : An object that specifies a custom set of valid days. + is_busday : Returns a boolean array indicating valid days. + busday_count : Counts how many valid days are in a half-open date range. + + Examples + -------- + >>> # First business day in October 2011 (not accounting for holidays) + ... np.busday_offset('2011-10', 0, roll='forward') + numpy.datetime64('2011-10-03') + >>> # Last business day in February 2012 (not accounting for holidays) + ... np.busday_offset('2012-03', -1, roll='forward') + numpy.datetime64('2012-02-29') + >>> # Third Wednesday in January 2011 + ... np.busday_offset('2011-01', 2, roll='forward', weekmask='Wed') + numpy.datetime64('2011-01-19') + >>> # 2012 Mother's Day in Canada and the U.S. + ... np.busday_offset('2012-05', 1, roll='forward', weekmask='Sun') + numpy.datetime64('2012-05-13') + + >>> # First business day on or after a date + ... np.busday_offset('2011-03-20', 0, roll='forward') + numpy.datetime64('2011-03-21') + >>> np.busday_offset('2011-03-22', 0, roll='forward') + numpy.datetime64('2011-03-22') + >>> # First business day after a date + ... np.busday_offset('2011-03-20', 1, roll='backward') + numpy.datetime64('2011-03-21') + >>> np.busday_offset('2011-03-22', 1, roll='backward') + numpy.datetime64('2011-03-23') + """ + return (dates, offsets, weekmask, holidays, out) + + +@array_function_from_c_func_and_dispatcher(_multiarray_umath.busday_count) +def busday_count(begindates, enddates, weekmask=None, holidays=None, + busdaycal=None, out=None): + """ + busday_count(begindates, enddates, weekmask='1111100', holidays=[], busdaycal=None, out=None) + + Counts the number of valid days between `begindates` and + `enddates`, not including the day of `enddates`. + + If ``enddates`` specifies a date value that is earlier than the + corresponding ``begindates`` date value, the count will be negative. + + .. 
versionadded:: 1.7.0
+
+    Parameters
+    ----------
+    begindates : array_like of datetime64[D]
+        The array of the first dates for counting.
+    enddates : array_like of datetime64[D]
+        The array of the end dates for counting, which are excluded
+        from the count themselves.
+    weekmask : str or array_like of bool, optional
+        A seven-element array indicating which of Monday through Sunday are
+        valid days. May be specified as a length-seven list or array, like
+        [1,1,1,1,1,0,0]; a length-seven string, like '1111100'; or a string
+        like "Mon Tue Wed Thu Fri", made up of 3-character abbreviations for
+        weekdays, optionally separated by white space. Valid abbreviations
+        are: Mon Tue Wed Thu Fri Sat Sun
+    holidays : array_like of datetime64[D], optional
+        An array of dates to consider as invalid dates. They may be
+        specified in any order, and NaT (not-a-time) dates are ignored.
+        This list is saved in a normalized form that is suited for
+        fast calculations of valid days.
+    busdaycal : busdaycalendar, optional
+        A `busdaycalendar` object which specifies the valid days. If this
+        parameter is provided, neither weekmask nor holidays may be
+        provided.
+    out : array of int, optional
+        If provided, this array is filled with the result.
+
+    Returns
+    -------
+    out : array of int
+        An array with a shape from broadcasting ``begindates`` and ``enddates``
+        together, containing the number of valid days between
+        the begin and end dates.
+
+    See Also
+    --------
+    busdaycalendar : An object that specifies a custom set of valid days.
+    is_busday : Returns a boolean array indicating valid days.
+    busday_offset : Applies an offset counted in valid days.
+
+    Examples
+    --------
+    >>> # Number of weekdays in January 2011
+    ... np.busday_count('2011-01', '2011-02')
+    21
+    >>> # Number of weekdays in 2011
+    >>> np.busday_count('2011', '2012')
+    260
+    >>> # Number of Saturdays in 2011
+    ... np.busday_count('2011', '2012', weekmask='Sat')
+    53
+    """
+    return (begindates, enddates, weekmask, holidays, out)
+
+
+@array_function_from_c_func_and_dispatcher(
+    _multiarray_umath.datetime_as_string)
+def datetime_as_string(arr, unit=None, timezone=None, casting=None):
+    """
+    datetime_as_string(arr, unit=None, timezone='naive', casting='same_kind')
+
+    Convert an array of datetimes into an array of strings.
+
+    Parameters
+    ----------
+    arr : array_like of datetime64
+        The array of UTC timestamps to format.
+    unit : str
+        One of None, 'auto', or a :ref:`datetime unit <arrays.dtypes.dateunits>`.
+    timezone : {'naive', 'UTC', 'local'} or tzinfo
+        Timezone information to use when displaying the datetime. If 'UTC', end
+        with a Z to indicate UTC time. If 'local', convert to the local timezone
+        first, and suffix with a +-#### timezone offset. If a tzinfo object,
+        then do as with 'local', but use the specified timezone.
+    casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}
+        Casting to allow when changing between datetime units.
+
+    Returns
+    -------
+    str_arr : ndarray
+        An array of strings the same shape as `arr`.
+
+    Examples
+    --------
+    >>> import pytz
+    >>> d = np.arange('2002-10-27T04:30', 4*60, 60, dtype='M8[m]')
+    >>> d
+    array(['2002-10-27T04:30', '2002-10-27T05:30', '2002-10-27T06:30',
+           '2002-10-27T07:30'], dtype='datetime64[m]')
+
+    Setting the timezone to UTC shows the same information, but with a Z suffix
+
+    >>> np.datetime_as_string(d, timezone='UTC')
+    array(['2002-10-27T04:30Z', '2002-10-27T05:30Z', '2002-10-27T06:30Z',
+           '2002-10-27T07:30Z'], dtype='<U35')
+
+    Note that we picked datetimes during a DST boundary, so the
+    UTC-5 and UTC-4 timezones are displayed
+
+    >>> np.datetime_as_string(d, timezone=pytz.timezone('US/Eastern'))
+    array(['2002-10-27T00:30-0400', '2002-10-27T01:30-0400',
+           '2002-10-27T01:30-0500', '2002-10-27T02:30-0500'], dtype='<U39')
+
+    Passing in a unit will change the precision
+
+    >>> np.datetime_as_string(d, unit='h')
+    array(['2002-10-27T04', '2002-10-27T05', '2002-10-27T06', '2002-10-27T07'],
+          dtype='<U32')
+    >>> np.datetime_as_string(d, unit='s')
+    array(['2002-10-27T04:30:00', '2002-10-27T05:30:00', '2002-10-27T06:30:00',
+           '2002-10-27T07:30:00'], dtype='<U38')
+
+    'casting' can be used to specify whether precision can be changed
+
+    >>> np.datetime_as_string(d, unit='h', casting='safe')
+    Traceback (most recent call last):
+        ...
+    TypeError: Cannot create a datetime string as units 'h' from a NumPy
+    datetime with units 'm' according to the rule 'safe'
+    """
+    return (arr,)
diff --git a/.local/lib/python3.8/site-packages/numpy/core/multiarray.pyi b/.local/lib/python3.8/site-packages/numpy/core/multiarray.pyi
new file mode 100644
index 0000000..5a89995
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/core/multiarray.pyi
@@ -0,0 +1,1035 @@
+# TODO: Sort out any and all missing functions in this namespace
+
+import os
+import datetime as dt
+from typing import (
+    Literal as L,
+    Any,
+    Callable,
+    Iterable,
+    Optional,
+    overload,
+    TypeVar,
+    List,
+    Type,
+    Union,
+    Sequence,
+    Tuple,
+    SupportsIndex,
+    final,
+    Final,
+    Protocol,
+)
+
+from numpy import (
+    # Re-exports
+    busdaycalendar as busdaycalendar,
+    broadcast as broadcast,
+    dtype as dtype,
+    ndarray as ndarray,
+    nditer as nditer,
+
+    # The rest
+    ufunc,
+    str_,
+    bool_,
+    uint8,
+    intp,
+    int_,
+    float64,
+    timedelta64,
+    datetime64,
+    generic,
+    unsignedinteger,
+    signedinteger,
+    floating,
+    complexfloating,
+    _OrderKACF,
+    _OrderCF,
+    _CastingKind,
+    _ModeKind,
+    _SupportsBuffer,
+    _IOProtocol,
+    _CopyMode,
+    _NDIterFlagsKind,
+    _NDIterOpFlagsKind,
+)
+
+from numpy.typing import (
+    # Shapes
+    _ShapeLike,
+
+    # DTypes
+    DTypeLike,
+    _SupportsDType,
+
+    # Arrays
+    NDArray,
+    ArrayLike,
+    _SupportsArray,
+    _NestedSequence,
+    _FiniteNestedSequence,
+    _ArrayLikeBool_co,
+    _ArrayLikeUInt_co,
+    _ArrayLikeInt_co,
+    _ArrayLikeFloat_co,
+    _ArrayLikeComplex_co,
+    _ArrayLikeTD64_co,
+    _ArrayLikeDT64_co,
+    _ArrayLikeObject_co,
+    _ArrayLikeStr_co,
+    _ArrayLikeBytes_co,
+    _ScalarLike_co,
+    _IntLike_co,
+    _FloatLike_co,
+    _TD64Like_co,
+)
+
+_T_co = TypeVar("_T_co", covariant=True)
+_T_contra = TypeVar("_T_contra", contravariant=True)
+_SCT = TypeVar("_SCT", bound=generic)
+_ArrayType = TypeVar("_ArrayType", bound=NDArray[Any])
+
+# Subscriptable subsets of `npt.DTypeLike` and `npt.ArrayLike`
+_DTypeLike = Union[
+    dtype[_SCT],
+    Type[_SCT],
+    _SupportsDType[dtype[_SCT]],
+]
+_ArrayLike = _FiniteNestedSequence[_SupportsArray[dtype[_SCT]]]
+
+# Valid time units
+_UnitKind = L[
+    "Y",
+    "M",
+    "D",
+    "h",
+    "m",
+    "s",
+    "ms",
+    "us", "μs",
+    "ns",
+    "ps",
+    "fs",
+    "as",
+]
+_RollKind = L[  # `raise` is deliberately excluded
+    "nat",
+    "forward",
+    "following",
+    "backward",
+    "preceding",
+    "modifiedfollowing",
+    "modifiedpreceding",
+]
+
+class _SupportsLenAndGetItem(Protocol[_T_contra, _T_co]):
+    def __len__(self) -> int: ...
+ def __getitem__(self, __key: _T_contra) -> _T_co: ... + +__all__: List[str] + +ALLOW_THREADS: Final[int] # 0 or 1 (system-specific) +BUFSIZE: L[8192] +CLIP: L[0] +WRAP: L[1] +RAISE: L[2] +MAXDIMS: L[32] +MAY_SHARE_BOUNDS: L[0] +MAY_SHARE_EXACT: L[-1] +tracemalloc_domain: L[389047] + +@overload +def empty_like( + prototype: _ArrayType, + dtype: None = ..., + order: _OrderKACF = ..., + subok: bool = ..., + shape: Optional[_ShapeLike] = ..., +) -> _ArrayType: ... +@overload +def empty_like( + prototype: _ArrayLike[_SCT], + dtype: None = ..., + order: _OrderKACF = ..., + subok: bool = ..., + shape: Optional[_ShapeLike] = ..., +) -> NDArray[_SCT]: ... +@overload +def empty_like( + prototype: object, + dtype: None = ..., + order: _OrderKACF = ..., + subok: bool = ..., + shape: Optional[_ShapeLike] = ..., +) -> NDArray[Any]: ... +@overload +def empty_like( + prototype: Any, + dtype: _DTypeLike[_SCT], + order: _OrderKACF = ..., + subok: bool = ..., + shape: Optional[_ShapeLike] = ..., +) -> NDArray[_SCT]: ... +@overload +def empty_like( + prototype: Any, + dtype: DTypeLike, + order: _OrderKACF = ..., + subok: bool = ..., + shape: Optional[_ShapeLike] = ..., +) -> NDArray[Any]: ... + +@overload +def array( + object: _ArrayType, + dtype: None = ..., + *, + copy: bool | _CopyMode = ..., + order: _OrderKACF = ..., + subok: L[True], + ndmin: int = ..., + like: ArrayLike = ..., +) -> _ArrayType: ... +@overload +def array( + object: _ArrayLike[_SCT], + dtype: None = ..., + *, + copy: bool | _CopyMode = ..., + order: _OrderKACF = ..., + subok: bool = ..., + ndmin: int = ..., + like: ArrayLike = ..., +) -> NDArray[_SCT]: ... +@overload +def array( + object: object, + dtype: None = ..., + *, + copy: bool | _CopyMode = ..., + order: _OrderKACF = ..., + subok: bool = ..., + ndmin: int = ..., + like: ArrayLike = ..., +) -> NDArray[Any]: ... +@overload +def array( + object: Any, + dtype: _DTypeLike[_SCT], + *, + copy: bool | _CopyMode = ..., + order: _OrderKACF = ..., + subok: bool = ..., + ndmin: int = ..., + like: ArrayLike = ..., +) -> NDArray[_SCT]: ... +@overload +def array( + object: Any, + dtype: DTypeLike, + *, + copy: bool | _CopyMode = ..., + order: _OrderKACF = ..., + subok: bool = ..., + ndmin: int = ..., + like: ArrayLike = ..., +) -> NDArray[Any]: ... + +@overload +def zeros( + shape: _ShapeLike, + dtype: None = ..., + order: _OrderCF = ..., + *, + like: ArrayLike = ..., +) -> NDArray[float64]: ... +@overload +def zeros( + shape: _ShapeLike, + dtype: _DTypeLike[_SCT], + order: _OrderCF = ..., + *, + like: ArrayLike = ..., +) -> NDArray[_SCT]: ... +@overload +def zeros( + shape: _ShapeLike, + dtype: DTypeLike, + order: _OrderCF = ..., + *, + like: ArrayLike = ..., +) -> NDArray[Any]: ... + +@overload +def empty( + shape: _ShapeLike, + dtype: None = ..., + order: _OrderCF = ..., + *, + like: ArrayLike = ..., +) -> NDArray[float64]: ... +@overload +def empty( + shape: _ShapeLike, + dtype: _DTypeLike[_SCT], + order: _OrderCF = ..., + *, + like: ArrayLike = ..., +) -> NDArray[_SCT]: ... +@overload +def empty( + shape: _ShapeLike, + dtype: DTypeLike, + order: _OrderCF = ..., + *, + like: ArrayLike = ..., +) -> NDArray[Any]: ... + +@overload +def unravel_index( # type: ignore[misc] + indices: _IntLike_co, + shape: _ShapeLike, + order: _OrderCF = ..., +) -> Tuple[intp, ...]: ... +@overload +def unravel_index( + indices: _ArrayLikeInt_co, + shape: _ShapeLike, + order: _OrderCF = ..., +) -> Tuple[NDArray[intp], ...]: ... 
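+
+# (Editorial annotation, not part of the upstream stub.) The paired
+# unravel_index overloads above mean a scalar index is typed as
+# Tuple[intp, ...] while an array of indices is typed as
+# Tuple[NDArray[intp], ...]; e.g. a type checker infers
+#
+#     np.unravel_index(1621, (6, 7, 8, 9))    # -> Tuple[intp, ...]
+#     np.unravel_index([22, 41], (7, 6))      # -> Tuple[NDArray[intp], ...]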
+ +@overload +def ravel_multi_index( # type: ignore[misc] + multi_index: Sequence[_IntLike_co], + dims: Sequence[SupportsIndex], + mode: Union[_ModeKind, Tuple[_ModeKind, ...]] = ..., + order: _OrderCF = ..., +) -> intp: ... +@overload +def ravel_multi_index( + multi_index: Sequence[_ArrayLikeInt_co], + dims: Sequence[SupportsIndex], + mode: Union[_ModeKind, Tuple[_ModeKind, ...]] = ..., + order: _OrderCF = ..., +) -> NDArray[intp]: ... + +# NOTE: Allow any sequence of array-like objects +@overload +def concatenate( # type: ignore[misc] + arrays: _ArrayLike[_SCT], + /, + axis: Optional[SupportsIndex] = ..., + out: None = ..., + *, + dtype: None = ..., + casting: Optional[_CastingKind] = ... +) -> NDArray[_SCT]: ... +@overload +def concatenate( # type: ignore[misc] + arrays: _SupportsLenAndGetItem[int, ArrayLike], + /, + axis: Optional[SupportsIndex] = ..., + out: None = ..., + *, + dtype: None = ..., + casting: Optional[_CastingKind] = ... +) -> NDArray[Any]: ... +@overload +def concatenate( # type: ignore[misc] + arrays: _SupportsLenAndGetItem[int, ArrayLike], + /, + axis: Optional[SupportsIndex] = ..., + out: None = ..., + *, + dtype: _DTypeLike[_SCT], + casting: Optional[_CastingKind] = ... +) -> NDArray[_SCT]: ... +@overload +def concatenate( # type: ignore[misc] + arrays: _SupportsLenAndGetItem[int, ArrayLike], + /, + axis: Optional[SupportsIndex] = ..., + out: None = ..., + *, + dtype: DTypeLike, + casting: Optional[_CastingKind] = ... +) -> NDArray[Any]: ... +@overload +def concatenate( + arrays: _SupportsLenAndGetItem[int, ArrayLike], + /, + axis: Optional[SupportsIndex] = ..., + out: _ArrayType = ..., + *, + dtype: DTypeLike = ..., + casting: Optional[_CastingKind] = ... +) -> _ArrayType: ... + +def inner( + a: ArrayLike, + b: ArrayLike, + /, +) -> Any: ... + +@overload +def where( + condition: ArrayLike, + /, +) -> Tuple[NDArray[intp], ...]: ... +@overload +def where( + condition: ArrayLike, + x: ArrayLike, + y: ArrayLike, + /, +) -> NDArray[Any]: ... + +def lexsort( + keys: ArrayLike, + axis: Optional[SupportsIndex] = ..., +) -> Any: ... + +def can_cast( + from_: Union[ArrayLike, DTypeLike], + to: DTypeLike, + casting: Optional[_CastingKind] = ..., +) -> bool: ... + +def min_scalar_type( + a: ArrayLike, /, +) -> dtype[Any]: ... + +def result_type( + *arrays_and_dtypes: Union[ArrayLike, DTypeLike], +) -> dtype[Any]: ... + +@overload +def dot(a: ArrayLike, b: ArrayLike, out: None = ...) -> Any: ... +@overload +def dot(a: ArrayLike, b: ArrayLike, out: _ArrayType) -> _ArrayType: ... + +@overload +def vdot(a: _ArrayLikeBool_co, b: _ArrayLikeBool_co, /) -> bool_: ... # type: ignore[misc] +@overload +def vdot(a: _ArrayLikeUInt_co, b: _ArrayLikeUInt_co, /) -> unsignedinteger[Any]: ... # type: ignore[misc] +@overload +def vdot(a: _ArrayLikeInt_co, b: _ArrayLikeInt_co, /) -> signedinteger[Any]: ... # type: ignore[misc] +@overload +def vdot(a: _ArrayLikeFloat_co, b: _ArrayLikeFloat_co, /) -> floating[Any]: ... # type: ignore[misc] +@overload +def vdot(a: _ArrayLikeComplex_co, b: _ArrayLikeComplex_co, /) -> complexfloating[Any, Any]: ... # type: ignore[misc] +@overload +def vdot(a: _ArrayLikeTD64_co, b: _ArrayLikeTD64_co, /) -> timedelta64: ... +@overload +def vdot(a: _ArrayLikeObject_co, b: Any, /) -> Any: ... +@overload +def vdot(a: Any, b: _ArrayLikeObject_co, /) -> Any: ... + +def bincount( + x: ArrayLike, + /, + weights: Optional[ArrayLike] = ..., + minlength: SupportsIndex = ..., +) -> NDArray[intp]: ... 
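+
+# Editorial sketch, not part of the upstream stub file: `bincount` counts
+# occurrences of each non-negative integer value, so the result has length
+# `max(x) + 1`, or `minlength` if that is larger:
+#
+#     >>> np.bincount(np.array([0, 1, 1, 3]))               # -> array([1, 2, 0, 1])
+#     >>> np.bincount(np.array([0, 1, 1, 3]), minlength=6)  # -> array([1, 2, 0, 1, 0, 0])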
+ +def copyto( + dst: NDArray[Any], + src: ArrayLike, + casting: Optional[_CastingKind] = ..., + where: Optional[_ArrayLikeBool_co] = ..., +) -> None: ... + +def putmask( + a: NDArray[Any], + mask: _ArrayLikeBool_co, + values: ArrayLike, +) -> None: ... + +def packbits( + a: _ArrayLikeInt_co, + /, + axis: Optional[SupportsIndex] = ..., + bitorder: L["big", "little"] = ..., +) -> NDArray[uint8]: ... + +def unpackbits( + a: _ArrayLike[uint8], + /, + axis: Optional[SupportsIndex] = ..., + count: Optional[SupportsIndex] = ..., + bitorder: L["big", "little"] = ..., +) -> NDArray[uint8]: ... + +def shares_memory( + a: object, + b: object, + /, + max_work: Optional[int] = ..., +) -> bool: ... + +def may_share_memory( + a: object, + b: object, + /, + max_work: Optional[int] = ..., +) -> bool: ... + +@overload +def asarray( + a: _ArrayLike[_SCT], + dtype: None = ..., + order: _OrderKACF = ..., + *, + like: ArrayLike = ..., +) -> NDArray[_SCT]: ... +@overload +def asarray( + a: object, + dtype: None = ..., + order: _OrderKACF = ..., + *, + like: ArrayLike = ..., +) -> NDArray[Any]: ... +@overload +def asarray( + a: Any, + dtype: _DTypeLike[_SCT], + order: _OrderKACF = ..., + *, + like: ArrayLike = ..., +) -> NDArray[_SCT]: ... +@overload +def asarray( + a: Any, + dtype: DTypeLike, + order: _OrderKACF = ..., + *, + like: ArrayLike = ..., +) -> NDArray[Any]: ... + +@overload +def asanyarray( + a: _ArrayType, # Preserve subclass-information + dtype: None = ..., + order: _OrderKACF = ..., + *, + like: ArrayLike = ..., +) -> _ArrayType: ... +@overload +def asanyarray( + a: _ArrayLike[_SCT], + dtype: None = ..., + order: _OrderKACF = ..., + *, + like: ArrayLike = ..., +) -> NDArray[_SCT]: ... +@overload +def asanyarray( + a: object, + dtype: None = ..., + order: _OrderKACF = ..., + *, + like: ArrayLike = ..., +) -> NDArray[Any]: ... +@overload +def asanyarray( + a: Any, + dtype: _DTypeLike[_SCT], + order: _OrderKACF = ..., + *, + like: ArrayLike = ..., +) -> NDArray[_SCT]: ... +@overload +def asanyarray( + a: Any, + dtype: DTypeLike, + order: _OrderKACF = ..., + *, + like: ArrayLike = ..., +) -> NDArray[Any]: ... + +@overload +def ascontiguousarray( + a: _ArrayLike[_SCT], + dtype: None = ..., + *, + like: ArrayLike = ..., +) -> NDArray[_SCT]: ... +@overload +def ascontiguousarray( + a: object, + dtype: None = ..., + *, + like: ArrayLike = ..., +) -> NDArray[Any]: ... +@overload +def ascontiguousarray( + a: Any, + dtype: _DTypeLike[_SCT], + *, + like: ArrayLike = ..., +) -> NDArray[_SCT]: ... +@overload +def ascontiguousarray( + a: Any, + dtype: DTypeLike, + *, + like: ArrayLike = ..., +) -> NDArray[Any]: ... + +@overload +def asfortranarray( + a: _ArrayLike[_SCT], + dtype: None = ..., + *, + like: ArrayLike = ..., +) -> NDArray[_SCT]: ... +@overload +def asfortranarray( + a: object, + dtype: None = ..., + *, + like: ArrayLike = ..., +) -> NDArray[Any]: ... +@overload +def asfortranarray( + a: Any, + dtype: _DTypeLike[_SCT], + *, + like: ArrayLike = ..., +) -> NDArray[_SCT]: ... +@overload +def asfortranarray( + a: Any, + dtype: DTypeLike, + *, + like: ArrayLike = ..., +) -> NDArray[Any]: ... + +# In practice `List[Any]` is list with an int, int and a valid +# `np.seterrcall()` object +def geterrobj() -> List[Any]: ... +def seterrobj(errobj: List[Any], /) -> None: ... + +def promote_types(__type1: DTypeLike, __type2: DTypeLike) -> dtype[Any]: ... 
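+
+# Editorial sketch, not part of the upstream stub file: `promote_types`
+# returns the smallest dtype to which both arguments can be safely cast:
+#
+#     >>> np.promote_types('i4', 'f4')   # -> dtype('float64')
+#     >>> np.promote_types('i1', 'u1')   # -> dtype('int16')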
+ +# `sep` is a de facto mandatory argument, as its default value is deprecated +@overload +def fromstring( + string: str | bytes, + dtype: None = ..., + count: SupportsIndex = ..., + *, + sep: str, + like: ArrayLike = ..., +) -> NDArray[float64]: ... +@overload +def fromstring( + string: str | bytes, + dtype: _DTypeLike[_SCT], + count: SupportsIndex = ..., + *, + sep: str, + like: ArrayLike = ..., +) -> NDArray[_SCT]: ... +@overload +def fromstring( + string: str | bytes, + dtype: DTypeLike, + count: SupportsIndex = ..., + *, + sep: str, + like: ArrayLike = ..., +) -> NDArray[Any]: ... + +def frompyfunc( + func: Callable[..., Any], /, + nin: SupportsIndex, + nout: SupportsIndex, + *, + identity: Any = ..., +) -> ufunc: ... + +@overload +def fromfile( + file: str | bytes | os.PathLike[Any] | _IOProtocol, + dtype: None = ..., + count: SupportsIndex = ..., + sep: str = ..., + offset: SupportsIndex = ..., + *, + like: ArrayLike = ..., +) -> NDArray[float64]: ... +@overload +def fromfile( + file: str | bytes | os.PathLike[Any] | _IOProtocol, + dtype: _DTypeLike[_SCT], + count: SupportsIndex = ..., + sep: str = ..., + offset: SupportsIndex = ..., + *, + like: ArrayLike = ..., +) -> NDArray[_SCT]: ... +@overload +def fromfile( + file: str | bytes | os.PathLike[Any] | _IOProtocol, + dtype: DTypeLike, + count: SupportsIndex = ..., + sep: str = ..., + offset: SupportsIndex = ..., + *, + like: ArrayLike = ..., +) -> NDArray[Any]: ... + +@overload +def fromiter( + iter: Iterable[Any], + dtype: _DTypeLike[_SCT], + count: SupportsIndex = ..., + *, + like: ArrayLike = ..., +) -> NDArray[_SCT]: ... +@overload +def fromiter( + iter: Iterable[Any], + dtype: DTypeLike, + count: SupportsIndex = ..., + *, + like: ArrayLike = ..., +) -> NDArray[Any]: ... + +@overload +def frombuffer( + buffer: _SupportsBuffer, + dtype: None = ..., + count: SupportsIndex = ..., + offset: SupportsIndex = ..., + *, + like: ArrayLike = ..., +) -> NDArray[float64]: ... +@overload +def frombuffer( + buffer: _SupportsBuffer, + dtype: _DTypeLike[_SCT], + count: SupportsIndex = ..., + offset: SupportsIndex = ..., + *, + like: ArrayLike = ..., +) -> NDArray[_SCT]: ... +@overload +def frombuffer( + buffer: _SupportsBuffer, + dtype: DTypeLike, + count: SupportsIndex = ..., + offset: SupportsIndex = ..., + *, + like: ArrayLike = ..., +) -> NDArray[Any]: ... + +@overload +def arange( # type: ignore[misc] + stop: _IntLike_co, + /, *, + dtype: None = ..., + like: ArrayLike = ..., +) -> NDArray[signedinteger[Any]]: ... +@overload +def arange( # type: ignore[misc] + start: _IntLike_co, + stop: _IntLike_co, + step: _IntLike_co = ..., + dtype: None = ..., + *, + like: ArrayLike = ..., +) -> NDArray[signedinteger[Any]]: ... +@overload +def arange( # type: ignore[misc] + stop: _FloatLike_co, + /, *, + dtype: None = ..., + like: ArrayLike = ..., +) -> NDArray[floating[Any]]: ... +@overload +def arange( # type: ignore[misc] + start: _FloatLike_co, + stop: _FloatLike_co, + step: _FloatLike_co = ..., + dtype: None = ..., + *, + like: ArrayLike = ..., +) -> NDArray[floating[Any]]: ... +@overload +def arange( + stop: _TD64Like_co, + /, *, + dtype: None = ..., + like: ArrayLike = ..., +) -> NDArray[timedelta64]: ... +@overload +def arange( + start: _TD64Like_co, + stop: _TD64Like_co, + step: _TD64Like_co = ..., + dtype: None = ..., + *, + like: ArrayLike = ..., +) -> NDArray[timedelta64]: ... 
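+# Editorial note, not part of the upstream stub file: the overloads above
+# encode NumPy's dtype inference when `dtype` is omitted, e.g. `np.arange(3)`
+# yields a signed-integer array while `np.arange(3.0)` yields a
+# floating-point array; the overloads below cover datetime64 and
+# explicit-dtype calls.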
+@overload +def arange( # both start and stop must always be specified for datetime64 + start: datetime64, + stop: datetime64, + step: datetime64 = ..., + dtype: None = ..., + *, + like: ArrayLike = ..., +) -> NDArray[datetime64]: ... +@overload +def arange( + stop: Any, + /, *, + dtype: _DTypeLike[_SCT], + like: ArrayLike = ..., +) -> NDArray[_SCT]: ... +@overload +def arange( + start: Any, + stop: Any, + step: Any = ..., + dtype: _DTypeLike[_SCT] = ..., + *, + like: ArrayLike = ..., +) -> NDArray[_SCT]: ... +@overload +def arange( + stop: Any, /, + *, + dtype: DTypeLike, + like: ArrayLike = ..., +) -> NDArray[Any]: ... +@overload +def arange( + start: Any, + stop: Any, + step: Any = ..., + dtype: DTypeLike = ..., + *, + like: ArrayLike = ..., +) -> NDArray[Any]: ... + +def datetime_data( + dtype: str | _DTypeLike[datetime64] | _DTypeLike[timedelta64], /, +) -> Tuple[str, int]: ... + +# The datetime functions perform unsafe casts to `datetime64[D]`, +# so a lot of different argument types are allowed here + +@overload +def busday_count( # type: ignore[misc] + begindates: _ScalarLike_co | dt.date, + enddates: _ScalarLike_co | dt.date, + weekmask: ArrayLike = ..., + holidays: None | ArrayLike | dt.date | _NestedSequence[dt.date] = ..., + busdaycal: None | busdaycalendar = ..., + out: None = ..., +) -> int_: ... +@overload +def busday_count( # type: ignore[misc] + begindates: ArrayLike | dt.date | _NestedSequence[dt.date], + enddates: ArrayLike | dt.date | _NestedSequence[dt.date], + weekmask: ArrayLike = ..., + holidays: None | ArrayLike | dt.date | _NestedSequence[dt.date] = ..., + busdaycal: None | busdaycalendar = ..., + out: None = ..., +) -> NDArray[int_]: ... +@overload +def busday_count( + begindates: ArrayLike | dt.date | _NestedSequence[dt.date], + enddates: ArrayLike | dt.date | _NestedSequence[dt.date], + weekmask: ArrayLike = ..., + holidays: None | ArrayLike | dt.date | _NestedSequence[dt.date] = ..., + busdaycal: None | busdaycalendar = ..., + out: _ArrayType = ..., +) -> _ArrayType: ... + +# `roll="raise"` is (more or less?) equivalent to `casting="safe"` +@overload +def busday_offset( # type: ignore[misc] + dates: datetime64 | dt.date, + offsets: _TD64Like_co | dt.timedelta, + roll: L["raise"] = ..., + weekmask: ArrayLike = ..., + holidays: None | ArrayLike | dt.date | _NestedSequence[dt.date] = ..., + busdaycal: None | busdaycalendar = ..., + out: None = ..., +) -> datetime64: ... +@overload +def busday_offset( # type: ignore[misc] + dates: _ArrayLike[datetime64] | dt.date | _NestedSequence[dt.date], + offsets: _ArrayLikeTD64_co | dt.timedelta | _NestedSequence[dt.timedelta], + roll: L["raise"] = ..., + weekmask: ArrayLike = ..., + holidays: None | ArrayLike | dt.date | _NestedSequence[dt.date] = ..., + busdaycal: None | busdaycalendar = ..., + out: None = ..., +) -> NDArray[datetime64]: ... +@overload +def busday_offset( # type: ignore[misc] + dates: _ArrayLike[datetime64] | dt.date | _NestedSequence[dt.date], + offsets: _ArrayLikeTD64_co | dt.timedelta | _NestedSequence[dt.timedelta], + roll: L["raise"] = ..., + weekmask: ArrayLike = ..., + holidays: None | ArrayLike | dt.date | _NestedSequence[dt.date] = ..., + busdaycal: None | busdaycalendar = ..., + out: _ArrayType = ..., +) -> _ArrayType: ... 
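+# Editorial note, not part of the upstream stub file: the overloads below
+# accept any `_RollKind` value, which controls how a date that is not a
+# business day is resolved before the offset is applied, e.g.
+#
+#     >>> np.busday_offset('2011-03-20', 0, roll='forward')  # a Sunday
+#     numpy.datetime64('2011-03-21')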
+@overload
+def busday_offset(  # type: ignore[misc]
+    dates: _ScalarLike_co | dt.date,
+    offsets: _ScalarLike_co | dt.timedelta,
+    roll: _RollKind,
+    weekmask: ArrayLike = ...,
+    holidays: None | ArrayLike | dt.date | _NestedSequence[dt.date] = ...,
+    busdaycal: None | busdaycalendar = ...,
+    out: None = ...,
+) -> datetime64: ...
+@overload
+def busday_offset(  # type: ignore[misc]
+    dates: ArrayLike | dt.date | _NestedSequence[dt.date],
+    offsets: ArrayLike | dt.timedelta | _NestedSequence[dt.timedelta],
+    roll: _RollKind,
+    weekmask: ArrayLike = ...,
+    holidays: None | ArrayLike | dt.date | _NestedSequence[dt.date] = ...,
+    busdaycal: None | busdaycalendar = ...,
+    out: None = ...,
+) -> NDArray[datetime64]: ...
+@overload
+def busday_offset(
+    dates: ArrayLike | dt.date | _NestedSequence[dt.date],
+    offsets: ArrayLike | dt.timedelta | _NestedSequence[dt.timedelta],
+    roll: _RollKind,
+    weekmask: ArrayLike = ...,
+    holidays: None | ArrayLike | dt.date | _NestedSequence[dt.date] = ...,
+    busdaycal: None | busdaycalendar = ...,
+    out: _ArrayType = ...,
+) -> _ArrayType: ...
+
+@overload
+def is_busday(  # type: ignore[misc]
+    dates: _ScalarLike_co | dt.date,
+    weekmask: ArrayLike = ...,
+    holidays: None | ArrayLike | dt.date | _NestedSequence[dt.date] = ...,
+    busdaycal: None | busdaycalendar = ...,
+    out: None = ...,
+) -> bool_: ...
+@overload
+def is_busday(  # type: ignore[misc]
+    dates: ArrayLike | _NestedSequence[dt.date],
+    weekmask: ArrayLike = ...,
+    holidays: None | ArrayLike | dt.date | _NestedSequence[dt.date] = ...,
+    busdaycal: None | busdaycalendar = ...,
+    out: None = ...,
+) -> NDArray[bool_]: ...
+@overload
+def is_busday(
+    dates: ArrayLike | _NestedSequence[dt.date],
+    weekmask: ArrayLike = ...,
+    holidays: None | ArrayLike | dt.date | _NestedSequence[dt.date] = ...,
+    busdaycal: None | busdaycalendar = ...,
+    out: _ArrayType = ...,
+) -> _ArrayType: ...
+
+@overload
+def datetime_as_string(  # type: ignore[misc]
+    arr: datetime64 | dt.date,
+    unit: None | L["auto"] | _UnitKind = ...,
+    timezone: L["naive", "UTC", "local"] | dt.tzinfo = ...,
+    casting: _CastingKind = ...,
+) -> str_: ...
+@overload
+def datetime_as_string(
+    arr: _ArrayLikeDT64_co | _NestedSequence[dt.date],
+    unit: None | L["auto"] | _UnitKind = ...,
+    timezone: L["naive", "UTC", "local"] | dt.tzinfo = ...,
+    casting: _CastingKind = ...,
+) -> NDArray[str_]: ...
+
+@overload
+def compare_chararrays(
+    a1: _ArrayLikeStr_co,
+    a2: _ArrayLikeStr_co,
+    cmp: L["<", "<=", "==", ">=", ">", "!="],
+    rstrip: bool,
+) -> NDArray[bool_]: ...
+@overload
+def compare_chararrays(
+    a1: _ArrayLikeBytes_co,
+    a2: _ArrayLikeBytes_co,
+    cmp: L["<", "<=", "==", ">=", ">", "!="],
+    rstrip: bool,
+) -> NDArray[bool_]: ...
+
+def add_docstring(obj: Callable[..., Any], docstring: str, /) -> None: ...
+
+_GetItemKeys = L[
+    "C", "CONTIGUOUS", "C_CONTIGUOUS",
+    "F", "FORTRAN", "F_CONTIGUOUS",
+    "W", "WRITEABLE",
+    "B", "BEHAVED",
+    "O", "OWNDATA",
+    "A", "ALIGNED",
+    "X", "WRITEBACKIFCOPY",
+    "CA", "CARRAY",
+    "FA", "FARRAY",
+    "FNC",
+    "FORC",
+]
+_SetItemKeys = L[
+    "A", "ALIGNED",
+    "W", "WRITEABLE",
+    "X", "WRITEBACKIFCOPY",
+]
+
+@final
+class flagsobj:
+    __hash__: None  # type: ignore[assignment]
+    aligned: bool
+    # NOTE: deprecated
+    # updateifcopy: bool
+    writeable: bool
+    writebackifcopy: bool
+    @property
+    def behaved(self) -> bool: ...
+    @property
+    def c_contiguous(self) -> bool: ...
+    @property
+    def carray(self) -> bool: ...
+    @property
+    def contiguous(self) -> bool: ...
+ @property + def f_contiguous(self) -> bool: ... + @property + def farray(self) -> bool: ... + @property + def fnc(self) -> bool: ... + @property + def forc(self) -> bool: ... + @property + def fortran(self) -> bool: ... + @property + def num(self) -> int: ... + @property + def owndata(self) -> bool: ... + def __getitem__(self, key: _GetItemKeys) -> bool: ... + def __setitem__(self, key: _SetItemKeys, value: bool) -> None: ... + +def nested_iters( + op: ArrayLike | Sequence[ArrayLike], + axes: Sequence[Sequence[SupportsIndex]], + flags: None | Sequence[_NDIterFlagsKind] = ..., + op_flags: None | Sequence[Sequence[_NDIterOpFlagsKind]] = ..., + op_dtypes: DTypeLike | Sequence[DTypeLike] = ..., + order: _OrderKACF = ..., + casting: _CastingKind = ..., + buffersize: SupportsIndex = ..., +) -> Tuple[nditer, ...]: ... diff --git a/.local/lib/python3.8/site-packages/numpy/core/numeric.py b/.local/lib/python3.8/site-packages/numpy/core/numeric.py new file mode 100644 index 0000000..344d40d --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/numeric.py @@ -0,0 +1,2540 @@ +import functools +import itertools +import operator +import sys +import warnings +import numbers + +import numpy as np +from . import multiarray +from .multiarray import ( + _fastCopyAndTranspose as fastCopyAndTranspose, ALLOW_THREADS, + BUFSIZE, CLIP, MAXDIMS, MAY_SHARE_BOUNDS, MAY_SHARE_EXACT, RAISE, + WRAP, arange, array, asarray, asanyarray, ascontiguousarray, + asfortranarray, broadcast, can_cast, compare_chararrays, + concatenate, copyto, dot, dtype, empty, + empty_like, flatiter, frombuffer, _from_dlpack, fromfile, fromiter, + fromstring, inner, lexsort, matmul, may_share_memory, + min_scalar_type, ndarray, nditer, nested_iters, promote_types, + putmask, result_type, set_numeric_ops, shares_memory, vdot, where, + zeros, normalize_axis_index) + +from . import overrides +from . import umath +from . import shape_base +from .overrides import set_array_function_like_doc, set_module +from .umath import (multiply, invert, sin, PINF, NAN) +from . 
import numerictypes +from .numerictypes import longlong, intc, int_, float_, complex_, bool_ +from ._exceptions import TooHardError, AxisError +from ._ufunc_config import errstate + +bitwise_not = invert +ufunc = type(sin) +newaxis = None + +array_function_dispatch = functools.partial( + overrides.array_function_dispatch, module='numpy') + + +__all__ = [ + 'newaxis', 'ndarray', 'flatiter', 'nditer', 'nested_iters', 'ufunc', + 'arange', 'array', 'asarray', 'asanyarray', 'ascontiguousarray', + 'asfortranarray', 'zeros', 'count_nonzero', 'empty', 'broadcast', 'dtype', + 'fromstring', 'fromfile', 'frombuffer', '_from_dlpack', 'where', + 'argwhere', 'copyto', 'concatenate', 'fastCopyAndTranspose', 'lexsort', + 'set_numeric_ops', 'can_cast', 'promote_types', 'min_scalar_type', + 'result_type', 'isfortran', 'empty_like', 'zeros_like', 'ones_like', + 'correlate', 'convolve', 'inner', 'dot', 'outer', 'vdot', 'roll', + 'rollaxis', 'moveaxis', 'cross', 'tensordot', 'little_endian', + 'fromiter', 'array_equal', 'array_equiv', 'indices', 'fromfunction', + 'isclose', 'isscalar', 'binary_repr', 'base_repr', 'ones', + 'identity', 'allclose', 'compare_chararrays', 'putmask', + 'flatnonzero', 'Inf', 'inf', 'infty', 'Infinity', 'nan', 'NaN', + 'False_', 'True_', 'bitwise_not', 'CLIP', 'RAISE', 'WRAP', 'MAXDIMS', + 'BUFSIZE', 'ALLOW_THREADS', 'ComplexWarning', 'full', 'full_like', + 'matmul', 'shares_memory', 'may_share_memory', 'MAY_SHARE_BOUNDS', + 'MAY_SHARE_EXACT', 'TooHardError', 'AxisError'] + + +@set_module('numpy') +class ComplexWarning(RuntimeWarning): + """ + The warning raised when casting a complex dtype to a real dtype. + + As implemented, casting a complex number to a real discards its imaginary + part, but this behavior may not be what the user actually wants. + + """ + pass + + +def _zeros_like_dispatcher(a, dtype=None, order=None, subok=None, shape=None): + return (a,) + + +@array_function_dispatch(_zeros_like_dispatcher) +def zeros_like(a, dtype=None, order='K', subok=True, shape=None): + """ + Return an array of zeros with the same shape and type as a given array. + + Parameters + ---------- + a : array_like + The shape and data-type of `a` define these same attributes of + the returned array. + dtype : data-type, optional + Overrides the data type of the result. + + .. versionadded:: 1.6.0 + order : {'C', 'F', 'A', or 'K'}, optional + Overrides the memory layout of the result. 'C' means C-order, + 'F' means F-order, 'A' means 'F' if `a` is Fortran contiguous, + 'C' otherwise. 'K' means match the layout of `a` as closely + as possible. + + .. versionadded:: 1.6.0 + subok : bool, optional. + If True, then the newly created array will use the sub-class + type of `a`, otherwise it will be a base-class array. Defaults + to True. + shape : int or sequence of ints, optional. + Overrides the shape of the result. If order='K' and the number of + dimensions is unchanged, will try to keep order, otherwise, + order='C' is implied. + + .. versionadded:: 1.17.0 + + Returns + ------- + out : ndarray + Array of zeros with the same shape and type as `a`. + + See Also + -------- + empty_like : Return an empty array with shape and type of input. + ones_like : Return an array of ones with shape and type of input. + full_like : Return a new array with shape of input filled with value. + zeros : Return a new array setting values to zero. 
+
+    Examples
+    --------
+    >>> x = np.arange(6)
+    >>> x = x.reshape((2, 3))
+    >>> x
+    array([[0, 1, 2],
+           [3, 4, 5]])
+    >>> np.zeros_like(x)
+    array([[0, 0, 0],
+           [0, 0, 0]])
+
+    >>> y = np.arange(3, dtype=float)
+    >>> y
+    array([0., 1., 2.])
+    >>> np.zeros_like(y)
+    array([0., 0., 0.])
+
+    """
+    res = empty_like(a, dtype=dtype, order=order, subok=subok, shape=shape)
+    # needed instead of a 0 to get same result as zeros for string dtypes
+    z = zeros(1, dtype=res.dtype)
+    multiarray.copyto(res, z, casting='unsafe')
+    return res
+
+
+def _ones_dispatcher(shape, dtype=None, order=None, *, like=None):
+    return(like,)
+
+
+@set_array_function_like_doc
+@set_module('numpy')
+def ones(shape, dtype=None, order='C', *, like=None):
+    """
+    Return a new array of given shape and type, filled with ones.
+
+    Parameters
+    ----------
+    shape : int or sequence of ints
+        Shape of the new array, e.g., ``(2, 3)`` or ``2``.
+    dtype : data-type, optional
+        The desired data-type for the array, e.g., `numpy.int8`. Default is
+        `numpy.float64`.
+    order : {'C', 'F'}, optional, default: C
+        Whether to store multi-dimensional data in row-major
+        (C-style) or column-major (Fortran-style) order in
+        memory.
+    ${ARRAY_FUNCTION_LIKE}
+
+        .. versionadded:: 1.20.0
+
+    Returns
+    -------
+    out : ndarray
+        Array of ones with the given shape, dtype, and order.
+
+    See Also
+    --------
+    ones_like : Return an array of ones with shape and type of input.
+    empty : Return a new uninitialized array.
+    zeros : Return a new array setting values to zero.
+    full : Return a new array of given shape filled with value.
+
+
+    Examples
+    --------
+    >>> np.ones(5)
+    array([1., 1., 1., 1., 1.])
+
+    >>> np.ones((5,), dtype=int)
+    array([1, 1, 1, 1, 1])
+
+    >>> np.ones((2, 1))
+    array([[1.],
+           [1.]])
+
+    >>> s = (2,2)
+    >>> np.ones(s)
+    array([[1., 1.],
+           [1., 1.]])
+
+    """
+    if like is not None:
+        return _ones_with_like(shape, dtype=dtype, order=order, like=like)
+
+    a = empty(shape, dtype, order)
+    multiarray.copyto(a, 1, casting='unsafe')
+    return a
+
+
+_ones_with_like = array_function_dispatch(
+    _ones_dispatcher
+)(ones)
+
+
+def _ones_like_dispatcher(a, dtype=None, order=None, subok=None, shape=None):
+    return (a,)
+
+
+@array_function_dispatch(_ones_like_dispatcher)
+def ones_like(a, dtype=None, order='K', subok=True, shape=None):
+    """
+    Return an array of ones with the same shape and type as a given array.
+
+    Parameters
+    ----------
+    a : array_like
+        The shape and data-type of `a` define these same attributes of
+        the returned array.
+    dtype : data-type, optional
+        Overrides the data type of the result.
+
+        .. versionadded:: 1.6.0
+    order : {'C', 'F', 'A', or 'K'}, optional
+        Overrides the memory layout of the result. 'C' means C-order,
+        'F' means F-order, 'A' means 'F' if `a` is Fortran contiguous,
+        'C' otherwise. 'K' means match the layout of `a` as closely
+        as possible.
+
+        .. versionadded:: 1.6.0
+    subok : bool, optional.
+        If True, then the newly created array will use the sub-class
+        type of `a`, otherwise it will be a base-class array. Defaults
+        to True.
+    shape : int or sequence of ints, optional.
+        Overrides the shape of the result. If order='K' and the number of
+        dimensions is unchanged, will try to keep order, otherwise,
+        order='C' is implied.
+
+        .. versionadded:: 1.17.0
+
+    Returns
+    -------
+    out : ndarray
+        Array of ones with the same shape and type as `a`.
+
+    See Also
+    --------
+    empty_like : Return an empty array with shape and type of input.
+ zeros_like : Return an array of zeros with shape and type of input. + full_like : Return a new array with shape of input filled with value. + ones : Return a new array setting values to one. + + Examples + -------- + >>> x = np.arange(6) + >>> x = x.reshape((2, 3)) + >>> x + array([[0, 1, 2], + [3, 4, 5]]) + >>> np.ones_like(x) + array([[1, 1, 1], + [1, 1, 1]]) + + >>> y = np.arange(3, dtype=float) + >>> y + array([0., 1., 2.]) + >>> np.ones_like(y) + array([1., 1., 1.]) + + """ + res = empty_like(a, dtype=dtype, order=order, subok=subok, shape=shape) + multiarray.copyto(res, 1, casting='unsafe') + return res + + +def _full_dispatcher(shape, fill_value, dtype=None, order=None, *, like=None): + return(like,) + + +@set_array_function_like_doc +@set_module('numpy') +def full(shape, fill_value, dtype=None, order='C', *, like=None): + """ + Return a new array of given shape and type, filled with `fill_value`. + + Parameters + ---------- + shape : int or sequence of ints + Shape of the new array, e.g., ``(2, 3)`` or ``2``. + fill_value : scalar or array_like + Fill value. + dtype : data-type, optional + The desired data-type for the array The default, None, means + ``np.array(fill_value).dtype``. + order : {'C', 'F'}, optional + Whether to store multidimensional data in C- or Fortran-contiguous + (row- or column-wise) order in memory. + ${ARRAY_FUNCTION_LIKE} + + .. versionadded:: 1.20.0 + + Returns + ------- + out : ndarray + Array of `fill_value` with the given shape, dtype, and order. + + See Also + -------- + full_like : Return a new array with shape of input filled with value. + empty : Return a new uninitialized array. + ones : Return a new array setting values to one. + zeros : Return a new array setting values to zero. + + Examples + -------- + >>> np.full((2, 2), np.inf) + array([[inf, inf], + [inf, inf]]) + >>> np.full((2, 2), 10) + array([[10, 10], + [10, 10]]) + + >>> np.full((2, 2), [1, 2]) + array([[1, 2], + [1, 2]]) + + """ + if like is not None: + return _full_with_like(shape, fill_value, dtype=dtype, order=order, like=like) + + if dtype is None: + fill_value = asarray(fill_value) + dtype = fill_value.dtype + a = empty(shape, dtype, order) + multiarray.copyto(a, fill_value, casting='unsafe') + return a + + +_full_with_like = array_function_dispatch( + _full_dispatcher +)(full) + + +def _full_like_dispatcher(a, fill_value, dtype=None, order=None, subok=None, shape=None): + return (a,) + + +@array_function_dispatch(_full_like_dispatcher) +def full_like(a, fill_value, dtype=None, order='K', subok=True, shape=None): + """ + Return a full array with the same shape and type as a given array. + + Parameters + ---------- + a : array_like + The shape and data-type of `a` define these same attributes of + the returned array. + fill_value : scalar + Fill value. + dtype : data-type, optional + Overrides the data type of the result. + order : {'C', 'F', 'A', or 'K'}, optional + Overrides the memory layout of the result. 'C' means C-order, + 'F' means F-order, 'A' means 'F' if `a` is Fortran contiguous, + 'C' otherwise. 'K' means match the layout of `a` as closely + as possible. + subok : bool, optional. + If True, then the newly created array will use the sub-class + type of `a`, otherwise it will be a base-class array. Defaults + to True. + shape : int or sequence of ints, optional. + Overrides the shape of the result. If order='K' and the number of + dimensions is unchanged, will try to keep order, otherwise, + order='C' is implied. + + .. 
versionadded:: 1.17.0 + + Returns + ------- + out : ndarray + Array of `fill_value` with the same shape and type as `a`. + + See Also + -------- + empty_like : Return an empty array with shape and type of input. + ones_like : Return an array of ones with shape and type of input. + zeros_like : Return an array of zeros with shape and type of input. + full : Return a new array of given shape filled with value. + + Examples + -------- + >>> x = np.arange(6, dtype=int) + >>> np.full_like(x, 1) + array([1, 1, 1, 1, 1, 1]) + >>> np.full_like(x, 0.1) + array([0, 0, 0, 0, 0, 0]) + >>> np.full_like(x, 0.1, dtype=np.double) + array([0.1, 0.1, 0.1, 0.1, 0.1, 0.1]) + >>> np.full_like(x, np.nan, dtype=np.double) + array([nan, nan, nan, nan, nan, nan]) + + >>> y = np.arange(6, dtype=np.double) + >>> np.full_like(y, 0.1) + array([0.1, 0.1, 0.1, 0.1, 0.1, 0.1]) + + """ + res = empty_like(a, dtype=dtype, order=order, subok=subok, shape=shape) + multiarray.copyto(res, fill_value, casting='unsafe') + return res + + +def _count_nonzero_dispatcher(a, axis=None, *, keepdims=None): + return (a,) + + +@array_function_dispatch(_count_nonzero_dispatcher) +def count_nonzero(a, axis=None, *, keepdims=False): + """ + Counts the number of non-zero values in the array ``a``. + + The word "non-zero" is in reference to the Python 2.x + built-in method ``__nonzero__()`` (renamed ``__bool__()`` + in Python 3.x) of Python objects that tests an object's + "truthfulness". For example, any number is considered + truthful if it is nonzero, whereas any string is considered + truthful if it is not the empty string. Thus, this function + (recursively) counts how many elements in ``a`` (and in + sub-arrays thereof) have their ``__nonzero__()`` or ``__bool__()`` + method evaluated to ``True``. + + Parameters + ---------- + a : array_like + The array for which to count non-zeros. + axis : int or tuple, optional + Axis or tuple of axes along which to count non-zeros. + Default is None, meaning that non-zeros will be counted + along a flattened version of ``a``. + + .. versionadded:: 1.12.0 + + keepdims : bool, optional + If this is set to True, the axes that are counted are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the input array. + + .. versionadded:: 1.19.0 + + Returns + ------- + count : int or array of int + Number of non-zero values in the array along a given axis. + Otherwise, the total number of non-zero values in the array + is returned. + + See Also + -------- + nonzero : Return the coordinates of all the non-zero values. + + Examples + -------- + >>> np.count_nonzero(np.eye(4)) + 4 + >>> a = np.array([[0, 1, 7, 0], + ... [3, 0, 2, 19]]) + >>> np.count_nonzero(a) + 5 + >>> np.count_nonzero(a, axis=0) + array([1, 1, 2, 1]) + >>> np.count_nonzero(a, axis=1) + array([2, 3]) + >>> np.count_nonzero(a, axis=1, keepdims=True) + array([[2], + [3]]) + """ + if axis is None and not keepdims: + return multiarray.count_nonzero(a) + + a = asanyarray(a) + + # TODO: this works around .astype(bool) not working properly (gh-9847) + if np.issubdtype(a.dtype, np.character): + a_bool = a != a.dtype.type() + else: + a_bool = a.astype(np.bool_, copy=False) + + return a_bool.sum(axis=axis, dtype=np.intp, keepdims=keepdims) + + +@set_module('numpy') +def isfortran(a): + """ + Check if the array is Fortran contiguous but *not* C contiguous. 
+ + This function is obsolete and, because of changes due to relaxed stride + checking, its return value for the same array may differ for versions + of NumPy >= 1.10.0 and previous versions. If you only want to check if an + array is Fortran contiguous use ``a.flags.f_contiguous`` instead. + + Parameters + ---------- + a : ndarray + Input array. + + Returns + ------- + isfortran : bool + Returns True if the array is Fortran contiguous but *not* C contiguous. + + + Examples + -------- + + np.array allows to specify whether the array is written in C-contiguous + order (last index varies the fastest), or FORTRAN-contiguous order in + memory (first index varies the fastest). + + >>> a = np.array([[1, 2, 3], [4, 5, 6]], order='C') + >>> a + array([[1, 2, 3], + [4, 5, 6]]) + >>> np.isfortran(a) + False + + >>> b = np.array([[1, 2, 3], [4, 5, 6]], order='F') + >>> b + array([[1, 2, 3], + [4, 5, 6]]) + >>> np.isfortran(b) + True + + + The transpose of a C-ordered array is a FORTRAN-ordered array. + + >>> a = np.array([[1, 2, 3], [4, 5, 6]], order='C') + >>> a + array([[1, 2, 3], + [4, 5, 6]]) + >>> np.isfortran(a) + False + >>> b = a.T + >>> b + array([[1, 4], + [2, 5], + [3, 6]]) + >>> np.isfortran(b) + True + + C-ordered arrays evaluate as False even if they are also FORTRAN-ordered. + + >>> np.isfortran(np.array([1, 2], order='F')) + False + + """ + return a.flags.fnc + + +def _argwhere_dispatcher(a): + return (a,) + + +@array_function_dispatch(_argwhere_dispatcher) +def argwhere(a): + """ + Find the indices of array elements that are non-zero, grouped by element. + + Parameters + ---------- + a : array_like + Input data. + + Returns + ------- + index_array : (N, a.ndim) ndarray + Indices of elements that are non-zero. Indices are grouped by element. + This array will have shape ``(N, a.ndim)`` where ``N`` is the number of + non-zero items. + + See Also + -------- + where, nonzero + + Notes + ----- + ``np.argwhere(a)`` is almost the same as ``np.transpose(np.nonzero(a))``, + but produces a result of the correct shape for a 0D array. + + The output of ``argwhere`` is not suitable for indexing arrays. + For this purpose use ``nonzero(a)`` instead. + + Examples + -------- + >>> x = np.arange(6).reshape(2,3) + >>> x + array([[0, 1, 2], + [3, 4, 5]]) + >>> np.argwhere(x>1) + array([[0, 2], + [1, 0], + [1, 1], + [1, 2]]) + + """ + # nonzero does not behave well on 0d, so promote to 1d + if np.ndim(a) == 0: + a = shape_base.atleast_1d(a) + # then remove the added dimension + return argwhere(a)[:,:0] + return transpose(nonzero(a)) + + +def _flatnonzero_dispatcher(a): + return (a,) + + +@array_function_dispatch(_flatnonzero_dispatcher) +def flatnonzero(a): + """ + Return indices that are non-zero in the flattened version of a. + + This is equivalent to np.nonzero(np.ravel(a))[0]. + + Parameters + ---------- + a : array_like + Input data. + + Returns + ------- + res : ndarray + Output array, containing the indices of the elements of `a.ravel()` + that are non-zero. + + See Also + -------- + nonzero : Return the indices of the non-zero elements of the input array. + ravel : Return a 1-D array containing the elements of the input array. 
+ + Examples + -------- + >>> x = np.arange(-2, 3) + >>> x + array([-2, -1, 0, 1, 2]) + >>> np.flatnonzero(x) + array([0, 1, 3, 4]) + + Use the indices of the non-zero elements as an index array to extract + these elements: + + >>> x.ravel()[np.flatnonzero(x)] + array([-2, -1, 1, 2]) + + """ + return np.nonzero(np.ravel(a))[0] + + +def _correlate_dispatcher(a, v, mode=None): + return (a, v) + + +@array_function_dispatch(_correlate_dispatcher) +def correlate(a, v, mode='valid'): + """ + Cross-correlation of two 1-dimensional sequences. + + This function computes the correlation as generally defined in signal + processing texts:: + + c_{av}[k] = sum_n a[n+k] * conj(v[n]) + + with a and v sequences being zero-padded where necessary and conj being + the conjugate. + + Parameters + ---------- + a, v : array_like + Input sequences. + mode : {'valid', 'same', 'full'}, optional + Refer to the `convolve` docstring. Note that the default + is 'valid', unlike `convolve`, which uses 'full'. + old_behavior : bool + `old_behavior` was removed in NumPy 1.10. If you need the old + behavior, use `multiarray.correlate`. + + Returns + ------- + out : ndarray + Discrete cross-correlation of `a` and `v`. + + See Also + -------- + convolve : Discrete, linear convolution of two one-dimensional sequences. + multiarray.correlate : Old, no conjugate, version of correlate. + scipy.signal.correlate : uses FFT which has superior performance on large arrays. + + Notes + ----- + The definition of correlation above is not unique and sometimes correlation + may be defined differently. Another common definition is:: + + c'_{av}[k] = sum_n a[n] conj(v[n+k]) + + which is related to ``c_{av}[k]`` by ``c'_{av}[k] = c_{av}[-k]``. + + `numpy.correlate` may perform slowly in large arrays (i.e. n = 1e5) because it does + not use the FFT to compute the convolution; in that case, `scipy.signal.correlate` might + be preferable. + + + Examples + -------- + >>> np.correlate([1, 2, 3], [0, 1, 0.5]) + array([3.5]) + >>> np.correlate([1, 2, 3], [0, 1, 0.5], "same") + array([2. , 3.5, 3. ]) + >>> np.correlate([1, 2, 3], [0, 1, 0.5], "full") + array([0.5, 2. , 3.5, 3. , 0. ]) + + Using complex sequences: + + >>> np.correlate([1+1j, 2, 3-1j], [0, 1, 0.5j], 'full') + array([ 0.5-0.5j, 1.0+0.j , 1.5-1.5j, 3.0-1.j , 0.0+0.j ]) + + Note that you get the time reversed, complex conjugated result + when the two input sequences change places, i.e., + ``c_{va}[k] = c^{*}_{av}[-k]``: + + >>> np.correlate([0, 1, 0.5j], [1+1j, 2, 3-1j], 'full') + array([ 0.0+0.j , 3.0+1.j , 1.5+1.5j, 1.0+0.j , 0.5+0.5j]) + + """ + return multiarray.correlate2(a, v, mode) + + +def _convolve_dispatcher(a, v, mode=None): + return (a, v) + + +@array_function_dispatch(_convolve_dispatcher) +def convolve(a, v, mode='full'): + """ + Returns the discrete, linear convolution of two one-dimensional sequences. + + The convolution operator is often seen in signal processing, where it + models the effect of a linear time-invariant system on a signal [1]_. In + probability theory, the sum of two independent random variables is + distributed according to the convolution of their individual + distributions. + + If `v` is longer than `a`, the arrays are swapped before computation. + + Parameters + ---------- + a : (N,) array_like + First one-dimensional input array. + v : (M,) array_like + Second one-dimensional input array. + mode : {'full', 'valid', 'same'}, optional + 'full': + By default, mode is 'full'. 
This returns the convolution + at each point of overlap, with an output shape of (N+M-1,). At + the end-points of the convolution, the signals do not overlap + completely, and boundary effects may be seen. + + 'same': + Mode 'same' returns output of length ``max(M, N)``. Boundary + effects are still visible. + + 'valid': + Mode 'valid' returns output of length + ``max(M, N) - min(M, N) + 1``. The convolution product is only given + for points where the signals overlap completely. Values outside + the signal boundary have no effect. + + Returns + ------- + out : ndarray + Discrete, linear convolution of `a` and `v`. + + See Also + -------- + scipy.signal.fftconvolve : Convolve two arrays using the Fast Fourier + Transform. + scipy.linalg.toeplitz : Used to construct the convolution operator. + polymul : Polynomial multiplication. Same output as convolve, but also + accepts poly1d objects as input. + + Notes + ----- + The discrete convolution operation is defined as + + .. math:: (a * v)[n] = \\sum_{m = -\\infty}^{\\infty} a[m] v[n - m] + + It can be shown that a convolution :math:`x(t) * y(t)` in time/space + is equivalent to the multiplication :math:`X(f) Y(f)` in the Fourier + domain, after appropriate padding (padding is necessary to prevent + circular convolution). Since multiplication is more efficient (faster) + than convolution, the function `scipy.signal.fftconvolve` exploits the + FFT to calculate the convolution of large data-sets. + + References + ---------- + .. [1] Wikipedia, "Convolution", + https://en.wikipedia.org/wiki/Convolution + + Examples + -------- + Note how the convolution operator flips the second array + before "sliding" the two across one another: + + >>> np.convolve([1, 2, 3], [0, 1, 0.5]) + array([0. , 1. , 2.5, 4. , 1.5]) + + Only return the middle values of the convolution. + Contains boundary effects, where zeros are taken + into account: + + >>> np.convolve([1,2,3],[0,1,0.5], 'same') + array([1. , 2.5, 4. ]) + + The two arrays are of the same length, so there + is only one position where they completely overlap: + + >>> np.convolve([1,2,3],[0,1,0.5], 'valid') + array([2.5]) + + """ + a, v = array(a, copy=False, ndmin=1), array(v, copy=False, ndmin=1) + if (len(v) > len(a)): + a, v = v, a + if len(a) == 0: + raise ValueError('a cannot be empty') + if len(v) == 0: + raise ValueError('v cannot be empty') + return multiarray.correlate(a, v[::-1], mode) + + +def _outer_dispatcher(a, b, out=None): + return (a, b, out) + + +@array_function_dispatch(_outer_dispatcher) +def outer(a, b, out=None): + """ + Compute the outer product of two vectors. + + Given two vectors, ``a = [a0, a1, ..., aM]`` and + ``b = [b0, b1, ..., bN]``, + the outer product [1]_ is:: + + [[a0*b0 a0*b1 ... a0*bN ] + [a1*b0 . + [ ... . + [aM*b0 aM*bN ]] + + Parameters + ---------- + a : (M,) array_like + First input vector. Input is flattened if + not already 1-dimensional. + b : (N,) array_like + Second input vector. Input is flattened if + not already 1-dimensional. + out : (M, N) ndarray, optional + A location where the result is stored + + .. versionadded:: 1.9.0 + + Returns + ------- + out : (M, N) ndarray + ``out[i, j] = a[i] * b[j]`` + + See also + -------- + inner + einsum : ``einsum('i,j->ij', a.ravel(), b.ravel())`` is the equivalent. + ufunc.outer : A generalization to dimensions other than 1D and other + operations. ``np.multiply.outer(a.ravel(), b.ravel())`` + is the equivalent. + tensordot : ``np.tensordot(a.ravel(), b.ravel(), axes=((), ()))`` + is the equivalent. 
+ + References + ---------- + .. [1] : G. H. Golub and C. F. Van Loan, *Matrix Computations*, 3rd + ed., Baltimore, MD, Johns Hopkins University Press, 1996, + pg. 8. + + Examples + -------- + Make a (*very* coarse) grid for computing a Mandelbrot set: + + >>> rl = np.outer(np.ones((5,)), np.linspace(-2, 2, 5)) + >>> rl + array([[-2., -1., 0., 1., 2.], + [-2., -1., 0., 1., 2.], + [-2., -1., 0., 1., 2.], + [-2., -1., 0., 1., 2.], + [-2., -1., 0., 1., 2.]]) + >>> im = np.outer(1j*np.linspace(2, -2, 5), np.ones((5,))) + >>> im + array([[0.+2.j, 0.+2.j, 0.+2.j, 0.+2.j, 0.+2.j], + [0.+1.j, 0.+1.j, 0.+1.j, 0.+1.j, 0.+1.j], + [0.+0.j, 0.+0.j, 0.+0.j, 0.+0.j, 0.+0.j], + [0.-1.j, 0.-1.j, 0.-1.j, 0.-1.j, 0.-1.j], + [0.-2.j, 0.-2.j, 0.-2.j, 0.-2.j, 0.-2.j]]) + >>> grid = rl + im + >>> grid + array([[-2.+2.j, -1.+2.j, 0.+2.j, 1.+2.j, 2.+2.j], + [-2.+1.j, -1.+1.j, 0.+1.j, 1.+1.j, 2.+1.j], + [-2.+0.j, -1.+0.j, 0.+0.j, 1.+0.j, 2.+0.j], + [-2.-1.j, -1.-1.j, 0.-1.j, 1.-1.j, 2.-1.j], + [-2.-2.j, -1.-2.j, 0.-2.j, 1.-2.j, 2.-2.j]]) + + An example using a "vector" of letters: + + >>> x = np.array(['a', 'b', 'c'], dtype=object) + >>> np.outer(x, [1, 2, 3]) + array([['a', 'aa', 'aaa'], + ['b', 'bb', 'bbb'], + ['c', 'cc', 'ccc']], dtype=object) + + """ + a = asarray(a) + b = asarray(b) + return multiply(a.ravel()[:, newaxis], b.ravel()[newaxis, :], out) + + +def _tensordot_dispatcher(a, b, axes=None): + return (a, b) + + +@array_function_dispatch(_tensordot_dispatcher) +def tensordot(a, b, axes=2): + """ + Compute tensor dot product along specified axes. + + Given two tensors, `a` and `b`, and an array_like object containing + two array_like objects, ``(a_axes, b_axes)``, sum the products of + `a`'s and `b`'s elements (components) over the axes specified by + ``a_axes`` and ``b_axes``. The third argument can be a single non-negative + integer_like scalar, ``N``; if it is such, then the last ``N`` dimensions + of `a` and the first ``N`` dimensions of `b` are summed over. + + Parameters + ---------- + a, b : array_like + Tensors to "dot". + + axes : int or (2,) array_like + * integer_like + If an int N, sum over the last N axes of `a` and the first N axes + of `b` in order. The sizes of the corresponding axes must match. + * (2,) array_like + Or, a list of axes to be summed over, first sequence applying to `a`, + second to `b`. Both elements array_like must be of the same length. + + Returns + ------- + output : ndarray + The tensor dot product of the input. + + See Also + -------- + dot, einsum + + Notes + ----- + Three common use cases are: + * ``axes = 0`` : tensor product :math:`a\\otimes b` + * ``axes = 1`` : tensor dot product :math:`a\\cdot b` + * ``axes = 2`` : (default) tensor double contraction :math:`a:b` + + When `axes` is integer_like, the sequence for evaluation will be: first + the -Nth axis in `a` and 0th axis in `b`, and the -1th axis in `a` and + Nth axis in `b` last. + + When there is more than one axis to sum over - and they are not the last + (first) axes of `a` (`b`) - the argument `axes` should consist of + two sequences of the same length, with the first axis to sum over given + first in both sequences, the second axis second, and so forth. + + The shape of the result consists of the non-contracted axes of the + first tensor, followed by the non-contracted axes of the second. 
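+
+    In ``einsum`` notation, the "traditional" example below,
+    ``np.tensordot(a, b, axes=([1, 0], [0, 1]))``, is equivalent to
+    ``np.einsum('ijk,jil->kl', a, b)``.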
+ + Examples + -------- + A "traditional" example: + + >>> a = np.arange(60.).reshape(3,4,5) + >>> b = np.arange(24.).reshape(4,3,2) + >>> c = np.tensordot(a,b, axes=([1,0],[0,1])) + >>> c.shape + (5, 2) + >>> c + array([[4400., 4730.], + [4532., 4874.], + [4664., 5018.], + [4796., 5162.], + [4928., 5306.]]) + >>> # A slower but equivalent way of computing the same... + >>> d = np.zeros((5,2)) + >>> for i in range(5): + ... for j in range(2): + ... for k in range(3): + ... for n in range(4): + ... d[i,j] += a[k,n,i] * b[n,k,j] + >>> c == d + array([[ True, True], + [ True, True], + [ True, True], + [ True, True], + [ True, True]]) + + An extended example taking advantage of the overloading of + and \\*: + + >>> a = np.array(range(1, 9)) + >>> a.shape = (2, 2, 2) + >>> A = np.array(('a', 'b', 'c', 'd'), dtype=object) + >>> A.shape = (2, 2) + >>> a; A + array([[[1, 2], + [3, 4]], + [[5, 6], + [7, 8]]]) + array([['a', 'b'], + ['c', 'd']], dtype=object) + + >>> np.tensordot(a, A) # third argument default is 2 for double-contraction + array(['abbcccdddd', 'aaaaabbbbbbcccccccdddddddd'], dtype=object) + + >>> np.tensordot(a, A, 1) + array([[['acc', 'bdd'], + ['aaacccc', 'bbbdddd']], + [['aaaaacccccc', 'bbbbbdddddd'], + ['aaaaaaacccccccc', 'bbbbbbbdddddddd']]], dtype=object) + + >>> np.tensordot(a, A, 0) # tensor product (result too long to incl.) + array([[[[['a', 'b'], + ['c', 'd']], + ... + + >>> np.tensordot(a, A, (0, 1)) + array([[['abbbbb', 'cddddd'], + ['aabbbbbb', 'ccdddddd']], + [['aaabbbbbbb', 'cccddddddd'], + ['aaaabbbbbbbb', 'ccccdddddddd']]], dtype=object) + + >>> np.tensordot(a, A, (2, 1)) + array([[['abb', 'cdd'], + ['aaabbbb', 'cccdddd']], + [['aaaaabbbbbb', 'cccccdddddd'], + ['aaaaaaabbbbbbbb', 'cccccccdddddddd']]], dtype=object) + + >>> np.tensordot(a, A, ((0, 1), (0, 1))) + array(['abbbcccccddddddd', 'aabbbbccccccdddddddd'], dtype=object) + + >>> np.tensordot(a, A, ((2, 1), (1, 0))) + array(['acccbbdddd', 'aaaaacccccccbbbbbbdddddddd'], dtype=object) + + """ + try: + iter(axes) + except Exception: + axes_a = list(range(-axes, 0)) + axes_b = list(range(0, axes)) + else: + axes_a, axes_b = axes + try: + na = len(axes_a) + axes_a = list(axes_a) + except TypeError: + axes_a = [axes_a] + na = 1 + try: + nb = len(axes_b) + axes_b = list(axes_b) + except TypeError: + axes_b = [axes_b] + nb = 1 + + a, b = asarray(a), asarray(b) + as_ = a.shape + nda = a.ndim + bs = b.shape + ndb = b.ndim + equal = True + if na != nb: + equal = False + else: + for k in range(na): + if as_[axes_a[k]] != bs[axes_b[k]]: + equal = False + break + if axes_a[k] < 0: + axes_a[k] += nda + if axes_b[k] < 0: + axes_b[k] += ndb + if not equal: + raise ValueError("shape-mismatch for sum") + + # Move the axes to sum over to the end of "a" + # and to the front of "b" + notin = [k for k in range(nda) if k not in axes_a] + newaxes_a = notin + axes_a + N2 = 1 + for axis in axes_a: + N2 *= as_[axis] + newshape_a = (int(multiply.reduce([as_[ax] for ax in notin])), N2) + olda = [as_[axis] for axis in notin] + + notin = [k for k in range(ndb) if k not in axes_b] + newaxes_b = axes_b + notin + N2 = 1 + for axis in axes_b: + N2 *= bs[axis] + newshape_b = (N2, int(multiply.reduce([bs[ax] for ax in notin]))) + oldb = [bs[axis] for axis in notin] + + at = a.transpose(newaxes_a).reshape(newshape_a) + bt = b.transpose(newaxes_b).reshape(newshape_b) + res = dot(at, bt) + return res.reshape(olda + oldb) + + +def _roll_dispatcher(a, shift, axis=None): + return (a,) + + +@array_function_dispatch(_roll_dispatcher) +def roll(a, shift, 
axis=None): + """ + Roll array elements along a given axis. + + Elements that roll beyond the last position are re-introduced at + the first. + + Parameters + ---------- + a : array_like + Input array. + shift : int or tuple of ints + The number of places by which elements are shifted. If a tuple, + then `axis` must be a tuple of the same size, and each of the + given axes is shifted by the corresponding number. If an int + while `axis` is a tuple of ints, then the same value is used for + all given axes. + axis : int or tuple of ints, optional + Axis or axes along which elements are shifted. By default, the + array is flattened before shifting, after which the original + shape is restored. + + Returns + ------- + res : ndarray + Output array, with the same shape as `a`. + + See Also + -------- + rollaxis : Roll the specified axis backwards, until it lies in a + given position. + + Notes + ----- + .. versionadded:: 1.12.0 + + Supports rolling over multiple dimensions simultaneously. + + Examples + -------- + >>> x = np.arange(10) + >>> np.roll(x, 2) + array([8, 9, 0, 1, 2, 3, 4, 5, 6, 7]) + >>> np.roll(x, -2) + array([2, 3, 4, 5, 6, 7, 8, 9, 0, 1]) + + >>> x2 = np.reshape(x, (2, 5)) + >>> x2 + array([[0, 1, 2, 3, 4], + [5, 6, 7, 8, 9]]) + >>> np.roll(x2, 1) + array([[9, 0, 1, 2, 3], + [4, 5, 6, 7, 8]]) + >>> np.roll(x2, -1) + array([[1, 2, 3, 4, 5], + [6, 7, 8, 9, 0]]) + >>> np.roll(x2, 1, axis=0) + array([[5, 6, 7, 8, 9], + [0, 1, 2, 3, 4]]) + >>> np.roll(x2, -1, axis=0) + array([[5, 6, 7, 8, 9], + [0, 1, 2, 3, 4]]) + >>> np.roll(x2, 1, axis=1) + array([[4, 0, 1, 2, 3], + [9, 5, 6, 7, 8]]) + >>> np.roll(x2, -1, axis=1) + array([[1, 2, 3, 4, 0], + [6, 7, 8, 9, 5]]) + >>> np.roll(x2, (1, 1), axis=(1, 0)) + array([[9, 5, 6, 7, 8], + [4, 0, 1, 2, 3]]) + >>> np.roll(x2, (2, 1), axis=(1, 0)) + array([[8, 9, 5, 6, 7], + [3, 4, 0, 1, 2]]) + + """ + a = asanyarray(a) + if axis is None: + return roll(a.ravel(), shift, 0).reshape(a.shape) + + else: + axis = normalize_axis_tuple(axis, a.ndim, allow_duplicate=True) + broadcasted = broadcast(shift, axis) + if broadcasted.ndim > 1: + raise ValueError( + "'shift' and 'axis' should be scalars or 1D sequences") + shifts = {ax: 0 for ax in range(a.ndim)} + for sh, ax in broadcasted: + shifts[ax] += sh + + rolls = [((slice(None), slice(None)),)] * a.ndim + for ax, offset in shifts.items(): + offset %= a.shape[ax] or 1 # If `a` is empty, nothing matters. + if offset: + # (original, result), (original, result) + rolls[ax] = ((slice(None, -offset), slice(offset, None)), + (slice(-offset, None), slice(None, offset))) + + result = empty_like(a) + for indices in itertools.product(*rolls): + arr_index, res_index = zip(*indices) + result[res_index] = a[arr_index] + + return result + + +def _rollaxis_dispatcher(a, axis, start=None): + return (a,) + + +@array_function_dispatch(_rollaxis_dispatcher) +def rollaxis(a, axis, start=0): + """ + Roll the specified axis backwards, until it lies in a given position. + + This function continues to be supported for backward compatibility, but you + should prefer `moveaxis`. The `moveaxis` function was added in NumPy + 1.11. + + Parameters + ---------- + a : ndarray + Input array. + axis : int + The axis to be rolled. The positions of the other axes do not + change relative to one another. + start : int, optional + When ``start <= axis``, the axis is rolled back until it lies in + this position. When ``start > axis``, the axis is rolled until it + lies before this position. The default, 0, results in a "complete" + roll. 
The following table describes how negative values of ``start`` + are interpreted: + + .. table:: + :align: left + + +-------------------+----------------------+ + | ``start`` | Normalized ``start`` | + +===================+======================+ + | ``-(arr.ndim+1)`` | raise ``AxisError`` | + +-------------------+----------------------+ + | ``-arr.ndim`` | 0 | + +-------------------+----------------------+ + | |vdots| | |vdots| | + +-------------------+----------------------+ + | ``-1`` | ``arr.ndim-1`` | + +-------------------+----------------------+ + | ``0`` | ``0`` | + +-------------------+----------------------+ + | |vdots| | |vdots| | + +-------------------+----------------------+ + | ``arr.ndim`` | ``arr.ndim`` | + +-------------------+----------------------+ + | ``arr.ndim + 1`` | raise ``AxisError`` | + +-------------------+----------------------+ + + .. |vdots| unicode:: U+22EE .. Vertical Ellipsis + + Returns + ------- + res : ndarray + For NumPy >= 1.10.0 a view of `a` is always returned. For earlier + NumPy versions a view of `a` is returned only if the order of the + axes is changed, otherwise the input array is returned. + + See Also + -------- + moveaxis : Move array axes to new positions. + roll : Roll the elements of an array by a number of positions along a + given axis. + + Examples + -------- + >>> a = np.ones((3,4,5,6)) + >>> np.rollaxis(a, 3, 1).shape + (3, 6, 4, 5) + >>> np.rollaxis(a, 2).shape + (5, 3, 4, 6) + >>> np.rollaxis(a, 1, 4).shape + (3, 5, 6, 4) + + """ + n = a.ndim + axis = normalize_axis_index(axis, n) + if start < 0: + start += n + msg = "'%s' arg requires %d <= %s < %d, but %d was passed in" + if not (0 <= start < n + 1): + raise AxisError(msg % ('start', -n, 'start', n + 1, start)) + if axis < start: + # it's been removed + start -= 1 + if axis == start: + return a[...] + axes = list(range(0, n)) + axes.remove(axis) + axes.insert(start, axis) + return a.transpose(axes) + + +def normalize_axis_tuple(axis, ndim, argname=None, allow_duplicate=False): + """ + Normalizes an axis argument into a tuple of non-negative integer axes. + + This handles shorthands such as ``1`` and converts them to ``(1,)``, + as well as performing the handling of negative indices covered by + `normalize_axis_index`. + + By default, this forbids axes from being specified multiple times. + + Used internally by multi-axis-checking logic. + + .. versionadded:: 1.13.0 + + Parameters + ---------- + axis : int, iterable of int + The un-normalized index or indices of the axis. + ndim : int + The number of dimensions of the array that `axis` should be normalized + against. + argname : str, optional + A prefix to put before the error message, typically the name of the + argument. + allow_duplicate : bool, optional + If False, the default, disallow an axis from being specified twice. + + Returns + ------- + normalized_axes : tuple of int + The normalized axis index, such that `0 <= normalized_axis < ndim` + + Raises + ------ + AxisError + If any axis provided is out of range + ValueError + If an axis is repeated + + See also + -------- + normalize_axis_index : normalizing a single scalar axis + """ + # Optimization to speed-up the most common cases. + if type(axis) not in (tuple, list): + try: + axis = [operator.index(axis)] + except TypeError: + pass + # Going via an iterator directly is slower than via list comprehension. 
+ axis = tuple([normalize_axis_index(ax, ndim, argname) for ax in axis]) + if not allow_duplicate and len(set(axis)) != len(axis): + if argname: + raise ValueError('repeated axis in `{}` argument'.format(argname)) + else: + raise ValueError('repeated axis') + return axis + + +def _moveaxis_dispatcher(a, source, destination): + return (a,) + + +@array_function_dispatch(_moveaxis_dispatcher) +def moveaxis(a, source, destination): + """ + Move axes of an array to new positions. + + Other axes remain in their original order. + + .. versionadded:: 1.11.0 + + Parameters + ---------- + a : np.ndarray + The array whose axes should be reordered. + source : int or sequence of int + Original positions of the axes to move. These must be unique. + destination : int or sequence of int + Destination positions for each of the original axes. These must also be + unique. + + Returns + ------- + result : np.ndarray + Array with moved axes. This array is a view of the input array. + + See Also + -------- + transpose : Permute the dimensions of an array. + swapaxes : Interchange two axes of an array. + + Examples + -------- + >>> x = np.zeros((3, 4, 5)) + >>> np.moveaxis(x, 0, -1).shape + (4, 5, 3) + >>> np.moveaxis(x, -1, 0).shape + (5, 3, 4) + + These all achieve the same result: + + >>> np.transpose(x).shape + (5, 4, 3) + >>> np.swapaxes(x, 0, -1).shape + (5, 4, 3) + >>> np.moveaxis(x, [0, 1], [-1, -2]).shape + (5, 4, 3) + >>> np.moveaxis(x, [0, 1, 2], [-1, -2, -3]).shape + (5, 4, 3) + + """ + try: + # allow duck-array types if they define transpose + transpose = a.transpose + except AttributeError: + a = asarray(a) + transpose = a.transpose + + source = normalize_axis_tuple(source, a.ndim, 'source') + destination = normalize_axis_tuple(destination, a.ndim, 'destination') + if len(source) != len(destination): + raise ValueError('`source` and `destination` arguments must have ' + 'the same number of elements') + + order = [n for n in range(a.ndim) if n not in source] + + for dest, src in sorted(zip(destination, source)): + order.insert(dest, src) + + result = transpose(order) + return result + + +def _cross_dispatcher(a, b, axisa=None, axisb=None, axisc=None, axis=None): + return (a, b) + + +@array_function_dispatch(_cross_dispatcher) +def cross(a, b, axisa=-1, axisb=-1, axisc=-1, axis=None): + """ + Return the cross product of two (arrays of) vectors. + + The cross product of `a` and `b` in :math:`R^3` is a vector perpendicular + to both `a` and `b`. If `a` and `b` are arrays of vectors, the vectors + are defined by the last axis of `a` and `b` by default, and these axes + can have dimensions 2 or 3. Where the dimension of either `a` or `b` is + 2, the third component of the input vector is assumed to be zero and the + cross product calculated accordingly. In cases where both input vectors + have dimension 2, the z-component of the cross product is returned. + + Parameters + ---------- + a : array_like + Components of the first vector(s). + b : array_like + Components of the second vector(s). + axisa : int, optional + Axis of `a` that defines the vector(s). By default, the last axis. + axisb : int, optional + Axis of `b` that defines the vector(s). By default, the last axis. + axisc : int, optional + Axis of `c` containing the cross product vector(s). Ignored if + both input vectors have dimension 2, as the return is scalar. + By default, the last axis. + axis : int, optional + If defined, the axis of `a`, `b` and `c` that defines the vector(s) + and cross product(s). Overrides `axisa`, `axisb` and `axisc`. 
+ + Returns + ------- + c : ndarray + Vector cross product(s). + + Raises + ------ + ValueError + When the dimension of the vector(s) in `a` and/or `b` does not + equal 2 or 3. + + See Also + -------- + inner : Inner product + outer : Outer product. + ix_ : Construct index arrays. + + Notes + ----- + .. versionadded:: 1.9.0 + + Supports full broadcasting of the inputs. + + Examples + -------- + Vector cross-product. + + >>> x = [1, 2, 3] + >>> y = [4, 5, 6] + >>> np.cross(x, y) + array([-3, 6, -3]) + + One vector with dimension 2. + + >>> x = [1, 2] + >>> y = [4, 5, 6] + >>> np.cross(x, y) + array([12, -6, -3]) + + Equivalently: + + >>> x = [1, 2, 0] + >>> y = [4, 5, 6] + >>> np.cross(x, y) + array([12, -6, -3]) + + Both vectors with dimension 2. + + >>> x = [1,2] + >>> y = [4,5] + >>> np.cross(x, y) + array(-3) + + Multiple vector cross-products. Note that the direction of the cross + product vector is defined by the `right-hand rule`. + + >>> x = np.array([[1,2,3], [4,5,6]]) + >>> y = np.array([[4,5,6], [1,2,3]]) + >>> np.cross(x, y) + array([[-3, 6, -3], + [ 3, -6, 3]]) + + The orientation of `c` can be changed using the `axisc` keyword. + + >>> np.cross(x, y, axisc=0) + array([[-3, 3], + [ 6, -6], + [-3, 3]]) + + Change the vector definition of `x` and `y` using `axisa` and `axisb`. + + >>> x = np.array([[1,2,3], [4,5,6], [7, 8, 9]]) + >>> y = np.array([[7, 8, 9], [4,5,6], [1,2,3]]) + >>> np.cross(x, y) + array([[ -6, 12, -6], + [ 0, 0, 0], + [ 6, -12, 6]]) + >>> np.cross(x, y, axisa=0, axisb=0) + array([[-24, 48, -24], + [-30, 60, -30], + [-36, 72, -36]]) + + """ + if axis is not None: + axisa, axisb, axisc = (axis,) * 3 + a = asarray(a) + b = asarray(b) + # Check axisa and axisb are within bounds + axisa = normalize_axis_index(axisa, a.ndim, msg_prefix='axisa') + axisb = normalize_axis_index(axisb, b.ndim, msg_prefix='axisb') + + # Move working axis to the end of the shape + a = moveaxis(a, axisa, -1) + b = moveaxis(b, axisb, -1) + msg = ("incompatible dimensions for cross product\n" + "(dimension must be 2 or 3)") + if a.shape[-1] not in (2, 3) or b.shape[-1] not in (2, 3): + raise ValueError(msg) + + # Create the output array + shape = broadcast(a[..., 0], b[..., 0]).shape + if a.shape[-1] == 3 or b.shape[-1] == 3: + shape += (3,) + # Check axisc is within bounds + axisc = normalize_axis_index(axisc, len(shape), msg_prefix='axisc') + dtype = promote_types(a.dtype, b.dtype) + cp = empty(shape, dtype) + + # create local aliases for readability + a0 = a[..., 0] + a1 = a[..., 1] + if a.shape[-1] == 3: + a2 = a[..., 2] + b0 = b[..., 0] + b1 = b[..., 1] + if b.shape[-1] == 3: + b2 = b[..., 2] + if cp.ndim != 0 and cp.shape[-1] == 3: + cp0 = cp[..., 0] + cp1 = cp[..., 1] + cp2 = cp[..., 2] + + if a.shape[-1] == 2: + if b.shape[-1] == 2: + # a0 * b1 - a1 * b0 + multiply(a0, b1, out=cp) + cp -= a1 * b0 + return cp + else: + assert b.shape[-1] == 3 + # cp0 = a1 * b2 - 0 (a2 = 0) + # cp1 = 0 - a0 * b2 (a2 = 0) + # cp2 = a0 * b1 - a1 * b0 + multiply(a1, b2, out=cp0) + multiply(a0, b2, out=cp1) + negative(cp1, out=cp1) + multiply(a0, b1, out=cp2) + cp2 -= a1 * b0 + else: + assert a.shape[-1] == 3 + if b.shape[-1] == 3: + # cp0 = a1 * b2 - a2 * b1 + # cp1 = a2 * b0 - a0 * b2 + # cp2 = a0 * b1 - a1 * b0 + multiply(a1, b2, out=cp0) + tmp = array(a2 * b1) + cp0 -= tmp + multiply(a2, b0, out=cp1) + multiply(a0, b2, out=tmp) + cp1 -= tmp + multiply(a0, b1, out=cp2) + multiply(a1, b0, out=tmp) + cp2 -= tmp + else: + assert b.shape[-1] == 2 + # cp0 = 0 - a2 * b1 (b2 = 0) + # cp1 = a2 * b0 - 0 (b2 = 0) 
+ # cp2 = a0 * b1 - a1 * b0 + multiply(a2, b1, out=cp0) + negative(cp0, out=cp0) + multiply(a2, b0, out=cp1) + multiply(a0, b1, out=cp2) + cp2 -= a1 * b0 + + return moveaxis(cp, -1, axisc) + + +little_endian = (sys.byteorder == 'little') + + +@set_module('numpy') +def indices(dimensions, dtype=int, sparse=False): + """ + Return an array representing the indices of a grid. + + Compute an array where the subarrays contain index values 0, 1, ... + varying only along the corresponding axis. + + Parameters + ---------- + dimensions : sequence of ints + The shape of the grid. + dtype : dtype, optional + Data type of the result. + sparse : boolean, optional + Return a sparse representation of the grid instead of a dense + representation. Default is False. + + .. versionadded:: 1.17 + + Returns + ------- + grid : one ndarray or tuple of ndarrays + If sparse is False: + Returns one array of grid indices, + ``grid.shape = (len(dimensions),) + tuple(dimensions)``. + If sparse is True: + Returns a tuple of arrays, with + ``grid[i].shape = (1, ..., 1, dimensions[i], 1, ..., 1)`` with + dimensions[i] in the ith place + + See Also + -------- + mgrid, ogrid, meshgrid + + Notes + ----- + The output shape in the dense case is obtained by prepending the number + of dimensions in front of the tuple of dimensions, i.e. if `dimensions` + is a tuple ``(r0, ..., rN-1)`` of length ``N``, the output shape is + ``(N, r0, ..., rN-1)``. + + The subarrays ``grid[k]`` contains the N-D array of indices along the + ``k-th`` axis. Explicitly:: + + grid[k, i0, i1, ..., iN-1] = ik + + Examples + -------- + >>> grid = np.indices((2, 3)) + >>> grid.shape + (2, 2, 3) + >>> grid[0] # row indices + array([[0, 0, 0], + [1, 1, 1]]) + >>> grid[1] # column indices + array([[0, 1, 2], + [0, 1, 2]]) + + The indices can be used as an index into an array. + + >>> x = np.arange(20).reshape(5, 4) + >>> row, col = np.indices((2, 3)) + >>> x[row, col] + array([[0, 1, 2], + [4, 5, 6]]) + + Note that it would be more straightforward in the above example to + extract the required elements directly with ``x[:2, :3]``. + + If sparse is set to true, the grid will be returned in a sparse + representation. + + >>> i, j = np.indices((2, 3), sparse=True) + >>> i.shape + (2, 1) + >>> j.shape + (1, 3) + >>> i # row indices + array([[0], + [1]]) + >>> j # column indices + array([[0, 1, 2]]) + + """ + dimensions = tuple(dimensions) + N = len(dimensions) + shape = (1,)*N + if sparse: + res = tuple() + else: + res = empty((N,)+dimensions, dtype=dtype) + for i, dim in enumerate(dimensions): + idx = arange(dim, dtype=dtype).reshape( + shape[:i] + (dim,) + shape[i+1:] + ) + if sparse: + res = res + (idx,) + else: + res[i] = idx + return res + + +def _fromfunction_dispatcher(function, shape, *, dtype=None, like=None, **kwargs): + return (like,) + + +@set_array_function_like_doc +@set_module('numpy') +def fromfunction(function, shape, *, dtype=float, like=None, **kwargs): + """ + Construct an array by executing a function over each coordinate. + + The resulting array therefore has a value ``fn(x, y, z)`` at + coordinate ``(x, y, z)``. + + Parameters + ---------- + function : callable + The function is called with N parameters, where N is the rank of + `shape`. Each parameter represents the coordinates of the array + varying along a specific axis. 
For example, if `shape` + were ``(2, 2)``, then the parameters would be + ``array([[0, 0], [1, 1]])`` and ``array([[0, 1], [0, 1]])`` + shape : (N,) tuple of ints + Shape of the output array, which also determines the shape of + the coordinate arrays passed to `function`. + dtype : data-type, optional + Data-type of the coordinate arrays passed to `function`. + By default, `dtype` is float. + ${ARRAY_FUNCTION_LIKE} + + .. versionadded:: 1.20.0 + + Returns + ------- + fromfunction : any + The result of the call to `function` is passed back directly. + Therefore the shape of `fromfunction` is completely determined by + `function`. If `function` returns a scalar value, the shape of + `fromfunction` would not match the `shape` parameter. + + See Also + -------- + indices, meshgrid + + Notes + ----- + Keywords other than `dtype` are passed to `function`. + + Examples + -------- + >>> np.fromfunction(lambda i, j: i == j, (3, 3), dtype=int) + array([[ True, False, False], + [False, True, False], + [False, False, True]]) + + >>> np.fromfunction(lambda i, j: i + j, (3, 3), dtype=int) + array([[0, 1, 2], + [1, 2, 3], + [2, 3, 4]]) + + """ + if like is not None: + return _fromfunction_with_like(function, shape, dtype=dtype, like=like, **kwargs) + + args = indices(shape, dtype=dtype) + return function(*args, **kwargs) + + +_fromfunction_with_like = array_function_dispatch( + _fromfunction_dispatcher +)(fromfunction) + + +def _frombuffer(buf, dtype, shape, order): + return frombuffer(buf, dtype=dtype).reshape(shape, order=order) + + +@set_module('numpy') +def isscalar(element): + """ + Returns True if the type of `element` is a scalar type. + + Parameters + ---------- + element : any + Input argument, can be of any type and shape. + + Returns + ------- + val : bool + True if `element` is a scalar type, False if it is not. + + See Also + -------- + ndim : Get the number of dimensions of an array + + Notes + ----- + If you need a stricter way to identify a *numerical* scalar, use + ``isinstance(x, numbers.Number)``, as that returns ``False`` for most + non-numerical elements such as strings. + + In most cases ``np.ndim(x) == 0`` should be used instead of this function, + as that will also return true for 0d arrays. This is how numpy overloads + functions in the style of the ``dx`` arguments to `gradient` and the ``bins`` + argument to `histogram`. 
Some key differences: + + +--------------------------------------+---------------+-------------------+ + | x |``isscalar(x)``|``np.ndim(x) == 0``| + +======================================+===============+===================+ + | PEP 3141 numeric objects (including | ``True`` | ``True`` | + | builtins) | | | + +--------------------------------------+---------------+-------------------+ + | builtin string and buffer objects | ``True`` | ``True`` | + +--------------------------------------+---------------+-------------------+ + | other builtin objects, like | ``False`` | ``True`` | + | `pathlib.Path`, `Exception`, | | | + | the result of `re.compile` | | | + +--------------------------------------+---------------+-------------------+ + | third-party objects like | ``False`` | ``True`` | + | `matplotlib.figure.Figure` | | | + +--------------------------------------+---------------+-------------------+ + | zero-dimensional numpy arrays | ``False`` | ``True`` | + +--------------------------------------+---------------+-------------------+ + | other numpy arrays | ``False`` | ``False`` | + +--------------------------------------+---------------+-------------------+ + | `list`, `tuple`, and other sequence | ``False`` | ``False`` | + | objects | | | + +--------------------------------------+---------------+-------------------+ + + Examples + -------- + >>> np.isscalar(3.1) + True + >>> np.isscalar(np.array(3.1)) + False + >>> np.isscalar([3.1]) + False + >>> np.isscalar(False) + True + >>> np.isscalar('numpy') + True + + NumPy supports PEP 3141 numbers: + + >>> from fractions import Fraction + >>> np.isscalar(Fraction(5, 17)) + True + >>> from numbers import Number + >>> np.isscalar(Number()) + True + + """ + return (isinstance(element, generic) + or type(element) in ScalarType + or isinstance(element, numbers.Number)) + + +@set_module('numpy') +def binary_repr(num, width=None): + """ + Return the binary representation of the input number as a string. + + For negative numbers, if width is not given, a minus sign is added to the + front. If width is given, the two's complement of the number is + returned, with respect to that width. + + In a two's-complement system negative numbers are represented by the two's + complement of the absolute value. This is the most common method of + representing signed integers on computers [1]_. A N-bit two's-complement + system can represent every integer in the range + :math:`-2^{N-1}` to :math:`+2^{N-1}-1`. + + Parameters + ---------- + num : int + Only an integer decimal number can be used. + width : int, optional + The length of the returned string if `num` is positive, or the length + of the two's complement if `num` is negative, provided that `width` is + at least a sufficient number of bits for `num` to be represented in the + designated form. + + If the `width` value is insufficient, it will be ignored, and `num` will + be returned in binary (`num` > 0) or two's complement (`num` < 0) form + with its width equal to the minimum number of bits needed to represent + the number in the designated form. This behavior is deprecated and will + later raise an error. + + .. deprecated:: 1.12.0 + + Returns + ------- + bin : str + Binary representation of `num` or two's complement of `num`. + + See Also + -------- + base_repr: Return a string representation of a number in the given base + system. + bin: Python's built-in binary representation generator of an integer. 
+ + Notes + ----- + `binary_repr` is equivalent to using `base_repr` with base 2, but about 25x + faster. + + References + ---------- + .. [1] Wikipedia, "Two's complement", + https://en.wikipedia.org/wiki/Two's_complement + + Examples + -------- + >>> np.binary_repr(3) + '11' + >>> np.binary_repr(-3) + '-11' + >>> np.binary_repr(3, width=4) + '0011' + + The two's complement is returned when the input number is negative and + width is specified: + + >>> np.binary_repr(-3, width=3) + '101' + >>> np.binary_repr(-3, width=5) + '11101' + + """ + def warn_if_insufficient(width, binwidth): + if width is not None and width < binwidth: + warnings.warn( + "Insufficient bit width provided. This behavior " + "will raise an error in the future.", DeprecationWarning, + stacklevel=3) + + # Ensure that num is a Python integer to avoid overflow or unwanted + # casts to floating point. + num = operator.index(num) + + if num == 0: + return '0' * (width or 1) + + elif num > 0: + binary = bin(num)[2:] + binwidth = len(binary) + outwidth = (binwidth if width is None + else max(binwidth, width)) + warn_if_insufficient(width, binwidth) + return binary.zfill(outwidth) + + else: + if width is None: + return '-' + bin(-num)[2:] + + else: + poswidth = len(bin(-num)[2:]) + + # See gh-8679: remove extra digit + # for numbers at boundaries. + if 2**(poswidth - 1) == -num: + poswidth -= 1 + + twocomp = 2**(poswidth + 1) + num + binary = bin(twocomp)[2:] + binwidth = len(binary) + + outwidth = max(binwidth, width) + warn_if_insufficient(width, binwidth) + return '1' * (outwidth - binwidth) + binary + + +@set_module('numpy') +def base_repr(number, base=2, padding=0): + """ + Return a string representation of a number in the given base system. + + Parameters + ---------- + number : int + The value to convert. Positive and negative values are handled. + base : int, optional + Convert `number` to the `base` number system. The valid range is 2-36, + the default value is 2. + padding : int, optional + Number of zeros padded on the left. Default is 0 (no padding). + + Returns + ------- + out : str + String representation of `number` in `base` system. + + See Also + -------- + binary_repr : Faster version of `base_repr` for base 2. + + Examples + -------- + >>> np.base_repr(5) + '101' + >>> np.base_repr(6, 5) + '11' + >>> np.base_repr(7, base=5, padding=3) + '00012' + + >>> np.base_repr(10, base=16) + 'A' + >>> np.base_repr(32, base=16) + '20' + + """ + digits = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ' + if base > len(digits): + raise ValueError("Bases greater than 36 not handled in base_repr.") + elif base < 2: + raise ValueError("Bases less than 2 not handled in base_repr.") + + num = abs(number) + res = [] + while num: + res.append(digits[num % base]) + num //= base + if padding: + res.append('0' * padding) + if number < 0: + res.append('-') + return ''.join(reversed(res or '0')) + + +# These are all essentially abbreviations +# These might wind up in a special abbreviations module + + +def _maketup(descr, val): + dt = dtype(descr) + # Place val in all scalar tuples: + fields = dt.fields + if fields is None: + return val + else: + res = [_maketup(fields[name][0], val) for name in dt.names] + return tuple(res) + + +def _identity_dispatcher(n, dtype=None, *, like=None): + return (like,) + + +@set_array_function_like_doc +@set_module('numpy') +def identity(n, dtype=None, *, like=None): + """ + Return the identity array. + + The identity array is a square array with ones on + the main diagonal. 
+ + Parameters + ---------- + n : int + Number of rows (and columns) in `n` x `n` output. + dtype : data-type, optional + Data-type of the output. Defaults to ``float``. + ${ARRAY_FUNCTION_LIKE} + + .. versionadded:: 1.20.0 + + Returns + ------- + out : ndarray + `n` x `n` array with its main diagonal set to one, + and all other elements 0. + + Examples + -------- + >>> np.identity(3) + array([[1., 0., 0.], + [0., 1., 0.], + [0., 0., 1.]]) + + """ + if like is not None: + return _identity_with_like(n, dtype=dtype, like=like) + + from numpy import eye + return eye(n, dtype=dtype, like=like) + + +_identity_with_like = array_function_dispatch( + _identity_dispatcher +)(identity) + + +def _allclose_dispatcher(a, b, rtol=None, atol=None, equal_nan=None): + return (a, b) + + +@array_function_dispatch(_allclose_dispatcher) +def allclose(a, b, rtol=1.e-5, atol=1.e-8, equal_nan=False): + """ + Returns True if two arrays are element-wise equal within a tolerance. + + The tolerance values are positive, typically very small numbers. The + relative difference (`rtol` * abs(`b`)) and the absolute difference + `atol` are added together to compare against the absolute difference + between `a` and `b`. + + NaNs are treated as equal if they are in the same place and if + ``equal_nan=True``. Infs are treated as equal if they are in the same + place and of the same sign in both arrays. + + Parameters + ---------- + a, b : array_like + Input arrays to compare. + rtol : float + The relative tolerance parameter (see Notes). + atol : float + The absolute tolerance parameter (see Notes). + equal_nan : bool + Whether to compare NaN's as equal. If True, NaN's in `a` will be + considered equal to NaN's in `b` in the output array. + + .. versionadded:: 1.10.0 + + Returns + ------- + allclose : bool + Returns True if the two arrays are equal within the given + tolerance; False otherwise. + + See Also + -------- + isclose, all, any, equal + + Notes + ----- + If the following equation is element-wise True, then allclose returns + True. + + absolute(`a` - `b`) <= (`atol` + `rtol` * absolute(`b`)) + + The above equation is not symmetric in `a` and `b`, so that + ``allclose(a, b)`` might be different from ``allclose(b, a)`` in + some rare cases. + + The comparison of `a` and `b` uses standard broadcasting, which + means that `a` and `b` need not have the same shape in order for + ``allclose(a, b)`` to evaluate to True. The same is true for + `equal` but not `array_equal`. + + `allclose` is not defined for non-numeric data types. + `bool` is considered a numeric data-type for this purpose. + + Examples + -------- + >>> np.allclose([1e10,1e-7], [1.00001e10,1e-8]) + False + >>> np.allclose([1e10,1e-8], [1.00001e10,1e-9]) + True + >>> np.allclose([1e10,1e-8], [1.0001e10,1e-9]) + False + >>> np.allclose([1.0, np.nan], [1.0, np.nan]) + False + >>> np.allclose([1.0, np.nan], [1.0, np.nan], equal_nan=True) + True + + """ + res = all(isclose(a, b, rtol=rtol, atol=atol, equal_nan=equal_nan)) + return bool(res) + + +def _isclose_dispatcher(a, b, rtol=None, atol=None, equal_nan=None): + return (a, b) + + +@array_function_dispatch(_isclose_dispatcher) +def isclose(a, b, rtol=1.e-5, atol=1.e-8, equal_nan=False): + """ + Returns a boolean array where two arrays are element-wise equal within a + tolerance. + + The tolerance values are positive, typically very small numbers. The + relative difference (`rtol` * abs(`b`)) and the absolute difference + `atol` are added together to compare against the absolute difference + between `a` and `b`. 
+ + .. warning:: The default `atol` is not appropriate for comparing numbers + that are much smaller than one (see Notes). + + Parameters + ---------- + a, b : array_like + Input arrays to compare. + rtol : float + The relative tolerance parameter (see Notes). + atol : float + The absolute tolerance parameter (see Notes). + equal_nan : bool + Whether to compare NaN's as equal. If True, NaN's in `a` will be + considered equal to NaN's in `b` in the output array. + + Returns + ------- + y : array_like + Returns a boolean array of where `a` and `b` are equal within the + given tolerance. If both `a` and `b` are scalars, returns a single + boolean value. + + See Also + -------- + allclose + math.isclose + + Notes + ----- + .. versionadded:: 1.7.0 + + For finite values, isclose uses the following equation to test whether + two floating point values are equivalent. + + absolute(`a` - `b`) <= (`atol` + `rtol` * absolute(`b`)) + + Unlike the built-in `math.isclose`, the above equation is not symmetric + in `a` and `b` -- it assumes `b` is the reference value -- so that + `isclose(a, b)` might be different from `isclose(b, a)`. Furthermore, + the default value of atol is not zero, and is used to determine what + small values should be considered close to zero. The default value is + appropriate for expected values of order unity: if the expected values + are significantly smaller than one, it can result in false positives. + `atol` should be carefully selected for the use case at hand. A zero value + for `atol` will result in `False` if either `a` or `b` is zero. + + `isclose` is not defined for non-numeric data types. + `bool` is considered a numeric data-type for this purpose. + + Examples + -------- + >>> np.isclose([1e10,1e-7], [1.00001e10,1e-8]) + array([ True, False]) + >>> np.isclose([1e10,1e-8], [1.00001e10,1e-9]) + array([ True, True]) + >>> np.isclose([1e10,1e-8], [1.0001e10,1e-9]) + array([False, True]) + >>> np.isclose([1.0, np.nan], [1.0, np.nan]) + array([ True, False]) + >>> np.isclose([1.0, np.nan], [1.0, np.nan], equal_nan=True) + array([ True, True]) + >>> np.isclose([1e-8, 1e-7], [0.0, 0.0]) + array([ True, False]) + >>> np.isclose([1e-100, 1e-7], [0.0, 0.0], atol=0.0) + array([False, False]) + >>> np.isclose([1e-10, 1e-10], [1e-20, 0.0]) + array([ True, True]) + >>> np.isclose([1e-10, 1e-10], [1e-20, 0.999999e-10], atol=0.0) + array([False, True]) + """ + def within_tol(x, y, atol, rtol): + with errstate(invalid='ignore'): + return less_equal(abs(x-y), atol + rtol * abs(y)) + + x = asanyarray(a) + y = asanyarray(b) + + # Make sure y is an inexact type to avoid bad behavior on abs(MIN_INT). + # This will cause casting of x later. Also, make sure to allow subclasses + # (e.g., for numpy.ma). + # NOTE: We explicitly allow timedelta, which used to work. This could + # possibly be deprecated. See also gh-18286. + # timedelta works if `atol` is an integer or also a timedelta. + # Although, the default tolerances are unlikely to be useful + if y.dtype.kind != "m": + dt = multiarray.result_type(y, 1.) + y = asanyarray(y, dtype=dt) + + xfin = isfinite(x) + yfin = isfinite(y) + if all(xfin) and all(yfin): + return within_tol(x, y, atol, rtol) + else: + finite = xfin & yfin + cond = zeros_like(finite, subok=True) + # Because we're using boolean indexing, x & y must be the same shape. + # Ideally, we'd just do x, y = broadcast_arrays(x, y). It's in + # lib.stride_tricks, though, so we can't import it here. 
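+        # Multiplying by ones_like(cond) achieves the same broadcast: it
+        # expands x and y to the common shape, so the boolean masks computed
+        # below can index both arrays directly.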
+ x = x * ones_like(cond) + y = y * ones_like(cond) + # Avoid subtraction with infinite/nan values... + cond[finite] = within_tol(x[finite], y[finite], atol, rtol) + # Check for equality of infinite values... + cond[~finite] = (x[~finite] == y[~finite]) + if equal_nan: + # Make NaN == NaN + both_nan = isnan(x) & isnan(y) + + # Needed to treat masked arrays correctly. = True would not work. + cond[both_nan] = both_nan[both_nan] + + return cond[()] # Flatten 0d arrays to scalars + + +def _array_equal_dispatcher(a1, a2, equal_nan=None): + return (a1, a2) + + +@array_function_dispatch(_array_equal_dispatcher) +def array_equal(a1, a2, equal_nan=False): + """ + True if two arrays have the same shape and elements, False otherwise. + + Parameters + ---------- + a1, a2 : array_like + Input arrays. + equal_nan : bool + Whether to compare NaN's as equal. If the dtype of a1 and a2 is + complex, values will be considered equal if either the real or the + imaginary component of a given value is ``nan``. + + .. versionadded:: 1.19.0 + + Returns + ------- + b : bool + Returns True if the arrays are equal. + + See Also + -------- + allclose: Returns True if two arrays are element-wise equal within a + tolerance. + array_equiv: Returns True if input arrays are shape consistent and all + elements equal. + + Examples + -------- + >>> np.array_equal([1, 2], [1, 2]) + True + >>> np.array_equal(np.array([1, 2]), np.array([1, 2])) + True + >>> np.array_equal([1, 2], [1, 2, 3]) + False + >>> np.array_equal([1, 2], [1, 4]) + False + >>> a = np.array([1, np.nan]) + >>> np.array_equal(a, a) + False + >>> np.array_equal(a, a, equal_nan=True) + True + + When ``equal_nan`` is True, complex values with nan components are + considered equal if either the real *or* the imaginary components are nan. + + >>> a = np.array([1 + 1j]) + >>> b = a.copy() + >>> a.real = np.nan + >>> b.imag = np.nan + >>> np.array_equal(a, b, equal_nan=True) + True + """ + try: + a1, a2 = asarray(a1), asarray(a2) + except Exception: + return False + if a1.shape != a2.shape: + return False + if not equal_nan: + return bool(asarray(a1 == a2).all()) + # Handling NaN values if equal_nan is True + a1nan, a2nan = isnan(a1), isnan(a2) + # NaN's occur at different locations + if not (a1nan == a2nan).all(): + return False + # Shapes of a1, a2 and masks are guaranteed to be consistent by this point + return bool(asarray(a1[~a1nan] == a2[~a1nan]).all()) + + +def _array_equiv_dispatcher(a1, a2): + return (a1, a2) + + +@array_function_dispatch(_array_equiv_dispatcher) +def array_equiv(a1, a2): + """ + Returns True if input arrays are shape consistent and all elements equal. + + Shape consistent means they are either the same shape, or one input array + can be broadcasted to create the same shape as the other one. + + Parameters + ---------- + a1, a2 : array_like + Input arrays. + + Returns + ------- + out : bool + True if equivalent, False otherwise. 
+ + Examples + -------- + >>> np.array_equiv([1, 2], [1, 2]) + True + >>> np.array_equiv([1, 2], [1, 3]) + False + + Showing the shape equivalence: + + >>> np.array_equiv([1, 2], [[1, 2], [1, 2]]) + True + >>> np.array_equiv([1, 2], [[1, 2, 1, 2], [1, 2, 1, 2]]) + False + + >>> np.array_equiv([1, 2], [[1, 2], [1, 3]]) + False + + """ + try: + a1, a2 = asarray(a1), asarray(a2) + except Exception: + return False + try: + multiarray.broadcast(a1, a2) + except Exception: + return False + + return bool(asarray(a1 == a2).all()) + + +Inf = inf = infty = Infinity = PINF +nan = NaN = NAN +False_ = bool_(False) +True_ = bool_(True) + + +def extend_all(module): + existing = set(__all__) + mall = getattr(module, '__all__') + for a in mall: + if a not in existing: + __all__.append(a) + + +from .umath import * +from .numerictypes import * +from . import fromnumeric +from .fromnumeric import * +from . import arrayprint +from .arrayprint import * +from . import _asarray +from ._asarray import * +from . import _ufunc_config +from ._ufunc_config import * +extend_all(fromnumeric) +extend_all(umath) +extend_all(numerictypes) +extend_all(arrayprint) +extend_all(_asarray) +extend_all(_ufunc_config) diff --git a/.local/lib/python3.8/site-packages/numpy/core/numeric.pyi b/.local/lib/python3.8/site-packages/numpy/core/numeric.pyi new file mode 100644 index 0000000..d7ec303 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/numeric.pyi @@ -0,0 +1,635 @@ +from typing import ( + Any, + Union, + Sequence, + Tuple, + Callable, + List, + overload, + TypeVar, + Literal, + Type, + SupportsAbs, + SupportsIndex, + NoReturn, +) +from typing_extensions import TypeGuard + +from numpy import ( + ComplexWarning as ComplexWarning, + dtype, + generic, + unsignedinteger, + signedinteger, + floating, + complexfloating, + bool_, + int_, + intp, + float64, + timedelta64, + object_, + _OrderKACF, + _OrderCF, +) + +from numpy.typing import ( + ArrayLike, + NDArray, + DTypeLike, + _ShapeLike, + _SupportsDType, + _FiniteNestedSequence, + _SupportsArray, + _ScalarLike_co, + _ArrayLikeBool_co, + _ArrayLikeUInt_co, + _ArrayLikeInt_co, + _ArrayLikeFloat_co, + _ArrayLikeComplex_co, + _ArrayLikeTD64_co, + _ArrayLikeObject_co, +) + +_T = TypeVar("_T") +_SCT = TypeVar("_SCT", bound=generic) +_ArrayType = TypeVar("_ArrayType", bound=NDArray[Any]) + +_DTypeLike = Union[ + dtype[_SCT], + Type[_SCT], + _SupportsDType[dtype[_SCT]], +] +_ArrayLike = _FiniteNestedSequence[_SupportsArray[dtype[_SCT]]] +_CorrelateMode = Literal["valid", "same", "full"] + +__all__: List[str] + +@overload +def zeros_like( + a: _ArrayType, + dtype: None = ..., + order: _OrderKACF = ..., + subok: Literal[True] = ..., + shape: None = ..., +) -> _ArrayType: ... +@overload +def zeros_like( + a: _ArrayLike[_SCT], + dtype: None = ..., + order: _OrderKACF = ..., + subok: bool = ..., + shape: None | _ShapeLike = ..., +) -> NDArray[_SCT]: ... +@overload +def zeros_like( + a: object, + dtype: None = ..., + order: _OrderKACF = ..., + subok: bool = ..., + shape: None | _ShapeLike= ..., +) -> NDArray[Any]: ... +@overload +def zeros_like( + a: Any, + dtype: _DTypeLike[_SCT], + order: _OrderKACF = ..., + subok: bool = ..., + shape: None | _ShapeLike= ..., +) -> NDArray[_SCT]: ... +@overload +def zeros_like( + a: Any, + dtype: DTypeLike, + order: _OrderKACF = ..., + subok: bool = ..., + shape: None | _ShapeLike= ..., +) -> NDArray[Any]: ... 
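+
+# A quick illustrative sketch (editorial, not from the upstream stubs) of how
+# the ``zeros_like`` overloads above resolve for a static type checker:
+#
+#     >>> import numpy as np
+#     >>> a = np.empty(3, dtype=np.float64)
+#     >>> np.zeros_like(a)                  # ndarray in -> same array type out
+#     >>> np.zeros_like([[1, 2], [3, 4]])   # generic object -> NDArray[Any]
+#     >>> np.zeros_like(a, dtype=np.int32)  # explicit dtype -> NDArray[int32]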
+ +@overload +def ones( + shape: _ShapeLike, + dtype: None = ..., + order: _OrderCF = ..., + *, + like: ArrayLike = ..., +) -> NDArray[float64]: ... +@overload +def ones( + shape: _ShapeLike, + dtype: _DTypeLike[_SCT], + order: _OrderCF = ..., + *, + like: ArrayLike = ..., +) -> NDArray[_SCT]: ... +@overload +def ones( + shape: _ShapeLike, + dtype: DTypeLike, + order: _OrderCF = ..., + *, + like: ArrayLike = ..., +) -> NDArray[Any]: ... + +@overload +def ones_like( + a: _ArrayType, + dtype: None = ..., + order: _OrderKACF = ..., + subok: Literal[True] = ..., + shape: None = ..., +) -> _ArrayType: ... +@overload +def ones_like( + a: _ArrayLike[_SCT], + dtype: None = ..., + order: _OrderKACF = ..., + subok: bool = ..., + shape: None | _ShapeLike = ..., +) -> NDArray[_SCT]: ... +@overload +def ones_like( + a: object, + dtype: None = ..., + order: _OrderKACF = ..., + subok: bool = ..., + shape: None | _ShapeLike= ..., +) -> NDArray[Any]: ... +@overload +def ones_like( + a: Any, + dtype: _DTypeLike[_SCT], + order: _OrderKACF = ..., + subok: bool = ..., + shape: None | _ShapeLike= ..., +) -> NDArray[_SCT]: ... +@overload +def ones_like( + a: Any, + dtype: DTypeLike, + order: _OrderKACF = ..., + subok: bool = ..., + shape: None | _ShapeLike= ..., +) -> NDArray[Any]: ... + +@overload +def full( + shape: _ShapeLike, + fill_value: Any, + dtype: None = ..., + order: _OrderCF = ..., + *, + like: ArrayLike = ..., +) -> NDArray[Any]: ... +@overload +def full( + shape: _ShapeLike, + fill_value: Any, + dtype: _DTypeLike[_SCT], + order: _OrderCF = ..., + *, + like: ArrayLike = ..., +) -> NDArray[_SCT]: ... +@overload +def full( + shape: _ShapeLike, + fill_value: Any, + dtype: DTypeLike, + order: _OrderCF = ..., + *, + like: ArrayLike = ..., +) -> NDArray[Any]: ... + +@overload +def full_like( + a: _ArrayType, + fill_value: Any, + dtype: None = ..., + order: _OrderKACF = ..., + subok: Literal[True] = ..., + shape: None = ..., +) -> _ArrayType: ... +@overload +def full_like( + a: _ArrayLike[_SCT], + fill_value: Any, + dtype: None = ..., + order: _OrderKACF = ..., + subok: bool = ..., + shape: None | _ShapeLike = ..., +) -> NDArray[_SCT]: ... +@overload +def full_like( + a: object, + fill_value: Any, + dtype: None = ..., + order: _OrderKACF = ..., + subok: bool = ..., + shape: None | _ShapeLike= ..., +) -> NDArray[Any]: ... +@overload +def full_like( + a: Any, + fill_value: Any, + dtype: _DTypeLike[_SCT], + order: _OrderKACF = ..., + subok: bool = ..., + shape: None | _ShapeLike= ..., +) -> NDArray[_SCT]: ... +@overload +def full_like( + a: Any, + fill_value: Any, + dtype: DTypeLike, + order: _OrderKACF = ..., + subok: bool = ..., + shape: None | _ShapeLike= ..., +) -> NDArray[Any]: ... + +@overload +def count_nonzero( + a: ArrayLike, + axis: None = ..., + *, + keepdims: Literal[False] = ..., +) -> int: ... +@overload +def count_nonzero( + a: ArrayLike, + axis: _ShapeLike = ..., + *, + keepdims: bool = ..., +) -> Any: ... # TODO: np.intp or ndarray[np.intp] + +def isfortran(a: NDArray[Any] | generic) -> bool: ... + +def argwhere(a: ArrayLike) -> NDArray[intp]: ... + +def flatnonzero(a: ArrayLike) -> NDArray[intp]: ... + +@overload +def correlate( + a: _ArrayLikeBool_co, + v: _ArrayLikeBool_co, + mode: _CorrelateMode = ..., +) -> NDArray[bool_]: ... +@overload +def correlate( + a: _ArrayLikeUInt_co, + v: _ArrayLikeUInt_co, + mode: _CorrelateMode = ..., +) -> NDArray[unsignedinteger[Any]]: ... 
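+# The ``correlate``/``convolve`` overload ladders here follow NumPy's
+# promotion order (bool_ -> unsigned -> signed -> floating -> complex ->
+# timedelta64 -> object_): the first family that fits both inputs determines
+# the declared result dtype.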
+@overload +def correlate( + a: _ArrayLikeInt_co, + v: _ArrayLikeInt_co, + mode: _CorrelateMode = ..., +) -> NDArray[signedinteger[Any]]: ... +@overload +def correlate( + a: _ArrayLikeFloat_co, + v: _ArrayLikeFloat_co, + mode: _CorrelateMode = ..., +) -> NDArray[floating[Any]]: ... +@overload +def correlate( + a: _ArrayLikeComplex_co, + v: _ArrayLikeComplex_co, + mode: _CorrelateMode = ..., +) -> NDArray[complexfloating[Any, Any]]: ... +@overload +def correlate( + a: _ArrayLikeTD64_co, + v: _ArrayLikeTD64_co, + mode: _CorrelateMode = ..., +) -> NDArray[timedelta64]: ... +@overload +def correlate( + a: _ArrayLikeObject_co, + v: _ArrayLikeObject_co, + mode: _CorrelateMode = ..., +) -> NDArray[object_]: ... + +@overload +def convolve( + a: _ArrayLikeBool_co, + v: _ArrayLikeBool_co, + mode: _CorrelateMode = ..., +) -> NDArray[bool_]: ... +@overload +def convolve( + a: _ArrayLikeUInt_co, + v: _ArrayLikeUInt_co, + mode: _CorrelateMode = ..., +) -> NDArray[unsignedinteger[Any]]: ... +@overload +def convolve( + a: _ArrayLikeInt_co, + v: _ArrayLikeInt_co, + mode: _CorrelateMode = ..., +) -> NDArray[signedinteger[Any]]: ... +@overload +def convolve( + a: _ArrayLikeFloat_co, + v: _ArrayLikeFloat_co, + mode: _CorrelateMode = ..., +) -> NDArray[floating[Any]]: ... +@overload +def convolve( + a: _ArrayLikeComplex_co, + v: _ArrayLikeComplex_co, + mode: _CorrelateMode = ..., +) -> NDArray[complexfloating[Any, Any]]: ... +@overload +def convolve( + a: _ArrayLikeTD64_co, + v: _ArrayLikeTD64_co, + mode: _CorrelateMode = ..., +) -> NDArray[timedelta64]: ... +@overload +def convolve( + a: _ArrayLikeObject_co, + v: _ArrayLikeObject_co, + mode: _CorrelateMode = ..., +) -> NDArray[object_]: ... + +@overload +def outer( + a: _ArrayLikeBool_co, + b: _ArrayLikeBool_co, + out: None = ..., +) -> NDArray[bool_]: ... +@overload +def outer( + a: _ArrayLikeUInt_co, + b: _ArrayLikeUInt_co, + out: None = ..., +) -> NDArray[unsignedinteger[Any]]: ... +@overload +def outer( + a: _ArrayLikeInt_co, + b: _ArrayLikeInt_co, + out: None = ..., +) -> NDArray[signedinteger[Any]]: ... +@overload +def outer( + a: _ArrayLikeFloat_co, + b: _ArrayLikeFloat_co, + out: None = ..., +) -> NDArray[floating[Any]]: ... +@overload +def outer( + a: _ArrayLikeComplex_co, + b: _ArrayLikeComplex_co, + out: None = ..., +) -> NDArray[complexfloating[Any, Any]]: ... +@overload +def outer( + a: _ArrayLikeTD64_co, + b: _ArrayLikeTD64_co, + out: None = ..., +) -> NDArray[timedelta64]: ... +@overload +def outer( + a: _ArrayLikeObject_co, + b: _ArrayLikeObject_co, + out: None = ..., +) -> NDArray[object_]: ... +@overload +def outer( + a: _ArrayLikeComplex_co | _ArrayLikeTD64_co | _ArrayLikeObject_co, + b: _ArrayLikeComplex_co | _ArrayLikeTD64_co | _ArrayLikeObject_co, + out: _ArrayType, +) -> _ArrayType: ... + +@overload +def tensordot( + a: _ArrayLikeBool_co, + b: _ArrayLikeBool_co, + axes: int | Tuple[_ShapeLike, _ShapeLike] = ..., +) -> NDArray[bool_]: ... +@overload +def tensordot( + a: _ArrayLikeUInt_co, + b: _ArrayLikeUInt_co, + axes: int | Tuple[_ShapeLike, _ShapeLike] = ..., +) -> NDArray[unsignedinteger[Any]]: ... +@overload +def tensordot( + a: _ArrayLikeInt_co, + b: _ArrayLikeInt_co, + axes: int | Tuple[_ShapeLike, _ShapeLike] = ..., +) -> NDArray[signedinteger[Any]]: ... +@overload +def tensordot( + a: _ArrayLikeFloat_co, + b: _ArrayLikeFloat_co, + axes: int | Tuple[_ShapeLike, _ShapeLike] = ..., +) -> NDArray[floating[Any]]: ... 
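+# ``axes`` may be a single int (sum over the last ``axes`` dimensions of ``a``
+# and the first ``axes`` of ``b``) or a pair of axis sequences, hence the
+# ``int | Tuple[_ShapeLike, _ShapeLike]`` annotation in each overload.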
+@overload +def tensordot( + a: _ArrayLikeComplex_co, + b: _ArrayLikeComplex_co, + axes: int | Tuple[_ShapeLike, _ShapeLike] = ..., +) -> NDArray[complexfloating[Any, Any]]: ... +@overload +def tensordot( + a: _ArrayLikeTD64_co, + b: _ArrayLikeTD64_co, + axes: int | Tuple[_ShapeLike, _ShapeLike] = ..., +) -> NDArray[timedelta64]: ... +@overload +def tensordot( + a: _ArrayLikeObject_co, + b: _ArrayLikeObject_co, + axes: int | Tuple[_ShapeLike, _ShapeLike] = ..., +) -> NDArray[object_]: ... + +@overload +def roll( + a: _ArrayLike[_SCT], + shift: _ShapeLike, + axis: None | _ShapeLike = ..., +) -> NDArray[_SCT]: ... +@overload +def roll( + a: ArrayLike, + shift: _ShapeLike, + axis: None | _ShapeLike = ..., +) -> NDArray[Any]: ... + +def rollaxis( + a: NDArray[_SCT], + axis: int, + start: int = ..., +) -> NDArray[_SCT]: ... + +def moveaxis( + a: NDArray[_SCT], + source: _ShapeLike, + destination: _ShapeLike, +) -> NDArray[_SCT]: ... + +@overload +def cross( + a: _ArrayLikeBool_co, + b: _ArrayLikeBool_co, + axisa: int = ..., + axisb: int = ..., + axisc: int = ..., + axis: None | int = ..., +) -> NoReturn: ... +@overload +def cross( + a: _ArrayLikeUInt_co, + b: _ArrayLikeUInt_co, + axisa: int = ..., + axisb: int = ..., + axisc: int = ..., + axis: None | int = ..., +) -> NDArray[unsignedinteger[Any]]: ... +@overload +def cross( + a: _ArrayLikeInt_co, + b: _ArrayLikeInt_co, + axisa: int = ..., + axisb: int = ..., + axisc: int = ..., + axis: None | int = ..., +) -> NDArray[signedinteger[Any]]: ... +@overload +def cross( + a: _ArrayLikeFloat_co, + b: _ArrayLikeFloat_co, + axisa: int = ..., + axisb: int = ..., + axisc: int = ..., + axis: None | int = ..., +) -> NDArray[floating[Any]]: ... +@overload +def cross( + a: _ArrayLikeComplex_co, + b: _ArrayLikeComplex_co, + axisa: int = ..., + axisb: int = ..., + axisc: int = ..., + axis: None | int = ..., +) -> NDArray[complexfloating[Any, Any]]: ... +@overload +def cross( + a: _ArrayLikeObject_co, + b: _ArrayLikeObject_co, + axisa: int = ..., + axisb: int = ..., + axisc: int = ..., + axis: None | int = ..., +) -> NDArray[object_]: ... + +@overload +def indices( + dimensions: Sequence[int], + dtype: Type[int] = ..., + sparse: Literal[False] = ..., +) -> NDArray[int_]: ... +@overload +def indices( + dimensions: Sequence[int], + dtype: Type[int] = ..., + sparse: Literal[True] = ..., +) -> Tuple[NDArray[int_], ...]: ... +@overload +def indices( + dimensions: Sequence[int], + dtype: _DTypeLike[_SCT], + sparse: Literal[False] = ..., +) -> NDArray[_SCT]: ... +@overload +def indices( + dimensions: Sequence[int], + dtype: _DTypeLike[_SCT], + sparse: Literal[True], +) -> Tuple[NDArray[_SCT], ...]: ... +@overload +def indices( + dimensions: Sequence[int], + dtype: DTypeLike, + sparse: Literal[False] = ..., +) -> NDArray[Any]: ... +@overload +def indices( + dimensions: Sequence[int], + dtype: DTypeLike, + sparse: Literal[True], +) -> Tuple[NDArray[Any], ...]: ... + +def fromfunction( + function: Callable[..., _T], + shape: Sequence[int], + *, + dtype: DTypeLike = ..., + like: ArrayLike = ..., + **kwargs: Any, +) -> _T: ... + +def isscalar(element: object) -> TypeGuard[ + generic | bool | int | float | complex | str | bytes | memoryview +]: ... + +def binary_repr(num: int, width: None | int = ...) -> str: ... + +def base_repr( + number: SupportsAbs[float], + base: float = ..., + padding: SupportsIndex = ..., +) -> str: ... + +@overload +def identity( + n: int, + dtype: None = ..., + *, + like: ArrayLike = ..., +) -> NDArray[float64]: ... 
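+# As with ``ones`` above, ``dtype=None`` is declared to give NDArray[float64],
+# matching the runtime default dtype of ``float``.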
+@overload +def identity( + n: int, + dtype: _DTypeLike[_SCT], + *, + like: ArrayLike = ..., +) -> NDArray[_SCT]: ... +@overload +def identity( + n: int, + dtype: DTypeLike, + *, + like: ArrayLike = ..., +) -> NDArray[Any]: ... + +def allclose( + a: ArrayLike, + b: ArrayLike, + rtol: float = ..., + atol: float = ..., + equal_nan: bool = ..., +) -> bool: ... + +@overload +def isclose( + a: _ScalarLike_co, + b: _ScalarLike_co, + rtol: float = ..., + atol: float = ..., + equal_nan: bool = ..., +) -> bool_: ... +@overload +def isclose( + a: ArrayLike, + b: ArrayLike, + rtol: float = ..., + atol: float = ..., + equal_nan: bool = ..., +) -> NDArray[bool_]: ... + +def array_equal(a1: ArrayLike, a2: ArrayLike, equal_nan: bool = ...) -> bool: ... + +def array_equiv(a1: ArrayLike, a2: ArrayLike) -> bool: ... diff --git a/.local/lib/python3.8/site-packages/numpy/core/numerictypes.py b/.local/lib/python3.8/site-packages/numpy/core/numerictypes.py new file mode 100644 index 0000000..8e5de85 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/numerictypes.py @@ -0,0 +1,670 @@ +""" +numerictypes: Define the numeric type objects + +This module is designed so "from numerictypes import \\*" is safe. +Exported symbols include: + + Dictionary with all registered number types (including aliases): + sctypeDict + + Type objects (not all will be available, depends on platform): + see variable sctypes for which ones you have + + Bit-width names + + int8 int16 int32 int64 int128 + uint8 uint16 uint32 uint64 uint128 + float16 float32 float64 float96 float128 float256 + complex32 complex64 complex128 complex192 complex256 complex512 + datetime64 timedelta64 + + c-based names + + bool_ + + object_ + + void, str_, unicode_ + + byte, ubyte, + short, ushort + intc, uintc, + intp, uintp, + int_, uint, + longlong, ulonglong, + + single, csingle, + float_, complex_, + longfloat, clongfloat, + + As part of the type-hierarchy: xx -- is bit-width + + generic + +-> bool_ (kind=b) + +-> number + | +-> integer + | | +-> signedinteger (intxx) (kind=i) + | | | byte + | | | short + | | | intc + | | | intp int0 + | | | int_ + | | | longlong + | | \\-> unsignedinteger (uintxx) (kind=u) + | | ubyte + | | ushort + | | uintc + | | uintp uint0 + | | uint_ + | | ulonglong + | +-> inexact + | +-> floating (floatxx) (kind=f) + | | half + | | single + | | float_ (double) + | | longfloat + | \\-> complexfloating (complexxx) (kind=c) + | csingle (singlecomplex) + | complex_ (cfloat, cdouble) + | clongfloat (longcomplex) + +-> flexible + | +-> character + | | str_ (string_, bytes_) (kind=S) [Python 2] + | | unicode_ (kind=U) [Python 2] + | | + | | bytes_ (string_) (kind=S) [Python 3] + | | str_ (unicode_) (kind=U) [Python 3] + | | + | \\-> void (kind=V) + \\-> object_ (not used much) (kind=O) + +""" +import numbers + +from numpy.core.multiarray import ( + ndarray, array, dtype, datetime_data, datetime_as_string, + busday_offset, busday_count, is_busday, busdaycalendar + ) +from numpy.core.overrides import set_module + +# we add more at the bottom +__all__ = ['sctypeDict', 'sctypes', + 'ScalarType', 'obj2sctype', 'cast', 'nbytes', 'sctype2char', + 'maximum_sctype', 'issctype', 'typecodes', 'find_common_type', + 'issubdtype', 'datetime_data', 'datetime_as_string', + 'busday_offset', 'busday_count', 'is_busday', 'busdaycalendar', + ] + +# we don't need all these imports, but we need to keep them for compatibility +# for users using np.core.numerictypes.UPPER_TABLE +from ._string_helpers import ( + english_lower, english_upper, 
english_capitalize, LOWER_TABLE, UPPER_TABLE
+)
+
+from ._type_aliases import (
+    sctypeDict,
+    allTypes,
+    bitname,
+    sctypes,
+    _concrete_types,
+    _concrete_typeinfo,
+    _bits_of,
+)
+from ._dtype import _kind_name
+
+# we don't export these for import *, but we do want them accessible
+# as numerictypes.bool, etc.
+from builtins import bool, int, float, complex, object, str, bytes
+from numpy.compat import long, unicode
+
+
+# We use this later
+generic = allTypes['generic']
+
+genericTypeRank = ['bool', 'int8', 'uint8', 'int16', 'uint16',
+                   'int32', 'uint32', 'int64', 'uint64', 'int128',
+                   'uint128', 'float16',
+                   'float32', 'float64', 'float80', 'float96', 'float128',
+                   'float256',
+                   'complex32', 'complex64', 'complex128', 'complex160',
+                   'complex192', 'complex256', 'complex512', 'object']
+
+@set_module('numpy')
+def maximum_sctype(t):
+    """
+    Return the scalar type of highest precision of the same kind as the input.
+
+    Parameters
+    ----------
+    t : dtype or dtype specifier
+        The input data type. This can be a `dtype` object or an object that
+        is convertible to a `dtype`.
+
+    Returns
+    -------
+    out : dtype
+        The highest precision data type of the same kind (`dtype.kind`) as `t`.
+
+    See Also
+    --------
+    obj2sctype, mintypecode, sctype2char
+    dtype
+
+    Examples
+    --------
+    >>> np.maximum_sctype(int)
+    <class 'numpy.int64'>
+    >>> np.maximum_sctype(np.uint8)
+    <class 'numpy.uint64'>
+    >>> np.maximum_sctype(complex)
+    <class 'numpy.complex256'> # may vary
+
+    >>> np.maximum_sctype(str)
+    <class 'numpy.str_'>
+
+    >>> np.maximum_sctype('i2')
+    <class 'numpy.int64'>
+    >>> np.maximum_sctype('f4')
+    <class 'numpy.float128'> # may vary
+
+    """
+    g = obj2sctype(t)
+    if g is None:
+        return t
+    t = g
+    base = _kind_name(dtype(t))
+    if base in sctypes:
+        return sctypes[base][-1]
+    else:
+        return t
+
+
+@set_module('numpy')
+def issctype(rep):
+    """
+    Determines whether the given object represents a scalar data-type.
+
+    Parameters
+    ----------
+    rep : any
+        If `rep` is an instance of a scalar dtype, True is returned. If not,
+        False is returned.
+
+    Returns
+    -------
+    out : bool
+        Boolean result of check whether `rep` is a scalar dtype.
+
+    See Also
+    --------
+    issubsctype, issubdtype, obj2sctype, sctype2char
+
+    Examples
+    --------
+    >>> np.issctype(np.int32)
+    True
+    >>> np.issctype(list)
+    False
+    >>> np.issctype(1.1)
+    False
+
+    Strings are also a scalar type:
+
+    >>> np.issctype(np.dtype('str'))
+    True
+
+    """
+    if not isinstance(rep, (type, dtype)):
+        return False
+    try:
+        res = obj2sctype(rep)
+        if res and res != object_:
+            return True
+        return False
+    except Exception:
+        return False
+
+
+@set_module('numpy')
+def obj2sctype(rep, default=None):
+    """
+    Return the scalar dtype or NumPy equivalent of Python type of an object.
+
+    Parameters
+    ----------
+    rep : any
+        The object of which the type is returned.
+    default : any, optional
+        If given, this is returned for objects whose types can not be
+        determined. If not given, None is returned for those objects.
+
+    Returns
+    -------
+    dtype : dtype or Python type
+        The data type of `rep`.
+
+    See Also
+    --------
+    sctype2char, issctype, issubsctype, issubdtype, maximum_sctype
+
+    Examples
+    --------
+    >>> np.obj2sctype(np.int32)
+    <class 'numpy.int32'>
+    >>> np.obj2sctype(np.array([1., 2.]))
+    <class 'numpy.float64'>
+    >>> np.obj2sctype(np.array([1.j]))
+    <class 'numpy.complex128'>
+
+    >>> np.obj2sctype(dict)
+    <class 'numpy.object_'>
+    >>> np.obj2sctype('string')
+
+    >>> np.obj2sctype(1, default=list)
+    <class 'list'>
+
+    """
+    # prevent abstract classes being upcast
+    if isinstance(rep, type) and issubclass(rep, generic):
+        return rep
+    # extract dtype from arrays
+    if isinstance(rep, ndarray):
+        return rep.dtype.type
+    # fall back on dtype to convert
+    try:
+        res = dtype(rep)
+    except Exception:
+        return default
+    else:
+        return res.type
+
+
+@set_module('numpy')
+def issubclass_(arg1, arg2):
+    """
+    Determine if a class is a subclass of a second class.
+
+    `issubclass_` is equivalent to the Python built-in ``issubclass``,
+    except that it returns False instead of raising a TypeError if one
+    of the arguments is not a class.
+
+    Parameters
+    ----------
+    arg1 : class
+        Input class. True is returned if `arg1` is a subclass of `arg2`.
+    arg2 : class or tuple of classes.
+        Input class. If a tuple of classes, True is returned if `arg1` is a
+        subclass of any of the tuple elements.
+
+    Returns
+    -------
+    out : bool
+        Whether `arg1` is a subclass of `arg2` or not.
+
+    See Also
+    --------
+    issubsctype, issubdtype, issctype
+
+    Examples
+    --------
+    >>> np.issubclass_(np.int32, int)
+    False
+    >>> np.issubclass_(np.int32, float)
+    False
+    >>> np.issubclass_(np.float64, float)
+    True
+
+    """
+    try:
+        return issubclass(arg1, arg2)
+    except TypeError:
+        return False
+
+
+@set_module('numpy')
+def issubsctype(arg1, arg2):
+    """
+    Determine if the first argument is a subclass of the second argument.
+
+    Parameters
+    ----------
+    arg1, arg2 : dtype or dtype specifier
+        Data-types.
+
+    Returns
+    -------
+    out : bool
+        The result.
+
+    See Also
+    --------
+    issctype, issubdtype, obj2sctype
+
+    Examples
+    --------
+    >>> np.issubsctype('S8', str)
+    False
+    >>> np.issubsctype(np.array([1]), int)
+    True
+    >>> np.issubsctype(np.array([1]), float)
+    False
+
+    """
+    return issubclass(obj2sctype(arg1), obj2sctype(arg2))
+
+
+@set_module('numpy')
+def issubdtype(arg1, arg2):
+    r"""
+    Returns True if first argument is a typecode lower/equal in type hierarchy.
+
+    This is like the builtin :func:`issubclass`, but for `dtype`\ s.
+
+    Parameters
+    ----------
+    arg1, arg2 : dtype_like
+        `dtype` or object coercible to one
+
+    Returns
+    -------
+    out : bool
+
+    See Also
+    --------
+    :ref:`arrays.scalars` : Overview of the numpy type hierarchy.
+ issubsctype, issubclass_ + + Examples + -------- + `issubdtype` can be used to check the type of arrays: + + >>> ints = np.array([1, 2, 3], dtype=np.int32) + >>> np.issubdtype(ints.dtype, np.integer) + True + >>> np.issubdtype(ints.dtype, np.floating) + False + + >>> floats = np.array([1, 2, 3], dtype=np.float32) + >>> np.issubdtype(floats.dtype, np.integer) + False + >>> np.issubdtype(floats.dtype, np.floating) + True + + Similar types of different sizes are not subdtypes of each other: + + >>> np.issubdtype(np.float64, np.float32) + False + >>> np.issubdtype(np.float32, np.float64) + False + + but both are subtypes of `floating`: + + >>> np.issubdtype(np.float64, np.floating) + True + >>> np.issubdtype(np.float32, np.floating) + True + + For convenience, dtype-like objects are allowed too: + + >>> np.issubdtype('S1', np.string_) + True + >>> np.issubdtype('i4', np.signedinteger) + True + + """ + if not issubclass_(arg1, generic): + arg1 = dtype(arg1).type + if not issubclass_(arg2, generic): + arg2 = dtype(arg2).type + + return issubclass(arg1, arg2) + + +# This dictionary allows look up based on any alias for an array data-type +class _typedict(dict): + """ + Base object for a dictionary for look-up with any alias for an array dtype. + + Instances of `_typedict` can not be used as dictionaries directly, + first they have to be populated. + + """ + + def __getitem__(self, obj): + return dict.__getitem__(self, obj2sctype(obj)) + +nbytes = _typedict() +_alignment = _typedict() +_maxvals = _typedict() +_minvals = _typedict() +def _construct_lookups(): + for name, info in _concrete_typeinfo.items(): + obj = info.type + nbytes[obj] = info.bits // 8 + _alignment[obj] = info.alignment + if len(info) > 5: + _maxvals[obj] = info.max + _minvals[obj] = info.min + else: + _maxvals[obj] = None + _minvals[obj] = None + +_construct_lookups() + + +@set_module('numpy') +def sctype2char(sctype): + """ + Return the string representation of a scalar dtype. + + Parameters + ---------- + sctype : scalar dtype or object + If a scalar dtype, the corresponding string character is + returned. If an object, `sctype2char` tries to infer its scalar type + and then return the corresponding string character. + + Returns + ------- + typechar : str + The string character corresponding to the scalar type. + + Raises + ------ + ValueError + If `sctype` is an object for which the type can not be inferred. + + See Also + -------- + obj2sctype, issctype, issubsctype, mintypecode + + Examples + -------- + >>> for sctype in [np.int32, np.double, np.complex_, np.string_, np.ndarray]: + ... 
print(np.sctype2char(sctype)) + l # may vary + d + D + S + O + + >>> x = np.array([1., 2-1.j]) + >>> np.sctype2char(x) + 'D' + >>> np.sctype2char(list) + 'O' + + """ + sctype = obj2sctype(sctype) + if sctype is None: + raise ValueError("unrecognized type") + if sctype not in _concrete_types: + # for compatibility + raise KeyError(sctype) + return dtype(sctype).char + +# Create dictionary of casting functions that wrap sequences +# indexed by type or type character +cast = _typedict() +for key in _concrete_types: + cast[key] = lambda x, k=key: array(x, copy=False).astype(k) + + +def _scalar_type_key(typ): + """A ``key`` function for `sorted`.""" + dt = dtype(typ) + return (dt.kind.lower(), dt.itemsize) + + +ScalarType = [int, float, complex, int, bool, bytes, str, memoryview] +ScalarType += sorted(_concrete_types, key=_scalar_type_key) +ScalarType = tuple(ScalarType) + + +# Now add the types we've determined to this module +for key in allTypes: + globals()[key] = allTypes[key] + __all__.append(key) + +del key + +typecodes = {'Character':'c', + 'Integer':'bhilqp', + 'UnsignedInteger':'BHILQP', + 'Float':'efdg', + 'Complex':'FDG', + 'AllInteger':'bBhHiIlLqQpP', + 'AllFloat':'efdgFDG', + 'Datetime': 'Mm', + 'All':'?bhilqpBHILQPefdgFDGSUVOMm'} + +# backwards compatibility --- deprecated name +# Formal deprecation: Numpy 1.20.0, 2020-10-19 (see numpy/__init__.py) +typeDict = sctypeDict + +# b -> boolean +# u -> unsigned integer +# i -> signed integer +# f -> floating point +# c -> complex +# M -> datetime +# m -> timedelta +# S -> string +# U -> Unicode string +# V -> record +# O -> Python object +_kind_list = ['b', 'u', 'i', 'f', 'c', 'S', 'U', 'V', 'O', 'M', 'm'] + +__test_types = '?'+typecodes['AllInteger'][:-2]+typecodes['AllFloat']+'O' +__len_test_types = len(__test_types) + +# Keep incrementing until a common type both can be coerced to +# is found. Otherwise, return None +def _find_common_coerce(a, b): + if a > b: + return a + try: + thisind = __test_types.index(a.char) + except ValueError: + return None + return _can_coerce_all([a, b], start=thisind) + +# Find a data-type that all data-types in a list can be coerced to +def _can_coerce_all(dtypelist, start=0): + N = len(dtypelist) + if N == 0: + return None + if N == 1: + return dtypelist[0] + thisind = start + while thisind < __len_test_types: + newdtype = dtype(__test_types[thisind]) + numcoerce = len([x for x in dtypelist if newdtype >= x]) + if numcoerce == N: + return newdtype + thisind += 1 + return None + +def _register_types(): + numbers.Integral.register(integer) + numbers.Complex.register(inexact) + numbers.Real.register(floating) + numbers.Number.register(number) + +_register_types() + + +@set_module('numpy') +def find_common_type(array_types, scalar_types): + """ + Determine common type following standard coercion rules. + + Parameters + ---------- + array_types : sequence + A list of dtypes or dtype convertible objects representing arrays. + scalar_types : sequence + A list of dtypes or dtype convertible objects representing scalars. + + Returns + ------- + datatype : dtype + The common data type, which is the maximum of `array_types` ignoring + `scalar_types`, unless the maximum of `scalar_types` is of a + different kind (`dtype.kind`). If the kind is not understood, then + None is returned. 
+ + See Also + -------- + dtype, common_type, can_cast, mintypecode + + Examples + -------- + >>> np.find_common_type([], [np.int64, np.float32, complex]) + dtype('complex128') + >>> np.find_common_type([np.int64, np.float32], []) + dtype('float64') + + The standard casting rules ensure that a scalar cannot up-cast an + array unless the scalar is of a fundamentally different kind of data + (i.e. under a different hierarchy in the data type hierarchy) then + the array: + + >>> np.find_common_type([np.float32], [np.int64, np.float64]) + dtype('float32') + + Complex is of a different type, so it up-casts the float in the + `array_types` argument: + + >>> np.find_common_type([np.float32], [complex]) + dtype('complex128') + + Type specifier strings are convertible to dtypes and can therefore + be used instead of dtypes: + + >>> np.find_common_type(['f4', 'f4', 'i4'], ['c8']) + dtype('complex128') + + """ + array_types = [dtype(x) for x in array_types] + scalar_types = [dtype(x) for x in scalar_types] + + maxa = _can_coerce_all(array_types) + maxsc = _can_coerce_all(scalar_types) + + if maxa is None: + return maxsc + + if maxsc is None: + return maxa + + try: + index_a = _kind_list.index(maxa.kind) + index_sc = _kind_list.index(maxsc.kind) + except ValueError: + return None + + if index_sc > index_a: + return _find_common_coerce(maxsc, maxa) + else: + return maxa diff --git a/.local/lib/python3.8/site-packages/numpy/core/numerictypes.pyi b/.local/lib/python3.8/site-packages/numpy/core/numerictypes.pyi new file mode 100644 index 0000000..1d3ff77 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/numerictypes.pyi @@ -0,0 +1,173 @@ +import sys +import types +from typing import ( + Literal as L, + Type, + Union, + Tuple, + overload, + Any, + TypeVar, + Dict, + List, + Iterable, + Protocol, + TypedDict, +) + +from numpy import ( + ndarray, + dtype, + generic, + bool_, + ubyte, + ushort, + uintc, + uint, + ulonglong, + byte, + short, + intc, + int_, + longlong, + half, + single, + double, + longdouble, + csingle, + cdouble, + clongdouble, + datetime64, + timedelta64, + object_, + str_, + bytes_, + void, +) + +from numpy.core._type_aliases import ( + sctypeDict as sctypeDict, + sctypes as sctypes, +) + +from numpy.typing import DTypeLike, ArrayLike, _SupportsDType + +_T = TypeVar("_T") +_SCT = TypeVar("_SCT", bound=generic) + +# A paramtrizable subset of `npt.DTypeLike` +_DTypeLike = Union[ + Type[_SCT], + dtype[_SCT], + _SupportsDType[dtype[_SCT]], +] + +class _CastFunc(Protocol): + def __call__( + self, x: ArrayLike, k: DTypeLike = ... + ) -> ndarray[Any, dtype[Any]]: ... + +class _TypeCodes(TypedDict): + Character: L['c'] + Integer: L['bhilqp'] + UnsignedInteger: L['BHILQP'] + Float: L['efdg'] + Complex: L['FDG'] + AllInteger: L['bBhHiIlLqQpP'] + AllFloat: L['efdgFDG'] + Datetime: L['Mm'] + All: L['?bhilqpBHILQPefdgFDGSUVOMm'] + +class _typedict(Dict[Type[generic], _T]): + def __getitem__(self, key: DTypeLike) -> _T: ... + +if sys.version_info >= (3, 10): + _TypeTuple = Union[ + Type[Any], + types.UnionType, + Tuple[Union[Type[Any], types.UnionType, Tuple[Any, ...]], ...], + ] +else: + _TypeTuple = Union[ + Type[Any], + Tuple[Union[Type[Any], Tuple[Any, ...]], ...], + ] + +__all__: List[str] + +@overload +def maximum_sctype(t: _DTypeLike[_SCT]) -> Type[_SCT]: ... +@overload +def maximum_sctype(t: DTypeLike) -> Type[Any]: ... + +@overload +def issctype(rep: dtype[Any] | Type[Any]) -> bool: ... +@overload +def issctype(rep: object) -> L[False]: ... 
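# A short aside (illustrative, not part of the stub file): the `_typedict`
# subclasses built in numerictypes.py normalize any dtype alias through
# `obj2sctype` before the dict lookup, so scalar types and their string
# aliases hit the same entry:
#
#     import numpy as np
#     assert np.nbytes[np.int64] == 8
#     assert np.nbytes['i8'] == 8            # alias resolved by obj2sctype
#     assert np.issubdtype('i8', np.signedinteger)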
+ +@overload +def obj2sctype(rep: _DTypeLike[_SCT], default: None = ...) -> None | Type[_SCT]: ... +@overload +def obj2sctype(rep: _DTypeLike[_SCT], default: _T) -> _T | Type[_SCT]: ... +@overload +def obj2sctype(rep: DTypeLike, default: None = ...) -> None | Type[Any]: ... +@overload +def obj2sctype(rep: DTypeLike, default: _T) -> _T | Type[Any]: ... +@overload +def obj2sctype(rep: object, default: None = ...) -> None: ... +@overload +def obj2sctype(rep: object, default: _T) -> _T: ... + +@overload +def issubclass_(arg1: Type[Any], arg2: _TypeTuple) -> bool: ... +@overload +def issubclass_(arg1: object, arg2: object) -> L[False]: ... + +def issubsctype(arg1: DTypeLike, arg2: DTypeLike) -> bool: ... + +def issubdtype(arg1: DTypeLike, arg2: DTypeLike) -> bool: ... + +def sctype2char(sctype: DTypeLike) -> str: ... + +def find_common_type( + array_types: Iterable[DTypeLike], + scalar_types: Iterable[DTypeLike], +) -> dtype[Any]: ... + +cast: _typedict[_CastFunc] +nbytes: _typedict[int] +typecodes: _TypeCodes +ScalarType: Tuple[ + Type[int], + Type[float], + Type[complex], + Type[int], + Type[bool], + Type[bytes], + Type[str], + Type[memoryview], + Type[bool_], + Type[csingle], + Type[cdouble], + Type[clongdouble], + Type[half], + Type[single], + Type[double], + Type[longdouble], + Type[byte], + Type[short], + Type[intc], + Type[int_], + Type[longlong], + Type[timedelta64], + Type[datetime64], + Type[object_], + Type[bytes_], + Type[str_], + Type[ubyte], + Type[ushort], + Type[uintc], + Type[uint], + Type[ulonglong], + Type[void], +] diff --git a/.local/lib/python3.8/site-packages/numpy/core/overrides.py b/.local/lib/python3.8/site-packages/numpy/core/overrides.py new file mode 100644 index 0000000..840cf38 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/overrides.py @@ -0,0 +1,204 @@ +"""Implementation of __array_function__ overrides from NEP-18.""" +import collections +import functools +import os + +from numpy.core._multiarray_umath import ( + add_docstring, implement_array_function, _get_implementing_args) +from numpy.compat._inspect import getargspec + + +ARRAY_FUNCTION_ENABLED = bool( + int(os.environ.get('NUMPY_EXPERIMENTAL_ARRAY_FUNCTION', 1))) + +array_function_like_doc = ( + """like : array_like + Reference object to allow the creation of arrays which are not + NumPy arrays. If an array-like passed in as ``like`` supports + the ``__array_function__`` protocol, the result will be defined + by it. In this case, it ensures the creation of an array object + compatible with that passed in via this argument.""" +) + +def set_array_function_like_doc(public_api): + if public_api.__doc__ is not None: + public_api.__doc__ = public_api.__doc__.replace( + "${ARRAY_FUNCTION_LIKE}", + array_function_like_doc, + ) + return public_api + + +add_docstring( + implement_array_function, + """ + Implement a function with checks for __array_function__ overrides. + + All arguments are required, and can only be passed by position. + + Parameters + ---------- + implementation : function + Function that implements the operation on NumPy array without + overrides when called like ``implementation(*args, **kwargs)``. + public_api : function + Function exposed by NumPy's public API originally called like + ``public_api(*args, **kwargs)`` on which arguments are now being + checked. + relevant_args : iterable + Iterable of arguments to check for __array_function__ methods. + args : tuple + Arbitrary positional arguments originally passed into ``public_api``. 
+ kwargs : dict + Arbitrary keyword arguments originally passed into ``public_api``. + + Returns + ------- + Result from calling ``implementation()`` or an ``__array_function__`` + method, as appropriate. + + Raises + ------ + TypeError : if no implementation is found. + """) + + +# exposed for testing purposes; used internally by implement_array_function +add_docstring( + _get_implementing_args, + """ + Collect arguments on which to call __array_function__. + + Parameters + ---------- + relevant_args : iterable of array-like + Iterable of possibly array-like arguments to check for + __array_function__ methods. + + Returns + ------- + Sequence of arguments with __array_function__ methods, in the order in + which they should be called. + """) + + +ArgSpec = collections.namedtuple('ArgSpec', 'args varargs keywords defaults') + + +def verify_matching_signatures(implementation, dispatcher): + """Verify that a dispatcher function has the right signature.""" + implementation_spec = ArgSpec(*getargspec(implementation)) + dispatcher_spec = ArgSpec(*getargspec(dispatcher)) + + if (implementation_spec.args != dispatcher_spec.args or + implementation_spec.varargs != dispatcher_spec.varargs or + implementation_spec.keywords != dispatcher_spec.keywords or + (bool(implementation_spec.defaults) != + bool(dispatcher_spec.defaults)) or + (implementation_spec.defaults is not None and + len(implementation_spec.defaults) != + len(dispatcher_spec.defaults))): + raise RuntimeError('implementation and dispatcher for %s have ' + 'different function signatures' % implementation) + + if implementation_spec.defaults is not None: + if dispatcher_spec.defaults != (None,) * len(dispatcher_spec.defaults): + raise RuntimeError('dispatcher functions can only use None for ' + 'default argument values') + + +def set_module(module): + """Decorator for overriding __module__ on a function or class. + + Example usage:: + + @set_module('numpy') + def example(): + pass + + assert example.__module__ == 'numpy' + """ + def decorator(func): + if module is not None: + func.__module__ = module + return func + return decorator + + +def array_function_dispatch(dispatcher, module=None, verify=True, + docs_from_dispatcher=False): + """Decorator for adding dispatch with the __array_function__ protocol. + + See NEP-18 for example usage. + + Parameters + ---------- + dispatcher : callable + Function that when called like ``dispatcher(*args, **kwargs)`` with + arguments from the NumPy function call returns an iterable of + array-like arguments to check for ``__array_function__``. + module : str, optional + __module__ attribute to set on new function, e.g., ``module='numpy'``. + By default, module is copied from the decorated function. + verify : bool, optional + If True, verify the that the signature of the dispatcher and decorated + function signatures match exactly: all required and optional arguments + should appear in order with the same names, but the default values for + all optional arguments should be ``None``. Only disable verification + if the dispatcher's signature needs to deviate for some particular + reason, e.g., because the function has a signature like + ``func(*args, **kwargs)``. + docs_from_dispatcher : bool, optional + If True, copy docs from the dispatcher function onto the dispatched + function, rather than from the implementation. This is useful for + functions defined in C, which otherwise don't have docstrings. + + Returns + ------- + Function suitable for decorating the implementation of a NumPy function. 
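+
+    Examples
+    --------
+    A minimal usage sketch (``my_clip`` and its dispatcher are illustrative
+    names, not part of NumPy's API), following the pattern from NEP-18:
+
+    >>> def _my_clip_dispatcher(a, lo=None, hi=None):
+    ...     return (a,)
+    >>> @array_function_dispatch(_my_clip_dispatcher)
+    ... def my_clip(a, lo=None, hi=None):
+    ...     return np.minimum(np.maximum(a, lo), hi)
+
+    The dispatcher only returns the arguments that should be scanned for
+    ``__array_function__``; the wrapped implementation still does the work.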
+    """
+
+    if not ARRAY_FUNCTION_ENABLED:
+        def decorator(implementation):
+            if docs_from_dispatcher:
+                add_docstring(implementation, dispatcher.__doc__)
+            if module is not None:
+                implementation.__module__ = module
+            return implementation
+        return decorator
+
+    def decorator(implementation):
+        if verify:
+            verify_matching_signatures(implementation, dispatcher)
+
+        if docs_from_dispatcher:
+            add_docstring(implementation, dispatcher.__doc__)
+
+        @functools.wraps(implementation)
+        def public_api(*args, **kwargs):
+            relevant_args = dispatcher(*args, **kwargs)
+            return implement_array_function(
+                implementation, public_api, relevant_args, args, kwargs)
+
+        public_api.__code__ = public_api.__code__.replace(
+                co_name=implementation.__name__,
+                co_filename='<__array_function__ internals>')
+        if module is not None:
+            public_api.__module__ = module
+
+        public_api._implementation = implementation
+
+        return public_api
+
+    return decorator
+
+
+def array_function_from_dispatcher(
+        implementation, module=None, verify=True, docs_from_dispatcher=True):
+    """Like array_function_dispatcher, but with function arguments flipped."""
+
+    def decorator(dispatcher):
+        return array_function_dispatch(
+            dispatcher, module, verify=verify,
+            docs_from_dispatcher=docs_from_dispatcher)(implementation)
+    return decorator
diff --git a/.local/lib/python3.8/site-packages/numpy/core/records.py b/.local/lib/python3.8/site-packages/numpy/core/records.py
new file mode 100644
index 0000000..c014bc9
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/core/records.py
@@ -0,0 +1,1099 @@
+"""
+Record Arrays
+=============
+Record arrays expose the fields of structured arrays as properties.
+
+Most commonly, ndarrays contain elements of a single type, e.g. floats,
+integers, bools etc. However, it is possible for elements to be combinations
+of these using structured types, such as::
+
+  >>> a = np.array([(1, 2.0), (1, 2.0)], dtype=[('x', np.int64), ('y', np.float64)])
+  >>> a
+  array([(1, 2.), (1, 2.)], dtype=[('x', '<i8'), ('y', '<f8')])
+
+Here, each element consists of two fields: x (an int), and y (a float).
+This is known as a structured array. The different fields are analogous
+to columns in a spreadsheet, and can be accessed as one would a
+dictionary::
+
+  >>> a['x']
+  array([1, 1])
+
+  >>> a['y']
+  array([2., 2.])
+
+Record arrays allow us to access fields as properties::
+
+  >>> ar = np.rec.array(a)
+
+  >>> ar.x
+  array([1, 1])
+
+  >>> ar.y
+  array([2., 2.])
+
+"""
+import warnings
+from collections import Counter
+from contextlib import nullcontext
+
+from . import numeric as sb
+from . import numerictypes as nt
+from numpy.compat import os_fspath
+from numpy.core.overrides import set_module
+from .arrayprint import _get_legacy_print_mode
+
+# All of the functions allow formats to be a dtype
+__all__ = [
+    'record', 'recarray', 'format_parser',
+    'fromarrays', 'fromrecords', 'fromstring', 'fromfile', 'array',
+]
+
+
+ndarray = sb.ndarray
+
+_byteorderconv = {'b':'>',
+                  'l':'<',
+                  'n':'=',
+                  'B':'>',
+                  'L':'<',
+                  'N':'=',
+                  'S':'s',
+                  's':'s',
+                  '>':'>',
+                  '<':'<',
+                  '=':'=',
+                  '|':'|',
+                  'I':'|',
+                  'i':'|'}
+
+# formats regular expression
+# allows multidimensional spec with a tuple syntax in front
+# of the letter code '(2,3)f4' and ' ( 2 , 3 ) f4 '
+# are equally allowed
+
+numfmt = nt.sctypeDict
+
+
+def find_duplicate(list):
+    """Find duplication in a list, return a list of duplicated elements"""
+    return [
+        item
+        for item, counts in Counter(list).items()
+        if counts > 1
+    ]
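# A quick illustration (editorial aside, not part of records.py): the
# `_byteorderconv` table above maps the one-letter byte-order aliases that
# `format_parser` accepts onto the codes understood by `dtype.newbyteorder`,
# and `find_duplicate` is what later rejects repeated field names:
#
#     import numpy as np
#     np.dtype('i4').newbyteorder('>').str    # -> '>i4' (aliases 'b'/'B')
#     find_duplicate(['x', 'y', 'x'])         # -> ['x']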
+@set_module('numpy')
+class format_parser:
+    """
+    Class to convert formats, names, titles description to a dtype.
+
+    After constructing the format_parser object, the dtype attribute is
+    the converted data-type:
+    ``dtype = format_parser(formats, names, titles).dtype``
+
+    Attributes
+    ----------
+    dtype : dtype
+        The converted data-type.
+
+    Parameters
+    ----------
+    formats : str or list of str
+        The format description, either specified as a string with
+        comma-separated format descriptions in the form ``'f8, i4, a5'``, or
+        a list of format description strings in the form
+        ``['f8', 'i4', 'a5']``.
+    names : str or list/tuple of str
+        The field names, either specified as a comma-separated string in the
+        form ``'col1, col2, col3'``, or as a list or tuple of strings in the
+        form ``['col1', 'col2', 'col3']``.
+        An empty list can be used, in that case default field names
+        ('f0', 'f1', ...) are used.
+    titles : sequence
+        Sequence of title strings. An empty list can be used to leave titles
+        out.
+    aligned : bool, optional
+        If True, align the fields by padding as the C-compiler would.
+        Default is False.
+    byteorder : str, optional
+        If specified, all the fields will be changed to the
+        provided byte-order. Otherwise, the default byte-order is
+        used. For all available string specifiers, see `dtype.newbyteorder`.
+
+    See Also
+    --------
+    dtype, typename, sctype2char
+
+    Examples
+    --------
+    >>> np.format_parser(['<f8', '<i4', '<a5'], ['col1', 'col2', 'col3'],
+    ...                  ['T1', 'T2', 'T3']).dtype
+    dtype([(('T1', 'col1'), '<f8'), (('T2', 'col2'), '<i4'), (('T3', 'col3'), 'S5')])
+
+    `names` and/or `titles` can be empty lists. If `titles` is an empty
+    list, titles will simply not appear. If `names` is empty, default field
+    names will be used.
+
+    >>> np.format_parser(['f8', 'i4', 'a5'], ['col1', 'col2', 'col3'],
+    ...                  []).dtype
+    dtype([('col1', '<f8'), ('col2', '<i4'), ('col3', 'S5')])
+    >>> np.format_parser(['<f8', '<i4', '<a5'], [], []).dtype
+    dtype([('f0', '<f8'), ('f1', '<i4'), ('f2', 'S5')])
+
+    """
+
+    def __init__(self, formats, names, titles, aligned=False, byteorder=None):
+        self._parseFormats(formats, aligned)
+        self._setfieldnames(names, titles)
+        self._createdtype(byteorder)
+
+    def _parseFormats(self, formats, aligned=False):
+        """ Parse the field formats """
+
+        if formats is None:
+            raise ValueError("Need formats argument")
+
+        if isinstance(formats, list):
+            dtype = sb.dtype(
+                [
+                    ('f{}'.format(i), format_)
+                    for i, format_ in enumerate(formats)
+                ],
+                aligned,
+            )
+        else:
+            dtype = sb.dtype(formats, aligned)
+        fields = dtype.fields
+        if fields is None:
+            dtype = sb.dtype([('f1', dtype)], aligned)
+            fields = dtype.fields
+        keys = dtype.names
+        self._f_formats = [fields[key][0] for key in keys]
+        self._offsets = [fields[key][1] for key in keys]
+        self._nfields = len(keys)
+
+    def _setfieldnames(self, names, titles):
+        """convert input field names into a list and assign to the _names
+        attribute """
+
+        if names:
+            if type(names) in [list, tuple]:
+                pass
+            elif isinstance(names, str):
+                names = names.split(',')
+            else:
+                raise NameError("illegal input names %s" % repr(names))
+
+            self._names = [n.strip() for n in names[:self._nfields]]
+        else:
+            self._names = []
+
+        # if not enough names are specified, pad out with default field
+        # names "f0, f1, ..." starting after the last specified name
+        self._names += ['f%d' % i for i in range(len(self._names),
+                                                 self._nfields)]
+        # check for redundant names
+        _dup = find_duplicate(self._names)
+        if _dup:
+            raise ValueError("Duplicate field names: %s" % _dup)
+
+        if titles:
+            self._titles = [n.strip() for n in titles[:self._nfields]]
+        else:
+            self._titles = []
+            titles = []
+
+        if self._nfields > len(titles):
+            self._titles += [None] * (self._nfields - len(titles))
+
+    def _createdtype(self, byteorder):
+        dtype = sb.dtype({
+            'names': self._names,
+            'formats': self._f_formats,
+            'offsets': self._offsets,
+            'titles': self._titles,
+        })
+        if byteorder is not None:
+            byteorder = _byteorderconv[byteorder[0]]
+            dtype = dtype.newbyteorder(byteorder)
+
+        self.dtype = dtype
+
+
+class record(nt.void):
+    """A data-type scalar that allows field access as attribute lookup.
+ """ + + # manually set name and module so that this class's type shows up + # as numpy.record when printed + __name__ = 'record' + __module__ = 'numpy' + + def __repr__(self): + if _get_legacy_print_mode() <= 113: + return self.__str__() + return super().__repr__() + + def __str__(self): + if _get_legacy_print_mode() <= 113: + return str(self.item()) + return super().__str__() + + def __getattribute__(self, attr): + if attr in ('setfield', 'getfield', 'dtype'): + return nt.void.__getattribute__(self, attr) + try: + return nt.void.__getattribute__(self, attr) + except AttributeError: + pass + fielddict = nt.void.__getattribute__(self, 'dtype').fields + res = fielddict.get(attr, None) + if res: + obj = self.getfield(*res[:2]) + # if it has fields return a record, + # otherwise return the object + try: + dt = obj.dtype + except AttributeError: + #happens if field is Object type + return obj + if dt.names is not None: + return obj.view((self.__class__, obj.dtype)) + return obj + else: + raise AttributeError("'record' object has no " + "attribute '%s'" % attr) + + def __setattr__(self, attr, val): + if attr in ('setfield', 'getfield', 'dtype'): + raise AttributeError("Cannot set '%s' attribute" % attr) + fielddict = nt.void.__getattribute__(self, 'dtype').fields + res = fielddict.get(attr, None) + if res: + return self.setfield(val, *res[:2]) + else: + if getattr(self, attr, None): + return nt.void.__setattr__(self, attr, val) + else: + raise AttributeError("'record' object has no " + "attribute '%s'" % attr) + + def __getitem__(self, indx): + obj = nt.void.__getitem__(self, indx) + + # copy behavior of record.__getattribute__, + if isinstance(obj, nt.void) and obj.dtype.names is not None: + return obj.view((self.__class__, obj.dtype)) + else: + # return a single element + return obj + + def pprint(self): + """Pretty-print all fields.""" + # pretty-print all fields + names = self.dtype.names + maxlen = max(len(name) for name in names) + fmt = '%% %ds: %%s' % maxlen + rows = [fmt % (name, getattr(self, name)) for name in names] + return "\n".join(rows) + +# The recarray is almost identical to a standard array (which supports +# named fields already) The biggest difference is that it can use +# attribute-lookup to find the fields and it is constructed using +# a record. + +# If byteorder is given it forces a particular byteorder on all +# the fields (and any subfields) + +class recarray(ndarray): + """Construct an ndarray that allows field access using attributes. + + Arrays may have a data-types containing fields, analogous + to columns in a spread sheet. An example is ``[(x, int), (y, float)]``, + where each entry in the array is a pair of ``(int, float)``. Normally, + these attributes are accessed using dictionary lookups such as ``arr['x']`` + and ``arr['y']``. Record arrays allow the fields to be accessed as members + of the array, using ``arr.x`` and ``arr.y``. + + Parameters + ---------- + shape : tuple + Shape of output array. + dtype : data-type, optional + The desired data-type. By default, the data-type is determined + from `formats`, `names`, `titles`, `aligned` and `byteorder`. + formats : list of data-types, optional + A list containing the data-types for the different columns, e.g. + ``['i4', 'f8', 'i4']``. `formats` does *not* support the new + convention of using types directly, i.e. ``(int, float, int)``. + Note that `formats` must be a list, not a tuple. + Given that `formats` is somewhat limited, we recommend specifying + `dtype` instead. 
+ names : tuple of str, optional + The name of each column, e.g. ``('x', 'y', 'z')``. + buf : buffer, optional + By default, a new array is created of the given shape and data-type. + If `buf` is specified and is an object exposing the buffer interface, + the array will use the memory from the existing buffer. In this case, + the `offset` and `strides` keywords are available. + + Other Parameters + ---------------- + titles : tuple of str, optional + Aliases for column names. For example, if `names` were + ``('x', 'y', 'z')`` and `titles` is + ``('x_coordinate', 'y_coordinate', 'z_coordinate')``, then + ``arr['x']`` is equivalent to both ``arr.x`` and ``arr.x_coordinate``. + byteorder : {'<', '>', '='}, optional + Byte-order for all fields. + aligned : bool, optional + Align the fields in memory as the C-compiler would. + strides : tuple of ints, optional + Buffer (`buf`) is interpreted according to these strides (strides + define how many bytes each array element, row, column, etc. + occupy in memory). + offset : int, optional + Start reading buffer (`buf`) from this offset onwards. + order : {'C', 'F'}, optional + Row-major (C-style) or column-major (Fortran-style) order. + + Returns + ------- + rec : recarray + Empty array of the given shape and type. + + See Also + -------- + core.records.fromrecords : Construct a record array from data. + record : fundamental data-type for `recarray`. + format_parser : determine a data-type from formats, names, titles. + + Notes + ----- + This constructor can be compared to ``empty``: it creates a new record + array but does not fill it with data. To create a record array from data, + use one of the following methods: + + 1. Create a standard ndarray and convert it to a record array, + using ``arr.view(np.recarray)`` + 2. Use the `buf` keyword. + 3. Use `np.rec.fromrecords`. + + Examples + -------- + Create an array with two fields, ``x`` and ``y``: + + >>> x = np.array([(1.0, 2), (3.0, 4)], dtype=[('x', '>> x + array([(1., 2), (3., 4)], dtype=[('x', '>> x['x'] + array([1., 3.]) + + View the array as a record array: + + >>> x = x.view(np.recarray) + + >>> x.x + array([1., 3.]) + + >>> x.y + array([2, 4]) + + Create a new, empty record array: + + >>> np.recarray((2,), + ... dtype=[('x', int), ('y', float), ('z', int)]) #doctest: +SKIP + rec.array([(-1073741821, 1.2249118382103472e-301, 24547520), + (3471280, 1.2134086255804012e-316, 0)], + dtype=[('x', ' 0 or self.shape == (0,): + lst = sb.array2string( + self, separator=', ', prefix=prefix, suffix=',') + else: + # show zero-length shape unless it is (0,) + lst = "[], shape=%s" % (repr(self.shape),) + + lf = '\n'+' '*len(prefix) + if _get_legacy_print_mode() <= 113: + lf = ' ' + lf # trailing space + return fmt % (lst, lf, repr_dtype) + + def field(self, attr, val=None): + if isinstance(attr, int): + names = ndarray.__getattribute__(self, 'dtype').names + attr = names[attr] + + fielddict = ndarray.__getattribute__(self, 'dtype').fields + + res = fielddict[attr][:2] + + if val is None: + obj = self.getfield(*res) + if obj.dtype.names is not None: + return obj + return obj.view(ndarray) + else: + return self.setfield(val, *res) + + +def _deprecate_shape_0_as_None(shape): + if shape == 0: + warnings.warn( + "Passing `shape=0` to have the shape be inferred is deprecated, " + "and in future will be equivalent to `shape=(0,)`. 
To infer "
+            "the shape and suppress this warning, pass `shape=None` instead.",
+            FutureWarning, stacklevel=3)
+        return None
+    else:
+        return shape
+
+
+@set_module("numpy.rec")
+def fromarrays(arrayList, dtype=None, shape=None, formats=None,
+               names=None, titles=None, aligned=False, byteorder=None):
+    """Create a record array from a (flat) list of arrays
+
+    Parameters
+    ----------
+    arrayList : list or tuple
+        List of array-like objects (such as lists, tuples,
+        and ndarrays).
+    dtype : data-type, optional
+        valid dtype for all arrays
+    shape : int or tuple of ints, optional
+        Shape of the resulting array. If not provided, inferred from
+        ``arrayList[0]``.
+    formats, names, titles, aligned, byteorder :
+        If `dtype` is ``None``, these arguments are passed to
+        `numpy.format_parser` to construct a dtype. See that function for
+        detailed documentation.
+
+    Returns
+    -------
+    np.recarray
+        Record array consisting of given arrayList columns.
+
+    Examples
+    --------
+    >>> x1=np.array([1,2,3,4])
+    >>> x2=np.array(['a','dd','xyz','12'])
+    >>> x3=np.array([1.1,2,3,4])
+    >>> r = np.core.records.fromarrays([x1,x2,x3],names='a,b,c')
+    >>> print(r[1])
+    (2, 'dd', 2.0) # may vary
+    >>> x1[1]=34
+    >>> r.a
+    array([1, 2, 3, 4])
+
+    >>> x1 = np.array([1, 2, 3, 4])
+    >>> x2 = np.array(['a', 'dd', 'xyz', '12'])
+    >>> x3 = np.array([1.1, 2, 3,4])
+    >>> r = np.core.records.fromarrays(
+    ...     [x1, x2, x3],
+    ...     dtype=np.dtype([('a', np.int32), ('b', 'S3'), ('c', np.float32)]))
+    >>> r
+    rec.array([(1, b'a', 1.1), (2, b'dd', 2. ), (3, b'xyz', 3. ),
+               (4, b'12', 4. )],
+              dtype=[('a', '<i4'), ('b', 'S3'), ('c', '<f4')])
+    """
+
+    arrayList = [sb.asarray(x) for x in arrayList]
+
+    # NumPy 1.19.0, 2020-01-01
+    shape = _deprecate_shape_0_as_None(shape)
+
+    if shape is None or shape == -1:
+        # shape is determined from the first array
+        shape = arrayList[0].shape
+    elif isinstance(shape, int):
+        shape = (shape,)
+
+    if formats is None and dtype is None:
+        # go through each object in the list to see if it is an ndarray
+        # and determine the formats.
+        formats = [obj.dtype for obj in arrayList]
+
+    if dtype is not None:
+        descr = sb.dtype(dtype)
+    else:
+        descr = format_parser(formats, names, titles, aligned, byteorder).dtype
+    _names = descr.names
+
+    # Determine shape from data-type.
+    if len(descr) != len(arrayList):
+        raise ValueError("mismatch between the number of fields "
+                         "and the number of arrays")
+
+    d0 = descr[0].shape
+    nn = len(d0)
+    if nn > 0:
+        shape = shape[:-nn]
+
+    _array = recarray(shape, descr)
+
+    # populate the record array (makes a copy)
+    for k, obj in enumerate(arrayList):
+        nn = descr[k].ndim
+        testshape = obj.shape[:obj.ndim - nn]
+        name = _names[k]
+        if testshape != shape:
+            raise ValueError(f'array-shape mismatch in array {k} ("{name}")')
+
+        _array[name] = obj
+
+    return _array
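# Worked example (editorial aside, not part of records.py) of the
# subarray-shape trimming in `fromarrays` above: a field declared with a
# subarray shape, here ('a', 'f4', (2,)), absorbs the trailing axis of its
# input column, so the `nn`/`testshape` checks accept a (3, 2) column for a
# (3,)-shaped record array:
#
#     import numpy as np
#     cols = [np.zeros((3, 2)), np.arange(3)]
#     r = np.core.records.fromarrays(
#         cols, dtype=[('a', 'f4', (2,)), ('b', 'i4')])
#     assert r.shape == (3,)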
+@set_module("numpy.rec")
+def fromrecords(recList, dtype=None, shape=None, formats=None, names=None,
+                titles=None, aligned=False, byteorder=None):
+    """Create a recarray from a list of records in text form.
+
+    Parameters
+    ----------
+    recList : sequence
+        data in the same field may be heterogeneous - they will be promoted
+        to the highest data type.
+    dtype : data-type, optional
+        valid dtype for all arrays
+    shape : int or tuple of ints, optional
+        shape of each array.
+    formats, names, titles, aligned, byteorder :
+        If `dtype` is ``None``, these arguments are passed to
+        `numpy.format_parser` to construct a dtype. See that function for
+        detailed documentation.
+
+        If both `formats` and `dtype` are None, then this will auto-detect
+        formats. Use list of tuples rather than list of lists for faster
+        processing.
+
+    Returns
+    -------
+    np.recarray
+        record array consisting of given recList rows.
+
+    Examples
+    --------
+    >>> r=np.core.records.fromrecords([(456,'dbe',1.2),(2,'de',1.3)],
+    ... names='col1,col2,col3')
+    >>> print(r[0])
+    (456, 'dbe', 1.2)
+    >>> r.col1
+    array([456,   2])
+    >>> r.col2
+    array(['dbe', 'de'], dtype='<U3')
+    >>> import pickle
+    >>> pickle.loads(pickle.dumps(r))
+    rec.array([(456, 'dbe', 1.2), (  2, 'de', 1.3)],
+              dtype=[('col1', '<i8'), ('col2', '<U3'), ('col3', '<f8')])
+    """
+
+    if formats is None and dtype is None:  # slower
+        obj = sb.array(recList, dtype=object)
+        arrlist = [sb.array(obj[..., i].tolist()) for i in range(obj.shape[-1])]
+        return fromarrays(arrlist, formats=formats, shape=shape, names=names,
+                          titles=titles, aligned=aligned, byteorder=byteorder)
+
+    if dtype is not None:
+        descr = sb.dtype((record, dtype))
+    else:
+        descr = format_parser(formats, names, titles, aligned, byteorder).dtype
+
+    try:
+        retval = sb.array(recList, dtype=descr)
+    except (TypeError, ValueError):
+        # NumPy 1.19.0, 2020-01-01
+        shape = _deprecate_shape_0_as_None(shape)
+        if shape is None:
+            shape = len(recList)
+        if isinstance(shape, int):
+            shape = (shape,)
+        if len(shape) > 1:
+            raise ValueError("Can only deal with 1-d array.")
+        _array = recarray(shape, descr)
+        for k in range(_array.size):
+            _array[k] = tuple(recList[k])
+        # list of lists instead of list of tuples ?
+        # 2018-02-07, 1.14.1
+        warnings.warn(
+            "fromrecords expected a list of tuples, may have received a list "
+            "of lists instead. In the future that will raise an error",
+            FutureWarning, stacklevel=2)
+        return _array
+    else:
+        if shape is not None and retval.shape != shape:
+            retval.shape = shape
+
+    res = retval.view(recarray)
+
+    return res
+
+
+@set_module("numpy.rec")
+def fromstring(datastring, dtype=None, shape=None, offset=0, formats=None,
+               names=None, titles=None, aligned=False, byteorder=None):
+    r"""Create a record array from binary data
+
+    Note that despite the name of this function it does not accept `str`
+    instances.
+
+    Parameters
+    ----------
+    datastring : bytes-like
+        Buffer of binary data
+    dtype : data-type, optional
+        Valid dtype for all arrays
+    shape : int or tuple of ints, optional
+        Shape of each array.
+    offset : int, optional
+        Position in the buffer to start reading from.
+    formats, names, titles, aligned, byteorder :
+        If `dtype` is ``None``, these arguments are passed to
+        `numpy.format_parser` to construct a dtype. See that function for
+        detailed documentation.
+
+
+    Returns
+    -------
+    np.recarray
+        Record array view into the data in datastring. This will be readonly
+        if `datastring` is readonly.
+
+    See Also
+    --------
+    numpy.frombuffer
+
+    Examples
+    --------
+    >>> a = b'\x01\x02\x03abc'
+    >>> np.core.records.fromstring(a, dtype='u1,u1,u1,S3')
+    rec.array([(1, 2, 3, b'abc')],
+            dtype=[('f0', 'u1'), ('f1', 'u1'), ('f2', 'u1'), ('f3', 'S3')])
+
+    >>> grades_dtype = [('Name', (np.str_, 10)), ('Marks', np.float64),
+    ...                 ('GradeLevel', np.int32)]
+    >>> grades_array = np.array([('Sam', 33.3, 3), ('Mike', 44.4, 5),
+    ...                          ('Aadi', 66.6, 6)], dtype=grades_dtype)
+    >>> np.core.records.fromstring(grades_array.tobytes(), dtype=grades_dtype)
+    rec.array([('Sam', 33.3, 3), ('Mike', 44.4, 5), ('Aadi', 66.6, 6)],
+            dtype=[('Name', '<U10'), ('Marks', '<f8'), ('GradeLevel', '<i4')])
+
+    >>> s = '\x01\x02\x03abc'
+    >>> np.core.records.fromstring(s, dtype='u1,u1,u1,S3')
+    Traceback (most recent call last)
+       ...
+    TypeError: a bytes-like object is required, not 'str'
+    """
+
+    if dtype is None and formats is None:
+        raise TypeError("fromstring() needs a 'dtype' or 'formats' argument")
+
+    if dtype is not None:
+        descr = sb.dtype(dtype)
+    else:
+        descr = format_parser(formats, names, titles, aligned, byteorder).dtype
+
+    itemsize = descr.itemsize
+
+    # NumPy 1.19.0, 2020-01-01
+    shape = _deprecate_shape_0_as_None(shape)
+
+    if shape in (None, -1):
+        shape = (len(datastring) - offset) // itemsize
+
+    _array = recarray(shape, descr, buf=datastring, offset=offset)
+    return _array
+
+def get_remaining_size(fd):
+    pos = fd.tell()
+    try:
+        fd.seek(0, 2)
+        return fd.tell() - pos
+    finally:
+        fd.seek(pos, 0)
+
+
+@set_module("numpy.rec")
+def fromfile(fd, dtype=None, shape=None, offset=0, formats=None,
+             names=None, titles=None, aligned=False, byteorder=None):
+    """Create an array from binary file data
+
+    Parameters
+    ----------
+    fd : str or file type
+        If file is a string or a path-like object then that file is opened,
+        else it is assumed to be a file object. The file object must
+        support random access (i.e. it must have tell and seek methods).
+    dtype : data-type, optional
+        valid dtype for all arrays
+    shape : int or tuple of ints, optional
+        shape of each array.
+    offset : int, optional
+        Position in the file to start reading from.
+    formats, names, titles, aligned, byteorder :
+        If `dtype` is ``None``, these arguments are passed to
+        `numpy.format_parser` to construct a dtype. See that function for
+        detailed documentation
+
+    Returns
+    -------
+    np.recarray
+        record array consisting of data enclosed in file.
+ + Examples + -------- + >>> from tempfile import TemporaryFile + >>> a = np.empty(10,dtype='f8,i4,a5') + >>> a[5] = (0.5,10,'abcde') + >>> + >>> fd=TemporaryFile() + >>> a = a.newbyteorder('<') + >>> a.tofile(fd) + >>> + >>> _ = fd.seek(0) + >>> r=np.core.records.fromfile(fd, formats='f8,i4,a5', shape=10, + ... byteorder='<') + >>> print(r[5]) + (0.5, 10, 'abcde') + >>> r.shape + (10,) + """ + + if dtype is None and formats is None: + raise TypeError("fromfile() needs a 'dtype' or 'formats' argument") + + # NumPy 1.19.0, 2020-01-01 + shape = _deprecate_shape_0_as_None(shape) + + if shape is None: + shape = (-1,) + elif isinstance(shape, int): + shape = (shape,) + + if hasattr(fd, 'readinto'): + # GH issue 2504. fd supports io.RawIOBase or io.BufferedIOBase interface. + # Example of fd: gzip, BytesIO, BufferedReader + # file already opened + ctx = nullcontext(fd) + else: + # open file + ctx = open(os_fspath(fd), 'rb') + + with ctx as fd: + if offset > 0: + fd.seek(offset, 1) + size = get_remaining_size(fd) + + if dtype is not None: + descr = sb.dtype(dtype) + else: + descr = format_parser(formats, names, titles, aligned, byteorder).dtype + + itemsize = descr.itemsize + + shapeprod = sb.array(shape).prod(dtype=nt.intp) + shapesize = shapeprod * itemsize + if shapesize < 0: + shape = list(shape) + shape[shape.index(-1)] = size // -shapesize + shape = tuple(shape) + shapeprod = sb.array(shape).prod(dtype=nt.intp) + + nbytes = shapeprod * itemsize + + if nbytes > size: + raise ValueError( + "Not enough bytes left in file for specified shape and type") + + # create the array + _array = recarray(shape, descr) + nbytesread = fd.readinto(_array.data) + if nbytesread != nbytes: + raise OSError("Didn't read as many bytes as expected") + + return _array + + +@set_module("numpy.rec") +def array(obj, dtype=None, shape=None, offset=0, strides=None, formats=None, + names=None, titles=None, aligned=False, byteorder=None, copy=True): + """ + Construct a record array from a wide-variety of objects. + + A general-purpose record array constructor that dispatches to the + appropriate `recarray` creation function based on the inputs (see Notes). + + Parameters + ---------- + obj : any + Input object. See Notes for details on how various input types are + treated. + dtype : data-type, optional + Valid dtype for array. + shape : int or tuple of ints, optional + Shape of each array. + offset : int, optional + Position in the file or buffer to start reading from. + strides : tuple of ints, optional + Buffer (`buf`) is interpreted according to these strides (strides + define how many bytes each array element, row, column, etc. + occupy in memory). + formats, names, titles, aligned, byteorder : + If `dtype` is ``None``, these arguments are passed to + `numpy.format_parser` to construct a dtype. See that function for + detailed documentation. + copy : bool, optional + Whether to copy the input object (True), or to use a reference instead. + This option only applies when the input is an ndarray or recarray. + Defaults to True. + + Returns + ------- + np.recarray + Record array created from the specified object. + + Notes + ----- + If `obj` is ``None``, then call the `~numpy.recarray` constructor. If + `obj` is a string, then call the `fromstring` constructor. If `obj` is a + list or a tuple, then if the first object is an `~numpy.ndarray`, call + `fromarrays`, otherwise call `fromrecords`. 
If `obj` is a + `~numpy.recarray`, then make a copy of the data in the recarray + (if ``copy=True``) and use the new formats, names, and titles. If `obj` + is a file, then call `fromfile`. Finally, if obj is an `ndarray`, then + return ``obj.view(recarray)``, making a copy of the data if ``copy=True``. + + Examples + -------- + >>> a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + array([[1, 2, 3], + [4, 5, 6], + [7, 8, 9]]) + + >>> np.core.records.array(a) + rec.array([[1, 2, 3], + [4, 5, 6], + [7, 8, 9]], + dtype=int32) + + >>> b = [(1, 1), (2, 4), (3, 9)] + >>> c = np.core.records.array(b, formats = ['i2', 'f2'], names = ('x', 'y')) + >>> c + rec.array([(1, 1.0), (2, 4.0), (3, 9.0)], + dtype=[('x', '>> c.x + rec.array([1, 2, 3], dtype=int16) + + >>> c.y + rec.array([ 1.0, 4.0, 9.0], dtype=float16) + + >>> r = np.rec.array(['abc','def'], names=['col1','col2']) + >>> print(r.col1) + abc + + >>> r.col1 + array('abc', dtype='>> r.col2 + array('def', dtype=' object: ... + def tell(self, /) -> int: ... + def readinto(self, buffer: memoryview, /) -> int: ... + +__all__: List[str] + +@overload +def fromarrays( + arrayList: Iterable[ArrayLike], + dtype: DTypeLike = ..., + shape: None | _ShapeLike = ..., + formats: None = ..., + names: None = ..., + titles: None = ..., + aligned: bool = ..., + byteorder: None = ..., +) -> _RecArray[Any]: ... +@overload +def fromarrays( + arrayList: Iterable[ArrayLike], + dtype: None = ..., + shape: None | _ShapeLike = ..., + *, + formats: DTypeLike, + names: None | str | Sequence[str] = ..., + titles: None | str | Sequence[str] = ..., + aligned: bool = ..., + byteorder: None | _ByteOrder = ..., +) -> _RecArray[record]: ... + +@overload +def fromrecords( + recList: _ArrayLikeVoid_co | Tuple[Any, ...] | _NestedSequence[Tuple[Any, ...]], + dtype: DTypeLike = ..., + shape: None | _ShapeLike = ..., + formats: None = ..., + names: None = ..., + titles: None = ..., + aligned: bool = ..., + byteorder: None = ..., +) -> _RecArray[record]: ... +@overload +def fromrecords( + recList: _ArrayLikeVoid_co | Tuple[Any, ...] | _NestedSequence[Tuple[Any, ...]], + dtype: None = ..., + shape: None | _ShapeLike = ..., + *, + formats: DTypeLike, + names: None | str | Sequence[str] = ..., + titles: None | str | Sequence[str] = ..., + aligned: bool = ..., + byteorder: None | _ByteOrder = ..., +) -> _RecArray[record]: ... + +@overload +def fromstring( + datastring: _SupportsBuffer, + dtype: DTypeLike, + shape: None | _ShapeLike = ..., + offset: int = ..., + formats: None = ..., + names: None = ..., + titles: None = ..., + aligned: bool = ..., + byteorder: None = ..., +) -> _RecArray[record]: ... +@overload +def fromstring( + datastring: _SupportsBuffer, + dtype: None = ..., + shape: None | _ShapeLike = ..., + offset: int = ..., + *, + formats: DTypeLike, + names: None | str | Sequence[str] = ..., + titles: None | str | Sequence[str] = ..., + aligned: bool = ..., + byteorder: None | _ByteOrder = ..., +) -> _RecArray[record]: ... + +@overload +def fromfile( + fd: str | bytes | os.PathLike[str] | os.PathLike[bytes] | _SupportsReadInto, + dtype: DTypeLike, + shape: None | _ShapeLike = ..., + offset: int = ..., + formats: None = ..., + names: None = ..., + titles: None = ..., + aligned: bool = ..., + byteorder: None = ..., +) -> _RecArray[Any]: ... 
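# A small illustration (editorial aside, not part of the stubs) of the
# `obj=None` branch described in the `array` Notes above: with no input
# object, `np.rec.array` falls through to the `recarray` constructor, so a
# dtype description and an explicit `shape` are required:
#
#     import numpy as np
#     empty = np.rec.array(None, dtype=[('x', 'i4'), ('y', 'f4')], shape=2)
#     assert empty.shape == (2,) and empty.dtype.names == ('x', 'y')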
+@overload +def fromfile( + fd: str | bytes | os.PathLike[str] | os.PathLike[bytes] | _SupportsReadInto, + dtype: None = ..., + shape: None | _ShapeLike = ..., + offset: int = ..., + *, + formats: DTypeLike, + names: None | str | Sequence[str] = ..., + titles: None | str | Sequence[str] = ..., + aligned: bool = ..., + byteorder: None | _ByteOrder = ..., +) -> _RecArray[record]: ... + +@overload +def array( + obj: _SCT | NDArray[_SCT], + dtype: None = ..., + shape: None | _ShapeLike = ..., + offset: int = ..., + formats: None = ..., + names: None = ..., + titles: None = ..., + aligned: bool = ..., + byteorder: None = ..., + copy: bool = ..., +) -> _RecArray[_SCT]: ... +@overload +def array( + obj: ArrayLike, + dtype: DTypeLike, + shape: None | _ShapeLike = ..., + offset: int = ..., + formats: None = ..., + names: None = ..., + titles: None = ..., + aligned: bool = ..., + byteorder: None = ..., + copy: bool = ..., +) -> _RecArray[Any]: ... +@overload +def array( + obj: ArrayLike, + dtype: None = ..., + shape: None | _ShapeLike = ..., + offset: int = ..., + *, + formats: DTypeLike, + names: None | str | Sequence[str] = ..., + titles: None | str | Sequence[str] = ..., + aligned: bool = ..., + byteorder: None | _ByteOrder = ..., + copy: bool = ..., +) -> _RecArray[record]: ... +@overload +def array( + obj: None, + dtype: DTypeLike, + shape: _ShapeLike, + offset: int = ..., + formats: None = ..., + names: None = ..., + titles: None = ..., + aligned: bool = ..., + byteorder: None = ..., + copy: bool = ..., +) -> _RecArray[Any]: ... +@overload +def array( + obj: None, + dtype: None = ..., + *, + shape: _ShapeLike, + offset: int = ..., + formats: DTypeLike, + names: None | str | Sequence[str] = ..., + titles: None | str | Sequence[str] = ..., + aligned: bool = ..., + byteorder: None | _ByteOrder = ..., + copy: bool = ..., +) -> _RecArray[record]: ... +@overload +def array( + obj: _SupportsReadInto, + dtype: DTypeLike, + shape: None | _ShapeLike = ..., + offset: int = ..., + formats: None = ..., + names: None = ..., + titles: None = ..., + aligned: bool = ..., + byteorder: None = ..., + copy: bool = ..., +) -> _RecArray[Any]: ... +@overload +def array( + obj: _SupportsReadInto, + dtype: None = ..., + shape: None | _ShapeLike = ..., + offset: int = ..., + *, + formats: DTypeLike, + names: None | str | Sequence[str] = ..., + titles: None | str | Sequence[str] = ..., + aligned: bool = ..., + byteorder: None | _ByteOrder = ..., + copy: bool = ..., +) -> _RecArray[record]: ... diff --git a/.local/lib/python3.8/site-packages/numpy/core/setup.py b/.local/lib/python3.8/site-packages/numpy/core/setup.py new file mode 100644 index 0000000..7eb0813 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/setup.py @@ -0,0 +1,1110 @@ +import os +import sys +import pickle +import copy +import warnings +import platform +import textwrap +import glob +from os.path import join + +from numpy.distutils import log +from distutils.dep_util import newer +from sysconfig import get_config_var +from numpy.compat import npy_load_module +from setup_common import * # noqa: F403 + +# Set to True to enable relaxed strides checking. This (mostly) means +# that `strides[dim]` is ignored if `shape[dim] == 1` when setting flags. +NPY_RELAXED_STRIDES_CHECKING = (os.environ.get('NPY_RELAXED_STRIDES_CHECKING', "1") != "0") + +# Put NPY_RELAXED_STRIDES_DEBUG=1 in the environment if you want numpy to use a +# bogus value for affected strides in order to help smoke out bad stride usage +# when relaxed stride checking is enabled. 
+NPY_RELAXED_STRIDES_DEBUG = (os.environ.get('NPY_RELAXED_STRIDES_DEBUG', "0") != "0") +NPY_RELAXED_STRIDES_DEBUG = NPY_RELAXED_STRIDES_DEBUG and NPY_RELAXED_STRIDES_CHECKING + +# Set NPY_DISABLE_SVML=1 in the environment to disable the vendored SVML +# library. This option only has significance on a Linux x86_64 host and is most +# useful to avoid improperly requiring SVML when cross compiling. +NPY_DISABLE_SVML = (os.environ.get('NPY_DISABLE_SVML', "0") == "1") + +# XXX: ugly, we use a class to avoid calling twice some expensive functions in +# config.h/numpyconfig.h. I don't see a better way because distutils force +# config.h generation inside an Extension class, and as such sharing +# configuration information between extensions is not easy. +# Using a pickled-based memoize does not work because config_cmd is an instance +# method, which cPickle does not like. +# +# Use pickle in all cases, as cPickle is gone in python3 and the difference +# in time is only in build. -- Charles Harris, 2013-03-30 + +class CallOnceOnly: + def __init__(self): + self._check_types = None + self._check_ieee_macros = None + self._check_complex = None + + def check_types(self, *a, **kw): + if self._check_types is None: + out = check_types(*a, **kw) + self._check_types = pickle.dumps(out) + else: + out = copy.deepcopy(pickle.loads(self._check_types)) + return out + + def check_ieee_macros(self, *a, **kw): + if self._check_ieee_macros is None: + out = check_ieee_macros(*a, **kw) + self._check_ieee_macros = pickle.dumps(out) + else: + out = copy.deepcopy(pickle.loads(self._check_ieee_macros)) + return out + + def check_complex(self, *a, **kw): + if self._check_complex is None: + out = check_complex(*a, **kw) + self._check_complex = pickle.dumps(out) + else: + out = copy.deepcopy(pickle.loads(self._check_complex)) + return out + +def can_link_svml(): + """SVML library is supported only on x86_64 architecture and currently + only on linux + """ + if NPY_DISABLE_SVML: + return False + machine = platform.machine() + system = platform.system() + return "x86_64" in machine and system == "Linux" + +def check_svml_submodule(svmlpath): + if not os.path.exists(svmlpath + "/README.md"): + raise RuntimeError("Missing `SVML` submodule! Run `git submodule " + "update --init` to fix this.") + return True + +def pythonlib_dir(): + """return path where libpython* is.""" + if sys.platform == 'win32': + return os.path.join(sys.prefix, "libs") + else: + return get_config_var('LIBDIR') + +def is_npy_no_signal(): + """Return True if the NPY_NO_SIGNAL symbol must be defined in configuration + header.""" + return sys.platform == 'win32' + +def is_npy_no_smp(): + """Return True if the NPY_NO_SMP symbol must be defined in public + header (when SMP support cannot be reliably enabled).""" + # Perhaps a fancier check is in order here. + # so that threads are only enabled if there + # are actually multiple CPUS? -- but + # threaded code can be nice even on a single + # CPU so that long-calculating code doesn't + # block. 
+ return 'NPY_NOSMP' in os.environ + +def win32_checks(deflist): + from numpy.distutils.misc_util import get_build_architecture + a = get_build_architecture() + + # Distutils hack on AMD64 on windows + print('BUILD_ARCHITECTURE: %r, os.name=%r, sys.platform=%r' % + (a, os.name, sys.platform)) + if a == 'AMD64': + deflist.append('DISTUTILS_USE_SDK') + + # On win32, force long double format string to be 'g', not + # 'Lg', since the MS runtime does not support long double whose + # size is > sizeof(double) + if a == "Intel" or a == "AMD64": + deflist.append('FORCE_NO_LONG_DOUBLE_FORMATTING') + +def check_math_capabilities(config, ext, moredefs, mathlibs): + def check_func(func_name): + return config.check_func(func_name, libraries=mathlibs, + decl=True, call=True) + + def check_funcs_once(funcs_name): + decl = dict([(f, True) for f in funcs_name]) + st = config.check_funcs_once(funcs_name, libraries=mathlibs, + decl=decl, call=decl) + if st: + moredefs.extend([(fname2def(f), 1) for f in funcs_name]) + return st + + def check_funcs(funcs_name): + # Use check_funcs_once first, and if it does not work, test func per + # func. Return success only if all the functions are available + if not check_funcs_once(funcs_name): + # Global check failed, check func per func + for f in funcs_name: + if check_func(f): + moredefs.append((fname2def(f), 1)) + return 0 + else: + return 1 + + #use_msvc = config.check_decl("_MSC_VER") + + if not check_funcs_once(MANDATORY_FUNCS): + raise SystemError("One of the required function to build numpy is not" + " available (the list is %s)." % str(MANDATORY_FUNCS)) + + # Standard functions which may not be available and for which we have a + # replacement implementation. Note that some of these are C99 functions. + + # XXX: hack to circumvent cpp pollution from python: python put its + # config.h in the public namespace, so we have a clash for the common + # functions we test. 
We remove every function tested by python's + # autoconf, hoping their own test are correct + for f in OPTIONAL_STDFUNCS_MAYBE: + if config.check_decl(fname2def(f), + headers=["Python.h", "math.h"]): + OPTIONAL_STDFUNCS.remove(f) + + check_funcs(OPTIONAL_STDFUNCS) + + for h in OPTIONAL_HEADERS: + if config.check_func("", decl=False, call=False, headers=[h]): + h = h.replace(".", "_").replace(os.path.sep, "_") + moredefs.append((fname2def(h), 1)) + + for tup in OPTIONAL_INTRINSICS: + headers = None + if len(tup) == 2: + f, args, m = tup[0], tup[1], fname2def(tup[0]) + elif len(tup) == 3: + f, args, headers, m = tup[0], tup[1], [tup[2]], fname2def(tup[0]) + else: + f, args, headers, m = tup[0], tup[1], [tup[2]], fname2def(tup[3]) + if config.check_func(f, decl=False, call=True, call_args=args, + headers=headers): + moredefs.append((m, 1)) + + for dec, fn in OPTIONAL_FUNCTION_ATTRIBUTES: + if config.check_gcc_function_attribute(dec, fn): + moredefs.append((fname2def(fn), 1)) + if fn == 'attribute_target_avx512f': + # GH-14787: Work around GCC<8.4 bug when compiling with AVX512 + # support on Windows-based platforms + if (sys.platform in ('win32', 'cygwin') and + config.check_compiler_gcc() and + not config.check_gcc_version_at_least(8, 4)): + ext.extra_compile_args.extend( + ['-ffixed-xmm%s' % n for n in range(16, 32)]) + + for dec, fn, code, header in OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS: + if config.check_gcc_function_attribute_with_intrinsics(dec, fn, code, + header): + moredefs.append((fname2def(fn), 1)) + + for fn in OPTIONAL_VARIABLE_ATTRIBUTES: + if config.check_gcc_variable_attribute(fn): + m = fn.replace("(", "_").replace(")", "_") + moredefs.append((fname2def(m), 1)) + + # C99 functions: float and long double versions + check_funcs(C99_FUNCS_SINGLE) + check_funcs(C99_FUNCS_EXTENDED) + +def check_complex(config, mathlibs): + priv = [] + pub = [] + + try: + if os.uname()[0] == "Interix": + warnings.warn("Disabling broken complex support. See #1365", stacklevel=2) + return priv, pub + except Exception: + # os.uname not available on all platforms. blanket except ugly but safe + pass + + # Check for complex support + st = config.check_header('complex.h') + if st: + priv.append(('HAVE_COMPLEX_H', 1)) + pub.append(('NPY_USE_C99_COMPLEX', 1)) + + for t in C99_COMPLEX_TYPES: + st = config.check_type(t, headers=["complex.h"]) + if st: + pub.append(('NPY_HAVE_%s' % type2def(t), 1)) + + def check_prec(prec): + flist = [f + prec for f in C99_COMPLEX_FUNCS] + decl = dict([(f, True) for f in flist]) + if not config.check_funcs_once(flist, call=decl, decl=decl, + libraries=mathlibs): + for f in flist: + if config.check_func(f, call=True, decl=True, + libraries=mathlibs): + priv.append((fname2def(f), 1)) + else: + priv.extend([(fname2def(f), 1) for f in flist]) + + check_prec('') + check_prec('f') + check_prec('l') + + return priv, pub + +def check_ieee_macros(config): + priv = [] + pub = [] + + macros = [] + + def _add_decl(f): + priv.append(fname2def("decl_%s" % f)) + pub.append('NPY_%s' % fname2def("decl_%s" % f)) + + # XXX: hack to circumvent cpp pollution from python: python put its + # config.h in the public namespace, so we have a clash for the common + # functions we test. 
We remove every function tested by python's + # autoconf, hoping their own test are correct + _macros = ["isnan", "isinf", "signbit", "isfinite"] + for f in _macros: + py_symbol = fname2def("decl_%s" % f) + already_declared = config.check_decl(py_symbol, + headers=["Python.h", "math.h"]) + if already_declared: + if config.check_macro_true(py_symbol, + headers=["Python.h", "math.h"]): + pub.append('NPY_%s' % fname2def("decl_%s" % f)) + else: + macros.append(f) + # Normally, isnan and isinf are macro (C99), but some platforms only have + # func, or both func and macro version. Check for macro only, and define + # replacement ones if not found. + # Note: including Python.h is necessary because it modifies some math.h + # definitions + for f in macros: + st = config.check_decl(f, headers=["Python.h", "math.h"]) + if st: + _add_decl(f) + + return priv, pub + +def check_types(config_cmd, ext, build_dir): + private_defines = [] + public_defines = [] + + # Expected size (in number of bytes) for each type. This is an + # optimization: those are only hints, and an exhaustive search for the size + # is done if the hints are wrong. + expected = {'short': [2], 'int': [4], 'long': [8, 4], + 'float': [4], 'double': [8], 'long double': [16, 12, 8], + 'Py_intptr_t': [8, 4], 'PY_LONG_LONG': [8], 'long long': [8], + 'off_t': [8, 4]} + + # Check we have the python header (-dev* packages on Linux) + result = config_cmd.check_header('Python.h') + if not result: + python = 'python' + if '__pypy__' in sys.builtin_module_names: + python = 'pypy' + raise SystemError( + "Cannot compile 'Python.h'. Perhaps you need to " + "install {0}-dev|{0}-devel.".format(python)) + res = config_cmd.check_header("endian.h") + if res: + private_defines.append(('HAVE_ENDIAN_H', 1)) + public_defines.append(('NPY_HAVE_ENDIAN_H', 1)) + res = config_cmd.check_header("sys/endian.h") + if res: + private_defines.append(('HAVE_SYS_ENDIAN_H', 1)) + public_defines.append(('NPY_HAVE_SYS_ENDIAN_H', 1)) + + # Check basic types sizes + for type in ('short', 'int', 'long'): + res = config_cmd.check_decl("SIZEOF_%s" % sym2def(type), headers=["Python.h"]) + if res: + public_defines.append(('NPY_SIZEOF_%s' % sym2def(type), "SIZEOF_%s" % sym2def(type))) + else: + res = config_cmd.check_type_size(type, expected=expected[type]) + if res >= 0: + public_defines.append(('NPY_SIZEOF_%s' % sym2def(type), '%d' % res)) + else: + raise SystemError("Checking sizeof (%s) failed !" % type) + + for type in ('float', 'double', 'long double'): + already_declared = config_cmd.check_decl("SIZEOF_%s" % sym2def(type), + headers=["Python.h"]) + res = config_cmd.check_type_size(type, expected=expected[type]) + if res >= 0: + public_defines.append(('NPY_SIZEOF_%s' % sym2def(type), '%d' % res)) + if not already_declared and not type == 'long double': + private_defines.append(('SIZEOF_%s' % sym2def(type), '%d' % res)) + else: + raise SystemError("Checking sizeof (%s) failed !" % type) + + # Compute size of corresponding complex type: used to check that our + # definition is binary compatible with C99 complex type (check done at + # build time in npy_common.h) + complex_def = "struct {%s __x; %s __y;}" % (type, type) + res = config_cmd.check_type_size(complex_def, + expected=[2 * x for x in expected[type]]) + if res >= 0: + public_defines.append(('NPY_SIZEOF_COMPLEX_%s' % sym2def(type), '%d' % res)) + else: + raise SystemError("Checking sizeof (%s) failed !" 
% complex_def) + + for type in ('Py_intptr_t', 'off_t'): + res = config_cmd.check_type_size(type, headers=["Python.h"], + library_dirs=[pythonlib_dir()], + expected=expected[type]) + + if res >= 0: + private_defines.append(('SIZEOF_%s' % sym2def(type), '%d' % res)) + public_defines.append(('NPY_SIZEOF_%s' % sym2def(type), '%d' % res)) + else: + raise SystemError("Checking sizeof (%s) failed !" % type) + + # We check declaration AND type because that's how distutils does it. + if config_cmd.check_decl('PY_LONG_LONG', headers=['Python.h']): + res = config_cmd.check_type_size('PY_LONG_LONG', headers=['Python.h'], + library_dirs=[pythonlib_dir()], + expected=expected['PY_LONG_LONG']) + if res >= 0: + private_defines.append(('SIZEOF_%s' % sym2def('PY_LONG_LONG'), '%d' % res)) + public_defines.append(('NPY_SIZEOF_%s' % sym2def('PY_LONG_LONG'), '%d' % res)) + else: + raise SystemError("Checking sizeof (%s) failed !" % 'PY_LONG_LONG') + + res = config_cmd.check_type_size('long long', + expected=expected['long long']) + if res >= 0: + #private_defines.append(('SIZEOF_%s' % sym2def('long long'), '%d' % res)) + public_defines.append(('NPY_SIZEOF_%s' % sym2def('long long'), '%d' % res)) + else: + raise SystemError("Checking sizeof (%s) failed !" % 'long long') + + if not config_cmd.check_decl('CHAR_BIT', headers=['Python.h']): + raise RuntimeError( + "Config wo CHAR_BIT is not supported" + ", please contact the maintainers") + + return private_defines, public_defines + +def check_mathlib(config_cmd): + # Testing the C math library + mathlibs = [] + mathlibs_choices = [[], ['m'], ['cpml']] + mathlib = os.environ.get('MATHLIB') + if mathlib: + mathlibs_choices.insert(0, mathlib.split(',')) + for libs in mathlibs_choices: + if config_cmd.check_func("exp", libraries=libs, decl=True, call=True): + mathlibs = libs + break + else: + raise RuntimeError( + "math library missing; rerun setup.py after setting the " + "MATHLIB env variable") + return mathlibs + +def visibility_define(config): + """Return the define value to use for NPY_VISIBILITY_HIDDEN (may be empty + string).""" + hide = '__attribute__((visibility("hidden")))' + if config.check_gcc_function_attribute(hide, 'hideme'): + return hide + else: + return '' + +def configuration(parent_package='',top_path=None): + from numpy.distutils.misc_util import Configuration, dot_join + from numpy.distutils.system_info import (get_info, blas_opt_info, + lapack_opt_info) + + config = Configuration('core', parent_package, top_path) + local_dir = config.local_path + codegen_dir = join(local_dir, 'code_generators') + + if is_released(config): + warnings.simplefilter('error', MismatchCAPIWarning) + + # Check whether we have a mismatch between the set C API VERSION and the + # actual C API VERSION + check_api_version(C_API_VERSION, codegen_dir) + + generate_umath_py = join(codegen_dir, 'generate_umath.py') + n = dot_join(config.name, 'generate_umath') + generate_umath = npy_load_module('_'.join(n.split('.')), + generate_umath_py, ('.py', 'U', 1)) + + header_dir = 'include/numpy' # this is relative to config.path_in_package + + cocache = CallOnceOnly() + + def generate_config_h(ext, build_dir): + target = join(build_dir, header_dir, 'config.h') + d = os.path.dirname(target) + if not os.path.exists(d): + os.makedirs(d) + + if newer(__file__, target): + config_cmd = config.get_config_cmd() + log.info('Generating %s', target) + + # Check sizeof + moredefs, ignored = cocache.check_types(config_cmd, ext, build_dir) + + # Check math library and C99 math funcs availability + 
mathlibs = check_mathlib(config_cmd) + moredefs.append(('MATHLIB', ','.join(mathlibs))) + + check_math_capabilities(config_cmd, ext, moredefs, mathlibs) + moredefs.extend(cocache.check_ieee_macros(config_cmd)[0]) + moredefs.extend(cocache.check_complex(config_cmd, mathlibs)[0]) + + # Signal check + if is_npy_no_signal(): + moredefs.append('__NPY_PRIVATE_NO_SIGNAL') + + # Windows checks + if sys.platform == 'win32' or os.name == 'nt': + win32_checks(moredefs) + + # C99 restrict keyword + moredefs.append(('NPY_RESTRICT', config_cmd.check_restrict())) + + # Inline check + inline = config_cmd.check_inline() + + if can_link_svml(): + moredefs.append(('NPY_CAN_LINK_SVML', 1)) + + # Use relaxed stride checking + if NPY_RELAXED_STRIDES_CHECKING: + moredefs.append(('NPY_RELAXED_STRIDES_CHECKING', 1)) + else: + moredefs.append(('NPY_RELAXED_STRIDES_CHECKING', 0)) + + # Use bogus stride debug aid when relaxed strides are enabled + if NPY_RELAXED_STRIDES_DEBUG: + moredefs.append(('NPY_RELAXED_STRIDES_DEBUG', 1)) + else: + moredefs.append(('NPY_RELAXED_STRIDES_DEBUG', 0)) + + # Get long double representation + rep = check_long_double_representation(config_cmd) + moredefs.append(('HAVE_LDOUBLE_%s' % rep, 1)) + + if check_for_right_shift_internal_compiler_error(config_cmd): + moredefs.append('NPY_DO_NOT_OPTIMIZE_LONG_right_shift') + moredefs.append('NPY_DO_NOT_OPTIMIZE_ULONG_right_shift') + moredefs.append('NPY_DO_NOT_OPTIMIZE_LONGLONG_right_shift') + moredefs.append('NPY_DO_NOT_OPTIMIZE_ULONGLONG_right_shift') + + # Generate the config.h file from moredefs + with open(target, 'w') as target_f: + for d in moredefs: + if isinstance(d, str): + target_f.write('#define %s\n' % (d)) + else: + target_f.write('#define %s %s\n' % (d[0], d[1])) + + # define inline to our keyword, or nothing + target_f.write('#ifndef __cplusplus\n') + if inline == 'inline': + target_f.write('/* #undef inline */\n') + else: + target_f.write('#define inline %s\n' % inline) + target_f.write('#endif\n') + + # add the guard to make sure config.h is never included directly, + # but always through npy_config.h + target_f.write(textwrap.dedent(""" + #ifndef NUMPY_CORE_SRC_COMMON_NPY_CONFIG_H_ + #error config.h should never be included directly, include npy_config.h instead + #endif + """)) + + log.info('File: %s' % target) + with open(target) as target_f: + log.info(target_f.read()) + log.info('EOF') + else: + mathlibs = [] + with open(target) as target_f: + for line in target_f: + s = '#define MATHLIB' + if line.startswith(s): + value = line[len(s):].strip() + if value: + mathlibs.extend(value.split(',')) + + # Ugly: this can be called within a library and not an extension, + # in which case there is no libraries attributes (and none is + # needed). 
+ if hasattr(ext, 'libraries'): + ext.libraries.extend(mathlibs) + + incl_dir = os.path.dirname(target) + if incl_dir not in config.numpy_include_dirs: + config.numpy_include_dirs.append(incl_dir) + + return target + + def generate_numpyconfig_h(ext, build_dir): + """Depends on config.h: generate_config_h has to be called before !""" + # put common include directory in build_dir on search path + # allows using code generation in headers + config.add_include_dirs(join(build_dir, "src", "common")) + config.add_include_dirs(join(build_dir, "src", "npymath")) + + target = join(build_dir, header_dir, '_numpyconfig.h') + d = os.path.dirname(target) + if not os.path.exists(d): + os.makedirs(d) + if newer(__file__, target): + config_cmd = config.get_config_cmd() + log.info('Generating %s', target) + + # Check sizeof + ignored, moredefs = cocache.check_types(config_cmd, ext, build_dir) + + if is_npy_no_signal(): + moredefs.append(('NPY_NO_SIGNAL', 1)) + + if is_npy_no_smp(): + moredefs.append(('NPY_NO_SMP', 1)) + else: + moredefs.append(('NPY_NO_SMP', 0)) + + mathlibs = check_mathlib(config_cmd) + moredefs.extend(cocache.check_ieee_macros(config_cmd)[1]) + moredefs.extend(cocache.check_complex(config_cmd, mathlibs)[1]) + + if NPY_RELAXED_STRIDES_CHECKING: + moredefs.append(('NPY_RELAXED_STRIDES_CHECKING', 1)) + + if NPY_RELAXED_STRIDES_DEBUG: + moredefs.append(('NPY_RELAXED_STRIDES_DEBUG', 1)) + + # Check whether we can use inttypes (C99) formats + if config_cmd.check_decl('PRIdPTR', headers=['inttypes.h']): + moredefs.append(('NPY_USE_C99_FORMATS', 1)) + + # visibility check + hidden_visibility = visibility_define(config_cmd) + moredefs.append(('NPY_VISIBILITY_HIDDEN', hidden_visibility)) + + # Add the C API/ABI versions + moredefs.append(('NPY_ABI_VERSION', '0x%.8X' % C_ABI_VERSION)) + moredefs.append(('NPY_API_VERSION', '0x%.8X' % C_API_VERSION)) + + # Add moredefs to header + with open(target, 'w') as target_f: + for d in moredefs: + if isinstance(d, str): + target_f.write('#define %s\n' % (d)) + else: + target_f.write('#define %s %s\n' % (d[0], d[1])) + + # Define __STDC_FORMAT_MACROS + target_f.write(textwrap.dedent(""" + #ifndef __STDC_FORMAT_MACROS + #define __STDC_FORMAT_MACROS 1 + #endif + """)) + + # Dump the numpyconfig.h header to stdout + log.info('File: %s' % target) + with open(target) as target_f: + log.info(target_f.read()) + log.info('EOF') + config.add_data_files((header_dir, target)) + return target + + def generate_api_func(module_name): + def generate_api(ext, build_dir): + script = join(codegen_dir, module_name + '.py') + sys.path.insert(0, codegen_dir) + try: + m = __import__(module_name) + log.info('executing %s', script) + h_file, c_file, doc_file = m.generate_api(os.path.join(build_dir, header_dir)) + finally: + del sys.path[0] + config.add_data_files((header_dir, h_file), + (header_dir, doc_file)) + return (h_file,) + return generate_api + + generate_numpy_api = generate_api_func('generate_numpy_api') + generate_ufunc_api = generate_api_func('generate_ufunc_api') + + config.add_include_dirs(join(local_dir, "src", "common")) + config.add_include_dirs(join(local_dir, "src")) + config.add_include_dirs(join(local_dir)) + + config.add_data_dir('include/numpy') + config.add_include_dirs(join('src', 'npymath')) + config.add_include_dirs(join('src', 'multiarray')) + config.add_include_dirs(join('src', 'umath')) + config.add_include_dirs(join('src', 'npysort')) + config.add_include_dirs(join('src', '_simd')) + + config.add_define_macros([("NPY_INTERNAL_BUILD", "1")]) # this 
macro indicates that the NumPy build is in progress + config.add_define_macros([("HAVE_NPY_CONFIG_H", "1")]) + if sys.platform[:3] == "aix": + config.add_define_macros([("_LARGE_FILES", None)]) + else: + config.add_define_macros([("_FILE_OFFSET_BITS", "64")]) + config.add_define_macros([('_LARGEFILE_SOURCE', '1')]) + config.add_define_macros([('_LARGEFILE64_SOURCE', '1')]) + + config.numpy_include_dirs.extend(config.paths('include')) + + deps = [join('src', 'npymath', '_signbit.c'), + join('include', 'numpy', '*object.h'), + join(codegen_dir, 'genapi.py'), + ] + + ####################################################################### + # npymath library # + ####################################################################### + + subst_dict = dict([("sep", os.path.sep), ("pkgname", "numpy.core")]) + + def get_mathlib_info(*args): + # Another ugly hack: the mathlib info is known once build_src is run, + # but we cannot use add_installed_pkg_config here either, so we only + # update the substitution dictionary during the npymath build + config_cmd = config.get_config_cmd() + # Check that the toolchain works, to fail early if it doesn't + # (avoid late errors with MATHLIB which are confusing if the + # compiler does not work). + for lang, test_code, note in ( + ('c', 'int main(void) { return 0;}', ''), + ('c++', ( + 'int main(void)' + '{ auto x = 0.0; return static_cast<int>(x); }' + ), ( + 'note: A compiler with support for C++11 language ' + 'features is required.' + ) + ), + ): + is_cpp = lang == 'c++' + if is_cpp: + # this is a workaround to get rid of invalid C++ flags + # without doing big changes to config. + # C is tested first, so the compiler should be set up here + bk_c = config_cmd.compiler + config_cmd.compiler = bk_c.cxx_compiler() + st = config_cmd.try_link(test_code, lang=lang) + if not st: + # rerun the failing command in verbose mode + config_cmd.compiler.verbose = True + config_cmd.try_link(test_code, lang=lang) + raise RuntimeError( + f"Broken toolchain: cannot link a simple {lang.upper()} " + f"program. {note}" + ) + if is_cpp: + config_cmd.compiler = bk_c + mlibs = check_mathlib(config_cmd) + + posix_mlib = ' '.join(['-l%s' % l for l in mlibs]) + msvc_mlib = ' '.join(['%s.lib' % l for l in mlibs]) + subst_dict["posix_mathlib"] = posix_mlib + subst_dict["msvc_mathlib"] = msvc_mlib + + npymath_sources = [join('src', 'npymath', 'npy_math_internal.h.src'), + join('src', 'npymath', 'npy_math.c'), + join('src', 'npymath', 'ieee754.c.src'), + join('src', 'npymath', 'npy_math_complex.c.src'), + join('src', 'npymath', 'halffloat.c') + ] + + def gl_if_msvc(build_cmd): + """ Add flag if we are using the MSVC compiler + + We can't see this in our scope, because we have not initialized the + distutils build command, so use this deferred calculation to run when + we are building the library.
+ """ + if build_cmd.compiler.compiler_type == 'msvc': + # explicitly disable whole-program optimization + return ['/GL-'] + return [] + + config.add_installed_library('npymath', + sources=npymath_sources + [get_mathlib_info], + install_dir='lib', + build_info={ + 'include_dirs' : [], # empty list required for creating npy_math_internal.h + 'extra_compiler_args': [gl_if_msvc], + }) + config.add_npy_pkg_config("npymath.ini.in", "lib/npy-pkg-config", + subst_dict) + config.add_npy_pkg_config("mlib.ini.in", "lib/npy-pkg-config", + subst_dict) + + ####################################################################### + # multiarray_tests module # + ####################################################################### + + config.add_extension('_multiarray_tests', + sources=[join('src', 'multiarray', '_multiarray_tests.c.src'), + join('src', 'common', 'mem_overlap.c'), + join('src', 'common', 'npy_argparse.c'), + join('src', 'common', 'npy_hashtable.c')], + depends=[join('src', 'common', 'mem_overlap.h'), + join('src', 'common', 'npy_argparse.h'), + join('src', 'common', 'npy_hashtable.h'), + join('src', 'common', 'npy_extint128.h')], + libraries=['npymath']) + + ####################################################################### + # _multiarray_umath module - common part # + ####################################################################### + + common_deps = [ + join('src', 'common', 'dlpack', 'dlpack.h'), + join('src', 'common', 'array_assign.h'), + join('src', 'common', 'binop_override.h'), + join('src', 'common', 'cblasfuncs.h'), + join('src', 'common', 'lowlevel_strided_loops.h'), + join('src', 'common', 'mem_overlap.h'), + join('src', 'common', 'npy_argparse.h'), + join('src', 'common', 'npy_cblas.h'), + join('src', 'common', 'npy_config.h'), + join('src', 'common', 'npy_ctypes.h'), + join('src', 'common', 'npy_dlpack.h'), + join('src', 'common', 'npy_extint128.h'), + join('src', 'common', 'npy_import.h'), + join('src', 'common', 'npy_hashtable.h'), + join('src', 'common', 'npy_longdouble.h'), + join('src', 'common', 'npy_svml.h'), + join('src', 'common', 'templ_common.h.src'), + join('src', 'common', 'ucsnarrow.h'), + join('src', 'common', 'ufunc_override.h'), + join('src', 'common', 'umathmodule.h'), + join('src', 'common', 'numpyos.h'), + join('src', 'common', 'npy_cpu_dispatch.h'), + join('src', 'common', 'simd', 'simd.h'), + ] + + common_src = [ + join('src', 'common', 'array_assign.c'), + join('src', 'common', 'mem_overlap.c'), + join('src', 'common', 'npy_argparse.c'), + join('src', 'common', 'npy_hashtable.c'), + join('src', 'common', 'npy_longdouble.c'), + join('src', 'common', 'templ_common.h.src'), + join('src', 'common', 'ucsnarrow.c'), + join('src', 'common', 'ufunc_override.c'), + join('src', 'common', 'numpyos.c'), + join('src', 'common', 'npy_cpu_features.c.src'), + ] + + if os.environ.get('NPY_USE_BLAS_ILP64', "0") != "0": + blas_info = get_info('blas_ilp64_opt', 2) + else: + blas_info = get_info('blas_opt', 0) + + have_blas = blas_info and ('HAVE_CBLAS', None) in blas_info.get('define_macros', []) + + if have_blas: + extra_info = blas_info + # These files are also in MANIFEST.in so that they are always in + # the source distribution independently of HAVE_CBLAS. 
+ common_src.extend([join('src', 'common', 'cblasfuncs.c'), + join('src', 'common', 'python_xerbla.c'), + ]) + else: + extra_info = {} + + ####################################################################### + # _multiarray_umath module - multiarray part # + ####################################################################### + + multiarray_deps = [ + join('src', 'multiarray', 'abstractdtypes.h'), + join('src', 'multiarray', 'arrayobject.h'), + join('src', 'multiarray', 'arraytypes.h'), + join('src', 'multiarray', 'arrayfunction_override.h'), + join('src', 'multiarray', 'array_coercion.h'), + join('src', 'multiarray', 'array_method.h'), + join('src', 'multiarray', 'npy_buffer.h'), + join('src', 'multiarray', 'calculation.h'), + join('src', 'multiarray', 'common.h'), + join('src', 'multiarray', 'common_dtype.h'), + join('src', 'multiarray', 'convert_datatype.h'), + join('src', 'multiarray', 'convert.h'), + join('src', 'multiarray', 'conversion_utils.h'), + join('src', 'multiarray', 'ctors.h'), + join('src', 'multiarray', 'descriptor.h'), + join('src', 'multiarray', 'dtypemeta.h'), + join('src', 'multiarray', 'dtype_transfer.h'), + join('src', 'multiarray', 'dragon4.h'), + join('src', 'multiarray', 'einsum_debug.h'), + join('src', 'multiarray', 'einsum_sumprod.h'), + join('src', 'multiarray', 'experimental_public_dtype_api.h'), + join('src', 'multiarray', 'getset.h'), + join('src', 'multiarray', 'hashdescr.h'), + join('src', 'multiarray', 'iterators.h'), + join('src', 'multiarray', 'legacy_dtype_implementation.h'), + join('src', 'multiarray', 'mapping.h'), + join('src', 'multiarray', 'methods.h'), + join('src', 'multiarray', 'multiarraymodule.h'), + join('src', 'multiarray', 'nditer_impl.h'), + join('src', 'multiarray', 'number.h'), + join('src', 'multiarray', 'refcount.h'), + join('src', 'multiarray', 'scalartypes.h'), + join('src', 'multiarray', 'sequence.h'), + join('src', 'multiarray', 'shape.h'), + join('src', 'multiarray', 'strfuncs.h'), + join('src', 'multiarray', 'typeinfo.h'), + join('src', 'multiarray', 'usertypes.h'), + join('src', 'multiarray', 'vdot.h'), + join('include', 'numpy', 'arrayobject.h'), + join('include', 'numpy', '_neighborhood_iterator_imp.h'), + join('include', 'numpy', 'npy_endian.h'), + join('include', 'numpy', 'arrayscalars.h'), + join('include', 'numpy', 'noprefix.h'), + join('include', 'numpy', 'npy_interrupt.h'), + join('include', 'numpy', 'npy_3kcompat.h'), + join('include', 'numpy', 'npy_math.h'), + join('include', 'numpy', 'halffloat.h'), + join('include', 'numpy', 'npy_common.h'), + join('include', 'numpy', 'npy_os.h'), + join('include', 'numpy', 'utils.h'), + join('include', 'numpy', 'ndarrayobject.h'), + join('include', 'numpy', 'npy_cpu.h'), + join('include', 'numpy', 'numpyconfig.h'), + join('include', 'numpy', 'ndarraytypes.h'), + join('include', 'numpy', 'npy_1_7_deprecated_api.h'), + # add library sources as distutils does not consider libraries + # dependencies + ] + npymath_sources + + multiarray_src = [ + join('src', 'multiarray', 'abstractdtypes.c'), + join('src', 'multiarray', 'alloc.c'), + join('src', 'multiarray', 'arrayobject.c'), + join('src', 'multiarray', 'arraytypes.c.src'), + join('src', 'multiarray', 'array_coercion.c'), + join('src', 'multiarray', 'array_method.c'), + join('src', 'multiarray', 'array_assign_scalar.c'), + join('src', 'multiarray', 'array_assign_array.c'), + join('src', 'multiarray', 'arrayfunction_override.c'), + join('src', 'multiarray', 'buffer.c'), + join('src', 'multiarray', 'calculation.c'), + join('src',
'multiarray', 'compiled_base.c'), + join('src', 'multiarray', 'common.c'), + join('src', 'multiarray', 'common_dtype.c'), + join('src', 'multiarray', 'convert.c'), + join('src', 'multiarray', 'convert_datatype.c'), + join('src', 'multiarray', 'conversion_utils.c'), + join('src', 'multiarray', 'ctors.c'), + join('src', 'multiarray', 'datetime.c'), + join('src', 'multiarray', 'datetime_strings.c'), + join('src', 'multiarray', 'datetime_busday.c'), + join('src', 'multiarray', 'datetime_busdaycal.c'), + join('src', 'multiarray', 'descriptor.c'), + join('src', 'multiarray', 'dlpack.c'), + join('src', 'multiarray', 'dtypemeta.c'), + join('src', 'multiarray', 'dragon4.c'), + join('src', 'multiarray', 'dtype_transfer.c'), + join('src', 'multiarray', 'einsum.c.src'), + join('src', 'multiarray', 'einsum_sumprod.c.src'), + join('src', 'multiarray', 'experimental_public_dtype_api.c'), + join('src', 'multiarray', 'flagsobject.c'), + join('src', 'multiarray', 'getset.c'), + join('src', 'multiarray', 'hashdescr.c'), + join('src', 'multiarray', 'item_selection.c'), + join('src', 'multiarray', 'iterators.c'), + join('src', 'multiarray', 'legacy_dtype_implementation.c'), + join('src', 'multiarray', 'lowlevel_strided_loops.c.src'), + join('src', 'multiarray', 'mapping.c'), + join('src', 'multiarray', 'methods.c'), + join('src', 'multiarray', 'multiarraymodule.c'), + join('src', 'multiarray', 'nditer_templ.c.src'), + join('src', 'multiarray', 'nditer_api.c'), + join('src', 'multiarray', 'nditer_constr.c'), + join('src', 'multiarray', 'nditer_pywrap.c'), + join('src', 'multiarray', 'number.c'), + join('src', 'multiarray', 'refcount.c'), + join('src', 'multiarray', 'sequence.c'), + join('src', 'multiarray', 'shape.c'), + join('src', 'multiarray', 'scalarapi.c'), + join('src', 'multiarray', 'scalartypes.c.src'), + join('src', 'multiarray', 'strfuncs.c'), + join('src', 'multiarray', 'temp_elide.c'), + join('src', 'multiarray', 'typeinfo.c'), + join('src', 'multiarray', 'usertypes.c'), + join('src', 'multiarray', 'vdot.c'), + join('src', 'common', 'npy_sort.h.src'), + join('src', 'npysort', 'quicksort.c.src'), + join('src', 'npysort', 'mergesort.c.src'), + join('src', 'npysort', 'timsort.c.src'), + join('src', 'npysort', 'heapsort.c.src'), + join('src', 'npysort', 'radixsort.cpp'), + join('src', 'common', 'npy_partition.h.src'), + join('src', 'npysort', 'selection.c.src'), + join('src', 'common', 'npy_binsearch.h.src'), + join('src', 'npysort', 'binsearch.c.src'), + ] + + ####################################################################### + # _multiarray_umath module - umath part # + ####################################################################### + + def generate_umath_c(ext, build_dir): + target = join(build_dir, header_dir, '__umath_generated.c') + dir = os.path.dirname(target) + if not os.path.exists(dir): + os.makedirs(dir) + script = generate_umath_py + if newer(script, target): + with open(target, 'w') as f: + f.write(generate_umath.make_code(generate_umath.defdict, + generate_umath.__file__)) + return [] + + umath_src = [ + join('src', 'umath', 'umathmodule.c'), + join('src', 'umath', 'reduction.c'), + join('src', 'umath', 'funcs.inc.src'), + join('src', 'umath', 'simd.inc.src'), + join('src', 'umath', 'loops.h.src'), + join('src', 'umath', 'loops_utils.h.src'), + join('src', 'umath', 'loops.c.src'), + join('src', 'umath', 'loops_unary_fp.dispatch.c.src'), + join('src', 'umath', 'loops_arithm_fp.dispatch.c.src'), + join('src', 'umath', 'loops_arithmetic.dispatch.c.src'), + join('src', 'umath', 
'loops_trigonometric.dispatch.c.src'), + join('src', 'umath', 'loops_umath_fp.dispatch.c.src'), + join('src', 'umath', 'loops_exponent_log.dispatch.c.src'), + join('src', 'umath', 'matmul.h.src'), + join('src', 'umath', 'matmul.c.src'), + join('src', 'umath', 'clip.h'), + join('src', 'umath', 'clip.cpp'), + join('src', 'umath', 'dispatching.c'), + join('src', 'umath', 'legacy_array_method.c'), + join('src', 'umath', 'ufunc_object.c'), + join('src', 'umath', 'extobj.c'), + join('src', 'umath', 'scalarmath.c.src'), + join('src', 'umath', 'ufunc_type_resolution.c'), + join('src', 'umath', 'override.c'), + # For testing. Eventually, should use public API and be separate: + join('src', 'umath', '_scaled_float_dtype.c'), + ] + + umath_deps = [ + generate_umath_py, + join('include', 'numpy', 'npy_math.h'), + join('include', 'numpy', 'halffloat.h'), + join('src', 'multiarray', 'common.h'), + join('src', 'multiarray', 'number.h'), + join('src', 'common', 'templ_common.h.src'), + join('src', 'umath', 'simd.inc.src'), + join('src', 'umath', 'override.h'), + join(codegen_dir, 'generate_ufunc_api.py'), + ] + + svml_path = join('numpy', 'core', 'src', 'umath', 'svml') + svml_objs = [] + if can_link_svml() and check_svml_submodule(svml_path): + svml_objs = glob.glob(svml_path + '/**/*.s', recursive=True) + + config.add_extension('_multiarray_umath', + # Forcing C language even though we have C++ sources. + # It forces the C linker and avoids linking the C++ runtime. + language = 'c', + sources=multiarray_src + umath_src + + common_src + + [generate_config_h, + generate_numpyconfig_h, + generate_numpy_api, + join(codegen_dir, 'generate_numpy_api.py'), + join('*.py'), + generate_umath_c, + generate_ufunc_api, + ], + depends=deps + multiarray_deps + umath_deps + + common_deps, + libraries=['npymath'], + extra_objects=svml_objs, + extra_info=extra_info, + extra_cxx_compile_args=['-std=c++11', + '-D__STDC_VERSION__=0', + '-fno-exceptions', + '-fno-rtti']) + + ####################################################################### + # umath_tests module # + ####################################################################### + + config.add_extension('_umath_tests', sources=[ + join('src', 'umath', '_umath_tests.c.src'), + join('src', 'umath', '_umath_tests.dispatch.c'), + join('src', 'common', 'npy_cpu_features.c.src'), + ]) + + ####################################################################### + # custom rational dtype module # + ####################################################################### + + config.add_extension('_rational_tests', + sources=[join('src', 'umath', '_rational_tests.c.src')]) + + ####################################################################### + # struct_ufunc_test module # + ####################################################################### + + config.add_extension('_struct_ufunc_tests', + sources=[join('src', 'umath', '_struct_ufunc_tests.c.src')]) + + + ####################################################################### + # operand_flag_tests module # + ####################################################################### + + config.add_extension('_operand_flag_tests', + sources=[join('src', 'umath', '_operand_flag_tests.c.src')]) + + ####################################################################### + # SIMD module # + ####################################################################### + + config.add_extension('_simd', sources=[ + join('src', 'common', 'npy_cpu_features.c.src'), + join('src', '_simd', '_simd.c'), + join('src', '_simd',
'_simd_inc.h.src'), + join('src', '_simd', '_simd_data.inc.src'), + join('src', '_simd', '_simd.dispatch.c.src'), + ], depends=[ + join('src', 'common', 'npy_cpu_dispatch.h'), + join('src', 'common', 'simd', 'simd.h'), + join('src', '_simd', '_simd.h'), + join('src', '_simd', '_simd_inc.h.src'), + join('src', '_simd', '_simd_data.inc.src'), + join('src', '_simd', '_simd_arg.inc'), + join('src', '_simd', '_simd_convert.inc'), + join('src', '_simd', '_simd_easyintrin.inc'), + join('src', '_simd', '_simd_vector.inc'), + ]) + + config.add_subpackage('tests') + config.add_data_dir('tests/data') + config.add_data_dir('tests/examples') + config.add_data_files('*.pyi') + + config.make_svn_version_py() + + return config + +if __name__ == '__main__': + from numpy.distutils.core import setup + setup(configuration=configuration) diff --git a/.local/lib/python3.8/site-packages/numpy/core/setup_common.py b/.local/lib/python3.8/site-packages/numpy/core/setup_common.py new file mode 100644 index 0000000..70e8fc8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/setup_common.py @@ -0,0 +1,452 @@ +# Code common to build tools +import sys +import warnings +import copy +import textwrap + +from numpy.distutils.misc_util import mingw32 + + +#------------------- +# Versioning support +#------------------- +# How to change C_API_VERSION ? +# - increase C_API_VERSION value +# - record the hash for the new C API with the cversions.py script +# and add the hash to cversions.txt +# The hash values are used to remind developers when the C API number was not +# updated - generates a MismatchCAPIWarning warning which is turned into an +# exception for released version. + +# Binary compatibility version number. This number is increased whenever the +# C-API is changed such that binary compatibility is broken, i.e. whenever a +# recompile of extension modules is needed. +C_ABI_VERSION = 0x01000009 + +# Minor API version. This number is increased whenever a change is made to the +# C-API -- whether it breaks binary compatibility or not. Some changes, such +# as adding a function pointer to the end of the function table, can be made +# without breaking binary compatibility. In this case, only the C_API_VERSION +# (*not* C_ABI_VERSION) would be increased. Whenever binary compatibility is +# broken, both C_API_VERSION and C_ABI_VERSION should be increased. +# +# 0x00000008 - 1.7.x +# 0x00000009 - 1.8.x +# 0x00000009 - 1.9.x +# 0x0000000a - 1.10.x +# 0x0000000a - 1.11.x +# 0x0000000a - 1.12.x +# 0x0000000b - 1.13.x +# 0x0000000c - 1.14.x +# 0x0000000c - 1.15.x +# 0x0000000d - 1.16.x +# 0x0000000d - 1.19.x +# 0x0000000e - 1.20.x +# 0x0000000e - 1.21.x +# 0x0000000f - 1.22.x +C_API_VERSION = 0x0000000f + +class MismatchCAPIWarning(Warning): + pass + +def is_released(config): + """Return True if a released version of numpy is detected.""" + from distutils.version import LooseVersion + + v = config.get_version('../_version.py') + if v is None: + raise ValueError("Could not get version") + pv = LooseVersion(vstring=v).version + if len(pv) > 3: + return False + return True + +def get_api_versions(apiversion, codegen_dir): + """ + Return current C API checksum and the recorded checksum. + + Return current C API checksum and the recorded checksum for the given + version of the C API version. 
+ + """ + # Compute the hash of the current API as defined in the .txt files in + # code_generators + sys.path.insert(0, codegen_dir) + try: + m = __import__('genapi') + numpy_api = __import__('numpy_api') + curapi_hash = m.fullapi_hash(numpy_api.full_api) + apis_hash = m.get_versions_hash() + finally: + del sys.path[0] + + return curapi_hash, apis_hash[apiversion] + +def check_api_version(apiversion, codegen_dir): + """Emits a MismatchCAPIWarning if the C API version needs updating.""" + curapi_hash, api_hash = get_api_versions(apiversion, codegen_dir) + + # If different hash, it means that the api .txt files in + # codegen_dir have been updated without the API version being + # updated. Any modification in those .txt files should be reflected + # in the api and eventually abi versions. + # To compute the checksum of the current API, use numpy/core/cversions.py + if not curapi_hash == api_hash: + msg = ("API mismatch detected, the C API version " + "numbers have to be updated. Current C api version is %d, " + "with checksum %s, but recorded checksum for C API version %d " + "in core/codegen_dir/cversions.txt is %s. If functions were " + "added in the C API, you have to update C_API_VERSION in %s." + ) + warnings.warn(msg % (apiversion, curapi_hash, apiversion, api_hash, + __file__), + MismatchCAPIWarning, stacklevel=2) +# Mandatory functions: if not found, fail the build +MANDATORY_FUNCS = ["sin", "cos", "tan", "sinh", "cosh", "tanh", "fabs", + "floor", "ceil", "sqrt", "log10", "log", "exp", "asin", + "acos", "atan", "fmod", 'modf', 'frexp', 'ldexp'] + +# Standard functions which may not be available and for which we have a +# replacement implementation. Note that some of these are C99 functions. +OPTIONAL_STDFUNCS = ["expm1", "log1p", "acosh", "asinh", "atanh", + "rint", "trunc", "exp2", "log2", "hypot", "atan2", "pow", + "copysign", "nextafter", "ftello", "fseeko", + "strtoll", "strtoull", "cbrt", "strtold_l", "fallocate", + "backtrace", "madvise"] + + +OPTIONAL_HEADERS = [ +# sse headers only enabled automatically on amd64/x32 builds + "xmmintrin.h", # SSE + "emmintrin.h", # SSE2 + "immintrin.h", # AVX + "features.h", # for glibc version linux + "xlocale.h", # see GH#8367 + "dlfcn.h", # dladdr + "sys/mman.h", #madvise +] + +# optional gcc compiler builtins and their call arguments and optional a +# required header and definition name (HAVE_ prepended) +# call arguments are required as the compiler will do strict signature checking +OPTIONAL_INTRINSICS = [("__builtin_isnan", '5.'), + ("__builtin_isinf", '5.'), + ("__builtin_isfinite", '5.'), + ("__builtin_bswap32", '5u'), + ("__builtin_bswap64", '5u'), + ("__builtin_expect", '5, 0'), + ("__builtin_mul_overflow", '5, 5, (int*)5'), + # MMX only needed for icc, but some clangs don't have it + ("_m_from_int64", '0', "emmintrin.h"), + ("_mm_load_ps", '(float*)0', "xmmintrin.h"), # SSE + ("_mm_prefetch", '(float*)0, _MM_HINT_NTA', + "xmmintrin.h"), # SSE + ("_mm_load_pd", '(double*)0', "emmintrin.h"), # SSE2 + ("__builtin_prefetch", "(float*)0, 0, 3"), + # check that the linker can handle avx + ("__asm__ volatile", '"vpand %xmm1, %xmm2, %xmm3"', + "stdio.h", "LINK_AVX"), + ("__asm__ volatile", '"vpand %ymm1, %ymm2, %ymm3"', + "stdio.h", "LINK_AVX2"), + ("__asm__ volatile", '"vpaddd %zmm1, %zmm2, %zmm3"', + "stdio.h", "LINK_AVX512F"), + ("__asm__ volatile", '"vfpclasspd $0x40, %zmm15, %k6\\n"\ + "vmovdqu8 %xmm0, %xmm1\\n"\ + "vpbroadcastmb2q %k0, %xmm0\\n"', + "stdio.h", "LINK_AVX512_SKX"), + ("__asm__ volatile", '"xgetbv"', "stdio.h", 
"XGETBV"), + ] + +# function attributes +# tested via "int %s %s(void *);" % (attribute, name) +# function name will be converted to HAVE_ preprocessor macro +OPTIONAL_FUNCTION_ATTRIBUTES = [('__attribute__((optimize("unroll-loops")))', + 'attribute_optimize_unroll_loops'), + ('__attribute__((optimize("O3")))', + 'attribute_optimize_opt_3'), + ('__attribute__((nonnull (1)))', + 'attribute_nonnull'), + ('__attribute__((target ("avx")))', + 'attribute_target_avx'), + ('__attribute__((target ("avx2")))', + 'attribute_target_avx2'), + ('__attribute__((target ("avx512f")))', + 'attribute_target_avx512f'), + ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))', + 'attribute_target_avx512_skx'), + ] + +# function attributes with intrinsics +# To ensure your compiler can compile avx intrinsics with just the attributes +# gcc 4.8.4 support attributes but not with intrisics +# tested via "#include<%s> int %s %s(void *){code; return 0;};" % (header, attribute, name, code) +# function name will be converted to HAVE_ preprocessor macro +# The _mm512_castps_si512 instruction is specific check for AVX-512F support +# in gcc-4.9 which is missing a subset of intrinsics. See +# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61878 +OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS = [('__attribute__((target("avx2,fma")))', + 'attribute_target_avx2_with_intrinsics', + '__m256 temp = _mm256_set1_ps(1.0); temp = \ + _mm256_fmadd_ps(temp, temp, temp)', + 'immintrin.h'), + ('__attribute__((target("avx512f")))', + 'attribute_target_avx512f_with_intrinsics', + '__m512i temp = _mm512_castps_si512(_mm512_set1_ps(1.0))', + 'immintrin.h'), + ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))', + 'attribute_target_avx512_skx_with_intrinsics', + '__mmask8 temp = _mm512_fpclass_pd_mask(_mm512_set1_pd(1.0), 0x01);\ + __m512i unused_temp = \ + _mm512_castps_si512(_mm512_set1_ps(1.0));\ + _mm_mask_storeu_epi8(NULL, 0xFF, _mm_broadcastmb_epi64(temp))', + 'immintrin.h'), + ] + +# variable attributes tested via "int %s a" % attribute +OPTIONAL_VARIABLE_ATTRIBUTES = ["__thread", "__declspec(thread)"] + +# Subset of OPTIONAL_STDFUNCS which may already have HAVE_* defined by Python.h +OPTIONAL_STDFUNCS_MAYBE = [ + "expm1", "log1p", "acosh", "atanh", "asinh", "hypot", "copysign", + "ftello", "fseeko" + ] + +# C99 functions: float and long double versions +C99_FUNCS = [ + "sin", "cos", "tan", "sinh", "cosh", "tanh", "fabs", "floor", "ceil", + "rint", "trunc", "sqrt", "log10", "log", "log1p", "exp", "expm1", + "asin", "acos", "atan", "asinh", "acosh", "atanh", "hypot", "atan2", + "pow", "fmod", "modf", 'frexp', 'ldexp', "exp2", "log2", "copysign", + "nextafter", "cbrt" + ] +C99_FUNCS_SINGLE = [f + 'f' for f in C99_FUNCS] +C99_FUNCS_EXTENDED = [f + 'l' for f in C99_FUNCS] +C99_COMPLEX_TYPES = [ + 'complex double', 'complex float', 'complex long double' + ] +C99_COMPLEX_FUNCS = [ + "cabs", "cacos", "cacosh", "carg", "casin", "casinh", "catan", + "catanh", "ccos", "ccosh", "cexp", "cimag", "clog", "conj", "cpow", + "cproj", "creal", "csin", "csinh", "csqrt", "ctan", "ctanh" + ] + +def fname2def(name): + return "HAVE_%s" % name.upper() + +def sym2def(symbol): + define = symbol.replace(' ', '') + return define.upper() + +def type2def(symbol): + define = symbol.replace(' ', '_') + return define.upper() + +# Code to detect long double representation taken from MPFR m4 macro +def check_long_double_representation(cmd): + cmd._check_compiler() + body = LONG_DOUBLE_REPRESENTATION_SRC % {'type': 'long 
double'} + + # Disable whole program optimization (the default on vs2015, with python 3.5+) + # which generates intermediary object files and prevents checking the + # float representation. + if sys.platform == "win32" and not mingw32(): + try: + cmd.compiler.compile_options.remove("/GL") + except (AttributeError, ValueError): + pass + + # Disable multi-file interprocedural optimization in the Intel compiler on Linux + # which generates intermediary object files and prevents checking the + # float representation. + elif (sys.platform != "win32" + and cmd.compiler.compiler_type.startswith('intel') + and '-ipo' in cmd.compiler.cc_exe): + newcompiler = cmd.compiler.cc_exe.replace(' -ipo', '') + cmd.compiler.set_executables( + compiler=newcompiler, + compiler_so=newcompiler, + compiler_cxx=newcompiler, + linker_exe=newcompiler, + linker_so=newcompiler + ' -shared' + ) + + # We need to use _compile because we need the object filename + src, obj = cmd._compile(body, None, None, 'c') + try: + ltype = long_double_representation(pyod(obj)) + return ltype + except ValueError: + # try linking to support CC="gcc -flto" or icc -ipo + # struct needs to be volatile so it isn't optimized away + # additionally "clang -flto" requires the foo struct to be used + body = body.replace('struct', 'volatile struct') + body += "int main(void) { return foo.before[0]; }\n" + src, obj = cmd._compile(body, None, None, 'c') + cmd.temp_files.append("_configtest") + cmd.compiler.link_executable([obj], "_configtest") + ltype = long_double_representation(pyod("_configtest")) + return ltype + finally: + cmd._clean() + +LONG_DOUBLE_REPRESENTATION_SRC = r""" +/* "before" is 16 bytes to ensure there's no padding between it and "x". + * We're not expecting any "long double" bigger than 16 bytes or with + * alignment requirements stricter than 16 bytes. */ +typedef %(type)s test_type; + +struct { + char before[16]; + test_type x; + char after[8]; +} foo = { + { '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', + '\001', '\043', '\105', '\147', '\211', '\253', '\315', '\357' }, + -123456789.0, + { '\376', '\334', '\272', '\230', '\166', '\124', '\062', '\020' } +}; +""" + +def pyod(filename): + """Python implementation of the od UNIX utility (od -b, more exactly). + + Parameters + ---------- + filename : str + name of the file to get the dump from. + + Returns + ------- + out : seq + list of lines of od output + + Notes + ----- + We only implement enough to get the necessary information for long double + representation, this is not intended as a compatible replacement for od. 
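What the function body below produces, shown on a four-byte file: each output line is an octal offset followed by octal byte values, the same shape as od -b output. A standalone demo (the temporary file exists only for the example):

import tempfile

with tempfile.NamedTemporaryFile(delete=False) as f:
    f.write(bytes([0o001, 0o043, 0o105, 0o147]))   # start of the before-pattern
    name = f.name

out = []
with open(name, 'rb') as fid:
    octs = [oct(o)[2:] for o in fid.read()]        # each byte as an octal string
    for i in range(0, len(octs), 16):
        line = ['%07d' % int(oct(i)[2:])]          # octal offset column
        line.extend(['%03d' % int(c) for c in octs[i:i + 16]])
        out.append(' '.join(line))
print(out)   # ['0000000 001 043 105 147']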
+ """ + out = [] + with open(filename, 'rb') as fid: + yo2 = [oct(o)[2:] for o in fid.read()] + for i in range(0, len(yo2), 16): + line = ['%07d' % int(oct(i)[2:])] + line.extend(['%03d' % int(c) for c in yo2[i:i+16]]) + out.append(" ".join(line)) + return out + + +_BEFORE_SEQ = ['000', '000', '000', '000', '000', '000', '000', '000', + '001', '043', '105', '147', '211', '253', '315', '357'] +_AFTER_SEQ = ['376', '334', '272', '230', '166', '124', '062', '020'] + +_IEEE_DOUBLE_BE = ['301', '235', '157', '064', '124', '000', '000', '000'] +_IEEE_DOUBLE_LE = _IEEE_DOUBLE_BE[::-1] +_INTEL_EXTENDED_12B = ['000', '000', '000', '000', '240', '242', '171', '353', + '031', '300', '000', '000'] +_INTEL_EXTENDED_16B = ['000', '000', '000', '000', '240', '242', '171', '353', + '031', '300', '000', '000', '000', '000', '000', '000'] +_MOTOROLA_EXTENDED_12B = ['300', '031', '000', '000', '353', '171', + '242', '240', '000', '000', '000', '000'] +_IEEE_QUAD_PREC_BE = ['300', '031', '326', '363', '105', '100', '000', '000', + '000', '000', '000', '000', '000', '000', '000', '000'] +_IEEE_QUAD_PREC_LE = _IEEE_QUAD_PREC_BE[::-1] +_IBM_DOUBLE_DOUBLE_BE = (['301', '235', '157', '064', '124', '000', '000', '000'] + + ['000'] * 8) +_IBM_DOUBLE_DOUBLE_LE = (['000', '000', '000', '124', '064', '157', '235', '301'] + + ['000'] * 8) + +def long_double_representation(lines): + """Given a binary dump as given by GNU od -b, look for long double + representation.""" + + # Read contains a list of 32 items, each item is a byte (in octal + # representation, as a string). We 'slide' over the output until read is of + # the form before_seq + content + after_sequence, where content is the long double + # representation: + # - content is 12 bytes: 80 bits Intel representation + # - content is 16 bytes: 80 bits Intel representation (64 bits) or quad precision + # - content is 8 bytes: same as double (not implemented yet) + read = [''] * 32 + saw = None + for line in lines: + # we skip the first word, as od -b output an index at the beginning of + # each line + for w in line.split()[1:]: + read.pop(0) + read.append(w) + + # If the end of read is equal to the after_sequence, read contains + # the long double + if read[-8:] == _AFTER_SEQ: + saw = copy.copy(read) + # if the content was 12 bytes, we only have 32 - 8 - 12 = 12 + # "before" bytes. In other words the first 4 "before" bytes went + # past the sliding window. + if read[:12] == _BEFORE_SEQ[4:]: + if read[12:-8] == _INTEL_EXTENDED_12B: + return 'INTEL_EXTENDED_12_BYTES_LE' + if read[12:-8] == _MOTOROLA_EXTENDED_12B: + return 'MOTOROLA_EXTENDED_12_BYTES_BE' + # if the content was 16 bytes, we are left with 32-8-16 = 16 + # "before" bytes, so 8 went past the sliding window. 
+ elif read[:8] == _BEFORE_SEQ[8:]: + if read[8:-8] == _INTEL_EXTENDED_16B: + return 'INTEL_EXTENDED_16_BYTES_LE' + elif read[8:-8] == _IEEE_QUAD_PREC_BE: + return 'IEEE_QUAD_BE' + elif read[8:-8] == _IEEE_QUAD_PREC_LE: + return 'IEEE_QUAD_LE' + elif read[8:-8] == _IBM_DOUBLE_DOUBLE_LE: + return 'IBM_DOUBLE_DOUBLE_LE' + elif read[8:-8] == _IBM_DOUBLE_DOUBLE_BE: + return 'IBM_DOUBLE_DOUBLE_BE' + # if the content was 8 bytes, we are left with 32-8-8 = 16 bytes + elif read[:16] == _BEFORE_SEQ: + if read[16:-8] == _IEEE_DOUBLE_LE: + return 'IEEE_DOUBLE_LE' + elif read[16:-8] == _IEEE_DOUBLE_BE: + return 'IEEE_DOUBLE_BE' + + if saw is not None: + raise ValueError("Unrecognized format (%s)" % saw) + else: + # We never detected the after_sequence + raise ValueError("Could not lock sequences (%s)" % saw) + + + def check_for_right_shift_internal_compiler_error(cmd): + """ + On our arm CI, this fails with an internal compilation error + + The failure looks like the following, and can be reproduced on ARM64 GCC 5.4: + + <source>: In function 'right_shift': + <source>:4:20: internal compiler error: in expand_shift_1, at expmed.c:2349 + ip1[i] = ip1[i] >> in2; + ^ + Please submit a full bug report, + with preprocessed source if appropriate. + See <http://gcc.gnu.org/bugs.html> for instructions. + Compiler returned: 1 + + This function returns True if this compiler bug is present, and we need to + turn off optimization for the function. + """ + cmd._check_compiler() + has_optimize = cmd.try_compile(textwrap.dedent("""\ + __attribute__((optimize("O3"))) void right_shift() {} + """), None, None) + if not has_optimize: + return False + + no_err = cmd.try_compile(textwrap.dedent("""\ + typedef long the_type; /* fails also for unsigned and long long */ + __attribute__((optimize("O3"))) void right_shift(the_type in2, the_type *ip1, int n) { + for (int i = 0; i < n; i++) { + if (in2 < (the_type)sizeof(the_type) * 8) { + ip1[i] = ip1[i] >> in2; + } + } + } + """), None, None) + return not no_err diff --git a/.local/lib/python3.8/site-packages/numpy/core/shape_base.py b/.local/lib/python3.8/site-packages/numpy/core/shape_base.py new file mode 100644 index 0000000..a81a04f --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/shape_base.py @@ -0,0 +1,900 @@ +__all__ = ['atleast_1d', 'atleast_2d', 'atleast_3d', 'block', 'hstack', + 'stack', 'vstack'] + +import functools +import itertools +import operator +import warnings + +from . import numeric as _nx +from . import overrides +from .multiarray import array, asanyarray, normalize_axis_index +from . import fromnumeric as _from_nx + + +array_function_dispatch = functools.partial( + overrides.array_function_dispatch, module='numpy') + + +def _atleast_1d_dispatcher(*arys): + return arys + + +@array_function_dispatch(_atleast_1d_dispatcher) +def atleast_1d(*arys): + """ + Convert inputs to arrays with at least one dimension. + + Scalar inputs are converted to 1-dimensional arrays, whilst + higher-dimensional inputs are preserved. + + Parameters + ---------- + arys1, arys2, ... : array_like + One or more input arrays. + + Returns + ------- + ret : ndarray + An array, or list of arrays, each with ``a.ndim >= 1``. + Copies are made only if necessary.
+ + See Also + -------- + atleast_2d, atleast_3d + + Examples + -------- + >>> np.atleast_1d(1.0) + array([1.]) + + >>> x = np.arange(9.0).reshape(3,3) + >>> np.atleast_1d(x) + array([[0., 1., 2.], + [3., 4., 5.], + [6., 7., 8.]]) + >>> np.atleast_1d(x) is x + True + + >>> np.atleast_1d(1, [3, 4]) + [array([1]), array([3, 4])] + + """ + res = [] + for ary in arys: + ary = asanyarray(ary) + if ary.ndim == 0: + result = ary.reshape(1) + else: + result = ary + res.append(result) + if len(res) == 1: + return res[0] + else: + return res + + +def _atleast_2d_dispatcher(*arys): + return arys + + +@array_function_dispatch(_atleast_2d_dispatcher) +def atleast_2d(*arys): + """ + View inputs as arrays with at least two dimensions. + + Parameters + ---------- + arys1, arys2, ... : array_like + One or more array-like sequences. Non-array inputs are converted + to arrays. Arrays that already have two or more dimensions are + preserved. + + Returns + ------- + res, res2, ... : ndarray + An array, or list of arrays, each with ``a.ndim >= 2``. + Copies are avoided where possible, and views with two or more + dimensions are returned. + + See Also + -------- + atleast_1d, atleast_3d + + Examples + -------- + >>> np.atleast_2d(3.0) + array([[3.]]) + + >>> x = np.arange(3.0) + >>> np.atleast_2d(x) + array([[0., 1., 2.]]) + >>> np.atleast_2d(x).base is x + True + + >>> np.atleast_2d(1, [1, 2], [[1, 2]]) + [array([[1]]), array([[1, 2]]), array([[1, 2]])] + + """ + res = [] + for ary in arys: + ary = asanyarray(ary) + if ary.ndim == 0: + result = ary.reshape(1, 1) + elif ary.ndim == 1: + result = ary[_nx.newaxis, :] + else: + result = ary + res.append(result) + if len(res) == 1: + return res[0] + else: + return res + + +def _atleast_3d_dispatcher(*arys): + return arys + + +@array_function_dispatch(_atleast_3d_dispatcher) +def atleast_3d(*arys): + """ + View inputs as arrays with at least three dimensions. + + Parameters + ---------- + arys1, arys2, ... : array_like + One or more array-like sequences. Non-array inputs are converted to + arrays. Arrays that already have three or more dimensions are + preserved. + + Returns + ------- + res1, res2, ... : ndarray + An array, or list of arrays, each with ``a.ndim >= 3``. Copies are + avoided where possible, and views with three or more dimensions are + returned. For example, a 1-D array of shape ``(N,)`` becomes a view + of shape ``(1, N, 1)``, and a 2-D array of shape ``(M, N)`` becomes a + view of shape ``(M, N, 1)``. + + See Also + -------- + atleast_1d, atleast_2d + + Examples + -------- + >>> np.atleast_3d(3.0) + array([[[3.]]]) + + >>> x = np.arange(3.0) + >>> np.atleast_3d(x).shape + (1, 3, 1) + + >>> x = np.arange(12.0).reshape(4,3) + >>> np.atleast_3d(x).shape + (4, 3, 1) + >>> np.atleast_3d(x).base is x.base # x is a reshape, so not base itself + True + + >>> for arr in np.atleast_3d([1, 2], [[1, 2]], [[[1, 2]]]): + ... print(arr, arr.shape) # doctest: +SKIP + ... 
+ [[[1] + [2]]] (1, 2, 1) + [[[1] + [2]]] (1, 2, 1) + [[[1 2]]] (1, 1, 2) + + """ + res = [] + for ary in arys: + ary = asanyarray(ary) + if ary.ndim == 0: + result = ary.reshape(1, 1, 1) + elif ary.ndim == 1: + result = ary[_nx.newaxis, :, _nx.newaxis] + elif ary.ndim == 2: + result = ary[:, :, _nx.newaxis] + else: + result = ary + res.append(result) + if len(res) == 1: + return res[0] + else: + return res + + +def _arrays_for_stack_dispatcher(arrays, stacklevel=4): + if not hasattr(arrays, '__getitem__') and hasattr(arrays, '__iter__'): + warnings.warn('arrays to stack must be passed as a "sequence" type ' + 'such as list or tuple. Support for non-sequence ' + 'iterables such as generators is deprecated as of ' + 'NumPy 1.16 and will raise an error in the future.', + FutureWarning, stacklevel=stacklevel) + return () + return arrays + + +def _vhstack_dispatcher(tup): + return _arrays_for_stack_dispatcher(tup) + + +@array_function_dispatch(_vhstack_dispatcher) +def vstack(tup): + """ + Stack arrays in sequence vertically (row wise). + + This is equivalent to concatenation along the first axis after 1-D arrays + of shape `(N,)` have been reshaped to `(1,N)`. Rebuilds arrays divided by + `vsplit`. + + This function makes most sense for arrays with up to 3 dimensions. For + instance, for pixel-data with a height (first axis), width (second axis), + and r/g/b channels (third axis). The functions `concatenate`, `stack` and + `block` provide more general stacking and concatenation operations. + + Parameters + ---------- + tup : sequence of ndarrays + The arrays must have the same shape along all but the first axis. + 1-D arrays must have the same length. + + Returns + ------- + stacked : ndarray + The array formed by stacking the given arrays, will be at least 2-D. + + See Also + -------- + concatenate : Join a sequence of arrays along an existing axis. + stack : Join a sequence of arrays along a new axis. + block : Assemble an nd-array from nested lists of blocks. + hstack : Stack arrays in sequence horizontally (column wise). + dstack : Stack arrays in sequence depth wise (along third axis). + column_stack : Stack 1-D arrays as columns into a 2-D array. + vsplit : Split an array into multiple sub-arrays vertically (row-wise). + + Examples + -------- + >>> a = np.array([1, 2, 3]) + >>> b = np.array([4, 5, 6]) + >>> np.vstack((a,b)) + array([[1, 2, 3], + [4, 5, 6]]) + + >>> a = np.array([[1], [2], [3]]) + >>> b = np.array([[4], [5], [6]]) + >>> np.vstack((a,b)) + array([[1], + [2], + [3], + [4], + [5], + [6]]) + + """ + if not overrides.ARRAY_FUNCTION_ENABLED: + # raise warning if necessary + _arrays_for_stack_dispatcher(tup, stacklevel=2) + arrs = atleast_2d(*tup) + if not isinstance(arrs, list): + arrs = [arrs] + return _nx.concatenate(arrs, 0) + + +@array_function_dispatch(_vhstack_dispatcher) +def hstack(tup): + """ + Stack arrays in sequence horizontally (column wise). + + This is equivalent to concatenation along the second axis, except for 1-D + arrays where it concatenates along the first axis. Rebuilds arrays divided + by `hsplit`. + + This function makes most sense for arrays with up to 3 dimensions. For + instance, for pixel-data with a height (first axis), width (second axis), + and r/g/b channels (third axis). The functions `concatenate`, `stack` and + `block` provide more general stacking and concatenation operations. 
+ + Parameters + ---------- + tup : sequence of ndarrays + The arrays must have the same shape along all but the second axis, + except 1-D arrays which can be any length. + + Returns + ------- + stacked : ndarray + The array formed by stacking the given arrays. + + See Also + -------- + concatenate : Join a sequence of arrays along an existing axis. + stack : Join a sequence of arrays along a new axis. + block : Assemble an nd-array from nested lists of blocks. + vstack : Stack arrays in sequence vertically (row wise). + dstack : Stack arrays in sequence depth wise (along third axis). + column_stack : Stack 1-D arrays as columns into a 2-D array. + hsplit : Split an array into multiple sub-arrays horizontally (column-wise). + + Examples + -------- + >>> a = np.array((1,2,3)) + >>> b = np.array((4,5,6)) + >>> np.hstack((a,b)) + array([1, 2, 3, 4, 5, 6]) + >>> a = np.array([[1],[2],[3]]) + >>> b = np.array([[4],[5],[6]]) + >>> np.hstack((a,b)) + array([[1, 4], + [2, 5], + [3, 6]]) + + """ + if not overrides.ARRAY_FUNCTION_ENABLED: + # raise warning if necessary + _arrays_for_stack_dispatcher(tup, stacklevel=2) + + arrs = atleast_1d(*tup) + if not isinstance(arrs, list): + arrs = [arrs] + # As a special case, dimension 0 of 1-dimensional arrays is "horizontal" + if arrs and arrs[0].ndim == 1: + return _nx.concatenate(arrs, 0) + else: + return _nx.concatenate(arrs, 1) + + +def _stack_dispatcher(arrays, axis=None, out=None): + arrays = _arrays_for_stack_dispatcher(arrays, stacklevel=6) + if out is not None: + # optimize for the typical case where only arrays is provided + arrays = list(arrays) + arrays.append(out) + return arrays + + +@array_function_dispatch(_stack_dispatcher) +def stack(arrays, axis=0, out=None): + """ + Join a sequence of arrays along a new axis. + + The ``axis`` parameter specifies the index of the new axis in the + dimensions of the result. For example, if ``axis=0`` it will be the first + dimension and if ``axis=-1`` it will be the last dimension. + + .. versionadded:: 1.10.0 + + Parameters + ---------- + arrays : sequence of array_like + Each array must have the same shape. + + axis : int, optional + The axis in the result array along which the input arrays are stacked. + + out : ndarray, optional + If provided, the destination to place the result. The shape must be + correct, matching that of what stack would have returned if no + out argument were specified. + + Returns + ------- + stacked : ndarray + The stacked array has one more dimension than the input arrays. + + See Also + -------- + concatenate : Join a sequence of arrays along an existing axis. + block : Assemble an nd-array from nested lists of blocks. + split : Split array into a list of multiple sub-arrays of equal size. 
+ + Examples + -------- + >>> arrays = [np.random.randn(3, 4) for _ in range(10)] + >>> np.stack(arrays, axis=0).shape + (10, 3, 4) + + >>> np.stack(arrays, axis=1).shape + (3, 10, 4) + + >>> np.stack(arrays, axis=2).shape + (3, 4, 10) + + >>> a = np.array([1, 2, 3]) + >>> b = np.array([4, 5, 6]) + >>> np.stack((a, b)) + array([[1, 2, 3], + [4, 5, 6]]) + + >>> np.stack((a, b), axis=-1) + array([[1, 4], + [2, 5], + [3, 6]]) + + """ + if not overrides.ARRAY_FUNCTION_ENABLED: + # raise warning if necessary + _arrays_for_stack_dispatcher(arrays, stacklevel=2) + + arrays = [asanyarray(arr) for arr in arrays] + if not arrays: + raise ValueError('need at least one array to stack') + + shapes = {arr.shape for arr in arrays} + if len(shapes) != 1: + raise ValueError('all input arrays must have the same shape') + + result_ndim = arrays[0].ndim + 1 + axis = normalize_axis_index(axis, result_ndim) + + sl = (slice(None),) * axis + (_nx.newaxis,) + expanded_arrays = [arr[sl] for arr in arrays] + return _nx.concatenate(expanded_arrays, axis=axis, out=out) + + + # Internal functions to eliminate the overhead of repeated dispatch in one of + # the two possible paths inside np.block. + # Use getattr to protect against __array_function__ being disabled. + _size = getattr(_from_nx.size, '__wrapped__', _from_nx.size) + _ndim = getattr(_from_nx.ndim, '__wrapped__', _from_nx.ndim) + _concatenate = getattr(_from_nx.concatenate, '__wrapped__', _from_nx.concatenate) + + + def _block_format_index(index): + """ + Convert a list of indices ``[0, 1, 2]`` into ``"arrays[0][1][2]"``. + """ + idx_str = ''.join('[{}]'.format(i) for i in index if i is not None) + return 'arrays' + idx_str + + + def _block_check_depths_match(arrays, parent_index=[]): + """ + Recursive function checking that the depths of nested lists in `arrays` + all match. Mismatch raises a ValueError as described in the block + docstring below. + + The entire index (rather than just the depth) needs to be calculated + for each innermost list, in case an error needs to be raised, so that + the index of the offending list can be printed as part of the error. + + Parameters + ---------- + arrays : nested list of arrays + The arrays to check + parent_index : list of int + The full index of `arrays` within the nested lists passed to + `_block_check_depths_match` at the top of the recursion. + + Returns + ------- + first_index : list of int + The full index of an element from the bottom of the nesting in + `arrays`. If any element at the bottom is an empty list, this will + refer to it, and the last index along the empty axis will be None. + max_arr_ndim : int + The maximum of the ndims of the arrays nested in `arrays`. + final_size : int + The number of elements in the final array. This is used to motivate + the choice of algorithm, based on benchmarking wisdom. + + """ + if type(arrays) is tuple: + # not strictly necessary, but saves us from: + # - more than one way to do things - no point treating tuples like + # lists + # - horribly confusing behaviour that results when tuples are + # treated like ndarray + raise TypeError( + '{} is a tuple.
' + 'Only lists can be used to arrange blocks, and np.block does ' + 'not allow implicit conversion from tuple to ndarray.'.format( + _block_format_index(parent_index) + ) + ) + elif type(arrays) is list and len(arrays) > 0: + idxs_ndims = (_block_check_depths_match(arr, parent_index + [i]) + for i, arr in enumerate(arrays)) + + first_index, max_arr_ndim, final_size = next(idxs_ndims) + for index, ndim, size in idxs_ndims: + final_size += size + if ndim > max_arr_ndim: + max_arr_ndim = ndim + if len(index) != len(first_index): + raise ValueError( + "List depths are mismatched. First element was at depth " + "{}, but there is an element at depth {} ({})".format( + len(first_index), + len(index), + _block_format_index(index) + ) + ) + # propagate our flag that indicates an empty list at the bottom + if index[-1] is None: + first_index = index + + return first_index, max_arr_ndim, final_size + elif type(arrays) is list and len(arrays) == 0: + # We've 'bottomed out' on an empty list + return parent_index + [None], 0, 0 + else: + # We've 'bottomed out' - arrays is either a scalar or an array + size = _size(arrays) + return parent_index, _ndim(arrays), size + + +def _atleast_nd(a, ndim): + # Ensures `a` has at least `ndim` dimensions by prepending + # ones to `a.shape` as necessary + return array(a, ndmin=ndim, copy=False, subok=True) + + +def _accumulate(values): + return list(itertools.accumulate(values)) + + +def _concatenate_shapes(shapes, axis): + """Given array shapes, return the resulting shape and slices prefixes. + + These help in nested concatenation. + + Returns + ------- + shape: tuple of int + This tuple satisfies: + ``` + shape, _ = _concatenate_shapes([arr.shape for shape in arrs], axis) + shape == concatenate(arrs, axis).shape + ``` + + slice_prefixes: tuple of (slice(start, end), ) + For a list of arrays being concatenated, this returns the slice + in the larger array at axis that needs to be sliced into. + + For example, the following holds: + ``` + ret = concatenate([a, b, c], axis) + _, (sl_a, sl_b, sl_c) = concatenate_slices([a, b, c], axis) + + ret[(slice(None),) * axis + sl_a] == a + ret[(slice(None),) * axis + sl_b] == b + ret[(slice(None),) * axis + sl_c] == c + ``` + + These are called slice prefixes since they are used in the recursive + blocking algorithm to compute the left-most slices during the + recursion. Therefore, they must be prepended to rest of the slice + that was computed deeper in the recursion. + + These are returned as tuples to ensure that they can quickly be added + to existing slice tuple without creating a new tuple every time. + + """ + # Cache a result that will be reused. 
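A standalone trace of what this function computes for two blocks joined along axis 1, mirroring the statements that follow (itertools.accumulate stands in for the _accumulate helper defined above):

import itertools

shapes = [(2, 3), (2, 4)]
axis = 1
shape_at_axis = [s[axis] for s in shapes]                 # [3, 4]
offsets = list(itertools.accumulate(shape_at_axis))       # [3, 7]
slice_prefixes = [(slice(start, end),)
                  for start, end in zip([0] + offsets, offsets)]
shape = shapes[0][:axis] + (sum(shape_at_axis),) + shapes[0][axis + 1:]
print(shape, slice_prefixes)
# (2, 7) [(slice(0, 3),), (slice(3, 7),)]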
+ shape_at_axis = [shape[axis] for shape in shapes] + + # Take a shape, any shape + first_shape = shapes[0] + first_shape_pre = first_shape[:axis] + first_shape_post = first_shape[axis+1:] + + if any(shape[:axis] != first_shape_pre or + shape[axis+1:] != first_shape_post for shape in shapes): + raise ValueError( + 'Mismatched array shapes in block along axis {}.'.format(axis)) + + shape = (first_shape_pre + (sum(shape_at_axis),) + first_shape[axis+1:]) + + offsets_at_axis = _accumulate(shape_at_axis) + slice_prefixes = [(slice(start, end),) + for start, end in zip([0] + offsets_at_axis, + offsets_at_axis)] + return shape, slice_prefixes + + + def _block_info_recursion(arrays, max_depth, result_ndim, depth=0): + """ + Returns the shape of the final array, along with a list + of slices and a list of arrays that can be used for assignment inside the + new array + + Parameters + ---------- + arrays : nested list of arrays + The arrays to check + max_depth : int + The number of nested lists + result_ndim : int + The number of dimensions in the final array. + + Returns + ------- + shape : tuple of int + The shape that the final array will take on. + slices: list of tuple of slices + The slices into the full array required for assignment. These are + required to be prepended with ``(Ellipsis, )`` to obtain the correct + final index. + arrays: list of ndarray + The data to assign to each slice of the full array + + """ + if depth < max_depth: + shapes, slices, arrays = zip( + *[_block_info_recursion(arr, max_depth, result_ndim, depth+1) + for arr in arrays]) + + axis = result_ndim - max_depth + depth + shape, slice_prefixes = _concatenate_shapes(shapes, axis) + + # Prepend the slice prefix and flatten the slices + slices = [slice_prefix + the_slice + for slice_prefix, inner_slices in zip(slice_prefixes, slices) + for the_slice in inner_slices] + + # Flatten the array list + arrays = functools.reduce(operator.add, arrays) + + return shape, slices, arrays + else: + # We've 'bottomed out' - arrays is either a scalar or an array + # type(arrays) is not list + # Return the slice and the array inside a list to be consistent with + # the recursive case. + arr = _atleast_nd(arrays, result_ndim) + return arr.shape, [()], [arr] + + + def _block(arrays, max_depth, result_ndim, depth=0): + """ + Internal implementation of block based on repeated concatenation. + `arrays` is the argument passed to + block. `max_depth` is the depth of nested lists within `arrays` and + `result_ndim` is the greatest of the dimensions of the arrays in + `arrays` and the depth of the lists in `arrays` (see block docstring + for details). + """ + if depth < max_depth: + arrs = [_block(arr, max_depth, result_ndim, depth+1) + for arr in arrays] + return _concatenate(arrs, axis=-(max_depth-depth)) + else: + # We've 'bottomed out' - arrays is either a scalar or an array + # type(arrays) is not list + return _atleast_nd(arrays, result_ndim) + + + def _block_dispatcher(arrays): + # Use type(...) is list to match the behavior of np.block(), which special + # cases list specifically rather than allowing for generic iterables or + # tuple. Also, we know that list.__array_function__ will never exist. + if type(arrays) is list: + for subarrays in arrays: + yield from _block_dispatcher(subarrays) + else: + yield arrays + + + @array_function_dispatch(_block_dispatcher) + def block(arrays): + """ + Assemble an nd-array from nested lists of blocks.
+ + Blocks in the innermost lists are concatenated (see `concatenate`) along + the last dimension (-1), then these are concatenated along the + second-last dimension (-2), and so on until the outermost list is reached. + + Blocks can be of any dimension, but will not be broadcast using the normal + rules. Instead, leading axes of size 1 are inserted, to make ``block.ndim`` + the same for all blocks. This is primarily useful for working with scalars, + and means that code like ``np.block([v, 1])`` is valid, where + ``v.ndim == 1``. + + When the nested list is two levels deep, this allows block matrices to be + constructed from their components. + + .. versionadded:: 1.13.0 + + Parameters + ---------- + arrays : nested list of array_like or scalars (but not tuples) + If passed a single ndarray or scalar (a nested list of depth 0), this + is returned unmodified (and not copied). + + Element shapes must match along the appropriate axes (without + broadcasting), but leading 1s will be prepended to the shape as + necessary to make the dimensions match. + + Returns + ------- + block_array : ndarray + The array assembled from the given blocks. + + The dimensionality of the output is equal to the greatest of: + * the dimensionality of all the inputs + * the depth to which the input list is nested + + Raises + ------ + ValueError + * If list depths are mismatched - for instance, ``[[a, b], c]`` is + illegal, and should be spelt ``[[a, b], [c]]`` + * If lists are empty - for instance, ``[[a, b], []]`` + + See Also + -------- + concatenate : Join a sequence of arrays along an existing axis. + stack : Join a sequence of arrays along a new axis. + vstack : Stack arrays in sequence vertically (row wise). + hstack : Stack arrays in sequence horizontally (column wise). + dstack : Stack arrays in sequence depth wise (along third axis). + column_stack : Stack 1-D arrays as columns into a 2-D array. + vsplit : Split an array into multiple sub-arrays vertically (row-wise). + + Notes + ----- + + When called with only scalars, ``np.block`` is equivalent to an ndarray + call. So ``np.block([[1, 2], [3, 4]])`` is equivalent to + ``np.array([[1, 2], [3, 4]])``. + + This function does not enforce that the blocks lie on a fixed grid. + ``np.block([[a, b], [c, d]])`` is not restricted to arrays of the form:: + + AAAbb + AAAbb + cccDD + + But is also allowed to produce, for some ``a, b, c, d``:: + + AAAbb + AAAbb + cDDDD + + Since concatenation happens along the last axis first, `block` is *not* + capable of producing the following directly:: + + AAAbb + cccbb + cccDD + + Matlab's "square bracket stacking", ``[A, B, ...; p, q, ...]``, is + equivalent to ``np.block([[A, B, ...], [p, q, ...]])``. + + Examples + -------- + The most common use of this function is to build a block matrix: + + >>> A = np.eye(2) * 2 + >>> B = np.eye(3) * 3 + >>> np.block([ + ... [A, np.zeros((2, 3))], + ... [np.ones((3, 2)), B ] + ...
]) + array([[2., 0., 0., 0., 0.], + [0., 2., 0., 0., 0.], + [1., 1., 3., 0., 0.], + [1., 1., 0., 3., 0.], + [1., 1., 0., 0., 3.]]) + + With a list of depth 1, `block` can be used as `hstack`: + + >>> np.block([1, 2, 3]) # hstack([1, 2, 3]) + array([1, 2, 3]) + + >>> a = np.array([1, 2, 3]) + >>> b = np.array([4, 5, 6]) + >>> np.block([a, b, 10]) # hstack([a, b, 10]) + array([ 1, 2, 3, 4, 5, 6, 10]) + + >>> A = np.ones((2, 2), int) + >>> B = 2 * A + >>> np.block([A, B]) # hstack([A, B]) + array([[1, 1, 2, 2], + [1, 1, 2, 2]]) + + With a list of depth 2, `block` can be used in place of `vstack`: + + >>> a = np.array([1, 2, 3]) + >>> b = np.array([4, 5, 6]) + >>> np.block([[a], [b]]) # vstack([a, b]) + array([[1, 2, 3], + [4, 5, 6]]) + + >>> A = np.ones((2, 2), int) + >>> B = 2 * A + >>> np.block([[A], [B]]) # vstack([A, B]) + array([[1, 1], + [1, 1], + [2, 2], + [2, 2]]) + + It can also be used in place of `atleast_1d` and `atleast_2d`: + + >>> a = np.array(0) + >>> b = np.array([1]) + >>> np.block([a]) # atleast_1d(a) + array([0]) + >>> np.block([b]) # atleast_1d(b) + array([1]) + + >>> np.block([[a]]) # atleast_2d(a) + array([[0]]) + >>> np.block([[b]]) # atleast_2d(b) + array([[1]]) + + + """ + arrays, list_ndim, result_ndim, final_size = _block_setup(arrays) + + # Benchmarking found that, up to a final size of around 256x256, + # building the array by straight concatenation was faster on an + # i7-7700HQ processor with dual-channel 2400 MHz RAM. + # The dtype used did not seem to matter much. + # + # A 2D array using repeated concatenation requires 2 copies of the array. + # + # The fastest algorithm will depend on the ratio of CPU power to memory + # speed. + # One can monitor the results of the benchmark + # https://pv.github.io/numpy-bench/#bench_shape_base.Block2D.time_block2d + # to tune this parameter until a C version of the `_block_info_recursion` + # algorithm is implemented, which would likely be faster than the Python + # version. + if list_ndim * final_size > (2 * 512 * 512): + return _block_slicing(arrays, list_ndim, result_ndim) + else: + return _block_concatenate(arrays, list_ndim, result_ndim) + + +# These helper functions are mostly used for testing. +# They allow us to write tests that directly call `_block_slicing` +# or `_block_concatenate` without needing large arrays to force the size +# heuristic above to pick the desired path. +def _block_setup(arrays): + """ + Returns + (`arrays`, list_ndim, result_ndim, final_size) + """ + bottom_index, arr_ndim, final_size = _block_check_depths_match(arrays) + list_ndim = len(bottom_index) + if bottom_index and bottom_index[-1] is None: + raise ValueError( + 'List at {} cannot be empty'.format( + _block_format_index(bottom_index) + ) + ) + result_ndim = max(arr_ndim, list_ndim) + return arrays, list_ndim, result_ndim, final_size + + +def _block_slicing(arrays, list_ndim, result_ndim): + shape, slices, arrays = _block_info_recursion( + arrays, list_ndim, result_ndim) + dtype = _nx.result_type(*[arr.dtype for arr in arrays]) + + # Prefer F order only when all input arrays are F-contiguous + # (and not all of them are also C-contiguous) + F_order = all(arr.flags['F_CONTIGUOUS'] for arr in arrays) + C_order = all(arr.flags['C_CONTIGUOUS'] for arr in arrays) + order = 'F' if F_order and not C_order else 'C' + result = _nx.empty(shape=shape, dtype=dtype, order=order) + # Note: In a C implementation, the function + # PyArray_CreateMultiSortedStridePerm could be used for more advanced + # guessing of the desired order.
+ + for the_slice, arr in zip(slices, arrays): + result[(Ellipsis,) + the_slice] = arr + return result + + +def _block_concatenate(arrays, list_ndim, result_ndim): + result = _block(arrays, list_ndim, result_ndim) + if list_ndim == 0: + # Catch an edge case where _block returns a view because + # `arrays` is a single numpy array and not a list of numpy arrays. + # This might copy scalars or lists twice, but this isn't a likely + # use case for those interested in performance + result = result.copy() + return result diff --git a/.local/lib/python3.8/site-packages/numpy/core/shape_base.pyi b/.local/lib/python3.8/site-packages/numpy/core/shape_base.pyi new file mode 100644 index 0000000..159ad27 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/shape_base.pyi @@ -0,0 +1,66 @@ +from typing import TypeVar, overload, List, Sequence, Any, SupportsIndex + +from numpy import generic, dtype +from numpy.typing import ArrayLike, NDArray, _FiniteNestedSequence, _SupportsArray + +_SCT = TypeVar("_SCT", bound=generic) +_ArrayType = TypeVar("_ArrayType", bound=NDArray[Any]) + +_ArrayLike = _FiniteNestedSequence[_SupportsArray[dtype[_SCT]]] + +__all__: List[str] + +@overload +def atleast_1d(arys: _ArrayLike[_SCT], /) -> NDArray[_SCT]: ... +@overload +def atleast_1d(arys: ArrayLike, /) -> NDArray[Any]: ... +@overload +def atleast_1d(*arys: ArrayLike) -> List[NDArray[Any]]: ... + +@overload +def atleast_2d(arys: _ArrayLike[_SCT], /) -> NDArray[_SCT]: ... +@overload +def atleast_2d(arys: ArrayLike, /) -> NDArray[Any]: ... +@overload +def atleast_2d(*arys: ArrayLike) -> List[NDArray[Any]]: ... + +@overload +def atleast_3d(arys: _ArrayLike[_SCT], /) -> NDArray[_SCT]: ... +@overload +def atleast_3d(arys: ArrayLike, /) -> NDArray[Any]: ... +@overload +def atleast_3d(*arys: ArrayLike) -> List[NDArray[Any]]: ... + +@overload +def vstack(tup: Sequence[_ArrayLike[_SCT]]) -> NDArray[_SCT]: ... +@overload +def vstack(tup: Sequence[ArrayLike]) -> NDArray[Any]: ... + +@overload +def hstack(tup: Sequence[_ArrayLike[_SCT]]) -> NDArray[_SCT]: ... +@overload +def hstack(tup: Sequence[ArrayLike]) -> NDArray[Any]: ... + +@overload +def stack( + arrays: Sequence[_ArrayLike[_SCT]], + axis: SupportsIndex = ..., + out: None = ..., +) -> NDArray[_SCT]: ... +@overload +def stack( + arrays: Sequence[ArrayLike], + axis: SupportsIndex = ..., + out: None = ..., +) -> NDArray[Any]: ... +@overload +def stack( + arrays: Sequence[ArrayLike], + axis: SupportsIndex = ..., + out: _ArrayType = ..., +) -> _ArrayType: ... + +@overload +def block(arrays: _ArrayLike[_SCT]) -> NDArray[_SCT]: ... +@overload +def block(arrays: ArrayLike) -> NDArray[Any]: ...
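An aside on the shape_base.py implementation above: `_block_slicing` computes the final shape once, allocates a single output array, and assigns each leaf block into its slice, so every block is copied exactly once rather than repeatedly through nested `concatenate` calls. Below is a minimal standalone sketch of that idea for the 2-D case; the helper name `block2d_by_assignment` is hypothetical, and unlike `np.block` it assumes the blocks lie on a fixed grid.

import numpy as np

def block2d_by_assignment(rows):
    # rows: list of lists of 2-D arrays forming a block matrix on a fixed
    # grid: blocks in a row share a height, blocks in a column share a width.
    heights = [row[0].shape[0] for row in rows]
    widths = [blk.shape[1] for blk in rows[0]]
    dtype = np.result_type(*[blk.dtype for row in rows for blk in row])
    out = np.empty((sum(heights), sum(widths)), dtype=dtype)
    r0 = 0
    for row, h in zip(rows, heights):
        c0 = 0
        for blk, w in zip(row, widths):
            out[r0:r0 + h, c0:c0 + w] = blk  # one copy per leaf block
            c0 += w
        r0 += h
    return out

A = np.eye(2) * 2
B = np.eye(3) * 3
parts = [[A, np.zeros((2, 3))], [np.ones((3, 2)), B]]
assert np.array_equal(block2d_by_assignment(parts), np.block(parts))

This also illustrates why `block()` above switches to `_block_slicing` for large outputs: repeated concatenation re-copies intermediate results at every nesting level, while slice assignment touches each block once.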
diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__init__.py b/.local/lib/python3.8/site-packages/numpy/core/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..54a72b4 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/_locales.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/_locales.cpython-38.pyc new file mode 100644 index 0000000..cfc25d6 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/_locales.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test__exceptions.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test__exceptions.cpython-38.pyc new file mode 100644 index 0000000..ce92122 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test__exceptions.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_abc.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_abc.cpython-38.pyc new file mode 100644 index 0000000..a73e248 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_abc.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_api.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_api.cpython-38.pyc new file mode 100644 index 0000000..9abd45a Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_api.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_argparse.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_argparse.cpython-38.pyc new file mode 100644 index 0000000..2debd48 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_argparse.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_array_coercion.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_array_coercion.cpython-38.pyc new file mode 100644 index 0000000..0fdd257 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_array_coercion.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_arraymethod.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_arraymethod.cpython-38.pyc new file mode 100644 index 0000000..5470dd9 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_arraymethod.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_arrayprint.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_arrayprint.cpython-38.pyc new file mode 100644 index 0000000..c1e6a40 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_arrayprint.cpython-38.pyc differ diff --git 
a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_casting_unittests.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_casting_unittests.cpython-38.pyc new file mode 100644 index 0000000..ffdb35f Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_casting_unittests.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_conversion_utils.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_conversion_utils.cpython-38.pyc new file mode 100644 index 0000000..0ef36cf Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_conversion_utils.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_cpu_dispatcher.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_cpu_dispatcher.cpython-38.pyc new file mode 100644 index 0000000..8a95358 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_cpu_dispatcher.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_cpu_features.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_cpu_features.cpython-38.pyc new file mode 100644 index 0000000..4d8d61f Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_cpu_features.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_custom_dtypes.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_custom_dtypes.cpython-38.pyc new file mode 100644 index 0000000..9991e83 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_custom_dtypes.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_cython.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_cython.cpython-38.pyc new file mode 100644 index 0000000..6ff555f Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_cython.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_datetime.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_datetime.cpython-38.pyc new file mode 100644 index 0000000..1a1296e Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_datetime.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_defchararray.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_defchararray.cpython-38.pyc new file mode 100644 index 0000000..ce5b286 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_defchararray.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_deprecations.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_deprecations.cpython-38.pyc new file mode 100644 index 0000000..08f3da1 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_deprecations.cpython-38.pyc differ diff --git 
a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_dlpack.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_dlpack.cpython-38.pyc new file mode 100644 index 0000000..4028f63 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_dlpack.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_dtype.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_dtype.cpython-38.pyc new file mode 100644 index 0000000..32c9731 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_dtype.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_einsum.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_einsum.cpython-38.pyc new file mode 100644 index 0000000..323be07 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_einsum.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_errstate.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_errstate.cpython-38.pyc new file mode 100644 index 0000000..3f442fe Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_errstate.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_extint128.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_extint128.cpython-38.pyc new file mode 100644 index 0000000..6bf2259 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_extint128.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_function_base.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_function_base.cpython-38.pyc new file mode 100644 index 0000000..33bd1a5 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_function_base.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_getlimits.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_getlimits.cpython-38.pyc new file mode 100644 index 0000000..4dd6a1a Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_getlimits.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_half.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_half.cpython-38.pyc new file mode 100644 index 0000000..27790f8 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_half.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_hashtable.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_hashtable.cpython-38.pyc new file mode 100644 index 0000000..6e3813a Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_hashtable.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_indexerrors.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_indexerrors.cpython-38.pyc 
new file mode 100644 index 0000000..2e72111 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_indexerrors.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_indexing.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_indexing.cpython-38.pyc new file mode 100644 index 0000000..a87f3d6 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_indexing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_item_selection.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_item_selection.cpython-38.pyc new file mode 100644 index 0000000..4653228 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_item_selection.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_limited_api.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_limited_api.cpython-38.pyc new file mode 100644 index 0000000..4fce845 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_limited_api.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_longdouble.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_longdouble.cpython-38.pyc new file mode 100644 index 0000000..14026f8 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_longdouble.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_machar.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_machar.cpython-38.pyc new file mode 100644 index 0000000..c40b292 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_machar.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_mem_overlap.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_mem_overlap.cpython-38.pyc new file mode 100644 index 0000000..4ef3ef8 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_mem_overlap.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_mem_policy.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_mem_policy.cpython-38.pyc new file mode 100644 index 0000000..f466a44 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_mem_policy.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_memmap.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_memmap.cpython-38.pyc new file mode 100644 index 0000000..a7097c5 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_memmap.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_multiarray.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_multiarray.cpython-38.pyc new file mode 100644 index 0000000..268e464 Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_multiarray.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_nditer.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_nditer.cpython-38.pyc new file mode 100644 index 0000000..bc56f0a Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_nditer.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_numeric.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_numeric.cpython-38.pyc new file mode 100644 index 0000000..1414122 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_numeric.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_numerictypes.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_numerictypes.cpython-38.pyc new file mode 100644 index 0000000..c5cb35e Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_numerictypes.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_overrides.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_overrides.cpython-38.pyc new file mode 100644 index 0000000..f6b0602 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_overrides.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_print.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_print.cpython-38.pyc new file mode 100644 index 0000000..35c9d32 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_print.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_protocols.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_protocols.cpython-38.pyc new file mode 100644 index 0000000..fbd3f5d Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_protocols.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_records.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_records.cpython-38.pyc new file mode 100644 index 0000000..bdbac9b Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_records.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_regression.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_regression.cpython-38.pyc new file mode 100644 index 0000000..ba3b864 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_regression.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_scalar_ctors.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_scalar_ctors.cpython-38.pyc new file mode 100644 index 0000000..8a81d3e Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_scalar_ctors.cpython-38.pyc differ diff --git 
a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_scalar_methods.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_scalar_methods.cpython-38.pyc new file mode 100644 index 0000000..31641a9 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_scalar_methods.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_scalarbuffer.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_scalarbuffer.cpython-38.pyc new file mode 100644 index 0000000..87fddd4 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_scalarbuffer.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_scalarinherit.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_scalarinherit.cpython-38.pyc new file mode 100644 index 0000000..5934518 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_scalarinherit.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_scalarmath.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_scalarmath.cpython-38.pyc new file mode 100644 index 0000000..21c82e7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_scalarmath.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_scalarprint.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_scalarprint.cpython-38.pyc new file mode 100644 index 0000000..e1b4a03 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_scalarprint.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_shape_base.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_shape_base.cpython-38.pyc new file mode 100644 index 0000000..2f7f5c9 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_shape_base.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_simd.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_simd.cpython-38.pyc new file mode 100644 index 0000000..48411f4 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_simd.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_simd_module.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_simd_module.cpython-38.pyc new file mode 100644 index 0000000..11b89a7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_simd_module.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_ufunc.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_ufunc.cpython-38.pyc new file mode 100644 index 0000000..2a1905a Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_ufunc.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_umath.cpython-38.pyc 
b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_umath.cpython-38.pyc new file mode 100644 index 0000000..1bb80b0 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_umath.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_umath_accuracy.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_umath_accuracy.cpython-38.pyc new file mode 100644 index 0000000..65f34af Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_umath_accuracy.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_umath_complex.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_umath_complex.cpython-38.pyc new file mode 100644 index 0000000..7109ff4 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_umath_complex.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_unicode.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_unicode.cpython-38.pyc new file mode 100644 index 0000000..50dca5f Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/__pycache__/test_unicode.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/_locales.py b/.local/lib/python3.8/site-packages/numpy/core/tests/_locales.py new file mode 100644 index 0000000..ce7b81f --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/_locales.py @@ -0,0 +1,74 @@ +"""Provide a class for testing in a French locale + +""" +import sys +import locale + +import pytest + +__all__ = ['CommaDecimalPointLocale'] + + +def find_comma_decimal_point_locale(): + """See if the platform has a locale that uses a comma as decimal point. + + Find a locale that uses a comma instead of a period as the + decimal point. + + Returns + ------- + old_locale: str + Locale when the function was called. + new_locale: {str, None} + First French locale found, None if none found. + + """ + if sys.platform == 'win32': + locales = ['FRENCH'] + else: + locales = ['fr_FR', 'fr_FR.UTF-8', 'fi_FI', 'fi_FI.UTF-8'] + + old_locale = locale.getlocale(locale.LC_NUMERIC) + new_locale = None + try: + for loc in locales: + try: + locale.setlocale(locale.LC_NUMERIC, loc) + new_locale = loc + break + except locale.Error: + pass + finally: + locale.setlocale(locale.LC_NUMERIC, locale=old_locale) + return old_locale, new_locale + + +class CommaDecimalPointLocale: + """Sets LC_NUMERIC to a locale with comma as decimal point. + + Classes derived from this class have setup and teardown methods that run + tests with locale.LC_NUMERIC set to a locale where commas (',') are used as + the decimal point instead of periods ('.'). On exit the locale is restored + to the initial locale. It also serves as a context manager with the same + effect. If no such locale is available, the test is skipped. + + ..
versionadded:: 1.15.0 + + """ + (cur_locale, tst_locale) = find_comma_decimal_point_locale() + + def setup(self): + if self.tst_locale is None: + pytest.skip("No French locale available") + locale.setlocale(locale.LC_NUMERIC, locale=self.tst_locale) + + def teardown(self): + locale.setlocale(locale.LC_NUMERIC, locale=self.cur_locale) + + def __enter__(self): + if self.tst_locale is None: + pytest.skip("No French locale available") + locale.setlocale(locale.LC_NUMERIC, locale=self.tst_locale) + + def __exit__(self, type, value, traceback): + locale.setlocale(locale.LC_NUMERIC, locale=self.cur_locale) diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/data/astype_copy.pkl b/.local/lib/python3.8/site-packages/numpy/core/tests/data/astype_copy.pkl new file mode 100644 index 0000000..7397c97 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/data/astype_copy.pkl differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/data/generate_umath_validation_data.cpp b/.local/lib/python3.8/site-packages/numpy/core/tests/data/generate_umath_validation_data.cpp new file mode 100644 index 0000000..418eae6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/data/generate_umath_validation_data.cpp @@ -0,0 +1,170 @@ +// headers required by the code below +#include <algorithm> +#include <cmath> +#include <cstdint> +#include <cstdlib> +#include <fstream> +#include <iostream> +#include <limits> +#include <string> +#include <vector> + +struct ufunc { + std::string name; + double (*f32func)(double); + long double (*f64func)(long double); + float f32ulp; + float f64ulp; +}; + +template <typename T> +T +RandomFloat(T a, T b) +{ + T random = ((T)rand()) / (T)RAND_MAX; + T diff = b - a; + T r = random * diff; + return a + r; +} + +template <typename T> +void +append_random_array(std::vector<T> &arr, T min, T max, size_t N) +{ + for (size_t ii = 0; ii < N; ++ii) + arr.emplace_back(RandomFloat(min, max)); +} + +template <typename T1, typename T2> +std::vector<T1> +computeTrueVal(const std::vector<T1> &in, T2 (*mathfunc)(T2)) +{ + std::vector<T1> out; + for (T1 elem : in) { + T2 elem_d = (T2)elem; + T1 out_elem = (T1)mathfunc(elem_d); + out.emplace_back(out_elem); + } + return out; +} + +/* + * FP range: + * [-inf, -maxflt, -1., -minflt, -minden, 0., minden, minflt, 1., maxflt, inf] + */ + +#define MINDEN std::numeric_limits<T>::denorm_min() +#define MINFLT std::numeric_limits<T>::min() +#define MAXFLT std::numeric_limits<T>::max() +#define INF std::numeric_limits<T>::infinity() +#define qNAN std::numeric_limits<T>::quiet_NaN() +#define sNAN std::numeric_limits<T>::signaling_NaN() + +template <typename T> +std::vector<T> +generate_input_vector(std::string func) +{ + std::vector<T> input = {MINDEN, -MINDEN, MINFLT, -MINFLT, MAXFLT, + -MAXFLT, INF, -INF, qNAN, sNAN, + -1.0, 1.0, 0.0, -0.0}; + + // [-1.0, 1.0] + if ((func == "arcsin") || (func == "arccos") || (func == "arctanh")) { + append_random_array<T>(input, -1.0, 1.0, 700); + } + // (0.0, INF] + else if ((func == "log2") || (func == "log10")) { + append_random_array<T>(input, 0.0, 1.0, 200); + append_random_array<T>(input, MINDEN, MINFLT, 200); + append_random_array<T>(input, MINFLT, 1.0, 200); + append_random_array<T>(input, 1.0, MAXFLT, 200); + } + // (-1.0, INF] + else if (func == "log1p") { + append_random_array<T>(input, -1.0, 1.0, 200); + append_random_array<T>(input, -MINFLT, -MINDEN, 100); + append_random_array<T>(input, -1.0, -MINFLT, 100); + append_random_array<T>(input, MINDEN, MINFLT, 100); + append_random_array<T>(input, MINFLT, 1.0, 100); + append_random_array<T>(input, 1.0, MAXFLT, 100); + } + // [1.0, INF] + else if (func == "arccosh") { + append_random_array<T>(input, 1.0, 2.0, 400); + append_random_array<T>(input, 2.0, MAXFLT, 300); + } + // [-INF,
INF] + else { + append_random_array<T>(input, -1.0, 1.0, 100); + append_random_array<T>(input, MINDEN, MINFLT, 100); + append_random_array<T>(input, -MINFLT, -MINDEN, 100); + append_random_array<T>(input, MINFLT, 1.0, 100); + append_random_array<T>(input, -1.0, -MINFLT, 100); + append_random_array<T>(input, 1.0, MAXFLT, 100); + append_random_array<T>(input, -MAXFLT, -100.0, 100); + } + + std::random_shuffle(input.begin(), input.end()); + return input; +} + +int +main() +{ + srand(42); + std::vector<struct ufunc> umathfunc = { + {"sin", sin, sin, 2.37, 3.3}, + {"cos", cos, cos, 2.36, 3.38}, + {"tan", tan, tan, 3.91, 3.93}, + {"arcsin", asin, asin, 3.12, 2.55}, + {"arccos", acos, acos, 2.1, 1.67}, + {"arctan", atan, atan, 2.3, 2.52}, + {"sinh", sinh, sinh, 1.55, 1.89}, + {"cosh", cosh, cosh, 2.48, 1.97}, + {"tanh", tanh, tanh, 1.38, 1.19}, + {"arcsinh", asinh, asinh, 1.01, 1.48}, + {"arccosh", acosh, acosh, 1.16, 1.05}, + {"arctanh", atanh, atanh, 1.45, 1.46}, + {"cbrt", cbrt, cbrt, 1.94, 1.82}, + //{"exp",exp,exp,3.76,1.53}, + {"exp2", exp2, exp2, 1.01, 1.04}, + {"expm1", expm1, expm1, 2.62, 2.1}, + //{"log",log,log,1.84,1.67}, + {"log10", log10, log10, 3.5, 1.92}, + {"log1p", log1p, log1p, 1.96, 1.93}, + {"log2", log2, log2, 2.12, 1.84}, + }; + + for (int ii = 0; ii < umathfunc.size(); ++ii) { + // ignore sin/cos + if ((umathfunc[ii].name != "sin") && (umathfunc[ii].name != "cos")) { + std::string fileName = + "umath-validation-set-" + umathfunc[ii].name + ".csv"; + std::ofstream txtOut; + txtOut.open(fileName, std::ofstream::trunc); + txtOut << "dtype,input,output,ulperrortol" << std::endl; + + // Single Precision + auto f32in = generate_input_vector<float>(umathfunc[ii].name); + auto f32out = computeTrueVal<float, double>(f32in, + umathfunc[ii].f32func); + for (int jj = 0; jj < f32in.size(); ++jj) { + txtOut << "np.float32" << std::hex << ",0x" + << *reinterpret_cast<uint32_t *>(&f32in[jj]) << ",0x" + << *reinterpret_cast<uint32_t *>(&f32out[jj]) << "," + << ceil(umathfunc[ii].f32ulp) << std::endl; + } + + // Double Precision + auto f64in = generate_input_vector<double>(umathfunc[ii].name); + auto f64out = computeTrueVal<double, long double>( + f64in, umathfunc[ii].f64func); + for (int jj = 0; jj < f64in.size(); ++jj) { + txtOut << "np.float64" << std::hex << ",0x" + << *reinterpret_cast<uint64_t *>(&f64in[jj]) << ",0x" + << *reinterpret_cast<uint64_t *>(&f64out[jj]) << "," + << ceil(umathfunc[ii].f64ulp) << std::endl; + } + txtOut.close(); + } + } + return 0; +} diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/data/recarray_from_file.fits b/.local/lib/python3.8/site-packages/numpy/core/tests/data/recarray_from_file.fits new file mode 100644 index 0000000..ca48ee8 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/data/recarray_from_file.fits differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-README.txt b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-README.txt new file mode 100644 index 0000000..cfc9e41 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-README.txt @@ -0,0 +1,15 @@ +Steps to validate transcendental functions: +1) Add a file 'umath-validation-set-<ufuncname>.txt', where <ufuncname> is the name of + the function in NumPy you want to validate +2) The file should contain 4 columns: dtype,input,expected output,ulperror + a. dtype: one of np.float16, np.float32, np.float64 + b. input: floating point input to ufunc in hex. Example: 0x414570a4 + represents 12.340000152587890625 + c. expected output: floating point output for the corresponding input in hex.
+ This should be computed using a high(er) precision library and then rounded to + the same format as the input. + d. ulperror: expected maximum ulp error of the function. This + should be the same across all rows of the same dtype; the function is + tested against the maximum ulp error among all entries of that dtype. +3) Add the file umath-validation-set-<ufuncname>.txt to the test file test_umath_accuracy.py, + which will then validate your ufunc. diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-arccos.csv b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-arccos.csv new file mode 100644 index 0000000..6697ae9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-arccos.csv @@ -0,0 +1,1429 @@ +dtype,input,output,ulperrortol +np.float32,0xbddd7f50,0x3fd6eec2,3 +np.float32,0xbe32a20c,0x3fdf8182,3 +np.float32,0xbf607c09,0x4028f84f,3 +np.float32,0x3f25d906,0x3f5db544,3 +np.float32,0x3f01cec8,0x3f84febf,3 +np.float32,0x3f1d5c6e,0x3f68a735,3 +np.float32,0xbf0cab89,0x4009c36d,3 +np.float32,0xbf176b40,0x400d0941,3 +np.float32,0x3f3248b2,0x3f4ce6d4,3 +np.float32,0x3f390b48,0x3f434e0d,3 +np.float32,0xbe261698,0x3fddea43,3 +np.float32,0x3f0e1154,0x3f7b848b,3 +np.float32,0xbf379a3c,0x4017b764,3 +np.float32,0xbeda6f2c,0x4000bd62,3 +np.float32,0xbf6a0c3f,0x402e5d5a,3 +np.float32,0x3ef1d700,0x3f8a17b7,3 +np.float32,0xbf6f4f65,0x4031d30d,3 +np.float32,0x3f2c9eee,0x3f54adfd,3 +np.float32,0x3f3cfb18,0x3f3d8a1e,3 +np.float32,0x3ba80800,0x3fc867d2,3 +np.float32,0x3e723b08,0x3faa7e4d,3 +np.float32,0xbf65820f,0x402bb054,3 +np.float32,0xbee64e7a,0x40026410,3 +np.float32,0x3cb15140,0x3fc64a87,3 +np.float32,0x3f193660,0x3f6ddf2a,3 +np.float32,0xbf0e5b52,0x400a44f7,3 +np.float32,0x3ed55f14,0x3f920a4b,3 +np.float32,0x3dd11a80,0x3fbbf85c,3 +np.float32,0xbf4f5c4b,0x4020f4f9,3 +np.float32,0x3f787532,0x3e792e87,3 +np.float32,0x3f40e6ac,0x3f37a74f,3 +np.float32,0x3f1c1318,0x3f6a47b6,3 +np.float32,0xbe3c48d8,0x3fe0bb70,3 +np.float32,0xbe94d4bc,0x3feed08e,3 +np.float32,0xbe5c3688,0x3fe4ce26,3 +np.float32,0xbf6fe026,0x403239cb,3 +np.float32,0x3ea5983c,0x3f9ee7bf,3 +np.float32,0x3f1471e6,0x3f73c5bb,3 +np.float32,0x3f0e2622,0x3f7b6b87,3 +np.float32,0xbf597180,0x40257ad1,3 +np.float32,0xbeb5321c,0x3ff75d34,3 +np.float32,0x3f5afcd2,0x3f0b6012,3 +np.float32,0xbef2ff88,0x40042e14,3 +np.float32,0xbedc747e,0x400104f5,3 +np.float32,0xbee0c2f4,0x40019dfc,3 +np.float32,0xbf152cd8,0x400c57dc,3 +np.float32,0xbf6cf9e2,0x40303bbe,3 +np.float32,0x3ed9cd74,0x3f90d1a1,3 +np.float32,0xbf754406,0x4036767f,3 +np.float32,0x3f59c5c2,0x3f0db42f,3 +np.float32,0x3f2eefd8,0x3f518684,3 +np.float32,0xbf156bf9,0x400c6b49,3 +np.float32,0xbd550790,0x3fcfb8dc,3 +np.float32,0x3ede58fc,0x3f8f8f77,3 +np.float32,0xbf00ac19,0x40063c4b,3 +np.float32,0x3f4d25ba,0x3f24280e,3 +np.float32,0xbe9568be,0x3feef73c,3 +np.float32,0x3f67d154,0x3ee05547,3 +np.float32,0x3f617226,0x3efcb4f4,3 +np.float32,0xbf3ab41a,0x4018d6cc,3 +np.float32,0xbf3186fe,0x401592cd,3 +np.float32,0x3de3ba50,0x3fbacca9,3 +np.float32,0x3e789f98,0x3fa9ab97,3 +np.float32,0x3f016e08,0x3f8536d8,3 +np.float32,0x3e8b618c,0x3fa5c571,3 +np.float32,0x3eff97bc,0x3f8628a9,3 +np.float32,0xbf6729f0,0x402ca32f,3 +np.float32,0xbebec146,0x3ff9eddc,3 +np.float32,0x3ddb2e60,0x3fbb563a,3 +np.float32,0x3caa8e40,0x3fc66595,3 +np.float32,0xbf5973f2,0x40257bfa,3 +np.float32,0xbdd82c70,0x3fd69916,3 +np.float32,0xbedf4c82,0x400169ef,3 +np.float32,0x3ef8f22c,0x3f881184,3 +np.float32,0xbf1d74d4,0x400eedc9,3
+np.float32,0x3f2e10a6,0x3f52b790,3 +np.float32,0xbf08ecc0,0x4008a628,3 +np.float32,0x3ecb7db4,0x3f94be9f,3 +np.float32,0xbf052ded,0x40078bfc,3 +np.float32,0x3f2ee78a,0x3f5191e4,3 +np.float32,0xbf56f4e1,0x40245194,3 +np.float32,0x3f600a3e,0x3f014a25,3 +np.float32,0x3f3836f8,0x3f44808b,3 +np.float32,0x3ecabfbc,0x3f94f25c,3 +np.float32,0x3c70f500,0x3fc72dec,3 +np.float32,0x3f17c444,0x3f6fabf0,3 +np.float32,0xbf4c22a5,0x401f9a09,3 +np.float32,0xbe4205dc,0x3fe1765a,3 +np.float32,0x3ea49138,0x3f9f2d36,3 +np.float32,0xbece0082,0x3ffe106b,3 +np.float32,0xbe387578,0x3fe03eef,3 +np.float32,0xbf2b6466,0x40137a30,3 +np.float32,0xbe9dadb2,0x3ff12204,3 +np.float32,0xbf56b3f2,0x402433bb,3 +np.float32,0xbdf9b4d8,0x3fd8b51f,3 +np.float32,0x3f58a596,0x3f0fd4b4,3 +np.float32,0xbedf5748,0x40016b6e,3 +np.float32,0x3f446442,0x3f32476f,3 +np.float32,0x3f5be886,0x3f099658,3 +np.float32,0x3ea1e44c,0x3f9fe1de,3 +np.float32,0xbf11e9b8,0x400b585f,3 +np.float32,0xbf231f8f,0x4010befb,3 +np.float32,0xbf4395ea,0x401c2dd0,3 +np.float32,0x3e9e7784,0x3fa0c8a6,3 +np.float32,0xbe255184,0x3fddd14c,3 +np.float32,0x3f70d25e,0x3eb13148,3 +np.float32,0x3f220cdc,0x3f62a722,3 +np.float32,0xbd027bf0,0x3fcd23e7,3 +np.float32,0x3e4ef8b8,0x3faf02d2,3 +np.float32,0xbf76fc6b,0x40380728,3 +np.float32,0xbf57e761,0x4024c1cd,3 +np.float32,0x3ed4fc20,0x3f922580,3 +np.float32,0xbf09b64a,0x4008e1db,3 +np.float32,0x3f21ca62,0x3f62fcf5,3 +np.float32,0xbe55f610,0x3fe40170,3 +np.float32,0xbc0def80,0x3fca2bbb,3 +np.float32,0xbebc8764,0x3ff9547b,3 +np.float32,0x3ec1b200,0x3f9766d1,3 +np.float32,0xbf4ee44e,0x4020c1ee,3 +np.float32,0xbea85852,0x3ff3f22a,3 +np.float32,0xbf195c0c,0x400da3d3,3 +np.float32,0xbf754b5d,0x40367ce8,3 +np.float32,0xbdcbfe50,0x3fd5d52b,3 +np.float32,0xbf1adb87,0x400e1be3,3 +np.float32,0xbf6f8491,0x4031f898,3 +np.float32,0xbf6f9ae7,0x4032086e,3 +np.float32,0xbf52b3f0,0x40226790,3 +np.float32,0xbf698452,0x402e09f4,3 +np.float32,0xbf43dc9a,0x401c493a,3 +np.float32,0xbf165f7f,0x400cb664,3 +np.float32,0x3e635468,0x3fac682f,3 +np.float32,0xbe8cf2b6,0x3fecc28a,3 +np.float32,0x7f7fffff,0x7fc00000,3 +np.float32,0xbf4c6513,0x401fb597,3 +np.float32,0xbf02b8f8,0x4006d47e,3 +np.float32,0x3ed3759c,0x3f9290c8,3 +np.float32,0xbf2a7a5f,0x40132b98,3 +np.float32,0xbae65000,0x3fc9496f,3 +np.float32,0x3f65f5ea,0x3ee8ef07,3 +np.float32,0xbe7712fc,0x3fe84106,3 +np.float32,0xbb9ff700,0x3fc9afd2,3 +np.float32,0x3d8d87a0,0x3fc03592,3 +np.float32,0xbefc921c,0x40058c23,3 +np.float32,0xbf286566,0x401279d8,3 +np.float32,0x3f53857e,0x3f192eaf,3 +np.float32,0xbee9b0f4,0x4002dd90,3 +np.float32,0x3f4041f8,0x3f38a14a,3 +np.float32,0x3f54ea96,0x3f16b02d,3 +np.float32,0x3ea50ef8,0x3f9f0c01,3 +np.float32,0xbeaad2dc,0x3ff49a4a,3 +np.float32,0xbec428c8,0x3ffb636f,3 +np.float32,0xbda46178,0x3fd358c7,3 +np.float32,0xbefacfc4,0x40054b7f,3 +np.float32,0xbf7068f9,0x40329c85,3 +np.float32,0x3f70b850,0x3eb1caa7,3 +np.float32,0x7fa00000,0x7fe00000,3 +np.float32,0x80000000,0x3fc90fdb,3 +np.float32,0x3f68d5c8,0x3edb7cf3,3 +np.float32,0x3d9443d0,0x3fbfc98a,3 +np.float32,0xff7fffff,0x7fc00000,3 +np.float32,0xbeee7ba8,0x40038a5e,3 +np.float32,0xbf0aaaba,0x40092a73,3 +np.float32,0x3f36a4e8,0x3f46c0ee,3 +np.float32,0x3ed268e4,0x3f92da82,3 +np.float32,0xbee6002c,0x4002591b,3 +np.float32,0xbe8f2752,0x3fed5576,3 +np.float32,0x3f525912,0x3f1b40e0,3 +np.float32,0xbe8e151e,0x3fed0e16,3 +np.float32,0x1,0x3fc90fdb,3 +np.float32,0x3ee23b84,0x3f8e7ae1,3 +np.float32,0xbf5961ca,0x40257361,3 +np.float32,0x3f6bbca0,0x3ecd14cd,3 +np.float32,0x3e27b230,0x3fb4014d,3 
+np.float32,0xbf183bb8,0x400d49fc,3 +np.float32,0x3f57759c,0x3f120b68,3 +np.float32,0xbd6994c0,0x3fd05d84,3 +np.float32,0xbf1dd684,0x400f0cc8,3 +np.float32,0xbececc1c,0x3ffe480a,3 +np.float32,0xbf48855f,0x401e206d,3 +np.float32,0x3f28c922,0x3f59d382,3 +np.float32,0xbf65c094,0x402bd3b0,3 +np.float32,0x3f657d42,0x3eeb11dd,3 +np.float32,0xbed32d4e,0x3fff7b15,3 +np.float32,0xbf31af02,0x4015a0b1,3 +np.float32,0x3d89eb00,0x3fc06f7f,3 +np.float32,0x3dac2830,0x3fbe4a17,3 +np.float32,0x3f7f7cb6,0x3d81a7df,3 +np.float32,0xbedbb570,0x4000ea82,3 +np.float32,0x3db37830,0x3fbdd4a8,3 +np.float32,0xbf376f48,0x4017a7fd,3 +np.float32,0x3f319f12,0x3f4dd2c9,3 +np.float32,0x7fc00000,0x7fc00000,3 +np.float32,0x3f1b4f70,0x3f6b3e31,3 +np.float32,0x3e33c880,0x3fb278d1,3 +np.float32,0x3f2796e0,0x3f5b69bd,3 +np.float32,0x3f4915d6,0x3f2ad4d0,3 +np.float32,0x3e4db120,0x3faf2ca0,3 +np.float32,0x3ef03dd4,0x3f8a8ba9,3 +np.float32,0x3e96ca88,0x3fa2cbf7,3 +np.float32,0xbeb136ce,0x3ff64d2b,3 +np.float32,0xbf2f3938,0x4014c75e,3 +np.float32,0x3f769dde,0x3e8b0d76,3 +np.float32,0x3f67cec8,0x3ee06148,3 +np.float32,0x3f0a1ade,0x3f80204e,3 +np.float32,0x3e4b9718,0x3faf7144,3 +np.float32,0x3cccb480,0x3fc5dcf3,3 +np.float32,0x3caeb740,0x3fc654f0,3 +np.float32,0x3f684e0e,0x3ede0678,3 +np.float32,0x3f0ba93c,0x3f7e6663,3 +np.float32,0xbf12bbc4,0x400b985e,3 +np.float32,0xbf2a8e1a,0x40133235,3 +np.float32,0x3f42029c,0x3f35f5c5,3 +np.float32,0x3eed1728,0x3f8b6f9c,3 +np.float32,0xbe5779ac,0x3fe432fd,3 +np.float32,0x3f6ed8b8,0x3ebc7e4b,3 +np.float32,0x3eea25b0,0x3f8c43c7,3 +np.float32,0x3f1988a4,0x3f6d786b,3 +np.float32,0xbe751674,0x3fe7ff8a,3 +np.float32,0xbe9f7418,0x3ff1997d,3 +np.float32,0x3dca11d0,0x3fbc6979,3 +np.float32,0x3f795226,0x3e6a6cab,3 +np.float32,0xbea780e0,0x3ff3b926,3 +np.float32,0xbed92770,0x4000901e,3 +np.float32,0xbf3e9f8c,0x401a49f8,3 +np.float32,0x3f0f7054,0x3f79ddb2,3 +np.float32,0x3a99d400,0x3fc8e966,3 +np.float32,0xbef082b0,0x4003d3c6,3 +np.float32,0xbf0d0790,0x4009defb,3 +np.float32,0xbf1649da,0x400cafb4,3 +np.float32,0xbea5aca8,0x3ff33d5c,3 +np.float32,0xbf4e1843,0x40206ba1,3 +np.float32,0xbe3d7d5c,0x3fe0e2ad,3 +np.float32,0xbf0e802d,0x400a500e,3 +np.float32,0xbf0de8f0,0x400a2295,3 +np.float32,0xbf3016ba,0x4015137e,3 +np.float32,0x3f36b1ea,0x3f46ae5d,3 +np.float32,0xbd27f170,0x3fce4fc7,3 +np.float32,0x3e96ec54,0x3fa2c31f,3 +np.float32,0x3eb4dfdc,0x3f9ad87d,3 +np.float32,0x3f5cac6c,0x3f0815cc,3 +np.float32,0xbf0489aa,0x40075bf1,3 +np.float32,0x3df010c0,0x3fba05f5,3 +np.float32,0xbf229f4a,0x4010956a,3 +np.float32,0x3f75e474,0x3e905a99,3 +np.float32,0xbcece6a0,0x3fccc397,3 +np.float32,0xbdb41528,0x3fd454e7,3 +np.float32,0x3ec8b2f8,0x3f958118,3 +np.float32,0x3f5eaa70,0x3f041a1d,3 +np.float32,0xbf32e1cc,0x40160b91,3 +np.float32,0xbe8e6026,0x3fed219c,3 +np.float32,0x3e6b3160,0x3fab65e3,3 +np.float32,0x3e6d7460,0x3fab1b81,3 +np.float32,0xbf13fbde,0x400bfa3b,3 +np.float32,0xbe8235ec,0x3fe9f9e3,3 +np.float32,0x3d71c4a0,0x3fc18096,3 +np.float32,0x3eb769d0,0x3f9a2aa0,3 +np.float32,0xbf68cb3b,0x402d99e4,3 +np.float32,0xbd917610,0x3fd22932,3 +np.float32,0x3d3cba60,0x3fc3297f,3 +np.float32,0xbf383cbe,0x4017f1cc,3 +np.float32,0xbeee96d0,0x40038e34,3 +np.float32,0x3ec89cb4,0x3f958725,3 +np.float32,0x3ebf92d8,0x3f97f95f,3 +np.float32,0x3f30f3da,0x3f4ec021,3 +np.float32,0xbd26b560,0x3fce45e4,3 +np.float32,0xbec0eb12,0x3ffa8330,3 +np.float32,0x3f6d592a,0x3ec4a6c1,3 +np.float32,0x3ea6d39c,0x3f9e9463,3 +np.float32,0x3e884184,0x3fa6951e,3 +np.float32,0x3ea566c4,0x3f9ef4d1,3 +np.float32,0x3f0c8f4c,0x3f7d5380,3 
+np.float32,0x3f28e1ba,0x3f59b2cb,3 +np.float32,0x3f798538,0x3e66e1c3,3 +np.float32,0xbe2889b8,0x3fde39b8,3 +np.float32,0x3f3da05e,0x3f3c949c,3 +np.float32,0x3f24d700,0x3f5f073e,3 +np.float32,0xbe5b5768,0x3fe4b198,3 +np.float32,0xbed3b03a,0x3fff9f05,3 +np.float32,0x3e8a1c4c,0x3fa619eb,3 +np.float32,0xbf075d24,0x40083030,3 +np.float32,0x3f765648,0x3e8d1f52,3 +np.float32,0xbf70fc5e,0x403308bb,3 +np.float32,0x3f557ae8,0x3f15ab76,3 +np.float32,0x3f02f7ea,0x3f84521c,3 +np.float32,0x3f7ebbde,0x3dcbc5c5,3 +np.float32,0xbefbdfc6,0x40057285,3 +np.float32,0x3ec687ac,0x3f9617d9,3 +np.float32,0x3e4831c8,0x3fafe01b,3 +np.float32,0x3e25cde0,0x3fb43ea8,3 +np.float32,0x3e4f2ab8,0x3faefc70,3 +np.float32,0x3ea60ae4,0x3f9ec973,3 +np.float32,0xbf1ed55f,0x400f5dde,3 +np.float32,0xbf5ad4aa,0x40262479,3 +np.float32,0x3e8b3594,0x3fa5d0de,3 +np.float32,0x3f3a77aa,0x3f413c80,3 +np.float32,0xbf07512b,0x40082ca9,3 +np.float32,0x3f33d990,0x3f4ab5e5,3 +np.float32,0x3f521556,0x3f1bb78f,3 +np.float32,0xbecf6036,0x3ffe7086,3 +np.float32,0x3db91bd0,0x3fbd7a11,3 +np.float32,0x3ef63a74,0x3f88d839,3 +np.float32,0xbf2f1116,0x4014b99c,3 +np.float32,0xbf17fdc0,0x400d36b9,3 +np.float32,0xbe87df2c,0x3feb7117,3 +np.float32,0x80800000,0x3fc90fdb,3 +np.float32,0x3ee24c1c,0x3f8e7641,3 +np.float32,0x3f688dce,0x3edcd644,3 +np.float32,0xbf0f4e1c,0x400a8e1b,3 +np.float32,0x0,0x3fc90fdb,3 +np.float32,0x3f786eba,0x3e7999d4,3 +np.float32,0xbf404f80,0x401aeca8,3 +np.float32,0xbe9ffb6a,0x3ff1bd18,3 +np.float32,0x3f146bfc,0x3f73ccfd,3 +np.float32,0xbe47d630,0x3fe233ee,3 +np.float32,0xbe95847c,0x3feefe7c,3 +np.float32,0xbf135df0,0x400bc9e5,3 +np.float32,0x3ea19f3c,0x3f9ff411,3 +np.float32,0x3f235e20,0x3f60f247,3 +np.float32,0xbec789ec,0x3ffc4def,3 +np.float32,0x3f04b656,0x3f834db6,3 +np.float32,0x3dfaf440,0x3fb95679,3 +np.float32,0xbe4a7f28,0x3fe28abe,3 +np.float32,0x3ed4850c,0x3f92463b,3 +np.float32,0x3ec4ba5c,0x3f9694dd,3 +np.float32,0xbce24ca0,0x3fcc992b,3 +np.float32,0xbf5b7c6e,0x402675a0,3 +np.float32,0xbea3ce2a,0x3ff2bf04,3 +np.float32,0x3db02c60,0x3fbe0998,3 +np.float32,0x3c47b780,0x3fc78069,3 +np.float32,0x3ed33b20,0x3f92a0d5,3 +np.float32,0xbf4556d7,0x401cdcde,3 +np.float32,0xbe1b6e28,0x3fdc90ec,3 +np.float32,0xbf3289b7,0x4015ecd0,3 +np.float32,0x3df3f240,0x3fb9c76d,3 +np.float32,0x3eefa7d0,0x3f8ab61d,3 +np.float32,0xbe945838,0x3feeb006,3 +np.float32,0xbf0b1386,0x400949a3,3 +np.float32,0x3f77e546,0x3e812cc1,3 +np.float32,0x3e804ba0,0x3fa8a480,3 +np.float32,0x3f43dcea,0x3f331a06,3 +np.float32,0x3eb87450,0x3f99e33c,3 +np.float32,0x3e5f4898,0x3facecea,3 +np.float32,0x3f646640,0x3eeff10e,3 +np.float32,0x3f1aa832,0x3f6c1051,3 +np.float32,0xbebf6bfa,0x3ffa1bdc,3 +np.float32,0xbb77f300,0x3fc98bd4,3 +np.float32,0x3f3587fe,0x3f485645,3 +np.float32,0x3ef85f34,0x3f883b8c,3 +np.float32,0x3f50e584,0x3f1dc82c,3 +np.float32,0x3f1d30a8,0x3f68deb0,3 +np.float32,0x3ee75a78,0x3f8d0c86,3 +np.float32,0x3f2c023a,0x3f5581e1,3 +np.float32,0xbf074e34,0x40082bca,3 +np.float32,0xbead71f0,0x3ff54c6d,3 +np.float32,0xbf39ed88,0x40188e69,3 +np.float32,0x3f5d2fe6,0x3f07118b,3 +np.float32,0xbf1f79f8,0x400f9267,3 +np.float32,0x3e900c58,0x3fa48e99,3 +np.float32,0xbf759cb2,0x4036c47b,3 +np.float32,0x3f63329c,0x3ef5359c,3 +np.float32,0xbf5d6755,0x40276709,3 +np.float32,0x3f2ce31c,0x3f54519a,3 +np.float32,0x7f800000,0x7fc00000,3 +np.float32,0x3f1bf50e,0x3f6a6d9a,3 +np.float32,0x3f258334,0x3f5e25d8,3 +np.float32,0xbf661a3f,0x402c06ac,3 +np.float32,0x3d1654c0,0x3fc45cef,3 +np.float32,0xbef14a36,0x4003f009,3 +np.float32,0xbf356051,0x4016ec3a,3 
+np.float32,0x3f6ccc42,0x3ec79193,3 +np.float32,0xbf2fe3d6,0x401501f9,3 +np.float32,0x3deedc80,0x3fba195b,3 +np.float32,0x3f2e5a28,0x3f52533e,3 +np.float32,0x3e6b68b8,0x3fab5ec8,3 +np.float32,0x3e458240,0x3fb037b7,3 +np.float32,0xbf24bab0,0x401144cb,3 +np.float32,0x3f600f4c,0x3f013fb2,3 +np.float32,0x3f021a04,0x3f84d316,3 +np.float32,0x3f741732,0x3e9cc948,3 +np.float32,0x3f0788aa,0x3f81a5b0,3 +np.float32,0x3f28802c,0x3f5a347c,3 +np.float32,0x3c9eb400,0x3fc69500,3 +np.float32,0x3e5d11e8,0x3fad357a,3 +np.float32,0x3d921250,0x3fbfecb9,3 +np.float32,0x3f354866,0x3f48b066,3 +np.float32,0xbf72cf43,0x40346d84,3 +np.float32,0x3eecdbb8,0x3f8b805f,3 +np.float32,0xbee585d0,0x400247fd,3 +np.float32,0x3e3607a8,0x3fb22fc6,3 +np.float32,0xbf0cb7d6,0x4009c71c,3 +np.float32,0xbf56b230,0x402432ec,3 +np.float32,0xbf4ced02,0x401fee29,3 +np.float32,0xbf3a325c,0x4018a776,3 +np.float32,0x3ecae8bc,0x3f94e732,3 +np.float32,0xbe48c7e8,0x3fe252bd,3 +np.float32,0xbe175d7c,0x3fdc0d5b,3 +np.float32,0x3ea78dac,0x3f9e632d,3 +np.float32,0xbe7434a8,0x3fe7e279,3 +np.float32,0x3f1f9e02,0x3f65c7b9,3 +np.float32,0xbe150f2c,0x3fdbc2c2,3 +np.float32,0x3ee13480,0x3f8ec423,3 +np.float32,0x3ecb7d54,0x3f94beb9,3 +np.float32,0x3f1cef42,0x3f693181,3 +np.float32,0xbf1ec06a,0x400f5730,3 +np.float32,0xbe112acc,0x3fdb44e8,3 +np.float32,0xbe77b024,0x3fe85545,3 +np.float32,0x3ec86fe0,0x3f959353,3 +np.float32,0x3f36b326,0x3f46ac9a,3 +np.float32,0x3e581a70,0x3fadd829,3 +np.float32,0xbf032c0c,0x4006f5f9,3 +np.float32,0xbf43b1fd,0x401c38b1,3 +np.float32,0x3f3701b4,0x3f463c5c,3 +np.float32,0x3f1a995a,0x3f6c22f1,3 +np.float32,0xbf05de0b,0x4007bf97,3 +np.float32,0x3d4bd960,0x3fc2b063,3 +np.float32,0x3f0e1618,0x3f7b7ed0,3 +np.float32,0x3edfd420,0x3f8f2628,3 +np.float32,0xbf6662fe,0x402c3047,3 +np.float32,0x3ec0690c,0x3f97bf9b,3 +np.float32,0xbeaf4146,0x3ff5c7a0,3 +np.float32,0x3f5e7764,0x3f04816d,3 +np.float32,0xbedd192c,0x40011bc5,3 +np.float32,0x3eb76350,0x3f9a2c5e,3 +np.float32,0xbed8108c,0x400069a5,3 +np.float32,0xbe59f31c,0x3fe48401,3 +np.float32,0xbea3e1e6,0x3ff2c439,3 +np.float32,0x3e26d1f8,0x3fb41db5,3 +np.float32,0x3f3a0a7c,0x3f41dba5,3 +np.float32,0x3ebae068,0x3f993ce4,3 +np.float32,0x3f2d8e30,0x3f536942,3 +np.float32,0xbe838bbe,0x3fea5247,3 +np.float32,0x3ebe4420,0x3f98538f,3 +np.float32,0xbcc59b80,0x3fcc265c,3 +np.float32,0x3eebb5c8,0x3f8bd334,3 +np.float32,0xbafc3400,0x3fc94ee8,3 +np.float32,0xbf63ddc1,0x402ac683,3 +np.float32,0xbeabdf80,0x3ff4e18f,3 +np.float32,0x3ea863f0,0x3f9e2a78,3 +np.float32,0x3f45b292,0x3f303bc1,3 +np.float32,0xbe68aa60,0x3fe666bf,3 +np.float32,0x3eb9de18,0x3f998239,3 +np.float32,0xbf719d85,0x4033815e,3 +np.float32,0x3edef9a8,0x3f8f62db,3 +np.float32,0xbd7781c0,0x3fd0cd1e,3 +np.float32,0x3f0b3b90,0x3f7ee92a,3 +np.float32,0xbe3eb3b4,0x3fe10a27,3 +np.float32,0xbf31a4c4,0x40159d23,3 +np.float32,0x3e929434,0x3fa3e5b0,3 +np.float32,0xbeb1a90e,0x3ff66b9e,3 +np.float32,0xbeba9b5e,0x3ff8d048,3 +np.float32,0xbf272a84,0x4012119e,3 +np.float32,0x3f1ebbd0,0x3f66e889,3 +np.float32,0x3ed3cdc8,0x3f927893,3 +np.float32,0xbf50dfce,0x40219b58,3 +np.float32,0x3f0c02de,0x3f7dfb62,3 +np.float32,0xbf694de3,0x402de8d2,3 +np.float32,0xbeaeb13e,0x3ff5a14f,3 +np.float32,0xbf61aa7a,0x40299702,3 +np.float32,0xbf13d159,0x400bed35,3 +np.float32,0xbeecd034,0x40034e0b,3 +np.float32,0xbe50c2e8,0x3fe35761,3 +np.float32,0x3f714406,0x3eae8e57,3 +np.float32,0xbf1ca486,0x400eabd8,3 +np.float32,0x3f5858cc,0x3f106497,3 +np.float32,0x3f670288,0x3ee41c84,3 +np.float32,0xbf20bd2c,0x400ff9f5,3 +np.float32,0xbe29afd8,0x3fde5eff,3 
+np.float32,0xbf635e6a,0x402a80f3,3 +np.float32,0x3e82b7b0,0x3fa80446,3 +np.float32,0x3e982e7c,0x3fa26ece,3 +np.float32,0x3d9f0e00,0x3fbf1c6a,3 +np.float32,0x3e8299b4,0x3fa80c07,3 +np.float32,0xbf0529c1,0x40078ac3,3 +np.float32,0xbf403b8a,0x401ae519,3 +np.float32,0xbe57e09c,0x3fe44027,3 +np.float32,0x3ea1c8f4,0x3f9fe913,3 +np.float32,0xbe216a94,0x3fdd52d0,3 +np.float32,0x3f59c442,0x3f0db709,3 +np.float32,0xbd636260,0x3fd02bdd,3 +np.float32,0xbdbbc788,0x3fd4d08d,3 +np.float32,0x3dd19560,0x3fbbf0a3,3 +np.float32,0x3f060ad4,0x3f828641,3 +np.float32,0x3b102e00,0x3fc8c7c4,3 +np.float32,0x3f42b3b8,0x3f34e5a6,3 +np.float32,0x3f0255ac,0x3f84b071,3 +np.float32,0xbf014898,0x40066996,3 +np.float32,0x3e004dc0,0x3fb8fb51,3 +np.float32,0xbf594ff8,0x40256af2,3 +np.float32,0x3efafddc,0x3f877b80,3 +np.float32,0xbf5f0780,0x40283899,3 +np.float32,0x3ee95e54,0x3f8c7bcc,3 +np.float32,0x3eba2f0c,0x3f996c80,3 +np.float32,0x3f37721c,0x3f459b68,3 +np.float32,0x3e2be780,0x3fb378bf,3 +np.float32,0x3e550270,0x3fae3d69,3 +np.float32,0x3e0f9500,0x3fb70e0a,3 +np.float32,0xbf51974a,0x4021eaf4,3 +np.float32,0x3f393832,0x3f430d05,3 +np.float32,0x3f3df16a,0x3f3c1bd8,3 +np.float32,0xbd662340,0x3fd041ed,3 +np.float32,0x3f7e8418,0x3ddc9fce,3 +np.float32,0xbf392734,0x40184672,3 +np.float32,0x3ee3b278,0x3f8e124e,3 +np.float32,0x3eed4808,0x3f8b61d2,3 +np.float32,0xbf6fccbd,0x40322beb,3 +np.float32,0x3e3ecdd0,0x3fb1123b,3 +np.float32,0x3f4419e0,0x3f32bb45,3 +np.float32,0x3f595e00,0x3f0e7914,3 +np.float32,0xbe8c1486,0x3fec88c6,3 +np.float32,0xbf800000,0x40490fdb,3 +np.float32,0xbdaf5020,0x3fd4084d,3 +np.float32,0xbf407660,0x401afb63,3 +np.float32,0x3f0c3aa8,0x3f7db8b8,3 +np.float32,0xbcdb5980,0x3fcc7d5b,3 +np.float32,0x3f4738d4,0x3f2dd1ed,3 +np.float32,0x3f4d7064,0x3f23ab14,3 +np.float32,0xbeb1d576,0x3ff67774,3 +np.float32,0xbf507166,0x40216bb3,3 +np.float32,0x3e86484c,0x3fa71813,3 +np.float32,0x3f09123e,0x3f80bd35,3 +np.float32,0xbe9abe0e,0x3ff05cb2,3 +np.float32,0x3f3019dc,0x3f4fed21,3 +np.float32,0xbe99e00e,0x3ff0227d,3 +np.float32,0xbf155ec5,0x400c6739,3 +np.float32,0x3f5857ba,0x3f106698,3 +np.float32,0x3edf619c,0x3f8f45fb,3 +np.float32,0xbf5ab76a,0x40261664,3 +np.float32,0x3e54b5a8,0x3fae4738,3 +np.float32,0xbee92772,0x4002ca40,3 +np.float32,0x3f2fd610,0x3f504a7a,3 +np.float32,0xbf38521c,0x4017f97e,3 +np.float32,0xff800000,0x7fc00000,3 +np.float32,0x3e2da348,0x3fb34077,3 +np.float32,0x3f2f85fa,0x3f50b894,3 +np.float32,0x3e88f9c8,0x3fa66551,3 +np.float32,0xbf61e570,0x4029b648,3 +np.float32,0xbeab362c,0x3ff4b4a1,3 +np.float32,0x3ec6c310,0x3f9607bd,3 +np.float32,0x3f0d7bda,0x3f7c3810,3 +np.float32,0xbeba5d36,0x3ff8bf99,3 +np.float32,0x3f4b0554,0x3f27adda,3 +np.float32,0x3f60f5dc,0x3efebfb3,3 +np.float32,0x3f36ce2c,0x3f468603,3 +np.float32,0xbe70afac,0x3fe76e8e,3 +np.float32,0x3f673350,0x3ee339b5,3 +np.float32,0xbe124cf0,0x3fdb698c,3 +np.float32,0xbf1243dc,0x400b73d0,3 +np.float32,0x3f3c8850,0x3f3e3407,3 +np.float32,0x3ea02f24,0x3fa05500,3 +np.float32,0xbeffed34,0x400607db,3 +np.float32,0x3f5c75c2,0x3f08817c,3 +np.float32,0x3f4b2fbe,0x3f27682d,3 +np.float32,0x3ee47c34,0x3f8dd9f9,3 +np.float32,0x3f50d48c,0x3f1de584,3 +np.float32,0x3f12dc5e,0x3f75b628,3 +np.float32,0xbefe7e4a,0x4005d2f4,3 +np.float32,0xbec2e846,0x3ffb0cbc,3 +np.float32,0xbedc3036,0x4000fb80,3 +np.float32,0xbf48aedc,0x401e311f,3 +np.float32,0x3f6e032e,0x3ec11363,3 +np.float32,0xbf60de15,0x40292b72,3 +np.float32,0x3f06585e,0x3f8258ba,3 +np.float32,0x3ef49b98,0x3f894e66,3 +np.float32,0x3cc5fe00,0x3fc5f7cf,3 +np.float32,0xbf7525c5,0x40365c2c,3 
+np.float32,0x3f64f9f8,0x3eed5fb2,3 +np.float32,0x3e8849c0,0x3fa692fb,3 +np.float32,0x3e50c878,0x3faec79e,3 +np.float32,0x3ed61530,0x3f91d831,3 +np.float32,0xbf54872e,0x40233724,3 +np.float32,0xbf52ee7f,0x4022815e,3 +np.float32,0xbe708c24,0x3fe769fc,3 +np.float32,0xbf26fc54,0x40120260,3 +np.float32,0x3f226e8a,0x3f6228db,3 +np.float32,0xbef30406,0x40042eb8,3 +np.float32,0x3f5d996c,0x3f063f5f,3 +np.float32,0xbf425f9c,0x401bb618,3 +np.float32,0x3e4bb260,0x3faf6dc9,3 +np.float32,0xbe52d5a4,0x3fe39b29,3 +np.float32,0xbe169cf0,0x3fdbf505,3 +np.float32,0xbedfc422,0x40017a8e,3 +np.float32,0x3d8ffef0,0x3fc00e05,3 +np.float32,0xbf12bdab,0x400b98f2,3 +np.float32,0x3f295d0a,0x3f590e88,3 +np.float32,0x3f49d8e4,0x3f2998aa,3 +np.float32,0xbef914f4,0x40050c12,3 +np.float32,0xbf4ea2b5,0x4020a61e,3 +np.float32,0xbf3a89e5,0x4018c762,3 +np.float32,0x3e8707b4,0x3fa6e67a,3 +np.float32,0x3ac55400,0x3fc8de86,3 +np.float32,0x800000,0x3fc90fdb,3 +np.float32,0xbeb9762c,0x3ff8819b,3 +np.float32,0xbebbe23c,0x3ff92815,3 +np.float32,0xbf598c88,0x402587a1,3 +np.float32,0x3e95d864,0x3fa30b4a,3 +np.float32,0x3f7f6f40,0x3d882486,3 +np.float32,0xbf53658c,0x4022b604,3 +np.float32,0xbf2a35f2,0x401314ad,3 +np.float32,0x3eb14380,0x3f9bcf28,3 +np.float32,0x3f0e0c64,0x3f7b8a7a,3 +np.float32,0x3d349920,0x3fc36a9a,3 +np.float32,0xbec2092c,0x3ffad071,3 +np.float32,0xbe1d08e8,0x3fdcc4e0,3 +np.float32,0xbf008968,0x40063243,3 +np.float32,0xbefad582,0x40054c51,3 +np.float32,0xbe52d010,0x3fe39a72,3 +np.float32,0x3f4afdac,0x3f27ba6b,3 +np.float32,0x3f6c483c,0x3eca4408,3 +np.float32,0xbef3cb68,0x40044b0c,3 +np.float32,0x3e94687c,0x3fa36b6f,3 +np.float32,0xbf64ae5c,0x402b39bb,3 +np.float32,0xbf0022b4,0x40061497,3 +np.float32,0x80000001,0x3fc90fdb,3 +np.float32,0x3f25bcd0,0x3f5dda4b,3 +np.float32,0x3ed91b40,0x3f9102d7,3 +np.float32,0x3f800000,0x0,3 +np.float32,0xbebc6aca,0x3ff94cca,3 +np.float32,0x3f239e9a,0x3f609e7d,3 +np.float32,0xbf7312be,0x4034a305,3 +np.float32,0x3efd16d0,0x3f86e148,3 +np.float32,0x3f52753a,0x3f1b0f72,3 +np.float32,0xbde58960,0x3fd7702c,3 +np.float32,0x3ef88580,0x3f883099,3 +np.float32,0x3eebaefc,0x3f8bd51e,3 +np.float32,0x3e877d2c,0x3fa6c807,3 +np.float32,0x3f1a0324,0x3f6cdf32,3 +np.float32,0xbedfe20a,0x40017eb6,3 +np.float32,0x3f205a3c,0x3f64d69d,3 +np.float32,0xbeed5b7c,0x400361b0,3 +np.float32,0xbf69ba10,0x402e2ad0,3 +np.float32,0x3c4fe200,0x3fc77014,3 +np.float32,0x3f043310,0x3f839a69,3 +np.float32,0xbeaf359a,0x3ff5c485,3 +np.float32,0x3db3f110,0x3fbdcd12,3 +np.float32,0x3e24af88,0x3fb462ed,3 +np.float32,0xbf34e858,0x4016c1c8,3 +np.float32,0x3f3334f2,0x3f4b9cd0,3 +np.float32,0xbf145882,0x400c16a2,3 +np.float32,0xbf541c38,0x40230748,3 +np.float32,0x3eba7e10,0x3f99574b,3 +np.float32,0xbe34c6e0,0x3fdfc731,3 +np.float32,0xbe957abe,0x3feefbf0,3 +np.float32,0xbf595a59,0x40256fdb,3 +np.float32,0xbdedc7b8,0x3fd7f4f0,3 +np.float32,0xbf627c02,0x402a06a9,3 +np.float32,0x3f339b78,0x3f4b0d18,3 +np.float32,0xbf2df6d2,0x40145929,3 +np.float32,0x3f617726,0x3efc9fd8,3 +np.float32,0xbee3a8fc,0x40020561,3 +np.float32,0x3efe9f68,0x3f867043,3 +np.float32,0xbf2c3e76,0x4013c3ba,3 +np.float32,0xbf218f28,0x40103d84,3 +np.float32,0xbf1ea847,0x400f4f7f,3 +np.float32,0x3ded9160,0x3fba2e31,3 +np.float32,0x3bce1b00,0x3fc841bf,3 +np.float32,0xbe90566e,0x3feda46a,3 +np.float32,0xbf5ea2ba,0x4028056b,3 +np.float32,0x3f538e62,0x3f191ee6,3 +np.float32,0xbf59e054,0x4025af74,3 +np.float32,0xbe8c98ba,0x3fecab24,3 +np.float32,0x3ee7bdb0,0x3f8cf0b7,3 +np.float32,0xbf2eb828,0x40149b2b,3 +np.float32,0xbe5eb904,0x3fe52068,3 +np.float32,0xbf16b422,0x400cd08d,3 
+np.float32,0x3f1ab9b4,0x3f6bfa58,3 +np.float32,0x3dc23040,0x3fbce82a,3 +np.float32,0xbf29d9e7,0x4012f5e5,3 +np.float32,0xbf38f30a,0x40183393,3 +np.float32,0x3e88e798,0x3fa66a09,3 +np.float32,0x3f1d07e6,0x3f69124f,3 +np.float32,0xbe1d3d34,0x3fdccb7e,3 +np.float32,0xbf1715be,0x400ceec2,3 +np.float32,0x3f7a0eac,0x3e5d11f7,3 +np.float32,0xbe764924,0x3fe82707,3 +np.float32,0xbf01a1f8,0x4006837c,3 +np.float32,0x3f2be730,0x3f55a661,3 +np.float32,0xbf7bb070,0x403d4ce5,3 +np.float32,0xbd602110,0x3fd011c9,3 +np.float32,0x3f5d080c,0x3f07609d,3 +np.float32,0xbda20400,0x3fd332d1,3 +np.float32,0x3f1c62da,0x3f69e308,3 +np.float32,0xbf2c6916,0x4013d223,3 +np.float32,0xbf44f8fd,0x401cb816,3 +np.float32,0x3f4da392,0x3f235539,3 +np.float32,0x3e9e8aa0,0x3fa0c3a0,3 +np.float32,0x3e9633c4,0x3fa2f366,3 +np.float32,0xbf0422ab,0x40073ddd,3 +np.float32,0x3f518386,0x3f1cb603,3 +np.float32,0x3f24307a,0x3f5fe096,3 +np.float32,0xbdfb4220,0x3fd8ce24,3 +np.float32,0x3f179d28,0x3f6fdc7d,3 +np.float32,0xbecc2df0,0x3ffd911e,3 +np.float32,0x3f3dff0c,0x3f3c0782,3 +np.float32,0xbf58c4d8,0x4025295b,3 +np.float32,0xbdcf8438,0x3fd60dd3,3 +np.float32,0xbeeaf1b2,0x40030aa7,3 +np.float32,0xbf298a28,0x4012db45,3 +np.float32,0x3f6c4dec,0x3eca2678,3 +np.float32,0x3f4d1ac8,0x3f243a59,3 +np.float32,0x3f62cdfa,0x3ef6e8f8,3 +np.float32,0xbee8acce,0x4002b909,3 +np.float32,0xbd5f2af0,0x3fd00a15,3 +np.float32,0x3f5fde8e,0x3f01a453,3 +np.float32,0x3e95233c,0x3fa33aa4,3 +np.float32,0x3ecd2a60,0x3f9449be,3 +np.float32,0x3f10aa86,0x3f78619d,3 +np.float32,0x3f3888e8,0x3f440a70,3 +np.float32,0x3eeb5bfc,0x3f8bec7d,3 +np.float32,0xbe12d654,0x3fdb7ae6,3 +np.float32,0x3eca3110,0x3f951931,3 +np.float32,0xbe2d1b7c,0x3fdece05,3 +np.float32,0xbf29e9db,0x4012fb3a,3 +np.float32,0xbf0c50b8,0x4009a845,3 +np.float32,0xbed9f0e4,0x4000abef,3 +np.float64,0x3fd078ec5ba0f1d8,0x3ff4f7c00595a4d3,2 +np.float64,0xbfdbc39743b7872e,0x400027f85bce43b2,2 +np.float64,0xbfacd2707c39a4e0,0x3ffa08ae1075d766,2 +np.float64,0xbfc956890f32ad14,0x3ffc52308e7285fd,2 +np.float64,0xbf939c2298273840,0x3ff9706d18e6ea6b,2 +np.float64,0xbfe0d7048961ae09,0x4000fff4406bd395,2 +np.float64,0xbfe9d19b86f3a337,0x4004139bc683a69f,2 +np.float64,0x3fd35c7f90a6b900,0x3ff437220e9123f8,2 +np.float64,0x3fdddca171bbb944,0x3ff15da61e61ec08,2 +np.float64,0x3feb300de9f6601c,0x3fe1c6fadb68cdca,2 +np.float64,0xbfef1815327e302a,0x400739808fc6f964,2 +np.float64,0xbfe332d78e6665af,0x4001b6c4ef922f7c,2 +np.float64,0xbfedbf4dfb7b7e9c,0x40061cefed62a58b,2 +np.float64,0xbfd8dcc7e3b1b990,0x3fff84307713c2c3,2 +np.float64,0xbfedaf161c7b5e2c,0x400612027c1b2b25,2 +np.float64,0xbfed9bde897b37bd,0x4006053f05bd7d26,2 +np.float64,0xbfe081ebc26103d8,0x4000e70755eb66e0,2 +np.float64,0xbfe0366f9c606cdf,0x4000d11212f29afd,2 +np.float64,0xbfc7c115212f822c,0x3ffc1e8c9d58f7db,2 +np.float64,0x3fd8dd9a78b1bb34,0x3ff2bf8d0f4c9376,2 +np.float64,0xbfe54eff466a9dfe,0x4002655950b611f4,2 +np.float64,0xbfe4aad987e955b3,0x40022efb19882518,2 +np.float64,0x3f70231ca0204600,0x3ff911d834e7abf4,2 +np.float64,0x3fede01d047bc03a,0x3fd773cecbd8561b,2 +np.float64,0xbfd6a00d48ad401a,0x3ffee9fd7051633f,2 +np.float64,0x3fd44f3d50a89e7c,0x3ff3f74dd0fc9c91,2 +np.float64,0x3fe540f0d0ea81e2,0x3feb055a7c7d43d6,2 +np.float64,0xbf3ba2e200374800,0x3ff923b582650c6c,2 +np.float64,0x3fe93b2d3f72765a,0x3fe532fa15331072,2 +np.float64,0x3fee8ce5a17d19cc,0x3fd35666eefbe336,2 +np.float64,0x3fe55d5f8feabac0,0x3feadf3dcfe251d4,2 +np.float64,0xbfd1d2ede8a3a5dc,0x3ffda600041ac884,2 +np.float64,0xbfee41186e7c8231,0x40067a625cc6f64d,2 
+np.float64,0x3fe521a8b9ea4352,0x3feb2f1a6c8084e5,2 +np.float64,0x3fc65378ef2ca6f0,0x3ff653dfe81ee9f2,2 +np.float64,0x3fdaba0fbcb57420,0x3ff23d630995c6ba,2 +np.float64,0xbfe6b7441d6d6e88,0x4002e182539a2994,2 +np.float64,0x3fda00b6dcb4016c,0x3ff2703d516f28e7,2 +np.float64,0xbfe8699f01f0d33e,0x400382326920ea9e,2 +np.float64,0xbfef5889367eb112,0x4007832af5983793,2 +np.float64,0x3fefb57c8aff6afa,0x3fc14700ab38dcef,2 +np.float64,0xbfda0dfdaab41bfc,0x3fffd75b6fd497f6,2 +np.float64,0xbfb059c36620b388,0x3ffa27c528b97a42,2 +np.float64,0xbfdd450ab1ba8a16,0x40005dcac6ab50fd,2 +np.float64,0xbfe54d6156ea9ac2,0x400264ce9f3f0fb9,2 +np.float64,0xbfe076e94760edd2,0x4000e3d1374884da,2 +np.float64,0xbfc063286720c650,0x3ffb2fd1d6bff0ef,2 +np.float64,0xbfe24680f2e48d02,0x40016ddfbb5bcc0e,2 +np.float64,0xbfdc9351d2b926a4,0x400044e3756fb765,2 +np.float64,0x3fefb173d8ff62e8,0x3fc1bd5626f80850,2 +np.float64,0x3fe77c117a6ef822,0x3fe7e57089bad2ec,2 +np.float64,0xbfddbcebf7bb79d8,0x40006eadb60406b3,2 +np.float64,0xbfecf6625ff9ecc5,0x40059e6c6961a6db,2 +np.float64,0x3fdc8950b8b912a0,0x3ff1bcfb2e27795b,2 +np.float64,0xbfeb2fa517765f4a,0x4004b00aee3e6888,2 +np.float64,0x3fd0efc88da1df90,0x3ff4d8f7cbd8248a,2 +np.float64,0xbfe6641a2becc834,0x4002c43362c1bd0f,2 +np.float64,0xbfe28aec0fe515d8,0x400182c91d4df039,2 +np.float64,0xbfd5ede8d0abdbd2,0x3ffeba7baef05ae8,2 +np.float64,0xbfbd99702a3b32e0,0x3ffafca21c1053f1,2 +np.float64,0x3f96f043f82de080,0x3ff8c6384d5eb610,2 +np.float64,0xbfe5badbc9eb75b8,0x400289c8cd5873d1,2 +np.float64,0x3fe5c6bf95eb8d80,0x3fea5093e9a3e43e,2 +np.float64,0x3fb1955486232ab0,0x3ff8086d4c3e71d5,2 +np.float64,0xbfea145f397428be,0x4004302237a35871,2 +np.float64,0xbfdabe685db57cd0,0x400003e2e29725fb,2 +np.float64,0xbfefc79758ff8f2f,0x400831814e23bfc8,2 +np.float64,0x3fd7edb66cafdb6c,0x3ff3006c5123bfaf,2 +np.float64,0xbfeaf7644bf5eec8,0x400495a7963ce4ed,2 +np.float64,0x3fdf838d78bf071c,0x3ff0e527eed73800,2 +np.float64,0xbfd1a0165ba3402c,0x3ffd98c5ab76d375,2 +np.float64,0x3fd75b67a9aeb6d0,0x3ff327c8d80b17cf,2 +np.float64,0x3fc2aa9647255530,0x3ff6ca854b157df1,2 +np.float64,0xbfe0957fd4612b00,0x4000ecbf3932becd,2 +np.float64,0x3fda1792c0b42f24,0x3ff269fbb2360487,2 +np.float64,0x3fd480706ca900e0,0x3ff3ea53a6aa3ae8,2 +np.float64,0xbfd0780ed9a0f01e,0x3ffd4bfd544c7d47,2 +np.float64,0x3feeec0cd77dd81a,0x3fd0a8a241fdb441,2 +np.float64,0x3fcfa933e93f5268,0x3ff5223478621a6b,2 +np.float64,0x3fdad2481fb5a490,0x3ff236b86c6b2b49,2 +np.float64,0x3fe03b129de07626,0x3ff09f21fb868451,2 +np.float64,0xbfc01212cd202424,0x3ffb259a07159ae9,2 +np.float64,0x3febdb912df7b722,0x3fe0768e20dac8c9,2 +np.float64,0xbfbf2148763e4290,0x3ffb154c361ce5bf,2 +np.float64,0xbfb1a7eb1e234fd8,0x3ffa3cb37ac4a176,2 +np.float64,0xbfe26ad1ec64d5a4,0x400178f480ecce8d,2 +np.float64,0x3fe6d1cd1b6da39a,0x3fe8dc20ec4dad3b,2 +np.float64,0xbfede0e53dfbc1ca,0x4006340d3bdd7c97,2 +np.float64,0xbfe8fd1bd9f1fa38,0x4003bc3477f93f40,2 +np.float64,0xbfe329d0f26653a2,0x4001b3f345af5648,2 +np.float64,0xbfe4bb20eee97642,0x40023451404d6d08,2 +np.float64,0x3fb574832e2ae900,0x3ff7ca4bed0c7110,2 +np.float64,0xbfdf3c098fbe7814,0x4000a525bb72d659,2 +np.float64,0x3fa453e6d428a7c0,0x3ff87f512bb9b0c6,2 +np.float64,0x3faaec888435d920,0x3ff84a7d9e4def63,2 +np.float64,0xbfcdc240df3b8480,0x3ffce30ece754e7f,2 +np.float64,0xbf8c3220f0386440,0x3ff95a600ae6e157,2 +np.float64,0x3fe806076c700c0e,0x3fe71784a96c76eb,2 +np.float64,0x3fedf9b0e17bf362,0x3fd6e35fc0a7b6c3,2 +np.float64,0xbfe1b48422636908,0x400141bd8ed251bc,2 +np.float64,0xbfe82e2817705c50,0x40036b5a5556d021,2 
+np.float64,0xbfc8ef8ff931df20,0x3ffc450ffae7ce58,2 +np.float64,0xbfe919fa94f233f5,0x4003c7cce4697fe8,2 +np.float64,0xbfc3ace4a72759c8,0x3ffb9a197bb22651,2 +np.float64,0x3fe479f71ee8f3ee,0x3fec0bd2f59097aa,2 +np.float64,0xbfeeb54a967d6a95,0x4006da12c83649c5,2 +np.float64,0x3fe5e74ea8ebce9e,0x3fea2407cef0f08c,2 +np.float64,0x3fb382baf2270570,0x3ff7e98213b921ba,2 +np.float64,0xbfdd86fd3cbb0dfa,0x40006712952ddbcf,2 +np.float64,0xbfd250eb52a4a1d6,0x3ffdc6d56253b1cd,2 +np.float64,0x3fea30c4ed74618a,0x3fe3962deba4f30e,2 +np.float64,0x3fc895963d312b30,0x3ff60a5d52fcbccc,2 +np.float64,0x3fe9cc4f6273989e,0x3fe442740942c80f,2 +np.float64,0xbfe8769f5cf0ed3f,0x4003873b4cb5bfce,2 +np.float64,0xbfe382f3726705e7,0x4001cfeb3204d110,2 +np.float64,0x3fbfe9a9163fd350,0x3ff7220bd2b97c8f,2 +np.float64,0xbfca6162bb34c2c4,0x3ffc743f939358f1,2 +np.float64,0x3fe127a014e24f40,0x3ff0147c4bafbc39,2 +np.float64,0x3fee9cdd2a7d39ba,0x3fd2e9ef45ab122f,2 +np.float64,0x3fa9ffb97c33ff80,0x3ff851e69fa3542c,2 +np.float64,0x3fd378f393a6f1e8,0x3ff42faafa77de56,2 +np.float64,0xbfe4df1e1669be3c,0x400240284df1c321,2 +np.float64,0x3fed0ed79bfa1db0,0x3fdba89060aa96fb,2 +np.float64,0x3fdef2ee52bde5dc,0x3ff10e942244f4f1,2 +np.float64,0xbfdab38f3ab5671e,0x40000264d8d5b49b,2 +np.float64,0x3fbe95a96e3d2b50,0x3ff73774cb59ce2d,2 +np.float64,0xbfe945653af28aca,0x4003d9657bf129c2,2 +np.float64,0xbfb18f3f2a231e80,0x3ffa3b27cba23f50,2 +np.float64,0xbfef50bf22fea17e,0x40077998a850082c,2 +np.float64,0xbfc52b8c212a5718,0x3ffbca8d6560a2da,2 +np.float64,0x7ff8000000000000,0x7ff8000000000000,2 +np.float64,0x3fc1e3a02d23c740,0x3ff6e3a5fcac12a4,2 +np.float64,0xbfeb5e4ea5f6bc9d,0x4004c65abef9426f,2 +np.float64,0xbfe425b132684b62,0x400203c29608b00d,2 +np.float64,0xbfbfa1c19e3f4380,0x3ffb1d6367711158,2 +np.float64,0x3fbba2776e3744f0,0x3ff766f6df586fad,2 +np.float64,0xbfb5d0951e2ba128,0x3ffa7f712480b25e,2 +np.float64,0xbfe949fdab7293fb,0x4003db4530a18507,2 +np.float64,0xbfcf13519b3e26a4,0x3ffd0e6f0a6c38ee,2 +np.float64,0x3f91e6d72823cdc0,0x3ff8da5f08909b6e,2 +np.float64,0x3f78a2e360314600,0x3ff909586727caef,2 +np.float64,0xbfe1ae7e8fe35cfd,0x40013fef082caaa3,2 +np.float64,0x3fe97a6dd1f2f4dc,0x3fe4cb4b99863478,2 +np.float64,0xbfcc1e1e69383c3c,0x3ffcad250a949843,2 +np.float64,0x3faccb797c399700,0x3ff83b8066b49330,2 +np.float64,0x3fe7a2647a6f44c8,0x3fe7acceae6ec425,2 +np.float64,0xbfec3bfcf0f877fa,0x4005366af5a7175b,2 +np.float64,0xbfe2310b94646217,0x400167588fceb228,2 +np.float64,0x3feb167372762ce6,0x3fe1f74c0288fad8,2 +np.float64,0xbfb722b4ee2e4568,0x3ffa94a81b94dfca,2 +np.float64,0x3fc58da9712b1b50,0x3ff66cf8f072aa14,2 +np.float64,0xbfe7fff9d6effff4,0x400359d01b8141de,2 +np.float64,0xbfd56691c5aacd24,0x3ffe9686697797e8,2 +np.float64,0x3fe3ab0557e7560a,0x3fed1593959ef8e8,2 +np.float64,0x3fdd458995ba8b14,0x3ff1883d6f22a322,2 +np.float64,0x3fe7bbed2cef77da,0x3fe786d618094cda,2 +np.float64,0x3fa31a30c4263460,0x3ff88920b936fd79,2 +np.float64,0x8010000000000000,0x3ff921fb54442d18,2 +np.float64,0xbfdc5effbdb8be00,0x40003d95fe0dff11,2 +np.float64,0x3febfdad7e77fb5a,0x3fe030b5297dbbdd,2 +np.float64,0x3fe4f3f3b2e9e7e8,0x3feb6bc59eeb2be2,2 +np.float64,0xbfe44469fd6888d4,0x40020daa5488f97a,2 +np.float64,0xbfe19fddb0e33fbc,0x40013b8c902b167b,2 +np.float64,0x3fa36ad17c26d5a0,0x3ff8869b3e828134,2 +np.float64,0x3fcf23e6c93e47d0,0x3ff5336491a65d1e,2 +np.float64,0xffefffffffffffff,0x7ff8000000000000,2 +np.float64,0xbfe375f4cee6ebea,0x4001cbd2ba42e8b5,2 +np.float64,0xbfaef1215c3de240,0x3ffa19ab02081189,2 +np.float64,0xbfec39c59c78738b,0x4005353dc38e3d78,2 
+np.float64,0x7ff4000000000000,0x7ffc000000000000,2 +np.float64,0xbfec09bb7b781377,0x40051c0a5754cb3a,2 +np.float64,0x3fe8301f2870603e,0x3fe6d783c5ef0944,2 +np.float64,0xbfed418c987a8319,0x4005cbae1b8693d1,2 +np.float64,0xbfdc16e7adb82dd0,0x4000338b634eaf03,2 +np.float64,0x3fd5d361bdaba6c4,0x3ff390899300a54c,2 +np.float64,0xbff0000000000000,0x400921fb54442d18,2 +np.float64,0x3fd5946232ab28c4,0x3ff3a14767813f29,2 +np.float64,0x3fe833e5fef067cc,0x3fe6d1be720edf2d,2 +np.float64,0x3fedf746a67bee8e,0x3fd6f127fdcadb7b,2 +np.float64,0x3fd90353d3b206a8,0x3ff2b54f7d369ba9,2 +np.float64,0x3fec4b4b72f89696,0x3fdf1b38d2e93532,2 +np.float64,0xbfe9c67596f38ceb,0x40040ee5f524ce03,2 +np.float64,0x3fd350d91aa6a1b4,0x3ff43a303c0da27f,2 +np.float64,0x3fd062603ba0c4c0,0x3ff4fd9514b935d8,2 +np.float64,0xbfe24c075f64980e,0x40016f8e9f2663b3,2 +np.float64,0x3fdaa546eeb54a8c,0x3ff2431a88fef1d5,2 +np.float64,0x3fe92b8151f25702,0x3fe54c67e005cbf9,2 +np.float64,0xbfe1be8b8a637d17,0x400144c078f67c6e,2 +np.float64,0xbfe468a1d7e8d144,0x40021964b118cbf4,2 +np.float64,0xbfdc6de4fab8dbca,0x40003fa9e27893d8,2 +np.float64,0xbfe3c2788ae784f1,0x4001e407ba3aa956,2 +np.float64,0xbfe2bf1542e57e2a,0x400192d4a9072016,2 +np.float64,0xbfe6982f4c6d305e,0x4002d681b1991bbb,2 +np.float64,0x3fdbceb1c4b79d64,0x3ff1f0f117b9d354,2 +np.float64,0x3fdb3705e7b66e0c,0x3ff21af01ca27ace,2 +np.float64,0x3fe3e6358ee7cc6c,0x3fecca4585053983,2 +np.float64,0xbfe16d6a9a62dad5,0x40012c7988aee247,2 +np.float64,0xbfce66e4413ccdc8,0x3ffcf83b08043a0c,2 +np.float64,0xbfeb6cd46876d9a9,0x4004cd61733bfb79,2 +np.float64,0xbfdb1cdd64b639ba,0x400010e6cf087cb7,2 +np.float64,0xbfe09e4e30e13c9c,0x4000ef5277c47721,2 +np.float64,0xbfee88dd127d11ba,0x4006b3cd443643ac,2 +np.float64,0xbf911e06c8223c00,0x3ff966744064fb05,2 +np.float64,0xbfe8f22bc471e458,0x4003b7d5513af295,2 +np.float64,0x3fe3d7329567ae66,0x3fecdd6c241f83ee,2 +np.float64,0x3fc8a9404b315280,0x3ff607dc175edf3f,2 +np.float64,0x3fe7eb80ad6fd702,0x3fe73f8fdb3e6a6c,2 +np.float64,0x3fef0931e37e1264,0x3fcf7fde80a3c5ab,2 +np.float64,0x3fe2ed3c3fe5da78,0x3fee038334cd1860,2 +np.float64,0x3fe251fdb8e4a3fc,0x3feec26dc636ac31,2 +np.float64,0x3feb239436764728,0x3fe1de9462455da7,2 +np.float64,0xbfe63fd7eeec7fb0,0x4002b78cfa3d2fa6,2 +np.float64,0x3fdd639cb5bac738,0x3ff17fc7d92b3eee,2 +np.float64,0x3fd0a7a13fa14f44,0x3ff4eba95c559c84,2 +np.float64,0x3fe804362d70086c,0x3fe71a44cd91ffa4,2 +np.float64,0xbfe0fecf6e61fd9f,0x40010bac8edbdc4f,2 +np.float64,0x3fcb74acfd36e958,0x3ff5ac84437f1b7c,2 +np.float64,0x3fe55053e1eaa0a8,0x3feaf0bf76304c30,2 +np.float64,0x3fc06b508d20d6a0,0x3ff7131da17f3902,2 +np.float64,0x3fdd78750fbaf0ec,0x3ff179e97fbf7f65,2 +np.float64,0x3fe44cb946689972,0x3fec46859b5da6be,2 +np.float64,0xbfeb165a7ff62cb5,0x4004a41c9cc9589e,2 +np.float64,0x3fe01ffb2b603ff6,0x3ff0aed52bf1c3c1,2 +np.float64,0x3f983c60a83078c0,0x3ff8c107805715ab,2 +np.float64,0x3fd8b5ff13b16c00,0x3ff2ca4a837a476a,2 +np.float64,0x3fc80510a1300a20,0x3ff61cc3b4af470b,2 +np.float64,0xbfd3935b06a726b6,0x3ffe1b3a2066f473,2 +np.float64,0xbfdd4a1f31ba943e,0x40005e81979ed445,2 +np.float64,0xbfa76afdd42ed600,0x3ff9dd63ffba72d2,2 +np.float64,0x3fe7e06d496fc0da,0x3fe7503773566707,2 +np.float64,0xbfea5fbfe874bf80,0x40045106af6c538f,2 +np.float64,0x3fee000c487c0018,0x3fd6bef1f8779d88,2 +np.float64,0xbfb39f4ee2273ea0,0x3ffa5c3f2b3888ab,2 +np.float64,0x3feb9247b0772490,0x3fe1092d2905efce,2 +np.float64,0x3fdaa39b4cb54738,0x3ff243901da0da17,2 +np.float64,0x3fcd5b2b493ab658,0x3ff56e262e65b67d,2 +np.float64,0x3fcf82512f3f04a0,0x3ff52738847c55f2,2 
+np.float64,0x3fe2af5e0c655ebc,0x3fee4ffab0c82348,2 +np.float64,0xbfec0055d0f800ac,0x4005172d325933e8,2 +np.float64,0x3fe71da9336e3b52,0x3fe86f2e12f6e303,2 +np.float64,0x3fbefab0723df560,0x3ff731188ac716ec,2 +np.float64,0xbfe11dca28623b94,0x400114d3d4ad370d,2 +np.float64,0x3fbcbda8ca397b50,0x3ff755281078abd4,2 +np.float64,0x3fe687c7126d0f8e,0x3fe945099a7855cc,2 +np.float64,0xbfecde510579bca2,0x400590606e244591,2 +np.float64,0xbfd72de681ae5bce,0x3fff0ff797ad1755,2 +np.float64,0xbfe7c0f7386f81ee,0x40034226e0805309,2 +np.float64,0x3fd8d55619b1aaac,0x3ff2c1cb3267b14e,2 +np.float64,0x3fecd7a2ad79af46,0x3fdcabbffeaa279e,2 +np.float64,0x3fee7fb1a8fcff64,0x3fd3ae620286fe19,2 +np.float64,0xbfc5f3a3592be748,0x3ffbe3ed204d9842,2 +np.float64,0x3fec9e5527793caa,0x3fddb00bc8687e4b,2 +np.float64,0x3fc35dc70f26bb90,0x3ff6b3ded7191e33,2 +np.float64,0x3fda91c07ab52380,0x3ff24878848fec8f,2 +np.float64,0xbfe12cde1fe259bc,0x4001194ab99d5134,2 +np.float64,0xbfd35ab736a6b56e,0x3ffe0c5ce8356d16,2 +np.float64,0x3fc9c94123339280,0x3ff5e3239f3ad795,2 +np.float64,0xbfe72f54926e5ea9,0x40030c95d1d02b56,2 +np.float64,0xbfee283186fc5063,0x40066786bd0feb79,2 +np.float64,0xbfe7b383f56f6708,0x40033d23ef0e903d,2 +np.float64,0x3fd6037327ac06e8,0x3ff383bf2f311ddb,2 +np.float64,0x3fe0e344b561c68a,0x3ff03cd90fd4ba65,2 +np.float64,0xbfef0ff54b7e1feb,0x400730fa5fce381e,2 +np.float64,0x3fd269929da4d324,0x3ff476b230136d32,2 +np.float64,0xbfbc5fb9f638bf70,0x3ffae8e63a4e3234,2 +np.float64,0xbfe2e8bc84e5d179,0x40019fb5874f4310,2 +np.float64,0xbfd7017413ae02e8,0x3fff040d843c1531,2 +np.float64,0x3fefd362fa7fa6c6,0x3fbababc3ddbb21d,2 +np.float64,0x3fecb62ed3f96c5e,0x3fdd44ba77ccff94,2 +np.float64,0xbfb16fad5222df58,0x3ffa392d7f02b522,2 +np.float64,0x3fbcf4abc639e950,0x3ff751b23c40e27f,2 +np.float64,0x3fe128adbce2515c,0x3ff013dc91db04b5,2 +np.float64,0x3fa5dd9d842bbb40,0x3ff87300c88d512f,2 +np.float64,0xbfe61efcaf6c3dfa,0x4002ac27117f87c9,2 +np.float64,0x3feffe1233fffc24,0x3f9638d3796a4954,2 +np.float64,0xbfe78548b66f0a92,0x40032c0447b7bfe2,2 +np.float64,0x3fe7bd38416f7a70,0x3fe784e86d6546b6,2 +np.float64,0x3fe0d6bc5961ad78,0x3ff0443899e747ac,2 +np.float64,0xbfd0bb6e47a176dc,0x3ffd5d6dff390d41,2 +np.float64,0xbfec1d16b8f83a2e,0x40052620378d3b78,2 +np.float64,0x3fe9bbec20f377d8,0x3fe45e167c7a3871,2 +np.float64,0xbfeed81d9dfdb03b,0x4006f9dec2db7310,2 +np.float64,0xbfe1e35179e3c6a3,0x40014fd1b1186ac0,2 +np.float64,0xbfc9c7e605338fcc,0x3ffc60a6bd1a7126,2 +np.float64,0x3feec92810fd9250,0x3fd1afde414ab338,2 +np.float64,0xbfeb9f1d90773e3b,0x4004e606b773f5b0,2 +np.float64,0x3fcbabdf6b3757c0,0x3ff5a573866404af,2 +np.float64,0x3fe9f4e1fff3e9c4,0x3fe3fd7b6712dd7b,2 +np.float64,0xbfe6c0175ded802e,0x4002e4a4dc12f3fe,2 +np.float64,0xbfeefc96f37df92e,0x40071d367cd721ff,2 +np.float64,0xbfeaab58dc7556b2,0x400472ce37e31e50,2 +np.float64,0xbfc62668772c4cd0,0x3ffbea5e6c92010a,2 +np.float64,0x3fafe055fc3fc0a0,0x3ff822ce6502519a,2 +np.float64,0x3fd7b648ffaf6c90,0x3ff30f5a42f11418,2 +np.float64,0xbfe934fe827269fd,0x4003d2b9fed9e6ad,2 +np.float64,0xbfe6d691f2edad24,0x4002eca6a4b1797b,2 +np.float64,0x3fc7e62ced2fcc58,0x3ff620b1f44398b7,2 +np.float64,0xbfc89be9f33137d4,0x3ffc3a67a497f59c,2 +np.float64,0xbfe7793d536ef27a,0x40032794bf14dd64,2 +np.float64,0x3fde55a02dbcab40,0x3ff13b5f82d223e4,2 +np.float64,0xbfc8eabd7b31d57c,0x3ffc4472a81cb6d0,2 +np.float64,0x3fddcb5468bb96a8,0x3ff162899c381f2e,2 +np.float64,0xbfec7554d8f8eaaa,0x40055550e18ec463,2 +np.float64,0x3fd0b6e8b6a16dd0,0x3ff4e7b4781a50e3,2 +np.float64,0x3fedaae01b7b55c0,0x3fd8964916cdf53d,2 
+np.float64,0x3fe0870f8a610e20,0x3ff072e7db95c2a2,2 +np.float64,0xbfec3e3ce2787c7a,0x4005379d0f6be873,2 +np.float64,0xbfe65502586caa04,0x4002beecff89147f,2 +np.float64,0xbfe0df39a961be74,0x4001025e36d1c061,2 +np.float64,0xbfb5d8edbe2bb1d8,0x3ffa7ff72b7d6a2b,2 +np.float64,0xbfde89574bbd12ae,0x40008ba4cd74544d,2 +np.float64,0xbfe72938f0ee5272,0x40030a5efd1acb6d,2 +np.float64,0xbfcd500d133aa01c,0x3ffcd462f9104689,2 +np.float64,0x3fe0350766606a0e,0x3ff0a2a3664e2c14,2 +np.float64,0xbfc892fb573125f8,0x3ffc3944641cc69d,2 +np.float64,0xbfba7dc7c634fb90,0x3ffaca9a6a0ffe61,2 +np.float64,0xbfeac94478759289,0x40048068a8b83e45,2 +np.float64,0xbfe8f60c1af1ec18,0x4003b961995b6e51,2 +np.float64,0x3fea1c0817743810,0x3fe3ba28c1643cf7,2 +np.float64,0xbfe42a0fefe85420,0x4002052aadd77f01,2 +np.float64,0x3fd2c61c56a58c38,0x3ff45e84cb9a7fa9,2 +np.float64,0xbfd83fb7cdb07f70,0x3fff59ab4790074c,2 +np.float64,0x3fd95e630fb2bcc8,0x3ff29c8bee1335ad,2 +np.float64,0x3feee88f387dd11e,0x3fd0c3ad3ded4094,2 +np.float64,0x3fe061291160c252,0x3ff0890010199bbc,2 +np.float64,0xbfdc7db3b5b8fb68,0x400041dea3759443,2 +np.float64,0x3fee23b320fc4766,0x3fd5ee73d7aa5c56,2 +np.float64,0xbfdc25c590b84b8c,0x4000359cf98a00b4,2 +np.float64,0xbfd63cbfd2ac7980,0x3ffecf7b9cf99b3c,2 +np.float64,0xbfbeb3c29a3d6788,0x3ffb0e66ecc0fc3b,2 +np.float64,0xbfd2f57fd6a5eb00,0x3ffdf1d7c79e1532,2 +np.float64,0xbfab3eda9c367db0,0x3ff9fc0c875f42e9,2 +np.float64,0xbfe12df1c6e25be4,0x4001199c673e698c,2 +np.float64,0x3fef8ab23a7f1564,0x3fc5aff358c59f1c,2 +np.float64,0x3fe562f50feac5ea,0x3fead7bce205f7d9,2 +np.float64,0x3fdc41adbeb8835c,0x3ff1d0f71341b8f2,2 +np.float64,0x3fe2748967e4e912,0x3fee9837f970ff9e,2 +np.float64,0xbfdaa89d57b5513a,0x400000e3889ba4cf,2 +np.float64,0x3fdf2a137dbe5428,0x3ff0fecfbecbbf86,2 +np.float64,0xbfea1fdcd2f43fba,0x4004351974b32163,2 +np.float64,0xbfe34a93a3e69528,0x4001be323946a3e0,2 +np.float64,0x3fe929bacff25376,0x3fe54f47bd7f4cf2,2 +np.float64,0xbfd667fbd6accff8,0x3ffedb04032b3a1a,2 +np.float64,0xbfeb695796f6d2af,0x4004cbb08ec6f525,2 +np.float64,0x3fd204df2ea409c0,0x3ff490f51e6670f5,2 +np.float64,0xbfd89a2757b1344e,0x3fff722127b988c4,2 +np.float64,0xbfd0787187a0f0e4,0x3ffd4c16dbe94f32,2 +np.float64,0x3fd44239bfa88474,0x3ff3fabbfb24b1fa,2 +np.float64,0xbfeb0b3489f61669,0x40049ee33d811d33,2 +np.float64,0x3fdcf04eaab9e09c,0x3ff1a02a29996c4e,2 +np.float64,0x3fd4c51e4fa98a3c,0x3ff3d8302c68fc9a,2 +np.float64,0x3fd1346645a268cc,0x3ff4c72b4970ecaf,2 +np.float64,0x3fd6a89d09ad513c,0x3ff357af6520afac,2 +np.float64,0xbfba0f469a341e90,0x3ffac3a8f41bed23,2 +np.float64,0xbfe13f8ddce27f1c,0x40011ed557719fd6,2 +np.float64,0x3fd43e5e26a87cbc,0x3ff3fbc040fc30dc,2 +np.float64,0x3fe838125a707024,0x3fe6cb5c987248f3,2 +np.float64,0x3fe128c30c625186,0x3ff013cff238dd1b,2 +np.float64,0xbfcd4718833a8e30,0x3ffcd33c96bde6f9,2 +np.float64,0x3fe43fcd08e87f9a,0x3fec573997456ec1,2 +np.float64,0xbfe9a29104734522,0x4003ffd502a1b57f,2 +np.float64,0xbfe4709d7968e13b,0x40021bfc5cd55af4,2 +np.float64,0x3fd21c3925a43874,0x3ff48adf48556cbb,2 +np.float64,0x3fe9a521b2734a44,0x3fe4844fc054e839,2 +np.float64,0xbfdfa6a912bf4d52,0x4000b4730ad8521e,2 +np.float64,0x3fe3740702e6e80e,0x3fed5b106283b6ed,2 +np.float64,0x3fd0a3aa36a14754,0x3ff4ecb02a5e3f49,2 +np.float64,0x3fdcb903d0b97208,0x3ff1afa5d692c5b9,2 +np.float64,0xbfe7d67839efacf0,0x40034a3146abf6f2,2 +np.float64,0x3f9981c6d8330380,0x3ff8bbf1853d7b90,2 +np.float64,0xbfe9d4191673a832,0x400414a9ab453c5d,2 +np.float64,0x3fef0a1e5c7e143c,0x3fcf70b02a54c415,2 +np.float64,0xbfd996dee6b32dbe,0x3fffb6cf707ad8e4,2 
+np.float64,0x3fe19bef17e337de,0x3fef9e70d4fcedae,2 +np.float64,0x3fe34a59716694b2,0x3fed8f6d5cfba474,2 +np.float64,0x3fdf27e27cbe4fc4,0x3ff0ff70500e0c7c,2 +np.float64,0xbfe19df87fe33bf1,0x40013afb401de24c,2 +np.float64,0xbfbdfd97ba3bfb30,0x3ffb02ef8c225e57,2 +np.float64,0xbfe3d3417267a683,0x4001e95ed240b0f8,2 +np.float64,0x3fe566498b6acc94,0x3fead342957d4910,2 +np.float64,0x3ff0000000000000,0x0,2 +np.float64,0x3feb329bd8766538,0x3fe1c2225aafe3b4,2 +np.float64,0xbfc19ca703233950,0x3ffb575b5df057b9,2 +np.float64,0x3fe755027d6eaa04,0x3fe81eb99c262e00,2 +np.float64,0xbfe6c2b8306d8570,0x4002e594199f9eec,2 +np.float64,0x3fd69438e6ad2870,0x3ff35d2275ae891d,2 +np.float64,0x3fda3e7285b47ce4,0x3ff25f5573dd47ae,2 +np.float64,0x3fe7928a166f2514,0x3fe7c4490ef4b9a9,2 +np.float64,0xbfd4eb71b9a9d6e4,0x3ffe75e8ccb74be1,2 +np.float64,0xbfcc3a07f1387410,0x3ffcb0b8af914a5b,2 +np.float64,0xbfe6e80225edd004,0x4002f2e26eae8999,2 +np.float64,0xbfb347728a268ee8,0x3ffa56bd526a12db,2 +np.float64,0x3fe5140ead6a281e,0x3feb4132c9140a1c,2 +np.float64,0xbfc147f125228fe4,0x3ffb4cab18b9050f,2 +np.float64,0xbfcb9145b537228c,0x3ffc9b1b6227a8c9,2 +np.float64,0xbfda84ef4bb509de,0x3ffff7f8a674e17d,2 +np.float64,0x3fd2eb6bbfa5d6d8,0x3ff454c225529d7e,2 +np.float64,0x3fe18c95f1e3192c,0x3fefb0cf0efba75a,2 +np.float64,0x3fe78606efef0c0e,0x3fe7d6c3a092d64c,2 +np.float64,0x3fbad5119a35aa20,0x3ff773dffe3ce660,2 +np.float64,0x3fd0cf5903a19eb4,0x3ff4e15fd21fdb42,2 +np.float64,0xbfd85ce90bb0b9d2,0x3fff618ee848e974,2 +np.float64,0x3fe90e11b9f21c24,0x3fe57be62f606f4a,2 +np.float64,0x3fd7a2040faf4408,0x3ff314ce85457ec2,2 +np.float64,0xbfd73fba69ae7f74,0x3fff14bff3504811,2 +np.float64,0x3fa04b4bd42096a0,0x3ff89f9b52f521a2,2 +np.float64,0xbfd7219ce5ae433a,0x3fff0cac0b45cc18,2 +np.float64,0xbfe0cf4661e19e8d,0x4000fdadb14e3c22,2 +np.float64,0x3fd07469fea0e8d4,0x3ff4f8eaa9b2394a,2 +np.float64,0x3f9b05c5d8360b80,0x3ff8b5e10672db5c,2 +np.float64,0x3fe4c25b916984b8,0x3febad29bd0e25e2,2 +np.float64,0xbfde8b4891bd1692,0x40008beb88d5c409,2 +np.float64,0xbfe199a7efe33350,0x400139b089aee21c,2 +np.float64,0x3fecdad25cf9b5a4,0x3fdc9d062867e8c3,2 +np.float64,0xbfe979b277f2f365,0x4003eedb061e25a4,2 +np.float64,0x3fc8c7311f318e60,0x3ff6040b9aeaad9d,2 +np.float64,0x3fd2b605b8a56c0c,0x3ff462b9a955c224,2 +np.float64,0x3fc073b6ad20e770,0x3ff7120e9f2fd63c,2 +np.float64,0xbfec60ede678c1dc,0x40054a3863e24dc2,2 +np.float64,0x3fe225171be44a2e,0x3feef910dca420ea,2 +np.float64,0xbfd7529762aea52e,0x3fff19d00661f650,2 +np.float64,0xbfd781783daf02f0,0x3fff2667b90be461,2 +np.float64,0x3fe3f6ec6d67edd8,0x3fecb4e814a2e33a,2 +np.float64,0x3fece6702df9cce0,0x3fdc6719d92a50d2,2 +np.float64,0xbfb5c602ce2b8c08,0x3ffa7ec761ba856a,2 +np.float64,0xbfd61f0153ac3e02,0x3ffec78e3b1a6c4d,2 +np.float64,0xbfec3462b2f868c5,0x400532630bbd7050,2 +np.float64,0xbfdd248485ba490a,0x400059391c07c1bb,2 +np.float64,0xbfd424921fa84924,0x3ffe416a85d1dcdf,2 +np.float64,0x3fbb23a932364750,0x3ff76eef79209f7f,2 +np.float64,0x3fca248b0f344918,0x3ff5d77c5c1b4e5e,2 +np.float64,0xbfe69af4a4ed35ea,0x4002d77c2e4fbd4e,2 +np.float64,0x3fdafe3cdcb5fc78,0x3ff22a9be6efbbf2,2 +np.float64,0xbfebba3377f77467,0x4004f3836e1fe71a,2 +np.float64,0xbfe650fae06ca1f6,0x4002bd851406377c,2 +np.float64,0x3fda630007b4c600,0x3ff2554f1832bd94,2 +np.float64,0xbfda8107d9b50210,0x3ffff6e6209659f3,2 +np.float64,0x3fea759a02f4eb34,0x3fe31d1a632c9aae,2 +np.float64,0x3fbf88149e3f1030,0x3ff728313aa12ccb,2 +np.float64,0x3f7196d2a0232e00,0x3ff910647e1914c1,2 +np.float64,0x3feeae51d17d5ca4,0x3fd2709698d31f6f,2 
+np.float64,0xbfd73cd663ae79ac,0x3fff13f96300b55a,2 +np.float64,0x3fd4fc5f06a9f8c0,0x3ff3c99359854b97,2 +np.float64,0x3fb29f5d6e253ec0,0x3ff7f7c20e396b20,2 +np.float64,0xbfd757c82aaeaf90,0x3fff1b34c6141e98,2 +np.float64,0x3fc56fd4cf2adfa8,0x3ff670c145122909,2 +np.float64,0x3fc609a2f52c1348,0x3ff65d3ef3cade2c,2 +np.float64,0xbfe1de631163bcc6,0x40014e5528fadb73,2 +np.float64,0xbfe7eb4a726fd695,0x40035202f49d95c4,2 +np.float64,0xbfc9223771324470,0x3ffc4b84d5e263b9,2 +np.float64,0x3fee91a8a87d2352,0x3fd3364befde8de6,2 +np.float64,0x3fbc9784fe392f10,0x3ff7578e29f6a1b2,2 +np.float64,0xbfec627c2c78c4f8,0x40054b0ff2cb9c55,2 +np.float64,0xbfb8b406a6316810,0x3ffaadd97062fb8c,2 +np.float64,0xbfecf98384f9f307,0x4005a043d9110d79,2 +np.float64,0xbfe5834bab6b0698,0x400276f114aebee4,2 +np.float64,0xbfd90f391eb21e72,0x3fff91e26a8f48f3,2 +np.float64,0xbfee288ce2fc511a,0x400667cb09aa04b3,2 +np.float64,0x3fd5aa5e32ab54bc,0x3ff39b7080a52214,2 +np.float64,0xbfee7ef907fcfdf2,0x4006ab96a8eba4c5,2 +np.float64,0x3fd6097973ac12f4,0x3ff3822486978bd1,2 +np.float64,0xbfe02d14b8e05a2a,0x4000ce5be53047b1,2 +np.float64,0xbf9c629a6838c540,0x3ff993897728c3f9,2 +np.float64,0xbfee2024667c4049,0x40066188782fb1f0,2 +np.float64,0xbfa42a88fc285510,0x3ff9c35a4bbce104,2 +np.float64,0x3fa407af5c280f60,0x3ff881b360d8eea1,2 +np.float64,0x3fed0ba42cfa1748,0x3fdbb7d55609175f,2 +np.float64,0xbfdd0b5844ba16b0,0x400055b0bb59ebb2,2 +np.float64,0x3fd88d97e6b11b30,0x3ff2d53c1ecb8f8c,2 +np.float64,0xbfeb7a915ef6f523,0x4004d410812eb84c,2 +np.float64,0xbfb5f979ca2bf2f0,0x3ffa8201d73cd4ca,2 +np.float64,0x3fb3b65dd6276cc0,0x3ff7e64576199505,2 +np.float64,0x3fcd47a7793a8f50,0x3ff570a7b672f160,2 +np.float64,0xbfa41dd30c283ba0,0x3ff9c2f488127eb3,2 +np.float64,0x3fe4b1ea1f6963d4,0x3febc2bed7760427,2 +np.float64,0xbfdd0f81d2ba1f04,0x400056463724b768,2 +np.float64,0x3fd15d93f7a2bb28,0x3ff4bc7a24eacfd7,2 +np.float64,0xbfe3213af8e64276,0x4001b14579dfded3,2 +np.float64,0x3fd90dfbeab21bf8,0x3ff2b26a6c2c3bb3,2 +np.float64,0xbfd02d54bca05aaa,0x3ffd38ab3886b203,2 +np.float64,0x3fc218dcad2431b8,0x3ff6dced56d5b417,2 +np.float64,0x3fea5edf71f4bdbe,0x3fe3455ee09f27e6,2 +np.float64,0x3fa74319042e8640,0x3ff867d224545438,2 +np.float64,0x3fd970ad92b2e15c,0x3ff2979084815dc1,2 +np.float64,0x3fce0a4bf73c1498,0x3ff557a4df32df3e,2 +np.float64,0x3fef5c8e10feb91c,0x3fc99ca0eeaaebe4,2 +np.float64,0xbfedae997ffb5d33,0x400611af18f407ab,2 +np.float64,0xbfbcf07d6239e0f8,0x3ffaf201177a2d36,2 +np.float64,0xbfc3c52541278a4c,0x3ffb9d2af0408e4a,2 +np.float64,0x3fe4ef44e4e9de8a,0x3feb71f7331255e5,2 +np.float64,0xbfccd9f5f539b3ec,0x3ffcc53a99339592,2 +np.float64,0xbfda32c745b4658e,0x3fffe16e8727ef89,2 +np.float64,0xbfef54932a7ea926,0x40077e4605e61ca1,2 +np.float64,0x3fe9d4ae3573a95c,0x3fe4344a069a3fd0,2 +np.float64,0x3fda567e73b4acfc,0x3ff258bd77a663c7,2 +np.float64,0xbfd5bcac5eab7958,0x3ffead6379c19c52,2 +np.float64,0xbfee5e56f97cbcae,0x40069131fc54018d,2 +np.float64,0x3fc2d4413925a880,0x3ff6c54163816298,2 +np.float64,0xbfe9ddf6e873bbee,0x400418d8c722f7c5,2 +np.float64,0x3fdaf2a683b5e54c,0x3ff22dcda599d69c,2 +np.float64,0xbfca69789f34d2f0,0x3ffc7547ff10b1a6,2 +np.float64,0x3fed076f62fa0ede,0x3fdbcbda03c1d72a,2 +np.float64,0xbfcb38326f367064,0x3ffc8fb55dadeae5,2 +np.float64,0x3fe1938705e3270e,0x3fefa88130c5adda,2 +np.float64,0x3feaffae3b75ff5c,0x3fe221e3da537c7e,2 +np.float64,0x3fefc94acb7f9296,0x3fbd9a360ace67b4,2 +np.float64,0xbfe8bddeb0f17bbe,0x4003a316685c767e,2 +np.float64,0x3fbe10fbee3c21f0,0x3ff73fceb10650f5,2 +np.float64,0x3fde9126c1bd224c,0x3ff12a742f734d0a,2 
+np.float64,0xbfe9686c91f2d0d9,0x4003e7bc6ee77906,2 +np.float64,0xbfb1ba4892237490,0x3ffa3dda064c2509,2 +np.float64,0xbfe2879100e50f22,0x400181c1a5b16f0f,2 +np.float64,0x3fd1cd40b6a39a80,0x3ff49f70e3064e95,2 +np.float64,0xbfc965869132cb0c,0x3ffc5419f3b43701,2 +np.float64,0x3fea7a6f2874f4de,0x3fe31480fb2dd862,2 +np.float64,0x3fc3bc56892778b0,0x3ff6a7e8fa0e8b0e,2 +np.float64,0x3fec1ed451f83da8,0x3fdfd78e564b8ad7,2 +np.float64,0x3feb77d16df6efa2,0x3fe13d083344e45e,2 +np.float64,0xbfe822e7c67045d0,0x400367104a830cf6,2 +np.float64,0x8000000000000001,0x3ff921fb54442d18,2 +np.float64,0xbfd4900918a92012,0x3ffe5dc0e19737b4,2 +np.float64,0x3fed184187fa3084,0x3fdb7b7a39f234f4,2 +np.float64,0x3fecef846179df08,0x3fdc3cb2228c3682,2 +np.float64,0xbfe2d2aed165a55e,0x400198e21c5b861b,2 +np.float64,0x7ff0000000000000,0x7ff8000000000000,2 +np.float64,0xbfee9409a07d2813,0x4006bd358232d073,2 +np.float64,0xbfecedc2baf9db86,0x4005995df566fc21,2 +np.float64,0x3fe6d857396db0ae,0x3fe8d2cb8794aa99,2 +np.float64,0xbf9a579e7834af40,0x3ff98b5cc8021e1c,2 +np.float64,0x3fc664fefb2cca00,0x3ff651a664ccf8fa,2 +np.float64,0xbfe8a7aa0e714f54,0x40039a5b4df938a0,2 +np.float64,0xbfdf27d380be4fa8,0x4000a241074dbae6,2 +np.float64,0x3fe00ddf55e01bbe,0x3ff0b94eb1ea1851,2 +np.float64,0x3feb47edbff68fdc,0x3fe199822d075959,2 +np.float64,0x3fb4993822293270,0x3ff7d80c838186d0,2 +np.float64,0xbfca2cd1473459a4,0x3ffc6d88c8de3d0d,2 +np.float64,0xbfea7d9c7674fb39,0x40045e4559e9e52d,2 +np.float64,0x3fe0dce425e1b9c8,0x3ff04099cab23289,2 +np.float64,0x3fd6bb7e97ad76fc,0x3ff352a30434499c,2 +np.float64,0x3fd4a4f16da949e4,0x3ff3e0b07432c9aa,2 +np.float64,0x8000000000000000,0x3ff921fb54442d18,2 +np.float64,0x3fe688f5b56d11ec,0x3fe9435f63264375,2 +np.float64,0xbfdf5a427ebeb484,0x4000a97a6c5d4abc,2 +np.float64,0xbfd1f3483fa3e690,0x3ffdae6c8a299383,2 +np.float64,0xbfeac920db759242,0x4004805862be51ec,2 +np.float64,0x3fef5bc711feb78e,0x3fc9ac40fba5b93b,2 +np.float64,0x3fe4bd9e12e97b3c,0x3febb363c787d381,2 +np.float64,0x3fef6a59ab7ed4b4,0x3fc880f1324eafce,2 +np.float64,0x3fc07a362120f470,0x3ff7113cf2c672b3,2 +np.float64,0xbfe4d6dbe2e9adb8,0x40023d6f6bea44b7,2 +np.float64,0xbfec2d6a15785ad4,0x40052eb425cc37a2,2 +np.float64,0x3fc90dae05321b60,0x3ff5fb10015d2934,2 +np.float64,0xbfa9239f74324740,0x3ff9eb2d057068ea,2 +np.float64,0xbfeb4fc8baf69f92,0x4004bf5e17fb08a4,2 +np.float64,0x0,0x3ff921fb54442d18,2 +np.float64,0x3faaf1884c35e320,0x3ff84a5591dbe1f3,2 +np.float64,0xbfed842561fb084b,0x4005f5c0a19116ce,2 +np.float64,0xbfc64850c32c90a0,0x3ffbeeac2ee70f9a,2 +np.float64,0x3fd7d879f5afb0f4,0x3ff306254c453436,2 +np.float64,0xbfdabaa586b5754c,0x4000035e6ac83a2b,2 +np.float64,0xbfebfeefa977fddf,0x4005167446fb9faf,2 +np.float64,0xbfe9383462727069,0x4003d407aa6a1577,2 +np.float64,0x3fe108dfb6e211c0,0x3ff026ac924b281d,2 +np.float64,0xbf85096df02a12c0,0x3ff94c0e60a22ede,2 +np.float64,0xbfe3121cd566243a,0x4001ac8f90db5882,2 +np.float64,0xbfd227f62aa44fec,0x3ffdbc26bb175dcc,2 +np.float64,0x3fd931af2cb26360,0x3ff2a8b62dfe003c,2 +np.float64,0xbfd9b794e3b36f2a,0x3fffbfbc89ec013d,2 +np.float64,0x3fc89b2e6f313660,0x3ff609a6e67f15f2,2 +np.float64,0x3fc0b14a8f216298,0x3ff70a4b6905aad2,2 +np.float64,0xbfeda11a657b4235,0x400608b3f9fff574,2 +np.float64,0xbfed2ee9ec7a5dd4,0x4005c040b7c02390,2 +np.float64,0xbfef7819d8fef034,0x4007ac6bf75cf09d,2 +np.float64,0xbfcc4720fb388e40,0x3ffcb2666a00b336,2 +np.float64,0xbfe05dec4be0bbd8,0x4000dc8a25ca3760,2 +np.float64,0x3fb093416e212680,0x3ff81897b6d8b374,2 +np.float64,0xbfc6ab89332d5714,0x3ffbfb4559d143e7,2 
+np.float64,0x3fc51948512a3290,0x3ff67bb9df662c0a,2 +np.float64,0x3fed4d94177a9b28,0x3fda76c92f0c0132,2 +np.float64,0x3fdd195fbeba32c0,0x3ff194a5586dd18e,2 +np.float64,0x3fe3f82799e7f050,0x3fecb354c2faf55c,2 +np.float64,0x3fecac2169f95842,0x3fdd7222296cb7a7,2 +np.float64,0x3fe3d3f36fe7a7e6,0x3fece18f45e30dd7,2 +np.float64,0x3fe31ff63d663fec,0x3fedc46c77d30c6a,2 +np.float64,0xbfe3120c83e62419,0x4001ac8a7c4aa742,2 +np.float64,0x3fe7c1a7976f8350,0x3fe77e4a9307c9f8,2 +np.float64,0x3fe226fe9de44dfe,0x3feef6c0f3cb00fa,2 +np.float64,0x3fd5c933baab9268,0x3ff3933e8a37de42,2 +np.float64,0x3feaa98496f5530a,0x3fe2c003832ebf21,2 +np.float64,0xbfc6f80a2f2df014,0x3ffc04fd54cb1317,2 +np.float64,0x3fde5e18d0bcbc30,0x3ff138f7b32a2ca3,2 +np.float64,0xbfe30c8dd566191c,0x4001aad4af935a78,2 +np.float64,0x3fbe8d196e3d1a30,0x3ff737fec8149ecc,2 +np.float64,0x3feaee6731f5dcce,0x3fe241fa42cce22d,2 +np.float64,0x3fef9cc46cff3988,0x3fc3f17b708dbdbb,2 +np.float64,0xbfdb181bdeb63038,0x4000103ecf405602,2 +np.float64,0xbfc58de0ed2b1bc0,0x3ffbd704c14e15cd,2 +np.float64,0xbfee05d5507c0bab,0x40064e480faba6d8,2 +np.float64,0x3fe27d0ffa64fa20,0x3fee8dc71ef79f2c,2 +np.float64,0xbfe4f7ad4c69ef5a,0x400248456cd09a07,2 +np.float64,0xbfe4843e91e9087d,0x4002225f3e139c84,2 +np.float64,0x3fe7158b9c6e2b18,0x3fe87ae845c5ba96,2 +np.float64,0xbfea64316074c863,0x400452fd2bc23a44,2 +np.float64,0xbfc9f3ae4133e75c,0x3ffc663d482afa42,2 +np.float64,0xbfd5e18513abc30a,0x3ffeb72fc76d7071,2 +np.float64,0xbfd52f6438aa5ec8,0x3ffe87e5b18041e5,2 +np.float64,0xbfea970650f52e0d,0x400469a4a6758154,2 +np.float64,0xbfe44321b7e88644,0x40020d404a2141b1,2 +np.float64,0x3fdf5a39bbbeb474,0x3ff0f10453059dbd,2 +np.float64,0xbfa1d4069423a810,0x3ff9b0a2eacd2ce2,2 +np.float64,0xbfc36d16a326da2c,0x3ffb92077d41d26a,2 +np.float64,0x1,0x3ff921fb54442d18,2 +np.float64,0x3feb232a79764654,0x3fe1df5beeb249d0,2 +np.float64,0xbfed2003d5fa4008,0x4005b737c2727583,2 +np.float64,0x3fd5b093a3ab6128,0x3ff399ca2db1d96d,2 +np.float64,0x3fca692c3d34d258,0x3ff5ceb86b79223e,2 +np.float64,0x3fd6bbdf89ad77c0,0x3ff3528916df652d,2 +np.float64,0xbfefdadd46ffb5bb,0x40085ee735e19f19,2 +np.float64,0x3feb69fb2676d3f6,0x3fe157ee0c15691e,2 +np.float64,0x3fe44c931f689926,0x3fec46b6f5e3f265,2 +np.float64,0xbfc43ddbcb287bb8,0x3ffbac71d268d74d,2 +np.float64,0x3fe6e16d43edc2da,0x3fe8c5cf0f0daa66,2 +np.float64,0x3fe489efc76913e0,0x3febf704ca1ac2a6,2 +np.float64,0xbfe590aadceb2156,0x40027b764205cf78,2 +np.float64,0xbf782e8aa0305d00,0x3ff93a29e81928ab,2 +np.float64,0x3fedcb80cffb9702,0x3fd7e5d1f98a418b,2 +np.float64,0x3fe075858060eb0c,0x3ff07d23ab46b60f,2 +np.float64,0x3fe62a68296c54d0,0x3fe9c77f7068043b,2 +np.float64,0x3feff16a3c7fe2d4,0x3fae8e8a739cc67a,2 +np.float64,0xbfd6ed93e3addb28,0x3ffefebab206fa99,2 +np.float64,0x3fe40d8ccf681b1a,0x3fec97e9cd29966d,2 +np.float64,0x3fd6408210ac8104,0x3ff3737a7d374107,2 +np.float64,0x3fec8023b8f90048,0x3fde35ebfb2b3afd,2 +np.float64,0xbfe13babd4627758,0x40011dae5c07c56b,2 +np.float64,0xbfd2183e61a4307c,0x3ffdb80dd747cfbe,2 +np.float64,0x3feae8eb1d75d1d6,0x3fe24c1f6e42ae77,2 +np.float64,0xbfea559b9c74ab37,0x40044c8e5e123b20,2 +np.float64,0xbfd12c9d57a2593a,0x3ffd7ac6222f561c,2 +np.float64,0x3fe32eb697e65d6e,0x3fedb202693875b6,2 +np.float64,0xbfde0808c3bc1012,0x4000794bd8616ea3,2 +np.float64,0x3fe14958a06292b2,0x3ff0007b40ac648a,2 +np.float64,0x3fe3d388a6e7a712,0x3fece21751a6dd7c,2 +np.float64,0x3fe7ad7897ef5af2,0x3fe79c5b3da302a7,2 +np.float64,0x3fec75527e78eaa4,0x3fde655de0cf0508,2 +np.float64,0x3fea920d4c75241a,0x3fe2ea48f031d908,2 
+np.float64,0x7fefffffffffffff,0x7ff8000000000000,2 +np.float64,0xbfc17a68cb22f4d0,0x3ffb530925f41aa0,2 +np.float64,0xbfe1c93166e39263,0x400147f3cb435dec,2 +np.float64,0x3feb97c402f72f88,0x3fe0fe5b561bf869,2 +np.float64,0x3fb58ff5162b1ff0,0x3ff7c8933fa969dc,2 +np.float64,0x3fe68e2beded1c58,0x3fe93c075283703b,2 +np.float64,0xbf94564cc828aca0,0x3ff97355e5ee35db,2 +np.float64,0x3fd31061c9a620c4,0x3ff44b150ec96998,2 +np.float64,0xbfc7d0c89f2fa190,0x3ffc208bf4eddc4d,2 +np.float64,0x3fe5736f1d6ae6de,0x3feac18f84992d1e,2 +np.float64,0x3fdb62e480b6c5c8,0x3ff20ecfdc4afe7c,2 +np.float64,0xbfc417228b282e44,0x3ffba78afea35979,2 +np.float64,0x3f8f5ba1303eb780,0x3ff8e343714630ff,2 +np.float64,0x3fe8e99126f1d322,0x3fe5b6511d4c0798,2 +np.float64,0xbfe2ec08a1e5d812,0x4001a0bb28a85875,2 +np.float64,0x3fea3b46cf74768e,0x3fe383dceaa74296,2 +np.float64,0xbfe008b5ed60116c,0x4000c3d62c275d40,2 +np.float64,0xbfcd9f8a4b3b3f14,0x3ffcde98d6484202,2 +np.float64,0xbfdb5fb112b6bf62,0x40001a22137ef1c9,2 +np.float64,0xbfe9079565f20f2b,0x4003c0670c92e401,2 +np.float64,0xbfce250dc53c4a1c,0x3ffcefc2b3dc3332,2 +np.float64,0x3fe9ba85d373750c,0x3fe4607131b28773,2 +np.float64,0x10000000000000,0x3ff921fb54442d18,2 +np.float64,0xbfeb9ef42c773de8,0x4004e5f239203ad8,2 +np.float64,0xbfd6bf457dad7e8a,0x3ffef2563d87b18d,2 +np.float64,0x3fe4de9aa5e9bd36,0x3feb87f97defb04a,2 +np.float64,0x3fedb4f67cfb69ec,0x3fd8603c465bffac,2 +np.float64,0x3fe7b6d9506f6db2,0x3fe78e670c7bdb67,2 +np.float64,0x3fe071717460e2e2,0x3ff07f84472d9cc5,2 +np.float64,0xbfed2e79dbfa5cf4,0x4005bffc6f9ad24f,2 +np.float64,0x3febb8adc377715c,0x3fe0bcebfbd45900,2 +np.float64,0xbfee2cffd87c5a00,0x40066b20a037c478,2 +np.float64,0x3fef7e358d7efc6c,0x3fc6d0ba71a542a8,2 +np.float64,0xbfef027eef7e04fe,0x400723291cb00a7a,2 +np.float64,0x3fac96da34392dc0,0x3ff83d260a936c6a,2 +np.float64,0x3fe9dba94a73b752,0x3fe428736b94885e,2 +np.float64,0x3fed37581efa6eb0,0x3fdae49dcadf1d90,2 +np.float64,0xbfe6e61037edcc20,0x4002f23031b8d522,2 +np.float64,0xbfdea7204dbd4e40,0x40008fe1f37918b7,2 +np.float64,0x3feb9f8edb773f1e,0x3fe0eef20bd4387b,2 +np.float64,0x3feeb0b6ed7d616e,0x3fd25fb3b7a525d6,2 +np.float64,0xbfd7ce9061af9d20,0x3fff3b25d531aa2b,2 +np.float64,0xbfc806b509300d6c,0x3ffc2768743a8360,2 +np.float64,0xbfa283882c250710,0x3ff9b61fda28914a,2 +np.float64,0x3fdec70050bd8e00,0x3ff11b1d769b578f,2 +np.float64,0xbfc858a44930b148,0x3ffc31d6758b4721,2 +np.float64,0x3fdc321150b86424,0x3ff1d5504c3c91e4,2 +np.float64,0x3fd9416870b282d0,0x3ff2a46f3a850f5b,2 +np.float64,0x3fdd756968baead4,0x3ff17ac510a5573f,2 +np.float64,0xbfedfd632cfbfac6,0x400648345a2f89b0,2 +np.float64,0x3fd6874285ad0e84,0x3ff36098ebff763f,2 +np.float64,0x3fe6daacc9edb55a,0x3fe8cf75fae1e35f,2 +np.float64,0x3fe53f19766a7e32,0x3feb07d0e97cd55b,2 +np.float64,0x3fd13cc36ca27988,0x3ff4c4ff801b1faa,2 +np.float64,0x3fe4f21cbce9e43a,0x3feb6e34a72ef529,2 +np.float64,0xbfc21c1cc9243838,0x3ffb67726394ca89,2 +np.float64,0x3fe947a3f2728f48,0x3fe51eae4660e23c,2 +np.float64,0xbfce78cd653cf19c,0x3ffcfa89194b3f5e,2 +np.float64,0x3fe756f049eeade0,0x3fe81be7f2d399e2,2 +np.float64,0xbfcc727cf138e4f8,0x3ffcb7f547841bb0,2 +np.float64,0xbfc2d8d58f25b1ac,0x3ffb7f496cc72458,2 +np.float64,0xbfcfd0e4653fa1c8,0x3ffd26e1309bc80b,2 +np.float64,0xbfe2126c106424d8,0x40015e0e01db6a4a,2 +np.float64,0x3fe580e4306b01c8,0x3feaaf683ce51aa5,2 +np.float64,0x3fcea8a1b93d5140,0x3ff543456c0d28c7,2 +np.float64,0xfff0000000000000,0x7ff8000000000000,2 +np.float64,0xbfd9d5da72b3abb4,0x3fffc8013113f968,2 +np.float64,0xbfe1fdfcea63fbfa,0x400157def2e4808d,2 
+np.float64,0xbfc0022e0720045c,0x3ffb239963e7cbf2,2 diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-arccosh.csv b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-arccosh.csv new file mode 100644 index 0000000..0defe50 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-arccosh.csv @@ -0,0 +1,1429 @@ +dtype,input,output,ulperrortol +np.float32,0x3f83203f,0x3e61d9d6,2 +np.float32,0x3f98dea1,0x3f1d1af6,2 +np.float32,0x7fa00000,0x7fe00000,2 +np.float32,0x7eba99af,0x42b0d032,2 +np.float32,0x3fc95a13,0x3f833650,2 +np.float32,0x3fce9a45,0x3f8771e1,2 +np.float32,0x3fc1bd96,0x3f797811,2 +np.float32,0x7eba2391,0x42b0ceed,2 +np.float32,0x7d4e8f15,0x42acdb8c,2 +np.float32,0x3feca42e,0x3f9cc88e,2 +np.float32,0x7e2b314e,0x42af412e,2 +np.float32,0x7f7fffff,0x42b2d4fc,2 +np.float32,0x3f803687,0x3d6c4380,2 +np.float32,0x3fa0edbd,0x3f33e706,2 +np.float32,0x3faa8074,0x3f4b3d3c,2 +np.float32,0x3fa0c49e,0x3f337af3,2 +np.float32,0x3f8c9ec4,0x3ee18812,2 +np.float32,0x7efef78e,0x42b17006,2 +np.float32,0x3fc75720,0x3f818aa4,2 +np.float32,0x7f52d4c8,0x42b27198,2 +np.float32,0x3f88f21e,0x3ebe52b0,2 +np.float32,0x3ff7a042,0x3fa3a07a,2 +np.float32,0x7f52115c,0x42b26fbd,2 +np.float32,0x3fc6bf6f,0x3f810b42,2 +np.float32,0x3fd105d0,0x3f895649,2 +np.float32,0x3fee7c2a,0x3f9df66e,2 +np.float32,0x7f0ff9a5,0x42b1ae4f,2 +np.float32,0x7e81f075,0x42b016e7,2 +np.float32,0x3fa57d65,0x3f3f70c6,2 +np.float32,0x80800000,0xffc00000,2 +np.float32,0x7da239f5,0x42adc2bf,2 +np.float32,0x3f9e432c,0x3f2cbd80,2 +np.float32,0x3ff2839b,0x3fa07ee4,2 +np.float32,0x3fec8aef,0x3f9cb850,2 +np.float32,0x7d325893,0x42ac905b,2 +np.float32,0x3fa27431,0x3f37dade,2 +np.float32,0x3fce7408,0x3f8753ae,2 +np.float32,0x3fde6684,0x3f93353f,2 +np.float32,0x3feb9a3e,0x3f9c1cff,2 +np.float32,0x7deb34bb,0x42ae80f0,2 +np.float32,0x3fed9300,0x3f9d61b7,2 +np.float32,0x7f35e253,0x42b225fb,2 +np.float32,0x7e6db57f,0x42afe93f,2 +np.float32,0x3fa41f08,0x3f3c10bc,2 +np.float32,0x3fb0d4da,0x3f590de3,2 +np.float32,0x3fb5c690,0x3f632351,2 +np.float32,0x3fcde9ce,0x3f86e638,2 +np.float32,0x3f809c7b,0x3dc81161,2 +np.float32,0x3fd77291,0x3f8e3226,2 +np.float32,0x3fc21a06,0x3f7a1a82,2 +np.float32,0x3fba177e,0x3f6b8139,2 +np.float32,0x7f370dff,0x42b22944,2 +np.float32,0x3fe5bfcc,0x3f9841c1,2 +np.float32,0x3feb0caa,0x3f9bc139,2 +np.float32,0x7f4fe5c3,0x42b26a6c,2 +np.float32,0x7f1e1419,0x42b1de28,2 +np.float32,0x7f5e3c96,0x42b28c92,2 +np.float32,0x3f8cd313,0x3ee3521e,2 +np.float32,0x3fa97824,0x3f48e049,2 +np.float32,0x7d8ca281,0x42ad799e,2 +np.float32,0x3f96b51b,0x3f165193,2 +np.float32,0x3f81328a,0x3e0bf504,2 +np.float32,0x3ff60bf3,0x3fa2ab45,2 +np.float32,0x3ff9b629,0x3fa4e107,2 +np.float32,0x3fecacfc,0x3f9cce37,2 +np.float32,0x3fba8804,0x3f6c5600,2 +np.float32,0x3f81f752,0x3e333fdd,2 +np.float32,0x3fb5b262,0x3f62fb46,2 +np.float32,0x3fa21bc0,0x3f36f7e6,2 +np.float32,0x3fbc87bb,0x3f7011dc,2 +np.float32,0x3fe18b32,0x3f9565ae,2 +np.float32,0x7dfb6dd5,0x42aea316,2 +np.float32,0x3fb7c602,0x3f670ee3,2 +np.float32,0x7efeb6a2,0x42b16f84,2 +np.float32,0x3fa56180,0x3f3f2ca4,2 +np.float32,0x3f8dcaff,0x3eeb9ac0,2 +np.float32,0x7e876238,0x42b02beb,2 +np.float32,0x7f0bb67d,0x42b19eec,2 +np.float32,0x3faca01c,0x3f4fffa5,2 +np.float32,0x3fdb57ee,0x3f9108b8,2 +np.float32,0x3fe3bade,0x3f96e4b7,2 +np.float32,0x7f7aa2dd,0x42b2ca25,2 +np.float32,0x3fed92ec,0x3f9d61aa,2 +np.float32,0x7eb789b1,0x42b0c7b9,2 +np.float32,0x7f7f16e4,0x42b2d329,2 +np.float32,0x3fb6647e,0x3f645b84,2 
+np.float32,0x3f99335e,0x3f1e1d96,2 +np.float32,0x7e690a11,0x42afdf17,2 +np.float32,0x7dff2f95,0x42aeaaae,2 +np.float32,0x7f70adfd,0x42b2b564,2 +np.float32,0x3fe92252,0x3f9a80fe,2 +np.float32,0x3fef54ce,0x3f9e7fe5,2 +np.float32,0x3ff24eaa,0x3fa05df9,2 +np.float32,0x7f04565a,0x42b18328,2 +np.float32,0x3fcb8b80,0x3f85007f,2 +np.float32,0x3fcd4d0a,0x3f866983,2 +np.float32,0x3fbe7d82,0x3f73a911,2 +np.float32,0x3f8a7a8a,0x3ecdc8f6,2 +np.float32,0x3f912441,0x3f030d56,2 +np.float32,0x3f9b29d6,0x3f23f663,2 +np.float32,0x3fab7f36,0x3f4d7c6c,2 +np.float32,0x7dfedafc,0x42aeaa04,2 +np.float32,0x3fe190c0,0x3f956982,2 +np.float32,0x3f927515,0x3f07e0bb,2 +np.float32,0x3ff6442a,0x3fa2cd7e,2 +np.float32,0x7f6656d0,0x42b29ee8,2 +np.float32,0x3fe29aa0,0x3f96201f,2 +np.float32,0x3fa4a247,0x3f3d5687,2 +np.float32,0x3fa1cf19,0x3f363226,2 +np.float32,0x3fc20037,0x3f79ed36,2 +np.float32,0x7cc1241a,0x42ab5645,2 +np.float32,0x3fafd540,0x3f56f25a,2 +np.float32,0x7e5b3f5f,0x42afbfdb,2 +np.float32,0x7f48de5f,0x42b258d0,2 +np.float32,0x3fce1ca0,0x3f870e85,2 +np.float32,0x7ee40bb2,0x42b136e4,2 +np.float32,0x7ecdb133,0x42b10212,2 +np.float32,0x3f9f181c,0x3f2f02ca,2 +np.float32,0x3f936cbf,0x3f0b4f63,2 +np.float32,0x3fa4f8ea,0x3f3e2c2f,2 +np.float32,0x3fcc03e2,0x3f8561ac,2 +np.float32,0x3fb801f2,0x3f67831b,2 +np.float32,0x7e141dad,0x42aef70c,2 +np.float32,0x3fe8c04e,0x3f9a4087,2 +np.float32,0x3f8548d5,0x3e929f37,2 +np.float32,0x7f148d7d,0x42b1be56,2 +np.float32,0x3fd2c9a2,0x3f8ab1ed,2 +np.float32,0x7eb374fd,0x42b0bc36,2 +np.float32,0x7f296d36,0x42b201a7,2 +np.float32,0x3ff138e2,0x3f9fb09d,2 +np.float32,0x3ff42898,0x3fa18347,2 +np.float32,0x7da8c5e1,0x42add700,2 +np.float32,0x7dcf72c4,0x42ae40a4,2 +np.float32,0x7ea571fc,0x42b09296,2 +np.float32,0x3fc0953d,0x3f776ba3,2 +np.float32,0x7f1773dd,0x42b1c83c,2 +np.float32,0x7ef53b68,0x42b15c17,2 +np.float32,0x3f85d69f,0x3e9a0f3a,2 +np.float32,0x7e8b9a05,0x42b03ba0,2 +np.float32,0x3ff07d20,0x3f9f3ad2,2 +np.float32,0x7e8da32c,0x42b0430a,2 +np.float32,0x7ef96004,0x42b164ab,2 +np.float32,0x3fdfaa62,0x3f941837,2 +np.float32,0x7f0057c5,0x42b17377,2 +np.float32,0x3fb2663f,0x3f5c5065,2 +np.float32,0x3fd3d8c3,0x3f8b8055,2 +np.float32,0x1,0xffc00000,2 +np.float32,0x3fd536c1,0x3f8c8862,2 +np.float32,0x3f91b953,0x3f053619,2 +np.float32,0x3fb3305c,0x3f5deee1,2 +np.float32,0x7ecd86b9,0x42b101a8,2 +np.float32,0x3fbf71c5,0x3f75624d,2 +np.float32,0x3ff5f0f4,0x3fa29ad2,2 +np.float32,0x3fe50389,0x3f97c328,2 +np.float32,0x3fa325a1,0x3f399e69,2 +np.float32,0x3fe4397a,0x3f973a9f,2 +np.float32,0x3f8684c6,0x3ea2b784,2 +np.float32,0x7f25ae00,0x42b1f634,2 +np.float32,0x3ff7cbf7,0x3fa3badb,2 +np.float32,0x7f73f0e0,0x42b2bc48,2 +np.float32,0x3fc88b70,0x3f828b92,2 +np.float32,0x3fb01c16,0x3f578886,2 +np.float32,0x7e557623,0x42afb229,2 +np.float32,0x3fcbcd5b,0x3f8535b4,2 +np.float32,0x7f7157e4,0x42b2b6cd,2 +np.float32,0x7f51d9d4,0x42b26f36,2 +np.float32,0x7f331a3b,0x42b21e17,2 +np.float32,0x7f777fb5,0x42b2c3b2,2 +np.float32,0x3f832001,0x3e61d11f,2 +np.float32,0x7f2cd055,0x42b20bca,2 +np.float32,0x3f89831f,0x3ec42f76,2 +np.float32,0x7f21da33,0x42b1ea3d,2 +np.float32,0x3f99e416,0x3f20330a,2 +np.float32,0x7f2c8ea1,0x42b20b07,2 +np.float32,0x7f462c98,0x42b251e6,2 +np.float32,0x7f4fdb3f,0x42b26a52,2 +np.float32,0x3fcc1338,0x3f856e07,2 +np.float32,0x3f823673,0x3e3e20da,2 +np.float32,0x7dbfe89d,0x42ae18c6,2 +np.float32,0x3fc9b04c,0x3f837d38,2 +np.float32,0x7dba3213,0x42ae094d,2 +np.float32,0x7ec5a483,0x42b0eda1,2 +np.float32,0x3fbc4d14,0x3f6fa543,2 +np.float32,0x3fc85ce2,0x3f8264f1,2 
+np.float32,0x7f77c816,0x42b2c447,2 +np.float32,0x3f9c9281,0x3f280492,2 +np.float32,0x7f49b3e2,0x42b25aef,2 +np.float32,0x3fa7e4da,0x3f45347c,2 +np.float32,0x7e0c9df5,0x42aedc72,2 +np.float32,0x7f21fd1a,0x42b1eaab,2 +np.float32,0x7f7c63ad,0x42b2cdb6,2 +np.float32,0x7f4eb80a,0x42b26783,2 +np.float32,0x7e98038c,0x42b0673c,2 +np.float32,0x7e89ba08,0x42b034b4,2 +np.float32,0x3ffc06ba,0x3fa64094,2 +np.float32,0x3fae63f6,0x3f53db36,2 +np.float32,0x3fbc2d30,0x3f6f6a1c,2 +np.float32,0x7de0e5e5,0x42ae69fe,2 +np.float32,0x7e09ed18,0x42aed28d,2 +np.float32,0x3fea78f8,0x3f9b6129,2 +np.float32,0x7dfe0bcc,0x42aea863,2 +np.float32,0x7ee21d03,0x42b13289,2 +np.float32,0x3fcc3aed,0x3f858dfc,2 +np.float32,0x3fe6b3ba,0x3f98e4ea,2 +np.float32,0x3f90f25f,0x3f025225,2 +np.float32,0x7f1bcaf4,0x42b1d6b3,2 +np.float32,0x3f83ac81,0x3e74c20e,2 +np.float32,0x3f98681d,0x3f1bae16,2 +np.float32,0x3fe1f2d9,0x3f95ad08,2 +np.float32,0x3fa279d7,0x3f37e951,2 +np.float32,0x3feb922a,0x3f9c17c4,2 +np.float32,0x7f1c72e8,0x42b1d8da,2 +np.float32,0x3fea156b,0x3f9b2038,2 +np.float32,0x3fed6bda,0x3f9d48aa,2 +np.float32,0x3fa86142,0x3f46589c,2 +np.float32,0x3ff16bc2,0x3f9fd072,2 +np.float32,0x3fbebf65,0x3f74207b,2 +np.float32,0x7e7b78b5,0x42b00610,2 +np.float32,0x3ff51ab8,0x3fa217f0,2 +np.float32,0x3f8361bb,0x3e6adf07,2 +np.float32,0x7edbceed,0x42b1240e,2 +np.float32,0x7f10e2c0,0x42b1b18a,2 +np.float32,0x3fa7bc58,0x3f44d4ef,2 +np.float32,0x3f813bde,0x3e0e1138,2 +np.float32,0x7f30d5b9,0x42b21791,2 +np.float32,0x3fb4f450,0x3f61806a,2 +np.float32,0x7eee02c4,0x42b14cca,2 +np.float32,0x7ec74b62,0x42b0f1e4,2 +np.float32,0x3ff96bca,0x3fa4b498,2 +np.float32,0x7f50e304,0x42b26cda,2 +np.float32,0x7eb14c57,0x42b0b603,2 +np.float32,0x7c3f0733,0x42a9edbf,2 +np.float32,0x7ea57acb,0x42b092b1,2 +np.float32,0x7f2788dc,0x42b1fbe7,2 +np.float32,0x3fa39f14,0x3f3ad09b,2 +np.float32,0x3fc3a7e0,0x3f7ccfa0,2 +np.float32,0x3fe70a73,0x3f991eb0,2 +np.float32,0x7f4831f7,0x42b25718,2 +np.float32,0x3fe947d0,0x3f9a999c,2 +np.float32,0x7ef2b1c7,0x42b156c4,2 +np.float32,0x3fede0ea,0x3f9d937f,2 +np.float32,0x3f9fef8e,0x3f314637,2 +np.float32,0x3fc313c5,0x3f7bcebd,2 +np.float32,0x7ee99337,0x42b14328,2 +np.float32,0x7eb9042e,0x42b0cbd5,2 +np.float32,0x3fc9d3dc,0x3f839a69,2 +np.float32,0x3fb2c018,0x3f5d091d,2 +np.float32,0x3fcc4e8f,0x3f859dc5,2 +np.float32,0x3fa9363b,0x3f484819,2 +np.float32,0x7f72ce2e,0x42b2b9e4,2 +np.float32,0x7e639326,0x42afd2f1,2 +np.float32,0x7f4595d3,0x42b25060,2 +np.float32,0x7f6d0ac4,0x42b2ad97,2 +np.float32,0x7f1bda0d,0x42b1d6e5,2 +np.float32,0x3fd85ffd,0x3f8ee0ed,2 +np.float32,0x3f91d53f,0x3f059c8e,2 +np.float32,0x7d06e103,0x42ac0155,2 +np.float32,0x3fb83126,0x3f67de6e,2 +np.float32,0x7d81ce1f,0x42ad5097,2 +np.float32,0x7f79cb3b,0x42b2c86b,2 +np.float32,0x7f800000,0x7f800000,2 +np.float32,0x3fdbfffd,0x3f918137,2 +np.float32,0x7f4ecb1c,0x42b267b2,2 +np.float32,0x3fc2c122,0x3f7b3ed3,2 +np.float32,0x7f415854,0x42b24544,2 +np.float32,0x7e3d988b,0x42af7575,2 +np.float32,0x3f83ca99,0x3e789fcb,2 +np.float32,0x7f274f70,0x42b1fb38,2 +np.float32,0x7f0d20e6,0x42b1a416,2 +np.float32,0x3fdf3a1d,0x3f93c9c1,2 +np.float32,0x7efaa13e,0x42b1673d,2 +np.float32,0x3fb20b15,0x3f5b9434,2 +np.float32,0x3f86af9f,0x3ea4c664,2 +np.float32,0x3fe4fcb0,0x3f97be8a,2 +np.float32,0x3f920683,0x3f065085,2 +np.float32,0x3fa4b278,0x3f3d7e8b,2 +np.float32,0x3f8077a8,0x3daef77f,2 +np.float32,0x7e865be4,0x42b02807,2 +np.float32,0x3fcea7e2,0x3f877c9f,2 +np.float32,0x7e7e9db1,0x42b00c6d,2 +np.float32,0x3f9819aa,0x3f1aba7e,2 +np.float32,0x7f2b6c4b,0x42b207a7,2 
+np.float32,0x7ef85e3e,0x42b16299,2 +np.float32,0x3fbd8290,0x3f71df8b,2 +np.float32,0x3fbbb615,0x3f6e8c8c,2 +np.float32,0x7f1bc7f5,0x42b1d6a9,2 +np.float32,0x3fbb4fea,0x3f6dcdad,2 +np.float32,0x3fb67e09,0x3f648dd1,2 +np.float32,0x3fc83495,0x3f824374,2 +np.float32,0x3fe52980,0x3f97dcbc,2 +np.float32,0x3f87d893,0x3eb25d7c,2 +np.float32,0x3fdb805a,0x3f9125c0,2 +np.float32,0x3fb33f0f,0x3f5e0ce1,2 +np.float32,0x3facc524,0x3f50516b,2 +np.float32,0x3ff40484,0x3fa16d0e,2 +np.float32,0x3ff078bf,0x3f9f3811,2 +np.float32,0x7f736747,0x42b2bb27,2 +np.float32,0x7f55768b,0x42b277f3,2 +np.float32,0x80000001,0xffc00000,2 +np.float32,0x7f6463d1,0x42b29a8e,2 +np.float32,0x3f8f8b59,0x3ef9d792,2 +np.float32,0x3f8a6f4d,0x3ecd5bf4,2 +np.float32,0x3fe958d9,0x3f9aa4ca,2 +np.float32,0x7f1e2ce2,0x42b1de78,2 +np.float32,0x3fb8584a,0x3f682a05,2 +np.float32,0x7dea3dc6,0x42ae7ed5,2 +np.float32,0x7f53a815,0x42b27399,2 +np.float32,0x7e0cf986,0x42aeddbf,2 +np.float32,0x7f3afb71,0x42b23422,2 +np.float32,0x3fd87d6e,0x3f8ef685,2 +np.float32,0x3ffcaa46,0x3fa6a0d7,2 +np.float32,0x7eecd276,0x42b14a3a,2 +np.float32,0x3ffc30b4,0x3fa65951,2 +np.float32,0x7e9c85e2,0x42b07634,2 +np.float32,0x3f95d862,0x3f1383de,2 +np.float32,0x7ef21410,0x42b15577,2 +np.float32,0x3fbfa1b5,0x3f75b86e,2 +np.float32,0x3fd6d90f,0x3f8dc086,2 +np.float32,0x0,0xffc00000,2 +np.float32,0x7e885dcd,0x42b02f9f,2 +np.float32,0x3fb3e057,0x3f5f54bf,2 +np.float32,0x7f40afdd,0x42b24385,2 +np.float32,0x3fb795c2,0x3f66b120,2 +np.float32,0x3fba7c11,0x3f6c3f73,2 +np.float32,0x3ffef620,0x3fa7f828,2 +np.float32,0x7d430508,0x42acbe1e,2 +np.float32,0x3f8d2892,0x3ee6369f,2 +np.float32,0x3fbea139,0x3f73e9d5,2 +np.float32,0x3ffaa928,0x3fa571b9,2 +np.float32,0x7fc00000,0x7fc00000,2 +np.float32,0x7f16f9ce,0x42b1c69f,2 +np.float32,0x3fa8f753,0x3f47b657,2 +np.float32,0x3fd48a63,0x3f8c06ac,2 +np.float32,0x7f13419e,0x42b1b9d9,2 +np.float32,0x3fdf1526,0x3f93afde,2 +np.float32,0x3f903c8b,0x3eff3be8,2 +np.float32,0x7f085323,0x42b1925b,2 +np.float32,0x7cdbe309,0x42ab98ac,2 +np.float32,0x3fba2cfd,0x3f6ba9f1,2 +np.float32,0x7f5a805d,0x42b283e4,2 +np.float32,0x7f6753dd,0x42b2a119,2 +np.float32,0x3fed9f02,0x3f9d6964,2 +np.float32,0x3f96422c,0x3f14ddba,2 +np.float32,0x7f22f2a9,0x42b1edb1,2 +np.float32,0x3fe3fcfd,0x3f97119d,2 +np.float32,0x7e018ad0,0x42aeb271,2 +np.float32,0x7db896f5,0x42ae04de,2 +np.float32,0x7e55c795,0x42afb2ec,2 +np.float32,0x7f58ef8d,0x42b28036,2 +np.float32,0x7f24a16a,0x42b1f2f3,2 +np.float32,0x3fcf714c,0x3f881b09,2 +np.float32,0x3fcdd056,0x3f86d200,2 +np.float32,0x7f02fad0,0x42b17de0,2 +np.float32,0x7eeab877,0x42b145a9,2 +np.float32,0x3fd6029d,0x3f8d20f7,2 +np.float32,0x3fd4f8cd,0x3f8c59d6,2 +np.float32,0x3fb29d4a,0x3f5cc1a5,2 +np.float32,0x3fb11e2d,0x3f59a77a,2 +np.float32,0x7eded576,0x42b12b0e,2 +np.float32,0x7f26c2a5,0x42b1f988,2 +np.float32,0x3fb6165b,0x3f63c151,2 +np.float32,0x7f3bca47,0x42b23657,2 +np.float32,0x7d8c93bf,0x42ad7968,2 +np.float32,0x3f8ede02,0x3ef47176,2 +np.float32,0x3fbef762,0x3f7485b9,2 +np.float32,0x7f1419af,0x42b1bcc6,2 +np.float32,0x7d9e8c79,0x42adb701,2 +np.float32,0x3fa26336,0x3f37af63,2 +np.float32,0x7f5f5590,0x42b28f18,2 +np.float32,0x3fddc93a,0x3f92c651,2 +np.float32,0x3ff0a5fc,0x3f9f547f,2 +np.float32,0x3fb2f6b8,0x3f5d790e,2 +np.float32,0x3ffe59a4,0x3fa79d2c,2 +np.float32,0x7e4df848,0x42af9fde,2 +np.float32,0x3fb0ab3b,0x3f58b678,2 +np.float32,0x7ea54d47,0x42b09225,2 +np.float32,0x3fdd6404,0x3f927eb2,2 +np.float32,0x3f846dc0,0x3e864caa,2 +np.float32,0x7d046aee,0x42abf7e7,2 +np.float32,0x7f7c5a05,0x42b2cda3,2 
+np.float32,0x3faf6126,0x3f55fb21,2 +np.float32,0x7f36a910,0x42b22829,2 +np.float32,0x3fdc7b36,0x3f91d938,2 +np.float32,0x3fff443e,0x3fa82577,2 +np.float32,0x7ee7154a,0x42b13daa,2 +np.float32,0x3f944742,0x3f0e435c,2 +np.float32,0x7f5b510a,0x42b285cc,2 +np.float32,0x3f9bc940,0x3f25c4d2,2 +np.float32,0x3fee4782,0x3f9dd4ea,2 +np.float32,0x3fcfc2dd,0x3f885aea,2 +np.float32,0x7eab65cf,0x42b0a4af,2 +np.float32,0x3f9cf908,0x3f292689,2 +np.float32,0x7ed35501,0x42b10feb,2 +np.float32,0x7dabb70a,0x42addfd9,2 +np.float32,0x7f348919,0x42b2222b,2 +np.float32,0x3fb137d4,0x3f59dd17,2 +np.float32,0x7e7b36c9,0x42b0058a,2 +np.float32,0x7e351fa4,0x42af5e0d,2 +np.float32,0x3f973c0c,0x3f18011e,2 +np.float32,0xff800000,0xffc00000,2 +np.float32,0x3f9b0a4b,0x3f239a33,2 +np.float32,0x3f87c4cf,0x3eb17e7e,2 +np.float32,0x7ef67760,0x42b15eaa,2 +np.float32,0x3fc4d2c8,0x3f7ed20f,2 +np.float32,0x7e940dac,0x42b059b8,2 +np.float32,0x7f6e6a52,0x42b2b08d,2 +np.float32,0x3f838752,0x3e6fe4b2,2 +np.float32,0x3fd8f046,0x3f8f4a94,2 +np.float32,0x3fa82112,0x3f45c223,2 +np.float32,0x3fd49b16,0x3f8c1345,2 +np.float32,0x7f02a941,0x42b17ca1,2 +np.float32,0x3f8a9d2c,0x3ecf1768,2 +np.float32,0x7c9372e3,0x42aacc0f,2 +np.float32,0x3fd260b3,0x3f8a619a,2 +np.float32,0x3f8a1b88,0x3eca27cb,2 +np.float32,0x7d25d510,0x42ac6b1c,2 +np.float32,0x7ef5a578,0x42b15cf5,2 +np.float32,0x3fe6625d,0x3f98ae9a,2 +np.float32,0x3ff53240,0x3fa22658,2 +np.float32,0x3f8bb2e6,0x3ed944cf,2 +np.float32,0x7f4679b1,0x42b252ad,2 +np.float32,0x3fa8db30,0x3f4774fc,2 +np.float32,0x7ee5fafd,0x42b13b37,2 +np.float32,0x3fc405e0,0x3f7d71fb,2 +np.float32,0x3f9303cd,0x3f09ddfd,2 +np.float32,0x7f486e67,0x42b257b2,2 +np.float32,0x7e73f12b,0x42aff680,2 +np.float32,0x3fe80f8b,0x3f99cbe4,2 +np.float32,0x3f84200a,0x3e81a3f3,2 +np.float32,0x3fa14e5c,0x3f34e3ce,2 +np.float32,0x3fda22ec,0x3f9029bb,2 +np.float32,0x3f801772,0x3d1aef98,2 +np.float32,0x7eaa1428,0x42b0a0bb,2 +np.float32,0x3feae0b3,0x3f9ba4aa,2 +np.float32,0x7ea439b4,0x42b08ecc,2 +np.float32,0x3fa28b1c,0x3f381579,2 +np.float32,0x7e8af247,0x42b03937,2 +np.float32,0x3fd19216,0x3f89c2b7,2 +np.float32,0x7f6ea033,0x42b2b100,2 +np.float32,0x3fad4fbf,0x3f518224,2 +np.float32,0x3febd940,0x3f9c45bd,2 +np.float32,0x7f4643a3,0x42b25221,2 +np.float32,0x7ec34478,0x42b0e771,2 +np.float32,0x7f18c83b,0x42b1ccb5,2 +np.float32,0x3fc665ad,0x3f80bf94,2 +np.float32,0x3ff0a999,0x3f9f56c4,2 +np.float32,0x3faf1cd2,0x3f5568fe,2 +np.float32,0x7ecd9dc6,0x42b101e1,2 +np.float32,0x3faad282,0x3f4bf754,2 +np.float32,0x3ff905a0,0x3fa47771,2 +np.float32,0x7f596481,0x42b28149,2 +np.float32,0x7f1cb31f,0x42b1d9ac,2 +np.float32,0x7e266719,0x42af32a6,2 +np.float32,0x7eccce06,0x42b0ffdb,2 +np.float32,0x3f9b6f71,0x3f24c102,2 +np.float32,0x3f80e4ba,0x3df1d6bc,2 +np.float32,0x3f843d51,0x3e836a60,2 +np.float32,0x7f70bd88,0x42b2b585,2 +np.float32,0x3fe4cc96,0x3f979e18,2 +np.float32,0x3ff737c7,0x3fa36151,2 +np.float32,0x3ff1197e,0x3f9f9cf4,2 +np.float32,0x7f08e190,0x42b19471,2 +np.float32,0x3ff1542e,0x3f9fc1b2,2 +np.float32,0x3ff6673c,0x3fa2e2d2,2 +np.float32,0xbf800000,0xffc00000,2 +np.float32,0x7e3f9ba7,0x42af7add,2 +np.float32,0x7f658ff6,0x42b29d2d,2 +np.float32,0x3f93441c,0x3f0ac0d9,2 +np.float32,0x7f526a74,0x42b27096,2 +np.float32,0x7f5b00c8,0x42b28511,2 +np.float32,0x3ff212f8,0x3fa038cf,2 +np.float32,0x7e0bd60d,0x42aed998,2 +np.float32,0x7f71ef7f,0x42b2b80e,2 +np.float32,0x7f7a897e,0x42b2c9f1,2 +np.float32,0x7e8b76a6,0x42b03b1e,2 +np.float32,0x7efa0da3,0x42b1660f,2 +np.float32,0x3fce9166,0x3f876ae0,2 +np.float32,0x3fc4163d,0x3f7d8e30,2 
+np.float32,0x3fdb3784,0x3f90f16b,2 +np.float32,0x7c5f177b,0x42aa3d30,2 +np.float32,0x3fc6276d,0x3f808af5,2 +np.float32,0x7bac9cc2,0x42a856f4,2 +np.float32,0x3fe5876f,0x3f981bea,2 +np.float32,0x3fef60e3,0x3f9e878a,2 +np.float32,0x3fb23cd8,0x3f5bfb06,2 +np.float32,0x3fe114e2,0x3f951402,2 +np.float32,0x7ca8ef04,0x42ab11b4,2 +np.float32,0x7d93c2ad,0x42ad92ec,2 +np.float32,0x3fe5bb8a,0x3f983ee6,2 +np.float32,0x7f0182fd,0x42b1781b,2 +np.float32,0x7da63bb2,0x42adcf3d,2 +np.float32,0x3fac46b7,0x3f4f399e,2 +np.float32,0x7f7a5d8f,0x42b2c997,2 +np.float32,0x7f76572e,0x42b2c14b,2 +np.float32,0x7f42d53e,0x42b24931,2 +np.float32,0x7f7ffd00,0x42b2d4f6,2 +np.float32,0x3fc346c3,0x3f7c2756,2 +np.float32,0x7f1f6ae3,0x42b1e27a,2 +np.float32,0x3f87fb56,0x3eb3e2ee,2 +np.float32,0x3fed17a2,0x3f9d12b4,2 +np.float32,0x7f5ea903,0x42b28d8c,2 +np.float32,0x3f967f82,0x3f15a4ab,2 +np.float32,0x7d3b540c,0x42aca984,2 +np.float32,0x7f56711a,0x42b27a4a,2 +np.float32,0x7f122223,0x42b1b5ee,2 +np.float32,0x3fd6fa34,0x3f8dd919,2 +np.float32,0x3fadd62e,0x3f52a7b3,2 +np.float32,0x3fb7bf0c,0x3f67015f,2 +np.float32,0x7edf4ba7,0x42b12c1d,2 +np.float32,0x7e33cc65,0x42af5a4b,2 +np.float32,0x3fa6be17,0x3f427831,2 +np.float32,0x3fa07aa8,0x3f32b7d4,2 +np.float32,0x3fa4a3af,0x3f3d5a01,2 +np.float32,0x3fdbb267,0x3f9149a8,2 +np.float32,0x7ed45e25,0x42b1126c,2 +np.float32,0x3fe3f432,0x3f970ba6,2 +np.float32,0x7f752080,0x42b2bec3,2 +np.float32,0x3f872747,0x3eaa62ea,2 +np.float32,0x7e52175d,0x42afaa03,2 +np.float32,0x3fdc766c,0x3f91d5ce,2 +np.float32,0x7ecd6841,0x42b1015c,2 +np.float32,0x7f3d6c40,0x42b23ac6,2 +np.float32,0x3fb80c14,0x3f6796b9,2 +np.float32,0x3ff6ad56,0x3fa30d68,2 +np.float32,0x3fda44c3,0x3f90423e,2 +np.float32,0x3fdcba0c,0x3f9205fc,2 +np.float32,0x7e14a720,0x42aef8e6,2 +np.float32,0x3fe9e489,0x3f9b0047,2 +np.float32,0x7e69f933,0x42afe123,2 +np.float32,0x3ff3ee6d,0x3fa15f71,2 +np.float32,0x3f8538cd,0x3e91c1a7,2 +np.float32,0x3fdc3f07,0x3f91ae46,2 +np.float32,0x3fba2ef0,0x3f6bada2,2 +np.float32,0x7da64cd8,0x42adcf71,2 +np.float32,0x3fc34bd2,0x3f7c301d,2 +np.float32,0x3fa273aa,0x3f37d984,2 +np.float32,0x3ff0338c,0x3f9f0c86,2 +np.float32,0x7ed62cef,0x42b116c3,2 +np.float32,0x3f911e7e,0x3f02f7c6,2 +np.float32,0x7c8514c9,0x42aa9792,2 +np.float32,0x3fea2a74,0x3f9b2df5,2 +np.float32,0x3fe036f8,0x3f947a25,2 +np.float32,0x7c5654bf,0x42aa28ad,2 +np.float32,0x3fd9e423,0x3f8ffc32,2 +np.float32,0x7eec0439,0x42b1487b,2 +np.float32,0x3fc580f4,0x3f7ffb62,2 +np.float32,0x3fb0e316,0x3f592bbe,2 +np.float32,0x7c4cfb7d,0x42aa11d8,2 +np.float32,0x3faf9704,0x3f566e00,2 +np.float32,0x3fa7cf8a,0x3f45023d,2 +np.float32,0x7f7b724d,0x42b2cbcc,2 +np.float32,0x7f05bfe3,0x42b18897,2 +np.float32,0x3f90bde3,0x3f018bf3,2 +np.float32,0x7c565479,0x42aa28ad,2 +np.float32,0x3f94b517,0x3f0fb8e5,2 +np.float32,0x3fd6aadd,0x3f8d9e3c,2 +np.float32,0x7f09b37c,0x42b1977f,2 +np.float32,0x7f2b45ea,0x42b20734,2 +np.float32,0x3ff1d15e,0x3fa00fe9,2 +np.float32,0x3f99bce6,0x3f1fbd6c,2 +np.float32,0x7ecd1f76,0x42b100a7,2 +np.float32,0x7f443e2b,0x42b24ce2,2 +np.float32,0x7da7d6a5,0x42add428,2 +np.float32,0x7ebe0193,0x42b0d975,2 +np.float32,0x7ee13c43,0x42b1308b,2 +np.float32,0x3f8adf1b,0x3ed18e0c,2 +np.float32,0x7f76ce65,0x42b2c242,2 +np.float32,0x7e34f43d,0x42af5d92,2 +np.float32,0x7f306b76,0x42b2165d,2 +np.float32,0x7e1fd07f,0x42af1df7,2 +np.float32,0x3fab9a41,0x3f4db909,2 +np.float32,0x3fc23d1a,0x3f7a5803,2 +np.float32,0x3f8b7403,0x3ed70245,2 +np.float32,0x3f8c4dd6,0x3edebbae,2 +np.float32,0x3fe5f411,0x3f9864cd,2 +np.float32,0x3f88128b,0x3eb4e508,2 
+np.float32,0x3fcb09de,0x3f84976f,2 +np.float32,0x7f32f2f5,0x42b21da6,2 +np.float32,0x3fe75610,0x3f9950f6,2 +np.float32,0x3f993edf,0x3f1e408d,2 +np.float32,0x3fc4a9d7,0x3f7e8be9,2 +np.float32,0x7f74551a,0x42b2bd1a,2 +np.float32,0x7de87129,0x42ae7ae2,2 +np.float32,0x7f18bbbd,0x42b1cc8c,2 +np.float32,0x7e7e1dd4,0x42b00b6c,2 +np.float32,0x3ff6e55b,0x3fa32f64,2 +np.float32,0x3fa634c8,0x3f412df3,2 +np.float32,0x3fd0fb7c,0x3f894e49,2 +np.float32,0x3ff4f6a6,0x3fa201d7,2 +np.float32,0x7f69d418,0x42b2a69a,2 +np.float32,0x7cb9632d,0x42ab414a,2 +np.float32,0x3fc57d36,0x3f7ff503,2 +np.float32,0x7e9e2ed7,0x42b07b9b,2 +np.float32,0x7f2e6868,0x42b2107d,2 +np.float32,0x3fa3169a,0x3f39785d,2 +np.float32,0x7f03cde0,0x42b18117,2 +np.float32,0x7f6d75d2,0x42b2ae7f,2 +np.float32,0x3ff483f2,0x3fa1bb75,2 +np.float32,0x7f1b39f7,0x42b1d4d6,2 +np.float32,0x3f8c7a7d,0x3ee0481e,2 +np.float32,0x3f989095,0x3f1c2b19,2 +np.float32,0x3fa4cbfd,0x3f3dbd87,2 +np.float32,0x7f75b00f,0x42b2bfef,2 +np.float32,0x3f940724,0x3f0d6756,2 +np.float32,0x7f5e5a1a,0x42b28cd6,2 +np.float32,0x800000,0xffc00000,2 +np.float32,0x7edd1d29,0x42b12716,2 +np.float32,0x3fa3e9e4,0x3f3b8c16,2 +np.float32,0x7e46d70e,0x42af8dd5,2 +np.float32,0x3f824745,0x3e40ec1e,2 +np.float32,0x3fd67623,0x3f8d770a,2 +np.float32,0x3fe9a6f3,0x3f9ad7fa,2 +np.float32,0x3fdda67c,0x3f92adc1,2 +np.float32,0x7ccb6c9a,0x42ab70d4,2 +np.float32,0x3ffd364a,0x3fa6f2fe,2 +np.float32,0x7e02424c,0x42aeb545,2 +np.float32,0x3fb6d2f2,0x3f6534a1,2 +np.float32,0x3fe1fe26,0x3f95b4cc,2 +np.float32,0x7e93ac57,0x42b05867,2 +np.float32,0x7f7b3433,0x42b2cb4d,2 +np.float32,0x3fb76803,0x3f66580d,2 +np.float32,0x3f9af881,0x3f23661b,2 +np.float32,0x3fd58062,0x3f8cbf98,2 +np.float32,0x80000000,0xffc00000,2 +np.float32,0x7f1af8f4,0x42b1d3ff,2 +np.float32,0x3fe66bba,0x3f98b4dc,2 +np.float32,0x7f6bd7bf,0x42b2aaff,2 +np.float32,0x3f84f79a,0x3e8e2e49,2 +np.float32,0x7e475b06,0x42af8f28,2 +np.float32,0x3faff89b,0x3f573d5e,2 +np.float32,0x7de5aa77,0x42ae74bb,2 +np.float32,0x3f8e9e42,0x3ef26cd2,2 +np.float32,0x3fb1cec3,0x3f5b1740,2 +np.float32,0x3f8890d6,0x3eba4821,2 +np.float32,0x3f9b39e9,0x3f242547,2 +np.float32,0x3fc895a4,0x3f829407,2 +np.float32,0x7f77943c,0x42b2c3dc,2 +np.float32,0x7f390d58,0x42b22ed2,2 +np.float32,0x3fe7e160,0x3f99ad58,2 +np.float32,0x3f93d2a0,0x3f0cb205,2 +np.float32,0x7f29499b,0x42b2013c,2 +np.float32,0x3f8c11b2,0x3edca10f,2 +np.float32,0x7e898ef8,0x42b03413,2 +np.float32,0x3fdff942,0x3f944f34,2 +np.float32,0x7f3d602f,0x42b23aa5,2 +np.float32,0x3f8a50f3,0x3ecc345b,2 +np.float32,0x3fa1f86d,0x3f369ce4,2 +np.float32,0x3f97ad95,0x3f19681d,2 +np.float32,0x3ffad1e0,0x3fa589e5,2 +np.float32,0x3fa70590,0x3f432311,2 +np.float32,0x7e6840cb,0x42afdd5c,2 +np.float32,0x3fd4036d,0x3f8ba0aa,2 +np.float32,0x7f7cc953,0x42b2ce84,2 +np.float32,0x7f228e1e,0x42b1ec74,2 +np.float32,0x7e37a866,0x42af652a,2 +np.float32,0x3fda22d0,0x3f9029a7,2 +np.float32,0x7f736bff,0x42b2bb31,2 +np.float32,0x3f9833b6,0x3f1b0b8e,2 +np.float32,0x7f466001,0x42b2526a,2 +np.float32,0xff7fffff,0xffc00000,2 +np.float32,0x7dd62bcd,0x42ae50f8,2 +np.float32,0x7f1d2bfe,0x42b1db36,2 +np.float32,0x7ecffe9e,0x42b107c5,2 +np.float32,0x7ebefe0a,0x42b0dc1b,2 +np.float32,0x7f45c63d,0x42b250dd,2 +np.float32,0x7f601af0,0x42b290db,2 +np.float32,0x3fcbb88a,0x3f8524e5,2 +np.float32,0x7ede55ff,0x42b129e8,2 +np.float32,0x7ea5dd5a,0x42b093e2,2 +np.float32,0x3ff53857,0x3fa22a12,2 +np.float32,0x3f8dbd6a,0x3eeb28a4,2 +np.float32,0x3fd1b467,0x3f89dd2c,2 +np.float32,0x3fe0423f,0x3f9481fc,2 +np.float32,0x3f84b421,0x3e8a6174,2 
+np.float32,0x7f4efc97,0x42b2682c,2 +np.float32,0x7f601b33,0x42b290dc,2 +np.float32,0x3f94f240,0x3f108719,2 +np.float32,0x7decd251,0x42ae8471,2 +np.float32,0x3fdc457c,0x3f91b2e2,2 +np.float32,0x3f92a966,0x3f089c5a,2 +np.float32,0x3fc9732f,0x3f834afc,2 +np.float32,0x3f97948f,0x3f19194e,2 +np.float32,0x7f0824a1,0x42b191ac,2 +np.float32,0x7f0365a5,0x42b17f81,2 +np.float32,0x3f800000,0x0,2 +np.float32,0x7f0054c6,0x42b1736b,2 +np.float32,0x3fe86544,0x3f9a0484,2 +np.float32,0x7e95f844,0x42b0604e,2 +np.float32,0x3fce8602,0x3f8761e2,2 +np.float32,0x3fc726c8,0x3f81621d,2 +np.float32,0x3fcf6b03,0x3f88161b,2 +np.float32,0x3fceb843,0x3f87898a,2 +np.float32,0x3fe2f8b2,0x3f966071,2 +np.float32,0x7f3c8e7f,0x42b2386d,2 +np.float32,0x3fcee13a,0x3f87a9d2,2 +np.float32,0x3fc4df27,0x3f7ee73c,2 +np.float32,0x3ffde486,0x3fa758e3,2 +np.float32,0x3fa91be0,0x3f480b17,2 +np.float32,0x7f2a5a7d,0x42b20472,2 +np.float32,0x7e278d80,0x42af362d,2 +np.float32,0x3f96d091,0x3f16a9d5,2 +np.float32,0x7e925225,0x42b053b2,2 +np.float32,0x7f7ef83a,0x42b2d2ec,2 +np.float32,0x7eb4923a,0x42b0bf61,2 +np.float32,0x7e98bf19,0x42b069b3,2 +np.float32,0x3fac93a2,0x3f4fe410,2 +np.float32,0x7f46389c,0x42b25205,2 +np.float32,0x3f9fd447,0x3f30fd54,2 +np.float32,0x3fef42d4,0x3f9e7483,2 +np.float32,0x7f482174,0x42b256ed,2 +np.float32,0x3f97aedb,0x3f196c1e,2 +np.float32,0x7f764edd,0x42b2c13a,2 +np.float32,0x3f9117b5,0x3f02de5c,2 +np.float32,0x3fc7984e,0x3f81c12d,2 +np.float64,0x3ff1e2cb7463c597,0x3fdec6caf39e0c0e,2 +np.float64,0x3ffe4f89789c9f13,0x3ff40f4b1da0f3e9,2 +np.float64,0x7f6a5c9ac034b935,0x408605e51703c145,2 +np.float64,0x7fdcb6ece3b96dd9,0x40862d6521e16d60,2 +np.float64,0x3ff6563e182cac7c,0x3feb9d8210f3fa88,2 +np.float64,0x7fde32025f3c6404,0x40862dcc1d1a9b7f,2 +np.float64,0x7fd755ed35aeabd9,0x40862bbc5522b779,2 +np.float64,0x3ff5c81f4bcb903e,0x3fea71f10b954ea3,2 +np.float64,0x3fffe805d35fd00c,0x3ff50463a1ba2938,2 +np.float64,0x7fd045a1c1a08b43,0x408628d9f431f2f5,2 +np.float64,0x3ff49f7dd9893efc,0x3fe7c6736e17ea8e,2 +np.float64,0x7fccfbc1fd39f783,0x408627eca79acf51,2 +np.float64,0x3ff1af0a00035e14,0x3fdd1c0e7d5706ea,2 +np.float64,0x7fe7bd17162f7a2d,0x4086316af683502b,2 +np.float64,0x3ff0941b8d012837,0x3fd128d274065ac0,2 +np.float64,0x3ffa0c5d98b418bb,0x3ff11af9c8edd17f,2 +np.float64,0x3ffad9733355b2e6,0x3ff1b6d1307acb42,2 +np.float64,0x3ffabb2a33d57654,0x3ff1a0442b034e50,2 +np.float64,0x3ff36118b0c6c231,0x3fe472b7dfb23516,2 +np.float64,0x3ff2441d3664883a,0x3fe0d61145608f0c,2 +np.float64,0x7fe039862d20730b,0x40862e5f8ed752d3,2 +np.float64,0x7fb1dde24023bbc4,0x40861e824cdb0664,2 +np.float64,0x7face6335839cc66,0x40861ccf90a26e16,2 +np.float64,0x3ffb5d0e1af6ba1c,0x3ff2170f6f42fafe,2 +np.float64,0x3ff5c2c6a50b858d,0x3fea665aabf04407,2 +np.float64,0x3ffabb409db57681,0x3ff1a054ea32bfc3,2 +np.float64,0x3ff1e054e983c0aa,0x3fdeb30c17286cb6,2 +np.float64,0x7fe467f73268cfed,0x4086303529e52e9b,2 +np.float64,0x7fe0e86bf961d0d7,0x40862eb40788b04a,2 +np.float64,0x3ffb743542f6e86a,0x3ff227b4ea5acee0,2 +np.float64,0x3ff2de6826e5bcd0,0x3fe2e31fcde0a96c,2 +np.float64,0x7fd6b27ccfad64f9,0x40862b8385697c31,2 +np.float64,0x7fe0918e8d21231c,0x40862e8a82d9517a,2 +np.float64,0x7fd0ca0395a19406,0x4086291a0696ed33,2 +np.float64,0x3ffb042496960849,0x3ff1d658c928abfc,2 +np.float64,0x3ffcd0409799a081,0x3ff31877df0cb245,2 +np.float64,0x7fe429bd06685379,0x4086301c9f259934,2 +np.float64,0x3ff933076092660f,0x3ff06d2e5f4d9ab7,2 +np.float64,0x7feaefcb28f5df95,0x4086326dccf88e6f,2 +np.float64,0x7fb5f2c1f82be583,0x40862027ac02a39d,2 
+np.float64,0x3ffb5d9e3bd6bb3c,0x3ff21777501d097e,2 +np.float64,0x10000000000000,0xfff8000000000000,2 +np.float64,0x3ff70361596e06c3,0x3fecf675ceda7e19,2 +np.float64,0x3ff71a21b5ee3444,0x3fed224fa048d9a9,2 +np.float64,0x3ffb102b86762057,0x3ff1df2cc9390518,2 +np.float64,0x7feaaeb35c355d66,0x4086325a60704a90,2 +np.float64,0x7fd9a3d0a93347a0,0x40862c7d300fc076,2 +np.float64,0x7fabcf159c379e2a,0x40861c80cdbbff27,2 +np.float64,0x7fd1c066ec2380cd,0x4086298c3006fee6,2 +np.float64,0x3ff3d5ae2d67ab5c,0x3fe5bc16447428db,2 +np.float64,0x3ff4b76add696ed6,0x3fe800f5bbf21376,2 +np.float64,0x3ff60d89ee0c1b14,0x3feb063fdebe1a68,2 +np.float64,0x7f1d2648003a4c8f,0x4085eaf9238af95a,2 +np.float64,0x7fe8b45f6df168be,0x408631bca5abf6d6,2 +np.float64,0x7fe9ea5308f3d4a5,0x4086321ea2bd3af9,2 +np.float64,0x7fcb6ba5a636d74a,0x4086277b208075ed,2 +np.float64,0x3ff621cfd74c43a0,0x3feb30d59baf5919,2 +np.float64,0x3ff7bc8ca0af7919,0x3fee524da8032896,2 +np.float64,0x7fda22dd0c3445b9,0x40862ca47326d063,2 +np.float64,0x7fd02ed4b2a05da8,0x408628ceb6919421,2 +np.float64,0x3ffe64309fdcc861,0x3ff41c1b18940709,2 +np.float64,0x3ffee4042abdc808,0x3ff46a6005bccb41,2 +np.float64,0x3ff078145b00f029,0x3fceeb3d6bfae0eb,2 +np.float64,0x7fda20fd20b441f9,0x40862ca3e03b990b,2 +np.float64,0x3ffa9e9e9af53d3d,0x3ff18ade3cbee789,2 +np.float64,0x3ff0a1062501420c,0x3fd1e32de6d18c0d,2 +np.float64,0x3ff3bdf118477be2,0x3fe57ad89b7fdf8b,2 +np.float64,0x3ff101c0d5c20382,0x3fd6965d3539be47,2 +np.float64,0x7feba3b53b774769,0x408632a28c7aca4d,2 +np.float64,0x3ff598db5d4b31b7,0x3fea0aa65c0b421a,2 +np.float64,0x3ff5fdfbb72bfbf8,0x3feae55accde4a5e,2 +np.float64,0x7fe5bae53aab75c9,0x408630b5e7a5b92a,2 +np.float64,0x3ff8f668afd1ecd2,0x3ff03af686666c9c,2 +np.float64,0x3ff5ba72dd2b74e6,0x3fea5441f223c093,2 +np.float64,0x3ff8498147109302,0x3fef4e45d501601d,2 +np.float64,0x7feddcfa5efbb9f4,0x4086334106a6e76b,2 +np.float64,0x7fd1a30200234603,0x4086297ee5cc562c,2 +np.float64,0x3ffffa8ee07ff51e,0x3ff50f1dc46f1303,2 +np.float64,0x7fef7ed00ebefd9f,0x408633ae01dabe52,2 +np.float64,0x3ffb6e062276dc0c,0x3ff22344c58c2016,2 +np.float64,0x7fcf2b59943e56b2,0x4086288190dd5eeb,2 +np.float64,0x3ffa589f9254b13f,0x3ff155cc081eee0b,2 +np.float64,0x3ff05415ca60a82c,0x3fc9e45565baef0a,2 +np.float64,0x7feb34bed576697d,0x408632822d5a178c,2 +np.float64,0x3ff3993845c73270,0x3fe51423baf246c3,2 +np.float64,0x3ff88367aaf106d0,0x3fefb2d9ca9f1192,2 +np.float64,0x7fef364304fe6c85,0x4086339b7ed82997,2 +np.float64,0x7fcba2c317374585,0x4086278b24e42934,2 +np.float64,0x3ff1aef885e35df1,0x3fdd1b79f55b20c0,2 +np.float64,0x7fe19367886326ce,0x40862f035f867445,2 +np.float64,0x3ff3c8295e279053,0x3fe5970aa670d32e,2 +np.float64,0x3ff6edda164ddbb4,0x3feccca9eb59d6b9,2 +np.float64,0x7fdeaea940bd5d52,0x40862dece02d151b,2 +np.float64,0x7fea9d6324353ac5,0x408632552ddf0d4f,2 +np.float64,0x7fe60e39e66c1c73,0x408630d45b1ad0c4,2 +np.float64,0x7fde06325abc0c64,0x40862dc07910038c,2 +np.float64,0x7f9ec89d303d9139,0x408617c55ea4c576,2 +np.float64,0x3ff9801930530032,0x3ff0abe5be046051,2 +np.float64,0x3ff4d5859689ab0b,0x3fe849a7f7a19fa3,2 +np.float64,0x3ff38afbc48715f8,0x3fe4ebb7710cbab9,2 +np.float64,0x3ffd88a0e77b1142,0x3ff3916964407e21,2 +np.float64,0x1,0xfff8000000000000,2 +np.float64,0x3ff5db59e58bb6b4,0x3fea9b6b5ccc116f,2 +np.float64,0x3ffd4b05b15a960c,0x3ff369792f661a90,2 +np.float64,0x7fdcebc4fb39d789,0x40862d73cd623378,2 +np.float64,0x3ff5b56f944b6adf,0x3fea4955d6b06ca3,2 +np.float64,0x7fd4e4abf2a9c957,0x40862ad9e9da3c61,2 +np.float64,0x7fe08e0d6aa11c1a,0x40862e88d17ef277,2 
+np.float64,0x3ff0dfc97da1bf93,0x3fd50f9004136d8f,2 +np.float64,0x7fdec38eaebd871c,0x40862df2511e26b4,2 +np.float64,0x7ff8000000000000,0x7ff8000000000000,2 +np.float64,0x3ff21865504430cb,0x3fe033fe3cf3947a,2 +np.float64,0x7fdc139708b8272d,0x40862d371cfbad03,2 +np.float64,0x7fe1fe3be3a3fc77,0x40862f336e3ba63a,2 +np.float64,0x7fd9fa2493b3f448,0x40862c97f2960be9,2 +np.float64,0x3ff0a027db414050,0x3fd1d6e54a707c87,2 +np.float64,0x3ff568b16f4ad163,0x3fe99f5c6d7b6e18,2 +np.float64,0x3ffe2f82877c5f05,0x3ff3fb54bd0da753,2 +np.float64,0x7fbaf5778435eaee,0x408621ccc9e2c1be,2 +np.float64,0x7fc5aaf8362b55ef,0x40862598e7072a49,2 +np.float64,0x7fe0ebfdd4a1d7fb,0x40862eb5b7bf99d5,2 +np.float64,0x7fd8efeb5931dfd6,0x40862c444636f408,2 +np.float64,0x3ff361a308c6c346,0x3fe4744cae63e6df,2 +np.float64,0x7fef287d39be50f9,0x40863397f65c807e,2 +np.float64,0x7fe72c4a14ae5893,0x4086313992e52082,2 +np.float64,0x3ffd1be44cba37c8,0x3ff34a9a45239eb9,2 +np.float64,0x3ff50369c18a06d4,0x3fe8b69319f091f1,2 +np.float64,0x3ffb333c25766678,0x3ff1f8c78eeb28f1,2 +np.float64,0x7fe12050416240a0,0x40862ece4e2f2f24,2 +np.float64,0x7fe348f5526691ea,0x40862fc16fbe7b6c,2 +np.float64,0x3ff343cc4d068799,0x3fe41c2a30cab7d2,2 +np.float64,0x7fd1b0daaa2361b4,0x408629852b3104ff,2 +np.float64,0x3ff6a41f37ad483e,0x3fec3b36ee6c6d4a,2 +np.float64,0x3ffad9439435b287,0x3ff1b6add9a1b3d7,2 +np.float64,0x7fbeb9a2f23d7345,0x408622d89ac1eaba,2 +np.float64,0x3ffab3d39fb567a7,0x3ff19ac75b4427f3,2 +np.float64,0x3ff890003ed12000,0x3fefc8844471c6ad,2 +np.float64,0x3ffc9f595e593eb2,0x3ff2f7a8699f06d8,2 +np.float64,0x7fe2224ef6e4449d,0x40862f43684a154a,2 +np.float64,0x3ffa67ba08d4cf74,0x3ff161525778df99,2 +np.float64,0x7fe87e24b570fc48,0x408631ab02b159fb,2 +np.float64,0x7fd6e99be92dd337,0x40862b96dba73685,2 +np.float64,0x7fe90f39fdf21e73,0x408631d9dbd36c1e,2 +np.float64,0x3ffb7806abd6f00e,0x3ff22a719b0f4c46,2 +np.float64,0x3ffa511ba3d4a238,0x3ff1500c124f6e17,2 +np.float64,0x3ff5d7a569abaf4b,0x3fea937391c280e8,2 +np.float64,0x7fc4279d20284f39,0x40862504a5cdcb96,2 +np.float64,0x3ffe8791b1fd0f24,0x3ff431f1ed7eaba0,2 +np.float64,0x7fe3b2f5276765e9,0x40862fecf15e2535,2 +np.float64,0x7feeab0e7abd561c,0x408633778044cfbc,2 +np.float64,0x7fdba88531375109,0x40862d1860306d7a,2 +np.float64,0x7fe7b19b3def6335,0x4086316716d6890b,2 +np.float64,0x3ff9e9437413d287,0x3ff0ff89431c748c,2 +np.float64,0x3ff960716a52c0e3,0x3ff092498028f802,2 +np.float64,0x3ff271bf56a4e37f,0x3fe1786fc8dd775d,2 +np.float64,0x3fff2a6578be54cb,0x3ff494bbe303eeb5,2 +np.float64,0x3ffd842eb5fb085e,0x3ff38e8b7ba42bc5,2 +np.float64,0x3ff91600e5d22c02,0x3ff0553c6a6b3d93,2 +np.float64,0x3ff9153f45f22a7e,0x3ff0549c0eaecf95,2 +np.float64,0x7fe0ab319da15662,0x40862e96da3b19f9,2 +np.float64,0x3ff06acd1f60d59a,0x3fcd2aca543d2772,2 +np.float64,0x3ffb3e7a54d67cf4,0x3ff200f288cd391b,2 +np.float64,0x3ffd01356f1a026b,0x3ff339003462a56c,2 +np.float64,0x3ffacd35def59a6c,0x3ff1adb8d32b3ec0,2 +np.float64,0x3ff6f953264df2a6,0x3fece2f992948d6e,2 +np.float64,0x3ff0fa91f5a1f524,0x3fd64609a28f1590,2 +np.float64,0x7fd1b7610ca36ec1,0x408629881e03dc7d,2 +np.float64,0x3ff4317fb7c86300,0x3fe6b086ed265887,2 +np.float64,0x3ff3856198070ac3,0x3fe4dbb6bc88b9e3,2 +np.float64,0x7fed7fc4573aff88,0x40863327e7013a81,2 +np.float64,0x3ffe53cbbf5ca798,0x3ff411f07a29b1f4,2 +np.float64,0x3ff092195b012433,0x3fd10b1c0b4b14fe,2 +np.float64,0x3ff1a3171163462e,0x3fdcb5c301d5d40d,2 +np.float64,0x3ffa1401f1742804,0x3ff120eb319e9faa,2 +np.float64,0x7fd352f6f426a5ed,0x40862a3a048feb6d,2 +np.float64,0x7fd4ee246fa9dc48,0x40862add895d808f,2 
+np.float64,0x3ff0675cfa00ceba,0x3fccb2222c5493ca,2 +np.float64,0x3ffe5cb38f3cb967,0x3ff417773483d161,2 +np.float64,0x7fe11469ea2228d3,0x40862ec8bd3e497f,2 +np.float64,0x3fff13cba67e2798,0x3ff4872fe2c26104,2 +np.float64,0x3ffb73d3d316e7a8,0x3ff2276f08612ea2,2 +np.float64,0x7febfb70f237f6e1,0x408632bbc9450721,2 +np.float64,0x3ff84a0d87b0941b,0x3fef4f3b707e3145,2 +np.float64,0x7fd71fd5082e3fa9,0x40862ba9b4091172,2 +np.float64,0x3ff560737d8ac0e7,0x3fe98cc9c9ba2f61,2 +np.float64,0x3ff46a266ae8d44d,0x3fe74190e5234822,2 +np.float64,0x7fe8cc9225719923,0x408631c477db9708,2 +np.float64,0x3ff871de5930e3bc,0x3fef948f7d00fbef,2 +np.float64,0x3ffd0bc7895a178f,0x3ff33ffc18357721,2 +np.float64,0x3ff66099f9ccc134,0x3febb2bc775b4720,2 +np.float64,0x7fe91f1be9723e37,0x408631deec3a5c9e,2 +np.float64,0x7fd60462f12c08c5,0x40862b4537e1c1c6,2 +np.float64,0x3ff053100ba0a620,0x3fc9bc0c21e2284f,2 +np.float64,0x7fd864c611b0c98b,0x40862c1724506255,2 +np.float64,0x7fd191decb2323bd,0x408629771bfb68cc,2 +np.float64,0x3ff792a1656f2543,0x3fee054f2e135fcf,2 +np.float64,0x7fd03625cea06c4b,0x408628d253b840e3,2 +np.float64,0x7fc3967716272ced,0x408624ca35451042,2 +np.float64,0x7fe6636cb32cc6d8,0x408630f3073a22a7,2 +np.float64,0x3ffc2d3976585a73,0x3ff2a9d4c0dae607,2 +np.float64,0x3fffd10ee79fa21e,0x3ff4f70db69888be,2 +np.float64,0x3ff1d4fcae23a9f9,0x3fde57675007b23c,2 +np.float64,0x3ffa5da19e14bb43,0x3ff1599f74d1c113,2 +np.float64,0x3ff7f4eb0d6fe9d6,0x3feeb85189659e99,2 +np.float64,0x7fbcca44d8399489,0x408622536234f7c1,2 +np.float64,0x7fef5f97ec3ebf2f,0x408633a60fdde0d7,2 +np.float64,0x7fde4a66da3c94cd,0x40862dd290ebc184,2 +np.float64,0x3ff072957a40e52b,0x3fce34d913d87613,2 +np.float64,0x3ff2bc4c9dc57899,0x3fe27497e6ebe27d,2 +np.float64,0x7fd7d152b4afa2a4,0x40862be63469eecd,2 +np.float64,0x3ff957d768f2afaf,0x3ff08b4ad8062a73,2 +np.float64,0x7fe4bc5f45a978be,0x40863055fd66e4eb,2 +np.float64,0x7fc90de345321bc6,0x408626c24ce7e370,2 +np.float64,0x3ff2d7a37d85af47,0x3fe2cd6a40b544a0,2 +np.float64,0x7fe536ea1f6a6dd3,0x40863084bade76a3,2 +np.float64,0x3fff970c9cdf2e19,0x3ff4d524572356dd,2 +np.float64,0x3ffe173ae63c2e76,0x3ff3ec1ee35ad28c,2 +np.float64,0x3ff714025cce2805,0x3fed168aedff4a2b,2 +np.float64,0x7fce7b414c3cf682,0x40862853dcdd19d4,2 +np.float64,0x3ff019623f2032c4,0x3fbc7c602df0bbaf,2 +np.float64,0x3ff72f57fd0e5eb0,0x3fed4ae75f697432,2 +np.float64,0x3ff283778e8506ef,0x3fe1b5c5725b0dfd,2 +np.float64,0x3ff685a29aed0b45,0x3febfdfdedd581e2,2 +np.float64,0x3ff942d24fb285a4,0x3ff07a224c3ecfaf,2 +np.float64,0x3ff2e4a9f465c954,0x3fe2f71905399e8f,2 +np.float64,0x7fdfa1c7fa3f438f,0x40862e2b4e06f098,2 +np.float64,0x3ff49b59c26936b4,0x3fe7bc41c8c1e59d,2 +np.float64,0x3ff2102d3704205a,0x3fe014bf7e28924e,2 +np.float64,0x3ff88de3b8311bc8,0x3fefc4e3e0a15a89,2 +np.float64,0x7fea5ba25374b744,0x40863241519c9b66,2 +np.float64,0x3fffe5df637fcbbf,0x3ff5032488f570f9,2 +np.float64,0x7fe67cfefe6cf9fd,0x408630fc25333cb4,2 +np.float64,0x3ff090bf2b01217e,0x3fd0f6fcf1092b4a,2 +np.float64,0x7fecd75bc5f9aeb7,0x408632f9b6c2e013,2 +np.float64,0x7fe15df38c62bbe6,0x40862eeae5ac944b,2 +np.float64,0x3ff4757875a8eaf1,0x3fe75e0eafbe28ce,2 +np.float64,0x7fecca8a51b99514,0x408632f627c23923,2 +np.float64,0x3ff91ca529d2394a,0x3ff05abb327fd1ca,2 +np.float64,0x3ffb962993b72c53,0x3ff23ff831717579,2 +np.float64,0x3ffd548a2c7aa914,0x3ff36fac7f56d716,2 +np.float64,0x7fbafb5cb035f6b8,0x408621ce898a02fb,2 +np.float64,0x3ff1d86daca3b0db,0x3fde73536c29218c,2 +np.float64,0x7fa8d0f8f431a1f1,0x40861b97a03c3a18,2 +np.float64,0x3ff44f1067489e21,0x3fe6fcbd8144ab2a,2 
+np.float64,0x7fec062b07380c55,0x408632bed9c6ce85,2 +np.float64,0x3ff7e11e0fcfc23c,0x3fee94ada7efaac4,2 +np.float64,0x7fe77505c1aeea0b,0x4086315287dda0ba,2 +np.float64,0x7fc465af2728cb5d,0x4086251d236107f7,2 +np.float64,0x3ffe811c4a7d0238,0x3ff42df7e8b6cf2d,2 +np.float64,0x7fe05a471260b48d,0x40862e6fa502738b,2 +np.float64,0x7fec32cd9778659a,0x408632cb8d98c5a3,2 +np.float64,0x7fd203a220a40743,0x408629aa43b010c0,2 +np.float64,0x7fed71f7d17ae3ef,0x4086332428207101,2 +np.float64,0x3ff3918999e72313,0x3fe4fe5e8991402f,2 +np.float64,0x3ff3ecae38c7d95c,0x3fe5fa787d887981,2 +np.float64,0x7fd65345b82ca68a,0x40862b61aed8c64e,2 +np.float64,0x3ff1efdd01c3dfba,0x3fdf2eae36139204,2 +np.float64,0x3ffba9344f375268,0x3ff24d7fdcfc313b,2 +np.float64,0x7fd0469b35208d35,0x408628da6ed24bdd,2 +np.float64,0x7fe525782daa4aef,0x4086307e240c8b30,2 +np.float64,0x3ff8e473d371c8e8,0x3ff02beebd4171c7,2 +np.float64,0x3ff59a43898b3487,0x3fea0dc0a6acea0a,2 +np.float64,0x7fef50c7263ea18d,0x408633a247d7cd42,2 +np.float64,0x7fe8b5a301f16b45,0x408631bd0e71c855,2 +np.float64,0x3ff209369de4126d,0x3fdff4264334446b,2 +np.float64,0x3ffbe2ff4437c5fe,0x3ff2763b356814c7,2 +np.float64,0x3ff55938156ab270,0x3fe97c70514f91bf,2 +np.float64,0x3fff5d8bf81ebb18,0x3ff4b333b230672a,2 +np.float64,0x3ff16a317bc2d463,0x3fdab84e7faa468f,2 +np.float64,0x3ff7e64f8dafcc9f,0x3fee9e0bd57e9566,2 +np.float64,0x7fef4dc065be9b80,0x408633a181e25abb,2 +np.float64,0x3ff64a24a62c9449,0x3feb849ced76437e,2 +np.float64,0x7fc3cb85ef27970b,0x408624dfc39c8f74,2 +np.float64,0x7fec2162a77842c4,0x408632c69b0d43b6,2 +np.float64,0x7feccee6dc399dcd,0x408632f75de98c46,2 +np.float64,0x7faff4f5f43fe9eb,0x40861d9d89be14c9,2 +np.float64,0x7fee82df60fd05be,0x4086336cfdeb7317,2 +np.float64,0x3ffe54588d9ca8b1,0x3ff41247eb2f75ca,2 +np.float64,0x3ffe5615b55cac2c,0x3ff4135c4eb11620,2 +np.float64,0x3ffdaf9a6a1b5f35,0x3ff3aa70e50d1692,2 +np.float64,0x3ff69c045f4d3809,0x3fec2b00734e2cde,2 +np.float64,0x7fd049239aa09246,0x408628dbad6dd995,2 +np.float64,0x3ff2acbe8465597d,0x3fe24138652195e1,2 +np.float64,0x3ffb288302365106,0x3ff1f0f86ca7e5d1,2 +np.float64,0x3fff6fe8d87edfd2,0x3ff4be136acf53c5,2 +np.float64,0x3ffc87c8bfb90f92,0x3ff2e7bbd65867cb,2 +np.float64,0x3ff173327ca2e665,0x3fdb0b945abb00d7,2 +np.float64,0x3ff9a5cf7a134b9f,0x3ff0ca2450f07c78,2 +np.float64,0x7faf782b043ef055,0x40861d7e0e9b35ef,2 +np.float64,0x3ffa0874975410e9,0x3ff117ee3dc8f5ba,2 +np.float64,0x7fc710fc7f2e21f8,0x40862618fed167fb,2 +np.float64,0x7feb73f4c876e7e9,0x40863294ae3ac1eb,2 +np.float64,0x8000000000000000,0xfff8000000000000,2 +np.float64,0x7fb46615c028cc2b,0x40861f91bade4dad,2 +np.float64,0x7fc26b064624d60c,0x4086244c1b76c938,2 +np.float64,0x3ff06ab9fa40d574,0x3fcd282fd971d1b4,2 +np.float64,0x3ff61da7410c3b4e,0x3feb28201031af02,2 +np.float64,0x3ffec7ba1b9d8f74,0x3ff459342511f952,2 +np.float64,0x7ff4000000000000,0x7ffc000000000000,2 +np.float64,0x7fe5d570422baae0,0x408630bfa75008c9,2 +np.float64,0x3ffa895832f512b0,0x3ff17ad41555dccb,2 +np.float64,0x7fd343ac21a68757,0x40862a33ad59947a,2 +np.float64,0x3ffc1eeb37383dd6,0x3ff29ff29e55a006,2 +np.float64,0x7fee3c5c507c78b8,0x4086335a6b768090,2 +np.float64,0x7fe96d774a32daee,0x408631f7b9937e36,2 +np.float64,0x7fb878362430f06b,0x40862106603497b6,2 +np.float64,0x7fec0a79c03814f3,0x408632c01479905e,2 +np.float64,0x3ffa2f143c145e28,0x3ff135e25d902e1a,2 +np.float64,0x3ff14ccff80299a0,0x3fd9a0cd3397b14c,2 +np.float64,0x3ff97980dcb2f302,0x3ff0a6942a8133ab,2 +np.float64,0x3ff872e2d1f0e5c6,0x3fef96526eb2f756,2 +np.float64,0x7fdf1c9b46be3936,0x40862e0957fee329,2 
+np.float64,0x7fcab6525d356ca4,0x408627458791f029,2 +np.float64,0x3ff964e74a52c9ce,0x3ff095e8845d523c,2 +np.float64,0x3ffb3aa23c967544,0x3ff1fe282d897c13,2 +np.float64,0x7fdd8a36afbb146c,0x40862d9f2b05f61b,2 +np.float64,0x3ffea39f42fd473e,0x3ff4432a48176399,2 +np.float64,0x7fea614f68b4c29e,0x408632430a750385,2 +np.float64,0x7feeafb86abd5f70,0x40863378b79f70cf,2 +np.float64,0x3ff80bc94eb01792,0x3feee138e9d626bd,2 +np.float64,0x7fcaca74743594e8,0x4086274b8ce4d1e1,2 +np.float64,0x3ff8b14815316290,0x3ff000b3526c8321,2 +np.float64,0x7fc698eb5f2d31d6,0x408625eeec86cd2b,2 +np.float64,0x7fe15429a3e2a852,0x40862ee6621205b8,2 +np.float64,0x7fee37f81b7c6fef,0x4086335941ed80dd,2 +np.float64,0x3ff8097ab3f012f6,0x3feedd1bafc3196e,2 +np.float64,0x7fe7c889ceaf9113,0x4086316ed13f2394,2 +np.float64,0x7fceca94513d9528,0x4086286893a06824,2 +np.float64,0x3ff593a103cb2742,0x3fe9ff1af4f63cc9,2 +np.float64,0x7fee237d24bc46f9,0x40863353d4142c87,2 +np.float64,0x3ffbf71e4777ee3c,0x3ff2844c0ed9f4d9,2 +np.float64,0x3ff490c65c09218d,0x3fe7a2216d9f69fd,2 +np.float64,0x3fff5ceaf1feb9d6,0x3ff4b2d430a90110,2 +np.float64,0x3ff55baecceab75e,0x3fe98203980666c4,2 +np.float64,0x3ff511bc306a2378,0x3fe8d81ce7be7b50,2 +np.float64,0x3ff38f83dcc71f08,0x3fe4f89f130d5f87,2 +np.float64,0x3ff73a3676ee746d,0x3fed5f98a65107ee,2 +np.float64,0x7fc27e50c824fca1,0x408624547828bc49,2 +np.float64,0xfff0000000000000,0xfff8000000000000,2 +np.float64,0x3fff38959ebe712b,0x3ff49d362c7ba16a,2 +np.float64,0x3ffad6d23a75ada4,0x3ff1b4dda6394ed0,2 +np.float64,0x3ffe77c6c2dcef8e,0x3ff4283698835ecb,2 +np.float64,0x3fff5feb413ebfd6,0x3ff4b49bcbdb3aa9,2 +np.float64,0x3ff0d30aa161a615,0x3fd4751bcdd7d727,2 +np.float64,0x3ff51e07e00a3c10,0x3fe8f4bd1408d694,2 +np.float64,0x8010000000000000,0xfff8000000000000,2 +np.float64,0x7fd231d2fe2463a5,0x408629beaceafcba,2 +np.float64,0x3fff6b4aee1ed696,0x3ff4bb58544bf8eb,2 +np.float64,0x3ff91fcd2f323f9a,0x3ff05d56e33db6b3,2 +np.float64,0x3ff3b889ab477113,0x3fe56bdeab74cce5,2 +np.float64,0x3ff99bfe30d337fc,0x3ff0c24bbf265561,2 +np.float64,0x3ffbe9e5eaf7d3cc,0x3ff27b0fe60f827a,2 +np.float64,0x7fd65678e92cacf1,0x40862b62d44fe8b6,2 +np.float64,0x7fd9cc477233988e,0x40862c89c638ee48,2 +np.float64,0x3ffc123c72d82479,0x3ff297294d05cbc0,2 +np.float64,0x3ff58abad58b1576,0x3fe9eb65da2a867a,2 +np.float64,0x7fe534887b2a6910,0x40863083d4ec2877,2 +np.float64,0x7fe1d3dcb123a7b8,0x40862f208116c55e,2 +np.float64,0x7fd4d570dba9aae1,0x40862ad412c413cd,2 +np.float64,0x3fffce7d3fdf9cfa,0x3ff4f58f02451928,2 +np.float64,0x3ffa76901c74ed20,0x3ff16c9a5851539c,2 +np.float64,0x7fdd88ffa23b11fe,0x40862d9ed6c6f426,2 +np.float64,0x3ff09fdbb9e13fb7,0x3fd1d2ae4fcbf713,2 +np.float64,0x7fe64567772c8ace,0x408630e845dbc290,2 +np.float64,0x7fb1a849ba235092,0x40861e6a291535b2,2 +np.float64,0x3ffaddb105f5bb62,0x3ff1b9f68f4c419b,2 +np.float64,0x7fd2fc3d5025f87a,0x40862a15cbc1df75,2 +np.float64,0x7fdea7d872bd4fb0,0x40862deb190b2c50,2 +np.float64,0x7fd50ea97eaa1d52,0x40862ae9edc4c812,2 +np.float64,0x3fff659c245ecb38,0x3ff4b7fb18b31aea,2 +np.float64,0x3ff3f1fbb7c7e3f7,0x3fe608bd9d76268c,2 +np.float64,0x3ff76869d9aed0d4,0x3fedb6c23d3a317b,2 +np.float64,0x7fedd4efe93ba9df,0x4086333edeecaa43,2 +np.float64,0x3ff9a5bd4eb34b7a,0x3ff0ca15d02bc960,2 +np.float64,0x3ffd9359cc5b26b4,0x3ff39850cb1a6b6c,2 +np.float64,0x7fe912d0427225a0,0x408631db00e46272,2 +np.float64,0x3ffb3802fe567006,0x3ff1fc4093646465,2 +np.float64,0x3ff02cc38a205987,0x3fc2e8182802a07b,2 +np.float64,0x3ffda953dd1b52a8,0x3ff3a66c504cf207,2 +np.float64,0x7fe0a487e4a1490f,0x40862e93a6f20152,2 
+np.float64,0x7fed265ed1fa4cbd,0x4086330f838ae431,2 +np.float64,0x7fd0000114200001,0x408628b76ec48b5c,2 +np.float64,0x3ff2c262786584c5,0x3fe288860d354b0f,2 +np.float64,0x8000000000000001,0xfff8000000000000,2 +np.float64,0x3ffdae9f075b5d3e,0x3ff3a9d006ae55c1,2 +np.float64,0x3ffb69c72156d38e,0x3ff22037cbb85e5b,2 +np.float64,0x7feeae255f7d5c4a,0x408633784e89bc05,2 +np.float64,0x7feb13927c362724,0x408632786630c55d,2 +np.float64,0x7fef49e072be93c0,0x408633a08451d476,2 +np.float64,0x3fff23d6337e47ac,0x3ff490ceb6e634ae,2 +np.float64,0x3ffba82cf8f7505a,0x3ff24cc51c73234d,2 +np.float64,0x7fe948719ef290e2,0x408631ec0b36476e,2 +np.float64,0x3ff41926c5e8324e,0x3fe670e14bbda8cd,2 +np.float64,0x3ff91f09c1523e14,0x3ff05cb5731878da,2 +np.float64,0x3ff6ae6afccd5cd6,0x3fec4fbeca764086,2 +np.float64,0x3ff927f7e0f24ff0,0x3ff06413eeb8eb1e,2 +np.float64,0x3ff19dd2b9e33ba5,0x3fdc882f97994600,2 +np.float64,0x7fe8e502c5b1ca05,0x408631cc56526fff,2 +np.float64,0x7feb49f70fb693ed,0x4086328868486fcd,2 +np.float64,0x3ffd942d535b285a,0x3ff398d8d89f52ca,2 +np.float64,0x7fc3b9c5c627738b,0x408624d893e692ca,2 +np.float64,0x7fea0780ff340f01,0x408632279fa46704,2 +np.float64,0x7fe4c90066a99200,0x4086305adb47a598,2 +np.float64,0x7fdb209113364121,0x40862cf0ab64fd7d,2 +np.float64,0x3ff38617e5470c30,0x3fe4ddc0413b524f,2 +np.float64,0x7fea1b5b803436b6,0x4086322db767f091,2 +np.float64,0x7fe2004898e40090,0x40862f3457795dc5,2 +np.float64,0x3ff3c4360ac7886c,0x3fe58c29843a4c75,2 +np.float64,0x3ff504bc168a0978,0x3fe8b9ada7f698e6,2 +np.float64,0x3ffd3e936fda7d27,0x3ff3615912c5b4ac,2 +np.float64,0x3ffbdc52fb97b8a6,0x3ff2718dae5f1f2b,2 +np.float64,0x3fffef6d84ffdedb,0x3ff508adbc8556cf,2 +np.float64,0x3ff23b65272476ca,0x3fe0b646ed2579eb,2 +np.float64,0x7fe4633068a8c660,0x408630334a4b7ff7,2 +np.float64,0x3ff769b754aed36f,0x3fedb932af0223f9,2 +np.float64,0x7fe7482d92ee905a,0x408631432de1b057,2 +np.float64,0x3ff5dd682aabbad0,0x3fea9fd5e506a86d,2 +np.float64,0x7fd68399a2ad0732,0x40862b72ed89805d,2 +np.float64,0x3ffad7acc3d5af5a,0x3ff1b57fe632c948,2 +np.float64,0x3ffc68e43698d1c8,0x3ff2d2be6f758761,2 +np.float64,0x3ff4e517fbc9ca30,0x3fe86eddf5e63a58,2 +np.float64,0x3ff34c63c56698c8,0x3fe435b74ccd6a13,2 +np.float64,0x7fea9456c17528ad,0x4086325275237015,2 +np.float64,0x7fee6573f2fccae7,0x4086336543760346,2 +np.float64,0x7fd5496fb9aa92de,0x40862b0023235667,2 +np.float64,0x7ff0000000000000,0x7ff0000000000000,2 +np.float64,0x3ffb70e31256e1c6,0x3ff22552f54b13e0,2 +np.float64,0x3ff66a33988cd467,0x3febc656da46a1ca,2 +np.float64,0x3fff0af2eb1e15e6,0x3ff481dec325f5c8,2 +np.float64,0x3ff6a0233d0d4046,0x3fec33400958eda1,2 +np.float64,0x7fdb11e2d5b623c5,0x40862cec55e405f9,2 +np.float64,0x3ffb8a015ad71402,0x3ff2374d7b563a72,2 +np.float64,0x3ff1807d8ce300fb,0x3fdb849e4bce8335,2 +np.float64,0x3ffefd535e3dfaa6,0x3ff479aaac6ffe79,2 +np.float64,0x3ff701e23a6e03c4,0x3fecf39072d96fc7,2 +np.float64,0x3ff4ac809f895901,0x3fe7e6598f2335a5,2 +np.float64,0x3ff0309f26a0613e,0x3fc3b3f4b2783690,2 +np.float64,0x3ff241dd0ce483ba,0x3fe0cde2cb639144,2 +np.float64,0x3ffabce63fb579cc,0x3ff1a18fe2a2da59,2 +np.float64,0x3ffd84b967db0973,0x3ff38ee4f240645d,2 +np.float64,0x7fc3f88b9a27f116,0x408624f1e10cdf3f,2 +np.float64,0x7fe1d5fd5923abfa,0x40862f2175714a3a,2 +np.float64,0x7fe487b145690f62,0x4086304190700183,2 +np.float64,0x7fe7997feaef32ff,0x4086315eeefdddd2,2 +np.float64,0x3ff8f853b671f0a8,0x3ff03c907353a8da,2 +np.float64,0x7fca4c23b5349846,0x408627257ace5778,2 +np.float64,0x7fe0c9bf3a21937d,0x40862ea576c3ea43,2 +np.float64,0x7fc442b389288566,0x4086250f5f126ec9,2 
+np.float64,0x7fc6d382ed2da705,0x40862603900431b0,2 +np.float64,0x7fe40b069068160c,0x4086301066468124,2 +np.float64,0x3ff7f62a146fec54,0x3feeba8dfc4363fe,2 +np.float64,0x3ff721e8e94e43d2,0x3fed313a6755d34f,2 +np.float64,0x7fe579feaf2af3fc,0x4086309ddefb6112,2 +np.float64,0x3ffe2c6bde5c58d8,0x3ff3f9665dc9a16e,2 +np.float64,0x7fcf9998ed3f3331,0x4086289dab274788,2 +np.float64,0x7fdb03af2236075d,0x40862ce82252e490,2 +np.float64,0x7fe72799392e4f31,0x40863137f428ee71,2 +np.float64,0x7f9f2190603e4320,0x408617dc5b3b3c3c,2 +np.float64,0x3ff69c56d52d38ae,0x3fec2ba59fe938b2,2 +np.float64,0x7fdcde27bf39bc4e,0x40862d70086cd06d,2 +np.float64,0x3ff654d6b8eca9ae,0x3feb9aa0107609a6,2 +np.float64,0x7fdf69d967bed3b2,0x40862e1d1c2b94c2,2 +np.float64,0xffefffffffffffff,0xfff8000000000000,2 +np.float64,0x7fedfd073f3bfa0d,0x40863349980c2c8b,2 +np.float64,0x7f7c1856803830ac,0x40860bf312b458c7,2 +np.float64,0x7fe9553f1bb2aa7d,0x408631f0173eadd5,2 +np.float64,0x3ff6e92efc2dd25e,0x3fecc38f98e7e1a7,2 +np.float64,0x7fe9719ac532e335,0x408631f906cd79c3,2 +np.float64,0x3ff60e56ae4c1cad,0x3feb07ef8637ec7e,2 +np.float64,0x3ff0d0803501a100,0x3fd455c0af195a9c,2 +np.float64,0x7fe75248a3eea490,0x40863146a614aec1,2 +np.float64,0x7fdff61ead3fec3c,0x40862e408643d7aa,2 +np.float64,0x7fed4ac7a4fa958e,0x408633197b5cf6ea,2 +np.float64,0x7fe58d44562b1a88,0x408630a5098d1bbc,2 +np.float64,0x7fd89dcdb1b13b9a,0x40862c29c2979288,2 +np.float64,0x3ff205deda240bbe,0x3fdfda67c84fd3a8,2 +np.float64,0x7fdf84c15abf0982,0x40862e23f361923d,2 +np.float64,0x3ffe012b3afc0256,0x3ff3de3dfa5f47ce,2 +np.float64,0x3ffe2f3512dc5e6a,0x3ff3fb245206398e,2 +np.float64,0x7fed6174c2bac2e9,0x4086331faa699617,2 +np.float64,0x3ff1f30f8783e61f,0x3fdf47e06f2c40d1,2 +np.float64,0x3ff590da9eab21b5,0x3fe9f8f7b4baf3c2,2 +np.float64,0x3ffb3ca1eb967944,0x3ff1ff9baf66d704,2 +np.float64,0x7fe50ba9a5aa1752,0x408630745ab7fd3c,2 +np.float64,0x3ff43743a4a86e87,0x3fe6bf7ae80b1dda,2 +np.float64,0x3ff47e1a24e8fc34,0x3fe773acca44c7d6,2 +np.float64,0x3ff589ede9eb13dc,0x3fe9e99f28fab3a4,2 +np.float64,0x3ff72f2cbf8e5e5a,0x3fed4a94e7edbf24,2 +np.float64,0x3ffa4f9bbc549f38,0x3ff14ee60aea45d3,2 +np.float64,0x3ff975dae732ebb6,0x3ff0a3a1fbd7284a,2 +np.float64,0x7fbcf14ee039e29d,0x4086225e33f3793e,2 +np.float64,0x3ff10e027f621c05,0x3fd71cce2452b4e0,2 +np.float64,0x3ff33ea193067d43,0x3fe40cbac4daaddc,2 +np.float64,0x7fbef8f2263df1e3,0x408622e905c8e1b4,2 +np.float64,0x3fff7f5bfe3efeb8,0x3ff4c732e83df253,2 +np.float64,0x3ff5700a6b4ae015,0x3fe9afdd7b8b82b0,2 +np.float64,0x3ffd5099da5aa134,0x3ff36d1bf26e55bf,2 +np.float64,0x3ffed8e0f89db1c2,0x3ff4639ff065107a,2 +np.float64,0x3fff9d0c463f3a18,0x3ff4d8a9f297cf52,2 +np.float64,0x3ff23db5b2e47b6b,0x3fe0bebdd48f961a,2 +np.float64,0x3ff042bff1e08580,0x3fc713bf24cc60ef,2 +np.float64,0x7feb4fe97a769fd2,0x4086328a26675646,2 +np.float64,0x3ffeafbfeedd5f80,0x3ff44a955a553b1c,2 +np.float64,0x3ff83fb524507f6a,0x3fef3d1729ae0976,2 +np.float64,0x3ff1992294433245,0x3fdc5f5ce53dd197,2 +np.float64,0x7fe89fe629b13fcb,0x408631b601a83867,2 +np.float64,0x7fe53e4d74aa7c9a,0x40863087839b52f1,2 +np.float64,0x3ff113713e6226e2,0x3fd757631ca7cd09,2 +np.float64,0x7fd4a0b7a629416e,0x40862abfba27a09b,2 +np.float64,0x3ff184c6e2a3098e,0x3fdbab2e3966ae57,2 +np.float64,0x3ffafbbf77f5f77f,0x3ff1d02bb331d9f9,2 +np.float64,0x3ffc6099a358c134,0x3ff2cd16941613d1,2 +np.float64,0x3ffb7c441ef6f888,0x3ff22d7b12e31432,2 +np.float64,0x3ff625ba5eec4b75,0x3feb39060e55fb79,2 +np.float64,0x7fde879acbbd0f35,0x40862de2aab4d72d,2 +np.float64,0x7f930aed982615da,0x408613edb6df8528,2 
+np.float64,0x7fa4b82dac29705a,0x40861a261c0a9aae,2 +np.float64,0x7fced5c16b3dab82,0x4086286b7a73e611,2 +np.float64,0x7fe133749d2266e8,0x40862ed73a41b112,2 +np.float64,0x3ff2d8146ea5b029,0x3fe2ced55dbf997d,2 +np.float64,0x3ff60dac77ac1b59,0x3feb0688b0e54c7b,2 +np.float64,0x3ff275d9b024ebb3,0x3fe186b87258b834,2 +np.float64,0x3ff533e6500a67cd,0x3fe92746c8b50ddd,2 +np.float64,0x7fe370896666e112,0x40862fd1ca144736,2 +np.float64,0x7fee7695357ced29,0x40863369c459420e,2 +np.float64,0x7fd1e0528023c0a4,0x4086299a85caffd0,2 +np.float64,0x7fd05c7b24a0b8f5,0x408628e52824386f,2 +np.float64,0x3ff11dcc3b023b98,0x3fd7c56c8cef1be1,2 +np.float64,0x7fc9d9fae933b3f5,0x408627027404bc5f,2 +np.float64,0x7fe2359981246b32,0x40862f4be675e90d,2 +np.float64,0x3ffb10a949962152,0x3ff1df88f83b8cde,2 +np.float64,0x3ffa65b53654cb6a,0x3ff15fc8956ccc87,2 +np.float64,0x3ff0000000000000,0x0,2 +np.float64,0x7fad97ef703b2fde,0x40861d002f3d02da,2 +np.float64,0x3ff57aaf93aaf55f,0x3fe9c7b01f194edb,2 +np.float64,0x7fe9ecd73f33d9ad,0x4086321f69917205,2 +np.float64,0x3ff0dcb79c61b96f,0x3fd4eac86a7a9c38,2 +np.float64,0x7fee9c12ffbd3825,0x4086337396cd706d,2 +np.float64,0x3ff52c40af4a5881,0x3fe915a8a7de8f00,2 +np.float64,0x3ffbcfff59779ffe,0x3ff268e523fe8dda,2 +np.float64,0x7fe014cb4b602996,0x40862e4d5de42a03,2 +np.float64,0x7fae2370e83c46e1,0x40861d258dd5b3ee,2 +np.float64,0x7fe9e33602f3c66b,0x4086321c704ac2bb,2 +np.float64,0x3ff648acd74c915a,0x3feb8195ca53bcaa,2 +np.float64,0x7fe385f507670be9,0x40862fda95ebaf44,2 +np.float64,0x3ffb0e382c361c70,0x3ff1ddbea963e0a7,2 +np.float64,0x3ff47d6b6ae8fad7,0x3fe771f80ad37cd2,2 +np.float64,0x3ffca7d538f94faa,0x3ff2fd5f62e851ac,2 +np.float64,0x3ff83e949c107d29,0x3fef3b1c5bbac99b,2 +np.float64,0x7fc6fb933a2df725,0x408626118e51a286,2 +np.float64,0x7fe43a1454e87428,0x4086302318512d9b,2 +np.float64,0x7fe51fe32aaa3fc5,0x4086307c07271348,2 +np.float64,0x3ff35e563966bcac,0x3fe46aa2856ef85f,2 +np.float64,0x3ff84dd4e4909baa,0x3fef55d86d1d5c2e,2 +np.float64,0x7febe3d84077c7b0,0x408632b507686f03,2 +np.float64,0x3ff6aca2e32d5946,0x3fec4c32a2368ee3,2 +np.float64,0x7fe7070e3e6e0e1b,0x4086312caddb0454,2 +np.float64,0x7fd3657f2aa6cafd,0x40862a41acf47e70,2 +np.float64,0x3ff61534456c2a68,0x3feb1663900af13b,2 +np.float64,0x3ff8bc556eb178ab,0x3ff00a16b5403f88,2 +np.float64,0x3ffa7782e3f4ef06,0x3ff16d529c94a438,2 +np.float64,0x7fc15785ed22af0b,0x408623d0cd94fb86,2 +np.float64,0x3ff2e3eeb6e5c7dd,0x3fe2f4c4876d3edf,2 +np.float64,0x3ff2e4e17e85c9c3,0x3fe2f7c9e437b22e,2 +np.float64,0x7feb3aaf67f6755e,0x40863283ec4a0d76,2 +np.float64,0x7fe89efcf7313df9,0x408631b5b5e41263,2 +np.float64,0x3ffcc6fad4f98df6,0x3ff31245778dff6d,2 +np.float64,0x3ff356114466ac22,0x3fe45253d040a024,2 +np.float64,0x3ff81c70d2d038e2,0x3feefed71ebac776,2 +np.float64,0x7fdb75c96136eb92,0x40862d09a603f03e,2 +np.float64,0x3ff340f91b8681f2,0x3fe413bb6e6d4a54,2 +np.float64,0x3fff906079df20c1,0x3ff4d13869d16bc7,2 +np.float64,0x3ff226a42d644d48,0x3fe0698d316f1ac0,2 +np.float64,0x3ff948abc3b29158,0x3ff07eeb0b3c81ba,2 +np.float64,0x3ffc25df1fb84bbe,0x3ff2a4c13ad4edad,2 +np.float64,0x7fe07ea3b960fd46,0x40862e815b4cf43d,2 +np.float64,0x3ff497d3dae92fa8,0x3fe7b3917bf10311,2 +np.float64,0x7fea561db1f4ac3a,0x4086323fa4aef2a9,2 +np.float64,0x7fd1b49051236920,0x40862986d8759ce5,2 +np.float64,0x7f7ba3bd6037477a,0x40860bd19997fd90,2 +np.float64,0x3ff01126dd00224e,0x3fb76b67938dfb11,2 +np.float64,0x3ff29e1105053c22,0x3fe2102a4c5fa102,2 +np.float64,0x3ff9de2a6553bc55,0x3ff0f6cfe4dea30e,2 +np.float64,0x7fc558e7d42ab1cf,0x4086257a608fc055,2 
+np.float64,0x3ff79830a74f3061,0x3fee0f93db153d65,2 +np.float64,0x7fe2661648e4cc2c,0x40862f6117a71eb2,2 +np.float64,0x3ff140cf4262819e,0x3fd92aefedae1ab4,2 +np.float64,0x3ff5f36251abe6c5,0x3feaced481ceaee3,2 +np.float64,0x7fc80911d5301223,0x4086266d4757f768,2 +np.float64,0x3ff9079a6c320f35,0x3ff04949d21ebe1e,2 +np.float64,0x3ffde8d2e09bd1a6,0x3ff3cedca8a5db5d,2 +np.float64,0x3ffadd1de375ba3c,0x3ff1b989790e8d93,2 +np.float64,0x3ffdbc40ee1b7882,0x3ff3b286b1c7da57,2 +np.float64,0x3ff8ff514771fea2,0x3ff04264add00971,2 +np.float64,0x7fefd7d0e63fafa1,0x408633c47d9f7ae4,2 +np.float64,0x3ffc47798c588ef3,0x3ff2bbe441fa783a,2 +np.float64,0x7fe6ebc55b6dd78a,0x408631232d9abf31,2 +np.float64,0xbff0000000000000,0xfff8000000000000,2 +np.float64,0x7fd378e4afa6f1c8,0x40862a49a8f98cb4,2 +np.float64,0x0,0xfff8000000000000,2 +np.float64,0x3ffe88ed7efd11db,0x3ff432c7ecb95492,2 +np.float64,0x3ff4f5509289eaa1,0x3fe8955a11656323,2 +np.float64,0x7fda255b41344ab6,0x40862ca53676a23e,2 +np.float64,0x3ffebe85b9bd7d0c,0x3ff453992cd55dea,2 +np.float64,0x3ff5d6180b8bac30,0x3fea901c2160c3bc,2 +np.float64,0x3ffcdfb8fcf9bf72,0x3ff322c83b3bc735,2 +np.float64,0x3ff3c91c26679238,0x3fe599a652b7cf59,2 +np.float64,0x7fc389f7a62713ee,0x408624c518edef93,2 +np.float64,0x3ffe1245ba1c248c,0x3ff3e901b2c4a47a,2 +np.float64,0x7fe1e76e95e3cedc,0x40862f29446f9eff,2 +np.float64,0x3ff02ae4f92055ca,0x3fc28221abd63daa,2 +np.float64,0x7fbf648a143ec913,0x40862304a0619d03,2 +np.float64,0x3ff2be7ef8657cfe,0x3fe27bcc6c97522e,2 +np.float64,0x3ffa7595e514eb2c,0x3ff16bdc64249ad1,2 +np.float64,0x3ff4ee130049dc26,0x3fe884354cbad8c9,2 +np.float64,0x3ff19211fc232424,0x3fdc2160bf3eae40,2 +np.float64,0x3ffec215aedd842c,0x3ff455c4cdd50c32,2 +np.float64,0x7fe7cb50ffaf96a1,0x4086316fc06a53af,2 +np.float64,0x3fffa679161f4cf2,0x3ff4de30ba7ac5b8,2 +np.float64,0x7fdcb459763968b2,0x40862d646a21011d,2 +np.float64,0x3ff9f338d6d3e672,0x3ff1075835d8f64e,2 +np.float64,0x3ff8de3319d1bc66,0x3ff026ae858c0458,2 +np.float64,0x7fee0199d33c0333,0x4086334ad03ac683,2 +np.float64,0x3ffc06076c380c0f,0x3ff28eaec3814faa,2 +np.float64,0x3ffe9e2e235d3c5c,0x3ff43fd4d2191a7f,2 +np.float64,0x3ffd93b06adb2761,0x3ff398888239cde8,2 +np.float64,0x7fefe4b71cffc96d,0x408633c7ba971b92,2 +np.float64,0x7fb2940352252806,0x40861ed244bcfed6,2 +np.float64,0x3ffba4647e3748c9,0x3ff24a15f02e11b9,2 +np.float64,0x7fd2d9543725b2a7,0x40862a0708446596,2 +np.float64,0x7fc04997f120932f,0x4086235055d35251,2 +np.float64,0x3ff6d14313ada286,0x3fec94b177f5d3fc,2 +np.float64,0x3ff279fc8684f3f9,0x3fe19511c3e5b9a8,2 +np.float64,0x3ff42f4609085e8c,0x3fe6aabe526ce2bc,2 +np.float64,0x7fc1c6c62a238d8b,0x408624037de7f6ec,2 +np.float64,0x7fe31ff4b8e63fe8,0x40862fb05b40fd16,2 +np.float64,0x7fd2a8825fa55104,0x408629f234d460d6,2 +np.float64,0x3ffe8c1d725d183b,0x3ff434bdc444143f,2 +np.float64,0x3ff0e9dc3e21d3b8,0x3fd58676e2c13fc9,2 +np.float64,0x3ffed03172fda063,0x3ff45e59f7aa6c8b,2 +np.float64,0x7fd74621962e8c42,0x40862bb6e90d66f8,2 +np.float64,0x3ff1faa29663f545,0x3fdf833a2c5efde1,2 +np.float64,0x7fda02834db40506,0x40862c9a860d6747,2 +np.float64,0x7f709b2fc021365f,0x408607be328eb3eb,2 +np.float64,0x7fec0d58aa381ab0,0x408632c0e61a1af6,2 +np.float64,0x3ff524d1720a49a3,0x3fe90479968d40fd,2 +np.float64,0x7fd64cb3b32c9966,0x40862b5f53c4b0b4,2 +np.float64,0x3ff9593e3ed2b27c,0x3ff08c6eea5f6e8b,2 +np.float64,0x3ff7de8b1f6fbd16,0x3fee9007abcfdf7b,2 +np.float64,0x7fe8d816d6b1b02d,0x408631c82e38a894,2 +np.float64,0x7fd726bbe22e4d77,0x40862bac16ee8d52,2 +np.float64,0x7fa70b07d42e160f,0x40861affcc4265e2,2 
+np.float64,0x7fe18b4091e31680,0x40862effa8bce66f,2 +np.float64,0x3ff830253010604a,0x3fef21b2eaa75758,2 +np.float64,0x3fffcade407f95bc,0x3ff4f3734b24c419,2 +np.float64,0x3ff8c17cecb182fa,0x3ff00e75152d7bda,2 +np.float64,0x7fdad9b9d035b373,0x40862cdbabb793ba,2 +np.float64,0x3ff9f9e154f3f3c2,0x3ff10c8dfdbd2510,2 +np.float64,0x3ff465e162e8cbc3,0x3fe736c751c75b73,2 +np.float64,0x3ff9b4cd8493699b,0x3ff0d616235544b8,2 +np.float64,0x7fe557c4a56aaf88,0x4086309114ed12d9,2 +np.float64,0x7fe5999133eb3321,0x408630a9991a9b54,2 +np.float64,0x7fe7c9009e2f9200,0x4086316ef9359a47,2 +np.float64,0x3ff8545cabd0a8ba,0x3fef6141f1030c36,2 +np.float64,0x3ffa1f1712943e2e,0x3ff129849d492ce3,2 +np.float64,0x7fea803a14750073,0x4086324c652c276c,2 +np.float64,0x3ff5b6f97fcb6df3,0x3fea4cb0b97b18e9,2 +np.float64,0x7fc2efdfc425dfbf,0x40862485036a5c6e,2 +np.float64,0x7fe2c78e5be58f1c,0x40862f8b0a5e7baf,2 +np.float64,0x7fe80d7fff301aff,0x40863185e234060a,2 +np.float64,0x3ffd895d457b12ba,0x3ff391e2cac7a3f8,2 +np.float64,0x3ff44c9764a8992f,0x3fe6f6690396c232,2 +np.float64,0x3ff731688b8e62d1,0x3fed4ed70fac3839,2 +np.float64,0x3ff060200460c040,0x3fcbad4a07d97f0e,2 +np.float64,0x3ffbd2f70a17a5ee,0x3ff26afb46ade929,2 +np.float64,0x7febe9e841f7d3d0,0x408632b6c465ddd9,2 +np.float64,0x3ff2532f8be4a65f,0x3fe10c6cd8d64cf4,2 +np.float64,0x7fefffffffffffff,0x408633ce8fb9f87e,2 +np.float64,0x3ff3a1ae3a47435c,0x3fe52c00210cc459,2 +np.float64,0x7fe9c34ae6b38695,0x408632128d150149,2 +np.float64,0x3fff311029fe6220,0x3ff498b852f30bff,2 +np.float64,0x3ffd4485a1ba890c,0x3ff3653b6fa701cd,2 +np.float64,0x7fd52718b1aa4e30,0x40862af330d9c68c,2 +np.float64,0x3ff10b695a4216d3,0x3fd7009294e367b7,2 +np.float64,0x3ffdf73de59bee7c,0x3ff3d7fa96d2c1ae,2 +np.float64,0x3ff2f1c75965e38f,0x3fe320aaff3db882,2 +np.float64,0x3ff2a56a5a854ad5,0x3fe228cc4ad7e7a5,2 +np.float64,0x7fe60cd1cf6c19a3,0x408630d3d87a04b3,2 +np.float64,0x3ff89fa65c113f4c,0x3fefe3543773180c,2 +np.float64,0x3ffd253130ba4a62,0x3ff350b76ba692a0,2 +np.float64,0x7feaad7051f55ae0,0x40863259ff932d62,2 +np.float64,0x7fd9cc37cf33986f,0x40862c89c15f963b,2 +np.float64,0x3ff8c08de771811c,0x3ff00daa9c17acd7,2 +np.float64,0x7fea58b25d34b164,0x408632406d54cc6f,2 +np.float64,0x7fe5f161fd2be2c3,0x408630c9ddf272a5,2 +np.float64,0x3ff5840dbf8b081c,0x3fe9dc9117b4cbc7,2 +np.float64,0x3ff3fd762307faec,0x3fe6277cd530c640,2 +np.float64,0x3ff9095c98b212b9,0x3ff04abff170ac24,2 +np.float64,0x7feaac66017558cb,0x40863259afb4f8ce,2 +np.float64,0x7fd78f96bcaf1f2c,0x40862bd00175fdf9,2 +np.float64,0x3ffaca27e0959450,0x3ff1ab72b8f8633e,2 +np.float64,0x3ffb7f18cb96fe32,0x3ff22f81bcb8907b,2 +np.float64,0x3ffcce48d1199c92,0x3ff317276f62c0b2,2 +np.float64,0x3ffcb9a7f3797350,0x3ff30958e0d6a34d,2 +np.float64,0x7fda569ef6b4ad3d,0x40862cb43b33275a,2 +np.float64,0x7fde9f0893bd3e10,0x40862de8cc036283,2 +np.float64,0x3ff428be3928517c,0x3fe699bb5ab58904,2 +np.float64,0x7fa4d3344029a668,0x40861a3084989291,2 +np.float64,0x3ff03607bd006c0f,0x3fc4c4840cf35f48,2 +np.float64,0x3ff2b1335c056267,0x3fe25000846b75a2,2 +np.float64,0x7fe0cb8bd8e19717,0x40862ea65237d496,2 +np.float64,0x3fff4b1b7b9e9637,0x3ff4a83fb08e7b24,2 +np.float64,0x7fe7526140aea4c2,0x40863146ae86069c,2 +np.float64,0x7fbfcfb7c23f9f6f,0x4086231fc246ede5,2 diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-arcsin.csv b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-arcsin.csv new file mode 100644 index 0000000..cb94c93 --- /dev/null +++ 
b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-arcsin.csv @@ -0,0 +1,1429 @@ +dtype,input,output,ulperrortol +np.float32,0xbe7d3a7c,0xbe7fe217,4 +np.float32,0x3dc102f0,0x3dc14c60,4 +np.float32,0xbe119c28,0xbe121aef,4 +np.float32,0xbe51cd68,0xbe534c75,4 +np.float32,0x3c04a300,0x3c04a35f,4 +np.float32,0xbf4f0b62,0xbf712a69,4 +np.float32,0x3ef61a5c,0x3f005cf6,4 +np.float32,0xbf13024c,0xbf1c97df,4 +np.float32,0x3e93b580,0x3e95d6b5,4 +np.float32,0x3e44e7b8,0x3e4623a5,4 +np.float32,0xbe35df20,0xbe36d773,4 +np.float32,0x3eecd2c0,0x3ef633cf,4 +np.float32,0x3f2772ba,0x3f36862a,4 +np.float32,0x3e211ea8,0x3e21cac5,4 +np.float32,0x3e3b3d90,0x3e3c4cc6,4 +np.float32,0x3f37c962,0x3f4d018c,4 +np.float32,0x3e92ad88,0x3e94c31a,4 +np.float32,0x3f356ffc,0x3f49a766,4 +np.float32,0x3f487ba2,0x3f665254,4 +np.float32,0x3f061c46,0x3f0d27ae,4 +np.float32,0xbee340a2,0xbeeb7722,4 +np.float32,0xbe85aede,0xbe874026,4 +np.float32,0x3f34cf9a,0x3f48c474,4 +np.float32,0x3e29a690,0x3e2a6fbd,4 +np.float32,0xbeb29428,0xbeb669d1,4 +np.float32,0xbe606d40,0xbe624370,4 +np.float32,0x3dae6860,0x3dae9e85,4 +np.float32,0xbf04872b,0xbf0b4d25,4 +np.float32,0x3f2080e2,0x3f2d7ab0,4 +np.float32,0xbec77dcc,0xbecceb27,4 +np.float32,0x3e0dda10,0x3e0e4f38,4 +np.float32,0xbefaf970,0xbf03262c,4 +np.float32,0x3f576a0c,0x3f7ffee6,4 +np.float32,0x3f222382,0x3f2f95d6,4 +np.float32,0x7fc00000,0x7fc00000,4 +np.float32,0x3e41c468,0x3e42f14e,4 +np.float32,0xbf2f64dd,0xbf4139a8,4 +np.float32,0xbf60ef90,0xbf895956,4 +np.float32,0xbf67c855,0xbf90eff0,4 +np.float32,0xbed35aee,0xbed9df00,4 +np.float32,0xbf2c7d92,0xbf3d448f,4 +np.float32,0x3f7b1604,0x3faff122,4 +np.float32,0xbf7c758b,0xbfb3bf87,4 +np.float32,0x3ecda1c8,0x3ed39acf,4 +np.float32,0x3f3af8ae,0x3f519fcb,4 +np.float32,0xbf16e6a3,0xbf2160fd,4 +np.float32,0x3f0c97d2,0x3f14d668,4 +np.float32,0x3f0a8060,0x3f1257b9,4 +np.float32,0x3f27905a,0x3f36ad57,4 +np.float32,0x3eeaeba4,0x3ef40efe,4 +np.float32,0x3e58dde0,0x3e5a8580,4 +np.float32,0xbf0cabe2,0xbf14ee6b,4 +np.float32,0xbe805ca8,0xbe81bf03,4 +np.float32,0x3f5462ba,0x3f7a7b85,4 +np.float32,0xbee235d0,0xbeea4d8b,4 +np.float32,0xbe880cb0,0xbe89b426,4 +np.float32,0x80000001,0x80000001,4 +np.float32,0x3f208c00,0x3f2d88f6,4 +np.float32,0xbf34f3d2,0xbf48f7a2,4 +np.float32,0x3f629428,0x3f8b1763,4 +np.float32,0xbf52a900,0xbf776b4a,4 +np.float32,0xbd17f8d0,0xbd1801be,4 +np.float32,0xbef7cada,0xbf0153d1,4 +np.float32,0x3f7d3b90,0x3fb63967,4 +np.float32,0xbd6a20b0,0xbd6a4160,4 +np.float32,0x3f740496,0x3fa1beb7,4 +np.float32,0x3ed8762c,0x3edf7dd9,4 +np.float32,0x3f53b066,0x3f793d42,4 +np.float32,0xbe9de718,0xbea084f9,4 +np.float32,0x3ea3ae90,0x3ea69b4b,4 +np.float32,0x3f1b8f00,0x3f273183,4 +np.float32,0x3f5cd6ac,0x3f852ead,4 +np.float32,0x3f29d510,0x3f39b169,4 +np.float32,0x3ee2a934,0x3eeace33,4 +np.float32,0x3eecac94,0x3ef608c2,4 +np.float32,0xbea915e2,0xbeac5203,4 +np.float32,0xbd316e90,0xbd317cc8,4 +np.float32,0xbf70b495,0xbf9c97b6,4 +np.float32,0xbe80d976,0xbe823ff3,4 +np.float32,0x3e9205f8,0x3e94143f,4 +np.float32,0x3f49247e,0x3f676296,4 +np.float32,0x3d9030c0,0x3d904f50,4 +np.float32,0x3e4df058,0x3e4f5a5c,4 +np.float32,0xbe1fd360,0xbe207b58,4 +np.float32,0xbf69dc7c,0xbf937006,4 +np.float32,0x3f36babe,0x3f4b7df3,4 +np.float32,0xbe8c9758,0xbe8e6bb7,4 +np.float32,0xbf4de72d,0xbf6f3c20,4 +np.float32,0xbecdad68,0xbed3a780,4 +np.float32,0xbf73e2cf,0xbfa18702,4 +np.float32,0xbece16a8,0xbed41a75,4 +np.float32,0x3f618a96,0x3f89fc6d,4 +np.float32,0xbf325853,0xbf454ea9,4 +np.float32,0x3f138568,0x3f1d3828,4 
+np.float32,0xbf56a6e9,0xbf7e9748,4 +np.float32,0x3ef5d594,0x3f0035bf,4 +np.float32,0xbf408220,0xbf59dfaa,4 +np.float32,0xbed120e6,0xbed76dd5,4 +np.float32,0xbf6dbda5,0xbf986cee,4 +np.float32,0x3f744a38,0x3fa23282,4 +np.float32,0xbe4b56d8,0xbe4cb329,4 +np.float32,0x3f54c5f2,0x3f7b2d97,4 +np.float32,0xbd8b1c90,0xbd8b3801,4 +np.float32,0x3ee19a48,0x3ee9a03b,4 +np.float32,0x3f48460e,0x3f65fc3d,4 +np.float32,0x3eb541c0,0x3eb9461e,4 +np.float32,0xbea7d098,0xbeaaf98c,4 +np.float32,0xbda99e40,0xbda9d00c,4 +np.float32,0xbefb2ca6,0xbf03438d,4 +np.float32,0x3f4256be,0x3f5cab0b,4 +np.float32,0xbdbdb198,0xbdbdf74d,4 +np.float32,0xbf325b5f,0xbf4552e9,4 +np.float32,0xbf704d1a,0xbf9c00b4,4 +np.float32,0x3ebb1d04,0x3ebf8cf8,4 +np.float32,0xbed03566,0xbed66bf1,4 +np.float32,0x3e8fcee8,0x3e91c501,4 +np.float32,0xbf2e1eec,0xbf3f7b9d,4 +np.float32,0x3f33c4d2,0x3f474cac,4 +np.float32,0x3f598ef4,0x3f8201b4,4 +np.float32,0x3e09bb30,0x3e0a2660,4 +np.float32,0x3ed4e228,0x3edb8cdb,4 +np.float32,0x3eb7a190,0x3ebbd0a1,4 +np.float32,0xbd9ae630,0xbd9b0c18,4 +np.float32,0x3f43020e,0x3f5db2d7,4 +np.float32,0xbec06ac0,0xbec542d4,4 +np.float32,0x3f3dfde0,0x3f561674,4 +np.float32,0xbf64084a,0xbf8cabe6,4 +np.float32,0xbd6f95b0,0xbd6fb8b7,4 +np.float32,0x3f268640,0x3f354e2d,4 +np.float32,0xbe72b4bc,0xbe7509b2,4 +np.float32,0xbf3414fa,0xbf47bd5a,4 +np.float32,0xbf375218,0xbf4c566b,4 +np.float32,0x3f203c1a,0x3f2d2273,4 +np.float32,0xbd503530,0xbd504c2b,4 +np.float32,0xbc45e540,0xbc45e67b,4 +np.float32,0xbf175c4f,0xbf21f2c6,4 +np.float32,0x3f7432a6,0x3fa20b2b,4 +np.float32,0xbf43367f,0xbf5e03d8,4 +np.float32,0x3eb3997c,0x3eb780c4,4 +np.float32,0x3e5574c8,0x3e570878,4 +np.float32,0xbf04b57b,0xbf0b8349,4 +np.float32,0x3f6216d8,0x3f8a914b,4 +np.float32,0xbf57a237,0xbf80337d,4 +np.float32,0xbee1403a,0xbee93bee,4 +np.float32,0xbeaf9b9a,0xbeb33f3b,4 +np.float32,0xbf109374,0xbf19a223,4 +np.float32,0xbeae6824,0xbeb1f810,4 +np.float32,0xbcff9320,0xbcff9dbe,4 +np.float32,0x3ed205c0,0x3ed868a9,4 +np.float32,0x3d897c30,0x3d8996ad,4 +np.float32,0xbf2899d2,0xbf380d4c,4 +np.float32,0xbf54cb0b,0xbf7b36c2,4 +np.float32,0x3ea8e8ec,0x3eac2262,4 +np.float32,0x3ef5e1a0,0x3f003c9d,4 +np.float32,0xbf00c81e,0xbf06f1e2,4 +np.float32,0xbf346775,0xbf483181,4 +np.float32,0x3f7a4fe4,0x3fae077c,4 +np.float32,0x3f00776e,0x3f06948f,4 +np.float32,0xbe0a3078,0xbe0a9cbc,4 +np.float32,0xbeba0b06,0xbebe66be,4 +np.float32,0xbdff4e38,0xbdfff8b2,4 +np.float32,0xbe927f70,0xbe9492ff,4 +np.float32,0x3ebb07e0,0x3ebf7642,4 +np.float32,0x3ebcf8e0,0x3ec18c95,4 +np.float32,0x3f49bdfc,0x3f685b51,4 +np.float32,0x3cbc29c0,0x3cbc2dfd,4 +np.float32,0xbe9e951a,0xbea13bf1,4 +np.float32,0xbe8c237c,0xbe8df33d,4 +np.float32,0x3e17f198,0x3e1881c4,4 +np.float32,0xbd0b5220,0xbd0b5902,4 +np.float32,0xbf34c4a2,0xbf48b4f5,4 +np.float32,0xbedaa814,0xbee1ea94,4 +np.float32,0x3ebf5d6c,0x3ec42053,4 +np.float32,0x3cd04b40,0x3cd050ff,4 +np.float32,0xbec33fe0,0xbec85244,4 +np.float32,0xbf00b27a,0xbf06d8d8,4 +np.float32,0x3f15d7be,0x3f201243,4 +np.float32,0xbe3debd0,0xbe3f06f7,4 +np.float32,0xbea81704,0xbeab4418,4 +np.float32,0x1,0x1,4 +np.float32,0x3f49e6ba,0x3f689d8b,4 +np.float32,0x3f351030,0x3f491fc0,4 +np.float32,0x3e607de8,0x3e625482,4 +np.float32,0xbe8dbbe4,0xbe8f9c0e,4 +np.float32,0x3edbf350,0x3ee35924,4 +np.float32,0xbf0c84c4,0xbf14bf9c,4 +np.float32,0x3eb218b0,0x3eb5e61a,4 +np.float32,0x3e466dd0,0x3e47b138,4 +np.float32,0xbe8ece94,0xbe90ba01,4 +np.float32,0xbe82ec2a,0xbe84649a,4 +np.float32,0xbf7e1f10,0xbfb98b9e,4 +np.float32,0xbf2d00ea,0xbf3df688,4 +np.float32,0x3db7cdd0,0x3db80d36,4 
+np.float32,0xbe388b98,0xbe398f25,4 +np.float32,0xbd86cb40,0xbd86e436,4 +np.float32,0x7f7fffff,0x7fc00000,4 +np.float32,0x3f472a60,0x3f6436c6,4 +np.float32,0xbf5b2c1d,0xbf838d87,4 +np.float32,0x3f0409ea,0x3f0abad8,4 +np.float32,0x3f47dd0e,0x3f6553f0,4 +np.float32,0x3e3eab00,0x3e3fc98a,4 +np.float32,0xbf7c2a7f,0xbfb2e19b,4 +np.float32,0xbeda0048,0xbee13112,4 +np.float32,0x3f46600a,0x3f62f5b2,4 +np.float32,0x3f45aef4,0x3f61de43,4 +np.float32,0x3dd40a50,0x3dd46bc4,4 +np.float32,0xbf6cdd0b,0xbf974191,4 +np.float32,0x3f78de4c,0x3faac725,4 +np.float32,0x3f3c39a4,0x3f53777f,4 +np.float32,0xbe2a30ec,0xbe2afc0b,4 +np.float32,0xbf3c0ef0,0xbf533887,4 +np.float32,0x3ecb6548,0x3ed12a53,4 +np.float32,0x3eb994e8,0x3ebde7fc,4 +np.float32,0x3d4c1ee0,0x3d4c3487,4 +np.float32,0xbf52cb6d,0xbf77a7eb,4 +np.float32,0x3eb905d4,0x3ebd4e80,4 +np.float32,0x3e712428,0x3e736d72,4 +np.float32,0xbf79ee6e,0xbfad22be,4 +np.float32,0x3de6f8b0,0x3de776c1,4 +np.float32,0x3e9b2898,0x3e9da325,4 +np.float32,0x3ea09b20,0x3ea35d20,4 +np.float32,0x3d0ea9a0,0x3d0eb103,4 +np.float32,0xbd911500,0xbd913423,4 +np.float32,0x3e004618,0x3e009c97,4 +np.float32,0x3f5e0e5a,0x3f86654c,4 +np.float32,0x3f2e6300,0x3f3fd88b,4 +np.float32,0x3e0cf5d0,0x3e0d68c3,4 +np.float32,0x3d6a16c0,0x3d6a376c,4 +np.float32,0x3f7174aa,0x3f9db53c,4 +np.float32,0xbe04bba0,0xbe051b81,4 +np.float32,0xbe6fdcb4,0xbe721c92,4 +np.float32,0x3f4379f0,0x3f5e6c31,4 +np.float32,0xbf680098,0xbf913257,4 +np.float32,0xbf3c31ca,0xbf536bea,4 +np.float32,0x3f59db58,0x3f824a4e,4 +np.float32,0xbf3ffc84,0xbf591554,4 +np.float32,0x3d1d5160,0x3d1d5b48,4 +np.float32,0x3f6c64ae,0x3f96a3da,4 +np.float32,0xbf1b49fd,0xbf26daaa,4 +np.float32,0x3ec80be0,0x3ecd8576,4 +np.float32,0x3f3becc0,0x3f530629,4 +np.float32,0xbea93890,0xbeac76c1,4 +np.float32,0x3f5b3acc,0x3f839bbd,4 +np.float32,0xbf5d6818,0xbf85bef9,4 +np.float32,0x3f794266,0x3fab9fa6,4 +np.float32,0xbee8eb7c,0xbef1cf3b,4 +np.float32,0xbf360a06,0xbf4a821e,4 +np.float32,0x3f441cf6,0x3f5f693d,4 +np.float32,0x3e60de40,0x3e62b742,4 +np.float32,0xbebb3d7e,0xbebfafdc,4 +np.float32,0x3e56a3a0,0x3e583e28,4 +np.float32,0x3f375bfe,0x3f4c6499,4 +np.float32,0xbf384d7d,0xbf4dbf9a,4 +np.float32,0x3efb03a4,0x3f032c06,4 +np.float32,0x3f1d5d10,0x3f29794d,4 +np.float32,0xbe25f7dc,0xbe26b41d,4 +np.float32,0x3f6d2f88,0x3f97aebb,4 +np.float32,0xbe9fa100,0xbea255cb,4 +np.float32,0xbf21dafa,0xbf2f382a,4 +np.float32,0x3d3870e0,0x3d3880d9,4 +np.float32,0x3eeaf00c,0x3ef413f4,4 +np.float32,0xbc884ea0,0xbc88503c,4 +np.float32,0xbf7dbdad,0xbfb80b6d,4 +np.float32,0xbf4eb713,0xbf709b46,4 +np.float32,0xbf1c0ad4,0xbf27cd92,4 +np.float32,0x3f323088,0x3f451737,4 +np.float32,0x3e405d88,0x3e4183e1,4 +np.float32,0x3d7ad580,0x3d7afdb4,4 +np.float32,0xbf207338,0xbf2d6927,4 +np.float32,0xbecf7948,0xbed59e1a,4 +np.float32,0x3f16ff94,0x3f217fde,4 +np.float32,0xbdf19588,0xbdf225dd,4 +np.float32,0xbf4d9654,0xbf6eb442,4 +np.float32,0xbf390b9b,0xbf4ed220,4 +np.float32,0xbe155a74,0xbe15e354,4 +np.float32,0x3f519e4c,0x3f759850,4 +np.float32,0xbee3f08c,0xbeec3b84,4 +np.float32,0xbf478be7,0xbf64d23b,4 +np.float32,0xbefdee50,0xbf04d92a,4 +np.float32,0x3e8def78,0x3e8fd1bc,4 +np.float32,0x3e3df2a8,0x3e3f0dee,4 +np.float32,0xbf413e22,0xbf5afd97,4 +np.float32,0xbf1b8bc4,0xbf272d71,4 +np.float32,0xbf31e5be,0xbf44af22,4 +np.float32,0x3de7e080,0x3de86010,4 +np.float32,0xbf5ddf7e,0xbf863645,4 +np.float32,0x3f3eba6a,0x3f57306e,4 +np.float32,0xff7fffff,0x7fc00000,4 +np.float32,0x3ec22d5c,0x3ec72973,4 +np.float32,0x80800000,0x80800000,4 +np.float32,0x3f032e0c,0x3f09ba82,4 
+np.float32,0x3d74bd60,0x3d74e2b7,4 +np.float32,0xbea0d61e,0xbea39b42,4 +np.float32,0xbefdfa78,0xbf04e02a,4 +np.float32,0x3e5cb220,0x3e5e70ec,4 +np.float32,0xbe239e54,0xbe2452a4,4 +np.float32,0x3f452738,0x3f61090e,4 +np.float32,0x3e99a2e0,0x3e9c0a66,4 +np.float32,0x3e4394d8,0x3e44ca5f,4 +np.float32,0x3f4472e2,0x3f5fef14,4 +np.float32,0xbf46bc70,0xbf638814,4 +np.float32,0xbf0b910f,0xbf139c7a,4 +np.float32,0x3f36b4a6,0x3f4b753f,4 +np.float32,0x3e0bf478,0x3e0c64f6,4 +np.float32,0x3ce02480,0x3ce02ba9,4 +np.float32,0xbd904b10,0xbd9069b1,4 +np.float32,0xbf7f5d72,0xbfc00b70,4 +np.float32,0x3f62127e,0x3f8a8ca8,4 +np.float32,0xbf320253,0xbf44d6e4,4 +np.float32,0x3f2507be,0x3f335833,4 +np.float32,0x3f299284,0x3f395887,4 +np.float32,0xbd8211b0,0xbd82281d,4 +np.float32,0xbd3374c0,0xbd338376,4 +np.float32,0x3f36c56a,0x3f4b8d30,4 +np.float32,0xbf51f704,0xbf76331f,4 +np.float32,0xbe9871ca,0xbe9acab2,4 +np.float32,0xbe818d8c,0xbe82fa0f,4 +np.float32,0x3f08b958,0x3f103c18,4 +np.float32,0x3f22559a,0x3f2fd698,4 +np.float32,0xbf11f388,0xbf1b4db8,4 +np.float32,0x3ebe1990,0x3ec2c359,4 +np.float32,0xbe75ab38,0xbe7816b6,4 +np.float32,0x3e96102c,0x3e984c99,4 +np.float32,0xbe80d9d2,0xbe824052,4 +np.float32,0x3ef47588,0x3efeda7f,4 +np.float32,0xbe45e524,0xbe4725ea,4 +np.float32,0x3f7f9e7a,0x3fc213ff,4 +np.float32,0x3f1d3c36,0x3f294faa,4 +np.float32,0xbf3c58db,0xbf53a591,4 +np.float32,0x3f0d3d20,0x3f159c69,4 +np.float32,0x3f744be6,0x3fa23552,4 +np.float32,0x3f2e0cea,0x3f3f630e,4 +np.float32,0x3e193c10,0x3e19cff7,4 +np.float32,0xbf4150ac,0xbf5b19dd,4 +np.float32,0xbf145f72,0xbf1e4355,4 +np.float32,0xbb76cc00,0xbb76cc26,4 +np.float32,0x3f756780,0x3fa41b3e,4 +np.float32,0x3ea9b868,0x3eacfe3c,4 +np.float32,0x3d07c920,0x3d07cf7f,4 +np.float32,0xbf2263d4,0xbf2fe8ff,4 +np.float32,0x3e53b3f8,0x3e553daa,4 +np.float32,0xbf785be8,0xbfa9b5ba,4 +np.float32,0x3f324f7a,0x3f454254,4 +np.float32,0xbf2188f2,0xbf2ece5b,4 +np.float32,0xbe33781c,0xbe3466a2,4 +np.float32,0xbd3cf120,0xbd3d024c,4 +np.float32,0x3f06b18a,0x3f0dd70f,4 +np.float32,0x3f40d63e,0x3f5a5f6a,4 +np.float32,0x3f752340,0x3fa3a41e,4 +np.float32,0xbe1cf1c0,0xbe1d90bc,4 +np.float32,0xbf02d948,0xbf0957d7,4 +np.float32,0x3f73bed0,0x3fa14bf7,4 +np.float32,0x3d914920,0x3d916864,4 +np.float32,0x7fa00000,0x7fe00000,4 +np.float32,0xbe67a5d8,0xbe69aba7,4 +np.float32,0x3f689c4a,0x3f91eb9f,4 +np.float32,0xbf196e00,0xbf248601,4 +np.float32,0xbf50dacb,0xbf7444fe,4 +np.float32,0x3f628b86,0x3f8b0e1e,4 +np.float32,0x3f6ee2f2,0x3f99fe7f,4 +np.float32,0x3ee5df40,0x3eee6492,4 +np.float32,0x3f501746,0x3f72f41b,4 +np.float32,0xbf1f0f18,0xbf2ba164,4 +np.float32,0xbf1a8bfd,0xbf25ec01,4 +np.float32,0xbd4926f0,0xbd493ba9,4 +np.float32,0xbf4e364f,0xbf6fc17b,4 +np.float32,0x3e50c578,0x3e523ed4,4 +np.float32,0x3f65bf10,0x3f8e95ce,4 +np.float32,0xbe8d75a2,0xbe8f52f2,4 +np.float32,0xbf3f557e,0xbf581962,4 +np.float32,0xbeff2bfc,0xbf05903a,4 +np.float32,0x3f5e8bde,0x3f86e3d8,4 +np.float32,0xbf7a0012,0xbfad4b9b,4 +np.float32,0x3edefce0,0x3ee6b790,4 +np.float32,0xbf0003de,0xbf060f09,4 +np.float32,0x3efc4650,0x3f03e548,4 +np.float32,0x3f4582e4,0x3f6198f5,4 +np.float32,0x3f10086c,0x3f18f9d0,4 +np.float32,0x3f1cd304,0x3f28ca77,4 +np.float32,0x3f683366,0x3f916e8d,4 +np.float32,0xbed49392,0xbedb3675,4 +np.float32,0xbf6fe5f6,0xbf9b6c0e,4 +np.float32,0xbf59b416,0xbf8224f6,4 +np.float32,0x3d20c960,0x3d20d3f4,4 +np.float32,0x3f6b00d6,0x3f94dbe7,4 +np.float32,0x3f6c26ae,0x3f965352,4 +np.float32,0xbf370ea6,0xbf4bf5dd,4 +np.float32,0x3dfe7230,0x3dff1af1,4 +np.float32,0xbefc21a8,0xbf03d038,4 
+np.float32,0x3f16a990,0x3f21156a,4 +np.float32,0xbef8ac0c,0xbf01d48f,4 +np.float32,0x3f170de8,0x3f21919d,4 +np.float32,0x3db9ef80,0x3dba3122,4 +np.float32,0x3d696400,0x3d698461,4 +np.float32,0x3f007aa2,0x3f069843,4 +np.float32,0x3f22827c,0x3f3010a9,4 +np.float32,0x3f3650dc,0x3f4ae6f1,4 +np.float32,0xbf1d8037,0xbf29a5e1,4 +np.float32,0xbf08fdc4,0xbf108d0e,4 +np.float32,0xbd8df350,0xbd8e1079,4 +np.float32,0xbf36bb32,0xbf4b7e98,4 +np.float32,0x3f2e3756,0x3f3f9ced,4 +np.float32,0x3d5a6f20,0x3d5a89aa,4 +np.float32,0x3f55d568,0x3f7d1889,4 +np.float32,0x3e1ed110,0x3e1f75d9,4 +np.float32,0x3e7386b8,0x3e75e1dc,4 +np.float32,0x3f48ea0e,0x3f670434,4 +np.float32,0x3e921fb0,0x3e942f14,4 +np.float32,0xbf0d4d0b,0xbf15af7f,4 +np.float32,0x3f179ed2,0x3f224549,4 +np.float32,0xbf3a328e,0xbf507e6d,4 +np.float32,0xbf74591a,0xbfa24b6e,4 +np.float32,0x3ec7d1c4,0x3ecd4657,4 +np.float32,0xbf6ecbed,0xbf99de85,4 +np.float32,0x3db0bd00,0x3db0f559,4 +np.float32,0x7f800000,0x7fc00000,4 +np.float32,0x3e0373b8,0x3e03d0d6,4 +np.float32,0xbf439784,0xbf5e9a04,4 +np.float32,0xbef97a9e,0xbf024ac6,4 +np.float32,0x3e4d71a8,0x3e4ed90a,4 +np.float32,0xbf14d868,0xbf1ed7e3,4 +np.float32,0xbf776870,0xbfa7ce37,4 +np.float32,0xbe32a500,0xbe339038,4 +np.float32,0xbf326d8a,0xbf456c3d,4 +np.float32,0xbe9b758c,0xbe9df3e7,4 +np.float32,0x3d9515a0,0x3d95376a,4 +np.float32,0x3e3f7320,0x3e40953e,4 +np.float32,0xbee57e7e,0xbeedf84f,4 +np.float32,0x3e821e94,0x3e838ffd,4 +np.float32,0x3f74beaa,0x3fa2f721,4 +np.float32,0xbe9b7672,0xbe9df4d9,4 +np.float32,0x3f4041fc,0x3f597e71,4 +np.float32,0xbe9ea7c4,0xbea14f92,4 +np.float32,0xbf800000,0xbfc90fdb,4 +np.float32,0x3e04fb90,0x3e055bfd,4 +np.float32,0xbf14d3d6,0xbf1ed245,4 +np.float32,0xbe84ebec,0xbe86763e,4 +np.float32,0x3f08e568,0x3f107039,4 +np.float32,0x3d8dc9e0,0x3d8de6ef,4 +np.float32,0x3ea4549c,0x3ea74a94,4 +np.float32,0xbebd2806,0xbec1bf51,4 +np.float32,0x3f311a26,0x3f439498,4 +np.float32,0xbf3d2222,0xbf54cf7e,4 +np.float32,0x3e00c500,0x3e011c81,4 +np.float32,0xbe35ed1c,0xbe36e5a9,4 +np.float32,0xbd4ec020,0xbd4ed6a0,4 +np.float32,0x3e1eb088,0x3e1f54eb,4 +np.float32,0x3cf94840,0x3cf9521a,4 +np.float32,0xbf010c5d,0xbf0740e0,4 +np.float32,0xbf3bd63b,0xbf52e502,4 +np.float32,0x3f233f30,0x3f310542,4 +np.float32,0x3ea24128,0x3ea519d7,4 +np.float32,0x3f478b38,0x3f64d124,4 +np.float32,0x3f1e0c6c,0x3f2a57ec,4 +np.float32,0xbf3ad294,0xbf51680a,4 +np.float32,0x3ede0554,0x3ee5a4b4,4 +np.float32,0x3e451a98,0x3e46577d,4 +np.float32,0x3f520164,0x3f764542,4 +np.float32,0x0,0x0,4 +np.float32,0xbd056cd0,0xbd0572db,4 +np.float32,0xbf58b018,0xbf812f5e,4 +np.float32,0x3e036eb0,0x3e03cbc3,4 +np.float32,0x3d1377a0,0x3d137fc9,4 +np.float32,0xbf692d3a,0xbf929a2c,4 +np.float32,0xbec60fb8,0xbecb5dea,4 +np.float32,0x3ed23340,0x3ed89a8e,4 +np.float32,0x3c87f040,0x3c87f1d9,4 +np.float32,0x3dac62f0,0x3dac9737,4 +np.float32,0xbed97c16,0xbee09f02,4 +np.float32,0xbf2d5f3c,0xbf3e769c,4 +np.float32,0xbc3b7c40,0xbc3b7d4c,4 +np.float32,0x3ed998ec,0x3ee0bedd,4 +np.float32,0x3dd86630,0x3dd8cdcb,4 +np.float32,0x3e8b4304,0x3e8d09ea,4 +np.float32,0x3f51e6b0,0x3f761697,4 +np.float32,0x3ec51f24,0x3eca5923,4 +np.float32,0xbf647430,0xbf8d2307,4 +np.float32,0x3f253d9c,0x3f339eb2,4 +np.float32,0x3dc969d0,0x3dc9bd4b,4 +np.float32,0xbc2f1300,0xbc2f13da,4 +np.float32,0xbf170007,0xbf21806d,4 +np.float32,0x3f757d10,0x3fa4412e,4 +np.float32,0xbe7864ac,0xbe7ae564,4 +np.float32,0x3f2ffe90,0x3f420cfb,4 +np.float32,0xbe576138,0xbe590012,4 +np.float32,0xbf517a21,0xbf755959,4 +np.float32,0xbf159cfe,0xbf1fc9d5,4 +np.float32,0xbf638b2a,0xbf8c22cf,4 
+np.float32,0xff800000,0x7fc00000,4 +np.float32,0x3ed19ca0,0x3ed7f569,4 +np.float32,0x3f7c4460,0x3fb32d26,4 +np.float32,0x3ebfae6c,0x3ec477ab,4 +np.float32,0x3dd452d0,0x3dd4b4a8,4 +np.float32,0x3f471482,0x3f6413fb,4 +np.float32,0xbf49d704,0xbf6883fe,4 +np.float32,0xbd42c4e0,0xbd42d7af,4 +np.float32,0xbeb02994,0xbeb3d668,4 +np.float32,0x3f4d1fd8,0x3f6dedd2,4 +np.float32,0x3efb591c,0x3f035d11,4 +np.float32,0x80000000,0x80000000,4 +np.float32,0xbf50f782,0xbf7476ad,4 +np.float32,0x3d7232c0,0x3d7256f0,4 +np.float32,0x3f649460,0x3f8d46bb,4 +np.float32,0x3f5561bc,0x3f7c46a9,4 +np.float32,0x3e64f6a0,0x3e66ea5d,4 +np.float32,0x3e5b0470,0x3e5cb8f9,4 +np.float32,0xbe9b6b2c,0xbe9de904,4 +np.float32,0x3f6c33f4,0x3f966486,4 +np.float32,0x3f5cee54,0x3f854613,4 +np.float32,0x3ed3e044,0x3eda716e,4 +np.float32,0xbf3cac7f,0xbf542131,4 +np.float32,0x3c723500,0x3c723742,4 +np.float32,0x3de59900,0x3de614d3,4 +np.float32,0xbdf292f8,0xbdf32517,4 +np.float32,0x3f05c8b2,0x3f0cc59b,4 +np.float32,0xbf1ab182,0xbf261b14,4 +np.float32,0xbda396f0,0xbda3c39a,4 +np.float32,0xbf270ed0,0xbf360231,4 +np.float32,0x3f2063e6,0x3f2d557e,4 +np.float32,0x3c550280,0x3c550409,4 +np.float32,0xbe103b48,0xbe10b679,4 +np.float32,0xbebae390,0xbebf4f40,4 +np.float32,0x3f3bc868,0x3f52d0aa,4 +np.float32,0xbd62f880,0xbd631647,4 +np.float32,0xbe7a38f4,0xbe7cc833,4 +np.float32,0x3f09d796,0x3f118f39,4 +np.float32,0xbf5fa558,0xbf8802d0,4 +np.float32,0x3f111cc8,0x3f1a48b0,4 +np.float32,0x3e831958,0x3e849356,4 +np.float32,0xbf614dbd,0xbf89bc3b,4 +np.float32,0xbd521510,0xbd522cac,4 +np.float32,0x3f05af22,0x3f0ca7a0,4 +np.float32,0xbf1ac60e,0xbf2634df,4 +np.float32,0xbf6bd05e,0xbf95e3fe,4 +np.float32,0xbd1fa6e0,0xbd1fb13b,4 +np.float32,0xbeb82f7a,0xbebc68b1,4 +np.float32,0xbd92aaf8,0xbd92cb23,4 +np.float32,0xbe073a54,0xbe079fbf,4 +np.float32,0xbf198655,0xbf24a468,4 +np.float32,0x3f62f6d8,0x3f8b81ba,4 +np.float32,0x3eef4310,0x3ef8f4f9,4 +np.float32,0x3e8988e0,0x3e8b3eae,4 +np.float32,0xbf3ddba5,0xbf55e367,4 +np.float32,0x3dc6d2e0,0x3dc7232b,4 +np.float32,0xbf31040e,0xbf437601,4 +np.float32,0x3f1bb74a,0x3f276442,4 +np.float32,0xbf0075d2,0xbf0692b3,4 +np.float32,0xbf606ce0,0xbf88d0ff,4 +np.float32,0xbf083856,0xbf0fa39d,4 +np.float32,0xbdb25b20,0xbdb2950a,4 +np.float32,0xbeb86860,0xbebca5ae,4 +np.float32,0x3de83160,0x3de8b176,4 +np.float32,0xbf33a98f,0xbf472664,4 +np.float32,0x3e7795f8,0x3e7a1058,4 +np.float32,0x3e0ca6f8,0x3e0d192a,4 +np.float32,0xbf1aef60,0xbf2668c3,4 +np.float32,0xbda53b58,0xbda5695e,4 +np.float32,0xbf178096,0xbf221fc5,4 +np.float32,0xbf0a4159,0xbf120ccf,4 +np.float32,0x3f7bca36,0x3fb1d0df,4 +np.float32,0xbef94360,0xbf022b26,4 +np.float32,0xbef16f36,0xbefb6ad6,4 +np.float32,0x3f53a7e6,0x3f792e25,4 +np.float32,0xbf7c536f,0xbfb35993,4 +np.float32,0xbe84aaa0,0xbe8632a2,4 +np.float32,0x3ecb3998,0x3ed0fab9,4 +np.float32,0x3f539304,0x3f79090a,4 +np.float32,0xbf3c7816,0xbf53d3b3,4 +np.float32,0xbe7a387c,0xbe7cc7b7,4 +np.float32,0x3f7000e4,0x3f9b92b1,4 +np.float32,0x3e08fd70,0x3e0966e5,4 +np.float32,0x3db97ba0,0x3db9bcc8,4 +np.float32,0xbee99056,0xbef2886a,4 +np.float32,0xbf0668da,0xbf0d819e,4 +np.float32,0x3e58a408,0x3e5a4a51,4 +np.float32,0x3f3440b8,0x3f47faed,4 +np.float32,0xbf19a2ce,0xbf24c7ff,4 +np.float32,0xbe75e990,0xbe7856ee,4 +np.float32,0x3f3c865c,0x3f53e8cb,4 +np.float32,0x3e5e03d0,0x3e5fcac9,4 +np.float32,0x3edb8e34,0x3ee2e932,4 +np.float32,0xbf7e1f5f,0xbfb98ce4,4 +np.float32,0xbf7372ff,0xbfa0d0ae,4 +np.float32,0xbf3ee850,0xbf577548,4 +np.float32,0x3ef19658,0x3efb9737,4 +np.float32,0xbe8088de,0xbe81ecaf,4 
+np.float32,0x800000,0x800000,4 +np.float32,0xbde39dd8,0xbde4167a,4 +np.float32,0xbf065d7a,0xbf0d7441,4 +np.float32,0xbde52c78,0xbde5a79b,4 +np.float32,0xbe3a28c0,0xbe3b333e,4 +np.float32,0x3f6e8b3c,0x3f998516,4 +np.float32,0x3f3485c2,0x3f485c39,4 +np.float32,0x3e6f2c68,0x3e71673e,4 +np.float32,0xbe4ec9cc,0xbe50385e,4 +np.float32,0xbf1c3bb0,0xbf280b39,4 +np.float32,0x3ec8ea18,0x3ece76f7,4 +np.float32,0x3e26b5f8,0x3e2774c9,4 +np.float32,0x3e1e4a38,0x3e1eed5c,4 +np.float32,0xbee7a106,0xbef05c6b,4 +np.float32,0xbf305928,0xbf4289d8,4 +np.float32,0x3f0c431c,0x3f147118,4 +np.float32,0xbe57ba6c,0xbe595b52,4 +np.float32,0x3eabc9cc,0x3eaf2fc7,4 +np.float32,0xbef1ed24,0xbefbf9ae,4 +np.float32,0xbf61b576,0xbf8a29cc,4 +np.float32,0x3e9c1ff4,0x3e9ea6cb,4 +np.float32,0x3f6c53b2,0x3f968dbe,4 +np.float32,0x3e2d1b80,0x3e2df156,4 +np.float32,0x3e9f2f70,0x3ea1de4a,4 +np.float32,0xbf5861ee,0xbf80e61a,4 +np.float32,0x3f429144,0x3f5d0505,4 +np.float32,0x3e235cc8,0x3e24103e,4 +np.float32,0xbf354879,0xbf496f6a,4 +np.float32,0xbf20a146,0xbf2da447,4 +np.float32,0x3e8d8968,0x3e8f6785,4 +np.float32,0x3f3fbc94,0x3f58b4c1,4 +np.float32,0x3f2c5f50,0x3f3d1b9f,4 +np.float32,0x3f7bf0f8,0x3fb23d23,4 +np.float32,0xbf218282,0xbf2ec60f,4 +np.float32,0x3f2545aa,0x3f33a93e,4 +np.float32,0xbf4b17be,0xbf6a9018,4 +np.float32,0xbb9df700,0xbb9df728,4 +np.float32,0x3f685d54,0x3f91a06c,4 +np.float32,0x3efdfe2c,0x3f04e24c,4 +np.float32,0x3ef1c5a0,0x3efbccd9,4 +np.float32,0xbf41d731,0xbf5be76e,4 +np.float32,0x3ebd1360,0x3ec1a919,4 +np.float32,0xbf706bd4,0xbf9c2d58,4 +np.float32,0x3ea525e4,0x3ea8279d,4 +np.float32,0xbe51f1b0,0xbe537186,4 +np.float32,0x3f5e8cf6,0x3f86e4f4,4 +np.float32,0xbdad2520,0xbdad5a19,4 +np.float32,0xbf5c5704,0xbf84b0e5,4 +np.float32,0x3f47b54e,0x3f65145e,4 +np.float32,0x3eb4fc78,0x3eb8fc0c,4 +np.float32,0x3dca1450,0x3dca68a1,4 +np.float32,0x3eb02a74,0x3eb3d757,4 +np.float32,0x3f74ae6a,0x3fa2db75,4 +np.float32,0x3f800000,0x3fc90fdb,4 +np.float32,0xbdb46a00,0xbdb4a5f2,4 +np.float32,0xbe9f2ba6,0xbea1da4e,4 +np.float32,0x3f0afa70,0x3f12e8f7,4 +np.float32,0xbf677b20,0xbf909547,4 +np.float32,0x3eff9188,0x3f05cacf,4 +np.float32,0x3f720562,0x3f9e911b,4 +np.float32,0xbf7180d8,0xbf9dc794,4 +np.float32,0xbee7d076,0xbef0919d,4 +np.float32,0x3f0432ce,0x3f0aea95,4 +np.float32,0x3f3bc4c8,0x3f52cb54,4 +np.float32,0xbea72f30,0xbeaa4ebe,4 +np.float32,0x3e90ed00,0x3e92ef33,4 +np.float32,0xbda63670,0xbda6654a,4 +np.float32,0xbf5a6f85,0xbf82d7e0,4 +np.float32,0x3e6e8808,0x3e70be34,4 +np.float32,0xbf4f3822,0xbf71768f,4 +np.float32,0x3e5c8a68,0x3e5e483f,4 +np.float32,0xbf0669d4,0xbf0d82c4,4 +np.float32,0xbf79f77c,0xbfad37b0,4 +np.float32,0x3f25c82c,0x3f345453,4 +np.float32,0x3f1b2948,0x3f26b188,4 +np.float32,0x3ef7e288,0x3f016159,4 +np.float32,0x3c274280,0x3c27433e,4 +np.float32,0xbf4c8fa0,0xbf6cfd5e,4 +np.float32,0x3ea4ccb4,0x3ea7c966,4 +np.float32,0xbf7b157e,0xbfafefca,4 +np.float32,0xbee4c2b0,0xbeed264d,4 +np.float32,0xbc1fd640,0xbc1fd6e6,4 +np.float32,0x3e892308,0x3e8ad4f6,4 +np.float32,0xbf3f69c7,0xbf5837ed,4 +np.float32,0x3ec879e8,0x3ecdfd05,4 +np.float32,0x3f07a8c6,0x3f0efa30,4 +np.float32,0x3f67b880,0x3f90dd4d,4 +np.float32,0x3e8a11c8,0x3e8bccd5,4 +np.float32,0x3f7df6fc,0x3fb8e935,4 +np.float32,0xbef3e498,0xbefe3599,4 +np.float32,0xbf18ad7d,0xbf2395d8,4 +np.float32,0x3f2bce74,0x3f3c57f5,4 +np.float32,0xbf38086e,0xbf4d5c2e,4 +np.float32,0x3f772d7a,0x3fa75c35,4 +np.float32,0xbf3b6e24,0xbf524c00,4 +np.float32,0xbdd39108,0xbdd3f1d4,4 +np.float32,0xbf691f6b,0xbf928974,4 +np.float32,0x3f146188,0x3f1e45e4,4 
+np.float32,0xbf56045b,0xbf7d6e03,4 +np.float32,0xbf4b2ee4,0xbf6ab622,4 +np.float32,0xbf3fa3f6,0xbf588f9d,4 +np.float32,0x3f127bb0,0x3f1bf398,4 +np.float32,0x3ed858a0,0x3edf5d3e,4 +np.float32,0xbd6de3b0,0xbd6e05fa,4 +np.float32,0xbecc662c,0xbed24261,4 +np.float32,0xbd6791d0,0xbd67b170,4 +np.float32,0xbf146016,0xbf1e441e,4 +np.float32,0xbf61f04c,0xbf8a6841,4 +np.float32,0xbe7f16d0,0xbe80e6e7,4 +np.float32,0xbebf93e6,0xbec45b10,4 +np.float32,0xbe8a59fc,0xbe8c17d1,4 +np.float32,0xbebc7a0c,0xbec10426,4 +np.float32,0xbf2a682e,0xbf3a7649,4 +np.float32,0xbe18d0cc,0xbe19637b,4 +np.float32,0x3d7f5100,0x3d7f7b66,4 +np.float32,0xbf10f5fa,0xbf1a1998,4 +np.float32,0x3f25e956,0x3f347fdc,4 +np.float32,0x3e6e8658,0x3e70bc78,4 +np.float32,0x3f21a5de,0x3f2ef3a5,4 +np.float32,0xbf4e71d4,0xbf702607,4 +np.float32,0xbf49d6b6,0xbf688380,4 +np.float32,0xbdb729c0,0xbdb7687c,4 +np.float32,0xbf63e1f4,0xbf8c81c7,4 +np.float32,0x3dda6cb0,0x3ddad73e,4 +np.float32,0x3ee1bc40,0x3ee9c612,4 +np.float32,0x3ebdb5f8,0x3ec2581b,4 +np.float32,0x3f7d9576,0x3fb77646,4 +np.float32,0x3e087140,0x3e08d971,4 +np.float64,0xbfdba523cfb74a48,0xbfdc960ddd9c0506,3 +np.float64,0x3fb51773622a2ee0,0x3fb51d93f77089d5,3 +np.float64,0x3fc839f6d33073f0,0x3fc85f9a47dfe8e6,3 +np.float64,0xbfecba2d82f9745b,0xbff1d55416c6c993,3 +np.float64,0x3fd520fe47aa41fc,0x3fd58867f1179634,3 +np.float64,0x3fe1b369c56366d4,0x3fe2c1ac9dd2c45a,3 +np.float64,0xbfec25a7cd784b50,0xbff133417389b12d,3 +np.float64,0xbfd286342ea50c68,0xbfd2cb0bca22e66d,3 +np.float64,0x3fd5f6fe5eabedfc,0x3fd66bad16680d08,3 +np.float64,0xbfe863a87570c751,0xbfebbb9b637eb6dc,3 +np.float64,0x3fc97f5b4d32feb8,0x3fc9ab5066d8eaec,3 +np.float64,0xbfcb667af936ccf4,0xbfcb9d3017047a1d,3 +np.float64,0xbfd1b7b9afa36f74,0xbfd1f3c175706154,3 +np.float64,0x3fef97385b7f2e70,0x3ff6922a1a6c709f,3 +np.float64,0xbfd13e4205a27c84,0xbfd1757c993cdb74,3 +np.float64,0xbfd18d88aca31b12,0xbfd1c7dd75068f7d,3 +np.float64,0x3fe040ce0f60819c,0x3fe10c59d2a27089,3 +np.float64,0xbfddc7deddbb8fbe,0xbfdef9de5baecdda,3 +np.float64,0xbfcf6e96193edd2c,0xbfcfc1bb7396b9a3,3 +np.float64,0x3fd544f494aa89e8,0x3fd5ae850e2b37dd,3 +np.float64,0x3fe15b381fe2b670,0x3fe25841c7bfe2af,3 +np.float64,0xbfde793420bcf268,0xbfdfc2ddc7b4a341,3 +np.float64,0x3fd0d5db30a1abb8,0x3fd1092cef4aa4fb,3 +np.float64,0x3fe386a08c670d42,0x3fe50059bbf7f491,3 +np.float64,0xbfe0aae3a96155c8,0xbfe1880ef13e95ce,3 +np.float64,0xbfe80eeb03f01dd6,0xbfeb39e9f107e944,3 +np.float64,0xbfd531af3caa635e,0xbfd59a178f17552a,3 +np.float64,0x3fcced14ab39da28,0x3fcd2d9a806337ef,3 +np.float64,0xbfdb4c71bcb698e4,0xbfdc33d9d9daf708,3 +np.float64,0xbfde7375ecbce6ec,0xbfdfbc5611bc48ff,3 +np.float64,0x3fecc5707a798ae0,0x3ff1e2268d778017,3 +np.float64,0x3fe8f210a1f1e422,0x3fec9b3349a5baa2,3 +np.float64,0x3fe357f9b8e6aff4,0x3fe4c5a0b89a9228,3 +np.float64,0xbfe0f863b761f0c8,0xbfe1e3283494c3d4,3 +np.float64,0x3fd017c395a02f88,0x3fd044761f2f4a66,3 +np.float64,0x3febeb4746f7d68e,0x3ff0f6b955e7feb6,3 +np.float64,0xbfbdaaeeae3b55e0,0xbfbdbc0950109261,3 +np.float64,0xbfea013095f40261,0xbfee5b8fe8ad8593,3 +np.float64,0xbfe9f87b7973f0f7,0xbfee4ca3a8438d72,3 +np.float64,0x3fd37f77cfa6fef0,0x3fd3d018c825f057,3 +np.float64,0x3fb0799cee20f340,0x3fb07c879e7cb63f,3 +np.float64,0xbfdcfd581cb9fab0,0xbfde15e35314b52d,3 +np.float64,0xbfd49781b8a92f04,0xbfd4f6fa1516fefc,3 +np.float64,0x3fb3fcb6d627f970,0x3fb401ed44a713a8,3 +np.float64,0x3fd5737ef8aae6fc,0x3fd5dfe42d4416c7,3 +np.float64,0x7ff4000000000000,0x7ffc000000000000,3 +np.float64,0xbfe56ae780ead5cf,0xbfe776ea5721b900,3 
+np.float64,0x3fd4567786a8acf0,0x3fd4b255421c161a,3 +np.float64,0x3fef6fb58cfedf6c,0x3ff62012dfcf0a33,3 +np.float64,0xbfd1dbcd3da3b79a,0xbfd2194fd628f74d,3 +np.float64,0x3fd9350016b26a00,0x3fd9e8b01eb023e9,3 +np.float64,0xbfe4fb3a69e9f675,0xbfe6e1d2c9eca56c,3 +np.float64,0x3fe9fe0f73f3fc1e,0x3fee5631cfd39772,3 +np.float64,0xbfd51c1bc6aa3838,0xbfd5833b3bd53543,3 +np.float64,0x3fc64158e12c82b0,0x3fc65e7352f237d7,3 +np.float64,0x3fd0d8ee1ba1b1dc,0x3fd10c5c99a16f0e,3 +np.float64,0x3fd5554e15aaaa9c,0x3fd5bfdb9ec9e873,3 +np.float64,0x3fe61ce209ec39c4,0x3fe869bc4c28437d,3 +np.float64,0xbfe4e42c8c69c859,0xbfe6c356dac7e2db,3 +np.float64,0xbfe157021062ae04,0xbfe2533ed39f4212,3 +np.float64,0x3fe844066cf0880c,0x3feb8aea0b7bd0a4,3 +np.float64,0x3fe55016586aa02c,0x3fe752e4b2a67b9f,3 +np.float64,0x3fdabce619b579cc,0x3fdb95809bc789d9,3 +np.float64,0x3fee03bae37c0776,0x3ff3778ba38ca882,3 +np.float64,0xbfeb2f5844f65eb0,0xbff03dd1b767d3c8,3 +np.float64,0x3fedcfdbaffb9fb8,0x3ff32e81d0639164,3 +np.float64,0x3fe06fc63ee0df8c,0x3fe142fc27f92eaf,3 +np.float64,0x3fe7ce90fd6f9d22,0x3fead8f832bbbf5d,3 +np.float64,0xbfbc0015ce380028,0xbfbc0e7470e06e86,3 +np.float64,0xbfe9b3de90f367bd,0xbfedd857931dfc6b,3 +np.float64,0xbfcb588f5936b120,0xbfcb8ef0124a4f21,3 +np.float64,0x3f8d376a503a6f00,0x3f8d37ab43e7988d,3 +np.float64,0xbfdb123a40b62474,0xbfdbf38b6cf5db92,3 +np.float64,0xbfee7da6be7cfb4e,0xbff433042cd9d5eb,3 +np.float64,0xbfc4c9e01b2993c0,0xbfc4e18dbafe37ef,3 +np.float64,0x3fedd42faffba860,0x3ff334790cd18a19,3 +np.float64,0x3fe9cdf772f39bee,0x3fee044f87b856ab,3 +np.float64,0x3fe0245881e048b2,0x3fe0eb5a1f739c8d,3 +np.float64,0xbfe4712bd9e8e258,0xbfe62cb3d82034aa,3 +np.float64,0x3fe9a16b46f342d6,0x3fedb972b2542551,3 +np.float64,0xbfe57ab4536af568,0xbfe78c34b03569c2,3 +np.float64,0x3fb6d6ceb22dada0,0x3fb6de976964d6dd,3 +np.float64,0x3fc3ac23a3275848,0x3fc3c02de53919b8,3 +np.float64,0xbfccb531e7396a64,0xbfccf43ec69f6281,3 +np.float64,0xbfd2f07fc8a5e100,0xbfd33a35a8c41b62,3 +np.float64,0xbfe3e5dd04e7cbba,0xbfe57940157c27ba,3 +np.float64,0x3feefe40757dfc80,0x3ff51bc72b846af6,3 +np.float64,0x8000000000000001,0x8000000000000001,3 +np.float64,0x3fecb7b766796f6e,0x3ff1d28972a0fc7e,3 +np.float64,0xbfea1bf1357437e2,0xbfee89a6532bfd71,3 +np.float64,0xbfca3983b7347308,0xbfca696463b791ef,3 +np.float64,0x10000000000000,0x10000000000000,3 +np.float64,0xbf886b45d030d680,0xbf886b6bbc04314b,3 +np.float64,0x3fd5224bb5aa4498,0x3fd589c92e82218f,3 +np.float64,0xbfec799874f8f331,0xbff18d5158b8e640,3 +np.float64,0xbf88124410302480,0xbf88126863350a16,3 +np.float64,0xbfe37feaaa66ffd6,0xbfe4f7e24382e79d,3 +np.float64,0x3fd777eca1aeefd8,0x3fd8076ead6d55dc,3 +np.float64,0x3fecaaeb3af955d6,0x3ff1c4159fa3e965,3 +np.float64,0xbfeb81e4e6f703ca,0xbff08d4e4c77fada,3 +np.float64,0xbfd7d0a0edafa142,0xbfd866e37010312e,3 +np.float64,0x3feda48c00fb4918,0x3ff2f3fd33c36307,3 +np.float64,0x3feb87ecc4770fda,0x3ff09336e490deda,3 +np.float64,0xbfefd78ad27faf16,0xbff78abbafb50ac1,3 +np.float64,0x3fe58e918c6b1d24,0x3fe7a70b38cbf016,3 +np.float64,0x3fda163b95b42c78,0x3fdade86b88ba4ee,3 +np.float64,0x3fe8fc1aaf71f836,0x3fecab3f93b59df5,3 +np.float64,0xbf8de56f903bcac0,0xbf8de5b527cec797,3 +np.float64,0xbfec112db2f8225b,0xbff11dd648de706f,3 +np.float64,0x3fc3214713264290,0x3fc333b1c862f7d0,3 +np.float64,0xbfeb5e5836f6bcb0,0xbff06ac364b49177,3 +np.float64,0x3fc23d9777247b30,0x3fc24d8ae3bcb615,3 +np.float64,0xbfdf0eed65be1dda,0xbfe036cea9b9dfb6,3 +np.float64,0xbfb2d5c85a25ab90,0xbfb2da24bb409ff3,3 +np.float64,0xbfecdda0c3f9bb42,0xbff1fdf94fc6e89e,3 
+np.float64,0x3fdfe79154bfcf24,0x3fe0b338e0476a9d,3 +np.float64,0xbfd712ac6bae2558,0xbfd79abde21f287b,3 +np.float64,0x3fea3f148a747e2a,0x3feec6bed9d4fa04,3 +np.float64,0x3fd4879e4ca90f3c,0x3fd4e632fa4e2edd,3 +np.float64,0x3fe9137a9e7226f6,0x3fecd0c441088d6a,3 +np.float64,0xbfc75bf4ef2eb7e8,0xbfc77da8347d742d,3 +np.float64,0xbfd94090a0b28122,0xbfd9f5458816ed5a,3 +np.float64,0x3fde439cbcbc8738,0x3fdf85fbf496b61f,3 +np.float64,0xbfe18bacdce3175a,0xbfe29210e01237f7,3 +np.float64,0xbfd58ec413ab1d88,0xbfd5fcd838f0a934,3 +np.float64,0xbfeae5af2d75cb5e,0xbfeff1de1b4a06be,3 +np.float64,0x3fb64d1a162c9a30,0x3fb65458fb831354,3 +np.float64,0x3fc18b1e15231640,0x3fc1994c6ffd7a6a,3 +np.float64,0xbfd7b881bcaf7104,0xbfd84ce89a9ee8c7,3 +np.float64,0x3feb916a40f722d4,0x3ff09c8aa851d7c4,3 +np.float64,0x3fdab5fbb5b56bf8,0x3fdb8de43961bbde,3 +np.float64,0x3fe4f35402e9e6a8,0x3fe6d75dc5082894,3 +np.float64,0x3fe2fdb2e5e5fb66,0x3fe454e32a5d2182,3 +np.float64,0x3fe8607195f0c0e4,0x3febb6a4c3bf6a5c,3 +np.float64,0x3fd543ca9aaa8794,0x3fd5ad49203ae572,3 +np.float64,0x3fe8e05ca1f1c0ba,0x3fec7eff123dcc58,3 +np.float64,0x3fe298b6ca65316e,0x3fe3d81d2927c4dd,3 +np.float64,0x3fcfecea733fd9d8,0x3fd0220f1d0faf78,3 +np.float64,0xbfe2e739f065ce74,0xbfe439004e73772a,3 +np.float64,0xbfd1ae6b82a35cd8,0xbfd1ea129a5ee756,3 +np.float64,0xbfeb7edff576fdc0,0xbff08a5a638b8a8b,3 +np.float64,0x3fe5b645ff6b6c8c,0x3fe7dcee1faefe3f,3 +np.float64,0xbfd478427ba8f084,0xbfd4d5fc7c239e60,3 +np.float64,0xbfe39904e3e7320a,0xbfe517972b30b1e5,3 +np.float64,0xbfd3b75b6ba76eb6,0xbfd40acf20a6e074,3 +np.float64,0x3fd596267aab2c4c,0x3fd604b01faeaf75,3 +np.float64,0x3fe134463762688c,0x3fe229fc36784a72,3 +np.float64,0x3fd25dadf7a4bb5c,0x3fd2a0b9e04ea060,3 +np.float64,0xbfc05d3e0b20ba7c,0xbfc068bd2bb9966f,3 +np.float64,0x3f8cf517b039ea00,0x3f8cf556ed74b163,3 +np.float64,0x3fda87361cb50e6c,0x3fdb5a75af897e7f,3 +np.float64,0x3fe53e1926ea7c32,0x3fe73acf01b8ff31,3 +np.float64,0x3fe2e94857e5d290,0x3fe43b8cc820f9c7,3 +np.float64,0x3fd81fe6acb03fcc,0x3fd8bc623c0068cf,3 +np.float64,0xbfddf662c3bbecc6,0xbfdf2e76dc90786e,3 +np.float64,0x3fece174fbf9c2ea,0x3ff2026a1a889580,3 +np.float64,0xbfdc83c5b8b9078c,0xbfdd8dcf6ee3b7da,3 +np.float64,0x3feaf5448f75ea8a,0x3ff0075b108bcd0d,3 +np.float64,0xbfebf32f7ef7e65f,0xbff0fed42aaa826a,3 +np.float64,0x3fe389e5e8e713cc,0x3fe5047ade055ccb,3 +np.float64,0x3f635cdcc026ba00,0x3f635cddeea082ce,3 +np.float64,0x3fae580f543cb020,0x3fae5c9d5108a796,3 +np.float64,0x3fec9fafce793f60,0x3ff1b77bec654f00,3 +np.float64,0x3fb19d226e233a40,0x3fb1a0b32531f7ee,3 +np.float64,0xbfdf9a71e7bf34e4,0xbfe086cef88626c7,3 +np.float64,0x8010000000000000,0x8010000000000000,3 +np.float64,0xbfef170ba2fe2e17,0xbff54ed4675f5b8a,3 +np.float64,0xbfcc6e2f8f38dc60,0xbfccab65fc34d183,3 +np.float64,0x3fee756c4bfcead8,0x3ff4258782c137e6,3 +np.float64,0xbfd461c218a8c384,0xbfd4be3e391f0ff4,3 +np.float64,0xbfe3b64686e76c8d,0xbfe53caa16d6c90f,3 +np.float64,0xbfc1c65d8d238cbc,0xbfc1d51e58f82403,3 +np.float64,0x3fe6e06c63edc0d8,0x3fe97cb832eeb6a2,3 +np.float64,0xbfc9fc20b933f840,0xbfca2ab004312d85,3 +np.float64,0xbfe29aa6df65354e,0xbfe3da7ecf3ba466,3 +np.float64,0x3fea4df7d1749bf0,0x3feee0d448bd4746,3 +np.float64,0xbfedec6161fbd8c3,0xbff3563e1d943aa2,3 +np.float64,0x3fdb6f0437b6de08,0x3fdc5a1888b1213d,3 +np.float64,0xbfe270cbd3e4e198,0xbfe3a72ac27a0b0c,3 +np.float64,0xbfdfff8068bfff00,0xbfe0c1088e3b8983,3 +np.float64,0xbfd28edbe6a51db8,0xbfd2d416c8ed363e,3 +np.float64,0xbfb4e35f9229c6c0,0xbfb4e9531d2a737f,3 +np.float64,0xbfee6727e97cce50,0xbff40e7717576e46,3 
+np.float64,0xbfddb5fbddbb6bf8,0xbfdee5aad78f5361,3 +np.float64,0xbfdf9d3e9dbf3a7e,0xbfe0886b191f2957,3 +np.float64,0x3fa57e77042afce0,0x3fa5801518ea9342,3 +np.float64,0x3f95c4e4882b89c0,0x3f95c55003c8e714,3 +np.float64,0x3fd9b10f61b36220,0x3fda6fe5d635a8aa,3 +np.float64,0xbfe2973411652e68,0xbfe3d641fe9885fd,3 +np.float64,0xbfee87bd5a7d0f7b,0xbff443bea81b3fff,3 +np.float64,0x3f9ea064c83d40c0,0x3f9ea19025085b2f,3 +np.float64,0xbfe4b823dfe97048,0xbfe689623d30dc75,3 +np.float64,0xbfa06a326c20d460,0xbfa06aeacbcd3eb8,3 +np.float64,0x3fe1e5c4c1e3cb8a,0x3fe2fe44b822f20e,3 +np.float64,0x3f99dafaa833b600,0x3f99dbaec10a1a0a,3 +np.float64,0xbfed7cb3877af967,0xbff2bfe9e556aaf9,3 +np.float64,0x3fd604f2e2ac09e4,0x3fd67a89408ce6ba,3 +np.float64,0x3fec57b60f78af6c,0x3ff16881f46d60f7,3 +np.float64,0xbfea2e3a17745c74,0xbfeea95c7190fd42,3 +np.float64,0xbfd60a7c37ac14f8,0xbfd6806ed642de35,3 +np.float64,0xbfe544b9726a8973,0xbfe743ac399d81d7,3 +np.float64,0xbfd13520faa26a42,0xbfd16c02034a8fe0,3 +np.float64,0xbfea9ea59ff53d4b,0xbfef70538ee12e00,3 +np.float64,0x3fd66633f8accc68,0x3fd6e23c13ab0e9e,3 +np.float64,0xbfe4071bd3e80e38,0xbfe5a3c9ba897d81,3 +np.float64,0xbfbe1659fa3c2cb0,0xbfbe2831d4fed196,3 +np.float64,0xbfd3312777a6624e,0xbfd37df09b9baeba,3 +np.float64,0x3fd13997caa27330,0x3fd170a4900c8907,3 +np.float64,0xbfe7cbc235ef9784,0xbfead4c4d6cbf129,3 +np.float64,0xbfe1456571628acb,0xbfe23e4ec768c8e2,3 +np.float64,0xbfedf1a044fbe340,0xbff35da96773e176,3 +np.float64,0x3fce38b1553c7160,0x3fce8270709774f9,3 +np.float64,0xbfecb01761f9602f,0xbff1c9e9d382f1f8,3 +np.float64,0xbfe0a03560e1406b,0xbfe17b8d5a1ca662,3 +np.float64,0x3fe50f37cbea1e70,0x3fe6fc55e1ae7da6,3 +np.float64,0xbfe12d64a0625aca,0xbfe221d3a7834e43,3 +np.float64,0xbf6fb288403f6500,0xbf6fb28d6f389db6,3 +np.float64,0x3fda831765b50630,0x3fdb55eecae58ca9,3 +np.float64,0x3fe1a0fe4c6341fc,0x3fe2ab9564304425,3 +np.float64,0xbfef2678a77e4cf1,0xbff56ff42b2797bb,3 +np.float64,0xbfab269c1c364d40,0xbfab29df1cd48779,3 +np.float64,0x3fe8ec82a271d906,0x3fec92567d7a6675,3 +np.float64,0xbfc235115f246a24,0xbfc244ee567682ea,3 +np.float64,0x3feef5bf8d7deb80,0x3ff50ad4875ee9bd,3 +np.float64,0x3fe768b5486ed16a,0x3fea421356160e65,3 +np.float64,0xbfd4255684a84aae,0xbfd47e8baf7ec7f6,3 +np.float64,0x3fc7f67f2b2fed00,0x3fc81ae83cf92dd5,3 +np.float64,0x3fe9b1b19a736364,0x3fedd4b0e24ee741,3 +np.float64,0x3fb27eb9e624fd70,0x3fb282dacd89ce28,3 +np.float64,0xbfd490b710a9216e,0xbfd4efcdeb213458,3 +np.float64,0xbfd1347b2ca268f6,0xbfd16b55dece2d38,3 +np.float64,0x3fc6a5668d2d4ad0,0x3fc6c41452c0c087,3 +np.float64,0xbfca7b209f34f640,0xbfcaac710486f6bd,3 +np.float64,0x3fc23a1a47247438,0x3fc24a047fd4c27a,3 +np.float64,0x3fdb1413a8b62828,0x3fdbf595e2d994bc,3 +np.float64,0xbfea69b396f4d367,0xbfef11bdd2b0709a,3 +np.float64,0x3fd14c9958a29934,0x3fd1846161b10422,3 +np.float64,0xbfe205f44be40be8,0xbfe325283aa3c6a8,3 +np.float64,0x3fecd03c9ef9a07a,0x3ff1ee85aaf52a01,3 +np.float64,0x3fe34281d7e68504,0x3fe4aab63e6de816,3 +np.float64,0xbfe120e2376241c4,0xbfe213023ab03939,3 +np.float64,0xbfe951edc4f2a3dc,0xbfed3615e38576f8,3 +np.float64,0x3fe5a2286f6b4450,0x3fe7c196e0ec10ed,3 +np.float64,0xbfed7a3e1f7af47c,0xbff2bcc0793555d2,3 +np.float64,0x3fe050274960a04e,0x3fe11e2e256ea5cc,3 +np.float64,0xbfcfa71f653f4e40,0xbfcffc11483d6a06,3 +np.float64,0x3f6ead2e403d5a00,0x3f6ead32f314c052,3 +np.float64,0x3fe3a2a026674540,0x3fe523bfe085f6ec,3 +np.float64,0xbfe294a62e65294c,0xbfe3d31ebd0b4ca2,3 +np.float64,0xbfb4894d06291298,0xbfb48ef4b8e256b8,3 +np.float64,0xbfc0c042c1218084,0xbfc0cc98ac2767c4,3 
+np.float64,0xbfc6a32cb52d4658,0xbfc6c1d1597ed06b,3 +np.float64,0xbfd30f7777a61eee,0xbfd35aa39fee34eb,3 +np.float64,0x3fe7fc2c2eeff858,0x3feb1d8a558b5537,3 +np.float64,0x7fefffffffffffff,0x7ff8000000000000,3 +np.float64,0xbfdadf917bb5bf22,0xbfdbbbae9a9f67a0,3 +np.float64,0xbfcf0395e13e072c,0xbfcf5366015f7362,3 +np.float64,0xbfe8644c9170c899,0xbfebbc98e74a227d,3 +np.float64,0x3fc3b2d8e52765b0,0x3fc3c6f7d44cffaa,3 +np.float64,0x3fc57407b92ae810,0x3fc58e12ccdd47a1,3 +np.float64,0x3fd56a560daad4ac,0x3fd5d62b8dfcc058,3 +np.float64,0x3fd595deefab2bbc,0x3fd6046420b2f79b,3 +np.float64,0xbfd5360f50aa6c1e,0xbfd59ebaacd815b8,3 +np.float64,0x3fdfb6aababf6d54,0x3fe0970b8aac9f61,3 +np.float64,0x3ff0000000000000,0x3ff921fb54442d18,3 +np.float64,0xbfeb3a8958f67513,0xbff04872e8278c79,3 +np.float64,0x3f9e1ea6683c3d40,0x3f9e1fc326186705,3 +np.float64,0x3fe6b6d5986d6dac,0x3fe94175bd60b19d,3 +np.float64,0xbfee4d90b77c9b21,0xbff3e60e9134edc2,3 +np.float64,0x3fd806ce0cb00d9c,0x3fd8a14c4855a8f5,3 +np.float64,0x3fd54acc75aa9598,0x3fd5b4b72fcbb5df,3 +np.float64,0xbfe59761f16b2ec4,0xbfe7b2fa5d0244ac,3 +np.float64,0xbfcd4fa3513a9f48,0xbfcd92d0814a5383,3 +np.float64,0xbfdc827523b904ea,0xbfdd8c577b53053c,3 +np.float64,0xbfd4bb7f34a976fe,0xbfd51d00d9a99360,3 +np.float64,0xbfe818bc87f03179,0xbfeb48d1ea0199c5,3 +np.float64,0xbfa8a2e15c3145c0,0xbfa8a5510ba0e45c,3 +np.float64,0xbfb6d15f422da2c0,0xbfb6d922689da015,3 +np.float64,0x3fcd04eaab3a09d8,0x3fcd46131746ef08,3 +np.float64,0x3fcfb5cfbb3f6ba0,0x3fd0059d308237f3,3 +np.float64,0x3fe8dcf609f1b9ec,0x3fec7997973010b6,3 +np.float64,0xbfdf1834d7be306a,0xbfe03c1d4e2b48f0,3 +np.float64,0x3fee82ae50fd055c,0x3ff43b545066fe1a,3 +np.float64,0xbfde039c08bc0738,0xbfdf3d6ed4d2ee5c,3 +np.float64,0x3fec07389bf80e72,0x3ff1137ed0acd161,3 +np.float64,0xbfef44c010fe8980,0xbff5b488ad22a4c5,3 +np.float64,0x3f76e722e02dce00,0x3f76e72ab2759d88,3 +np.float64,0xbfcaa9e6053553cc,0xbfcadc41125fca93,3 +np.float64,0x3fed6088147ac110,0x3ff29c06c4ef35fc,3 +np.float64,0x3fd32bd836a657b0,0x3fd3785fdb75909f,3 +np.float64,0xbfeedbb1d97db764,0xbff4d87f6c82a93c,3 +np.float64,0xbfe40f31d5e81e64,0xbfe5ae292cf258a2,3 +np.float64,0x7ff8000000000000,0x7ff8000000000000,3 +np.float64,0xbfeb2b25bc76564c,0xbff039d81388550c,3 +np.float64,0x3fec5008fa78a012,0x3ff1604195801da3,3 +np.float64,0x3fce2d4f293c5aa0,0x3fce76b99c2db4da,3 +np.float64,0xbfdc435412b886a8,0xbfdd45e7b7813f1e,3 +np.float64,0x3fdf2c9d06be593c,0x3fe047cb03c141b6,3 +np.float64,0x3fddefc61ebbdf8c,0x3fdf26fb8fad9fae,3 +np.float64,0x3fab50218436a040,0x3fab537395eaf3bb,3 +np.float64,0xbfd5b95a8fab72b6,0xbfd62a191a59343a,3 +np.float64,0x3fdbf803b4b7f008,0x3fdcf211578e98c3,3 +np.float64,0xbfec8c255979184b,0xbff1a1bee108ed30,3 +np.float64,0x3fe33cdaffe679b6,0x3fe4a3a318cd994f,3 +np.float64,0x3fd8cf585cb19eb0,0x3fd97a408bf3c38c,3 +np.float64,0x3fe919dde07233bc,0x3fecdb0ea13a2455,3 +np.float64,0xbfd5ba35e4ab746c,0xbfd62b024805542d,3 +np.float64,0x3fd2f933e7a5f268,0x3fd343527565e97c,3 +np.float64,0xbfe5b9f8ddeb73f2,0xbfe7e1f772c3e438,3 +np.float64,0x3fe843cd92f0879c,0x3feb8a92d68eae3e,3 +np.float64,0xbfd096b234a12d64,0xbfd0c7beca2c6605,3 +np.float64,0xbfef3363da7e66c8,0xbff58c98dde6c27c,3 +np.float64,0x3fd51b01ddaa3604,0x3fd582109d89ead1,3 +np.float64,0x3fea0f10ff741e22,0x3fee736c2d2a2067,3 +np.float64,0x3fc276e7b724edd0,0x3fc28774520bc6d4,3 +np.float64,0xbfef9abc9f7f3579,0xbff69d49762b1889,3 +np.float64,0x3fe1539ec0e2a73e,0x3fe24f370b7687d0,3 +np.float64,0x3fad72350c3ae460,0x3fad765e7766682a,3 +np.float64,0x3fa289a47c251340,0x3fa28aae12f41646,3 
+np.float64,0xbfe5c488e5eb8912,0xbfe7f05d7e7dcddb,3 +np.float64,0xbfc22ef1d7245de4,0xbfc23ebeb990a1b8,3 +np.float64,0x3fe59a0b80eb3418,0x3fe7b695fdcba1de,3 +np.float64,0xbfe9cad619f395ac,0xbfedff0514d91e2c,3 +np.float64,0x3fc8bc74eb3178e8,0x3fc8e48cb22da666,3 +np.float64,0xbfc5389a3f2a7134,0xbfc551cd6febc544,3 +np.float64,0x3fce82feb33d0600,0x3fceceecce2467ef,3 +np.float64,0x3fda346791b468d0,0x3fdaff95154a4ca6,3 +np.float64,0x3fd04501fea08a04,0x3fd073397b32607e,3 +np.float64,0xbfb6be498a2d7c90,0xbfb6c5f93aeb0e57,3 +np.float64,0x3fe1f030dd63e062,0x3fe30ad8fb97cce0,3 +np.float64,0xbfee3fb36dfc7f67,0xbff3d0a5e380b86f,3 +np.float64,0xbfa876773c30ecf0,0xbfa878d9d3df6a3f,3 +np.float64,0x3fdb58296eb6b054,0x3fdc40ceffb17f82,3 +np.float64,0xbfea16b5d8742d6c,0xbfee809b99fd6adc,3 +np.float64,0xbfdc5062b6b8a0c6,0xbfdd547623275fdb,3 +np.float64,0x3fef6db242fedb64,0x3ff61ab4cdaef467,3 +np.float64,0xbfc9f778f933eef0,0xbfca25eef1088167,3 +np.float64,0xbfd22063eba440c8,0xbfd260c8766c69cf,3 +np.float64,0x3fdd2379f2ba46f4,0x3fde40b025cb1ffa,3 +np.float64,0xbfea967af2f52cf6,0xbfef61a178774636,3 +np.float64,0x3fe4f5b49fe9eb6a,0x3fe6da8311a5520e,3 +np.float64,0x3feccde17b799bc2,0x3ff1ebd0ea228b71,3 +np.float64,0x3fe1bb76506376ec,0x3fe2cb56fca01840,3 +np.float64,0xbfef94e583ff29cb,0xbff68aeab8ba75a2,3 +np.float64,0x3fed024a55fa0494,0x3ff228ea5d456e9d,3 +np.float64,0xbfe877b2a8f0ef65,0xbfebdaa1a4712459,3 +np.float64,0x3fef687a8d7ed0f6,0x3ff60cf5fef8d448,3 +np.float64,0xbfeeb2dc8afd65b9,0xbff48dda6a906cd6,3 +np.float64,0x3fdb2e28aeb65c50,0x3fdc12620655eb7a,3 +np.float64,0x3fedc1863afb830c,0x3ff31ae823315e83,3 +np.float64,0xbfe6b1bb546d6376,0xbfe93a38163e3a59,3 +np.float64,0x3fe479c78468f390,0x3fe637e5c0fc5730,3 +np.float64,0x3fbad1fade35a3f0,0x3fbade9a43ca05cf,3 +np.float64,0xbfe2d1c563e5a38b,0xbfe41e712785900c,3 +np.float64,0xbfc08c33ed211868,0xbfc09817a752d500,3 +np.float64,0xbfecce0935f99c12,0xbff1ebfe84524037,3 +np.float64,0x3fce4ef0e73c9de0,0x3fce995638a3dc48,3 +np.float64,0xbfd2fb2343a5f646,0xbfd345592517ca18,3 +np.float64,0x3fd848f7cdb091f0,0x3fd8e8bee5f7b49a,3 +np.float64,0x3fe532b7d2ea6570,0x3fe72b9ac747926a,3 +np.float64,0x3fd616aadcac2d54,0x3fd68d692c5cad42,3 +np.float64,0x3fd7720eb3aee41c,0x3fd801206a0e1e43,3 +np.float64,0x3fee835a35fd06b4,0x3ff43c7175eb7a54,3 +np.float64,0xbfe2e8f70b65d1ee,0xbfe43b2800a947a7,3 +np.float64,0xbfed38f45d7a71e9,0xbff26acd6bde7174,3 +np.float64,0xbfc0c62661218c4c,0xbfc0d28964d66120,3 +np.float64,0x3fe97940bef2f282,0x3fed76b986a74ee3,3 +np.float64,0x3fc96f7dc532def8,0x3fc99b20044c8fcf,3 +np.float64,0xbfd60201eeac0404,0xbfd677675efaaedc,3 +np.float64,0x3fe63c0867ec7810,0x3fe894f060200140,3 +np.float64,0xbfef6144b37ec289,0xbff5fa589a515ba8,3 +np.float64,0xbfde2da0c8bc5b42,0xbfdf6d0b59e3232a,3 +np.float64,0xbfd7401612ae802c,0xbfd7cb74ddd413b9,3 +np.float64,0x3fe41c012de83802,0x3fe5be9d87da3f82,3 +np.float64,0x3fdf501609bea02c,0x3fe05c1d96a2270b,3 +np.float64,0x3fcf9fa1233f3f40,0x3fcff45598e72f07,3 +np.float64,0x3fd4e3895ea9c714,0x3fd547580d8392a2,3 +np.float64,0x3fe1e8ff5fe3d1fe,0x3fe3022a0b86a2ab,3 +np.float64,0xbfe0aa55956154ab,0xbfe18768823da589,3 +np.float64,0x3fb2a0aa26254150,0x3fb2a4e1faff1c93,3 +np.float64,0x3fd3823417a70468,0x3fd3d2f808dbb167,3 +np.float64,0xbfaed323643da640,0xbfaed7e9bef69811,3 +np.float64,0x3fe661e8c4ecc3d2,0x3fe8c9c535f43c16,3 +np.float64,0xbfa429777c2852f0,0xbfa42acd38ba02a6,3 +np.float64,0x3fb5993ea22b3280,0x3fb59fd353e47397,3 +np.float64,0x3fee62d21efcc5a4,0x3ff40788f9278ade,3 +np.float64,0xbf813fb810227f80,0xbf813fc56d8f3c53,3 
+np.float64,0x3fd56205deaac40c,0x3fd5cd59671ef193,3 +np.float64,0x3fd31a4de5a6349c,0x3fd365fe401b66e8,3 +np.float64,0xbfec7cc7a478f98f,0xbff190cf69703ca4,3 +np.float64,0xbf755881a02ab100,0xbf755887f52e7794,3 +np.float64,0x3fdd1c92e6ba3924,0x3fde38efb4e8605c,3 +np.float64,0x3fdf49da80be93b4,0x3fe0588af8dd4a34,3 +np.float64,0x3fe1fcdbf2e3f9b8,0x3fe31a27b9d273f2,3 +np.float64,0x3fe2a0f18be541e4,0x3fe3e23b159ce20f,3 +np.float64,0xbfed0f1561fa1e2b,0xbff23820fc0a54ca,3 +np.float64,0x3fe34a006c669400,0x3fe4b419b9ed2b83,3 +np.float64,0xbfd51be430aa37c8,0xbfd583005a4d62e7,3 +np.float64,0x3fe5ec4e336bd89c,0x3fe826caad6b0f65,3 +np.float64,0xbfdad71b1fb5ae36,0xbfdbb25bef8b53d8,3 +np.float64,0xbfe8eac2d871d586,0xbfec8f8cac7952f9,3 +np.float64,0xbfe1d5aef663ab5e,0xbfe2eae14b7ccdfd,3 +np.float64,0x3fec11d3157823a6,0x3ff11e8279506753,3 +np.float64,0xbfe67ff1166cffe2,0xbfe8f3e61c1dfd32,3 +np.float64,0xbfd101eecda203de,0xbfd136e0e9557022,3 +np.float64,0x3fde6c9e5cbcd93c,0x3fdfb48ee7efe134,3 +np.float64,0x3fec3ede9c787dbe,0x3ff14dead1e5cc1c,3 +np.float64,0x3fe7a022086f4044,0x3fea93ce2980b161,3 +np.float64,0xbfc3b2b1b7276564,0xbfc3c6d02d60bb21,3 +np.float64,0x7ff0000000000000,0x7ff8000000000000,3 +np.float64,0x3fe60b5647ec16ac,0x3fe8517ef0544b40,3 +np.float64,0xbfd20ab654a4156c,0xbfd24a2f1b8e4932,3 +np.float64,0xbfe4aa1e2f69543c,0xbfe677005cbd2646,3 +np.float64,0xbfc831cc0b306398,0xbfc8574910d0b86d,3 +np.float64,0xbfc3143495262868,0xbfc3267961b79198,3 +np.float64,0x3fc14d64c1229ac8,0x3fc15afea90a319d,3 +np.float64,0x3fc0a5a207214b48,0x3fc0b1bd2f15c1b0,3 +np.float64,0xbfc0b8351521706c,0xbfc0c4792672d6db,3 +np.float64,0xbfdc383600b8706c,0xbfdd398429e163bd,3 +np.float64,0x3fd9e17321b3c2e8,0x3fdaa4c4d140a622,3 +np.float64,0xbfd44f079ea89e10,0xbfd4aa7d6deff4ab,3 +np.float64,0xbfc3de52a927bca4,0xbfc3f2f8f65f4c3f,3 +np.float64,0x3fe7779d566eef3a,0x3fea57f8592dbaad,3 +np.float64,0xbfe309039e661207,0xbfe462f47f9a64e5,3 +np.float64,0x3fd8e06d08b1c0dc,0x3fd98cc946e440a6,3 +np.float64,0x3fdde66c9ebbccd8,0x3fdf1c68009a8dc1,3 +np.float64,0x3fd4369c6ba86d38,0x3fd490bf460a69e4,3 +np.float64,0xbfe132252fe2644a,0xbfe22775e109cc2e,3 +np.float64,0x3fee15483c7c2a90,0x3ff39111de89036f,3 +np.float64,0xbfc1d5ee8123abdc,0xbfc1e4d66c6871a5,3 +np.float64,0x3fc851c52b30a388,0x3fc877d93fb4ae1a,3 +np.float64,0x3fdaade707b55bd0,0x3fdb85001661fffe,3 +np.float64,0xbfe79fb7f96f3f70,0xbfea9330ec27ac10,3 +np.float64,0xbfe8b0f725f161ee,0xbfec3411c0e4517a,3 +np.float64,0xbfea79f5f374f3ec,0xbfef2e9dd9270488,3 +np.float64,0x3fe0b5fe5b616bfc,0x3fe19512a36a4534,3 +np.float64,0xbfad7c622c3af8c0,0xbfad808fea96a804,3 +np.float64,0xbfe3e24dbce7c49c,0xbfe574b4c1ea9818,3 +np.float64,0xbfe80b038af01607,0xbfeb33fec279576a,3 +np.float64,0xbfef69e2ea7ed3c6,0xbff610a5593a18bc,3 +np.float64,0x3fdcc0bb39b98178,0x3fddd1f8c9a46430,3 +np.float64,0xbfba39976a347330,0xbfba4563bb5369a4,3 +np.float64,0xbfebf9768ef7f2ed,0xbff10548ab725f74,3 +np.float64,0xbfec21c066f84381,0xbff12f2803ba052f,3 +np.float64,0xbfca216a6b3442d4,0xbfca50c5e1e5748e,3 +np.float64,0x3fd5e40da4abc81c,0x3fd65783f9a22946,3 +np.float64,0x3fc235ca17246b98,0x3fc245a8f453173f,3 +np.float64,0x3fecb5b867796b70,0x3ff1d046a0bfda69,3 +np.float64,0x3fcb457fef368b00,0x3fcb7b6daa8165a7,3 +np.float64,0xbfa5ed6f7c2bdae0,0xbfa5ef27244e2e42,3 +np.float64,0x3fecf618a1f9ec32,0x3ff21a86cc104542,3 +np.float64,0x3fe9d95413f3b2a8,0x3fee178dcafa11fc,3 +np.float64,0xbfe93a5357f274a7,0xbfed0f9a565da84a,3 +np.float64,0xbfeb9e45ff773c8c,0xbff0a93cab8e258d,3 +np.float64,0x3fcbd9d0bd37b3a0,0x3fcc134e87cae241,3 
+np.float64,0x3fe55d4db76aba9c,0x3fe764a0e028475a,3 +np.float64,0xbfc8a6fc71314df8,0xbfc8ceaafbfc59a7,3 +np.float64,0x3fe0615fa660c2c0,0x3fe1323611c4cbc2,3 +np.float64,0x3fb965558632cab0,0x3fb9700b84de20ab,3 +np.float64,0x8000000000000000,0x8000000000000000,3 +np.float64,0x3fe76776c6eeceee,0x3fea40403e24a9f1,3 +np.float64,0x3fe3b7f672676fec,0x3fe53ece71a1a1b1,3 +np.float64,0xbfa9b82ba4337050,0xbfa9baf15394ca64,3 +np.float64,0xbfe31faf49663f5e,0xbfe47f31b1ca73dc,3 +np.float64,0xbfcc4c6beb3898d8,0xbfcc88c5f814b2c1,3 +np.float64,0x3fd481530aa902a8,0x3fd4df8df03bc155,3 +np.float64,0x3fd47593b8a8eb28,0x3fd4d327ab78a1a8,3 +np.float64,0x3fd70e6ccbae1cd8,0x3fd7962fe8b63d46,3 +np.float64,0x3fd25191f7a4a324,0x3fd2941623c88e02,3 +np.float64,0x3fd0603ef0a0c07c,0x3fd08f64e97588dc,3 +np.float64,0xbfc653bae52ca774,0xbfc6711e5e0d8ea9,3 +np.float64,0xbfd11db8fea23b72,0xbfd153b63c6e8812,3 +np.float64,0xbfea9bde25f537bc,0xbfef6b52268e139a,3 +np.float64,0x1,0x1,3 +np.float64,0xbfefd3806d7fa701,0xbff776dcef9583ca,3 +np.float64,0xbfe0fb8cfde1f71a,0xbfe1e6e2e774a8f8,3 +np.float64,0x3fea384534f4708a,0x3feebadaa389be0d,3 +np.float64,0x3feff761c97feec4,0x3ff866157b9d072d,3 +np.float64,0x3fe7131ccb6e263a,0x3fe9c58b4389f505,3 +np.float64,0x3fe9084f7872109e,0x3fecbed0355dbc8f,3 +np.float64,0x3f708e89e0211d00,0x3f708e8cd4946b9e,3 +np.float64,0xbfe39185f067230c,0xbfe50e1cd178244d,3 +np.float64,0x3fd67cc1a9acf984,0x3fd6fa514784b48c,3 +np.float64,0xbfecaef005f95de0,0xbff1c89c9c3ef94a,3 +np.float64,0xbfe12eec81e25dd9,0xbfe223a4285bba9a,3 +np.float64,0x3fbe7f9faa3cff40,0x3fbe92363525068d,3 +np.float64,0xbfe1950b2b632a16,0xbfe29d45fc1e4ce9,3 +np.float64,0x3fe45049e6e8a094,0x3fe6020de759e383,3 +np.float64,0x3fe4d10c8969a21a,0x3fe6aa1fe42cbeb9,3 +np.float64,0xbfe9d04658f3a08d,0xbfee08370a0dbf0c,3 +np.float64,0x3fe14fb314e29f66,0x3fe24a8d73663521,3 +np.float64,0xbfef4abfe4fe9580,0xbff5c2c1ff1250ca,3 +np.float64,0xbfe6162b366c2c56,0xbfe86073ac3c6243,3 +np.float64,0x3feffe781e7ffcf0,0x3ff8d2cbedd6a1b5,3 +np.float64,0xbff0000000000000,0xbff921fb54442d18,3 +np.float64,0x3fc1dc45ad23b888,0x3fc1eb3d9bddda58,3 +np.float64,0xbfe793f6fcef27ee,0xbfea81c93d65aa64,3 +np.float64,0x3fdef6d2bbbdeda4,0x3fe029079d42efb5,3 +np.float64,0xbfdf0ac479be1588,0xbfe0346dbc95963f,3 +np.float64,0xbfd33927d7a67250,0xbfd38653f90a5b73,3 +np.float64,0xbfe248b072e49161,0xbfe37631ef6572e1,3 +np.float64,0xbfc8ceb6af319d6c,0xbfc8f7288657f471,3 +np.float64,0x3fdd7277fcbae4f0,0x3fde99886e6766ef,3 +np.float64,0xbfe0d30c6561a619,0xbfe1b72f90bf53d6,3 +np.float64,0xbfcb0fe07d361fc0,0xbfcb448e2eae9542,3 +np.float64,0xbfe351f57fe6a3eb,0xbfe4be13eef250f2,3 +np.float64,0x3fe85ec02cf0bd80,0x3febb407e2e52e4c,3 +np.float64,0x3fc8bc59b53178b0,0x3fc8e470f65800ec,3 +np.float64,0xbfd278d447a4f1a8,0xbfd2bd133c9c0620,3 +np.float64,0x3feda5cfd87b4ba0,0x3ff2f5ab4324f43f,3 +np.float64,0xbfd2b32a36a56654,0xbfd2fa09c36afd34,3 +np.float64,0xbfed4a81cb7a9504,0xbff28077a4f4fff4,3 +np.float64,0x3fdf079bf9be0f38,0x3fe0329f7fb13f54,3 +np.float64,0x3fd14097f6a28130,0x3fd177e9834ec23f,3 +np.float64,0xbfaeab11843d5620,0xbfaeafc5531eb6b5,3 +np.float64,0xbfac3f8c14387f20,0xbfac433893d53360,3 +np.float64,0xbfc139d7ed2273b0,0xbfc14743adbbe660,3 +np.float64,0x3fe78cb02cef1960,0x3fea7707f76edba9,3 +np.float64,0x3fefe16b41ffc2d6,0x3ff7bff36a7aa7b8,3 +np.float64,0x3fec5260d378a4c2,0x3ff162c588b0da38,3 +np.float64,0x3fedb146f17b628e,0x3ff304f90d3a15d1,3 +np.float64,0x3fd1fd45f7a3fa8c,0x3fd23c2dc3929e20,3 +np.float64,0x3fe0898a5ee11314,0x3fe1610c63e726eb,3 
+np.float64,0x3fe7719946eee332,0x3fea4f205eecb59f,3 +np.float64,0x3fe955218972aa44,0x3fed3b530c1f7651,3 +np.float64,0x3fe0ccbf4461997e,0x3fe1afc7b4587836,3 +np.float64,0xbfe9204314f24086,0xbfece5605780e346,3 +np.float64,0xbfe552017feaa403,0xbfe755773cbd74d5,3 +np.float64,0x3fd8ce4b32b19c98,0x3fd9791c8dd44eae,3 +np.float64,0x3fef89acd9ff135a,0x3ff668f78adf7ced,3 +np.float64,0x3fc9d713ad33ae28,0x3fca04da6c293bbd,3 +np.float64,0xbfe22d9c4de45b38,0xbfe3553effadcf92,3 +np.float64,0x3fa5cda38c2b9b40,0x3fa5cf53c5787482,3 +np.float64,0x3fa878ebdc30f1e0,0x3fa87b4f2bf1d4c3,3 +np.float64,0x3fe8030353700606,0x3feb27e196928789,3 +np.float64,0x3fb50607222a0c10,0x3fb50c188ce391e6,3 +np.float64,0x3fd9ba4ab4b37494,0x3fda79fa8bd40f45,3 +np.float64,0x3fb564598e2ac8b0,0x3fb56abe42d1ba13,3 +np.float64,0xbfd1177c83a22efa,0xbfd14d3d7ef30cc4,3 +np.float64,0xbfd952cec7b2a59e,0xbfda09215d17c0ac,3 +np.float64,0x3fe1d8066663b00c,0x3fe2edb35770b8dd,3 +np.float64,0xbfc89427a3312850,0xbfc8bb7a7c389497,3 +np.float64,0xbfe86ebfd3f0dd80,0xbfebccc2ba0f506c,3 +np.float64,0x3fc390578b2720b0,0x3fc3a40cb7f5f728,3 +np.float64,0xbfd122f9b8a245f4,0xbfd15929dc57a897,3 +np.float64,0x3f8d0636d03a0c80,0x3f8d06767de576df,3 +np.float64,0xbfe4b55d8b696abb,0xbfe685be537a9637,3 +np.float64,0xbfdfd51cf9bfaa3a,0xbfe0a894fcff0c76,3 +np.float64,0xbfd37c1f52a6f83e,0xbfd3cc9593c37aad,3 +np.float64,0x3fd0e8283ea1d050,0x3fd11c25c800785a,3 +np.float64,0x3fd3160784a62c10,0x3fd36183a6c2880c,3 +np.float64,0x3fd4c66e57a98cdc,0x3fd5288fe3394eff,3 +np.float64,0x3fee2f7e3afc5efc,0x3ff3b8063eb30cdc,3 +np.float64,0xbfe526773a6a4cee,0xbfe71b4364215b18,3 +np.float64,0x3fea01181e740230,0x3fee5b65eccfd130,3 +np.float64,0xbfe51c03f76a3808,0xbfe70d5919d37587,3 +np.float64,0x3fd97e1375b2fc28,0x3fda3845da40b22b,3 +np.float64,0x3fd5c14a14ab8294,0x3fd632890d07ed03,3 +np.float64,0xbfec9b474279368e,0xbff1b28f50584fe3,3 +np.float64,0x3fe0139ca860273a,0x3fe0d7fc377f001c,3 +np.float64,0x3fdb080c9db61018,0x3fdbe85056358fa0,3 +np.float64,0xbfdd72ceb1bae59e,0xbfde99ea171661eb,3 +np.float64,0xbfe64e934fec9d26,0xbfe8aec2ef24be63,3 +np.float64,0x3fd1036a93a206d4,0x3fd1386adabe01bd,3 +np.float64,0x3febc9d4a5f793aa,0x3ff0d4c069f1e67d,3 +np.float64,0xbfe547a16fea8f43,0xbfe747902fe6fb4d,3 +np.float64,0x3fc289b0f9251360,0x3fc29a709de6bdd9,3 +np.float64,0xbfe694494a6d2892,0xbfe9108f3dc133e2,3 +np.float64,0x3fd827dfe4b04fc0,0x3fd8c4fe40532b91,3 +np.float64,0xbfe8b89418f17128,0xbfec400c5a334b2e,3 +np.float64,0x3fed5605147aac0a,0x3ff28ed1f612814a,3 +np.float64,0xbfed36af31fa6d5e,0xbff26804e1f71af0,3 +np.float64,0x3fdbb01c02b76038,0x3fdca2381558bbf0,3 +np.float64,0x3fe2a951666552a2,0x3fe3ec88f780f9e6,3 +np.float64,0x3fe662defbecc5be,0x3fe8cb1dbfca98ab,3 +np.float64,0x3fd098b1b3a13164,0x3fd0c9d064e4eaf2,3 +np.float64,0x3fefa10edeff421e,0x3ff6b1c6187b18a8,3 +np.float64,0xbfec4feb7a789fd7,0xbff16021ef37a219,3 +np.float64,0x3fd8e415bbb1c82c,0x3fd990c1f8b786bd,3 +np.float64,0xbfead5a09275ab41,0xbfefd44fab5b4f6e,3 +np.float64,0xbfe8666c16f0ccd8,0xbfebbfe0c9f2a9ae,3 +np.float64,0x3fdc962132b92c44,0x3fdda2525a6f406c,3 +np.float64,0xbfe2037f03e406fe,0xbfe3222ec2a3449e,3 +np.float64,0xbfec82c27e790585,0xbff197626ea9df1e,3 +np.float64,0x3fd2b4e03ca569c0,0x3fd2fbd3c7fda23e,3 +np.float64,0xbfe9b0dee5f361be,0xbfedd34f6d3dfe8a,3 +np.float64,0x3feef45cd17de8ba,0x3ff508180687b591,3 +np.float64,0x3f82c39bf0258700,0x3f82c3ad24c3b3f1,3 +np.float64,0xbfca848cfd350918,0xbfcab612ce258546,3 +np.float64,0x3fd6442aaaac8854,0x3fd6bdea54016e48,3 +np.float64,0x3fe550799e6aa0f4,0x3fe75369c9ea5b1e,3 
+np.float64,0xbfe0e9d5a361d3ac,0xbfe1d20011139d89,3 +np.float64,0x3fbfc9ff1e3f9400,0x3fbfdf0ea6885c80,3 +np.float64,0xbfa187e8b4230fd0,0xbfa188c95072092e,3 +np.float64,0x3fcd28c9533a5190,0x3fcd6ae879c21b47,3 +np.float64,0x3fc6227ec52c4500,0x3fc63f1fbb441d29,3 +np.float64,0x3fe9b7a2ed736f46,0x3feddeab49b2d176,3 +np.float64,0x3fd4aee93da95dd4,0x3fd50fb3b71e0339,3 +np.float64,0xbfe164dacf62c9b6,0xbfe263bb2f7dd5d9,3 +np.float64,0x3fec62e525f8c5ca,0x3ff17496416d9921,3 +np.float64,0x3fdd363ee0ba6c7c,0x3fde55c6a49a5f86,3 +np.float64,0x3fe65cbf75ecb97e,0x3fe8c28d31ff3ebd,3 +np.float64,0xbfe76d27ca6eda50,0xbfea4899e3661425,3 +np.float64,0xbfc305738d260ae8,0xbfc3178dcfc9d30f,3 +np.float64,0xbfd3aa2a54a75454,0xbfd3fcf1e1ce8328,3 +np.float64,0x3fd1609fc9a2c140,0x3fd1992efa539b9f,3 +np.float64,0xbfac1291bc382520,0xbfac162cc7334b4d,3 +np.float64,0xbfedb461ea7b68c4,0xbff309247850455d,3 +np.float64,0xbfe8d2adf8f1a55c,0xbfec6947be90ba92,3 +np.float64,0xbfd7128965ae2512,0xbfd79a9855bcfc5a,3 +np.float64,0x3fe8deb09471bd62,0x3fec7c56b3aee531,3 +np.float64,0xbfe5f4d329ebe9a6,0xbfe8327ea8189af8,3 +np.float64,0xbfd3b46ac9a768d6,0xbfd407b80b12ff17,3 +np.float64,0x3fec899d7cf9133a,0x3ff19ef26baca36f,3 +np.float64,0xbfec192fd5783260,0xbff126306e507fd0,3 +np.float64,0x3fe945bdaef28b7c,0x3fed222f787310bf,3 +np.float64,0xbfeff9635d7ff2c7,0xbff87d6773f318eb,3 +np.float64,0xbfd604b81cac0970,0xbfd67a4aa852559a,3 +np.float64,0x3fcd1cc9d53a3990,0x3fcd5e962e237c24,3 +np.float64,0xbfed77b0fffaef62,0xbff2b97a1c9b6483,3 +np.float64,0xbfc9c69325338d28,0xbfc9f401500402fb,3 +np.float64,0xbfdf97e246bf2fc4,0xbfe0855601ea9db3,3 +np.float64,0x3fc7e6304f2fcc60,0x3fc80a4e718504cd,3 +np.float64,0x3fec3b599e7876b4,0x3ff14a2d1b9c68e6,3 +np.float64,0xbfe98618e1f30c32,0xbfed8bfbb31c394a,3 +np.float64,0xbfe59b3c0feb3678,0xbfe7b832d6df81de,3 +np.float64,0xbfe54ce2fe6a99c6,0xbfe74e9a85be4116,3 +np.float64,0x3fc9db49cb33b690,0x3fca092737ef500a,3 +np.float64,0xbfb4a922ae295248,0xbfb4aee4e39078a9,3 +np.float64,0xbfd0e542e0a1ca86,0xbfd11925208d66af,3 +np.float64,0x3fd70543f2ae0a88,0x3fd78c5e9238a3ee,3 +np.float64,0x3fd67f7a7facfef4,0x3fd6fd3998df8545,3 +np.float64,0xbfe40b643d6816c8,0xbfe5a947e427f298,3 +np.float64,0xbfcd85f69b3b0bec,0xbfcdcaa24b75f1a3,3 +np.float64,0x3fec705fb4f8e0c0,0x3ff1833c82163ee2,3 +np.float64,0x3fb37650ea26eca0,0x3fb37b20c16fb717,3 +np.float64,0x3fe5ebfa55ebd7f4,0x3fe826578d716e70,3 +np.float64,0x3fe991dfe5f323c0,0x3fed9f8a4bf1f588,3 +np.float64,0xbfd658bd0aacb17a,0xbfd6d3dd06e54900,3 +np.float64,0xbfc24860252490c0,0xbfc258701a0b9290,3 +np.float64,0xbfefb8d763ff71af,0xbff705b6ea4a569d,3 +np.float64,0x3fb8fcb4ae31f970,0x3fb906e809e7899f,3 +np.float64,0x3fce6343cb3cc688,0x3fceae41d1629625,3 +np.float64,0xbfd43d5a11a87ab4,0xbfd497da25687e07,3 +np.float64,0xbfe9568851f2ad11,0xbfed3d9e5fe83a76,3 +np.float64,0x3fe1b66153e36cc2,0x3fe2c53c7e016271,3 +np.float64,0x3fef27452bfe4e8a,0x3ff571b3486ed416,3 +np.float64,0x3fca87c0a7350f80,0x3fcab958a7bb82d4,3 +np.float64,0xbfd8776a8fb0eed6,0xbfd91afaf2f50edf,3 +np.float64,0x3fe9522a76f2a454,0x3fed3679264e1525,3 +np.float64,0x3fea14ff2cf429fe,0x3fee7da6431cc316,3 +np.float64,0x3fe970618bf2e0c4,0x3fed68154d54dd97,3 +np.float64,0x3fd3410cfca68218,0x3fd38e9b21792240,3 +np.float64,0xbf6a8070c0350100,0xbf6a8073c7c34517,3 +np.float64,0xbfbe449de23c8938,0xbfbe56c8e5e4d98b,3 +np.float64,0x3fedbc92e27b7926,0x3ff314313216d8e6,3 +np.float64,0xbfe3be4706677c8e,0xbfe546d3ceb85aea,3 +np.float64,0x3fe30cd6d76619ae,0x3fe467b6f2664a8d,3 +np.float64,0x3fd7d69b21afad38,0x3fd86d54284d05ad,3 
+np.float64,0xbfe501001fea0200,0xbfe6e978afcff4d9,3 +np.float64,0xbfe44ba3d8e89748,0xbfe5fc0a31cd1e3e,3 +np.float64,0x3fec52f7c078a5f0,0x3ff16367acb209b2,3 +np.float64,0xbfcb19efcb3633e0,0xbfcb4ed9235a7d47,3 +np.float64,0xbfab86796c370cf0,0xbfab89df7bf15710,3 +np.float64,0xbfb962feda32c600,0xbfb96db1e1679c98,3 +np.float64,0x3fe0dd14e861ba2a,0x3fe1c2fc72810567,3 +np.float64,0x3fe41bcc6de83798,0x3fe5be59b7f9003b,3 +np.float64,0x3fc82f4c4f305e98,0x3fc854bd9798939f,3 +np.float64,0xbfcd143a613a2874,0xbfcd55cbd1619d84,3 +np.float64,0xbfd52da61baa5b4c,0xbfd595d0b3543439,3 +np.float64,0xbfb71b4a8e2e3698,0xbfb7235a4ab8432f,3 +np.float64,0xbfec141a19782834,0xbff120e1e39fc856,3 +np.float64,0xbfdba9319db75264,0xbfdc9a8ca2578bb2,3 +np.float64,0xbfbce5d74639cbb0,0xbfbcf5a4878cfa51,3 +np.float64,0x3fde67f7b3bccff0,0x3fdfaf45a9f843ad,3 +np.float64,0xbfe12d87bc625b10,0xbfe221fd4476eb71,3 +np.float64,0x3fe35b8f6be6b71e,0x3fe4ca20f65179e1,3 +np.float64,0xbfdbada1d3b75b44,0xbfdc9f78b19f93d1,3 +np.float64,0xbfc60159c52c02b4,0xbfc61d79b879f598,3 +np.float64,0x3fd6b81c38ad7038,0x3fd739c27bfa16d8,3 +np.float64,0xbfd646a253ac8d44,0xbfd6c08c19612bbb,3 +np.float64,0xbfe6babef0ed757e,0xbfe94703d0bfa311,3 +np.float64,0xbfed5671f1faace4,0xbff28f5a3f3683d0,3 +np.float64,0x3fc01d1e85203a40,0x3fc02817ec0dfd38,3 +np.float64,0xbfe9188a61f23115,0xbfecd8eb5da84223,3 +np.float64,0x3fdca3bab9b94774,0x3fddb1868660c239,3 +np.float64,0xbfa255750c24aaf0,0xbfa25675f7b36343,3 +np.float64,0x3fb3602db626c060,0x3fb364ed2d5b2876,3 +np.float64,0xbfd30a14bda6142a,0xbfd354ff703b8862,3 +np.float64,0xbfe1cfe381639fc7,0xbfe2e3e720b968c8,3 +np.float64,0xbfd2af6a4fa55ed4,0xbfd2f61e190bcd1f,3 +np.float64,0xbfe93c50937278a1,0xbfed12d64bb10d73,3 +np.float64,0x3fddd8bc44bbb178,0x3fdf0ced7f9005cc,3 +np.float64,0x3fdb2bc73cb65790,0x3fdc0fc0e18e425e,3 +np.float64,0xbfd073f6aba0e7ee,0xbfd0a3cb5468a961,3 +np.float64,0x3fed4bad7b7a975a,0x3ff281ebeb75e414,3 +np.float64,0xbfdc75b50bb8eb6a,0xbfdd7e1a7631cb22,3 +np.float64,0x3fd458a90fa8b154,0x3fd4b4a5817248ce,3 +np.float64,0x3feead5db57d5abc,0x3ff484286fab55ff,3 +np.float64,0x3fb3894382271280,0x3fb38e217b4e7905,3 +np.float64,0xffefffffffffffff,0x7ff8000000000000,3 +np.float64,0xbfe428212ae85042,0xbfe5ce36f226bea8,3 +np.float64,0xbfc08b39f7211674,0xbfc0971b93ebc7ad,3 +np.float64,0xbfc2e7cf5525cfa0,0xbfc2f994eb72b623,3 +np.float64,0xbfdb0d85afb61b0c,0xbfdbee5a2de3c5db,3 +np.float64,0xfff0000000000000,0x7ff8000000000000,3 +np.float64,0xbfd0d36af7a1a6d6,0xbfd106a5f05ef6ff,3 +np.float64,0xbfc333d0912667a0,0xbfc3467162b7289a,3 +np.float64,0x3fcdababc53b5758,0x3fcdf16458c20fa8,3 +np.float64,0x3fd0821b38a10438,0x3fd0b26e3e0b9185,3 +np.float64,0x0,0x0,3 +np.float64,0x3feb7f70edf6fee2,0x3ff08ae81854bf20,3 +np.float64,0x3fe6e075716dc0ea,0x3fe97cc5254be6ff,3 +np.float64,0x3fea13b682f4276e,0x3fee7b6f18073b5b,3 diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-arcsinh.csv b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-arcsinh.csv new file mode 100644 index 0000000..1da29c8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-arcsinh.csv @@ -0,0 +1,1429 @@ +dtype,input,output,ulperrortol +np.float32,0xbf24142a,0xbf1a85ef,2 +np.float32,0x3e71cf91,0x3e6f9e37,2 +np.float32,0xe52a7,0xe52a7,2 +np.float32,0x3ef1e074,0x3ee9add9,2 +np.float32,0x806160ac,0x806160ac,2 +np.float32,0x7e2d59a2,0x42af4798,2 +np.float32,0xbf32cac9,0xbf26bf96,2 +np.float32,0x3f081701,0x3f026142,2 +np.float32,0x3f23cc88,0x3f1a499c,2 
+np.float32,0xbf090d94,0xbf033ad0,2 +np.float32,0x803af2fc,0x803af2fc,2 +np.float32,0x807eb17e,0x807eb17e,2 +np.float32,0x5c0d8e,0x5c0d8e,2 +np.float32,0x3f7b79d2,0x3f5e6b1d,2 +np.float32,0x806feeae,0x806feeae,2 +np.float32,0x3e4b423a,0x3e49f274,2 +np.float32,0x3f49e5ac,0x3f394a41,2 +np.float32,0x3f18cd4e,0x3f10ef35,2 +np.float32,0xbed75734,0xbed17322,2 +np.float32,0x7f591151,0x42b28085,2 +np.float32,0xfefe9da6,0xc2b16f51,2 +np.float32,0xfeac90fc,0xc2b0a82a,2 +np.float32,0x805c198e,0x805c198e,2 +np.float32,0x7f66d6df,0x42b2a004,2 +np.float32,0x505438,0x505438,2 +np.float32,0xbf39a209,0xbf2c5255,2 +np.float32,0x7fa00000,0x7fe00000,2 +np.float32,0xc84cb,0xc84cb,2 +np.float32,0x7f07d6f5,0x42b19088,2 +np.float32,0x79d7e4,0x79d7e4,2 +np.float32,0xff32f6a0,0xc2b21db1,2 +np.float32,0x7c005c05,0x42a9222e,2 +np.float32,0x3ec449aa,0x3ebfc5ae,2 +np.float32,0x800ec323,0x800ec323,2 +np.float32,0xff1c904c,0xc2b1d93a,2 +np.float32,0x7f4eca52,0x42b267b0,2 +np.float32,0x3ee06540,0x3ed9c514,2 +np.float32,0x6aab4,0x6aab4,2 +np.float32,0x3e298d8c,0x3e28c99e,2 +np.float32,0xbf38d162,0xbf2ba94a,2 +np.float32,0x2d9083,0x2d9083,2 +np.float32,0x7eae5032,0x42b0ad52,2 +np.float32,0x3ead5b3c,0x3eaa3443,2 +np.float32,0x806fef66,0x806fef66,2 +np.float32,0x3f5b614e,0x3f46ca71,2 +np.float32,0xbf4c906a,0xbf3b60fc,2 +np.float32,0x8049453e,0x8049453e,2 +np.float32,0x3d305220,0x3d304432,2 +np.float32,0x2e1a89,0x2e1a89,2 +np.float32,0xbf4e74ec,0xbf3cdacf,2 +np.float32,0x807a827a,0x807a827a,2 +np.float32,0x80070745,0x80070745,2 +np.float32,0xbe1ba2fc,0xbe1b0b28,2 +np.float32,0xbe5131d0,0xbe4fc421,2 +np.float32,0x5bfd98,0x5bfd98,2 +np.float32,0xbd8e1a48,0xbd8dfd27,2 +np.float32,0x8006c160,0x8006c160,2 +np.float32,0x346490,0x346490,2 +np.float32,0xbdbdf060,0xbdbdaaf0,2 +np.float32,0x3ea9d0c4,0x3ea6d8c7,2 +np.float32,0xbf2aaa28,0xbf200916,2 +np.float32,0xbf160c26,0xbf0e9047,2 +np.float32,0x80081fd4,0x80081fd4,2 +np.float32,0x7db44283,0x42adf8b6,2 +np.float32,0xbf1983f8,0xbf118bf5,2 +np.float32,0x2c4a35,0x2c4a35,2 +np.float32,0x6165a7,0x6165a7,2 +np.float32,0xbe776b44,0xbe75129f,2 +np.float32,0xfe81841a,0xc2b0153b,2 +np.float32,0xbf7d1b2f,0xbf5f9461,2 +np.float32,0x80602d36,0x80602d36,2 +np.float32,0xfe8d5046,0xc2b041dd,2 +np.float32,0xfe5037bc,0xc2afa56d,2 +np.float32,0x4bbea6,0x4bbea6,2 +np.float32,0xfea039de,0xc2b0822d,2 +np.float32,0x7ea627a4,0x42b094c7,2 +np.float32,0x3f556198,0x3f423591,2 +np.float32,0xfedbae04,0xc2b123c1,2 +np.float32,0xbe30432c,0xbe2f6744,2 +np.float32,0x80202c77,0x80202c77,2 +np.float32,0xff335cc1,0xc2b21ed5,2 +np.float32,0x3e1e1ebe,0x3e1d7f95,2 +np.float32,0x8021c9c0,0x8021c9c0,2 +np.float32,0x7dc978,0x7dc978,2 +np.float32,0xff6cfabc,0xc2b2ad75,2 +np.float32,0x7f2bd542,0x42b208e0,2 +np.float32,0x53bf33,0x53bf33,2 +np.float32,0x804e04bb,0x804e04bb,2 +np.float32,0x3f30d2f9,0x3f2521ca,2 +np.float32,0x3dfde876,0x3dfd4316,2 +np.float32,0x46f8b1,0x46f8b1,2 +np.float32,0xbd5f9e20,0xbd5f81ba,2 +np.float32,0x807d6a22,0x807d6a22,2 +np.float32,0xff3881da,0xc2b22d50,2 +np.float32,0x1b1cb5,0x1b1cb5,2 +np.float32,0x3f75f2d0,0x3f5a7435,2 +np.float32,0xfee39c1a,0xc2b135e9,2 +np.float32,0x7f79f14a,0x42b2c8b9,2 +np.float32,0x8000e2d1,0x8000e2d1,2 +np.float32,0xab779,0xab779,2 +np.float32,0xbede6690,0xbed7f102,2 +np.float32,0x76e20d,0x76e20d,2 +np.float32,0x3ed714cb,0x3ed135e9,2 +np.float32,0xbeaa6f44,0xbea76f31,2 +np.float32,0x7f7dc8b1,0x42b2d089,2 +np.float32,0x108cb2,0x108cb2,2 +np.float32,0x7d37ba82,0x42ac9f94,2 +np.float32,0x3f31d068,0x3f25f221,2 +np.float32,0x8010a331,0x8010a331,2 +np.float32,0x3f2fdc7c,0x3f2456cd,2 
+np.float32,0x7f7a9a67,0x42b2ca13,2 +np.float32,0x3f2acb31,0x3f202492,2 +np.float32,0x7f54fa94,0x42b276c9,2 +np.float32,0x3ebf8a70,0x3ebb553c,2 +np.float32,0x7f75b1a7,0x42b2bff2,2 +np.float32,0x7daebe07,0x42ade8cc,2 +np.float32,0xbd3a3ef0,0xbd3a2e86,2 +np.float32,0x8078ec9e,0x8078ec9e,2 +np.float32,0x3eda206a,0x3ed403ec,2 +np.float32,0x3f7248f2,0x3f57cd77,2 +np.float32,0x805d55ba,0x805d55ba,2 +np.float32,0xff30dc3e,0xc2b217a3,2 +np.float32,0xbe12b27c,0xbe123333,2 +np.float32,0xbf6ed9cf,0xbf554cd0,2 +np.float32,0xbed9eb5c,0xbed3d31c,2 +np.float32,0xbf1c9aea,0xbf14307b,2 +np.float32,0x3f540ac4,0x3f412de2,2 +np.float32,0x800333ac,0x800333ac,2 +np.float32,0x3f74cdb4,0x3f59a09a,2 +np.float32,0xbf41dc41,0xbf32ee6f,2 +np.float32,0xff2c7804,0xc2b20ac4,2 +np.float32,0x514493,0x514493,2 +np.float32,0xbddf1220,0xbddea1cf,2 +np.float32,0xfeaf74de,0xc2b0b0ab,2 +np.float32,0xfe5dfb30,0xc2afc633,2 +np.float32,0xbf4785c4,0xbf376bdb,2 +np.float32,0x80191cd3,0x80191cd3,2 +np.float32,0xfe44f708,0xc2af88fb,2 +np.float32,0x3d4cd8a0,0x3d4cc2ca,2 +np.float32,0x7f572eff,0x42b27c0f,2 +np.float32,0x8031bacb,0x8031bacb,2 +np.float32,0x7f2ea684,0x42b21133,2 +np.float32,0xbea1976a,0xbe9f05bb,2 +np.float32,0x3d677b41,0x3d675bc1,2 +np.float32,0x3f61bf24,0x3f4b9870,2 +np.float32,0x7ef55ddf,0x42b15c5f,2 +np.float32,0x3eabcb20,0x3ea8b91c,2 +np.float32,0xff73d9ec,0xc2b2bc18,2 +np.float32,0x77b9f5,0x77b9f5,2 +np.float32,0x4c6c6c,0x4c6c6c,2 +np.float32,0x7ed09c94,0x42b10949,2 +np.float32,0xdeeec,0xdeeec,2 +np.float32,0x7eac5858,0x42b0a782,2 +np.float32,0x7e190658,0x42af07bd,2 +np.float32,0xbe3c8980,0xbe3b7ce2,2 +np.float32,0x8059e86e,0x8059e86e,2 +np.float32,0xff201836,0xc2b1e4a5,2 +np.float32,0xbeac109c,0xbea8fafb,2 +np.float32,0x7edd1e2b,0x42b12718,2 +np.float32,0x639cd8,0x639cd8,2 +np.float32,0x3f5e4cae,0x3f490059,2 +np.float32,0x3d84c185,0x3d84a9c4,2 +np.float32,0xbe8c1130,0xbe8a605b,2 +np.float32,0x80000000,0x80000000,2 +np.float32,0x3f1da5e4,0x3f151404,2 +np.float32,0x7f75a873,0x42b2bfdf,2 +np.float32,0xbd873540,0xbd871c28,2 +np.float32,0xbe8e5e10,0xbe8c9808,2 +np.float32,0x7f004bf2,0x42b17347,2 +np.float32,0x800000,0x800000,2 +np.float32,0xbf6d6b79,0xbf544095,2 +np.float32,0x7ed7b563,0x42b11a6a,2 +np.float32,0x80693745,0x80693745,2 +np.float32,0x3ee0f608,0x3eda49a8,2 +np.float32,0xfe1285a4,0xc2aef181,2 +np.float32,0x72d946,0x72d946,2 +np.float32,0x6a0dca,0x6a0dca,2 +np.float32,0x3f5c9df6,0x3f47ba99,2 +np.float32,0xff002af6,0xc2b172c4,2 +np.float32,0x3f4ac98f,0x3f39fd0a,2 +np.float32,0x8066acf7,0x8066acf7,2 +np.float32,0xbcaa4e60,0xbcaa4b3c,2 +np.float32,0x80162813,0x80162813,2 +np.float32,0xff34b318,0xc2b222a2,2 +np.float32,0x7f1ce33c,0x42b1da49,2 +np.float32,0x3f0e55ab,0x3f07ddb0,2 +np.float32,0x7c75d996,0x42aa6eec,2 +np.float32,0xbf221bc6,0xbf18dc89,2 +np.float32,0x3f5a1a4c,0x3f45d1d4,2 +np.float32,0x7f2451b8,0x42b1f1fb,2 +np.float32,0x3ec55ca0,0x3ec0c655,2 +np.float32,0x3f752dc2,0x3f59e600,2 +np.float32,0xbe33f638,0xbe330c4d,2 +np.float32,0x3e2a9148,0x3e29c9d8,2 +np.float32,0x3f3362a1,0x3f273c01,2 +np.float32,0x5f83b3,0x5f83b3,2 +np.float32,0x3e362488,0x3e353216,2 +np.float32,0x140bcf,0x140bcf,2 +np.float32,0x7e3e96df,0x42af7822,2 +np.float32,0xbebc7082,0xbeb86ce6,2 +np.float32,0xbe92a92e,0xbe90b9d2,2 +np.float32,0xff3d8afc,0xc2b23b19,2 +np.float32,0x804125e3,0x804125e3,2 +np.float32,0x3f3675d1,0x3f29bedb,2 +np.float32,0xff70bb09,0xc2b2b57f,2 +np.float32,0x3f29681c,0x3f1efcd2,2 +np.float32,0xbdc70380,0xbdc6b3a8,2 +np.float32,0x54e0dd,0x54e0dd,2 +np.float32,0x3d545de0,0x3d54458c,2 +np.float32,0x7f800000,0x7f800000,2 
+np.float32,0x8014a4c2,0x8014a4c2,2 +np.float32,0xbe93f58a,0xbe91f938,2 +np.float32,0x17de33,0x17de33,2 +np.float32,0xfefb679a,0xc2b168d2,2 +np.float32,0xbf23423e,0xbf19d511,2 +np.float32,0x7e893fa1,0x42b032ec,2 +np.float32,0x3f44fe2d,0x3f356bda,2 +np.float32,0xbebb2e78,0xbeb73e8f,2 +np.float32,0x3f5632e0,0x3f42d633,2 +np.float32,0x3ddd8698,0x3ddd1896,2 +np.float32,0x80164ea7,0x80164ea7,2 +np.float32,0x80087b37,0x80087b37,2 +np.float32,0xbf06ab1e,0xbf011f95,2 +np.float32,0x3db95524,0x3db9149f,2 +np.float32,0x7aa1fbb3,0x42a570a1,2 +np.float32,0xbd84fc48,0xbd84e467,2 +np.float32,0x3d65c6f5,0x3d65a826,2 +np.float32,0xfe987800,0xc2b068c4,2 +np.float32,0x7ec59532,0x42b0ed7a,2 +np.float32,0x3ea0232c,0x3e9da29a,2 +np.float32,0x80292a08,0x80292a08,2 +np.float32,0x734cfe,0x734cfe,2 +np.float32,0x3f3b6d63,0x3f2dc596,2 +np.float32,0x3f27bcc1,0x3f1d97e6,2 +np.float32,0xfe1da554,0xc2af16f9,2 +np.float32,0x7c91f5,0x7c91f5,2 +np.float32,0xfe4e78cc,0xc2afa11e,2 +np.float32,0x7e4b4e08,0x42af9933,2 +np.float32,0xfe0949ec,0xc2aed02e,2 +np.float32,0x7e2f057f,0x42af4c81,2 +np.float32,0xbf200ae0,0xbf171ce1,2 +np.float32,0x3ebcc244,0x3eb8b99e,2 +np.float32,0xbf68f58d,0xbf50f7aa,2 +np.float32,0x4420b1,0x4420b1,2 +np.float32,0x3f5b61bf,0x3f46cac7,2 +np.float32,0x3fec78,0x3fec78,2 +np.float32,0x7f4183c8,0x42b245b7,2 +np.float32,0xbf10587c,0xbf099ee2,2 +np.float32,0x0,0x0,2 +np.float32,0x7ec84dc3,0x42b0f47a,2 +np.float32,0x3f5fbd7b,0x3f4a166d,2 +np.float32,0xbd884eb8,0xbd883502,2 +np.float32,0xfe3f10a4,0xc2af7969,2 +np.float32,0xff3f4920,0xc2b23fc9,2 +np.float32,0x8013900f,0x8013900f,2 +np.float32,0x8003529d,0x8003529d,2 +np.float32,0xbf032384,0xbefbfb3c,2 +np.float32,0xff418c7c,0xc2b245ce,2 +np.float32,0xbec0aad0,0xbebc633b,2 +np.float32,0xfdbff178,0xc2ae18de,2 +np.float32,0x68ab15,0x68ab15,2 +np.float32,0xbdfc4a88,0xbdfba848,2 +np.float32,0xbf5adec6,0xbf466747,2 +np.float32,0x807d5dcc,0x807d5dcc,2 +np.float32,0x61d144,0x61d144,2 +np.float32,0x807e3a03,0x807e3a03,2 +np.float32,0x1872f2,0x1872f2,2 +np.float32,0x7f2a272c,0x42b203d8,2 +np.float32,0xfe7f8314,0xc2b00e3a,2 +np.float32,0xbe42aeac,0xbe418737,2 +np.float32,0x8024b614,0x8024b614,2 +np.float32,0xbe41b6b8,0xbe40939a,2 +np.float32,0xa765c,0xa765c,2 +np.float32,0x7ea74f4b,0x42b09853,2 +np.float32,0x7f7ef631,0x42b2d2e7,2 +np.float32,0x7eaef5e6,0x42b0af38,2 +np.float32,0xff733d85,0xc2b2bacf,2 +np.float32,0x537ac0,0x537ac0,2 +np.float32,0xbeca4790,0xbec55b1d,2 +np.float32,0x80117314,0x80117314,2 +np.float32,0xfe958536,0xc2b05ec5,2 +np.float32,0x8066ecc2,0x8066ecc2,2 +np.float32,0xbf56baf3,0xbf433e82,2 +np.float32,0x1f7fd7,0x1f7fd7,2 +np.float32,0x3e942104,0x3e9222fc,2 +np.float32,0xfeaffe82,0xc2b0b23c,2 +np.float32,0xfe0e02b0,0xc2aee17e,2 +np.float32,0xbf800000,0xbf61a1b3,2 +np.float32,0x800b7e49,0x800b7e49,2 +np.float32,0x6c514f,0x6c514f,2 +np.float32,0xff800000,0xff800000,2 +np.float32,0x7f7d9a45,0x42b2d02b,2 +np.float32,0x800c9c69,0x800c9c69,2 +np.float32,0x274b14,0x274b14,2 +np.float32,0xbf4b22b0,0xbf3a42e2,2 +np.float32,0x63e5ae,0x63e5ae,2 +np.float32,0xbe18facc,0xbe186a90,2 +np.float32,0x7e137351,0x42aef4bd,2 +np.float32,0x80518ffd,0x80518ffd,2 +np.float32,0xbf0a8ffc,0xbf048f0d,2 +np.float32,0x841d,0x841d,2 +np.float32,0x7edfdc9e,0x42b12d69,2 +np.float32,0xfd1092b0,0xc2ac24de,2 +np.float32,0x7e2c9bdf,0x42af4566,2 +np.float32,0x7f7fffff,0x42b2d4fc,2 +np.float32,0x3f4954a6,0x3f38d853,2 +np.float32,0xbe83efd2,0xbe8284c3,2 +np.float32,0x800e8e02,0x800e8e02,2 +np.float32,0x78ad39,0x78ad39,2 +np.float32,0x7eb0f967,0x42b0b514,2 +np.float32,0xbe39aa94,0xbe38a9ee,2 
+np.float32,0x80194e7b,0x80194e7b,2 +np.float32,0x3cf3a340,0x3cf39a0f,2 +np.float32,0x3ed3117a,0x3ecd8173,2 +np.float32,0x7f530b11,0x42b2721c,2 +np.float32,0xff756ba2,0xc2b2bf60,2 +np.float32,0x15ea25,0x15ea25,2 +np.float32,0x803cbb64,0x803cbb64,2 +np.float32,0x3f34722d,0x3f281a2c,2 +np.float32,0x3ddd88e0,0x3ddd1adb,2 +np.float32,0x3f54244c,0x3f41418b,2 +np.float32,0x3e0adb98,0x3e0a6f8b,2 +np.float32,0x80800000,0x80800000,2 +np.float32,0x58902b,0x58902b,2 +np.float32,0xfe3b50b8,0xc2af6f43,2 +np.float32,0xfe0846d0,0xc2aecc64,2 +np.float32,0xbe0299d0,0xbe023fd4,2 +np.float32,0x18dde6,0x18dde6,2 +np.float32,0x8039fe8b,0x8039fe8b,2 +np.float32,0x8015d179,0x8015d179,2 +np.float32,0x3f551322,0x3f41f947,2 +np.float32,0x2ab387,0x2ab387,2 +np.float32,0xbf7e311e,0xbf6059d0,2 +np.float32,0xbdba58a8,0xbdba1713,2 +np.float32,0xbf1d008a,0xbf148724,2 +np.float32,0xbf6b9c97,0xbf52ec98,2 +np.float32,0x802acf04,0x802acf04,2 +np.float32,0x1,0x1,2 +np.float32,0xbe9e16d6,0xbe9bade3,2 +np.float32,0xbf048a14,0xbefe78c7,2 +np.float32,0x7e432ad3,0x42af8449,2 +np.float32,0xbdcc7fe0,0xbdcc2944,2 +np.float32,0x6dfc27,0x6dfc27,2 +np.float32,0xfef6eed8,0xc2b15fa1,2 +np.float32,0xbeeff6e8,0xbee7f2e4,2 +np.float32,0x7e3a6ca8,0x42af6cd2,2 +np.float32,0xff2c82e8,0xc2b20ae4,2 +np.float32,0x3e9f8d74,0x3e9d13b0,2 +np.float32,0x7ea36191,0x42b08c29,2 +np.float32,0x7f734bed,0x42b2baed,2 +np.float32,0x7f2df96d,0x42b20f37,2 +np.float32,0x5036fd,0x5036fd,2 +np.float32,0x806eab38,0x806eab38,2 +np.float32,0xbe9db90e,0xbe9b5446,2 +np.float32,0xfeef6fac,0xc2b14fd9,2 +np.float32,0xc2bf7,0xc2bf7,2 +np.float32,0xff53ec3d,0xc2b2743d,2 +np.float32,0x7e837637,0x42b01cde,2 +np.float32,0xbefb5934,0xbef23662,2 +np.float32,0x3f6cec80,0x3f53e371,2 +np.float32,0x3e86e7de,0x3e85643f,2 +np.float32,0x3f09cb42,0x3f03e1ef,2 +np.float32,0xbec3d236,0xbebf5620,2 +np.float32,0xfedef246,0xc2b12b50,2 +np.float32,0xbf08d6a8,0xbf030a62,2 +np.float32,0x8036cbf9,0x8036cbf9,2 +np.float32,0x3f74d3e3,0x3f59a512,2 +np.float32,0x6a600c,0x6a600c,2 +np.float32,0xfd1295b0,0xc2ac2bf1,2 +np.float32,0xbeb61142,0xbeb26efa,2 +np.float32,0x80216556,0x80216556,2 +np.float32,0xbf1fa0f6,0xbf16c30a,2 +np.float32,0x3e0af8e1,0x3e0a8c90,2 +np.float32,0x80434709,0x80434709,2 +np.float32,0x49efd9,0x49efd9,2 +np.float32,0x7f7cce6c,0x42b2ce8f,2 +np.float32,0x6e5450,0x6e5450,2 +np.float32,0x7f0fc115,0x42b1ad86,2 +np.float32,0x632db0,0x632db0,2 +np.float32,0x3f6f4c2a,0x3f55a064,2 +np.float32,0x7ec4f273,0x42b0ebd3,2 +np.float32,0x61ae1e,0x61ae1e,2 +np.float32,0x5f47c4,0x5f47c4,2 +np.float32,0xbf3c8f62,0xbf2eaf54,2 +np.float32,0xfca38900,0xc2ab0113,2 +np.float32,0x3ec89d52,0x3ec3ce78,2 +np.float32,0xbe0e3f70,0xbe0dcb53,2 +np.float32,0x805d3156,0x805d3156,2 +np.float32,0x3eee33f8,0x3ee65a4e,2 +np.float32,0xbeda7e9a,0xbed45a90,2 +np.float32,0x7e2fac7b,0x42af4e69,2 +np.float32,0x7efd0e28,0x42b16c2c,2 +np.float32,0x3f0c7b17,0x3f063e46,2 +np.float32,0xbf395bec,0xbf2c198f,2 +np.float32,0xfdf1c3f8,0xc2ae8f05,2 +np.float32,0xbe11f4e4,0xbe117783,2 +np.float32,0x7eddc901,0x42b128a3,2 +np.float32,0x3f4bad09,0x3f3aaf33,2 +np.float32,0xfefb5d76,0xc2b168bd,2 +np.float32,0x3ed3a4cf,0x3ece09a3,2 +np.float32,0x7ec582e4,0x42b0ed4a,2 +np.float32,0x3dc2268a,0x3dc1dc64,2 +np.float32,0x3ef9b17c,0x3ef0b9c9,2 +np.float32,0x2748ac,0x2748ac,2 +np.float32,0xfed6a602,0xc2b117e4,2 +np.float32,0xbefc9c36,0xbef35832,2 +np.float32,0x7e0476,0x7e0476,2 +np.float32,0x804be1a0,0x804be1a0,2 +np.float32,0xbefbc1c2,0xbef2943a,2 +np.float32,0xbd4698f0,0xbd46850a,2 +np.float32,0x688627,0x688627,2 
+np.float32,0x3f7f7685,0x3f61406f,2 +np.float32,0x827fb,0x827fb,2 +np.float32,0x3f503264,0x3f3e34fd,2 +np.float32,0x7f5458d1,0x42b27543,2 +np.float32,0x800ac01f,0x800ac01f,2 +np.float32,0x6188dd,0x6188dd,2 +np.float32,0x806ac0ba,0x806ac0ba,2 +np.float32,0xbe14493c,0xbe13c5cc,2 +np.float32,0x3f77542c,0x3f5b72ae,2 +np.float32,0xfeaacab6,0xc2b0a2df,2 +np.float32,0x7f2893d5,0x42b1ff15,2 +np.float32,0x66b528,0x66b528,2 +np.float32,0xbf653e24,0xbf4e3573,2 +np.float32,0x801a2853,0x801a2853,2 +np.float32,0x3f3d8c98,0x3f2f7b04,2 +np.float32,0xfdffbad8,0xc2aeabc5,2 +np.float32,0x3dd50f,0x3dd50f,2 +np.float32,0x3f325a4c,0x3f266353,2 +np.float32,0xfcc48ec0,0xc2ab5f3f,2 +np.float32,0x3e6f5b9a,0x3e6d3ae5,2 +np.float32,0x3dbcd62b,0x3dbc91ee,2 +np.float32,0xbf7458d9,0xbf594c1c,2 +np.float32,0xff5adb24,0xc2b284b9,2 +np.float32,0x807b246d,0x807b246d,2 +np.float32,0x3f800000,0x3f61a1b3,2 +np.float32,0x231a28,0x231a28,2 +np.float32,0xbdc66258,0xbdc61341,2 +np.float32,0x3c84b4b4,0x3c84b338,2 +np.float32,0xbf215894,0xbf183783,2 +np.float32,0xff4ee298,0xc2b267ec,2 +np.float32,0x801ef52e,0x801ef52e,2 +np.float32,0x1040b0,0x1040b0,2 +np.float32,0xff545582,0xc2b2753b,2 +np.float32,0x3f3b9dda,0x3f2decaf,2 +np.float32,0x730f99,0x730f99,2 +np.float32,0xff7fffff,0xc2b2d4fc,2 +np.float32,0xff24cc5e,0xc2b1f379,2 +np.float32,0xbe9b456a,0xbe98fc0b,2 +np.float32,0x188fb,0x188fb,2 +np.float32,0x3f5c7ce2,0x3f47a18a,2 +np.float32,0x7fc00000,0x7fc00000,2 +np.float32,0x806ea4da,0x806ea4da,2 +np.float32,0xfe810570,0xc2b01345,2 +np.float32,0x8036af89,0x8036af89,2 +np.float32,0x8043cec6,0x8043cec6,2 +np.float32,0x80342bb3,0x80342bb3,2 +np.float32,0x1a2bd4,0x1a2bd4,2 +np.float32,0x3f6248c2,0x3f4bff9a,2 +np.float32,0x8024eb35,0x8024eb35,2 +np.float32,0x7ea55872,0x42b09247,2 +np.float32,0x806d6e56,0x806d6e56,2 +np.float32,0x25c21a,0x25c21a,2 +np.float32,0x3f4e95f3,0x3f3cf483,2 +np.float32,0x15ca38,0x15ca38,2 +np.float32,0x803f01b2,0x803f01b2,2 +np.float32,0xbe731634,0xbe70dc10,2 +np.float32,0x3e80cee4,0x3e7ef933,2 +np.float32,0x3ef6dda5,0x3eee2e7b,2 +np.float32,0x3f3dfdc2,0x3f2fd5ed,2 +np.float32,0xff0492a7,0xc2b18411,2 +np.float32,0xbf1d0adf,0xbf148ff3,2 +np.float32,0xfcf75460,0xc2abd4e3,2 +np.float32,0x3f46fca6,0x3f36ffa6,2 +np.float32,0xbe63b5c0,0xbe61dfb3,2 +np.float32,0xff019bec,0xc2b1787d,2 +np.float32,0x801f14a9,0x801f14a9,2 +np.float32,0x3f176cfa,0x3f0fc051,2 +np.float32,0x3f69d976,0x3f51a015,2 +np.float32,0x3f4917cb,0x3f38a87a,2 +np.float32,0x3b2a0bea,0x3b2a0bdd,2 +np.float32,0xbf41d857,0xbf32eb50,2 +np.float32,0xbf08841a,0xbf02c18f,2 +np.float32,0x7ec86f14,0x42b0f4d0,2 +np.float32,0xbf7d15d1,0xbf5f9090,2 +np.float32,0xbd080550,0xbd07feea,2 +np.float32,0xbf6f1bef,0xbf557d26,2 +np.float32,0xfebc282c,0xc2b0d473,2 +np.float32,0x3e68d2f5,0x3e66dd03,2 +np.float32,0x3f3ed8fe,0x3f3085d5,2 +np.float32,0xff2f78ae,0xc2b2139a,2 +np.float32,0xff647a70,0xc2b29ac1,2 +np.float32,0xfd0859a0,0xc2ac06e2,2 +np.float32,0x3ea578a8,0x3ea2b7e1,2 +np.float32,0x6c58c6,0x6c58c6,2 +np.float32,0xff23f26a,0xc2b1f0d2,2 +np.float32,0x800902a4,0x800902a4,2 +np.float32,0xfe8ba64e,0xc2b03bcd,2 +np.float32,0x3f091143,0x3f033e0f,2 +np.float32,0x8017c4bd,0x8017c4bd,2 +np.float32,0xbf708fd4,0xbf568c8c,2 +np.float32,0x3be1d8,0x3be1d8,2 +np.float32,0x80091f07,0x80091f07,2 +np.float32,0x68eabe,0x68eabe,2 +np.float32,0xfe9ab2c8,0xc2b07033,2 +np.float32,0x3eabe752,0x3ea8d3d7,2 +np.float32,0xbf7adcb2,0xbf5dfaf5,2 +np.float32,0x801ecc01,0x801ecc01,2 +np.float32,0xbf5570a9,0xbf424123,2 +np.float32,0x3e89eecd,0x3e88510e,2 +np.float32,0xfeb2feee,0xc2b0bae4,2 
+np.float32,0xbeb25ec2,0xbeaef22b,2 +np.float32,0x201e49,0x201e49,2 +np.float32,0x800a35f6,0x800a35f6,2 +np.float32,0xbf02d449,0xbefb6e2a,2 +np.float32,0x3f062bea,0x3f00aef6,2 +np.float32,0x7f5219ff,0x42b26fd2,2 +np.float32,0xbd4561d0,0xbd454e47,2 +np.float32,0x3f6c4789,0x3f536a4b,2 +np.float32,0x7f58b06d,0x42b27fa1,2 +np.float32,0x7f132f39,0x42b1b999,2 +np.float32,0x3e05dcb4,0x3e057bd8,2 +np.float32,0x7f526045,0x42b2707d,2 +np.float32,0x3f6117d0,0x3f4b1adb,2 +np.float32,0xbf21f47d,0xbf18bb57,2 +np.float32,0x1a26d6,0x1a26d6,2 +np.float32,0x46b114,0x46b114,2 +np.float32,0x3eb24518,0x3eaed9ef,2 +np.float32,0xfe2139c8,0xc2af2278,2 +np.float32,0xbf7c36fb,0xbf5ef1f6,2 +np.float32,0x3f193834,0x3f114af7,2 +np.float32,0xff3ea650,0xc2b23e14,2 +np.float32,0xfeeb3bca,0xc2b146c7,2 +np.float32,0x7e8b8ca0,0x42b03b6f,2 +np.float32,0x3eed903d,0x3ee5c5d2,2 +np.float32,0xbdc73740,0xbdc6e72a,2 +np.float32,0x7e500307,0x42afa4ec,2 +np.float32,0xe003c,0xe003c,2 +np.float32,0x3e612bb4,0x3e5f64fd,2 +np.float32,0xfd81e248,0xc2ad50e6,2 +np.float32,0x766a4f,0x766a4f,2 +np.float32,0x3e8708c9,0x3e858414,2 +np.float32,0xbf206c58,0xbf176f7f,2 +np.float32,0x7e93aeb0,0x42b0586f,2 +np.float32,0xfd9d36b8,0xc2adb2ad,2 +np.float32,0xff1f4e0e,0xc2b1e21d,2 +np.float32,0x3f22bd5a,0x3f1964f8,2 +np.float32,0x7f6a517a,0x42b2a7ad,2 +np.float32,0xff6ca773,0xc2b2acc1,2 +np.float32,0x7f6bf453,0x42b2ab3d,2 +np.float32,0x3edfdd64,0x3ed9489f,2 +np.float32,0xbeafc5ba,0xbeac7daa,2 +np.float32,0x7d862039,0x42ad615b,2 +np.float32,0xbe9d2002,0xbe9ac1fc,2 +np.float32,0xbdcc54c0,0xbdcbfe5b,2 +np.float32,0xbf1bc0aa,0xbf13762a,2 +np.float32,0xbf4679ce,0xbf36984b,2 +np.float32,0x3ef45696,0x3eebe713,2 +np.float32,0xff6eb999,0xc2b2b137,2 +np.float32,0xbe4b2e4c,0xbe49dee8,2 +np.float32,0x3f498951,0x3f3901b7,2 +np.float32,0xbe9692f4,0xbe947be1,2 +np.float32,0xbf44ce26,0xbf3545c8,2 +np.float32,0x805787a8,0x805787a8,2 +np.float32,0xbf342650,0xbf27dc26,2 +np.float32,0x3edafbf0,0x3ed4cdd2,2 +np.float32,0x3f6fb858,0x3f55ef63,2 +np.float32,0xff227d0a,0xc2b1ec3f,2 +np.float32,0xfeb9a202,0xc2b0cd89,2 +np.float32,0x7f5b12c1,0x42b2853b,2 +np.float32,0x584578,0x584578,2 +np.float32,0x7ec0b76f,0x42b0e0b5,2 +np.float32,0x3f57f54b,0x3f442f10,2 +np.float32,0x7eef3620,0x42b14f5d,2 +np.float32,0x4525b5,0x4525b5,2 +np.float32,0x801bd407,0x801bd407,2 +np.float32,0xbed1f166,0xbecc7703,2 +np.float32,0x3f57e732,0x3f442449,2 +np.float32,0x80767cd5,0x80767cd5,2 +np.float32,0xbef1a7d2,0xbee97aa3,2 +np.float32,0x3dd5b1af,0x3dd54ee6,2 +np.float32,0x960c,0x960c,2 +np.float32,0x7c392d41,0x42a9ddd1,2 +np.float32,0x3f5c9a34,0x3f47b7c1,2 +np.float32,0x3f5cecee,0x3f47f667,2 +np.float32,0xbee482ce,0xbedd8899,2 +np.float32,0x8066ba7e,0x8066ba7e,2 +np.float32,0x7ed76127,0x42b119a2,2 +np.float32,0x805ca40b,0x805ca40b,2 +np.float32,0x7f5ed5d1,0x42b28df3,2 +np.float32,0xfe9e1b1e,0xc2b07b5b,2 +np.float32,0x3f0201a2,0x3ef9f6c4,2 +np.float32,0xbf2e6430,0xbf232039,2 +np.float32,0x80326b4d,0x80326b4d,2 +np.float32,0x3f11dc7c,0x3f0af06e,2 +np.float32,0xbe89c42e,0xbe8827e6,2 +np.float32,0x3f3c69f8,0x3f2e9133,2 +np.float32,0x806326a9,0x806326a9,2 +np.float32,0x3f1c5286,0x3f13f2b6,2 +np.float32,0xff5c0ead,0xc2b28786,2 +np.float32,0xff32b952,0xc2b21d01,2 +np.float32,0x7dd27c4e,0x42ae4815,2 +np.float32,0xbf7a6816,0xbf5da7a2,2 +np.float32,0xfeac72f8,0xc2b0a7d1,2 +np.float32,0x335ad7,0x335ad7,2 +np.float32,0xbe682da4,0xbe663bcc,2 +np.float32,0x3f2df244,0x3f22c208,2 +np.float32,0x80686e8e,0x80686e8e,2 +np.float32,0x7f50120f,0x42b26ad9,2 +np.float32,0x3dbc596a,0x3dbc15b3,2 
+np.float32,0xbf4f2868,0xbf3d666d,2 +np.float32,0x80000001,0x80000001,2 +np.float32,0xff66c059,0xc2b29fd2,2 +np.float32,0xfe8bbcaa,0xc2b03c1f,2 +np.float32,0x3ece6a51,0x3ec93271,2 +np.float32,0x7f06cd26,0x42b18c9a,2 +np.float32,0x7e41e6dc,0x42af80f5,2 +np.float32,0x7d878334,0x42ad669f,2 +np.float32,0xfe8c5c4c,0xc2b03e67,2 +np.float32,0x337a05,0x337a05,2 +np.float32,0x3e63801d,0x3e61ab58,2 +np.float32,0x62c315,0x62c315,2 +np.float32,0x802aa888,0x802aa888,2 +np.float32,0x80038b43,0x80038b43,2 +np.float32,0xff5c1271,0xc2b2878f,2 +np.float32,0xff4184a5,0xc2b245b9,2 +np.float32,0x7ef58f4b,0x42b15cc6,2 +np.float32,0x7f42d8ac,0x42b2493a,2 +np.float32,0x806609f2,0x806609f2,2 +np.float32,0x801e763b,0x801e763b,2 +np.float32,0x7f2bc073,0x42b208a2,2 +np.float32,0x801d7d7f,0x801d7d7f,2 +np.float32,0x7d415dc1,0x42acb9c2,2 +np.float32,0xbf624ff9,0xbf4c0502,2 +np.float32,0xbf603afd,0xbf4a74e2,2 +np.float32,0x8007fe42,0x8007fe42,2 +np.float32,0x800456db,0x800456db,2 +np.float32,0x620871,0x620871,2 +np.float32,0x3e9c6c1e,0x3e9a15fa,2 +np.float32,0x4245d,0x4245d,2 +np.float32,0x8035bde9,0x8035bde9,2 +np.float32,0xbf597418,0xbf45533c,2 +np.float32,0x3c730f80,0x3c730d38,2 +np.float32,0x3f7cd8ed,0x3f5f6540,2 +np.float32,0x807e49c3,0x807e49c3,2 +np.float32,0x3d6584c0,0x3d65660c,2 +np.float32,0xff42a744,0xc2b248b8,2 +np.float32,0xfedc6f56,0xc2b12583,2 +np.float32,0x806263a4,0x806263a4,2 +np.float32,0x175a17,0x175a17,2 +np.float32,0x3f1e8537,0x3f15d208,2 +np.float32,0x4055b5,0x4055b5,2 +np.float32,0x438aa6,0x438aa6,2 +np.float32,0x8038507f,0x8038507f,2 +np.float32,0xbed75348,0xbed16f85,2 +np.float32,0x7f07b7d6,0x42b19012,2 +np.float32,0xfe8b9d30,0xc2b03bac,2 +np.float32,0x805c501c,0x805c501c,2 +np.float32,0x3ef22b1d,0x3ee9f159,2 +np.float32,0x802b6759,0x802b6759,2 +np.float32,0x45281a,0x45281a,2 +np.float32,0xbf7e9970,0xbf60a3cf,2 +np.float32,0xbf14d152,0xbf0d8062,2 +np.float32,0x3d9ff950,0x3d9fcfc8,2 +np.float32,0x7865d9,0x7865d9,2 +np.float32,0xbee67fa4,0xbedf58eb,2 +np.float32,0x7dc822d1,0x42ae2e44,2 +np.float32,0x3f3af0fe,0x3f2d612c,2 +np.float32,0xbefea106,0xbef5274e,2 +np.float32,0xbf758a3f,0xbf5a28c5,2 +np.float32,0xbf331bdd,0xbf270209,2 +np.float32,0x7f51c901,0x42b26f0d,2 +np.float32,0x3f67c33b,0x3f5014d8,2 +np.float32,0xbbc9d980,0xbbc9d92c,2 +np.float32,0xbc407540,0xbc40741e,2 +np.float32,0x7eed9a3c,0x42b14be9,2 +np.float32,0x1be0fe,0x1be0fe,2 +np.float32,0xbf6b4913,0xbf52af1f,2 +np.float32,0xbda8eba8,0xbda8bac6,2 +np.float32,0x8004bcea,0x8004bcea,2 +np.float32,0xff6f6afe,0xc2b2b2b3,2 +np.float32,0xbf205810,0xbf175e50,2 +np.float32,0x80651944,0x80651944,2 +np.float32,0xbec73016,0xbec27a3f,2 +np.float32,0x5701b9,0x5701b9,2 +np.float32,0xbf1062ce,0xbf09a7df,2 +np.float32,0x3e0306ae,0x3e02abd1,2 +np.float32,0x7bfc62,0x7bfc62,2 +np.float32,0xbf48dd3c,0xbf387a6b,2 +np.float32,0x8009573e,0x8009573e,2 +np.float32,0x660a2c,0x660a2c,2 +np.float32,0xff2280da,0xc2b1ec4b,2 +np.float32,0xbf7034fe,0xbf564a54,2 +np.float32,0xbeeb448e,0xbee3b045,2 +np.float32,0xff4e949c,0xc2b2672b,2 +np.float32,0xbf3c4486,0xbf2e7309,2 +np.float32,0x7eb086d8,0x42b0b3c8,2 +np.float32,0x7eac8aca,0x42b0a817,2 +np.float32,0xfd3d2d60,0xc2acae8b,2 +np.float32,0xbf363226,0xbf2987bd,2 +np.float32,0x7f02e524,0x42b17d8c,2 +np.float32,0x8049a148,0x8049a148,2 +np.float32,0x147202,0x147202,2 +np.float32,0x8031d3f6,0x8031d3f6,2 +np.float32,0xfe78bf68,0xc2b0007d,2 +np.float32,0x7ebd16d0,0x42b0d6fb,2 +np.float32,0xbdaed2e8,0xbdae9cbb,2 +np.float32,0x802833ae,0x802833ae,2 +np.float32,0x7f62adf6,0x42b296b5,2 +np.float32,0xff2841c0,0xc2b1fe1b,2 
+np.float32,0xbeb2c47e,0xbeaf523b,2 +np.float32,0x7e42a36e,0x42af82e6,2 +np.float32,0x41ea29,0x41ea29,2 +np.float32,0xbcaaa800,0xbcaaa4d7,2 +np.float64,0x3fed71f27ebae3e5,0x3fea5c6095012ca6,2 +np.float64,0x224dc392449b9,0x224dc392449b9,2 +np.float64,0x3fdf897a7d3f12f5,0x3fde620339360992,2 +np.float64,0xbfe1f99a5123f334,0xbfe124a57cfaf556,2 +np.float64,0xbfd9725c3bb2e4b8,0xbfd8d1e3f75110c7,2 +np.float64,0x3fe38977546712ee,0x3fe27d9d37f4b91f,2 +np.float64,0xbfc36c29e526d854,0xbfc3594743ee45c4,2 +np.float64,0xbfe5cbec332b97d8,0xbfe4638802316849,2 +np.float64,0x2ff35efe5fe6d,0x2ff35efe5fe6d,2 +np.float64,0x7fd3f828e227f051,0x40862a7d4a40b1e0,2 +np.float64,0xffd06fc11620df82,0xc08628ee8f1bf6c8,2 +np.float64,0x3fe5321bf4aa6438,0x3fe3e3d9fa453199,2 +np.float64,0xffd07a323ca0f464,0xc08628f3a2930f8c,2 +np.float64,0x3fdf7abe7abef57c,0x3fde54cb193d49cb,2 +np.float64,0x40941f1881285,0x40941f1881285,2 +np.float64,0xffef18defc7e31bd,0xc0863393f2c9f061,2 +np.float64,0xbfe379f871e6f3f1,0xbfe270620cb68347,2 +np.float64,0xffec829848f90530,0xc08632e210edaa2b,2 +np.float64,0x80070c00574e1801,0x80070c00574e1801,2 +np.float64,0xffce7654b23ceca8,0xc086285291e89975,2 +np.float64,0x7fc9932daa33265a,0x408626ec6cc2b807,2 +np.float64,0x355ee98c6abde,0x355ee98c6abde,2 +np.float64,0x3fac54962c38a920,0x3fac50e40b6c19f2,2 +np.float64,0x800857984af0af31,0x800857984af0af31,2 +np.float64,0x7fea6a3d55f4d47a,0x40863245bf39f179,2 +np.float64,0x3fdb8fab33371f56,0x3fdac5ffc9e1c347,2 +np.float64,0x800a887a7bf510f5,0x800a887a7bf510f5,2 +np.float64,0xbfbdbda3c63b7b48,0xbfbdac9dd5a2d3e8,2 +np.float64,0xbfd4a2457b29448a,0xbfd44acb3b316d6d,2 +np.float64,0x7fd5329a502a6534,0x40862af789b528b5,2 +np.float64,0x3fd96a7bceb2d4f8,0x3fd8ca92104d6cd6,2 +np.float64,0x3fde6a0cd6bcd41a,0x3fdd5f4b85abf749,2 +np.float64,0xbfc7faaff32ff560,0xbfc7d7560b8c4a52,2 +np.float64,0x7fec381b2f787035,0x408632cd0e9c095c,2 +np.float64,0x1fc2eb543f85e,0x1fc2eb543f85e,2 +np.float64,0x7ac6000af58c1,0x7ac6000af58c1,2 +np.float64,0xffe060a87920c150,0xc0862e72c37d5a4e,2 +np.float64,0xbfb7d8c89e2fb190,0xbfb7cffd3c3f8e3a,2 +np.float64,0x3fd91033deb22068,0x3fd87695b067aa1e,2 +np.float64,0x3fec1aff01b835fe,0x3fe95d5cbd729af7,2 +np.float64,0x7fb97f69ec32fed3,0x4086215aaae5c697,2 +np.float64,0x7feaf1e4e5f5e3c9,0x4086326e6ca6a2bb,2 +np.float64,0x800537e44d0a6fc9,0x800537e44d0a6fc9,2 +np.float64,0x800b2a0d0d36541a,0x800b2a0d0d36541a,2 +np.float64,0x3fe2193846e43270,0x3fe140308550138e,2 +np.float64,0x5e2a0a32bc542,0x5e2a0a32bc542,2 +np.float64,0xffe5888b09eb1116,0xc08630a348783aa3,2 +np.float64,0xbfceb9b5033d736c,0xbfce701049c10435,2 +np.float64,0x7fe5d68589abad0a,0x408630c00ce63f23,2 +np.float64,0x8009b5457ff36a8b,0x8009b5457ff36a8b,2 +np.float64,0xbfb5518c2e2aa318,0xbfb54b42638ca718,2 +np.float64,0x3f9c58469838b080,0x3f9c575974fbcd7b,2 +np.float64,0x3fe8db4b4731b697,0x3fe6dc9231587966,2 +np.float64,0x8007d0f77f4fa1f0,0x8007d0f77f4fa1f0,2 +np.float64,0x7fe79eef542f3dde,0x40863160c673c67f,2 +np.float64,0xffbdc0b6163b8170,0xc0862296be4bf032,2 +np.float64,0x3fbb8d3312371a66,0x3fbb7fa76fb4cf8d,2 +np.float64,0xffd8a0eedbb141de,0xc0862c2ac6e512f0,2 +np.float64,0x7fee99d8d87d33b1,0x4086337301c4c8df,2 +np.float64,0xffe7479b552e8f36,0xc0863142fba0f0ec,2 +np.float64,0xffedf8ef4abbf1de,0xc08633488068fe69,2 +np.float64,0x895c4d9f12b8a,0x895c4d9f12b8a,2 +np.float64,0x29b4caf05369a,0x29b4caf05369a,2 +np.float64,0xbfefb90d657f721b,0xbfec01efa2425b35,2 +np.float64,0xde07c3bdbc0f9,0xde07c3bdbc0f9,2 +np.float64,0x7feae9fd02f5d3f9,0x4086326c1368ed5a,2 
+np.float64,0x3feab792da756f26,0x3fe84f6e15338ed7,2 +np.float64,0xbfeff8ed72fff1db,0xbfec2f35da06daaf,2 +np.float64,0x8004b2c132896583,0x8004b2c132896583,2 +np.float64,0xbf9fcb00103f9600,0xbf9fc9b1751c569e,2 +np.float64,0x4182b72e83058,0x4182b72e83058,2 +np.float64,0x90820d812105,0x90820d812105,2 +np.float64,0xbfdec9a0ba3d9342,0xbfddb585df607ce1,2 +np.float64,0x7fdc0a69a03814d2,0x40862d347f201b63,2 +np.float64,0xbfef0708937e0e11,0xbfeb82d27f8ea97f,2 +np.float64,0xffda57e4ddb4afca,0xc0862cb49e2e0c4c,2 +np.float64,0xbfa30b9af4261730,0xbfa30a7b4a633060,2 +np.float64,0x7feb57fcc4b6aff9,0x4086328c83957a0b,2 +np.float64,0x7fe6759153eceb22,0x408630f980433963,2 +np.float64,0x7fdd3278c8ba64f1,0x40862d87445243e9,2 +np.float64,0xd3b8e6b9a771d,0xd3b8e6b9a771d,2 +np.float64,0x6267dc88c4cfc,0x6267dc88c4cfc,2 +np.float64,0x7fedd3cf00bba79d,0x4086333e91712ff5,2 +np.float64,0xffbe512ce03ca258,0xc08622bd39314cea,2 +np.float64,0xbfe71742ca6e2e86,0xbfe572ccbf2d010d,2 +np.float64,0x8002fb048c65f60a,0x8002fb048c65f60a,2 +np.float64,0x800d9d9ddf7b3b3c,0x800d9d9ddf7b3b3c,2 +np.float64,0xbfeaf6230df5ec46,0xbfe87f5d751ec3d5,2 +np.float64,0xbfe69973a42d32e8,0xbfe50c680f7002fe,2 +np.float64,0x3fe309cf87e613a0,0x3fe21048714ce1ac,2 +np.float64,0x800435d17a286ba4,0x800435d17a286ba4,2 +np.float64,0x7fefffffffffffff,0x408633ce8fb9f87e,2 +np.float64,0x3fe36ade1766d5bc,0x3fe26379fb285dde,2 +np.float64,0x3f98d8d94831b1c0,0x3f98d839885dc527,2 +np.float64,0xbfd08f7ae5211ef6,0xbfd0618ab5293e1e,2 +np.float64,0xbfcf630bd53ec618,0xbfcf14a0cd20704d,2 +np.float64,0xbfe58f0ca6eb1e1a,0xbfe4312225df8e28,2 +np.float64,0xffef4f6406be9ec7,0xc08633a1ed1d27e5,2 +np.float64,0x7fe10120b3e20240,0x40862ebfaf94e6e8,2 +np.float64,0xffe96c52fbb2d8a5,0xc08631f75d9a59a0,2 +np.float64,0xbfe448a333e89146,0xbfe31fee44c3ec43,2 +np.float64,0x80045ff4e788bfeb,0x80045ff4e788bfeb,2 +np.float64,0x7fefaa2f823f545e,0x408633b8fea29524,2 +np.float64,0xffea6b8bf234d717,0xc0863246248e5960,2 +np.float64,0xbfdb085d80b610bc,0xbfda498b15b43eec,2 +np.float64,0xbfd5e12da3abc25c,0xbfd57970e2b8aecc,2 +np.float64,0x3fcc84928a390925,0x3fcc497c417a89f3,2 +np.float64,0xbfdcb713bf396e28,0xbfdbd46c5e731fd9,2 +np.float64,0xffdf50c0453ea180,0xc0862e16b5562f25,2 +np.float64,0x800342c2f7268587,0x800342c2f7268587,2 +np.float64,0x7feb8b6d743716da,0x4086329b8248de2c,2 +np.float64,0x800a9b18b4953632,0x800a9b18b4953632,2 +np.float64,0xffedaf0d12fb5e19,0xc0863334af82de1a,2 +np.float64,0x800aebda4ab5d7b5,0x800aebda4ab5d7b5,2 +np.float64,0xbfa9f5848433eb10,0xbfa9f2ac7ac065d4,2 +np.float64,0x3fea375928f46eb2,0x3fe7ec9f10eeac7d,2 +np.float64,0x3fd6c213fead8428,0x3fd64dcc1eff5f1b,2 +np.float64,0xbfa0476f44208ee0,0xbfa046bb986007ac,2 +np.float64,0x6c8e18aed91c4,0x6c8e18aed91c4,2 +np.float64,0x8000000000000001,0x8000000000000001,2 +np.float64,0x7fea86b5ba350d6a,0x4086324e59f13027,2 +np.float64,0x2316c3b0462d9,0x2316c3b0462d9,2 +np.float64,0x3fec4e3281389c65,0x3fe983c5c9d65940,2 +np.float64,0x3fbb87c47f772,0x3fbb87c47f772,2 +np.float64,0x8004af00fdc95e03,0x8004af00fdc95e03,2 +np.float64,0xbfd316db9ba62db8,0xbfd2d12765b9d155,2 +np.float64,0x3fec1a7a99f834f6,0x3fe95cf941889b3d,2 +np.float64,0x3feff7e1477fefc3,0x3fec2e782392d4b9,2 +np.float64,0xbfc683ea042d07d4,0xbfc66698cfa5026e,2 +np.float64,0x3fdbc8aaa9b79154,0x3fdafa50e6fc3fff,2 +np.float64,0xfb3b630ff676d,0xfb3b630ff676d,2 +np.float64,0x7fe715ef8eae2bde,0x40863131d794b41f,2 +np.float64,0x7fefa06c11bf40d7,0x408633b686c7996a,2 +np.float64,0x80002a40f5205483,0x80002a40f5205483,2 +np.float64,0x7fe95f3c74b2be78,0x408631f33e37bf76,2 
+np.float64,0x3fb2977b32252ef0,0x3fb2934eaf5a4be8,2 +np.float64,0x3fc0f3dbc821e7b8,0x3fc0e745288c84c3,2 +np.float64,0x3fda98da56b531b5,0x3fd9e2b19447dacc,2 +np.float64,0x3f95b9d5202b73aa,0x3f95b96a53282949,2 +np.float64,0x3fdc1ace7738359d,0x3fdb4597d31df7ff,2 +np.float64,0xffeac5bb2e358b76,0xc0863261452ab66c,2 +np.float64,0xbfefb1b78f7f636f,0xbfebfcb9be100ced,2 +np.float64,0xf5c9e191eb93c,0xf5c9e191eb93c,2 +np.float64,0x3fe83a977630752f,0x3fe65d0df90ff6ef,2 +np.float64,0x3fc317515d262ea0,0x3fc3056072b719f0,2 +np.float64,0x7fe2dcfab225b9f4,0x40862f94257c28a2,2 +np.float64,0xca2b115794562,0xca2b115794562,2 +np.float64,0x3fd495301aa92a60,0x3fd43e57108761d5,2 +np.float64,0x800ccc4293199885,0x800ccc4293199885,2 +np.float64,0xc8d3173d91a63,0xc8d3173d91a63,2 +np.float64,0xbf2541bb7e4a8,0xbf2541bb7e4a8,2 +np.float64,0xbfe9a330df334662,0xbfe779816573f5be,2 +np.float64,0xffd5e4c8252bc990,0xc0862b39b3ca5d72,2 +np.float64,0x3fe90f3a53721e75,0x3fe70585ae09531d,2 +np.float64,0xbfe2b5ddc7a56bbc,0xbfe1c7fa91a675ed,2 +np.float64,0xbf981a0360303400,0xbf9819719345073a,2 +np.float64,0x19174b0e322ea,0x19174b0e322ea,2 +np.float64,0xbfd2f71a1725ee34,0xbfd2b2b6f7cd10b1,2 +np.float64,0x80056e83236add07,0x80056e83236add07,2 +np.float64,0x7fe4bc41d9697883,0x40863055f20ce0cb,2 +np.float64,0xffe76e06c46edc0d,0xc086315024b25559,2 +np.float64,0x3fe3c4f0f96789e2,0x3fe2b04b584609bf,2 +np.float64,0x3fe6cfc533ed9f8a,0x3fe538b4d784d5ee,2 +np.float64,0x7fd234a640a4694c,0x408629bfead4f0b2,2 +np.float64,0x3fdbc49c9ab78939,0x3fdaf698a83d08e2,2 +np.float64,0x3fe4c5336ee98a66,0x3fe388c6ddb60e0a,2 +np.float64,0xf4b9497be9729,0xf4b9497be9729,2 +np.float64,0x3fb312be12262580,0x3fb30e3c847c1d16,2 +np.float64,0x3fe9554218f2aa84,0x3fe73c8b311c7a98,2 +np.float64,0xff899816a0333040,0xc08610bfb2cd8559,2 +np.float64,0x8006008ad52c0116,0x8006008ad52c0116,2 +np.float64,0x3fd7d47be4afa8f8,0x3fd74fa71ec17fd0,2 +np.float64,0x8010000000000000,0x8010000000000000,2 +np.float64,0xdf2a9943be553,0xdf2a9943be553,2 +np.float64,0xbfeb86bf1eb70d7e,0xbfe8ed797580ba5c,2 +np.float64,0x800e2c0c28bc5818,0x800e2c0c28bc5818,2 +np.float64,0xbfe2be65d4657ccc,0xbfe1cf578dec2323,2 +np.float64,0xbfedea3a5afbd475,0xbfeab490bf05e585,2 +np.float64,0xbfe04b1583a0962b,0xbfdf523dfd7be25c,2 +np.float64,0x75929bb4eb254,0x75929bb4eb254,2 +np.float64,0x3fd7b4968caf692d,0x3fd731c0938ff97c,2 +np.float64,0x60bd8fd2c17b3,0x60bd8fd2c17b3,2 +np.float64,0xbfdaf15e70b5e2bc,0xbfda345a95ce18fe,2 +np.float64,0x7fdd7c35c2baf86b,0x40862d9b5f40c6b2,2 +np.float64,0x7feeb4d2ab7d69a4,0x4086337a0c0dffaf,2 +np.float64,0xffe65b5a1decb6b4,0xc08630f024420efb,2 +np.float64,0x7feb272b30764e55,0x4086327e2e553aa2,2 +np.float64,0x3fd27513e8a4ea28,0x3fd235ea49670f6a,2 +np.float64,0x3fe6541a6aeca834,0x3fe4d3a5b69fd1b6,2 +np.float64,0xbfe0c6ca0f618d94,0xbfe017058259efdb,2 +np.float64,0x7fc1bf07b7237e0e,0x4086240000fa5a52,2 +np.float64,0x7fe96af9c0f2d5f3,0x408631f6f0f4faa2,2 +np.float64,0x3fe0728be7a0e518,0x3fdf9881a5869de9,2 +np.float64,0xffe8ea4441b1d488,0xc08631ce0685ae7e,2 +np.float64,0xffd0b973f02172e8,0xc08629121e7fdf85,2 +np.float64,0xffe37b907a26f720,0xc0862fd6529401a0,2 +np.float64,0x3fe0ee826461dd05,0x3fe03a2a424a1b40,2 +np.float64,0xbfe8073c92300e79,0xbfe6340cbd179ac1,2 +np.float64,0x800768383f8ed071,0x800768383f8ed071,2 +np.float64,0x8002e467c7c5c8d0,0x8002e467c7c5c8d0,2 +np.float64,0xbfd8d53ea5b1aa7e,0xbfd83fa7243289d7,2 +np.float64,0xffebefce2bb7df9c,0xc08632b874f4f8dc,2 +np.float64,0xffe3be9eb9277d3d,0xc0862ff1ac70ad0b,2 +np.float64,0xffe2f8a82e65f150,0xc0862f9fd9e77d86,2 
+np.float64,0xbfa01d151c203a30,0xbfa01c66dc13a70a,2 +np.float64,0x800877062d30ee0d,0x800877062d30ee0d,2 +np.float64,0xaade16a755bc3,0xaade16a755bc3,2 +np.float64,0xbfeb1abc70363579,0xbfe89b52c3b003aa,2 +np.float64,0x80097d0b2ad2fa17,0x80097d0b2ad2fa17,2 +np.float64,0x8001499907429333,0x8001499907429333,2 +np.float64,0x3fe8db2aaf71b656,0x3fe6dc7873f1b235,2 +np.float64,0x5cfeadc4b9fd6,0x5cfeadc4b9fd6,2 +np.float64,0xff3f77d1fe7ef,0xff3f77d1fe7ef,2 +np.float64,0xffeecd56f9bd9aad,0xc08633806cb1163d,2 +np.float64,0xbf96f3ca582de7a0,0xbf96f34c6b8e1c85,2 +np.float64,0x7ed6b44afdad7,0x7ed6b44afdad7,2 +np.float64,0x80071808da4e3012,0x80071808da4e3012,2 +np.float64,0x3feb8aee2bf715dc,0x3fe8f0a55516615c,2 +np.float64,0x800038f62e2071ed,0x800038f62e2071ed,2 +np.float64,0x3fb13f9af2227f30,0x3fb13c456ced8e08,2 +np.float64,0xffd584d1812b09a4,0xc0862b165558ec0c,2 +np.float64,0x800b20c30fb64186,0x800b20c30fb64186,2 +np.float64,0x80024f9646e49f2d,0x80024f9646e49f2d,2 +np.float64,0xffefffffffffffff,0xc08633ce8fb9f87e,2 +np.float64,0x3fdddbcb5bbbb797,0x3fdcde981111f650,2 +np.float64,0xffed14077f3a280e,0xc086330a795ad634,2 +np.float64,0x800fec2da7ffd85b,0x800fec2da7ffd85b,2 +np.float64,0x3fe8205ffc7040c0,0x3fe6482318d217f9,2 +np.float64,0x3013e5226027d,0x3013e5226027d,2 +np.float64,0xffe4e5aad469cb55,0xc0863065dc2fb4e3,2 +np.float64,0x5cb0f7b2b9620,0x5cb0f7b2b9620,2 +np.float64,0xbfeb4537d2768a70,0xbfe8bbb2c1d3bff9,2 +np.float64,0xbfd859e297b0b3c6,0xbfd7cc807948bf9d,2 +np.float64,0x71f00b8ce3e02,0x71f00b8ce3e02,2 +np.float64,0xf5c1b875eb837,0xf5c1b875eb837,2 +np.float64,0xa0f35c8141e8,0xa0f35c8141e8,2 +np.float64,0xffe24860b42490c1,0xc0862f54222f616e,2 +np.float64,0xffcd9ae8583b35d0,0xc08628181e643a42,2 +np.float64,0x7fe9b710c7736e21,0x4086320ec033490f,2 +np.float64,0x3fd2b9ca1d257394,0x3fd277e631f0c0b3,2 +np.float64,0x23559bfc46ab4,0x23559bfc46ab4,2 +np.float64,0x8002adf75e455bef,0x8002adf75e455bef,2 +np.float64,0xbfefa4d75cbf49af,0xbfebf392e51d6a1a,2 +np.float64,0xffcfef263e3fde4c,0xc08628b336adb611,2 +np.float64,0x80061acaa8ec3596,0x80061acaa8ec3596,2 +np.float64,0x7fc1b33be0236677,0x408623faaddcc17e,2 +np.float64,0x7fe3a84083675080,0x40862fe8972e41e1,2 +np.float64,0xbfe756c1276ead82,0xbfe5a6318b061e1b,2 +np.float64,0xbfae4b71b43c96e0,0xbfae46ed0b6203a4,2 +np.float64,0x800421c6d0a8438e,0x800421c6d0a8438e,2 +np.float64,0x8009ad56fe335aae,0x8009ad56fe335aae,2 +np.float64,0xbfe71afc976e35f9,0xbfe575d21f3d7193,2 +np.float64,0x7fec0bbe4c38177c,0x408632c0710f1d8a,2 +np.float64,0x750e1daeea1c4,0x750e1daeea1c4,2 +np.float64,0x800501d4240a03a9,0x800501d4240a03a9,2 +np.float64,0x800794955cef292b,0x800794955cef292b,2 +np.float64,0x3fdf8a87f5bf1510,0x3fde62f4f00cfa19,2 +np.float64,0xbfebebdbc7f7d7b8,0xbfe939e51ba1340c,2 +np.float64,0xbfe3a16217a742c4,0xbfe292039dd08a71,2 +np.float64,0x3fed6cd04c3ad9a1,0x3fea58995973f74b,2 +np.float64,0xffcad8787335b0f0,0xc086274fbb35dd37,2 +np.float64,0x3fcb178e3d362f1c,0x3fcae4c9f3e6dddc,2 +np.float64,0xbfcadc669435b8cc,0xbfcaaae7cf075420,2 +np.float64,0x7fe0e3906321c720,0x40862eb1bacc5c43,2 +np.float64,0xff8ad5edb035abc0,0xc0861120b6404d0b,2 +np.float64,0x3fe175a21562eb44,0x3fe0b13120a46549,2 +np.float64,0xbfeb4c4a5f769895,0xbfe8c1147f1c9d8f,2 +np.float64,0x7fca22f4e63445e9,0x40862718e9b4094e,2 +np.float64,0x3fe4269d0c684d3a,0x3fe3032aa2015c53,2 +np.float64,0x3fef551c09beaa38,0x3febbabe03f49c83,2 +np.float64,0xffd843df9fb087c0,0xc0862c0c52d5e5d9,2 +np.float64,0x7fc497e2ca292fc5,0x40862530bbd9fcc7,2 +np.float64,0x3fee02919efc0523,0x3feac655588a4acd,2 
+np.float64,0x7fed1e52c0fa3ca5,0x4086330d4ddd8a2c,2 +np.float64,0xba04d4ef7409b,0xba04d4ef7409b,2 +np.float64,0x3fee22d0937c45a2,0x3feaddd4ca66b447,2 +np.float64,0xffeb2558cf764ab1,0xc086327da4e84053,2 +np.float64,0xbfe103d987e207b3,0xbfe04d04818ad1ff,2 +np.float64,0x3f9fd7fed03faffe,0x3f9fd6ae9a45be84,2 +np.float64,0x800a53ec4c34a7d9,0x800a53ec4c34a7d9,2 +np.float64,0xbfe2feb17f65fd63,0xbfe206b9d33a78a2,2 +np.float64,0x989bdd613139,0x989bdd613139,2 +np.float64,0xbfdd0ad3fb3a15a8,0xbfdc20c32a530741,2 +np.float64,0xbfc4222163284444,0xbfc40d1c612784b5,2 +np.float64,0xc30cf5c78619f,0xc30cf5c78619f,2 +np.float64,0x3fe913bd6732277b,0x3fe70912f76bad71,2 +np.float64,0x98f175f531e2f,0x98f175f531e2f,2 +np.float64,0x3fed8c1f717b183f,0x3fea6f9fb3af3423,2 +np.float64,0x7fee46b085bc8d60,0x4086335d269eb7e9,2 +np.float64,0x8007480f564e901f,0x8007480f564e901f,2 +np.float64,0xc9b96e179372e,0xc9b96e179372e,2 +np.float64,0x3fe44deac4289bd6,0x3fe32463a74a69e7,2 +np.float64,0x80021d6c5c243ad9,0x80021d6c5c243ad9,2 +np.float64,0xbfebc805a6f7900b,0xbfe91edcf65a1c19,2 +np.float64,0x80044748adc88e92,0x80044748adc88e92,2 +np.float64,0x4007ee44800fe,0x4007ee44800fe,2 +np.float64,0xbfe24307a4648610,0xbfe1648ad5c47b6f,2 +np.float64,0xbfee6d3a93fcda75,0xbfeb13e1a3196e78,2 +np.float64,0x3fe49a287f293451,0x3fe364a11b9f0068,2 +np.float64,0x80052b37ceaa5670,0x80052b37ceaa5670,2 +np.float64,0xbfd42be893a857d2,0xbfd3da05dac7c286,2 +np.float64,0xffb4bbe4ac2977c8,0xc0861fb31bda6956,2 +np.float64,0xbfc732a4142e6548,0xbfc7129a4eafa399,2 +np.float64,0x7fd0696791a0d2ce,0x408628eb7756cb9c,2 +np.float64,0x3fe46c8f8d68d91f,0x3fe33e3df16187c1,2 +np.float64,0x3fe3a28f1ce7451e,0x3fe293043238d08c,2 +np.float64,0xffedc4eb723b89d6,0xc086333a92258c15,2 +np.float64,0x8000d15b4c41a2b7,0x8000d15b4c41a2b7,2 +np.float64,0xffeb73450236e689,0xc08632947b0148ab,2 +np.float64,0xffe68cf4722d19e8,0xc0863101d08d77bd,2 +np.float64,0x800c70eb4698e1d7,0x800c70eb4698e1d7,2 +np.float64,0xffa94387ff529,0xffa94387ff529,2 +np.float64,0x7fe3835d996706ba,0x40862fd985ff8e7d,2 +np.float64,0x3fe55e476feabc8e,0x3fe408a15594ec52,2 +np.float64,0xffc69672222d2ce4,0xc08625ee0c4c0f6a,2 +np.float64,0xbf9d900b883b2020,0xbf9d8efe811d36df,2 +np.float64,0xbfdb9b9755b7372e,0xbfdad0f2aa2cb110,2 +np.float64,0xffeade6073b5bcc0,0xc08632689f17a25d,2 +np.float64,0xffd1d6a6baa3ad4e,0xc086299630a93a7b,2 +np.float64,0x7fd05ba25620b744,0x408628e4be1ef845,2 +np.float64,0xbfc7d422d52fa844,0xbfc7b170a61531bf,2 +np.float64,0x3fd5196797aa32d0,0x3fd4bc0f0e7d8e1d,2 +np.float64,0x617594a4c2eb3,0x617594a4c2eb3,2 +np.float64,0x7fd779bc4caef378,0x40862bc89271b882,2 +np.float64,0xffd2fb262ba5f64c,0xc0862a15561e9524,2 +np.float64,0x72fd661ae5fad,0x72fd661ae5fad,2 +np.float64,0x3fecf441f339e884,0x3fe9ff880d584f64,2 +np.float64,0x7fc3a8968827512c,0x408624d198b05c61,2 +np.float64,0x3fe7a25c56ef44b9,0x3fe5e32509a7c32d,2 +np.float64,0x7fd117d514222fa9,0x4086293ec640d5f2,2 +np.float64,0x3fe37dfe5ee6fbfc,0x3fe273d1bcaa1ef0,2 +np.float64,0xbfed4cd19d7a99a3,0xbfea41064cba4c8b,2 +np.float64,0x8003ff12aaa7fe26,0x8003ff12aaa7fe26,2 +np.float64,0x3fcbc3d1193787a2,0x3fcb8d39e3e88264,2 +np.float64,0xe9ba1a91d3744,0xe9ba1a91d3744,2 +np.float64,0x8002ab71998556e4,0x8002ab71998556e4,2 +np.float64,0x800110057922200c,0x800110057922200c,2 +np.float64,0xbfe3b7af19a76f5e,0xbfe2a502fc0a2882,2 +np.float64,0x7fd9de9d5e33bd3a,0x40862c8f73cccabf,2 +np.float64,0xbfba0f0a86341e18,0xbfba0392f44c2771,2 +np.float64,0x8000000000000000,0x8000000000000000,2 +np.float64,0x7fe5d162e96ba2c5,0x408630be2b15e01b,2 
+np.float64,0x800b7f0eac76fe1e,0x800b7f0eac76fe1e,2 +np.float64,0xff98bed150317da0,0xc086160633164f5f,2 +np.float64,0x3fef91fd70ff23fb,0x3febe629709d0ae7,2 +np.float64,0x7fe5bea7f16b7d4f,0x408630b749f445e9,2 +np.float64,0xbfe3dc428467b885,0xbfe2c41ea93fab07,2 +np.float64,0xbfeba1fbfcf743f8,0xbfe9021b52851bb9,2 +np.float64,0x7fd2fb2108a5f641,0x40862a1553f45830,2 +np.float64,0x7feb8199a4370332,0x40863298a7169dad,2 +np.float64,0x800f97ff8d7f2fff,0x800f97ff8d7f2fff,2 +np.float64,0x3fd5e20b6b2bc417,0x3fd57a42bd1c0993,2 +np.float64,0x8006b4072dad680f,0x8006b4072dad680f,2 +np.float64,0x605dccf2c0bba,0x605dccf2c0bba,2 +np.float64,0x3fc705ed142e0bda,0x3fc6e69971d86f73,2 +np.float64,0xffd2ba1aad257436,0xc08629f9bc918f8b,2 +np.float64,0x8002954e23c52a9d,0x8002954e23c52a9d,2 +np.float64,0xbfecc65da7798cbb,0xbfe9dd745be18562,2 +np.float64,0x7fc66110482cc220,0x408625db0db57ef8,2 +np.float64,0x3fcd09446d3a1289,0x3fcccaf2dd0a41ea,2 +np.float64,0x3febe7095437ce13,0x3fe93642d1e73b2a,2 +np.float64,0x8004773c7da8ee7a,0x8004773c7da8ee7a,2 +np.float64,0x8001833241230665,0x8001833241230665,2 +np.float64,0x3fe6a262db6d44c6,0x3fe513b3dab5adce,2 +np.float64,0xe6282cc1cc506,0xe6282cc1cc506,2 +np.float64,0x800b9d8553973b0b,0x800b9d8553973b0b,2 +np.float64,0x3fdfbe0c7b3f7c19,0x3fde912375d867a8,2 +np.float64,0x7fd5ac11ebab5823,0x40862b24dfc6d08e,2 +np.float64,0x800e4b7cb1fc96f9,0x800e4b7cb1fc96f9,2 +np.float64,0x3fe14706da628e0e,0x3fe0883aec2a917a,2 +np.float64,0x7fc963f97532c7f2,0x408626dd9b0cafe1,2 +np.float64,0xbfe9c250b5b384a2,0xbfe791c5eabcb05d,2 +np.float64,0x3fe8d16e6c71a2dd,0x3fe6d4c7a33a0bf4,2 +np.float64,0x3fe474ae4628e95d,0x3fe34515c93f4733,2 +np.float64,0x3fbf3257ee3e64b0,0x3fbf1eb530e126ea,2 +np.float64,0x8005f089b3abe114,0x8005f089b3abe114,2 +np.float64,0x3fece07bccf9c0f8,0x3fe9f0dc228124d5,2 +np.float64,0xbfc52521632a4a44,0xbfc50ccebdf59c2c,2 +np.float64,0x7fdf53beb13ea77c,0x40862e177918195e,2 +np.float64,0x8003d9f6ad07b3ee,0x8003d9f6ad07b3ee,2 +np.float64,0xffeacf96bbb59f2d,0xc086326436b38b1a,2 +np.float64,0xdccaea29b995e,0xdccaea29b995e,2 +np.float64,0x5948d21eb291b,0x5948d21eb291b,2 +np.float64,0x10000000000000,0x10000000000000,2 +np.float64,0x7fef6d2c543eda58,0x408633a98593cdf5,2 +np.float64,0x7feda454f47b48a9,0x40863331cb6dc9f7,2 +np.float64,0x3fdd377cecba6ef8,0x3fdc4968f74a9c83,2 +np.float64,0x800644096d4c8814,0x800644096d4c8814,2 +np.float64,0xbfe33ca15ae67942,0xbfe23be5de832bd8,2 +np.float64,0xffce9582bd3d2b04,0xc086285abdf9bf9d,2 +np.float64,0x3fe6621e86acc43d,0x3fe4df231bfa93e1,2 +np.float64,0xee7d19e9dcfa3,0xee7d19e9dcfa3,2 +np.float64,0x800be5997277cb33,0x800be5997277cb33,2 +np.float64,0x82069041040e,0x82069041040e,2 +np.float64,0x800d6efdc19addfc,0x800d6efdc19addfc,2 +np.float64,0x7fb27770ee24eee1,0x40861ec5ed91b839,2 +np.float64,0x3fd506064caa0c0d,0x3fd4a9a66353fefd,2 +np.float64,0xbfeca9b36bf95367,0xbfe9c81f03ba37b8,2 +np.float64,0xffeab1b7bab5636f,0xc086325b47f61f2b,2 +np.float64,0xffc99f5b2e333eb8,0xc08626f03b08b412,2 +np.float64,0x3fbf1a71bc3e34e3,0x3fbf06fbcaa5de58,2 +np.float64,0x3fe75015736ea02b,0x3fe5a0cd8d763d8d,2 +np.float64,0xffe6a7442fad4e88,0xc086310b20addba4,2 +np.float64,0x3fe5d62ff86bac60,0x3fe46c033195bf28,2 +np.float64,0x7fd0b1f0362163df,0x4086290e857dc1be,2 +np.float64,0xbe0353737c06b,0xbe0353737c06b,2 +np.float64,0x7fec912d8739225a,0x408632e627704635,2 +np.float64,0xded8ba2fbdb18,0xded8ba2fbdb18,2 +np.float64,0x7fec0b53fdf816a7,0x408632c052bc1bd2,2 +np.float64,0x7fe9640d12b2c819,0x408631f4c2ba54d8,2 +np.float64,0x800be714eeb7ce2a,0x800be714eeb7ce2a,2 
+np.float64,0xbfcf444a793e8894,0xbfcef6c126b54853,2 +np.float64,0xffeb20cf1bf6419e,0xc086327c4e6ffe80,2 +np.float64,0xc07de22180fd,0xc07de22180fd,2 +np.float64,0xffed129d387a253a,0xc086330a15ad0adb,2 +np.float64,0x3fd9e94fedb3d2a0,0x3fd94049924706a8,2 +np.float64,0x7fe6ba488c2d7490,0x40863111d51e7861,2 +np.float64,0xbfebbdf25db77be5,0xbfe91740ad7ba521,2 +np.float64,0x7fbc6c3c4838d878,0x40862239160cb613,2 +np.float64,0xbfefa82ecebf505e,0xbfebf5f31957dffd,2 +np.float64,0x800bebeb7ad7d7d7,0x800bebeb7ad7d7d7,2 +np.float64,0x7fecccc6f8f9998d,0x408632f6c6da8aac,2 +np.float64,0xcbe4926197ca,0xcbe4926197ca,2 +np.float64,0x2c5d9fd858bb5,0x2c5d9fd858bb5,2 +np.float64,0xbfe9fb021073f604,0xbfe7bddc61f1151a,2 +np.float64,0xbfebb18572f7630b,0xbfe90ddc5002313f,2 +np.float64,0x13bb0d3227763,0x13bb0d3227763,2 +np.float64,0x3feefa5e5cbdf4bd,0x3feb79b9e8ce16bf,2 +np.float64,0x3fc97f086132fe10,0x3fc9549fc8e15ecb,2 +np.float64,0xffe70887c06e110f,0xc086312d30fd31cf,2 +np.float64,0xa00c113540182,0xa00c113540182,2 +np.float64,0x800950984772a131,0x800950984772a131,2 +np.float64,0x1,0x1,2 +np.float64,0x3fd83b4026b07680,0x3fd7afdc659d9a34,2 +np.float64,0xbfe32348fbe64692,0xbfe226292a706a1a,2 +np.float64,0x800b894dcc77129c,0x800b894dcc77129c,2 +np.float64,0xeb2ca419d6595,0xeb2ca419d6595,2 +np.float64,0xbff0000000000000,0xbfec34366179d427,2 +np.float64,0x3feb269e99f64d3d,0x3fe8a4634b927a21,2 +np.float64,0xbfe83149d7706294,0xbfe655a2b245254e,2 +np.float64,0xbfe6eef3ca6ddde8,0xbfe5521310e24d16,2 +np.float64,0x3fea89a4b7b51349,0x3fe82c1fc69edcec,2 +np.float64,0x800f2a8bf17e5518,0x800f2a8bf17e5518,2 +np.float64,0x800f71fac29ee3f6,0x800f71fac29ee3f6,2 +np.float64,0xe7cb31f1cf966,0xe7cb31f1cf966,2 +np.float64,0x3b0f8752761f2,0x3b0f8752761f2,2 +np.float64,0x3fea27dea3744fbd,0x3fe7e0a4705476b2,2 +np.float64,0xbfa97c019c32f800,0xbfa97950c1257b92,2 +np.float64,0xffeff13647ffe26c,0xc08633cadc7105ed,2 +np.float64,0x3feee162353dc2c4,0x3feb67c2da0fbce8,2 +np.float64,0x80088c0807911810,0x80088c0807911810,2 +np.float64,0x3fe936ab1db26d56,0x3fe72489bc69719d,2 +np.float64,0xa2f84bd545f0a,0xa2f84bd545f0a,2 +np.float64,0xbfed445ed27a88be,0xbfea3acac0aaf482,2 +np.float64,0x800faf3e69df5e7d,0x800faf3e69df5e7d,2 +np.float64,0x3fc145a330228b46,0x3fc13853f11b1c90,2 +np.float64,0xbfe25ec5abe4bd8c,0xbfe17c9e9b486f07,2 +np.float64,0x3fe119b160e23363,0x3fe0604b10178966,2 +np.float64,0x7fe0cbf2836197e4,0x40862ea6831e5f4a,2 +np.float64,0x3fe75dd3b4eebba8,0x3fe5abe80fd628fb,2 +np.float64,0x3f7c391000387220,0x3f7c39015d8f3a36,2 +np.float64,0x899d9cad133b4,0x899d9cad133b4,2 +np.float64,0x3fe5f0e34febe1c6,0x3fe4820cefe138fc,2 +np.float64,0x7fe060dfdba0c1bf,0x40862e72de8afcd0,2 +np.float64,0xbfae42f7103c85f0,0xbfae3e7630819c60,2 +np.float64,0x35f1f2c06be5,0x35f1f2c06be5,2 +np.float64,0xffc5194d362a329c,0xc086256266c8b7ad,2 +np.float64,0xbfda034f1b34069e,0xbfd95860a44c43ad,2 +np.float64,0x32bcebca6579e,0x32bcebca6579e,2 +np.float64,0xbfd1751ebca2ea3e,0xbfd13f79f45bf75c,2 +np.float64,0x3fee4fa1e5bc9f44,0x3feafe69e0d6c1c7,2 +np.float64,0x7f9c03cd5038079a,0x4086170459172900,2 +np.float64,0x7fc5fb6d6d2bf6da,0x408625b6651cfc73,2 +np.float64,0x7ff8000000000000,0x7ff8000000000000,2 +np.float64,0xffd1a8162ca3502c,0xc0862981333931ad,2 +np.float64,0x7fc415c198282b82,0x408624fd8c155d1b,2 +np.float64,0xffda37fbe7b46ff8,0xc0862caae7865c43,2 +np.float64,0xbfef4312257e8624,0xbfebadd89f3ee31c,2 +np.float64,0xbfec45e1fd788bc4,0xbfe97d8b14db6274,2 +np.float64,0xbfe6fdcfd26dfba0,0xbfe55e25b770d00a,2 +np.float64,0x7feb66d424f6cda7,0x40863290d9ff7ea2,2 
+np.float64,0x8b08a29916115,0x8b08a29916115,2 +np.float64,0xffe12ca25c625944,0xc0862ed40d769f72,2 +np.float64,0x7ff4000000000000,0x7ffc000000000000,2 +np.float64,0x804925e100925,0x804925e100925,2 +np.float64,0xcebf3e019d9,0xcebf3e019d9,2 +np.float64,0xbfd5d75d4aabaeba,0xbfd57027671dedf7,2 +np.float64,0x800b829ecd37053e,0x800b829ecd37053e,2 +np.float64,0x800b1205daf6240c,0x800b1205daf6240c,2 +np.float64,0x3fdf7e9889befd31,0x3fde583fdff406c3,2 +np.float64,0x7ff0000000000000,0x7ff0000000000000,2 +np.float64,0x3fdc09760d3812ec,0x3fdb35b55c8090c6,2 +np.float64,0x800c4d99e4f89b34,0x800c4d99e4f89b34,2 +np.float64,0xffbaa6772e354cf0,0xc08621b535badb2f,2 +np.float64,0xbfc91188fd322310,0xbfc8e933b5d25ea7,2 +np.float64,0xffc1b947f4237290,0xc08623fd69164251,2 +np.float64,0x3fc6ab3b252d5678,0x3fc68d50bbac106d,2 +np.float64,0xffac8eb968391d70,0xc0861cb734833355,2 +np.float64,0xffe29a35c365346b,0xc0862f77a1aed6d8,2 +np.float64,0x3fde14b9543c2973,0x3fdd122697779015,2 +np.float64,0xbf10f5400021e000,0xbf10f53fffef1383,2 +np.float64,0xffe0831aa3e10635,0xc0862e838553d0ca,2 +np.float64,0x3fccbadbcf3975b8,0x3fcc7e768d0154ec,2 +np.float64,0x3fe092ef66e125df,0x3fdfd212a7116c9b,2 +np.float64,0xbfd727f039ae4fe0,0xbfd6adad040b2334,2 +np.float64,0xbfe4223b93a84477,0xbfe2ff7587364db4,2 +np.float64,0x3f4e5c3a003cb874,0x3f4e5c39b75c70f7,2 +np.float64,0x800e76b1a87ced63,0x800e76b1a87ced63,2 +np.float64,0x3fed2b7368fa56e7,0x3fea2863b9131b8c,2 +np.float64,0xffadb76ec43b6ee0,0xc0861d08ae79f20c,2 +np.float64,0x800b6a0cd1f6d41a,0x800b6a0cd1f6d41a,2 +np.float64,0xffee6aa943fcd552,0xc0863366a24250d5,2 +np.float64,0xbfe68cbc4e6d1978,0xbfe502040591aa5b,2 +np.float64,0xff859a38002b3480,0xc0860f64726235cc,2 +np.float64,0x3474d13e68e9b,0x3474d13e68e9b,2 +np.float64,0xffc11d49f6223a94,0xc08623b5c2df9712,2 +np.float64,0x800d82d019bb05a0,0x800d82d019bb05a0,2 +np.float64,0xbfe2af0192255e03,0xbfe1c20e38106388,2 +np.float64,0x3fe97d13c032fa28,0x3fe75bba11a65f86,2 +np.float64,0x7fcd457e133a8afb,0x40862800e80f5863,2 +np.float64,0x9d7254cf3ae4b,0x9d7254cf3ae4b,2 +np.float64,0x8003047675a608ee,0x8003047675a608ee,2 +np.float64,0x3fead6cd7d75ad9a,0x3fe8676138e5ff93,2 +np.float64,0x3fea6ee3b0f4ddc7,0x3fe817838a2bcbe3,2 +np.float64,0x3feed0edea7da1dc,0x3feb5bea3cb12fe2,2 +np.float64,0x88003fe510008,0x88003fe510008,2 +np.float64,0x3fe64cadc56c995c,0x3fe4cd8ead87fc79,2 +np.float64,0xaae30c5955c62,0xaae30c5955c62,2 +np.float64,0x7fc8c97cae3192f8,0x408626ac579f4fc5,2 +np.float64,0xbfc2bc0e8b25781c,0xbfc2ab188fdab7dc,2 +np.float64,0xc8f8e5e791f1d,0xc8f8e5e791f1d,2 +np.float64,0x3fecfaa5d6f9f54c,0x3fea0444dabe5a15,2 +np.float64,0xbfeb93740ff726e8,0xbfe8f71a9ab13baf,2 +np.float64,0xffd951236c32a246,0xc0862c633a4661eb,2 +np.float64,0x3fddbc5fcd3b78c0,0x3fdcc21c1a0a9246,2 +np.float64,0xbfd242443da48488,0xbfd20512d91f7924,2 +np.float64,0x2a3689b2546d2,0x2a3689b2546d2,2 +np.float64,0xffe24c67382498ce,0xc0862f55e4ea6283,2 +np.float64,0x800cbfce22197f9c,0x800cbfce22197f9c,2 +np.float64,0x8002269428044d29,0x8002269428044d29,2 +np.float64,0x7fd44babbd289756,0x40862a9e79b51c3b,2 +np.float64,0x3feea056a27d40ad,0x3feb38dcddb682f0,2 +np.float64,0xffeca8174b39502e,0xc08632ec8f88a5b2,2 +np.float64,0x7fbe0853a03c10a6,0x408622a9e8d53a9e,2 +np.float64,0xbfa9704b2432e090,0xbfa96d9dfc8c0cc2,2 +np.float64,0x800bda28fab7b452,0x800bda28fab7b452,2 +np.float64,0xbfb0ffa2f621ff48,0xbfb0fc71f405e82a,2 +np.float64,0xbfe66c04216cd808,0xbfe4e73ea3b58cf6,2 +np.float64,0x3fe336ea5d266dd5,0x3fe236ffcf078c62,2 +np.float64,0xbfe7729ae6aee536,0xbfe5bcad4b8ac62d,2 
+np.float64,0x558cfc96ab1a0,0x558cfc96ab1a0,2 +np.float64,0xbfe7d792aaefaf26,0xbfe60de1b8f0279d,2 +np.float64,0xffd19ef6bda33dee,0xc086297d0ffee3c7,2 +np.float64,0x666b3ab4ccd68,0x666b3ab4ccd68,2 +np.float64,0xffa3d89e3c27b140,0xc08619cdeb2c1e49,2 +np.float64,0xbfb1728f7f62f,0xbfb1728f7f62f,2 +np.float64,0x3fc76319f32ec634,0x3fc74247bd005e20,2 +np.float64,0xbfbf1caee23e3960,0xbfbf0934c13d70e2,2 +np.float64,0x7fe79626f32f2c4d,0x4086315dcc68a5cb,2 +np.float64,0xffee78c4603cf188,0xc086336a572c05c2,2 +np.float64,0x3fce546eda3ca8de,0x3fce0d8d737fd31d,2 +np.float64,0xa223644d4446d,0xa223644d4446d,2 +np.float64,0x3fecea878b79d510,0x3fe9f850d50973f6,2 +np.float64,0x3fc20e0ea1241c1d,0x3fc1fedda87c5e75,2 +np.float64,0xffd1c5a99ca38b54,0xc086298e8e94cd47,2 +np.float64,0x7feb2c299d765852,0x4086327fa6db2808,2 +np.float64,0xcaf9d09595f3a,0xcaf9d09595f3a,2 +np.float64,0xbfe293bf21e5277e,0xbfe1aa7f6ac274ef,2 +np.float64,0xbfbaa3c8ce354790,0xbfba97891df19c01,2 +np.float64,0x3faf5784543eaf09,0x3faf5283acc7d71d,2 +np.float64,0x7fc014f8f62029f1,0x40862336531c662d,2 +np.float64,0xbfe0d9ac2d61b358,0xbfe027bce36699ca,2 +np.float64,0x8003e112ff27c227,0x8003e112ff27c227,2 +np.float64,0xffec0d4151381a82,0xc08632c0df718dd0,2 +np.float64,0x7fa2156fb0242ade,0x4086190f7587d708,2 +np.float64,0xd698358dad307,0xd698358dad307,2 +np.float64,0xbfed8d1b0efb1a36,0xbfea70588ef9ba18,2 +np.float64,0xbfd2cae6a92595ce,0xbfd28851e2185dee,2 +np.float64,0xffe7a36764ef46ce,0xc086316249c9287a,2 +np.float64,0xbfdb8ad8e5b715b2,0xbfdac19213c14315,2 +np.float64,0x3b5dba6076bc,0x3b5dba6076bc,2 +np.float64,0x800e6e8347bcdd07,0x800e6e8347bcdd07,2 +np.float64,0x800bea9f3fb7d53f,0x800bea9f3fb7d53f,2 +np.float64,0x7fb6d0e5fc2da1cb,0x4086207714c4ab85,2 +np.float64,0x0,0x0,2 +np.float64,0xbfe2aa1e1465543c,0xbfe1bdd550ef2966,2 +np.float64,0x7fd3f6a47fa7ed48,0x40862a7caea33055,2 +np.float64,0x800094e292c129c6,0x800094e292c129c6,2 +np.float64,0x800e1500ecbc2a02,0x800e1500ecbc2a02,2 +np.float64,0xbfd8ff6f97b1fee0,0xbfd866f84346ecdc,2 +np.float64,0x681457d0d028c,0x681457d0d028c,2 +np.float64,0x3feed0b5987da16b,0x3feb5bc1ab424984,2 +np.float64,0x3fdbcb34cdb79668,0x3fdafca540f32c06,2 +np.float64,0xbfdc9eacdcb93d5a,0xbfdbbe274aa8aeb0,2 +np.float64,0xffe6e35d526dc6ba,0xc08631203df38ed2,2 +np.float64,0x3fcac1cc65358398,0x3fca90de41889613,2 +np.float64,0xbfebf07a55b7e0f5,0xbfe93d6007db0c67,2 +np.float64,0xbfd7a7b1e7af4f64,0xbfd725a9081c22cb,2 +np.float64,0x800232bd7de4657c,0x800232bd7de4657c,2 +np.float64,0x7fb1dae43c23b5c7,0x40861e80f5c0a64e,2 +np.float64,0x8013ded70027c,0x8013ded70027c,2 +np.float64,0x7fc4373a59286e74,0x4086250ad60575d0,2 +np.float64,0xbfe9980fd6733020,0xbfe770d1352d0ed3,2 +np.float64,0x8008a66b8dd14cd7,0x8008a66b8dd14cd7,2 +np.float64,0xbfaebc67f83d78d0,0xbfaeb7b015848478,2 +np.float64,0xffd0c52762218a4e,0xc0862917b564afc6,2 +np.float64,0xbfd503860aaa070c,0xbfd4a74618441561,2 +np.float64,0x5bdacabcb7b5a,0x5bdacabcb7b5a,2 +np.float64,0xf3623cffe6c48,0xf3623cffe6c48,2 +np.float64,0x7fe16c6c7ea2d8d8,0x40862ef18d90201f,2 +np.float64,0x3ff0000000000000,0x3fec34366179d427,2 +np.float64,0x7fe19cbc84233978,0x40862f079dcbc169,2 +np.float64,0x3fcfd3d6933fa7ad,0x3fcf822187907f6b,2 +np.float64,0x8007d65d672facbc,0x8007d65d672facbc,2 +np.float64,0xffca6115aa34c22c,0xc086272bd7728750,2 +np.float64,0xbfe77ab1556ef562,0xbfe5c332fb55b66e,2 +np.float64,0x8001ed797c23daf4,0x8001ed797c23daf4,2 +np.float64,0x7fdd3d16cb3a7a2d,0x40862d8a2c869281,2 +np.float64,0x75f36beaebe6e,0x75f36beaebe6e,2 +np.float64,0xffda3c2798b47850,0xc0862cac2d3435df,2 
+np.float64,0xbfa37cc3c426f980,0xbfa37b8f9d3ec4b7,2 +np.float64,0x80030ea8bd061d52,0x80030ea8bd061d52,2 +np.float64,0xffe41f7617683eec,0xc08630188a3e135e,2 +np.float64,0x800e40590dfc80b2,0x800e40590dfc80b2,2 +np.float64,0x3fea950d80f52a1c,0x3fe834e74481e66f,2 +np.float64,0xffec95e39a792bc6,0xc08632e779150084,2 +np.float64,0xbfd54310ecaa8622,0xbfd4e39c4d767002,2 +np.float64,0xffd40c9971a81932,0xc0862a85764eb2f4,2 +np.float64,0xb0a2230761445,0xb0a2230761445,2 +np.float64,0x80092973661252e7,0x80092973661252e7,2 +np.float64,0x7fb13b030a227605,0x40861e380aeb5549,2 +np.float64,0x3fbd5d8db23abb1b,0x3fbd4d2a0b94af36,2 +np.float64,0xbfd6cb8567ad970a,0xbfd656b19ab8fa61,2 +np.float64,0xbfe7c0fd346f81fa,0xbfe5fbc28807c794,2 +np.float64,0xffd586579eab0cb0,0xc0862b16e65c0754,2 +np.float64,0x8000e52da461ca5c,0x8000e52da461ca5c,2 +np.float64,0x3fc69d17112d3a2e,0x3fc67f63fe1fea1c,2 +np.float64,0x3fd36ba892a6d750,0x3fd3225be1fa87af,2 +np.float64,0x7fe2850598e50a0a,0x40862f6e7fcd6c1a,2 +np.float64,0x80074a4dacce949c,0x80074a4dacce949c,2 +np.float64,0x3fe25eea4d64bdd5,0x3fe17cbe5fefbd4e,2 +np.float64,0xbfe250c08be4a181,0xbfe17074c520e5de,2 +np.float64,0x8000f5665481eacd,0x8000f5665481eacd,2 +np.float64,0x7fdb3172f83662e5,0x40862cf5a46764f1,2 +np.float64,0x7fd8ed82d631db05,0x40862c4380658afa,2 +np.float64,0xffec5163feb8a2c7,0xc08632d4366aab06,2 +np.float64,0x800ff14ac6ffe296,0x800ff14ac6ffe296,2 +np.float64,0xbfc7cc7aea2f98f4,0xbfc7a9e9cb38f023,2 +np.float64,0xbfd50cdfc32a19c0,0xbfd4b0282b452fb2,2 +np.float64,0xbfec256d75b84adb,0xbfe965328c1860b2,2 +np.float64,0xffe860c4cdb0c189,0xc08631a164b7059a,2 +np.float64,0xbfe23de164247bc3,0xbfe16011bffa4651,2 +np.float64,0xcc96b39d992d7,0xcc96b39d992d7,2 +np.float64,0xbfec43acf938875a,0xbfe97be3a13b50c3,2 +np.float64,0xc4f587bb89eb1,0xc4f587bb89eb1,2 +np.float64,0xbfcd971d9a3b2e3c,0xbfcd5537ad15dab4,2 +np.float64,0xffcaf00d8035e01c,0xc0862756bf2cdf8f,2 +np.float64,0x8008c26f93f184e0,0x8008c26f93f184e0,2 +np.float64,0xfff0000000000000,0xfff0000000000000,2 +np.float64,0xbfd13552c3a26aa6,0xbfd101e5e252eb7b,2 +np.float64,0x7fe497235e292e46,0x4086304792fb423a,2 +np.float64,0x7fd6dc0192adb802,0x40862b921a5e935d,2 +np.float64,0xf16d49a1e2da9,0xf16d49a1e2da9,2 +np.float64,0xffef6b1b71bed636,0xc08633a8feed0178,2 +np.float64,0x7fe15ec62f62bd8b,0x40862eeb46b193dc,2 +np.float64,0x3fef4369ec7e86d4,0x3febae1768be52cc,2 +np.float64,0x4f84e8e89f09e,0x4f84e8e89f09e,2 +np.float64,0xbfe19e71ade33ce4,0xbfe0d4fad05e0ebc,2 +np.float64,0xbfe7e1df1defc3be,0xbfe616233e15b3d0,2 +np.float64,0x7fe9349afdb26935,0x408631e5c1c5c6cd,2 +np.float64,0xff90c35ac82186c0,0xc08612e896a06467,2 +np.float64,0xbfe88bf8807117f1,0xbfe69dc786464422,2 +np.float64,0x3feaf9ff6475f3fe,0x3fe8825132410d18,2 +np.float64,0x9ff487a33fe91,0x9ff487a33fe91,2 +np.float64,0x7fedb30159bb6602,0x40863335c0419322,2 +np.float64,0x800bddf6ed77bbee,0x800bddf6ed77bbee,2 +np.float64,0x3fd919df133233be,0x3fd87f963b9584ce,2 +np.float64,0x7fd64da3b52c9b46,0x40862b5fa9dd3b6d,2 +np.float64,0xbfce288db43c511c,0xbfcde2d953407ae8,2 +np.float64,0x3fe88bc72771178e,0x3fe69da05e9e9b4e,2 +np.float64,0x800feafe259fd5fc,0x800feafe259fd5fc,2 +np.float64,0x3febbbff4a7777ff,0x3fe915c78f6a280f,2 +np.float64,0xbfefbde4417f7bc9,0xbfec055f4fb2cd21,2 +np.float64,0xf13ca103e2794,0xf13ca103e2794,2 +np.float64,0x3fe6423884ec8471,0x3fe4c4f97eaa876a,2 +np.float64,0x800ca01c8cb94039,0x800ca01c8cb94039,2 +np.float64,0x3fbc5073f638a0e0,0x3fbc41c163ac0001,2 +np.float64,0xbfda0d83cfb41b08,0xbfd961d4cacc82cf,2 +np.float64,0x800f37b8f17e6f72,0x800f37b8f17e6f72,2 
+np.float64,0x7fe0b08cd7216119,0x40862e996becb771,2 +np.float64,0xffd4222a40a84454,0xc0862a8e0c984917,2 +np.float64,0x7feb3df98ff67bf2,0x40863284e3a86ee6,2 +np.float64,0x8001d5d291e3aba6,0x8001d5d291e3aba6,2 +np.float64,0xbfd3c21629a7842c,0xbfd3750095a5894a,2 +np.float64,0xbfd069eb48a0d3d6,0xbfd03d2b1c2ae9db,2 +np.float64,0xffeb1be2973637c4,0xc086327ada954662,2 +np.float64,0x3fc659f97e2cb3f3,0x3fc63d497a451f10,2 +np.float64,0xbfeb624bc776c498,0xbfe8d1cf7c0626ca,2 +np.float64,0xffeedf26e23dbe4d,0xc08633850baab425,2 +np.float64,0xffe70da48a6e1b48,0xc086312ef75d5036,2 +np.float64,0x2b4f4830569ea,0x2b4f4830569ea,2 +np.float64,0xffe82e7fcfb05cff,0xc0863190d4771f75,2 +np.float64,0x3fcc2c1fd5385840,0x3fcbf3211ddc5123,2 +np.float64,0x7fe22ced5a6459da,0x40862f481629ee6a,2 +np.float64,0x7fe13d2895e27a50,0x40862edbbc411899,2 +np.float64,0x3fd54c4280aa9884,0x3fd4ec55a946c5d7,2 +np.float64,0xffd75b8e01aeb71c,0xc0862bbe42d76e5e,2 +np.float64,0x7f1d5376fe3ab,0x7f1d5376fe3ab,2 +np.float64,0x3fe6ec6c902dd8d9,0x3fe55004f35192bd,2 +np.float64,0x5634504aac68b,0x5634504aac68b,2 +np.float64,0x3feedb0d83bdb61b,0x3feb633467467ce6,2 +np.float64,0x3fddb1c0dcbb6380,0x3fdcb87a02daf1fa,2 +np.float64,0xbfa832da443065b0,0xbfa8308c70257209,2 +np.float64,0x87a9836b0f531,0x87a9836b0f531,2 diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-arctan.csv b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-arctan.csv new file mode 100644 index 0000000..1e92073 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-arctan.csv @@ -0,0 +1,1429 @@ +dtype,input,output,ulperrortol +np.float32,0x3f338252,0x3f1c8d9c,3 +np.float32,0x7e569df2,0x3fc90fdb,3 +np.float32,0xbf347e25,0xbf1d361f,3 +np.float32,0xbf0a654e,0xbefdbfd2,3 +np.float32,0x8070968e,0x8070968e,3 +np.float32,0x803cfb27,0x803cfb27,3 +np.float32,0x8024362e,0x8024362e,3 +np.float32,0xfd55dca0,0xbfc90fdb,3 +np.float32,0x592b82,0x592b82,3 +np.float32,0x802eb8e1,0x802eb8e1,3 +np.float32,0xbc5fef40,0xbc5febae,3 +np.float32,0x3f1f6ce8,0x3f0e967c,3 +np.float32,0x20bedc,0x20bedc,3 +np.float32,0xbf058860,0xbef629c7,3 +np.float32,0x311504,0x311504,3 +np.float32,0xbd23f560,0xbd23defa,3 +np.float32,0x800ff4e8,0x800ff4e8,3 +np.float32,0x355009,0x355009,3 +np.float32,0x3f7be42e,0x3f46fdb3,3 +np.float32,0xbf225f7c,0xbf10b364,3 +np.float32,0x8074fa9e,0x8074fa9e,3 +np.float32,0xbea4b418,0xbe9f59ce,3 +np.float32,0xbe909c14,0xbe8cf045,3 +np.float32,0x80026bee,0x80026bee,3 +np.float32,0x3d789c20,0x3d784e25,3 +np.float32,0x7f56a4ba,0x3fc90fdb,3 +np.float32,0xbf70d141,0xbf413db7,3 +np.float32,0xbf2c4886,0xbf17a505,3 +np.float32,0x7e2993bf,0x3fc90fdb,3 +np.float32,0xbe2c8a30,0xbe2aef28,3 +np.float32,0x803f82d9,0x803f82d9,3 +np.float32,0x3f062fbc,0x3ef730a1,3 +np.float32,0x3f349ee0,0x3f1d4bfa,3 +np.float32,0x3eccfb69,0x3ec2f9e8,3 +np.float32,0x7e8a85dd,0x3fc90fdb,3 +np.float32,0x25331,0x25331,3 +np.float32,0x464f19,0x464f19,3 +np.float32,0x8035c818,0x8035c818,3 +np.float32,0x802e5799,0x802e5799,3 +np.float32,0x64e1c0,0x64e1c0,3 +np.float32,0x701cc2,0x701cc2,3 +np.float32,0x265c57,0x265c57,3 +np.float32,0x807a053f,0x807a053f,3 +np.float32,0x3bd2c412,0x3bd2c354,3 +np.float32,0xff28f1c8,0xbfc90fdb,3 +np.float32,0x7f08f08b,0x3fc90fdb,3 +np.float32,0x800c50e4,0x800c50e4,3 +np.float32,0x369674,0x369674,3 +np.float32,0xbf5b7db3,0xbf3571bf,3 +np.float32,0x7edcf5e2,0x3fc90fdb,3 +np.float32,0x800e5d4b,0x800e5d4b,3 +np.float32,0x80722554,0x80722554,3 +np.float32,0x693f33,0x693f33,3
+np.float32,0x800844e4,0x800844e4,3 +np.float32,0xbf111b82,0xbf0402ec,3 +np.float32,0x7df9c9ac,0x3fc90fdb,3 +np.float32,0xbf6619a6,0xbf3b6f57,3 +np.float32,0x8002fafe,0x8002fafe,3 +np.float32,0xfe1e67f8,0xbfc90fdb,3 +np.float32,0x3f7f4bf8,0x3f48b5b7,3 +np.float32,0x7f017b20,0x3fc90fdb,3 +np.float32,0x2d9b07,0x2d9b07,3 +np.float32,0x803aa174,0x803aa174,3 +np.float32,0x7d530336,0x3fc90fdb,3 +np.float32,0x80662195,0x80662195,3 +np.float32,0xfd5ebcf0,0xbfc90fdb,3 +np.float32,0xbe7b8dcc,0xbe76ab59,3 +np.float32,0x7f2bacaf,0x3fc90fdb,3 +np.float32,0x3f194fc4,0x3f0a229e,3 +np.float32,0x7ee21cdf,0x3fc90fdb,3 +np.float32,0x3f5a17fc,0x3f34a307,3 +np.float32,0x7f100c58,0x3fc90fdb,3 +np.float32,0x7e9128f5,0x3fc90fdb,3 +np.float32,0xbf2107c6,0xbf0fbdb4,3 +np.float32,0xbd29c800,0xbd29af22,3 +np.float32,0xbf5af499,0xbf3522a6,3 +np.float32,0x801bde44,0x801bde44,3 +np.float32,0xfeb4761a,0xbfc90fdb,3 +np.float32,0x3d88aa1b,0x3d887650,3 +np.float32,0x7eba5e0b,0x3fc90fdb,3 +np.float32,0x803906bd,0x803906bd,3 +np.float32,0x80101512,0x80101512,3 +np.float32,0x7e898f83,0x3fc90fdb,3 +np.float32,0x806406d3,0x806406d3,3 +np.float32,0x7ed20fc0,0x3fc90fdb,3 +np.float32,0x20827d,0x20827d,3 +np.float32,0x3f361359,0x3f1e43fe,3 +np.float32,0xfe4ef8d8,0xbfc90fdb,3 +np.float32,0x805e7d2d,0x805e7d2d,3 +np.float32,0xbe4316b0,0xbe40c745,3 +np.float32,0xbf0a1c06,0xbefd4e5a,3 +np.float32,0x3e202860,0x3e1edee1,3 +np.float32,0xbeb32a2c,0xbeac5899,3 +np.float32,0xfe528838,0xbfc90fdb,3 +np.float32,0x2f73e2,0x2f73e2,3 +np.float32,0xbe16e010,0xbe15cc27,3 +np.float32,0x3f50d6c5,0x3f2f2d75,3 +np.float32,0xbe88a6a2,0xbe8589c7,3 +np.float32,0x3ee36060,0x3ed5fb36,3 +np.float32,0x6c978b,0x6c978b,3 +np.float32,0x7f1b735f,0x3fc90fdb,3 +np.float32,0x3dad8256,0x3dad1885,3 +np.float32,0x807f5094,0x807f5094,3 +np.float32,0x65c358,0x65c358,3 +np.float32,0xff315ce4,0xbfc90fdb,3 +np.float32,0x7411a6,0x7411a6,3 +np.float32,0x80757b04,0x80757b04,3 +np.float32,0x3eec73a6,0x3edd82f4,3 +np.float32,0xfe9f69e8,0xbfc90fdb,3 +np.float32,0x801f4fa8,0x801f4fa8,3 +np.float32,0xbf6f2fae,0xbf405f79,3 +np.float32,0xfea206b6,0xbfc90fdb,3 +np.float32,0x3f257301,0x3f12e1ee,3 +np.float32,0x7ea6a506,0x3fc90fdb,3 +np.float32,0x80800000,0x80800000,3 +np.float32,0xff735c2d,0xbfc90fdb,3 +np.float32,0x80197f95,0x80197f95,3 +np.float32,0x7f4a354f,0x3fc90fdb,3 +np.float32,0xff320c00,0xbfc90fdb,3 +np.float32,0x3f2659de,0x3f138484,3 +np.float32,0xbe5451bc,0xbe515a52,3 +np.float32,0x3f6e228c,0x3f3fcf7c,3 +np.float32,0x66855a,0x66855a,3 +np.float32,0x8034b3a3,0x8034b3a3,3 +np.float32,0xbe21a2fc,0xbe20505d,3 +np.float32,0x7f79e2dc,0x3fc90fdb,3 +np.float32,0xbe19a8e0,0xbe18858c,3 +np.float32,0x10802c,0x10802c,3 +np.float32,0xfeee579e,0xbfc90fdb,3 +np.float32,0x3f3292c8,0x3f1becc0,3 +np.float32,0xbf595a71,0xbf34350a,3 +np.float32,0xbf7c3373,0xbf4725f4,3 +np.float32,0xbdd30938,0xbdd24b36,3 +np.float32,0x153a17,0x153a17,3 +np.float32,0x807282a0,0x807282a0,3 +np.float32,0xfe817322,0xbfc90fdb,3 +np.float32,0x3f1b3628,0x3f0b8771,3 +np.float32,0x41be8f,0x41be8f,3 +np.float32,0x7f4a8343,0x3fc90fdb,3 +np.float32,0x3dc4ea2b,0x3dc44fae,3 +np.float32,0x802aac25,0x802aac25,3 +np.float32,0xbf20e1d7,0xbf0fa284,3 +np.float32,0xfd91a1b0,0xbfc90fdb,3 +np.float32,0x3f0d5476,0x3f012265,3 +np.float32,0x21c916,0x21c916,3 +np.float32,0x807df399,0x807df399,3 +np.float32,0x7e207b4c,0x3fc90fdb,3 +np.float32,0x8055f8ff,0x8055f8ff,3 +np.float32,0x7edf3b01,0x3fc90fdb,3 +np.float32,0x803a8df3,0x803a8df3,3 +np.float32,0x3ce3b002,0x3ce3a101,3 +np.float32,0x3f62dd54,0x3f39a248,3 
+np.float32,0xff33ae10,0xbfc90fdb,3 +np.float32,0x7e3de69d,0x3fc90fdb,3 +np.float32,0x8024581e,0x8024581e,3 +np.float32,0xbf4ac99d,0xbf2b807a,3 +np.float32,0x3f157d19,0x3f074d8c,3 +np.float32,0xfed383f4,0xbfc90fdb,3 +np.float32,0xbf5a39fa,0xbf34b6b8,3 +np.float32,0x800d757d,0x800d757d,3 +np.float32,0x807d606b,0x807d606b,3 +np.float32,0x3e828f89,0x3e7fac2d,3 +np.float32,0x7a6604,0x7a6604,3 +np.float32,0x7dc7e72b,0x3fc90fdb,3 +np.float32,0x80144146,0x80144146,3 +np.float32,0x7c2eed69,0x3fc90fdb,3 +np.float32,0x3f5b4d8c,0x3f3555fc,3 +np.float32,0xfd8b7778,0xbfc90fdb,3 +np.float32,0xfc9d9140,0xbfc90fdb,3 +np.float32,0xbea265d4,0xbe9d4232,3 +np.float32,0xbe9344d0,0xbe8f65da,3 +np.float32,0x3f71f19a,0x3f41d65b,3 +np.float32,0x804a3f59,0x804a3f59,3 +np.float32,0x3e596290,0x3e563476,3 +np.float32,0x3e994ee4,0x3e94f546,3 +np.float32,0xbc103e00,0xbc103d0c,3 +np.float32,0xbf1cd896,0xbf0cb889,3 +np.float32,0x7f52b080,0x3fc90fdb,3 +np.float32,0xff584452,0xbfc90fdb,3 +np.float32,0x58b26b,0x58b26b,3 +np.float32,0x3f23cd4c,0x3f11b799,3 +np.float32,0x707d7,0x707d7,3 +np.float32,0xff732cff,0xbfc90fdb,3 +np.float32,0x3e41c2a6,0x3e3f7f0f,3 +np.float32,0xbf7058e9,0xbf40fdcf,3 +np.float32,0x7dca9857,0x3fc90fdb,3 +np.float32,0x7f0eb44b,0x3fc90fdb,3 +np.float32,0x8000405c,0x8000405c,3 +np.float32,0x4916ab,0x4916ab,3 +np.float32,0x4811a8,0x4811a8,3 +np.float32,0x3d69bf,0x3d69bf,3 +np.float32,0xfeadcf1e,0xbfc90fdb,3 +np.float32,0x3e08dbbf,0x3e080d58,3 +np.float32,0xff031f88,0xbfc90fdb,3 +np.float32,0xbe09cab8,0xbe08f818,3 +np.float32,0x21d7cd,0x21d7cd,3 +np.float32,0x3f23230d,0x3f113ea9,3 +np.float32,0x7e8a48d4,0x3fc90fdb,3 +np.float32,0x413869,0x413869,3 +np.float32,0x7e832990,0x3fc90fdb,3 +np.float32,0x800f5c09,0x800f5c09,3 +np.float32,0x7f5893b6,0x3fc90fdb,3 +np.float32,0x7f06b5b1,0x3fc90fdb,3 +np.float32,0xbe1cbee8,0xbe1b89d6,3 +np.float32,0xbf279f14,0xbf1468a8,3 +np.float32,0xfea86060,0xbfc90fdb,3 +np.float32,0x3e828174,0x3e7f91bb,3 +np.float32,0xff682c82,0xbfc90fdb,3 +np.float32,0x4e20f3,0x4e20f3,3 +np.float32,0x7f17d7e9,0x3fc90fdb,3 +np.float32,0x80671f92,0x80671f92,3 +np.float32,0x7f6dd100,0x3fc90fdb,3 +np.float32,0x3f219a4d,0x3f102695,3 +np.float32,0x803c9808,0x803c9808,3 +np.float32,0x3c432ada,0x3c43287d,3 +np.float32,0xbd3db450,0xbd3d91a2,3 +np.float32,0x3baac135,0x3baac0d0,3 +np.float32,0xff7fffe1,0xbfc90fdb,3 +np.float32,0xfe38a6f4,0xbfc90fdb,3 +np.float32,0x3dfb0a04,0x3df9cb04,3 +np.float32,0x800b05c2,0x800b05c2,3 +np.float32,0x644163,0x644163,3 +np.float32,0xff03a025,0xbfc90fdb,3 +np.float32,0x3f7d506c,0x3f47b641,3 +np.float32,0xff0e682a,0xbfc90fdb,3 +np.float32,0x3e09b7b0,0x3e08e567,3 +np.float32,0x7f72a216,0x3fc90fdb,3 +np.float32,0x7f800000,0x3fc90fdb,3 +np.float32,0x8050a281,0x8050a281,3 +np.float32,0x7edafa2f,0x3fc90fdb,3 +np.float32,0x3f4e0df6,0x3f2d7f2f,3 +np.float32,0xbf6728e0,0xbf3c050f,3 +np.float32,0x3e904ce4,0x3e8ca6eb,3 +np.float32,0x0,0x0,3 +np.float32,0xfd215070,0xbfc90fdb,3 +np.float32,0x7e406b15,0x3fc90fdb,3 +np.float32,0xbf2803c9,0xbf14af18,3 +np.float32,0x5950c8,0x5950c8,3 +np.float32,0xbeddcec8,0xbed14faa,3 +np.float32,0xbec6457e,0xbebd2aa5,3 +np.float32,0xbf42843c,0xbf2656db,3 +np.float32,0x3ee9cba8,0x3edb5163,3 +np.float32,0xbe30c954,0xbe2f0f90,3 +np.float32,0xbeee6b44,0xbedf216f,3 +np.float32,0xbe35d818,0xbe33f7cd,3 +np.float32,0xbe47c630,0xbe454bc6,3 +np.float32,0x801b146f,0x801b146f,3 +np.float32,0x7f6788da,0x3fc90fdb,3 +np.float32,0x3eaef088,0x3ea8927d,3 +np.float32,0x3eb5983e,0x3eae81fc,3 +np.float32,0x40b51d,0x40b51d,3 +np.float32,0xfebddd04,0xbfc90fdb,3 
+np.float32,0x3e591aee,0x3e55efea,3 +np.float32,0xbe2b6b48,0xbe29d81f,3 +np.float32,0xff4a8826,0xbfc90fdb,3 +np.float32,0x3e791df0,0x3e745eac,3 +np.float32,0x7c8f681f,0x3fc90fdb,3 +np.float32,0xfe7a15c4,0xbfc90fdb,3 +np.float32,0x3c8963,0x3c8963,3 +np.float32,0x3f0afa0a,0x3efea5cc,3 +np.float32,0xbf0d2680,0xbf00ff29,3 +np.float32,0x3dc306b0,0x3dc27096,3 +np.float32,0x7f4cf105,0x3fc90fdb,3 +np.float32,0xbe196060,0xbe183ea4,3 +np.float32,0x5caf1c,0x5caf1c,3 +np.float32,0x801f2852,0x801f2852,3 +np.float32,0xbe01aa0c,0xbe00fa53,3 +np.float32,0x3f0cfd32,0x3f00df7a,3 +np.float32,0x7d82038e,0x3fc90fdb,3 +np.float32,0x7f7b927f,0x3fc90fdb,3 +np.float32,0xbe93b2e4,0xbe8fcb7f,3 +np.float32,0x1ffe8c,0x1ffe8c,3 +np.float32,0x3faaf6,0x3faaf6,3 +np.float32,0x3e32b1b8,0x3e30e9ab,3 +np.float32,0x802953c0,0x802953c0,3 +np.float32,0xfe5d9844,0xbfc90fdb,3 +np.float32,0x3e1a59d0,0x3e193292,3 +np.float32,0x801c6edc,0x801c6edc,3 +np.float32,0x1ecf41,0x1ecf41,3 +np.float32,0xfe56b09c,0xbfc90fdb,3 +np.float32,0x7e878351,0x3fc90fdb,3 +np.float32,0x3f401e2c,0x3f24cfcb,3 +np.float32,0xbf204a40,0xbf0f35bb,3 +np.float32,0x3e155a98,0x3e144ee1,3 +np.float32,0xbf34f929,0xbf1d8838,3 +np.float32,0x801bbf70,0x801bbf70,3 +np.float32,0x7e7c9730,0x3fc90fdb,3 +np.float32,0x7cc23432,0x3fc90fdb,3 +np.float32,0xbf351638,0xbf1d9b97,3 +np.float32,0x80152094,0x80152094,3 +np.float32,0x3f2d731c,0x3f187219,3 +np.float32,0x804ab0b7,0x804ab0b7,3 +np.float32,0x37d6db,0x37d6db,3 +np.float32,0xbf3ccc56,0xbf22acbf,3 +np.float32,0x3e546f8c,0x3e5176e7,3 +np.float32,0xbe90e87e,0xbe8d3707,3 +np.float32,0x48256c,0x48256c,3 +np.float32,0x7e2468d0,0x3fc90fdb,3 +np.float32,0x807af47e,0x807af47e,3 +np.float32,0x3ed4b221,0x3ec996f0,3 +np.float32,0x3d3b1956,0x3d3af811,3 +np.float32,0xbe69d93c,0xbe65e7f0,3 +np.float32,0xff03ff14,0xbfc90fdb,3 +np.float32,0x801e79dc,0x801e79dc,3 +np.float32,0x3f467c53,0x3f28d63d,3 +np.float32,0x3eab6baa,0x3ea56a1c,3 +np.float32,0xbf15519c,0xbf072d1c,3 +np.float32,0x7f0bd8e8,0x3fc90fdb,3 +np.float32,0xbe1e0d1c,0xbe1cd053,3 +np.float32,0x8016edab,0x8016edab,3 +np.float32,0x7ecaa09b,0x3fc90fdb,3 +np.float32,0x3f72e6d9,0x3f4257a8,3 +np.float32,0xbefe787e,0xbeec29a4,3 +np.float32,0xbee989e8,0xbedb1af9,3 +np.float32,0xbe662db0,0xbe626a45,3 +np.float32,0x495bf7,0x495bf7,3 +np.float32,0x26c379,0x26c379,3 +np.float32,0x7f54d41a,0x3fc90fdb,3 +np.float32,0x801e7dd9,0x801e7dd9,3 +np.float32,0x80000000,0x80000000,3 +np.float32,0xfa3d3000,0xbfc90fdb,3 +np.float32,0xfa3cb800,0xbfc90fdb,3 +np.float32,0x264894,0x264894,3 +np.float32,0xff6de011,0xbfc90fdb,3 +np.float32,0x7e9045b2,0x3fc90fdb,3 +np.float32,0x3f2253a8,0x3f10aaf4,3 +np.float32,0xbd462bf0,0xbd460469,3 +np.float32,0x7f1796af,0x3fc90fdb,3 +np.float32,0x3e718858,0x3e6d3279,3 +np.float32,0xff437d7e,0xbfc90fdb,3 +np.float32,0x805ae7cb,0x805ae7cb,3 +np.float32,0x807e32e9,0x807e32e9,3 +np.float32,0x3ee0bafc,0x3ed3c453,3 +np.float32,0xbf721dee,0xbf41edc3,3 +np.float32,0xfec9f792,0xbfc90fdb,3 +np.float32,0x7f050720,0x3fc90fdb,3 +np.float32,0x182261,0x182261,3 +np.float32,0x3e39e678,0x3e37e5be,3 +np.float32,0x7e096e4b,0x3fc90fdb,3 +np.float32,0x103715,0x103715,3 +np.float32,0x3f7e7741,0x3f484ae4,3 +np.float32,0x3e29aea5,0x3e28277c,3 +np.float32,0x58c183,0x58c183,3 +np.float32,0xff72fdb2,0xbfc90fdb,3 +np.float32,0xbd9a9420,0xbd9a493c,3 +np.float32,0x7f1e07e7,0x3fc90fdb,3 +np.float32,0xff79f522,0xbfc90fdb,3 +np.float32,0x7c7d0e96,0x3fc90fdb,3 +np.float32,0xbeba9e8e,0xbeb2f504,3 +np.float32,0xfd880a80,0xbfc90fdb,3 +np.float32,0xff7f2a33,0xbfc90fdb,3 +np.float32,0x3e861ae0,0x3e83289c,3 
+np.float32,0x7f0161c1,0x3fc90fdb,3 +np.float32,0xfe844ff8,0xbfc90fdb,3 +np.float32,0xbebf4b98,0xbeb7128e,3 +np.float32,0x652bee,0x652bee,3 +np.float32,0xff188a4b,0xbfc90fdb,3 +np.float32,0xbf800000,0xbf490fdb,3 +np.float32,0x80418711,0x80418711,3 +np.float32,0xbeb712d4,0xbeafd1f6,3 +np.float32,0xbf7cee28,0xbf478491,3 +np.float32,0xfe66c59c,0xbfc90fdb,3 +np.float32,0x4166a2,0x4166a2,3 +np.float32,0x3dfa1a2c,0x3df8deb5,3 +np.float32,0xbdbfbcb8,0xbdbf2e0f,3 +np.float32,0xfe60ef70,0xbfc90fdb,3 +np.float32,0xfe009444,0xbfc90fdb,3 +np.float32,0xfeb27aa0,0xbfc90fdb,3 +np.float32,0xbe99f7bc,0xbe95902b,3 +np.float32,0x8043d28d,0x8043d28d,3 +np.float32,0xfe5328c4,0xbfc90fdb,3 +np.float32,0x8017b27e,0x8017b27e,3 +np.float32,0x3ef1d2cf,0x3ee1ebd7,3 +np.float32,0x805ddd90,0x805ddd90,3 +np.float32,0xbf424263,0xbf262d17,3 +np.float32,0xfc99dde0,0xbfc90fdb,3 +np.float32,0xbf7ec13b,0xbf487015,3 +np.float32,0xbef727ea,0xbee64377,3 +np.float32,0xff15ce95,0xbfc90fdb,3 +np.float32,0x1fbba4,0x1fbba4,3 +np.float32,0x3f3b2368,0x3f2198a9,3 +np.float32,0xfefda26e,0xbfc90fdb,3 +np.float32,0x801519ad,0x801519ad,3 +np.float32,0x80473fa2,0x80473fa2,3 +np.float32,0x7e7a8bc1,0x3fc90fdb,3 +np.float32,0x3e8a9289,0x3e87548a,3 +np.float32,0x3ed68987,0x3ecb2872,3 +np.float32,0x805bca66,0x805bca66,3 +np.float32,0x8079c4e3,0x8079c4e3,3 +np.float32,0x3a2510,0x3a2510,3 +np.float32,0x7eedc598,0x3fc90fdb,3 +np.float32,0x80681956,0x80681956,3 +np.float32,0xff64c778,0xbfc90fdb,3 +np.float32,0x806bbc46,0x806bbc46,3 +np.float32,0x433643,0x433643,3 +np.float32,0x705b92,0x705b92,3 +np.float32,0xff359392,0xbfc90fdb,3 +np.float32,0xbee78672,0xbed96fa7,3 +np.float32,0x3e21717b,0x3e202010,3 +np.float32,0xfea13c34,0xbfc90fdb,3 +np.float32,0x2c8895,0x2c8895,3 +np.float32,0x3ed33290,0x3ec84f7c,3 +np.float32,0x3e63031e,0x3e5f662e,3 +np.float32,0x7e30907b,0x3fc90fdb,3 +np.float32,0xbe293708,0xbe27b310,3 +np.float32,0x3ed93738,0x3ecd6ea3,3 +np.float32,0x9db7e,0x9db7e,3 +np.float32,0x3f7cd1b8,0x3f47762c,3 +np.float32,0x3eb5143c,0x3eae0cb0,3 +np.float32,0xbe69b234,0xbe65c2d7,3 +np.float32,0x3f6e74de,0x3f3ffb97,3 +np.float32,0x5d0559,0x5d0559,3 +np.float32,0x3e1e8c30,0x3e1d4c70,3 +np.float32,0xbf2d1878,0xbf1833ef,3 +np.float32,0xff2adf82,0xbfc90fdb,3 +np.float32,0x8012e2c1,0x8012e2c1,3 +np.float32,0x7f031be3,0x3fc90fdb,3 +np.float32,0x805ff94e,0x805ff94e,3 +np.float32,0x3e9d5b27,0x3e98aa31,3 +np.float32,0x3f56d5cf,0x3f32bc9e,3 +np.float32,0x3eaa0412,0x3ea4267f,3 +np.float32,0xbe899ea4,0xbe86712f,3 +np.float32,0x800f2f48,0x800f2f48,3 +np.float32,0x3f1c2269,0x3f0c33ea,3 +np.float32,0x3f4a5f64,0x3f2b3f28,3 +np.float32,0x80739318,0x80739318,3 +np.float32,0x806e9b47,0x806e9b47,3 +np.float32,0x3c8cd300,0x3c8ccf73,3 +np.float32,0x7f39a39d,0x3fc90fdb,3 +np.float32,0x3ec95d61,0x3ebfd9dc,3 +np.float32,0xff351ff8,0xbfc90fdb,3 +np.float32,0xff3a8f58,0xbfc90fdb,3 +np.float32,0x7f313ec0,0x3fc90fdb,3 +np.float32,0x803aed13,0x803aed13,3 +np.float32,0x7f771d9b,0x3fc90fdb,3 +np.float32,0x8045a6d6,0x8045a6d6,3 +np.float32,0xbc85f280,0xbc85ef72,3 +np.float32,0x7e9c68f5,0x3fc90fdb,3 +np.float32,0xbf0f9379,0xbf02d975,3 +np.float32,0x7e97bcb1,0x3fc90fdb,3 +np.float32,0x804a07d5,0x804a07d5,3 +np.float32,0x802e6117,0x802e6117,3 +np.float32,0x7ed5e388,0x3fc90fdb,3 +np.float32,0x80750455,0x80750455,3 +np.float32,0xff4a8325,0xbfc90fdb,3 +np.float32,0xbedb6866,0xbecf497c,3 +np.float32,0x52ea3b,0x52ea3b,3 +np.float32,0xff773172,0xbfc90fdb,3 +np.float32,0xbeaa8ff0,0xbea4a46e,3 +np.float32,0x7eef2058,0x3fc90fdb,3 +np.float32,0x3f712472,0x3f4169d3,3 
+np.float32,0xff6c8608,0xbfc90fdb,3 +np.float32,0xbf6eaa41,0xbf40182a,3 +np.float32,0x3eb03c24,0x3ea9bb34,3 +np.float32,0xfe118cd4,0xbfc90fdb,3 +np.float32,0x3e5b03b0,0x3e57c378,3 +np.float32,0x7f34d92d,0x3fc90fdb,3 +np.float32,0x806c3418,0x806c3418,3 +np.float32,0x7f3074e3,0x3fc90fdb,3 +np.float32,0x8002df02,0x8002df02,3 +np.float32,0x3f6df63a,0x3f3fb7b7,3 +np.float32,0xfd2b4100,0xbfc90fdb,3 +np.float32,0x80363d5c,0x80363d5c,3 +np.float32,0xbeac1f98,0xbea60bd6,3 +np.float32,0xff7fffff,0xbfc90fdb,3 +np.float32,0x80045097,0x80045097,3 +np.float32,0xfe011100,0xbfc90fdb,3 +np.float32,0x80739ef5,0x80739ef5,3 +np.float32,0xff3976ed,0xbfc90fdb,3 +np.float32,0xbe18e3a0,0xbe17c49e,3 +np.float32,0xbe289294,0xbe2712f6,3 +np.float32,0x3f1d41e7,0x3f0d050e,3 +np.float32,0x39364a,0x39364a,3 +np.float32,0x8072b77e,0x8072b77e,3 +np.float32,0x3f7cfec0,0x3f478cf6,3 +np.float32,0x2f68f6,0x2f68f6,3 +np.float32,0xbf031fb8,0xbef25c84,3 +np.float32,0xbf0b842c,0xbeff7afc,3 +np.float32,0x3f081e7e,0x3efa3676,3 +np.float32,0x7f7fffff,0x3fc90fdb,3 +np.float32,0xff15da0e,0xbfc90fdb,3 +np.float32,0x3d2001b2,0x3d1fece1,3 +np.float32,0x7f76efef,0x3fc90fdb,3 +np.float32,0x3f2405dd,0x3f11dfb7,3 +np.float32,0xa0319,0xa0319,3 +np.float32,0x3e23d2bd,0x3e227255,3 +np.float32,0xbd4d4c50,0xbd4d205e,3 +np.float32,0x382344,0x382344,3 +np.float32,0x21bbf,0x21bbf,3 +np.float32,0xbf209e82,0xbf0f7239,3 +np.float32,0xff03bf9f,0xbfc90fdb,3 +np.float32,0x7b1789,0x7b1789,3 +np.float32,0xff314944,0xbfc90fdb,3 +np.float32,0x1a63eb,0x1a63eb,3 +np.float32,0x803dc983,0x803dc983,3 +np.float32,0x3f0ff558,0x3f0323dc,3 +np.float32,0x3f544f2c,0x3f313f58,3 +np.float32,0xff032948,0xbfc90fdb,3 +np.float32,0x7f4933cc,0x3fc90fdb,3 +np.float32,0x7f14c5ed,0x3fc90fdb,3 +np.float32,0x803aeebf,0x803aeebf,3 +np.float32,0xbf0d4c0f,0xbf011bf5,3 +np.float32,0xbeaf8de2,0xbea91f57,3 +np.float32,0xff3ae030,0xbfc90fdb,3 +np.float32,0xbb362d00,0xbb362ce1,3 +np.float32,0x3d1f79e0,0x3d1f6544,3 +np.float32,0x3f56e9d9,0x3f32c860,3 +np.float32,0x3f723e5e,0x3f41fee2,3 +np.float32,0x4c0179,0x4c0179,3 +np.float32,0xfee36132,0xbfc90fdb,3 +np.float32,0x619ae6,0x619ae6,3 +np.float32,0xfde5d670,0xbfc90fdb,3 +np.float32,0xff079ac5,0xbfc90fdb,3 +np.float32,0x3e974fbd,0x3e931fae,3 +np.float32,0x8020ae6b,0x8020ae6b,3 +np.float32,0x6b5af1,0x6b5af1,3 +np.float32,0xbeb57cd6,0xbeae69a3,3 +np.float32,0x806e7eb2,0x806e7eb2,3 +np.float32,0x7e666edb,0x3fc90fdb,3 +np.float32,0xbf458c18,0xbf283ff0,3 +np.float32,0x3e50518e,0x3e4d8399,3 +np.float32,0x3e9ce224,0x3e983b98,3 +np.float32,0x3e6bc067,0x3e67b6c6,3 +np.float32,0x13783d,0x13783d,3 +np.float32,0xff3d518c,0xbfc90fdb,3 +np.float32,0xfeba5968,0xbfc90fdb,3 +np.float32,0xbf0b9f76,0xbeffa50f,3 +np.float32,0xfe174900,0xbfc90fdb,3 +np.float32,0x3f38bb0a,0x3f200527,3 +np.float32,0x7e94a77d,0x3fc90fdb,3 +np.float32,0x29d776,0x29d776,3 +np.float32,0xbf4e058d,0xbf2d7a15,3 +np.float32,0xbd94abc8,0xbd946923,3 +np.float32,0xbee62db0,0xbed85124,3 +np.float32,0x800000,0x800000,3 +np.float32,0xbef1df7e,0xbee1f636,3 +np.float32,0xbcf3cd20,0xbcf3bab5,3 +np.float32,0x80007b05,0x80007b05,3 +np.float32,0x3d9b3f2e,0x3d9af351,3 +np.float32,0xbf714a68,0xbf417dee,3 +np.float32,0xbf2a2d37,0xbf163069,3 +np.float32,0x8055104f,0x8055104f,3 +np.float32,0x7f5c40d7,0x3fc90fdb,3 +np.float32,0x1,0x1,3 +np.float32,0xff35f3a6,0xbfc90fdb,3 +np.float32,0xd9c7c,0xd9c7c,3 +np.float32,0xbf440cfc,0xbf274f22,3 +np.float32,0x8050ac43,0x8050ac43,3 +np.float32,0x63ee16,0x63ee16,3 +np.float32,0x7d90419b,0x3fc90fdb,3 +np.float32,0xfee22198,0xbfc90fdb,3 +np.float32,0xc2ead,0xc2ead,3 
+np.float32,0x7f5cd6a6,0x3fc90fdb,3 +np.float32,0x3f6fab7e,0x3f40a184,3 +np.float32,0x3ecf998c,0x3ec53a73,3 +np.float32,0x7e5271f0,0x3fc90fdb,3 +np.float32,0x67c016,0x67c016,3 +np.float32,0x2189c8,0x2189c8,3 +np.float32,0x27d892,0x27d892,3 +np.float32,0x3f0d02c4,0x3f00e3c0,3 +np.float32,0xbf69ebca,0xbf3d8862,3 +np.float32,0x3e60c0d6,0x3e5d3ebb,3 +np.float32,0x3f45206c,0x3f27fc66,3 +np.float32,0xbf6b47dc,0xbf3e4592,3 +np.float32,0xfe9be2e2,0xbfc90fdb,3 +np.float32,0x7fa00000,0x7fe00000,3 +np.float32,0xff271562,0xbfc90fdb,3 +np.float32,0x3e2e5270,0x3e2caaaf,3 +np.float32,0x80222934,0x80222934,3 +np.float32,0xbd01d220,0xbd01c701,3 +np.float32,0x223aa0,0x223aa0,3 +np.float32,0x3f4b5a7e,0x3f2bd967,3 +np.float32,0x3f217d85,0x3f101200,3 +np.float32,0xbf57663a,0xbf331144,3 +np.float32,0x3f219862,0x3f102536,3 +np.float32,0x28a28c,0x28a28c,3 +np.float32,0xbf3f55f4,0xbf244f86,3 +np.float32,0xbf3de287,0xbf236092,3 +np.float32,0xbf1c1ce2,0xbf0c2fe3,3 +np.float32,0x80000001,0x80000001,3 +np.float32,0x3db695d0,0x3db61a90,3 +np.float32,0x6c39bf,0x6c39bf,3 +np.float32,0x7e33a12f,0x3fc90fdb,3 +np.float32,0x67623a,0x67623a,3 +np.float32,0x3e45dc54,0x3e4373b6,3 +np.float32,0x7f62fa68,0x3fc90fdb,3 +np.float32,0x3f0e1d01,0x3f01bbe5,3 +np.float32,0x3f13dc69,0x3f0615f5,3 +np.float32,0x246703,0x246703,3 +np.float32,0xbf1055b5,0xbf036d07,3 +np.float32,0x7f46d3d0,0x3fc90fdb,3 +np.float32,0x3d2b8086,0x3d2b66e5,3 +np.float32,0xbf03be44,0xbef35776,3 +np.float32,0x3f800000,0x3f490fdb,3 +np.float32,0xbec8d226,0xbebf613d,3 +np.float32,0x3d8faf00,0x3d8f72d4,3 +np.float32,0x170c4e,0x170c4e,3 +np.float32,0xff14c0f0,0xbfc90fdb,3 +np.float32,0xff16245d,0xbfc90fdb,3 +np.float32,0x7f44ce6d,0x3fc90fdb,3 +np.float32,0xbe8175d8,0xbe7d9aeb,3 +np.float32,0x3df7a4a1,0x3df67254,3 +np.float32,0xfe2cc46c,0xbfc90fdb,3 +np.float32,0x3f284e63,0x3f14e335,3 +np.float32,0x7e46e5d6,0x3fc90fdb,3 +np.float32,0x397be4,0x397be4,3 +np.float32,0xbf2560bc,0xbf12d50b,3 +np.float32,0x3ed9b8c1,0x3ecddc60,3 +np.float32,0xfec18c5a,0xbfc90fdb,3 +np.float32,0x64894d,0x64894d,3 +np.float32,0x36a65d,0x36a65d,3 +np.float32,0x804ffcd7,0x804ffcd7,3 +np.float32,0x800f79e4,0x800f79e4,3 +np.float32,0x5d45ac,0x5d45ac,3 +np.float32,0x6cdda0,0x6cdda0,3 +np.float32,0xbf7f2077,0xbf489fe5,3 +np.float32,0xbf152f78,0xbf0713a1,3 +np.float32,0x807bf344,0x807bf344,3 +np.float32,0x3f775023,0x3f44a4d8,3 +np.float32,0xbf3edf67,0xbf240365,3 +np.float32,0x7eed729c,0x3fc90fdb,3 +np.float32,0x14cc29,0x14cc29,3 +np.float32,0x7edd7b6b,0x3fc90fdb,3 +np.float32,0xbf3c6e2c,0xbf226fb7,3 +np.float32,0x51b9ad,0x51b9ad,3 +np.float32,0x3f617ee8,0x3f38dd7c,3 +np.float32,0xff800000,0xbfc90fdb,3 +np.float32,0x7f440ea0,0x3fc90fdb,3 +np.float32,0x3e639893,0x3e5ff49e,3 +np.float32,0xbd791bb0,0xbd78cd3c,3 +np.float32,0x8059fcbc,0x8059fcbc,3 +np.float32,0xbf7d1214,0xbf4796bd,3 +np.float32,0x3ef368fa,0x3ee33788,3 +np.float32,0xbecec0f4,0xbec48055,3 +np.float32,0xbc83d940,0xbc83d656,3 +np.float32,0xbce01220,0xbce003d4,3 +np.float32,0x803192a5,0x803192a5,3 +np.float32,0xbe40e0c0,0xbe3ea4f0,3 +np.float32,0xfb692600,0xbfc90fdb,3 +np.float32,0x3f1bec65,0x3f0c0c88,3 +np.float32,0x7f042798,0x3fc90fdb,3 +np.float32,0xbe047374,0xbe03b83b,3 +np.float32,0x7f7c6630,0x3fc90fdb,3 +np.float32,0x7f58dae3,0x3fc90fdb,3 +np.float32,0x80691c92,0x80691c92,3 +np.float32,0x7dbe76,0x7dbe76,3 +np.float32,0xbf231384,0xbf11339d,3 +np.float32,0xbef4acf8,0xbee43f8b,3 +np.float32,0x3ee9f9d0,0x3edb7793,3 +np.float32,0x3f0064f6,0x3eee04a8,3 +np.float32,0x313732,0x313732,3 +np.float32,0xfd58cf80,0xbfc90fdb,3 
+np.float32,0x3f7a2bc9,0x3f461d30,3 +np.float32,0x7f7681af,0x3fc90fdb,3 +np.float32,0x7f504211,0x3fc90fdb,3 +np.float32,0xfeae0c00,0xbfc90fdb,3 +np.float32,0xbee14396,0xbed436d1,3 +np.float32,0x7fc00000,0x7fc00000,3 +np.float32,0x693406,0x693406,3 +np.float32,0x3eb4a679,0x3eadab1b,3 +np.float32,0x550505,0x550505,3 +np.float32,0xfd493d10,0xbfc90fdb,3 +np.float32,0x3f4fc907,0x3f2e8b2c,3 +np.float32,0x80799aa4,0x80799aa4,3 +np.float32,0xff1ea89b,0xbfc90fdb,3 +np.float32,0xff424510,0xbfc90fdb,3 +np.float32,0x7f68d026,0x3fc90fdb,3 +np.float32,0xbea230ca,0xbe9d1200,3 +np.float32,0x7ea585da,0x3fc90fdb,3 +np.float32,0x3f3db211,0x3f23414c,3 +np.float32,0xfea4d964,0xbfc90fdb,3 +np.float32,0xbf17fe18,0xbf092984,3 +np.float32,0x7cc8a2,0x7cc8a2,3 +np.float32,0xff0330ba,0xbfc90fdb,3 +np.float32,0x3f769835,0x3f444592,3 +np.float32,0xeb0ac,0xeb0ac,3 +np.float32,0x7f7e45de,0x3fc90fdb,3 +np.float32,0xbdb510a8,0xbdb49873,3 +np.float32,0x3ebf900b,0x3eb74e9c,3 +np.float32,0xbf21bbce,0xbf103e89,3 +np.float32,0xbf3f4682,0xbf24459d,3 +np.float32,0x7eb6e9c8,0x3fc90fdb,3 +np.float32,0xbf42532d,0xbf2637be,3 +np.float32,0xbd3b2600,0xbd3b04b4,3 +np.float32,0x3f1fa9aa,0x3f0ec23e,3 +np.float32,0x7ed6a0f1,0x3fc90fdb,3 +np.float32,0xff4759a1,0xbfc90fdb,3 +np.float32,0x6d26e3,0x6d26e3,3 +np.float32,0xfe1108e0,0xbfc90fdb,3 +np.float32,0xfdf76900,0xbfc90fdb,3 +np.float32,0xfec66f22,0xbfc90fdb,3 +np.float32,0xbf3d097f,0xbf22d458,3 +np.float32,0x3d85be25,0x3d858d99,3 +np.float32,0x7f36739f,0x3fc90fdb,3 +np.float32,0x7bc0a304,0x3fc90fdb,3 +np.float32,0xff48dd90,0xbfc90fdb,3 +np.float32,0x48cab0,0x48cab0,3 +np.float32,0x3ed3943c,0x3ec8a2ef,3 +np.float32,0xbf61488e,0xbf38bede,3 +np.float32,0x3f543df5,0x3f313525,3 +np.float32,0x5cf2ca,0x5cf2ca,3 +np.float32,0x572686,0x572686,3 +np.float32,0x80369c7c,0x80369c7c,3 +np.float32,0xbd2c1d20,0xbd2c0338,3 +np.float32,0x3e255428,0x3e23ea0b,3 +np.float32,0xbeba9ee0,0xbeb2f54c,3 +np.float32,0x8015c165,0x8015c165,3 +np.float32,0x3d31f488,0x3d31d7e6,3 +np.float32,0x3f68591c,0x3f3cac43,3 +np.float32,0xf5ed5,0xf5ed5,3 +np.float32,0xbf3b1d34,0xbf21949e,3 +np.float32,0x1f0343,0x1f0343,3 +np.float32,0x3f0e52b5,0x3f01e4ef,3 +np.float32,0x7f57c596,0x3fc90fdb,3 +np.float64,0x7fd8e333ddb1c667,0x3ff921fb54442d18,3 +np.float64,0x800bcc9cdad7993a,0x800bcc9cdad7993a,3 +np.float64,0x3fcd6f81df3adf00,0x3fcceebbafc5d55e,3 +np.float64,0x3fed7338a57ae671,0x3fe7ce3e5811fc0a,3 +np.float64,0x7fe64994fcac9329,0x3ff921fb54442d18,3 +np.float64,0xfa5a6345f4b4d,0xfa5a6345f4b4d,3 +np.float64,0xe9dcd865d3b9b,0xe9dcd865d3b9b,3 +np.float64,0x7fea6cffabf4d9fe,0x3ff921fb54442d18,3 +np.float64,0xa9e1de6153c3c,0xa9e1de6153c3c,3 +np.float64,0xab6bdc5356d7c,0xab6bdc5356d7c,3 +np.float64,0x80062864a02c50ca,0x80062864a02c50ca,3 +np.float64,0xbfdac03aa7b58076,0xbfd9569f3230128d,3 +np.float64,0xbfe61b77752c36ef,0xbfe3588f51b8be8f,3 +np.float64,0x800bc854c8d790aa,0x800bc854c8d790aa,3 +np.float64,0x3feed1a2da3da346,0x3fe887f9b8ea031f,3 +np.float64,0x3fe910d3697221a7,0x3fe54365a53d840e,3 +np.float64,0x7fe7ab4944ef5692,0x3ff921fb54442d18,3 +np.float64,0x3fa462f1a028c5e3,0x3fa460303a6a4e69,3 +np.float64,0x800794f1a3af29e4,0x800794f1a3af29e4,3 +np.float64,0x3fee6fe7fafcdfd0,0x3fe854f863816d55,3 +np.float64,0x8000000000000000,0x8000000000000000,3 +np.float64,0x7f336472fe66d,0x7f336472fe66d,3 +np.float64,0xffb1623ac822c478,0xbff921fb54442d18,3 +np.float64,0x3fbacd68ce359ad2,0x3fbab480b3638846,3 +np.float64,0xffd5c02706ab804e,0xbff921fb54442d18,3 +np.float64,0xbfd4daf03d29b5e0,0xbfd42928f069c062,3 
+np.float64,0x800c6e85dbd8dd0c,0x800c6e85dbd8dd0c,3 +np.float64,0x800e3599c5bc6b34,0x800e3599c5bc6b34,3 +np.float64,0x2c0d654c581ad,0x2c0d654c581ad,3 +np.float64,0xbfdd3eb13fba7d62,0xbfdb6e8143302de7,3 +np.float64,0x800b60cb8776c197,0x800b60cb8776c197,3 +np.float64,0x80089819ad113034,0x80089819ad113034,3 +np.float64,0x29fe721453fcf,0x29fe721453fcf,3 +np.float64,0x3fe8722f4df0e45f,0x3fe4e026d9eadb4d,3 +np.float64,0xffd1fbcd01a3f79a,0xbff921fb54442d18,3 +np.float64,0x7fc74e1e982e9c3c,0x3ff921fb54442d18,3 +np.float64,0x800c09d3d15813a8,0x800c09d3d15813a8,3 +np.float64,0xbfeee4578b3dc8af,0xbfe891ab3d6c3ce4,3 +np.float64,0xffdd01a6f33a034e,0xbff921fb54442d18,3 +np.float64,0x7fcc130480382608,0x3ff921fb54442d18,3 +np.float64,0xffcbb6bd1d376d7c,0xbff921fb54442d18,3 +np.float64,0xc068a53780d15,0xc068a53780d15,3 +np.float64,0xbfc974f15532e9e4,0xbfc92100b355f3e7,3 +np.float64,0x3fe6da79442db4f3,0x3fe3d87393b082e7,3 +np.float64,0xd9d9be4db3b38,0xd9d9be4db3b38,3 +np.float64,0x5ea50a20bd4a2,0x5ea50a20bd4a2,3 +np.float64,0xbfe5597f7d2ab2ff,0xbfe2d3ccc544b52b,3 +np.float64,0x80019364e4e326cb,0x80019364e4e326cb,3 +np.float64,0x3fed2902c3fa5206,0x3fe7a5e1df07e5c1,3 +np.float64,0xbfa7b72b5c2f6e50,0xbfa7b2d545b3cc1f,3 +np.float64,0xffdb60dd43b6c1ba,0xbff921fb54442d18,3 +np.float64,0x81a65d8b034cc,0x81a65d8b034cc,3 +np.float64,0x8000c30385818608,0x8000c30385818608,3 +np.float64,0x6022f5f4c045f,0x6022f5f4c045f,3 +np.float64,0x8007a2bb810f4578,0x8007a2bb810f4578,3 +np.float64,0x7fdc68893238d111,0x3ff921fb54442d18,3 +np.float64,0x7fd443454ea8868a,0x3ff921fb54442d18,3 +np.float64,0xffe6b04209ed6084,0xbff921fb54442d18,3 +np.float64,0x7fcd9733d13b2e67,0x3ff921fb54442d18,3 +np.float64,0xf5ee80a9ebdd0,0xf5ee80a9ebdd0,3 +np.float64,0x3fe3788e8de6f11e,0x3fe17dec7e6843a0,3 +np.float64,0x3fee36f62f7c6dec,0x3fe836f832515b43,3 +np.float64,0xf6cb49aded969,0xf6cb49aded969,3 +np.float64,0x3fd2b15ea4a562bc,0x3fd22fdc09920e67,3 +np.float64,0x7fccf6aef139ed5d,0x3ff921fb54442d18,3 +np.float64,0x3fd396b8ce272d72,0x3fd3026118857bd4,3 +np.float64,0x7fe53d3c80ea7a78,0x3ff921fb54442d18,3 +np.float64,0x3feae88fc4f5d120,0x3fe65fb04b18ef7a,3 +np.float64,0x3fedc643747b8c86,0x3fe7fafa6c20e25a,3 +np.float64,0xffdb2dc0df365b82,0xbff921fb54442d18,3 +np.float64,0xbfa2af3658255e70,0xbfa2ad17348f4253,3 +np.float64,0x3f8aa77b30354ef6,0x3f8aa71892336a69,3 +np.float64,0xbfdd1b1efbba363e,0xbfdb510dcd186820,3 +np.float64,0x800f50d99c5ea1b3,0x800f50d99c5ea1b3,3 +np.float64,0xff6ed602403dac00,0xbff921fb54442d18,3 +np.float64,0x800477d71aa8efaf,0x800477d71aa8efaf,3 +np.float64,0xbfe729a9e86e5354,0xbfe40ca78d9eefcf,3 +np.float64,0x3fd81ab2d4303566,0x3fd70d7e3937ea22,3 +np.float64,0xb617cbab6c2fa,0xb617cbab6c2fa,3 +np.float64,0x7fefffffffffffff,0x3ff921fb54442d18,3 +np.float64,0xffa40933ac281260,0xbff921fb54442d18,3 +np.float64,0xbfe1ede621e3dbcc,0xbfe057bb2b341ced,3 +np.float64,0xbfec700f03b8e01e,0xbfe73fb190bc722e,3 +np.float64,0x6e28af02dc517,0x6e28af02dc517,3 +np.float64,0x3fe37ad37ae6f5a7,0x3fe17f94674818a9,3 +np.float64,0x8000cbdeeae197bf,0x8000cbdeeae197bf,3 +np.float64,0x3fe8fd1f01f1fa3e,0x3fe5372bbec5d72c,3 +np.float64,0x3f8f9229103f2452,0x3f8f918531894256,3 +np.float64,0x800536858e0a6d0c,0x800536858e0a6d0c,3 +np.float64,0x7fe82bb4f9f05769,0x3ff921fb54442d18,3 +np.float64,0xffc1c2fb592385f8,0xbff921fb54442d18,3 +np.float64,0x7f924ddfc0249bbf,0x3ff921fb54442d18,3 +np.float64,0xffd5e125c52bc24c,0xbff921fb54442d18,3 +np.float64,0xbfef0d8738be1b0e,0xbfe8a6ef17b16c10,3 +np.float64,0x3fc9c8875233910f,0x3fc9715e708503cb,3 
+np.float64,0xbfe2d926f4e5b24e,0xbfe108956e61cbb3,3 +np.float64,0x7fd61c496dac3892,0x3ff921fb54442d18,3 +np.float64,0x7fed545c6b7aa8b8,0x3ff921fb54442d18,3 +np.float64,0x8003746fea86e8e1,0x8003746fea86e8e1,3 +np.float64,0x3fdf515e75bea2bd,0x3fdd201a5585caa3,3 +np.float64,0xffda87c8ee350f92,0xbff921fb54442d18,3 +np.float64,0xffc675d8e22cebb0,0xbff921fb54442d18,3 +np.float64,0xffcdc173433b82e8,0xbff921fb54442d18,3 +np.float64,0xffed9df1517b3be2,0xbff921fb54442d18,3 +np.float64,0x3fd6a2eec72d45de,0x3fd5c1f1d7dcddcf,3 +np.float64,0xffec116a66f822d4,0xbff921fb54442d18,3 +np.float64,0x8007c2a2458f8545,0x8007c2a2458f8545,3 +np.float64,0x3fe4ee80d969dd02,0x3fe2895076094668,3 +np.float64,0x3fe3cae7116795ce,0x3fe1b9c07e0d03a7,3 +np.float64,0xbfd81bf8d8b037f2,0xbfd70e9bbbb4ca57,3 +np.float64,0x800c88ccd1f9119a,0x800c88ccd1f9119a,3 +np.float64,0xffdab2aee2b5655e,0xbff921fb54442d18,3 +np.float64,0x3fe743d227ee87a4,0x3fe41dcaef186d96,3 +np.float64,0x3fb060fd0220c1fa,0x3fb05b47f56ebbb4,3 +np.float64,0xbfd3f03772a7e06e,0xbfd3541522377291,3 +np.float64,0x190a5ae03216,0x190a5ae03216,3 +np.float64,0x3fe48c71916918e4,0x3fe24442f45b3183,3 +np.float64,0x800862470590c48e,0x800862470590c48e,3 +np.float64,0x7fd3ced89d279db0,0x3ff921fb54442d18,3 +np.float64,0x3feb3d9b4ab67b37,0x3fe69140cf2623f7,3 +np.float64,0xbc3f296b787e5,0xbc3f296b787e5,3 +np.float64,0xbfed6b905dfad721,0xbfe7ca1881a8c0fd,3 +np.float64,0xbfe621c2aaac4386,0xbfe35cd1969a82db,3 +np.float64,0x8009e7b17593cf63,0x8009e7b17593cf63,3 +np.float64,0x80045f580ca8beb1,0x80045f580ca8beb1,3 +np.float64,0xbfea2e177e745c2f,0xbfe5f13971633339,3 +np.float64,0x3fee655787fccab0,0x3fe84f6b98b6de26,3 +np.float64,0x3fc9cde92f339bd0,0x3fc9768a88b2c97c,3 +np.float64,0x3fc819c3b3303388,0x3fc7d25e1526e731,3 +np.float64,0x3fd3e848d2a7d090,0x3fd34cd9e6af558f,3 +np.float64,0x3fe19dacac633b5a,0x3fe01a6b4d27adc2,3 +np.float64,0x800b190da316321c,0x800b190da316321c,3 +np.float64,0xd5c69711ab8d3,0xd5c69711ab8d3,3 +np.float64,0xbfdc31bed7b8637e,0xbfda8ea3c1309d6d,3 +np.float64,0xbfd02ba007a05740,0xbfcfad86f0d756dc,3 +np.float64,0x3fe874473d70e88e,0x3fe4e1793cd82123,3 +np.float64,0xffb465585c28cab0,0xbff921fb54442d18,3 +np.float64,0xbfb5d8e13e2bb1c0,0xbfb5cb5c7807fc4d,3 +np.float64,0xffe80f933bf01f26,0xbff921fb54442d18,3 +np.float64,0x7feea783f5fd4f07,0x3ff921fb54442d18,3 +np.float64,0xbfae6665f43cccd0,0xbfae5d45b0a6f90a,3 +np.float64,0x800bd6ef5a77addf,0x800bd6ef5a77addf,3 +np.float64,0x800d145babda28b8,0x800d145babda28b8,3 +np.float64,0x39de155473bc3,0x39de155473bc3,3 +np.float64,0x3fefbd6bb1ff7ad8,0x3fe9008e73a3296e,3 +np.float64,0x3fc40bca3d281798,0x3fc3e2710e167007,3 +np.float64,0x3fcae0918335c120,0x3fca7e09e704a678,3 +np.float64,0x51287fbea2511,0x51287fbea2511,3 +np.float64,0x7fa6bc33a82d7866,0x3ff921fb54442d18,3 +np.float64,0xe72a2bebce546,0xe72a2bebce546,3 +np.float64,0x3fe1c8fd686391fa,0x3fe03b9622aeb4e3,3 +np.float64,0x3fe2a73ac3654e76,0x3fe0e36bc1ee4ac4,3 +np.float64,0x59895218b312b,0x59895218b312b,3 +np.float64,0xc6dc25c78db85,0xc6dc25c78db85,3 +np.float64,0xbfc06cfac520d9f4,0xbfc0561f85d2c907,3 +np.float64,0xbfea912dc4f5225c,0xbfe62c3b1c01c793,3 +np.float64,0x3fb78ce89a2f19d0,0x3fb77bfcb65a67d3,3 +np.float64,0xbfece5cdea39cb9c,0xbfe78103d24099e5,3 +np.float64,0x30d3054e61a61,0x30d3054e61a61,3 +np.float64,0xbfd3fe26fba7fc4e,0xbfd360c8447c4f7a,3 +np.float64,0x800956072a92ac0f,0x800956072a92ac0f,3 +np.float64,0x7fe639b3b6ec7366,0x3ff921fb54442d18,3 +np.float64,0x800ee30240bdc605,0x800ee30240bdc605,3 +np.float64,0x7fef6af0d2bed5e1,0x3ff921fb54442d18,3 
+np.float64,0xffefce8725ff9d0d,0xbff921fb54442d18,3 +np.float64,0x3fe2e311da65c624,0x3fe10ff1623089dc,3 +np.float64,0xbfe7e5cbe56fcb98,0xbfe486c3daeda67c,3 +np.float64,0x80095bc14472b783,0x80095bc14472b783,3 +np.float64,0xffef0cb4553e1968,0xbff921fb54442d18,3 +np.float64,0xe3e60567c7cc1,0xe3e60567c7cc1,3 +np.float64,0xffde919f06bd233e,0xbff921fb54442d18,3 +np.float64,0x3fe3f9632e27f2c6,0x3fe1db49ebd21c4e,3 +np.float64,0x9dee9a233bdd4,0x9dee9a233bdd4,3 +np.float64,0xbfe3bb0602e7760c,0xbfe1ae41b6d4c488,3 +np.float64,0x3fc46945a128d288,0x3fc43da54c6c6a2a,3 +np.float64,0x7fdef149ac3de292,0x3ff921fb54442d18,3 +np.float64,0x800a96c76d752d8f,0x800a96c76d752d8f,3 +np.float64,0x3f971a32382e3464,0x3f9719316b9e9baf,3 +np.float64,0x7fe97bcf15b2f79d,0x3ff921fb54442d18,3 +np.float64,0x7fea894558f5128a,0x3ff921fb54442d18,3 +np.float64,0x3fc9e3be1933c780,0x3fc98b847c3923eb,3 +np.float64,0x3f7accac40359959,0x3f7acc9330741b64,3 +np.float64,0xa80c136950183,0xa80c136950183,3 +np.float64,0x3fe408732b2810e6,0x3fe1e61e7cbc8824,3 +np.float64,0xffa775bc042eeb80,0xbff921fb54442d18,3 +np.float64,0x3fbf04bd223e0980,0x3fbede37b8fc697e,3 +np.float64,0x7fd999b34c333366,0x3ff921fb54442d18,3 +np.float64,0xe72146dfce429,0xe72146dfce429,3 +np.float64,0x4f511ee49ea24,0x4f511ee49ea24,3 +np.float64,0xffb3e6e58827cdc8,0xbff921fb54442d18,3 +np.float64,0x3fd1f180cfa3e300,0x3fd17e85b2871de2,3 +np.float64,0x97c8e45b2f91d,0x97c8e45b2f91d,3 +np.float64,0xbfeeb20e88fd641d,0xbfe8778f878440bf,3 +np.float64,0xbfe1fc6dee23f8dc,0xbfe062c815a93cde,3 +np.float64,0xab4bf71f5697f,0xab4bf71f5697f,3 +np.float64,0xa9675a2952cec,0xa9675a2952cec,3 +np.float64,0xbfef3ea4a33e7d49,0xbfe8c02743ebc1b6,3 +np.float64,0x3fe22a2eafa4545d,0x3fe08577afca52a9,3 +np.float64,0x3fe8a08daaf1411c,0x3fe4fd5a34f05305,3 +np.float64,0xbfc6cda77b2d9b50,0xbfc6910bcfa0cf4f,3 +np.float64,0x3fec398394387307,0x3fe7211dd5276500,3 +np.float64,0x3fe36c95c626d92c,0x3fe1752e5aa2357b,3 +np.float64,0xffd8b9e7073173ce,0xbff921fb54442d18,3 +np.float64,0xffe19f043ae33e08,0xbff921fb54442d18,3 +np.float64,0x800e3640709c6c81,0x800e3640709c6c81,3 +np.float64,0x3fe7d6c20aafad84,0x3fe47d1a3307d9c8,3 +np.float64,0x80093fd63b727fad,0x80093fd63b727fad,3 +np.float64,0xffe1a671a4634ce3,0xbff921fb54442d18,3 +np.float64,0xbfe53a6b386a74d6,0xbfe2be41859cb10d,3 +np.float64,0xbfed149a097a2934,0xbfe79ab7e3e93c1c,3 +np.float64,0x7fc2769a5724ed34,0x3ff921fb54442d18,3 +np.float64,0xffd01e4e99a03c9e,0xbff921fb54442d18,3 +np.float64,0xa61f38434c3e7,0xa61f38434c3e7,3 +np.float64,0x800ad4ac5195a959,0x800ad4ac5195a959,3 +np.float64,0x7ff8000000000000,0x7ff8000000000000,3 +np.float64,0x80034a45b6c6948c,0x80034a45b6c6948c,3 +np.float64,0x6350b218c6a17,0x6350b218c6a17,3 +np.float64,0xfff0000000000000,0xbff921fb54442d18,3 +np.float64,0x3fe363e759e6c7cf,0x3fe16ed58d80f9ce,3 +np.float64,0xffe3b98e59e7731c,0xbff921fb54442d18,3 +np.float64,0x3fdbf7b40337ef68,0x3fda5df7ad3c80f9,3 +np.float64,0xbfe9cdf784739bef,0xbfe5b74f346ef93d,3 +np.float64,0xbfc321bea326437c,0xbfc2fdc0d4ff7561,3 +np.float64,0xbfe40f77d2a81ef0,0xbfe1eb28c4ae4dde,3 +np.float64,0x7fe071806960e300,0x3ff921fb54442d18,3 +np.float64,0x7fd269006ea4d200,0x3ff921fb54442d18,3 +np.float64,0x80017a56e0e2f4af,0x80017a56e0e2f4af,3 +np.float64,0x8004b4ea09a969d5,0x8004b4ea09a969d5,3 +np.float64,0xbfedbb01e63b7604,0xbfe7f4f0e84297df,3 +np.float64,0x3fe44454826888a9,0x3fe210ff6d005706,3 +np.float64,0xbfe0e77e6ea1cefd,0xbfdf1a977da33402,3 +np.float64,0xbfed6d4c8c3ada99,0xbfe7cb0932093f60,3 +np.float64,0x1d74cb9e3ae9a,0x1d74cb9e3ae9a,3 
+np.float64,0x80082a785d1054f1,0x80082a785d1054f1,3 +np.float64,0x3fe58393266b0726,0x3fe2f0d8e91d4887,3 +np.float64,0xffe4028899680510,0xbff921fb54442d18,3 +np.float64,0x783a2e5af0746,0x783a2e5af0746,3 +np.float64,0x7fcdce88e73b9d11,0x3ff921fb54442d18,3 +np.float64,0x3fc58672a72b0ce5,0x3fc5535e090e56e2,3 +np.float64,0x800889c839b11391,0x800889c839b11391,3 +np.float64,0xffe5e05c466bc0b8,0xbff921fb54442d18,3 +np.float64,0xbfcbef6ebe37dedc,0xbfcb810752468f49,3 +np.float64,0xffe9408563b2810a,0xbff921fb54442d18,3 +np.float64,0xbfee4738367c8e70,0xbfe83f8e5dd7602f,3 +np.float64,0xbfe4aeb587295d6b,0xbfe25c7a0c76a454,3 +np.float64,0xffc9aea0a7335d40,0xbff921fb54442d18,3 +np.float64,0xe1e02199c3c04,0xe1e02199c3c04,3 +np.float64,0xbfbd9400783b2800,0xbfbd729345d1d14f,3 +np.float64,0x7a5418bcf4a84,0x7a5418bcf4a84,3 +np.float64,0x3fdc1c2fa5b83860,0x3fda7c935965ae72,3 +np.float64,0x80076a9f58ced53f,0x80076a9f58ced53f,3 +np.float64,0x3fedc4bf957b897f,0x3fe7fa2a83148f1c,3 +np.float64,0x800981b8a9d30372,0x800981b8a9d30372,3 +np.float64,0xffe1082311621046,0xbff921fb54442d18,3 +np.float64,0xe0091f89c0124,0xe0091f89c0124,3 +np.float64,0xbfce8d674f3d1ad0,0xbfcdfdbf2ddaa0ca,3 +np.float64,0x800516e72eaa2dcf,0x800516e72eaa2dcf,3 +np.float64,0xffe61ee64c6c3dcc,0xbff921fb54442d18,3 +np.float64,0x7fed2683cafa4d07,0x3ff921fb54442d18,3 +np.float64,0xffd4faf27729f5e4,0xbff921fb54442d18,3 +np.float64,0x7fe308fa842611f4,0x3ff921fb54442d18,3 +np.float64,0x3fc612a62b2c2550,0x3fc5db9ddbd4e159,3 +np.float64,0xbfe5b01e766b603d,0xbfe30f72a875e988,3 +np.float64,0x3fc2dd8b9a25bb17,0x3fc2bb06246b9f78,3 +np.float64,0x8170908102e12,0x8170908102e12,3 +np.float64,0x800c1c8a8a583915,0x800c1c8a8a583915,3 +np.float64,0xffe5d91e8b6bb23c,0xbff921fb54442d18,3 +np.float64,0xffd140adee22815c,0xbff921fb54442d18,3 +np.float64,0xbfe2f1f5f8e5e3ec,0xbfe11afa5d749952,3 +np.float64,0xbfed6d1d587ada3b,0xbfe7caef9ecf7651,3 +np.float64,0x3fe9b85e67f370bd,0x3fe5aa3474768982,3 +np.float64,0x7fdc8932edb91265,0x3ff921fb54442d18,3 +np.float64,0x7fd136bc54a26d78,0x3ff921fb54442d18,3 +np.float64,0x800a1ea12a343d43,0x800a1ea12a343d43,3 +np.float64,0x3fec6a5c1b78d4b8,0x3fe73c82235c3f8f,3 +np.float64,0x800fbf6a00df7ed4,0x800fbf6a00df7ed4,3 +np.float64,0xbfd0e6e0cda1cdc2,0xbfd0864bf8cad294,3 +np.float64,0x3fc716df482e2dbf,0x3fc6d7fbfd4a8470,3 +np.float64,0xbfe75990936eb321,0xbfe42bffec3fa0d7,3 +np.float64,0x3fd58e54a02b1ca9,0x3fd4cace1107a5cc,3 +np.float64,0xbfc9c04136338084,0xbfc9696ad2591d54,3 +np.float64,0xdd1f0147ba3e0,0xdd1f0147ba3e0,3 +np.float64,0x5c86a940b90e,0x5c86a940b90e,3 +np.float64,0xbfecae3b8e795c77,0xbfe7624d4988c612,3 +np.float64,0xffd0370595206e0c,0xbff921fb54442d18,3 +np.float64,0xbfdc26d443384da8,0xbfda857ecd33ba9f,3 +np.float64,0xbfd1c849d9a39094,0xbfd15849449cc378,3 +np.float64,0xffee04acdb3c0959,0xbff921fb54442d18,3 +np.float64,0xbfded1056dbda20a,0xbfdcb83b30e1528c,3 +np.float64,0x7fb7b826622f704c,0x3ff921fb54442d18,3 +np.float64,0xbfee4df8ae7c9bf1,0xbfe8431df9dfd05d,3 +np.float64,0x7fe7f3670e2fe6cd,0x3ff921fb54442d18,3 +np.float64,0x8008ac9ae0d15936,0x8008ac9ae0d15936,3 +np.float64,0x800dce9f3b3b9d3f,0x800dce9f3b3b9d3f,3 +np.float64,0x7fbb19db203633b5,0x3ff921fb54442d18,3 +np.float64,0x3fe56c7f302ad8fe,0x3fe2e0eec3ad45fd,3 +np.float64,0x7fe82c05c570580b,0x3ff921fb54442d18,3 +np.float64,0xc0552b7780aa6,0xc0552b7780aa6,3 +np.float64,0x39d40e3073a83,0x39d40e3073a83,3 +np.float64,0x3fd8db54d731b6aa,0x3fd7b589b3ee9b20,3 +np.float64,0xffcdd355233ba6ac,0xbff921fb54442d18,3 +np.float64,0x3fbe97b3a43d2f67,0x3fbe72bca9be0348,3 
+np.float64,0xbff0000000000000,0xbfe921fb54442d18,3 +np.float64,0xbfb4f55e6229eac0,0xbfb4e96df18a75a7,3 +np.float64,0xbfc66399ba2cc734,0xbfc62a3298bd96fc,3 +np.float64,0x3fd00988bb201311,0x3fcf6d67a9374c38,3 +np.float64,0x7fe471867d28e30c,0x3ff921fb54442d18,3 +np.float64,0xbfe38e0e64271c1d,0xbfe18d9888b7523b,3 +np.float64,0x8009dc127573b825,0x8009dc127573b825,3 +np.float64,0x800047bde4608f7d,0x800047bde4608f7d,3 +np.float64,0xffeede42c77dbc85,0xbff921fb54442d18,3 +np.float64,0xd8cf6d13b19ee,0xd8cf6d13b19ee,3 +np.float64,0xbfd08fb302a11f66,0xbfd034b1f8235e23,3 +np.float64,0x7fdb404c0b368097,0x3ff921fb54442d18,3 +np.float64,0xbfd6ba0438ad7408,0xbfd5d673e3276ec1,3 +np.float64,0xffd9568027b2ad00,0xbff921fb54442d18,3 +np.float64,0xbfb313b73e262770,0xbfb30ab4acb4fa67,3 +np.float64,0xbfe2dc1a15e5b834,0xbfe10ac5f8f3acd3,3 +np.float64,0xbfee426bf4bc84d8,0xbfe83d061df91edd,3 +np.float64,0xd9142c2fb2286,0xd9142c2fb2286,3 +np.float64,0x7feb0d11dff61a23,0x3ff921fb54442d18,3 +np.float64,0x800fea5b509fd4b7,0x800fea5b509fd4b7,3 +np.float64,0x3fe1a8818da35103,0x3fe022ba1bdf366e,3 +np.float64,0x8010000000000000,0x8010000000000000,3 +np.float64,0xbfd8fc6de6b1f8dc,0xbfd7d24726ed8dcc,3 +np.float64,0xf4b3dc2de967c,0xf4b3dc2de967c,3 +np.float64,0x8af0409b15e08,0x8af0409b15e08,3 +np.float64,0x3fb21e6934243cd2,0x3fb216b065f8709a,3 +np.float64,0x3fc53069392a60d2,0x3fc4ffa931211fb9,3 +np.float64,0xffc955812c32ab04,0xbff921fb54442d18,3 +np.float64,0xbfe3de42b1a7bc86,0xbfe1c7bd1324de75,3 +np.float64,0x1dc149a03b82a,0x1dc149a03b82a,3 +np.float64,0x8001bc5a24a378b5,0x8001bc5a24a378b5,3 +np.float64,0x3da14c407b44,0x3da14c407b44,3 +np.float64,0x80025e8da924bd1c,0x80025e8da924bd1c,3 +np.float64,0xbfcb0141c9360284,0xbfca9d572ea5e1f3,3 +np.float64,0xc90036fd92007,0xc90036fd92007,3 +np.float64,0x138312c427063,0x138312c427063,3 +np.float64,0x800dda3a963bb475,0x800dda3a963bb475,3 +np.float64,0x3fe9339934f26732,0x3fe558e723291f78,3 +np.float64,0xbfea8357027506ae,0xbfe6240826faaf48,3 +np.float64,0x7fe04735cae08e6b,0x3ff921fb54442d18,3 +np.float64,0x3fe29aca3c653594,0x3fe0da214c8bc6a4,3 +np.float64,0x3fbe1f09a03c3e13,0x3fbdfbbefef0155b,3 +np.float64,0x816ee4ad02ddd,0x816ee4ad02ddd,3 +np.float64,0xffddd1b31d3ba366,0xbff921fb54442d18,3 +np.float64,0x3fe2e01e0625c03c,0x3fe10dc0bd6677c2,3 +np.float64,0x3fec6bcf1978d79e,0x3fe73d518cddeb7c,3 +np.float64,0x7fe01aaaf8603555,0x3ff921fb54442d18,3 +np.float64,0xdf300cc5be602,0xdf300cc5be602,3 +np.float64,0xbfe71c01a36e3804,0xbfe403af80ce47b8,3 +np.float64,0xffa5be00ac2b7c00,0xbff921fb54442d18,3 +np.float64,0xbfda9ba711b5374e,0xbfd93775e3ac6bda,3 +np.float64,0xbfe56d8a27eadb14,0xbfe2e1a7185e8e6d,3 +np.float64,0x800f1bc937be3792,0x800f1bc937be3792,3 +np.float64,0x800a61d93c74c3b3,0x800a61d93c74c3b3,3 +np.float64,0x7fe71a52fcae34a5,0x3ff921fb54442d18,3 +np.float64,0x7fb4aef256295de4,0x3ff921fb54442d18,3 +np.float64,0x3fe6c1e861ed83d1,0x3fe3c828f281a7ef,3 +np.float64,0x3fba128402342508,0x3fb9fb94cf141860,3 +np.float64,0x3fee55a7ecfcab50,0x3fe8472a9af893ee,3 +np.float64,0x3fe586f31b2b0de6,0x3fe2f32bce9e91bc,3 +np.float64,0xbfbb1d1442363a28,0xbfbb034c7729d5f2,3 +np.float64,0xc78b4d3f8f16a,0xc78b4d3f8f16a,3 +np.float64,0x7fdbc277d4b784ef,0x3ff921fb54442d18,3 +np.float64,0xbfa728ca2c2e5190,0xbfa724c04e73ccbd,3 +np.float64,0x7fefc7b2143f8f63,0x3ff921fb54442d18,3 +np.float64,0x3fd153a3dda2a748,0x3fd0ebccd33a4dca,3 +np.float64,0xbfe18a6eace314de,0xbfe00ba32ec89d30,3 +np.float64,0x7feef518537dea30,0x3ff921fb54442d18,3 +np.float64,0x8005f007cd4be010,0x8005f007cd4be010,3 
+np.float64,0x7fd890b840b12170,0x3ff921fb54442d18,3 +np.float64,0x7feed0582ebda0af,0x3ff921fb54442d18,3 +np.float64,0x1013f53220280,0x1013f53220280,3 +np.float64,0xbfe77273986ee4e7,0xbfe43c375a8bf6de,3 +np.float64,0x7fe3ab8918675711,0x3ff921fb54442d18,3 +np.float64,0xbfc6ad515b2d5aa4,0xbfc671b2f7f86624,3 +np.float64,0x7fcd86231d3b0c45,0x3ff921fb54442d18,3 +np.float64,0xffe2523299a4a464,0xbff921fb54442d18,3 +np.float64,0x7fcadc5a1b35b8b3,0x3ff921fb54442d18,3 +np.float64,0x3fe5e020c4ebc042,0x3fe330418eec75bd,3 +np.float64,0x7fe332a9dc266553,0x3ff921fb54442d18,3 +np.float64,0xfa11dc21f425,0xfa11dc21f425,3 +np.float64,0xbec800177d900,0xbec800177d900,3 +np.float64,0x3fcadd057835ba0b,0x3fca7aa42face8bc,3 +np.float64,0xbfe6b9a206ad7344,0xbfe3c2a9719803de,3 +np.float64,0x3fbb4250b63684a0,0x3fbb281e9cefc519,3 +np.float64,0x7fef8787517f0f0e,0x3ff921fb54442d18,3 +np.float64,0x8001315c2d6262b9,0x8001315c2d6262b9,3 +np.float64,0xbfd94e3cf2b29c7a,0xbfd819257d36f56c,3 +np.float64,0xf1f325abe3e65,0xf1f325abe3e65,3 +np.float64,0x7fd6c07079ad80e0,0x3ff921fb54442d18,3 +np.float64,0x7fe328b075a65160,0x3ff921fb54442d18,3 +np.float64,0x7fe7998f812f331e,0x3ff921fb54442d18,3 +np.float64,0xffe026bb65604d76,0xbff921fb54442d18,3 +np.float64,0xffd6c06de8ad80dc,0xbff921fb54442d18,3 +np.float64,0x3fcd5a37bf3ab46f,0x3fccda82935d98ce,3 +np.float64,0xffc3e5a45227cb48,0xbff921fb54442d18,3 +np.float64,0x3febf7dd8177efbc,0x3fe6fc0bb999883e,3 +np.float64,0x7fd7047ea92e08fc,0x3ff921fb54442d18,3 +np.float64,0x35b3fc406b680,0x35b3fc406b680,3 +np.float64,0x7fd52e97632a5d2e,0x3ff921fb54442d18,3 +np.float64,0x3fd464d401a8c9a8,0x3fd3be2967fc97c3,3 +np.float64,0x800e815b2ebd02b6,0x800e815b2ebd02b6,3 +np.float64,0x3fca8428af350850,0x3fca257b466b8970,3 +np.float64,0x8007b7526f6f6ea6,0x8007b7526f6f6ea6,3 +np.float64,0x82f60a8f05ec2,0x82f60a8f05ec2,3 +np.float64,0x3fb71a5d0a2e34c0,0x3fb70a629ef8e2a2,3 +np.float64,0x7fc8570c7d30ae18,0x3ff921fb54442d18,3 +np.float64,0x7fe5528e77eaa51c,0x3ff921fb54442d18,3 +np.float64,0xffc20dbbf1241b78,0xbff921fb54442d18,3 +np.float64,0xeb13368fd6267,0xeb13368fd6267,3 +np.float64,0x7fe7d529056faa51,0x3ff921fb54442d18,3 +np.float64,0x3fecd02eabf9a05d,0x3fe77516f0ba1ac4,3 +np.float64,0x800fcba6a09f974d,0x800fcba6a09f974d,3 +np.float64,0x7fe7e8e015afd1bf,0x3ff921fb54442d18,3 +np.float64,0xbfd271a382a4e348,0xbfd1f513a191c595,3 +np.float64,0x9f1014013e21,0x9f1014013e21,3 +np.float64,0x3fc05da47f20bb49,0x3fc04708a13a3a47,3 +np.float64,0x3fe0f427dda1e850,0x3fdf2e60ba8678b9,3 +np.float64,0xbfecb29fa539653f,0xbfe764bc791c45dd,3 +np.float64,0x45881ec68b104,0x45881ec68b104,3 +np.float64,0x8000000000000001,0x8000000000000001,3 +np.float64,0x3fe9c67ee1338cfe,0x3fe5b2c7b3df6ce8,3 +np.float64,0x7fedb8fef6bb71fd,0x3ff921fb54442d18,3 +np.float64,0x3fe54f6aaaea9ed6,0x3fe2ccd1df2abaa9,3 +np.float64,0x7feff58a1bbfeb13,0x3ff921fb54442d18,3 +np.float64,0x7fe3b62827276c4f,0x3ff921fb54442d18,3 +np.float64,0x3fe5feb682ebfd6d,0x3fe345105bc6d980,3 +np.float64,0x3fe49f38d9693e72,0x3fe2518b2824757f,3 +np.float64,0x8006bfd27c6d7fa6,0x8006bfd27c6d7fa6,3 +np.float64,0x3fc13409e2226814,0x3fc119ce0c01a5a2,3 +np.float64,0x95f8c7212bf19,0x95f8c7212bf19,3 +np.float64,0x3fd9f0fa6133e1f5,0x3fd8a567515edecf,3 +np.float64,0x3fef95cbe5ff2b98,0x3fe8ec88c768ba0b,3 +np.float64,0x3fbed28bba3da510,0x3fbeacbf136e51c2,3 +np.float64,0xbfd3987aeca730f6,0xbfd303fca58e3e60,3 +np.float64,0xbfed0f90cbfa1f22,0xbfe797f59249410d,3 +np.float64,0xffe55d8cbf2abb19,0xbff921fb54442d18,3 +np.float64,0x3feb4d9fc6769b40,0x3fe69a88131a1f1f,3 
+np.float64,0x80085569acd0aad4,0x80085569acd0aad4,3 +np.float64,0x20557a6e40ab0,0x20557a6e40ab0,3 +np.float64,0x3fead2fd5df5a5fb,0x3fe653091f33b27f,3 +np.float64,0x3fe7b9983eaf7330,0x3fe46a50c4b5235e,3 +np.float64,0xffdad237ffb5a470,0xbff921fb54442d18,3 +np.float64,0xbfe5cc39a4eb9874,0xbfe322ad3a903f93,3 +np.float64,0x800ad6eecb35adde,0x800ad6eecb35adde,3 +np.float64,0xffec620f6438c41e,0xbff921fb54442d18,3 +np.float64,0xbfe5ef29122bde52,0xbfe33a7dfcc255e2,3 +np.float64,0x3fd451e7d0a8a3d0,0x3fd3acfa4939af10,3 +np.float64,0x8003ea93c127d528,0x8003ea93c127d528,3 +np.float64,0x800b48d37c9691a7,0x800b48d37c9691a7,3 +np.float64,0x3fe7e202acafc405,0x3fe484558246069b,3 +np.float64,0x80070c9b686e1938,0x80070c9b686e1938,3 +np.float64,0xbfda90bbc6352178,0xbfd92e25fcd12288,3 +np.float64,0x800e1ffebb1c3ffe,0x800e1ffebb1c3ffe,3 +np.float64,0x3ff0000000000000,0x3fe921fb54442d18,3 +np.float64,0xffd8cfdd46319fba,0xbff921fb54442d18,3 +np.float64,0x7fd8cd4182319a82,0x3ff921fb54442d18,3 +np.float64,0x3fed8bb778bb176f,0x3fe7db7c77c4c694,3 +np.float64,0x3fc74a70302e94e0,0x3fc709e95d6defec,3 +np.float64,0x3fe87269d070e4d4,0x3fe4e04bcc4a2137,3 +np.float64,0x7fb48223f6290447,0x3ff921fb54442d18,3 +np.float64,0xffe8ec444b71d888,0xbff921fb54442d18,3 +np.float64,0x7fde17d280bc2fa4,0x3ff921fb54442d18,3 +np.float64,0x3fd1cbde01a397bc,0x3fd15b9bb7b3147b,3 +np.float64,0x800883a64451074d,0x800883a64451074d,3 +np.float64,0x7fe3160a3f262c13,0x3ff921fb54442d18,3 +np.float64,0xbfe051d4d9a0a3aa,0xbfde2ecf14dc75fb,3 +np.float64,0xbfd89de689b13bce,0xbfd780176d1a28a3,3 +np.float64,0x3fecde2bf779bc58,0x3fe77ccf10bdd8e2,3 +np.float64,0xffe75774dc6eaee9,0xbff921fb54442d18,3 +np.float64,0x7fe834414d706882,0x3ff921fb54442d18,3 +np.float64,0x1,0x1,3 +np.float64,0xbfea5e4e4a74bc9c,0xbfe60e0601711835,3 +np.float64,0xffec248d4cb8491a,0xbff921fb54442d18,3 +np.float64,0xffd9942c2c332858,0xbff921fb54442d18,3 +np.float64,0xa9db36a553b67,0xa9db36a553b67,3 +np.float64,0x7fec630718b8c60d,0x3ff921fb54442d18,3 +np.float64,0xbfd062188f20c432,0xbfd009ecd652be89,3 +np.float64,0x8001b84e3023709d,0x8001b84e3023709d,3 +np.float64,0xbfe9e26d7cb3c4db,0xbfe5c3b157ecf668,3 +np.float64,0xbfef66ddf33ecdbc,0xbfe8d4b1f6410a24,3 +np.float64,0x3fd8d7109431ae21,0x3fd7b1d4860719a2,3 +np.float64,0xffee0f53107c1ea5,0xbff921fb54442d18,3 +np.float64,0x80000b4fd60016a0,0x80000b4fd60016a0,3 +np.float64,0xbfd99ff6e5333fee,0xbfd85fb3cbdaa049,3 +np.float64,0xbfe9cfd268339fa5,0xbfe5b86ef021a1b1,3 +np.float64,0xe32eace1c65d6,0xe32eace1c65d6,3 +np.float64,0xffc81f6627303ecc,0xbff921fb54442d18,3 +np.float64,0x7fe98dadde331b5b,0x3ff921fb54442d18,3 +np.float64,0xbfbcebd11e39d7a0,0xbfbccc8ec47883c7,3 +np.float64,0x7fe164880f22c90f,0x3ff921fb54442d18,3 +np.float64,0x800467c0cae8cf82,0x800467c0cae8cf82,3 +np.float64,0x800071e4b140e3ca,0x800071e4b140e3ca,3 +np.float64,0xbfc87a7eae30f4fc,0xbfc82fbc55bb0f24,3 +np.float64,0xffb2e0e23225c1c8,0xbff921fb54442d18,3 +np.float64,0x20ef338041df,0x20ef338041df,3 +np.float64,0x7fe6de71ca6dbce3,0x3ff921fb54442d18,3 +np.float64,0x5d1fa026ba3f5,0x5d1fa026ba3f5,3 +np.float64,0xffd112a9ce222554,0xbff921fb54442d18,3 +np.float64,0x3fb351f66626a3ed,0x3fb3489ab578c452,3 +np.float64,0x7fef7b2bd3bef657,0x3ff921fb54442d18,3 +np.float64,0xffe144f5d4e289eb,0xbff921fb54442d18,3 +np.float64,0xffd63a6750ac74ce,0xbff921fb54442d18,3 +np.float64,0x7fd2d8bb25a5b175,0x3ff921fb54442d18,3 +np.float64,0x3fec5920a078b242,0x3fe732dcffcf6521,3 +np.float64,0x80009a8b7f813518,0x80009a8b7f813518,3 +np.float64,0x3fdea220893d4441,0x3fdc921edf6bf3d8,3 
+np.float64,0x8006cee2208d9dc5,0x8006cee2208d9dc5,3 +np.float64,0xdd0b0081ba17,0xdd0b0081ba17,3 +np.float64,0x7ff4000000000000,0x7ffc000000000000,3 +np.float64,0xbfdac33955358672,0xbfd9592bce7daf1f,3 +np.float64,0x7fe8301d7170603a,0x3ff921fb54442d18,3 +np.float64,0xbfc1d34d8523a69c,0xbfc1b62449af9684,3 +np.float64,0x800c62239458c447,0x800c62239458c447,3 +np.float64,0xffd398c009a73180,0xbff921fb54442d18,3 +np.float64,0xbfe0c6d9ee218db4,0xbfdee777557f4401,3 +np.float64,0x3feccdd373799ba7,0x3fe773c9c2263f89,3 +np.float64,0xbfd21898bda43132,0xbfd1a2be8545fcc5,3 +np.float64,0x3fd77019b62ee033,0x3fd67793cabdf267,3 +np.float64,0x7fa609cad42c1395,0x3ff921fb54442d18,3 +np.float64,0x7fb4eaea5a29d5d4,0x3ff921fb54442d18,3 +np.float64,0x3fc570dc9a2ae1b9,0x3fc53e5f6218a799,3 +np.float64,0x800344ae8466895e,0x800344ae8466895e,3 +np.float64,0xbfc7c985252f930c,0xbfc784d60fa27bac,3 +np.float64,0xffaa2929fc345250,0xbff921fb54442d18,3 +np.float64,0xffe63e5ee9ac7cbe,0xbff921fb54442d18,3 +np.float64,0x73f0280ce7e06,0x73f0280ce7e06,3 +np.float64,0xffc525f8822a4bf0,0xbff921fb54442d18,3 +np.float64,0x7fd744d00aae899f,0x3ff921fb54442d18,3 +np.float64,0xbfe0fe590761fcb2,0xbfdf3e493e8b1f32,3 +np.float64,0xfae04ae7f5c0a,0xfae04ae7f5c0a,3 +np.float64,0xef821939df043,0xef821939df043,3 +np.float64,0x7fef6135843ec26a,0x3ff921fb54442d18,3 +np.float64,0xbfebf34dcbf7e69c,0xbfe6f97588a8f911,3 +np.float64,0xbfeec0b498fd8169,0xbfe87f2eceeead12,3 +np.float64,0x7fb67161b42ce2c2,0x3ff921fb54442d18,3 +np.float64,0x3fdcfd998639fb33,0x3fdb38934927c096,3 +np.float64,0xffda5960bc34b2c2,0xbff921fb54442d18,3 +np.float64,0xbfe11f8c71223f19,0xbfdf71fe770c96ab,3 +np.float64,0x3fe4ac1bab695838,0x3fe25aa4517b8322,3 +np.float64,0x3f730458a02608b1,0x3f73044fabb5e999,3 +np.float64,0x3fdb14ffcdb62a00,0x3fd99ea6c241a3ed,3 +np.float64,0xbfc93208cd326410,0xbfc8e09d78b6d4db,3 +np.float64,0x19e734dc33ce8,0x19e734dc33ce8,3 +np.float64,0x3fe5e98428abd308,0x3fe336a6a085eb55,3 +np.float64,0x7fec672a1378ce53,0x3ff921fb54442d18,3 +np.float64,0x800f8bd8d4ff17b2,0x800f8bd8d4ff17b2,3 +np.float64,0xbfe5a12e4e6b425c,0xbfe30533f99d5d06,3 +np.float64,0x75a34cb0eb46a,0x75a34cb0eb46a,3 +np.float64,0x7fe1d21d16a3a439,0x3ff921fb54442d18,3 +np.float64,0x7ff0000000000000,0x3ff921fb54442d18,3 +np.float64,0xffe0f50db261ea1b,0xbff921fb54442d18,3 +np.float64,0xbfd9dc22feb3b846,0xbfd8937ec965a501,3 +np.float64,0x8009d68e48d3ad1d,0x8009d68e48d3ad1d,3 +np.float64,0xbfe2eba620e5d74c,0xbfe1164d7d273c60,3 +np.float64,0x992efa09325e0,0x992efa09325e0,3 +np.float64,0x3fdab640ea356c82,0x3fd94e20cab88db2,3 +np.float64,0x69a6f04ad34df,0x69a6f04ad34df,3 +np.float64,0x3fe397df25272fbe,0x3fe194bd1a3a6192,3 +np.float64,0xebcce9fdd799d,0xebcce9fdd799d,3 +np.float64,0x3fbb49490c369292,0x3fbb2f02eccc497d,3 +np.float64,0xffd871f980b0e3f4,0xbff921fb54442d18,3 +np.float64,0x800348f6966691ee,0x800348f6966691ee,3 +np.float64,0xbfebc270a7f784e1,0xbfe6dda8d0d80f26,3 +np.float64,0xffd6d559b1adaab4,0xbff921fb54442d18,3 +np.float64,0x3fec3635c0b86c6c,0x3fe71f420256e43e,3 +np.float64,0x7fbc82ad7039055a,0x3ff921fb54442d18,3 +np.float64,0x7f873050602e60a0,0x3ff921fb54442d18,3 +np.float64,0x3fca44b8c3348970,0x3fc9e8a1a1a2d96e,3 +np.float64,0x3fe0fc308fe1f861,0x3fdf3aeb469ea225,3 +np.float64,0x7fefc27de8bf84fb,0x3ff921fb54442d18,3 +np.float64,0x8005f3f3916be7e8,0x8005f3f3916be7e8,3 +np.float64,0xbfd4278c7c284f18,0xbfd38678988873b6,3 +np.float64,0x435eafc486bd7,0x435eafc486bd7,3 +np.float64,0xbfd01f5199203ea4,0xbfcf96631f2108a3,3 +np.float64,0xffd5ee9185abdd24,0xbff921fb54442d18,3 
+np.float64,0xffedb363257b66c5,0xbff921fb54442d18,3 +np.float64,0x800d68e6e11ad1ce,0x800d68e6e11ad1ce,3 +np.float64,0xbfcf687f8e3ed100,0xbfceccb771b0d39a,3 +np.float64,0x7feb3b9ef2f6773d,0x3ff921fb54442d18,3 +np.float64,0x3fe15ec5ca62bd8c,0x3fdfd3fab9d96f81,3 +np.float64,0x10000000000000,0x10000000000000,3 +np.float64,0xd2386f81a470e,0xd2386f81a470e,3 +np.float64,0xb9feed4573fde,0xb9feed4573fde,3 +np.float64,0x3fe7ed25c9efda4c,0x3fe48b7b72db4014,3 +np.float64,0xbfe01478726028f1,0xbfddcd1f5a2efc59,3 +np.float64,0x9946d02f328da,0x9946d02f328da,3 +np.float64,0xbfe3bb67f06776d0,0xbfe1ae88aa81c5a6,3 +np.float64,0xbfd3fd8a4c27fb14,0xbfd3603982e3b78d,3 +np.float64,0xffd5c3ab912b8758,0xbff921fb54442d18,3 +np.float64,0xffd5f502b12bea06,0xbff921fb54442d18,3 +np.float64,0xbfc64981ec2c9304,0xbfc610e0382b1fa6,3 +np.float64,0xffec42e3413885c6,0xbff921fb54442d18,3 +np.float64,0x80084eb4ed109d6a,0x80084eb4ed109d6a,3 +np.float64,0xbfd17cac9fa2f95a,0xbfd112020588a4b3,3 +np.float64,0xbfd06c1359a0d826,0xbfd0134a28aa9a66,3 +np.float64,0x7fdc3d7c03b87af7,0x3ff921fb54442d18,3 +np.float64,0x7bdf5aaaf7bec,0x7bdf5aaaf7bec,3 +np.float64,0xbfee3cd966fc79b3,0xbfe83a14bc07ac3b,3 +np.float64,0x7fec910da3f9221a,0x3ff921fb54442d18,3 +np.float64,0xffb4ea667029d4d0,0xbff921fb54442d18,3 +np.float64,0x800103d7cce207b0,0x800103d7cce207b0,3 +np.float64,0x7fbb229a6c364534,0x3ff921fb54442d18,3 +np.float64,0x0,0x0,3 +np.float64,0xffd8fccd0331f99a,0xbff921fb54442d18,3 +np.float64,0xbfd0784ae1a0f096,0xbfd01ebff62e39ad,3 +np.float64,0xbfed2ec9b3ba5d93,0xbfe7a9099410bc76,3 +np.float64,0x800690b8d16d2172,0x800690b8d16d2172,3 +np.float64,0x7fc061b26520c364,0x3ff921fb54442d18,3 +np.float64,0x8007ec47054fd88f,0x8007ec47054fd88f,3 +np.float64,0x775546b6eeaa9,0x775546b6eeaa9,3 +np.float64,0x8005e00fb56bc020,0x8005e00fb56bc020,3 +np.float64,0xbfe510f8d0ea21f2,0xbfe2a16862b5a37f,3 +np.float64,0xffd87a6bf3b0f4d8,0xbff921fb54442d18,3 +np.float64,0x800906e3d0520dc8,0x800906e3d0520dc8,3 +np.float64,0x2296f000452f,0x2296f000452f,3 +np.float64,0xbfe3189fa2e63140,0xbfe1378c0e005be4,3 +np.float64,0xb4d2447f69a49,0xb4d2447f69a49,3 +np.float64,0xffd056a24a20ad44,0xbff921fb54442d18,3 +np.float64,0xbfe3b23fe4e76480,0xbfe1a7e5840fcbeb,3 +np.float64,0x80018ee270831dc6,0x80018ee270831dc6,3 +np.float64,0x800df89f245bf13e,0x800df89f245bf13e,3 +np.float64,0x3fee1409d7bc2814,0x3fe824779d133232,3 +np.float64,0xbfef8d81667f1b03,0xbfe8e85523620368,3 +np.float64,0xffd8a6519b314ca4,0xbff921fb54442d18,3 +np.float64,0x7fc7bc86f32f790d,0x3ff921fb54442d18,3 +np.float64,0xffea6159e674c2b3,0xbff921fb54442d18,3 +np.float64,0x3fe153c3fba2a788,0x3fdfc2f74769d300,3 +np.float64,0xffc4261ef3284c3c,0xbff921fb54442d18,3 +np.float64,0x7fe8a8961ff1512b,0x3ff921fb54442d18,3 +np.float64,0xbfe3fb1fd167f640,0xbfe1dc89dcb7ecdf,3 +np.float64,0x3fd88577c2b10af0,0x3fd76acc09660704,3 +np.float64,0x3fe128ec27e251d8,0x3fdf808fc7ebcd8f,3 +np.float64,0xbfed6ca7c4fad950,0xbfe7caafe9a3e213,3 +np.float64,0xbf9a3912b8347220,0xbf9a379b3349352e,3 +np.float64,0xbfd724d7bcae49b0,0xbfd6351efa2a5fc5,3 +np.float64,0xbfed59700a7ab2e0,0xbfe7c043014c694c,3 +np.float64,0x8002ad435bc55a87,0x8002ad435bc55a87,3 +np.float64,0xffe46ed345a8dda6,0xbff921fb54442d18,3 +np.float64,0x7fd2f1d1d825e3a3,0x3ff921fb54442d18,3 +np.float64,0xbfea0265e23404cc,0xbfe5d6fb3fd30464,3 +np.float64,0xbfd17e049122fc0a,0xbfd113421078bbae,3 +np.float64,0xffea03b986b40772,0xbff921fb54442d18,3 +np.float64,0x800b55331a16aa67,0x800b55331a16aa67,3 +np.float64,0xbfc6fcafbf2df960,0xbfc6be9ecd0ebc1f,3 
+np.float64,0xd6a36017ad46c,0xd6a36017ad46c,3 +np.float64,0xbfe9ba86dfb3750e,0xbfe5ab840cb0ef86,3 +np.float64,0x75c4a108eb895,0x75c4a108eb895,3 +np.float64,0x8008d6bc8051ad79,0x8008d6bc8051ad79,3 +np.float64,0xbfd3dc5984a7b8b4,0xbfd341f78e0528ec,3 +np.float64,0xffe1cbb01aa39760,0xbff921fb54442d18,3 +np.float64,0x3fc7e292f52fc526,0x3fc79d0ce9365767,3 +np.float64,0xbfcbeae2bd37d5c4,0xbfcb7cb034f82467,3 +np.float64,0x8000f0c62e21e18d,0x8000f0c62e21e18d,3 +np.float64,0xbfe23d8bc6247b18,0xbfe09418ee35c3c7,3 +np.float64,0x717394bae2e73,0x717394bae2e73,3 +np.float64,0xffa2ef1cc425de40,0xbff921fb54442d18,3 +np.float64,0x3fd938c229b27184,0x3fd806900735c99d,3 +np.float64,0x800bf3ec8a77e7d9,0x800bf3ec8a77e7d9,3 +np.float64,0xffeef41dd57de83b,0xbff921fb54442d18,3 +np.float64,0x8008df97e5b1bf30,0x8008df97e5b1bf30,3 +np.float64,0xffe9ab9d0db35739,0xbff921fb54442d18,3 +np.float64,0x99ff391333fe7,0x99ff391333fe7,3 +np.float64,0x3fb864b4a630c969,0x3fb851e883ea2cf9,3 +np.float64,0x22c1230a45825,0x22c1230a45825,3 +np.float64,0xff2336fbfe467,0xff2336fbfe467,3 +np.float64,0xbfd488f4cea911ea,0xbfd3def0490f5414,3 +np.float64,0x3fa379c78426f38f,0x3fa377607370800b,3 +np.float64,0xbfb0873302210e68,0xbfb08155b78dfd53,3 +np.float64,0xbfdf9ff7c2bf3ff0,0xbfdd5f658e357ad2,3 +np.float64,0x800978719192f0e4,0x800978719192f0e4,3 +np.float64,0xbfba8759ea350eb0,0xbfba6f325013b9e5,3 +np.float64,0xbfdd3e6b06ba7cd6,0xbfdb6e472b6091b0,3 +np.float64,0x7fe0c334a7a18668,0x3ff921fb54442d18,3 +np.float64,0xbfeb971feb772e40,0xbfe6c4e0f61404d1,3 +np.float64,0x3fe2a50968e54a13,0x3fe0e1c8b8d96e85,3 +np.float64,0x800fa9c5515f538b,0x800fa9c5515f538b,3 +np.float64,0x800f8532fbbf0a66,0x800f8532fbbf0a66,3 +np.float64,0x167d6f1e2cfaf,0x167d6f1e2cfaf,3 +np.float64,0xffee88e769fd11ce,0xbff921fb54442d18,3 +np.float64,0xbfeecc8529fd990a,0xbfe885520cdad8ea,3 +np.float64,0xffefffffffffffff,0xbff921fb54442d18,3 +np.float64,0xbfef6a566afed4ad,0xbfe8d6767b4c4235,3 +np.float64,0xffec12415af82482,0xbff921fb54442d18,3 +np.float64,0x3678a20a6cf15,0x3678a20a6cf15,3 +np.float64,0xffe468d54ee8d1aa,0xbff921fb54442d18,3 +np.float64,0x800ad6006795ac01,0x800ad6006795ac01,3 +np.float64,0x8001d5b61063ab6d,0x8001d5b61063ab6d,3 +np.float64,0x800dfcd1863bf9a3,0x800dfcd1863bf9a3,3 +np.float64,0xc9fbff6f93f80,0xc9fbff6f93f80,3 +np.float64,0xffe55c20f9eab842,0xbff921fb54442d18,3 +np.float64,0xbfcb596b6536b2d8,0xbfcaf1b339c5c615,3 +np.float64,0xbfe092689ea124d1,0xbfde94fa58946e51,3 +np.float64,0x3fe9ec733af3d8e6,0x3fe5c9bf5dee2623,3 +np.float64,0x3fe30f3d83261e7b,0x3fe1309fd6620e03,3 +np.float64,0xffd31d7f84263b00,0xbff921fb54442d18,3 +np.float64,0xbfe88d2d3e711a5a,0xbfe4f12b5a136178,3 +np.float64,0xffc81e4ce1303c98,0xbff921fb54442d18,3 +np.float64,0xffe5b96ebfab72dd,0xbff921fb54442d18,3 +np.float64,0x512f0502a25e1,0x512f0502a25e1,3 +np.float64,0x7fa3a376982746ec,0x3ff921fb54442d18,3 +np.float64,0x80005b5f2f60b6bf,0x80005b5f2f60b6bf,3 +np.float64,0xc337cc69866fa,0xc337cc69866fa,3 +np.float64,0x3fe7719c4caee339,0x3fe43bab42b19e64,3 +np.float64,0x7fde7ec1d93cfd83,0x3ff921fb54442d18,3 +np.float64,0x3fd2f38f3825e71e,0x3fd26cc7b1dd0acb,3 +np.float64,0x7fce298b993c5316,0x3ff921fb54442d18,3 +np.float64,0x56ae3b2cad5c8,0x56ae3b2cad5c8,3 +np.float64,0x3fe9299f2bf2533e,0x3fe552bddd999e72,3 +np.float64,0x7feff3a4823fe748,0x3ff921fb54442d18,3 +np.float64,0xbfd05c670aa0b8ce,0xbfd00494d78e9e97,3 +np.float64,0xffe745323eae8a64,0xbff921fb54442d18,3 diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-arctanh.csv 
b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-arctanh.csv new file mode 100644 index 0000000..a655269 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-arctanh.csv @@ -0,0 +1,1429 @@ +dtype,input,output,ulperrortol +np.float32,0x3ee82930,0x3efa60fd,2 +np.float32,0x3f0aa640,0x3f1b3e13,2 +np.float32,0x3ec1a21c,0x3ecbbf8d,2 +np.float32,0x3cdb1740,0x3cdb24a1,2 +np.float32,0xbf28b6f3,0xbf4a86ac,2 +np.float32,0xbe490dcc,0xbe4bb2eb,2 +np.float32,0x80000001,0x80000001,2 +np.float32,0xbf44f9dd,0xbf826ce1,2 +np.float32,0xbf1d66c4,0xbf37786b,2 +np.float32,0x3f0ad26a,0x3f1b7c9b,2 +np.float32,0x3f7b6c54,0x4016aab0,2 +np.float32,0xbf715bb8,0xbfe1a0bc,2 +np.float32,0xbee8a562,0xbefafd6a,2 +np.float32,0x3db94d00,0x3db9cf16,2 +np.float32,0x3ee2970c,0x3ef368b3,2 +np.float32,0x3f3f8614,0x3f77fdca,2 +np.float32,0xbf1fb5f0,0xbf3b3789,2 +np.float32,0x3f798dc0,0x400b96bb,2 +np.float32,0x3e975d64,0x3e9c0573,2 +np.float32,0xbe3f1908,0xbe415d1f,2 +np.float32,0x3f2cea38,0x3f52192e,2 +np.float32,0x3e82f1ac,0x3e85eaa1,2 +np.float32,0x3eab6b30,0x3eb24acd,2 +np.float32,0xbe9bb90c,0xbea0cf5f,2 +np.float32,0xbf43e847,0xbf81202f,2 +np.float32,0xbd232fa0,0xbd2345c0,2 +np.float32,0xbbabbc00,0xbbabbc67,2 +np.float32,0xbf0b2975,0xbf1bf808,2 +np.float32,0xbef5ab0a,0xbf05d305,2 +np.float32,0x3f2cad16,0x3f51a8e2,2 +np.float32,0xbef75940,0xbf06eb08,2 +np.float32,0xbf0c1216,0xbf1d4325,2 +np.float32,0x3e7bdc08,0x3e8090c2,2 +np.float32,0x3da14e10,0x3da1a3c5,2 +np.float32,0x3f627412,0x3fb2bf21,2 +np.float32,0xbd6d08c0,0xbd6d4ca0,2 +np.float32,0x3f3e2368,0x3f74df8b,2 +np.float32,0xbe0df104,0xbe0edc77,2 +np.float32,0x3e8a265c,0x3e8da833,2 +np.float32,0xbdccdbb0,0xbdcd8ba8,2 +np.float32,0x3eb080c4,0x3eb80a44,2 +np.float32,0x3e627800,0x3e6645fe,2 +np.float32,0xbd8be0b0,0xbd8c1886,2 +np.float32,0xbf3282ac,0xbf5cae8c,2 +np.float32,0xbe515910,0xbe545707,2 +np.float32,0xbf2e64ac,0xbf54d637,2 +np.float32,0x3e0fc230,0x3e10b6de,2 +np.float32,0x3eb13ca0,0x3eb8df94,2 +np.float32,0x3f07a3ca,0x3f170572,2 +np.float32,0x3f2c7026,0x3f513935,2 +np.float32,0x3f3c4ec8,0x3f70d67c,2 +np.float32,0xbee9cce8,0xbefc724f,2 +np.float32,0xbe53ca60,0xbe56e3f3,2 +np.float32,0x3dd9e9a0,0x3ddabd98,2 +np.float32,0x3f38b8d4,0x3f69319b,2 +np.float32,0xbe176dc4,0xbe188c1d,2 +np.float32,0xbf322f2e,0xbf5c0c51,2 +np.float32,0xbe9b8676,0xbea097a2,2 +np.float32,0xbca44280,0xbca44823,2 +np.float32,0xbe2b0248,0xbe2ca036,2 +np.float32,0x3d101e80,0x3d102dbd,2 +np.float32,0xbf4eb610,0xbf8f526d,2 +np.float32,0xbec32a50,0xbecd89d1,2 +np.float32,0x3d549100,0x3d54c1ee,2 +np.float32,0x3f78e55e,0x40087025,2 +np.float32,0x3e592798,0x3e5c802d,2 +np.float32,0x3de045d0,0x3de12cfb,2 +np.float32,0xbdad28e0,0xbdad92f7,2 +np.float32,0x3e9a69e0,0x3e9f5e59,2 +np.float32,0x3e809778,0x3e836716,2 +np.float32,0xbf3278d9,0xbf5c9b6d,2 +np.float32,0x3f39fa00,0x3f6bd4a5,2 +np.float32,0xbec8143c,0xbed34ffa,2 +np.float32,0x3ddb7f40,0x3ddc57e6,2 +np.float32,0x3f0e8342,0x3f20c634,2 +np.float32,0x3f353dda,0x3f6213a4,2 +np.float32,0xbe96b400,0xbe9b4bea,2 +np.float32,0x3e626580,0x3e66328a,2 +np.float32,0xbde091c8,0xbde179df,2 +np.float32,0x3eb47b5c,0x3ebc91ca,2 +np.float32,0xbf282182,0xbf497f2f,2 +np.float32,0x3ea9f64c,0x3eb0a748,2 +np.float32,0x3f28dd4e,0x3f4aca86,2 +np.float32,0xbf71de18,0xbfe3f587,2 +np.float32,0x7fa00000,0x7fe00000,2 +np.float32,0xbf6696a6,0xbfbcf11a,2 +np.float32,0xbc853ae0,0xbc853de2,2 +np.float32,0xbeced246,0xbedb51b8,2 +np.float32,0x3f3472a4,0x3f607e00,2 +np.float32,0xbee90124,0xbefb7117,2 
+np.float32,0x3eb45b90,0x3ebc6d7c,2 +np.float32,0xbe53ead0,0xbe5705d6,2 +np.float32,0x3f630c80,0x3fb420e2,2 +np.float32,0xbf408cd0,0xbf7a56a2,2 +np.float32,0x3dda4ed0,0x3ddb23f1,2 +np.float32,0xbf37ae88,0xbf67096b,2 +np.float32,0xbdd48c28,0xbdd550c9,2 +np.float32,0xbf5745b0,0xbf9cb4a4,2 +np.float32,0xbf44e6fc,0xbf8255c1,2 +np.float32,0x3f5c8e6a,0x3fa65020,2 +np.float32,0xbea45fe8,0xbeaa6630,2 +np.float32,0x3f08bdee,0x3f188ef5,2 +np.float32,0x3ec77e74,0x3ed29f4b,2 +np.float32,0xbf1a1d3c,0xbf324029,2 +np.float32,0x3cad7340,0x3cad79e3,2 +np.float32,0xbf4fac2e,0xbf90b72a,2 +np.float32,0x3f58516e,0x3f9e8330,2 +np.float32,0x3f442008,0x3f816391,2 +np.float32,0xbf6e0c6c,0xbfd42854,2 +np.float32,0xbf266f7a,0xbf4689b2,2 +np.float32,0x3eb7e2f0,0x3ec077ba,2 +np.float32,0xbf320fd0,0xbf5bcf83,2 +np.float32,0xbf6a76b9,0xbfc80a11,2 +np.float32,0xbf2a91b4,0xbf4dd526,2 +np.float32,0x3f176e30,0x3f2e150e,2 +np.float32,0xbdcccad0,0xbdcd7a9c,2 +np.float32,0x3f60a8a4,0x3faebbf7,2 +np.float32,0x3d9706f0,0x3d974d40,2 +np.float32,0x3ef3cd34,0x3f049d58,2 +np.float32,0xbf73c615,0xbfed79fe,2 +np.float32,0x3df1b170,0x3df2d31b,2 +np.float32,0x3f632a46,0x3fb466c7,2 +np.float32,0xbf3ea18e,0xbf75f9ce,2 +np.float32,0xbf3ea05c,0xbf75f71f,2 +np.float32,0xbdd76750,0xbdd83403,2 +np.float32,0xbca830c0,0xbca836cd,2 +np.float32,0x3f1d4162,0x3f373c59,2 +np.float32,0x3c115700,0x3c1157fa,2 +np.float32,0x3dae8ab0,0x3daef758,2 +np.float32,0xbcad5020,0xbcad56bf,2 +np.float32,0x3ee299c4,0x3ef36c15,2 +np.float32,0xbf7f566c,0xc054c3bd,2 +np.float32,0x3f0cc698,0x3f1e4557,2 +np.float32,0xbe75c648,0xbe7aaa04,2 +np.float32,0x3ea29238,0x3ea86417,2 +np.float32,0x3f09d9c0,0x3f1a1d61,2 +np.float32,0x3f67275c,0x3fbe74b3,2 +np.float32,0x3e1a4e18,0x3e1b7d3a,2 +np.float32,0xbef6e3fc,0xbf069e98,2 +np.float32,0xbf6038ac,0xbfadc9fd,2 +np.float32,0xbe46bdd4,0xbe494b7f,2 +np.float32,0xbf4df1f4,0xbf8e3a98,2 +np.float32,0x3d094dc0,0x3d095aed,2 +np.float32,0x3f44c7d2,0x3f822fa3,2 +np.float32,0xbea30816,0xbea8e737,2 +np.float32,0xbe3c27c4,0xbe3e511b,2 +np.float32,0x3f3bb47c,0x3f6f8789,2 +np.float32,0xbe423760,0xbe4498c3,2 +np.float32,0x3ece1a74,0x3eda7634,2 +np.float32,0x3f14d1f6,0x3f2a1a89,2 +np.float32,0xbf4d9e8f,0xbf8dc4c1,2 +np.float32,0xbe92968e,0xbe96cd7f,2 +np.float32,0x3e99e6c0,0x3e9ece26,2 +np.float32,0xbf397361,0xbf6ab878,2 +np.float32,0xbf4fcea4,0xbf90e99f,2 +np.float32,0x3de37640,0x3de46779,2 +np.float32,0x3eb1b604,0x3eb9698c,2 +np.float32,0xbf52d0a2,0xbf957361,2 +np.float32,0xbe20435c,0xbe21975a,2 +np.float32,0x3f437a58,0x3f809bf1,2 +np.float32,0x3f27d1cc,0x3f48f335,2 +np.float32,0x3f7d4ff2,0x4027d1e2,2 +np.float32,0xbef732e4,0xbf06d205,2 +np.float32,0x3f4a0ae6,0x3f88e18e,2 +np.float32,0x3f800000,0x7f800000,2 +np.float32,0x3e3e56a0,0x3e4093ba,2 +np.float32,0xbed2fcfa,0xbee0517d,2 +np.float32,0xbe0e0114,0xbe0eecd7,2 +np.float32,0xbe808574,0xbe8353db,2 +np.float32,0x3f572e2a,0x3f9c8c86,2 +np.float32,0x80800000,0x80800000,2 +np.float32,0x3f3f3c82,0x3f775703,2 +np.float32,0xbf6e2482,0xbfd4818b,2 +np.float32,0xbf3943b0,0xbf6a5439,2 +np.float32,0x3f6e42ac,0x3fd4f1ea,2 +np.float32,0x3eb676c4,0x3ebed619,2 +np.float32,0xbe5e56c4,0xbe61ef6c,2 +np.float32,0x3eea200c,0x3efcdb65,2 +np.float32,0x3e3d2c78,0x3e3f5ef8,2 +np.float32,0xbdfd8fb0,0xbdfede71,2 +np.float32,0xbee69c8a,0xbef86e89,2 +np.float32,0x3e9efca0,0x3ea46a1c,2 +np.float32,0x3e4c2498,0x3e4ee9ee,2 +np.float32,0xbf3cc93c,0xbf71e21d,2 +np.float32,0x3ee0d77c,0x3ef13d2b,2 +np.float32,0xbefbcd2a,0xbf09d6a3,2 +np.float32,0x3f6dbe5c,0x3fd30a3e,2 +np.float32,0x3dae63e0,0x3daed03f,2 
+np.float32,0xbd5001e0,0xbd502fb9,2 +np.float32,0x3f59632a,0x3fa067c8,2 +np.float32,0x3f0d355a,0x3f1ee452,2 +np.float32,0x3f2cbe5c,0x3f51c896,2 +np.float32,0x3c5e6e80,0x3c5e7200,2 +np.float32,0xbe8ac49c,0xbe8e52f0,2 +np.float32,0x3f54e576,0x3f98c0e6,2 +np.float32,0xbeaa0762,0xbeb0ba7c,2 +np.float32,0x3ec81e88,0x3ed35c21,2 +np.float32,0x3f5a6738,0x3fa23fb6,2 +np.float32,0xbf24a682,0xbf43784a,2 +np.float32,0x1,0x1,2 +np.float32,0x3ee6bc24,0x3ef89630,2 +np.float32,0x3f19444a,0x3f30ecf5,2 +np.float32,0x3ec1fc70,0x3ecc28fc,2 +np.float32,0xbf706e14,0xbfdd92fb,2 +np.float32,0x3eccb630,0x3ed8cd98,2 +np.float32,0xbcdf7aa0,0xbcdf88d3,2 +np.float32,0xbe450da8,0xbe478a8e,2 +np.float32,0x3ec9c210,0x3ed54c0b,2 +np.float32,0xbf3b86ca,0xbf6f24d1,2 +np.float32,0x3edcc7a0,0x3eec3a5c,2 +np.float32,0x3f075d5c,0x3f16a39a,2 +np.float32,0xbf5719ce,0xbf9c69de,2 +np.float32,0x3f62cb22,0x3fb3885a,2 +np.float32,0x3f639216,0x3fb55c93,2 +np.float32,0xbf473ee7,0xbf85413a,2 +np.float32,0xbf01b66c,0xbf0eea86,2 +np.float32,0x3e872d80,0x3e8a74f8,2 +np.float32,0xbf60957e,0xbfae925c,2 +np.float32,0xbf6847b2,0xbfc1929b,2 +np.float32,0x3f78bb94,0x4007b363,2 +np.float32,0xbf47efdb,0xbf8622db,2 +np.float32,0xbe1f2308,0xbe206fd6,2 +np.float32,0xbf414926,0xbf7c0a7e,2 +np.float32,0x3eecc268,0x3f00194d,2 +np.float32,0x3eb086d0,0x3eb81120,2 +np.float32,0xbef1af80,0xbf033ff5,2 +np.float32,0xbf454e56,0xbf82d4aa,2 +np.float32,0x3e622560,0x3e65ef20,2 +np.float32,0x3f50d2b2,0x3f926a83,2 +np.float32,0x3eb2c45c,0x3eba9d2c,2 +np.float32,0x3e42d1a0,0x3e4538c9,2 +np.float32,0xbf24cc5c,0xbf43b8e3,2 +np.float32,0x3e8c6464,0x3e90141a,2 +np.float32,0xbf3abff2,0xbf6d79c5,2 +np.float32,0xbec8f2e6,0xbed456fa,2 +np.float32,0xbf787b38,0xc00698b4,2 +np.float32,0xbf58d5cd,0xbf9f6c03,2 +np.float32,0x3df4ee20,0x3df61ba8,2 +np.float32,0xbf34581e,0xbf604951,2 +np.float32,0xbeba5cf4,0xbec35119,2 +np.float32,0xbf76c22d,0xbfffc51c,2 +np.float32,0x3ef63b2c,0x3f0630b4,2 +np.float32,0x3eeadb64,0x3efdc877,2 +np.float32,0x3dfd8c70,0x3dfedb24,2 +np.float32,0x3f441600,0x3f81576d,2 +np.float32,0x3f23a0d8,0x3f41bbf6,2 +np.float32,0x3cb84d40,0x3cb85536,2 +np.float32,0xbf25cb5c,0xbf456e38,2 +np.float32,0xbc108540,0xbc108636,2 +np.float32,0xbc5b9140,0xbc5b949e,2 +np.float32,0xbf62ff40,0xbfb401dd,2 +np.float32,0x3e8e0710,0x3e91d93e,2 +np.float32,0x3f1b6ae0,0x3f344dfd,2 +np.float32,0xbf4dbbbe,0xbf8dedea,2 +np.float32,0x3f1a5fb2,0x3f32a880,2 +np.float32,0xbe56bd00,0xbe59f8cb,2 +np.float32,0xbf490a5c,0xbf87902d,2 +np.float32,0xbf513072,0xbf92f717,2 +np.float32,0x3e73ee28,0x3e78b542,2 +np.float32,0x3f0a4c7a,0x3f1abf2c,2 +np.float32,0x3e10d5c8,0x3e11d00b,2 +np.float32,0xbf771aac,0xc001207e,2 +np.float32,0x3efe2f54,0x3f0b6a46,2 +np.float32,0xbea5f3ea,0xbeac291f,2 +np.float32,0xbf1a73e8,0xbf32c845,2 +np.float32,0x3ebcc82c,0x3ec61c4f,2 +np.float32,0xbf24f492,0xbf43fd9a,2 +np.float32,0x3ecbd908,0x3ed7c691,2 +np.float32,0x3f461c5e,0x3f83d3f0,2 +np.float32,0x3eed0524,0x3f0043c1,2 +np.float32,0x3d06e840,0x3d06f4bf,2 +np.float32,0x3eb6c974,0x3ebf34d7,2 +np.float32,0xbf1c85e1,0xbf36100f,2 +np.float32,0x3ed697d0,0x3ee4ad04,2 +np.float32,0x3eab0484,0x3eb1d733,2 +np.float32,0xbf3b02f2,0xbf6e0935,2 +np.float32,0xbeeab154,0xbefd9334,2 +np.float32,0xbf695372,0xbfc49881,2 +np.float32,0x3e8aaa7c,0x3e8e36be,2 +np.float32,0xbf208754,0xbf3c8f7b,2 +np.float32,0xbe0dbf28,0xbe0ea9a1,2 +np.float32,0x3ca780c0,0x3ca786ba,2 +np.float32,0xbeb320b4,0xbebb065e,2 +np.float32,0x3f13c698,0x3f288821,2 +np.float32,0xbe8cbbec,0xbe9072c4,2 +np.float32,0x3f1ed534,0x3f39c8df,2 +np.float32,0x3e1ca450,0x3e1de190,2 
+np.float32,0x3f54be1c,0x3f988134,2 +np.float32,0x3f34e4ee,0x3f6161b4,2 +np.float32,0xbf7e6913,0xc038b246,2 +np.float32,0x3d3c3f20,0x3d3c6119,2 +np.float32,0x3ca9dc80,0x3ca9e2bc,2 +np.float32,0xbf577ea2,0xbf9d161a,2 +np.float32,0xbedb22c8,0xbeea3644,2 +np.float32,0x3f22a044,0x3f400bfa,2 +np.float32,0xbe214b8c,0xbe22a637,2 +np.float32,0x3e8cd300,0x3e908bbc,2 +np.float32,0xbec4d214,0xbecf7a58,2 +np.float32,0x3e9399a4,0x3e97e7e4,2 +np.float32,0xbee6a1a2,0xbef874ed,2 +np.float32,0xbf323742,0xbf5c1bfd,2 +np.float32,0x3f48b882,0x3f8725ac,2 +np.float32,0xbf4d4dba,0xbf8d532e,2 +np.float32,0xbf59640a,0xbfa0695a,2 +np.float32,0xbf2ad562,0xbf4e4f03,2 +np.float32,0x3e317d98,0x3e334d03,2 +np.float32,0xbf6a5b71,0xbfc7b5a2,2 +np.float32,0x3e87b434,0x3e8b05cf,2 +np.float32,0xbf1c344c,0xbf358dee,2 +np.float32,0x3e449428,0x3e470c65,2 +np.float32,0xbf2c0f2f,0xbf508808,2 +np.float32,0xbec5b5ac,0xbed0859c,2 +np.float32,0xbf4aa956,0xbf89b4b1,2 +np.float32,0x3f6dd374,0x3fd35717,2 +np.float32,0x3f45f76c,0x3f83a5ef,2 +np.float32,0xbed1fba8,0xbedf1bd5,2 +np.float32,0xbd26b2d0,0xbd26ca66,2 +np.float32,0xbe9817c2,0xbe9cd1c3,2 +np.float32,0x3e725988,0x3e770875,2 +np.float32,0xbf1a8ded,0xbf32f132,2 +np.float32,0xbe695860,0xbe6d83d3,2 +np.float32,0x3d8cecd0,0x3d8d25ea,2 +np.float32,0x3f574706,0x3f9cb6ec,2 +np.float32,0xbf5c5a1f,0xbfa5eaf3,2 +np.float32,0x3e7a7c88,0x3e7fab83,2 +np.float32,0xff800000,0xffc00000,2 +np.float32,0x3f66396a,0x3fbbfbb0,2 +np.float32,0x3ed6e588,0x3ee50b53,2 +np.float32,0xbb56d500,0xbb56d532,2 +np.float32,0x3ebd23fc,0x3ec6869a,2 +np.float32,0xbf70d490,0xbfdf4af5,2 +np.float32,0x3e514f88,0x3e544d15,2 +np.float32,0x3e660f98,0x3e6a0dac,2 +np.float32,0xbf034da1,0xbf1110bb,2 +np.float32,0xbf60d9be,0xbfaf2714,2 +np.float32,0x3df67b10,0x3df7ae64,2 +np.float32,0xbeeedc0a,0xbf017010,2 +np.float32,0xbe149224,0xbe15a072,2 +np.float32,0x3f455084,0x3f82d759,2 +np.float32,0x3f210f9e,0x3f3d7093,2 +np.float32,0xbeaea3e0,0xbeb5edd3,2 +np.float32,0x3e0724b0,0x3e07efad,2 +np.float32,0x3f09a784,0x3f19d6ac,2 +np.float32,0xbf044340,0xbf125ee8,2 +np.float32,0xbf71adc9,0xbfe315fe,2 +np.float32,0x3efd3870,0x3f0ac6a8,2 +np.float32,0xbf53c7a6,0xbf96f6df,2 +np.float32,0xbf3cf784,0xbf7247af,2 +np.float32,0x3e0ce9e0,0x3e0dd035,2 +np.float32,0xbd3051a0,0xbd306d89,2 +np.float32,0x3ecab804,0x3ed66f77,2 +np.float32,0x3e984350,0x3e9d0189,2 +np.float32,0x3edd1c00,0x3eeca20b,2 +np.float32,0xbe8e22a0,0xbe91f71b,2 +np.float32,0x3ebebc18,0x3ec85fd6,2 +np.float32,0xba275c00,0xba275c01,2 +np.float32,0x3f1d8190,0x3f37a385,2 +np.float32,0x3f17343e,0x3f2dbbfe,2 +np.float32,0x3caa8000,0x3caa864e,2 +np.float32,0x3e7a7308,0x3e7fa168,2 +np.float32,0x3f7359a6,0x3feb3e1a,2 +np.float32,0xbf7ad15a,0xc012a743,2 +np.float32,0xbf122efb,0xbf262812,2 +np.float32,0xbf03ba04,0xbf11a3fa,2 +np.float32,0x3ed7a90c,0x3ee5f8d4,2 +np.float32,0xbe23e318,0xbe254eed,2 +np.float32,0xbe2866f4,0xbe29f20a,2 +np.float32,0xbeaedff2,0xbeb631d0,2 +np.float32,0x0,0x0,2 +np.float32,0x3ef2a034,0x3f03dafd,2 +np.float32,0x3f35806c,0x3f62994e,2 +np.float32,0xbf655e19,0xbfb9c718,2 +np.float32,0x3f5d54ce,0x3fa7d4f4,2 +np.float32,0x3f33e64a,0x3f5f67e3,2 +np.float32,0x3ebf4010,0x3ec8f923,2 +np.float32,0xbe050dc8,0xbe05cf70,2 +np.float32,0x3f61693e,0x3fb063b0,2 +np.float32,0xbd94ac00,0xbd94ef12,2 +np.float32,0x3e9de008,0x3ea32f61,2 +np.float32,0xbe3d042c,0xbe3f3540,2 +np.float32,0x3e8fdfc0,0x3e93d9e4,2 +np.float32,0x3f28bc48,0x3f4a9019,2 +np.float32,0x3edea928,0x3eee8b09,2 +np.float32,0xbf05f673,0xbf14b362,2 +np.float32,0xbf360730,0xbf63a914,2 +np.float32,0xbe3fb454,0xbe41fe0a,2 
+np.float32,0x3f6d99a8,0x3fd28552,2 +np.float32,0xbf3ae866,0xbf6dd052,2 +np.float32,0x3f5b1164,0x3fa37aec,2 +np.float32,0xbf64a451,0xbfb7f61b,2 +np.float32,0xbdd79bd0,0xbdd86919,2 +np.float32,0x3e89fc00,0x3e8d7a85,2 +np.float32,0x3f4bf690,0x3f8b77ea,2 +np.float32,0x3cbdf280,0x3cbdfb38,2 +np.float32,0x3f138f98,0x3f2835b4,2 +np.float32,0xbe33967c,0xbe3576bc,2 +np.float32,0xbf298164,0xbf4bedda,2 +np.float32,0x3e9955cc,0x3e9e2edb,2 +np.float32,0xbf79b383,0xc00c56c0,2 +np.float32,0x3ea0834c,0x3ea61aea,2 +np.float32,0xbf511184,0xbf92c89a,2 +np.float32,0x3f4d9fba,0x3f8dc666,2 +np.float32,0x3f3387c2,0x3f5ead80,2 +np.float32,0x3e3f7360,0x3e41babb,2 +np.float32,0xbf3cc4d6,0xbf71d879,2 +np.float32,0x3f2e4402,0x3f54994e,2 +np.float32,0x3e6a7118,0x3e6eabff,2 +np.float32,0xbf05d83e,0xbf1489cc,2 +np.float32,0xbdce4fd8,0xbdcf039a,2 +np.float32,0xbf03e2f4,0xbf11dbaf,2 +np.float32,0x3f1ea0a0,0x3f397375,2 +np.float32,0x3f7aff54,0x4013cb1b,2 +np.float32,0x3f5ef158,0x3fab1801,2 +np.float32,0xbe33bcc8,0xbe359e40,2 +np.float32,0xbf04dd0e,0xbf133111,2 +np.float32,0xbf14f887,0xbf2a54d1,2 +np.float32,0x3f75c37a,0x3ff9196e,2 +np.float32,0x3f35c3c8,0x3f6320f2,2 +np.float32,0x3f53bb94,0x3f96e3c3,2 +np.float32,0x3f4d473e,0x3f8d4a19,2 +np.float32,0xbdfe19e0,0xbdff6ac9,2 +np.float32,0xbf7f0cc4,0xc049342d,2 +np.float32,0xbdbfc778,0xbdc057bb,2 +np.float32,0xbf7575b7,0xbff73067,2 +np.float32,0xbe9df488,0xbea34609,2 +np.float32,0xbefbd3c6,0xbf09daff,2 +np.float32,0x3f19962c,0x3f316cbd,2 +np.float32,0x3f7acec6,0x40129732,2 +np.float32,0xbf5db7de,0xbfa89a21,2 +np.float32,0x3f62f444,0x3fb3e830,2 +np.float32,0xbf522adb,0xbf94737f,2 +np.float32,0xbef6ceb2,0xbf0690ba,2 +np.float32,0xbf57c41e,0xbf9d8db0,2 +np.float32,0x3eb3360c,0x3ebb1eb0,2 +np.float32,0x3f29327e,0x3f4b618e,2 +np.float32,0xbf08d099,0xbf18a916,2 +np.float32,0x3ea21014,0x3ea7d369,2 +np.float32,0x3f39e516,0x3f6ba861,2 +np.float32,0x3e7c4f28,0x3e80ce08,2 +np.float32,0xbec5a7f8,0xbed07582,2 +np.float32,0xbf0b1b46,0xbf1be3e7,2 +np.float32,0xbef0e0ec,0xbf02bb2e,2 +np.float32,0x3d835a30,0x3d838869,2 +np.float32,0x3f08aa40,0x3f18736e,2 +np.float32,0x3eb0e4c8,0x3eb87bcd,2 +np.float32,0x3eb3821c,0x3ebb7564,2 +np.float32,0xbe3a7320,0xbe3c8d5a,2 +np.float32,0x3e43f8c0,0x3e466b10,2 +np.float32,0x3e914288,0x3e955b69,2 +np.float32,0x3ec7d800,0x3ed308e7,2 +np.float32,0x3e603df8,0x3e63eef2,2 +np.float32,0x3f225cac,0x3f3f9ac6,2 +np.float32,0x3e3db8f0,0x3e3ff06b,2 +np.float32,0x3f358d78,0x3f62b38c,2 +np.float32,0xbed9bd64,0xbee88158,2 +np.float32,0x800000,0x800000,2 +np.float32,0x3f1adfce,0x3f337230,2 +np.float32,0xbefdc346,0xbf0b229d,2 +np.float32,0xbf091018,0xbf190208,2 +np.float32,0xbf800000,0xff800000,2 +np.float32,0x3f27c2c4,0x3f48d8db,2 +np.float32,0x3ef59c80,0x3f05c993,2 +np.float32,0x3e18a340,0x3e19c893,2 +np.float32,0x3f209610,0x3f3ca7c5,2 +np.float32,0x3f69cc22,0x3fc60087,2 +np.float32,0xbf66cf07,0xbfbd8721,2 +np.float32,0xbf768098,0xbffdfcc4,2 +np.float32,0x3df27a40,0x3df39ec4,2 +np.float32,0x3daf5bd0,0x3dafca02,2 +np.float32,0x3f53f2be,0x3f973b41,2 +np.float32,0xbf7edcbc,0xc0436ce3,2 +np.float32,0xbdf61db8,0xbdf74fae,2 +np.float32,0x3e2c9328,0x3e2e3cb2,2 +np.float32,0x3f1a4570,0x3f327f41,2 +np.float32,0xbf766306,0xbffd32f1,2 +np.float32,0xbf468b9d,0xbf845f0f,2 +np.float32,0x3e398970,0x3e3b9bb1,2 +np.float32,0xbbefa900,0xbbefaa18,2 +np.float32,0xbf54c989,0xbf9893ad,2 +np.float32,0x3f262cf6,0x3f46169d,2 +np.float32,0x3f638a8a,0x3fb54a98,2 +np.float32,0xbeb36c78,0xbebb5cb8,2 +np.float32,0xbeac4d42,0xbeb34993,2 +np.float32,0x3f1d1942,0x3f36fbf2,2 
+np.float32,0xbf5d49ba,0xbfa7bf07,2 +np.float32,0xbf182b5c,0xbf2f38d0,2 +np.float32,0x3f41a742,0x3f7ce5ef,2 +np.float32,0x3f0b9a6c,0x3f1c9898,2 +np.float32,0x3e847494,0x3e8788f3,2 +np.float32,0xbde41608,0xbde50941,2 +np.float32,0x3f693944,0x3fc44b5a,2 +np.float32,0x3f0386b2,0x3f115e37,2 +np.float32,0x3f3a08b0,0x3f6bf3c1,2 +np.float32,0xbf78ee64,0xc0089977,2 +np.float32,0xbf013a11,0xbf0e436e,2 +np.float32,0x3f00668e,0x3f0d2836,2 +np.float32,0x3e6d9850,0x3e720081,2 +np.float32,0x3eacf578,0x3eb4075d,2 +np.float32,0x3f18aef8,0x3f3004b4,2 +np.float32,0x3de342f0,0x3de43385,2 +np.float32,0x3e56cee8,0x3e5a0b85,2 +np.float32,0xbf287912,0xbf4a1966,2 +np.float32,0x3e92c948,0x3e9704c2,2 +np.float32,0x3c07d080,0x3c07d14c,2 +np.float32,0xbe90f6a0,0xbe9508e0,2 +np.float32,0x3e8b4f28,0x3e8ee884,2 +np.float32,0xbf35b56c,0xbf6303ff,2 +np.float32,0xbef512b8,0xbf057027,2 +np.float32,0x3e36c630,0x3e38c0cd,2 +np.float32,0x3f0b3ca8,0x3f1c134a,2 +np.float32,0x3e4cd610,0x3e4fa2c5,2 +np.float32,0xbf5a8372,0xbfa273a3,2 +np.float32,0xbecaad3c,0xbed662ae,2 +np.float32,0xbec372d2,0xbecddeac,2 +np.float32,0x3f6fb2b2,0x3fda8a22,2 +np.float32,0x3f365f28,0x3f645b5a,2 +np.float32,0xbecd00fa,0xbed926a4,2 +np.float32,0xbebafa32,0xbec40672,2 +np.float32,0xbf235b73,0xbf4146c4,2 +np.float32,0x3f7a4658,0x400f6e2c,2 +np.float32,0x3f35e824,0x3f636a54,2 +np.float32,0x3cb87640,0x3cb87e3c,2 +np.float32,0xbf296288,0xbf4bb6ee,2 +np.float32,0x7f800000,0xffc00000,2 +np.float32,0xbf4de86e,0xbf8e2d1a,2 +np.float32,0xbf4ace12,0xbf89e5f3,2 +np.float32,0x3d65a300,0x3d65e0b5,2 +np.float32,0xbe10c534,0xbe11bf21,2 +np.float32,0xbeba3c1c,0xbec32b3e,2 +np.float32,0x3e87eaf8,0x3e8b40b8,2 +np.float32,0x3d5c3bc0,0x3d5c722d,2 +np.float32,0x3e8c14b8,0x3e8fbdf8,2 +np.float32,0xbf06c6f0,0xbf15d327,2 +np.float32,0xbe0f1e30,0xbe100f96,2 +np.float32,0xbee244b0,0xbef30251,2 +np.float32,0x3f2a21b0,0x3f4d0c1d,2 +np.float32,0xbf5f7f81,0xbfac408e,2 +np.float32,0xbe3dba2c,0xbe3ff1b2,2 +np.float32,0x3f3ffc22,0x3f790abf,2 +np.float32,0x3edc3dac,0x3eeb90fd,2 +np.float32,0x7f7fffff,0xffc00000,2 +np.float32,0x3ecfaaac,0x3edc5485,2 +np.float32,0x3f0affbe,0x3f1bbcd9,2 +np.float32,0x3f5f2264,0x3fab7dca,2 +np.float32,0x3f37394c,0x3f66186c,2 +np.float32,0xbe6b2f6c,0xbe6f74e3,2 +np.float32,0x3f284772,0x3f49c1f1,2 +np.float32,0xbdf27bc8,0xbdf3a051,2 +np.float32,0xbc8b14e0,0xbc8b184c,2 +np.float32,0x3f6a867c,0x3fc83b07,2 +np.float32,0x3f1ec876,0x3f39b429,2 +np.float32,0x3f6fd9a8,0x3fdb28d6,2 +np.float32,0xbf473cca,0xbf853e8c,2 +np.float32,0x3e23eff8,0x3e255c23,2 +np.float32,0x3ebefdfc,0x3ec8ac5d,2 +np.float32,0x3f6c8c22,0x3fced2b1,2 +np.float32,0x3f168388,0x3f2cad44,2 +np.float32,0xbece2410,0xbeda81ac,2 +np.float32,0x3f5532f0,0x3f993eea,2 +np.float32,0x3ef1938c,0x3f032dfa,2 +np.float32,0xbef05268,0xbf025fba,2 +np.float32,0x3f552e4a,0x3f993754,2 +np.float32,0x3e9ed068,0x3ea4392d,2 +np.float32,0xbe1a0c24,0xbe1b39be,2 +np.float32,0xbf2623aa,0xbf46068c,2 +np.float32,0xbe1cc300,0xbe1e00fc,2 +np.float32,0xbe9c0576,0xbea12397,2 +np.float32,0xbd827338,0xbd82a07e,2 +np.float32,0x3f0fc31a,0x3f229786,2 +np.float32,0x3e577810,0x3e5abc7d,2 +np.float32,0x3e0e1cb8,0x3e0f0906,2 +np.float32,0x3e84d344,0x3e87ee73,2 +np.float32,0xbf39c45e,0xbf6b6337,2 +np.float32,0x3edfb25c,0x3eefd273,2 +np.float32,0x3e016398,0x3e021596,2 +np.float32,0xbefeb1be,0xbf0bc0de,2 +np.float32,0x3f37e104,0x3f677196,2 +np.float32,0x3f545316,0x3f97d500,2 +np.float32,0xbefc165a,0xbf0a06ed,2 +np.float32,0xbf0923e6,0xbf191dcd,2 +np.float32,0xbf386508,0xbf68831f,2 +np.float32,0xbf3d4630,0xbf72f4e1,2 
+np.float32,0x3f3dbe82,0x3f73ff13,2 +np.float32,0xbf703de4,0xbfdcc7e2,2 +np.float32,0xbf531482,0xbf95dd1a,2 +np.float32,0xbf0af1b6,0xbf1ba8f4,2 +np.float32,0xbec8fd9c,0xbed463a4,2 +np.float32,0xbe230320,0xbe24691a,2 +np.float32,0xbf7de541,0xc02faf38,2 +np.float32,0x3efd2360,0x3f0ab8b7,2 +np.float32,0x3db7f350,0x3db87291,2 +np.float32,0x3e74c510,0x3e799924,2 +np.float32,0x3da549c0,0x3da5a5fc,2 +np.float32,0x3e8a3bc4,0x3e8dbf4a,2 +np.float32,0xbf69f086,0xbfc66e84,2 +np.float32,0x3f323f8e,0x3f5c2c17,2 +np.float32,0x3ec0ae3c,0x3ecaa334,2 +np.float32,0xbebe8966,0xbec824fc,2 +np.float32,0x3f34691e,0x3f606b13,2 +np.float32,0x3f13790e,0x3f2813f5,2 +np.float32,0xbf61c027,0xbfb12618,2 +np.float32,0x3e90c690,0x3e94d4a1,2 +np.float32,0xbefce8f0,0xbf0a920e,2 +np.float32,0xbf5c0e8a,0xbfa559a7,2 +np.float32,0x3f374f60,0x3f6645b6,2 +np.float32,0x3f25f6fa,0x3f45b967,2 +np.float32,0x3f2421aa,0x3f42963a,2 +np.float32,0x3ebfa328,0x3ec96c57,2 +np.float32,0x3e3bef28,0x3e3e1685,2 +np.float32,0x3ea3fa3c,0x3ea9f4dd,2 +np.float32,0x3f362b8e,0x3f63f2b2,2 +np.float32,0xbedcef18,0xbeec6ada,2 +np.float32,0xbdd29c88,0xbdd35bd0,2 +np.float32,0x3f261aea,0x3f45f76f,2 +np.float32,0xbe62c470,0xbe66965e,2 +np.float32,0x7fc00000,0x7fc00000,2 +np.float32,0xbee991aa,0xbefc277b,2 +np.float32,0xbf571960,0xbf9c6923,2 +np.float32,0xbe6fb410,0xbe743b41,2 +np.float32,0x3eb1bed0,0x3eb9738d,2 +np.float32,0x80000000,0x80000000,2 +np.float32,0x3eddcbe4,0x3eed7a69,2 +np.float32,0xbf2a81ba,0xbf4db86d,2 +np.float32,0x3f74da54,0x3ff38737,2 +np.float32,0xbeb6bff4,0xbebf29f4,2 +np.float32,0x3f445752,0x3f81a698,2 +np.float32,0x3ed081b4,0x3edd5618,2 +np.float32,0xbee73802,0xbef931b4,2 +np.float32,0xbd13f2a0,0xbd14031c,2 +np.float32,0xbb4d1200,0xbb4d122c,2 +np.float32,0xbee8777a,0xbefac393,2 +np.float32,0x3f42047c,0x3f7dc06c,2 +np.float32,0xbd089270,0xbd089f67,2 +np.float32,0xbf628c16,0xbfb2f66b,2 +np.float32,0x3e72e098,0x3e77978d,2 +np.float32,0x3ed967cc,0x3ee818e4,2 +np.float32,0x3e284c80,0x3e29d6d9,2 +np.float32,0x3f74e8ba,0x3ff3dbef,2 +np.float32,0x3f013e86,0x3f0e4969,2 +np.float32,0xbf610d4f,0xbfaf983c,2 +np.float32,0xbf3c8d36,0xbf715eba,2 +np.float32,0xbedbc756,0xbeeaffdb,2 +np.float32,0x3e143ec8,0x3e154b4c,2 +np.float32,0xbe1c9808,0xbe1dd4fc,2 +np.float32,0xbe887a1e,0xbe8bdac5,2 +np.float32,0xbe85c4bc,0xbe88f17a,2 +np.float32,0x3f35967e,0x3f62c5b4,2 +np.float32,0x3ea2c4a4,0x3ea89c2d,2 +np.float32,0xbc8703c0,0xbc8706e1,2 +np.float32,0xbf13d52c,0xbf289dff,2 +np.float32,0xbf63bb56,0xbfb5bf29,2 +np.float32,0xbf61c5ef,0xbfb13319,2 +np.float32,0xbf128410,0xbf26a675,2 +np.float32,0x3f03fcf2,0x3f11ff13,2 +np.float32,0xbe49c924,0xbe4c75cd,2 +np.float32,0xbf211a9c,0xbf3d82c5,2 +np.float32,0x3f7e9d52,0x403d1b42,2 +np.float32,0x3edfefd4,0x3ef01e71,2 +np.float32,0x3ebc5bd8,0x3ec59efb,2 +np.float32,0x3d7b02e0,0x3d7b537f,2 +np.float32,0xbf1163ba,0xbf24fb43,2 +np.float32,0x3f5072f2,0x3f91dbf1,2 +np.float32,0xbee700ce,0xbef8ec60,2 +np.float32,0x3f534168,0x3f962359,2 +np.float32,0x3e6d6c40,0x3e71d1ef,2 +np.float32,0x3def9d70,0x3df0b7a8,2 +np.float32,0x3e89cf80,0x3e8d4a8a,2 +np.float32,0xbf687ca7,0xbfc2290f,2 +np.float32,0x3f35e134,0x3f635c51,2 +np.float32,0x3e59eef8,0x3e5d50fa,2 +np.float32,0xbf65c9e1,0xbfbada61,2 +np.float32,0xbf759292,0xbff7e43d,2 +np.float32,0x3f4635a0,0x3f83f372,2 +np.float32,0x3f29baaa,0x3f4c53f1,2 +np.float32,0x3f6b15a6,0x3fc9fe04,2 +np.float32,0x3edabc88,0x3ee9b922,2 +np.float32,0x3ef382e0,0x3f046d4d,2 +np.float32,0xbe351310,0xbe36ff7f,2 +np.float32,0xbf05c935,0xbf14751c,2 +np.float32,0xbf0e7c50,0xbf20bc24,2 
+np.float32,0xbf69bc94,0xbfc5d1b8,2 +np.float32,0xbed41aca,0xbee1aa23,2 +np.float32,0x3f518c08,0x3f938162,2 +np.float32,0xbf3d7974,0xbf73661a,2 +np.float32,0x3f1951a6,0x3f3101c9,2 +np.float32,0xbeb3f436,0xbebbf787,2 +np.float32,0xbf77a190,0xc0031d43,2 +np.float32,0x3eb5b3cc,0x3ebdf6e7,2 +np.float32,0xbed534b4,0xbee2fed2,2 +np.float32,0xbe53e1b8,0xbe56fc56,2 +np.float32,0x3f679e20,0x3fbfb91c,2 +np.float32,0xff7fffff,0xffc00000,2 +np.float32,0xbf7b9bcb,0xc0180073,2 +np.float32,0xbf5635e8,0xbf9aea15,2 +np.float32,0xbe5a3318,0xbe5d9856,2 +np.float32,0xbe003284,0xbe00df9a,2 +np.float32,0x3eb119a4,0x3eb8b7d6,2 +np.float32,0xbf3bccf8,0xbf6fbc84,2 +np.float32,0x3f36f600,0x3f658ea8,2 +np.float32,0x3f1ea834,0x3f397fc2,2 +np.float32,0xbe7cfb54,0xbe8129b3,2 +np.float32,0xbe9b3746,0xbea0406a,2 +np.float32,0x3edc0f90,0x3eeb586c,2 +np.float32,0x3e1842e8,0x3e19660c,2 +np.float32,0xbd8f10b0,0xbd8f4c70,2 +np.float32,0xbf064aca,0xbf1527a2,2 +np.float32,0x3e632e58,0x3e6705be,2 +np.float32,0xbef28ba4,0xbf03cdbb,2 +np.float32,0x3f27b21e,0x3f48bbaf,2 +np.float32,0xbe6f30d4,0xbe73b06e,2 +np.float32,0x3f3e6cb0,0x3f75834b,2 +np.float32,0xbf264aa5,0xbf4649f0,2 +np.float32,0xbf690775,0xbfc3b978,2 +np.float32,0xbf3e4a38,0xbf753632,2 +np.float64,0x3fe12bbe8c62577e,0x3fe32de8e5f961b0,2 +np.float64,0x3fc9b8909b337120,0x3fca1366da00efff,2 +np.float64,0x3feaee4245f5dc84,0x3ff3a011ea0432f3,2 +np.float64,0xbfe892c000f12580,0xbff03e5adaed6f0c,2 +np.float64,0xbf9be8de4837d1c0,0xbf9beaa367756bd1,2 +np.float64,0x3fe632e58fec65cc,0x3feb5ccc5114ca38,2 +np.float64,0x3fe78a0ef7ef141e,0x3fee1b4521d8eb6c,2 +np.float64,0x3feec27a65fd84f4,0x3fff643c8318e81e,2 +np.float64,0x3fbed6efce3dade0,0x3fbefd76cff00111,2 +np.float64,0xbfe3a05fab6740c0,0xbfe6db078aeeb0ca,2 +np.float64,0x3fdca11a56b94234,0x3fdece9e6eacff1b,2 +np.float64,0x3fe0fb15aae1f62c,0x3fe2e9e095ec2089,2 +np.float64,0x3fede12abf7bc256,0x3ffafd0ff4142807,2 +np.float64,0x3feb919edcf7233e,0x3ff4c9aa0bc2432f,2 +np.float64,0x3fd39633b5a72c68,0x3fd43c2e6d5f441c,2 +np.float64,0x3fd9efcbfeb3df98,0x3fdb83f03e58f91c,2 +np.float64,0x3fe2867a36650cf4,0x3fe525858c8ce72e,2 +np.float64,0x3fdacbb8f3b59770,0x3fdc8cd431b6e3ff,2 +np.float64,0x3fcc120503382408,0x3fcc88a8fa43e1c6,2 +np.float64,0xbfd99ff4eab33fea,0xbfdb24a20ae3687d,2 +np.float64,0xbfe8caf0157195e0,0xbff083b8dd0941d3,2 +np.float64,0x3fddc9bf92bb9380,0x3fe022aac0f761d5,2 +np.float64,0x3fe2dbb66e65b76c,0x3fe5a6e7caf3f1f2,2 +np.float64,0x3fe95f5c4a72beb8,0x3ff1444697e96138,2 +np.float64,0xbfc6b163d92d62c8,0xbfc6ef6e006658a1,2 +np.float64,0x3fdf1b2616be364c,0x3fe0fcbd2848c9e8,2 +np.float64,0xbfdca1ccf7b9439a,0xbfdecf7dc0eaa663,2 +np.float64,0x3fe078d6a260f1ae,0x3fe236a7c66ef6c2,2 +np.float64,0x3fdf471bb9be8e38,0x3fe11990ec74e704,2 +np.float64,0xbfe417626be82ec5,0xbfe79c9aa5ed2e2f,2 +np.float64,0xbfeb9cf5677739eb,0xbff4dfc24c012c90,2 +np.float64,0x3f8d9142b03b2280,0x3f8d91c9559d4779,2 +np.float64,0x3fb052c67220a590,0x3fb05873c90d1cd6,2 +np.float64,0x3fd742e2c7ae85c4,0x3fd860128947d15d,2 +np.float64,0x3fec2e2a2bf85c54,0x3ff60eb554bb8d71,2 +np.float64,0xbfeb2b8bc8f65718,0xbff40b734679497a,2 +np.float64,0x3fe25f8e0d64bf1c,0x3fe4eb381d077803,2 +np.float64,0x3fe56426256ac84c,0x3fe9dafbe79370f0,2 +np.float64,0x3feecc1e5d7d983c,0x3fffa49bedc7aa25,2 +np.float64,0xbfc88ce94b3119d4,0xbfc8dbba0fdee2d2,2 +np.float64,0xbfabcf51ac379ea0,0xbfabd6552aa63da3,2 +np.float64,0xbfccc8b849399170,0xbfcd48d6ff057a4d,2 +np.float64,0x3fd2f831e8a5f064,0x3fd38e67b0dda905,2 +np.float64,0x3fcafdcd6135fb98,0x3fcb670ae2ef4d36,2 
+np.float64,0x3feda6042efb4c08,0x3ffa219442ac4ea5,2 +np.float64,0x3fed382b157a7056,0x3ff8bc01bc6d10bc,2 +np.float64,0x3fed858a50fb0b14,0x3ff9b1c05cb6cc0f,2 +np.float64,0x3fcc3960653872c0,0x3fccb2045373a3d1,2 +np.float64,0xbfec5177e478a2f0,0xbff65eb4557d94eb,2 +np.float64,0x3feafe0d5e75fc1a,0x3ff3bb4a260a0dcb,2 +np.float64,0x3fe08bc87ee11790,0x3fe25078aac99d31,2 +np.float64,0xffefffffffffffff,0xfff8000000000000,2 +np.float64,0x3f79985ce0333100,0x3f799872b591d1cb,2 +np.float64,0xbfd4001cf9a8003a,0xbfd4b14b9035b94f,2 +np.float64,0x3fe54a17e6ea9430,0x3fe9ac0f18682343,2 +np.float64,0xbfb4e07fea29c100,0xbfb4ec6520dd0689,2 +np.float64,0xbfed2b6659fa56cd,0xbff895ed57dc1450,2 +np.float64,0xbfe81fc8b5f03f92,0xbfef6b95e72a7a7c,2 +np.float64,0xbfe6aced16ed59da,0xbfec4ce131ee3704,2 +np.float64,0xbfe599f30ceb33e6,0xbfea3d07c1cd78e2,2 +np.float64,0xbfe0ff278b61fe4f,0xbfe2ef8b5efa89ed,2 +np.float64,0xbfe3e9406467d281,0xbfe750e43e841736,2 +np.float64,0x3fcc6b52cf38d6a8,0x3fcce688f4fb2cf1,2 +np.float64,0xbfc890e8133121d0,0xbfc8dfdfee72d258,2 +np.float64,0x3fe46e81dbe8dd04,0x3fe82e09783811a8,2 +np.float64,0x3fd94455e5b288ac,0x3fdab7cef2de0b1f,2 +np.float64,0xbfe82151fff042a4,0xbfef6f254c9696ca,2 +np.float64,0x3fcee1ac1d3dc358,0x3fcf80a6ed07070a,2 +np.float64,0x3fcce8f90939d1f0,0x3fcd6ad18d34f8b5,2 +np.float64,0x3fd6afe56fad5fcc,0x3fd7b7567526b1fb,2 +np.float64,0x3fb1a77092234ee0,0x3fb1ae9fe0d176fc,2 +np.float64,0xbfeb758b0d76eb16,0xbff493d105652edc,2 +np.float64,0xbfb857c24e30af88,0xbfb86aa4da3be53f,2 +np.float64,0x3fe89064eff120ca,0x3ff03b7c5b3339a8,2 +np.float64,0xbfc1bd2fef237a60,0xbfc1da99893473ed,2 +np.float64,0xbfe5ad6e2eeb5adc,0xbfea60ed181b5c05,2 +np.float64,0x3fd5a66358ab4cc8,0x3fd6899e640aeb1f,2 +np.float64,0xbfe198e832e331d0,0xbfe3c8c9496d0de5,2 +np.float64,0xbfdaa5c0d7b54b82,0xbfdc5ed7d3c5ce49,2 +np.float64,0x3fcceccb6939d998,0x3fcd6ed88c2dd3a5,2 +np.float64,0xbfe44413eae88828,0xbfe7e6cd32b34046,2 +np.float64,0xbfc7cbeccf2f97d8,0xbfc8139a2626edae,2 +np.float64,0x3fbf31e4fa3e63d0,0x3fbf59c6e863255e,2 +np.float64,0x3fdf03fa05be07f4,0x3fe0ed953f7989ad,2 +np.float64,0x3fe7f4eaceefe9d6,0x3fef092ca7e2ac39,2 +np.float64,0xbfc084e9d92109d4,0xbfc09ca10fd6aaea,2 +np.float64,0xbf88cfbf70319f80,0xbf88d00effa6d897,2 +np.float64,0x7ff4000000000000,0x7ffc000000000000,2 +np.float64,0xbfa0176e9c202ee0,0xbfa018ca0a6ceef3,2 +np.float64,0xbfd88d0815b11a10,0xbfd9dfc6c6bcbe4e,2 +np.float64,0x3fe89f7730713eee,0x3ff04de52fb536f3,2 +np.float64,0xbfedc9707bfb92e1,0xbffaa25fcf9dd6da,2 +np.float64,0x3fe936d1a6726da4,0x3ff10e40c2d94bc9,2 +np.float64,0x3fdb64aec7b6c95c,0x3fdd473177317b3f,2 +np.float64,0xbfee4f9aaefc9f35,0xbffcdd212667003c,2 +np.float64,0x3fe3730067e6e600,0x3fe692b0a0babf5f,2 +np.float64,0xbfc257e58924afcc,0xbfc27871f8c218d7,2 +np.float64,0x3fe62db12dec5b62,0x3feb52c61b97d9f6,2 +np.float64,0xbfe3ff491367fe92,0xbfe774f1b3a96fd6,2 +np.float64,0x3fea43255274864a,0x3ff28b0c4b7b8d21,2 +np.float64,0xbfea37923c746f24,0xbff27962159f2072,2 +np.float64,0x3fcd0ac3c73a1588,0x3fcd8e6f8de41755,2 +np.float64,0xbfdccafde6b995fc,0xbfdf030fea8a0630,2 +np.float64,0x3fdba35268b746a4,0x3fdd94094f6f50c1,2 +np.float64,0x3fc68ea1d92d1d40,0x3fc6cb8d07cbb0e4,2 +np.float64,0xbfb88b1f6e311640,0xbfb89e7af4e58778,2 +np.float64,0xbfedc7cadffb8f96,0xbffa9c3766227956,2 +np.float64,0x3fe7928d3eef251a,0x3fee2dcf2ac7961b,2 +np.float64,0xbfeff42ede7fe85e,0xc00cef6b0f1e8323,2 +np.float64,0xbfebf07fa477e0ff,0xbff5893f99e15236,2 +np.float64,0x3fe3002ab9660056,0x3fe5defba550c583,2 +np.float64,0x3feb8f4307f71e86,0x3ff4c517ec8d6de9,2 
+np.float64,0x3fd3c16f49a782e0,0x3fd46becaacf74da,2 +np.float64,0x3fc7613df12ec278,0x3fc7a52b2a3c3368,2 +np.float64,0xbfe33af560e675eb,0xbfe63a6528ff1587,2 +np.float64,0xbfde86495abd0c92,0xbfe09bd7ba05b461,2 +np.float64,0x3fe1e7fb4ee3cff6,0x3fe43b04311c0ab6,2 +np.float64,0xbfc528b6bd2a516c,0xbfc55ae0a0c184c8,2 +np.float64,0xbfd81025beb0204c,0xbfd94dd72d804613,2 +np.float64,0x10000000000000,0x10000000000000,2 +np.float64,0x3fc1151c47222a38,0x3fc12f5aad80a6bf,2 +np.float64,0x3feafa136775f426,0x3ff3b46854da0b3a,2 +np.float64,0x3fed2da0747a5b40,0x3ff89c85b658459e,2 +np.float64,0x3fda2a4b51b45498,0x3fdbca0d908ddbbd,2 +np.float64,0xbfd04cf518a099ea,0xbfd0aae0033b9e4c,2 +np.float64,0xbfb9065586320ca8,0xbfb91adb7e31f322,2 +np.float64,0xbfd830b428b06168,0xbfd973ca3c484d8d,2 +np.float64,0x3fc952f7ed32a5f0,0x3fc9a9994561fc1a,2 +np.float64,0xbfeb06c83c760d90,0xbff3ca77b326df20,2 +np.float64,0xbfeb1c98ac763931,0xbff3f0d0900f6149,2 +np.float64,0x3fdf061dbebe0c3c,0x3fe0eefb32b48d17,2 +np.float64,0xbf9acbaf28359760,0xbf9acd4024be9fec,2 +np.float64,0x3fec0adde2f815bc,0x3ff5c1628423794d,2 +np.float64,0xbfc4bc750d2978ec,0xbfc4eba43f590b94,2 +np.float64,0x3fdbe47878b7c8f0,0x3fdde44a2b500d73,2 +np.float64,0x3fe160d18162c1a4,0x3fe378cff08f18f0,2 +np.float64,0x3fc3b58dfd276b18,0x3fc3de01d3802de9,2 +np.float64,0x3fa860343430c060,0x3fa864ecd07ec962,2 +np.float64,0x3fcaebfb4b35d7f8,0x3fcb546512d1b4c7,2 +np.float64,0x3fe3fda558e7fb4a,0x3fe772412e5776de,2 +np.float64,0xbfe8169f2c702d3e,0xbfef5666c9a10f6d,2 +np.float64,0x3feda78e9efb4f1e,0x3ffa270712ded769,2 +np.float64,0xbfda483161b49062,0xbfdbedfbf2e850ba,2 +np.float64,0x3fd7407cf3ae80f8,0x3fd85d4f52622743,2 +np.float64,0xbfd63de4d4ac7bca,0xbfd73550a33e3c32,2 +np.float64,0xbfd9c30b90b38618,0xbfdb4e7695c856f3,2 +np.float64,0x3fcd70c00b3ae180,0x3fcdfa0969e0a119,2 +np.float64,0x3feb4f127f769e24,0x3ff44bf42514e0f4,2 +np.float64,0xbfec1db44af83b69,0xbff5ea54aed1f8e9,2 +np.float64,0x3fd68ff051ad1fe0,0x3fd792d0ed6d6122,2 +np.float64,0x3fe0a048a5614092,0x3fe26c80a826b2a2,2 +np.float64,0x3fd59f3742ab3e70,0x3fd6818563fcaf80,2 +np.float64,0x3fca26ecf9344dd8,0x3fca867ceb5d7ba8,2 +np.float64,0x3fdc1d547ab83aa8,0x3fde2a9cea866484,2 +np.float64,0xbfc78df6312f1bec,0xbfc7d3719b698a39,2 +np.float64,0x3fe754e72b6ea9ce,0x3feda89ea844a2e5,2 +np.float64,0x3fe740c1a4ee8184,0x3fed7dc56ec0c425,2 +np.float64,0x3fe77566a9eeeace,0x3fedee6f408df6de,2 +np.float64,0xbfbbf5bf8e37eb80,0xbfbc126a223781b4,2 +np.float64,0xbfe0acb297615965,0xbfe27d86681ca2b5,2 +np.float64,0xbfc20a0487241408,0xbfc228f5f7d52ce8,2 +np.float64,0xfff0000000000000,0xfff8000000000000,2 +np.float64,0x3fef98a4dbff314a,0x40043cfb60bd46fa,2 +np.float64,0x3fd059102ca0b220,0x3fd0b7d2be6d7822,2 +np.float64,0x3fe89f18a1f13e32,0x3ff04d714bbbf400,2 +np.float64,0x3fd45b6275a8b6c4,0x3fd516a44a276a4b,2 +np.float64,0xbfe04463e86088c8,0xbfe1ef9dfc9f9a53,2 +np.float64,0xbfe086e279610dc5,0xbfe249c9c1040a13,2 +np.float64,0x3f89c9b110339380,0x3f89ca0a641454b5,2 +np.float64,0xbfb5f5b4322beb68,0xbfb6038dc3fd1516,2 +np.float64,0x3fe6eae76f6dd5ce,0x3feccabae04d5c14,2 +np.float64,0x3fa9ef6c9c33dee0,0x3fa9f51c9a8c8a2f,2 +np.float64,0xbfe171b45f62e368,0xbfe390ccc4c01bf6,2 +np.float64,0x3fb2999442253330,0x3fb2a1fc006804b5,2 +np.float64,0x3fd124bf04a24980,0x3fd1927abb92472d,2 +np.float64,0xbfe6e05938edc0b2,0xbfecb519ba78114f,2 +np.float64,0x3fed466ee6fa8cde,0x3ff8e75405b50490,2 +np.float64,0xbfb999aa92333358,0xbfb9afa4f19f80a2,2 +np.float64,0xbfe98969ed7312d4,0xbff17d887b0303e7,2 +np.float64,0x3fe782843e6f0508,0x3fee0adbeebe3486,2 
+np.float64,0xbfe232fcc26465fa,0xbfe4a90a68d46040,2 +np.float64,0x3fd190a90fa32154,0x3fd206f56ffcdca2,2 +np.float64,0xbfc4f8b75929f170,0xbfc5298b2d4e7740,2 +np.float64,0xbfba3a63d63474c8,0xbfba520835c2fdc2,2 +np.float64,0xbfb7708eea2ee120,0xbfb781695ec17846,2 +np.float64,0x3fed9fb7a5fb3f70,0x3ffa0b717bcd1609,2 +np.float64,0xbfc1b158cd2362b0,0xbfc1ce87345f3473,2 +np.float64,0x3f963478082c6900,0x3f96355c3000953b,2 +np.float64,0x3fc5050e532a0a20,0x3fc536397f38f616,2 +np.float64,0x3fe239f9eee473f4,0x3fe4b360da3b2faa,2 +np.float64,0xbfd66bd80eacd7b0,0xbfd769a29fd784c0,2 +np.float64,0x3fc57cdad52af9b8,0x3fc5b16b937f5f72,2 +np.float64,0xbfd3c36a0aa786d4,0xbfd46e1cd0b4eddc,2 +np.float64,0x3feff433487fe866,0x400cf0ea1def3161,2 +np.float64,0xbfed5577807aaaef,0xbff915e8f6bfdf22,2 +np.float64,0xbfca0dd3eb341ba8,0xbfca6c4d11836cb6,2 +np.float64,0x7ff8000000000000,0x7ff8000000000000,2 +np.float64,0xbf974deaa82e9be0,0xbf974ef26a3130d1,2 +np.float64,0xbfe7f425e1efe84c,0xbfef076cb00d649d,2 +np.float64,0xbfe4413605e8826c,0xbfe7e20448b8a4b1,2 +np.float64,0xbfdfad202cbf5a40,0xbfe15cd9eb2be707,2 +np.float64,0xbfe43261ee6864c4,0xbfe7c952c951fe33,2 +np.float64,0xbfec141225782824,0xbff5d54d33861d98,2 +np.float64,0x3fd0f47abaa1e8f4,0x3fd15e8691a7f1c2,2 +np.float64,0x3fd378f0baa6f1e0,0x3fd41bea4a599081,2 +np.float64,0xbfb52523462a4a48,0xbfb5317fa7f436e2,2 +np.float64,0x3fcb30797d3660f0,0x3fcb9c174ea401ff,2 +np.float64,0xbfd48480dea90902,0xbfd5446e02c8b329,2 +np.float64,0xbfee4ae3ab7c95c7,0xbffcc650340ba274,2 +np.float64,0xbfeab086d075610e,0xbff3387f4e83ae26,2 +np.float64,0x3fa17cddf422f9c0,0x3fa17e9bf1b25736,2 +np.float64,0xbfe3064536e60c8a,0xbfe5e86aa5244319,2 +np.float64,0x3feb2882c5765106,0x3ff40604c7d97d44,2 +np.float64,0xbfa6923ff42d2480,0xbfa695ff57b2fc3f,2 +np.float64,0xbfa8bdbdcc317b80,0xbfa8c2ada0d94aa7,2 +np.float64,0x3fe7f16b8e6fe2d8,0x3fef013948c391a6,2 +np.float64,0x3fe4e7169f69ce2e,0x3fe8fceef835050a,2 +np.float64,0x3fed877638fb0eec,0x3ff9b83694127959,2 +np.float64,0xbfe0cc9ecf61993e,0xbfe2a978234cbde5,2 +np.float64,0xbfe977e79672efcf,0xbff16589ea494a38,2 +np.float64,0xbfe240130ae48026,0xbfe4bc69113e0d7f,2 +np.float64,0x3feb1e9b70763d36,0x3ff3f4615938a491,2 +np.float64,0xbfdf197dfcbe32fc,0xbfe0fba78a0fc816,2 +np.float64,0xbfee0f8543fc1f0a,0xbffbb9d9a4ee5387,2 +np.float64,0x3fe88d2191f11a44,0x3ff037843b5b6313,2 +np.float64,0xbfd11bb850a23770,0xbfd188c1cef40007,2 +np.float64,0xbfa1b36e9c2366e0,0xbfa1b53d1d8a8bc4,2 +np.float64,0xbfea2d70d9f45ae2,0xbff26a0629e36b3e,2 +np.float64,0xbfd9188703b2310e,0xbfda83f9ddc18348,2 +np.float64,0xbfee194894fc3291,0xbffbe3c83b61e7cb,2 +np.float64,0xbfe093b4a9e1276a,0xbfe25b4ad6f8f83d,2 +np.float64,0x3fea031489f4062a,0x3ff22accc000082e,2 +np.float64,0xbfc6c0827b2d8104,0xbfc6ff0a94326381,2 +np.float64,0x3fef5cd340feb9a6,0x4002659c5a1b34af,2 +np.float64,0x8010000000000000,0x8010000000000000,2 +np.float64,0x3fd97cb533b2f96c,0x3fdafab28aaae8e3,2 +np.float64,0x3fe2123334642466,0x3fe478bd83a8ce02,2 +np.float64,0xbfd9a69637b34d2c,0xbfdb2c87c6b6fb8c,2 +np.float64,0x3fc58def7f2b1be0,0x3fc5c2ff724a9f61,2 +np.float64,0xbfedd5da1f7babb4,0xbffad15949b7fb22,2 +np.float64,0x3fe90e92a0721d26,0x3ff0d9b64323efb8,2 +np.float64,0x3fd34b9442a69728,0x3fd3e9f8fe80654e,2 +np.float64,0xbfc5f509ab2bea14,0xbfc62d2ad325c59f,2 +np.float64,0x3feb245634f648ac,0x3ff3fe91a46acbe1,2 +np.float64,0x3fd101e539a203cc,0x3fd16cf52ae6d203,2 +np.float64,0xbfc51e9ba72a3d38,0xbfc5507d00521ba3,2 +np.float64,0x3fe5fe1683ebfc2e,0x3feaf7dd8b1f92b0,2 +np.float64,0x3fc362e59126c5c8,0x3fc389601814170b,2 
+np.float64,0x3fea34dbd77469b8,0x3ff27542eb721e7e,2 +np.float64,0xbfc13ed241227da4,0xbfc159d42c0a35a9,2 +np.float64,0xbfe6df118cedbe23,0xbfecb27bb5d3f784,2 +np.float64,0x3fd92895f6b2512c,0x3fda96f5f94b625e,2 +np.float64,0xbfe7ea3aa76fd476,0xbfeef0e93939086e,2 +np.float64,0xbfc855498330aa94,0xbfc8a1ff690c9533,2 +np.float64,0x3fd9f27b3ab3e4f8,0x3fdb8726979afc3b,2 +np.float64,0x3fc65d52232cbaa8,0x3fc698ac4367afba,2 +np.float64,0x3fd1271dd0a24e3c,0x3fd195087649d54e,2 +np.float64,0xbfe983445df30689,0xbff175158b773b90,2 +np.float64,0xbfe0d9b13261b362,0xbfe2bb8908fc9e6e,2 +np.float64,0x3fd7671f2aaece40,0x3fd889dccbf21629,2 +np.float64,0x3fe748aebfee915e,0x3fed8e970d94c17d,2 +np.float64,0x3fea756e4e74eadc,0x3ff2d947ef3a54f4,2 +np.float64,0x3fde22311cbc4464,0x3fe05b4ce9df1fdd,2 +np.float64,0x3fe2b55ec1e56abe,0x3fe56c6849e3985a,2 +np.float64,0x3fed7b47437af68e,0x3ff98f8e82de99a0,2 +np.float64,0x3fec8184b179030a,0x3ff6d03aaf0135ba,2 +np.float64,0x3fc9ea825533d508,0x3fca4776d7190e71,2 +np.float64,0xbfe8ddd58b71bbab,0xbff09b770ed7bc9a,2 +np.float64,0xbfed41741bfa82e8,0xbff8d81c2a9fc615,2 +np.float64,0x3fe0a73888e14e72,0x3fe27602ad9a3726,2 +np.float64,0xbfe9d0a565f3a14b,0xbff1e1897b628f66,2 +np.float64,0x3fda12b381b42568,0x3fdbadbec22fbd5a,2 +np.float64,0x3fef0081187e0102,0x4000949eff8313c2,2 +np.float64,0x3fef6942b67ed286,0x4002b7913eb1ee76,2 +np.float64,0x3fda10f882b421f0,0x3fdbababa2d6659d,2 +np.float64,0x3fe5828971eb0512,0x3fea122b5088315a,2 +np.float64,0x3fe9d4b53ff3a96a,0x3ff1e75c148bda01,2 +np.float64,0x3fe95d246bf2ba48,0x3ff1414a61a136ec,2 +np.float64,0x3f9e575eb83caec0,0x3f9e59a4f17179e3,2 +np.float64,0x3fdb0a20b5b61440,0x3fdcd8a56178a17f,2 +np.float64,0xbfdef425e3bde84c,0xbfe0e33eeacf3861,2 +np.float64,0x3fd6afcf6bad5fa0,0x3fd7b73d47288347,2 +np.float64,0x3fe89256367124ac,0x3ff03dd9f36ce40e,2 +np.float64,0x3fe7e560fcefcac2,0x3feee5ef8688b60b,2 +np.float64,0x3fedef55e1fbdeac,0x3ffb350ee1df986b,2 +np.float64,0xbfe44b926de89725,0xbfe7f3539910c41f,2 +np.float64,0x3fc58310f32b0620,0x3fc5b7cfdba15bd0,2 +np.float64,0x3f736d256026da00,0x3f736d2eebe91a90,2 +np.float64,0x3feb012d2076025a,0x3ff3c0b5d21a7259,2 +np.float64,0xbfe466a6c468cd4e,0xbfe820c9c197601f,2 +np.float64,0x3fe1aba8aa635752,0x3fe3e3b73920f64c,2 +np.float64,0x3fe5597c336ab2f8,0x3fe9c7bc4b765b15,2 +np.float64,0x3fe1004ac5e20096,0x3fe2f12116e99821,2 +np.float64,0x3fecbc67477978ce,0x3ff76377434dbdad,2 +np.float64,0x3fe0e64515e1cc8a,0x3fe2ccf5447c1579,2 +np.float64,0x3febcfa874f79f50,0x3ff54528f0822144,2 +np.float64,0x3fc36915ed26d228,0x3fc38fb5b28d3f72,2 +np.float64,0xbfe01213e5e02428,0xbfe1ac0e1e7418f1,2 +np.float64,0x3fcd97875b3b2f10,0x3fce22fe3fc98702,2 +np.float64,0xbfe30383c5e60708,0xbfe5e427e62cc957,2 +np.float64,0xbfde339bf9bc6738,0xbfe0667f337924f5,2 +np.float64,0xbfda7c1c49b4f838,0xbfdc2c8801ce654a,2 +np.float64,0x3fb6b3489e2d6690,0x3fb6c29650387b92,2 +np.float64,0xbfe1fd4d76e3fa9b,0xbfe45a1f60077678,2 +np.float64,0xbf67c5e0402f8c00,0xbf67c5e49fce115a,2 +np.float64,0xbfd4f9aa2da9f354,0xbfd5c759603d0b9b,2 +np.float64,0x3fe83c227bf07844,0x3fefada9f1bd7fa9,2 +np.float64,0xbf97f717982fee20,0xbf97f836701a8cd5,2 +np.float64,0x3fe9688a2472d114,0x3ff150aa575e7d51,2 +np.float64,0xbfc5a9779d2b52f0,0xbfc5df56509c48b1,2 +np.float64,0xbfe958d5f472b1ac,0xbff13b813f9bee20,2 +np.float64,0xbfd7b3b944af6772,0xbfd8e276c2b2920f,2 +np.float64,0x3fed10198e7a2034,0x3ff8469c817572f0,2 +np.float64,0xbfeeecc4517dd989,0xc000472b1f858be3,2 +np.float64,0xbfdbcce47eb799c8,0xbfddc734aa67812b,2 +np.float64,0xbfd013ee24a027dc,0xbfd06df3089384ca,2 
+np.float64,0xbfd215f2bfa42be6,0xbfd29774ffe26a74,2 +np.float64,0x3fdfd0ae67bfa15c,0x3fe1746e3a963a9f,2 +np.float64,0xbfc84aa10b309544,0xbfc896f0d25b723a,2 +np.float64,0xbfcd0c627d3a18c4,0xbfcd9024c73747a9,2 +np.float64,0x3fd87df6dbb0fbec,0x3fd9ce1dde757f31,2 +np.float64,0xbfdad85e05b5b0bc,0xbfdc9c2addb6ce47,2 +np.float64,0xbfee4f8977fc9f13,0xbffcdccd68e514b3,2 +np.float64,0x3fa5c290542b8520,0x3fa5c5ebdf09ca70,2 +np.float64,0xbfd7e401d2afc804,0xbfd91a7e4eb5a026,2 +np.float64,0xbfe33ff73b667fee,0xbfe6423cc6eb07d7,2 +np.float64,0x3fdfb7d6c4bf6fac,0x3fe163f2e8175177,2 +np.float64,0xbfd515d69eaa2bae,0xbfd5e6eedd6a1598,2 +np.float64,0x3fb322232e264440,0x3fb32b49d91c3cbe,2 +np.float64,0xbfe20ac39e641587,0xbfe46dd4b3803f19,2 +np.float64,0x3fe282dc18e505b8,0x3fe520152120c297,2 +np.float64,0xbfc905a4cd320b48,0xbfc95929b74865fb,2 +np.float64,0x3fe0ae3b83615c78,0x3fe27fa1dafc825b,2 +np.float64,0xbfc1bfed0f237fdc,0xbfc1dd6466225cdf,2 +np.float64,0xbfeca4d47d7949a9,0xbff72761a34fb682,2 +np.float64,0xbfe8cf8c48f19f18,0xbff0897ebc003626,2 +np.float64,0xbfe1aaf0a36355e2,0xbfe3e2ae7b17a286,2 +np.float64,0x3fe2ca442e659488,0x3fe58c3a2fb4f14a,2 +np.float64,0xbfda3c2deeb4785c,0xbfdbdf89fe96a243,2 +np.float64,0xbfdc12bfecb82580,0xbfde1d81dea3c221,2 +np.float64,0xbfe2d6d877e5adb1,0xbfe59f73e22c1fc7,2 +np.float64,0x3fe5f930636bf260,0x3feaee96a462e4de,2 +np.float64,0x3fcf3c0ea53e7820,0x3fcfe0b0f92be7e9,2 +np.float64,0xbfa5bb90f42b7720,0xbfa5bee9424004cc,2 +np.float64,0xbfe2fb3a3265f674,0xbfe5d75b988bb279,2 +np.float64,0x3fcaec7aab35d8f8,0x3fcb54ea582fff6f,2 +np.float64,0xbfd8d3228db1a646,0xbfda322297747fbc,2 +np.float64,0x3fedd2e0ad7ba5c2,0x3ffac6002b65c424,2 +np.float64,0xbfd9edeca2b3dbda,0xbfdb81b2b7785e33,2 +np.float64,0xbfef5febb17ebfd7,0xc002796b15950960,2 +np.float64,0x3fde22f787bc45f0,0x3fe05bcc624b9ba2,2 +np.float64,0xbfc716a4ab2e2d48,0xbfc758073839dd44,2 +np.float64,0xbf9bed852837db00,0xbf9bef4b2a3f3bdc,2 +np.float64,0x3fef8f88507f1f10,0x4003e5e566444571,2 +np.float64,0xbfdc1bbed6b8377e,0xbfde28a64e174e60,2 +np.float64,0x3fe02d30eae05a62,0x3fe1d064ec027cd3,2 +np.float64,0x3fd9dbb500b3b76c,0x3fdb6bea40162279,2 +np.float64,0x3fe353ff1d66a7fe,0x3fe661b3358c925e,2 +np.float64,0x3fac3ebfb4387d80,0x3fac4618effff2b0,2 +np.float64,0x3fe63cf0ba6c79e2,0x3feb7030cff5f434,2 +np.float64,0x3fd0e915f8a1d22c,0x3fd152464597b510,2 +np.float64,0xbfd36987cda6d310,0xbfd40af049d7621e,2 +np.float64,0xbfdc5b4dc7b8b69c,0xbfde7790a35da2bc,2 +np.float64,0x3feee7ff4a7dcffe,0x40003545989e07c7,2 +np.float64,0xbfeb2c8308765906,0xbff40d2e6469249e,2 +np.float64,0x3fe535a894ea6b52,0x3fe98781648550d0,2 +np.float64,0xbfef168eb9fe2d1d,0xc000f274ed3cd312,2 +np.float64,0x3fc3e2d98927c5b0,0x3fc40c6991b8900c,2 +np.float64,0xbfcd8fe3e73b1fc8,0xbfce1aec7f9b7f7d,2 +np.float64,0xbfd55d8c3aaabb18,0xbfd6378132ee4892,2 +np.float64,0xbfe424a66168494d,0xbfe7b289d72c98b3,2 +np.float64,0x3fd81af13eb035e4,0x3fd95a6a9696ab45,2 +np.float64,0xbfe3016722e602ce,0xbfe5e0e46db228cd,2 +np.float64,0x3fe9a20beff34418,0x3ff19faca17fc468,2 +np.float64,0xbfe2124bc7e42498,0xbfe478e19927e723,2 +np.float64,0x3fd96f8622b2df0c,0x3fdaeb08da6b08ae,2 +np.float64,0x3fecd6796579acf2,0x3ff7a7d02159e181,2 +np.float64,0x3fe60015df6c002c,0x3feafba6f2682a61,2 +np.float64,0x3fc7181cf72e3038,0x3fc7598c2cc3c3b4,2 +np.float64,0xbfce6e2e0b3cdc5c,0xbfcf0621b3e37115,2 +np.float64,0xbfe52a829e6a5505,0xbfe973a785980af9,2 +np.float64,0x3fed4bbac37a9776,0x3ff8f7a0e68a2bbe,2 +np.float64,0x3fabdfaacc37bf60,0x3fabe6bab42bd246,2 +np.float64,0xbfcd9598cb3b2b30,0xbfce20f3c4c2c261,2 
+np.float64,0x3fd717d859ae2fb0,0x3fd82e88eca09ab1,2 +np.float64,0x3fe28ccb18e51996,0x3fe52f071d2694fd,2 +np.float64,0xbfe43f064ae87e0c,0xbfe7de5eab36b5b9,2 +np.float64,0x7fefffffffffffff,0xfff8000000000000,2 +np.float64,0xbfb39b045a273608,0xbfb3a4dd3395fdd5,2 +np.float64,0xbfb3358bae266b18,0xbfb33ece5e95970a,2 +np.float64,0xbfeeafb6717d5f6d,0xbffeec3f9695b575,2 +np.float64,0xbfe7a321afef4644,0xbfee522dd80f41f4,2 +np.float64,0x3fe3a17e5be742fc,0x3fe6dcd32af51e92,2 +np.float64,0xbfc61694bd2c2d28,0xbfc64fbbd835f6e7,2 +np.float64,0xbfd795906faf2b20,0xbfd8bf89b370655c,2 +np.float64,0xbfe4b39b59e96736,0xbfe8a3c5c645b6e3,2 +np.float64,0x3fd310af3ba62160,0x3fd3a9442e825e1c,2 +np.float64,0xbfd45198a6a8a332,0xbfd50bc10311a0a3,2 +np.float64,0x3fd0017eaaa002fc,0x3fd05a472a837999,2 +np.float64,0xbfea974d98752e9b,0xbff30f67f1835183,2 +np.float64,0xbf978f60582f1ec0,0xbf979070e1c2b59d,2 +np.float64,0x3fe1c715d4e38e2c,0x3fe40b479e1241a2,2 +np.float64,0xbfccb965cd3972cc,0xbfcd38b40c4a352d,2 +np.float64,0xbfd9897048b312e0,0xbfdb09d55624c2a3,2 +np.float64,0x3fe7f5de4befebbc,0x3fef0b56be259f9c,2 +np.float64,0x3fcc6c6d4338d8d8,0x3fcce7b20ed68a78,2 +np.float64,0xbfe63884046c7108,0xbfeb67a3b945c3ee,2 +np.float64,0xbfce64e2ad3cc9c4,0xbfcefc47fae2e81f,2 +np.float64,0x3fefeb57b27fd6b0,0x400ab2eac6321cfb,2 +np.float64,0x3fe679627e6cf2c4,0x3febe6451b6ee0c4,2 +np.float64,0x3fc5f710172bee20,0x3fc62f40f85cb040,2 +np.float64,0x3fc34975e52692e8,0x3fc36f58588c7fa2,2 +np.float64,0x3fe8a3784cf146f0,0x3ff052ced9bb9406,2 +np.float64,0x3fd11a607ca234c0,0x3fd1874f876233fe,2 +np.float64,0x3fb2d653f625aca0,0x3fb2df0f4c9633f3,2 +np.float64,0x3fe555f39eeaabe8,0x3fe9c15ee962a28c,2 +np.float64,0xbfea297e3bf452fc,0xbff264107117f709,2 +np.float64,0x3fe1581cdde2b03a,0x3fe36c79acedf99c,2 +np.float64,0x3fd4567063a8ace0,0x3fd51123dbd9106f,2 +np.float64,0x3fa3883aec271080,0x3fa38aa86ec71218,2 +np.float64,0x3fe40e5d7de81cba,0x3fe78dbb9b568850,2 +np.float64,0xbfe9a2f7347345ee,0xbff1a0f4faa05041,2 +np.float64,0x3f9eef03a83dde00,0x3f9ef16caa0c1478,2 +np.float64,0xbfcb4641d1368c84,0xbfcbb2e7ff8c266d,2 +np.float64,0xbfa8403b2c308070,0xbfa844e148b735b7,2 +np.float64,0xbfe1875cd6e30eba,0xbfe3afadc08369f5,2 +np.float64,0xbfdd3c3d26ba787a,0xbfdf919b3e296766,2 +np.float64,0x3fcd6c4c853ad898,0x3fcdf55647b518b8,2 +np.float64,0xbfe360a173e6c143,0xbfe6759eb3a08cf2,2 +np.float64,0x3fe5a13147eb4262,0x3fea4a5a060f5adb,2 +np.float64,0x3feb3cdd7af679ba,0x3ff42aae0cf61234,2 +np.float64,0x3fe5205128ea40a2,0x3fe9618f3d0c54af,2 +np.float64,0x3fce35343f3c6a68,0x3fcec9c4e612b050,2 +np.float64,0xbfc345724d268ae4,0xbfc36b3ce6338e6a,2 +np.float64,0x3fedc4fc0e7b89f8,0x3ffa91c1d775c1f7,2 +np.float64,0x3fe41fbf21683f7e,0x3fe7aa6c174a0e65,2 +np.float64,0xbfc7a1a5d32f434c,0xbfc7e7d27a4c5241,2 +np.float64,0x3fd3e33eaca7c67c,0x3fd4915264441e2f,2 +np.float64,0x3feb3f02f6f67e06,0x3ff42e942249e596,2 +np.float64,0x3fdb75fcb0b6ebf8,0x3fdd5c63f98b6275,2 +np.float64,0x3fd6476603ac8ecc,0x3fd74020b164cf38,2 +np.float64,0x3fed535372faa6a6,0x3ff90f3791821841,2 +np.float64,0x3fe8648ead70c91e,0x3ff006a62befd7ed,2 +np.float64,0x3fd0f90760a1f210,0x3fd1636b39bb1525,2 +np.float64,0xbfca052443340a48,0xbfca633d6e777ae0,2 +np.float64,0xbfa6a5e3342d4bc0,0xbfa6a9ac6a488f5f,2 +np.float64,0x3fd5598038aab300,0x3fd632f35c0c3d52,2 +np.float64,0xbfdf66218fbecc44,0xbfe12df83b19f300,2 +np.float64,0x3fe78e15b56f1c2c,0x3fee240d12489cd1,2 +np.float64,0x3fe3d6a7b3e7ad50,0x3fe7329dcf7401e2,2 +np.float64,0xbfddb8e97bbb71d2,0xbfe017ed6d55a673,2 +np.float64,0xbfd57afd55aaf5fa,0xbfd658a9607c3370,2 
+np.float64,0xbfdba4c9abb74994,0xbfdd95d69e5e8814,2 +np.float64,0xbfe71d8090ee3b01,0xbfed3390be6d2eef,2 +np.float64,0xbfc738ac0f2e7158,0xbfc77b3553b7c026,2 +np.float64,0x3f873656302e6c80,0x3f873697556ae011,2 +np.float64,0x3fe559491d6ab292,0x3fe9c7603b12c608,2 +np.float64,0xbfe262776864c4ef,0xbfe4ef905dda8599,2 +np.float64,0x3fe59d8917eb3b12,0x3fea439f44b7573f,2 +np.float64,0xbfd4b5afb5a96b60,0xbfd57b4e3df4dbc8,2 +np.float64,0x3fe81158447022b0,0x3fef4a3cea3eb6a9,2 +np.float64,0xbfeb023441f60468,0xbff3c27f0fc1a4dc,2 +np.float64,0x3fefb212eaff6426,0x40055fc6d949cf44,2 +np.float64,0xbfe1300ac1e26016,0xbfe333f297a1260e,2 +np.float64,0xbfeae0a2f575c146,0xbff388d58c380b8c,2 +np.float64,0xbfeddd8e55fbbb1d,0xbffaef045b2e21d9,2 +np.float64,0x3fec7c6c1d78f8d8,0x3ff6c3ebb019a8e5,2 +np.float64,0xbfe27e071f64fc0e,0xbfe518d2ff630f33,2 +np.float64,0x8000000000000001,0x8000000000000001,2 +np.float64,0x3fc5872abf2b0e58,0x3fc5bc083105db76,2 +np.float64,0x3fe65114baeca22a,0x3feb9745b82ef15a,2 +np.float64,0xbfc783abe52f0758,0xbfc7c8cb23f93e79,2 +np.float64,0x3fe4b7a5dd696f4c,0x3fe8aab9d492f0ca,2 +np.float64,0xbf91a8e8a82351e0,0xbf91a95b6ae806f1,2 +np.float64,0xbfee482eb77c905d,0xbffcb952830e715a,2 +np.float64,0x3fba0eee2a341de0,0x3fba261d495e3a1b,2 +np.float64,0xbfeb8876ae7710ed,0xbff4b7f7f4343506,2 +np.float64,0xbfe4d29e46e9a53c,0xbfe8d9547a601ba7,2 +np.float64,0xbfe12413b8e24828,0xbfe3232656541d10,2 +np.float64,0x3fc0bd8f61217b20,0x3fc0d63f937f0aa4,2 +np.float64,0xbfd3debafda7bd76,0xbfd48c534e5329e4,2 +np.float64,0x3fc0f92de921f258,0x3fc112eb7d47349b,2 +np.float64,0xbfe576b95f6aed72,0xbfe9fca859239b3c,2 +np.float64,0x3fd10e520da21ca4,0x3fd17a546e4152f7,2 +np.float64,0x3fcef917eb3df230,0x3fcf998677a8fa8f,2 +np.float64,0x3fdfcf863abf9f0c,0x3fe173a98af1cb13,2 +np.float64,0x3fc28c4b4f251898,0x3fc2adf43792e917,2 +np.float64,0x3fceb837ad3d7070,0x3fcf54a63b7d8c5c,2 +np.float64,0x3fc0140a05202818,0x3fc029e4f75330cb,2 +np.float64,0xbfd76c3362aed866,0xbfd88fb9e790b4e8,2 +np.float64,0xbfe475300868ea60,0xbfe8395334623e1f,2 +np.float64,0x3fea70b9b4f4e174,0x3ff2d1dad92173ba,2 +np.float64,0xbfe2edbd4965db7a,0xbfe5c29449a9365d,2 +np.float64,0xbfddf86f66bbf0de,0xbfe0408439cada9b,2 +np.float64,0xbfb443cdfa288798,0xbfb44eae796ad3ea,2 +np.float64,0xbf96a8a0482d5140,0xbf96a992b6ef073b,2 +np.float64,0xbfd279db2fa4f3b6,0xbfd3043db6acbd9e,2 +np.float64,0x3fe5d99088ebb322,0x3feab30be14e1605,2 +np.float64,0xbfe1a917abe35230,0xbfe3e0063d0f5f63,2 +np.float64,0x3fc77272f52ee4e8,0x3fc7b6f8ab6f4591,2 +np.float64,0x3fd6b62146ad6c44,0x3fd7be77eef8390a,2 +np.float64,0xbfe39fd9bc673fb4,0xbfe6da30dc4eadde,2 +np.float64,0x3fe35545c066aa8c,0x3fe663b5873e4d4b,2 +np.float64,0xbfcbbeffb3377e00,0xbfcc317edf7f6992,2 +np.float64,0xbfe28a58366514b0,0xbfe52b5734579ffa,2 +np.float64,0xbfbf0c87023e1910,0xbfbf33d970a0dfa5,2 +np.float64,0xbfd31144cba6228a,0xbfd3a9e84f9168f9,2 +np.float64,0xbfe5c044056b8088,0xbfea83d607c1a88a,2 +np.float64,0x3fdaabdf18b557c0,0x3fdc663ee8eddc83,2 +np.float64,0xbfeb883006f71060,0xbff4b76feff615be,2 +np.float64,0xbfebaef41d775de8,0xbff5034111440754,2 +np.float64,0x3fd9b6eb3bb36dd8,0x3fdb3fff5071dacf,2 +np.float64,0x3fe4e33c45e9c678,0x3fe8f637779ddedf,2 +np.float64,0x3fe52213a06a4428,0x3fe964adeff5c14e,2 +np.float64,0x3fe799254cef324a,0x3fee3c3ecfd3cdc5,2 +np.float64,0x3fd0533f35a0a680,0x3fd0b19a003469d3,2 +np.float64,0x3fec7ef5c7f8fdec,0x3ff6ca0abe055048,2 +np.float64,0xbfd1b5da82a36bb6,0xbfd22f357acbee79,2 +np.float64,0xbfd8f9c652b1f38c,0xbfda5faacbce9cf9,2 +np.float64,0x3fc8fc818b31f900,0x3fc94fa9a6aa53c8,2 
+np.float64,0x3fcf42cc613e8598,0x3fcfe7dc128f33f2,2 +np.float64,0x3fd393a995a72754,0x3fd4396127b19305,2 +np.float64,0x3fec7b7df9f8f6fc,0x3ff6c1ae51753ef2,2 +np.float64,0x3fc07f175b20fe30,0x3fc096b55c11568c,2 +np.float64,0xbf979170082f22e0,0xbf979280d9555f44,2 +np.float64,0xbfb9d110c633a220,0xbfb9e79ba19b3c4a,2 +np.float64,0x3fedcd7d417b9afa,0x3ffab19734e86d58,2 +np.float64,0xbfec116f27f822de,0xbff5cf9425cb415b,2 +np.float64,0xbfec4fa0bef89f42,0xbff65a771982c920,2 +np.float64,0x3f94d4452829a880,0x3f94d501789ad11c,2 +np.float64,0xbfefe5ede27fcbdc,0xc009c440d3c2a4ce,2 +np.float64,0xbfe7e5f7b5efcbf0,0xbfeee74449aee1db,2 +np.float64,0xbfeb71dc8976e3b9,0xbff48cd84ea54ed2,2 +np.float64,0xbfe4cdb65f699b6c,0xbfe8d0d3bce901ef,2 +np.float64,0x3fb78ef1ee2f1de0,0x3fb7a00e7d183c48,2 +np.float64,0x3fb681864a2d0310,0x3fb6906fe64b4cd7,2 +np.float64,0xbfd2ad3b31a55a76,0xbfd33c57b5985399,2 +np.float64,0x3fdcdaaa95b9b554,0x3fdf16b99628db1e,2 +np.float64,0x3fa4780b7428f020,0x3fa47ad6ce9b8081,2 +np.float64,0x3fc546b0ad2a8d60,0x3fc579b361b3b18f,2 +np.float64,0x3feaf98dd6f5f31c,0x3ff3b38189c3539c,2 +np.float64,0x3feb0b2eca76165e,0x3ff3d22797083f9a,2 +np.float64,0xbfdc02ae3ab8055c,0xbfde099ecb5dbacf,2 +np.float64,0x3fd248bf17a49180,0x3fd2ceb77b346d1d,2 +np.float64,0x3fe349d666e693ac,0x3fe651b9933a8853,2 +np.float64,0xbfca526fc534a4e0,0xbfcab3e83f0d9b93,2 +np.float64,0x3fc156421722ac88,0x3fc171b38826563b,2 +np.float64,0xbfe4244569e8488b,0xbfe7b1e93e7d4f92,2 +np.float64,0x3fe010faabe021f6,0x3fe1aa961338886d,2 +np.float64,0xbfc52dacb72a5b58,0xbfc55ffa50eba380,2 +np.float64,0x8000000000000000,0x8000000000000000,2 +np.float64,0x3fea1d4865f43a90,0x3ff251b839eb4817,2 +np.float64,0xbfa0f65c8421ecc0,0xbfa0f7f37c91be01,2 +np.float64,0x3fcab29c0b356538,0x3fcb1863edbee184,2 +np.float64,0x3fe7949162ef2922,0x3fee323821958b88,2 +np.float64,0x3fdaf9288ab5f250,0x3fdcc400190a4839,2 +np.float64,0xbfe13ece6be27d9d,0xbfe348ba07553179,2 +np.float64,0x3f8a0c4fd0341880,0x3f8a0cabdf710185,2 +np.float64,0x3fdd0442a2ba0884,0x3fdf4b016c4da452,2 +np.float64,0xbfaf06d2343e0da0,0xbfaf1090b1600422,2 +np.float64,0xbfd3b65225a76ca4,0xbfd45fa49ae76cca,2 +np.float64,0x3fef5d75fefebaec,0x400269a5e7c11891,2 +np.float64,0xbfe048e35ce091c6,0xbfe1f5af45dd64f8,2 +np.float64,0xbfe27d4599e4fa8b,0xbfe517b07843d04c,2 +np.float64,0xbfe6f2a637ede54c,0xbfecdaa730462576,2 +np.float64,0x3fc63fbb752c7f78,0x3fc67a2854974109,2 +np.float64,0x3fedda6bfbfbb4d8,0x3ffae2e6131f3475,2 +np.float64,0x3fe7a6f5286f4dea,0x3fee5a9b1ef46016,2 +np.float64,0xbfd4ea8bcea9d518,0xbfd5b66ab7e5cf00,2 +np.float64,0x3fdc116568b822cc,0x3fde1bd4d0d9fd6c,2 +np.float64,0x3fdc45cb1bb88b98,0x3fde5cd1d2751032,2 +np.float64,0x3feabd932f757b26,0x3ff34e06e56a62a1,2 +np.float64,0xbfae5dbe0c3cbb80,0xbfae66e062ac0d65,2 +np.float64,0xbfdb385a00b670b4,0xbfdd10fedf3a58a7,2 +np.float64,0xbfebb14755f7628f,0xbff507e123a2b47c,2 +np.float64,0x3fe6de2fdfedbc60,0x3fecb0ae6e131da2,2 +np.float64,0xbfd86de640b0dbcc,0xbfd9bb4dbf0bf6af,2 +np.float64,0x3fe39e86d9e73d0e,0x3fe6d811c858d5d9,2 +np.float64,0x7ff0000000000000,0xfff8000000000000,2 +np.float64,0x3fa8101684302020,0x3fa814a12176e937,2 +np.float64,0x3fefdd5ad37fbab6,0x4008a08c0b76fbb5,2 +np.float64,0x3fe645c727ec8b8e,0x3feb814ebc470940,2 +np.float64,0x3fe3ba79dce774f4,0x3fe70500db564cb6,2 +np.float64,0xbfe0e5a254e1cb44,0xbfe2cc13940c6d9a,2 +np.float64,0x3fe2cac62465958c,0x3fe58d008c5e31f8,2 +np.float64,0xbfd3ffb531a7ff6a,0xbfd4b0d88cff2040,2 +np.float64,0x3fe0929104612522,0x3fe259bc42dce788,2 +np.float64,0x1,0x1,2 
+np.float64,0xbfe7db77e6efb6f0,0xbfeecf93e8a61cb3,2 +np.float64,0xbfe37e9559e6fd2a,0xbfe6a514e29cb7aa,2 +np.float64,0xbfc53a843f2a7508,0xbfc56d2e9ad8b716,2 +np.float64,0xbfedb04485fb6089,0xbffa4615d4334ec3,2 +np.float64,0xbfc44349b1288694,0xbfc46f484b6f1cd6,2 +np.float64,0xbfe265188264ca31,0xbfe4f37d61cd9e17,2 +np.float64,0xbfd030351da0606a,0xbfd08c2537287ee1,2 +np.float64,0x3fd8fb131db1f628,0x3fda613363ca601e,2 +np.float64,0xbff0000000000000,0xfff0000000000000,2 +np.float64,0xbfe48d9a60691b35,0xbfe862c02d8fec1e,2 +np.float64,0x3fd185e050a30bc0,0x3fd1fb4c614ddb07,2 +np.float64,0xbfe4a5807e694b01,0xbfe88b8ff2d6caa7,2 +np.float64,0xbfc934d7ad3269b0,0xbfc98a405d25a666,2 +np.float64,0xbfea0e3c62741c79,0xbff23b4bd3a7b15d,2 +np.float64,0x3fe7244071ee4880,0x3fed41b27ba6bb22,2 +np.float64,0xbfd419f81ba833f0,0xbfd4cdf71b4533a3,2 +np.float64,0xbfe1e73a34e3ce74,0xbfe439eb15fa6baf,2 +np.float64,0x3fcdd9a63f3bb350,0x3fce68e1c401eff0,2 +np.float64,0x3fd1b5960ba36b2c,0x3fd22eeb566f1976,2 +np.float64,0x3fe9ad18e0735a32,0x3ff1af23c534260d,2 +np.float64,0xbfd537918aaa6f24,0xbfd60ccc8df0962b,2 +np.float64,0x3fcba3d3c73747a8,0x3fcc14fd5e5c49ad,2 +np.float64,0x3fd367e3c0a6cfc8,0x3fd40921b14e288e,2 +np.float64,0x3fe94303c6f28608,0x3ff11e62db2db6ac,2 +np.float64,0xbfcc5f77fd38bef0,0xbfccda110c087519,2 +np.float64,0xbfd63b74d7ac76ea,0xbfd7328af9f37402,2 +np.float64,0xbfe5321289ea6425,0xbfe9811ce96609ad,2 +np.float64,0xbfde910879bd2210,0xbfe0a2cd0ed1d368,2 +np.float64,0xbfcc9d9bad393b38,0xbfcd1b722a0b1371,2 +np.float64,0xbfe6dd39e16dba74,0xbfecaeb7c8c069f6,2 +np.float64,0xbfe98316eff3062e,0xbff174d7347d48bf,2 +np.float64,0xbfda88f8d1b511f2,0xbfdc3c0e75dad903,2 +np.float64,0x3fd400d8c2a801b0,0x3fd4b21bacff1f5d,2 +np.float64,0xbfe1ed335863da66,0xbfe4429e45e99779,2 +np.float64,0xbf3423a200284800,0xbf3423a20acb0342,2 +np.float64,0xbfe97bc59672f78b,0xbff16ad1adc44a33,2 +np.float64,0xbfeeca60d7fd94c2,0xbfff98d7f18f7728,2 +np.float64,0x3fd1eb13b2a3d628,0x3fd268e6ff4d56ce,2 +np.float64,0xbfa5594c242ab2a0,0xbfa55c77d6740a39,2 +np.float64,0x3fe72662006e4cc4,0x3fed462a9dedbfee,2 +np.float64,0x3fef4bb221fe9764,0x4001fe4f4cdfedb2,2 +np.float64,0xbfe938d417f271a8,0xbff110e78724ca2b,2 +np.float64,0xbfcc29ab2f385358,0xbfcca182140ef541,2 +np.float64,0x3fe18cd42c6319a8,0x3fe3b77e018165e7,2 +np.float64,0xbfec6c5cae78d8b9,0xbff69d8e01309b48,2 +np.float64,0xbfd5723da7aae47c,0xbfd64ecde17da471,2 +np.float64,0xbfe3096722e612ce,0xbfe5ed43634f37ff,2 +np.float64,0xbfdacaceb1b5959e,0xbfdc8bb826bbed39,2 +np.float64,0x3fc59a57cb2b34b0,0x3fc5cfc4a7c9bac8,2 +np.float64,0x3f84adce10295b80,0x3f84adfc1f1f6e97,2 +np.float64,0x3fdd5b28bbbab650,0x3fdfb8b906d77df4,2 +np.float64,0x3fdebf94c6bd7f28,0x3fe0c10188e1bc7c,2 +np.float64,0x3fdb30c612b6618c,0x3fdd07bf18597821,2 +np.float64,0x3fe7eeb3176fdd66,0x3feefb0be694b855,2 +np.float64,0x0,0x0,2 +np.float64,0xbfe10057e9e200b0,0xbfe2f13365e5b1c9,2 +np.float64,0xbfeb61a82376c350,0xbff46e665d3a60f5,2 +np.float64,0xbfe7f54aec6fea96,0xbfef0a0759f726dc,2 +np.float64,0xbfe4f6da3de9edb4,0xbfe9187d85bd1ab5,2 +np.float64,0xbfeb8be1b3f717c4,0xbff4be8efaab2e75,2 +np.float64,0x3fed40bc31fa8178,0x3ff8d5ec4a7f3e9b,2 +np.float64,0xbfe40f8711681f0e,0xbfe78fa5c62b191b,2 +np.float64,0x3fd1034d94a2069c,0x3fd16e78e9efb85b,2 +np.float64,0x3fc74db15b2e9b60,0x3fc790f26e894098,2 +np.float64,0x3fd912a88cb22550,0x3fda7d0ab3b21308,2 +np.float64,0x3fd8948a3bb12914,0x3fd9e8950c7874c8,2 +np.float64,0xbfa7ada5242f5b50,0xbfa7b1f8db50c104,2 +np.float64,0x3feeb2e1c27d65c4,0x3fff000b7d09c9b7,2 
+np.float64,0x3fe9d46cbbf3a8da,0x3ff1e6f405265a6e,2 +np.float64,0xbfe2480b77e49017,0xbfe4c83b9b37bf0c,2 +np.float64,0x3fe950ea9372a1d6,0x3ff130e62468bf2c,2 +np.float64,0x3fefa7272a7f4e4e,0x4004d8c9bf31ab58,2 +np.float64,0xbfe7309209ee6124,0xbfed5b94acef917a,2 +np.float64,0x3fd05e8c64a0bd18,0x3fd0bdb11e0903c6,2 +np.float64,0x3fd9236043b246c0,0x3fda90ccbe4bab1e,2 +np.float64,0xbfdc3d6805b87ad0,0xbfde5266e17154c3,2 +np.float64,0x3fe5e6bad76bcd76,0x3feacbc306c63445,2 +np.float64,0x3ff0000000000000,0x7ff0000000000000,2 +np.float64,0xbfde3d7390bc7ae8,0xbfe06cd480bd0196,2 +np.float64,0xbfd3e2e3c0a7c5c8,0xbfd490edc0a45e26,2 +np.float64,0x3fe39871d76730e4,0x3fe6ce54d1719953,2 +np.float64,0x3fdff00ebcbfe01c,0x3fe1894b6655a6d0,2 +np.float64,0x3f91b7ad58236f40,0x3f91b8213bcb8b0b,2 +np.float64,0xbfd99f48f7b33e92,0xbfdb23d544f62591,2 +np.float64,0x3fae3512cc3c6a20,0x3fae3e10939fd7b5,2 +np.float64,0x3fcc4cf3db3899e8,0x3fccc698a15176d6,2 +np.float64,0xbfd0927e39a124fc,0xbfd0f5522e2bc030,2 +np.float64,0x3fcee859633dd0b0,0x3fcf87bdef7a1e82,2 +np.float64,0xbfe2a8b69565516d,0xbfe5593437b6659a,2 +np.float64,0x3fecf61e20f9ec3c,0x3ff7fda16b0209d4,2 +np.float64,0xbfbf37571e3e6eb0,0xbfbf5f4e1379a64c,2 +np.float64,0xbfd54e1b75aa9c36,0xbfd626223b68971a,2 +np.float64,0x3fe1035a56e206b4,0x3fe2f5651ca0f4b0,2 +np.float64,0x3fe4992989e93254,0x3fe876751afa70dc,2 +np.float64,0x3fc8c313d3318628,0x3fc913faf15d1562,2 +np.float64,0x3f99f6ba8833ed80,0x3f99f8274fb94828,2 +np.float64,0xbfd4a58af0a94b16,0xbfd56947c276e04f,2 +np.float64,0x3fc66f8c872cdf18,0x3fc6ab7a14372a73,2 +np.float64,0x3fc41eee0d283de0,0x3fc449ff1ff0e7a6,2 +np.float64,0x3fefd04d287fa09a,0x4007585010cfa9b0,2 +np.float64,0x3fce9e746f3d3ce8,0x3fcf39514bbe5070,2 +np.float64,0xbfe8056f72700adf,0xbfef2ee2c13e67ba,2 +np.float64,0x3fdd6b1ec0bad63c,0x3fdfccf2ba144fa8,2 +np.float64,0x3fd92ee432b25dc8,0x3fda9e6b96b2b142,2 +np.float64,0xbfc4d18f9529a320,0xbfc50150fb4de0cc,2 +np.float64,0xbfe09939a7613274,0xbfe262d703c317af,2 +np.float64,0xbfd130b132a26162,0xbfd19f5a00ae29c4,2 +np.float64,0x3fa06e21d420dc40,0x3fa06f93aba415fb,2 +np.float64,0x3fc5c48fbd2b8920,0x3fc5fb3bfad3bf55,2 +np.float64,0xbfdfa2bacbbf4576,0xbfe155f839825308,2 +np.float64,0x3fe3e1fa0f67c3f4,0x3fe745081dd4fd03,2 +np.float64,0x3fdae58289b5cb04,0x3fdcac1f6789130a,2 +np.float64,0xbf8ed3ba103da780,0xbf8ed452a9cc1442,2 +np.float64,0xbfec06b46f780d69,0xbff5b86f30d70908,2 +np.float64,0xbfe990c13b732182,0xbff187a90ae611f8,2 +np.float64,0xbfdd46c738ba8d8e,0xbfdf9eee0a113230,2 +np.float64,0x3fe08b83f3611708,0x3fe2501b1c77035c,2 +np.float64,0xbfd501b65baa036c,0xbfd5d05de3fceac8,2 +np.float64,0xbfcf4fa21f3e9f44,0xbfcff5829582c0b6,2 +np.float64,0xbfefbc0bfbff7818,0xc005eca1a2c56b38,2 +np.float64,0xbfe1ba6959e374d2,0xbfe3f8f88d128ce5,2 +np.float64,0xbfd4e74ee3a9ce9e,0xbfd5b2cabeb45e6c,2 +np.float64,0xbfe77c38eaeef872,0xbfedfd332d6f1c75,2 +np.float64,0x3fa9b5e4fc336bc0,0x3fa9bb6f6b80b4af,2 +np.float64,0xbfecba63917974c7,0xbff75e44df7f8e81,2 +np.float64,0x3fd6cf17b2ad9e30,0x3fd7db0b93b7f2b5,2
diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-cbrt.csv b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-cbrt.csv
new file mode 100644
index 0000000..ad141cb
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-cbrt.csv
@@ -0,0 +1,1429 @@
+dtype,input,output,ulperrortol
+np.float32,0x3ee7054c,0x3f4459ea,2 +np.float32,0x7d1e2489,0x54095925,2 +np.float32,0x7ee5edf5,0x549b992b,2 +np.float32,0x380607,0x2a425e72,2
+np.float32,0x34a8f3,0x2a3e6603,2 +np.float32,0x3eee2844,0x3f465a45,2 +np.float32,0x59e49c,0x2a638d0a,2 +np.float32,0xbf72c77a,0xbf7b83d4,2 +np.float32,0x7f2517b4,0x54af8bf0,2 +np.float32,0x80068a69,0xa9bdfe8b,2 +np.float32,0xbe8e3578,0xbf270775,2 +np.float32,0xbe4224dc,0xbf131119,2 +np.float32,0xbe0053b8,0xbf001be2,2 +np.float32,0x70e8d,0x29c2ddc5,2 +np.float32,0xff63f7b5,0xd4c37b7f,2 +np.float32,0x3f00bbed,0x3f4b9335,2 +np.float32,0x3f135f4e,0x3f54f5d4,2 +np.float32,0xbe13a488,0xbf063d13,2 +np.float32,0x3f14ec78,0x3f55b478,2 +np.float32,0x7ec35cfb,0x54935fbf,2 +np.float32,0x7d41c589,0x5412f904,2 +np.float32,0x3ef8a16e,0x3f4937f7,2 +np.float32,0x3f5d8464,0x3f73f279,2 +np.float32,0xbeec85ac,0xbf45e5cb,2 +np.float32,0x7f11f722,0x54a87cb1,2 +np.float32,0x8032c085,0xaa3c1219,2 +np.float32,0x80544bac,0xaa5eb9f2,2 +np.float32,0x3e944a10,0x3f296065,2 +np.float32,0xbf29fe50,0xbf5f5796,2 +np.float32,0x7e204d8d,0x545b03d5,2 +np.float32,0xfe1d0254,0xd4598127,2 +np.float32,0x80523129,0xaa5cdba9,2 +np.float32,0x806315fa,0xaa6b0eaf,2 +np.float32,0x3ed3d2a4,0x3f3ec117,2 +np.float32,0x7ee15007,0x549a8cc0,2 +np.float32,0x801ffb5e,0xaa213d4f,2 +np.float32,0x807f9f4a,0xaa7fbf76,2 +np.float32,0xbe45e854,0xbf1402d3,2 +np.float32,0x3d9e2e70,0x3eda0b64,2 +np.float32,0x51f404,0x2a5ca4d7,2 +np.float32,0xbe26a8b0,0xbf0bc54d,2 +np.float32,0x22c99a,0x2a25d2a7,2 +np.float32,0xbf71248b,0xbf7af2d5,2 +np.float32,0x7219fe,0x2a76608e,2 +np.float32,0x7f16fd7d,0x54aa6610,2 +np.float32,0x80716faa,0xaa75e5b9,2 +np.float32,0xbe24f9a4,0xbf0b4c65,2 +np.float32,0x800000,0x2a800000,2 +np.float32,0x80747456,0xaa780f27,2 +np.float32,0x68f9e8,0x2a6fa035,2 +np.float32,0x3f6a297e,0x3f7880d8,2 +np.float32,0x3f28b973,0x3f5ec8f6,2 +np.float32,0x7f58c577,0x54c03a70,2 +np.float32,0x804befcc,0xaa571b4f,2 +np.float32,0x3e2be027,0x3f0d36cf,2 +np.float32,0xfe7e80a4,0xd47f7ff7,2 +np.float32,0xfe9d444a,0xd489181b,2 +np.float32,0x3db3e790,0x3ee399d6,2 +np.float32,0xbf154c3e,0xbf55e23e,2 +np.float32,0x3d1096b7,0x3ea7f4aa,2 +np.float32,0x7fc00000,0x7fc00000,2 +np.float32,0x804e2521,0xaa592c06,2 +np.float32,0xbeda2f00,0xbf40a513,2 +np.float32,0x3f191788,0x3f57ae30,2 +np.float32,0x3ed24ade,0x3f3e4b34,2 +np.float32,0x807fadb4,0xaa7fc917,2 +np.float32,0xbe0a06dc,0xbf034234,2 +np.float32,0x3f250bba,0x3f5d276d,2 +np.float32,0x7e948b00,0x548682c8,2 +np.float32,0xfe65ecdc,0xd476fed2,2 +np.float32,0x6fdbdd,0x2a74c095,2 +np.float32,0x800112de,0xa9500fa6,2 +np.float32,0xfe63225c,0xd475fdee,2 +np.float32,0x7f3d9acd,0x54b7d648,2 +np.float32,0xfc46f480,0xd3bacf87,2 +np.float32,0xfe5deaac,0xd47417ff,2 +np.float32,0x60ce53,0x2a693d93,2 +np.float32,0x6a6e2f,0x2a70ba2c,2 +np.float32,0x7f43f0f1,0x54b9dcd0,2 +np.float32,0xbf6170c9,0xbf756104,2 +np.float32,0xbe5c9f74,0xbf197852,2 +np.float32,0xff1502b0,0xd4a9a693,2 +np.float32,0x8064f6af,0xaa6c886e,2 +np.float32,0xbf380564,0xbf6552e5,2 +np.float32,0xfeb9b7dc,0xd490e85f,2 +np.float32,0x7f34f941,0x54b5010d,2 +np.float32,0xbe9d4ca0,0xbf2cbd5f,2 +np.float32,0x3f6e43d2,0x3f79f240,2 +np.float32,0xbdad0530,0xbee0a8f2,2 +np.float32,0x3da18459,0x3edb9105,2 +np.float32,0xfd968340,0xd42a3808,2 +np.float32,0x3ea03e64,0x3f2dcf96,2 +np.float32,0x801d2f5b,0xaa1c6525,2 +np.float32,0xbf47d92d,0xbf6bb7e9,2 +np.float32,0x55a6b9,0x2a5fe9fb,2 +np.float32,0x77a7c2,0x2a7a4fb8,2 +np.float32,0xfebbc16e,0xd4916f88,2 +np.float32,0x3f5d3d6e,0x3f73d86a,2 +np.float32,0xfccd2b60,0xd3edcacb,2 +np.float32,0xbd026460,0xbea244b0,2 +np.float32,0x3e55bd,0x2a4968e4,2 +np.float32,0xbe7b5708,0xbf20490d,2 +np.float32,0xfe413cf4,0xd469171f,2 
+np.float32,0x7710e3,0x2a79e657,2 +np.float32,0xfc932520,0xd3d4d9ca,2 +np.float32,0xbf764a1b,0xbf7cb8aa,2 +np.float32,0x6b1923,0x2a713aca,2 +np.float32,0xfe4dcd04,0xd46e092d,2 +np.float32,0xff3085ac,0xd4b381f8,2 +np.float32,0x3f72c438,0x3f7b82b4,2 +np.float32,0xbf6f0c6e,0xbf7a3852,2 +np.float32,0x801d2b1b,0xaa1c5d8d,2 +np.float32,0x3e9db91e,0x3f2ce50d,2 +np.float32,0x3f684f9d,0x3f77d8c5,2 +np.float32,0x7dc784,0x2a7e82cc,2 +np.float32,0x7d2c88e9,0x540d64f8,2 +np.float32,0x807fb708,0xaa7fcf51,2 +np.float32,0x8003c49a,0xa99e16e0,2 +np.float32,0x3ee4f5b8,0x3f43c3ff,2 +np.float32,0xfe992c5e,0xd487e4ec,2 +np.float32,0x4b4dfa,0x2a568216,2 +np.float32,0x3d374c80,0x3eb5c6a8,2 +np.float32,0xbd3a4700,0xbeb6c15c,2 +np.float32,0xbf13cb80,0xbf5529e5,2 +np.float32,0xbe7306d4,0xbf1e7f91,2 +np.float32,0xbf800000,0xbf800000,2 +np.float32,0xbea42efe,0xbf2f394e,2 +np.float32,0x3e1981d0,0x3f07fe2c,2 +np.float32,0x3f17ea1d,0x3f572047,2 +np.float32,0x7dc1e0,0x2a7e7efe,2 +np.float32,0x80169c08,0xaa0fa320,2 +np.float32,0x3f3e1972,0x3f67d248,2 +np.float32,0xfe5d3c88,0xd473d815,2 +np.float32,0xbf677448,0xbf778aac,2 +np.float32,0x7e799b7d,0x547dd9e4,2 +np.float32,0x3f00bb2c,0x3f4b92cf,2 +np.float32,0xbeb29f9c,0xbf343798,2 +np.float32,0xbd6b7830,0xbec59a86,2 +np.float32,0x807a524a,0xaa7c282a,2 +np.float32,0xbe0a7a04,0xbf0366ab,2 +np.float32,0x80237470,0xaa26e061,2 +np.float32,0x3ccbc0f6,0x3e95744f,2 +np.float32,0x3edec6bc,0x3f41fcb6,2 +np.float32,0x3f635198,0x3f760efa,2 +np.float32,0x800eca4f,0xa9f960d8,2 +np.float32,0x3f800000,0x3f800000,2 +np.float32,0xff4eeb9e,0xd4bd456a,2 +np.float32,0x56f4e,0x29b29e70,2 +np.float32,0xff5383a0,0xd4bea95c,2 +np.float32,0x3f4c3a77,0x3f6d6d94,2 +np.float32,0x3f6c324a,0x3f79388c,2 +np.float32,0xbebdc092,0xbf37e27c,2 +np.float32,0xff258956,0xd4afb42e,2 +np.float32,0xdc78c,0x29f39012,2 +np.float32,0xbf2db06a,0xbf60f2f5,2 +np.float32,0xbe3c5808,0xbf119660,2 +np.float32,0xbf1ba866,0xbf58e0f4,2 +np.float32,0x80377640,0xaa41b79d,2 +np.float32,0x4fdc4d,0x2a5abfea,2 +np.float32,0x7f5e7560,0x54c1e516,2 +np.float32,0xfeb4d3f2,0xd48f9fde,2 +np.float32,0x3f12a622,0x3f549c7d,2 +np.float32,0x7f737ed7,0x54c7d2dc,2 +np.float32,0xa0ddc,0x29db456d,2 +np.float32,0xfe006740,0xd44b6689,2 +np.float32,0x3f17dfd4,0x3f571b6c,2 +np.float32,0x67546e,0x2a6e5dd1,2 +np.float32,0xff0d0f11,0xd4a693e2,2 +np.float32,0xbd170090,0xbeaa6738,2 +np.float32,0x5274a0,0x2a5d1806,2 +np.float32,0x3e154fe0,0x3f06be1a,2 +np.float32,0x7ddb302e,0x5440f0a7,2 +np.float32,0x3f579d10,0x3f71c2af,2 +np.float32,0xff2bc5bb,0xd4b1e20c,2 +np.float32,0xfee8fa6a,0xd49c4872,2 +np.float32,0xbea551b0,0xbf2fa07b,2 +np.float32,0xfeabc75c,0xd48d3004,2 +np.float32,0x7f50a5a8,0x54bdcbd1,2 +np.float32,0x50354b,0x2a5b110d,2 +np.float32,0x7d139f13,0x54063b6b,2 +np.float32,0xbeee1b08,0xbf465699,2 +np.float32,0xfe5e1650,0xd47427fe,2 +np.float32,0x7f7fffff,0x54cb2ff5,2 +np.float32,0xbf52ede8,0xbf6fff35,2 +np.float32,0x804bba81,0xaa56e8f1,2 +np.float32,0x6609e2,0x2a6d5e94,2 +np.float32,0x692621,0x2a6fc1d6,2 +np.float32,0xbf288bb6,0xbf5eb4d3,2 +np.float32,0x804f28c4,0xaa5a1b82,2 +np.float32,0xbdaad2a8,0xbedfb46e,2 +np.float32,0x5e04f8,0x2a66fb13,2 +np.float32,0x804c10da,0xaa573a81,2 +np.float32,0xbe412764,0xbf12d0fd,2 +np.float32,0x801c35cc,0xaa1aa250,2 +np.float32,0x6364d4,0x2a6b4cf9,2 +np.float32,0xbf6d3cea,0xbf79962f,2 +np.float32,0x7e5a9935,0x5472defb,2 +np.float32,0xbe73a38c,0xbf1ea19c,2 +np.float32,0xbd35e950,0xbeb550f2,2 +np.float32,0x46cc16,0x2a5223d6,2 +np.float32,0x3f005288,0x3f4b5b97,2 +np.float32,0x8034e8b7,0xaa3eb2be,2 
+np.float32,0xbea775fc,0xbf3061cf,2 +np.float32,0xea0e9,0x29f87751,2 +np.float32,0xbf38faaf,0xbf65b89d,2 +np.float32,0xbedf3184,0xbf421bb0,2 +np.float32,0xbe04250c,0xbf015def,2 +np.float32,0x7f56dae8,0x54bfa901,2 +np.float32,0xfebe3e04,0xd492132e,2 +np.float32,0x3e4dc326,0x3f15f19e,2 +np.float32,0x803da197,0xaa48a621,2 +np.float32,0x7eeb35aa,0x549cc7c6,2 +np.float32,0xfebb3eb6,0xd4914dc0,2 +np.float32,0xfed17478,0xd496d5e2,2 +np.float32,0x80243694,0xaa280ed2,2 +np.float32,0x8017e666,0xaa1251d3,2 +np.float32,0xbf07e942,0xbf4f4a3e,2 +np.float32,0xbf578fa6,0xbf71bdab,2 +np.float32,0x7ed8d80f,0x549896b6,2 +np.float32,0x3f2277ae,0x3f5bff11,2 +np.float32,0x7e6f195b,0x547a3cd4,2 +np.float32,0xbf441559,0xbf6a3a91,2 +np.float32,0x7f1fb427,0x54ad9d8d,2 +np.float32,0x71695f,0x2a75e12d,2 +np.float32,0xbd859588,0xbece19a1,2 +np.float32,0x7f5702fc,0x54bfb4eb,2 +np.float32,0x3f040008,0x3f4d4842,2 +np.float32,0x3de00ca5,0x3ef4df89,2 +np.float32,0x3eeabb03,0x3f45658c,2 +np.float32,0x3dfe5e65,0x3eff7480,2 +np.float32,0x1,0x26a14518,2 +np.float32,0x8065e400,0xaa6d4130,2 +np.float32,0xff50e1bb,0xd4bdde07,2 +np.float32,0xbe88635a,0xbf24b7e9,2 +np.float32,0x3f46bfab,0x3f6b4908,2 +np.float32,0xbd85c3c8,0xbece3168,2 +np.float32,0xbe633f64,0xbf1afdb1,2 +np.float32,0xff2c7706,0xd4b21f2a,2 +np.float32,0xbf02816c,0xbf4c812a,2 +np.float32,0x80653aeb,0xaa6cbdab,2 +np.float32,0x3eef1d10,0x3f469e24,2 +np.float32,0x3d9944bf,0x3ed7c36a,2 +np.float32,0x1b03d4,0x2a186b2b,2 +np.float32,0x3f251b7c,0x3f5d2e76,2 +np.float32,0x3edebab0,0x3f41f937,2 +np.float32,0xfefc2148,0xd4a073ff,2 +np.float32,0x7448ee,0x2a77f051,2 +np.float32,0x3bb8a400,0x3e3637ee,2 +np.float32,0x57df36,0x2a61d527,2 +np.float32,0xfd8b9098,0xd425fccb,2 +np.float32,0x7f67627e,0x54c4744d,2 +np.float32,0x801165d7,0xaa039fba,2 +np.float32,0x53aae5,0x2a5e2bfd,2 +np.float32,0x8014012b,0xaa09e4f1,2 +np.float32,0x3f7a2d53,0x3f7e0b4b,2 +np.float32,0x3f5fb700,0x3f74c052,2 +np.float32,0x7f192a06,0x54ab366c,2 +np.float32,0x3f569611,0x3f71603b,2 +np.float32,0x25e2dc,0x2a2a9b65,2 +np.float32,0x8036465e,0xaa405342,2 +np.float32,0x804118e1,0xaa4c5785,2 +np.float32,0xbef08d3e,0xbf4703e1,2 +np.float32,0x3447e2,0x2a3df0be,2 +np.float32,0xbf2a350b,0xbf5f6f8c,2 +np.float32,0xbec87e3e,0xbf3b4a73,2 +np.float32,0xbe99a4a8,0xbf2b6412,2 +np.float32,0x2ea2ae,0x2a36d77e,2 +np.float32,0xfcb69600,0xd3e4b9e3,2 +np.float32,0x717700,0x2a75eb06,2 +np.float32,0xbf4e81ce,0xbf6e4ecc,2 +np.float32,0xbe2021ac,0xbf09ebee,2 +np.float32,0xfef94eee,0xd49fda31,2 +np.float32,0x8563e,0x29ce0015,2 +np.float32,0x7f5d0ca5,0x54c17c0f,2 +np.float32,0x3f16459a,0x3f56590f,2 +np.float32,0xbe12f7bc,0xbf0608a0,2 +np.float32,0x3f10fd3d,0x3f53ce5f,2 +np.float32,0x3ca5e1b0,0x3e8b8d96,2 +np.float32,0xbe5288e0,0xbf17181f,2 +np.float32,0xbf7360f6,0xbf7bb8c9,2 +np.float32,0x7e989d33,0x5487ba88,2 +np.float32,0x3ea7b5dc,0x3f307839,2 +np.float32,0x7e8da0c9,0x548463f0,2 +np.float32,0xfeaf7888,0xd48e3122,2 +np.float32,0x7d90402d,0x5427d321,2 +np.float32,0x72e309,0x2a76f0ee,2 +np.float32,0xbe1faa34,0xbf09c998,2 +np.float32,0xbf2b1652,0xbf5fd1f4,2 +np.float32,0x8051eb0c,0xaa5c9cca,2 +np.float32,0x7edf02bf,0x549a058e,2 +np.float32,0x7fa00000,0x7fe00000,2 +np.float32,0x3f67f873,0x3f77b9c1,2 +np.float32,0x3f276b63,0x3f5e358c,2 +np.float32,0x7eeb4bf2,0x549cccb9,2 +np.float32,0x3bfa2c,0x2a46d675,2 +np.float32,0x3e133c50,0x3f061d75,2 +np.float32,0x3ca302c0,0x3e8abe4a,2 +np.float32,0x802e152e,0xaa361dd5,2 +np.float32,0x3f504810,0x3f6efd0a,2 +np.float32,0xbf43e0b5,0xbf6a2599,2 +np.float32,0x80800000,0xaa800000,2 
+np.float32,0x3f1c0980,0x3f590e03,2 +np.float32,0xbf0084f6,0xbf4b7638,2 +np.float32,0xfee72d32,0xd49be10d,2 +np.float32,0x3f3c00ed,0x3f66f763,2 +np.float32,0x80511e81,0xaa5be492,2 +np.float32,0xfdd1b8a0,0xd43e1f0d,2 +np.float32,0x7d877474,0x54245785,2 +np.float32,0x7f110bfe,0x54a82207,2 +np.float32,0xff800000,0xff800000,2 +np.float32,0x6b6a2,0x29bfa706,2 +np.float32,0xbf5bdfd9,0xbf7357b7,2 +np.float32,0x8025bfa3,0xaa2a6676,2 +np.float32,0x3a3581,0x2a44dd3a,2 +np.float32,0x542c2a,0x2a5e9e2f,2 +np.float32,0xbe1d5650,0xbf091d57,2 +np.float32,0x3e97760d,0x3f2a935e,2 +np.float32,0x7f5dcde2,0x54c1b460,2 +np.float32,0x800bde1e,0xa9e7bbaf,2 +np.float32,0x3e6b9e61,0x3f1cdf07,2 +np.float32,0x7d46c003,0x54143884,2 +np.float32,0x80073fbb,0xa9c49e67,2 +np.float32,0x503c23,0x2a5b1748,2 +np.float32,0x7eb7b070,0x549060c8,2 +np.float32,0xe9d8f,0x29f86456,2 +np.float32,0xbeedd4f0,0xbf464320,2 +np.float32,0x3f40d5d6,0x3f68eda1,2 +np.float32,0xff201f28,0xd4adc44b,2 +np.float32,0xbdf61e98,0xbefca9c7,2 +np.float32,0x3e8a0dc9,0x3f2562e3,2 +np.float32,0xbc0c0c80,0xbe515f61,2 +np.float32,0x2b3c15,0x2a3248e3,2 +np.float32,0x42a7bb,0x2a4df592,2 +np.float32,0x7f337947,0x54b480af,2 +np.float32,0xfec21db4,0xd4930f4b,2 +np.float32,0x7f4fdbf3,0x54bd8e94,2 +np.float32,0x1e2253,0x2a1e1286,2 +np.float32,0x800c4c80,0xa9ea819e,2 +np.float32,0x7e96f5b7,0x54873c88,2 +np.float32,0x7ce4e131,0x53f69ed4,2 +np.float32,0xbead8372,0xbf327b63,2 +np.float32,0x3e15ca7e,0x3f06e2f3,2 +np.float32,0xbf63e17b,0xbf7642da,2 +np.float32,0xff5bdbdb,0xd4c122f9,2 +np.float32,0x3f44411e,0x3f6a4bfd,2 +np.float32,0xfd007da0,0xd40029d2,2 +np.float32,0xbe940168,0xbf2944b7,2 +np.float32,0x80000000,0x80000000,2 +np.float32,0x3d28e356,0x3eb0e1b8,2 +np.float32,0x3eb9fcd8,0x3f36a918,2 +np.float32,0x4f6410,0x2a5a51eb,2 +np.float32,0xbdf18e30,0xbefb1775,2 +np.float32,0x32edbd,0x2a3c49e3,2 +np.float32,0x801f70a5,0xaa2052da,2 +np.float32,0x8045a045,0xaa50f98c,2 +np.float32,0xbdd6cb00,0xbef17412,2 +np.float32,0x3f118f2c,0x3f541557,2 +np.float32,0xbe65c378,0xbf1b8f95,2 +np.float32,0xfd9a9060,0xd42bbb8b,2 +np.float32,0x3f04244f,0x3f4d5b0f,2 +np.float32,0xff05214b,0xd4a3656f,2 +np.float32,0xfe342cd0,0xd463b706,2 +np.float32,0x3f3409a8,0x3f63a836,2 +np.float32,0x80205db2,0xaa21e1e5,2 +np.float32,0xbf37c982,0xbf653a03,2 +np.float32,0x3f36ce8f,0x3f64d17e,2 +np.float32,0x36ffda,0x2a412d61,2 +np.float32,0xff569752,0xd4bf94e6,2 +np.float32,0x802fdb0f,0xaa386c3a,2 +np.float32,0x7ec55a87,0x5493df71,2 +np.float32,0x7f2234c7,0x54ae847e,2 +np.float32,0xbf02df76,0xbf4cb23d,2 +np.float32,0x3d68731a,0x3ec4c156,2 +np.float32,0x8146,0x2921cd8e,2 +np.float32,0x80119364,0xaa041235,2 +np.float32,0xfe6c1c00,0xd47930b5,2 +np.float32,0x8070da44,0xaa757996,2 +np.float32,0xfefbf50c,0xd4a06a9d,2 +np.float32,0xbf01b6a8,0xbf4c170a,2 +np.float32,0x110702,0x2a02aedb,2 +np.float32,0xbf063cd4,0xbf4e6f87,2 +np.float32,0x3f1ff178,0x3f5ad9dd,2 +np.float32,0xbf76dcd4,0xbf7cead0,2 +np.float32,0x80527281,0xaa5d1620,2 +np.float32,0xfea96df8,0xd48c8a7f,2 +np.float32,0x68db02,0x2a6f88b0,2 +np.float32,0x62d971,0x2a6adec7,2 +np.float32,0x3e816fe0,0x3f21df04,2 +np.float32,0x3f586379,0x3f720cc0,2 +np.float32,0x804a3718,0xaa5577ff,2 +np.float32,0x2e2506,0x2a3632b2,2 +np.float32,0x3f297d,0x2a4a4bf3,2 +np.float32,0xbe37aba8,0xbf105f88,2 +np.float32,0xbf18b264,0xbf577ea7,2 +np.float32,0x7f50d02d,0x54bdd8b5,2 +np.float32,0xfee296dc,0xd49ad757,2 +np.float32,0x7ec5137e,0x5493cdb1,2 +np.float32,0x3f4811f4,0x3f6bce3a,2 +np.float32,0xfdff32a0,0xd44af991,2 +np.float32,0x3f6ef140,0x3f7a2ed6,2 
+np.float32,0x250838,0x2a2950b5,2 +np.float32,0x25c28e,0x2a2a6ada,2 +np.float32,0xbe875e50,0xbf244e90,2 +np.float32,0x3e3bdff8,0x3f11776a,2 +np.float32,0x3e9fe493,0x3f2daf17,2 +np.float32,0x804d8599,0xaa5897d9,2 +np.float32,0x3f0533da,0x3f4de759,2 +np.float32,0xbe63023c,0xbf1aefc8,2 +np.float32,0x80636e5e,0xaa6b547f,2 +np.float32,0xff112958,0xd4a82d5d,2 +np.float32,0x3e924112,0x3f28991f,2 +np.float32,0xbe996ffc,0xbf2b507a,2 +np.float32,0x802a7cda,0xaa314081,2 +np.float32,0x8022b524,0xaa25b21e,2 +np.float32,0x3f0808c8,0x3f4f5a43,2 +np.float32,0xbef0ec2a,0xbf471e0b,2 +np.float32,0xff4c2345,0xd4bc6b3c,2 +np.float32,0x25ccc8,0x2a2a7a3b,2 +np.float32,0x7f4467d6,0x54ba0260,2 +np.float32,0x7f506539,0x54bdb846,2 +np.float32,0x412ab4,0x2a4c6a2a,2 +np.float32,0x80672c4a,0xaa6e3ef0,2 +np.float32,0xbddfb7f8,0xbef4c0ac,2 +np.float32,0xbf250bb9,0xbf5d276c,2 +np.float32,0x807dca65,0xaa7e84bd,2 +np.float32,0xbf63b8e0,0xbf763438,2 +np.float32,0xbeed1b0c,0xbf460f6b,2 +np.float32,0x8021594f,0xaa238136,2 +np.float32,0xbebc74c8,0xbf377710,2 +np.float32,0x3e9f8e3b,0x3f2d8fce,2 +np.float32,0x7f50ca09,0x54bdd6d8,2 +np.float32,0x805797c1,0xaa6197df,2 +np.float32,0x3de198f9,0x3ef56f98,2 +np.float32,0xf154d,0x29fb0392,2 +np.float32,0xff7fffff,0xd4cb2ff5,2 +np.float32,0xfed22fa8,0xd49702c4,2 +np.float32,0xbf733736,0xbf7baa64,2 +np.float32,0xbf206a8a,0xbf5b1108,2 +np.float32,0xbca49680,0xbe8b3078,2 +np.float32,0xfecba794,0xd4956e1a,2 +np.float32,0x80126582,0xaa061886,2 +np.float32,0xfee5cc82,0xd49b919f,2 +np.float32,0xbf7ad6ae,0xbf7e4491,2 +np.float32,0x7ea88c81,0x548c4c0c,2 +np.float32,0xbf493a0d,0xbf6c4255,2 +np.float32,0xbf06dda0,0xbf4ec1d4,2 +np.float32,0xff3f6e84,0xd4b86cf6,2 +np.float32,0x3e4fe093,0x3f1674b0,2 +np.float32,0x8048ad60,0xaa53fbde,2 +np.float32,0x7ebb7112,0x54915ac5,2 +np.float32,0x5bd191,0x2a652a0d,2 +np.float32,0xfe3121d0,0xd4626cfb,2 +np.float32,0x7e4421c6,0x546a3f83,2 +np.float32,0x19975b,0x2a15b14f,2 +np.float32,0x801c8087,0xaa1b2a64,2 +np.float32,0xfdf6e950,0xd448c0f6,2 +np.float32,0x74e711,0x2a786083,2 +np.float32,0xbf2b2f2e,0xbf5fdccb,2 +np.float32,0x7ed19ece,0x5496e00b,2 +np.float32,0x7f6f8322,0x54c6ba63,2 +np.float32,0x3e90316d,0x3f27cd69,2 +np.float32,0x7ecb42ce,0x54955571,2 +np.float32,0x3f6d49be,0x3f799aaf,2 +np.float32,0x8053d327,0xaa5e4f9a,2 +np.float32,0x7ebd7361,0x5491df3e,2 +np.float32,0xfdb6eed0,0xd435a7aa,2 +np.float32,0x7f3e79f4,0x54b81e4b,2 +np.float32,0xfe83afa6,0xd4813794,2 +np.float32,0x37c443,0x2a421246,2 +np.float32,0xff075a10,0xd4a44cd8,2 +np.float32,0x3ebc5fe0,0x3f377047,2 +np.float32,0x739694,0x2a77714e,2 +np.float32,0xfe832946,0xd4810b91,2 +np.float32,0x7f2638e6,0x54aff235,2 +np.float32,0xfe87f7a6,0xd4829a3f,2 +np.float32,0x3f50f3f8,0x3f6f3eb8,2 +np.float32,0x3eafa3d0,0x3f333548,2 +np.float32,0xbec26ee6,0xbf39626f,2 +np.float32,0x7e6f924f,0x547a66ff,2 +np.float32,0x7f0baa46,0x54a606f8,2 +np.float32,0xbf6dfc49,0xbf79d939,2 +np.float32,0x7f005709,0x54a1699d,2 +np.float32,0x7ee3d7ef,0x549b2057,2 +np.float32,0x803709a4,0xaa4138d7,2 +np.float32,0x3f7bf49a,0x3f7ea509,2 +np.float32,0x509db7,0x2a5b6ff5,2 +np.float32,0x7eb1b0d4,0x548ec9ff,2 +np.float32,0x7eb996ec,0x5490dfce,2 +np.float32,0xbf1fcbaa,0xbf5ac89e,2 +np.float32,0x3e2c9a98,0x3f0d69cc,2 +np.float32,0x3ea77994,0x3f306312,2 +np.float32,0x3f3cbfe4,0x3f67457c,2 +np.float32,0x8422a,0x29cd5a30,2 +np.float32,0xbd974558,0xbed6d264,2 +np.float32,0xfecee77a,0xd496387f,2 +np.float32,0x3f51876b,0x3f6f76f1,2 +np.float32,0x3b1a25,0x2a45ddad,2 +np.float32,0xfe9912f0,0xd487dd67,2 +np.float32,0x3f3ab13d,0x3f666d99,2 
+np.float32,0xbf35565a,0xbf64341b,2 +np.float32,0x7d4e84aa,0x54162091,2 +np.float32,0x4c2570,0x2a574dea,2 +np.float32,0x7e82dca6,0x5480f26b,2 +np.float32,0x7f5503e7,0x54bf1c8d,2 +np.float32,0xbeb85034,0xbf361c59,2 +np.float32,0x80460a69,0xaa516387,2 +np.float32,0x805fbbab,0xaa68602c,2 +np.float32,0x7d4b4c1b,0x541557b8,2 +np.float32,0xbefa9a0a,0xbf49bfbc,2 +np.float32,0x3dbd233f,0x3ee76e09,2 +np.float32,0x58b6df,0x2a628d50,2 +np.float32,0xfcdcc180,0xd3f3aad9,2 +np.float32,0x423a37,0x2a4d8487,2 +np.float32,0xbed8b32a,0xbf403507,2 +np.float32,0x3f68e85d,0x3f780f0b,2 +np.float32,0x7ee13c4b,0x549a883d,2 +np.float32,0xff2ed4c5,0xd4b2eec1,2 +np.float32,0xbf54dadc,0xbf70b99a,2 +np.float32,0x3f78b0af,0x3f7d8a32,2 +np.float32,0x3f377372,0x3f651635,2 +np.float32,0xfdaa6178,0xd43166bc,2 +np.float32,0x8060c337,0xaa6934a6,2 +np.float32,0x7ec752c2,0x54945cf6,2 +np.float32,0xbd01a760,0xbea1f624,2 +np.float32,0x6f6599,0x2a746a35,2 +np.float32,0x3f6315b0,0x3f75f95b,2 +np.float32,0x7f2baf32,0x54b1da44,2 +np.float32,0x3e400353,0x3f1286d8,2 +np.float32,0x40d3bf,0x2a4c0f15,2 +np.float32,0x7f733aca,0x54c7c03d,2 +np.float32,0x7e5c5407,0x5473828b,2 +np.float32,0x80191703,0xaa14b56a,2 +np.float32,0xbf4fc144,0xbf6ec970,2 +np.float32,0xbf1137a7,0xbf53eacd,2 +np.float32,0x80575410,0xaa615db3,2 +np.float32,0xbd0911d0,0xbea4fe07,2 +np.float32,0x3e98534a,0x3f2ae643,2 +np.float32,0x3f3b089a,0x3f669185,2 +np.float32,0x4fc752,0x2a5aacc1,2 +np.float32,0xbef44ddc,0xbf480b6e,2 +np.float32,0x80464217,0xaa519af4,2 +np.float32,0x80445fae,0xaa4fb6de,2 +np.float32,0x80771cf4,0xaa79eec8,2 +np.float32,0xfd9182e8,0xd4284fed,2 +np.float32,0xff0a5d16,0xd4a58288,2 +np.float32,0x3f33e169,0x3f63973e,2 +np.float32,0x8021a247,0xaa23f820,2 +np.float32,0xbf362522,0xbf648ab8,2 +np.float32,0x3f457cd7,0x3f6ac95e,2 +np.float32,0xbcadf400,0xbe8dc7e2,2 +np.float32,0x80237210,0xaa26dca7,2 +np.float32,0xbf1293c9,0xbf54939f,2 +np.float32,0xbc5e73c0,0xbe744a37,2 +np.float32,0x3c03f980,0x3e4d44df,2 +np.float32,0x7da46f,0x2a7e6b20,2 +np.float32,0x5d4570,0x2a665dd0,2 +np.float32,0x3e93fbac,0x3f294287,2 +np.float32,0x7e6808fd,0x5477bfa4,2 +np.float32,0xff5aa9a6,0xd4c0c925,2 +np.float32,0xbf5206ba,0xbf6fa767,2 +np.float32,0xbf6e513e,0xbf79f6f1,2 +np.float32,0x3ed01c0f,0x3f3da20f,2 +np.float32,0xff47d93d,0xd4bb1704,2 +np.float32,0x7f466cfd,0x54baa514,2 +np.float32,0x665e10,0x2a6d9fc8,2 +np.float32,0x804d0629,0xaa5820e8,2 +np.float32,0x7e0beaa0,0x54514e7e,2 +np.float32,0xbf7fcb6c,0xbf7fee78,2 +np.float32,0x3f6c5b03,0x3f7946dd,2 +np.float32,0x3e941504,0x3f294c30,2 +np.float32,0xbf2749ad,0xbf5e26a1,2 +np.float32,0xfec2a00a,0xd493302d,2 +np.float32,0x3f15a358,0x3f560bce,2 +np.float32,0x3f15c4e7,0x3f561bcd,2 +np.float32,0xfedc8692,0xd499728c,2 +np.float32,0x7e8f6902,0x5484f180,2 +np.float32,0x7f663d62,0x54c42136,2 +np.float32,0x8027ea62,0xaa2d99b4,2 +np.float32,0x3f3d093d,0x3f67636d,2 +np.float32,0x7f118c33,0x54a85382,2 +np.float32,0x803e866a,0xaa499d43,2 +np.float32,0x80053632,0xa9b02407,2 +np.float32,0xbf36dd66,0xbf64d7af,2 +np.float32,0xbf560358,0xbf71292b,2 +np.float32,0x139a8,0x29596bc0,2 +np.float32,0xbe04f75c,0xbf01a26c,2 +np.float32,0xfe1c3268,0xd45920fa,2 +np.float32,0x7ec77f72,0x5494680c,2 +np.float32,0xbedde724,0xbf41bbba,2 +np.float32,0x3e81dbe0,0x3f220bfd,2 +np.float32,0x800373ac,0xa99989d4,2 +np.float32,0x3f7f859a,0x3f7fd72d,2 +np.float32,0x3eb9dc7e,0x3f369e80,2 +np.float32,0xff5f8eb7,0xd4c236b1,2 +np.float32,0xff1c03cb,0xd4ac44ac,2 +np.float32,0x18cfe1,0x2a14285b,2 +np.float32,0x7f21b075,0x54ae54fd,2 +np.float32,0xff490bd8,0xd4bb7680,2 
+np.float32,0xbf15dc22,0xbf5626de,2 +np.float32,0xfe1d5a10,0xd459a9a3,2 +np.float32,0x750544,0x2a7875e4,2 +np.float32,0x8023d5df,0xaa2778b3,2 +np.float32,0x3e42aa08,0x3f1332b2,2 +np.float32,0x3ecaa751,0x3f3bf60d,2 +np.float32,0x0,0x0,2 +np.float32,0x80416da6,0xaa4cb011,2 +np.float32,0x3f4ea9ae,0x3f6e5e22,2 +np.float32,0x2113f4,0x2a230f8e,2 +np.float32,0x3f35c2e6,0x3f64619a,2 +np.float32,0xbf50db8a,0xbf6f3564,2 +np.float32,0xff4d5cea,0xd4bccb8a,2 +np.float32,0x7ee54420,0x549b72d2,2 +np.float32,0x64ee68,0x2a6c81f7,2 +np.float32,0x5330da,0x2a5dbfc2,2 +np.float32,0x80047f88,0xa9a7b467,2 +np.float32,0xbda01078,0xbedae800,2 +np.float32,0xfe96d05a,0xd487315f,2 +np.float32,0x8003cc10,0xa99e7ef4,2 +np.float32,0x8007b4ac,0xa9c8aa3d,2 +np.float32,0x5d4bcf,0x2a66630e,2 +np.float32,0xfdd0c0b0,0xd43dd403,2 +np.float32,0xbf7a1d82,0xbf7e05f0,2 +np.float32,0x74ca33,0x2a784c0f,2 +np.float32,0x804f45e5,0xaa5a3640,2 +np.float32,0x7e6d16aa,0x547988c4,2 +np.float32,0x807d5762,0xaa7e3714,2 +np.float32,0xfecf93d0,0xd4966229,2 +np.float32,0xfecbd25c,0xd4957890,2 +np.float32,0xff7db31c,0xd4ca93b0,2 +np.float32,0x3dac9e18,0x3ee07c4a,2 +np.float32,0xbf4b2d28,0xbf6d0509,2 +np.float32,0xbd4f4c50,0xbebd62e0,2 +np.float32,0xbd2eac40,0xbeb2e0ee,2 +np.float32,0x3d01b69b,0x3ea1fc7b,2 +np.float32,0x7ec63902,0x549416ed,2 +np.float32,0xfcc47700,0xd3ea616d,2 +np.float32,0xbf5ddec2,0xbf7413a1,2 +np.float32,0xff6a6110,0xd4c54c52,2 +np.float32,0xfdfae2a0,0xd449d335,2 +np.float32,0x7e54868c,0x547099cd,2 +np.float32,0x802b5b88,0xaa327413,2 +np.float32,0x80440e72,0xaa4f647a,2 +np.float32,0x3e313c94,0x3f0eaad5,2 +np.float32,0x3ebb492a,0x3f3715a2,2 +np.float32,0xbef56286,0xbf4856d5,2 +np.float32,0x3f0154ba,0x3f4be3a0,2 +np.float32,0xff2df86c,0xd4b2a376,2 +np.float32,0x3ef6a850,0x3f48af57,2 +np.float32,0x3d8d33e1,0x3ed1f22d,2 +np.float32,0x4dd9b9,0x2a58e615,2 +np.float32,0x7f1caf83,0x54ac83c9,2 +np.float32,0xbf7286b3,0xbf7b6d73,2 +np.float32,0x80064f88,0xa9bbbd9f,2 +np.float32,0xbf1f55fa,0xbf5a92db,2 +np.float32,0x546a81,0x2a5ed516,2 +np.float32,0xbe912880,0xbf282d0a,2 +np.float32,0x5df587,0x2a66ee6e,2 +np.float32,0x801f706c,0xaa205279,2 +np.float32,0x58cb6d,0x2a629ece,2 +np.float32,0xfe754f8c,0xd47c62da,2 +np.float32,0xbefb6f4c,0xbf49f8e7,2 +np.float32,0x80000001,0xa6a14518,2 +np.float32,0xbf067837,0xbf4e8df4,2 +np.float32,0x3e8e715c,0x3f271ee4,2 +np.float32,0x8009de9b,0xa9d9ebc8,2 +np.float32,0xbf371ff1,0xbf64f36e,2 +np.float32,0x7f5ce661,0x54c170e4,2 +np.float32,0x3f3c47d1,0x3f671467,2 +np.float32,0xfea5e5a6,0xd48b8eb2,2 +np.float32,0xff62b17f,0xd4c31e15,2 +np.float32,0xff315932,0xd4b3c98f,2 +np.float32,0xbf1c3ca8,0xbf5925b9,2 +np.float32,0x7f800000,0x7f800000,2 +np.float32,0xfdf20868,0xd4476c3b,2 +np.float32,0x5b790e,0x2a64e052,2 +np.float32,0x3f5ddf4e,0x3f7413d4,2 +np.float32,0x7f1a3182,0x54ab9861,2 +np.float32,0x3f4b906e,0x3f6d2b9d,2 +np.float32,0x7ebac760,0x54912edb,2 +np.float32,0x7f626d3f,0x54c30a7e,2 +np.float32,0x3e27b058,0x3f0c0edc,2 +np.float32,0x8041e69c,0xaa4d2de8,2 +np.float32,0x3f42cee0,0x3f69b84a,2 +np.float32,0x7ec5fe83,0x5494085b,2 +np.float32,0x9d3e6,0x29d99cde,2 +np.float32,0x3edc50c0,0x3f41452d,2 +np.float32,0xbf2c463a,0xbf60562c,2 +np.float32,0x800bfa33,0xa9e871e8,2 +np.float32,0x7c9f2c,0x2a7dba4d,2 +np.float32,0x7f2ef9fd,0x54b2fb73,2 +np.float32,0x80741847,0xaa77cdb9,2 +np.float32,0x7e9c462a,0x5488ce1b,2 +np.float32,0x3ea47ec1,0x3f2f55a9,2 +np.float32,0x7f311c43,0x54b3b4f5,2 +np.float32,0x3d8f4c73,0x3ed2facd,2 +np.float32,0x806d7bd2,0xaa7301ef,2 +np.float32,0xbf633d24,0xbf760799,2 
+np.float32,0xff4f9a3f,0xd4bd7a99,2 +np.float32,0x3f6021ca,0x3f74e73d,2 +np.float32,0x7e447015,0x546a5eac,2 +np.float32,0x6bff3c,0x2a71e711,2 +np.float32,0xe9c9f,0x29f85f06,2 +np.float32,0x8009fe14,0xa9dad277,2 +np.float32,0x807cf79c,0xaa7df644,2 +np.float32,0xff440e1b,0xd4b9e608,2 +np.float32,0xbddf9a50,0xbef4b5db,2 +np.float32,0x7f3b1c39,0x54b706fc,2 +np.float32,0x3c7471a0,0x3e7c16a7,2 +np.float32,0x8065b02b,0xaa6d18ee,2 +np.float32,0x7f63a3b2,0x54c36379,2 +np.float32,0xbe9c9d92,0xbf2c7d33,2 +np.float32,0x3d93aad3,0x3ed51a2e,2 +np.float32,0xbf41b040,0xbf694571,2 +np.float32,0x80396b9e,0xaa43f899,2 +np.float64,0x800fa025695f404b,0xaaa4000ff64bb00c,2 +np.float64,0xbfecc00198f98003,0xbfeee0b623fbd94b,2 +np.float64,0x7f9eeb60b03dd6c0,0x55291bf8554bb303,2 +np.float64,0x3fba74485634e890,0x3fde08710bdb148d,2 +np.float64,0xbfdd9a75193b34ea,0xbfe8bf711660a2f5,2 +np.float64,0xbfcf92e17a3f25c4,0xbfe4119eda6f3773,2 +np.float64,0xbfe359e2ba66b3c6,0xbfeb0f7ae97ea142,2 +np.float64,0x20791a5640f24,0x2a9441f13d262bed,2 +np.float64,0x3fe455fbfae8abf8,0x3feb830d63e1022c,2 +np.float64,0xbd112b7b7a226,0x2aa238c097ec269a,2 +np.float64,0x93349ba126694,0x2aa0c363cd74465a,2 +np.float64,0x20300cd440602,0x2a9432b4f4081209,2 +np.float64,0x3fdcfae677b9f5cc,0x3fe892a9ee56fe8d,2 +np.float64,0xbfefaae3f7bf55c8,0xbfefe388066132c4,2 +np.float64,0x1a7d6eb634faf,0x2a92ed9851d29ab5,2 +np.float64,0x7fd5308d39aa6119,0x553be444e30326c6,2 +np.float64,0xff811c7390223900,0xd5205cb404952fa7,2 +np.float64,0x80083d24aff07a4a,0xaaa0285cf764d898,2 +np.float64,0x800633810ccc6703,0xaa9d65341419586b,2 +np.float64,0x800ff456223fe8ac,0xaaa423bbcc24dff1,2 +np.float64,0x7fde5c99aebcb932,0x553f71be7d6d9daa,2 +np.float64,0x3fed961c4b3b2c39,0x3fef2ca146270cac,2 +np.float64,0x7fe744d30c6e89a5,0x554220a4cdc78e62,2 +np.float64,0x3fd8f527c7b1ea50,0x3fe76101085be1cb,2 +np.float64,0xbfc96a14b232d428,0xbfe2ab1a8962606c,2 +np.float64,0xffe85f540cf0bea7,0xd54268dff964519a,2 +np.float64,0x800e3be0fe7c77c2,0xaaa3634efd7f020b,2 +np.float64,0x3feb90d032f721a0,0x3fee72a4579e8b12,2 +np.float64,0xffe05674aaa0ace9,0xd5401c9e3fb4abcf,2 +np.float64,0x3fefc2e32c3f85c6,0x3fefeb940924bf42,2 +np.float64,0xbfecfd89e9f9fb14,0xbfeef6addf73ee49,2 +np.float64,0xf5862717eb0c5,0x2aa3e1428780382d,2 +np.float64,0xffc3003b32260078,0xd53558f92202dcdb,2 +np.float64,0x3feb4c152c36982a,0x3fee5940f7da0825,2 +np.float64,0x3fe7147b002e28f6,0x3fecb2948f46d1e3,2 +np.float64,0x7fe00ad9b4a015b2,0x5540039d15e1da54,2 +np.float64,0x8010000000000000,0xaaa428a2f98d728b,2 +np.float64,0xbfd3a41bfea74838,0xbfe595ab45b1be91,2 +np.float64,0x7fdbfd6e5537fadc,0x553e9a6e1107b8d0,2 +np.float64,0x800151d9d9a2a3b4,0xaa918cd8fb63f40f,2 +np.float64,0x7fe6828401ad0507,0x5541eda05dcd1fcf,2 +np.float64,0x3fdae1e7a1b5c3d0,0x3fe7f711e72ecc35,2 +np.float64,0x7fdf4936133e926b,0x553fc29c8d5edea3,2 +np.float64,0x80079de12d4f3bc3,0xaa9f7b06a9286da4,2 +np.float64,0x3fe1261cade24c39,0x3fe9fe09488e417a,2 +np.float64,0xbfc20dce21241b9c,0xbfe0a842fb207a28,2 +np.float64,0x3fe3285dfa2650bc,0x3feaf85215f59ef9,2 +np.float64,0x7fe42b93aea85726,0x554148c3c3bb35e3,2 +np.float64,0xffe6c74e7f6d8e9c,0xd541ffd13fa36dbd,2 +np.float64,0x3fe73ea139ee7d42,0x3fecc402242ab7d3,2 +np.float64,0xffbd4b46be3a9690,0xd53392de917c72e4,2 +np.float64,0x800caed8df395db2,0xaaa2a811a02e6be4,2 +np.float64,0x800aacdb6c9559b7,0xaaa19d6fbc8feebf,2 +np.float64,0x839fb4eb073f7,0x2aa0264b98327c12,2 +np.float64,0xffd0157ba9a02af8,0xd5397157a11c0d05,2 +np.float64,0x7fddc8ff173b91fd,0x553f3e7663fb2ac7,2 +np.float64,0x67b365facf66d,0x2a9dd4d838b0d853,2 
+np.float64,0xffe12e7fc7225cff,0xd5406272a83a8e1b,2 +np.float64,0x7fea5b19a034b632,0x5542e567658b3e36,2 +np.float64,0x124989d824932,0x2a90ba8dc7a39532,2 +np.float64,0xffe12ef098225de0,0xd54062968450a078,2 +np.float64,0x3fea2f44a3f45e8a,0x3fedee3c461f4716,2 +np.float64,0x3fe6b033e66d6068,0x3fec88c8035e06b1,2 +np.float64,0x3fe928a2ccf25146,0x3fed88d4cde7a700,2 +np.float64,0x3feead27e97d5a50,0x3fef8d7537d82e60,2 +np.float64,0x8003ab80b6875702,0xaa98adfedd7715a9,2 +np.float64,0x45a405828b481,0x2a9a1fa99a4eff1e,2 +np.float64,0x8002ddebad85bbd8,0xaa96babfda4e0031,2 +np.float64,0x3fc278c32824f186,0x3fe0c8e7c979fbd5,2 +np.float64,0x2e10fffc5c221,0x2a96c30a766d06fa,2 +np.float64,0xffd6ba8c2ead7518,0xd53c8d1d92bc2788,2 +np.float64,0xbfeb5ec3a036bd87,0xbfee602bbf0a0d01,2 +np.float64,0x3fed5bd58f7ab7ab,0x3fef181bf591a4a7,2 +np.float64,0x7feb5274a5b6a4e8,0x55431fcf81876218,2 +np.float64,0xaf8fd6cf5f1fb,0x2aa1c6edbb1e2aaf,2 +np.float64,0x7fece718f179ce31,0x55437c74efb90933,2 +np.float64,0xbfa3c42d0c278860,0xbfd5a16407c77e73,2 +np.float64,0x800b5cff0576b9fe,0xaaa1fc4ecb0dec4f,2 +np.float64,0x800be89ae557d136,0xaaa244d115fc0963,2 +np.float64,0x800d2578f5ba4af2,0xaaa2e18a3a3fc134,2 +np.float64,0x80090ff93e321ff3,0xaaa0add578e3cc3c,2 +np.float64,0x28c5a240518c,0x2a81587cccd7e202,2 +np.float64,0x7fec066929780cd1,0x55434971435d1069,2 +np.float64,0x7fc84d4d15309a99,0x55372c204515694f,2 +np.float64,0xffe070a75de0e14e,0xd54025365046dad2,2 +np.float64,0x7fe5b27cc36b64f9,0x5541b5b822f0b6ca,2 +np.float64,0x3fdea35ac8bd46b6,0x3fe9086a0fb792c2,2 +np.float64,0xbfe79996f7af332e,0xbfece9571d37a5b3,2 +np.float64,0xffdfb47f943f6900,0xd53fe6c14c3366db,2 +np.float64,0xc015cf63802ba,0x2aa2517164d075f4,2 +np.float64,0x7feba98948375312,0x5543340b5b1f1181,2 +np.float64,0x8008678e6550cf1d,0xaaa043e7cea90da5,2 +np.float64,0x3fb11b92fa223726,0x3fd9f8b53be4d90b,2 +np.float64,0x7fc9b18cf0336319,0x55379b42da882047,2 +np.float64,0xbfe5043e736a087d,0xbfebd0c67db7a8e3,2 +np.float64,0x7fde88546a3d10a8,0x553f80cfe5bcf5fe,2 +np.float64,0x8006a6c82dcd4d91,0xaa9e171d182ba049,2 +np.float64,0xbfa0f707ac21ee10,0xbfd48e5d3faa1699,2 +np.float64,0xbfe7716bffaee2d8,0xbfecd8e6abfb8964,2 +np.float64,0x9511ccab2a23a,0x2aa0d56d748f0313,2 +np.float64,0x8003ddb9b847bb74,0xaa991ca06fd9d308,2 +np.float64,0x80030710fac60e23,0xaa9725845ac95fe8,2 +np.float64,0xffece5bbaeb9cb76,0xd5437c2670f894f4,2 +np.float64,0x3fd9be5c72b37cb9,0x3fe79f2e932a5708,2 +np.float64,0x1f050cca3e0a3,0x2a93f36499fe5228,2 +np.float64,0x3fd5422becaa8458,0x3fe6295d6150df58,2 +np.float64,0xffd72c050e2e580a,0xd53cbc52d73b495f,2 +np.float64,0xbfe66d5235ecdaa4,0xbfec6ca27e60bf23,2 +np.float64,0x17ac49a42f58a,0x2a923b5b757087a0,2 +np.float64,0xffd39edc40273db8,0xd53b2f7bb99b96bf,2 +np.float64,0x7fde6cf009bcd9df,0x553f77614eb30d75,2 +np.float64,0x80042b4c3fa85699,0xaa99c05fbdd057db,2 +np.float64,0xbfde5547f8bcaa90,0xbfe8f3147d67a940,2 +np.float64,0xbfdd02f9bf3a05f4,0xbfe894f2048aa3fe,2 +np.float64,0xbfa20ec82c241d90,0xbfd4fd02ee55aac7,2 +np.float64,0x8002f670f8c5ece3,0xaa96fad7e53dd479,2 +np.float64,0x80059f24d7eb3e4a,0xaa9c7312dae0d7bc,2 +np.float64,0x7fe6ae7423ad5ce7,0x5541f9430be53062,2 +np.float64,0xe135ea79c26be,0x2aa350d8f8c526e1,2 +np.float64,0x3fec188ce4f8311a,0x3feea44d21c23f68,2 +np.float64,0x800355688286aad2,0xaa97e6ca51eb8357,2 +np.float64,0xa2d6530b45acb,0x2aa15635bbd366e8,2 +np.float64,0x600e0150c01c1,0x2a9d1456ea6c239c,2 +np.float64,0x8009c30863338611,0xaaa118f94b188bcf,2 +np.float64,0x3fe7e4c0dfefc982,0x3fed07e8480b8c07,2 +np.float64,0xbfddac6407bb58c8,0xbfe8c46f63a50225,2 
+np.float64,0xbc85e977790bd,0x2aa2344636ed713d,2 +np.float64,0xfff0000000000000,0xfff0000000000000,2 +np.float64,0xffcd1570303a2ae0,0xd5389a27d5148701,2 +np.float64,0xbf937334d026e660,0xbfd113762e4e29a7,2 +np.float64,0x3fdbfdaa9b37fb55,0x3fe84a425fdff7df,2 +np.float64,0xffc10800f5221000,0xd5349535ffe12030,2 +np.float64,0xaf40f3755e81f,0x2aa1c443af16cd27,2 +np.float64,0x800f7da34f7efb47,0xaaa3f14bf25fc89f,2 +np.float64,0xffe4a60125a94c02,0xd5416b764a294128,2 +np.float64,0xbf8e25aa903c4b40,0xbfcf5ebc275b4789,2 +np.float64,0x3fca681bbb34d038,0x3fe2e882bcaee320,2 +np.float64,0xbfd0f3c9c1a1e794,0xbfe48d0df7b47572,2 +np.float64,0xffeb99b49d373368,0xd5433060dc641910,2 +np.float64,0x3fe554fb916aa9f8,0x3febf437cf30bd67,2 +np.float64,0x80079518d0af2a32,0xaa9f6ee87044745a,2 +np.float64,0x5e01a8a0bc036,0x2a9cdf0badf222c3,2 +np.float64,0xbfea9831b3f53064,0xbfee1601ee953ab3,2 +np.float64,0xbfc369d1a826d3a4,0xbfe110b675c311e0,2 +np.float64,0xa82e640d505cd,0x2aa1863d4e523b9c,2 +np.float64,0x3fe506d70a2a0dae,0x3febd1eba3aa83fa,2 +np.float64,0xcbacba7197598,0x2aa2adeb9927f1f2,2 +np.float64,0xc112d6038225b,0x2aa25978f12038b0,2 +np.float64,0xffa7f5f44c2febf0,0xd52d0ede02d4e18b,2 +np.float64,0x8006f218e34de433,0xaa9e870cf373b4eb,2 +np.float64,0xffe6d9a5d06db34b,0xd54204a4adc608c7,2 +np.float64,0x7fe717210eae2e41,0x554214bf3e2b5228,2 +np.float64,0xbfdd4b45cdba968c,0xbfe8a94c7f225f8e,2 +np.float64,0x883356571066b,0x2aa055ab0b2a8833,2 +np.float64,0x3fe307fc02a60ff8,0x3feae9175053288f,2 +np.float64,0x3fefa985f77f530c,0x3fefe31289446615,2 +np.float64,0x8005698a98aad316,0xaa9c17814ff7d630,2 +np.float64,0x3fea77333c74ee66,0x3fee098ba70e10fd,2 +np.float64,0xbfd1d00b0023a016,0xbfe4e497fd1cbea1,2 +np.float64,0x80009b0c39813619,0xaa8b130a6909cc3f,2 +np.float64,0x3fdbeb896fb7d714,0x3fe84502ba5437f8,2 +np.float64,0x3fb6e7e3562dcfc7,0x3fdca00d35c389ad,2 +np.float64,0xb2d46ebf65a8e,0x2aa1e2fe158d0838,2 +np.float64,0xbfd5453266aa8a64,0xbfe62a6a74c8ef6e,2 +np.float64,0x7fe993aa07732753,0x5542b5438bf31cb7,2 +np.float64,0xbfda5a098cb4b414,0xbfe7ce6d4d606203,2 +np.float64,0xbfe40c3ce068187a,0xbfeb61a32c57a6d0,2 +np.float64,0x3fcf17671d3e2ed0,0x3fe3f753170ab686,2 +np.float64,0xbfe4f814b6e9f02a,0xbfebcb67c60b7b08,2 +np.float64,0x800efedf59fdfdbf,0xaaa3ba4ed44ad45a,2 +np.float64,0x800420b556e8416b,0xaa99aa7fb14edeab,2 +np.float64,0xbf6e4ae6403c9600,0xbfc3cb2b29923989,2 +np.float64,0x3fda5c760a34b8ec,0x3fe7cf2821c52391,2 +np.float64,0x7f898faac0331f55,0x5522b44a01408188,2 +np.float64,0x3fd55af4b7aab5e9,0x3fe631f6d19503b3,2 +np.float64,0xbfa30a255c261450,0xbfd55caf0826361d,2 +np.float64,0x7fdfb801343f7001,0x553fe7ee50b9199a,2 +np.float64,0x7fa89ee91c313dd1,0x552d528ca2a4d659,2 +np.float64,0xffea72921d34e524,0xd542eb01af2e470d,2 +np.float64,0x3feddf0f33fbbe1e,0x3fef462b67fc0a91,2 +np.float64,0x3fe36700b566ce01,0x3feb1596caa8eff7,2 +np.float64,0x7fe6284a25ac5093,0x5541d58be3956601,2 +np.float64,0xffda16f7c8b42df0,0xd53de4f722485205,2 +np.float64,0x7f9355b94026ab72,0x552578cdeb41d2ca,2 +np.float64,0xffd3a9b022275360,0xd53b347b02dcea21,2 +np.float64,0x3fcb7f4f4a36fe9f,0x3fe32a40e9f6c1aa,2 +np.float64,0x7fdb958836372b0f,0x553e746103f92111,2 +np.float64,0x3fd37761c0a6eec4,0x3fe5853c5654027e,2 +np.float64,0x3fe449f1a2e893e4,0x3feb7d9e4eacc356,2 +np.float64,0x80077dfbef0efbf9,0xaa9f4ed788d2fadd,2 +np.float64,0x4823aa7890476,0x2a9a6eb4b653bad5,2 +np.float64,0xbfede01a373bc034,0xbfef468895fbcd29,2 +np.float64,0xbfe2bac5f125758c,0xbfeac4811c4dd66f,2 +np.float64,0x3fec10373af8206e,0x3feea14529e0f178,2 
+np.float64,0x3fe305e30ca60bc6,0x3feae81a2f9d0302,2 +np.float64,0xa9668c5f52cd2,0x2aa1910e3a8f2113,2 +np.float64,0xbfd98b1717b3162e,0xbfe78f75995335d2,2 +np.float64,0x800fa649c35f4c94,0xaaa402ae79026a8f,2 +np.float64,0xbfb07dacf620fb58,0xbfd9a7d33d93a30f,2 +np.float64,0x80015812f382b027,0xaa91a843e9c85c0e,2 +np.float64,0x3fc687d96c2d0fb3,0x3fe1ef0ac16319c5,2 +np.float64,0xbfecad2ecd795a5e,0xbfeed9f786697af0,2 +np.float64,0x1608c1242c119,0x2a91cd11e9b4ccd2,2 +np.float64,0x6df775e8dbeef,0x2a9e6ba8c71130eb,2 +np.float64,0xffe96e9332b2dd26,0xd542ac342d06299b,2 +np.float64,0x7fecb6a3b8396d46,0x5543718af8162472,2 +np.float64,0x800d379f893a6f3f,0xaaa2ea36bbcb9308,2 +np.float64,0x3f924cdb202499b6,0x3fd0bb90af8d1f79,2 +np.float64,0x0,0x0,2 +np.float64,0x7feaf3b365f5e766,0x5543099a160e2427,2 +np.float64,0x3fea169ed0742d3e,0x3fede4d526e404f8,2 +np.float64,0x7feaf5f2f775ebe5,0x55430a2196c5f35a,2 +np.float64,0xbfc80d4429301a88,0xbfe2541f2ddd3334,2 +np.float64,0xffc75203b32ea408,0xd536db2837068689,2 +np.float64,0xffed2850e63a50a1,0xd5438b1217b72b8a,2 +np.float64,0x7fc16b0e7f22d61c,0x5534bcd0bfddb6f0,2 +np.float64,0x7feee8ed09fdd1d9,0x5543ed5b3ca483ab,2 +np.float64,0x7fb6c7ee662d8fdc,0x5531fffb5d46dafb,2 +np.float64,0x3fd77cebf8aef9d8,0x3fe6e9242e2bd29d,2 +np.float64,0x3f81c33f70238680,0x3fca4c7f3c9848f7,2 +np.float64,0x3fd59fea92ab3fd5,0x3fe649c1558cadd5,2 +np.float64,0xffeba82d4bf7505a,0xd54333bad387f7bd,2 +np.float64,0xffd37630e1a6ec62,0xd53b1ca62818c670,2 +np.float64,0xffec2c1e70b8583c,0xd5435213dcd27c22,2 +np.float64,0x7fec206971f840d2,0x55434f6660a8ae41,2 +np.float64,0x3fed2964adba52c9,0x3fef0642fe72e894,2 +np.float64,0xffd08e30d6211c62,0xd539b060e0ae02da,2 +np.float64,0x3e5f976c7cbf4,0x2a992e6ff991a122,2 +np.float64,0xffe6eee761adddce,0xd5420a393c67182f,2 +np.float64,0xbfe8ec9a31f1d934,0xbfed714426f58147,2 +np.float64,0x7fefffffffffffff,0x554428a2f98d728b,2 +np.float64,0x3fb3ae8b2c275d16,0x3fdb36b81b18a546,2 +np.float64,0x800f73df4dfee7bf,0xaaa3ed1a3e2cf49c,2 +np.float64,0xffd0c8873b21910e,0xd539ce6a3eab5dfd,2 +np.float64,0x3facd6c49439ad80,0x3fd8886f46335df1,2 +np.float64,0x3935859c726b2,0x2a98775f6438dbb1,2 +np.float64,0x7feed879fbfdb0f3,0x5543e9d1ac239469,2 +np.float64,0xbfe84dd990f09bb3,0xbfed323af09543b1,2 +np.float64,0xbfe767cc5a6ecf98,0xbfecd4f39aedbacb,2 +np.float64,0xffd8bd91d5b17b24,0xd53d5eb3734a2609,2 +np.float64,0xbfe13edeb2a27dbe,0xbfea0a856f0b9656,2 +np.float64,0xd933dd53b267c,0x2aa3158784e428c9,2 +np.float64,0xbfef6fef987edfdf,0xbfefcfb1c160462b,2 +np.float64,0x8009eeda4893ddb5,0xaaa13268a41045b1,2 +np.float64,0xab48c7a156919,0x2aa1a1a9c124c87d,2 +np.float64,0xa997931d532f3,0x2aa192bfe5b7bbb4,2 +np.float64,0xffe39ce8b1e739d1,0xd5411fa1c5c2cbd8,2 +np.float64,0x7e7ac2f6fcf59,0x2a9fdf6f263a9e9f,2 +np.float64,0xbfee1e35a6fc3c6b,0xbfef5c25d32b4047,2 +np.float64,0xffe5589c626ab138,0xd5419d220cc9a6da,2 +np.float64,0x7fe12509bf224a12,0x55405f7036dc5932,2 +np.float64,0xa6f15ba94de2c,0x2aa17b3367b1fc1b,2 +np.float64,0x3fca8adbfa3515b8,0x3fe2f0ca775749e5,2 +np.float64,0xbfcb03aa21360754,0xbfe30d5b90ca41f7,2 +np.float64,0x3fefafb2da7f5f66,0x3fefe5251aead4e7,2 +np.float64,0xffd90a59d23214b4,0xd53d7cf63a644f0e,2 +np.float64,0x3fba499988349333,0x3fddf84154fab7e5,2 +np.float64,0x800a76a0bc54ed42,0xaaa17f68cf67f2fa,2 +np.float64,0x3fea33d15bb467a3,0x3fedeff7f445b2ff,2 +np.float64,0x8005d9b0726bb362,0xaa9cd48624afeca9,2 +np.float64,0x7febf42e9a77e85c,0x55434541d8073376,2 +np.float64,0xbfedfc4469bbf889,0xbfef505989f7ee7d,2 +np.float64,0x8001211f1422423f,0xaa90a9889d865349,2 
+np.float64,0x800e852f7fdd0a5f,0xaaa3845f11917f8e,2 +np.float64,0xffefd613c87fac27,0xd5441fd17ec669b4,2 +np.float64,0x7fed2a74543a54e8,0x55438b8c637da8b8,2 +np.float64,0xb83d50ff707aa,0x2aa210b4fc11e4b2,2 +np.float64,0x10000000000000,0x2aa428a2f98d728b,2 +np.float64,0x474ad9208e97,0x2a84e5a31530368a,2 +np.float64,0xffd0c5498ea18a94,0xd539ccc0e5cb425e,2 +np.float64,0x8001a8e9c82351d4,0xaa92f1aee6ca5b7c,2 +np.float64,0xd28db1e5a51b6,0x2aa2e328c0788f4a,2 +np.float64,0x3bf734ac77ee7,0x2a98da65c014b761,2 +np.float64,0x3fe56e17c96adc30,0x3febff2b6b829b7a,2 +np.float64,0x7783113eef063,0x2a9f46c3f09eb42c,2 +np.float64,0x3fd69d4e42ad3a9d,0x3fe69f83a21679f4,2 +np.float64,0x3fd34f4841a69e90,0x3fe5766b3c771616,2 +np.float64,0x3febb49895b76931,0x3fee7fcb603416c9,2 +np.float64,0x7fe8d6cb55f1ad96,0x554286c3b3bf4313,2 +np.float64,0xbfe67c6ba36cf8d8,0xbfec730218f2e284,2 +np.float64,0xffef9d97723f3b2e,0xd54413e38b6c29be,2 +np.float64,0x12d8cd2a25b1b,0x2a90e5ccd37b8563,2 +np.float64,0x81fe019103fc0,0x2aa01524155e73c5,2 +np.float64,0x7fe95d546f72baa8,0x5542a7fabfd425ff,2 +np.float64,0x800e742f1f9ce85e,0xaaa37cbe09e1f874,2 +np.float64,0xffd96bd3a732d7a8,0xd53da3086071264a,2 +np.float64,0x4ef2691e9de4e,0x2a9b3d316047fd6d,2 +np.float64,0x1a91684c3522e,0x2a92f25913c213de,2 +np.float64,0x3d5151b87aa2b,0x2a9909dbd9a44a84,2 +np.float64,0x800d9049435b2093,0xaaa31424e32d94a2,2 +np.float64,0xffe5b25fcc2b64bf,0xd541b5b0416b40b5,2 +np.float64,0xffe0eb784c21d6f0,0xd5404d083c3d6bc6,2 +np.float64,0x8007ceefbf0f9de0,0xaa9fbe0d739368b4,2 +np.float64,0xb78529416f0b,0x2a8ca3b29b5b3f18,2 +np.float64,0x7fba61130034c225,0x5532e6d4ca0f2918,2 +np.float64,0x3fba8d67ae351acf,0x3fde11efd6239b09,2 +np.float64,0x3fe7f24c576fe498,0x3fed0d63947a854d,2 +np.float64,0x2bb58dec576b3,0x2a965de7fca12aff,2 +np.float64,0xbfe86ceec4f0d9de,0xbfed3ea7f1d084e2,2 +np.float64,0x7fd1a7f7bca34fee,0x553a3f01b67fad2a,2 +np.float64,0x3fd9a43acfb34874,0x3fe7972dc5d8dfd6,2 +np.float64,0x7fd9861acdb30c35,0x553dad3b1bbb3b4d,2 +np.float64,0xffecc0c388398186,0xd54373d3b903deec,2 +np.float64,0x3fa6f86e9c2df0e0,0x3fd6bdbe40fcf710,2 +np.float64,0x800ddd99815bbb33,0xaaa33820d2f889bb,2 +np.float64,0x7fe087089b610e10,0x55402c868348a6d3,2 +np.float64,0x3fdf43d249be87a5,0x3fe933d29fbf7c23,2 +np.float64,0x7fe4f734c7a9ee69,0x5541822e56c40725,2 +np.float64,0x3feb39a9d3b67354,0x3fee526bf1f69f0e,2 +np.float64,0x3fe61454a0ec28a9,0x3fec46d7c36f7566,2 +np.float64,0xbfeafaa0a375f541,0xbfee3af2e49d457a,2 +np.float64,0x3fda7378e1b4e6f0,0x3fe7d613a3f92c40,2 +np.float64,0xe3e31c5fc7c64,0x2aa3645c12e26171,2 +np.float64,0xbfe97a556df2f4ab,0xbfeda8aa84cf3544,2 +np.float64,0xff612f9c80225f00,0xd514a51e5a2a8a97,2 +np.float64,0x800c51c8a0f8a391,0xaaa279fe7d40b50b,2 +np.float64,0xffd6f9d2312df3a4,0xd53ca783a5f8d110,2 +np.float64,0xbfead48bd7f5a918,0xbfee2cb2f89c5e57,2 +np.float64,0x800f5949e89eb294,0xaaa3e1a67a10cfef,2 +np.float64,0x800faf292b7f5e52,0xaaa40675e0c96cfd,2 +np.float64,0xbfedc238453b8470,0xbfef3c179d2d0209,2 +np.float64,0x3feb0443c5760888,0x3fee3e8bf29089c2,2 +np.float64,0xb26f69e164ded,0x2aa1df9f3dd7d765,2 +np.float64,0x3fcacdc053359b80,0x3fe300a67765b667,2 +np.float64,0x3fe8b274647164e8,0x3fed5a4cd4da8155,2 +np.float64,0x291e6782523ce,0x2a95ea7ac1b13a68,2 +np.float64,0xbfc4fc094e29f814,0xbfe1838671fc8513,2 +np.float64,0x3fbf1301f23e2600,0x3fdfb03a6f13e597,2 +np.float64,0xffeb36554ab66caa,0xd543193d8181e4f9,2 +np.float64,0xbfd969a52db2d34a,0xbfe78528ae61f16d,2 +np.float64,0x800cccd04d3999a1,0xaaa2b6b7a2d2d2d6,2 +np.float64,0x808eb4cb011d7,0x2aa005effecb2b4a,2 
+np.float64,0x7fe839b3f9b07367,0x55425f61e344cd6d,2 +np.float64,0xbfeb25b6ed764b6e,0xbfee4b0234fee365,2 +np.float64,0xffefffffffffffff,0xd54428a2f98d728b,2 +np.float64,0xbfe01305da60260c,0xbfe9700b784af7e9,2 +np.float64,0xffcbf36b0a37e6d8,0xd538474b1d74ffe1,2 +np.float64,0xffaeebe3e83dd7c0,0xd52fa2e8dabf7209,2 +np.float64,0xbfd9913bf0b32278,0xbfe7915907aab13c,2 +np.float64,0xbfe7d125d9efa24c,0xbfecfff563177706,2 +np.float64,0xbfee98d23cbd31a4,0xbfef867ae393e446,2 +np.float64,0x3fe30efb67e61df6,0x3feaec6344633d11,2 +np.float64,0x1,0x2990000000000000,2 +np.float64,0x7fd5524fd3aaa49f,0x553bf30d18ab877e,2 +np.float64,0xc98b403f93168,0x2aa29d2fadb13c07,2 +np.float64,0xffe57080046ae100,0xd541a3b1b687360e,2 +np.float64,0x7fe20bade5e4175b,0x5540a79b94294f40,2 +np.float64,0x3fe155400a22aa80,0x3fea15c45f5b5837,2 +np.float64,0x7fe428dc8f6851b8,0x554147fd2ce93cc1,2 +np.float64,0xffefb77eb67f6efc,0xd544195dcaff4980,2 +np.float64,0x3fe49e733b293ce6,0x3feba394b833452a,2 +np.float64,0x38e01e3e71c05,0x2a986b2c955bad21,2 +np.float64,0x7fe735eb376e6bd5,0x55421cc51290d92d,2 +np.float64,0xbfd81d8644b03b0c,0xbfe71ce6d6fbd51a,2 +np.float64,0x8009a32325134647,0xaaa10645d0e6b0d7,2 +np.float64,0x56031ab8ac064,0x2a9c074be40b1f80,2 +np.float64,0xff8989aa30331340,0xd522b2d319a0ac6e,2 +np.float64,0xbfd6c183082d8306,0xbfe6ab8ffb3a8293,2 +np.float64,0x7ff8000000000000,0x7ff8000000000000,2 +np.float64,0xbfe17b68b1e2f6d2,0xbfea28dac8e0c457,2 +np.float64,0x3fbb50e42236a1c8,0x3fde5b090d51e3bd,2 +np.float64,0xffc2bb7cbf2576f8,0xd5353f1b3571c17f,2 +np.float64,0xbfe7576bca6eaed8,0xbfecce388241f47c,2 +np.float64,0x3fe7b52b04ef6a56,0x3fecf495bef99e7e,2 +np.float64,0xffe5511af82aa236,0xd5419b11524e8350,2 +np.float64,0xbfe66d5edf2cdabe,0xbfec6ca7d7b5be8c,2 +np.float64,0xc84a0ba790942,0x2aa29346f16a2cb4,2 +np.float64,0x6db5e7a0db6be,0x2a9e659c0e8244a0,2 +np.float64,0x7fef8f7b647f1ef6,0x554410e67af75d27,2 +np.float64,0xbfe2b4ada7e5695c,0xbfeac1997ec5a064,2 +np.float64,0xbfe99372e03326e6,0xbfedb2662b287543,2 +np.float64,0x3fa45d352428ba6a,0x3fd5d8a895423abb,2 +np.float64,0x3fa029695c2052d3,0x3fd439f858998886,2 +np.float64,0xffe0a9bd3261537a,0xd54037d0cd8bfcda,2 +np.float64,0xbfef83e09a7f07c1,0xbfefd66a4070ce73,2 +np.float64,0x7fee3dcc31fc7b97,0x5543c8503869407e,2 +np.float64,0xffbd16f1603a2de0,0xd533872fa5be978b,2 +np.float64,0xbfe8173141b02e62,0xbfed1c478614c6f4,2 +np.float64,0xbfef57aa277eaf54,0xbfefc77fdab27771,2 +np.float64,0x7fe883a02f31073f,0x554271ff0e3208da,2 +np.float64,0xe3adb63bc75b7,0x2aa362d833d0e41c,2 +np.float64,0x8001c430bac38862,0xaa93575026d26510,2 +np.float64,0x12fb347225f67,0x2a90f00eb9edb3fe,2 +np.float64,0x3fe53f83cbaa7f08,0x3febead40de452c2,2 +np.float64,0xbfe7f67227efece4,0xbfed0f10e32ad220,2 +np.float64,0xb8c5b45d718b7,0x2aa2152912cda86d,2 +np.float64,0x3fd23bb734a4776e,0x3fe50e5d3008c095,2 +np.float64,0x8001fd558ee3faac,0xaa941faa1f7ed450,2 +np.float64,0xffe6bbeda9ed77db,0xd541fcd185a63afa,2 +np.float64,0x4361d79086c3c,0x2a99d692237c30b7,2 +np.float64,0xbfd012f004a025e0,0xbfe43093e290fd0d,2 +np.float64,0xffe1d8850423b10a,0xd54097cf79d8d01e,2 +np.float64,0x3fccf4df7939e9bf,0x3fe37f8cf8be6436,2 +np.float64,0x8000546bc6c0a8d8,0xaa861bb3588556f2,2 +np.float64,0xbfecb4d6ba7969ae,0xbfeedcb6239135fe,2 +np.float64,0xbfaeb425cc3d6850,0xbfd90cfc103bb896,2 +np.float64,0x800ec037ec7d8070,0xaaa39eae8bde9774,2 +np.float64,0xbfeeaf863dfd5f0c,0xbfef8e4514772a8a,2 +np.float64,0xffec67c6c4b8cf8d,0xd5435fad89f900cf,2 +np.float64,0x3fda4498da348932,0x3fe7c7f6b3f84048,2 +np.float64,0xbfd05fd3dea0bfa8,0xbfe4509265a9b65f,2 
+np.float64,0x3fe42cc713a8598e,0x3feb706ba9cd533c,2 +np.float64,0xec22d4d7d845b,0x2aa39f8cccb9711c,2 +np.float64,0x7fda30606c3460c0,0x553deea865065196,2 +np.float64,0xbfd58cba8bab1976,0xbfe64327ce32d611,2 +np.float64,0xadd521c75baa4,0x2aa1b7efce201a98,2 +np.float64,0x7fed43c1027a8781,0x55439131832b6429,2 +np.float64,0x800bee278fb7dc4f,0xaaa247a71e776db4,2 +np.float64,0xbfe9be5dd2737cbc,0xbfedc2f9501755b0,2 +np.float64,0x8003f4854447e90b,0xaa994d9b5372b13b,2 +np.float64,0xbfe5d0f867eba1f1,0xbfec29f8dd8b33a4,2 +np.float64,0x3fd79102d5af2206,0x3fe6efaa7a1efddb,2 +np.float64,0xbfeae783c835cf08,0xbfee33cdb4a44e81,2 +np.float64,0x3fcf1713e83e2e28,0x3fe3f7414753ddfb,2 +np.float64,0xffe5ab3cff2b567a,0xd541b3bf0213274a,2 +np.float64,0x7fe0fc65d8a1f8cb,0x554052761ac96386,2 +np.float64,0x7e81292efd026,0x2a9fdff8c01ae86f,2 +np.float64,0x80091176039222ec,0xaaa0aebf0565dfa6,2 +np.float64,0x800d2bf5ab5a57ec,0xaaa2e4a4c31e7e29,2 +np.float64,0xffd1912ea923225e,0xd53a33b2856726ab,2 +np.float64,0x800869918ed0d323,0xaaa0453408e1295d,2 +np.float64,0xffba0898fa341130,0xd532d19b202a9646,2 +np.float64,0xbfe09fac29613f58,0xbfe9b9687b5811a1,2 +np.float64,0xbfbd4ae82e3a95d0,0xbfdf1220f6f0fdfa,2 +np.float64,0xffea11d27bb423a4,0xd542d3d3e1522474,2 +np.float64,0xbfe6b05705ad60ae,0xbfec88d6bcab2683,2 +np.float64,0x3fe624a3f2ec4948,0x3fec4dcc78ddf871,2 +np.float64,0x53483018a6907,0x2a9bba8f92006b69,2 +np.float64,0xbfec0a6eeb7814de,0xbfee9f2a741248d7,2 +np.float64,0x3fe8c8ce6371919d,0x3fed63250c643482,2 +np.float64,0xbfe26b0ef964d61e,0xbfea9e511db83437,2 +np.float64,0xffa0408784208110,0xd52987f62c369ae9,2 +np.float64,0xffc153abc322a758,0xd534b384b5c5fe63,2 +np.float64,0xbfbdce88a63b9d10,0xbfdf4065ef0b01d4,2 +np.float64,0xffed4a4136fa9482,0xd54392a450f8b0af,2 +np.float64,0x8007aa18748f5432,0xaa9f8bd2226d4299,2 +np.float64,0xbfdab4d3e8b569a8,0xbfe7e9a5402540e5,2 +np.float64,0x7fe68914f92d1229,0x5541ef5e78fa35de,2 +np.float64,0x800a538bb1b4a718,0xaaa16bc487711295,2 +np.float64,0xffe02edbc8605db7,0xd5400f8f713df890,2 +np.float64,0xffe8968053712d00,0xd54276b9cc7f460a,2 +np.float64,0x800a4ce211d499c5,0xaaa1680491deb40c,2 +np.float64,0x3f988080f8310102,0x3fd2713691e99329,2 +np.float64,0xf64e42a7ec9c9,0x2aa3e6a7af780878,2 +np.float64,0xff73cc7100279900,0xd51b4478c3409618,2 +np.float64,0x71e6722ce3ccf,0x2a9ec76ddf296ce0,2 +np.float64,0x8006ca16ab0d942e,0xaa9e4bfd862af570,2 +np.float64,0x8000000000000000,0x8000000000000000,2 +np.float64,0xbfed373e02ba6e7c,0xbfef0b2b7bb767b3,2 +np.float64,0xa6cb0f694d962,0x2aa179dd16b0242b,2 +np.float64,0x7fec14626cf828c4,0x55434ca55b7c85d5,2 +np.float64,0x3fcda404513b4808,0x3fe3a68e8d977752,2 +np.float64,0xbfeb94995f772933,0xbfee74091d288b81,2 +np.float64,0x3fce2299a13c4530,0x3fe3c2603f28d23b,2 +np.float64,0xffd07f4534a0fe8a,0xd539a8a6ebc5a603,2 +np.float64,0x7fdb1c651e3638c9,0x553e478a6385c86b,2 +np.float64,0x3fec758336f8eb06,0x3feec5f3b92c8b28,2 +np.float64,0x796fc87cf2dfa,0x2a9f7184a4ad8c49,2 +np.float64,0x3fef9ba866ff3750,0x3fefde6a446fc2cd,2 +np.float64,0x964d26c72c9a5,0x2aa0e143f1820179,2 +np.float64,0xbfef6af750bed5ef,0xbfefce04870a97bd,2 +np.float64,0x3fe2f3961aa5e72c,0x3feadf769321a3ff,2 +np.float64,0xbfd6b706e9ad6e0e,0xbfe6a8141c5c3b5d,2 +np.float64,0x7fe0ecc40a21d987,0x55404d72c2b46a82,2 +np.float64,0xbfe560d19deac1a3,0xbfebf962681a42a4,2 +np.float64,0xbfea37170ab46e2e,0xbfedf136ee9df02b,2 +np.float64,0xbfebf78947b7ef12,0xbfee9847ef160257,2 +np.float64,0x800551f8312aa3f1,0xaa9bee7d3aa5491b,2 +np.float64,0xffed2513897a4a26,0xd5438a58c4ae28ec,2 
+np.float64,0x7fd962d75cb2c5ae,0x553d9f8a0c2016f3,2 +np.float64,0x3fefdd8512bfbb0a,0x3feff47d8da7424d,2 +np.float64,0xbfefa5b43bff4b68,0xbfefe1ca42867af0,2 +np.float64,0xbfc8a2853531450c,0xbfe279bb7b965729,2 +np.float64,0x800c8843bc391088,0xaaa2951344e7b29b,2 +np.float64,0x7fe22587bae44b0e,0x5540af8bb58cfe86,2 +np.float64,0xbfe159fae822b3f6,0xbfea182394eafd8d,2 +np.float64,0xbfe6fdfd50edfbfa,0xbfeca93f2a3597d0,2 +np.float64,0xbfe5cd5afaeb9ab6,0xbfec286a8ce0470f,2 +np.float64,0xbfc84bb97f309774,0xbfe263ef0f8f1f6e,2 +np.float64,0x7fd9c1e548b383ca,0x553dc4556874ecb9,2 +np.float64,0x7fda43d33bb487a5,0x553df60f61532fc0,2 +np.float64,0xbfe774bd25eee97a,0xbfecda42e8578c1f,2 +np.float64,0x800df1f5ab9be3ec,0xaaa34184712e69db,2 +np.float64,0xbff0000000000000,0xbff0000000000000,2 +np.float64,0x3fe14ec21b629d84,0x3fea128244215713,2 +np.float64,0x7fc1ce7843239cf0,0x5534e3fa8285b7b8,2 +np.float64,0xbfe922b204724564,0xbfed86818687d649,2 +np.float64,0x3fc58924fb2b1248,0x3fe1aa715ff6ebbf,2 +np.float64,0x8008b637e4d16c70,0xaaa0760b53abcf46,2 +np.float64,0xffbf55bd4c3eab78,0xd53404a23091a842,2 +np.float64,0x9f6b4a753ed6a,0x2aa136ef9fef9596,2 +np.float64,0xbfd11da7f8a23b50,0xbfe49deb493710d8,2 +np.float64,0x800a2f07fcd45e10,0xaaa157237c98b4f6,2 +np.float64,0x3fdd4defa4ba9bdf,0x3fe8aa0bcf895f4f,2 +np.float64,0x7fe9b0ab05f36155,0x5542bc5335414473,2 +np.float64,0x3fe89c97de313930,0x3fed51a1189b8982,2 +np.float64,0x3fdd45c8773a8b91,0x3fe8a7c2096fbf5a,2 +np.float64,0xbfeb6f64daf6deca,0xbfee665167ef43ad,2 +np.float64,0xffdf9da1c4bf3b44,0xd53fdf141944a983,2 +np.float64,0x3fde092ed0bc125c,0x3fe8de25bfbfc2db,2 +np.float64,0xbfcb21f96b3643f4,0xbfe3147904c258cf,2 +np.float64,0x800c9c934f993927,0xaaa29f17c43f021b,2 +np.float64,0x9b91814d37230,0x2aa11329e59bf6b0,2 +np.float64,0x3fe28a7e0b6514fc,0x3feaad6d23e2eadd,2 +np.float64,0xffecf38395f9e706,0xd5437f3ee1cd61e4,2 +np.float64,0x3fcade92a935bd25,0x3fe3049f4c1da1d0,2 +np.float64,0x800ab25d95d564bc,0xaaa1a076d7c66e04,2 +np.float64,0xffc0989e1e21313c,0xd53467f3b8158298,2 +np.float64,0x3fd81523eeb02a48,0x3fe71a38d2da8a82,2 +np.float64,0x7fe5b9dd402b73ba,0x5541b7b9b8631010,2 +np.float64,0x2c160d94582c3,0x2a966e51b503a3d1,2 +np.float64,0x2c416ffa5882f,0x2a9675aaef8b29c4,2 +np.float64,0x7fefe2ff01bfc5fd,0x55442289faf22b86,2 +np.float64,0xbfd469bf5d28d37e,0xbfe5dd239ffdc7eb,2 +np.float64,0xbfdd56f3eabaade8,0xbfe8ac93244ca17b,2 +np.float64,0xbfe057b89160af71,0xbfe9941557340bb3,2 +np.float64,0x800c50e140b8a1c3,0xaaa2798ace9097ee,2 +np.float64,0xbfda5a8984b4b514,0xbfe7ce93d65a56b0,2 +np.float64,0xbfcd6458323ac8b0,0xbfe39872514127bf,2 +np.float64,0x3fefb1f5ebff63ec,0x3fefe5e761b49b89,2 +np.float64,0x3fea3abc1df47578,0x3fedf29a1c997863,2 +np.float64,0x7fcb4a528e3694a4,0x553815f169667213,2 +np.float64,0x8c77da7b18efc,0x2aa080e52bdedb54,2 +np.float64,0x800e5dde4c5cbbbd,0xaaa372b16fd8b1ad,2 +np.float64,0x3fd2976038a52ec0,0x3fe5316b4f79fdbc,2 +np.float64,0x69413a0ed2828,0x2a9dfacd9cb44286,2 +np.float64,0xbfebbac0bdb77582,0xbfee820d9288b631,2 +np.float64,0x1a12aa7c34256,0x2a92d407e073bbfe,2 +np.float64,0xbfc41a27c3283450,0xbfe143c8665b0d3c,2 +np.float64,0xffe4faa41369f548,0xd54183230e0ce613,2 +np.float64,0xbfdeae81f23d5d04,0xbfe90b734bf35b68,2 +np.float64,0x3fc984ba58330975,0x3fe2b19e9052008e,2 +np.float64,0x7fe6e51b8d2dca36,0x554207a74ae2bb39,2 +np.float64,0x80081a58a81034b2,0xaaa0117d4aff11c8,2 +np.float64,0x7fde3fddfe3c7fbb,0x553f67d0082acc67,2 +np.float64,0x3fac7c999038f933,0x3fd86ec2f5dc3aa4,2 +np.float64,0x7fa26b4c4c24d698,0x552a9e6ea8545c18,2 
+np.float64,0x3fdacd06e6b59a0e,0x3fe7f0dc0e8f9c6d,2 +np.float64,0x80064b62cbec96c6,0xaa9d8ac0506fdd05,2 +np.float64,0xb858116170b1,0x2a8caea703d9ccc8,2 +np.float64,0xbfe8d94ccef1b29a,0xbfed69a8782cbf3d,2 +np.float64,0x8005607d6a6ac0fc,0xaa9c07cf8620b037,2 +np.float64,0xbfe66a52daacd4a6,0xbfec6b5e403e6864,2 +np.float64,0x7fc398c2e0273185,0x5535918245894606,2 +np.float64,0x74b2d7dce965c,0x2a9f077020defdbc,2 +np.float64,0x7fe8f7a4d9b1ef49,0x55428eeae210e8eb,2 +np.float64,0x80027deddc84fbdc,0xaa95b11ff9089745,2 +np.float64,0xffeba2a94e774552,0xd5433273f6568902,2 +np.float64,0x80002f8259405f05,0xaa8240b68d7b9dc4,2 +np.float64,0xbfdf0d84883e1b0a,0xbfe92532c69c5802,2 +np.float64,0xbfcdfa7b6b3bf4f8,0xbfe3b997a84d0914,2 +np.float64,0x800c18b04e183161,0xaaa25d46d60b15c6,2 +np.float64,0xffeaf1e37c35e3c6,0xd543092cd929ac19,2 +np.float64,0xbfc5aa07752b5410,0xbfe1b36ab5ec741f,2 +np.float64,0x3fe5c491d1eb8924,0x3fec24a1c3f6a178,2 +np.float64,0xbfeb736937f6e6d2,0xbfee67cd296e6fa9,2 +np.float64,0xffec3d5718787aad,0xd5435602e1a2cc43,2 +np.float64,0x7fe71e1da86e3c3a,0x55421691ead882cb,2 +np.float64,0x3fdd6ed0c93adda2,0x3fe8b341d066c43c,2 +np.float64,0x7fbe3d7a203c7af3,0x5533c83e53283430,2 +np.float64,0x3fdc20cb56384197,0x3fe854676360aba9,2 +np.float64,0xb7a1ac636f436,0x2aa20b9d40d66e78,2 +np.float64,0x3fb1491bb8229237,0x3fda0fabad1738ee,2 +np.float64,0xbfdf9c0ce73f381a,0xbfe94b716dbe35ee,2 +np.float64,0xbfbd4f0ad23a9e18,0xbfdf1397329a2dce,2 +np.float64,0xbfe4e0caac69c196,0xbfebc119b8a181cd,2 +np.float64,0x5753641aaea6d,0x2a9c2ba3e92b0cd2,2 +np.float64,0x72bb814ae5771,0x2a9eda92fada66de,2 +np.float64,0x57ed8f5aafdb3,0x2a9c3c2e1d42e609,2 +np.float64,0xffec33359c38666a,0xd54353b2acd0daf1,2 +np.float64,0x3fa5fe6e8c2bfce0,0x3fd66a0b3bf2720a,2 +np.float64,0xffe2dc8d7ca5b91a,0xd540e6ebc097d601,2 +np.float64,0x7fd99d260eb33a4b,0x553db626c9c75f78,2 +np.float64,0xbfe2dd73e425bae8,0xbfead4fc4b93a727,2 +np.float64,0xdcd4a583b9a95,0x2aa33094c9a17ad7,2 +np.float64,0x7fb0af6422215ec7,0x553039a606e8e64f,2 +np.float64,0x7fdfab6227bf56c3,0x553fe3b26164aeda,2 +np.float64,0x1e4d265e3c9a6,0x2a93cba8a1a8ae6d,2 +np.float64,0xbfdc7d097238fa12,0xbfe86ee2f24fd473,2 +np.float64,0x7fe5d35d29eba6b9,0x5541bea5878bce2b,2 +np.float64,0xffcb886a903710d4,0xd53828281710aab5,2 +np.float64,0xffe058c7ffe0b190,0xd5401d61e9a7cbcf,2 +np.float64,0x3ff0000000000000,0x3ff0000000000000,2 +np.float64,0xffd5b1c1132b6382,0xd53c1c839c098340,2 +np.float64,0x3fe2e7956725cf2b,0x3fead9c907b9d041,2 +np.float64,0x800a8ee293951dc6,0xaaa18ce3f079f118,2 +np.float64,0x7febcd3085b79a60,0x55433c47e1f822ad,2 +np.float64,0x3feb0e14cd761c2a,0x3fee423542102546,2 +np.float64,0x3fb45e6d0628bcda,0x3fdb86db67d0c992,2 +np.float64,0x7fa836e740306dce,0x552d2907cb8118b2,2 +np.float64,0x3fd15ba25b22b745,0x3fe4b6b018409d78,2 +np.float64,0xbfb59980ce2b3300,0xbfdc1206274cb51d,2 +np.float64,0x3fdef1b87fbde371,0x3fe91dafc62124a1,2 +np.float64,0x7fed37a4337a6f47,0x55438e7e0b50ae37,2 +np.float64,0xffe6c87633ad90ec,0xd542001f216ab448,2 +np.float64,0x8008d2548ab1a4a9,0xaaa087ad272d8e17,2 +np.float64,0xbfd1d6744da3ace8,0xbfe4e71965adda74,2 +np.float64,0xbfb27f751224fee8,0xbfdaa82132775406,2 +np.float64,0x3fe2b336ae65666d,0x3feac0e6b13ec2d2,2 +np.float64,0xffc6bac2262d7584,0xd536a951a2eecb49,2 +np.float64,0x7fdb661321b6cc25,0x553e62dfd7fcd3f3,2 +np.float64,0xffe83567d5706acf,0xd5425e4bb5027568,2 +np.float64,0xbf7f0693e03e0d00,0xbfc9235314d53f82,2 +np.float64,0x3feb32b218766564,0x3fee4fd5847f3722,2 +np.float64,0x3fec25d33df84ba6,0x3feea91fcd4aebab,2 
+np.float64,0x7fe17abecb22f57d,0x55407a8ba661207c,2 +np.float64,0xbfe5674b1eeace96,0xbfebfc351708dc70,2 +np.float64,0xbfe51a2d2f6a345a,0xbfebda702c9d302a,2 +np.float64,0x3fec05584af80ab0,0x3fee9d502a7bf54d,2 +np.float64,0xffda8871dcb510e4,0xd53e10105f0365b5,2 +np.float64,0xbfc279c31824f388,0xbfe0c9354d871484,2 +np.float64,0x1cbed61e397dc,0x2a937364712cd518,2 +np.float64,0x800787d198af0fa4,0xaa9f5c847affa1d2,2 +np.float64,0x80079f6d65af3edc,0xaa9f7d2863368bbd,2 +np.float64,0xb942f1e97285e,0x2aa2193e0c513b7f,2 +np.float64,0x7fe9078263320f04,0x554292d85dee2c18,2 +np.float64,0xbfe4de0761a9bc0f,0xbfebbfe04116b829,2 +np.float64,0xbfdbe6f3fc37cde8,0xbfe843aea59a0749,2 +np.float64,0xffcb6c0de136d81c,0xd5381fd9c525b813,2 +np.float64,0x9b6bda9336d7c,0x2aa111c924c35386,2 +np.float64,0x3fe17eece422fdda,0x3fea2a9bacd78607,2 +np.float64,0xd8011c49b0024,0x2aa30c87574fc0c6,2 +np.float64,0xbfc0a08b3f214118,0xbfe034d48f0d8dc0,2 +np.float64,0x3fd60adb1eac15b8,0x3fe66e42e4e7e6b5,2 +np.float64,0x80011d68ea023ad3,0xaa909733befbb962,2 +np.float64,0xffb35ac32426b588,0xd5310c4be1c37270,2 +np.float64,0x3fee8b56c9bd16ae,0x3fef81d8d15f6939,2 +np.float64,0x3fdc10a45e382149,0x3fe84fbe4cf11e68,2 +np.float64,0xbfc85dc45e30bb88,0xbfe2687b5518abde,2 +np.float64,0x3fd53b85212a770a,0x3fe6270d6d920d0f,2 +np.float64,0x800fc158927f82b1,0xaaa40e303239586f,2 +np.float64,0x11af5e98235ed,0x2a908b04a790083f,2 +np.float64,0xbfe2a097afe54130,0xbfeab80269eece99,2 +np.float64,0xbfd74ac588ae958c,0xbfe6d8ca3828d0b8,2 +np.float64,0xffea18ab2ef43156,0xd542d579ab31df1e,2 +np.float64,0xbfecda7058f9b4e1,0xbfeeea29c33b7913,2 +np.float64,0x3fc4ac56ed2958b0,0x3fe16d3e2bd7806d,2 +np.float64,0x3feccc898cb99913,0x3feee531f217dcfa,2 +np.float64,0xffeb3a64c5b674c9,0xd5431a30a41f0905,2 +np.float64,0x3fe5a7ee212b4fdc,0x3fec1844af9076fc,2 +np.float64,0x80080fdb52301fb7,0xaaa00a8b4274db67,2 +np.float64,0x800b3e7e47d67cfd,0xaaa1ec2876959852,2 +np.float64,0x80063fb8ee2c7f73,0xaa9d7875c9f20d6f,2 +np.float64,0x7fdacf80d0b59f01,0x553e2acede4c62a8,2 +np.float64,0x401e9b24803d4,0x2a996a0a75d0e093,2 +np.float64,0x3fe6c29505ed852a,0x3fec907a6d8c10af,2 +np.float64,0x8005c04ee2cb809f,0xaa9caa9813faef46,2 +np.float64,0xbfe1360f21e26c1e,0xbfea06155d6985b6,2 +np.float64,0xffc70606682e0c0c,0xd536c239b9d4be0a,2 +np.float64,0x800e639afefcc736,0xaaa37547d0229a26,2 +np.float64,0x3fe5589290aab125,0x3febf5c925c4e6db,2 +np.float64,0x8003b59330276b27,0xaa98c47e44524335,2 +np.float64,0x800d67ec22dacfd8,0xaaa301251b6a730a,2 +np.float64,0x7fdaeb5025b5d69f,0x553e35397dfe87eb,2 +np.float64,0x3fdae32a24b5c654,0x3fe7f771bc108f6c,2 +np.float64,0xffe6c1fc93ad83f8,0xd541fe6a6a716756,2 +np.float64,0xbfd7b9c1d32f7384,0xbfe6fcdae563d638,2 +np.float64,0x800e1bea06fc37d4,0xaaa354c0bf61449c,2 +np.float64,0xbfd78f097aaf1e12,0xbfe6ef068329bdf4,2 +np.float64,0x7fea6a400874d47f,0x5542e905978ad722,2 +np.float64,0x8008b4377cb1686f,0xaaa074c87eee29f9,2 +np.float64,0x8002f3fb8d45e7f8,0xaa96f47ac539b614,2 +np.float64,0xbfcf2b3fd13e5680,0xbfe3fb91c0cc66ad,2 +np.float64,0xffecca2f5279945e,0xd54375f361075927,2 +np.float64,0x7ff0000000000000,0x7ff0000000000000,2 +np.float64,0x7f84d5a5a029ab4a,0x552178d1d4e8640e,2 +np.float64,0x3fea8a4b64351497,0x3fee10c332440eb2,2 +np.float64,0x800fe01ac1dfc036,0xaaa41b34d91a4bee,2 +np.float64,0x3fc0b3d8872167b1,0x3fe03b178d354f8d,2 +np.float64,0x5ee8b0acbdd17,0x2a9cf69f2e317729,2 +np.float64,0x8006ef0407adde09,0xaa9e82888f3dd83e,2 +np.float64,0x7fdbb08a07b76113,0x553e7e4e35b938b9,2 +np.float64,0x49663f9c92cc9,0x2a9a95e0affe5108,2 
+np.float64,0x7fd9b87e79b370fc,0x553dc0b5cff3dc7d,2 +np.float64,0xbfd86ae657b0d5cc,0xbfe73584d02bdd2b,2 +np.float64,0x3fd4d4a13729a942,0x3fe6030a962aaaf8,2 +np.float64,0x7fcc246bcb3848d7,0x5538557309449bba,2 +np.float64,0xbfdc86a7d5b90d50,0xbfe871a2983c2a29,2 +np.float64,0xd2a6e995a54dd,0x2aa2e3e9c0fdd6c0,2 +np.float64,0x3f92eb447825d680,0x3fd0eb4fd2ba16d2,2 +np.float64,0x800d4001697a8003,0xaaa2ee358661b75c,2 +np.float64,0x3fd3705fd1a6e0c0,0x3fe582a6f321d7d6,2 +np.float64,0xbfcfdf51533fbea4,0xbfe421c3bdd9f2a3,2 +np.float64,0x3fe268e87964d1d1,0x3fea9d47e08aad8a,2 +np.float64,0x24b8901e49713,0x2a951adeefe7b31b,2 +np.float64,0x3fedb35d687b66bb,0x3fef36e440850bf8,2 +np.float64,0x3fb7ab5cbe2f56c0,0x3fdcf097380721c6,2 +np.float64,0x3f8c4eaa10389d54,0x3fceb7ecb605b73b,2 +np.float64,0xbfed831ed6fb063e,0xbfef25f462a336f1,2 +np.float64,0x7fd8c52112318a41,0x553d61b0ee609f58,2 +np.float64,0xbfe71c4ff76e38a0,0xbfecb5d32e789771,2 +np.float64,0xbfe35fb7b166bf70,0xbfeb12328e75ee6b,2 +np.float64,0x458e1a3a8b1c4,0x2a9a1cebadc81342,2 +np.float64,0x8003c1b3ad478368,0xaa98df5ed060b28c,2 +np.float64,0x7ff4000000000000,0x7ffc000000000000,2 +np.float64,0x7fe17098c162e131,0x5540775a9a3a104f,2 +np.float64,0xbfd95cb71732b96e,0xbfe7812acf7ea511,2 +np.float64,0x8000000000000001,0xa990000000000000,2 +np.float64,0xbfde0e7d9ebc1cfc,0xbfe8df9ca9e49a5b,2 +np.float64,0xffef4f67143e9ecd,0xd5440348a6a2f231,2 +np.float64,0x7fe37d23c826fa47,0x5541165de17caa03,2 +np.float64,0xbfcc0e5f85381cc0,0xbfe34b44b0deefe9,2 +np.float64,0x3fe858f1c470b1e4,0x3fed36ab90557d89,2 +np.float64,0x800e857278fd0ae5,0xaaa3847d13220545,2 +np.float64,0x3febd31a66f7a635,0x3fee8af90e66b043,2 +np.float64,0x7fd3fde1b127fbc2,0x553b5b186a49b968,2 +np.float64,0x3fd3dabb8b27b577,0x3fe5a99b446bed26,2 +np.float64,0xffeb4500f1768a01,0xd5431cab828e254a,2 +np.float64,0xffccca8fc6399520,0xd53884f8b505e79e,2 +np.float64,0xffeee9406b7dd280,0xd543ed6d27a1a899,2 +np.float64,0xffecdde0f0f9bbc1,0xd5437a6258b14092,2 +np.float64,0xe6b54005cd6a8,0x2aa378c25938dfda,2 +np.float64,0x7fe610f1022c21e1,0x5541cf460b972925,2 +np.float64,0xbfe5a170ec6b42e2,0xbfec1576081e3232,2 diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-cos.csv b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-cos.csv new file mode 100644 index 0000000..e315c28 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-cos.csv @@ -0,0 +1,1375 @@ +dtype,input,output,ulperrortol +## +ve denormals ## +np.float32,0x004b4716,0x3f800000,2 +np.float32,0x007b2490,0x3f800000,2 +np.float32,0x007c99fa,0x3f800000,2 +np.float32,0x00734a0c,0x3f800000,2 +np.float32,0x0070de24,0x3f800000,2 +np.float32,0x007fffff,0x3f800000,2 +np.float32,0x00000001,0x3f800000,2 +## -ve denormals ## +np.float32,0x80495d65,0x3f800000,2 +np.float32,0x806894f6,0x3f800000,2 +np.float32,0x80555a76,0x3f800000,2 +np.float32,0x804e1fb8,0x3f800000,2 +np.float32,0x80687de9,0x3f800000,2 +np.float32,0x807fffff,0x3f800000,2 +np.float32,0x80000001,0x3f800000,2 +## +/-0.0f, +/-FLT_MIN +/-FLT_MAX ## +np.float32,0x00000000,0x3f800000,2 +np.float32,0x80000000,0x3f800000,2 +np.float32,0x00800000,0x3f800000,2 +np.float32,0x80800000,0x3f800000,2 +## 1.00f + 0x00000001 ## +np.float32,0x3f800000,0x3f0a5140,2 +np.float32,0x3f800001,0x3f0a513f,2 +np.float32,0x3f800002,0x3f0a513d,2 +np.float32,0xc090a8b0,0xbe4332ce,2 +np.float32,0x41ce3184,0x3f4d1de1,2 +np.float32,0xc1d85848,0xbeaa8980,2 +np.float32,0x402b8820,0xbf653aa3,2 +np.float32,0x42b4e454,0xbf4a338b,2 
+np.float32,0x42a67a60,0x3c58202e,2 +np.float32,0x41d92388,0xbed987c7,2 +np.float32,0x422dd66c,0x3f5dcab3,2 +np.float32,0xc28f5be6,0xbf5688d8,2 +np.float32,0x41ab2674,0xbf53aa3b,2 +np.float32,0x3f490fdb,0x3f3504f3,2 +np.float32,0xbf490fdb,0x3f3504f3,2 +np.float32,0x3fc90fdb,0xb33bbd2e,2 +np.float32,0xbfc90fdb,0xb33bbd2e,2 +np.float32,0x40490fdb,0xbf800000,2 +np.float32,0xc0490fdb,0xbf800000,2 +np.float32,0x3fc90fdb,0xb33bbd2e,2 +np.float32,0xbfc90fdb,0xb33bbd2e,2 +np.float32,0x40490fdb,0xbf800000,2 +np.float32,0xc0490fdb,0xbf800000,2 +np.float32,0x40c90fdb,0x3f800000,2 +np.float32,0xc0c90fdb,0x3f800000,2 +np.float32,0x4016cbe4,0xbf3504f3,2 +np.float32,0xc016cbe4,0xbf3504f3,2 +np.float32,0x4096cbe4,0x324cde2e,2 +np.float32,0xc096cbe4,0x324cde2e,2 +np.float32,0x4116cbe4,0xbf800000,2 +np.float32,0xc116cbe4,0xbf800000,2 +np.float32,0x40490fdb,0xbf800000,2 +np.float32,0xc0490fdb,0xbf800000,2 +np.float32,0x40c90fdb,0x3f800000,2 +np.float32,0xc0c90fdb,0x3f800000,2 +np.float32,0x41490fdb,0x3f800000,2 +np.float32,0xc1490fdb,0x3f800000,2 +np.float32,0x407b53d2,0xbf3504f1,2 +np.float32,0xc07b53d2,0xbf3504f1,2 +np.float32,0x40fb53d2,0xb4b5563d,2 +np.float32,0xc0fb53d2,0xb4b5563d,2 +np.float32,0x417b53d2,0xbf800000,2 +np.float32,0xc17b53d2,0xbf800000,2 +np.float32,0x4096cbe4,0x324cde2e,2 +np.float32,0xc096cbe4,0x324cde2e,2 +np.float32,0x4116cbe4,0xbf800000,2 +np.float32,0xc116cbe4,0xbf800000,2 +np.float32,0x4196cbe4,0x3f800000,2 +np.float32,0xc196cbe4,0x3f800000,2 +np.float32,0x40afede0,0x3f3504f7,2 +np.float32,0xc0afede0,0x3f3504f7,2 +np.float32,0x412fede0,0x353222c4,2 +np.float32,0xc12fede0,0x353222c4,2 +np.float32,0x41afede0,0xbf800000,2 +np.float32,0xc1afede0,0xbf800000,2 +np.float32,0x40c90fdb,0x3f800000,2 +np.float32,0xc0c90fdb,0x3f800000,2 +np.float32,0x41490fdb,0x3f800000,2 +np.float32,0xc1490fdb,0x3f800000,2 +np.float32,0x41c90fdb,0x3f800000,2 +np.float32,0xc1c90fdb,0x3f800000,2 +np.float32,0x40e231d6,0x3f3504f3,2 +np.float32,0xc0e231d6,0x3f3504f3,2 +np.float32,0x416231d6,0xb319a6a2,2 +np.float32,0xc16231d6,0xb319a6a2,2 +np.float32,0x41e231d6,0xbf800000,2 +np.float32,0xc1e231d6,0xbf800000,2 +np.float32,0x40fb53d2,0xb4b5563d,2 +np.float32,0xc0fb53d2,0xb4b5563d,2 +np.float32,0x417b53d2,0xbf800000,2 +np.float32,0xc17b53d2,0xbf800000,2 +np.float32,0x41fb53d2,0x3f800000,2 +np.float32,0xc1fb53d2,0x3f800000,2 +np.float32,0x410a3ae7,0xbf3504fb,2 +np.float32,0xc10a3ae7,0xbf3504fb,2 +np.float32,0x418a3ae7,0x35b08908,2 +np.float32,0xc18a3ae7,0x35b08908,2 +np.float32,0x420a3ae7,0xbf800000,2 +np.float32,0xc20a3ae7,0xbf800000,2 +np.float32,0x4116cbe4,0xbf800000,2 +np.float32,0xc116cbe4,0xbf800000,2 +np.float32,0x4196cbe4,0x3f800000,2 +np.float32,0xc196cbe4,0x3f800000,2 +np.float32,0x4216cbe4,0x3f800000,2 +np.float32,0xc216cbe4,0x3f800000,2 +np.float32,0x41235ce2,0xbf3504ef,2 +np.float32,0xc1235ce2,0xbf3504ef,2 +np.float32,0x41a35ce2,0xb53889b6,2 +np.float32,0xc1a35ce2,0xb53889b6,2 +np.float32,0x42235ce2,0xbf800000,2 +np.float32,0xc2235ce2,0xbf800000,2 +np.float32,0x412fede0,0x353222c4,2 +np.float32,0xc12fede0,0x353222c4,2 +np.float32,0x41afede0,0xbf800000,2 +np.float32,0xc1afede0,0xbf800000,2 +np.float32,0x422fede0,0x3f800000,2 +np.float32,0xc22fede0,0x3f800000,2 +np.float32,0x413c7edd,0x3f3504f4,2 +np.float32,0xc13c7edd,0x3f3504f4,2 +np.float32,0x41bc7edd,0x33800add,2 +np.float32,0xc1bc7edd,0x33800add,2 +np.float32,0x423c7edd,0xbf800000,2 +np.float32,0xc23c7edd,0xbf800000,2 +np.float32,0x41490fdb,0x3f800000,2 +np.float32,0xc1490fdb,0x3f800000,2 +np.float32,0x41c90fdb,0x3f800000,2 
+np.float32,0xc1c90fdb,0x3f800000,2 +np.float32,0x42490fdb,0x3f800000,2 +np.float32,0xc2490fdb,0x3f800000,2 +np.float32,0x4155a0d9,0x3f3504eb,2 +np.float32,0xc155a0d9,0x3f3504eb,2 +np.float32,0x41d5a0d9,0xb5b3bc81,2 +np.float32,0xc1d5a0d9,0xb5b3bc81,2 +np.float32,0x4255a0d9,0xbf800000,2 +np.float32,0xc255a0d9,0xbf800000,2 +np.float32,0x416231d6,0xb319a6a2,2 +np.float32,0xc16231d6,0xb319a6a2,2 +np.float32,0x41e231d6,0xbf800000,2 +np.float32,0xc1e231d6,0xbf800000,2 +np.float32,0x426231d6,0x3f800000,2 +np.float32,0xc26231d6,0x3f800000,2 +np.float32,0x416ec2d4,0xbf3504f7,2 +np.float32,0xc16ec2d4,0xbf3504f7,2 +np.float32,0x41eec2d4,0x353ef0a7,2 +np.float32,0xc1eec2d4,0x353ef0a7,2 +np.float32,0x426ec2d4,0xbf800000,2 +np.float32,0xc26ec2d4,0xbf800000,2 +np.float32,0x417b53d2,0xbf800000,2 +np.float32,0xc17b53d2,0xbf800000,2 +np.float32,0x41fb53d2,0x3f800000,2 +np.float32,0xc1fb53d2,0x3f800000,2 +np.float32,0x427b53d2,0x3f800000,2 +np.float32,0xc27b53d2,0x3f800000,2 +np.float32,0x4183f268,0xbf3504e7,2 +np.float32,0xc183f268,0xbf3504e7,2 +np.float32,0x4203f268,0xb6059a13,2 +np.float32,0xc203f268,0xb6059a13,2 +np.float32,0x4283f268,0xbf800000,2 +np.float32,0xc283f268,0xbf800000,2 +np.float32,0x418a3ae7,0x35b08908,2 +np.float32,0xc18a3ae7,0x35b08908,2 +np.float32,0x420a3ae7,0xbf800000,2 +np.float32,0xc20a3ae7,0xbf800000,2 +np.float32,0x428a3ae7,0x3f800000,2 +np.float32,0xc28a3ae7,0x3f800000,2 +np.float32,0x41908365,0x3f3504f0,2 +np.float32,0xc1908365,0x3f3504f0,2 +np.float32,0x42108365,0xb512200d,2 +np.float32,0xc2108365,0xb512200d,2 +np.float32,0x42908365,0xbf800000,2 +np.float32,0xc2908365,0xbf800000,2 +np.float32,0x4196cbe4,0x3f800000,2 +np.float32,0xc196cbe4,0x3f800000,2 +np.float32,0x4216cbe4,0x3f800000,2 +np.float32,0xc216cbe4,0x3f800000,2 +np.float32,0x4296cbe4,0x3f800000,2 +np.float32,0xc296cbe4,0x3f800000,2 +np.float32,0x419d1463,0x3f3504ef,2 +np.float32,0xc19d1463,0x3f3504ef,2 +np.float32,0x421d1463,0xb5455799,2 +np.float32,0xc21d1463,0xb5455799,2 +np.float32,0x429d1463,0xbf800000,2 +np.float32,0xc29d1463,0xbf800000,2 +np.float32,0x41a35ce2,0xb53889b6,2 +np.float32,0xc1a35ce2,0xb53889b6,2 +np.float32,0x42235ce2,0xbf800000,2 +np.float32,0xc2235ce2,0xbf800000,2 +np.float32,0x42a35ce2,0x3f800000,2 +np.float32,0xc2a35ce2,0x3f800000,2 +np.float32,0x41a9a561,0xbf3504ff,2 +np.float32,0xc1a9a561,0xbf3504ff,2 +np.float32,0x4229a561,0x360733d0,2 +np.float32,0xc229a561,0x360733d0,2 +np.float32,0x42a9a561,0xbf800000,2 +np.float32,0xc2a9a561,0xbf800000,2 +np.float32,0x41afede0,0xbf800000,2 +np.float32,0xc1afede0,0xbf800000,2 +np.float32,0x422fede0,0x3f800000,2 +np.float32,0xc22fede0,0x3f800000,2 +np.float32,0x42afede0,0x3f800000,2 +np.float32,0xc2afede0,0x3f800000,2 +np.float32,0x41b6365e,0xbf3504f6,2 +np.float32,0xc1b6365e,0xbf3504f6,2 +np.float32,0x4236365e,0x350bb91c,2 +np.float32,0xc236365e,0x350bb91c,2 +np.float32,0x42b6365e,0xbf800000,2 +np.float32,0xc2b6365e,0xbf800000,2 +np.float32,0x41bc7edd,0x33800add,2 +np.float32,0xc1bc7edd,0x33800add,2 +np.float32,0x423c7edd,0xbf800000,2 +np.float32,0xc23c7edd,0xbf800000,2 +np.float32,0x42bc7edd,0x3f800000,2 +np.float32,0xc2bc7edd,0x3f800000,2 +np.float32,0x41c2c75c,0x3f3504f8,2 +np.float32,0xc1c2c75c,0x3f3504f8,2 +np.float32,0x4242c75c,0x354bbe8a,2 +np.float32,0xc242c75c,0x354bbe8a,2 +np.float32,0x42c2c75c,0xbf800000,2 +np.float32,0xc2c2c75c,0xbf800000,2 +np.float32,0x41c90fdb,0x3f800000,2 +np.float32,0xc1c90fdb,0x3f800000,2 +np.float32,0x42490fdb,0x3f800000,2 +np.float32,0xc2490fdb,0x3f800000,2 +np.float32,0x42c90fdb,0x3f800000,2 
+np.float32,0xc2c90fdb,0x3f800000,2 +np.float32,0x41cf585a,0x3f3504e7,2 +np.float32,0xc1cf585a,0x3f3504e7,2 +np.float32,0x424f585a,0xb608cd8c,2 +np.float32,0xc24f585a,0xb608cd8c,2 +np.float32,0x42cf585a,0xbf800000,2 +np.float32,0xc2cf585a,0xbf800000,2 +np.float32,0x41d5a0d9,0xb5b3bc81,2 +np.float32,0xc1d5a0d9,0xb5b3bc81,2 +np.float32,0x4255a0d9,0xbf800000,2 +np.float32,0xc255a0d9,0xbf800000,2 +np.float32,0x42d5a0d9,0x3f800000,2 +np.float32,0xc2d5a0d9,0x3f800000,2 +np.float32,0x41dbe958,0xbf350507,2 +np.float32,0xc1dbe958,0xbf350507,2 +np.float32,0x425be958,0x365eab75,2 +np.float32,0xc25be958,0x365eab75,2 +np.float32,0x42dbe958,0xbf800000,2 +np.float32,0xc2dbe958,0xbf800000,2 +np.float32,0x41e231d6,0xbf800000,2 +np.float32,0xc1e231d6,0xbf800000,2 +np.float32,0x426231d6,0x3f800000,2 +np.float32,0xc26231d6,0x3f800000,2 +np.float32,0x42e231d6,0x3f800000,2 +np.float32,0xc2e231d6,0x3f800000,2 +np.float32,0x41e87a55,0xbf3504ef,2 +np.float32,0xc1e87a55,0xbf3504ef,2 +np.float32,0x42687a55,0xb552257b,2 +np.float32,0xc2687a55,0xb552257b,2 +np.float32,0x42e87a55,0xbf800000,2 +np.float32,0xc2e87a55,0xbf800000,2 +np.float32,0x41eec2d4,0x353ef0a7,2 +np.float32,0xc1eec2d4,0x353ef0a7,2 +np.float32,0x426ec2d4,0xbf800000,2 +np.float32,0xc26ec2d4,0xbf800000,2 +np.float32,0x42eec2d4,0x3f800000,2 +np.float32,0xc2eec2d4,0x3f800000,2 +np.float32,0x41f50b53,0x3f3504ff,2 +np.float32,0xc1f50b53,0x3f3504ff,2 +np.float32,0x42750b53,0x360a6748,2 +np.float32,0xc2750b53,0x360a6748,2 +np.float32,0x42f50b53,0xbf800000,2 +np.float32,0xc2f50b53,0xbf800000,2 +np.float32,0x41fb53d2,0x3f800000,2 +np.float32,0xc1fb53d2,0x3f800000,2 +np.float32,0x427b53d2,0x3f800000,2 +np.float32,0xc27b53d2,0x3f800000,2 +np.float32,0x42fb53d2,0x3f800000,2 +np.float32,0xc2fb53d2,0x3f800000,2 +np.float32,0x4200ce28,0x3f3504f6,2 +np.float32,0xc200ce28,0x3f3504f6,2 +np.float32,0x4280ce28,0x34fdd672,2 +np.float32,0xc280ce28,0x34fdd672,2 +np.float32,0x4300ce28,0xbf800000,2 +np.float32,0xc300ce28,0xbf800000,2 +np.float32,0x4203f268,0xb6059a13,2 +np.float32,0xc203f268,0xb6059a13,2 +np.float32,0x4283f268,0xbf800000,2 +np.float32,0xc283f268,0xbf800000,2 +np.float32,0x4303f268,0x3f800000,2 +np.float32,0xc303f268,0x3f800000,2 +np.float32,0x420716a7,0xbf3504f8,2 +np.float32,0xc20716a7,0xbf3504f8,2 +np.float32,0x428716a7,0x35588c6d,2 +np.float32,0xc28716a7,0x35588c6d,2 +np.float32,0x430716a7,0xbf800000,2 +np.float32,0xc30716a7,0xbf800000,2 +np.float32,0x420a3ae7,0xbf800000,2 +np.float32,0xc20a3ae7,0xbf800000,2 +np.float32,0x428a3ae7,0x3f800000,2 +np.float32,0xc28a3ae7,0x3f800000,2 +np.float32,0x430a3ae7,0x3f800000,2 +np.float32,0xc30a3ae7,0x3f800000,2 +np.float32,0x420d5f26,0xbf3504e7,2 +np.float32,0xc20d5f26,0xbf3504e7,2 +np.float32,0x428d5f26,0xb60c0105,2 +np.float32,0xc28d5f26,0xb60c0105,2 +np.float32,0x430d5f26,0xbf800000,2 +np.float32,0xc30d5f26,0xbf800000,2 +np.float32,0x42108365,0xb512200d,2 +np.float32,0xc2108365,0xb512200d,2 +np.float32,0x42908365,0xbf800000,2 +np.float32,0xc2908365,0xbf800000,2 +np.float32,0x43108365,0x3f800000,2 +np.float32,0xc3108365,0x3f800000,2 +np.float32,0x4213a7a5,0x3f350507,2 +np.float32,0xc213a7a5,0x3f350507,2 +np.float32,0x4293a7a5,0x3661deee,2 +np.float32,0xc293a7a5,0x3661deee,2 +np.float32,0x4313a7a5,0xbf800000,2 +np.float32,0xc313a7a5,0xbf800000,2 +np.float32,0x4216cbe4,0x3f800000,2 +np.float32,0xc216cbe4,0x3f800000,2 +np.float32,0x4296cbe4,0x3f800000,2 +np.float32,0xc296cbe4,0x3f800000,2 +np.float32,0x4316cbe4,0x3f800000,2 +np.float32,0xc316cbe4,0x3f800000,2 +np.float32,0x4219f024,0x3f3504d8,2 
+np.float32,0xc219f024,0x3f3504d8,2 +np.float32,0x4299f024,0xb69bde6c,2 +np.float32,0xc299f024,0xb69bde6c,2 +np.float32,0x4319f024,0xbf800000,2 +np.float32,0xc319f024,0xbf800000,2 +np.float32,0x421d1463,0xb5455799,2 +np.float32,0xc21d1463,0xb5455799,2 +np.float32,0x429d1463,0xbf800000,2 +np.float32,0xc29d1463,0xbf800000,2 +np.float32,0x431d1463,0x3f800000,2 +np.float32,0xc31d1463,0x3f800000,2 +np.float32,0x422038a3,0xbf350516,2 +np.float32,0xc22038a3,0xbf350516,2 +np.float32,0x42a038a3,0x36c6cd61,2 +np.float32,0xc2a038a3,0x36c6cd61,2 +np.float32,0x432038a3,0xbf800000,2 +np.float32,0xc32038a3,0xbf800000,2 +np.float32,0x42235ce2,0xbf800000,2 +np.float32,0xc2235ce2,0xbf800000,2 +np.float32,0x42a35ce2,0x3f800000,2 +np.float32,0xc2a35ce2,0x3f800000,2 +np.float32,0x43235ce2,0x3f800000,2 +np.float32,0xc3235ce2,0x3f800000,2 +np.float32,0x42268121,0xbf3504f6,2 +np.float32,0xc2268121,0xbf3504f6,2 +np.float32,0x42a68121,0x34e43aac,2 +np.float32,0xc2a68121,0x34e43aac,2 +np.float32,0x43268121,0xbf800000,2 +np.float32,0xc3268121,0xbf800000,2 +np.float32,0x4229a561,0x360733d0,2 +np.float32,0xc229a561,0x360733d0,2 +np.float32,0x42a9a561,0xbf800000,2 +np.float32,0xc2a9a561,0xbf800000,2 +np.float32,0x4329a561,0x3f800000,2 +np.float32,0xc329a561,0x3f800000,2 +np.float32,0x422cc9a0,0x3f3504f8,2 +np.float32,0xc22cc9a0,0x3f3504f8,2 +np.float32,0x42acc9a0,0x35655a50,2 +np.float32,0xc2acc9a0,0x35655a50,2 +np.float32,0x432cc9a0,0xbf800000,2 +np.float32,0xc32cc9a0,0xbf800000,2 +np.float32,0x422fede0,0x3f800000,2 +np.float32,0xc22fede0,0x3f800000,2 +np.float32,0x42afede0,0x3f800000,2 +np.float32,0xc2afede0,0x3f800000,2 +np.float32,0x432fede0,0x3f800000,2 +np.float32,0xc32fede0,0x3f800000,2 +np.float32,0x4233121f,0x3f3504e7,2 +np.float32,0xc233121f,0x3f3504e7,2 +np.float32,0x42b3121f,0xb60f347d,2 +np.float32,0xc2b3121f,0xb60f347d,2 +np.float32,0x4333121f,0xbf800000,2 +np.float32,0xc333121f,0xbf800000,2 +np.float32,0x4236365e,0x350bb91c,2 +np.float32,0xc236365e,0x350bb91c,2 +np.float32,0x42b6365e,0xbf800000,2 +np.float32,0xc2b6365e,0xbf800000,2 +np.float32,0x4336365e,0x3f800000,2 +np.float32,0xc336365e,0x3f800000,2 +np.float32,0x42395a9e,0xbf350507,2 +np.float32,0xc2395a9e,0xbf350507,2 +np.float32,0x42b95a9e,0x36651267,2 +np.float32,0xc2b95a9e,0x36651267,2 +np.float32,0x43395a9e,0xbf800000,2 +np.float32,0xc3395a9e,0xbf800000,2 +np.float32,0x423c7edd,0xbf800000,2 +np.float32,0xc23c7edd,0xbf800000,2 +np.float32,0x42bc7edd,0x3f800000,2 +np.float32,0xc2bc7edd,0x3f800000,2 +np.float32,0x433c7edd,0x3f800000,2 +np.float32,0xc33c7edd,0x3f800000,2 +np.float32,0x423fa31d,0xbf3504d7,2 +np.float32,0xc23fa31d,0xbf3504d7,2 +np.float32,0x42bfa31d,0xb69d7828,2 +np.float32,0xc2bfa31d,0xb69d7828,2 +np.float32,0x433fa31d,0xbf800000,2 +np.float32,0xc33fa31d,0xbf800000,2 +np.float32,0x4242c75c,0x354bbe8a,2 +np.float32,0xc242c75c,0x354bbe8a,2 +np.float32,0x42c2c75c,0xbf800000,2 +np.float32,0xc2c2c75c,0xbf800000,2 +np.float32,0x4342c75c,0x3f800000,2 +np.float32,0xc342c75c,0x3f800000,2 +np.float32,0x4245eb9c,0x3f350517,2 +np.float32,0xc245eb9c,0x3f350517,2 +np.float32,0x42c5eb9c,0x36c8671d,2 +np.float32,0xc2c5eb9c,0x36c8671d,2 +np.float32,0x4345eb9c,0xbf800000,2 +np.float32,0xc345eb9c,0xbf800000,2 +np.float32,0x42490fdb,0x3f800000,2 +np.float32,0xc2490fdb,0x3f800000,2 +np.float32,0x42c90fdb,0x3f800000,2 +np.float32,0xc2c90fdb,0x3f800000,2 +np.float32,0x43490fdb,0x3f800000,2 +np.float32,0xc3490fdb,0x3f800000,2 +np.float32,0x424c341a,0x3f3504f5,2 +np.float32,0xc24c341a,0x3f3504f5,2 +np.float32,0x42cc341a,0x34ca9ee6,2 
+np.float32,0xc2cc341a,0x34ca9ee6,2 +np.float32,0x434c341a,0xbf800000,2 +np.float32,0xc34c341a,0xbf800000,2 +np.float32,0x424f585a,0xb608cd8c,2 +np.float32,0xc24f585a,0xb608cd8c,2 +np.float32,0x42cf585a,0xbf800000,2 +np.float32,0xc2cf585a,0xbf800000,2 +np.float32,0x434f585a,0x3f800000,2 +np.float32,0xc34f585a,0x3f800000,2 +np.float32,0x42527c99,0xbf3504f9,2 +np.float32,0xc2527c99,0xbf3504f9,2 +np.float32,0x42d27c99,0x35722833,2 +np.float32,0xc2d27c99,0x35722833,2 +np.float32,0x43527c99,0xbf800000,2 +np.float32,0xc3527c99,0xbf800000,2 +np.float32,0x4255a0d9,0xbf800000,2 +np.float32,0xc255a0d9,0xbf800000,2 +np.float32,0x42d5a0d9,0x3f800000,2 +np.float32,0xc2d5a0d9,0x3f800000,2 +np.float32,0x4355a0d9,0x3f800000,2 +np.float32,0xc355a0d9,0x3f800000,2 +np.float32,0x4258c518,0xbf3504e6,2 +np.float32,0xc258c518,0xbf3504e6,2 +np.float32,0x42d8c518,0xb61267f6,2 +np.float32,0xc2d8c518,0xb61267f6,2 +np.float32,0x4358c518,0xbf800000,2 +np.float32,0xc358c518,0xbf800000,2 +np.float32,0x425be958,0x365eab75,2 +np.float32,0xc25be958,0x365eab75,2 +np.float32,0x42dbe958,0xbf800000,2 +np.float32,0xc2dbe958,0xbf800000,2 +np.float32,0x435be958,0x3f800000,2 +np.float32,0xc35be958,0x3f800000,2 +np.float32,0x425f0d97,0x3f350508,2 +np.float32,0xc25f0d97,0x3f350508,2 +np.float32,0x42df0d97,0x366845e0,2 +np.float32,0xc2df0d97,0x366845e0,2 +np.float32,0x435f0d97,0xbf800000,2 +np.float32,0xc35f0d97,0xbf800000,2 +np.float32,0x426231d6,0x3f800000,2 +np.float32,0xc26231d6,0x3f800000,2 +np.float32,0x42e231d6,0x3f800000,2 +np.float32,0xc2e231d6,0x3f800000,2 +np.float32,0x436231d6,0x3f800000,2 +np.float32,0xc36231d6,0x3f800000,2 +np.float32,0x42655616,0x3f3504d7,2 +np.float32,0xc2655616,0x3f3504d7,2 +np.float32,0x42e55616,0xb69f11e5,2 +np.float32,0xc2e55616,0xb69f11e5,2 +np.float32,0x43655616,0xbf800000,2 +np.float32,0xc3655616,0xbf800000,2 +np.float32,0x42687a55,0xb552257b,2 +np.float32,0xc2687a55,0xb552257b,2 +np.float32,0x42e87a55,0xbf800000,2 +np.float32,0xc2e87a55,0xbf800000,2 +np.float32,0x43687a55,0x3f800000,2 +np.float32,0xc3687a55,0x3f800000,2 +np.float32,0x426b9e95,0xbf350517,2 +np.float32,0xc26b9e95,0xbf350517,2 +np.float32,0x42eb9e95,0x36ca00d9,2 +np.float32,0xc2eb9e95,0x36ca00d9,2 +np.float32,0x436b9e95,0xbf800000,2 +np.float32,0xc36b9e95,0xbf800000,2 +np.float32,0x426ec2d4,0xbf800000,2 +np.float32,0xc26ec2d4,0xbf800000,2 +np.float32,0x42eec2d4,0x3f800000,2 +np.float32,0xc2eec2d4,0x3f800000,2 +np.float32,0x436ec2d4,0x3f800000,2 +np.float32,0xc36ec2d4,0x3f800000,2 +np.float32,0x4271e713,0xbf3504f5,2 +np.float32,0xc271e713,0xbf3504f5,2 +np.float32,0x42f1e713,0x34b10321,2 +np.float32,0xc2f1e713,0x34b10321,2 +np.float32,0x4371e713,0xbf800000,2 +np.float32,0xc371e713,0xbf800000,2 +np.float32,0x42750b53,0x360a6748,2 +np.float32,0xc2750b53,0x360a6748,2 +np.float32,0x42f50b53,0xbf800000,2 +np.float32,0xc2f50b53,0xbf800000,2 +np.float32,0x43750b53,0x3f800000,2 +np.float32,0xc3750b53,0x3f800000,2 +np.float32,0x42782f92,0x3f3504f9,2 +np.float32,0xc2782f92,0x3f3504f9,2 +np.float32,0x42f82f92,0x357ef616,2 +np.float32,0xc2f82f92,0x357ef616,2 +np.float32,0x43782f92,0xbf800000,2 +np.float32,0xc3782f92,0xbf800000,2 +np.float32,0x427b53d2,0x3f800000,2 +np.float32,0xc27b53d2,0x3f800000,2 +np.float32,0x42fb53d2,0x3f800000,2 +np.float32,0xc2fb53d2,0x3f800000,2 +np.float32,0x437b53d2,0x3f800000,2 +np.float32,0xc37b53d2,0x3f800000,2 +np.float32,0x427e7811,0x3f3504e6,2 +np.float32,0xc27e7811,0x3f3504e6,2 +np.float32,0x42fe7811,0xb6159b6f,2 +np.float32,0xc2fe7811,0xb6159b6f,2 +np.float32,0x437e7811,0xbf800000,2 
+np.float32,0xc37e7811,0xbf800000,2 +np.float32,0x4280ce28,0x34fdd672,2 +np.float32,0xc280ce28,0x34fdd672,2 +np.float32,0x4300ce28,0xbf800000,2 +np.float32,0xc300ce28,0xbf800000,2 +np.float32,0x4380ce28,0x3f800000,2 +np.float32,0xc380ce28,0x3f800000,2 +np.float32,0x42826048,0xbf350508,2 +np.float32,0xc2826048,0xbf350508,2 +np.float32,0x43026048,0x366b7958,2 +np.float32,0xc3026048,0x366b7958,2 +np.float32,0x43826048,0xbf800000,2 +np.float32,0xc3826048,0xbf800000,2 +np.float32,0x4283f268,0xbf800000,2 +np.float32,0xc283f268,0xbf800000,2 +np.float32,0x4303f268,0x3f800000,2 +np.float32,0xc303f268,0x3f800000,2 +np.float32,0x4383f268,0x3f800000,2 +np.float32,0xc383f268,0x3f800000,2 +np.float32,0x42858487,0xbf350504,2 +np.float32,0xc2858487,0xbf350504,2 +np.float32,0x43058487,0x363ea8be,2 +np.float32,0xc3058487,0x363ea8be,2 +np.float32,0x43858487,0xbf800000,2 +np.float32,0xc3858487,0xbf800000,2 +np.float32,0x428716a7,0x35588c6d,2 +np.float32,0xc28716a7,0x35588c6d,2 +np.float32,0x430716a7,0xbf800000,2 +np.float32,0xc30716a7,0xbf800000,2 +np.float32,0x438716a7,0x3f800000,2 +np.float32,0xc38716a7,0x3f800000,2 +np.float32,0x4288a8c7,0x3f350517,2 +np.float32,0xc288a8c7,0x3f350517,2 +np.float32,0x4308a8c7,0x36cb9a96,2 +np.float32,0xc308a8c7,0x36cb9a96,2 +np.float32,0x4388a8c7,0xbf800000,2 +np.float32,0xc388a8c7,0xbf800000,2 +np.float32,0x428a3ae7,0x3f800000,2 +np.float32,0xc28a3ae7,0x3f800000,2 +np.float32,0x430a3ae7,0x3f800000,2 +np.float32,0xc30a3ae7,0x3f800000,2 +np.float32,0x438a3ae7,0x3f800000,2 +np.float32,0xc38a3ae7,0x3f800000,2 +np.float32,0x428bcd06,0x3f3504f5,2 +np.float32,0xc28bcd06,0x3f3504f5,2 +np.float32,0x430bcd06,0x3497675b,2 +np.float32,0xc30bcd06,0x3497675b,2 +np.float32,0x438bcd06,0xbf800000,2 +np.float32,0xc38bcd06,0xbf800000,2 +np.float32,0x428d5f26,0xb60c0105,2 +np.float32,0xc28d5f26,0xb60c0105,2 +np.float32,0x430d5f26,0xbf800000,2 +np.float32,0xc30d5f26,0xbf800000,2 +np.float32,0x438d5f26,0x3f800000,2 +np.float32,0xc38d5f26,0x3f800000,2 +np.float32,0x428ef146,0xbf350526,2 +np.float32,0xc28ef146,0xbf350526,2 +np.float32,0x430ef146,0x3710bc40,2 +np.float32,0xc30ef146,0x3710bc40,2 +np.float32,0x438ef146,0xbf800000,2 +np.float32,0xc38ef146,0xbf800000,2 +np.float32,0x42908365,0xbf800000,2 +np.float32,0xc2908365,0xbf800000,2 +np.float32,0x43108365,0x3f800000,2 +np.float32,0xc3108365,0x3f800000,2 +np.float32,0x43908365,0x3f800000,2 +np.float32,0xc3908365,0x3f800000,2 +np.float32,0x42921585,0xbf3504e6,2 +np.float32,0xc2921585,0xbf3504e6,2 +np.float32,0x43121585,0xb618cee8,2 +np.float32,0xc3121585,0xb618cee8,2 +np.float32,0x43921585,0xbf800000,2 +np.float32,0xc3921585,0xbf800000,2 +np.float32,0x4293a7a5,0x3661deee,2 +np.float32,0xc293a7a5,0x3661deee,2 +np.float32,0x4313a7a5,0xbf800000,2 +np.float32,0xc313a7a5,0xbf800000,2 +np.float32,0x4393a7a5,0x3f800000,2 +np.float32,0xc393a7a5,0x3f800000,2 +np.float32,0x429539c5,0x3f350536,2 +np.float32,0xc29539c5,0x3f350536,2 +np.float32,0x431539c5,0x373bab34,2 +np.float32,0xc31539c5,0x373bab34,2 +np.float32,0x439539c5,0xbf800000,2 +np.float32,0xc39539c5,0xbf800000,2 +np.float32,0x4296cbe4,0x3f800000,2 +np.float32,0xc296cbe4,0x3f800000,2 +np.float32,0x4316cbe4,0x3f800000,2 +np.float32,0xc316cbe4,0x3f800000,2 +np.float32,0x4396cbe4,0x3f800000,2 +np.float32,0xc396cbe4,0x3f800000,2 +np.float32,0x42985e04,0x3f3504d7,2 +np.float32,0xc2985e04,0x3f3504d7,2 +np.float32,0x43185e04,0xb6a2455d,2 +np.float32,0xc3185e04,0xb6a2455d,2 +np.float32,0x43985e04,0xbf800000,2 +np.float32,0xc3985e04,0xbf800000,2 +np.float32,0x4299f024,0xb69bde6c,2 
+np.float32,0xc299f024,0xb69bde6c,2 +np.float32,0x4319f024,0xbf800000,2 +np.float32,0xc319f024,0xbf800000,2 +np.float32,0x4399f024,0x3f800000,2 +np.float32,0xc399f024,0x3f800000,2 +np.float32,0x429b8243,0xbf3504ea,2 +np.float32,0xc29b8243,0xbf3504ea,2 +np.float32,0x431b8243,0xb5cb2eb8,2 +np.float32,0xc31b8243,0xb5cb2eb8,2 +np.float32,0x439b8243,0xbf800000,2 +np.float32,0xc39b8243,0xbf800000,2 +np.float32,0x435b2047,0x3f3504c1,2 +np.float32,0x42a038a2,0xb5e4ca7e,2 +np.float32,0x432038a2,0xbf800000,2 +np.float32,0x4345eb9b,0xbf800000,2 +np.float32,0x42c5eb9b,0xb5de638c,2 +np.float32,0x42eb9e94,0xb5d7fc9b,2 +np.float32,0x4350ea79,0x3631dadb,2 +np.float32,0x42dbe957,0xbf800000,2 +np.float32,0x425be957,0xb505522a,2 +np.float32,0x435be957,0x3f800000,2 +np.float32,0x46027eb2,0x3e7d94c9,2 +np.float32,0x4477baed,0xbe7f1824,2 +np.float32,0x454b8024,0x3e7f5268,2 +np.float32,0x455d2c09,0x3e7f40cb,2 +np.float32,0x4768d3de,0xba14b4af,2 +np.float32,0x46c1e7cd,0x3e7fb102,2 +np.float32,0x44a52949,0xbe7dc9d5,2 +np.float32,0x4454633a,0x3e7dbc7d,2 +np.float32,0x4689810b,0x3e7eb02b,2 +np.float32,0x473473cd,0xbe7eef6f,2 +np.float32,0x44a5193f,0x3e7e1b1f,2 +np.float32,0x46004b36,0x3e7dac59,2 +np.float32,0x467f604b,0x3d7ffd3a,2 +np.float32,0x45ea1805,0x3dffd2e0,2 +np.float32,0x457b6af3,0x3dff7831,2 +np.float32,0x44996159,0xbe7d85f4,2 +np.float32,0x47883553,0xbb80584e,2 +np.float32,0x44e19f0c,0xbdffcfe6,2 +np.float32,0x472b3bf6,0xbe7f7a82,2 +np.float32,0x4600bb4e,0x3a135e33,2 +np.float32,0x449f4556,0x3e7e42e5,2 +np.float32,0x474e9420,0x3dff77b2,2 +np.float32,0x45cbdb23,0x3dff7240,2 +np.float32,0x44222747,0x3dffb039,2 +np.float32,0x4772e419,0xbdff74b8,2 +np.float64,0x1,0x3ff0000000000000,4 +np.float64,0x8000000000000001,0x3ff0000000000000,4 +np.float64,0x10000000000000,0x3ff0000000000000,4 +np.float64,0x8010000000000000,0x3ff0000000000000,4 +np.float64,0x7fefffffffffffff,0xbfefffe62ecfab75,4 +np.float64,0xffefffffffffffff,0xbfefffe62ecfab75,4 +np.float64,0x7ff0000000000000,0xfff8000000000000,4 +np.float64,0xfff0000000000000,0xfff8000000000000,4 +np.float64,0x7ff8000000000000,0x7ff8000000000000,4 +np.float64,0x7ff4000000000000,0x7ffc000000000000,4 +np.float64,0xbfc28bd9dd2517b4,0x3fefaa28ba13a702,4 +np.float64,0x3fb673c62e2ce790,0x3fefe083847a717f,4 +np.float64,0xbfe3e1dac7e7c3b6,0x3fea0500ba099f3a,4 +np.float64,0xbfbe462caa3c8c58,0x3fefc6c8b9c1c87c,4 +np.float64,0xbfb9353576326a68,0x3fefd8513e50e6b1,4 +np.float64,0xbfc05e798520bcf4,0x3fefbd1ad81cf089,4 +np.float64,0xbfe3ca3be2e79478,0x3fea12b995ea6574,4 +np.float64,0xbfde875d46bd0eba,0x3fec6d888662a824,4 +np.float64,0x3fafc4e02c3f89c0,0x3feff03c34bffd69,4 +np.float64,0xbf98855848310ac0,0x3feffda6c1588bdb,4 +np.float64,0x3fe66c51186cd8a2,0x3fe875c61c630ecb,4 +np.float64,0xbfedff1c3b7bfe38,0x3fe2f0c8c9e8fa39,4 +np.float64,0x3fd6082267ac1044,0x3fee1f6023695050,4 +np.float64,0xbfe78449b06f0894,0x3fe7bda2b223850e,4 +np.float64,0x3feedb8e63fdb71c,0x3fe23d5dfd2dd33f,4 +np.float64,0xbfc0a9de3d2153bc,0x3fefbaadf5e5285e,4 +np.float64,0x3fc04c67432098d0,0x3fefbdae07b7de8d,4 +np.float64,0xbfeeef84c4fddf0a,0x3fe22cf37f309d88,4 +np.float64,0x3fc04bb025209760,0x3fefbdb3d7d34ecf,4 +np.float64,0x3fd6b84d48ad709c,0x3fee013403da6e2a,4 +np.float64,0x3fec1ae25d7835c4,0x3fe46e62195cf274,4 +np.float64,0xbfdc6fdf9bb8dfc0,0x3fece48dc78bbb2e,4 +np.float64,0x3fb4db2c9229b660,0x3fefe4d42f79bf49,4 +np.float64,0xbfc0ed698521dad4,0x3fefb8785ea658c9,4 +np.float64,0xbfee82772b7d04ee,0x3fe2864a80efe8e9,4 +np.float64,0x3fd575b664aaeb6c,0x3fee37c669a12879,4 
+np.float64,0x3fe4afb1c5e95f64,0x3fe98b177194439c,4 +np.float64,0x3fd93962f9b272c4,0x3fed8bef61876294,4 +np.float64,0x3fd97ae025b2f5c0,0x3fed7f4cfbf4d300,4 +np.float64,0xbfd9afdb1bb35fb6,0x3fed74fdc44dabb1,4 +np.float64,0x3f8ae65e3035cc80,0x3fefff4b1a0ea62b,4 +np.float64,0xbfe7e58664efcb0d,0x3fe77c02a1cbb670,4 +np.float64,0x3fe5f68b37ebed16,0x3fe8c10f849a5d4d,4 +np.float64,0x3fd9137d61b226fc,0x3fed9330eb4815a1,4 +np.float64,0x3fc146d019228da0,0x3fefb57e2d4d52f8,4 +np.float64,0xbfda6036edb4c06e,0x3fed521b2b578679,4 +np.float64,0xbfe78ddfb0ef1bc0,0x3fe7b734319a77e4,4 +np.float64,0x3fe0877823610ef0,0x3febd33a993dd786,4 +np.float64,0x3fbc61af2e38c360,0x3fefcdb4f889756d,4 +np.float64,0x3fd4dcdca4a9b9b8,0x3fee50962ffea5ae,4 +np.float64,0xbfe03cb29f607965,0x3febf7dbf640a75a,4 +np.float64,0xbfc81de407303bc8,0x3fef6f066cef64bc,4 +np.float64,0x3fd8dea42db1bd48,0x3fed9d3e00dbe0b3,4 +np.float64,0x3feac75e94f58ebe,0x3fe56f1f47f97896,4 +np.float64,0x3fb3a1ea6e2743d0,0x3fefe7ec1247cdaa,4 +np.float64,0x3fd695c0f4ad2b80,0x3fee0730bd40883d,4 +np.float64,0xbfd2c631f5a58c64,0x3feea20cbd1105d7,4 +np.float64,0xbfe978a8e1f2f152,0x3fe663014d40ad7a,4 +np.float64,0x3fd8b6b76ab16d70,0x3feda4c879aacc19,4 +np.float64,0x3feaafd30e755fa6,0x3fe5809514c28453,4 +np.float64,0x3fe1e37dc263c6fc,0x3feb20f9ad1f3f5c,4 +np.float64,0x3fd0ec7c24a1d8f8,0x3feee34048f43b75,4 +np.float64,0xbfe3881cbf67103a,0x3fea38d7886e6f53,4 +np.float64,0xbfd7023957ae0472,0x3fedf4471c765a1c,4 +np.float64,0xbfebc51c4ef78a38,0x3fe4b01c424e297b,4 +np.float64,0xbfe20a93eae41528,0x3feb0c2aa321d2e0,4 +np.float64,0x3fef39be867e737e,0x3fe1efaba9164d27,4 +np.float64,0x3fe8ea9576f1d52a,0x3fe6c7a8826ce1be,4 +np.float64,0x3fea921d91f5243c,0x3fe5968c6cf78963,4 +np.float64,0x3fd7ee5d31afdcbc,0x3fedc9f19d43fe61,4 +np.float64,0xbfe3ed581767dab0,0x3fe9fe4ee2f2b1cd,4 +np.float64,0xbfc40923d5281248,0x3fef9bd8ee9f6e68,4 +np.float64,0x3fe411a834682350,0x3fe9e9103854f057,4 +np.float64,0xbfedf6ccdf7bed9a,0x3fe2f77ad6543246,4 +np.float64,0xbfe8788a44f0f114,0x3fe7172f3aa0c742,4 +np.float64,0xbfce728f173ce520,0x3fef1954083bea04,4 +np.float64,0xbfd64dd0acac9ba2,0x3fee138c3293c246,4 +np.float64,0xbfe00669f5600cd4,0x3fec121443945350,4 +np.float64,0xbfe7152ba2ee2a58,0x3fe8079465d09846,4 +np.float64,0x3fe8654d8f70ca9c,0x3fe7247c94f09596,4 +np.float64,0x3fea68045cf4d008,0x3fe5b58cfe81a243,4 +np.float64,0xbfcd4779073a8ef4,0x3fef2a9d78153fa5,4 +np.float64,0xbfdb4456e5b688ae,0x3fed23b11614203f,4 +np.float64,0x3fcb5d59cd36bab0,0x3fef45818216a515,4 +np.float64,0xbfd914ff5ab229fe,0x3fed92e73746fea8,4 +np.float64,0x3fe4d211db69a424,0x3fe97653f433d15f,4 +np.float64,0xbfdbbb9224b77724,0x3fed0adb593dde80,4 +np.float64,0x3fd424ceafa8499c,0x3fee6d9124795d33,4 +np.float64,0x3feb5968f976b2d2,0x3fe501d116efbf54,4 +np.float64,0x3fee7d92a2fcfb26,0x3fe28a479b6a9dcf,4 +np.float64,0x3fc308e9972611d0,0x3fefa595f4df0c89,4 +np.float64,0x3fda79cd77b4f39c,0x3fed4cf8e69ba1f8,4 +np.float64,0x3fcbcf42d5379e88,0x3fef3f6a6a77c187,4 +np.float64,0x3fe13a1da662743c,0x3feb79504faea888,4 +np.float64,0xbfee4435f07c886c,0x3fe2b8ea98d2fc29,4 +np.float64,0x3fd65d68ccacbad0,0x3fee10e1ac7ada89,4 +np.float64,0x3fef2f89bb7e5f14,0x3fe1f81e882cc3f4,4 +np.float64,0xbfef0a7769fe14ef,0x3fe216bf384fc646,4 +np.float64,0x3fc065277320ca50,0x3fefbce44835c193,4 +np.float64,0x3fe9c1a74d73834e,0x3fe62e9ee0c2f2bf,4 +np.float64,0x3fd9d96e5db3b2dc,0x3fed6cd88eb51f6a,4 +np.float64,0x3fe02bf1c56057e4,0x3febfffc24b5a7ba,4 +np.float64,0xbfd6814350ad0286,0x3fee0ab9ad318b84,4 +np.float64,0x3f9fcbec583f97c0,0x3feffc0d0f1d8e75,4 
+np.float64,0x3fe23524e5e46a4a,0x3feaf55372949a06,4 +np.float64,0xbfbdc95f6a3b92c0,0x3fefc89c21d44995,4 +np.float64,0x3fe961bb9cf2c378,0x3fe6735d6e1cca58,4 +np.float64,0xbfe8f1c370f1e387,0x3fe6c29d1be8bee9,4 +np.float64,0x3fd880d43ab101a8,0x3fedaee3c7ccfc96,4 +np.float64,0xbfedb37005fb66e0,0x3fe32d91ef2e3bd3,4 +np.float64,0xfdce287bfb9c5,0x3ff0000000000000,4 +np.float64,0x9aa1b9e735437,0x3ff0000000000000,4 +np.float64,0x6beac6e0d7d59,0x3ff0000000000000,4 +np.float64,0x47457aae8e8b0,0x3ff0000000000000,4 +np.float64,0x35ff13b46bfe3,0x3ff0000000000000,4 +np.float64,0xb9c0c82b73819,0x3ff0000000000000,4 +np.float64,0x1a8dc21a351b9,0x3ff0000000000000,4 +np.float64,0x7e87ef6afd0ff,0x3ff0000000000000,4 +np.float64,0x620a6588c414d,0x3ff0000000000000,4 +np.float64,0x7f366000fe6e,0x3ff0000000000000,4 +np.float64,0x787e39f4f0fc8,0x3ff0000000000000,4 +np.float64,0xf5134f1fea26a,0x3ff0000000000000,4 +np.float64,0xbce700ef79ce0,0x3ff0000000000000,4 +np.float64,0x144d7cc8289b1,0x3ff0000000000000,4 +np.float64,0xb9fbc5b973f79,0x3ff0000000000000,4 +np.float64,0xc3d6292d87ac5,0x3ff0000000000000,4 +np.float64,0xc1084e618210a,0x3ff0000000000000,4 +np.float64,0xb6b9eca56d73e,0x3ff0000000000000,4 +np.float64,0xc7ac4b858f58a,0x3ff0000000000000,4 +np.float64,0x516d75d2a2daf,0x3ff0000000000000,4 +np.float64,0x9dc089d93b811,0x3ff0000000000000,4 +np.float64,0x7b5f2840f6be6,0x3ff0000000000000,4 +np.float64,0x121d3ce8243a9,0x3ff0000000000000,4 +np.float64,0xf0be0337e17c1,0x3ff0000000000000,4 +np.float64,0xff58a5cbfeb15,0x3ff0000000000000,4 +np.float64,0xdaf1d07fb5e3a,0x3ff0000000000000,4 +np.float64,0x61d95382c3b2b,0x3ff0000000000000,4 +np.float64,0xe4df943fc9bf3,0x3ff0000000000000,4 +np.float64,0xf72ac2bdee559,0x3ff0000000000000,4 +np.float64,0x12dafbf625b60,0x3ff0000000000000,4 +np.float64,0xee11d427dc23b,0x3ff0000000000000,4 +np.float64,0xf4f8eb37e9f1e,0x3ff0000000000000,4 +np.float64,0xad7cb5df5af97,0x3ff0000000000000,4 +np.float64,0x59fc9b06b3f94,0x3ff0000000000000,4 +np.float64,0x3c3e65e4787ce,0x3ff0000000000000,4 +np.float64,0xe37bc993c6f79,0x3ff0000000000000,4 +np.float64,0x13bd6330277ad,0x3ff0000000000000,4 +np.float64,0x56cc2800ad986,0x3ff0000000000000,4 +np.float64,0x6203b8fcc4078,0x3ff0000000000000,4 +np.float64,0x75c7c8b8eb8fa,0x3ff0000000000000,4 +np.float64,0x5ebf8e00bd7f2,0x3ff0000000000000,4 +np.float64,0xda81f2f1b503f,0x3ff0000000000000,4 +np.float64,0x6adb17d6d5b64,0x3ff0000000000000,4 +np.float64,0x1ba68eee374d3,0x3ff0000000000000,4 +np.float64,0xeecf6fbbdd9ee,0x3ff0000000000000,4 +np.float64,0x24d6dd8e49add,0x3ff0000000000000,4 +np.float64,0xdf7cb81bbef97,0x3ff0000000000000,4 +np.float64,0xafd7be1b5faf8,0x3ff0000000000000,4 +np.float64,0xdb90ca35b721a,0x3ff0000000000000,4 +np.float64,0xa72903a14e521,0x3ff0000000000000,4 +np.float64,0x14533ee028a7,0x3ff0000000000000,4 +np.float64,0x7951540cf2a2b,0x3ff0000000000000,4 +np.float64,0x22882be045106,0x3ff0000000000000,4 +np.float64,0x136270d626c4f,0x3ff0000000000000,4 +np.float64,0x6a0f5744d41ec,0x3ff0000000000000,4 +np.float64,0x21e0d1aa43c1b,0x3ff0000000000000,4 +np.float64,0xee544155dca88,0x3ff0000000000000,4 +np.float64,0xcbe8aac797d16,0x3ff0000000000000,4 +np.float64,0x6c065e80d80e,0x3ff0000000000000,4 +np.float64,0xe57f0411cafe1,0x3ff0000000000000,4 +np.float64,0xdec3a6bdbd875,0x3ff0000000000000,4 +np.float64,0xf4d23a0fe9a48,0x3ff0000000000000,4 +np.float64,0xda77ef47b4efe,0x3ff0000000000000,4 +np.float64,0x8c405c9b1880c,0x3ff0000000000000,4 +np.float64,0x4eced5149d9db,0x3ff0000000000000,4 +np.float64,0x16b6552c2d6cc,0x3ff0000000000000,4 
+np.float64,0x6fbc262cdf785,0x3ff0000000000000,4 +np.float64,0x628c3844c5188,0x3ff0000000000000,4 +np.float64,0x6d827d2cdb050,0x3ff0000000000000,4 +np.float64,0xd1bfdf29a37fc,0x3ff0000000000000,4 +np.float64,0xd85400fdb0a80,0x3ff0000000000000,4 +np.float64,0xcc420b2d98842,0x3ff0000000000000,4 +np.float64,0xac41d21b5883b,0x3ff0000000000000,4 +np.float64,0x432f18d4865e4,0x3ff0000000000000,4 +np.float64,0xe7e89a1bcfd14,0x3ff0000000000000,4 +np.float64,0x9b1141d536228,0x3ff0000000000000,4 +np.float64,0x6805f662d00bf,0x3ff0000000000000,4 +np.float64,0xc76552358ecab,0x3ff0000000000000,4 +np.float64,0x4ae8ffee95d21,0x3ff0000000000000,4 +np.float64,0x4396c096872d9,0x3ff0000000000000,4 +np.float64,0x6e8e55d4dd1cb,0x3ff0000000000000,4 +np.float64,0x4c2e33dc985c7,0x3ff0000000000000,4 +np.float64,0xbce814a579d03,0x3ff0000000000000,4 +np.float64,0x911681b5222d0,0x3ff0000000000000,4 +np.float64,0x5f90a4b2bf215,0x3ff0000000000000,4 +np.float64,0x26f76be84deee,0x3ff0000000000000,4 +np.float64,0xb2f7536165eeb,0x3ff0000000000000,4 +np.float64,0x4de4e6089bc9d,0x3ff0000000000000,4 +np.float64,0xf2e016afe5c03,0x3ff0000000000000,4 +np.float64,0xb9b7b949736f7,0x3ff0000000000000,4 +np.float64,0x3363ea1866c7e,0x3ff0000000000000,4 +np.float64,0xd1a3bd6ba3478,0x3ff0000000000000,4 +np.float64,0xae89f3595d13f,0x3ff0000000000000,4 +np.float64,0xddbd9601bb7c,0x3ff0000000000000,4 +np.float64,0x5de41a06bbc84,0x3ff0000000000000,4 +np.float64,0xfd58c86dfab19,0x3ff0000000000000,4 +np.float64,0x24922e8c49247,0x3ff0000000000000,4 +np.float64,0xcda040339b408,0x3ff0000000000000,4 +np.float64,0x5fe500b2bfca1,0x3ff0000000000000,4 +np.float64,0x9214abb924296,0x3ff0000000000000,4 +np.float64,0x800609fe0a2c13fd,0x3ff0000000000000,4 +np.float64,0x800c7c6fe518f8e0,0x3ff0000000000000,4 +np.float64,0x800a1a9491b4352a,0x3ff0000000000000,4 +np.float64,0x800b45e0e8968bc2,0x3ff0000000000000,4 +np.float64,0x8008497e57d092fd,0x3ff0000000000000,4 +np.float64,0x800b9c0af0173816,0x3ff0000000000000,4 +np.float64,0x800194cccb43299a,0x3ff0000000000000,4 +np.float64,0x8001c91ef183923f,0x3ff0000000000000,4 +np.float64,0x800f25b5ccde4b6c,0x3ff0000000000000,4 +np.float64,0x800ce63ccc79cc7a,0x3ff0000000000000,4 +np.float64,0x800d8fb2e83b1f66,0x3ff0000000000000,4 +np.float64,0x80083cd06f7079a1,0x3ff0000000000000,4 +np.float64,0x800823598e9046b3,0x3ff0000000000000,4 +np.float64,0x8001c1319de38264,0x3ff0000000000000,4 +np.float64,0x800f2b68543e56d1,0x3ff0000000000000,4 +np.float64,0x80022a4f4364549f,0x3ff0000000000000,4 +np.float64,0x800f51badf7ea376,0x3ff0000000000000,4 +np.float64,0x8003fbf31e27f7e7,0x3ff0000000000000,4 +np.float64,0x800d4c00e2fa9802,0x3ff0000000000000,4 +np.float64,0x800023b974804774,0x3ff0000000000000,4 +np.float64,0x800860778990c0ef,0x3ff0000000000000,4 +np.float64,0x800a15c241542b85,0x3ff0000000000000,4 +np.float64,0x8003097d9dc612fc,0x3ff0000000000000,4 +np.float64,0x800d77d8541aefb1,0x3ff0000000000000,4 +np.float64,0x80093804ab52700a,0x3ff0000000000000,4 +np.float64,0x800d2b3bfd7a5678,0x3ff0000000000000,4 +np.float64,0x800da24bcd5b4498,0x3ff0000000000000,4 +np.float64,0x8006eee1c28dddc4,0x3ff0000000000000,4 +np.float64,0x80005137fa40a271,0x3ff0000000000000,4 +np.float64,0x8007a3fbc22f47f8,0x3ff0000000000000,4 +np.float64,0x800dcd97071b9b2e,0x3ff0000000000000,4 +np.float64,0x80065b36048cb66d,0x3ff0000000000000,4 +np.float64,0x8004206ba72840d8,0x3ff0000000000000,4 +np.float64,0x8007e82b98cfd058,0x3ff0000000000000,4 +np.float64,0x8001a116ed23422f,0x3ff0000000000000,4 +np.float64,0x800c69e9ff18d3d4,0x3ff0000000000000,4 
+np.float64,0x8003843688e7086e,0x3ff0000000000000,4 +np.float64,0x800335e3b8866bc8,0x3ff0000000000000,4 +np.float64,0x800e3308f0bc6612,0x3ff0000000000000,4 +np.float64,0x8002a9ec55c553d9,0x3ff0000000000000,4 +np.float64,0x80001c2084e03842,0x3ff0000000000000,4 +np.float64,0x800bc2bbd8d78578,0x3ff0000000000000,4 +np.float64,0x800ae6bcc555cd7a,0x3ff0000000000000,4 +np.float64,0x80083f7a13907ef5,0x3ff0000000000000,4 +np.float64,0x800d83ed76db07db,0x3ff0000000000000,4 +np.float64,0x800a12251974244b,0x3ff0000000000000,4 +np.float64,0x800a69c95714d393,0x3ff0000000000000,4 +np.float64,0x800cd5a85639ab51,0x3ff0000000000000,4 +np.float64,0x800e0e1837bc1c31,0x3ff0000000000000,4 +np.float64,0x8007b5ca39ef6b95,0x3ff0000000000000,4 +np.float64,0x800cf961cad9f2c4,0x3ff0000000000000,4 +np.float64,0x80066e8fc14cdd20,0x3ff0000000000000,4 +np.float64,0x8001cb8c7b43971a,0x3ff0000000000000,4 +np.float64,0x800002df68a005c0,0x3ff0000000000000,4 +np.float64,0x8003e6681567ccd1,0x3ff0000000000000,4 +np.float64,0x800b039126b60723,0x3ff0000000000000,4 +np.float64,0x800d2e1b663a5c37,0x3ff0000000000000,4 +np.float64,0x800188b3e2a31169,0x3ff0000000000000,4 +np.float64,0x8001f272e943e4e7,0x3ff0000000000000,4 +np.float64,0x800d7f53607afea7,0x3ff0000000000000,4 +np.float64,0x80092cafa4f25960,0x3ff0000000000000,4 +np.float64,0x800fc009f07f8014,0x3ff0000000000000,4 +np.float64,0x8003da896507b514,0x3ff0000000000000,4 +np.float64,0x800d4d1b4c3a9a37,0x3ff0000000000000,4 +np.float64,0x8007a835894f506c,0x3ff0000000000000,4 +np.float64,0x80057ba0522af741,0x3ff0000000000000,4 +np.float64,0x8009b7054b336e0b,0x3ff0000000000000,4 +np.float64,0x800b2c6c125658d9,0x3ff0000000000000,4 +np.float64,0x8008b1840ad16308,0x3ff0000000000000,4 +np.float64,0x8007ea0e3befd41d,0x3ff0000000000000,4 +np.float64,0x800dd658683bacb1,0x3ff0000000000000,4 +np.float64,0x8008cda48fd19b49,0x3ff0000000000000,4 +np.float64,0x8003acca14c75995,0x3ff0000000000000,4 +np.float64,0x8008bd152d717a2b,0x3ff0000000000000,4 +np.float64,0x80010d1ea3621a3e,0x3ff0000000000000,4 +np.float64,0x800130b78b826170,0x3ff0000000000000,4 +np.float64,0x8002cf3a46e59e75,0x3ff0000000000000,4 +np.float64,0x800b76e7fa76edd0,0x3ff0000000000000,4 +np.float64,0x800e065fe1dc0cc0,0x3ff0000000000000,4 +np.float64,0x8000dd527ea1baa6,0x3ff0000000000000,4 +np.float64,0x80032cb234665965,0x3ff0000000000000,4 +np.float64,0x800affc1acb5ff84,0x3ff0000000000000,4 +np.float64,0x80074be23fee97c5,0x3ff0000000000000,4 +np.float64,0x8004f83eafc9f07e,0x3ff0000000000000,4 +np.float64,0x800b02a115560543,0x3ff0000000000000,4 +np.float64,0x800b324a55766495,0x3ff0000000000000,4 +np.float64,0x800ffbcfd69ff7a0,0x3ff0000000000000,4 +np.float64,0x800830bc7b906179,0x3ff0000000000000,4 +np.float64,0x800cbafe383975fd,0x3ff0000000000000,4 +np.float64,0x8001ee42bfe3dc86,0x3ff0000000000000,4 +np.float64,0x8005b00fdc0b6020,0x3ff0000000000000,4 +np.float64,0x8005e7addd0bcf5c,0x3ff0000000000000,4 +np.float64,0x8001ae4cb0635c9a,0x3ff0000000000000,4 +np.float64,0x80098a9941131533,0x3ff0000000000000,4 +np.float64,0x800334c929466993,0x3ff0000000000000,4 +np.float64,0x8009568239d2ad05,0x3ff0000000000000,4 +np.float64,0x800f0639935e0c73,0x3ff0000000000000,4 +np.float64,0x800cebce7499d79d,0x3ff0000000000000,4 +np.float64,0x800482ee4c2905dd,0x3ff0000000000000,4 +np.float64,0x8007b7bd9e2f6f7c,0x3ff0000000000000,4 +np.float64,0x3fe654469f2ca88d,0x3fe8853f6c01ffb3,4 +np.float64,0x3feb4d7297369ae5,0x3fe50ad5bb621408,4 +np.float64,0x3feef53ba43dea77,0x3fe2283f356f8658,4 +np.float64,0x3fddf564eabbeaca,0x3fec8ec0e0dead9c,4 
+np.float64,0x3fd3a69078274d21,0x3fee80e05c320000,4 +np.float64,0x3fecdafe5d39b5fd,0x3fe3d91a5d440fd9,4 +np.float64,0x3fd93286bc32650d,0x3fed8d40696cd10e,4 +np.float64,0x3fc0d34eb821a69d,0x3fefb954023d4284,4 +np.float64,0x3fc7b4b9a02f6973,0x3fef73e8739787ce,4 +np.float64,0x3fe08c839a611907,0x3febd0bc6f5641cd,4 +np.float64,0x3fb3d1758627a2eb,0x3fefe776f6183f96,4 +np.float64,0x3fef93c9ff3f2794,0x3fe1a4d2f622627d,4 +np.float64,0x3fea8d0041351a01,0x3fe59a52a1c78c9e,4 +np.float64,0x3fe3e26a30e7c4d4,0x3fea04ad3e0bbf8d,4 +np.float64,0x3fe5a34c9f6b4699,0x3fe8f57c5ccd1eab,4 +np.float64,0x3fc21ef859243df1,0x3fefae0b68a3a2e7,4 +np.float64,0x3fed7dd585fafbab,0x3fe35860041e5b0d,4 +np.float64,0x3fe5abacf22b575a,0x3fe8f03d8b6ef0f2,4 +np.float64,0x3fe426451f284c8a,0x3fe9dcf21f13205b,4 +np.float64,0x3fc01f6456203ec9,0x3fefbf19e2a8e522,4 +np.float64,0x3fe1cf2772239e4f,0x3feb2bbd645c7697,4 +np.float64,0x3fd18c4ace231896,0x3feecdfdd086c110,4 +np.float64,0x3fe8387d5b7070fb,0x3fe74358f2ec4910,4 +np.float64,0x3fdce51c2239ca38,0x3feccb2ae5459632,4 +np.float64,0x3fe5b0f2e4eb61e6,0x3fe8ecef4dbe4277,4 +np.float64,0x3fe1ceeb08a39dd6,0x3feb2bdd4dcfb3df,4 +np.float64,0x3febc5899d778b13,0x3fe4afc8dd8ad228,4 +np.float64,0x3fe7a47fbe2f48ff,0x3fe7a7fd9b352ea5,4 +np.float64,0x3fe7f74e1fafee9c,0x3fe76feb2755b247,4 +np.float64,0x3fe2bfad04e57f5a,0x3feaa9b46adddaeb,4 +np.float64,0x3fd06a090320d412,0x3feef40c334f8fba,4 +np.float64,0x3fdc97297d392e53,0x3fecdc16a3e22fcb,4 +np.float64,0x3fdc1a3f3838347e,0x3fecf6db2769d404,4 +np.float64,0x3fcca90096395201,0x3fef338156fcd218,4 +np.float64,0x3fed464733fa8c8e,0x3fe38483f0465d91,4 +np.float64,0x3fe7e067d82fc0d0,0x3fe77f7c8c9de896,4 +np.float64,0x3fc014fa0b2029f4,0x3fefbf6d84c933f8,4 +np.float64,0x3fd3bf1524277e2a,0x3fee7d2997b74dec,4 +np.float64,0x3fec153b86782a77,0x3fe472bb5497bb2a,4 +np.float64,0x3fd3e4d9d5a7c9b4,0x3fee776842691902,4 +np.float64,0x3fea6c0e2c74d81c,0x3fe5b2954cb458d9,4 +np.float64,0x3fee8f6a373d1ed4,0x3fe27bb9e348125b,4 +np.float64,0x3fd30c6dd42618dc,0x3fee97d2cab2b0bc,4 +np.float64,0x3fe4f90e6d69f21d,0x3fe95ea3dd4007f2,4 +np.float64,0x3fe271d467e4e3a9,0x3fead470d6d4008b,4 +np.float64,0x3fef2983897e5307,0x3fe1fd1a4debe33b,4 +np.float64,0x3fe980cc83b30199,0x3fe65d2fb8a0eb46,4 +np.float64,0x3fdfdf53db3fbea8,0x3fec1cf95b2a1cc7,4 +np.float64,0x3fe4d5307ba9aa61,0x3fe974701b4156cb,4 +np.float64,0x3fdb4e2345b69c47,0x3fed21aa6c146512,4 +np.float64,0x3fe3f7830327ef06,0x3fe9f85f6c88c2a8,4 +np.float64,0x3fca915fb63522bf,0x3fef502b73a52ecf,4 +np.float64,0x3fe66d3709ecda6e,0x3fe87531d7372d7a,4 +np.float64,0x3fd86000bcb0c001,0x3fedb5018dd684ca,4 +np.float64,0x3fe516e5feea2dcc,0x3fe94c68b111404e,4 +np.float64,0x3fd83c53dd3078a8,0x3fedbb9e5dd9e165,4 +np.float64,0x3fedfeeb673bfdd7,0x3fe2f0f0253c5d5d,4 +np.float64,0x3fe0dc6f9c21b8df,0x3feba8e2452410c2,4 +np.float64,0x3fbe154d643c2a9b,0x3fefc780a9357457,4 +np.float64,0x3fe5f63986abec73,0x3fe8c1434951a40a,4 +np.float64,0x3fbce0e50839c1ca,0x3fefcbeeaa27de75,4 +np.float64,0x3fd7ef5c5c2fdeb9,0x3fedc9c3022495b3,4 +np.float64,0x3fc1073914220e72,0x3fefb79de80fc0fd,4 +np.float64,0x3fe1a93c3d235278,0x3feb3fb21f86ac67,4 +np.float64,0x3fe321ee53e643dd,0x3fea72e2999f1e22,4 +np.float64,0x3fa881578c3102af,0x3feff69e6e51e0d6,4 +np.float64,0x3fd313482a262690,0x3fee96d161199495,4 +np.float64,0x3fe7272cd6ae4e5a,0x3fe7fbacbd0d8f43,4 +np.float64,0x3fd6cf4015ad9e80,0x3fedfd3513d544b8,4 +np.float64,0x3fc67b7e6d2cf6fd,0x3fef81f5c16923a4,4 +np.float64,0x3fa1999c14233338,0x3feffb2913a14184,4 +np.float64,0x3fc74eb8dd2e9d72,0x3fef78909a138e3c,4 
+np.float64,0x3fc0b9274921724f,0x3fefba2ebd5f3e1c,4 +np.float64,0x3fd53fa156aa7f43,0x3fee40a18e952e88,4 +np.float64,0x3feaccbca4b59979,0x3fe56b22b33eb713,4 +np.float64,0x3fe6a01e3a2d403c,0x3fe8543fbd820ecc,4 +np.float64,0x3fd392a869a72551,0x3fee83e0ffe0e8de,4 +np.float64,0x3fe44d8928689b12,0x3fe9c5bf3c8fffdb,4 +np.float64,0x3fca3f209f347e41,0x3fef5461b6fa0924,4 +np.float64,0x3fee9e84b07d3d09,0x3fe26f638f733549,4 +np.float64,0x3faf49acb03e9359,0x3feff0b583cd8c48,4 +np.float64,0x3fea874b2af50e96,0x3fe59e882fa6febf,4 +np.float64,0x3fc50b72772a16e5,0x3fef918777dc41be,4 +np.float64,0x3fe861d1d4f0c3a4,0x3fe726e44d9d42c2,4 +np.float64,0x3fcadd2e2535ba5c,0x3fef4c3e2b56da38,4 +np.float64,0x3fea59c29cb4b385,0x3fe5c0043e586439,4 +np.float64,0x3fc1ffef0d23ffde,0x3fefaf22be452d13,4 +np.float64,0x3fc2d8dbc125b1b8,0x3fefa75b646d8e4e,4 +np.float64,0x3fd66c6471acd8c9,0x3fee0e5038b895c0,4 +np.float64,0x3fd0854adfa10a96,0x3feef0945bcc5c99,4 +np.float64,0x3feaac7076f558e1,0x3fe58316c23a82ad,4 +np.float64,0x3fdda49db3bb493b,0x3feca0e347c0ad6f,4 +np.float64,0x3fe43a539de874a7,0x3fe9d11d722d4822,4 +np.float64,0x3feeee3ebbfddc7d,0x3fe22dffd251e9af,4 +np.float64,0x3f8ee2c5b03dc58b,0x3fefff11855a7b6c,4 +np.float64,0x3fcd7107c63ae210,0x3fef2840bb55ca52,4 +np.float64,0x3f8d950d203b2a1a,0x3fefff253a08e40e,4 +np.float64,0x3fd40a5e57a814bd,0x3fee71a633c761fc,4 +np.float64,0x3fee836ec83d06de,0x3fe28580975be2fd,4 +np.float64,0x3fd7bbe87f2f77d1,0x3fedd31f661890cc,4 +np.float64,0xbfe05bf138a0b7e2,0x3febe8a000d96e47,4 +np.float64,0xbf88bddd90317bc0,0x3fefff66f6e2ff26,4 +np.float64,0xbfdc9cbb12393976,0x3fecdae2982335db,4 +np.float64,0xbfd85b4eccb0b69e,0x3fedb5e0dd87f702,4 +np.float64,0xbfe5c326cb2b864e,0x3fe8e180f525fa12,4 +np.float64,0xbfe381a0e4a70342,0x3fea3c8e5e3ab78e,4 +np.float64,0xbfe58d892c2b1b12,0x3fe9031551617aed,4 +np.float64,0xbfd7f3a52cafe74a,0x3fedc8fa97edd080,4 +np.float64,0xbfef3417bc7e682f,0x3fe1f45989f6a009,4 +np.float64,0xbfddfb8208bbf704,0x3fec8d5fa9970773,4 +np.float64,0xbfdab69bcc356d38,0x3fed40b2f6c347c6,4 +np.float64,0xbfed3f7cf17a7efa,0x3fe389e4ff4d9235,4 +np.float64,0xbfe47675d9a8ecec,0x3fe9ad6829a69e94,4 +np.float64,0xbfd030e2902061c6,0x3feefb3f811e024f,4 +np.float64,0xbfc376ac7226ed58,0x3fefa1798712b37e,4 +np.float64,0xbfdb7e54a0b6fcaa,0x3fed17a974c4bc28,4 +np.float64,0xbfdb7d5d5736faba,0x3fed17dcf31a8d84,4 +np.float64,0xbf876bd6502ed7c0,0x3fefff76dce6232c,4 +np.float64,0xbfd211e6c02423ce,0x3feebba41f0a1764,4 +np.float64,0xbfb443e3962887c8,0x3fefe658953629d4,4 +np.float64,0xbfe81b09e9b03614,0x3fe757882e4fdbae,4 +np.float64,0xbfdcb905d2b9720c,0x3fecd4c22cfe84e5,4 +np.float64,0xbfe3b62d99276c5b,0x3fea1e5520b3098d,4 +np.float64,0xbfbf05b25c3e0b68,0x3fefc3ecc04bca8e,4 +np.float64,0xbfdedc885b3db910,0x3fec59e22feb49f3,4 +np.float64,0xbfe33aa282667545,0x3fea64f2d55ec471,4 +np.float64,0xbfec84745a3908e9,0x3fe41cb3214e7044,4 +np.float64,0xbfddefdff1bbdfc0,0x3fec8fff88d4d0ec,4 +np.float64,0xbfd26ae6aca4d5ce,0x3feeaf208c7fedf6,4 +np.float64,0xbfee010591fc020b,0x3fe2ef3e57211a5e,4 +np.float64,0xbfb8cfddca319fb8,0x3fefd98d8f7918ed,4 +np.float64,0xbfe991648f3322c9,0x3fe6514e54670bae,4 +np.float64,0xbfee63fd087cc7fa,0x3fe29f1bfa3297cc,4 +np.float64,0xbfe1685942a2d0b2,0x3feb617f5f839eee,4 +np.float64,0xbfc6fc2fd62df860,0x3fef7c4698fd58cf,4 +np.float64,0xbfe42723d3a84e48,0x3fe9dc6ef7243e90,4 +np.float64,0xbfc3a7e89d274fd0,0x3fef9f99e3314e77,4 +np.float64,0xbfeb4c9521f6992a,0x3fe50b7c919bc6d8,4 +np.float64,0xbf707b34e020f680,0x3fefffef05e30264,4 +np.float64,0xbfc078478e20f090,0x3fefbc479305d5aa,4 
+np.float64,0xbfd494ac4ca92958,0x3fee5c11f1cd8269,4 +np.float64,0xbfdaf888a035f112,0x3fed3346ae600469,4 +np.float64,0xbfa5d8ed502bb1e0,0x3feff88b0f262609,4 +np.float64,0xbfeec0cbfffd8198,0x3fe253543b2371cb,4 +np.float64,0xbfe594b5986b296b,0x3fe8fe9b39fb3940,4 +np.float64,0xbfc8ece7c631d9d0,0x3fef652bd0611ac7,4 +np.float64,0xbfd8ffeca0b1ffda,0x3fed96ebdf9b65cb,4 +np.float64,0xbfba9b221e353648,0x3fefd3cc21e2f15c,4 +np.float64,0xbfca63a52c34c74c,0x3fef52848eb9ed3b,4 +np.float64,0xbfe588e9b06b11d4,0x3fe905f7403e8881,4 +np.float64,0xbfc76f82db2edf04,0x3fef77138fe9bbc2,4 +np.float64,0xbfeeb3f334bd67e6,0x3fe25ddadb1096d6,4 +np.float64,0xbfbf2b64ce3e56c8,0x3fefc35a9555f6df,4 +np.float64,0xbfe9920e4ff3241c,0x3fe650d4ab8f5c42,4 +np.float64,0xbfb4a54c02294a98,0x3fefe55fc85ae5e9,4 +np.float64,0xbfe353b0c766a762,0x3fea56c02d17e4b7,4 +np.float64,0xbfd99961a4b332c4,0x3fed795fcd00dbf9,4 +np.float64,0xbfef191ddabe323c,0x3fe20aa79524f636,4 +np.float64,0xbfb25d060224ba10,0x3fefeaeee5cc8c0b,4 +np.float64,0xbfe6022428ec0448,0x3fe8b9b46e776194,4 +np.float64,0xbfed1a236cba3447,0x3fe3a76bee0d9861,4 +np.float64,0xbfc59671e72b2ce4,0x3fef8bc4daef6f14,4 +np.float64,0xbfdf2711703e4e22,0x3fec4886a8c9ceb5,4 +np.float64,0xbfeb7e207536fc41,0x3fe4e610c783f168,4 +np.float64,0xbfe6cdf5bcad9bec,0x3fe8365f8a59bc81,4 +np.float64,0xbfe55294adaaa52a,0x3fe927b0af5ccd09,4 +np.float64,0xbfdf4a88913e9512,0x3fec4036df58ba74,4 +np.float64,0xbfebb7efe4376fe0,0x3fe4ba276006992d,4 +np.float64,0xbfe09f29cfa13e54,0x3febc77f4f9c95e7,4 +np.float64,0xbfdf8c75653f18ea,0x3fec30ac924e4f46,4 +np.float64,0xbfefd601c7ffac04,0x3fe16d6f21bcb9c1,4 +np.float64,0xbfeae97ff5f5d300,0x3fe555bb5b87efe9,4 +np.float64,0xbfed427f02fa84fe,0x3fe387830db093bc,4 +np.float64,0xbfa33909cc267210,0x3feffa3a1bcb50dd,4 +np.float64,0xbfe9aa4bf5f35498,0x3fe63f6e98f6aa0f,4 +np.float64,0xbfe2d7349b25ae69,0x3fea9caa7c331e7e,4 +np.float64,0xbfcdbb2a3a3b7654,0x3fef2401c9659e4b,4 +np.float64,0xbfc8a90919315214,0x3fef686fe7fc0513,4 +np.float64,0xbfe62a98df2c5532,0x3fe89ff22a02cc6b,4 +np.float64,0xbfdc0f67b3b81ed0,0x3fecf928b637798f,4 +np.float64,0xbfebb32bf6f76658,0x3fe4bdc893c09698,4 +np.float64,0xbfec067996380cf3,0x3fe47e132741db97,4 +np.float64,0xbfd9774e1d32ee9c,0x3fed7ffe1e87c434,4 +np.float64,0xbfef989890bf3131,0x3fe1a0d025c80cf4,4 +np.float64,0xbfe59887e62b3110,0x3fe8fc382a3d4197,4 +np.float64,0xbfdea0a11e3d4142,0x3fec67b987e236ec,4 +np.float64,0xbfe2ec495825d892,0x3fea90efb231602d,4 +np.float64,0xbfb329c5c2265388,0x3fefe90f1b8209c3,4 +np.float64,0xbfdcd2dcd339a5ba,0x3feccf24c60b1478,4 +np.float64,0xbfe537ea18aa6fd4,0x3fe938237e217fe0,4 +np.float64,0xbfe8675ce170ceba,0x3fe723105925ce3a,4 +np.float64,0xbfd70723acae0e48,0x3fedf369ac070e65,4 +np.float64,0xbfea9d8692b53b0d,0x3fe58e1ee42e3fdb,4 +np.float64,0xbfcfeb96653fd72c,0x3fef029770033bdc,4 +np.float64,0xbfcc06c92d380d94,0x3fef3c69797d9b0a,4 +np.float64,0xbfe16b7c4f62d6f8,0x3feb5fdf9f0a9a07,4 +np.float64,0xbfed4d7a473a9af4,0x3fe37ecee27b1eb7,4 +np.float64,0xbfe6a6f6942d4ded,0x3fe84fccdf762b19,4 +np.float64,0xbfda46d867348db0,0x3fed572d928fa657,4 +np.float64,0xbfdbd9482db7b290,0x3fed049b5f907b52,4 +np.float64,0x7fe992ceb933259c,0xbfeb15af92aad70e,4 +np.float64,0x7fe3069204a60d23,0xbfe5eeff454240e9,4 +np.float64,0x7fe729dbf32e53b7,0xbfefe0528a330e4c,4 +np.float64,0x7fec504fb638a09e,0x3fd288e95dbedf65,4 +np.float64,0x7fe1d30167a3a602,0xbfeffc41f946fd02,4 +np.float64,0x7fed7f8ffd3aff1f,0x3fefe68ec604a19d,4 +np.float64,0x7fd2f23635a5e46b,0x3fea63032efbb447,4 +np.float64,0x7fd4c86db1a990da,0x3fdf6b9f7888db5d,4 
+np.float64,0x7fe7554db6eeaa9a,0x3fe1b41476861bb0,4 +np.float64,0x7fe34e823ba69d03,0x3fefc435532e6294,4 +np.float64,0x7fec5c82fef8b905,0x3fef8f0c6473034f,4 +np.float64,0x7feba221bff74442,0xbfea95b81eb19b47,4 +np.float64,0x7fe74808a5ae9010,0xbfd3aa322917c3e5,4 +np.float64,0x7fdf41b7e0be836f,0x3fd14283c7147282,4 +np.float64,0x7fec09892f381311,0x3fe5240376ae484b,4 +np.float64,0x7faaf80bf435f017,0x3fe20227fa811423,4 +np.float64,0x7f8422d8402845b0,0x3fe911714593b8a0,4 +np.float64,0x7fd23a7fada474fe,0x3feff9f40aa37e9c,4 +np.float64,0x7fef4a4806fe948f,0x3fec6eca89cb4a62,4 +np.float64,0x7fe1e71cf763ce39,0xbfea6ac63f9ba457,4 +np.float64,0x7fe3e555be27caaa,0xbfe75b305d0dbbfd,4 +np.float64,0x7fcb8bac96371758,0xbfe8b126077f9d4c,4 +np.float64,0x7fc98e2c84331c58,0x3fef9092eb0bc85a,4 +np.float64,0x7fe947cf2b728f9d,0xbfebfff2c5b7d198,4 +np.float64,0x7feee8058c3dd00a,0xbfef21ebaae2eb17,4 +np.float64,0x7fef61d8d5bec3b1,0xbfdf1a032fb1c864,4 +np.float64,0x7fcf714b6f3ee296,0x3fe6fc89a8084098,4 +np.float64,0x7fa9a8b44c335168,0xbfeb16c149cea943,4 +np.float64,0x7fd175c482a2eb88,0xbfef64d341e73f88,4 +np.float64,0x7feab8e6a87571cc,0x3feb10069c397464,4 +np.float64,0x7fe3ade72de75bcd,0x3fd1753e333d5790,4 +np.float64,0x7fb26d87d224db0f,0xbfe753d36b18f4ca,4 +np.float64,0x7fdb7ef159b6fde2,0x3fe5c0a6044d3607,4 +np.float64,0x7fd5af86422b5f0c,0x3fe77193c95f6484,4 +np.float64,0x7fee9e00b07d3c00,0x3fe864d494596845,4 +np.float64,0x7fef927a147f24f3,0xbfe673b14715693d,4 +np.float64,0x7fd0aea63c215d4b,0xbfeff435f119fce9,4 +np.float64,0x7fd02e3796a05c6e,0x3fe4f7e3706e9a3d,4 +np.float64,0x7fd3ed61da27dac3,0xbfefef2f057f168c,4 +np.float64,0x7fefaca0d4ff5941,0x3fd3e8ad205cd4ab,4 +np.float64,0x7feb659e06f6cb3b,0x3fd64d803203e027,4 +np.float64,0x7fc94ccfaf32999e,0x3fee04922209369a,4 +np.float64,0x7feb4ec294f69d84,0xbfd102763a056c89,4 +np.float64,0x7fe2ada6ac655b4c,0x3fef4f6792aa6093,4 +np.float64,0x7fe5f40fdc2be81f,0xbfb4a6327186eee8,4 +np.float64,0x7fe7584bc3eeb097,0xbfd685b8ff94651d,4 +np.float64,0x7fe45d276be8ba4e,0x3fee53b13f7e442f,4 +np.float64,0x7fe6449b3d6c8935,0xbfe7e08bafa75251,4 +np.float64,0x7f8d62e6b03ac5cc,0x3fe73d30762f38fd,4 +np.float64,0x7fe3a76f72a74ede,0xbfeb48a28bc60968,4 +np.float64,0x7fd057706920aee0,0x3fdece8fa06f626c,4 +np.float64,0x7fe45ae158e8b5c2,0x3fe7a70f47b4d349,4 +np.float64,0x7fea8a5a983514b4,0x3fefb053d5f9ddd7,4 +np.float64,0x7fdd1e86ab3a3d0c,0x3fe3cded1b93816b,4 +np.float64,0x7fdb456108b68ac1,0xbfe37574c0b9bf8f,4 +np.float64,0x7fe972602432e4bf,0x3fef9a26e65ec01c,4 +np.float64,0x7fdbe2385637c470,0x3fed541df57969e1,4 +np.float64,0x7fe57f03602afe06,0x3fbd90f595cbbd94,4 +np.float64,0x7feb0ceb68f619d6,0xbfeae9cb8ee5261f,4 +np.float64,0x7fe6abfe6c6d57fc,0xbfef40a6edaca26f,4 +np.float64,0x7fe037ea08606fd3,0xbfda817d75858597,4 +np.float64,0x7fdd75a52dbaeb49,0x3feef2a0d91d6aa1,4 +np.float64,0x7fe8f9af66b1f35e,0xbfedfceef2a3bfc9,4 +np.float64,0x7fedf762b53beec4,0x3fd8b4f21ef69ee3,4 +np.float64,0x7fe99295b7f3252a,0x3feffc24d970383e,4 +np.float64,0x7fe797b0172f2f5f,0x3fee089aa56f7ce8,4 +np.float64,0x7fed89dcc97b13b9,0xbfcfa2bb0c3ea41f,4 +np.float64,0x7fae9e8d5c3d3d1a,0xbfe512ffe16c6b08,4 +np.float64,0x7fefaecbe27f5d97,0x3fbfc718a5e972f1,4 +np.float64,0x7fce0236d93c046d,0xbfa9b7cd790db256,4 +np.float64,0x7fa9689aac32d134,0x3feced501946628a,4 +np.float64,0x7feb1469e93628d3,0x3fef2a988e7673ed,4 +np.float64,0x7fdba78344b74f06,0xbfe092e78965b30c,4 +np.float64,0x7fece54c3fb9ca97,0x3fd3cfd184bed2e6,4 +np.float64,0x7fdb84212b370841,0xbfe25ebf2db6ee55,4 +np.float64,0x7fbe3e8bf23c7d17,0x3fe2ee72df573345,4 
+np.float64,0x7fe43d9803687b2f,0xbfed2eff6a9e66a0,4 +np.float64,0x7fb0f9c00a21f37f,0x3feff70f3276fdb7,4 +np.float64,0x7fea0c6cbbb418d8,0xbfefa612494798b2,4 +np.float64,0x7fe4b3239e296646,0xbfe74dd959af8cdc,4 +np.float64,0x7fe5c6a773eb8d4e,0xbfd06944048f8d2b,4 +np.float64,0x7fb1c1278223824e,0xbfeb533a34655bde,4 +np.float64,0x7fd21c09ee243813,0xbfe921ccbc9255c3,4 +np.float64,0x7fe051020c20a203,0x3fbd519d700c1f2f,4 +np.float64,0x7fe0c76845e18ed0,0x3fefb9595191a31b,4 +np.float64,0x7fe6b0b57b6d616a,0xbf8c59a8ba5fcd9a,4 +np.float64,0x7fd386c460270d88,0x3fe8ffea5d1a5c46,4 +np.float64,0x7feeb884713d7108,0x3fee9b2247ef6c0d,4 +np.float64,0x7fd85f71b6b0bee2,0xbfefc30ec3e28f07,4 +np.float64,0x7fc341366426826c,0x3fd4234d35386d3b,4 +np.float64,0x7fe56482dd6ac905,0x3fe7189de6a50668,4 +np.float64,0x7fec67a2e3f8cf45,0xbfef86d0b940f37f,4 +np.float64,0x7fe38b202fe7163f,0x3feb90b75caa2030,4 +np.float64,0x7fdcbc64883978c8,0x3fed4f758fbf64d4,4 +np.float64,0x7fea5f0598f4be0a,0x3fdd503a417b3d4d,4 +np.float64,0x7fda3b6bcf3476d7,0x3fea6e9af3f7f9f5,4 +np.float64,0x7fc7d7896c2faf12,0x3fda2bebc36a2363,4 +np.float64,0x7fe7e8e2626fd1c4,0xbfe7d5e390c4cc3f,4 +np.float64,0x7fde0f3d7abc1e7a,0xbfede7a0ecfa3606,4 +np.float64,0x7fc692b8f52d2571,0x3feff0cd7ab6f61b,4 +np.float64,0xff92d1fce825a400,0xbfc921c36fc014fa,4 +np.float64,0xffdec3af2fbd875e,0xbfed6a77e6a0364e,4 +np.float64,0xffef46e7d9be8dcf,0xbfed7d39476f7e27,4 +np.float64,0xffe2c2ce4525859c,0x3fe1757261316bc9,4 +np.float64,0xffe27c8b5864f916,0xbfefe017c0d43457,4 +np.float64,0xffe184d7442309ae,0x3fa1fb8c49dba596,4 +np.float64,0xffddf5f98d3bebf4,0x3fee4f8eaa5f847e,4 +np.float64,0xffee3ef354fc7de6,0xbfebfd60fa51b2ba,4 +np.float64,0xffdecb3e85bd967e,0x3fbfad2667a8b468,4 +np.float64,0xffe4ee900b29dd20,0xbfdc02dc626f91cd,4 +np.float64,0xffd3179f6da62f3e,0xbfe2cfe442511776,4 +np.float64,0xffe99ef7cef33def,0x3f50994542a7f303,4 +np.float64,0xffe2b66b1ae56cd6,0xbfefe3e066eb6329,4 +np.float64,0xff8f72aff03ee540,0x3fe9c46224cf5003,4 +np.float64,0xffd29beb85a537d8,0x3fefcb0b6166be71,4 +np.float64,0xffaef02d4c3de060,0xbfef5fb71028fc72,4 +np.float64,0xffd39a2a89273456,0x3fe6d4b183205dca,4 +np.float64,0xffef8a9392ff1526,0x3fedb99fbf402468,4 +np.float64,0xffb9b3f31e3367e8,0x3fee1005270fcf80,4 +np.float64,0xffed9d5c693b3ab8,0x3fd110f4b02365d5,4 +np.float64,0xffeaba45f9f5748b,0x3fe499e0a6f4afb2,4 +np.float64,0xffdba3f70d3747ee,0xbfca0c30493ae519,4 +np.float64,0xffa35b985426b730,0xbfdb625df56bcf45,4 +np.float64,0xffccbc9728397930,0x3fc53cbc59020704,4 +np.float64,0xffef73c942bee792,0xbfdc647a7a5e08be,4 +np.float64,0xffcb5acfb236b5a0,0x3feeb4ec038c39fc,4 +np.float64,0xffea116fe2b422df,0x3fefe03b6ae0b435,4 +np.float64,0xffe97de6e7b2fbcd,0xbfd2025698fab9eb,4 +np.float64,0xffdddba314bbb746,0x3fd31f0fdb8f93be,4 +np.float64,0xffd613a24a2c2744,0xbfebbb1efae884b3,4 +np.float64,0xffe3d938aa67b271,0xbfc2099cead3d3be,4 +np.float64,0xffdf08c2e33e1186,0xbfefd236839b900d,4 +np.float64,0xffea6ba8bd34d751,0x3fe8dfc032114719,4 +np.float64,0xffe3202083e64040,0x3fed513b81432a22,4 +np.float64,0xffb2397db62472f8,0xbfee7d7fe1c3f76c,4 +np.float64,0xffd9d0682ab3a0d0,0x3fe0bcf9e531ad79,4 +np.float64,0xffc293df202527c0,0xbfe58d0bdece5e64,4 +np.float64,0xffe1422c7da28458,0xbf81bd72595f2341,4 +np.float64,0xffd64e4ed4ac9c9e,0x3fa4334cc011c703,4 +np.float64,0xffe40a970ae8152e,0x3fead3d258b55b7d,4 +np.float64,0xffc8c2f2223185e4,0xbfef685f07c8b9fd,4 +np.float64,0xffe4b2f7216965ee,0x3fe3861d3d896a83,4 +np.float64,0xffdb531db3b6a63c,0x3fe18cb8332dd59d,4 +np.float64,0xffe8e727a3b1ce4e,0xbfe57b15abb677b9,4 
+np.float64,0xffe530c1e12a6184,0xbfb973ea5535e48f,4 +np.float64,0xffe6f7849cedef08,0x3fd39a37ec5af4b6,4 +np.float64,0xffead62a78b5ac54,0x3fe69b3f6c7aa24b,4 +np.float64,0xffeefdd725fdfbad,0xbfc08a456111fdd5,4 +np.float64,0xffe682182fed0430,0x3fecc7c1292761d2,4 +np.float64,0xffee0ca8dcbc1951,0x3fef6cc361ef2c19,4 +np.float64,0xffec9b338f393666,0x3fefa9ab8e0471b5,4 +np.float64,0xffe13c5e29a278bc,0xbfef8da74ad83398,4 +np.float64,0xffd7bd48c62f7a92,0x3fe3468cd4ac9d34,4 +np.float64,0xffedd0ed14bba1d9,0xbfd563a83477077b,4 +np.float64,0xffe86b83f3f0d707,0x3fe9eb3c658e4b2d,4 +np.float64,0xffd6a4db4bad49b6,0xbfc7e11276166e17,4 +np.float64,0xffc29e8404253d08,0x3fd35971961c789f,4 +np.float64,0xffe27cf3d664f9e7,0xbfeca0f73c72f810,4 +np.float64,0xffc34152352682a4,0x3fef384e564c002c,4 +np.float64,0xffe395728ba72ae4,0x3f8fe18c2de86eba,4 +np.float64,0xffed86c4fbbb0d89,0x3fef709db881c672,4 +np.float64,0xffe8a98d37f1531a,0x3fd4879c8f73c3dc,4 +np.float64,0xffb8ce9fea319d40,0xbfb853c8fe46b08d,4 +np.float64,0xffe7f26db8efe4db,0xbfec1cfd3e5c2ac1,4 +np.float64,0xffd7935b77af26b6,0x3fb7368c89b2a460,4 +np.float64,0xffc5840ed02b081c,0x3fd92220b56631f3,4 +np.float64,0xffc36a873926d510,0x3fa84d61baf61811,4 +np.float64,0xffe06ea583e0dd4a,0x3feb647e348b9e39,4 +np.float64,0xffe6a33031ed4660,0xbfe096b851dc1a0a,4 +np.float64,0xffe001c938e00392,0x3fe4eece77623e7a,4 +np.float64,0xffc1e4f23b23c9e4,0xbfdb9bb1f83f6ac4,4 +np.float64,0xffecd3ecbab9a7d9,0x3fbafb1f800f177d,4 +np.float64,0xffc2d3016825a604,0xbfef650e8b0d6afb,4 +np.float64,0xffe222cb68e44596,0x3fde3690e44de5bd,4 +np.float64,0xffe5bb145e2b7628,0x3fedbb98e23c9dc1,4 +np.float64,0xffe9e5823b73cb04,0xbfee41661016c03c,4 +np.float64,0xffd234a00ba46940,0x3fda0312cda580c2,4 +np.float64,0xffe0913ed6e1227d,0xbfed508bb529bd23,4 +np.float64,0xffe8e3596171c6b2,0xbfdc33e1c1d0310e,4 +np.float64,0xffef9c6835ff38cf,0x3fea8ce6d27dfba3,4 +np.float64,0xffdd3bcf66ba779e,0x3fe50523d2b6470e,4 +np.float64,0xffe57e8cf06afd1a,0xbfee600933347247,4 +np.float64,0xffe0d8c65fa1b18c,0x3fe75091f93d5e4c,4 +np.float64,0xffea7c8c16b4f918,0x3fee681724795198,4 +np.float64,0xffe34f7a05269ef4,0xbfe3c3e179676f13,4 +np.float64,0xffd28894a6a5112a,0xbfe5d1027aee615d,4 +np.float64,0xffc73be6f22e77cc,0x3fe469bbc08b472a,4 +np.float64,0xffe7f71b066fee36,0x3fe7ed136c8fdfaa,4 +np.float64,0xffebc13e29f7827c,0x3fefcdc6e677d314,4 +np.float64,0xffd53e9c942a7d3a,0x3fea5a02c7341749,4 +np.float64,0xffd7191b23ae3236,0x3fea419b66023443,4 +np.float64,0xffe9480325b29006,0xbfefeaff5fa38cd5,4 +np.float64,0xffba46dc0e348db8,0xbfefa54f4de28eba,4 +np.float64,0xffdd4cc31eba9986,0x3fe60bb41fe1c4da,4 +np.float64,0xffe13a70dea274e1,0xbfaa9192f7bd6c9b,4 +np.float64,0xffde25127bbc4a24,0x3f7c75f45e29be7d,4 +np.float64,0xffe4076543a80eca,0x3fea5aad50d2f687,4 +np.float64,0xffe61512acec2a25,0xbfefffeb67401649,4 +np.float64,0xffef812ec1ff025d,0xbfe919c7c073c766,4 +np.float64,0xffd5552aeaaaaa56,0x3fc89d38ab047396,4 diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-cosh.csv b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-cosh.csv new file mode 100644 index 0000000..c9e446c --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-cosh.csv @@ -0,0 +1,1429 @@ +dtype,input,output,ulperrortol +np.float32,0xfe0ac238,0x7f800000,3 +np.float32,0xbf553b86,0x3faf079b,3 +np.float32,0xff4457da,0x7f800000,3 +np.float32,0xff7253f3,0x7f800000,3 +np.float32,0x5a5802,0x3f800000,3 +np.float32,0x3db03413,0x3f80795b,3 +np.float32,0x7f6795c9,0x7f800000,3 
+np.float32,0x805b9142,0x3f800000,3 +np.float32,0xfeea581a,0x7f800000,3 +np.float32,0x3f7e2dba,0x3fc472f6,3 +np.float32,0x3d9c4d74,0x3f805f7a,3 +np.float32,0x7f18c665,0x7f800000,3 +np.float32,0x7f003e23,0x7f800000,3 +np.float32,0x3d936fa0,0x3f8054f3,3 +np.float32,0x3f32034f,0x3fa0368e,3 +np.float32,0xff087604,0x7f800000,3 +np.float32,0x380a5,0x3f800000,3 +np.float32,0x3f59694e,0x3fb10077,3 +np.float32,0x3e63e648,0x3f832ee4,3 +np.float32,0x80712f42,0x3f800000,3 +np.float32,0x3e169908,0x3f816302,3 +np.float32,0x3f2d766e,0x3f9e8692,3 +np.float32,0x3d6412e0,0x3f8032d0,3 +np.float32,0xbde689e8,0x3f80cfd4,3 +np.float32,0x483e2e,0x3f800000,3 +np.float32,0xff1ba2d0,0x7f800000,3 +np.float32,0x80136bff,0x3f800000,3 +np.float32,0x3f72534c,0x3fbdc1d4,3 +np.float32,0x3e9eb381,0x3f8632c6,3 +np.float32,0x3e142892,0x3f815795,3 +np.float32,0x0,0x3f800000,3 +np.float32,0x2f2528,0x3f800000,3 +np.float32,0x7f38be13,0x7f800000,3 +np.float32,0xfeee6896,0x7f800000,3 +np.float32,0x7f09095d,0x7f800000,3 +np.float32,0xbe94d,0x3f800000,3 +np.float32,0xbedcf8d4,0x3f8c1b74,3 +np.float32,0xbf694c02,0x3fb8ef07,3 +np.float32,0x3e2261f8,0x3f819cde,3 +np.float32,0xbf01d3ce,0x3f90d0e0,3 +np.float32,0xbeb7b3a2,0x3f8853de,3 +np.float32,0x8046de7b,0x3f800000,3 +np.float32,0xbcb45ea0,0x3f8007f1,3 +np.float32,0x3eef14af,0x3f8e35dd,3 +np.float32,0xbf047316,0x3f91846e,3 +np.float32,0x801cef45,0x3f800000,3 +np.float32,0x3e9ad891,0x3f85e609,3 +np.float32,0xff20e9cf,0x7f800000,3 +np.float32,0x80068434,0x3f800000,3 +np.float32,0xbe253020,0x3f81ab49,3 +np.float32,0x3f13f4b8,0x3f95fac9,3 +np.float32,0x804accd1,0x3f800000,3 +np.float32,0x3dee3e10,0x3f80ddf7,3 +np.float32,0xbe6c4690,0x3f836c29,3 +np.float32,0xff30d431,0x7f800000,3 +np.float32,0xbec82416,0x3f89e791,3 +np.float32,0x3f30bbcb,0x3f9fbbcc,3 +np.float32,0x3f5620a2,0x3faf72b8,3 +np.float32,0x807a8130,0x3f800000,3 +np.float32,0x3e3cb02d,0x3f822de0,3 +np.float32,0xff4839ac,0x7f800000,3 +np.float32,0x800a3e9c,0x3f800000,3 +np.float32,0x3dffd65b,0x3f810002,3 +np.float32,0xbf2b1492,0x3f9da987,3 +np.float32,0xbf21602c,0x3f9a48fe,3 +np.float32,0x512531,0x3f800000,3 +np.float32,0x24b99a,0x3f800000,3 +np.float32,0xbf53e345,0x3fae67b1,3 +np.float32,0xff2126ec,0x7f800000,3 +np.float32,0x7e79b49d,0x7f800000,3 +np.float32,0x3ea3cf04,0x3f869b6f,3 +np.float32,0x7f270059,0x7f800000,3 +np.float32,0x3f625b2f,0x3fb561e1,3 +np.float32,0xbf59947e,0x3fb11519,3 +np.float32,0xfe0d1c64,0x7f800000,3 +np.float32,0xbf3f3eae,0x3fa568e2,3 +np.float32,0x7c04d1,0x3f800000,3 +np.float32,0x7e66bd,0x3f800000,3 +np.float32,0x8011880d,0x3f800000,3 +np.float32,0x3f302f07,0x3f9f8759,3 +np.float32,0x4e3375,0x3f800000,3 +np.float32,0xfe67a134,0x7f800000,3 +np.float32,0xff670249,0x7f800000,3 +np.float32,0x7e19f27d,0x7f800000,3 +np.float32,0xbf36ce12,0x3fa20b81,3 +np.float32,0xbe6bcfc4,0x3f8368b5,3 +np.float32,0x76fcba,0x3f800000,3 +np.float32,0x7f30abaf,0x7f800000,3 +np.float32,0x3f4c1f6d,0x3faae43c,3 +np.float32,0x7f61f44a,0x7f800000,3 +np.float32,0xbf4bb3c9,0x3faab4af,3 +np.float32,0xbda15ee0,0x3f8065c6,3 +np.float32,0xfbb4e800,0x7f800000,3 +np.float32,0x7fa00000,0x7fe00000,3 +np.float32,0x80568501,0x3f800000,3 +np.float32,0xfeb285e4,0x7f800000,3 +np.float32,0x804423a7,0x3f800000,3 +np.float32,0x7e6c0f21,0x7f800000,3 +np.float32,0x7f136b3c,0x7f800000,3 +np.float32,0x3f2d08e6,0x3f9e5e9c,3 +np.float32,0xbf6b454e,0x3fb9f7e6,3 +np.float32,0x3e6bceb0,0x3f8368ad,3 +np.float32,0xff1ad16a,0x7f800000,3 +np.float32,0x7cce1a04,0x7f800000,3 +np.float32,0xff7bcf95,0x7f800000,3 +np.float32,0x8049788d,0x3f800000,3 
+np.float32,0x7ec45918,0x7f800000,3 +np.float32,0xff7fffff,0x7f800000,3 +np.float32,0x8039a1a0,0x3f800000,3 +np.float32,0x7e90cd72,0x7f800000,3 +np.float32,0xbf7dfd53,0x3fc456cc,3 +np.float32,0x3eeeb664,0x3f8e2a76,3 +np.float32,0x8055ef9b,0x3f800000,3 +np.float32,0x7ee06ddd,0x7f800000,3 +np.float32,0xba2cc000,0x3f800002,3 +np.float32,0x806da632,0x3f800000,3 +np.float32,0x7ecfaaf5,0x7f800000,3 +np.float32,0x3ddd12e6,0x3f80bf19,3 +np.float32,0xbf754394,0x3fbf60b1,3 +np.float32,0x6f3f19,0x3f800000,3 +np.float32,0x800a9af0,0x3f800000,3 +np.float32,0xfeef13ea,0x7f800000,3 +np.float32,0x7f74841f,0x7f800000,3 +np.float32,0xbeb9a2f0,0x3f888181,3 +np.float32,0x77cbb,0x3f800000,3 +np.float32,0xbf587f84,0x3fb0911b,3 +np.float32,0x210ba5,0x3f800000,3 +np.float32,0x3ee60a28,0x3f8d2367,3 +np.float32,0xbe3731ac,0x3f820dc7,3 +np.float32,0xbee8cfee,0x3f8d765e,3 +np.float32,0x7b2ef179,0x7f800000,3 +np.float32,0xfe81377c,0x7f800000,3 +np.float32,0x6ac98c,0x3f800000,3 +np.float32,0x3f51f144,0x3fad8288,3 +np.float32,0x80785750,0x3f800000,3 +np.float32,0x3f46615a,0x3fa864ff,3 +np.float32,0xbf35ac9e,0x3fa19b8e,3 +np.float32,0x7f0982ac,0x7f800000,3 +np.float32,0x1b2610,0x3f800000,3 +np.float32,0x3ed8bb25,0x3f8ba3df,3 +np.float32,0xbeb41bac,0x3f88006d,3 +np.float32,0xff48e89d,0x7f800000,3 +np.float32,0x3ed0ab8c,0x3f8ac755,3 +np.float32,0xbe64671c,0x3f833282,3 +np.float32,0x64bce4,0x3f800000,3 +np.float32,0x284f79,0x3f800000,3 +np.float32,0x7e09faa7,0x7f800000,3 +np.float32,0x4376c1,0x3f800000,3 +np.float32,0x805ca8c0,0x3f800000,3 +np.float32,0xff0859d5,0x7f800000,3 +np.float32,0xbed2f3b2,0x3f8b04dd,3 +np.float32,0x8045bd0c,0x3f800000,3 +np.float32,0x3f0e6216,0x3f94503f,3 +np.float32,0x3f41e3ae,0x3fa68035,3 +np.float32,0x80088ccc,0x3f800000,3 +np.float32,0x3f37fc19,0x3fa2812f,3 +np.float32,0x71c87d,0x3f800000,3 +np.float32,0x8024f4b2,0x3f800000,3 +np.float32,0xff78dd88,0x7f800000,3 +np.float32,0xbda66c90,0x3f806c40,3 +np.float32,0x7f33ef0d,0x7f800000,3 +np.float32,0x46a343,0x3f800000,3 +np.float32,0xff1dce38,0x7f800000,3 +np.float32,0x1b935d,0x3f800000,3 +np.float32,0x3ebec598,0x3f88fd0e,3 +np.float32,0xff115530,0x7f800000,3 +np.float32,0x803916aa,0x3f800000,3 +np.float32,0xff60a3e2,0x7f800000,3 +np.float32,0x3b8ddd48,0x3f80004f,3 +np.float32,0x3f761b6e,0x3fbfd8ea,3 +np.float32,0xbdf55b88,0x3f80eb70,3 +np.float32,0x37374,0x3f800000,3 +np.float32,0x3de150e0,0x3f80c682,3 +np.float32,0x3f343278,0x3fa10a83,3 +np.float32,0xbe9baefa,0x3f85f68b,3 +np.float32,0x3d8d43,0x3f800000,3 +np.float32,0x3e80994b,0x3f840f0c,3 +np.float32,0xbe573c6c,0x3f82d685,3 +np.float32,0x805b83b4,0x3f800000,3 +np.float32,0x683d88,0x3f800000,3 +np.float32,0x692465,0x3f800000,3 +np.float32,0xbdc345f8,0x3f809511,3 +np.float32,0x3f7c1c5a,0x3fc3406f,3 +np.float32,0xbf40bef3,0x3fa606df,3 +np.float32,0xff1e25b9,0x7f800000,3 +np.float32,0x3e4481e0,0x3f825d37,3 +np.float32,0x75d188,0x3f800000,3 +np.float32,0x3ea53cec,0x3f86b956,3 +np.float32,0xff105a54,0x7f800000,3 +np.float32,0x7f800000,0x7f800000,3 +np.float32,0x7f11f0b0,0x7f800000,3 +np.float32,0xbf58a57d,0x3fb0a328,3 +np.float32,0xbdd11e38,0x3f80aaf8,3 +np.float32,0xbea94adc,0x3f870fa0,3 +np.float32,0x3e9dd780,0x3f862180,3 +np.float32,0xff1786b9,0x7f800000,3 +np.float32,0xfec46aa2,0x7f800000,3 +np.float32,0x7f4300c1,0x7f800000,3 +np.float32,0x29ba2b,0x3f800000,3 +np.float32,0x3f4112e2,0x3fa62993,3 +np.float32,0xbe6c9224,0x3f836e5d,3 +np.float32,0x7f0e42a3,0x7f800000,3 +np.float32,0xff6390ad,0x7f800000,3 +np.float32,0x3f54e374,0x3faede94,3 +np.float32,0x7f2642a2,0x7f800000,3 
+np.float32,0x7f46b2be,0x7f800000,3 +np.float32,0xfe59095c,0x7f800000,3 +np.float32,0x7146a0,0x3f800000,3 +np.float32,0x3f07763d,0x3f925786,3 +np.float32,0x3d172780,0x3f801651,3 +np.float32,0xff66f1c5,0x7f800000,3 +np.float32,0xff025349,0x7f800000,3 +np.float32,0x6ce99d,0x3f800000,3 +np.float32,0xbf7e4f50,0x3fc48685,3 +np.float32,0xbeff8ca2,0x3f904708,3 +np.float32,0x3e6c8,0x3f800000,3 +np.float32,0x7f7153dc,0x7f800000,3 +np.float32,0xbedcf612,0x3f8c1b26,3 +np.float32,0xbbc2f180,0x3f800094,3 +np.float32,0xbf397399,0x3fa314b8,3 +np.float32,0x6c6e35,0x3f800000,3 +np.float32,0x7f50a88b,0x7f800000,3 +np.float32,0xfe84093e,0x7f800000,3 +np.float32,0x3f737b9d,0x3fbe6478,3 +np.float32,0x7f6a5340,0x7f800000,3 +np.float32,0xbde83c20,0x3f80d2e7,3 +np.float32,0xff769ce9,0x7f800000,3 +np.float32,0xfdd33c30,0x7f800000,3 +np.float32,0xbc95cb60,0x3f80057a,3 +np.float32,0x8007a40d,0x3f800000,3 +np.float32,0x3f55d90c,0x3faf5132,3 +np.float32,0x80282082,0x3f800000,3 +np.float32,0xbf43b1f2,0x3fa7418c,3 +np.float32,0x3f1dc7cb,0x3f991731,3 +np.float32,0xbd4346a0,0x3f80253f,3 +np.float32,0xbf5aa82a,0x3fb19946,3 +np.float32,0x3f4b8c22,0x3faaa333,3 +np.float32,0x3d13468c,0x3f80152f,3 +np.float32,0x7db77097,0x7f800000,3 +np.float32,0x4a00df,0x3f800000,3 +np.float32,0xbedea5e0,0x3f8c4b64,3 +np.float32,0x80482543,0x3f800000,3 +np.float32,0xbef344fe,0x3f8eb8dd,3 +np.float32,0x7ebd4044,0x7f800000,3 +np.float32,0xbf512c0e,0x3fad287e,3 +np.float32,0x3db28cce,0x3f807c9c,3 +np.float32,0xbd0f5ae0,0x3f801412,3 +np.float32,0xfe7ed9ac,0x7f800000,3 +np.float32,0x3eb1aa82,0x3f87c8b4,3 +np.float32,0xfef1679e,0x7f800000,3 +np.float32,0xff3629f2,0x7f800000,3 +np.float32,0xff3562b4,0x7f800000,3 +np.float32,0x3dcafe1d,0x3f80a118,3 +np.float32,0xfedf242a,0x7f800000,3 +np.float32,0xbf43102a,0x3fa6fda4,3 +np.float32,0x8028834e,0x3f800000,3 +np.float32,0x805c8513,0x3f800000,3 +np.float32,0x3f59306a,0x3fb0e550,3 +np.float32,0x3eda2c9c,0x3f8bcc4a,3 +np.float32,0x80023524,0x3f800000,3 +np.float32,0x7ef72879,0x7f800000,3 +np.float32,0x661c8a,0x3f800000,3 +np.float32,0xfec3ba6c,0x7f800000,3 +np.float32,0x805aaca6,0x3f800000,3 +np.float32,0xff5c1f13,0x7f800000,3 +np.float32,0x3f6ab3f4,0x3fb9ab6b,3 +np.float32,0x3f014896,0x3f90ac20,3 +np.float32,0x3f030584,0x3f91222a,3 +np.float32,0xbf74853d,0x3fbef71d,3 +np.float32,0xbf534ee0,0x3fae2323,3 +np.float32,0x2c90c3,0x3f800000,3 +np.float32,0x7f62ad25,0x7f800000,3 +np.float32,0x1c8847,0x3f800000,3 +np.float32,0x7e2a8d43,0x7f800000,3 +np.float32,0x807a09cd,0x3f800000,3 +np.float32,0x413871,0x3f800000,3 +np.float32,0x80063692,0x3f800000,3 +np.float32,0x3edaf29b,0x3f8be211,3 +np.float32,0xbf64a7ab,0x3fb68b2d,3 +np.float32,0xfe56a720,0x7f800000,3 +np.float32,0xbf54a8d4,0x3faec350,3 +np.float32,0x3ecbaef7,0x3f8a4350,3 +np.float32,0x3f413714,0x3fa63890,3 +np.float32,0x7d3aa8,0x3f800000,3 +np.float32,0xbea9a13c,0x3f8716e7,3 +np.float32,0x7ef7553e,0x7f800000,3 +np.float32,0x8056f29f,0x3f800000,3 +np.float32,0xff1f7ffe,0x7f800000,3 +np.float32,0x3f41953b,0x3fa65f9c,3 +np.float32,0x3daa2f,0x3f800000,3 +np.float32,0xff0893e4,0x7f800000,3 +np.float32,0xbefc7ec6,0x3f8fe207,3 +np.float32,0xbb026800,0x3f800011,3 +np.float32,0x341e4f,0x3f800000,3 +np.float32,0x3e7b708a,0x3f83e0d1,3 +np.float32,0xa18cb,0x3f800000,3 +np.float32,0x7e290239,0x7f800000,3 +np.float32,0xbf4254f2,0x3fa6af62,3 +np.float32,0x80000000,0x3f800000,3 +np.float32,0x3f0a6c,0x3f800000,3 +np.float32,0xbec44d28,0x3f898609,3 +np.float32,0xf841f,0x3f800000,3 +np.float32,0x7f01a693,0x7f800000,3 +np.float32,0x8053340b,0x3f800000,3 
+np.float32,0xfd4e7990,0x7f800000,3 +np.float32,0xbf782f1f,0x3fc10356,3 +np.float32,0xbe962118,0x3f858acc,3 +np.float32,0xfe8cd702,0x7f800000,3 +np.float32,0x7ecd986f,0x7f800000,3 +np.float32,0x3ebe775f,0x3f88f59b,3 +np.float32,0x8065524f,0x3f800000,3 +np.float32,0x3ede7fc4,0x3f8c471e,3 +np.float32,0x7f5e15ea,0x7f800000,3 +np.float32,0xbe871ada,0x3f847b78,3 +np.float32,0x3f21958b,0x3f9a5af7,3 +np.float32,0x3f64d480,0x3fb6a1fa,3 +np.float32,0xff18b0e9,0x7f800000,3 +np.float32,0xbf0840dd,0x3f928fd9,3 +np.float32,0x80104f5d,0x3f800000,3 +np.float32,0x643b94,0x3f800000,3 +np.float32,0xbc560a80,0x3f8002cc,3 +np.float32,0x3f5c75d6,0x3fb2786e,3 +np.float32,0x7f365fc9,0x7f800000,3 +np.float32,0x54e965,0x3f800000,3 +np.float32,0x6dcd4d,0x3f800000,3 +np.float32,0x3f2057a0,0x3f99f04d,3 +np.float32,0x272fa3,0x3f800000,3 +np.float32,0xff423dc9,0x7f800000,3 +np.float32,0x80273463,0x3f800000,3 +np.float32,0xfe21cc78,0x7f800000,3 +np.float32,0x7fc00000,0x7fc00000,3 +np.float32,0x802feb65,0x3f800000,3 +np.float32,0x3dc733d0,0x3f809b21,3 +np.float32,0x65d56b,0x3f800000,3 +np.float32,0x80351d8e,0x3f800000,3 +np.float32,0xbf244247,0x3f9b43dd,3 +np.float32,0x7f328e7e,0x7f800000,3 +np.float32,0x7f4d9712,0x7f800000,3 +np.float32,0x2c505d,0x3f800000,3 +np.float32,0xbf232ebe,0x3f9ae5a0,3 +np.float32,0x804a363a,0x3f800000,3 +np.float32,0x80417102,0x3f800000,3 +np.float32,0xbf48b170,0x3fa963d4,3 +np.float32,0x7ea3e3b6,0x7f800000,3 +np.float32,0xbf41415b,0x3fa63cd2,3 +np.float32,0xfe3af7c8,0x7f800000,3 +np.float32,0x7f478010,0x7f800000,3 +np.float32,0x80143113,0x3f800000,3 +np.float32,0x3f7626a7,0x3fbfdf2e,3 +np.float32,0xfea20b0a,0x7f800000,3 +np.float32,0x80144d64,0x3f800000,3 +np.float32,0x7db9ba47,0x7f800000,3 +np.float32,0x7f7fffff,0x7f800000,3 +np.float32,0xbe410834,0x3f8247ef,3 +np.float32,0x14a7af,0x3f800000,3 +np.float32,0x7eaebf9e,0x7f800000,3 +np.float32,0xff800000,0x7f800000,3 +np.float32,0x3f0a7d8e,0x3f9330fd,3 +np.float32,0x3ef780,0x3f800000,3 +np.float32,0x3f62253e,0x3fb546d1,3 +np.float32,0x3f4cbeac,0x3fab2acc,3 +np.float32,0x25db1,0x3f800000,3 +np.float32,0x65c54a,0x3f800000,3 +np.float32,0x800f0645,0x3f800000,3 +np.float32,0x3ed28c78,0x3f8af9f0,3 +np.float32,0x8040c6ce,0x3f800000,3 +np.float32,0x5e4e9a,0x3f800000,3 +np.float32,0xbd3fd2b0,0x3f8023f1,3 +np.float32,0xbf5d2d3f,0x3fb2d1b6,3 +np.float32,0x7ead999f,0x7f800000,3 +np.float32,0xbf30dc86,0x3f9fc805,3 +np.float32,0xff2b0a62,0x7f800000,3 +np.float32,0x3d5180e9,0x3f802adf,3 +np.float32,0x3f62716f,0x3fb56d0d,3 +np.float32,0x7e82ae9c,0x7f800000,3 +np.float32,0xfe2d4bdc,0x7f800000,3 +np.float32,0x805cc7d4,0x3f800000,3 +np.float32,0xfb50f700,0x7f800000,3 +np.float32,0xff57b684,0x7f800000,3 +np.float32,0x80344f01,0x3f800000,3 +np.float32,0x7f2af372,0x7f800000,3 +np.float32,0xfeab6204,0x7f800000,3 +np.float32,0x30b251,0x3f800000,3 +np.float32,0x3eed8cc4,0x3f8e0698,3 +np.float32,0x7eeb1c6a,0x7f800000,3 +np.float32,0x3f17ece6,0x3f9735b0,3 +np.float32,0x21e985,0x3f800000,3 +np.float32,0x3f3a7df3,0x3fa37e34,3 +np.float32,0x802a14a2,0x3f800000,3 +np.float32,0x807d4d5b,0x3f800000,3 +np.float32,0x7f6093ce,0x7f800000,3 +np.float32,0x3f800000,0x3fc583ab,3 +np.float32,0x3da2c26e,0x3f806789,3 +np.float32,0xfe05f278,0x7f800000,3 +np.float32,0x800000,0x3f800000,3 +np.float32,0xbee63342,0x3f8d282e,3 +np.float32,0xbf225586,0x3f9a9bd4,3 +np.float32,0xbed60e86,0x3f8b59ba,3 +np.float32,0xbec99484,0x3f8a0ca3,3 +np.float32,0x3e967c71,0x3f859199,3 +np.float32,0x7f26ab62,0x7f800000,3 +np.float32,0xca7f4,0x3f800000,3 +np.float32,0xbf543790,0x3fae8ebc,3 
+np.float32,0x3e4c1ed9,0x3f828d2d,3 +np.float32,0xbdf37f88,0x3f80e7e1,3 +np.float32,0xff0cc44e,0x7f800000,3 +np.float32,0x5dea48,0x3f800000,3 +np.float32,0x31023c,0x3f800000,3 +np.float32,0x3ea10733,0x3f866208,3 +np.float32,0x3e11e6f2,0x3f814d2e,3 +np.float32,0x80641960,0x3f800000,3 +np.float32,0x3ef779a8,0x3f8f3edb,3 +np.float32,0x3f2a5062,0x3f9d632a,3 +np.float32,0x2b7d34,0x3f800000,3 +np.float32,0x3eeb95c5,0x3f8dca67,3 +np.float32,0x805c1357,0x3f800000,3 +np.float32,0x3db3a79d,0x3f807e29,3 +np.float32,0xfded1900,0x7f800000,3 +np.float32,0x45f362,0x3f800000,3 +np.float32,0x451f38,0x3f800000,3 +np.float32,0x801d3ae5,0x3f800000,3 +np.float32,0x458d45,0x3f800000,3 +np.float32,0xfda9d298,0x7f800000,3 +np.float32,0x467439,0x3f800000,3 +np.float32,0x7f66554a,0x7f800000,3 +np.float32,0xfef2375a,0x7f800000,3 +np.float32,0xbf33fc47,0x3fa0f5d7,3 +np.float32,0x3f75ba69,0x3fbfa2d0,3 +np.float32,0xfeb625b2,0x7f800000,3 +np.float32,0x8066b371,0x3f800000,3 +np.float32,0x3f5cb4e9,0x3fb29718,3 +np.float32,0x7f3b6a58,0x7f800000,3 +np.float32,0x7f6b35ea,0x7f800000,3 +np.float32,0xbf6ee555,0x3fbbe5be,3 +np.float32,0x3d836e21,0x3f804380,3 +np.float32,0xff43cd0c,0x7f800000,3 +np.float32,0xff55c1fa,0x7f800000,3 +np.float32,0xbf0dfccc,0x3f9432a6,3 +np.float32,0x3ed92121,0x3f8baf00,3 +np.float32,0x80068cc1,0x3f800000,3 +np.float32,0xff0103f9,0x7f800000,3 +np.float32,0x7e51b175,0x7f800000,3 +np.float32,0x8012f214,0x3f800000,3 +np.float32,0x62d298,0x3f800000,3 +np.float32,0xbf3e1525,0x3fa4ef8d,3 +np.float32,0x806b4882,0x3f800000,3 +np.float32,0xbf38c146,0x3fa2ce7c,3 +np.float32,0xbed59c30,0x3f8b4d70,3 +np.float32,0x3d1910c0,0x3f8016e2,3 +np.float32,0x7f33d55b,0x7f800000,3 +np.float32,0x7f5800e3,0x7f800000,3 +np.float32,0x5b2c5d,0x3f800000,3 +np.float32,0x807be750,0x3f800000,3 +np.float32,0x7eb297c1,0x7f800000,3 +np.float32,0x7dafee62,0x7f800000,3 +np.float32,0x7d9e23f0,0x7f800000,3 +np.float32,0x3e580537,0x3f82dbd8,3 +np.float32,0xbf800000,0x3fc583ab,3 +np.float32,0x7f40f880,0x7f800000,3 +np.float32,0x775ad3,0x3f800000,3 +np.float32,0xbedacd36,0x3f8bddf3,3 +np.float32,0x2138f6,0x3f800000,3 +np.float32,0x52c3b7,0x3f800000,3 +np.float32,0x8041cfdd,0x3f800000,3 +np.float32,0x7bf16791,0x7f800000,3 +np.float32,0xbe95869c,0x3f857f55,3 +np.float32,0xbf199796,0x3f97bcaf,3 +np.float32,0x3ef8da38,0x3f8f6b45,3 +np.float32,0x803f3648,0x3f800000,3 +np.float32,0x80026fd2,0x3f800000,3 +np.float32,0x7eb3ac26,0x7f800000,3 +np.float32,0x3e49921b,0x3f827ce8,3 +np.float32,0xbf689aed,0x3fb892de,3 +np.float32,0x3f253509,0x3f9b9779,3 +np.float32,0xff17894a,0x7f800000,3 +np.float32,0x3cd12639,0x3f800aae,3 +np.float32,0x1db14b,0x3f800000,3 +np.float32,0x39a0bf,0x3f800000,3 +np.float32,0xfdfe1d08,0x7f800000,3 +np.float32,0xff416cd2,0x7f800000,3 +np.float32,0x8070d818,0x3f800000,3 +np.float32,0x3e516e12,0x3f82afb8,3 +np.float32,0x80536651,0x3f800000,3 +np.float32,0xbf2903d2,0x3f9cecb7,3 +np.float32,0x3e896ae4,0x3f84a353,3 +np.float32,0xbd6ba2c0,0x3f80363d,3 +np.float32,0x80126d3e,0x3f800000,3 +np.float32,0xfd9d43d0,0x7f800000,3 +np.float32,0x7b56b6,0x3f800000,3 +np.float32,0xff04718e,0x7f800000,3 +np.float32,0x31440f,0x3f800000,3 +np.float32,0xbf7a1313,0x3fc215c9,3 +np.float32,0x7f43d6a0,0x7f800000,3 +np.float32,0x3f566503,0x3faf92cc,3 +np.float32,0xbf39eb0e,0x3fa343f1,3 +np.float32,0xbe35fd70,0x3f8206df,3 +np.float32,0x800c36ac,0x3f800000,3 +np.float32,0x60d061,0x3f800000,3 +np.float32,0x80453e12,0x3f800000,3 +np.float32,0xfe17c36c,0x7f800000,3 +np.float32,0x3d8c72,0x3f800000,3 +np.float32,0xfe8e9134,0x7f800000,3 
+np.float32,0xff5d89de,0x7f800000,3 +np.float32,0x7f45020e,0x7f800000,3 +np.float32,0x3f28225e,0x3f9c9d01,3 +np.float32,0xbf3b6900,0x3fa3dbdd,3 +np.float32,0x80349023,0x3f800000,3 +np.float32,0xbf14d780,0x3f964042,3 +np.float32,0x3f56b5d2,0x3fafb8c3,3 +np.float32,0x800c639c,0x3f800000,3 +np.float32,0x7f7a19c8,0x7f800000,3 +np.float32,0xbf7a0815,0x3fc20f86,3 +np.float32,0xbec55926,0x3f89a06e,3 +np.float32,0x4b2cd2,0x3f800000,3 +np.float32,0xbf271eb2,0x3f9c41c8,3 +np.float32,0xff26e168,0x7f800000,3 +np.float32,0x800166b2,0x3f800000,3 +np.float32,0xbde97e38,0x3f80d532,3 +np.float32,0xbf1f93ec,0x3f99af1a,3 +np.float32,0x7f2896ed,0x7f800000,3 +np.float32,0x3da7d96d,0x3f806e1d,3 +np.float32,0x802b7237,0x3f800000,3 +np.float32,0xfdca6bc0,0x7f800000,3 +np.float32,0xbed2e300,0x3f8b0318,3 +np.float32,0x8079d9e8,0x3f800000,3 +np.float32,0x3f388c81,0x3fa2b9c2,3 +np.float32,0x3ed2607c,0x3f8af54a,3 +np.float32,0xff287de6,0x7f800000,3 +np.float32,0x3f55ed89,0x3faf5ac9,3 +np.float32,0x7f5b6af7,0x7f800000,3 +np.float32,0xbeb24730,0x3f87d698,3 +np.float32,0x1,0x3f800000,3 +np.float32,0x3f3a2350,0x3fa35a3b,3 +np.float32,0x8013b422,0x3f800000,3 +np.float32,0x3e9a6560,0x3f85dd35,3 +np.float32,0x80510631,0x3f800000,3 +np.float32,0xfeae39d6,0x7f800000,3 +np.float32,0x7eb437ad,0x7f800000,3 +np.float32,0x8047545b,0x3f800000,3 +np.float32,0x806a1c71,0x3f800000,3 +np.float32,0xbe5543f0,0x3f82c93b,3 +np.float32,0x40e8d,0x3f800000,3 +np.float32,0x63d18b,0x3f800000,3 +np.float32,0x1fa1ea,0x3f800000,3 +np.float32,0x801944e0,0x3f800000,3 +np.float32,0xbf4c7ac6,0x3fab0cae,3 +np.float32,0x7f2679d4,0x7f800000,3 +np.float32,0x3f0102fc,0x3f9099d0,3 +np.float32,0x7e44bdc1,0x7f800000,3 +np.float32,0xbf2072f6,0x3f99f970,3 +np.float32,0x5c7d38,0x3f800000,3 +np.float32,0x30a2e6,0x3f800000,3 +np.float32,0x805b9ca3,0x3f800000,3 +np.float32,0x7cc24ad5,0x7f800000,3 +np.float32,0x3f4f7920,0x3fac6357,3 +np.float32,0x111d62,0x3f800000,3 +np.float32,0xbf4de40a,0x3fabad77,3 +np.float32,0x805d0354,0x3f800000,3 +np.float32,0xbb3d2b00,0x3f800023,3 +np.float32,0x3ef229e7,0x3f8e960b,3 +np.float32,0x3f15754e,0x3f9670e0,3 +np.float32,0xbf689c6b,0x3fb893a5,3 +np.float32,0xbf3796c6,0x3fa2599b,3 +np.float32,0xbe95303c,0x3f8578f2,3 +np.float32,0xfee330de,0x7f800000,3 +np.float32,0xff0d9705,0x7f800000,3 +np.float32,0xbeb0ebd0,0x3f87b7dd,3 +np.float32,0xbf4d5a13,0x3fab6fe7,3 +np.float32,0x80142f5a,0x3f800000,3 +np.float32,0x7e01a87b,0x7f800000,3 +np.float32,0xbe45e5ec,0x3f8265d7,3 +np.float32,0x7f4ac255,0x7f800000,3 +np.float32,0x3ebf6a60,0x3f890ccb,3 +np.float32,0x7f771e16,0x7f800000,3 +np.float32,0x3f41834e,0x3fa6582b,3 +np.float32,0x3f7f6f98,0x3fc52ef0,3 +np.float32,0x7e4ad775,0x7f800000,3 +np.float32,0x3eb39991,0x3f87f4c4,3 +np.float32,0x1e3f4,0x3f800000,3 +np.float32,0x7e84ba19,0x7f800000,3 +np.float32,0x80640be4,0x3f800000,3 +np.float32,0x3f459fc8,0x3fa81272,3 +np.float32,0x3f554ed0,0x3faf109b,3 +np.float32,0x3c6617,0x3f800000,3 +np.float32,0x7f441158,0x7f800000,3 +np.float32,0x7f66e6d8,0x7f800000,3 +np.float32,0x7f565152,0x7f800000,3 +np.float32,0x7f16d550,0x7f800000,3 +np.float32,0xbd4f1950,0x3f8029e5,3 +np.float32,0xcf722,0x3f800000,3 +np.float32,0x3f37d6fd,0x3fa272ad,3 +np.float32,0xff7324ea,0x7f800000,3 +np.float32,0x804bc246,0x3f800000,3 +np.float32,0x7f099ef8,0x7f800000,3 +np.float32,0x5f838b,0x3f800000,3 +np.float32,0x80523534,0x3f800000,3 +np.float32,0x3f595e84,0x3fb0fb50,3 +np.float32,0xfdef8ac8,0x7f800000,3 +np.float32,0x3d9a07,0x3f800000,3 +np.float32,0x410f61,0x3f800000,3 +np.float32,0xbf715dbb,0x3fbd3bcb,3 
+np.float32,0xbedd4734,0x3f8c242f,3 +np.float32,0x7e86739a,0x7f800000,3 +np.float32,0x3e81f144,0x3f8424fe,3 +np.float32,0x7f6342d1,0x7f800000,3 +np.float32,0xff6919a3,0x7f800000,3 +np.float32,0xff051878,0x7f800000,3 +np.float32,0x800ba28f,0x3f800000,3 +np.float32,0xfefab3d8,0x7f800000,3 +np.float32,0xff612a84,0x7f800000,3 +np.float32,0x800cd5ab,0x3f800000,3 +np.float32,0x802a07ae,0x3f800000,3 +np.float32,0xfef6ee3a,0x7f800000,3 +np.float32,0x8037e896,0x3f800000,3 +np.float32,0x3ef2d86f,0x3f8eab7d,3 +np.float32,0x3eafe53d,0x3f87a0cb,3 +np.float32,0xba591c00,0x3f800003,3 +np.float32,0x3e9ed028,0x3f863508,3 +np.float32,0x4a12a8,0x3f800000,3 +np.float32,0xbee55c84,0x3f8d0f45,3 +np.float32,0x8038a8d3,0x3f800000,3 +np.float32,0xff055243,0x7f800000,3 +np.float32,0xbf659067,0x3fb701ca,3 +np.float32,0xbee36a86,0x3f8cd5e0,3 +np.float32,0x7f1d74c1,0x7f800000,3 +np.float32,0xbf7657df,0x3fbffaad,3 +np.float32,0x7e37ee34,0x7f800000,3 +np.float32,0xff04bc74,0x7f800000,3 +np.float32,0x806d194e,0x3f800000,3 +np.float32,0x7f5596c3,0x7f800000,3 +np.float32,0xbe09d268,0x3f81293e,3 +np.float32,0x79ff75,0x3f800000,3 +np.float32,0xbf55479c,0x3faf0d3e,3 +np.float32,0xbe5428ec,0x3f82c1d4,3 +np.float32,0x3f624134,0x3fb554d7,3 +np.float32,0x2ccb8a,0x3f800000,3 +np.float32,0xfc082040,0x7f800000,3 +np.float32,0xff315467,0x7f800000,3 +np.float32,0x3e6ea2d2,0x3f837dd5,3 +np.float32,0x8020fdd1,0x3f800000,3 +np.float32,0x7f0416a1,0x7f800000,3 +np.float32,0x710a1b,0x3f800000,3 +np.float32,0x3dfcd050,0x3f80f9fc,3 +np.float32,0xfe995e96,0x7f800000,3 +np.float32,0x3f020d00,0x3f90e006,3 +np.float32,0x8064263e,0x3f800000,3 +np.float32,0xfcee4160,0x7f800000,3 +np.float32,0x801b3a18,0x3f800000,3 +np.float32,0x3f62c984,0x3fb59955,3 +np.float32,0x806e8355,0x3f800000,3 +np.float32,0x7e94f65d,0x7f800000,3 +np.float32,0x1173de,0x3f800000,3 +np.float32,0x3e3ff3b7,0x3f824166,3 +np.float32,0x803b4aea,0x3f800000,3 +np.float32,0x804c5bcc,0x3f800000,3 +np.float32,0x509fe5,0x3f800000,3 +np.float32,0xbf33b5ee,0x3fa0db0b,3 +np.float32,0x3f2ac15c,0x3f9d8ba4,3 +np.float32,0x7f2c54f8,0x7f800000,3 +np.float32,0x7f33d933,0x7f800000,3 +np.float32,0xbf09b2b4,0x3f92f795,3 +np.float32,0x805db8d6,0x3f800000,3 +np.float32,0x6d6e66,0x3f800000,3 +np.float32,0x3ddfea92,0x3f80c40c,3 +np.float32,0xfda719b8,0x7f800000,3 +np.float32,0x5d657f,0x3f800000,3 +np.float32,0xbf005ba3,0x3f906df6,3 +np.float32,0xbf45e606,0x3fa8305c,3 +np.float32,0x5e9fd1,0x3f800000,3 +np.float32,0x8079dc45,0x3f800000,3 +np.float32,0x7e9c40e3,0x7f800000,3 +np.float32,0x6bd5f6,0x3f800000,3 +np.float32,0xbea14a0e,0x3f866761,3 +np.float32,0x7e7323f3,0x7f800000,3 +np.float32,0x7f0c0a79,0x7f800000,3 +np.float32,0xbf7d7aeb,0x3fc40b0f,3 +np.float32,0x437588,0x3f800000,3 +np.float32,0xbf356376,0x3fa17f63,3 +np.float32,0x7f129921,0x7f800000,3 +np.float32,0x7f47a52e,0x7f800000,3 +np.float32,0xba8cb400,0x3f800005,3 +np.float32,0x802284e0,0x3f800000,3 +np.float32,0xbe820f56,0x3f8426ec,3 +np.float32,0x7f2ef6cf,0x7f800000,3 +np.float32,0xbf70a090,0x3fbcd501,3 +np.float32,0xbf173fea,0x3f96ff6d,3 +np.float32,0x3e19c489,0x3f817224,3 +np.float32,0x7f429b30,0x7f800000,3 +np.float32,0xbdae4118,0x3f8076af,3 +np.float32,0x3e70ad30,0x3f838d41,3 +np.float32,0x335fed,0x3f800000,3 +np.float32,0xff5359cf,0x7f800000,3 +np.float32,0xbf17e42b,0x3f9732f1,3 +np.float32,0xff3a950b,0x7f800000,3 +np.float32,0xbcca70c0,0x3f800a02,3 +np.float32,0x3f2cda62,0x3f9e4dad,3 +np.float32,0x3f50c185,0x3facf805,3 +np.float32,0x80000001,0x3f800000,3 +np.float32,0x807b86d2,0x3f800000,3 +np.float32,0x8010c2cf,0x3f800000,3 
+np.float32,0x3f130fb8,0x3f95b519,3 +np.float32,0x807dc546,0x3f800000,3 +np.float32,0xbee20740,0x3f8cad3f,3 +np.float32,0x80800000,0x3f800000,3 +np.float32,0x3cbd90c0,0x3f8008c6,3 +np.float32,0x3e693488,0x3f835571,3 +np.float32,0xbe70cd44,0x3f838e35,3 +np.float32,0xbe348dc8,0x3f81feb1,3 +np.float32,0x3f31ea90,0x3fa02d3f,3 +np.float32,0xfcd7e180,0x7f800000,3 +np.float32,0xbe30a75c,0x3f81e8d0,3 +np.float32,0x3e552c5a,0x3f82c89d,3 +np.float32,0xff513f74,0x7f800000,3 +np.float32,0xbdb16248,0x3f807afd,3 +np.float64,0x7fbbf954e437f2a9,0x7ff0000000000000,2 +np.float64,0x581bbf0cb0379,0x3ff0000000000000,2 +np.float64,0x7ff8000000000000,0x7ff8000000000000,2 +np.float64,0xffb959a2a632b348,0x7ff0000000000000,2 +np.float64,0xbfdbd6baebb7ad76,0x3ff189a5ca25a6e1,2 +np.float64,0xbfd094ec9aa129da,0x3ff08a3f6b918065,2 +np.float64,0x3fe236753f646cea,0x3ff2a982660b8b43,2 +np.float64,0xbfe537fadfaa6ff6,0x3ff3a5f1c49c31bf,2 +np.float64,0xbfe31fa7dc663f50,0x3ff2f175374aef0e,2 +np.float64,0x3fc4b6569f296cb0,0x3ff035bde801bb53,2 +np.float64,0x800ce3c00f99c780,0x3ff0000000000000,2 +np.float64,0xbfebcde33e779bc6,0x3ff66de82cd30fc5,2 +np.float64,0x800dc09d3b7b813b,0x3ff0000000000000,2 +np.float64,0x80067d4c450cfa99,0x3ff0000000000000,2 +np.float64,0x1f6ade203ed7,0x3ff0000000000000,2 +np.float64,0xbfd4e311eca9c624,0x3ff0dc1383d6c3db,2 +np.float64,0x800649b3a54c9368,0x3ff0000000000000,2 +np.float64,0xcc14d1ab9829a,0x3ff0000000000000,2 +np.float64,0x3fc290c5bb25218b,0x3ff02b290f46dd6d,2 +np.float64,0x3fe78eb8376f1d70,0x3ff488f3bc259537,2 +np.float64,0xffc60f58e82c1eb0,0x7ff0000000000000,2 +np.float64,0x3fd35666ad26accd,0x3ff0bc6573da6bcd,2 +np.float64,0x7fc20257a62404ae,0x7ff0000000000000,2 +np.float64,0x80076d842e0edb09,0x3ff0000000000000,2 +np.float64,0x3fd8e44b08b1c898,0x3ff139b9a1f8428e,2 +np.float64,0x7fd6f6fc7a2dedf8,0x7ff0000000000000,2 +np.float64,0x3fa01b9f0820373e,0x3ff00206f8ad0f1b,2 +np.float64,0x69ed190ed3da4,0x3ff0000000000000,2 +np.float64,0xbfd997eb34b32fd6,0x3ff14be65a5db4a0,2 +np.float64,0x7feada2d0935b459,0x7ff0000000000000,2 +np.float64,0xbf80987120213100,0x3ff000226d29a9fc,2 +np.float64,0xbfef203e37fe407c,0x3ff82f51f04e8821,2 +np.float64,0xffe3dcf91fa7b9f2,0x7ff0000000000000,2 +np.float64,0x9a367283346cf,0x3ff0000000000000,2 +np.float64,0x800feb09f7bfd614,0x3ff0000000000000,2 +np.float64,0xbfe0319f9520633f,0x3ff217c5205c403f,2 +np.float64,0xbfa91eabd4323d50,0x3ff004ee4347f627,2 +np.float64,0x3fd19cbf7d23397f,0x3ff09c13e8e43571,2 +np.float64,0xffeb8945f0b7128b,0x7ff0000000000000,2 +np.float64,0x800a0eb4f2141d6a,0x3ff0000000000000,2 +np.float64,0xffe83e7312f07ce6,0x7ff0000000000000,2 +np.float64,0xffca53fee834a7fc,0x7ff0000000000000,2 +np.float64,0x800881cbf1710398,0x3ff0000000000000,2 +np.float64,0x80003e6abbe07cd6,0x3ff0000000000000,2 +np.float64,0xbfef6a998afed533,0x3ff859b7852d1b4d,2 +np.float64,0x3fd4eb7577a9d6eb,0x3ff0dcc601261aab,2 +np.float64,0xbfc9c12811338250,0x3ff05331268b05c8,2 +np.float64,0x7fddf84e5e3bf09c,0x7ff0000000000000,2 +np.float64,0xbfd4d6fbbc29adf8,0x3ff0db12db19d187,2 +np.float64,0x80077892bfaef126,0x3ff0000000000000,2 +np.float64,0xffae9d49543d3a90,0x7ff0000000000000,2 +np.float64,0xbfd8bef219317de4,0x3ff136034e5d2f1b,2 +np.float64,0xffe89c74ddb138e9,0x7ff0000000000000,2 +np.float64,0x8003b6bbb7e76d78,0x3ff0000000000000,2 +np.float64,0x315a4e8462b4b,0x3ff0000000000000,2 +np.float64,0x800ee616edddcc2e,0x3ff0000000000000,2 +np.float64,0xdfb27f97bf650,0x3ff0000000000000,2 +np.float64,0x8004723dc328e47c,0x3ff0000000000000,2 +np.float64,0xbfe529500daa52a0,0x3ff3a0b9b33fc84c,2 
+np.float64,0xbfe4e46a7ce9c8d5,0x3ff3886ce0f92612,2 +np.float64,0xbf52003680240000,0x3ff00000a203d61a,2 +np.float64,0xffd3400458268008,0x7ff0000000000000,2 +np.float64,0x80076deb444edbd7,0x3ff0000000000000,2 +np.float64,0xa612f6c14c27,0x3ff0000000000000,2 +np.float64,0xbfd41c74c9a838ea,0x3ff0cbe61e16aecf,2 +np.float64,0x43f464a887e8d,0x3ff0000000000000,2 +np.float64,0x800976e748b2edcf,0x3ff0000000000000,2 +np.float64,0xffc79d6ba12f3ad8,0x7ff0000000000000,2 +np.float64,0xffd6dbcb022db796,0x7ff0000000000000,2 +np.float64,0xffd6a9672a2d52ce,0x7ff0000000000000,2 +np.float64,0x3fe95dcfa632bb9f,0x3ff54bbad2ee919e,2 +np.float64,0x3febadd2e1375ba6,0x3ff65e336c47c018,2 +np.float64,0x7fd47c37d828f86f,0x7ff0000000000000,2 +np.float64,0xbfd4ea59e0a9d4b4,0x3ff0dcae6af3e443,2 +np.float64,0x2c112afc58226,0x3ff0000000000000,2 +np.float64,0x8008122bced02458,0x3ff0000000000000,2 +np.float64,0x7fe7105ab3ee20b4,0x7ff0000000000000,2 +np.float64,0x80089634df312c6a,0x3ff0000000000000,2 +np.float64,0x68e9fbc8d1d40,0x3ff0000000000000,2 +np.float64,0xbfec1e1032f83c20,0x3ff69590b9f18ea8,2 +np.float64,0xbfedf181623be303,0x3ff787ef48935dc6,2 +np.float64,0xffe8600457f0c008,0x7ff0000000000000,2 +np.float64,0x7a841ec6f5084,0x3ff0000000000000,2 +np.float64,0x459a572e8b34c,0x3ff0000000000000,2 +np.float64,0x3fe8a232bef14465,0x3ff4fac1780f731e,2 +np.float64,0x3fcb37597d366eb3,0x3ff05cf08ab14ebd,2 +np.float64,0xbfb0261d00204c38,0x3ff00826fb86ca8a,2 +np.float64,0x3fc6e7a6dd2dcf4e,0x3ff041c1222ffa79,2 +np.float64,0xee65dd03dccbc,0x3ff0000000000000,2 +np.float64,0xffe26fdc23e4dfb8,0x7ff0000000000000,2 +np.float64,0x7fe8d6c8cab1ad91,0x7ff0000000000000,2 +np.float64,0xbfeb64bf2676c97e,0x3ff63abb8607828c,2 +np.float64,0x3fd28417b425082f,0x3ff0ac9eb22a732b,2 +np.float64,0xbfd26835b3a4d06c,0x3ff0aa94c48fb6d2,2 +np.float64,0xffec617a01b8c2f3,0x7ff0000000000000,2 +np.float64,0xe1bfff01c3800,0x3ff0000000000000,2 +np.float64,0x3fd4def913a9bdf4,0x3ff0dbbc7271046f,2 +np.float64,0x94f4c17129e98,0x3ff0000000000000,2 +np.float64,0x8009b2eaa33365d6,0x3ff0000000000000,2 +np.float64,0x3fd9633b41b2c678,0x3ff1468388bdfb65,2 +np.float64,0xffe0ae5c80e15cb8,0x7ff0000000000000,2 +np.float64,0x7fdfc35996bf86b2,0x7ff0000000000000,2 +np.float64,0x3fcfc5bdc23f8b7c,0x3ff07ed5caa4545c,2 +np.float64,0xd48b4907a9169,0x3ff0000000000000,2 +np.float64,0xbfe0a2cc52614598,0x3ff2361665895d95,2 +np.float64,0xbfe9068f90720d1f,0x3ff525b82491a1a5,2 +np.float64,0x4238b9208472,0x3ff0000000000000,2 +np.float64,0x800e6b2bf69cd658,0x3ff0000000000000,2 +np.float64,0x7fb638b6ae2c716c,0x7ff0000000000000,2 +np.float64,0x7fe267641764cec7,0x7ff0000000000000,2 +np.float64,0xffc0933d3521267c,0x7ff0000000000000,2 +np.float64,0x7fddfdfb533bfbf6,0x7ff0000000000000,2 +np.float64,0xced2a8e99da55,0x3ff0000000000000,2 +np.float64,0x2a80d5165501b,0x3ff0000000000000,2 +np.float64,0xbfeead2ab63d5a55,0x3ff7eeb5cbcfdcab,2 +np.float64,0x80097f6f92f2fee0,0x3ff0000000000000,2 +np.float64,0x3fee1f29b77c3e54,0x3ff7a0a58c13df62,2 +np.float64,0x3f9d06b8383a0d70,0x3ff001a54a2d8cf8,2 +np.float64,0xbfc8b41d3f31683c,0x3ff04c85379dd6b0,2 +np.float64,0xffd2a04c1e254098,0x7ff0000000000000,2 +np.float64,0xbfb71c01e02e3800,0x3ff010b34220e838,2 +np.float64,0xbfe69249ef6d2494,0x3ff425e48d1e938b,2 +np.float64,0xffefffffffffffff,0x7ff0000000000000,2 +np.float64,0x3feb1d52fbf63aa6,0x3ff618813ae922d7,2 +np.float64,0x7fb8d1a77e31a34e,0x7ff0000000000000,2 +np.float64,0xffc3cfc4ed279f88,0x7ff0000000000000,2 +np.float64,0x2164b9fc42c98,0x3ff0000000000000,2 +np.float64,0x3fbb868cee370d1a,0x3ff017b31b0d4d27,2 
+np.float64,0x3fcd6dea583adbd5,0x3ff06cbd16bf44a0,2 +np.float64,0xbfecd041d479a084,0x3ff6efb25f61012d,2 +np.float64,0xbfb0552e6e20aa60,0x3ff00856ca83834a,2 +np.float64,0xe6293cbfcc528,0x3ff0000000000000,2 +np.float64,0x7fba58394034b072,0x7ff0000000000000,2 +np.float64,0x33bc96d467794,0x3ff0000000000000,2 +np.float64,0xffe90ea86bf21d50,0x7ff0000000000000,2 +np.float64,0xbfc626ea6d2c4dd4,0x3ff03d7e01ec3849,2 +np.float64,0x65b56fe4cb6af,0x3ff0000000000000,2 +np.float64,0x3fea409fb7f4813f,0x3ff5b171deab0ebd,2 +np.float64,0x3fe849c1df709384,0x3ff4d59063ff98c4,2 +np.float64,0x169073082d20f,0x3ff0000000000000,2 +np.float64,0xcc8b6add9916e,0x3ff0000000000000,2 +np.float64,0xbfef3d78d5fe7af2,0x3ff83fecc26abeea,2 +np.float64,0x3fe8c65a4a718cb4,0x3ff50a23bfeac7df,2 +np.float64,0x3fde9fa5c8bd3f4c,0x3ff1ddeb12b9d623,2 +np.float64,0xffe2af536da55ea6,0x7ff0000000000000,2 +np.float64,0x800186d0b0c30da2,0x3ff0000000000000,2 +np.float64,0x3fe9ba3c1d737478,0x3ff574ab2bf3a560,2 +np.float64,0xbfe1489c46a29138,0x3ff2641d36b30e21,2 +np.float64,0xbfe4b6b7c0e96d70,0x3ff37880ac8b0540,2 +np.float64,0x800e66ad82fccd5b,0x3ff0000000000000,2 +np.float64,0x7ff0000000000000,0x7ff0000000000000,2 +np.float64,0x7febb0fd477761fa,0x7ff0000000000000,2 +np.float64,0xbfdc433f2eb8867e,0x3ff195ec2a6cce27,2 +np.float64,0x3fe12c5a172258b4,0x3ff25c225b8a34bb,2 +np.float64,0xbfef6f116c3ede23,0x3ff85c47eaed49a0,2 +np.float64,0x800af6f60f35edec,0x3ff0000000000000,2 +np.float64,0xffe567999a2acf32,0x7ff0000000000000,2 +np.float64,0xbfc5ac5ae72b58b4,0x3ff03adb50ec04f3,2 +np.float64,0x3fea1b57e23436b0,0x3ff5a06f98541767,2 +np.float64,0x7fcc3e36fb387c6d,0x7ff0000000000000,2 +np.float64,0x8000c8dc698191ba,0x3ff0000000000000,2 +np.float64,0x3fee5085ed7ca10c,0x3ff7bb92f61245b8,2 +np.float64,0x7fbb9f803a373eff,0x7ff0000000000000,2 +np.float64,0xbfe1e5e806e3cbd0,0x3ff2918f2d773007,2 +np.float64,0x8008f8c3f3b1f188,0x3ff0000000000000,2 +np.float64,0x7fe53df515ea7be9,0x7ff0000000000000,2 +np.float64,0x7fdbb87fb3b770fe,0x7ff0000000000000,2 +np.float64,0x3fefcc0f50ff981f,0x3ff89210a6a04e6b,2 +np.float64,0x3fe33f87d0267f10,0x3ff2fb989ea4f2bc,2 +np.float64,0x1173992022e8,0x3ff0000000000000,2 +np.float64,0x3fef534632bea68c,0x3ff84c5ca9713ff9,2 +np.float64,0x3fc5991d552b3238,0x3ff03a72bfdb6e5f,2 +np.float64,0x3fdad90dc1b5b21c,0x3ff16db868180034,2 +np.float64,0xffe20b8078e41700,0x7ff0000000000000,2 +np.float64,0x7fdf409a82be8134,0x7ff0000000000000,2 +np.float64,0x3fccb7e691396fcd,0x3ff06786b6ccdbcb,2 +np.float64,0xffe416e0b7282dc1,0x7ff0000000000000,2 +np.float64,0xffe3a8a981275152,0x7ff0000000000000,2 +np.float64,0x3fd9c8bd31b3917c,0x3ff150ee6f5f692f,2 +np.float64,0xffeab6fef6356dfd,0x7ff0000000000000,2 +np.float64,0x3fe9c5e3faf38bc8,0x3ff579e18c9bd548,2 +np.float64,0x800b173e44762e7d,0x3ff0000000000000,2 +np.float64,0xffe2719db764e33b,0x7ff0000000000000,2 +np.float64,0x3fd1fcf31223f9e6,0x3ff0a2da7ad99856,2 +np.float64,0x80082c4afcd05896,0x3ff0000000000000,2 +np.float64,0xa56e5e4b4adcc,0x3ff0000000000000,2 +np.float64,0xffbbbddab2377bb8,0x7ff0000000000000,2 +np.float64,0x3b3927c076726,0x3ff0000000000000,2 +np.float64,0x3fec03fd58f807fb,0x3ff6889b8a774728,2 +np.float64,0xbfaa891fb4351240,0x3ff00580987bd914,2 +np.float64,0x7fb4800c4a290018,0x7ff0000000000000,2 +np.float64,0xffbb5d2b6036ba58,0x7ff0000000000000,2 +np.float64,0x7fd6608076acc100,0x7ff0000000000000,2 +np.float64,0x31267e4c624d1,0x3ff0000000000000,2 +np.float64,0x33272266664e5,0x3ff0000000000000,2 +np.float64,0x47bb37f28f768,0x3ff0000000000000,2 +np.float64,0x3fe134bb4ee26977,0x3ff25e7ea647a928,2 
+np.float64,0xbfe2b5f42ba56be8,0x3ff2d05cbdc7344b,2 +np.float64,0xbfe0e013fd61c028,0x3ff246dfce572914,2 +np.float64,0x7fecedcda4f9db9a,0x7ff0000000000000,2 +np.float64,0x8001816c2da302d9,0x3ff0000000000000,2 +np.float64,0xffced8b65b3db16c,0x7ff0000000000000,2 +np.float64,0xffdc1d4a0b383a94,0x7ff0000000000000,2 +np.float64,0x7fe94e7339f29ce5,0x7ff0000000000000,2 +np.float64,0x33fb846667f71,0x3ff0000000000000,2 +np.float64,0x800a1380e9542702,0x3ff0000000000000,2 +np.float64,0x800b74eaa776e9d6,0x3ff0000000000000,2 +np.float64,0x5681784aad030,0x3ff0000000000000,2 +np.float64,0xbfee0eb7917c1d6f,0x3ff797b949f7f6b4,2 +np.float64,0xffe4ec5fd2a9d8bf,0x7ff0000000000000,2 +np.float64,0xbfcd7401dd3ae804,0x3ff06cea52c792c0,2 +np.float64,0x800587563beb0ead,0x3ff0000000000000,2 +np.float64,0x3fc15c6f3322b8de,0x3ff025bbd030166d,2 +np.float64,0x7feb6b4caf76d698,0x7ff0000000000000,2 +np.float64,0x7fe136ef82a26dde,0x7ff0000000000000,2 +np.float64,0xf592dac3eb25c,0x3ff0000000000000,2 +np.float64,0x7fd300baf6a60175,0x7ff0000000000000,2 +np.float64,0x7fc880de9e3101bc,0x7ff0000000000000,2 +np.float64,0x7fe7a1aa5caf4354,0x7ff0000000000000,2 +np.float64,0x2f9b8e0e5f373,0x3ff0000000000000,2 +np.float64,0xffcc9071993920e4,0x7ff0000000000000,2 +np.float64,0x8009e151b313c2a4,0x3ff0000000000000,2 +np.float64,0xbfd46e2d18a8dc5a,0x3ff0d27a7b37c1ae,2 +np.float64,0x3fe65c7961acb8f3,0x3ff4116946062a4c,2 +np.float64,0x7fd31b371626366d,0x7ff0000000000000,2 +np.float64,0x98dc924d31b93,0x3ff0000000000000,2 +np.float64,0x268bef364d17f,0x3ff0000000000000,2 +np.float64,0x7fd883ba56310774,0x7ff0000000000000,2 +np.float64,0x3fc53f01a32a7e03,0x3ff0388dea9cd63e,2 +np.float64,0xffe1ea8c0563d518,0x7ff0000000000000,2 +np.float64,0x3fd0bf0e63a17e1d,0x3ff08d0577f5ffa6,2 +np.float64,0x7fef42418f7e8482,0x7ff0000000000000,2 +np.float64,0x8000bccd38c1799b,0x3ff0000000000000,2 +np.float64,0xbfe6c48766ed890f,0x3ff43936fa4048c8,2 +np.float64,0xbfb2a38f3a254720,0x3ff00adc7f7b2822,2 +np.float64,0x3fd5262b2eaa4c56,0x3ff0e1af492c08f5,2 +np.float64,0x80065b4691ecb68e,0x3ff0000000000000,2 +np.float64,0xfb6b9e9ff6d74,0x3ff0000000000000,2 +np.float64,0x8006c71e6ecd8e3e,0x3ff0000000000000,2 +np.float64,0x3fd0a3e43ca147c8,0x3ff08b3ad7b42485,2 +np.float64,0xbfc82d8607305b0c,0x3ff04949d6733ef6,2 +np.float64,0xde048c61bc092,0x3ff0000000000000,2 +np.float64,0xffcf73e0fa3ee7c0,0x7ff0000000000000,2 +np.float64,0xbfe8639d7830c73b,0x3ff4e05f97948376,2 +np.float64,0x8010000000000000,0x3ff0000000000000,2 +np.float64,0x67f01a2acfe04,0x3ff0000000000000,2 +np.float64,0x3fe222e803e445d0,0x3ff2a3a75e5f29d8,2 +np.float64,0xffef84c6387f098b,0x7ff0000000000000,2 +np.float64,0x3fe5969c1e6b2d38,0x3ff3c80130462bb2,2 +np.float64,0x8009f56953d3ead3,0x3ff0000000000000,2 +np.float64,0x3fe05c9b6360b937,0x3ff2232e1cba5617,2 +np.float64,0x3fd8888d63b1111b,0x3ff130a5b788d52f,2 +np.float64,0xffe3a9e6f26753ce,0x7ff0000000000000,2 +np.float64,0x800e2aaa287c5554,0x3ff0000000000000,2 +np.float64,0x3fea8d6c82351ad9,0x3ff5d4d8cde9a11d,2 +np.float64,0x7feef700723dee00,0x7ff0000000000000,2 +np.float64,0x3fa5cb77242b96e0,0x3ff003b62b3e50f1,2 +np.float64,0x7fb68f0a862d1e14,0x7ff0000000000000,2 +np.float64,0x7fb97ee83432fdcf,0x7ff0000000000000,2 +np.float64,0x7fd74a78632e94f0,0x7ff0000000000000,2 +np.float64,0x7fcfe577713fcaee,0x7ff0000000000000,2 +np.float64,0xffe192ee5ea325dc,0x7ff0000000000000,2 +np.float64,0x477d6ae48efae,0x3ff0000000000000,2 +np.float64,0xffe34d5237669aa4,0x7ff0000000000000,2 +np.float64,0x7fe3ce8395a79d06,0x7ff0000000000000,2 
+np.float64,0x80019c01ffa33805,0x3ff0000000000000,2 +np.float64,0x74b5b56ce96b7,0x3ff0000000000000,2 +np.float64,0x7fe05ecdeda0bd9b,0x7ff0000000000000,2 +np.float64,0xffe9693eb232d27d,0x7ff0000000000000,2 +np.float64,0xffd2be2c7da57c58,0x7ff0000000000000,2 +np.float64,0x800dbd5cbc1b7aba,0x3ff0000000000000,2 +np.float64,0xbfa36105d426c210,0x3ff002ef2e3a87f7,2 +np.float64,0x800b2d69fb765ad4,0x3ff0000000000000,2 +np.float64,0xbfdb81c9a9370394,0x3ff1802d409cbf7a,2 +np.float64,0x7fd481d014a9039f,0x7ff0000000000000,2 +np.float64,0xffe66c3c1fecd878,0x7ff0000000000000,2 +np.float64,0x3fc55865192ab0c8,0x3ff03915b51e8839,2 +np.float64,0xd6a78987ad4f1,0x3ff0000000000000,2 +np.float64,0x800c6cc80d58d990,0x3ff0000000000000,2 +np.float64,0x979435a12f29,0x3ff0000000000000,2 +np.float64,0xbfbd971e7a3b2e40,0x3ff01b647e45f5a6,2 +np.float64,0x80067565bfeceacc,0x3ff0000000000000,2 +np.float64,0x8001ad689ce35ad2,0x3ff0000000000000,2 +np.float64,0x7fa43253dc2864a7,0x7ff0000000000000,2 +np.float64,0xbfe3dda307e7bb46,0x3ff32ef99a2efe1d,2 +np.float64,0x3fe5d7b395ebaf68,0x3ff3dfd33cdc8ef4,2 +np.float64,0xd94cc9c3b2999,0x3ff0000000000000,2 +np.float64,0x3fee5a513fbcb4a2,0x3ff7c0f17b876ce5,2 +np.float64,0xffe27761fa64eec4,0x7ff0000000000000,2 +np.float64,0x3feb788119b6f102,0x3ff64446f67f4efa,2 +np.float64,0xbfed6e10dffadc22,0x3ff741d5ef610ca0,2 +np.float64,0x7fe73cf98b2e79f2,0x7ff0000000000000,2 +np.float64,0x7847d09af08fb,0x3ff0000000000000,2 +np.float64,0x29ded2da53bdb,0x3ff0000000000000,2 +np.float64,0xbfe51c1ec1aa383e,0x3ff39c0b7cf832e2,2 +np.float64,0xbfeafd5e65f5fabd,0x3ff609548a787f57,2 +np.float64,0x3fd872a26fb0e545,0x3ff12e7fbd95505c,2 +np.float64,0x7fed6b7c1b7ad6f7,0x7ff0000000000000,2 +np.float64,0xffe7ba9ec16f753d,0x7ff0000000000000,2 +np.float64,0x7f89b322f0336645,0x7ff0000000000000,2 +np.float64,0xbfad1677383a2cf0,0x3ff0069ca67e7baa,2 +np.float64,0x3fe0906d04a120da,0x3ff2311b04b7bfef,2 +np.float64,0xffe4b3c9d4296793,0x7ff0000000000000,2 +np.float64,0xbfe476bb0ce8ed76,0x3ff36277d2921a74,2 +np.float64,0x7fc35655cf26acab,0x7ff0000000000000,2 +np.float64,0x7fe9980f0373301d,0x7ff0000000000000,2 +np.float64,0x9e6e04cb3cdc1,0x3ff0000000000000,2 +np.float64,0x800b89e0afb713c2,0x3ff0000000000000,2 +np.float64,0x800bd951a3f7b2a4,0x3ff0000000000000,2 +np.float64,0x29644a9e52c8a,0x3ff0000000000000,2 +np.float64,0x3fe1be2843637c51,0x3ff285e90d8387e4,2 +np.float64,0x7fa233cce4246799,0x7ff0000000000000,2 +np.float64,0xbfcfb7bc2d3f6f78,0x3ff07e657de3e2ed,2 +np.float64,0xffd7c953e7af92a8,0x7ff0000000000000,2 +np.float64,0xbfc5bbaf772b7760,0x3ff03b2ee4febb1e,2 +np.float64,0x8007b7315a6f6e63,0x3ff0000000000000,2 +np.float64,0xbfe906d902320db2,0x3ff525d7e16acfe0,2 +np.float64,0x3fde33d8553c67b1,0x3ff1d09faa19aa53,2 +np.float64,0x61fe76a0c3fcf,0x3ff0000000000000,2 +np.float64,0xa75e355b4ebc7,0x3ff0000000000000,2 +np.float64,0x3fc9e6d86033cdb1,0x3ff05426299c7064,2 +np.float64,0x7fd83f489eb07e90,0x7ff0000000000000,2 +np.float64,0x8000000000000001,0x3ff0000000000000,2 +np.float64,0x80014434ae62886a,0x3ff0000000000000,2 +np.float64,0xbfe21af9686435f3,0x3ff2a149338bdefe,2 +np.float64,0x9354e6cd26a9d,0x3ff0000000000000,2 +np.float64,0xb42b95f768573,0x3ff0000000000000,2 +np.float64,0xbfecb4481bb96890,0x3ff6e15d269dd651,2 +np.float64,0x3f97842ae82f0840,0x3ff0011485156f28,2 +np.float64,0xffdef63d90bdec7c,0x7ff0000000000000,2 +np.float64,0x7fe511a8d36a2351,0x7ff0000000000000,2 +np.float64,0xbf8cb638a0396c80,0x3ff000670c318fb6,2 +np.float64,0x3fe467e1f668cfc4,0x3ff35d65f93ccac6,2 +np.float64,0xbfce7d88f03cfb10,0x3ff074c22475fe5b,2 
+np.float64,0x6d0a4994da14a,0x3ff0000000000000,2 +np.float64,0xbfb3072580260e48,0x3ff00b51d3913e9f,2 +np.float64,0x8008fcde36b1f9bd,0x3ff0000000000000,2 +np.float64,0x3fd984df66b309c0,0x3ff149f29125eca4,2 +np.float64,0xffee2a10fe7c5421,0x7ff0000000000000,2 +np.float64,0x80039168ace722d2,0x3ff0000000000000,2 +np.float64,0xffda604379b4c086,0x7ff0000000000000,2 +np.float64,0xffdc6a405bb8d480,0x7ff0000000000000,2 +np.float64,0x3fe62888b26c5111,0x3ff3fdda754c4372,2 +np.float64,0x8008b452cb5168a6,0x3ff0000000000000,2 +np.float64,0x6165d540c2cbb,0x3ff0000000000000,2 +np.float64,0xbfee0c04d17c180a,0x3ff796431c64bcbe,2 +np.float64,0x800609b8448c1371,0x3ff0000000000000,2 +np.float64,0x800fc3fca59f87f9,0x3ff0000000000000,2 +np.float64,0x77f64848efeca,0x3ff0000000000000,2 +np.float64,0x8007cf522d8f9ea5,0x3ff0000000000000,2 +np.float64,0xbfe9fb0b93f3f617,0x3ff591cb0052e22c,2 +np.float64,0x7fd569d5f0aad3ab,0x7ff0000000000000,2 +np.float64,0x7fe5cf489d6b9e90,0x7ff0000000000000,2 +np.float64,0x7fd6e193e92dc327,0x7ff0000000000000,2 +np.float64,0xf78988a5ef131,0x3ff0000000000000,2 +np.float64,0x3fe8f97562b1f2eb,0x3ff5201080fbc12d,2 +np.float64,0x7febfd69d7b7fad3,0x7ff0000000000000,2 +np.float64,0xffc07b5c1720f6b8,0x7ff0000000000000,2 +np.float64,0xbfd966926832cd24,0x3ff146da9adf492e,2 +np.float64,0x7fef5bd9edfeb7b3,0x7ff0000000000000,2 +np.float64,0xbfd2afbc96255f7a,0x3ff0afd601febf44,2 +np.float64,0x7fdd4ea6293a9d4b,0x7ff0000000000000,2 +np.float64,0xbfe8a1e916b143d2,0x3ff4faa23c2793e5,2 +np.float64,0x800188fcd8c311fa,0x3ff0000000000000,2 +np.float64,0xbfe30803f1661008,0x3ff2e9fc729baaee,2 +np.float64,0x7fefffffffffffff,0x7ff0000000000000,2 +np.float64,0x3fd287bec3250f7e,0x3ff0ace34d3102f6,2 +np.float64,0x1f0ee9443e1de,0x3ff0000000000000,2 +np.float64,0xbfd92f73da325ee8,0x3ff14143e4fa2c5a,2 +np.float64,0x3fed7c9bdffaf938,0x3ff74984168734d3,2 +np.float64,0x8002c4d1696589a4,0x3ff0000000000000,2 +np.float64,0xfe03011bfc060,0x3ff0000000000000,2 +np.float64,0x7f7a391e6034723c,0x7ff0000000000000,2 +np.float64,0xffd6fd46f82dfa8e,0x7ff0000000000000,2 +np.float64,0xbfd7520a742ea414,0x3ff112f1ba5d4f91,2 +np.float64,0x8009389d8812713b,0x3ff0000000000000,2 +np.float64,0x7fefb846aaff708c,0x7ff0000000000000,2 +np.float64,0x3fd98a0983331413,0x3ff14a79efb8adbf,2 +np.float64,0xbfd897158db12e2c,0x3ff132137902cf3e,2 +np.float64,0xffc4048d5928091c,0x7ff0000000000000,2 +np.float64,0x80036ae46046d5ca,0x3ff0000000000000,2 +np.float64,0x7faba7ed3c374fd9,0x7ff0000000000000,2 +np.float64,0xbfec4265e1f884cc,0x3ff6a7b8602422c9,2 +np.float64,0xaa195e0b5432c,0x3ff0000000000000,2 +np.float64,0x3feac15d317582ba,0x3ff5ed115758145f,2 +np.float64,0x6c13a5bcd8275,0x3ff0000000000000,2 +np.float64,0xbfed20b8883a4171,0x3ff7194dbd0dc988,2 +np.float64,0x800cde65c899bccc,0x3ff0000000000000,2 +np.float64,0x7c72912af8e53,0x3ff0000000000000,2 +np.float64,0x3fe49d2bb4e93a57,0x3ff36fab3aba15d4,2 +np.float64,0xbfd598fa02ab31f4,0x3ff0eb72fc472025,2 +np.float64,0x8007a191712f4324,0x3ff0000000000000,2 +np.float64,0xbfdeb14872bd6290,0x3ff1e01ca83f35fd,2 +np.float64,0xbfe1da46b3e3b48e,0x3ff28e23ad2f5615,2 +np.float64,0x800a2f348e745e69,0x3ff0000000000000,2 +np.float64,0xbfee66928afccd25,0x3ff7c7ac7dbb3273,2 +np.float64,0xffd78a0a2b2f1414,0x7ff0000000000000,2 +np.float64,0x7fc5fa80b82bf500,0x7ff0000000000000,2 +np.float64,0x800e6d7260dcdae5,0x3ff0000000000000,2 +np.float64,0xbfd6cff2aaad9fe6,0x3ff106f78ee61642,2 +np.float64,0x7fe1041d1d220839,0x7ff0000000000000,2 +np.float64,0xbfdf75586cbeeab0,0x3ff1f8dbaa7e57f0,2 
+np.float64,0xffdcaae410b955c8,0x7ff0000000000000,2 +np.float64,0x800fe5e0d1ffcbc2,0x3ff0000000000000,2 +np.float64,0x800d7999527af333,0x3ff0000000000000,2 +np.float64,0xbfe62c233bac5846,0x3ff3ff34220a204c,2 +np.float64,0x7fe99bbff8f3377f,0x7ff0000000000000,2 +np.float64,0x7feeaf471d3d5e8d,0x7ff0000000000000,2 +np.float64,0xd5904ff5ab20a,0x3ff0000000000000,2 +np.float64,0x3fd07aae3320f55c,0x3ff08888c227c968,2 +np.float64,0x7fea82b8dff50571,0x7ff0000000000000,2 +np.float64,0xffef2db9057e5b71,0x7ff0000000000000,2 +np.float64,0xbfe2077fef640f00,0x3ff29b7dd0d39d36,2 +np.float64,0xbfe09a4d7c61349b,0x3ff233c7e88881f4,2 +np.float64,0x3fda50c4cbb4a188,0x3ff15f28a71deee7,2 +np.float64,0x7fe7d9ee6b2fb3dc,0x7ff0000000000000,2 +np.float64,0x3febbf6faeb77edf,0x3ff666d13682ea93,2 +np.float64,0xc401a32988035,0x3ff0000000000000,2 +np.float64,0xbfeab30aa8f56615,0x3ff5e65dcc6603f8,2 +np.float64,0x92c8cea32591a,0x3ff0000000000000,2 +np.float64,0xbff0000000000000,0x3ff8b07551d9f550,2 +np.float64,0xbfbddfb4dc3bbf68,0x3ff01bebaec38faa,2 +np.float64,0xbfd8de3e2a31bc7c,0x3ff1391f4830d20b,2 +np.float64,0xffc83a8f8a307520,0x7ff0000000000000,2 +np.float64,0x3fee026ef53c04de,0x3ff7911337085827,2 +np.float64,0x7fbaf380b235e700,0x7ff0000000000000,2 +np.float64,0xffe5b89fa62b713f,0x7ff0000000000000,2 +np.float64,0xbfdc1ff54ab83fea,0x3ff191e8c0b60bb2,2 +np.float64,0x6ae3534cd5c6b,0x3ff0000000000000,2 +np.float64,0xbfea87e558750fcb,0x3ff5d24846013794,2 +np.float64,0xffe0f467bee1e8cf,0x7ff0000000000000,2 +np.float64,0x7fee3b0dc7bc761b,0x7ff0000000000000,2 +np.float64,0x3fed87521afb0ea4,0x3ff74f2f5cd36a5c,2 +np.float64,0x7b3c9882f6794,0x3ff0000000000000,2 +np.float64,0x7fdd1a62243a34c3,0x7ff0000000000000,2 +np.float64,0x800f1dc88d3e3b91,0x3ff0000000000000,2 +np.float64,0x7fc3213cfa264279,0x7ff0000000000000,2 +np.float64,0x3fe40e0f3d681c1e,0x3ff33f135e9d5ded,2 +np.float64,0x7febf14e51f7e29c,0x7ff0000000000000,2 +np.float64,0xffe96c630c72d8c5,0x7ff0000000000000,2 +np.float64,0x7fdd82fbe7bb05f7,0x7ff0000000000000,2 +np.float64,0xbf9a6a0b1034d420,0x3ff0015ce009f7d8,2 +np.float64,0xbfceb4f8153d69f0,0x3ff0766e3ecc77df,2 +np.float64,0x3fd9de31e633bc64,0x3ff15327b794a16e,2 +np.float64,0x3faa902a30352054,0x3ff00583848d1969,2 +np.float64,0x0,0x3ff0000000000000,2 +np.float64,0x3fbe3459c43c68b4,0x3ff01c8af6710ef6,2 +np.float64,0xbfa8df010031be00,0x3ff004d5632dc9f5,2 +np.float64,0x7fbcf6cf2a39ed9d,0x7ff0000000000000,2 +np.float64,0xffe4236202a846c4,0x7ff0000000000000,2 +np.float64,0x3fd35ed52e26bdaa,0x3ff0bd0b231f11f7,2 +np.float64,0x7fe7a2df532f45be,0x7ff0000000000000,2 +np.float64,0xffe32f8315665f06,0x7ff0000000000000,2 +np.float64,0x7fe1a69f03e34d3d,0x7ff0000000000000,2 +np.float64,0x7fa5542b742aa856,0x7ff0000000000000,2 +np.float64,0x3fe84e9f8ef09d3f,0x3ff4d79816359765,2 +np.float64,0x29076fe6520ef,0x3ff0000000000000,2 +np.float64,0xffd70894f7ae112a,0x7ff0000000000000,2 +np.float64,0x800188edcbe311dc,0x3ff0000000000000,2 +np.float64,0x3fe2c7acda258f5a,0x3ff2d5dad4617703,2 +np.float64,0x3f775d41a02ebb00,0x3ff000110f212445,2 +np.float64,0x7fe8a084d1714109,0x7ff0000000000000,2 +np.float64,0x3fe31562d8a62ac6,0x3ff2ee35055741cd,2 +np.float64,0xbfd195d4d1a32baa,0x3ff09b98a50c151b,2 +np.float64,0xffaae9ff0c35d400,0x7ff0000000000000,2 +np.float64,0xff819866502330c0,0x7ff0000000000000,2 +np.float64,0x7fddc64815bb8c8f,0x7ff0000000000000,2 +np.float64,0xbfd442b428288568,0x3ff0cef70aa73ae6,2 +np.float64,0x8002e7625aa5cec5,0x3ff0000000000000,2 +np.float64,0x7fe8d4f70e71a9ed,0x7ff0000000000000,2 +np.float64,0xbfc3bd015f277a04,0x3ff030cbf16f29d9,2 
+np.float64,0x3fd315d5baa62bab,0x3ff0b77a551a5335,2 +np.float64,0x7fa638b4642c7168,0x7ff0000000000000,2 +np.float64,0x3fdea8b795bd516f,0x3ff1df0bb70cdb79,2 +np.float64,0xbfd78754762f0ea8,0x3ff117ee0f29abed,2 +np.float64,0x8009f6a37633ed47,0x3ff0000000000000,2 +np.float64,0x3fea1daf75343b5f,0x3ff5a1804789bf13,2 +np.float64,0x3fd044b6c0a0896e,0x3ff0850b7297d02f,2 +np.float64,0x8003547a9c86a8f6,0x3ff0000000000000,2 +np.float64,0x3fa6c2cd782d859b,0x3ff0040c4ac8f44a,2 +np.float64,0x3fe225baaae44b76,0x3ff2a47f5e1f5e85,2 +np.float64,0x8000000000000000,0x3ff0000000000000,2 +np.float64,0x3fcb53da8736a7b8,0x3ff05db45af470ac,2 +np.float64,0x80079f8f140f3f1f,0x3ff0000000000000,2 +np.float64,0xbfcd1d7e2b3a3afc,0x3ff06a6b6845d05f,2 +np.float64,0x96df93672dbf3,0x3ff0000000000000,2 +np.float64,0xdef86e43bdf0e,0x3ff0000000000000,2 +np.float64,0xbfec05a09db80b41,0x3ff6896b768eea08,2 +np.float64,0x7fe3ff91d267ff23,0x7ff0000000000000,2 +np.float64,0xffea3eaa07347d53,0x7ff0000000000000,2 +np.float64,0xbfebde1cc1f7bc3a,0x3ff675e34ac2afc2,2 +np.float64,0x629bcde8c537a,0x3ff0000000000000,2 +np.float64,0xbfdde4fcff3bc9fa,0x3ff1c7061d21f0fe,2 +np.float64,0x3fee60fd003cc1fa,0x3ff7c49af3878a51,2 +np.float64,0x3fe5c92ac32b9256,0x3ff3da7a7929588b,2 +np.float64,0xbfe249c78f64938f,0x3ff2af52a06f1a50,2 +np.float64,0xbfc6de9dbe2dbd3c,0x3ff0418d284ee29f,2 +np.float64,0xffc8ef094631de14,0x7ff0000000000000,2 +np.float64,0x3fdef05f423de0bf,0x3ff1e800caba8ab5,2 +np.float64,0xffc1090731221210,0x7ff0000000000000,2 +np.float64,0xbfedec9b5fbbd937,0x3ff7854b6792a24a,2 +np.float64,0xbfb873507630e6a0,0x3ff012b23b3b7a67,2 +np.float64,0xbfe3cd6692679acd,0x3ff3299d6936ec4b,2 +np.float64,0xbfb107c890220f90,0x3ff0091122162472,2 +np.float64,0xbfe4e6ee48e9cddc,0x3ff3894e5a5e70a6,2 +np.float64,0xffe6fa3413edf468,0x7ff0000000000000,2 +np.float64,0x3fe2faf79b65f5ef,0x3ff2e5e11fae8b54,2 +np.float64,0xbfdfeb8df9bfd71c,0x3ff208189691b15f,2 +np.float64,0x75d2d03ceba5b,0x3ff0000000000000,2 +np.float64,0x3feb48c182b69183,0x3ff62d4462eba6cb,2 +np.float64,0xffcda9f7ff3b53f0,0x7ff0000000000000,2 +np.float64,0x7fcafbdcbd35f7b8,0x7ff0000000000000,2 +np.float64,0xbfd1895523a312aa,0x3ff09aba642a78d9,2 +np.float64,0x3fe3129c3f662538,0x3ff2ed546bbfafcf,2 +np.float64,0x3fb444dee02889be,0x3ff00cd86273b964,2 +np.float64,0xbf73b32d7ee77,0x3ff0000000000000,2 +np.float64,0x3fae19904c3c3321,0x3ff00714865c498a,2 +np.float64,0x7fefbfaef5bf7f5d,0x7ff0000000000000,2 +np.float64,0x8000dc3816e1b871,0x3ff0000000000000,2 +np.float64,0x8003f957ba47f2b0,0x3ff0000000000000,2 +np.float64,0xbfe3563c7ea6ac79,0x3ff302dcebc92856,2 +np.float64,0xbfdc80fbae3901f8,0x3ff19cfe73e58092,2 +np.float64,0x8009223b04524476,0x3ff0000000000000,2 +np.float64,0x3fd95f431c32be86,0x3ff1461c21cb03f0,2 +np.float64,0x7ff4000000000000,0x7ffc000000000000,2 +np.float64,0xbfe7c12ed3ef825e,0x3ff49d59c265efcd,2 +np.float64,0x10000000000000,0x3ff0000000000000,2 +np.float64,0x7fc5e2632f2bc4c5,0x7ff0000000000000,2 +np.float64,0xffd8f6b4c7b1ed6a,0x7ff0000000000000,2 +np.float64,0x80034b93d4069728,0x3ff0000000000000,2 +np.float64,0xffdf5d4c1dbeba98,0x7ff0000000000000,2 +np.float64,0x800bc63d70178c7b,0x3ff0000000000000,2 +np.float64,0xbfeba31ea0f7463d,0x3ff658fa27073d2b,2 +np.float64,0xbfeebeede97d7ddc,0x3ff7f89a8e80dec4,2 +np.float64,0x7feb0f1f91361e3e,0x7ff0000000000000,2 +np.float64,0xffec3158d0b862b1,0x7ff0000000000000,2 +np.float64,0x3fde51cbfbbca398,0x3ff1d44c2ff15b3d,2 +np.float64,0xd58fb2b3ab1f7,0x3ff0000000000000,2 +np.float64,0x80028b9e32e5173d,0x3ff0000000000000,2 
+np.float64,0x7fea77a56c74ef4a,0x7ff0000000000000,2 +np.float64,0x3fdaabbd4a35577b,0x3ff168d82edf2fe0,2 +np.float64,0xbfe69c39cc2d3874,0x3ff429b2f4cdb362,2 +np.float64,0x3b78f5d876f20,0x3ff0000000000000,2 +np.float64,0x7fa47d116428fa22,0x7ff0000000000000,2 +np.float64,0xbfe4118b0ce82316,0x3ff3403d989f780f,2 +np.float64,0x800482e793c905d0,0x3ff0000000000000,2 +np.float64,0xbfe48e5728e91cae,0x3ff36a9020bf9d20,2 +np.float64,0x7fe078ba8860f174,0x7ff0000000000000,2 +np.float64,0x3fd80843e5b01088,0x3ff1242f401e67da,2 +np.float64,0x3feb1f6965f63ed3,0x3ff6197fc590e143,2 +np.float64,0xffa41946d8283290,0x7ff0000000000000,2 +np.float64,0xffe30de129661bc2,0x7ff0000000000000,2 +np.float64,0x3fec9c8e1ab9391c,0x3ff6d542ea2f49b4,2 +np.float64,0x3fdc3e4490387c89,0x3ff1955ae18cac37,2 +np.float64,0xffef49d9c77e93b3,0x7ff0000000000000,2 +np.float64,0xfff0000000000000,0x7ff0000000000000,2 +np.float64,0x3fe0442455608849,0x3ff21cab90067d5c,2 +np.float64,0xbfed86aebd3b0d5e,0x3ff74ed8d4b75f50,2 +np.float64,0xffe4600d2b28c01a,0x7ff0000000000000,2 +np.float64,0x7fc1e8ccff23d199,0x7ff0000000000000,2 +np.float64,0x8008d49b0091a936,0x3ff0000000000000,2 +np.float64,0xbfe4139df028273c,0x3ff340ef3c86227c,2 +np.float64,0xbfe9ab4542b3568a,0x3ff56dfe32061247,2 +np.float64,0xbfd76dd365aedba6,0x3ff11589bab5fe71,2 +np.float64,0x3fd42cf829a859f0,0x3ff0cd3844bb0e11,2 +np.float64,0x7fd077cf2e20ef9d,0x7ff0000000000000,2 +np.float64,0x3fd7505760aea0b0,0x3ff112c937b3f088,2 +np.float64,0x1f93341a3f267,0x3ff0000000000000,2 +np.float64,0x7fe3c3c1b0678782,0x7ff0000000000000,2 +np.float64,0x800f85cec97f0b9e,0x3ff0000000000000,2 +np.float64,0xd93ab121b2756,0x3ff0000000000000,2 +np.float64,0xbfef8066fd7f00ce,0x3ff8663ed7d15189,2 +np.float64,0xffe31dd4af663ba9,0x7ff0000000000000,2 +np.float64,0xbfd7ff05a6affe0c,0x3ff1234c09bb686d,2 +np.float64,0xbfe718c31fee3186,0x3ff45a0c2d0ef7b0,2 +np.float64,0x800484bf33e9097f,0x3ff0000000000000,2 +np.float64,0xffd409dad02813b6,0x7ff0000000000000,2 +np.float64,0x3fe59679896b2cf4,0x3ff3c7f49e4fbbd3,2 +np.float64,0xbfd830c54d30618a,0x3ff1281729861390,2 +np.float64,0x1d4fc81c3a9fa,0x3ff0000000000000,2 +np.float64,0x3fd334e4272669c8,0x3ff0b9d5d82894f0,2 +np.float64,0xffc827e65c304fcc,0x7ff0000000000000,2 +np.float64,0xffe2d1814aa5a302,0x7ff0000000000000,2 +np.float64,0xffd7b5b8d32f6b72,0x7ff0000000000000,2 +np.float64,0xbfdbc9f077b793e0,0x3ff18836b9106ad0,2 +np.float64,0x7fc724c2082e4983,0x7ff0000000000000,2 +np.float64,0x3fa39ed72c273da0,0x3ff00302051ce17e,2 +np.float64,0xbfe3c4c209678984,0x3ff326c4fd16b5cd,2 +np.float64,0x7fe91f6d00f23ed9,0x7ff0000000000000,2 +np.float64,0x8004ee93fea9dd29,0x3ff0000000000000,2 +np.float64,0xbfe7c32d0eaf865a,0x3ff49e290ed2ca0e,2 +np.float64,0x800ea996b29d532d,0x3ff0000000000000,2 +np.float64,0x2df9ec1c5bf3e,0x3ff0000000000000,2 +np.float64,0xabb175df5762f,0x3ff0000000000000,2 +np.float64,0xffe3fc9c8e27f938,0x7ff0000000000000,2 +np.float64,0x7fb358a62826b14b,0x7ff0000000000000,2 +np.float64,0x800aedcccaf5db9a,0x3ff0000000000000,2 +np.float64,0xffca530c5234a618,0x7ff0000000000000,2 +np.float64,0x40f91e9681f24,0x3ff0000000000000,2 +np.float64,0x80098f4572f31e8b,0x3ff0000000000000,2 +np.float64,0xbfdc58c21fb8b184,0x3ff1986115f8fe92,2 +np.float64,0xbfebeafd40b7d5fa,0x3ff67c3cf34036e3,2 +np.float64,0x7fd108861a22110b,0x7ff0000000000000,2 +np.float64,0xff8e499ae03c9340,0x7ff0000000000000,2 +np.float64,0xbfd2f58caa25eb1a,0x3ff0b50b1bffafdf,2 +np.float64,0x3fa040c9bc208193,0x3ff002105e95aefa,2 +np.float64,0xbfd2ebc0a5a5d782,0x3ff0b44ed5a11584,2 
+np.float64,0xffe237bc93a46f78,0x7ff0000000000000,2 +np.float64,0x3fd557c5eeaaaf8c,0x3ff0e5e0a575e1ba,2 +np.float64,0x7abb419ef5769,0x3ff0000000000000,2 +np.float64,0xffefa1fe353f43fb,0x7ff0000000000000,2 +np.float64,0x3fa6f80ba02df017,0x3ff0041f51fa0d76,2 +np.float64,0xbfdce79488b9cf2a,0x3ff1a8e32877beb4,2 +np.float64,0x2285f3e4450bf,0x3ff0000000000000,2 +np.float64,0x3bf7eb7277efe,0x3ff0000000000000,2 +np.float64,0xbfd5925fd3ab24c0,0x3ff0eae1c2ac2e78,2 +np.float64,0xbfed6325227ac64a,0x3ff73c14a2ad5bfe,2 +np.float64,0x8000429c02408539,0x3ff0000000000000,2 +np.float64,0xb67c21e76cf84,0x3ff0000000000000,2 +np.float64,0x3fec3d3462f87a69,0x3ff6a51e4c027eb7,2 +np.float64,0x3feae69cbcf5cd3a,0x3ff5fe9387314afd,2 +np.float64,0x7fd0c9a0ec219341,0x7ff0000000000000,2 +np.float64,0x8004adb7f6295b71,0x3ff0000000000000,2 +np.float64,0xffd61fe8bb2c3fd2,0x7ff0000000000000,2 +np.float64,0xffe7fb3834aff670,0x7ff0000000000000,2 +np.float64,0x7fd1eef163a3dde2,0x7ff0000000000000,2 +np.float64,0x2e84547a5d08b,0x3ff0000000000000,2 +np.float64,0x8002d8875ee5b10f,0x3ff0000000000000,2 +np.float64,0x3fe1d1c5f763a38c,0x3ff28ba524fb6de8,2 +np.float64,0x8001dea0bc43bd42,0x3ff0000000000000,2 +np.float64,0xfecfad91fd9f6,0x3ff0000000000000,2 +np.float64,0xffed7965fa3af2cb,0x7ff0000000000000,2 +np.float64,0xbfe6102ccc2c205a,0x3ff3f4c082506686,2 +np.float64,0x3feff75b777feeb6,0x3ff8ab6222578e0c,2 +np.float64,0x3fb8a97bd43152f8,0x3ff013057f0a9d89,2 +np.float64,0xffe234b5e964696c,0x7ff0000000000000,2 +np.float64,0x984d9137309b2,0x3ff0000000000000,2 +np.float64,0xbfe42e9230e85d24,0x3ff349fb7d1a7560,2 +np.float64,0xbfecc8b249f99165,0x3ff6ebd0fea0ea72,2 +np.float64,0x8000840910410813,0x3ff0000000000000,2 +np.float64,0xbfd81db9e7303b74,0x3ff126402d3539ec,2 +np.float64,0x800548eb7fea91d8,0x3ff0000000000000,2 +np.float64,0xbfe4679ad0e8cf36,0x3ff35d4db89296a3,2 +np.float64,0x3fd4c55b5a298ab7,0x3ff0d99da31081f9,2 +np.float64,0xbfa8f5b38c31eb60,0x3ff004de3a23b32d,2 +np.float64,0x80005d348e80ba6a,0x3ff0000000000000,2 +np.float64,0x800c348d6118691b,0x3ff0000000000000,2 +np.float64,0xffd6b88f84ad7120,0x7ff0000000000000,2 +np.float64,0x3fc1aaaa82235555,0x3ff027136afd08e0,2 +np.float64,0x7fca7d081b34fa0f,0x7ff0000000000000,2 +np.float64,0x1,0x3ff0000000000000,2 +np.float64,0xbfdc810d1139021a,0x3ff19d007408cfe3,2 +np.float64,0xbfe5dce05f2bb9c0,0x3ff3e1bb9234617b,2 +np.float64,0xffecfe2c32b9fc58,0x7ff0000000000000,2 +np.float64,0x95b2891b2b651,0x3ff0000000000000,2 +np.float64,0x8000b60c6c616c1a,0x3ff0000000000000,2 +np.float64,0x4944f0889289f,0x3ff0000000000000,2 +np.float64,0x3fe6e508696dca10,0x3ff445d1b94863e9,2 +np.float64,0xbfe63355d0ec66ac,0x3ff401e74f16d16f,2 +np.float64,0xbfe9b9595af372b3,0x3ff57445e1b4d670,2 +np.float64,0x800e16f7313c2dee,0x3ff0000000000000,2 +np.float64,0xffe898f5f0b131eb,0x7ff0000000000000,2 +np.float64,0x3fe91ac651f2358d,0x3ff52e787c21c004,2 +np.float64,0x7fbfaac6783f558c,0x7ff0000000000000,2 +np.float64,0xd8ef3dfbb1de8,0x3ff0000000000000,2 +np.float64,0xbfc58c13a52b1828,0x3ff03a2c19d65019,2 +np.float64,0xbfbde55e8a3bcac0,0x3ff01bf648a3e0a7,2 +np.float64,0xffc3034930260694,0x7ff0000000000000,2 +np.float64,0xea77a64dd4ef5,0x3ff0000000000000,2 +np.float64,0x800cfe7e7739fcfd,0x3ff0000000000000,2 +np.float64,0x4960f31a92c1f,0x3ff0000000000000,2 +np.float64,0x3fd9552c94b2aa58,0x3ff14515a29add09,2 +np.float64,0xffe8b3244c316648,0x7ff0000000000000,2 +np.float64,0x3fe8201e6a70403d,0x3ff4c444fa679cce,2 +np.float64,0xffe9ab7c20f356f8,0x7ff0000000000000,2 +np.float64,0x3fed8bba5f7b1774,0x3ff751853c4c95c5,2 
+np.float64,0x8007639cb76ec73a,0x3ff0000000000000,2 +np.float64,0xbfe396db89672db7,0x3ff317bfd1d6fa8c,2 +np.float64,0xbfeb42f888f685f1,0x3ff62a7e0eee56b1,2 +np.float64,0x3fe894827c712904,0x3ff4f4f561d9ea13,2 +np.float64,0xb66b3caf6cd68,0x3ff0000000000000,2 +np.float64,0x800f8907fdbf1210,0x3ff0000000000000,2 +np.float64,0x7fe9b0cddb73619b,0x7ff0000000000000,2 +np.float64,0xbfda70c0e634e182,0x3ff1628c6fdffc53,2 +np.float64,0x3fe0b5f534a16bea,0x3ff23b4ed4c2b48e,2 +np.float64,0xbfe8eee93671ddd2,0x3ff51b85b3c50ae4,2 +np.float64,0xbfe8c22627f1844c,0x3ff50858787a3bfe,2 +np.float64,0x37bb83c86f771,0x3ff0000000000000,2 +np.float64,0xffb7827ffe2f0500,0x7ff0000000000000,2 +np.float64,0x64317940c864,0x3ff0000000000000,2 +np.float64,0x800430ecee6861db,0x3ff0000000000000,2 +np.float64,0x3fa4291fbc285240,0x3ff0032d0204f6dd,2 +np.float64,0xffec69f76af8d3ee,0x7ff0000000000000,2 +np.float64,0x3ff0000000000000,0x3ff8b07551d9f550,2 +np.float64,0x3fc4cf3c42299e79,0x3ff0363fb1d3c254,2 +np.float64,0x7fe0223a77e04474,0x7ff0000000000000,2 +np.float64,0x800a3d4fa4347aa0,0x3ff0000000000000,2 +np.float64,0x3fdd273f94ba4e7f,0x3ff1b05b686e6879,2 +np.float64,0x3feca79052f94f20,0x3ff6dadedfa283aa,2 +np.float64,0x5e7f6f80bcfef,0x3ff0000000000000,2 +np.float64,0xbfef035892fe06b1,0x3ff81efb39cbeba2,2 +np.float64,0x3fee6c08e07cd812,0x3ff7caad952860a1,2 +np.float64,0xffeda715877b4e2a,0x7ff0000000000000,2 +np.float64,0x800580286b0b0052,0x3ff0000000000000,2 +np.float64,0x800703a73fee074f,0x3ff0000000000000,2 +np.float64,0xbfccf96a6639f2d4,0x3ff0696330a60832,2 +np.float64,0x7feb408442368108,0x7ff0000000000000,2 +np.float64,0x3fedc87a46fb90f5,0x3ff771e3635649a9,2 +np.float64,0x3fd8297b773052f7,0x3ff12762bc0cea76,2 +np.float64,0x3fee41bb03fc8376,0x3ff7b37b2da48ab4,2 +np.float64,0xbfe2b05a226560b4,0x3ff2cea17ae7c528,2 +np.float64,0xbfd2e92cf2a5d25a,0x3ff0b41d605ced61,2 +np.float64,0x4817f03a902ff,0x3ff0000000000000,2 +np.float64,0x8c9d4f0d193aa,0x3ff0000000000000,2 diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-exp.csv b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-exp.csv new file mode 100644 index 0000000..7c5ef3b --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-exp.csv @@ -0,0 +1,412 @@ +dtype,input,output,ulperrortol +## +ve denormals ## +np.float32,0x004b4716,0x3f800000,3 +np.float32,0x007b2490,0x3f800000,3 +np.float32,0x007c99fa,0x3f800000,3 +np.float32,0x00734a0c,0x3f800000,3 +np.float32,0x0070de24,0x3f800000,3 +np.float32,0x00495d65,0x3f800000,3 +np.float32,0x006894f6,0x3f800000,3 +np.float32,0x00555a76,0x3f800000,3 +np.float32,0x004e1fb8,0x3f800000,3 +np.float32,0x00687de9,0x3f800000,3 +## -ve denormals ## +np.float32,0x805b59af,0x3f800000,3 +np.float32,0x807ed8ed,0x3f800000,3 +np.float32,0x807142ad,0x3f800000,3 +np.float32,0x80772002,0x3f800000,3 +np.float32,0x8062abcb,0x3f800000,3 +np.float32,0x8045e31c,0x3f800000,3 +np.float32,0x805f01c2,0x3f800000,3 +np.float32,0x80506432,0x3f800000,3 +np.float32,0x8060089d,0x3f800000,3 +np.float32,0x8071292f,0x3f800000,3 +## floats that output a denormal ## +np.float32,0xc2cf3fc1,0x00000001,3 +np.float32,0xc2c79726,0x00000021,3 +np.float32,0xc2cb295d,0x00000005,3 +np.float32,0xc2b49e6b,0x00068c4c,3 +np.float32,0xc2ca8116,0x00000008,3 +np.float32,0xc2c23f82,0x000001d7,3 +np.float32,0xc2cb69c0,0x00000005,3 +np.float32,0xc2cc1f4d,0x00000003,3 +np.float32,0xc2ae094e,0x00affc4c,3 +np.float32,0xc2c86c44,0x00000015,3 +## random floats between -87.0f and 88.0f ## 
+np.float32,0x4030d7e0,0x417d9a05,3 +np.float32,0x426f60e8,0x6aa1be2c,3 +np.float32,0x41a1b220,0x4e0efc11,3 +np.float32,0xc20cc722,0x26159da7,3 +np.float32,0x41c492bc,0x512ec79d,3 +np.float32,0x40980210,0x42e73a0e,3 +np.float32,0xbf1f7b80,0x3f094de3,3 +np.float32,0x42a678a4,0x7b87a383,3 +np.float32,0xc20f3cfd,0x25a1c304,3 +np.float32,0x423ff34c,0x6216467f,3 +np.float32,0x00000000,0x3f800000,3 +## floats that cause an overflow ## +np.float32,0x7f06d8c1,0x7f800000,3 +np.float32,0x7f451912,0x7f800000,3 +np.float32,0x7ecceac3,0x7f800000,3 +np.float32,0x7f643b45,0x7f800000,3 +np.float32,0x7e910ea0,0x7f800000,3 +np.float32,0x7eb4756b,0x7f800000,3 +np.float32,0x7f4ec708,0x7f800000,3 +np.float32,0x7f6b4551,0x7f800000,3 +np.float32,0x7d8edbda,0x7f800000,3 +np.float32,0x7f730718,0x7f800000,3 +np.float32,0x42b17217,0x7f7fff84,3 +np.float32,0x42b17218,0x7f800000,3 +np.float32,0x42b17219,0x7f800000,3 +np.float32,0xfef2b0bc,0x00000000,3 +np.float32,0xff69f83e,0x00000000,3 +np.float32,0xff4ecb12,0x00000000,3 +np.float32,0xfeac6d86,0x00000000,3 +np.float32,0xfde0cdb8,0x00000000,3 +np.float32,0xff26aef4,0x00000000,3 +np.float32,0xff6f9277,0x00000000,3 +np.float32,0xff7adfc4,0x00000000,3 +np.float32,0xff0ad40e,0x00000000,3 +np.float32,0xff6fd8f3,0x00000000,3 +np.float32,0xc2cff1b4,0x00000001,3 +np.float32,0xc2cff1b5,0x00000000,3 +np.float32,0xc2cff1b6,0x00000000,3 +np.float32,0x7f800000,0x7f800000,3 +np.float32,0xff800000,0x00000000,3 +np.float32,0x4292f27c,0x7480000a,3 +np.float32,0x42a920be,0x7c7fff94,3 +np.float32,0x41c214c9,0x50ffffd9,3 +np.float32,0x41abe686,0x4effffd9,3 +np.float32,0x4287db5a,0x707fffd3,3 +np.float32,0x41902cbb,0x4c800078,3 +np.float32,0x42609466,0x67ffffeb,3 +np.float32,0x41a65af5,0x4e7fffd1,3 +np.float32,0x417f13ff,0x4affffc9,3 +np.float32,0x426d0e6c,0x6a3504f2,3 +np.float32,0x41bc8934,0x507fff51,3 +np.float32,0x42a7bdde,0x7c0000d6,3 +np.float32,0x4120cf66,0x46b504f6,3 +np.float32,0x4244da8f,0x62ffff1a,3 +np.float32,0x41a0cf69,0x4e000034,3 +np.float32,0x41cd2bec,0x52000005,3 +np.float32,0x42893e41,0x7100009e,3 +np.float32,0x41b437e1,0x4fb50502,3 +np.float32,0x41d8430f,0x5300001d,3 +np.float32,0x4244da92,0x62ffffda,3 +np.float32,0x41a0cf63,0x4dffffa9,3 +np.float32,0x3eb17218,0x3fb504f3,3 +np.float32,0x428729e8,0x703504dc,3 +np.float32,0x41a0cf67,0x4e000014,3 +np.float32,0x4252b77d,0x65800011,3 +np.float32,0x41902cb9,0x4c800058,3 +np.float32,0x42a0cf67,0x79800052,3 +np.float32,0x4152b77b,0x48ffffe9,3 +np.float32,0x41265af3,0x46ffffc8,3 +np.float32,0x42187e0b,0x5affff9a,3 +np.float32,0xc0d2b77c,0x3ab504f6,3 +np.float32,0xc283b2ac,0x10000072,3 +np.float32,0xc1cff1b4,0x2cb504f5,3 +np.float32,0xc05dce9e,0x3d000000,3 +np.float32,0xc28ec9d2,0x0bfffea5,3 +np.float32,0xc23c893a,0x1d7fffde,3 +np.float32,0xc2a920c0,0x027fff6c,3 +np.float32,0xc1f9886f,0x2900002b,3 +np.float32,0xc2c42920,0x000000b5,3 +np.float32,0xc2893e41,0x0dfffec5,3 +np.float32,0xc2c4da93,0x00000080,3 +np.float32,0xc17f1401,0x3400000c,3 +np.float32,0xc1902cb6,0x327fffaf,3 +np.float32,0xc27c4e3b,0x11ffffc5,3 +np.float32,0xc268e5c5,0x157ffe9d,3 +np.float32,0xc2b4e953,0x0005a826,3 +np.float32,0xc287db5a,0x0e800016,3 +np.float32,0xc207db5a,0x2700000b,3 +np.float32,0xc2b2d4fe,0x000ffff1,3 +np.float32,0xc268e5c0,0x157fffdd,3 +np.float32,0xc22920bd,0x2100003b,3 +np.float32,0xc2902caf,0x0b80011e,3 +np.float32,0xc1902cba,0x327fff2f,3 +np.float32,0xc2ca6625,0x00000008,3 +np.float32,0xc280ece8,0x10fffeb5,3 +np.float32,0xc2918f94,0x0b0000ea,3 +np.float32,0xc29b43d5,0x077ffffc,3 +np.float32,0xc1e61ff7,0x2ab504f5,3 
+np.float32,0xc2867878,0x0effff15,3 +np.float32,0xc2a2324a,0x04fffff4,3 +#float64 +## near zero ## +np.float64,0x8000000000000000,0x3ff0000000000000,1 +np.float64,0x8010000000000000,0x3ff0000000000000,1 +np.float64,0x8000000000000001,0x3ff0000000000000,1 +np.float64,0x8360000000000000,0x3ff0000000000000,1 +np.float64,0x9a70000000000000,0x3ff0000000000000,1 +np.float64,0xb9b0000000000000,0x3ff0000000000000,1 +np.float64,0xb810000000000000,0x3ff0000000000000,1 +np.float64,0xbc30000000000000,0x3ff0000000000000,1 +np.float64,0xb6a0000000000000,0x3ff0000000000000,1 +np.float64,0x0000000000000000,0x3ff0000000000000,1 +np.float64,0x0010000000000000,0x3ff0000000000000,1 +np.float64,0x0000000000000001,0x3ff0000000000000,1 +np.float64,0x0360000000000000,0x3ff0000000000000,1 +np.float64,0x1a70000000000000,0x3ff0000000000000,1 +np.float64,0x3c30000000000000,0x3ff0000000000000,1 +np.float64,0x36a0000000000000,0x3ff0000000000000,1 +np.float64,0x39b0000000000000,0x3ff0000000000000,1 +np.float64,0x3810000000000000,0x3ff0000000000000,1 +## underflow ## +np.float64,0xc0c6276800000000,0x0000000000000000,1 +np.float64,0xc0c62d918ce2421d,0x0000000000000000,1 +np.float64,0xc0c62d918ce2421e,0x0000000000000000,1 +np.float64,0xc0c62d91a0000000,0x0000000000000000,1 +np.float64,0xc0c62d9180000000,0x0000000000000000,1 +np.float64,0xc0c62dea45ee3e06,0x0000000000000000,1 +np.float64,0xc0c62dea45ee3e07,0x0000000000000000,1 +np.float64,0xc0c62dea40000000,0x0000000000000000,1 +np.float64,0xc0c62dea60000000,0x0000000000000000,1 +np.float64,0xc0875f1120000000,0x0000000000000000,1 +np.float64,0xc0875f113c30b1c8,0x0000000000000000,1 +np.float64,0xc0875f1140000000,0x0000000000000000,1 +np.float64,0xc093480000000000,0x0000000000000000,1 +np.float64,0xffefffffffffffff,0x0000000000000000,1 +np.float64,0xc7efffffe0000000,0x0000000000000000,1 +## overflow ## +np.float64,0x40862e52fefa39ef,0x7ff0000000000000,1 +np.float64,0x40872e42fefa39ef,0x7ff0000000000000,1 +## +/- INF, +/- NAN ## +np.float64,0x7ff0000000000000,0x7ff0000000000000,1 +np.float64,0xfff0000000000000,0x0000000000000000,1 +np.float64,0x7ff8000000000000,0x7ff8000000000000,1 +np.float64,0xfff8000000000000,0xfff8000000000000,1 +## output denormal ## +np.float64,0xc087438520000000,0x0000000000000001,1 +np.float64,0xc08743853f2f4461,0x0000000000000001,1 +np.float64,0xc08743853f2f4460,0x0000000000000001,1 +np.float64,0xc087438540000000,0x0000000000000001,1 +## between -745.13321910 and 709.78271289 ## +np.float64,0xbff760cd14774bd9,0x3fcdb14ced00ceb6,1 +np.float64,0xbff760cd20000000,0x3fcdb14cd7993879,1 +np.float64,0xbff760cd00000000,0x3fcdb14d12fbd264,1 +np.float64,0xc07f1cf360000000,0x130c1b369af14fda,1 +np.float64,0xbeb0000000000000,0x3feffffe00001000,1 +np.float64,0xbd70000000000000,0x3fefffffffffe000,1 +np.float64,0xc084fd46e5c84952,0x0360000000000139,1 +np.float64,0xc084fd46e5c84953,0x035ffffffffffe71,1 +np.float64,0xc084fd46e0000000,0x0360000b9096d32c,1 +np.float64,0xc084fd4700000000,0x035fff9721d12104,1 +np.float64,0xc086232bc0000000,0x0010003af5e64635,1 +np.float64,0xc086232bdd7abcd2,0x001000000000007c,1 +np.float64,0xc086232bdd7abcd3,0x000ffffffffffe7c,1 +np.float64,0xc086232be0000000,0x000ffffaf57a6fc9,1 +np.float64,0xc086233920000000,0x000fe590e3b45eb0,1 +np.float64,0xc086233938000000,0x000fe56133493c57,1 +np.float64,0xc086233940000000,0x000fe5514deffbbc,1 +np.float64,0xc086234c98000000,0x000fbf1024c32ccb,1 +np.float64,0xc086234ca0000000,0x000fbf0065bae78d,1 +np.float64,0xc086234c80000000,0x000fbf3f623a7724,1 +np.float64,0xc086234ec0000000,0x000fbad237c846f9,1 
+np.float64,0xc086234ec8000000,0x000fbac27cfdec97,1 +np.float64,0xc086234ee0000000,0x000fba934cfd3dc2,1 +np.float64,0xc086234ef0000000,0x000fba73d7f618d9,1 +np.float64,0xc086234f00000000,0x000fba54632dddc0,1 +np.float64,0xc0862356e0000000,0x000faae0945b761a,1 +np.float64,0xc0862356f0000000,0x000faac13eb9a310,1 +np.float64,0xc086235700000000,0x000faaa1e9567b0a,1 +np.float64,0xc086236020000000,0x000f98cd75c11ed7,1 +np.float64,0xc086236ca0000000,0x000f8081b4d93f89,1 +np.float64,0xc086236cb0000000,0x000f8062b3f4d6c5,1 +np.float64,0xc086236cc0000000,0x000f8043b34e6f8c,1 +np.float64,0xc086238d98000000,0x000f41220d9b0d2c,1 +np.float64,0xc086238da0000000,0x000f4112cc80a01f,1 +np.float64,0xc086238d80000000,0x000f414fd145db5b,1 +np.float64,0xc08624fd00000000,0x000cbfce8ea1e6c4,1 +np.float64,0xc086256080000000,0x000c250747fcd46e,1 +np.float64,0xc08626c480000000,0x000a34f4bd975193,1 +np.float64,0xbf50000000000000,0x3feff800ffeaac00,1 +np.float64,0xbe10000000000000,0x3fefffffff800000,1 +np.float64,0xbcd0000000000000,0x3feffffffffffff8,1 +np.float64,0xc055d589e0000000,0x38100004bf94f63e,1 +np.float64,0xc055d58a00000000,0x380ffff97f292ce8,1 +np.float64,0xbfd962d900000000,0x3fe585a4b00110e1,1 +np.float64,0x3ff4bed280000000,0x400d411e7a58a303,1 +np.float64,0x3fff0b3620000000,0x401bd7737ffffcf3,1 +np.float64,0x3ff0000000000000,0x4005bf0a8b145769,1 +np.float64,0x3eb0000000000000,0x3ff0000100000800,1 +np.float64,0x3d70000000000000,0x3ff0000000001000,1 +np.float64,0x40862e42e0000000,0x7fefff841808287f,1 +np.float64,0x40862e42fefa39ef,0x7fefffffffffff2a,1 +np.float64,0x40862e0000000000,0x7feef85a11e73f2d,1 +np.float64,0x4000000000000000,0x401d8e64b8d4ddae,1 +np.float64,0x4009242920000000,0x40372a52c383a488,1 +np.float64,0x4049000000000000,0x44719103e4080b45,1 +np.float64,0x4008000000000000,0x403415e5bf6fb106,1 +np.float64,0x3f50000000000000,0x3ff00400800aab55,1 +np.float64,0x3e10000000000000,0x3ff0000000400000,1 +np.float64,0x3cd0000000000000,0x3ff0000000000004,1 +np.float64,0x40562e40a0000000,0x47effed088821c3f,1 +np.float64,0x40562e42e0000000,0x47effff082e6c7ff,1 +np.float64,0x40562e4300000000,0x47f00000417184b8,1 +np.float64,0x3fe8000000000000,0x4000ef9db467dcf8,1 +np.float64,0x402b12e8d4f33589,0x412718f68c71a6fe,1 +np.float64,0x402b12e8d4f3358a,0x412718f68c71a70a,1 +np.float64,0x402b12e8c0000000,0x412718f59a7f472e,1 +np.float64,0x402b12e8e0000000,0x412718f70c0eac62,1 +##use 1th entry +np.float64,0x40631659AE147CB4,0x4db3a95025a4890f,1 +np.float64,0xC061B87D2E85A4E2,0x332640c8e2de2c51,1 +np.float64,0x405A4A50BE243AF4,0x496a45e4b7f0339a,1 +np.float64,0xC0839898B98EC5C6,0x0764027828830df4,1 +#use 2th entry +np.float64,0xC072428C44B6537C,0x2596ade838b96f3e,1 +np.float64,0xC053057C5E1AE9BF,0x3912c8fad18fdadf,1 +np.float64,0x407E89C78328BAA3,0x6bfe35d5b9a1a194,1 +np.float64,0x4083501B6DD87112,0x77a855503a38924e,1 +#use 3th entry +np.float64,0x40832C6195F24540,0x7741e73c80e5eb2f,1 +np.float64,0xC083D4CD557C2EC9,0x06b61727c2d2508e,1 +np.float64,0x400C48F5F67C99BD,0x404128820f02b92e,1 +np.float64,0x4056E36D9B2DF26A,0x4830f52ff34a8242,1 +#use 4th entry +np.float64,0x4080FF700D8CBD06,0x70fa70df9bc30f20,1 +np.float64,0x406C276D39E53328,0x543eb8e20a8f4741,1 +np.float64,0xC070D6159BBD8716,0x27a4a0548c904a75,1 +np.float64,0xC052EBCF8ED61F83,0x391c0e92368d15e4,1 +#use 5th entry +np.float64,0xC061F892A8AC5FBE,0x32f807a89efd3869,1 +np.float64,0x4021D885D2DBA085,0x40bd4dc86d3e3270,1 +np.float64,0x40767AEEEE7D4FCF,0x605e22851ee2afb7,1 +np.float64,0xC0757C5D75D08C80,0x20f0751599b992a2,1 +#use 6th entry 
+np.float64,0x405ACF7A284C4CE3,0x499a4e0b7a27027c,1 +np.float64,0xC085A6C9E80D7AF5,0x0175914009d62ec2,1 +np.float64,0xC07E4C02F86F1DAE,0x1439269b29a9231e,1 +np.float64,0x4080D80F9691CC87,0x7088a6cdafb041de,1 +#use 7th entry +np.float64,0x407FDFD84FBA0AC1,0x6deb1ae6f9bc4767,1 +np.float64,0x40630C06A1A2213D,0x4dac7a9d51a838b7,1 +np.float64,0x40685FDB30BB8B4F,0x5183f5cc2cac9e79,1 +np.float64,0x408045A2208F77F4,0x6ee299e08e2aa2f0,1 +#use 8th entry +np.float64,0xC08104E391F5078B,0x0ed397b7cbfbd230,1 +np.float64,0xC031501CAEFAE395,0x3e6040fd1ea35085,1 +np.float64,0xC079229124F6247C,0x1babf4f923306b1e,1 +np.float64,0x407FB65F44600435,0x6db03beaf2512b8a,1 +#use 9th entry +np.float64,0xC07EDEE8E8E8A5AC,0x136536cec9cbef48,1 +np.float64,0x4072BB4086099A14,0x5af4d3c3008b56cc,1 +np.float64,0x4050442A2EC42CB4,0x45cd393bd8fad357,1 +np.float64,0xC06AC28FB3D419B4,0x2ca1b9d3437df85f,1 +#use 10th entry +np.float64,0x40567FC6F0A68076,0x480c977fd5f3122e,1 +np.float64,0x40620A2F7EDA59BB,0x4cf278e96f4ce4d7,1 +np.float64,0xC085044707CD557C,0x034aad6c968a045a,1 +np.float64,0xC07374EA5AC516AA,0x23dd6afdc03e83d5,1 +#use 11th entry +np.float64,0x4073CC95332619C1,0x5c804b1498bbaa54,1 +np.float64,0xC0799FEBBE257F31,0x1af6a954c43b87d2,1 +np.float64,0x408159F19EA424F6,0x7200858efcbfc84d,1 +np.float64,0x404A81F6F24C0792,0x44b664a07ce5bbfa,1 +#use 12th entry +np.float64,0x40295FF1EFB9A741,0x4113c0e74c52d7b0,1 +np.float64,0x4073975F4CC411DA,0x5c32be40b4fec2c1,1 +np.float64,0x406E9DE52E82A77E,0x56049c9a3f1ae089,1 +np.float64,0x40748C2F52560ED9,0x5d93bc14fd4cd23b,1 +#use 13th entry +np.float64,0x4062A553CDC4D04C,0x4d6266bfde301318,1 +np.float64,0xC079EC1D63598AB7,0x1a88cb184dab224c,1 +np.float64,0xC0725C1CB3167427,0x25725b46f8a081f6,1 +np.float64,0x407888771D9B45F9,0x6353b1ec6bd7ce80,1 +#use 14th entry +np.float64,0xC082CBA03AA89807,0x09b383723831ce56,1 +np.float64,0xC083A8961BB67DD7,0x0735b118d5275552,1 +np.float64,0xC076BC6ECA12E7E3,0x1f2222679eaef615,1 +np.float64,0xC072752503AA1A5B,0x254eb832242c77e1,1 +#use 15th entry +np.float64,0xC058800792125DEC,0x371882372a0b48d4,1 +np.float64,0x4082909FD863E81C,0x7580d5f386920142,1 +np.float64,0xC071616F8FB534F9,0x26dbe20ef64a412b,1 +np.float64,0x406D1AB571CAA747,0x54ee0d55cb38ac20,1 +#use 16th entry +np.float64,0x406956428B7DAD09,0x52358682c271237f,1 +np.float64,0xC07EFC2D9D17B621,0x133b3e77c27a4d45,1 +np.float64,0xC08469BAC5BA3CCA,0x050863e5f42cc52f,1 +np.float64,0x407189D9626386A5,0x593cb1c0b3b5c1d3,1 +#use 17th entry +np.float64,0x4077E652E3DEB8C6,0x6269a10dcbd3c752,1 +np.float64,0x407674C97DB06878,0x605485dcc2426ec2,1 +np.float64,0xC07CE9969CF4268D,0x16386cf8996669f2,1 +np.float64,0x40780EE32D5847C4,0x62a436bd1abe108d,1 +#use 18th entry +np.float64,0x4076C3AA5E1E8DA1,0x60c62f56a5e72e24,1 +np.float64,0xC0730AFC7239B9BE,0x24758ead095cec1e,1 +np.float64,0xC085CC2B9C420DDB,0x0109cdaa2e5694c1,1 +np.float64,0x406D0765CB6D7AA4,0x54e06f8dd91bd945,1 +#use 19th entry +np.float64,0xC082D011F3B495E7,0x09a6647661d279c2,1 +np.float64,0xC072826AF8F6AFBC,0x253acd3cd224507e,1 +np.float64,0x404EB9C4810CEA09,0x457933dbf07e8133,1 +np.float64,0x408284FBC97C58CE,0x755f6eb234aa4b98,1 +#use 20th entry +np.float64,0x40856008CF6EDC63,0x7d9c0b3c03f4f73c,1 +np.float64,0xC077CB2E9F013B17,0x1d9b3d3a166a55db,1 +np.float64,0xC0479CA3C20AD057,0x3bad40e081555b99,1 +np.float64,0x40844CD31107332A,0x7a821d70aea478e2,1 +#use 21th entry +np.float64,0xC07C8FCC0BFCC844,0x16ba1cc8c539d19b,1 +np.float64,0xC085C4E9A3ABA488,0x011ff675ba1a2217,1 +np.float64,0x4074D538B32966E5,0x5dfd9d78043c6ad9,1 
+np.float64,0xC0630CA16902AD46,0x3231a446074cede6,1 +#use 22th entry +np.float64,0xC06C826733D7D0B7,0x2b5f1078314d41e1,1 +np.float64,0xC0520DF55B2B907F,0x396c13a6ce8e833e,1 +np.float64,0xC080712072B0F437,0x107eae02d11d98ea,1 +np.float64,0x40528A6150E19EFB,0x469fdabda02228c5,1 +#use 23th entry +np.float64,0xC07B1D74B6586451,0x18d1253883ae3b48,1 +np.float64,0x4045AFD7867DAEC0,0x43d7d634fc4c5d98,1 +np.float64,0xC07A08B91F9ED3E2,0x1a60973e6397fc37,1 +np.float64,0x407B3ECF0AE21C8C,0x673e03e9d98d7235,1 +#use 24th entry +np.float64,0xC078AEB6F30CEABF,0x1c530b93ab54a1b3,1 +np.float64,0x4084495006A41672,0x7a775b6dc7e63064,1 +np.float64,0x40830B1C0EBF95DD,0x76e1e6eed77cfb89,1 +np.float64,0x407D93E8F33D8470,0x6a9adbc9e1e4f1e5,1 +#use 25th entry +np.float64,0x4066B11A09EFD9E8,0x504dd528065c28a7,1 +np.float64,0x408545823723AEEB,0x7d504a9b1844f594,1 +np.float64,0xC068C711F2CA3362,0x2e104f3496ea118e,1 +np.float64,0x407F317FCC3CA873,0x6cf0732c9948ebf4,1 +#use 26th entry +np.float64,0x407AFB3EBA2ED50F,0x66dc28a129c868d5,1 +np.float64,0xC075377037708ADE,0x21531a329f3d793e,1 +np.float64,0xC07C30066A1F3246,0x174448baa16ded2b,1 +np.float64,0xC06689A75DE2ABD3,0x2fad70662fae230b,1 +#use 27th entry +np.float64,0x4081514E9FCCF1E0,0x71e673b9efd15f44,1 +np.float64,0xC0762C710AF68460,0x1ff1ed7d8947fe43,1 +np.float64,0xC0468102FF70D9C4,0x3be0c3a8ff3419a3,1 +np.float64,0xC07EA4CEEF02A83E,0x13b908f085102c61,1 +#use 28th entry +np.float64,0xC06290B04AE823C4,0x328a83da3c2e3351,1 +np.float64,0xC0770EB1D1C395FB,0x1eab281c1f1db5fe,1 +np.float64,0xC06F5D4D838A5BAE,0x29500ea32fb474ea,1 +np.float64,0x40723B3133B54C5D,0x5a3c82c7c3a2b848,1 +#use 29th entry +np.float64,0x4085E6454CE3B4AA,0x7f20319b9638d06a,1 +np.float64,0x408389F2A0585D4B,0x7850667c58aab3d0,1 +np.float64,0xC0382798F9C8AE69,0x3dc1c79fe8739d6d,1 +np.float64,0xC08299D827608418,0x0a4335f76cdbaeb5,1 +#use 30th entry +np.float64,0xC06F3DED43301BF1,0x2965670ae46750a8,1 +np.float64,0xC070CAF6BDD577D9,0x27b4aa4ffdd29981,1 +np.float64,0x4078529AD4B2D9F2,0x6305c12755d5e0a6,1 +np.float64,0xC055B14E75A31B96,0x381c2eda6d111e5d,1 +#use 31th entry +np.float64,0x407B13EE414FA931,0x6700772c7544564d,1 +np.float64,0x407EAFDE9DE3EC54,0x6c346a0e49724a3c,1 +np.float64,0xC08362F398B9530D,0x07ffeddbadf980cb,1 +np.float64,0x407E865CDD9EEB86,0x6bf866cac5e0d126,1 +#use 32th entry +np.float64,0x407FB62DBC794C86,0x6db009f708ac62cb,1 +np.float64,0xC063D0BAA68CDDDE,0x31a3b2a51ce50430,1 +np.float64,0xC05E7706A2231394,0x34f24bead6fab5c9,1 +np.float64,0x4083E3A06FDE444E,0x79527b7a386d1937,1 diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-exp2.csv b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-exp2.csv new file mode 100644 index 0000000..e19e9eb --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-exp2.csv @@ -0,0 +1,1429 @@ +dtype,input,output,ulperrortol +np.float32,0xbdfe94b0,0x3f6adda6,2 +np.float32,0x3f20f8f8,0x3fc5ec69,2 +np.float32,0x7040b5,0x3f800000,2 +np.float32,0x30ec5,0x3f800000,2 +np.float32,0x3eb63070,0x3fa3ce29,2 +np.float32,0xff4dda3d,0x0,2 +np.float32,0x805b832f,0x3f800000,2 +np.float32,0x3e883fb7,0x3f99ed8c,2 +np.float32,0x3f14d71f,0x3fbf8708,2 +np.float32,0xff7b1e55,0x0,2 +np.float32,0xbf691ac6,0x3f082fa2,2 +np.float32,0x7ee3e6ab,0x7f800000,2 +np.float32,0xbec6e2b4,0x3f439248,2 +np.float32,0xbf5f5ec2,0x3f0bd2c0,2 +np.float32,0x8025cc2c,0x3f800000,2 +np.float32,0x7e0d7672,0x7f800000,2 +np.float32,0xff4bbc5c,0x0,2 +np.float32,0xbd94fb30,0x3f73696b,2 
+np.float32,0x6cc079,0x3f800000,2 +np.float32,0x803cf080,0x3f800000,2 +np.float32,0x71d418,0x3f800000,2 +np.float32,0xbf24a442,0x3f23ec1e,2 +np.float32,0xbe6c9510,0x3f5a1e1d,2 +np.float32,0xbe8fb284,0x3f52be38,2 +np.float32,0x7ea64754,0x7f800000,2 +np.float32,0x7fc00000,0x7fc00000,2 +np.float32,0x80620cfd,0x3f800000,2 +np.float32,0x3f3e20e8,0x3fd62e72,2 +np.float32,0x3f384600,0x3fd2d00e,2 +np.float32,0xff362150,0x0,2 +np.float32,0xbf349fa8,0x3f1cfaef,2 +np.float32,0xbf776cf2,0x3f0301a6,2 +np.float32,0x8021fc60,0x3f800000,2 +np.float32,0xbdb75280,0x3f70995c,2 +np.float32,0x7e9363a6,0x7f800000,2 +np.float32,0x7e728422,0x7f800000,2 +np.float32,0xfe91edc2,0x0,2 +np.float32,0x3f5f438c,0x3fea491d,2 +np.float32,0x3f2afae9,0x3fcb5c1f,2 +np.float32,0xbef8e766,0x3f36c448,2 +np.float32,0xba522c00,0x3f7fdb97,2 +np.float32,0xff18ee8c,0x0,2 +np.float32,0xbee8c5f4,0x3f3acd44,2 +np.float32,0x3e790448,0x3f97802c,2 +np.float32,0x3e8c9541,0x3f9ad571,2 +np.float32,0xbf03fa9f,0x3f331460,2 +np.float32,0x801ee053,0x3f800000,2 +np.float32,0xbf773230,0x3f03167f,2 +np.float32,0x356fd9,0x3f800000,2 +np.float32,0x8009cd88,0x3f800000,2 +np.float32,0x7f2bac51,0x7f800000,2 +np.float32,0x4d9eeb,0x3f800000,2 +np.float32,0x3133,0x3f800000,2 +np.float32,0x7f4290e0,0x7f800000,2 +np.float32,0xbf5e6523,0x3f0c3161,2 +np.float32,0x3f19182e,0x3fc1bf10,2 +np.float32,0x7e1248bb,0x7f800000,2 +np.float32,0xff5f7aae,0x0,2 +np.float32,0x7e8557b5,0x7f800000,2 +np.float32,0x26fc7f,0x3f800000,2 +np.float32,0x80397d61,0x3f800000,2 +np.float32,0x3cb1825d,0x3f81efe0,2 +np.float32,0x3ed808d0,0x3fab7c45,2 +np.float32,0xbf6f668a,0x3f05e259,2 +np.float32,0x3e3c7802,0x3f916abd,2 +np.float32,0xbd5ac5a0,0x3f76b21b,2 +np.float32,0x805aa6c9,0x3f800000,2 +np.float32,0xbe4d6f68,0x3f5ec3e1,2 +np.float32,0x3f3108b2,0x3fceb87f,2 +np.float32,0x3ec385cc,0x3fa6c9fb,2 +np.float32,0xbe9fc1ce,0x3f4e35e8,2 +np.float32,0x43b68,0x3f800000,2 +np.float32,0x3ef0cdcc,0x3fb15557,2 +np.float32,0x3e3f729b,0x3f91b5e1,2 +np.float32,0x7f52a4df,0x7f800000,2 +np.float32,0xbf56da96,0x3f0f15b9,2 +np.float32,0xbf161d2b,0x3f2a7faf,2 +np.float32,0x3e8df763,0x3f9b1fbe,2 +np.float32,0xff4f0780,0x0,2 +np.float32,0x8048f594,0x3f800000,2 +np.float32,0x3e62bb1d,0x3f953b7e,2 +np.float32,0xfe58e764,0x0,2 +np.float32,0x3dd2c922,0x3f897718,2 +np.float32,0x7fa00000,0x7fe00000,2 +np.float32,0xff07b4b2,0x0,2 +np.float32,0x7f6231a0,0x7f800000,2 +np.float32,0xb8d1d,0x3f800000,2 +np.float32,0x3ee01d24,0x3fad5f16,2 +np.float32,0xbf43f59f,0x3f169869,2 +np.float32,0x801f5257,0x3f800000,2 +np.float32,0x803c15d8,0x3f800000,2 +np.float32,0x3f171a08,0x3fc0b42a,2 +np.float32,0x127aef,0x3f800000,2 +np.float32,0xfd1c6,0x3f800000,2 +np.float32,0x3f1ed13e,0x3fc4c59a,2 +np.float32,0x57fd4f,0x3f800000,2 +np.float32,0x6e8c61,0x3f800000,2 +np.float32,0x804019ab,0x3f800000,2 +np.float32,0x3ef4e5c6,0x3fb251a1,2 +np.float32,0x5044c3,0x3f800000,2 +np.float32,0x3f04460f,0x3fb7204b,2 +np.float32,0x7e326b47,0x7f800000,2 +np.float32,0x800a7e4c,0x3f800000,2 +np.float32,0xbf47ec82,0x3f14fccc,2 +np.float32,0xbedb1b3e,0x3f3e4a4d,2 +np.float32,0x3f741d86,0x3ff7e4b0,2 +np.float32,0xbe249d20,0x3f6501a6,2 +np.float32,0xbf2ea152,0x3f1f8c68,2 +np.float32,0x3ec6dbcc,0x3fa78b3f,2 +np.float32,0x7ebd9bb4,0x7f800000,2 +np.float32,0x3f61b574,0x3febd77a,2 +np.float32,0x3f3dfb2b,0x3fd61891,2 +np.float32,0x3c7d95,0x3f800000,2 +np.float32,0x8071e840,0x3f800000,2 +np.float32,0x15c6fe,0x3f800000,2 +np.float32,0xbf096601,0x3f307893,2 +np.float32,0x7f5c2ef9,0x7f800000,2 +np.float32,0xbe79f750,0x3f582689,2 +np.float32,0x1eb692,0x3f800000,2 
+np.float32,0xbd8024f0,0x3f75226d,2 +np.float32,0xbf5a8be8,0x3f0da950,2 +np.float32,0xbf4d28f3,0x3f12e3e1,2 +np.float32,0x7f800000,0x7f800000,2 +np.float32,0xfea8a758,0x0,2 +np.float32,0x8075d2cf,0x3f800000,2 +np.float32,0xfd99af58,0x0,2 +np.float32,0x9e6a,0x3f800000,2 +np.float32,0x2fa19f,0x3f800000,2 +np.float32,0x3e9f4206,0x3f9ecc56,2 +np.float32,0xbee0b666,0x3f3cd9fc,2 +np.float32,0xbec558c4,0x3f43fab1,2 +np.float32,0x7e9a77df,0x7f800000,2 +np.float32,0xff3a9694,0x0,2 +np.float32,0x3f3b3708,0x3fd47f9a,2 +np.float32,0x807cd6d4,0x3f800000,2 +np.float32,0x804aa422,0x3f800000,2 +np.float32,0xfead7a70,0x0,2 +np.float32,0x3f08c610,0x3fb95efe,2 +np.float32,0xff390126,0x0,2 +np.float32,0x5d2d47,0x3f800000,2 +np.float32,0x8006849c,0x3f800000,2 +np.float32,0x654f6e,0x3f800000,2 +np.float32,0xff478a16,0x0,2 +np.float32,0x3f480b0c,0x3fdc024c,2 +np.float32,0xbc3b96c0,0x3f7df9f4,2 +np.float32,0xbcc96460,0x3f7bacb5,2 +np.float32,0x7f349f30,0x7f800000,2 +np.float32,0xbe08fa98,0x3f6954a1,2 +np.float32,0x4f3a13,0x3f800000,2 +np.float32,0x7f6a5ab4,0x7f800000,2 +np.float32,0x7eb85247,0x7f800000,2 +np.float32,0xbf287246,0x3f223e08,2 +np.float32,0x801584d0,0x3f800000,2 +np.float32,0x7ec25371,0x7f800000,2 +np.float32,0x3f002165,0x3fb51552,2 +np.float32,0x3e1108a8,0x3f8d3429,2 +np.float32,0x4f0f88,0x3f800000,2 +np.float32,0x7f67c1ce,0x7f800000,2 +np.float32,0xbf4348f8,0x3f16dedf,2 +np.float32,0xbe292b64,0x3f644d24,2 +np.float32,0xbf2bfa36,0x3f20b2d6,2 +np.float32,0xbf2a6e58,0x3f215f71,2 +np.float32,0x3e97d5d3,0x3f9d35df,2 +np.float32,0x31f597,0x3f800000,2 +np.float32,0x100544,0x3f800000,2 +np.float32,0x10a197,0x3f800000,2 +np.float32,0x3f44df50,0x3fda20d2,2 +np.float32,0x59916d,0x3f800000,2 +np.float32,0x707472,0x3f800000,2 +np.float32,0x8054194e,0x3f800000,2 +np.float32,0x80627b01,0x3f800000,2 +np.float32,0x7f4d5a5b,0x7f800000,2 +np.float32,0xbcecad00,0x3f7aeca5,2 +np.float32,0xff69c541,0x0,2 +np.float32,0xbe164e20,0x3f673c3a,2 +np.float32,0x3dd321de,0x3f897b39,2 +np.float32,0x3c9c4900,0x3f81b431,2 +np.float32,0x7f0efae3,0x7f800000,2 +np.float32,0xbf1b3ee6,0x3f282567,2 +np.float32,0x3ee858ac,0x3faf5083,2 +np.float32,0x3f0e6a39,0x3fbc3965,2 +np.float32,0x7f0c06d8,0x7f800000,2 +np.float32,0x801dd236,0x3f800000,2 +np.float32,0x564245,0x3f800000,2 +np.float32,0x7e99d3ad,0x7f800000,2 +np.float32,0xff3b0164,0x0,2 +np.float32,0x3f386f18,0x3fd2e785,2 +np.float32,0x7f603c39,0x7f800000,2 +np.float32,0x3cbd9b00,0x3f8211f0,2 +np.float32,0x2178e2,0x3f800000,2 +np.float32,0x5db226,0x3f800000,2 +np.float32,0xfec78d62,0x0,2 +np.float32,0x7f40bc1e,0x7f800000,2 +np.float32,0x80325064,0x3f800000,2 +np.float32,0x3f6068dc,0x3feb0377,2 +np.float32,0xfe8b95c6,0x0,2 +np.float32,0xbe496894,0x3f5f5f87,2 +np.float32,0xbf18722a,0x3f296cf4,2 +np.float32,0x332d0e,0x3f800000,2 +np.float32,0x3f6329dc,0x3fecc5c0,2 +np.float32,0x807d1802,0x3f800000,2 +np.float32,0x3e8afcee,0x3f9a7ff1,2 +np.float32,0x26a0a7,0x3f800000,2 +np.float32,0x7f13085d,0x7f800000,2 +np.float32,0x68d547,0x3f800000,2 +np.float32,0x7e9b04ae,0x7f800000,2 +np.float32,0x3f3ecdfe,0x3fd692ea,2 +np.float32,0x805256f4,0x3f800000,2 +np.float32,0x3f312dc8,0x3fcecd42,2 +np.float32,0x23ca15,0x3f800000,2 +np.float32,0x3f53c455,0x3fe31ad6,2 +np.float32,0xbf21186c,0x3f2580fd,2 +np.float32,0x803b9bb1,0x3f800000,2 +np.float32,0xff6ae1fc,0x0,2 +np.float32,0x2103cf,0x3f800000,2 +np.float32,0xbedcec6c,0x3f3dd29d,2 +np.float32,0x7f520afa,0x7f800000,2 +np.float32,0x7e8b44f2,0x7f800000,2 +np.float32,0xfef7f6ce,0x0,2 +np.float32,0xbd5e7c30,0x3f768a6f,2 +np.float32,0xfeb36848,0x0,2 
+np.float32,0xff49effb,0x0,2 +np.float32,0xbec207c0,0x3f44dc74,2 +np.float32,0x3e91147f,0x3f9bc77f,2 +np.float32,0xfe784cd4,0x0,2 +np.float32,0xfd1a7250,0x0,2 +np.float32,0xff3b3f48,0x0,2 +np.float32,0x3f685db5,0x3ff0219f,2 +np.float32,0x3f370976,0x3fd21bae,2 +np.float32,0xfed4cc20,0x0,2 +np.float32,0xbf41e337,0x3f17714a,2 +np.float32,0xbf4e8638,0x3f12593a,2 +np.float32,0x3edaf0f1,0x3fac295e,2 +np.float32,0x803cbb4f,0x3f800000,2 +np.float32,0x7f492043,0x7f800000,2 +np.float32,0x2cabcf,0x3f800000,2 +np.float32,0x17f8ac,0x3f800000,2 +np.float32,0x3e846478,0x3f99205a,2 +np.float32,0x76948f,0x3f800000,2 +np.float32,0x1,0x3f800000,2 +np.float32,0x7ea6419e,0x7f800000,2 +np.float32,0xa5315,0x3f800000,2 +np.float32,0xff3a8e32,0x0,2 +np.float32,0xbe5714e8,0x3f5d50b7,2 +np.float32,0xfeadf960,0x0,2 +np.float32,0x3ebbd1a8,0x3fa50efc,2 +np.float32,0x7f31dce7,0x7f800000,2 +np.float32,0x80314999,0x3f800000,2 +np.float32,0x8017f41b,0x3f800000,2 +np.float32,0x7ed6d051,0x7f800000,2 +np.float32,0x7f525688,0x7f800000,2 +np.float32,0x7f7fffff,0x7f800000,2 +np.float32,0x3e8b0461,0x3f9a8180,2 +np.float32,0x3d9fe46e,0x3f871e1f,2 +np.float32,0x5e6d8f,0x3f800000,2 +np.float32,0xbf09ae55,0x3f305608,2 +np.float32,0xfe7028c4,0x0,2 +np.float32,0x7f3ade56,0x7f800000,2 +np.float32,0xff4c9ef9,0x0,2 +np.float32,0x7e3199cf,0x7f800000,2 +np.float32,0x8048652f,0x3f800000,2 +np.float32,0x805e1237,0x3f800000,2 +np.float32,0x189ed8,0x3f800000,2 +np.float32,0xbea7c094,0x3f4bfd98,2 +np.float32,0xbf2f109c,0x3f1f5c5c,2 +np.float32,0xbf0e7f4c,0x3f2e0d2c,2 +np.float32,0x8005981f,0x3f800000,2 +np.float32,0xbf762005,0x3f0377f3,2 +np.float32,0xbf0f60ab,0x3f2da317,2 +np.float32,0xbf4aa3e7,0x3f13e54e,2 +np.float32,0xbf348fd2,0x3f1d01aa,2 +np.float32,0x3e530b50,0x3f93a7fb,2 +np.float32,0xbf0b05a4,0x3f2fb26a,2 +np.float32,0x3eea416c,0x3fafc4aa,2 +np.float32,0x805ad04d,0x3f800000,2 +np.float32,0xbf6328d8,0x3f0a655e,2 +np.float32,0x3f7347b9,0x3ff75558,2 +np.float32,0xfda3ca68,0x0,2 +np.float32,0x80497d21,0x3f800000,2 +np.float32,0x3e740452,0x3f96fd22,2 +np.float32,0x3e528e57,0x3f939b7e,2 +np.float32,0x3e9e19fa,0x3f9e8cbd,2 +np.float32,0x8078060b,0x3f800000,2 +np.float32,0x3f3fea7a,0x3fd73872,2 +np.float32,0xfcfa30a0,0x0,2 +np.float32,0x7f4eb4bf,0x7f800000,2 +np.float32,0x3f712618,0x3ff5e900,2 +np.float32,0xbf668f0e,0x3f0920c6,2 +np.float32,0x3f3001e9,0x3fce259d,2 +np.float32,0xbe9b6fac,0x3f4f6b9c,2 +np.float32,0xbf61fcf3,0x3f0ad5ec,2 +np.float32,0xff08a55c,0x0,2 +np.float32,0x3e805014,0x3f984872,2 +np.float32,0x6ce04c,0x3f800000,2 +np.float32,0x7f7cbc07,0x7f800000,2 +np.float32,0x3c87dc,0x3f800000,2 +np.float32,0x3f2ee498,0x3fcd869a,2 +np.float32,0x4b1116,0x3f800000,2 +np.float32,0x3d382d06,0x3f840d5f,2 +np.float32,0xff7de21e,0x0,2 +np.float32,0x3f2f1d6d,0x3fcda63c,2 +np.float32,0xbf1c1618,0x3f27c38a,2 +np.float32,0xff4264b1,0x0,2 +np.float32,0x8026e5e7,0x3f800000,2 +np.float32,0xbe6fa180,0x3f59ab02,2 +np.float32,0xbe923c02,0x3f52053b,2 +np.float32,0xff3aa453,0x0,2 +np.float32,0x3f77a7ac,0x3ffa47d0,2 +np.float32,0xbed15f36,0x3f40d08a,2 +np.float32,0xa62d,0x3f800000,2 +np.float32,0xbf342038,0x3f1d3123,2 +np.float32,0x7f2f7f80,0x7f800000,2 +np.float32,0x7f2b6fc1,0x7f800000,2 +np.float32,0xff323540,0x0,2 +np.float32,0x3f1a2b6e,0x3fc24faa,2 +np.float32,0x800cc1d2,0x3f800000,2 +np.float32,0xff38fa01,0x0,2 +np.float32,0x80800000,0x3f800000,2 +np.float32,0xbf3d22e0,0x3f196745,2 +np.float32,0x7f40fd62,0x7f800000,2 +np.float32,0x7e1785c7,0x7f800000,2 +np.float32,0x807408c4,0x3f800000,2 +np.float32,0xbf300192,0x3f1ef485,2 
+np.float32,0x351e3d,0x3f800000,2 +np.float32,0x7f5ab736,0x7f800000,2 +np.float32,0x2f1696,0x3f800000,2 +np.float32,0x806ac5d7,0x3f800000,2 +np.float32,0x42ec59,0x3f800000,2 +np.float32,0x7f79f52d,0x7f800000,2 +np.float32,0x44ad28,0x3f800000,2 +np.float32,0xbf49dc9c,0x3f143532,2 +np.float32,0x3f6c1f1f,0x3ff295e7,2 +np.float32,0x1589b3,0x3f800000,2 +np.float32,0x3f49b44e,0x3fdd0031,2 +np.float32,0x7f5942c9,0x7f800000,2 +np.float32,0x3f2dab28,0x3fccd877,2 +np.float32,0xff7fffff,0x0,2 +np.float32,0x80578eb2,0x3f800000,2 +np.float32,0x3f39ba67,0x3fd3a50b,2 +np.float32,0x8020340d,0x3f800000,2 +np.float32,0xbf6025b2,0x3f0b8783,2 +np.float32,0x8015ccfe,0x3f800000,2 +np.float32,0x3f6b9762,0x3ff23cd0,2 +np.float32,0xfeeb0c86,0x0,2 +np.float32,0x802779bc,0x3f800000,2 +np.float32,0xbf32bf64,0x3f1dc796,2 +np.float32,0xbf577eb6,0x3f0ed631,2 +np.float32,0x0,0x3f800000,2 +np.float32,0xfe99de6c,0x0,2 +np.float32,0x7a4e53,0x3f800000,2 +np.float32,0x1a15d3,0x3f800000,2 +np.float32,0x8035fe16,0x3f800000,2 +np.float32,0x3e845784,0x3f991dab,2 +np.float32,0x43d688,0x3f800000,2 +np.float32,0xbd447cc0,0x3f77a0b7,2 +np.float32,0x3f83fa,0x3f800000,2 +np.float32,0x3f141df2,0x3fbf2719,2 +np.float32,0x805c586a,0x3f800000,2 +np.float32,0x14c47e,0x3f800000,2 +np.float32,0x3d3bed00,0x3f8422d4,2 +np.float32,0x7f6f4ecd,0x7f800000,2 +np.float32,0x3f0a5e5a,0x3fba2c5c,2 +np.float32,0x523ecf,0x3f800000,2 +np.float32,0xbef4a6e8,0x3f37d262,2 +np.float32,0xff54eb58,0x0,2 +np.float32,0xff3fc875,0x0,2 +np.float32,0x8067c392,0x3f800000,2 +np.float32,0xfedae910,0x0,2 +np.float32,0x80595979,0x3f800000,2 +np.float32,0x3ee87d1d,0x3faf5929,2 +np.float32,0x7f5bad33,0x7f800000,2 +np.float32,0xbf45b868,0x3f15e109,2 +np.float32,0x3ef2277d,0x3fb1a868,2 +np.float32,0x3ca5a950,0x3f81ce8c,2 +np.float32,0x3e70f4e6,0x3f96ad25,2 +np.float32,0xfe3515bc,0x0,2 +np.float32,0xfe4af088,0x0,2 +np.float32,0xff3c78b2,0x0,2 +np.float32,0x7f50f51a,0x7f800000,2 +np.float32,0x3e3a232a,0x3f913009,2 +np.float32,0x7dfec6ff,0x7f800000,2 +np.float32,0x3e1bbaec,0x3f8e3ad6,2 +np.float32,0xbd658fa0,0x3f763ee7,2 +np.float32,0xfe958684,0x0,2 +np.float32,0x503670,0x3f800000,2 +np.float32,0x3f800000,0x40000000,2 +np.float32,0x1bbec6,0x3f800000,2 +np.float32,0xbea7bb7c,0x3f4bff00,2 +np.float32,0xff3a24a2,0x0,2 +np.float32,0xbf416240,0x3f17a635,2 +np.float32,0xbf800000,0x3f000000,2 +np.float32,0xff0c965c,0x0,2 +np.float32,0x80000000,0x3f800000,2 +np.float32,0xbec2c69a,0x3f44a99e,2 +np.float32,0x5b68d4,0x3f800000,2 +np.float32,0xb9a93000,0x3f7ff158,2 +np.float32,0x3d5a0dd8,0x3f84cfbc,2 +np.float32,0xbeaf7a28,0x3f49de4e,2 +np.float32,0x3ee83555,0x3faf4820,2 +np.float32,0xfd320330,0x0,2 +np.float32,0xe1af2,0x3f800000,2 +np.float32,0x7cf28caf,0x7f800000,2 +np.float32,0x80781009,0x3f800000,2 +np.float32,0xbf1e0baf,0x3f26e04d,2 +np.float32,0x7edb05b1,0x7f800000,2 +np.float32,0x3de004,0x3f800000,2 +np.float32,0xff436af6,0x0,2 +np.float32,0x802a9408,0x3f800000,2 +np.float32,0x7ed82205,0x7f800000,2 +np.float32,0x3e3f8212,0x3f91b767,2 +np.float32,0x16a2b2,0x3f800000,2 +np.float32,0xff1e5af3,0x0,2 +np.float32,0xbf1c860c,0x3f2790b7,2 +np.float32,0x3f3bc5da,0x3fd4d1d6,2 +np.float32,0x7f5f7085,0x7f800000,2 +np.float32,0x7f68e409,0x7f800000,2 +np.float32,0x7f4b3388,0x7f800000,2 +np.float32,0x7ecaf440,0x7f800000,2 +np.float32,0x80078785,0x3f800000,2 +np.float32,0x3ebd800d,0x3fa56f45,2 +np.float32,0xbe39a140,0x3f61c58e,2 +np.float32,0x803b587e,0x3f800000,2 +np.float32,0xbeaaa418,0x3f4b31c4,2 +np.float32,0xff7e2b9f,0x0,2 +np.float32,0xff5180a3,0x0,2 +np.float32,0xbf291394,0x3f21f73c,2 
+np.float32,0x7f7b9698,0x7f800000,2 +np.float32,0x4218da,0x3f800000,2 +np.float32,0x7f135262,0x7f800000,2 +np.float32,0x804c10e8,0x3f800000,2 +np.float32,0xbf1c2a54,0x3f27ba5a,2 +np.float32,0x7f41fd32,0x7f800000,2 +np.float32,0x3e5cc464,0x3f94a195,2 +np.float32,0xff7a2fa7,0x0,2 +np.float32,0x3e05dc30,0x3f8c23c9,2 +np.float32,0x7f206d99,0x7f800000,2 +np.float32,0xbe9ae520,0x3f4f9287,2 +np.float32,0xfe4f4d58,0x0,2 +np.float32,0xbf44db42,0x3f163ae3,2 +np.float32,0x3f65ac48,0x3fee6300,2 +np.float32,0x3ebfaf36,0x3fa5ecb0,2 +np.float32,0x3f466719,0x3fdb08b0,2 +np.float32,0x80000001,0x3f800000,2 +np.float32,0xff4b3c7b,0x0,2 +np.float32,0x3df44374,0x3f8b0819,2 +np.float32,0xfea4b540,0x0,2 +np.float32,0x7f358e3d,0x7f800000,2 +np.float32,0x801f5e63,0x3f800000,2 +np.float32,0x804ae77e,0x3f800000,2 +np.float32,0xdbb5,0x3f800000,2 +np.float32,0x7f0a7e3b,0x7f800000,2 +np.float32,0xbe4152e4,0x3f609953,2 +np.float32,0x4b9579,0x3f800000,2 +np.float32,0x3ece0bd4,0x3fa92ea5,2 +np.float32,0x7e499d9a,0x7f800000,2 +np.float32,0x80637d8a,0x3f800000,2 +np.float32,0x3e50a425,0x3f936a8b,2 +np.float32,0xbf0e8cb0,0x3f2e06dd,2 +np.float32,0x802763e2,0x3f800000,2 +np.float32,0xff73041b,0x0,2 +np.float32,0xfea466da,0x0,2 +np.float32,0x80064c73,0x3f800000,2 +np.float32,0xbef29222,0x3f385728,2 +np.float32,0x8029c215,0x3f800000,2 +np.float32,0xbd3994e0,0x3f7815d1,2 +np.float32,0xbe6ac9e4,0x3f5a61f3,2 +np.float32,0x804b58b0,0x3f800000,2 +np.float32,0xbdb83be0,0x3f70865c,2 +np.float32,0x7ee18da2,0x7f800000,2 +np.float32,0xfd4ca010,0x0,2 +np.float32,0x807c668b,0x3f800000,2 +np.float32,0xbd40ed90,0x3f77c6e9,2 +np.float32,0x7efc6881,0x7f800000,2 +np.float32,0xfe633bfc,0x0,2 +np.float32,0x803ce363,0x3f800000,2 +np.float32,0x7ecba81e,0x7f800000,2 +np.float32,0xfdcb2378,0x0,2 +np.float32,0xbebc5524,0x3f4662b2,2 +np.float32,0xfaa30000,0x0,2 +np.float32,0x805d451b,0x3f800000,2 +np.float32,0xbee85600,0x3f3ae996,2 +np.float32,0xfefb0a54,0x0,2 +np.float32,0xbdfc6690,0x3f6b0a08,2 +np.float32,0x58a57,0x3f800000,2 +np.float32,0x3b41b7,0x3f800000,2 +np.float32,0x7c99812d,0x7f800000,2 +np.float32,0xbd3ae740,0x3f78079d,2 +np.float32,0xbf4a48a7,0x3f1409dd,2 +np.float32,0xfdeaad58,0x0,2 +np.float32,0xbe9aa65a,0x3f4fa42c,2 +np.float32,0x3f79d78c,0x3ffbc458,2 +np.float32,0x805e7389,0x3f800000,2 +np.float32,0x7ebb3612,0x7f800000,2 +np.float32,0x2e27dc,0x3f800000,2 +np.float32,0x80726dec,0x3f800000,2 +np.float32,0xfe8fb738,0x0,2 +np.float32,0xff1ff3bd,0x0,2 +np.float32,0x7f5264a2,0x7f800000,2 +np.float32,0x3f5a6893,0x3fe739ca,2 +np.float32,0xbec4029c,0x3f44558d,2 +np.float32,0xbef65cfa,0x3f37657e,2 +np.float32,0x63aba1,0x3f800000,2 +np.float32,0xfbb6e200,0x0,2 +np.float32,0xbf3466fc,0x3f1d1307,2 +np.float32,0x3f258844,0x3fc861d7,2 +np.float32,0xbf5f29a7,0x3f0be6dc,2 +np.float32,0x802b51cd,0x3f800000,2 +np.float32,0xbe9094dc,0x3f527dae,2 +np.float32,0xfec2e68c,0x0,2 +np.float32,0x807b38bd,0x3f800000,2 +np.float32,0xbf594662,0x3f0e2663,2 +np.float32,0x7cbcf747,0x7f800000,2 +np.float32,0xbe4b88f0,0x3f5f0d47,2 +np.float32,0x3c53c4,0x3f800000,2 +np.float32,0xbe883562,0x3f54e3f7,2 +np.float32,0xbf1efaf0,0x3f267456,2 +np.float32,0x3e22cd3e,0x3f8ee98b,2 +np.float32,0x80434875,0x3f800000,2 +np.float32,0xbf000b44,0x3f34ff6e,2 +np.float32,0x7f311c3a,0x7f800000,2 +np.float32,0x802f7f3f,0x3f800000,2 +np.float32,0x805155fe,0x3f800000,2 +np.float32,0x7f5d7485,0x7f800000,2 +np.float32,0x80119197,0x3f800000,2 +np.float32,0x3f445b8b,0x3fd9d30d,2 +np.float32,0xbf638eb3,0x3f0a3f38,2 +np.float32,0x402410,0x3f800000,2 +np.float32,0xbc578a40,0x3f7dad1d,2 
+np.float32,0xbeecbf8a,0x3f39cc9e,2 +np.float32,0x7f2935a4,0x7f800000,2 +np.float32,0x3f570fea,0x3fe523e2,2 +np.float32,0xbf06bffa,0x3f31bdb6,2 +np.float32,0xbf2afdfd,0x3f2120ba,2 +np.float32,0x7f76f7ab,0x7f800000,2 +np.float32,0xfee2d1e8,0x0,2 +np.float32,0x800b026d,0x3f800000,2 +np.float32,0xff0eda75,0x0,2 +np.float32,0x3d4c,0x3f800000,2 +np.float32,0xbed538a2,0x3f3fcffb,2 +np.float32,0x3f73f4f9,0x3ff7c979,2 +np.float32,0x2aa9fc,0x3f800000,2 +np.float32,0x806a45b3,0x3f800000,2 +np.float32,0xff770d35,0x0,2 +np.float32,0x7e999be3,0x7f800000,2 +np.float32,0x80741128,0x3f800000,2 +np.float32,0xff6aac34,0x0,2 +np.float32,0x470f74,0x3f800000,2 +np.float32,0xff423b7b,0x0,2 +np.float32,0x17dfdd,0x3f800000,2 +np.float32,0x7f029e12,0x7f800000,2 +np.float32,0x803fcb9d,0x3f800000,2 +np.float32,0x3f3dc3,0x3f800000,2 +np.float32,0x7f3a27bc,0x7f800000,2 +np.float32,0x3e473108,0x3f9279ec,2 +np.float32,0x7f4add5d,0x7f800000,2 +np.float32,0xfd9736e0,0x0,2 +np.float32,0x805f1df2,0x3f800000,2 +np.float32,0x6c49c1,0x3f800000,2 +np.float32,0x7ec733c7,0x7f800000,2 +np.float32,0x804c1abf,0x3f800000,2 +np.float32,0x3de2e887,0x3f8a37a5,2 +np.float32,0x3f51630a,0x3fe1a561,2 +np.float32,0x3de686a8,0x3f8a62ff,2 +np.float32,0xbedb3538,0x3f3e439c,2 +np.float32,0xbf3aa892,0x3f1a6f9e,2 +np.float32,0x7ee5fb32,0x7f800000,2 +np.float32,0x7e916c9b,0x7f800000,2 +np.float32,0x3f033f1c,0x3fb69e19,2 +np.float32,0x25324b,0x3f800000,2 +np.float32,0x3f348d1d,0x3fd0b2e2,2 +np.float32,0x3f5797e8,0x3fe57851,2 +np.float32,0xbf69c316,0x3f07f1a0,2 +np.float32,0xbe8b7fb0,0x3f53f1bf,2 +np.float32,0xbdbbc190,0x3f703d00,2 +np.float32,0xff6c4fc0,0x0,2 +np.float32,0x7f29fcbe,0x7f800000,2 +np.float32,0x3f678d19,0x3fef9a23,2 +np.float32,0x73d140,0x3f800000,2 +np.float32,0x3e25bdd2,0x3f8f326b,2 +np.float32,0xbeb775ec,0x3f47b2c6,2 +np.float32,0xff451c4d,0x0,2 +np.float32,0x8072c466,0x3f800000,2 +np.float32,0x3f65e836,0x3fee89b2,2 +np.float32,0x52ca7a,0x3f800000,2 +np.float32,0x62cfed,0x3f800000,2 +np.float32,0xbf583dd0,0x3f0e8c5c,2 +np.float32,0xbf683842,0x3f088342,2 +np.float32,0x3f1a7828,0x3fc2780c,2 +np.float32,0x800ea979,0x3f800000,2 +np.float32,0xbeb9133c,0x3f474328,2 +np.float32,0x3ef09fc7,0x3fb14a4b,2 +np.float32,0x7ebbcb75,0x7f800000,2 +np.float32,0xff316c0e,0x0,2 +np.float32,0x805b84e3,0x3f800000,2 +np.float32,0x3d6a55e0,0x3f852d8a,2 +np.float32,0x3e755788,0x3f971fd1,2 +np.float32,0x3ee7aacb,0x3faf2743,2 +np.float32,0x7f714039,0x7f800000,2 +np.float32,0xff70bad8,0x0,2 +np.float32,0xbe0b74c8,0x3f68f08c,2 +np.float32,0xbf6cb170,0x3f06de86,2 +np.float32,0x7ec1fbff,0x7f800000,2 +np.float32,0x8014b1f6,0x3f800000,2 +np.float32,0xfe8b45fe,0x0,2 +np.float32,0x6e2220,0x3f800000,2 +np.float32,0x3ed1777d,0x3fa9f7ab,2 +np.float32,0xff48e467,0x0,2 +np.float32,0xff76c5aa,0x0,2 +np.float32,0x3e9bd330,0x3f9e0fd7,2 +np.float32,0x3f17de4f,0x3fc11aae,2 +np.float32,0x7eeaa2fd,0x7f800000,2 +np.float32,0xbf572746,0x3f0ef806,2 +np.float32,0x7e235554,0x7f800000,2 +np.float32,0xfe24fc1c,0x0,2 +np.float32,0x7daf71ad,0x7f800000,2 +np.float32,0x800d4a6b,0x3f800000,2 +np.float32,0xbf6fc31d,0x3f05c0ce,2 +np.float32,0x1c4d93,0x3f800000,2 +np.float32,0x7ee9200c,0x7f800000,2 +np.float32,0x3f54b4da,0x3fe3aeec,2 +np.float32,0x2b37b1,0x3f800000,2 +np.float32,0x3f7468bd,0x3ff81731,2 +np.float32,0x3f2850ea,0x3fc9e5f4,2 +np.float32,0xbe0d47ac,0x3f68a6f9,2 +np.float32,0x314877,0x3f800000,2 +np.float32,0x802700c3,0x3f800000,2 +np.float32,0x7e2c915f,0x7f800000,2 +np.float32,0x800d0059,0x3f800000,2 +np.float32,0x3f7f3c25,0x3fff7862,2 +np.float32,0xff735d31,0x0,2 
+np.float32,0xff7e339e,0x0,2 +np.float32,0xbef96cf0,0x3f36a340,2 +np.float32,0x3db6ea21,0x3f882cb2,2 +np.float32,0x67cb3d,0x3f800000,2 +np.float32,0x801f349d,0x3f800000,2 +np.float32,0x3f1390ec,0x3fbede29,2 +np.float32,0x7f13644a,0x7f800000,2 +np.float32,0x804a369b,0x3f800000,2 +np.float32,0x80262666,0x3f800000,2 +np.float32,0x7e850fbc,0x7f800000,2 +np.float32,0x18b002,0x3f800000,2 +np.float32,0x8051f1ed,0x3f800000,2 +np.float32,0x3eba48f6,0x3fa4b753,2 +np.float32,0xbf3f4130,0x3f1886a9,2 +np.float32,0xbedac006,0x3f3e61cf,2 +np.float32,0xbf097c70,0x3f306ddc,2 +np.float32,0x4aba6d,0x3f800000,2 +np.float32,0x580078,0x3f800000,2 +np.float32,0x3f64d82e,0x3fedda40,2 +np.float32,0x7f781fd6,0x7f800000,2 +np.float32,0x6aff3d,0x3f800000,2 +np.float32,0xff25e074,0x0,2 +np.float32,0x7ea9ec89,0x7f800000,2 +np.float32,0xbf63b816,0x3f0a2fbb,2 +np.float32,0x133f07,0x3f800000,2 +np.float32,0xff800000,0x0,2 +np.float32,0x8013dde7,0x3f800000,2 +np.float32,0xff770b95,0x0,2 +np.float32,0x806154e8,0x3f800000,2 +np.float32,0x3f1e7bce,0x3fc4981a,2 +np.float32,0xff262c78,0x0,2 +np.float32,0x3f59a652,0x3fe6c04c,2 +np.float32,0x7f220166,0x7f800000,2 +np.float32,0x7eb24939,0x7f800000,2 +np.float32,0xbed58bb0,0x3f3fba6a,2 +np.float32,0x3c2ad000,0x3f80eda7,2 +np.float32,0x2adb2e,0x3f800000,2 +np.float32,0xfe8b213e,0x0,2 +np.float32,0xbf2e0c1e,0x3f1fccea,2 +np.float32,0x7e1716be,0x7f800000,2 +np.float32,0x80184e73,0x3f800000,2 +np.float32,0xbf254743,0x3f23a3d5,2 +np.float32,0x8063a722,0x3f800000,2 +np.float32,0xbe50adf0,0x3f5e46c7,2 +np.float32,0x3f614158,0x3feb8d60,2 +np.float32,0x8014bbc8,0x3f800000,2 +np.float32,0x283bc7,0x3f800000,2 +np.float32,0x3ffb5c,0x3f800000,2 +np.float32,0xfe8de6bc,0x0,2 +np.float32,0xbea6e086,0x3f4c3b82,2 +np.float32,0xfee64b92,0x0,2 +np.float32,0x506c1a,0x3f800000,2 +np.float32,0xff342af8,0x0,2 +np.float32,0x6b6f4c,0x3f800000,2 +np.float32,0xfeb42b1e,0x0,2 +np.float32,0x3e49384a,0x3f92ad71,2 +np.float32,0x152d08,0x3f800000,2 +np.float32,0x804c8f09,0x3f800000,2 +np.float32,0xff5e927d,0x0,2 +np.float32,0x6374da,0x3f800000,2 +np.float32,0x3f48f011,0x3fdc8ae4,2 +np.float32,0xbf446a30,0x3f1668e8,2 +np.float32,0x3ee77073,0x3faf196e,2 +np.float32,0xff4caa40,0x0,2 +np.float32,0x7efc9363,0x7f800000,2 +np.float32,0xbf706dcc,0x3f05830d,2 +np.float32,0xfe29c7e8,0x0,2 +np.float32,0x803cfe58,0x3f800000,2 +np.float32,0x3ec34c7c,0x3fa6bd0a,2 +np.float32,0x3eb85b62,0x3fa44968,2 +np.float32,0xfda1b9d8,0x0,2 +np.float32,0x802932cd,0x3f800000,2 +np.float32,0xbf5cde78,0x3f0cc5fa,2 +np.float32,0x3f31bf44,0x3fcf1ec8,2 +np.float32,0x803a0882,0x3f800000,2 +np.float32,0x800000,0x3f800000,2 +np.float32,0x3f54110e,0x3fe34a08,2 +np.float32,0x80645ea9,0x3f800000,2 +np.float32,0xbd8c1070,0x3f7425c3,2 +np.float32,0x801a006a,0x3f800000,2 +np.float32,0x7f5d161e,0x7f800000,2 +np.float32,0x805b5df3,0x3f800000,2 +np.float32,0xbf71a7c0,0x3f0511be,2 +np.float32,0xbe9a55c0,0x3f4fbad6,2 +np.float64,0xde7e2fd9bcfc6,0x3ff0000000000000,2 +np.float64,0xbfd8cd88eb319b12,0x3fe876349efbfa2b,2 +np.float64,0x3fe4fa13ace9f428,0x3ff933fbb117d196,2 +np.float64,0x475b3d048eb68,0x3ff0000000000000,2 +np.float64,0x7fef39ed07be73d9,0x7ff0000000000000,2 +np.float64,0x80026b84d904d70a,0x3ff0000000000000,2 +np.float64,0xebd60627d7ac1,0x3ff0000000000000,2 +np.float64,0xbfd7cbefdbaf97e0,0x3fe8bad30f6cf8e1,2 +np.float64,0x7fc17c605a22f8c0,0x7ff0000000000000,2 +np.float64,0x8cdac05119b58,0x3ff0000000000000,2 +np.float64,0x3fc45cd60a28b9ac,0x3ff1dd8028ec3f41,2 +np.float64,0x7fef4fce137e9f9b,0x7ff0000000000000,2 
+np.float64,0xe5a2b819cb457,0x3ff0000000000000,2 +np.float64,0xe3bcfd4dc77a0,0x3ff0000000000000,2 +np.float64,0x68f0b670d1e17,0x3ff0000000000000,2 +np.float64,0xae69a6455cd35,0x3ff0000000000000,2 +np.float64,0xffe7007a0c6e00f4,0x0,2 +np.float64,0x59fc57a8b3f8c,0x3ff0000000000000,2 +np.float64,0xbfeee429c0bdc854,0x3fe0638fa62bed9f,2 +np.float64,0x80030bb6e206176f,0x3ff0000000000000,2 +np.float64,0x8006967a36ad2cf5,0x3ff0000000000000,2 +np.float64,0x3fe128176a22502f,0x3ff73393301e5dc8,2 +np.float64,0x218de20c431bd,0x3ff0000000000000,2 +np.float64,0x3fe7dbc48aafb789,0x3ffad38989b5955c,2 +np.float64,0xffda1ef411343de8,0x0,2 +np.float64,0xc6b392838d673,0x3ff0000000000000,2 +np.float64,0x7fe6d080c1ada101,0x7ff0000000000000,2 +np.float64,0xbfed36dd67fa6dbb,0x3fe0fec342c4ee89,2 +np.float64,0x3fee2bb6a3fc576e,0x3ffec1c149f1f092,2 +np.float64,0xbfd1f785eb23ef0c,0x3fea576eb01233cb,2 +np.float64,0x7fdad29a1f35a533,0x7ff0000000000000,2 +np.float64,0xffe8928c4fb12518,0x0,2 +np.float64,0x7fb123160022462b,0x7ff0000000000000,2 +np.float64,0x8007ab56cfaf56ae,0x3ff0000000000000,2 +np.float64,0x7fda342d6634685a,0x7ff0000000000000,2 +np.float64,0xbfe3b7e42c676fc8,0x3fe4e05cf8685b8a,2 +np.float64,0xffa708be7c2e1180,0x0,2 +np.float64,0xbfe8ffbece31ff7e,0x3fe29eb84077a34a,2 +np.float64,0xbf91002008220040,0x3fefa245058f05cb,2 +np.float64,0x8000281f0ee0503f,0x3ff0000000000000,2 +np.float64,0x8005617adc2ac2f6,0x3ff0000000000000,2 +np.float64,0x7fa84fec60309fd8,0x7ff0000000000000,2 +np.float64,0x8d00c0231a018,0x3ff0000000000000,2 +np.float64,0xbfdfe52ca63fca5a,0x3fe6a7324cc00d57,2 +np.float64,0x7fcc81073d39020d,0x7ff0000000000000,2 +np.float64,0x800134ff5a6269ff,0x3ff0000000000000,2 +np.float64,0xffc7fff98d2ffff4,0x0,2 +np.float64,0x8000925ce50124bb,0x3ff0000000000000,2 +np.float64,0xffe2530c66a4a618,0x0,2 +np.float64,0x7fc99070673320e0,0x7ff0000000000000,2 +np.float64,0xbfddd5c1f13bab84,0x3fe72a0c80f8df39,2 +np.float64,0x3fe1c220fee38442,0x3ff7817ec66aa55b,2 +np.float64,0x3fb9a1e1043343c2,0x3ff1265e575e6404,2 +np.float64,0xffef72e0833ee5c0,0x0,2 +np.float64,0x3fe710c0416e2181,0x3ffa5e93588aaa69,2 +np.float64,0xbfd8d23cbab1a47a,0x3fe874f5b9d99885,2 +np.float64,0x7fe9628ebd72c51c,0x7ff0000000000000,2 +np.float64,0xdd5fa611babf5,0x3ff0000000000000,2 +np.float64,0x8002bafac86575f6,0x3ff0000000000000,2 +np.float64,0x68acea44d159e,0x3ff0000000000000,2 +np.float64,0xffd776695eaeecd2,0x0,2 +np.float64,0x80059b59bb4b36b4,0x3ff0000000000000,2 +np.float64,0xbdcdd2af7b9bb,0x3ff0000000000000,2 +np.float64,0x8002b432ee856867,0x3ff0000000000000,2 +np.float64,0xcbc72f09978e6,0x3ff0000000000000,2 +np.float64,0xbfee8f4bf6fd1e98,0x3fe081cc0318b170,2 +np.float64,0xffc6e2892d2dc514,0x0,2 +np.float64,0x7feb682e4db6d05c,0x7ff0000000000000,2 +np.float64,0x8004b70a04296e15,0x3ff0000000000000,2 +np.float64,0x42408a4284812,0x3ff0000000000000,2 +np.float64,0xbfe9b8b197f37163,0x3fe254b4c003ce0a,2 +np.float64,0x3fcaadf5f5355bec,0x3ff27ca7876a8d20,2 +np.float64,0xfff0000000000000,0x0,2 +np.float64,0x7fea8376d33506ed,0x7ff0000000000000,2 +np.float64,0xffef73c2d63ee785,0x0,2 +np.float64,0xffe68b2bae2d1657,0x0,2 +np.float64,0x3fd8339cb2306739,0x3ff4cb774d616f90,2 +np.float64,0xbfc6d1db4d2da3b8,0x3fec47bb873a309c,2 +np.float64,0x7fe858016230b002,0x7ff0000000000000,2 +np.float64,0x7fe74cb99d2e9972,0x7ff0000000000000,2 +np.float64,0xffec2e96dc385d2d,0x0,2 +np.float64,0xb762a9876ec55,0x3ff0000000000000,2 +np.float64,0x3feca230c5794462,0x3ffdbfe62a572f52,2 +np.float64,0xbfb5ebad3a2bd758,0x3fee27eed86dcc39,2 
+np.float64,0x471c705a8e38f,0x3ff0000000000000,2 +np.float64,0x7fc79bb5cf2f376b,0x7ff0000000000000,2 +np.float64,0xbfe53d6164ea7ac3,0x3fe4331b3beb73bd,2 +np.float64,0xbfe375a3f766eb48,0x3fe4fe67edb516e6,2 +np.float64,0x3fe1c7686ca38ed1,0x3ff7842f04770ba9,2 +np.float64,0x242e74dc485cf,0x3ff0000000000000,2 +np.float64,0x8009c06ab71380d6,0x3ff0000000000000,2 +np.float64,0x3fd08505efa10a0c,0x3ff3227b735b956d,2 +np.float64,0xffe3dfcecda7bf9d,0x0,2 +np.float64,0x8001f079bbc3e0f4,0x3ff0000000000000,2 +np.float64,0x3fddc706b6bb8e0c,0x3ff616d927987363,2 +np.float64,0xbfd151373ea2a26e,0x3fea870ba53ec126,2 +np.float64,0x7fe89533bfb12a66,0x7ff0000000000000,2 +np.float64,0xffed302cbc3a6059,0x0,2 +np.float64,0x3fd871cc28b0e398,0x3ff4d97d58c16ae2,2 +np.float64,0x7fbe9239683d2472,0x7ff0000000000000,2 +np.float64,0x848a445909149,0x3ff0000000000000,2 +np.float64,0x8007b104ce2f620a,0x3ff0000000000000,2 +np.float64,0x7fc2cd6259259ac4,0x7ff0000000000000,2 +np.float64,0xbfeadb640df5b6c8,0x3fe1e2b068de10af,2 +np.float64,0x800033b2f1a06767,0x3ff0000000000000,2 +np.float64,0x7fe54e5b7caa9cb6,0x7ff0000000000000,2 +np.float64,0x4f928f209f26,0x3ff0000000000000,2 +np.float64,0x8003c3dc6f2787ba,0x3ff0000000000000,2 +np.float64,0xbfd55a59daaab4b4,0x3fe9649d57b32b5d,2 +np.float64,0xffe3e2968d67c52c,0x0,2 +np.float64,0x80087434d550e86a,0x3ff0000000000000,2 +np.float64,0xffdde800083bd000,0x0,2 +np.float64,0xffe291f0542523e0,0x0,2 +np.float64,0xbfe1419bc3e28338,0x3fe6051d4f95a34a,2 +np.float64,0x3fd9d00ee1b3a01e,0x3ff5292bb8d5f753,2 +np.float64,0x3fdb720b60b6e417,0x3ff589d133625374,2 +np.float64,0xbfe3e21f0967c43e,0x3fe4cd4d02e3ef9a,2 +np.float64,0x7fd7e27f3dafc4fd,0x7ff0000000000000,2 +np.float64,0x3fd1cc2620a3984c,0x3ff366befbc38e3e,2 +np.float64,0x3fe78d05436f1a0b,0x3ffaa5ee4ea54b79,2 +np.float64,0x7e2acc84fc55a,0x3ff0000000000000,2 +np.float64,0x800ffb861c5ff70c,0x3ff0000000000000,2 +np.float64,0xffb2b0db1a2561b8,0x0,2 +np.float64,0xbfe80c2363701847,0x3fe301fdfe789576,2 +np.float64,0x7fe383c1c3e70783,0x7ff0000000000000,2 +np.float64,0xbfeefc02e6fdf806,0x3fe05b1a8528bf6c,2 +np.float64,0xbfe42c9268285925,0x3fe4abdc14793cb8,2 +np.float64,0x1,0x3ff0000000000000,2 +np.float64,0xa71c7ce94e390,0x3ff0000000000000,2 +np.float64,0x800ed4e6777da9cd,0x3ff0000000000000,2 +np.float64,0x3fde11b35d3c2367,0x3ff628bdc6dd1b78,2 +np.float64,0x3fef3964dbfe72ca,0x3fff777cae357608,2 +np.float64,0x3fefe369b7ffc6d4,0x3fffec357be508a3,2 +np.float64,0xbfdef1855f3de30a,0x3fe6e348c58e3fed,2 +np.float64,0x3fee0e2bc13c1c58,0x3ffeae1909c1b973,2 +np.float64,0xbfd31554ffa62aaa,0x3fea06628b2f048a,2 +np.float64,0x800dc56bcc7b8ad8,0x3ff0000000000000,2 +np.float64,0x7fbba01b8e374036,0x7ff0000000000000,2 +np.float64,0x7fd9737a92b2e6f4,0x7ff0000000000000,2 +np.float64,0x3feeae0fac3d5c1f,0x3fff1913705f1f07,2 +np.float64,0x3fdcc64fcdb98ca0,0x3ff5d9c3e5862972,2 +np.float64,0x3fdad9f83db5b3f0,0x3ff56674e81c1bd1,2 +np.float64,0x32b8797065710,0x3ff0000000000000,2 +np.float64,0x3fd20deae6241bd6,0x3ff37495bc057394,2 +np.float64,0x7fc899f0763133e0,0x7ff0000000000000,2 +np.float64,0x80045805fc08b00d,0x3ff0000000000000,2 +np.float64,0xbfcd8304cb3b0608,0x3feb4611f1eaa30c,2 +np.float64,0x3fd632a2fcac6544,0x3ff4592e1ea14fb0,2 +np.float64,0xffeeb066007d60cb,0x0,2 +np.float64,0x800bb12a42b76255,0x3ff0000000000000,2 +np.float64,0xbfe060fe1760c1fc,0x3fe6714640ab2574,2 +np.float64,0x80067ed737acfdaf,0x3ff0000000000000,2 +np.float64,0x3fd5ec3211abd864,0x3ff449adea82e73e,2 +np.float64,0x7fc4b2fdc22965fb,0x7ff0000000000000,2 +np.float64,0xff656afd002ad600,0x0,2 
+np.float64,0xffeadefcdcb5bdf9,0x0,2 +np.float64,0x80052f18610a5e32,0x3ff0000000000000,2 +np.float64,0xbfd5b75c78ab6eb8,0x3fe94b15e0f39194,2 +np.float64,0xa4d3de2b49a7c,0x3ff0000000000000,2 +np.float64,0xbfe321c93de64392,0x3fe524ac7bbee401,2 +np.float64,0x3feb32f5def665ec,0x3ffcd6e4e5f9c271,2 +np.float64,0x7fe6b07e4ced60fc,0x7ff0000000000000,2 +np.float64,0x3fe013bb2de02776,0x3ff6aa4c32ab5ba4,2 +np.float64,0xbfeadd81d375bb04,0x3fe1e1de89b4aebf,2 +np.float64,0xffece7678079cece,0x0,2 +np.float64,0x3fe3d87b8467b0f8,0x3ff897cf22505e4d,2 +np.float64,0xffc4e3a05129c740,0x0,2 +np.float64,0xbfddee6b03bbdcd6,0x3fe723dd83ab49bd,2 +np.float64,0x3fcc4e2672389c4d,0x3ff2a680db769116,2 +np.float64,0x3fd8ed221ab1da44,0x3ff4f569aec8b850,2 +np.float64,0x80000a3538a0146b,0x3ff0000000000000,2 +np.float64,0x8004832eb109065e,0x3ff0000000000000,2 +np.float64,0xffdca83c60395078,0x0,2 +np.float64,0xffef551cda3eaa39,0x0,2 +np.float64,0x800fd95dd65fb2bc,0x3ff0000000000000,2 +np.float64,0x3ff0000000000000,0x4000000000000000,2 +np.float64,0xbfc06f5c4f20deb8,0x3fed466c17305ad8,2 +np.float64,0xbfeb01b5f476036c,0x3fe1d3de0f4211f4,2 +np.float64,0xbfdb2b9284365726,0x3fe7d7b02f790b05,2 +np.float64,0xff76ba83202d7500,0x0,2 +np.float64,0x3fd3f1c59ea7e38c,0x3ff3db96b3a0aaad,2 +np.float64,0x8b99ff6d17340,0x3ff0000000000000,2 +np.float64,0xbfeb383aa0f67075,0x3fe1bedcf2531c08,2 +np.float64,0x3fe321e35fa643c7,0x3ff83749a5d686ee,2 +np.float64,0xbfd863eb2130c7d6,0x3fe8923fcc39bac7,2 +np.float64,0x9e71dd333ce3c,0x3ff0000000000000,2 +np.float64,0x9542962b2a853,0x3ff0000000000000,2 +np.float64,0xba2c963b74593,0x3ff0000000000000,2 +np.float64,0x80019f4d0ca33e9b,0x3ff0000000000000,2 +np.float64,0xffde3e39a73c7c74,0x0,2 +np.float64,0x800258ae02c4b15d,0x3ff0000000000000,2 +np.float64,0xbfd99a535a3334a6,0x3fe8402f3a0662a5,2 +np.float64,0xe6c62143cd8c4,0x3ff0000000000000,2 +np.float64,0x7fbcc828f0399051,0x7ff0000000000000,2 +np.float64,0xbfe42e3596285c6b,0x3fe4ab2066d66071,2 +np.float64,0xffe2ee42d365dc85,0x0,2 +np.float64,0x3fe1f98abea3f315,0x3ff79dc68002a80b,2 +np.float64,0x7fd7225891ae44b0,0x7ff0000000000000,2 +np.float64,0x477177408ee30,0x3ff0000000000000,2 +np.float64,0xbfe16a7e2162d4fc,0x3fe5f1a5c745385d,2 +np.float64,0xbf98aaee283155e0,0x3fef785952e9c089,2 +np.float64,0x7fd7c14a8daf8294,0x7ff0000000000000,2 +np.float64,0xf7e7713defcee,0x3ff0000000000000,2 +np.float64,0x800769aa11aed355,0x3ff0000000000000,2 +np.float64,0xbfed30385e3a6071,0x3fe10135a3bd9ae6,2 +np.float64,0x3fe6dd7205edbae4,0x3ffa4155899efd70,2 +np.float64,0x800d705d26bae0ba,0x3ff0000000000000,2 +np.float64,0xa443ac1f48876,0x3ff0000000000000,2 +np.float64,0xbfec8cfec43919fe,0x3fe13dbf966e6633,2 +np.float64,0x7fd246efaa248dde,0x7ff0000000000000,2 +np.float64,0x800f2ad14afe55a3,0x3ff0000000000000,2 +np.float64,0x800487a894c90f52,0x3ff0000000000000,2 +np.float64,0x80014c4f19e2989f,0x3ff0000000000000,2 +np.float64,0x3fc11f265f223e4d,0x3ff18def05c971e5,2 +np.float64,0xffeb6d565776daac,0x0,2 +np.float64,0x7fd5ca5df8ab94bb,0x7ff0000000000000,2 +np.float64,0xbfe33de4fde67bca,0x3fe517d0e212cd1c,2 +np.float64,0xbfd1c738e5a38e72,0x3fea6539e9491693,2 +np.float64,0xbfec1d8c33b83b18,0x3fe16790fbca0c65,2 +np.float64,0xbfeecb464b7d968d,0x3fe06c67e2aefa55,2 +np.float64,0xbfd621dbf1ac43b8,0x3fe92dfa32d93846,2 +np.float64,0x80069a02860d3406,0x3ff0000000000000,2 +np.float64,0xbfe84f650e309eca,0x3fe2e661300f1975,2 +np.float64,0x7fc1d2cec523a59d,0x7ff0000000000000,2 +np.float64,0x3fd7706d79aee0db,0x3ff49fb033353dfe,2 +np.float64,0xffd94ba458329748,0x0,2 
+np.float64,0x7fea98ba1a753173,0x7ff0000000000000,2 +np.float64,0xbfe756ba092ead74,0x3fe34d428d1857bc,2 +np.float64,0xffecfbd836b9f7b0,0x0,2 +np.float64,0x3fd211fbe5a423f8,0x3ff375711a3641e0,2 +np.float64,0x7fee24f7793c49ee,0x7ff0000000000000,2 +np.float64,0x7fe6a098886d4130,0x7ff0000000000000,2 +np.float64,0xbfd4ade909a95bd2,0x3fe99436524db1f4,2 +np.float64,0xbfeb704e6476e09d,0x3fe1a95be4a21bc6,2 +np.float64,0xffefc0f6627f81ec,0x0,2 +np.float64,0x7feff3f896ffe7f0,0x7ff0000000000000,2 +np.float64,0xa3f74edb47eea,0x3ff0000000000000,2 +np.float64,0xbfe0a551cf214aa4,0x3fe65027a7ff42e3,2 +np.float64,0x3fe164b23622c964,0x3ff7521c6225f51d,2 +np.float64,0x7fc258752324b0e9,0x7ff0000000000000,2 +np.float64,0x4739b3348e737,0x3ff0000000000000,2 +np.float64,0xb0392b1d60726,0x3ff0000000000000,2 +np.float64,0x7fe26f42e5e4de85,0x7ff0000000000000,2 +np.float64,0x8004601f87e8c040,0x3ff0000000000000,2 +np.float64,0xffe92ce37b3259c6,0x0,2 +np.float64,0x3fe620da3a6c41b4,0x3ff9d6ee3d005466,2 +np.float64,0x3fd850cfa2b0a1a0,0x3ff4d20bd249d411,2 +np.float64,0xffdcdfdfb5b9bfc0,0x0,2 +np.float64,0x800390297d672054,0x3ff0000000000000,2 +np.float64,0x3fde5864f6bcb0ca,0x3ff639bb9321f5ef,2 +np.float64,0x3fee484cec7c909a,0x3ffed4d2c6274219,2 +np.float64,0x7fe9b9a064b37340,0x7ff0000000000000,2 +np.float64,0xffe50028b8aa0051,0x0,2 +np.float64,0x3fe37774ade6eee9,0x3ff864558498a9a8,2 +np.float64,0x7fef83c724bf078d,0x7ff0000000000000,2 +np.float64,0xbfeb58450fb6b08a,0x3fe1b290556be73d,2 +np.float64,0x7fd7161475ae2c28,0x7ff0000000000000,2 +np.float64,0x3fece09621f9c12c,0x3ffde836a583bbdd,2 +np.float64,0x3fd045790ea08af2,0x3ff31554778fd4e2,2 +np.float64,0xbfe7c7dd6cef8fbb,0x3fe31e2eeda857fc,2 +np.float64,0xffe9632f5372c65e,0x0,2 +np.float64,0x800d4f3a703a9e75,0x3ff0000000000000,2 +np.float64,0xffea880e4df5101c,0x0,2 +np.float64,0xbfeb7edc4ff6fdb8,0x3fe1a3cb5dc33594,2 +np.float64,0xbfcaae4bab355c98,0x3febb1ee65e16b58,2 +np.float64,0xbfde598a19bcb314,0x3fe709145eafaaf8,2 +np.float64,0x3feefb6d78fdf6db,0x3fff4d5c8c68e39a,2 +np.float64,0x13efc75427dfa,0x3ff0000000000000,2 +np.float64,0xffe26f65c064decb,0x0,2 +np.float64,0xbfed5c1addfab836,0x3fe0f1133bd2189a,2 +np.float64,0x7fe7a7cf756f4f9e,0x7ff0000000000000,2 +np.float64,0xffc681702e2d02e0,0x0,2 +np.float64,0x8003d6ab5067ad57,0x3ff0000000000000,2 +np.float64,0xffa695f1342d2be0,0x0,2 +np.float64,0xbfcf8857db3f10b0,0x3feafa14da8c29a4,2 +np.float64,0xbfe8ca06be71940e,0x3fe2b46f6d2c64b4,2 +np.float64,0x3451c74468a3a,0x3ff0000000000000,2 +np.float64,0x3fde47d5f6bc8fac,0x3ff635bf8e024716,2 +np.float64,0xffda159d5db42b3a,0x0,2 +np.float64,0x7fef9fecaa3f3fd8,0x7ff0000000000000,2 +np.float64,0x3fd4e745e3a9ce8c,0x3ff410a9cb6fd8bf,2 +np.float64,0xffef57019b3eae02,0x0,2 +np.float64,0xbfe6604f4f6cc09e,0x3fe3b55de43c626d,2 +np.float64,0xffe066a424a0cd48,0x0,2 +np.float64,0x3fd547de85aa8fbc,0x3ff425b2a7a16675,2 +np.float64,0xffb3c69280278d28,0x0,2 +np.float64,0xffebe0b759f7c16e,0x0,2 +np.float64,0x3fefc84106ff9082,0x3fffd973687337d8,2 +np.float64,0x501c42a4a0389,0x3ff0000000000000,2 +np.float64,0x7feb45d13eb68ba1,0x7ff0000000000000,2 +np.float64,0xbfb16a8c2e22d518,0x3fee86a9c0f9291a,2 +np.float64,0x3be327b877c66,0x3ff0000000000000,2 +np.float64,0x7fe4a58220694b03,0x7ff0000000000000,2 +np.float64,0x3fe0286220a050c4,0x3ff6b472157ab8f2,2 +np.float64,0x3fc9381825327030,0x3ff2575fbea2bf5d,2 +np.float64,0xbfd1af7ee8a35efe,0x3fea6c032cf7e669,2 +np.float64,0xbfea9b0f39b5361e,0x3fe1fbae14b40b4d,2 +np.float64,0x39efe4aa73dfd,0x3ff0000000000000,2 +np.float64,0xffeb06fdc8360dfb,0x0,2 
+np.float64,0xbfda481e72b4903c,0x3fe812b4b08d4884,2 +np.float64,0xbfd414ba5ba82974,0x3fe9bec9474bdfe6,2 +np.float64,0x7fe707177b6e0e2e,0x7ff0000000000000,2 +np.float64,0x8000000000000001,0x3ff0000000000000,2 +np.float64,0xbfede6a75bbbcd4f,0x3fe0be874cccd399,2 +np.float64,0x8006cdb577cd9b6c,0x3ff0000000000000,2 +np.float64,0x800051374f20a26f,0x3ff0000000000000,2 +np.float64,0x3fe5cba8c96b9752,0x3ff9a76b3adcc122,2 +np.float64,0xbfee3933487c7267,0x3fe0a0b190f9609a,2 +np.float64,0x3fd574b8d8aae970,0x3ff42f7e83de1af9,2 +np.float64,0xba5db72b74bb7,0x3ff0000000000000,2 +np.float64,0x3fa9bf512c337ea0,0x3ff0914a7f743a94,2 +np.float64,0xffe8cb736c3196e6,0x0,2 +np.float64,0x3761b2f06ec37,0x3ff0000000000000,2 +np.float64,0x8b4d4433169a9,0x3ff0000000000000,2 +np.float64,0x800f0245503e048b,0x3ff0000000000000,2 +np.float64,0x7fb20d54ac241aa8,0x7ff0000000000000,2 +np.float64,0x3fdf26666b3e4ccd,0x3ff66b8995142017,2 +np.float64,0xbfcbf2a83737e550,0x3feb8173a7b9d6b5,2 +np.float64,0x3fd31572a0a62ae5,0x3ff3ac6c94313dcd,2 +np.float64,0x7fb6c2807a2d8500,0x7ff0000000000000,2 +np.float64,0x800799758f2f32ec,0x3ff0000000000000,2 +np.float64,0xe72f1f6bce5e4,0x3ff0000000000000,2 +np.float64,0x3fe0e0f223a1c1e4,0x3ff70fed5b761673,2 +np.float64,0x3fe6d4f133eda9e2,0x3ffa3c8000c169eb,2 +np.float64,0xbfe1ccc3d8639988,0x3fe5c32148bedbda,2 +np.float64,0x3fea71c53574e38a,0x3ffc5f31201fe9be,2 +np.float64,0x9e0323eb3c065,0x3ff0000000000000,2 +np.float64,0x8005cc79a5cb98f4,0x3ff0000000000000,2 +np.float64,0x1dace1f83b59d,0x3ff0000000000000,2 +np.float64,0x10000000000000,0x3ff0000000000000,2 +np.float64,0xbfdef50830bdea10,0x3fe6e269fc17ebef,2 +np.float64,0x8010000000000000,0x3ff0000000000000,2 +np.float64,0xbfdfa82192bf5044,0x3fe6b6313ee0a095,2 +np.float64,0x3fd9398fe2b27320,0x3ff506ca2093c060,2 +np.float64,0x8002721fe664e441,0x3ff0000000000000,2 +np.float64,0x800c04166ad8082d,0x3ff0000000000000,2 +np.float64,0xffec3918b3387230,0x0,2 +np.float64,0x3fec62d5dfb8c5ac,0x3ffd972ea4a54b32,2 +np.float64,0x3fe7e42a0b6fc854,0x3ffad86b0443181d,2 +np.float64,0x3fc0aff5f3215fec,0x3ff1836058d4d210,2 +np.float64,0xbf82ff68a025fec0,0x3fefcb7f06862dce,2 +np.float64,0xae2e35195c5c7,0x3ff0000000000000,2 +np.float64,0x3fece3bddf79c77c,0x3ffdea41fb1ba8fa,2 +np.float64,0xbfa97b947832f730,0x3feeea34ebedbbd2,2 +np.float64,0xbfdfb1b1ce3f6364,0x3fe6b3d72871335c,2 +np.float64,0xbfe61a4f24ac349e,0x3fe3d356bf991b06,2 +np.float64,0x7fe23117a5e4622e,0x7ff0000000000000,2 +np.float64,0x800552a8cccaa552,0x3ff0000000000000,2 +np.float64,0x625b4d0ac4b6a,0x3ff0000000000000,2 +np.float64,0x3f86cf15702d9e00,0x3ff01fbe0381676d,2 +np.float64,0x800d7d1b685afa37,0x3ff0000000000000,2 +np.float64,0x3fe2cb6e40a596dd,0x3ff80a1a562f7fc9,2 +np.float64,0x3fe756eb8e2eadd7,0x3ffa86c638aad07d,2 +np.float64,0x800dc9a5513b934b,0x3ff0000000000000,2 +np.float64,0xbfbbdd118a37ba20,0x3fedacb4624f3cee,2 +np.float64,0x800de01f8efbc03f,0x3ff0000000000000,2 +np.float64,0x800da1a3fe9b4348,0x3ff0000000000000,2 +np.float64,0xbf87d8c7602fb180,0x3fefbe2614998ab6,2 +np.float64,0xbfdfff6141bffec2,0x3fe6a0c54d9f1bc8,2 +np.float64,0xee8fbba5dd1f8,0x3ff0000000000000,2 +np.float64,0x3fe79dc93e6f3b92,0x3ffaaf9d7d955b2c,2 +np.float64,0xffedd4b3d07ba967,0x0,2 +np.float64,0x800905dfc1720bc0,0x3ff0000000000000,2 +np.float64,0x3fd9e483b8b3c907,0x3ff52ddc6c950e7f,2 +np.float64,0xe34ffefdc6a00,0x3ff0000000000000,2 +np.float64,0x2168e62242d1e,0x3ff0000000000000,2 +np.float64,0x800349950e26932b,0x3ff0000000000000,2 +np.float64,0x7fc50da8532a1b50,0x7ff0000000000000,2 
+np.float64,0xae1a4d115c34a,0x3ff0000000000000,2 +np.float64,0xa020f0b74041e,0x3ff0000000000000,2 +np.float64,0x3fd2aa2f77a5545f,0x3ff3959f09519a25,2 +np.float64,0x3fbfefc3223fdf86,0x3ff171f3df2d408b,2 +np.float64,0xbfea9fc340b53f86,0x3fe1f9d92b712654,2 +np.float64,0xffe9b920a5337240,0x0,2 +np.float64,0xbfe2eb0265e5d605,0x3fe53dd195782de3,2 +np.float64,0x7fb932c70e32658d,0x7ff0000000000000,2 +np.float64,0x3fda816bfcb502d8,0x3ff551f8d5c84c82,2 +np.float64,0x3fed68cbe9fad198,0x3ffe40f6692d5693,2 +np.float64,0x32df077665be2,0x3ff0000000000000,2 +np.float64,0x7fdc9c2f3539385d,0x7ff0000000000000,2 +np.float64,0x7fe71091a2ee2122,0x7ff0000000000000,2 +np.float64,0xbfe68106c46d020e,0x3fe3a76b56024c2c,2 +np.float64,0xffcf0572823e0ae4,0x0,2 +np.float64,0xbfeeab341fbd5668,0x3fe077d496941cda,2 +np.float64,0x7fe7ada0d2af5b41,0x7ff0000000000000,2 +np.float64,0xffacdef2a439bde0,0x0,2 +np.float64,0x3fe4200f3128401e,0x3ff8be0ddf30fd1e,2 +np.float64,0xffd9022a69320454,0x0,2 +np.float64,0xbfe8e06914f1c0d2,0x3fe2ab5fe7fffb5a,2 +np.float64,0x3fc4b976602972ed,0x3ff1e6786fa7a890,2 +np.float64,0xbfd784c105af0982,0x3fe8cdeb1cdbd57e,2 +np.float64,0x7feb20a20eb64143,0x7ff0000000000000,2 +np.float64,0xbfc87dd83630fbb0,0x3fec067c1e7e6983,2 +np.float64,0x7fe5400cbe6a8018,0x7ff0000000000000,2 +np.float64,0xbfb4a1f5e22943e8,0x3fee42e6c81559a9,2 +np.float64,0x3fe967c575f2cf8a,0x3ffbbd8bc0d5c50d,2 +np.float64,0xbfeb059cf4760b3a,0x3fe1d25c592c4dab,2 +np.float64,0xbfeef536d5bdea6e,0x3fe05d832c15c64a,2 +np.float64,0x3fa90b3f6432167f,0x3ff08d410dd732cc,2 +np.float64,0xbfeaff265e75fe4d,0x3fe1d4db3fb3208d,2 +np.float64,0x6d93d688db27b,0x3ff0000000000000,2 +np.float64,0x800ab9b4ea55736a,0x3ff0000000000000,2 +np.float64,0x3fd444b39d288967,0x3ff3ed749d48d444,2 +np.float64,0xbfd5f2c0d0abe582,0x3fe93ad6124d88e7,2 +np.float64,0x3fea8fd915f51fb2,0x3ffc71b32cb92d60,2 +np.float64,0xbfd23d6491a47aca,0x3fea43875709b0f0,2 +np.float64,0xffe76f75ce6edeeb,0x0,2 +np.float64,0x1f5670da3eacf,0x3ff0000000000000,2 +np.float64,0x8000d89c9621b13a,0x3ff0000000000000,2 +np.float64,0x3fedb51c52bb6a39,0x3ffe732279c228ff,2 +np.float64,0x7f99215ac83242b5,0x7ff0000000000000,2 +np.float64,0x742a6864e854e,0x3ff0000000000000,2 +np.float64,0xbfe02fb340205f66,0x3fe689495f9164e3,2 +np.float64,0x7fef4c12b0fe9824,0x7ff0000000000000,2 +np.float64,0x3fd40e17c2a81c30,0x3ff3e1aee8ed972f,2 +np.float64,0x7fdcd264e939a4c9,0x7ff0000000000000,2 +np.float64,0x3fdb675838b6ceb0,0x3ff587526241c550,2 +np.float64,0x3fdf1a4081be3480,0x3ff66896a18c2385,2 +np.float64,0xbfea5082b874a106,0x3fe218cf8f11be13,2 +np.float64,0xffe1a0ebf7e341d8,0x0,2 +np.float64,0x3fed0a2222ba1444,0x3ffe032ce928ae7d,2 +np.float64,0xffeae036da75c06d,0x0,2 +np.float64,0x5b05fc8ab60c0,0x3ff0000000000000,2 +np.float64,0x7fd8aae5f03155cb,0x7ff0000000000000,2 +np.float64,0xbfd0b4d9fda169b4,0x3feab41e58b6ccb7,2 +np.float64,0xffdcaffa57395ff4,0x0,2 +np.float64,0xbfcbf1455437e28c,0x3feb81a884182c5d,2 +np.float64,0x3f9d6700b83ace01,0x3ff0525657db35d4,2 +np.float64,0x4fd5b0b29fab7,0x3ff0000000000000,2 +np.float64,0x3fe9af2df5b35e5c,0x3ffbe895684df916,2 +np.float64,0x800dfd41f9dbfa84,0x3ff0000000000000,2 +np.float64,0xbf2a30457e546,0x3ff0000000000000,2 +np.float64,0x7fc6be37182d7c6d,0x7ff0000000000000,2 +np.float64,0x800e0f9788dc1f2f,0x3ff0000000000000,2 +np.float64,0x8006890c704d121a,0x3ff0000000000000,2 +np.float64,0xffecb1a7cbb9634f,0x0,2 +np.float64,0xffb35c330426b868,0x0,2 +np.float64,0x7fe8f2ba8a71e574,0x7ff0000000000000,2 +np.float64,0xf3ccff8fe79a0,0x3ff0000000000000,2 
+np.float64,0x3fdf19a84e3e3351,0x3ff66871b17474c1,2 +np.float64,0x80049a662d0934cd,0x3ff0000000000000,2 +np.float64,0xdf5bb4bbbeb77,0x3ff0000000000000,2 +np.float64,0x8005eca030cbd941,0x3ff0000000000000,2 +np.float64,0xffe5f239586be472,0x0,2 +np.float64,0xbfc4526a0728a4d4,0x3fecaa52fbf5345e,2 +np.float64,0xbfe8f1ecda31e3da,0x3fe2a44c080848b3,2 +np.float64,0x3feebd32f4bd7a66,0x3fff234788938c3e,2 +np.float64,0xffd6ca04e9ad940a,0x0,2 +np.float64,0x7ff0000000000000,0x7ff0000000000000,2 +np.float64,0xbfd4c560a9a98ac2,0x3fe98db6d97442fc,2 +np.float64,0x8005723471cae46a,0x3ff0000000000000,2 +np.float64,0xbfeb278299764f05,0x3fe1c54b48f8ba4b,2 +np.float64,0x8007907b376f20f7,0x3ff0000000000000,2 +np.float64,0x7fe9c2fd01b385f9,0x7ff0000000000000,2 +np.float64,0x7fdaa37368b546e6,0x7ff0000000000000,2 +np.float64,0xbfe6d0f3786da1e7,0x3fe38582271cada7,2 +np.float64,0xbfea9b77823536ef,0x3fe1fb8575cd1b7d,2 +np.float64,0xbfe90ac38bf21587,0x3fe29a471b47a2e8,2 +np.float64,0xbfe9c51844738a30,0x3fe24fc8de03ea84,2 +np.float64,0x3fe45a9013a8b520,0x3ff8dd7c80f1cf75,2 +np.float64,0xbfe5780551eaf00a,0x3fe419832a6a4c56,2 +np.float64,0xffefffffffffffff,0x0,2 +np.float64,0x7fe3778c84a6ef18,0x7ff0000000000000,2 +np.float64,0xbfdc8a60413914c0,0x3fe77dc55b85028f,2 +np.float64,0xef47ae2fde8f6,0x3ff0000000000000,2 +np.float64,0x8001269fa4c24d40,0x3ff0000000000000,2 +np.float64,0x3fe9d2d39e73a5a7,0x3ffbfe2a66c4148e,2 +np.float64,0xffee61f528fcc3e9,0x0,2 +np.float64,0x3fe8a259ab7144b3,0x3ffb47e797a34bd2,2 +np.float64,0x3f906d610820dac0,0x3ff02dccda8e1a75,2 +np.float64,0x3fe70739f32e0e74,0x3ffa59232f4fcd07,2 +np.float64,0x3fe6b7f5e6ad6fec,0x3ffa2c0cc54f2c16,2 +np.float64,0x95a91a792b524,0x3ff0000000000000,2 +np.float64,0xbfedf6fcf57bedfa,0x3fe0b89bb40081cc,2 +np.float64,0xbfa4d2de9c29a5c0,0x3fef1c485678d657,2 +np.float64,0x3fe130470d22608e,0x3ff737b0be409a38,2 +np.float64,0x3fcf8035423f006b,0x3ff2f9d7c3c6a302,2 +np.float64,0xffe5995a3eab32b4,0x0,2 +np.float64,0xffca68c63034d18c,0x0,2 +np.float64,0xff9d53af903aa760,0x0,2 +np.float64,0x800563f1de6ac7e4,0x3ff0000000000000,2 +np.float64,0x7fce284fa63c509e,0x7ff0000000000000,2 +np.float64,0x7fb2a3959a25472a,0x7ff0000000000000,2 +np.float64,0x7fdbe2652f37c4c9,0x7ff0000000000000,2 +np.float64,0x800d705bbc1ae0b8,0x3ff0000000000000,2 +np.float64,0x7fd9bd2347b37a46,0x7ff0000000000000,2 +np.float64,0x3fcac3c0fb358782,0x3ff27ed62d6c8221,2 +np.float64,0x800110691ec220d3,0x3ff0000000000000,2 +np.float64,0x3fef79a8157ef350,0x3fffa368513eb909,2 +np.float64,0x7fe8bd2f0e317a5d,0x7ff0000000000000,2 +np.float64,0x7fd3040e60a6081c,0x7ff0000000000000,2 +np.float64,0xffea50723234a0e4,0x0,2 +np.float64,0xbfe6220054ac4400,0x3fe3d00961238a93,2 +np.float64,0x3f9eddd8c83dbbc0,0x3ff0567b0c73005a,2 +np.float64,0xbfa4a062c42940c0,0x3fef1e68badde324,2 +np.float64,0xbfd077ad4720ef5a,0x3feac5d577581d07,2 +np.float64,0x7fdfd4b025bfa95f,0x7ff0000000000000,2 +np.float64,0xd00d3cf3a01a8,0x3ff0000000000000,2 +np.float64,0x7fe3010427260207,0x7ff0000000000000,2 +np.float64,0x22ea196645d44,0x3ff0000000000000,2 +np.float64,0x7fd747e8cd2e8fd1,0x7ff0000000000000,2 +np.float64,0xd50665e7aa0cd,0x3ff0000000000000,2 +np.float64,0x7fe1da580ae3b4af,0x7ff0000000000000,2 +np.float64,0xffeb218ecfb6431d,0x0,2 +np.float64,0xbf887d0dd030fa00,0x3fefbc6252c8b354,2 +np.float64,0x3fcaa31067354621,0x3ff27b904c07e07f,2 +np.float64,0x7fe698cc4ded3198,0x7ff0000000000000,2 +np.float64,0x1c40191a38804,0x3ff0000000000000,2 +np.float64,0x80086fd20e30dfa4,0x3ff0000000000000,2 +np.float64,0x7fed34d5eaba69ab,0x7ff0000000000000,2 
+np.float64,0xffd00b52622016a4,0x0,2 +np.float64,0x3f80abcdb021579b,0x3ff0172d27945851,2 +np.float64,0x3fe614cfd66c29a0,0x3ff9d031e1839191,2 +np.float64,0x80021d71c8843ae4,0x3ff0000000000000,2 +np.float64,0x800bc2adc657855c,0x3ff0000000000000,2 +np.float64,0x6b9fec1cd73fe,0x3ff0000000000000,2 +np.float64,0xffd9093b5f321276,0x0,2 +np.float64,0x800d3c6c77fa78d9,0x3ff0000000000000,2 +np.float64,0xffe80fc1cbf01f83,0x0,2 +np.float64,0xffbffbaf2a3ff760,0x0,2 +np.float64,0x3fea1ed29eb43da5,0x3ffc2c64ec0e17a3,2 +np.float64,0x7ff4000000000000,0x7ffc000000000000,2 +np.float64,0x3fd944a052328941,0x3ff5094f4c43ecca,2 +np.float64,0x800b1f9416163f29,0x3ff0000000000000,2 +np.float64,0x800f06bf33de0d7e,0x3ff0000000000000,2 +np.float64,0xbfdbf0d226b7e1a4,0x3fe7a4f73793d95b,2 +np.float64,0xffe7306c30ae60d8,0x0,2 +np.float64,0x7fe991accfb32359,0x7ff0000000000000,2 +np.float64,0x3fcc0040d2380082,0x3ff29ea47e4f07d4,2 +np.float64,0x7fefffffffffffff,0x7ff0000000000000,2 +np.float64,0x0,0x3ff0000000000000,2 +np.float64,0x3fe1423f7be2847e,0x3ff740bc1d3b20f8,2 +np.float64,0xbfeae3a3cab5c748,0x3fe1df7e936f8504,2 +np.float64,0x800b2da7d6165b50,0x3ff0000000000000,2 +np.float64,0x800b2404fcd6480a,0x3ff0000000000000,2 +np.float64,0x6fcbcf88df97b,0x3ff0000000000000,2 +np.float64,0xa248c0e14492,0x3ff0000000000000,2 +np.float64,0xffd255776824aaee,0x0,2 +np.float64,0x80057b3effeaf67f,0x3ff0000000000000,2 +np.float64,0x3feb0b07d7761610,0x3ffcbdfe1be5a594,2 +np.float64,0x924e1019249c2,0x3ff0000000000000,2 +np.float64,0x80074307e80e8611,0x3ff0000000000000,2 +np.float64,0xffb207fa46240ff8,0x0,2 +np.float64,0x95ac388d2b587,0x3ff0000000000000,2 +np.float64,0xbff0000000000000,0x3fe0000000000000,2 +np.float64,0x3fd38b6a492716d5,0x3ff3c59f62b5add5,2 +np.float64,0x7fe49362c3e926c5,0x7ff0000000000000,2 +np.float64,0x7fe842889db08510,0x7ff0000000000000,2 +np.float64,0xbfba6003e834c008,0x3fedcb620a2d9856,2 +np.float64,0xffe7e782bd6fcf05,0x0,2 +np.float64,0x7fd9b93d9433727a,0x7ff0000000000000,2 +np.float64,0x7fc8fcb61d31f96b,0x7ff0000000000000,2 +np.float64,0xbfef9be8db3f37d2,0x3fe022d603b81dc2,2 +np.float64,0x6f4fc766de9fa,0x3ff0000000000000,2 +np.float64,0xbfe93016f132602e,0x3fe28b42d782d949,2 +np.float64,0x3fe10e52b8e21ca5,0x3ff726a38b0bb895,2 +np.float64,0x3fbbba0ae6377416,0x3ff13f56084a9da3,2 +np.float64,0x3fe09e42ece13c86,0x3ff6eeb57e775e24,2 +np.float64,0x800942e39fb285c8,0x3ff0000000000000,2 +np.float64,0xffe5964370eb2c86,0x0,2 +np.float64,0x3fde479f32bc8f3e,0x3ff635b2619ba53a,2 +np.float64,0x3fe826e187f04dc3,0x3ffaff52b79c3a08,2 +np.float64,0x3febcbf1eab797e4,0x3ffd37152e5e2598,2 +np.float64,0x3fa0816a202102d4,0x3ff05c8e6a8b00d5,2 +np.float64,0xbd005ccb7a00c,0x3ff0000000000000,2 +np.float64,0x44c12fdc89827,0x3ff0000000000000,2 +np.float64,0xffc8fdffa431fc00,0x0,2 +np.float64,0xffeb4f5a87b69eb4,0x0,2 +np.float64,0xbfb07e7f8420fd00,0x3fee9a32924fe6a0,2 +np.float64,0xbfbd9d1bb63b3a38,0x3fed88ca81e5771c,2 +np.float64,0x8008682a74f0d055,0x3ff0000000000000,2 +np.float64,0x3fdeedbc7b3ddb79,0x3ff65dcb7c55f4dc,2 +np.float64,0x8009e889c613d114,0x3ff0000000000000,2 +np.float64,0x3faea831f43d5064,0x3ff0ad935e890e49,2 +np.float64,0xf0af1703e15e3,0x3ff0000000000000,2 +np.float64,0xffec06c4a5f80d88,0x0,2 +np.float64,0x53a1cc0ca743a,0x3ff0000000000000,2 +np.float64,0x7fd10c9eea22193d,0x7ff0000000000000,2 +np.float64,0xbfd48a6bf0a914d8,0x3fe99e0d109f2bac,2 +np.float64,0x3fd6dfe931adbfd4,0x3ff47f81c2dfc5d3,2 +np.float64,0x3fed20e86b7a41d0,0x3ffe11fecc7bc686,2 +np.float64,0xbfea586818b4b0d0,0x3fe215b7747d5cb8,2 
+np.float64,0xbfd4ad3e20295a7c,0x3fe99465ab8c3275,2 +np.float64,0x3fd6619ee4acc33e,0x3ff4638b7b80c08a,2 +np.float64,0x3fdf6fcb63bedf97,0x3ff67d62fd3d560c,2 +np.float64,0x800a9191e7152324,0x3ff0000000000000,2 +np.float64,0x3fd2ff3c0da5fe78,0x3ff3a7b17e892a28,2 +np.float64,0x8003dbf1f327b7e5,0x3ff0000000000000,2 +np.float64,0xffea6b89a934d712,0x0,2 +np.float64,0x7fcfb879043f70f1,0x7ff0000000000000,2 +np.float64,0xea6a84dbd4d51,0x3ff0000000000000,2 +np.float64,0x800ec97a815d92f5,0x3ff0000000000000,2 +np.float64,0xffe304c3a8660987,0x0,2 +np.float64,0xbfefe24dd3ffc49c,0x3fe00a4e065be96d,2 +np.float64,0xffd3cc8c00a79918,0x0,2 +np.float64,0x95be8b7b2b7d2,0x3ff0000000000000,2 +np.float64,0x7fe20570cba40ae1,0x7ff0000000000000,2 +np.float64,0x7f97a06da02f40da,0x7ff0000000000000,2 +np.float64,0xffe702b9522e0572,0x0,2 +np.float64,0x3fada2d8543b45b1,0x3ff0a7adc4201e08,2 +np.float64,0x235e6acc46bce,0x3ff0000000000000,2 +np.float64,0x3fea6bc28ef4d786,0x3ffc5b7fc68fddac,2 +np.float64,0xffdbc9f505b793ea,0x0,2 +np.float64,0xffe98b137ff31626,0x0,2 +np.float64,0x800e26c6721c4d8d,0x3ff0000000000000,2 +np.float64,0x80080de445301bc9,0x3ff0000000000000,2 +np.float64,0x37e504a86fca1,0x3ff0000000000000,2 +np.float64,0x8002f5f60325ebed,0x3ff0000000000000,2 +np.float64,0x5c8772feb90ef,0x3ff0000000000000,2 +np.float64,0xbfe021abb4604358,0x3fe69023a51d22b8,2 +np.float64,0x3fde744f8fbce8a0,0x3ff64074dc84edd7,2 +np.float64,0xbfdd92899f3b2514,0x3fe73aefd9701858,2 +np.float64,0x7fc1ad5c51235ab8,0x7ff0000000000000,2 +np.float64,0xaae2f98955c5f,0x3ff0000000000000,2 +np.float64,0x7f9123d5782247aa,0x7ff0000000000000,2 +np.float64,0xbfe3f8e94b67f1d2,0x3fe4c30ab28e9cb7,2 +np.float64,0x7fdaba8b4cb57516,0x7ff0000000000000,2 +np.float64,0x7fefc85cfeff90b9,0x7ff0000000000000,2 +np.float64,0xffb83b4f523076a0,0x0,2 +np.float64,0xbfe888a68c71114d,0x3fe2ceff17c203d1,2 +np.float64,0x800de1dac4bbc3b6,0x3ff0000000000000,2 +np.float64,0xbfe4f27f09e9e4fe,0x3fe453f9af407eac,2 +np.float64,0xffe3d2713467a4e2,0x0,2 +np.float64,0xbfebaab840375570,0x3fe1931131b98842,2 +np.float64,0x93892a1b27126,0x3ff0000000000000,2 +np.float64,0x1e8e7f983d1d1,0x3ff0000000000000,2 +np.float64,0x3fecc950627992a0,0x3ffdd926f036add0,2 +np.float64,0xbfd41dfb1aa83bf6,0x3fe9bc34ece35b94,2 +np.float64,0x800aebfc6555d7f9,0x3ff0000000000000,2 +np.float64,0x7fe33ba52ca67749,0x7ff0000000000000,2 +np.float64,0xffe57c9b3feaf936,0x0,2 +np.float64,0x3fdd12464fba248c,0x3ff5ebc5598e6bd0,2 +np.float64,0xffe06d7f0fe0dafe,0x0,2 +np.float64,0x800e55b7fe9cab70,0x3ff0000000000000,2 +np.float64,0x3fd33803c8267008,0x3ff3b3cb78b2d642,2 +np.float64,0xe9cab8a1d3957,0x3ff0000000000000,2 +np.float64,0x3fb38ac166271580,0x3ff0de906947c0f0,2 +np.float64,0xbfd67aa552acf54a,0x3fe915cf64a389fd,2 +np.float64,0x1db96daa3b72f,0x3ff0000000000000,2 +np.float64,0xbfee9f08f4fd3e12,0x3fe07c2c615add3c,2 +np.float64,0xf14f6d65e29ee,0x3ff0000000000000,2 +np.float64,0x800bce089e179c12,0x3ff0000000000000,2 +np.float64,0xffc42dcc37285b98,0x0,2 +np.float64,0x7fd5f37063abe6e0,0x7ff0000000000000,2 +np.float64,0xbfd943c2cbb28786,0x3fe856f6452ec753,2 +np.float64,0x8ddfbc091bbf8,0x3ff0000000000000,2 +np.float64,0xbfe153491e22a692,0x3fe5fcb075dbbd5d,2 +np.float64,0xffe7933999ef2672,0x0,2 +np.float64,0x7ff8000000000000,0x7ff8000000000000,2 +np.float64,0x8000000000000000,0x3ff0000000000000,2 +np.float64,0xbfe9154580b22a8b,0x3fe2960bac3a8220,2 +np.float64,0x800dc6dda21b8dbb,0x3ff0000000000000,2 +np.float64,0xbfb26225a824c448,0x3fee7239a457df81,2 +np.float64,0xbfd7b68c83af6d1a,0x3fe8c08e351ab468,2 
+np.float64,0xffde01f7213c03ee,0x0,2 +np.float64,0x3fe54cbe0faa997c,0x3ff9614527191d72,2 +np.float64,0xbfd6bec3732d7d86,0x3fe90354909493de,2 +np.float64,0xbfef3c85bd7e790b,0x3fe0444f8c489ca6,2 +np.float64,0x899501b7132a0,0x3ff0000000000000,2 +np.float64,0xbfe17a456462f48b,0x3fe5ea2719a9a84b,2 +np.float64,0xffe34003b8668007,0x0,2 +np.float64,0x7feff6a3633fed46,0x7ff0000000000000,2 +np.float64,0x3fba597ecc34b2fe,0x3ff12ee72e4de474,2 +np.float64,0x4084c7b68109a,0x3ff0000000000000,2 +np.float64,0x3fad23bf4c3a4780,0x3ff0a4d06193ff6d,2 +np.float64,0xffd0fe2707a1fc4e,0x0,2 +np.float64,0xb96cb43f72d97,0x3ff0000000000000,2 +np.float64,0x7fc4d684d829ad09,0x7ff0000000000000,2 +np.float64,0x7fdc349226b86923,0x7ff0000000000000,2 +np.float64,0x7fd82851cd3050a3,0x7ff0000000000000,2 +np.float64,0x800cde0041b9bc01,0x3ff0000000000000,2 +np.float64,0x4e8caa1e9d196,0x3ff0000000000000,2 +np.float64,0xbfed06a6d2fa0d4e,0x3fe1108c3682b05a,2 +np.float64,0xffe8908122312102,0x0,2 +np.float64,0xffe56ed6d9aaddad,0x0,2 +np.float64,0x3fedd6db00fbadb6,0x3ffe896c68c4b26e,2 +np.float64,0x3fde31f9b4bc63f4,0x3ff6307e08f8b6ba,2 +np.float64,0x6bb963c2d772d,0x3ff0000000000000,2 +np.float64,0x787b7142f0f6f,0x3ff0000000000000,2 +np.float64,0x3fe6e4147c6dc829,0x3ffa451bbdece240,2 +np.float64,0x8003857401470ae9,0x3ff0000000000000,2 +np.float64,0xbfeae82c3c75d058,0x3fe1ddbd66e65aab,2 +np.float64,0x7fe174707c62e8e0,0x7ff0000000000000,2 +np.float64,0x80008d2545e11a4b,0x3ff0000000000000,2 +np.float64,0xbfecc2dce17985ba,0x3fe129ad4325985a,2 +np.float64,0xbfe1fa1daf63f43c,0x3fe5adcb0731a44b,2 +np.float64,0x7fcf2530203e4a5f,0x7ff0000000000000,2 +np.float64,0xbfea5cefe874b9e0,0x3fe213f134b61f4a,2 +np.float64,0x800103729f2206e6,0x3ff0000000000000,2 +np.float64,0xbfe8442ff7708860,0x3fe2eaf850faa169,2 +np.float64,0x8006c78e19ed8f1d,0x3ff0000000000000,2 +np.float64,0x3fc259589c24b2b1,0x3ff1abe6a4d28816,2 +np.float64,0xffed02b7b5ba056e,0x0,2 +np.float64,0xbfce0aa4fe3c1548,0x3feb32115d92103e,2 +np.float64,0x7fec06e78bf80dce,0x7ff0000000000000,2 +np.float64,0xbfe0960bbc612c18,0x3fe6578ab29b70d4,2 +np.float64,0x3fee45841cbc8b08,0x3ffed2f6ca808ad3,2 +np.float64,0xbfeb0f8ebef61f1e,0x3fe1ce86003044cd,2 +np.float64,0x8002c357358586af,0x3ff0000000000000,2 +np.float64,0x3fe9aa10cc735422,0x3ffbe57e294ce68b,2 +np.float64,0x800256c0a544ad82,0x3ff0000000000000,2 +np.float64,0x4de6e1449bcdd,0x3ff0000000000000,2 +np.float64,0x65e9bc9ccbd38,0x3ff0000000000000,2 +np.float64,0xbfe53b0fa9aa7620,0x3fe4341f0aa29bbc,2 +np.float64,0xbfcdd94cd13bb298,0x3feb3956acd2e2dd,2 +np.float64,0x8004a49b65a94938,0x3ff0000000000000,2 +np.float64,0x800d3d05deba7a0c,0x3ff0000000000000,2 +np.float64,0x3fe4e05bce69c0b8,0x3ff925f55602a7e0,2 +np.float64,0xffe391e3256723c6,0x0,2 +np.float64,0xbfe92f0f37b25e1e,0x3fe28bacc76ae753,2 +np.float64,0x3f990238d8320472,0x3ff045edd36e2d62,2 +np.float64,0xffed8d15307b1a2a,0x0,2 +np.float64,0x3fee82e01afd05c0,0x3ffefc09e8b9c2b7,2 +np.float64,0xffb2d94b2225b298,0x0,2 diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-expm1.csv b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-expm1.csv new file mode 100644 index 0000000..732ae86 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-expm1.csv @@ -0,0 +1,1429 @@ +dtype,input,output,ulperrortol +np.float32,0x80606724,0x80606724,3 +np.float32,0xbf16790f,0xbee38e14,3 +np.float32,0xbf1778a1,0xbee4a97f,3 +np.float32,0x7d4fc610,0x7f800000,3 +np.float32,0xbec30a20,0xbea230d5,3 
+np.float32,0x3eae8a36,0x3ecffac5,3 +np.float32,0xbf1f08f1,0xbeece93c,3 +np.float32,0x80374376,0x80374376,3 +np.float32,0x3f2e04ca,0x3f793115,3 +np.float32,0x7e2c7e36,0x7f800000,3 +np.float32,0xbf686cae,0xbf18bcf0,3 +np.float32,0xbf5518cd,0xbf10a3da,3 +np.float32,0x807e233c,0x807e233c,3 +np.float32,0x7f4edd54,0x7f800000,3 +np.float32,0x7ed70088,0x7f800000,3 +np.float32,0x801675da,0x801675da,3 +np.float32,0x806735d5,0x806735d5,3 +np.float32,0xfe635fec,0xbf800000,3 +np.float32,0xfed88a0a,0xbf800000,3 +np.float32,0xff52c052,0xbf800000,3 +np.float32,0x7fc00000,0x7fc00000,3 +np.float32,0xff4f65f9,0xbf800000,3 +np.float32,0xfe0f6c20,0xbf800000,3 +np.float32,0x80322b30,0x80322b30,3 +np.float32,0xfb757000,0xbf800000,3 +np.float32,0x3c81e0,0x3c81e0,3 +np.float32,0x79d56a,0x79d56a,3 +np.float32,0x8029d7af,0x8029d7af,3 +np.float32,0x8058a593,0x8058a593,3 +np.float32,0x3f3a13c7,0x3f88c75c,3 +np.float32,0x2a6b05,0x2a6b05,3 +np.float32,0xbd64c960,0xbd5e83ae,3 +np.float32,0x80471052,0x80471052,3 +np.float32,0xbe5dd950,0xbe47766c,3 +np.float32,0xfd8f88f0,0xbf800000,3 +np.float32,0x75a4b7,0x75a4b7,3 +np.float32,0x3f726f2e,0x3fc9fb7d,3 +np.float32,0x3ed6795c,0x3f053115,3 +np.float32,0x17d7f5,0x17d7f5,3 +np.float32,0xbf4cf19b,0xbf0d094f,3 +np.float32,0x3e0ec532,0x3e1933c6,3 +np.float32,0xff084016,0xbf800000,3 +np.float32,0x800829aa,0x800829aa,3 +np.float32,0x806d7302,0x806d7302,3 +np.float32,0x7f59d9da,0x7f800000,3 +np.float32,0x15f8b9,0x15f8b9,3 +np.float32,0x803befb3,0x803befb3,3 +np.float32,0x525043,0x525043,3 +np.float32,0x51a647,0x51a647,3 +np.float32,0xbf1cfce4,0xbeeab3d9,3 +np.float32,0x3f1f27a4,0x3f5cb1d2,3 +np.float32,0xbebc3a04,0xbe9d8142,3 +np.float32,0xbeea548c,0xbebc07e5,3 +np.float32,0x3f47401c,0x3f96c2a3,3 +np.float32,0x806b1ea3,0x806b1ea3,3 +np.float32,0x3ea56bb8,0x3ec3450c,3 +np.float32,0x3f7b4963,0x3fd597b5,3 +np.float32,0x7f051fa0,0x7f800000,3 +np.float32,0x1d411c,0x1d411c,3 +np.float32,0xff0b6a35,0xbf800000,3 +np.float32,0xbead63c0,0xbe9314f7,3 +np.float32,0x3738be,0x3738be,3 +np.float32,0x3f138cc8,0x3f479155,3 +np.float32,0x800a539f,0x800a539f,3 +np.float32,0x801b0ebd,0x801b0ebd,3 +np.float32,0x318fcd,0x318fcd,3 +np.float32,0x3ed67556,0x3f052e06,3 +np.float32,0x702886,0x702886,3 +np.float32,0x80000001,0x80000001,3 +np.float32,0x70a174,0x70a174,3 +np.float32,0x4f9c66,0x4f9c66,3 +np.float32,0x3e3e1927,0x3e50e351,3 +np.float32,0x7eac9a4d,0x7f800000,3 +np.float32,0x4b7407,0x4b7407,3 +np.float32,0x7f5bd2fd,0x7f800000,3 +np.float32,0x3eaafc58,0x3ecaffbd,3 +np.float32,0xbc989360,0xbc9729e2,3 +np.float32,0x3f470e5c,0x3f968c7b,3 +np.float32,0x4c5672,0x4c5672,3 +np.float32,0xff2b2ee2,0xbf800000,3 +np.float32,0xbf28a104,0xbef7079b,3 +np.float32,0x2c6175,0x2c6175,3 +np.float32,0x3d7e4fb0,0x3d832f9f,3 +np.float32,0x763276,0x763276,3 +np.float32,0x3cf364,0x3cf364,3 +np.float32,0xbf7ace75,0xbf1fe48c,3 +np.float32,0xff19e858,0xbf800000,3 +np.float32,0x80504c70,0x80504c70,3 +np.float32,0xff390210,0xbf800000,3 +np.float32,0x8046a743,0x8046a743,3 +np.float32,0x80000000,0x80000000,3 +np.float32,0x806c51da,0x806c51da,3 +np.float32,0x806ab38f,0x806ab38f,3 +np.float32,0x3f3de863,0x3f8cc538,3 +np.float32,0x7f6d45bb,0x7f800000,3 +np.float32,0xfd16ec60,0xbf800000,3 +np.float32,0x80513cba,0x80513cba,3 +np.float32,0xbf68996b,0xbf18cefa,3 +np.float32,0xfe039f2c,0xbf800000,3 +np.float32,0x3f013207,0x3f280c55,3 +np.float32,0x7ef4bc07,0x7f800000,3 +np.float32,0xbe8b65ac,0xbe741069,3 +np.float32,0xbf7a8186,0xbf1fc7a6,3 +np.float32,0x802532e5,0x802532e5,3 +np.float32,0x32c7df,0x32c7df,3 
+np.float32,0x3ce4dceb,0x3ce81701,3 +np.float32,0xfe801118,0xbf800000,3 +np.float32,0x3d905f20,0x3d9594fb,3 +np.float32,0xbe11ed28,0xbe080168,3 +np.float32,0x59e773,0x59e773,3 +np.float32,0x3e9a2547,0x3eb3dd57,3 +np.float32,0x7ecb7c67,0x7f800000,3 +np.float32,0x7f69a67e,0x7f800000,3 +np.float32,0xff121e11,0xbf800000,3 +np.float32,0x3f7917cb,0x3fd2ad8c,3 +np.float32,0xbf1a7da8,0xbee7fc0c,3 +np.float32,0x3f077e66,0x3f329c40,3 +np.float32,0x3ce8e040,0x3cec37b3,3 +np.float32,0xbf3f0b8e,0xbf069f4d,3 +np.float32,0x3f52f194,0x3fa3c9d6,3 +np.float32,0xbf0e7422,0xbeda80f2,3 +np.float32,0xfd67e230,0xbf800000,3 +np.float32,0xff14d9a9,0xbf800000,3 +np.float32,0x3f3546e3,0x3f83dc2b,3 +np.float32,0x3e152e3a,0x3e20983d,3 +np.float32,0x4a89a3,0x4a89a3,3 +np.float32,0x63217,0x63217,3 +np.float32,0xbeb9e2a8,0xbe9be153,3 +np.float32,0x7e9fa049,0x7f800000,3 +np.float32,0x7f58110c,0x7f800000,3 +np.float32,0x3e88290c,0x3e9bfba9,3 +np.float32,0xbf2cb206,0xbefb3494,3 +np.float32,0xff5880c4,0xbf800000,3 +np.float32,0x7ecff3ac,0x7f800000,3 +np.float32,0x3f4b3de6,0x3f9b23fd,3 +np.float32,0xbebd2048,0xbe9e208c,3 +np.float32,0xff08f7a2,0xbf800000,3 +np.float32,0xff473330,0xbf800000,3 +np.float32,0x1,0x1,3 +np.float32,0xbf5dc239,0xbf14584b,3 +np.float32,0x458e3f,0x458e3f,3 +np.float32,0xbdb8a650,0xbdb091f8,3 +np.float32,0xff336ffc,0xbf800000,3 +np.float32,0x3c60bd00,0x3c624966,3 +np.float32,0xbe16a4f8,0xbe0c1664,3 +np.float32,0x3f214246,0x3f60a0f0,3 +np.float32,0x7fa00000,0x7fe00000,3 +np.float32,0x7e08737e,0x7f800000,3 +np.float32,0x3f70574c,0x3fc74b8e,3 +np.float32,0xbed5745c,0xbeae8c77,3 +np.float32,0x361752,0x361752,3 +np.float32,0x3eb276d6,0x3ed584ea,3 +np.float32,0x3f03fc1e,0x3f2cb1a5,3 +np.float32,0x3fafd1,0x3fafd1,3 +np.float32,0x7e50d74c,0x7f800000,3 +np.float32,0x3eeca5,0x3eeca5,3 +np.float32,0x5dc963,0x5dc963,3 +np.float32,0x7f0e63ae,0x7f800000,3 +np.float32,0x8021745f,0x8021745f,3 +np.float32,0xbf5881a9,0xbf121d07,3 +np.float32,0x7dadc7fd,0x7f800000,3 +np.float32,0xbf2c0798,0xbefa86bb,3 +np.float32,0x3e635f50,0x3e7e97a9,3 +np.float32,0xbf2053fa,0xbeee4c0e,3 +np.float32,0x3e8eee2b,0x3ea4dfcc,3 +np.float32,0xfc8a03c0,0xbf800000,3 +np.float32,0xfd9e4948,0xbf800000,3 +np.float32,0x801e817e,0x801e817e,3 +np.float32,0xbf603a27,0xbf1560c3,3 +np.float32,0x7f729809,0x7f800000,3 +np.float32,0x3f5a1864,0x3fac0e04,3 +np.float32,0x3e7648b8,0x3e8b3677,3 +np.float32,0x3edade24,0x3f088bc1,3 +np.float32,0x65e16e,0x65e16e,3 +np.float32,0x3f24aa50,0x3f671117,3 +np.float32,0x803cb1d0,0x803cb1d0,3 +np.float32,0xbe7b1858,0xbe5eadcc,3 +np.float32,0xbf19bb27,0xbee726fb,3 +np.float32,0xfd1f6e60,0xbf800000,3 +np.float32,0xfeb0de60,0xbf800000,3 +np.float32,0xff511a52,0xbf800000,3 +np.float32,0xff7757f7,0xbf800000,3 +np.float32,0x463ff5,0x463ff5,3 +np.float32,0x3f770d12,0x3fcffcc2,3 +np.float32,0xbf208562,0xbeee80dc,3 +np.float32,0x6df204,0x6df204,3 +np.float32,0xbf62d24f,0xbf1673fb,3 +np.float32,0x3dfcf210,0x3e069d5f,3 +np.float32,0xbef26002,0xbec114d7,3 +np.float32,0x7f800000,0x7f800000,3 +np.float32,0x7f30fb85,0x7f800000,3 +np.float32,0x7ee5dfef,0x7f800000,3 +np.float32,0x3f317829,0x3f800611,3 +np.float32,0x3f4b0bbd,0x3f9aec88,3 +np.float32,0x7edf708c,0x7f800000,3 +np.float32,0xff071260,0xbf800000,3 +np.float32,0x3e7b8c30,0x3e8e9198,3 +np.float32,0x3f33778b,0x3f82077f,3 +np.float32,0x3e8cd11d,0x3ea215fd,3 +np.float32,0x8004483d,0x8004483d,3 +np.float32,0x801633e3,0x801633e3,3 +np.float32,0x7e76eb15,0x7f800000,3 +np.float32,0x3c1571,0x3c1571,3 +np.float32,0x7de3de52,0x7f800000,3 +np.float32,0x804ae906,0x804ae906,3 
+np.float32,0x7f3a2616,0x7f800000,3 +np.float32,0xff7fffff,0xbf800000,3 +np.float32,0xff5d17e4,0xbf800000,3 +np.float32,0xbeaa6704,0xbe90f252,3 +np.float32,0x7e6a43af,0x7f800000,3 +np.float32,0x2a0f35,0x2a0f35,3 +np.float32,0xfd8fece0,0xbf800000,3 +np.float32,0xfeef2e2a,0xbf800000,3 +np.float32,0xff800000,0xbf800000,3 +np.float32,0xbeefcc52,0xbebf78e4,3 +np.float32,0x3db6c490,0x3dbf2bd5,3 +np.float32,0x8290f,0x8290f,3 +np.float32,0xbeace648,0xbe92bb7f,3 +np.float32,0x801fea79,0x801fea79,3 +np.float32,0x3ea6c230,0x3ec51ebf,3 +np.float32,0x3e5f2ca3,0x3e795c8a,3 +np.float32,0x3eb6f634,0x3edbeb9f,3 +np.float32,0xff790b45,0xbf800000,3 +np.float32,0x3d82e240,0x3d872816,3 +np.float32,0x3f0d6a57,0x3f3cc7db,3 +np.float32,0x7f08531a,0x7f800000,3 +np.float32,0x702b6d,0x702b6d,3 +np.float32,0x7d3a3c38,0x7f800000,3 +np.float32,0x3d0a7fb3,0x3d0cddf3,3 +np.float32,0xff28084c,0xbf800000,3 +np.float32,0xfeee8804,0xbf800000,3 +np.float32,0x804094eb,0x804094eb,3 +np.float32,0x7acb39,0x7acb39,3 +np.float32,0x3f01c07a,0x3f28f88c,3 +np.float32,0x3e05c500,0x3e0ee674,3 +np.float32,0xbe6f7c38,0xbe558ac1,3 +np.float32,0x803b1f4b,0x803b1f4b,3 +np.float32,0xbf76561f,0xbf1e332b,3 +np.float32,0xff30d368,0xbf800000,3 +np.float32,0x7e2e1f38,0x7f800000,3 +np.float32,0x3ee085b8,0x3f0ce7c0,3 +np.float32,0x8064c4a7,0x8064c4a7,3 +np.float32,0xa7c1d,0xa7c1d,3 +np.float32,0x3f27498a,0x3f6c14bc,3 +np.float32,0x137ca,0x137ca,3 +np.float32,0x3d0a5c60,0x3d0cb969,3 +np.float32,0x80765f1f,0x80765f1f,3 +np.float32,0x80230a71,0x80230a71,3 +np.float32,0x3f321ed2,0x3f80acf4,3 +np.float32,0x7d61e7f4,0x7f800000,3 +np.float32,0xbf39f7f2,0xbf0430f7,3 +np.float32,0xbe2503f8,0xbe1867e8,3 +np.float32,0x29333d,0x29333d,3 +np.float32,0x7edc5a0e,0x7f800000,3 +np.float32,0xbe81a8a2,0xbe651663,3 +np.float32,0x7f76ab6d,0x7f800000,3 +np.float32,0x7f46111f,0x7f800000,3 +np.float32,0xff0fc888,0xbf800000,3 +np.float32,0x805ece89,0x805ece89,3 +np.float32,0xc390b,0xc390b,3 +np.float32,0xff64bdee,0xbf800000,3 +np.float32,0x3dd07e4e,0x3ddb79bd,3 +np.float32,0xfecc1f10,0xbf800000,3 +np.float32,0x803f5177,0x803f5177,3 +np.float32,0x802a24d2,0x802a24d2,3 +np.float32,0x7f27d0cc,0x7f800000,3 +np.float32,0x3ef57c98,0x3f1d7e88,3 +np.float32,0x7b848d,0x7b848d,3 +np.float32,0x7f7fffff,0x7f800000,3 +np.float32,0xfe889c46,0xbf800000,3 +np.float32,0xff2d6dc5,0xbf800000,3 +np.float32,0x3f53a186,0x3fa492a6,3 +np.float32,0xbf239c94,0xbef1c90c,3 +np.float32,0xff7c0f4e,0xbf800000,3 +np.float32,0x3e7c69a9,0x3e8f1f3a,3 +np.float32,0xbf47c9e9,0xbf0ab2a9,3 +np.float32,0xbc1eaf00,0xbc1deae9,3 +np.float32,0x3f4a6d39,0x3f9a3d8e,3 +np.float32,0x3f677930,0x3fbc26eb,3 +np.float32,0x3f45eea1,0x3f955418,3 +np.float32,0x7f61a1f8,0x7f800000,3 +np.float32,0xff58c7c6,0xbf800000,3 +np.float32,0x80239801,0x80239801,3 +np.float32,0xff56e616,0xbf800000,3 +np.float32,0xff62052c,0xbf800000,3 +np.float32,0x8009b615,0x8009b615,3 +np.float32,0x293d6b,0x293d6b,3 +np.float32,0xfe9e585c,0xbf800000,3 +np.float32,0x7f58ff4b,0x7f800000,3 +np.float32,0x10937c,0x10937c,3 +np.float32,0x7f5cc13f,0x7f800000,3 +np.float32,0x110c5d,0x110c5d,3 +np.float32,0x805e51fc,0x805e51fc,3 +np.float32,0xbedcf70a,0xbeb3766c,3 +np.float32,0x3f4d5e42,0x3f9d8091,3 +np.float32,0xff5925a0,0xbf800000,3 +np.float32,0x7e87cafa,0x7f800000,3 +np.float32,0xbf6474b2,0xbf171fee,3 +np.float32,0x4b39b2,0x4b39b2,3 +np.float32,0x8020cc28,0x8020cc28,3 +np.float32,0xff004ed8,0xbf800000,3 +np.float32,0xbf204cf5,0xbeee448d,3 +np.float32,0x3e30cf10,0x3e40fdb1,3 +np.float32,0x80202bee,0x80202bee,3 +np.float32,0xbf55a985,0xbf10e2bc,3 
+np.float32,0xbe297dd8,0xbe1c351c,3 +np.float32,0x5780d9,0x5780d9,3 +np.float32,0x7ef729fa,0x7f800000,3 +np.float32,0x8039a3b5,0x8039a3b5,3 +np.float32,0x7cdd3f,0x7cdd3f,3 +np.float32,0x7ef0145a,0x7f800000,3 +np.float32,0x807ad7ae,0x807ad7ae,3 +np.float32,0x7f6c2643,0x7f800000,3 +np.float32,0xbec56124,0xbea3c929,3 +np.float32,0x512c3b,0x512c3b,3 +np.float32,0xbed3effe,0xbead8c1e,3 +np.float32,0x7f5e0a4d,0x7f800000,3 +np.float32,0x3f315316,0x3f7fc200,3 +np.float32,0x7eca5727,0x7f800000,3 +np.float32,0x7f4834f3,0x7f800000,3 +np.float32,0x8004af6d,0x8004af6d,3 +np.float32,0x3f223ca4,0x3f6277e3,3 +np.float32,0x7eea4fdd,0x7f800000,3 +np.float32,0x3e7143e8,0x3e880763,3 +np.float32,0xbf737008,0xbf1d160e,3 +np.float32,0xfc408b00,0xbf800000,3 +np.float32,0x803912ca,0x803912ca,3 +np.float32,0x7db31f4e,0x7f800000,3 +np.float32,0xff578b54,0xbf800000,3 +np.float32,0x3f068ec4,0x3f31062b,3 +np.float32,0x35f64f,0x35f64f,3 +np.float32,0x80437df4,0x80437df4,3 +np.float32,0x568059,0x568059,3 +np.float32,0x8005f8ba,0x8005f8ba,3 +np.float32,0x6824ad,0x6824ad,3 +np.float32,0xff3fdf30,0xbf800000,3 +np.float32,0xbf6f7682,0xbf1b89d6,3 +np.float32,0x3dcea8a0,0x3dd971f5,3 +np.float32,0x3ee32a62,0x3f0ef5a9,3 +np.float32,0xbf735bcd,0xbf1d0e3d,3 +np.float32,0x7e8c7c28,0x7f800000,3 +np.float32,0x3ed552bc,0x3f045161,3 +np.float32,0xfed90a8a,0xbf800000,3 +np.float32,0xbe454368,0xbe336d2a,3 +np.float32,0xbf171d26,0xbee4442d,3 +np.float32,0x80652bf9,0x80652bf9,3 +np.float32,0xbdbaaa20,0xbdb26914,3 +np.float32,0x3f56063d,0x3fa7522e,3 +np.float32,0x3d3d4fd3,0x3d41c13f,3 +np.float32,0x80456040,0x80456040,3 +np.float32,0x3dc15586,0x3dcac0ef,3 +np.float32,0x7f753060,0x7f800000,3 +np.float32,0x7f7d8039,0x7f800000,3 +np.float32,0xfdebf280,0xbf800000,3 +np.float32,0xbf1892c3,0xbee5e116,3 +np.float32,0xbf0f1468,0xbedb3878,3 +np.float32,0x40d85c,0x40d85c,3 +np.float32,0x3f93dd,0x3f93dd,3 +np.float32,0xbf5730fd,0xbf118c24,3 +np.float32,0xfe17aa44,0xbf800000,3 +np.float32,0x3dc0baf4,0x3dca1716,3 +np.float32,0xbf3433d8,0xbf015efb,3 +np.float32,0x1c59f5,0x1c59f5,3 +np.float32,0x802b1540,0x802b1540,3 +np.float32,0xbe47df6c,0xbe35936e,3 +np.float32,0xbe8e7070,0xbe78af32,3 +np.float32,0xfe7057f4,0xbf800000,3 +np.float32,0x80668b69,0x80668b69,3 +np.float32,0xbe677810,0xbe4f2c2d,3 +np.float32,0xbe7a2f1c,0xbe5df733,3 +np.float32,0xfeb79e3c,0xbf800000,3 +np.float32,0xbeb6e320,0xbe99c9e8,3 +np.float32,0xfea188f2,0xbf800000,3 +np.float32,0x7dcaeb15,0x7f800000,3 +np.float32,0x1be567,0x1be567,3 +np.float32,0xbf4041cc,0xbf07320d,3 +np.float32,0x3f721aa7,0x3fc98e9a,3 +np.float32,0x7f5aa835,0x7f800000,3 +np.float32,0x15180e,0x15180e,3 +np.float32,0x3f73d739,0x3fcbccdb,3 +np.float32,0xbeecd380,0xbebd9b36,3 +np.float32,0x3f2caec7,0x3f768fea,3 +np.float32,0xbeaf65f2,0xbe9482bb,3 +np.float32,0xfe6aa384,0xbf800000,3 +np.float32,0xbf4f2c0a,0xbf0e085e,3 +np.float32,0xbf2b5907,0xbef9d431,3 +np.float32,0x3e855e0d,0x3e985960,3 +np.float32,0x8056cc64,0x8056cc64,3 +np.float32,0xff746bb5,0xbf800000,3 +np.float32,0x3e0332f6,0x3e0bf986,3 +np.float32,0xff637720,0xbf800000,3 +np.float32,0xbf330676,0xbf00c990,3 +np.float32,0x3ec449a1,0x3eef3862,3 +np.float32,0x766541,0x766541,3 +np.float32,0xfe2edf6c,0xbf800000,3 +np.float32,0xbebb28ca,0xbe9cc3e2,3 +np.float32,0x3f16c930,0x3f4d5ce4,3 +np.float32,0x7f1a9a4a,0x7f800000,3 +np.float32,0x3e9ba1,0x3e9ba1,3 +np.float32,0xbf73d5f6,0xbf1d3d69,3 +np.float32,0xfdc8a8b0,0xbf800000,3 +np.float32,0x50f051,0x50f051,3 +np.float32,0xff0add02,0xbf800000,3 +np.float32,0x1e50bf,0x1e50bf,3 +np.float32,0x3f04d287,0x3f2e1948,3 
+np.float32,0x7f1e50,0x7f1e50,3 +np.float32,0x2affb3,0x2affb3,3 +np.float32,0x80039f07,0x80039f07,3 +np.float32,0x804ba79e,0x804ba79e,3 +np.float32,0x7b5a8eed,0x7f800000,3 +np.float32,0x3e1a8b28,0x3e26d0a7,3 +np.float32,0x3ea95f29,0x3ec8bfa4,3 +np.float32,0x7e09fa55,0x7f800000,3 +np.float32,0x7eacb1b3,0x7f800000,3 +np.float32,0x3e8ad7c0,0x3e9f7dec,3 +np.float32,0x7e0e997c,0x7f800000,3 +np.float32,0x3f4422b4,0x3f936398,3 +np.float32,0x806bd222,0x806bd222,3 +np.float32,0x677ae6,0x677ae6,3 +np.float32,0x62cf68,0x62cf68,3 +np.float32,0x7e4e594e,0x7f800000,3 +np.float32,0x80445fd1,0x80445fd1,3 +np.float32,0xff3a0d04,0xbf800000,3 +np.float32,0x8052b256,0x8052b256,3 +np.float32,0x3cb34440,0x3cb53e11,3 +np.float32,0xbf0e3865,0xbeda3c6d,3 +np.float32,0x3f49f5df,0x3f99ba17,3 +np.float32,0xbed75a22,0xbeafcc09,3 +np.float32,0xbf7aec64,0xbf1fefc8,3 +np.float32,0x7f35a62d,0x7f800000,3 +np.float32,0xbf787b03,0xbf1f03fc,3 +np.float32,0x8006a62a,0x8006a62a,3 +np.float32,0x3f6419e7,0x3fb803c7,3 +np.float32,0x3ecea2e5,0x3efe8f01,3 +np.float32,0x80603577,0x80603577,3 +np.float32,0xff73198c,0xbf800000,3 +np.float32,0x7def110a,0x7f800000,3 +np.float32,0x544efd,0x544efd,3 +np.float32,0x3f052340,0x3f2ea0fc,3 +np.float32,0xff306666,0xbf800000,3 +np.float32,0xbf800000,0xbf21d2a7,3 +np.float32,0xbed3e150,0xbead826a,3 +np.float32,0x3f430c99,0x3f92390f,3 +np.float32,0xbf4bffa4,0xbf0c9c73,3 +np.float32,0xfd97a710,0xbf800000,3 +np.float32,0x3cadf0fe,0x3cafcd1a,3 +np.float32,0x807af7b4,0x807af7b4,3 +np.float32,0xbc508600,0xbc4f33bc,3 +np.float32,0x7f3e0ec7,0x7f800000,3 +np.float32,0xbe51334c,0xbe3d36f7,3 +np.float32,0xfe7b7fb4,0xbf800000,3 +np.float32,0xfed9c45e,0xbf800000,3 +np.float32,0x3da024eb,0x3da6926a,3 +np.float32,0x7eed9e76,0x7f800000,3 +np.float32,0xbf2b8f1f,0xbefa0b91,3 +np.float32,0x3f2b9286,0x3f746318,3 +np.float32,0xfe8af49c,0xbf800000,3 +np.float32,0x9c4f7,0x9c4f7,3 +np.float32,0x801d7543,0x801d7543,3 +np.float32,0xbf66474a,0xbf17de66,3 +np.float32,0xbf562155,0xbf1116b1,3 +np.float32,0x46a8de,0x46a8de,3 +np.float32,0x8053fe6b,0x8053fe6b,3 +np.float32,0xbf6ee842,0xbf1b51f3,3 +np.float32,0xbf6ad78e,0xbf19b565,3 +np.float32,0xbf012574,0xbecad7ff,3 +np.float32,0x748364,0x748364,3 +np.float32,0x8073f59b,0x8073f59b,3 +np.float32,0xff526825,0xbf800000,3 +np.float32,0xfeb02dc4,0xbf800000,3 +np.float32,0x8033eb1c,0x8033eb1c,3 +np.float32,0x3f3685ea,0x3f8520cc,3 +np.float32,0x7f657902,0x7f800000,3 +np.float32,0xbf75eac4,0xbf1e0a1f,3 +np.float32,0xfe67f384,0xbf800000,3 +np.float32,0x3f56d3cc,0x3fa83faf,3 +np.float32,0x44a4ce,0x44a4ce,3 +np.float32,0x1dc4b3,0x1dc4b3,3 +np.float32,0x4fb3b2,0x4fb3b2,3 +np.float32,0xbea904a4,0xbe8ff3ed,3 +np.float32,0x7e668f16,0x7f800000,3 +np.float32,0x7f538378,0x7f800000,3 +np.float32,0x80541709,0x80541709,3 +np.float32,0x80228040,0x80228040,3 +np.float32,0x7ef9694e,0x7f800000,3 +np.float32,0x3f5fca9b,0x3fb2ce54,3 +np.float32,0xbe9c43c2,0xbe86ab84,3 +np.float32,0xfecee000,0xbf800000,3 +np.float32,0x5a65c2,0x5a65c2,3 +np.float32,0x3f736572,0x3fcb3985,3 +np.float32,0xbf2a03f7,0xbef87600,3 +np.float32,0xfe96b488,0xbf800000,3 +np.float32,0xfedd8800,0xbf800000,3 +np.float32,0x80411804,0x80411804,3 +np.float32,0x7edcb0a6,0x7f800000,3 +np.float32,0x2bb882,0x2bb882,3 +np.float32,0x3f800000,0x3fdbf0a9,3 +np.float32,0x764b27,0x764b27,3 +np.float32,0x7e92035d,0x7f800000,3 +np.float32,0x3e80facb,0x3e92ae1d,3 +np.float32,0x8040b81a,0x8040b81a,3 +np.float32,0x7f487fe4,0x7f800000,3 +np.float32,0xbc641780,0xbc6282ed,3 +np.float32,0x804b0bb9,0x804b0bb9,3 +np.float32,0x7d0b7c39,0x7f800000,3 
+np.float32,0xff072080,0xbf800000,3 +np.float32,0xbed7aff8,0xbeb00462,3 +np.float32,0x35e247,0x35e247,3 +np.float32,0xbf7edd19,0xbf216766,3 +np.float32,0x8004a539,0x8004a539,3 +np.float32,0xfdfc1790,0xbf800000,3 +np.float32,0x8037a841,0x8037a841,3 +np.float32,0xfed0a8a8,0xbf800000,3 +np.float32,0x7f1f1697,0x7f800000,3 +np.float32,0x3f2ccc6e,0x3f76ca23,3 +np.float32,0x35eada,0x35eada,3 +np.float32,0xff111f42,0xbf800000,3 +np.float32,0x3ee1ab7f,0x3f0dcbbe,3 +np.float32,0xbf6e89ee,0xbf1b2cd4,3 +np.float32,0x3f58611c,0x3faa0cdc,3 +np.float32,0x1ac6a6,0x1ac6a6,3 +np.float32,0xbf1286fa,0xbedf2312,3 +np.float32,0x7e451137,0x7f800000,3 +np.float32,0xbe92c326,0xbe7f3405,3 +np.float32,0x3f2fdd16,0x3f7cd87b,3 +np.float32,0xbe5c0ea0,0xbe4604c2,3 +np.float32,0xbdb29968,0xbdab0883,3 +np.float32,0x3964,0x3964,3 +np.float32,0x3f0dc236,0x3f3d60a0,3 +np.float32,0x7c3faf06,0x7f800000,3 +np.float32,0xbef41f7a,0xbec22b16,3 +np.float32,0x3f4c0289,0x3f9bfdcc,3 +np.float32,0x806084e9,0x806084e9,3 +np.float32,0x3ed1d8dd,0x3f01b0c1,3 +np.float32,0x806d8d8b,0x806d8d8b,3 +np.float32,0x3f052180,0x3f2e9e0a,3 +np.float32,0x803d85d5,0x803d85d5,3 +np.float32,0x3e0afd70,0x3e14dd48,3 +np.float32,0x2fbc63,0x2fbc63,3 +np.float32,0x2e436f,0x2e436f,3 +np.float32,0xbf7b19e6,0xbf2000da,3 +np.float32,0x3f34022e,0x3f829362,3 +np.float32,0x3d2b40e0,0x3d2ee246,3 +np.float32,0x3f5298b4,0x3fa3649b,3 +np.float32,0xbdb01328,0xbda8b7de,3 +np.float32,0x7f693c81,0x7f800000,3 +np.float32,0xbeb1abc0,0xbe961edc,3 +np.float32,0x801d9b5d,0x801d9b5d,3 +np.float32,0x80628668,0x80628668,3 +np.float32,0x800f57dd,0x800f57dd,3 +np.float32,0x8017c94f,0x8017c94f,3 +np.float32,0xbf16f5f4,0xbee418b8,3 +np.float32,0x3e686476,0x3e827022,3 +np.float32,0xbf256796,0xbef3abd9,3 +np.float32,0x7f1b4485,0x7f800000,3 +np.float32,0xbea0b3cc,0xbe89ed21,3 +np.float32,0xfee08b2e,0xbf800000,3 +np.float32,0x523cb4,0x523cb4,3 +np.float32,0x3daf2cb2,0x3db6e273,3 +np.float32,0xbd531c40,0xbd4dc323,3 +np.float32,0x80078fe5,0x80078fe5,3 +np.float32,0x80800000,0x80800000,3 +np.float32,0x3f232438,0x3f642d1a,3 +np.float32,0x3ec29446,0x3eecb7c0,3 +np.float32,0x3dbcd2a4,0x3dc5cd1d,3 +np.float32,0x7f045b0d,0x7f800000,3 +np.float32,0x7f22e6d1,0x7f800000,3 +np.float32,0xbf5d3430,0xbf141c80,3 +np.float32,0xbe03ec70,0xbdf78ee6,3 +np.float32,0x3e93ec9a,0x3eab822f,3 +np.float32,0x7f3b9262,0x7f800000,3 +np.float32,0x65ac6a,0x65ac6a,3 +np.float32,0x3db9a8,0x3db9a8,3 +np.float32,0xbf37ab59,0xbf031306,3 +np.float32,0x33c40e,0x33c40e,3 +np.float32,0x7f7a478f,0x7f800000,3 +np.float32,0xbe8532d0,0xbe6a906f,3 +np.float32,0x801c081d,0x801c081d,3 +np.float32,0xbe4212a0,0xbe30ca73,3 +np.float32,0xff0b603e,0xbf800000,3 +np.float32,0x4554dc,0x4554dc,3 +np.float32,0x3dd324be,0x3dde695e,3 +np.float32,0x3f224c44,0x3f629557,3 +np.float32,0x8003cd79,0x8003cd79,3 +np.float32,0xbf31351c,0xbeffc2fd,3 +np.float32,0x8034603a,0x8034603a,3 +np.float32,0xbf6fcb70,0xbf1bab24,3 +np.float32,0x804eb67e,0x804eb67e,3 +np.float32,0xff05c00e,0xbf800000,3 +np.float32,0x3eb5b36f,0x3eda1ec7,3 +np.float32,0x3f1ed7f9,0x3f5c1d90,3 +np.float32,0x3f052d8a,0x3f2eb24b,3 +np.float32,0x5ddf51,0x5ddf51,3 +np.float32,0x7e50c11c,0x7f800000,3 +np.float32,0xff74f55a,0xbf800000,3 +np.float32,0x4322d,0x4322d,3 +np.float32,0x3f16f8a9,0x3f4db27a,3 +np.float32,0x3f4f23d6,0x3f9f7c2c,3 +np.float32,0xbf706c1e,0xbf1bea0a,3 +np.float32,0x3f2cbd52,0x3f76ac77,3 +np.float32,0xf3043,0xf3043,3 +np.float32,0xfee79de0,0xbf800000,3 +np.float32,0x7e942f69,0x7f800000,3 +np.float32,0x180139,0x180139,3 +np.float32,0xff69c678,0xbf800000,3 
+np.float32,0x3f46773f,0x3f95e840,3 +np.float32,0x804aae1c,0x804aae1c,3 +np.float32,0x3eb383b4,0x3ed7024c,3 +np.float32,0x8032624e,0x8032624e,3 +np.float32,0xbd0a0f80,0xbd07c27d,3 +np.float32,0xbf1c9b98,0xbeea4a61,3 +np.float32,0x7f370999,0x7f800000,3 +np.float32,0x801931f9,0x801931f9,3 +np.float32,0x3f6f45ce,0x3fc5eea0,3 +np.float32,0xff0ab4cc,0xbf800000,3 +np.float32,0x4c043d,0x4c043d,3 +np.float32,0x8002a599,0x8002a599,3 +np.float32,0xbc4a6080,0xbc4921d7,3 +np.float32,0x3f008d14,0x3f26fb72,3 +np.float32,0x7f48b3d9,0x7f800000,3 +np.float32,0x7cb2ec7e,0x7f800000,3 +np.float32,0xbf1338bd,0xbedfeb61,3 +np.float32,0x0,0x0,3 +np.float32,0xbf2f5b64,0xbefde71c,3 +np.float32,0xbe422974,0xbe30dd56,3 +np.float32,0x3f776be8,0x3fd07950,3 +np.float32,0xbf3e97a1,0xbf06684a,3 +np.float32,0x7d28cb26,0x7f800000,3 +np.float32,0x801618d2,0x801618d2,3 +np.float32,0x807e4f83,0x807e4f83,3 +np.float32,0x8006b07d,0x8006b07d,3 +np.float32,0xfea1c042,0xbf800000,3 +np.float32,0xff24ef74,0xbf800000,3 +np.float32,0xfef7ab16,0xbf800000,3 +np.float32,0x70b771,0x70b771,3 +np.float32,0x7daeb64e,0x7f800000,3 +np.float32,0xbe66e378,0xbe4eb59c,3 +np.float32,0xbead1534,0xbe92dcf7,3 +np.float32,0x7e6769b8,0x7f800000,3 +np.float32,0x7ecd0890,0x7f800000,3 +np.float32,0xbe7380d8,0xbe58b747,3 +np.float32,0x3efa6f2f,0x3f218265,3 +np.float32,0x3f59dada,0x3fabc5eb,3 +np.float32,0xff0f2d20,0xbf800000,3 +np.float32,0x8060210e,0x8060210e,3 +np.float32,0x3ef681e8,0x3f1e51c8,3 +np.float32,0x77a6dd,0x77a6dd,3 +np.float32,0xbebfdd0e,0xbea00399,3 +np.float32,0xfe889b72,0xbf800000,3 +np.float32,0x8049ed2c,0x8049ed2c,3 +np.float32,0x3b089dc4,0x3b08c23e,3 +np.float32,0xbf13c7c4,0xbee08c28,3 +np.float32,0x3efa13b9,0x3f2137d7,3 +np.float32,0x3e9385dc,0x3eaaf914,3 +np.float32,0x7e0e6a43,0x7f800000,3 +np.float32,0x7df6d63f,0x7f800000,3 +np.float32,0x3f3efead,0x3f8dea03,3 +np.float32,0xff52548c,0xbf800000,3 +np.float32,0x803ff9d8,0x803ff9d8,3 +np.float32,0x3c825823,0x3c836303,3 +np.float32,0xfc9e97a0,0xbf800000,3 +np.float32,0xfe644f48,0xbf800000,3 +np.float32,0x802f5017,0x802f5017,3 +np.float32,0x3d5753b9,0x3d5d1661,3 +np.float32,0x7f2a55d2,0x7f800000,3 +np.float32,0x7f4dabfe,0x7f800000,3 +np.float32,0x3f49492a,0x3f98fc47,3 +np.float32,0x3f4d1589,0x3f9d2f82,3 +np.float32,0xff016208,0xbf800000,3 +np.float32,0xbf571cb7,0xbf118365,3 +np.float32,0xbf1ef297,0xbeecd136,3 +np.float32,0x36266b,0x36266b,3 +np.float32,0xbed07b0e,0xbeab4129,3 +np.float32,0x7f553365,0x7f800000,3 +np.float32,0xfe9bb8c6,0xbf800000,3 +np.float32,0xbeb497d6,0xbe982e19,3 +np.float32,0xbf27af6c,0xbef60d16,3 +np.float32,0x55cf51,0x55cf51,3 +np.float32,0x3eab1db0,0x3ecb2e4f,3 +np.float32,0x3e777603,0x3e8bf62f,3 +np.float32,0x7f10e374,0x7f800000,3 +np.float32,0xbf1f6480,0xbeed4b8d,3 +np.float32,0x40479d,0x40479d,3 +np.float32,0x156259,0x156259,3 +np.float32,0x3d852e30,0x3d899b2d,3 +np.float32,0x80014ff3,0x80014ff3,3 +np.float32,0xbd812fa8,0xbd7a645c,3 +np.float32,0x800ab780,0x800ab780,3 +np.float32,0x3ea02ff4,0x3ebc13bd,3 +np.float32,0x7e858b8e,0x7f800000,3 +np.float32,0x75d63b,0x75d63b,3 +np.float32,0xbeb15c94,0xbe95e6e3,3 +np.float32,0x3da0cee0,0x3da74a39,3 +np.float32,0xff21c01c,0xbf800000,3 +np.float32,0x8049b5eb,0x8049b5eb,3 +np.float32,0x80177ab0,0x80177ab0,3 +np.float32,0xff137a50,0xbf800000,3 +np.float32,0x3f7febba,0x3fdbd51c,3 +np.float32,0x8041e4dd,0x8041e4dd,3 +np.float32,0x99b8c,0x99b8c,3 +np.float32,0x5621ba,0x5621ba,3 +np.float32,0x14b534,0x14b534,3 +np.float32,0xbe2eb3a8,0xbe209c95,3 +np.float32,0x7e510c28,0x7f800000,3 +np.float32,0x804ec2f2,0x804ec2f2,3 
+np.float32,0x3f662406,0x3fba82b0,3 +np.float32,0x800000,0x800000,3 +np.float32,0x3f3120d6,0x3f7f5d96,3 +np.float32,0x7f179b8e,0x7f800000,3 +np.float32,0x7f65278e,0x7f800000,3 +np.float32,0xfeb50f52,0xbf800000,3 +np.float32,0x7f051bd1,0x7f800000,3 +np.float32,0x7ea0558d,0x7f800000,3 +np.float32,0xbd0a96c0,0xbd08453f,3 +np.float64,0xee82da5ddd05c,0xee82da5ddd05c,3 +np.float64,0x800c3a22d7f87446,0x800c3a22d7f87446,3 +np.float64,0xbfd34b20eaa69642,0xbfd0a825e7688d3e,3 +np.float64,0x3fd6a0f2492d41e5,0x3fdb253b906057b3,3 +np.float64,0xbfda13d8783427b0,0xbfd56b1d76684332,3 +np.float64,0xbfe50b5a99ea16b5,0xbfded7dd82c6f746,3 +np.float64,0x3f82468fc0248d20,0x3f825b7fa9378ee9,3 +np.float64,0x7ff0000000000000,0x7ff0000000000000,3 +np.float64,0x856e50290adca,0x856e50290adca,3 +np.float64,0x7fde55a5fa3cab4b,0x7ff0000000000000,3 +np.float64,0x7fcf2c8dd93e591b,0x7ff0000000000000,3 +np.float64,0x8001b3a0e3236743,0x8001b3a0e3236743,3 +np.float64,0x8000fdb14821fb63,0x8000fdb14821fb63,3 +np.float64,0xbfe3645e08e6c8bc,0xbfdd161362a5e9ef,3 +np.float64,0x7feb34d28b3669a4,0x7ff0000000000000,3 +np.float64,0x80099dd810933bb1,0x80099dd810933bb1,3 +np.float64,0xbfedbcc1097b7982,0xbfe35d86414d53dc,3 +np.float64,0x7fdc406fbdb880de,0x7ff0000000000000,3 +np.float64,0x800c4bf85ab897f1,0x800c4bf85ab897f1,3 +np.float64,0x3fd8f7b0e0b1ef60,0x3fde89b497ae20d8,3 +np.float64,0xffe4fced5c69f9da,0xbff0000000000000,3 +np.float64,0xbfe54d421fea9a84,0xbfdf1be0cbfbfcba,3 +np.float64,0x800af72f3535ee5f,0x800af72f3535ee5f,3 +np.float64,0x3fe24e6570e49ccb,0x3fe8b3a86d970411,3 +np.float64,0xbfdd7b22d0baf646,0xbfd79fac2e4f7558,3 +np.float64,0xbfe6a7654c6d4eca,0xbfe03c1f13f3b409,3 +np.float64,0x3fe2c3eb662587d7,0x3fe98566e625d4f5,3 +np.float64,0x3b1ef71e763e0,0x3b1ef71e763e0,3 +np.float64,0xffed03c6baba078d,0xbff0000000000000,3 +np.float64,0x3febac19d0b75834,0x3ff5fdacc9d51bcd,3 +np.float64,0x800635d6794c6bae,0x800635d6794c6bae,3 +np.float64,0xbfe8cafc827195f9,0xbfe1411438608ae1,3 +np.float64,0x7feeb616a83d6c2c,0x7ff0000000000000,3 +np.float64,0x3fd52d62a2aa5ac5,0x3fd91a07a7f18f44,3 +np.float64,0x80036996b8a6d32e,0x80036996b8a6d32e,3 +np.float64,0x2b1945965632a,0x2b1945965632a,3 +np.float64,0xbfecb5e8c9796bd2,0xbfe2f40fca276aa2,3 +np.float64,0x3fe8669ed4f0cd3e,0x3ff24c89fc9cdbff,3 +np.float64,0x71e9f65ee3d3f,0x71e9f65ee3d3f,3 +np.float64,0xbfd5ab262bab564c,0xbfd261ae108ef79e,3 +np.float64,0xbfe7091342ee1226,0xbfe06bf5622d75f6,3 +np.float64,0x49e888d093d12,0x49e888d093d12,3 +np.float64,0x2272f3dc44e5f,0x2272f3dc44e5f,3 +np.float64,0x7fe98736e0b30e6d,0x7ff0000000000000,3 +np.float64,0x30fa9cde61f54,0x30fa9cde61f54,3 +np.float64,0x7fdc163fc0382c7f,0x7ff0000000000000,3 +np.float64,0xffb40d04ee281a08,0xbff0000000000000,3 +np.float64,0xffe624617f2c48c2,0xbff0000000000000,3 +np.float64,0x3febb582bd376b05,0x3ff608da584d1716,3 +np.float64,0xfc30a5a5f8615,0xfc30a5a5f8615,3 +np.float64,0x3fef202efd7e405e,0x3ffa52009319b069,3 +np.float64,0x8004d0259829a04c,0x8004d0259829a04c,3 +np.float64,0x800622dc71ec45ba,0x800622dc71ec45ba,3 +np.float64,0xffefffffffffffff,0xbff0000000000000,3 +np.float64,0x800e89113c9d1223,0x800e89113c9d1223,3 +np.float64,0x7fba7fde3034ffbb,0x7ff0000000000000,3 +np.float64,0xbfeea31e807d463d,0xbfe3b7369b725915,3 +np.float64,0x3feb7c9589f6f92c,0x3ff5c56cf71b0dff,3 +np.float64,0x3fd52d3b59aa5a77,0x3fd919d0f683fd07,3 +np.float64,0x800de90a43fbd215,0x800de90a43fbd215,3 +np.float64,0x3fe7eb35a9efd66b,0x3ff1c940dbfc6ef9,3 +np.float64,0xbda0adcb7b416,0xbda0adcb7b416,3 +np.float64,0x7fc5753e3a2aea7b,0x7ff0000000000000,3 
+np.float64,0xffdd101d103a203a,0xbff0000000000000,3 +np.float64,0x7fcb54f56836a9ea,0x7ff0000000000000,3 +np.float64,0xbfd61c8d6eac391a,0xbfd2b23bc0a2cef4,3 +np.float64,0x3feef55de37deabc,0x3ffa198639a0161d,3 +np.float64,0x7fe4ffbfaea9ff7e,0x7ff0000000000000,3 +np.float64,0x9d1071873a20e,0x9d1071873a20e,3 +np.float64,0x3fef1ecb863e3d97,0x3ffa502a81e09cfc,3 +np.float64,0xad2da12b5a5b4,0xad2da12b5a5b4,3 +np.float64,0xffe614b74c6c296e,0xbff0000000000000,3 +np.float64,0xffe60d3f286c1a7e,0xbff0000000000000,3 +np.float64,0x7fda7d91f4b4fb23,0x7ff0000000000000,3 +np.float64,0x800023f266a047e6,0x800023f266a047e6,3 +np.float64,0x7fdf5f9ad23ebf35,0x7ff0000000000000,3 +np.float64,0x3fa7459f002e8b3e,0x3fa7cf178dcf0af6,3 +np.float64,0x3fe9938d61f3271b,0x3ff39516a13caec3,3 +np.float64,0xbfd59314c3ab262a,0xbfd250830f73efd2,3 +np.float64,0xbfc7e193f72fc328,0xbfc5c924339dd7a8,3 +np.float64,0x7fec1965f17832cb,0x7ff0000000000000,3 +np.float64,0xbfd932908eb26522,0xbfd4d4312d272580,3 +np.float64,0xbfdf2d08e2be5a12,0xbfd8add1413b0b1b,3 +np.float64,0x7fdcf7cc74b9ef98,0x7ff0000000000000,3 +np.float64,0x7fc79300912f2600,0x7ff0000000000000,3 +np.float64,0xffd4bd8f23297b1e,0xbff0000000000000,3 +np.float64,0x41869ce0830e,0x41869ce0830e,3 +np.float64,0x3fe5dcec91ebb9da,0x3fef5e213598cbd4,3 +np.float64,0x800815d9c2902bb4,0x800815d9c2902bb4,3 +np.float64,0x800ba1a4b877434a,0x800ba1a4b877434a,3 +np.float64,0x80069d7bdc4d3af8,0x80069d7bdc4d3af8,3 +np.float64,0xcf00d4339e01b,0xcf00d4339e01b,3 +np.float64,0x80072b71bd4e56e4,0x80072b71bd4e56e4,3 +np.float64,0x80059ca6fbab394f,0x80059ca6fbab394f,3 +np.float64,0x3fe522fc092a45f8,0x3fedf212682bf894,3 +np.float64,0x7fe17f384ea2fe70,0x7ff0000000000000,3 +np.float64,0x0,0x0,3 +np.float64,0x3f72bb4c20257698,0x3f72c64766b52069,3 +np.float64,0x7fbc97c940392f92,0x7ff0000000000000,3 +np.float64,0xffc5904ebd2b209c,0xbff0000000000000,3 +np.float64,0xbfe34fb55b669f6a,0xbfdcff81dd30a49d,3 +np.float64,0x8007ccda006f99b5,0x8007ccda006f99b5,3 +np.float64,0x3fee50e4c8fca1ca,0x3ff9434c7750ad0f,3 +np.float64,0x7fee7b07c67cf60f,0x7ff0000000000000,3 +np.float64,0x3fdcce4a5a399c95,0x3fe230c83f28218a,3 +np.float64,0x7fee5187b37ca30e,0x7ff0000000000000,3 +np.float64,0x3fc48f6a97291ed8,0x3fc64db6200a9833,3 +np.float64,0xc7fec3498ffd9,0xc7fec3498ffd9,3 +np.float64,0x800769c59d2ed38c,0x800769c59d2ed38c,3 +np.float64,0xffe69ede782d3dbc,0xbff0000000000000,3 +np.float64,0x3fecd9770979b2ee,0x3ff76a1f2f0f08f2,3 +np.float64,0x5aa358a8b546c,0x5aa358a8b546c,3 +np.float64,0xbfe795a0506f2b40,0xbfe0afcc52c0166b,3 +np.float64,0xffd4ada1e8a95b44,0xbff0000000000000,3 +np.float64,0xffcac1dc213583b8,0xbff0000000000000,3 +np.float64,0xffe393c15fa72782,0xbff0000000000000,3 +np.float64,0xbfcd6a3c113ad478,0xbfca47a2157b9cdd,3 +np.float64,0xffedde20647bbc40,0xbff0000000000000,3 +np.float64,0x3fd0d011b1a1a024,0x3fd33a57945559f4,3 +np.float64,0x3fef27e29f7e4fc6,0x3ffa5c314e0e3d69,3 +np.float64,0xffe96ff71f72dfee,0xbff0000000000000,3 +np.float64,0xffe762414f2ec482,0xbff0000000000000,3 +np.float64,0x3fc2dcfd3d25b9fa,0x3fc452f41682a12e,3 +np.float64,0xbfbdb125b63b6248,0xbfbc08e6553296d4,3 +np.float64,0x7b915740f724,0x7b915740f724,3 +np.float64,0x60b502b2c16a1,0x60b502b2c16a1,3 +np.float64,0xbfeb38b0be367162,0xbfe254f6782cfc47,3 +np.float64,0x800dc39a3edb8735,0x800dc39a3edb8735,3 +np.float64,0x3fea4fb433349f68,0x3ff468b97cf699f5,3 +np.float64,0xbfd49967962932d0,0xbfd19ceb41ff4cd0,3 +np.float64,0xbfebf75cd377eeba,0xbfe2a576bdbccccc,3 +np.float64,0xbfb653d65c2ca7b0,0xbfb561ab8fcb3f26,3 
+np.float64,0xffe3f34b8727e696,0xbff0000000000000,3 +np.float64,0x3fdd798064baf301,0x3fe2b7c130a6fc63,3 +np.float64,0x3febe027e6b7c050,0x3ff63bac1b22e12d,3 +np.float64,0x7fcaa371af3546e2,0x7ff0000000000000,3 +np.float64,0xbfe6ee980a2ddd30,0xbfe05f0bc5dc80d2,3 +np.float64,0xc559c33f8ab39,0xc559c33f8ab39,3 +np.float64,0x84542c2b08a86,0x84542c2b08a86,3 +np.float64,0xbfe5645e046ac8bc,0xbfdf3398dc3cc1bd,3 +np.float64,0x3fee8c48ae7d1892,0x3ff9902899480526,3 +np.float64,0x3fb706471c2e0c8e,0x3fb817787aace8db,3 +np.float64,0x7fefe78f91ffcf1e,0x7ff0000000000000,3 +np.float64,0xbfcf6d560b3edaac,0xbfcbddc72a2130df,3 +np.float64,0x7fd282bfd925057f,0x7ff0000000000000,3 +np.float64,0x3fb973dbee32e7b8,0x3fbac2c87cbd0215,3 +np.float64,0x3fd1ce38ff239c72,0x3fd4876de5164420,3 +np.float64,0x8008ac2e3c31585d,0x8008ac2e3c31585d,3 +np.float64,0x3fa05e06dc20bc00,0x3fa0a1b7de904dce,3 +np.float64,0x7fd925f215324be3,0x7ff0000000000000,3 +np.float64,0x3f949d95d0293b2c,0x3f94d31197d51874,3 +np.float64,0xffdded9e67bbdb3c,0xbff0000000000000,3 +np.float64,0x3fed390dcfba721c,0x3ff7e08c7a709240,3 +np.float64,0x7fe6e62300adcc45,0x7ff0000000000000,3 +np.float64,0xbfd779bc312ef378,0xbfd3a6cb64bb0181,3 +np.float64,0x3fe43e9877287d31,0x3fec3e100ef935fd,3 +np.float64,0x210b68e44216e,0x210b68e44216e,3 +np.float64,0x3fcdffc1e73bff84,0x3fd0e729d02ec539,3 +np.float64,0xcea10c0f9d422,0xcea10c0f9d422,3 +np.float64,0x7feb97a82d772f4f,0x7ff0000000000000,3 +np.float64,0x9b4b4d953696a,0x9b4b4d953696a,3 +np.float64,0x3fd1bd8e95237b1d,0x3fd4716dd34cf828,3 +np.float64,0x800fc273841f84e7,0x800fc273841f84e7,3 +np.float64,0xbfd2aef167255de2,0xbfd0340f30d82f18,3 +np.float64,0x800d021a551a0435,0x800d021a551a0435,3 +np.float64,0xffebf934a8b7f268,0xbff0000000000000,3 +np.float64,0x3fd819849fb03308,0x3fdd43bca0aac749,3 +np.float64,0x7ff8000000000000,0x7ff8000000000000,3 +np.float64,0x27c34b064f86a,0x27c34b064f86a,3 +np.float64,0x7fef4f5a373e9eb3,0x7ff0000000000000,3 +np.float64,0x7fd92fccce325f99,0x7ff0000000000000,3 +np.float64,0x800520869d6a410e,0x800520869d6a410e,3 +np.float64,0x3fccbcaddf397958,0x3fd01bf6b0c4d97f,3 +np.float64,0x80039ebfc4273d80,0x80039ebfc4273d80,3 +np.float64,0xbfed1f0b3c7a3e16,0xbfe31ea6e4c69141,3 +np.float64,0x7fee1bb7c4bc376f,0x7ff0000000000000,3 +np.float64,0xbfa8bee1d8317dc0,0xbfa8283b7dbf95a9,3 +np.float64,0x3fe797db606f2fb6,0x3ff171b1c2bc8fe5,3 +np.float64,0xbfee2ecfdbbc5da0,0xbfe38a3f0a43d14e,3 +np.float64,0x3fe815c7f1302b90,0x3ff1f65165c45d71,3 +np.float64,0xbfbb265c94364cb8,0xbfb9c27ec61a9a1d,3 +np.float64,0x3fcf1cab5d3e3957,0x3fd19c07444642f9,3 +np.float64,0xbfe6ae753f6d5cea,0xbfe03f99666dbe17,3 +np.float64,0xbfd18a2a73a31454,0xbfceaee204aca016,3 +np.float64,0x3fb8a1dffc3143c0,0x3fb9db38341ab1a3,3 +np.float64,0x7fd2a0376025406e,0x7ff0000000000000,3 +np.float64,0x7fe718c0e3ae3181,0x7ff0000000000000,3 +np.float64,0x3fb264d42424c9a8,0x3fb3121f071d4db4,3 +np.float64,0xd27190a7a4e32,0xd27190a7a4e32,3 +np.float64,0xbfe467668c68cecd,0xbfde2c4616738d5e,3 +np.float64,0x800ab9a2b9357346,0x800ab9a2b9357346,3 +np.float64,0x7fcbd108d537a211,0x7ff0000000000000,3 +np.float64,0x3fb79bba6e2f3770,0x3fb8bb2c140d3445,3 +np.float64,0xffefa7165e3f4e2c,0xbff0000000000000,3 +np.float64,0x7fb40185a428030a,0x7ff0000000000000,3 +np.float64,0xbfe9e3d58e73c7ab,0xbfe1c04d51c83d69,3 +np.float64,0x7fef5b97b17eb72e,0x7ff0000000000000,3 +np.float64,0x800a2957683452af,0x800a2957683452af,3 +np.float64,0x800f54f1925ea9e3,0x800f54f1925ea9e3,3 +np.float64,0xeffa4e77dff4a,0xeffa4e77dff4a,3 +np.float64,0xffbe501aa03ca038,0xbff0000000000000,3 
+np.float64,0x8006c651bced8ca4,0x8006c651bced8ca4,3 +np.float64,0x3fe159faff22b3f6,0x3fe708f78efbdbed,3 +np.float64,0x800e7d59a31cfab3,0x800e7d59a31cfab3,3 +np.float64,0x3fe6ac2f272d585e,0x3ff07ee5305385c3,3 +np.float64,0x7fd014c054202980,0x7ff0000000000000,3 +np.float64,0xbfe4800b11e90016,0xbfde4648c6f29ce5,3 +np.float64,0xbfe6738470ece709,0xbfe0227b5b42b713,3 +np.float64,0x3fed052add3a0a56,0x3ff7a01819e65c6e,3 +np.float64,0xffe03106f120620e,0xbff0000000000000,3 +np.float64,0x7fe11df4d4e23be9,0x7ff0000000000000,3 +np.float64,0xbfcea25d7b3d44bc,0xbfcb3e808e7ce852,3 +np.float64,0xd0807b03a1010,0xd0807b03a1010,3 +np.float64,0x8004eda4fec9db4b,0x8004eda4fec9db4b,3 +np.float64,0x3fceb5c98d3d6b90,0x3fd15a894b15dd9f,3 +np.float64,0xbfee27228afc4e45,0xbfe38741702f3c0b,3 +np.float64,0xbfe606278c6c0c4f,0xbfdfd7cb6093652d,3 +np.float64,0xbfd66f59bc2cdeb4,0xbfd2ecb2297f6afc,3 +np.float64,0x4aee390095dc8,0x4aee390095dc8,3 +np.float64,0xbfe391355d67226a,0xbfdd46ddc0997014,3 +np.float64,0xffd27765e7a4eecc,0xbff0000000000000,3 +np.float64,0xbfe795e20a2f2bc4,0xbfe0afebc66c4dbd,3 +np.float64,0x7fc9a62e81334c5c,0x7ff0000000000000,3 +np.float64,0xffe4e57e52a9cafc,0xbff0000000000000,3 +np.float64,0x7fac326c8c3864d8,0x7ff0000000000000,3 +np.float64,0x3fe8675f6370cebf,0x3ff24d5863029c15,3 +np.float64,0x7fcf4745e73e8e8b,0x7ff0000000000000,3 +np.float64,0x7fcc9aec9f3935d8,0x7ff0000000000000,3 +np.float64,0x3fec2e8fcab85d20,0x3ff699ccd0b2fed6,3 +np.float64,0x3fd110a968222153,0x3fd38e81a88c2d13,3 +np.float64,0xffb3a68532274d08,0xbff0000000000000,3 +np.float64,0xf0e562bbe1cad,0xf0e562bbe1cad,3 +np.float64,0xbfe815b9e5f02b74,0xbfe0ec9f5023aebc,3 +np.float64,0xbf5151d88022a400,0xbf514f80c465feea,3 +np.float64,0x2547e3144a8fd,0x2547e3144a8fd,3 +np.float64,0x3fedcc0c28fb9818,0x3ff899612fbeb4c5,3 +np.float64,0x3fdc3d1c0f387a38,0x3fe1bf6e2d39bd75,3 +np.float64,0x7fe544dbe62a89b7,0x7ff0000000000000,3 +np.float64,0x8001500e48e2a01d,0x8001500e48e2a01d,3 +np.float64,0xbfed3b2b09fa7656,0xbfe329f3e7bada64,3 +np.float64,0xbfe76a943aeed528,0xbfe09b24e3aa3f79,3 +np.float64,0x3fe944330e328866,0x3ff33d472dee70c5,3 +np.float64,0x8004bbbd6cc9777c,0x8004bbbd6cc9777c,3 +np.float64,0xbfe28133fb650268,0xbfdc1ac230ac4ef5,3 +np.float64,0xc1370af7826e2,0xc1370af7826e2,3 +np.float64,0x7fcfa47f5f3f48fe,0x7ff0000000000000,3 +np.float64,0xbfa3002a04260050,0xbfa2a703a538b54e,3 +np.float64,0xffef44f3903e89e6,0xbff0000000000000,3 +np.float64,0xc32cce298659a,0xc32cce298659a,3 +np.float64,0x7b477cc2f68f0,0x7b477cc2f68f0,3 +np.float64,0x40a7f4ec814ff,0x40a7f4ec814ff,3 +np.float64,0xffee38edf67c71db,0xbff0000000000000,3 +np.float64,0x3fe23f6f1ce47ede,0x3fe8992b8bb03499,3 +np.float64,0x7fc8edfe7f31dbfc,0x7ff0000000000000,3 +np.float64,0x800bb8e6fb3771ce,0x800bb8e6fb3771ce,3 +np.float64,0xbfe11d364ee23a6c,0xbfda82a0c2ef9e46,3 +np.float64,0xbfeb993cb4b7327a,0xbfe27df565da85dc,3 +np.float64,0x10000000000000,0x10000000000000,3 +np.float64,0x3fc1f997d723f330,0x3fc34c5cff060af1,3 +np.float64,0x6e326fa0dc64f,0x6e326fa0dc64f,3 +np.float64,0x800fa30c2c5f4618,0x800fa30c2c5f4618,3 +np.float64,0x7fed16ad603a2d5a,0x7ff0000000000000,3 +np.float64,0x9411cf172823a,0x9411cf172823a,3 +np.float64,0xffece51d4cb9ca3a,0xbff0000000000000,3 +np.float64,0x3fdda3d1453b47a3,0x3fe2d954f7849890,3 +np.float64,0xffd58330172b0660,0xbff0000000000000,3 +np.float64,0xbfc6962ae52d2c54,0xbfc4b4bdf0069f17,3 +np.float64,0xbfb4010a8e280218,0xbfb33e1236f7efa0,3 +np.float64,0x7fd0444909208891,0x7ff0000000000000,3 +np.float64,0xbfe027a24de04f44,0xbfd95e9064101e7c,3 
+np.float64,0xa6f3f3214de9,0xa6f3f3214de9,3 +np.float64,0xbfe112eb0fe225d6,0xbfda768f7cbdf346,3 +np.float64,0xbfe99e90d4b33d22,0xbfe1a153e45a382a,3 +np.float64,0xffecb34f8e79669e,0xbff0000000000000,3 +np.float64,0xbfdf32c9653e6592,0xbfd8b159caf5633d,3 +np.float64,0x3fe9519829b2a330,0x3ff34c0a8152e20f,3 +np.float64,0xffd08ec8a7a11d92,0xbff0000000000000,3 +np.float64,0xffd19b71b6a336e4,0xbff0000000000000,3 +np.float64,0x7feda6b9377b4d71,0x7ff0000000000000,3 +np.float64,0x800fda2956bfb453,0x800fda2956bfb453,3 +np.float64,0x3fe54f601bea9ec0,0x3fee483cb03cbde4,3 +np.float64,0xbfe2a8ad5ee5515a,0xbfdc46ee7a10bf0d,3 +np.float64,0xbfd336c8bd266d92,0xbfd09916d432274a,3 +np.float64,0xfff0000000000000,0xbff0000000000000,3 +np.float64,0x3fd9a811a9b35024,0x3fdf8fa68cc048e3,3 +np.float64,0x3fe078c68520f18d,0x3fe58aecc1f9649b,3 +np.float64,0xbfc6d5aa3a2dab54,0xbfc4e9ea84f3d73c,3 +np.float64,0xf9682007f2d04,0xf9682007f2d04,3 +np.float64,0x3fee54523dbca8a4,0x3ff947b826de81f4,3 +np.float64,0x80461e5d008c4,0x80461e5d008c4,3 +np.float64,0x3fdd6d12d5bada26,0x3fe2ade8dee2fa02,3 +np.float64,0x3fcd5f0dfd3abe18,0x3fd081d6cd25731d,3 +np.float64,0x7fa36475c826c8eb,0x7ff0000000000000,3 +np.float64,0xbfdf3ce052be79c0,0xbfd8b78baccfb908,3 +np.float64,0x7fcd890dd13b121b,0x7ff0000000000000,3 +np.float64,0x8000000000000001,0x8000000000000001,3 +np.float64,0x800ec0f4281d81e8,0x800ec0f4281d81e8,3 +np.float64,0xbfba960116352c00,0xbfb94085424496d9,3 +np.float64,0x3fdddedc9bbbbdb8,0x3fe30853fe4ef5ce,3 +np.float64,0x238092a847013,0x238092a847013,3 +np.float64,0xbfe38d4803271a90,0xbfdd429a955c46af,3 +np.float64,0xbfd4c9067329920c,0xbfd1bf6255ed91a4,3 +np.float64,0xbfbee213923dc428,0xbfbd17ce1bda6088,3 +np.float64,0xffd5a2d337ab45a6,0xbff0000000000000,3 +np.float64,0x7fe21bfcf82437f9,0x7ff0000000000000,3 +np.float64,0x3fe2a2714da544e3,0x3fe949594a74ea25,3 +np.float64,0x800e05cf8ebc0b9f,0x800e05cf8ebc0b9f,3 +np.float64,0x559a1526ab343,0x559a1526ab343,3 +np.float64,0xffe6a1b7906d436e,0xbff0000000000000,3 +np.float64,0xffef27d6253e4fab,0xbff0000000000000,3 +np.float64,0xbfe0f90ab0a1f216,0xbfda5828a1edde48,3 +np.float64,0x9675d2ab2cebb,0x9675d2ab2cebb,3 +np.float64,0xffee0f7eecfc1efd,0xbff0000000000000,3 +np.float64,0x2ec005625d801,0x2ec005625d801,3 +np.float64,0x7fde35ff14bc6bfd,0x7ff0000000000000,3 +np.float64,0xffe03f36d9e07e6d,0xbff0000000000000,3 +np.float64,0x7fe09ff7c4213fef,0x7ff0000000000000,3 +np.float64,0xffeac29dd1b5853b,0xbff0000000000000,3 +np.float64,0x3fb63120aa2c6241,0x3fb72ea3de98a853,3 +np.float64,0xffd079eb84a0f3d8,0xbff0000000000000,3 +np.float64,0xbfd3c2cc75a78598,0xbfd1005996880b3f,3 +np.float64,0x7fb80507ee300a0f,0x7ff0000000000000,3 +np.float64,0xffe8006105f000c1,0xbff0000000000000,3 +np.float64,0x8009138b0ab22716,0x8009138b0ab22716,3 +np.float64,0xbfd6dfb40b2dbf68,0xbfd33b8e4008e3b0,3 +np.float64,0xbfe7c2cf9bef859f,0xbfe0c55c807460df,3 +np.float64,0xbfe75fe4da6ebfca,0xbfe09600256d3b81,3 +np.float64,0xffd662fc73acc5f8,0xbff0000000000000,3 +np.float64,0x20b99dbc41735,0x20b99dbc41735,3 +np.float64,0x3fe10b38ade21671,0x3fe68229a9bbeefc,3 +np.float64,0x3743b99c6e878,0x3743b99c6e878,3 +np.float64,0xff9eb5ed903d6be0,0xbff0000000000000,3 +np.float64,0x3ff0000000000000,0x3ffb7e151628aed3,3 +np.float64,0xffb9e0569e33c0b0,0xbff0000000000000,3 +np.float64,0x7fd39c804fa73900,0x7ff0000000000000,3 +np.float64,0x3fe881ef67f103df,0x3ff269dd704b7129,3 +np.float64,0x1b6eb40236dd7,0x1b6eb40236dd7,3 +np.float64,0xbfe734ea432e69d4,0xbfe0813e6355d02f,3 +np.float64,0xffcf48f3743e91e8,0xbff0000000000000,3 
+np.float64,0xffed10bcf6fa2179,0xbff0000000000000,3 +np.float64,0x3fef07723b7e0ee4,0x3ffa3156123f3c15,3 +np.float64,0xffe45c704aa8b8e0,0xbff0000000000000,3 +np.float64,0xb7b818d96f703,0xb7b818d96f703,3 +np.float64,0x42fcc04085f99,0x42fcc04085f99,3 +np.float64,0xbfda7ced01b4f9da,0xbfd5b0ce1e5524ae,3 +np.float64,0xbfe1e5963d63cb2c,0xbfdb6a87b6c09185,3 +np.float64,0x7fdfa18003bf42ff,0x7ff0000000000000,3 +np.float64,0xbfe3790a43e6f214,0xbfdd2c9a38b4f089,3 +np.float64,0xffe0ff5b9ae1feb6,0xbff0000000000000,3 +np.float64,0x80085a7d3110b4fb,0x80085a7d3110b4fb,3 +np.float64,0xffd6bfa6622d7f4c,0xbff0000000000000,3 +np.float64,0xbfef5ddc7cfebbb9,0xbfe3fe170521593e,3 +np.float64,0x3fc21773fa242ee8,0x3fc36ebda1f91a72,3 +np.float64,0x7fc04d98da209b31,0x7ff0000000000000,3 +np.float64,0xbfeba3b535b7476a,0xbfe282602e3c322e,3 +np.float64,0xffd41fb5c1a83f6c,0xbff0000000000000,3 +np.float64,0xf87d206df0fa4,0xf87d206df0fa4,3 +np.float64,0x800060946fc0c12a,0x800060946fc0c12a,3 +np.float64,0x3fe69d5f166d3abe,0x3ff06fdddcf4ca93,3 +np.float64,0x7fe9b5793b336af1,0x7ff0000000000000,3 +np.float64,0x7fe0dd4143e1ba82,0x7ff0000000000000,3 +np.float64,0xbfa8eaea3c31d5d0,0xbfa8522e397da3bd,3 +np.float64,0x119f0078233e1,0x119f0078233e1,3 +np.float64,0xbfd78a207aaf1440,0xbfd3b225bbf2ab4f,3 +np.float64,0xc66a6d4d8cd4e,0xc66a6d4d8cd4e,3 +np.float64,0xe7fc4b57cff8a,0xe7fc4b57cff8a,3 +np.float64,0x800883e8091107d0,0x800883e8091107d0,3 +np.float64,0x3fa6520c842ca419,0x3fa6d06e1041743a,3 +np.float64,0x3fa563182c2ac630,0x3fa5d70e27a84c97,3 +np.float64,0xe6a30b61cd462,0xe6a30b61cd462,3 +np.float64,0x3fee85dac37d0bb6,0x3ff987cfa41a9778,3 +np.float64,0x3fe8f621db71ec44,0x3ff2e7b768a2e9d0,3 +np.float64,0x800f231d861e463b,0x800f231d861e463b,3 +np.float64,0xbfe22eb07c645d61,0xbfdbbdbb853ab4c6,3 +np.float64,0x7fd2dda2dea5bb45,0x7ff0000000000000,3 +np.float64,0xbfd09b79a0a136f4,0xbfcd4147606ffd27,3 +np.float64,0xca039cc394074,0xca039cc394074,3 +np.float64,0x8000000000000000,0x8000000000000000,3 +np.float64,0xcb34575d9668b,0xcb34575d9668b,3 +np.float64,0x3fea62c1f3f4c584,0x3ff47e6dc67ec89f,3 +np.float64,0x7fe544c8606a8990,0x7ff0000000000000,3 +np.float64,0xffe0a980c4615301,0xbff0000000000000,3 +np.float64,0x3fdd67d5f8bacfac,0x3fe2a9c3421830f1,3 +np.float64,0xffe41d3dda283a7b,0xbff0000000000000,3 +np.float64,0xffeed59e5ffdab3c,0xbff0000000000000,3 +np.float64,0xffeeae8326fd5d05,0xbff0000000000000,3 +np.float64,0x800d70b4fa7ae16a,0x800d70b4fa7ae16a,3 +np.float64,0xffec932e6839265c,0xbff0000000000000,3 +np.float64,0xee30b185dc616,0xee30b185dc616,3 +np.float64,0x7fc3cf4397279e86,0x7ff0000000000000,3 +np.float64,0xbfeab34f1875669e,0xbfe21b868229de7d,3 +np.float64,0xf45f5f7de8bec,0xf45f5f7de8bec,3 +np.float64,0x3fad2c4b203a5896,0x3fae0528b568f3cf,3 +np.float64,0xbfe2479543e48f2a,0xbfdbd9e57cf64028,3 +np.float64,0x3fd41a1473283429,0x3fd79df2bc60debb,3 +np.float64,0x3febb5155ef76a2a,0x3ff608585afd698b,3 +np.float64,0xffe21f5303e43ea6,0xbff0000000000000,3 +np.float64,0x7fe9ef390833de71,0x7ff0000000000000,3 +np.float64,0xffe8ee873d71dd0e,0xbff0000000000000,3 +np.float64,0x7fd7cbc55e2f978a,0x7ff0000000000000,3 +np.float64,0x80081f9080d03f21,0x80081f9080d03f21,3 +np.float64,0x7fecbafc8b3975f8,0x7ff0000000000000,3 +np.float64,0x800b6c4b0b16d896,0x800b6c4b0b16d896,3 +np.float64,0xbfaa0fc2d4341f80,0xbfa968cdf32b98ad,3 +np.float64,0x3fec79fe4078f3fc,0x3ff6f5361a4a5d93,3 +np.float64,0xbfb14b79de2296f0,0xbfb0b93b75ecec11,3 +np.float64,0x800009d084c013a2,0x800009d084c013a2,3 +np.float64,0x4a4cdfe29499d,0x4a4cdfe29499d,3 
+np.float64,0xbfe721c2d56e4386,0xbfe077f541987d76,3 +np.float64,0x3e5f539e7cbeb,0x3e5f539e7cbeb,3 +np.float64,0x3fd23f044c247e09,0x3fd51ceafcdd64aa,3 +np.float64,0x3fc70785b02e0f0b,0x3fc93b2a37eb342a,3 +np.float64,0xbfe7ab4ec7af569e,0xbfe0ba28eecbf6b0,3 +np.float64,0x800c1d4134583a83,0x800c1d4134583a83,3 +np.float64,0xffd9a73070334e60,0xbff0000000000000,3 +np.float64,0x68a4bf24d1499,0x68a4bf24d1499,3 +np.float64,0x7feba9d9507753b2,0x7ff0000000000000,3 +np.float64,0xbfe9d747db73ae90,0xbfe1bab53d932010,3 +np.float64,0x800a9a4aed953496,0x800a9a4aed953496,3 +np.float64,0xffcb89b0ad371360,0xbff0000000000000,3 +np.float64,0xbfc62388b82c4710,0xbfc4547be442a38c,3 +np.float64,0x800a006d187400db,0x800a006d187400db,3 +np.float64,0x3fcef2fbd33de5f8,0x3fd18177b2150148,3 +np.float64,0x8000b74e3da16e9d,0x8000b74e3da16e9d,3 +np.float64,0x25be536e4b7cb,0x25be536e4b7cb,3 +np.float64,0x3fa86e189430dc31,0x3fa905b4684c9f01,3 +np.float64,0xa7584b114eb0a,0xa7584b114eb0a,3 +np.float64,0x800331133c866227,0x800331133c866227,3 +np.float64,0x3fb52b48142a5690,0x3fb611a6f6e7c664,3 +np.float64,0x3fe825797cf04af2,0x3ff206fd60e98116,3 +np.float64,0x3fd0bec4e5217d8a,0x3fd323db3ffd59b2,3 +np.float64,0x907b43a120f7,0x907b43a120f7,3 +np.float64,0x3fed31eb1d3a63d6,0x3ff7d7a91c6930a4,3 +np.float64,0x7f97a13d782f427a,0x7ff0000000000000,3 +np.float64,0xffc7121a702e2434,0xbff0000000000000,3 +np.float64,0xbfe8bb4cbbf1769a,0xbfe139d7f46f1fb1,3 +np.float64,0xbfe3593cc5a6b27a,0xbfdd09ec91d6cd48,3 +np.float64,0x7fcff218ff9ff,0x7fcff218ff9ff,3 +np.float64,0x3fe73651d4ae6ca4,0x3ff10c5c1d21d127,3 +np.float64,0x80054e396eaa9c74,0x80054e396eaa9c74,3 +np.float64,0x3fe527d5f9aa4fac,0x3fedfb7743db9b53,3 +np.float64,0x7fec6f28c5f8de51,0x7ff0000000000000,3 +np.float64,0x3fcd2bbff53a5780,0x3fd061987416b49b,3 +np.float64,0xffd1f0046423e008,0xbff0000000000000,3 +np.float64,0x80034d97fac69b31,0x80034d97fac69b31,3 +np.float64,0x3faa803f14350080,0x3fab32e3f8073be4,3 +np.float64,0x3fcf8da0163f1b40,0x3fd1e42ba2354c8e,3 +np.float64,0x3fd573c2632ae785,0x3fd97c37609d18d7,3 +np.float64,0x7f922960482452c0,0x7ff0000000000000,3 +np.float64,0x800ebd0c5d3d7a19,0x800ebd0c5d3d7a19,3 +np.float64,0xbfee63b7807cc76f,0xbfe39ec7981035db,3 +np.float64,0xffdc023f8e380480,0xbff0000000000000,3 +np.float64,0x3fe3ffa02c67ff40,0x3febc7f8b900ceba,3 +np.float64,0x36c508b86d8a2,0x36c508b86d8a2,3 +np.float64,0x3fc9fbb0f133f760,0x3fcccee9f6ba801c,3 +np.float64,0x3fd75c1d5faeb83b,0x3fdc3150f9eff99e,3 +np.float64,0x3fe9a8d907b351b2,0x3ff3accc78a31df8,3 +np.float64,0x3fdd8fdcafbb1fb8,0x3fe2c97c97757994,3 +np.float64,0x3fb10c34ca22186a,0x3fb1a0cc42c76b86,3 +np.float64,0xbff0000000000000,0xbfe43a54e4e98864,3 +np.float64,0xffd046aefda08d5e,0xbff0000000000000,3 +np.float64,0x80067989758cf314,0x80067989758cf314,3 +np.float64,0x3fee9d77763d3aef,0x3ff9a67ff0841ba5,3 +np.float64,0xffe4d3cbf8e9a798,0xbff0000000000000,3 +np.float64,0x800f9cab273f3956,0x800f9cab273f3956,3 +np.float64,0x800a5c84f9f4b90a,0x800a5c84f9f4b90a,3 +np.float64,0x4fd377009fa8,0x4fd377009fa8,3 +np.float64,0xbfe7ba26af6f744e,0xbfe0c13ce45d6f95,3 +np.float64,0x609c8a86c1392,0x609c8a86c1392,3 +np.float64,0x7fe4d0296ea9a052,0x7ff0000000000000,3 +np.float64,0x59847bccb3090,0x59847bccb3090,3 +np.float64,0xbfdf944157bf2882,0xbfd8ed092bacad43,3 +np.float64,0xbfe7560a632eac15,0xbfe091405ec34973,3 +np.float64,0x3fea0699f4340d34,0x3ff415eb72089230,3 +np.float64,0x800a5533f374aa68,0x800a5533f374aa68,3 +np.float64,0xbf8e8cdb103d19c0,0xbf8e52cffcb83774,3 +np.float64,0x3fe87d9e52f0fb3d,0x3ff2653952344b81,3 
+np.float64,0x7fca3950f73472a1,0x7ff0000000000000,3 +np.float64,0xffd5d1068aaba20e,0xbff0000000000000,3 +np.float64,0x3fd1a5f169a34be4,0x3fd4524b6ef17f91,3 +np.float64,0x3fdc4b95a8b8972c,0x3fe1caafd8652bf7,3 +np.float64,0x3fe333f65a6667ed,0x3fea502fb1f8a578,3 +np.float64,0xbfc117aaac222f54,0xbfc00018a4b84b6e,3 +np.float64,0x7fecf2efdf39e5df,0x7ff0000000000000,3 +np.float64,0x4e99d83e9d33c,0x4e99d83e9d33c,3 +np.float64,0x800d18937bda3127,0x800d18937bda3127,3 +np.float64,0x3fd6c67778ad8cef,0x3fdb5aba70a3ea9e,3 +np.float64,0x3fdbb71770b76e2f,0x3fe157ae8da20bc5,3 +np.float64,0xbfe9faf6ebf3f5ee,0xbfe1ca963d83f17f,3 +np.float64,0x80038850ac0710a2,0x80038850ac0710a2,3 +np.float64,0x8006beb72f8d7d6f,0x8006beb72f8d7d6f,3 +np.float64,0x3feead67bffd5acf,0x3ff9bb43e8b15e2f,3 +np.float64,0xbfd1174b89222e98,0xbfcdff9972799907,3 +np.float64,0x7fee2c077cfc580e,0x7ff0000000000000,3 +np.float64,0xbfbdbd904e3b7b20,0xbfbc13f4916ed466,3 +np.float64,0xffee47b8fe3c8f71,0xbff0000000000000,3 +np.float64,0xffd161884222c310,0xbff0000000000000,3 +np.float64,0xbfd42f27c4a85e50,0xbfd14fa8d67ba5ee,3 +np.float64,0x7fefffffffffffff,0x7ff0000000000000,3 +np.float64,0x8008151791b02a30,0x8008151791b02a30,3 +np.float64,0xbfba79029234f208,0xbfb926616cf41755,3 +np.float64,0x8004c486be29890e,0x8004c486be29890e,3 +np.float64,0x7fe5325a252a64b3,0x7ff0000000000000,3 +np.float64,0x5a880f04b5103,0x5a880f04b5103,3 +np.float64,0xbfe6f4b7702de96f,0xbfe06209002dd72c,3 +np.float64,0xbfdf8b3739bf166e,0xbfd8e783efe3c30f,3 +np.float64,0xbfe32571c8e64ae4,0xbfdcd128b9aa49a1,3 +np.float64,0xbfe97c98c172f932,0xbfe1920ac0fc040f,3 +np.float64,0x3fd0b513a2a16a28,0x3fd31744e3a1bf0a,3 +np.float64,0xffe3ab70832756e0,0xbff0000000000000,3 +np.float64,0x80030f055ce61e0b,0x80030f055ce61e0b,3 +np.float64,0xffd5f3b21b2be764,0xbff0000000000000,3 +np.float64,0x800c1f2d6c783e5b,0x800c1f2d6c783e5b,3 +np.float64,0x80075f4f148ebe9f,0x80075f4f148ebe9f,3 +np.float64,0xbfa5a046f42b4090,0xbfa52cfbf8992256,3 +np.float64,0xffd6702583ace04c,0xbff0000000000000,3 +np.float64,0x800dc0a5cf1b814c,0x800dc0a5cf1b814c,3 +np.float64,0x14f2203a29e45,0x14f2203a29e45,3 +np.float64,0x800421a40ee84349,0x800421a40ee84349,3 +np.float64,0xbfea7c279df4f84f,0xbfe2037fff3ed877,3 +np.float64,0xbfe9b41ddcf3683c,0xbfe1aafe18a44bf8,3 +np.float64,0xffe7b037022f606e,0xbff0000000000000,3 +np.float64,0x800bafb648775f6d,0x800bafb648775f6d,3 +np.float64,0x800b81681d5702d1,0x800b81681d5702d1,3 +np.float64,0x3fe29f8dc8653f1c,0x3fe9442da1c32c6b,3 +np.float64,0xffef9a05dc7f340b,0xbff0000000000000,3 +np.float64,0x800c8c65a65918cb,0x800c8c65a65918cb,3 +np.float64,0xffe99df0d5f33be1,0xbff0000000000000,3 +np.float64,0x9afeb22535fd7,0x9afeb22535fd7,3 +np.float64,0x7fc620dd822c41ba,0x7ff0000000000000,3 +np.float64,0x29c2cdf25385b,0x29c2cdf25385b,3 +np.float64,0x2d92284e5b246,0x2d92284e5b246,3 +np.float64,0xffc794aa942f2954,0xbff0000000000000,3 +np.float64,0xbfe7ed907eafdb21,0xbfe0d9a7b1442497,3 +np.float64,0xbfd4e0d4aea9c1aa,0xbfd1d09366dba2a7,3 +np.float64,0xa70412c34e083,0xa70412c34e083,3 +np.float64,0x41dc0ee083b9,0x41dc0ee083b9,3 +np.float64,0x8000ece20da1d9c5,0x8000ece20da1d9c5,3 +np.float64,0x3fdf3dae103e7b5c,0x3fe42314bf826bc5,3 +np.float64,0x3fe972533c72e4a6,0x3ff3703761e70f04,3 +np.float64,0xffba1d2b82343a58,0xbff0000000000000,3 +np.float64,0xe0086c83c010e,0xe0086c83c010e,3 +np.float64,0x3fe6fb0dde6df61c,0x3ff0cf5fae01aa08,3 +np.float64,0x3fcfaf057e3f5e0b,0x3fd1f98c1fd20139,3 +np.float64,0xbfdca19d9239433c,0xbfd7158745192ca9,3 +np.float64,0xffb17f394e22fe70,0xbff0000000000000,3 
+np.float64,0x7fe40f05c7681e0b,0x7ff0000000000000,3 +np.float64,0x800b3c575d5678af,0x800b3c575d5678af,3 +np.float64,0x7fa4ab20ac295640,0x7ff0000000000000,3 +np.float64,0xbfd2fff4f6a5ffea,0xbfd07069bb50e1a6,3 +np.float64,0xbfef81b9147f0372,0xbfe40b845a749787,3 +np.float64,0x7fd7400e54ae801c,0x7ff0000000000000,3 +np.float64,0x3fd4401a17a88034,0x3fd7d20fb76a4f3d,3 +np.float64,0xbfd3e907fd27d210,0xbfd11c64b7577fc5,3 +np.float64,0x7fe34bed9ae697da,0x7ff0000000000000,3 +np.float64,0x80039119c0472234,0x80039119c0472234,3 +np.float64,0xbfe2e36ac565c6d6,0xbfdc88454ee997b3,3 +np.float64,0xbfec57204478ae40,0xbfe2cd3183de1d2d,3 +np.float64,0x7fed7e2a12fafc53,0x7ff0000000000000,3 +np.float64,0x7fd5c5fa7d2b8bf4,0x7ff0000000000000,3 +np.float64,0x3fdcf368d6b9e6d0,0x3fe24decce1ebd35,3 +np.float64,0xbfe0ebfcf2e1d7fa,0xbfda48c9247ae8cf,3 +np.float64,0xbfe10dbea2e21b7e,0xbfda707d68b59674,3 +np.float64,0xbfdf201b6ebe4036,0xbfd8a5df27742fdf,3 +np.float64,0xffe16555be62caab,0xbff0000000000000,3 +np.float64,0xffc23a5db22474bc,0xbff0000000000000,3 +np.float64,0xffe1cbb3f8a39768,0xbff0000000000000,3 +np.float64,0x8007b823be0f7048,0x8007b823be0f7048,3 +np.float64,0xbfa5d1f3042ba3e0,0xbfa55c97cd77bf6e,3 +np.float64,0xbfe316a074662d41,0xbfdcc0da4e7334d0,3 +np.float64,0xbfdfab2bf2bf5658,0xbfd8fb046b88b51f,3 +np.float64,0xfacc9dabf5994,0xfacc9dabf5994,3 +np.float64,0xffe7e420a4efc841,0xbff0000000000000,3 +np.float64,0x800bb986cd57730e,0x800bb986cd57730e,3 +np.float64,0xbfe314fa38e629f4,0xbfdcbf09302c3bf5,3 +np.float64,0x7fc56b17772ad62e,0x7ff0000000000000,3 +np.float64,0x8006a87d54ad50fb,0x8006a87d54ad50fb,3 +np.float64,0xbfe6633e4a6cc67c,0xbfe01a67c3b3ff32,3 +np.float64,0x3fe0ff56eb21feae,0x3fe66df01defb0fb,3 +np.float64,0xffc369cfc126d3a0,0xbff0000000000000,3 +np.float64,0x7fe8775d9a30eeba,0x7ff0000000000000,3 +np.float64,0x3fb53db13e2a7b60,0x3fb625a7279cdac3,3 +np.float64,0xffee76e7e6fcedcf,0xbff0000000000000,3 +np.float64,0xb45595b568ab3,0xb45595b568ab3,3 +np.float64,0xffa09a1d50213440,0xbff0000000000000,3 +np.float64,0x7d11dc16fa23c,0x7d11dc16fa23c,3 +np.float64,0x7fd4cc2928299851,0x7ff0000000000000,3 +np.float64,0x6a30e0ead461d,0x6a30e0ead461d,3 +np.float64,0x7fd3ee735a27dce6,0x7ff0000000000000,3 +np.float64,0x8008d7084b31ae11,0x8008d7084b31ae11,3 +np.float64,0x3fe469353fe8d26a,0x3fec8e7e2df38590,3 +np.float64,0x3fcecef2743d9de5,0x3fd16a888b715dfd,3 +np.float64,0x460130d68c027,0x460130d68c027,3 +np.float64,0xbfd76510c62eca22,0xbfd398766b741d6e,3 +np.float64,0x800ec88c2a5d9118,0x800ec88c2a5d9118,3 +np.float64,0x3fac969c6c392d40,0x3fad66ca6a1e583c,3 +np.float64,0x3fe5c616bf6b8c2e,0x3fef30f931e8dde5,3 +np.float64,0xb4cb6cd56996e,0xb4cb6cd56996e,3 +np.float64,0xffc3eacf8827d5a0,0xbff0000000000000,3 +np.float64,0x3fe1ceaf60e39d5f,0x3fe7d31e0a627cf9,3 +np.float64,0xffea69b42ff4d368,0xbff0000000000000,3 +np.float64,0x800ff8aef99ff15e,0x800ff8aef99ff15e,3 +np.float64,0x6c3953f0d872b,0x6c3953f0d872b,3 +np.float64,0x8007ca5a0d0f94b5,0x8007ca5a0d0f94b5,3 +np.float64,0x800993ce3ad3279d,0x800993ce3ad3279d,3 +np.float64,0x3fe5a4d1516b49a2,0x3feeef67b22ac65b,3 +np.float64,0x8003d7512a67aea3,0x8003d7512a67aea3,3 +np.float64,0x33864430670c9,0x33864430670c9,3 +np.float64,0xbfdbf477e3b7e8f0,0xbfd6a63f1b36f424,3 +np.float64,0x3fb5da92582bb525,0x3fb6d04ef1a1d31a,3 +np.float64,0xe38aae71c7156,0xe38aae71c7156,3 +np.float64,0x3fcaf5590a35eab2,0x3fce01ed6eb6188e,3 +np.float64,0x800deba9b05bd754,0x800deba9b05bd754,3 +np.float64,0x7fee0cde287c19bb,0x7ff0000000000000,3 +np.float64,0xbfe0c2ae70e1855d,0xbfda17fa64d84fcf,3 
+np.float64,0x518618faa30c4,0x518618faa30c4,3 +np.float64,0xbfeb4c49b8769894,0xbfe25d52cd7e529f,3 +np.float64,0xbfeb3aa21b367544,0xbfe255cae1df4cfd,3 +np.float64,0xffd23f1c5d247e38,0xbff0000000000000,3 +np.float64,0xff9a75132034ea20,0xbff0000000000000,3 +np.float64,0xbfef9d96307f3b2c,0xbfe415e8b6ce0e50,3 +np.float64,0x8004046f2f0808df,0x8004046f2f0808df,3 +np.float64,0x3fe15871aea2b0e3,0x3fe706532ea5c770,3 +np.float64,0x7fd86b1576b0d62a,0x7ff0000000000000,3 +np.float64,0xbfc240a5c724814c,0xbfc102c7971ca455,3 +np.float64,0xffd8ea670bb1d4ce,0xbff0000000000000,3 +np.float64,0xbfeb1ddd1ff63bba,0xbfe2497c4e27bb8e,3 +np.float64,0x3fcd47e0a33a8fc1,0x3fd0734444150d83,3 +np.float64,0xe00b6a65c016e,0xe00b6a65c016e,3 +np.float64,0xbfc7d582142fab04,0xbfc5bf1fbe755a4c,3 +np.float64,0x8cc91ca11993,0x8cc91ca11993,3 +np.float64,0x7fdbc530e3b78a61,0x7ff0000000000000,3 +np.float64,0x7fee437522bc86e9,0x7ff0000000000000,3 +np.float64,0xffe9e09ae2b3c135,0xbff0000000000000,3 +np.float64,0x8002841cada5083a,0x8002841cada5083a,3 +np.float64,0x3fd6b485f8ad690c,0x3fdb412135932699,3 +np.float64,0x80070e8d0b0e1d1b,0x80070e8d0b0e1d1b,3 +np.float64,0x7fed5df165babbe2,0x7ff0000000000000,3 +np.float64,0x7ff4000000000000,0x7ffc000000000000,3 +np.float64,0x7fe99d08cd333a11,0x7ff0000000000000,3 +np.float64,0xdfff4201bfff,0xdfff4201bfff,3 +np.float64,0x800ccf7aaf999ef6,0x800ccf7aaf999ef6,3 +np.float64,0x3fddb05aad3b60b5,0x3fe2e34bdd1dd9d5,3 +np.float64,0xbfe5e1c60e6bc38c,0xbfdfb3275cc1675f,3 +np.float64,0x8004fe674269fccf,0x8004fe674269fccf,3 +np.float64,0x7fe9280363325006,0x7ff0000000000000,3 +np.float64,0xf605b9f1ec0b7,0xf605b9f1ec0b7,3 +np.float64,0x800c7c214018f843,0x800c7c214018f843,3 +np.float64,0x7fd97eb6b9b2fd6c,0x7ff0000000000000,3 +np.float64,0x7fd03f8fb6207f1e,0x7ff0000000000000,3 +np.float64,0x7fc526b64d2a4d6c,0x7ff0000000000000,3 +np.float64,0xbfef1a7c42fe34f9,0xbfe3e4b4399e0fcf,3 +np.float64,0xffdde10a2fbbc214,0xbff0000000000000,3 +np.float64,0xbfdd274f72ba4e9e,0xbfd76aa73788863c,3 +np.float64,0xbfecf7f77af9efef,0xbfe30ee2ae03fed1,3 +np.float64,0xffde709322bce126,0xbff0000000000000,3 +np.float64,0x268b5dac4d16d,0x268b5dac4d16d,3 +np.float64,0x8005c099606b8134,0x8005c099606b8134,3 +np.float64,0xffcf54c1593ea984,0xbff0000000000000,3 +np.float64,0xbfee9b8ebabd371d,0xbfe3b44f2663139d,3 +np.float64,0x3faf0330643e0661,0x3faff88fab74b447,3 +np.float64,0x7fe1c6011be38c01,0x7ff0000000000000,3 +np.float64,0xbfe9d58053b3ab01,0xbfe1b9ea12242485,3 +np.float64,0xbfe15a80fee2b502,0xbfdaca2aa7d1231a,3 +np.float64,0x7fe0d766d8a1aecd,0x7ff0000000000000,3 +np.float64,0x800f65e6a21ecbcd,0x800f65e6a21ecbcd,3 +np.float64,0x7fc85e45a530bc8a,0x7ff0000000000000,3 +np.float64,0x3fcc240e5438481d,0x3fcf7954fc080ac3,3 +np.float64,0xffddd49da2bba93c,0xbff0000000000000,3 +np.float64,0x1376f36c26edf,0x1376f36c26edf,3 +np.float64,0x3feffb7af17ff6f6,0x3ffb77f0ead2f881,3 +np.float64,0x3fd9354ea9b26a9d,0x3fdee4e4c8db8239,3 +np.float64,0xffdf7beed4bef7de,0xbff0000000000000,3 +np.float64,0xbfdef256ecbde4ae,0xbfd889b0e213a019,3 +np.float64,0x800d78bd1e7af17a,0x800d78bd1e7af17a,3 +np.float64,0xb66d66276cdad,0xb66d66276cdad,3 +np.float64,0x7fd8f51138b1ea21,0x7ff0000000000000,3 +np.float64,0xffe8c9c302b19385,0xbff0000000000000,3 +np.float64,0x8000be4cf5417c9b,0x8000be4cf5417c9b,3 +np.float64,0xbfe2293a25645274,0xbfdbb78a8c547c68,3 +np.float64,0xce8392c19d08,0xce8392c19d08,3 +np.float64,0xbfe075736b60eae7,0xbfd9bc0f6e34a283,3 +np.float64,0xbfe8d6fe6a71adfd,0xbfe1469ba80b4915,3 +np.float64,0xffe0c7993fa18f32,0xbff0000000000000,3 
+np.float64,0x3fce5210fd3ca422,0x3fd11b40a1270a95,3 +np.float64,0x6c0534a8d80a7,0x6c0534a8d80a7,3 +np.float64,0x23c1823647831,0x23c1823647831,3 +np.float64,0x3fc901253732024a,0x3fcb9d264accb07c,3 +np.float64,0x3fe42b8997685714,0x3fec1a39e207b6e4,3 +np.float64,0x3fec4fd00fb89fa0,0x3ff6c1fdd0c262c8,3 +np.float64,0x8007b333caaf6668,0x8007b333caaf6668,3 +np.float64,0x800f9275141f24ea,0x800f9275141f24ea,3 +np.float64,0xffbba361a23746c0,0xbff0000000000000,3 +np.float64,0xbfee4effa9fc9dff,0xbfe396c11d0cd524,3 +np.float64,0x3e47e84c7c8fe,0x3e47e84c7c8fe,3 +np.float64,0x3fe80eb7b1301d6f,0x3ff1eed318a00153,3 +np.float64,0x7fd3f4c5b4a7e98a,0x7ff0000000000000,3 +np.float64,0x158abab02b158,0x158abab02b158,3 +np.float64,0x1,0x1,3 +np.float64,0x1f1797883e2f4,0x1f1797883e2f4,3 +np.float64,0x3feec055d03d80ac,0x3ff9d3fb0394de33,3 +np.float64,0x8010000000000000,0x8010000000000000,3 +np.float64,0xbfd070860ea0e10c,0xbfccfeec2828efef,3 +np.float64,0x80015c8b3e82b917,0x80015c8b3e82b917,3 +np.float64,0xffef9956d9ff32ad,0xbff0000000000000,3 +np.float64,0x7fe7f087dd2fe10f,0x7ff0000000000000,3 +np.float64,0x8002e7718665cee4,0x8002e7718665cee4,3 +np.float64,0x3fdfb9adb2bf735c,0x3fe4887a86214c1e,3 +np.float64,0xffc7747dfb2ee8fc,0xbff0000000000000,3 +np.float64,0x3fec309bb5386137,0x3ff69c44e1738547,3 +np.float64,0xffdbe2bf9ab7c580,0xbff0000000000000,3 +np.float64,0xbfe6a274daed44ea,0xbfe039aff2be9d48,3 +np.float64,0x7fd5a4e4efab49c9,0x7ff0000000000000,3 +np.float64,0xffbe6aaeb03cd560,0xbff0000000000000,3 diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-log.csv b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-log.csv new file mode 100644 index 0000000..b8f6b08 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-log.csv @@ -0,0 +1,271 @@ +dtype,input,output,ulperrortol +## +ve denormals ## +np.float32,0x004b4716,0xc2afbc1b,4 +np.float32,0x007b2490,0xc2aec01e,4 +np.float32,0x007c99fa,0xc2aeba17,4 +np.float32,0x00734a0c,0xc2aee1dc,4 +np.float32,0x0070de24,0xc2aeecba,4 +np.float32,0x007fffff,0xc2aeac50,4 +np.float32,0x00000001,0xc2ce8ed0,4 +## -ve denormals ## +np.float32,0x80495d65,0xffc00000,4 +np.float32,0x806894f6,0xffc00000,4 +np.float32,0x80555a76,0xffc00000,4 +np.float32,0x804e1fb8,0xffc00000,4 +np.float32,0x80687de9,0xffc00000,4 +np.float32,0x807fffff,0xffc00000,4 +np.float32,0x80000001,0xffc00000,4 +## +/-0.0f, +/-FLT_MIN +/-FLT_MAX ## +np.float32,0x00000000,0xff800000,4 +np.float32,0x80000000,0xff800000,4 +np.float32,0x7f7fffff,0x42b17218,4 +np.float32,0x80800000,0xffc00000,4 +np.float32,0xff7fffff,0xffc00000,4 +## 1.00f + 0x00000001 ## +np.float32,0x3f800000,0x00000000,4 +np.float32,0x3f800001,0x33ffffff,4 +np.float32,0x3f800002,0x347ffffe,4 +np.float32,0x3f7fffff,0xb3800000,4 +np.float32,0x3f7ffffe,0xb4000000,4 +np.float32,0x3f7ffffd,0xb4400001,4 +np.float32,0x402df853,0x3f7ffffe,4 +np.float32,0x402df854,0x3f7fffff,4 +np.float32,0x402df855,0x3f800000,4 +np.float32,0x402df856,0x3f800001,4 +np.float32,0x3ebc5ab0,0xbf800001,4 +np.float32,0x3ebc5ab1,0xbf800000,4 +np.float32,0x3ebc5ab2,0xbf800000,4 +np.float32,0x3ebc5ab3,0xbf7ffffe,4 +np.float32,0x423ef575,0x407768ab,4 +np.float32,0x427b8c61,0x408485dd,4 +np.float32,0x4211e9ee,0x406630b0,4 +np.float32,0x424d5c41,0x407c0fed,4 +np.float32,0x42be722a,0x4091cc91,4 +np.float32,0x42b73d30,0x4090908b,4 +np.float32,0x427e48e2,0x4084de7f,4 +np.float32,0x428f759b,0x4088bba3,4 +np.float32,0x41629069,0x4029a0cc,4 +np.float32,0x4272c99d,0x40836379,4 
+np.float32,0x4d1b7458,0x4197463d,4 +np.float32,0x4f10c594,0x41ace2b2,4 +np.float32,0x4ea397c2,0x41a85171,4 +np.float32,0x4fefa9d1,0x41b6769c,4 +np.float32,0x4ebac6ab,0x41a960dc,4 +np.float32,0x4f6efb42,0x41b0e535,4 +np.float32,0x4e9ab8e7,0x41a7df44,4 +np.float32,0x4e81b5d1,0x41a67625,4 +np.float32,0x5014d9f2,0x41b832bd,4 +np.float32,0x4f02175c,0x41ac07b8,4 +np.float32,0x7f034f89,0x42b01c47,4 +np.float32,0x7f56d00e,0x42b11849,4 +np.float32,0x7f1cd5f6,0x42b0773a,4 +np.float32,0x7e979174,0x42af02d7,4 +np.float32,0x7f23369f,0x42b08ba2,4 +np.float32,0x7f0637ae,0x42b0277d,4 +np.float32,0x7efcb6e8,0x42b00897,4 +np.float32,0x7f7907c8,0x42b163f6,4 +np.float32,0x7e95c4c2,0x42aefcba,4 +np.float32,0x7f4577b2,0x42b0ed2d,4 +np.float32,0x3f49c92e,0xbe73ae84,4 +np.float32,0x3f4a23d1,0xbe71e2f8,4 +np.float32,0x3f4abb67,0xbe6ee430,4 +np.float32,0x3f48169a,0xbe7c5532,4 +np.float32,0x3f47f5fa,0xbe7cfc37,4 +np.float32,0x3f488309,0xbe7a2ad8,4 +np.float32,0x3f479df4,0xbe7ebf5f,4 +np.float32,0x3f47cfff,0xbe7dbec9,4 +np.float32,0x3f496704,0xbe75a125,4 +np.float32,0x3f478ee8,0xbe7f0c92,4 +np.float32,0x3f4a763b,0xbe7041ce,4 +np.float32,0x3f47a108,0xbe7eaf94,4 +np.float32,0x3f48136c,0xbe7c6578,4 +np.float32,0x3f481c17,0xbe7c391c,4 +np.float32,0x3f47cd28,0xbe7dcd56,4 +np.float32,0x3f478be8,0xbe7f1bf7,4 +np.float32,0x3f4c1f8e,0xbe67e367,4 +np.float32,0x3f489b0c,0xbe79b03f,4 +np.float32,0x3f4934cf,0xbe76a08a,4 +np.float32,0x3f4954df,0xbe75fd6a,4 +np.float32,0x3f47a3f5,0xbe7ea093,4 +np.float32,0x3f4ba4fc,0xbe6a4b02,4 +np.float32,0x3f47a0e1,0xbe7eb05c,4 +np.float32,0x3f48c30a,0xbe78e42f,4 +np.float32,0x3f48cab8,0xbe78bd05,4 +np.float32,0x3f4b0569,0xbe6d6ea4,4 +np.float32,0x3f47de32,0xbe7d7607,4 +np.float32,0x3f477328,0xbe7f9b00,4 +np.float32,0x3f496dab,0xbe757f52,4 +np.float32,0x3f47662c,0xbe7fddac,4 +np.float32,0x3f48ddd8,0xbe785b80,4 +np.float32,0x3f481866,0xbe7c4bff,4 +np.float32,0x3f48b119,0xbe793fb6,4 +np.float32,0x3f48c7e8,0xbe78cb5c,4 +np.float32,0x3f4985f6,0xbe7503da,4 +np.float32,0x3f483fdf,0xbe7b8212,4 +np.float32,0x3f4b1c76,0xbe6cfa67,4 +np.float32,0x3f480b2e,0xbe7c8fa8,4 +np.float32,0x3f48745f,0xbe7a75bf,4 +np.float32,0x3f485bda,0xbe7af308,4 +np.float32,0x3f47a660,0xbe7e942c,4 +np.float32,0x3f47d4d5,0xbe7da600,4 +np.float32,0x3f4b0a26,0xbe6d56be,4 +np.float32,0x3f4a4883,0xbe712924,4 +np.float32,0x3f4769e7,0xbe7fca84,4 +np.float32,0x3f499702,0xbe74ad3f,4 +np.float32,0x3f494ab1,0xbe763131,4 +np.float32,0x3f476b69,0xbe7fc2c6,4 +np.float32,0x3f4884e8,0xbe7a214a,4 +np.float32,0x3f486945,0xbe7aae76,4 +#float64 +## +ve denormal ## +np.float64,0x0000000000000001,0xc0874385446d71c3,1 +np.float64,0x0001000000000000,0xc086395a2079b70c,1 +np.float64,0x000fffffffffffff,0xc086232bdd7abcd2,1 +np.float64,0x0007ad63e2168cb6,0xc086290bc0b2980f,1 +## -ve denormal ## +np.float64,0x8000000000000001,0xfff8000000000001,1 +np.float64,0x8001000000000000,0xfff8000000000001,1 +np.float64,0x800fffffffffffff,0xfff8000000000001,1 +np.float64,0x8007ad63e2168cb6,0xfff8000000000001,1 +## +/-0.0f, MAX, MIN## +np.float64,0x0000000000000000,0xfff0000000000000,1 +np.float64,0x8000000000000000,0xfff0000000000000,1 +np.float64,0x7fefffffffffffff,0x40862e42fefa39ef,1 +np.float64,0xffefffffffffffff,0xfff8000000000001,1 +## near 1.0f ## +np.float64,0x3ff0000000000000,0x0000000000000000,1 +np.float64,0x3fe8000000000000,0xbfd269621134db92,1 +np.float64,0x3ff0000000000001,0x3cafffffffffffff,1 +np.float64,0x3ff0000020000000,0x3e7fffffe000002b,1 +np.float64,0x3ff0000000000001,0x3cafffffffffffff,1 +np.float64,0x3fefffffe0000000,0xbe70000008000005,1 
+np.float64,0x3fefffffffffffff,0xbca0000000000000,1 +## random numbers ## +np.float64,0x02500186f3d9da56,0xc0855b8abf135773,1 +np.float64,0x09200815a3951173,0xc082ff1ad7131bdc,1 +np.float64,0x0da029623b0243d4,0xc0816fc994695bb5,1 +np.float64,0x48703b8ac483a382,0x40579213a313490b,1 +np.float64,0x09207b74c87c9860,0xc082fee20ff349ef,1 +np.float64,0x62c077698e8df947,0x407821c996d110f0,1 +np.float64,0x2350b45e87c3cfb0,0xc073d6b16b51d072,1 +np.float64,0x3990a23f9ff2b623,0xc051aa60eadd8c61,1 +np.float64,0x0d011386a116c348,0xc081a6cc7ea3b8fb,1 +np.float64,0x1fe0f0303ebe273a,0xc0763870b78a81ca,1 +np.float64,0x0cd1260121d387da,0xc081b7668d61a9d1,1 +np.float64,0x1e6135a8f581d422,0xc077425ac10f08c2,1 +np.float64,0x622168db5fe52d30,0x4077b3c669b9fadb,1 +np.float64,0x69f188e1ec6d1718,0x407d1e2f18c63889,1 +np.float64,0x3aa1bf1d9c4dd1a3,0xc04d682e24bde479,1 +np.float64,0x6c81c4011ce4f683,0x407ee5190e8a8e6a,1 +np.float64,0x2191fa55aa5a5095,0xc0750c0c318b5e2d,1 +np.float64,0x32a1f602a32bf360,0xc06270caa493fc17,1 +np.float64,0x16023c90ba93249b,0xc07d0f88e0801638,1 +np.float64,0x1c525fe6d71fa9ff,0xc078af49c66a5d63,1 +np.float64,0x1a927675815d65b7,0xc079e5bdd7fe376e,1 +np.float64,0x41227b8fe70da028,0x402aa0c9f9a84c71,1 +np.float64,0x4962bb6e853fe87d,0x405a34aa04c83747,1 +np.float64,0x23d2cda00b26b5a4,0xc0737c13a06d00ea,1 +np.float64,0x2d13083fd62987fa,0xc06a25055aeb474e,1 +np.float64,0x10e31e4c9b4579a1,0xc0804e181929418e,1 +np.float64,0x26d3247d556a86a9,0xc0716774171da7e8,1 +np.float64,0x6603379398d0d4ac,0x407a64f51f8a887b,1 +np.float64,0x02d38af17d9442ba,0xc0852d955ac9dd68,1 +np.float64,0x6a2382b4818dd967,0x407d4129d688e5d4,1 +np.float64,0x2ee3c403c79b3934,0xc067a091fefaf8b6,1 +np.float64,0x6493a699acdbf1a4,0x4079663c8602bfc5,1 +np.float64,0x1c8413c4f0de3100,0xc0788c99697059b6,1 +np.float64,0x4573f1ed350d9622,0x404e9bd1e4c08920,1 +np.float64,0x2f34265c9200b69c,0xc067310cfea4e986,1 +np.float64,0x19b43e65fa22029b,0xc07a7f8877de22d6,1 +np.float64,0x0af48ab7925ed6bc,0xc0825c4fbc0e5ade,1 +np.float64,0x4fa49699cad82542,0x4065c76d2a318235,1 +np.float64,0x7204a15e56ade492,0x40815bb87484dffb,1 +np.float64,0x4734aa08a230982d,0x40542a4bf7a361a9,1 +np.float64,0x1ae4ed296c2fd749,0xc079ac4921f20abb,1 +np.float64,0x472514ea4370289c,0x4053ff372bd8f18f,1 +np.float64,0x53a54b3f73820430,0x406b5411fc5f2e33,1 +np.float64,0x64754de5a15684fa,0x407951592e99a5ab,1 +np.float64,0x69358e279868a7c3,0x407c9c671a882c31,1 +np.float64,0x284579ec61215945,0xc0706688e55f0927,1 +np.float64,0x68b5c58806447adc,0x407c43d6f4eff760,1 +np.float64,0x1945a83f98b0e65d,0xc07acc15eeb032cc,1 +np.float64,0x0fc5eb98a16578bf,0xc080b0d02eddca0e,1 +np.float64,0x6a75e208f5784250,0x407d7a7383bf8f05,1 +np.float64,0x0fe63a029c47645d,0xc080a59ca1e98866,1 +np.float64,0x37963ac53f065510,0xc057236281f7bdb6,1 +np.float64,0x135661bb07067ff7,0xc07ee924930c21e4,1 +np.float64,0x4b4699469d458422,0x405f73843756e887,1 +np.float64,0x1a66d73e4bf4881b,0xc07a039ba1c63adf,1 +np.float64,0x12a6b9b119a7da59,0xc07f62e49c6431f3,1 +np.float64,0x24c719aa8fd1bdb5,0xc072d26da4bf84d3,1 +np.float64,0x0fa6ff524ffef314,0xc080bb8514662e77,1 +np.float64,0x1db751d66fdd4a9a,0xc077b77cb50d7c92,1 +np.float64,0x4947374c516da82c,0x4059e9acfc7105bf,1 +np.float64,0x1b1771ab98f3afc8,0xc07989326b8e1f66,1 +np.float64,0x25e78805baac8070,0xc0720a818e6ef080,1 +np.float64,0x4bd7a148225d3687,0x406082d004ea3ee7,1 +np.float64,0x53d7d6b2bbbda00a,0x406b9a398967cbd5,1 +np.float64,0x6997fb9f4e1c685f,0x407ce0a703413eba,1 +np.float64,0x069802c2ff71b951,0xc083df39bf7acddc,1 
+np.float64,0x4d683ac9890f66d8,0x4062ae21d8c2acf0,1 +np.float64,0x5a2825863ec14f4c,0x40722d718d549552,1 +np.float64,0x0398799a88f4db80,0xc084e93dab8e2158,1 +np.float64,0x5ed87a8b77e135a5,0x40756d7051777b33,1 +np.float64,0x5828cd6d79b9bede,0x4070cafb22fc6ca1,1 +np.float64,0x7b18ba2a5ec6f068,0x408481386b3ed6fe,1 +np.float64,0x4938fd60922198fe,0x4059c206b762ea7e,1 +np.float64,0x31b8f44fcdd1a46e,0xc063b2faa8b6434e,1 +np.float64,0x5729341c0d918464,0x407019cac0c4a7d7,1 +np.float64,0x13595e9228ee878e,0xc07ee7235a7d8088,1 +np.float64,0x17698b0dc9dd4135,0xc07c1627e3a5ad5f,1 +np.float64,0x63b977c283abb0cc,0x4078cf1ec6ed65be,1 +np.float64,0x7349cc0d4dc16943,0x4081cc697ce4cb53,1 +np.float64,0x4e49a80b732fb28d,0x4063e67e3c5cbe90,1 +np.float64,0x07ba14b848a8ae02,0xc0837ac032a094e0,1 +np.float64,0x3da9f17b691bfddc,0xc03929c25366acda,1 +np.float64,0x02ea39aa6c3ac007,0xc08525af6f21e1c4,1 +np.float64,0x3a6a42f04ed9563d,0xc04e98e825dca46b,1 +np.float64,0x1afa877cd7900be7,0xc0799d6648cb34a9,1 +np.float64,0x58ea986649e052c6,0x4071512e939ad790,1 +np.float64,0x691abbc04647f536,0x407c89aaae0fcb83,1 +np.float64,0x43aabc5063e6f284,0x4044b45d18106fd2,1 +np.float64,0x488b003c893e0bea,0x4057df012a2dafbe,1 +np.float64,0x77eb076ed67caee5,0x40836720de94769e,1 +np.float64,0x5c1b46974aba46f4,0x40738731ba256007,1 +np.float64,0x1a5b29ecb5d3c261,0xc07a0becc77040d6,1 +np.float64,0x5d8b6ccf868c6032,0x4074865c1865e2db,1 +np.float64,0x4cfb6690b4aaf5af,0x406216cd8c7e8ddb,1 +np.float64,0x76cbd8eb5c5fc39e,0x4083038dc66d682b,1 +np.float64,0x28bbd1fec5012814,0xc07014c2dd1b9711,1 +np.float64,0x33dc1b3a4fd6bf7a,0xc060bd0756e07d8a,1 +np.float64,0x52bbe89b37de99f3,0x406a10041aa7d343,1 +np.float64,0x07bc479d15eb2dd3,0xc0837a1a6e3a3b61,1 +np.float64,0x18fc5275711a901d,0xc07aff3e9d62bc93,1 +np.float64,0x114c9758e247dc71,0xc080299a7cf15b05,1 +np.float64,0x25ac8f6d60755148,0xc07233c4c0c511d4,1 +np.float64,0x260cae2bb9e9fd7e,0xc071f128c7e82eac,1 +np.float64,0x572ccdfe0241de82,0x40701bedc84bb504,1 +np.float64,0x0ddcef6c8d41f5ee,0xc0815a7e16d07084,1 +np.float64,0x6dad1d59c988af68,0x407fb4a0bc0142b1,1 +np.float64,0x025d200580d8b6d1,0xc08556c0bc32b1b2,1 +np.float64,0x7aad344b6aa74c18,0x40845bbc453f22be,1 +np.float64,0x5b5d9d6ad9d14429,0x4073036d2d21f382,1 +np.float64,0x49cd8d8dcdf19954,0x405b5c034f5c7353,1 +np.float64,0x63edb9483335c1e6,0x4078f2dd21378786,1 +np.float64,0x7b1dd64c9d2c26bd,0x408482b922017bc9,1 +np.float64,0x782e13e0b574be5f,0x40837e2a0090a5ad,1 +np.float64,0x592dfe18b9d6db2f,0x40717f777fbcb1ec,1 +np.float64,0x654e3232ac60d72c,0x4079e71a95a70446,1 +np.float64,0x7b8e42ad22091456,0x4084a9a6f1e61722,1 +np.float64,0x570e88dfd5860ae6,0x407006ae6c0d137a,1 +np.float64,0x294e98346cb98ef1,0xc06f5edaac12bd44,1 +np.float64,0x1adeaa4ab792e642,0xc079b1431d5e2633,1 +np.float64,0x7b6ead3377529ac8,0x40849eabc8c7683c,1 +np.float64,0x2b8eedae8a9b2928,0xc06c400054deef11,1 +np.float64,0x65defb45b2dcf660,0x407a4b53f181c05a,1 +np.float64,0x1baf582d475e7701,0xc07920bcad4a502c,1 +np.float64,0x461f39cf05a0f15a,0x405126368f984fa1,1 +np.float64,0x7e5f6f5dcfff005b,0x4085a37d610439b4,1 +np.float64,0x136f66e4d09bd662,0xc07ed8a2719f2511,1 +np.float64,0x65afd8983fb6ca1f,0x407a2a7f48bf7fc1,1 +np.float64,0x572fa7f95ed22319,0x40701d706cf82e6f,1 diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-log10.csv b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-log10.csv new file mode 100644 index 0000000..7f5241a --- /dev/null +++ 
b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-log10.csv @@ -0,0 +1,1629 @@ +dtype,input,output,ulperrortol +np.float32,0x3f6fd5c8,0xbce80e8e,4 +np.float32,0x3ea4ab17,0xbefc3deb,4 +np.float32,0x3e87a133,0xbf13b0b7,4 +np.float32,0x3f0d9069,0xbe83bb19,4 +np.float32,0x3f7b9269,0xbbf84f47,4 +np.float32,0x3f7a9ffa,0xbc16fd97,4 +np.float32,0x7f535d34,0x4219cb66,4 +np.float32,0x3e79ad7c,0xbf1ce857,4 +np.float32,0x7e8bfd3b,0x4217dfe9,4 +np.float32,0x3f2d2ee9,0xbe2dcec6,4 +np.float32,0x572e04,0xc21862e4,4 +np.float32,0x7f36f8,0xc217bad5,4 +np.float32,0x3f7982fb,0xbc36aaed,4 +np.float32,0x45b019,0xc218c67c,4 +np.float32,0x3f521c46,0xbdafb3e3,4 +np.float32,0x80000001,0x7fc00000,4 +np.float32,0x3f336c81,0xbe1e107f,4 +np.float32,0x3eac92d7,0xbef1d0bb,4 +np.float32,0x47bdfc,0xc218b990,4 +np.float32,0x7f2d94c8,0x421973d1,4 +np.float32,0x7d53ff8d,0x4214fbb6,4 +np.float32,0x3f581e4e,0xbd96a079,4 +np.float32,0x7ddaf20d,0x42163e4e,4 +np.float32,0x3f341d3c,0xbe1c5b4c,4 +np.float32,0x7ef04ba9,0x4218d032,4 +np.float32,0x620ed2,0xc2182e99,4 +np.float32,0x507850,0xc2188682,4 +np.float32,0x7d08f9,0xc217c284,4 +np.float32,0x7f0cf2aa,0x42191734,4 +np.float32,0x3f109a17,0xbe7e04fe,4 +np.float32,0x7f426152,0x4219a625,4 +np.float32,0x7f32d5a3,0x42198113,4 +np.float32,0x2e14b2,0xc2197e6f,4 +np.float32,0x3a5acd,0xc219156a,4 +np.float32,0x50a565,0xc2188589,4 +np.float32,0x5b751c,0xc2184d97,4 +np.float32,0x7e4149f6,0x42173b22,4 +np.float32,0x3dc34bf9,0xbf82a42a,4 +np.float32,0x3d12bc28,0xbfb910d6,4 +np.float32,0x7ebd2584,0x421865c1,4 +np.float32,0x7f6b3375,0x4219faeb,4 +np.float32,0x7fa00000,0x7fe00000,4 +np.float32,0x3f35fe7d,0xbe17bd33,4 +np.float32,0x7db45c87,0x4215e818,4 +np.float32,0x3efff366,0xbe9a2b8d,4 +np.float32,0x3eb331d0,0xbee971a3,4 +np.float32,0x3f259d5f,0xbe41ae2e,4 +np.float32,0x3eab85ec,0xbef32c4a,4 +np.float32,0x7f194b8a,0x42193c8c,4 +np.float32,0x3f11a614,0xbe7acfc7,4 +np.float32,0x5b17,0xc221f16b,4 +np.float32,0x3f33dadc,0xbe1cff4d,4 +np.float32,0x3cda1506,0xbfc9920f,4 +np.float32,0x3f6856f1,0xbd2c8290,4 +np.float32,0x7f3357fb,0x42198257,4 +np.float32,0x7f56f329,0x4219d2e1,4 +np.float32,0x3ef84108,0xbea0f595,4 +np.float32,0x3f72340f,0xbcc51916,4 +np.float32,0x3daf28,0xc218fcbd,4 +np.float32,0x131035,0xc21b06f4,4 +np.float32,0x3f275c3b,0xbe3d0487,4 +np.float32,0x3ef06130,0xbea82069,4 +np.float32,0x3f57f3b0,0xbd974fef,4 +np.float32,0x7f6c4a78,0x4219fcfa,4 +np.float32,0x7e8421d0,0x4217c639,4 +np.float32,0x3f17a479,0xbe68e08e,4 +np.float32,0x7f03774e,0x4218f83b,4 +np.float32,0x441a33,0xc218d0b8,4 +np.float32,0x539158,0xc21875b6,4 +np.float32,0x3e8fcc75,0xbf0d3018,4 +np.float32,0x7ef74130,0x4218dce4,4 +np.float32,0x3ea6f4fa,0xbef92c38,4 +np.float32,0x7f3948ab,0x421990d5,4 +np.float32,0x7db6f8f5,0x4215ee7c,4 +np.float32,0x3ee44a2f,0xbeb399e5,4 +np.float32,0x156c59,0xc21ad30d,4 +np.float32,0x3f21ee53,0xbe4baf16,4 +np.float32,0x3f2c08f4,0xbe30c424,4 +np.float32,0x3f49885c,0xbdd4c6a9,4 +np.float32,0x3eae0b9c,0xbeefed54,4 +np.float32,0x1b5c1f,0xc21a6646,4 +np.float32,0x3e7330e2,0xbf1fd592,4 +np.float32,0x3ebbeb4c,0xbededf82,4 +np.float32,0x427154,0xc218dbb1,4 +np.float32,0x3f6b8b4b,0xbd142498,4 +np.float32,0x8e769,0xc21c5981,4 +np.float32,0x3e9db557,0xbf02ec1c,4 +np.float32,0x3f001bef,0xbe99f019,4 +np.float32,0x3e58b48c,0xbf2ca77a,4 +np.float32,0x3d46c16b,0xbfa8327c,4 +np.float32,0x7eeeb305,0x4218cd3b,4 +np.float32,0x3e3f163d,0xbf3aa446,4 +np.float32,0x3f66c872,0xbd3877d9,4 +np.float32,0x7f7162f8,0x421a0677,4 +np.float32,0x3edca3bc,0xbebb2e28,4 +np.float32,0x3dc1055b,0xbf834afa,4 
+np.float32,0x12b16f,0xc21b0fad,4 +np.float32,0x3f733898,0xbcb62e16,4 +np.float32,0x3e617af8,0xbf283db0,4 +np.float32,0x7e86577a,0x4217cd99,4 +np.float32,0x3f0ba3c7,0xbe86c633,4 +np.float32,0x3f4cad25,0xbdc70247,4 +np.float32,0xb6cdf,0xc21bea9f,4 +np.float32,0x3f42971a,0xbdf3f49e,4 +np.float32,0x3e6ccad2,0xbf22cc78,4 +np.float32,0x7f2121b2,0x421952b8,4 +np.float32,0x3f6d3f55,0xbd075366,4 +np.float32,0x3f524f,0xc218f117,4 +np.float32,0x3e95b5d9,0xbf08b56a,4 +np.float32,0x7f6ae47d,0x4219fa56,4 +np.float32,0x267539,0xc219ceda,4 +np.float32,0x3ef72f6d,0xbea1eb2e,4 +np.float32,0x2100b2,0xc21a12e2,4 +np.float32,0x3d9777d1,0xbf90c4e7,4 +np.float32,0x44c6f5,0xc218cc56,4 +np.float32,0x7f2a613d,0x42196b8a,4 +np.float32,0x390a25,0xc2191f8d,4 +np.float32,0x3f1de5ad,0xbe56e703,4 +np.float32,0x2f59ce,0xc2197258,4 +np.float32,0x7f3b12a1,0x4219951b,4 +np.float32,0x3ecb66d4,0xbecd44ca,4 +np.float32,0x7e74ff,0xc217bd7d,4 +np.float32,0x7ed83f78,0x4218a14d,4 +np.float32,0x685994,0xc21812f1,4 +np.float32,0xbf800000,0x7fc00000,4 +np.float32,0x736f47,0xc217e60b,4 +np.float32,0x7f09c371,0x42190d0a,4 +np.float32,0x3f7ca51d,0xbbbbbce0,4 +np.float32,0x7f4b4d3b,0x4219ba1a,4 +np.float32,0x3f6c4471,0xbd0eb076,4 +np.float32,0xd944e,0xc21b9dcf,4 +np.float32,0x7cb06ffc,0x421375cd,4 +np.float32,0x586187,0xc2185cce,4 +np.float32,0x3f3cbf5b,0xbe078911,4 +np.float32,0x3f30b504,0xbe24d983,4 +np.float32,0x3f0a16ba,0xbe8941fd,4 +np.float32,0x5c43b0,0xc21849af,4 +np.float32,0x3dad74f6,0xbf893bd5,4 +np.float32,0x3c586958,0xbff087a6,4 +np.float32,0x3e8307a8,0xbf1786ba,4 +np.float32,0x7dcd1776,0x4216213d,4 +np.float32,0x3f44d107,0xbde9d662,4 +np.float32,0x3e2e6823,0xbf44cbec,4 +np.float32,0x3d87ea27,0xbf96caca,4 +np.float32,0x3e0c715b,0xbf5ce07e,4 +np.float32,0x7ec9cd5a,0x4218828e,4 +np.float32,0x3e26c0b4,0xbf49c93e,4 +np.float32,0x75b94e,0xc217dd50,4 +np.float32,0x3df7b9f5,0xbf6ad7f4,4 +np.float32,0x0,0xff800000,4 +np.float32,0x3f284795,0xbe3a94da,4 +np.float32,0x7ee49092,0x4218b9f0,4 +np.float32,0x7f4c20e0,0x4219bbe8,4 +np.float32,0x3efbbce8,0xbe9ddc4b,4 +np.float32,0x12274a,0xc21b1cb4,4 +np.float32,0x5fa1b1,0xc21839be,4 +np.float32,0x7f0b210e,0x4219116d,4 +np.float32,0x3f67092a,0xbd368545,4 +np.float32,0x3d572721,0xbfa3ca5b,4 +np.float32,0x3f7913ce,0xbc431028,4 +np.float32,0x3b0613,0xc2191059,4 +np.float32,0x3e1d16c0,0xbf506c6f,4 +np.float32,0xab130,0xc21c081d,4 +np.float32,0x3e23ac97,0xbf4bdb9d,4 +np.float32,0x7ef52368,0x4218d911,4 +np.float32,0x7f38e686,0x42198fe9,4 +np.float32,0x3f106a21,0xbe7e9897,4 +np.float32,0x3ecef8d5,0xbec96644,4 +np.float32,0x3ec37e02,0xbed61683,4 +np.float32,0x3efbd063,0xbe9dcb17,4 +np.float32,0x3f318fe3,0xbe22b402,4 +np.float32,0x7e5e5228,0x4217795d,4 +np.float32,0x72a046,0xc217e92c,4 +np.float32,0x7f6f970b,0x421a0324,4 +np.float32,0x3ed871b4,0xbebf72fb,4 +np.float32,0x7a2eaa,0xc217ccc8,4 +np.float32,0x3e819655,0xbf18c1d7,4 +np.float32,0x80800000,0x7fc00000,4 +np.float32,0x7eab0719,0x421838f9,4 +np.float32,0x7f0763cb,0x4219054f,4 +np.float32,0x3f191672,0xbe64a8af,4 +np.float32,0x7d4327,0xc217c1b6,4 +np.float32,0x3f724ba6,0xbcc3bea3,4 +np.float32,0x60fe06,0xc2183375,4 +np.float32,0x48cd59,0xc218b30b,4 +np.float32,0x3f7fec2b,0xb909d3f3,4 +np.float32,0x1c7bb9,0xc21a5460,4 +np.float32,0x24d8a8,0xc219e1e4,4 +np.float32,0x3e727c52,0xbf20283c,4 +np.float32,0x4bc460,0xc218a14a,4 +np.float32,0x63e313,0xc2182661,4 +np.float32,0x7f625581,0x4219e9d4,4 +np.float32,0x3eeb3e77,0xbeacedc0,4 +np.float32,0x7ef27a47,0x4218d437,4 +np.float32,0x27105a,0xc219c7e6,4 +np.float32,0x22a10b,0xc219fd7d,4 
+np.float32,0x3f41e907,0xbdf711ab,4 +np.float32,0x7c1fbf95,0x4212155b,4 +np.float32,0x7e5acceb,0x42177244,4 +np.float32,0x3e0892fa,0xbf5ffb83,4 +np.float32,0x3ea0e51d,0xbf00b2c0,4 +np.float32,0x3e56fc29,0xbf2d8a51,4 +np.float32,0x7ee724ed,0x4218beed,4 +np.float32,0x7ebf142b,0x42186a46,4 +np.float32,0x7f6cf35c,0x4219fe37,4 +np.float32,0x3f11abf7,0xbe7abdcd,4 +np.float32,0x588d7a,0xc2185bf1,4 +np.float32,0x3f6e81d2,0xbcfbcf97,4 +np.float32,0x3f1b6be8,0xbe5dee2b,4 +np.float32,0x7f3815e0,0x42198df2,4 +np.float32,0x3f5bfc88,0xbd86d93d,4 +np.float32,0x3f3775d0,0xbe142bbc,4 +np.float32,0x78a958,0xc217d25a,4 +np.float32,0x2ff7c3,0xc2196c96,4 +np.float32,0x4b9c0,0xc21d733c,4 +np.float32,0x3ec025af,0xbed9ecf3,4 +np.float32,0x6443f0,0xc21824b3,4 +np.float32,0x3f754e28,0xbc97d299,4 +np.float32,0x3eaa91d3,0xbef4699d,4 +np.float32,0x3e5f2837,0xbf296478,4 +np.float32,0xe5676,0xc21b85a4,4 +np.float32,0x3f6859f2,0xbd2c6b90,4 +np.float32,0x3f68686b,0xbd2bfcc6,4 +np.float32,0x4b39b8,0xc218a47b,4 +np.float32,0x630ac4,0xc2182a28,4 +np.float32,0x160980,0xc21ac67d,4 +np.float32,0x3ed91c4d,0xbebec3fd,4 +np.float32,0x7ec27b0d,0x4218721f,4 +np.float32,0x3f3c0a5f,0xbe09344b,4 +np.float32,0x3dbff9c1,0xbf839841,4 +np.float32,0x7f0e8ea7,0x42191c40,4 +np.float32,0x3f36b162,0xbe1608e4,4 +np.float32,0x228bb3,0xc219fe90,4 +np.float32,0x2fdd30,0xc2196d8c,4 +np.float32,0x3e8fce8e,0xbf0d2e79,4 +np.float32,0x3f36acc7,0xbe16141a,4 +np.float32,0x7f44b51c,0x4219ab70,4 +np.float32,0x3ec3371c,0xbed66736,4 +np.float32,0x4388a2,0xc218d473,4 +np.float32,0x3f5aa6c3,0xbd8c4344,4 +np.float32,0x7f09fce4,0x42190dc3,4 +np.float32,0x7ed7854a,0x42189fce,4 +np.float32,0x7f4da83a,0x4219bf3a,4 +np.float32,0x3db8da28,0xbf85b25a,4 +np.float32,0x7f449686,0x4219ab2b,4 +np.float32,0x2eb25,0xc21e498c,4 +np.float32,0x3f2bcc08,0xbe3161bd,4 +np.float32,0x36c923,0xc219317b,4 +np.float32,0x3d52a866,0xbfa4f6d2,4 +np.float32,0x3f7d6688,0xbb913e4e,4 +np.float32,0x3f5a6ba4,0xbd8d33e3,4 +np.float32,0x719740,0xc217ed35,4 +np.float32,0x78a472,0xc217d26c,4 +np.float32,0x7ee33d0c,0x4218b759,4 +np.float32,0x7f668c1d,0x4219f208,4 +np.float32,0x3e29c600,0xbf47ca46,4 +np.float32,0x3f3cefc3,0xbe071712,4 +np.float32,0x3e224ebd,0xbf4cca41,4 +np.float32,0x7f1417be,0x42192d31,4 +np.float32,0x7f29d7d5,0x42196a23,4 +np.float32,0x3338ce,0xc2194f65,4 +np.float32,0x2a7897,0xc219a2b6,4 +np.float32,0x3d6bc3d8,0xbf9eb468,4 +np.float32,0x3f6bd7bf,0xbd11e392,4 +np.float32,0x7f6d26bf,0x4219fe98,4 +np.float32,0x3f52d378,0xbdacadb5,4 +np.float32,0x3efac453,0xbe9eb84a,4 +np.float32,0x3f692eb7,0xbd261184,4 +np.float32,0x3f6a0bb5,0xbd1f7ec1,4 +np.float32,0x3f037a49,0xbe942aa8,4 +np.float32,0x3f465bd4,0xbde2e530,4 +np.float32,0x7ef0f47b,0x4218d16a,4 +np.float32,0x637127,0xc218285e,4 +np.float32,0x3f41e511,0xbdf723d7,4 +np.float32,0x7f800000,0x7f800000,4 +np.float32,0x3f3342d5,0xbe1e77d5,4 +np.float32,0x7f57cfe6,0x4219d4a9,4 +np.float32,0x3e4358ed,0xbf3830a7,4 +np.float32,0x3ce25f15,0xbfc77f2b,4 +np.float32,0x7ed057e7,0x421890be,4 +np.float32,0x7ce154d9,0x4213e295,4 +np.float32,0x3ee91984,0xbeaef703,4 +np.float32,0x7e4e919c,0x421758af,4 +np.float32,0x6830e7,0xc218139e,4 +np.float32,0x3f12f08e,0xbe76e328,4 +np.float32,0x7f0a7a32,0x42190f56,4 +np.float32,0x7f38e,0xc21c8bd3,4 +np.float32,0x3e01def9,0xbf6593e3,4 +np.float32,0x3f5c8c6d,0xbd849432,4 +np.float32,0x3eed8747,0xbeaac7a3,4 +np.float32,0x3cadaa0e,0xbfd63b21,4 +np.float32,0x3f7532a9,0xbc996178,4 +np.float32,0x31f3ac,0xc2195a8f,4 +np.float32,0x3f0e0f97,0xbe82f3af,4 +np.float32,0x3f2a1f35,0xbe35bd3f,4 +np.float32,0x3f4547b2,0xbde7bebd,4 
+np.float32,0x3f7988a6,0xbc36094c,4 +np.float32,0x74464c,0xc217e2d2,4 +np.float32,0x7f7518be,0x421a0d3f,4 +np.float32,0x7e97fa0a,0x42180473,4 +np.float32,0x584e3a,0xc2185d2f,4 +np.float32,0x3e7291f3,0xbf201e52,4 +np.float32,0xc0a05,0xc21bd359,4 +np.float32,0x3a3177,0xc21916a6,4 +np.float32,0x4f417f,0xc2188d45,4 +np.float32,0x263fce,0xc219d145,4 +np.float32,0x7e1d58,0xc217beb1,4 +np.float32,0x7f056af3,0x4218fec9,4 +np.float32,0x3f21c181,0xbe4c2a3f,4 +np.float32,0x7eca4956,0x4218839f,4 +np.float32,0x3e58afa8,0xbf2ca9fd,4 +np.float32,0x3f40d583,0xbdfc04ef,4 +np.float32,0x7f432fbb,0x4219a7fc,4 +np.float32,0x43aaa4,0xc218d393,4 +np.float32,0x7f2c9b62,0x42197150,4 +np.float32,0x5c3876,0xc21849e5,4 +np.float32,0x7f2034e8,0x42195029,4 +np.float32,0x7e5be772,0x42177481,4 +np.float32,0x80000000,0xff800000,4 +np.float32,0x3f5be03b,0xbd874bb0,4 +np.float32,0x3e32494f,0xbf4259be,4 +np.float32,0x3e1f4671,0xbf4ee30b,4 +np.float32,0x4606cc,0xc218c454,4 +np.float32,0x425cbc,0xc218dc3b,4 +np.float32,0x7dd9b8bf,0x42163bd0,4 +np.float32,0x3f0465d0,0xbe929db7,4 +np.float32,0x3f735077,0xbcb4d0fa,4 +np.float32,0x4d6a43,0xc21897b8,4 +np.float32,0x3e27d600,0xbf4910f5,4 +np.float32,0x3f06e0cc,0xbe8e7d24,4 +np.float32,0x3f3fd064,0xbe005e45,4 +np.float32,0x176f1,0xc21f7c2d,4 +np.float32,0x3eb64e6f,0xbee59d9c,4 +np.float32,0x7f0f075d,0x42191db8,4 +np.float32,0x3f718cbe,0xbcceb621,4 +np.float32,0x3ead7bda,0xbef0a54a,4 +np.float32,0x7f77c1a8,0x421a120c,4 +np.float32,0x3f6a79c5,0xbd1c3afd,4 +np.float32,0x3e992d1f,0xbf062a02,4 +np.float32,0x3e6f6335,0xbf219639,4 +np.float32,0x7f6d9a3e,0x4219ff70,4 +np.float32,0x557ed1,0xc2186b91,4 +np.float32,0x3f13a456,0xbe74c457,4 +np.float32,0x15c2dc,0xc21acc17,4 +np.float32,0x71f36f,0xc217ebcc,4 +np.float32,0x748dea,0xc217e1c1,4 +np.float32,0x7f0f32e0,0x42191e3f,4 +np.float32,0x5b1da8,0xc2184f41,4 +np.float32,0x3d865d3a,0xbf976e11,4 +np.float32,0x3f800000,0x0,4 +np.float32,0x7f67b56d,0x4219f444,4 +np.float32,0x6266a1,0xc2182d0c,4 +np.float32,0x3ec9c5e4,0xbecf0e6b,4 +np.float32,0x6a6a0e,0xc2180a3b,4 +np.float32,0x7e9db6fd,0x421814ef,4 +np.float32,0x3e7458f7,0xbf1f4e88,4 +np.float32,0x3ead8016,0xbef09fdc,4 +np.float32,0x3e263d1c,0xbf4a211e,4 +np.float32,0x7f6b3329,0x4219faeb,4 +np.float32,0x800000,0xc217b818,4 +np.float32,0x3f0654c7,0xbe8f6471,4 +np.float32,0x3f281b71,0xbe3b0990,4 +np.float32,0x7c4c8e,0xc217c524,4 +np.float32,0x7d113a87,0x4214537d,4 +np.float32,0x734b5f,0xc217e696,4 +np.float32,0x7f079d05,0x4219060b,4 +np.float32,0x3ee830b1,0xbeafd58b,4 +np.float32,0x3f1c3b8b,0xbe5b9d96,4 +np.float32,0x3f2bf0c6,0xbe3102aa,4 +np.float32,0x7ddffe22,0x42164871,4 +np.float32,0x3f1e58b4,0xbe55a37f,4 +np.float32,0x5f3edf,0xc2183b8a,4 +np.float32,0x7f1fb6ec,0x42194eca,4 +np.float32,0x3f78718e,0xbc55311e,4 +np.float32,0x3e574b7d,0xbf2d6152,4 +np.float32,0x7eab27c6,0x4218394e,4 +np.float32,0x7f34603c,0x421984e5,4 +np.float32,0x3f3a8b57,0xbe0cc1ca,4 +np.float32,0x3f744181,0xbca7134e,4 +np.float32,0x3f7e3bc4,0xbb45156b,4 +np.float32,0x93ab4,0xc21c498b,4 +np.float32,0x7ed5541e,0x42189b42,4 +np.float32,0x6bf8ec,0xc21803c4,4 +np.float32,0x757395,0xc217de58,4 +np.float32,0x7f177214,0x42193726,4 +np.float32,0x59935f,0xc21856d6,4 +np.float32,0x2cd9ba,0xc2198a78,4 +np.float32,0x3ef6fd5c,0xbea2183c,4 +np.float32,0x3ebb6c63,0xbedf75e0,4 +np.float32,0x7f43272c,0x4219a7e9,4 +np.float32,0x7f42e67d,0x4219a755,4 +np.float32,0x3f3f744f,0xbe0133f6,4 +np.float32,0x7f5fddaa,0x4219e4f4,4 +np.float32,0x3dc9874f,0xbf80e529,4 +np.float32,0x3f2efe64,0xbe292ec8,4 +np.float32,0x3e0406a6,0xbf63bf7c,4 
+np.float32,0x3cdbb0aa,0xbfc92984,4 +np.float32,0x3e6597e7,0xbf263b30,4 +np.float32,0x3f0c1153,0xbe861807,4 +np.float32,0x7fce16,0xc217b8c6,4 +np.float32,0x3f5f4e5f,0xbd730dc6,4 +np.float32,0x3ed41ffa,0xbec3ee69,4 +np.float32,0x3f216c78,0xbe4d1446,4 +np.float32,0x3f123ed7,0xbe78fe4b,4 +np.float32,0x7f7e0ca9,0x421a1d34,4 +np.float32,0x7e318af4,0x42171558,4 +np.float32,0x7f1e1659,0x42194a3d,4 +np.float32,0x34d12a,0xc21941c2,4 +np.float32,0x3d9566ad,0xbf918870,4 +np.float32,0x3e799a47,0xbf1cf0e5,4 +np.float32,0x3e89dd6f,0xbf11df76,4 +np.float32,0x32f0d3,0xc21951d8,4 +np.float32,0x7e89d17e,0x4217d8f6,4 +np.float32,0x1f3b38,0xc21a2b6b,4 +np.float32,0x7ee9e060,0x4218c427,4 +np.float32,0x31a673,0xc2195d41,4 +np.float32,0x5180f1,0xc21880d5,4 +np.float32,0x3cd36f,0xc21902f8,4 +np.float32,0x3bb63004,0xc01050cb,4 +np.float32,0x3e8ee9d1,0xbf0ddfde,4 +np.float32,0x3d2a7da3,0xbfb0b970,4 +np.float32,0x3ea58107,0xbefb1dc3,4 +np.float32,0x7f6760b0,0x4219f3a2,4 +np.float32,0x7f7f9e08,0x421a1ff0,4 +np.float32,0x37e7f1,0xc219287b,4 +np.float32,0x3ef7eb53,0xbea14267,4 +np.float32,0x3e2eb581,0xbf449aa5,4 +np.float32,0x3da7671c,0xbf8b3568,4 +np.float32,0x7af36f7b,0x420f33ee,4 +np.float32,0x3eb3602c,0xbee93823,4 +np.float32,0x3f68bcff,0xbd2975de,4 +np.float32,0x3ea7cefb,0xbef80a9d,4 +np.float32,0x3f329689,0xbe202414,4 +np.float32,0x7f0c7c80,0x421915be,4 +np.float32,0x7f4739b8,0x4219b118,4 +np.float32,0x73af58,0xc217e515,4 +np.float32,0x7f13eb2a,0x42192cab,4 +np.float32,0x30f2d9,0xc2196395,4 +np.float32,0x7ea7066c,0x42182e71,4 +np.float32,0x669fec,0xc2181a5b,4 +np.float32,0x3f7d6876,0xbb90d1ef,4 +np.float32,0x3f08a4ef,0xbe8b9897,4 +np.float32,0x7f2a906c,0x42196c05,4 +np.float32,0x3ed3ca42,0xbec44856,4 +np.float32,0x9d27,0xc220fee2,4 +np.float32,0x3e4508a1,0xbf373c03,4 +np.float32,0x3e41f8de,0xbf38f9bb,4 +np.float32,0x3e912714,0xbf0c255b,4 +np.float32,0xff800000,0x7fc00000,4 +np.float32,0x7eefd13d,0x4218cf4f,4 +np.float32,0x3f491674,0xbdd6bded,4 +np.float32,0x3ef49512,0xbea445c9,4 +np.float32,0x3f045b79,0xbe92af15,4 +np.float32,0x3ef6c412,0xbea24bd5,4 +np.float32,0x3e6f3c28,0xbf21a85d,4 +np.float32,0x3ef71839,0xbea2000e,4 +np.float32,0x1,0xc23369f4,4 +np.float32,0x3e3fcfe4,0xbf3a3876,4 +np.float32,0x3e9d7a65,0xbf0315b2,4 +np.float32,0x20b7c4,0xc21a16bd,4 +np.float32,0x7f707b10,0x421a04cb,4 +np.float32,0x7fc00000,0x7fc00000,4 +np.float32,0x3f285ebd,0xbe3a57ac,4 +np.float32,0x74c9ea,0xc217e0dc,4 +np.float32,0x3f6501f2,0xbd4634ab,4 +np.float32,0x3f248959,0xbe4495cc,4 +np.float32,0x7e915ff0,0x4217f0b3,4 +np.float32,0x7edbb910,0x4218a864,4 +np.float32,0x3f7042dd,0xbce1bddb,4 +np.float32,0x6f08c9,0xc217f754,4 +np.float32,0x7f423993,0x4219a5ca,4 +np.float32,0x3f125704,0xbe78b4cd,4 +np.float32,0x7ef7f5ae,0x4218de28,4 +np.float32,0x3f2dd940,0xbe2c1a33,4 +np.float32,0x3f1ca78e,0xbe5a6a8b,4 +np.float32,0x244863,0xc219e8be,4 +np.float32,0x3f2614fe,0xbe406d6b,4 +np.float32,0x3e75e7a3,0xbf1e99b5,4 +np.float32,0x2bdd6e,0xc2199459,4 +np.float32,0x7e49e279,0x42174e7b,4 +np.float32,0x3e3bb09a,0xbf3ca2cd,4 +np.float32,0x649f06,0xc2182320,4 +np.float32,0x7f4a44e1,0x4219b7d6,4 +np.float32,0x400473,0xc218ec3a,4 +np.float32,0x3edb19ad,0xbebcbcad,4 +np.float32,0x3d8ee956,0xbf94006c,4 +np.float32,0x7e91c603,0x4217f1eb,4 +np.float32,0x221384,0xc21a04a6,4 +np.float32,0x7f7dd660,0x421a1cd5,4 +np.float32,0x7ef34609,0x4218d5ac,4 +np.float32,0x7f5ed529,0x4219e2e5,4 +np.float32,0x7f1bf685,0x42194438,4 +np.float32,0x3cdd094a,0xbfc8d294,4 +np.float32,0x7e87fc8e,0x4217d303,4 +np.float32,0x7f53d971,0x4219cc6b,4 +np.float32,0xabc8b,0xc21c0646,4 
+np.float32,0x7f5011e6,0x4219c46a,4 +np.float32,0x7e460638,0x421745e5,4 +np.float32,0xa8126,0xc21c0ffd,4 +np.float32,0x3eec2a66,0xbeac0f2d,4 +np.float32,0x3f3a1213,0xbe0de340,4 +np.float32,0x7f5908db,0x4219d72c,4 +np.float32,0x7e0ad3c5,0x4216a7f3,4 +np.float32,0x3f2de40e,0xbe2bfe90,4 +np.float32,0x3d0463c5,0xbfbec8e4,4 +np.float32,0x7c7cde0b,0x4212e19a,4 +np.float32,0x74c24f,0xc217e0f9,4 +np.float32,0x3f14b4cb,0xbe71929b,4 +np.float32,0x3e94e192,0xbf09537f,4 +np.float32,0x3eebde71,0xbeac56bd,4 +np.float32,0x3f65e413,0xbd3f5b8a,4 +np.float32,0x7e109199,0x4216b9f9,4 +np.float32,0x3f22f5d0,0xbe48ddc0,4 +np.float32,0x3e22d3bc,0xbf4c6f4d,4 +np.float32,0x3f7a812f,0xbc1a680b,4 +np.float32,0x3f67f361,0xbd2f7d7c,4 +np.float32,0x3f1caa63,0xbe5a6281,4 +np.float32,0x3f306fde,0xbe2587ab,4 +np.float32,0x3e8df9d3,0xbf0e9b2f,4 +np.float32,0x3eaaccc4,0xbef41cd4,4 +np.float32,0x7f3f65ec,0x42199f45,4 +np.float32,0x3dc706e0,0xbf8196ec,4 +np.float32,0x3e14eaba,0xbf565cf6,4 +np.float32,0xcc60,0xc2208a09,4 +np.float32,0x358447,0xc2193be7,4 +np.float32,0x3dcecade,0xbf7eec70,4 +np.float32,0x3f20b4f8,0xbe4f0ef0,4 +np.float32,0x7e7c979f,0x4217b222,4 +np.float32,0x7f2387b9,0x4219594a,4 +np.float32,0x3f6f6e5c,0xbcee0e05,4 +np.float32,0x7f19ad81,0x42193da8,4 +np.float32,0x5635e1,0xc21867dd,4 +np.float32,0x4c5e97,0xc2189dc4,4 +np.float32,0x7f35f97f,0x421988d1,4 +np.float32,0x7f685224,0x4219f571,4 +np.float32,0x3eca0616,0xbecec7b8,4 +np.float32,0x3f436d0d,0xbdf024ca,4 +np.float32,0x12a97d,0xc21b106a,4 +np.float32,0x7f0fdc93,0x4219204d,4 +np.float32,0x3debfb42,0xbf703e65,4 +np.float32,0x3c6c54d2,0xbfeba291,4 +np.float32,0x7e5d7491,0x421777a1,4 +np.float32,0x3f4bd2f0,0xbdcab87d,4 +np.float32,0x3f7517f4,0xbc9ae510,4 +np.float32,0x3f71a59a,0xbccd480d,4 +np.float32,0x3f514653,0xbdb33f61,4 +np.float32,0x3f4e6ea4,0xbdbf694b,4 +np.float32,0x3eadadec,0xbef06526,4 +np.float32,0x3f3b41c1,0xbe0b0fbf,4 +np.float32,0xc35a,0xc2209e1e,4 +np.float32,0x384982,0xc2192575,4 +np.float32,0x3464c3,0xc2194556,4 +np.float32,0x7f5e20d9,0x4219e17d,4 +np.float32,0x3ea18b62,0xbf004016,4 +np.float32,0x63a02b,0xc218278c,4 +np.float32,0x7ef547ba,0x4218d953,4 +np.float32,0x3f2496fb,0xbe4470f4,4 +np.float32,0x7ea0c8c6,0x42181d81,4 +np.float32,0x3f42ba60,0xbdf35372,4 +np.float32,0x7e40d9,0xc217be34,4 +np.float32,0x3e95883b,0xbf08d750,4 +np.float32,0x3e0cddf3,0xbf5c8aa8,4 +np.float32,0x3f2305d5,0xbe48b20a,4 +np.float32,0x7f0d0941,0x4219177b,4 +np.float32,0x3f7b98d3,0xbbf6e477,4 +np.float32,0x3f687cdc,0xbd2b6057,4 +np.float32,0x3f42ce91,0xbdf2f73d,4 +np.float32,0x3ee00fc0,0xbeb7c217,4 +np.float32,0x7f3d483a,0x42199a53,4 +np.float32,0x3e1e08eb,0xbf4fc18d,4 +np.float32,0x7e202ff5,0x4216e798,4 +np.float32,0x582898,0xc2185ded,4 +np.float32,0x3e3552b1,0xbf40790c,4 +np.float32,0x3d3f7c87,0xbfaa44b6,4 +np.float32,0x669d8e,0xc2181a65,4 +np.float32,0x3f0e21b4,0xbe82d757,4 +np.float32,0x686f95,0xc2181293,4 +np.float32,0x3f48367f,0xbdda9ead,4 +np.float32,0x3dc27802,0xbf82e0a0,4 +np.float32,0x3f6ac40c,0xbd1a07d4,4 +np.float32,0x3bba6d,0xc2190b12,4 +np.float32,0x3ec7b6b0,0xbed15665,4 +np.float32,0x3f1f9ca4,0xbe521955,4 +np.float32,0x3ef2f147,0xbea5c4b8,4 +np.float32,0x7c65f769,0x4212b762,4 +np.float32,0x7e98e162,0x42180716,4 +np.float32,0x3f0f0c09,0xbe8169ea,4 +np.float32,0x3d67f03b,0xbf9f9d48,4 +np.float32,0x7f3751e4,0x42198c18,4 +np.float32,0x7f1fac61,0x42194ead,4 +np.float32,0x3e9b698b,0xbf048d89,4 +np.float32,0x7e66507b,0x42178913,4 +np.float32,0x7f5cb680,0x4219dea5,4 +np.float32,0x234700,0xc219f53e,4 +np.float32,0x3d9984ad,0xbf900591,4 
+np.float32,0x3f33a3f2,0xbe1d872a,4 +np.float32,0x3eaf52b6,0xbeee4cf4,4 +np.float32,0x7f078930,0x421905ca,4 +np.float32,0x3f083b39,0xbe8c44df,4 +np.float32,0x3e3823f8,0xbf3ec231,4 +np.float32,0x3eef6f5d,0xbea9008c,4 +np.float32,0x6145e1,0xc218322c,4 +np.float32,0x16d9ae,0xc21ab65f,4 +np.float32,0x7e543376,0x421764a5,4 +np.float32,0x3ef77ccb,0xbea1a5a0,4 +np.float32,0x3f4a443f,0xbdd18af5,4 +np.float32,0x8f209,0xc21c5770,4 +np.float32,0x3ecac126,0xbecdfa33,4 +np.float32,0x3e8662f9,0xbf14b6c7,4 +np.float32,0x23759a,0xc219f2f4,4 +np.float32,0xf256d,0xc21b6d3f,4 +np.float32,0x3f579f93,0xbd98aaa2,4 +np.float32,0x3ed4cc8e,0xbec339cb,4 +np.float32,0x3ed25400,0xbec5d2a1,4 +np.float32,0x3ed6f8ba,0xbec0f795,4 +np.float32,0x7f36efd9,0x42198b2a,4 +np.float32,0x7f5169dd,0x4219c746,4 +np.float32,0x7de18a20,0x42164b80,4 +np.float32,0x3e8de526,0xbf0eab61,4 +np.float32,0x3de0cbcd,0xbf75a47e,4 +np.float32,0xe265f,0xc21b8b82,4 +np.float32,0x3df3cdbd,0xbf6c9e40,4 +np.float32,0x3f38a25a,0xbe115589,4 +np.float32,0x7f01f2c0,0x4218f311,4 +np.float32,0x3da7d5f4,0xbf8b10a5,4 +np.float32,0x4d4fe8,0xc2189850,4 +np.float32,0x3cc96d9d,0xbfcdfc8d,4 +np.float32,0x259a88,0xc219d8d7,4 +np.float32,0x7f1d5102,0x42194810,4 +np.float32,0x7e17ca91,0x4216cfa7,4 +np.float32,0x3f73d110,0xbcad7a8f,4 +np.float32,0x3f009383,0xbe9920ed,4 +np.float32,0x7e22af,0xc217be9f,4 +np.float32,0x3f7de2ce,0xbb6c0394,4 +np.float32,0x3edd0cd2,0xbebac45a,4 +np.float32,0x3ec9b5c1,0xbecf2035,4 +np.float32,0x3168c5,0xc2195f6b,4 +np.float32,0x3e935522,0xbf0a7d18,4 +np.float32,0x3e494077,0xbf34e120,4 +np.float32,0x3f52ed06,0xbdac41ec,4 +np.float32,0x3f73d51e,0xbcad3f65,4 +np.float32,0x3f03d453,0xbe939295,4 +np.float32,0x7ef4ee68,0x4218d8b1,4 +np.float32,0x3ed0e2,0xc218f4a7,4 +np.float32,0x4efab8,0xc2188ed3,4 +np.float32,0x3dbd5632,0xbf845d3b,4 +np.float32,0x7eecad4f,0x4218c972,4 +np.float32,0x9d636,0xc21c2d32,4 +np.float32,0x3e5f3b6b,0xbf295ae7,4 +np.float32,0x7f4932df,0x4219b57a,4 +np.float32,0x4b59b5,0xc218a3be,4 +np.float32,0x3e5de97f,0xbf2a03b4,4 +np.float32,0x3f1c479d,0xbe5b7b3c,4 +np.float32,0x3f42e7e4,0xbdf283a5,4 +np.float32,0x2445,0xc2238af2,4 +np.float32,0x7aa71b43,0x420e8c9e,4 +np.float32,0x3ede6e4e,0xbeb961e1,4 +np.float32,0x7f05dd3b,0x42190045,4 +np.float32,0x3ef5b55c,0xbea3404b,4 +np.float32,0x7f738624,0x421a0a62,4 +np.float32,0x3e7d50a1,0xbf1b4cb4,4 +np.float32,0x3f44cc4a,0xbde9ebcc,4 +np.float32,0x7e1a7b0b,0x4216d777,4 +np.float32,0x3f1d9868,0xbe57c0da,4 +np.float32,0x1ebee2,0xc21a3263,4 +np.float32,0x31685f,0xc2195f6e,4 +np.float32,0x368a8e,0xc2193379,4 +np.float32,0xa9847,0xc21c0c2e,4 +np.float32,0x3bd3b3,0xc2190a56,4 +np.float32,0x3961e4,0xc2191ce3,4 +np.float32,0x7e13a243,0x4216c34e,4 +np.float32,0x7f7b1790,0x421a17ff,4 +np.float32,0x3e55f020,0xbf2e1545,4 +np.float32,0x3f513861,0xbdb37aa8,4 +np.float32,0x3dd9e754,0xbf791ad2,4 +np.float32,0x5e8d86,0xc2183ec9,4 +np.float32,0x26b796,0xc219cbdd,4 +np.float32,0x429daa,0xc218da89,4 +np.float32,0x3f477caa,0xbdddd9ba,4 +np.float32,0x3f0e5114,0xbe828d45,4 +np.float32,0x3f54f362,0xbda3c286,4 +np.float32,0x6eac1c,0xc217f8c8,4 +np.float32,0x3f04c479,0xbe91fef5,4 +np.float32,0x3e993765,0xbf06228e,4 +np.float32,0x3eafd99f,0xbeeda21b,4 +np.float32,0x3f2a759e,0xbe34db96,4 +np.float32,0x3f05adfb,0xbe907937,4 +np.float32,0x3f6e2dfc,0xbd005980,4 +np.float32,0x3f2f2daa,0xbe28b6b5,4 +np.float32,0x15e746,0xc21ac931,4 +np.float32,0x7d34ca26,0x4214b4e5,4 +np.float32,0x7ebd175c,0x4218659f,4 +np.float32,0x7f1ed26b,0x42194c4c,4 +np.float32,0x2588b,0xc21eaab0,4 +np.float32,0x3f0065e3,0xbe996fe2,4 
+np.float32,0x3f610376,0xbd658122,4 +np.float32,0x451995,0xc218ca41,4 +np.float32,0x70e083,0xc217f002,4 +np.float32,0x7e19821a,0x4216d4a8,4 +np.float32,0x3e7cd9a0,0xbf1b80fb,4 +np.float32,0x7f1a8f18,0x42194033,4 +np.float32,0x3f008fee,0xbe99271f,4 +np.float32,0xff7fffff,0x7fc00000,4 +np.float32,0x7f31d826,0x42197e9b,4 +np.float32,0x3f18cf12,0xbe657838,4 +np.float32,0x3e5c1bc7,0xbf2aebf9,4 +np.float32,0x3e3d3993,0xbf3bbaf8,4 +np.float32,0x68457a,0xc2181347,4 +np.float32,0x7ddf7561,0x42164761,4 +np.float32,0x7f47341b,0x4219b10c,4 +np.float32,0x4d3ecd,0xc21898b2,4 +np.float32,0x7f43dee8,0x4219a98b,4 +np.float32,0x3f0def7c,0xbe8325f5,4 +np.float32,0x3d5a551f,0xbfa2f994,4 +np.float32,0x7ed26602,0x4218951b,4 +np.float32,0x3ee7fa5b,0xbeb0099a,4 +np.float32,0x7ef74ea8,0x4218dcfc,4 +np.float32,0x6a3bb2,0xc2180afd,4 +np.float32,0x7f4c1e6e,0x4219bbe3,4 +np.float32,0x3e26f625,0xbf49a5a2,4 +np.float32,0xb8482,0xc21be70b,4 +np.float32,0x3f32f077,0xbe1f445b,4 +np.float32,0x7dd694b6,0x4216355a,4 +np.float32,0x7f3d62fd,0x42199a92,4 +np.float32,0x3f48e41a,0xbdd79cbf,4 +np.float32,0x338fc3,0xc2194c75,4 +np.float32,0x3e8355f0,0xbf174462,4 +np.float32,0x7f487e83,0x4219b3eb,4 +np.float32,0x2227f7,0xc21a039b,4 +np.float32,0x7e4383dd,0x4217403a,4 +np.float32,0x52d28b,0xc21879b2,4 +np.float32,0x12472c,0xc21b19a9,4 +np.float32,0x353530,0xc2193e7b,4 +np.float32,0x3f4e4728,0xbdc0137a,4 +np.float32,0x3bf169,0xc2190979,4 +np.float32,0x3eb3ee2e,0xbee8885f,4 +np.float32,0x3f03e3c0,0xbe937892,4 +np.float32,0x3c9f8408,0xbfdaf47f,4 +np.float32,0x40e792,0xc218e61b,4 +np.float32,0x5a6b29,0xc21852ab,4 +np.float32,0x7f268b83,0x4219616a,4 +np.float32,0x3ee25997,0xbeb57fa7,4 +np.float32,0x3f175324,0xbe69cf53,4 +np.float32,0x3f781d91,0xbc5e9827,4 +np.float32,0x7dba5210,0x4215f68c,4 +np.float32,0x7f1e66,0xc217bb2b,4 +np.float32,0x7f7fffff,0x421a209b,4 +np.float32,0x3f646202,0xbd4b10b8,4 +np.float32,0x575248,0xc218622b,4 +np.float32,0x7c67faa1,0x4212bb42,4 +np.float32,0x7f1683f2,0x42193469,4 +np.float32,0x1a3864,0xc21a7931,4 +np.float32,0x7f30ad75,0x42197bae,4 +np.float32,0x7f1c9d05,0x42194612,4 +np.float32,0x3e791795,0xbf1d2b2c,4 +np.float32,0x7e9ebc19,0x421817cd,4 +np.float32,0x4999b7,0xc218ae31,4 +np.float32,0x3d130e2c,0xbfb8f1cc,4 +np.float32,0x3f7e436f,0xbb41bb07,4 +np.float32,0x3ee00241,0xbeb7cf7d,4 +np.float32,0x7e496181,0x42174d5f,4 +np.float32,0x7efe58be,0x4218e978,4 +np.float32,0x3f5e5b0c,0xbd7aa43f,4 +np.float32,0x7ee4c6ab,0x4218ba59,4 +np.float32,0x3f6da8c6,0xbd043d7e,4 +np.float32,0x3e3e6e0f,0xbf3b064b,4 +np.float32,0x3f0143b3,0xbe97f10a,4 +np.float32,0x79170f,0xc217d0c6,4 +np.float32,0x517645,0xc218810f,4 +np.float32,0x3f1f9960,0xbe52226e,4 +np.float32,0x2a8df9,0xc219a1d6,4 +np.float32,0x2300a6,0xc219f8b8,4 +np.float32,0x3ee31355,0xbeb4c97a,4 +np.float32,0x3f20b05f,0xbe4f1ba9,4 +np.float32,0x3ee64249,0xbeb1b0ff,4 +np.float32,0x3a94b7,0xc21913b2,4 +np.float32,0x7ef7ef43,0x4218de1d,4 +np.float32,0x3f1abb5d,0xbe5fe872,4 +np.float32,0x7f65360b,0x4219ef72,4 +np.float32,0x3d315d,0xc219004c,4 +np.float32,0x3f26bbc4,0xbe3eafb9,4 +np.float32,0x3ee8c6e9,0xbeaf45de,4 +np.float32,0x7e5f1452,0x42177ae1,4 +np.float32,0x3f32e777,0xbe1f5aba,4 +np.float32,0x4d39a1,0xc21898d0,4 +np.float32,0x3e59ad15,0xbf2c2841,4 +np.float32,0x3f4be746,0xbdca5fc4,4 +np.float32,0x72e4fd,0xc217e821,4 +np.float32,0x1af0b8,0xc21a6d25,4 +np.float32,0x3f311147,0xbe23f18d,4 +np.float32,0x3f1ecebb,0xbe545880,4 +np.float32,0x7e90d293,0x4217ef02,4 +np.float32,0x3e3b366a,0xbf3ceb46,4 +np.float32,0x3f133239,0xbe761c96,4 +np.float32,0x7541ab,0xc217df15,4 
+np.float32,0x3d8c8275,0xbf94f1a1,4 +np.float32,0x483b92,0xc218b689,4 +np.float32,0x3eb0dbed,0xbeec5c6b,4 +np.float32,0x3f00c676,0xbe98c8e2,4 +np.float32,0x3f445ac2,0xbdebed7c,4 +np.float32,0x3d2af4,0xc219007a,4 +np.float32,0x7f196ee1,0x42193cf2,4 +np.float32,0x290c94,0xc219b1db,4 +np.float32,0x3f5dbdc9,0xbd7f9019,4 +np.float32,0x3e80c62e,0xbf1974fc,4 +np.float32,0x3ec9ed2c,0xbecee326,4 +np.float32,0x7f469d60,0x4219afbb,4 +np.float32,0x3f698413,0xbd2386ce,4 +np.float32,0x42163f,0xc218de14,4 +np.float32,0x67a554,0xc21815f4,4 +np.float32,0x3f4bff74,0xbdc9f651,4 +np.float32,0x16a743,0xc21aba39,4 +np.float32,0x2eb8b0,0xc219784b,4 +np.float32,0x3eed9be1,0xbeaab45b,4 +np.float64,0x7fe0d76873e1aed0,0x40733f9d783bad7a,2 +np.float64,0x3fe22626bb244c4d,0xbfcf86a59864eea2,2 +np.float64,0x7f874113d02e8227,0x407324f54c4015b8,2 +np.float64,0x3fe40a46a9e8148d,0xbfca0411f533fcb9,2 +np.float64,0x3fd03932eea07266,0xbfe312bc9cf5649e,2 +np.float64,0x7fee5d2a1b3cba53,0x407343b5f56367a0,2 +np.float64,0x3feb7bda4a76f7b5,0xbfb0ea2c6edc784a,2 +np.float64,0x3fd6cd831a2d9b06,0xbfdcaf2e1a5faf51,2 +np.float64,0x98324e273064a,0xc0733e0e4c6d11c6,2 +np.float64,0x7fe1dd63b363bac6,0x4073400667c405c3,2 +np.float64,0x3fec5971f178b2e4,0xbfaaef32a7d94563,2 +np.float64,0x17abc07e2f579,0xc0734afca4da721e,2 +np.float64,0x3feec6ab5cfd8d57,0xbf9157f3545a8235,2 +np.float64,0x3fe3ae9622a75d2c,0xbfcb04b5ad254581,2 +np.float64,0x7fea73d854b4e7b0,0x407342c0a548f4c5,2 +np.float64,0x7fe29babf4653757,0x4073404eeb5fe714,2 +np.float64,0x7fd3a55d85a74aba,0x40733bde72e86c27,2 +np.float64,0x3fe83ce305f079c6,0xbfbee3511e85e0f1,2 +np.float64,0x3fd72087ea2e4110,0xbfdc4ab30802d7c2,2 +np.float64,0x7feb54ddab76a9ba,0x407342facb6f3ede,2 +np.float64,0xc57e34a18afd,0xc0734f82ec815baa,2 +np.float64,0x7a8cb97ef5198,0xc0733f8fb3777a67,2 +np.float64,0x7fe801032c300205,0x40734213dbe4eda9,2 +np.float64,0x3aefb1f475df7,0xc07344a5f08a0584,2 +np.float64,0x7fee85f1dd3d0be3,0x407343bf4441c2a7,2 +np.float64,0x3fdc7f1055b8fe21,0xbfd67d300630e893,2 +np.float64,0xe8ecddb3d1d9c,0xc0733b194f18f466,2 +np.float64,0x3fdf2b23c73e5648,0xbfd3ff6872c1f887,2 +np.float64,0x3fdba4aef2b7495e,0xbfd7557205e18b7b,2 +np.float64,0x3fe2ac34c6e5586a,0xbfcdf1dac69bfa08,2 +np.float64,0x3fc9852628330a4c,0xbfe66914f0fb9b0a,2 +np.float64,0x7fda211acf344235,0x40733dd9c2177aeb,2 +np.float64,0x3fe9420eb432841d,0xbfba4dd969a32575,2 +np.float64,0xb2f9d1ed65f3a,0xc0733cedfb6527ff,2 +np.float64,0x3fe9768a68f2ed15,0xbfb967c39c35c435,2 +np.float64,0x7fe8268462b04d08,0x4073421eaed32734,2 +np.float64,0x3fcf331f063e663e,0xbfe39e2f4b427ca9,2 +np.float64,0x7fd4eb9e2b29d73b,0x40733c4e4141418d,2 +np.float64,0x7fd2bba658a5774c,0x40733b89cd53d5b1,2 +np.float64,0x3fdfdf04913fbe09,0xbfd360c7fd9d251b,2 +np.float64,0x3fca5bfd0534b7fa,0xbfe5f5f844b2b20c,2 +np.float64,0x3feacd5032f59aa0,0xbfb3b5234ba8bf7b,2 +np.float64,0x7fe9241cec724839,0x4073426631362cec,2 +np.float64,0x3fe57aca20eaf594,0xbfc628e3ac2c6387,2 +np.float64,0x3fec6553ca38caa8,0xbfaa921368d3b222,2 +np.float64,0x3fe1e9676563d2cf,0xbfd020f866ba9b24,2 +np.float64,0x3fd5590667aab20d,0xbfde8458af5a4fd6,2 +np.float64,0x3fdf7528f43eea52,0xbfd3bdb438d6ba5e,2 +np.float64,0xb8dddc5571bbc,0xc0733cb4601e5bb2,2 +np.float64,0xe6d4e1fbcda9c,0xc0733b295ef4a4ba,2 +np.float64,0x3fe7019d962e033b,0xbfc257c0a6e8de16,2 +np.float64,0x3f94ef585029deb1,0xbffb07e5dfb0e936,2 +np.float64,0x7fc863b08030c760,0x4073388e28d7b354,2 +np.float64,0xf684443bed089,0xc0733ab46cfbff9a,2 +np.float64,0x7fe00e901d201d1f,0x40733f489c05a0f0,2 +np.float64,0x9e5c0a273cb82,0xc0733dc7af797e19,2 
+np.float64,0x7fe49734f0692e69,0x4073410303680df0,2 +np.float64,0x7fb7b584442f6b08,0x4073338acff72502,2 +np.float64,0x3f99984c30333098,0xbff9a2642a6ed8cc,2 +np.float64,0x7fea2fcda8745f9a,0x407342aeae7f5e64,2 +np.float64,0xe580caadcb01a,0xc0733b33a3639217,2 +np.float64,0x1899ab3831336,0xc0734ab823729417,2 +np.float64,0x39bd4c76737aa,0xc07344ca6fac6d21,2 +np.float64,0xd755b2dbaeab7,0xc0733ba4fe19f2cc,2 +np.float64,0x3f952bebf82a57d8,0xbffaf3e7749c2512,2 +np.float64,0x3fe62ee5d72c5dcc,0xbfc45e3cb5baad08,2 +np.float64,0xb1264a7d624ca,0xc0733d003a1d0a66,2 +np.float64,0x3fc4bd1bcd297a38,0xbfe94b3058345c46,2 +np.float64,0x7fc5758bb32aeb16,0x407337aa7805497f,2 +np.float64,0x3fb0edcaf421db96,0xbff2dfb09c405294,2 +np.float64,0x3fd240fceaa481fa,0xbfe16f356bb36134,2 +np.float64,0x38c0c62a7181a,0xc07344e916d1e9b7,2 +np.float64,0x3fe98f2b3bf31e56,0xbfb8fc6eb622a820,2 +np.float64,0x3fe2bdf99c257bf3,0xbfcdbd0dbbae4d0b,2 +np.float64,0xce4b390d9c967,0xc0733bf14ada3134,2 +np.float64,0x3fd2ad607ba55ac1,0xbfe11da15167b37b,2 +np.float64,0x3fd8154f11b02a9e,0xbfdb2a6fabb9a026,2 +np.float64,0xf37849fde6f09,0xc0733aca8c64344c,2 +np.float64,0x3fcbae43b2375c87,0xbfe547f267c8e570,2 +np.float64,0x3fcd46fd7d3a8dfb,0xbfe48070f7232929,2 +np.float64,0x7fcdd245273ba489,0x407339f3d907b101,2 +np.float64,0x3fac75cd0838eb9a,0xbff4149d177b057b,2 +np.float64,0x7fe8ff3fd7f1fe7f,0x4073425bf968ba6f,2 +np.float64,0x7febadaa4df75b54,0x407343113a91f0e9,2 +np.float64,0x7fd5e4649c2bc8c8,0x40733c9f0620b065,2 +np.float64,0x903429812069,0xc07351b255e27887,2 +np.float64,0x3fe1d8c51c63b18a,0xbfd03ad448c1f1ee,2 +np.float64,0x3fe573ea646ae7d5,0xbfc63ab0bfd0e601,2 +np.float64,0x3f83b3f3c02767e8,0xc00022677e310649,2 +np.float64,0x7fd15d1582a2ba2a,0x40733b02c469c1d6,2 +np.float64,0x3fe63d3dabec7a7b,0xbfc43a56ee97b27e,2 +np.float64,0x7fe3a452fb2748a5,0x407340af1973c228,2 +np.float64,0x3fafac6b303f58d6,0xbff35651703ae9f2,2 +np.float64,0x513ddd24a27bc,0xc073426af96aaebb,2 +np.float64,0x3fef152246be2a45,0xbf89df79d7719282,2 +np.float64,0x3fe8c923e9f19248,0xbfbc67228e8db5f6,2 +np.float64,0x3fd6e2325fadc465,0xbfdc9602fb0b950f,2 +np.float64,0x3fe9616815f2c2d0,0xbfb9c4311a3b415b,2 +np.float64,0x2fe4e4005fc9d,0xc0734616fe294395,2 +np.float64,0x3fbceb02dc39d606,0xbfee4e68f1c7886f,2 +np.float64,0x7fe35e843d66bd07,0x407340963b066ad6,2 +np.float64,0x7fecd6c648f9ad8c,0x4073435a4c176e94,2 +np.float64,0x7fcbd72bf437ae57,0x4073397994b85665,2 +np.float64,0x3feff6443b3fec88,0xbf40eb380d5318ae,2 +np.float64,0x7fb9373cf6326e79,0x407333f869edef08,2 +np.float64,0x63790d9cc6f22,0xc0734102d4793cda,2 +np.float64,0x3f9de6efe83bcde0,0xbff88db6f0a6b56e,2 +np.float64,0xe00f2dc1c01f,0xc0734ea26ab84ff2,2 +np.float64,0xd7a9aa8baf536,0xc0733ba248fa33ab,2 +np.float64,0x3fee0089ea7c0114,0xbf9cab936ac31c4b,2 +np.float64,0x3fdec0d51cbd81aa,0xbfd45ed8878c5860,2 +np.float64,0x7fe91bf5e9f237eb,0x40734263f005081d,2 +np.float64,0x34ea7d1e69d50,0xc07345659dde7444,2 +np.float64,0x7fe67321a3ace642,0x4073419cc8130d95,2 +np.float64,0x9d1aeb2f3a35e,0xc0733dd5d506425c,2 +np.float64,0x7fbb01df003603bd,0x4073347282f1391d,2 +np.float64,0x42b945b285729,0xc07343c92d1bbef9,2 +np.float64,0x7fc92799b8324f32,0x407338c51e3f0733,2 +np.float64,0x3fe119c19b223383,0xbfd16ab707f65686,2 +np.float64,0x3fc9f9ac5333f359,0xbfe62a2f91ec0dff,2 +np.float64,0x3fd820d5a8b041ab,0xbfdb1d2586fe7b18,2 +np.float64,0x10000000000000,0xc0733a7146f72a42,2 +np.float64,0x3fe7e1543eafc2a8,0xbfc045362889592d,2 +np.float64,0xcbc0e1819783,0xc0734f4b68e05b1c,2 +np.float64,0xeb57e411d6afd,0xc0733b06efec001a,2 
+np.float64,0xa9b74b47536ea,0xc0733d4c7bd06ddc,2 +np.float64,0x3fe56d4022eada80,0xbfc64bf8c7e3dd59,2 +np.float64,0x3fd445ca27288b94,0xbfdff40aecd0f882,2 +np.float64,0x3fe5af1cf5ab5e3a,0xbfc5a21d83699a04,2 +np.float64,0x7fed3431eb7a6863,0x40734370aa6131e1,2 +np.float64,0x3fd878dea1b0f1bd,0xbfdab8730dc00517,2 +np.float64,0x7ff8000000000000,0x7ff8000000000000,2 +np.float64,0x3feba9fcc1f753fa,0xbfb03027dcecbf65,2 +np.float64,0x7fca4feed6349fdd,0x4073391526327eb0,2 +np.float64,0x3fe7748ddbaee91c,0xbfc144b438218065,2 +np.float64,0x3fb5fbd94c2bf7b3,0xbff10ee6342c21a0,2 +np.float64,0x3feb603b97f6c077,0xbfb15a1f99d6d25e,2 +np.float64,0x3fe2e6fc8ce5cdf9,0xbfcd43edd7f3b4e6,2 +np.float64,0x7feb2b31f7765663,0x407342f02b306688,2 +np.float64,0x3fe290e2282521c4,0xbfce436deb8dbcf3,2 +np.float64,0x3fe3d5adf9e7ab5c,0xbfca96b8aa55d942,2 +np.float64,0x691899f2d2314,0xc07340a1026897c8,2 +np.float64,0x7fe468b008e8d15f,0x407340f33eadc628,2 +np.float64,0x3fb3a4c416274988,0xbff1d71da539a56e,2 +np.float64,0x3fe2442b29e48856,0xbfcf2b0037322661,2 +np.float64,0x3f376fbc7e6ef,0xc073442939a84643,2 +np.float64,0x3fe7c78d65ef8f1b,0xbfc08157cff411de,2 +np.float64,0xd4f27acba9e50,0xc0733bb8d38daa50,2 +np.float64,0x5198919ea3313,0xc07342633ba7cbea,2 +np.float64,0x7fd09f66f0a13ecd,0x40733ab5310b4385,2 +np.float64,0x3fdfe5531dbfcaa6,0xbfd35b487c7e739f,2 +np.float64,0x3fc4b0fecc2961fe,0xbfe95350c38c1640,2 +np.float64,0x7fd5ae21962b5c42,0x40733c8db78b7250,2 +np.float64,0x3fa4a8fcd42951fa,0xbff64e62fe602b72,2 +np.float64,0x7fc8e0e25831c1c4,0x407338b179b91223,2 +np.float64,0x7fdde1df6f3bc3be,0x40733ec87f9f027e,2 +np.float64,0x3fd8b9ad86b1735b,0xbfda6f385532c41b,2 +np.float64,0x3fd9f20ee933e41e,0xbfd91872fd858597,2 +np.float64,0x7feb35332df66a65,0x407342f2b9c715f0,2 +np.float64,0x7fe783dc7eaf07b8,0x407341ef41873706,2 +np.float64,0x7fceee929f3ddd24,0x40733a34e3c660fd,2 +np.float64,0x985b58d730b6b,0xc0733e0c6cfbb6f8,2 +np.float64,0x3fef4bb55cfe976b,0xbf83cb246c6f2a78,2 +np.float64,0x3fe218014f243003,0xbfcfb20ac683e1f6,2 +np.float64,0x7fe43b9fbea8773e,0x407340e3d5d5d29e,2 +np.float64,0x7fe148c74c62918e,0x40733fcba4367b8b,2 +np.float64,0x3feea4ad083d495a,0xbf93443917f3c991,2 +np.float64,0x8bcf6311179ed,0xc0733ea54d59dd31,2 +np.float64,0xf4b7a2dbe96f5,0xc0733ac175182401,2 +np.float64,0x543338baa8668,0xc073422b59165fe4,2 +np.float64,0x3fdb467317368ce6,0xbfd7b4d515929635,2 +np.float64,0x7fe3bbbc89e77778,0x407340b75cdf3de7,2 +np.float64,0x7fe693377aad266e,0x407341a6af60a0f1,2 +np.float64,0x3fc66210502cc421,0xbfe83bb940610a24,2 +np.float64,0x7fa75638982eac70,0x40732e9da476b816,2 +np.float64,0x3fe0d72a4761ae55,0xbfd1d7c82c479fab,2 +np.float64,0x97dec0dd2fbd8,0xc0733e121e072804,2 +np.float64,0x3fef33ec8c7e67d9,0xbf86701be6be8df1,2 +np.float64,0x7fcfca9b423f9536,0x40733a65a51efb94,2 +np.float64,0x9f2215633e443,0xc0733dbf043de9ed,2 +np.float64,0x2469373e48d28,0xc07347fe9e904b77,2 +np.float64,0x7fecc2e18cb985c2,0x407343557f58dfa2,2 +np.float64,0x3fde4acbfdbc9598,0xbfd4ca559e575e74,2 +np.float64,0x3fd6b11cf1ad623a,0xbfdcd1e17ef36114,2 +np.float64,0x3fc19ec494233d89,0xbfeb8ef228e8826a,2 +np.float64,0x4c89ee389913e,0xc07342d50c904f61,2 +np.float64,0x88c2046f11841,0xc0733ecc91369431,2 +np.float64,0x7fc88c13fd311827,0x40733899a125b392,2 +np.float64,0x3fcebd893a3d7b12,0xbfe3d2f35ab93765,2 +np.float64,0x3feb582a1476b054,0xbfb17ae8ec6a0465,2 +np.float64,0x7fd4369e5da86d3c,0x40733c1118b8cd67,2 +np.float64,0x3fda013fc1340280,0xbfd90831b85e98b2,2 +np.float64,0x7fed33d73fba67ad,0x4073437094ce1bd9,2 +np.float64,0x3fed3191053a6322,0xbfa468cc26a8f685,2 
+np.float64,0x3fc04ed51c209daa,0xbfeca24a6f093bca,2 +np.float64,0x3fee4ac8763c9591,0xbf986458abbb90b5,2 +np.float64,0xa2d39dd145a74,0xc0733d9633651fbc,2 +np.float64,0x3fe7d9f86f2fb3f1,0xbfc0565a0b059f1c,2 +np.float64,0x3fe3250144e64a03,0xbfcc8eb2b9ae494b,2 +np.float64,0x7fe2b29507a56529,0x4073405774492075,2 +np.float64,0x7fdcdfcbe2b9bf97,0x40733e8b736b1bd8,2 +np.float64,0x3fc832730f3064e6,0xbfe7267ac9b2e7c3,2 +np.float64,0x3fc7e912e52fd226,0xbfe750dfc0aeae57,2 +np.float64,0x7fc960472f32c08d,0x407338d4b4cb3957,2 +np.float64,0x3fbdf182ea3be306,0xbfedd27150283ffb,2 +np.float64,0x3fd1e9359823d26b,0xbfe1b2ac7fd25f8d,2 +np.float64,0x7fbcf75f6039eebe,0x407334ef13eb16f8,2 +np.float64,0x3fe5a3c910eb4792,0xbfc5bf2f57c5d643,2 +np.float64,0x3fcf4f2a6e3e9e55,0xbfe391b6f065c4b8,2 +np.float64,0x3fee067873fc0cf1,0xbf9c53af0373fc0e,2 +np.float64,0xd3f08b85a7e12,0xc0733bc14357e686,2 +np.float64,0x7ff0000000000000,0x7ff0000000000000,2 +np.float64,0x3fc8635f6430c6bf,0xbfe70a7dc77749a7,2 +np.float64,0x3fe3ff5c52a7feb9,0xbfca22617c6636d5,2 +np.float64,0x3fbbae91fa375d24,0xbfeee9d4c300543f,2 +np.float64,0xe3f71b59c7ee4,0xc0733b3f99187375,2 +np.float64,0x7fca93d3be3527a6,0x40733926fd48ecd6,2 +np.float64,0x3fcd29f7223a53ee,0xbfe48e3edf32fe57,2 +np.float64,0x7fdc4ef6f8389ded,0x40733e68401cf2a6,2 +np.float64,0xe009bc81c014,0xc0734ea295ee3e5b,2 +np.float64,0x61f56c78c3eae,0xc073411e1dbd7c54,2 +np.float64,0x3fde131928bc2632,0xbfd4fda024f6927c,2 +np.float64,0x3fb21ee530243dca,0xbff266aaf0358129,2 +np.float64,0x7feaac82a4f55904,0x407342cf7809d9f9,2 +np.float64,0x3fe66ab177ecd563,0xbfc3c92d4d522819,2 +np.float64,0xfe9f9c2bfd3f4,0xc0733a7ade3a88a7,2 +np.float64,0x7fd0c5217c218a42,0x40733ac4e4c6dfa5,2 +np.float64,0x430f4ae6861ea,0xc07343c03d8a9442,2 +np.float64,0x494bff2a92981,0xc073432209d2fd16,2 +np.float64,0x3f8860e9d030c1d4,0xbffeca059ebf5e89,2 +np.float64,0x3fe43732dc286e66,0xbfc98800388bad2e,2 +np.float64,0x6443b60ec8877,0xc07340f4bab11827,2 +np.float64,0x3feda9be6d7b537d,0xbfa0dcb9a6914069,2 +np.float64,0x3fc5ceb6772b9d6d,0xbfe89868c881db70,2 +np.float64,0x3fbdf153023be2a6,0xbfedd2878c3b4949,2 +np.float64,0x7fe8f6b8e8f1ed71,0x407342599a30b273,2 +np.float64,0x3fea6fbdb8b4df7b,0xbfb53bf66f71ee96,2 +np.float64,0xc7ac3dbb8f588,0xc0733c2b525b7963,2 +np.float64,0x3fef3a91f77e7524,0xbf85b2bd3adbbe31,2 +np.float64,0x3f887cb97030f973,0xbffec21ccbb5d22a,2 +np.float64,0x8b2f1c9f165e4,0xc0733ead49300951,2 +np.float64,0x2c1cb32058397,0xc07346a951bd8d2b,2 +np.float64,0x3fe057edd620afdc,0xbfd2acf1881b7e99,2 +np.float64,0x7f82e9530025d2a5,0x4073238591dd52ce,2 +np.float64,0x3fe4e03dff69c07c,0xbfc7be96c5c006fc,2 +np.float64,0x52727b4aa4e50,0xc0734250c58ebbc1,2 +np.float64,0x3f99a62160334c43,0xbff99ea3ca09d8f9,2 +np.float64,0x3fd5314b4faa6297,0xbfdeb843daf01e03,2 +np.float64,0x3fefde89e13fbd14,0xbf5d1facb7a1e9de,2 +np.float64,0x7fb460f1a228c1e2,0x4073327d8cbc5f86,2 +np.float64,0xeb93efb3d727e,0xc0733b052a4990e4,2 +np.float64,0x3fe884baecf10976,0xbfbd9ba9cfe23713,2 +np.float64,0x7fefffffffffffff,0x40734413509f79ff,2 +np.float64,0x149dc7c6293ba,0xc0734bf26b1df025,2 +np.float64,0x64188f88c8313,0xc07340f7b8e6f4b5,2 +np.float64,0x3fdfac314abf5863,0xbfd38d3e9dba1b0e,2 +np.float64,0x3fd72052a42e40a5,0xbfdc4af30ee0b245,2 +np.float64,0x7fdd951f743b2a3e,0x40733eb68fafa838,2 +np.float64,0x65a2dd5acb45c,0xc07340dc8ed625e1,2 +np.float64,0x7fe89a79997134f2,0x4073423fbceb1cbe,2 +np.float64,0x3fe70a000d6e1400,0xbfc24381e09d02f7,2 +np.float64,0x3fe2cec160259d83,0xbfcd8b5e92354129,2 +np.float64,0x3feb9ef77a773def,0xbfb05c7b2ee6f388,2 
+np.float64,0xe0d66689c1acd,0xc0733b582c779620,2 +np.float64,0x3fee86bd0ffd0d7a,0xbf94f7870502c325,2 +np.float64,0x186afc6230d60,0xc0734ac55fb66d5d,2 +np.float64,0xc0631f4b80c64,0xc0733c6d7149d373,2 +np.float64,0x3fdad1b87735a371,0xbfd82cca73ec663b,2 +np.float64,0x7fe7f6d313efeda5,0x40734210e84576ab,2 +np.float64,0x7fd7b7fce6af6ff9,0x40733d2d92ffdaaf,2 +np.float64,0x3fe6f35a28ade6b4,0xbfc27a4239b540c3,2 +np.float64,0x7fdb0b834eb61706,0x40733e17073a61f3,2 +np.float64,0x82f4661105e8d,0xc0733f19b34adeed,2 +np.float64,0x3fc77230112ee460,0xbfe796a7603c0d16,2 +np.float64,0x8000000000000000,0xfff0000000000000,2 +np.float64,0x7fb8317bc63062f7,0x407333aec761a739,2 +np.float64,0x7fd165609a22cac0,0x40733b061541ff15,2 +np.float64,0x3fed394768fa728f,0xbfa42e1596e1faf6,2 +np.float64,0x7febab693d7756d1,0x40734310a9ac828e,2 +np.float64,0x7fe809a69230134c,0x407342165b9acb69,2 +np.float64,0x3fc091d38f2123a7,0xbfec69a70fc23548,2 +np.float64,0x3fb2a8f5dc2551ec,0xbff2327f2641dd0d,2 +np.float64,0x7fc60b6fe02c16df,0x407337da5adc342c,2 +np.float64,0x3fefa53c3bbf4a78,0xbf73d1be15b73b00,2 +np.float64,0x7fee09c1717c1382,0x407343a2c479e1cb,2 +np.float64,0x8000000000000001,0x7ff8000000000000,2 +np.float64,0x3fede0b2733bc165,0xbf9e848ac2ecf604,2 +np.float64,0x3fee2ac331bc5586,0xbf9a3b699b721c9a,2 +np.float64,0x3fd4db12d829b626,0xbfdf2a413d1e453a,2 +np.float64,0x7fe605230dec0a45,0x4073417a67db06be,2 +np.float64,0x3fe378b2bf26f165,0xbfcb9dbb2b6d6832,2 +np.float64,0xc1d4c1ab83a98,0xc0733c60244cadbf,2 +np.float64,0x3feb15500e762aa0,0xbfb28c071d5efc22,2 +np.float64,0x3fe36225a626c44b,0xbfcbde4259e9047e,2 +np.float64,0x3fe7c586a72f8b0d,0xbfc08614b13ed4b2,2 +np.float64,0x7fb0f2d8cc21e5b1,0x40733135b2c7dd99,2 +np.float64,0x5957f3feb2aff,0xc07341c1df75638c,2 +np.float64,0x3fca4851bd3490a3,0xbfe6005ae5279485,2 +np.float64,0x824217d904843,0xc0733f232fd58f0f,2 +np.float64,0x4f9332269f267,0xc073428fd8e9cb32,2 +np.float64,0x3fea6f087374de11,0xbfb53ef0d03918b2,2 +np.float64,0x3fd9409ab4328135,0xbfd9d9231381e2b8,2 +np.float64,0x3fdba03b00374076,0xbfd759ec94a7ab5b,2 +np.float64,0x3fe0ce3766619c6f,0xbfd1e6912582ccf0,2 +np.float64,0x3fabd45ddc37a8bc,0xbff43c78d3188423,2 +np.float64,0x3fc3cadd592795bb,0xbfe9f1576c9b2c79,2 +np.float64,0x3fe10df049621be1,0xbfd17df2f2c28022,2 +np.float64,0x945b5d1328b6c,0xc0733e3bc06f1e75,2 +np.float64,0x7fc1c3742b2386e7,0x4073365a403d1051,2 +np.float64,0x7fdc957138b92ae1,0x40733e7977717586,2 +np.float64,0x7f943fa1a0287f42,0x407328d01de143f5,2 +np.float64,0x3fec9631c4392c64,0xbfa914b176d8f9d2,2 +np.float64,0x3fd8e7c008b1cf80,0xbfda3b9d9b6da8f4,2 +np.float64,0x7222f9fee4460,0xc073400e371516cc,2 +np.float64,0x3fe890e43eb121c8,0xbfbd64921462e823,2 +np.float64,0x3fcfd7fe2a3faffc,0xbfe3557e2f207800,2 +np.float64,0x3fed5dd1c1babba4,0xbfa318bb20db64e6,2 +np.float64,0x3fe6aa34c66d546a,0xbfc32c8a8991c11e,2 +np.float64,0x8ca79801196,0xc0736522bd5adf6a,2 +np.float64,0x3feb274079364e81,0xbfb2427b24b0ca20,2 +np.float64,0x7fe04927e4a0924f,0x40733f61c96f7f89,2 +np.float64,0x7c05f656f80bf,0xc0733f7a70555b4e,2 +np.float64,0x7fe97819eff2f033,0x4073427d4169b0f8,2 +np.float64,0x9def86e33bdf1,0xc0733dcc740b7175,2 +np.float64,0x7fedd1ef3f3ba3dd,0x40734395ceab8238,2 +np.float64,0x77bed86cef7dc,0xc0733fb8e0e9bf73,2 +np.float64,0x9274b41b24e97,0xc0733e52b16dff71,2 +np.float64,0x8010000000000000,0x7ff8000000000000,2 +np.float64,0x9c977855392ef,0xc0733ddba7d421d9,2 +np.float64,0xfb4560a3f68ac,0xc0733a9271e6a118,2 +np.float64,0xa67d9f394cfb4,0xc0733d6e9d58cc94,2 +np.float64,0x3fbfa766b03f4ecd,0xbfed0cccfecfc900,2 
+np.float64,0x3fe177417522ee83,0xbfd0d45803bff01a,2 +np.float64,0x7fe85e077bb0bc0e,0x4073422e957a4aa3,2 +np.float64,0x7feeb0a6883d614c,0x407343c8f6568f7c,2 +np.float64,0xbab82edb75706,0xc0733ca2a2b20094,2 +np.float64,0xfadb44bdf5b69,0xc0733a9561b7ec04,2 +np.float64,0x3fefb9b82b3f7370,0xbf6ea776b2dcc3a9,2 +np.float64,0x7fe080ba8a610174,0x40733f795779b220,2 +np.float64,0x3f87faa1c02ff544,0xbffee76acafc92b7,2 +np.float64,0x7fed474108fa8e81,0x4073437531d4313e,2 +np.float64,0x3fdb7b229336f645,0xbfd77f583a4a067f,2 +np.float64,0x256dbf0c4adb9,0xc07347cd94e6fa81,2 +np.float64,0x3fd034ae25a0695c,0xbfe3169c15decdac,2 +np.float64,0x3a72177274e44,0xc07344b4cf7d68cd,2 +np.float64,0x7fa2522d5c24a45a,0x40732cef2f793470,2 +np.float64,0x3fb052bdde20a57c,0xbff3207fd413c848,2 +np.float64,0x3fdccfecbbb99fd9,0xbfd62ec04a1a687a,2 +np.float64,0x3fd403ac53280759,0xbfe027a31df2c8cc,2 +np.float64,0x3fab708e4036e11d,0xbff45591df4f2e8b,2 +np.float64,0x7fcfc001993f8002,0x40733a63539acf9d,2 +np.float64,0x3fd2b295dfa5652c,0xbfe119c1b476c536,2 +np.float64,0x7fe8061262b00c24,0x4073421552ae4538,2 +np.float64,0xffefffffffffffff,0x7ff8000000000000,2 +np.float64,0x7fed52093ffaa411,0x40734377c072a7e8,2 +np.float64,0xf3df902fe7bf2,0xc0733ac79a75ff7a,2 +np.float64,0x7fe13d382e227a6f,0x40733fc6fd0486bd,2 +np.float64,0x3621d5086c43b,0xc073453d31effbcd,2 +np.float64,0x3ff0000000000000,0x0,2 +np.float64,0x3fdaffea27b5ffd4,0xbfd7fd139dc1c2c5,2 +np.float64,0x7fea6536dc34ca6d,0x407342bccc564fdd,2 +np.float64,0x7fd478f00c28f1df,0x40733c27c0072fde,2 +np.float64,0x7fa72ef0502e5de0,0x40732e91e83db75c,2 +np.float64,0x7fd302970626052d,0x40733ba3ec6775f6,2 +np.float64,0x7fbb57ab0036af55,0x407334887348e613,2 +np.float64,0x3fda0ff722b41fee,0xbfd8f87b77930330,2 +np.float64,0x1e983ce23d309,0xc073493438f57e61,2 +np.float64,0x7fc90de97c321bd2,0x407338be01ffd4bd,2 +np.float64,0x7fe074b09c20e960,0x40733f7443f0dbe1,2 +np.float64,0x3fed5dec9fbabbd9,0xbfa317efb1fe8a95,2 +np.float64,0x7fdb877632b70eeb,0x40733e3697c88ba8,2 +np.float64,0x7fe4fb0067e9f600,0x40734124604b99e8,2 +np.float64,0x7fd447dc96288fb8,0x40733c1703ab2cce,2 +np.float64,0x3feb2d1e64f65a3d,0xbfb22a781df61c05,2 +np.float64,0xb6c8e6676d91d,0xc0733cc8859a0b91,2 +np.float64,0x3fdc3c2418387848,0xbfd6bec3a3c3cdb5,2 +np.float64,0x3fdecb9ccdbd973a,0xbfd4551c05721a8e,2 +np.float64,0x3feb1100e7762202,0xbfb29db911fe6768,2 +np.float64,0x3fe0444bc2a08898,0xbfd2ce69582e78c1,2 +np.float64,0x7fda403218b48063,0x40733de201d8340c,2 +np.float64,0x3fdc70421238e084,0xbfd68ba4bd48322b,2 +np.float64,0x3fe06e747c60dce9,0xbfd286bcac34a981,2 +np.float64,0x7fc1931d9623263a,0x407336473da54de4,2 +np.float64,0x229914da45323,0xc073485979ff141c,2 +np.float64,0x3fe142f92da285f2,0xbfd1280909992cb6,2 +np.float64,0xf1d02fa9e3a06,0xc0733ad6b19d71a0,2 +np.float64,0x3fb1fe9b0023fd36,0xbff27317d8252c16,2 +np.float64,0x3fa544b9242a8972,0xbff61ac38569bcfc,2 +np.float64,0x3feeb129d4fd6254,0xbf928f23ad20c1ee,2 +np.float64,0xa2510b7f44a22,0xc0733d9bc81ea0a1,2 +np.float64,0x3fca75694d34ead3,0xbfe5e8975b3646c2,2 +np.float64,0x7fece10621b9c20b,0x4073435cc3dd9a1b,2 +np.float64,0x7fe98a57d3b314af,0x4073428239b6a135,2 +np.float64,0x3fe259c62a64b38c,0xbfcee96682a0f355,2 +np.float64,0x3feaaa9b9d755537,0xbfb445779f3359af,2 +np.float64,0xdaadecfdb55be,0xc0733b899338432a,2 +np.float64,0x3fed00eae4fa01d6,0xbfa5dc8d77be5991,2 +np.float64,0x7fcc96c773392d8e,0x407339a8c5cd786e,2 +np.float64,0x3fef7b8b203ef716,0xbf7cff655ecb6424,2 +np.float64,0x7fd4008113a80101,0x40733bfe6552acb7,2 +np.float64,0x7fe99ff035b33fdf,0x407342881753ee2e,2 
+np.float64,0x3ee031e87dc07,0xc0734432d736e492,2 +np.float64,0x3fddfe390f3bfc72,0xbfd510f1d9ec3e36,2 +np.float64,0x3fd9ddce74b3bb9d,0xbfd92e2d75a061bb,2 +np.float64,0x7fe5f742edebee85,0x40734176058e3a77,2 +np.float64,0x3fdb04185b360831,0xbfd7f8c63aa5e1c4,2 +np.float64,0xea2b0f43d4562,0xc0733b0fd77c8118,2 +np.float64,0x7fc3f4973527e92d,0x407337293bbb22c4,2 +np.float64,0x3fb9adfb38335bf6,0xbfeff4f3ea85821a,2 +np.float64,0x87fb98750ff73,0xc0733ed6ad83c269,2 +np.float64,0x3fe005721a200ae4,0xbfd33a9f1ebfb0ac,2 +np.float64,0xd9e04fe7b3c0a,0xc0733b901ee257f3,2 +np.float64,0x2c39102658723,0xc07346a4db63bf55,2 +np.float64,0x3f7dc28e003b851c,0xc0011c1d1233d948,2 +np.float64,0x3430fd3868620,0xc073457e24e0b70d,2 +np.float64,0xbff0000000000000,0x7ff8000000000000,2 +np.float64,0x3fd23e45e0247c8c,0xbfe17146bcf87b57,2 +np.float64,0x6599df3ecb33d,0xc07340dd2c41644c,2 +np.float64,0x3fdf074f31be0e9e,0xbfd41f6e9dbb68a5,2 +np.float64,0x7fdd6233f3bac467,0x40733eaa8f674b72,2 +np.float64,0x7fe03e8481607d08,0x40733f5d3df3b087,2 +np.float64,0x3fcc3b79f13876f4,0xbfe501bf3b379b77,2 +np.float64,0xe5d97ae3cbb30,0xc0733b30f47cbd12,2 +np.float64,0x8acbc4a115979,0xc0733eb240a4d2c6,2 +np.float64,0x3fedbdbc48bb7b79,0xbfa0470fd70c4359,2 +np.float64,0x3fde1611103c2c22,0xbfd4fae1fa8e7e5e,2 +np.float64,0x3fe09478bd2128f1,0xbfd246b7e85711dc,2 +np.float64,0x3fd6dfe8f3adbfd2,0xbfdc98ca2f32c1ad,2 +np.float64,0x72ccf274e599f,0xc0734003e5b0da63,2 +np.float64,0xe27c7265c4f8f,0xc0733b4b2d808566,2 +np.float64,0x7fee3161703c62c2,0x407343abe90f5649,2 +np.float64,0xf54fb5c1eaa0,0xc0734e01384fcf78,2 +np.float64,0xcde5924d9bcb3,0xc0733bf4b83c66c2,2 +np.float64,0x3fc46fdbe528dfb8,0xbfe97f55ef5e9683,2 +np.float64,0x7fe513528a2a26a4,0x4073412c69baceca,2 +np.float64,0x3fd29eca4aa53d95,0xbfe128801cd33ed0,2 +np.float64,0x7febb21718b7642d,0x4073431256def857,2 +np.float64,0x3fcab536c0356a6e,0xbfe5c73c59f41578,2 +np.float64,0x7fc7e9f0d82fd3e1,0x4073386b213e5dfe,2 +np.float64,0xb5b121276b624,0xc0733cd33083941c,2 +np.float64,0x7e0dd9bcfc1bc,0xc0733f5d8bf35050,2 +np.float64,0x3fd1c75106238ea2,0xbfe1cd11cccda0f4,2 +np.float64,0x9f060e673e0c2,0xc0733dc03da71909,2 +np.float64,0x7fd915a2f3322b45,0x40733d912af07189,2 +np.float64,0x3fd8cbae4431975d,0xbfda5b02ca661139,2 +np.float64,0x3fde8b411f3d1682,0xbfd48f6f710a53b6,2 +np.float64,0x3fc17a780622f4f0,0xbfebabb10c55255f,2 +np.float64,0x3fde5cbe5f3cb97d,0xbfd4b9e2e0101fb1,2 +np.float64,0x7fd859036530b206,0x40733d5c2252ff81,2 +np.float64,0xb0f5040f61ea1,0xc0733d02292f527b,2 +np.float64,0x3fde5c49ae3cb893,0xbfd4ba4db3ce2cf3,2 +np.float64,0x3fecc4518df988a3,0xbfa7af0bfc98bc65,2 +np.float64,0x3feffee03cbffdc0,0xbf0f3ede6ca7d695,2 +np.float64,0xbc5eac9b78bd6,0xc0733c92fb51c8ae,2 +np.float64,0x3fe2bb4ef765769e,0xbfcdc4f70a65dadc,2 +np.float64,0x5089443ca1129,0xc073427a7d0cde4a,2 +np.float64,0x3fd0d6e29121adc5,0xbfe28e28ece1db86,2 +np.float64,0xbe171e397c2e4,0xc0733c82cede5d02,2 +np.float64,0x4ede27be9dbc6,0xc073429fba1a4af1,2 +np.float64,0x3fe2aff3af655fe7,0xbfcde6b52a8ed3c1,2 +np.float64,0x7fd85ca295b0b944,0x40733d5d2adcccf1,2 +np.float64,0x24919bba49234,0xc07347f6ed704a6f,2 +np.float64,0x7fd74bc1eeae9783,0x40733d0d94a89011,2 +np.float64,0x3fc1cd12cb239a26,0xbfeb6a9c25c2a11d,2 +np.float64,0x3fdafbc0ac35f781,0xbfd8015ccf1f1b51,2 +np.float64,0x3fee01327c3c0265,0xbf9ca1d0d762dc18,2 +np.float64,0x3fe65bd7702cb7af,0xbfc3ee0de5c36b8d,2 +np.float64,0x7349c82ee693a,0xc0733ffc5b6eccf2,2 +np.float64,0x3fdc5906f738b20e,0xbfd6a26288eb5933,2 +np.float64,0x1,0xc07434e6420f4374,2 +np.float64,0x3fb966128a32cc25,0xbff00e0aa7273838,2 
+np.float64,0x3fd501ff9a2a03ff,0xbfdef69133482121,2 +np.float64,0x194d4f3c329ab,0xc0734a861b44cfbe,2 +np.float64,0x3fec5d34f8f8ba6a,0xbfaad1b31510e70b,2 +np.float64,0x1635e4c22c6be,0xc0734b6dec650943,2 +np.float64,0x3fead2f8edb5a5f2,0xbfb39dac30a962cf,2 +np.float64,0x3f7dfa4ce03bf49a,0xc00115a112141aa7,2 +np.float64,0x3fef6827223ed04e,0xbf80a42c9edebfe9,2 +np.float64,0xe771f303cee3f,0xc0733b24a6269fe4,2 +np.float64,0x1160ccc622c1b,0xc0734d22604eacb9,2 +np.float64,0x3fc485cd08290b9a,0xbfe970723008c8c9,2 +np.float64,0x7fef99c518bf3389,0x407343fcf9ed202f,2 +np.float64,0x7fd8c1447a318288,0x40733d79a440b44d,2 +np.float64,0xaf219f955e434,0xc0733d149c13f440,2 +np.float64,0xcf45f6239e8bf,0xc0733be8ddda045d,2 +np.float64,0x7599394aeb328,0xc0733fd90fdbb0ea,2 +np.float64,0xc7f6390f8fec7,0xc0733c28bfbc66a3,2 +np.float64,0x3fd39ae96c2735d3,0xbfe0712274a8742b,2 +np.float64,0xa4d6c18f49ad8,0xc0733d805a0528f7,2 +np.float64,0x7fd9ea78d7b3d4f1,0x40733dcb2b74802a,2 +np.float64,0x3fecd251cb39a4a4,0xbfa742ed41d4ae57,2 +np.float64,0x7fed7a07cd7af40f,0x407343813476027e,2 +np.float64,0x3fd328ae7f26515d,0xbfe0c30b56a83c64,2 +np.float64,0x7fc937ff7a326ffe,0x407338c9a45b9140,2 +np.float64,0x3fcf1d31143e3a62,0xbfe3a7f760fbd6a8,2 +np.float64,0x7fb911dcbc3223b8,0x407333ee158cccc7,2 +np.float64,0x3fd352fc83a6a5f9,0xbfe0a47d2f74d283,2 +np.float64,0x7fd310753fa620e9,0x40733ba8fc4300dd,2 +np.float64,0x3febd64b4577ac97,0xbfaefd4a79f95c4b,2 +np.float64,0x6a6961a4d4d2d,0xc073408ae1687943,2 +np.float64,0x3fe4ba73d16974e8,0xbfc8239341b9e457,2 +np.float64,0x3fed8e7cac3b1cf9,0xbfa1a96a0cc5fcdc,2 +np.float64,0x7fd505ec04aa0bd7,0x40733c56f86e3531,2 +np.float64,0x3fdf166e9abe2cdd,0xbfd411e5f8569d70,2 +np.float64,0x7fe1bc6434e378c7,0x40733ff9861bdabb,2 +np.float64,0x3fd3b0b175a76163,0xbfe061ba5703f3c8,2 +np.float64,0x7fed75d7ffbaebaf,0x4073438037ba6f19,2 +np.float64,0x5a9e109cb53c3,0xc07341a8b04819c8,2 +np.float64,0x3fe14786b4e28f0d,0xbfd120b541bb880e,2 +np.float64,0x3fed4948573a9291,0xbfa3b471ff91614b,2 +np.float64,0x66aac5d8cd559,0xc07340ca9b18af46,2 +np.float64,0x3fdb48efd23691e0,0xbfd7b24c5694838b,2 +np.float64,0x7fe6da7d1eadb4f9,0x407341bc7d1fae43,2 +np.float64,0x7feb702cf336e059,0x40734301b96cc3c0,2 +np.float64,0x3fd1e60987a3cc13,0xbfe1b522cfcc3d0e,2 +np.float64,0x3feca57f50794aff,0xbfa89dc90625d39c,2 +np.float64,0x7fdc46dc56b88db8,0x40733e664294a0f9,2 +np.float64,0x8dc8fd811b920,0xc0733e8c5955df06,2 +np.float64,0xf01634abe02c7,0xc0733ae370a76d0c,2 +np.float64,0x3fc6f8d8ab2df1b1,0xbfe7df5093829464,2 +np.float64,0xda3d7597b47af,0xc0733b8d2702727a,2 +np.float64,0x7feefd53227dfaa5,0x407343da3d04db28,2 +np.float64,0x3fe2fbca3525f794,0xbfcd06e134417c08,2 +np.float64,0x7fd36d3ce226da79,0x40733bca7c322df1,2 +np.float64,0x7fec37e00b786fbf,0x4073433397b48a5b,2 +np.float64,0x3fbf133f163e267e,0xbfed4e72f1362a77,2 +np.float64,0x3fc11efbb9223df7,0xbfebf53002a561fe,2 +np.float64,0x3fc89c0e5431381d,0xbfe6ea562364bf81,2 +np.float64,0x3f9cd45da839a8bb,0xbff8ceb14669ee4b,2 +np.float64,0x23dc8fa647b93,0xc0734819aaa9b0ee,2 +np.float64,0x3fe829110d305222,0xbfbf3e60c45e2399,2 +np.float64,0x7fed8144e57b0289,0x40734382e917a02a,2 +np.float64,0x7fe033fbf7a067f7,0x40733f58bb00b20f,2 +np.float64,0xe3807f45c7010,0xc0733b43379415d1,2 +np.float64,0x3fd708fb342e11f6,0xbfdc670ef9793782,2 +np.float64,0x3fe88c924b311925,0xbfbd78210d9e7164,2 +np.float64,0x3fe0a2a7c7614550,0xbfd22efaf0472c4a,2 +np.float64,0x7fe3a37501a746e9,0x407340aecaeade41,2 +np.float64,0x3fd05077ec20a0f0,0xbfe2fedbf07a5302,2 +np.float64,0x7fd33bf61da677eb,0x40733bb8c58912aa,2 
+np.float64,0x3feb29bdae76537b,0xbfb2384a8f61b5f9,2 +np.float64,0x3fec0fc14ff81f83,0xbfad3423e7ade174,2 +np.float64,0x3fd0f8b1a1a1f163,0xbfe2725dd4ccea8b,2 +np.float64,0x3fe382d26a6705a5,0xbfcb80dba4218bdf,2 +np.float64,0x3fa873f2cc30e7e6,0xbff522911cb34279,2 +np.float64,0x7fed7fd7377affad,0x4073438292f6829b,2 +np.float64,0x3feeacd8067d59b0,0xbf92cdbeda94b35e,2 +np.float64,0x7fe464d62228c9ab,0x407340f1eee19aa9,2 +np.float64,0xe997648bd32ed,0xc0733b143aa0fad3,2 +np.float64,0x7fea4869f13490d3,0x407342b5333b54f7,2 +np.float64,0x935b871926b71,0xc0733e47c6683319,2 +np.float64,0x28a9d0c05155,0xc0735a7e3532af83,2 +np.float64,0x79026548f204d,0xc0733fa6339ffa2f,2 +np.float64,0x3fdb1daaabb63b55,0xbfd7de839c240ace,2 +np.float64,0x3fc0db73b421b6e7,0xbfec2c6e36c4f416,2 +np.float64,0xb8b50ac1716b,0xc0734ff9fc60ebce,2 +np.float64,0x7fdf13e0c6be27c1,0x40733f0e44f69437,2 +np.float64,0x3fcd0cb97b3a1973,0xbfe49c34ff531273,2 +np.float64,0x3fcbac034b375807,0xbfe54913d73f180d,2 +np.float64,0x3fe091d2a2e123a5,0xbfd24b290a9218de,2 +np.float64,0xede43627dbc87,0xc0733af3c7c7f716,2 +np.float64,0x7fc037e7ed206fcf,0x407335b85fb0fedb,2 +np.float64,0x3fce7ae4c63cf5ca,0xbfe3f1350fe03f28,2 +np.float64,0x7fcdd862263bb0c3,0x407339f5458bb20e,2 +np.float64,0x4d7adf709af5d,0xc07342bf4edfadb2,2 +np.float64,0xdc6c03f3b8d81,0xc0733b7b74d6a635,2 +np.float64,0x3fe72ae0a4ee55c1,0xbfc1f4665608b21f,2 +np.float64,0xcd62f19d9ac5e,0xc0733bf92235e4d8,2 +np.float64,0xe3a7b8fdc74f7,0xc0733b4204f8e166,2 +np.float64,0x3fdafd35adb5fa6b,0xbfd7ffdca0753b36,2 +np.float64,0x3fa023e8702047d1,0xbff8059150ea1464,2 +np.float64,0x99ff336933fe7,0xc0733df961197517,2 +np.float64,0x7feeb365b9bd66ca,0x407343c995864091,2 +np.float64,0x7fe449b49f689368,0x407340e8aa3369e3,2 +np.float64,0x7faf5843043eb085,0x407330aa700136ca,2 +np.float64,0x3fd47b2922a8f652,0xbfdfab3de86f09ee,2 +np.float64,0x7fd9fc3248b3f864,0x40733dcfea6f9b3e,2 +np.float64,0xe20b0d8dc4162,0xc0733b4ea8fe7b3f,2 +np.float64,0x7feff8e0e23ff1c1,0x40734411c490ed70,2 +np.float64,0x7fa58382d02b0705,0x40732e0cf28e14fe,2 +np.float64,0xb8ad9a1b715b4,0xc0733cb630b8f2d4,2 +np.float64,0xe90abcf1d2158,0xc0733b186b04eeee,2 +np.float64,0x7fd6aa6f32ad54dd,0x40733cdccc636604,2 +np.float64,0x3fd8f84eedb1f09e,0xbfda292909a5298a,2 +np.float64,0x7fecd6b1d9f9ad63,0x4073435a472b05b5,2 +np.float64,0x3fd9f47604b3e8ec,0xbfd915e028cbf4a6,2 +np.float64,0x3fd20d9398241b27,0xbfe19691363dd508,2 +np.float64,0x3fe5ed09bbabda13,0xbfc5043dfc9c8081,2 +np.float64,0x7fbe5265363ca4c9,0x407335406f8e4fac,2 +np.float64,0xac2878af5850f,0xc0733d3311be9786,2 +np.float64,0xac2074555840f,0xc0733d3364970018,2 +np.float64,0x3fcd49b96b3a9373,0xbfe47f24c8181d9c,2 +np.float64,0x3fd10caca6a21959,0xbfe2620ae5594f9a,2 +np.float64,0xec5b87e9d8b71,0xc0733aff499e72ca,2 +np.float64,0x9d5e9fad3abd4,0xc0733dd2d70eeb4a,2 +np.float64,0x7fe3d3a24227a744,0x407340bfc2072fdb,2 +np.float64,0x3fc5f7a77c2bef4f,0xbfe87e69d502d784,2 +np.float64,0x33161a66662c4,0xc07345a436308244,2 +np.float64,0xa27acdc744f5a,0xc0733d99feb3d8ea,2 +np.float64,0x3fe2d9301565b260,0xbfcd6c914e204437,2 +np.float64,0x7fd5d111e12ba223,0x40733c98e14a6fd0,2 +np.float64,0x6c3387bed8672,0xc073406d3648171a,2 +np.float64,0x24d89fe849b15,0xc07347e97bec008c,2 +np.float64,0x3fefd763677faec7,0xbf61ae69caa9cad9,2 +np.float64,0x7fe0a4684ba148d0,0x40733f884d32c464,2 +np.float64,0x3fd5c3c939ab8792,0xbfddfaaefc1c7fca,2 +np.float64,0x3fec9b87a6b9370f,0xbfa8eb34efcc6b9b,2 +np.float64,0x3feb062431f60c48,0xbfb2ca6036698877,2 +np.float64,0x3fef97f6633f2fed,0xbf76bc742860a340,2 
+np.float64,0x74477490e88ef,0xc0733fed220986bc,2 +np.float64,0x3fe4bea67ce97d4d,0xbfc818525292b0f6,2 +np.float64,0x3fc6add3a92d5ba7,0xbfe80cfdc9a90bda,2 +np.float64,0x847c9ce308f94,0xc0733f05026f5965,2 +np.float64,0x7fea53fd2eb4a7f9,0x407342b841fc4723,2 +np.float64,0x3fc55a16fc2ab42e,0xbfe8e3849130da34,2 +np.float64,0x3fbdf7d07c3befa1,0xbfedcf84b9c6c161,2 +np.float64,0x3fe5fb25aa6bf64b,0xbfc4e083ff96b116,2 +np.float64,0x61c776a8c38ef,0xc0734121611d84d7,2 +np.float64,0x3fec413164f88263,0xbfabadbd05131546,2 +np.float64,0x9bf06fe137e0e,0xc0733de315469ee0,2 +np.float64,0x2075eefc40ebf,0xc07348cae84de924,2 +np.float64,0x3fdd42e0143a85c0,0xbfd5c0b6f60b3cea,2 +np.float64,0xdbb1ab45b7636,0xc0733b8157329daf,2 +np.float64,0x3feac6d56bf58dab,0xbfb3d00771b28621,2 +np.float64,0x7fb2dc825025b904,0x407331f3e950751a,2 +np.float64,0x3fecea6efd79d4de,0xbfa689309cc0e3fe,2 +np.float64,0x3fd83abec7b0757e,0xbfdaff5c674a9c59,2 +np.float64,0x3fd396f7c0272df0,0xbfe073ee75c414ba,2 +np.float64,0x3fe10036c162006e,0xbfd1945a38342ae1,2 +np.float64,0x3fd5bbded52b77be,0xbfde04cca40d4156,2 +np.float64,0x3fe870945ab0e129,0xbfbdf72f0e6206fa,2 +np.float64,0x3fef72fddcbee5fc,0xbf7ee2dba88b1bad,2 +np.float64,0x4e111aa09c224,0xc07342b1e2b29643,2 +np.float64,0x3fd926d8b5b24db1,0xbfd9f58b78d6b061,2 +np.float64,0x3fc55679172aacf2,0xbfe8e5df687842e2,2 +np.float64,0x7f5f1749803e2e92,0x40731886e16cfc4d,2 +np.float64,0x7fea082b53b41056,0x407342a42227700e,2 +np.float64,0x3fece1d1d039c3a4,0xbfa6cb780988a469,2 +np.float64,0x3b2721d8764e5,0xc073449f6a5a4832,2 +np.float64,0x365cb7006cba,0xc0735879ba5f0b6e,2 +np.float64,0x7ff4000000000000,0x7ffc000000000000,2 +np.float64,0x7fe606ce92ac0d9c,0x4073417aeebe97e8,2 +np.float64,0x3fe237b544a46f6b,0xbfcf50f8f76d7df9,2 +np.float64,0x3fe7265e5eee4cbd,0xbfc1ff39089ec8d0,2 +np.float64,0x7fe2bb3c5ea57678,0x4073405aaad81cf2,2 +np.float64,0x3fd811df84b023bf,0xbfdb2e670ea8d8de,2 +np.float64,0x3f6a0efd00341dfa,0xc003fac1ae831241,2 +np.float64,0x3fd0d214afa1a429,0xbfe2922080a91c72,2 +np.float64,0x3feca6a350b94d47,0xbfa894eea3a96809,2 +np.float64,0x7fe23e5c76247cb8,0x4073402bbaaf71c7,2 +np.float64,0x3fe739a1fdae7344,0xbfc1d109f66efb5d,2 +np.float64,0x3fdf4b8e283e971c,0xbfd3e28f46169cc5,2 +np.float64,0x38f2535271e4b,0xc07344e3085219fa,2 +np.float64,0x7fd263a0f9a4c741,0x40733b68d945dae0,2 +np.float64,0x7fdd941863bb2830,0x40733eb651e3dca9,2 +np.float64,0xace7279159ce5,0xc0733d2b63b5947e,2 +np.float64,0x7fe34670b2268ce0,0x4073408d92770cb5,2 +np.float64,0x7fd11fa6dfa23f4d,0x40733aea02e76ea3,2 +np.float64,0x3fe6d9cbca6db398,0xbfc2b84b5c8c7eab,2 +np.float64,0x3fd69a0274ad3405,0xbfdcee3c7e52c463,2 +np.float64,0x3feb5af671f6b5ed,0xbfb16f88d739477f,2 +np.float64,0x3feea400163d4800,0xbf934e071c64fd0b,2 +np.float64,0x3fefd6bcf17fad7a,0xbf61f711c392b119,2 +np.float64,0x3fe148d43da291a8,0xbfd11e9cd3f91cd3,2 +np.float64,0x7fedf1308b7be260,0x4073439d135656da,2 +np.float64,0x3fe614c99c6c2993,0xbfc49fd1984dfd6d,2 +np.float64,0xd6e8d4e5add1b,0xc0733ba88256026e,2 +np.float64,0xfff0000000000000,0x7ff8000000000000,2 +np.float64,0x3fb530b5562a616b,0xbff1504bcc5c8f73,2 +np.float64,0xb7da68396fb4d,0xc0733cbe2790f52e,2 +np.float64,0x7fad78e26c3af1c4,0x4073303cdbfb0a15,2 +np.float64,0x7fee5698447cad30,0x407343b474573a8b,2 +np.float64,0x3fd488325c291065,0xbfdf999296d901e7,2 +np.float64,0x2669283a4cd26,0xc073479f823109a4,2 +np.float64,0x7fef3b090afe7611,0x407343e805a3b264,2 +np.float64,0x7fe8b96ae0f172d5,0x4073424874a342ab,2 +np.float64,0x7fef409f56fe813e,0x407343e943c3cd44,2 +np.float64,0x3fed28073dfa500e,0xbfa4b17e4cd31a3a,2 
+np.float64,0x7f87ecc4802fd988,0x40732527e027b24b,2 +np.float64,0x3fdda24da0bb449b,0xbfd566a43ac035af,2 +np.float64,0x179fc9e62f3fa,0xc0734b0028c80fc1,2 +np.float64,0x3fef85b0927f0b61,0xbf7ac27565d5ab4f,2 +np.float64,0x5631501aac62b,0xc0734201be12c5d4,2 +np.float64,0x3fd782e424af05c8,0xbfdbd57544f8a7c3,2 +np.float64,0x3fe603a9a6ac0753,0xbfc4caff04dc3caf,2 +np.float64,0x7fbd5225163aa449,0x40733504b88f0a56,2 +np.float64,0x3fecd27506b9a4ea,0xbfa741dd70e6b08c,2 +np.float64,0x9c99603b3932c,0xc0733ddb922dc5db,2 +np.float64,0x3fbeb57f1a3d6afe,0xbfed789ff217aa08,2 +np.float64,0x3fef9c0f85bf381f,0xbf75d5c3d6cb281a,2 +np.float64,0x3fde4afb613c95f7,0xbfd4ca2a231c9005,2 +np.float64,0x396233d472c47,0xc07344d56ee70631,2 +np.float64,0x3fb31ea1c6263d44,0xbff207356152138d,2 +np.float64,0x3fe50bdf78aa17bf,0xbfc74ae0cbffb735,2 +np.float64,0xef74c701dee99,0xc0733ae81e4bb443,2 +np.float64,0x9a3e13a1347c3,0xc0733df68b60afc7,2 +np.float64,0x33ba4f886774b,0xc073458e03f0c13e,2 +np.float64,0x3fe8ba0e9931741d,0xbfbcaadf974e8f64,2 +np.float64,0x3fe090a4cd61214a,0xbfd24d236cf365d6,2 +np.float64,0x7fd87d992930fb31,0x40733d668b73b820,2 +np.float64,0x3fe6422b296c8456,0xbfc42e070b695d01,2 +np.float64,0x3febe9334677d267,0xbfae667864606cfe,2 +np.float64,0x771a3ce4ee348,0xc0733fc274d12c97,2 +np.float64,0x3fe0413542e0826b,0xbfd2d3b08fb5b8a6,2 +np.float64,0x3fd00870ea2010e2,0xbfe33cc04cbd42e0,2 +np.float64,0x3fe74fb817ae9f70,0xbfc19c45dbf919e1,2 +np.float64,0x40382fa08071,0xc07357514ced5577,2 +np.float64,0xa14968474292d,0xc0733da71a990f3a,2 +np.float64,0x5487c740a90fa,0xc0734224622d5801,2 +np.float64,0x3fed7d8d14fafb1a,0xbfa228f7ecc2ac03,2 +np.float64,0x3fe39bb485e73769,0xbfcb3a235a722960,2 +np.float64,0x3fd01090b2202121,0xbfe335b752589a22,2 +np.float64,0x3fd21a3e7da4347d,0xbfe18cd435a7c582,2 +np.float64,0x3fe7fa855a2ff50b,0xbfc00ab0665709fe,2 +np.float64,0x3fedc0d4577b81a9,0xbfa02fef3ff553fc,2 +np.float64,0x3fe99d4906333a92,0xbfb8bf18220e5e8e,2 +np.float64,0x3fd944ee3c3289dc,0xbfd9d46071675e73,2 +np.float64,0x3fe3ed8d52e7db1b,0xbfca53f8d4aef484,2 +np.float64,0x7fe748623a6e90c3,0x407341dd97c9dd79,2 +np.float64,0x3fea1b4b98343697,0xbfb6a1560a56927f,2 +np.float64,0xe1215715c242b,0xc0733b55dbf1f0a8,2 +np.float64,0x3fd0d5bccca1ab7a,0xbfe28f1b66d7a470,2 +np.float64,0x881a962710353,0xc0733ed51848a30d,2 +np.float64,0x3fcf022afe3e0456,0xbfe3b40eabf24501,2 +np.float64,0x3fdf1ac6bbbe358d,0xbfd40e03e888288d,2 +np.float64,0x3fa51a5eac2a34bd,0xbff628a7c34d51b3,2 +np.float64,0x3fdbaf408d375e81,0xbfd74ad39d97c92a,2 +np.float64,0x3fcd2418ea3a4832,0xbfe4910b009d8b11,2 +np.float64,0x3fc7b3062a2f660c,0xbfe7706dc47993e1,2 +np.float64,0x7fb8232218304643,0x407333aaa7041a9f,2 +np.float64,0x7fd5f186362be30b,0x40733ca32fdf9cc6,2 +np.float64,0x3fe57ef1d6aafde4,0xbfc61e23d00210c7,2 +np.float64,0x7c6830baf8d07,0xc0733f74f19e9dad,2 +np.float64,0xcacbfd5595980,0xc0733c0fb49edca7,2 +np.float64,0x3fdfdeac873fbd59,0xbfd36114c56bed03,2 +np.float64,0x3fd31f0889263e11,0xbfe0ca0cc1250169,2 +np.float64,0x3fe839fbe47073f8,0xbfbef0a2abc3d63f,2 +np.float64,0x3fc36af57e26d5eb,0xbfea3553f38770b7,2 +np.float64,0x3fe73dbc44ee7b79,0xbfc1c738f8fa6b3d,2 +np.float64,0x3fd3760e4da6ec1d,0xbfe08b5b609d11e5,2 +np.float64,0x3fee1cfa297c39f4,0xbf9b06d081bc9d5b,2 +np.float64,0xdfb01561bf61,0xc0734ea55e559888,2 +np.float64,0x687bd01cd0f7b,0xc07340ab67fe1816,2 +np.float64,0x3fefc88f4cbf911f,0xbf6828c359cf19dc,2 +np.float64,0x8ad34adb15a6a,0xc0733eb1e03811e5,2 +np.float64,0x3fe2b49c12e56938,0xbfcdd8dbdbc0ce59,2 +np.float64,0x6e05037adc0a1,0xc073404f91261635,2 
+np.float64,0x3fe2fd737fe5fae7,0xbfcd020407ef4d78,2 +np.float64,0x3fd0f3c0dc21e782,0xbfe2766a1ab02eae,2 +np.float64,0x28564d9850acb,0xc073474875f87c5e,2 +np.float64,0x3fe4758015a8eb00,0xbfc8ddb45134a1bd,2 +np.float64,0x7fe7f19306efe325,0x4073420f626141a7,2 +np.float64,0x7fd27f34c0a4fe69,0x40733b733d2a5b50,2 +np.float64,0x92c2366325847,0xc0733e4f04f8195a,2 +np.float64,0x3fc21f8441243f09,0xbfeb2ad23bc1ab0b,2 +np.float64,0x3fc721d3e42e43a8,0xbfe7c69bb47b40c2,2 +np.float64,0x3fe2f11a1625e234,0xbfcd26363b9c36c3,2 +np.float64,0x3fdcb585acb96b0b,0xbfd648446237cb55,2 +np.float64,0x3fd4060bf2280c18,0xbfe025fd4c8a658b,2 +np.float64,0x7fb8ae2750315c4e,0x407333d23b025d08,2 +np.float64,0x3fe3a03119a74062,0xbfcb2d6c91b38552,2 +np.float64,0x7fdd2af92bba55f1,0x40733e9d737e16e6,2 +np.float64,0x3fe50b05862a160b,0xbfc74d20815fe36b,2 +np.float64,0x164409f82c882,0xc0734b6980e19c03,2 +np.float64,0x3fe4093712a8126e,0xbfca070367fda5e3,2 +np.float64,0xae3049935c609,0xc0733d1e3608797b,2 +np.float64,0x3fd71df4b4ae3be9,0xbfdc4dcb7637600d,2 +np.float64,0x7fca01e8023403cf,0x407339006c521c49,2 +np.float64,0x3fb0c5c43e218b88,0xbff2f03211c63f25,2 +np.float64,0x3fee757af83ceaf6,0xbf95f33a6e56b454,2 +np.float64,0x3f865f1f402cbe3f,0xbfff62d9c9072bd7,2 +np.float64,0x89864e95130ca,0xc0733ec29f1e32c6,2 +np.float64,0x3fe51482bcea2905,0xbfc73414ddc8f1b7,2 +np.float64,0x7fd802f8fa3005f1,0x40733d43684e460a,2 +np.float64,0x3fbeb86ca63d70d9,0xbfed774ccca9b8f5,2 +np.float64,0x3fb355dcc826abba,0xbff1f33f9339e7a3,2 +np.float64,0x3fe506c61eaa0d8c,0xbfc7585a3f7565a6,2 +np.float64,0x7fe393f25ba727e4,0x407340a94bcea73b,2 +np.float64,0xf66f532decdeb,0xc0733ab5041feb0f,2 +np.float64,0x3fe26e872be4dd0e,0xbfceaaab466f32e0,2 +np.float64,0x3fefd9e290bfb3c5,0xbf60977d24496295,2 +np.float64,0x7fe19c5f692338be,0x40733fecef53ad95,2 +np.float64,0x3fe80365ab3006cb,0xbfbfec4090ef76ec,2 +np.float64,0x3fe88ab39eb11567,0xbfbd8099388d054d,2 +np.float64,0x3fe68fb09fad1f61,0xbfc36db9de38c2c0,2 +np.float64,0x3fe9051883b20a31,0xbfbb5b75b8cb8f24,2 +np.float64,0x3fd4708683a8e10d,0xbfdfb9b085dd8a83,2 +np.float64,0x3fe00ac11a601582,0xbfd3316af3e43500,2 +np.float64,0xd16af30ba2d5f,0xc0733bd68e8252f9,2 +np.float64,0x3fb97d654632facb,0xbff007ac1257f575,2 +np.float64,0x7fd637c10fac6f81,0x40733cb949d76546,2 +np.float64,0x7fed2cab6dba5956,0x4073436edfc3764e,2 +np.float64,0x3fed04afbbba095f,0xbfa5bfaa5074b7f4,2 +np.float64,0x0,0xfff0000000000000,2 +np.float64,0x389a1dc671345,0xc07344edd4206338,2 +np.float64,0x3fbc9ba25a393745,0xbfee74c34f49b921,2 +np.float64,0x3feee749947dce93,0xbf8f032d9cf6b5ae,2 +np.float64,0xedc4cf89db89a,0xc0733af4b2a57920,2 +np.float64,0x3fe41629eba82c54,0xbfc9e321faf79e1c,2 +np.float64,0x3feb0bcbf7b61798,0xbfb2b31e5d952869,2 +np.float64,0xad60654b5ac0d,0xc0733d26860df676,2 +np.float64,0x3fe154e1ff22a9c4,0xbfd10b416e58c867,2 +np.float64,0x7fb20e9c8a241d38,0x407331a66453b8bc,2 +np.float64,0x7fcbbaaf7d37755e,0x4073397274f28008,2 +np.float64,0x187d0fbc30fa3,0xc0734ac03cc98cc9,2 +np.float64,0x7fd153afeaa2a75f,0x40733aff00b4311d,2 +np.float64,0x3fe05310a5e0a621,0xbfd2b5386aeecaac,2 +np.float64,0x7fea863b2b750c75,0x407342c57807f700,2 +np.float64,0x3fed5f0c633abe19,0xbfa30f6cfbc4bf94,2 +np.float64,0xf227c8b3e44f9,0xc0733ad42daaec9f,2 +np.float64,0x3fe956524772aca5,0xbfb9f4cabed7081d,2 +np.float64,0xefd11af7dfa24,0xc0733ae570ed2552,2 +np.float64,0x1690fff02d221,0xc0734b51a56c2980,2 +np.float64,0x7fd2e547a825ca8e,0x40733b992d6d9635,2 diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-log1p.csv 
b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-log1p.csv new file mode 100644 index 0000000..6e4f88b --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-log1p.csv @@ -0,0 +1,1429 @@ +dtype,input,output,ulperrortol +np.float32,0x3e10aca8,0x3e075347,2 +np.float32,0x3f776e66,0x3f2d2003,2 +np.float32,0xbf34e8ce,0xbf9cfd5c,2 +np.float32,0xbf0260ee,0xbf363f69,2 +np.float32,0x3ed285e8,0x3eb05870,2 +np.float32,0x262b88,0x262b88,2 +np.float32,0x3eeffd6c,0x3ec4cfdb,2 +np.float32,0x3ee86808,0x3ebf9f54,2 +np.float32,0x3f36eba8,0x3f0a0524,2 +np.float32,0xbf1c047a,0xbf70afc7,2 +np.float32,0x3ead2916,0x3e952902,2 +np.float32,0x61c9c9,0x61c9c9,2 +np.float32,0xff7fffff,0xffc00000,2 +np.float32,0x7f64ee52,0x42b138e0,2 +np.float32,0x7ed00b1e,0x42afa4ff,2 +np.float32,0x3db53340,0x3dada0b2,2 +np.float32,0x3e6b0a4a,0x3e5397a4,2 +np.float32,0x7ed5d64f,0x42afb310,2 +np.float32,0xbf12bc5f,0xbf59f5ee,2 +np.float32,0xbda12710,0xbda7d8b5,2 +np.float32,0xbe2e89d8,0xbe3f5a9f,2 +np.float32,0x3f5bee75,0x3f1ebea4,2 +np.float32,0x9317a,0x9317a,2 +np.float32,0x7ee00130,0x42afcad8,2 +np.float32,0x7ef0d16d,0x42afefe7,2 +np.float32,0xbec7463a,0xbefc6a44,2 +np.float32,0xbf760ecc,0xc04fe59c,2 +np.float32,0xbecacb3c,0xbf011ae3,2 +np.float32,0x3ead92be,0x3e9577f0,2 +np.float32,0xbf41510d,0xbfb41b3a,2 +np.float32,0x7f71d489,0x42b154f1,2 +np.float32,0x8023bcd5,0x8023bcd5,2 +np.float32,0x801d33d8,0x801d33d8,2 +np.float32,0x3f3f545d,0x3f0ee0d4,2 +np.float32,0xbf700682,0xc0318c25,2 +np.float32,0xbe54e990,0xbe6eb0a3,2 +np.float32,0x7f0289bf,0x42b01941,2 +np.float32,0xbd61ac90,0xbd682113,2 +np.float32,0xbf2ff310,0xbf94cd6f,2 +np.float32,0x7f10064a,0x42b04b98,2 +np.float32,0x804d0d6d,0x804d0d6d,2 +np.float32,0x80317b0a,0x80317b0a,2 +np.float32,0xbddfef18,0xbded2640,2 +np.float32,0x3f00c9ab,0x3ed0a5bd,2 +np.float32,0x7f04b905,0x42b021c1,2 +np.float32,0x7fc00000,0x7fc00000,2 +np.float32,0x6524c4,0x6524c4,2 +np.float32,0x3da08ae0,0x3d9a8f88,2 +np.float32,0x293ea9,0x293ea9,2 +np.float32,0x71499e,0x71499e,2 +np.float32,0xbf14f54d,0xbf5f38a5,2 +np.float32,0x806e60f5,0x806e60f5,2 +np.float32,0x3f5f34bb,0x3f207fff,2 +np.float32,0x80513427,0x80513427,2 +np.float32,0x7f379670,0x42b0c7dc,2 +np.float32,0x3efba888,0x3eccb20b,2 +np.float32,0x3eeadd1b,0x3ec14f4b,2 +np.float32,0x7ec5a27f,0x42af8ab8,2 +np.float32,0x3f2afe4e,0x3f02f7a2,2 +np.float32,0x5591c8,0x5591c8,2 +np.float32,0x3dbb7240,0x3db35bab,2 +np.float32,0x805b911b,0x805b911b,2 +np.float32,0x800000,0x800000,2 +np.float32,0x7e784c04,0x42ae9cab,2 +np.float32,0x7ebaae14,0x42af6d86,2 +np.float32,0xbec84f7a,0xbefe1d42,2 +np.float32,0x7cea8281,0x42aa56bf,2 +np.float32,0xbf542cf6,0xbfe1eb1b,2 +np.float32,0xbf6bfb13,0xc0231a5b,2 +np.float32,0x7d6eeaef,0x42abc32c,2 +np.float32,0xbf062f6b,0xbf3e2000,2 +np.float32,0x8073d8e9,0x8073d8e9,2 +np.float32,0xbea4db14,0xbec6f485,2 +np.float32,0x7d7e8d62,0x42abe3a0,2 +np.float32,0x7e8fc34e,0x42aee7c6,2 +np.float32,0x7dcbb0c3,0x42acd464,2 +np.float32,0x7e123c,0x7e123c,2 +np.float32,0x3d77af62,0x3d707c34,2 +np.float32,0x498cc8,0x498cc8,2 +np.float32,0x7f4e2206,0x42b1032a,2 +np.float32,0x3f734e0a,0x3f2b04a1,2 +np.float32,0x8053a9d0,0x8053a9d0,2 +np.float32,0xbe8a67e0,0xbea15be9,2 +np.float32,0xbf78e0ea,0xc065409e,2 +np.float32,0x352bdd,0x352bdd,2 +np.float32,0x3ee42be7,0x3ebcb38a,2 +np.float32,0x7f482d10,0x42b0f427,2 +np.float32,0xbf23155e,0xbf81b993,2 +np.float32,0x594920,0x594920,2 +np.float32,0x63f53f,0x63f53f,2 +np.float32,0x363592,0x363592,2 +np.float32,0x7dafbb78,0x42ac88cc,2 
+np.float32,0x7f69516c,0x42b14298,2 +np.float32,0x3e1d5be2,0x3e126131,2 +np.float32,0x410c23,0x410c23,2 +np.float32,0x7ec9563c,0x42af9439,2 +np.float32,0xbedd3a0e,0xbf10d705,2 +np.float32,0x7f7c4f1f,0x42b16aa8,2 +np.float32,0xbe99b34e,0xbeb6c2d3,2 +np.float32,0x6cdc84,0x6cdc84,2 +np.float32,0x5b3bbe,0x5b3bbe,2 +np.float32,0x252178,0x252178,2 +np.float32,0x7d531865,0x42ab83c8,2 +np.float32,0xbf565b44,0xbfe873bf,2 +np.float32,0x5977ce,0x5977ce,2 +np.float32,0x588a58,0x588a58,2 +np.float32,0x3eae7054,0x3e961d51,2 +np.float32,0x725049,0x725049,2 +np.float32,0x7f2b9386,0x42b0a538,2 +np.float32,0xbe674714,0xbe831245,2 +np.float32,0x8044f0d8,0x8044f0d8,2 +np.float32,0x800a3c21,0x800a3c21,2 +np.float32,0x807b275b,0x807b275b,2 +np.float32,0xbf2463b6,0xbf83896e,2 +np.float32,0x801cca42,0x801cca42,2 +np.float32,0xbf28f2d0,0xbf8a121a,2 +np.float32,0x3f4168c2,0x3f1010ce,2 +np.float32,0x6f91a1,0x6f91a1,2 +np.float32,0xbf2b9eeb,0xbf8e0fc5,2 +np.float32,0xbea4c858,0xbec6d8e4,2 +np.float32,0xbf7abba0,0xc0788e88,2 +np.float32,0x802f18f7,0x802f18f7,2 +np.float32,0xbf7f6c75,0xc0c3145c,2 +np.float32,0xbe988210,0xbeb50f5e,2 +np.float32,0xbf219b7e,0xbf7f6a3b,2 +np.float32,0x7f800000,0x7f800000,2 +np.float32,0x7f7fffff,0x42b17218,2 +np.float32,0xbdca8d90,0xbdd5487e,2 +np.float32,0xbef683b0,0xbf2821b0,2 +np.float32,0x8043e648,0x8043e648,2 +np.float32,0xbf4319a4,0xbfb7cd1b,2 +np.float32,0x62c2b2,0x62c2b2,2 +np.float32,0xbf479ccd,0xbfc1a7b1,2 +np.float32,0x806c8a32,0x806c8a32,2 +np.float32,0x7f004447,0x42b01045,2 +np.float32,0x3f737d36,0x3f2b1ccf,2 +np.float32,0x3ee71f24,0x3ebebced,2 +np.float32,0x3ea0b6b4,0x3e8bc606,2 +np.float32,0x358fd7,0x358fd7,2 +np.float32,0xbe69780c,0xbe847d17,2 +np.float32,0x7f6bed18,0x42b14849,2 +np.float32,0xbf6a5113,0xc01dfe1d,2 +np.float32,0xbf255693,0xbf84de88,2 +np.float32,0x7f34acac,0x42b0bfac,2 +np.float32,0xbe8a3b6a,0xbea11efe,2 +np.float32,0x3f470d84,0x3f1342ab,2 +np.float32,0xbf2cbde3,0xbf8fc602,2 +np.float32,0x47c103,0x47c103,2 +np.float32,0xe3c94,0xe3c94,2 +np.float32,0xbec07afa,0xbef1693a,2 +np.float32,0x6a9cfe,0x6a9cfe,2 +np.float32,0xbe4339e0,0xbe5899da,2 +np.float32,0x7ea9bf1e,0x42af3cd6,2 +np.float32,0x3f6378b4,0x3f22c4c4,2 +np.float32,0xbd989ff0,0xbd9e9c77,2 +np.float32,0xbe6f2f50,0xbe88343d,2 +np.float32,0x3f7f2ac5,0x3f310764,2 +np.float32,0x3f256704,0x3eff2fb2,2 +np.float32,0x80786aca,0x80786aca,2 +np.float32,0x65d02f,0x65d02f,2 +np.float32,0x50d1c3,0x50d1c3,2 +np.float32,0x3f4a9d76,0x3f1541b4,2 +np.float32,0x802cf491,0x802cf491,2 +np.float32,0x3e935cec,0x3e81829b,2 +np.float32,0x3e2ad478,0x3e1dfd81,2 +np.float32,0xbf107cbd,0xbf54bef2,2 +np.float32,0xbf58c02e,0xbff007fe,2 +np.float32,0x80090808,0x80090808,2 +np.float32,0x805d1f66,0x805d1f66,2 +np.float32,0x6aec95,0x6aec95,2 +np.float32,0xbee3fc6e,0xbf16dc73,2 +np.float32,0x7f63314b,0x42b134f9,2 +np.float32,0x550443,0x550443,2 +np.float32,0xbefa8174,0xbf2c026e,2 +np.float32,0x3f7fb380,0x3f314bd5,2 +np.float32,0x80171f2c,0x80171f2c,2 +np.float32,0x3f2f56ae,0x3f058f2d,2 +np.float32,0x3eacaecb,0x3e94cd97,2 +np.float32,0xbe0c4f0c,0xbe16e69d,2 +np.float32,0x3f48e4cb,0x3f144b42,2 +np.float32,0x7f03efe2,0x42b01eb7,2 +np.float32,0xbf1019ac,0xbf53dbe9,2 +np.float32,0x3e958524,0x3e832eb5,2 +np.float32,0xbf1b23c6,0xbf6e72f2,2 +np.float32,0x12c554,0x12c554,2 +np.float32,0x7dee588c,0x42ad24d6,2 +np.float32,0xbe8c216c,0xbea3ba70,2 +np.float32,0x804553cb,0x804553cb,2 +np.float32,0xbe446324,0xbe5a0966,2 +np.float32,0xbef7150a,0xbf28adff,2 +np.float32,0xbf087282,0xbf42ec6e,2 +np.float32,0x3eeef15c,0x3ec41937,2 
+np.float32,0x61bbd2,0x61bbd2,2 +np.float32,0x3e51b28d,0x3e3ec538,2 +np.float32,0x57e869,0x57e869,2 +np.float32,0x7e5e7711,0x42ae646c,2 +np.float32,0x8050b173,0x8050b173,2 +np.float32,0xbf63c90c,0xc00d2438,2 +np.float32,0xbeba774c,0xbee7dcf8,2 +np.float32,0x8016faac,0x8016faac,2 +np.float32,0xbe8b448c,0xbea28aaf,2 +np.float32,0x3e8cd448,0x3e78d29e,2 +np.float32,0x80484e02,0x80484e02,2 +np.float32,0x3f63ba68,0x3f22e78c,2 +np.float32,0x2e87bb,0x2e87bb,2 +np.float32,0x230496,0x230496,2 +np.float32,0x1327b2,0x1327b2,2 +np.float32,0xbf046c56,0xbf3a72d2,2 +np.float32,0x3ecefe60,0x3eadd69a,2 +np.float32,0x49c56e,0x49c56e,2 +np.float32,0x3df22d60,0x3de4e550,2 +np.float32,0x3f67c19d,0x3f250707,2 +np.float32,0x3f20eb9c,0x3ef9b624,2 +np.float32,0x3f05ca75,0x3ed742fa,2 +np.float32,0xbe8514f8,0xbe9a1d45,2 +np.float32,0x8070a003,0x8070a003,2 +np.float32,0x7e49650e,0x42ae317a,2 +np.float32,0x3de16ce9,0x3dd5dc3e,2 +np.float32,0xbf4ae952,0xbfc95f1f,2 +np.float32,0xbe44dd84,0xbe5aa0db,2 +np.float32,0x803c3bc0,0x803c3bc0,2 +np.float32,0x3eebb9e8,0x3ec1e692,2 +np.float32,0x80588275,0x80588275,2 +np.float32,0xbea1e69a,0xbec29d86,2 +np.float32,0x3f7b4bf8,0x3f2f154c,2 +np.float32,0x7eb47ecc,0x42af5c46,2 +np.float32,0x3d441e00,0x3d3f911a,2 +np.float32,0x7f54d40e,0x42b11388,2 +np.float32,0xbf47f17e,0xbfc26882,2 +np.float32,0x3ea7da57,0x3e912db4,2 +np.float32,0x3f59cc7b,0x3f1d984e,2 +np.float32,0x570e08,0x570e08,2 +np.float32,0x3e99560c,0x3e8620a2,2 +np.float32,0x3ecfbd14,0x3eae5e55,2 +np.float32,0x7e86be08,0x42aec698,2 +np.float32,0x3f10f28a,0x3ee5b5d3,2 +np.float32,0x7f228722,0x42b0897a,2 +np.float32,0x3f4b979b,0x3f15cd30,2 +np.float32,0xbf134283,0xbf5b30f9,2 +np.float32,0x3f2ae16a,0x3f02e64f,2 +np.float32,0x3e98e158,0x3e85c6cc,2 +np.float32,0x7ec39f27,0x42af857a,2 +np.float32,0x3effedb0,0x3ecf8cea,2 +np.float32,0xbd545620,0xbd5a09c1,2 +np.float32,0x503a28,0x503a28,2 +np.float32,0x3f712744,0x3f29e9a1,2 +np.float32,0x3edc6194,0x3eb748b1,2 +np.float32,0xbf4ec1e5,0xbfd2ff5f,2 +np.float32,0x3f46669e,0x3f12e4b5,2 +np.float32,0xabad3,0xabad3,2 +np.float32,0x80000000,0x80000000,2 +np.float32,0x803f2e6d,0x803f2e6d,2 +np.float32,0xbf431542,0xbfb7c3e6,2 +np.float32,0x3f6f2d53,0x3f28e496,2 +np.float32,0x546bd8,0x546bd8,2 +np.float32,0x25c80a,0x25c80a,2 +np.float32,0x3e50883c,0x3e3dcd7e,2 +np.float32,0xbf5fa2ba,0xc0045c14,2 +np.float32,0x80271c07,0x80271c07,2 +np.float32,0x8043755d,0x8043755d,2 +np.float32,0xbf3c5cea,0xbfaa5ee9,2 +np.float32,0x3f2fea38,0x3f05e6af,2 +np.float32,0x6da3dc,0x6da3dc,2 +np.float32,0xbf095945,0xbf44dc70,2 +np.float32,0xbe33d584,0xbe45c1f5,2 +np.float32,0x7eb41b2e,0x42af5b2b,2 +np.float32,0xbf0feb74,0xbf537242,2 +np.float32,0xbe96225a,0xbeb1b0b1,2 +np.float32,0x3f63b95f,0x3f22e700,2 +np.float32,0x0,0x0,2 +np.float32,0x3e20b0cc,0x3e154374,2 +np.float32,0xbf79880c,0xc06b6801,2 +np.float32,0xbea690b6,0xbec97b93,2 +np.float32,0xbf3e11ca,0xbfada449,2 +np.float32,0x7e7e6292,0x42aea912,2 +np.float32,0x3e793350,0x3e5f0b7b,2 +np.float32,0x802e7183,0x802e7183,2 +np.float32,0x3f1b3695,0x3ef2a788,2 +np.float32,0x801efa20,0x801efa20,2 +np.float32,0x3f1ec43a,0x3ef70f42,2 +np.float32,0xbf12c5ed,0xbf5a0c52,2 +np.float32,0x8005e99c,0x8005e99c,2 +np.float32,0xbf79f5e7,0xc06fcca5,2 +np.float32,0x3ecbaf50,0x3eab7a03,2 +np.float32,0x46b0fd,0x46b0fd,2 +np.float32,0x3edb9023,0x3eb6b631,2 +np.float32,0x7f24bc41,0x42b09063,2 +np.float32,0xbd8d9328,0xbd92b4c6,2 +np.float32,0x3f2c5d7f,0x3f03c9d9,2 +np.float32,0x807bebc9,0x807bebc9,2 +np.float32,0x7f797a99,0x42b164e2,2 +np.float32,0x756e3c,0x756e3c,2 
+np.float32,0x80416f8a,0x80416f8a,2 +np.float32,0x3e0d512a,0x3e04611a,2 +np.float32,0x3f7be3e6,0x3f2f61ec,2 +np.float32,0x80075c41,0x80075c41,2 +np.float32,0xbe850294,0xbe9a046c,2 +np.float32,0x684679,0x684679,2 +np.float32,0x3eb393c4,0x3e99eed2,2 +np.float32,0x3f4177c6,0x3f10195b,2 +np.float32,0x3dd1f402,0x3dc7dfe5,2 +np.float32,0x3ef484d4,0x3ec7e2e1,2 +np.float32,0x53eb8f,0x53eb8f,2 +np.float32,0x7f072cb6,0x42b02b20,2 +np.float32,0xbf1b6b55,0xbf6f28d4,2 +np.float32,0xbd8a98d8,0xbd8f827d,2 +np.float32,0x3eafb418,0x3e970e96,2 +np.float32,0x6555af,0x6555af,2 +np.float32,0x7dd5118e,0x42aceb6f,2 +np.float32,0x800a13f7,0x800a13f7,2 +np.float32,0x331a9d,0x331a9d,2 +np.float32,0x8063773f,0x8063773f,2 +np.float32,0x3e95e068,0x3e837553,2 +np.float32,0x80654b32,0x80654b32,2 +np.float32,0x3dabe0e0,0x3da50bb3,2 +np.float32,0xbf6283c3,0xc00a5280,2 +np.float32,0x80751cc5,0x80751cc5,2 +np.float32,0x3f668eb6,0x3f2465c0,2 +np.float32,0x3e13c058,0x3e0a048c,2 +np.float32,0x77780c,0x77780c,2 +np.float32,0x3f7d6e48,0x3f302868,2 +np.float32,0x7e31f9e3,0x42adf22f,2 +np.float32,0x246c7b,0x246c7b,2 +np.float32,0xbe915bf0,0xbeaafa6c,2 +np.float32,0xbf800000,0xff800000,2 +np.float32,0x3f698f42,0x3f25f8e0,2 +np.float32,0x7e698885,0x42ae7d48,2 +np.float32,0x3f5bbd42,0x3f1ea42c,2 +np.float32,0x5b8444,0x5b8444,2 +np.float32,0xbf6065f6,0xc005e2c6,2 +np.float32,0xbeb95036,0xbee60dad,2 +np.float32,0xbf44f846,0xbfbbcade,2 +np.float32,0xc96e5,0xc96e5,2 +np.float32,0xbf213e90,0xbf7e6eae,2 +np.float32,0xbeb309cc,0xbedc4fe6,2 +np.float32,0xbe781cf4,0xbe8e0fe6,2 +np.float32,0x7f0cf0db,0x42b04083,2 +np.float32,0xbf7b6143,0xc08078f9,2 +np.float32,0x80526fc6,0x80526fc6,2 +np.float32,0x3f092bf3,0x3edbaeec,2 +np.float32,0x3ecdf154,0x3ead16df,2 +np.float32,0x2fe85b,0x2fe85b,2 +np.float32,0xbf5100a0,0xbfd8f871,2 +np.float32,0xbec09d40,0xbef1a028,2 +np.float32,0x5e6a85,0x5e6a85,2 +np.float32,0xbec0e2a0,0xbef20f6b,2 +np.float32,0x3f72e788,0x3f2ad00d,2 +np.float32,0x880a6,0x880a6,2 +np.float32,0x3d9e90bf,0x3d98b9fc,2 +np.float32,0x15cf25,0x15cf25,2 +np.float32,0x10171b,0x10171b,2 +np.float32,0x805cf1aa,0x805cf1aa,2 +np.float32,0x3f19bd36,0x3ef0d0d2,2 +np.float32,0x3ebe2bda,0x3ea1b774,2 +np.float32,0xbecd8192,0xbf035c49,2 +np.float32,0x3e2ce508,0x3e1fc21b,2 +np.float32,0x290f,0x290f,2 +np.float32,0x803b679f,0x803b679f,2 +np.float32,0x1,0x1,2 +np.float32,0x807a9c76,0x807a9c76,2 +np.float32,0xbf65fced,0xc01257f8,2 +np.float32,0x3f783414,0x3f2d8475,2 +np.float32,0x3f2d9d92,0x3f0488da,2 +np.float32,0xbddb5798,0xbde80018,2 +np.float32,0x3e91afb8,0x3e8034e7,2 +np.float32,0xbf1b775a,0xbf6f476d,2 +np.float32,0xbf73a32c,0xc041f3ba,2 +np.float32,0xbea39364,0xbec5121b,2 +np.float32,0x80375b94,0x80375b94,2 +np.float32,0x3f331252,0x3f07c3e9,2 +np.float32,0xbf285774,0xbf892e74,2 +np.float32,0x3e699bb8,0x3e526d55,2 +np.float32,0x3f08208a,0x3eda523a,2 +np.float32,0xbf42fb4a,0xbfb78d60,2 +np.float32,0x8029c894,0x8029c894,2 +np.float32,0x3e926c0c,0x3e80c76e,2 +np.float32,0x801e4715,0x801e4715,2 +np.float32,0x3e4b36d8,0x3e395ffd,2 +np.float32,0x8041556b,0x8041556b,2 +np.float32,0xbf2d99ba,0xbf9119bd,2 +np.float32,0x3ed83ea8,0x3eb46250,2 +np.float32,0xbe94a280,0xbeaf92b4,2 +np.float32,0x7f4c7a64,0x42b0ff0a,2 +np.float32,0x806d4022,0x806d4022,2 +np.float32,0xbed382f8,0xbf086d26,2 +np.float32,0x1846fe,0x1846fe,2 +np.float32,0xbe702558,0xbe88d4d8,2 +np.float32,0xbe650ee0,0xbe81a3cc,2 +np.float32,0x3ee9d088,0x3ec0970c,2 +np.float32,0x7f6d4498,0x42b14b30,2 +np.float32,0xbef9f9e6,0xbf2b7ddb,2 +np.float32,0xbf70c384,0xc0349370,2 +np.float32,0xbeff9e9e,0xbf3110c8,2 
+np.float32,0xbef06372,0xbf224aa9,2 +np.float32,0xbf15a692,0xbf60e1fa,2 +np.float32,0x8058c117,0x8058c117,2 +np.float32,0xbd9f74b8,0xbda6017b,2 +np.float32,0x801bf130,0x801bf130,2 +np.float32,0x805da84c,0x805da84c,2 +np.float32,0xff800000,0xffc00000,2 +np.float32,0xbeb01de2,0xbed7d6d6,2 +np.float32,0x8077de08,0x8077de08,2 +np.float32,0x3e327668,0x3e2482c1,2 +np.float32,0xbe7add88,0xbe8fe1ab,2 +np.float32,0x805a3c2e,0x805a3c2e,2 +np.float32,0x80326a73,0x80326a73,2 +np.float32,0x800b8a34,0x800b8a34,2 +np.float32,0x8048c83a,0x8048c83a,2 +np.float32,0xbf3799d6,0xbfa1a975,2 +np.float32,0x807649c7,0x807649c7,2 +np.float32,0x3dfdbf90,0x3def3798,2 +np.float32,0xbf1b538a,0xbf6eec4c,2 +np.float32,0xbf1e5989,0xbf76baa0,2 +np.float32,0xc7a80,0xc7a80,2 +np.float32,0x8001be54,0x8001be54,2 +np.float32,0x3f435bbc,0x3f112c6d,2 +np.float32,0xbeabcff8,0xbed151d1,2 +np.float32,0x7de20c78,0x42ad09b7,2 +np.float32,0x3f0e6d2e,0x3ee27b1e,2 +np.float32,0xbf0cb352,0xbf4c3267,2 +np.float32,0x7f6ec06f,0x42b14e61,2 +np.float32,0x7f6fa8ef,0x42b15053,2 +np.float32,0xbf3d2a6a,0xbfabe623,2 +np.float32,0x7f077a4c,0x42b02c46,2 +np.float32,0xbf2a68dc,0xbf8c3cc4,2 +np.float32,0x802a5dbe,0x802a5dbe,2 +np.float32,0x807f631c,0x807f631c,2 +np.float32,0x3dc9b8,0x3dc9b8,2 +np.float32,0x3ebdc1b7,0x3ea16a0a,2 +np.float32,0x7ef29dab,0x42aff3b5,2 +np.float32,0x3e8ab1cc,0x3e757806,2 +np.float32,0x3f27e88e,0x3f011c6d,2 +np.float32,0x3cfd1455,0x3cf93fb5,2 +np.float32,0x7f7eebf5,0x42b16fef,2 +np.float32,0x3c9b2140,0x3c99ade9,2 +np.float32,0x7e928601,0x42aef183,2 +np.float32,0xbd7d2db0,0xbd82abae,2 +np.float32,0x3e6f0df3,0x3e56da20,2 +np.float32,0x7d36a2fc,0x42ab39a3,2 +np.float32,0xbf49d3a2,0xbfc6c859,2 +np.float32,0x7ee541d3,0x42afd6b6,2 +np.float32,0x80753dc0,0x80753dc0,2 +np.float32,0x3f4ce486,0x3f16865d,2 +np.float32,0x39e701,0x39e701,2 +np.float32,0x3f3d9ede,0x3f0de5fa,2 +np.float32,0x7fafb2,0x7fafb2,2 +np.float32,0x3e013fdc,0x3df37090,2 +np.float32,0x807b6a2c,0x807b6a2c,2 +np.float32,0xbe86800a,0xbe9c08c7,2 +np.float32,0x7f40f080,0x42b0e14d,2 +np.float32,0x7eef5afe,0x42afecc8,2 +np.float32,0x7ec30052,0x42af83da,2 +np.float32,0x3eacf768,0x3e9503e1,2 +np.float32,0x7f13ef0e,0x42b0594e,2 +np.float32,0x80419f4a,0x80419f4a,2 +np.float32,0xbf485932,0xbfc3562a,2 +np.float32,0xbe8a24d6,0xbea10011,2 +np.float32,0xbda791c0,0xbdaed2bc,2 +np.float32,0x3e9b5169,0x3e87a67d,2 +np.float32,0x807dd882,0x807dd882,2 +np.float32,0x7f40170e,0x42b0df0a,2 +np.float32,0x7f02f7f9,0x42b01af1,2 +np.float32,0x3ea38bf9,0x3e8decde,2 +np.float32,0x3e2e7ce8,0x3e211ed4,2 +np.float32,0x70a7a6,0x70a7a6,2 +np.float32,0x7d978592,0x42ac3ce7,2 +np.float32,0x804d12d0,0x804d12d0,2 +np.float32,0x80165dc8,0x80165dc8,2 +np.float32,0x80000001,0x80000001,2 +np.float32,0x3e325da0,0x3e246da6,2 +np.float32,0xbe063bb8,0xbe0fe281,2 +np.float32,0x160b8,0x160b8,2 +np.float32,0xbe5687a4,0xbe70bbef,2 +np.float32,0x7f11ab34,0x42b05168,2 +np.float32,0xc955c,0xc955c,2 +np.float32,0xbea0003a,0xbebfd826,2 +np.float32,0x3f7fbdd9,0x3f315102,2 +np.float32,0xbe61aefc,0xbe7ef121,2 +np.float32,0xbf1b9873,0xbf6f9bc3,2 +np.float32,0x3a6d14,0x3a6d14,2 +np.float32,0xbf1ad3b4,0xbf6da808,2 +np.float32,0x3ed2dd24,0x3eb0963d,2 +np.float32,0xbe81a4ca,0xbe957d52,2 +np.float32,0x7f1be3e9,0x42b07421,2 +np.float32,0x7f5ce943,0x42b1269e,2 +np.float32,0x7eebcbdf,0x42afe51d,2 +np.float32,0x807181b5,0x807181b5,2 +np.float32,0xbecb03ba,0xbf0149ad,2 +np.float32,0x42edb8,0x42edb8,2 +np.float32,0xbf3aeec8,0xbfa7b13f,2 +np.float32,0xbd0c4f00,0xbd0ec4a0,2 +np.float32,0x3e48d260,0x3e376070,2 +np.float32,0x1a9731,0x1a9731,2 
+np.float32,0x7f323be4,0x42b0b8b5,2 +np.float32,0x1a327f,0x1a327f,2 +np.float32,0x17f1fc,0x17f1fc,2 +np.float32,0xbf2f4f9b,0xbf93c91a,2 +np.float32,0x3ede8934,0x3eb8c9c3,2 +np.float32,0xbf56aaac,0xbfe968bb,2 +np.float32,0x3e22cb5a,0x3e17148c,2 +np.float32,0x7d9def,0x7d9def,2 +np.float32,0x8045b963,0x8045b963,2 +np.float32,0x77404f,0x77404f,2 +np.float32,0x7e2c9efb,0x42ade28b,2 +np.float32,0x8058ad89,0x8058ad89,2 +np.float32,0x7f4139,0x7f4139,2 +np.float32,0x8020e12a,0x8020e12a,2 +np.float32,0x800c9daa,0x800c9daa,2 +np.float32,0x7f2c5ac5,0x42b0a789,2 +np.float32,0x3f04a47b,0x3ed5c043,2 +np.float32,0x804692d5,0x804692d5,2 +np.float32,0xbf6e7fa4,0xc02bb493,2 +np.float32,0x80330756,0x80330756,2 +np.float32,0x7f3e29ad,0x42b0d9e1,2 +np.float32,0xbebf689a,0xbeefb24d,2 +np.float32,0x3f29a86c,0x3f022a56,2 +np.float32,0x3e3bd1c0,0x3e2c72b3,2 +np.float32,0x3f78f2e8,0x3f2de546,2 +np.float32,0x3f3709be,0x3f0a16af,2 +np.float32,0x3e11f150,0x3e086f97,2 +np.float32,0xbf5867ad,0xbfeee8a0,2 +np.float32,0xbebfb328,0xbef0296c,2 +np.float32,0x2f7f15,0x2f7f15,2 +np.float32,0x805cfe84,0x805cfe84,2 +np.float32,0xbf504e01,0xbfd71589,2 +np.float32,0x3ee0903c,0x3eba330c,2 +np.float32,0xbd838990,0xbd87f399,2 +np.float32,0x3f14444e,0x3ee9ee7d,2 +np.float32,0x7e352583,0x42adfb3a,2 +np.float32,0x7e76f824,0x42ae99ec,2 +np.float32,0x3f772d00,0x3f2cfebf,2 +np.float32,0x801f7763,0x801f7763,2 +np.float32,0x3f760bf5,0x3f2c6b87,2 +np.float32,0xbf0bb696,0xbf4a03a5,2 +np.float32,0x3f175d2c,0x3eedd6d2,2 +np.float32,0xbf5723f8,0xbfeae288,2 +np.float32,0x24de0a,0x24de0a,2 +np.float32,0x3cd73f80,0x3cd47801,2 +np.float32,0x7f013305,0x42b013fa,2 +np.float32,0x3e3ad425,0x3e2b9c50,2 +np.float32,0x7d3d16,0x7d3d16,2 +np.float32,0x3ef49738,0x3ec7ef54,2 +np.float32,0x3f5b8612,0x3f1e8678,2 +np.float32,0x7f0eeb5c,0x42b047a7,2 +np.float32,0x7e9d7cb0,0x42af1675,2 +np.float32,0xbdd1cfb0,0xbddd5aa0,2 +np.float32,0xbf645dba,0xc00e78fe,2 +np.float32,0x3f511174,0x3f18d56c,2 +np.float32,0x3d91ad00,0x3d8cba62,2 +np.float32,0x805298da,0x805298da,2 +np.float32,0xbedb6af4,0xbf0f4090,2 +np.float32,0x3d23b1ba,0x3d208205,2 +np.float32,0xbea5783e,0xbec7dc87,2 +np.float32,0x79d191,0x79d191,2 +np.float32,0x3e894413,0x3e7337da,2 +np.float32,0x80800000,0x80800000,2 +np.float32,0xbf34a8d3,0xbf9c907b,2 +np.float32,0x3bae779a,0x3bae011f,2 +np.float32,0x8049284d,0x8049284d,2 +np.float32,0x3eb42cc4,0x3e9a600b,2 +np.float32,0x3da1e2d0,0x3d9bce5f,2 +np.float32,0x3f364b8a,0x3f09a7af,2 +np.float32,0x3d930b10,0x3d8e0118,2 +np.float32,0x8061f8d7,0x8061f8d7,2 +np.float32,0x3f473213,0x3f13573b,2 +np.float32,0x3f1e2a38,0x3ef65102,2 +np.float32,0x8068f7d9,0x8068f7d9,2 +np.float32,0x3f181ef8,0x3eeeca2c,2 +np.float32,0x3eeb6168,0x3ec1a9f5,2 +np.float32,0xc2db6,0xc2db6,2 +np.float32,0x3ef7b578,0x3eca0a69,2 +np.float32,0xbf5b5a84,0xbff8d075,2 +np.float32,0x7f479d5f,0x42b0f2b7,2 +np.float32,0x3e6f3c24,0x3e56ff92,2 +np.float32,0x3f45543a,0x3f1249f0,2 +np.float32,0xbea7c1fa,0xbecb40d2,2 +np.float32,0x7de082,0x7de082,2 +np.float32,0x383729,0x383729,2 +np.float32,0xbd91cb90,0xbd973eb3,2 +np.float32,0x7f320218,0x42b0b80f,2 +np.float32,0x5547f2,0x5547f2,2 +np.float32,0x291fe4,0x291fe4,2 +np.float32,0xbe078ba0,0xbe11655f,2 +np.float32,0x7e0c0658,0x42ad7764,2 +np.float32,0x7e129a2b,0x42ad8ee5,2 +np.float32,0x3f7c96d4,0x3f2fbc0c,2 +np.float32,0x3f800000,0x3f317218,2 +np.float32,0x7f131754,0x42b05662,2 +np.float32,0x15f833,0x15f833,2 +np.float32,0x80392ced,0x80392ced,2 +np.float32,0x3f7c141a,0x3f2f7a36,2 +np.float32,0xbf71c03f,0xc038dcfd,2 +np.float32,0xbe14fb2c,0xbe20fff3,2 
+np.float32,0xbee0bac6,0xbf13f14c,2 +np.float32,0x801a32dd,0x801a32dd,2 +np.float32,0x8e12d,0x8e12d,2 +np.float32,0x3f48c606,0x3f143a04,2 +np.float32,0x7f418af5,0x42b0e2e6,2 +np.float32,0x3f1f2918,0x3ef78bb7,2 +np.float32,0x11141b,0x11141b,2 +np.float32,0x3e9fc9e8,0x3e8b11ad,2 +np.float32,0xbea5447a,0xbec79010,2 +np.float32,0xbe31d904,0xbe4359db,2 +np.float32,0x80184667,0x80184667,2 +np.float32,0xbf00503c,0xbf3212c2,2 +np.float32,0x3e0328cf,0x3df6d425,2 +np.float32,0x7ee8e1b7,0x42afdebe,2 +np.float32,0xbef95e24,0xbf2ae5db,2 +np.float32,0x7f3e4eed,0x42b0da45,2 +np.float32,0x3f43ee85,0x3f117fa0,2 +np.float32,0xbcfa2ac0,0xbcfe10fe,2 +np.float32,0x80162774,0x80162774,2 +np.float32,0x372e8b,0x372e8b,2 +np.float32,0x3f263802,0x3f0016b0,2 +np.float32,0x8008725f,0x8008725f,2 +np.float32,0x800beb40,0x800beb40,2 +np.float32,0xbe93308e,0xbead8a77,2 +np.float32,0x3d8a4240,0x3d85cab8,2 +np.float32,0x80179de0,0x80179de0,2 +np.float32,0x7f4a98f2,0x42b0fa4f,2 +np.float32,0x3f0d214e,0x3ee0cff1,2 +np.float32,0x80536c2c,0x80536c2c,2 +np.float32,0x7e7038ed,0x42ae8bbe,2 +np.float32,0x7f345af9,0x42b0bec4,2 +np.float32,0xbf243219,0xbf83442f,2 +np.float32,0x7e0d5555,0x42ad7c27,2 +np.float32,0x762e95,0x762e95,2 +np.float32,0x7ebf4548,0x42af79f6,2 +np.float32,0x8079639e,0x8079639e,2 +np.float32,0x3ef925c0,0x3ecb0260,2 +np.float32,0x3f708695,0x3f2996d6,2 +np.float32,0xfca9f,0xfca9f,2 +np.float32,0x8060dbf4,0x8060dbf4,2 +np.float32,0x4c8840,0x4c8840,2 +np.float32,0xbea922ee,0xbecd4ed5,2 +np.float32,0xbf4f28a9,0xbfd40b98,2 +np.float32,0xbe25ad48,0xbe34ba1b,2 +np.float32,0x3f2fb254,0x3f05c58c,2 +np.float32,0x3f73bcc2,0x3f2b3d5f,2 +np.float32,0xbf479a07,0xbfc1a165,2 +np.float32,0xbeb9a808,0xbee69763,2 +np.float32,0x7eb16a65,0x42af5376,2 +np.float32,0xbeb3e442,0xbedda042,2 +np.float32,0x3d8f439c,0x3d8a79ac,2 +np.float32,0x80347516,0x80347516,2 +np.float32,0x3e8a0c5d,0x3e74738c,2 +np.float32,0xbf0383a4,0xbf389289,2 +np.float32,0x806be8f5,0x806be8f5,2 +np.float32,0x8023f0c5,0x8023f0c5,2 +np.float32,0x2060e9,0x2060e9,2 +np.float32,0xbf759eba,0xc04d239f,2 +np.float32,0x3d84cc5a,0x3d80ab96,2 +np.float32,0xbf57746b,0xbfebdf87,2 +np.float32,0x3e418417,0x3e31401f,2 +np.float32,0xaecce,0xaecce,2 +np.float32,0x3cd1766f,0x3cced45c,2 +np.float32,0x53724a,0x53724a,2 +np.float32,0x3f773710,0x3f2d03de,2 +np.float32,0x8013d040,0x8013d040,2 +np.float32,0x4d0eb2,0x4d0eb2,2 +np.float32,0x8014364a,0x8014364a,2 +np.float32,0x7f3c56c9,0x42b0d4f2,2 +np.float32,0x3eee1e1c,0x3ec3891a,2 +np.float32,0xbdda3eb8,0xbde6c5a0,2 +np.float32,0x26ef4a,0x26ef4a,2 +np.float32,0x7ed3370c,0x42afacbf,2 +np.float32,0xbf06e31b,0xbf3f9ab7,2 +np.float32,0xbe3185f0,0xbe42f556,2 +np.float32,0x3dcf9abe,0x3dc5be41,2 +np.float32,0xbf3696d9,0xbf9fe2bd,2 +np.float32,0x3e68ee50,0x3e51e01a,2 +np.float32,0x3f3d4cc2,0x3f0db6ca,2 +np.float32,0x7fa00000,0x7fe00000,2 +np.float32,0xbf03070c,0xbf3792d0,2 +np.float32,0x3ea79e6c,0x3e910092,2 +np.float32,0xbf1a393a,0xbf6c2251,2 +np.float32,0x3f41eb0e,0x3f105afc,2 +np.float32,0x3ceadb2f,0x3ce78d79,2 +np.float32,0xbf5dc105,0xc000be2c,2 +np.float32,0x7ebb5a0e,0x42af6f5c,2 +np.float32,0xbf7c44eb,0xc0875058,2 +np.float32,0x6aaaf4,0x6aaaf4,2 +np.float32,0x807d8f23,0x807d8f23,2 +np.float32,0xbee6b142,0xbf194fef,2 +np.float32,0xbe83f256,0xbe989526,2 +np.float32,0x7d588e,0x7d588e,2 +np.float32,0x7cc80131,0x42aa0542,2 +np.float32,0x3e0ab198,0x3e02124f,2 +np.float32,0xbf6e64db,0xc02b52eb,2 +np.float32,0x3d238b56,0x3d205d1b,2 +np.float32,0xbeb408e2,0xbeddd8bc,2 +np.float32,0x3f78340d,0x3f2d8471,2 +np.float32,0x806162a3,0x806162a3,2 
+np.float32,0x804e484f,0x804e484f,2 +np.float32,0xbeb8c576,0xbee53466,2 +np.float32,0x807aab15,0x807aab15,2 +np.float32,0x3f523e20,0x3f197ab8,2 +np.float32,0xbf009190,0xbf3295de,2 +np.float32,0x3df43da5,0x3de6bd82,2 +np.float32,0x7f639aea,0x42b135e6,2 +np.float32,0x3f1e638a,0x3ef697da,2 +np.float32,0xbf4884de,0xbfc3bac3,2 +np.float32,0xbe9336b6,0xbead931b,2 +np.float32,0x6daf7f,0x6daf7f,2 +np.float32,0xbf1fc152,0xbf7a70b1,2 +np.float32,0x3f103720,0x3ee4c649,2 +np.float32,0x3eeaa227,0x3ec126df,2 +np.float32,0x7f7ea945,0x42b16f69,2 +np.float32,0x3d3cd800,0x3d389ead,2 +np.float32,0x3f3d7268,0x3f0dcc6e,2 +np.float32,0xbf3c1b41,0xbfa9e2e3,2 +np.float32,0x3ecf3818,0x3eadffb2,2 +np.float32,0x3f1af312,0x3ef25372,2 +np.float32,0x48fae4,0x48fae4,2 +np.float64,0x7fedaa1ee4fb543d,0x40862da7ca7c308e,2 +np.float64,0x8007d2d810efa5b1,0x8007d2d810efa5b1,2 +np.float64,0x3fc385e069270bc0,0x3fc22b8884cf2c3b,2 +np.float64,0x68ed4130d1da9,0x68ed4130d1da9,2 +np.float64,0x8008e93e58d1d27d,0x8008e93e58d1d27d,2 +np.float64,0xbfd3d62852a7ac50,0xbfd7be3a7ad1af02,2 +np.float64,0xbfc1fa0ba923f418,0xbfc35f0f19447df7,2 +np.float64,0xbfe01b8cec20371a,0xbfe6658c7e6c8e50,2 +np.float64,0xbfeda81a147b5034,0xc004e9c94f2b91c1,2 +np.float64,0xbfe1c36a97e386d5,0xbfe9ead4d6beaa92,2 +np.float64,0x3fe50be51f2a17ca,0x3fe02c8067d9e5c5,2 +np.float64,0x3febed4d3337da9a,0x3fe413956466134f,2 +np.float64,0x80068ea59ced1d4c,0x80068ea59ced1d4c,2 +np.float64,0x3febe77d5877cefb,0x3fe4107ac088bc71,2 +np.float64,0x800ae77617d5ceed,0x800ae77617d5ceed,2 +np.float64,0x3fd0546b60a0a8d7,0x3fcd16c2e995ab23,2 +np.float64,0xbfe33e1476667c29,0xbfed6d7faec4db2f,2 +np.float64,0x3fe9d2fd51b3a5fb,0x3fe2eef834310219,2 +np.float64,0x8004249878284932,0x8004249878284932,2 +np.float64,0xbfd5b485c72b690c,0xbfda828ccc6a7a5c,2 +np.float64,0x7fcd6e6b6b3adcd6,0x408622807f04768e,2 +np.float64,0x3fd7f9c32caff386,0x3fd45d024514b8da,2 +np.float64,0x7f87eb9d702fd73a,0x40860aa99fcff27f,2 +np.float64,0xbfc5d1f6fb2ba3ec,0xbfc7ec367cb3fecc,2 +np.float64,0x8008316a44d062d5,0x8008316a44d062d5,2 +np.float64,0xbfd54e4358aa9c86,0xbfd9e889d2998a4a,2 +np.float64,0xda65facdb4cc0,0xda65facdb4cc0,2 +np.float64,0x3fc5b4f6f32b69f0,0x3fc40d13aa8e248b,2 +np.float64,0x3fd825a5d5b04b4c,0x3fd47ce73e04d3ff,2 +np.float64,0x7ac9d56ef593b,0x7ac9d56ef593b,2 +np.float64,0xbfd0a51977214a32,0xbfd34702071428be,2 +np.float64,0x3fd21f620b243ec4,0x3fcfea0c02193640,2 +np.float64,0x3fe6fb3f1b2df67e,0x3fe151ffb18c983b,2 +np.float64,0x700de022e01bd,0x700de022e01bd,2 +np.float64,0xbfbb76b81236ed70,0xbfbd0d31deea1ec7,2 +np.float64,0x3fecfc3856f9f870,0x3fe4a2fcadf221e0,2 +np.float64,0x3fede286517bc50c,0x3fe51af2fbd6ef63,2 +np.float64,0x7fdc8da96c391b52,0x408627ce09cfef2b,2 +np.float64,0x8000edfcfb81dbfb,0x8000edfcfb81dbfb,2 +np.float64,0x8009ebc42af3d789,0x8009ebc42af3d789,2 +np.float64,0x7fd658aaf8acb155,0x408625d80cd1ccc9,2 +np.float64,0x3feea584a37d4b09,0x3fe57f29a73729cd,2 +np.float64,0x4cfe494699fca,0x4cfe494699fca,2 +np.float64,0xbfe9d96460b3b2c9,0xbffa62ecfa026c77,2 +np.float64,0x7fdb3852c3b670a5,0x4086276c191dc9b1,2 +np.float64,0xbfe4d1fc9ee9a3f9,0xbff0d37ce37cf479,2 +np.float64,0xffefffffffffffff,0xfff8000000000000,2 +np.float64,0xbfd1c43d7fa3887a,0xbfd4cfbefb5f2c43,2 +np.float64,0x3fec4a8e0d78951c,0x3fe4453a82ca2570,2 +np.float64,0x7fafed74583fdae8,0x4086181017b8dac9,2 +np.float64,0x80076c4ebcced89e,0x80076c4ebcced89e,2 +np.float64,0x8001a9aa7b235356,0x8001a9aa7b235356,2 +np.float64,0x121260fe2424d,0x121260fe2424d,2 +np.float64,0x3fddd028e3bba052,0x3fd87998c4c43c5b,2 
+np.float64,0x800ed1cf4a9da39f,0x800ed1cf4a9da39f,2 +np.float64,0xbfef2e63d7fe5cc8,0xc00d53480b16971b,2 +np.float64,0xbfedde3309fbbc66,0xc005ab55b7a7c127,2 +np.float64,0x3fda3e1e85b47c3d,0x3fd5fddafd8d6729,2 +np.float64,0x8007c6443c6f8c89,0x8007c6443c6f8c89,2 +np.float64,0xbfe101705f2202e0,0xbfe8420817665121,2 +np.float64,0x7fe0bff3c1e17fe7,0x4086291539c56d80,2 +np.float64,0x7fe6001dab6c003a,0x40862b43aa7cb060,2 +np.float64,0x7fbdecf7de3bd9ef,0x40861d170b1c51a5,2 +np.float64,0xbfc0fd508c21faa0,0xbfc23a5876e99fa3,2 +np.float64,0xbfcf6eb14f3edd64,0xbfd208cbf742c8ea,2 +np.float64,0x3f6d40ea403a81d5,0x3f6d33934ab8e799,2 +np.float64,0x7fc32600b6264c00,0x40861f10302357e0,2 +np.float64,0x3fd05870baa0b0e0,0x3fcd1d2af420fac7,2 +np.float64,0x80051d5120aa3aa3,0x80051d5120aa3aa3,2 +np.float64,0x3fdb783fcfb6f080,0x3fd6db229658c083,2 +np.float64,0x3fe0b61199e16c24,0x3fdae41e277be2eb,2 +np.float64,0x3daf62167b5ed,0x3daf62167b5ed,2 +np.float64,0xbfec3c53b6f878a7,0xc0011f0ce7a78a2a,2 +np.float64,0x800fc905161f920a,0x800fc905161f920a,2 +np.float64,0x3fdc7b9cc138f73a,0x3fd78f9c2360e661,2 +np.float64,0x7fe4079e97a80f3c,0x40862a83795f2443,2 +np.float64,0x8010000000000000,0x8010000000000000,2 +np.float64,0x7fe6da5345adb4a6,0x40862b9183c1e4b0,2 +np.float64,0xbfd0a76667214ecc,0xbfd34a1e0c1f6186,2 +np.float64,0x37fb0b906ff62,0x37fb0b906ff62,2 +np.float64,0x7fe170e59fa2e1ca,0x408629680a55e5c5,2 +np.float64,0x3fea900c77752019,0x3fe356eec75aa345,2 +np.float64,0x3fc575c63a2aeb8c,0x3fc3d701167d76b5,2 +np.float64,0x3fe8b45da87168bc,0x3fe24ecbb778fd44,2 +np.float64,0xbfcb990ab5373214,0xbfcf1596c076813c,2 +np.float64,0xf146fdfbe28e0,0xf146fdfbe28e0,2 +np.float64,0x8001fcd474c3f9aa,0x8001fcd474c3f9aa,2 +np.float64,0xbfe9b555eeb36aac,0xbffa0630c3bb485b,2 +np.float64,0x800f950be83f2a18,0x800f950be83f2a18,2 +np.float64,0x7feb0e03ab761c06,0x40862ceb30e36887,2 +np.float64,0x7fca51bd4a34a37a,0x4086219b9dfd35c9,2 +np.float64,0xbfdc27c34cb84f86,0xbfe28ccde8d6bc08,2 +np.float64,0x80009ce1714139c4,0x80009ce1714139c4,2 +np.float64,0x8005290fb1ea5220,0x8005290fb1ea5220,2 +np.float64,0xbfee81e6473d03cd,0xc00885972ca1699b,2 +np.float64,0x7fcfb11a373f6233,0x408623180b8f75d9,2 +np.float64,0xbfcb9c4bfd373898,0xbfcf19bd25881928,2 +np.float64,0x7feaec5885f5d8b0,0x40862ce136050e6c,2 +np.float64,0x8009e17a4a53c2f5,0x8009e17a4a53c2f5,2 +np.float64,0xbfe1cceb9e6399d7,0xbfea0038bd3def20,2 +np.float64,0x8009170bd7122e18,0x8009170bd7122e18,2 +np.float64,0xb2b6f7f1656df,0xb2b6f7f1656df,2 +np.float64,0x3fc75bfd1f2eb7f8,0x3fc574c858332265,2 +np.float64,0x3fa24c06ec249800,0x3fa1fa462ffcb8ec,2 +np.float64,0xaa9a4d2d5534a,0xaa9a4d2d5534a,2 +np.float64,0xbfd7b76208af6ec4,0xbfdda0c3200dcc9f,2 +np.float64,0x7f8cbab73039756d,0x40860c20cba57a94,2 +np.float64,0x3fdbcf9f48b79f3f,0x3fd71827a60e8b6d,2 +np.float64,0xbfdd60f71a3ac1ee,0xbfe3a94bc8cf134d,2 +np.float64,0xb9253589724a7,0xb9253589724a7,2 +np.float64,0xbfcf28e37e3e51c8,0xbfd1da9977b741e3,2 +np.float64,0x80011457f7e228b1,0x80011457f7e228b1,2 +np.float64,0x7fec33df737867be,0x40862d404a897122,2 +np.float64,0xae55f8f95cabf,0xae55f8f95cabf,2 +np.float64,0xbfc1ab9397235728,0xbfc303e5533d4a5f,2 +np.float64,0x7fef0f84b3be1f08,0x40862e05f9ba7118,2 +np.float64,0x7fdc94f328b929e5,0x408627d01449d825,2 +np.float64,0x3fee1b598c7c36b3,0x3fe53847be166834,2 +np.float64,0x3fee8326f37d064e,0x3fe56d96f3fbcf43,2 +np.float64,0x3fe7b18a83ef6316,0x3fe1bb6a6d48c675,2 +np.float64,0x3fe5db969c6bb72e,0x3fe0a8d7d151996c,2 +np.float64,0x3e3391d27c673,0x3e3391d27c673,2 +np.float64,0x3fe79a46d76f348e,0x3fe1ae09a96ea628,2 
+np.float64,0x7ff4000000000000,0x7ffc000000000000,2 +np.float64,0x7fe57d6505aafac9,0x40862b13925547f1,2 +np.float64,0x3fc433371d28666e,0x3fc2c196a764c47b,2 +np.float64,0x8008dbf69cd1b7ee,0x8008dbf69cd1b7ee,2 +np.float64,0xbfe744f459ee89e8,0xbff4c847ad3ee152,2 +np.float64,0x80098aa245331545,0x80098aa245331545,2 +np.float64,0x6747112ece8e3,0x6747112ece8e3,2 +np.float64,0x5d342a40ba69,0x5d342a40ba69,2 +np.float64,0xf7a17739ef42f,0xf7a17739ef42f,2 +np.float64,0x3fe1b34a9d236695,0x3fdc2d7c4e2c347a,2 +np.float64,0x7fb53bf5ec2a77eb,0x40861a585ec8f7ff,2 +np.float64,0xbfe6256f1cec4ade,0xbff2d89a36be65ae,2 +np.float64,0xb783bc9b6f078,0xb783bc9b6f078,2 +np.float64,0xbfedf74a3bfbee94,0xc0060bb6f2bc11ef,2 +np.float64,0x3fda2a5eccb454be,0x3fd5efd7f18b8e81,2 +np.float64,0xbfb3838ab2270718,0xbfb44c337fbca3c3,2 +np.float64,0x3fb4ac6dc22958e0,0x3fb3e194ca01a502,2 +np.float64,0x76c11aaaed824,0x76c11aaaed824,2 +np.float64,0x80025bb1af04b764,0x80025bb1af04b764,2 +np.float64,0x3fdc02740ab804e8,0x3fd73b8cd6f95f19,2 +np.float64,0x3fe71856f5ee30ae,0x3fe162e9fafb4428,2 +np.float64,0x800236f332646de7,0x800236f332646de7,2 +np.float64,0x7fe13fd9d2e27fb3,0x408629516b42a317,2 +np.float64,0x7fdf6bbd34bed779,0x40862892069d805c,2 +np.float64,0x3fd4727beba8e4f8,0x3fd1be5b48d9e282,2 +np.float64,0x800e0fac9e5c1f59,0x800e0fac9e5c1f59,2 +np.float64,0xfb54423ff6a89,0xfb54423ff6a89,2 +np.float64,0x800fbf7ed47f7efe,0x800fbf7ed47f7efe,2 +np.float64,0x3fe9d41fa2f3a840,0x3fe2ef98dc1fd463,2 +np.float64,0x800d733e805ae67d,0x800d733e805ae67d,2 +np.float64,0x3feebe4c46fd7c98,0x3fe58bcf7f47264e,2 +np.float64,0x7fe1ab77b5e356ee,0x40862982bb3dce34,2 +np.float64,0xbfdddac05abbb580,0xbfe41aa45f72d5a2,2 +np.float64,0x3fe14219dee28434,0x3fdb9b137d1f1220,2 +np.float64,0x3fe25d3d5a24ba7b,0x3fdd06e1cf32d35a,2 +np.float64,0x8000fa4fbe81f4a0,0x8000fa4fbe81f4a0,2 +np.float64,0x3fe303e23e6607c4,0x3fddd94982efa9f1,2 +np.float64,0x3fe89cf5d83139ec,0x3fe24193a2e12f75,2 +np.float64,0x3fe9b36ef87366de,0x3fe2dd7cdc25a4a5,2 +np.float64,0xbfdb8b38f8371672,0xbfe2023ba7e002bb,2 +np.float64,0xafc354955f86b,0xafc354955f86b,2 +np.float64,0xbfe2f3d49e65e7a9,0xbfecb557a94123d3,2 +np.float64,0x800496617c092cc4,0x800496617c092cc4,2 +np.float64,0x32db0cfa65b62,0x32db0cfa65b62,2 +np.float64,0xbfd893bfa2b12780,0xbfdf02a8c1e545aa,2 +np.float64,0x7fd5ac927d2b5924,0x408625997e7c1f9b,2 +np.float64,0x3fde9defb8bd3be0,0x3fd9056190986349,2 +np.float64,0x80030cfeb54619fe,0x80030cfeb54619fe,2 +np.float64,0x3fcba85b273750b8,0x3fc90a5ca976594f,2 +np.float64,0x3fe98f6f5cf31edf,0x3fe2c97fcb4eca25,2 +np.float64,0x3fe33dbf90667b80,0x3fde21b83321b993,2 +np.float64,0x3fe4686636e8d0cc,0x3fdf928cdca751b3,2 +np.float64,0x80018ade6ce315be,0x80018ade6ce315be,2 +np.float64,0x7fa9af70c8335ee1,0x408616528cd5a906,2 +np.float64,0x3fbeb460aa3d68c0,0x3fbcff96b00a2193,2 +np.float64,0x7fa82c869830590c,0x408615d6598d9368,2 +np.float64,0xd08c0e6fa1182,0xd08c0e6fa1182,2 +np.float64,0x3fef4eb750fe9d6f,0x3fe5d522fd4e7f64,2 +np.float64,0xbfc586f5492b0dec,0xbfc791eaae92aad1,2 +np.float64,0x7fede64ac7bbcc95,0x40862db7f444fa7b,2 +np.float64,0x3fe540003d6a8000,0x3fe04bdfc2916a0b,2 +np.float64,0x8009417fe6f28300,0x8009417fe6f28300,2 +np.float64,0x3fe6959cf16d2b3a,0x3fe116a1ce01887b,2 +np.float64,0x3fb0a40036214800,0x3fb01f447778219a,2 +np.float64,0x3feff26e91ffe4dd,0x3fe627798fc859a7,2 +np.float64,0x7fed8e46cd7b1c8d,0x40862da044a1d102,2 +np.float64,0x7fec4eb774f89d6e,0x40862d47e43edb53,2 +np.float64,0x3fe800e5e07001cc,0x3fe1e8e2b9105fc2,2 +np.float64,0x800f4eb2f9be9d66,0x800f4eb2f9be9d66,2 
+np.float64,0x800611659bcc22cc,0x800611659bcc22cc,2 +np.float64,0x3fd66e65d2acdccc,0x3fd33ad63a5e1000,2 +np.float64,0x800a9085b7f5210c,0x800a9085b7f5210c,2 +np.float64,0x7fdf933a3fbf2673,0x4086289c0e292f2b,2 +np.float64,0x1cd1ba7a39a38,0x1cd1ba7a39a38,2 +np.float64,0xbfefd0b10fffa162,0xc0149ded900ed851,2 +np.float64,0xbfe8c63485b18c69,0xbff7cf3078b1574f,2 +np.float64,0x3fecde56ca79bcae,0x3fe4934afbd7dda9,2 +np.float64,0x8006cd6888cd9ad2,0x8006cd6888cd9ad2,2 +np.float64,0x3fd7a391c2af4724,0x3fd41e2f74df2329,2 +np.float64,0x3fe6a8ad58ed515a,0x3fe121ccfb28e6f5,2 +np.float64,0x7fe18a80dd631501,0x40862973c09086b9,2 +np.float64,0xbf74fd6d8029fb00,0xbf750b3e368ebe6b,2 +np.float64,0x3fdd35e93dba6bd4,0x3fd810071faaffad,2 +np.float64,0x3feb0d8f57361b1f,0x3fe39b3abdef8b7a,2 +np.float64,0xbfd5ec7288abd8e6,0xbfdad764df0d2ca1,2 +np.float64,0x7fdc848272b90904,0x408627cb78f3fb9e,2 +np.float64,0x800ed3eda91da7db,0x800ed3eda91da7db,2 +np.float64,0x3fefac64857f58c9,0x3fe60459dbaad1ba,2 +np.float64,0x3fd1df7a5ba3bef4,0x3fcf864a39b926ff,2 +np.float64,0xfe26ca4bfc4da,0xfe26ca4bfc4da,2 +np.float64,0xbfd1099f8da21340,0xbfd3cf6e6efe934b,2 +np.float64,0xbfe15de9a7a2bbd4,0xbfe909cc895f8795,2 +np.float64,0x3fe89714ed712e2a,0x3fe23e40d31242a4,2 +np.float64,0x800387113e470e23,0x800387113e470e23,2 +np.float64,0x3fe4f80730e9f00e,0x3fe0208219314cf1,2 +np.float64,0x2f95a97c5f2b6,0x2f95a97c5f2b6,2 +np.float64,0x800ea7cdd87d4f9c,0x800ea7cdd87d4f9c,2 +np.float64,0xbf64b967c0297300,0xbf64c020a145b7a5,2 +np.float64,0xbfc5a91a342b5234,0xbfc7bafd77a61d81,2 +np.float64,0xbfe2226fe76444e0,0xbfeac33eb1d1b398,2 +np.float64,0x3fc6aaa8d42d5552,0x3fc4de79f5c68cd4,2 +np.float64,0x3fe54fd4c1ea9faa,0x3fe05561a9a5922b,2 +np.float64,0x80029c1f75653840,0x80029c1f75653840,2 +np.float64,0xbfcb4a84a2369508,0xbfceb1a23bac3995,2 +np.float64,0x80010abeff02157f,0x80010abeff02157f,2 +np.float64,0x7f92d12cf825a259,0x40860e49bde3a5b6,2 +np.float64,0x800933e7027267ce,0x800933e7027267ce,2 +np.float64,0x3fc022b12e204562,0x3fbe64acc53ed887,2 +np.float64,0xbfe35f938de6bf27,0xbfedc1f3e443c016,2 +np.float64,0x1f8d9bae3f1b4,0x1f8d9bae3f1b4,2 +np.float64,0x3fe552f22ceaa5e4,0x3fe057404072350f,2 +np.float64,0xbfa73753442e6ea0,0xbfa7c24a100190f1,2 +np.float64,0x7fb3e2982827c52f,0x408619d1efa676b6,2 +np.float64,0xbfd80cb7a5301970,0xbfde28e65f344f33,2 +np.float64,0xbfcde835973bd06c,0xbfd10806fba46c8f,2 +np.float64,0xbfd4e3c749a9c78e,0xbfd949aff65de39c,2 +np.float64,0x3fcb4b9d6f36973b,0x3fc8be02ad6dc0d3,2 +np.float64,0x1a63000034c7,0x1a63000034c7,2 +np.float64,0x7fdc9c751e3938e9,0x408627d22df71959,2 +np.float64,0x3fd74f3f712e9e7f,0x3fd3e07df0c37ec1,2 +np.float64,0xbfceab74d33d56e8,0xbfd187e99bf82903,2 +np.float64,0x7ff0000000000000,0x7ff0000000000000,2 +np.float64,0xbfb2cca466259948,0xbfb3868208e8de30,2 +np.float64,0x800204688b8408d2,0x800204688b8408d2,2 +np.float64,0x3e4547407c8aa,0x3e4547407c8aa,2 +np.float64,0xbfe4668846e8cd10,0xbff03c85189f3818,2 +np.float64,0x800dd350245ba6a0,0x800dd350245ba6a0,2 +np.float64,0xbfbc13c160382780,0xbfbdbd56ce996d16,2 +np.float64,0x7fe25a628a24b4c4,0x408629d06eb2d64d,2 +np.float64,0x3fd19dabbc233b57,0x3fcf1f3ed1d34c8c,2 +np.float64,0x547e20faa8fc5,0x547e20faa8fc5,2 +np.float64,0xbfe19392c6232726,0xbfe97ffe4f303335,2 +np.float64,0x3f87f9f6702ff400,0x3f87d64fb471bb04,2 +np.float64,0x9dfc52db3bf8b,0x9dfc52db3bf8b,2 +np.float64,0x800e1f5a9adc3eb5,0x800e1f5a9adc3eb5,2 +np.float64,0xbfddbd09c8bb7a14,0xbfe3fed7d7cffc70,2 +np.float64,0xbfeda71af87b4e36,0xc004e6631c514544,2 +np.float64,0xbfdbfcfe1bb7f9fc,0xbfe266b5d4a56265,2 
+np.float64,0x3fe4ee78cd69dcf2,0x3fe01abba4e81fc9,2 +np.float64,0x800f13b820de2770,0x800f13b820de2770,2 +np.float64,0x3f861e09702c3c00,0x3f85ffae83b02c4f,2 +np.float64,0xbfc0972479212e48,0xbfc1c4bf70b30cbc,2 +np.float64,0x7fef057ef57e0afd,0x40862e036479f6a9,2 +np.float64,0x8bdbabe517b76,0x8bdbabe517b76,2 +np.float64,0xbfec495417f892a8,0xc0013ade88746d18,2 +np.float64,0x3fec680ab3f8d015,0x3fe454dd304b560d,2 +np.float64,0xbfae7ce60c3cf9d0,0xbfaf6eef15bbe56b,2 +np.float64,0x3fec314124786282,0x3fe437ca06294f5a,2 +np.float64,0x7fd5ed05b82bda0a,0x408625b125518e58,2 +np.float64,0x3feac9f02f3593e0,0x3fe3768104dd5cb7,2 +np.float64,0x0,0x0,2 +np.float64,0xbfddd2abd5bba558,0xbfe41312b8ea20de,2 +np.float64,0xbfedf9558c7bf2ab,0xc00613c53e0bb33a,2 +np.float64,0x3fef245ffefe48c0,0x3fe5bfb4dfe3b7a5,2 +np.float64,0x7fe178604922f0c0,0x4086296b77d5eaef,2 +np.float64,0x10000000000000,0x10000000000000,2 +np.float64,0x7fed026766ba04ce,0x40862d7a0dc45643,2 +np.float64,0xbfde27d8c3bc4fb2,0xbfe46336b6447697,2 +np.float64,0x3fe9485d9cb290bb,0x3fe2a1e4b6419423,2 +np.float64,0xbfe27b8a7464f715,0xbfeb9382f5b16f65,2 +np.float64,0x5c34d274b869b,0x5c34d274b869b,2 +np.float64,0xbfeee0b7453dc16f,0xc00acdb46459b6e6,2 +np.float64,0x7fe3dfb4d4e7bf69,0x40862a73785fdf12,2 +np.float64,0xb4635eef68c6c,0xb4635eef68c6c,2 +np.float64,0xbfe522a2c82a4546,0xbff148912a59a1d6,2 +np.float64,0x8009ba38a9737472,0x8009ba38a9737472,2 +np.float64,0xbfc056ff3820ae00,0xbfc17b2205fa180d,2 +np.float64,0x7fe1c8b8a0239170,0x4086298feeee6133,2 +np.float64,0x3fe2d2c6b9e5a58e,0x3fdd9b907471031b,2 +np.float64,0x3fa0a161bc2142c0,0x3fa05db36f6a073b,2 +np.float64,0x3fdef4268ebde84c,0x3fd93f980794d1e7,2 +np.float64,0x800ecd9fe2fd9b40,0x800ecd9fe2fd9b40,2 +np.float64,0xbfc9fbd45e33f7a8,0xbfcd0afc47c340f6,2 +np.float64,0x3fe8c3035b718606,0x3fe2570eb65551a1,2 +np.float64,0xbfe78c4ad2ef1896,0xbff54d25b3328742,2 +np.float64,0x8006f5dcf8adebbb,0x8006f5dcf8adebbb,2 +np.float64,0x800301dca2a603ba,0x800301dca2a603ba,2 +np.float64,0xad4289e55a851,0xad4289e55a851,2 +np.float64,0x80037764f9e6eecb,0x80037764f9e6eecb,2 +np.float64,0xbfe73575b26e6aec,0xbff4abfb5e985c62,2 +np.float64,0xbfc6cb91652d9724,0xbfc91a8001b33ec2,2 +np.float64,0xbfe3a918ffe75232,0xbfee7e6e4fd34c53,2 +np.float64,0x9bc84e2b3790a,0x9bc84e2b3790a,2 +np.float64,0x7fdeec303cbdd85f,0x408628714a49d996,2 +np.float64,0x3fe1d1dcb763a3ba,0x3fdc54ce060dc7f4,2 +np.float64,0x8008ae6432b15cc9,0x8008ae6432b15cc9,2 +np.float64,0x3fd8022fa2b00460,0x3fd46322bf02a609,2 +np.float64,0xbfc55b64472ab6c8,0xbfc75d9568f462e0,2 +np.float64,0xbfe8b165437162ca,0xbff7a15e2ead645f,2 +np.float64,0x7f759330feeb3,0x7f759330feeb3,2 +np.float64,0xbfd504f68eaa09ee,0xbfd97b06c01d7473,2 +np.float64,0x54702d5aa8e06,0x54702d5aa8e06,2 +np.float64,0xbfed1779337a2ef2,0xc0032f7109ef5a51,2 +np.float64,0xe248bd4dc4918,0xe248bd4dc4918,2 +np.float64,0xbfd8c59150318b22,0xbfdf53bca6ca8b1e,2 +np.float64,0xbfe3b9d942e773b2,0xbfeea9fcad277ba7,2 +np.float64,0x800934ec127269d9,0x800934ec127269d9,2 +np.float64,0xbfbb7f535a36fea8,0xbfbd16d61b6c52b8,2 +np.float64,0xccb185a199631,0xccb185a199631,2 +np.float64,0x3fe3dda76fe7bb4e,0x3fdee83bc6094301,2 +np.float64,0xbfe0c902f5e19206,0xbfe7ca7c0e888006,2 +np.float64,0xbfefeed08cbfdda1,0xc018aadc483c8724,2 +np.float64,0x7fd0c05c52a180b8,0x40862389daf64aac,2 +np.float64,0xbfd28e3323a51c66,0xbfd5e9ba278fb685,2 +np.float64,0xbef4103b7de82,0xbef4103b7de82,2 +np.float64,0x3fe7661fd12ecc40,0x3fe18ff7dfb696e2,2 +np.float64,0x3fddd5f2f0bbabe4,0x3fd87d8bb6719c3b,2 +np.float64,0x800b3914cfd6722a,0x800b3914cfd6722a,2 
+np.float64,0xf3f09a97e7e14,0xf3f09a97e7e14,2 +np.float64,0x7f97092b502e1256,0x40860fe8054cf54e,2 +np.float64,0xbfdbec7917b7d8f2,0xbfe2580b4b792c79,2 +np.float64,0x7fe7ff215aaffe42,0x40862bf5887fa062,2 +np.float64,0x80080186e570030e,0x80080186e570030e,2 +np.float64,0xbfc27f05e624fe0c,0xbfc3fa214be4adc4,2 +np.float64,0x3fe4481be1689038,0x3fdf6b11e9c4ca72,2 +np.float64,0x3fd642cc9cac8598,0x3fd31a857fe70227,2 +np.float64,0xbef8782d7df0f,0xbef8782d7df0f,2 +np.float64,0x8003077dc2e60efc,0x8003077dc2e60efc,2 +np.float64,0x80083eb5a2507d6c,0x80083eb5a2507d6c,2 +np.float64,0x800e8d1eb77d1a3e,0x800e8d1eb77d1a3e,2 +np.float64,0xbfc7737cd22ee6f8,0xbfc9e7716f03f1fc,2 +np.float64,0xbfe9a2b4ddf3456a,0xbff9d71664a8fc78,2 +np.float64,0x7fe67c7d322cf8f9,0x40862b7066465194,2 +np.float64,0x3fec080ce2b8101a,0x3fe421dac225be46,2 +np.float64,0xbfe6d27beb6da4f8,0xbff3fbb1add521f7,2 +np.float64,0x3fdd4f96ceba9f2e,0x3fd821a638986dbe,2 +np.float64,0x3fbd89f1303b13e2,0x3fbbf49223a9d002,2 +np.float64,0xbfe94e2b9d329c57,0xbff907e549c534f5,2 +np.float64,0x3fe2f2cc51e5e599,0x3fddc3d6b4a834a1,2 +np.float64,0xfdcb5b49fb96c,0xfdcb5b49fb96c,2 +np.float64,0xbfea7108fa74e212,0xbffc01b392f4897b,2 +np.float64,0x3fd38baef7a7175c,0x3fd10e7fd3b958dd,2 +np.float64,0x3fa75bf9cc2eb800,0x3fa6d792ecdedb8e,2 +np.float64,0x7fd19fd20aa33fa3,0x408623f1e2cd04c3,2 +np.float64,0x3fd62c708dac58e0,0x3fd309ec7818d16e,2 +np.float64,0x3fdf489047be9120,0x3fd978640617c758,2 +np.float64,0x1,0x1,2 +np.float64,0xbfe21e7c3ea43cf8,0xbfeaba21320697d3,2 +np.float64,0xbfd3649047a6c920,0xbfd71a6f14223744,2 +np.float64,0xbfd68ca68c2d194e,0xbfdbcce6784e5d44,2 +np.float64,0x3fdb26b0ea364d62,0x3fd6a1f86f64ff74,2 +np.float64,0xbfd843821cb08704,0xbfde80e90805ab3f,2 +np.float64,0x3fd508a27aaa1144,0x3fd22fc203a7b9d8,2 +np.float64,0xbfdb951c7eb72a38,0xbfe20aeaec13699b,2 +np.float64,0x3fef556ba57eaad7,0x3fe5d8865cce0a6d,2 +np.float64,0x3fd0d224b3a1a448,0x3fcdde7be5d7e21e,2 +np.float64,0x8007ff272baffe4f,0x8007ff272baffe4f,2 +np.float64,0x3fe1c7bddf638f7c,0x3fdc47cc6cf2f5cd,2 +np.float64,0x7ff8000000000000,0x7ff8000000000000,2 +np.float64,0x2016d560402f,0x2016d560402f,2 +np.float64,0xbfcca10be9394218,0xbfd033f36b94fc54,2 +np.float64,0xbfdb833628b7066c,0xbfe1fb344b840c70,2 +np.float64,0x3fd8529cb3b0a539,0x3fd49d847fe77218,2 +np.float64,0xbfc0b0ebab2161d8,0xbfc1e260c60ffd1b,2 +np.float64,0xbfea8b9a79f51735,0xbffc4ee6be8a0fa2,2 +np.float64,0x7feca8fab7f951f4,0x40862d613e454646,2 +np.float64,0x7fd8c52d82318a5a,0x408626aaf37423a3,2 +np.float64,0xbfe364ad4526c95a,0xbfedcee39bc93ff5,2 +np.float64,0x800b78161256f02d,0x800b78161256f02d,2 +np.float64,0xbfd55f0153aabe02,0xbfda01a78f72d494,2 +np.float64,0x800315a5f0662b4d,0x800315a5f0662b4d,2 +np.float64,0x7fe4c0dca02981b8,0x40862acc27e4819f,2 +np.float64,0x8009825c703304b9,0x8009825c703304b9,2 +np.float64,0x3fe6e94e1cadd29c,0x3fe1478ccc634f49,2 +np.float64,0x7fe622d8586c45b0,0x40862b504177827e,2 +np.float64,0x3fe4458600688b0c,0x3fdf67e79a84b953,2 +np.float64,0xbfdd75d8a1baebb2,0xbfe3bc9e6ca1bbb5,2 +np.float64,0x3fde789c6bbcf138,0x3fd8ec1d435531b3,2 +np.float64,0x3fe7052b94ee0a58,0x3fe157c5c4418dc1,2 +np.float64,0x7fef31652abe62c9,0x40862e0eaeabcfc0,2 +np.float64,0x3fe279691ee4f2d2,0x3fdd2aa41eb43cd4,2 +np.float64,0xbfd533fa95aa67f6,0xbfd9c12f516d29d7,2 +np.float64,0x3fe6d057f96da0b0,0x3fe138fd96693a6a,2 +np.float64,0x800bad984f775b31,0x800bad984f775b31,2 +np.float64,0x7fdd6fdba4badfb6,0x4086280c73d8ef97,2 +np.float64,0x7fe9b5c0eef36b81,0x40862c82c6f57a53,2 +np.float64,0x8000bc02ece17807,0x8000bc02ece17807,2 
+np.float64,0xbff0000000000000,0xfff0000000000000,2 +np.float64,0xbfed430be3fa8618,0xc003aaf338c75b3c,2 +np.float64,0x3fee17b759fc2f6f,0x3fe53668696bf48b,2 +np.float64,0x3f8d4cf9d03a9a00,0x3f8d17d2f532afdc,2 +np.float64,0x8005d6257b8bac4c,0x8005d6257b8bac4c,2 +np.float64,0xbfd17a6df9a2f4dc,0xbfd469e3848adc6e,2 +np.float64,0xb28a293965145,0xb28a293965145,2 +np.float64,0xbfe7d011e42fa024,0xbff5cf818998c8ec,2 +np.float64,0xbfe74f0f136e9e1e,0xbff4dad6ebb0443c,2 +np.float64,0x800f249fc9be4940,0x800f249fc9be4940,2 +np.float64,0x2542f8fe4a860,0x2542f8fe4a860,2 +np.float64,0xc48d40cd891a8,0xc48d40cd891a8,2 +np.float64,0x3fe4e64bc8e9cc98,0x3fe015c9eb3caa53,2 +np.float64,0x3fd33881eca67104,0x3fd0cea886be2457,2 +np.float64,0xbfd01748fba02e92,0xbfd28875959e6901,2 +np.float64,0x7fb7ab01f22f5603,0x40861b369927bf53,2 +np.float64,0xbfe340274ce6804e,0xbfed72b39f0ebb24,2 +np.float64,0x7fc16c0c3422d817,0x40861e4eaf1a286c,2 +np.float64,0x3fc26944a324d288,0x3fc133a77b356ac4,2 +np.float64,0xa149d7134293b,0xa149d7134293b,2 +np.float64,0x800837382d106e71,0x800837382d106e71,2 +np.float64,0x797d1740f2fa4,0x797d1740f2fa4,2 +np.float64,0xc3f15b7787e2c,0xc3f15b7787e2c,2 +np.float64,0x80cad1b90195a,0x80cad1b90195a,2 +np.float64,0x3fdd8f1142bb1e23,0x3fd84d21490d1ce6,2 +np.float64,0xbfbde6c9123bcd90,0xbfbfcc030a86836a,2 +np.float64,0x8007f77e032feefd,0x8007f77e032feefd,2 +np.float64,0x3fe74fed1c6e9fda,0x3fe18322cf19cb61,2 +np.float64,0xbfd8a40bbcb14818,0xbfdf1d23520ba74b,2 +np.float64,0xbfeb7a0e6076f41d,0xbfff4ddfb926efa5,2 +np.float64,0xbfcb8c5f663718c0,0xbfcf0570f702bda9,2 +np.float64,0xf668cd97ecd1a,0xf668cd97ecd1a,2 +np.float64,0xbfe92accf572559a,0xbff8b4393878ffdb,2 +np.float64,0xbfeaa955567552ab,0xbffca70c7d73eee5,2 +np.float64,0xbfe083a14f610742,0xbfe739d84bc35077,2 +np.float64,0x78290568f0521,0x78290568f0521,2 +np.float64,0x3fe94bae2372975c,0x3fe2a3beac5c9858,2 +np.float64,0x3fca4fbab9349f78,0x3fc7edbca2492acb,2 +np.float64,0x8000000000000000,0x8000000000000000,2 +np.float64,0x7fb9eb505433d6a0,0x40861bf0adedb74d,2 +np.float64,0x7fdc66f72a38cded,0x408627c32aeecf0f,2 +np.float64,0x2e8e6f445d1cf,0x2e8e6f445d1cf,2 +np.float64,0xbfec43195af88633,0xc0012d7e3f91b7e8,2 +np.float64,0x7fcdb971e93b72e3,0x40862294c9e3a7bc,2 +np.float64,0x800cabc461195789,0x800cabc461195789,2 +np.float64,0x2c79709c58f2f,0x2c79709c58f2f,2 +np.float64,0x8005d772d3cbaee6,0x8005d772d3cbaee6,2 +np.float64,0x3fe84d8c03709b18,0x3fe21490ce3673dd,2 +np.float64,0x7fe5578adc2aaf15,0x40862b056e8437d4,2 +np.float64,0xbf91298c58225320,0xbf914ec86c32d11f,2 +np.float64,0xc7ed2b6d8fda6,0xc7ed2b6d8fda6,2 +np.float64,0x2761404c4ec29,0x2761404c4ec29,2 +np.float64,0x3fbad3c48835a789,0x3fb9833c02385305,2 +np.float64,0x3fa46fee5428dfe0,0x3fa40a357fb24c23,2 +np.float64,0xbfe3900c6fe72019,0xbfee3dba29dd9d43,2 +np.float64,0x3fe7a9e41a6f53c8,0x3fe1b704dfb9884b,2 +np.float64,0xbfe74a7a1eee94f4,0xbff4d269cacb1f29,2 +np.float64,0xbfee609c72fcc139,0xc007da8499d34123,2 +np.float64,0x3fef2d5fc23e5ac0,0x3fe5c44414e59cb4,2 +np.float64,0xbfd7bdc0402f7b80,0xbfddaae1e7bb78fb,2 +np.float64,0xd71ee01dae3dc,0xd71ee01dae3dc,2 +np.float64,0x3fe98cbcdef3197a,0x3fe2c7ffe33c4541,2 +np.float64,0x8000f8dbb3a1f1b8,0x8000f8dbb3a1f1b8,2 +np.float64,0x3fe3e98ad567d316,0x3fdef6e58058313f,2 +np.float64,0x41ad0bfc835a2,0x41ad0bfc835a2,2 +np.float64,0x7fdcc2dc0d3985b7,0x408627dce39f77af,2 +np.float64,0xbfe47b980de8f730,0xbff059acdccd6e2b,2 +np.float64,0xbfef49b6577e936d,0xc00e714f46b2ccc1,2 +np.float64,0x3fac31816c386300,0x3fab71cb92b0db8f,2 +np.float64,0x3fe59097e76b2130,0x3fe07c299fd1127c,2 
+np.float64,0xbfecf0df5cf9e1bf,0xc002c7ebdd65039c,2 +np.float64,0x3fd2b7d0b6a56fa1,0x3fd06b638990ae02,2 +np.float64,0xbfeb68deecf6d1be,0xbfff1187e042d3e4,2 +np.float64,0x3fd44a9771a8952f,0x3fd1a01867c5e302,2 +np.float64,0xf79a9dedef354,0xf79a9dedef354,2 +np.float64,0x800c25a170d84b43,0x800c25a170d84b43,2 +np.float64,0x3ff0000000000000,0x3fe62e42fefa39ef,2 +np.float64,0x3fbff4f7623fe9f0,0x3fbe1d3878f4c417,2 +np.float64,0xd284c845a5099,0xd284c845a5099,2 +np.float64,0xbfe3c7815f678f02,0xbfeecdab5ca2e651,2 +np.float64,0x3fc19c934e233927,0x3fc08036104b1f23,2 +np.float64,0x800b6096de16c12e,0x800b6096de16c12e,2 +np.float64,0xbfe962a67e32c54d,0xbff9392313a112a1,2 +np.float64,0x2b9d0116573a1,0x2b9d0116573a1,2 +np.float64,0x3fcab269ed3564d4,0x3fc83f7e1c3095b7,2 +np.float64,0x3fc8c78d86318f1b,0x3fc6a6cde5696f99,2 +np.float64,0xd5b1e9b5ab63d,0xd5b1e9b5ab63d,2 +np.float64,0xbfed802a47fb0054,0xc00465cad3b5b0ef,2 +np.float64,0xbfd73aaf08ae755e,0xbfdcdbd62b8af271,2 +np.float64,0xbfd4f13c0229e278,0xbfd95dacff79e570,2 +np.float64,0xbfe9622808f2c450,0xbff937f13c397e8d,2 +np.float64,0xbfeddfa62efbbf4c,0xc005b0c835eed829,2 +np.float64,0x3fd65663d4acacc8,0x3fd3290cd0e675dc,2 +np.float64,0x8005e890f1abd123,0x8005e890f1abd123,2 +np.float64,0xbfe924919fb24923,0xbff8a5a827a28756,2 +np.float64,0x3fe8cdf490719be9,0x3fe25d39535e8366,2 +np.float64,0x7fc229e6ff2453cd,0x40861ea40ef87a5a,2 +np.float64,0x3fe5cf53ceeb9ea8,0x3fe0a18e0b65f27e,2 +np.float64,0xa79cf6fb4f39f,0xa79cf6fb4f39f,2 +np.float64,0x7fddbb3c0f3b7677,0x40862820d5edf310,2 +np.float64,0x3e1011de7c203,0x3e1011de7c203,2 +np.float64,0x3fc0b59a83216b38,0x3fbf6916510ff411,2 +np.float64,0x8647f98d0c8ff,0x8647f98d0c8ff,2 +np.float64,0x8005dad33ecbb5a7,0x8005dad33ecbb5a7,2 +np.float64,0x8a80d0631501a,0x8a80d0631501a,2 +np.float64,0xbfe18f7d6ee31efb,0xbfe976f06713afc1,2 +np.float64,0xbfe06eaed560dd5e,0xbfe70eac696933e6,2 +np.float64,0xbfed8ef93c7b1df2,0xc00495bfa3195b53,2 +np.float64,0x3febe9c24677d385,0x3fe411b10db16c42,2 +np.float64,0x7fd5d80c1fabb017,0x408625a97a7787ba,2 +np.float64,0x3fca79b59334f368,0x3fc8108a521341dc,2 +np.float64,0xbfccf8db4339f1b8,0xbfd06c9a5424aadb,2 +np.float64,0xbfea5ac5a574b58b,0xbffbc21d1405d840,2 +np.float64,0x800ce2bf4b19c57f,0x800ce2bf4b19c57f,2 +np.float64,0xbfe8df896d31bf13,0xbff807ab38ac41ab,2 +np.float64,0x3feab83da9f5707c,0x3fe36cdd827c0eff,2 +np.float64,0x3fee717683bce2ed,0x3fe564879171719b,2 +np.float64,0x80025e5577c4bcac,0x80025e5577c4bcac,2 +np.float64,0x3fe3e5378e67ca70,0x3fdef1902c5d1efd,2 +np.float64,0x3fa014bb7c202980,0x3f9faacf9238d499,2 +np.float64,0x3fddbf5e16bb7ebc,0x3fd86e2311cb0f6d,2 +np.float64,0x3fd24e50e6a49ca0,0x3fd0198f04f82186,2 +np.float64,0x656b5214cad6b,0x656b5214cad6b,2 +np.float64,0x8b0a4bfd1614a,0x8b0a4bfd1614a,2 +np.float64,0xbfeeb6bd9e7d6d7b,0xc009b669285e319e,2 +np.float64,0x8000000000000001,0x8000000000000001,2 +np.float64,0xbfe719feceee33fe,0xbff47a4c8cbf0cca,2 +np.float64,0xbfd14fa8c8a29f52,0xbfd42f27b1aced39,2 +np.float64,0x7fec9dcb80f93b96,0x40862d5e1e70bbb9,2 +np.float64,0x7fecacb826f9596f,0x40862d6249746915,2 +np.float64,0x973459f52e68b,0x973459f52e68b,2 +np.float64,0x7f40a59e00214b3b,0x4085f194f45f82b1,2 +np.float64,0x7fc5dbaec32bb75d,0x4086201f3e7065d9,2 +np.float64,0x82d0801305a10,0x82d0801305a10,2 +np.float64,0x7fec81c0f4790381,0x40862d5643c0fc85,2 +np.float64,0xbfe2d81e9ee5b03d,0xbfec71a8e864ea40,2 +np.float64,0x6c545c9ad8a8c,0x6c545c9ad8a8c,2 +np.float64,0x3f9be95a5037d2b5,0x3f9b89b48ac8f5d8,2 +np.float64,0x8000cae9702195d4,0x8000cae9702195d4,2 
+np.float64,0xbfd375f45126ebe8,0xbfd733677e54a80d,2 +np.float64,0x3fd29a5b81a534b7,0x3fd05494bf200278,2 +np.float64,0xfff0000000000000,0xfff8000000000000,2 +np.float64,0x7fca8fc195351f82,0x408621ae61aa6c13,2 +np.float64,0x1b28e2ae3651d,0x1b28e2ae3651d,2 +np.float64,0x3fe7fdbd14effb7a,0x3fe1e714884b46a8,2 +np.float64,0x3fdf1ce068be39c0,0x3fd95b054e0fad3d,2 +np.float64,0x3fe79f9a636f3f34,0x3fe1b11a40c00b3e,2 +np.float64,0x3fe60eb7036c1d6e,0x3fe0c72a02176874,2 +np.float64,0x229da17e453b5,0x229da17e453b5,2 +np.float64,0x3fc1a921b5235240,0x3fc08b3f35e47fb1,2 +np.float64,0xbb92d2af7725b,0xbb92d2af7725b,2 +np.float64,0x3fe4110cb1e8221a,0x3fdf2787de6c73f7,2 +np.float64,0xbfbc87771a390ef0,0xbfbe3f6e95622363,2 +np.float64,0xbfe74025dfee804c,0xbff4bf7b1895e697,2 +np.float64,0x964eb6592c9d7,0x964eb6592c9d7,2 +np.float64,0x3f951689b82a2d00,0x3f94dfb38d746fdf,2 +np.float64,0x800356271be6ac4f,0x800356271be6ac4f,2 +np.float64,0x7fefffffffffffff,0x40862e42fefa39ef,2 +np.float64,0xbfed5ce250fab9c5,0xc003f7ddfeb94345,2 +np.float64,0x3fec3d5dc1387abc,0x3fe43e39c02d86f4,2 +np.float64,0x3999897e73332,0x3999897e73332,2 +np.float64,0xbfdcb57744b96aee,0xbfe30c4b98f3d088,2 +np.float64,0x7f961fb0b82c3f60,0x40860f9549c3a380,2 +np.float64,0x67d6efcacfadf,0x67d6efcacfadf,2 +np.float64,0x8002c9498f859294,0x8002c9498f859294,2 +np.float64,0xbfa3033800260670,0xbfa35fe3bf43e188,2 +np.float64,0xbfeab2fc157565f8,0xbffcc413c486b4eb,2 +np.float64,0x3fe25e62f364bcc6,0x3fdd0856e19e3430,2 +np.float64,0x7fb2f42dda25e85b,0x4086196fb34a65fd,2 +np.float64,0x3fe0f1a5af61e34c,0x3fdb3235a1786efb,2 +np.float64,0x800a340ca1f4681a,0x800a340ca1f4681a,2 +np.float64,0x7c20b9def8418,0x7c20b9def8418,2 +np.float64,0xdf0842a1be109,0xdf0842a1be109,2 +np.float64,0x3fe9f22cc2f3e45a,0x3fe300359b842bf0,2 +np.float64,0x3fe389ed73e713da,0x3fde809780fe4432,2 +np.float64,0x9500fb932a020,0x9500fb932a020,2 +np.float64,0x3fd8a21ffdb14440,0x3fd4d70862345d86,2 +np.float64,0x800d99c15cbb3383,0x800d99c15cbb3383,2 +np.float64,0x3fd96c98c932d932,0x3fd568959c9b028f,2 +np.float64,0x7fc228483a24508f,0x40861ea358420976,2 +np.float64,0x7fc6737bef2ce6f7,0x408620560ffc6a98,2 +np.float64,0xbfb2c27cee2584f8,0xbfb37b8cc7774b5f,2 +np.float64,0xbfd18409f9230814,0xbfd4771d1a9a24fb,2 +np.float64,0x3fb53cb3f42a7968,0x3fb466f06f88044b,2 +np.float64,0x3fef61d0187ec3a0,0x3fe5dec8a9d13dd9,2 +np.float64,0x3fe59a6ffd2b34e0,0x3fe0820a99c6143d,2 +np.float64,0x3fce18aff43c3160,0x3fcb07c7b523f0d1,2 +np.float64,0xbfb1319a62226338,0xbfb1cc62f31b2b40,2 +np.float64,0xa00cce6d4019a,0xa00cce6d4019a,2 +np.float64,0x80068ae8e0ed15d3,0x80068ae8e0ed15d3,2 +np.float64,0x3fecef353239de6a,0x3fe49c280adc607b,2 +np.float64,0x3fdf1a7fb0be34ff,0x3fd9596bafe2d766,2 +np.float64,0x3feb5e12eeb6bc26,0x3fe3c6be3ede8d07,2 +np.float64,0x3fdeff5cd43dfeba,0x3fd947262ec96b05,2 +np.float64,0x3f995e75e832bd00,0x3f990f511f4c7f1c,2 +np.float64,0xbfeb5b3ed0b6b67e,0xbffee24fc0fc2881,2 +np.float64,0x7fb82aad0a305559,0x40861b614d901182,2 +np.float64,0xbfe5c3a4926b8749,0xbff23cd0ad144fe6,2 +np.float64,0x3fef47da373e8fb4,0x3fe5d1aaa4031993,2 +np.float64,0x7fc6a8c3872d5186,0x40862068f5ca84be,2 +np.float64,0x7fc0c2276221844e,0x40861dff2566d001,2 +np.float64,0x7fc9ce7d28339cf9,0x40862173541f84d1,2 +np.float64,0x3fce2c34933c5869,0x3fcb179428ad241d,2 +np.float64,0xbfcf864c293f0c98,0xbfd21872c4821cfc,2 +np.float64,0x3fc51fd1f82a3fa4,0x3fc38d4f1685c166,2 +np.float64,0xbfe2707b70a4e0f7,0xbfeb795fbd5bb444,2 +np.float64,0x46629b568cc54,0x46629b568cc54,2 +np.float64,0x7fe5f821f32bf043,0x40862b40c2cdea3f,2 
+np.float64,0x3fedd2c9457ba592,0x3fe512ce92394526,2 +np.float64,0x7fe6dcb8ceadb971,0x40862b925a7dc05d,2 +np.float64,0x3fd1b983b4a37307,0x3fcf4ae2545cf64e,2 +np.float64,0xbfe1c93104639262,0xbfe9f7d28e4c0c82,2 +np.float64,0x995ebc2932bd8,0x995ebc2932bd8,2 +np.float64,0x800a4c3ee614987e,0x800a4c3ee614987e,2 +np.float64,0x3fbb58766e36b0f0,0x3fb9fb3b9810ec16,2 +np.float64,0xbfe36d636666dac7,0xbfede5080f69053c,2 +np.float64,0x3f4feee1003fddc2,0x3f4feae5f05443d1,2 +np.float64,0x3fed0b772ffa16ee,0x3fe4aafb924903c6,2 +np.float64,0x800bb3faef3767f6,0x800bb3faef3767f6,2 +np.float64,0x3fe285cda5e50b9c,0x3fdd3a58df06c427,2 +np.float64,0x7feb9d560bb73aab,0x40862d152362bb94,2 +np.float64,0x3fecd1f447f9a3e9,0x3fe48cc78288cb3f,2 +np.float64,0x3fca927b0c3524f6,0x3fc8250f49ba28df,2 +np.float64,0x7fcc19944e383328,0x40862221b02fcf43,2 +np.float64,0xbfd8ddf41db1bbe8,0xbfdf7b92073ff2fd,2 +np.float64,0x80006fe736e0dfcf,0x80006fe736e0dfcf,2 +np.float64,0x800bbeb66d577d6d,0x800bbeb66d577d6d,2 +np.float64,0xbfe4329353e86526,0xbfefeaf19ab92b42,2 +np.float64,0x2fad72805f5af,0x2fad72805f5af,2 +np.float64,0x3fe1b827aa637050,0x3fdc33bf46012c0d,2 +np.float64,0x3fc3f3f8e227e7f2,0x3fc28aeb86d65278,2 +np.float64,0x3fec018933780312,0x3fe41e619aa4285c,2 +np.float64,0xbfd92428e0b24852,0xbfdfeecb08d154df,2 +np.float64,0x2d7046845ae0a,0x2d7046845ae0a,2 +np.float64,0x7fde7fd2233cffa3,0x408628550f8a948f,2 +np.float64,0x8000a32cd241465a,0x8000a32cd241465a,2 +np.float64,0x8004267a45084cf5,0x8004267a45084cf5,2 +np.float64,0xbfe6b422556d6844,0xbff3c71f67661e6e,2 +np.float64,0x3fe3a37d922746fb,0x3fdea04e04d6195c,2 +np.float64,0xbfddcc54b53b98aa,0xbfe40d2389cdb848,2 +np.float64,0x3fe18b4b92a31697,0x3fdbf9e68cbf5794,2 +np.float64,0x7fc9c5b2ee338b65,0x408621709a17a47a,2 +np.float64,0x1ebd1ce03d7b,0x1ebd1ce03d7b,2 +np.float64,0x8008a6fc39d14df9,0x8008a6fc39d14df9,2 +np.float64,0x3fec11384c782270,0x3fe426bdaedd2965,2 +np.float64,0x3fefc28344ff8507,0x3fe60f75d34fc3d2,2 +np.float64,0xc35f379786be7,0xc35f379786be7,2 +np.float64,0x3feef51f4a7dea3e,0x3fe5a7b95d7786b5,2 +np.float64,0x3fec9b9f0379373e,0x3fe4702477abbb63,2 +np.float64,0x3fde94f8cdbd29f0,0x3fd8ff50f7df0a6f,2 +np.float64,0xbfed32d1cdfa65a4,0xc0037c1470f6f979,2 +np.float64,0x800d3ba44f5a7749,0x800d3ba44f5a7749,2 +np.float64,0x3fe3c56c8fe78ad9,0x3fdeca4eb9bb8918,2 +np.float64,0xbfe7c97242ef92e4,0xbff5c2950dfd6f69,2 +np.float64,0xbd9440057b288,0xbd9440057b288,2 +np.float64,0x7feb2fc111f65f81,0x40862cf524bd2001,2 +np.float64,0x800a431e2df4863d,0x800a431e2df4863d,2 +np.float64,0x80038a3b79e71478,0x80038a3b79e71478,2 +np.float64,0x80000c93d4601928,0x80000c93d4601928,2 +np.float64,0x7fe9fec022f3fd7f,0x40862c995db8ada0,2 +np.float64,0x3fead0129c35a025,0x3fe379d7a92c8f79,2 +np.float64,0x3fdd8cbaf7bb1974,0x3fd84b87ff0c26c7,2 +np.float64,0x3fe8fb7c60b1f6f9,0x3fe276d5339e7135,2 +np.float64,0x85a255e10b44b,0x85a255e10b44b,2 +np.float64,0xbfe507c23fea0f84,0xbff1212d2260022a,2 +np.float64,0x3fc5487c7b2a90f9,0x3fc3b03222d3d148,2 +np.float64,0x7fec0bdcb8f817b8,0x40862d34e8fd11e7,2 +np.float64,0xbfc5f34b4f2be698,0xbfc8146a899c7a0c,2 +np.float64,0xbfa2a49c14254940,0xbfa2fdab2eae3826,2 +np.float64,0x800ec52f15dd8a5e,0x800ec52f15dd8a5e,2 +np.float64,0xbfe3ba4b12a77496,0xbfeeab256b3e9422,2 +np.float64,0x80034d6c7ba69ada,0x80034d6c7ba69ada,2 +np.float64,0x7fd394d4202729a7,0x408624c98a216742,2 +np.float64,0xbfd4493a38289274,0xbfd865d67af2de91,2 +np.float64,0xe47d6203c8fad,0xe47d6203c8fad,2 +np.float64,0x98eb4e4b31d6a,0x98eb4e4b31d6a,2 +np.float64,0x4507fb128a100,0x4507fb128a100,2 
+np.float64,0xbfc77032e42ee064,0xbfc9e36ab747a14d,2 +np.float64,0xa1f8a03b43f14,0xa1f8a03b43f14,2 +np.float64,0xbfc3d4da8527a9b4,0xbfc58c27af2476b0,2 +np.float64,0x3fc0eb7d6921d6fb,0x3fbfc858a077ed61,2 +np.float64,0x7fddb2e9403b65d2,0x4086281e98443709,2 +np.float64,0xbfa7ea62942fd4c0,0xbfa87dfd06b05d2a,2 +np.float64,0xbfe7d5c5426fab8a,0xbff5daa969c6d9e5,2 +np.float64,0x3fbf7cba0c3ef974,0x3fbdb23cd8fe875b,2 +np.float64,0x7fe92021eb324043,0x40862c53aee8b154,2 +np.float64,0x7fefbaa1827f7542,0x40862e3194737072,2 +np.float64,0x3fc6f82c402df059,0x3fc520432cbc533f,2 +np.float64,0x7fb37679a826ecf2,0x408619a5f857e27f,2 +np.float64,0x79ec1528f3d83,0x79ec1528f3d83,2 +np.float64,0x3fbefe1d0c3dfc3a,0x3fbd41650ba2c893,2 +np.float64,0x3fc3e5e11827cbc2,0x3fc27eb9b47c9c42,2 +np.float64,0x16aed1922d5db,0x16aed1922d5db,2 +np.float64,0x800124f7e58249f1,0x800124f7e58249f1,2 +np.float64,0x8004f7d12489efa3,0x8004f7d12489efa3,2 +np.float64,0x3fef80b8e27f0172,0x3fe5ee5fd43322c6,2 +np.float64,0xbfe7740c88eee819,0xbff51f823c8da14d,2 +np.float64,0xbfe6e1f1f6edc3e4,0xbff416bcb1302e7c,2 +np.float64,0x8001a2c4a7e3458a,0x8001a2c4a7e3458a,2 +np.float64,0x3fe861e155f0c3c2,0x3fe2201d3000c329,2 +np.float64,0x3fd00a101a201420,0x3fcca01087dbd728,2 +np.float64,0x7fdf0eb1133e1d61,0x4086287a327839b8,2 +np.float64,0x95e3ffdb2bc80,0x95e3ffdb2bc80,2 +np.float64,0x3fd87a1e8230f43d,0x3fd4ba1eb9be1270,2 +np.float64,0x3fedc4792afb88f2,0x3fe50b6529080f73,2 +np.float64,0x7fc9e81fa833d03e,0x4086217b428cc6ff,2 +np.float64,0xbfd21f1ba5a43e38,0xbfd54e048b988e09,2 +np.float64,0xbfbf52af5a3ea560,0xbfc0b4ab3b81fafc,2 +np.float64,0x7fe475f8e268ebf1,0x40862aaf14fee029,2 +np.float64,0x3fcf56899f3ead10,0x3fcc081de28ae9cf,2 +np.float64,0x917d407122fa8,0x917d407122fa8,2 +np.float64,0x22e23e3245c49,0x22e23e3245c49,2 +np.float64,0xbfeec2814f3d8503,0xc00a00ecca27b426,2 +np.float64,0xbfd97fee1c32ffdc,0xbfe04351dfe306ec,2 diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-log2.csv b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-log2.csv new file mode 100644 index 0000000..179c651 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-log2.csv @@ -0,0 +1,1629 @@ +dtype,input,output,ulperrortol +np.float32,0x80000000,0xff800000,3 +np.float32,0x7f12870a,0x42fe63db,3 +np.float32,0x3ef29cf5,0xbf89eb12,3 +np.float32,0x3d6ba8fb,0xc083d26c,3 +np.float32,0x3d9907e8,0xc06f8230,3 +np.float32,0x4ee592,0xc2fd656e,3 +np.float32,0x58d8b1,0xc2fd0db3,3 +np.float32,0x7ba103,0xc2fc19aa,3 +np.float32,0x7f52e90e,0x42ff70e4,3 +np.float32,0x7fcb15,0xc2fc0132,3 +np.float32,0x7cb7129f,0x42f50855,3 +np.float32,0x9faba,0xc301ae59,3 +np.float32,0x7f300a,0xc2fc04b4,3 +np.float32,0x3f0bf047,0xbf5f10cb,3 +np.float32,0x2fb1fb,0xc2fed934,3 +np.float32,0x3eedb0d1,0xbf8db417,3 +np.float32,0x3d7a0b40,0xc0811638,3 +np.float32,0x2e0bac,0xc2fef334,3 +np.float32,0x6278c1,0xc2fcc1b9,3 +np.float32,0x7f61ab2e,0x42ffa2d9,3 +np.float32,0x8fe7c,0xc301d4be,3 +np.float32,0x3f25e6ee,0xbf203536,3 +np.float32,0x7efc78f0,0x42fdf5c0,3 +np.float32,0x6d7304,0xc2fc73a7,3 +np.float32,0x7f1a472a,0x42fe89ed,3 +np.float32,0x7dd029a6,0x42f96734,3 +np.float32,0x3e9b9327,0xbfdbf8f7,3 +np.float32,0x3f4eefc1,0xbe9d2942,3 +np.float32,0x7f5b9b64,0x42ff8ebc,3 +np.float32,0x3e458ee1,0xc017ed6e,3 +np.float32,0x3f7b766b,0xbcd35acf,3 +np.float32,0x3e616070,0xc00bc378,3 +np.float32,0x7f20e633,0x42fea8f8,3 +np.float32,0x3ee3b461,0xbf95a126,3 +np.float32,0x7e7722ba,0x42fbe5f8,3 +np.float32,0x3f0873d7,0xbf6861fa,3 
+np.float32,0x7b4cb2,0xc2fc1ba3,3 +np.float32,0x3f0b6b02,0xbf60712e,3 +np.float32,0x9bff4,0xc301b6f2,3 +np.float32,0x3f07be25,0xbf6a4f0c,3 +np.float32,0x3ef10e57,0xbf8b1b75,3 +np.float32,0x46ad75,0xc2fdb6b1,3 +np.float32,0x3f7bc542,0xbcc4e3a9,3 +np.float32,0x3f6673d4,0xbe1b509c,3 +np.float32,0x7f19fe59,0x42fe8890,3 +np.float32,0x7f800000,0x7f800000,3 +np.float32,0x7f2fe696,0x42feead0,3 +np.float32,0x3dc9432d,0xc0563655,3 +np.float32,0x3ee47623,0xbf950446,3 +np.float32,0x3f1f8817,0xbf2eab51,3 +np.float32,0x7f220ec5,0x42feae44,3 +np.float32,0x2325e3,0xc2ffbab1,3 +np.float32,0x29dfc8,0xc2ff395a,3 +np.float32,0x7f524950,0x42ff6eb3,3 +np.float32,0x3e2234e0,0xc02a21c8,3 +np.float32,0x7f1c6f5a,0x42fe942f,3 +np.float32,0x3b6a61,0xc2fe36e7,3 +np.float32,0x3f1df90e,0xbf324ba9,3 +np.float32,0xb57f0,0xc3017f07,3 +np.float32,0x7d0eba,0xc2fc112e,3 +np.float32,0x403aa9,0xc2fdfd5c,3 +np.float32,0x3e74ecc7,0xc004155f,3 +np.float32,0x17509c,0xc30074f2,3 +np.float32,0x7f62196b,0x42ffa442,3 +np.float32,0x3ecef9a9,0xbfa7417a,3 +np.float32,0x7f14b158,0x42fe6eb1,3 +np.float32,0x3ede12be,0xbf9a40fe,3 +np.float32,0x42cfaa,0xc2fde03f,3 +np.float32,0x3f407b0f,0xbed2a6f5,3 +np.float32,0x7f7fffff,0x43000000,3 +np.float32,0x5467c6,0xc2fd3394,3 +np.float32,0x7ea6b80f,0x42fcc336,3 +np.float32,0x3f21e7b2,0xbf293704,3 +np.float32,0x3dc7e9eb,0xc056d542,3 +np.float32,0x7f3e6e67,0x42ff2571,3 +np.float32,0x3e3e809d,0xc01b4911,3 +np.float32,0x3f800000,0x0,3 +np.float32,0x3d8fd238,0xc0753d52,3 +np.float32,0x3f74aa65,0xbd85cd0e,3 +np.float32,0x7ec30305,0x42fd36ff,3 +np.float32,0x3e97bb93,0xbfe0971d,3 +np.float32,0x3e109d9c,0xc034bb1b,3 +np.float32,0x3f4a0b67,0xbeaed537,3 +np.float32,0x3f25a7aa,0xbf20c228,3 +np.float32,0x3ebc05eb,0xbfb8fd6b,3 +np.float32,0x3eebe749,0xbf8f18e5,3 +np.float32,0x3e9dc479,0xbfd96356,3 +np.float32,0x7f245200,0x42feb882,3 +np.float32,0x1573a8,0xc30093b5,3 +np.float32,0x3e66c4b9,0xc00994a6,3 +np.float32,0x3e73bffc,0xc0048709,3 +np.float32,0x3dfef8e5,0xc0405f16,3 +np.float32,0x403750,0xc2fdfd83,3 +np.float32,0x3ebedf17,0xbfb636a4,3 +np.float32,0x15cae6,0xc3008de2,3 +np.float32,0x3edf4d4e,0xbf993c24,3 +np.float32,0x3f7cc41e,0xbc963fb3,3 +np.float32,0x3e9e12a4,0xbfd907ee,3 +np.float32,0x7ded7b59,0x42f9c889,3 +np.float32,0x7f034878,0x42fe12b5,3 +np.float32,0x7ddce43f,0x42f9930b,3 +np.float32,0x3d82b257,0xc07e1333,3 +np.float32,0x3dae89c1,0xc0635dd4,3 +np.float32,0x6b1d00,0xc2fc8396,3 +np.float32,0x449a5a,0xc2fdccb3,3 +np.float32,0x4e89d2,0xc2fd68cb,3 +np.float32,0x7e1ae83f,0x42fa8cef,3 +np.float32,0x7e4bb22c,0x42fb572e,3 +np.float32,0x3de308ea,0xc04b1634,3 +np.float32,0x7f238c7a,0x42feb508,3 +np.float32,0x3f6c62a3,0xbdeb86f3,3 +np.float32,0x3e58cba6,0xc00f5908,3 +np.float32,0x7f7dd91f,0x42fff9c4,3 +np.float32,0x3d989376,0xc06fc88d,3 +np.float32,0x3dd013c5,0xc0532339,3 +np.float32,0x4b17e6,0xc2fd89ed,3 +np.float32,0x7f67f287,0x42ffb71e,3 +np.float32,0x3f69365e,0xbe09ba3c,3 +np.float32,0x3e4b8b21,0xc0152bf1,3 +np.float32,0x3a75b,0xc3032171,3 +np.float32,0x7f303676,0x42feec1f,3 +np.float32,0x7f6570e5,0x42ffaf18,3 +np.float32,0x3f5ed61e,0xbe4cf676,3 +np.float32,0x3e9b22f9,0xbfdc7e4f,3 +np.float32,0x2c095e,0xc2ff1428,3 +np.float32,0x3f1b17c1,0xbf391754,3 +np.float32,0x422dc6,0xc2fde746,3 +np.float32,0x3f677c8d,0xbe14b365,3 +np.float32,0x3ef85d0c,0xbf8597a9,3 +np.float32,0x3ecaaa6b,0xbfab2430,3 +np.float32,0x3f0607d1,0xbf6eff3d,3 +np.float32,0x3f011fdb,0xbf7cc50d,3 +np.float32,0x6ed7c1,0xc2fc6a4e,3 +np.float32,0x7ec2d1a2,0x42fd3644,3 +np.float32,0x3f75b7fe,0xbd7238a2,3 +np.float32,0x3ef2d146,0xbf89c344,3 
+np.float32,0x7ec2cd27,0x42fd3633,3 +np.float32,0x7ee1e55a,0x42fda397,3 +np.float32,0x7f464d6a,0x42ff435c,3 +np.float32,0x7f469a93,0x42ff447b,3 +np.float32,0x7ece752f,0x42fd6121,3 +np.float32,0x2ed878,0xc2fee67b,3 +np.float32,0x75b23,0xc3021eff,3 +np.float32,0x3e0f4be4,0xc03593b8,3 +np.float32,0x2778e1,0xc2ff64fc,3 +np.float32,0x5fe2b7,0xc2fcd561,3 +np.float32,0x19b8a9,0xc30050ab,3 +np.float32,0x7df303e5,0x42f9d98d,3 +np.float32,0x608b8d,0xc2fcd051,3 +np.float32,0x588f46,0xc2fd1017,3 +np.float32,0x3eec6a11,0xbf8eb2a1,3 +np.float32,0x3f714121,0xbdaf4906,3 +np.float32,0x7f4f7b9e,0x42ff64c9,3 +np.float32,0x3c271606,0xc0d3b29c,3 +np.float32,0x3f002fe0,0xbf7f75f6,3 +np.float32,0x7efa4798,0x42fdef4f,3 +np.float32,0x3f61a865,0xbe3a601a,3 +np.float32,0x7e8087aa,0x42fc030d,3 +np.float32,0x3f70f0c7,0xbdb321ba,3 +np.float32,0x5db898,0xc2fce63f,3 +np.float32,0x7a965f,0xc2fc1fea,3 +np.float32,0x7f68b112,0x42ffb97c,3 +np.float32,0x7ef0ed3d,0x42fdd32d,3 +np.float32,0x7f3156a1,0x42fef0d3,3 +np.float32,0x3f1d405f,0xbf33fc6e,3 +np.float32,0x3e3494cf,0xc0203945,3 +np.float32,0x6018de,0xc2fcd3c1,3 +np.float32,0x623e49,0xc2fcc370,3 +np.float32,0x3ea29f0f,0xbfd3cad4,3 +np.float32,0xa514,0xc305a20c,3 +np.float32,0x3e1b2ab1,0xc02e3a8f,3 +np.float32,0x3f450b6f,0xbec1578f,3 +np.float32,0x7eb12908,0x42fcf015,3 +np.float32,0x3f10b720,0xbf52ab48,3 +np.float32,0x3e0a93,0xc2fe16f6,3 +np.float32,0x93845,0xc301cb96,3 +np.float32,0x7f4e9ce3,0x42ff61af,3 +np.float32,0x3f6d4296,0xbde09ceb,3 +np.float32,0x6ddede,0xc2fc70d0,3 +np.float32,0x3f4fb6fd,0xbe9a636d,3 +np.float32,0x3f6d08de,0xbde36c0b,3 +np.float32,0x3f56f057,0xbe8122ad,3 +np.float32,0x334e95,0xc2fea349,3 +np.float32,0x7efadbcd,0x42fdf104,3 +np.float32,0x3db02e88,0xc0628046,3 +np.float32,0x3f3309d1,0xbf041066,3 +np.float32,0x2d8722,0xc2fefb8f,3 +np.float32,0x7e926cac,0x42fc6356,3 +np.float32,0x3e3674ab,0xc01f452e,3 +np.float32,0x1b46ce,0xc3003afc,3 +np.float32,0x3f06a338,0xbf6d53fc,3 +np.float32,0x1b1ba7,0xc3003d46,3 +np.float32,0x319dfb,0xc2febc06,3 +np.float32,0x3e2f126a,0xc02315a5,3 +np.float32,0x3f40fe65,0xbed0af9e,3 +np.float32,0x3f1d842f,0xbf335d4b,3 +np.float32,0x3d044e4f,0xc09e78f8,3 +np.float32,0x7f272674,0x42fec51f,3 +np.float32,0x3cda6d8f,0xc0a753db,3 +np.float32,0x3eb92f12,0xbfbbccbb,3 +np.float32,0x7e4318f4,0x42fb3752,3 +np.float32,0x3c5890,0xc2fe2b6d,3 +np.float32,0x3d1993c9,0xc09796f8,3 +np.float32,0x7f18ef24,0x42fe8377,3 +np.float32,0x3e30c3a0,0xc0223244,3 +np.float32,0x3f27cd27,0xbf1c00ef,3 +np.float32,0x3f150957,0xbf47cd6c,3 +np.float32,0x7e7178a3,0x42fbd4d8,3 +np.float32,0x3f298db8,0xbf182ac3,3 +np.float32,0x7cb3be,0xc2fc1348,3 +np.float32,0x3ef64266,0xbf8729de,3 +np.float32,0x3eeb06ce,0xbf8fc8f2,3 +np.float32,0x3f406e36,0xbed2d845,3 +np.float32,0x7f1e1bd3,0x42fe9c0b,3 +np.float32,0x478dcc,0xc2fdad97,3 +np.float32,0x7f7937b5,0x42ffec2b,3 +np.float32,0x3f20f350,0xbf2b6624,3 +np.float32,0x7f13661a,0x42fe683c,3 +np.float32,0x208177,0xc2fff46b,3 +np.float32,0x263cfb,0xc2ff7c72,3 +np.float32,0x7f0bd28c,0x42fe4141,3 +np.float32,0x7230d8,0xc2fc5453,3 +np.float32,0x3f261bbf,0xbf1fbfb4,3 +np.float32,0x737b56,0xc2fc4c05,3 +np.float32,0x3ef88f33,0xbf857263,3 +np.float32,0x7e036464,0x42fa1352,3 +np.float32,0x4b5c4f,0xc2fd874d,3 +np.float32,0x3f77984d,0xbd454596,3 +np.float32,0x3f674202,0xbe162932,3 +np.float32,0x3e7157d9,0xc0057197,3 +np.float32,0x3f3f21da,0xbed7d861,3 +np.float32,0x7f1fb40f,0x42fea375,3 +np.float32,0x7ef0157f,0x42fdd096,3 +np.float32,0x3f71e88d,0xbda74962,3 +np.float32,0x3f174855,0xbf424728,3 +np.float32,0x3f3fdd2c,0xbed505d5,3 
+np.float32,0x7b95d1,0xc2fc19ed,3 +np.float32,0x7f23f4e5,0x42feb6df,3 +np.float32,0x7d741925,0x42f7dcd6,3 +np.float32,0x60f81d,0xc2fccd14,3 +np.float32,0x3f17d267,0xbf40f6ae,3 +np.float32,0x3f036fc8,0xbf7636f8,3 +np.float32,0x167653,0xc30082b5,3 +np.float32,0x256d05,0xc2ff8c4f,3 +np.float32,0x3eccc63d,0xbfa93adb,3 +np.float32,0x7f6c91ea,0x42ffc5b2,3 +np.float32,0x2ee52a,0xc2fee5b3,3 +np.float32,0x3dc3579e,0xc058f80d,3 +np.float32,0x4c7170,0xc2fd7cc4,3 +np.float32,0x7f737f20,0x42ffdb03,3 +np.float32,0x3f2f9dbf,0xbf0b3119,3 +np.float32,0x3f4d0c54,0xbea3eec5,3 +np.float32,0x7e380862,0x42fb0c32,3 +np.float32,0x5d637f,0xc2fce8df,3 +np.float32,0x3f0aa623,0xbf627c27,3 +np.float32,0x3e4d5896,0xc0145b88,3 +np.float32,0x3f6cacdc,0xbde7e7ca,3 +np.float32,0x63a2c3,0xc2fcb90a,3 +np.float32,0x6c138c,0xc2fc7cfa,3 +np.float32,0x2063c,0xc303fb88,3 +np.float32,0x7e9e5a3e,0x42fc9d2f,3 +np.float32,0x56ec64,0xc2fd1ddd,3 +np.float32,0x7f1d6a35,0x42fe98cc,3 +np.float32,0x73dc96,0xc2fc4998,3 +np.float32,0x3e5d74e5,0xc00d6238,3 +np.float32,0x7f033cbb,0x42fe1273,3 +np.float32,0x3f5143fc,0xbe94e4e7,3 +np.float32,0x1d56d9,0xc3002010,3 +np.float32,0x2bf3e4,0xc2ff1591,3 +np.float32,0x3f2a6ef1,0xbf164170,3 +np.float32,0x3f33238b,0xbf03db58,3 +np.float32,0x22780e,0xc2ffc91a,3 +np.float32,0x7f00b873,0x42fe0425,3 +np.float32,0x3f7f6145,0xbb654706,3 +np.float32,0x7fc00000,0x7fc00000,3 +np.float32,0x63895a,0xc2fcb9c7,3 +np.float32,0x18a1b2,0xc30060a8,3 +np.float32,0x7e43c6a6,0x42fb39e3,3 +np.float32,0x78676e,0xc2fc2d30,3 +np.float32,0x3f16d839,0xbf435940,3 +np.float32,0x7eff78ba,0x42fdfe79,3 +np.float32,0x3f2e152c,0xbf0e6e54,3 +np.float32,0x3db20ced,0xc06186e1,3 +np.float32,0x3f0cd1d8,0xbf5cbf57,3 +np.float32,0x3fd7a8,0xc2fe01d2,3 +np.float32,0x3ebb075e,0xbfb9f816,3 +np.float32,0x7f94ef,0xc2fc026b,3 +np.float32,0x3d80ba0e,0xc07f7a2b,3 +np.float32,0x7f227e15,0x42feb03f,3 +np.float32,0x792264bf,0x42e6afcc,3 +np.float32,0x7f501576,0x42ff66ec,3 +np.float32,0x223629,0xc2ffcea3,3 +np.float32,0x40a79e,0xc2fdf87b,3 +np.float32,0x449483,0xc2fdccf2,3 +np.float32,0x3f4fa978,0xbe9a9382,3 +np.float32,0x7f148c53,0x42fe6df9,3 +np.float32,0x3ec98b3c,0xbfac2a98,3 +np.float32,0x3e4da320,0xc0143a0a,3 +np.float32,0x3d1d94bb,0xc09666d0,3 +np.float32,0x3c8e624e,0xc0bb155b,3 +np.float32,0x66a9af,0xc2fca2ef,3 +np.float32,0x3ec76ed7,0xbfae1c57,3 +np.float32,0x3f4b52f3,0xbeaa2b81,3 +np.float32,0x7e99bbb5,0x42fc8750,3 +np.float32,0x3f69a46b,0xbe0701be,3 +np.float32,0x3f775400,0xbd4ba495,3 +np.float32,0x131e56,0xc300be3c,3 +np.float32,0x3f30abb4,0xbf08fb10,3 +np.float32,0x7f7e528c,0x42fffb25,3 +np.float32,0x3eb89515,0xbfbc668a,3 +np.float32,0x7e9191b6,0x42fc5f02,3 +np.float32,0x7e80c7e9,0x42fc047e,3 +np.float32,0x3f77ef58,0xbd3d2995,3 +np.float32,0x7ddb1f8a,0x42f98d1b,3 +np.float32,0x7ebc6c4f,0x42fd1d9c,3 +np.float32,0x3f6638e0,0xbe1ccab8,3 +np.float32,0x7f4c45,0xc2fc0410,3 +np.float32,0x3e7d8aad,0xc000e414,3 +np.float32,0x3f4d148b,0xbea3d12e,3 +np.float32,0x3e98c45c,0xbfdf55f4,3 +np.float32,0x3d754c78,0xc081f8a9,3 +np.float32,0x17e4cf,0xc3006be3,3 +np.float32,0x7eb65814,0x42fd0563,3 +np.float32,0x3f65e0d8,0xbe1f0008,3 +np.float32,0x3e99541f,0xbfdea87e,3 +np.float32,0x3f3cb80e,0xbee13b27,3 +np.float32,0x3e99f0c0,0xbfddec3b,3 +np.float32,0x3f43903e,0xbec6ea66,3 +np.float32,0x7e211cd4,0x42faa9f2,3 +np.float32,0x824af,0xc301f971,3 +np.float32,0x3e16a56e,0xc030f56c,3 +np.float32,0x542b3b,0xc2fd35a6,3 +np.float32,0x3eeea2d1,0xbf8cf873,3 +np.float32,0x232e93,0xc2ffb9fa,3 +np.float32,0x3e8c52b9,0xbfef06aa,3 +np.float32,0x7f69c7e3,0x42ffbcef,3 
+np.float32,0x3f573e43,0xbe801714,3 +np.float32,0x43b009,0xc2fdd69f,3 +np.float32,0x3ee571ab,0xbf943966,3 +np.float32,0x3ee3d5d8,0xbf958604,3 +np.float32,0x338b12,0xc2fe9fe4,3 +np.float32,0x29cb1f,0xc2ff3ac6,3 +np.float32,0x3f0892b4,0xbf680e7a,3 +np.float32,0x3e8c4f7f,0xbfef0ae9,3 +np.float32,0x7c9d3963,0x42f497e6,3 +np.float32,0x3f26ba84,0xbf1e5f59,3 +np.float32,0x3dd0acc0,0xc052df6f,3 +np.float32,0x3e43fbda,0xc018aa8c,3 +np.float32,0x3ec4fd0f,0xbfb0635d,3 +np.float32,0x3f52c8c6,0xbe8f8d85,3 +np.float32,0x3f5fdc5d,0xbe462fdb,3 +np.float32,0x3f461920,0xbebd6743,3 +np.float32,0x6161ff,0xc2fcc9ef,3 +np.float32,0x7f7ed306,0x42fffc9a,3 +np.float32,0x3d212263,0xc0955f46,3 +np.float32,0x3eca5826,0xbfab6f36,3 +np.float32,0x7d6317ac,0x42f7a77e,3 +np.float32,0x3eb02063,0xbfc50f60,3 +np.float32,0x7f71a6f8,0x42ffd565,3 +np.float32,0x1a3efe,0xc3004935,3 +np.float32,0x3dc599c9,0xc057e856,3 +np.float32,0x3f3e1301,0xbedbf205,3 +np.float32,0xf17d4,0xc301158d,3 +np.float32,0x3f615f84,0xbe3c3d85,3 +np.float32,0x3de63be1,0xc049cb77,3 +np.float32,0x3e8d2f51,0xbfede541,3 +np.float32,0x3a5cdd,0xc2fe441c,3 +np.float32,0x3f443ec0,0xbec4586a,3 +np.float32,0x3eacbd00,0xbfc8a5ad,3 +np.float32,0x3f600f6a,0xbe44df1b,3 +np.float32,0x5f77a6,0xc2fcd89c,3 +np.float32,0x476706,0xc2fdaf28,3 +np.float32,0x2f469,0xc3036fde,3 +np.float32,0x7dc4ba24,0x42f93d77,3 +np.float32,0x3e2d6080,0xc023fb9b,3 +np.float32,0x7e8d7135,0x42fc49c3,3 +np.float32,0x3f589065,0xbe77247b,3 +np.float32,0x3f59e210,0xbe6e2c05,3 +np.float32,0x7f51d388,0x42ff6d15,3 +np.float32,0x7d9a5fda,0x42f88a63,3 +np.float32,0x3e67d5bc,0xc00927ab,3 +np.float32,0x61d72c,0xc2fcc679,3 +np.float32,0x3ef3351d,0xbf897766,3 +np.float32,0x1,0xc3150000,3 +np.float32,0x7f653429,0x42ffae54,3 +np.float32,0x7e1ad3e5,0x42fa8c8e,3 +np.float32,0x3f4ca01d,0xbea57500,3 +np.float32,0x3f7606db,0xbd6ad13e,3 +np.float32,0x7ec4a27d,0x42fd3d1f,3 +np.float32,0x3efe4fd5,0xbf8138c7,3 +np.float32,0x77c2f1,0xc2fc3124,3 +np.float32,0x7e4d3251,0x42fb5c9a,3 +np.float32,0x3f543ac7,0xbe8a8154,3 +np.float32,0x7c3dbe29,0x42f322c4,3 +np.float32,0x408e01,0xc2fdf9a0,3 +np.float32,0x45069b,0xc2fdc829,3 +np.float32,0x3d7ecab7,0xc08037e8,3 +np.float32,0xf8c22,0xc3010a99,3 +np.float32,0x7f69af63,0x42ffbca2,3 +np.float32,0x7ec7d228,0x42fd48fe,3 +np.float32,0xff800000,0xffc00000,3 +np.float32,0xdd7c5,0xc301357c,3 +np.float32,0x143f38,0xc300a90e,3 +np.float32,0x7e65c176,0x42fbb01b,3 +np.float32,0x2c1a9e,0xc2ff1307,3 +np.float32,0x7f6e9224,0x42ffcbeb,3 +np.float32,0x3d32ab39,0xc0909a77,3 +np.float32,0x3e150b42,0xc031f22b,3 +np.float32,0x1f84b4,0xc300059a,3 +np.float32,0x3f71ce21,0xbda88c2a,3 +np.float32,0x2625c4,0xc2ff7e33,3 +np.float32,0x3dd0b293,0xc052dcdc,3 +np.float32,0x625c11,0xc2fcc290,3 +np.float32,0x3f610297,0xbe3e9f24,3 +np.float32,0x7ebdd5e5,0x42fd2320,3 +np.float32,0x3e883458,0xbff486ff,3 +np.float32,0x782313,0xc2fc2ed4,3 +np.float32,0x7f39c843,0x42ff132f,3 +np.float32,0x7f326aa7,0x42fef54d,3 +np.float32,0x4d2c71,0xc2fd75be,3 +np.float32,0x3f55747c,0xbe86409e,3 +np.float32,0x7f7f0867,0x42fffd34,3 +np.float32,0x321316,0xc2feb53f,3 +np.float32,0x3e1b37ed,0xc02e32b0,3 +np.float32,0x80edf,0xc301fd54,3 +np.float32,0x3f0b08ad,0xbf617607,3 +np.float32,0x7f3f4174,0x42ff28a2,3 +np.float32,0x3d79306d,0xc0813eb0,3 +np.float32,0x3f5f657a,0xbe49413d,3 +np.float32,0x3f56c63a,0xbe81b376,3 +np.float32,0x7f667123,0x42ffb24f,3 +np.float32,0x3f71021b,0xbdb24d43,3 +np.float32,0x7f434ab1,0x42ff380f,3 +np.float32,0x3dcae496,0xc055779c,3 +np.float32,0x3f5a7d88,0xbe6a0f5b,3 +np.float32,0x3cdf5c32,0xc0a64bf5,3 
+np.float32,0x3e56222c,0xc0107d11,3 +np.float32,0x561a3a,0xc2fd24df,3 +np.float32,0x7ddd953c,0x42f9955a,3 +np.float32,0x7e35d839,0x42fb035c,3 +np.float32,0x3ec1816c,0xbfb3aeb2,3 +np.float32,0x7c87cfcd,0x42f42bc2,3 +np.float32,0xd9cd,0xc3053baf,3 +np.float32,0x3f388234,0xbef1e5b7,3 +np.float32,0x3edfcaca,0xbf98d47b,3 +np.float32,0x3ef28852,0xbf89fac8,3 +np.float32,0x7f7525df,0x42ffe001,3 +np.float32,0x7f6c33ef,0x42ffc48c,3 +np.float32,0x3ea4a881,0xbfd17e61,3 +np.float32,0x3f3e379f,0xbedb63c6,3 +np.float32,0x3f0524c1,0xbf717301,3 +np.float32,0x3db3e7f0,0xc06091d3,3 +np.float32,0x800000,0xc2fc0000,3 +np.float32,0x3f2f2897,0xbf0c27ce,3 +np.float32,0x7eb1776d,0x42fcf15c,3 +np.float32,0x3f039018,0xbf75dc37,3 +np.float32,0x3c4055,0xc2fe2c96,3 +np.float32,0x3f603653,0xbe43dea5,3 +np.float32,0x7f700d24,0x42ffd07c,3 +np.float32,0x3f4741a3,0xbeb918dc,3 +np.float32,0x3f5fe959,0xbe45da2d,3 +np.float32,0x3f3e4401,0xbedb33b1,3 +np.float32,0x7f0705ff,0x42fe2775,3 +np.float32,0x3ea85662,0xbfcd69b0,3 +np.float32,0x3f15f49f,0xbf458829,3 +np.float32,0x3f17c50e,0xbf411728,3 +np.float32,0x3e483f60,0xc016add2,3 +np.float32,0x3f1ab9e5,0xbf39f71b,3 +np.float32,0x3de0b6fb,0xc04c08fe,3 +np.float32,0x7e671225,0x42fbb452,3 +np.float32,0x80800000,0xffc00000,3 +np.float32,0xe2df3,0xc3012c9d,3 +np.float32,0x3ede1e3c,0xbf9a3770,3 +np.float32,0x3df2ffde,0xc044cfec,3 +np.float32,0x3eed8da5,0xbf8dcf6c,3 +np.float32,0x3ead15c3,0xbfc846e1,3 +np.float32,0x7ef3750a,0x42fddae4,3 +np.float32,0x7e6ab7c0,0x42fbbfe4,3 +np.float32,0x7ea4bbe5,0x42fcba5d,3 +np.float32,0x3f227706,0xbf27f0a1,3 +np.float32,0x3ef39bfd,0xbf89295a,3 +np.float32,0x3f289a20,0xbf1a3edd,3 +np.float32,0x7f225f82,0x42feafb4,3 +np.float32,0x768963,0xc2fc38bc,3 +np.float32,0x3f493c00,0xbeb1ccfc,3 +np.float32,0x3f4e7249,0xbe9ee9a7,3 +np.float32,0x1d0c3a,0xc30023c0,3 +np.float32,0x7f3c5f78,0x42ff1d6a,3 +np.float32,0xff7fffff,0xffc00000,3 +np.float32,0x3ee7896a,0xbf928c2a,3 +np.float32,0x3e788479,0xc002bd2e,3 +np.float32,0x3ee4df17,0xbf94af84,3 +np.float32,0x5e06d7,0xc2fce3d7,3 +np.float32,0x3d7b2776,0xc080e1dc,3 +np.float32,0x3e3d39d3,0xc01be7fd,3 +np.float32,0x7c81dece,0x42f40ab7,3 +np.float32,0x3f7d2085,0xbc856255,3 +np.float32,0x7f7f6627,0x42fffe44,3 +np.float32,0x7f5f2e94,0x42ff9aaa,3 +np.float32,0x7f5835f2,0x42ff8339,3 +np.float32,0x3f6a0e32,0xbe046580,3 +np.float32,0x7e16f586,0x42fa79dd,3 +np.float32,0x3f04a2f2,0xbf72dbc5,3 +np.float32,0x3f35e334,0xbefc7740,3 +np.float32,0x3f0d056e,0xbf5c3824,3 +np.float32,0x7ebeb95e,0x42fd2693,3 +np.float32,0x3c6192,0xc2fe2aff,3 +np.float32,0x3e892b4f,0xbff33958,3 +np.float32,0x3f61d694,0xbe3931df,3 +np.float32,0x29d183,0xc2ff3a56,3 +np.float32,0x7f0b0598,0x42fe3d04,3 +np.float32,0x7f743b28,0x42ffdd3d,3 +np.float32,0x3a2ed6,0xc2fe4663,3 +np.float32,0x3e27403a,0xc0274de8,3 +np.float32,0x3f58ee78,0xbe74a349,3 +np.float32,0x3eaa4b,0xc2fe0f92,3 +np.float32,0x3ecb613b,0xbfaa7de8,3 +np.float32,0x7f637d81,0x42ffa8c9,3 +np.float32,0x3f026e96,0xbf790c73,3 +np.float32,0x386cdf,0xc2fe5d0c,3 +np.float32,0x35abd1,0xc2fe8202,3 +np.float32,0x3eac3cd1,0xbfc92ee8,3 +np.float32,0x3f567869,0xbe82bf47,3 +np.float32,0x3f65c643,0xbe1faae6,3 +np.float32,0x7f5422b9,0x42ff752b,3 +np.float32,0x7c26e9,0xc2fc168c,3 +np.float32,0x7eff5cfd,0x42fdfe29,3 +np.float32,0x3f728e7f,0xbd9f6142,3 +np.float32,0x3f10fd43,0xbf51f874,3 +np.float32,0x7e7ada08,0x42fbf0fe,3 +np.float32,0x3e82a611,0xbffc37be,3 +np.float32,0xbf800000,0xffc00000,3 +np.float32,0x3dbe2e12,0xc05b711c,3 +np.float32,0x7e768fa9,0x42fbe440,3 +np.float32,0x5e44e8,0xc2fce1f0,3 
+np.float32,0x7f25071a,0x42febbae,3 +np.float32,0x3f54db5e,0xbe885339,3 +np.float32,0x3f0f2c26,0xbf56a0b8,3 +np.float32,0x22f9a7,0xc2ffbe55,3 +np.float32,0x7ed63dcb,0x42fd7c77,3 +np.float32,0x7ea4fae2,0x42fcbb78,3 +np.float32,0x3f1d7766,0xbf337b47,3 +np.float32,0x7f16d59f,0x42fe7941,3 +np.float32,0x3f3a1bb6,0xbeeb855c,3 +np.float32,0x3ef57128,0xbf87c709,3 +np.float32,0xb24ff,0xc3018591,3 +np.float32,0x3ef99e27,0xbf84a983,3 +np.float32,0x3eac2ccf,0xbfc94013,3 +np.float32,0x3e9d3e1e,0xbfda00dc,3 +np.float32,0x718213,0xc2fc58c1,3 +np.float32,0x7edbf509,0x42fd8fea,3 +np.float32,0x70c7f1,0xc2fc5d80,3 +np.float32,0x3f7012f5,0xbdbdc6cd,3 +np.float32,0x12cba,0xc304c487,3 +np.float32,0x7f5d445d,0x42ff944c,3 +np.float32,0x7f3e30bd,0x42ff2481,3 +np.float32,0x63b110,0xc2fcb8a0,3 +np.float32,0x3f39f728,0xbeec1680,3 +np.float32,0x3f5bea58,0xbe6074b1,3 +np.float32,0x3f350749,0xbefff679,3 +np.float32,0x3e91ab2c,0xbfe81f3e,3 +np.float32,0x7ec53fe0,0x42fd3f6d,3 +np.float32,0x3f6cbbdc,0xbde72c8e,3 +np.float32,0x3f4df49f,0xbea0abcf,3 +np.float32,0x3e9c9638,0xbfdac674,3 +np.float32,0x7f3b82ec,0x42ff1a07,3 +np.float32,0x7f612a09,0x42ffa132,3 +np.float32,0x7ea26650,0x42fcafd3,3 +np.float32,0x3a615138,0xc122f26d,3 +np.float32,0x3f1108bd,0xbf51db39,3 +np.float32,0x6f80f6,0xc2fc65ea,3 +np.float32,0x3f7cb578,0xbc98ecb1,3 +np.float32,0x7f54d31a,0x42ff7790,3 +np.float32,0x196868,0xc3005532,3 +np.float32,0x3f01ee0a,0xbf7a7925,3 +np.float32,0x3e184013,0xc02ffb11,3 +np.float32,0xadde3,0xc3018ee3,3 +np.float32,0x252a91,0xc2ff9173,3 +np.float32,0x3f0382c2,0xbf7601a9,3 +np.float32,0x6d818c,0xc2fc7345,3 +np.float32,0x3bfbfd,0xc2fe2fdd,3 +np.float32,0x7f3cad19,0x42ff1e9a,3 +np.float32,0x4169a7,0xc2fdefdf,3 +np.float32,0x3f615d96,0xbe3c4a2b,3 +np.float32,0x3f036480,0xbf7656ac,3 +np.float32,0x7f5fbda3,0x42ff9c83,3 +np.float32,0x3d202d,0xc2fe21f1,3 +np.float32,0x3d0f5e5d,0xc09ac3e9,3 +np.float32,0x3f0fff6e,0xbf548142,3 +np.float32,0x7f11ed32,0x42fe60d2,3 +np.float32,0x3e6f856b,0xc00624b6,3 +np.float32,0x7f7c4dd7,0x42fff542,3 +np.float32,0x3e76fb86,0xc0034fa0,3 +np.float32,0x3e8a0d6e,0xbff209e7,3 +np.float32,0x3eacad19,0xbfc8b6ad,3 +np.float32,0xa7776,0xc3019cbe,3 +np.float32,0x3dc84d74,0xc056a754,3 +np.float32,0x3efb8052,0xbf834626,3 +np.float32,0x3f0e55fc,0xbf58cacc,3 +np.float32,0x7e0e71e3,0x42fa4efb,3 +np.float32,0x3ed5a800,0xbfa1639c,3 +np.float32,0x3f33335b,0xbf03babf,3 +np.float32,0x38cad7,0xc2fe5842,3 +np.float32,0x3bc21256,0xc0ecc927,3 +np.float32,0x3f09522d,0xbf660a19,3 +np.float32,0xcbd5d,0xc3015428,3 +np.float32,0x492752,0xc2fd9d42,3 +np.float32,0x3f2b9b32,0xbf13b904,3 +np.float32,0x6544ac,0xc2fcad09,3 +np.float32,0x52eb12,0xc2fd40b5,3 +np.float32,0x3f66a7c0,0xbe1a03e8,3 +np.float32,0x7ab289,0xc2fc1f41,3 +np.float32,0x62af5e,0xc2fcc020,3 +np.float32,0x7f73e9cf,0x42ffdc46,3 +np.float32,0x3e5eca,0xc2fe130e,3 +np.float32,0x3e3a10f4,0xc01d7602,3 +np.float32,0x3f04db46,0xbf723f0d,3 +np.float32,0x18fc4a,0xc3005b63,3 +np.float32,0x525bcb,0xc2fd45b6,3 +np.float32,0x3f6b9108,0xbdf5c769,3 +np.float32,0x3e992e8c,0xbfded5c5,3 +np.float32,0x7efea647,0x42fdfc18,3 +np.float32,0x7e8371db,0x42fc139e,3 +np.float32,0x3f397cfb,0xbeedfc69,3 +np.float32,0x7e46d233,0x42fb454a,3 +np.float32,0x7d5281ad,0x42f76f79,3 +np.float32,0x7f4c1878,0x42ff58a1,3 +np.float32,0x3e96ca5e,0xbfe1bd97,3 +np.float32,0x6a2743,0xc2fc8a3d,3 +np.float32,0x7f688781,0x42ffb8f8,3 +np.float32,0x7814b7,0xc2fc2f2d,3 +np.float32,0x3f2ffdc9,0xbf0a6756,3 +np.float32,0x3f766fa8,0xbd60fe24,3 +np.float32,0x4dc64e,0xc2fd7003,3 +np.float32,0x3a296f,0xc2fe46a8,3 
+np.float32,0x3f2af942,0xbf15162e,3 +np.float32,0x7f702c32,0x42ffd0dc,3 +np.float32,0x7e61e318,0x42fba390,3 +np.float32,0x7f7d3bdb,0x42fff7fa,3 +np.float32,0x3ee87f3f,0xbf91c881,3 +np.float32,0x2bbc28,0xc2ff193c,3 +np.float32,0x3e01f918,0xc03e966e,3 +np.float32,0x7f0b39f4,0x42fe3e1a,3 +np.float32,0x3eaa4d64,0xbfcb4516,3 +np.float32,0x3e53901e,0xc0119a88,3 +np.float32,0x603cb,0xc3026957,3 +np.float32,0x7e81f926,0x42fc0b4d,3 +np.float32,0x5dab7c,0xc2fce6a6,3 +np.float32,0x3f46fefd,0xbeba1018,3 +np.float32,0x648448,0xc2fcb28a,3 +np.float32,0x3ec49470,0xbfb0c58b,3 +np.float32,0x3e8a5393,0xbff1ac2b,3 +np.float32,0x3f27ccfc,0xbf1c014e,3 +np.float32,0x3ed886e6,0xbf9eeca8,3 +np.float32,0x7cfbe06e,0x42f5f401,3 +np.float32,0x3f5aa7ba,0xbe68f229,3 +np.float32,0x9500d,0xc301c7e3,3 +np.float32,0x3f4861,0xc2fe0853,3 +np.float32,0x3e5ae104,0xc00e76f5,3 +np.float32,0x71253a,0xc2fc5b1e,3 +np.float32,0xcf7b8,0xc3014d9c,3 +np.float32,0x7f7edd2d,0x42fffcb7,3 +np.float32,0x3e9039ee,0xbfe9f5ab,3 +np.float32,0x2fd54e,0xc2fed712,3 +np.float32,0x3f600752,0xbe45147a,3 +np.float32,0x3f4da8f6,0xbea1bb5c,3 +np.float32,0x3f2d34a9,0xbf104bd9,3 +np.float32,0x3e1e66dd,0xc02c52d2,3 +np.float32,0x798276,0xc2fc2670,3 +np.float32,0xd55e2,0xc3014347,3 +np.float32,0x80000001,0xffc00000,3 +np.float32,0x3e7a5ead,0xc0020da6,3 +np.float32,0x7ec4c744,0x42fd3da9,3 +np.float32,0x597e00,0xc2fd085a,3 +np.float32,0x3dff6bf4,0xc0403575,3 +np.float32,0x5d6f1a,0xc2fce883,3 +np.float32,0x7e21faff,0x42faadea,3 +np.float32,0x3e570fea,0xc01016c6,3 +np.float32,0x28e6b6,0xc2ff4ab7,3 +np.float32,0x7e77062d,0x42fbe5a3,3 +np.float32,0x74cac4,0xc2fc43b0,3 +np.float32,0x3f707273,0xbdb93078,3 +np.float32,0x228e96,0xc2ffc737,3 +np.float32,0x686ac1,0xc2fc966b,3 +np.float32,0x3d76400d,0xc081cae8,3 +np.float32,0x3e9f502f,0xbfd7966b,3 +np.float32,0x3f6bc656,0xbdf32b1f,3 +np.float32,0x3edb828b,0xbf9c65d4,3 +np.float32,0x6c6e56,0xc2fc7a8e,3 +np.float32,0x3f04552e,0xbf73b48f,3 +np.float32,0x3f39cb69,0xbeecc457,3 +np.float32,0x7f681c44,0x42ffb7a3,3 +np.float32,0x7f5b44ee,0x42ff8d99,3 +np.float32,0x3e71430a,0xc005798d,3 +np.float32,0x3edcfde3,0xbf9b27c6,3 +np.float32,0x3f616a5a,0xbe3bf67f,3 +np.float32,0x3f523936,0xbe918548,3 +np.float32,0x3f39ce3a,0xbeecb925,3 +np.float32,0x3eac589a,0xbfc91120,3 +np.float32,0x7efc8d3d,0x42fdf5fc,3 +np.float32,0x5704b0,0xc2fd1d0f,3 +np.float32,0x7e7972e9,0x42fbecda,3 +np.float32,0x3eb0811c,0xbfc4aa13,3 +np.float32,0x7f1efcbb,0x42fea023,3 +np.float32,0x3e0b9e32,0xc037fa6b,3 +np.float32,0x7eef6a48,0x42fdce87,3 +np.float32,0x3cc0a373,0xc0ad20c0,3 +np.float32,0x3f2a75bb,0xbf1632ba,3 +np.float32,0x0,0xff800000,3 +np.float32,0x7ecdb6f4,0x42fd5e77,3 +np.float32,0x7f2e2dfd,0x42fee38d,3 +np.float32,0x3ee17f6e,0xbf976d8c,3 +np.float32,0x3f51e7ee,0xbe92a319,3 +np.float32,0x3f06942f,0xbf6d7d3c,3 +np.float32,0x3f7ba528,0xbccac6f1,3 +np.float32,0x3f413787,0xbecfd513,3 +np.float32,0x3e085e48,0xc03a2716,3 +np.float32,0x7e4c5e0e,0x42fb599c,3 +np.float32,0x306f76,0xc2fecdd4,3 +np.float32,0x7f5c2203,0x42ff9081,3 +np.float32,0x3d5355b4,0xc088da05,3 +np.float32,0x9a2a,0xc305bb4f,3 +np.float32,0x3db93a1f,0xc05de0db,3 +np.float32,0x4e50c6,0xc2fd6ae4,3 +np.float32,0x7ec4afed,0x42fd3d51,3 +np.float32,0x3a8f27,0xc2fe41a0,3 +np.float32,0x7f213caf,0x42feaa84,3 +np.float32,0x7e7b5f00,0x42fbf286,3 +np.float32,0x7e367194,0x42fb05ca,3 +np.float32,0x7f56e6de,0x42ff7ebd,3 +np.float32,0x3ed7383e,0xbfa00aef,3 +np.float32,0x7e844752,0x42fc184a,3 +np.float32,0x15157,0xc3049a19,3 +np.float32,0x3f78cd92,0xbd28824a,3 +np.float32,0x7ecddb16,0x42fd5ef9,3 
+np.float32,0x3e479f16,0xc016f7d8,3 +np.float32,0x3f5cb418,0xbe5b2bd3,3 +np.float32,0x7c0934cb,0x42f2334e,3 +np.float32,0x3ebe5505,0xbfb6bc69,3 +np.float32,0x3eb1335a,0xbfc3eff5,3 +np.float32,0x3f2488a3,0xbf234444,3 +np.float32,0x642906,0xc2fcb52a,3 +np.float32,0x3da635fa,0xc067e15a,3 +np.float32,0x7e0d80db,0x42fa4a15,3 +np.float32,0x4f0b9d,0xc2fd640a,3 +np.float32,0x7e083806,0x42fa2df8,3 +np.float32,0x7f77f8c6,0x42ffe877,3 +np.float32,0x3e7bb46a,0xc0018ff5,3 +np.float32,0x3f06eb2e,0xbf6c8eca,3 +np.float32,0x7eae8f7c,0x42fce52a,3 +np.float32,0x3de481a0,0xc04a7d7f,3 +np.float32,0x3eed4311,0xbf8e096f,3 +np.float32,0x3f7b0300,0xbce8903d,3 +np.float32,0x3811b,0xc30330dd,3 +np.float32,0x3eb6f8e1,0xbfbe04bc,3 +np.float32,0x3ec35210,0xbfb1f55a,3 +np.float32,0x3d386916,0xc08f24a5,3 +np.float32,0x3f1fa197,0xbf2e704d,3 +np.float32,0x7f2020a5,0x42fea56a,3 +np.float32,0x7e1ea53f,0x42fa9e8c,3 +np.float32,0x3f148903,0xbf490bf9,3 +np.float32,0x3f2f56a0,0xbf0bc6c9,3 +np.float32,0x7da9fc,0xc2fc0d9b,3 +np.float32,0x3d802134,0xc07fe810,3 +np.float32,0x3f6cb927,0xbde74e57,3 +np.float32,0x7e05b125,0x42fa2023,3 +np.float32,0x3f3307f9,0xbf041433,3 +np.float32,0x5666bf,0xc2fd2250,3 +np.float32,0x3f51c93b,0xbe930f28,3 +np.float32,0x3eb5dcfe,0xbfbf241e,3 +np.float32,0xb2773,0xc301853f,3 +np.float32,0x7f4dee96,0x42ff5f3f,3 +np.float32,0x3e3f5c33,0xc01adee1,3 +np.float32,0x3f2ed29a,0xbf0cdd4a,3 +np.float32,0x3e3c01ef,0xc01c80ab,3 +np.float32,0x3ec2236e,0xbfb31458,3 +np.float32,0x7e841dc4,0x42fc1761,3 +np.float32,0x3df2cd8e,0xc044e30c,3 +np.float32,0x3f010901,0xbf7d0670,3 +np.float32,0x3c05ceaa,0xc0ddf39b,3 +np.float32,0x3f517226,0xbe944206,3 +np.float32,0x3f23c83d,0xbf24f522,3 +np.float32,0x7fc9da,0xc2fc0139,3 +np.float32,0x7f1bde53,0x42fe9181,3 +np.float32,0x3ea3786c,0xbfd2d4a5,3 +np.float32,0x3e83a71b,0xbffacdd2,3 +np.float32,0x3f6f0d4f,0xbdca61d5,3 +np.float32,0x7f5ab613,0x42ff8bb7,3 +np.float32,0x3ab1ec,0xc2fe3fea,3 +np.float32,0x4fbf58,0xc2fd5d82,3 +np.float32,0x3dea141b,0xc0484403,3 +np.float32,0x7d86ad3b,0x42f8258f,3 +np.float32,0x7f345315,0x42fefd29,3 +np.float32,0x3f3752fe,0xbef6a780,3 +np.float32,0x64830d,0xc2fcb293,3 +np.float32,0x3d9dc1eb,0xc06cb32a,3 +np.float32,0x3f2f935a,0xbf0b46f6,3 +np.float32,0xb90a4,0xc30177e3,3 +np.float32,0x4111dd,0xc2fdf3c1,3 +np.float32,0x3d4cd078,0xc08a4c68,3 +np.float32,0x3e95c3f1,0xbfe30011,3 +np.float32,0x3ec9f356,0xbfabcb4e,3 +np.float32,0x1b90d5,0xc3003717,3 +np.float32,0xee70f,0xc3011a3e,3 +np.float32,0x7fa00000,0x7fe00000,3 +np.float32,0x3f74cdb6,0xbd8422af,3 +np.float32,0x3d9b56fe,0xc06e2037,3 +np.float32,0x3f1853df,0xbf3fbc40,3 +np.float32,0x7d86a011,0x42f82547,3 +np.float32,0x3dff9629,0xc0402634,3 +np.float32,0x46f8c9,0xc2fdb39f,3 +np.float32,0x3e9b410b,0xbfdc5a87,3 +np.float32,0x3f5aed42,0xbe671cac,3 +np.float32,0x3b739886,0xc101257f,3 +np.float64,0x3fe2f58d6565eb1b,0xbfe82a641138e19a,2 +np.float64,0x3fee7f0642fcfe0d,0xbfb1c702f6974932,2 +np.float64,0x25b71f244b6e5,0xc090030d3b3c5d2b,2 +np.float64,0x8c9cc8e1193b,0xc0900b752a678fa8,2 +np.float64,0x3fd329b5d326536c,0xbffbd607f6db945c,2 +np.float64,0x3fb5109b3a2a2136,0xc00cd36bd15dfb18,2 +np.float64,0x3fd5393ae12a7276,0xbff97a7e4a157154,2 +np.float64,0x3fd374d1b926e9a3,0xbffb7c3e1a3a7ed3,2 +np.float64,0x3fe2c7f4e2658fea,0xbfe899f15ca78fcb,2 +np.float64,0x7fe3d6b81ee7ad6f,0x408ffa7b63d407ee,2 +np.float64,0x3fe086d097e10da1,0xbfee81456ce8dd03,2 +np.float64,0x7fd374a64ca6e94c,0x408ff241c7306d39,2 +np.float64,0x3fc0709a5b20e135,0xc007afdede31b29c,2 +np.float64,0x3fd4218f4b28431f,0xbffab2c696966e2d,2 
+np.float64,0x143134c828628,0xc09006a8372c4d8a,2 +np.float64,0x3f8bd0aa0037a154,0xc018cf0e8b9c3107,2 +np.float64,0x7fe0ce905ee19d20,0x408ff8915e71bd67,2 +np.float64,0x3fda0f5f32b41ebe,0xbff4bd5e0869e820,2 +np.float64,0x7fe9ae63d0b35cc7,0x408ffd760ca4f292,2 +np.float64,0x3fe75abd9eeeb57b,0xbfdd1476fc8b3089,2 +np.float64,0x786c3110f0d87,0xc08ff8b44cedbeea,2 +np.float64,0x22c5fe80458d,0xc09013853591c2f2,2 +np.float64,0x3fdc250797384a0f,0xbff2f6a02c961f0b,2 +np.float64,0x3fa2b367b02566cf,0xc013199238485054,2 +np.float64,0x3fd26a910ca4d522,0xbffcc0e2089b1c0c,2 +np.float64,0x8068d3b300d1b,0xc08ff7f690210aac,2 +np.float64,0x3fe663bfa9ecc77f,0xbfe07cd95a43a5ce,2 +np.float64,0x3fd0ddb07321bb61,0xbffec886665e895e,2 +np.float64,0x3f91c730b0238e61,0xc0176452badc8d22,2 +np.float64,0x4dd10d309ba22,0xc08ffdbe738b1d8d,2 +np.float64,0x7fe322afa4a6455e,0x408ffa10c038f9de,2 +np.float64,0x7fdf7f7c42befef8,0x408ff7d147ddaad5,2 +np.float64,0x7fd673f386ace7e6,0x408ff3e920d00eef,2 +np.float64,0x3feaebfcadb5d7f9,0xbfcfe8ec27083478,2 +np.float64,0x3fdc6dc23738db84,0xbff2bb46794f07b8,2 +np.float64,0xcd8819599b103,0xc08ff288c5b2cf0f,2 +np.float64,0xfda00e77fb402,0xc08ff01b895d2236,2 +np.float64,0x840b02ff08161,0xc08ff7a41e41114c,2 +np.float64,0x3fbdce3a383b9c74,0xc008d1e61903a289,2 +np.float64,0x3fd24ed3c4a49da8,0xbffce3c12136b6d3,2 +np.float64,0x3fe8d0834131a107,0xbfd77b194e7051d4,2 +np.float64,0x3fdd0cb11aba1962,0xbff23b9dbd554455,2 +np.float64,0x1a32d97e3465c,0xc090052781a37271,2 +np.float64,0x3fdb09d2b1b613a5,0xbff3e396b862bd83,2 +np.float64,0x3fe04c848aa09909,0xbfef2540dd90103a,2 +np.float64,0x3fce0c48613c1891,0xc000b9f76877d744,2 +np.float64,0x3fc37109a226e213,0xc005c05d8b2b9a2f,2 +np.float64,0x81cf3837039e7,0xc08ff7d686517dff,2 +np.float64,0xd9342c29b2686,0xc08ff1e591c9a895,2 +np.float64,0x7fec731b0638e635,0x408ffea4884550a9,2 +np.float64,0x3fba0fc138341f82,0xc00a5e839b085f64,2 +np.float64,0x7fdda893b03b5126,0x408ff71f7c5a2797,2 +np.float64,0xd2a4bb03a5498,0xc08ff2402f7a907c,2 +np.float64,0x3fea61fb0d34c3f6,0xbfd1d293fbe76183,2 +np.float64,0x3fed5cf486fab9e9,0xbfbfc2e01a7ffff1,2 +np.float64,0x3fcbabc2bf375785,0xc001ad7750c9dbdf,2 +np.float64,0x3fdb5fff53b6bfff,0xbff39a7973a0c6a5,2 +np.float64,0x7feef05a00bde0b3,0x408fff9c5cbc8651,2 +np.float64,0xb1cf24f1639e5,0xc08ff434de10fffb,2 +np.float64,0x3fa583989c2b0731,0xc0124a8a3bbf18ce,2 +np.float64,0x7feae90bf9f5d217,0x408ffe002e7bbbea,2 +np.float64,0x3fe9ef41c4b3de84,0xbfd367878ae4528e,2 +np.float64,0x9be24ce337c4a,0xc08ff5b9b1c31cf9,2 +np.float64,0x3fe916894cb22d13,0xbfd677f915d58503,2 +np.float64,0x3fec1bab20f83756,0xbfc7f2777aabe8ee,2 +np.float64,0x3feaabf2873557e5,0xbfd0d11f28341233,2 +np.float64,0x3fd4d3c3b529a787,0xbff9e9e47acc8ca9,2 +np.float64,0x3fe4cfe96c699fd3,0xbfe3dc53fa739169,2 +np.float64,0xccfdb97399fb7,0xc08ff2908d893400,2 +np.float64,0x3fec7598be78eb31,0xbfc5a750f8f3441a,2 +np.float64,0x355be5fc6ab7e,0xc090010ca315b50b,2 +np.float64,0x3fba9f9074353f21,0xc00a1f80eaf5e581,2 +np.float64,0x7fdcaff189395fe2,0x408ff6bd1c5b90d9,2 +np.float64,0x3fd94d3b64b29a77,0xbff56be1b43d25f3,2 +np.float64,0x4e5f29949cbe6,0xc08ffda972da1d73,2 +np.float64,0x3fe654e2d9aca9c6,0xbfe09b88dcd8f15d,2 +np.float64,0x7fdc130190b82602,0x408ff67d496c1a27,2 +np.float64,0x3fbcd4701e39a8e0,0xc009343e36627e80,2 +np.float64,0x7fdaa4d38f3549a6,0x408ff5e2c6d8678f,2 +np.float64,0x3febe95e5237d2bd,0xbfc93e16d453fe3a,2 +np.float64,0x9ef5ca553deba,0xc08ff57ff4f7883d,2 +np.float64,0x7fe878e91170f1d1,0x408ffce795868fc8,2 +np.float64,0x3fe63dff466c7bff,0xbfe0caf2b79c9e5f,2 
+np.float64,0x6561446ccac29,0xc08ffab0e383834c,2 +np.float64,0x30c6c2ae618d9,0xc09001914b30381b,2 +np.float64,0x7ff0000000000000,0x7ff0000000000000,2 +np.float64,0x3fe5c9daf1ab93b6,0xbfe1be81baf4dbdb,2 +np.float64,0x3fe0a03e24a1407c,0xbfee3a73c4c0e8f8,2 +np.float64,0xff2a2cf3fe546,0xc08ff009a7e6e782,2 +np.float64,0x7fcf0332213e0663,0x408fefa36235e210,2 +np.float64,0x3fb612affc2c2560,0xc00c494be9c8c33b,2 +np.float64,0x3fd2b259702564b3,0xbffc67967f077e75,2 +np.float64,0x7fcb63685d36c6d0,0x408fee343343f913,2 +np.float64,0x3fe369f1d5a6d3e4,0xbfe71251139939ad,2 +np.float64,0x3fdd17c618ba2f8c,0xbff232d11c986251,2 +np.float64,0x3f92cc8040259901,0xc01711d8e06b52ee,2 +np.float64,0x69a81dc2d3504,0xc08ffa36cdaf1141,2 +np.float64,0x3fea0fad99b41f5b,0xbfd2f4625a652645,2 +np.float64,0xd1cd5799a39ab,0xc08ff24c02b90d26,2 +np.float64,0x324e59ce649cc,0xc0900163ad091c76,2 +np.float64,0x3fc3d460a227a8c1,0xc00585f903dc7a7f,2 +np.float64,0xa7185ec74e30c,0xc08ff4ec7d65ccd9,2 +np.float64,0x3fa254eaac24a9d5,0xc01337053963321a,2 +np.float64,0x3feaeb112435d622,0xbfcfef3be17f81f6,2 +np.float64,0x60144c3ac028a,0xc08ffb4f8eb94595,2 +np.float64,0x7fa4d2ec6829a5d8,0x408fdb0a9670ab83,2 +np.float64,0x3fed1372f97a26e6,0xbfc1b1fe50d48a55,2 +np.float64,0x3fd5ade5972b5bcb,0xbff8fcf28f525031,2 +np.float64,0x7fe72e335bee5c66,0x408ffc4759236437,2 +np.float64,0x7fdfafab143f5f55,0x408ff7e2e22a8129,2 +np.float64,0x3fe90d0db9321a1b,0xbfd69ae5fe10eb9e,2 +np.float64,0x7fe20a59072414b1,0x408ff962a2492484,2 +np.float64,0x3fed853690bb0a6d,0xbfbdc9dc5f199d2b,2 +np.float64,0x3fd709d469ae13a9,0xbff795a218deb700,2 +np.float64,0x3fe21c35f5e4386c,0xbfea47d71789329b,2 +np.float64,0x9ea5ec053d4be,0xc08ff585c2f6b7a3,2 +np.float64,0x3fc0580f9e20b01f,0xc007c1268f49d037,2 +np.float64,0xd99127abb3225,0xc08ff1e0a1ff339d,2 +np.float64,0x3fdc8c9bbfb91937,0xbff2a2478354effb,2 +np.float64,0x3fe15fc6b162bf8d,0xbfec323ac358e008,2 +np.float64,0xffefffffffffffff,0x7ff8000000000000,2 +np.float64,0x3fee341afb3c6836,0xbfb556b6faee9a84,2 +np.float64,0x3fe4b64c56296c99,0xbfe4154835ad2afe,2 +np.float64,0x85de22810bbc5,0xc08ff77b914fe5b5,2 +np.float64,0x3fd22c72e3a458e6,0xbffd0f4269d20bb9,2 +np.float64,0xc090e5218123,0xc09009a4a65a8a8f,2 +np.float64,0x7fd9641692b2c82c,0x408ff5547782bdfc,2 +np.float64,0x3fd9b9cb28b37396,0xbff509a8fb59a9f1,2 +np.float64,0x3fcd2726f93a4e4e,0xc001135059a22117,2 +np.float64,0x3fa4b493d4296928,0xc0128323c7a55f4a,2 +np.float64,0x47455e788e8ac,0xc08ffec2101c1e82,2 +np.float64,0x3fe0d7e2e261afc6,0xbfeda0f1e2d0f4bd,2 +np.float64,0x3fe860fc5b70c1f9,0xbfd91dc42eaf72c2,2 +np.float64,0xa5d7805b4baf0,0xc08ff502bc819ff6,2 +np.float64,0xd83395b1b0673,0xc08ff1f33c3f94c2,2 +np.float64,0x3f865972e02cb2e6,0xc01a1243651565c8,2 +np.float64,0x52fc6952a5f8e,0xc08ffd006b158179,2 +np.float64,0x7fecac6c793958d8,0x408ffebbb1c09a70,2 +np.float64,0x7fe621ff606c43fe,0x408ffbbeb2b1473a,2 +np.float64,0x3fdb9f3f9db73e7f,0xbff365610c52bda7,2 +np.float64,0x7feab92992757252,0x408ffdeb92a04813,2 +np.float64,0xcc46c79f988d9,0xc08ff29adf03fb7c,2 +np.float64,0x3fe3156a03262ad4,0xbfe7dd0f598781c7,2 +np.float64,0x3fc00e3a61201c75,0xc007f5c121a87302,2 +np.float64,0x3fdce8e9f739d1d4,0xbff2581d41ef50ef,2 +np.float64,0x0,0xfff0000000000000,2 +np.float64,0x7d373ac4fa6e8,0xc08ff840fa8beaec,2 +np.float64,0x3fee41e0653c83c1,0xbfb4ae786f2a0d54,2 +np.float64,0x3ff0000000000000,0x0,2 +np.float64,0x7feca6fff9794dff,0x408ffeb982a70556,2 +np.float64,0x7fc532716d2a64e2,0x408feb3f0f6c095b,2 +np.float64,0x3fe4ec2954a9d853,0xbfe39dd44aa5a040,2 +np.float64,0x7fd3321d52a6643a,0x408ff21a0ab9cd85,2 
+np.float64,0x7fd8f1b2dfb1e365,0x408ff52001fa7922,2 +np.float64,0x3fee5e58cabcbcb2,0xbfb3539734a24d8b,2 +np.float64,0x3feebf6e7dfd7edd,0xbfad7c648f025102,2 +np.float64,0x6008026ec0101,0xc08ffb5108b54a93,2 +np.float64,0x3fea06f5e2340dec,0xbfd3134a48283360,2 +np.float64,0x41cad13c8395b,0xc08fffae654b2426,2 +np.float64,0x7fedb5c9353b6b91,0x408fff249f1f32b6,2 +np.float64,0xe00c5af9c018c,0xc08ff189e68c655f,2 +np.float64,0x7feac398ddf58731,0x408ffdf01374de9f,2 +np.float64,0x3fed21127c7a4225,0xbfc15b8cf55628fa,2 +np.float64,0x3fd3446711a688ce,0xbffbb5f7252a9fa3,2 +np.float64,0x7fe75fa07a6ebf40,0x408ffc5fdb096018,2 +np.float64,0x3feeb1618cbd62c3,0xbfaece3bd0863070,2 +np.float64,0x7f5226e180244dc2,0x408fb174d506e52f,2 +np.float64,0x3fcd67deca3acfbe,0xc000f9cd7a490749,2 +np.float64,0xdc6f30efb8de6,0xc08ff1b9f2a22d2e,2 +np.float64,0x9c14931338293,0xc08ff5b5f975ec5d,2 +np.float64,0x7fe93e802df27cff,0x408ffd4354eba0e0,2 +np.float64,0x3feb92ae5077255d,0xbfcb7f2084e44dbb,2 +np.float64,0xd78dbfddaf1b8,0xc08ff1fc19fa5a13,2 +np.float64,0x7fe14c301fa2985f,0x408ff8e666cb6592,2 +np.float64,0xbda3d8b77b47b,0xc08ff37689f4b2e5,2 +np.float64,0x8a42953b14853,0xc08ff71c2db3b8cf,2 +np.float64,0x7fe4ca7e186994fb,0x408ffb05e94254a7,2 +np.float64,0x7fe92ffc5e325ff8,0x408ffd3cb0265b12,2 +np.float64,0x91b262912364d,0xc08ff681619be214,2 +np.float64,0x33fe2b0667fc6,0xc0900132f3fab55e,2 +np.float64,0x3fde10e9183c21d2,0xbff17060fb4416c7,2 +np.float64,0xb6b811cb6d702,0xc08ff3e46303b541,2 +np.float64,0x3fe4a7bda0a94f7b,0xbfe435c6481cd0e3,2 +np.float64,0x7fd9fe6057b3fcc0,0x408ff599c79a822c,2 +np.float64,0x3fef44bf917e897f,0xbfa11484e351a6e9,2 +np.float64,0x3fe57d701daafae0,0xbfe2618ab40fc01b,2 +np.float64,0x7fe52d2adbaa5a55,0x408ffb3c2fb1c99d,2 +np.float64,0xb432f66d6865f,0xc08ff40d6b4084fe,2 +np.float64,0xbff0000000000000,0x7ff8000000000000,2 +np.float64,0x7fecd2292bf9a451,0x408ffecad860de6f,2 +np.float64,0x3fddd2ae153ba55c,0xbff1a059adaca33e,2 +np.float64,0x3fee55d6e5bcabae,0xbfb3bb1c6179d820,2 +np.float64,0x7fc1d0085623a010,0x408fe93d16ada7a7,2 +np.float64,0x829b000105360,0xc08ff7c47629a68f,2 +np.float64,0x7fe1e0257523c04a,0x408ff94782cf0717,2 +np.float64,0x7fd652f9ad2ca5f2,0x408ff3d820ec892e,2 +np.float64,0x3fef2246203e448c,0xbfa444ab6209d8cd,2 +np.float64,0x3fec6c0ae178d816,0xbfc5e559ebd4e790,2 +np.float64,0x3fe6ddfee92dbbfe,0xbfdf06dd7d3fa7a8,2 +np.float64,0x3fb7fbcbea2ff798,0xc00b5404d859d148,2 +np.float64,0x7feb9a154d37342a,0x408ffe4b26c29e55,2 +np.float64,0x3fe4db717aa9b6e3,0xbfe3c2c6b3ef13bc,2 +np.float64,0x3fbae17dda35c2fc,0xc00a030f7f4b37e7,2 +np.float64,0x7fd632b9082c6571,0x408ff3c76826ef19,2 +np.float64,0x7fc4184a15283093,0x408feaa14adf00be,2 +np.float64,0x3fe052d19920a5a3,0xbfef136b5df81a3e,2 +np.float64,0x7fe38b872b67170d,0x408ffa4f51aafc86,2 +np.float64,0x3fef9842d03f3086,0xbf92d3d2a21d4be2,2 +np.float64,0x9cea662139d4d,0xc08ff5a634810daa,2 +np.float64,0x3fe35f0855e6be11,0xbfe72c4b564e62aa,2 +np.float64,0x3fecee3d3779dc7a,0xbfc29ee942f8729e,2 +np.float64,0x3fe7903fd72f2080,0xbfdc41db9b5f4048,2 +np.float64,0xb958889572b11,0xc08ff3ba366cf84b,2 +np.float64,0x3fcb3a67c53674d0,0xc001dd21081ad1ea,2 +np.float64,0xe3b1b53fc7637,0xc08ff15a3505e1ce,2 +np.float64,0xe5954ae9cb2aa,0xc08ff141cbbf0ae4,2 +np.float64,0x3fe394af74e7295f,0xbfe6ad1d13f206e8,2 +np.float64,0x7fe21dd704643bad,0x408ff96f13f80c1a,2 +np.float64,0x3fd23a7cf02474fa,0xbffcfd7454117a05,2 +np.float64,0x7fe257515e24aea2,0x408ff99378764d52,2 +np.float64,0x7fe4c5d0a6e98ba0,0x408ffb03503cf939,2 +np.float64,0x3fadc2c1603b8583,0xc0106b2c17550e3a,2 
+np.float64,0x3fc0f7f02421efe0,0xc007525ac446864c,2 +np.float64,0x3feaf0b27275e165,0xbfcfc8a03eaa32ad,2 +np.float64,0x5ce7503cb9ceb,0xc08ffbb2de365fa8,2 +np.float64,0x2a0014f654003,0xc090026e41761a0d,2 +np.float64,0x7fe2c848a8e59090,0x408ff9d9b723ee89,2 +np.float64,0x7f66f54bc02dea97,0x408fbc2ae0ec5623,2 +np.float64,0xa35a890146b6,0xc0900a97b358ddbd,2 +np.float64,0x7fee267ded7c4cfb,0x408fff501560c9f5,2 +np.float64,0x3fe07c328520f865,0xbfee9ef7c3435b58,2 +np.float64,0x3fe67122cf6ce246,0xbfe06147001932ba,2 +np.float64,0x3fdacc8925359912,0xbff41824cece219e,2 +np.float64,0xffa3047fff461,0xc08ff00431ec9be3,2 +np.float64,0x3e1af43e7c35f,0xc090002c6573d29b,2 +np.float64,0x86fa94590df53,0xc08ff7632525ed92,2 +np.float64,0x7fec4c76227898eb,0x408ffe94d032c657,2 +np.float64,0x7fe2274ce1e44e99,0x408ff975194cfdff,2 +np.float64,0x7fe670e1b4ace1c2,0x408ffbe78cc451de,2 +np.float64,0x7fe853871db0a70d,0x408ffcd5e6a6ff47,2 +np.float64,0x3fcbf265db37e4cc,0xc0019026336e1176,2 +np.float64,0x3fef033cef3e067a,0xbfa726712eaae7f0,2 +np.float64,0x5d74973abae94,0xc08ffba15e6bb992,2 +np.float64,0x7fdd9c99b6bb3932,0x408ff71ad24a7ae0,2 +np.float64,0xbdc8e09b7b91c,0xc08ff3744939e9a3,2 +np.float64,0xdbfcff71b7fa0,0xc08ff1bfeecc9dfb,2 +np.float64,0xf9b38cf5f3672,0xc08ff0499af34a43,2 +np.float64,0x3fea820aa6b50415,0xbfd162a38e1927b1,2 +np.float64,0x3fe67f59a12cfeb3,0xbfe04412adca49dc,2 +np.float64,0x3feb301d9c76603b,0xbfce17e6edeb92d5,2 +np.float64,0x828ce00b0519c,0xc08ff7c5b5c57cde,2 +np.float64,0x4f935e229f26c,0xc08ffd7c67c1c54f,2 +np.float64,0x7fcd139e023a273b,0x408feee4f12ff11e,2 +np.float64,0x666a9944ccd54,0xc08ffa92d5e5cd64,2 +np.float64,0x3fe792f0fa6f25e2,0xbfdc374fda28f470,2 +np.float64,0xe996029bd32c1,0xc08ff10eb9b47a11,2 +np.float64,0x3fe7b0dd1eef61ba,0xbfdbc2676dc77db0,2 +np.float64,0x7fd3ec0127a7d801,0x408ff287bf47e27d,2 +np.float64,0x3fe793a8ea6f2752,0xbfdc347f7717e48d,2 +np.float64,0x7fdb89d15e3713a2,0x408ff64457a13ea2,2 +np.float64,0x3fe35b3cbbe6b679,0xbfe73557c8321b70,2 +np.float64,0x66573c94ccae8,0xc08ffa9504af7eb5,2 +np.float64,0x3fc620a2302c4144,0xc00442036b944a67,2 +np.float64,0x49b2fe0693660,0xc08ffe5f131c3c7e,2 +np.float64,0x7fda936cdfb526d9,0x408ff5db3ab3f701,2 +np.float64,0xc774ceef8ee9a,0xc08ff2e16d082fa1,2 +np.float64,0x4da9f8a09b55,0xc0900ee2206d0c88,2 +np.float64,0x3fe2ca5d5ae594bb,0xbfe89406611a5f1a,2 +np.float64,0x7fe0832497e10648,0x408ff85d1de6056e,2 +np.float64,0x3fe6a9e3222d53c6,0xbfdfda35a9bc2de1,2 +np.float64,0x3fed3d92c8ba7b26,0xbfc0a73620db8b98,2 +np.float64,0x3fdd2ec093ba5d81,0xbff2209cf78ce3f1,2 +np.float64,0x62fcb968c5f98,0xc08ffaf775a593c7,2 +np.float64,0xfcfb019ff9f60,0xc08ff0230e95bd16,2 +np.float64,0x3fd7a63e8f2f4c7d,0xbff6faf4fff7dbe0,2 +np.float64,0x3fef23b0ec3e4762,0xbfa4230cb176f917,2 +np.float64,0x340d1e6a681a5,0xc09001314b68a0a2,2 +np.float64,0x7fc0b85ba02170b6,0x408fe8821487b802,2 +np.float64,0x7fe9976e84f32edc,0x408ffd6bb6aaf467,2 +np.float64,0x329a0e9e65343,0xc090015b044e3270,2 +np.float64,0x3fea4928d3f49252,0xbfd2299b05546eab,2 +np.float64,0x3f188c70003118e0,0xc02ac3ce23bc5d5a,2 +np.float64,0x3fecce5020b99ca0,0xbfc36b23153d5f50,2 +np.float64,0x3fe203873e24070e,0xbfea86edb3690830,2 +np.float64,0x3fe02d9eaa205b3d,0xbfef7d18c54a76d2,2 +np.float64,0xef7537ebdeea7,0xc08ff0c55e9d89e7,2 +np.float64,0x3fedf7572efbeeae,0xbfb840af357cf07c,2 +np.float64,0xd1a97a61a354,0xc0900926fdfb96cc,2 +np.float64,0x7fe6a0daeced41b5,0x408ffc001edf1407,2 +np.float64,0x3fe5063625aa0c6c,0xbfe3647cfb949d62,2 +np.float64,0x7fe9b28d31736519,0x408ffd77eb4a922b,2 
+np.float64,0x7feea90d033d5219,0x408fff81a4bbff62,2 +np.float64,0x3fe9494d17f2929a,0xbfd5bde02eb5287a,2 +np.float64,0x7feee17a8cbdc2f4,0x408fff96cf0dc16a,2 +np.float64,0xb2ad18ef655a3,0xc08ff4267eda8af8,2 +np.float64,0x3fad3b52683a76a5,0xc01085ab75b797ce,2 +np.float64,0x2300a65846016,0xc090037b81ce9500,2 +np.float64,0x3feb1041f9b62084,0xbfcef0c87d8b3249,2 +np.float64,0x3fdd887d3e3b10fa,0xbff1da0e1ede6db2,2 +np.float64,0x3fd3e410eb27c822,0xbffaf9b5fc9cc8cc,2 +np.float64,0x3fe0aa53e3e154a8,0xbfee1e7b5c486578,2 +np.float64,0x7fe33e389aa67c70,0x408ffa214fe50961,2 +np.float64,0x3fd27e3a43a4fc75,0xbffca84a79e8adeb,2 +np.float64,0x3fb309e0082613c0,0xc00dfe407b77a508,2 +np.float64,0x7feaf2ed8cf5e5da,0x408ffe046a9d1ba9,2 +np.float64,0x1e76167a3cec4,0xc0900448cd35ec67,2 +np.float64,0x3fe0a18e1721431c,0xbfee36cf1165a0d4,2 +np.float64,0x3fa73b78c02e76f2,0xc011d9069823b172,2 +np.float64,0x3fef6d48287eda90,0xbf9ab2d08722c101,2 +np.float64,0x8fdf0da31fbe2,0xc08ff6a6a2accaa1,2 +np.float64,0x3fc3638db826c71b,0xc005c86191688826,2 +np.float64,0xaa9c09c555381,0xc08ff4aefe1d9473,2 +np.float64,0x7fccb0f4523961e8,0x408feebd84773f23,2 +np.float64,0xede75dcfdbcec,0xc08ff0d89ba887d1,2 +np.float64,0x7f8a051520340a29,0x408fcd9cc17f0d95,2 +np.float64,0x3fef5ca2babeb945,0xbf9dc221f3618e6a,2 +np.float64,0x7fea0ff4bcf41fe8,0x408ffda193359f22,2 +np.float64,0x7fe05c53fd20b8a7,0x408ff841dc7123e8,2 +np.float64,0x3fc625664b2c4acd,0xc0043f8749b9a1d8,2 +np.float64,0x7fed58f98f7ab1f2,0x408fff00585f48c2,2 +np.float64,0x3fb3e5e51427cbca,0xc00d7bcb6528cafe,2 +np.float64,0x3fe728bd3d6e517a,0xbfdddafa72bd0f60,2 +np.float64,0x3fe3f005dd27e00c,0xbfe5d7b3ec93bca0,2 +np.float64,0x3fd74fbd1a2e9f7a,0xbff750001b63ce81,2 +np.float64,0x3fd3af6d85a75edb,0xbffb371d678d11b4,2 +np.float64,0x7fa690ad8c2d215a,0x408fdbf7db9c7640,2 +np.float64,0x3fbdfd38e23bfa72,0xc008bfc1c5c9b89e,2 +np.float64,0x3fe2374684a46e8d,0xbfea030c4595dfba,2 +np.float64,0x7fc0806c372100d7,0x408fe85b36fee334,2 +np.float64,0x3fef3ac47b7e7589,0xbfa2007195c5213f,2 +np.float64,0x3fb55473922aa8e7,0xc00cae7af8230e0c,2 +np.float64,0x7fe018dc152031b7,0x408ff811e0d712fa,2 +np.float64,0x3fe3b3fca56767f9,0xbfe6638ae2c99c62,2 +np.float64,0x7fac79818c38f302,0x408fdea720b39c3c,2 +np.float64,0x7fefffffffffffff,0x4090000000000000,2 +np.float64,0xd2b290cba5652,0xc08ff23f6d7152a6,2 +np.float64,0x7fc5848eb52b091c,0x408feb6b6f8b77d0,2 +np.float64,0xf399f62de733f,0xc08ff092ae319ad8,2 +np.float64,0x7fdec56c12bd8ad7,0x408ff78c4ddbc667,2 +np.float64,0x3fca640f1e34c81e,0xc0023969c5cbfa4c,2 +np.float64,0x3fd55225db2aa44c,0xbff95f7442a2189e,2 +np.float64,0x7fefa009a97f4012,0x408fffdd2f42ef9f,2 +np.float64,0x4a3b70609478,0xc0900f24e449bc3d,2 +np.float64,0x7fe3738b1ba6e715,0x408ffa411f2cb5e7,2 +np.float64,0x7fe5e53f0b6bca7d,0x408ffb9ed8d95cea,2 +np.float64,0x3fe274dd24a4e9ba,0xbfe967fb114b2a83,2 +np.float64,0x3fcbc58b8c378b17,0xc001a2bb1e158bcc,2 +np.float64,0x3fefc2c0043f8580,0xbf862c9b464dcf38,2 +np.float64,0xc2c4fafd858a0,0xc08ff327aecc409b,2 +np.float64,0x3fd8bc39a9b17873,0xbff5f1ad46e5a51c,2 +np.float64,0x3fdf341656be682d,0xbff094f41e7cb4c4,2 +np.float64,0x3fef8495c13f092c,0xbf966cf6313bae4c,2 +np.float64,0x3fe14e0f05229c1e,0xbfec6166f26b7161,2 +np.float64,0x3fed42d3b2ba85a7,0xbfc0860b773d35d8,2 +np.float64,0x7fd92bbac5b25775,0x408ff53abcb3fe0c,2 +np.float64,0xb1635b6f62c6c,0xc08ff43bdf47accf,2 +np.float64,0x4a3a2dbc94746,0xc08ffe49fabddb36,2 +np.float64,0x87d831290fb06,0xc08ff750419dc6fb,2 +np.float64,0x3fec4713f7f88e28,0xbfc6d6217c9f5cf9,2 +np.float64,0x7fed43ba2d3a8773,0x408ffef7fa2fc303,2 
+np.float64,0x7fd1ec5b56a3d8b6,0x408ff14f62615f1e,2 +np.float64,0x3fee534b6c7ca697,0xbfb3da1951aa3e68,2 +np.float64,0x3febb564c2b76aca,0xbfca9737062e55e7,2 +np.float64,0x943e6b0f287ce,0xc08ff64e2d09335c,2 +np.float64,0xf177d957e2efb,0xc08ff0acab2999fa,2 +np.float64,0x7fb5b881a82b7102,0x408fe3872b4fde5e,2 +np.float64,0x3fdb2b4a97b65695,0xbff3c715c91359bc,2 +np.float64,0x3fac0a17e4381430,0xc010c330967309fb,2 +np.float64,0x7fd8057990b00af2,0x408ff4b0a287a348,2 +np.float64,0x1f9026a23f206,0xc09004144f3a19dd,2 +np.float64,0x3fdb2977243652ee,0xbff3c8a2fd05803d,2 +np.float64,0x3fe0f6e74b21edcf,0xbfed4c3bb956bae0,2 +np.float64,0xde9cc3bbbd399,0xc08ff19ce5c1e762,2 +np.float64,0x3fe72ce106ae59c2,0xbfddca7ab14ceba2,2 +np.float64,0x3fa8ee14e031dc2a,0xc01170d54ca88e86,2 +np.float64,0x3fe0b09bbb216137,0xbfee0d189a95b877,2 +np.float64,0x7fdfdcb157bfb962,0x408ff7f33cf2afea,2 +np.float64,0x3fef84d5f53f09ac,0xbf966134e2a154f4,2 +np.float64,0x3fea0e0b1bb41c16,0xbfd2fa2d36637d19,2 +np.float64,0x1ab76fd6356ef,0xc090050a9616ffbd,2 +np.float64,0x7fd0ccf79a2199ee,0x408ff09045af2dee,2 +np.float64,0x7fea929345f52526,0x408ffddadc322b07,2 +np.float64,0x3fe9ef629cf3dec5,0xbfd367129c166838,2 +np.float64,0x3feedf0ea2fdbe1d,0xbfaa862afca44c00,2 +np.float64,0x7fce725f723ce4be,0x408fef6cfd2769a8,2 +np.float64,0x7fe4313b3ca86275,0x408ffaaf9557ef8c,2 +np.float64,0xe2d46463c5a8d,0xc08ff165725c6b08,2 +np.float64,0x7fbacb4ace359695,0x408fe5f3647bd0d5,2 +np.float64,0x3fbafd009635fa01,0xc009f745a7a5c5d5,2 +np.float64,0x3fe3cea66ce79d4d,0xbfe6253b895e2838,2 +np.float64,0x7feaa71484354e28,0x408ffde3c0bad2a6,2 +np.float64,0x3fd755b8b42eab71,0xbff74a1444c6e654,2 +np.float64,0x3fc313e2172627c4,0xc005f830e77940c3,2 +np.float64,0x12d699a225ad4,0xc090070ec00f2338,2 +np.float64,0x3fa975fe8432ebfd,0xc01151b3da48b3f9,2 +np.float64,0x7fdce3103b39c61f,0x408ff6d19b3326fa,2 +np.float64,0x7fd341cbba268396,0x408ff2237490fdca,2 +np.float64,0x3fd8405885b080b1,0xbff6666d8802a7d5,2 +np.float64,0x3fe0f0cca3a1e199,0xbfed5cdb3e600791,2 +np.float64,0x7fbd56680c3aaccf,0x408fe6ff55bf378d,2 +np.float64,0x3f939c4f3027389e,0xc016d364dd6313fb,2 +np.float64,0x3fe9e87fac73d0ff,0xbfd37f9a2be4fe38,2 +np.float64,0x7fc93c6a883278d4,0x408fed4260e614f1,2 +np.float64,0x7fa88c0ff031181f,0x408fdcf09a46bd3a,2 +np.float64,0xd5487f99aa910,0xc08ff21b6390ab3b,2 +np.float64,0x3fe34acc96e69599,0xbfe75c9d290428fb,2 +np.float64,0x3fd17f5964a2feb3,0xbffdef50b524137b,2 +np.float64,0xe23dec0dc47be,0xc08ff16d1ce61dcb,2 +np.float64,0x3fec8bd64fb917ad,0xbfc5173941614b8f,2 +np.float64,0x3fc81d97d7303b30,0xc00343ccb791401d,2 +np.float64,0x7fe79ad18e2f35a2,0x408ffc7cf0ab0f2a,2 +np.float64,0x3f96306b402c60d7,0xc0161ce54754cac1,2 +np.float64,0xfb09fc97f6140,0xc08ff039d1d30123,2 +np.float64,0x3fec9c4afa793896,0xbfc4ace43ee46079,2 +np.float64,0x3f9262dac824c5b6,0xc01732a3a7eeb598,2 +np.float64,0x3fa5cd33f42b9a68,0xc01236ed4d315a3a,2 +np.float64,0x3fe7bb336caf7667,0xbfdb9a268a82e267,2 +np.float64,0xc6c338f98d867,0xc08ff2ebb8475bbc,2 +np.float64,0x3fd50714482a0e29,0xbff9b14a9f84f2c2,2 +np.float64,0xfff0000000000000,0x7ff8000000000000,2 +np.float64,0x3fde2cd0f93c59a2,0xbff15afe35a43a37,2 +np.float64,0xf1719cb9e2e34,0xc08ff0acf77b06d3,2 +np.float64,0xfd3caaf9fa796,0xc08ff020101771bd,2 +np.float64,0x7f750d63a02a1ac6,0x408fc32ad0caa362,2 +np.float64,0x7fcc50f4e238a1e9,0x408fee96a5622f1a,2 +np.float64,0x421d1da0843a4,0xc08fff9ffe62d869,2 +np.float64,0x3fd9e17023b3c2e0,0xbff4e631d687ee8e,2 +np.float64,0x3fe4999a09693334,0xbfe4556b3734c215,2 +np.float64,0xd619ef03ac33e,0xc08ff21013c85529,2 
+np.float64,0x3fc4da522229b4a4,0xc004f150b2c573aa,2 +np.float64,0x3feb04b053b60961,0xbfcf3fc9e00ebc40,2 +np.float64,0x3fbedec5ea3dbd8c,0xc0086a33dc22fab5,2 +np.float64,0x7fec3b217ab87642,0x408ffe8dbc8ca041,2 +np.float64,0xdb257d33b64b0,0xc08ff1cb42d3c182,2 +np.float64,0x7fa2d92ec025b25d,0x408fd9e414d11cb0,2 +np.float64,0x3fa425c550284b8b,0xc012ab7cbf83be12,2 +np.float64,0x10b4869021692,0xc09007c0487d648a,2 +np.float64,0x7f97918c902f2318,0x408fd47867806574,2 +np.float64,0x3fe4f91238e9f224,0xbfe38160b4e99919,2 +np.float64,0x3fc2b1af6125635f,0xc00634343bc58461,2 +np.float64,0x3fc2a98071255301,0xc0063942bc8301be,2 +np.float64,0x3fe4cfc585299f8b,0xbfe3dca39f114f34,2 +np.float64,0x3fd1ea75b3a3d4eb,0xbffd63acd02c5406,2 +np.float64,0x3fd6bf48492d7e91,0xbff7e0cd249f80f9,2 +np.float64,0x76643d36ecc88,0xc08ff8e68f13b38c,2 +np.float64,0x7feeabab3e7d5755,0x408fff82a0fd4501,2 +np.float64,0x46c0d4a68d81b,0xc08ffed79abaddc9,2 +np.float64,0x3fd088d57ca111ab,0xbfff3dd0ed7128ea,2 +np.float64,0x3fed25887cba4b11,0xbfc13f47639bd645,2 +np.float64,0x7fd90984b4b21308,0x408ff52b022c7fb4,2 +np.float64,0x3fe6ef31daadde64,0xbfdec185760cbf21,2 +np.float64,0x3fe48dbe83291b7d,0xbfe47005b99920bd,2 +np.float64,0x3fdce8422f39d084,0xbff258a33a96cc8e,2 +np.float64,0xb8ecdef771d9c,0xc08ff3c0eca61b10,2 +np.float64,0x3fe9bbf9a03377f3,0xbfd41ecfdcc336b9,2 +np.float64,0x7fe2565339a4aca5,0x408ff992d8851eaf,2 +np.float64,0x3fe1693e3822d27c,0xbfec1919da2ca697,2 +np.float64,0x3fd3680488a6d009,0xbffb8b7330275947,2 +np.float64,0x7fbe4f3d2c3c9e79,0x408fe75fa3f4e600,2 +np.float64,0x7fd4cfef3ca99fdd,0x408ff308ee3ab50f,2 +np.float64,0x3fd9c9a51cb3934a,0xbff4fb7440055ce6,2 +np.float64,0x3fe08a9640a1152d,0xbfee76bd1bfbf5c2,2 +np.float64,0x3fef012c41fe0259,0xbfa757a2da7f9707,2 +np.float64,0x3fee653fe2fcca80,0xbfb2ffae0c95025c,2 +np.float64,0x7fd0776933a0eed1,0x408ff054e7b43d41,2 +np.float64,0x4c94e5c09929d,0xc08ffdedb7f49e5e,2 +np.float64,0xca3e3d17947c8,0xc08ff2b86dce2f7a,2 +np.float64,0x3fb528e1342a51c2,0xc00cc626c8e2d9ba,2 +np.float64,0xd774df81aee9c,0xc08ff1fd6f0a7548,2 +np.float64,0x3fc47a9b6128f537,0xc00526c577b80849,2 +np.float64,0x3fe29a6f6a6534df,0xbfe90a5f83644911,2 +np.float64,0x3fecda4f59f9b49f,0xbfc31e4a80c4cbb6,2 +np.float64,0x7fe51d44f5aa3a89,0x408ffb3382437426,2 +np.float64,0x3fd677fc412ceff9,0xbff82999086977e7,2 +np.float64,0x3fe2a3c7e7254790,0xbfe8f33415cdba9d,2 +np.float64,0x3fe6d8d1dc6db1a4,0xbfdf1bc61bc24dff,2 +np.float64,0x7febb32d8ef7665a,0x408ffe55a043ded1,2 +np.float64,0x60677860c0d0,0xc0900da2caa7d571,2 +np.float64,0x7390c2e0e7219,0xc08ff92df18bb5d2,2 +np.float64,0x3fca53711b34a6e2,0xc00240b07a9b529b,2 +np.float64,0x7fe7ce6dd8ef9cdb,0x408ffc961164ead9,2 +np.float64,0x7fc0c9de0d2193bb,0x408fe88e245767f6,2 +np.float64,0xc0ee217981dc4,0xc08ff343b77ea770,2 +np.float64,0x72bd4668e57a9,0xc08ff94323fd74fc,2 +np.float64,0x7fd6970e252d2e1b,0x408ff3fb1e2fead2,2 +np.float64,0x7fdcb61040396c20,0x408ff6bf926bc98f,2 +np.float64,0xda4faa25b49f6,0xc08ff1d68b3877f0,2 +np.float64,0x3feb344749f6688f,0xbfcdfba2d66c72c5,2 +np.float64,0x3fe2aa4284e55485,0xbfe8e32ae0683f57,2 +np.float64,0x3f8e8fcfd03d1fa0,0xc01843efb2129908,2 +np.float64,0x8000000000000000,0xfff0000000000000,2 +np.float64,0x3fd8e01155b1c023,0xbff5d0529dae9515,2 +np.float64,0x3fe8033f3370067e,0xbfda837c80b87e7c,2 +np.float64,0x7fc5bf831e2b7f05,0x408feb8ae3b039a0,2 +np.float64,0x3fd8dcdf5331b9bf,0xbff5d349e1ed422a,2 +np.float64,0x3fe58b4e302b169c,0xbfe243c9cbccde44,2 +np.float64,0x3fea8a2e47b5145d,0xbfd1464e37221894,2 +np.float64,0x75cd1e88eb9a4,0xc08ff8f553ef0475,2 
+np.float64,0x7fcfc876e23f90ed,0x408fefebe6cc95e6,2 +np.float64,0x7f51aceb002359d5,0x408fb1263f9003fb,2 +np.float64,0x7fc2a1b877254370,0x408fe9c1ec52f8b9,2 +np.float64,0x7fd495810e292b01,0x408ff2e859414d31,2 +np.float64,0x7fd72048632e4090,0x408ff440690cebdb,2 +np.float64,0x7fd7aafaffaf6,0xc08ff803a390779f,2 +np.float64,0x7fe18067d4a300cf,0x408ff9090a02693f,2 +np.float64,0x3fdc1080f8b82102,0xbff3077bf44a89bd,2 +np.float64,0x3fc34a462f26948c,0xc005d777b3cdf139,2 +np.float64,0x3fe21e4a1fe43c94,0xbfea428acfbc6ea9,2 +np.float64,0x1f0d79083e1b0,0xc090042c65a7abf2,2 +np.float64,0x3fe8d0d15931a1a3,0xbfd779f6bbd4db78,2 +np.float64,0x3fe74578022e8af0,0xbfdd68b6c15e9f5e,2 +np.float64,0x50995dd0a132c,0xc08ffd56a5c8accf,2 +np.float64,0x3f9a6342b034c685,0xc0151ce1973c62bd,2 +np.float64,0x3f30856a00210ad4,0xc027e852f4d1fcbc,2 +np.float64,0x3febcf7646b79eed,0xbfc9e9cc9d12425c,2 +np.float64,0x8010000000000000,0x7ff8000000000000,2 +np.float64,0x3fdf520c02bea418,0xbff07ed5013f3062,2 +np.float64,0x3fe5433ecbea867e,0xbfe2df38968b6d14,2 +np.float64,0x3fb933a84e326751,0xc00ac1a144ad26c5,2 +np.float64,0x7b6d72c2f6daf,0xc08ff86b7a67f962,2 +np.float64,0xaef5dae75debc,0xc08ff46496bb2932,2 +np.float64,0x522d869aa45b1,0xc08ffd1d55281e98,2 +np.float64,0xa2462b05448c6,0xc08ff542fe0ac5fd,2 +np.float64,0x3fe2b71dd6e56e3c,0xbfe8c3690cf15415,2 +np.float64,0x3fe5778231aaef04,0xbfe26e495d09b783,2 +np.float64,0x3fe9b8d564f371ab,0xbfd42a161132970d,2 +np.float64,0x3f89ebc34033d787,0xc019373f90bfc7f1,2 +np.float64,0x3fe438ddc6e871bc,0xbfe53039341b0a93,2 +np.float64,0x873c75250e78f,0xc08ff75d8478dccd,2 +np.float64,0x807134cb00e27,0xc08ff7f5cf59c57a,2 +np.float64,0x3fac459878388b31,0xc010b6fe803bcdc2,2 +np.float64,0xca9dc7eb953b9,0xc08ff2b2fb480784,2 +np.float64,0x7feb38587bb670b0,0x408ffe21ff6d521e,2 +np.float64,0x7fd70e9b782e1d36,0x408ff437936b393a,2 +np.float64,0x3fa4037bbc2806f7,0xc012b55744c65ab2,2 +np.float64,0x3fd3d4637427a8c7,0xbffb0beebf4311ef,2 +np.float64,0x7fdabbda5db577b4,0x408ff5ecbc0d4428,2 +np.float64,0x7fda9be0a2b537c0,0x408ff5dee5d03d5a,2 +np.float64,0x7fe9c74396338e86,0x408ffd813506a18a,2 +np.float64,0x3fd058243e20b048,0xbfff822ffd8a7f21,2 +np.float64,0x3fe6aa6ca9ed54d9,0xbfdfd805629ff49e,2 +np.float64,0x3fd91431d5322864,0xbff5a025eea8c78b,2 +np.float64,0x7fe4d7f02329afdf,0x408ffb0d5d9b7878,2 +np.float64,0x3fe2954a12252a94,0xbfe917266e3e22d5,2 +np.float64,0x3fb25f7c8224bef9,0xc00e6764c81b3718,2 +np.float64,0x3fda4bddeeb497bc,0xbff4880638908c81,2 +np.float64,0x55dfd12eabbfb,0xc08ffc9b54ff4002,2 +np.float64,0x3fe8f399e031e734,0xbfd6f8e5c4dcd93f,2 +np.float64,0x3fd954a24832a945,0xbff56521f4707a06,2 +np.float64,0x3fdea911f2bd5224,0xbff0fcb2d0c2b2e2,2 +np.float64,0x3fe6b4ff8a2d69ff,0xbfdfacfc85cafeab,2 +np.float64,0x3fc7fa02042ff404,0xc00354e13b0767ad,2 +np.float64,0x3fe955088c72aa11,0xbfd593130f29949e,2 +np.float64,0xd7e74ec1afcea,0xc08ff1f74f61721c,2 +np.float64,0x3fe9d69c1ab3ad38,0xbfd3bf710a337e06,2 +np.float64,0x3fd85669a2b0acd3,0xbff65176143ccc1e,2 +np.float64,0x3fea99b285353365,0xbfd11062744783f2,2 +np.float64,0x3fe2c79f80a58f3f,0xbfe89ac33f990289,2 +np.float64,0x3f8332ba30266574,0xc01af2cb7b635783,2 +np.float64,0x30d0150061a1,0xc090119030f74c5d,2 +np.float64,0x3fdbf4cb06b7e996,0xbff31e5207aaa754,2 +np.float64,0x3fe6b56c216d6ad8,0xbfdfab42fb2941c5,2 +np.float64,0x7fc4dc239829b846,0x408feb0fb0e13fbe,2 +np.float64,0x3fd0ab85ef21570c,0xbfff0d95d6c7a35c,2 +np.float64,0x7fe13d75e5e27aeb,0x408ff8dc8efa476b,2 +np.float64,0x3fece3b832f9c770,0xbfc2e21b165d583f,2 +np.float64,0x3fe3a279c4e744f4,0xbfe68ca4fbb55dbf,2 
+np.float64,0x3feb64659ef6c8cb,0xbfccb6204b6bf724,2 +np.float64,0x2279a6bc44f36,0xc0900391eeeb3e7c,2 +np.float64,0xb88046d571009,0xc08ff3c7b5b45300,2 +np.float64,0x7ff4000000000000,0x7ffc000000000000,2 +np.float64,0x3fe49af059a935e1,0xbfe4526c294f248f,2 +np.float64,0xa3e5508147cc,0xc0900a92ce5924b1,2 +np.float64,0x7fc56def3d2adbdd,0x408feb5f46c360e8,2 +np.float64,0x7fd99f3574333e6a,0x408ff56f3807987c,2 +np.float64,0x3fdc38d56fb871ab,0xbff2e667cad8f36a,2 +np.float64,0xd0b03507a1607,0xc08ff25bbcf8aa9d,2 +np.float64,0xc493f9078927f,0xc08ff30c5fa4e759,2 +np.float64,0x3fc86ddbcb30dbb8,0xc0031da1fcb56d75,2 +np.float64,0x7fe75dc395aebb86,0x408ffc5eef841491,2 +np.float64,0x1647618a2c8ed,0xc0900616ef9479c1,2 +np.float64,0xdf144763be289,0xc08ff196b527f3c9,2 +np.float64,0x3fe0b29da6a1653b,0xbfee078b5f4d7744,2 +np.float64,0x3feb055852b60ab1,0xbfcf3b4db5779a7a,2 +np.float64,0x3fe8bc1625f1782c,0xbfd7c739ade904bc,2 +np.float64,0x7fd19bfb8ea337f6,0x408ff11b2b55699c,2 +np.float64,0x3fed1d80d1ba3b02,0xbfc1722e8d3ce094,2 +np.float64,0x2d9c65925b38e,0xc09001f46bcd3bc5,2 +np.float64,0x7fed6f4d857ade9a,0x408fff091cf6a3b4,2 +np.float64,0x3fd070cd6ba0e19b,0xbfff5f7609ca29e8,2 +np.float64,0x7fea3508b8f46a10,0x408ffdb1f30bd6be,2 +np.float64,0x508b897ca1172,0xc08ffd58a0eb3583,2 +np.float64,0x7feba367b07746ce,0x408ffe4f0bf4bd4e,2 +np.float64,0x3fefebd5c4bfd7ac,0xbf6d20b4fcf21b69,2 +np.float64,0x3fd8ef07b8b1de0f,0xbff5c2745c0795a5,2 +np.float64,0x3fd38ed518271daa,0xbffb5d75f00f6900,2 +np.float64,0x6de0fecedbc20,0xc08ff9c307bbc647,2 +np.float64,0xafc0ffc35f820,0xc08ff45737e5d6b4,2 +np.float64,0x7fd282097ca50412,0x408ff1ae3b27bf3b,2 +np.float64,0x3fe2f2d50b65e5aa,0xbfe831042e6a1e99,2 +np.float64,0x3faa437bac3486f7,0xc01123d8d962205a,2 +np.float64,0x3feea54434fd4a88,0xbfaff202cc456647,2 +np.float64,0x3fc9e65b8633ccb7,0xc00270e77ffd19da,2 +np.float64,0x7fee15af61fc2b5e,0x408fff49a49154a3,2 +np.float64,0x7fefe670a73fcce0,0x408ffff6c44c1005,2 +np.float64,0x3fc0832d0f21065a,0xc007a2dc2f25384a,2 +np.float64,0x3fecfc96bcb9f92d,0xbfc24367c3912620,2 +np.float64,0x3feb705682b6e0ad,0xbfcc65b1bb16f9c5,2 +np.float64,0x3fe185c4f9630b8a,0xbfebcdb401af67a4,2 +np.float64,0x3fb0a5a9f6214b54,0xc00f8ada2566a047,2 +np.float64,0x7fe2908cdda52119,0x408ff9b744861fb1,2 +np.float64,0x7fee776e183ceedb,0x408fff6ee7c2f86e,2 +np.float64,0x3fce1d608f3c3ac1,0xc000b3685d006474,2 +np.float64,0x7fecf92aa339f254,0x408ffeda6c998267,2 +np.float64,0xce13cb519c27a,0xc08ff280f02882a9,2 +np.float64,0x1,0xc090c80000000000,2 +np.float64,0x3fe485a8afa90b51,0xbfe4823265d5a50a,2 +np.float64,0x3feea60908bd4c12,0xbfafdf7ad7fe203f,2 +np.float64,0x3fd2253033a44a60,0xbffd187d0ec8d5b9,2 +np.float64,0x435338fc86a68,0xc08fff6a591059dd,2 +np.float64,0x7fce8763a73d0ec6,0x408fef74f1e715ff,2 +np.float64,0x3fbe5ddb783cbbb7,0xc0089acc5afa794b,2 +np.float64,0x7fe4cf19ada99e32,0x408ffb0877ca302b,2 +np.float64,0x3fe94c9ea1b2993d,0xbfd5b1c2e867b911,2 +np.float64,0x3fe75541c72eaa84,0xbfdd2a27aa117699,2 +np.float64,0x8000000000000001,0x7ff8000000000000,2 +np.float64,0x7fdbec7f2c37d8fd,0x408ff66d69a7f818,2 +np.float64,0x8ef10d091de22,0xc08ff6b9ca5094f8,2 +np.float64,0x3fea69025b74d205,0xbfd1b9fe2c252c70,2 +np.float64,0x562376d0ac46f,0xc08ffc924111cd31,2 +np.float64,0x8e8097ab1d013,0xc08ff6c2e2706f67,2 +np.float64,0x3fca6803ed34d008,0xc00237aef808825b,2 +np.float64,0x7fe8fe9067b1fd20,0x408ffd25f459a7d1,2 +np.float64,0x3f918e8c7f233,0xc0900009fe011d54,2 +np.float64,0x3fdfe773833fcee7,0xbff011bc1af87bb9,2 +np.float64,0xefffef6fdfffe,0xc08ff0beb0f09eb0,2 
+np.float64,0x7fe64610282c8c1f,0x408ffbd17209db18,2 +np.float64,0xe66be8c1ccd7d,0xc08ff13706c056e1,2 +np.float64,0x2837e570506fd,0xc09002ae4dae0c1a,2 +np.float64,0x3febe3a081f7c741,0xbfc964171f2a5a47,2 +np.float64,0x3fe21ed09a243da1,0xbfea41342d29c3ff,2 +np.float64,0x3fe1596c8162b2d9,0xbfec431eee30823a,2 +np.float64,0x8f2b9a131e574,0xc08ff6b51104ed4e,2 +np.float64,0x3fe88ed179711da3,0xbfd870d08a4a4b0c,2 +np.float64,0x34159bc2682b4,0xc09001305a885f94,2 +np.float64,0x1ed31e543da65,0xc0900437481577f8,2 +np.float64,0x3feafbe9de75f7d4,0xbfcf7bcdbacf1c61,2 +np.float64,0xfb16fb27f62e0,0xc08ff03938e682a2,2 +np.float64,0x3fe5cd5ba7eb9ab7,0xbfe1b7165771af3c,2 +np.float64,0x7fe72905e76e520b,0x408ffc44c4e7e80c,2 +np.float64,0x7fb7136e2e2e26db,0x408fe439fd383fb7,2 +np.float64,0x8fa585e11f4c,0xc0900b55a08a486b,2 +np.float64,0x7fed985ce47b30b9,0x408fff192b596821,2 +np.float64,0x3feaaf0869755e11,0xbfd0c671571b3764,2 +np.float64,0x3fa40fd4ec281faa,0xc012b1c8dc0b9e5f,2 +np.float64,0x7fda2a70993454e0,0x408ff5ad47b0c68a,2 +np.float64,0x3fe5f7e931abefd2,0xbfe15d52b3605abf,2 +np.float64,0x3fe9fc6d3533f8da,0xbfd338b06a790994,2 +np.float64,0x3fe060649420c0c9,0xbfeeed1756111891,2 +np.float64,0x3fce8435e33d086c,0xc0008c41cea9ed40,2 +np.float64,0x7ff8000000000000,0x7ff8000000000000,2 +np.float64,0x617820aec2f05,0xc08ffb251e9af0f0,2 +np.float64,0x7fcc4ab6ee38956d,0x408fee9419c8f77d,2 +np.float64,0x7fdefda2fc3dfb45,0x408ff7a15063bc05,2 +np.float64,0x7fe5138ccaaa2719,0x408ffb2e30f3a46e,2 +np.float64,0x3fe3817a836702f5,0xbfe6da7c2b25e35a,2 +np.float64,0x3fb8a7dafa314fb6,0xc00b025bc0784ebe,2 +np.float64,0x349dc420693d,0xc09011215825d2c8,2 +np.float64,0x6b0e504ad61cb,0xc08ffa0fee9c5cd6,2 +np.float64,0x273987644e732,0xc09002d34294ed79,2 +np.float64,0x3fc0bd8a6e217b15,0xc0077a5828b4d2f5,2 +np.float64,0x758b48c4eb16a,0xc08ff8fbc8fbe46a,2 +np.float64,0x3fc8a9a52631534a,0xc00301854ec0ef81,2 +np.float64,0x7fe79d29a76f3a52,0x408ffc7e1607a4c1,2 +np.float64,0x3fd7d3ebce2fa7d8,0xbff6ce8a94aebcda,2 +np.float64,0x7fd1cb68a52396d0,0x408ff13a17533b2b,2 +np.float64,0x7fda514a5d34a294,0x408ff5be5e081578,2 +np.float64,0x3fc40b4382281687,0xc0056632c8067228,2 +np.float64,0x7feff1208c3fe240,0x408ffffaa180fa0d,2 +np.float64,0x8f58739f1eb0f,0xc08ff6b17402689d,2 +np.float64,0x1fdbe9a23fb7e,0xc090040685b2d24f,2 +np.float64,0xcb1d0e87963a2,0xc08ff2abbd903b82,2 +np.float64,0x3fc45a6a1a28b4d4,0xc00538f86c4aeaee,2 +np.float64,0x3fe61885b1ac310b,0xbfe118fd2251d2ec,2 +np.float64,0x3fedf584c8fbeb0a,0xbfb8572433ff67a9,2 +np.float64,0x7fb0bddd1a217bb9,0x408fe085e0d621db,2 +np.float64,0x72d8d3e0e5b3,0xc0900ca02f68c7a1,2 +np.float64,0x5cca6ff6b994f,0xc08ffbb6751fda01,2 +np.float64,0x7fe3197839a632ef,0x408ffa0b2fccfb68,2 +np.float64,0x3fcce4d9c139c9b4,0xc0012dae05baa91b,2 +np.float64,0x3fe76d00f62eda02,0xbfdccc5f12799be1,2 +np.float64,0x3fc53c22f72a7846,0xc004bbaa9cbc7958,2 +np.float64,0x7fdda02f1ebb405d,0x408ff71c37c71659,2 +np.float64,0x3fe0844eaba1089d,0xbfee884722762583,2 +np.float64,0x3febb438dc776872,0xbfca9f05e1c691f1,2 +np.float64,0x3fdf4170cdbe82e2,0xbff08b1561c8d848,2 +np.float64,0x3fce1b8d6f3c371b,0xc000b41b69507671,2 +np.float64,0x8370e60706e1d,0xc08ff7b19ea0b4ca,2 +np.float64,0x7fa5bf92382b7f23,0x408fdb8aebb3df87,2 +np.float64,0x7fe4a59979a94b32,0x408ffaf15c1358cd,2 +np.float64,0x3faa66086034cc11,0xc0111c466b7835d6,2 +np.float64,0x7fb7a958262f52af,0x408fe48408b1e093,2 +np.float64,0x3fdaacc5f635598c,0xbff43390d06b5614,2 +np.float64,0x3fd2825b9e2504b7,0xbffca3234264f109,2 +np.float64,0x3fcede160a3dbc2c,0xc0006a759e29060c,2 
+np.float64,0x7fd3b19603a7632b,0x408ff265b528371c,2 +np.float64,0x7fcf8a86ea3f150d,0x408fefd552e7f3b2,2 +np.float64,0xedbcc0f7db798,0xc08ff0daad12096b,2 +np.float64,0xf1e1683de3c2d,0xc08ff0a7a0a37e00,2 +np.float64,0xb6ebd9bf6dd7b,0xc08ff3e11e28378d,2 +np.float64,0x3fec8090d6f90122,0xbfc56031b72194cc,2 +np.float64,0x3fd3e10e37a7c21c,0xbffafd34a3ebc933,2 +np.float64,0x7fbb1c96aa36392c,0x408fe616347b3342,2 +np.float64,0x3fe2f3996f25e733,0xbfe82f25bc5d1bbd,2 +np.float64,0x7fe8709da870e13a,0x408ffce3ab6ce59a,2 +np.float64,0x7fea3233d1b46467,0x408ffdb0b3bbc6de,2 +np.float64,0x65fa4112cbf49,0xc08ffa9f85eb72b9,2 +np.float64,0x3fca2cae9f34595d,0xc00251bb275afb87,2 +np.float64,0x8135fd9f026c0,0xc08ff7e42e14dce7,2 +np.float64,0x7fe0a6f057e14de0,0x408ff876081a4bfe,2 +np.float64,0x10000000000000,0xc08ff00000000000,2 +np.float64,0x3fe1fd506263faa1,0xbfea96dd8c543b72,2 +np.float64,0xa5532c554aa66,0xc08ff50bf5bfc66d,2 +np.float64,0xc239d00b8473a,0xc08ff32ff0ea3f92,2 +np.float64,0x7fdb5314e336a629,0x408ff62d4ff60d82,2 +np.float64,0x3fe5f506e2abea0e,0xbfe16362a4682120,2 +np.float64,0x3fa20c60202418c0,0xc0134e08d82608b6,2 +np.float64,0x7fe03864b22070c8,0x408ff82866d65e9a,2 +np.float64,0x3fe72cf5656e59eb,0xbfddca298969effa,2 +np.float64,0x5c295386b852b,0xc08ffbca90b136c9,2 +np.float64,0x7fd71e5020ae3c9f,0x408ff43f6d58eb7c,2 +np.float64,0x3fd1905a842320b5,0xbffdd8ecd288159c,2 +np.float64,0x3fe6bddb256d7bb6,0xbfdf88fee1a820bb,2 +np.float64,0xe061b967c0c37,0xc08ff18581951561,2 +np.float64,0x3fe534f65cea69ed,0xbfe2fe45fe7d3040,2 +np.float64,0xdc7dae07b8fb6,0xc08ff1b93074ea76,2 +np.float64,0x3fd0425082a084a1,0xbfffa11838b21633,2 +np.float64,0xba723fc974e48,0xc08ff3a8b8d01c58,2 +np.float64,0x3fce42ffc73c8600,0xc000a5062678406e,2 +np.float64,0x3f2e6d3c7e5ce,0xc090001304cfd1c7,2 +np.float64,0x3fd4b2e5f7a965cc,0xbffa0e6e6bae0a68,2 +np.float64,0x3fe6db1d18edb63a,0xbfdf128158ee92d9,2 +np.float64,0x7fe4e5792f29caf1,0x408ffb14d9dbf133,2 +np.float64,0x3fc11cdf992239bf,0xc00739569619cd77,2 +np.float64,0x3fc05ea11220bd42,0xc007bc841b48a890,2 +np.float64,0x4bd592d497ab3,0xc08ffe0ab1c962e2,2 +np.float64,0x280068fc5000e,0xc09002b64955e865,2 +np.float64,0x7fe2f2637065e4c6,0x408ff9f379c1253a,2 +np.float64,0x3fefc38467ff8709,0xbf85e53e64b9a424,2 +np.float64,0x2d78ec5a5af1e,0xc09001f8ea8601e0,2 +np.float64,0x7feeef2b957dde56,0x408fff9bebe995f7,2 +np.float64,0x2639baf44c738,0xc09002f9618d623b,2 +np.float64,0x3fc562964d2ac52d,0xc004a6d76959ef78,2 +np.float64,0x3fe21b071fe4360e,0xbfea4adb2cd96ade,2 +np.float64,0x7fe56aa6802ad54c,0x408ffb5d81d1a898,2 +np.float64,0x4296b452852d7,0xc08fff8ad7fbcbe1,2 +np.float64,0x7fe3fac4ff27f589,0x408ffa9049eec479,2 +np.float64,0x7fe7a83e6caf507c,0x408ffc837f436604,2 +np.float64,0x3fc4ac5b872958b7,0xc0050add72381ac3,2 +np.float64,0x3fd6d697c02dad30,0xbff7c931a3eefb01,2 +np.float64,0x3f61e391c023c724,0xc021ad91e754f94b,2 +np.float64,0x10817f9c21031,0xc09007d20434d7bc,2 +np.float64,0x3fdb9c4c4cb73899,0xbff367d8615c5ece,2 +np.float64,0x3fe26ead6b64dd5b,0xbfe977771def5989,2 +np.float64,0x3fc43ea5c3287d4c,0xc00548c2163ae631,2 +np.float64,0x3fe05bd8bba0b7b1,0xbfeef9ea0db91abc,2 +np.float64,0x3feac78369358f07,0xbfd071e2b0aeab39,2 +np.float64,0x7fe254922ca4a923,0x408ff991bdd4e5d3,2 +np.float64,0x3fe5a2f5842b45eb,0xbfe21135c9a71666,2 +np.float64,0x3fd5daf98c2bb5f3,0xbff8cd24f7c07003,2 +np.float64,0x3fcb2a1384365427,0xc001e40f0d04299a,2 +np.float64,0x3fe073974360e72f,0xbfeeb7183a9930b7,2 +np.float64,0xcf3440819e688,0xc08ff270d3a71001,2 +np.float64,0x3fd35656cda6acae,0xbffba083fba4939d,2 
+np.float64,0x7fe6c59b4ded8b36,0x408ffc12ce725425,2 +np.float64,0x3fba896f943512df,0xc00a291cb6947701,2 +np.float64,0x7fe54917e86a922f,0x408ffb4b5e0fb848,2 +np.float64,0x7fed2a3f51ba547e,0x408ffeede945a948,2 +np.float64,0x3fdc72bd5038e57b,0xbff2b73b7e93e209,2 +np.float64,0x7fefdb3f9f3fb67e,0x408ffff2b702a768,2 +np.float64,0x3fb0184430203088,0xc00fee8c1351763c,2 +np.float64,0x7d6c3668fad87,0xc08ff83c195f2cca,2 +np.float64,0x3fd5aa254aab544b,0xbff900f16365991b,2 +np.float64,0x3f963daab02c7b55,0xc0161974495b1b71,2 +np.float64,0x3fa7a9c5982f538b,0xc011bde0f6052a89,2 +np.float64,0xb3a5a74b674b5,0xc08ff4167bc97c81,2 +np.float64,0x7fad0c14503a1828,0x408fdee1f2d56cd7,2 +np.float64,0x43e0e9d887c1e,0xc08fff522837b13b,2 +np.float64,0x3fe513b20aea2764,0xbfe346ea994100e6,2 +np.float64,0x7fe4e10393e9c206,0x408ffb12630f6a06,2 +np.float64,0x68b286e2d1651,0xc08ffa51c0d795d4,2 +np.float64,0x7fe8de453331bc89,0x408ffd17012b75ac,2 +np.float64,0x1b3d77d4367b0,0xc09004edea60aa36,2 +np.float64,0x3fd351cbc326a398,0xbffba5f0f4d5fdba,2 +np.float64,0x3fd264951b24c92a,0xbffcc8636788b9bf,2 +np.float64,0xd2465761a48cb,0xc08ff2455c9c53e5,2 +np.float64,0x7fe46a0ef028d41d,0x408ffacfe32c6f5d,2 +np.float64,0x3fafd8ac4c3fb159,0xc010071bf33195d0,2 +np.float64,0x902aec5d2055e,0xc08ff6a08e28aabc,2 +np.float64,0x3fcea61bb03d4c37,0xc0007f76e509b657,2 +np.float64,0x7fe8d90f9571b21e,0x408ffd1495f952e7,2 +np.float64,0x7fa650c9442ca192,0x408fdbd6ff22fdd8,2 +np.float64,0x3fe8ecfdf171d9fc,0xbfd7115df40e8580,2 +np.float64,0x7fd4e6fe7f29cdfc,0x408ff315b0dae183,2 +np.float64,0x77df4c52efbea,0xc08ff8c1d5c1df33,2 +np.float64,0xe200b0cfc4016,0xc08ff1703cfb8e79,2 +np.float64,0x3fe230ea7e2461d5,0xbfea132d2385160e,2 +np.float64,0x7fd1f7ced723ef9d,0x408ff156bfbf92a4,2 +np.float64,0x3fea762818f4ec50,0xbfd18c12a88e5f79,2 +np.float64,0x7feea4ba7c7d4974,0x408fff8004164054,2 +np.float64,0x833ec605067d9,0xc08ff7b606383841,2 +np.float64,0x7fd0c2d7fea185af,0x408ff0894f3a0cf4,2 +np.float64,0x3fe1d7d61d23afac,0xbfeaf76fee875d3e,2 +np.float64,0x65adecb0cb5be,0xc08ffaa82cb09d68,2 diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-sin.csv b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-sin.csv new file mode 100644 index 0000000..3b913cc --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-sin.csv @@ -0,0 +1,1370 @@ +dtype,input,output,ulperrortol +## +ve denormals ## +np.float32,0x004b4716,0x004b4716,2 +np.float32,0x007b2490,0x007b2490,2 +np.float32,0x007c99fa,0x007c99fa,2 +np.float32,0x00734a0c,0x00734a0c,2 +np.float32,0x0070de24,0x0070de24,2 +np.float32,0x007fffff,0x007fffff,2 +np.float32,0x00000001,0x00000001,2 +## -ve denormals ## +np.float32,0x80495d65,0x80495d65,2 +np.float32,0x806894f6,0x806894f6,2 +np.float32,0x80555a76,0x80555a76,2 +np.float32,0x804e1fb8,0x804e1fb8,2 +np.float32,0x80687de9,0x80687de9,2 +np.float32,0x807fffff,0x807fffff,2 +np.float32,0x80000001,0x80000001,2 +## +/-0.0f, +/-FLT_MIN +/-FLT_MAX ## +np.float32,0x00000000,0x00000000,2 +np.float32,0x80000000,0x80000000,2 +np.float32,0x00800000,0x00800000,2 +np.float32,0x80800000,0x80800000,2 +## 1.00f ## +np.float32,0x3f800000,0x3f576aa4,2 +np.float32,0x3f800001,0x3f576aa6,2 +np.float32,0x3f800002,0x3f576aa7,2 +np.float32,0xc090a8b0,0x3f7b4e48,2 +np.float32,0x41ce3184,0x3f192d43,2 +np.float32,0xc1d85848,0xbf7161cb,2 +np.float32,0x402b8820,0x3ee3f29f,2 +np.float32,0x42b4e454,0x3f1d0151,2 +np.float32,0x42a67a60,0x3f7ffa4c,2 +np.float32,0x41d92388,0x3f67beef,2 
+np.float32,0x422dd66c,0xbeffb0c1,2 +np.float32,0xc28f5be6,0xbf0bae79,2 +np.float32,0x41ab2674,0x3f0ffe2b,2 +np.float32,0x3f490fdb,0x3f3504f3,2 +np.float32,0xbf490fdb,0xbf3504f3,2 +np.float32,0x3fc90fdb,0x3f800000,2 +np.float32,0xbfc90fdb,0xbf800000,2 +np.float32,0x40490fdb,0xb3bbbd2e,2 +np.float32,0xc0490fdb,0x33bbbd2e,2 +np.float32,0x3fc90fdb,0x3f800000,2 +np.float32,0xbfc90fdb,0xbf800000,2 +np.float32,0x40490fdb,0xb3bbbd2e,2 +np.float32,0xc0490fdb,0x33bbbd2e,2 +np.float32,0x40c90fdb,0x343bbd2e,2 +np.float32,0xc0c90fdb,0xb43bbd2e,2 +np.float32,0x4016cbe4,0x3f3504f3,2 +np.float32,0xc016cbe4,0xbf3504f3,2 +np.float32,0x4096cbe4,0xbf800000,2 +np.float32,0xc096cbe4,0x3f800000,2 +np.float32,0x4116cbe4,0xb2ccde2e,2 +np.float32,0xc116cbe4,0x32ccde2e,2 +np.float32,0x40490fdb,0xb3bbbd2e,2 +np.float32,0xc0490fdb,0x33bbbd2e,2 +np.float32,0x40c90fdb,0x343bbd2e,2 +np.float32,0xc0c90fdb,0xb43bbd2e,2 +np.float32,0x41490fdb,0x34bbbd2e,2 +np.float32,0xc1490fdb,0xb4bbbd2e,2 +np.float32,0x407b53d2,0xbf3504f5,2 +np.float32,0xc07b53d2,0x3f3504f5,2 +np.float32,0x40fb53d2,0x3f800000,2 +np.float32,0xc0fb53d2,0xbf800000,2 +np.float32,0x417b53d2,0xb535563d,2 +np.float32,0xc17b53d2,0x3535563d,2 +np.float32,0x4096cbe4,0xbf800000,2 +np.float32,0xc096cbe4,0x3f800000,2 +np.float32,0x4116cbe4,0xb2ccde2e,2 +np.float32,0xc116cbe4,0x32ccde2e,2 +np.float32,0x4196cbe4,0x334cde2e,2 +np.float32,0xc196cbe4,0xb34cde2e,2 +np.float32,0x40afede0,0xbf3504ef,2 +np.float32,0xc0afede0,0x3f3504ef,2 +np.float32,0x412fede0,0xbf800000,2 +np.float32,0xc12fede0,0x3f800000,2 +np.float32,0x41afede0,0xb5b222c4,2 +np.float32,0xc1afede0,0x35b222c4,2 +np.float32,0x40c90fdb,0x343bbd2e,2 +np.float32,0xc0c90fdb,0xb43bbd2e,2 +np.float32,0x41490fdb,0x34bbbd2e,2 +np.float32,0xc1490fdb,0xb4bbbd2e,2 +np.float32,0x41c90fdb,0x353bbd2e,2 +np.float32,0xc1c90fdb,0xb53bbd2e,2 +np.float32,0x40e231d6,0x3f3504f3,2 +np.float32,0xc0e231d6,0xbf3504f3,2 +np.float32,0x416231d6,0x3f800000,2 +np.float32,0xc16231d6,0xbf800000,2 +np.float32,0x41e231d6,0xb399a6a2,2 +np.float32,0xc1e231d6,0x3399a6a2,2 +np.float32,0x40fb53d2,0x3f800000,2 +np.float32,0xc0fb53d2,0xbf800000,2 +np.float32,0x417b53d2,0xb535563d,2 +np.float32,0xc17b53d2,0x3535563d,2 +np.float32,0x41fb53d2,0x35b5563d,2 +np.float32,0xc1fb53d2,0xb5b5563d,2 +np.float32,0x410a3ae7,0x3f3504eb,2 +np.float32,0xc10a3ae7,0xbf3504eb,2 +np.float32,0x418a3ae7,0xbf800000,2 +np.float32,0xc18a3ae7,0x3f800000,2 +np.float32,0x420a3ae7,0xb6308908,2 +np.float32,0xc20a3ae7,0x36308908,2 +np.float32,0x4116cbe4,0xb2ccde2e,2 +np.float32,0xc116cbe4,0x32ccde2e,2 +np.float32,0x4196cbe4,0x334cde2e,2 +np.float32,0xc196cbe4,0xb34cde2e,2 +np.float32,0x4216cbe4,0x33ccde2e,2 +np.float32,0xc216cbe4,0xb3ccde2e,2 +np.float32,0x41235ce2,0xbf3504f7,2 +np.float32,0xc1235ce2,0x3f3504f7,2 +np.float32,0x41a35ce2,0x3f800000,2 +np.float32,0xc1a35ce2,0xbf800000,2 +np.float32,0x42235ce2,0xb5b889b6,2 +np.float32,0xc2235ce2,0x35b889b6,2 +np.float32,0x412fede0,0xbf800000,2 +np.float32,0xc12fede0,0x3f800000,2 +np.float32,0x41afede0,0xb5b222c4,2 +np.float32,0xc1afede0,0x35b222c4,2 +np.float32,0x422fede0,0x363222c4,2 +np.float32,0xc22fede0,0xb63222c4,2 +np.float32,0x413c7edd,0xbf3504f3,2 +np.float32,0xc13c7edd,0x3f3504f3,2 +np.float32,0x41bc7edd,0xbf800000,2 +np.float32,0xc1bc7edd,0x3f800000,2 +np.float32,0x423c7edd,0xb4000add,2 +np.float32,0xc23c7edd,0x34000add,2 +np.float32,0x41490fdb,0x34bbbd2e,2 +np.float32,0xc1490fdb,0xb4bbbd2e,2 +np.float32,0x41c90fdb,0x353bbd2e,2 +np.float32,0xc1c90fdb,0xb53bbd2e,2 +np.float32,0x42490fdb,0x35bbbd2e,2 
+np.float32,0xc2490fdb,0xb5bbbd2e,2 +np.float32,0x4155a0d9,0x3f3504fb,2 +np.float32,0xc155a0d9,0xbf3504fb,2 +np.float32,0x41d5a0d9,0x3f800000,2 +np.float32,0xc1d5a0d9,0xbf800000,2 +np.float32,0x4255a0d9,0xb633bc81,2 +np.float32,0xc255a0d9,0x3633bc81,2 +np.float32,0x416231d6,0x3f800000,2 +np.float32,0xc16231d6,0xbf800000,2 +np.float32,0x41e231d6,0xb399a6a2,2 +np.float32,0xc1e231d6,0x3399a6a2,2 +np.float32,0x426231d6,0x3419a6a2,2 +np.float32,0xc26231d6,0xb419a6a2,2 +np.float32,0x416ec2d4,0x3f3504ef,2 +np.float32,0xc16ec2d4,0xbf3504ef,2 +np.float32,0x41eec2d4,0xbf800000,2 +np.float32,0xc1eec2d4,0x3f800000,2 +np.float32,0x426ec2d4,0xb5bef0a7,2 +np.float32,0xc26ec2d4,0x35bef0a7,2 +np.float32,0x417b53d2,0xb535563d,2 +np.float32,0xc17b53d2,0x3535563d,2 +np.float32,0x41fb53d2,0x35b5563d,2 +np.float32,0xc1fb53d2,0xb5b5563d,2 +np.float32,0x427b53d2,0x3635563d,2 +np.float32,0xc27b53d2,0xb635563d,2 +np.float32,0x4183f268,0xbf3504ff,2 +np.float32,0xc183f268,0x3f3504ff,2 +np.float32,0x4203f268,0x3f800000,2 +np.float32,0xc203f268,0xbf800000,2 +np.float32,0x4283f268,0xb6859a13,2 +np.float32,0xc283f268,0x36859a13,2 +np.float32,0x418a3ae7,0xbf800000,2 +np.float32,0xc18a3ae7,0x3f800000,2 +np.float32,0x420a3ae7,0xb6308908,2 +np.float32,0xc20a3ae7,0x36308908,2 +np.float32,0x428a3ae7,0x36b08908,2 +np.float32,0xc28a3ae7,0xb6b08908,2 +np.float32,0x41908365,0xbf3504f6,2 +np.float32,0xc1908365,0x3f3504f6,2 +np.float32,0x42108365,0xbf800000,2 +np.float32,0xc2108365,0x3f800000,2 +np.float32,0x42908365,0x3592200d,2 +np.float32,0xc2908365,0xb592200d,2 +np.float32,0x4196cbe4,0x334cde2e,2 +np.float32,0xc196cbe4,0xb34cde2e,2 +np.float32,0x4216cbe4,0x33ccde2e,2 +np.float32,0xc216cbe4,0xb3ccde2e,2 +np.float32,0x4296cbe4,0x344cde2e,2 +np.float32,0xc296cbe4,0xb44cde2e,2 +np.float32,0x419d1463,0x3f3504f8,2 +np.float32,0xc19d1463,0xbf3504f8,2 +np.float32,0x421d1463,0x3f800000,2 +np.float32,0xc21d1463,0xbf800000,2 +np.float32,0x429d1463,0xb5c55799,2 +np.float32,0xc29d1463,0x35c55799,2 +np.float32,0x41a35ce2,0x3f800000,2 +np.float32,0xc1a35ce2,0xbf800000,2 +np.float32,0x42235ce2,0xb5b889b6,2 +np.float32,0xc2235ce2,0x35b889b6,2 +np.float32,0x42a35ce2,0x363889b6,2 +np.float32,0xc2a35ce2,0xb63889b6,2 +np.float32,0x41a9a561,0x3f3504e7,2 +np.float32,0xc1a9a561,0xbf3504e7,2 +np.float32,0x4229a561,0xbf800000,2 +np.float32,0xc229a561,0x3f800000,2 +np.float32,0x42a9a561,0xb68733d0,2 +np.float32,0xc2a9a561,0x368733d0,2 +np.float32,0x41afede0,0xb5b222c4,2 +np.float32,0xc1afede0,0x35b222c4,2 +np.float32,0x422fede0,0x363222c4,2 +np.float32,0xc22fede0,0xb63222c4,2 +np.float32,0x42afede0,0x36b222c4,2 +np.float32,0xc2afede0,0xb6b222c4,2 +np.float32,0x41b6365e,0xbf3504f0,2 +np.float32,0xc1b6365e,0x3f3504f0,2 +np.float32,0x4236365e,0x3f800000,2 +np.float32,0xc236365e,0xbf800000,2 +np.float32,0x42b6365e,0x358bb91c,2 +np.float32,0xc2b6365e,0xb58bb91c,2 +np.float32,0x41bc7edd,0xbf800000,2 +np.float32,0xc1bc7edd,0x3f800000,2 +np.float32,0x423c7edd,0xb4000add,2 +np.float32,0xc23c7edd,0x34000add,2 +np.float32,0x42bc7edd,0x34800add,2 +np.float32,0xc2bc7edd,0xb4800add,2 +np.float32,0x41c2c75c,0xbf3504ef,2 +np.float32,0xc1c2c75c,0x3f3504ef,2 +np.float32,0x4242c75c,0xbf800000,2 +np.float32,0xc242c75c,0x3f800000,2 +np.float32,0x42c2c75c,0xb5cbbe8a,2 +np.float32,0xc2c2c75c,0x35cbbe8a,2 +np.float32,0x41c90fdb,0x353bbd2e,2 +np.float32,0xc1c90fdb,0xb53bbd2e,2 +np.float32,0x42490fdb,0x35bbbd2e,2 +np.float32,0xc2490fdb,0xb5bbbd2e,2 +np.float32,0x42c90fdb,0x363bbd2e,2 +np.float32,0xc2c90fdb,0xb63bbd2e,2 +np.float32,0x41cf585a,0x3f3504ff,2 
+np.float32,0xc1cf585a,0xbf3504ff,2 +np.float32,0x424f585a,0x3f800000,2 +np.float32,0xc24f585a,0xbf800000,2 +np.float32,0x42cf585a,0xb688cd8c,2 +np.float32,0xc2cf585a,0x3688cd8c,2 +np.float32,0x41d5a0d9,0x3f800000,2 +np.float32,0xc1d5a0d9,0xbf800000,2 +np.float32,0x4255a0d9,0xb633bc81,2 +np.float32,0xc255a0d9,0x3633bc81,2 +np.float32,0x42d5a0d9,0x36b3bc81,2 +np.float32,0xc2d5a0d9,0xb6b3bc81,2 +np.float32,0x41dbe958,0x3f3504e0,2 +np.float32,0xc1dbe958,0xbf3504e0,2 +np.float32,0x425be958,0xbf800000,2 +np.float32,0xc25be958,0x3f800000,2 +np.float32,0x42dbe958,0xb6deab75,2 +np.float32,0xc2dbe958,0x36deab75,2 +np.float32,0x41e231d6,0xb399a6a2,2 +np.float32,0xc1e231d6,0x3399a6a2,2 +np.float32,0x426231d6,0x3419a6a2,2 +np.float32,0xc26231d6,0xb419a6a2,2 +np.float32,0x42e231d6,0x3499a6a2,2 +np.float32,0xc2e231d6,0xb499a6a2,2 +np.float32,0x41e87a55,0xbf3504f8,2 +np.float32,0xc1e87a55,0x3f3504f8,2 +np.float32,0x42687a55,0x3f800000,2 +np.float32,0xc2687a55,0xbf800000,2 +np.float32,0x42e87a55,0xb5d2257b,2 +np.float32,0xc2e87a55,0x35d2257b,2 +np.float32,0x41eec2d4,0xbf800000,2 +np.float32,0xc1eec2d4,0x3f800000,2 +np.float32,0x426ec2d4,0xb5bef0a7,2 +np.float32,0xc26ec2d4,0x35bef0a7,2 +np.float32,0x42eec2d4,0x363ef0a7,2 +np.float32,0xc2eec2d4,0xb63ef0a7,2 +np.float32,0x41f50b53,0xbf3504e7,2 +np.float32,0xc1f50b53,0x3f3504e7,2 +np.float32,0x42750b53,0xbf800000,2 +np.float32,0xc2750b53,0x3f800000,2 +np.float32,0x42f50b53,0xb68a6748,2 +np.float32,0xc2f50b53,0x368a6748,2 +np.float32,0x41fb53d2,0x35b5563d,2 +np.float32,0xc1fb53d2,0xb5b5563d,2 +np.float32,0x427b53d2,0x3635563d,2 +np.float32,0xc27b53d2,0xb635563d,2 +np.float32,0x42fb53d2,0x36b5563d,2 +np.float32,0xc2fb53d2,0xb6b5563d,2 +np.float32,0x4200ce28,0x3f3504f0,2 +np.float32,0xc200ce28,0xbf3504f0,2 +np.float32,0x4280ce28,0x3f800000,2 +np.float32,0xc280ce28,0xbf800000,2 +np.float32,0x4300ce28,0x357dd672,2 +np.float32,0xc300ce28,0xb57dd672,2 +np.float32,0x4203f268,0x3f800000,2 +np.float32,0xc203f268,0xbf800000,2 +np.float32,0x4283f268,0xb6859a13,2 +np.float32,0xc283f268,0x36859a13,2 +np.float32,0x4303f268,0x37059a13,2 +np.float32,0xc303f268,0xb7059a13,2 +np.float32,0x420716a7,0x3f3504ee,2 +np.float32,0xc20716a7,0xbf3504ee,2 +np.float32,0x428716a7,0xbf800000,2 +np.float32,0xc28716a7,0x3f800000,2 +np.float32,0x430716a7,0xb5d88c6d,2 +np.float32,0xc30716a7,0x35d88c6d,2 +np.float32,0x420a3ae7,0xb6308908,2 +np.float32,0xc20a3ae7,0x36308908,2 +np.float32,0x428a3ae7,0x36b08908,2 +np.float32,0xc28a3ae7,0xb6b08908,2 +np.float32,0x430a3ae7,0x37308908,2 +np.float32,0xc30a3ae7,0xb7308908,2 +np.float32,0x420d5f26,0xbf350500,2 +np.float32,0xc20d5f26,0x3f350500,2 +np.float32,0x428d5f26,0x3f800000,2 +np.float32,0xc28d5f26,0xbf800000,2 +np.float32,0x430d5f26,0xb68c0105,2 +np.float32,0xc30d5f26,0x368c0105,2 +np.float32,0x42108365,0xbf800000,2 +np.float32,0xc2108365,0x3f800000,2 +np.float32,0x42908365,0x3592200d,2 +np.float32,0xc2908365,0xb592200d,2 +np.float32,0x43108365,0xb612200d,2 +np.float32,0xc3108365,0x3612200d,2 +np.float32,0x4213a7a5,0xbf3504df,2 +np.float32,0xc213a7a5,0x3f3504df,2 +np.float32,0x4293a7a5,0xbf800000,2 +np.float32,0xc293a7a5,0x3f800000,2 +np.float32,0x4313a7a5,0xb6e1deee,2 +np.float32,0xc313a7a5,0x36e1deee,2 +np.float32,0x4216cbe4,0x33ccde2e,2 +np.float32,0xc216cbe4,0xb3ccde2e,2 +np.float32,0x4296cbe4,0x344cde2e,2 +np.float32,0xc296cbe4,0xb44cde2e,2 +np.float32,0x4316cbe4,0x34ccde2e,2 +np.float32,0xc316cbe4,0xb4ccde2e,2 +np.float32,0x4219f024,0x3f35050f,2 +np.float32,0xc219f024,0xbf35050f,2 +np.float32,0x4299f024,0x3f800000,2 
+np.float32,0xc299f024,0xbf800000,2 +np.float32,0x4319f024,0xb71bde6c,2 +np.float32,0xc319f024,0x371bde6c,2 +np.float32,0x421d1463,0x3f800000,2 +np.float32,0xc21d1463,0xbf800000,2 +np.float32,0x429d1463,0xb5c55799,2 +np.float32,0xc29d1463,0x35c55799,2 +np.float32,0x431d1463,0x36455799,2 +np.float32,0xc31d1463,0xb6455799,2 +np.float32,0x422038a3,0x3f3504d0,2 +np.float32,0xc22038a3,0xbf3504d0,2 +np.float32,0x42a038a3,0xbf800000,2 +np.float32,0xc2a038a3,0x3f800000,2 +np.float32,0x432038a3,0xb746cd61,2 +np.float32,0xc32038a3,0x3746cd61,2 +np.float32,0x42235ce2,0xb5b889b6,2 +np.float32,0xc2235ce2,0x35b889b6,2 +np.float32,0x42a35ce2,0x363889b6,2 +np.float32,0xc2a35ce2,0xb63889b6,2 +np.float32,0x43235ce2,0x36b889b6,2 +np.float32,0xc3235ce2,0xb6b889b6,2 +np.float32,0x42268121,0xbf3504f1,2 +np.float32,0xc2268121,0x3f3504f1,2 +np.float32,0x42a68121,0x3f800000,2 +np.float32,0xc2a68121,0xbf800000,2 +np.float32,0x43268121,0x35643aac,2 +np.float32,0xc3268121,0xb5643aac,2 +np.float32,0x4229a561,0xbf800000,2 +np.float32,0xc229a561,0x3f800000,2 +np.float32,0x42a9a561,0xb68733d0,2 +np.float32,0xc2a9a561,0x368733d0,2 +np.float32,0x4329a561,0x370733d0,2 +np.float32,0xc329a561,0xb70733d0,2 +np.float32,0x422cc9a0,0xbf3504ee,2 +np.float32,0xc22cc9a0,0x3f3504ee,2 +np.float32,0x42acc9a0,0xbf800000,2 +np.float32,0xc2acc9a0,0x3f800000,2 +np.float32,0x432cc9a0,0xb5e55a50,2 +np.float32,0xc32cc9a0,0x35e55a50,2 +np.float32,0x422fede0,0x363222c4,2 +np.float32,0xc22fede0,0xb63222c4,2 +np.float32,0x42afede0,0x36b222c4,2 +np.float32,0xc2afede0,0xb6b222c4,2 +np.float32,0x432fede0,0x373222c4,2 +np.float32,0xc32fede0,0xb73222c4,2 +np.float32,0x4233121f,0x3f350500,2 +np.float32,0xc233121f,0xbf350500,2 +np.float32,0x42b3121f,0x3f800000,2 +np.float32,0xc2b3121f,0xbf800000,2 +np.float32,0x4333121f,0xb68f347d,2 +np.float32,0xc333121f,0x368f347d,2 +np.float32,0x4236365e,0x3f800000,2 +np.float32,0xc236365e,0xbf800000,2 +np.float32,0x42b6365e,0x358bb91c,2 +np.float32,0xc2b6365e,0xb58bb91c,2 +np.float32,0x4336365e,0xb60bb91c,2 +np.float32,0xc336365e,0x360bb91c,2 +np.float32,0x42395a9e,0x3f3504df,2 +np.float32,0xc2395a9e,0xbf3504df,2 +np.float32,0x42b95a9e,0xbf800000,2 +np.float32,0xc2b95a9e,0x3f800000,2 +np.float32,0x43395a9e,0xb6e51267,2 +np.float32,0xc3395a9e,0x36e51267,2 +np.float32,0x423c7edd,0xb4000add,2 +np.float32,0xc23c7edd,0x34000add,2 +np.float32,0x42bc7edd,0x34800add,2 +np.float32,0xc2bc7edd,0xb4800add,2 +np.float32,0x433c7edd,0x35000add,2 +np.float32,0xc33c7edd,0xb5000add,2 +np.float32,0x423fa31d,0xbf35050f,2 +np.float32,0xc23fa31d,0x3f35050f,2 +np.float32,0x42bfa31d,0x3f800000,2 +np.float32,0xc2bfa31d,0xbf800000,2 +np.float32,0x433fa31d,0xb71d7828,2 +np.float32,0xc33fa31d,0x371d7828,2 +np.float32,0x4242c75c,0xbf800000,2 +np.float32,0xc242c75c,0x3f800000,2 +np.float32,0x42c2c75c,0xb5cbbe8a,2 +np.float32,0xc2c2c75c,0x35cbbe8a,2 +np.float32,0x4342c75c,0x364bbe8a,2 +np.float32,0xc342c75c,0xb64bbe8a,2 +np.float32,0x4245eb9c,0xbf3504d0,2 +np.float32,0xc245eb9c,0x3f3504d0,2 +np.float32,0x42c5eb9c,0xbf800000,2 +np.float32,0xc2c5eb9c,0x3f800000,2 +np.float32,0x4345eb9c,0xb748671d,2 +np.float32,0xc345eb9c,0x3748671d,2 +np.float32,0x42490fdb,0x35bbbd2e,2 +np.float32,0xc2490fdb,0xb5bbbd2e,2 +np.float32,0x42c90fdb,0x363bbd2e,2 +np.float32,0xc2c90fdb,0xb63bbd2e,2 +np.float32,0x43490fdb,0x36bbbd2e,2 +np.float32,0xc3490fdb,0xb6bbbd2e,2 +np.float32,0x424c341a,0x3f3504f1,2 +np.float32,0xc24c341a,0xbf3504f1,2 +np.float32,0x42cc341a,0x3f800000,2 +np.float32,0xc2cc341a,0xbf800000,2 +np.float32,0x434c341a,0x354a9ee6,2 
+np.float32,0xc34c341a,0xb54a9ee6,2 +np.float32,0x424f585a,0x3f800000,2 +np.float32,0xc24f585a,0xbf800000,2 +np.float32,0x42cf585a,0xb688cd8c,2 +np.float32,0xc2cf585a,0x3688cd8c,2 +np.float32,0x434f585a,0x3708cd8c,2 +np.float32,0xc34f585a,0xb708cd8c,2 +np.float32,0x42527c99,0x3f3504ee,2 +np.float32,0xc2527c99,0xbf3504ee,2 +np.float32,0x42d27c99,0xbf800000,2 +np.float32,0xc2d27c99,0x3f800000,2 +np.float32,0x43527c99,0xb5f22833,2 +np.float32,0xc3527c99,0x35f22833,2 +np.float32,0x4255a0d9,0xb633bc81,2 +np.float32,0xc255a0d9,0x3633bc81,2 +np.float32,0x42d5a0d9,0x36b3bc81,2 +np.float32,0xc2d5a0d9,0xb6b3bc81,2 +np.float32,0x4355a0d9,0x3733bc81,2 +np.float32,0xc355a0d9,0xb733bc81,2 +np.float32,0x4258c518,0xbf350500,2 +np.float32,0xc258c518,0x3f350500,2 +np.float32,0x42d8c518,0x3f800000,2 +np.float32,0xc2d8c518,0xbf800000,2 +np.float32,0x4358c518,0xb69267f6,2 +np.float32,0xc358c518,0x369267f6,2 +np.float32,0x425be958,0xbf800000,2 +np.float32,0xc25be958,0x3f800000,2 +np.float32,0x42dbe958,0xb6deab75,2 +np.float32,0xc2dbe958,0x36deab75,2 +np.float32,0x435be958,0x375eab75,2 +np.float32,0xc35be958,0xb75eab75,2 +np.float32,0x425f0d97,0xbf3504df,2 +np.float32,0xc25f0d97,0x3f3504df,2 +np.float32,0x42df0d97,0xbf800000,2 +np.float32,0xc2df0d97,0x3f800000,2 +np.float32,0x435f0d97,0xb6e845e0,2 +np.float32,0xc35f0d97,0x36e845e0,2 +np.float32,0x426231d6,0x3419a6a2,2 +np.float32,0xc26231d6,0xb419a6a2,2 +np.float32,0x42e231d6,0x3499a6a2,2 +np.float32,0xc2e231d6,0xb499a6a2,2 +np.float32,0x436231d6,0x3519a6a2,2 +np.float32,0xc36231d6,0xb519a6a2,2 +np.float32,0x42655616,0x3f35050f,2 +np.float32,0xc2655616,0xbf35050f,2 +np.float32,0x42e55616,0x3f800000,2 +np.float32,0xc2e55616,0xbf800000,2 +np.float32,0x43655616,0xb71f11e5,2 +np.float32,0xc3655616,0x371f11e5,2 +np.float32,0x42687a55,0x3f800000,2 +np.float32,0xc2687a55,0xbf800000,2 +np.float32,0x42e87a55,0xb5d2257b,2 +np.float32,0xc2e87a55,0x35d2257b,2 +np.float32,0x43687a55,0x3652257b,2 +np.float32,0xc3687a55,0xb652257b,2 +np.float32,0x426b9e95,0x3f3504cf,2 +np.float32,0xc26b9e95,0xbf3504cf,2 +np.float32,0x42eb9e95,0xbf800000,2 +np.float32,0xc2eb9e95,0x3f800000,2 +np.float32,0x436b9e95,0xb74a00d9,2 +np.float32,0xc36b9e95,0x374a00d9,2 +np.float32,0x426ec2d4,0xb5bef0a7,2 +np.float32,0xc26ec2d4,0x35bef0a7,2 +np.float32,0x42eec2d4,0x363ef0a7,2 +np.float32,0xc2eec2d4,0xb63ef0a7,2 +np.float32,0x436ec2d4,0x36bef0a7,2 +np.float32,0xc36ec2d4,0xb6bef0a7,2 +np.float32,0x4271e713,0xbf3504f1,2 +np.float32,0xc271e713,0x3f3504f1,2 +np.float32,0x42f1e713,0x3f800000,2 +np.float32,0xc2f1e713,0xbf800000,2 +np.float32,0x4371e713,0x35310321,2 +np.float32,0xc371e713,0xb5310321,2 +np.float32,0x42750b53,0xbf800000,2 +np.float32,0xc2750b53,0x3f800000,2 +np.float32,0x42f50b53,0xb68a6748,2 +np.float32,0xc2f50b53,0x368a6748,2 +np.float32,0x43750b53,0x370a6748,2 +np.float32,0xc3750b53,0xb70a6748,2 +np.float32,0x42782f92,0xbf3504ee,2 +np.float32,0xc2782f92,0x3f3504ee,2 +np.float32,0x42f82f92,0xbf800000,2 +np.float32,0xc2f82f92,0x3f800000,2 +np.float32,0x43782f92,0xb5fef616,2 +np.float32,0xc3782f92,0x35fef616,2 +np.float32,0x427b53d2,0x3635563d,2 +np.float32,0xc27b53d2,0xb635563d,2 +np.float32,0x42fb53d2,0x36b5563d,2 +np.float32,0xc2fb53d2,0xb6b5563d,2 +np.float32,0x437b53d2,0x3735563d,2 +np.float32,0xc37b53d2,0xb735563d,2 +np.float32,0x427e7811,0x3f350500,2 +np.float32,0xc27e7811,0xbf350500,2 +np.float32,0x42fe7811,0x3f800000,2 +np.float32,0xc2fe7811,0xbf800000,2 +np.float32,0x437e7811,0xb6959b6f,2 +np.float32,0xc37e7811,0x36959b6f,2 +np.float32,0x4280ce28,0x3f800000,2 
+np.float32,0xc280ce28,0xbf800000,2 +np.float32,0x4300ce28,0x357dd672,2 +np.float32,0xc300ce28,0xb57dd672,2 +np.float32,0x4380ce28,0xb5fdd672,2 +np.float32,0xc380ce28,0x35fdd672,2 +np.float32,0x42826048,0x3f3504de,2 +np.float32,0xc2826048,0xbf3504de,2 +np.float32,0x43026048,0xbf800000,2 +np.float32,0xc3026048,0x3f800000,2 +np.float32,0x43826048,0xb6eb7958,2 +np.float32,0xc3826048,0x36eb7958,2 +np.float32,0x4283f268,0xb6859a13,2 +np.float32,0xc283f268,0x36859a13,2 +np.float32,0x4303f268,0x37059a13,2 +np.float32,0xc303f268,0xb7059a13,2 +np.float32,0x4383f268,0x37859a13,2 +np.float32,0xc383f268,0xb7859a13,2 +np.float32,0x42858487,0xbf3504e2,2 +np.float32,0xc2858487,0x3f3504e2,2 +np.float32,0x43058487,0x3f800000,2 +np.float32,0xc3058487,0xbf800000,2 +np.float32,0x43858487,0x36bea8be,2 +np.float32,0xc3858487,0xb6bea8be,2 +np.float32,0x428716a7,0xbf800000,2 +np.float32,0xc28716a7,0x3f800000,2 +np.float32,0x430716a7,0xb5d88c6d,2 +np.float32,0xc30716a7,0x35d88c6d,2 +np.float32,0x438716a7,0x36588c6d,2 +np.float32,0xc38716a7,0xb6588c6d,2 +np.float32,0x4288a8c7,0xbf3504cf,2 +np.float32,0xc288a8c7,0x3f3504cf,2 +np.float32,0x4308a8c7,0xbf800000,2 +np.float32,0xc308a8c7,0x3f800000,2 +np.float32,0x4388a8c7,0xb74b9a96,2 +np.float32,0xc388a8c7,0x374b9a96,2 +np.float32,0x428a3ae7,0x36b08908,2 +np.float32,0xc28a3ae7,0xb6b08908,2 +np.float32,0x430a3ae7,0x37308908,2 +np.float32,0xc30a3ae7,0xb7308908,2 +np.float32,0x438a3ae7,0x37b08908,2 +np.float32,0xc38a3ae7,0xb7b08908,2 +np.float32,0x428bcd06,0x3f3504f2,2 +np.float32,0xc28bcd06,0xbf3504f2,2 +np.float32,0x430bcd06,0x3f800000,2 +np.float32,0xc30bcd06,0xbf800000,2 +np.float32,0x438bcd06,0x3517675b,2 +np.float32,0xc38bcd06,0xb517675b,2 +np.float32,0x428d5f26,0x3f800000,2 +np.float32,0xc28d5f26,0xbf800000,2 +np.float32,0x430d5f26,0xb68c0105,2 +np.float32,0xc30d5f26,0x368c0105,2 +np.float32,0x438d5f26,0x370c0105,2 +np.float32,0xc38d5f26,0xb70c0105,2 +np.float32,0x428ef146,0x3f3504c0,2 +np.float32,0xc28ef146,0xbf3504c0,2 +np.float32,0x430ef146,0xbf800000,2 +np.float32,0xc30ef146,0x3f800000,2 +np.float32,0x438ef146,0xb790bc40,2 +np.float32,0xc38ef146,0x3790bc40,2 +np.float32,0x42908365,0x3592200d,2 +np.float32,0xc2908365,0xb592200d,2 +np.float32,0x43108365,0xb612200d,2 +np.float32,0xc3108365,0x3612200d,2 +np.float32,0x43908365,0xb692200d,2 +np.float32,0xc3908365,0x3692200d,2 +np.float32,0x42921585,0xbf350501,2 +np.float32,0xc2921585,0x3f350501,2 +np.float32,0x43121585,0x3f800000,2 +np.float32,0xc3121585,0xbf800000,2 +np.float32,0x43921585,0xb698cee8,2 +np.float32,0xc3921585,0x3698cee8,2 +np.float32,0x4293a7a5,0xbf800000,2 +np.float32,0xc293a7a5,0x3f800000,2 +np.float32,0x4313a7a5,0xb6e1deee,2 +np.float32,0xc313a7a5,0x36e1deee,2 +np.float32,0x4393a7a5,0x3761deee,2 +np.float32,0xc393a7a5,0xb761deee,2 +np.float32,0x429539c5,0xbf3504b1,2 +np.float32,0xc29539c5,0x3f3504b1,2 +np.float32,0x431539c5,0xbf800000,2 +np.float32,0xc31539c5,0x3f800000,2 +np.float32,0x439539c5,0xb7bbab34,2 +np.float32,0xc39539c5,0x37bbab34,2 +np.float32,0x4296cbe4,0x344cde2e,2 +np.float32,0xc296cbe4,0xb44cde2e,2 +np.float32,0x4316cbe4,0x34ccde2e,2 +np.float32,0xc316cbe4,0xb4ccde2e,2 +np.float32,0x4396cbe4,0x354cde2e,2 +np.float32,0xc396cbe4,0xb54cde2e,2 +np.float32,0x42985e04,0x3f350510,2 +np.float32,0xc2985e04,0xbf350510,2 +np.float32,0x43185e04,0x3f800000,2 +np.float32,0xc3185e04,0xbf800000,2 +np.float32,0x43985e04,0xb722455d,2 +np.float32,0xc3985e04,0x3722455d,2 +np.float32,0x4299f024,0x3f800000,2 +np.float32,0xc299f024,0xbf800000,2 +np.float32,0x4319f024,0xb71bde6c,2 
+np.float32,0xc319f024,0x371bde6c,2 +np.float32,0x4399f024,0x379bde6c,2 +np.float32,0xc399f024,0xb79bde6c,2 +np.float32,0x429b8243,0x3f3504fc,2 +np.float32,0xc29b8243,0xbf3504fc,2 +np.float32,0x431b8243,0xbf800000,2 +np.float32,0xc31b8243,0x3f800000,2 +np.float32,0x439b8243,0x364b2eb8,2 +np.float32,0xc39b8243,0xb64b2eb8,2 +np.float32,0x435b2047,0xbf350525,2 +np.float32,0x42a038a2,0xbf800000,2 +np.float32,0x432038a2,0x3664ca7e,2 +np.float32,0x4345eb9b,0x365e638c,2 +np.float32,0x42c5eb9b,0xbf800000,2 +np.float32,0x42eb9e94,0xbf800000,2 +np.float32,0x4350ea79,0x3f800000,2 +np.float32,0x42dbe957,0x3585522a,2 +np.float32,0x425be957,0xbf800000,2 +np.float32,0x435be957,0xb605522a,2 +np.float32,0x476362a2,0xbd7ff911,2 +np.float32,0x464c99a4,0x3e7f4d41,2 +np.float32,0x4471f73d,0x3e7fe1b0,2 +np.float32,0x445a6752,0x3e7ef367,2 +np.float32,0x474fa400,0x3e7f9fcd,2 +np.float32,0x45c1e72f,0xbe7fc7af,2 +np.float32,0x4558c91d,0x3e7e9f31,2 +np.float32,0x43784f94,0xbdff6654,2 +np.float32,0x466e8500,0xbe7ea0a3,2 +np.float32,0x468e1c25,0x3e7e22fb,2 +np.float32,0x44ea6cfc,0x3dff70c3,2 +np.float32,0x4605126c,0x3e7f89ef,2 +np.float32,0x4788b3c6,0xbb87d853,2 +np.float32,0x4531b042,0x3dffd163,2 +np.float32,0x43f1f71d,0x3dfff387,2 +np.float32,0x462c3fa5,0xbd7fe13d,2 +np.float32,0x441c5354,0xbdff76b4,2 +np.float32,0x44908b69,0x3e7dcf0d,2 +np.float32,0x478813ad,0xbe7e9d80,2 +np.float32,0x441c4351,0x3dff937b,2 +np.float64,0x1,0x1,4 +np.float64,0x8000000000000001,0x8000000000000001,4 +np.float64,0x10000000000000,0x10000000000000,4 +np.float64,0x8010000000000000,0x8010000000000000,4 +np.float64,0x7fefffffffffffff,0x3f7452fc98b34e97,4 +np.float64,0xffefffffffffffff,0xbf7452fc98b34e97,4 +np.float64,0x7ff0000000000000,0xfff8000000000000,4 +np.float64,0xfff0000000000000,0xfff8000000000000,4 +np.float64,0x7ff8000000000000,0x7ff8000000000000,4 +np.float64,0x7ff4000000000000,0x7ffc000000000000,4 +np.float64,0xbfda51b226b4a364,0xbfd9956328ff876c,4 +np.float64,0xbfb4a65aee294cb8,0xbfb4a09fd744f8a5,4 +np.float64,0xbfd73b914fae7722,0xbfd6b9cce55af379,4 +np.float64,0xbfd90c12b4b21826,0xbfd869a3867b51c2,4 +np.float64,0x3fe649bb3d6c9376,0x3fe48778d9b48a21,4 +np.float64,0xbfd5944532ab288a,0xbfd52c30e1951b42,4 +np.float64,0x3fb150c45222a190,0x3fb14d633eb8275d,4 +np.float64,0x3fe4a6ffa9e94e00,0x3fe33f8a95c33299,4 +np.float64,0x3fe8d2157171a42a,0x3fe667d904ac95a6,4 +np.float64,0xbfa889f52c3113f0,0xbfa8878d90a23fa5,4 +np.float64,0x3feb3234bef6646a,0x3fe809d541d9017a,4 +np.float64,0x3fc6de266f2dbc50,0x3fc6bf0ee80a0d86,4 +np.float64,0x3fe8455368f08aa6,0x3fe6028254338ed5,4 +np.float64,0xbfe5576079eaaec1,0xbfe3cb4a8f6bc3f5,4 +np.float64,0xbfe9f822ff73f046,0xbfe7360d7d5cb887,4 +np.float64,0xbfb1960e7e232c20,0xbfb1928438258602,4 +np.float64,0xbfca75938d34eb28,0xbfca4570979bf2fa,4 +np.float64,0x3fd767dd15aecfbc,0x3fd6e33039018bab,4 +np.float64,0xbfe987750ef30eea,0xbfe6e7ed30ce77f0,4 +np.float64,0xbfe87f95a1f0ff2b,0xbfe62ca7e928bb2a,4 +np.float64,0xbfd2465301a48ca6,0xbfd2070245775d76,4 +np.float64,0xbfb1306ed22260e0,0xbfb12d2088eaa4f9,4 +np.float64,0xbfd8089010b01120,0xbfd778f9db77f2f3,4 +np.float64,0x3fbf9cf4ee3f39f0,0x3fbf88674fde1ca2,4 +np.float64,0x3fe6d8468a6db08e,0x3fe4f403f38b7bec,4 +np.float64,0xbfd9e5deefb3cbbe,0xbfd932692c722351,4 +np.float64,0x3fd1584d55a2b09c,0x3fd122253eeecc2e,4 +np.float64,0x3fe857979cf0af30,0x3fe60fc12b5ba8db,4 +np.float64,0x3fe3644149e6c882,0x3fe239f47013cfe6,4 +np.float64,0xbfe22ea62be45d4c,0xbfe13834c17d56fe,4 +np.float64,0xbfe8d93e1df1b27c,0xbfe66cf4ee467fd2,4 +np.float64,0xbfe9c497c9f38930,0xbfe7127417da4204,4 
+np.float64,0x3fd6791cecacf238,0x3fd6039ccb5a7fde,4 +np.float64,0xbfc1dc1b1523b838,0xbfc1cd48edd9ae19,4 +np.float64,0xbfc92a8491325508,0xbfc901176e0158a5,4 +np.float64,0x3fa8649b3430c940,0x3fa8623e82d9504f,4 +np.float64,0x3fe0bed6a1617dae,0x3fdffbb307fb1abe,4 +np.float64,0x3febdf7765f7beee,0x3fe87ad01a89b74a,4 +np.float64,0xbfd3a56d46a74ada,0xbfd356cf41bf83cd,4 +np.float64,0x3fd321d824a643b0,0x3fd2d93846a224b3,4 +np.float64,0xbfc6a49fb52d4940,0xbfc686704906e7d3,4 +np.float64,0xbfdd4103c9ba8208,0xbfdc3ef0c03615b4,4 +np.float64,0xbfe0b78a51e16f14,0xbfdfef0d9ffc38b5,4 +np.float64,0xbfdac7a908b58f52,0xbfda0158956ceecf,4 +np.float64,0xbfbfbf12f23f7e28,0xbfbfaa428989258c,4 +np.float64,0xbfd55f5aa2aabeb6,0xbfd4fa39de65f33a,4 +np.float64,0x3fe06969abe0d2d4,0x3fdf6744fafdd9cf,4 +np.float64,0x3fe56ab8be6ad572,0x3fe3da7a1986d543,4 +np.float64,0xbfeefbbec67df77e,0xbfea5d426132f4aa,4 +np.float64,0x3fe6e1f49cedc3ea,0x3fe4fb53f3d8e3d5,4 +np.float64,0x3feceb231c79d646,0x3fe923d3efa55414,4 +np.float64,0xbfd03dd08ea07ba2,0xbfd011549aa1998a,4 +np.float64,0xbfd688327aad1064,0xbfd611c61b56adbe,4 +np.float64,0xbfde3249d8bc6494,0xbfdd16a7237a39d5,4 +np.float64,0x3febd4b65677a96c,0x3fe873e1a401ef03,4 +np.float64,0xbfe46bd2b368d7a6,0xbfe31023c2467749,4 +np.float64,0x3fbf9f5cde3f3ec0,0x3fbf8aca8ec53c45,4 +np.float64,0x3fc20374032406e8,0x3fc1f43f1f2f4d5e,4 +np.float64,0xbfec143b16f82876,0xbfe89caa42582381,4 +np.float64,0xbfd14fa635a29f4c,0xbfd119ced11da669,4 +np.float64,0x3fe25236d4e4a46e,0x3fe156242d644b7a,4 +np.float64,0xbfe4ed793469daf2,0xbfe377a88928fd77,4 +np.float64,0xbfb363572626c6b0,0xbfb35e98d8fe87ae,4 +np.float64,0xbfb389d5aa2713a8,0xbfb384fae55565a7,4 +np.float64,0x3fca6e001934dc00,0x3fca3e0661eaca84,4 +np.float64,0x3fe748f3f76e91e8,0x3fe548ab2168aea6,4 +np.float64,0x3fef150efdfe2a1e,0x3fea6b92d74f60d3,4 +np.float64,0xbfd14b52b1a296a6,0xbfd115a387c0fa93,4 +np.float64,0x3fe3286b5ce650d6,0x3fe208a6469a7527,4 +np.float64,0xbfd57b4f4baaf69e,0xbfd514a12a9f7ab0,4 +np.float64,0xbfef14bd467e297b,0xbfea6b64bbfd42ce,4 +np.float64,0xbfe280bc90650179,0xbfe17d2c49955dba,4 +np.float64,0x3fca8759d7350eb0,0x3fca56d5c17bbc14,4 +np.float64,0xbfdf988f30bf311e,0xbfde53f96f69b05f,4 +np.float64,0x3f6b6eeb4036de00,0x3f6b6ee7e3f86f9a,4 +np.float64,0xbfed560be8faac18,0xbfe9656c5cf973d8,4 +np.float64,0x3fc6102c592c2058,0x3fc5f43efad5396d,4 +np.float64,0xbfdef64ed2bdec9e,0xbfddc4b7fbd45aea,4 +np.float64,0x3fe814acd570295a,0x3fe5df183d543bfe,4 +np.float64,0x3fca21313f344260,0x3fc9f2d47f64fbe2,4 +np.float64,0xbfe89932cc713266,0xbfe63f186a2f60ce,4 +np.float64,0x3fe4ffcff169ffa0,0x3fe386336115ee21,4 +np.float64,0x3fee6964087cd2c8,0x3fea093d31e2c2c5,4 +np.float64,0xbfbeea604e3dd4c0,0xbfbed72734852669,4 +np.float64,0xbfea1954fb7432aa,0xbfe74cdad8720032,4 +np.float64,0x3fea3e1a5ef47c34,0x3fe765ffba65a11d,4 +np.float64,0x3fcedb850b3db708,0x3fce8f39d92f00ba,4 +np.float64,0x3fd3b52d41a76a5c,0x3fd365d22b0003f9,4 +np.float64,0xbfa4108a0c282110,0xbfa40f397fcd844f,4 +np.float64,0x3fd7454c57ae8a98,0x3fd6c2e5542c6c83,4 +np.float64,0xbfeecd3c7a7d9a79,0xbfea42ca943a1695,4 +np.float64,0xbfdddda397bbbb48,0xbfdccb27283d4c4c,4 +np.float64,0x3fe6b52cf76d6a5a,0x3fe4d96ff32925ff,4 +np.float64,0xbfa39a75ec2734f0,0xbfa3993c0da84f87,4 +np.float64,0x3fdd3fe6fdba7fcc,0x3fdc3df12fe9e525,4 +np.float64,0xbfb57a98162af530,0xbfb5742525d5fbe2,4 +np.float64,0xbfd3e166cfa7c2ce,0xbfd38ff2891be9b0,4 +np.float64,0x3fdb6a04f9b6d408,0x3fda955e5018e9dc,4 +np.float64,0x3fe4ab03a4e95608,0x3fe342bfa76e1aa8,4 +np.float64,0xbfe6c8480b6d9090,0xbfe4e7eaa935b3f5,4 
+np.float64,0xbdd6b5a17bae,0xbdd6b5a17bae,4 +np.float64,0xd6591979acb23,0xd6591979acb23,4 +np.float64,0x5adbed90b5b7e,0x5adbed90b5b7e,4 +np.float64,0xa664c5314cc99,0xa664c5314cc99,4 +np.float64,0x1727fb162e500,0x1727fb162e500,4 +np.float64,0xdb49a93db6935,0xdb49a93db6935,4 +np.float64,0xb10c958d62193,0xb10c958d62193,4 +np.float64,0xad38276f5a705,0xad38276f5a705,4 +np.float64,0x1d5d0b983aba2,0x1d5d0b983aba2,4 +np.float64,0x915f48e122be9,0x915f48e122be9,4 +np.float64,0x475958ae8eb2c,0x475958ae8eb2c,4 +np.float64,0x3af8406675f09,0x3af8406675f09,4 +np.float64,0x655e88a4cabd2,0x655e88a4cabd2,4 +np.float64,0x40fee8ce81fde,0x40fee8ce81fde,4 +np.float64,0xab83103f57062,0xab83103f57062,4 +np.float64,0x7cf934b8f9f27,0x7cf934b8f9f27,4 +np.float64,0x29f7524853eeb,0x29f7524853eeb,4 +np.float64,0x4a5e954894bd3,0x4a5e954894bd3,4 +np.float64,0x24638f3a48c73,0x24638f3a48c73,4 +np.float64,0xa4f32fc749e66,0xa4f32fc749e66,4 +np.float64,0xf8e92df7f1d26,0xf8e92df7f1d26,4 +np.float64,0x292e9d50525d4,0x292e9d50525d4,4 +np.float64,0xe937e897d26fd,0xe937e897d26fd,4 +np.float64,0xd3bde1d5a77bc,0xd3bde1d5a77bc,4 +np.float64,0xa447ffd548900,0xa447ffd548900,4 +np.float64,0xa3b7b691476f7,0xa3b7b691476f7,4 +np.float64,0x490095c892013,0x490095c892013,4 +np.float64,0xfc853235f90a7,0xfc853235f90a7,4 +np.float64,0x5a8bc082b5179,0x5a8bc082b5179,4 +np.float64,0x1baca45a37595,0x1baca45a37595,4 +np.float64,0x2164120842c83,0x2164120842c83,4 +np.float64,0x66692bdeccd26,0x66692bdeccd26,4 +np.float64,0xf205bdd3e40b8,0xf205bdd3e40b8,4 +np.float64,0x7c3fff98f8801,0x7c3fff98f8801,4 +np.float64,0xccdf10e199bf,0xccdf10e199bf,4 +np.float64,0x92db8e8125b8,0x92db8e8125b8,4 +np.float64,0x5789a8d6af136,0x5789a8d6af136,4 +np.float64,0xbdda869d7bb51,0xbdda869d7bb51,4 +np.float64,0xb665e0596ccbc,0xb665e0596ccbc,4 +np.float64,0x74e6b46ee9cd7,0x74e6b46ee9cd7,4 +np.float64,0x4f39cf7c9e73b,0x4f39cf7c9e73b,4 +np.float64,0xfdbf3907fb7e7,0xfdbf3907fb7e7,4 +np.float64,0xafdef4d55fbdf,0xafdef4d55fbdf,4 +np.float64,0xb49858236930b,0xb49858236930b,4 +np.float64,0x3ebe21d47d7c5,0x3ebe21d47d7c5,4 +np.float64,0x5b620512b6c41,0x5b620512b6c41,4 +np.float64,0x31918cda63232,0x31918cda63232,4 +np.float64,0x68b5741ed16af,0x68b5741ed16af,4 +np.float64,0xa5c09a5b4b814,0xa5c09a5b4b814,4 +np.float64,0x55f51c14abea4,0x55f51c14abea4,4 +np.float64,0xda8a3e41b515,0xda8a3e41b515,4 +np.float64,0x9ea9c8513d539,0x9ea9c8513d539,4 +np.float64,0x7f23b964fe478,0x7f23b964fe478,4 +np.float64,0xf6e08c7bedc12,0xf6e08c7bedc12,4 +np.float64,0x7267aa24e4cf6,0x7267aa24e4cf6,4 +np.float64,0x236bb93a46d78,0x236bb93a46d78,4 +np.float64,0x9a98430b35309,0x9a98430b35309,4 +np.float64,0xbb683fef76d08,0xbb683fef76d08,4 +np.float64,0x1ff0eb6e3fe1e,0x1ff0eb6e3fe1e,4 +np.float64,0xf524038fea481,0xf524038fea481,4 +np.float64,0xd714e449ae29d,0xd714e449ae29d,4 +np.float64,0x4154fd7682aa0,0x4154fd7682aa0,4 +np.float64,0x5b8d2f6cb71a7,0x5b8d2f6cb71a7,4 +np.float64,0xc91aa21d92355,0xc91aa21d92355,4 +np.float64,0xbd94fd117b2a0,0xbd94fd117b2a0,4 +np.float64,0x685b207ad0b65,0x685b207ad0b65,4 +np.float64,0xd2485b05a490c,0xd2485b05a490c,4 +np.float64,0x151ea5e62a3d6,0x151ea5e62a3d6,4 +np.float64,0x2635a7164c6b6,0x2635a7164c6b6,4 +np.float64,0x88ae3b5d115c8,0x88ae3b5d115c8,4 +np.float64,0x8a055a55140ac,0x8a055a55140ac,4 +np.float64,0x756f7694eadef,0x756f7694eadef,4 +np.float64,0x866d74630cdaf,0x866d74630cdaf,4 +np.float64,0x39e44f2873c8b,0x39e44f2873c8b,4 +np.float64,0x2a07ceb6540fb,0x2a07ceb6540fb,4 +np.float64,0xc52b96398a573,0xc52b96398a573,4 +np.float64,0x9546543b2a8cb,0x9546543b2a8cb,4 
+np.float64,0x5b995b90b732c,0x5b995b90b732c,4 +np.float64,0x2de10a565bc22,0x2de10a565bc22,4 +np.float64,0x3b06ee94760df,0x3b06ee94760df,4 +np.float64,0xb18e77a5631cf,0xb18e77a5631cf,4 +np.float64,0x3b89ae3a77137,0x3b89ae3a77137,4 +np.float64,0xd9b0b6e5b3617,0xd9b0b6e5b3617,4 +np.float64,0x30b2310861647,0x30b2310861647,4 +np.float64,0x326a3ab464d48,0x326a3ab464d48,4 +np.float64,0x4c18610a9830d,0x4c18610a9830d,4 +np.float64,0x541dea42a83be,0x541dea42a83be,4 +np.float64,0xcd027dbf9a050,0xcd027dbf9a050,4 +np.float64,0x780a0f80f015,0x780a0f80f015,4 +np.float64,0x740ed5b2e81db,0x740ed5b2e81db,4 +np.float64,0xc226814d844d0,0xc226814d844d0,4 +np.float64,0xde958541bd2b1,0xde958541bd2b1,4 +np.float64,0xb563d3296ac7b,0xb563d3296ac7b,4 +np.float64,0x1db3b0b83b677,0x1db3b0b83b677,4 +np.float64,0xa7b0275d4f605,0xa7b0275d4f605,4 +np.float64,0x72f8d038e5f1b,0x72f8d038e5f1b,4 +np.float64,0x860ed1350c1da,0x860ed1350c1da,4 +np.float64,0x79f88262f3f11,0x79f88262f3f11,4 +np.float64,0x8817761f102ef,0x8817761f102ef,4 +np.float64,0xac44784b5888f,0xac44784b5888f,4 +np.float64,0x800fd594241fab28,0x800fd594241fab28,4 +np.float64,0x800ede32f8ddbc66,0x800ede32f8ddbc66,4 +np.float64,0x800de4c1121bc982,0x800de4c1121bc982,4 +np.float64,0x80076ebcddcedd7a,0x80076ebcddcedd7a,4 +np.float64,0x800b3fee06567fdc,0x800b3fee06567fdc,4 +np.float64,0x800b444426b68889,0x800b444426b68889,4 +np.float64,0x800b1c037a563807,0x800b1c037a563807,4 +np.float64,0x8001eb88c2a3d712,0x8001eb88c2a3d712,4 +np.float64,0x80058aae6dab155e,0x80058aae6dab155e,4 +np.float64,0x80083df2d4f07be6,0x80083df2d4f07be6,4 +np.float64,0x800e3b19d97c7634,0x800e3b19d97c7634,4 +np.float64,0x800a71c6f374e38e,0x800a71c6f374e38e,4 +np.float64,0x80048557f1490ab1,0x80048557f1490ab1,4 +np.float64,0x8000a00e6b01401e,0x8000a00e6b01401e,4 +np.float64,0x800766a3e2cecd49,0x800766a3e2cecd49,4 +np.float64,0x80015eb44602bd69,0x80015eb44602bd69,4 +np.float64,0x800bde885a77bd11,0x800bde885a77bd11,4 +np.float64,0x800224c53ea4498b,0x800224c53ea4498b,4 +np.float64,0x80048e8c6a291d1a,0x80048e8c6a291d1a,4 +np.float64,0x800b667e4af6ccfd,0x800b667e4af6ccfd,4 +np.float64,0x800ae3d7e395c7b0,0x800ae3d7e395c7b0,4 +np.float64,0x80086c245550d849,0x80086c245550d849,4 +np.float64,0x800d7d25f6fafa4c,0x800d7d25f6fafa4c,4 +np.float64,0x800f8d9ab0ff1b35,0x800f8d9ab0ff1b35,4 +np.float64,0x800690e949cd21d3,0x800690e949cd21d3,4 +np.float64,0x8003022381060448,0x8003022381060448,4 +np.float64,0x80085e0dad70bc1c,0x80085e0dad70bc1c,4 +np.float64,0x800e2ffc369c5ff9,0x800e2ffc369c5ff9,4 +np.float64,0x800b629b5af6c537,0x800b629b5af6c537,4 +np.float64,0x800fdc964b7fb92d,0x800fdc964b7fb92d,4 +np.float64,0x80036bb4b1c6d76a,0x80036bb4b1c6d76a,4 +np.float64,0x800b382f7f16705f,0x800b382f7f16705f,4 +np.float64,0x800ebac9445d7593,0x800ebac9445d7593,4 +np.float64,0x80015075c3e2a0ec,0x80015075c3e2a0ec,4 +np.float64,0x8002a6ec5ce54dd9,0x8002a6ec5ce54dd9,4 +np.float64,0x8009fab74a93f56f,0x8009fab74a93f56f,4 +np.float64,0x800c94b9ea992974,0x800c94b9ea992974,4 +np.float64,0x800dc2efd75b85e0,0x800dc2efd75b85e0,4 +np.float64,0x800be6400d57cc80,0x800be6400d57cc80,4 +np.float64,0x80021f6858443ed1,0x80021f6858443ed1,4 +np.float64,0x800600e2ac4c01c6,0x800600e2ac4c01c6,4 +np.float64,0x800a2159e6b442b4,0x800a2159e6b442b4,4 +np.float64,0x800c912f4bb9225f,0x800c912f4bb9225f,4 +np.float64,0x800a863a9db50c76,0x800a863a9db50c76,4 +np.float64,0x800ac16851d582d1,0x800ac16851d582d1,4 +np.float64,0x8003f7d32e87efa7,0x8003f7d32e87efa7,4 +np.float64,0x800be4eee3d7c9de,0x800be4eee3d7c9de,4 +np.float64,0x80069ff0ac4d3fe2,0x80069ff0ac4d3fe2,4 
+np.float64,0x80061c986d4c3932,0x80061c986d4c3932,4 +np.float64,0x8000737b4de0e6f7,0x8000737b4de0e6f7,4 +np.float64,0x8002066ef7440cdf,0x8002066ef7440cdf,4 +np.float64,0x8001007050c200e1,0x8001007050c200e1,4 +np.float64,0x8008df9fa351bf40,0x8008df9fa351bf40,4 +np.float64,0x800f8394ee5f072a,0x800f8394ee5f072a,4 +np.float64,0x80008e0b01c11c17,0x80008e0b01c11c17,4 +np.float64,0x800f7088ed3ee112,0x800f7088ed3ee112,4 +np.float64,0x800285b86f650b72,0x800285b86f650b72,4 +np.float64,0x8008ec18af51d832,0x8008ec18af51d832,4 +np.float64,0x800da08523bb410a,0x800da08523bb410a,4 +np.float64,0x800de853ca7bd0a8,0x800de853ca7bd0a8,4 +np.float64,0x8008c8aefad1915e,0x8008c8aefad1915e,4 +np.float64,0x80010c39d5821874,0x80010c39d5821874,4 +np.float64,0x8009208349724107,0x8009208349724107,4 +np.float64,0x800783783f0f06f1,0x800783783f0f06f1,4 +np.float64,0x80025caf9984b960,0x80025caf9984b960,4 +np.float64,0x800bc76fa6778ee0,0x800bc76fa6778ee0,4 +np.float64,0x80017e2f89a2fc60,0x80017e2f89a2fc60,4 +np.float64,0x800ef169843de2d3,0x800ef169843de2d3,4 +np.float64,0x80098a5f7db314bf,0x80098a5f7db314bf,4 +np.float64,0x800d646f971ac8df,0x800d646f971ac8df,4 +np.float64,0x800110d1dc6221a4,0x800110d1dc6221a4,4 +np.float64,0x800f8b422a1f1684,0x800f8b422a1f1684,4 +np.float64,0x800785c97dcf0b94,0x800785c97dcf0b94,4 +np.float64,0x800da201283b4403,0x800da201283b4403,4 +np.float64,0x800a117cc7b422fa,0x800a117cc7b422fa,4 +np.float64,0x80024731cfa48e64,0x80024731cfa48e64,4 +np.float64,0x800199d456c333a9,0x800199d456c333a9,4 +np.float64,0x8005f66bab8becd8,0x8005f66bab8becd8,4 +np.float64,0x8008e7227c11ce45,0x8008e7227c11ce45,4 +np.float64,0x8007b66cc42f6cda,0x8007b66cc42f6cda,4 +np.float64,0x800669e6f98cd3cf,0x800669e6f98cd3cf,4 +np.float64,0x800aed917375db23,0x800aed917375db23,4 +np.float64,0x8008b6dd15116dbb,0x8008b6dd15116dbb,4 +np.float64,0x800f49869cfe930d,0x800f49869cfe930d,4 +np.float64,0x800a712661b4e24d,0x800a712661b4e24d,4 +np.float64,0x800944e816f289d1,0x800944e816f289d1,4 +np.float64,0x800eba0f8a1d741f,0x800eba0f8a1d741f,4 +np.float64,0x800cf6ded139edbe,0x800cf6ded139edbe,4 +np.float64,0x80023100c6246202,0x80023100c6246202,4 +np.float64,0x800c5a94add8b52a,0x800c5a94add8b52a,4 +np.float64,0x800adf329b95be66,0x800adf329b95be66,4 +np.float64,0x800af9afc115f360,0x800af9afc115f360,4 +np.float64,0x800d66ce837acd9d,0x800d66ce837acd9d,4 +np.float64,0x8003ffb5e507ff6d,0x8003ffb5e507ff6d,4 +np.float64,0x80027d280024fa51,0x80027d280024fa51,4 +np.float64,0x800fc37e1d1f86fc,0x800fc37e1d1f86fc,4 +np.float64,0x800fc7258b9f8e4b,0x800fc7258b9f8e4b,4 +np.float64,0x8003fb5789e7f6b0,0x8003fb5789e7f6b0,4 +np.float64,0x800eb4e7a13d69cf,0x800eb4e7a13d69cf,4 +np.float64,0x800951850952a30a,0x800951850952a30a,4 +np.float64,0x3fed4071be3a80e3,0x3fe95842074431df,4 +np.float64,0x3f8d2341203a4682,0x3f8d2300b453bd9f,4 +np.float64,0x3fdc8ce332b919c6,0x3fdb9cdf1440c28f,4 +np.float64,0x3fdc69bd84b8d37b,0x3fdb7d25c8166b7b,4 +np.float64,0x3fc4c22ad0298456,0x3fc4aae73e231b4f,4 +np.float64,0x3fea237809f446f0,0x3fe753cc6ca96193,4 +np.float64,0x3fd34cf6462699ed,0x3fd30268909bb47e,4 +np.float64,0x3fafce20643f9c41,0x3fafc8e41a240e35,4 +np.float64,0x3fdc6d416538da83,0x3fdb805262292863,4 +np.float64,0x3fe7d8362aefb06c,0x3fe5b2ce659db7fd,4 +np.float64,0x3fe290087de52011,0x3fe189f9a3eb123d,4 +np.float64,0x3fa62d2bf82c5a58,0x3fa62b65958ca2b8,4 +np.float64,0x3fafd134403fa269,0x3fafcbf670f8a6f3,4 +np.float64,0x3fa224e53c2449ca,0x3fa223ec5de1631b,4 +np.float64,0x3fb67e2c2c2cfc58,0x3fb676c445fb70a0,4 +np.float64,0x3fda358d01346b1a,0x3fd97b9441666eb2,4 
+np.float64,0x3fdd30fc4bba61f9,0x3fdc308da423778d,4 +np.float64,0x3fc56e99c52add34,0x3fc5550004492621,4 +np.float64,0x3fe32d08de265a12,0x3fe20c761a73cec2,4 +np.float64,0x3fd46cf932a8d9f2,0x3fd414a7f3db03df,4 +np.float64,0x3fd94cfa2b3299f4,0x3fd8a5961b3e4bdd,4 +np.float64,0x3fed6ea3a6fadd47,0x3fe9745b2f6c9204,4 +np.float64,0x3fe4431d1768863a,0x3fe2ef61d0481de0,4 +np.float64,0x3fe1d8e00ea3b1c0,0x3fe0efab5050ee78,4 +np.float64,0x3fe56f37dcaade70,0x3fe3de00b0f392e0,4 +np.float64,0x3fde919a2dbd2334,0x3fdd6b6d2dcf2396,4 +np.float64,0x3fe251e3d4a4a3c8,0x3fe155de69605d60,4 +np.float64,0x3fe5e0ecc5abc1da,0x3fe436a5de5516cf,4 +np.float64,0x3fcd48780c3a90f0,0x3fcd073fa907ba9b,4 +np.float64,0x3fe4e8149229d029,0x3fe37360801d5b66,4 +np.float64,0x3fb9ef159633de2b,0x3fb9e3bc05a15d1d,4 +np.float64,0x3fc24a3f0424947e,0x3fc23a5432ca0e7c,4 +np.float64,0x3fe55ca196aab943,0x3fe3cf6b3143435a,4 +np.float64,0x3fe184544c2308a9,0x3fe0a7b49fa80aec,4 +np.float64,0x3fe2c76e83658edd,0x3fe1b8355c1ea771,4 +np.float64,0x3fea8d2c4ab51a59,0x3fe79ba85aabc099,4 +np.float64,0x3fd74f98abae9f31,0x3fd6cc85005d0593,4 +np.float64,0x3fec6de9a678dbd3,0x3fe8d59a1d23cdd1,4 +np.float64,0x3fec8a0e50f9141d,0x3fe8e7500f6f6a00,4 +np.float64,0x3fe9de6d08b3bcda,0x3fe7245319508767,4 +np.float64,0x3fe4461fd1688c40,0x3fe2f1cf0b93aba6,4 +np.float64,0x3fde342d9d3c685b,0x3fdd185609d5719d,4 +np.float64,0x3feb413fc8368280,0x3fe813c091d2519a,4 +np.float64,0x3fe64333156c8666,0x3fe48275b9a6a358,4 +np.float64,0x3fe03c65226078ca,0x3fdf18b26786be35,4 +np.float64,0x3fee11054dbc220b,0x3fe9d579a1cfa7ad,4 +np.float64,0x3fbaefccae35df99,0x3fbae314fef7c7ea,4 +np.float64,0x3feed4e3487da9c7,0x3fea4729241c8811,4 +np.float64,0x3fbb655df836cabc,0x3fbb57fcf9a097be,4 +np.float64,0x3fe68b0273ed1605,0x3fe4b96109afdf76,4 +np.float64,0x3fd216bfc3242d80,0x3fd1d957363f6a43,4 +np.float64,0x3fe01328d4a02652,0x3fded083bbf94aba,4 +np.float64,0x3fe3f9a61ae7f34c,0x3fe2b3f701b79028,4 +np.float64,0x3fed4e7cf8fa9cfa,0x3fe960d27084fb40,4 +np.float64,0x3faec08e343d811c,0x3faebbd2aa07ac1f,4 +np.float64,0x3fd2d1bbeea5a378,0x3fd28c9aefcf48ad,4 +np.float64,0x3fd92e941fb25d28,0x3fd889857f88410d,4 +np.float64,0x3fe43decb7e87bd9,0x3fe2eb32b4ee4667,4 +np.float64,0x3fef49cabcfe9395,0x3fea892f9a233f76,4 +np.float64,0x3fe3e96812e7d2d0,0x3fe2a6c6b45dd6ee,4 +np.float64,0x3fd24c0293a49805,0x3fd20c76d54473cb,4 +np.float64,0x3fb43d6b7e287ad7,0x3fb438060772795a,4 +np.float64,0x3fe87bf7d3f0f7f0,0x3fe62a0c47411c62,4 +np.float64,0x3fee82a2e07d0546,0x3fea17e27e752b7b,4 +np.float64,0x3fe40c01bbe81803,0x3fe2c2d9483f44d8,4 +np.float64,0x3fd686ccae2d0d99,0x3fd610763fb61097,4 +np.float64,0x3fe90fcf2af21f9e,0x3fe693c12df59ba9,4 +np.float64,0x3fefb3ce11ff679c,0x3feac3dd4787529d,4 +np.float64,0x3fcec53ff63d8a80,0x3fce79992af00c58,4 +np.float64,0x3fe599dd7bab33bb,0x3fe3ff5da7575d85,4 +np.float64,0x3fe9923b1a732476,0x3fe6ef71d13db456,4 +np.float64,0x3febf76fcef7eee0,0x3fe88a3952e11373,4 +np.float64,0x3fc2cfd128259fa2,0x3fc2be7fd47fd811,4 +np.float64,0x3fe4d37ae269a6f6,0x3fe36300d45e3745,4 +np.float64,0x3fe23aa2e4247546,0x3fe1424e172f756f,4 +np.float64,0x3fe4f0596ca9e0b3,0x3fe379f0c49de7ef,4 +np.float64,0x3fe2e4802fe5c900,0x3fe1d062a8812601,4 +np.float64,0x3fe5989c79eb3139,0x3fe3fe6308552dec,4 +np.float64,0x3fe3c53cb4e78a79,0x3fe28956e573aca4,4 +np.float64,0x3fe6512beeeca258,0x3fe48d2d5ece979f,4 +np.float64,0x3fd8473ddb308e7c,0x3fd7b33e38adc6ad,4 +np.float64,0x3fecd09c9679a139,0x3fe91361fa0c5bcb,4 +np.float64,0x3fc991530e3322a6,0x3fc965e2c514a9e9,4 +np.float64,0x3f6d4508403a8a11,0x3f6d45042b68acc5,4 
+np.float64,0x3fea1f198f743e33,0x3fe750ce918d9330,4 +np.float64,0x3fd0a0bb4da14177,0x3fd07100f9c71e1c,4 +np.float64,0x3fd30c45ffa6188c,0x3fd2c499f9961f66,4 +np.float64,0x3fcad98e7c35b31d,0x3fcaa74293cbc52e,4 +np.float64,0x3fec8e4a5eb91c95,0x3fe8e9f898d118db,4 +np.float64,0x3fd19fdb79233fb7,0x3fd1670c00febd24,4 +np.float64,0x3fea9fcbb1f53f97,0x3fe7a836b29c4075,4 +np.float64,0x3fc6d12ea12da25d,0x3fc6b24bd2f89f59,4 +np.float64,0x3fd6af3658ad5e6d,0x3fd636613e08df3f,4 +np.float64,0x3fe31bc385a63787,0x3fe1fe3081621213,4 +np.float64,0x3fc0dbba2221b774,0x3fc0cf42c9313dba,4 +np.float64,0x3fef639ce87ec73a,0x3fea9795454f1036,4 +np.float64,0x3fee5f29dcbcbe54,0x3fea0349b288f355,4 +np.float64,0x3fed46bdb37a8d7b,0x3fe95c199f5aa569,4 +np.float64,0x3fef176afa3e2ed6,0x3fea6ce78b2aa3aa,4 +np.float64,0x3fc841e7683083cf,0x3fc81cccb84848cc,4 +np.float64,0xbfda3ec9a2347d94,0xbfd9840d180e9de3,4 +np.float64,0xbfcd5967ae3ab2d0,0xbfcd17be13142bb9,4 +np.float64,0xbfedf816573bf02d,0xbfe9c6bb06476c60,4 +np.float64,0xbfd0d6e10e21adc2,0xbfd0a54f99d2f3dc,4 +np.float64,0xbfe282df096505be,0xbfe17ef5e2e80760,4 +np.float64,0xbfd77ae6e62ef5ce,0xbfd6f4f6b603ad8a,4 +np.float64,0xbfe37b171aa6f62e,0xbfe24cb4b2d0ade4,4 +np.float64,0xbfef9e5ed9bf3cbe,0xbfeab817b41000bd,4 +np.float64,0xbfe624d6f96c49ae,0xbfe46b1e9c9aff86,4 +np.float64,0xbfefb5da65ff6bb5,0xbfeac4fc9c982772,4 +np.float64,0xbfd29a65d52534cc,0xbfd2579df8ff87b9,4 +np.float64,0xbfd40270172804e0,0xbfd3af6471104aef,4 +np.float64,0xbfb729ee7a2e53e0,0xbfb721d7dbd2705e,4 +np.float64,0xbfb746f1382e8de0,0xbfb73ebc1207f8e3,4 +np.float64,0xbfd3c7e606a78fcc,0xbfd377a8aa1b0dd9,4 +np.float64,0xbfd18c4880231892,0xbfd1543506584ad5,4 +np.float64,0xbfea988080753101,0xbfe7a34cba0d0fa1,4 +np.float64,0xbf877400e02ee800,0xbf8773df47fa7e35,4 +np.float64,0xbfb07e050820fc08,0xbfb07b198d4a52c9,4 +np.float64,0xbfee0a3621fc146c,0xbfe9d1745a05ba77,4 +np.float64,0xbfe78de246ef1bc4,0xbfe57bf2baab91c8,4 +np.float64,0xbfcdbfd3bd3b7fa8,0xbfcd7b728a955a06,4 +np.float64,0xbfe855ea79b0abd5,0xbfe60e8a4a17b921,4 +np.float64,0xbfd86c8e3530d91c,0xbfd7d5e36c918dc1,4 +np.float64,0xbfe4543169e8a863,0xbfe2fd23d42f552e,4 +np.float64,0xbfe41efbf1283df8,0xbfe2d235a2faed1a,4 +np.float64,0xbfd9a55464b34aa8,0xbfd8f7083f7281e5,4 +np.float64,0xbfe5f5078d6bea0f,0xbfe44637d910c270,4 +np.float64,0xbfe6d83e3dedb07c,0xbfe4f3fdadd10552,4 +np.float64,0xbfdb767e70b6ecfc,0xbfdaa0b6c17f3fb1,4 +np.float64,0xbfdfc91b663f9236,0xbfde7eb0dfbeaa26,4 +np.float64,0xbfbfbd18783f7a30,0xbfbfa84bf2fa1c8d,4 +np.float64,0xbfe51199242a2332,0xbfe39447dbe066ae,4 +np.float64,0xbfdbb94814b77290,0xbfdadd63bd796972,4 +np.float64,0xbfd8c6272cb18c4e,0xbfd828f2d9e8607e,4 +np.float64,0xbfce51e0b63ca3c0,0xbfce097ee908083a,4 +np.float64,0xbfe99a177d73342f,0xbfe6f4ec776a57ae,4 +np.float64,0xbfefde2ab0ffbc55,0xbfeadafdcbf54733,4 +np.float64,0xbfcccb5c1c3996b8,0xbfcc8d586a73d126,4 +np.float64,0xbfdf7ddcedbefbba,0xbfde3c749a906de7,4 +np.float64,0xbfef940516ff280a,0xbfeab26429e89f4b,4 +np.float64,0xbfe08009f1e10014,0xbfdf8eab352997eb,4 +np.float64,0xbfe9c02682b3804d,0xbfe70f5fd05f79ee,4 +np.float64,0xbfb3ca1732279430,0xbfb3c50bec5b453a,4 +np.float64,0xbfe368e81926d1d0,0xbfe23dc704d0887c,4 +np.float64,0xbfbd20cc2e3a4198,0xbfbd10b7e6d81c6c,4 +np.float64,0xbfd67ece4d2cfd9c,0xbfd608f527dcc5e7,4 +np.float64,0xbfdc02d1333805a2,0xbfdb20104454b79f,4 +np.float64,0xbfc007a626200f4c,0xbfbff9dc9dc70193,4 +np.float64,0xbfda9e4f8fb53ca0,0xbfd9db8af35dc630,4 +np.float64,0xbfd8173d77302e7a,0xbfd786a0cf3e2914,4 +np.float64,0xbfeb8fcbd0b71f98,0xbfe84734debc10fb,4 
+np.float64,0xbfe4bf1cb7697e3a,0xbfe352c891113f29,4 +np.float64,0xbfc18624d5230c48,0xbfc178248e863b64,4 +np.float64,0xbfcf184bac3e3098,0xbfceca3b19be1ebe,4 +np.float64,0xbfd2269c42a44d38,0xbfd1e8920d72b694,4 +np.float64,0xbfe8808526b1010a,0xbfe62d5497292495,4 +np.float64,0xbfe498bd1da9317a,0xbfe334245eadea93,4 +np.float64,0xbfef0855aebe10ab,0xbfea6462f29aeaf9,4 +np.float64,0xbfdeb186c93d630e,0xbfdd87c37943c602,4 +np.float64,0xbfb29fe2ae253fc8,0xbfb29bae3c87efe4,4 +np.float64,0xbfddd9c6c3bbb38e,0xbfdcc7b400bf384b,4 +np.float64,0xbfe3506673e6a0cd,0xbfe2299f26295553,4 +np.float64,0xbfe765957a2ecb2b,0xbfe55e03cf22edab,4 +np.float64,0xbfecc9876c79930f,0xbfe90efaf15b6207,4 +np.float64,0xbfefb37a0a7f66f4,0xbfeac3af3898e7c2,4 +np.float64,0xbfeefa0da7bdf41b,0xbfea5c4cde53c1c3,4 +np.float64,0xbfe6639ee9ecc73e,0xbfe49b4e28a72482,4 +np.float64,0xbfef91a4bb7f2349,0xbfeab114ac9e25dd,4 +np.float64,0xbfc8b392bb316724,0xbfc88c657f4441a3,4 +np.float64,0xbfc88a358231146c,0xbfc863cb900970fe,4 +np.float64,0xbfef25a9d23e4b54,0xbfea74eda432aabe,4 +np.float64,0xbfe6aceea0ed59de,0xbfe4d32e54a3fd01,4 +np.float64,0xbfefe2b3e37fc568,0xbfeadd74f4605835,4 +np.float64,0xbfa9eecb8833dd90,0xbfa9ebf4f4cb2591,4 +np.float64,0xbfd42bad7428575a,0xbfd3d69de8e52d0a,4 +np.float64,0xbfbc366b4a386cd8,0xbfbc27ceee8f3019,4 +np.float64,0xbfd9bca7be337950,0xbfd90c80e6204e57,4 +np.float64,0xbfe8173f53f02e7f,0xbfe5e0f8d8ed329c,4 +np.float64,0xbfce22dbcb3c45b8,0xbfcddbc8159b63af,4 +np.float64,0xbfea2d7ba7345af7,0xbfe75aa62ad5b80a,4 +np.float64,0xbfc08b783e2116f0,0xbfc07faf8d501558,4 +np.float64,0xbfb8c4161c318830,0xbfb8ba33950748ec,4 +np.float64,0xbfddd930bcbbb262,0xbfdcc72dffdf51bb,4 +np.float64,0xbfd108ce8a22119e,0xbfd0d5801e7698bd,4 +np.float64,0xbfd5bd2b5dab7a56,0xbfd552c52c468c76,4 +np.float64,0xbfe7ffe67fefffcd,0xbfe5cfe96e35e6e5,4 +np.float64,0xbfa04ec6bc209d90,0xbfa04e120a2c25cc,4 +np.float64,0xbfef7752cc7eeea6,0xbfeaa28715addc4f,4 +np.float64,0xbfe7083c2eae1078,0xbfe5182bf8ddfc8e,4 +np.float64,0xbfe05dafd0a0bb60,0xbfdf52d397cfe5f6,4 +np.float64,0xbfacb4f2243969e0,0xbfacb118991ea235,4 +np.float64,0xbfc7d47e422fa8fc,0xbfc7b1504714a4fd,4 +np.float64,0xbfbd70b2243ae168,0xbfbd60182efb61de,4 +np.float64,0xbfe930e49cb261c9,0xbfe6ab272b3f9cfc,4 +np.float64,0xbfb5f537e62bea70,0xbfb5ee540dcdc635,4 +np.float64,0xbfbb0c8278361908,0xbfbaffa1f7642a87,4 +np.float64,0xbfe82af2447055e4,0xbfe5ef54ca8db9e8,4 +np.float64,0xbfe92245e6f2448c,0xbfe6a0d32168040b,4 +np.float64,0xbfb799a8522f3350,0xbfb7911a7ada3640,4 +np.float64,0x7faa8290c8350521,0x3fe5916f67209cd6,4 +np.float64,0x7f976597082ecb2d,0x3fcf94dce396bd37,4 +np.float64,0x7fede721237bce41,0x3fe3e7b1575b005f,4 +np.float64,0x7fd5f674d72bece9,0x3fe3210628eba199,4 +np.float64,0x7f9b0f1aa0361e34,0x3feffd34d15d1da7,4 +np.float64,0x7fec48346ab89068,0x3fe93dd84253d9a2,4 +np.float64,0x7f9cac76283958eb,0xbfec4cd999653868,4 +np.float64,0x7fed51ab6bbaa356,0x3fecc27fb5f37bca,4 +np.float64,0x7fded3c116bda781,0xbfda473efee47cf1,4 +np.float64,0x7fd19c48baa33890,0xbfe25700cbfc0326,4 +np.float64,0x7fe5c8f478ab91e8,0xbfee4ab6d84806be,4 +np.float64,0x7fe53c64e46a78c9,0x3fee19c3f227f4e1,4 +np.float64,0x7fc2ad1936255a31,0xbfe56db9b877f807,4 +np.float64,0x7fe2b071b52560e2,0xbfce3990a8d390a9,4 +np.float64,0x7fc93f3217327e63,0xbfd1f6d7ef838d2b,4 +np.float64,0x7fec26df08784dbd,0x3fd5397be41c93d9,4 +np.float64,0x7fcf4770183e8edf,0x3fe6354f5a785016,4 +np.float64,0x7fdc9fcc0bb93f97,0xbfeeeae952e8267d,4 +np.float64,0x7feb21f29c7643e4,0x3fec20122e33f1bf,4 +np.float64,0x7fd0b51273216a24,0x3fefb09f8daba00b,4 
+np.float64,0x7fe747a9d76e8f53,0x3feb46a3232842a4,4 +np.float64,0x7fd58885972b110a,0xbfce5ea57c186221,4 +np.float64,0x7fca3ce85c3479d0,0x3fef93a24548e8ca,4 +np.float64,0x7fe1528a46a2a514,0xbfb54bb578d9da91,4 +np.float64,0x7fcc58b21b38b163,0x3feffb5b741ffc2d,4 +np.float64,0x7fdabcaaf5357955,0x3fecbf855db524d1,4 +np.float64,0x7fdd27c6933a4f8c,0xbfef2f41bb80144b,4 +np.float64,0x7fbda4e1be3b49c2,0x3fdb9b33f84f5381,4 +np.float64,0x7fe53363362a66c5,0x3fe4daff3a6a4ed0,4 +np.float64,0x7fe5719d62eae33a,0xbfef761d98f625d5,4 +np.float64,0x7f982ce5a83059ca,0x3fd0b27c3365f0a8,4 +np.float64,0x7fe6db8c42edb718,0x3fe786f4b1fe11a6,4 +np.float64,0x7fe62cca1b2c5993,0x3fd425b6c4c9714a,4 +np.float64,0x7feea88850bd5110,0xbfd7bbb432017175,4 +np.float64,0x7fad6c6ae43ad8d5,0x3fe82e49098bc6de,4 +np.float64,0x7fe70542f02e0a85,0x3fec3017960b4822,4 +np.float64,0x7feaf0bcbb35e178,0xbfc3aac74dd322d5,4 +np.float64,0x7fb5e152fe2bc2a5,0x3fd4b27a4720614c,4 +np.float64,0x7fe456ee5be8addc,0xbfe9e15ab5cff229,4 +np.float64,0x7fd4b53a8d296a74,0xbfefff450f503326,4 +np.float64,0x7fd7149d7a2e293a,0x3fef4ef0a9009096,4 +np.float64,0x7fd43fc5a8a87f8a,0x3fe0c929fee9dce7,4 +np.float64,0x7fef97022aff2e03,0x3fd4ea52a813da20,4 +np.float64,0x7fe035950ae06b29,0x3fef4e125394fb05,4 +np.float64,0x7fecd0548979a0a8,0x3fe89d226244037b,4 +np.float64,0x7fc79b3ac22f3675,0xbfee9c9cf78c8270,4 +np.float64,0x7fd8b8e8263171cf,0x3fe8e24437961db0,4 +np.float64,0x7fc288c23e251183,0xbfbaf8eca50986ca,4 +np.float64,0x7fe436b4b6686d68,0xbfecd661741931c4,4 +np.float64,0x7fcdf99abe3bf334,0x3feaa75c90830b92,4 +np.float64,0x7fd9f9739233f2e6,0xbfebbfcb301b0da5,4 +np.float64,0x7fd6fcbd1b2df979,0xbfccf2c77cb65f56,4 +np.float64,0x7fe242a97b248552,0xbfe5b0f13bcbabc8,4 +np.float64,0x7fe38bf3e06717e7,0x3fbc8fa9004d2668,4 +np.float64,0x7fecd0e8d479a1d1,0xbfe886a6b4f73a4a,4 +np.float64,0x7fe958d60232b1ab,0xbfeb7c4cf0cee2dd,4 +np.float64,0x7f9d492b583a9256,0xbfebe975d00221cb,4 +np.float64,0x7fd6c9983bad932f,0xbfefe817621a31f6,4 +np.float64,0x7fed0d7239fa1ae3,0x3feac7e1b6455b4b,4 +np.float64,0x7fe61dac90ec3b58,0x3fef845b9efe8421,4 +np.float64,0x7f9acd3010359a5f,0xbfe460d376200130,4 +np.float64,0x7fedced9673b9db2,0xbfeeaf23445e1944,4 +np.float64,0x7fd9f271a733e4e2,0xbfd41544535ecb78,4 +np.float64,0x7fe703339bee0666,0x3fef93334626b56c,4 +np.float64,0x7fec7761b7b8eec2,0xbfe6da9179e8e714,4 +np.float64,0x7fdd9fff043b3ffd,0xbfc0761dfb8d94f9,4 +np.float64,0x7fdc10ed17b821d9,0x3fe1481e2a26c77f,4 +np.float64,0x7fe7681e72aed03c,0x3fefff94a6d47c84,4 +np.float64,0x7fe18c29e1e31853,0x3fe86ebd2fd89456,4 +np.float64,0x7fb2fb273c25f64d,0xbfefc136f57e06de,4 +np.float64,0x7fac2bbb90385776,0x3fe25d8e3cdae7e3,4 +np.float64,0x7fed16789efa2cf0,0x3fe94555091fdfd9,4 +np.float64,0x7fd8fe8f7831fd1e,0xbfed58d520361902,4 +np.float64,0x7fa59bde3c2b37bb,0x3fef585391c077ff,4 +np.float64,0x7fda981b53353036,0x3fde02ca08737b5f,4 +np.float64,0x7fd29f388aa53e70,0xbfe04f5499246df2,4 +np.float64,0x7fcd0232513a0464,0xbfd9737f2f565829,4 +np.float64,0x7fe9a881bcf35102,0xbfe079cf285b35dd,4 +np.float64,0x7fdbe399a9b7c732,0x3fe965bc4220f340,4 +np.float64,0x7feb77414af6ee82,0xbfb7df2fcd491f55,4 +np.float64,0x7fa26e86c424dd0d,0xbfea474c3d65b9be,4 +np.float64,0x7feaee869e35dd0c,0xbfd7b333a888cd14,4 +np.float64,0x7fcbd67f6137acfe,0xbfe15a7a15dfcee6,4 +np.float64,0x7fe36991e766d323,0xbfeb288077c4ed9f,4 +np.float64,0x7fdcf4f4fcb9e9e9,0xbfea331ef7a75e7b,4 +np.float64,0x7fbe3445643c688a,0x3fedf21b94ae8e37,4 +np.float64,0x7fd984cfd2b3099f,0x3fc0d3ade71c395e,4 +np.float64,0x7fdec987b23d930e,0x3fe4af5e48f6c26e,4 
+np.float64,0x7fde56a9953cad52,0x3fc8e7762cefb8b0,4 +np.float64,0x7fd39fb446273f68,0xbfe6c3443208f44d,4 +np.float64,0x7fc609c1a72c1382,0x3fe884e639571baa,4 +np.float64,0x7fe001be4b20037c,0xbfed0d90cbcb6010,4 +np.float64,0x7fce7ace283cf59b,0xbfd0303792e51f49,4 +np.float64,0x7fe27ba93da4f751,0x3fe548b5ce740d71,4 +np.float64,0x7fcc13c79b38278e,0xbfe2e14f5b64a1e9,4 +np.float64,0x7fc058550620b0a9,0x3fe44bb55ebd0590,4 +np.float64,0x7fa4ba8bf8297517,0x3fee59b39f9d08c4,4 +np.float64,0x7fe50d6872ea1ad0,0xbfea1eaa2d059e13,4 +np.float64,0x7feb7e33b476fc66,0xbfeff28a4424dd3e,4 +np.float64,0x7fe2d7d2a165afa4,0xbfdbaff0ba1ea460,4 +np.float64,0xffd126654b224cca,0xbfef0cd3031fb97c,4 +np.float64,0xffb5f884942bf108,0x3fe0de589bea2e4c,4 +np.float64,0xffe011b4bfe02369,0xbfe805a0edf1e1f2,4 +np.float64,0xffec13eae9b827d5,0x3fb5f30347d78447,4 +np.float64,0xffa6552ae82caa50,0x3fb1ecee60135f2f,4 +np.float64,0xffb62d38b02c5a70,0x3fbd35903148fd12,4 +np.float64,0xffe2c44ea425889d,0xbfd7616547f99a7d,4 +np.float64,0xffea24c61a74498c,0x3fef4a1b15ae9005,4 +np.float64,0xffd23a4ab2a47496,0x3fe933bfaa569ae9,4 +np.float64,0xffc34a073d269410,0xbfeec0f510bb7474,4 +np.float64,0xffeead84cfbd5b09,0x3feb2d635e5a78bd,4 +np.float64,0xffcfd8f3b43fb1e8,0xbfdd59625801771b,4 +np.float64,0xffd3c7f662a78fec,0x3f9cf3209edfbc4e,4 +np.float64,0xffe7b7e4f72f6fca,0xbfefdcff4925632c,4 +np.float64,0xffe48cab05e91956,0x3fe6b41217948423,4 +np.float64,0xffeb6980b336d301,0xbfca5de148f69324,4 +np.float64,0xffe3f15c4aa7e2b8,0xbfeb18efae892081,4 +np.float64,0xffcf290c713e5218,0x3fefe6f1a513ed26,4 +np.float64,0xffd80979b43012f4,0xbfde6c8df91af976,4 +np.float64,0xffc3181e0026303c,0x3fe7448f681def38,4 +np.float64,0xffedfa68f97bf4d1,0xbfeca6efb802d109,4 +np.float64,0xffca0931c0341264,0x3fe31b9f073b08cd,4 +np.float64,0xffe4c44934e98892,0x3feda393a2e8a0f7,4 +np.float64,0xffe65bb56f2cb76a,0xbfeffaf638a4b73e,4 +np.float64,0xffe406a332a80d46,0x3fe8151dadb853c1,4 +np.float64,0xffdb7eae9c36fd5e,0xbfeff89abf5ab16e,4 +np.float64,0xffe245a02da48b40,0x3fef1fb43e85f4b8,4 +np.float64,0xffe2bafa732575f4,0x3fcbab115c6fd86e,4 +np.float64,0xffe8b1eedb7163dd,0x3feff263df6f6b12,4 +np.float64,0xffe6c76c796d8ed8,0xbfe61a8668511293,4 +np.float64,0xffefe327d1ffc64f,0xbfd9b92887a84827,4 +np.float64,0xffa452180c28a430,0xbfa9b9e578a4e52f,4 +np.float64,0xffe9867d0bf30cf9,0xbfca577867588408,4 +np.float64,0xffdfe9b923bfd372,0x3fdab5c15f085c2d,4 +np.float64,0xffed590c6abab218,0xbfd7e7b6c5a120e6,4 +np.float64,0xffeaebcfbab5d79f,0x3fed58be8a9e2c3b,4 +np.float64,0xffe2ba83a8257507,0x3fe6c42a4ac1d4d9,4 +np.float64,0xffe01d5b0ee03ab6,0xbfe5dad6c9247db7,4 +np.float64,0xffe51095d52a212b,0x3fef822cebc32d8e,4 +np.float64,0xffebd7a901b7af51,0xbfe5e63f3e3b1185,4 +np.float64,0xffe4efdcde29dfb9,0xbfe811294dfa758f,4 +np.float64,0xffe3be1aa4a77c35,0x3fdd8dcfcd409bb1,4 +np.float64,0xffbe6f2f763cde60,0x3fd13766e43bd622,4 +np.float64,0xffeed3d80fbda7af,0x3fec10a23c1b7a4a,4 +np.float64,0xffd6ebff37add7fe,0xbfe6177411607c86,4 +np.float64,0xffe85a90f4b0b521,0x3fc09fdd66c8fde9,4 +np.float64,0xffea3d58c2b47ab1,0x3feb5bd4a04b3562,4 +np.float64,0xffef675be6beceb7,0x3fecd840683d1044,4 +np.float64,0xff726a088024d400,0x3feff2b4f47b5214,4 +np.float64,0xffc90856733210ac,0xbfe3c6ffbf6840a5,4 +np.float64,0xffc0b58d9a216b1c,0xbfe10314267d0611,4 +np.float64,0xffee1f3d0abc3e79,0xbfd12ea7efea9067,4 +np.float64,0xffd988c41a331188,0x3febe83802d8a32e,4 +np.float64,0xffe8f1ac9bb1e358,0xbfdbf5fa7e84f2f2,4 +np.float64,0xffe47af279e8f5e4,0x3fef11e339e5fa78,4 +np.float64,0xff9960a7f832c140,0xbfa150363f8ec5b2,4 
+np.float64,0xffcac40fa7358820,0xbfec3d5847a3df1d,4 +np.float64,0xffcb024a9d360494,0xbfd060fa31fd6b6a,4 +np.float64,0xffe385ffb3270bff,0xbfee6859e8dcd9e8,4 +np.float64,0xffef62f2c53ec5e5,0x3fe0a71ffddfc718,4 +np.float64,0xffed87ff20fb0ffd,0xbfe661db7c4098e3,4 +np.float64,0xffe369278526d24e,0x3fd64d89a41822fc,4 +np.float64,0xff950288c02a0520,0x3fe1df91d1ad7d5c,4 +np.float64,0xffe70e7c2cee1cf8,0x3fc9fece08df2fd8,4 +np.float64,0xffbaf020b635e040,0xbfc68c43ff9911a7,4 +np.float64,0xffee0120b0fc0240,0x3f9f792e17b490b0,4 +np.float64,0xffe1fa4be7a3f498,0xbfef4b18ab4b319e,4 +np.float64,0xffe61887bf2c310f,0x3fe846714826cb32,4 +np.float64,0xffdc3cf77f3879ee,0x3fe033b948a36125,4 +np.float64,0xffcc2b86f238570c,0xbfefdcceac3f220f,4 +np.float64,0xffe1f030c0a3e061,0x3fef502a808c359a,4 +np.float64,0xffb872c4ee30e588,0x3fef66ed8d3e6175,4 +np.float64,0xffeac8fc617591f8,0xbfe5d8448602aac9,4 +np.float64,0xffe5be16afab7c2d,0x3fee75ccde3cd14d,4 +np.float64,0xffae230ad83c4610,0xbfe49bbe6074d459,4 +np.float64,0xffc8fbeff531f7e0,0x3f77201e0c927f97,4 +np.float64,0xffdc314f48b8629e,0x3fef810dfc5db118,4 +np.float64,0xffec1f8970783f12,0x3fe15567102e042a,4 +np.float64,0xffc6995f902d32c0,0xbfecd5d2eedf342c,4 +np.float64,0xffdc7af76b38f5ee,0xbfd6e754476ab320,4 +np.float64,0xffb30cf8682619f0,0x3fd5ac3dfc4048d0,4 +np.float64,0xffd3a77695a74eee,0xbfefb5d6889e36e9,4 +np.float64,0xffd8b971803172e4,0xbfeb7f62f0b6c70b,4 +np.float64,0xffde4c0234bc9804,0xbfed50ba9e16d5e0,4 +np.float64,0xffb62b3f342c5680,0xbfeabc0de4069b84,4 +np.float64,0xff9af5674035eac0,0xbfed6c198b6b1bd8,4 +np.float64,0xffdfe20cb43fc41a,0x3fb11f8238f66306,4 +np.float64,0xffd2ecd7a0a5d9b0,0xbfec17ef1a62b1e3,4 +np.float64,0xffce60f7863cc1f0,0x3fe6dbcad3e3a006,4 +np.float64,0xffbbb8306a377060,0xbfbfd0fbef485c4c,4 +np.float64,0xffd1b2bd2b23657a,0xbfda3e046d987b99,4 +np.float64,0xffc480f4092901e8,0xbfeeff0427f6897b,4 +np.float64,0xffe6e02d926dc05a,0xbfcd59552778890b,4 +np.float64,0xffd302e5b7a605cc,0xbfee7c08641366b0,4 +np.float64,0xffec2eb92f785d72,0xbfef5c9c7f771050,4 +np.float64,0xffea3e31a9747c62,0xbfc49cd54755faf0,4 +np.float64,0xffce0a4e333c149c,0x3feeb9a6d0db4aee,4 +np.float64,0xffdc520a2db8a414,0x3fefc7b72613dcd0,4 +np.float64,0xffe056b968a0ad72,0xbfe47a9fe1f827fb,4 +np.float64,0xffe5a10f4cab421e,0x3fec2b1f74b73dec,4 diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-sinh.csv b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-sinh.csv new file mode 100644 index 0000000..5888c91 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-sinh.csv @@ -0,0 +1,1429 @@ +dtype,input,output,ulperrortol +np.float32,0xfee27582,0xff800000,2 +np.float32,0xff19f092,0xff800000,2 +np.float32,0xbf393576,0xbf49cb31,2 +np.float32,0x8020fdea,0x8020fdea,2 +np.float32,0x455f4e,0x455f4e,2 +np.float32,0xff718c35,0xff800000,2 +np.float32,0x3f3215e3,0x3f40cce5,2 +np.float32,0x19e833,0x19e833,2 +np.float32,0xff2dcd49,0xff800000,2 +np.float32,0x7e8f6c95,0x7f800000,2 +np.float32,0xbf159dac,0xbf1e47a5,2 +np.float32,0x100d3d,0x100d3d,2 +np.float32,0xff673441,0xff800000,2 +np.float32,0x80275355,0x80275355,2 +np.float32,0x4812d0,0x4812d0,2 +np.float32,0x8072b956,0x8072b956,2 +np.float32,0xff3bb918,0xff800000,2 +np.float32,0x0,0x0,2 +np.float32,0xfe327798,0xff800000,2 +np.float32,0x41d4e2,0x41d4e2,2 +np.float32,0xfe34b1b8,0xff800000,2 +np.float32,0x80199f72,0x80199f72,2 +np.float32,0x807242ce,0x807242ce,2 +np.float32,0x3ef4202d,0x3efd7b48,2 +np.float32,0x763529,0x763529,2
+np.float32,0x4f6662,0x4f6662,2 +np.float32,0x3f18efe9,0x3f2232b5,2 +np.float32,0x80701846,0x80701846,2 +np.float32,0x3f599948,0x3f74c393,2 +np.float32,0x5a3d69,0x5a3d69,2 +np.float32,0xbf4a7e65,0xbf6047a3,2 +np.float32,0xff0d4c82,0xff800000,2 +np.float32,0x7a74db,0x7a74db,2 +np.float32,0x803388e6,0x803388e6,2 +np.float32,0x7f4430bb,0x7f800000,2 +np.float32,0x14c5b1,0x14c5b1,2 +np.float32,0xfa113400,0xff800000,2 +np.float32,0x7f4b3209,0x7f800000,2 +np.float32,0x8038d88c,0x8038d88c,2 +np.float32,0xbef2f9de,0xbefc330b,2 +np.float32,0xbe147b38,0xbe15008f,2 +np.float32,0x2b61e6,0x2b61e6,2 +np.float32,0x80000001,0x80000001,2 +np.float32,0x8060456c,0x8060456c,2 +np.float32,0x3f30fa82,0x3f3f6a99,2 +np.float32,0xfd1f0220,0xff800000,2 +np.float32,0xbf2b7555,0xbf389151,2 +np.float32,0xff100b7a,0xff800000,2 +np.float32,0x70d3cd,0x70d3cd,2 +np.float32,0x2a8d4a,0x2a8d4a,2 +np.float32,0xbf7b733f,0xbf92f05f,2 +np.float32,0x3f7106dc,0x3f8b1fc6,2 +np.float32,0x3f39da7a,0x3f4a9d79,2 +np.float32,0x3f5dd73f,0x3f7aaab5,2 +np.float32,0xbe8c8754,0xbe8e4cba,2 +np.float32,0xbf6c74c9,0xbf87c556,2 +np.float32,0x800efbbb,0x800efbbb,2 +np.float32,0xff054ab5,0xff800000,2 +np.float32,0x800b4b46,0x800b4b46,2 +np.float32,0xff77fd74,0xff800000,2 +np.float32,0x257d0,0x257d0,2 +np.float32,0x7caa0c,0x7caa0c,2 +np.float32,0x8025d24d,0x8025d24d,2 +np.float32,0x3d9f1b60,0x3d9f445c,2 +np.float32,0xbe3bf6e8,0xbe3d0595,2 +np.float32,0x54bb93,0x54bb93,2 +np.float32,0xbf3e6a45,0xbf507716,2 +np.float32,0x3f4bb26e,0x3f61e1cd,2 +np.float32,0x3f698edc,0x3f85aac5,2 +np.float32,0xff7bd0ef,0xff800000,2 +np.float32,0xbed07b68,0xbed64a8e,2 +np.float32,0xbf237c72,0xbf2ed3d2,2 +np.float32,0x27b0fa,0x27b0fa,2 +np.float32,0x3f7606d1,0x3f8ed7d6,2 +np.float32,0x790dc0,0x790dc0,2 +np.float32,0x7f68f3ac,0x7f800000,2 +np.float32,0xbed39288,0xbed9a52f,2 +np.float32,0x3f6f8266,0x3f8a0187,2 +np.float32,0x3fbdca,0x3fbdca,2 +np.float32,0xbf7c3e5d,0xbf938b2c,2 +np.float32,0x802321a8,0x802321a8,2 +np.float32,0x3eecab66,0x3ef53031,2 +np.float32,0x62b324,0x62b324,2 +np.float32,0x3f13afac,0x3f1c03fe,2 +np.float32,0xff315ad7,0xff800000,2 +np.float32,0xbf1fac0d,0xbf2a3a63,2 +np.float32,0xbf543984,0xbf6d61d6,2 +np.float32,0x71a212,0x71a212,2 +np.float32,0x114fbe,0x114fbe,2 +np.float32,0x3f5b6ff2,0x3f77505f,2 +np.float32,0xff6ff89e,0xff800000,2 +np.float32,0xff4527a1,0xff800000,2 +np.float32,0x22cb3,0x22cb3,2 +np.float32,0x7f53bb6b,0x7f800000,2 +np.float32,0xff3d2dea,0xff800000,2 +np.float32,0xfd21dac0,0xff800000,2 +np.float32,0xfc486140,0xff800000,2 +np.float32,0x7e2b693a,0x7f800000,2 +np.float32,0x8022a9fb,0x8022a9fb,2 +np.float32,0x80765de0,0x80765de0,2 +np.float32,0x13d299,0x13d299,2 +np.float32,0x7ee53713,0x7f800000,2 +np.float32,0xbde1c770,0xbde23c96,2 +np.float32,0xbd473fc0,0xbd4753de,2 +np.float32,0x3f1cb455,0x3f26acf3,2 +np.float32,0x683e49,0x683e49,2 +np.float32,0x3ed5a9fc,0x3edbeb79,2 +np.float32,0x3f4fe3f6,0x3f67814f,2 +np.float32,0x802a2bce,0x802a2bce,2 +np.float32,0x7e951b4c,0x7f800000,2 +np.float32,0xbe6eb260,0xbe70dd44,2 +np.float32,0xbe3daca8,0xbe3ec2cb,2 +np.float32,0xbe9c38b2,0xbe9ea822,2 +np.float32,0xff2e29dc,0xff800000,2 +np.float32,0x7f62c7cc,0x7f800000,2 +np.float32,0xbf6799a4,0xbf84416c,2 +np.float32,0xbe30a7f0,0xbe318898,2 +np.float32,0xc83d9,0xc83d9,2 +np.float32,0x3f05abf4,0x3f0bd447,2 +np.float32,0x7e9b018a,0x7f800000,2 +np.float32,0xbf0ed72e,0xbf165e5b,2 +np.float32,0x8011ac8c,0x8011ac8c,2 +np.float32,0xbeb7c706,0xbebbbfcb,2 +np.float32,0x803637f9,0x803637f9,2 +np.float32,0xfe787cc8,0xff800000,2 +np.float32,0x3f533d4b,0x3f6c0a50,2 
+np.float32,0x3f5c0f1c,0x3f782dde,2 +np.float32,0x3f301f36,0x3f3e590d,2 +np.float32,0x2dc929,0x2dc929,2 +np.float32,0xff15018a,0xff800000,2 +np.float32,0x3f4d0c56,0x3f63afeb,2 +np.float32,0xbf7a2ae3,0xbf91f6e4,2 +np.float32,0xbe771b84,0xbe798346,2 +np.float32,0x80800000,0x80800000,2 +np.float32,0x7f5689ba,0x7f800000,2 +np.float32,0x3f1c3177,0x3f2610df,2 +np.float32,0x3f1b9664,0x3f255825,2 +np.float32,0x3f7e5066,0x3f9520d4,2 +np.float32,0xbf1935f8,0xbf2285ab,2 +np.float32,0x3f096cc7,0x3f101ef9,2 +np.float32,0x8030c180,0x8030c180,2 +np.float32,0x6627ed,0x6627ed,2 +np.float32,0x454595,0x454595,2 +np.float32,0x7de66a33,0x7f800000,2 +np.float32,0xbf800000,0xbf966cfe,2 +np.float32,0xbf35c0a8,0xbf456939,2 +np.float32,0x3f6a6266,0x3f8643e0,2 +np.float32,0x3f0cbcee,0x3f13ef6a,2 +np.float32,0x7efd1e58,0x7f800000,2 +np.float32,0xfe9a74c6,0xff800000,2 +np.float32,0x807ebe6c,0x807ebe6c,2 +np.float32,0x80656736,0x80656736,2 +np.float32,0x800e0608,0x800e0608,2 +np.float32,0xbf30e39a,0xbf3f4e00,2 +np.float32,0x802015fd,0x802015fd,2 +np.float32,0x3e3ce26d,0x3e3df519,2 +np.float32,0x7ec142ac,0x7f800000,2 +np.float32,0xbf68c9ce,0xbf851c78,2 +np.float32,0xfede8356,0xff800000,2 +np.float32,0xbf1507ce,0xbf1d978d,2 +np.float32,0x3e53914c,0x3e551374,2 +np.float32,0x7f3e1c14,0x7f800000,2 +np.float32,0x8070d2ba,0x8070d2ba,2 +np.float32,0xbf4eb793,0xbf65ecee,2 +np.float32,0x7365a6,0x7365a6,2 +np.float32,0x8045cba2,0x8045cba2,2 +np.float32,0x7e4af521,0x7f800000,2 +np.float32,0xbf228625,0xbf2da9e1,2 +np.float32,0x7ee0536c,0x7f800000,2 +np.float32,0x3e126607,0x3e12e5d5,2 +np.float32,0x80311d92,0x80311d92,2 +np.float32,0xbf386b8b,0xbf48ca54,2 +np.float32,0x7f800000,0x7f800000,2 +np.float32,0x8049ec7a,0x8049ec7a,2 +np.float32,0xbf1dfde4,0xbf2836be,2 +np.float32,0x7e719a8c,0x7f800000,2 +np.float32,0x3eb9c856,0x3ebde2e6,2 +np.float32,0xfe3efda8,0xff800000,2 +np.float32,0xbe89d60c,0xbe8b81d1,2 +np.float32,0x3eaad338,0x3eae0317,2 +np.float32,0x7f4e5217,0x7f800000,2 +np.float32,0x3e9d0f40,0x3e9f88ce,2 +np.float32,0xbe026708,0xbe02c155,2 +np.float32,0x5fc22f,0x5fc22f,2 +np.float32,0x1c4572,0x1c4572,2 +np.float32,0xbed89d96,0xbedf22c5,2 +np.float32,0xbf3debee,0xbf4fd441,2 +np.float32,0xbf465520,0xbf5ac6e5,2 +np.float32,0x3f797081,0x3f9169b3,2 +np.float32,0xbf250734,0xbf30b2aa,2 +np.float32,0x7f5068e9,0x7f800000,2 +np.float32,0x3f1b814e,0x3f253f0c,2 +np.float32,0xbf27c5d3,0xbf340b05,2 +np.float32,0x3f1b78ae,0x3f2534c8,2 +np.float32,0x8059b51a,0x8059b51a,2 +np.float32,0x8059f182,0x8059f182,2 +np.float32,0xbf1bb36e,0xbf257ab8,2 +np.float32,0x41ac35,0x41ac35,2 +np.float32,0x68f41f,0x68f41f,2 +np.float32,0xbea504dc,0xbea7e40f,2 +np.float32,0x1,0x1,2 +np.float32,0x3e96b5b0,0x3e98e542,2 +np.float32,0x7f7fffff,0x7f800000,2 +np.float32,0x3c557a80,0x3c557c0c,2 +np.float32,0x800ca3ec,0x800ca3ec,2 +np.float32,0x8077d4aa,0x8077d4aa,2 +np.float32,0x3f000af0,0x3f0572d6,2 +np.float32,0x3e0434dd,0x3e0492f8,2 +np.float32,0x7d1a710a,0x7f800000,2 +np.float32,0x3f70f996,0x3f8b15f8,2 +np.float32,0x8033391d,0x8033391d,2 +np.float32,0x11927c,0x11927c,2 +np.float32,0x7f7784be,0x7f800000,2 +np.float32,0x7acb22af,0x7f800000,2 +np.float32,0x7e8b153c,0x7f800000,2 +np.float32,0x66d402,0x66d402,2 +np.float32,0xfed6e7b0,0xff800000,2 +np.float32,0x7f6872d3,0x7f800000,2 +np.float32,0x1bd49c,0x1bd49c,2 +np.float32,0xfdc4f1b8,0xff800000,2 +np.float32,0xbed8a466,0xbedf2a33,2 +np.float32,0x7ee789,0x7ee789,2 +np.float32,0xbece94b4,0xbed43b52,2 +np.float32,0x3cf3f734,0x3cf4006f,2 +np.float32,0x7e44aa00,0x7f800000,2 +np.float32,0x7f19e99c,0x7f800000,2 
+np.float32,0x806ff1bc,0x806ff1bc,2 +np.float32,0x80296934,0x80296934,2 +np.float32,0x7f463363,0x7f800000,2 +np.float32,0xbf212ac3,0xbf2c06bb,2 +np.float32,0x3dc63778,0x3dc686ba,2 +np.float32,0x7f1b4328,0x7f800000,2 +np.float32,0x6311f6,0x6311f6,2 +np.float32,0xbf6b6fb6,0xbf870751,2 +np.float32,0xbf2c44cf,0xbf399155,2 +np.float32,0x3e7a67bc,0x3e7ce887,2 +np.float32,0x7f57c5f7,0x7f800000,2 +np.float32,0x7f2bb4ff,0x7f800000,2 +np.float32,0xbe9d448e,0xbe9fc0a4,2 +np.float32,0xbf4840f0,0xbf5d4f6b,2 +np.float32,0x7f1e1176,0x7f800000,2 +np.float32,0xff76638e,0xff800000,2 +np.float32,0xff055555,0xff800000,2 +np.float32,0x3f32b82b,0x3f419834,2 +np.float32,0xff363aa8,0xff800000,2 +np.float32,0x7f737fd0,0x7f800000,2 +np.float32,0x3da5d798,0x3da60602,2 +np.float32,0x3f1cc126,0x3f26bc3e,2 +np.float32,0x7eb07541,0x7f800000,2 +np.float32,0x3f7b2ff2,0x3f92bd2a,2 +np.float32,0x474f7,0x474f7,2 +np.float32,0x7fc00000,0x7fc00000,2 +np.float32,0xff2b0a4e,0xff800000,2 +np.float32,0xfeb24f16,0xff800000,2 +np.float32,0x2cb9fc,0x2cb9fc,2 +np.float32,0x67189d,0x67189d,2 +np.float32,0x8033d854,0x8033d854,2 +np.float32,0xbe85e94c,0xbe87717a,2 +np.float32,0x80767c6c,0x80767c6c,2 +np.float32,0x7ea84d65,0x7f800000,2 +np.float32,0x3f024bc7,0x3f07fead,2 +np.float32,0xbdcb0100,0xbdcb5625,2 +np.float32,0x3f160a9e,0x3f1ec7c9,2 +np.float32,0xff1734c8,0xff800000,2 +np.float32,0x7f424d5e,0x7f800000,2 +np.float32,0xbf75b215,0xbf8e9862,2 +np.float32,0x3f262a42,0x3f3214c4,2 +np.float32,0xbf4cfb53,0xbf639927,2 +np.float32,0x3f4ac8b8,0x3f60aa7c,2 +np.float32,0x3e90e593,0x3e92d6b3,2 +np.float32,0xbf66bccf,0xbf83a2d8,2 +np.float32,0x7d3d851a,0x7f800000,2 +np.float32,0x7bac783c,0x7f800000,2 +np.float32,0x8001c626,0x8001c626,2 +np.float32,0xbdffd480,0xbe003f7b,2 +np.float32,0x7f6680bf,0x7f800000,2 +np.float32,0xbecf448e,0xbed4f9bb,2 +np.float32,0x584c7,0x584c7,2 +np.float32,0x3f3e8ea0,0x3f50a5fb,2 +np.float32,0xbf5a5f04,0xbf75d56e,2 +np.float32,0x8065ae47,0x8065ae47,2 +np.float32,0xbf48dce3,0xbf5e1dba,2 +np.float32,0xbe8dae2e,0xbe8f7ed8,2 +np.float32,0x3f7ca6ab,0x3f93dace,2 +np.float32,0x4c3e81,0x4c3e81,2 +np.float32,0x80000000,0x80000000,2 +np.float32,0x3ee1f7d9,0x3ee96033,2 +np.float32,0x80588c6f,0x80588c6f,2 +np.float32,0x5ba34e,0x5ba34e,2 +np.float32,0x80095d28,0x80095d28,2 +np.float32,0xbe7ba198,0xbe7e2bdd,2 +np.float32,0xbe0bdcb4,0xbe0c4c22,2 +np.float32,0x1776f7,0x1776f7,2 +np.float32,0x80328b2a,0x80328b2a,2 +np.float32,0x3e978d37,0x3e99c63e,2 +np.float32,0x7ed50906,0x7f800000,2 +np.float32,0x3f776a54,0x3f8fe2bd,2 +np.float32,0xbed624c4,0xbedc7120,2 +np.float32,0x7f0b6a31,0x7f800000,2 +np.float32,0x7eb13913,0x7f800000,2 +np.float32,0xbe733684,0xbe758190,2 +np.float32,0x80016474,0x80016474,2 +np.float32,0x7a51ee,0x7a51ee,2 +np.float32,0x3f6cb91e,0x3f87f729,2 +np.float32,0xbd99b050,0xbd99d540,2 +np.float32,0x7c6e3cba,0x7f800000,2 +np.float32,0xbf00179a,0xbf05811e,2 +np.float32,0x3e609b29,0x3e626954,2 +np.float32,0xff3fd71a,0xff800000,2 +np.float32,0x5d8c2,0x5d8c2,2 +np.float32,0x7ee93662,0x7f800000,2 +np.float32,0x4b0b31,0x4b0b31,2 +np.float32,0x3ec243b7,0x3ec6f594,2 +np.float32,0x804d60f1,0x804d60f1,2 +np.float32,0xbf0cb784,0xbf13e929,2 +np.float32,0x3f13b74d,0x3f1c0cee,2 +np.float32,0xfe37cb64,0xff800000,2 +np.float32,0x1a88,0x1a88,2 +np.float32,0x3e22a472,0x3e2353ba,2 +np.float32,0x7f07d6a0,0x7f800000,2 +np.float32,0x3f78f435,0x3f910bb5,2 +np.float32,0x555a4a,0x555a4a,2 +np.float32,0x3e306c1f,0x3e314be3,2 +np.float32,0x8005877c,0x8005877c,2 +np.float32,0x4df389,0x4df389,2 +np.float32,0x8069ffc7,0x8069ffc7,2 
+np.float32,0x3f328f24,0x3f4164c6,2 +np.float32,0x53a31b,0x53a31b,2 +np.float32,0xbe4d6768,0xbe4ec8be,2 +np.float32,0x7fa00000,0x7fe00000,2 +np.float32,0x3f484c1b,0x3f5d5e2f,2 +np.float32,0x8038be05,0x8038be05,2 +np.float32,0x58ac0f,0x58ac0f,2 +np.float32,0x7ed7fb72,0x7f800000,2 +np.float32,0x5a22e1,0x5a22e1,2 +np.float32,0xbebb7394,0xbebfaad6,2 +np.float32,0xbda98160,0xbda9b2ef,2 +np.float32,0x7f3e5c42,0x7f800000,2 +np.float32,0xfed204ae,0xff800000,2 +np.float32,0xbf5ef782,0xbf7c3ec5,2 +np.float32,0xbef7a0a8,0xbf00b292,2 +np.float32,0xfee6e176,0xff800000,2 +np.float32,0xfe121140,0xff800000,2 +np.float32,0xfe9e13be,0xff800000,2 +np.float32,0xbf3c98b1,0xbf4e2003,2 +np.float32,0x77520d,0x77520d,2 +np.float32,0xf17b2,0xf17b2,2 +np.float32,0x724d2f,0x724d2f,2 +np.float32,0x7eb326f5,0x7f800000,2 +np.float32,0x3edd6bf2,0x3ee4636e,2 +np.float32,0x350f57,0x350f57,2 +np.float32,0xff7d4435,0xff800000,2 +np.float32,0x802b2b9d,0x802b2b9d,2 +np.float32,0xbf7fbeee,0xbf963acf,2 +np.float32,0x804f3100,0x804f3100,2 +np.float32,0x7c594a71,0x7f800000,2 +np.float32,0x3ef49340,0x3efdfbb6,2 +np.float32,0x2e0659,0x2e0659,2 +np.float32,0x8006d5fe,0x8006d5fe,2 +np.float32,0xfd2a00b0,0xff800000,2 +np.float32,0xbee1c016,0xbee922ed,2 +np.float32,0x3e3b7de8,0x3e3c8a8b,2 +np.float32,0x805e6bba,0x805e6bba,2 +np.float32,0x1a7da2,0x1a7da2,2 +np.float32,0x6caba4,0x6caba4,2 +np.float32,0x802f7eab,0x802f7eab,2 +np.float32,0xff68b16b,0xff800000,2 +np.float32,0x8064f5e5,0x8064f5e5,2 +np.float32,0x2e39b4,0x2e39b4,2 +np.float32,0x800000,0x800000,2 +np.float32,0xfd0334c0,0xff800000,2 +np.float32,0x3e952fc4,0x3e974e7e,2 +np.float32,0x80057d33,0x80057d33,2 +np.float32,0x3ed3ddc4,0x3ed9f6f1,2 +np.float32,0x3f74ce18,0x3f8dedf4,2 +np.float32,0xff6bb7c0,0xff800000,2 +np.float32,0xff43bc21,0xff800000,2 +np.float32,0x80207570,0x80207570,2 +np.float32,0x7e1dda75,0x7f800000,2 +np.float32,0x3efe335c,0x3f0462ff,2 +np.float32,0xbf252c0c,0xbf30df70,2 +np.float32,0x3ef4b8e3,0x3efe25ba,2 +np.float32,0x7c33938d,0x7f800000,2 +np.float32,0x3eb1593c,0x3eb4ea95,2 +np.float32,0xfe1d0068,0xff800000,2 +np.float32,0xbf10da9b,0xbf18b551,2 +np.float32,0xfeb65748,0xff800000,2 +np.float32,0xfe8c6014,0xff800000,2 +np.float32,0x3f0503e2,0x3f0b14e3,2 +np.float32,0xfe5e5248,0xff800000,2 +np.float32,0xbd10afa0,0xbd10b754,2 +np.float32,0xff64b609,0xff800000,2 +np.float32,0xbf674a96,0xbf84089c,2 +np.float32,0x7f5d200d,0x7f800000,2 +np.float32,0x3cf44900,0x3cf45245,2 +np.float32,0x8044445a,0x8044445a,2 +np.float32,0xff35b676,0xff800000,2 +np.float32,0x806452cd,0x806452cd,2 +np.float32,0xbf2930fb,0xbf35c7b4,2 +np.float32,0x7e500617,0x7f800000,2 +np.float32,0x543719,0x543719,2 +np.float32,0x3ed11068,0x3ed6ec1d,2 +np.float32,0xbd8db068,0xbd8dcd59,2 +np.float32,0x3ede62c8,0x3ee571d0,2 +np.float32,0xbf00a410,0xbf061f9c,2 +np.float32,0xbf44fa39,0xbf58ff5b,2 +np.float32,0x3f1c3114,0x3f261069,2 +np.float32,0xbdea6210,0xbdeae521,2 +np.float32,0x80059f6d,0x80059f6d,2 +np.float32,0xbdba15f8,0xbdba578c,2 +np.float32,0x6d8a61,0x6d8a61,2 +np.float32,0x6f5428,0x6f5428,2 +np.float32,0x18d0e,0x18d0e,2 +np.float32,0x50e131,0x50e131,2 +np.float32,0x3f2f52be,0x3f3d5a7e,2 +np.float32,0x7399d8,0x7399d8,2 +np.float32,0x106524,0x106524,2 +np.float32,0x7ebf1c53,0x7f800000,2 +np.float32,0x80276458,0x80276458,2 +np.float32,0x3ebbde67,0x3ec01ceb,2 +np.float32,0x80144d9d,0x80144d9d,2 +np.float32,0x8017ea6b,0x8017ea6b,2 +np.float32,0xff38f201,0xff800000,2 +np.float32,0x7f2daa82,0x7f800000,2 +np.float32,0x3f3cb7c7,0x3f4e47ed,2 +np.float32,0x7f08c779,0x7f800000,2 
+np.float32,0xbecc907a,0xbed20cec,2 +np.float32,0x7d440002,0x7f800000,2 +np.float32,0xbd410d80,0xbd411fcd,2 +np.float32,0x3d63ae07,0x3d63cc0c,2 +np.float32,0x805a9c13,0x805a9c13,2 +np.float32,0x803bdcdc,0x803bdcdc,2 +np.float32,0xbe88b354,0xbe8a5497,2 +np.float32,0x3f4eaf43,0x3f65e1c2,2 +np.float32,0x3f15e5b8,0x3f1e9c60,2 +np.float32,0x3e8a870c,0x3e8c394e,2 +np.float32,0x7e113de9,0x7f800000,2 +np.float32,0x7ee5ba41,0x7f800000,2 +np.float32,0xbe73d178,0xbe7620eb,2 +np.float32,0xfe972e6a,0xff800000,2 +np.float32,0xbf65567d,0xbf82a25a,2 +np.float32,0x3f38247e,0x3f487010,2 +np.float32,0xbece1c62,0xbed3b918,2 +np.float32,0x442c8d,0x442c8d,2 +np.float32,0x2dc52,0x2dc52,2 +np.float32,0x802ed923,0x802ed923,2 +np.float32,0x788cf8,0x788cf8,2 +np.float32,0x8024888e,0x8024888e,2 +np.float32,0x3f789bde,0x3f90c8fc,2 +np.float32,0x3f5de620,0x3f7abf88,2 +np.float32,0x3f0ffc45,0x3f17b2a7,2 +np.float32,0xbf709678,0xbf8accd4,2 +np.float32,0x12181f,0x12181f,2 +np.float32,0xfe54bbe4,0xff800000,2 +np.float32,0x7f1daba0,0x7f800000,2 +np.float32,0xbf6226df,0xbf805e3c,2 +np.float32,0xbd120610,0xbd120dfb,2 +np.float32,0x7f75e951,0x7f800000,2 +np.float32,0x80068048,0x80068048,2 +np.float32,0x45f04a,0x45f04a,2 +np.float32,0xff4c4f58,0xff800000,2 +np.float32,0x311604,0x311604,2 +np.float32,0x805e809c,0x805e809c,2 +np.float32,0x3d1d62c0,0x3d1d6caa,2 +np.float32,0x7f14ccf9,0x7f800000,2 +np.float32,0xff10017c,0xff800000,2 +np.float32,0xbf43ec48,0xbf579df4,2 +np.float32,0xff64da57,0xff800000,2 +np.float32,0x7f0622c5,0x7f800000,2 +np.float32,0x7f5460cd,0x7f800000,2 +np.float32,0xff0ef1c6,0xff800000,2 +np.float32,0xbece1146,0xbed3ad13,2 +np.float32,0x3f4d457f,0x3f63fc70,2 +np.float32,0xbdc1da28,0xbdc2244b,2 +np.float32,0xbe46d3f4,0xbe481463,2 +np.float32,0xff36b3d6,0xff800000,2 +np.float32,0xbec2e76c,0xbec7a540,2 +np.float32,0x8078fb81,0x8078fb81,2 +np.float32,0x7ec819cb,0x7f800000,2 +np.float32,0x39c4d,0x39c4d,2 +np.float32,0xbe8cddc2,0xbe8ea670,2 +np.float32,0xbf36dffb,0xbf46d48b,2 +np.float32,0xbf2302a3,0xbf2e4065,2 +np.float32,0x3e7b34a2,0x3e7dbb9a,2 +np.float32,0x3e3d87e1,0x3e3e9d62,2 +np.float32,0x7f3c94b1,0x7f800000,2 +np.float32,0x80455a85,0x80455a85,2 +np.float32,0xfd875568,0xff800000,2 +np.float32,0xbf618103,0xbf7fd1c8,2 +np.float32,0xbe332e3c,0xbe3418ac,2 +np.float32,0x80736b79,0x80736b79,2 +np.float32,0x3f705d9a,0x3f8aa2e6,2 +np.float32,0xbf3a36d2,0xbf4b134b,2 +np.float32,0xfddc55c0,0xff800000,2 +np.float32,0x805606fd,0x805606fd,2 +np.float32,0x3f4f0bc4,0x3f665e25,2 +np.float32,0xfebe7494,0xff800000,2 +np.float32,0xff0c541b,0xff800000,2 +np.float32,0xff0b8e7f,0xff800000,2 +np.float32,0xbcc51640,0xbcc51b1e,2 +np.float32,0x7ec1c4d0,0x7f800000,2 +np.float32,0xfc5c8e00,0xff800000,2 +np.float32,0x7f48d682,0x7f800000,2 +np.float32,0x7d5c7d8d,0x7f800000,2 +np.float32,0x8052ed03,0x8052ed03,2 +np.float32,0x7d4db058,0x7f800000,2 +np.float32,0xff3a65ee,0xff800000,2 +np.float32,0x806eeb93,0x806eeb93,2 +np.float32,0x803f9733,0x803f9733,2 +np.float32,0xbf2d1388,0xbf3a90e3,2 +np.float32,0x68e260,0x68e260,2 +np.float32,0x3e47a69f,0x3e48eb0e,2 +np.float32,0x3f0c4623,0x3f136646,2 +np.float32,0x3f37a831,0x3f47d249,2 +np.float32,0xff153a0c,0xff800000,2 +np.float32,0x2e8086,0x2e8086,2 +np.float32,0xc3f5e,0xc3f5e,2 +np.float32,0x7f31dc14,0x7f800000,2 +np.float32,0xfee37d68,0xff800000,2 +np.float32,0x711d4,0x711d4,2 +np.float32,0x7ede2ce4,0x7f800000,2 +np.float32,0xbf5d76d0,0xbf7a23d0,2 +np.float32,0xbe2b9eb4,0xbe2c6cac,2 +np.float32,0x2b14d7,0x2b14d7,2 +np.float32,0x3ea1db72,0x3ea4910e,2 +np.float32,0x7f3f03f7,0x7f800000,2 
+np.float32,0x92de5,0x92de5,2 +np.float32,0x80322e1b,0x80322e1b,2 +np.float32,0xbf5eb214,0xbf7bdd55,2 +np.float32,0xbf21bf87,0xbf2cba14,2 +np.float32,0xbf5d4b78,0xbf79e73a,2 +np.float32,0xbc302840,0xbc30291e,2 +np.float32,0xfee567c6,0xff800000,2 +np.float32,0x7f70ee14,0x7f800000,2 +np.float32,0x7e5c4b33,0x7f800000,2 +np.float32,0x3f1e7b64,0x3f28ccfd,2 +np.float32,0xbf6309f7,0xbf80ff3e,2 +np.float32,0x1c2fe3,0x1c2fe3,2 +np.float32,0x8e78d,0x8e78d,2 +np.float32,0x7f2fce73,0x7f800000,2 +np.float32,0x7f25f690,0x7f800000,2 +np.float32,0x8074cba5,0x8074cba5,2 +np.float32,0x16975f,0x16975f,2 +np.float32,0x8012cf5c,0x8012cf5c,2 +np.float32,0x7da72138,0x7f800000,2 +np.float32,0xbf563f35,0xbf7025be,2 +np.float32,0x3f69d3f5,0x3f85dcbe,2 +np.float32,0xbf15c148,0xbf1e7184,2 +np.float32,0xbe7a077c,0xbe7c8564,2 +np.float32,0x3ebb6ef1,0x3ebfa5e3,2 +np.float32,0xbe41fde4,0xbe43277b,2 +np.float32,0x7f10b479,0x7f800000,2 +np.float32,0x3e021ace,0x3e02747d,2 +np.float32,0x3e93d984,0x3e95e9be,2 +np.float32,0xfe17e924,0xff800000,2 +np.float32,0xfe21a7cc,0xff800000,2 +np.float32,0x8019b660,0x8019b660,2 +np.float32,0x7e954631,0x7f800000,2 +np.float32,0x7e7330d1,0x7f800000,2 +np.float32,0xbe007d98,0xbe00d3fb,2 +np.float32,0x3ef3870e,0x3efcd077,2 +np.float32,0x7f5bbde8,0x7f800000,2 +np.float32,0x14a5b3,0x14a5b3,2 +np.float32,0x3e84d23f,0x3e8650e8,2 +np.float32,0x80763017,0x80763017,2 +np.float32,0xfe871f36,0xff800000,2 +np.float32,0x7ed43150,0x7f800000,2 +np.float32,0x3cc44547,0x3cc44a16,2 +np.float32,0x3ef0c0fa,0x3ef9b97d,2 +np.float32,0xbede9944,0xbee5ad86,2 +np.float32,0xbf10f0b2,0xbf18cf0a,2 +np.float32,0x3ecdaa78,0x3ed33dd9,2 +np.float32,0x3f7cc058,0x3f93ee6b,2 +np.float32,0x2d952f,0x2d952f,2 +np.float32,0x3f2cf2de,0x3f3a687a,2 +np.float32,0x8029b33c,0x8029b33c,2 +np.float32,0xbf22c737,0xbf2df888,2 +np.float32,0xff53c84a,0xff800000,2 +np.float32,0x40a509,0x40a509,2 +np.float32,0x56abce,0x56abce,2 +np.float32,0xff7fffff,0xff800000,2 +np.float32,0xbf3e67f6,0xbf50741c,2 +np.float32,0xfde67580,0xff800000,2 +np.float32,0x3f103e9b,0x3f17ffc7,2 +np.float32,0x3f3f7232,0x3f51cbe2,2 +np.float32,0x803e6d78,0x803e6d78,2 +np.float32,0x3a61da,0x3a61da,2 +np.float32,0xbc04de80,0xbc04dedf,2 +np.float32,0x7f1e7c52,0x7f800000,2 +np.float32,0x8058ee88,0x8058ee88,2 +np.float32,0x806dd660,0x806dd660,2 +np.float32,0x7e4af9,0x7e4af9,2 +np.float32,0x80702d27,0x80702d27,2 +np.float32,0x802cdad1,0x802cdad1,2 +np.float32,0x3e9b5c23,0x3e9dc149,2 +np.float32,0x7f076e89,0x7f800000,2 +np.float32,0x7f129d68,0x7f800000,2 +np.float32,0x7f6f0b0a,0x7f800000,2 +np.float32,0x7eafafb5,0x7f800000,2 +np.float32,0xbf2ef2ca,0xbf3ce332,2 +np.float32,0xff34c000,0xff800000,2 +np.float32,0x7f559274,0x7f800000,2 +np.float32,0xfed08556,0xff800000,2 +np.float32,0xbf014621,0xbf06d6ad,2 +np.float32,0xff23086a,0xff800000,2 +np.float32,0x6cb33f,0x6cb33f,2 +np.float32,0xfe6e3ffc,0xff800000,2 +np.float32,0x3e6bbec0,0x3e6dd546,2 +np.float32,0x8036afa6,0x8036afa6,2 +np.float32,0xff800000,0xff800000,2 +np.float32,0x3e0ed05c,0x3e0f46ff,2 +np.float32,0x3ec9215c,0x3ece57e6,2 +np.float32,0xbf449fa4,0xbf5888aa,2 +np.float32,0xff2c6640,0xff800000,2 +np.float32,0x7f08f4a7,0x7f800000,2 +np.float32,0xbf4f63e5,0xbf66d4c1,2 +np.float32,0x3f800000,0x3f966cfe,2 +np.float32,0xfe86c7d2,0xff800000,2 +np.float32,0x3f63f969,0x3f81a970,2 +np.float32,0xbd7022d0,0xbd704609,2 +np.float32,0xbead906c,0xbeb0e853,2 +np.float32,0x7ef149ee,0x7f800000,2 +np.float32,0xff0b9ff7,0xff800000,2 +np.float32,0x3f38380d,0x3f4888e7,2 +np.float32,0x3ef3a3e2,0x3efcf09e,2 +np.float32,0xff616477,0xff800000,2 
+np.float32,0x3f3f83e4,0x3f51e2c3,2 +np.float32,0xbf79963c,0xbf918642,2 +np.float32,0x801416f4,0x801416f4,2 +np.float32,0xff75ce6d,0xff800000,2 +np.float32,0xbdbf3588,0xbdbf7cad,2 +np.float32,0xbe6ea938,0xbe70d3dc,2 +np.float32,0x8066f977,0x8066f977,2 +np.float32,0x3f5b5362,0x3f7728aa,2 +np.float32,0xbf72052c,0xbf8bdbd8,2 +np.float32,0xbe21ed74,0xbe229a6f,2 +np.float32,0x8062d19c,0x8062d19c,2 +np.float32,0x3ed8d01f,0x3edf59e6,2 +np.float32,0x803ed42b,0x803ed42b,2 +np.float32,0xbe099a64,0xbe0a0481,2 +np.float32,0xbe173eb4,0xbe17cba2,2 +np.float32,0xbebdcf02,0xbec22faf,2 +np.float32,0x7e3ff29e,0x7f800000,2 +np.float32,0x367c92,0x367c92,2 +np.float32,0xbf5c9db8,0xbf78f4a4,2 +np.float32,0xff0b49ea,0xff800000,2 +np.float32,0x3f4f9bc4,0x3f672001,2 +np.float32,0x85d4a,0x85d4a,2 +np.float32,0x80643e33,0x80643e33,2 +np.float32,0x8013aabd,0x8013aabd,2 +np.float32,0xff6997c3,0xff800000,2 +np.float32,0x3f4dd43c,0x3f64bbb6,2 +np.float32,0xff13bbb9,0xff800000,2 +np.float32,0x3f34efa2,0x3f446187,2 +np.float32,0x3e4b2f10,0x3e4c850d,2 +np.float32,0xfef695c6,0xff800000,2 +np.float32,0x7f7e0057,0x7f800000,2 +np.float32,0x3f6e1b9c,0x3f88fa40,2 +np.float32,0x806e46cf,0x806e46cf,2 +np.float32,0x3f15a88a,0x3f1e546c,2 +np.float32,0xbd2de7d0,0xbd2df530,2 +np.float32,0xbf63cae0,0xbf818854,2 +np.float32,0xbdc3e1a0,0xbdc42e1e,2 +np.float32,0xbf11a038,0xbf199b98,2 +np.float32,0xbec13706,0xbec5d56b,2 +np.float32,0x3f1c5f54,0x3f26478d,2 +np.float32,0x3e9ea97e,0x3ea136b4,2 +np.float32,0xfeb5a508,0xff800000,2 +np.float32,0x7f4698f4,0x7f800000,2 +np.float32,0xff51ee2c,0xff800000,2 +np.float32,0xff5994df,0xff800000,2 +np.float32,0x4b9fb9,0x4b9fb9,2 +np.float32,0xfda10d98,0xff800000,2 +np.float32,0x525555,0x525555,2 +np.float32,0x7ed571ef,0x7f800000,2 +np.float32,0xbf600d18,0xbf7dc50c,2 +np.float32,0x3ec674ca,0x3ecb768b,2 +np.float32,0x3cb69115,0x3cb694f3,2 +np.float32,0x7eac75f2,0x7f800000,2 +np.float32,0x804d4d75,0x804d4d75,2 +np.float32,0xfed5292e,0xff800000,2 +np.float32,0x800ed06a,0x800ed06a,2 +np.float32,0xfec37584,0xff800000,2 +np.float32,0x3ef96ac7,0x3f01b326,2 +np.float32,0x42f743,0x42f743,2 +np.float32,0x3f56f442,0x3f711e39,2 +np.float32,0xbf7ea726,0xbf956375,2 +np.float32,0x806c7202,0x806c7202,2 +np.float32,0xbd8ee980,0xbd8f0733,2 +np.float32,0xbdf2e930,0xbdf37b18,2 +np.float32,0x3f103910,0x3f17f955,2 +np.float32,0xff123e8f,0xff800000,2 +np.float32,0x806e4b5d,0x806e4b5d,2 +np.float32,0xbf4f3bfc,0xbf669f07,2 +np.float32,0xbf070c16,0xbf0d6609,2 +np.float32,0xff00e0ba,0xff800000,2 +np.float32,0xff49d828,0xff800000,2 +np.float32,0x7e47f04a,0x7f800000,2 +np.float32,0x7e984dac,0x7f800000,2 +np.float32,0x3f77473c,0x3f8fc858,2 +np.float32,0x3f017439,0x3f070ac8,2 +np.float32,0x118417,0x118417,2 +np.float32,0xbcf7a2c0,0xbcf7ac68,2 +np.float32,0xfee46fee,0xff800000,2 +np.float32,0x3e42a648,0x3e43d2e9,2 +np.float32,0x80131916,0x80131916,2 +np.float32,0x806209d3,0x806209d3,2 +np.float32,0x807c1f12,0x807c1f12,2 +np.float32,0x2f3696,0x2f3696,2 +np.float32,0xff28722b,0xff800000,2 +np.float32,0x7f1416a1,0x7f800000,2 +np.float32,0x8054e7a1,0x8054e7a1,2 +np.float32,0xbddc39a0,0xbddca656,2 +np.float32,0x7dc60175,0x7f800000,2 +np.float64,0x7fd0ae584da15cb0,0x7ff0000000000000,2 +np.float64,0x7fd41d68e5283ad1,0x7ff0000000000000,2 +np.float64,0x7fe93073bb7260e6,0x7ff0000000000000,2 +np.float64,0x3fb4fd19d229fa34,0x3fb5031f57dbac0f,2 +np.float64,0x85609ce10ac2,0x85609ce10ac2,2 +np.float64,0xbfd7aa12ccaf5426,0xbfd8351003a320e2,2 +np.float64,0x8004487c9b4890fa,0x8004487c9b4890fa,2 +np.float64,0x7fe7584cfd2eb099,0x7ff0000000000000,2 
+np.float64,0x800ea8edc6dd51dc,0x800ea8edc6dd51dc,2 +np.float64,0x3fe0924aa5a12495,0x3fe15276e271c6dc,2 +np.float64,0x3feb1abf6d36357f,0x3fee76b4d3d06964,2 +np.float64,0x3fa8c14534318280,0x3fa8c3bd5ce5923c,2 +np.float64,0x800b9f5915d73eb3,0x800b9f5915d73eb3,2 +np.float64,0xffc05aaa7820b554,0xfff0000000000000,2 +np.float64,0x800157eda8c2afdc,0x800157eda8c2afdc,2 +np.float64,0xffe8d90042b1b200,0xfff0000000000000,2 +np.float64,0x3feda02ea93b405d,0x3ff1057e61d08d59,2 +np.float64,0xffd03b7361a076e6,0xfff0000000000000,2 +np.float64,0x3fe1a8ecd7e351da,0x3fe291eda9080847,2 +np.float64,0xffc5bfdff82b7fc0,0xfff0000000000000,2 +np.float64,0xbfe6fb3d386df67a,0xbfe9022c05df0565,2 +np.float64,0x7fefffffffffffff,0x7ff0000000000000,2 +np.float64,0x7fa10c340c221867,0x7ff0000000000000,2 +np.float64,0x3fe55cbf1daab97e,0x3fe6fc1648258b75,2 +np.float64,0xbfddeb5f60bbd6be,0xbfdf056d4fb5825f,2 +np.float64,0xffddb1a8213b6350,0xfff0000000000000,2 +np.float64,0xbfb20545e4240a88,0xbfb2091579375176,2 +np.float64,0x3f735ded2026bbda,0x3f735df1dad4ee3a,2 +np.float64,0xbfd1eb91efa3d724,0xbfd227c044dead61,2 +np.float64,0xffd737c588ae6f8c,0xfff0000000000000,2 +np.float64,0x3fc46818ec28d032,0x3fc47e416c4237a6,2 +np.float64,0x0,0x0,2 +np.float64,0xffb632097a2c6410,0xfff0000000000000,2 +np.float64,0xbfcb5ae84b36b5d0,0xbfcb905613af55b8,2 +np.float64,0xbfe7b926402f724c,0xbfe9f4f0be6aacc3,2 +np.float64,0x80081840b3f03082,0x80081840b3f03082,2 +np.float64,0x3fe767a656eecf4d,0x3fe98c53b4779de7,2 +np.float64,0x8005834c088b0699,0x8005834c088b0699,2 +np.float64,0x80074e92658e9d26,0x80074e92658e9d26,2 +np.float64,0x80045d60c268bac2,0x80045d60c268bac2,2 +np.float64,0xffb9aecfe8335da0,0xfff0000000000000,2 +np.float64,0x7fcad3e1cd35a7c3,0x7ff0000000000000,2 +np.float64,0xbf881853d03030c0,0xbf8818783e28fc87,2 +np.float64,0xe18c6d23c318e,0xe18c6d23c318e,2 +np.float64,0x7fcb367b8f366cf6,0x7ff0000000000000,2 +np.float64,0x5c13436cb8269,0x5c13436cb8269,2 +np.float64,0xffe5399938aa7332,0xfff0000000000000,2 +np.float64,0xbfdc45dbc3b88bb8,0xbfdd33958222c27e,2 +np.float64,0xbfd714691bae28d2,0xbfd7954edbef810b,2 +np.float64,0xbfdf18b02b3e3160,0xbfe02ad13634c651,2 +np.float64,0x8003e6f276e7cde6,0x8003e6f276e7cde6,2 +np.float64,0x3febb6b412776d68,0x3fef4f753def31f9,2 +np.float64,0x7fe016a3b4a02d46,0x7ff0000000000000,2 +np.float64,0x3fdc899ac7b91336,0x3fdd7e1cee1cdfc8,2 +np.float64,0x800219271e24324f,0x800219271e24324f,2 +np.float64,0x1529d93e2a53c,0x1529d93e2a53c,2 +np.float64,0x800d5bc827fab790,0x800d5bc827fab790,2 +np.float64,0x3e1495107c293,0x3e1495107c293,2 +np.float64,0x3fe89da0f2b13b42,0x3feb1dc1f3015ad7,2 +np.float64,0x800ba8c17b975183,0x800ba8c17b975183,2 +np.float64,0x8002dacf0265b59f,0x8002dacf0265b59f,2 +np.float64,0xffe6d0a4cc2da149,0xfff0000000000000,2 +np.float64,0x3fdf23fe82be47fc,0x3fe03126d8e2b309,2 +np.float64,0xffe41b1f1c28363e,0xfff0000000000000,2 +np.float64,0xbfd635c634ac6b8c,0xbfd6a8966da6adaa,2 +np.float64,0x800755bc08eeab79,0x800755bc08eeab79,2 +np.float64,0x800ba4c47c374989,0x800ba4c47c374989,2 +np.float64,0x7fec9f7649793eec,0x7ff0000000000000,2 +np.float64,0x7fdbf45738b7e8ad,0x7ff0000000000000,2 +np.float64,0x3f5597f07eab4,0x3f5597f07eab4,2 +np.float64,0xbfbf4599183e8b30,0xbfbf5985d8c65097,2 +np.float64,0xbf5b200580364000,0xbf5b2006501b21ae,2 +np.float64,0x7f91868370230d06,0x7ff0000000000000,2 +np.float64,0x3838e2a67071d,0x3838e2a67071d,2 +np.float64,0xffefe3ff5d3fc7fe,0xfff0000000000000,2 +np.float64,0xffe66b26d06cd64d,0xfff0000000000000,2 +np.float64,0xbfd830a571b0614a,0xbfd8c526927c742c,2 
+np.float64,0x7fe8442122f08841,0x7ff0000000000000,2 +np.float64,0x800efa8c637df519,0x800efa8c637df519,2 +np.float64,0xf0026835e004d,0xf0026835e004d,2 +np.float64,0xffb11beefe2237e0,0xfff0000000000000,2 +np.float64,0x3fef9bbb327f3776,0x3ff2809f10641c32,2 +np.float64,0x350595306a0b3,0x350595306a0b3,2 +np.float64,0xf7f6538befecb,0xf7f6538befecb,2 +np.float64,0xffe36379c4a6c6f3,0xfff0000000000000,2 +np.float64,0x28b1d82e5163c,0x28b1d82e5163c,2 +np.float64,0x70a3d804e147c,0x70a3d804e147c,2 +np.float64,0xffd96c1bc9b2d838,0xfff0000000000000,2 +np.float64,0xffce8e00893d1c00,0xfff0000000000000,2 +np.float64,0x800f2bdcb25e57b9,0x800f2bdcb25e57b9,2 +np.float64,0xbfe0d9c63361b38c,0xbfe1a3eb02192b76,2 +np.float64,0xbfdc7b8711b8f70e,0xbfdd6e9db3a01e51,2 +np.float64,0x99e22ec133c46,0x99e22ec133c46,2 +np.float64,0xffeaef6ddab5dedb,0xfff0000000000000,2 +np.float64,0x7fe89c22c0f13845,0x7ff0000000000000,2 +np.float64,0x8002d5207de5aa42,0x8002d5207de5aa42,2 +np.float64,0x3fd1b13353236267,0x3fd1eb1b9345dfca,2 +np.float64,0x800ccae0a41995c1,0x800ccae0a41995c1,2 +np.float64,0x3fdbdaba38b7b574,0x3fdcbdfcbca37ce6,2 +np.float64,0x5b06d12cb60db,0x5b06d12cb60db,2 +np.float64,0xffd52262752a44c4,0xfff0000000000000,2 +np.float64,0x5a17f050b42ff,0x5a17f050b42ff,2 +np.float64,0x3d24205e7a485,0x3d24205e7a485,2 +np.float64,0x7fbed4dec63da9bd,0x7ff0000000000000,2 +np.float64,0xbfe56e9776aadd2f,0xbfe71212863c284f,2 +np.float64,0x7fea0bc952341792,0x7ff0000000000000,2 +np.float64,0x800f692d139ed25a,0x800f692d139ed25a,2 +np.float64,0xffdb63feab36c7fe,0xfff0000000000000,2 +np.float64,0x3fe1c2297fe38452,0x3fe2af21293c9571,2 +np.float64,0x7fede384747bc708,0x7ff0000000000000,2 +np.float64,0x800440169288802e,0x800440169288802e,2 +np.float64,0xffe3241eeb26483e,0xfff0000000000000,2 +np.float64,0xffe28f3879651e70,0xfff0000000000000,2 +np.float64,0xa435cbc1486d,0xa435cbc1486d,2 +np.float64,0x7fe55e08db6abc11,0x7ff0000000000000,2 +np.float64,0x1405e624280be,0x1405e624280be,2 +np.float64,0x3fd861bdf0b0c37c,0x3fd8f9d2e33e45e5,2 +np.float64,0x3feeb67cdc3d6cfa,0x3ff1d337d81d1c14,2 +np.float64,0x3fd159a10e22b342,0x3fd1903be7c2ea0c,2 +np.float64,0x3fd84626bc308c4d,0x3fd8dc373645e65b,2 +np.float64,0xffd3da81d9a7b504,0xfff0000000000000,2 +np.float64,0xbfd4a768b8294ed2,0xbfd503aa7c240051,2 +np.float64,0x3fe3059f2a660b3e,0x3fe42983e0c6bb2e,2 +np.float64,0x3fe3b8353827706a,0x3fe4fdd635c7269b,2 +np.float64,0xbfe4af0399695e07,0xbfe6277d9002b46c,2 +np.float64,0xbfd7e18a92afc316,0xbfd87066b54c4fe6,2 +np.float64,0x800432bcab48657a,0x800432bcab48657a,2 +np.float64,0x80033d609d267ac2,0x80033d609d267ac2,2 +np.float64,0x7fef5f758e7ebeea,0x7ff0000000000000,2 +np.float64,0xbfed7833dbfaf068,0xbff0e85bf45a5ebc,2 +np.float64,0x3fe2283985a45073,0x3fe325b0a9099c74,2 +np.float64,0xe820b4b3d0417,0xe820b4b3d0417,2 +np.float64,0x8003ecb72aa7d96f,0x8003ecb72aa7d96f,2 +np.float64,0xbfeab2c755b5658f,0xbfede7c83e92a625,2 +np.float64,0xbfc7b287f72f6510,0xbfc7d53ef2ffe9dc,2 +np.float64,0xffd9a41d0f33483a,0xfff0000000000000,2 +np.float64,0x3fd3a5b6e3a74b6c,0x3fd3f516f39a4725,2 +np.float64,0x800bc72091578e42,0x800bc72091578e42,2 +np.float64,0x800ff405ce9fe80c,0x800ff405ce9fe80c,2 +np.float64,0x57918600af24,0x57918600af24,2 +np.float64,0x2a5be7fa54b7e,0x2a5be7fa54b7e,2 +np.float64,0xbfdca7886bb94f10,0xbfdd9f142b5b43e4,2 +np.float64,0xbfe216993ee42d32,0xbfe3112936590995,2 +np.float64,0xbfe06bd9cf20d7b4,0xbfe126cd353ab42f,2 +np.float64,0x8003e6c31827cd87,0x8003e6c31827cd87,2 +np.float64,0x8005f37d810be6fc,0x8005f37d810be6fc,2 +np.float64,0x800715b081ae2b62,0x800715b081ae2b62,2 
+np.float64,0x3fef94c35bff2986,0x3ff27b4bed2f4051,2 +np.float64,0x6f5798e0deb0,0x6f5798e0deb0,2 +np.float64,0x3fcef1f05c3de3e1,0x3fcf3f557550598f,2 +np.float64,0xbf9a91c400352380,0xbf9a92876273b85c,2 +np.float64,0x3fc9143f7f322880,0x3fc93d678c05d26b,2 +np.float64,0x78ad847af15b1,0x78ad847af15b1,2 +np.float64,0x8000fdc088c1fb82,0x8000fdc088c1fb82,2 +np.float64,0x800200fd304401fb,0x800200fd304401fb,2 +np.float64,0x7fb8ab09dc315613,0x7ff0000000000000,2 +np.float64,0x3fe949771b7292ee,0x3fec00891c3fc5a2,2 +np.float64,0xbfc54cae0e2a995c,0xbfc565e0f3d0e3af,2 +np.float64,0xffd546161e2a8c2c,0xfff0000000000000,2 +np.float64,0x800fe1d1279fc3a2,0x800fe1d1279fc3a2,2 +np.float64,0x3fd9c45301b388a8,0x3fda77fa1f4c79bf,2 +np.float64,0x7fe10ff238221fe3,0x7ff0000000000000,2 +np.float64,0xbfbc2181ae384300,0xbfbc3002229155c4,2 +np.float64,0xbfe7bbfae4ef77f6,0xbfe9f895e91f468d,2 +np.float64,0x800d3d994f7a7b33,0x800d3d994f7a7b33,2 +np.float64,0xffe6e15a896dc2b4,0xfff0000000000000,2 +np.float64,0x800e6b6c8abcd6d9,0x800e6b6c8abcd6d9,2 +np.float64,0xbfd862c938b0c592,0xbfd8faf1cdcb09db,2 +np.float64,0xffe2411f8464823e,0xfff0000000000000,2 +np.float64,0xffd0b32efaa1665e,0xfff0000000000000,2 +np.float64,0x3ac4ace475896,0x3ac4ace475896,2 +np.float64,0xf9c3a7ebf3875,0xf9c3a7ebf3875,2 +np.float64,0xdb998ba5b7332,0xdb998ba5b7332,2 +np.float64,0xbfe438a14fe87142,0xbfe5981751e4c5cd,2 +np.float64,0xbfbcf48cbc39e918,0xbfbd045d60e65d3a,2 +np.float64,0x7fde499615bc932b,0x7ff0000000000000,2 +np.float64,0x800bba269057744e,0x800bba269057744e,2 +np.float64,0x3fc9bb1ba3337638,0x3fc9e78fdb6799c1,2 +np.float64,0xffd9f974fbb3f2ea,0xfff0000000000000,2 +np.float64,0x7fcf1ad1693e35a2,0x7ff0000000000000,2 +np.float64,0x7fe5dcedd32bb9db,0x7ff0000000000000,2 +np.float64,0xeb06500bd60ca,0xeb06500bd60ca,2 +np.float64,0x7fd73e7b592e7cf6,0x7ff0000000000000,2 +np.float64,0xbfe9d91ae873b236,0xbfecc08482849bcd,2 +np.float64,0xffc85338b730a670,0xfff0000000000000,2 +np.float64,0x7fbba41eee37483d,0x7ff0000000000000,2 +np.float64,0x3fed5624fb7aac4a,0x3ff0cf9f0de1fd54,2 +np.float64,0xffe566d80d6acdb0,0xfff0000000000000,2 +np.float64,0x3fd4477884a88ef1,0x3fd49ec7acdd25a0,2 +np.float64,0x3fcb98c5fd37318c,0x3fcbcfa20e2c2712,2 +np.float64,0xffdeba71d5bd74e4,0xfff0000000000000,2 +np.float64,0x8001edc59dc3db8c,0x8001edc59dc3db8c,2 +np.float64,0x3fe6b09e896d613e,0x3fe8a3bb541ec0e3,2 +np.float64,0x3fe8694b4970d296,0x3fead94d271d05cf,2 +np.float64,0xb52c27bf6a585,0xb52c27bf6a585,2 +np.float64,0x7fcb0a21d9361443,0x7ff0000000000000,2 +np.float64,0xbfd9efc68cb3df8e,0xbfdaa7058c0ccbd1,2 +np.float64,0x8007cd170fef9a2f,0x8007cd170fef9a2f,2 +np.float64,0x3fe83325e770664c,0x3fea92c55c9d567e,2 +np.float64,0x800bd0085537a011,0x800bd0085537a011,2 +np.float64,0xffe05b9e7820b73c,0xfff0000000000000,2 +np.float64,0x3fea4ce4347499c8,0x3fed5cea9fdc541b,2 +np.float64,0x7fe08aae1921155b,0x7ff0000000000000,2 +np.float64,0x3fe7a5e7deef4bd0,0x3fe9dc2e20cfb61c,2 +np.float64,0xbfe0ccc8e6e19992,0xbfe195175f32ee3f,2 +np.float64,0xbfe8649717f0c92e,0xbfead3298974dcf0,2 +np.float64,0x7fed6c5308bad8a5,0x7ff0000000000000,2 +np.float64,0xffdbd8c7af37b190,0xfff0000000000000,2 +np.float64,0xbfb2bc4d06257898,0xbfb2c09569912839,2 +np.float64,0x3fc62eca512c5d95,0x3fc64b4251bce8f9,2 +np.float64,0xbfcae2ddbd35c5bc,0xbfcb15971fc61312,2 +np.float64,0x18d26ce831a4f,0x18d26ce831a4f,2 +np.float64,0x7fe38b279267164e,0x7ff0000000000000,2 +np.float64,0x97e1d9ab2fc3b,0x97e1d9ab2fc3b,2 +np.float64,0xbfee8e4785fd1c8f,0xbff1b52d16807627,2 +np.float64,0xbfb189b4a6231368,0xbfb18d37e83860ee,2 
+np.float64,0xffd435761ea86aec,0xfff0000000000000,2 +np.float64,0x3fe6c48ebced891e,0x3fe8bcea189c3867,2 +np.float64,0x7fdadd3678b5ba6c,0x7ff0000000000000,2 +np.float64,0x7fea8f15b7b51e2a,0x7ff0000000000000,2 +np.float64,0xbff0000000000000,0xbff2cd9fc44eb982,2 +np.float64,0x80004c071120980f,0x80004c071120980f,2 +np.float64,0x8005367adfea6cf6,0x8005367adfea6cf6,2 +np.float64,0x3fbdc9139a3b9220,0x3fbdda4aba667ce5,2 +np.float64,0x7fed5ee3ad7abdc6,0x7ff0000000000000,2 +np.float64,0x51563fb2a2ac9,0x51563fb2a2ac9,2 +np.float64,0xbfba7d26ce34fa50,0xbfba894229c50ea1,2 +np.float64,0x6c10db36d821c,0x6c10db36d821c,2 +np.float64,0xbfbdaec0d03b5d80,0xbfbdbfca6ede64f4,2 +np.float64,0x800a1cbe7414397d,0x800a1cbe7414397d,2 +np.float64,0x800ae6e7f2d5cdd0,0x800ae6e7f2d5cdd0,2 +np.float64,0x3fea63d3fef4c7a8,0x3fed7c1356688ddc,2 +np.float64,0xbfde1e3a88bc3c76,0xbfdf3dfb09cc2260,2 +np.float64,0xbfd082d75a2105ae,0xbfd0b1e28c84877b,2 +np.float64,0x7fea1e5e85f43cbc,0x7ff0000000000000,2 +np.float64,0xffe2237a1a6446f4,0xfff0000000000000,2 +np.float64,0x3fd1e2be8523c57d,0x3fd21e93dfd1bbc4,2 +np.float64,0x3fd1acd428a359a8,0x3fd1e6916a42bc3a,2 +np.float64,0x61a152f0c342b,0x61a152f0c342b,2 +np.float64,0xbfc61a6b902c34d8,0xbfc6369557690ba0,2 +np.float64,0x7fd1a84b1f235095,0x7ff0000000000000,2 +np.float64,0x1c5cc7e638b9a,0x1c5cc7e638b9a,2 +np.float64,0x8008039755f0072f,0x8008039755f0072f,2 +np.float64,0x80097532d6f2ea66,0x80097532d6f2ea66,2 +np.float64,0xbfc6d979a12db2f4,0xbfc6f89777c53f8f,2 +np.float64,0x8004293ab1085276,0x8004293ab1085276,2 +np.float64,0x3fc2af5c21255eb8,0x3fc2c05dc0652554,2 +np.float64,0xbfd9a5ab87b34b58,0xbfda56d1076abc98,2 +np.float64,0xbfebd360ba77a6c2,0xbfef779fd6595f9b,2 +np.float64,0xffd5313c43aa6278,0xfff0000000000000,2 +np.float64,0xbfe994a262b32945,0xbfec64b969852ed5,2 +np.float64,0x3fce01a52e3c034a,0x3fce48324eb29c31,2 +np.float64,0x56bd74b2ad7af,0x56bd74b2ad7af,2 +np.float64,0xb84093ff70813,0xb84093ff70813,2 +np.float64,0x7fe776df946eedbe,0x7ff0000000000000,2 +np.float64,0xbfe294ac2e652958,0xbfe3a480938afa26,2 +np.float64,0x7fe741b4d0ee8369,0x7ff0000000000000,2 +np.float64,0x800b7e8a1056fd15,0x800b7e8a1056fd15,2 +np.float64,0x7fd28f1269251e24,0x7ff0000000000000,2 +np.float64,0x8009d4492e73a893,0x8009d4492e73a893,2 +np.float64,0x3fe3f27fca67e500,0x3fe543aff825e244,2 +np.float64,0x3fd12447e5a24890,0x3fd158efe43c0452,2 +np.float64,0xbfd58df0f2ab1be2,0xbfd5f6d908e3ebce,2 +np.float64,0xffc0a8e4642151c8,0xfff0000000000000,2 +np.float64,0xbfedb197787b632f,0xbff112367ec9d3e7,2 +np.float64,0xffdde07a7f3bc0f4,0xfff0000000000000,2 +np.float64,0x3fe91f3e5b723e7d,0x3febc886a1d48364,2 +np.float64,0x3fe50415236a082a,0x3fe68f43a5468d8c,2 +np.float64,0xd9a0c875b3419,0xd9a0c875b3419,2 +np.float64,0xbfee04ccf4bc099a,0xbff14f4740a114cf,2 +np.float64,0xbfd2bcc6a125798e,0xbfd30198b1e7d7ed,2 +np.float64,0xbfeb3c16f8f6782e,0xbfeea4ce47d09f58,2 +np.float64,0xffd3ba19e4a77434,0xfff0000000000000,2 +np.float64,0x8010000000000000,0x8010000000000000,2 +np.float64,0x3fdef0a642bde14d,0x3fe0146677b3a488,2 +np.float64,0x3fdc3dd0a2b87ba0,0x3fdd2abe65651487,2 +np.float64,0x3fdbb1fd47b763fb,0x3fdc915a2fd19f4b,2 +np.float64,0x7fbaa375e63546eb,0x7ff0000000000000,2 +np.float64,0x433ef8ee867e0,0x433ef8ee867e0,2 +np.float64,0xf5345475ea68b,0xf5345475ea68b,2 +np.float64,0xa126419b424c8,0xa126419b424c8,2 +np.float64,0x3fe0057248200ae5,0x3fe0b2f488339709,2 +np.float64,0xffc5e3b82f2bc770,0xfff0000000000000,2 +np.float64,0xffb215c910242b90,0xfff0000000000000,2 +np.float64,0xbfeba4ae0837495c,0xbfef3642e4b54aac,2 
+np.float64,0xffbb187ebe363100,0xfff0000000000000,2 +np.float64,0x3fe4c6a496a98d49,0x3fe64440cdf06aab,2 +np.float64,0x800767a28f6ecf46,0x800767a28f6ecf46,2 +np.float64,0x3fdbed63b1b7dac8,0x3fdcd27318c0b683,2 +np.float64,0x80006d8339e0db07,0x80006d8339e0db07,2 +np.float64,0x8000b504f0416a0b,0x8000b504f0416a0b,2 +np.float64,0xbfe88055bfb100ac,0xbfeaf767bd2767b9,2 +np.float64,0x3fefe503317fca06,0x3ff2b8d4057240c8,2 +np.float64,0x7fe307538b660ea6,0x7ff0000000000000,2 +np.float64,0x944963c12892d,0x944963c12892d,2 +np.float64,0xbfd2c20b38a58416,0xbfd30717900f8233,2 +np.float64,0x7feed04e3e3da09b,0x7ff0000000000000,2 +np.float64,0x3fe639619cac72c3,0x3fe80de7b8560a8d,2 +np.float64,0x3fde066c66bc0cd9,0x3fdf237fb759a652,2 +np.float64,0xbfc56b22b52ad644,0xbfc584c267a47ebd,2 +np.float64,0x3fc710d5b12e21ab,0x3fc730d817ba0d0c,2 +np.float64,0x3fee1dfc347c3bf8,0x3ff161d9c3e15f68,2 +np.float64,0x3fde400954bc8013,0x3fdf639e5cc9e7a9,2 +np.float64,0x56e701f8adce1,0x56e701f8adce1,2 +np.float64,0xbfe33bbc89e67779,0xbfe46996b39381fe,2 +np.float64,0x7fec89e2f87913c5,0x7ff0000000000000,2 +np.float64,0xbfdad58b40b5ab16,0xbfdba098cc0ad5d3,2 +np.float64,0x3fe99c76a13338ed,0x3fec6f31bae613e7,2 +np.float64,0x3fe4242a29a84854,0x3fe57f6b45e5c0ef,2 +np.float64,0xbfe79d3199ef3a63,0xbfe9d0fb96c846ba,2 +np.float64,0x7ff8000000000000,0x7ff8000000000000,2 +np.float64,0xbfeb35a6cf766b4e,0xbfee9be4e7e943f7,2 +np.float64,0x3e047f267c091,0x3e047f267c091,2 +np.float64,0x4bf1376a97e28,0x4bf1376a97e28,2 +np.float64,0x800ef419685de833,0x800ef419685de833,2 +np.float64,0x3fe0efa61a21df4c,0x3fe1bce98baf2f0f,2 +np.float64,0x3fcc13c4d738278a,0x3fcc4d8c778bcaf7,2 +np.float64,0x800f1d291afe3a52,0x800f1d291afe3a52,2 +np.float64,0x3fd3f10e6da7e21d,0x3fd444106761ea1d,2 +np.float64,0x800706d6d76e0dae,0x800706d6d76e0dae,2 +np.float64,0xffa1ffbc9023ff80,0xfff0000000000000,2 +np.float64,0xbfe098f26d6131e5,0xbfe15a08a5f3eac0,2 +np.float64,0x3fe984f9cc7309f4,0x3fec4fcdbdb1cb9b,2 +np.float64,0x7fd7c2f1eaaf85e3,0x7ff0000000000000,2 +np.float64,0x800a8adb64f515b7,0x800a8adb64f515b7,2 +np.float64,0x80060d3ffc8c1a81,0x80060d3ffc8c1a81,2 +np.float64,0xbfec37e4aef86fc9,0xbff0029a6a1d61e2,2 +np.float64,0x800b21bcfcf6437a,0x800b21bcfcf6437a,2 +np.float64,0xbfc08facc1211f58,0xbfc09b8380ea8032,2 +np.float64,0xffebb4b52577696a,0xfff0000000000000,2 +np.float64,0x800b08096df61013,0x800b08096df61013,2 +np.float64,0x8000000000000000,0x8000000000000000,2 +np.float64,0xffd2f0c9c8a5e194,0xfff0000000000000,2 +np.float64,0xffe78b2299af1644,0xfff0000000000000,2 +np.float64,0x7fd0444794a0888e,0x7ff0000000000000,2 +np.float64,0x307c47b460f8a,0x307c47b460f8a,2 +np.float64,0xffe6b4c851ad6990,0xfff0000000000000,2 +np.float64,0xffe1877224a30ee4,0xfff0000000000000,2 +np.float64,0x48d7b5c091af7,0x48d7b5c091af7,2 +np.float64,0xbfa1dc6b1c23b8d0,0xbfa1dd5889e1b7da,2 +np.float64,0x3fe5004737ea008e,0x3fe68a9c310b08c1,2 +np.float64,0x7fec5f0742b8be0e,0x7ff0000000000000,2 +np.float64,0x3fd0a86285a150c5,0x3fd0d8b238d557fa,2 +np.float64,0x7fed60380efac06f,0x7ff0000000000000,2 +np.float64,0xeeca74dfdd94f,0xeeca74dfdd94f,2 +np.float64,0x3fda05aaa8b40b54,0x3fdabebdbf405e84,2 +np.float64,0x800e530ceb1ca61a,0x800e530ceb1ca61a,2 +np.float64,0x800b3866379670cd,0x800b3866379670cd,2 +np.float64,0xffedb3e7fa3b67cf,0xfff0000000000000,2 +np.float64,0xffdfa4c0713f4980,0xfff0000000000000,2 +np.float64,0x7fe4679e0728cf3b,0x7ff0000000000000,2 +np.float64,0xffe978611ef2f0c2,0xfff0000000000000,2 +np.float64,0x7fc9f4601f33e8bf,0x7ff0000000000000,2 +np.float64,0x3fd4942de6a9285c,0x3fd4ef6e089357dd,2 
+np.float64,0x3faafe064435fc00,0x3fab0139cd6564dc,2 +np.float64,0x800d145a519a28b5,0x800d145a519a28b5,2 +np.float64,0xbfd82636f2304c6e,0xbfd8b9f75ddd2f02,2 +np.float64,0xbfdf2e975e3e5d2e,0xbfe037174280788c,2 +np.float64,0x7fd7051d7c2e0a3a,0x7ff0000000000000,2 +np.float64,0x8007933d452f267b,0x8007933d452f267b,2 +np.float64,0xb2043beb64088,0xb2043beb64088,2 +np.float64,0x3febfd9708f7fb2e,0x3fefb2ef090f18d2,2 +np.float64,0xffd9bc6bc83378d8,0xfff0000000000000,2 +np.float64,0xc10f9fd3821f4,0xc10f9fd3821f4,2 +np.float64,0x3fe3c83413a79068,0x3fe510fa1dd8edf7,2 +np.float64,0x3fbe26ccda3c4da0,0x3fbe38a892279975,2 +np.float64,0x3fcc1873103830e6,0x3fcc5257a6ae168d,2 +np.float64,0xe7e000e9cfc00,0xe7e000e9cfc00,2 +np.float64,0xffda73852bb4e70a,0xfff0000000000000,2 +np.float64,0xbfe831be19f0637c,0xbfea90f1b34da3e5,2 +np.float64,0xbfeb568f3076ad1e,0xbfeec97eebfde862,2 +np.float64,0x510a6ad0a214e,0x510a6ad0a214e,2 +np.float64,0x3fe6ba7e35ed74fc,0x3fe8b032a9a28c6a,2 +np.float64,0xffeb5cdcff76b9b9,0xfff0000000000000,2 +np.float64,0x4f0a23e89e145,0x4f0a23e89e145,2 +np.float64,0x446ec20288dd9,0x446ec20288dd9,2 +np.float64,0x7fe2521b02e4a435,0x7ff0000000000000,2 +np.float64,0x8001cd2969e39a54,0x8001cd2969e39a54,2 +np.float64,0x3fdfe90600bfd20c,0x3fe09fdcca10001c,2 +np.float64,0x7fd660c5762cc18a,0x7ff0000000000000,2 +np.float64,0xbfb11b23aa223648,0xbfb11e661949b377,2 +np.float64,0x800e025285fc04a5,0x800e025285fc04a5,2 +np.float64,0xffb180bb18230178,0xfff0000000000000,2 +np.float64,0xaaf590df55eb2,0xaaf590df55eb2,2 +np.float64,0xbfe8637d9df0c6fb,0xbfead1ba429462ec,2 +np.float64,0x7fd2577866a4aef0,0x7ff0000000000000,2 +np.float64,0xbfcfb2ab5a3f6558,0xbfd002ee87f272b9,2 +np.float64,0x7fdd64ae2f3ac95b,0x7ff0000000000000,2 +np.float64,0xffd1a502c9234a06,0xfff0000000000000,2 +np.float64,0x7fc4be4b60297c96,0x7ff0000000000000,2 +np.float64,0xbfb46b712a28d6e0,0xbfb470fca9919172,2 +np.float64,0xffdef913033df226,0xfff0000000000000,2 +np.float64,0x3fd94a3545b2946b,0x3fd9f40431ce9f9c,2 +np.float64,0x7fef88a0b6ff1140,0x7ff0000000000000,2 +np.float64,0xbfbcc81876399030,0xbfbcd7a0ab6cb388,2 +np.float64,0x800a4acfdd9495a0,0x800a4acfdd9495a0,2 +np.float64,0xffe270b3d5e4e167,0xfff0000000000000,2 +np.float64,0xbfd23f601e247ec0,0xbfd27eeca50a49eb,2 +np.float64,0x7fec6e796a78dcf2,0x7ff0000000000000,2 +np.float64,0x3fb85e0c9630bc19,0x3fb867791ccd6c72,2 +np.float64,0x7fe49fc424a93f87,0x7ff0000000000000,2 +np.float64,0xbfe75a99fbaeb534,0xbfe97ba37663de4c,2 +np.float64,0xffe85011b630a023,0xfff0000000000000,2 +np.float64,0xffe5962e492b2c5c,0xfff0000000000000,2 +np.float64,0x6f36ed4cde6de,0x6f36ed4cde6de,2 +np.float64,0x3feb72170af6e42e,0x3feeefbe6f1a2084,2 +np.float64,0x80014d8d60629b1c,0x80014d8d60629b1c,2 +np.float64,0xbfe0eb40d321d682,0xbfe1b7e31f252bf1,2 +np.float64,0x31fe305663fc7,0x31fe305663fc7,2 +np.float64,0x3fd2cd6381a59ac7,0x3fd312edc9868a4d,2 +np.float64,0xffcf0720793e0e40,0xfff0000000000000,2 +np.float64,0xbfeef1ef133de3de,0xbff1ffd5e1a3b648,2 +np.float64,0xbfd01c787aa038f0,0xbfd0482be3158a01,2 +np.float64,0x3fda3607c5b46c10,0x3fdaf3301e217301,2 +np.float64,0xffda9a9911b53532,0xfff0000000000000,2 +np.float64,0x3fc0b37c392166f8,0x3fc0bfa076f3c43e,2 +np.float64,0xbfe06591c760cb24,0xbfe11fad179ea12c,2 +np.float64,0x8006e369c20dc6d4,0x8006e369c20dc6d4,2 +np.float64,0x3fdf2912a8be5224,0x3fe033ff74b92f4d,2 +np.float64,0xffc0feb07821fd60,0xfff0000000000000,2 +np.float64,0xa4b938c949727,0xa4b938c949727,2 +np.float64,0x8008fe676571fccf,0x8008fe676571fccf,2 +np.float64,0xbfdda68459bb4d08,0xbfdeb8faab34fcbc,2 
+np.float64,0xbfda18b419343168,0xbfdad360ca52ec7c,2 +np.float64,0x3febcbae35b7975c,0x3fef6cd51c9ebc15,2 +np.float64,0x3fbec615f63d8c30,0x3fbed912ba729926,2 +np.float64,0x7f99a831c8335063,0x7ff0000000000000,2 +np.float64,0x3fe663e8826cc7d1,0x3fe84330bd9aada8,2 +np.float64,0x70a9f9e6e1540,0x70a9f9e6e1540,2 +np.float64,0x8a13a5db14275,0x8a13a5db14275,2 +np.float64,0x7fc4330a3b286613,0x7ff0000000000000,2 +np.float64,0xbfe580c6136b018c,0xbfe728806cc7a99a,2 +np.float64,0x8000000000000001,0x8000000000000001,2 +np.float64,0xffec079d5df80f3a,0xfff0000000000000,2 +np.float64,0x8e1173c31c22f,0x8e1173c31c22f,2 +np.float64,0x3fe088456d21108b,0x3fe14712ca414103,2 +np.float64,0x3fe1b76f73636edf,0x3fe2a2b658557112,2 +np.float64,0xbfd4a1dd162943ba,0xbfd4fdd45cae8fb8,2 +np.float64,0x7fd60b46c8ac168d,0x7ff0000000000000,2 +np.float64,0xffe36cc3b166d987,0xfff0000000000000,2 +np.float64,0x3fdc2ae0cfb855c0,0x3fdd15f026773151,2 +np.float64,0xbfc41aa203283544,0xbfc42fd1b145fdd5,2 +np.float64,0xffed90c55fbb218a,0xfff0000000000000,2 +np.float64,0x3fe67e3a9aecfc75,0x3fe86440db65b4f6,2 +np.float64,0x7fd12dbeaba25b7c,0x7ff0000000000000,2 +np.float64,0xbfe1267c0de24cf8,0xbfe1fbb611bdf1e9,2 +np.float64,0x22e5619645cad,0x22e5619645cad,2 +np.float64,0x7fe327c72ea64f8d,0x7ff0000000000000,2 +np.float64,0x7fd2c3f545a587ea,0x7ff0000000000000,2 +np.float64,0x7fc7b689372f6d11,0x7ff0000000000000,2 +np.float64,0xc5e140bd8bc28,0xc5e140bd8bc28,2 +np.float64,0x3fccb3627a3966c5,0x3fccf11b44fa4102,2 +np.float64,0xbfd2cf725c259ee4,0xbfd315138d0e5dca,2 +np.float64,0x10000000000000,0x10000000000000,2 +np.float64,0xbfd3dfa8b627bf52,0xbfd431d17b235477,2 +np.float64,0xbfb82124e6304248,0xbfb82a4b6d9c2663,2 +np.float64,0x3fdcd590d9b9ab22,0x3fddd1d548806347,2 +np.float64,0x7fdee0cd1b3dc199,0x7ff0000000000000,2 +np.float64,0x8004ebfc60a9d7fa,0x8004ebfc60a9d7fa,2 +np.float64,0x3fe8eb818b71d704,0x3feb842679806108,2 +np.float64,0xffdd5e8fe63abd20,0xfff0000000000000,2 +np.float64,0xbfe3efcbd9e7df98,0xbfe54071436645ee,2 +np.float64,0x3fd5102557aa204b,0x3fd57203d31a05b8,2 +np.float64,0x3fe6318af7ec6316,0x3fe8041a177cbf96,2 +np.float64,0x3fdf3cecdabe79da,0x3fe03f2084ffbc78,2 +np.float64,0x7fe0ab6673a156cc,0x7ff0000000000000,2 +np.float64,0x800037d5c6c06fac,0x800037d5c6c06fac,2 +np.float64,0xffce58b86a3cb170,0xfff0000000000000,2 +np.float64,0xbfe3455d6ce68abb,0xbfe475034cecb2b8,2 +np.float64,0x991b663d3236d,0x991b663d3236d,2 +np.float64,0x3fda82d37c3505a7,0x3fdb46973da05c12,2 +np.float64,0x3f9b736fa036e6df,0x3f9b74471c234411,2 +np.float64,0x8001c96525e392cb,0x8001c96525e392cb,2 +np.float64,0x7ff0000000000000,0x7ff0000000000000,2 +np.float64,0xbfaf59122c3eb220,0xbfaf5e15f8b272b0,2 +np.float64,0xbf9aa7d288354fa0,0xbf9aa897d2a40cb5,2 +np.float64,0x8004a43428694869,0x8004a43428694869,2 +np.float64,0x7feead476dbd5a8e,0x7ff0000000000000,2 +np.float64,0xffca150f81342a20,0xfff0000000000000,2 +np.float64,0x80047ec3bc88fd88,0x80047ec3bc88fd88,2 +np.float64,0xbfee3e5b123c7cb6,0xbff179c8b8334278,2 +np.float64,0x3fd172359f22e46b,0x3fd1a9ba6b1420a1,2 +np.float64,0x3fe8e5e242f1cbc5,0x3feb7cbcaefc4d5c,2 +np.float64,0x8007fb059a6ff60c,0x8007fb059a6ff60c,2 +np.float64,0xe3899e71c7134,0xe3899e71c7134,2 +np.float64,0x7fe3b98326a77305,0x7ff0000000000000,2 +np.float64,0x7fec4e206cb89c40,0x7ff0000000000000,2 +np.float64,0xbfa3b012c4276020,0xbfa3b150c13b3cc5,2 +np.float64,0xffefffffffffffff,0xfff0000000000000,2 +np.float64,0xffe28a5b9aa514b6,0xfff0000000000000,2 +np.float64,0xbfd76a6cc2aed4da,0xbfd7f10f4d04e7f6,2 +np.float64,0xbc2b1c0178564,0xbc2b1c0178564,2 
+np.float64,0x6d9d444adb3a9,0x6d9d444adb3a9,2 +np.float64,0xbfdcadd368395ba6,0xbfdda6037b5c429c,2 +np.float64,0x3fe11891fde23124,0x3fe1ebc1c204b14b,2 +np.float64,0x3fdd66c3eebacd88,0x3fde72526b5304c4,2 +np.float64,0xbfe79d85612f3b0b,0xbfe9d1673bd1f6d6,2 +np.float64,0x3fed60abdabac158,0x3ff0d7426b3800a2,2 +np.float64,0xbfb0ffa54021ff48,0xbfb102d81073a9f0,2 +np.float64,0xd2452af5a48a6,0xd2452af5a48a6,2 +np.float64,0xf4b835c1e971,0xf4b835c1e971,2 +np.float64,0x7e269cdafc4d4,0x7e269cdafc4d4,2 +np.float64,0x800097a21d812f45,0x800097a21d812f45,2 +np.float64,0x3fdfcc85e8bf990c,0x3fe08fcf770fd456,2 +np.float64,0xd8d53155b1aa6,0xd8d53155b1aa6,2 +np.float64,0x7fb8ed658831daca,0x7ff0000000000000,2 +np.float64,0xbfec865415b90ca8,0xbff03a4584d719f9,2 +np.float64,0xffd8cda62a319b4c,0xfff0000000000000,2 +np.float64,0x273598d84e6b4,0x273598d84e6b4,2 +np.float64,0x7fd566b5c32acd6b,0x7ff0000000000000,2 +np.float64,0xff61d9d48023b400,0xfff0000000000000,2 +np.float64,0xbfec5c3bf4f8b878,0xbff01c594243337c,2 +np.float64,0x7fd1be0561a37c0a,0x7ff0000000000000,2 +np.float64,0xffeaee3271b5dc64,0xfff0000000000000,2 +np.float64,0x800c0e1931b81c33,0x800c0e1931b81c33,2 +np.float64,0xbfad1171583a22e0,0xbfad1570e5c466d2,2 +np.float64,0x7fd783b0fe2f0761,0x7ff0000000000000,2 +np.float64,0x7fc39903e6273207,0x7ff0000000000000,2 +np.float64,0xffe00003c5600007,0xfff0000000000000,2 +np.float64,0x35a7b9c06b50,0x35a7b9c06b50,2 +np.float64,0x7fee441a22bc8833,0x7ff0000000000000,2 +np.float64,0xff6e47fbc03c9000,0xfff0000000000000,2 +np.float64,0xbfd3c3c9c8a78794,0xbfd41499b1912534,2 +np.float64,0x82c9c87f05939,0x82c9c87f05939,2 +np.float64,0xbfedeb0fe4fbd620,0xbff13c573ce9d3d0,2 +np.float64,0x2b79298656f26,0x2b79298656f26,2 +np.float64,0xbf5ee44f003dc800,0xbf5ee4503353c0ba,2 +np.float64,0xbfe1dd264e63ba4c,0xbfe2ce68116c7bf6,2 +np.float64,0x3fece10b7579c217,0x3ff07b21b11799c6,2 +np.float64,0x3fba47143a348e28,0x3fba52e601adf24c,2 +np.float64,0xffe9816e7a7302dc,0xfff0000000000000,2 +np.float64,0x8009a8047fd35009,0x8009a8047fd35009,2 +np.float64,0x800ac28e4e95851d,0x800ac28e4e95851d,2 +np.float64,0x80093facf4f27f5a,0x80093facf4f27f5a,2 +np.float64,0x3ff0000000000000,0x3ff2cd9fc44eb982,2 +np.float64,0x3fe76a9857eed530,0x3fe99018a5895a4f,2 +np.float64,0xbfd13c59a3a278b4,0xbfd171e133df0b16,2 +np.float64,0x7feb43bc83368778,0x7ff0000000000000,2 +np.float64,0xbfe2970c5fa52e18,0xbfe3a74a434c6efe,2 +np.float64,0xffd091c380212388,0xfff0000000000000,2 +np.float64,0x3febb3b9d2f76774,0x3fef4b4af2bd8580,2 +np.float64,0x7fec66787ef8ccf0,0x7ff0000000000000,2 +np.float64,0xbf935e185826bc40,0xbf935e640557a354,2 +np.float64,0x979df1552f3be,0x979df1552f3be,2 +np.float64,0x7fc096ee73212ddc,0x7ff0000000000000,2 +np.float64,0xbfe9de88faf3bd12,0xbfecc7d1ae691d1b,2 +np.float64,0x7fdc733f06b8e67d,0x7ff0000000000000,2 +np.float64,0xffd71be1a0ae37c4,0xfff0000000000000,2 +np.float64,0xb50dabd36a1b6,0xb50dabd36a1b6,2 +np.float64,0x7fce3d94d63c7b29,0x7ff0000000000000,2 +np.float64,0x7fbaf95e4435f2bc,0x7ff0000000000000,2 +np.float64,0x81a32a6f03466,0x81a32a6f03466,2 +np.float64,0xa99b5b4d5336c,0xa99b5b4d5336c,2 +np.float64,0x7f97c1eeb82f83dc,0x7ff0000000000000,2 +np.float64,0x3fe761636d6ec2c6,0x3fe98451160d2ffb,2 +np.float64,0xbfe3224ef5e6449e,0xbfe44b73eeadac52,2 +np.float64,0x7fde6feb0dbcdfd5,0x7ff0000000000000,2 +np.float64,0xbfee87f9ca7d0ff4,0xbff1b079e9d7f706,2 +np.float64,0x3fe46f4c9828de99,0x3fe5da2ab9609ea5,2 +np.float64,0xffb92fe882325fd0,0xfff0000000000000,2 +np.float64,0x80054bc63cea978d,0x80054bc63cea978d,2 +np.float64,0x3d988bea7b312,0x3d988bea7b312,2 
+np.float64,0x3fe6468e1d6c8d1c,0x3fe81e64d37d39a8,2 +np.float64,0x3fd68eefc22d1de0,0x3fd7074264faeead,2 +np.float64,0xffb218a074243140,0xfff0000000000000,2 +np.float64,0x3fdbcb3b6cb79678,0x3fdcad011de40b7d,2 +np.float64,0x7fe3c161772782c2,0x7ff0000000000000,2 +np.float64,0x25575c904aaec,0x25575c904aaec,2 +np.float64,0x800fa43a8f5f4875,0x800fa43a8f5f4875,2 +np.float64,0x3fe41fc9e1e83f94,0x3fe57a25dd1a37f1,2 +np.float64,0x3fd895f4a7b12be9,0x3fd931e7b721a08a,2 +np.float64,0xce31469f9c629,0xce31469f9c629,2 +np.float64,0xffea0f55ca341eab,0xfff0000000000000,2 +np.float64,0xffe831c9ba306393,0xfff0000000000000,2 +np.float64,0x7fe2056f03a40add,0x7ff0000000000000,2 +np.float64,0x7fd6b075e02d60eb,0x7ff0000000000000,2 +np.float64,0x3fdfbef4273f7de8,0x3fe0882c1f59efc0,2 +np.float64,0x8005b9e094ab73c2,0x8005b9e094ab73c2,2 +np.float64,0x3fea881ac6351036,0x3fedad7a319b887c,2 +np.float64,0xbfe2c61c7ee58c39,0xbfe3de9a99d8a9c6,2 +np.float64,0x30b0d3786161b,0x30b0d3786161b,2 +np.float64,0x3fa51d56a02a3aad,0x3fa51edee2d2ecef,2 +np.float64,0x79745732f2e8c,0x79745732f2e8c,2 +np.float64,0x800d55b4907aab69,0x800d55b4907aab69,2 +np.float64,0xbfbe8fcf0a3d1fa0,0xbfbea267fbb5bfdf,2 +np.float64,0xbfd04e2756a09c4e,0xbfd07b74d079f9a2,2 +np.float64,0x3fc65170552ca2e1,0x3fc66e6eb00c82ed,2 +np.float64,0xbfb0674b8020ce98,0xbfb06a2b4771b64c,2 +np.float64,0x2059975840b34,0x2059975840b34,2 +np.float64,0x33d1385467a28,0x33d1385467a28,2 +np.float64,0x3fea41b74ff4836f,0x3fed4dc1a09e53cc,2 +np.float64,0xbfe8e08c9d71c119,0xbfeb75b4c59a6bec,2 +np.float64,0x7fdbbf14d6377e29,0x7ff0000000000000,2 +np.float64,0x3fcd8b71513b16e0,0x3fcdcec80174f9ad,2 +np.float64,0x5c50bc94b8a18,0x5c50bc94b8a18,2 +np.float64,0x969a18f52d343,0x969a18f52d343,2 +np.float64,0x3fd7ae44462f5c89,0x3fd8398bc34e395c,2 +np.float64,0xffdd0f8617ba1f0c,0xfff0000000000000,2 +np.float64,0xfff0000000000000,0xfff0000000000000,2 +np.float64,0xbfe2f9badb65f376,0xbfe41b771320ece8,2 +np.float64,0x3fd140bc7fa29,0x3fd140bc7fa29,2 +np.float64,0xbfe14523b5628a48,0xbfe21ee850972043,2 +np.float64,0x3feedd0336bdba06,0x3ff1f01afc1f3a06,2 +np.float64,0x800de423ad7bc848,0x800de423ad7bc848,2 +np.float64,0x4cef857c99df1,0x4cef857c99df1,2 +np.float64,0xbfea55e0e374abc2,0xbfed691e41d648dd,2 +np.float64,0x3fe70d7a18ae1af4,0x3fe91955a34d8094,2 +np.float64,0xbfc62fc3032c5f88,0xbfc64c3ec25decb8,2 +np.float64,0x3fc915abb5322b58,0x3fc93edac5cc73fe,2 +np.float64,0x69aaff66d3561,0x69aaff66d3561,2 +np.float64,0x5c6a90f2b8d53,0x5c6a90f2b8d53,2 +np.float64,0x3fefe30dc1bfc61c,0x3ff2b752257bdacd,2 +np.float64,0x3fef15db15fe2bb6,0x3ff21aea05601396,2 +np.float64,0xbfe353e5ac66a7cc,0xbfe48644e6553d1a,2 +np.float64,0x3fe6d30cffada61a,0x3fe8cf3e4c61ddac,2 +np.float64,0x7fb7857eb62f0afc,0x7ff0000000000000,2 +np.float64,0xbfdd9b53d23b36a8,0xbfdeac91a7af1340,2 +np.float64,0x3fd1456357228ac7,0x3fd17b3f7d39b27a,2 +np.float64,0x3fb57d10ae2afa21,0x3fb5838702b806f4,2 +np.float64,0x800c59c96c98b393,0x800c59c96c98b393,2 +np.float64,0x7fc1f2413823e481,0x7ff0000000000000,2 +np.float64,0xbfa3983624273070,0xbfa3996fa26c419a,2 +np.float64,0x7fb28874ae2510e8,0x7ff0000000000000,2 +np.float64,0x3fe826d02a304da0,0x3fea82bec50bc0b6,2 +np.float64,0x8008d6f0d3d1ade2,0x8008d6f0d3d1ade2,2 +np.float64,0xffe7c970ca2f92e1,0xfff0000000000000,2 +np.float64,0x7fcf42bcaa3e8578,0x7ff0000000000000,2 +np.float64,0x7fda1ab517343569,0x7ff0000000000000,2 +np.float64,0xbfe7926a65ef24d5,0xbfe9c323dd890d5b,2 +np.float64,0xbfcaf6282d35ec50,0xbfcb294f36a0a33d,2 +np.float64,0x800ca49df8d9493c,0x800ca49df8d9493c,2 
+np.float64,0xffea18d26af431a4,0xfff0000000000000,2 +np.float64,0x3fb72f276e2e5e50,0x3fb7374539fd1221,2 +np.float64,0xffa6b613842d6c20,0xfff0000000000000,2 +np.float64,0xbfeb3c7263f678e5,0xbfeea54cdb60b54c,2 +np.float64,0x3fc976d2ba32eda5,0x3fc9a1e83a058de4,2 +np.float64,0xbfe4acd4b0e959aa,0xbfe624d5d4f9b9a6,2 +np.float64,0x7fca410a0f348213,0x7ff0000000000000,2 +np.float64,0xbfde368f77bc6d1e,0xbfdf5910c8c8bcb0,2 +np.float64,0xbfed7412937ae825,0xbff0e55afc428453,2 +np.float64,0xffef6b7b607ed6f6,0xfff0000000000000,2 +np.float64,0xbfb936f17e326de0,0xbfb941629a53c694,2 +np.float64,0x800dbb0c469b7619,0x800dbb0c469b7619,2 +np.float64,0x800f68b0581ed161,0x800f68b0581ed161,2 +np.float64,0x3fe25b2aad64b656,0x3fe361266fa9c5eb,2 +np.float64,0xbfb87e445a30fc88,0xbfb887d676910c3f,2 +np.float64,0x6e6ba9b6dcd76,0x6e6ba9b6dcd76,2 +np.float64,0x3fad27ce583a4f9d,0x3fad2bd72782ffdb,2 +np.float64,0xbfec0bc5d638178c,0xbfefc6e8c8f9095f,2 +np.float64,0x7fcba4a296374944,0x7ff0000000000000,2 +np.float64,0x8004ca237cc99448,0x8004ca237cc99448,2 +np.float64,0xffe85b8c3270b718,0xfff0000000000000,2 +np.float64,0x7fe7ee3eddafdc7d,0x7ff0000000000000,2 +np.float64,0xffd275967ca4eb2c,0xfff0000000000000,2 +np.float64,0xbfa95bc3a032b780,0xbfa95e6b288ecf43,2 +np.float64,0x3fc9e3214b33c643,0x3fca10667e7e7ff4,2 +np.float64,0x8001b89c5d837139,0x8001b89c5d837139,2 +np.float64,0xbf8807dfc0300fc0,0xbf880803e3badfbd,2 +np.float64,0x800aca94b895952a,0x800aca94b895952a,2 +np.float64,0x7fd79534a02f2a68,0x7ff0000000000000,2 +np.float64,0x3fe1b81179e37023,0x3fe2a371d8cc26f0,2 +np.float64,0x800699539d6d32a8,0x800699539d6d32a8,2 +np.float64,0xffe51dfbb3aa3bf7,0xfff0000000000000,2 +np.float64,0xbfdfb775abbf6eec,0xbfe083f48be2f98f,2 +np.float64,0x3fe87979d7b0f2f4,0x3feaee701d959079,2 +np.float64,0x3fd8e4e6a731c9cd,0x3fd986d29f25f982,2 +np.float64,0x3fe3dadaaf67b5b6,0x3fe527520fb02920,2 +np.float64,0x8003c2262bc7844d,0x8003c2262bc7844d,2 +np.float64,0x800c930add392616,0x800c930add392616,2 +np.float64,0xffb7a152a22f42a8,0xfff0000000000000,2 +np.float64,0x80028fe03dc51fc1,0x80028fe03dc51fc1,2 +np.float64,0xffe32ae60c6655cc,0xfff0000000000000,2 +np.float64,0x3fea3527e4746a50,0x3fed3cbbf47f18eb,2 +np.float64,0x800a53059e14a60c,0x800a53059e14a60c,2 +np.float64,0xbfd79e3b202f3c76,0xbfd828672381207b,2 +np.float64,0xffeed7e2eb7dafc5,0xfff0000000000000,2 +np.float64,0x3fec51ed6778a3db,0x3ff01509e34df61d,2 +np.float64,0xbfd84bc577b0978a,0xbfd8e23ec55e42e8,2 +np.float64,0x2483aff849077,0x2483aff849077,2 +np.float64,0x6f57883adeaf2,0x6f57883adeaf2,2 +np.float64,0xffd3fd74d927faea,0xfff0000000000000,2 +np.float64,0x7fca49ec773493d8,0x7ff0000000000000,2 +np.float64,0x7fd08fe2e8211fc5,0x7ff0000000000000,2 +np.float64,0x800852086db0a411,0x800852086db0a411,2 +np.float64,0x3fe5b1f2c9eb63e6,0x3fe7654f511bafc6,2 +np.float64,0xbfe01e2a58e03c54,0xbfe0cedb68f021e6,2 +np.float64,0x800988421d331085,0x800988421d331085,2 +np.float64,0xffd5038b18aa0716,0xfff0000000000000,2 +np.float64,0x8002c9264c85924d,0x8002c9264c85924d,2 +np.float64,0x3fd21ca302243946,0x3fd25ac653a71aab,2 +np.float64,0xbfea60d6e6f4c1ae,0xbfed78031d9dfa2b,2 +np.float64,0xffef97b6263f2f6b,0xfff0000000000000,2 +np.float64,0xbfd524732faa48e6,0xbfd5876ecc415dcc,2 +np.float64,0x660387e8cc072,0x660387e8cc072,2 +np.float64,0x7fcfc108a33f8210,0x7ff0000000000000,2 +np.float64,0x7febe5b0f877cb61,0x7ff0000000000000,2 +np.float64,0xbfa55fdfac2abfc0,0xbfa56176991851a8,2 +np.float64,0x25250f4c4a4a3,0x25250f4c4a4a3,2 +np.float64,0xffe2f6a2f2a5ed46,0xfff0000000000000,2 
+np.float64,0x7fa754fcc02ea9f9,0x7ff0000000000000,2 +np.float64,0x3febd19dea37a33c,0x3fef75279f75d3b8,2 +np.float64,0xc5ed55218bdab,0xc5ed55218bdab,2 +np.float64,0x3fe72ff6b3ee5fed,0x3fe945388b979882,2 +np.float64,0xbfe16b854e22d70a,0xbfe24b10fc0dff14,2 +np.float64,0xffb22cbe10245980,0xfff0000000000000,2 +np.float64,0xa54246b54a849,0xa54246b54a849,2 +np.float64,0x3fe7f4cda76fe99c,0x3fea41edc74888b6,2 +np.float64,0x1,0x1,2 +np.float64,0x800d84acce9b095a,0x800d84acce9b095a,2 +np.float64,0xb0eef04761dde,0xb0eef04761dde,2 +np.float64,0x7ff4000000000000,0x7ffc000000000000,2 +np.float64,0xffecaf1dbb795e3b,0xfff0000000000000,2 +np.float64,0x90dbab8d21b76,0x90dbab8d21b76,2 +np.float64,0x3fe79584a9ef2b09,0x3fe9c71fa9e40eb5,2 diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-tan.csv b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-tan.csv new file mode 100644 index 0000000..083cdb2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-tan.csv @@ -0,0 +1,1429 @@ +dtype,input,output,ulperrortol +np.float32,0xfd97ece0,0xc11186e9,4 +np.float32,0x8013bb34,0x8013bb34,4 +np.float32,0x316389,0x316389,4 +np.float32,0x7f7fffff,0xbf1c9eca,4 +np.float32,0x3f7674bb,0x3fb7e450,4 +np.float32,0x80800000,0x80800000,4 +np.float32,0x7f5995e8,0xbf94106c,4 +np.float32,0x74527,0x74527,4 +np.float32,0x7f08caea,0xbeceddb6,4 +np.float32,0x2d49b2,0x2d49b2,4 +np.float32,0x3f74e5e4,0x3fb58695,4 +np.float32,0x3f3fcd51,0x3f6e1e81,4 +np.float32,0xbf4f3608,0xbf864d3d,4 +np.float32,0xbed974a0,0xbee78c70,4 +np.float32,0xff5f483c,0x3ecf3cb2,4 +np.float32,0x7f4532f4,0xc0b96f7b,4 +np.float32,0x3f0a4f7c,0x3f198cc0,4 +np.float32,0x210193,0x210193,4 +np.float32,0xfeebad7a,0xbf92eba8,4 +np.float32,0xfed29f74,0xc134cab6,4 +np.float32,0x803433a0,0x803433a0,4 +np.float32,0x64eb46,0x64eb46,4 +np.float32,0xbf54ef22,0xbf8c757b,4 +np.float32,0x3f3d5fdd,0x3f69a17b,4 +np.float32,0x80000001,0x80000001,4 +np.float32,0x800a837a,0x800a837a,4 +np.float32,0x6ff0be,0x6ff0be,4 +np.float32,0xfe8f1186,0x3f518820,4 +np.float32,0x804963e5,0x804963e5,4 +np.float32,0xfebaa59a,0x3fa1dbb0,4 +np.float32,0x637970,0x637970,4 +np.float32,0x3e722a6b,0x3e76c89a,4 +np.float32,0xff2b0478,0xbddccb5f,4 +np.float32,0xbf7bd85b,0xbfc06821,4 +np.float32,0x3ec33600,0x3ecd4126,4 +np.float32,0x3e0a43b9,0x3e0b1c69,4 +np.float32,0x7f7511b6,0xbe427083,4 +np.float32,0x3f28c114,0x3f465a73,4 +np.float32,0x3f179e1c,0x3f2c3e7c,4 +np.float32,0x7b2963,0x7b2963,4 +np.float32,0x3f423d06,0x3f72b442,4 +np.float32,0x3f5a24c6,0x3f925508,4 +np.float32,0xff18c834,0xbf79b5c8,4 +np.float32,0x3f401ece,0x3f6eb6ac,4 +np.float32,0x7b8a3013,0xbffab968,4 +np.float32,0x80091ff0,0x80091ff0,4 +np.float32,0x3f389c51,0x3f610b47,4 +np.float32,0x5ea174,0x5ea174,4 +np.float32,0x807a9eb2,0x807a9eb2,4 +np.float32,0x806ce61e,0x806ce61e,4 +np.float32,0xbe956acc,0xbe99cefc,4 +np.float32,0x7e60e247,0xbf5e64a5,4 +np.float32,0x7f398e24,0x404d12ed,4 +np.float32,0x3d9049f8,0x3d908735,4 +np.float32,0x7db17ffc,0xbf5b3d87,4 +np.float32,0xff453f78,0xc0239c9f,4 +np.float32,0x3f024aac,0x3f0ed802,4 +np.float32,0xbe781c30,0xbe7d1508,4 +np.float32,0x3f77962a,0x3fb9a28e,4 +np.float32,0xff7fffff,0x3f1c9eca,4 +np.float32,0x3f7152e3,0x3fb03f9d,4 +np.float32,0xff7cb167,0x3f9ce831,4 +np.float32,0x3e763e30,0x3e7b1a10,4 +np.float32,0xbf126527,0xbf24c253,4 +np.float32,0x803f6660,0x803f6660,4 +np.float32,0xbf79de38,0xbfbd38b1,4 +np.float32,0x8046c2f0,0x8046c2f0,4 +np.float32,0x6dc74e,0x6dc74e,4 
+np.float32,0xbec9c45e,0xbed4e768,4 +np.float32,0x3f0eedb6,0x3f1fe610,4 +np.float32,0x7e031999,0xbcc13026,4 +np.float32,0x7efc2fd7,0x41e4b284,4 +np.float32,0xbeab7454,0xbeb22a1b,4 +np.float32,0x805ee67b,0x805ee67b,4 +np.float32,0x7f76e58e,0xc2436659,4 +np.float32,0xbe62b024,0xbe667718,4 +np.float32,0x3eea0808,0x3efbd182,4 +np.float32,0xbf7fd00c,0xbfc70719,4 +np.float32,0x7f27b640,0xbf0d97e0,4 +np.float32,0x3f1b58a4,0x3f31b6f4,4 +np.float32,0x252a9f,0x252a9f,4 +np.float32,0x7f65f95a,0xbead5de3,4 +np.float32,0xfc6ea780,0x42d15801,4 +np.float32,0x7eac4c52,0xc0682424,4 +np.float32,0xbe8a3f5a,0xbe8db54d,4 +np.float32,0xbf1644e2,0xbf2a4abd,4 +np.float32,0x3fc96a,0x3fc96a,4 +np.float32,0x7f38c0e4,0x3cc04af8,4 +np.float32,0x3f623d75,0x3f9c065d,4 +np.float32,0x3ee6a51a,0x3ef7a058,4 +np.float32,0x3dd11020,0x3dd1cacf,4 +np.float32,0xb6918,0xb6918,4 +np.float32,0xfdd7a540,0x3f22f081,4 +np.float32,0x80798563,0x80798563,4 +np.float32,0x3e9a8b7a,0x3e9f6a7e,4 +np.float32,0xbea515d4,0xbeab0df5,4 +np.float32,0xbea9b9f4,0xbeb03abe,4 +np.float32,0xbf11a5fa,0xbf23b478,4 +np.float32,0xfd6cadf0,0xbfa2a878,4 +np.float32,0xbf6edd07,0xbfacbb78,4 +np.float32,0xff5c5328,0x3e2d1552,4 +np.float32,0xbea2f788,0xbea8b3f5,4 +np.float32,0x802efaeb,0x802efaeb,4 +np.float32,0xff1c85e5,0x41f8560e,4 +np.float32,0x3f53b123,0x3f8b18e1,4 +np.float32,0xff798c4a,0x4092e66f,4 +np.float32,0x7f2e6fe7,0xbdcbd58f,4 +np.float32,0xfe8a8196,0x3fd7fc56,4 +np.float32,0x5e7ad4,0x5e7ad4,4 +np.float32,0xbf23a02d,0xbf3e4533,4 +np.float32,0x3f31c55c,0x3f5531bf,4 +np.float32,0x80331be3,0x80331be3,4 +np.float32,0x8056960a,0x8056960a,4 +np.float32,0xff1c06ae,0xbfd26992,4 +np.float32,0xbe0cc4b0,0xbe0da96c,4 +np.float32,0x7e925ad5,0xbf8dba54,4 +np.float32,0x2c8cec,0x2c8cec,4 +np.float32,0x8011951e,0x8011951e,4 +np.float32,0x3f2caf84,0x3f4cb89f,4 +np.float32,0xbd32c220,0xbd32df33,4 +np.float32,0xbec358d6,0xbecd6996,4 +np.float32,0x3f6e4930,0x3fabeb92,4 +np.float32,0xbf6a3afd,0xbfa65a3a,4 +np.float32,0x80067764,0x80067764,4 +np.float32,0x3d8df1,0x3d8df1,4 +np.float32,0x7ee51cf2,0x409e4061,4 +np.float32,0x435f5d,0x435f5d,4 +np.float32,0xbf5b17f7,0xbf936ebe,4 +np.float32,0x3ecaacb5,0x3ed5f81f,4 +np.float32,0x807b0aa5,0x807b0aa5,4 +np.float32,0x52b40b,0x52b40b,4 +np.float32,0x146a97,0x146a97,4 +np.float32,0x7f42b952,0xbfdcb413,4 +np.float32,0xbf1a1af2,0xbf2fe1bb,4 +np.float32,0x3f312034,0x3f541aa2,4 +np.float32,0x3f281d60,0x3f4554f9,4 +np.float32,0x50e451,0x50e451,4 +np.float32,0xbe45838c,0xbe480016,4 +np.float32,0xff7d0aeb,0x3eb0746e,4 +np.float32,0x7f32a489,0xbf96af6d,4 +np.float32,0xbf1b4e27,0xbf31a769,4 +np.float32,0x3f242936,0x3f3f1a44,4 +np.float32,0xbf7482ff,0xbfb4f201,4 +np.float32,0x4bda38,0x4bda38,4 +np.float32,0xbf022208,0xbf0ea2bb,4 +np.float32,0x7d08ca95,0xbe904602,4 +np.float32,0x7ed2f356,0xc02b55ad,4 +np.float32,0xbf131204,0xbf25b734,4 +np.float32,0xff3464b4,0x3fb23706,4 +np.float32,0x5a97cf,0x5a97cf,4 +np.float32,0xbe52db70,0xbe55e388,4 +np.float32,0x3f52934f,0x3f89e2aa,4 +np.float32,0xfeea866a,0x40a2b33f,4 +np.float32,0x80333925,0x80333925,4 +np.float32,0xfef5d13e,0xc00139ec,4 +np.float32,0x3f4750ab,0x3f7c87ad,4 +np.float32,0x3e41bfdd,0x3e44185a,4 +np.float32,0xbf5b0572,0xbf935935,4 +np.float32,0xbe93c9da,0xbe9808d8,4 +np.float32,0x7f501f33,0xc0f9973c,4 +np.float32,0x800af035,0x800af035,4 +np.float32,0x3f29faf8,0x3f4852a8,4 +np.float32,0xbe1e4c20,0xbe1f920c,4 +np.float32,0xbf7e8616,0xbfc4d79d,4 +np.float32,0x43ffbf,0x43ffbf,4 +np.float32,0x7f28e8a9,0xbfa1ac24,4 +np.float32,0xbf1f9f92,0xbf3820bc,4 +np.float32,0x3f07e004,0x3f1641c4,4 
+np.float32,0x3ef7ea7f,0x3f06a64a,4 +np.float32,0x7e013101,0x3f6080e6,4 +np.float32,0x7f122a4f,0xbf0a796f,4 +np.float32,0xfe096960,0x3ed7273a,4 +np.float32,0x3f06abf1,0x3f14a4b2,4 +np.float32,0x3e50ded3,0x3e53d0f1,4 +np.float32,0x7f50b346,0x3eabb536,4 +np.float32,0xff5adb0f,0xbd441972,4 +np.float32,0xbecefe46,0xbedb0f66,4 +np.float32,0x7da70bd4,0xbec66273,4 +np.float32,0x169811,0x169811,4 +np.float32,0xbee4dfee,0xbef5721a,4 +np.float32,0x3efbeae3,0x3f0936e6,4 +np.float32,0x8031bd61,0x8031bd61,4 +np.float32,0x8048e443,0x8048e443,4 +np.float32,0xff209aa6,0xbeb364cb,4 +np.float32,0xff477499,0x3c1b0041,4 +np.float32,0x803fe929,0x803fe929,4 +np.float32,0x3f70158b,0x3fae7725,4 +np.float32,0x7f795723,0x3e8e850a,4 +np.float32,0x3cba99,0x3cba99,4 +np.float32,0x80588d2a,0x80588d2a,4 +np.float32,0x805d1f05,0x805d1f05,4 +np.float32,0xff4ac09a,0xbefe614d,4 +np.float32,0x804af084,0x804af084,4 +np.float32,0x7c64ae63,0xc1a8b563,4 +np.float32,0x8078d793,0x8078d793,4 +np.float32,0x7f3e2436,0xbf8bf9d3,4 +np.float32,0x7ccec1,0x7ccec1,4 +np.float32,0xbf6462c7,0xbf9eb830,4 +np.float32,0x3f1002ca,0x3f216843,4 +np.float32,0xfe878ca6,0x409e73a5,4 +np.float32,0x3bd841d9,0x3bd842a7,4 +np.float32,0x7d406f41,0xbd9dcfa3,4 +np.float32,0x7c6d6,0x7c6d6,4 +np.float32,0x3f4ef360,0x3f86074b,4 +np.float32,0x805f534a,0x805f534a,4 +np.float32,0x1,0x1,4 +np.float32,0x3f739ee2,0x3fb39db2,4 +np.float32,0x3d0c2352,0x3d0c3153,4 +np.float32,0xfe8a4f2c,0x3edd8add,4 +np.float32,0x3e52eaa0,0x3e55f362,4 +np.float32,0x7bde9758,0xbf5ba5cf,4 +np.float32,0xff422654,0xbf41e487,4 +np.float32,0x385e5b,0x385e5b,4 +np.float32,0x5751dd,0x5751dd,4 +np.float32,0xff6c671c,0xc03e2d6d,4 +np.float32,0x1458be,0x1458be,4 +np.float32,0x80153d4d,0x80153d4d,4 +np.float32,0x7efd2adb,0x3e25458f,4 +np.float32,0xbe161880,0xbe172e12,4 +np.float32,0x7ecea1aa,0x40a66d79,4 +np.float32,0xbf5b02a2,0xbf9355f0,4 +np.float32,0x15d9ab,0x15d9ab,4 +np.float32,0x2dc7c7,0x2dc7c7,4 +np.float32,0xfebbf81a,0x4193f6e6,4 +np.float32,0xfe8e3594,0xc00a6695,4 +np.float32,0x185aa8,0x185aa8,4 +np.float32,0x3daea156,0x3daf0e00,4 +np.float32,0x3e071688,0x3e07e08e,4 +np.float32,0x802db9e6,0x802db9e6,4 +np.float32,0x7f7be2c4,0x3f1363dd,4 +np.float32,0x7eba3f5e,0xc13eb497,4 +np.float32,0x3de04a00,0x3de130a9,4 +np.float32,0xbf1022bc,0xbf2194eb,4 +np.float32,0xbf5b547e,0xbf93b53b,4 +np.float32,0x3e867bd6,0x3e89aa10,4 +np.float32,0xbea5eb5c,0xbeabfb73,4 +np.float32,0x7f1efae9,0x3ffca038,4 +np.float32,0xff5d0344,0xbe55dbbb,4 +np.float32,0x805167e7,0x805167e7,4 +np.float32,0xbdb3a020,0xbdb41667,4 +np.float32,0xbedea6b4,0xbeedd5fd,4 +np.float32,0x8053b45c,0x8053b45c,4 +np.float32,0x7ed370e9,0x3d90eba5,4 +np.float32,0xbefcd7da,0xbf09cf91,4 +np.float32,0x78b9ac,0x78b9ac,4 +np.float32,0xbf2f6dc0,0xbf5141ef,4 +np.float32,0x802d3a7b,0x802d3a7b,4 +np.float32,0xfd45d120,0x3fec31cc,4 +np.float32,0xbf7e7020,0xbfc4b2af,4 +np.float32,0xf04da,0xf04da,4 +np.float32,0xbe9819d4,0xbe9cbd35,4 +np.float32,0x8075ab35,0x8075ab35,4 +np.float32,0xbf052fdc,0xbf12aa2c,4 +np.float32,0x3f1530d0,0x3f28bd9f,4 +np.float32,0x80791881,0x80791881,4 +np.float32,0x67f309,0x67f309,4 +np.float32,0x3f12f16a,0x3f2588f5,4 +np.float32,0x3ecdac47,0x3ed97ff8,4 +np.float32,0xbf297fb7,0xbf478c39,4 +np.float32,0x8069fa80,0x8069fa80,4 +np.float32,0x807f940e,0x807f940e,4 +np.float32,0xbf648dc8,0xbf9eeecb,4 +np.float32,0x3de873b0,0x3de9748d,4 +np.float32,0x3f1aa645,0x3f30af1f,4 +np.float32,0xff227a62,0x3d8283cc,4 +np.float32,0xbf37187d,0xbf5e5f4c,4 +np.float32,0x803b1b1f,0x803b1b1f,4 +np.float32,0x3f58142a,0x3f8ff8da,4 
+np.float32,0x8004339e,0x8004339e,4 +np.float32,0xbf0f5654,0xbf2077a4,4 +np.float32,0x3f17e509,0x3f2ca598,4 +np.float32,0x3f800000,0x3fc75923,4 +np.float32,0xfdf79980,0x42f13047,4 +np.float32,0x7f111381,0x3f13c4c9,4 +np.float32,0xbea40c70,0xbea9e724,4 +np.float32,0x110520,0x110520,4 +np.float32,0x60490d,0x60490d,4 +np.float32,0x3f6703ec,0x3fa21951,4 +np.float32,0xbf098256,0xbf187652,4 +np.float32,0x658951,0x658951,4 +np.float32,0x3f53bf16,0x3f8b2818,4 +np.float32,0xff451811,0xc0026068,4 +np.float32,0x80777ee0,0x80777ee0,4 +np.float32,0x3e4fcc19,0x3e52b286,4 +np.float32,0x7f387ee0,0x3ce93eb6,4 +np.float32,0xff51181f,0xbfca3ee4,4 +np.float32,0xbf5655ae,0xbf8e0304,4 +np.float32,0xff2f1dcd,0x40025471,4 +np.float32,0x7f6e58e5,0xbe9930d5,4 +np.float32,0x7adf11,0x7adf11,4 +np.float32,0xbe9a2bc2,0xbe9f0185,4 +np.float32,0x8065d3a0,0x8065d3a0,4 +np.float32,0x3ed6e826,0x3ee47c45,4 +np.float32,0x80598ea0,0x80598ea0,4 +np.float32,0x7f10b90a,0x40437bd0,4 +np.float32,0x27b447,0x27b447,4 +np.float32,0x7ecd861c,0x3fce250f,4 +np.float32,0x0,0x0,4 +np.float32,0xbeba82d6,0xbec3394c,4 +np.float32,0xbf4958b0,0xbf8048ea,4 +np.float32,0x7c643e,0x7c643e,4 +np.float32,0x580770,0x580770,4 +np.float32,0x805bf54a,0x805bf54a,4 +np.float32,0x7f1f3cee,0xbe1a54d6,4 +np.float32,0xfefefdea,0x3fa84576,4 +np.float32,0x7f007b7a,0x3e8a6d25,4 +np.float32,0xbf177959,0xbf2c0919,4 +np.float32,0xbf30fda0,0xbf53e058,4 +np.float32,0x3f0576be,0x3f130861,4 +np.float32,0x3f49380e,0x3f80283a,4 +np.float32,0xebc56,0xebc56,4 +np.float32,0x654e3b,0x654e3b,4 +np.float32,0x14a4d8,0x14a4d8,4 +np.float32,0xff69b3cb,0xbf822a88,4 +np.float32,0xbe9b6c1c,0xbea06109,4 +np.float32,0xbefddd7e,0xbf0a787b,4 +np.float32,0x4c4ebb,0x4c4ebb,4 +np.float32,0x7d0a74,0x7d0a74,4 +np.float32,0xbebb5f80,0xbec43635,4 +np.float32,0x7ee79723,0xc1c7f3f3,4 +np.float32,0x7f2be4c7,0xbfa6c693,4 +np.float32,0x805bc7d5,0x805bc7d5,4 +np.float32,0x8042f12c,0x8042f12c,4 +np.float32,0x3ef91be8,0x3f07697b,4 +np.float32,0x3cf37ac0,0x3cf38d1c,4 +np.float32,0x800000,0x800000,4 +np.float32,0xbe1ebf4c,0xbe200806,4 +np.float32,0x7f380862,0xbeb512e8,4 +np.float32,0xbe320064,0xbe33d0fc,4 +np.float32,0xff300b0c,0xbfadb805,4 +np.float32,0x308a06,0x308a06,4 +np.float32,0xbf084f6e,0xbf16d7b6,4 +np.float32,0xff47cab6,0x3f892b65,4 +np.float32,0xbed99f4a,0xbee7bfd5,4 +np.float32,0xff7d74c0,0x3ee88c9a,4 +np.float32,0x3c3d23,0x3c3d23,4 +np.float32,0x8074bde8,0x8074bde8,4 +np.float32,0x80042164,0x80042164,4 +np.float32,0x3e97c92a,0x3e9c6500,4 +np.float32,0x3b80e0,0x3b80e0,4 +np.float32,0xbf16646a,0xbf2a783d,4 +np.float32,0x7f3b4cb1,0xc01339be,4 +np.float32,0xbf31f36e,0xbf557fd0,4 +np.float32,0x7f540618,0xbe5f6fc1,4 +np.float32,0x7eee47d0,0x40a27e94,4 +np.float32,0x7f12f389,0xbebed654,4 +np.float32,0x56cff5,0x56cff5,4 +np.float32,0x8056032b,0x8056032b,4 +np.float32,0x3ed34e40,0x3ee02e38,4 +np.float32,0x7d51a908,0xbf19a90e,4 +np.float32,0x80000000,0x80000000,4 +np.float32,0xfdf73fd0,0xbf0f8cad,4 +np.float32,0x7ee4fe6d,0xbf1ea7e4,4 +np.float32,0x1f15ba,0x1f15ba,4 +np.float32,0xd18c3,0xd18c3,4 +np.float32,0x80797705,0x80797705,4 +np.float32,0x7ef07091,0x3f2f3b9a,4 +np.float32,0x7f552f41,0x3faf608c,4 +np.float32,0x3f779977,0x3fb9a7ad,4 +np.float32,0xfe1a7a50,0xbdadc4d1,4 +np.float32,0xbf449cf0,0xbf7740db,4 +np.float32,0xbe44e620,0xbe475cad,4 +np.float32,0x3f63a098,0x3f9dc2b5,4 +np.float32,0xfed40a12,0x4164533a,4 +np.float32,0x7a2bbb,0x7a2bbb,4 +np.float32,0xff7f7b9e,0xbeee8740,4 +np.float32,0x7ee27f8b,0x4233f53b,4 +np.float32,0xbf044c06,0xbf117c28,4 +np.float32,0xbeffde54,0xbf0bc49f,4 
+np.float32,0xfeaef2e8,0x3ff258fe,4 +np.float32,0x527451,0x527451,4 +np.float32,0xbcef8d00,0xbcef9e7c,4 +np.float32,0xbf0e20c0,0xbf1ec9b2,4 +np.float32,0x8024afda,0x8024afda,4 +np.float32,0x7ef6cb3e,0x422cad0b,4 +np.float32,0x3c120,0x3c120,4 +np.float32,0xbf125c8f,0xbf24b62c,4 +np.float32,0x7e770a93,0x402c9d86,4 +np.float32,0xbd30a4e0,0xbd30c0ee,4 +np.float32,0xbf4d3388,0xbf843530,4 +np.float32,0x3f529072,0x3f89df92,4 +np.float32,0xff0270b1,0xbf81be9a,4 +np.float32,0x5e07e7,0x5e07e7,4 +np.float32,0x7bec32,0x7bec32,4 +np.float32,0x7fc00000,0x7fc00000,4 +np.float32,0x3e3ba5e0,0x3e3dc6e9,4 +np.float32,0x3ecb62d4,0x3ed6ce2c,4 +np.float32,0x3eb3dde8,0x3ebba68f,4 +np.float32,0x8063f952,0x8063f952,4 +np.float32,0x7f204aeb,0x3e88614e,4 +np.float32,0xbeae1ddc,0xbeb5278e,4 +np.float32,0x6829e9,0x6829e9,4 +np.float32,0xbf361a99,0xbf5ca354,4 +np.float32,0xbf24fbe6,0xbf406326,4 +np.float32,0x3f329d41,0x3f56a061,4 +np.float32,0xfed6d666,0x3e8f71a5,4 +np.float32,0x337f92,0x337f92,4 +np.float32,0xbe1c4970,0xbe1d8305,4 +np.float32,0xbe6b7e18,0xbe6fbbde,4 +np.float32,0x3f2267b9,0x3f3c61da,4 +np.float32,0xbee1ee94,0xbef1d628,4 +np.float32,0x7ecffc1a,0x3f02987e,4 +np.float32,0xbe9b1306,0xbe9fff3b,4 +np.float32,0xbeffacae,0xbf0ba468,4 +np.float32,0x7f800000,0xffc00000,4 +np.float32,0xfefc9aa8,0xc19de2a3,4 +np.float32,0x7d7185bb,0xbf9090ec,4 +np.float32,0x7edfbafd,0x3fe9352f,4 +np.float32,0x4ef2ec,0x4ef2ec,4 +np.float32,0x7f4cab2e,0xbff4e5dd,4 +np.float32,0xff3b1788,0x3e3c22e9,4 +np.float32,0x4e15ee,0x4e15ee,4 +np.float32,0xbf5451e6,0xbf8bc8a7,4 +np.float32,0x3f7f6d2e,0x3fc65e8b,4 +np.float32,0xbf1d9184,0xbf35071b,4 +np.float32,0xbf3a81cf,0xbf646d9b,4 +np.float32,0xbe71acc4,0xbe7643ab,4 +np.float32,0x528b7d,0x528b7d,4 +np.float32,0x2cb1d0,0x2cb1d0,4 +np.float32,0x3f324bf8,0x3f56161a,4 +np.float32,0x80709a21,0x80709a21,4 +np.float32,0x4bc448,0x4bc448,4 +np.float32,0x3e8bd600,0x3e8f6b7a,4 +np.float32,0xbeb97d30,0xbec20dd6,4 +np.float32,0x2a5669,0x2a5669,4 +np.float32,0x805f2689,0x805f2689,4 +np.float32,0xfe569f50,0x3fc51952,4 +np.float32,0x1de44c,0x1de44c,4 +np.float32,0x3ec7036c,0x3ed1ae67,4 +np.float32,0x8052b8e5,0x8052b8e5,4 +np.float32,0xff740a6b,0x3f4981a8,4 +np.float32,0xfee9bb70,0xc05e23be,4 +np.float32,0xff4e12c9,0x4002b4ad,4 +np.float32,0x803de0c2,0x803de0c2,4 +np.float32,0xbf433a07,0xbf74966f,4 +np.float32,0x803e60ca,0x803e60ca,4 +np.float32,0xbf19ee98,0xbf2fa07a,4 +np.float32,0x92929,0x92929,4 +np.float32,0x7f709c27,0x4257ba2d,4 +np.float32,0x803167c6,0x803167c6,4 +np.float32,0xbf095ead,0xbf184607,4 +np.float32,0x617060,0x617060,4 +np.float32,0x2d85b3,0x2d85b3,4 +np.float32,0x53d20b,0x53d20b,4 +np.float32,0x3e046838,0x3e052666,4 +np.float32,0xbe7c5fdc,0xbe80ce4b,4 +np.float32,0x3d18d060,0x3d18e289,4 +np.float32,0x804dc031,0x804dc031,4 +np.float32,0x3f224166,0x3f3c26cd,4 +np.float32,0x7d683e3c,0xbea24f25,4 +np.float32,0xbf3a92aa,0xbf648be4,4 +np.float32,0x8072670b,0x8072670b,4 +np.float32,0xbe281aec,0xbe29a1bc,4 +np.float32,0x7f09d918,0xc0942490,4 +np.float32,0x7ca9fd07,0x4018b990,4 +np.float32,0x7d36ac5d,0x3cf57184,4 +np.float32,0x8039b62f,0x8039b62f,4 +np.float32,0x6cad7b,0x6cad7b,4 +np.float32,0x3c0fd9ab,0x3c0fda9d,4 +np.float32,0x80299883,0x80299883,4 +np.float32,0x3c2d0e3e,0x3c2d0fe4,4 +np.float32,0x8002cf62,0x8002cf62,4 +np.float32,0x801dde97,0x801dde97,4 +np.float32,0x80411856,0x80411856,4 +np.float32,0x6ebce8,0x6ebce8,4 +np.float32,0x7b7d1a,0x7b7d1a,4 +np.float32,0x8031d3de,0x8031d3de,4 +np.float32,0x8005c4ab,0x8005c4ab,4 +np.float32,0xbf7dd803,0xbfc3b3ef,4 +np.float32,0x8017ae60,0x8017ae60,4 
+np.float32,0xfe9316ce,0xbfe0544a,4 +np.float32,0x3f136bfe,0x3f2636ff,4 +np.float32,0x3df87b80,0x3df9b57d,4 +np.float32,0xff44c356,0xbf11c7ad,4 +np.float32,0x4914ae,0x4914ae,4 +np.float32,0x80524c21,0x80524c21,4 +np.float32,0x805c7dc8,0x805c7dc8,4 +np.float32,0xfed3c0aa,0xbff0c0ab,4 +np.float32,0x7eb2bfbb,0xbf4600bc,4 +np.float32,0xfec8df84,0x3f5bd350,4 +np.float32,0x3e5431a4,0x3e5748c3,4 +np.float32,0xbee6a3a0,0xbef79e86,4 +np.float32,0xbf6cc9b2,0xbfa9d61a,4 +np.float32,0x3f132bd5,0x3f25dbd9,4 +np.float32,0x7e6d2e48,0x3f9d025b,4 +np.float32,0x3edf430c,0x3eee942d,4 +np.float32,0x3f0d1b8a,0x3f1d60e1,4 +np.float32,0xbdf2f688,0xbdf41bfb,4 +np.float32,0xbe47a284,0xbe4a33ff,4 +np.float32,0x3eaa9fbc,0x3eb13be7,4 +np.float32,0xfe98d45e,0x3eb84517,4 +np.float32,0x7efc23b3,0x3dcc1c99,4 +np.float32,0x3ca36242,0x3ca367ce,4 +np.float32,0x3f76a944,0x3fb834e3,4 +np.float32,0xbf45207c,0xbf783f9b,4 +np.float32,0x3e7c1220,0x3e80a4f8,4 +np.float32,0x3f018200,0x3f0dd14e,4 +np.float32,0x3f53cdde,0x3f8b3839,4 +np.float32,0xbdbacb58,0xbdbb5063,4 +np.float32,0x804af68d,0x804af68d,4 +np.float32,0x3e2c12fc,0x3e2db65b,4 +np.float32,0x3f039433,0x3f10895a,4 +np.float32,0x7ef5193d,0x3f4115f7,4 +np.float32,0x8030afbe,0x8030afbe,4 +np.float32,0x3f06fa2a,0x3f150d5d,4 +np.float32,0x3f124442,0x3f2493d2,4 +np.float32,0xbeb5b792,0xbebdc090,4 +np.float32,0xbedc90a4,0xbeeb4de9,4 +np.float32,0x3f3ff8,0x3f3ff8,4 +np.float32,0x3ee75bc5,0x3ef881e4,4 +np.float32,0xfe80e3de,0xbf5cd535,4 +np.float32,0xf52eb,0xf52eb,4 +np.float32,0x80660ee8,0x80660ee8,4 +np.float32,0x3e173a58,0x3e185648,4 +np.float32,0xfe49520c,0xbf728d7c,4 +np.float32,0xbecbb8ec,0xbed73373,4 +np.float32,0xbf027ae0,0xbf0f173e,4 +np.float32,0xbcab6740,0xbcab6da8,4 +np.float32,0xbf2a15e2,0xbf487e11,4 +np.float32,0x3b781b,0x3b781b,4 +np.float32,0x44f559,0x44f559,4 +np.float32,0xff6a0ca6,0xc174d7c3,4 +np.float32,0x6460ef,0x6460ef,4 +np.float32,0xfe58009c,0x3ee2bb30,4 +np.float32,0xfec3c038,0x3e30d617,4 +np.float32,0x7f0687c0,0xbf62c820,4 +np.float32,0xbf44655e,0xbf76d589,4 +np.float32,0xbf42968c,0xbf735e78,4 +np.float32,0x80385503,0x80385503,4 +np.float32,0xbea7e3a2,0xbeae2d59,4 +np.float32,0x3dd0b770,0x3dd17131,4 +np.float32,0xbf4bc185,0xbf82b907,4 +np.float32,0xfefd7d64,0xbee05650,4 +np.float32,0xfaac3c00,0xbff23bc9,4 +np.float32,0xbf562f0d,0xbf8dd7f4,4 +np.float32,0x7fa00000,0x7fe00000,4 +np.float32,0x3e01bdb8,0x3e027098,4 +np.float32,0x3e2868ab,0x3e29f19e,4 +np.float32,0xfec55f2e,0x3f39f304,4 +np.float32,0xed4e,0xed4e,4 +np.float32,0x3e2b7330,0x3e2d11fa,4 +np.float32,0x7f738542,0x40cbbe16,4 +np.float32,0x3f123521,0x3f247e71,4 +np.float32,0x73572c,0x73572c,4 +np.float32,0x804936c8,0x804936c8,4 +np.float32,0x803b80d8,0x803b80d8,4 +np.float32,0x7f566c57,0xbee2855a,4 +np.float32,0xff0e3bd8,0xbff0543f,4 +np.float32,0x7d2b2fe7,0xbf94ba4c,4 +np.float32,0xbf0da470,0xbf1e1dc2,4 +np.float32,0xbd276500,0xbd277ce0,4 +np.float32,0xfcd15dc0,0x403ccc2a,4 +np.float32,0x80071e59,0x80071e59,4 +np.float32,0xbe9b0c34,0xbe9ff7be,4 +np.float32,0x3f4f9069,0x3f86ac50,4 +np.float32,0x80042a95,0x80042a95,4 +np.float32,0x7de28e39,0x3bc9b7f4,4 +np.float32,0xbf641935,0xbf9e5af8,4 +np.float32,0x8034f068,0x8034f068,4 +np.float32,0xff33a3d2,0xbf408e75,4 +np.float32,0xbcc51540,0xbcc51efc,4 +np.float32,0xff6d1ddf,0x3ef58f0e,4 +np.float32,0xbf64dfc4,0xbf9f5725,4 +np.float32,0xff068a06,0x3eea8987,4 +np.float32,0xff01c0af,0x3f24cdfe,4 +np.float32,0x3f4def7e,0x3f84f802,4 +np.float32,0xbf1b4ae7,0xbf31a299,4 +np.float32,0x8077df2d,0x8077df2d,4 +np.float32,0x3f0155c5,0x3f0d9785,4 
+np.float32,0x5a54b2,0x5a54b2,4 +np.float32,0x7f271f9e,0x3efb2ef3,4 +np.float32,0xbf0ff2ec,0xbf215217,4 +np.float32,0x7f500130,0xbf8a7fdd,4 +np.float32,0xfed9891c,0xbf65c872,4 +np.float32,0xfecbfaae,0x403bdbc2,4 +np.float32,0x3f3a5aba,0x3f642772,4 +np.float32,0x7ebc681e,0xbd8df059,4 +np.float32,0xfe05e400,0xbfe35d74,4 +np.float32,0xbf295ace,0xbf4750ea,4 +np.float32,0x7ea055b2,0x3f62d6be,4 +np.float32,0xbd00b520,0xbd00bff9,4 +np.float32,0xbf7677aa,0xbfb7e8cf,4 +np.float32,0x3e83f788,0x3e86f816,4 +np.float32,0x801f6710,0x801f6710,4 +np.float32,0x801133cc,0x801133cc,4 +np.float32,0x41da2a,0x41da2a,4 +np.float32,0xff1622fd,0x3f023650,4 +np.float32,0x806c7a72,0x806c7a72,4 +np.float32,0x3f10779c,0x3f220bb4,4 +np.float32,0xbf08cf94,0xbf17848d,4 +np.float32,0xbecb55b4,0xbed6bebd,4 +np.float32,0xbf0a1528,0xbf193d7b,4 +np.float32,0x806a16bd,0x806a16bd,4 +np.float32,0xc222a,0xc222a,4 +np.float32,0x3930de,0x3930de,4 +np.float32,0x3f5c3588,0x3f94bca2,4 +np.float32,0x1215ad,0x1215ad,4 +np.float32,0x3ed15030,0x3eddcf67,4 +np.float32,0x7da83b2e,0x3fce0d39,4 +np.float32,0x32b0a8,0x32b0a8,4 +np.float32,0x805aed6b,0x805aed6b,4 +np.float32,0x3ef8e02f,0x3f074346,4 +np.float32,0xbdeb6780,0xbdec7250,4 +np.float32,0x3f6e3cec,0x3fabda61,4 +np.float32,0xfefd467a,0x3ef7821a,4 +np.float32,0xfef090fe,0x3bb752a2,4 +np.float32,0x8019c538,0x8019c538,4 +np.float32,0x3e8cf284,0x3e909e81,4 +np.float32,0xbe6c6618,0xbe70b0a2,4 +np.float32,0x7f50a539,0x3f367be1,4 +np.float32,0x8019fe2f,0x8019fe2f,4 +np.float32,0x800c3f48,0x800c3f48,4 +np.float32,0xfd054cc0,0xc0f52802,4 +np.float32,0x3d0cca20,0x3d0cd853,4 +np.float32,0xbf4a7c44,0xbf816e74,4 +np.float32,0x3f46fc40,0x3f7be153,4 +np.float32,0x807c5849,0x807c5849,4 +np.float32,0xd7e41,0xd7e41,4 +np.float32,0x70589b,0x70589b,4 +np.float32,0x80357b95,0x80357b95,4 +np.float32,0x3de239f0,0x3de326a5,4 +np.float32,0x800b08e3,0x800b08e3,4 +np.float32,0x807ec946,0x807ec946,4 +np.float32,0x3e2e4b83,0x3e2fff76,4 +np.float32,0x3f198e0f,0x3f2f12a6,4 +np.float32,0xbecb1aca,0xbed67979,4 +np.float32,0x80134082,0x80134082,4 +np.float32,0x3f3a269f,0x3f63ca05,4 +np.float32,0x3f1381e4,0x3f265622,4 +np.float32,0xff293080,0xbf10be6f,4 +np.float32,0xff800000,0xffc00000,4 +np.float32,0x37d196,0x37d196,4 +np.float32,0x7e57eea7,0x3e7d8138,4 +np.float32,0x804b1dae,0x804b1dae,4 +np.float32,0x7d9508f9,0xc1075b35,4 +np.float32,0x3f7bf468,0x3fc095e0,4 +np.float32,0x55472c,0x55472c,4 +np.float32,0x3ecdcd86,0x3ed9a738,4 +np.float32,0x3ed9be0f,0x3ee7e4e9,4 +np.float32,0x3e7e0ddb,0x3e81b2fe,4 +np.float32,0x7ee6c1d3,0x3f850634,4 +np.float32,0x800f6fad,0x800f6fad,4 +np.float32,0xfefb3bd6,0xbff68ecc,4 +np.float32,0x8013d6e2,0x8013d6e2,4 +np.float32,0x3f3a2cb6,0x3f63d4ee,4 +np.float32,0xff383c84,0x3e7854bb,4 +np.float32,0x3f21946e,0x3f3b1cea,4 +np.float32,0xff322ea2,0x3fb22f31,4 +np.float32,0x8065a024,0x8065a024,4 +np.float32,0x7f395e30,0xbefe0de1,4 +np.float32,0x5b52db,0x5b52db,4 +np.float32,0x7f7caea7,0x3dac8ded,4 +np.float32,0xbf0431f8,0xbf1159b2,4 +np.float32,0x7f15b25b,0xc02a3833,4 +np.float32,0x80131abc,0x80131abc,4 +np.float32,0x7e829d81,0xbeb2e93d,4 +np.float32,0x3f2c64d7,0x3f4c3e4d,4 +np.float32,0x7f228d48,0xc1518c74,4 +np.float32,0xfc3c6f40,0xbf00d585,4 +np.float32,0x7f754f0f,0x3e2152f5,4 +np.float32,0xff65d32b,0xbe8bd56c,4 +np.float32,0xfea6b8c0,0x41608655,4 +np.float32,0x3f7d4b05,0x3fc2c96a,4 +np.float32,0x3f463230,0x3f7a54da,4 +np.float32,0x805117bb,0x805117bb,4 +np.float32,0xbf2ad4f7,0xbf49b30e,4 +np.float32,0x3eaa01ff,0x3eb08b56,4 +np.float32,0xff7a02bb,0x3f095f73,4 +np.float32,0x759176,0x759176,4 
+np.float32,0x803c18d5,0x803c18d5,4 +np.float32,0xbe0722d8,0xbe07ed16,4 +np.float32,0x3f4b4a99,0x3f823fc6,4 +np.float32,0x3f7d0451,0x3fc25463,4 +np.float32,0xfee31e40,0xbfb41091,4 +np.float32,0xbf733d2c,0xbfb30cf1,4 +np.float32,0x7ed81015,0x417c380c,4 +np.float32,0x7daafc3e,0xbe2a37ed,4 +np.float32,0x3e44f82b,0x3e476f67,4 +np.float32,0x7c8d99,0x7c8d99,4 +np.float32,0x3f7aec5a,0x3fbee991,4 +np.float32,0xff09fd55,0x3e0709d3,4 +np.float32,0xff4ba4df,0x4173c01f,4 +np.float32,0x3f43d944,0x3f75c7bd,4 +np.float32,0xff6a9106,0x40a10eff,4 +np.float32,0x3bc8341c,0x3bc834bf,4 +np.float32,0x3eea82,0x3eea82,4 +np.float32,0xfea36a3c,0x435729b2,4 +np.float32,0x7dcc1fb0,0x3e330053,4 +np.float32,0x3f616ae6,0x3f9b01ae,4 +np.float32,0x8030963f,0x8030963f,4 +np.float32,0x10d1e2,0x10d1e2,4 +np.float32,0xfeb9a8a6,0x40e6daac,4 +np.float32,0xbe1aba00,0xbe1bea3a,4 +np.float32,0x3cb6b4ea,0x3cb6bcac,4 +np.float32,0x3d8b0b64,0x3d8b422f,4 +np.float32,0x7b6894,0x7b6894,4 +np.float32,0x3e89dcde,0x3e8d4b4b,4 +np.float32,0x3f12b952,0x3f253974,4 +np.float32,0x1c316c,0x1c316c,4 +np.float32,0x7e2da535,0x3f95fe6b,4 +np.float32,0x3ae9a494,0x3ae9a4a4,4 +np.float32,0xbc5f5500,0xbc5f588b,4 +np.float32,0x3e7850fc,0x3e7d4d0e,4 +np.float32,0xbf800000,0xbfc75923,4 +np.float32,0x3e652d69,0x3e691502,4 +np.float32,0xbf6bdd26,0xbfa89129,4 +np.float32,0x3f441cfc,0x3f764a02,4 +np.float32,0x7f5445ff,0xc0906191,4 +np.float32,0x807b2ee3,0x807b2ee3,4 +np.float32,0xbeb6cab8,0xbebef9c0,4 +np.float32,0xff737277,0xbf327011,4 +np.float32,0xfc832aa0,0x402fd52e,4 +np.float32,0xbf0c7538,0xbf1c7c0f,4 +np.float32,0x7e1301c7,0xbf0ee63e,4 +np.float64,0xbfe0ef7df7a1defc,0xbfe2b76a8d8aeb35,4 +np.float64,0x7fdd9c2eae3b385c,0xbfc00d6885485039,4 +np.float64,0xbfb484c710290990,0xbfb4900e0a527555,4 +np.float64,0x7fe73e5d6cee7cba,0x3fefbf70a56b60d3,4 +np.float64,0x800a110aa8d42216,0x800a110aa8d42216,4 +np.float64,0xffedd4f3f3bba9e7,0xbff076f8c4124919,4 +np.float64,0x800093407f812682,0x800093407f812682,4 +np.float64,0x800a23150e54462a,0x800a23150e54462a,4 +np.float64,0xbfb1076864220ed0,0xbfb10dd95a74b733,4 +np.float64,0x3fed1f8b37fa3f16,0x3ff496100985211f,4 +np.float64,0x3fdf762f84beec5f,0x3fe1223eb04a17e0,4 +np.float64,0x53fd4e0aa7faa,0x53fd4e0aa7faa,4 +np.float64,0x3fdbd283bdb7a507,0x3fddb7ec9856a546,4 +np.float64,0xbfe43f449d687e89,0xbfe77724a0d3072b,4 +np.float64,0x618b73bcc316f,0x618b73bcc316f,4 +np.float64,0x67759424ceeb3,0x67759424ceeb3,4 +np.float64,0xbfe4b6f7d9a96df0,0xbfe831371f3bd7a8,4 +np.float64,0x800a531b8b74a637,0x800a531b8b74a637,4 +np.float64,0xffeeffd5c37dffab,0x3fea140cbc2c3726,4 +np.float64,0x3fe648e2002c91c4,0x3feac1b8816f972a,4 +np.float64,0x800f16242a1e2c48,0x800f16242a1e2c48,4 +np.float64,0xffeeff8e1dbdff1b,0xc000b555f117dce7,4 +np.float64,0x3fdf1cf73fbe39f0,0x3fe0e9032401135b,4 +np.float64,0x7fe19c388b633870,0x3fd5271b69317d5b,4 +np.float64,0x918f226d231e5,0x918f226d231e5,4 +np.float64,0x4cc19ab499834,0x4cc19ab499834,4 +np.float64,0xbd3121d57a624,0xbd3121d57a624,4 +np.float64,0xbfd145d334a28ba6,0xbfd1b468866124d6,4 +np.float64,0x8bdbf41517b7f,0x8bdbf41517b7f,4 +np.float64,0x3fd1b8cb3ea37198,0x3fd2306b13396cae,4 +np.float64,0xbfd632a959ac6552,0xbfd7220fcfb5ef78,4 +np.float64,0x1cdaafc639b57,0x1cdaafc639b57,4 +np.float64,0x3febdcce1577b99c,0x3ff2fe076195a2bc,4 +np.float64,0x7fca6e945934dd28,0x3ff43040df7024e8,4 +np.float64,0x3fbe08e78e3c11cf,0x3fbe2c60e6b48f75,4 +np.float64,0x7fc1ed0d0523da19,0x3ff55f8dcad9440f,4 +np.float64,0xbfdc729b8cb8e538,0xbfde7b6e15dd60c4,4 +np.float64,0x3fd219404f243281,0x3fd298d7b3546531,4 
+np.float64,0x3fe715c3f56e2b88,0x3fec255b5a59456e,4 +np.float64,0x7fe8b88e74b1711c,0x3ff60efd2c81d13d,4 +np.float64,0xa1d2b9fd43a57,0xa1d2b9fd43a57,4 +np.float64,0xffc1818223230304,0xbfb85c6c1e8018e7,4 +np.float64,0x3fde38ac8b3c7159,0x3fe0580c7e228576,4 +np.float64,0x8008faf7b491f5f0,0x8008faf7b491f5f0,4 +np.float64,0xffe7a1d751af43ae,0xbf7114cd7bbcd981,4 +np.float64,0xffec2db1b4b85b62,0xbff5cae759667f83,4 +np.float64,0x7fefce1ae27f9c35,0x3ff4b8b88f4876cf,4 +np.float64,0x7fd1ff56a523feac,0xbff342ce192f14dd,4 +np.float64,0x80026b3e3f84d67d,0x80026b3e3f84d67d,4 +np.float64,0xffedee5879bbdcb0,0xc02fae11508b2be0,4 +np.float64,0x8003c0dc822781ba,0x8003c0dc822781ba,4 +np.float64,0xffe38a79eca714f4,0xc008aa23b7a63980,4 +np.float64,0xbfda70411eb4e082,0xbfdc0d7e29c89010,4 +np.float64,0x800a5e34f574bc6a,0x800a5e34f574bc6a,4 +np.float64,0x3fc19fac6e233f59,0x3fc1bc66ac0d73d4,4 +np.float64,0x3a8a61ea7514d,0x3a8a61ea7514d,4 +np.float64,0x3fb57b536e2af6a0,0x3fb588451f72f44c,4 +np.float64,0x7fd68c6d082d18d9,0xc032ac926b665c9a,4 +np.float64,0xd5b87cfdab710,0xd5b87cfdab710,4 +np.float64,0xfe80b20bfd017,0xfe80b20bfd017,4 +np.float64,0x3fef8781e37f0f04,0x3ff8215fe2c1315a,4 +np.float64,0xffedddbb9c3bbb76,0x3fd959b82258a32a,4 +np.float64,0x3fc7d41f382fa83e,0x3fc81b94c3a091ba,4 +np.float64,0xffc3275dcf264ebc,0x3fb2b3d4985c6078,4 +np.float64,0x7fe34d2b7ba69a56,0x40001f3618e3c7c9,4 +np.float64,0x3fd64ae35fac95c7,0x3fd73d77e0b730f8,4 +np.float64,0x800e53bf6b3ca77f,0x800e53bf6b3ca77f,4 +np.float64,0xbfddf7c9083bef92,0xbfe02f392744d2d1,4 +np.float64,0x1c237cc038471,0x1c237cc038471,4 +np.float64,0x3fe4172beea82e58,0x3fe739b4bf16bc7e,4 +np.float64,0xfa950523f52a1,0xfa950523f52a1,4 +np.float64,0xffc839a2c5307344,0xbff70ff8a3c9247f,4 +np.float64,0x264f828c4c9f1,0x264f828c4c9f1,4 +np.float64,0x148a650a2914e,0x148a650a2914e,4 +np.float64,0x3fe8d255c0b1a4ac,0x3fef623c3ea8d6e3,4 +np.float64,0x800f4fbb28be9f76,0x800f4fbb28be9f76,4 +np.float64,0x7fdca57bcfb94af7,0x3ff51207563fb6cb,4 +np.float64,0x3fe4944107692882,0x3fe7fad593235364,4 +np.float64,0x800119b4f1a2336b,0x800119b4f1a2336b,4 +np.float64,0xbfe734075e6e680e,0xbfec5b35381069f2,4 +np.float64,0xffeb3c00db767801,0xbfbbd7d22df7b4b3,4 +np.float64,0xbfe95c658cb2b8cb,0xbff03ad5e0bc888a,4 +np.float64,0xffeefeb58fbdfd6a,0xbfd5c9264deb0e11,4 +np.float64,0x7fccc80fde39901f,0xc012c60f914f3ca2,4 +np.float64,0x3fe5da289c2bb451,0x3fea07ad00a0ca63,4 +np.float64,0x800e364b0a5c6c96,0x800e364b0a5c6c96,4 +np.float64,0x3fcf9ea7d23f3d50,0x3fd023b72e8c9dcf,4 +np.float64,0x800a475cfc948eba,0x800a475cfc948eba,4 +np.float64,0xffd4e0d757a9c1ae,0xbfa89d573352e011,4 +np.float64,0xbfd4dbec8229b7da,0xbfd5a165f12c7c40,4 +np.float64,0xffe307ab51260f56,0x3fe6b1639da58c3f,4 +np.float64,0xbfe6955a546d2ab4,0xbfeb44ae2183fee9,4 +np.float64,0xbfca1f18f5343e30,0xbfca7d804ccccdf4,4 +np.float64,0xe9f4dfebd3e9c,0xe9f4dfebd3e9c,4 +np.float64,0xfff0000000000000,0xfff8000000000000,4 +np.float64,0x8008e69c0fb1cd38,0x8008e69c0fb1cd38,4 +np.float64,0xbfead1ccf975a39a,0xbff1c84b3db8ca93,4 +np.float64,0x25a982424b531,0x25a982424b531,4 +np.float64,0x8010000000000000,0x8010000000000000,4 +np.float64,0x80056204ea0ac40b,0x80056204ea0ac40b,4 +np.float64,0x800d1442d07a2886,0x800d1442d07a2886,4 +np.float64,0xbfaef3dadc3de7b0,0xbfaefd85ae6205f0,4 +np.float64,0x7fe969ce4b32d39c,0xbff3c4364fc6778f,4 +np.float64,0x7fe418bac0a83175,0x402167d16b1efe0b,4 +np.float64,0x3fd7c82a25af9054,0x3fd8f0c701315672,4 +np.float64,0x80013782a7826f06,0x80013782a7826f06,4 +np.float64,0x7fc031c7ee20638f,0x400747ab705e6904,4 
+np.float64,0x3fe8cf327ff19e65,0x3fef5c14f8aafa89,4 +np.float64,0xbfe331a416a66348,0xbfe5e2290a098dd4,4 +np.float64,0x800607b2116c0f65,0x800607b2116c0f65,4 +np.float64,0x7fb40448f0280891,0xbfd43d4f0ffa1d64,4 +np.float64,0x7fefffffffffffff,0xbf74530cfe729484,4 +np.float64,0x3fe39b5444a736a9,0x3fe67eaa0b6acf27,4 +np.float64,0x3fee4733c4fc8e68,0x3ff631eabeef9696,4 +np.float64,0xbfec840f3b79081e,0xbff3cc8563ab2e74,4 +np.float64,0xbfc8f6854c31ed0c,0xbfc948caacb3bba0,4 +np.float64,0xffbcf754a639eea8,0xbfc88d17cad3992b,4 +np.float64,0x8000bd3163417a64,0x8000bd3163417a64,4 +np.float64,0x3fe766d0eaeecda2,0x3fecb660882f7024,4 +np.float64,0xb6cc30156d986,0xb6cc30156d986,4 +np.float64,0xffc0161f9f202c40,0x3fe19bdefe5cf8b1,4 +np.float64,0xffe1e462caa3c8c5,0x3fe392c47feea17b,4 +np.float64,0x30a36a566146e,0x30a36a566146e,4 +np.float64,0x3fa996f580332deb,0x3fa99c6b4f2abebe,4 +np.float64,0x3fba71716e34e2e0,0x3fba899f35edba1d,4 +np.float64,0xbfe8f7e5e971efcc,0xbfefac431a0e3d55,4 +np.float64,0xf48f1803e91e3,0xf48f1803e91e3,4 +np.float64,0x7fe3edc0a127db80,0xc03d1a579a5d74a8,4 +np.float64,0xffeba82056375040,0x3fdfd701308700db,4 +np.float64,0xbfeb5a924cf6b524,0xbff2640de7cd107f,4 +np.float64,0xfa4cd1a9f499a,0xfa4cd1a9f499a,4 +np.float64,0x800de1be7b9bc37d,0x800de1be7b9bc37d,4 +np.float64,0xffd44e56ad289cae,0x3fdf4b8085db9b67,4 +np.float64,0xbfe4fb3aea69f676,0xbfe89d2cc46fcc50,4 +np.float64,0xbfe596495d6b2c92,0xbfe997a589a1f632,4 +np.float64,0x6f55a2b8deab5,0x6f55a2b8deab5,4 +np.float64,0x7fe72dc4712e5b88,0x4039c4586b28c2bc,4 +np.float64,0x89348bd712692,0x89348bd712692,4 +np.float64,0xffe062156120c42a,0x4005f0580973bc77,4 +np.float64,0xbfeabc714d7578e2,0xbff1b07e2fa57dc0,4 +np.float64,0x8003a56b3e874ad7,0x8003a56b3e874ad7,4 +np.float64,0x800eeadfb85dd5c0,0x800eeadfb85dd5c0,4 +np.float64,0x46d77a4c8daf0,0x46d77a4c8daf0,4 +np.float64,0x8000c06e7dc180de,0x8000c06e7dc180de,4 +np.float64,0x3fe428d211e851a4,0x3fe754b1c00a89bc,4 +np.float64,0xc5be11818b7c2,0xc5be11818b7c2,4 +np.float64,0x7fefc244893f8488,0x401133dc54f52de5,4 +np.float64,0x3fde30eee93c61de,0x3fe0532b827543a6,4 +np.float64,0xbfd447f48b288fea,0xbfd4fd0654f90718,4 +np.float64,0xbfde98dc7b3d31b8,0xbfe094df12f84a06,4 +np.float64,0x3fed2c1a1dfa5834,0x3ff4a6c4f3470a65,4 +np.float64,0xbfe992165073242d,0xbff071ab039c9177,4 +np.float64,0x3fd0145d1b2028ba,0x3fd06d3867b703dc,4 +np.float64,0x3fe179457362f28b,0x3fe3722f1d045fda,4 +np.float64,0x800e28964fbc512d,0x800e28964fbc512d,4 +np.float64,0x8004a5d785294bb0,0x8004a5d785294bb0,4 +np.float64,0xbfd652f2272ca5e4,0xbfd7469713125120,4 +np.float64,0x7fe61f49036c3e91,0xbf9b6ccdf2d87e70,4 +np.float64,0xffb7d47dd02fa8f8,0xc004449a82320b13,4 +np.float64,0x3feb82f996b705f3,0x3ff29336c738a4c5,4 +np.float64,0x3fbb7fceea36ffa0,0x3fbb9b02c8ad7f93,4 +np.float64,0x80004519fb208a35,0x80004519fb208a35,4 +np.float64,0xbfe0539114e0a722,0xbfe1e86dc5aa039c,4 +np.float64,0x0,0x0,4 +np.float64,0xbfe99d1125f33a22,0xbff07cf8ec04300f,4 +np.float64,0xffd4fbeecc29f7de,0x3ffab76775a8455f,4 +np.float64,0xbfbf1c618e3e38c0,0xbfbf43d2764a8333,4 +np.float64,0x800cae02a9d95c06,0x800cae02a9d95c06,4 +np.float64,0x3febc47d3bf788fa,0x3ff2e0d7cf8ef509,4 +np.float64,0x3fef838f767f071f,0x3ff81aeac309bca0,4 +np.float64,0xbfd5e70716abce0e,0xbfd6ccb033ef7a35,4 +np.float64,0x3f9116fa60222df5,0x3f9117625f008e0b,4 +np.float64,0xffe02b1e5f20563c,0xbfe6b2ec293520b7,4 +np.float64,0xbf9b5aec3036b5e0,0xbf9b5c96c4c7f951,4 +np.float64,0xfdb0169bfb603,0xfdb0169bfb603,4 +np.float64,0x7fcdd1d51c3ba3a9,0x401f0e12fa0b7570,4 
+np.float64,0xbfd088103fa11020,0xbfd0e8c4a333ffb2,4 +np.float64,0x3fe22df82ee45bf0,0x3fe46d03a7c14de2,4 +np.float64,0xbfd57b0c28aaf618,0xbfd65349a6191de5,4 +np.float64,0x3fe0a42f50a1485f,0x3fe252e26775d9a4,4 +np.float64,0x800fab4e363f569c,0x800fab4e363f569c,4 +np.float64,0xffe9f0ed63f3e1da,0xbfe278c341b171d5,4 +np.float64,0x7fe26c244664d848,0xbfb325269dad1996,4 +np.float64,0xffe830410bf06081,0xc00181a39f606e96,4 +np.float64,0x800c548a0c78a914,0x800c548a0c78a914,4 +np.float64,0x800f94761ebf28ec,0x800f94761ebf28ec,4 +np.float64,0x3fe5984845eb3091,0x3fe99aeb653c666d,4 +np.float64,0x7fe93e5bf8f27cb7,0xc010d159fa27396a,4 +np.float64,0xffefffffffffffff,0x3f74530cfe729484,4 +np.float64,0x4c83f1269907f,0x4c83f1269907f,4 +np.float64,0x3fde0065a8bc00cc,0x3fe034a1cdf026d4,4 +np.float64,0x800743810d6e8703,0x800743810d6e8703,4 +np.float64,0x80040662d5280cc6,0x80040662d5280cc6,4 +np.float64,0x3fed20b2c5ba4166,0x3ff497988519d7aa,4 +np.float64,0xffe8fa15e5f1f42b,0x3fff82ca76d797b4,4 +np.float64,0xbb72e22f76e5d,0xbb72e22f76e5d,4 +np.float64,0x7fc18ffa7c231ff4,0xbff4b8b4c3315026,4 +np.float64,0xbfe8d1ac44f1a358,0xbfef60efc4f821e3,4 +np.float64,0x3fd38c1fe8271840,0x3fd42dc37ff7262b,4 +np.float64,0xe577bee5caef8,0xe577bee5caef8,4 +np.float64,0xbff0000000000000,0xbff8eb245cbee3a6,4 +np.float64,0xffcb3a9dd436753c,0x3fcd1a3aff1c3fc7,4 +np.float64,0x7fe44bf2172897e3,0x3ff60bfe82a379f4,4 +np.float64,0x8009203823924071,0x8009203823924071,4 +np.float64,0x7fef8e0abc7f1c14,0x3fe90e4962d47ce5,4 +np.float64,0xffda50004434a000,0x3fb50dee03e1418b,4 +np.float64,0x7fe2ff276ea5fe4e,0xc0355b7d2a0a8d9d,4 +np.float64,0x3fd0711ba5a0e238,0x3fd0d03823d2d259,4 +np.float64,0xe7625b03cec4c,0xe7625b03cec4c,4 +np.float64,0xbfd492c8d7a92592,0xbfd55006cde8d300,4 +np.float64,0x8001fee99f23fdd4,0x8001fee99f23fdd4,4 +np.float64,0x7ff4000000000000,0x7ffc000000000000,4 +np.float64,0xfa15df97f42bc,0xfa15df97f42bc,4 +np.float64,0xbfec3fdca9787fb9,0xbff377164b13c7a9,4 +np.float64,0xbcec10e579d82,0xbcec10e579d82,4 +np.float64,0xbfc3b4e2132769c4,0xbfc3dd1fcc7150a6,4 +np.float64,0x80045b149ee8b62a,0x80045b149ee8b62a,4 +np.float64,0xffe044554c2088aa,0xbff741436d558785,4 +np.float64,0xffcc65f09f38cbe0,0xc0172b4adc2d317d,4 +np.float64,0xf68b2d3bed166,0xf68b2d3bed166,4 +np.float64,0x7fc7f44c572fe898,0x3fec69f3b1eca790,4 +np.float64,0x3fac51f61438a3ec,0x3fac595d34156002,4 +np.float64,0xbfeaa9f256f553e5,0xbff19bfdf5984326,4 +np.float64,0x800e4742149c8e84,0x800e4742149c8e84,4 +np.float64,0xbfc493df132927c0,0xbfc4c1ba4268ead9,4 +np.float64,0xbfbf0c56383e18b0,0xbfbf3389fcf50c72,4 +np.float64,0xbf978a0e082f1420,0xbf978b1dd1da3d3c,4 +np.float64,0xbfe04375356086ea,0xbfe1d34c57314dd1,4 +np.float64,0x3feaeeb29b75dd65,0x3ff1e8b772374979,4 +np.float64,0xbfe15e42c3a2bc86,0xbfe34d45d56c5c15,4 +np.float64,0x3fe507429a6a0e85,0x3fe8b058176b3225,4 +np.float64,0x3feee2b26c3dc565,0x3ff71b73203de921,4 +np.float64,0xbfd496577aa92cae,0xbfd553fa7fe15a5f,4 +np.float64,0x7fe2c10953e58212,0x3fc8ead6a0d14bbf,4 +np.float64,0x800035b77aa06b70,0x800035b77aa06b70,4 +np.float64,0x2329201e46525,0x2329201e46525,4 +np.float64,0xbfe6225c9a6c44b9,0xbfea80861590fa02,4 +np.float64,0xbfd6925030ad24a0,0xbfd78e70b1c2215d,4 +np.float64,0xbfd82225c4b0444c,0xbfd958a60f845b39,4 +np.float64,0xbb03d8a17609,0xbb03d8a17609,4 +np.float64,0x7fc33967b12672ce,0x40001e00c9af4002,4 +np.float64,0xff9373c6d026e780,0xbff308654a459d3d,4 +np.float64,0x3feab1f9c5f563f4,0x3ff1a4e0fd2f093d,4 +np.float64,0xbf993ef768327de0,0xbf994046b64e308b,4 +np.float64,0xffb87382fc30e708,0xbfde0accb83c891b,4 
+np.float64,0x800bb3a118176743,0x800bb3a118176743,4 +np.float64,0x800c810250d90205,0x800c810250d90205,4 +np.float64,0xbfd2c4eb9ba589d8,0xbfd3539508b4a4a8,4 +np.float64,0xbee1f5437dc3f,0xbee1f5437dc3f,4 +np.float64,0x3fc07aeab520f5d8,0x3fc0926272f9d8e2,4 +np.float64,0xbfe23747a3246e90,0xbfe47a20a6e98687,4 +np.float64,0x3fde1296debc252c,0x3fe0401143ff6b5c,4 +np.float64,0xbfcec8c2f73d9184,0xbfcf644e25ed3b74,4 +np.float64,0xff9314f2c82629e0,0x40559a0f9099dfd1,4 +np.float64,0xbfe27487afa4e910,0xbfe4d0e01200bde6,4 +np.float64,0xffb3d6637627acc8,0x3fe326d4b1e1834f,4 +np.float64,0xffe6f84d642df09a,0x3fc73fa9f57c3acb,4 +np.float64,0xffe67cf76fecf9ee,0xc01cf48c97937ef9,4 +np.float64,0x7fdc73fc12b8e7f7,0xbfcfcecde9331104,4 +np.float64,0xffdcf8789239f0f2,0x3fe345e3b8e28776,4 +np.float64,0x800a70af5314e15f,0x800a70af5314e15f,4 +np.float64,0xffc862300730c460,0x3fc4e9ea813beca7,4 +np.float64,0xbfcc6961bd38d2c4,0xbfcce33bfa6c6bd1,4 +np.float64,0xbfc9b76bbf336ed8,0xbfca117456ac37e5,4 +np.float64,0x7fb86e829430dd04,0x400a5bd7a18e302d,4 +np.float64,0x7fb9813ef833027d,0xbfe5a6494f143625,4 +np.float64,0x8005085e2c2a10bd,0x8005085e2c2a10bd,4 +np.float64,0xffe5af099d6b5e12,0x40369bbe31e03e06,4 +np.float64,0xffde03b1fd3c0764,0x3ff061120aa1f52a,4 +np.float64,0x7fa4eb6cdc29d6d9,0x3fe9defbe9010322,4 +np.float64,0x800803f4b11007ea,0x800803f4b11007ea,4 +np.float64,0x7febd50f6df7aa1e,0xbffcf540ccf220dd,4 +np.float64,0x7fed454f08fa8a9d,0xbffc2a8b81079403,4 +np.float64,0xbfed7e8c69bafd19,0xbff5161e51ba6634,4 +np.float64,0xffef92e78eff25ce,0xbffefeecddae0ad3,4 +np.float64,0x7fe5b9b413ab7367,0xbfc681ba29704176,4 +np.float64,0x29284e805252,0x29284e805252,4 +np.float64,0xffed3955bcfa72ab,0xbfc695acb5f468de,4 +np.float64,0x3fe464ee1ca8c9dc,0x3fe7b140ce50fdca,4 +np.float64,0xffe522ae4bea455c,0x3feb957c146e66ef,4 +np.float64,0x8000000000000000,0x8000000000000000,4 +np.float64,0x3fd0c353a2a186a8,0x3fd1283aaa43a411,4 +np.float64,0x3fdb30a749b6614f,0x3fdcf40df006ed10,4 +np.float64,0x800109213cc21243,0x800109213cc21243,4 +np.float64,0xbfe72aa0c5ee5542,0xbfec4a713f513bc5,4 +np.float64,0x800865344ad0ca69,0x800865344ad0ca69,4 +np.float64,0x7feb7df60eb6fbeb,0x3fb1df06a67aa22f,4 +np.float64,0x3fe83a5dd93074bc,0x3fee3d63cda72636,4 +np.float64,0xbfde70e548bce1ca,0xbfe07b8e19c9dac6,4 +np.float64,0xbfeea38d537d471b,0xbff6bb18c230c0be,4 +np.float64,0x3fefeebbc47fdd78,0x3ff8cdaa53b7c7b4,4 +np.float64,0x7fe6512e20eca25b,0xbff623cee44a22b5,4 +np.float64,0xf8fa5ca3f1f4c,0xf8fa5ca3f1f4c,4 +np.float64,0x7fd12d00ed225a01,0xbfe90d518ea61faf,4 +np.float64,0x80027db43504fb69,0x80027db43504fb69,4 +np.float64,0xffc10a01aa221404,0x3fcc2065b3d0157b,4 +np.float64,0xbfef8286e87f050e,0xbff8193a54449b59,4 +np.float64,0xbfc73178092e62f0,0xbfc7735072ba4593,4 +np.float64,0x3fc859d70630b3ae,0x3fc8a626522af1c0,4 +np.float64,0x3fe4654c4268ca99,0x3fe7b1d2913eda1a,4 +np.float64,0xbfce93cd843d279c,0xbfcf2c2ef16a0957,4 +np.float64,0xffbcaa16d4395430,0xbfd511ced032d784,4 +np.float64,0xbfe91f980e723f30,0xbfeffb39cf8c7746,4 +np.float64,0x800556fb6f0aadf8,0x800556fb6f0aadf8,4 +np.float64,0xffd009cde520139c,0x3fe4fa83b1e93d28,4 +np.float64,0x7febc0675e3780ce,0x3feb53930c004dae,4 +np.float64,0xbfe7f975bdeff2ec,0xbfedc36e6729b010,4 +np.float64,0x45aff57c8b5ff,0x45aff57c8b5ff,4 +np.float64,0xbfec7ebd0138fd7a,0xbff3c5cab680aae0,4 +np.float64,0x8009448003b28900,0x8009448003b28900,4 +np.float64,0x3fca4b992d349732,0x3fcaabebcc86aa9c,4 +np.float64,0x3fca069161340d20,0x3fca63ecc742ff3a,4 +np.float64,0x80063bc80bec7791,0x80063bc80bec7791,4 
+np.float64,0xbfe1764bffe2ec98,0xbfe36e1cb30cec94,4 +np.float64,0xffd0dba72f21b74e,0x3fb1834964d57ef6,4 +np.float64,0xbfe31848fc263092,0xbfe5bd066445cbc3,4 +np.float64,0xbfd1fb227323f644,0xbfd278334e27f02d,4 +np.float64,0xffdc59069fb8b20e,0xbfdfc363f559ea2c,4 +np.float64,0x3fdea52a52bd4a55,0x3fe09cada4e5344c,4 +np.float64,0x3f715e55a022bd00,0x3f715e5c72a2809e,4 +np.float64,0x1d1ac6023a35a,0x1d1ac6023a35a,4 +np.float64,0x7feacc71627598e2,0x400486b82121da19,4 +np.float64,0xa0287fa340510,0xa0287fa340510,4 +np.float64,0xffe352c5abe6a58b,0xc002623346060543,4 +np.float64,0x7fed577a23baaef3,0x3fda19bc8fa3b21f,4 +np.float64,0x3fde8dd5263d1baa,0x3fe08de0fedf7029,4 +np.float64,0x3feddd3be2bbba78,0x3ff599b2f3e018cc,4 +np.float64,0xc7a009f58f401,0xc7a009f58f401,4 +np.float64,0xbfef03d5a4fe07ab,0xbff74ee08681f47b,4 +np.float64,0x7fe2cf60eea59ec1,0x3fe905fb44f8cc60,4 +np.float64,0xbfe498fcab6931fa,0xbfe8023a6ff8becf,4 +np.float64,0xbfef7142acfee285,0xbff7fd196133a595,4 +np.float64,0xd214ffdba42a0,0xd214ffdba42a0,4 +np.float64,0x8006de7d78cdbcfc,0x8006de7d78cdbcfc,4 +np.float64,0xb247d34f648fb,0xb247d34f648fb,4 +np.float64,0xbfdd5bece6bab7da,0xbfdf9ba63ca2c5b2,4 +np.float64,0x7fe874650af0e8c9,0x3fe74204e122c10f,4 +np.float64,0x800768c49baed18a,0x800768c49baed18a,4 +np.float64,0x3fb4c0a192298140,0x3fb4cc4c8aa43300,4 +np.float64,0xbfa740531c2e80a0,0xbfa7446b7c74ae8e,4 +np.float64,0x7fe10d6edf221add,0x3fedbcd2eae26657,4 +np.float64,0xbfe9175d0f722eba,0xbfefeaca7f32c6e3,4 +np.float64,0x953e11d32a7c2,0x953e11d32a7c2,4 +np.float64,0x80032df90c465bf3,0x80032df90c465bf3,4 +np.float64,0xffec5b799638b6f2,0xbfe95cd2c69be12c,4 +np.float64,0xffe0c3cfa9a1879f,0x3fe20b99b0c108ce,4 +np.float64,0x3fb610d8e22c21b2,0x3fb61ee0d6c16df8,4 +np.float64,0xffe16bb39962d766,0xc016d370381b6b42,4 +np.float64,0xbfdc72edb238e5dc,0xbfde7bd2de10717a,4 +np.float64,0xffed52dee3baa5bd,0xc01994c08899129a,4 +np.float64,0xffa92aab08325550,0xbff2b881ce363cbd,4 +np.float64,0x7fe028282de0504f,0xc0157ff96c69a9c7,4 +np.float64,0xbfdb2151bf3642a4,0xbfdce196fcc35857,4 +np.float64,0x3fcffbd13c3ff7a2,0x3fd0554b5f0371ac,4 +np.float64,0x800d206bff1a40d8,0x800d206bff1a40d8,4 +np.float64,0x458f818c8b1f1,0x458f818c8b1f1,4 +np.float64,0x800a7b56a234f6ae,0x800a7b56a234f6ae,4 +np.float64,0xffe3d86161e7b0c2,0xbff58d0dbde9f188,4 +np.float64,0xe8ed82e3d1db1,0xe8ed82e3d1db1,4 +np.float64,0x3fe234e0176469c0,0x3fe476bd36b96a75,4 +np.float64,0xbfc7cb9c132f9738,0xbfc812c46e185e0b,4 +np.float64,0xbfeba116c1f7422e,0xbff2b6b7563ad854,4 +np.float64,0x7fe7041de62e083b,0x3f5d2b42aca47274,4 +np.float64,0xbfcf60f4ff3ec1e8,0xbfd002eb83406436,4 +np.float64,0xbfc06067a520c0d0,0xbfc0776e5839ecda,4 +np.float64,0x4384965a87093,0x4384965a87093,4 +np.float64,0xd2ed9d01a5db4,0xd2ed9d01a5db4,4 +np.float64,0x3fbea88cb63d5119,0x3fbece49cc34a379,4 +np.float64,0x3fe7e982ebefd306,0x3feda5bd4c435d43,4 +np.float64,0xffdb60a3e036c148,0xbfcb7ed21e7a8f49,4 +np.float64,0x7fdba9231eb75245,0xbfd750cab1536398,4 +np.float64,0x800d593534dab26b,0x800d593534dab26b,4 +np.float64,0xffdf15fb683e2bf6,0x3fb3aaea23357f06,4 +np.float64,0xbfd6f8a2e5adf146,0xbfd802e509d67c67,4 +np.float64,0x3feeaa31513d5463,0x3ff6c52147dc053c,4 +np.float64,0xf2f6dfd3e5edc,0xf2f6dfd3e5edc,4 +np.float64,0x7fd58d8279ab1b04,0x403243f23d02af2a,4 +np.float64,0x8000000000000001,0x8000000000000001,4 +np.float64,0x3fdffb8e0ebff71c,0x3fe1786cb0a6b0f3,4 +np.float64,0xc999826b93331,0xc999826b93331,4 +np.float64,0xffc4966f19292ce0,0x3ff0836c75c56cc7,4 +np.float64,0x7fef95a4b2ff2b48,0xbfbbe2c27c78154f,4 
+np.float64,0xb8f1307f71e26,0xb8f1307f71e26,4 +np.float64,0x3fe807bc7eb00f79,0x3fedde19f2d3c42d,4 +np.float64,0x5e4b6580bc98,0x5e4b6580bc98,4 +np.float64,0xffe19353576326a6,0xc0278c51fee07d36,4 +np.float64,0xbfb0ca6f3e2194e0,0xbfb0d09be673fa72,4 +np.float64,0x3fea724211b4e484,0x3ff15ee06f0a0a13,4 +np.float64,0xbfda21e1c4b443c4,0xbfdbb041f3c86832,4 +np.float64,0x8008082b24901057,0x8008082b24901057,4 +np.float64,0xbfd031aa4ea06354,0xbfd08c77729634bb,4 +np.float64,0xbfc407e153280fc4,0xbfc432275711df5f,4 +np.float64,0xbb4fa4b5769f5,0xbb4fa4b5769f5,4 +np.float64,0x7fed6d1daffada3a,0xc037a14bc7b41fab,4 +np.float64,0xffeee589943dcb12,0x3ff2abfe47037778,4 +np.float64,0x301379d260270,0x301379d260270,4 +np.float64,0xbfec2fefc2b85fe0,0xbff36362c0363e06,4 +np.float64,0xbfe0b1c82e216390,0xbfe264f503f7c22c,4 +np.float64,0xbfea2bce78f4579d,0xbff112d6f07935ea,4 +np.float64,0x18508ef230a13,0x18508ef230a13,4 +np.float64,0x800667a74d6ccf4f,0x800667a74d6ccf4f,4 +np.float64,0x79ce5c8cf39cc,0x79ce5c8cf39cc,4 +np.float64,0x3feda61c8efb4c39,0x3ff54c9ade076f54,4 +np.float64,0x3fe27e06b0e4fc0d,0x3fe4de665c1dc3ca,4 +np.float64,0xbfd15fea2722bfd4,0xbfd1d081c55813b0,4 +np.float64,0xbfe5222c4cea4458,0xbfe8db62deb7d2ad,4 +np.float64,0xbfe8a16c33b142d8,0xbfef02d5831592a8,4 +np.float64,0x3fdb60e7c4b6c1d0,0x3fdd2e4265c4c3b6,4 +np.float64,0x800076d62b60edad,0x800076d62b60edad,4 +np.float64,0xbfec8f1527791e2a,0xbff3da7ed3641e8d,4 +np.float64,0x2af03bfe55e08,0x2af03bfe55e08,4 +np.float64,0xa862ee0950c5e,0xa862ee0950c5e,4 +np.float64,0x7fea5a7c1eb4b4f7,0xbffa6f07d28ef211,4 +np.float64,0x90e118fb21c23,0x90e118fb21c23,4 +np.float64,0xbfead0721bf5a0e4,0xbff1c6c7a771a128,4 +np.float64,0x3f63f4a4c027e94a,0x3f63f4a75665da67,4 +np.float64,0x3fece0efa579c1e0,0x3ff443bec52f021e,4 +np.float64,0xbfdbe743b737ce88,0xbfddd129bff89c15,4 +np.float64,0x3fd48c9b8fa91938,0x3fd5492a630a8cb5,4 +np.float64,0x3ff0000000000000,0x3ff8eb245cbee3a6,4 +np.float64,0xbfd51ea33baa3d46,0xbfd5ebd5dc710204,4 +np.float64,0x3fcfbab0183f7560,0x3fd032a054580b00,4 +np.float64,0x8007abce13cf579d,0x8007abce13cf579d,4 +np.float64,0xbfef0f4723be1e8e,0xbff760c7008e8913,4 +np.float64,0x8006340f524c681f,0x8006340f524c681f,4 +np.float64,0x87b7d7010f71,0x87b7d7010f71,4 +np.float64,0x3fe9422da9b2845b,0x3ff02052e6148c45,4 +np.float64,0x7fddd259b93ba4b2,0xc000731aa33d84b6,4 +np.float64,0x3fe0156d12202ada,0x3fe1972ba309cb29,4 +np.float64,0x8004f1264b89e24d,0x8004f1264b89e24d,4 +np.float64,0x3fececdcacb9d9b9,0x3ff4534d5861f731,4 +np.float64,0x3fd1790ab822f215,0x3fd1eb97b1bb6fb4,4 +np.float64,0xffce5d11863cba24,0xbfcb4f38c17210da,4 +np.float64,0x800a30c32a546187,0x800a30c32a546187,4 +np.float64,0x3fa58cc61c2b198c,0x3fa59008add7233e,4 +np.float64,0xbfe0ac77d62158f0,0xbfe25de3dba0bc4a,4 +np.float64,0xeb8c5753d718b,0xeb8c5753d718b,4 +np.float64,0x3fee5438dafca872,0x3ff644fef7e7adb5,4 +np.float64,0x3faad1eb2c35a3e0,0x3faad83499f94057,4 +np.float64,0x3fe39152c46722a6,0x3fe66fba0b96ab6e,4 +np.float64,0xffd6fd17712dfa2e,0xc010d697d1ab8731,4 +np.float64,0x5214a888a4296,0x5214a888a4296,4 +np.float64,0x8000127a5da024f5,0x8000127a5da024f5,4 +np.float64,0x7feb3a366cb6746c,0x3fbe49bd8d5f213a,4 +np.float64,0xca479501948f3,0xca479501948f3,4 +np.float64,0x7fe7c799ce6f8f33,0xbfd796cd98dc620c,4 +np.float64,0xffe20bcf30a4179e,0xbff8ca5453fa088f,4 +np.float64,0x3fe624638a6c48c7,0x3fea83f123832c3c,4 +np.float64,0xbfe5f1377c6be26f,0xbfea2e143a2d522c,4 +np.float64,0x7fd193f9f8a327f3,0xbfb04ee2602574d4,4 +np.float64,0xbfe7419d2fee833a,0xbfec737f140d363d,4 +np.float64,0x1,0x1,4 
+np.float64,0x7fe2ac246c655848,0x3fd14fee3237727a,4 +np.float64,0xa459b42948b37,0xa459b42948b37,4 +np.float64,0x3fb26155ae24c2ab,0x3fb2696fc446d4c6,4 +np.float64,0xbfdd7b332e3af666,0xbfdfc296c21f1aa8,4 +np.float64,0xbfe00dbda4a01b7c,0xbfe18d2b060f0506,4 +np.float64,0x8003bb22d3e77646,0x8003bb22d3e77646,4 +np.float64,0x3fee21b0a57c4361,0x3ff5fb6a21dc911c,4 +np.float64,0x80ca69270194d,0x80ca69270194d,4 +np.float64,0xbfd6d80350adb006,0xbfd7ddb501edbde0,4 +np.float64,0xd2f8b801a5f2,0xd2f8b801a5f2,4 +np.float64,0xbfe856b3f170ad68,0xbfee7334fdc49296,4 +np.float64,0x3fed5c1b20bab836,0x3ff4e73ee5d5c7f3,4 +np.float64,0xbfd58085a5ab010c,0xbfd6596ddc381ffa,4 +np.float64,0x3fe4f0134b29e027,0x3fe88b70602fbd21,4 +np.float64,0xffc9098fdc321320,0x4011c334a74a92cf,4 +np.float64,0x794749bef28ea,0x794749bef28ea,4 +np.float64,0xbfc86b547f30d6a8,0xbfc8b84a4fafe0af,4 +np.float64,0x7fe1356b9da26ad6,0x3fd270bca208d899,4 +np.float64,0x7fca0ef1aa341de2,0xbff851044c0734fa,4 +np.float64,0x80064cb8b62c9972,0x80064cb8b62c9972,4 +np.float64,0xffd3a09a83a74136,0x3ffb66dae0accdf5,4 +np.float64,0x800e301aa15c6035,0x800e301aa15c6035,4 +np.float64,0x800e51f323bca3e6,0x800e51f323bca3e6,4 +np.float64,0x7ff0000000000000,0xfff8000000000000,4 +np.float64,0x800c4278c87884f2,0x800c4278c87884f2,4 +np.float64,0xbfe8481649f0902c,0xbfee576772695096,4 +np.float64,0xffe2344e3fa4689c,0x3fb10442ec0888de,4 +np.float64,0xbfeada313d75b462,0xbff1d1aee3fab3a9,4 +np.float64,0x8009ddfb1333bbf7,0x8009ddfb1333bbf7,4 +np.float64,0x7fed3314c93a6629,0x3ff7a9b12dc1cd37,4 +np.float64,0x3fd55c26da2ab84e,0x3fd630a7b8aac78a,4 +np.float64,0x800cdb5203f9b6a4,0x800cdb5203f9b6a4,4 +np.float64,0xffd04a875da0950e,0x4009a13810ab121d,4 +np.float64,0x800f1acb527e3597,0x800f1acb527e3597,4 +np.float64,0xbf9519bf282a3380,0xbf951a82e9b955ff,4 +np.float64,0x3fcd7a42fa3af486,0x3fce028f3c51072d,4 +np.float64,0xbfdd3e21b73a7c44,0xbfdf769f2ff2480b,4 +np.float64,0xffd4361e2aa86c3c,0xbfc211ce8e9f792c,4 +np.float64,0x7fccf97f6939f2fe,0xbff8464bad830f06,4 +np.float64,0x800ce47fb939c900,0x800ce47fb939c900,4 +np.float64,0xffe9e51df173ca3b,0xbfceaf990d652c4e,4 +np.float64,0x3fe05bba5b20b775,0x3fe1f326e4455442,4 +np.float64,0x800a29b4b134536a,0x800a29b4b134536a,4 +np.float64,0xe6f794b7cdef3,0xe6f794b7cdef3,4 +np.float64,0xffb5b688ce2b6d10,0x3ff924bb97ae2f6d,4 +np.float64,0x7fa74105d82e820b,0x3fd49643aaa9eee4,4 +np.float64,0x80020d15f7a41a2d,0x80020d15f7a41a2d,4 +np.float64,0x3fd6a983d5ad5308,0x3fd7a8cc8835b5b8,4 +np.float64,0x7fcd9798f03b2f31,0x3fc534c2f7bf4721,4 +np.float64,0xffdd31873a3a630e,0xbfe3171fcdffb3f7,4 +np.float64,0x80075183234ea307,0x80075183234ea307,4 +np.float64,0x82f3132505e63,0x82f3132505e63,4 +np.float64,0x3febfd9cb837fb39,0x3ff325bbf812515d,4 +np.float64,0xbfb4630fda28c620,0xbfb46e1f802ec278,4 +np.float64,0x3feeed7c89fddafa,0x3ff72c20ce5a9ee4,4 +np.float64,0x7fd3dcb3c127b967,0x40123d27ec9ec31d,4 +np.float64,0xbfe923450c72468a,0xbff00149c5742725,4 +np.float64,0x7fdef7f91abdeff1,0xbfe02ceb21f7923d,4 +np.float64,0x7fdd70d28fbae1a4,0xbfefcc5c9d10cdfd,4 +np.float64,0x800ca445a8d9488c,0x800ca445a8d9488c,4 +np.float64,0x7fec2754e1f84ea9,0x40173f6c1c97f825,4 +np.float64,0x7fcbca31f7379463,0x401e26bd2667075b,4 +np.float64,0x8003fa1d0847f43b,0x8003fa1d0847f43b,4 +np.float64,0xffe95cf85932b9f0,0xc01308e60278aa11,4 +np.float64,0x8009c53948f38a73,0x8009c53948f38a73,4 +np.float64,0x3fdcca9226b99524,0x3fdee7a008f75d41,4 +np.float64,0xbfe9ee241f33dc48,0xbff0d16bfff6c8e9,4 +np.float64,0xbfb3365058266ca0,0xbfb33f9176ebb51d,4 +np.float64,0x7fa98e10f4331c21,0x3fdee04ffd31314e,4 
+np.float64,0xbfe1a11aea634236,0xbfe3a8e3d84fda38,4 +np.float64,0xbfd8df051131be0a,0xbfda342805d1948b,4 +np.float64,0x3d49a2407a935,0x3d49a2407a935,4 +np.float64,0xfc51eefff8a3e,0xfc51eefff8a3e,4 +np.float64,0xda63950bb4c73,0xda63950bb4c73,4 +np.float64,0x80050f3d4fea1e7b,0x80050f3d4fea1e7b,4 +np.float64,0x3fcdbd6e453b7ae0,0x3fce497478c28e77,4 +np.float64,0x7ebd4932fd7aa,0x7ebd4932fd7aa,4 +np.float64,0x7fa3904eac27209c,0xc0015f3125efc151,4 +np.float64,0x7fc59f956b2b3f2a,0xc00c012e7a2c281f,4 +np.float64,0xbfd436d716a86dae,0xbfd4ea13533a942b,4 +np.float64,0x9347ae3d268f6,0x9347ae3d268f6,4 +np.float64,0xffd001764d2002ec,0xbffab3462e515623,4 +np.float64,0x3fe6f406662de80d,0x3febe9bac3954999,4 +np.float64,0x3f943ecaf8287d96,0x3f943f77dee5e77f,4 +np.float64,0x3fd6250efcac4a1c,0x3fd712afa947d56f,4 +np.float64,0xbfe849ff777093ff,0xbfee5b089d03391f,4 +np.float64,0xffd3b8ef8f2771e0,0x4000463ff7f29214,4 +np.float64,0xbfc3bae9252775d4,0xbfc3e34c133f1933,4 +np.float64,0xbfea93943df52728,0xbff18355e4fc341d,4 +np.float64,0x3fc4d922ad29b245,0x3fc508d66869ef29,4 +np.float64,0x4329694a8652e,0x4329694a8652e,4 +np.float64,0x8834f1a71069e,0x8834f1a71069e,4 +np.float64,0xe0e5be8dc1cb8,0xe0e5be8dc1cb8,4 +np.float64,0x7fef4d103afe9a1f,0xc0047b88b94554fe,4 +np.float64,0x3fe9b57af4f36af6,0x3ff0963831d51c3f,4 +np.float64,0x3fe081e2fa6103c6,0x3fe22572e41be655,4 +np.float64,0x3fd78cf7b42f19ef,0x3fd8acafa1ad776a,4 +np.float64,0x7fbffd58d43ffab1,0x3fb16092c7de6036,4 +np.float64,0xbfe1e8bfae23d180,0xbfe40c1c6277dd52,4 +np.float64,0x800a9f59fb153eb4,0x800a9f59fb153eb4,4 +np.float64,0xffebe14e33b7c29c,0x3fe0ec532f4deedd,4 +np.float64,0xffc36ca00426d940,0xc000806a712d6e83,4 +np.float64,0xbfcc2be82d3857d0,0xbfcca2a7d372ec64,4 +np.float64,0x800c03b908780772,0x800c03b908780772,4 +np.float64,0xf315a64be62b5,0xf315a64be62b5,4 +np.float64,0xbfe644043cec8808,0xbfeab974d3dc6d80,4 +np.float64,0x3fedb7de3cbb6fbc,0x3ff56549a5acd324,4 +np.float64,0xbfb1a875522350e8,0xbfb1afa41dee338d,4 +np.float64,0xffee8d4a407d1a94,0x3fead1749a636ff6,4 +np.float64,0x8004061c13080c39,0x8004061c13080c39,4 +np.float64,0x3fe650ae7feca15c,0x3feacefb8bc25f64,4 +np.float64,0x3fda8340e6b50682,0x3fdc24275cab1df8,4 +np.float64,0x8009084344321087,0x8009084344321087,4 +np.float64,0x7fdd19cb823a3396,0xbfd1d8fb35d89e3f,4 +np.float64,0xbfe893172571262e,0xbfeee716b592b93c,4 +np.float64,0x8ff5acc11fec,0x8ff5acc11fec,4 +np.float64,0xbfdca0c57cb9418a,0xbfdeb42465a1b59e,4 +np.float64,0xffd77bd2a3aef7a6,0x4012cd69e85b82d8,4 +np.float64,0xbfe6ea78982dd4f1,0xbfebd8ec61fb9e1f,4 +np.float64,0x7fe14b1d80a2963a,0xc02241642102cf71,4 +np.float64,0x3fe712bf286e257e,0x3fec20012329a7fb,4 +np.float64,0x7fcb6fa4d636df49,0x400b899d14a886b3,4 +np.float64,0x3fb82cb39a305960,0x3fb83f29c5f0822e,4 +np.float64,0x7fed694c8b3ad298,0xbfe2724373c69808,4 +np.float64,0xbfcd21229f3a4244,0xbfcda497fc3e1245,4 +np.float64,0x564d3770ac9a8,0x564d3770ac9a8,4 +np.float64,0xf4409e13e8814,0xf4409e13e8814,4 +np.float64,0x80068dca9a8d1b96,0x80068dca9a8d1b96,4 +np.float64,0xbfe13f82afe27f06,0xbfe3236ddded353f,4 +np.float64,0x80023f8114647f03,0x80023f8114647f03,4 +np.float64,0xeafba7dfd5f75,0xeafba7dfd5f75,4 +np.float64,0x3feca74ddeb94e9c,0x3ff3f95dcce5a227,4 +np.float64,0x10000000000000,0x10000000000000,4 +np.float64,0xbfebdb4141f7b682,0xbff2fc29823ac64a,4 +np.float64,0xbfcd75ee2f3aebdc,0xbfcdfdfd87cc6a29,4 +np.float64,0x7fc010cda420219a,0x3fae4ca2cf1f2657,4 +np.float64,0x1a90209e35205,0x1a90209e35205,4 +np.float64,0x8008057d01900afa,0x8008057d01900afa,4 +np.float64,0x3f9cb5f280396be5,0x3f9cb7dfb4e4be4e,4 
+np.float64,0xffe1bbb60b63776c,0xc00011b1ffcb2561,4 +np.float64,0xffda883f6fb5107e,0x4044238ef4e2a198,4 +np.float64,0x3fc07c0b4a20f817,0x3fc09387de9eebcf,4 +np.float64,0x8003a9ebc0c753d8,0x8003a9ebc0c753d8,4 +np.float64,0x1d7fd5923affc,0x1d7fd5923affc,4 +np.float64,0xbfe9cd8cf9b39b1a,0xbff0af43e567ba4a,4 +np.float64,0x11285cb42250c,0x11285cb42250c,4 +np.float64,0xffe81ae1ccb035c3,0xbfe038be7eb563a6,4 +np.float64,0xbfe56473b1eac8e8,0xbfe94654d8ab9e75,4 +np.float64,0x3fee904619fd208c,0x3ff69e198152fe17,4 +np.float64,0xbfeeb9a2cbfd7346,0xbff6dc8d96da78cd,4 +np.float64,0x8006cdfa59ed9bf5,0x8006cdfa59ed9bf5,4 +np.float64,0x8008f2366d31e46d,0x8008f2366d31e46d,4 +np.float64,0x8008d5f91e31abf3,0x8008d5f91e31abf3,4 +np.float64,0x3fe85886f8b0b10e,0x3fee76af16f5a126,4 +np.float64,0x3fefb9b2b73f7365,0x3ff8745128fa3e3b,4 +np.float64,0x7fdf3e721f3e7ce3,0xbfb19381541ca2a8,4 +np.float64,0x3fd2768c41a4ed18,0x3fd2fe2f85a3f3a6,4 +np.float64,0xbfcabe3c6a357c78,0xbfcb239fb88bc260,4 +np.float64,0xffdffb6a3dbff6d4,0xbff7af4759fd557c,4 +np.float64,0x800817f75f302fef,0x800817f75f302fef,4 +np.float64,0xbfe6a1d1762d43a3,0xbfeb5a399a095ef3,4 +np.float64,0x7fd6f32f912de65e,0x40016dedc51aabd0,4 +np.float64,0x3fc6cb26652d964d,0x3fc7099f047d924a,4 +np.float64,0x3fe8b975d67172ec,0x3fef31946123c0e7,4 +np.float64,0xffe44a09d1e89413,0x3fdee9e5eac6e540,4 +np.float64,0xbfece76d4cb9cedb,0xbff44c34849d07ba,4 +np.float64,0x7feb76027036ec04,0x3fe08595a5e263ac,4 +np.float64,0xffe194f591a329ea,0x3fbe5bd626400a70,4 +np.float64,0xbfc170698122e0d4,0xbfc18c3de8b63565,4 +np.float64,0x3fc82b2c0f305658,0x3fc875c3b5fbcd08,4 +np.float64,0x3fd5015634aa02ac,0x3fd5cb1df07213c3,4 +np.float64,0x7fe640884b6c8110,0xbff66255a420abb5,4 +np.float64,0x5a245206b448b,0x5a245206b448b,4 +np.float64,0xffe9d9fa2f73b3f4,0xc0272b0dd34ab9bf,4 +np.float64,0x3fd990e8aab321d0,0x3fdb04cd3a29bcc3,4 +np.float64,0xde9dda8bbd3bc,0xde9dda8bbd3bc,4 +np.float64,0xbfe81b32b4703666,0xbfee029937fa9f5a,4 +np.float64,0xbfe68116886d022d,0xbfeb21c62081cb73,4 +np.float64,0x3fb8da191231b432,0x3fb8ee28c71507d3,4 +np.float64,0x3fb111395a222273,0x3fb117b57de3dea4,4 +np.float64,0xffbafadc6a35f5b8,0x3ffcc6d2370297b9,4 +np.float64,0x8002ca475b05948f,0x8002ca475b05948f,4 +np.float64,0xbfeafef57875fdeb,0xbff1fb1315676f24,4 +np.float64,0x7fcda427d73b484f,0xbff9f70212694d17,4 +np.float64,0xffe2517b3ba4a2f6,0xc029ca6707305bf4,4 +np.float64,0x7fc5ee156b2bdc2a,0xbff8384b59e9056e,4 +np.float64,0xbfec22af3278455e,0xbff3530fe25816b4,4 +np.float64,0x6b5a8c2cd6b52,0x6b5a8c2cd6b52,4 +np.float64,0xffdaf6c4b935ed8a,0x4002f00ce58affcf,4 +np.float64,0x800a41813c748303,0x800a41813c748303,4 +np.float64,0xbfd09a1269213424,0xbfd0fc0a0c5de8eb,4 +np.float64,0x7fa2cb74d42596e9,0x3fc3d40e000fa69d,4 +np.float64,0x7ff8000000000000,0x7ff8000000000000,4 +np.float64,0x3fbfbf8ed63f7f1e,0x3fbfe97bcad9f53a,4 +np.float64,0x7fe0ebba65a1d774,0x401b0f17b28618df,4 +np.float64,0x3fd02c3a25a05874,0x3fd086aa55b19c9c,4 +np.float64,0xec628f95d8c52,0xec628f95d8c52,4 +np.float64,0x3fd319329fa63264,0x3fd3afb04e0dec63,4 +np.float64,0x180e0ade301c2,0x180e0ade301c2,4 +np.float64,0xbfe8d78324f1af06,0xbfef6c66153064ee,4 +np.float64,0xffb89fa200313f48,0xbfeb96ff2d9358dc,4 +np.float64,0x7fe6abcf86ed579e,0xc0269f4de86365ec,4 +np.float64,0x7fdff8cd65bff19a,0xbfd0f7c6b9052c9a,4 +np.float64,0xbfd2e3a53d25c74a,0xbfd37520cda5f6b2,4 +np.float64,0x7fe844b096708960,0x3ff696a6182e5a7a,4 +np.float64,0x7fdce0c7a3b9c18e,0x3fd42875d69ed379,4 +np.float64,0xffba5a91cc34b520,0x4001b571e8991951,4 +np.float64,0xffe78fe4a6ef1fc9,0x3ff4507b31f5b3bc,4 
+np.float64,0xbfd7047493ae08ea,0xbfd810618a53fffb,4 +np.float64,0xc6559def8cab4,0xc6559def8cab4,4 +np.float64,0x3fe75d67a76ebacf,0x3feca56817de65e4,4 +np.float64,0xffd24adbd6a495b8,0xc012c491addf2df5,4 +np.float64,0x7fed35e28dba6bc4,0x403a0fa555ff7ec6,4 +np.float64,0x80078c4afa0f1897,0x80078c4afa0f1897,4 +np.float64,0xa6ec39114dd87,0xa6ec39114dd87,4 +np.float64,0x7fb1bd33ba237a66,0x4010092bb6810fd4,4 +np.float64,0x800ecf215edd9e43,0x800ecf215edd9e43,4 +np.float64,0x3fb7c169242f82d2,0x3fb7d2ed30c462e6,4 +np.float64,0xbf71b46d60236900,0xbf71b4749a10c112,4 +np.float64,0x800d7851787af0a3,0x800d7851787af0a3,4 +np.float64,0x3fcb4a45e7369488,0x3fcbb61701a1bcec,4 +np.float64,0x3fd4e3682429c6d0,0x3fd5a9bcb916eb94,4 +np.float64,0x800497564c292ead,0x800497564c292ead,4 +np.float64,0xbfca3737a1346e70,0xbfca96a86ae5d687,4 +np.float64,0x19aa87e03356,0x19aa87e03356,4 +np.float64,0xffb2593fe624b280,0xc05fedb99b467ced,4 +np.float64,0xbfdd8748fbbb0e92,0xbfdfd1a7df17252c,4 +np.float64,0x8004c7afc7098f60,0x8004c7afc7098f60,4 +np.float64,0x7fde48b2bf3c9164,0xbfe36ef1158ed420,4 +np.float64,0xbfec8e0eb0f91c1d,0xbff3d9319705a602,4 +np.float64,0xffea1be204f437c3,0xc0144f67298c3e6f,4 +np.float64,0x7fdb906b593720d6,0xbfce99233396eda7,4 +np.float64,0x3fef0f114ffe1e22,0x3ff76072a258a51b,4 +np.float64,0x3fe3e284c8e7c50a,0x3fe6e9b05e17c999,4 +np.float64,0xbfbda9eef23b53e0,0xbfbdcc1abb443597,4 +np.float64,0x3feb6454d4f6c8aa,0x3ff26f65a85baba4,4 +np.float64,0x3fea317439f462e8,0x3ff118e2187ef33f,4 +np.float64,0x376ad0646ed5b,0x376ad0646ed5b,4 +np.float64,0x7fdd461a1c3a8c33,0x3f7ba20fb79e785f,4 +np.float64,0xebc520a3d78a4,0xebc520a3d78a4,4 +np.float64,0x3fca90fe53352200,0x3fcaf45c7fae234d,4 +np.float64,0xbfe80dd1de701ba4,0xbfede97e12cde9de,4 +np.float64,0x3fd242b00ea48560,0x3fd2c5cf9bf69a31,4 +np.float64,0x7fe46c057828d80a,0xbfe2f76837488f94,4 +np.float64,0x3fc162bea322c580,0x3fc17e517c958867,4 +np.float64,0xffebf0452ff7e08a,0x3ffc3fd95c257b54,4 +np.float64,0xffd88043c6310088,0x4008b05598d0d95f,4 +np.float64,0x800d8c49da5b1894,0x800d8c49da5b1894,4 +np.float64,0xbfed33b487ba6769,0xbff4b0ea941f8a6a,4 +np.float64,0x16b881e22d711,0x16b881e22d711,4 +np.float64,0x288bae0051177,0x288bae0051177,4 +np.float64,0xffc83a0fe8307420,0x4006eff03da17f86,4 +np.float64,0x3fc7868b252f0d18,0x3fc7cb4954290324,4 +np.float64,0xbfe195514b232aa2,0xbfe398aae6c8ed76,4 +np.float64,0x800c001ae7f80036,0x800c001ae7f80036,4 +np.float64,0x7feb82abe7370557,0xbff1e13fe6fad23c,4 +np.float64,0xffecf609cdf9ec13,0xc0112aa1805ae59e,4 +np.float64,0xffddd654f63bacaa,0x3fe46cce899f710d,4 +np.float64,0x3fe2163138642c62,0x3fe44b9c760acd4c,4 +np.float64,0x4e570dc09cae2,0x4e570dc09cae2,4 +np.float64,0x7fe9e8d091f3d1a0,0xc000fe20f8e9a4b5,4 +np.float64,0x7fe60042952c0084,0x3fd0aa740f394c2a,4 diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-tanh.csv b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-tanh.csv new file mode 100644 index 0000000..9e3ddc6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/data/umath-validation-set-tanh.csv @@ -0,0 +1,1429 @@ +dtype,input,output,ulperrortol +np.float32,0xbe26ebb0,0xbe25752f,2 +np.float32,0xbe22ecc0,0xbe219054,2 +np.float32,0x8010a6b3,0x8010a6b3,2 +np.float32,0x3135da,0x3135da,2 +np.float32,0xbe982afc,0xbe93d727,2 +np.float32,0x16a51f,0x16a51f,2 +np.float32,0x491e56,0x491e56,2 +np.float32,0x4bf7ca,0x4bf7ca,2 +np.float32,0x3eebc21c,0x3edc65b2,2 +np.float32,0x80155c94,0x80155c94,2 +np.float32,0x3e14f626,0x3e13eb6a,2 
+np.float32,0x801a238f,0x801a238f,2 +np.float32,0xbde33a80,0xbde24cf9,2 +np.float32,0xbef8439c,0xbee67a51,2 +np.float32,0x7f60d0a5,0x3f800000,2 +np.float32,0x190ee3,0x190ee3,2 +np.float32,0x80759113,0x80759113,2 +np.float32,0x800afa9f,0x800afa9f,2 +np.float32,0x7110cf,0x7110cf,2 +np.float32,0x3cf709f0,0x3cf6f6c6,2 +np.float32,0x3ef58da4,0x3ee44fa7,2 +np.float32,0xbf220ff2,0xbf0f662c,2 +np.float32,0xfd888078,0xbf800000,2 +np.float32,0xbe324734,0xbe307f9b,2 +np.float32,0x3eb5cb4f,0x3eae8560,2 +np.float32,0xbf7e7d02,0xbf425493,2 +np.float32,0x3ddcdcf0,0x3ddc02c2,2 +np.float32,0x8026d27a,0x8026d27a,2 +np.float32,0x3d4c0fb1,0x3d4be484,2 +np.float32,0xbf27d2c9,0xbf134d7c,2 +np.float32,0x8029ff80,0x8029ff80,2 +np.float32,0x7f046d2c,0x3f800000,2 +np.float32,0x13f94b,0x13f94b,2 +np.float32,0x7f4ff922,0x3f800000,2 +np.float32,0x3f4ea2ed,0x3f2b03e4,2 +np.float32,0x3e7211f0,0x3e6da8cf,2 +np.float32,0x7f39d0cf,0x3f800000,2 +np.float32,0xfee57fc6,0xbf800000,2 +np.float32,0xff6fb326,0xbf800000,2 +np.float32,0xff800000,0xbf800000,2 +np.float32,0x3f0437a4,0x3ef32fcd,2 +np.float32,0xff546d1e,0xbf800000,2 +np.float32,0x3eb5645b,0x3eae2a5c,2 +np.float32,0x3f08a6e5,0x3ef9ff8f,2 +np.float32,0x80800000,0x80800000,2 +np.float32,0x7f3413da,0x3f800000,2 +np.float32,0xfd760140,0xbf800000,2 +np.float32,0x7f3ad24a,0x3f800000,2 +np.float32,0xbf56e812,0xbf2f7f14,2 +np.float32,0xbece0338,0xbec3920a,2 +np.float32,0xbeede54a,0xbede22ae,2 +np.float32,0x7eaeb215,0x3f800000,2 +np.float32,0x3c213c00,0x3c213aab,2 +np.float32,0x7eaac217,0x3f800000,2 +np.float32,0xbf2f740e,0xbf1851a6,2 +np.float32,0x7f6ca5b8,0x3f800000,2 +np.float32,0xff42ce95,0xbf800000,2 +np.float32,0x802e4189,0x802e4189,2 +np.float32,0x80000001,0x80000001,2 +np.float32,0xbf31f298,0xbf19ebbe,2 +np.float32,0x3dcb0e6c,0x3dca64c1,2 +np.float32,0xbf29599c,0xbf145204,2 +np.float32,0x2e33f2,0x2e33f2,2 +np.float32,0x1c11e7,0x1c11e7,2 +np.float32,0x3f3b188d,0x3f1fa302,2 +np.float32,0x113300,0x113300,2 +np.float32,0x8054589e,0x8054589e,2 +np.float32,0x2a9e69,0x2a9e69,2 +np.float32,0xff513af7,0xbf800000,2 +np.float32,0x7f2e987a,0x3f800000,2 +np.float32,0x807cd426,0x807cd426,2 +np.float32,0x7f0dc4e4,0x3f800000,2 +np.float32,0x7e7c0d56,0x3f800000,2 +np.float32,0x5cb076,0x5cb076,2 +np.float32,0x80576426,0x80576426,2 +np.float32,0xff616222,0xbf800000,2 +np.float32,0xbf7accb5,0xbf40c005,2 +np.float32,0xfe4118c8,0xbf800000,2 +np.float32,0x804b9327,0x804b9327,2 +np.float32,0x3ed2b428,0x3ec79026,2 +np.float32,0x3f4a048f,0x3f286d41,2 +np.float32,0x800000,0x800000,2 +np.float32,0x7efceb9f,0x3f800000,2 +np.float32,0xbf5fe2d3,0xbf34246f,2 +np.float32,0x807e086a,0x807e086a,2 +np.float32,0x7ef5e856,0x3f800000,2 +np.float32,0xfc546f00,0xbf800000,2 +np.float32,0x3a65b890,0x3a65b88c,2 +np.float32,0x800cfa70,0x800cfa70,2 +np.float32,0x80672ea7,0x80672ea7,2 +np.float32,0x3f2bf3f2,0x3f160a12,2 +np.float32,0xbf0ab67e,0xbefd2004,2 +np.float32,0x3f2a0bb4,0x3f14c824,2 +np.float32,0xbeff5374,0xbeec12d7,2 +np.float32,0xbf221b58,0xbf0f6dff,2 +np.float32,0x7cc1f3,0x7cc1f3,2 +np.float32,0x7f234e3c,0x3f800000,2 +np.float32,0x3f60ff10,0x3f34b37d,2 +np.float32,0xbdd957f0,0xbdd887fe,2 +np.float32,0x801ce048,0x801ce048,2 +np.float32,0x7f3a8f76,0x3f800000,2 +np.float32,0xfdd13d08,0xbf800000,2 +np.float32,0x3e9af4a4,0x3e966445,2 +np.float32,0x1e55f3,0x1e55f3,2 +np.float32,0x327905,0x327905,2 +np.float32,0xbf03cf0b,0xbef28dad,2 +np.float32,0x3f0223d3,0x3eeff4f4,2 +np.float32,0xfdd96ff8,0xbf800000,2 +np.float32,0x428db8,0x428db8,2 +np.float32,0xbd74a200,0xbd7457a5,2 +np.float32,0x2a63a3,0x2a63a3,2 
+np.float32,0x7e8aa9d7,0x3f800000,2 +np.float32,0x7f50b810,0x3f800000,2 +np.float32,0xbce5ec80,0xbce5dd0d,2 +np.float32,0x54711,0x54711,2 +np.float32,0x8074212a,0x8074212a,2 +np.float32,0xbf13d0ec,0xbf0551b5,2 +np.float32,0x80217f89,0x80217f89,2 +np.float32,0x3f300824,0x3f18b12f,2 +np.float32,0x7d252462,0x3f800000,2 +np.float32,0x807a154c,0x807a154c,2 +np.float32,0x8064d4b9,0x8064d4b9,2 +np.float32,0x804543b4,0x804543b4,2 +np.float32,0x4c269e,0x4c269e,2 +np.float32,0xff39823b,0xbf800000,2 +np.float32,0x3f5040b1,0x3f2be80b,2 +np.float32,0xbf7028c1,0xbf3bfee5,2 +np.float32,0x3e94eb78,0x3e90db93,2 +np.float32,0x3ccc1b40,0x3ccc1071,2 +np.float32,0xbe8796f0,0xbe8481a1,2 +np.float32,0xfc767bc0,0xbf800000,2 +np.float32,0xbdd81ed0,0xbdd75259,2 +np.float32,0xbed31bfc,0xbec7e82d,2 +np.float32,0xbf350a9e,0xbf1be1c6,2 +np.float32,0x33d41f,0x33d41f,2 +np.float32,0x3f73e076,0x3f3db0b5,2 +np.float32,0x3f800000,0x3f42f7d6,2 +np.float32,0xfee27c14,0xbf800000,2 +np.float32,0x7f6e4388,0x3f800000,2 +np.float32,0x4ea19b,0x4ea19b,2 +np.float32,0xff2d75f2,0xbf800000,2 +np.float32,0x7ee225ca,0x3f800000,2 +np.float32,0x3f31cb4b,0x3f19d2a4,2 +np.float32,0x80554a9d,0x80554a9d,2 +np.float32,0x3f4d57fa,0x3f2a4c03,2 +np.float32,0x3eac6a88,0x3ea62e72,2 +np.float32,0x773520,0x773520,2 +np.float32,0x8079c20a,0x8079c20a,2 +np.float32,0xfeb1eb94,0xbf800000,2 +np.float32,0xfe8d81c0,0xbf800000,2 +np.float32,0xfeed6902,0xbf800000,2 +np.float32,0x8066bb65,0x8066bb65,2 +np.float32,0x7f800000,0x3f800000,2 +np.float32,0x1,0x1,2 +np.float32,0x3f2c66a4,0x3f16554a,2 +np.float32,0x3cd231,0x3cd231,2 +np.float32,0x3e932a64,0x3e8f3e0c,2 +np.float32,0xbf3ab1c3,0xbf1f6420,2 +np.float32,0xbc902b20,0xbc902751,2 +np.float32,0x7dac0a5b,0x3f800000,2 +np.float32,0x3f2b7e06,0x3f15bc93,2 +np.float32,0x75de0,0x75de0,2 +np.float32,0x8020b7bc,0x8020b7bc,2 +np.float32,0x3f257cda,0x3f11bb6b,2 +np.float32,0x807480e5,0x807480e5,2 +np.float32,0xfe00d758,0xbf800000,2 +np.float32,0xbd9b54e0,0xbd9b08cd,2 +np.float32,0x4dfbe3,0x4dfbe3,2 +np.float32,0xff645788,0xbf800000,2 +np.float32,0xbe92c80a,0xbe8ee360,2 +np.float32,0x3eb9b400,0x3eb1f77c,2 +np.float32,0xff20b69c,0xbf800000,2 +np.float32,0x623c28,0x623c28,2 +np.float32,0xff235748,0xbf800000,2 +np.float32,0xbf3bbc56,0xbf2006f3,2 +np.float32,0x7e6f78b1,0x3f800000,2 +np.float32,0x7e1584e9,0x3f800000,2 +np.float32,0xff463423,0xbf800000,2 +np.float32,0x8002861e,0x8002861e,2 +np.float32,0xbf0491d8,0xbef3bb6a,2 +np.float32,0x7ea3bc17,0x3f800000,2 +np.float32,0xbedde7ea,0xbed0fb49,2 +np.float32,0xbf4bac48,0xbf295c8b,2 +np.float32,0xff28e276,0xbf800000,2 +np.float32,0x7e8f3bf5,0x3f800000,2 +np.float32,0xbf0a4a73,0xbefc7c9d,2 +np.float32,0x7ec5bd96,0x3f800000,2 +np.float32,0xbf4c22e8,0xbf299f2c,2 +np.float32,0x3e3970a0,0x3e377064,2 +np.float32,0x3ecb1118,0x3ec10c88,2 +np.float32,0xff548a7a,0xbf800000,2 +np.float32,0xfe8ec550,0xbf800000,2 +np.float32,0x3e158985,0x3e147bb2,2 +np.float32,0x7eb79ad7,0x3f800000,2 +np.float32,0xbe811384,0xbe7cd1ab,2 +np.float32,0xbdc4b9e8,0xbdc41f94,2 +np.float32,0xe0fd5,0xe0fd5,2 +np.float32,0x3f2485f2,0x3f11142b,2 +np.float32,0xfdd3c3d8,0xbf800000,2 +np.float32,0xfe8458e6,0xbf800000,2 +np.float32,0x3f06e398,0x3ef74dd8,2 +np.float32,0xff4752cf,0xbf800000,2 +np.float32,0x6998e3,0x6998e3,2 +np.float32,0x626751,0x626751,2 +np.float32,0x806631d6,0x806631d6,2 +np.float32,0xbf0c3cf4,0xbeff6c54,2 +np.float32,0x802860f8,0x802860f8,2 +np.float32,0xff2952cb,0xbf800000,2 +np.float32,0xff31d40b,0xbf800000,2 +np.float32,0x7c389473,0x3f800000,2 +np.float32,0x3dcd2f1b,0x3dcc8010,2 
+np.float32,0x3d70c29f,0x3d707bbc,2 +np.float32,0x3f6bd386,0x3f39f979,2 +np.float32,0x1efec9,0x1efec9,2 +np.float32,0x3f675518,0x3f37d338,2 +np.float32,0x5fdbe3,0x5fdbe3,2 +np.float32,0x5d684e,0x5d684e,2 +np.float32,0xbedfe748,0xbed2a4c7,2 +np.float32,0x3f0cb07a,0x3f000cdc,2 +np.float32,0xbf77151e,0xbf3f1f5d,2 +np.float32,0x7f038ea0,0x3f800000,2 +np.float32,0x3ea91be9,0x3ea3376f,2 +np.float32,0xbdf20738,0xbdf0e861,2 +np.float32,0x807ea380,0x807ea380,2 +np.float32,0x2760ca,0x2760ca,2 +np.float32,0x7f20a544,0x3f800000,2 +np.float32,0x76ed83,0x76ed83,2 +np.float32,0x15a441,0x15a441,2 +np.float32,0x74c76d,0x74c76d,2 +np.float32,0xff3d5c2a,0xbf800000,2 +np.float32,0x7f6a76a6,0x3f800000,2 +np.float32,0x3eb87067,0x3eb0dabe,2 +np.float32,0xbf515cfa,0xbf2c83af,2 +np.float32,0xbdececc0,0xbdebdf9d,2 +np.float32,0x7f51b7c2,0x3f800000,2 +np.float32,0x3eb867ac,0x3eb0d30d,2 +np.float32,0xff50fd84,0xbf800000,2 +np.float32,0x806945e9,0x806945e9,2 +np.float32,0x298eed,0x298eed,2 +np.float32,0x441f53,0x441f53,2 +np.float32,0x8066d4b0,0x8066d4b0,2 +np.float32,0x3f6a479c,0x3f393dae,2 +np.float32,0xbf6ce2a7,0xbf3a7921,2 +np.float32,0x8064c3cf,0x8064c3cf,2 +np.float32,0xbf2d8146,0xbf170dfd,2 +np.float32,0x3b0e82,0x3b0e82,2 +np.float32,0xbea97574,0xbea387dc,2 +np.float32,0x67ad15,0x67ad15,2 +np.float32,0xbf68478f,0xbf38485a,2 +np.float32,0xff6f593b,0xbf800000,2 +np.float32,0xbeda26f2,0xbecdd806,2 +np.float32,0xbd216d50,0xbd2157ee,2 +np.float32,0x7a8544db,0x3f800000,2 +np.float32,0x801df20b,0x801df20b,2 +np.float32,0xbe14ba24,0xbe13b0a8,2 +np.float32,0xfdc6d8a8,0xbf800000,2 +np.float32,0x1d6b49,0x1d6b49,2 +np.float32,0x7f5ff1b8,0x3f800000,2 +np.float32,0x3f75e032,0x3f3e9625,2 +np.float32,0x7f2c5687,0x3f800000,2 +np.float32,0x3d95fb6c,0x3d95b6ee,2 +np.float32,0xbea515e4,0xbe9f97c8,2 +np.float32,0x7f2b2cd7,0x3f800000,2 +np.float32,0x3f076f7a,0x3ef8241e,2 +np.float32,0x5178ca,0x5178ca,2 +np.float32,0xbeb5976a,0xbeae5781,2 +np.float32,0x3e3c3563,0x3e3a1e13,2 +np.float32,0xbd208530,0xbd20702a,2 +np.float32,0x3eb03b04,0x3ea995ef,2 +np.float32,0x17fb9c,0x17fb9c,2 +np.float32,0xfca68e40,0xbf800000,2 +np.float32,0xbf5e7433,0xbf336a9f,2 +np.float32,0xff5b8d3d,0xbf800000,2 +np.float32,0x8003121d,0x8003121d,2 +np.float32,0xbe6dd344,0xbe69a3b0,2 +np.float32,0x67cc4,0x67cc4,2 +np.float32,0x9b01d,0x9b01d,2 +np.float32,0x127c13,0x127c13,2 +np.float32,0xfea5e3d6,0xbf800000,2 +np.float32,0xbdf5c610,0xbdf499c1,2 +np.float32,0x3aff4c00,0x3aff4beb,2 +np.float32,0x3b00afd0,0x3b00afc5,2 +np.float32,0x479618,0x479618,2 +np.float32,0x801cbd05,0x801cbd05,2 +np.float32,0x3ec9249f,0x3ebf6579,2 +np.float32,0x3535c4,0x3535c4,2 +np.float32,0xbeb4f662,0xbeadc915,2 +np.float32,0x8006fda6,0x8006fda6,2 +np.float32,0xbf4f3097,0xbf2b5239,2 +np.float32,0xbf3cb9a8,0xbf20a0e9,2 +np.float32,0x32ced0,0x32ced0,2 +np.float32,0x7ea34e76,0x3f800000,2 +np.float32,0x80063046,0x80063046,2 +np.float32,0x80727e8b,0x80727e8b,2 +np.float32,0xfd6b5780,0xbf800000,2 +np.float32,0x80109815,0x80109815,2 +np.float32,0xfdcc8a78,0xbf800000,2 +np.float32,0x81562,0x81562,2 +np.float32,0x803dfacc,0x803dfacc,2 +np.float32,0xbe204318,0xbe1ef75f,2 +np.float32,0xbf745d34,0xbf3de8e2,2 +np.float32,0xff13fdcc,0xbf800000,2 +np.float32,0x7f75ba8c,0x3f800000,2 +np.float32,0x806c04b4,0x806c04b4,2 +np.float32,0x3ec61ca6,0x3ebcc877,2 +np.float32,0xbeaea984,0xbea8301f,2 +np.float32,0xbf4dcd0e,0xbf2a8d34,2 +np.float32,0x802a01d3,0x802a01d3,2 +np.float32,0xbf747be5,0xbf3df6ad,2 +np.float32,0xbf75cbd2,0xbf3e8d0f,2 +np.float32,0x7db86576,0x3f800000,2 +np.float32,0xff49a2c3,0xbf800000,2 
+np.float32,0xbedc5314,0xbecfa978,2 +np.float32,0x8078877b,0x8078877b,2 +np.float32,0xbead4824,0xbea6f499,2 +np.float32,0xbf3926e3,0xbf1e716c,2 +np.float32,0x807f4a1c,0x807f4a1c,2 +np.float32,0x7f2cd8fd,0x3f800000,2 +np.float32,0x806cfcca,0x806cfcca,2 +np.float32,0xff1aa048,0xbf800000,2 +np.float32,0x7eb9ea08,0x3f800000,2 +np.float32,0xbf1034bc,0xbf02ab3a,2 +np.float32,0xbd087830,0xbd086b44,2 +np.float32,0x7e071034,0x3f800000,2 +np.float32,0xbefcc9de,0xbeea122f,2 +np.float32,0x80796d7a,0x80796d7a,2 +np.float32,0x33ce46,0x33ce46,2 +np.float32,0x8074a783,0x8074a783,2 +np.float32,0xbe95a56a,0xbe918691,2 +np.float32,0xbf2ff3f4,0xbf18a42d,2 +np.float32,0x1633e9,0x1633e9,2 +np.float32,0x7f0f104b,0x3f800000,2 +np.float32,0xbf800000,0xbf42f7d6,2 +np.float32,0x3d2cd6,0x3d2cd6,2 +np.float32,0xfed43e16,0xbf800000,2 +np.float32,0x3ee6faec,0x3ed87d2c,2 +np.float32,0x3f2c32d0,0x3f163352,2 +np.float32,0xff4290c0,0xbf800000,2 +np.float32,0xbf66500e,0xbf37546a,2 +np.float32,0x7dfb8fe3,0x3f800000,2 +np.float32,0x3f20ba5d,0x3f0e7b16,2 +np.float32,0xff30c7ae,0xbf800000,2 +np.float32,0x1728a4,0x1728a4,2 +np.float32,0x340d82,0x340d82,2 +np.float32,0xff7870b7,0xbf800000,2 +np.float32,0xbeac6ac4,0xbea62ea7,2 +np.float32,0xbef936fc,0xbee73c36,2 +np.float32,0x3ec7e12c,0x3ebe4ef8,2 +np.float32,0x80673488,0x80673488,2 +np.float32,0xfdf14c90,0xbf800000,2 +np.float32,0x3f182568,0x3f08726e,2 +np.float32,0x7ed7dcd0,0x3f800000,2 +np.float32,0x3de4da34,0x3de3e790,2 +np.float32,0xff7fffff,0xbf800000,2 +np.float32,0x4ff90c,0x4ff90c,2 +np.float32,0x3efb0d1c,0x3ee8b1d6,2 +np.float32,0xbf66e952,0xbf379ef4,2 +np.float32,0xba9dc,0xba9dc,2 +np.float32,0xff67c766,0xbf800000,2 +np.float32,0x7f1ffc29,0x3f800000,2 +np.float32,0x3f51c906,0x3f2cbe99,2 +np.float32,0x3f2e5792,0x3f179968,2 +np.float32,0x3ecb9750,0x3ec17fa0,2 +np.float32,0x7f3fcefc,0x3f800000,2 +np.float32,0xbe4e30fc,0xbe4b72f9,2 +np.float32,0x7e9bc4ce,0x3f800000,2 +np.float32,0x7e70aa1f,0x3f800000,2 +np.float32,0x14c6e9,0x14c6e9,2 +np.float32,0xbcf327c0,0xbcf3157a,2 +np.float32,0xff1fd204,0xbf800000,2 +np.float32,0x7d934a03,0x3f800000,2 +np.float32,0x8028bf1e,0x8028bf1e,2 +np.float32,0x7f0800b7,0x3f800000,2 +np.float32,0xfe04825c,0xbf800000,2 +np.float32,0x807210ac,0x807210ac,2 +np.float32,0x3f7faf7c,0x3f42d5fd,2 +np.float32,0x3e04a543,0x3e03e899,2 +np.float32,0x3e98ea15,0x3e94863e,2 +np.float32,0x3d2a2e48,0x3d2a153b,2 +np.float32,0x7fa00000,0x7fe00000,2 +np.float32,0x20a488,0x20a488,2 +np.float32,0x3f6ba86a,0x3f39e51a,2 +np.float32,0x0,0x0,2 +np.float32,0x3e892ddd,0x3e85fcfe,2 +np.float32,0x3e2da627,0x3e2c00e0,2 +np.float32,0xff000a50,0xbf800000,2 +np.float32,0x3eb749f4,0x3eafd739,2 +np.float32,0x8024c0ae,0x8024c0ae,2 +np.float32,0xfc8f3b40,0xbf800000,2 +np.float32,0xbf685fc7,0xbf385405,2 +np.float32,0x3f1510e6,0x3f063a4f,2 +np.float32,0x3f68e8ad,0x3f3895d8,2 +np.float32,0x3dba8608,0x3dba0271,2 +np.float32,0xbf16ea10,0xbf079017,2 +np.float32,0xb3928,0xb3928,2 +np.float32,0xfe447c00,0xbf800000,2 +np.float32,0x3db9cd57,0x3db94b45,2 +np.float32,0x803b66b0,0x803b66b0,2 +np.float32,0x805b5e02,0x805b5e02,2 +np.float32,0x7ec93f61,0x3f800000,2 +np.float32,0x8005a126,0x8005a126,2 +np.float32,0x6d8888,0x6d8888,2 +np.float32,0x3e21b7de,0x3e206314,2 +np.float32,0xbec9c31e,0xbebfedc2,2 +np.float32,0xbea88aa8,0xbea2b4e5,2 +np.float32,0x3d8fc310,0x3d8f86bb,2 +np.float32,0xbf3cc68a,0xbf20a8b8,2 +np.float32,0x432690,0x432690,2 +np.float32,0xbe51d514,0xbe4ef1a3,2 +np.float32,0xbcda6d20,0xbcda5fe1,2 +np.float32,0xfe24e458,0xbf800000,2 +np.float32,0xfedc8c14,0xbf800000,2 
+np.float32,0x7f7e9bd4,0x3f800000,2 +np.float32,0x3ebcc880,0x3eb4ab44,2 +np.float32,0xbe0aa490,0xbe09cd44,2 +np.float32,0x3dc9158c,0x3dc870c3,2 +np.float32,0x3e5c319e,0x3e58dc90,2 +np.float32,0x1d4527,0x1d4527,2 +np.float32,0x2dbf5,0x2dbf5,2 +np.float32,0xbf1f121f,0xbf0d5534,2 +np.float32,0x7e3e9ab5,0x3f800000,2 +np.float32,0x7f74b5c1,0x3f800000,2 +np.float32,0xbf6321ba,0xbf35c42b,2 +np.float32,0xbe5c7488,0xbe591c79,2 +np.float32,0x7e7b02cd,0x3f800000,2 +np.float32,0xfe7cbfa4,0xbf800000,2 +np.float32,0xbeace360,0xbea69a86,2 +np.float32,0x7e149b00,0x3f800000,2 +np.float32,0xbf61a700,0xbf35079a,2 +np.float32,0x7eb592a7,0x3f800000,2 +np.float32,0x3f2105e6,0x3f0eaf30,2 +np.float32,0xfd997a88,0xbf800000,2 +np.float32,0xff5d093b,0xbf800000,2 +np.float32,0x63aede,0x63aede,2 +np.float32,0x6907ee,0x6907ee,2 +np.float32,0xbf7578ee,0xbf3e680f,2 +np.float32,0xfea971e8,0xbf800000,2 +np.float32,0x3f21d0f5,0x3f0f3aed,2 +np.float32,0x3a50e2,0x3a50e2,2 +np.float32,0x7f0f5b1e,0x3f800000,2 +np.float32,0x805b9765,0x805b9765,2 +np.float32,0xbe764ab8,0xbe71a664,2 +np.float32,0x3eafac7f,0x3ea91701,2 +np.float32,0x807f4130,0x807f4130,2 +np.float32,0x7c5f31,0x7c5f31,2 +np.float32,0xbdbe0e30,0xbdbd8300,2 +np.float32,0x7ecfe4e0,0x3f800000,2 +np.float32,0xff7cb628,0xbf800000,2 +np.float32,0xff1842bc,0xbf800000,2 +np.float32,0xfd4163c0,0xbf800000,2 +np.float32,0x800e11f7,0x800e11f7,2 +np.float32,0x7f3adec8,0x3f800000,2 +np.float32,0x7f597514,0x3f800000,2 +np.float32,0xbe986e14,0xbe9414a4,2 +np.float32,0x800fa9d7,0x800fa9d7,2 +np.float32,0xff5b79c4,0xbf800000,2 +np.float32,0x80070565,0x80070565,2 +np.float32,0xbee5628e,0xbed72d60,2 +np.float32,0x3f438ef2,0x3f24b3ca,2 +np.float32,0xcda91,0xcda91,2 +np.float32,0x7e64151a,0x3f800000,2 +np.float32,0xbe95d584,0xbe91b2c7,2 +np.float32,0x8022c2a1,0x8022c2a1,2 +np.float32,0x7e7097bf,0x3f800000,2 +np.float32,0x80139035,0x80139035,2 +np.float32,0x804de2cb,0x804de2cb,2 +np.float32,0xfde5d178,0xbf800000,2 +np.float32,0x6d238,0x6d238,2 +np.float32,0x807abedc,0x807abedc,2 +np.float32,0x3f450a12,0x3f259129,2 +np.float32,0x3ef1c120,0x3ee141f2,2 +np.float32,0xfeb64dae,0xbf800000,2 +np.float32,0x8001732c,0x8001732c,2 +np.float32,0x3f76062e,0x3f3ea711,2 +np.float32,0x3eddd550,0x3ed0ebc8,2 +np.float32,0xff5ca1d4,0xbf800000,2 +np.float32,0xbf49dc5e,0xbf285673,2 +np.float32,0x7e9e5438,0x3f800000,2 +np.float32,0x7e83625e,0x3f800000,2 +np.float32,0x3f5dc41c,0x3f3310da,2 +np.float32,0x3f583efa,0x3f30342f,2 +np.float32,0xbe26bf88,0xbe254a2d,2 +np.float32,0xff1e0beb,0xbf800000,2 +np.float32,0xbe2244c8,0xbe20ec86,2 +np.float32,0xff0b1630,0xbf800000,2 +np.float32,0xff338dd6,0xbf800000,2 +np.float32,0x3eafc22c,0x3ea92a51,2 +np.float32,0x800ea07f,0x800ea07f,2 +np.float32,0x3f46f006,0x3f26aa7e,2 +np.float32,0x3e5f57cd,0x3e5bde16,2 +np.float32,0xbf1b2d8e,0xbf0a9a93,2 +np.float32,0xfeacdbe0,0xbf800000,2 +np.float32,0x7e5ea4bc,0x3f800000,2 +np.float32,0xbf51cbe2,0xbf2cc027,2 +np.float32,0x8073644c,0x8073644c,2 +np.float32,0xff2d6bfe,0xbf800000,2 +np.float32,0x3f65f0f6,0x3f37260a,2 +np.float32,0xff4b37a6,0xbf800000,2 +np.float32,0x712df7,0x712df7,2 +np.float32,0x7f71ef17,0x3f800000,2 +np.float32,0x8042245c,0x8042245c,2 +np.float32,0x3e5dde7b,0x3e5a760d,2 +np.float32,0x8069317d,0x8069317d,2 +np.float32,0x807932dd,0x807932dd,2 +np.float32,0x802f847e,0x802f847e,2 +np.float32,0x7e9300,0x7e9300,2 +np.float32,0x8040b4ab,0x8040b4ab,2 +np.float32,0xff76ef8e,0xbf800000,2 +np.float32,0x4aae3a,0x4aae3a,2 +np.float32,0x8058de73,0x8058de73,2 +np.float32,0x7e4d58c0,0x3f800000,2 +np.float32,0x3d811b30,0x3d80ef79,2 
+np.float32,0x7ec952cc,0x3f800000,2 +np.float32,0xfe162b1c,0xbf800000,2 +np.float32,0x3f0f1187,0x3f01d367,2 +np.float32,0xbf2f3458,0xbf182878,2 +np.float32,0x5ceb14,0x5ceb14,2 +np.float32,0xbec29476,0xbeb9b939,2 +np.float32,0x3e71f943,0x3e6d9176,2 +np.float32,0x3ededefc,0x3ed1c909,2 +np.float32,0x805df6ac,0x805df6ac,2 +np.float32,0x3e5ae2c8,0x3e579ca8,2 +np.float32,0x3f6ad2c3,0x3f397fdf,2 +np.float32,0x7d5f94d3,0x3f800000,2 +np.float32,0xbeec7fe4,0xbedd0037,2 +np.float32,0x3f645304,0x3f365b0d,2 +np.float32,0xbf69a087,0xbf38edef,2 +np.float32,0x8025102e,0x8025102e,2 +np.float32,0x800db486,0x800db486,2 +np.float32,0x4df6c7,0x4df6c7,2 +np.float32,0x806d8cdd,0x806d8cdd,2 +np.float32,0x7f0c78cc,0x3f800000,2 +np.float32,0x7e1cf70b,0x3f800000,2 +np.float32,0x3e0ae570,0x3e0a0cf7,2 +np.float32,0x80176ef8,0x80176ef8,2 +np.float32,0x3f38b60c,0x3f1e2bbb,2 +np.float32,0x3d3071e0,0x3d3055f5,2 +np.float32,0x3ebfcfdd,0x3eb750a9,2 +np.float32,0xfe2cdec0,0xbf800000,2 +np.float32,0x7eeb2eed,0x3f800000,2 +np.float32,0x8026c904,0x8026c904,2 +np.float32,0xbec79bde,0xbebe133a,2 +np.float32,0xbf7dfab6,0xbf421d47,2 +np.float32,0x805b3cfd,0x805b3cfd,2 +np.float32,0xfdfcfb68,0xbf800000,2 +np.float32,0xbd537ec0,0xbd534eaf,2 +np.float32,0x52ce73,0x52ce73,2 +np.float32,0xfeac6ea6,0xbf800000,2 +np.float32,0x3f2c2990,0x3f162d41,2 +np.float32,0x3e3354e0,0x3e318539,2 +np.float32,0x802db22b,0x802db22b,2 +np.float32,0x7f0faa83,0x3f800000,2 +np.float32,0x7f10e161,0x3f800000,2 +np.float32,0x7f165c60,0x3f800000,2 +np.float32,0xbf5a756f,0xbf315c82,2 +np.float32,0x7f5a4b68,0x3f800000,2 +np.float32,0xbd77fbf0,0xbd77ae7c,2 +np.float32,0x65d83c,0x65d83c,2 +np.float32,0x3e5f28,0x3e5f28,2 +np.float32,0x8040ec92,0x8040ec92,2 +np.float32,0xbf2b41a6,0xbf1594d5,2 +np.float32,0x7f2f88f1,0x3f800000,2 +np.float32,0xfdb64ab8,0xbf800000,2 +np.float32,0xbf7a3ff1,0xbf4082f5,2 +np.float32,0x1948fc,0x1948fc,2 +np.float32,0x802c1039,0x802c1039,2 +np.float32,0x80119274,0x80119274,2 +np.float32,0x7e885d7b,0x3f800000,2 +np.float32,0xfaf6a,0xfaf6a,2 +np.float32,0x3eba28c4,0x3eb25e1d,2 +np.float32,0x3e4df370,0x3e4b37da,2 +np.float32,0xbf19eff6,0xbf09b97d,2 +np.float32,0xbeddd3c6,0xbed0ea7f,2 +np.float32,0xff6fc971,0xbf800000,2 +np.float32,0x7e93de29,0x3f800000,2 +np.float32,0x3eb12332,0x3eaa6485,2 +np.float32,0x3eb7c6e4,0x3eb04563,2 +np.float32,0x4a67ee,0x4a67ee,2 +np.float32,0xff1cafde,0xbf800000,2 +np.float32,0x3f5e2812,0x3f3343da,2 +np.float32,0x3f060e04,0x3ef605d4,2 +np.float32,0x3e9027d8,0x3e8c76a6,2 +np.float32,0xe2d33,0xe2d33,2 +np.float32,0xff4c94fc,0xbf800000,2 +np.float32,0xbf574908,0xbf2fb26b,2 +np.float32,0xbf786c08,0xbf3fb68e,2 +np.float32,0x8011ecab,0x8011ecab,2 +np.float32,0xbf061c6a,0xbef61bfa,2 +np.float32,0x7eea5f9d,0x3f800000,2 +np.float32,0x3ea2e19c,0x3e9d99a5,2 +np.float32,0x8071550c,0x8071550c,2 +np.float32,0x41c70b,0x41c70b,2 +np.float32,0x80291fc8,0x80291fc8,2 +np.float32,0x43b1ec,0x43b1ec,2 +np.float32,0x32f5a,0x32f5a,2 +np.float32,0xbe9310ec,0xbe8f2692,2 +np.float32,0x7f75f6bf,0x3f800000,2 +np.float32,0x3e6642a6,0x3e6274d2,2 +np.float32,0x3ecb88e0,0x3ec1733f,2 +np.float32,0x804011b6,0x804011b6,2 +np.float32,0x80629cca,0x80629cca,2 +np.float32,0x8016b914,0x8016b914,2 +np.float32,0xbdd05fc0,0xbdcfa870,2 +np.float32,0x807b824d,0x807b824d,2 +np.float32,0xfeec2576,0xbf800000,2 +np.float32,0xbf54bf22,0xbf2e584c,2 +np.float32,0xbf185eb0,0xbf089b6b,2 +np.float32,0xfbc09480,0xbf800000,2 +np.float32,0x3f413054,0x3f234e25,2 +np.float32,0x7e9e32b8,0x3f800000,2 +np.float32,0x266296,0x266296,2 +np.float32,0x460284,0x460284,2 
+np.float32,0x3eb0b056,0x3ea9fe5a,2 +np.float32,0x1a7be5,0x1a7be5,2 +np.float32,0x7f099895,0x3f800000,2 +np.float32,0x3f3614f0,0x3f1c88ef,2 +np.float32,0x7e757dc2,0x3f800000,2 +np.float32,0x801fc91e,0x801fc91e,2 +np.float32,0x3f5ce37d,0x3f329ddb,2 +np.float32,0x3e664d70,0x3e627f15,2 +np.float32,0xbf38ed78,0xbf1e4dfa,2 +np.float32,0xbf5c563d,0xbf325543,2 +np.float32,0xbe91cc54,0xbe8dfb24,2 +np.float32,0x3d767fbe,0x3d7633ac,2 +np.float32,0xbf6aeb40,0xbf398b7f,2 +np.float32,0x7f40508b,0x3f800000,2 +np.float32,0x2650df,0x2650df,2 +np.float32,0xbe8cea3c,0xbe897628,2 +np.float32,0x80515af8,0x80515af8,2 +np.float32,0x7f423986,0x3f800000,2 +np.float32,0xbdf250e8,0xbdf1310c,2 +np.float32,0xfe89288a,0xbf800000,2 +np.float32,0x397b3b,0x397b3b,2 +np.float32,0x7e5e91b0,0x3f800000,2 +np.float32,0x6866e2,0x6866e2,2 +np.float32,0x7f4d8877,0x3f800000,2 +np.float32,0x3e6c4a21,0x3e682ee3,2 +np.float32,0xfc3d5980,0xbf800000,2 +np.float32,0x7eae2cd0,0x3f800000,2 +np.float32,0xbf241222,0xbf10c579,2 +np.float32,0xfebc02de,0xbf800000,2 +np.float32,0xff6e0645,0xbf800000,2 +np.float32,0x802030b6,0x802030b6,2 +np.float32,0x7ef9a441,0x3f800000,2 +np.float32,0x3fcf9f,0x3fcf9f,2 +np.float32,0xbf0ccf13,0xbf0023cc,2 +np.float32,0xfefee688,0xbf800000,2 +np.float32,0xbf6c8e0c,0xbf3a5160,2 +np.float32,0xfe749c28,0xbf800000,2 +np.float32,0x7f7fffff,0x3f800000,2 +np.float32,0x58c1a0,0x58c1a0,2 +np.float32,0x3f2de0a1,0x3f174c17,2 +np.float32,0xbf5f7138,0xbf33eb03,2 +np.float32,0x3da15270,0x3da0fd3c,2 +np.float32,0x3da66560,0x3da607e4,2 +np.float32,0xbf306f9a,0xbf18f3c6,2 +np.float32,0x3e81a4de,0x3e7de293,2 +np.float32,0xbebb5fb8,0xbeb36f1a,2 +np.float32,0x14bf64,0x14bf64,2 +np.float32,0xbeac46c6,0xbea60e73,2 +np.float32,0xbdcdf210,0xbdcd4111,2 +np.float32,0x3f7e3cd9,0x3f42395e,2 +np.float32,0xbc4be640,0xbc4be38e,2 +np.float32,0xff5f53b4,0xbf800000,2 +np.float32,0xbf1315ae,0xbf04c90b,2 +np.float32,0x80000000,0x80000000,2 +np.float32,0xbf6a4149,0xbf393aaa,2 +np.float32,0x3f66b8ee,0x3f378772,2 +np.float32,0xff29293e,0xbf800000,2 +np.float32,0xbcc989c0,0xbcc97f58,2 +np.float32,0xbd9a1b70,0xbd99d125,2 +np.float32,0xfef353cc,0xbf800000,2 +np.float32,0xbdc30cf0,0xbdc27683,2 +np.float32,0xfdfd6768,0xbf800000,2 +np.float32,0x7ebac44c,0x3f800000,2 +np.float32,0xff453cd6,0xbf800000,2 +np.float32,0x3ef07720,0x3ee03787,2 +np.float32,0x80219c14,0x80219c14,2 +np.float32,0x805553a8,0x805553a8,2 +np.float32,0x80703928,0x80703928,2 +np.float32,0xff16d3a7,0xbf800000,2 +np.float32,0x3f1472bc,0x3f05c77b,2 +np.float32,0x3eeea37a,0x3edebcf9,2 +np.float32,0x3db801e6,0x3db7838d,2 +np.float32,0x800870d2,0x800870d2,2 +np.float32,0xbea1172c,0xbe9bfa32,2 +np.float32,0x3f1f5e7c,0x3f0d8a42,2 +np.float32,0x123cdb,0x123cdb,2 +np.float32,0x7f6e6b06,0x3f800000,2 +np.float32,0x3ed80573,0x3ecc0def,2 +np.float32,0xfea31b82,0xbf800000,2 +np.float32,0x6744e0,0x6744e0,2 +np.float32,0x695e8b,0x695e8b,2 +np.float32,0xbee3888a,0xbed5a67d,2 +np.float32,0x7f64bc2a,0x3f800000,2 +np.float32,0x7f204244,0x3f800000,2 +np.float32,0x7f647102,0x3f800000,2 +np.float32,0x3dd8ebc0,0x3dd81d03,2 +np.float32,0x801e7ab1,0x801e7ab1,2 +np.float32,0x7d034b56,0x3f800000,2 +np.float32,0x7fc00000,0x7fc00000,2 +np.float32,0x80194193,0x80194193,2 +np.float32,0xfe31c8d4,0xbf800000,2 +np.float32,0x7fc0c4,0x7fc0c4,2 +np.float32,0xd95bf,0xd95bf,2 +np.float32,0x7e4f991d,0x3f800000,2 +np.float32,0x7fc563,0x7fc563,2 +np.float32,0xbe3fcccc,0xbe3d968a,2 +np.float32,0xfdaaa1c8,0xbf800000,2 +np.float32,0xbf48e449,0xbf27c949,2 +np.float32,0x3eb6c584,0x3eaf625e,2 +np.float32,0xbea35a74,0xbe9e0702,2 
+np.float32,0x3eeab47a,0x3edb89d5,2 +np.float32,0xbed99556,0xbecd5de5,2 +np.float64,0xbfb94a81e0329500,0xbfb935867ba761fe,2 +np.float64,0xbfec132f1678265e,0xbfe6900eb097abc3,2 +np.float64,0x5685ea72ad0be,0x5685ea72ad0be,2 +np.float64,0xbfd74d3169ae9a62,0xbfd652e09b9daf32,2 +np.float64,0xbfe28df53d651bea,0xbfe0b8a7f50ab433,2 +np.float64,0x0,0x0,2 +np.float64,0xbfed912738bb224e,0xbfe749e3732831ae,2 +np.float64,0x7fcc6faed838df5d,0x3ff0000000000000,2 +np.float64,0xbfe95fe9a432bfd3,0xbfe51f6349919910,2 +np.float64,0xbfc4d5900b29ab20,0xbfc4a6f496179b8b,2 +np.float64,0xbfcd6025033ac04c,0xbfccded7b34b49b0,2 +np.float64,0xbfdfa655b43f4cac,0xbfdd4ca1e5bb9db8,2 +np.float64,0xe7ea5c7fcfd4c,0xe7ea5c7fcfd4c,2 +np.float64,0xffa5449ca42a8940,0xbff0000000000000,2 +np.float64,0xffe63294c1ac6529,0xbff0000000000000,2 +np.float64,0x7feb9cbae7f73975,0x3ff0000000000000,2 +np.float64,0x800eb07c3e3d60f9,0x800eb07c3e3d60f9,2 +np.float64,0x3fc95777e932aef0,0x3fc9040391e20c00,2 +np.float64,0x800736052dee6c0b,0x800736052dee6c0b,2 +np.float64,0x3fe9ae4afd335c96,0x3fe54b569bab45c7,2 +np.float64,0x7fee4c94217c9927,0x3ff0000000000000,2 +np.float64,0x80094b594bd296b3,0x80094b594bd296b3,2 +np.float64,0xffe5adbcee6b5b7a,0xbff0000000000000,2 +np.float64,0x3fecb8eab47971d5,0x3fe6e236be6f27e9,2 +np.float64,0x44956914892ae,0x44956914892ae,2 +np.float64,0xbfe3bd18ef677a32,0xbfe190bf1e07200c,2 +np.float64,0x800104e5b46209cc,0x800104e5b46209cc,2 +np.float64,0x8008fbcecf71f79e,0x8008fbcecf71f79e,2 +np.float64,0x800f0a46a0be148d,0x800f0a46a0be148d,2 +np.float64,0x7fe657a0702caf40,0x3ff0000000000000,2 +np.float64,0xffd3ff1a9027fe36,0xbff0000000000000,2 +np.float64,0x3fe78bc87bef1790,0x3fe40d2e63aaf029,2 +np.float64,0x7feeabdc4c7d57b8,0x3ff0000000000000,2 +np.float64,0xbfabd28d8437a520,0xbfabcb8ce03a0e56,2 +np.float64,0xbfddc3a133bb8742,0xbfdbc9fdb2594451,2 +np.float64,0x7fec911565b9222a,0x3ff0000000000000,2 +np.float64,0x71302604e2605,0x71302604e2605,2 +np.float64,0xee919d2bdd234,0xee919d2bdd234,2 +np.float64,0xbfc04fcff3209fa0,0xbfc0395a739a2ce4,2 +np.float64,0xffe4668a36e8cd14,0xbff0000000000000,2 +np.float64,0xbfeeafeebefd5fde,0xbfe7cd5f3d61a3ec,2 +np.float64,0x7fddb34219bb6683,0x3ff0000000000000,2 +np.float64,0xbfd2cac6cba5958e,0xbfd24520abb2ff36,2 +np.float64,0xbfb857e49630afc8,0xbfb8452d5064dec2,2 +np.float64,0x3fd2dbf90b25b7f2,0x3fd254eaf48484c2,2 +np.float64,0x800af65c94f5ecba,0x800af65c94f5ecba,2 +np.float64,0xa0eef4bf41ddf,0xa0eef4bf41ddf,2 +np.float64,0xffd8e0a4adb1c14a,0xbff0000000000000,2 +np.float64,0xffe858f6e870b1ed,0xbff0000000000000,2 +np.float64,0x3f94c2c308298580,0x3f94c208a4bb006d,2 +np.float64,0xffb45f0d7428be18,0xbff0000000000000,2 +np.float64,0x800ed4f43dbda9e9,0x800ed4f43dbda9e9,2 +np.float64,0x8002dd697e85bad4,0x8002dd697e85bad4,2 +np.float64,0x787ceab2f0f9e,0x787ceab2f0f9e,2 +np.float64,0xbfdff5fcc2bfebfa,0xbfdd8b736b128589,2 +np.float64,0x7fdb2b4294365684,0x3ff0000000000000,2 +np.float64,0xffe711e5e92e23cc,0xbff0000000000000,2 +np.float64,0x800b1c93f1163928,0x800b1c93f1163928,2 +np.float64,0x7fc524d2f22a49a5,0x3ff0000000000000,2 +np.float64,0x7fc88013b5310026,0x3ff0000000000000,2 +np.float64,0x3fe1a910c5e35222,0x3fe00fd779ebaa2a,2 +np.float64,0xbfb57ec9ca2afd90,0xbfb571e47ecb9335,2 +np.float64,0x7fd7594b20aeb295,0x3ff0000000000000,2 +np.float64,0x7fba4641ca348c83,0x3ff0000000000000,2 +np.float64,0xffe61393706c2726,0xbff0000000000000,2 +np.float64,0x7fd54f3c7baa9e78,0x3ff0000000000000,2 +np.float64,0xffe65ffb12ecbff6,0xbff0000000000000,2 +np.float64,0xbfba3b0376347608,0xbfba239cbbbd1b11,2 
+np.float64,0x800200886d640112,0x800200886d640112,2 +np.float64,0xbfecf0ba4679e174,0xbfe6fd59de44a3ec,2 +np.float64,0xffe5c57e122b8afc,0xbff0000000000000,2 +np.float64,0x7fdaad0143355a02,0x3ff0000000000000,2 +np.float64,0x46ab32c08d567,0x46ab32c08d567,2 +np.float64,0x7ff8000000000000,0x7ff8000000000000,2 +np.float64,0xbfda7980fdb4f302,0xbfd90fa9c8066109,2 +np.float64,0x3fe237703c646ee0,0x3fe07969f8d8805a,2 +np.float64,0x8000e9fcfc21d3fb,0x8000e9fcfc21d3fb,2 +np.float64,0xbfdfe6e958bfcdd2,0xbfdd7f952fe87770,2 +np.float64,0xbd7baf217af8,0xbd7baf217af8,2 +np.float64,0xbfceba9e4b3d753c,0xbfce26e54359869a,2 +np.float64,0xb95a2caf72b46,0xb95a2caf72b46,2 +np.float64,0x3fb407e25a280fc5,0x3fb3fd71e457b628,2 +np.float64,0xa1da09d943b41,0xa1da09d943b41,2 +np.float64,0xbfe9c7271cf38e4e,0xbfe559296b471738,2 +np.float64,0x3fefae6170ff5cc3,0x3fe83c70ba82f0e1,2 +np.float64,0x7fe7375348ae6ea6,0x3ff0000000000000,2 +np.float64,0xffe18c9cc6e31939,0xbff0000000000000,2 +np.float64,0x800483d13a6907a3,0x800483d13a6907a3,2 +np.float64,0x7fe772a18caee542,0x3ff0000000000000,2 +np.float64,0xffefff64e7bffec9,0xbff0000000000000,2 +np.float64,0x7fcffc31113ff861,0x3ff0000000000000,2 +np.float64,0x3fd91e067e323c0d,0x3fd7e70bf365a7b3,2 +np.float64,0xb0a6673d614cd,0xb0a6673d614cd,2 +np.float64,0xffef9a297e3f3452,0xbff0000000000000,2 +np.float64,0xffe87cc15e70f982,0xbff0000000000000,2 +np.float64,0xffefd6ad8e7fad5a,0xbff0000000000000,2 +np.float64,0x7fe3aaa3a8a75546,0x3ff0000000000000,2 +np.float64,0xddab0341bb561,0xddab0341bb561,2 +np.float64,0x3fe996d6d7332dae,0x3fe53e3ed5be2922,2 +np.float64,0x3fdbe66a18b7ccd4,0x3fda41e6053c1512,2 +np.float64,0x8914775d1228f,0x8914775d1228f,2 +np.float64,0x3fe44621d4688c44,0x3fe1ef9c7225f8bd,2 +np.float64,0xffab29a2a4365340,0xbff0000000000000,2 +np.float64,0xffc8d4a0c431a940,0xbff0000000000000,2 +np.float64,0xbfd426e085284dc2,0xbfd382e2a9617b87,2 +np.float64,0xbfd3b0a525a7614a,0xbfd3176856faccf1,2 +np.float64,0x80036dedcb06dbdc,0x80036dedcb06dbdc,2 +np.float64,0x3feb13823b762704,0x3fe60ca3facdb696,2 +np.float64,0x3fd7246b7bae48d8,0x3fd62f08afded155,2 +np.float64,0x1,0x1,2 +np.float64,0x3fe8ade4b9715bc9,0x3fe4b97cc1387d27,2 +np.float64,0x3fdf2dbec53e5b7e,0x3fdcecfeee33de95,2 +np.float64,0x3fe4292bf9685258,0x3fe1dbb5a6704090,2 +np.float64,0xbfd21acbb8243598,0xbfd1a2ff42174cae,2 +np.float64,0xdd0d2d01ba1a6,0xdd0d2d01ba1a6,2 +np.float64,0x3fa3f3d2f427e7a0,0x3fa3f13d6f101555,2 +np.float64,0x7fdabf4aceb57e95,0x3ff0000000000000,2 +np.float64,0xd4d9e39ba9b3d,0xd4d9e39ba9b3d,2 +np.float64,0xffec773396f8ee66,0xbff0000000000000,2 +np.float64,0x3fa88cc79031198f,0x3fa887f7ade722ba,2 +np.float64,0xffe63a92066c7524,0xbff0000000000000,2 +np.float64,0xbfcf514e2e3ea29c,0xbfceb510e99aaa19,2 +np.float64,0x9d78c19d3af18,0x9d78c19d3af18,2 +np.float64,0x7fdd748bfbbae917,0x3ff0000000000000,2 +np.float64,0xffb3594c4626b298,0xbff0000000000000,2 +np.float64,0x80068ce5b32d19cc,0x80068ce5b32d19cc,2 +np.float64,0x3fec63d60e78c7ac,0x3fe6b85536e44217,2 +np.float64,0x80080bad4dd0175b,0x80080bad4dd0175b,2 +np.float64,0xbfec6807baf8d010,0xbfe6ba69740f9687,2 +np.float64,0x7fedbae0bbfb75c0,0x3ff0000000000000,2 +np.float64,0x8001cb7aa3c396f6,0x8001cb7aa3c396f6,2 +np.float64,0x7fe1f1f03563e3df,0x3ff0000000000000,2 +np.float64,0x7fd83d3978307a72,0x3ff0000000000000,2 +np.float64,0xbfc05ffe9d20bffc,0xbfc049464e3f0af2,2 +np.float64,0xfe6e053ffcdc1,0xfe6e053ffcdc1,2 +np.float64,0xbfd3bdf39d277be8,0xbfd32386edf12726,2 +np.float64,0x800f41b27bde8365,0x800f41b27bde8365,2 +np.float64,0xbfe2c98390e59307,0xbfe0e3c9260fe798,2 
+np.float64,0xffdd6206bcbac40e,0xbff0000000000000,2 +np.float64,0x67f35ef4cfe6c,0x67f35ef4cfe6c,2 +np.float64,0x800337e02ae66fc1,0x800337e02ae66fc1,2 +np.float64,0x3fe0ff70afe1fee1,0x3fdf1f46434330df,2 +np.float64,0x3fd7e0a1df2fc144,0x3fd6d3f82c8031e4,2 +np.float64,0x8008da5cd1b1b4ba,0x8008da5cd1b1b4ba,2 +np.float64,0x80065ec9e4ccbd95,0x80065ec9e4ccbd95,2 +np.float64,0x3fe1d1e559a3a3cb,0x3fe02e4f146aa1ab,2 +np.float64,0x7feb7d2f0836fa5d,0x3ff0000000000000,2 +np.float64,0xbfcb33ce9736679c,0xbfcaccd431b205bb,2 +np.float64,0x800e6d0adf5cda16,0x800e6d0adf5cda16,2 +np.float64,0x7fe46f272ca8de4d,0x3ff0000000000000,2 +np.float64,0x4fdfc73e9fbfa,0x4fdfc73e9fbfa,2 +np.float64,0x800958a13112b143,0x800958a13112b143,2 +np.float64,0xbfea01f877f403f1,0xbfe579a541594247,2 +np.float64,0xeefaf599ddf5f,0xeefaf599ddf5f,2 +np.float64,0x80038766c5e70ece,0x80038766c5e70ece,2 +np.float64,0x7fd31bc28ba63784,0x3ff0000000000000,2 +np.float64,0xbfe4df77eee9bef0,0xbfe257abe7083b77,2 +np.float64,0x7fe6790c78acf218,0x3ff0000000000000,2 +np.float64,0xffe7c66884af8cd0,0xbff0000000000000,2 +np.float64,0x800115e36f422bc8,0x800115e36f422bc8,2 +np.float64,0x3fc601945d2c0329,0x3fc5cab917bb20bc,2 +np.float64,0x3fd6ac9546ad592b,0x3fd5c55437ec3508,2 +np.float64,0xa7bd59294f7ab,0xa7bd59294f7ab,2 +np.float64,0x8005c26c8b8b84da,0x8005c26c8b8b84da,2 +np.float64,0x8257501704aea,0x8257501704aea,2 +np.float64,0x5b12aae0b6256,0x5b12aae0b6256,2 +np.float64,0x800232fe02c465fd,0x800232fe02c465fd,2 +np.float64,0x800dae28f85b5c52,0x800dae28f85b5c52,2 +np.float64,0x3fdade1ac135bc36,0x3fd964a2000ace25,2 +np.float64,0x3fed72ca04fae594,0x3fe73b9170d809f9,2 +np.float64,0x7fc6397e2b2c72fb,0x3ff0000000000000,2 +np.float64,0x3fe1f5296d23ea53,0x3fe048802d17621e,2 +np.float64,0xffe05544b920aa89,0xbff0000000000000,2 +np.float64,0xbfdb2e1588365c2c,0xbfd9a7e4113c713e,2 +np.float64,0xbfed6a06fa3ad40e,0xbfe7376be60535f8,2 +np.float64,0xbfe31dcaf5e63b96,0xbfe120417c46cac1,2 +np.float64,0xbfb7ed67ae2fdad0,0xbfb7dba14af33b00,2 +np.float64,0xffd32bb7eb265770,0xbff0000000000000,2 +np.float64,0x80039877b04730f0,0x80039877b04730f0,2 +np.float64,0x3f832e5630265cac,0x3f832e316f47f218,2 +np.float64,0xffe7fa7f732ff4fe,0xbff0000000000000,2 +np.float64,0x9649b87f2c937,0x9649b87f2c937,2 +np.float64,0xffaee447183dc890,0xbff0000000000000,2 +np.float64,0x7fe4e02dd869c05b,0x3ff0000000000000,2 +np.float64,0x3fe1d35e7463a6bd,0x3fe02f67bd21e86e,2 +np.float64,0xffe57f40fe2afe82,0xbff0000000000000,2 +np.float64,0xbfea1362b93426c6,0xbfe5833421dba8fc,2 +np.float64,0xffe9c689fe338d13,0xbff0000000000000,2 +np.float64,0xffc592dd102b25bc,0xbff0000000000000,2 +np.float64,0x3fd283c7aba5078f,0x3fd203d61d1398c3,2 +np.float64,0x8001d6820243ad05,0x8001d6820243ad05,2 +np.float64,0x3fe0ad5991e15ab4,0x3fdea14ef0d47fbd,2 +np.float64,0x3fe3916f2ee722de,0x3fe1722684a9ffb1,2 +np.float64,0xffef9e54e03f3ca9,0xbff0000000000000,2 +np.float64,0x7fe864faebb0c9f5,0x3ff0000000000000,2 +np.float64,0xbfed3587c3fa6b10,0xbfe71e7112df8a68,2 +np.float64,0xbfdd9efc643b3df8,0xbfdbac3a16caf208,2 +np.float64,0xbfd5ac08feab5812,0xbfd4e14575a6e41b,2 +np.float64,0xffda90fae6b521f6,0xbff0000000000000,2 +np.float64,0x8001380ecf22701e,0x8001380ecf22701e,2 +np.float64,0x7fed266fa5fa4cde,0x3ff0000000000000,2 +np.float64,0xffec6c0ac3b8d815,0xbff0000000000000,2 +np.float64,0x3fe7de43c32fbc88,0x3fe43ef62821a5a6,2 +np.float64,0x800bf4ffc357ea00,0x800bf4ffc357ea00,2 +np.float64,0x3fe125c975624b93,0x3fdf59b2de3eff5d,2 +np.float64,0x8004714c1028e299,0x8004714c1028e299,2 +np.float64,0x3fef1bfbf5fe37f8,0x3fe7fd2ba1b63c8a,2 
+np.float64,0x800cae15c3195c2c,0x800cae15c3195c2c,2 +np.float64,0x7fde708e083ce11b,0x3ff0000000000000,2 +np.float64,0x7fbcee5df639dcbb,0x3ff0000000000000,2 +np.float64,0x800b1467141628cf,0x800b1467141628cf,2 +np.float64,0x3fe525e0d36a4bc2,0x3fe286b6e59e30f5,2 +np.float64,0xffe987f8b8330ff1,0xbff0000000000000,2 +np.float64,0x7e0a8284fc151,0x7e0a8284fc151,2 +np.float64,0x8006f982442df305,0x8006f982442df305,2 +np.float64,0xbfd75a3cb62eb47a,0xbfd65e54cee981c9,2 +np.float64,0x258e91104b1d3,0x258e91104b1d3,2 +np.float64,0xbfecd0056779a00b,0xbfe6ed7ae97fff1b,2 +np.float64,0x7fc3a4f9122749f1,0x3ff0000000000000,2 +np.float64,0x6e2b1024dc563,0x6e2b1024dc563,2 +np.float64,0x800d575ad4daaeb6,0x800d575ad4daaeb6,2 +np.float64,0xbfceafb1073d5f64,0xbfce1c93023d8414,2 +np.float64,0xffe895cb5f312b96,0xbff0000000000000,2 +np.float64,0x7fe7811ed4ef023d,0x3ff0000000000000,2 +np.float64,0xbfd93f952f327f2a,0xbfd803e6b5576b99,2 +np.float64,0xffdd883a3fbb1074,0xbff0000000000000,2 +np.float64,0x7fee5624eefcac49,0x3ff0000000000000,2 +np.float64,0xbfe264bb2624c976,0xbfe09a9b7cc896e7,2 +np.float64,0xffef14b417be2967,0xbff0000000000000,2 +np.float64,0xbfecbd0d94397a1b,0xbfe6e43bef852d9f,2 +np.float64,0xbfe20d9e4ba41b3c,0xbfe05a98e05846d9,2 +np.float64,0x10000000000000,0x10000000000000,2 +np.float64,0x7fefde93f7bfbd27,0x3ff0000000000000,2 +np.float64,0x80076b9e232ed73d,0x80076b9e232ed73d,2 +np.float64,0xbfe80df52c701bea,0xbfe45b754b433792,2 +np.float64,0x7fe3b5a637676b4b,0x3ff0000000000000,2 +np.float64,0x2c81d14c5903b,0x2c81d14c5903b,2 +np.float64,0x80038945c767128c,0x80038945c767128c,2 +np.float64,0xffeebaf544bd75ea,0xbff0000000000000,2 +np.float64,0xffdb1867d2b630d0,0xbff0000000000000,2 +np.float64,0x3fe3376eaee66ede,0x3fe13285579763d8,2 +np.float64,0xffddf65ca43becba,0xbff0000000000000,2 +np.float64,0xffec8e3e04791c7b,0xbff0000000000000,2 +np.float64,0x80064f4bde2c9e98,0x80064f4bde2c9e98,2 +np.float64,0x7fe534a085ea6940,0x3ff0000000000000,2 +np.float64,0xbfcbabe31d3757c8,0xbfcb3f8e70adf7e7,2 +np.float64,0xbfe45ca11e28b942,0xbfe1ff04515ef809,2 +np.float64,0x65f4df02cbe9d,0x65f4df02cbe9d,2 +np.float64,0xb08b0cbb61162,0xb08b0cbb61162,2 +np.float64,0x3feae2e8b975c5d1,0x3fe5f302b5e8eda2,2 +np.float64,0x7fcf277ff93e4eff,0x3ff0000000000000,2 +np.float64,0x80010999c4821334,0x80010999c4821334,2 +np.float64,0xbfd7f65911afecb2,0xbfd6e6e9cd098f8b,2 +np.float64,0x800e0560ec3c0ac2,0x800e0560ec3c0ac2,2 +np.float64,0x7fec4152ba3882a4,0x3ff0000000000000,2 +np.float64,0xbfb5c77cd42b8ef8,0xbfb5ba1336084908,2 +np.float64,0x457ff1b68afff,0x457ff1b68afff,2 +np.float64,0x5323ec56a647e,0x5323ec56a647e,2 +np.float64,0xbfeed16cf8bda2da,0xbfe7dc49fc9ae549,2 +np.float64,0xffe8446106b088c1,0xbff0000000000000,2 +np.float64,0xffb93cd13c3279a0,0xbff0000000000000,2 +np.float64,0x7fe515c2aeea2b84,0x3ff0000000000000,2 +np.float64,0x80099df83f933bf1,0x80099df83f933bf1,2 +np.float64,0x7fb3a375562746ea,0x3ff0000000000000,2 +np.float64,0x7fcd7efa243afdf3,0x3ff0000000000000,2 +np.float64,0xffe40cddb12819bb,0xbff0000000000000,2 +np.float64,0x8008b68eecd16d1e,0x8008b68eecd16d1e,2 +np.float64,0x2aec688055d8e,0x2aec688055d8e,2 +np.float64,0xffe23750bc646ea1,0xbff0000000000000,2 +np.float64,0x5adacf60b5b7,0x5adacf60b5b7,2 +np.float64,0x7fefb29b1cbf6535,0x3ff0000000000000,2 +np.float64,0xbfeadbf90175b7f2,0xbfe5ef55e2194794,2 +np.float64,0xeaad2885d55a5,0xeaad2885d55a5,2 +np.float64,0xffd7939fba2f2740,0xbff0000000000000,2 +np.float64,0x3fd187ea3aa30fd4,0x3fd11af023472386,2 +np.float64,0xbf6eb579c03d6b00,0xbf6eb57052f47019,2 
+np.float64,0x3fefb67b3bff6cf6,0x3fe83fe4499969ac,2 +np.float64,0xbfe5183aacea3076,0xbfe27da1aa0b61a0,2 +np.float64,0xbfb83e47a2307c90,0xbfb82bcb0e12db42,2 +np.float64,0x80088849b1b11094,0x80088849b1b11094,2 +np.float64,0x800ceeed7399dddb,0x800ceeed7399dddb,2 +np.float64,0x80097cd90892f9b2,0x80097cd90892f9b2,2 +np.float64,0x7ec73feefd8e9,0x7ec73feefd8e9,2 +np.float64,0x7fe3291de5a6523b,0x3ff0000000000000,2 +np.float64,0xbfd537086daa6e10,0xbfd4787af5f60653,2 +np.float64,0x800e8ed4455d1da9,0x800e8ed4455d1da9,2 +np.float64,0x800ef8d19cbdf1a3,0x800ef8d19cbdf1a3,2 +np.float64,0x800dc4fa3a5b89f5,0x800dc4fa3a5b89f5,2 +np.float64,0xaa8b85cd55171,0xaa8b85cd55171,2 +np.float64,0xffd67a5f40acf4be,0xbff0000000000000,2 +np.float64,0xbfb7496db22e92d8,0xbfb7390a48130861,2 +np.float64,0x3fd86a8e7ab0d51d,0x3fd74bfba0f72616,2 +np.float64,0xffb7f5b7fc2feb70,0xbff0000000000000,2 +np.float64,0xbfea0960a7f412c1,0xbfe57db6d0ff4191,2 +np.float64,0x375f4fc26ebeb,0x375f4fc26ebeb,2 +np.float64,0x800c537e70b8a6fd,0x800c537e70b8a6fd,2 +np.float64,0x800b3f4506d67e8a,0x800b3f4506d67e8a,2 +np.float64,0x7fe61f2d592c3e5a,0x3ff0000000000000,2 +np.float64,0xffefffffffffffff,0xbff0000000000000,2 +np.float64,0x8005d0bb84eba178,0x8005d0bb84eba178,2 +np.float64,0x800c78b0ec18f162,0x800c78b0ec18f162,2 +np.float64,0xbfc42cccfb285998,0xbfc4027392f66b0d,2 +np.float64,0x3fd8fdc73fb1fb8e,0x3fd7cb46f928153f,2 +np.float64,0x800c71754298e2eb,0x800c71754298e2eb,2 +np.float64,0x3fe4aa7a96a954f5,0x3fe233f5d3bc1352,2 +np.float64,0x7fd53841f6aa7083,0x3ff0000000000000,2 +np.float64,0x3fd0a887b8a15110,0x3fd04ac3b9c0d1ca,2 +np.float64,0x8007b8e164cf71c4,0x8007b8e164cf71c4,2 +np.float64,0xbfddc35c66bb86b8,0xbfdbc9c5dddfb014,2 +np.float64,0x6a3756fed46eb,0x6a3756fed46eb,2 +np.float64,0xffd3dcd05527b9a0,0xbff0000000000000,2 +np.float64,0xbfd7dc75632fb8ea,0xbfd6d0538b340a98,2 +np.float64,0x17501f822ea05,0x17501f822ea05,2 +np.float64,0xbfe1f98b99a3f317,0xbfe04bbf8f8b6cb3,2 +np.float64,0x66ea65d2cdd4d,0x66ea65d2cdd4d,2 +np.float64,0xbfd12241e2224484,0xbfd0bc62f46ea5e1,2 +np.float64,0x3fed6e6fb3fadcdf,0x3fe7398249097285,2 +np.float64,0x3fe0b5ebeba16bd8,0x3fdeae84b3000a47,2 +np.float64,0x66d1bce8cda38,0x66d1bce8cda38,2 +np.float64,0x3fdd728db3bae51b,0x3fdb880f28c52713,2 +np.float64,0xffb45dbe5228bb80,0xbff0000000000000,2 +np.float64,0x1ff8990c3ff14,0x1ff8990c3ff14,2 +np.float64,0x800a68e8f294d1d2,0x800a68e8f294d1d2,2 +np.float64,0xbfe4d08b84a9a117,0xbfe24da40bff6be7,2 +np.float64,0x3fe0177f0ee02efe,0x3fddb83c5971df51,2 +np.float64,0xffc56893692ad128,0xbff0000000000000,2 +np.float64,0x51b44f6aa368b,0x51b44f6aa368b,2 +np.float64,0x2258ff4e44b21,0x2258ff4e44b21,2 +np.float64,0x3fe913649e7226c9,0x3fe4f3f119530f53,2 +np.float64,0xffe3767df766ecfc,0xbff0000000000000,2 +np.float64,0xbfe62ae12fec55c2,0xbfe33108f1f22a94,2 +np.float64,0x7fb6a6308e2d4c60,0x3ff0000000000000,2 +np.float64,0xbfe00f2085e01e41,0xbfddab19b6fc77d1,2 +np.float64,0x3fb66447dc2cc890,0x3fb655b4f46844f0,2 +np.float64,0x3fd80238f6b00470,0x3fd6f143be1617d6,2 +np.float64,0xbfd05bfeb3a0b7fe,0xbfd0031ab3455e15,2 +np.float64,0xffc3a50351274a08,0xbff0000000000000,2 +np.float64,0xffd8f4241cb1e848,0xbff0000000000000,2 +np.float64,0xbfca72a88c34e550,0xbfca13ebe85f2aca,2 +np.float64,0x3fd47d683ba8fad0,0x3fd3d13f1176ed8c,2 +np.float64,0x3fb6418e642c831d,0x3fb6333ebe479ff2,2 +np.float64,0x800fde8e023fbd1c,0x800fde8e023fbd1c,2 +np.float64,0x8001fb01e323f605,0x8001fb01e323f605,2 +np.float64,0x3febb21ff9f76440,0x3fe65ed788d52fee,2 +np.float64,0x3fe47553ffe8eaa8,0x3fe20fe01f853603,2 
+np.float64,0x7fca20b3f9344167,0x3ff0000000000000,2 +np.float64,0x3fe704f4ec6e09ea,0x3fe3ba7277201805,2 +np.float64,0xf864359df0c87,0xf864359df0c87,2 +np.float64,0x4d96b01c9b2d7,0x4d96b01c9b2d7,2 +np.float64,0x3fe8a09fe9f14140,0x3fe4b1c6a2d2e095,2 +np.float64,0xffc46c61b228d8c4,0xbff0000000000000,2 +np.float64,0x3fe680a837ed0150,0x3fe3679d6eeb6485,2 +np.float64,0xbfecedc20f39db84,0xbfe6fbe9ee978bf6,2 +np.float64,0x3fb2314eae24629d,0x3fb2297ba6d55d2d,2 +np.float64,0x3fe9f0b8e7b3e172,0x3fe57026eae36db3,2 +np.float64,0x80097a132ed2f427,0x80097a132ed2f427,2 +np.float64,0x800ae5a41955cb49,0x800ae5a41955cb49,2 +np.float64,0xbfd7527279aea4e4,0xbfd6577de356e1bd,2 +np.float64,0x3fe27d3e01e4fa7c,0x3fe0ac7dd96f9179,2 +np.float64,0x7fedd8cb01bbb195,0x3ff0000000000000,2 +np.float64,0x78f8695af1f0e,0x78f8695af1f0e,2 +np.float64,0x800d2d0e927a5a1d,0x800d2d0e927a5a1d,2 +np.float64,0xffe74b46fb2e968e,0xbff0000000000000,2 +np.float64,0xbfdd12d4c8ba25aa,0xbfdb39dae49e1c10,2 +np.float64,0xbfd6c14710ad828e,0xbfd5d79ef5a8d921,2 +np.float64,0x921f4e55243ea,0x921f4e55243ea,2 +np.float64,0x800b4e4c80969c99,0x800b4e4c80969c99,2 +np.float64,0x7fe08c6ab7e118d4,0x3ff0000000000000,2 +np.float64,0xbfed290014fa5200,0xbfe71871f7e859ed,2 +np.float64,0x8008c1d5c59183ac,0x8008c1d5c59183ac,2 +np.float64,0x3fd339e68c2673cd,0x3fd2aaff3f165a9d,2 +np.float64,0xbfdd20d8113a41b0,0xbfdb4553ea2cb2fb,2 +np.float64,0x3fe52a25deea544c,0x3fe2898d5bf4442c,2 +np.float64,0x498602d4930c1,0x498602d4930c1,2 +np.float64,0x3fd8c450113188a0,0x3fd799b0b2a6c43c,2 +np.float64,0xbfd72bc2f2ae5786,0xbfd6357e15ba7f70,2 +np.float64,0xbfd076188ea0ec32,0xbfd01b8fce44d1af,2 +np.float64,0x9aace1713559c,0x9aace1713559c,2 +np.float64,0x8008a730e8914e62,0x8008a730e8914e62,2 +np.float64,0x7fe9e9a3d833d347,0x3ff0000000000000,2 +np.float64,0x800d3a0d69da741b,0x800d3a0d69da741b,2 +np.float64,0xbfe3e28a29e7c514,0xbfe1aad7643a2d19,2 +np.float64,0x7fe9894c71331298,0x3ff0000000000000,2 +np.float64,0xbfe7c6acb5ef8d5a,0xbfe430c9e258ce62,2 +np.float64,0xffb5a520a62b4a40,0xbff0000000000000,2 +np.float64,0x7fc02109ae204212,0x3ff0000000000000,2 +np.float64,0xb5c58f196b8b2,0xb5c58f196b8b2,2 +np.float64,0x3feb4ee82e769dd0,0x3fe62bae9a39d8b1,2 +np.float64,0x3fec5c3cf278b87a,0x3fe6b49000f12441,2 +np.float64,0x81f64b8103eca,0x81f64b8103eca,2 +np.float64,0xbfeab00d73f5601b,0xbfe5d7f755ab73d9,2 +np.float64,0x3fd016bf28a02d7e,0x3fcf843ea23bcd3c,2 +np.float64,0xbfa1db617423b6c0,0xbfa1d9872ddeb5a8,2 +np.float64,0x3fe83c879d70790f,0x3fe4771502d8f012,2 +np.float64,0x6b267586d64cf,0x6b267586d64cf,2 +np.float64,0x3fc91b6d3f3236d8,0x3fc8ca3eb4da25a9,2 +np.float64,0x7fd4e3f8f3a9c7f1,0x3ff0000000000000,2 +np.float64,0x800a75899214eb14,0x800a75899214eb14,2 +np.float64,0x7fdb1f2e07b63e5b,0x3ff0000000000000,2 +np.float64,0xffe7805a11ef00b4,0xbff0000000000000,2 +np.float64,0x3fc8e1b88a31c371,0x3fc892af45330818,2 +np.float64,0xbfe809fe447013fc,0xbfe45918f07da4d9,2 +np.float64,0xbfeb9d7f2ab73afe,0xbfe65446bfddc792,2 +np.float64,0x3fb47f0a5c28fe15,0x3fb473db9113e880,2 +np.float64,0x800a17ae3cb42f5d,0x800a17ae3cb42f5d,2 +np.float64,0xf5540945eaa81,0xf5540945eaa81,2 +np.float64,0xbfe577fc26aaeff8,0xbfe2bcfbf2cf69ff,2 +np.float64,0xbfb99b3e06333680,0xbfb98577b88e0515,2 +np.float64,0x7fd9290391b25206,0x3ff0000000000000,2 +np.float64,0x7fe1aa62ffa354c5,0x3ff0000000000000,2 +np.float64,0x7b0189a0f604,0x7b0189a0f604,2 +np.float64,0x3f9000ed602001db,0x3f900097fe168105,2 +np.float64,0x3fd576128d2aec25,0x3fd4b1002c92286f,2 +np.float64,0xffecc98ece79931d,0xbff0000000000000,2 
+np.float64,0x800a1736c7f42e6e,0x800a1736c7f42e6e,2 +np.float64,0xbfed947548bb28eb,0xbfe74b71479ae739,2 +np.float64,0xa45c032148b9,0xa45c032148b9,2 +np.float64,0xbfc13d011c227a04,0xbfc1228447de5e9f,2 +np.float64,0xffed8baa6ebb1754,0xbff0000000000000,2 +np.float64,0x800ea2de243d45bc,0x800ea2de243d45bc,2 +np.float64,0x8001396be52272d9,0x8001396be52272d9,2 +np.float64,0xd018d1cda031a,0xd018d1cda031a,2 +np.float64,0x7fe1fece1fe3fd9b,0x3ff0000000000000,2 +np.float64,0x8009ac484c135891,0x8009ac484c135891,2 +np.float64,0x3fc560ad132ac15a,0x3fc52e5a9479f08e,2 +np.float64,0x3fd6f80ebe2df01d,0x3fd607f70ce8e3f4,2 +np.float64,0xbfd3e69e82a7cd3e,0xbfd34887c2a40699,2 +np.float64,0x3fe232d9baa465b3,0x3fe0760a822ada0c,2 +np.float64,0x3fe769bbc6eed378,0x3fe3f872680f6631,2 +np.float64,0xffe63dbd952c7b7a,0xbff0000000000000,2 +np.float64,0x4e0c00da9c181,0x4e0c00da9c181,2 +np.float64,0xffeae4d89735c9b0,0xbff0000000000000,2 +np.float64,0x3fe030bcbb606179,0x3fdddfc66660bfce,2 +np.float64,0x7fe35ca40d66b947,0x3ff0000000000000,2 +np.float64,0xbfd45bd66628b7ac,0xbfd3b2e04bfe7866,2 +np.float64,0x3fd1f0be2323e17c,0x3fd17c1c340d7a48,2 +np.float64,0x3fd7123b6cae2478,0x3fd61f0675aa9ae1,2 +np.float64,0xbfe918a377723147,0xbfe4f6efe66f5714,2 +np.float64,0x7fc400356f28006a,0x3ff0000000000000,2 +np.float64,0x7fd2dead70a5bd5a,0x3ff0000000000000,2 +np.float64,0xffe9c28f81f3851e,0xbff0000000000000,2 +np.float64,0x3fd09b1ec7a1363e,0x3fd03e3894320140,2 +np.float64,0x7fe6e80c646dd018,0x3ff0000000000000,2 +np.float64,0x7fec3760a4786ec0,0x3ff0000000000000,2 +np.float64,0x309eb6ee613d8,0x309eb6ee613d8,2 +np.float64,0x800731cb0ece6397,0x800731cb0ece6397,2 +np.float64,0xbfdb0c553db618aa,0xbfd98b8a4680ee60,2 +np.float64,0x3fd603a52eac074c,0x3fd52f6b53de7455,2 +np.float64,0x9ecb821b3d971,0x9ecb821b3d971,2 +np.float64,0x3feb7d64dc36faca,0x3fe643c2754bb7f4,2 +np.float64,0xffeb94825ef72904,0xbff0000000000000,2 +np.float64,0x24267418484cf,0x24267418484cf,2 +np.float64,0xbfa6b2fbac2d65f0,0xbfa6af2dca5bfa6f,2 +np.float64,0x8010000000000000,0x8010000000000000,2 +np.float64,0xffe6873978ed0e72,0xbff0000000000000,2 +np.float64,0x800447934ba88f27,0x800447934ba88f27,2 +np.float64,0x3fef305f09fe60be,0x3fe806156b8ca47c,2 +np.float64,0xffd441c697a8838e,0xbff0000000000000,2 +np.float64,0xbfa7684f6c2ed0a0,0xbfa764238d34830c,2 +np.float64,0xffb2c976142592f0,0xbff0000000000000,2 +np.float64,0xbfcc9d1585393a2c,0xbfcc25756bcbca1f,2 +np.float64,0xbfd477bb1ba8ef76,0xbfd3cc1d2114e77e,2 +np.float64,0xbfed1559983a2ab3,0xbfe70f03afd994ee,2 +np.float64,0xbfeb51139036a227,0xbfe62ccf56bc7fff,2 +np.float64,0x7d802890fb006,0x7d802890fb006,2 +np.float64,0x800e00af777c015f,0x800e00af777c015f,2 +np.float64,0x800647ce128c8f9d,0x800647ce128c8f9d,2 +np.float64,0x800a26da91d44db6,0x800a26da91d44db6,2 +np.float64,0x3fdc727eddb8e4fe,0x3fdab5fd9db630b3,2 +np.float64,0x7fd06def2ba0dbdd,0x3ff0000000000000,2 +np.float64,0xffe23678c4a46cf1,0xbff0000000000000,2 +np.float64,0xbfe7198e42ee331c,0xbfe3c7326c9c7553,2 +np.float64,0xffae465f3c3c8cc0,0xbff0000000000000,2 +np.float64,0xff9aea7c5035d500,0xbff0000000000000,2 +np.float64,0xbfeae49c0f35c938,0xbfe5f3e9326cb08b,2 +np.float64,0x3f9a16f300342de6,0x3f9a1581212be50f,2 +np.float64,0x8d99e2c31b33d,0x8d99e2c31b33d,2 +np.float64,0xffd58af253ab15e4,0xbff0000000000000,2 +np.float64,0xbfd205cd25a40b9a,0xbfd18f97155f8b25,2 +np.float64,0xbfebe839bbf7d074,0xbfe67a6024e8fefe,2 +np.float64,0xbfe4fb3595a9f66b,0xbfe26a42f99819ea,2 +np.float64,0x800e867c739d0cf9,0x800e867c739d0cf9,2 +np.float64,0x8bc4274f17885,0x8bc4274f17885,2 
+np.float64,0xaec8914b5d912,0xaec8914b5d912,2 +np.float64,0x7fd1d64473a3ac88,0x3ff0000000000000,2 +np.float64,0xbfe6d6f69cedaded,0xbfe39dd61bc7e23e,2 +np.float64,0x7fed05039d7a0a06,0x3ff0000000000000,2 +np.float64,0xbfc40eab0f281d58,0xbfc3e50d14b79265,2 +np.float64,0x45179aec8a2f4,0x45179aec8a2f4,2 +np.float64,0xbfe717e362ee2fc7,0xbfe3c62a95b07d13,2 +np.float64,0xbfe5b8df0d6b71be,0xbfe2e76c7ec5013d,2 +np.float64,0x5c67ba6eb8cf8,0x5c67ba6eb8cf8,2 +np.float64,0xbfda72ce4cb4e59c,0xbfd909fdc7ecfe20,2 +np.float64,0x7fdf59a1e2beb343,0x3ff0000000000000,2 +np.float64,0xc4f7897f89ef1,0xc4f7897f89ef1,2 +np.float64,0x8fcd0a351f9a2,0x8fcd0a351f9a2,2 +np.float64,0x3fb161761022c2ec,0x3fb15aa31c464de2,2 +np.float64,0x8008a985be71530c,0x8008a985be71530c,2 +np.float64,0x3fca4ddb5e349bb7,0x3fc9f0a3b60e49c6,2 +np.float64,0x7fcc10a2d9382145,0x3ff0000000000000,2 +np.float64,0x78902b3af1206,0x78902b3af1206,2 +np.float64,0x7fe1e2765f23c4ec,0x3ff0000000000000,2 +np.float64,0xc1d288cf83a51,0xc1d288cf83a51,2 +np.float64,0x7fe8af692bb15ed1,0x3ff0000000000000,2 +np.float64,0x80057d90fb8afb23,0x80057d90fb8afb23,2 +np.float64,0x3fdc136b8fb826d8,0x3fda6749582b2115,2 +np.float64,0x800ec8ea477d91d5,0x800ec8ea477d91d5,2 +np.float64,0x4c0f4796981ea,0x4c0f4796981ea,2 +np.float64,0xec34c4a5d8699,0xec34c4a5d8699,2 +np.float64,0x7fce343dfb3c687b,0x3ff0000000000000,2 +np.float64,0xbfc95a98a332b530,0xbfc90705b2cc2fec,2 +np.float64,0x800d118e1dba231c,0x800d118e1dba231c,2 +np.float64,0x3fd354f310a6a9e8,0x3fd2c3bb90054154,2 +np.float64,0xbfdac0d4fab581aa,0xbfd94bf37424928e,2 +np.float64,0x3fe7f5391fefea72,0x3fe44cb49d51985b,2 +np.float64,0xd4c3c329a9879,0xd4c3c329a9879,2 +np.float64,0x3fc53977692a72f0,0x3fc50835d85c9ed1,2 +np.float64,0xbfd6989538ad312a,0xbfd5b3a2c08511fe,2 +np.float64,0xbfe329f2906653e5,0xbfe128ec1525a1c0,2 +np.float64,0x7ff0000000000000,0x3ff0000000000000,2 +np.float64,0xbfea57c90974af92,0xbfe5a87b04aa3116,2 +np.float64,0x7fdfba94043f7527,0x3ff0000000000000,2 +np.float64,0x3feedabddafdb57c,0x3fe7e06c0661978d,2 +np.float64,0x4bd9f3b697b3f,0x4bd9f3b697b3f,2 +np.float64,0x3fdd15bbfc3a2b78,0x3fdb3c3b8d070f7e,2 +np.float64,0x3fbd89ccd23b13a0,0x3fbd686b825cff80,2 +np.float64,0x7ff4000000000000,0x7ffc000000000000,2 +np.float64,0x3f9baa8928375512,0x3f9ba8d01ddd5300,2 +np.float64,0x4a3ebdf2947d8,0x4a3ebdf2947d8,2 +np.float64,0x3fe698d5c06d31ac,0x3fe376dff48312c8,2 +np.float64,0xffd5323df12a647c,0xbff0000000000000,2 +np.float64,0xffea7f111174fe22,0xbff0000000000000,2 +np.float64,0x3feb4656a9b68cad,0x3fe627392eb2156f,2 +np.float64,0x7fc1260e9c224c1c,0x3ff0000000000000,2 +np.float64,0x80056e45e5eadc8d,0x80056e45e5eadc8d,2 +np.float64,0x7fd0958ef6a12b1d,0x3ff0000000000000,2 +np.float64,0x8001f85664e3f0ae,0x8001f85664e3f0ae,2 +np.float64,0x3fe553853beaa70a,0x3fe2a4f5e7c83558,2 +np.float64,0xbfeb33ce6276679d,0xbfe61d8ec9e5ff8c,2 +np.float64,0xbfd1b24e21a3649c,0xbfd14245df6065e9,2 +np.float64,0x3fe286fc40650df9,0x3fe0b395c8059429,2 +np.float64,0xffed378058fa6f00,0xbff0000000000000,2 +np.float64,0xbfd0c4a2d7a18946,0xbfd06509a434d6a0,2 +np.float64,0xbfea31d581f463ab,0xbfe593d976139f94,2 +np.float64,0xbfe0705c85e0e0b9,0xbfde42efa978eb0c,2 +np.float64,0xe4c4c339c9899,0xe4c4c339c9899,2 +np.float64,0x3fd68befa9ad17df,0x3fd5a870b3f1f83e,2 +np.float64,0x8000000000000001,0x8000000000000001,2 +np.float64,0x3fe294256965284b,0x3fe0bd271e22d86b,2 +np.float64,0x8005327a862a64f6,0x8005327a862a64f6,2 +np.float64,0xbfdb8155ce3702ac,0xbfd9ed9ef97920f8,2 +np.float64,0xbff0000000000000,0xbfe85efab514f394,2 
+np.float64,0xffe66988f1ecd312,0xbff0000000000000,2 +np.float64,0x3fb178a85e22f150,0x3fb171b9fbf95f1d,2 +np.float64,0x7f829b900025371f,0x3ff0000000000000,2 +np.float64,0x8000000000000000,0x8000000000000000,2 +np.float64,0x8006cb77f60d96f1,0x8006cb77f60d96f1,2 +np.float64,0x3fe0c5d53aa18baa,0x3fdec7012ab92b42,2 +np.float64,0x77266426ee4cd,0x77266426ee4cd,2 +np.float64,0xbfec95f468392be9,0xbfe6d11428f60136,2 +np.float64,0x3fedbf532dfb7ea6,0x3fe75f8436dd1d58,2 +np.float64,0x8002fadd3f85f5bb,0x8002fadd3f85f5bb,2 +np.float64,0xbfefebaa8d3fd755,0xbfe8566c6aa90fba,2 +np.float64,0xffc7dd2b712fba58,0xbff0000000000000,2 +np.float64,0x7fe5d3a6e8aba74d,0x3ff0000000000000,2 +np.float64,0x2da061525b40d,0x2da061525b40d,2 +np.float64,0x7fcb9b9953373732,0x3ff0000000000000,2 +np.float64,0x2ca2f6fc59460,0x2ca2f6fc59460,2 +np.float64,0xffeb84b05af70960,0xbff0000000000000,2 +np.float64,0xffe551e86c6aa3d0,0xbff0000000000000,2 +np.float64,0xbfdb311311366226,0xbfd9aa6688faafb9,2 +np.float64,0xbfd4f3875629e70e,0xbfd43bcd73534c66,2 +np.float64,0x7fe95666f932accd,0x3ff0000000000000,2 +np.float64,0x3fc73dfb482e7bf7,0x3fc6fd70c20ebf60,2 +np.float64,0x800cd9e40939b3c8,0x800cd9e40939b3c8,2 +np.float64,0x3fb0c9fa422193f0,0x3fb0c3d38879a2ac,2 +np.float64,0xffd59a38372b3470,0xbff0000000000000,2 +np.float64,0x3fa8320ef4306420,0x3fa82d739e937d35,2 +np.float64,0x3fd517f16caa2fe4,0x3fd45c8de1e93b37,2 +np.float64,0xaed921655db24,0xaed921655db24,2 +np.float64,0x93478fb9268f2,0x93478fb9268f2,2 +np.float64,0x1615e28a2c2bd,0x1615e28a2c2bd,2 +np.float64,0xbfead23010f5a460,0xbfe5ea24d5d8f820,2 +np.float64,0x774a6070ee94d,0x774a6070ee94d,2 +np.float64,0x3fdf5874bd3eb0e9,0x3fdd0ef121dd915c,2 +np.float64,0x8004b25f53a964bf,0x8004b25f53a964bf,2 +np.float64,0xbfddacdd2ebb59ba,0xbfdbb78198fab36b,2 +np.float64,0x8008a3acf271475a,0x8008a3acf271475a,2 +np.float64,0xbfdb537c8736a6fa,0xbfd9c741038bb8f0,2 +np.float64,0xbfe56a133f6ad426,0xbfe2b3d5b8d259a1,2 +np.float64,0xffda1db531343b6a,0xbff0000000000000,2 +np.float64,0x3fcbe05f3a37c0be,0x3fcb71a54a64ddfb,2 +np.float64,0x7fe1ccaa7da39954,0x3ff0000000000000,2 +np.float64,0x3faeadd8343d5bb0,0x3faea475608860e6,2 +np.float64,0x3fe662ba1c2cc574,0x3fe354a6176e90df,2 +np.float64,0xffe4d49f4e69a93e,0xbff0000000000000,2 +np.float64,0xbfeadbc424f5b788,0xbfe5ef39dbe66343,2 +np.float64,0x99cf66f1339ed,0x99cf66f1339ed,2 +np.float64,0x33af77a2675f0,0x33af77a2675f0,2 +np.float64,0x7fec7b32ecf8f665,0x3ff0000000000000,2 +np.float64,0xffef3e44993e7c88,0xbff0000000000000,2 +np.float64,0xffe8f8ceac31f19c,0xbff0000000000000,2 +np.float64,0x7fe0d15b6da1a2b6,0x3ff0000000000000,2 +np.float64,0x4ba795c2974f3,0x4ba795c2974f3,2 +np.float64,0x3fe361aa37a6c354,0x3fe15079021d6b15,2 +np.float64,0xffe709714f6e12e2,0xbff0000000000000,2 +np.float64,0xffe7ea6a872fd4d4,0xbff0000000000000,2 +np.float64,0xffdb9441c8b72884,0xbff0000000000000,2 +np.float64,0xffd5e11ae9abc236,0xbff0000000000000,2 +np.float64,0xffe092a08b612540,0xbff0000000000000,2 +np.float64,0x3fe1f27e1ca3e4fc,0x3fe04685b5131207,2 +np.float64,0xbfe71ce1bdee39c4,0xbfe3c940809a7081,2 +np.float64,0xffe8c3aa68318754,0xbff0000000000000,2 +np.float64,0x800d4e2919da9c52,0x800d4e2919da9c52,2 +np.float64,0x7fe6c8bca76d9178,0x3ff0000000000000,2 +np.float64,0x7fced8751e3db0e9,0x3ff0000000000000,2 +np.float64,0xd61d0c8bac3a2,0xd61d0c8bac3a2,2 +np.float64,0x3fec57732938aee6,0x3fe6b22f15f38352,2 +np.float64,0xff9251cc7024a3a0,0xbff0000000000000,2 +np.float64,0xf4a68cb9e94d2,0xf4a68cb9e94d2,2 +np.float64,0x3feed76703bdaece,0x3fe7def0fc9a080c,2 
+np.float64,0xbfe8971ff7712e40,0xbfe4ac3eb8ebff07,2 +np.float64,0x3fe4825f682904bf,0x3fe218c1952fe67d,2 +np.float64,0xbfd60f7698ac1eee,0xbfd539f0979b4b0c,2 +np.float64,0x3fcf0845993e1088,0x3fce7032f7180144,2 +np.float64,0x7fc83443f3306887,0x3ff0000000000000,2 +np.float64,0x3fe93123ae726247,0x3fe504e4fc437e89,2 +np.float64,0x3fbf9eb8363f3d70,0x3fbf75cdfa6828d5,2 +np.float64,0xbf8b45e5d0368bc0,0xbf8b457c29dfe1a9,2 +np.float64,0x8006c2853d0d850b,0x8006c2853d0d850b,2 +np.float64,0xffef26e25ffe4dc4,0xbff0000000000000,2 +np.float64,0x7fefffffffffffff,0x3ff0000000000000,2 +np.float64,0xbfde98f2c2bd31e6,0xbfdc761bfab1c4cb,2 +np.float64,0xffb725e6222e4bd0,0xbff0000000000000,2 +np.float64,0x800c63ead5d8c7d6,0x800c63ead5d8c7d6,2 +np.float64,0x3fea087e95f410fd,0x3fe57d3ab440706c,2 +np.float64,0xbfdf9f8a603f3f14,0xbfdd4742d77dfa57,2 +np.float64,0xfff0000000000000,0xbff0000000000000,2 +np.float64,0xbfcdc0841d3b8108,0xbfcd3a401debba9a,2 +np.float64,0x800f0c8f4f7e191f,0x800f0c8f4f7e191f,2 +np.float64,0x800ba6e75fd74dcf,0x800ba6e75fd74dcf,2 +np.float64,0x7fee4927e8bc924f,0x3ff0000000000000,2 +np.float64,0x3fadf141903be283,0x3fade8878d9d3551,2 +np.float64,0x3efb1a267df64,0x3efb1a267df64,2 +np.float64,0xffebf55f22b7eabe,0xbff0000000000000,2 +np.float64,0x7fbe8045663d008a,0x3ff0000000000000,2 +np.float64,0x3fefc0129f7f8026,0x3fe843f8b7d6cf38,2 +np.float64,0xbfe846b420f08d68,0xbfe47d1709e43937,2 +np.float64,0x7fe8e87043f1d0e0,0x3ff0000000000000,2 +np.float64,0x3fcfb718453f6e31,0x3fcf14ecee7b32b4,2 +np.float64,0x7fe4306b71a860d6,0x3ff0000000000000,2 +np.float64,0x7fee08459f7c108a,0x3ff0000000000000,2 +np.float64,0x3fed705165fae0a3,0x3fe73a66369c5700,2 +np.float64,0x7fd0e63f4da1cc7e,0x3ff0000000000000,2 +np.float64,0xffd1a40c2ea34818,0xbff0000000000000,2 +np.float64,0xbfa369795c26d2f0,0xbfa36718218d46b3,2 +np.float64,0xef70b9f5dee17,0xef70b9f5dee17,2 +np.float64,0x3fb50a0a6e2a1410,0x3fb4fdf27724560a,2 +np.float64,0x7fe30a0f6166141e,0x3ff0000000000000,2 +np.float64,0xbfd7b3ca7daf6794,0xbfd6accb81032b2d,2 +np.float64,0x3fc21dceb3243b9d,0x3fc1ff15d5d277a3,2 +np.float64,0x3fe483e445a907c9,0x3fe219ca0e269552,2 +np.float64,0x3fb2b1e2a22563c0,0x3fb2a96554900eaf,2 +np.float64,0x4b1ff6409641,0x4b1ff6409641,2 +np.float64,0xbfd92eabc9b25d58,0xbfd7f55d7776d64e,2 +np.float64,0x8003b8604c8770c1,0x8003b8604c8770c1,2 +np.float64,0x800d20a9df1a4154,0x800d20a9df1a4154,2 +np.float64,0xecf8a535d9f15,0xecf8a535d9f15,2 +np.float64,0x3fe92d15bab25a2b,0x3fe50296aa15ae85,2 +np.float64,0x800239c205a47385,0x800239c205a47385,2 +np.float64,0x3fc48664a9290cc8,0x3fc459d126320ef6,2 +np.float64,0x3fe7620625eec40c,0x3fe3f3bcbee3e8c6,2 +np.float64,0x3fd242ff4ca48600,0x3fd1c81ed7a971c8,2 +np.float64,0xbfe39bafcfa73760,0xbfe17959c7a279db,2 +np.float64,0x7fdcd2567239a4ac,0x3ff0000000000000,2 +np.float64,0x3fe5f2f292ebe5e6,0x3fe30d12f05e2752,2 +np.float64,0x7fda3819d1347033,0x3ff0000000000000,2 +np.float64,0xffca5b4d4334b69c,0xbff0000000000000,2 +np.float64,0xb8a2b7cd71457,0xb8a2b7cd71457,2 +np.float64,0x3fee689603fcd12c,0x3fe7ad4ace26d6dd,2 +np.float64,0x7fe26541a564ca82,0x3ff0000000000000,2 +np.float64,0x3fe6912ee66d225e,0x3fe3720d242c4d82,2 +np.float64,0xffe6580c75ecb018,0xbff0000000000000,2 +np.float64,0x7fe01a3370603466,0x3ff0000000000000,2 +np.float64,0xffe84e3f84b09c7e,0xbff0000000000000,2 +np.float64,0x3ff0000000000000,0x3fe85efab514f394,2 +np.float64,0x3fe214d4266429a8,0x3fe05fec03a3c247,2 +np.float64,0x3fd00aec5da015d8,0x3fcf6e070ad4ad62,2 +np.float64,0x800aac8631f5590d,0x800aac8631f5590d,2 
+np.float64,0xbfe7c4f5f76f89ec,0xbfe42fc1c57b4a13,2 +np.float64,0xaf146c7d5e28e,0xaf146c7d5e28e,2 +np.float64,0xbfe57188b66ae312,0xbfe2b8be4615ef75,2 +np.float64,0xffef8cb8e1ff1971,0xbff0000000000000,2 +np.float64,0x8001daf8aa63b5f2,0x8001daf8aa63b5f2,2 +np.float64,0x3fdddcc339bbb986,0x3fdbde5f3783538b,2 +np.float64,0xdd8c92c3bb193,0xdd8c92c3bb193,2 +np.float64,0xbfe861a148f0c342,0xbfe48cf1d228a336,2 +np.float64,0xffe260a32e24c146,0xbff0000000000000,2 +np.float64,0x1f7474b43ee8f,0x1f7474b43ee8f,2 +np.float64,0x3fe81dbd89703b7c,0x3fe464d78df92b7b,2 +np.float64,0x7fed0101177a0201,0x3ff0000000000000,2 +np.float64,0x7fd8b419a8316832,0x3ff0000000000000,2 +np.float64,0x3fe93debccf27bd8,0x3fe50c27727917f0,2 +np.float64,0xe5ead05bcbd5a,0xe5ead05bcbd5a,2 +np.float64,0xbfebbbc4cff7778a,0xbfe663c4ca003bbf,2 +np.float64,0xbfea343eb474687e,0xbfe59529f73ea151,2 +np.float64,0x3fbe74a5963ce94b,0x3fbe50123ed05d8d,2 +np.float64,0x3fd31d3a5d263a75,0x3fd290c026cb38a5,2 +np.float64,0xbfd79908acaf3212,0xbfd695620e31c3c6,2 +np.float64,0xbfc26a350324d46c,0xbfc249f335f3e465,2 +np.float64,0xbfac38d5583871b0,0xbfac31866d12a45e,2 +np.float64,0x3fe40cea672819d5,0x3fe1c83754e72c92,2 +np.float64,0xbfa74770642e8ee0,0xbfa74355fcf67332,2 +np.float64,0x7fc60942d32c1285,0x3ff0000000000000,2 diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/examples/cython/__pycache__/setup.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/examples/cython/__pycache__/setup.cpython-38.pyc new file mode 100644 index 0000000..f13d52c Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/examples/cython/__pycache__/setup.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/examples/cython/checks.pyx b/.local/lib/python3.8/site-packages/numpy/core/tests/examples/cython/checks.pyx new file mode 100644 index 0000000..151979d --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/examples/cython/checks.pyx @@ -0,0 +1,30 @@ +""" +Functions in this module give python-space wrappers for cython functions +exposed in numpy/__init__.pxd, so they can be tested in test_cython.py +""" +cimport numpy as cnp +cnp.import_array() + + +def is_td64(obj): + return cnp.is_timedelta64_object(obj) + + +def is_dt64(obj): + return cnp.is_datetime64_object(obj) + + +def get_dt64_value(obj): + return cnp.get_datetime64_value(obj) + + +def get_td64_value(obj): + return cnp.get_timedelta64_value(obj) + + +def get_dt64_unit(obj): + return cnp.get_datetime64_unit(obj) + + +def is_integer(obj): + return isinstance(obj, (cnp.integer, int)) diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/examples/cython/setup.py b/.local/lib/python3.8/site-packages/numpy/core/tests/examples/cython/setup.py new file mode 100644 index 0000000..6e34aa7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/examples/cython/setup.py @@ -0,0 +1,25 @@ +""" +Provide python-space access to the functions exposed in numpy/__init__.pxd +for testing. 
+""" + +import numpy as np +from distutils.core import setup +from Cython.Build import cythonize +from setuptools.extension import Extension +import os + +macros = [("NPY_NO_DEPRECATED_API", 0)] + +checks = Extension( + "checks", + sources=[os.path.join('.', "checks.pyx")], + include_dirs=[np.get_include()], + define_macros=macros, +) + +extensions = [checks] + +setup( + ext_modules=cythonize(extensions) +) diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/examples/limited_api/__pycache__/setup.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/core/tests/examples/limited_api/__pycache__/setup.cpython-38.pyc new file mode 100644 index 0000000..f526439 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/core/tests/examples/limited_api/__pycache__/setup.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/examples/limited_api/limited_api.c b/.local/lib/python3.8/site-packages/numpy/core/tests/examples/limited_api/limited_api.c new file mode 100644 index 0000000..698c54c --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/examples/limited_api/limited_api.c @@ -0,0 +1,17 @@ +#define Py_LIMITED_API 0x03060000 + +#include +#include +#include + +static PyModuleDef moduledef = { + .m_base = PyModuleDef_HEAD_INIT, + .m_name = "limited_api" +}; + +PyMODINIT_FUNC PyInit_limited_api(void) +{ + import_array(); + import_umath(); + return PyModule_Create(&moduledef); +} diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/examples/limited_api/setup.py b/.local/lib/python3.8/site-packages/numpy/core/tests/examples/limited_api/setup.py new file mode 100644 index 0000000..18747dc --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/examples/limited_api/setup.py @@ -0,0 +1,22 @@ +""" +Build an example package using the limited Python C API. +""" + +import numpy as np +from setuptools import setup, Extension +import os + +macros = [("NPY_NO_DEPRECATED_API", 0), ("Py_LIMITED_API", "0x03060000")] + +limited_api = Extension( + "limited_api", + sources=[os.path.join('.', "limited_api.c")], + include_dirs=[np.get_include()], + define_macros=macros, +) + +extensions = [limited_api] + +setup( + ext_modules=extensions +) diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test__exceptions.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test__exceptions.py new file mode 100644 index 0000000..10b87e0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test__exceptions.py @@ -0,0 +1,88 @@ +""" +Tests of the ._exceptions module. Primarily for exercising the __str__ methods. 
+""" + +import pickle + +import pytest +import numpy as np + +_ArrayMemoryError = np.core._exceptions._ArrayMemoryError +_UFuncNoLoopError = np.core._exceptions._UFuncNoLoopError + +class TestArrayMemoryError: + def test_pickling(self): + """ Test that _ArrayMemoryError can be pickled """ + error = _ArrayMemoryError((1023,), np.dtype(np.uint8)) + res = pickle.loads(pickle.dumps(error)) + assert res._total_size == error._total_size + + def test_str(self): + e = _ArrayMemoryError((1023,), np.dtype(np.uint8)) + str(e) # not crashing is enough + + # testing these properties is easier than testing the full string repr + def test__size_to_string(self): + """ Test e._size_to_string """ + f = _ArrayMemoryError._size_to_string + Ki = 1024 + assert f(0) == '0 bytes' + assert f(1) == '1 bytes' + assert f(1023) == '1023 bytes' + assert f(Ki) == '1.00 KiB' + assert f(Ki+1) == '1.00 KiB' + assert f(10*Ki) == '10.0 KiB' + assert f(int(999.4*Ki)) == '999. KiB' + assert f(int(1023.4*Ki)) == '1023. KiB' + assert f(int(1023.5*Ki)) == '1.00 MiB' + assert f(Ki*Ki) == '1.00 MiB' + + # 1023.9999 Mib should round to 1 GiB + assert f(int(Ki*Ki*Ki*0.9999)) == '1.00 GiB' + assert f(Ki*Ki*Ki*Ki*Ki*Ki) == '1.00 EiB' + # larger than sys.maxsize, adding larger prefixes isn't going to help + # anyway. + assert f(Ki*Ki*Ki*Ki*Ki*Ki*123456) == '123456. EiB' + + def test__total_size(self): + """ Test e._total_size """ + e = _ArrayMemoryError((1,), np.dtype(np.uint8)) + assert e._total_size == 1 + + e = _ArrayMemoryError((2, 4), np.dtype((np.uint64, 16))) + assert e._total_size == 1024 + + +class TestUFuncNoLoopError: + def test_pickling(self): + """ Test that _UFuncNoLoopError can be pickled """ + assert isinstance(pickle.dumps(_UFuncNoLoopError), bytes) + + +@pytest.mark.parametrize("args", [ + (2, 1, None), + (2, 1, "test_prefix"), + ("test message",), +]) +class TestAxisError: + def test_attr(self, args): + """Validate attribute types.""" + exc = np.AxisError(*args) + if len(args) == 1: + assert exc.axis is None + assert exc.ndim is None + else: + axis, ndim, *_ = args + assert exc.axis == axis + assert exc.ndim == ndim + + def test_pickling(self, args): + """Test that `AxisError` can be pickled.""" + exc = np.AxisError(*args) + exc2 = pickle.loads(pickle.dumps(exc)) + + assert type(exc) is type(exc2) + for name in ("axis", "ndim", "args"): + attr1 = getattr(exc, name) + attr2 = getattr(exc2, name) + assert attr1 == attr2, name diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_abc.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_abc.py new file mode 100644 index 0000000..30e5748 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_abc.py @@ -0,0 +1,54 @@ +from numpy.testing import assert_ + +import numbers + +import numpy as np +from numpy.core.numerictypes import sctypes + +class TestABC: + def test_abstract(self): + assert_(issubclass(np.number, numbers.Number)) + + assert_(issubclass(np.inexact, numbers.Complex)) + assert_(issubclass(np.complexfloating, numbers.Complex)) + assert_(issubclass(np.floating, numbers.Real)) + + assert_(issubclass(np.integer, numbers.Integral)) + assert_(issubclass(np.signedinteger, numbers.Integral)) + assert_(issubclass(np.unsignedinteger, numbers.Integral)) + + def test_floats(self): + for t in sctypes['float']: + assert_(isinstance(t(), numbers.Real), + "{0} is not instance of Real".format(t.__name__)) + assert_(issubclass(t, numbers.Real), + "{0} is not subclass of Real".format(t.__name__)) + assert_(not isinstance(t(), 
numbers.Rational), + "{0} is instance of Rational".format(t.__name__)) + assert_(not issubclass(t, numbers.Rational), + "{0} is subclass of Rational".format(t.__name__)) + + def test_complex(self): + for t in sctypes['complex']: + assert_(isinstance(t(), numbers.Complex), + "{0} is not instance of Complex".format(t.__name__)) + assert_(issubclass(t, numbers.Complex), + "{0} is not subclass of Complex".format(t.__name__)) + assert_(not isinstance(t(), numbers.Real), + "{0} is instance of Real".format(t.__name__)) + assert_(not issubclass(t, numbers.Real), + "{0} is subclass of Real".format(t.__name__)) + + def test_int(self): + for t in sctypes['int']: + assert_(isinstance(t(), numbers.Integral), + "{0} is not instance of Integral".format(t.__name__)) + assert_(issubclass(t, numbers.Integral), + "{0} is not subclass of Integral".format(t.__name__)) + + def test_uint(self): + for t in sctypes['uint']: + assert_(isinstance(t(), numbers.Integral), + "{0} is not instance of Integral".format(t.__name__)) + assert_(issubclass(t, numbers.Integral), + "{0} is not subclass of Integral".format(t.__name__)) diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_api.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_api.py new file mode 100644 index 0000000..d3c7211 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_api.py @@ -0,0 +1,628 @@ +import sys + +import numpy as np +from numpy.core._rational_tests import rational +import pytest +from numpy.testing import ( + assert_, assert_equal, assert_array_equal, assert_raises, assert_warns, + HAS_REFCOUNT + ) + +# Switch between new behaviour when NPY_RELAXED_STRIDES_CHECKING is set. +NPY_RELAXED_STRIDES_CHECKING = np.ones((10, 1), order='C').flags.f_contiguous + + +def test_array_array(): + tobj = type(object) + ones11 = np.ones((1, 1), np.float64) + tndarray = type(ones11) + # Test is_ndarray + assert_equal(np.array(ones11, dtype=np.float64), ones11) + if HAS_REFCOUNT: + old_refcount = sys.getrefcount(tndarray) + np.array(ones11) + assert_equal(old_refcount, sys.getrefcount(tndarray)) + + # test None + assert_equal(np.array(None, dtype=np.float64), + np.array(np.nan, dtype=np.float64)) + if HAS_REFCOUNT: + old_refcount = sys.getrefcount(tobj) + np.array(None, dtype=np.float64) + assert_equal(old_refcount, sys.getrefcount(tobj)) + + # test scalar + assert_equal(np.array(1.0, dtype=np.float64), + np.ones((), dtype=np.float64)) + if HAS_REFCOUNT: + old_refcount = sys.getrefcount(np.float64) + np.array(np.array(1.0, dtype=np.float64), dtype=np.float64) + assert_equal(old_refcount, sys.getrefcount(np.float64)) + + # test string + S2 = np.dtype((bytes, 2)) + S3 = np.dtype((bytes, 3)) + S5 = np.dtype((bytes, 5)) + assert_equal(np.array(b"1.0", dtype=np.float64), + np.ones((), dtype=np.float64)) + assert_equal(np.array(b"1.0").dtype, S3) + assert_equal(np.array(b"1.0", dtype=bytes).dtype, S3) + assert_equal(np.array(b"1.0", dtype=S2), np.array(b"1.")) + assert_equal(np.array(b"1", dtype=S5), np.ones((), dtype=S5)) + + # test string + U2 = np.dtype((str, 2)) + U3 = np.dtype((str, 3)) + U5 = np.dtype((str, 5)) + assert_equal(np.array("1.0", dtype=np.float64), + np.ones((), dtype=np.float64)) + assert_equal(np.array("1.0").dtype, U3) + assert_equal(np.array("1.0", dtype=str).dtype, U3) + assert_equal(np.array("1.0", dtype=U2), np.array(str("1."))) + assert_equal(np.array("1", dtype=U5), np.ones((), dtype=U5)) + + builtins = getattr(__builtins__, '__dict__', __builtins__) + assert_(hasattr(builtins, 'get')) + + # 
test memoryview
+    dat = np.array(memoryview(b'1.0'), dtype=np.float64)
+    assert_equal(dat, [49.0, 46.0, 48.0])
+    assert_(dat.dtype.type is np.float64)
+
+    dat = np.array(memoryview(b'1.0'))
+    assert_equal(dat, [49, 46, 48])
+    assert_(dat.dtype.type is np.uint8)
+
+    # test array interface
+    a = np.array(100.0, dtype=np.float64)
+    o = type("o", (object,),
+             dict(__array_interface__=a.__array_interface__))
+    assert_equal(np.array(o, dtype=np.float64), a)
+
+    # test array_struct interface
+    a = np.array([(1, 4.0, 'Hello'), (2, 6.0, 'World')],
+                 dtype=[('f0', int), ('f1', float), ('f2', str)])
+    o = type("o", (object,),
+             dict(__array_struct__=a.__array_struct__))
+    ## wasn't what I expected... is np.array(o) supposed to equal a ?
+    ## instead we get a array([...], dtype=">V18")
+    assert_equal(bytes(np.array(o).data), bytes(a.data))
+
+    # test array
+    o = type("o", (object,),
+             dict(__array__=lambda *x: np.array(100.0, dtype=np.float64)))()
+    assert_equal(np.array(o, dtype=np.float64), np.array(100.0, np.float64))
+
+    # test recursion
+    nested = 1.5
+    for i in range(np.MAXDIMS):
+        nested = [nested]
+
+    # no error
+    np.array(nested)
+
+    # Exceeds recursion limit
+    assert_raises(ValueError, np.array, [nested], dtype=np.float64)
+
+    # Try with lists...
+    assert_equal(np.array([None] * 10, dtype=np.float64),
+                 np.full((10,), np.nan, dtype=np.float64))
+    assert_equal(np.array([[None]] * 10, dtype=np.float64),
+                 np.full((10, 1), np.nan, dtype=np.float64))
+    assert_equal(np.array([[None] * 10], dtype=np.float64),
+                 np.full((1, 10), np.nan, dtype=np.float64))
+    assert_equal(np.array([[None] * 10] * 10, dtype=np.float64),
+                 np.full((10, 10), np.nan, dtype=np.float64))
+
+    assert_equal(np.array([1.0] * 10, dtype=np.float64),
+                 np.ones((10,), dtype=np.float64))
+    assert_equal(np.array([[1.0]] * 10, dtype=np.float64),
+                 np.ones((10, 1), dtype=np.float64))
+    assert_equal(np.array([[1.0] * 10], dtype=np.float64),
+                 np.ones((1, 10), dtype=np.float64))
+    assert_equal(np.array([[1.0] * 10] * 10, dtype=np.float64),
+                 np.ones((10, 10), dtype=np.float64))
+
+    # Try with tuples
+    assert_equal(np.array((None,) * 10, dtype=np.float64),
+                 np.full((10,), np.nan, dtype=np.float64))
+    assert_equal(np.array([(None,)] * 10, dtype=np.float64),
+                 np.full((10, 1), np.nan, dtype=np.float64))
+    assert_equal(np.array([(None,) * 10], dtype=np.float64),
+                 np.full((1, 10), np.nan, dtype=np.float64))
+    assert_equal(np.array([(None,) * 10] * 10, dtype=np.float64),
+                 np.full((10, 10), np.nan, dtype=np.float64))
+
+    assert_equal(np.array((1.0,) * 10, dtype=np.float64),
+                 np.ones((10,), dtype=np.float64))
+    assert_equal(np.array([(1.0,)] * 10, dtype=np.float64),
+                 np.ones((10, 1), dtype=np.float64))
+    assert_equal(np.array([(1.0,) * 10], dtype=np.float64),
+                 np.ones((1, 10), dtype=np.float64))
+    assert_equal(np.array([(1.0,) * 10] * 10, dtype=np.float64),
+                 np.ones((10, 10), dtype=np.float64))
+
+@pytest.mark.parametrize("array", [True, False])
+def test_array_impossible_casts(array):
+    # All builtin types can be force-cast, at least theoretically,
+    # but user dtypes cannot necessarily.
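+    # Illustrative note (added commentary, not part of the upstream test):
+    # `rational` is a user-defined test dtype with no cast defined to
+    # datetime, so the np.array(rt, dtype="M8") call below must raise
+    # TypeError rather than produce a value.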
+ rt = rational(1, 2) + if array: + rt = np.array(rt) + with assert_raises(TypeError): + np.array(rt, dtype="M8") + + +def test_fastCopyAndTranspose(): + # 0D array + a = np.array(2) + b = np.fastCopyAndTranspose(a) + assert_equal(b, a.T) + assert_(b.flags.owndata) + + # 1D array + a = np.array([3, 2, 7, 0]) + b = np.fastCopyAndTranspose(a) + assert_equal(b, a.T) + assert_(b.flags.owndata) + + # 2D array + a = np.arange(6).reshape(2, 3) + b = np.fastCopyAndTranspose(a) + assert_equal(b, a.T) + assert_(b.flags.owndata) + +def test_array_astype(): + a = np.arange(6, dtype='f4').reshape(2, 3) + # Default behavior: allows unsafe casts, keeps memory layout, + # always copies. + b = a.astype('i4') + assert_equal(a, b) + assert_equal(b.dtype, np.dtype('i4')) + assert_equal(a.strides, b.strides) + b = a.T.astype('i4') + assert_equal(a.T, b) + assert_equal(b.dtype, np.dtype('i4')) + assert_equal(a.T.strides, b.strides) + b = a.astype('f4') + assert_equal(a, b) + assert_(not (a is b)) + + # copy=False parameter can sometimes skip a copy + b = a.astype('f4', copy=False) + assert_(a is b) + + # order parameter allows overriding of the memory layout, + # forcing a copy if the layout is wrong + b = a.astype('f4', order='F', copy=False) + assert_equal(a, b) + assert_(not (a is b)) + assert_(b.flags.f_contiguous) + + b = a.astype('f4', order='C', copy=False) + assert_equal(a, b) + assert_(a is b) + assert_(b.flags.c_contiguous) + + # casting parameter allows catching bad casts + b = a.astype('c8', casting='safe') + assert_equal(a, b) + assert_equal(b.dtype, np.dtype('c8')) + + assert_raises(TypeError, a.astype, 'i4', casting='safe') + + # subok=False passes through a non-subclassed array + b = a.astype('f4', subok=0, copy=False) + assert_(a is b) + + class MyNDArray(np.ndarray): + pass + + a = np.array([[0, 1, 2], [3, 4, 5]], dtype='f4').view(MyNDArray) + + # subok=True passes through a subclass + b = a.astype('f4', subok=True, copy=False) + assert_(a is b) + + # subok=True is default, and creates a subtype on a cast + b = a.astype('i4', copy=False) + assert_equal(a, b) + assert_equal(type(b), MyNDArray) + + # subok=False never returns a subclass + b = a.astype('f4', subok=False, copy=False) + assert_equal(a, b) + assert_(not (a is b)) + assert_(type(b) is not MyNDArray) + + # Make sure converting from string object to fixed length string + # does not truncate. 
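+    # Illustrative note (added commentary, not upstream): the cases below
+    # pin the discovered width to the input length, e.g. a 100-character
+    # object string must become 'S100'/'U100'; any shorter default width
+    # would silently truncate data.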
+    a = np.array([b'a'*100], dtype='O')
+    b = a.astype('S')
+    assert_equal(a, b)
+    assert_equal(b.dtype, np.dtype('S100'))
+    a = np.array([u'a'*100], dtype='O')
+    b = a.astype('U')
+    assert_equal(a, b)
+    assert_equal(b.dtype, np.dtype('U100'))
+
+    # Same test as above but for strings shorter than 64 characters
+    a = np.array([b'a'*10], dtype='O')
+    b = a.astype('S')
+    assert_equal(a, b)
+    assert_equal(b.dtype, np.dtype('S10'))
+    a = np.array([u'a'*10], dtype='O')
+    b = a.astype('U')
+    assert_equal(a, b)
+    assert_equal(b.dtype, np.dtype('U10'))
+
+    a = np.array(123456789012345678901234567890, dtype='O').astype('S')
+    assert_array_equal(a, np.array(b'1234567890' * 3, dtype='S30'))
+    a = np.array(123456789012345678901234567890, dtype='O').astype('U')
+    assert_array_equal(a, np.array(u'1234567890' * 3, dtype='U30'))
+
+    a = np.array([123456789012345678901234567890], dtype='O').astype('S')
+    assert_array_equal(a, np.array(b'1234567890' * 3, dtype='S30'))
+    a = np.array([123456789012345678901234567890], dtype='O').astype('U')
+    assert_array_equal(a, np.array(u'1234567890' * 3, dtype='U30'))
+
+    a = np.array(123456789012345678901234567890, dtype='S')
+    assert_array_equal(a, np.array(b'1234567890' * 3, dtype='S30'))
+    a = np.array(123456789012345678901234567890, dtype='U')
+    assert_array_equal(a, np.array(u'1234567890' * 3, dtype='U30'))
+
+    a = np.array(u'a\u0140', dtype='U')
+    b = np.ndarray(buffer=a, dtype='uint32', shape=2)
+    assert_(b.size == 2)
+
+    a = np.array([1000], dtype='i4')
+    assert_raises(TypeError, a.astype, 'S1', casting='safe')
+
+    a = np.array(1000, dtype='i4')
+    assert_raises(TypeError, a.astype, 'U1', casting='safe')
+
+@pytest.mark.parametrize("dt", ["S", "U"])
+def test_array_astype_to_string_discovery_empty(dt):
+    # See also gh-19085
+    arr = np.array([""], dtype=object)
+    # Note, the itemsize is the `0 -> 1` logic, which should change.
+    # The important part of the test is rather that it does not error.
+    assert arr.astype(dt).dtype.itemsize == np.dtype(f"{dt}1").itemsize
+
+    # check the same thing for `np.can_cast` (since it accepts arrays)
+    assert np.can_cast(arr, dt, casting="unsafe")
+    assert not np.can_cast(arr, dt, casting="same_kind")
+    # as well as for the object as a descriptor:
+    assert np.can_cast("O", dt, casting="unsafe")
+
+@pytest.mark.parametrize("dt", ["d", "f", "S13", "U32"])
+def test_array_astype_to_void(dt):
+    dt = np.dtype(dt)
+    arr = np.array([], dtype=dt)
+    assert arr.astype("V").dtype.itemsize == dt.itemsize
+
+def test_object_array_astype_to_void():
+    # This is different to `test_array_astype_to_void` as object arrays
+    # are inspected. The default void is "V8" (8 is the length of double)
+    arr = np.array([], dtype="O").astype("V")
+    assert arr.dtype == "V8"
+
+@pytest.mark.parametrize("t",
+    np.sctypes['uint'] + np.sctypes['int'] + np.sctypes['float']
+)
+def test_array_astype_warning(t):
+    # test ComplexWarning when casting from complex to float or int
+    a = np.array(10, dtype=np.complex_)
+    assert_warns(np.ComplexWarning, a.astype, t)
+
+@pytest.mark.parametrize(["dtype", "out_dtype"],
+        [(np.bytes_, np.bool_),
+         (np.unicode_, np.bool_),
+         (np.dtype("S10,S9"), np.dtype("?,?"))])
+def test_string_to_boolean_cast(dtype, out_dtype):
+    """
+    Currently, for `astype` strings are cast to booleans effectively by
+    calling `bool(int(string))`. This is not consistent (see gh-9875) and
+    will eventually be deprecated.
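+    For example, "10" parses as the integer 10 and therefore becomes True,
+    while "0" becomes False; strings such as "False" fail at the int() step
+    (see test_string_to_boolean_cast_errors below).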
+ """ + arr = np.array(["10", "10\0\0\0", "0\0\0", "0"], dtype=dtype) + expected = np.array([True, True, False, False], dtype=out_dtype) + assert_array_equal(arr.astype(out_dtype), expected) + +@pytest.mark.parametrize(["dtype", "out_dtype"], + [(np.bytes_, np.bool_), + (np.unicode_, np.bool_), + (np.dtype("S10,S9"), np.dtype("?,?"))]) +def test_string_to_boolean_cast_errors(dtype, out_dtype): + """ + These currently error out, since cast to integers fails, but should not + error out in the future. + """ + for invalid in ["False", "True", "", "\0", "non-empty"]: + arr = np.array([invalid], dtype=dtype) + with assert_raises(ValueError): + arr.astype(out_dtype) + +@pytest.mark.parametrize("str_type", [str, bytes, np.str_, np.unicode_]) +@pytest.mark.parametrize("scalar_type", + [np.complex64, np.complex128, np.clongdouble]) +def test_string_to_complex_cast(str_type, scalar_type): + value = scalar_type(b"1+3j") + assert scalar_type(value) == 1+3j + assert np.array([value], dtype=object).astype(scalar_type)[()] == 1+3j + assert np.array(value).astype(scalar_type)[()] == 1+3j + arr = np.zeros(1, dtype=scalar_type) + arr[0] = value + assert arr[0] == 1+3j + +@pytest.mark.parametrize("dtype", np.typecodes["AllFloat"]) +def test_none_to_nan_cast(dtype): + # Note that at the time of writing this test, the scalar constructors + # reject None + arr = np.zeros(1, dtype=dtype) + arr[0] = None + assert np.isnan(arr)[0] + assert np.isnan(np.array(None, dtype=dtype))[()] + assert np.isnan(np.array([None], dtype=dtype))[0] + assert np.isnan(np.array(None).astype(dtype))[()] + +def test_copyto_fromscalar(): + a = np.arange(6, dtype='f4').reshape(2, 3) + + # Simple copy + np.copyto(a, 1.5) + assert_equal(a, 1.5) + np.copyto(a.T, 2.5) + assert_equal(a, 2.5) + + # Where-masked copy + mask = np.array([[0, 1, 0], [0, 0, 1]], dtype='?') + np.copyto(a, 3.5, where=mask) + assert_equal(a, [[2.5, 3.5, 2.5], [2.5, 2.5, 3.5]]) + mask = np.array([[0, 1], [1, 1], [1, 0]], dtype='?') + np.copyto(a.T, 4.5, where=mask) + assert_equal(a, [[2.5, 4.5, 4.5], [4.5, 4.5, 3.5]]) + +def test_copyto(): + a = np.arange(6, dtype='i4').reshape(2, 3) + + # Simple copy + np.copyto(a, [[3, 1, 5], [6, 2, 1]]) + assert_equal(a, [[3, 1, 5], [6, 2, 1]]) + + # Overlapping copy should work + np.copyto(a[:, :2], a[::-1, 1::-1]) + assert_equal(a, [[2, 6, 5], [1, 3, 1]]) + + # Defaults to 'same_kind' casting + assert_raises(TypeError, np.copyto, a, 1.5) + + # Force a copy with 'unsafe' casting, truncating 1.5 to 1 + np.copyto(a, 1.5, casting='unsafe') + assert_equal(a, 1) + + # Copying with a mask + np.copyto(a, 3, where=[True, False, True]) + assert_equal(a, [[3, 1, 3], [3, 1, 3]]) + + # Casting rule still applies with a mask + assert_raises(TypeError, np.copyto, a, 3.5, where=[True, False, True]) + + # Lists of integer 0's and 1's is ok too + np.copyto(a, 4.0, casting='unsafe', where=[[0, 1, 1], [1, 0, 0]]) + assert_equal(a, [[3, 4, 4], [4, 1, 3]]) + + # Overlapping copy with mask should work + np.copyto(a[:, :2], a[::-1, 1::-1], where=[[0, 1], [1, 1]]) + assert_equal(a, [[3, 4, 4], [4, 3, 3]]) + + # 'dst' must be an array + assert_raises(TypeError, np.copyto, [1, 2, 3], [2, 3, 4]) + +def test_copyto_permut(): + # test explicit overflow case + pad = 500 + l = [True] * pad + [True, True, True, True] + r = np.zeros(len(l)-pad) + d = np.ones(len(l)-pad) + mask = np.array(l)[pad:] + np.copyto(r, d, where=mask[::-1]) + + # test all permutation of possible masks, 9 should be sufficient for + # current 4 byte unrolled code + power = 9 + d = 
np.ones(power)
+    for i in range(2**power):
+        r = np.zeros(power)
+        l = [(i & x) != 0 for x in range(power)]
+        mask = np.array(l)
+        np.copyto(r, d, where=mask)
+        assert_array_equal(r == 1, l)
+        assert_equal(r.sum(), sum(l))
+
+        r = np.zeros(power)
+        np.copyto(r, d, where=mask[::-1])
+        assert_array_equal(r == 1, l[::-1])
+        assert_equal(r.sum(), sum(l))
+
+        r = np.zeros(power)
+        np.copyto(r[::2], d[::2], where=mask[::2])
+        assert_array_equal(r[::2] == 1, l[::2])
+        assert_equal(r[::2].sum(), sum(l[::2]))
+
+        r = np.zeros(power)
+        np.copyto(r[::2], d[::2], where=mask[::-2])
+        assert_array_equal(r[::2] == 1, l[::-2])
+        assert_equal(r[::2].sum(), sum(l[::-2]))
+
+        for c in [0xFF, 0x7F, 0x02, 0x10]:
+            r = np.zeros(power)
+            mask = np.array(l)
+            imask = np.array(l).view(np.uint8)
+            imask[mask != 0] = c
+            np.copyto(r, d, where=mask)
+            assert_array_equal(r == 1, l)
+            assert_equal(r.sum(), sum(l))
+
+    r = np.zeros(power)
+    np.copyto(r, d, where=True)
+    assert_equal(r.sum(), r.size)
+    r = np.ones(power)
+    d = np.zeros(power)
+    np.copyto(r, d, where=False)
+    assert_equal(r.sum(), r.size)
+
+def test_copy_order():
+    a = np.arange(24).reshape(2, 1, 3, 4)
+    b = a.copy(order='F')
+    c = np.arange(24).reshape(2, 1, 4, 3).swapaxes(2, 3)
+
+    def check_copy_result(x, y, ccontig, fcontig, strides=False):
+        assert_(not (x is y))
+        assert_equal(x, y)
+        assert_equal(x.flags.c_contiguous, ccontig)
+        assert_equal(x.flags.f_contiguous, fcontig)
+        # Strides are only compared when NPY_RELAXED_STRIDES_CHECKING is
+        # off, since it actively changes the strides.
+        if not NPY_RELAXED_STRIDES_CHECKING:
+            if strides:
+                assert_equal(x.strides, y.strides)
+            else:
+                assert_(x.strides != y.strides)
+
+    # Validate the initial state of a, b, and c
+    assert_(a.flags.c_contiguous)
+    assert_(not a.flags.f_contiguous)
+    assert_(not b.flags.c_contiguous)
+    assert_(b.flags.f_contiguous)
+    assert_(not c.flags.c_contiguous)
+    assert_(not c.flags.f_contiguous)
+
+    # Copy with order='C'
+    res = a.copy(order='C')
+    check_copy_result(res, a, ccontig=True, fcontig=False, strides=True)
+    res = b.copy(order='C')
+    check_copy_result(res, b, ccontig=True, fcontig=False, strides=False)
+    res = c.copy(order='C')
+    check_copy_result(res, c, ccontig=True, fcontig=False, strides=False)
+    res = np.copy(a, order='C')
+    check_copy_result(res, a, ccontig=True, fcontig=False, strides=True)
+    res = np.copy(b, order='C')
+    check_copy_result(res, b, ccontig=True, fcontig=False, strides=False)
+    res = np.copy(c, order='C')
+    check_copy_result(res, c, ccontig=True, fcontig=False, strides=False)
+
+    # Copy with order='F'
+    res = a.copy(order='F')
+    check_copy_result(res, a, ccontig=False, fcontig=True, strides=False)
+    res = b.copy(order='F')
+    check_copy_result(res, b, ccontig=False, fcontig=True, strides=True)
+    res = c.copy(order='F')
+    check_copy_result(res, c, ccontig=False, fcontig=True, strides=False)
+    res = np.copy(a, order='F')
+    check_copy_result(res, a, ccontig=False, fcontig=True, strides=False)
+    res = np.copy(b, order='F')
+    check_copy_result(res, b, ccontig=False, fcontig=True, strides=True)
+    res = np.copy(c, order='F')
+    check_copy_result(res, c, ccontig=False, fcontig=True, strides=False)
+
+    # Copy with order='K'
+    res = a.copy(order='K')
+    check_copy_result(res, a, ccontig=True, fcontig=False, strides=True)
+    res = b.copy(order='K')
+    check_copy_result(res, b, ccontig=False, fcontig=True, strides=True)
+    res = c.copy(order='K')
+    check_copy_result(res, c, ccontig=False, fcontig=False, strides=True)
+    res = np.copy(a, order='K')
+    check_copy_result(res,
a, ccontig=True, fcontig=False, strides=True) + res = np.copy(b, order='K') + check_copy_result(res, b, ccontig=False, fcontig=True, strides=True) + res = np.copy(c, order='K') + check_copy_result(res, c, ccontig=False, fcontig=False, strides=True) + +def test_contiguous_flags(): + a = np.ones((4, 4, 1))[::2,:,:] + if NPY_RELAXED_STRIDES_CHECKING: + a.strides = a.strides[:2] + (-123,) + b = np.ones((2, 2, 1, 2, 2)).swapaxes(3, 4) + + def check_contig(a, ccontig, fcontig): + assert_(a.flags.c_contiguous == ccontig) + assert_(a.flags.f_contiguous == fcontig) + + # Check if new arrays are correct: + check_contig(a, False, False) + check_contig(b, False, False) + if NPY_RELAXED_STRIDES_CHECKING: + check_contig(np.empty((2, 2, 0, 2, 2)), True, True) + check_contig(np.array([[[1], [2]]], order='F'), True, True) + else: + check_contig(np.empty((2, 2, 0, 2, 2)), True, False) + check_contig(np.array([[[1], [2]]], order='F'), False, True) + check_contig(np.empty((2, 2)), True, False) + check_contig(np.empty((2, 2), order='F'), False, True) + + # Check that np.array creates correct contiguous flags: + check_contig(np.array(a, copy=False), False, False) + check_contig(np.array(a, copy=False, order='C'), True, False) + check_contig(np.array(a, ndmin=4, copy=False, order='F'), False, True) + + if NPY_RELAXED_STRIDES_CHECKING: + # Check slicing update of flags and : + check_contig(a[0], True, True) + check_contig(a[None, ::4, ..., None], True, True) + check_contig(b[0, 0, ...], False, True) + check_contig(b[:,:, 0:0,:,:], True, True) + else: + # Check slicing update of flags: + check_contig(a[0], True, False) + # Would be nice if this was C-Contiguous: + check_contig(a[None, 0, ..., None], False, False) + check_contig(b[0, 0, 0, ...], False, True) + + # Test ravel and squeeze. + check_contig(a.ravel(), True, True) + check_contig(np.ones((1, 3, 1)).squeeze(), True, True) + +def test_broadcast_arrays(): + # Test user defined dtypes + a = np.array([(1, 2, 3)], dtype='u4,u4,u4') + b = np.array([(1, 2, 3), (4, 5, 6), (7, 8, 9)], dtype='u4,u4,u4') + result = np.broadcast_arrays(a, b) + assert_equal(result[0], np.array([(1, 2, 3), (1, 2, 3), (1, 2, 3)], dtype='u4,u4,u4')) + assert_equal(result[1], np.array([(1, 2, 3), (4, 5, 6), (7, 8, 9)], dtype='u4,u4,u4')) + +@pytest.mark.parametrize(["shape", "fill_value", "expected_output"], + [((2, 2), [5.0, 6.0], np.array([[5.0, 6.0], [5.0, 6.0]])), + ((3, 2), [1.0, 2.0], np.array([[1.0, 2.0], [1.0, 2.0], [1.0, 2.0]]))]) +def test_full_from_list(shape, fill_value, expected_output): + output = np.full(shape, fill_value) + assert_equal(output, expected_output) + +def test_astype_copyflag(): + # test the various copyflag options + arr = np.arange(10, dtype=np.intp) + + res_true = arr.astype(np.intp, copy=True) + assert not np.may_share_memory(arr, res_true) + res_always = arr.astype(np.intp, copy=np._CopyMode.ALWAYS) + assert not np.may_share_memory(arr, res_always) + + res_false = arr.astype(np.intp, copy=False) + # `res_false is arr` currently, but check `may_share_memory`. + assert np.may_share_memory(arr, res_false) + res_if_needed = arr.astype(np.intp, copy=np._CopyMode.IF_NEEDED) + # `res_if_needed is arr` currently, but check `may_share_memory`. 
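+    # Illustrative note (added commentary, not upstream): asserting
+    # np.may_share_memory rather than object identity keeps the test valid
+    # even if the no-copy fast path stops returning the identical object.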
+    assert np.may_share_memory(arr, res_if_needed)
+
+    res_never = arr.astype(np.intp, copy=np._CopyMode.NEVER)
+    assert np.may_share_memory(arr, res_never)
+
+    # Simple tests for when a copy is necessary:
+    res_false = arr.astype(np.float64, copy=False)
+    assert_array_equal(res_false, arr)
+    res_if_needed = arr.astype(np.float64,
+                               copy=np._CopyMode.IF_NEEDED)
+    assert_array_equal(res_if_needed, arr)
+    assert_raises(ValueError, arr.astype, np.float64,
+                  copy=np._CopyMode.NEVER)
diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_argparse.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_argparse.py
new file mode 100644
index 0000000..63a01de
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_argparse.py
@@ -0,0 +1,62 @@
+"""
+Tests for the private NumPy argument parsing functionality.
+They mainly exist to ensure good test coverage without having to try the
+weirder cases on actual numpy functions but test them in one place.
+
+The test function is defined in C to be equivalent to (errors may not always
+match exactly, and could be adjusted):
+
+    def func(arg1, /, arg2, *, arg3):
+        i = integer(arg1)  # reproducing the 'i' parsing in Python.
+        return None
+"""
+
+import pytest
+
+import numpy as np
+from numpy.core._multiarray_tests import argparse_example_function as func
+
+
+def test_invalid_integers():
+    with pytest.raises(TypeError,
+            match="integer argument expected, got float"):
+        func(1.)
+    with pytest.raises(OverflowError):
+        func(2**100)
+
+
+def test_missing_arguments():
+    with pytest.raises(TypeError,
+            match="missing required positional argument 0"):
+        func()
+    with pytest.raises(TypeError,
+            match="missing required positional argument 0"):
+        func(arg2=1, arg3=4)
+    with pytest.raises(TypeError,
+            match=r"missing required argument \'arg2\' \(pos 1\)"):
+        func(1, arg3=5)
+
+
+def test_too_many_positional():
+    # the second argument is positional but can be passed as keyword.
+    with pytest.raises(TypeError,
+            match="takes from 2 to 3 positional arguments but 4 were given"):
+        func(1, 2, 3, 4)
+
+
+def test_multiple_values():
+    with pytest.raises(TypeError,
+            match=r"given by name \('arg2'\) and position \(position 1\)"):
+        func(1, 2, arg2=3)
+
+
+def test_string_fallbacks():
+    # We can (currently?) use numpy strings to test the "slow" fallbacks
+    # that should normally not be taken due to string interning.
+    arg2 = np.unicode_("arg2")
+    missing_arg = np.unicode_("missing_arg")
+    func(1, **{arg2: 3})
+    with pytest.raises(TypeError,
+            match="got an unexpected keyword argument 'missing_arg'"):
+        func(2, **{missing_arg: 3})
+
diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_array_coercion.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_array_coercion.py
new file mode 100644
index 0000000..293f5a6
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_array_coercion.py
@@ -0,0 +1,748 @@
+"""
+Tests for array coercion, mainly through testing `np.array` results directly.
+Note that other such tests exist, e.g. in `test_api.py`, and many corner-cases
+are tested (sometimes indirectly) elsewhere.
+""" + +import pytest +from pytest import param + +from itertools import product + +import numpy as np +from numpy.core._rational_tests import rational +from numpy.core._multiarray_umath import _discover_array_parameters + +from numpy.testing import ( + assert_array_equal, assert_warns, IS_PYPY) + + +def arraylikes(): + """ + Generator for functions converting an array into various array-likes. + If full is True (default) includes array-likes not capable of handling + all dtypes + """ + # base array: + def ndarray(a): + return a + + yield param(ndarray, id="ndarray") + + # subclass: + class MyArr(np.ndarray): + pass + + def subclass(a): + return a.view(MyArr) + + yield subclass + + class _SequenceLike(): + # We are giving a warning that array-like's were also expected to be + # sequence-like in `np.array([array_like])`, this can be removed + # when the deprecation exired (started NumPy 1.20) + def __len__(self): + raise TypeError + + def __getitem__(self): + raise TypeError + + # Array-interface + class ArrayDunder(_SequenceLike): + def __init__(self, a): + self.a = a + + def __array__(self, dtype=None): + return self.a + + yield param(ArrayDunder, id="__array__") + + # memory-view + yield param(memoryview, id="memoryview") + + # Array-interface + class ArrayInterface(_SequenceLike): + def __init__(self, a): + self.a = a # need to hold on to keep interface valid + self.__array_interface__ = a.__array_interface__ + + yield param(ArrayInterface, id="__array_interface__") + + # Array-Struct + class ArrayStruct(_SequenceLike): + def __init__(self, a): + self.a = a # need to hold on to keep struct valid + self.__array_struct__ = a.__array_struct__ + + yield param(ArrayStruct, id="__array_struct__") + + +def scalar_instances(times=True, extended_precision=True, user_dtype=True): + # Hard-coded list of scalar instances. 
+    # Floats:
+    yield param(np.sqrt(np.float16(5)), id="float16")
+    yield param(np.sqrt(np.float32(5)), id="float32")
+    yield param(np.sqrt(np.float64(5)), id="float64")
+    if extended_precision:
+        yield param(np.sqrt(np.longdouble(5)), id="longdouble")
+
+    # Complex:
+    yield param(np.sqrt(np.complex64(2+3j)), id="complex64")
+    yield param(np.sqrt(np.complex128(2+3j)), id="complex128")
+    if extended_precision:
+        yield param(np.sqrt(np.longcomplex(2+3j)), id="clongdouble")
+
+    # Bool:
+    # XFAIL: Bool should be added, but has some bad properties when it
+    # comes to strings, see also gh-9875
+    # yield param(np.bool_(0), id="bool")
+
+    # Integers:
+    yield param(np.int8(2), id="int8")
+    yield param(np.int16(2), id="int16")
+    yield param(np.int32(2), id="int32")
+    yield param(np.int64(2), id="int64")
+
+    yield param(np.uint8(2), id="uint8")
+    yield param(np.uint16(2), id="uint16")
+    yield param(np.uint32(2), id="uint32")
+    yield param(np.uint64(2), id="uint64")
+
+    # Rational:
+    if user_dtype:
+        yield param(rational(1, 2), id="rational")
+
+    # Cannot create a structured void scalar directly:
+    structured = np.array([(1, 3)], "i,i")[0]
+    assert isinstance(structured, np.void)
+    assert structured.dtype == np.dtype("i,i")
+    yield param(structured, id="structured")
+
+    if times:
+        # Datetimes and timedelta
+        yield param(np.timedelta64(2), id="timedelta64[generic]")
+        yield param(np.timedelta64(23, "s"), id="timedelta64[s]")
+        yield param(np.timedelta64("NaT", "s"), id="timedelta64[s](NaT)")
+
+        yield param(np.datetime64("NaT"), id="datetime64[generic](NaT)")
+        yield param(np.datetime64("2020-06-07 12:43", "ms"), id="datetime64[ms]")
+
+    # Strings and unstructured void:
+    yield param(np.bytes_(b"1234"), id="bytes")
+    yield param(np.unicode_("2345"), id="unicode")
+    yield param(np.void(b"4321"), id="unstructured_void")
+
+
+def is_parametric_dtype(dtype):
+    """Returns True if the dtype is a parametric legacy dtype (itemsize
+    is 0, or a datetime without units).
+    """
+    if dtype.itemsize == 0:
+        return True
+    if issubclass(dtype.type, (np.datetime64, np.timedelta64)):
+        if dtype.name.endswith("64"):
+            # Generic time units
+            return True
+    return False
+
+
+class TestStringDiscovery:
+    @pytest.mark.parametrize("obj",
+            [object(), 1.2, 10**43, None, "string"],
+            ids=["object", "1.2", "10**43", "None", "string"])
+    def test_basic_stringlength(self, obj):
+        length = len(str(obj))
+        expected = np.dtype(f"S{length}")
+
+        assert np.array(obj, dtype="S").dtype == expected
+        assert np.array([obj], dtype="S").dtype == expected
+
+        # A nested array is also discovered correctly
+        arr = np.array(obj, dtype="O")
+        assert np.array(arr, dtype="S").dtype == expected
+        # Check that .astype() behaves identically
+        assert arr.astype("S").dtype == expected
+
+    @pytest.mark.parametrize("obj",
+            [object(), 1.2, 10**43, None, "string"],
+            ids=["object", "1.2", "10**43", "None", "string"])
+    def test_nested_arrays_stringlength(self, obj):
+        length = len(str(obj))
+        expected = np.dtype(f"S{length}")
+        arr = np.array(obj, dtype="O")
+        assert np.array([arr, arr], dtype="S").dtype == expected
+
+    @pytest.mark.parametrize("arraylike", arraylikes())
+    def test_unpack_first_level(self, arraylike):
+        # We unpack exactly one level of array likes
+        obj = np.array([None])
+        obj[0] = np.array(1.2)
+        # the length of the included item, not of the float dtype
+        length = len(str(obj[0]))
+        expected = np.dtype(f"S{length}")
+
+        obj = arraylike(obj)
+        # casting to string usually calls str(obj)
+        arr = np.array([obj], dtype="S")
+        assert arr.shape
== (1, 1) + assert arr.dtype == expected + + +class TestScalarDiscovery: + def test_void_special_case(self): + # Void dtypes with structures discover tuples as elements + arr = np.array((1, 2, 3), dtype="i,i,i") + assert arr.shape == () + arr = np.array([(1, 2, 3)], dtype="i,i,i") + assert arr.shape == (1,) + + def test_char_special_case(self): + arr = np.array("string", dtype="c") + assert arr.shape == (6,) + assert arr.dtype.char == "c" + arr = np.array(["string"], dtype="c") + assert arr.shape == (1, 6) + assert arr.dtype.char == "c" + + def test_char_special_case_deep(self): + # Check that the character special case errors correctly if the + # array is too deep: + nested = ["string"] # 2 dimensions (due to string being sequence) + for i in range(np.MAXDIMS - 2): + nested = [nested] + + arr = np.array(nested, dtype='c') + assert arr.shape == (1,) * (np.MAXDIMS - 1) + (6,) + with pytest.raises(ValueError): + np.array([nested], dtype="c") + + def test_unknown_object(self): + arr = np.array(object()) + assert arr.shape == () + assert arr.dtype == np.dtype("O") + + @pytest.mark.parametrize("scalar", scalar_instances()) + def test_scalar(self, scalar): + arr = np.array(scalar) + assert arr.shape == () + assert arr.dtype == scalar.dtype + + arr = np.array([[scalar, scalar]]) + assert arr.shape == (1, 2) + assert arr.dtype == scalar.dtype + + # Additionally to string this test also runs into a corner case + # with datetime promotion (the difference is the promotion order). + @pytest.mark.filterwarnings("ignore:Promotion of numbers:FutureWarning") + def test_scalar_promotion(self): + for sc1, sc2 in product(scalar_instances(), scalar_instances()): + sc1, sc2 = sc1.values[0], sc2.values[0] + # test all combinations: + try: + arr = np.array([sc1, sc2]) + except (TypeError, ValueError): + # The promotion between two times can fail + # XFAIL (ValueError): Some object casts are currently undefined + continue + assert arr.shape == (2,) + try: + dt1, dt2 = sc1.dtype, sc2.dtype + expected_dtype = np.promote_types(dt1, dt2) + assert arr.dtype == expected_dtype + except TypeError as e: + # Will currently always go to object dtype + assert arr.dtype == np.dtype("O") + + @pytest.mark.parametrize("scalar", scalar_instances()) + def test_scalar_coercion(self, scalar): + # This tests various scalar coercion paths, mainly for the numerical + # types. It includes some paths not directly related to `np.array` + if isinstance(scalar, np.inexact): + # Ensure we have a full-precision number if available + scalar = type(scalar)((scalar * 2)**0.5) + + if type(scalar) is rational: + # Rational generally fails due to a missing cast. In the future + # object casts should automatically be defined based on `setitem`. 
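+            # Added note (not upstream): the imperative pytest.xfail() ends
+            # the test here, since the remaining checks cannot run without
+            # the missing cast.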
+            pytest.xfail("Rational to object cast is undefined currently.")
+
+        # Use casting from object:
+        arr = np.array(scalar, dtype=object).astype(scalar.dtype)
+
+        # Test various ways to create an array containing this scalar:
+        arr1 = np.array(scalar).reshape(1)
+        arr2 = np.array([scalar])
+        arr3 = np.empty(1, dtype=scalar.dtype)
+        arr3[0] = scalar
+        arr4 = np.empty(1, dtype=scalar.dtype)
+        arr4[:] = [scalar]
+        # All of these methods should yield the same results
+        assert_array_equal(arr, arr1)
+        assert_array_equal(arr, arr2)
+        assert_array_equal(arr, arr3)
+        assert_array_equal(arr, arr4)
+
+    @pytest.mark.xfail(IS_PYPY, reason="`int(np.complex128(3))` fails on PyPy")
+    @pytest.mark.filterwarnings("ignore::numpy.ComplexWarning")
+    @pytest.mark.parametrize("cast_to", scalar_instances())
+    def test_scalar_coercion_same_as_cast_and_assignment(self, cast_to):
+        """
+        Test that in most cases:
+          * `np.array(scalar, dtype=dtype)`
+          * `np.empty((), dtype=dtype)[()] = scalar`
+          * `np.array(scalar).astype(dtype)`
+        should behave the same.  The only exceptions are parametric dtypes
+        (mainly datetime/timedelta without unit) and void without fields.
+        """
+        dtype = cast_to.dtype  # used to parametrize only the target dtype
+
+        for scalar in scalar_instances(times=False):
+            scalar = scalar.values[0]
+
+            if dtype.type == np.void:
+                if scalar.dtype.fields is not None and dtype.fields is None:
+                    # Here, coercion to "V6" works, but the cast fails.
+                    # Since the types are identical, SETITEM takes care of
+                    # this, but has different rules than the cast.
+                    with pytest.raises(TypeError):
+                        np.array(scalar).astype(dtype)
+                    np.array(scalar, dtype=dtype)
+                    np.array([scalar], dtype=dtype)
+                    continue
+
+            # The main test: we first try to use casting, and if it succeeds
+            # continue below testing that things are the same; otherwise we
+            # test that the alternative paths at least also fail.
+            try:
+                cast = np.array(scalar).astype(dtype)
+            except (TypeError, ValueError, RuntimeError):
+                # coercion should also raise (error type may change)
+                with pytest.raises(Exception):
+                    np.array(scalar, dtype=dtype)
+
+                if (isinstance(scalar, rational) and
+                        np.issubdtype(dtype, np.signedinteger)):
+                    return
+
+                with pytest.raises(Exception):
+                    np.array([scalar], dtype=dtype)
+                # assignment should also raise
+                res = np.zeros((), dtype=dtype)
+                with pytest.raises(Exception):
+                    res[()] = scalar
+
+                return
+
+            # Non error path:
+            arr = np.array(scalar, dtype=dtype)
+            assert_array_equal(arr, cast)
+            # assignment behaves the same
+            ass = np.zeros((), dtype=dtype)
+            ass[()] = scalar
+            assert_array_equal(ass, cast)
+
+    @pytest.mark.parametrize("pyscalar", [10, 10.32, 10.14j, 10**100])
+    def test_pyscalar_subclasses(self, pyscalar):
+        """NumPy arrays are read/write, which means that anything but invariant
+        behaviour is on thin ice.  However, we currently are happy to discover
+        subclasses of Python float, int, complex the same as the base classes.
+        This should potentially be deprecated.
+        """
+        class MyScalar(type(pyscalar)):
+            pass
+
+        res = np.array(MyScalar(pyscalar))
+        expected = np.array(pyscalar)
+        assert_array_equal(res, expected)
+
+    @pytest.mark.parametrize("dtype_char", np.typecodes["All"])
+    def test_default_dtype_instance(self, dtype_char):
+        if dtype_char in "SU":
+            dtype = np.dtype(dtype_char + "1")
+        elif dtype_char == "V":
+            # Legacy behaviour was to use V8.  The reason was float64 being
+            # the default dtype and that having 8 bytes.
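+            # Added sanity note (not upstream): 8 is
+            # np.dtype(np.float64).itemsize, which is why the legacy default
+            # void dtype is "V8".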
+            dtype = np.dtype("V8")
+        else:
+            dtype = np.dtype(dtype_char)
+
+        discovered_dtype, _ = _discover_array_parameters([], type(dtype))
+
+        assert discovered_dtype == dtype
+        assert discovered_dtype.itemsize == dtype.itemsize
+
+    @pytest.mark.parametrize("dtype", np.typecodes["Integer"])
+    def test_scalar_to_int_coerce_does_not_cast(self, dtype):
+        """
+        Signed integers are currently different in that they do not cast other
+        NumPy scalars, but instead use scalar.__int__().  The hardcoded
+        exception to this rule is `np.array(scalar, dtype=integer)`.
+        """
+        dtype = np.dtype(dtype)
+        invalid_int = np.ulonglong(-1)
+
+        float_nan = np.float64(np.nan)
+
+        for scalar in [float_nan, invalid_int]:
+            # This is a special case using casting logic and thus not failing:
+            coerced = np.array(scalar, dtype=dtype)
+            cast = np.array(scalar).astype(dtype)
+            assert_array_equal(coerced, cast)
+
+            # However these fail:
+            with pytest.raises((ValueError, OverflowError)):
+                np.array([scalar], dtype=dtype)
+            with pytest.raises((ValueError, OverflowError)):
+                cast[()] = scalar
+
+
+class TestTimeScalars:
+    @pytest.mark.parametrize("dtype", [np.int64, np.float32])
+    @pytest.mark.parametrize("scalar",
+            [param(np.timedelta64("NaT", "s"), id="timedelta64[s](NaT)"),
+             param(np.timedelta64(123, "s"), id="timedelta64[s]"),
+             param(np.datetime64("NaT", "generic"), id="datetime64[generic](NaT)"),
+             param(np.datetime64(1, "D"), id="datetime64[D]")],)
+    def test_coercion_basic(self, dtype, scalar):
+        # Note the `[scalar]` is there because np.array(scalar) uses stricter
+        # `scalar.__int__()` rules for backward compatibility right now.
+        arr = np.array(scalar, dtype=dtype)
+        cast = np.array(scalar).astype(dtype)
+        assert_array_equal(arr, cast)
+
+        ass = np.ones((), dtype=dtype)
+        if issubclass(dtype, np.integer):
+            with pytest.raises(TypeError):
+                # raises, as would np.array([scalar], dtype=dtype); this is
+                # conversion from times, but behaviour of integers.
+                ass[()] = scalar
+        else:
+            ass[()] = scalar
+            assert_array_equal(ass, cast)
+
+    @pytest.mark.parametrize("dtype", [np.int64, np.float32])
+    @pytest.mark.parametrize("scalar",
+            [param(np.timedelta64(123, "ns"), id="timedelta64[ns]"),
+             param(np.timedelta64(12, "generic"), id="timedelta64[generic]")])
+    def test_coercion_timedelta_convert_to_number(self, dtype, scalar):
+        # Only "ns" and "generic" timedeltas can be converted to numbers,
+        # so these are slightly special.
+        arr = np.array(scalar, dtype=dtype)
+        cast = np.array(scalar).astype(dtype)
+        ass = np.ones((), dtype=dtype)
+        ass[()] = scalar  # works here, since these units are convertible
+
+        assert_array_equal(arr, cast)
+        assert_array_equal(ass, cast)
+
+    @pytest.mark.parametrize("dtype", ["S6", "U6"])
+    @pytest.mark.parametrize(["val", "unit"],
+            [param(123, "s", id="[s]"), param(123, "D", id="[D]")])
+    def test_coercion_assignment_datetime(self, val, unit, dtype):
+        # String from datetime64 assignment is currently special cased to
+        # never use casting.  This is because casting will error in this
+        # case, and traditionally in most cases the behaviour is maintained
+        # like this.  (`np.array(scalar, dtype="U6")` would have failed before)
+        # TODO: This discrepancy _should_ be resolved, either by relaxing the
+        # cast, or by deprecating the first part.
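+        # Illustration of the discrepancy (added commentary, not upstream),
+        # assuming unit "s": np.array(np.datetime64(123, "s"), dtype="U6")
+        # stores the first six characters of str(scalar), while the explicit
+        # cast np.array(scalar).astype("U6") raises, as asserted below.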
+ scalar = np.datetime64(val, unit) + dtype = np.dtype(dtype) + cut_string = dtype.type(str(scalar)[:6]) + + arr = np.array(scalar, dtype=dtype) + assert arr[()] == cut_string + ass = np.ones((), dtype=dtype) + ass[()] = scalar + assert ass[()] == cut_string + + with pytest.raises(RuntimeError): + # However, unlike the above assignment using `str(scalar)[:6]` + # due to being handled by the string DType and not be casting + # the explicit cast fails: + np.array(scalar).astype(dtype) + + + @pytest.mark.parametrize(["val", "unit"], + [param(123, "s", id="[s]"), param(123, "D", id="[D]")]) + def test_coercion_assignment_timedelta(self, val, unit): + scalar = np.timedelta64(val, unit) + + # Unlike datetime64, timedelta allows the unsafe cast: + np.array(scalar, dtype="S6") + cast = np.array(scalar).astype("S6") + ass = np.ones((), dtype="S6") + ass[()] = scalar + expected = scalar.astype("S")[:6] + assert cast[()] == expected + assert ass[()] == expected + +class TestNested: + def test_nested_simple(self): + initial = [1.2] + nested = initial + for i in range(np.MAXDIMS - 1): + nested = [nested] + + arr = np.array(nested, dtype="float64") + assert arr.shape == (1,) * np.MAXDIMS + with pytest.raises(ValueError): + np.array([nested], dtype="float64") + + # We discover object automatically at this time: + with assert_warns(np.VisibleDeprecationWarning): + arr = np.array([nested]) + assert arr.dtype == np.dtype("O") + assert arr.shape == (1,) * np.MAXDIMS + assert arr.item() is initial + + def test_pathological_self_containing(self): + # Test that this also works for two nested sequences + l = [] + l.append(l) + arr = np.array([l, l, l], dtype=object) + assert arr.shape == (3,) + (1,) * (np.MAXDIMS - 1) + + # Also check a ragged case: + arr = np.array([l, [None], l], dtype=object) + assert arr.shape == (3, 1) + + @pytest.mark.parametrize("arraylike", arraylikes()) + def test_nested_arraylikes(self, arraylike): + # We try storing an array like into an array, but the array-like + # will have too many dimensions. This means the shape discovery + # decides that the array-like must be treated as an object (a special + # case of ragged discovery). The result will be an array with one + # dimension less than the maximum dimensions, and the array being + # assigned to it (which does work for object or if `float(arraylike)` + # works). + initial = arraylike(np.ones((1, 1))) + + nested = initial + for i in range(np.MAXDIMS - 1): + nested = [nested] + + with pytest.warns(DeprecationWarning): + # It will refuse to assign the array into + np.array(nested, dtype="float64") + + # If this is object, we end up assigning a (1, 1) array into (1,) + # (due to running out of dimensions), this is currently supported but + # a special case which is not ideal. + arr = np.array(nested, dtype=object) + assert arr.shape == (1,) * np.MAXDIMS + assert arr.item() == np.array(initial).item() + + @pytest.mark.parametrize("arraylike", arraylikes()) + def test_uneven_depth_ragged(self, arraylike): + arr = np.arange(4).reshape((2, 2)) + arr = arraylike(arr) + + # Array is ragged in the second dimension already: + out = np.array([arr, [arr]], dtype=object) + assert out.shape == (2,) + assert out[0] is arr + assert type(out[1]) is list + + # Array is ragged in the third dimension: + with pytest.raises(ValueError): + # This is a broadcast error during assignment, because + # the array shape would be (2, 2, 2) but `arr[0, 0] = arr` fails. 
+ np.array([arr, [arr, arr]], dtype=object) + + def test_empty_sequence(self): + arr = np.array([[], [1], [[1]]], dtype=object) + assert arr.shape == (3,) + + # The empty sequence stops further dimension discovery, so the + # result shape will be (0,) which leads to an error during: + with pytest.raises(ValueError): + np.array([[], np.empty((0, 1))], dtype=object) + + def test_array_of_different_depths(self): + # When multiple arrays (or array-likes) are included in a + # sequences and have different depth, we currently discover + # as many dimensions as they share. (see also gh-17224) + arr = np.zeros((3, 2)) + mismatch_first_dim = np.zeros((1, 2)) + mismatch_second_dim = np.zeros((3, 3)) + + dtype, shape = _discover_array_parameters( + [arr, mismatch_second_dim], dtype=np.dtype("O")) + assert shape == (2, 3) + + dtype, shape = _discover_array_parameters( + [arr, mismatch_first_dim], dtype=np.dtype("O")) + assert shape == (2,) + # The second case is currently supported because the arrays + # can be stored as objects: + res = np.asarray([arr, mismatch_first_dim], dtype=np.dtype("O")) + assert res[0] is arr + assert res[1] is mismatch_first_dim + + +class TestBadSequences: + # These are tests for bad objects passed into `np.array`, in general + # these have undefined behaviour. In the old code they partially worked + # when now they will fail. We could (and maybe should) create a copy + # of all sequences to be safe against bad-actors. + + def test_growing_list(self): + # List to coerce, `mylist` will append to it during coercion + obj = [] + class mylist(list): + def __len__(self): + obj.append([1, 2]) + return super().__len__() + + obj.append(mylist([1, 2])) + + with pytest.raises(RuntimeError): + np.array(obj) + + # Note: We do not test a shrinking list. These do very evil things + # and the only way to fix them would be to copy all sequences. + # (which may be a real option in the future). + + def test_mutated_list(self): + # List to coerce, `mylist` will mutate the first element + obj = [] + class mylist(list): + def __len__(self): + obj[0] = [2, 3] # replace with a different list. + return super().__len__() + + obj.append([2, 3]) + obj.append(mylist([1, 2])) + with pytest.raises(RuntimeError): + np.array(obj) + + def test_replace_0d_array(self): + # List to coerce, `mylist` will mutate the first element + obj = [] + class baditem: + def __len__(self): + obj[0][0] = 2 # replace with a different list. + raise ValueError("not actually a sequence!") + + def __getitem__(self): + pass + + # Runs into a corner case in the new code, the `array(2)` is cached + # so replacing it invalidates the cache. + obj.append([np.array(2), baditem()]) + with pytest.raises(RuntimeError): + np.array(obj) + + +class TestArrayLikes: + @pytest.mark.parametrize("arraylike", arraylikes()) + def test_0d_object_special_case(self, arraylike): + arr = np.array(0.) + obj = arraylike(arr) + # A single array-like is always converted: + res = np.array(obj, dtype=object) + assert_array_equal(arr, res) + + # But a single 0-D nested array-like never: + res = np.array([obj], dtype=object) + assert res[0] is obj + + def test_0d_generic_special_case(self): + class ArraySubclass(np.ndarray): + def __float__(self): + raise TypeError("e.g. quantities raise on this") + + arr = np.array(0.) + obj = arr.view(ArraySubclass) + res = np.array(obj) + # The subclass is simply cast: + assert_array_equal(arr, res) + + # If the 0-D array-like is included, __float__ is currently + # guaranteed to be used. 
We may want to change that, quantities + # and masked arrays half make use of this. + with pytest.raises(TypeError): + np.array([obj]) + + # The same holds for memoryview: + obj = memoryview(arr) + res = np.array(obj) + assert_array_equal(arr, res) + with pytest.raises(ValueError): + # The error type does not matter much here. + np.array([obj]) + + def test_arraylike_classes(self): + # The classes of array-likes should generally be acceptable to be + # stored inside a numpy (object) array. This tests all of the + # special attributes (since all are checked during coercion). + arr = np.array(np.int64) + assert arr[()] is np.int64 + arr = np.array([np.int64]) + assert arr[0] is np.int64 + + # This also works for properties/unbound methods: + class ArrayLike: + @property + def __array_interface__(self): + pass + + @property + def __array_struct__(self): + pass + + def __array__(self): + pass + + arr = np.array(ArrayLike) + assert arr[()] is ArrayLike + arr = np.array([ArrayLike]) + assert arr[0] is ArrayLike + + @pytest.mark.skipif( + np.dtype(np.intp).itemsize < 8, reason="Needs 64bit platform") + def test_too_large_array_error_paths(self): + """Test the error paths, including for memory leaks""" + arr = np.array(0, dtype="uint8") + # Guarantees that a contiguous copy won't work: + arr = np.broadcast_to(arr, 2**62) + + for i in range(5): + # repeat, to ensure caching cannot have an effect: + with pytest.raises(MemoryError): + np.array(arr) + with pytest.raises(MemoryError): + np.array([arr]) + + @pytest.mark.parametrize("attribute", + ["__array_interface__", "__array__", "__array_struct__"]) + @pytest.mark.parametrize("error", [RecursionError, MemoryError]) + def test_bad_array_like_attributes(self, attribute, error): + # RecursionError and MemoryError are considered fatal. All errors + # (except AttributeError) should probably be raised in the future, + # but shapely made use of it, so it will require a deprecation. + + class BadInterface: + def __getattr__(self, attr): + if attr == attribute: + raise error + super().__getattr__(attr) + + with pytest.raises(error): + np.array(BadInterface()) + + @pytest.mark.parametrize("error", [RecursionError, MemoryError]) + def test_bad_array_like_bad_length(self, error): + # RecursionError and MemoryError are considered "critical" in + # sequences. We could expand this more generally though. (NumPy 1.20) + class BadSequence: + def __len__(self): + raise error + def __getitem__(self): + # must have getitem to be a Sequence + return 1 + + with pytest.raises(error): + np.array(BadSequence()) + diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_arraymethod.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_arraymethod.py new file mode 100644 index 0000000..49aa9f6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_arraymethod.py @@ -0,0 +1,94 @@ +""" +This file tests the generic aspects of ArrayMethod. At the time of writing +this is private API, but when added, public API may be added here. +""" + +import sys +import types +from typing import Any, Type + +import pytest + +import numpy as np +from numpy.core._multiarray_umath import _get_castingimpl as get_castingimpl + + +class TestResolveDescriptors: + # Test mainly error paths of the resolve_descriptors function, + # note that the `casting_unittests` tests exercise this non-error paths. 
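+    # For reference, a successful call returns the casting level plus the
+    # resolved descriptor pair, schematically (illustrative, not asserted
+    # in this class):
+    #
+    #     casting, (from_dt, to_dt) = method._resolve_descriptors(
+    #             (np.dtype("d"), None))
+    #     # to_dt is then the concrete float32 descriptor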
+
+    # Casting implementations are the main/only current user:
+    method = get_castingimpl(type(np.dtype("d")), type(np.dtype("f")))
+
+    @pytest.mark.parametrize("args", [
+        (True,),  # Not a tuple.
+        ((None,)),  # Too few elements
+        ((None, None, None),),  # Too many
+        ((None, None),),  # Input dtype is None, which is invalid.
+        ((np.dtype("d"), True),),  # Output dtype is not a dtype
+        ((np.dtype("f"), None),),  # Input dtype does not match method
+    ])
+    def test_invalid_arguments(self, args):
+        with pytest.raises(TypeError):
+            self.method._resolve_descriptors(*args)
+
+
+class TestSimpleStridedCall:
+    # Test mainly error paths of the _simple_strided_call function;
+    # note that the `casting_unittests` tests exercise the non-error paths.
+
+    # Casting implementations are the main/only current user:
+    method = get_castingimpl(type(np.dtype("d")), type(np.dtype("f")))
+
+    @pytest.mark.parametrize(["args", "error"], [
+        ((True,), TypeError),  # Not a tuple
+        (((None,),), TypeError),  # Too few elements
+        ((None, None), TypeError),  # Inputs are not arrays.
+        (((None, None, None),), TypeError),  # Too many
+        (((np.arange(3), np.arange(3)),), TypeError),  # Incorrect dtypes
+        (((np.ones(3, dtype=">d"), np.ones(3, dtype="<f")),),
+         TypeError),  # Does not match the resolved (native-byteorder) dtypes
+    ])
+    def test_invalid_arguments(self, args, error):
+        with pytest.raises(error):
+            self.method._simple_strided_call(*args)
+
+
+@pytest.mark.skipif(sys.version_info < (3, 9), reason="Requires python 3.9")
+class TestClassGetItem:
+    @pytest.mark.parametrize("cls", [np.ndarray])
+    def test_class_getitem(self, cls: Type[np.ndarray]) -> None:
+        """Test `ndarray.__class_getitem__`."""
+        alias = cls[Any, Any]
+        assert isinstance(alias, types.GenericAlias)
+        assert alias.__origin__ is cls
+
+    @pytest.mark.parametrize("arg_len", range(4))
+    def test_subscript_tuple(self, arg_len: int) -> None:
+        arg_tup = (Any,) * arg_len
+        if arg_len == 2:
+            assert np.ndarray[arg_tup]
+        else:
+            with pytest.raises(TypeError):
+                np.ndarray[arg_tup]
+
+    def test_subscript_scalar(self) -> None:
+        with pytest.raises(TypeError):
+            np.ndarray[Any]
+
+
+@pytest.mark.skipif(sys.version_info >= (3, 9), reason="Requires python 3.8")
+def test_class_getitem_38() -> None:
+    match = "Type subscription requires python >= 3.9"
+    with pytest.raises(TypeError, match=match):
+        np.ndarray[Any, Any]
diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_arrayprint.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_arrayprint.py
new file mode 100644
index 0000000..25826d8
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_arrayprint.py
@@ -0,0 +1,967 @@
+import sys
+import gc
+from hypothesis import given
+from hypothesis.extra import numpy as hynp
+import pytest
+
+import numpy as np
+from numpy.testing import (
+    assert_, assert_equal, assert_raises, assert_warns, HAS_REFCOUNT,
+    assert_raises_regex,
+    )
+import textwrap
+
+class TestArrayRepr:
+    def test_nan_inf(self):
+        x = np.array([np.nan, np.inf])
+        assert_equal(repr(x), 'array([nan, inf])')
+
+    def test_subclass(self):
+        class sub(np.ndarray): pass
+
+        # one dimensional
+        x1d = np.array([1, 2]).view(sub)
+        assert_equal(repr(x1d), 'sub([1, 2])')
+
+        # two dimensional
+        x2d = np.array([[1, 2], [3, 4]]).view(sub)
+        assert_equal(repr(x2d),
+            'sub([[1, 2],\n'
+            '     [3, 4]])')
+
+        # two dimensional with flexible dtype
+        xstruct = np.ones((2, 2), dtype=[('a', '<i4')]).view(sub)
+        assert_equal(repr(xstruct),
+            "sub([[(1,), (1,)],\n"
+            "     [(1,), (1,)]], dtype=[('a', '<i4')])"
+        )
+
+    def test_0d_object_subclass(self):
+        # make sure that subclasses which return 0ds instead
+        # of scalars don't cause infinite recursion in str
+        class sub(np.ndarray):
+            def __new__(cls, inp):
+                obj = np.asarray(inp).view(cls)
+                return obj
+
+            def __getitem__(self, ind):
+                ret = super().__getitem__(ind)
+                return sub(ret)
+
+        x = sub(1)
+        assert_equal(repr(x), 'sub(1)')
+        assert_equal(str(x), '1')
+
+        x = sub([1, 1])
+        assert_equal(repr(x), 'sub([1, 1])')
+        assert_equal(str(x), '[1 1]')
+
+        # check it works properly with object arrays too
+        x = sub(None)
+        assert_equal(repr(x), 'sub(None, dtype=object)')
+        assert_equal(str(x), 'None')
+
+        # plus recursive object arrays (even depth > 1)
+        y = sub(None)
+        x[()] = y
+        y[()] = x
+        assert_equal(repr(x),
+            'sub(sub(sub(..., dtype=object), dtype=object), dtype=object)')
+        assert_equal(str(x), '...')
+        x[()] = 0  # resolve circular references for garbage collector
+
+        # nested 0d-subclass-object
+        x = sub(None)
+        x[()] = sub(None)
+        assert_equal(repr(x), 'sub(sub(None, dtype=object), dtype=object)')
+        assert_equal(str(x), 'None')
+
+        # gh-10663
+        class DuckCounter(np.ndarray):
+            def __getitem__(self, item):
+                result = super().__getitem__(item)
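+                # (re-wrap scalar results so that str() keeps dispatching
+                # through DuckCounter's to_string/__str__ below)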
+ if not isinstance(result, DuckCounter): + result = result[...].view(DuckCounter) + return result + + def to_string(self): + return {0: 'zero', 1: 'one', 2: 'two'}.get(self.item(), 'many') + + def __str__(self): + if self.shape == (): + return self.to_string() + else: + fmt = {'all': lambda x: x.to_string()} + return np.array2string(self, formatter=fmt) + + dc = np.arange(5).view(DuckCounter) + assert_equal(str(dc), "[zero one two many many]") + assert_equal(str(dc[0]), "zero") + + def test_self_containing(self): + arr0d = np.array(None) + arr0d[()] = arr0d + assert_equal(repr(arr0d), + 'array(array(..., dtype=object), dtype=object)') + arr0d[()] = 0 # resolve recursion for garbage collector + + arr1d = np.array([None, None]) + arr1d[1] = arr1d + assert_equal(repr(arr1d), + 'array([None, array(..., dtype=object)], dtype=object)') + arr1d[1] = 0 # resolve recursion for garbage collector + + first = np.array(None) + second = np.array(None) + first[()] = second + second[()] = first + assert_equal(repr(first), + 'array(array(array(..., dtype=object), dtype=object), dtype=object)') + first[()] = 0 # resolve circular references for garbage collector + + def test_containing_list(self): + # printing square brackets directly would be ambiguuous + arr1d = np.array([None, None]) + arr1d[0] = [1, 2] + arr1d[1] = [3] + assert_equal(repr(arr1d), + 'array([list([1, 2]), list([3])], dtype=object)') + + def test_void_scalar_recursion(self): + # gh-9345 + repr(np.void(b'test')) # RecursionError ? + + def test_fieldless_structured(self): + # gh-10366 + no_fields = np.dtype([]) + arr_no_fields = np.empty(4, dtype=no_fields) + assert_equal(repr(arr_no_fields), 'array([(), (), (), ()], dtype=[])') + + +class TestComplexArray: + def test_str(self): + rvals = [0, 1, -1, np.inf, -np.inf, np.nan] + cvals = [complex(rp, ip) for rp in rvals for ip in rvals] + dtypes = [np.complex64, np.cdouble, np.clongdouble] + actual = [str(np.array([c], dt)) for c in cvals for dt in dtypes] + wanted = [ + '[0.+0.j]', '[0.+0.j]', '[0.+0.j]', + '[0.+1.j]', '[0.+1.j]', '[0.+1.j]', + '[0.-1.j]', '[0.-1.j]', '[0.-1.j]', + '[0.+infj]', '[0.+infj]', '[0.+infj]', + '[0.-infj]', '[0.-infj]', '[0.-infj]', + '[0.+nanj]', '[0.+nanj]', '[0.+nanj]', + '[1.+0.j]', '[1.+0.j]', '[1.+0.j]', + '[1.+1.j]', '[1.+1.j]', '[1.+1.j]', + '[1.-1.j]', '[1.-1.j]', '[1.-1.j]', + '[1.+infj]', '[1.+infj]', '[1.+infj]', + '[1.-infj]', '[1.-infj]', '[1.-infj]', + '[1.+nanj]', '[1.+nanj]', '[1.+nanj]', + '[-1.+0.j]', '[-1.+0.j]', '[-1.+0.j]', + '[-1.+1.j]', '[-1.+1.j]', '[-1.+1.j]', + '[-1.-1.j]', '[-1.-1.j]', '[-1.-1.j]', + '[-1.+infj]', '[-1.+infj]', '[-1.+infj]', + '[-1.-infj]', '[-1.-infj]', '[-1.-infj]', + '[-1.+nanj]', '[-1.+nanj]', '[-1.+nanj]', + '[inf+0.j]', '[inf+0.j]', '[inf+0.j]', + '[inf+1.j]', '[inf+1.j]', '[inf+1.j]', + '[inf-1.j]', '[inf-1.j]', '[inf-1.j]', + '[inf+infj]', '[inf+infj]', '[inf+infj]', + '[inf-infj]', '[inf-infj]', '[inf-infj]', + '[inf+nanj]', '[inf+nanj]', '[inf+nanj]', + '[-inf+0.j]', '[-inf+0.j]', '[-inf+0.j]', + '[-inf+1.j]', '[-inf+1.j]', '[-inf+1.j]', + '[-inf-1.j]', '[-inf-1.j]', '[-inf-1.j]', + '[-inf+infj]', '[-inf+infj]', '[-inf+infj]', + '[-inf-infj]', '[-inf-infj]', '[-inf-infj]', + '[-inf+nanj]', '[-inf+nanj]', '[-inf+nanj]', + '[nan+0.j]', '[nan+0.j]', '[nan+0.j]', + '[nan+1.j]', '[nan+1.j]', '[nan+1.j]', + '[nan-1.j]', '[nan-1.j]', '[nan-1.j]', + '[nan+infj]', '[nan+infj]', '[nan+infj]', + '[nan-infj]', '[nan-infj]', '[nan-infj]', + '[nan+nanj]', '[nan+nanj]', '[nan+nanj]'] + + for res, val in zip(actual, wanted): + 
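+            # (`wanted` lists each formatted value three times, once per
+            # complex dtype: complex64, cdouble, clongdouble)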
assert_equal(res, val) + +class TestArray2String: + def test_basic(self): + """Basic test of array2string.""" + a = np.arange(3) + assert_(np.array2string(a) == '[0 1 2]') + assert_(np.array2string(a, max_line_width=4, legacy='1.13') == '[0 1\n 2]') + assert_(np.array2string(a, max_line_width=4) == '[0\n 1\n 2]') + + def test_unexpected_kwarg(self): + # ensure than an appropriate TypeError + # is raised when array2string receives + # an unexpected kwarg + + with assert_raises_regex(TypeError, 'nonsense'): + np.array2string(np.array([1, 2, 3]), + nonsense=None) + + def test_format_function(self): + """Test custom format function for each element in array.""" + def _format_function(x): + if np.abs(x) < 1: + return '.' + elif np.abs(x) < 2: + return 'o' + else: + return 'O' + + x = np.arange(3) + x_hex = "[0x0 0x1 0x2]" + x_oct = "[0o0 0o1 0o2]" + assert_(np.array2string(x, formatter={'all':_format_function}) == + "[. o O]") + assert_(np.array2string(x, formatter={'int_kind':_format_function}) == + "[. o O]") + assert_(np.array2string(x, formatter={'all':lambda x: "%.4f" % x}) == + "[0.0000 1.0000 2.0000]") + assert_equal(np.array2string(x, formatter={'int':lambda x: hex(x)}), + x_hex) + assert_equal(np.array2string(x, formatter={'int':lambda x: oct(x)}), + x_oct) + + x = np.arange(3.) + assert_(np.array2string(x, formatter={'float_kind':lambda x: "%.2f" % x}) == + "[0.00 1.00 2.00]") + assert_(np.array2string(x, formatter={'float':lambda x: "%.2f" % x}) == + "[0.00 1.00 2.00]") + + s = np.array(['abc', 'def']) + assert_(np.array2string(s, formatter={'numpystr':lambda s: s*2}) == + '[abcabc defdef]') + + + def test_structure_format(self): + dt = np.dtype([('name', np.str_, 16), ('grades', np.float64, (2,))]) + x = np.array([('Sarah', (8.0, 7.0)), ('John', (6.0, 7.0))], dtype=dt) + assert_equal(np.array2string(x), + "[('Sarah', [8., 7.]) ('John', [6., 7.])]") + + np.set_printoptions(legacy='1.13') + try: + # for issue #5692 + A = np.zeros(shape=10, dtype=[("A", "M8[s]")]) + A[5:].fill(np.datetime64('NaT')) + assert_equal( + np.array2string(A), + textwrap.dedent("""\ + [('1970-01-01T00:00:00',) ('1970-01-01T00:00:00',) ('1970-01-01T00:00:00',) + ('1970-01-01T00:00:00',) ('1970-01-01T00:00:00',) ('NaT',) ('NaT',) + ('NaT',) ('NaT',) ('NaT',)]""") + ) + finally: + np.set_printoptions(legacy=False) + + # same again, but with non-legacy behavior + assert_equal( + np.array2string(A), + textwrap.dedent("""\ + [('1970-01-01T00:00:00',) ('1970-01-01T00:00:00',) + ('1970-01-01T00:00:00',) ('1970-01-01T00:00:00',) + ('1970-01-01T00:00:00',) ( 'NaT',) + ( 'NaT',) ( 'NaT',) + ( 'NaT',) ( 'NaT',)]""") + ) + + # and again, with timedeltas + A = np.full(10, 123456, dtype=[("A", "m8[s]")]) + A[5:].fill(np.datetime64('NaT')) + assert_equal( + np.array2string(A), + textwrap.dedent("""\ + [(123456,) (123456,) (123456,) (123456,) (123456,) ( 'NaT',) ( 'NaT',) + ( 'NaT',) ( 'NaT',) ( 'NaT',)]""") + ) + + # See #8160 + struct_int = np.array([([1, -1],), ([123, 1],)], dtype=[('B', 'i4', 2)]) + assert_equal(np.array2string(struct_int), + "[([ 1, -1],) ([123, 1],)]") + struct_2dint = np.array([([[0, 1], [2, 3]],), ([[12, 0], [0, 0]],)], + dtype=[('B', 'i4', (2, 2))]) + assert_equal(np.array2string(struct_2dint), + "[([[ 0, 1], [ 2, 3]],) ([[12, 0], [ 0, 0]],)]") + + # See #8172 + array_scalar = np.array( + (1., 2.1234567890123456789, 3.), dtype=('f8,f8,f8')) + assert_equal(np.array2string(array_scalar), "(1., 2.12345679, 3.)") + + def test_unstructured_void_repr(self): + a = np.array([27, 91, 50, 75, 7, 65, 10, 8, + 
27, 91, 51, 49,109, 82,101,100], dtype='u1').view('V8') + assert_equal(repr(a[0]), r"void(b'\x1B\x5B\x32\x4B\x07\x41\x0A\x08')") + assert_equal(str(a[0]), r"b'\x1B\x5B\x32\x4B\x07\x41\x0A\x08'") + assert_equal(repr(a), + r"array([b'\x1B\x5B\x32\x4B\x07\x41\x0A\x08'," "\n" + r" b'\x1B\x5B\x33\x31\x6D\x52\x65\x64'], dtype='|V8')") + + assert_equal(eval(repr(a), vars(np)), a) + assert_equal(eval(repr(a[0]), vars(np)), a[0]) + + def test_edgeitems_kwarg(self): + # previously the global print options would be taken over the kwarg + arr = np.zeros(3, int) + assert_equal( + np.array2string(arr, edgeitems=1, threshold=0), + "[0 ... 0]" + ) + + def test_summarize_1d(self): + A = np.arange(1001) + strA = '[ 0 1 2 ... 998 999 1000]' + assert_equal(str(A), strA) + + reprA = 'array([ 0, 1, 2, ..., 998, 999, 1000])' + assert_equal(repr(A), reprA) + + def test_summarize_2d(self): + A = np.arange(1002).reshape(2, 501) + strA = '[[ 0 1 2 ... 498 499 500]\n' \ + ' [ 501 502 503 ... 999 1000 1001]]' + assert_equal(str(A), strA) + + reprA = 'array([[ 0, 1, 2, ..., 498, 499, 500],\n' \ + ' [ 501, 502, 503, ..., 999, 1000, 1001]])' + assert_equal(repr(A), reprA) + + def test_linewidth(self): + a = np.full(6, 1) + + def make_str(a, width, **kw): + return np.array2string(a, separator="", max_line_width=width, **kw) + + assert_equal(make_str(a, 8, legacy='1.13'), '[111111]') + assert_equal(make_str(a, 7, legacy='1.13'), '[111111]') + assert_equal(make_str(a, 5, legacy='1.13'), '[1111\n' + ' 11]') + + assert_equal(make_str(a, 8), '[111111]') + assert_equal(make_str(a, 7), '[11111\n' + ' 1]') + assert_equal(make_str(a, 5), '[111\n' + ' 111]') + + b = a[None,None,:] + + assert_equal(make_str(b, 12, legacy='1.13'), '[[[111111]]]') + assert_equal(make_str(b, 9, legacy='1.13'), '[[[111111]]]') + assert_equal(make_str(b, 8, legacy='1.13'), '[[[11111\n' + ' 1]]]') + + assert_equal(make_str(b, 12), '[[[111111]]]') + assert_equal(make_str(b, 9), '[[[111\n' + ' 111]]]') + assert_equal(make_str(b, 8), '[[[11\n' + ' 11\n' + ' 11]]]') + + def test_wide_element(self): + a = np.array(['xxxxx']) + assert_equal( + np.array2string(a, max_line_width=5), + "['xxxxx']" + ) + assert_equal( + np.array2string(a, max_line_width=5, legacy='1.13'), + "[ 'xxxxx']" + ) + + def test_multiline_repr(self): + class MultiLine: + def __repr__(self): + return "Line 1\nLine 2" + + a = np.array([[None, MultiLine()], [MultiLine(), None]]) + + assert_equal( + np.array2string(a), + '[[None Line 1\n' + ' Line 2]\n' + ' [Line 1\n' + ' Line 2 None]]' + ) + assert_equal( + np.array2string(a, max_line_width=5), + '[[None\n' + ' Line 1\n' + ' Line 2]\n' + ' [Line 1\n' + ' Line 2\n' + ' None]]' + ) + assert_equal( + repr(a), + 'array([[None, Line 1\n' + ' Line 2],\n' + ' [Line 1\n' + ' Line 2, None]], dtype=object)' + ) + + class MultiLineLong: + def __repr__(self): + return "Line 1\nLooooooooooongestLine2\nLongerLine 3" + + a = np.array([[None, MultiLineLong()], [MultiLineLong(), None]]) + assert_equal( + repr(a), + 'array([[None, Line 1\n' + ' LooooooooooongestLine2\n' + ' LongerLine 3 ],\n' + ' [Line 1\n' + ' LooooooooooongestLine2\n' + ' LongerLine 3 , None]], dtype=object)' + ) + assert_equal( + np.array_repr(a, 20), + 'array([[None,\n' + ' Line 1\n' + ' LooooooooooongestLine2\n' + ' LongerLine 3 ],\n' + ' [Line 1\n' + ' LooooooooooongestLine2\n' + ' LongerLine 3 ,\n' + ' None]],\n' + ' dtype=object)' + ) + + def test_nested_array_repr(self): + a = np.empty((2, 2), dtype=object) + a[0, 0] = np.eye(2) + a[0, 1] = np.eye(3) + a[1, 0] = None + a[1, 1] = 
np.ones((3, 1)) + assert_equal( + repr(a), + 'array([[array([[1., 0.],\n' + ' [0., 1.]]), array([[1., 0., 0.],\n' + ' [0., 1., 0.],\n' + ' [0., 0., 1.]])],\n' + ' [None, array([[1.],\n' + ' [1.],\n' + ' [1.]])]], dtype=object)' + ) + + @given(hynp.from_dtype(np.dtype("U"))) + def test_any_text(self, text): + # This test checks that, given any value that can be represented in an + # array of dtype("U") (i.e. unicode string), ... + a = np.array([text, text, text]) + # casting a list of them to an array does not e.g. truncate the value + assert_equal(a[0], text) + # and that np.array2string puts a newline in the expected location + expected_repr = "[{0!r} {0!r}\n {0!r}]".format(text) + result = np.array2string(a, max_line_width=len(repr(text)) * 2 + 3) + assert_equal(result, expected_repr) + + @pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") + def test_refcount(self): + # make sure we do not hold references to the array due to a recursive + # closure (gh-10620) + gc.disable() + a = np.arange(2) + r1 = sys.getrefcount(a) + np.array2string(a) + np.array2string(a) + r2 = sys.getrefcount(a) + gc.collect() + gc.enable() + assert_(r1 == r2) + +class TestPrintOptions: + """Test getting and setting global print options.""" + + def setup(self): + self.oldopts = np.get_printoptions() + + def teardown(self): + np.set_printoptions(**self.oldopts) + + def test_basic(self): + x = np.array([1.5, 0, 1.234567890]) + assert_equal(repr(x), "array([1.5 , 0. , 1.23456789])") + np.set_printoptions(precision=4) + assert_equal(repr(x), "array([1.5 , 0. , 1.2346])") + + def test_precision_zero(self): + np.set_printoptions(precision=0) + for values, string in ( + ([0.], "0."), ([.3], "0."), ([-.3], "-0."), ([.7], "1."), + ([1.5], "2."), ([-1.5], "-2."), ([-15.34], "-15."), + ([100.], "100."), ([.2, -1, 122.51], " 0., -1., 123."), + ([0], "0"), ([-12], "-12"), ([complex(.3, -.7)], "0.-1.j")): + x = np.array(values) + assert_equal(repr(x), "array([%s])" % string) + + def test_formatter(self): + x = np.arange(3) + np.set_printoptions(formatter={'all':lambda x: str(x-1)}) + assert_equal(repr(x), "array([-1, 0, 1])") + + def test_formatter_reset(self): + x = np.arange(3) + np.set_printoptions(formatter={'all':lambda x: str(x-1)}) + assert_equal(repr(x), "array([-1, 0, 1])") + np.set_printoptions(formatter={'int':None}) + assert_equal(repr(x), "array([0, 1, 2])") + + np.set_printoptions(formatter={'all':lambda x: str(x-1)}) + assert_equal(repr(x), "array([-1, 0, 1])") + np.set_printoptions(formatter={'all':None}) + assert_equal(repr(x), "array([0, 1, 2])") + + np.set_printoptions(formatter={'int':lambda x: str(x-1)}) + assert_equal(repr(x), "array([-1, 0, 1])") + np.set_printoptions(formatter={'int_kind':None}) + assert_equal(repr(x), "array([0, 1, 2])") + + x = np.arange(3.) + np.set_printoptions(formatter={'float':lambda x: str(x-1)}) + assert_equal(repr(x), "array([-1.0, 0.0, 1.0])") + np.set_printoptions(formatter={'float_kind':None}) + assert_equal(repr(x), "array([0., 1., 2.])") + + def test_0d_arrays(self): + assert_equal(str(np.array(u'café', '= dtype2.itemsize: + length = self.size // dtype1.itemsize + else: + length = self.size // dtype2.itemsize + + # Assume that the base array is well enough aligned for all inputs. 
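+        # (both arrays share the same `length`, computed above so that the
+        # larger of the two itemsizes still fits within `self.size` bytes)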
+ arr1 = np.empty(length, dtype=dtype1) + assert arr1.flags.c_contiguous + assert arr1.flags.aligned + + values = [random.randrange(-128, 128) for _ in range(length)] + + for i, value in enumerate(values): + # Use item assignment to ensure this is not using casting: + arr1[i] = value + + if dtype2 is None: + if dtype1.char == "?": + values = [bool(v) for v in values] + return arr1, values + + if dtype2.char == "?": + values = [bool(v) for v in values] + + arr2 = np.empty(length, dtype=dtype2) + assert arr2.flags.c_contiguous + assert arr2.flags.aligned + + for i, value in enumerate(values): + # Use item assignment to ensure this is not using casting: + arr2[i] = value + + return arr1, arr2, values + + def get_data_variation(self, arr1, arr2, aligned=True, contig=True): + """ + Returns a copy of arr1 that may be non-contiguous or unaligned, and a + matching array for arr2 (although not a copy). + """ + if contig: + stride1 = arr1.dtype.itemsize + stride2 = arr2.dtype.itemsize + elif aligned: + stride1 = 2 * arr1.dtype.itemsize + stride2 = 2 * arr2.dtype.itemsize + else: + stride1 = arr1.dtype.itemsize + 1 + stride2 = arr2.dtype.itemsize + 1 + + max_size1 = len(arr1) * 3 * arr1.dtype.itemsize + 1 + max_size2 = len(arr2) * 3 * arr2.dtype.itemsize + 1 + from_bytes = np.zeros(max_size1, dtype=np.uint8) + to_bytes = np.zeros(max_size2, dtype=np.uint8) + + # Sanity check that the above is large enough: + assert stride1 * len(arr1) <= from_bytes.nbytes + assert stride2 * len(arr2) <= to_bytes.nbytes + + if aligned: + new1 = as_strided(from_bytes[:-1].view(arr1.dtype), + arr1.shape, (stride1,)) + new2 = as_strided(to_bytes[:-1].view(arr2.dtype), + arr2.shape, (stride2,)) + else: + new1 = as_strided(from_bytes[1:].view(arr1.dtype), + arr1.shape, (stride1,)) + new2 = as_strided(to_bytes[1:].view(arr2.dtype), + arr2.shape, (stride2,)) + + new1[...] = arr1 + + if not contig: + # Ensure we did not overwrite bytes that should not be written: + offset = arr1.dtype.itemsize if aligned else 0 + buf = from_bytes[offset::stride1].tobytes() + assert buf.count(b"\0") == len(buf) + + if contig: + assert new1.flags.c_contiguous + assert new2.flags.c_contiguous + else: + assert not new1.flags.c_contiguous + assert not new2.flags.c_contiguous + + if aligned: + assert new1.flags.aligned + assert new2.flags.aligned + else: + assert not new1.flags.aligned or new1.dtype.alignment == 1 + assert not new2.flags.aligned or new2.dtype.alignment == 1 + + return new1, new2 + + @pytest.mark.parametrize("from_Dt", simple_dtypes) + def test_simple_cancast(self, from_Dt): + for to_Dt in simple_dtypes: + cast = get_castingimpl(from_Dt, to_Dt) + + for from_dt in [from_Dt(), from_Dt().newbyteorder()]: + default = cast._resolve_descriptors((from_dt, None))[1][1] + assert default == to_Dt() + del default + + for to_dt in [to_Dt(), to_Dt().newbyteorder()]: + casting, (from_res, to_res) = cast._resolve_descriptors( + (from_dt, to_dt)) + assert(type(from_res) == from_Dt) + assert(type(to_res) == to_Dt) + if casting & Casting.cast_is_view: + # If a view is acceptable, this is "no" casting + # and byte order must be matching. 
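+                        # (e.g. a float64 -> float64 cast with matching
+                        # byte order: no conversion is needed, the input
+                        # memory can simply be viewed)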
+                        assert casting == Casting.no | Casting.cast_is_view
+                        # The above table lists this as "equivalent"
+                        assert Casting.equiv == CAST_TABLE[from_Dt][to_Dt]
+                        # Note that to_res may not be the same as from_dt
+                        assert from_res.isnative == to_res.isnative
+                    else:
+                        if from_Dt == to_Dt:
+                            # Note that to_res may not be the same as from_dt
+                            assert from_res.isnative != to_res.isnative
+                        assert casting == CAST_TABLE[from_Dt][to_Dt]
+
+                    if from_Dt is to_Dt:
+                        assert(from_dt is from_res)
+                        assert(to_dt is to_res)
+
+
+    @pytest.mark.filterwarnings("ignore::numpy.ComplexWarning")
+    @pytest.mark.parametrize("from_dt", simple_dtype_instances())
+    def test_simple_direct_casts(self, from_dt):
+        """
+        This test checks numeric direct casts for dtypes supported also by
+        the struct module (plus complex).  It tries to test a wide range of
+        inputs, but skips over possibly undefined behaviour (e.g. int
+        rollover).  Longdouble and CLongdouble are tested, but only using
+        double precision.
+
+        If this test creates issues, it should possibly just be simplified
+        or even removed (checking whether unaligned/non-contiguous casts
+        give the same results is useful, though).
+        """
+        for to_dt in simple_dtype_instances():
+            to_dt = to_dt.values[0]
+            cast = get_castingimpl(type(from_dt), type(to_dt))
+
+            casting, (from_res, to_res) = cast._resolve_descriptors(
+                (from_dt, to_dt))
+
+            if from_res is not from_dt or to_res is not to_dt:
+                # Do not test this case; it is handled in multiple steps,
+                # each of which is tested individually.
+                return
+
+            safe = (casting & ~Casting.cast_is_view) <= Casting.safe
+            del from_res, to_res, casting
+
+            arr1, arr2, values = self.get_data(from_dt, to_dt)
+
+            cast._simple_strided_call((arr1, arr2))
+
+            # Check via python list
+            assert arr2.tolist() == values
+
+            # Check that the same results are achieved for strided loops
+            arr1_o, arr2_o = self.get_data_variation(arr1, arr2, True, False)
+            cast._simple_strided_call((arr1_o, arr2_o))
+
+            assert_array_equal(arr2_o, arr2)
+            assert arr2_o.tobytes() == arr2.tobytes()
+
+            # Check if alignment makes a difference, but only if supported
+            # and only if the alignment can be wrong
+            if ((from_dt.alignment == 1 and to_dt.alignment == 1) or
+                    not cast._supports_unaligned):
+                return
+
+            arr1_o, arr2_o = self.get_data_variation(arr1, arr2, False, True)
+            cast._simple_strided_call((arr1_o, arr2_o))
+
+            assert_array_equal(arr2_o, arr2)
+            assert arr2_o.tobytes() == arr2.tobytes()
+
+            arr1_o, arr2_o = self.get_data_variation(arr1, arr2, False, False)
+            cast._simple_strided_call((arr1_o, arr2_o))
+
+            assert_array_equal(arr2_o, arr2)
+            assert arr2_o.tobytes() == arr2.tobytes()
+
+            del arr1_o, arr2_o, cast
+
+    @pytest.mark.parametrize("from_Dt", simple_dtypes)
+    def test_numeric_to_times(self, from_Dt):
+        # We currently only implement contiguous loops, so we only need to
+        # test those.
+        from_dt = from_Dt()
+
+        time_dtypes = [np.dtype("M8"), np.dtype("M8[ms]"), np.dtype("M8[4D]"),
+                       np.dtype("m8"), np.dtype("m8[ms]"), np.dtype("m8[4D]")]
+        for time_dt in time_dtypes:
+            cast = get_castingimpl(type(from_dt), type(time_dt))
+
+            casting, (from_res, to_res) = cast._resolve_descriptors(
+                (from_dt, time_dt))
+
+            assert from_res is from_dt
+            assert to_res is time_dt
+            del from_res, to_res
+
+            assert(casting & CAST_TABLE[from_Dt][type(time_dt)])
+
+            int64_dt = np.dtype(np.int64)
+            arr1, arr2, values = self.get_data(from_dt, int64_dt)
+            arr2 = arr2.view(time_dt)
+            arr2[...]
= np.datetime64("NaT") + + if time_dt == np.dtype("M8"): + # This is a bit of a strange path, and could probably be removed + arr1[-1] = 0 # ensure at least one value is not NaT + + # The cast currently succeeds, but the values are invalid: + cast._simple_strided_call((arr1, arr2)) + with pytest.raises(ValueError): + str(arr2[-1]) # e.g. conversion to string fails + return + + cast._simple_strided_call((arr1, arr2)) + + assert [int(v) for v in arr2.tolist()] == values + + # Check that the same results are achieved for strided loops + arr1_o, arr2_o = self.get_data_variation(arr1, arr2, True, False) + cast._simple_strided_call((arr1_o, arr2_o)) + + assert_array_equal(arr2_o, arr2) + assert arr2_o.tobytes() == arr2.tobytes() + + @pytest.mark.parametrize( + ["from_dt", "to_dt", "expected_casting", "nom", "denom"], + [("M8[ns]", None, + Casting.no | Casting.cast_is_view, 1, 1), + (str(np.dtype("M8[ns]").newbyteorder()), None, Casting.equiv, 1, 1), + ("M8", "M8[ms]", Casting.safe | Casting.cast_is_view, 1, 1), + ("M8[ms]", "M8", Casting.unsafe, 1, 1), # should be invalid cast + ("M8[5ms]", "M8[5ms]", Casting.no | Casting.cast_is_view, 1, 1), + ("M8[ns]", "M8[ms]", Casting.same_kind, 1, 10**6), + ("M8[ms]", "M8[ns]", Casting.safe, 10**6, 1), + ("M8[ms]", "M8[7ms]", Casting.same_kind, 1, 7), + ("M8[4D]", "M8[1M]", Casting.same_kind, None, + # give full values based on NumPy 1.19.x + [-2**63, 0, -1, 1314, -1315, 564442610]), + ("m8[ns]", None, Casting.no | Casting.cast_is_view, 1, 1), + (str(np.dtype("m8[ns]").newbyteorder()), None, Casting.equiv, 1, 1), + ("m8", "m8[ms]", Casting.safe | Casting.cast_is_view, 1, 1), + ("m8[ms]", "m8", Casting.unsafe, 1, 1), # should be invalid cast + ("m8[5ms]", "m8[5ms]", Casting.no | Casting.cast_is_view, 1, 1), + ("m8[ns]", "m8[ms]", Casting.same_kind, 1, 10**6), + ("m8[ms]", "m8[ns]", Casting.safe, 10**6, 1), + ("m8[ms]", "m8[7ms]", Casting.same_kind, 1, 7), + ("m8[4D]", "m8[1M]", Casting.unsafe, None, + # give full values based on NumPy 1.19.x + [-2**63, 0, 0, 1314, -1315, 564442610])]) + def test_time_to_time(self, from_dt, to_dt, expected_casting, nom, denom): + from_dt = np.dtype(from_dt) + if to_dt is not None: + to_dt = np.dtype(to_dt) + + # Test a few values for casting (results generated with NumPy 1.19) + values = np.array([-2**63, 1, 2**63-1, 10000, -10000, 2**32]) + values = values.astype(np.dtype("int64").newbyteorder(from_dt.byteorder)) + assert values.dtype.byteorder == from_dt.byteorder + assert np.isnat(values.view(from_dt)[0]) + + DType = type(from_dt) + cast = get_castingimpl(DType, DType) + casting, (from_res, to_res) = cast._resolve_descriptors((from_dt, to_dt)) + assert from_res is from_dt + assert to_res is to_dt or to_dt is None + assert casting == expected_casting + + if nom is not None: + expected_out = (values * nom // denom).view(to_res) + expected_out[0] = "NaT" + else: + expected_out = np.empty_like(values) + expected_out[...] = denom + expected_out = expected_out.view(to_dt) + + orig_arr = values.view(from_dt) + orig_out = np.empty_like(expected_out) + + if casting == Casting.unsafe and (to_dt == "m8" or to_dt == "M8"): + # Casting from non-generic to generic units is an error and should + # probably be reported as an invalid cast earlier. + with pytest.raises(ValueError): + cast._simple_strided_call((orig_arr, orig_out)) + return + + for aligned in [True, True]: + for contig in [True, True]: + arr, out = self.get_data_variation( + orig_arr, orig_out, aligned, contig) + out[...] 
= 0 + cast._simple_strided_call((arr, out)) + assert_array_equal(out.view("int64"), expected_out.view("int64")) + + def string_with_modified_length(self, dtype, change_length): + fact = 1 if dtype.char == "S" else 4 + length = dtype.itemsize // fact + change_length + return np.dtype(f"{dtype.byteorder}{dtype.char}{length}") + + @pytest.mark.parametrize("other_DT", simple_dtypes) + @pytest.mark.parametrize("string_char", ["S", "U"]) + def test_string_cancast(self, other_DT, string_char): + fact = 1 if string_char == "S" else 4 + + string_DT = type(np.dtype(string_char)) + cast = get_castingimpl(other_DT, string_DT) + + other_dt = other_DT() + expected_length = get_expected_stringlength(other_dt) + string_dt = np.dtype(f"{string_char}{expected_length}") + + safety, (res_other_dt, res_dt) = cast._resolve_descriptors((other_dt, None)) + assert res_dt.itemsize == expected_length * fact + assert safety == Casting.safe # we consider to string casts "safe" + assert isinstance(res_dt, string_DT) + + # These casts currently implement changing the string length, so + # check the cast-safety for too long/fixed string lengths: + for change_length in [-1, 0, 1]: + if change_length >= 0: + expected_safety = Casting.safe + else: + expected_safety = Casting.same_kind + + to_dt = self.string_with_modified_length(string_dt, change_length) + safety, (_, res_dt) = cast._resolve_descriptors((other_dt, to_dt)) + assert res_dt is to_dt + assert safety == expected_safety + + # The opposite direction is always considered unsafe: + cast = get_castingimpl(string_DT, other_DT) + + safety, _ = cast._resolve_descriptors((string_dt, other_dt)) + assert safety == Casting.unsafe + + cast = get_castingimpl(string_DT, other_DT) + safety, (_, res_dt) = cast._resolve_descriptors((string_dt, None)) + assert safety == Casting.unsafe + assert other_dt is res_dt # returns the singleton for simple dtypes + + @pytest.mark.parametrize("string_char", ["S", "U"]) + @pytest.mark.parametrize("other_dt", simple_dtype_instances()) + def test_simple_string_casts_roundtrip(self, other_dt, string_char): + """ + Tests casts from and to string by checking the roundtripping property. + + The test also covers some string to string casts (but not all). + + If this test creates issues, it should possibly just be simplified + or even removed (checking whether unaligned/non-contiguous casts give + the same results is useful, though). 
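+
+        Roundtripping means: `other_dt -> "S"/"U" -> other_dt` recovers the
+        original values; booleans are excluded below since they do not
+        roundtrip.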
+ """ + string_DT = type(np.dtype(string_char)) + + cast = get_castingimpl(type(other_dt), string_DT) + cast_back = get_castingimpl(string_DT, type(other_dt)) + _, (res_other_dt, string_dt) = cast._resolve_descriptors((other_dt, None)) + + if res_other_dt is not other_dt: + # do not support non-native byteorder, skip test in that case + assert other_dt.byteorder != res_other_dt.byteorder + return + + orig_arr, values = self.get_data(other_dt, None) + str_arr = np.zeros(len(orig_arr), dtype=string_dt) + string_dt_short = self.string_with_modified_length(string_dt, -1) + str_arr_short = np.zeros(len(orig_arr), dtype=string_dt_short) + string_dt_long = self.string_with_modified_length(string_dt, 1) + str_arr_long = np.zeros(len(orig_arr), dtype=string_dt_long) + + assert not cast._supports_unaligned # if support is added, should test + assert not cast_back._supports_unaligned + + for contig in [True, False]: + other_arr, str_arr = self.get_data_variation( + orig_arr, str_arr, True, contig) + _, str_arr_short = self.get_data_variation( + orig_arr, str_arr_short.copy(), True, contig) + _, str_arr_long = self.get_data_variation( + orig_arr, str_arr_long, True, contig) + + cast._simple_strided_call((other_arr, str_arr)) + + cast._simple_strided_call((other_arr, str_arr_short)) + assert_array_equal(str_arr.astype(string_dt_short), str_arr_short) + + cast._simple_strided_call((other_arr, str_arr_long)) + assert_array_equal(str_arr, str_arr_long) + + if other_dt.kind == "b": + # Booleans do not roundtrip + continue + + other_arr[...] = 0 + cast_back._simple_strided_call((str_arr, other_arr)) + assert_array_equal(orig_arr, other_arr) + + other_arr[...] = 0 + cast_back._simple_strided_call((str_arr_long, other_arr)) + assert_array_equal(orig_arr, other_arr) + + @pytest.mark.parametrize("other_dt", ["S8", "U8"]) + @pytest.mark.parametrize("string_char", ["S", "U"]) + def test_string_to_string_cancast(self, other_dt, string_char): + other_dt = np.dtype(other_dt) + + fact = 1 if string_char == "S" else 4 + div = 1 if other_dt.char == "S" else 4 + + string_DT = type(np.dtype(string_char)) + cast = get_castingimpl(type(other_dt), string_DT) + + expected_length = other_dt.itemsize // div + string_dt = np.dtype(f"{string_char}{expected_length}") + + safety, (res_other_dt, res_dt) = cast._resolve_descriptors((other_dt, None)) + assert res_dt.itemsize == expected_length * fact + assert isinstance(res_dt, string_DT) + + if other_dt.char == string_char: + if other_dt.isnative: + expected_safety = Casting.no | Casting.cast_is_view + else: + expected_safety = Casting.equiv + elif string_char == "U": + expected_safety = Casting.safe + else: + expected_safety = Casting.unsafe + + assert expected_safety == safety + + for change_length in [-1, 0, 1]: + to_dt = self.string_with_modified_length(string_dt, change_length) + safety, (_, res_dt) = cast._resolve_descriptors((other_dt, to_dt)) + + assert res_dt is to_dt + if expected_safety == Casting.unsafe: + assert safety == expected_safety + elif change_length < 0: + assert safety == Casting.same_kind + elif change_length == 0: + assert safety == expected_safety + elif change_length > 0: + assert safety == Casting.safe + + @pytest.mark.parametrize("order1", [">", "<"]) + @pytest.mark.parametrize("order2", [">", "<"]) + def test_unicode_byteswapped_cast(self, order1, order2): + # Very specific tests (not using the castingimpl directly) + # that tests unicode bytedwaps including for unaligned array data. 
+        dtype1 = np.dtype(f"{order1}U30")
+        dtype2 = np.dtype(f"{order2}U30")
+        data1 = np.empty(30 * 4 + 1, dtype=np.uint8)[1:].view(dtype1)
+        data2 = np.empty(30 * 4 + 1, dtype=np.uint8)[1:].view(dtype2)
+        if dtype1.alignment != 1:
+            # alignment should always be >1, but skip the check if it is not
+            assert not data1.flags.aligned
+            assert not data2.flags.aligned
+
+        element = "this is a ünicode string‽"
+        data1[()] = element
+        # Test both `data1` and `data1.copy()` (which should be aligned)
+        for data in [data1, data1.copy()]:
+            data2[...] = data
+            assert data2[()] == element
+            assert data2.copy()[()] == element
+
+    def test_void_to_string_special_case(self):
+        # Cover a small special case in void to string casting that could
+        # probably just as well be turned into an error (compare
+        # `test_object_to_parametric_internal_error` below).
+        assert np.array([], dtype="V5").astype("S").dtype.itemsize == 5
+        assert np.array([], dtype="V5").astype("U").dtype.itemsize == 4 * 5
+
+    def test_object_to_parametric_internal_error(self):
+        # We reject casting from object to a parametric type without
+        # figuring out the correct instance first.
+        object_dtype = type(np.dtype(object))
+        other_dtype = type(np.dtype(str))
+        cast = get_castingimpl(object_dtype, other_dtype)
+        with pytest.raises(TypeError,
+                match="casting from object to the parametric DType"):
+            cast._resolve_descriptors((np.dtype("O"), None))
+
+    @pytest.mark.parametrize("dtype", simple_dtype_instances())
+    def test_object_and_simple_resolution(self, dtype):
+        # Simple test to exercise the cast when no instance is specified
+        object_dtype = type(np.dtype(object))
+        cast = get_castingimpl(object_dtype, type(dtype))
+
+        safety, (_, res_dt) = cast._resolve_descriptors((np.dtype("O"), dtype))
+        assert safety == Casting.unsafe
+        assert res_dt is dtype
+
+        safety, (_, res_dt) = cast._resolve_descriptors((np.dtype("O"), None))
+        assert safety == Casting.unsafe
+        assert res_dt == dtype.newbyteorder("=")
+
+    @pytest.mark.parametrize("dtype", simple_dtype_instances())
+    def test_simple_to_object_resolution(self, dtype):
+        # Simple test to exercise the cast when no instance is specified
+        object_dtype = type(np.dtype(object))
+        cast = get_castingimpl(type(dtype), object_dtype)
+
+        safety, (_, res_dt) = cast._resolve_descriptors((dtype, None))
+        assert safety == Casting.safe
+        assert res_dt is np.dtype("O")
+
+    @pytest.mark.parametrize("casting", ["no", "unsafe"])
+    def test_void_and_structured_with_subarray(self, casting):
+        # test case corresponding to gh-19325
+        dtype = np.dtype([("foo", "<f8", (3, 2))])
+        expected = casting == "unsafe"
+        assert np.can_cast("V4", dtype, casting=casting) == expected
+        assert np.can_cast(dtype, "V4", casting=casting) == expected
+
+    @pytest.mark.parametrize("dtype", np.typecodes["All"])
+    def test_object_casts_NULL_None_equivalence(self, dtype):
+        # None to <other> casts may succeed or fail, but a NULL'ed array must
+        # behave the same as one filled with None's.
+        arr_normal = np.array([None] * 5)
+        arr_NULLs = np.empty_like([None] * 5)
+        # If the check fails (maybe it should) the test would lose its purpose:
+        assert arr_NULLs.tobytes() == b"\x00" * arr_NULLs.nbytes
+
+        try:
+            expected = arr_normal.astype(dtype)
+        except TypeError:
+            with pytest.raises(TypeError):
+                arr_NULLs.astype(dtype)
+        else:
+            assert_array_equal(expected, arr_NULLs.astype(dtype))
+
+    @pytest.mark.parametrize("dtype",
+            np.typecodes["AllInteger"] + np.typecodes["AllFloat"])
+    def test_nonstandard_bool_to_other(self, dtype):
+        # simple test for casting bool_ to numeric types, which should not
+        # expose the detail that NumPy bools can sometimes take values other
+        # than 0 and 1.  See also gh-19514.
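+        # (the int8 bytes 0, 3, -7 viewed as bool read as
+        # [False, True, True]; the cast below must still produce exactly
+        # 0s and 1s)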
+ nonstandard_bools = np.array([0, 3, -7], dtype=np.int8).view(bool) + res = nonstandard_bools.astype(dtype) + expected = [0, 1, 1] + assert_array_equal(res, expected) + diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_conversion_utils.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_conversion_utils.py new file mode 100644 index 0000000..d8849ee --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_conversion_utils.py @@ -0,0 +1,205 @@ +""" +Tests for numpy/core/src/multiarray/conversion_utils.c +""" +import re + +import pytest + +import numpy as np +import numpy.core._multiarray_tests as mt +from numpy.testing import assert_warns + + +class StringConverterTestCase: + allow_bytes = True + case_insensitive = True + exact_match = False + warn = True + + def _check_value_error(self, val): + pattern = r'\(got {}\)'.format(re.escape(repr(val))) + with pytest.raises(ValueError, match=pattern) as exc: + self.conv(val) + + def _check_conv_assert_warn(self, val, expected): + if self.warn: + with assert_warns(DeprecationWarning) as exc: + assert self.conv(val) == expected + else: + assert self.conv(val) == expected + + def _check(self, val, expected): + """Takes valid non-deprecated inputs for converters, + runs converters on inputs, checks correctness of outputs, + warnings and errors""" + assert self.conv(val) == expected + + if self.allow_bytes: + assert self.conv(val.encode('ascii')) == expected + else: + with pytest.raises(TypeError): + self.conv(val.encode('ascii')) + + if len(val) != 1: + if self.exact_match: + self._check_value_error(val[:1]) + self._check_value_error(val + '\0') + else: + self._check_conv_assert_warn(val[:1], expected) + + if self.case_insensitive: + if val != val.lower(): + self._check_conv_assert_warn(val.lower(), expected) + if val != val.upper(): + self._check_conv_assert_warn(val.upper(), expected) + else: + if val != val.lower(): + self._check_value_error(val.lower()) + if val != val.upper(): + self._check_value_error(val.upper()) + + def test_wrong_type(self): + # common cases which apply to all the below + with pytest.raises(TypeError): + self.conv({}) + with pytest.raises(TypeError): + self.conv([]) + + def test_wrong_value(self): + # nonsense strings + self._check_value_error('') + self._check_value_error('\N{greek small letter pi}') + + if self.allow_bytes: + self._check_value_error(b'') + # bytes which can't be converted to strings via utf8 + self._check_value_error(b"\xFF") + if self.exact_match: + self._check_value_error("there's no way this is supported") + + +class TestByteorderConverter(StringConverterTestCase): + """ Tests of PyArray_ByteorderConverter """ + conv = mt.run_byteorder_converter + warn = False + + def test_valid(self): + for s in ['big', '>']: + self._check(s, 'NPY_BIG') + for s in ['little', '<']: + self._check(s, 'NPY_LITTLE') + for s in ['native', '=']: + self._check(s, 'NPY_NATIVE') + for s in ['ignore', '|']: + self._check(s, 'NPY_IGNORE') + for s in ['swap']: + self._check(s, 'NPY_SWAP') + + +class TestSortkindConverter(StringConverterTestCase): + """ Tests of PyArray_SortkindConverter """ + conv = mt.run_sortkind_converter + warn = False + + def test_valid(self): + self._check('quicksort', 'NPY_QUICKSORT') + self._check('heapsort', 'NPY_HEAPSORT') + self._check('mergesort', 'NPY_STABLESORT') # alias + self._check('stable', 'NPY_STABLESORT') + + +class TestSelectkindConverter(StringConverterTestCase): + """ Tests of PyArray_SelectkindConverter """ + conv = 
mt.run_selectkind_converter + case_insensitive = False + exact_match = True + + def test_valid(self): + self._check('introselect', 'NPY_INTROSELECT') + + +class TestSearchsideConverter(StringConverterTestCase): + """ Tests of PyArray_SearchsideConverter """ + conv = mt.run_searchside_converter + def test_valid(self): + self._check('left', 'NPY_SEARCHLEFT') + self._check('right', 'NPY_SEARCHRIGHT') + + +class TestOrderConverter(StringConverterTestCase): + """ Tests of PyArray_OrderConverter """ + conv = mt.run_order_converter + warn = False + + def test_valid(self): + self._check('c', 'NPY_CORDER') + self._check('f', 'NPY_FORTRANORDER') + self._check('a', 'NPY_ANYORDER') + self._check('k', 'NPY_KEEPORDER') + + def test_flatten_invalid_order(self): + # invalid after gh-14596 + with pytest.raises(ValueError): + self.conv('Z') + for order in [False, True, 0, 8]: + with pytest.raises(TypeError): + self.conv(order) + + +class TestClipmodeConverter(StringConverterTestCase): + """ Tests of PyArray_ClipmodeConverter """ + conv = mt.run_clipmode_converter + def test_valid(self): + self._check('clip', 'NPY_CLIP') + self._check('wrap', 'NPY_WRAP') + self._check('raise', 'NPY_RAISE') + + # integer values allowed here + assert self.conv(np.CLIP) == 'NPY_CLIP' + assert self.conv(np.WRAP) == 'NPY_WRAP' + assert self.conv(np.RAISE) == 'NPY_RAISE' + + +class TestCastingConverter(StringConverterTestCase): + """ Tests of PyArray_CastingConverter """ + conv = mt.run_casting_converter + case_insensitive = False + exact_match = True + + def test_valid(self): + self._check("no", "NPY_NO_CASTING") + self._check("equiv", "NPY_EQUIV_CASTING") + self._check("safe", "NPY_SAFE_CASTING") + self._check("same_kind", "NPY_SAME_KIND_CASTING") + self._check("unsafe", "NPY_UNSAFE_CASTING") + + +class TestIntpConverter: + """ Tests of PyArray_IntpConverter """ + conv = mt.run_intp_converter + + def test_basic(self): + assert self.conv(1) == (1,) + assert self.conv((1, 2)) == (1, 2) + assert self.conv([1, 2]) == (1, 2) + assert self.conv(()) == () + + def test_none(self): + # once the warning expires, this will raise TypeError + with pytest.warns(DeprecationWarning): + assert self.conv(None) == () + + def test_float(self): + with pytest.raises(TypeError): + self.conv(1.0) + with pytest.raises(TypeError): + self.conv([1, 1.0]) + + def test_too_large(self): + with pytest.raises(ValueError): + self.conv(2**64) + + def test_too_many_dims(self): + assert self.conv([1]*32) == (1,)*32 + with pytest.raises(ValueError): + self.conv([1]*33) diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_cpu_dispatcher.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_cpu_dispatcher.py new file mode 100644 index 0000000..2f7eac7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_cpu_dispatcher.py @@ -0,0 +1,42 @@ +from numpy.core._multiarray_umath import __cpu_features__, __cpu_baseline__, __cpu_dispatch__ +from numpy.core import _umath_tests +from numpy.testing import assert_equal + +def test_dispatcher(): + """ + Testing the utilities of the CPU dispatcher + """ + targets = ( + "SSE2", "SSE41", "AVX2", + "VSX", "VSX2", "VSX3", + "NEON", "ASIMD", "ASIMDHP" + ) + highest_sfx = "" # no suffix for the baseline + all_sfx = [] + for feature in reversed(targets): + # skip baseline features, by the default `CCompilerOpt` do not generate separated objects + # for the baseline, just one object combined all of them via 'baseline' option + # within the configuration statements. 
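+        # e.g. (illustrative): if "AVX2" is both compiled in and available
+        # at runtime, the highest suffix becomes "_AVX2" and the dispatched
+        # symbols below resolve to "func_AVX2"/"var_AVX2"; otherwise they
+        # fall back to the plain baseline "func"/"var".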
+ if feature in __cpu_baseline__: + continue + # check compiler and running machine support + if feature not in __cpu_dispatch__ or not __cpu_features__[feature]: + continue + + if not highest_sfx: + highest_sfx = "_" + feature + all_sfx.append("func" + "_" + feature) + + test = _umath_tests.test_dispatch() + assert_equal(test["func"], "func" + highest_sfx) + assert_equal(test["var"], "var" + highest_sfx) + + if highest_sfx: + assert_equal(test["func_xb"], "func" + highest_sfx) + assert_equal(test["var_xb"], "var" + highest_sfx) + else: + assert_equal(test["func_xb"], "nobase") + assert_equal(test["var_xb"], "nobase") + + all_sfx.append("func") # add the baseline + assert_equal(test["all"], all_sfx) diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_cpu_features.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_cpu_features.py new file mode 100644 index 0000000..2ccbff4 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_cpu_features.py @@ -0,0 +1,174 @@ +import sys, platform, re, pytest +from numpy.core._multiarray_umath import __cpu_features__ + +def assert_features_equal(actual, desired, fname): + __tracebackhide__ = True # Hide traceback for py.test + actual, desired = str(actual), str(desired) + if actual == desired: + return + detected = str(__cpu_features__).replace("'", "") + try: + with open("/proc/cpuinfo", "r") as fd: + cpuinfo = fd.read(2048) + except Exception as err: + cpuinfo = str(err) + + try: + import subprocess + auxv = subprocess.check_output(['/bin/true'], env=dict(LD_SHOW_AUXV="1")) + auxv = auxv.decode() + except Exception as err: + auxv = str(err) + + import textwrap + error_report = textwrap.indent( +""" +########################################### +### Extra debugging information +########################################### +------------------------------------------- +--- NumPy Detections +------------------------------------------- +%s +------------------------------------------- +--- SYS / CPUINFO +------------------------------------------- +%s.... 
+------------------------------------------- +--- SYS / AUXV +------------------------------------------- +%s +""" % (detected, cpuinfo, auxv), prefix='\r') + + raise AssertionError(( + "Failure Detection\n" + " NAME: '%s'\n" + " ACTUAL: %s\n" + " DESIRED: %s\n" + "%s" + ) % (fname, actual, desired, error_report)) + +class AbstractTest: + features = [] + features_groups = {} + features_map = {} + features_flags = set() + + def load_flags(self): + # a hook + pass + def test_features(self): + self.load_flags() + for gname, features in self.features_groups.items(): + test_features = [self.cpu_have(f) for f in features] + assert_features_equal(__cpu_features__.get(gname), all(test_features), gname) + + for feature_name in self.features: + cpu_have = self.cpu_have(feature_name) + npy_have = __cpu_features__.get(feature_name) + assert_features_equal(npy_have, cpu_have, feature_name) + + def cpu_have(self, feature_name): + map_names = self.features_map.get(feature_name, feature_name) + if isinstance(map_names, str): + return map_names in self.features_flags + for f in map_names: + if f in self.features_flags: + return True + return False + + def load_flags_cpuinfo(self, magic_key): + self.features_flags = self.get_cpuinfo_item(magic_key) + + def get_cpuinfo_item(self, magic_key): + values = set() + with open('/proc/cpuinfo') as fd: + for line in fd: + if not line.startswith(magic_key): + continue + flags_value = [s.strip() for s in line.split(':', 1)] + if len(flags_value) == 2: + values = values.union(flags_value[1].upper().split()) + return values + + def load_flags_auxv(self): + import subprocess + auxv = subprocess.check_output(['/bin/true'], env=dict(LD_SHOW_AUXV="1")) + for at in auxv.split(b'\n'): + if not at.startswith(b"AT_HWCAP"): + continue + hwcap_value = [s.strip() for s in at.split(b':', 1)] + if len(hwcap_value) == 2: + self.features_flags = self.features_flags.union( + hwcap_value[1].upper().decode().split() + ) + +is_linux = sys.platform.startswith('linux') +is_cygwin = sys.platform.startswith('cygwin') +machine = platform.machine() +is_x86 = re.match("^(amd64|x86|i386|i686)", machine, re.IGNORECASE) +@pytest.mark.skipif( + not (is_linux or is_cygwin) or not is_x86, reason="Only for Linux and x86" +) +class Test_X86_Features(AbstractTest): + features = [ + "MMX", "SSE", "SSE2", "SSE3", "SSSE3", "SSE41", "POPCNT", "SSE42", + "AVX", "F16C", "XOP", "FMA4", "FMA3", "AVX2", "AVX512F", "AVX512CD", + "AVX512ER", "AVX512PF", "AVX5124FMAPS", "AVX5124VNNIW", "AVX512VPOPCNTDQ", + "AVX512VL", "AVX512BW", "AVX512DQ", "AVX512VNNI", "AVX512IFMA", + "AVX512VBMI", "AVX512VBMI2", "AVX512BITALG", + ] + features_groups = dict( + AVX512_KNL = ["AVX512F", "AVX512CD", "AVX512ER", "AVX512PF"], + AVX512_KNM = ["AVX512F", "AVX512CD", "AVX512ER", "AVX512PF", "AVX5124FMAPS", + "AVX5124VNNIW", "AVX512VPOPCNTDQ"], + AVX512_SKX = ["AVX512F", "AVX512CD", "AVX512BW", "AVX512DQ", "AVX512VL"], + AVX512_CLX = ["AVX512F", "AVX512CD", "AVX512BW", "AVX512DQ", "AVX512VL", "AVX512VNNI"], + AVX512_CNL = ["AVX512F", "AVX512CD", "AVX512BW", "AVX512DQ", "AVX512VL", "AVX512IFMA", + "AVX512VBMI"], + AVX512_ICL = ["AVX512F", "AVX512CD", "AVX512BW", "AVX512DQ", "AVX512VL", "AVX512IFMA", + "AVX512VBMI", "AVX512VNNI", "AVX512VBMI2", "AVX512BITALG", "AVX512VPOPCNTDQ"], + ) + features_map = dict( + SSE3="PNI", SSE41="SSE4_1", SSE42="SSE4_2", FMA3="FMA", + AVX512VNNI="AVX512_VNNI", AVX512BITALG="AVX512_BITALG", AVX512VBMI2="AVX512_VBMI2", + AVX5124FMAPS="AVX512_4FMAPS", AVX5124VNNIW="AVX512_4VNNIW", 
AVX512VPOPCNTDQ="AVX512_VPOPCNTDQ", + ) + def load_flags(self): + self.load_flags_cpuinfo("flags") + +is_power = re.match("^(powerpc|ppc)64", machine, re.IGNORECASE) +@pytest.mark.skipif(not is_linux or not is_power, reason="Only for Linux and Power") +class Test_POWER_Features(AbstractTest): + features = ["VSX", "VSX2", "VSX3"] + features_map = dict(VSX2="ARCH_2_07", VSX3="ARCH_3_00") + + def load_flags(self): + self.load_flags_auxv() + +is_arm = re.match("^(arm|aarch64)", machine, re.IGNORECASE) +@pytest.mark.skipif(not is_linux or not is_arm, reason="Only for Linux and ARM") +class Test_ARM_Features(AbstractTest): + features = [ + "NEON", "ASIMD", "FPHP", "ASIMDHP", "ASIMDDP", "ASIMDFHM" + ] + features_groups = dict( + NEON_FP16 = ["NEON", "HALF"], + NEON_VFPV4 = ["NEON", "VFPV4"], + ) + def load_flags(self): + self.load_flags_cpuinfo("Features") + arch = self.get_cpuinfo_item("CPU architecture") + # in case of mounting virtual filesystem of aarch64 kernel + is_rootfs_v8 = int('0'+next(iter(arch))) > 7 if arch else 0 + if re.match("^(aarch64|AARCH64)", machine) or is_rootfs_v8: + self.features_map = dict( + NEON="ASIMD", HALF="ASIMD", VFPV4="ASIMD" + ) + else: + self.features_map = dict( + # ELF auxiliary vector and /proc/cpuinfo on Linux kernel(armv8 aarch32) + # doesn't provide information about ASIMD, so we assume that ASIMD is supported + # if the kernel reports any one of the following ARM8 features. + ASIMD=("AES", "SHA1", "SHA2", "PMULL", "CRC32") + ) diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_custom_dtypes.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_custom_dtypes.py new file mode 100644 index 0000000..6bcc45d --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_custom_dtypes.py @@ -0,0 +1,201 @@ +import pytest + +import numpy as np +from numpy.testing import assert_array_equal +from numpy.core._multiarray_umath import ( + _discover_array_parameters as discover_array_params, _get_sfloat_dtype) + + +SF = _get_sfloat_dtype() + + +class TestSFloat: + def _get_array(self, scaling, aligned=True): + if not aligned: + a = np.empty(3*8 + 1, dtype=np.uint8)[1:] + a = a.view(np.float64) + a[:] = [1., 2., 3.] + else: + a = np.array([1., 2., 3.]) + + a *= 1./scaling # the casting code also uses the reciprocal. + return a.view(SF(scaling)) + + def test_sfloat_rescaled(self): + sf = SF(1.) + sf2 = sf.scaled_by(2.) + assert sf2.get_scaling() == 2. + sf6 = sf2.scaled_by(3.) + assert sf6.get_scaling() == 6. + + def test_class_discovery(self): + # This does not test much, since we always discover the scaling as 1. + # But most of NumPy (when writing) does not understand DType classes + dt, _ = discover_array_params([1., 2., 3.], dtype=SF) + assert dt == SF(1.) 
+ + @pytest.mark.parametrize("scaling", [1., -1., 2.]) + def test_scaled_float_from_floats(self, scaling): + a = np.array([1., 2., 3.], dtype=SF(scaling)) + + assert a.dtype.get_scaling() == scaling + assert_array_equal(scaling * a.view(np.float64), [1., 2., 3.]) + + def test_repr(self): + # Check the repr, mainly to cover the code paths: + assert repr(SF(scaling=1.)) == "_ScaledFloatTestDType(scaling=1.0)" + + @pytest.mark.parametrize("scaling", [1., -1., 2.]) + def test_sfloat_from_float(self, scaling): + a = np.array([1., 2., 3.]).astype(dtype=SF(scaling)) + + assert a.dtype.get_scaling() == scaling + assert_array_equal(scaling * a.view(np.float64), [1., 2., 3.]) + + @pytest.mark.parametrize("aligned", [True, False]) + @pytest.mark.parametrize("scaling", [1., -1., 2.]) + def test_sfloat_getitem(self, aligned, scaling): + a = self._get_array(1., aligned) + assert a.tolist() == [1., 2., 3.] + + @pytest.mark.parametrize("aligned", [True, False]) + def test_sfloat_casts(self, aligned): + a = self._get_array(1., aligned) + + assert np.can_cast(a, SF(-1.), casting="equiv") + assert not np.can_cast(a, SF(-1.), casting="no") + na = a.astype(SF(-1.)) + assert_array_equal(-1 * na.view(np.float64), a.view(np.float64)) + + assert np.can_cast(a, SF(2.), casting="same_kind") + assert not np.can_cast(a, SF(2.), casting="safe") + a2 = a.astype(SF(2.)) + assert_array_equal(2 * a2.view(np.float64), a.view(np.float64)) + + @pytest.mark.parametrize("aligned", [True, False]) + def test_sfloat_cast_internal_errors(self, aligned): + a = self._get_array(2e300, aligned) + + with pytest.raises(TypeError, + match="error raised inside the core-loop: non-finite factor!"): + a.astype(SF(2e-300)) + + def test_sfloat_promotion(self): + assert np.result_type(SF(2.), SF(3.)) == SF(3.) + assert np.result_type(SF(3.), SF(2.)) == SF(3.) + # Float64 -> SF(1.) and then promotes normally, so both of these work: + assert np.result_type(SF(3.), np.float64) == SF(3.) + assert np.result_type(np.float64, SF(0.5)) == SF(1.) + + # Test an undefined promotion: + with pytest.raises(TypeError): + np.result_type(SF(1.), np.int64) + + def test_basic_multiply(self): + a = self._get_array(2.) + b = self._get_array(4.) + + res = a * b + # multiplies dtype scaling and content separately: + assert res.dtype.get_scaling() == 8. + expected_view = a.view(np.float64) * b.view(np.float64) + assert_array_equal(res.view(np.float64), expected_view) + + def test_possible_and_impossible_reduce(self): + # For reductions to work, the first and last operand must have the + # same dtype. For this parametric DType that is not necessarily true. + a = self._get_array(2.) + # Addition reduction works (as of writing it requires passing initial + # because setting a scaled-float from the default `0` fails). + res = np.add.reduce(a, initial=0.) + assert res == a.astype(np.float64).sum() + + # But each multiplication changes the factor, so a reduction is not + # possible (the relaxed version of the old refusal to handle any + # flexible dtype). + with pytest.raises(TypeError, + match="the resolved dtypes are not compatible"): + np.multiply.reduce(a) + + def test_basic_ufunc_at(self): + float_a = np.array([1., 2., 3.]) + b = self._get_array(2.) + + float_b = b.view(np.float64).copy() + np.multiply.at(float_b, [1, 1, 1], float_a) + np.multiply.at(b, [1, 1, 1], float_a) + + assert_array_equal(b.view(np.float64), float_b) + + def test_basic_multiply_promotion(self): + float_a = np.array([1., 2., 3.]) + b = self._get_array(2.)
+ + res1 = float_a * b + res2 = b * float_a + + # one factor is one, so we get the factor of b: + assert res1.dtype == res2.dtype == b.dtype + expected_view = float_a * b.view(np.float64) + assert_array_equal(res1.view(np.float64), expected_view) + assert_array_equal(res2.view(np.float64), expected_view) + + # Check that promotion works when `out` is used: + np.multiply(b, float_a, out=res2) + with pytest.raises(TypeError): + # The promoter accepts this (maybe it should not), but the SFloat + # result cannot be cast to integer: + np.multiply(b, float_a, out=np.arange(3)) + + def test_basic_addition(self): + a = self._get_array(2.) + b = self._get_array(4.) + + res = a + b + # addition uses the type promotion rules for the result: + assert res.dtype == np.result_type(a.dtype, b.dtype) + expected_view = (a.astype(res.dtype).view(np.float64) + + b.astype(res.dtype).view(np.float64)) + assert_array_equal(res.view(np.float64), expected_view) + + def test_addition_cast_safety(self): + """The addition method is special for the scaled float, because it + includes the "cast" between different factors, thus cast-safety + is influenced by the implementation. + """ + a = self._get_array(2.) + b = self._get_array(-2.) + c = self._get_array(3.) + + # sign change is "equiv": + np.add(a, b, casting="equiv") + with pytest.raises(TypeError): + np.add(a, b, casting="no") + + # Different factor is "same_kind" (default) so check that "safe" fails + with pytest.raises(TypeError): + np.add(a, c, casting="safe") + + # Check that casting the output fails also (done by the ufunc here) + with pytest.raises(TypeError): + np.add(a, a, out=c, casting="safe") + + @pytest.mark.parametrize("ufunc", + [np.logical_and, np.logical_or, np.logical_xor]) + def test_logical_ufuncs_casts_to_bool(self, ufunc): + a = self._get_array(2.) + a[0] = 0. # make sure first element is considered False. + + float_equiv = a.astype(float) + expected = ufunc(float_equiv, float_equiv) + res = ufunc(a, a) + assert_array_equal(res, expected) + + # also check that the same works for reductions: + expected = ufunc.reduce(float_equiv) + res = ufunc.reduce(a) + assert_array_equal(res, expected) + + # The output casting does not match the bool, bool -> bool loop: + with pytest.raises(TypeError): + ufunc(a, a, out=np.empty(a.shape, dtype=int), casting="equiv") diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_cython.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_cython.py new file mode 100644 index 0000000..9896de0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_cython.py @@ -0,0 +1,134 @@ +import os +import shutil +import subprocess +import sys +import pytest + +import numpy as np + +# This import is copied from random.tests.test_extending +try: + import cython + from Cython.Compiler.Version import version as cython_version +except ImportError: + cython = None +else: + from distutils.version import LooseVersion + + # Cython 0.29.21 is required for Python 3.9 and there are + # other fixes in the 0.29 series that are needed even for earlier + # Python versions. 
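# (A side note on the version gate above: LooseVersion compares release
#  components numerically rather than lexically, so
#  LooseVersion("0.29.21") > LooseVersion("0.29.9") holds even though the
#  plain string "0.29.21" sorts before "0.29.9".)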
+ # Note: keep in sync with the one in pyproject.toml + required_version = LooseVersion("0.29.21") + if LooseVersion(cython_version) < required_version: + # too old or wrong cython, skip the test + cython = None + +pytestmark = pytest.mark.skipif(cython is None, reason="requires cython") + + +@pytest.fixture +def install_temp(request, tmp_path): + # Based in part on test_cython from random.tests.test_extending + + here = os.path.dirname(__file__) + ext_dir = os.path.join(here, "examples", "cython") + + cytest = str(tmp_path / "cytest") + + shutil.copytree(ext_dir, cytest) + # build the examples and "install" them into a temporary directory + + install_log = str(tmp_path / "tmp_install_log.txt") + subprocess.check_call( + [ + sys.executable, + "setup.py", + "build", + "install", + "--prefix", str(tmp_path / "installdir"), + "--single-version-externally-managed", + "--record", + install_log, + ], + cwd=cytest, + ) + + # In order to import the built module, we need its path to sys.path + # so parse that out of the record + with open(install_log) as fid: + for line in fid: + if "checks" in line: + sys.path.append(os.path.dirname(line)) + break + else: + raise RuntimeError(f'could not parse "{install_log}"') + + +def test_is_timedelta64_object(install_temp): + import checks + + assert checks.is_td64(np.timedelta64(1234)) + assert checks.is_td64(np.timedelta64(1234, "ns")) + assert checks.is_td64(np.timedelta64("NaT", "ns")) + + assert not checks.is_td64(1) + assert not checks.is_td64(None) + assert not checks.is_td64("foo") + assert not checks.is_td64(np.datetime64("now", "s")) + + +def test_is_datetime64_object(install_temp): + import checks + + assert checks.is_dt64(np.datetime64(1234, "ns")) + assert checks.is_dt64(np.datetime64("NaT", "ns")) + + assert not checks.is_dt64(1) + assert not checks.is_dt64(None) + assert not checks.is_dt64("foo") + assert not checks.is_dt64(np.timedelta64(1234)) + + +def test_get_datetime64_value(install_temp): + import checks + + dt64 = np.datetime64("2016-01-01", "ns") + + result = checks.get_dt64_value(dt64) + expected = dt64.view("i8") + + assert result == expected + + +def test_get_timedelta64_value(install_temp): + import checks + + td64 = np.timedelta64(12345, "h") + + result = checks.get_td64_value(td64) + expected = td64.view("i8") + + assert result == expected + + +def test_get_datetime64_unit(install_temp): + import checks + + dt64 = np.datetime64("2016-01-01", "ns") + result = checks.get_dt64_unit(dt64) + expected = 10 + assert result == expected + + td64 = np.timedelta64(12345, "h") + result = checks.get_dt64_unit(td64) + expected = 5 + assert result == expected + + +def test_abstract_scalars(install_temp): + import checks + + assert checks.is_integer(1) + assert checks.is_integer(np.int8(1)) + assert checks.is_integer(np.uint64(1)) diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_datetime.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_datetime.py new file mode 100644 index 0000000..5294c7b --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_datetime.py @@ -0,0 +1,2528 @@ + +import numpy +import numpy as np +import datetime +import pytest +from numpy.testing import ( + assert_, assert_equal, assert_raises, assert_warns, suppress_warnings, + assert_raises_regex, assert_array_equal, + ) +from numpy.compat import pickle + +# Use pytz to test out various time zones if available +try: + from pytz import timezone as tz + _has_pytz = True +except ImportError: + _has_pytz = False + +try: + 
RecursionError +except NameError: + RecursionError = RuntimeError # python < 3.5 + + +class TestDateTime: + def test_datetime_dtype_creation(self): + for unit in ['Y', 'M', 'W', 'D', + 'h', 'm', 's', 'ms', 'us', + 'μs', # alias for us + 'ns', 'ps', 'fs', 'as']: + dt1 = np.dtype('M8[750%s]' % unit) + assert_(dt1 == np.dtype('datetime64[750%s]' % unit)) + dt2 = np.dtype('m8[%s]' % unit) + assert_(dt2 == np.dtype('timedelta64[%s]' % unit)) + + # Generic units shouldn't add [] to the end + assert_equal(str(np.dtype("M8")), "datetime64") + + # Should be possible to specify the endianness + assert_equal(np.dtype("=M8"), np.dtype("M8")) + assert_equal(np.dtype("=M8[s]"), np.dtype("M8[s]")) + assert_(np.dtype(">M8") == np.dtype("M8") or + np.dtype("<M8") == np.dtype("M8")) + assert_(np.dtype(">M8[D]") == np.dtype("M8[D]") or + np.dtype("<M8[D]") == np.dtype("M8[D]")) + assert_(np.dtype(">M8") != np.dtype("<M8")) + + assert_equal(np.dtype("=m8"), np.dtype("m8")) + assert_equal(np.dtype("=m8[s]"), np.dtype("m8[s]")) + assert_(np.dtype(">m8") == np.dtype("m8") or + np.dtype("<m8") == np.dtype("m8")) + assert_(np.dtype(">m8[D]") == np.dtype("m8[D]") or + np.dtype("<m8[D]") == np.dtype("m8[D]")) + assert_(np.dtype(">m8") != np.dtype("<m8")) + + def test_datetime_nat_casting(self): + a = np.array('NaT', dtype='M8[D]') + b = np.datetime64('NaT', '[D]') + + # Arrays + assert_equal(a.astype('M8[s]'), np.array('NaT', dtype='M8[s]')) + assert_equal(a.astype('M8[ms]'), np.array('NaT', dtype='M8[ms]')) + assert_equal(a.astype('M8[M]'), np.array('NaT', dtype='M8[M]')) + assert_equal(a.astype('M8[Y]'), np.array('NaT', dtype='M8[Y]')) + assert_equal(a.astype('M8[W]'), np.array('NaT', dtype='M8[W]')) + + # Scalars -> Scalars + assert_equal(np.datetime64(b, '[s]'), np.datetime64('NaT', '[s]')) + assert_equal(np.datetime64(b, '[ms]'), np.datetime64('NaT', '[ms]')) + assert_equal(np.datetime64(b, '[M]'), np.datetime64('NaT', '[M]')) + assert_equal(np.datetime64(b, '[Y]'), np.datetime64('NaT', '[Y]')) + assert_equal(np.datetime64(b, '[W]'), np.datetime64('NaT', '[W]')) + + # Arrays -> Scalars + assert_equal(np.datetime64(a, '[s]'), np.datetime64('NaT', '[s]')) + assert_equal(np.datetime64(a, '[ms]'), np.datetime64('NaT', '[ms]')) + assert_equal(np.datetime64(a, '[M]'), np.datetime64('NaT', '[M]')) + assert_equal(np.datetime64(a, '[Y]'), np.datetime64('NaT', '[Y]')) + assert_equal(np.datetime64(a, '[W]'), np.datetime64('NaT', '[W]')) + + # NaN -> NaT + nan = np.array([np.nan] * 8) + fnan = nan.astype('f') + lnan = nan.astype('g') + cnan = nan.astype('D') + cfnan = nan.astype('F') + clnan = nan.astype('G') + + nat = np.array([np.datetime64('NaT')] * 8) + assert_equal(nan.astype('M8[ns]'), nat) + assert_equal(fnan.astype('M8[ns]'), nat) + assert_equal(lnan.astype('M8[ns]'), nat) + assert_equal(cnan.astype('M8[ns]'), nat) + assert_equal(cfnan.astype('M8[ns]'), nat) + assert_equal(clnan.astype('M8[ns]'), nat) + + nat = np.array([np.timedelta64('NaT')] * 8) + assert_equal(nan.astype('timedelta64[ns]'), nat) + assert_equal(fnan.astype('timedelta64[ns]'), nat) + assert_equal(lnan.astype('timedelta64[ns]'), nat) + assert_equal(cnan.astype('timedelta64[ns]'), nat) + assert_equal(cfnan.astype('timedelta64[ns]'), nat) + assert_equal(clnan.astype('timedelta64[ns]'), nat) + + def test_days_creation(self): + assert_equal(np.array('1599', dtype='M8[D]').astype('i8'), + (1600-1970)*365 - (1972-1600)/4 + 3 - 365) + assert_equal(np.array('1600', dtype='M8[D]').astype('i8'), + (1600-1970)*365 - (1972-1600)/4 + 3) + assert_equal(np.array('1601', dtype='M8[D]').astype('i8'), + (1600-1970)*365 - (1972-1600)/4 + 3 + 366) + assert_equal(np.array('1900', dtype='M8[D]').astype('i8'), + (1900-1970)*365 - (1970-1900)//4) + assert_equal(np.array('1901', dtype='M8[D]').astype('i8'), + (1900-1970)*365 - (1970-1900)//4 + 365) + assert_equal(np.array('1967', dtype='M8[D]').astype('i8'), -3*365 - 1) + assert_equal(np.array('1968', dtype='M8[D]').astype('i8'), -2*365 - 1) + assert_equal(np.array('1969', dtype='M8[D]').astype('i8'), -1*365) + assert_equal(np.array('1970', dtype='M8[D]').astype('i8'), 0*365) + assert_equal(np.array('1971', dtype='M8[D]').astype('i8'), 1*365) + assert_equal(np.array('1972', dtype='M8[D]').astype('i8'), 2*365) + assert_equal(np.array('1973', dtype='M8[D]').astype('i8'), 3*365 + 1) + assert_equal(np.array('1974',
dtype='M8[D]').astype('i8'), 4*365 + 1) + assert_equal(np.array('2000', dtype='M8[D]').astype('i8'), + (2000 - 1970)*365 + (2000 - 1972)//4) + assert_equal(np.array('2001', dtype='M8[D]').astype('i8'), + (2000 - 1970)*365 + (2000 - 1972)//4 + 366) + assert_equal(np.array('2400', dtype='M8[D]').astype('i8'), + (2400 - 1970)*365 + (2400 - 1972)//4 - 3) + assert_equal(np.array('2401', dtype='M8[D]').astype('i8'), + (2400 - 1970)*365 + (2400 - 1972)//4 - 3 + 366) + + assert_equal(np.array('1600-02-29', dtype='M8[D]').astype('i8'), + (1600-1970)*365 - (1972-1600)//4 + 3 + 31 + 28) + assert_equal(np.array('1600-03-01', dtype='M8[D]').astype('i8'), + (1600-1970)*365 - (1972-1600)//4 + 3 + 31 + 29) + assert_equal(np.array('2000-02-29', dtype='M8[D]').astype('i8'), + (2000 - 1970)*365 + (2000 - 1972)//4 + 31 + 28) + assert_equal(np.array('2000-03-01', dtype='M8[D]').astype('i8'), + (2000 - 1970)*365 + (2000 - 1972)//4 + 31 + 29) + assert_equal(np.array('2001-03-22', dtype='M8[D]').astype('i8'), + (2000 - 1970)*365 + (2000 - 1972)//4 + 366 + 31 + 28 + 21) + + def test_days_to_pydate(self): + assert_equal(np.array('1599', dtype='M8[D]').astype('O'), + datetime.date(1599, 1, 1)) + assert_equal(np.array('1600', dtype='M8[D]').astype('O'), + datetime.date(1600, 1, 1)) + assert_equal(np.array('1601', dtype='M8[D]').astype('O'), + datetime.date(1601, 1, 1)) + assert_equal(np.array('1900', dtype='M8[D]').astype('O'), + datetime.date(1900, 1, 1)) + assert_equal(np.array('1901', dtype='M8[D]').astype('O'), + datetime.date(1901, 1, 1)) + assert_equal(np.array('2000', dtype='M8[D]').astype('O'), + datetime.date(2000, 1, 1)) + assert_equal(np.array('2001', dtype='M8[D]').astype('O'), + datetime.date(2001, 1, 1)) + assert_equal(np.array('1600-02-29', dtype='M8[D]').astype('O'), + datetime.date(1600, 2, 29)) + assert_equal(np.array('1600-03-01', dtype='M8[D]').astype('O'), + datetime.date(1600, 3, 1)) + assert_equal(np.array('2001-03-22', dtype='M8[D]').astype('O'), + datetime.date(2001, 3, 22)) + + def test_dtype_comparison(self): + assert_(not (np.dtype('M8[us]') == np.dtype('M8[ms]'))) + assert_(np.dtype('M8[us]') != np.dtype('M8[ms]')) + assert_(np.dtype('M8[2D]') != np.dtype('M8[D]')) + assert_(np.dtype('M8[D]') != np.dtype('M8[2D]')) + + def test_pydatetime_creation(self): + a = np.array(['1960-03-12', datetime.date(1960, 3, 12)], dtype='M8[D]') + assert_equal(a[0], a[1]) + a = np.array(['1999-12-31', datetime.date(1999, 12, 31)], dtype='M8[D]') + assert_equal(a[0], a[1]) + a = np.array(['2000-01-01', datetime.date(2000, 1, 1)], dtype='M8[D]') + assert_equal(a[0], a[1]) + # Will fail if the date changes during the exact right moment + a = np.array(['today', datetime.date.today()], dtype='M8[D]') + assert_equal(a[0], a[1]) + # datetime.datetime.now() returns local time, not UTC + #a = np.array(['now', datetime.datetime.now()], dtype='M8[s]') + #assert_equal(a[0], a[1]) + + # we can give a datetime.date time units + assert_equal(np.array(datetime.date(1960, 3, 12), dtype='M8[s]'), + np.array(np.datetime64('1960-03-12T00:00:00'))) + + def test_datetime_string_conversion(self): + a = ['2011-03-16', '1920-01-01', '2013-05-19'] + str_a = np.array(a, dtype='S') + uni_a = np.array(a, dtype='U') + dt_a = np.array(a, dtype='M') + + # String to datetime + assert_equal(dt_a, str_a.astype('M')) + assert_equal(dt_a.dtype, str_a.astype('M').dtype) + dt_b = np.empty_like(dt_a) + dt_b[...] 
= str_a + assert_equal(dt_a, dt_b) + + # Datetime to string + assert_equal(str_a, dt_a.astype('S0')) + str_b = np.empty_like(str_a) + str_b[...] = dt_a + assert_equal(str_a, str_b) + + # Unicode to datetime + assert_equal(dt_a, uni_a.astype('M')) + assert_equal(dt_a.dtype, uni_a.astype('M').dtype) + dt_b = np.empty_like(dt_a) + dt_b[...] = uni_a + assert_equal(dt_a, dt_b) + + # Datetime to unicode + assert_equal(uni_a, dt_a.astype('U')) + uni_b = np.empty_like(uni_a) + uni_b[...] = dt_a + assert_equal(uni_a, uni_b) + + # Datetime to long string - gh-9712 + assert_equal(str_a, dt_a.astype((np.string_, 128))) + str_b = np.empty(str_a.shape, dtype=(np.string_, 128)) + str_b[...] = dt_a + assert_equal(str_a, str_b) + + @pytest.mark.parametrize("time_dtype", ["m8[D]", "M8[Y]"]) + def test_time_byteswapping(self, time_dtype): + times = np.array(["2017", "NaT"], dtype=time_dtype) + times_swapped = times.astype(times.dtype.newbyteorder()) + assert_array_equal(times, times_swapped) + + unswapped = times_swapped.view(np.int64).newbyteorder() + assert_array_equal(unswapped, times.view(np.int64)) + + @pytest.mark.parametrize(["time1", "time2"], + [("M8[s]", "M8[D]"), ("m8[s]", "m8[ns]")]) + def test_time_byteswapped_cast(self, time1, time2): + dtype1 = np.dtype(time1) + dtype2 = np.dtype(time2) + times = np.array(["2017", "NaT"], dtype=dtype1) + expected = times.astype(dtype2) + + # Test that every byte-swapping combination also returns the same + # results (previous tests check that this comparison works fine). + res = times.astype(dtype1.newbyteorder()).astype(dtype2) + assert_array_equal(res, expected) + res = times.astype(dtype2.newbyteorder()) + assert_array_equal(res, expected) + res = times.astype(dtype1.newbyteorder()).astype(dtype2.newbyteorder()) + assert_array_equal(res, expected) + + @pytest.mark.parametrize("time_dtype", ["m8[D]", "M8[Y]"]) + @pytest.mark.parametrize("str_dtype", ["U", "S"]) + def test_datetime_conversions_byteorders(self, str_dtype, time_dtype): + times = np.array(["2017", "NaT"], dtype=time_dtype) + # Unfortunately, timedelta does not roundtrip: + from_strings = np.array(["2017", "NaT"], dtype=str_dtype) + to_strings = times.astype(str_dtype) # assume this is correct + + # Check that conversion from times to string works if src is swapped: + times_swapped = times.astype(times.dtype.newbyteorder()) + res = times_swapped.astype(str_dtype) + assert_array_equal(res, to_strings) + # And also if both are swapped: + res = times_swapped.astype(to_strings.dtype.newbyteorder()) + assert_array_equal(res, to_strings) + # only destination is swapped: + res = times.astype(to_strings.dtype.newbyteorder()) + assert_array_equal(res, to_strings) + + # Check that conversion from string to times works if src is swapped: + from_strings_swapped = from_strings.astype( + from_strings.dtype.newbyteorder()) + res = from_strings_swapped.astype(time_dtype) + assert_array_equal(res, times) + # And if both are swapped: + res = from_strings_swapped.astype(times.dtype.newbyteorder()) + assert_array_equal(res, times) + # Only destination is swapped: + res = from_strings.astype(times.dtype.newbyteorder()) + assert_array_equal(res, times) + + def test_datetime_array_str(self): + a = np.array(['2011-03-16', '1920-01-01', '2013-05-19'], dtype='M') + assert_equal(str(a), "['2011-03-16' '1920-01-01' '2013-05-19']") + + a = np.array(['2011-03-16T13:55', '1920-01-01T03:12'], dtype='M') + assert_equal(np.array2string(a, separator=', ', + formatter={'datetime': lambda x: + "'%s'" % np.datetime_as_string(x, 
timezone='UTC')}), + "['2011-03-16T13:55Z', '1920-01-01T03:12Z']") + + # Check that one NaT doesn't corrupt subsequent entries + a = np.array(['2010', 'NaT', '2030']).astype('M') + assert_equal(str(a), "['2010' 'NaT' '2030']") + + def test_timedelta_array_str(self): + a = np.array([-1, 0, 100], dtype='m') + assert_equal(str(a), "[ -1 0 100]") + a = np.array(['NaT', 'NaT'], dtype='m') + assert_equal(str(a), "['NaT' 'NaT']") + # Check right-alignment with NaTs + a = np.array([-1, 'NaT', 0], dtype='m') + assert_equal(str(a), "[ -1 'NaT' 0]") + a = np.array([-1, 'NaT', 1234567], dtype='m') + assert_equal(str(a), "[ -1 'NaT' 1234567]") + + # Test with other byteorder: + a = np.array([-1, 'NaT', 1234567], dtype='>m') + assert_equal(str(a), "[ -1 'NaT' 1234567]") + a = np.array([-1, 'NaT', 1234567], dtype='<m') + assert_equal(str(a), "[ -1 'NaT' 1234567]") + + def test_pickle(self): + # Check that pickle roundtripping works + dt = np.dtype('M8[7D]') + assert_equal(pickle.loads(pickle.dumps(dt)), dt) + dt = np.dtype('M8[W]') + assert_equal(pickle.loads(pickle.dumps(dt)), dt) + scalar = np.datetime64('2016-01-01T00:00:00.000000000') + assert_equal(pickle.loads(pickle.dumps(scalar)), scalar) + delta = scalar - np.datetime64('2015-01-01T00:00:00.000000000') + assert_equal(pickle.loads(pickle.dumps(delta)), delta) + + # Check that loading pickles from 1.6 works + pkl = b"cnumpy\ndtype\np0\n(S'M8'\np1\nI0\nI1\ntp2\nRp3\n" + \ + b"(I4\nS'<'\np4\nNNNI-1\nI-1\nI0\n((dp5\n(S'us'\np6\n" + \ + b"I1\nI1\nI1\ntp7\ntp8\ntp9\nb." + assert_equal(pickle.loads(pkl), np.dtype('<M8[us]')) + + def test_setstate(self): + "Verify that datetime dtype __setstate__ can handle bad arguments" + dt = np.dtype('>M8[us]') + assert_raises(ValueError, dt.__setstate__, (4, '>', None, None, None, -1, -1, 0, 1)) + assert_(dt.__reduce__()[2] == np.dtype('>M8[us]').__reduce__()[2]) + assert_raises(TypeError, dt.__setstate__, (4, '>', None, None, None, -1, -1, 0, ({}, 'xxx'))) + assert_(dt.__reduce__()[2] == np.dtype('>M8[us]').__reduce__()[2]) + + def test_dtype_promotion(self): + # datetime <op> datetime computes the metadata gcd + # timedelta <op> timedelta computes the metadata gcd + for mM in ['m', 'M']: + assert_equal( + np.promote_types(np.dtype(mM+'8[2Y]'), np.dtype(mM+'8[2Y]')), + np.dtype(mM+'8[2Y]')) + assert_equal( + np.promote_types(np.dtype(mM+'8[12Y]'), np.dtype(mM+'8[15Y]')), + np.dtype(mM+'8[3Y]')) + assert_equal( + np.promote_types(np.dtype(mM+'8[62M]'), np.dtype(mM+'8[24M]')), + np.dtype(mM+'8[2M]')) + assert_equal( + np.promote_types(np.dtype(mM+'8[1W]'), np.dtype(mM+'8[2D]')), + np.dtype(mM+'8[1D]')) + assert_equal( + np.promote_types(np.dtype(mM+'8[W]'), np.dtype(mM+'8[13s]')), + np.dtype(mM+'8[s]')) + assert_equal( + np.promote_types(np.dtype(mM+'8[13W]'), np.dtype(mM+'8[49s]')), + np.dtype(mM+'8[7s]')) + # timedelta <op> timedelta raises when there is no reasonable gcd + assert_raises(TypeError, np.promote_types, + np.dtype('m8[Y]'), np.dtype('m8[D]')) + assert_raises(TypeError, np.promote_types, + np.dtype('m8[M]'), np.dtype('m8[W]')) + # timedelta and float cannot be safely cast with each other + assert_raises(TypeError, np.promote_types, "float32", "m8") + assert_raises(TypeError, np.promote_types, "m8", "float32") + assert_raises(TypeError, np.promote_types, "uint64", "m8") + assert_raises(TypeError, np.promote_types, "m8", "uint64") + + # timedelta <op> timedelta may overflow with big unit ranges + assert_raises(OverflowError, np.promote_types, + np.dtype('m8[W]'), np.dtype('m8[fs]')) + assert_raises(OverflowError, np.promote_types, + np.dtype('m8[s]'), np.dtype('m8[as]')) + + def test_cast_overflow(self): + # gh-4486 + def cast(): + numpy.datetime64("1971-01-01 00:00:00.000000000000000").astype("<M8[D]") + assert_raises(OverflowError, cast) + + def cast2(): + numpy.datetime64("2014").astype("<M8[fs]") + assert_raises(OverflowError, cast2) + + @pytest.mark.parametrize("unit", ['Y', 'M', 'W', 'D', 'h', 'm', + 's', 'ms', 'us', 'ns', 'ps', + 'fs', 'as']) + @pytest.mark.parametrize("dstr", ['<datetime64[%s]', '>datetime64[%s]', + '<timedelta64[%s]', '>timedelta64[%s]']) + def test_isfinite_isinf_isnan_units(self, unit, dstr): + '''check isfinite, isinf, isnan for all units of <M, >M, <m, >m dtypes + ''' + arr_val = [123, -321, "NaT"] + arr = np.array(arr_val, dtype= dstr % unit) + pos = np.array([True, True, False]) + neg = np.array([False, False, True]) + false = np.array([False, False, False]) + assert_equal(np.isfinite(arr), pos) + assert_equal(np.isinf(arr), false) + 
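# (NaT intentionally mirrors the floating-point NaN conventions checked
#  here: it is neither finite nor infinite, and only isnan reports it.)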
assert_equal(np.isnan(arr), neg) + + def test_assert_equal(self): + assert_raises(AssertionError, assert_equal, + np.datetime64('nat'), np.timedelta64('nat')) + + def test_corecursive_input(self): + # construct a co-recursive list + a, b = [], [] + a.append(b) + b.append(a) + obj_arr = np.array([None]) + obj_arr[0] = a + + # At some point this caused a stack overflow (gh-11154). Now raises + # ValueError since the nested list cannot be converted to a datetime. + assert_raises(ValueError, obj_arr.astype, 'M8') + assert_raises(ValueError, obj_arr.astype, 'm8') + + @pytest.mark.parametrize("shape", [(), (1,)]) + def test_discovery_from_object_array(self, shape): + arr = np.array("2020-10-10", dtype=object).reshape(shape) + res = np.array("2020-10-10", dtype="M8").reshape(shape) + assert res.dtype == np.dtype("M8[D]") + assert_equal(arr.astype("M8"), res) + arr[...] = np.bytes_("2020-10-10") # try a numpy string type + assert_equal(arr.astype("M8"), res) + arr = arr.astype("S") + assert_equal(arr.astype("S").astype("M8"), res) + + @pytest.mark.parametrize("time_unit", [ + "Y", "M", "W", "D", "h", "m", "s", "ms", "us", "ns", "ps", "fs", "as", + # compound units + "10D", "2M", + ]) + def test_limit_symmetry(self, time_unit): + """ + Dates should have symmetric limits around the unix epoch at +/-np.int64 + """ + epoch = np.datetime64(0, time_unit) + latest = np.datetime64(np.iinfo(np.int64).max, time_unit) + earliest = np.datetime64(-np.iinfo(np.int64).max, time_unit) + + # above should not have overflowed + assert earliest < epoch < latest + + @pytest.mark.parametrize("time_unit", [ + "Y", "M", + pytest.param("W", marks=pytest.mark.xfail(reason="gh-13197")), + "D", "h", "m", + "s", "ms", "us", "ns", "ps", "fs", "as", + pytest.param("10D", marks=pytest.mark.xfail(reason="similar to gh-13197")), + ]) + @pytest.mark.parametrize("sign", [-1, 1]) + def test_limit_str_roundtrip(self, time_unit, sign): + """ + Limits should roundtrip when converted to strings. + + This tests the conversion to and from npy_datetimestruct. + """ + # TODO: add absolute (gold standard) time span limit strings + limit = np.datetime64(np.iinfo(np.int64).max * sign, time_unit) + + # Convert to string and back. Explicit unit needed since the day and + # week reprs are not distinguishable. 
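# (For example, str(np.datetime64(7, 'D')) and str(np.datetime64(1, 'W'))
#  both print as '1970-01-08', so the unit cannot be recovered from the
#  string alone and has to be passed back in explicitly.)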
+ limit_via_str = np.datetime64(str(limit), time_unit) + assert limit_via_str == limit + + +class TestDateTimeData: + + def test_basic(self): + a = np.array(['1980-03-23'], dtype=np.datetime64) + assert_equal(np.datetime_data(a.dtype), ('D', 1)) + + def test_bytes(self): + # byte units are converted to unicode + dt = np.datetime64('2000', (b'ms', 5)) + assert np.datetime_data(dt.dtype) == ('ms', 5) + + dt = np.datetime64('2000', b'5ms') + assert np.datetime_data(dt.dtype) == ('ms', 5) + + def test_non_ascii(self): + # μs is normalized to μ + dt = np.datetime64('2000', ('μs', 5)) + assert np.datetime_data(dt.dtype) == ('us', 5) + + dt = np.datetime64('2000', '5μs') + assert np.datetime_data(dt.dtype) == ('us', 5) diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_defchararray.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_defchararray.py new file mode 100644 index 0000000..59fc547 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_defchararray.py @@ -0,0 +1,673 @@ + +import numpy as np +from numpy.core.multiarray import _vec_string +from numpy.testing import ( + assert_, assert_equal, assert_array_equal, assert_raises, + assert_raises_regex + ) + +kw_unicode_true = {'unicode': True} # make 2to3 work properly +kw_unicode_false = {'unicode': False} + +class TestBasic: + def test_from_object_array(self): + A = np.array([['abc', 2], + ['long ', '0123456789']], dtype='O') + B = np.char.array(A) + assert_equal(B.dtype.itemsize, 10) + assert_array_equal(B, [[b'abc', b'2'], + [b'long', b'0123456789']]) + + def test_from_object_array_unicode(self): + A = np.array([['abc', u'Sigma \u03a3'], + ['long ', '0123456789']], dtype='O') + assert_raises(ValueError, np.char.array, (A,)) + B = np.char.array(A, **kw_unicode_true) + assert_equal(B.dtype.itemsize, 10 * np.array('a', 'U').dtype.itemsize) + assert_array_equal(B, [['abc', u'Sigma \u03a3'], + ['long', '0123456789']]) + + def test_from_string_array(self): + A = np.array([[b'abc', b'foo'], + [b'long ', b'0123456789']]) + assert_equal(A.dtype.type, np.string_) + B = np.char.array(A) + assert_array_equal(B, A) + assert_equal(B.dtype, A.dtype) + assert_equal(B.shape, A.shape) + B[0, 0] = 'changed' + assert_(B[0, 0] != A[0, 0]) + C = np.char.asarray(A) + assert_array_equal(C, A) + assert_equal(C.dtype, A.dtype) + C[0, 0] = 'changed again' + assert_(C[0, 0] != B[0, 0]) + assert_(C[0, 0] == A[0, 0]) + + def test_from_unicode_array(self): + A = np.array([['abc', u'Sigma \u03a3'], + ['long ', '0123456789']]) + assert_equal(A.dtype.type, np.unicode_) + B = np.char.array(A) + assert_array_equal(B, A) + assert_equal(B.dtype, A.dtype) + assert_equal(B.shape, A.shape) + B = np.char.array(A, **kw_unicode_true) + assert_array_equal(B, A) + assert_equal(B.dtype, A.dtype) + assert_equal(B.shape, A.shape) + + def fail(): + np.char.array(A, **kw_unicode_false) + + assert_raises(UnicodeEncodeError, fail) + + def test_unicode_upconvert(self): + A = np.char.array(['abc']) + B = np.char.array([u'\u03a3']) + assert_(issubclass((A + B).dtype.type, np.unicode_)) + + def test_from_string(self): + A = np.char.array(b'abc') + assert_equal(len(A), 1) + assert_equal(len(A[0]), 3) + assert_(issubclass(A.dtype.type, np.string_)) + + def test_from_unicode(self): + A = np.char.array(u'\u03a3') + assert_equal(len(A), 1) + assert_equal(len(A[0]), 1) + assert_equal(A.itemsize, 4) + assert_(issubclass(A.dtype.type, np.unicode_)) + +class TestVecString: + def test_non_existent_method(self): + + def fail(): + _vec_string('a', 
np.string_, 'bogus') + + assert_raises(AttributeError, fail) + + def test_non_string_array(self): + + def fail(): + _vec_string(1, np.string_, 'strip') + + assert_raises(TypeError, fail) + + def test_invalid_args_tuple(self): + + def fail(): + _vec_string(['a'], np.string_, 'strip', 1) + + assert_raises(TypeError, fail) + + def test_invalid_type_descr(self): + + def fail(): + _vec_string(['a'], 'BOGUS', 'strip') + + assert_raises(TypeError, fail) + + def test_invalid_function_args(self): + + def fail(): + _vec_string(['a'], np.string_, 'strip', (1,)) + + assert_raises(TypeError, fail) + + def test_invalid_result_type(self): + + def fail(): + _vec_string(['a'], np.int_, 'strip') + + assert_raises(TypeError, fail) + + def test_broadcast_error(self): + + def fail(): + _vec_string([['abc', 'def']], np.int_, 'find', (['a', 'd', 'j'],)) + + assert_raises(ValueError, fail) + + +class TestWhitespace: + def setup(self): + self.A = np.array([['abc ', '123 '], + ['789 ', 'xyz ']]).view(np.chararray) + self.B = np.array([['abc', '123'], + ['789', 'xyz']]).view(np.chararray) + + def test1(self): + assert_(np.all(self.A == self.B)) + assert_(np.all(self.A >= self.B)) + assert_(np.all(self.A <= self.B)) + assert_(not np.any(self.A > self.B)) + assert_(not np.any(self.A < self.B)) + assert_(not np.any(self.A != self.B)) + +class TestChar: + def setup(self): + self.A = np.array('abc1', dtype='c').view(np.chararray) + + def test_it(self): + assert_equal(self.A.shape, (4,)) + assert_equal(self.A.upper()[:2].tobytes(), b'AB') + +class TestComparisons: + def setup(self): + self.A = np.array([['abc', '123'], + ['789', 'xyz']]).view(np.chararray) + self.B = np.array([['efg', '123 '], + ['051', 'tuv']]).view(np.chararray) + + def test_not_equal(self): + assert_array_equal((self.A != self.B), [[True, False], [True, True]]) + + def test_equal(self): + assert_array_equal((self.A == self.B), [[False, True], [False, False]]) + + def test_greater_equal(self): + assert_array_equal((self.A >= self.B), [[False, True], [True, True]]) + + def test_less_equal(self): + assert_array_equal((self.A <= self.B), [[True, True], [False, False]]) + + def test_greater(self): + assert_array_equal((self.A > self.B), [[False, False], [True, True]]) + + def test_less(self): + assert_array_equal((self.A < self.B), [[True, False], [False, False]]) + + def test_type(self): + out1 = np.char.equal(self.A, self.B) + out2 = np.char.equal('a', 'a') + assert_(isinstance(out1, np.ndarray)) + assert_(isinstance(out2, np.ndarray)) + +class TestComparisonsMixed1(TestComparisons): + """Ticket #1276""" + + def setup(self): + TestComparisons.setup(self) + self.B = np.array([['efg', '123 '], + ['051', 'tuv']], np.unicode_).view(np.chararray) + +class TestComparisonsMixed2(TestComparisons): + """Ticket #1276""" + + def setup(self): + TestComparisons.setup(self) + self.A = np.array([['abc', '123'], + ['789', 'xyz']], np.unicode_).view(np.chararray) + +class TestInformation: + def setup(self): + self.A = np.array([[' abc ', ''], + ['12345', 'MixedCase'], + ['123 \t 345 \0 ', 'UPPER']]).view(np.chararray) + self.B = np.array([[u' \u03a3 ', u''], + [u'12345', u'MixedCase'], + [u'123 \t 345 \0 ', u'UPPER']]).view(np.chararray) + + def test_len(self): + assert_(issubclass(np.char.str_len(self.A).dtype.type, np.integer)) + assert_array_equal(np.char.str_len(self.A), [[5, 0], [5, 9], [12, 5]]) + assert_array_equal(np.char.str_len(self.B), [[3, 0], [5, 9], [12, 5]]) + + def test_count(self): + assert_(issubclass(self.A.count('').dtype.type, np.integer)) + 
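# (Like str.count, np.char.count accepts optional [start, end) bounds, so
#  the count('a', 0, 2) call below only searches the first two characters
#  of each element.)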
assert_array_equal(self.A.count('a'), [[1, 0], [0, 1], [0, 0]]) + assert_array_equal(self.A.count('123'), [[0, 0], [1, 0], [1, 0]]) + # Python doesn't seem to like counting NULL characters + # assert_array_equal(self.A.count('\0'), [[0, 0], [0, 0], [1, 0]]) + assert_array_equal(self.A.count('a', 0, 2), [[1, 0], [0, 0], [0, 0]]) + assert_array_equal(self.B.count('a'), [[0, 0], [0, 1], [0, 0]]) + assert_array_equal(self.B.count('123'), [[0, 0], [1, 0], [1, 0]]) + # assert_array_equal(self.B.count('\0'), [[0, 0], [0, 0], [1, 0]]) + + def test_endswith(self): + assert_(issubclass(self.A.endswith('').dtype.type, np.bool_)) + assert_array_equal(self.A.endswith(' '), [[1, 0], [0, 0], [1, 0]]) + assert_array_equal(self.A.endswith('3', 0, 3), [[0, 0], [1, 0], [1, 0]]) + + def fail(): + self.A.endswith('3', 'fdjk') + + assert_raises(TypeError, fail) + + def test_find(self): + assert_(issubclass(self.A.find('a').dtype.type, np.integer)) + assert_array_equal(self.A.find('a'), [[1, -1], [-1, 6], [-1, -1]]) + assert_array_equal(self.A.find('3'), [[-1, -1], [2, -1], [2, -1]]) + assert_array_equal(self.A.find('a', 0, 2), [[1, -1], [-1, -1], [-1, -1]]) + assert_array_equal(self.A.find(['1', 'P']), [[-1, -1], [0, -1], [0, 1]]) + + def test_index(self): + + def fail(): + self.A.index('a') + + assert_raises(ValueError, fail) + assert_(np.char.index('abcba', 'b') == 1) + assert_(issubclass(np.char.index('abcba', 'b').dtype.type, np.integer)) + + def test_isalnum(self): + assert_(issubclass(self.A.isalnum().dtype.type, np.bool_)) + assert_array_equal(self.A.isalnum(), [[False, False], [True, True], [False, True]]) + + def test_isalpha(self): + assert_(issubclass(self.A.isalpha().dtype.type, np.bool_)) + assert_array_equal(self.A.isalpha(), [[False, False], [False, True], [False, True]]) + + def test_isdigit(self): + assert_(issubclass(self.A.isdigit().dtype.type, np.bool_)) + assert_array_equal(self.A.isdigit(), [[False, False], [True, False], [False, False]]) + + def test_islower(self): + assert_(issubclass(self.A.islower().dtype.type, np.bool_)) + assert_array_equal(self.A.islower(), [[True, False], [False, False], [False, False]]) + + def test_isspace(self): + assert_(issubclass(self.A.isspace().dtype.type, np.bool_)) + assert_array_equal(self.A.isspace(), [[False, False], [False, False], [False, False]]) + + def test_istitle(self): + assert_(issubclass(self.A.istitle().dtype.type, np.bool_)) + assert_array_equal(self.A.istitle(), [[False, False], [False, False], [False, False]]) + + def test_isupper(self): + assert_(issubclass(self.A.isupper().dtype.type, np.bool_)) + assert_array_equal(self.A.isupper(), [[False, False], [False, False], [False, True]]) + + def test_rfind(self): + assert_(issubclass(self.A.rfind('a').dtype.type, np.integer)) + assert_array_equal(self.A.rfind('a'), [[1, -1], [-1, 6], [-1, -1]]) + assert_array_equal(self.A.rfind('3'), [[-1, -1], [2, -1], [6, -1]]) + assert_array_equal(self.A.rfind('a', 0, 2), [[1, -1], [-1, -1], [-1, -1]]) + assert_array_equal(self.A.rfind(['1', 'P']), [[-1, -1], [0, -1], [0, 2]]) + + def test_rindex(self): + + def fail(): + self.A.rindex('a') + + assert_raises(ValueError, fail) + assert_(np.char.rindex('abcba', 'b') == 3) + assert_(issubclass(np.char.rindex('abcba', 'b').dtype.type, np.integer)) + + def test_startswith(self): + assert_(issubclass(self.A.startswith('').dtype.type, np.bool_)) + assert_array_equal(self.A.startswith(' '), [[1, 0], [0, 0], [0, 0]]) + assert_array_equal(self.A.startswith('1', 0, 3), [[0, 0], [1, 0], [1, 0]]) + + def fail(): + 
self.A.startswith('3', 'fdjk') + + assert_raises(TypeError, fail) + + +class TestMethods: + def setup(self): + self.A = np.array([[' abc ', ''], + ['12345', 'MixedCase'], + ['123 \t 345 \0 ', 'UPPER']], + dtype='S').view(np.chararray) + self.B = np.array([[u' \u03a3 ', u''], + [u'12345', u'MixedCase'], + [u'123 \t 345 \0 ', u'UPPER']]).view(np.chararray) + + def test_capitalize(self): + tgt = [[b' abc ', b''], + [b'12345', b'Mixedcase'], + [b'123 \t 345 \0 ', b'Upper']] + assert_(issubclass(self.A.capitalize().dtype.type, np.string_)) + assert_array_equal(self.A.capitalize(), tgt) + + tgt = [[u' \u03c3 ', ''], + ['12345', 'Mixedcase'], + ['123 \t 345 \0 ', 'Upper']] + assert_(issubclass(self.B.capitalize().dtype.type, np.unicode_)) + assert_array_equal(self.B.capitalize(), tgt) + + def test_center(self): + assert_(issubclass(self.A.center(10).dtype.type, np.string_)) + C = self.A.center([10, 20]) + assert_array_equal(np.char.str_len(C), [[10, 20], [10, 20], [12, 20]]) + + C = self.A.center(20, b'#') + assert_(np.all(C.startswith(b'#'))) + assert_(np.all(C.endswith(b'#'))) + + C = np.char.center(b'FOO', [[10, 20], [15, 8]]) + tgt = [[b' FOO ', b' FOO '], + [b' FOO ', b' FOO ']] + assert_(issubclass(C.dtype.type, np.string_)) + assert_array_equal(C, tgt) + + def test_decode(self): + A = np.char.array([b'\\u03a3']) + assert_(A.decode('unicode-escape')[0] == '\u03a3') + + def test_encode(self): + B = self.B.encode('unicode_escape') + assert_(B[0][0] == str(' \\u03a3 ').encode('latin1')) + + def test_expandtabs(self): + T = self.A.expandtabs() + assert_(T[2, 0] == b'123 345 \0') + + def test_join(self): + # NOTE: list(b'123') == [49, 50, 51] + # so that b','.join(b'123') results to an error on Py3 + A0 = self.A.decode('ascii') + + A = np.char.join([',', '#'], A0) + assert_(issubclass(A.dtype.type, np.unicode_)) + tgt = np.array([[' ,a,b,c, ', ''], + ['1,2,3,4,5', 'M#i#x#e#d#C#a#s#e'], + ['1,2,3, ,\t, ,3,4,5, ,\x00, ', 'U#P#P#E#R']]) + assert_array_equal(np.char.join([',', '#'], A0), tgt) + + def test_ljust(self): + assert_(issubclass(self.A.ljust(10).dtype.type, np.string_)) + + C = self.A.ljust([10, 20]) + assert_array_equal(np.char.str_len(C), [[10, 20], [10, 20], [12, 20]]) + + C = self.A.ljust(20, b'#') + assert_array_equal(C.startswith(b'#'), [ + [False, True], [False, False], [False, False]]) + assert_(np.all(C.endswith(b'#'))) + + C = np.char.ljust(b'FOO', [[10, 20], [15, 8]]) + tgt = [[b'FOO ', b'FOO '], + [b'FOO ', b'FOO ']] + assert_(issubclass(C.dtype.type, np.string_)) + assert_array_equal(C, tgt) + + def test_lower(self): + tgt = [[b' abc ', b''], + [b'12345', b'mixedcase'], + [b'123 \t 345 \0 ', b'upper']] + assert_(issubclass(self.A.lower().dtype.type, np.string_)) + assert_array_equal(self.A.lower(), tgt) + + tgt = [[u' \u03c3 ', u''], + [u'12345', u'mixedcase'], + [u'123 \t 345 \0 ', u'upper']] + assert_(issubclass(self.B.lower().dtype.type, np.unicode_)) + assert_array_equal(self.B.lower(), tgt) + + def test_lstrip(self): + tgt = [[b'abc ', b''], + [b'12345', b'MixedCase'], + [b'123 \t 345 \0 ', b'UPPER']] + assert_(issubclass(self.A.lstrip().dtype.type, np.string_)) + assert_array_equal(self.A.lstrip(), tgt) + + tgt = [[b' abc', b''], + [b'2345', b'ixedCase'], + [b'23 \t 345 \x00', b'UPPER']] + assert_array_equal(self.A.lstrip([b'1', b'M']), tgt) + + tgt = [[u'\u03a3 ', ''], + ['12345', 'MixedCase'], + ['123 \t 345 \0 ', 'UPPER']] + assert_(issubclass(self.B.lstrip().dtype.type, np.unicode_)) + assert_array_equal(self.B.lstrip(), tgt) + + def test_partition(self): + P = 
self.A.partition([b'3', b'M']) + tgt = [[(b' abc ', b'', b''), (b'', b'', b'')], + [(b'12', b'3', b'45'), (b'', b'M', b'ixedCase')], + [(b'12', b'3', b' \t 345 \0 '), (b'UPPER', b'', b'')]] + assert_(issubclass(P.dtype.type, np.string_)) + assert_array_equal(P, tgt) + + def test_replace(self): + R = self.A.replace([b'3', b'a'], + [b'##########', b'@']) + tgt = [[b' abc ', b''], + [b'12##########45', b'MixedC@se'], + [b'12########## \t ##########45 \x00', b'UPPER']] + assert_(issubclass(R.dtype.type, np.string_)) + assert_array_equal(R, tgt) + + def test_rjust(self): + assert_(issubclass(self.A.rjust(10).dtype.type, np.string_)) + + C = self.A.rjust([10, 20]) + assert_array_equal(np.char.str_len(C), [[10, 20], [10, 20], [12, 20]]) + + C = self.A.rjust(20, b'#') + assert_(np.all(C.startswith(b'#'))) + assert_array_equal(C.endswith(b'#'), + [[False, True], [False, False], [False, False]]) + + C = np.char.rjust(b'FOO', [[10, 20], [15, 8]]) + tgt = [[b' FOO', b' FOO'], + [b' FOO', b' FOO']] + assert_(issubclass(C.dtype.type, np.string_)) + assert_array_equal(C, tgt) + + def test_rpartition(self): + P = self.A.rpartition([b'3', b'M']) + tgt = [[(b'', b'', b' abc '), (b'', b'', b'')], + [(b'12', b'3', b'45'), (b'', b'M', b'ixedCase')], + [(b'123 \t ', b'3', b'45 \0 '), (b'', b'', b'UPPER')]] + assert_(issubclass(P.dtype.type, np.string_)) + assert_array_equal(P, tgt) + + def test_rsplit(self): + A = self.A.rsplit(b'3') + tgt = [[[b' abc '], [b'']], + [[b'12', b'45'], [b'MixedCase']], + [[b'12', b' \t ', b'45 \x00 '], [b'UPPER']]] + assert_(issubclass(A.dtype.type, np.object_)) + assert_equal(A.tolist(), tgt) + + def test_rstrip(self): + assert_(issubclass(self.A.rstrip().dtype.type, np.string_)) + + tgt = [[b' abc', b''], + [b'12345', b'MixedCase'], + [b'123 \t 345', b'UPPER']] + assert_array_equal(self.A.rstrip(), tgt) + + tgt = [[b' abc ', b''], + [b'1234', b'MixedCase'], + [b'123 \t 345 \x00', b'UPP'] + ] + assert_array_equal(self.A.rstrip([b'5', b'ER']), tgt) + + tgt = [[u' \u03a3', ''], + ['12345', 'MixedCase'], + ['123 \t 345', 'UPPER']] + assert_(issubclass(self.B.rstrip().dtype.type, np.unicode_)) + assert_array_equal(self.B.rstrip(), tgt) + + def test_strip(self): + tgt = [[b'abc', b''], + [b'12345', b'MixedCase'], + [b'123 \t 345', b'UPPER']] + assert_(issubclass(self.A.strip().dtype.type, np.string_)) + assert_array_equal(self.A.strip(), tgt) + + tgt = [[b' abc ', b''], + [b'234', b'ixedCas'], + [b'23 \t 345 \x00', b'UPP']] + assert_array_equal(self.A.strip([b'15', b'EReM']), tgt) + + tgt = [[u'\u03a3', ''], + ['12345', 'MixedCase'], + ['123 \t 345', 'UPPER']] + assert_(issubclass(self.B.strip().dtype.type, np.unicode_)) + assert_array_equal(self.B.strip(), tgt) + + def test_split(self): + A = self.A.split(b'3') + tgt = [ + [[b' abc '], [b'']], + [[b'12', b'45'], [b'MixedCase']], + [[b'12', b' \t ', b'45 \x00 '], [b'UPPER']]] + assert_(issubclass(A.dtype.type, np.object_)) + assert_equal(A.tolist(), tgt) + + def test_splitlines(self): + A = np.char.array(['abc\nfds\nwer']).splitlines() + assert_(issubclass(A.dtype.type, np.object_)) + assert_(A.shape == (1,)) + assert_(len(A[0]) == 3) + + def test_swapcase(self): + tgt = [[b' ABC ', b''], + [b'12345', b'mIXEDcASE'], + [b'123 \t 345 \0 ', b'upper']] + assert_(issubclass(self.A.swapcase().dtype.type, np.string_)) + assert_array_equal(self.A.swapcase(), tgt) + + tgt = [[u' \u03c3 ', u''], + [u'12345', u'mIXEDcASE'], + [u'123 \t 345 \0 ', u'upper']] + assert_(issubclass(self.B.swapcase().dtype.type, np.unicode_)) + 
assert_array_equal(self.B.swapcase(), tgt) + + def test_title(self): + tgt = [[b' Abc ', b''], + [b'12345', b'Mixedcase'], + [b'123 \t 345 \0 ', b'Upper']] + assert_(issubclass(self.A.title().dtype.type, np.string_)) + assert_array_equal(self.A.title(), tgt) + + tgt = [[u' \u03a3 ', u''], + [u'12345', u'Mixedcase'], + [u'123 \t 345 \0 ', u'Upper']] + assert_(issubclass(self.B.title().dtype.type, np.unicode_)) + assert_array_equal(self.B.title(), tgt) + + def test_upper(self): + tgt = [[b' ABC ', b''], + [b'12345', b'MIXEDCASE'], + [b'123 \t 345 \0 ', b'UPPER']] + assert_(issubclass(self.A.upper().dtype.type, np.string_)) + assert_array_equal(self.A.upper(), tgt) + + tgt = [[u' \u03a3 ', u''], + [u'12345', u'MIXEDCASE'], + [u'123 \t 345 \0 ', u'UPPER']] + assert_(issubclass(self.B.upper().dtype.type, np.unicode_)) + assert_array_equal(self.B.upper(), tgt) + + def test_isnumeric(self): + + def fail(): + self.A.isnumeric() + + assert_raises(TypeError, fail) + assert_(issubclass(self.B.isnumeric().dtype.type, np.bool_)) + assert_array_equal(self.B.isnumeric(), [ + [False, False], [True, False], [False, False]]) + + def test_isdecimal(self): + + def fail(): + self.A.isdecimal() + + assert_raises(TypeError, fail) + assert_(issubclass(self.B.isdecimal().dtype.type, np.bool_)) + assert_array_equal(self.B.isdecimal(), [ + [False, False], [True, False], [False, False]]) + + +class TestOperations: + def setup(self): + self.A = np.array([['abc', '123'], + ['789', 'xyz']]).view(np.chararray) + self.B = np.array([['efg', '456'], + ['051', 'tuv']]).view(np.chararray) + + def test_add(self): + AB = np.array([['abcefg', '123456'], + ['789051', 'xyztuv']]).view(np.chararray) + assert_array_equal(AB, (self.A + self.B)) + assert_(len((self.A + self.B)[0][0]) == 6) + + def test_radd(self): + QA = np.array([['qabc', 'q123'], + ['q789', 'qxyz']]).view(np.chararray) + assert_array_equal(QA, ('q' + self.A)) + + def test_mul(self): + A = self.A + for r in (2, 3, 5, 7, 197): + Ar = np.array([[A[0, 0]*r, A[0, 1]*r], + [A[1, 0]*r, A[1, 1]*r]]).view(np.chararray) + + assert_array_equal(Ar, (self.A * r)) + + for ob in [object(), 'qrs']: + with assert_raises_regex(ValueError, + 'Can only multiply by integers'): + A*ob + + def test_rmul(self): + A = self.A + for r in (2, 3, 5, 7, 197): + Ar = np.array([[A[0, 0]*r, A[0, 1]*r], + [A[1, 0]*r, A[1, 1]*r]]).view(np.chararray) + assert_array_equal(Ar, (r * self.A)) + + for ob in [object(), 'qrs']: + with assert_raises_regex(ValueError, + 'Can only multiply by integers'): + ob * A + + def test_mod(self): + """Ticket #856""" + F = np.array([['%d', '%f'], ['%s', '%r']]).view(np.chararray) + C = np.array([[3, 7], [19, 1]]) + FC = np.array([['3', '7.000000'], + ['19', '1']]).view(np.chararray) + assert_array_equal(FC, F % C) + + A = np.array([['%.3f', '%d'], ['%s', '%r']]).view(np.chararray) + A1 = np.array([['1.000', '1'], ['1', '1']]).view(np.chararray) + assert_array_equal(A1, (A % 1)) + + A2 = np.array([['1.000', '2'], ['3', '4']]).view(np.chararray) + assert_array_equal(A2, (A % [[1, 2], [3, 4]])) + + def test_rmod(self): + assert_(("%s" % self.A) == str(self.A)) + assert_(("%r" % self.A) == repr(self.A)) + + for ob in [42, object()]: + with assert_raises_regex( + TypeError, "unsupported operand type.* and 'chararray'"): + ob % self.A + + def test_slice(self): + """Regression test for https://github.com/numpy/numpy/issues/5982""" + + arr = np.array([['abc ', 'def '], ['geh ', 'ijk ']], + dtype='S4').view(np.chararray) + sl1 = arr[:] + assert_array_equal(sl1, arr) + 
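# (A sliced chararray is a view: its .base is the chararray it was sliced
#  from, and that chararray's own .base is the plain ndarray that was
#  originally viewed as a chararray, which is what the asserts below check.)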
assert_(sl1.base is arr) + assert_(sl1.base.base is arr.base) + + sl2 = arr[:, :] + assert_array_equal(sl2, arr) + assert_(sl2.base is arr) + assert_(sl2.base.base is arr.base) + + assert_(arr[0, 0] == b'abc') + + +def test_empty_indexing(): + """Regression test for ticket 1948.""" + # Check that indexing a chararray with an empty list/array returns an + # empty chararray instead of a chararray with a single empty string in it. + s = np.chararray((4,)) + assert_(s[[]].size == 0) diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_deprecations.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_deprecations.py new file mode 100644 index 0000000..94583a5 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_deprecations.py @@ -0,0 +1,1252 @@ +""" +Tests related to deprecation warnings. Also a convenient place +to document how deprecations should eventually be turned into errors. + +""" +import datetime +import operator +import warnings +import pytest +import tempfile +import re +import sys + +import numpy as np +from numpy.testing import ( + assert_raises, assert_warns, assert_, assert_array_equal, SkipTest, KnownFailureException + ) + +from numpy.core._multiarray_tests import fromstring_null_term_c_api + +try: + import pytz + _has_pytz = True +except ImportError: + _has_pytz = False + + +class _DeprecationTestCase: + # Just as warning: warnings uses re.match, so the start of this message + # must match. + message = '' + warning_cls = DeprecationWarning + + def setup(self): + self.warn_ctx = warnings.catch_warnings(record=True) + self.log = self.warn_ctx.__enter__() + + # Do *not* ignore other DeprecationWarnings. Ignoring warnings + # can give very confusing results because of + # https://bugs.python.org/issue4180 and it is probably simplest to + # try to keep the tests cleanly giving only the right warning type. + # (While checking them set to "error" those are ignored anyway) + # We still have them show up, because otherwise they would be raised + warnings.filterwarnings("always", category=self.warning_cls) + warnings.filterwarnings("always", message=self.message, + category=self.warning_cls) + + def teardown(self): + self.warn_ctx.__exit__() + + def assert_deprecated(self, function, num=1, ignore_others=False, + function_fails=False, + exceptions=np._NoValue, + args=(), kwargs={}): + """Test if DeprecationWarnings are given and raised. + + This first checks if the function when called gives `num` + DeprecationWarnings, after that it tries to raise these + DeprecationWarnings and compares them with `exceptions`. + The exceptions can be different for cases where this code path + is simply not anticipated and the exception is replaced. + + Parameters + ---------- + function : callable + The function to test + num : int + Number of DeprecationWarnings to expect. This should normally be 1. + ignore_others : bool + Whether warnings of the wrong type should be ignored (note that + the message is not checked) + function_fails : bool + If the function would normally fail, setting this will check for + warnings inside a try/except block. + exceptions : Exception or tuple of Exceptions + Exception to expect when turning the warnings into an error. + The default checks for DeprecationWarnings. If exceptions is + empty the function is expected to run successfully. 
+ args : tuple + Arguments for `function` + kwargs : dict + Keyword arguments for `function` + """ + __tracebackhide__ = True # Hide traceback for py.test + + # reset the log + self.log[:] = [] + + if exceptions is np._NoValue: + exceptions = (self.warning_cls,) + + try: + function(*args, **kwargs) + except (Exception if function_fails else tuple()): + pass + + # just in case, clear the registry + num_found = 0 + for warning in self.log: + if warning.category is self.warning_cls: + num_found += 1 + elif not ignore_others: + raise AssertionError( + "expected %s but got: %s" % + (self.warning_cls.__name__, warning.category)) + if num is not None and num_found != num: + msg = "%i warnings found but %i expected." % (len(self.log), num) + lst = [str(w) for w in self.log] + raise AssertionError("\n".join([msg] + lst)) + + with warnings.catch_warnings(): + warnings.filterwarnings("error", message=self.message, + category=self.warning_cls) + try: + function(*args, **kwargs) + if exceptions != tuple(): + raise AssertionError( + "No error raised during function call") + except exceptions: + if exceptions == tuple(): + raise AssertionError( + "Error raised during function call") + + def assert_not_deprecated(self, function, args=(), kwargs={}): + """Test that warnings are not raised. + + This is just a shorthand for: + + self.assert_deprecated(function, num=0, ignore_others=True, + exceptions=tuple(), args=args, kwargs=kwargs) + """ + self.assert_deprecated(function, num=0, ignore_others=True, + exceptions=tuple(), args=args, kwargs=kwargs) + + +class _VisibleDeprecationTestCase(_DeprecationTestCase): + warning_cls = np.VisibleDeprecationWarning + + +class TestNonTupleNDIndexDeprecation: + def test_basic(self): + a = np.zeros((5, 5)) + with warnings.catch_warnings(): + warnings.filterwarnings('always') + assert_warns(FutureWarning, a.__getitem__, [[0, 1], [0, 1]]) + assert_warns(FutureWarning, a.__getitem__, [slice(None)]) + + warnings.filterwarnings('error') + assert_raises(FutureWarning, a.__getitem__, [[0, 1], [0, 1]]) + assert_raises(FutureWarning, a.__getitem__, [slice(None)]) + + # a a[[0, 1]] always was advanced indexing, so no error/warning + a[[0, 1]] + + +class TestComparisonDeprecations(_DeprecationTestCase): + """This tests the deprecation, for non-element-wise comparison logic. + This used to mean that when an error occurred during element-wise comparison + (i.e. broadcasting) NotImplemented was returned, but also in the comparison + itself, False was given instead of the error. + + Also test FutureWarning for the None comparison. + """ + + message = "elementwise.* comparison failed; .*" + + def test_normal_types(self): + for op in (operator.eq, operator.ne): + # Broadcasting errors: + self.assert_deprecated(op, args=(np.zeros(3), [])) + a = np.zeros(3, dtype='i,i') + # (warning is issued a couple of times here) + self.assert_deprecated(op, args=(a, a[:-1]), num=None) + + # ragged array comparison returns True/False + a = np.array([1, np.array([1,2,3])], dtype=object) + b = np.array([1, np.array([1,2,3])], dtype=object) + self.assert_deprecated(op, args=(a, b), num=None) + + def test_string(self): + # For two string arrays, strings always raised the broadcasting error: + a = np.array(['a', 'b']) + b = np.array(['a', 'b', 'c']) + assert_raises(ValueError, lambda x, y: x == y, a, b) + + # The empty list is not cast to string, and this used to pass due + # to dtype mismatch; now (2018-06-21) it correctly leads to a + # FutureWarning. 
+ assert_warns(FutureWarning, lambda: a == []) + + def test_void_dtype_equality_failures(self): + class NotArray: + def __array__(self): + raise TypeError + + # Needed so Python 3 does not raise DeprecationWarning twice. + def __ne__(self, other): + return NotImplemented + + self.assert_deprecated(lambda: np.arange(2) == NotArray()) + self.assert_deprecated(lambda: np.arange(2) != NotArray()) + + struct1 = np.zeros(2, dtype="i4,i4") + struct2 = np.zeros(2, dtype="i4,i4,i4") + + assert_warns(FutureWarning, lambda: struct1 == 1) + assert_warns(FutureWarning, lambda: struct1 == struct2) + assert_warns(FutureWarning, lambda: struct1 != 1) + assert_warns(FutureWarning, lambda: struct1 != struct2) + + def test_array_richcompare_legacy_weirdness(self): + # It doesn't really work to use assert_deprecated here, b/c part of + # the point of assert_deprecated is to check that when warnings are + # set to "error" mode then the error is propagated -- which is good! + # But here we are testing a bunch of code that is deprecated *because* + # it has the habit of swallowing up errors and converting them into + # different warnings. So assert_warns will have to be sufficient. + assert_warns(FutureWarning, lambda: np.arange(2) == "a") + assert_warns(FutureWarning, lambda: np.arange(2) != "a") + # No warning for scalar comparisons + with warnings.catch_warnings(): + warnings.filterwarnings("error") + assert_(not (np.array(0) == "a")) + assert_(np.array(0) != "a") + assert_(not (np.int16(0) == "a")) + assert_(np.int16(0) != "a") + + for arg1 in [np.asarray(0), np.int16(0)]: + struct = np.zeros(2, dtype="i4,i4") + for arg2 in [struct, "a"]: + for f in [operator.lt, operator.le, operator.gt, operator.ge]: + with warnings.catch_warnings() as l: + warnings.filterwarnings("always") + assert_raises(TypeError, f, arg1, arg2) + assert_(not l) + + +class TestDatetime64Timezone(_DeprecationTestCase): + """Parsing of datetime64 with timezones deprecated in 1.11.0, because + datetime64 is now timezone naive rather than UTC only. + + It will be quite a while before we can remove this, because, at the very + least, a lot of existing code uses the 'Z' modifier to avoid conversion + from local time to UTC, even if otherwise it handles time in a timezone + naive fashion. + """ + def test_string(self): + self.assert_deprecated(np.datetime64, args=('2000-01-01T00+01',)) + self.assert_deprecated(np.datetime64, args=('2000-01-01T00Z',)) + + @pytest.mark.skipif(not _has_pytz, + reason="The pytz module is not available.") + def test_datetime(self): + tz = pytz.timezone('US/Eastern') + dt = datetime.datetime(2000, 1, 1, 0, 0, tzinfo=tz) + self.assert_deprecated(np.datetime64, args=(dt,)) + + +class TestNonCContiguousViewDeprecation(_DeprecationTestCase): + """View of non-C-contiguous arrays deprecated in 1.11.0. + + The deprecation will not be raised for arrays that are both C and F + contiguous, as C contiguous is dominant. There are more such arrays + with relaxed stride checking than without so the deprecation is not + as visible with relaxed stride checking in force. + """ + + def test_fortran_contiguous(self): + self.assert_deprecated(np.ones((2,2)).T.view, args=(complex,)) + self.assert_deprecated(np.ones((2,2)).T.view, args=(np.int8,)) + + +class TestArrayDataAttributeAssignmentDeprecation(_DeprecationTestCase): + """Assigning the 'data' attribute of an ndarray is unsafe as pointed + out in gh-7093. 
Eventually, such assignment should NOT be allowed, but + in the interests of maintaining backwards compatibility, only a Deprecation- + Warning will be raised instead for the time being to give developers time to + refactor relevant code. + """ + + def test_data_attr_assignment(self): + a = np.arange(10) + b = np.linspace(0, 1, 10) + + self.message = ("Assigning the 'data' attribute is an " + "inherently unsafe operation and will " + "be removed in the future.") + self.assert_deprecated(a.__setattr__, args=('data', b.data)) + + +class TestBinaryReprInsufficientWidthParameterForRepresentation(_DeprecationTestCase): + """ + If a 'width' parameter is passed into ``binary_repr`` that is insufficient to + represent the number in base 2 (positive) or 2's complement (negative) form, + the function used to silently ignore the parameter and return a representation + using the minimal number of bits needed for the form in question. Such behavior + is now considered unsafe from a user perspective and will raise an error in the future. + """ + + def test_insufficient_width_positive(self): + args = (10,) + kwargs = {'width': 2} + + self.message = ("Insufficient bit width provided. This behavior " + "will raise an error in the future.") + self.assert_deprecated(np.binary_repr, args=args, kwargs=kwargs) + + def test_insufficient_width_negative(self): + args = (-5,) + kwargs = {'width': 2} + + self.message = ("Insufficient bit width provided. This behavior " + "will raise an error in the future.") + self.assert_deprecated(np.binary_repr, args=args, kwargs=kwargs) + + +class TestDTypeAttributeIsDTypeDeprecation(_DeprecationTestCase): + # Deprecated 2021-01-05, NumPy 1.21 + message = r".*`.dtype` attribute" + + def test_deprecation_dtype_attribute_is_dtype(self): + class dt: + dtype = "f8" + + class vdt(np.void): + dtype = "f,f" + + self.assert_deprecated(lambda: np.dtype(dt)) + self.assert_deprecated(lambda: np.dtype(dt())) + self.assert_deprecated(lambda: np.dtype(vdt)) + self.assert_deprecated(lambda: np.dtype(vdt(1))) + + +class TestTestDeprecated: + def test_assert_deprecated(self): + test_case_instance = _DeprecationTestCase() + test_case_instance.setup() + assert_raises(AssertionError, + test_case_instance.assert_deprecated, + lambda: None) + + def foo(): + warnings.warn("foo", category=DeprecationWarning, stacklevel=2) + + test_case_instance.assert_deprecated(foo) + test_case_instance.teardown() + + +class TestNonNumericConjugate(_DeprecationTestCase): + """ + Deprecate no-op behavior of ndarray.conjugate on non-numeric dtypes, + which conflicts with the error behavior of np.conjugate. 
+ """ + def test_conjugate(self): + for a in np.array(5), np.array(5j): + self.assert_not_deprecated(a.conjugate) + for a in (np.array('s'), np.array('2016', 'M'), + np.array((1, 2), [('a', int), ('b', int)])): + self.assert_deprecated(a.conjugate) + + +class TestNPY_CHAR(_DeprecationTestCase): + # 2017-05-03, 1.13.0 + def test_npy_char_deprecation(self): + from numpy.core._multiarray_tests import npy_char_deprecation + self.assert_deprecated(npy_char_deprecation) + assert_(npy_char_deprecation() == 'S1') + + +class TestPyArray_AS1D(_DeprecationTestCase): + def test_npy_pyarrayas1d_deprecation(self): + from numpy.core._multiarray_tests import npy_pyarrayas1d_deprecation + assert_raises(NotImplementedError, npy_pyarrayas1d_deprecation) + + +class TestPyArray_AS2D(_DeprecationTestCase): + def test_npy_pyarrayas2d_deprecation(self): + from numpy.core._multiarray_tests import npy_pyarrayas2d_deprecation + assert_raises(NotImplementedError, npy_pyarrayas2d_deprecation) + + +class Test_UPDATEIFCOPY(_DeprecationTestCase): + """ + v1.14 deprecates creating an array with the UPDATEIFCOPY flag, use + WRITEBACKIFCOPY instead + """ + def test_npy_updateifcopy_deprecation(self): + from numpy.core._multiarray_tests import npy_updateifcopy_deprecation + arr = np.arange(9).reshape(3, 3) + v = arr.T + self.assert_deprecated(npy_updateifcopy_deprecation, args=(v,)) + + +class TestDatetimeEvent(_DeprecationTestCase): + # 2017-08-11, 1.14.0 + def test_3_tuple(self): + for cls in (np.datetime64, np.timedelta64): + # two valid uses - (unit, num) and (unit, num, den, None) + self.assert_not_deprecated(cls, args=(1, ('ms', 2))) + self.assert_not_deprecated(cls, args=(1, ('ms', 2, 1, None))) + + # trying to use the event argument, removed in 1.7.0, is deprecated + # it used to be a uint8 + self.assert_deprecated(cls, args=(1, ('ms', 2, 'event'))) + self.assert_deprecated(cls, args=(1, ('ms', 2, 63))) + self.assert_deprecated(cls, args=(1, ('ms', 2, 1, 'event'))) + self.assert_deprecated(cls, args=(1, ('ms', 2, 1, 63))) + + +class TestTruthTestingEmptyArrays(_DeprecationTestCase): + # 2017-09-25, 1.14.0 + message = '.*truth value of an empty array is ambiguous.*' + + def test_1d(self): + self.assert_deprecated(bool, args=(np.array([]),)) + + def test_2d(self): + self.assert_deprecated(bool, args=(np.zeros((1, 0)),)) + self.assert_deprecated(bool, args=(np.zeros((0, 1)),)) + self.assert_deprecated(bool, args=(np.zeros((0, 0)),)) + + +class TestBincount(_DeprecationTestCase): + # 2017-06-01, 1.14.0 + def test_bincount_minlength(self): + self.assert_deprecated(lambda: np.bincount([1, 2, 3], minlength=None)) + + +class TestAlen(_DeprecationTestCase): + # 2019-08-02, 1.18.0 + def test_alen(self): + self.assert_deprecated(lambda: np.alen(np.array([1, 2, 3]))) + + +class TestGeneratorSum(_DeprecationTestCase): + # 2018-02-25, 1.15.0 + def test_generator_sum(self): + self.assert_deprecated(np.sum, args=((i for i in range(5)),)) + + +class TestPositiveOnNonNumerical(_DeprecationTestCase): + # 2018-06-28, 1.16.0 + def test_positive_on_non_number(self): + self.assert_deprecated(operator.pos, args=(np.array('foo'),)) + + +class TestFromstring(_DeprecationTestCase): + # 2017-10-19, 1.14 + def test_fromstring(self): + self.assert_deprecated(np.fromstring, args=('\x00'*80,)) + + +class TestFromStringAndFileInvalidData(_DeprecationTestCase): + # 2019-06-08, 1.17.0 + # Tests should be moved to real tests when deprecation is done. 
+ message = "string or file could not be read to its end" + + @pytest.mark.parametrize("invalid_str", [",invalid_data", "invalid_sep"]) + def test_deprecate_unparsable_data_file(self, invalid_str): + x = np.array([1.51, 2, 3.51, 4], dtype=float) + + with tempfile.TemporaryFile(mode="w") as f: + x.tofile(f, sep=',', format='%.2f') + f.write(invalid_str) + + f.seek(0) + self.assert_deprecated(lambda: np.fromfile(f, sep=",")) + f.seek(0) + self.assert_deprecated(lambda: np.fromfile(f, sep=",", count=5)) + # Should not raise: + with warnings.catch_warnings(): + warnings.simplefilter("error", DeprecationWarning) + f.seek(0) + res = np.fromfile(f, sep=",", count=4) + assert_array_equal(res, x) + + @pytest.mark.parametrize("invalid_str", [",invalid_data", "invalid_sep"]) + def test_deprecate_unparsable_string(self, invalid_str): + x = np.array([1.51, 2, 3.51, 4], dtype=float) + x_str = "1.51,2,3.51,4{}".format(invalid_str) + + self.assert_deprecated(lambda: np.fromstring(x_str, sep=",")) + self.assert_deprecated(lambda: np.fromstring(x_str, sep=",", count=5)) + + # The C-level API can use not fixed size, but 0 terminated strings, + # so test that as well: + bytestr = x_str.encode("ascii") + self.assert_deprecated(lambda: fromstring_null_term_c_api(bytestr)) + + with assert_warns(DeprecationWarning): + # this is slightly strange, in that fromstring leaves data + # potentially uninitialized (would be good to error when all is + # read, but count is larger then actual data maybe). + res = np.fromstring(x_str, sep=",", count=5) + assert_array_equal(res[:-1], x) + + with warnings.catch_warnings(): + warnings.simplefilter("error", DeprecationWarning) + + # Should not raise: + res = np.fromstring(x_str, sep=",", count=4) + assert_array_equal(res, x) + + +class Test_GetSet_NumericOps(_DeprecationTestCase): + # 2018-09-20, 1.16.0 + def test_get_numeric_ops(self): + from numpy.core._multiarray_tests import getset_numericops + self.assert_deprecated(getset_numericops, num=2) + + # empty kwargs prevents any state actually changing which would break + # other tests. + self.assert_deprecated(np.set_numeric_ops, kwargs={}) + assert_raises(ValueError, np.set_numeric_ops, add='abc') + + +class TestShape1Fields(_DeprecationTestCase): + warning_cls = FutureWarning + + # 2019-05-20, 1.17.0 + def test_shape_1_fields(self): + self.assert_deprecated(np.dtype, args=([('a', int, 1)],)) + + +class TestNonZero(_DeprecationTestCase): + # 2019-05-26, 1.17.0 + def test_zerod(self): + self.assert_deprecated(lambda: np.nonzero(np.array(0))) + self.assert_deprecated(lambda: np.nonzero(np.array(1))) + + +def test_deprecate_ragged_arrays(): + # 2019-11-29 1.19.0 + # + # NEP 34 deprecated automatic object dtype when creating ragged + # arrays. Also see the "ragged" tests in `test_multiarray` + # + # emits a VisibleDeprecationWarning + arg = [1, [2, 3]] + with assert_warns(np.VisibleDeprecationWarning): + np.array(arg) + + +class TestTooDeepDeprecation(_VisibleDeprecationTestCase): + # NumPy 1.20, 2020-05-08 + # This is a bit similar to the above ragged array deprecation case. 
+    message = re.escape("Creating an ndarray from nested sequences exceeding")
+
+    def test_deprecation(self):
+        nested = [1]
+        for i in range(np.MAXDIMS - 1):
+            nested = [nested]
+        self.assert_not_deprecated(np.array, args=(nested,))
+        self.assert_not_deprecated(np.array,
+                args=(nested,), kwargs=dict(dtype=object))
+
+        self.assert_deprecated(np.array, args=([nested],))
+
+
+class TestToString(_DeprecationTestCase):
+    # 2020-03-06 1.19.0
+    message = re.escape("tostring() is deprecated. Use tobytes() instead.")
+
+    def test_tostring(self):
+        arr = np.array(list(b"test\xFF"), dtype=np.uint8)
+        self.assert_deprecated(arr.tostring)
+
+    def test_tostring_matches_tobytes(self):
+        arr = np.array(list(b"test\xFF"), dtype=np.uint8)
+        b = arr.tobytes()
+        with assert_warns(DeprecationWarning):
+            s = arr.tostring()
+        assert s == b
+
+
+class TestDTypeCoercion(_DeprecationTestCase):
+    # 2020-02-06 1.19.0
+    message = "Converting .* to a dtype .*is deprecated"
+    deprecated_types = [
+        # The builtin scalar super types:
+        np.generic, np.flexible, np.number,
+        np.inexact, np.floating, np.complexfloating,
+        np.integer, np.unsignedinteger, np.signedinteger,
+        # character is a deprecated S1 special case:
+        np.character,
+    ]
+
+    def test_dtype_coercion(self):
+        for scalar_type in self.deprecated_types:
+            self.assert_deprecated(np.dtype, args=(scalar_type,))
+
+    def test_array_construction(self):
+        for scalar_type in self.deprecated_types:
+            self.assert_deprecated(np.array, args=([], scalar_type,))
+
+    def test_not_deprecated(self):
+        # All specific types are not deprecated:
+        for group in np.sctypes.values():
+            for scalar_type in group:
+                self.assert_not_deprecated(np.dtype, args=(scalar_type,))
+
+        for scalar_type in [type, dict, list, tuple]:
+            # Typical python types are coerced to object currently:
+            self.assert_not_deprecated(np.dtype, args=(scalar_type,))
+
+
+class BuiltInRoundComplexDType(_DeprecationTestCase):
+    # 2020-03-31 1.19.0
+    deprecated_types = [np.csingle, np.cdouble, np.clongdouble]
+    not_deprecated_types = [
+        np.int8, np.int16, np.int32, np.int64,
+        np.uint8, np.uint16, np.uint32, np.uint64,
+        np.float16, np.float32, np.float64,
+    ]
+
+    def test_deprecated(self):
+        for scalar_type in self.deprecated_types:
+            scalar = scalar_type(0)
+            self.assert_deprecated(round, args=(scalar,))
+            self.assert_deprecated(round, args=(scalar, 0))
+            self.assert_deprecated(round, args=(scalar,), kwargs={'ndigits': 0})
+
+    def test_not_deprecated(self):
+        for scalar_type in self.not_deprecated_types:
+            scalar = scalar_type(0)
+            self.assert_not_deprecated(round, args=(scalar,))
+            self.assert_not_deprecated(round, args=(scalar, 0))
+            self.assert_not_deprecated(round, args=(scalar,), kwargs={'ndigits': 0})
+
+
+class TestIncorrectAdvancedIndexWithEmptyResult(_DeprecationTestCase):
+    # 2020-05-27, NumPy 1.20.0
+    message = "Out of bound index found. This was previously ignored.*"
+
+    @pytest.mark.parametrize("index", [([3, 0],), ([0, 0], [3, 0])])
+    def test_empty_subspace(self, index):
+        # Test for both a single and two/multiple advanced indices.
+        # These will raise an IndexError in the future.
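+        # For example (sketch): np.ones((2, 2, 0))[[3, 0]] selects rows 3 and
+        # 0 of an array with no elements, so the out-of-bound index 3 never
+        # has to be dereferenced and was historically not validated at all.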
+ arr = np.ones((2, 2, 0)) + self.assert_deprecated(arr.__getitem__, args=(index,)) + self.assert_deprecated(arr.__setitem__, args=(index, 0.)) + + # for this array, the subspace is only empty after applying the slice + arr2 = np.ones((2, 2, 1)) + index2 = (slice(0, 0),) + index + self.assert_deprecated(arr2.__getitem__, args=(index2,)) + self.assert_deprecated(arr2.__setitem__, args=(index2, 0.)) + + def test_empty_index_broadcast_not_deprecated(self): + arr = np.ones((2, 2, 2)) + + index = ([[3], [2]], []) # broadcast to an empty result. + self.assert_not_deprecated(arr.__getitem__, args=(index,)) + self.assert_not_deprecated(arr.__setitem__, + args=(index, np.empty((2, 0, 2)))) + + +class TestNonExactMatchDeprecation(_DeprecationTestCase): + # 2020-04-22 + def test_non_exact_match(self): + arr = np.array([[3, 6, 6], [4, 5, 1]]) + # misspelt mode check + self.assert_deprecated(lambda: np.ravel_multi_index(arr, (7, 6), mode='Cilp')) + # using completely different word with first character as R + self.assert_deprecated(lambda: np.searchsorted(arr[0], 4, side='Random')) + + +class TestDeprecatedGlobals(_DeprecationTestCase): + # 2020-06-06 + @pytest.mark.skipif( + sys.version_info < (3, 7), + reason='module-level __getattr__ not supported') + def test_type_aliases(self): + # from builtins + self.assert_deprecated(lambda: np.bool(True)) + self.assert_deprecated(lambda: np.int(1)) + self.assert_deprecated(lambda: np.float(1)) + self.assert_deprecated(lambda: np.complex(1)) + self.assert_deprecated(lambda: np.object()) + self.assert_deprecated(lambda: np.str('abc')) + + # from np.compat + self.assert_deprecated(lambda: np.long(1)) + self.assert_deprecated(lambda: np.unicode('abc')) + + # from np.core.numerictypes + self.assert_deprecated(lambda: np.typeDict) + + +class TestMatrixInOuter(_DeprecationTestCase): + # 2020-05-13 NumPy 1.20.0 + message = (r"add.outer\(\) was passed a numpy matrix as " + r"(first|second) argument.") + + def test_deprecated(self): + arr = np.array([1, 2, 3]) + m = np.array([1, 2, 3]).view(np.matrix) + self.assert_deprecated(np.add.outer, args=(m, m), num=2) + self.assert_deprecated(np.add.outer, args=(arr, m)) + self.assert_deprecated(np.add.outer, args=(m, arr)) + self.assert_not_deprecated(np.add.outer, args=(arr, arr)) + + +class TestRaggedArray(_DeprecationTestCase): + # 2020-07-24, NumPy 1.20.0 + message = "setting an array element with a sequence" + + def test_deprecated(self): + arr = np.ones((1, 1)) + # Deprecated if the array is a leave node: + self.assert_deprecated(lambda: np.array([arr, 0], dtype=np.float64)) + self.assert_deprecated(lambda: np.array([0, arr], dtype=np.float64)) + # And when it is an assignment into a lower dimensional subarray: + self.assert_deprecated(lambda: np.array([arr, [0]], dtype=np.float64)) + self.assert_deprecated(lambda: np.array([[0], arr], dtype=np.float64)) + + +class FlatteningConcatenateUnsafeCast(_DeprecationTestCase): + # NumPy 1.20, 2020-09-03 + message = "concatenate with `axis=None` will use same-kind casting" + + def test_deprecated(self): + self.assert_deprecated(np.concatenate, + args=(([0.], [1.]),), + kwargs=dict(axis=None, out=np.empty(2, dtype=np.int64))) + + def test_not_deprecated(self): + self.assert_not_deprecated(np.concatenate, + args=(([0.], [1.]),), + kwargs={'axis': None, 'out': np.empty(2, dtype=np.int64), + 'casting': "unsafe"}) + + with assert_raises(TypeError): + # Tests should notice if the deprecation warning is given first... 
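+            # (sketch) float64 -> int64 is not a "same_kind" cast, so once
+            # casting="same_kind" is requested explicitly this call has to
+            # raise TypeError rather than merely warn about the future
+            # default.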
+ np.concatenate(([0.], [1.]), out=np.empty(2, dtype=np.int64), + casting="same_kind") + + +class TestDeprecateSubarrayDTypeDuringArrayCoercion(_DeprecationTestCase): + warning_cls = FutureWarning + message = "(creating|casting) an array (with|to) a subarray dtype" + + def test_deprecated_array(self): + # Arrays are more complex, since they "broadcast" on success: + arr = np.array([1, 2]) + + self.assert_deprecated(lambda: arr.astype("(2)i,")) + with pytest.warns(FutureWarning): + res = arr.astype("(2)i,") + + assert_array_equal(res, [[1, 2], [1, 2]]) + + self.assert_deprecated(lambda: np.array(arr, dtype="(2)i,")) + with pytest.warns(FutureWarning): + res = np.array(arr, dtype="(2)i,") + + assert_array_equal(res, [[1, 2], [1, 2]]) + + with pytest.warns(FutureWarning): + res = np.array([[(1,), (2,)], arr], dtype="(2)i,") + + assert_array_equal(res, [[[1, 1], [2, 2]], [[1, 2], [1, 2]]]) + + def test_deprecated_and_error(self): + # These error paths do not give a warning, but will succeed in the + # future. + arr = np.arange(5 * 2).reshape(5, 2) + def check(): + with pytest.raises(ValueError): + arr.astype("(2,2)f") + + self.assert_deprecated(check) + + def check(): + with pytest.raises(ValueError): + np.array(arr, dtype="(2,2)f") + + self.assert_deprecated(check) + + +class TestFutureWarningArrayLikeNotIterable(_DeprecationTestCase): + # Deprecated 2020-12-09, NumPy 1.20 + warning_cls = FutureWarning + message = "The input object of type.*but not a sequence" + + @pytest.mark.parametrize("protocol", + ["__array__", "__array_interface__", "__array_struct__"]) + def test_deprecated(self, protocol): + """Test that these objects give a warning since they are not 0-D, + not coerced at the top level `np.array(obj)`, but nested, and do + *not* define the sequence protocol. + + NOTE: Tests for the versions including __len__ and __getitem__ exist + in `test_array_coercion.py` and they can be modified or amended + when this deprecation expired. + """ + blueprint = np.arange(10) + MyArr = type("MyArr", (), {protocol: getattr(blueprint, protocol)}) + self.assert_deprecated(lambda: np.array([MyArr()], dtype=object)) + + @pytest.mark.parametrize("protocol", + ["__array__", "__array_interface__", "__array_struct__"]) + def test_0d_not_deprecated(self, protocol): + # 0-D always worked (albeit it would use __float__ or similar for the + # conversion, which may not happen anymore) + blueprint = np.array(1.) 
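+        # (sketch) MyArr below copies just the chosen protocol attribute off
+        # the blueprint array, so instances look array-like without ever
+        # implementing the sequence protocol.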
+ MyArr = type("MyArr", (), {protocol: getattr(blueprint, protocol)}) + myarr = MyArr() + + self.assert_not_deprecated(lambda: np.array([myarr], dtype=object)) + res = np.array([myarr], dtype=object) + expected = np.empty(1, dtype=object) + expected[0] = myarr + assert_array_equal(res, expected) + + @pytest.mark.parametrize("protocol", + ["__array__", "__array_interface__", "__array_struct__"]) + def test_unnested_not_deprecated(self, protocol): + blueprint = np.arange(10) + MyArr = type("MyArr", (), {protocol: getattr(blueprint, protocol)}) + myarr = MyArr() + + self.assert_not_deprecated(lambda: np.array(myarr)) + res = np.array(myarr) + assert_array_equal(res, blueprint) + + @pytest.mark.parametrize("protocol", + ["__array__", "__array_interface__", "__array_struct__"]) + def test_strange_dtype_handling(self, protocol): + """The old code would actually use the dtype from the array, but + then end up not using the array (for dimension discovery) + """ + blueprint = np.arange(10).astype("f4") + MyArr = type("MyArr", (), {protocol: getattr(blueprint, protocol), + "__float__": lambda _: 0.5}) + myarr = MyArr() + + # Make sure we warn (and capture the FutureWarning) + with pytest.warns(FutureWarning, match=self.message): + res = np.array([[myarr]]) + + assert res.shape == (1, 1) + assert res.dtype == "f4" + assert res[0, 0] == 0.5 + + @pytest.mark.parametrize("protocol", + ["__array__", "__array_interface__", "__array_struct__"]) + def test_assignment_not_deprecated(self, protocol): + # If the result is dtype=object we do not unpack a nested array or + # array-like, if it is nested at exactly the right depth. + # NOTE: We actually do still call __array__, etc. but ignore the result + # in the end. For `dtype=object` we could optimize that away. + blueprint = np.arange(10).astype("f4") + MyArr = type("MyArr", (), {protocol: getattr(blueprint, protocol), + "__float__": lambda _: 0.5}) + myarr = MyArr() + + res = np.empty(3, dtype=object) + def set(): + res[:] = [myarr, myarr, myarr] + self.assert_not_deprecated(set) + assert res[0] is myarr + assert res[1] is myarr + assert res[2] is myarr + + +class TestDeprecatedUnpickleObjectScalar(_DeprecationTestCase): + # Deprecated 2020-11-24, NumPy 1.20 + """ + Technically, it should be impossible to create numpy object scalars, + but there was an unpickle path that would in theory allow it. That + path is invalid and must lead to the warning. + """ + message = "Unpickling a scalar with object dtype is deprecated." 
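+    # The path in question, as a sketch: pickle reduces a scalar to the
+    # private constructor np.core.multiarray.scalar(dtype, payload), and
+    # calling it with an object dtype is what the test below does directly.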
+ + def test_deprecated(self): + ctor = np.core.multiarray.scalar + self.assert_deprecated(lambda: ctor(np.dtype("O"), 1)) + +try: + with warnings.catch_warnings(): + warnings.simplefilter("always") + import nose # noqa: F401 +except ImportError: + HAVE_NOSE = False +else: + HAVE_NOSE = True + + +@pytest.mark.skipif(not HAVE_NOSE, reason="Needs nose") +class TestNoseDecoratorsDeprecated(_DeprecationTestCase): + class DidntSkipException(Exception): + pass + + def test_slow(self): + def _test_slow(): + @np.testing.dec.slow + def slow_func(x, y, z): + pass + + assert_(slow_func.slow) + self.assert_deprecated(_test_slow) + + def test_setastest(self): + def _test_setastest(): + @np.testing.dec.setastest() + def f_default(a): + pass + + @np.testing.dec.setastest(True) + def f_istest(a): + pass + + @np.testing.dec.setastest(False) + def f_isnottest(a): + pass + + assert_(f_default.__test__) + assert_(f_istest.__test__) + assert_(not f_isnottest.__test__) + self.assert_deprecated(_test_setastest, num=3) + + def test_skip_functions_hardcoded(self): + def _test_skip_functions_hardcoded(): + @np.testing.dec.skipif(True) + def f1(x): + raise self.DidntSkipException + + try: + f1('a') + except self.DidntSkipException: + raise Exception('Failed to skip') + except SkipTest().__class__: + pass + + @np.testing.dec.skipif(False) + def f2(x): + raise self.DidntSkipException + + try: + f2('a') + except self.DidntSkipException: + pass + except SkipTest().__class__: + raise Exception('Skipped when not expected to') + self.assert_deprecated(_test_skip_functions_hardcoded, num=2) + + def test_skip_functions_callable(self): + def _test_skip_functions_callable(): + def skip_tester(): + return skip_flag == 'skip me!' + + @np.testing.dec.skipif(skip_tester) + def f1(x): + raise self.DidntSkipException + + try: + skip_flag = 'skip me!' + f1('a') + except self.DidntSkipException: + raise Exception('Failed to skip') + except SkipTest().__class__: + pass + + @np.testing.dec.skipif(skip_tester) + def f2(x): + raise self.DidntSkipException + + try: + skip_flag = 'five is right out!' + f2('a') + except self.DidntSkipException: + pass + except SkipTest().__class__: + raise Exception('Skipped when not expected to') + self.assert_deprecated(_test_skip_functions_callable, num=2) + + def test_skip_generators_hardcoded(self): + def _test_skip_generators_hardcoded(): + @np.testing.dec.knownfailureif(True, "This test is known to fail") + def g1(x): + yield from range(x) + + try: + for j in g1(10): + pass + except KnownFailureException().__class__: + pass + else: + raise Exception('Failed to mark as known failure') + + @np.testing.dec.knownfailureif(False, "This test is NOT known to fail") + def g2(x): + yield from range(x) + raise self.DidntSkipException('FAIL') + + try: + for j in g2(10): + pass + except KnownFailureException().__class__: + raise Exception('Marked incorrectly as known failure') + except self.DidntSkipException: + pass + self.assert_deprecated(_test_skip_generators_hardcoded, num=2) + + def test_skip_generators_callable(self): + def _test_skip_generators_callable(): + def skip_tester(): + return skip_flag == 'skip me!' + + @np.testing.dec.knownfailureif(skip_tester, "This test is known to fail") + def g1(x): + yield from range(x) + + try: + skip_flag = 'skip me!' 
+ for j in g1(10): + pass + except KnownFailureException().__class__: + pass + else: + raise Exception('Failed to mark as known failure') + + @np.testing.dec.knownfailureif(skip_tester, "This test is NOT known to fail") + def g2(x): + yield from range(x) + raise self.DidntSkipException('FAIL') + + try: + skip_flag = 'do not skip' + for j in g2(10): + pass + except KnownFailureException().__class__: + raise Exception('Marked incorrectly as known failure') + except self.DidntSkipException: + pass + self.assert_deprecated(_test_skip_generators_callable, num=2) + + def test_deprecated(self): + def _test_deprecated(): + @np.testing.dec.deprecated(True) + def non_deprecated_func(): + pass + + @np.testing.dec.deprecated() + def deprecated_func(): + import warnings + warnings.warn("TEST: deprecated func", DeprecationWarning, stacklevel=1) + + @np.testing.dec.deprecated() + def deprecated_func2(): + import warnings + warnings.warn("AHHHH", stacklevel=1) + raise ValueError + + @np.testing.dec.deprecated() + def deprecated_func3(): + import warnings + warnings.warn("AHHHH", stacklevel=1) + + # marked as deprecated, but does not raise DeprecationWarning + assert_raises(AssertionError, non_deprecated_func) + # should be silent + deprecated_func() + with warnings.catch_warnings(record=True): + warnings.simplefilter("always") # do not propagate unrelated warnings + # fails if deprecated decorator just disables test. See #1453. + assert_raises(ValueError, deprecated_func2) + # warning is not a DeprecationWarning + assert_raises(AssertionError, deprecated_func3) + self.assert_deprecated(_test_deprecated, num=4) + + def test_parametrize(self): + def _test_parametrize(): + # dec.parametrize assumes that it is being run by nose. Because + # we are running under pytest, we need to explicitly check the + # results. 
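+            # (sketch) under pytest, calling the decorated function returns
+            # the generator of (func, *args) tuples that nose would normally
+            # consume, so each case is invoked by hand and counted below.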
+ @np.testing.dec.parametrize('base, power, expected', + [(1, 1, 1), + (2, 1, 2), + (2, 2, 4)]) + def check_parametrize(base, power, expected): + assert_(base**power == expected) + + count = 0 + for test in check_parametrize(): + test[0](*test[1:]) + count += 1 + assert_(count == 3) + self.assert_deprecated(_test_parametrize) + + +class TestSingleElementSignature(_DeprecationTestCase): + # Deprecated 2021-04-01, NumPy 1.21 + message = r"The use of a length 1" + + def test_deprecated(self): + self.assert_deprecated(lambda: np.add(1, 2, signature="d")) + self.assert_deprecated(lambda: np.add(1, 2, sig=(np.dtype("l"),))) + + +class TestComparisonBadDType(_DeprecationTestCase): + # Deprecated 2021-04-01, NumPy 1.21 + message = r"using `dtype=` in comparisons is only useful for" + + def test_deprecated(self): + self.assert_deprecated(lambda: np.equal(1, 1, dtype=np.int64)) + # Not an error only for the transition + self.assert_deprecated(lambda: np.equal(1, 1, sig=(None, None, "l"))) + + def test_not_deprecated(self): + np.equal(True, False, dtype=bool) + np.equal(3, 5, dtype=bool, casting="unsafe") + np.equal([None], [4], dtype=object) + +class TestComparisonBadObjectDType(_DeprecationTestCase): + # Deprecated 2021-04-01, NumPy 1.21 (different branch of the above one) + message = r"using `dtype=object` \(or equivalent signature\) will" + warning_cls = FutureWarning + + def test_deprecated(self): + self.assert_deprecated(lambda: np.equal(1, 1, dtype=object)) + self.assert_deprecated( + lambda: np.equal(1, 1, sig=(None, None, object))) + + +class TestSpecialAttributeLookupFailure(_DeprecationTestCase): + message = r"An exception was ignored while fetching the attribute" + + class WeirdArrayLike: + @property + def __array__(self): + raise RuntimeError("oops!") + + class WeirdArrayInterface: + @property + def __array_interface__(self): + raise RuntimeError("oops!") + + def test_deprecated(self): + self.assert_deprecated(lambda: np.array(self.WeirdArrayLike())) + self.assert_deprecated(lambda: np.array(self.WeirdArrayInterface())) + + +class TestCtypesGetter(_DeprecationTestCase): + # Deprecated 2021-05-18, Numpy 1.21.0 + warning_cls = DeprecationWarning + ctypes = np.array([1]).ctypes + + @pytest.mark.parametrize( + "name", ["get_data", "get_shape", "get_strides", "get_as_parameter"] + ) + def test_deprecated(self, name: str) -> None: + func = getattr(self.ctypes, name) + self.assert_deprecated(lambda: func()) + + @pytest.mark.parametrize( + "name", ["data", "shape", "strides", "_as_parameter_"] + ) + def test_not_deprecated(self, name: str) -> None: + self.assert_not_deprecated(lambda: getattr(self.ctypes, name)) + + +class TestUFuncForcedDTypeWarning(_DeprecationTestCase): + message = "The `dtype` and `signature` arguments to ufuncs only select the" + + def test_not_deprecated(self): + import pickle + # does not warn (test relies on bad pickling behaviour, simply remove + # it if the `assert int64 is not int64_2` should start failing. 
+ int64 = np.dtype("int64") + int64_2 = pickle.loads(pickle.dumps(int64)) + assert int64 is not int64_2 + self.assert_not_deprecated(lambda: np.add(3, 4, dtype=int64_2)) + + def test_deprecation(self): + int64 = np.dtype("int64") + self.assert_deprecated(lambda: np.add(3, 5, dtype=int64.newbyteorder())) + self.assert_deprecated(lambda: np.add(3, 5, dtype="m8[ns]")) + + def test_behaviour(self): + int64 = np.dtype("int64") + arr = np.arange(10, dtype="m8[s]") + + with pytest.warns(DeprecationWarning, match=self.message): + np.add(3, 5, dtype=int64.newbyteorder()) + with pytest.warns(DeprecationWarning, match=self.message): + np.add(3, 5, dtype="m8[ns]") # previously used the "ns" + with pytest.warns(DeprecationWarning, match=self.message): + np.add(arr, arr, dtype="m8[ns]") # never preserved the "ns" + with pytest.warns(DeprecationWarning, match=self.message): + np.maximum(arr, arr, dtype="m8[ns]") # previously used the "ns" + with pytest.warns(DeprecationWarning, match=self.message): + np.maximum.reduce(arr, dtype="m8[ns]") # never preserved the "ns" + + +PARTITION_DICT = { + "partition method": np.arange(10).partition, + "argpartition method": np.arange(10).argpartition, + "partition function": lambda kth: np.partition(np.arange(10), kth), + "argpartition function": lambda kth: np.argpartition(np.arange(10), kth), +} + + +@pytest.mark.parametrize("func", PARTITION_DICT.values(), ids=PARTITION_DICT) +class TestPartitionBoolIndex(_DeprecationTestCase): + # Deprecated 2021-09-29, NumPy 1.22 + warning_cls = DeprecationWarning + message = "Passing booleans as partition index is deprecated" + + def test_deprecated(self, func): + self.assert_deprecated(lambda: func(True)) + self.assert_deprecated(lambda: func([False, True])) + + def test_not_deprecated(self, func): + self.assert_not_deprecated(lambda: func(1)) + self.assert_not_deprecated(lambda: func([0, 1])) + + +class TestMachAr(_DeprecationTestCase): + # Deprecated 2021-10-19, NumPy 1.22 + warning_cls = DeprecationWarning + + def test_deprecated(self): + self.assert_deprecated(lambda: np.MachAr) + + def test_deprecated_module(self): + self.assert_deprecated(lambda: getattr(np.core, "machar")) + + def test_deprecated_attr(self): + finfo = np.finfo(float) + self.assert_deprecated(lambda: getattr(finfo, "machar")) + + +class TestQuantileInterpolationDeprecation(_DeprecationTestCase): + # Deprecated 2021-11-08, NumPy 1.22 + @pytest.mark.parametrize("func", + [np.percentile, np.quantile, np.nanpercentile, np.nanquantile]) + def test_deprecated(self, func): + self.assert_deprecated( + lambda: func([0., 1.], 0., interpolation="linear")) + self.assert_deprecated( + lambda: func([0., 1.], 0., interpolation="nearest")) + + @pytest.mark.parametrize("func", + [np.percentile, np.quantile, np.nanpercentile, np.nanquantile]) + def test_both_passed(self, func): + with warnings.catch_warnings(): + # catch the DeprecationWarning so that it does not raise: + warnings.simplefilter("always", DeprecationWarning) + with pytest.raises(TypeError): + func([0., 1.], 0., interpolation="nearest", method="nearest") diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_dlpack.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_dlpack.py new file mode 100644 index 0000000..eb9a176 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_dlpack.py @@ -0,0 +1,123 @@ +import sys +import pytest + +import numpy as np +from numpy.testing import assert_array_equal, IS_PYPY + + +class TestDLPack: + @pytest.mark.skipif(IS_PYPY, 
reason="PyPy can't get refcounts.") + def test_dunder_dlpack_refcount(self): + x = np.arange(5) + y = x.__dlpack__() + assert sys.getrefcount(x) == 3 + del y + assert sys.getrefcount(x) == 2 + + def test_dunder_dlpack_stream(self): + x = np.arange(5) + x.__dlpack__(stream=None) + + with pytest.raises(RuntimeError): + x.__dlpack__(stream=1) + + def test_strides_not_multiple_of_itemsize(self): + dt = np.dtype([('int', np.int32), ('char', np.int8)]) + y = np.zeros((5,), dtype=dt) + z = y['int'] + + with pytest.raises(RuntimeError): + np._from_dlpack(z) + + @pytest.mark.skipif(IS_PYPY, reason="PyPy can't get refcounts.") + def test_from_dlpack_refcount(self): + x = np.arange(5) + y = np._from_dlpack(x) + assert sys.getrefcount(x) == 3 + del y + assert sys.getrefcount(x) == 2 + + @pytest.mark.parametrize("dtype", [ + np.int8, np.int16, np.int32, np.int64, + np.uint8, np.uint16, np.uint32, np.uint64, + np.float16, np.float32, np.float64, + np.complex64, np.complex128 + ]) + def test_dtype_passthrough(self, dtype): + x = np.arange(5, dtype=dtype) + y = np._from_dlpack(x) + + assert y.dtype == x.dtype + assert_array_equal(x, y) + + def test_invalid_dtype(self): + x = np.asarray(np.datetime64('2021-05-27')) + + with pytest.raises(TypeError): + np._from_dlpack(x) + + def test_invalid_byte_swapping(self): + dt = np.dtype('=i8').newbyteorder() + x = np.arange(5, dtype=dt) + + with pytest.raises(TypeError): + np._from_dlpack(x) + + def test_non_contiguous(self): + x = np.arange(25).reshape((5, 5)) + + y1 = x[0] + assert_array_equal(y1, np._from_dlpack(y1)) + + y2 = x[:, 0] + assert_array_equal(y2, np._from_dlpack(y2)) + + y3 = x[1, :] + assert_array_equal(y3, np._from_dlpack(y3)) + + y4 = x[1] + assert_array_equal(y4, np._from_dlpack(y4)) + + y5 = np.diagonal(x).copy() + assert_array_equal(y5, np._from_dlpack(y5)) + + @pytest.mark.parametrize("ndim", range(33)) + def test_higher_dims(self, ndim): + shape = (1,) * ndim + x = np.zeros(shape, dtype=np.float64) + + assert shape == np._from_dlpack(x).shape + + def test_dlpack_device(self): + x = np.arange(5) + assert x.__dlpack_device__() == (1, 0) + y = np._from_dlpack(x) + assert y.__dlpack_device__() == (1, 0) + z = y[::2] + assert z.__dlpack_device__() == (1, 0) + + def dlpack_deleter_exception(self): + x = np.arange(5) + _ = x.__dlpack__() + raise RuntimeError + + def test_dlpack_destructor_exception(self): + with pytest.raises(RuntimeError): + self.dlpack_deleter_exception() + + def test_readonly(self): + x = np.arange(5) + x.flags.writeable = False + with pytest.raises(TypeError): + x.__dlpack__() + + def test_ndim0(self): + x = np.array(1.0) + y = np._from_dlpack(x) + assert_array_equal(x, y) + + def test_size1dims_arrays(self): + x = np.ndarray(dtype='f8', shape=(10, 5, 1), strides=(8, 80, 4), + buffer=np.ones(1000, dtype=np.uint8), order='F') + y = np._from_dlpack(x) + assert_array_equal(x, y) diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_dtype.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_dtype.py new file mode 100644 index 0000000..e49604e --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_dtype.py @@ -0,0 +1,1614 @@ +import sys +import operator +import pytest +import ctypes +import gc +import types +from typing import Any + +import numpy as np +from numpy.core._rational_tests import rational +from numpy.core._multiarray_tests import create_custom_field_dtype +from numpy.testing import ( + assert_, assert_equal, assert_array_equal, assert_raises, HAS_REFCOUNT, + IS_PYSTON) +from 
numpy.compat import pickle +from itertools import permutations + + +def assert_dtype_equal(a, b): + assert_equal(a, b) + assert_equal(hash(a), hash(b), + "two equivalent types do not hash to the same value !") + +def assert_dtype_not_equal(a, b): + assert_(a != b) + assert_(hash(a) != hash(b), + "two different types hash to the same value !") + +class TestBuiltin: + @pytest.mark.parametrize('t', [int, float, complex, np.int32, str, object, + np.compat.unicode]) + def test_run(self, t): + """Only test hash runs at all.""" + dt = np.dtype(t) + hash(dt) + + @pytest.mark.parametrize('t', [int, float]) + def test_dtype(self, t): + # Make sure equivalent byte order char hash the same (e.g. < and = on + # little endian) + dt = np.dtype(t) + dt2 = dt.newbyteorder("<") + dt3 = dt.newbyteorder(">") + if dt == dt2: + assert_(dt.byteorder != dt2.byteorder, "bogus test") + assert_dtype_equal(dt, dt2) + else: + assert_(dt.byteorder != dt3.byteorder, "bogus test") + assert_dtype_equal(dt, dt3) + + def test_equivalent_dtype_hashing(self): + # Make sure equivalent dtypes with different type num hash equal + uintp = np.dtype(np.uintp) + if uintp.itemsize == 4: + left = uintp + right = np.dtype(np.uint32) + else: + left = uintp + right = np.dtype(np.ulonglong) + assert_(left == right) + assert_(hash(left) == hash(right)) + + def test_invalid_types(self): + # Make sure invalid type strings raise an error + + assert_raises(TypeError, np.dtype, 'O3') + assert_raises(TypeError, np.dtype, 'O5') + assert_raises(TypeError, np.dtype, 'O7') + assert_raises(TypeError, np.dtype, 'b3') + assert_raises(TypeError, np.dtype, 'h4') + assert_raises(TypeError, np.dtype, 'I5') + assert_raises(TypeError, np.dtype, 'e3') + assert_raises(TypeError, np.dtype, 'f5') + + if np.dtype('g').itemsize == 8 or np.dtype('g').itemsize == 16: + assert_raises(TypeError, np.dtype, 'g12') + elif np.dtype('g').itemsize == 12: + assert_raises(TypeError, np.dtype, 'g16') + + if np.dtype('l').itemsize == 8: + assert_raises(TypeError, np.dtype, 'l4') + assert_raises(TypeError, np.dtype, 'L4') + else: + assert_raises(TypeError, np.dtype, 'l8') + assert_raises(TypeError, np.dtype, 'L8') + + if np.dtype('q').itemsize == 8: + assert_raises(TypeError, np.dtype, 'q4') + assert_raises(TypeError, np.dtype, 'Q4') + else: + assert_raises(TypeError, np.dtype, 'q8') + assert_raises(TypeError, np.dtype, 'Q8') + + def test_richcompare_invalid_dtype_equality(self): + # Make sure objects that cannot be converted to valid + # dtypes results in False/True when compared to valid dtypes. + # Here 7 cannot be converted to dtype. No exceptions should be raised + + assert not np.dtype(np.int32) == 7, "dtype richcompare failed for ==" + assert np.dtype(np.int32) != 7, "dtype richcompare failed for !=" + + @pytest.mark.parametrize( + 'operation', + [operator.le, operator.lt, operator.ge, operator.gt]) + def test_richcompare_invalid_dtype_comparison(self, operation): + # Make sure TypeError is raised for comparison operators + # for invalid dtypes. Here 7 is an invalid dtype. 
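+        # (sketch) equality degrades gracefully (np.dtype(np.int32) == 7 is
+        # simply False), but an ordering such as np.dtype(np.int32) <= 7 has
+        # no sensible answer and must raise TypeError.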
+ + with pytest.raises(TypeError): + operation(np.dtype(np.int32), 7) + + @pytest.mark.parametrize("dtype", + ['Bool', 'Bytes0', 'Complex32', 'Complex64', + 'Datetime64', 'Float16', 'Float32', 'Float64', + 'Int8', 'Int16', 'Int32', 'Int64', + 'Object0', 'Str0', 'Timedelta64', + 'UInt8', 'UInt16', 'Uint32', 'UInt32', + 'Uint64', 'UInt64', 'Void0', + "Float128", "Complex128"]) + def test_numeric_style_types_are_invalid(self, dtype): + with assert_raises(TypeError): + np.dtype(dtype) + + @pytest.mark.parametrize( + 'value', + ['m8', 'M8', 'datetime64', 'timedelta64', + 'i4, (2,3)f8, f4', 'a3, 3u8, (3,4)a10', + '>f', 'f4', (64, 64)), (1,)), + ('rtile', '>f4', (64, 36))], (3,)), + ('bottom', [('bleft', ('>f4', (8, 64)), (1,)), + ('bright', '>f4', (8, 36))])]) + assert_equal(str(dt), + "[('top', [('tiles', ('>f4', (64, 64)), (1,)), " + "('rtile', '>f4', (64, 36))], (3,)), " + "('bottom', [('bleft', ('>f4', (8, 64)), (1,)), " + "('bright', '>f4', (8, 36))])]") + + # If the sticky aligned flag is set to True, it makes the + # str() function use a dict representation with an 'aligned' flag + dt = np.dtype([('top', [('tiles', ('>f4', (64, 64)), (1,)), + ('rtile', '>f4', (64, 36))], + (3,)), + ('bottom', [('bleft', ('>f4', (8, 64)), (1,)), + ('bright', '>f4', (8, 36))])], + align=True) + assert_equal(str(dt), + "{'names': ['top', 'bottom']," + " 'formats': [([('tiles', ('>f4', (64, 64)), (1,)), " + "('rtile', '>f4', (64, 36))], (3,)), " + "[('bleft', ('>f4', (8, 64)), (1,)), " + "('bright', '>f4', (8, 36))]]," + " 'offsets': [0, 76800]," + " 'itemsize': 80000," + " 'aligned': True}") + with np.printoptions(legacy='1.21'): + assert_equal(str(dt), + "{'names':['top','bottom'], " + "'formats':[([('tiles', ('>f4', (64, 64)), (1,)), " + "('rtile', '>f4', (64, 36))], (3,))," + "[('bleft', ('>f4', (8, 64)), (1,)), " + "('bright', '>f4', (8, 36))]], " + "'offsets':[0,76800], " + "'itemsize':80000, " + "'aligned':True}") + assert_equal(np.dtype(eval(str(dt))), dt) + + dt = np.dtype({'names': ['r', 'g', 'b'], 'formats': ['u1', 'u1', 'u1'], + 'offsets': [0, 1, 2], + 'titles': ['Red pixel', 'Green pixel', 'Blue pixel']}) + assert_equal(str(dt), + "[(('Red pixel', 'r'), 'u1'), " + "(('Green pixel', 'g'), 'u1'), " + "(('Blue pixel', 'b'), 'u1')]") + + dt = np.dtype({'names': ['rgba', 'r', 'g', 'b'], + 'formats': ['f4', (64, 64)), (1,)), + ('rtile', '>f4', (64, 36))], (3,)), + ('bottom', [('bleft', ('>f4', (8, 64)), (1,)), + ('bright', '>f4', (8, 36))])]) + assert_equal(repr(dt), + "dtype([('top', [('tiles', ('>f4', (64, 64)), (1,)), " + "('rtile', '>f4', (64, 36))], (3,)), " + "('bottom', [('bleft', ('>f4', (8, 64)), (1,)), " + "('bright', '>f4', (8, 36))])])") + + dt = np.dtype({'names': ['r', 'g', 'b'], 'formats': ['u1', 'u1', 'u1'], + 'offsets': [0, 1, 2], + 'titles': ['Red pixel', 'Green pixel', 'Blue pixel']}, + align=True) + assert_equal(repr(dt), + "dtype([(('Red pixel', 'r'), 'u1'), " + "(('Green pixel', 'g'), 'u1'), " + "(('Blue pixel', 'b'), 'u1')], align=True)") + + def test_repr_structured_not_packed(self): + dt = np.dtype({'names': ['rgba', 'r', 'g', 'b'], + 'formats': ['f4', (2, 1)), ('b', 'u4')]) + self.check(BigEndStruct, expected) + + def test_little_endian_structure_packed(self): + class LittleEndStruct(ctypes.LittleEndianStructure): + _fields_ = [ + ('one', ctypes.c_uint8), + ('two', ctypes.c_uint32) + ] + _pack_ = 1 + expected = np.dtype([('one', 'u1'), ('two', 'B'), + ('b', '>H') + ], align=True) + self.check(PaddedStruct, expected) + + def test_simple_endian_types(self): + 
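+        # (sketch) ctypes exposes explicit little/big-endian variants of each
+        # scalar type via __ctype_le__ / __ctype_be__; np.dtype() maps them
+        # onto '<' / '>' byte orders, except for single-byte types where the
+        # byte order is irrelevant.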
self.check(ctypes.c_uint16.__ctype_le__, np.dtype('u2')) + self.check(ctypes.c_uint8.__ctype_le__, np.dtype('u1')) + self.check(ctypes.c_uint8.__ctype_be__, np.dtype('u1')) + + all_types = set(np.typecodes['All']) + all_pairs = permutations(all_types, 2) + + @pytest.mark.parametrize("pair", all_pairs) + def test_pairs(self, pair): + """ + Check that np.dtype('x,y') matches [np.dtype('x'), np.dtype('y')] + Example: np.dtype('d,I') -> dtype([('f0', ' None: + alias = np.dtype[Any] + assert isinstance(alias, types.GenericAlias) + assert alias.__origin__ is np.dtype + + @pytest.mark.parametrize("code", np.typecodes["All"]) + def test_dtype_subclass(self, code: str) -> None: + cls = type(np.dtype(code)) + alias = cls[Any] + assert isinstance(alias, types.GenericAlias) + assert alias.__origin__ is cls + + @pytest.mark.parametrize("arg_len", range(4)) + def test_subscript_tuple(self, arg_len: int) -> None: + arg_tup = (Any,) * arg_len + if arg_len == 1: + assert np.dtype[arg_tup] + else: + with pytest.raises(TypeError): + np.dtype[arg_tup] + + def test_subscript_scalar(self) -> None: + assert np.dtype[Any] + + +def test_result_type_integers_and_unitless_timedelta64(): + # Regression test for gh-20077. The following call of `result_type` + # would cause a seg. fault. + td = np.timedelta64(4) + result = np.result_type(0, td) + assert_dtype_equal(result, td.dtype) + + +@pytest.mark.skipif(sys.version_info >= (3, 9), reason="Requires python 3.8") +def test_class_getitem_38() -> None: + match = "Type subscription requires python >= 3.9" + with pytest.raises(TypeError, match=match): + np.dtype[Any] diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_einsum.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_einsum.py new file mode 100644 index 0000000..1723116 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_einsum.py @@ -0,0 +1,1110 @@ +import itertools + +import pytest + +import numpy as np +from numpy.testing import ( + assert_, assert_equal, assert_array_equal, assert_almost_equal, + assert_raises, suppress_warnings, assert_raises_regex, assert_allclose + ) + +# Setup for optimize einsum +chars = 'abcdefghij' +sizes = np.array([2, 3, 4, 5, 4, 3, 2, 6, 5, 4, 3]) +global_size_dict = dict(zip(chars, sizes)) + + +class TestEinsum: + def test_einsum_errors(self): + for do_opt in [True, False]: + # Need enough arguments + assert_raises(ValueError, np.einsum, optimize=do_opt) + assert_raises(ValueError, np.einsum, "", optimize=do_opt) + + # subscripts must be a string + assert_raises(TypeError, np.einsum, 0, 0, optimize=do_opt) + + # out parameter must be an array + assert_raises(TypeError, np.einsum, "", 0, out='test', + optimize=do_opt) + + # order parameter must be a valid order + assert_raises(ValueError, np.einsum, "", 0, order='W', + optimize=do_opt) + + # casting parameter must be a valid casting + assert_raises(ValueError, np.einsum, "", 0, casting='blah', + optimize=do_opt) + + # dtype parameter must be a valid dtype + assert_raises(TypeError, np.einsum, "", 0, dtype='bad_data_type', + optimize=do_opt) + + # other keyword arguments are rejected + assert_raises(TypeError, np.einsum, "", 0, bad_arg=0, + optimize=do_opt) + + # issue 4528 revealed a segfault with this call + assert_raises(TypeError, np.einsum, *(None,)*63, optimize=do_opt) + + # number of operands must match count in subscripts string + assert_raises(ValueError, np.einsum, "", 0, 0, optimize=do_opt) + assert_raises(ValueError, np.einsum, ",", 0, [0], [0], + optimize=do_opt) + 
assert_raises(ValueError, np.einsum, ",", [0], optimize=do_opt) + + # can't have more subscripts than dimensions in the operand + assert_raises(ValueError, np.einsum, "i", 0, optimize=do_opt) + assert_raises(ValueError, np.einsum, "ij", [0, 0], optimize=do_opt) + assert_raises(ValueError, np.einsum, "...i", 0, optimize=do_opt) + assert_raises(ValueError, np.einsum, "i...j", [0, 0], optimize=do_opt) + assert_raises(ValueError, np.einsum, "i...", 0, optimize=do_opt) + assert_raises(ValueError, np.einsum, "ij...", [0, 0], optimize=do_opt) + + # invalid ellipsis + assert_raises(ValueError, np.einsum, "i..", [0, 0], optimize=do_opt) + assert_raises(ValueError, np.einsum, ".i...", [0, 0], optimize=do_opt) + assert_raises(ValueError, np.einsum, "j->..j", [0, 0], optimize=do_opt) + assert_raises(ValueError, np.einsum, "j->.j...", [0, 0], optimize=do_opt) + + # invalid subscript character + assert_raises(ValueError, np.einsum, "i%...", [0, 0], optimize=do_opt) + assert_raises(ValueError, np.einsum, "...j$", [0, 0], optimize=do_opt) + assert_raises(ValueError, np.einsum, "i->&", [0, 0], optimize=do_opt) + + # output subscripts must appear in input + assert_raises(ValueError, np.einsum, "i->ij", [0, 0], optimize=do_opt) + + # output subscripts may only be specified once + assert_raises(ValueError, np.einsum, "ij->jij", [[0, 0], [0, 0]], + optimize=do_opt) + + # dimensions much match when being collapsed + assert_raises(ValueError, np.einsum, "ii", + np.arange(6).reshape(2, 3), optimize=do_opt) + assert_raises(ValueError, np.einsum, "ii->i", + np.arange(6).reshape(2, 3), optimize=do_opt) + + # broadcasting to new dimensions must be enabled explicitly + assert_raises(ValueError, np.einsum, "i", np.arange(6).reshape(2, 3), + optimize=do_opt) + assert_raises(ValueError, np.einsum, "i->i", [[0, 1], [0, 1]], + out=np.arange(4).reshape(2, 2), optimize=do_opt) + with assert_raises_regex(ValueError, "'b'"): + # gh-11221 - 'c' erroneously appeared in the error message + a = np.ones((3, 3, 4, 5, 6)) + b = np.ones((3, 4, 5)) + np.einsum('aabcb,abc', a, b) + + # Check order kwarg, asanyarray allows 1d to pass through + assert_raises(ValueError, np.einsum, "i->i", np.arange(6).reshape(-1, 1), + optimize=do_opt, order='d') + + def test_einsum_views(self): + # pass-through + for do_opt in [True, False]: + a = np.arange(6) + a.shape = (2, 3) + + b = np.einsum("...", a, optimize=do_opt) + assert_(b.base is a) + + b = np.einsum(a, [Ellipsis], optimize=do_opt) + assert_(b.base is a) + + b = np.einsum("ij", a, optimize=do_opt) + assert_(b.base is a) + assert_equal(b, a) + + b = np.einsum(a, [0, 1], optimize=do_opt) + assert_(b.base is a) + assert_equal(b, a) + + # output is writeable whenever input is writeable + b = np.einsum("...", a, optimize=do_opt) + assert_(b.flags['WRITEABLE']) + a.flags['WRITEABLE'] = False + b = np.einsum("...", a, optimize=do_opt) + assert_(not b.flags['WRITEABLE']) + + # transpose + a = np.arange(6) + a.shape = (2, 3) + + b = np.einsum("ji", a, optimize=do_opt) + assert_(b.base is a) + assert_equal(b, a.T) + + b = np.einsum(a, [1, 0], optimize=do_opt) + assert_(b.base is a) + assert_equal(b, a.T) + + # diagonal + a = np.arange(9) + a.shape = (3, 3) + + b = np.einsum("ii->i", a, optimize=do_opt) + assert_(b.base is a) + assert_equal(b, [a[i, i] for i in range(3)]) + + b = np.einsum(a, [0, 0], [0], optimize=do_opt) + assert_(b.base is a) + assert_equal(b, [a[i, i] for i in range(3)]) + + # diagonal with various ways of broadcasting an additional dimension + a = np.arange(27) + a.shape = (3, 3, 
3) + + b = np.einsum("...ii->...i", a, optimize=do_opt) + assert_(b.base is a) + assert_equal(b, [[x[i, i] for i in range(3)] for x in a]) + + b = np.einsum(a, [Ellipsis, 0, 0], [Ellipsis, 0], optimize=do_opt) + assert_(b.base is a) + assert_equal(b, [[x[i, i] for i in range(3)] for x in a]) + + b = np.einsum("ii...->...i", a, optimize=do_opt) + assert_(b.base is a) + assert_equal(b, [[x[i, i] for i in range(3)] + for x in a.transpose(2, 0, 1)]) + + b = np.einsum(a, [0, 0, Ellipsis], [Ellipsis, 0], optimize=do_opt) + assert_(b.base is a) + assert_equal(b, [[x[i, i] for i in range(3)] + for x in a.transpose(2, 0, 1)]) + + b = np.einsum("...ii->i...", a, optimize=do_opt) + assert_(b.base is a) + assert_equal(b, [a[:, i, i] for i in range(3)]) + + b = np.einsum(a, [Ellipsis, 0, 0], [0, Ellipsis], optimize=do_opt) + assert_(b.base is a) + assert_equal(b, [a[:, i, i] for i in range(3)]) + + b = np.einsum("jii->ij", a, optimize=do_opt) + assert_(b.base is a) + assert_equal(b, [a[:, i, i] for i in range(3)]) + + b = np.einsum(a, [1, 0, 0], [0, 1], optimize=do_opt) + assert_(b.base is a) + assert_equal(b, [a[:, i, i] for i in range(3)]) + + b = np.einsum("ii...->i...", a, optimize=do_opt) + assert_(b.base is a) + assert_equal(b, [a.transpose(2, 0, 1)[:, i, i] for i in range(3)]) + + b = np.einsum(a, [0, 0, Ellipsis], [0, Ellipsis], optimize=do_opt) + assert_(b.base is a) + assert_equal(b, [a.transpose(2, 0, 1)[:, i, i] for i in range(3)]) + + b = np.einsum("i...i->i...", a, optimize=do_opt) + assert_(b.base is a) + assert_equal(b, [a.transpose(1, 0, 2)[:, i, i] for i in range(3)]) + + b = np.einsum(a, [0, Ellipsis, 0], [0, Ellipsis], optimize=do_opt) + assert_(b.base is a) + assert_equal(b, [a.transpose(1, 0, 2)[:, i, i] for i in range(3)]) + + b = np.einsum("i...i->...i", a, optimize=do_opt) + assert_(b.base is a) + assert_equal(b, [[x[i, i] for i in range(3)] + for x in a.transpose(1, 0, 2)]) + + b = np.einsum(a, [0, Ellipsis, 0], [Ellipsis, 0], optimize=do_opt) + assert_(b.base is a) + assert_equal(b, [[x[i, i] for i in range(3)] + for x in a.transpose(1, 0, 2)]) + + # triple diagonal + a = np.arange(27) + a.shape = (3, 3, 3) + + b = np.einsum("iii->i", a, optimize=do_opt) + assert_(b.base is a) + assert_equal(b, [a[i, i, i] for i in range(3)]) + + b = np.einsum(a, [0, 0, 0], [0], optimize=do_opt) + assert_(b.base is a) + assert_equal(b, [a[i, i, i] for i in range(3)]) + + # swap axes + a = np.arange(24) + a.shape = (2, 3, 4) + + b = np.einsum("ijk->jik", a, optimize=do_opt) + assert_(b.base is a) + assert_equal(b, a.swapaxes(0, 1)) + + b = np.einsum(a, [0, 1, 2], [1, 0, 2], optimize=do_opt) + assert_(b.base is a) + assert_equal(b, a.swapaxes(0, 1)) + + def check_einsum_sums(self, dtype, do_opt=False): + # Check various sums. Does many sizes to exercise unrolled loops. 
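+        # Quick reference for the subscript strings exercised below (sketch):
+        #     "i->"     sum over a vector        np.sum(a, axis=-1)
+        #     "ii"      trace                    np.trace(a)
+        #     "i,j"     outer product            np.outer(a, b)
+        #     "ij,j"    matrix times vector      np.dot(a, b)
+        #     "ij,jk"   matrix times matrix      np.dot(a, b)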
+ + # sum(a, axis=-1) + for n in range(1, 17): + a = np.arange(n, dtype=dtype) + assert_equal(np.einsum("i->", a, optimize=do_opt), + np.sum(a, axis=-1).astype(dtype)) + assert_equal(np.einsum(a, [0], [], optimize=do_opt), + np.sum(a, axis=-1).astype(dtype)) + + for n in range(1, 17): + a = np.arange(2*3*n, dtype=dtype).reshape(2, 3, n) + assert_equal(np.einsum("...i->...", a, optimize=do_opt), + np.sum(a, axis=-1).astype(dtype)) + assert_equal(np.einsum(a, [Ellipsis, 0], [Ellipsis], optimize=do_opt), + np.sum(a, axis=-1).astype(dtype)) + + # sum(a, axis=0) + for n in range(1, 17): + a = np.arange(2*n, dtype=dtype).reshape(2, n) + assert_equal(np.einsum("i...->...", a, optimize=do_opt), + np.sum(a, axis=0).astype(dtype)) + assert_equal(np.einsum(a, [0, Ellipsis], [Ellipsis], optimize=do_opt), + np.sum(a, axis=0).astype(dtype)) + + for n in range(1, 17): + a = np.arange(2*3*n, dtype=dtype).reshape(2, 3, n) + assert_equal(np.einsum("i...->...", a, optimize=do_opt), + np.sum(a, axis=0).astype(dtype)) + assert_equal(np.einsum(a, [0, Ellipsis], [Ellipsis], optimize=do_opt), + np.sum(a, axis=0).astype(dtype)) + + # trace(a) + for n in range(1, 17): + a = np.arange(n*n, dtype=dtype).reshape(n, n) + assert_equal(np.einsum("ii", a, optimize=do_opt), + np.trace(a).astype(dtype)) + assert_equal(np.einsum(a, [0, 0], optimize=do_opt), + np.trace(a).astype(dtype)) + + # gh-15961: should accept numpy int64 type in subscript list + np_array = np.asarray([0, 0]) + assert_equal(np.einsum(a, np_array, optimize=do_opt), + np.trace(a).astype(dtype)) + assert_equal(np.einsum(a, list(np_array), optimize=do_opt), + np.trace(a).astype(dtype)) + + # multiply(a, b) + assert_equal(np.einsum("..., ...", 3, 4), 12) # scalar case + for n in range(1, 17): + a = np.arange(3 * n, dtype=dtype).reshape(3, n) + b = np.arange(2 * 3 * n, dtype=dtype).reshape(2, 3, n) + assert_equal(np.einsum("..., ...", a, b, optimize=do_opt), + np.multiply(a, b)) + assert_equal(np.einsum(a, [Ellipsis], b, [Ellipsis], optimize=do_opt), + np.multiply(a, b)) + + # inner(a,b) + for n in range(1, 17): + a = np.arange(2 * 3 * n, dtype=dtype).reshape(2, 3, n) + b = np.arange(n, dtype=dtype) + assert_equal(np.einsum("...i, ...i", a, b, optimize=do_opt), np.inner(a, b)) + assert_equal(np.einsum(a, [Ellipsis, 0], b, [Ellipsis, 0], optimize=do_opt), + np.inner(a, b)) + + for n in range(1, 11): + a = np.arange(n * 3 * 2, dtype=dtype).reshape(n, 3, 2) + b = np.arange(n, dtype=dtype) + assert_equal(np.einsum("i..., i...", a, b, optimize=do_opt), + np.inner(a.T, b.T).T) + assert_equal(np.einsum(a, [0, Ellipsis], b, [0, Ellipsis], optimize=do_opt), + np.inner(a.T, b.T).T) + + # outer(a,b) + for n in range(1, 17): + a = np.arange(3, dtype=dtype)+1 + b = np.arange(n, dtype=dtype)+1 + assert_equal(np.einsum("i,j", a, b, optimize=do_opt), + np.outer(a, b)) + assert_equal(np.einsum(a, [0], b, [1], optimize=do_opt), + np.outer(a, b)) + + # Suppress the complex warnings for the 'as f8' tests + with suppress_warnings() as sup: + sup.filter(np.ComplexWarning) + + # matvec(a,b) / a.dot(b) where a is matrix, b is vector + for n in range(1, 17): + a = np.arange(4*n, dtype=dtype).reshape(4, n) + b = np.arange(n, dtype=dtype) + assert_equal(np.einsum("ij, j", a, b, optimize=do_opt), + np.dot(a, b)) + assert_equal(np.einsum(a, [0, 1], b, [1], optimize=do_opt), + np.dot(a, b)) + + c = np.arange(4, dtype=dtype) + np.einsum("ij,j", a, b, out=c, + dtype='f8', casting='unsafe', optimize=do_opt) + assert_equal(c, + np.dot(a.astype('f8'), + b.astype('f8')).astype(dtype)) + c[...] 
= 0 + np.einsum(a, [0, 1], b, [1], out=c, + dtype='f8', casting='unsafe', optimize=do_opt) + assert_equal(c, + np.dot(a.astype('f8'), + b.astype('f8')).astype(dtype)) + + for n in range(1, 17): + a = np.arange(4*n, dtype=dtype).reshape(4, n) + b = np.arange(n, dtype=dtype) + assert_equal(np.einsum("ji,j", a.T, b.T, optimize=do_opt), + np.dot(b.T, a.T)) + assert_equal(np.einsum(a.T, [1, 0], b.T, [1], optimize=do_opt), + np.dot(b.T, a.T)) + + c = np.arange(4, dtype=dtype) + np.einsum("ji,j", a.T, b.T, out=c, + dtype='f8', casting='unsafe', optimize=do_opt) + assert_equal(c, + np.dot(b.T.astype('f8'), + a.T.astype('f8')).astype(dtype)) + c[...] = 0 + np.einsum(a.T, [1, 0], b.T, [1], out=c, + dtype='f8', casting='unsafe', optimize=do_opt) + assert_equal(c, + np.dot(b.T.astype('f8'), + a.T.astype('f8')).astype(dtype)) + + # matmat(a,b) / a.dot(b) where a is matrix, b is matrix + for n in range(1, 17): + if n < 8 or dtype != 'f2': + a = np.arange(4*n, dtype=dtype).reshape(4, n) + b = np.arange(n*6, dtype=dtype).reshape(n, 6) + assert_equal(np.einsum("ij,jk", a, b, optimize=do_opt), + np.dot(a, b)) + assert_equal(np.einsum(a, [0, 1], b, [1, 2], optimize=do_opt), + np.dot(a, b)) + + for n in range(1, 17): + a = np.arange(4*n, dtype=dtype).reshape(4, n) + b = np.arange(n*6, dtype=dtype).reshape(n, 6) + c = np.arange(24, dtype=dtype).reshape(4, 6) + np.einsum("ij,jk", a, b, out=c, dtype='f8', casting='unsafe', + optimize=do_opt) + assert_equal(c, + np.dot(a.astype('f8'), + b.astype('f8')).astype(dtype)) + c[...] = 0 + np.einsum(a, [0, 1], b, [1, 2], out=c, + dtype='f8', casting='unsafe', optimize=do_opt) + assert_equal(c, + np.dot(a.astype('f8'), + b.astype('f8')).astype(dtype)) + + # matrix triple product (note this is not currently an efficient + # way to multiply 3 matrices) + a = np.arange(12, dtype=dtype).reshape(3, 4) + b = np.arange(20, dtype=dtype).reshape(4, 5) + c = np.arange(30, dtype=dtype).reshape(5, 6) + if dtype != 'f2': + assert_equal(np.einsum("ij,jk,kl", a, b, c, optimize=do_opt), + a.dot(b).dot(c)) + assert_equal(np.einsum(a, [0, 1], b, [1, 2], c, [2, 3], + optimize=do_opt), a.dot(b).dot(c)) + + d = np.arange(18, dtype=dtype).reshape(3, 6) + np.einsum("ij,jk,kl", a, b, c, out=d, + dtype='f8', casting='unsafe', optimize=do_opt) + tgt = a.astype('f8').dot(b.astype('f8')) + tgt = tgt.dot(c.astype('f8')).astype(dtype) + assert_equal(d, tgt) + + d[...] = 0 + np.einsum(a, [0, 1], b, [1, 2], c, [2, 3], out=d, + dtype='f8', casting='unsafe', optimize=do_opt) + tgt = a.astype('f8').dot(b.astype('f8')) + tgt = tgt.dot(c.astype('f8')).astype(dtype) + assert_equal(d, tgt) + + # tensordot(a, b) + if np.dtype(dtype) != np.dtype('f2'): + a = np.arange(60, dtype=dtype).reshape(3, 4, 5) + b = np.arange(24, dtype=dtype).reshape(4, 3, 2) + assert_equal(np.einsum("ijk, jil -> kl", a, b), + np.tensordot(a, b, axes=([1, 0], [0, 1]))) + assert_equal(np.einsum(a, [0, 1, 2], b, [1, 0, 3], [2, 3]), + np.tensordot(a, b, axes=([1, 0], [0, 1]))) + + c = np.arange(10, dtype=dtype).reshape(5, 2) + np.einsum("ijk,jil->kl", a, b, out=c, + dtype='f8', casting='unsafe', optimize=do_opt) + assert_equal(c, np.tensordot(a.astype('f8'), b.astype('f8'), + axes=([1, 0], [0, 1])).astype(dtype)) + c[...] 
= 0 + np.einsum(a, [0, 1, 2], b, [1, 0, 3], [2, 3], out=c, + dtype='f8', casting='unsafe', optimize=do_opt) + assert_equal(c, np.tensordot(a.astype('f8'), b.astype('f8'), + axes=([1, 0], [0, 1])).astype(dtype)) + + # logical_and(logical_and(a!=0, b!=0), c!=0) + a = np.array([1, 3, -2, 0, 12, 13, 0, 1], dtype=dtype) + b = np.array([0, 3.5, 0., -2, 0, 1, 3, 12], dtype=dtype) + c = np.array([True, True, False, True, True, False, True, True]) + assert_equal(np.einsum("i,i,i->i", a, b, c, + dtype='?', casting='unsafe', optimize=do_opt), + np.logical_and(np.logical_and(a != 0, b != 0), c != 0)) + assert_equal(np.einsum(a, [0], b, [0], c, [0], [0], + dtype='?', casting='unsafe'), + np.logical_and(np.logical_and(a != 0, b != 0), c != 0)) + + a = np.arange(9, dtype=dtype) + assert_equal(np.einsum(",i->", 3, a), 3*np.sum(a)) + assert_equal(np.einsum(3, [], a, [0], []), 3*np.sum(a)) + assert_equal(np.einsum("i,->", a, 3), 3*np.sum(a)) + assert_equal(np.einsum(a, [0], 3, [], []), 3*np.sum(a)) + + # Various stride0, contiguous, and SSE aligned variants + for n in range(1, 25): + a = np.arange(n, dtype=dtype) + if np.dtype(dtype).itemsize > 1: + assert_equal(np.einsum("...,...", a, a, optimize=do_opt), + np.multiply(a, a)) + assert_equal(np.einsum("i,i", a, a, optimize=do_opt), np.dot(a, a)) + assert_equal(np.einsum("i,->i", a, 2, optimize=do_opt), 2*a) + assert_equal(np.einsum(",i->i", 2, a, optimize=do_opt), 2*a) + assert_equal(np.einsum("i,->", a, 2, optimize=do_opt), 2*np.sum(a)) + assert_equal(np.einsum(",i->", 2, a, optimize=do_opt), 2*np.sum(a)) + + assert_equal(np.einsum("...,...", a[1:], a[:-1], optimize=do_opt), + np.multiply(a[1:], a[:-1])) + assert_equal(np.einsum("i,i", a[1:], a[:-1], optimize=do_opt), + np.dot(a[1:], a[:-1])) + assert_equal(np.einsum("i,->i", a[1:], 2, optimize=do_opt), 2*a[1:]) + assert_equal(np.einsum(",i->i", 2, a[1:], optimize=do_opt), 2*a[1:]) + assert_equal(np.einsum("i,->", a[1:], 2, optimize=do_opt), + 2*np.sum(a[1:])) + assert_equal(np.einsum(",i->", 2, a[1:], optimize=do_opt), + 2*np.sum(a[1:])) + + # An object array, summed as the data type + a = np.arange(9, dtype=object) + + b = np.einsum("i->", a, dtype=dtype, casting='unsafe') + assert_equal(b, np.sum(a)) + assert_equal(b.dtype, np.dtype(dtype)) + + b = np.einsum(a, [0], [], dtype=dtype, casting='unsafe') + assert_equal(b, np.sum(a)) + assert_equal(b.dtype, np.dtype(dtype)) + + # A case which was failing (ticket #1885) + p = np.arange(2) + 1 + q = np.arange(4).reshape(2, 2) + 3 + r = np.arange(4).reshape(2, 2) + 7 + assert_equal(np.einsum('z,mz,zm->', p, q, r), 253) + + # singleton dimensions broadcast (gh-10343) + p = np.ones((10,2)) + q = np.ones((1,2)) + assert_array_equal(np.einsum('ij,ij->j', p, q, optimize=True), + np.einsum('ij,ij->j', p, q, optimize=False)) + assert_array_equal(np.einsum('ij,ij->j', p, q, optimize=True), + [10.] * 2) + + # a blas-compatible contraction broadcasting case which was failing + # for optimize=True (ticket #10930) + x = np.array([2., 3.]) + y = np.array([4.]) + assert_array_equal(np.einsum("i, i", x, y, optimize=False), 20.) + assert_array_equal(np.einsum("i, i", x, y, optimize=True), 20.) 
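+ # Editor's note: an illustrative sketch (not part of the original suite) of + # the broadcast that the gh-10930 case above exercises; the length-1 + # operand is stretched to the common shape before contraction, so the + # result matches an explicit broadcast followed by a dot product. + x_b, y_b = np.broadcast_arrays(x, y) + assert_array_equal(np.einsum("i, i", x, y, optimize=False), np.dot(x_b, y_b))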
+ + # all-ones array was bypassing bug (ticket #10930) + p = np.ones((1, 5)) / 2 + q = np.ones((5, 5)) / 2 + for optimize in (True, False): + assert_array_equal(np.einsum("...ij,...jk->...ik", p, p, + optimize=optimize), + np.einsum("...ij,...jk->...ik", p, q, + optimize=optimize)) + assert_array_equal(np.einsum("...ij,...jk->...ik", p, q, + optimize=optimize), + np.full((1, 5), 1.25)) + + # Cases which were failing (gh-10899) + x = np.eye(2, dtype=dtype) + y = np.ones(2, dtype=dtype) + assert_array_equal(np.einsum("ji,i->", x, y, optimize=optimize), + [2.]) # contig_contig_outstride0_two + assert_array_equal(np.einsum("i,ij->", y, x, optimize=optimize), + [2.]) # stride0_contig_outstride0_two + assert_array_equal(np.einsum("ij,i->", x, y, optimize=optimize), + [2.]) # contig_stride0_outstride0_two + + def test_einsum_sums_int8(self): + self.check_einsum_sums('i1') + + def test_einsum_sums_uint8(self): + self.check_einsum_sums('u1') + + def test_einsum_sums_int16(self): + self.check_einsum_sums('i2') + + def test_einsum_sums_uint16(self): + self.check_einsum_sums('u2') + + def test_einsum_sums_int32(self): + self.check_einsum_sums('i4') + self.check_einsum_sums('i4', True) + + def test_einsum_sums_uint32(self): + self.check_einsum_sums('u4') + self.check_einsum_sums('u4', True) + + def test_einsum_sums_int64(self): + self.check_einsum_sums('i8') + + def test_einsum_sums_uint64(self): + self.check_einsum_sums('u8') + + def test_einsum_sums_float16(self): + self.check_einsum_sums('f2') + + def test_einsum_sums_float32(self): + self.check_einsum_sums('f4') + + def test_einsum_sums_float64(self): + self.check_einsum_sums('f8') + self.check_einsum_sums('f8', True) + + def test_einsum_sums_longdouble(self): + self.check_einsum_sums(np.longdouble) + + def test_einsum_sums_cfloat64(self): + self.check_einsum_sums('c8') + self.check_einsum_sums('c8', True) + + def test_einsum_sums_cfloat128(self): + self.check_einsum_sums('c16') + + def test_einsum_sums_clongdouble(self): + self.check_einsum_sums(np.clongdouble) + + def test_einsum_misc(self): + # This call used to crash because of a bug in + # PyArray_AssignZero + a = np.ones((1, 2)) + b = np.ones((2, 2, 1)) + assert_equal(np.einsum('ij...,j...->i...', a, b), [[[2], [2]]]) + assert_equal(np.einsum('ij...,j...->i...', a, b, optimize=True), [[[2], [2]]]) + + # Regression test for issue #10369 (test unicode inputs with Python 2) + assert_equal(np.einsum(u'ij...,j...->i...', a, b), [[[2], [2]]]) + assert_equal(np.einsum('...i,...i', [1, 2, 3], [2, 3, 4]), 20) + assert_equal(np.einsum(u'...i,...i', [1, 2, 3], [2, 3, 4]), 20) + assert_equal(np.einsum('...i,...i', [1, 2, 3], [2, 3, 4], + optimize=u'greedy'), 20) + + # The iterator had an issue with buffering this reduction + a = np.ones((5, 12, 4, 2, 3), np.int64) + b = np.ones((5, 12, 11), np.int64) + assert_equal(np.einsum('ijklm,ijn,ijn->', a, b, b), + np.einsum('ijklm,ijn->', a, b)) + assert_equal(np.einsum('ijklm,ijn,ijn->', a, b, b, optimize=True), + np.einsum('ijklm,ijn->', a, b, optimize=True)) + + # Issue #2027, was a problem in the contiguous 3-argument + # inner loop implementation + a = np.arange(1, 3) + b = np.arange(1, 5).reshape(2, 2) + c = np.arange(1, 9).reshape(4, 2) + assert_equal(np.einsum('x,yx,zx->xzy', a, b, c), + [[[1, 3], [3, 9], [5, 15], [7, 21]], + [[8, 16], [16, 32], [24, 48], [32, 64]]]) + assert_equal(np.einsum('x,yx,zx->xzy', a, b, c, optimize=True), + [[[1, 3], [3, 9], [5, 15], [7, 21]], + [[8, 16], [16, 32], [24, 48], [32, 64]]]) + + # Ensure explicitly setting out=None 
does not cause an error + # see issue gh-15776 and issue gh-15256 + assert_equal(np.einsum('i,j', [1], [2], out=None), [[2]]) + + def test_subscript_range(self): + # Issue #7741, make sure that all letters of Latin alphabet (both uppercase & lowercase) can be used + # when creating a subscript from arrays + a = np.ones((2, 3)) + b = np.ones((3, 4)) + np.einsum(a, [0, 20], b, [20, 2], [0, 2], optimize=False) + np.einsum(a, [0, 27], b, [27, 2], [0, 2], optimize=False) + np.einsum(a, [0, 51], b, [51, 2], [0, 2], optimize=False) + assert_raises(ValueError, lambda: np.einsum(a, [0, 52], b, [52, 2], [0, 2], optimize=False)) + assert_raises(ValueError, lambda: np.einsum(a, [-1, 5], b, [5, 2], [-1, 2], optimize=False)) + + def test_einsum_broadcast(self): + # Issue #2455 change in handling ellipsis + # remove the 'middle broadcast' error + # only use the 'RIGHT' iteration in prepare_op_axes + # adds auto broadcast on left where it belongs + # broadcast on right has to be explicit + # We need to test the optimized parsing as well + + A = np.arange(2 * 3 * 4).reshape(2, 3, 4) + B = np.arange(3) + ref = np.einsum('ijk,j->ijk', A, B, optimize=False) + for opt in [True, False]: + assert_equal(np.einsum('ij...,j...->ij...', A, B, optimize=opt), ref) + assert_equal(np.einsum('ij...,...j->ij...', A, B, optimize=opt), ref) + assert_equal(np.einsum('ij...,j->ij...', A, B, optimize=opt), ref) # used to raise error + + A = np.arange(12).reshape((4, 3)) + B = np.arange(6).reshape((3, 2)) + ref = np.einsum('ik,kj->ij', A, B, optimize=False) + for opt in [True, False]: + assert_equal(np.einsum('ik...,k...->i...', A, B, optimize=opt), ref) + assert_equal(np.einsum('ik...,...kj->i...j', A, B, optimize=opt), ref) + assert_equal(np.einsum('...k,kj', A, B, optimize=opt), ref) # used to raise error + assert_equal(np.einsum('ik,k...->i...', A, B, optimize=opt), ref) # used to raise error + + dims = [2, 3, 4, 5] + a = np.arange(np.prod(dims)).reshape(dims) + v = np.arange(dims[2]) + ref = np.einsum('ijkl,k->ijl', a, v, optimize=False) + for opt in [True, False]: + assert_equal(np.einsum('ijkl,k', a, v, optimize=opt), ref) + assert_equal(np.einsum('...kl,k', a, v, optimize=opt), ref) # used to raise error + assert_equal(np.einsum('...kl,k...', a, v, optimize=opt), ref) + + J, K, M = 160, 160, 120 + A = np.arange(J * K * M).reshape(1, 1, 1, J, K, M) + B = np.arange(J * K * M * 3).reshape(J, K, M, 3) + ref = np.einsum('...lmn,...lmno->...o', A, B, optimize=False) + for opt in [True, False]: + assert_equal(np.einsum('...lmn,lmno->...o', A, B, + optimize=opt), ref) # used to raise error + + def test_einsum_fixedstridebug(self): + # Issue #4485 obscure einsum bug + # This case revealed a bug in nditer where it reported a stride + # as 'fixed' (0) when it was in fact not fixed during processing + # (0 or 4). The reason for the bug was that the check for a fixed + # stride was using the information from the 2D inner loop reuse + # to restrict the iteration dimensions it had to validate to be + # the same, but that 2D inner loop reuse logic is only triggered + # during the buffer copying step, and hence it was invalid to + # rely on those values. The fix is to check all the dimensions + # of the stride in question, which in the test case reveals that + # the stride is not fixed. + # + # NOTE: This test is triggered by the fact that the default buffersize, + # used by einsum, is 8192, and 3*2731 = 8193, is larger than that + # and results in a mismatch between the buffering and the + # striding for operand A. 
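+ # Editor's sketch of the size arithmetic described in the note above (the + # 8192 default buffersize is taken from the note itself, not queried + # from the API): + default_buffersize = 8192 + assert 3 * 2731 == default_buffersize + 1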
+ A = np.arange(2 * 3).reshape(2, 3).astype(np.float32) + B = np.arange(2 * 3 * 2731).reshape(2, 3, 2731).astype(np.int16) + es = np.einsum('cl, cpx->lpx', A, B) + tp = np.tensordot(A, B, axes=(0, 0)) + assert_equal(es, tp) + # The following is the original test case from the bug report, + # made repeatable by changing random arrays to aranges. + A = np.arange(3 * 3).reshape(3, 3).astype(np.float64) + B = np.arange(3 * 3 * 64 * 64).reshape(3, 3, 64, 64).astype(np.float32) + es = np.einsum('cl, cpxy->lpxy', A, B) + tp = np.tensordot(A, B, axes=(0, 0)) + assert_equal(es, tp) + + def test_einsum_fixed_collapsingbug(self): + # Issue #5147. + # The bug only occurred when the output argument of einsum was used. + x = np.random.normal(0, 1, (5, 5, 5, 5)) + y1 = np.zeros((5, 5)) + np.einsum('aabb->ab', x, out=y1) + idx = np.arange(5) + y2 = x[idx[:, None], idx[:, None], idx, idx] + assert_equal(y1, y2) + + def test_einsum_failed_on_p9_and_s390x(self): + # Issues gh-14692 and gh-12689 + # Bug with signed vs unsigned char errored on power9 and s390x Linux + tensor = np.random.random_sample((10, 10, 10, 10)) + x = np.einsum('ijij->', tensor) + y = tensor.trace(axis1=0, axis2=2).trace() + assert_allclose(x, y) + + def test_einsum_all_contig_non_contig_output(self): + # Issue gh-5907, tests that the all-contiguous special case + # actually checks the contiguity of the output + x = np.ones((5, 5)) + out = np.ones(10)[::2] + correct_base = np.ones(10) + correct_base[::2] = 5 + # Always worked (inner iteration is done with 0-stride): + np.einsum('mi,mi,mi->m', x, x, x, out=out) + assert_array_equal(out.base, correct_base) + # Example 1: + out = np.ones(10)[::2] + np.einsum('im,im,im->m', x, x, x, out=out) + assert_array_equal(out.base, correct_base) + # Example 2, buffering causes x to be contiguous but + # special cases do not catch the operation before: + out = np.ones((2, 2, 2))[..., 0] + correct_base = np.ones((2, 2, 2)) + correct_base[..., 0] = 2 + x = np.ones((2, 2), np.float32) + np.einsum('ij,jk->ik', x, x, out=out) + assert_array_equal(out.base, correct_base) + + @pytest.mark.parametrize("dtype", + np.typecodes["AllFloat"] + np.typecodes["AllInteger"]) + def test_different_paths(self, dtype): + # Test originally added to cover broken float16 path: gh-20305 + # Likely most are covered elsewhere, at least partially. + dtype = np.dtype(dtype) + # Simple test, designed to exercise most specialized code paths, + # note the +0.5 for floats. This makes sure we use a float value + # where the results must be exact.
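+ # Editor's note (illustrative, not part of the original test): half-integer + # values such as 0.5, 1.5, ..., 6.5 are exactly representable even in + # float16, which is why the expected results below can be checked exactly. + assert np.float16(6.5) == 6.5 and np.float32(6.5) == 6.5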
+ arr = (np.arange(7) + 0.5).astype(dtype) + scalar = np.array(2, dtype=dtype) + + # contig -> scalar: + res = np.einsum('i->', arr) + assert res == arr.sum() + # contig, contig -> contig: + res = np.einsum('i,i->i', arr, arr) + assert_array_equal(res, arr * arr) + # noncontig, noncontig -> contig: + res = np.einsum('i,i->i', arr.repeat(2)[::2], arr.repeat(2)[::2]) + assert_array_equal(res, arr * arr) + # contig + contig -> scalar + assert np.einsum('i,i->', arr, arr) == (arr * arr).sum() + # contig + scalar -> contig (with out) + out = np.ones(7, dtype=dtype) + res = np.einsum('i,->i', arr, dtype.type(2), out=out) + assert_array_equal(res, arr * dtype.type(2)) + # scalar + contig -> contig + res = np.einsum(',i->i', scalar, arr) + assert_array_equal(res, arr * dtype.type(2)) + # scalar + contig -> scalar + res = np.einsum(',i->', scalar, arr) + # Compare with einsum so that sum round-offs make no difference: + assert res == np.einsum('i->', scalar * arr) + # contig + scalar -> scalar + res = np.einsum('i,->', arr, scalar) + # Compare with einsum so that sum round-offs make no difference: + assert res == np.einsum('i->', scalar * arr) + # contig + contig + contig -> scalar + arr = np.array([0.5, 0.5, 0.25, 4.5, 3.], dtype=dtype) + res = np.einsum('i,i,i->', arr, arr, arr) + assert_array_equal(res, (arr * arr * arr).sum()) + # four arrays: + res = np.einsum('i,i,i,i->', arr, arr, arr, arr) + assert_array_equal(res, (arr * arr * arr * arr).sum()) + + def test_small_boolean_arrays(self): + # See gh-5946. + # Use array of True embedded in False. + a = np.zeros((16, 1, 1), dtype=np.bool_)[:2] + a[...] = True + out = np.zeros((16, 1, 1), dtype=np.bool_)[:2] + tgt = np.ones((2, 1, 1), dtype=np.bool_) + res = np.einsum('...ij,...jk->...ik', a, a, out=out) + assert_equal(res, tgt) + + def test_out_is_res(self): + a = np.arange(9).reshape(3, 3) + res = np.einsum('...ij,...jk->...ik', a, a, out=a) + assert res is a + + def optimize_compare(self, subscripts, operands=None): + # Tests all paths of the optimization function against + # conventional einsum + if operands is None: + args = [subscripts] + terms = subscripts.split('->')[0].split(',') + for term in terms: + dims = [global_size_dict[x] for x in term] + args.append(np.random.rand(*dims)) + else: + args = [subscripts] + operands + + noopt = np.einsum(*args, optimize=False) + opt = np.einsum(*args, optimize='greedy') + assert_almost_equal(opt, noopt) + opt = np.einsum(*args, optimize='optimal') + assert_almost_equal(opt, noopt) + + def test_hadamard_like_products(self): + # Hadamard outer products + self.optimize_compare('a,ab,abc->abc') + self.optimize_compare('a,b,ab->ab') + + def test_index_transformations(self): + # Simple index transformation cases + self.optimize_compare('ea,fb,gc,hd,abcd->efgh') + self.optimize_compare('ea,fb,abcd,gc,hd->efgh') + self.optimize_compare('abcd,ea,fb,gc,hd->efgh') + + def test_complex(self): + # Long test cases + self.optimize_compare('acdf,jbje,gihb,hfac,gfac,gifabc,hfac') + self.optimize_compare('acdf,jbje,gihb,hfac,gfac,gifabc,hfac') + self.optimize_compare('cd,bdhe,aidb,hgca,gc,hgibcd,hgac') + self.optimize_compare('abhe,hidj,jgba,hiab,gab') + self.optimize_compare('bde,cdh,agdb,hica,ibd,hgicd,hiac') + self.optimize_compare('chd,bde,agbc,hiad,hgc,hgi,hiad') + self.optimize_compare('chd,bde,agbc,hiad,bdi,cgh,agdb') + self.optimize_compare('bdhe,acad,hiab,agac,hibd') + + def test_collapse(self): + # Inner products + self.optimize_compare('ab,ab,c->') + self.optimize_compare('ab,ab,c->c')
+ self.optimize_compare('ab,ab,cd,cd->') + self.optimize_compare('ab,ab,cd,cd->ac') + self.optimize_compare('ab,ab,cd,cd->cd') + self.optimize_compare('ab,ab,cd,cd,ef,ef->') + + def test_expand(self): + # Outer products + self.optimize_compare('ab,cd,ef->abcdef') + self.optimize_compare('ab,cd,ef->acdf') + self.optimize_compare('ab,cd,de->abcde') + self.optimize_compare('ab,cd,de->be') + self.optimize_compare('ab,bcd,cd->abcd') + self.optimize_compare('ab,bcd,cd->abd') + + def test_edge_cases(self): + # Difficult edge cases for optimization + self.optimize_compare('eb,cb,fb->cef') + self.optimize_compare('dd,fb,be,cdb->cef') + self.optimize_compare('bca,cdb,dbf,afc->') + self.optimize_compare('dcc,fce,ea,dbf->ab') + self.optimize_compare('fdf,cdd,ccd,afe->ae') + self.optimize_compare('abcd,ad') + self.optimize_compare('ed,fcd,ff,bcf->be') + self.optimize_compare('baa,dcf,af,cde->be') + self.optimize_compare('bd,db,eac->ace') + self.optimize_compare('fff,fae,bef,def->abd') + self.optimize_compare('efc,dbc,acf,fd->abe') + self.optimize_compare('ba,ac,da->bcd') + + def test_inner_product(self): + # Inner products + self.optimize_compare('ab,ab') + self.optimize_compare('ab,ba') + self.optimize_compare('abc,abc') + self.optimize_compare('abc,bac') + self.optimize_compare('abc,cba') + + def test_random_cases(self): + # Randomly built test cases + self.optimize_compare('aab,fa,df,ecc->bde') + self.optimize_compare('ecb,fef,bad,ed->ac') + self.optimize_compare('bcf,bbb,fbf,fc->') + self.optimize_compare('bb,ff,be->e') + self.optimize_compare('bcb,bb,fc,fff->') + self.optimize_compare('fbb,dfd,fc,fc->') + self.optimize_compare('afd,ba,cc,dc->bf') + self.optimize_compare('adb,bc,fa,cfc->d') + self.optimize_compare('bbd,bda,fc,db->acf') + self.optimize_compare('dba,ead,cad->bce') + self.optimize_compare('aef,fbc,dca->bde') + + def test_combined_views_mapping(self): + # gh-10792 + a = np.arange(9).reshape(1, 1, 3, 1, 3) + b = np.einsum('bbcdc->d', a) + assert_equal(b, [12]) + + def test_broadcasting_dot_cases(self): + # Ensures broadcasting cases are not mistaken for GEMM + + a = np.random.rand(1, 5, 4) + b = np.random.rand(4, 6) + c = np.random.rand(5, 6) + d = np.random.rand(10) + + self.optimize_compare('ijk,kl,jl', operands=[a, b, c]) + self.optimize_compare('ijk,kl,jl,i->i', operands=[a, b, c, d]) + + e = np.random.rand(1, 1, 5, 4) + f = np.random.rand(7, 7) + self.optimize_compare('abjk,kl,jl', operands=[e, b, c]) + self.optimize_compare('abjk,kl,jl,ab->ab', operands=[e, b, c, f]) + + # Edge case found in gh-11308 + g = np.arange(64).reshape(2, 4, 8) + self.optimize_compare('obk,ijk->ioj', operands=[g, g]) + + def test_output_order(self): + # Ensure output order is respected for optimize cases; the below + # contraction should yield a reshaped tensor view + # gh-16415 + + a = np.ones((2, 3, 5), order='F') + b = np.ones((4, 3), order='F') + + for opt in [True, False]: + tmp = np.einsum('...ft,mf->...mt', a, b, order='a', optimize=opt) + assert_(tmp.flags.f_contiguous) + + tmp = np.einsum('...ft,mf->...mt', a, b, order='f', optimize=opt) + assert_(tmp.flags.f_contiguous) + + tmp = np.einsum('...ft,mf->...mt', a, b, order='c', optimize=opt) + assert_(tmp.flags.c_contiguous) + + tmp = np.einsum('...ft,mf->...mt', a, b, order='k', optimize=opt) + assert_(tmp.flags.c_contiguous is False) + assert_(tmp.flags.f_contiguous is False) + + tmp = np.einsum('...ft,mf->...mt', a, b, optimize=opt) + assert_(tmp.flags.c_contiguous is False) + assert_(tmp.flags.f_contiguous is False) + + c = np.ones((4, 3),
order='C') + for opt in [True, False]: + tmp = np.einsum('...ft,mf->...mt', a, c, order='a', optimize=opt) + assert_(tmp.flags.c_contiguous) + + d = np.ones((2, 3, 5), order='C') + for opt in [True, False]: + tmp = np.einsum('...ft,mf->...mt', d, c, order='a', optimize=opt) + assert_(tmp.flags.c_contiguous) + +class TestEinsumPath: + def build_operands(self, string, size_dict=global_size_dict): + + # Builds views based on the initial operands + operands = [string] + terms = string.split('->')[0].split(',') + for term in terms: + dims = [size_dict[x] for x in term] + operands.append(np.random.rand(*dims)) + + return operands + + def assert_path_equal(self, comp, benchmark): + # Checks that two lists of path tuples are equivalent + ret = (len(comp) == len(benchmark)) + assert_(ret) + for pos in range(len(comp) - 1): + ret &= isinstance(comp[pos + 1], tuple) + ret &= (comp[pos + 1] == benchmark[pos + 1]) + assert_(ret) + + def test_memory_constraints(self): + # Ensure memory constraints are satisfied + + outer_test = self.build_operands('a,b,c->abc') + + path, path_str = np.einsum_path(*outer_test, optimize=('greedy', 0)) + self.assert_path_equal(path, ['einsum_path', (0, 1, 2)]) + + path, path_str = np.einsum_path(*outer_test, optimize=('optimal', 0)) + self.assert_path_equal(path, ['einsum_path', (0, 1, 2)]) + + long_test = self.build_operands('acdf,jbje,gihb,hfac') + path, path_str = np.einsum_path(*long_test, optimize=('greedy', 0)) + self.assert_path_equal(path, ['einsum_path', (0, 1, 2, 3)]) + + path, path_str = np.einsum_path(*long_test, optimize=('optimal', 0)) + self.assert_path_equal(path, ['einsum_path', (0, 1, 2, 3)]) + + def test_long_paths(self): + # Long complex cases + + # Long test 1 + long_test1 = self.build_operands('acdf,jbje,gihb,hfac,gfac,gifabc,hfac') + path, path_str = np.einsum_path(*long_test1, optimize='greedy') + self.assert_path_equal(path, ['einsum_path', + (3, 6), (3, 4), (2, 4), (2, 3), (0, 2), (0, 1)]) + + path, path_str = np.einsum_path(*long_test1, optimize='optimal') + self.assert_path_equal(path, ['einsum_path', + (3, 6), (3, 4), (2, 4), (2, 3), (0, 2), (0, 1)]) + + # Long test 2 + long_test2 = self.build_operands('chd,bde,agbc,hiad,bdi,cgh,agdb') + path, path_str = np.einsum_path(*long_test2, optimize='greedy') + self.assert_path_equal(path, ['einsum_path', + (3, 4), (0, 3), (3, 4), (1, 3), (1, 2), (0, 1)]) + + path, path_str = np.einsum_path(*long_test2, optimize='optimal') + self.assert_path_equal(path, ['einsum_path', + (0, 5), (1, 4), (3, 4), (1, 3), (1, 2), (0, 1)]) + + def test_edge_paths(self): + # Difficult edge cases + + # Edge test1 + edge_test1 = self.build_operands('eb,cb,fb->cef') + path, path_str = np.einsum_path(*edge_test1, optimize='greedy') + self.assert_path_equal(path, ['einsum_path', (0, 2), (0, 1)]) + + path, path_str = np.einsum_path(*edge_test1, optimize='optimal') + self.assert_path_equal(path, ['einsum_path', (0, 2), (0, 1)]) + + # Edge test2 + edge_test2 = self.build_operands('dd,fb,be,cdb->cef') + path, path_str = np.einsum_path(*edge_test2, optimize='greedy') + self.assert_path_equal(path, ['einsum_path', (0, 3), (0, 1), (0, 1)]) + + path, path_str = np.einsum_path(*edge_test2, optimize='optimal') + self.assert_path_equal(path, ['einsum_path', (0, 3), (0, 1), (0, 1)]) + + # Edge test3 + edge_test3 = self.build_operands('bca,cdb,dbf,afc->') + path, path_str = np.einsum_path(*edge_test3, optimize='greedy') + self.assert_path_equal(path, ['einsum_path', (1, 2), (0, 2), (0, 1)]) + + path, path_str =
np.einsum_path(*edge_test3, optimize='optimal') + self.assert_path_equal(path, ['einsum_path', (1, 2), (0, 2), (0, 1)]) + + # Edge test4 + edge_test4 = self.build_operands('dcc,fce,ea,dbf->ab') + path, path_str = np.einsum_path(*edge_test4, optimize='greedy') + self.assert_path_equal(path, ['einsum_path', (1, 2), (0, 1), (0, 1)]) + + path, path_str = np.einsum_path(*edge_test4, optimize='optimal') + self.assert_path_equal(path, ['einsum_path', (1, 2), (0, 2), (0, 1)]) + + # Edge test5 + edge_test4 = self.build_operands('a,ac,ab,ad,cd,bd,bc->', + size_dict={"a": 20, "b": 20, "c": 20, "d": 20}) + path, path_str = np.einsum_path(*edge_test4, optimize='greedy') + self.assert_path_equal(path, ['einsum_path', (0, 1), (0, 1, 2, 3, 4, 5)]) + + path, path_str = np.einsum_path(*edge_test4, optimize='optimal') + self.assert_path_equal(path, ['einsum_path', (0, 1), (0, 1, 2, 3, 4, 5)]) + + def test_path_type_input(self): + # Test explicit path handling + path_test = self.build_operands('dcc,fce,ea,dbf->ab') + + path, path_str = np.einsum_path(*path_test, optimize=False) + self.assert_path_equal(path, ['einsum_path', (0, 1, 2, 3)]) + + path, path_str = np.einsum_path(*path_test, optimize=True) + self.assert_path_equal(path, ['einsum_path', (1, 2), (0, 1), (0, 1)]) + + exp_path = ['einsum_path', (0, 2), (0, 2), (0, 1)] + path, path_str = np.einsum_path(*path_test, optimize=exp_path) + self.assert_path_equal(path, exp_path) + + # Double check einsum works on the input path + noopt = np.einsum(*path_test, optimize=False) + opt = np.einsum(*path_test, optimize=exp_path) + assert_almost_equal(noopt, opt) + + def test_spaces(self): + #gh-10794 + arr = np.array([[1]]) + for sp in itertools.product(['', ' '], repeat=4): + # no error for any spacing + np.einsum('{}...a{}->{}...a{}'.format(*sp), arr) + +def test_overlap(): + a = np.arange(9, dtype=int).reshape(3, 3) + b = np.arange(9, dtype=int).reshape(3, 3) + d = np.dot(a, b) + # sanity check + c = np.einsum('ij,jk->ik', a, b) + assert_equal(c, d) + #gh-10080, out overlaps one of the operands + c = np.einsum('ij,jk->ik', a, b, out=b) + assert_equal(c, d) diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_errstate.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_errstate.py new file mode 100644 index 0000000..184a373 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_errstate.py @@ -0,0 +1,59 @@ +import pytest +import sysconfig + +import numpy as np +from numpy.testing import assert_, assert_raises + +# The floating point emulation on ARM EABI systems lacking a hardware FPU is +# known to be buggy. This is an attempt to identify these hosts. It may not +# catch all possible cases, but it catches the known cases of gh-413 and +# gh-15562. +hosttype = sysconfig.get_config_var('HOST_GNU_TYPE') +arm_softfloat = False if hosttype is None else hosttype.endswith('gnueabi') + +class TestErrstate: + @pytest.mark.skipif(arm_softfloat, + reason='platform/cpu issue with FPU (gh-413,-15562)') + def test_invalid(self): + with np.errstate(all='raise', under='ignore'): + a = -np.arange(3) + # This should work + with np.errstate(invalid='ignore'): + np.sqrt(a) + # While this should fail! + with assert_raises(FloatingPointError): + np.sqrt(a) + + @pytest.mark.skipif(arm_softfloat, + reason='platform/cpu issue with FPU (gh-15562)') + def test_divide(self): + with np.errstate(all='raise', under='ignore'): + a = -np.arange(3) + # This should work + with np.errstate(divide='ignore'): + a // 0 + # While this should fail! 
+ with assert_raises(FloatingPointError): + a // 0 + # As should this, see gh-15562 + with assert_raises(FloatingPointError): + a // a + + def test_errcall(self): + def foo(*args): + print(args) + + olderrcall = np.geterrcall() + with np.errstate(call=foo): + assert_(np.geterrcall() is foo, 'call is not foo') + with np.errstate(call=None): + assert_(np.geterrcall() is None, 'call is not None') + assert_(np.geterrcall() is olderrcall, 'call is not olderrcall') + + def test_errstate_decorator(self): + @np.errstate(all='ignore') + def foo(): + a = -np.arange(3) + a // 0 + + foo() diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_extint128.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_extint128.py new file mode 100644 index 0000000..3b64915 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_extint128.py @@ -0,0 +1,219 @@ +import itertools +import contextlib +import operator +import pytest + +import numpy as np +import numpy.core._multiarray_tests as mt + +from numpy.testing import assert_raises, assert_equal + + +INT64_MAX = np.iinfo(np.int64).max +INT64_MIN = np.iinfo(np.int64).min +INT64_MID = 2**32 + +# int128 is not two's complement, the sign bit is separate +INT128_MAX = 2**128 - 1 +INT128_MIN = -INT128_MAX +INT128_MID = 2**64 + +INT64_VALUES = ( + [INT64_MIN + j for j in range(20)] + + [INT64_MAX - j for j in range(20)] + + [INT64_MID + j for j in range(-20, 20)] + + [2*INT64_MID + j for j in range(-20, 20)] + + [INT64_MID//2 + j for j in range(-20, 20)] + + list(range(-70, 70)) +) + +INT128_VALUES = ( + [INT128_MIN + j for j in range(20)] + + [INT128_MAX - j for j in range(20)] + + [INT128_MID + j for j in range(-20, 20)] + + [2*INT128_MID + j for j in range(-20, 20)] + + [INT128_MID//2 + j for j in range(-20, 20)] + + list(range(-70, 70)) + + [False] # negative zero +) + +INT64_POS_VALUES = [x for x in INT64_VALUES if x > 0] + + +@contextlib.contextmanager +def exc_iter(*args): + """ + Iterate over Cartesian product of *args, and if an exception is raised, + add information of the current iterate. 
+ """ + + value = [None] + + def iterate(): + for v in itertools.product(*args): + value[0] = v + yield v + + try: + yield iterate() + except Exception: + import traceback + msg = "At: %r\n%s" % (repr(value[0]), + traceback.format_exc()) + raise AssertionError(msg) + + +def test_safe_binop(): + # Test checked arithmetic routines + + ops = [ + (operator.add, 1), + (operator.sub, 2), + (operator.mul, 3) + ] + + with exc_iter(ops, INT64_VALUES, INT64_VALUES) as it: + for xop, a, b in it: + pyop, op = xop + c = pyop(a, b) + + if not (INT64_MIN <= c <= INT64_MAX): + assert_raises(OverflowError, mt.extint_safe_binop, a, b, op) + else: + d = mt.extint_safe_binop(a, b, op) + if c != d: + # assert_equal is slow + assert_equal(d, c) + + +def test_to_128(): + with exc_iter(INT64_VALUES) as it: + for a, in it: + b = mt.extint_to_128(a) + if a != b: + assert_equal(b, a) + + +def test_to_64(): + with exc_iter(INT128_VALUES) as it: + for a, in it: + if not (INT64_MIN <= a <= INT64_MAX): + assert_raises(OverflowError, mt.extint_to_64, a) + else: + b = mt.extint_to_64(a) + if a != b: + assert_equal(b, a) + + +def test_mul_64_64(): + with exc_iter(INT64_VALUES, INT64_VALUES) as it: + for a, b in it: + c = a * b + d = mt.extint_mul_64_64(a, b) + if c != d: + assert_equal(d, c) + + +def test_add_128(): + with exc_iter(INT128_VALUES, INT128_VALUES) as it: + for a, b in it: + c = a + b + if not (INT128_MIN <= c <= INT128_MAX): + assert_raises(OverflowError, mt.extint_add_128, a, b) + else: + d = mt.extint_add_128(a, b) + if c != d: + assert_equal(d, c) + + +def test_sub_128(): + with exc_iter(INT128_VALUES, INT128_VALUES) as it: + for a, b in it: + c = a - b + if not (INT128_MIN <= c <= INT128_MAX): + assert_raises(OverflowError, mt.extint_sub_128, a, b) + else: + d = mt.extint_sub_128(a, b) + if c != d: + assert_equal(d, c) + + +def test_neg_128(): + with exc_iter(INT128_VALUES) as it: + for a, in it: + b = -a + c = mt.extint_neg_128(a) + if b != c: + assert_equal(c, b) + + +def test_shl_128(): + with exc_iter(INT128_VALUES) as it: + for a, in it: + if a < 0: + b = -(((-a) << 1) & (2**128-1)) + else: + b = (a << 1) & (2**128-1) + c = mt.extint_shl_128(a) + if b != c: + assert_equal(c, b) + + +def test_shr_128(): + with exc_iter(INT128_VALUES) as it: + for a, in it: + if a < 0: + b = -((-a) >> 1) + else: + b = a >> 1 + c = mt.extint_shr_128(a) + if b != c: + assert_equal(c, b) + + +def test_gt_128(): + with exc_iter(INT128_VALUES, INT128_VALUES) as it: + for a, b in it: + c = a > b + d = mt.extint_gt_128(a, b) + if c != d: + assert_equal(d, c) + + +@pytest.mark.slow +def test_divmod_128_64(): + with exc_iter(INT128_VALUES, INT64_POS_VALUES) as it: + for a, b in it: + if a >= 0: + c, cr = divmod(a, b) + else: + c, cr = divmod(-a, b) + c = -c + cr = -cr + + d, dr = mt.extint_divmod_128_64(a, b) + + if c != d or d != dr or b*d + dr != a: + assert_equal(d, c) + assert_equal(dr, cr) + assert_equal(b*d + dr, a) + + +def test_floordiv_128_64(): + with exc_iter(INT128_VALUES, INT64_POS_VALUES) as it: + for a, b in it: + c = a // b + d = mt.extint_floordiv_128_64(a, b) + + if c != d: + assert_equal(d, c) + + +def test_ceildiv_128_64(): + with exc_iter(INT128_VALUES, INT64_POS_VALUES) as it: + for a, b in it: + c = (a + b - 1) // b + d = mt.extint_ceildiv_128_64(a, b) + + if c != d: + assert_equal(d, c) diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_function_base.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_function_base.py new file mode 100644 index 0000000..dad7a58 --- /dev/null 
+++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_function_base.py @@ -0,0 +1,409 @@ +from numpy import ( + logspace, linspace, geomspace, dtype, array, sctypes, arange, isnan, + ndarray, sqrt, nextafter, stack, errstate + ) +from numpy.testing import ( + assert_, assert_equal, assert_raises, assert_array_equal, assert_allclose, + ) + + +class PhysicalQuantity(float): + def __new__(cls, value): + return float.__new__(cls, value) + + def __add__(self, x): + assert_(isinstance(x, PhysicalQuantity)) + return PhysicalQuantity(float(x) + float(self)) + __radd__ = __add__ + + def __sub__(self, x): + assert_(isinstance(x, PhysicalQuantity)) + return PhysicalQuantity(float(self) - float(x)) + + def __rsub__(self, x): + assert_(isinstance(x, PhysicalQuantity)) + return PhysicalQuantity(float(x) - float(self)) + + def __mul__(self, x): + return PhysicalQuantity(float(x) * float(self)) + __rmul__ = __mul__ + + def __div__(self, x): + return PhysicalQuantity(float(self) / float(x)) + + def __rdiv__(self, x): + return PhysicalQuantity(float(x) / float(self)) + + +class PhysicalQuantity2(ndarray): + __array_priority__ = 10 + + +class TestLogspace: + + def test_basic(self): + y = logspace(0, 6) + assert_(len(y) == 50) + y = logspace(0, 6, num=100) + assert_(y[-1] == 10 ** 6) + y = logspace(0, 6, endpoint=False) + assert_(y[-1] < 10 ** 6) + y = logspace(0, 6, num=7) + assert_array_equal(y, [1, 10, 100, 1e3, 1e4, 1e5, 1e6]) + + def test_start_stop_array(self): + start = array([0., 1.]) + stop = array([6., 7.]) + t1 = logspace(start, stop, 6) + t2 = stack([logspace(_start, _stop, 6) + for _start, _stop in zip(start, stop)], axis=1) + assert_equal(t1, t2) + t3 = logspace(start, stop[0], 6) + t4 = stack([logspace(_start, stop[0], 6) + for _start in start], axis=1) + assert_equal(t3, t4) + t5 = logspace(start, stop, 6, axis=-1) + assert_equal(t5, t2.T) + + def test_dtype(self): + y = logspace(0, 6, dtype='float32') + assert_equal(y.dtype, dtype('float32')) + y = logspace(0, 6, dtype='float64') + assert_equal(y.dtype, dtype('float64')) + y = logspace(0, 6, dtype='int32') + assert_equal(y.dtype, dtype('int32')) + + def test_physical_quantities(self): + a = PhysicalQuantity(1.0) + b = PhysicalQuantity(5.0) + assert_equal(logspace(a, b), logspace(1.0, 5.0)) + + def test_subclass(self): + a = array(1).view(PhysicalQuantity2) + b = array(7).view(PhysicalQuantity2) + ls = logspace(a, b) + assert type(ls) is PhysicalQuantity2 + assert_equal(ls, logspace(1.0, 7.0)) + ls = logspace(a, b, 1) + assert type(ls) is PhysicalQuantity2 + assert_equal(ls, logspace(1.0, 7.0, 1)) + + +class TestGeomspace: + + def test_basic(self): + y = geomspace(1, 1e6) + assert_(len(y) == 50) + y = geomspace(1, 1e6, num=100) + assert_(y[-1] == 10 ** 6) + y = geomspace(1, 1e6, endpoint=False) + assert_(y[-1] < 10 ** 6) + y = geomspace(1, 1e6, num=7) + assert_array_equal(y, [1, 10, 100, 1e3, 1e4, 1e5, 1e6]) + + y = geomspace(8, 2, num=3) + assert_allclose(y, [8, 4, 2]) + assert_array_equal(y.imag, 0) + + y = geomspace(-1, -100, num=3) + assert_array_equal(y, [-1, -10, -100]) + assert_array_equal(y.imag, 0) + + y = geomspace(-100, -1, num=3) + assert_array_equal(y, [-100, -10, -1]) + assert_array_equal(y.imag, 0) + + def test_boundaries_match_start_and_stop_exactly(self): + # make sure that the boundaries of the returned array exactly + # equal 'start' and 'stop' - this isn't obvious because + # np.exp(np.log(x)) isn't necessarily exactly equal to x + start = 0.3 + stop = 20.3 + + y = geomspace(start, stop, num=1) + assert_equal(y[0], 
start) + + y = geomspace(start, stop, num=1, endpoint=False) + assert_equal(y[0], start) + + y = geomspace(start, stop, num=3) + assert_equal(y[0], start) + assert_equal(y[-1], stop) + + y = geomspace(start, stop, num=3, endpoint=False) + assert_equal(y[0], start) + + def test_nan_interior(self): + with errstate(invalid='ignore'): + y = geomspace(-3, 3, num=4) + + assert_equal(y[0], -3.0) + assert_(isnan(y[1:-1]).all()) + assert_equal(y[3], 3.0) + + with errstate(invalid='ignore'): + y = geomspace(-3, 3, num=4, endpoint=False) + + assert_equal(y[0], -3.0) + assert_(isnan(y[1:]).all()) + + def test_complex(self): + # Purely imaginary + y = geomspace(1j, 16j, num=5) + assert_allclose(y, [1j, 2j, 4j, 8j, 16j]) + assert_array_equal(y.real, 0) + + y = geomspace(-4j, -324j, num=5) + assert_allclose(y, [-4j, -12j, -36j, -108j, -324j]) + assert_array_equal(y.real, 0) + + y = geomspace(1+1j, 1000+1000j, num=4) + assert_allclose(y, [1+1j, 10+10j, 100+100j, 1000+1000j]) + + y = geomspace(-1+1j, -1000+1000j, num=4) + assert_allclose(y, [-1+1j, -10+10j, -100+100j, -1000+1000j]) + + # Logarithmic spirals + y = geomspace(-1, 1, num=3, dtype=complex) + assert_allclose(y, [-1, 1j, +1]) + + y = geomspace(0+3j, -3+0j, 3) + assert_allclose(y, [0+3j, -3/sqrt(2)+3j/sqrt(2), -3+0j]) + y = geomspace(0+3j, 3+0j, 3) + assert_allclose(y, [0+3j, 3/sqrt(2)+3j/sqrt(2), 3+0j]) + y = geomspace(-3+0j, 0-3j, 3) + assert_allclose(y, [-3+0j, -3/sqrt(2)-3j/sqrt(2), 0-3j]) + y = geomspace(0+3j, -3+0j, 3) + assert_allclose(y, [0+3j, -3/sqrt(2)+3j/sqrt(2), -3+0j]) + y = geomspace(-2-3j, 5+7j, 7) + assert_allclose(y, [-2-3j, -0.29058977-4.15771027j, + 2.08885354-4.34146838j, 4.58345529-3.16355218j, + 6.41401745-0.55233457j, 6.75707386+3.11795092j, + 5+7j]) + + # Type promotion should prevent the -5 from becoming a NaN + y = geomspace(3j, -5, 2) + assert_allclose(y, [3j, -5]) + y = geomspace(-5, 3j, 2) + assert_allclose(y, [-5, 3j]) + + def test_dtype(self): + y = geomspace(1, 1e6, dtype='float32') + assert_equal(y.dtype, dtype('float32')) + y = geomspace(1, 1e6, dtype='float64') + assert_equal(y.dtype, dtype('float64')) + y = geomspace(1, 1e6, dtype='int32') + assert_equal(y.dtype, dtype('int32')) + + # Native types + y = geomspace(1, 1e6, dtype=float) + assert_equal(y.dtype, dtype('float_')) + y = geomspace(1, 1e6, dtype=complex) + assert_equal(y.dtype, dtype('complex')) + + def test_start_stop_array_scalar(self): + lim1 = array([120, 100], dtype="int8") + lim2 = array([-120, -100], dtype="int8") + lim3 = array([1200, 1000], dtype="uint16") + t1 = geomspace(lim1[0], lim1[1], 5) + t2 = geomspace(lim2[0], lim2[1], 5) + t3 = geomspace(lim3[0], lim3[1], 5) + t4 = geomspace(120.0, 100.0, 5) + t5 = geomspace(-120.0, -100.0, 5) + t6 = geomspace(1200.0, 1000.0, 5) + + # t3 uses float32, t6 uses float64 + assert_allclose(t1, t4, rtol=1e-2) + assert_allclose(t2, t5, rtol=1e-2) + assert_allclose(t3, t6, rtol=1e-5) + + def test_start_stop_array(self): + # Try to use all special cases. 
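+ # Editor's sketch (not part of the original test): with array start/stop, + # the samples axis is prepended by default, so column j of the result is + # geomspace(start[j], stop[j]). + demo = geomspace(array([1., 1.]), array([100., 1000.]), 3) + assert_allclose(demo[:, 0], [1., 10., 100.]) + assert_allclose(demo[:, 1], [1., sqrt(1000.), 1000.])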
+ start = array([1.e0, 32., 1j, -4j, 1+1j, -1]) + stop = array([1.e4, 2., 16j, -324j, 10000+10000j, 1]) + t1 = geomspace(start, stop, 5) + t2 = stack([geomspace(_start, _stop, 5) + for _start, _stop in zip(start, stop)], axis=1) + assert_equal(t1, t2) + t3 = geomspace(start, stop[0], 5) + t4 = stack([geomspace(_start, stop[0], 5) + for _start in start], axis=1) + assert_equal(t3, t4) + t5 = geomspace(start, stop, 5, axis=-1) + assert_equal(t5, t2.T) + + def test_physical_quantities(self): + a = PhysicalQuantity(1.0) + b = PhysicalQuantity(5.0) + assert_equal(geomspace(a, b), geomspace(1.0, 5.0)) + + def test_subclass(self): + a = array(1).view(PhysicalQuantity2) + b = array(7).view(PhysicalQuantity2) + gs = geomspace(a, b) + assert type(gs) is PhysicalQuantity2 + assert_equal(gs, geomspace(1.0, 7.0)) + gs = geomspace(a, b, 1) + assert type(gs) is PhysicalQuantity2 + assert_equal(gs, geomspace(1.0, 7.0, 1)) + + def test_bounds(self): + assert_raises(ValueError, geomspace, 0, 10) + assert_raises(ValueError, geomspace, 10, 0) + assert_raises(ValueError, geomspace, 0, 0) + + +class TestLinspace: + + def test_basic(self): + y = linspace(0, 10) + assert_(len(y) == 50) + y = linspace(2, 10, num=100) + assert_(y[-1] == 10) + y = linspace(2, 10, endpoint=False) + assert_(y[-1] < 10) + assert_raises(ValueError, linspace, 0, 10, num=-1) + + def test_corner(self): + y = list(linspace(0, 1, 1)) + assert_(y == [0.0], y) + assert_raises(TypeError, linspace, 0, 1, num=2.5) + + def test_type(self): + t1 = linspace(0, 1, 0).dtype + t2 = linspace(0, 1, 1).dtype + t3 = linspace(0, 1, 2).dtype + assert_equal(t1, t2) + assert_equal(t2, t3) + + def test_dtype(self): + y = linspace(0, 6, dtype='float32') + assert_equal(y.dtype, dtype('float32')) + y = linspace(0, 6, dtype='float64') + assert_equal(y.dtype, dtype('float64')) + y = linspace(0, 6, dtype='int32') + assert_equal(y.dtype, dtype('int32')) + + def test_start_stop_array_scalar(self): + lim1 = array([-120, 100], dtype="int8") + lim2 = array([120, -100], dtype="int8") + lim3 = array([1200, 1000], dtype="uint16") + t1 = linspace(lim1[0], lim1[1], 5) + t2 = linspace(lim2[0], lim2[1], 5) + t3 = linspace(lim3[0], lim3[1], 5) + t4 = linspace(-120.0, 100.0, 5) + t5 = linspace(120.0, -100.0, 5) + t6 = linspace(1200.0, 1000.0, 5) + assert_equal(t1, t4) + assert_equal(t2, t5) + assert_equal(t3, t6) + + def test_start_stop_array(self): + start = array([-120, 120], dtype="int8") + stop = array([100, -100], dtype="int8") + t1 = linspace(start, stop, 5) + t2 = stack([linspace(_start, _stop, 5) + for _start, _stop in zip(start, stop)], axis=1) + assert_equal(t1, t2) + t3 = linspace(start, stop[0], 5) + t4 = stack([linspace(_start, stop[0], 5) + for _start in start], axis=1) + assert_equal(t3, t4) + t5 = linspace(start, stop, 5, axis=-1) + assert_equal(t5, t2.T) + + def test_complex(self): + lim1 = linspace(1 + 2j, 3 + 4j, 5) + t1 = array([1.0+2.j, 1.5+2.5j, 2.0+3j, 2.5+3.5j, 3.0+4j]) + lim2 = linspace(1j, 10, 5) + t2 = array([0.0+1.j, 2.5+0.75j, 5.0+0.5j, 7.5+0.25j, 10.0+0j]) + assert_equal(lim1, t1) + assert_equal(lim2, t2) + + def test_physical_quantities(self): + a = PhysicalQuantity(0.0) + b = PhysicalQuantity(1.0) + assert_equal(linspace(a, b), linspace(0.0, 1.0)) + + def test_subclass(self): + a = array(0).view(PhysicalQuantity2) + b = array(1).view(PhysicalQuantity2) + ls = linspace(a, b) + assert type(ls) is PhysicalQuantity2 + assert_equal(ls, linspace(0.0, 1.0)) + ls = linspace(a, b, 1) + assert type(ls) is PhysicalQuantity2 + assert_equal(ls, linspace(0.0, 
1.0, 1)) + + def test_array_interface(self): + # Regression test for https://github.com/numpy/numpy/pull/6659 + # Ensure that start/stop can be objects that implement + # __array_interface__ and are convertible to numeric scalars + + class Arrayish: + """ + A generic object that supports the __array_interface__ and hence + can in principle be converted to a numeric scalar, but is not + otherwise recognized as numeric, but also happens to support + multiplication by floats. + + Data should be an object that implements the buffer interface, + and contains at least 4 bytes. + """ + + def __init__(self, data): + self._data = data + + @property + def __array_interface__(self): + return {'shape': (), 'typestr': ' 1) + assert_(info.minexp < -1) + assert_(info.maxexp > 1) diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_half.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_half.py new file mode 100644 index 0000000..1b6fd21 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_half.py @@ -0,0 +1,554 @@ +import platform +import pytest + +import numpy as np +from numpy import uint16, float16, float32, float64 +from numpy.testing import assert_, assert_equal + + +def assert_raises_fpe(strmatch, callable, *args, **kwargs): + try: + callable(*args, **kwargs) + except FloatingPointError as exc: + assert_(str(exc).find(strmatch) >= 0, + "Did not raise floating point %s error" % strmatch) + else: + assert_(False, + "Did not raise floating point %s error" % strmatch) + +class TestHalf: + def setup(self): + # An array of all possible float16 values + self.all_f16 = np.arange(0x10000, dtype=uint16) + self.all_f16.dtype = float16 + self.all_f32 = np.array(self.all_f16, dtype=float32) + self.all_f64 = np.array(self.all_f16, dtype=float64) + + # An array of all non-NaN float16 values, in sorted order + self.nonan_f16 = np.concatenate( + (np.arange(0xfc00, 0x7fff, -1, dtype=uint16), + np.arange(0x0000, 0x7c01, 1, dtype=uint16))) + self.nonan_f16.dtype = float16 + self.nonan_f32 = np.array(self.nonan_f16, dtype=float32) + self.nonan_f64 = np.array(self.nonan_f16, dtype=float64) + + # An array of all finite float16 values, in sorted order + self.finite_f16 = self.nonan_f16[1:-1] + self.finite_f32 = self.nonan_f32[1:-1] + self.finite_f64 = self.nonan_f64[1:-1] + + def test_half_conversions(self): + """Checks that all 16-bit values survive conversion + to/from 32-bit and 64-bit float""" + # Because the underlying routines preserve the NaN bits, every + # value is preserved when converting to/from other floats. + + # Convert from float32 back to float16 + b = np.array(self.all_f32, dtype=float16) + assert_equal(self.all_f16.view(dtype=uint16), + b.view(dtype=uint16)) + + # Convert from float64 back to float16 + b = np.array(self.all_f64, dtype=float16) + assert_equal(self.all_f16.view(dtype=uint16), + b.view(dtype=uint16)) + + # Convert float16 to longdouble and back + # This doesn't necessarily preserve the extra NaN bits, + # so exclude NaNs. 
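+ # Editor's sketch (not part of the original test): float16 NaNs carry + # payload bits that a longdouble round trip need not preserve, e.g. two + # quiet NaNs that differ only in payload: + nan_a = np.uint16(0x7e00).view(float16) # quiet NaN, payload 0 + nan_b = np.uint16(0x7e01).view(float16) # quiet NaN, payload 1 + assert np.isnan(nan_a) and np.isnan(nan_b) + assert nan_a.view(uint16) != nan_b.view(uint16)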
+ a_ld = np.array(self.nonan_f16, dtype=np.longdouble) + b = np.array(a_ld, dtype=float16) + assert_equal(self.nonan_f16.view(dtype=uint16), + b.view(dtype=uint16)) + + # Check the range for which all integers can be represented + i_int = np.arange(-2048, 2049) + i_f16 = np.array(i_int, dtype=float16) + j = np.array(i_f16, dtype=int) + assert_equal(i_int, j) + + @pytest.mark.parametrize("string_dt", ["S", "U"]) + def test_half_conversion_to_string(self, string_dt): + # Currently uses S/U32 (which is sufficient for float32) + expected_dt = np.dtype(f"{string_dt}32") + assert np.promote_types(np.float16, string_dt) == expected_dt + assert np.promote_types(string_dt, np.float16) == expected_dt + + arr = np.ones(3, dtype=np.float16).astype(string_dt) + assert arr.dtype == expected_dt + + @pytest.mark.parametrize("string_dt", ["S", "U"]) + def test_half_conversion_from_string(self, string_dt): + string = np.array("3.1416", dtype=string_dt) + assert string.astype(np.float16) == np.array(3.1416, dtype=np.float16) + + @pytest.mark.parametrize("offset", [None, "up", "down"]) + @pytest.mark.parametrize("shift", [None, "up", "down"]) + @pytest.mark.parametrize("float_t", [np.float32, np.float64]) + def test_half_conversion_rounding(self, float_t, shift, offset): + # Assumes that round to even is used during casting. + max_pattern = np.float16(np.finfo(np.float16).max).view(np.uint16) + + # Test all (positive) finite numbers; the denormals are the most + # interesting cases: + f16s_patterns = np.arange(0, max_pattern+1, dtype=np.uint16) + f16s_float = f16s_patterns.view(np.float16).astype(float_t) + + # Shift the values by half a bit up or down (or do not shift): + if shift == "up": + f16s_float = 0.5 * (f16s_float[:-1] + f16s_float[1:])[1:] + elif shift == "down": + f16s_float = 0.5 * (f16s_float[:-1] + f16s_float[1:])[:-1] + else: + f16s_float = f16s_float[1:-1] + + # Increase the float by a minimal value: + if offset == "up": + f16s_float = np.nextafter(f16s_float, float_t(1e50)) + elif offset == "down": + f16s_float = np.nextafter(f16s_float, float_t(-1e50)) + + # Convert back to float16 and its bit pattern: + res_patterns = f16s_float.astype(np.float16).view(np.uint16) + + # The above calculation tries the original values, or the exact + # midpoints between the float16 values. It then further offsets them + # by as little as possible. If no offset occurs, "round to even" + # logic will be necessary; an arbitrarily small offset should always + # cause normal up/down rounding. + + # Calculate the expected pattern: + cmp_patterns = f16s_patterns[1:-1].copy() + + if shift == "down" and offset != "up": + shift_pattern = -1 + elif shift == "up" and offset != "down": + shift_pattern = 1 + else: + # There cannot be a net shift: either shift is None, so all + # rounding goes back to the original, or the shift is undone by + # the offset. + shift_pattern = 0 + + # If rounding occurs, is it normal rounding or round to even? + if offset is None: + # Round to even occurs, so modify only the non-even patterns; + # cast so that adding (-1) is allowed + cmp_patterns[0::2].view(np.int16)[...] += shift_pattern + else: + cmp_patterns.view(np.int16)[...] += shift_pattern + + assert_equal(res_patterns, cmp_patterns) + + @pytest.mark.parametrize(["float_t", "uint_t", "bits"], + [(np.float32, np.uint32, 23), + (np.float64, np.uint64, 52)]) + def test_half_conversion_denormal_round_even(self, float_t, uint_t, bits): + # Test specifically that all bits are considered when deciding + # whether round to even should occur (i.e. no bits are lost at the + # end).
Compare also gh-12721. The most bits can get lost for the + # smallest denormal: + smallest_value = np.uint16(1).view(np.float16).astype(float_t) + assert smallest_value == 2**-24 + + # Will be rounded to zero based on round to even rule: + rounded_to_zero = smallest_value / float_t(2) + assert rounded_to_zero.astype(np.float16) == 0 + + # The significand will be all 0 for the float_t, test that we do not + # lose the lower ones of these: + for i in range(bits): + # slightly increasing the value should make it round up: + larger_pattern = rounded_to_zero.view(uint_t) | uint_t(1 << i) + larger_value = larger_pattern.view(float_t) + assert larger_value.astype(np.float16) == smallest_value + + def test_nans_infs(self): + with np.errstate(all='ignore'): + # Check some of the ufuncs + assert_equal(np.isnan(self.all_f16), np.isnan(self.all_f32)) + assert_equal(np.isinf(self.all_f16), np.isinf(self.all_f32)) + assert_equal(np.isfinite(self.all_f16), np.isfinite(self.all_f32)) + assert_equal(np.signbit(self.all_f16), np.signbit(self.all_f32)) + assert_equal(np.spacing(float16(65504)), np.inf) + + # Check comparisons of all values with NaN + nan = float16(np.nan) + + assert_(not (self.all_f16 == nan).any()) + assert_(not (nan == self.all_f16).any()) + + assert_((self.all_f16 != nan).all()) + assert_((nan != self.all_f16).all()) + + assert_(not (self.all_f16 < nan).any()) + assert_(not (nan < self.all_f16).any()) + + assert_(not (self.all_f16 <= nan).any()) + assert_(not (nan <= self.all_f16).any()) + + assert_(not (self.all_f16 > nan).any()) + assert_(not (nan > self.all_f16).any()) + + assert_(not (self.all_f16 >= nan).any()) + assert_(not (nan >= self.all_f16).any()) + + def test_half_values(self): + """Confirms a small number of known half values""" + a = np.array([1.0, -1.0, + 2.0, -2.0, + 0.0999755859375, 0.333251953125, # 1/10, 1/3 + 65504, -65504, # Maximum magnitude + 2.0**(-14), -2.0**(-14), # Minimum normal + 2.0**(-24), -2.0**(-24), # Minimum subnormal + 0, -1/1e1000, # Signed zeros + np.inf, -np.inf]) + b = np.array([0x3c00, 0xbc00, + 0x4000, 0xc000, + 0x2e66, 0x3555, + 0x7bff, 0xfbff, + 0x0400, 0x8400, + 0x0001, 0x8001, + 0x0000, 0x8000, + 0x7c00, 0xfc00], dtype=uint16) + b.dtype = float16 + assert_equal(a, b) + + def test_half_rounding(self): + """Checks that rounding when converting to half is correct""" + a = np.array([2.0**-25 + 2.0**-35, # Rounds to minimum subnormal + 2.0**-25, # Underflows to zero (nearest even mode) + 2.0**-26, # Underflows to zero + 1.0+2.0**-11 + 2.0**-16, # rounds to 1.0+2**(-10) + 1.0+2.0**-11, # rounds to 1.0 (nearest even mode) + 1.0+2.0**-12, # rounds to 1.0 + 65519, # rounds to 65504 + 65520], # rounds to inf + dtype=float64) + rounded = [2.0**-24, + 0.0, + 0.0, + 1.0+2.0**(-10), + 1.0, + 1.0, + 65504, + np.inf] + + # Check float64->float16 rounding + b = np.array(a, dtype=float16) + assert_equal(b, rounded) + + # Check float32->float16 rounding + a = np.array(a, dtype=float32) + b = np.array(a, dtype=float16) + assert_equal(b, rounded) + + def test_half_correctness(self): + """Take every finite float16, and check the casting functions with + a manual conversion.""" + + # Create an array of all finite float16s + a_bits = self.finite_f16.view(dtype=uint16) + + # Convert to 64-bit float manually + a_sgn = (-1.0)**((a_bits & 0x8000) >> 15) + a_exp = np.array((a_bits & 0x7c00) >> 10, dtype=np.int32) - 15 + a_man = (a_bits & 0x03ff) * 2.0**(-10) + # Implicit bit of normalized floats + a_man[a_exp != -15] += 1 + # Denormalized exponent is -14 + a_exp[a_exp == 
-15] = -14 + + a_manual = a_sgn * a_man * 2.0**a_exp + + a32_fail = np.nonzero(self.finite_f32 != a_manual)[0] + if len(a32_fail) != 0: + bad_index = a32_fail[0] + assert_equal(self.finite_f32, a_manual, + "First non-equal is half value %x -> %g != %g" % + (self.finite_f16[bad_index], + self.finite_f32[bad_index], + a_manual[bad_index])) + + a64_fail = np.nonzero(self.finite_f64 != a_manual)[0] + if len(a64_fail) != 0: + bad_index = a64_fail[0] + assert_equal(self.finite_f64, a_manual, + "First non-equal is half value %x -> %g != %g" % + (self.finite_f16[bad_index], + self.finite_f64[bad_index], + a_manual[bad_index])) + + def test_half_ordering(self): + """Make sure comparisons are working right""" + + # All non-NaN float16 values in reverse order + a = self.nonan_f16[::-1].copy() + + # 32-bit float copy + b = np.array(a, dtype=float32) + + # Should sort the same + a.sort() + b.sort() + assert_equal(a, b) + + # Comparisons should work + assert_((a[:-1] <= a[1:]).all()) + assert_(not (a[:-1] > a[1:]).any()) + assert_((a[1:] >= a[:-1]).all()) + assert_(not (a[1:] < a[:-1]).any()) + # All != except for +/-0 + assert_equal(np.nonzero(a[:-1] < a[1:])[0].size, a.size-2) + assert_equal(np.nonzero(a[1:] > a[:-1])[0].size, a.size-2) + + def test_half_funcs(self): + """Test the various ArrFuncs""" + + # fill + assert_equal(np.arange(10, dtype=float16), + np.arange(10, dtype=float32)) + + # fillwithscalar + a = np.zeros((5,), dtype=float16) + a.fill(1) + assert_equal(a, np.ones((5,), dtype=float16)) + + # nonzero and copyswap + a = np.array([0, 0, -1, -1/1e20, 0, 2.0**-24, 7.629e-6], dtype=float16) + assert_equal(a.nonzero()[0], + [2, 5, 6]) + a = a.byteswap().newbyteorder() + assert_equal(a.nonzero()[0], + [2, 5, 6]) + + # dot + a = np.arange(0, 10, 0.5, dtype=float16) + b = np.ones((20,), dtype=float16) + assert_equal(np.dot(a, b), + 95) + + # argmax + a = np.array([0, -np.inf, -2, 0.5, 12.55, 7.3, 2.1, 12.4], dtype=float16) + assert_equal(a.argmax(), + 4) + a = np.array([0, -np.inf, -2, np.inf, 12.55, np.nan, 2.1, 12.4], dtype=float16) + assert_equal(a.argmax(), + 5) + + # getitem + a = np.arange(10, dtype=float16) + for i in range(10): + assert_equal(a.item(i), i) + + def test_spacing_nextafter(self): + """Test np.spacing and np.nextafter""" + # All non-negative finite #'s + a = np.arange(0x7c00, dtype=uint16) + hinf = np.array((np.inf,), dtype=float16) + hnan = np.array((np.nan,), dtype=float16) + a_f16 = a.view(dtype=float16) + + assert_equal(np.spacing(a_f16[:-1]), a_f16[1:]-a_f16[:-1]) + + assert_equal(np.nextafter(a_f16[:-1], hinf), a_f16[1:]) + assert_equal(np.nextafter(a_f16[0], -hinf), -a_f16[1]) + assert_equal(np.nextafter(a_f16[1:], -hinf), a_f16[:-1]) + + assert_equal(np.nextafter(hinf, a_f16), a_f16[-1]) + assert_equal(np.nextafter(-hinf, a_f16), -a_f16[-1]) + + assert_equal(np.nextafter(hinf, hinf), hinf) + assert_equal(np.nextafter(hinf, -hinf), a_f16[-1]) + assert_equal(np.nextafter(-hinf, hinf), -a_f16[-1]) + assert_equal(np.nextafter(-hinf, -hinf), -hinf) + + assert_equal(np.nextafter(a_f16, hnan), hnan[0]) + assert_equal(np.nextafter(hnan, a_f16), hnan[0]) + + assert_equal(np.nextafter(hnan, hnan), hnan) + assert_equal(np.nextafter(hinf, hnan), hnan) + assert_equal(np.nextafter(hnan, hinf), hnan) + + # switch to negatives + a |= 0x8000 + + assert_equal(np.spacing(a_f16[0]), np.spacing(a_f16[1])) + assert_equal(np.spacing(a_f16[1:]), a_f16[:-1]-a_f16[1:]) + + assert_equal(np.nextafter(a_f16[0], hinf), -a_f16[1]) + assert_equal(np.nextafter(a_f16[1:], hinf), a_f16[:-1]) + 
assert_equal(np.nextafter(a_f16[:-1], -hinf), a_f16[1:]) + + assert_equal(np.nextafter(hinf, a_f16), -a_f16[-1]) + assert_equal(np.nextafter(-hinf, a_f16), a_f16[-1]) + + assert_equal(np.nextafter(a_f16, hnan), hnan[0]) + assert_equal(np.nextafter(hnan, a_f16), hnan[0]) + + def test_half_ufuncs(self): + """Test the various ufuncs""" + + a = np.array([0, 1, 2, 4, 2], dtype=float16) + b = np.array([-2, 5, 1, 4, 3], dtype=float16) + c = np.array([0, -1, -np.inf, np.nan, 6], dtype=float16) + + assert_equal(np.add(a, b), [-2, 6, 3, 8, 5]) + assert_equal(np.subtract(a, b), [2, -4, 1, 0, -1]) + assert_equal(np.multiply(a, b), [0, 5, 2, 16, 6]) + assert_equal(np.divide(a, b), [0, 0.199951171875, 2, 1, 0.66650390625]) + + assert_equal(np.equal(a, b), [False, False, False, True, False]) + assert_equal(np.not_equal(a, b), [True, True, True, False, True]) + assert_equal(np.less(a, b), [False, True, False, False, True]) + assert_equal(np.less_equal(a, b), [False, True, False, True, True]) + assert_equal(np.greater(a, b), [True, False, True, False, False]) + assert_equal(np.greater_equal(a, b), [True, False, True, True, False]) + assert_equal(np.logical_and(a, b), [False, True, True, True, True]) + assert_equal(np.logical_or(a, b), [True, True, True, True, True]) + assert_equal(np.logical_xor(a, b), [True, False, False, False, False]) + assert_equal(np.logical_not(a), [True, False, False, False, False]) + + assert_equal(np.isnan(c), [False, False, False, True, False]) + assert_equal(np.isinf(c), [False, False, True, False, False]) + assert_equal(np.isfinite(c), [True, True, False, False, True]) + assert_equal(np.signbit(b), [True, False, False, False, False]) + + assert_equal(np.copysign(b, a), [2, 5, 1, 4, 3]) + + assert_equal(np.maximum(a, b), [0, 5, 2, 4, 3]) + + x = np.maximum(b, c) + assert_(np.isnan(x[3])) + x[3] = 0 + assert_equal(x, [0, 5, 1, 0, 6]) + + assert_equal(np.minimum(a, b), [-2, 1, 1, 4, 2]) + + x = np.minimum(b, c) + assert_(np.isnan(x[3])) + x[3] = 0 + assert_equal(x, [-2, -1, -np.inf, 0, 3]) + + assert_equal(np.fmax(a, b), [0, 5, 2, 4, 3]) + assert_equal(np.fmax(b, c), [0, 5, 1, 4, 6]) + assert_equal(np.fmin(a, b), [-2, 1, 1, 4, 2]) + assert_equal(np.fmin(b, c), [-2, -1, -np.inf, 4, 3]) + + assert_equal(np.floor_divide(a, b), [0, 0, 2, 1, 0]) + assert_equal(np.remainder(a, b), [0, 1, 0, 0, 2]) + assert_equal(np.divmod(a, b), ([0, 0, 2, 1, 0], [0, 1, 0, 0, 2])) + assert_equal(np.square(b), [4, 25, 1, 16, 9]) + assert_equal(np.reciprocal(b), [-0.5, 0.199951171875, 1, 0.25, 0.333251953125]) + assert_equal(np.ones_like(b), [1, 1, 1, 1, 1]) + assert_equal(np.conjugate(b), b) + assert_equal(np.absolute(b), [2, 5, 1, 4, 3]) + assert_equal(np.negative(b), [2, -5, -1, -4, -3]) + assert_equal(np.positive(b), b) + assert_equal(np.sign(b), [-1, 1, 1, 1, 1]) + assert_equal(np.modf(b), ([0, 0, 0, 0, 0], b)) + assert_equal(np.frexp(b), ([-0.5, 0.625, 0.5, 0.5, 0.75], [2, 3, 1, 3, 2])) + assert_equal(np.ldexp(b, [0, 1, 2, 4, 2]), [-2, 10, 4, 64, 12]) + + def test_half_coercion(self): + """Test that half gets coerced properly with the other types""" + a16 = np.array((1,), dtype=float16) + a32 = np.array((1,), dtype=float32) + b16 = float16(1) + b32 = float32(1) + + assert_equal(np.power(a16, 2).dtype, float16) + assert_equal(np.power(a16, 2.0).dtype, float16) + assert_equal(np.power(a16, b16).dtype, float16) + assert_equal(np.power(a16, b32).dtype, float16) + assert_equal(np.power(a16, a16).dtype, float16) + assert_equal(np.power(a16, a32).dtype, float32) + + assert_equal(np.power(b16, 2).dtype, 
float64) + assert_equal(np.power(b16, 2.0).dtype, float64) + assert_equal(np.power(b16, b16).dtype, float16) + assert_equal(np.power(b16, b32).dtype, float32) + assert_equal(np.power(b16, a16).dtype, float16) + assert_equal(np.power(b16, a32).dtype, float32) + + assert_equal(np.power(a32, a16).dtype, float32) + assert_equal(np.power(a32, b16).dtype, float32) + assert_equal(np.power(b32, a16).dtype, float16) + assert_equal(np.power(b32, b16).dtype, float32) + + @pytest.mark.skipif(platform.machine() == "armv5tel", + reason="See gh-413.") + def test_half_fpe(self): + with np.errstate(all='raise'): + sx16 = np.array((1e-4,), dtype=float16) + bx16 = np.array((1e4,), dtype=float16) + sy16 = float16(1e-4) + by16 = float16(1e4) + + # Underflow errors + assert_raises_fpe('underflow', lambda a, b:a*b, sx16, sx16) + assert_raises_fpe('underflow', lambda a, b:a*b, sx16, sy16) + assert_raises_fpe('underflow', lambda a, b:a*b, sy16, sx16) + assert_raises_fpe('underflow', lambda a, b:a*b, sy16, sy16) + assert_raises_fpe('underflow', lambda a, b:a/b, sx16, bx16) + assert_raises_fpe('underflow', lambda a, b:a/b, sx16, by16) + assert_raises_fpe('underflow', lambda a, b:a/b, sy16, bx16) + assert_raises_fpe('underflow', lambda a, b:a/b, sy16, by16) + assert_raises_fpe('underflow', lambda a, b:a/b, + float16(2.**-14), float16(2**11)) + assert_raises_fpe('underflow', lambda a, b:a/b, + float16(-2.**-14), float16(2**11)) + assert_raises_fpe('underflow', lambda a, b:a/b, + float16(2.**-14+2**-24), float16(2)) + assert_raises_fpe('underflow', lambda a, b:a/b, + float16(-2.**-14-2**-24), float16(2)) + assert_raises_fpe('underflow', lambda a, b:a/b, + float16(2.**-14+2**-23), float16(4)) + + # Overflow errors + assert_raises_fpe('overflow', lambda a, b:a*b, bx16, bx16) + assert_raises_fpe('overflow', lambda a, b:a*b, bx16, by16) + assert_raises_fpe('overflow', lambda a, b:a*b, by16, bx16) + assert_raises_fpe('overflow', lambda a, b:a*b, by16, by16) + assert_raises_fpe('overflow', lambda a, b:a/b, bx16, sx16) + assert_raises_fpe('overflow', lambda a, b:a/b, bx16, sy16) + assert_raises_fpe('overflow', lambda a, b:a/b, by16, sx16) + assert_raises_fpe('overflow', lambda a, b:a/b, by16, sy16) + assert_raises_fpe('overflow', lambda a, b:a+b, + float16(65504), float16(17)) + assert_raises_fpe('overflow', lambda a, b:a-b, + float16(-65504), float16(17)) + assert_raises_fpe('overflow', np.nextafter, float16(65504), float16(np.inf)) + assert_raises_fpe('overflow', np.nextafter, float16(-65504), float16(-np.inf)) + assert_raises_fpe('overflow', np.spacing, float16(65504)) + + # Invalid value errors + assert_raises_fpe('invalid', np.divide, float16(np.inf), float16(np.inf)) + assert_raises_fpe('invalid', np.spacing, float16(np.inf)) + assert_raises_fpe('invalid', np.spacing, float16(np.nan)) + + # These should not raise + float16(65472)+float16(32) + float16(2**-13)/float16(2) + float16(2**-14)/float16(2**10) + np.spacing(float16(-65504)) + np.nextafter(float16(65504), float16(-np.inf)) + np.nextafter(float16(-65504), float16(np.inf)) + np.nextafter(float16(np.inf), float16(0)) + np.nextafter(float16(-np.inf), float16(0)) + np.nextafter(float16(0), float16(np.nan)) + np.nextafter(float16(np.nan), float16(0)) + float16(2**-14)/float16(2**10) + float16(-2**-14)/float16(2**10) + float16(2**-14+2**-23)/float16(2) + float16(-2**-14-2**-23)/float16(2) + + def test_half_array_interface(self): + """Test that half is compatible with __array_interface__""" + class Dummy: + pass + + a = np.ones((1,), dtype=float16) + b = Dummy() + 
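# Editorial note (added; not in the upstream file): `__array_interface__` is + # NumPy's buffer-description protocol (shape, typestr, data pointer). + # Copying it from `a` makes the plain object below describe the same + # float16 memory, so `np.array(b)` can reconstruct an equivalent array. +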
b.__array_interface__ = a.__array_interface__ + c = np.array(b) + assert_(c.dtype == float16) + assert_equal(a, c) diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_hashtable.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_hashtable.py new file mode 100644 index 0000000..bace4c0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_hashtable.py @@ -0,0 +1,30 @@ +import pytest + +import random +from numpy.core._multiarray_tests import identityhash_tester + + +@pytest.mark.parametrize("key_length", [1, 3, 6]) +@pytest.mark.parametrize("length", [1, 16, 2000]) +def test_identity_hashtable(key_length, length): + # use a 20 object pool for everything (duplicates will happen) + pool = [object() for i in range(20)] + keys_vals = [] + for i in range(length): + keys = tuple(random.choices(pool, k=key_length)) + keys_vals.append((keys, random.choice(pool))) + + dictionary = dict(keys_vals) + + # add a random item at the end: + keys_vals.append(random.choice(keys_vals)) + # the expected one could be different with duplicates: + expected = dictionary[keys_vals[-1][0]] + + res = identityhash_tester(key_length, keys_vals, replace=True) + assert res is expected + + # check that ensuring one duplicate definitely raises: + keys_vals.insert(0, keys_vals[-2]) + with pytest.raises(RuntimeError): + identityhash_tester(key_length, keys_vals) diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_indexerrors.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_indexerrors.py new file mode 100644 index 0000000..a0e9a8c --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_indexerrors.py @@ -0,0 +1,133 @@ +import numpy as np +from numpy.testing import ( + assert_raises, assert_raises_regex, + ) + + +class TestIndexErrors: + '''Tests to exercise indexerrors not covered by other tests.''' + + def test_arraytypes_fasttake(self): + 'take from a 0-length dimension' + x = np.empty((2, 3, 0, 4)) + assert_raises(IndexError, x.take, [0], axis=2) + assert_raises(IndexError, x.take, [1], axis=2) + assert_raises(IndexError, x.take, [0], axis=2, mode='wrap') + assert_raises(IndexError, x.take, [0], axis=2, mode='clip') + + def test_take_from_object(self): + # Check exception taking from object array + d = np.zeros(5, dtype=object) + assert_raises(IndexError, d.take, [6]) + + # Check exception taking from array with a 0-sized dimension + d = np.zeros((5, 0), dtype=object) + assert_raises(IndexError, d.take, [1], axis=1) + assert_raises(IndexError, d.take, [0], axis=1) + assert_raises(IndexError, d.take, [0]) + assert_raises(IndexError, d.take, [0], mode='wrap') + assert_raises(IndexError, d.take, [0], mode='clip') + + def test_multiindex_exceptions(self): + a = np.empty(5, dtype=object) + assert_raises(IndexError, a.item, 20) + a = np.empty((5, 0), dtype=object) + assert_raises(IndexError, a.item, (0, 0)) + + a = np.empty(5, dtype=object) + assert_raises(IndexError, a.itemset, 20, 0) + a = np.empty((5, 0), dtype=object) + assert_raises(IndexError, a.itemset, (0, 0), 0) + + def test_put_exceptions(self): + a = np.zeros((5, 5)) + assert_raises(IndexError, a.put, 100, 0) + a = np.zeros((5, 5), dtype=object) + assert_raises(IndexError, a.put, 100, 0) + a = np.zeros((5, 5, 0)) + assert_raises(IndexError, a.put, 100, 0) + a = np.zeros((5, 5, 0), dtype=object) + assert_raises(IndexError, a.put, 100, 0) + + def test_iterators_exceptions(self): + "cases in iterators.c" + def assign(obj, ind, val): + obj[ind] = val + + a = np.zeros([1, 2, 3]) + 
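# Editorial note (added; not in the upstream file): axis 1 of `a` has size + # 2, so the index 5 used below is out of bounds there; both item getting + # and assignment must raise IndexError. +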
assert_raises(IndexError, lambda: a[0, 5, None, 2]) + assert_raises(IndexError, lambda: a[0, 5, 0, 2]) + assert_raises(IndexError, lambda: assign(a, (0, 5, None, 2), 1)) + assert_raises(IndexError, lambda: assign(a, (0, 5, 0, 2), 1)) + + a = np.zeros([1, 0, 3]) + assert_raises(IndexError, lambda: a[0, 0, None, 2]) + assert_raises(IndexError, lambda: assign(a, (0, 0, None, 2), 1)) + + a = np.zeros([1, 2, 3]) + assert_raises(IndexError, lambda: a.flat[10]) + assert_raises(IndexError, lambda: assign(a.flat, 10, 5)) + a = np.zeros([1, 0, 3]) + assert_raises(IndexError, lambda: a.flat[10]) + assert_raises(IndexError, lambda: assign(a.flat, 10, 5)) + + a = np.zeros([1, 2, 3]) + assert_raises(IndexError, lambda: a.flat[np.array(10)]) + assert_raises(IndexError, lambda: assign(a.flat, np.array(10), 5)) + a = np.zeros([1, 0, 3]) + assert_raises(IndexError, lambda: a.flat[np.array(10)]) + assert_raises(IndexError, lambda: assign(a.flat, np.array(10), 5)) + + a = np.zeros([1, 2, 3]) + assert_raises(IndexError, lambda: a.flat[np.array([10])]) + assert_raises(IndexError, lambda: assign(a.flat, np.array([10]), 5)) + a = np.zeros([1, 0, 3]) + assert_raises(IndexError, lambda: a.flat[np.array([10])]) + assert_raises(IndexError, lambda: assign(a.flat, np.array([10]), 5)) + + def test_mapping(self): + "cases from mapping.c" + + def assign(obj, ind, val): + obj[ind] = val + + a = np.zeros((0, 10)) + assert_raises(IndexError, lambda: a[12]) + + a = np.zeros((3, 5)) + assert_raises(IndexError, lambda: a[(10, 20)]) + assert_raises(IndexError, lambda: assign(a, (10, 20), 1)) + a = np.zeros((3, 0)) + assert_raises(IndexError, lambda: a[(1, 0)]) + assert_raises(IndexError, lambda: assign(a, (1, 0), 1)) + + a = np.zeros((10,)) + assert_raises(IndexError, lambda: assign(a, 10, 1)) + a = np.zeros((0,)) + assert_raises(IndexError, lambda: assign(a, 10, 1)) + + a = np.zeros((3, 5)) + assert_raises(IndexError, lambda: a[(1, [1, 20])]) + assert_raises(IndexError, lambda: assign(a, (1, [1, 20]), 1)) + a = np.zeros((3, 0)) + assert_raises(IndexError, lambda: a[(1, [0, 1])]) + assert_raises(IndexError, lambda: assign(a, (1, [0, 1]), 1)) + + def test_mapping_error_message(self): + a = np.zeros((3, 5)) + index = (1, 2, 3, 4, 5) + assert_raises_regex( + IndexError, + "too many indices for array: " + "array is 2-dimensional, but 5 were indexed", + lambda: a[index]) + + def test_methods(self): + "cases from methods.c" + + a = np.zeros((3, 3)) + assert_raises(IndexError, lambda: a.item(100)) + assert_raises(IndexError, lambda: a.itemset(100, 1)) + a = np.zeros((0, 3)) + assert_raises(IndexError, lambda: a.item(100)) + assert_raises(IndexError, lambda: a.itemset(100, 1)) diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_indexing.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_indexing.py new file mode 100644 index 0000000..1c22538 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_indexing.py @@ -0,0 +1,1411 @@ +import sys +import warnings +import functools +import operator + +import pytest + +import numpy as np +from numpy.core._multiarray_tests import array_indexing +from itertools import product +from numpy.testing import ( + assert_, assert_equal, assert_raises, assert_raises_regex, + assert_array_equal, assert_warns, HAS_REFCOUNT, + ) + + +class TestIndexing: + def test_index_no_floats(self): + a = np.array([[[5]]]) + + assert_raises(IndexError, lambda: a[0.0]) + assert_raises(IndexError, lambda: a[0, 0.0]) + assert_raises(IndexError, lambda: a[0.0, 0]) + 
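# Editorial note (added; not in the upstream file): each remaining case + # mixes a float into an otherwise valid index; floats are never silently + # truncated to integers, so every variant must raise IndexError. +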
assert_raises(IndexError, lambda: a[0.0,:]) + assert_raises(IndexError, lambda: a[:, 0.0]) + assert_raises(IndexError, lambda: a[:, 0.0,:]) + assert_raises(IndexError, lambda: a[0.0,:,:]) + assert_raises(IndexError, lambda: a[0, 0, 0.0]) + assert_raises(IndexError, lambda: a[0.0, 0, 0]) + assert_raises(IndexError, lambda: a[0, 0.0, 0]) + assert_raises(IndexError, lambda: a[-1.4]) + assert_raises(IndexError, lambda: a[0, -1.4]) + assert_raises(IndexError, lambda: a[-1.4, 0]) + assert_raises(IndexError, lambda: a[-1.4,:]) + assert_raises(IndexError, lambda: a[:, -1.4]) + assert_raises(IndexError, lambda: a[:, -1.4,:]) + assert_raises(IndexError, lambda: a[-1.4,:,:]) + assert_raises(IndexError, lambda: a[0, 0, -1.4]) + assert_raises(IndexError, lambda: a[-1.4, 0, 0]) + assert_raises(IndexError, lambda: a[0, -1.4, 0]) + assert_raises(IndexError, lambda: a[0.0:, 0.0]) + assert_raises(IndexError, lambda: a[0.0:, 0.0,:]) + + def test_slicing_no_floats(self): + a = np.array([[5]]) + + # start as float. + assert_raises(TypeError, lambda: a[0.0:]) + assert_raises(TypeError, lambda: a[0:, 0.0:2]) + assert_raises(TypeError, lambda: a[0.0::2, :0]) + assert_raises(TypeError, lambda: a[0.0:1:2,:]) + assert_raises(TypeError, lambda: a[:, 0.0:]) + # stop as float. + assert_raises(TypeError, lambda: a[:0.0]) + assert_raises(TypeError, lambda: a[:0, 1:2.0]) + assert_raises(TypeError, lambda: a[:0.0:2, :0]) + assert_raises(TypeError, lambda: a[:0.0,:]) + assert_raises(TypeError, lambda: a[:, 0:4.0:2]) + # step as float. + assert_raises(TypeError, lambda: a[::1.0]) + assert_raises(TypeError, lambda: a[0:, :2:2.0]) + assert_raises(TypeError, lambda: a[1::4.0, :0]) + assert_raises(TypeError, lambda: a[::5.0,:]) + assert_raises(TypeError, lambda: a[:, 0:4:2.0]) + # mixed. + assert_raises(TypeError, lambda: a[1.0:2:2.0]) + assert_raises(TypeError, lambda: a[1.0::2.0]) + assert_raises(TypeError, lambda: a[0:, :2.0:2.0]) + assert_raises(TypeError, lambda: a[1.0:1:4.0, :0]) + assert_raises(TypeError, lambda: a[1.0:5.0:5.0,:]) + assert_raises(TypeError, lambda: a[:, 0.4:4.0:2.0]) + # should still get the DeprecationWarning if step = 0. + assert_raises(TypeError, lambda: a[::0.0]) + + def test_index_no_array_to_index(self): + # No non-scalar arrays. + a = np.array([[[1]]]) + + assert_raises(TypeError, lambda: a[a:a:a]) + + def test_none_index(self): + # `None` index adds newaxis + a = np.array([1, 2, 3]) + assert_equal(a[None], a[np.newaxis]) + assert_equal(a[None].ndim, a.ndim + 1) + + def test_empty_tuple_index(self): + # Empty tuple index creates a view + a = np.array([1, 2, 3]) + assert_equal(a[()], a) + assert_(a[()].base is a) + a = np.array(0) + assert_(isinstance(a[()], np.int_)) + + def test_void_scalar_empty_tuple(self): + s = np.zeros((), dtype='V4') + assert_equal(s[()].dtype, s.dtype) + assert_equal(s[()], s) + assert_equal(type(s[...]), np.ndarray) + + def test_same_kind_index_casting(self): + # Indexes should be cast with same-kind and not safe, even if that + # is somewhat unsafe. So test various different code paths. 
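+ # Editorial note (added; not in the upstream file): uintp -> intp is a + # 'same_kind' cast that a 'safe' rule would reject, since large uintp + # values cannot be represented; that is exactly the unsafety tolerated + # for index arrays here.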
+ index = np.arange(5) + u_index = index.astype(np.uintp) + arr = np.arange(10) + + assert_array_equal(arr[index], arr[u_index]) + arr[u_index] = np.arange(5) + assert_array_equal(arr, np.arange(10)) + + arr = np.arange(10).reshape(5, 2) + assert_array_equal(arr[index], arr[u_index]) + + arr[u_index] = np.arange(5)[:,None] + assert_array_equal(arr, np.arange(5)[:,None].repeat(2, axis=1)) + + arr = np.arange(25).reshape(5, 5) + assert_array_equal(arr[u_index, u_index], arr[index, index]) + + def test_empty_fancy_index(self): + # Empty list index creates an empty array + # with the same dtype (but with weird shape) + a = np.array([1, 2, 3]) + assert_equal(a[[]], []) + assert_equal(a[[]].dtype, a.dtype) + + b = np.array([], dtype=np.intp) + assert_equal(a[b], []) + assert_equal(a[b].dtype, a.dtype) + + b = np.array([]) + assert_raises(IndexError, a.__getitem__, b) + + def test_ellipsis_index(self): + a = np.array([[1, 2, 3], + [4, 5, 6], + [7, 8, 9]]) + assert_(a[...] is not a) + assert_equal(a[...], a) + # `a[...]` was `a` in numpy <1.9. + assert_(a[...].base is a) + + # Slicing with ellipsis can skip an + # arbitrary number of dimensions + assert_equal(a[0, ...], a[0]) + assert_equal(a[0, ...], a[0,:]) + assert_equal(a[..., 0], a[:, 0]) + + # Slicing with ellipsis always results + # in an array, not a scalar + assert_equal(a[0, ..., 1], np.array(2)) + + # Assignment with `(Ellipsis,)` on 0-d arrays + b = np.array(1) + b[(Ellipsis,)] = 2 + assert_equal(b, 2) + + def test_single_int_index(self): + # Single integer index selects one row + a = np.array([[1, 2, 3], + [4, 5, 6], + [7, 8, 9]]) + + assert_equal(a[0], [1, 2, 3]) + assert_equal(a[-1], [7, 8, 9]) + + # Index out of bounds produces IndexError + assert_raises(IndexError, a.__getitem__, 1 << 30) + # Index overflow produces IndexError + assert_raises(IndexError, a.__getitem__, 1 << 64) + + def test_single_bool_index(self): + # Single boolean index + a = np.array([[1, 2, 3], + [4, 5, 6], + [7, 8, 9]]) + + assert_equal(a[np.array(True)], a[None]) + assert_equal(a[np.array(False)], a[None][0:0]) + + def test_boolean_shape_mismatch(self): + arr = np.ones((5, 4, 3)) + + index = np.array([True]) + assert_raises(IndexError, arr.__getitem__, index) + + index = np.array([False] * 6) + assert_raises(IndexError, arr.__getitem__, index) + + index = np.zeros((4, 4), dtype=bool) + assert_raises(IndexError, arr.__getitem__, index) + + assert_raises(IndexError, arr.__getitem__, (slice(None), index)) + + def test_boolean_indexing_onedim(self): + # Indexing a 2-dimensional array with + # boolean array of length one + a = np.array([[ 0., 0., 0.]]) + b = np.array([ True], dtype=bool) + assert_equal(a[b], a) + # boolean assignment + a[b] = 1. + assert_equal(a, [[1., 1., 1.]]) + + def test_boolean_assignment_value_mismatch(self): + # A boolean assignment should fail when the shape of the values + # cannot be broadcast to the subscription. 
(see also gh-3458) + a = np.arange(4) + + def f(a, v): + a[a > -1] = v + + assert_raises(ValueError, f, a, []) + assert_raises(ValueError, f, a, [1, 2, 3]) + assert_raises(ValueError, f, a[:1], [1, 2, 3]) + + def test_boolean_assignment_needs_api(self): + # See also gh-7666 + # This caused a segfault on Python 2 due to the GIL not being + # held when the iterator does not need it, but the transfer function + # does + arr = np.zeros(1000) + indx = np.zeros(1000, dtype=bool) + indx[:100] = True + arr[indx] = np.ones(100, dtype=object) + + expected = np.zeros(1000) + expected[:100] = 1 + assert_array_equal(arr, expected) + + def test_boolean_indexing_twodim(self): + # Indexing a 2-dimensional array with + # 2-dimensional boolean array + a = np.array([[1, 2, 3], + [4, 5, 6], + [7, 8, 9]]) + b = np.array([[ True, False, True], + [False, True, False], + [ True, False, True]]) + assert_equal(a[b], [1, 3, 5, 7, 9]) + assert_equal(a[b[1]], [[4, 5, 6]]) + assert_equal(a[b[0]], a[b[2]]) + + # boolean assignment + a[b] = 0 + assert_equal(a, [[0, 2, 0], + [4, 0, 6], + [0, 8, 0]]) + + def test_boolean_indexing_list(self): + # Regression test for #13715. It's a use-after-free bug which the + # test won't directly catch, but it will show up in valgrind. + a = np.array([1, 2, 3]) + b = [True, False, True] + # Two variants of the test because the first takes a fast path + assert_equal(a[b], [1, 3]) + assert_equal(a[None, b], [[1, 3]]) + + def test_reverse_strides_and_subspace_bufferinit(self): + # This tests that the strides are not reversed for simple and + # subspace fancy indexing. + a = np.ones(5) + b = np.zeros(5, dtype=np.intp)[::-1] + c = np.arange(5)[::-1] + + a[b] = c + # If the strides are not reversed, the 0 in the arange comes last. + assert_equal(a[0], 0) + + # This also tests that the subspace buffer is initialized: + a = np.ones((5, 2)) + c = np.arange(10).reshape(5, 2)[::-1] + a[b, :] = c + assert_equal(a[0], [0, 1]) + + def test_reversed_strides_result_allocation(self): + # Test a bug when calculating the output strides for a result array + # when the subspace size was 1 (and test other cases as well) + a = np.arange(10)[:, None] + i = np.arange(10)[::-1] + assert_array_equal(a[i], a[i.copy('C')]) + + a = np.arange(20).reshape(-1, 2) + + def test_uncontiguous_subspace_assignment(self): + # During development there was a bug activating a skip logic + # based on ndim instead of size. + a = np.full((3, 4, 2), -1) + b = np.full((3, 4, 2), -1) + + a[[0, 1]] = np.arange(2 * 4 * 2).reshape(2, 4, 2).T + b[[0, 1]] = np.arange(2 * 4 * 2).reshape(2, 4, 2).T.copy() + + assert_equal(a, b) + + def test_too_many_fancy_indices_special_case(self): + # Just documents behaviour, this is a small limitation. + a = np.ones((1,) * 32) # 32 is NPY_MAXDIMS + assert_raises(IndexError, a.__getitem__, (np.array([0]),) * 32) + + def test_scalar_array_bool(self): + # NumPy bools can be used as boolean index (python ones as of yet not) + a = np.array(1) + assert_equal(a[np.bool_(True)], a[np.array(True)]) + assert_equal(a[np.bool_(False)], a[np.array(False)]) + + # After deprecating bools as integers: + #a = np.array([0,1,2]) + #assert_equal(a[True, :], a[None, :]) + #assert_equal(a[:, True], a[:, None]) + # + #assert_(not np.may_share_memory(a, a[True, :])) + + def test_everything_returns_views(self): + # Before `...` would return a itself. 
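+ # Editorial note (added; not in the upstream file): as noted in + # test_ellipsis_index, `a[...]` was `a` itself in numpy <1.9; all three + # spellings below now return fresh views whose `base` is `a`.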
+ a = np.arange(5) + + assert_(a is not a[()]) + assert_(a is not a[...]) + assert_(a is not a[:]) + + def test_broaderrors_indexing(self): + a = np.zeros((5, 5)) + assert_raises(IndexError, a.__getitem__, ([0, 1], [0, 1, 2])) + assert_raises(IndexError, a.__setitem__, ([0, 1], [0, 1, 2]), 0) + + def test_trivial_fancy_out_of_bounds(self): + a = np.zeros(5) + ind = np.ones(20, dtype=np.intp) + ind[-1] = 10 + assert_raises(IndexError, a.__getitem__, ind) + assert_raises(IndexError, a.__setitem__, ind, 0) + ind = np.ones(20, dtype=np.intp) + ind[0] = 11 + assert_raises(IndexError, a.__getitem__, ind) + assert_raises(IndexError, a.__setitem__, ind, 0) + + def test_trivial_fancy_not_possible(self): + # Test that the fast path for trivial assignment is not incorrectly + # used when the index is not contiguous or 1D, see also gh-11467. + a = np.arange(6) + idx = np.arange(6, dtype=np.intp).reshape(2, 1, 3)[:, :, 0] + assert_array_equal(a[idx], idx) + + # this case must not go into the fast path; note that idx is + # a non-contiguous, non-1D array here. + a[idx] = -1 + res = np.arange(6) + res[0] = -1 + res[3] = -1 + assert_array_equal(a, res) + + def test_nonbaseclass_values(self): + class SubClass(np.ndarray): + def __array_finalize__(self, old): + # Have array finalize do funny things + self.fill(99) + + a = np.zeros((5, 5)) + s = a.copy().view(type=SubClass) + s.fill(1) + + a[[0, 1, 2, 3, 4], :] = s + assert_((a == 1).all()) + + # Subspace is last, so transposing might want to finalize + a[:, [0, 1, 2, 3, 4]] = s + assert_((a == 1).all()) + + a.fill(0) + a[...] = s + assert_((a == 1).all()) + + def test_array_like_values(self): + # Similar to the above test, but use a memoryview instead + a = np.zeros((5, 5)) + s = np.arange(25, dtype=np.float64).reshape(5, 5) + + a[[0, 1, 2, 3, 4], :] = memoryview(s) + assert_array_equal(a, s) + + a[:, [0, 1, 2, 3, 4]] = memoryview(s) + assert_array_equal(a, s) + + a[...] = memoryview(s) + assert_array_equal(a, s) + + def test_subclass_writeable(self): + d = np.rec.array([('NGC1001', 11), ('NGC1002', 1.), ('NGC1003', 1.)], + dtype=[('target', 'S20'), ('V_mag', '>f4')]) + ind = np.array([False, True, True], dtype=bool) + assert_(d[ind].flags.writeable) + ind = np.array([0, 1]) + assert_(d[ind].flags.writeable) + assert_(d[...].flags.writeable) + assert_(d[0].flags.writeable) + + def test_memory_order(self): + # This is not necessary to preserve. Memory layouts for + # more complex indices are not as simple. 
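+ # Editorial note (added; not in the upstream file): the assertions below + # only pin down current behaviour; an F-contiguous result for a transposed + # index array is an implementation detail, not an API guarantee.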
+ a = np.arange(10) + b = np.arange(10).reshape(5,2).T + assert_(a[b].flags.f_contiguous) + + # Takes a different implementation branch: + a = a.reshape(-1, 1) + assert_(a[b, 0].flags.f_contiguous) + + def test_scalar_return_type(self): + # Full scalar indices should return scalars and object + # arrays should not call PyArray_Return on their items + class Zero: + # The most basic valid indexing + def __index__(self): + return 0 + + z = Zero() + + class ArrayLike: + # Simple array, should behave like the array + def __array__(self): + return np.array(0) + + a = np.zeros(()) + assert_(isinstance(a[()], np.float_)) + a = np.zeros(1) + assert_(isinstance(a[z], np.float_)) + a = np.zeros((1, 1)) + assert_(isinstance(a[z, np.array(0)], np.float_)) + assert_(isinstance(a[z, ArrayLike()], np.float_)) + + # And object arrays do not call it too often: + b = np.array(0) + a = np.array(0, dtype=object) + a[()] = b + assert_(isinstance(a[()], np.ndarray)) + a = np.array([b, None]) + assert_(isinstance(a[z], np.ndarray)) + a = np.array([[b, None]]) + assert_(isinstance(a[z, np.array(0)], np.ndarray)) + assert_(isinstance(a[z, ArrayLike()], np.ndarray)) + + def test_small_regressions(self): + # Reference count of intp for index checks + a = np.array([0]) + if HAS_REFCOUNT: + refcount = sys.getrefcount(np.dtype(np.intp)) + # item setting always checks indices in separate function: + a[np.array([0], dtype=np.intp)] = 1 + a[np.array([0], dtype=np.uint8)] = 1 + assert_raises(IndexError, a.__setitem__, + np.array([1], dtype=np.intp), 1) + assert_raises(IndexError, a.__setitem__, + np.array([1], dtype=np.uint8), 1) + + if HAS_REFCOUNT: + assert_equal(sys.getrefcount(np.dtype(np.intp)), refcount) + + def test_unaligned(self): + v = (np.zeros(64, dtype=np.int8) + ord('a'))[1:-7] + d = v.view(np.dtype("S8")) + # unaligned source + x = (np.zeros(16, dtype=np.int8) + ord('a'))[1:-7] + x = x.view(np.dtype("S8")) + x[...] = np.array("b" * 8, dtype="S") + b = np.arange(d.size) + #trivial + assert_equal(d[b], d) + d[b] = x + # nontrivial + # unaligned index array + b = np.zeros(d.size + 1).view(np.int8)[1:-(np.intp(0).itemsize - 1)] + b = b.view(np.intp)[:d.size] + b[...] 
= np.arange(d.size) + assert_equal(d[b.astype(np.int16)], d) + d[b.astype(np.int16)] = x + # boolean + d[b % 2 == 0] + d[b % 2 == 0] = x[::2] + + def test_tuple_subclass(self): + arr = np.ones((5, 5)) + + # A tuple subclass should also be an nd-index + class TupleSubclass(tuple): + pass + index = ([1], [1]) + index = TupleSubclass(index) + assert_(arr[index].shape == (1,)) + # Unlike the non nd-index: + assert_(arr[index,].shape != (1,)) + + def test_broken_sequence_not_nd_index(self): + # See gh-5063: + # If we have an object which claims to be a sequence, but fails + # on item getting, this should not be converted to an nd-index (tuple) + # If this object happens to be a valid index otherwise, it should work + # This object here is very dubious and probably bad though: + class SequenceLike: + def __index__(self): + return 0 + + def __len__(self): + return 1 + + def __getitem__(self, item): + raise IndexError('Not possible') + + arr = np.arange(10) + assert_array_equal(arr[SequenceLike()], arr[SequenceLike(),]) + + # also test that field indexing does not segfault + # for a similar reason, by indexing a structured array + arr = np.zeros((1,), dtype=[('f1', 'i8'), ('f2', 'i8')]) + assert_array_equal(arr[SequenceLike()], arr[SequenceLike(),]) + + def test_indexing_array_weird_strides(self): + # See also gh-6221 + # the shapes used here come from the issue and produce the + # correct iterator buffering size. + x = np.ones(10) + x2 = np.ones((10, 2)) + ind = np.arange(10)[:, None, None, None] + ind = np.broadcast_to(ind, (10, 55, 4, 4)) + + # single advanced index case + assert_array_equal(x[ind], x[ind.copy()]) + # higher dimensional advanced index + zind = np.zeros(4, dtype=np.intp) + assert_array_equal(x2[ind, zind], x2[ind.copy(), zind]) + + def test_indexing_array_negative_strides(self): + # From gh-8264, + # core dumps if negative strides are used in iteration + arro = np.zeros((4, 4)) + arr = arro[::-1, ::-1] + + slices = (slice(None), [0, 1, 2, 3]) + arr[slices] = 10 + assert_array_equal(arr, 10.) + + def test_character_assignment(self): + # This is an example of a function going through CopyObject which + # used to have an untested special path for scalars + # (the character special dtype case, should be deprecated probably) + arr = np.zeros((1, 5), dtype="c") + arr[0] = np.str_("asdfg") # must assign as a sequence + assert_array_equal(arr[0], np.array("asdfg", dtype="c")) + assert arr[0, 1] == b"s" # make sure not all were set to "a" for both + + @pytest.mark.parametrize("index", + [True, False, np.array([0])]) + @pytest.mark.parametrize("num", [32, 40]) + @pytest.mark.parametrize("original_ndim", [1, 32]) + def test_too_many_advanced_indices(self, index, num, original_ndim): + # These are limitations based on the number of arguments we can process. + # For `num=32` (and all boolean cases), the result is actually defined; + # but the use of NpyIter (NPY_MAXARGS) limits it for technical reasons. + arr = np.ones((1,) * original_ndim) + with pytest.raises(IndexError): + arr[(index,) * num] + with pytest.raises(IndexError): + arr[(index,) * num] = 1. + + def test_structured_advanced_indexing(self): + # Test that copyswap(n) used by integer array indexing is threadsafe + # for structured datatypes, see gh-15387. This test can behave randomly. 
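+ # Editorial note (added; not in the upstream file): 'randomly' because a + # thread-safety bug, if present, is only hit when threads actually race; + # the thread pool below merely makes such a race likely.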
+ from concurrent.futures import ThreadPoolExecutor + + # Create a deeply nested dtype to make a failure more likely: + dt = np.dtype([("", "f8")]) + dt = np.dtype([("", dt)] * 2) + dt = np.dtype([("", dt)] * 2) + # The array should be large enough to likely run into threading issues + arr = np.random.uniform(size=(6000, 8)).view(dt)[:, 0] + + rng = np.random.default_rng() + def func(arr): + indx = rng.integers(0, len(arr), size=6000, dtype=np.intp) + arr[indx] + + tpe = ThreadPoolExecutor(max_workers=8) + futures = [tpe.submit(func, arr) for _ in range(10)] + for f in futures: + f.result() + + assert arr.dtype is dt + + +class TestFieldIndexing: + def test_scalar_return_type(self): + # Field access on an array should return an array, even if it + # is 0-d. + a = np.zeros((), [('a','f8')]) + assert_(isinstance(a['a'], np.ndarray)) + assert_(isinstance(a[['a']], np.ndarray)) + + +class TestBroadcastedAssignments: + def assign(self, a, ind, val): + a[ind] = val + return a + + def test_prepending_ones(self): + a = np.zeros((3, 2)) + + a[...] = np.ones((1, 3, 2)) + # Fancy with subspace with and without transpose + a[[0, 1, 2], :] = np.ones((1, 3, 2)) + a[:, [0, 1]] = np.ones((1, 3, 2)) + # Fancy without subspace (with broadcasting) + a[[[0], [1], [2]], [0, 1]] = np.ones((1, 3, 2)) + + def test_prepend_not_one(self): + assign = self.assign + s_ = np.s_ + a = np.zeros(5) + + # Too large and not only ones. + assert_raises(ValueError, assign, a, s_[...], np.ones((2, 1))) + assert_raises(ValueError, assign, a, s_[[1, 2, 3],], np.ones((2, 1))) + assert_raises(ValueError, assign, a, s_[[[1], [2]],], np.ones((2,2,1))) + + def test_simple_broadcasting_errors(self): + assign = self.assign + s_ = np.s_ + a = np.zeros((5, 1)) + + assert_raises(ValueError, assign, a, s_[...], np.zeros((5, 2))) + assert_raises(ValueError, assign, a, s_[...], np.zeros((5, 0))) + assert_raises(ValueError, assign, a, s_[:, [0]], np.zeros((5, 2))) + assert_raises(ValueError, assign, a, s_[:, [0]], np.zeros((5, 0))) + assert_raises(ValueError, assign, a, s_[[0], :], np.zeros((2, 1))) + + @pytest.mark.parametrize("index", [ + (..., [1, 2], slice(None)), + ([0, 1], ..., 0), + (..., [1, 2], [1, 2])]) + def test_broadcast_error_reports_correct_shape(self, index): + values = np.zeros((100, 100)) # will never broadcast below + + arr = np.zeros((3, 4, 5, 6, 7)) + # We currently report without any spaces (could be changed) + shape_str = str(arr[index].shape).replace(" ", "") + + with pytest.raises(ValueError) as e: + arr[index] = values + + assert str(e.value).endswith(shape_str) + + def test_index_is_larger(self): + # Simple case of fancy index broadcasting of the index. + a = np.zeros((5, 5)) + a[[[0], [1], [2]], [0, 1, 2]] = [2, 3, 4] + + assert_((a[:3, :3] == [2, 3, 4]).all()) + + def test_broadcast_subspace(self): + a = np.zeros((100, 100)) + v = np.arange(100)[:,None] + b = np.arange(100)[::-1] + a[b] = v + assert_((a[::-1] == v).all()) + + +class TestSubclasses: + def test_basic(self): + # Test that indexing in various ways produces SubClass instances, + # and that the base is set up correctly: the original subclass + # instance for views, and a new ndarray for advanced/boolean indexing + # where a copy was made (latter a regression test for gh-11983). 
+ class SubClass(np.ndarray): + pass + + a = np.arange(5) + s = a.view(SubClass) + s_slice = s[:3] + assert_(type(s_slice) is SubClass) + assert_(s_slice.base is s) + assert_array_equal(s_slice, a[:3]) + + s_fancy = s[[0, 1, 2]] + assert_(type(s_fancy) is SubClass) + assert_(s_fancy.base is not s) + assert_(type(s_fancy.base) is np.ndarray) + assert_array_equal(s_fancy, a[[0, 1, 2]]) + assert_array_equal(s_fancy.base, a[[0, 1, 2]]) + + s_bool = s[s > 0] + assert_(type(s_bool) is SubClass) + assert_(s_bool.base is not s) + assert_(type(s_bool.base) is np.ndarray) + assert_array_equal(s_bool, a[a > 0]) + assert_array_equal(s_bool.base, a[a > 0]) + + def test_fancy_on_read_only(self): + # Test that fancy indexing on read-only SubClass does not make a + # read-only copy (gh-14132) + class SubClass(np.ndarray): + pass + + a = np.arange(5) + s = a.view(SubClass) + s.flags.writeable = False + s_fancy = s[[0, 1, 2]] + assert_(s_fancy.flags.writeable) + + + def test_finalize_gets_full_info(self): + # Array finalize should be called on the filled array. + class SubClass(np.ndarray): + def __array_finalize__(self, old): + self.finalize_status = np.array(self) + self.old = old + + s = np.arange(10).view(SubClass) + new_s = s[:3] + assert_array_equal(new_s.finalize_status, new_s) + assert_array_equal(new_s.old, s) + + new_s = s[[0,1,2,3]] + assert_array_equal(new_s.finalize_status, new_s) + assert_array_equal(new_s.old, s) + + new_s = s[s > 0] + assert_array_equal(new_s.finalize_status, new_s) + assert_array_equal(new_s.old, s) + + +class TestFancyIndexingCast: + def test_boolean_index_cast_assign(self): + # Set up the boolean index and float arrays. + shape = (8, 63) + bool_index = np.zeros(shape).astype(bool) + bool_index[0, 1] = True + zero_array = np.zeros(shape) + + # Assigning float is fine. + zero_array[bool_index] = np.array([1]) + assert_equal(zero_array[0, 1], 1) + + # Fancy indexing works, although we get a cast warning. + assert_warns(np.ComplexWarning, + zero_array.__setitem__, ([0], [1]), np.array([2 + 1j])) + assert_equal(zero_array[0, 1], 2) # No complex part + + # Cast complex to float, throwing away the imaginary portion. + assert_warns(np.ComplexWarning, + zero_array.__setitem__, bool_index, np.array([1j])) + assert_equal(zero_array[0, 1], 0) + +class TestFancyIndexingEquivalence: + def test_object_assign(self): + # Check that the field and object special case using copyto is active. + # The right hand side cannot be converted to an array here. + a = np.arange(5, dtype=object) + b = a.copy() + a[:3] = [1, (1,2), 3] + b[[0, 1, 2]] = [1, (1,2), 3] + assert_array_equal(a, b) + + # test same for subspace fancy indexing + b = np.arange(5, dtype=object)[None, :] + b[[0], :3] = [[1, (1,2), 3]] + assert_array_equal(a, b[0]) + + # Check that swapping of axes works. + # There was a bug that made the later assignment throw a ValueError + # due to an incorrectly transposed temporary right hand side (gh-5714) + b = b.T + b[:3, [0]] = [[1], [(1,2)], [3]] + assert_array_equal(a, b[:, 0]) + + # Another test for the memory order of the subspace + arr = np.ones((3, 4, 5), dtype=object) + # Equivalent slicing assignment for comparison + cmp_arr = arr.copy() + cmp_arr[:1, ...] = [[[1], [2], [3], [4]]] + arr[[0], ...] = [[[1], [2], [3], [4]]] + assert_array_equal(arr, cmp_arr) + arr = arr.copy('F') + arr[[0], ...] = [[[1], [2], [3], [4]]] + assert_array_equal(arr, cmp_arr) + + def test_cast_equivalence(self): + # Yes, normal slicing uses unsafe casting. 
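+ # Editorial note (added; not in the upstream file): e.g. the strings + # assigned below are parsed and cast into the integer array, which only + # an 'unsafe' casting rule permits; the fancy-index path must match this.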
+ a = np.arange(5) + b = a.copy() + + a[:3] = np.array(['2', '-3', '-1']) + b[[0, 2, 1]] = np.array(['2', '-1', '-3']) + assert_array_equal(a, b) + + # test the same for subspace fancy indexing + b = np.arange(5)[None, :] + b[[0], :3] = np.array([['2', '-3', '-1']]) + assert_array_equal(a, b[0]) + + +class TestMultiIndexingAutomated: + """ + These tests use code to mimic the C-Code indexing for selection. + + NOTE: + + * This still lacks tests for complex item setting. + * If you change behavior of indexing, you might want to modify + these tests to try more combinations. + * Behavior was written to match numpy version 1.8. (though a + first version matched 1.7.) + * Only tuple indices are supported by the mimicking code. + (and tested as of writing this) + * Error types should match most of the time as long as there + is only one error. For multiple errors, what gets raised + will usually not be the same one. They are *not* tested. + + Update 2016-11-30: It is probably not worth maintaining this test + indefinitely and it can be dropped if maintenance becomes a burden. + + """ + + def setup(self): + self.a = np.arange(np.prod([3, 1, 5, 6])).reshape(3, 1, 5, 6) + self.b = np.empty((3, 0, 5, 6)) + self.complex_indices = ['skip', Ellipsis, + 0, + # Boolean indices, up to 3-d for some special cases of eating up + # dimensions, also need to test all False + np.array([True, False, False]), + np.array([[True, False], [False, True]]), + np.array([[[False, False], [False, False]]]), + # Some slices: + slice(-5, 5, 2), + slice(1, 1, 100), + slice(4, -1, -2), + slice(None, None, -3), + # Some Fancy indexes: + np.empty((0, 1, 1), dtype=np.intp), # empty and can be broadcast + np.array([0, 1, -2]), + np.array([[2], [0], [1]]), + np.array([[0, -1], [0, 1]], dtype=np.dtype('intp').newbyteorder()), + np.array([2, -1], dtype=np.int8), + np.zeros([1]*31, dtype=int), # trigger too large array. + np.array([0., 1.])] # invalid datatype + # Some simpler indices that still cover a bit more + self.simple_indices = [Ellipsis, None, -1, [1], np.array([True]), + 'skip'] + # Very simple ones to fill the rest: + self.fill_indices = [slice(None, None), 0] + + def _get_multi_index(self, arr, indices): + """Mimic multi dimensional indexing. + + Parameters + ---------- + arr : ndarray + Array to be indexed. + indices : tuple of index objects + + Returns + ------- + out : ndarray + An array equivalent to the indexing operation (but always a copy). + `arr[indices]` should be identical. + no_copy : bool + Whether the indexing operation requires a copy. If this is `True`, + `np.may_share_memory(arr, arr[indices])` should be `True` (with + some exceptions for scalars and possibly 0-d arrays). + + Notes + ----- + While the function may mostly match the errors of normal indexing this + is generally not the case. + """ + in_indices = list(indices) + indices = [] + # if False, this is a fancy or boolean index + no_copy = True + # number of fancy/scalar indexes that are not consecutive + num_fancy = 0 + # number of dimensions indexed by a "fancy" index + fancy_dim = 0 + # NOTE: This is a funny twist (and probably OK to change). + # The boolean array has illegal indexes, but this is + # allowed if the broadcast fancy-indices are 0-sized. + # This variable is to catch that case. + error_unless_broadcast_to_empty = False + + # We need to handle Ellipsis and make arrays from indices, also + # check if this is fancy indexing (set no_copy). + ndim = 0 + ellipsis_pos = None # define here mostly to replace all but first. 
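+ # Editorial note (added; not in the upstream file): first pass over + # `in_indices` -- classify each entry, count the dimensions it consumes, + # and coerce plain sequences to intp arrays so the bounds checks below + # behave like real indexing.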
+ for i, indx in enumerate(in_indices): + if indx is None: + continue + if isinstance(indx, np.ndarray) and indx.dtype == bool: + no_copy = False + if indx.ndim == 0: + raise IndexError + # boolean indices can have higher dimensions + ndim += indx.ndim + fancy_dim += indx.ndim + continue + if indx is Ellipsis: + if ellipsis_pos is None: + ellipsis_pos = i + continue # do not increment ndim counter + raise IndexError + if isinstance(indx, slice): + ndim += 1 + continue + if not isinstance(indx, np.ndarray): + # This could be open for changes in numpy. + # numpy should maybe raise an error if casting to intp + # is not safe. It rejects np.array([1., 2.]) but not + # [1., 2.] as index (same for e.g. np.take). + # (Note the importance of empty lists if changing this here) + try: + indx = np.array(indx, dtype=np.intp) + except ValueError: + raise IndexError + in_indices[i] = indx + elif indx.dtype.kind != 'b' and indx.dtype.kind != 'i': + raise IndexError('arrays used as indices must be of ' + 'integer (or boolean) type') + if indx.ndim != 0: + no_copy = False + ndim += 1 + fancy_dim += 1 + + if arr.ndim - ndim < 0: + # we can't take more dimensions than we have, not even for 0-d + # arrays, since a[()] makes sense, but not a[(),]. We will + # raise an error later on, unless a broadcasting error occurs + # first. + raise IndexError + + if ndim == 0 and None not in in_indices: + # Well we have no indexes or one Ellipsis. This is legal. + return arr.copy(), no_copy + + if ellipsis_pos is not None: + in_indices[ellipsis_pos:ellipsis_pos+1] = ([slice(None, None)] * + (arr.ndim - ndim)) + + for ax, indx in enumerate(in_indices): + if isinstance(indx, slice): + # convert to an index array + indx = np.arange(*indx.indices(arr.shape[ax])) + indices.append(['s', indx]) + continue + elif indx is None: + # this is like taking a slice with one element from a new axis: + indices.append(['n', np.array([0], dtype=np.intp)]) + arr = arr.reshape((arr.shape[:ax] + (1,) + arr.shape[ax:])) + continue + if isinstance(indx, np.ndarray) and indx.dtype == bool: + if indx.shape != arr.shape[ax:ax+indx.ndim]: + raise IndexError + + try: + flat_indx = np.ravel_multi_index(np.nonzero(indx), + arr.shape[ax:ax+indx.ndim], mode='raise') + except Exception: + error_unless_broadcast_to_empty = True + # fill with 0s instead, and raise error later + flat_indx = np.array([0]*indx.sum(), dtype=np.intp) + # concatenate axis into a single one: + if indx.ndim != 0: + arr = arr.reshape((arr.shape[:ax] + + (np.prod(arr.shape[ax:ax+indx.ndim]),) + + arr.shape[ax+indx.ndim:])) + indx = flat_indx + else: + # This could be changed, a 0-d boolean index can + # make sense (even outside the 0-d indexed array case) + # Note that originally this could be interpreted as an + # integer in the full integer special case. + raise IndexError + else: + # If the index is a singleton, the bounds check is done + # before the broadcasting. This used to be different in <1.9 + if indx.ndim == 0: + if indx >= arr.shape[ax] or indx < -arr.shape[ax]: + raise IndexError + if indx.ndim == 0: + # The index is a scalar. This used to be twofold, but if + # fancy indexing was active, the check was done later, + # possibly after broadcasting it away (1.7. or earlier). + # Now it is always done. + if indx >= arr.shape[ax] or indx < - arr.shape[ax]: + raise IndexError + if (len(indices) > 0 and + indices[-1][0] == 'f' and + ax != ellipsis_pos): + # NOTE: There could still have been a 0-sized Ellipsis + # between them. Checked that with ellipsis_pos. 
+ indices[-1].append(indx) + else: + # We have a fancy index that is not after an existing one. + # NOTE: A 0-d array triggers this as well, while one may + # expect it to not trigger it, since a scalar would not be + # considered fancy indexing. + num_fancy += 1 + indices.append(['f', indx]) + + if num_fancy > 1 and not no_copy: + # We have to flush the fancy indexes left + new_indices = indices[:] + axes = list(range(arr.ndim)) + fancy_axes = [] + new_indices.insert(0, ['f']) + ni = 0 + ai = 0 + for indx in indices: + ni += 1 + if indx[0] == 'f': + new_indices[0].extend(indx[1:]) + del new_indices[ni] + ni -= 1 + for ax in range(ai, ai + len(indx[1:])): + fancy_axes.append(ax) + axes.remove(ax) + ai += len(indx) - 1 # axis we are at + indices = new_indices + # and now we need to transpose arr: + arr = arr.transpose(*(fancy_axes + axes)) + + # We only have one 'f' index now and arr is transposed accordingly. + # Now handle newaxis by reshaping... + ax = 0 + for indx in indices: + if indx[0] == 'f': + if len(indx) == 1: + continue + # First of all, reshape arr to combine fancy axes into one: + orig_shape = arr.shape + orig_slice = orig_shape[ax:ax + len(indx[1:])] + arr = arr.reshape((arr.shape[:ax] + + (np.prod(orig_slice).astype(int),) + + arr.shape[ax + len(indx[1:]):])) + + # Check if broadcasting works + res = np.broadcast(*indx[1:]) + # unfortunately the indices might be out of bounds. So check + # that first, and use mode='wrap' then. However only if + # there are any indices... + if res.size != 0: + if error_unless_broadcast_to_empty: + raise IndexError + for _indx, _size in zip(indx[1:], orig_slice): + if _indx.size == 0: + continue + if np.any(_indx >= _size) or np.any(_indx < -_size): + raise IndexError + if len(indx[1:]) == len(orig_slice): + if np.product(orig_slice) == 0: + # Work around for a crash or IndexError with 'wrap' + # in some 0-sized cases. + try: + mi = np.ravel_multi_index(indx[1:], orig_slice, + mode='raise') + except Exception: + # This happens with 0-sized orig_slice (sometimes?) + # here it is a ValueError, but indexing gives a: + raise IndexError('invalid index into 0-sized') + else: + mi = np.ravel_multi_index(indx[1:], orig_slice, + mode='wrap') + else: + # Maybe never happens... + raise ValueError + arr = arr.take(mi.ravel(), axis=ax) + try: + arr = arr.reshape((arr.shape[:ax] + + mi.shape + + arr.shape[ax+1:])) + except ValueError: + # too many dimensions, probably + raise IndexError + ax += mi.ndim + continue + + # If we are here, we have a 1D array for take: + arr = arr.take(indx[1], axis=ax) + ax += 1 + + return arr, no_copy + + def _check_multi_index(self, arr, index): + """Check a multi index item getting and simple setting. + + Parameters + ---------- + arr : ndarray + Array to be indexed, must be a reshaped arange. + index : tuple of indexing objects + Index being tested. + """ + # Test item getting + try: + mimic_get, no_copy = self._get_multi_index(arr, index) + except Exception as e: + if HAS_REFCOUNT: + prev_refcount = sys.getrefcount(arr) + assert_raises(type(e), arr.__getitem__, index) + assert_raises(type(e), arr.__setitem__, index, 0) + if HAS_REFCOUNT: + assert_equal(prev_refcount, sys.getrefcount(arr)) + return + + self._compare_index_result(arr, index, mimic_get, no_copy) + + def _check_single_index(self, arr, index): + """Check a single index item getting and simple setting. + + Parameters + ---------- + arr : ndarray + Array to be indexed, must be an arange. + index : indexing object + Index being tested. 
Must be a single index and not a tuple + of indexing objects (see also `_check_multi_index`). + """ + try: + mimic_get, no_copy = self._get_multi_index(arr, (index,)) + except Exception as e: + if HAS_REFCOUNT: + prev_refcount = sys.getrefcount(arr) + assert_raises(type(e), arr.__getitem__, index) + assert_raises(type(e), arr.__setitem__, index, 0) + if HAS_REFCOUNT: + assert_equal(prev_refcount, sys.getrefcount(arr)) + return + + self._compare_index_result(arr, index, mimic_get, no_copy) + + def _compare_index_result(self, arr, index, mimic_get, no_copy): + """Compare mimicked result to indexing result. + """ + arr = arr.copy() + indexed_arr = arr[index] + assert_array_equal(indexed_arr, mimic_get) + # Check if we got a view, unless it's a 0-sized or 0-d array. + # (then it's not a view, and that does not matter) + if indexed_arr.size != 0 and indexed_arr.ndim != 0: + assert_(np.may_share_memory(indexed_arr, arr) == no_copy) + # Check reference count of the original array + if HAS_REFCOUNT: + if no_copy: + # refcount increases by one: + assert_equal(sys.getrefcount(arr), 3) + else: + assert_equal(sys.getrefcount(arr), 2) + + # Test non-broadcast setitem: + b = arr.copy() + b[index] = mimic_get + 1000 + if b.size == 0: + return # nothing to compare here... + if no_copy and indexed_arr.ndim != 0: + # change indexed_arr in-place to manipulate original: + indexed_arr += 1000 + assert_array_equal(arr, b) + return + # Use the fact that the array is originally an arange: + arr.flat[indexed_arr.ravel()] += 1000 + assert_array_equal(arr, b) + + def test_boolean(self): + a = np.array(5) + assert_equal(a[np.array(True)], 5) + a[np.array(True)] = 1 + assert_equal(a, 1) + # NOTE: This is different from normal broadcasting, as + # arr[boolean_array] works like a multi-index, which means + # it is aligned to the left. This is probably correct for + # consistency with arr[boolean_array,]; also, no broadcasting + # is done at all. + self._check_multi_index( + self.a, (np.zeros_like(self.a, dtype=bool),)) + self._check_multi_index( + self.a, (np.zeros_like(self.a, dtype=bool)[..., 0],)) + self._check_multi_index( + self.a, (np.zeros_like(self.a, dtype=bool)[None, ...],)) + + def test_multidim(self): + # Automatically test combinations with complex indexes on 2nd (or 1st) + # spot and the simple ones in one other spot. + with warnings.catch_warnings(): + # This is so that np.array(True) is not accepted in a full integer + # index, when running the file separately. 
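+ # Editorial note (added; not in the upstream file): turning these + # warnings into errors keeps the mimicked path and real indexing in + # agreement about which indices are rejected.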
+ warnings.filterwarnings('error', '', DeprecationWarning) + warnings.filterwarnings('error', '', np.VisibleDeprecationWarning) + + def isskip(idx): + return isinstance(idx, str) and idx == "skip" + + for simple_pos in [0, 2, 3]: + tocheck = [self.fill_indices, self.complex_indices, + self.fill_indices, self.fill_indices] + tocheck[simple_pos] = self.simple_indices + for index in product(*tocheck): + index = tuple(i for i in index if not isskip(i)) + self._check_multi_index(self.a, index) + self._check_multi_index(self.b, index) + + # Check very simple item getting: + self._check_multi_index(self.a, (0, 0, 0, 0)) + self._check_multi_index(self.b, (0, 0, 0, 0)) + # Also check (simple cases of) too many indices: + assert_raises(IndexError, self.a.__getitem__, (0, 0, 0, 0, 0)) + assert_raises(IndexError, self.a.__setitem__, (0, 0, 0, 0, 0), 0) + assert_raises(IndexError, self.a.__getitem__, (0, 0, [1], 0, 0)) + assert_raises(IndexError, self.a.__setitem__, (0, 0, [1], 0, 0), 0) + + def test_1d(self): + a = np.arange(10) + for index in self.complex_indices: + self._check_single_index(a, index) + +class TestFloatNonIntegerArgument: + """ + These test that ``TypeError`` is raised when you try to use + non-integers as arguments for indexing and slicing, e.g. ``a[0.0:5]`` + and ``a[0.5]``, or in other functions like ``array.reshape(1., -1)``. + + """ + def test_valid_indexing(self): + # These should raise no errors. + a = np.array([[[5]]]) + + a[np.array([0])] + a[[0, 0]] + a[:, [0, 0]] + a[:, 0,:] + a[:,:,:] + + def test_valid_slicing(self): + # These should raise no errors. + a = np.array([[[5]]]) + + a[::] + a[0:] + a[:2] + a[0:2] + a[::2] + a[1::2] + a[:2:2] + a[1:2:2] + + def test_non_integer_argument_errors(self): + a = np.array([[5]]) + + assert_raises(TypeError, np.reshape, a, (1., 1., -1)) + assert_raises(TypeError, np.reshape, a, (np.array(1.), -1)) + assert_raises(TypeError, np.take, a, [0], 1.) + assert_raises(TypeError, np.take, a, [0], np.float64(1.)) + + def test_non_integer_sequence_multiplication(self): + # NumPy scalar sequence multiply should not work with non-integers + def mult(a, b): + return a * b + + assert_raises(TypeError, mult, [1], np.float_(3)) + # following should be OK + mult([1], np.int_(3)) + + def test_reduce_axis_float_index(self): + d = np.zeros((3,3,3)) + assert_raises(TypeError, np.min, d, 0.5) + assert_raises(TypeError, np.min, d, (0.5, 1)) + assert_raises(TypeError, np.min, d, (1, 2.2)) + assert_raises(TypeError, np.min, d, (.2, 1.2)) + + +class TestBooleanIndexing: + # Using a boolean as an integer argument or as an index is an error. + def test_bool_as_int_argument_errors(self): + a = np.array([[[1]]]) + + assert_raises(TypeError, np.reshape, a, (True, -1)) + assert_raises(TypeError, np.reshape, a, (np.bool_(True), -1)) + # Note that operator.index(np.array(True)) does not work, a boolean + # array is thus also deprecated, but not with the same message: + assert_raises(TypeError, operator.index, np.array(True)) + assert_warns(DeprecationWarning, operator.index, np.True_) + assert_raises(TypeError, np.take, args=(a, [0], False)) + + def test_boolean_indexing_weirdness(self): + # Weird boolean indexing things + a = np.ones((2, 3, 4)) + a[False, True, ...].shape == (0, 2, 3, 4) + a[True, [0, 1], True, True, [1], [[2]]] == (1, 2) + assert_raises(IndexError, lambda: a[False, [0, 1], ...]) + + + def test_boolean_indexing_fast_path(self): + # These used to either give the wrong error, or incorrectly give no + # error. 
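+ # Editorial note (added; not in the upstream file): the 'fast path' is + # the optimized single-boolean-index route; each case below must raise + # IndexError naming the mismatched dimension, never a ValueError.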
+ a = np.ones((3, 3)) + + # This used to incorrectly work (and give an array of shape (0,)) + idx1 = np.array([[False]*9]) + assert_raises_regex(IndexError, + "boolean index did not match indexed array along dimension 0; " + "dimension is 3 but corresponding boolean dimension is 1", + lambda: a[idx1]) + + # This used to incorrectly give a ValueError: operands could not be broadcast together + idx2 = np.array([[False]*8 + [True]]) + assert_raises_regex(IndexError, + "boolean index did not match indexed array along dimension 0; " + "dimension is 3 but corresponding boolean dimension is 1", + lambda: a[idx2]) + + # This is the same as it used to be. The above two should work like this. + idx3 = np.array([[False]*10]) + assert_raises_regex(IndexError, + "boolean index did not match indexed array along dimension 0; " + "dimension is 3 but corresponding boolean dimension is 1", + lambda: a[idx3]) + + # This used to give ValueError: non-broadcastable operand + a = np.ones((1, 1, 2)) + idx = np.array([[[True], [False]]]) + assert_raises_regex(IndexError, + "boolean index did not match indexed array along dimension 1; " + "dimension is 1 but corresponding boolean dimension is 2", + lambda: a[idx]) + + +class TestArrayToIndexDeprecation: + """Creating an index from an array that is not 0-D is an error. + + """ + def test_array_to_index_error(self): + # so no exception is expected. The raising is effectively tested above. + a = np.array([[[1]]]) + + assert_raises(TypeError, operator.index, np.array([1])) + assert_raises(TypeError, np.reshape, a, (a, -1)) + assert_raises(TypeError, np.take, a, [0], a) + + +class TestNonIntegerArrayLike: + """Tests that array-likes are only valid if they can be safely cast + to an integer type. + + For instance, lists give IndexError when they cannot be safely cast to + an integer. + + """ + def test_basic(self): + a = np.arange(10) + + assert_raises(IndexError, a.__getitem__, [0.5, 1.5]) + assert_raises(IndexError, a.__getitem__, (['1', '2'],)) + + # The following is valid + a.__getitem__([]) + + +class TestMultipleEllipsisError: + """An index can only have a single ellipsis. 
+ + """ + def test_basic(self): + a = np.arange(10) + assert_raises(IndexError, lambda: a[..., ...]) + assert_raises(IndexError, a.__getitem__, ((Ellipsis,) * 2,)) + assert_raises(IndexError, a.__getitem__, ((Ellipsis,) * 3,)) + + +class TestCApiAccess: + def test_getitem(self): + subscript = functools.partial(array_indexing, 0) + + # 0-d arrays don't work: + assert_raises(IndexError, subscript, np.ones(()), 0) + # Out of bound values: + assert_raises(IndexError, subscript, np.ones(10), 11) + assert_raises(IndexError, subscript, np.ones(10), -11) + assert_raises(IndexError, subscript, np.ones((10, 10)), 11) + assert_raises(IndexError, subscript, np.ones((10, 10)), -11) + + a = np.arange(10) + assert_array_equal(a[4], subscript(a, 4)) + a = a.reshape(5, 2) + assert_array_equal(a[-4], subscript(a, -4)) + + def test_setitem(self): + assign = functools.partial(array_indexing, 1) + + # Deletion is impossible: + assert_raises(ValueError, assign, np.ones(10), 0) + # 0-d arrays don't work: + assert_raises(IndexError, assign, np.ones(()), 0, 0) + # Out of bound values: + assert_raises(IndexError, assign, np.ones(10), 11, 0) + assert_raises(IndexError, assign, np.ones(10), -11, 0) + assert_raises(IndexError, assign, np.ones((10, 10)), 11, 0) + assert_raises(IndexError, assign, np.ones((10, 10)), -11, 0) + + a = np.arange(10) + assign(a, 4, 10) + assert_(a[4] == 10) + + a = a.reshape(5, 2) + assign(a, 4, 10) + assert_array_equal(a[-1], [10, 10]) diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_item_selection.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_item_selection.py new file mode 100644 index 0000000..3c35245 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_item_selection.py @@ -0,0 +1,86 @@ +import sys + +import numpy as np +from numpy.testing import ( + assert_, assert_raises, assert_array_equal, HAS_REFCOUNT + ) + + +class TestTake: + def test_simple(self): + a = [[1, 2], [3, 4]] + a_str = [[b'1', b'2'], [b'3', b'4']] + modes = ['raise', 'wrap', 'clip'] + indices = [-1, 4] + index_arrays = [np.empty(0, dtype=np.intp), + np.empty(tuple(), dtype=np.intp), + np.empty((1, 1), dtype=np.intp)] + real_indices = {'raise': {-1: 1, 4: IndexError}, + 'wrap': {-1: 1, 4: 0}, + 'clip': {-1: 0, 4: 1}} + # Currently all types but object, use the same function generation. + # So it should not be necessary to test all. However test also a non + # refcounted struct on top of object, which has a size that hits the + # default (non-specialized) path. 
+ types = int, object, np.dtype([('', 'i2', 3)]) + for t in types: + # ta works, even if the array may be odd if buffer interface is used + ta = np.array(a if np.issubdtype(t, np.number) else a_str, dtype=t) + tresult = list(ta.T.copy()) + for index_array in index_arrays: + if index_array.size != 0: + tresult[0].shape = (2,) + index_array.shape + tresult[1].shape = (2,) + index_array.shape + for mode in modes: + for index in indices: + real_index = real_indices[mode][index] + if real_index is IndexError and index_array.size != 0: + index_array.put(0, index) + assert_raises(IndexError, ta.take, index_array, + mode=mode, axis=1) + elif index_array.size != 0: + index_array.put(0, index) + res = ta.take(index_array, mode=mode, axis=1) + assert_array_equal(res, tresult[real_index]) + else: + res = ta.take(index_array, mode=mode, axis=1) + assert_(res.shape == (2,) + index_array.shape) + + def test_refcounting(self): + objects = [object() for i in range(10)] + for mode in ('raise', 'clip', 'wrap'): + a = np.array(objects) + b = np.array([2, 2, 4, 5, 3, 5]) + a.take(b, out=a[:6], mode=mode) + del a + if HAS_REFCOUNT: + assert_(all(sys.getrefcount(o) == 3 for o in objects)) + # not contiguous, example: + a = np.array(objects * 2)[::2] + a.take(b, out=a[:6], mode=mode) + del a + if HAS_REFCOUNT: + assert_(all(sys.getrefcount(o) == 3 for o in objects)) + + def test_unicode_mode(self): + d = np.arange(10) + k = b'\xc3\xa4'.decode("UTF8") + assert_raises(ValueError, d.take, 5, mode=k) + + def test_empty_partition(self): + # In reference to github issue #6530 + a_original = np.array([0, 2, 4, 6, 8, 10]) + a = a_original.copy() + + # An empty partition should be a successful no-op + a.partition(np.array([], dtype=np.int16)) + + assert_array_equal(a, a_original) + + def test_empty_argpartition(self): + # In reference to github issue #6530 + a = np.array([0, 2, 4, 6, 8, 10]) + a = a.argpartition(np.array([], dtype=np.int16)) + + b = np.array([0, 1, 2, 3, 4, 5]) + assert_array_equal(a, b) diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_limited_api.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_limited_api.py new file mode 100644 index 0000000..0bb543d --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_limited_api.py @@ -0,0 +1,41 @@ +import os +import shutil +import subprocess +import sys +import sysconfig +import pytest + + +@pytest.mark.xfail( + sysconfig.get_config_var("Py_DEBUG"), + reason=( + "Py_LIMITED_API is incompatible with Py_DEBUG, Py_TRACE_REFS, " + "and Py_REF_DEBUG" + ), +) +def test_limited_api(tmp_path): + """Test building a third-party C extension with the limited API.""" + # Based in part on test_cython from random.tests.test_extending + + here = os.path.dirname(__file__) + ext_dir = os.path.join(here, "examples", "limited_api") + + cytest = str(tmp_path / "limited_api") + + shutil.copytree(ext_dir, cytest) + # build the examples and "install" them into a temporary directory + + install_log = str(tmp_path / "tmp_install_log.txt") + subprocess.check_call( + [ + sys.executable, + "setup.py", + "build", + "install", + "--prefix", str(tmp_path / "installdir"), + "--single-version-externally-managed", + "--record", + install_log, + ], + cwd=cytest, + ) diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_longdouble.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_longdouble.py new file mode 100644 index 0000000..1a54e62 --- /dev/null +++ 
b/.local/lib/python3.8/site-packages/numpy/core/tests/test_longdouble.py @@ -0,0 +1,370 @@ +import warnings +import pytest + +import numpy as np +from numpy.testing import ( + assert_, assert_equal, assert_raises, assert_warns, assert_array_equal, + temppath, + ) +from numpy.core.tests._locales import CommaDecimalPointLocale + + +LD_INFO = np.finfo(np.longdouble) +longdouble_longer_than_double = (LD_INFO.eps < np.finfo(np.double).eps) + + +_o = 1 + LD_INFO.eps +string_to_longdouble_inaccurate = (_o != np.longdouble(repr(_o))) +del _o + + +def test_scalar_extraction(): + """Confirm that extracting a value doesn't convert to python float""" + o = 1 + LD_INFO.eps + a = np.array([o, o, o]) + assert_equal(a[1], o) + + +# Conversions string -> long double + +# 0.1 not exactly representable in base 2 floating point. +repr_precision = len(repr(np.longdouble(0.1))) +# +2 from macro block starting around line 842 in scalartypes.c.src. +@pytest.mark.skipif(LD_INFO.precision + 2 >= repr_precision, + reason="repr precision not enough to show eps") +def test_repr_roundtrip(): + # We will only see eps in repr if within printing precision. + o = 1 + LD_INFO.eps + assert_equal(np.longdouble(repr(o)), o, "repr was %s" % repr(o)) + + +@pytest.mark.skipif(string_to_longdouble_inaccurate, reason="Need strtold_l") +def test_repr_roundtrip_bytes(): + o = 1 + LD_INFO.eps + assert_equal(np.longdouble(repr(o).encode("ascii")), o) + + +@pytest.mark.skipif(string_to_longdouble_inaccurate, reason="Need strtold_l") +@pytest.mark.parametrize("strtype", (np.str_, np.bytes_, str, bytes)) +def test_array_and_stringlike_roundtrip(strtype): + """ + Test that string representations of long-double roundtrip both + for array casting and scalar coercion, see also gh-15608. + """ + o = 1 + LD_INFO.eps + + if strtype in (np.bytes_, bytes): + o_str = strtype(repr(o).encode("ascii")) + else: + o_str = strtype(repr(o)) + + # Test that `o` is correctly coerced from the string-like + assert o == np.longdouble(o_str) + + # Test that arrays also roundtrip correctly: + o_strarr = np.asarray([o] * 3, dtype=strtype) + assert (o == o_strarr.astype(np.longdouble)).all() + + # And array coercion and casting to string give the same as scalar repr: + assert (o_strarr == o_str).all() + assert (np.asarray([o] * 3).astype(strtype) == o_str).all() + + +def test_bogus_string(): + assert_raises(ValueError, np.longdouble, "spam") + assert_raises(ValueError, np.longdouble, "1.0 flub") + + +@pytest.mark.skipif(string_to_longdouble_inaccurate, reason="Need strtold_l") +def test_fromstring(): + o = 1 + LD_INFO.eps + s = (" " + repr(o))*5 + a = np.array([o]*5) + assert_equal(np.fromstring(s, sep=" ", dtype=np.longdouble), a, + err_msg="reading '%s'" % s) + + +def test_fromstring_complex(): + for ctype in ["complex", "cdouble", "cfloat"]: + # Check spacing between separator + assert_equal(np.fromstring("1, 2 , 3 ,4", sep=",", dtype=ctype), + np.array([1., 2., 3., 4.])) + # Real component not specified + assert_equal(np.fromstring("1j, -2j, 3j, 4e1j", sep=",", dtype=ctype), + np.array([1.j, -2.j, 3.j, 40.j])) + # Both components specified + assert_equal(np.fromstring("1+1j,2-2j, -3+3j, -4e1+4j", sep=",", dtype=ctype), + np.array([1. + 1.j, 2. - 2.j, - 3. + 3.j, - 40. 
+ 4j])) + # Spaces at wrong places + with assert_warns(DeprecationWarning): + assert_equal(np.fromstring("1+2 j,3", dtype=ctype, sep=","), + np.array([1.])) + with assert_warns(DeprecationWarning): + assert_equal(np.fromstring("1+ 2j,3", dtype=ctype, sep=","), + np.array([1.])) + with assert_warns(DeprecationWarning): + assert_equal(np.fromstring("1 +2j,3", dtype=ctype, sep=","), + np.array([1.])) + with assert_warns(DeprecationWarning): + assert_equal(np.fromstring("1+j", dtype=ctype, sep=","), + np.array([1.])) + with assert_warns(DeprecationWarning): + assert_equal(np.fromstring("1+", dtype=ctype, sep=","), + np.array([1.])) + with assert_warns(DeprecationWarning): + assert_equal(np.fromstring("1j+1", dtype=ctype, sep=","), + np.array([1j])) + + +def test_fromstring_bogus(): + with assert_warns(DeprecationWarning): + assert_equal(np.fromstring("1. 2. 3. flop 4.", dtype=float, sep=" "), + np.array([1., 2., 3.])) + + +def test_fromstring_empty(): + with assert_warns(DeprecationWarning): + assert_equal(np.fromstring("xxxxx", sep="x"), + np.array([])) + + +def test_fromstring_missing(): + with assert_warns(DeprecationWarning): + assert_equal(np.fromstring("1xx3x4x5x6", sep="x"), + np.array([1])) + + +class TestFileBased: + + ldbl = 1 + LD_INFO.eps + tgt = np.array([ldbl]*5) + out = ''.join([repr(t) + '\n' for t in tgt]) + + def test_fromfile_bogus(self): + with temppath() as path: + with open(path, 'wt') as f: + f.write("1. 2. 3. flop 4.\n") + + with assert_warns(DeprecationWarning): + res = np.fromfile(path, dtype=float, sep=" ") + assert_equal(res, np.array([1., 2., 3.])) + + def test_fromfile_complex(self): + for ctype in ["complex", "cdouble", "cfloat"]: + # Check spacing between separator and only real component specified + with temppath() as path: + with open(path, 'wt') as f: + f.write("1, 2 , 3 ,4\n") + + res = np.fromfile(path, dtype=ctype, sep=",") + assert_equal(res, np.array([1., 2., 3., 4.])) + + # Real component not specified + with temppath() as path: + with open(path, 'wt') as f: + f.write("1j, -2j, 3j, 4e1j\n") + + res = np.fromfile(path, dtype=ctype, sep=",") + assert_equal(res, np.array([1.j, -2.j, 3.j, 40.j])) + + # Both components specified + with temppath() as path: + with open(path, 'wt') as f: + f.write("1+1j,2-2j, -3+3j, -4e1+4j\n") + + res = np.fromfile(path, dtype=ctype, sep=",") + assert_equal(res, np.array([1. + 1.j, 2. - 2.j, - 3. + 3.j, - 40. 
+ 4j])) + + # Spaces at wrong places + with temppath() as path: + with open(path, 'wt') as f: + f.write("1+2 j,3\n") + + with assert_warns(DeprecationWarning): + res = np.fromfile(path, dtype=ctype, sep=",") + assert_equal(res, np.array([1.])) + + # Spaces at wrong places + with temppath() as path: + with open(path, 'wt') as f: + f.write("1+ 2j,3\n") + + with assert_warns(DeprecationWarning): + res = np.fromfile(path, dtype=ctype, sep=",") + assert_equal(res, np.array([1.])) + + # Spaces at wrong places + with temppath() as path: + with open(path, 'wt') as f: + f.write("1 +2j,3\n") + + with assert_warns(DeprecationWarning): + res = np.fromfile(path, dtype=ctype, sep=",") + assert_equal(res, np.array([1.])) + + # Spaces at wrong places + with temppath() as path: + with open(path, 'wt') as f: + f.write("1+j\n") + + with assert_warns(DeprecationWarning): + res = np.fromfile(path, dtype=ctype, sep=",") + assert_equal(res, np.array([1.])) + + # Spaces at wrong places + with temppath() as path: + with open(path, 'wt') as f: + f.write("1+\n") + + with assert_warns(DeprecationWarning): + res = np.fromfile(path, dtype=ctype, sep=",") + assert_equal(res, np.array([1.])) + + # Spaces at wrong places + with temppath() as path: + with open(path, 'wt') as f: + f.write("1j+1\n") + + with assert_warns(DeprecationWarning): + res = np.fromfile(path, dtype=ctype, sep=",") + assert_equal(res, np.array([1.j])) + + + + @pytest.mark.skipif(string_to_longdouble_inaccurate, + reason="Need strtold_l") + def test_fromfile(self): + with temppath() as path: + with open(path, 'wt') as f: + f.write(self.out) + res = np.fromfile(path, dtype=np.longdouble, sep="\n") + assert_equal(res, self.tgt) + + @pytest.mark.skipif(string_to_longdouble_inaccurate, + reason="Need strtold_l") + def test_genfromtxt(self): + with temppath() as path: + with open(path, 'wt') as f: + f.write(self.out) + res = np.genfromtxt(path, dtype=np.longdouble) + assert_equal(res, self.tgt) + + @pytest.mark.skipif(string_to_longdouble_inaccurate, + reason="Need strtold_l") + def test_loadtxt(self): + with temppath() as path: + with open(path, 'wt') as f: + f.write(self.out) + res = np.loadtxt(path, dtype=np.longdouble) + assert_equal(res, self.tgt) + + @pytest.mark.skipif(string_to_longdouble_inaccurate, + reason="Need strtold_l") + def test_tofile_roundtrip(self): + with temppath() as path: + self.tgt.tofile(path, sep=" ") + res = np.fromfile(path, dtype=np.longdouble, sep=" ") + assert_equal(res, self.tgt) + + +# Conversions long double -> string + + +def test_repr_exact(): + o = 1 + LD_INFO.eps + assert_(repr(o) != '1') + + +@pytest.mark.skipif(longdouble_longer_than_double, reason="BUG #2376") +@pytest.mark.skipif(string_to_longdouble_inaccurate, + reason="Need strtold_l") +def test_format(): + o = 1 + LD_INFO.eps + assert_("{0:.40g}".format(o) != '1') + + +@pytest.mark.skipif(longdouble_longer_than_double, reason="BUG #2376") +@pytest.mark.skipif(string_to_longdouble_inaccurate, + reason="Need strtold_l") +def test_percent(): + o = 1 + LD_INFO.eps + assert_("%.40g" % o != '1') + + +@pytest.mark.skipif(longdouble_longer_than_double, + reason="array repr problem") +@pytest.mark.skipif(string_to_longdouble_inaccurate, + reason="Need strtold_l") +def test_array_repr(): + o = 1 + LD_INFO.eps + a = np.array([o]) + b = np.array([1], dtype=np.longdouble) + if not np.all(a != b): + raise ValueError("precision loss creating arrays") + assert_(repr(a) != repr(b)) + +# +# Locale tests: scalar types formatting should be independent of the locale +# + +class 
TestCommaDecimalPointLocale(CommaDecimalPointLocale):
+
+    def test_repr_roundtrip_foreign(self):
+        o = 1.5
+        assert_equal(o, np.longdouble(repr(o)))
+
+    def test_fromstring_foreign_repr(self):
+        f = 1.234
+        a = np.fromstring(repr(f), dtype=float, sep=" ")
+        assert_equal(a[0], f)
+
+    def test_fromstring_best_effort_float(self):
+        with assert_warns(DeprecationWarning):
+            assert_equal(np.fromstring("1,234", dtype=float, sep=" "),
+                         np.array([1.]))
+
+    def test_fromstring_best_effort(self):
+        with assert_warns(DeprecationWarning):
+            assert_equal(np.fromstring("1,234", dtype=np.longdouble, sep=" "),
+                         np.array([1.]))
+
+    def test_fromstring_foreign(self):
+        s = "1.234"
+        a = np.fromstring(s, dtype=np.longdouble, sep=" ")
+        assert_equal(a[0], np.longdouble(s))
+
+    def test_fromstring_foreign_sep(self):
+        a = np.array([1, 2, 3, 4])
+        b = np.fromstring("1,2,3,4,", dtype=np.longdouble, sep=",")
+        assert_array_equal(a, b)
+
+    def test_fromstring_foreign_value(self):
+        with assert_warns(DeprecationWarning):
+            b = np.fromstring("1,234", dtype=np.longdouble, sep=" ")
+            assert_array_equal(b[0], 1)
+
+
+@pytest.mark.parametrize("int_val", [
+    # cases discussed in gh-10723
+    # and gh-9968
+    2 ** 1024, 0])
+def test_longdouble_from_int(int_val):
+    # for issue gh-9968
+    str_val = str(int_val)
+    # we'll expect a RuntimeWarning on platforms
+    # with np.longdouble equivalent to np.double
+    # for large integer input
+    with warnings.catch_warnings(record=True) as w:
+        warnings.filterwarnings('always', '', RuntimeWarning)
+        # can be inf==inf on some platforms
+        assert np.longdouble(int_val) == np.longdouble(str_val)
+        # we can't directly compare the int and
+        # max longdouble value on all platforms
+        if np.allclose(np.finfo(np.longdouble).max,
+                       np.finfo(np.double).max) and w:
+            assert w[0].category is RuntimeWarning
+
+@pytest.mark.parametrize("bool_val", [
+    True, False])
+def test_longdouble_from_bool(bool_val):
+    assert np.longdouble(bool_val) == np.longdouble(int(bool_val))
diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_machar.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_machar.py
new file mode 100644
index 0000000..3a66ec5
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_machar.py
@@ -0,0 +1,30 @@
+"""
+Test machar. Given recent changes to hardcode type data, we might want to get
+rid of both MachAr and this test at some point.
+
+"""
+import pytest
+
+from numpy.core._machar import MachAr
+import numpy.core.numerictypes as ntypes
+from numpy import errstate, array
+
+
+class TestMachAr:
+    def _run_machar_highprec(self):
+        # Instantiate MachAr instance with high enough precision to cause
+        # underflow
+        try:
+            hiprec = ntypes.float96
+            MachAr(lambda v: array(v, hiprec))
+        except AttributeError:
+            # float96 does not exist on all platforms; raise a real skip
+            # instead of silently passing.
+            pytest.skip("no ntypes.float96 available on this platform")
+
+    def test_underflow(self):
+        # Regression test for #759:
+        # instantiating MachAr for dtype = np.float96 raises spurious warning.
+        with errstate(all='raise'):
+            try:
+                self._run_machar_highprec()
+            except FloatingPointError as e:
+                msg = "Caught %s exception, should not have been raised."
% e + raise AssertionError(msg) diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_mem_overlap.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_mem_overlap.py new file mode 100644 index 0000000..24bdf47 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_mem_overlap.py @@ -0,0 +1,931 @@ +import itertools +import pytest + +import numpy as np +from numpy.core._multiarray_tests import solve_diophantine, internal_overlap +from numpy.core import _umath_tests +from numpy.lib.stride_tricks import as_strided +from numpy.testing import ( + assert_, assert_raises, assert_equal, assert_array_equal + ) + + +ndims = 2 +size = 10 +shape = tuple([size] * ndims) + +MAY_SHARE_BOUNDS = 0 +MAY_SHARE_EXACT = -1 + + +def _indices_for_nelems(nelems): + """Returns slices of length nelems, from start onwards, in direction sign.""" + + if nelems == 0: + return [size // 2] # int index + + res = [] + for step in (1, 2): + for sign in (-1, 1): + start = size // 2 - nelems * step * sign // 2 + stop = start + nelems * step * sign + res.append(slice(start, stop, step * sign)) + + return res + + +def _indices_for_axis(): + """Returns (src, dst) pairs of indices.""" + + res = [] + for nelems in (0, 2, 3): + ind = _indices_for_nelems(nelems) + res.extend(itertools.product(ind, ind)) # all assignments of size "nelems" + + return res + + +def _indices(ndims): + """Returns ((axis0_src, axis0_dst), (axis1_src, axis1_dst), ... ) index pairs.""" + + ind = _indices_for_axis() + return itertools.product(ind, repeat=ndims) + + +def _check_assignment(srcidx, dstidx): + """Check assignment arr[dstidx] = arr[srcidx] works.""" + + arr = np.arange(np.product(shape)).reshape(shape) + + cpy = arr.copy() + + cpy[dstidx] = arr[srcidx] + arr[dstidx] = arr[srcidx] + + assert_(np.all(arr == cpy), + 'assigning arr[%s] = arr[%s]' % (dstidx, srcidx)) + + +def test_overlapping_assignments(): + # Test automatically generated assignments which overlap in memory. 
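+    # A minimal instance of what _check_assignment verifies: for
+    # arr = np.arange(10), the assignment arr[1:5] = arr[0:4] reads and
+    # writes overlapping memory, and the result must match assigning
+    # through an explicit copy.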
+ + inds = _indices(ndims) + + for ind in inds: + srcidx = tuple([a[0] for a in ind]) + dstidx = tuple([a[1] for a in ind]) + + _check_assignment(srcidx, dstidx) + + +@pytest.mark.slow +def test_diophantine_fuzz(): + # Fuzz test the diophantine solver + rng = np.random.RandomState(1234) + + max_int = np.iinfo(np.intp).max + + for ndim in range(10): + feasible_count = 0 + infeasible_count = 0 + + min_count = 500//(ndim + 1) + + while min(feasible_count, infeasible_count) < min_count: + # Ensure big and small integer problems + A_max = 1 + rng.randint(0, 11, dtype=np.intp)**6 + U_max = rng.randint(0, 11, dtype=np.intp)**6 + + A_max = min(max_int, A_max) + U_max = min(max_int-1, U_max) + + A = tuple(int(rng.randint(1, A_max+1, dtype=np.intp)) + for j in range(ndim)) + U = tuple(int(rng.randint(0, U_max+2, dtype=np.intp)) + for j in range(ndim)) + + b_ub = min(max_int-2, sum(a*ub for a, ub in zip(A, U))) + b = rng.randint(-1, b_ub+2, dtype=np.intp) + + if ndim == 0 and feasible_count < min_count: + b = 0 + + X = solve_diophantine(A, U, b) + + if X is None: + # Check the simplified decision problem agrees + X_simplified = solve_diophantine(A, U, b, simplify=1) + assert_(X_simplified is None, (A, U, b, X_simplified)) + + # Check no solution exists (provided the problem is + # small enough so that brute force checking doesn't + # take too long) + ranges = tuple(range(0, a*ub+1, a) for a, ub in zip(A, U)) + + size = 1 + for r in ranges: + size *= len(r) + if size < 100000: + assert_(not any(sum(w) == b for w in itertools.product(*ranges))) + infeasible_count += 1 + else: + # Check the simplified decision problem agrees + X_simplified = solve_diophantine(A, U, b, simplify=1) + assert_(X_simplified is not None, (A, U, b, X_simplified)) + + # Check validity + assert_(sum(a*x for a, x in zip(A, X)) == b) + assert_(all(0 <= x <= ub for x, ub in zip(X, U))) + feasible_count += 1 + + +def test_diophantine_overflow(): + # Smoke test integer overflow detection + max_intp = np.iinfo(np.intp).max + max_int64 = np.iinfo(np.int64).max + + if max_int64 <= max_intp: + # Check that the algorithm works internally in 128-bit; + # solving this problem requires large intermediate numbers + A = (max_int64//2, max_int64//2 - 10) + U = (max_int64//2, max_int64//2 - 10) + b = 2*(max_int64//2) - 10 + + assert_equal(solve_diophantine(A, U, b), (1, 1)) + + +def check_may_share_memory_exact(a, b): + got = np.may_share_memory(a, b, max_work=MAY_SHARE_EXACT) + + assert_equal(np.may_share_memory(a, b), + np.may_share_memory(a, b, max_work=MAY_SHARE_BOUNDS)) + + a.fill(0) + b.fill(0) + a.fill(1) + exact = b.any() + + err_msg = "" + if got != exact: + err_msg = " " + "\n ".join([ + "base_a - base_b = %r" % (a.__array_interface__['data'][0] - b.__array_interface__['data'][0],), + "shape_a = %r" % (a.shape,), + "shape_b = %r" % (b.shape,), + "strides_a = %r" % (a.strides,), + "strides_b = %r" % (b.strides,), + "size_a = %r" % (a.size,), + "size_b = %r" % (b.size,) + ]) + + assert_equal(got, exact, err_msg=err_msg) + + +def test_may_share_memory_manual(): + # Manual test cases for may_share_memory + + # Base arrays + xs0 = [ + np.zeros([13, 21, 23, 22], dtype=np.int8), + np.zeros([13, 21, 23*2, 22], dtype=np.int8)[:,:,::2,:] + ] + + # Generate all negative stride combinations + xs = [] + for x in xs0: + for ss in itertools.product(*(([slice(None), slice(None, None, -1)],)*4)): + xp = x[ss] + xs.append(xp) + + for x in xs: + # The default is a simple extent check + assert_(np.may_share_memory(x[:,0,:], x[:,1,:])) + 
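+        # max_work=None places no limit on the work spent, so the overlap
+        # problem is solved exactly rather than with the default cheap
+        # bounds check (cf. MAY_SHARE_BOUNDS/MAY_SHARE_EXACT above).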
assert_(np.may_share_memory(x[:,0,:], x[:,1,:], max_work=None)) + + # Exact checks + check_may_share_memory_exact(x[:,0,:], x[:,1,:]) + check_may_share_memory_exact(x[:,::7], x[:,3::3]) + + try: + xp = x.ravel() + if xp.flags.owndata: + continue + xp = xp.view(np.int16) + except ValueError: + continue + + # 0-size arrays cannot overlap + check_may_share_memory_exact(x.ravel()[6:6], + xp.reshape(13, 21, 23, 11)[:,::7]) + + # Test itemsize is dealt with + check_may_share_memory_exact(x[:,::7], + xp.reshape(13, 21, 23, 11)) + check_may_share_memory_exact(x[:,::7], + xp.reshape(13, 21, 23, 11)[:,3::3]) + check_may_share_memory_exact(x.ravel()[6:7], + xp.reshape(13, 21, 23, 11)[:,::7]) + + # Check unit size + x = np.zeros([1], dtype=np.int8) + check_may_share_memory_exact(x, x) + check_may_share_memory_exact(x, x.copy()) + + +def iter_random_view_pairs(x, same_steps=True, equal_size=False): + rng = np.random.RandomState(1234) + + if equal_size and same_steps: + raise ValueError() + + def random_slice(n, step): + start = rng.randint(0, n+1, dtype=np.intp) + stop = rng.randint(start, n+1, dtype=np.intp) + if rng.randint(0, 2, dtype=np.intp) == 0: + stop, start = start, stop + step *= -1 + return slice(start, stop, step) + + def random_slice_fixed_size(n, step, size): + start = rng.randint(0, n+1 - size*step) + stop = start + (size-1)*step + 1 + if rng.randint(0, 2) == 0: + stop, start = start-1, stop-1 + if stop < 0: + stop = None + step *= -1 + return slice(start, stop, step) + + # First a few regular views + yield x, x + for j in range(1, 7, 3): + yield x[j:], x[:-j] + yield x[...,j:], x[...,:-j] + + # An array with zero stride internal overlap + strides = list(x.strides) + strides[0] = 0 + xp = as_strided(x, shape=x.shape, strides=strides) + yield x, xp + yield xp, xp + + # An array with non-zero stride internal overlap + strides = list(x.strides) + if strides[0] > 1: + strides[0] = 1 + xp = as_strided(x, shape=x.shape, strides=strides) + yield x, xp + yield xp, xp + + # Then discontiguous views + while True: + steps = tuple(rng.randint(1, 11, dtype=np.intp) + if rng.randint(0, 5, dtype=np.intp) == 0 else 1 + for j in range(x.ndim)) + s1 = tuple(random_slice(p, s) for p, s in zip(x.shape, steps)) + + t1 = np.arange(x.ndim) + rng.shuffle(t1) + + if equal_size: + t2 = t1 + else: + t2 = np.arange(x.ndim) + rng.shuffle(t2) + + a = x[s1] + + if equal_size: + if a.size == 0: + continue + + steps2 = tuple(rng.randint(1, max(2, p//(1+pa))) + if rng.randint(0, 5) == 0 else 1 + for p, s, pa in zip(x.shape, s1, a.shape)) + s2 = tuple(random_slice_fixed_size(p, s, pa) + for p, s, pa in zip(x.shape, steps2, a.shape)) + elif same_steps: + steps2 = steps + else: + steps2 = tuple(rng.randint(1, 11, dtype=np.intp) + if rng.randint(0, 5, dtype=np.intp) == 0 else 1 + for j in range(x.ndim)) + + if not equal_size: + s2 = tuple(random_slice(p, s) for p, s in zip(x.shape, steps2)) + + a = a.transpose(t1) + b = x[s2].transpose(t2) + + yield a, b + + +def check_may_share_memory_easy_fuzz(get_max_work, same_steps, min_count): + # Check that overlap problems with common strides are solved with + # little work. 
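+    # get_max_work maps an (a, b) view pair to the allowed work budget; the
+    # loop below only passes if the budget-limited answer always agrees
+    # with the exact answer (max_work=MAY_SHARE_EXACT).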
+ x = np.zeros([17,34,71,97], dtype=np.int16) + + feasible = 0 + infeasible = 0 + + pair_iter = iter_random_view_pairs(x, same_steps) + + while min(feasible, infeasible) < min_count: + a, b = next(pair_iter) + + bounds_overlap = np.may_share_memory(a, b) + may_share_answer = np.may_share_memory(a, b) + easy_answer = np.may_share_memory(a, b, max_work=get_max_work(a, b)) + exact_answer = np.may_share_memory(a, b, max_work=MAY_SHARE_EXACT) + + if easy_answer != exact_answer: + # assert_equal is slow... + assert_equal(easy_answer, exact_answer) + + if may_share_answer != bounds_overlap: + assert_equal(may_share_answer, bounds_overlap) + + if bounds_overlap: + if exact_answer: + feasible += 1 + else: + infeasible += 1 + + +@pytest.mark.slow +def test_may_share_memory_easy_fuzz(): + # Check that overlap problems with common strides are always + # solved with little work. + + check_may_share_memory_easy_fuzz(get_max_work=lambda a, b: 1, + same_steps=True, + min_count=2000) + + +@pytest.mark.slow +def test_may_share_memory_harder_fuzz(): + # Overlap problems with not necessarily common strides take more + # work. + # + # The work bound below can't be reduced much. Harder problems can + # also exist but not be detected here, as the set of problems + # comes from RNG. + + check_may_share_memory_easy_fuzz(get_max_work=lambda a, b: max(a.size, b.size)//2, + same_steps=False, + min_count=2000) + + +def test_shares_memory_api(): + x = np.zeros([4, 5, 6], dtype=np.int8) + + assert_equal(np.shares_memory(x, x), True) + assert_equal(np.shares_memory(x, x.copy()), False) + + a = x[:,::2,::3] + b = x[:,::3,::2] + assert_equal(np.shares_memory(a, b), True) + assert_equal(np.shares_memory(a, b, max_work=None), True) + assert_raises(np.TooHardError, np.shares_memory, a, b, max_work=1) + + +def test_may_share_memory_bad_max_work(): + x = np.zeros([1]) + assert_raises(OverflowError, np.may_share_memory, x, x, max_work=10**100) + assert_raises(OverflowError, np.shares_memory, x, x, max_work=10**100) + + +def test_internal_overlap_diophantine(): + def check(A, U, exists=None): + X = solve_diophantine(A, U, 0, require_ub_nontrivial=1) + + if exists is None: + exists = (X is not None) + + if X is not None: + assert_(sum(a*x for a, x in zip(A, X)) == sum(a*u//2 for a, u in zip(A, U))) + assert_(all(0 <= x <= u for x, u in zip(X, U))) + assert_(any(x != u//2 for x, u in zip(X, U))) + + if exists: + assert_(X is not None, repr(X)) + else: + assert_(X is None, repr(X)) + + # Smoke tests + check((3, 2), (2*2, 3*2), exists=True) + check((3*2, 2), (15*2, (3-1)*2), exists=False) + + +def test_internal_overlap_slices(): + # Slicing an array never generates internal overlap + + x = np.zeros([17,34,71,97], dtype=np.int16) + + rng = np.random.RandomState(1234) + + def random_slice(n, step): + start = rng.randint(0, n+1, dtype=np.intp) + stop = rng.randint(start, n+1, dtype=np.intp) + if rng.randint(0, 2, dtype=np.intp) == 0: + stop, start = start, stop + step *= -1 + return slice(start, stop, step) + + cases = 0 + min_count = 5000 + + while cases < min_count: + steps = tuple(rng.randint(1, 11, dtype=np.intp) + if rng.randint(0, 5, dtype=np.intp) == 0 else 1 + for j in range(x.ndim)) + t1 = np.arange(x.ndim) + rng.shuffle(t1) + s1 = tuple(random_slice(p, s) for p, s in zip(x.shape, steps)) + a = x[s1].transpose(t1) + + assert_(not internal_overlap(a)) + cases += 1 + + +def check_internal_overlap(a, manual_expected=None): + got = internal_overlap(a) + + # Brute-force check + m = set() + ranges = tuple(range(n) for n in a.shape) + 
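+    # Enumerate every index vector v and record its byte offset
+    # sum(stride * index); seeing an offset twice means two distinct
+    # elements share memory, i.e. the array overlaps internally.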
for v in itertools.product(*ranges): + offset = sum(s*w for s, w in zip(a.strides, v)) + if offset in m: + expected = True + break + else: + m.add(offset) + else: + expected = False + + # Compare + if got != expected: + assert_equal(got, expected, err_msg=repr((a.strides, a.shape))) + if manual_expected is not None and expected != manual_expected: + assert_equal(expected, manual_expected) + return got + + +def test_internal_overlap_manual(): + # Stride tricks can construct arrays with internal overlap + + # We don't care about memory bounds, the array is not + # read/write accessed + x = np.arange(1).astype(np.int8) + + # Check low-dimensional special cases + + check_internal_overlap(x, False) # 1-dim + check_internal_overlap(x.reshape([]), False) # 0-dim + + a = as_strided(x, strides=(3, 4), shape=(4, 4)) + check_internal_overlap(a, False) + + a = as_strided(x, strides=(3, 4), shape=(5, 4)) + check_internal_overlap(a, True) + + a = as_strided(x, strides=(0,), shape=(0,)) + check_internal_overlap(a, False) + + a = as_strided(x, strides=(0,), shape=(1,)) + check_internal_overlap(a, False) + + a = as_strided(x, strides=(0,), shape=(2,)) + check_internal_overlap(a, True) + + a = as_strided(x, strides=(0, -9993), shape=(87, 22)) + check_internal_overlap(a, True) + + a = as_strided(x, strides=(0, -9993), shape=(1, 22)) + check_internal_overlap(a, False) + + a = as_strided(x, strides=(0, -9993), shape=(0, 22)) + check_internal_overlap(a, False) + + +def test_internal_overlap_fuzz(): + # Fuzz check; the brute-force check is fairly slow + + x = np.arange(1).astype(np.int8) + + overlap = 0 + no_overlap = 0 + min_count = 100 + + rng = np.random.RandomState(1234) + + while min(overlap, no_overlap) < min_count: + ndim = rng.randint(1, 4, dtype=np.intp) + + strides = tuple(rng.randint(-1000, 1000, dtype=np.intp) + for j in range(ndim)) + shape = tuple(rng.randint(1, 30, dtype=np.intp) + for j in range(ndim)) + + a = as_strided(x, strides=strides, shape=shape) + result = check_internal_overlap(a) + + if result: + overlap += 1 + else: + no_overlap += 1 + + +def test_non_ndarray_inputs(): + # Regression check for gh-5604 + + class MyArray: + def __init__(self, data): + self.data = data + + @property + def __array_interface__(self): + return self.data.__array_interface__ + + class MyArray2: + def __init__(self, data): + self.data = data + + def __array__(self): + return self.data + + for cls in [MyArray, MyArray2]: + x = np.arange(5) + + assert_(np.may_share_memory(cls(x[::2]), x[1::2])) + assert_(not np.shares_memory(cls(x[::2]), x[1::2])) + + assert_(np.shares_memory(cls(x[1::3]), x[::2])) + assert_(np.may_share_memory(cls(x[1::3]), x[::2])) + + +def view_element_first_byte(x): + """Construct an array viewing the first byte of each element of `x`""" + from numpy.lib.stride_tricks import DummyArray + interface = dict(x.__array_interface__) + interface['typestr'] = '|b1' + interface['descr'] = [('', '|b1')] + return np.asarray(DummyArray(interface, x)) + + +def assert_copy_equivalent(operation, args, out, **kwargs): + """ + Check that operation(*args, out=out) produces results + equivalent to out[...] = operation(*args, out=out.copy()) + """ + + kwargs['out'] = out + kwargs2 = dict(kwargs) + kwargs2['out'] = out.copy() + + out_orig = out.copy() + out[...] = operation(*args, **kwargs2) + expected = out.copy() + out[...] 
= out_orig + + got = operation(*args, **kwargs).copy() + + if (got != expected).any(): + assert_equal(got, expected) + + +class TestUFunc: + """ + Test ufunc call memory overlap handling + """ + + def check_unary_fuzz(self, operation, get_out_axis_size, dtype=np.int16, + count=5000): + shapes = [7, 13, 8, 21, 29, 32] + + rng = np.random.RandomState(1234) + + for ndim in range(1, 6): + x = rng.randint(0, 2**16, size=shapes[:ndim]).astype(dtype) + + it = iter_random_view_pairs(x, same_steps=False, equal_size=True) + + min_count = count // (ndim + 1)**2 + + overlapping = 0 + while overlapping < min_count: + a, b = next(it) + + a_orig = a.copy() + b_orig = b.copy() + + if get_out_axis_size is None: + assert_copy_equivalent(operation, [a], out=b) + + if np.shares_memory(a, b): + overlapping += 1 + else: + for axis in itertools.chain(range(ndim), [None]): + a[...] = a_orig + b[...] = b_orig + + # Determine size for reduction axis (None if scalar) + outsize, scalarize = get_out_axis_size(a, b, axis) + if outsize == 'skip': + continue + + # Slice b to get an output array of the correct size + sl = [slice(None)] * ndim + if axis is None: + if outsize is None: + sl = [slice(0, 1)] + [0]*(ndim - 1) + else: + sl = [slice(0, outsize)] + [0]*(ndim - 1) + else: + if outsize is None: + k = b.shape[axis]//2 + if ndim == 1: + sl[axis] = slice(k, k + 1) + else: + sl[axis] = k + else: + assert b.shape[axis] >= outsize + sl[axis] = slice(0, outsize) + b_out = b[tuple(sl)] + + if scalarize: + b_out = b_out.reshape([]) + + if np.shares_memory(a, b_out): + overlapping += 1 + + # Check result + assert_copy_equivalent(operation, [a], out=b_out, axis=axis) + + @pytest.mark.slow + def test_unary_ufunc_call_fuzz(self): + self.check_unary_fuzz(np.invert, None, np.int16) + + @pytest.mark.slow + def test_unary_ufunc_call_complex_fuzz(self): + # Complex typically has a smaller alignment than itemsize + self.check_unary_fuzz(np.negative, None, np.complex128, count=500) + + def test_binary_ufunc_accumulate_fuzz(self): + def get_out_axis_size(a, b, axis): + if axis is None: + if a.ndim == 1: + return a.size, False + else: + return 'skip', False # accumulate doesn't support this + else: + return a.shape[axis], False + + self.check_unary_fuzz(np.add.accumulate, get_out_axis_size, + dtype=np.int16, count=500) + + def test_binary_ufunc_reduce_fuzz(self): + def get_out_axis_size(a, b, axis): + return None, (axis is None or a.ndim == 1) + + self.check_unary_fuzz(np.add.reduce, get_out_axis_size, + dtype=np.int16, count=500) + + def test_binary_ufunc_reduceat_fuzz(self): + def get_out_axis_size(a, b, axis): + if axis is None: + if a.ndim == 1: + return a.size, False + else: + return 'skip', False # reduceat doesn't support this + else: + return a.shape[axis], False + + def do_reduceat(a, out, axis): + if axis is None: + size = len(a) + step = size//len(out) + else: + size = a.shape[axis] + step = a.shape[axis] // out.shape[axis] + idx = np.arange(0, size, step) + return np.add.reduceat(a, idx, out=out, axis=axis) + + self.check_unary_fuzz(do_reduceat, get_out_axis_size, + dtype=np.int16, count=500) + + def test_binary_ufunc_reduceat_manual(self): + def check(ufunc, a, ind, out): + c1 = ufunc.reduceat(a.copy(), ind.copy(), out=out.copy()) + c2 = ufunc.reduceat(a, ind, out=out) + assert_array_equal(c1, c2) + + # Exactly same input/output arrays + a = np.arange(10000, dtype=np.int16) + check(np.add, a, a[::-1].copy(), a) + + # Overlap with index + a = np.arange(10000, dtype=np.int16) + check(np.add, a, a[::-1], a) + + 
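+    # For reference: np.add.reduceat(a, idx) roughly computes
+    # a[idx[i]:idx[i + 1]].sum() for each i (and a[idx[-1]:].sum() for the
+    # last index), which is why the manual checks above can pass the data
+    # array itself as both the index source and the output.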
@pytest.mark.slow + def test_unary_gufunc_fuzz(self): + shapes = [7, 13, 8, 21, 29, 32] + gufunc = _umath_tests.euclidean_pdist + + rng = np.random.RandomState(1234) + + for ndim in range(2, 6): + x = rng.rand(*shapes[:ndim]) + + it = iter_random_view_pairs(x, same_steps=False, equal_size=True) + + min_count = 500 // (ndim + 1)**2 + + overlapping = 0 + while overlapping < min_count: + a, b = next(it) + + if min(a.shape[-2:]) < 2 or min(b.shape[-2:]) < 2 or a.shape[-1] < 2: + continue + + # Ensure the shapes are so that euclidean_pdist is happy + if b.shape[-1] > b.shape[-2]: + b = b[...,0,:] + else: + b = b[...,:,0] + + n = a.shape[-2] + p = n * (n - 1) // 2 + if p <= b.shape[-1] and p > 0: + b = b[...,:p] + else: + n = max(2, int(np.sqrt(b.shape[-1]))//2) + p = n * (n - 1) // 2 + a = a[...,:n,:] + b = b[...,:p] + + # Call + if np.shares_memory(a, b): + overlapping += 1 + + with np.errstate(over='ignore', invalid='ignore'): + assert_copy_equivalent(gufunc, [a], out=b) + + def test_ufunc_at_manual(self): + def check(ufunc, a, ind, b=None): + a0 = a.copy() + if b is None: + ufunc.at(a0, ind.copy()) + c1 = a0.copy() + ufunc.at(a, ind) + c2 = a.copy() + else: + ufunc.at(a0, ind.copy(), b.copy()) + c1 = a0.copy() + ufunc.at(a, ind, b) + c2 = a.copy() + assert_array_equal(c1, c2) + + # Overlap with index + a = np.arange(10000, dtype=np.int16) + check(np.invert, a[::-1], a) + + # Overlap with second data array + a = np.arange(100, dtype=np.int16) + ind = np.arange(0, 100, 2, dtype=np.int16) + check(np.add, a, ind, a[25:75]) + + def test_unary_ufunc_1d_manual(self): + # Exercise ufunc fast-paths (that avoid creation of an `np.nditer`) + + def check(a, b): + a_orig = a.copy() + b_orig = b.copy() + + b0 = b.copy() + c1 = ufunc(a, out=b0) + c2 = ufunc(a, out=b) + assert_array_equal(c1, c2) + + # Trigger "fancy ufunc loop" code path + mask = view_element_first_byte(b).view(np.bool_) + + a[...] = a_orig + b[...] = b_orig + c1 = ufunc(a, out=b.copy(), where=mask.copy()).copy() + + a[...] = a_orig + b[...] = b_orig + c2 = ufunc(a, out=b, where=mask.copy()).copy() + + # Also, mask overlapping with output + a[...] = a_orig + b[...] 
= b_orig + c3 = ufunc(a, out=b, where=mask).copy() + + assert_array_equal(c1, c2) + assert_array_equal(c1, c3) + + dtypes = [np.int8, np.int16, np.int32, np.int64, np.float32, + np.float64, np.complex64, np.complex128] + dtypes = [np.dtype(x) for x in dtypes] + + for dtype in dtypes: + if np.issubdtype(dtype, np.integer): + ufunc = np.invert + else: + ufunc = np.reciprocal + + n = 1000 + k = 10 + indices = [ + np.index_exp[:n], + np.index_exp[k:k+n], + np.index_exp[n-1::-1], + np.index_exp[k+n-1:k-1:-1], + np.index_exp[:2*n:2], + np.index_exp[k:k+2*n:2], + np.index_exp[2*n-1::-2], + np.index_exp[k+2*n-1:k-1:-2], + ] + + for xi, yi in itertools.product(indices, indices): + v = np.arange(1, 1 + n*2 + k, dtype=dtype) + x = v[xi] + y = v[yi] + + with np.errstate(all='ignore'): + check(x, y) + + # Scalar cases + check(x[:1], y) + check(x[-1:], y) + check(x[:1].reshape([]), y) + check(x[-1:].reshape([]), y) + + def test_unary_ufunc_where_same(self): + # Check behavior at wheremask overlap + ufunc = np.invert + + def check(a, out, mask): + c1 = ufunc(a, out=out.copy(), where=mask.copy()) + c2 = ufunc(a, out=out, where=mask) + assert_array_equal(c1, c2) + + # Check behavior with same input and output arrays + x = np.arange(100).astype(np.bool_) + check(x, x, x) + check(x, x.copy(), x) + check(x, x, x.copy()) + + @pytest.mark.slow + def test_binary_ufunc_1d_manual(self): + ufunc = np.add + + def check(a, b, c): + c0 = c.copy() + c1 = ufunc(a, b, out=c0) + c2 = ufunc(a, b, out=c) + assert_array_equal(c1, c2) + + for dtype in [np.int8, np.int16, np.int32, np.int64, + np.float32, np.float64, np.complex64, np.complex128]: + # Check different data dependency orders + + n = 1000 + k = 10 + + indices = [] + for p in [1, 2]: + indices.extend([ + np.index_exp[:p*n:p], + np.index_exp[k:k+p*n:p], + np.index_exp[p*n-1::-p], + np.index_exp[k+p*n-1:k-1:-p], + ]) + + for x, y, z in itertools.product(indices, indices, indices): + v = np.arange(6*n).astype(dtype) + x = v[x] + y = v[y] + z = v[z] + + check(x, y, z) + + # Scalar cases + check(x[:1], y, z) + check(x[-1:], y, z) + check(x[:1].reshape([]), y, z) + check(x[-1:].reshape([]), y, z) + check(x, y[:1], z) + check(x, y[-1:], z) + check(x, y[:1].reshape([]), z) + check(x, y[-1:].reshape([]), z) + + def test_inplace_op_simple_manual(self): + rng = np.random.RandomState(1234) + x = rng.rand(200, 200) # bigger than bufsize + + x += x.T + assert_array_equal(x - x.T, 0) diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_mem_policy.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_mem_policy.py new file mode 100644 index 0000000..3dae36d --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_mem_policy.py @@ -0,0 +1,423 @@ +import asyncio +import gc +import os +import pytest +import numpy as np +import threading +import warnings +from numpy.testing import extbuild, assert_warns +import sys + + +@pytest.fixture +def get_module(tmp_path): + """ Add a memory policy that returns a false pointer 64 bytes into the + actual allocation, and fill the prefix with some text. Then check at each + memory manipulation that the prefix exists, to make sure all alloc/realloc/ + free/calloc go via the functions here. 
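+    Roughly: each shift_* hook below returns base_pointer + 64 and records
+    "originally allocated <size>" in the hidden 64-byte header, which the
+    free/realloc hooks verify before releasing the real pointer.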
+ """ + if sys.platform.startswith('cygwin'): + pytest.skip('link fails on cygwin') + functions = [ + ("get_default_policy", "METH_NOARGS", """ + Py_INCREF(PyDataMem_DefaultHandler); + return PyDataMem_DefaultHandler; + """), + ("set_secret_data_policy", "METH_NOARGS", """ + PyObject *secret_data = + PyCapsule_New(&secret_data_handler, "mem_handler", NULL); + if (secret_data == NULL) { + return NULL; + } + PyObject *old = PyDataMem_SetHandler(secret_data); + Py_DECREF(secret_data); + return old; + """), + ("set_old_policy", "METH_O", """ + PyObject *old; + if (args != NULL && PyCapsule_CheckExact(args)) { + old = PyDataMem_SetHandler(args); + } + else { + old = PyDataMem_SetHandler(NULL); + } + return old; + """), + ("get_array", "METH_NOARGS", """ + char *buf = (char *)malloc(20); + npy_intp dims[1]; + dims[0] = 20; + PyArray_Descr *descr = PyArray_DescrNewFromType(NPY_UINT8); + return PyArray_NewFromDescr(&PyArray_Type, descr, 1, dims, NULL, + buf, NPY_ARRAY_WRITEABLE, NULL); + """), + ("set_own", "METH_O", """ + if (!PyArray_Check(args)) { + PyErr_SetString(PyExc_ValueError, + "need an ndarray"); + return NULL; + } + PyArray_ENABLEFLAGS((PyArrayObject*)args, NPY_ARRAY_OWNDATA); + // Maybe try this too? + // PyArray_BASE(PyArrayObject *)args) = NULL; + Py_RETURN_NONE; + """), + ("get_array_with_base", "METH_NOARGS", """ + char *buf = (char *)malloc(20); + npy_intp dims[1]; + dims[0] = 20; + PyArray_Descr *descr = PyArray_DescrNewFromType(NPY_UINT8); + PyObject *arr = PyArray_NewFromDescr(&PyArray_Type, descr, 1, dims, + NULL, buf, + NPY_ARRAY_WRITEABLE, NULL); + if (arr == NULL) return NULL; + PyObject *obj = PyCapsule_New(buf, "buf capsule", + (PyCapsule_Destructor)&warn_on_free); + if (obj == NULL) { + Py_DECREF(arr); + return NULL; + } + if (PyArray_SetBaseObject((PyArrayObject *)arr, obj) < 0) { + Py_DECREF(arr); + Py_DECREF(obj); + return NULL; + } + return arr; + + """), + ] + prologue = ''' + #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION + #include + /* + * This struct allows the dynamic configuration of the allocator funcs + * of the `secret_data_allocator`. It is provided here for + * demonstration purposes, as a valid `ctx` use-case scenario. 
+ */ + typedef struct { + void *(*malloc)(size_t); + void *(*calloc)(size_t, size_t); + void *(*realloc)(void *, size_t); + void (*free)(void *); + } SecretDataAllocatorFuncs; + + NPY_NO_EXPORT void * + shift_alloc(void *ctx, size_t sz) { + SecretDataAllocatorFuncs *funcs = (SecretDataAllocatorFuncs *)ctx; + char *real = (char *)funcs->malloc(sz + 64); + if (real == NULL) { + return NULL; + } + snprintf(real, 64, "originally allocated %ld", (unsigned long)sz); + return (void *)(real + 64); + } + NPY_NO_EXPORT void * + shift_zero(void *ctx, size_t sz, size_t cnt) { + SecretDataAllocatorFuncs *funcs = (SecretDataAllocatorFuncs *)ctx; + char *real = (char *)funcs->calloc(sz + 64, cnt); + if (real == NULL) { + return NULL; + } + snprintf(real, 64, "originally allocated %ld via zero", + (unsigned long)sz); + return (void *)(real + 64); + } + NPY_NO_EXPORT void + shift_free(void *ctx, void * p, npy_uintp sz) { + SecretDataAllocatorFuncs *funcs = (SecretDataAllocatorFuncs *)ctx; + if (p == NULL) { + return ; + } + char *real = (char *)p - 64; + if (strncmp(real, "originally allocated", 20) != 0) { + fprintf(stdout, "uh-oh, unmatched shift_free, " + "no appropriate prefix\\n"); + /* Make C runtime crash by calling free on the wrong address */ + funcs->free((char *)p + 10); + /* funcs->free(real); */ + } + else { + npy_uintp i = (npy_uintp)atoi(real +20); + if (i != sz) { + fprintf(stderr, "uh-oh, unmatched shift_free" + "(ptr, %ld) but allocated %ld\\n", sz, i); + /* This happens in some places, only print */ + funcs->free(real); + } + else { + funcs->free(real); + } + } + } + NPY_NO_EXPORT void * + shift_realloc(void *ctx, void * p, npy_uintp sz) { + SecretDataAllocatorFuncs *funcs = (SecretDataAllocatorFuncs *)ctx; + if (p != NULL) { + char *real = (char *)p - 64; + if (strncmp(real, "originally allocated", 20) != 0) { + fprintf(stdout, "uh-oh, unmatched shift_realloc\\n"); + return realloc(p, sz); + } + return (void *)((char *)funcs->realloc(real, sz + 64) + 64); + } + else { + char *real = (char *)funcs->realloc(p, sz + 64); + if (real == NULL) { + return NULL; + } + snprintf(real, 64, "originally allocated " + "%ld via realloc", (unsigned long)sz); + return (void *)(real + 64); + } + } + /* As an example, we use the standard {m|c|re}alloc/free funcs. 
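+       A different ctx struct could swap in a tracing or pooling
+       allocator without changing the shift_* wrappers above.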
*/ + static SecretDataAllocatorFuncs secret_data_handler_ctx = { + malloc, + calloc, + realloc, + free + }; + static PyDataMem_Handler secret_data_handler = { + "secret_data_allocator", + 1, + { + &secret_data_handler_ctx, /* ctx */ + shift_alloc, /* malloc */ + shift_zero, /* calloc */ + shift_realloc, /* realloc */ + shift_free /* free */ + } + }; + void warn_on_free(void *capsule) { + PyErr_WarnEx(PyExc_UserWarning, "in warn_on_free", 1); + void * obj = PyCapsule_GetPointer(capsule, + PyCapsule_GetName(capsule)); + free(obj); + }; + ''' + more_init = "import_array();" + try: + import mem_policy + return mem_policy + except ImportError: + pass + # if it does not exist, build and load it + return extbuild.build_and_import_extension('mem_policy', + functions, + prologue=prologue, + include_dirs=[np.get_include()], + build_dir=tmp_path, + more_init=more_init) + + +def test_set_policy(get_module): + + get_handler_name = np.core.multiarray.get_handler_name + get_handler_version = np.core.multiarray.get_handler_version + orig_policy_name = get_handler_name() + + a = np.arange(10).reshape((2, 5)) # a doesn't own its own data + assert get_handler_name(a) is None + assert get_handler_version(a) is None + assert get_handler_name(a.base) == orig_policy_name + assert get_handler_version(a.base) == 1 + + orig_policy = get_module.set_secret_data_policy() + + b = np.arange(10).reshape((2, 5)) # b doesn't own its own data + assert get_handler_name(b) is None + assert get_handler_version(b) is None + assert get_handler_name(b.base) == 'secret_data_allocator' + assert get_handler_version(b.base) == 1 + + if orig_policy_name == 'default_allocator': + get_module.set_old_policy(None) # tests PyDataMem_SetHandler(NULL) + assert get_handler_name() == 'default_allocator' + else: + get_module.set_old_policy(orig_policy) + assert get_handler_name() == orig_policy_name + + +def test_default_policy_singleton(get_module): + get_handler_name = np.core.multiarray.get_handler_name + + # set the policy to default + orig_policy = get_module.set_old_policy(None) + + assert get_handler_name() == 'default_allocator' + + # re-set the policy to default + def_policy_1 = get_module.set_old_policy(None) + + assert get_handler_name() == 'default_allocator' + + # set the policy to original + def_policy_2 = get_module.set_old_policy(orig_policy) + + # since default policy is a singleton, + # these should be the same object + assert def_policy_1 is def_policy_2 is get_module.get_default_policy() + + +def test_policy_propagation(get_module): + # The memory policy goes hand-in-hand with flags.owndata + + class MyArr(np.ndarray): + pass + + get_handler_name = np.core.multiarray.get_handler_name + orig_policy_name = get_handler_name() + a = np.arange(10).view(MyArr).reshape((2, 5)) + assert get_handler_name(a) is None + assert a.flags.owndata is False + + assert get_handler_name(a.base) is None + assert a.base.flags.owndata is False + + assert get_handler_name(a.base.base) == orig_policy_name + assert a.base.base.flags.owndata is True + + +async def concurrent_context1(get_module, orig_policy_name, event): + if orig_policy_name == 'default_allocator': + get_module.set_secret_data_policy() + assert np.core.multiarray.get_handler_name() == 'secret_data_allocator' + else: + get_module.set_old_policy(None) + assert np.core.multiarray.get_handler_name() == 'default_allocator' + event.set() + + +async def concurrent_context2(get_module, orig_policy_name, event): + await event.wait() + # the policy is not affected by changes in parallel 
contexts + assert np.core.multiarray.get_handler_name() == orig_policy_name + # change policy in the child context + if orig_policy_name == 'default_allocator': + get_module.set_secret_data_policy() + assert np.core.multiarray.get_handler_name() == 'secret_data_allocator' + else: + get_module.set_old_policy(None) + assert np.core.multiarray.get_handler_name() == 'default_allocator' + + +async def async_test_context_locality(get_module): + orig_policy_name = np.core.multiarray.get_handler_name() + + event = asyncio.Event() + # the child contexts inherit the parent policy + concurrent_task1 = asyncio.create_task( + concurrent_context1(get_module, orig_policy_name, event)) + concurrent_task2 = asyncio.create_task( + concurrent_context2(get_module, orig_policy_name, event)) + await concurrent_task1 + await concurrent_task2 + + # the parent context is not affected by child policy changes + assert np.core.multiarray.get_handler_name() == orig_policy_name + + +def test_context_locality(get_module): + if (sys.implementation.name == 'pypy' + and sys.pypy_version_info[:3] < (7, 3, 6)): + pytest.skip('no context-locality support in PyPy < 7.3.6') + asyncio.run(async_test_context_locality(get_module)) + + +def concurrent_thread1(get_module, event): + get_module.set_secret_data_policy() + assert np.core.multiarray.get_handler_name() == 'secret_data_allocator' + event.set() + + +def concurrent_thread2(get_module, event): + event.wait() + # the policy is not affected by changes in parallel threads + assert np.core.multiarray.get_handler_name() == 'default_allocator' + # change policy in the child thread + get_module.set_secret_data_policy() + + +def test_thread_locality(get_module): + orig_policy_name = np.core.multiarray.get_handler_name() + + event = threading.Event() + # the child threads do not inherit the parent policy + concurrent_task1 = threading.Thread(target=concurrent_thread1, + args=(get_module, event)) + concurrent_task2 = threading.Thread(target=concurrent_thread2, + args=(get_module, event)) + concurrent_task1.start() + concurrent_task2.start() + concurrent_task1.join() + concurrent_task2.join() + + # the parent thread is not affected by child policy changes + assert np.core.multiarray.get_handler_name() == orig_policy_name + + +@pytest.mark.slow +def test_new_policy(get_module): + a = np.arange(10) + orig_policy_name = np.core.multiarray.get_handler_name(a) + + orig_policy = get_module.set_secret_data_policy() + + b = np.arange(10) + assert np.core.multiarray.get_handler_name(b) == 'secret_data_allocator' + + # test array manipulation. 
This is slow + if orig_policy_name == 'default_allocator': + # when the np.core.test tests recurse into this test, the + # policy will be set so this "if" will be false, preventing + # infinite recursion + # + # if needed, debug this by + # - running tests with -- -s (to not capture stdout/stderr + # - setting extra_argv=['-vv'] here + assert np.core.test('full', verbose=2, extra_argv=['-vv']) + # also try the ma tests, the pickling test is quite tricky + assert np.ma.test('full', verbose=2, extra_argv=['-vv']) + + get_module.set_old_policy(orig_policy) + + c = np.arange(10) + assert np.core.multiarray.get_handler_name(c) == orig_policy_name + +@pytest.mark.xfail(sys.implementation.name == "pypy", + reason=("bad interaction between getenv and " + "os.environ inside pytest")) +@pytest.mark.parametrize("policy", ["0", "1", None]) +def test_switch_owner(get_module, policy): + a = get_module.get_array() + assert np.core.multiarray.get_handler_name(a) is None + get_module.set_own(a) + oldval = os.environ.get('NUMPY_WARN_IF_NO_MEM_POLICY', None) + if policy is None: + if 'NUMPY_WARN_IF_NO_MEM_POLICY' in os.environ: + os.environ.pop('NUMPY_WARN_IF_NO_MEM_POLICY') + else: + os.environ['NUMPY_WARN_IF_NO_MEM_POLICY'] = policy + try: + # The policy should be NULL, so we have to assume we can call + # "free". A warning is given if the policy == "1" + if policy == "1": + with assert_warns(RuntimeWarning) as w: + del a + gc.collect() + else: + del a + gc.collect() + + finally: + if oldval is None: + if 'NUMPY_WARN_IF_NO_MEM_POLICY' in os.environ: + os.environ.pop('NUMPY_WARN_IF_NO_MEM_POLICY') + else: + os.environ['NUMPY_WARN_IF_NO_MEM_POLICY'] = oldval + +def test_owner_is_base(get_module): + a = get_module.get_array_with_base() + with pytest.warns(UserWarning, match='warn_on_free'): + del a + gc.collect() diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_memmap.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_memmap.py new file mode 100644 index 0000000..e4f0a6b --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_memmap.py @@ -0,0 +1,215 @@ +import sys +import os +import mmap +import pytest +from pathlib import Path +from tempfile import NamedTemporaryFile, TemporaryFile + +from numpy import ( + memmap, sum, average, product, ndarray, isscalar, add, subtract, multiply) + +from numpy import arange, allclose, asarray +from numpy.testing import ( + assert_, assert_equal, assert_array_equal, suppress_warnings, IS_PYPY, + break_cycles + ) + +class TestMemmap: + def setup(self): + self.tmpfp = NamedTemporaryFile(prefix='mmap') + self.shape = (3, 4) + self.dtype = 'float32' + self.data = arange(12, dtype=self.dtype) + self.data.resize(self.shape) + + def teardown(self): + self.tmpfp.close() + self.data = None + if IS_PYPY: + break_cycles() + break_cycles() + + def test_roundtrip(self): + # Write data to file + fp = memmap(self.tmpfp, dtype=self.dtype, mode='w+', + shape=self.shape) + fp[:] = self.data[:] + del fp # Test __del__ machinery, which handles cleanup + + # Read data back from file + newfp = memmap(self.tmpfp, dtype=self.dtype, mode='r', + shape=self.shape) + assert_(allclose(self.data, newfp)) + assert_array_equal(self.data, newfp) + assert_equal(newfp.flags.writeable, False) + + def test_open_with_filename(self, tmp_path): + tmpname = tmp_path / 'mmap' + fp = memmap(tmpname, dtype=self.dtype, mode='w+', + shape=self.shape) + fp[:] = self.data[:] + del fp + + def test_unnamed_file(self): + with TemporaryFile() as f: + fp = memmap(f, 
dtype=self.dtype, shape=self.shape) + del fp + + def test_attributes(self): + offset = 1 + mode = "w+" + fp = memmap(self.tmpfp, dtype=self.dtype, mode=mode, + shape=self.shape, offset=offset) + assert_equal(offset, fp.offset) + assert_equal(mode, fp.mode) + del fp + + def test_filename(self, tmp_path): + tmpname = tmp_path / "mmap" + fp = memmap(tmpname, dtype=self.dtype, mode='w+', + shape=self.shape) + abspath = Path(os.path.abspath(tmpname)) + fp[:] = self.data[:] + assert_equal(abspath, fp.filename) + b = fp[:1] + assert_equal(abspath, b.filename) + del b + del fp + + def test_path(self, tmp_path): + tmpname = tmp_path / "mmap" + fp = memmap(Path(tmpname), dtype=self.dtype, mode='w+', + shape=self.shape) + # os.path.realpath does not resolve symlinks on Windows + # see: https://bugs.python.org/issue9949 + # use Path.resolve, just as memmap class does internally + abspath = str(Path(tmpname).resolve()) + fp[:] = self.data[:] + assert_equal(abspath, str(fp.filename.resolve())) + b = fp[:1] + assert_equal(abspath, str(b.filename.resolve())) + del b + del fp + + def test_filename_fileobj(self): + fp = memmap(self.tmpfp, dtype=self.dtype, mode="w+", + shape=self.shape) + assert_equal(fp.filename, self.tmpfp.name) + + @pytest.mark.skipif(sys.platform == 'gnu0', + reason="Known to fail on hurd") + def test_flush(self): + fp = memmap(self.tmpfp, dtype=self.dtype, mode='w+', + shape=self.shape) + fp[:] = self.data[:] + assert_equal(fp[0], self.data[0]) + fp.flush() + + def test_del(self): + # Make sure a view does not delete the underlying mmap + fp_base = memmap(self.tmpfp, dtype=self.dtype, mode='w+', + shape=self.shape) + fp_base[0] = 5 + fp_view = fp_base[0:1] + assert_equal(fp_view[0], 5) + del fp_view + # Should still be able to access and assign values after + # deleting the view + assert_equal(fp_base[0], 5) + fp_base[0] = 6 + assert_equal(fp_base[0], 6) + + def test_arithmetic_drops_references(self): + fp = memmap(self.tmpfp, dtype=self.dtype, mode='w+', + shape=self.shape) + tmp = (fp + 10) + if isinstance(tmp, memmap): + assert_(tmp._mmap is not fp._mmap) + + def test_indexing_drops_references(self): + fp = memmap(self.tmpfp, dtype=self.dtype, mode='w+', + shape=self.shape) + tmp = fp[(1, 2), (2, 3)] + if isinstance(tmp, memmap): + assert_(tmp._mmap is not fp._mmap) + + def test_slicing_keeps_references(self): + fp = memmap(self.tmpfp, dtype=self.dtype, mode='w+', + shape=self.shape) + assert_(fp[:2, :2]._mmap is fp._mmap) + + def test_view(self): + fp = memmap(self.tmpfp, dtype=self.dtype, shape=self.shape) + new1 = fp.view() + new2 = new1.view() + assert_(new1.base is fp) + assert_(new2.base is fp) + new_array = asarray(fp) + assert_(new_array.base is fp) + + def test_ufunc_return_ndarray(self): + fp = memmap(self.tmpfp, dtype=self.dtype, shape=self.shape) + fp[:] = self.data + + with suppress_warnings() as sup: + sup.filter(FutureWarning, "np.average currently does not preserve") + for unary_op in [sum, average, product]: + result = unary_op(fp) + assert_(isscalar(result)) + assert_(result.__class__ is self.data[0, 0].__class__) + + assert_(unary_op(fp, axis=0).__class__ is ndarray) + assert_(unary_op(fp, axis=1).__class__ is ndarray) + + for binary_op in [add, subtract, multiply]: + assert_(binary_op(fp, self.data).__class__ is ndarray) + assert_(binary_op(self.data, fp).__class__ is ndarray) + assert_(binary_op(fp, fp).__class__ is ndarray) + + fp += 1 + assert(fp.__class__ is memmap) + add(fp, 1, out=fp) + assert(fp.__class__ is memmap) + + def test_getitem(self): + fp = 
memmap(self.tmpfp, dtype=self.dtype, shape=self.shape) + fp[:] = self.data + + assert_(fp[1:, :-1].__class__ is memmap) + # Fancy indexing returns a copy that is not memmapped + assert_(fp[[0, 1]].__class__ is ndarray) + + def test_memmap_subclass(self): + class MemmapSubClass(memmap): + pass + + fp = MemmapSubClass(self.tmpfp, dtype=self.dtype, shape=self.shape) + fp[:] = self.data + + # We keep previous behavior for subclasses of memmap, i.e. the + # ufunc and __getitem__ output is never turned into a ndarray + assert_(sum(fp, axis=0).__class__ is MemmapSubClass) + assert_(sum(fp).__class__ is MemmapSubClass) + assert_(fp[1:, :-1].__class__ is MemmapSubClass) + assert(fp[[0, 1]].__class__ is MemmapSubClass) + + def test_mmap_offset_greater_than_allocation_granularity(self): + size = 5 * mmap.ALLOCATIONGRANULARITY + offset = mmap.ALLOCATIONGRANULARITY + 1 + fp = memmap(self.tmpfp, shape=size, mode='w+', offset=offset) + assert_(fp.offset == offset) + + def test_no_shape(self): + self.tmpfp.write(b'a'*16) + mm = memmap(self.tmpfp, dtype='float64') + assert_equal(mm.shape, (2,)) + + def test_empty_array(self): + # gh-12653 + with pytest.raises(ValueError, match='empty file'): + memmap(self.tmpfp, shape=(0,4), mode='w+') + + self.tmpfp.write(b'\0') + + # ok now the file is not empty + memmap(self.tmpfp, shape=(0,4), mode='w+') diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_multiarray.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_multiarray.py new file mode 100644 index 0000000..b8fd1f3 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_multiarray.py @@ -0,0 +1,9230 @@ +import collections.abc +import tempfile +import sys +import warnings +import operator +import io +import itertools +import functools +import ctypes +import os +import gc +import weakref +import pytest +from contextlib import contextmanager + +from numpy.compat import pickle + +import pathlib +import builtins +from decimal import Decimal + +import numpy as np +import numpy.core._multiarray_tests as _multiarray_tests +from numpy.core._rational_tests import rational +from numpy.testing import ( + assert_, assert_raises, assert_warns, assert_equal, assert_almost_equal, + assert_array_equal, assert_raises_regex, assert_array_almost_equal, + assert_allclose, IS_PYPY, IS_PYSTON, HAS_REFCOUNT, assert_array_less, + runstring, temppath, suppress_warnings, break_cycles, + ) +from numpy.testing._private.utils import _no_tracing +from numpy.core.tests._locales import CommaDecimalPointLocale + +# Need to test an object that does not fully implement math interface +from datetime import timedelta, datetime + + +def _aligned_zeros(shape, dtype=float, order="C", align=None): + """ + Allocate a new ndarray with aligned memory. + + The ndarray is guaranteed *not* aligned to twice the requested alignment. + Eg, if align=4, guarantees it is not aligned to 8. 
If align=None uses + dtype.alignment.""" + dtype = np.dtype(dtype) + if dtype == np.dtype(object): + # Can't do this, fall back to standard allocation (which + # should always be sufficiently aligned) + if align is not None: + raise ValueError("object array alignment not supported") + return np.zeros(shape, dtype=dtype, order=order) + if align is None: + align = dtype.alignment + if not hasattr(shape, '__len__'): + shape = (shape,) + size = functools.reduce(operator.mul, shape) * dtype.itemsize + buf = np.empty(size + 2*align + 1, np.uint8) + + ptr = buf.__array_interface__['data'][0] + offset = ptr % align + if offset != 0: + offset = align - offset + if (ptr % (2*align)) == 0: + offset += align + + # Note: slices producing 0-size arrays do not necessarily change + # data pointer --- so we use and allocate size+1 + buf = buf[offset:offset+size+1][:-1] + data = np.ndarray(shape, dtype, buf, order=order) + data.fill(0) + return data + + +class TestFlags: + def setup(self): + self.a = np.arange(10) + + def test_writeable(self): + mydict = locals() + self.a.flags.writeable = False + assert_raises(ValueError, runstring, 'self.a[0] = 3', mydict) + assert_raises(ValueError, runstring, 'self.a[0:1].itemset(3)', mydict) + self.a.flags.writeable = True + self.a[0] = 5 + self.a[0] = 0 + + def test_writeable_any_base(self): + # Ensure that any base being writeable is sufficient to change flag; + # this is especially interesting for arrays from an array interface. + arr = np.arange(10) + + class subclass(np.ndarray): + pass + + # Create subclass so base will not be collapsed, this is OK to change + view1 = arr.view(subclass) + view2 = view1[...] + arr.flags.writeable = False + view2.flags.writeable = False + view2.flags.writeable = True # Can be set to True again. + + arr = np.arange(10) + + class frominterface: + def __init__(self, arr): + self.arr = arr + self.__array_interface__ = arr.__array_interface__ + + view1 = np.asarray(frominterface) + view2 = view1[...] + view2.flags.writeable = False + view2.flags.writeable = True + + view1.flags.writeable = False + view2.flags.writeable = False + with assert_raises(ValueError): + # Must assume not writeable, since only base is not: + view2.flags.writeable = True + + def test_writeable_from_readonly(self): + # gh-9440 - make sure fromstring, from buffer on readonly buffers + # set writeable False + data = b'\x00' * 100 + vals = np.frombuffer(data, 'B') + assert_raises(ValueError, vals.setflags, write=True) + types = np.dtype( [('vals', 'u1'), ('res3', 'S4')] ) + values = np.core.records.fromstring(data, types) + vals = values['vals'] + assert_raises(ValueError, vals.setflags, write=True) + + def test_writeable_from_buffer(self): + data = bytearray(b'\x00' * 100) + vals = np.frombuffer(data, 'B') + assert_(vals.flags.writeable) + vals.setflags(write=False) + assert_(vals.flags.writeable is False) + vals.setflags(write=True) + assert_(vals.flags.writeable) + types = np.dtype( [('vals', 'u1'), ('res3', 'S4')] ) + values = np.core.records.fromstring(data, types) + vals = values['vals'] + assert_(vals.flags.writeable) + vals.setflags(write=False) + assert_(vals.flags.writeable is False) + vals.setflags(write=True) + assert_(vals.flags.writeable) + + @pytest.mark.skipif(IS_PYPY, reason="PyPy always copies") + def test_writeable_pickle(self): + import pickle + # Small arrays will be copied without setting base. + # See condition for using PyArray_SetBaseObject in + # array_setstate. 
+ a = np.arange(1000) + for v in range(pickle.HIGHEST_PROTOCOL): + vals = pickle.loads(pickle.dumps(a, v)) + assert_(vals.flags.writeable) + assert_(isinstance(vals.base, bytes)) + + def test_writeable_from_c_data(self): + # Test that the writeable flag can be changed for an array wrapping + # low level C-data, but not owning its data. + # Also see that this is deprecated to change from python. + from numpy.core._multiarray_tests import get_c_wrapping_array + + arr_writeable = get_c_wrapping_array(True) + assert not arr_writeable.flags.owndata + assert arr_writeable.flags.writeable + view = arr_writeable[...] + + # Toggling the writeable flag works on the view: + view.flags.writeable = False + assert not view.flags.writeable + view.flags.writeable = True + assert view.flags.writeable + # Flag can be unset on the arr_writeable: + arr_writeable.flags.writeable = False + + arr_readonly = get_c_wrapping_array(False) + assert not arr_readonly.flags.owndata + assert not arr_readonly.flags.writeable + + for arr in [arr_writeable, arr_readonly]: + view = arr[...] + view.flags.writeable = False # make sure it is readonly + arr.flags.writeable = False + assert not arr.flags.writeable + + with assert_raises(ValueError): + view.flags.writeable = True + + with warnings.catch_warnings(): + warnings.simplefilter("error", DeprecationWarning) + with assert_raises(DeprecationWarning): + arr.flags.writeable = True + + with assert_warns(DeprecationWarning): + arr.flags.writeable = True + + def test_warnonwrite(self): + a = np.arange(10) + a.flags._warn_on_write = True + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always') + a[1] = 10 + a[2] = 10 + # only warn once + assert_(len(w) == 1) + + @pytest.mark.parametrize(["flag", "flag_value", "writeable"], + [("writeable", True, True), + # Delete _warn_on_write after deprecation and simplify + # the parameterization: + ("_warn_on_write", True, False), + ("writeable", False, False)]) + def test_readonly_flag_protocols(self, flag, flag_value, writeable): + a = np.arange(10) + setattr(a.flags, flag, flag_value) + + class MyArr(): + __array_struct__ = a.__array_struct__ + + assert memoryview(a).readonly is not writeable + assert a.__array_interface__['data'][1] is not writeable + assert np.asarray(MyArr()).flags.writeable is writeable + + def test_otherflags(self): + assert_equal(self.a.flags.carray, True) + assert_equal(self.a.flags['C'], True) + assert_equal(self.a.flags.farray, False) + assert_equal(self.a.flags.behaved, True) + assert_equal(self.a.flags.fnc, False) + assert_equal(self.a.flags.forc, True) + assert_equal(self.a.flags.owndata, True) + assert_equal(self.a.flags.writeable, True) + assert_equal(self.a.flags.aligned, True) + with assert_warns(DeprecationWarning): + assert_equal(self.a.flags.updateifcopy, False) + with assert_warns(DeprecationWarning): + assert_equal(self.a.flags['U'], False) + assert_equal(self.a.flags['UPDATEIFCOPY'], False) + assert_equal(self.a.flags.writebackifcopy, False) + assert_equal(self.a.flags['X'], False) + assert_equal(self.a.flags['WRITEBACKIFCOPY'], False) + + def test_string_align(self): + a = np.zeros(4, dtype=np.dtype('|S4')) + assert_(a.flags.aligned) + # not power of two are accessed byte-wise and thus considered aligned + a = np.zeros(5, dtype=np.dtype('|S4')) + assert_(a.flags.aligned) + + def test_void_align(self): + a = np.zeros(4, dtype=np.dtype([("a", "i4"), ("b", "i4")])) + assert_(a.flags.aligned) + + +class TestHash: + # see #3793 + def test_int(self): + for st, ut, s in 
[(np.int8, np.uint8, 8), + (np.int16, np.uint16, 16), + (np.int32, np.uint32, 32), + (np.int64, np.uint64, 64)]: + for i in range(1, s): + assert_equal(hash(st(-2**i)), hash(-2**i), + err_msg="%r: -2**%d" % (st, i)) + assert_equal(hash(st(2**(i - 1))), hash(2**(i - 1)), + err_msg="%r: 2**%d" % (st, i - 1)) + assert_equal(hash(st(2**i - 1)), hash(2**i - 1), + err_msg="%r: 2**%d - 1" % (st, i)) + + i = max(i - 1, 1) + assert_equal(hash(ut(2**(i - 1))), hash(2**(i - 1)), + err_msg="%r: 2**%d" % (ut, i - 1)) + assert_equal(hash(ut(2**i - 1)), hash(2**i - 1), + err_msg="%r: 2**%d - 1" % (ut, i)) + + +class TestAttributes: + def setup(self): + self.one = np.arange(10) + self.two = np.arange(20).reshape(4, 5) + self.three = np.arange(60, dtype=np.float64).reshape(2, 5, 6) + + def test_attributes(self): + assert_equal(self.one.shape, (10,)) + assert_equal(self.two.shape, (4, 5)) + assert_equal(self.three.shape, (2, 5, 6)) + self.three.shape = (10, 3, 2) + assert_equal(self.three.shape, (10, 3, 2)) + self.three.shape = (2, 5, 6) + assert_equal(self.one.strides, (self.one.itemsize,)) + num = self.two.itemsize + assert_equal(self.two.strides, (5*num, num)) + num = self.three.itemsize + assert_equal(self.three.strides, (30*num, 6*num, num)) + assert_equal(self.one.ndim, 1) + assert_equal(self.two.ndim, 2) + assert_equal(self.three.ndim, 3) + num = self.two.itemsize + assert_equal(self.two.size, 20) + assert_equal(self.two.nbytes, 20*num) + assert_equal(self.two.itemsize, self.two.dtype.itemsize) + assert_equal(self.two.base, np.arange(20)) + + def test_dtypeattr(self): + assert_equal(self.one.dtype, np.dtype(np.int_)) + assert_equal(self.three.dtype, np.dtype(np.float_)) + assert_equal(self.one.dtype.char, 'l') + assert_equal(self.three.dtype.char, 'd') + assert_(self.three.dtype.str[0] in '<>') + assert_equal(self.one.dtype.str[1], 'i') + assert_equal(self.three.dtype.str[1], 'f') + + def test_int_subclassing(self): + # Regression test for https://github.com/numpy/numpy/pull/3526 + + numpy_int = np.int_(0) + + # int_ doesn't inherit from Python int, because it's not fixed-width + assert_(not isinstance(numpy_int, int)) + + def test_stridesattr(self): + x = self.one + + def make_array(size, offset, strides): + return np.ndarray(size, buffer=x, dtype=int, + offset=offset*x.itemsize, + strides=strides*x.itemsize) + + assert_equal(make_array(4, 4, -1), np.array([4, 3, 2, 1])) + assert_raises(ValueError, make_array, 4, 4, -2) + assert_raises(ValueError, make_array, 4, 2, -1) + assert_raises(ValueError, make_array, 8, 3, 1) + assert_equal(make_array(8, 3, 0), np.array([3]*8)) + # Check behavior reported in gh-2503: + assert_raises(ValueError, make_array, (2, 3), 5, np.array([-2, -3])) + make_array(0, 0, 10) + + def test_set_stridesattr(self): + x = self.one + + def make_array(size, offset, strides): + try: + r = np.ndarray([size], dtype=int, buffer=x, + offset=offset*x.itemsize) + except Exception as e: + raise RuntimeError(e) + r.strides = strides = strides*x.itemsize + return r + + assert_equal(make_array(4, 4, -1), np.array([4, 3, 2, 1])) + assert_equal(make_array(7, 3, 1), np.array([3, 4, 5, 6, 7, 8, 9])) + assert_raises(ValueError, make_array, 4, 4, -2) + assert_raises(ValueError, make_array, 4, 2, -1) + assert_raises(RuntimeError, make_array, 8, 3, 1) + # Check that the true extent of the array is used. + # Test relies on as_strided base not exposing a buffer. 
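+ # Sketch for readers (our addition, not upstream): with zero strides
+ # every element of the view aliases the single element of the base, so
+ # the true extent of the data is one itemsize:
+ #   v = np.lib.stride_tricks.as_strided(np.arange(1), (10, 10), (0, 0))
+ #   v.strides   # -> (0, 0)
+ #   v.size      # -> 100, but all entries share one memory location
+ # Setting real (non-zero) strides on such a view must therefore fail,
+ # as the check below verifies.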
+ x = np.lib.stride_tricks.as_strided(np.arange(1), (10, 10), (0, 0)) + + def set_strides(arr, strides): + arr.strides = strides + + assert_raises(ValueError, set_strides, x, (10*x.itemsize, x.itemsize)) + + # Test for offset calculations: + x = np.lib.stride_tricks.as_strided(np.arange(10, dtype=np.int8)[-1], + shape=(10,), strides=(-1,)) + assert_raises(ValueError, set_strides, x[::-1], -1) + a = x[::-1] + a.strides = 1 + a[::2].strides = 2 + + # test 0d + arr_0d = np.array(0) + arr_0d.strides = () + assert_raises(TypeError, set_strides, arr_0d, None) + + def test_fill(self): + for t in "?bhilqpBHILQPfdgFDGO": + x = np.empty((3, 2, 1), t) + y = np.empty((3, 2, 1), t) + x.fill(1) + y[...] = 1 + assert_equal(x, y) + + def test_fill_max_uint64(self): + x = np.empty((3, 2, 1), dtype=np.uint64) + y = np.empty((3, 2, 1), dtype=np.uint64) + value = 2**64 - 1 + y[...] = value + x.fill(value) + assert_array_equal(x, y) + + def test_fill_struct_array(self): + # Filling from a scalar + x = np.array([(0, 0.0), (1, 1.0)], dtype='i4,f8') + x.fill(x[0]) + assert_equal(x['f1'][1], x['f1'][0]) + # Filling from a tuple that can be converted + # to a scalar + x = np.zeros(2, dtype=[('a', 'f8'), ('b', 'i4')]) + x.fill((3.5, -2)) + assert_array_equal(x['a'], [3.5, 3.5]) + assert_array_equal(x['b'], [-2, -2]) + + +class TestArrayConstruction: + def test_array(self): + d = np.ones(6) + r = np.array([d, d]) + assert_equal(r, np.ones((2, 6))) + + d = np.ones(6) + tgt = np.ones((2, 6)) + r = np.array([d, d]) + assert_equal(r, tgt) + tgt[1] = 2 + r = np.array([d, d + 1]) + assert_equal(r, tgt) + + d = np.ones(6) + r = np.array([[d, d]]) + assert_equal(r, np.ones((1, 2, 6))) + + d = np.ones(6) + r = np.array([[d, d], [d, d]]) + assert_equal(r, np.ones((2, 2, 6))) + + d = np.ones((6, 6)) + r = np.array([d, d]) + assert_equal(r, np.ones((2, 6, 6))) + + d = np.ones((6, )) + r = np.array([[d, d + 1], d + 2], dtype=object) + assert_equal(len(r), 2) + assert_equal(r[0], [d, d + 1]) + assert_equal(r[1], d + 2) + + tgt = np.ones((2, 3), dtype=bool) + tgt[0, 2] = False + tgt[1, 0:2] = False + r = np.array([[True, True, False], [False, False, True]]) + assert_equal(r, tgt) + r = np.array([[True, False], [True, False], [False, True]]) + assert_equal(r, tgt.T) + + def test_array_empty(self): + assert_raises(TypeError, np.array) + + def test_array_copy_false(self): + d = np.array([1, 2, 3]) + e = np.array(d, copy=False) + d[1] = 3 + assert_array_equal(e, [1, 3, 3]) + e = np.array(d, copy=False, order='F') + d[1] = 4 + assert_array_equal(e, [1, 4, 3]) + e[2] = 7 + assert_array_equal(d, [1, 4, 7]) + + def test_array_copy_true(self): + d = np.array([[1,2,3], [1, 2, 3]]) + e = np.array(d, copy=True) + d[0, 1] = 3 + e[0, 2] = -7 + assert_array_equal(e, [[1, 2, -7], [1, 2, 3]]) + assert_array_equal(d, [[1, 3, 3], [1, 2, 3]]) + e = np.array(d, copy=True, order='F') + d[0, 1] = 5 + e[0, 2] = 7 + assert_array_equal(e, [[1, 3, 7], [1, 2, 3]]) + assert_array_equal(d, [[1, 5, 3], [1,2,3]]) + + def test_array_cont(self): + d = np.ones(10)[::2] + assert_(np.ascontiguousarray(d).flags.c_contiguous) + assert_(np.ascontiguousarray(d).flags.f_contiguous) + assert_(np.asfortranarray(d).flags.c_contiguous) + assert_(np.asfortranarray(d).flags.f_contiguous) + d = np.ones((10, 10))[::2,::2] + assert_(np.ascontiguousarray(d).flags.c_contiguous) + assert_(np.asfortranarray(d).flags.f_contiguous) + + @pytest.mark.parametrize("func", + [np.array, + np.asarray, + np.asanyarray, + np.ascontiguousarray, + np.asfortranarray]) + def 
test_bad_arguments_error(self, func): + with pytest.raises(TypeError): + func(3, dtype="bad dtype") + with pytest.raises(TypeError): + func() # missing arguments + with pytest.raises(TypeError): + func(1, 2, 3, 4, 5, 6, 7, 8) # too many arguments + + @pytest.mark.parametrize("func", + [np.array, + np.asarray, + np.asanyarray, + np.ascontiguousarray, + np.asfortranarray]) + def test_array_as_keyword(self, func): + # This should likely be made positional only, but do not change + # the name accidentally. + if func is np.array: + func(object=3) + else: + func(a=3) + +class TestAssignment: + def test_assignment_broadcasting(self): + a = np.arange(6).reshape(2, 3) + + # Broadcasting the input to the output + a[...] = np.arange(3) + assert_equal(a, [[0, 1, 2], [0, 1, 2]]) + a[...] = np.arange(2).reshape(2, 1) + assert_equal(a, [[0, 0, 0], [1, 1, 1]]) + + # For compatibility with <= 1.5, a limited version of broadcasting + # the output to the input. + # + # This behavior is inconsistent with NumPy broadcasting + # in general, because it only uses one of the two broadcasting + # rules (adding a new "1" dimension to the left of the shape), + # applied to the output instead of an input. In NumPy 2.0, this kind + # of broadcasting assignment will likely be disallowed. + a[...] = np.arange(6)[::-1].reshape(1, 2, 3) + assert_equal(a, [[5, 4, 3], [2, 1, 0]]) + # The other type of broadcasting would require a reduction operation. + + def assign(a, b): + a[...] = b + + assert_raises(ValueError, assign, a, np.arange(12).reshape(2, 2, 3)) + + def test_assignment_errors(self): + # Address issue #2276 + class C: + pass + a = np.zeros(1) + + def assign(v): + a[0] = v + + assert_raises((AttributeError, TypeError), assign, C()) + assert_raises(ValueError, assign, [1]) + + def test_unicode_assignment(self): + # gh-5049 + from numpy.core.numeric import set_string_function + + @contextmanager + def inject_str(s): + """ replace ndarray.__str__ temporarily """ + set_string_function(lambda x: s, repr=False) + try: + yield + finally: + set_string_function(None, repr=False) + + a1d = np.array([u'test']) + a0d = np.array(u'done') + with inject_str(u'bad'): + a1d[0] = a0d # previously this would invoke __str__ + assert_equal(a1d[0], u'done') + + # this would crash for the same reason + np.array([np.array(u'\xe5\xe4\xf6')]) + + def test_stringlike_empty_list(self): + # gh-8902 + u = np.array([u'done']) + b = np.array([b'done']) + + class bad_sequence: + def __getitem__(self): pass + def __len__(self): raise RuntimeError + + assert_raises(ValueError, operator.setitem, u, 0, []) + assert_raises(ValueError, operator.setitem, b, 0, []) + + assert_raises(ValueError, operator.setitem, u, 0, bad_sequence()) + assert_raises(ValueError, operator.setitem, b, 0, bad_sequence()) + + def test_longdouble_assignment(self): + # only relevant if longdouble is larger than float + # we're looking for loss of precision + + for dtype in (np.longdouble, np.longcomplex): + # gh-8902 + tinyb = np.nextafter(np.longdouble(0), 1).astype(dtype) + tinya = np.nextafter(np.longdouble(0), -1).astype(dtype) + + # construction + tiny1d = np.array([tinya]) + assert_equal(tiny1d[0], tinya) + + # scalar = scalar + tiny1d[0] = tinyb + assert_equal(tiny1d[0], tinyb) + + # 0d = scalar + tiny1d[0, ...] = tinya + assert_equal(tiny1d[0], tinya) + + # 0d = 0d + tiny1d[0, ...] = tinyb[...] + assert_equal(tiny1d[0], tinyb) + + # scalar = 0d + tiny1d[0] = tinyb[...] 
+ assert_equal(tiny1d[0], tinyb)
+
+ arr = np.array([np.array(tinya)])
+ assert_equal(arr[0], tinya)
+
+ def test_cast_to_string(self):
+ # cast to str should do "str(scalar)", not "str(scalar.item())"
+ # Example: In python2, str(float) is truncated, so we want to avoid
+ # str(np.float64(...).item()) as this would incorrectly truncate.
+ a = np.zeros(1, dtype='S20')
+ a[:] = np.array(['1.12345678901234567890'], dtype='f8')
+ assert_equal(a[0], b"1.1234567890123457")
+
+
+class TestDtypedescr:
+ def test_construction(self):
+ d1 = np.dtype('i4')
+ assert_equal(d1, np.dtype(np.int32))
+ d2 = np.dtype('f8')
+ assert_equal(d2, np.dtype(np.float64))
+
+ def test_byteorders(self):
+ assert_(np.dtype('<i4') != np.dtype('>i4'))
+ assert_(np.dtype([('a', '<i4')]) != np.dtype([('a', '>i4')]))
+
+ def test_structured_non_void(self):
+ fields = [('a', '<i2'), ('b', '<i2')]
+ dt_int = np.dtype(('i4', fields))
+ assert_equal(str(dt_int), "(numpy.int32, [('a', '<i2'), ('b', '<i2')])")
+
+ # gh-9821
+ arr_int = np.zeros(4, dt_int)
+ assert_equal(repr(arr_int),
+ "array([0, 0, 0, 0], dtype=(numpy.int32, [('a', '<i2'), ('b', '<i2')]))")
+
+
+class TestStructured:
+ def test_casting(self):
+ # Check that casting a structured array to change its byte order
+ # works
+ a = np.array([(1,)], dtype=[('a', '<i4')])
+ assert_(np.can_cast(a.dtype, [('a', '>i4')], casting='unsafe'))
+ b = a.astype([('a', '>i4')])
+ assert_equal(b, a.byteswap().newbyteorder())
+ assert_equal(a['a'][0], b['a'][0])
+
+ # Check that equality comparison works on structured arrays if
+ # they are 'equiv'-castable
+ a = np.array([(5, 42), (10, 1)], dtype=[('a', '>i4'), ('b', '<f8')])
+ b = np.array([(5, 42), (10, 1)], dtype=[('a', '<i4'), ('b', '>f8')])
+ assert_(np.can_cast(a.dtype, b.dtype, casting='equiv'))
+ assert_equal(a == b, [True, True])
+
+ # Check that 'equiv' casting can change byte order
+ assert_(np.can_cast(a.dtype, b.dtype, casting='equiv'))
+ c = a.astype(b.dtype, casting='equiv')
+ assert_equal(a == c, [True, True])
+
+ # Check that 'safe' casting can change byte order and up-cast
+ # fields
+ t = [('a', '<i8'), ('b', '<f8')]
+ assert_(np.can_cast(a.dtype, t, casting='safe'))
+ c = a.astype(t, casting='safe')
+ assert_equal((c == np.array([(5, 42), (10, 1)], dtype=t)),
+ [True, True])
+
+ # Check that 'same_kind' casting can change byte order and
+ # change field widths within a "kind"
+ t = [('a', '<i4'), ('b', '<f4')]
+ assert_(np.can_cast(a.dtype, t, casting='same_kind'))
+ c = a.astype(t, casting='same_kind')
+ assert_equal((c == np.array([(5, 42), (10, 1)], dtype=t)),
+ [True, True])
+
+ # Check that casting fails if the casting rule should fail on
+ # any of the fields
+ t = [('a', '>i8'), ('b', '<f4')]
+ assert_(not np.can_cast(a.dtype, t, casting='safe'))
+ t = [('a', '>i2'), ('b', '<f8')]
+ assert_(not np.can_cast(a.dtype, t, casting='equiv'))
+ t = [('a', '>i8'), ('b', '<i2')]
+ assert_(not np.can_cast(a.dtype, t, casting='same_kind'))
+
+ # Check that non-'unsafe' casting can't change the set of field names
+ for casting in ['no', 'safe', 'equiv', 'same_kind']:
+ t = [('a', '>i4')]
+ assert_(not np.can_cast(a.dtype, t, casting=casting))
+ t = [('a', '>i4'), ('b', '<f8'), ('c', 'i4')]
+ assert_(not np.can_cast(a.dtype, t, casting=casting))
+
+
+class TestBool:
+ def _test_cast_from_flexible(self, dtype):
+ # empty string -> false
+ for n in range(3):
+ v = np.array(b'', (dtype, n))
+ assert_equal(bool(v), False)
+ assert_equal(bool(v[()]), False)
+ assert_equal(v.astype(bool), False)
+ assert_(isinstance(v.astype(bool), np.ndarray))
+ assert_(v[()].astype(bool) is np.False_)
+
+ # anything else -> true
+ for n in range(1, 4):
+ for val in [b'a', b'0', b' ']:
+ v = np.array(val, (dtype, n))
+ assert_equal(bool(v), True)
+ assert_equal(bool(v[()]), True)
+ assert_equal(v.astype(bool), True)
+ assert_(isinstance(v.astype(bool), np.ndarray))
+ assert_(v[()].astype(bool) is np.True_)
+
+ def test_cast_from_void(self):
+ self._test_cast_from_flexible(np.void)
+
+ @pytest.mark.xfail(reason="See gh-9847")
+ def test_cast_from_unicode(self):
+ self._test_cast_from_flexible(np.unicode_)
+
+ @pytest.mark.xfail(reason="See gh-9847")
+ def test_cast_from_bytes(self):
+ self._test_cast_from_flexible(np.bytes_)
+
+
+class TestZeroSizeFlexible:
+ @staticmethod
+ def _zeros(shape, dtype=str):
+ dtype = np.dtype(dtype)
+ if dtype == np.void:
+ return np.zeros(shape, dtype=(dtype, 0))
+
+ # not constructable directly
+ dtype = np.dtype([('x', dtype, 0)])
+ return np.zeros(shape,
dtype=dtype)['x'] + + def test_create(self): + zs = self._zeros(10, bytes) + assert_equal(zs.itemsize, 0) + zs = self._zeros(10, np.void) + assert_equal(zs.itemsize, 0) + zs = self._zeros(10, str) + assert_equal(zs.itemsize, 0) + + def _test_sort_partition(self, name, kinds, **kwargs): + # Previously, these would all hang + for dt in [bytes, np.void, str]: + zs = self._zeros(10, dt) + sort_method = getattr(zs, name) + sort_func = getattr(np, name) + for kind in kinds: + sort_method(kind=kind, **kwargs) + sort_func(zs, kind=kind, **kwargs) + + def test_sort(self): + self._test_sort_partition('sort', kinds='qhs') + + def test_argsort(self): + self._test_sort_partition('argsort', kinds='qhs') + + def test_partition(self): + self._test_sort_partition('partition', kinds=['introselect'], kth=2) + + def test_argpartition(self): + self._test_sort_partition('argpartition', kinds=['introselect'], kth=2) + + def test_resize(self): + # previously an error + for dt in [bytes, np.void, str]: + zs = self._zeros(10, dt) + zs.resize(25) + zs.resize((10, 10)) + + def test_view(self): + for dt in [bytes, np.void, str]: + zs = self._zeros(10, dt) + + # viewing as itself should be allowed + assert_equal(zs.view(dt).dtype, np.dtype(dt)) + + # viewing as any non-empty type gives an empty result + assert_equal(zs.view((dt, 1)).shape, (0,)) + + def test_dumps(self): + zs = self._zeros(10, int) + assert_equal(zs, pickle.loads(zs.dumps())) + + def test_pickle(self): + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + for dt in [bytes, np.void, str]: + zs = self._zeros(10, dt) + p = pickle.dumps(zs, protocol=proto) + zs2 = pickle.loads(p) + + assert_equal(zs.dtype, zs2.dtype) + + def test_pickle_empty(self): + """Checking if an empty array pickled and un-pickled will not cause a + segmentation fault""" + arr = np.array([]).reshape(999999, 0) + pk_dmp = pickle.dumps(arr) + pk_load = pickle.loads(pk_dmp) + + assert pk_load.size == 0 + + @pytest.mark.skipif(pickle.HIGHEST_PROTOCOL < 5, + reason="requires pickle protocol 5") + def test_pickle_with_buffercallback(self): + array = np.arange(10) + buffers = [] + bytes_string = pickle.dumps(array, buffer_callback=buffers.append, + protocol=5) + array_from_buffer = pickle.loads(bytes_string, buffers=buffers) + # when using pickle protocol 5 with buffer callbacks, + # array_from_buffer is reconstructed from a buffer holding a view + # to the initial array's data, so modifying an element in array + # should modify it in array_from_buffer too. 
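+ # Minimal out-of-band sketch (ours, assuming only the pickle module
+ # already imported above):
+ #   buffers = []
+ #   data = pickle.dumps(np.arange(10), protocol=5,
+ #                       buffer_callback=buffers.append)
+ #   again = pickle.loads(data, buffers=buffers)
+ # Here `again` views the original memory instead of copying it.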
+ array[0] = -1 + assert array_from_buffer[0] == -1, array_from_buffer[0] + + +class TestMethods: + + sort_kinds = ['quicksort', 'heapsort', 'stable'] + + def test_all_where(self): + a = np.array([[True, False, True], + [False, False, False], + [True, True, True]]) + wh_full = np.array([[True, False, True], + [False, False, False], + [True, False, True]]) + wh_lower = np.array([[False], + [False], + [True]]) + for _ax in [0, None]: + assert_equal(a.all(axis=_ax, where=wh_lower), + np.all(a[wh_lower[:,0],:], axis=_ax)) + assert_equal(np.all(a, axis=_ax, where=wh_lower), + a[wh_lower[:,0],:].all(axis=_ax)) + + assert_equal(a.all(where=wh_full), True) + assert_equal(np.all(a, where=wh_full), True) + assert_equal(a.all(where=False), True) + assert_equal(np.all(a, where=False), True) + + def test_any_where(self): + a = np.array([[True, False, True], + [False, False, False], + [True, True, True]]) + wh_full = np.array([[False, True, False], + [True, True, True], + [False, False, False]]) + wh_middle = np.array([[False], + [True], + [False]]) + for _ax in [0, None]: + assert_equal(a.any(axis=_ax, where=wh_middle), + np.any(a[wh_middle[:,0],:], axis=_ax)) + assert_equal(np.any(a, axis=_ax, where=wh_middle), + a[wh_middle[:,0],:].any(axis=_ax)) + assert_equal(a.any(where=wh_full), False) + assert_equal(np.any(a, where=wh_full), False) + assert_equal(a.any(where=False), False) + assert_equal(np.any(a, where=False), False) + + def test_compress(self): + tgt = [[5, 6, 7, 8, 9]] + arr = np.arange(10).reshape(2, 5) + out = arr.compress([0, 1], axis=0) + assert_equal(out, tgt) + + tgt = [[1, 3], [6, 8]] + out = arr.compress([0, 1, 0, 1, 0], axis=1) + assert_equal(out, tgt) + + tgt = [[1], [6]] + arr = np.arange(10).reshape(2, 5) + out = arr.compress([0, 1], axis=1) + assert_equal(out, tgt) + + arr = np.arange(10).reshape(2, 5) + out = arr.compress([0, 1]) + assert_equal(out, 1) + + def test_choose(self): + x = 2*np.ones((3,), dtype=int) + y = 3*np.ones((3,), dtype=int) + x2 = 2*np.ones((2, 3), dtype=int) + y2 = 3*np.ones((2, 3), dtype=int) + ind = np.array([0, 0, 1]) + + A = ind.choose((x, y)) + assert_equal(A, [2, 2, 3]) + + A = ind.choose((x2, y2)) + assert_equal(A, [[2, 2, 3], [2, 2, 3]]) + + A = ind.choose((x, y2)) + assert_equal(A, [[2, 2, 3], [2, 2, 3]]) + + oned = np.ones(1) + # gh-12031, caused SEGFAULT + assert_raises(TypeError, oned.choose,np.void(0), [oned]) + + out = np.array(0) + ret = np.choose(np.array(1), [10, 20, 30], out=out) + assert out is ret + assert_equal(out[()], 20) + + # gh-6272 check overlap on out + x = np.arange(5) + y = np.choose([0,0,0], [x[:3], x[:3], x[:3]], out=x[1:4], mode='wrap') + assert_equal(y, np.array([0, 1, 2])) + + def test_prod(self): + ba = [1, 2, 10, 11, 6, 5, 4] + ba2 = [[1, 2, 3, 4], [5, 6, 7, 9], [10, 3, 4, 5]] + + for ctype in [np.int16, np.uint16, np.int32, np.uint32, + np.float32, np.float64, np.complex64, np.complex128]: + a = np.array(ba, ctype) + a2 = np.array(ba2, ctype) + if ctype in ['1', 'b']: + assert_raises(ArithmeticError, a.prod) + assert_raises(ArithmeticError, a2.prod, axis=1) + else: + assert_equal(a.prod(axis=0), 26400) + assert_array_equal(a2.prod(axis=0), + np.array([50, 36, 84, 180], ctype)) + assert_array_equal(a2.prod(axis=-1), + np.array([24, 1890, 600], ctype)) + + def test_repeat(self): + m = np.array([1, 2, 3, 4, 5, 6]) + m_rect = m.reshape((2, 3)) + + A = m.repeat([1, 3, 2, 1, 1, 2]) + assert_equal(A, [1, 2, 2, 2, 3, + 3, 4, 5, 6, 6]) + + A = m.repeat(2) + assert_equal(A, [1, 1, 2, 2, 3, 3, + 4, 4, 5, 5, 6, 6]) + + A = 
m_rect.repeat([2, 1], axis=0) + assert_equal(A, [[1, 2, 3], + [1, 2, 3], + [4, 5, 6]]) + + A = m_rect.repeat([1, 3, 2], axis=1) + assert_equal(A, [[1, 2, 2, 2, 3, 3], + [4, 5, 5, 5, 6, 6]]) + + A = m_rect.repeat(2, axis=0) + assert_equal(A, [[1, 2, 3], + [1, 2, 3], + [4, 5, 6], + [4, 5, 6]]) + + A = m_rect.repeat(2, axis=1) + assert_equal(A, [[1, 1, 2, 2, 3, 3], + [4, 4, 5, 5, 6, 6]]) + + def test_reshape(self): + arr = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]) + + tgt = [[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]] + assert_equal(arr.reshape(2, 6), tgt) + + tgt = [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]] + assert_equal(arr.reshape(3, 4), tgt) + + tgt = [[1, 10, 8, 6], [4, 2, 11, 9], [7, 5, 3, 12]] + assert_equal(arr.reshape((3, 4), order='F'), tgt) + + tgt = [[1, 4, 7, 10], [2, 5, 8, 11], [3, 6, 9, 12]] + assert_equal(arr.T.reshape((3, 4), order='C'), tgt) + + def test_round(self): + def check_round(arr, expected, *round_args): + assert_equal(arr.round(*round_args), expected) + # With output array + out = np.zeros_like(arr) + res = arr.round(*round_args, out=out) + assert_equal(out, expected) + assert out is res + + check_round(np.array([1.2, 1.5]), [1, 2]) + check_round(np.array(1.5), 2) + check_round(np.array([12.2, 15.5]), [10, 20], -1) + check_round(np.array([12.15, 15.51]), [12.2, 15.5], 1) + # Complex rounding + check_round(np.array([4.5 + 1.5j]), [4 + 2j]) + check_round(np.array([12.5 + 15.5j]), [10 + 20j], -1) + + def test_squeeze(self): + a = np.array([[[1], [2], [3]]]) + assert_equal(a.squeeze(), [1, 2, 3]) + assert_equal(a.squeeze(axis=(0,)), [[1], [2], [3]]) + assert_raises(ValueError, a.squeeze, axis=(1,)) + assert_equal(a.squeeze(axis=(2,)), [[1, 2, 3]]) + + def test_transpose(self): + a = np.array([[1, 2], [3, 4]]) + assert_equal(a.transpose(), [[1, 3], [2, 4]]) + assert_raises(ValueError, lambda: a.transpose(0)) + assert_raises(ValueError, lambda: a.transpose(0, 0)) + assert_raises(ValueError, lambda: a.transpose(0, 1, 2)) + + def test_sort(self): + # test ordering for floats and complex containing nans. It is only + # necessary to check the less-than comparison, so sorts that + # only follow the insertion sort path are sufficient. We only + # test doubles and complex doubles as the logic is the same. + + # check doubles + msg = "Test real sort order with nans" + a = np.array([np.nan, 1, 0]) + b = np.sort(a) + assert_equal(b, a[::-1], msg) + # check complex + msg = "Test complex sort order with nans" + a = np.zeros(9, dtype=np.complex128) + a.real += [np.nan, np.nan, np.nan, 1, 0, 1, 1, 0, 0] + a.imag += [np.nan, 1, 0, np.nan, np.nan, 1, 0, 1, 0] + b = np.sort(a) + assert_equal(b, a[::-1], msg) + + # all c scalar sorts use the same code with different types + # so it suffices to run a quick check with one type. The number + # of sorted items must be greater than ~50 to check the actual + # algorithm because quick and merge sort fall over to insertion + # sort for small arrays. 
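+ # Quick self-contained sketch (ours, not upstream): all three public
+ # kinds must agree, and 101 elements is enough to leave the
+ # insertion-sort small-array path:
+ #   a = np.random.permutation(101)
+ #   for kind in ('quicksort', 'heapsort', 'stable'):
+ #       assert (np.sort(a, kind=kind) == np.arange(101)).all()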
+ + @pytest.mark.parametrize('dtype', [np.uint8, np.uint16, np.uint32, np.uint64, + np.float16, np.float32, np.float64, + np.longdouble]) + def test_sort_unsigned(self, dtype): + a = np.arange(101, dtype=dtype) + b = a[::-1].copy() + for kind in self.sort_kinds: + msg = "scalar sort, kind=%s" % kind + c = a.copy() + c.sort(kind=kind) + assert_equal(c, a, msg) + c = b.copy() + c.sort(kind=kind) + assert_equal(c, a, msg) + + @pytest.mark.parametrize('dtype', + [np.int8, np.int16, np.int32, np.int64, np.float16, + np.float32, np.float64, np.longdouble]) + def test_sort_signed(self, dtype): + a = np.arange(-50, 51, dtype=dtype) + b = a[::-1].copy() + for kind in self.sort_kinds: + msg = "scalar sort, kind=%s" % (kind) + c = a.copy() + c.sort(kind=kind) + assert_equal(c, a, msg) + c = b.copy() + c.sort(kind=kind) + assert_equal(c, a, msg) + + @pytest.mark.parametrize('dtype', [np.float32, np.float64, np.longdouble]) + @pytest.mark.parametrize('part', ['real', 'imag']) + def test_sort_complex(self, part, dtype): + # test complex sorts. These use the same code as the scalars + # but the compare function differs. + cdtype = { + np.single: np.csingle, + np.double: np.cdouble, + np.longdouble: np.clongdouble, + }[dtype] + a = np.arange(-50, 51, dtype=dtype) + b = a[::-1].copy() + ai = (a * (1+1j)).astype(cdtype) + bi = (b * (1+1j)).astype(cdtype) + setattr(ai, part, 1) + setattr(bi, part, 1) + for kind in self.sort_kinds: + msg = "complex sort, %s part == 1, kind=%s" % (part, kind) + c = ai.copy() + c.sort(kind=kind) + assert_equal(c, ai, msg) + c = bi.copy() + c.sort(kind=kind) + assert_equal(c, ai, msg) + + def test_sort_complex_byte_swapping(self): + # test sorting of complex arrays requiring byte-swapping, gh-5441 + for endianness in '<>': + for dt in np.typecodes['Complex']: + arr = np.array([1+3.j, 2+2.j, 3+1.j], dtype=endianness + dt) + c = arr.copy() + c.sort() + msg = 'byte-swapped complex sort, dtype={0}'.format(dt) + assert_equal(c, arr, msg) + + @pytest.mark.parametrize('dtype', [np.bytes_, np.unicode_]) + def test_sort_string(self, dtype): + # np.array will perform the encoding to bytes for us in the bytes test + a = np.array(['aaaaaaaa' + chr(i) for i in range(101)], dtype=dtype) + b = a[::-1].copy() + for kind in self.sort_kinds: + msg = "kind=%s" % kind + c = a.copy() + c.sort(kind=kind) + assert_equal(c, a, msg) + c = b.copy() + c.sort(kind=kind) + assert_equal(c, a, msg) + + def test_sort_object(self): + # test object array sorts. + a = np.empty((101,), dtype=object) + a[:] = list(range(101)) + b = a[::-1] + for kind in ['q', 'h', 'm']: + msg = "kind=%s" % kind + c = a.copy() + c.sort(kind=kind) + assert_equal(c, a, msg) + c = b.copy() + c.sort(kind=kind) + assert_equal(c, a, msg) + + def test_sort_structured(self): + # test record array sorts. + dt = np.dtype([('f', float), ('i', int)]) + a = np.array([(i, i) for i in range(101)], dtype=dt) + b = a[::-1] + for kind in ['q', 'h', 'm']: + msg = "kind=%s" % kind + c = a.copy() + c.sort(kind=kind) + assert_equal(c, a, msg) + c = b.copy() + c.sort(kind=kind) + assert_equal(c, a, msg) + + @pytest.mark.parametrize('dtype', ['datetime64[D]', 'timedelta64[D]']) + def test_sort_time(self, dtype): + # test datetime64 and timedelta64 sorts. + a = np.arange(0, 101, dtype=dtype) + b = a[::-1] + for kind in ['q', 'h', 'm']: + msg = "kind=%s" % kind + c = a.copy() + c.sort(kind=kind) + assert_equal(c, a, msg) + c = b.copy() + c.sort(kind=kind) + assert_equal(c, a, msg) + + def test_sort_axis(self): + # check axis handling. 
This should be the same for all type + # specific sorts, so we only check it for one type and one kind + a = np.array([[3, 2], [1, 0]]) + b = np.array([[1, 0], [3, 2]]) + c = np.array([[2, 3], [0, 1]]) + d = a.copy() + d.sort(axis=0) + assert_equal(d, b, "test sort with axis=0") + d = a.copy() + d.sort(axis=1) + assert_equal(d, c, "test sort with axis=1") + d = a.copy() + d.sort() + assert_equal(d, c, "test sort with default axis") + + def test_sort_size_0(self): + # check axis handling for multidimensional empty arrays + a = np.array([]) + a.shape = (3, 2, 1, 0) + for axis in range(-a.ndim, a.ndim): + msg = 'test empty array sort with axis={0}'.format(axis) + assert_equal(np.sort(a, axis=axis), a, msg) + msg = 'test empty array sort with axis=None' + assert_equal(np.sort(a, axis=None), a.ravel(), msg) + + def test_sort_bad_ordering(self): + # test generic class with bogus ordering, + # should not segfault. + class Boom: + def __lt__(self, other): + return True + + a = np.array([Boom()] * 100, dtype=object) + for kind in self.sort_kinds: + msg = "kind=%s" % kind + c = a.copy() + c.sort(kind=kind) + assert_equal(c, a, msg) + + def test_void_sort(self): + # gh-8210 - previously segfaulted + for i in range(4): + rand = np.random.randint(256, size=4000, dtype=np.uint8) + arr = rand.view('V4') + arr[::-1].sort() + + dt = np.dtype([('val', 'i4', (1,))]) + for i in range(4): + rand = np.random.randint(256, size=4000, dtype=np.uint8) + arr = rand.view(dt) + arr[::-1].sort() + + def test_sort_raises(self): + #gh-9404 + arr = np.array([0, datetime.now(), 1], dtype=object) + for kind in self.sort_kinds: + assert_raises(TypeError, arr.sort, kind=kind) + #gh-3879 + class Raiser: + def raises_anything(*args, **kwargs): + raise TypeError("SOMETHING ERRORED") + __eq__ = __ne__ = __lt__ = __gt__ = __ge__ = __le__ = raises_anything + arr = np.array([[Raiser(), n] for n in range(10)]).reshape(-1) + np.random.shuffle(arr) + for kind in self.sort_kinds: + assert_raises(TypeError, arr.sort, kind=kind) + + def test_sort_degraded(self): + # test degraded dataset would take minutes to run with normal qsort + d = np.arange(1000000) + do = d.copy() + x = d + # create a median of 3 killer where each median is the sorted second + # last element of the quicksort partition + while x.size > 3: + mid = x.size // 2 + x[mid], x[-2] = x[-2], x[mid] + x = x[:-2] + + assert_equal(np.sort(d), do) + assert_equal(d[np.argsort(d)], do) + + def test_copy(self): + def assert_fortran(arr): + assert_(arr.flags.fortran) + assert_(arr.flags.f_contiguous) + assert_(not arr.flags.c_contiguous) + + def assert_c(arr): + assert_(not arr.flags.fortran) + assert_(not arr.flags.f_contiguous) + assert_(arr.flags.c_contiguous) + + a = np.empty((2, 2), order='F') + # Test copying a Fortran array + assert_c(a.copy()) + assert_c(a.copy('C')) + assert_fortran(a.copy('F')) + assert_fortran(a.copy('A')) + + # Now test starting with a C array. 
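+ # Order-semantics sketch (our addition): copy('A') preserves the
+ # source layout, while copy('C')/copy('F') force one, e.g.:
+ #   f = np.empty((2, 2), order='F')
+ #   f.copy('A').flags.f_contiguous   # -> True
+ #   f.copy('C').flags.c_contiguous   # -> True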
+ a = np.empty((2, 2), order='C')
+ assert_c(a.copy())
+ assert_c(a.copy('C'))
+ assert_fortran(a.copy('F'))
+ assert_c(a.copy('A'))
+
+ def test_sort_order(self):
+ # Test sorting an array with fields
+ x1 = np.array([21, 32, 14])
+ x2 = np.array(['my', 'first', 'name'])
+ x3 = np.array([3.1, 4.5, 6.2])
+ r = np.rec.fromarrays([x1, x2, x3], names='id,word,number')
+
+ r.sort(order=['id'])
+ assert_equal(r.id, np.array([14, 21, 32]))
+ assert_equal(r.word, np.array(['name', 'my', 'first']))
+ assert_equal(r.number, np.array([6.2, 3.1, 4.5]))
+
+ r.sort(order=['word'])
+ assert_equal(r.id, np.array([32, 21, 14]))
+ assert_equal(r.word, np.array(['first', 'my', 'name']))
+ assert_equal(r.number, np.array([4.5, 3.1, 6.2]))
+
+ r.sort(order=['number'])
+ assert_equal(r.id, np.array([21, 32, 14]))
+ assert_equal(r.word, np.array(['my', 'first', 'name']))
+ assert_equal(r.number, np.array([3.1, 4.5, 6.2]))
+
+ assert_raises_regex(ValueError, 'duplicate',
+ lambda: r.sort(order=['id', 'id']))
+
+ if sys.byteorder == 'little':
+ strtype = '>i2'
+ else:
+ strtype = '<i2'
+ mydtype = [('name', 'S5'), ('col2', strtype)]
+ r = np.array([('a', 1), ('b', 255), ('c', 3), ('d', 258)],
+ dtype=mydtype)
+ r.sort(order='col2')
+ assert_equal(r['col2'], [1, 3, 255, 258])
+ assert_equal(r, np.array([('a', 1), ('c', 3), ('b', 255), ('d', 258)],
+ dtype=mydtype))
+
+ def test_argsort(self):
+ # all c scalar argsorts use the same code with different types
+ # so it suffices to run a quick check with one type. The number
+ # of sorted items must be greater than ~50 to check the actual
+ # algorithm because quick and merge sort fall over to insertion
+ # sort for small arrays.
+
+ for dtype in [np.int32, np.uint32, np.float32]:
+ a = np.arange(101, dtype=dtype)
+ b = a[::-1].copy()
+ for kind in self.sort_kinds:
+ msg = "scalar argsort, kind=%s, dtype=%s" % (kind, dtype)
+ assert_equal(a.copy().argsort(kind=kind), a, msg)
+ assert_equal(b.copy().argsort(kind=kind), b, msg)
+
+ # test complex argsorts. These use the same code as the scalars
+ # but the compare function differs.
+ ai = a*1j + 1
+ bi = b*1j + 1
+ for kind in self.sort_kinds:
+ msg = "complex argsort, kind=%s" % kind
+ assert_equal(ai.copy().argsort(kind=kind), a, msg)
+ assert_equal(bi.copy().argsort(kind=kind), b, msg)
+ ai = a + 1j
+ bi = b + 1j
+ for kind in self.sort_kinds:
+ msg = "complex argsort, kind=%s" % kind
+ assert_equal(ai.copy().argsort(kind=kind), a, msg)
+ assert_equal(bi.copy().argsort(kind=kind), b, msg)
+
+ # test argsort of complex arrays requiring byte-swapping, gh-5441
+ for endianness in '<>':
+ for dt in np.typecodes['Complex']:
+ arr = np.array([1+3.j, 2+2.j, 3+1.j], dtype=endianness + dt)
+ msg = 'byte-swapped complex argsort, dtype={0}'.format(dt)
+ assert_equal(arr.argsort(),
+ np.arange(len(arr), dtype=np.intp), msg)
+
+ # test string argsorts.
+ s = 'aaaaaaaa'
+ a = np.array([s + chr(i) for i in range(101)])
+ b = a[::-1].copy()
+ r = np.arange(101)
+ rr = r[::-1]
+ for kind in self.sort_kinds:
+ msg = "string argsort, kind=%s" % kind
+ assert_equal(a.copy().argsort(kind=kind), r, msg)
+ assert_equal(b.copy().argsort(kind=kind), rr, msg)
+
+ # test unicode argsorts.
+ s = 'aaaaaaaa'
+ a = np.array([s + chr(i) for i in range(101)], dtype=np.unicode_)
+ b = a[::-1]
+ r = np.arange(101)
+ rr = r[::-1]
+ for kind in self.sort_kinds:
+ msg = "unicode argsort, kind=%s" % kind
+ assert_equal(a.copy().argsort(kind=kind), r, msg)
+ assert_equal(b.copy().argsort(kind=kind), rr, msg)
+
+ # test object array argsorts.
+ a = np.empty((101,), dtype=object)
+ a[:] = list(range(101))
+ b = a[::-1]
+ r = np.arange(101)
+ rr = r[::-1]
+ for kind in self.sort_kinds:
+ msg = "object argsort, kind=%s" % kind
+ assert_equal(a.copy().argsort(kind=kind), r, msg)
+ assert_equal(b.copy().argsort(kind=kind), rr, msg)
+
+ # test structured array argsorts.
+ dt = np.dtype([('f', float), ('i', int)])
+ a = np.array([(i, i) for i in range(101)], dtype=dt)
+ b = a[::-1]
+ r = np.arange(101)
+ rr = r[::-1]
+ for kind in self.sort_kinds:
+ msg = "structured array argsort, kind=%s" % kind
+ assert_equal(a.copy().argsort(kind=kind), r, msg)
+ assert_equal(b.copy().argsort(kind=kind), rr, msg)
+
+ # test datetime64 argsorts.
+ a = np.arange(0, 101, dtype='datetime64[D]')
+ b = a[::-1]
+ r = np.arange(101)
+ rr = r[::-1]
+ for kind in ['q', 'h', 'm']:
+ msg = "datetime64 argsort, kind=%s" % kind
+ assert_equal(a.copy().argsort(kind=kind), r, msg)
+ assert_equal(b.copy().argsort(kind=kind), rr, msg)
+
+ # test timedelta64 argsorts.
+ a = np.arange(0, 101, dtype='timedelta64[D]')
+ b = a[::-1]
+ r = np.arange(101)
+ rr = r[::-1]
+ for kind in ['q', 'h', 'm']:
+ msg = "timedelta64 argsort, kind=%s" % kind
+ assert_equal(a.copy().argsort(kind=kind), r, msg)
+ assert_equal(b.copy().argsort(kind=kind), rr, msg)
+
+ # check axis handling. This should be the same for all type
+ # specific argsorts, so we only check it for one type and one kind
+ a = np.array([[3, 2], [1, 0]])
+ b = np.array([[1, 1], [0, 0]])
+ c = np.array([[1, 0], [1, 0]])
+ assert_equal(a.copy().argsort(axis=0), b)
+ assert_equal(a.copy().argsort(axis=1), c)
+ assert_equal(a.copy().argsort(), c)
+
+ # check axis handling for multidimensional empty arrays
+ a = np.array([])
+ a.shape = (3, 2, 1, 0)
+ for axis in range(-a.ndim, a.ndim):
+ msg = 'test empty array argsort with axis={0}'.format(axis)
+ assert_equal(np.argsort(a, axis=axis),
+ np.zeros_like(a, dtype=np.intp), msg)
+ msg = 'test empty array argsort with axis=None'
+ assert_equal(np.argsort(a, axis=None),
+ np.zeros_like(a.ravel(), dtype=np.intp), msg)
+
+ # check that stable argsorts are stable
+ r = np.arange(100)
+ # scalars
+ a = np.zeros(100)
+ assert_equal(a.argsort(kind='m'), r)
+ # complex
+ a = np.zeros(100, dtype=complex)
+ assert_equal(a.argsort(kind='m'), r)
+ # string
+ a = np.array(['aaaaaaaaa' for i in range(100)])
+ assert_equal(a.argsort(kind='m'), r)
+ # unicode
+ a = np.array(['aaaaaaaaa' for i in range(100)], dtype=np.unicode_)
+ assert_equal(a.argsort(kind='m'), r)
+
+ def test_sort_unicode_kind(self):
+ d = np.arange(10)
+ k = b'\xc3\xa4'.decode("UTF8")
+ assert_raises(ValueError, d.sort, kind=k)
+ assert_raises(ValueError, d.argsort, kind=k)
+
+ def test_searchsorted(self):
+ # test for floats and complex containing nans. The logic is the
+ # same for all float types so only test double types for now.
+ # The search sorted routines use the compare functions for the
+ # array type, so this checks if that is consistent with the sort
+ # order.
+
+ # check double
+ a = np.array([0, 1, np.nan])
+ msg = "Test real searchsorted with nans, side='l'"
+ b = a.searchsorted(a, side='left')
+ assert_equal(b, np.arange(3), msg)
+ msg = "Test real searchsorted with nans, side='r'"
+ b = a.searchsorted(a, side='right')
+ assert_equal(b, np.arange(1, 4), msg)
+ # check keyword arguments
+ a.searchsorted(v=1)
+ # check double complex
+ a = np.zeros(9, dtype=np.complex128)
+ a.real += [0, 0, 1, 1, 0, 1, np.nan, np.nan, np.nan]
+ a.imag += [0, 1, 0, 1, np.nan, np.nan, 0, 1, np.nan]
+ msg = "Test complex searchsorted with nans, side='l'"
+ b = a.searchsorted(a, side='left')
+ assert_equal(b, np.arange(9), msg)
+ msg = "Test complex searchsorted with nans, side='r'"
+ b = a.searchsorted(a, side='right')
+ assert_equal(b, np.arange(1, 10), msg)
+ msg = "Test searchsorted with little endian, side='l'"
+ a = np.array([0, 128], dtype='<i4')
+ b = a.searchsorted(np.array(128, dtype='<i4'))
+ assert_equal(b, 1, msg)
+ msg = "Test searchsorted with big endian, side='l'"
+ a = np.array([0, 128], dtype='>i4')
+ b = a.searchsorted(np.array(128, dtype='>i4'))
+ assert_equal(b, 1, msg)
+
+ p = np.partition(d1, i, axis=1, kind=k)
+ aae(p[:, i], np.array([i] * d1.shape[0], dtype=dt))
+ # array_less does not seem to work right
+ at((p[:, :i].T <= p[:, i]).all(),
+ msg="%d: %r <= %r" % (i, p[:, i], p[:, :i].T))
+ at((p[:, i + 1:].T > p[:, i]).all(),
+ msg="%d: %r < %r" % (i, p[:, i], p[:, i + 1:].T))
+ aae(p, d1[np.arange(d1.shape[0])[:, None],
+ np.argpartition(d1, i, axis=1, kind=k)])
+
+ p = np.partition(d0, i, axis=0, kind=k)
+ aae(p[i, :], np.array([i] * d1.shape[0], dtype=dt))
+ # array_less does not seem to work right
+ at((p[:i, :] <= p[i, :]).all(),
+ msg="%d: %r <= %r" % (i, p[i, :], p[:i, :]))
+ at((p[i + 1:, :] > p[i, :]).all(),
+ msg="%d: %r < %r" % (i, p[i, :], p[:, i + 1:]))
+ aae(p, d0[np.argpartition(d0, i, axis=0, kind=k),
+ np.arange(d0.shape[1])[None, :]])
+
+ # check inplace
+ dc = d.copy()
+ dc.partition(i, kind=k)
+ assert_equal(dc, np.partition(d, i, kind=k))
+ dc = d0.copy()
+ dc.partition(i, axis=0, kind=k)
+ assert_equal(dc, np.partition(d0, i, axis=0, kind=k))
+ dc = d1.copy()
+ dc.partition(i, axis=1, kind=k)
+ assert_equal(dc, np.partition(d1, i, axis=1, kind=k))
+
+ def assert_partitioned(self, d, kth):
+ prev = 0
+ for k in np.sort(kth):
+ assert_array_less(d[prev:k], d[k], err_msg='kth %d' % k)
+ assert_((d[k:] >= d[k]).all(),
+ msg="kth %d, %r not greater equal %d" % (k, d[k:], d[k]))
+ prev = k + 1
+
+ def test_partition_iterative(self):
+ d = np.arange(17)
+ kth = (0, 1, 2, 429, 231)
+ assert_raises(ValueError, d.partition, kth)
+ assert_raises(ValueError, d.argpartition, kth)
+ d = np.arange(10).reshape((2, 5))
+ assert_raises(ValueError, d.partition, kth, axis=0)
+ assert_raises(ValueError, d.partition, kth, axis=1)
+ assert_raises(ValueError, np.partition, d, kth, axis=1)
+ assert_raises(ValueError, np.partition, d, kth, axis=None)
+
+ d = np.array([3, 4, 2, 1])
+ p = np.partition(d, (0, 3))
+ self.assert_partitioned(p, (0, 3))
+ self.assert_partitioned(d[np.argpartition(d, (0, 3))], (0, 3))
+
+ assert_array_equal(p, np.partition(d, (-3, -1)))
+ assert_array_equal(p, d[np.argpartition(d, (-3, -1))])
+
+ d = np.arange(17)
+ np.random.shuffle(d)
+ d.partition(range(d.size))
+ assert_array_equal(np.arange(17), d)
+ np.random.shuffle(d)
+ assert_array_equal(np.arange(17), d[d.argpartition(range(d.size))])
+
+ # test unsorted kth
+ d = np.arange(17)
+ np.random.shuffle(d)
+ keys = np.array([1, 3, 8, -2])
+ np.random.shuffle(d)
+ p = np.partition(d, keys)
+ self.assert_partitioned(p, keys)
+ p = d[np.argpartition(d, keys)]
+ self.assert_partitioned(p, keys)
+ np.random.shuffle(keys)
+ assert_array_equal(np.partition(d, keys), p)
+ assert_array_equal(d[np.argpartition(d, keys)], p)
+
+ # equal kth
+ d = np.arange(20)[::-1]
+ self.assert_partitioned(np.partition(d, [5]*4), [5])
+ self.assert_partitioned(np.partition(d, [5]*4 + [6, 13]),
+ [5]*4 + [6, 13])
+ self.assert_partitioned(d[np.argpartition(d, [5]*4)], [5])
+ self.assert_partitioned(d[np.argpartition(d, [5]*4 + [6, 13])],
+ [5]*4 + [6, 13])
+
+ d = np.arange(12)
+ np.random.shuffle(d)
+ d1 = np.tile(np.arange(12), (4, 1))
+ map(np.random.shuffle, d1)
+ d0 = np.transpose(d1)
+
+ kth = (1, 6, 7, -1)
+ p = np.partition(d1, kth, axis=1)
+ pa = d1[np.arange(d1.shape[0])[:, None],
+ d1.argpartition(kth, axis=1)]
+ assert_array_equal(p, pa)
+ for i in range(d1.shape[0]):
+ self.assert_partitioned(p[i,:], kth)
+ p = np.partition(d0, kth, axis=0)
+ pa = d0[np.argpartition(d0, kth, axis=0),
+ np.arange(d0.shape[1])[None,:]]
+ assert_array_equal(p, pa)
+ for i in range(d0.shape[1]):
+ self.assert_partitioned(p[:, i], kth)
+
+ def test_partition_cdtype(self):
+ d = np.array([('Galahad', 1.7, 38), ('Arthur', 1.8, 41),
+ ('Lancelot', 1.9, 38)],
+ dtype=[('name', '|S10'), ('height', '<f8'), ('age', '<i4')])
+
+
+class TestBinop:
+ def test_ufunc_binop_interaction(self):
+ # Python method name (without underscores)
+ # ->
"in-place" + + def array_ufunc_impl(self, ufunc, method, *args, **kwargs): + return ("__array_ufunc__", ufunc, method, args, kwargs) + + # Create an object with the given base, in the given module, with a + # bunch of placeholder __op__ methods, and optionally a + # __array_ufunc__ and __array_priority__. + def make_obj(base, array_priority=False, array_ufunc=False, + alleged_module="__main__"): + class_namespace = {"__array__": array_impl} + if array_priority is not False: + class_namespace["__array_priority__"] = array_priority + for op in ops: + class_namespace["__{0}__".format(op)] = op_impl + class_namespace["__r{0}__".format(op)] = rop_impl + class_namespace["__i{0}__".format(op)] = iop_impl + if array_ufunc is not False: + class_namespace["__array_ufunc__"] = array_ufunc + eval_namespace = {"base": base, + "class_namespace": class_namespace, + "__name__": alleged_module, + } + MyType = eval("type('MyType', (base,), class_namespace)", + eval_namespace) + if issubclass(MyType, np.ndarray): + # Use this range to avoid special case weirdnesses around + # divide-by-0, pow(x, 2), overflow due to pow(big, big), etc. + return np.arange(3, 7).reshape(2, 2).view(MyType) + else: + return MyType() + + def check(obj, binop_override_expected, ufunc_override_expected, + inplace_override_expected, check_scalar=True): + for op, (ufunc, has_inplace, dtype) in ops.items(): + err_msg = ('op: %s, ufunc: %s, has_inplace: %s, dtype: %s' + % (op, ufunc, has_inplace, dtype)) + check_objs = [np.arange(3, 7, dtype=dtype).reshape(2, 2)] + if check_scalar: + check_objs.append(check_objs[0][0]) + for arr in check_objs: + arr_method = getattr(arr, "__{0}__".format(op)) + + def first_out_arg(result): + if op == "divmod": + assert_(isinstance(result, tuple)) + return result[0] + else: + return result + + # arr __op__ obj + if binop_override_expected: + assert_equal(arr_method(obj), NotImplemented, err_msg) + elif ufunc_override_expected: + assert_equal(arr_method(obj)[0], "__array_ufunc__", + err_msg) + else: + if (isinstance(obj, np.ndarray) and + (type(obj).__array_ufunc__ is + np.ndarray.__array_ufunc__)): + # __array__ gets ignored + res = first_out_arg(arr_method(obj)) + assert_(res.__class__ is obj.__class__, err_msg) + else: + assert_raises((TypeError, Coerced), + arr_method, obj, err_msg=err_msg) + # obj __op__ arr + arr_rmethod = getattr(arr, "__r{0}__".format(op)) + if ufunc_override_expected: + res = arr_rmethod(obj) + assert_equal(res[0], "__array_ufunc__", + err_msg=err_msg) + assert_equal(res[1], ufunc, err_msg=err_msg) + else: + if (isinstance(obj, np.ndarray) and + (type(obj).__array_ufunc__ is + np.ndarray.__array_ufunc__)): + # __array__ gets ignored + res = first_out_arg(arr_rmethod(obj)) + assert_(res.__class__ is obj.__class__, err_msg) + else: + # __array_ufunc__ = "asdf" creates a TypeError + assert_raises((TypeError, Coerced), + arr_rmethod, obj, err_msg=err_msg) + + # arr __iop__ obj + # array scalars don't have in-place operators + if has_inplace and isinstance(arr, np.ndarray): + arr_imethod = getattr(arr, "__i{0}__".format(op)) + if inplace_override_expected: + assert_equal(arr_method(obj), NotImplemented, + err_msg=err_msg) + elif ufunc_override_expected: + res = arr_imethod(obj) + assert_equal(res[0], "__array_ufunc__", err_msg) + assert_equal(res[1], ufunc, err_msg) + assert_(type(res[-1]["out"]) is tuple, err_msg) + assert_(res[-1]["out"][0] is arr, err_msg) + else: + if (isinstance(obj, np.ndarray) and + (type(obj).__array_ufunc__ is + np.ndarray.__array_ufunc__)): + # __array__ 
gets ignored + assert_(arr_imethod(obj) is arr, err_msg) + else: + assert_raises((TypeError, Coerced), + arr_imethod, obj, + err_msg=err_msg) + + op_fn = getattr(operator, op, None) + if op_fn is None: + op_fn = getattr(operator, op + "_", None) + if op_fn is None: + op_fn = getattr(builtins, op) + assert_equal(op_fn(obj, arr), "forward", err_msg) + if not isinstance(obj, np.ndarray): + if binop_override_expected: + assert_equal(op_fn(arr, obj), "reverse", err_msg) + elif ufunc_override_expected: + assert_equal(op_fn(arr, obj)[0], "__array_ufunc__", + err_msg) + if ufunc_override_expected: + assert_equal(ufunc(obj, arr)[0], "__array_ufunc__", + err_msg) + + # No array priority, no array_ufunc -> nothing called + check(make_obj(object), False, False, False) + # Negative array priority, no array_ufunc -> nothing called + # (has to be very negative, because scalar priority is -1000000.0) + check(make_obj(object, array_priority=-2**30), False, False, False) + # Positive array priority, no array_ufunc -> binops and iops only + check(make_obj(object, array_priority=1), True, False, True) + # ndarray ignores array_priority for ndarray subclasses + check(make_obj(np.ndarray, array_priority=1), False, False, False, + check_scalar=False) + # Positive array_priority and array_ufunc -> array_ufunc only + check(make_obj(object, array_priority=1, + array_ufunc=array_ufunc_impl), False, True, False) + check(make_obj(np.ndarray, array_priority=1, + array_ufunc=array_ufunc_impl), False, True, False) + # array_ufunc set to None -> defer binops only + check(make_obj(object, array_ufunc=None), True, False, False) + check(make_obj(np.ndarray, array_ufunc=None), True, False, False, + check_scalar=False) + + def test_ufunc_override_normalize_signature(self): + # gh-5674 + class SomeClass: + def __array_ufunc__(self, ufunc, method, *inputs, **kw): + return kw + + a = SomeClass() + kw = np.add(a, [1]) + assert_('sig' not in kw and 'signature' not in kw) + kw = np.add(a, [1], sig='ii->i') + assert_('sig' not in kw and 'signature' in kw) + assert_equal(kw['signature'], 'ii->i') + kw = np.add(a, [1], signature='ii->i') + assert_('sig' not in kw and 'signature' in kw) + assert_equal(kw['signature'], 'ii->i') + + def test_array_ufunc_index(self): + # Check that index is set appropriately, also if only an output + # is passed on (latter is another regression tests for github bug 4753) + # This also checks implicitly that 'out' is always a tuple. + class CheckIndex: + def __array_ufunc__(self, ufunc, method, *inputs, **kw): + for i, a in enumerate(inputs): + if a is self: + return i + # calls below mean we must be in an output. + for j, a in enumerate(kw['out']): + if a is self: + return (j,) + + a = CheckIndex() + dummy = np.arange(2.) 
+ # 1 input, 1 output + assert_equal(np.sin(a), 0) + assert_equal(np.sin(dummy, a), (0,)) + assert_equal(np.sin(dummy, out=a), (0,)) + assert_equal(np.sin(dummy, out=(a,)), (0,)) + assert_equal(np.sin(a, a), 0) + assert_equal(np.sin(a, out=a), 0) + assert_equal(np.sin(a, out=(a,)), 0) + # 1 input, 2 outputs + assert_equal(np.modf(dummy, a), (0,)) + assert_equal(np.modf(dummy, None, a), (1,)) + assert_equal(np.modf(dummy, dummy, a), (1,)) + assert_equal(np.modf(dummy, out=(a, None)), (0,)) + assert_equal(np.modf(dummy, out=(a, dummy)), (0,)) + assert_equal(np.modf(dummy, out=(None, a)), (1,)) + assert_equal(np.modf(dummy, out=(dummy, a)), (1,)) + assert_equal(np.modf(a, out=(dummy, a)), 0) + with assert_raises(TypeError): + # Out argument must be tuple, since there are multiple outputs + np.modf(dummy, out=a) + + assert_raises(ValueError, np.modf, dummy, out=(a,)) + + # 2 inputs, 1 output + assert_equal(np.add(a, dummy), 0) + assert_equal(np.add(dummy, a), 1) + assert_equal(np.add(dummy, dummy, a), (0,)) + assert_equal(np.add(dummy, a, a), 1) + assert_equal(np.add(dummy, dummy, out=a), (0,)) + assert_equal(np.add(dummy, dummy, out=(a,)), (0,)) + assert_equal(np.add(a, dummy, out=a), 0) + + def test_out_override(self): + # regression test for github bug 4753 + class OutClass(np.ndarray): + def __array_ufunc__(self, ufunc, method, *inputs, **kw): + if 'out' in kw: + tmp_kw = kw.copy() + tmp_kw.pop('out') + func = getattr(ufunc, method) + kw['out'][0][...] = func(*inputs, **tmp_kw) + + A = np.array([0]).view(OutClass) + B = np.array([5]) + C = np.array([6]) + np.multiply(C, B, A) + assert_equal(A[0], 30) + assert_(isinstance(A, OutClass)) + A[0] = 0 + np.multiply(C, B, out=A) + assert_equal(A[0], 30) + assert_(isinstance(A, OutClass)) + + def test_pow_override_with_errors(self): + # regression test for gh-9112 + class PowerOnly(np.ndarray): + def __array_ufunc__(self, ufunc, method, *inputs, **kw): + if ufunc is not np.power: + raise NotImplementedError + return "POWER!" + # explicit cast to float, to ensure the fast power path is taken. 
+ a = np.array(5., dtype=np.float64).view(PowerOnly) + assert_equal(a ** 2.5, "POWER!") + with assert_raises(NotImplementedError): + a ** 0.5 + with assert_raises(NotImplementedError): + a ** 0 + with assert_raises(NotImplementedError): + a ** 1 + with assert_raises(NotImplementedError): + a ** -1 + with assert_raises(NotImplementedError): + a ** 2 + + def test_pow_array_object_dtype(self): + # test pow on arrays of object dtype + class SomeClass: + def __init__(self, num=None): + self.num = num + + # want to ensure a fast pow path is not taken + def __mul__(self, other): + raise AssertionError('__mul__ should not be called') + + def __div__(self, other): + raise AssertionError('__div__ should not be called') + + def __pow__(self, exp): + return SomeClass(num=self.num ** exp) + + def __eq__(self, other): + if isinstance(other, SomeClass): + return self.num == other.num + + __rpow__ = __pow__ + + def pow_for(exp, arr): + return np.array([x ** exp for x in arr]) + + obj_arr = np.array([SomeClass(1), SomeClass(2), SomeClass(3)]) + + assert_equal(obj_arr ** 0.5, pow_for(0.5, obj_arr)) + assert_equal(obj_arr ** 0, pow_for(0, obj_arr)) + assert_equal(obj_arr ** 1, pow_for(1, obj_arr)) + assert_equal(obj_arr ** -1, pow_for(-1, obj_arr)) + assert_equal(obj_arr ** 2, pow_for(2, obj_arr)) + + def test_pos_array_ufunc_override(self): + class A(np.ndarray): + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + return getattr(ufunc, method)(*[i.view(np.ndarray) for + i in inputs], **kwargs) + tst = np.array('foo').view(A) + with assert_raises(TypeError): + +tst + + +class TestTemporaryElide: + # elision is only triggered on relatively large arrays + + def test_extension_incref_elide(self): + # test extension (e.g. cython) calling PyNumber_* slots without + # increasing the reference counts + # + # def incref_elide(a): + # d = input.copy() # refcount 1 + # return d, d + d # PyNumber_Add without increasing refcount + from numpy.core._multiarray_tests import incref_elide + d = np.ones(100000) + orig, res = incref_elide(d) + d + d + # the return original should not be changed to an inplace operation + assert_array_equal(orig, d) + assert_array_equal(res, d + d) + + def test_extension_incref_elide_stack(self): + # scanning if the refcount == 1 object is on the python stack to check + # that we are called directly from python is flawed as object may still + # be above the stack pointer and we have no access to the top of it + # + # def incref_elide_l(d): + # return l[4] + l[4] # PyNumber_Add without increasing refcount + from numpy.core._multiarray_tests import incref_elide_l + # padding with 1 makes sure the object on the stack is not overwritten + l = [1, 1, 1, 1, np.ones(100000)] + res = incref_elide_l(l) + # the return original should not be changed to an inplace operation + assert_array_equal(l[4], np.ones(100000)) + assert_array_equal(res, l[4] + l[4]) + + def test_temporary_with_cast(self): + # check that we don't elide into a temporary which would need casting + d = np.ones(200000, dtype=np.int64) + assert_equal(((d + d) + 2**222).dtype, np.dtype('O')) + + r = ((d + d) / 2) + assert_equal(r.dtype, np.dtype('f8')) + + r = np.true_divide((d + d), 2) + assert_equal(r.dtype, np.dtype('f8')) + + r = ((d + d) / 2.) 
+ assert_equal(r.dtype, np.dtype('f8')) + + r = ((d + d) // 2) + assert_equal(r.dtype, np.dtype(np.int64)) + + # commutative elision into the astype result + f = np.ones(100000, dtype=np.float32) + assert_equal(((f + f) + f.astype(np.float64)).dtype, np.dtype('f8')) + + # no elision into lower type + d = f.astype(np.float64) + assert_equal(((f + f) + d).dtype, d.dtype) + l = np.ones(100000, dtype=np.longdouble) + assert_equal(((d + d) + l).dtype, l.dtype) + + # test unary abs with different output dtype + for dt in (np.complex64, np.complex128, np.clongdouble): + c = np.ones(100000, dtype=dt) + r = abs(c * 2.0) + assert_equal(r.dtype, np.dtype('f%d' % (c.itemsize // 2))) + + def test_elide_broadcast(self): + # test no elision on broadcast to higher dimension + # only triggers elision code path in debug mode as triggering it in + # normal mode needs 256kb large matching dimension, so a lot of memory + d = np.ones((2000, 1), dtype=int) + b = np.ones((2000), dtype=bool) + r = (1 - d) + b + assert_equal(r, 1) + assert_equal(r.shape, (2000, 2000)) + + def test_elide_scalar(self): + # check inplace op does not create ndarray from scalars + a = np.bool_() + assert_(type(~(a & a)) is np.bool_) + + def test_elide_scalar_readonly(self): + # The imaginary part of a real array is readonly. This needs to go + # through fast_scalar_power which is only called for powers of + # +1, -1, 0, 0.5, and 2, so use 2. Also need valid refcount for + # elision which can be gotten for the imaginary part of a real + # array. Should not error. + a = np.empty(100000, dtype=np.float64) + a.imag ** 2 + + def test_elide_readonly(self): + # don't try to elide readonly temporaries + r = np.asarray(np.broadcast_to(np.zeros(1), 100000).flat) * 0.0 + assert_equal(r, 0) + + def test_elide_updateifcopy(self): + a = np.ones(2**20)[::2] + b = a.flat.__array__() + 1 + del b + assert_equal(a, 1) + + +class TestCAPI: + def test_IsPythonScalar(self): + from numpy.core._multiarray_tests import IsPythonScalar + assert_(IsPythonScalar(b'foobar')) + assert_(IsPythonScalar(1)) + assert_(IsPythonScalar(2**80)) + assert_(IsPythonScalar(2.)) + assert_(IsPythonScalar("a")) + + +class TestSubscripting: + def test_test_zero_rank(self): + x = np.array([1, 2, 3]) + assert_(isinstance(x[0], np.int_)) + assert_(type(x[0, ...]) is np.ndarray) + + +class TestPickling: + @pytest.mark.skipif(pickle.HIGHEST_PROTOCOL >= 5, + reason=('this tests the error messages when trying to' + 'protocol 5 although it is not available')) + def test_correct_protocol5_error_message(self): + array = np.arange(10) + + if sys.version_info[:2] in ((3, 6), (3, 7)): + # For the specific case of python3.6 and 3.7, raise a clear import + # error about the pickle5 backport when trying to use protocol=5 + # without the pickle5 package + with pytest.raises(ImportError): + array.__reduce_ex__(5) + + def test_record_array_with_object_dtype(self): + my_object = object() + + arr_with_object = np.array( + [(my_object, 1, 2.0)], + dtype=[('a', object), ('b', int), ('c', float)]) + arr_without_object = np.array( + [('xxx', 1, 2.0)], + dtype=[('a', str), ('b', int), ('c', float)]) + + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + depickled_arr_with_object = pickle.loads( + pickle.dumps(arr_with_object, protocol=proto)) + depickled_arr_without_object = pickle.loads( + pickle.dumps(arr_without_object, protocol=proto)) + + assert_equal(arr_with_object.dtype, + depickled_arr_with_object.dtype) + assert_equal(arr_without_object.dtype, + depickled_arr_without_object.dtype) + + 
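+    # Editorial sketch (not part of the original file): a minimal check that
+    # every supported protocol round-trips both the data and the dtype of a
+    # plain array; the surrounding tests exercise the trickier cases.
+    def test_plain_array_roundtrip_sketch(self):
+        arr = np.arange(6).reshape(2, 3)
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            out = pickle.loads(pickle.dumps(arr, protocol=proto))
+            assert_equal(arr, out)
+            assert_equal(arr.dtype, out.dtype)
+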
@pytest.mark.skipif(pickle.HIGHEST_PROTOCOL < 5, + reason="requires pickle protocol 5") + def test_f_contiguous_array(self): + f_contiguous_array = np.array([[1, 2, 3], [4, 5, 6]], order='F') + buffers = [] + + # When using pickle protocol 5, Fortran-contiguous arrays can be + # serialized using out-of-band buffers + bytes_string = pickle.dumps(f_contiguous_array, protocol=5, + buffer_callback=buffers.append) + + assert len(buffers) > 0 + + depickled_f_contiguous_array = pickle.loads(bytes_string, + buffers=buffers) + + assert_equal(f_contiguous_array, depickled_f_contiguous_array) + + def test_non_contiguous_array(self): + non_contiguous_array = np.arange(12).reshape(3, 4)[:, :2] + assert not non_contiguous_array.flags.c_contiguous + assert not non_contiguous_array.flags.f_contiguous + + # make sure non-contiguous arrays can be pickled-depickled + # using any protocol + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + depickled_non_contiguous_array = pickle.loads( + pickle.dumps(non_contiguous_array, protocol=proto)) + + assert_equal(non_contiguous_array, depickled_non_contiguous_array) + + def test_roundtrip(self): + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + carray = np.array([[2, 9], [7, 0], [3, 8]]) + DATA = [ + carray, + np.transpose(carray), + np.array([('xxx', 1, 2.0)], dtype=[('a', (str, 3)), ('b', int), + ('c', float)]) + ] + + refs = [weakref.ref(a) for a in DATA] + for a in DATA: + assert_equal( + a, pickle.loads(pickle.dumps(a, protocol=proto)), + err_msg="%r" % a) + del a, DATA, carray + break_cycles() + # check for reference leaks (gh-12793) + for ref in refs: + assert ref() is None + + def _loads(self, obj): + return pickle.loads(obj, encoding='latin1') + + # version 0 pickles, using protocol=2 to pickle + # version 0 doesn't have a version field + def test_version0_int8(self): + s = b'\x80\x02cnumpy.core._internal\n_reconstruct\nq\x01cnumpy\nndarray\nq\x02K\x00\x85U\x01b\x87Rq\x03(K\x04\x85cnumpy\ndtype\nq\x04U\x02i1K\x00K\x01\x87Rq\x05(U\x01|NNJ\xff\xff\xff\xffJ\xff\xff\xff\xfftb\x89U\x04\x01\x02\x03\x04tb.' 
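+        # (The opaque bytes above are a protocol-2 pickle of a version-0
+        # ndarray written by an old numpy; loading it must still reproduce
+        # the int8 array built below.)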
+ a = np.array([1, 2, 3, 4], dtype=np.int8) + p = self._loads(s) + assert_equal(a, p) + + def test_version0_float32(self): + s = b'\x80\x02cnumpy.core._internal\n_reconstruct\nq\x01cnumpy\nndarray\nq\x02K\x00\x85U\x01b\x87Rq\x03(K\x04\x85cnumpy\ndtype\nq\x04U\x02f4K\x00K\x01\x87Rq\x05(U\x01= g2, [g1[i] >= g2[i] for i in [0, 1, 2]]) + assert_array_equal(g1 < g2, [g1[i] < g2[i] for i in [0, 1, 2]]) + assert_array_equal(g1 > g2, [g1[i] > g2[i] for i in [0, 1, 2]]) + + def test_mixed(self): + g1 = np.array(["spam", "spa", "spammer", "and eggs"]) + g2 = "spam" + assert_array_equal(g1 == g2, [x == g2 for x in g1]) + assert_array_equal(g1 != g2, [x != g2 for x in g1]) + assert_array_equal(g1 < g2, [x < g2 for x in g1]) + assert_array_equal(g1 > g2, [x > g2 for x in g1]) + assert_array_equal(g1 <= g2, [x <= g2 for x in g1]) + assert_array_equal(g1 >= g2, [x >= g2 for x in g1]) + + def test_unicode(self): + g1 = np.array([u"This", u"is", u"example"]) + g2 = np.array([u"This", u"was", u"example"]) + assert_array_equal(g1 == g2, [g1[i] == g2[i] for i in [0, 1, 2]]) + assert_array_equal(g1 != g2, [g1[i] != g2[i] for i in [0, 1, 2]]) + assert_array_equal(g1 <= g2, [g1[i] <= g2[i] for i in [0, 1, 2]]) + assert_array_equal(g1 >= g2, [g1[i] >= g2[i] for i in [0, 1, 2]]) + assert_array_equal(g1 < g2, [g1[i] < g2[i] for i in [0, 1, 2]]) + assert_array_equal(g1 > g2, [g1[i] > g2[i] for i in [0, 1, 2]]) + +class TestArgmaxArgminCommon: + + sizes = [(), (3,), (3, 2), (2, 3), + (3, 3), (2, 3, 4), (4, 3, 2), + (1, 2, 3, 4), (2, 3, 4, 1), + (3, 4, 1, 2), (4, 1, 2, 3)] + + @pytest.mark.parametrize("size, axis", itertools.chain(*[[(size, axis) + for axis in list(range(-len(size), len(size))) + [None]] + for size in sizes])) + @pytest.mark.parametrize('method', [np.argmax, np.argmin]) + def test_np_argmin_argmax_keepdims(self, size, axis, method): + + arr = np.random.normal(size=size) + + # contiguous arrays + if axis is None: + new_shape = [1 for _ in range(len(size))] + else: + new_shape = list(size) + new_shape[axis] = 1 + new_shape = tuple(new_shape) + + _res_orig = method(arr, axis=axis) + res_orig = _res_orig.reshape(new_shape) + res = method(arr, axis=axis, keepdims=True) + assert_equal(res, res_orig) + assert_(res.shape == new_shape) + outarray = np.empty(res.shape, dtype=res.dtype) + res1 = method(arr, axis=axis, out=outarray, + keepdims=True) + assert_(res1 is outarray) + assert_equal(res, outarray) + + if len(size) > 0: + wrong_shape = list(new_shape) + if axis is not None: + wrong_shape[axis] = 2 + else: + wrong_shape[0] = 2 + wrong_outarray = np.empty(wrong_shape, dtype=res.dtype) + with pytest.raises(ValueError): + method(arr.T, axis=axis, + out=wrong_outarray, keepdims=True) + + # non-contiguous arrays + if axis is None: + new_shape = [1 for _ in range(len(size))] + else: + new_shape = list(size)[::-1] + new_shape[axis] = 1 + new_shape = tuple(new_shape) + + _res_orig = method(arr.T, axis=axis) + res_orig = _res_orig.reshape(new_shape) + res = method(arr.T, axis=axis, keepdims=True) + assert_equal(res, res_orig) + assert_(res.shape == new_shape) + outarray = np.empty(new_shape[::-1], dtype=res.dtype) + outarray = outarray.T + res1 = method(arr.T, axis=axis, out=outarray, + keepdims=True) + assert_(res1 is outarray) + assert_equal(res, outarray) + + if len(size) > 0: + # one dimension lesser for non-zero sized + # array should raise an error + with pytest.raises(ValueError): + method(arr[0], axis=axis, + out=outarray, keepdims=True) + + if len(size) > 0: + wrong_shape = list(new_shape) + if axis is not 
None:
+                    wrong_shape[axis] = 2
+                else:
+                    wrong_shape[0] = 2
+                wrong_outarray = np.empty(wrong_shape, dtype=res.dtype)
+                with pytest.raises(ValueError):
+                    method(arr.T, axis=axis,
+                           out=wrong_outarray, keepdims=True)
+
+    @pytest.mark.parametrize('method', ['max', 'min'])
+    def test_all(self, method):
+        a = np.random.normal(0, 1, (4, 5, 6, 7, 8))
+        arg_method = getattr(a, 'arg' + method)
+        val_method = getattr(a, method)
+        for i in range(a.ndim):
+            a_maxmin = val_method(i)
+            aarg_maxmin = arg_method(i)
+            axes = list(range(a.ndim))
+            axes.remove(i)
+            assert_(np.all(a_maxmin == aarg_maxmin.choose(
+                *a.transpose(i, *axes))))
+
+    @pytest.mark.parametrize('method', ['argmax', 'argmin'])
+    def test_output_shape(self, method):
+        # see also gh-616
+        a = np.ones((10, 5))
+        arg_method = getattr(a, method)
+        # Check some simple shape mismatches
+        out = np.ones(11, dtype=np.int_)
+        assert_raises(ValueError, arg_method, -1, out)
+
+        out = np.ones((2, 5), dtype=np.int_)
+        assert_raises(ValueError, arg_method, -1, out)
+
+        # these could be relaxed possibly (used to allow even the previous)
+        out = np.ones((1, 10), dtype=np.int_)
+        assert_raises(ValueError, arg_method, -1, out)
+
+        out = np.ones(10, dtype=np.int_)
+        arg_method(-1, out=out)
+        assert_equal(out, arg_method(-1))
+
+    @pytest.mark.parametrize('ndim', [0, 1])
+    @pytest.mark.parametrize('method', ['argmax', 'argmin'])
+    def test_ret_is_out(self, ndim, method):
+        a = np.ones((4,) + (3,)*ndim)
+        arg_method = getattr(a, method)
+        out = np.empty((3,)*ndim, dtype=np.intp)
+        ret = arg_method(axis=0, out=out)
+        assert ret is out
+
+    @pytest.mark.parametrize('np_array, method, idx, val',
+                             [(np.zeros, 'argmax', 5942, "as"),
+                              (np.ones, 'argmin', 6001, "0")])
+    def test_unicode(self, np_array, method, idx, val):
+        d = np_array(6031, dtype='<U9')
+        arg_method = getattr(d, method)
+        d[idx] = val
+        assert_equal(arg_method(), idx)
+
+
+class TestClip:
+    def _check_range(self, x, cmin, cmax):
+        assert_(np.all(x >= cmin))
+        assert_(np.all(x <= cmax))
+
+    def _clip_type(self, type_group, array_max,
+                   clip_min, clip_max, inplace=False,
+                   expected_min=None, expected_max=None):
+        if expected_min is None:
+            expected_min = clip_min
+        if expected_max is None:
+            expected_max = clip_max
+
+        for T in np.sctypes[type_group]:
+            if sys.byteorder == 'little':
+                byte_orders = ['=', '>']
+            else:
+                byte_orders = ['<', '=']
+
+            for byteorder in byte_orders:
+                dtype = np.dtype(T).newbyteorder(byteorder)
+
+                x = (np.random.random(1000) * array_max).astype(dtype)
+                if inplace:
+                    # The tests that call us pass clip_min and clip_max that
+                    # might not fit in the destination dtype. They were written
+                    # assuming the previous unsafe casting, which now must be
+                    # passed explicitly to avoid a warning.
+                    x.clip(clip_min, clip_max, x, casting='unsafe')
+                else:
+                    x = x.clip(clip_min, clip_max)
+                    byteorder = '='
+
+                if x.dtype.byteorder == '|':
+                    byteorder = '|'
+                assert_equal(x.dtype.byteorder, byteorder)
+                self._check_range(x, expected_min, expected_max)
+        return x
+
+    def test_basic(self):
+        for inplace in [False, True]:
+            self._clip_type(
+                'float', 1024, -12.8, 100.2, inplace=inplace)
+            self._clip_type(
+                'float', 1024, 0, 0, inplace=inplace)
+
+            self._clip_type(
+                'int', 1024, -120, 100, inplace=inplace)
+            self._clip_type(
+                'int', 1024, 0, 0, inplace=inplace)
+
+            self._clip_type(
+                'uint', 1024, 0, 0, inplace=inplace)
+            self._clip_type(
+                'uint', 1024, -120, 100, inplace=inplace, expected_min=0)
+
+    def test_record_array(self):
+        rec = np.array([(-5, 2.0, 3.0), (5.0, 4.0, 3.0)],
+                       dtype=[('x', '<f8'), ('y', '<f8'), ('z', '<f8')])
+        y = rec['x'].clip(-0.3, 0.5)
+        self._check_range(y, -0.3, 0.5)
+
+    def test_max_or_min(self):
+        val = np.array([0, 1, 2, 3, 4, 5, 6, 7])
+        x = val.clip(3)
+        assert_(np.all(x >= 3))
+        x = val.clip(min=3)
+        assert_(np.all(x >= 3))
+        x = val.clip(max=4)
+        assert_(np.all(x <= 4))
+
+    def test_nan(self):
+        input_arr = np.array([-2., np.nan, 0.5, 3., 0.25, np.nan])
+        result = input_arr.clip(-1, 1)
+        expected = np.array([-1., np.nan, 0.5, 1., 0.25, np.nan])
+        assert_array_equal(result, expected)
+
+
+class TestCompress:
+    def test_axis(self):
+        tgt = [[5, 6, 7, 8, 9]]
+        arr = np.arange(10).reshape(2, 5)
+        out = np.compress([0, 1], arr, axis=0)
+        assert_equal(out, tgt)
+
+        tgt = [[1, 3], [6, 8]]
+        out = np.compress([0, 1, 0, 1, 0], arr, axis=1)
+        assert_equal(out, tgt)
+
+    def test_truncate(self):
+        tgt = [[1], [6]]
+        arr = np.arange(10).reshape(2, 5)
+        out = np.compress([0, 1], arr, axis=1)
+        assert_equal(out, tgt)
+
+    def test_flatten(self):
+        arr = np.arange(10).reshape(2, 5)
+        out = np.compress([0, 1], arr)
+        assert_equal(out, 1)
+
+
+class TestPutmask:
+    def tst_basic(self, x, T, mask, val):
+        np.putmask(x, mask, val)
+        assert_equal(x[mask], np.array(val, T))
+
+    def test_ip_types(self):
+        unchecked_types = [bytes, str, np.void]
+
+        x = np.random.random(1000)*100
+        mask = x < 40
+
+        for val in [-100, 0, 15]:
+            for types in np.sctypes.values():
+                for T in types:
+                    if T not in unchecked_types:
+                        self.tst_basic(x.copy().astype(T), T, mask, val)
+
+            # Also test string of a length which uses an untypical length
+            dt = np.dtype("S3")
+            self.tst_basic(x.astype(dt), dt.type, mask, dt.type(val)[:3])
+
+    def test_mask_size(self):
+        assert_raises(ValueError, np.putmask, np.array([1, 2, 3]), [True], 5)
+
+    @pytest.mark.parametrize('dtype', ('>i4', '<i4'))
+    def test_ip_byteorder(self, dtype):
+        x = np.array([1, 2, 3], dtype)
+        np.putmask(x, [True, False, True], -1)
+        assert_array_equal(x, [-1, 2, -1])
+
+    def test_record_array(self):
+        # Note mixed byteorder.
+        rec = np.array([(-5, 2.0, 3.0), (5.0, 4.0, 3.0)],
+                       dtype=[('x', '<f8'), ('y', '>f8'), ('z', '<f8')])
+        np.putmask(rec['x'], [True, False], 10)
+        assert_array_equal(rec['x'], [10, 5])
+        assert_array_equal(rec['y'], [2, 4])
+        assert_array_equal(rec['z'], [3, 3])
+        np.putmask(rec['y'], [True, False], 11)
+        assert_array_equal(rec['x'], [10, 5])
+        assert_array_equal(rec['y'], [11, 4])
+        assert_array_equal(rec['z'], [3, 3])
+
+    def test_overlaps(self):
+        # gh-6272 check overlap
+        x = np.array([True, False, True, False])
+        np.putmask(x[1:4], [True, True, True], x[:3])
+        assert_array_equal(x, np.array([True, True, False, True]))
+
+    def test_writeable(self):
+        a = np.arange(5)
+        a.flags.writeable = False
+
+        with pytest.raises(ValueError):
+            np.putmask(a, a >= 2, 3)
+
+
+class TestTake:
+    def tst_basic(self, x):
+        ind = list(range(x.shape[0]))
+        assert_array_equal(x.take(ind, axis=0), x)
+
+    def test_ip_types(self):
+        unchecked_types = [bytes, str, np.void]
+
+        x = np.random.random(24)*100
+        x.shape = 2, 3, 4
+        for types in np.sctypes.values():
+            for T in types:
+                if T not in unchecked_types:
+                    self.tst_basic(x.copy().astype(T))
+
+        # Also test string of a length which uses an untypical length
+        self.tst_basic(x.astype("S3"))
+
+    def test_raise(self):
+        x = np.random.random(24)*100
+        x.shape = 2, 3, 4
+        assert_raises(IndexError, x.take, [0, 1, 2], axis=0)
+        assert_raises(IndexError, x.take, [-3], axis=0)
+        assert_array_equal(x.take([-1], axis=0)[0], x[1])
+
+    def test_clip(self):
+        x = np.random.random(24)*100
+        x.shape = 2, 3, 4
+        assert_array_equal(x.take([-1], axis=0, mode='clip')[0], x[0])
+        assert_array_equal(x.take([2], axis=0, mode='clip')[0], x[1])
+
+    def test_wrap(self):
+        x = np.random.random(24)*100
+        x.shape = 2, 3, 4
+        assert_array_equal(x.take([-1], axis=0, mode='wrap')[0], x[1])
+        assert_array_equal(x.take([2], axis=0, mode='wrap')[0], x[0])
+        assert_array_equal(x.take([3], axis=0, mode='wrap')[0], x[1])
+
+    @pytest.mark.parametrize('dtype', ('>i4', '<i4'))
+    def test_ip_byteorder(self, dtype):
+        x = np.array([1, 2, 3], dtype)
+        assert_array_equal(x.take([0, 2, 1]), [1, 3, 2])
+
+    def test_record_array(self):
+        # Note mixed byteorder.
+        rec = np.array([(-5, 2.0, 3.0), (5.0, 4.0, 3.0)],
+                       dtype=[('x', '<f8'), ('y', '>f8'), ('z', '<f8')])
+        rec1 = rec.take([1])
+        assert_(rec1['x'] == 5.0 and rec1['y'] == 4.0)
+
+
+class TestIO:
+    @pytest.fixture()
+    def x(self):
+        shape = (2, 4, 3)
+        rand = np.random.random
+        x = rand(shape) + rand(shape).astype(complex) * 1j
+        x[0, :, 1] = [np.nan, np.inf, -np.inf, np.nan]
+        return x
+
+    @pytest.fixture(params=["string", "path_obj"])
+    def tmp_filename(self, request, tmp_path):
+        # This fixture covers two cases:
+        # one where the filename is a string and
+        # another where it is a pathlib object
+        filename = tmp_path / "file"
+        if request.param == "string":
+            filename = str(filename)
+        yield filename
+
+    def test_largish_file(self, tmp_filename):
+        # check the fallocate path on files > 16MB
+        d = np.zeros(4 * 1024 ** 2)
+        d.tofile(tmp_filename)
+        assert_equal(os.path.getsize(tmp_filename), d.nbytes)
+        assert_array_equal(d, np.fromfile(tmp_filename))
+        # check offset
+        with open(tmp_filename, "r+b") as f:
+            f.seek(d.nbytes)
+            d.tofile(f)
+            assert_equal(os.path.getsize(tmp_filename), d.nbytes * 2)
+        # check append mode (gh-8329)
+        open(tmp_filename, "w").close()  # delete file contents
+        with open(tmp_filename, "ab") as f:
+            d.tofile(f)
+        assert_array_equal(d, np.fromfile(tmp_filename))
+        with open(tmp_filename, "ab") as f:
+            d.tofile(f)
+        assert_equal(os.path.getsize(tmp_filename), d.nbytes * 2)
+
+    def test_io_open_buffered_fromfile(self, x, tmp_filename):
+        # gh-6632
+        x.tofile(tmp_filename)
+        with io.open(tmp_filename, 'rb', buffering=-1) as f:
+            y = np.fromfile(f, dtype=x.dtype)
+        assert_array_equal(y, x.flat)
+
+    def test_file_position_after_fromfile(self, tmp_filename):
+        # gh-4118
+        sizes = [io.DEFAULT_BUFFER_SIZE//8,
+                 io.DEFAULT_BUFFER_SIZE,
+                 io.DEFAULT_BUFFER_SIZE*8]
+
+        for size in sizes:
+            with open(tmp_filename, 'wb') as f:
+                f.seek(size-1)
+                f.write(b'\0')
+
+            for mode in ['rb', 'r+b']:
+                err_msg = "%d %s" % (size, mode)
+
+                with open(tmp_filename, mode) as f:
+                    f.read(2)
+                    np.fromfile(f, dtype=np.float64, count=1)
+                    pos = f.tell()
+                assert_equal(pos, 10, err_msg=err_msg)
+
+    def test_file_position_after_tofile(self, tmp_filename):
+        # gh-4118
+        sizes = [io.DEFAULT_BUFFER_SIZE//8,
+                 io.DEFAULT_BUFFER_SIZE,
+                 io.DEFAULT_BUFFER_SIZE*8]
+
+        for size in sizes:
+            err_msg = "%d" % (size,)
+
+            with open(tmp_filename, 'wb') as f:
+                f.seek(size-1)
+                f.write(b'\0')
+                f.seek(10)
+                f.write(b'12')
+                np.array([0], dtype=np.float64).tofile(f)
+                pos = f.tell()
+            assert_equal(pos, 10 + 2 + 8, err_msg=err_msg)
+
+            with open(tmp_filename, 'r+b') as f:
+                f.read(2)
+                f.seek(0, 1)  # seek between read&write required by ANSI C
+                np.array([0], dtype=np.float64).tofile(f)
+                pos = f.tell()
+            assert_equal(pos, 10, err_msg=err_msg)
+
+    def test_load_object_array_fromfile(self, tmp_filename):
+        # gh-12300
+        with open(tmp_filename, 'w') as f:
+            # Ensure we have a file with consistent contents
+            pass
+
+        with open(tmp_filename, 'rb') as f:
+            assert_raises_regex(ValueError, "Cannot read into object array",
+                                np.fromfile, f, dtype=object)
+
+        assert_raises_regex(ValueError, "Cannot read into object array",
+                            np.fromfile, tmp_filename, dtype=object)
+
+    def test_fromfile_offset(self, x, tmp_filename):
+        with open(tmp_filename, 'wb') as f:
+            x.tofile(f)
+
+        with open(tmp_filename, 'rb') as f:
+            y = np.fromfile(f, dtype=x.dtype, offset=0)
+            assert_array_equal(y, x.flat)
+
+        with open(tmp_filename, 'rb') as f:
+            count_items = len(x.flat) // 8
+            offset_items = len(x.flat) // 4
+            offset_bytes = x.dtype.itemsize * offset_items
+            y = np.fromfile(
+                f, dtype=x.dtype, count=count_items, offset=offset_bytes
+            )
+            assert_array_equal(
+                y, x.flat[offset_items:offset_items+count_items]
+            )
+
+            # subsequent seeks should stack
+            offset_bytes = x.dtype.itemsize
+            z = np.fromfile(f, dtype=x.dtype, offset=offset_bytes)
+            assert_array_equal(z, x.flat[offset_items+count_items+1:])
+
+        with open(tmp_filename, 'wb') as f:
+            x.tofile(f, sep=",")
+
+        with open(tmp_filename, 'rb') as f:
+            assert_raises_regex(
+                    TypeError,
+                    "'offset' argument only permitted for binary files",
+                    np.fromfile, tmp_filename, dtype=x.dtype,
+                    sep=",", offset=1)
+
+    @pytest.mark.skipif(IS_PYPY, reason="bug in PyPy's PyNumber_AsSsize_t")
PyNumber_AsSsize_t") + def test_fromfile_bad_dup(self, x, tmp_filename): + def dup_str(fd): + return 'abc' + + def dup_bigint(fd): + return 2**68 + + old_dup = os.dup + try: + with open(tmp_filename, 'wb') as f: + x.tofile(f) + for dup, exc in ((dup_str, TypeError), (dup_bigint, OSError)): + os.dup = dup + assert_raises(exc, np.fromfile, f) + finally: + os.dup = old_dup + + def _check_from(self, s, value, filename, **kw): + if 'sep' not in kw: + y = np.frombuffer(s, **kw) + else: + y = np.fromstring(s, **kw) + assert_array_equal(y, value) + + with open(filename, 'wb') as f: + f.write(s) + y = np.fromfile(filename, **kw) + assert_array_equal(y, value) + + @pytest.fixture(params=["period", "comma"]) + def decimal_sep_localization(self, request): + """ + Including this fixture in a test will automatically + execute it with both types of decimal separator. + + So:: + + def test_decimal(decimal_sep_localization): + pass + + is equivalent to the following two tests:: + + def test_decimal_period_separator(): + pass + + def test_decimal_comma_separator(): + with CommaDecimalPointLocale(): + pass + """ + if request.param == "period": + yield + elif request.param == "comma": + with CommaDecimalPointLocale(): + yield + else: + assert False, request.param + + def test_nan(self, tmp_filename, decimal_sep_localization): + self._check_from( + b"nan +nan -nan NaN nan(foo) +NaN(BAR) -NAN(q_u_u_x_)", + [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + tmp_filename, + sep=' ') + + def test_inf(self, tmp_filename, decimal_sep_localization): + self._check_from( + b"inf +inf -inf infinity -Infinity iNfInItY -inF", + [np.inf, np.inf, -np.inf, np.inf, -np.inf, np.inf, -np.inf], + tmp_filename, + sep=' ') + + def test_numbers(self, tmp_filename, decimal_sep_localization): + self._check_from( + b"1.234 -1.234 .3 .3e55 -123133.1231e+133", + [1.234, -1.234, .3, .3e55, -123133.1231e+133], + tmp_filename, + sep=' ') + + def test_binary(self, tmp_filename): + self._check_from( + b'\x00\x00\x80?\x00\x00\x00@\x00\x00@@\x00\x00\x80@', + np.array([1, 2, 3, 4]), + tmp_filename, + dtype=' 1 minute on mechanical hard drive + def test_big_binary(self): + """Test workarounds for 32-bit limited fwrite, fseek, and ftell + calls in windows. These normally would hang doing something like this. 
+ See http://projects.scipy.org/numpy/ticket/1660""" + if sys.platform != 'win32': + return + try: + # before workarounds, only up to 2**32-1 worked + fourgbplus = 2**32 + 2**16 + testbytes = np.arange(8, dtype=np.int8) + n = len(testbytes) + flike = tempfile.NamedTemporaryFile() + f = flike.file + np.tile(testbytes, fourgbplus // testbytes.nbytes).tofile(f) + flike.seek(0) + a = np.fromfile(f, dtype=np.int8) + flike.close() + assert_(len(a) == fourgbplus) + # check only start and end for speed: + assert_((a[:n] == testbytes).all()) + assert_((a[-n:] == testbytes).all()) + except (MemoryError, ValueError): + pass + + def test_string(self, tmp_filename): + self._check_from(b'1,2,3,4', [1., 2., 3., 4.], tmp_filename, sep=',') + + def test_counted_string(self, tmp_filename, decimal_sep_localization): + self._check_from( + b'1,2,3,4', [1., 2., 3., 4.], tmp_filename, count=4, sep=',') + self._check_from( + b'1,2,3,4', [1., 2., 3.], tmp_filename, count=3, sep=',') + self._check_from( + b'1,2,3,4', [1., 2., 3., 4.], tmp_filename, count=-1, sep=',') + + def test_string_with_ws(self, tmp_filename): + self._check_from( + b'1 2 3 4 ', [1, 2, 3, 4], tmp_filename, dtype=int, sep=' ') + + def test_counted_string_with_ws(self, tmp_filename): + self._check_from( + b'1 2 3 4 ', [1, 2, 3], tmp_filename, count=3, dtype=int, + sep=' ') + + def test_ascii(self, tmp_filename, decimal_sep_localization): + self._check_from( + b'1 , 2 , 3 , 4', [1., 2., 3., 4.], tmp_filename, sep=',') + self._check_from( + b'1,2,3,4', [1., 2., 3., 4.], tmp_filename, dtype=float, sep=',') + + def test_malformed(self, tmp_filename, decimal_sep_localization): + with assert_warns(DeprecationWarning): + self._check_from( + b'1.234 1,234', [1.234, 1.], tmp_filename, sep=' ') + + def test_long_sep(self, tmp_filename): + self._check_from( + b'1_x_3_x_4_x_5', [1, 3, 4, 5], tmp_filename, sep='_x_') + + def test_dtype(self, tmp_filename): + v = np.array([1, 2, 3, 4], dtype=np.int_) + self._check_from(b'1,2,3,4', v, tmp_filename, sep=',', dtype=np.int_) + + def test_dtype_bool(self, tmp_filename): + # can't use _check_from because fromstring can't handle True/False + v = np.array([True, False, True, False], dtype=np.bool_) + s = b'1,0,-2.3,0' + with open(tmp_filename, 'wb') as f: + f.write(s) + y = np.fromfile(tmp_filename, sep=',', dtype=np.bool_) + assert_(y.dtype == '?') + assert_array_equal(y, v) + + def test_tofile_sep(self, tmp_filename, decimal_sep_localization): + x = np.array([1.51, 2, 3.51, 4], dtype=float) + with open(tmp_filename, 'w') as f: + x.tofile(f, sep=',') + with open(tmp_filename, 'r') as f: + s = f.read() + #assert_equal(s, '1.51,2.0,3.51,4.0') + y = np.array([float(p) for p in s.split(',')]) + assert_array_equal(x,y) + + def test_tofile_format(self, tmp_filename, decimal_sep_localization): + x = np.array([1.51, 2, 3.51, 4], dtype=float) + with open(tmp_filename, 'w') as f: + x.tofile(f, sep=',', format='%.2f') + with open(tmp_filename, 'r') as f: + s = f.read() + assert_equal(s, '1.51,2.00,3.51,4.00') + + def test_tofile_cleanup(self, tmp_filename): + x = np.zeros((10), dtype=object) + with open(tmp_filename, 'wb') as f: + assert_raises(OSError, lambda: x.tofile(f, sep='')) + # Dup-ed file handle should be closed or remove will fail on Windows OS + os.remove(tmp_filename) + + # Also make sure that we close the Python handle + assert_raises(OSError, lambda: x.tofile(tmp_filename)) + os.remove(tmp_filename) + + def test_fromfile_subarray_binary(self, tmp_filename): + # Test subarray dtypes which are absorbed into the 
shape + x = np.arange(24, dtype="i4").reshape(2, 3, 4) + x.tofile(tmp_filename) + res = np.fromfile(tmp_filename, dtype="(3,4)i4") + assert_array_equal(x, res) + + x_str = x.tobytes() + with assert_warns(DeprecationWarning): + # binary fromstring is deprecated + res = np.fromstring(x_str, dtype="(3,4)i4") + assert_array_equal(x, res) + + def test_parsing_subarray_unsupported(self, tmp_filename): + # We currently do not support parsing subarray dtypes + data = "12,42,13," * 50 + with pytest.raises(ValueError): + expected = np.fromstring(data, dtype="(3,)i", sep=",") + + with open(tmp_filename, "w") as f: + f.write(data) + + with pytest.raises(ValueError): + np.fromfile(tmp_filename, dtype="(3,)i", sep=",") + + def test_read_shorter_than_count_subarray(self, tmp_filename): + # Test that requesting more values does not cause any problems + # in conjunction with subarray dimensions being absorbed into the + # array dimension. + expected = np.arange(511 * 10, dtype="i").reshape(-1, 10) + + binary = expected.tobytes() + with pytest.raises(ValueError): + with pytest.warns(DeprecationWarning): + np.fromstring(binary, dtype="(10,)i", count=10000) + + expected.tofile(tmp_filename) + res = np.fromfile(tmp_filename, dtype="(10,)i", count=10000) + assert_array_equal(res, expected) + + +class TestFromBuffer: + @pytest.mark.parametrize('byteorder', ['<', '>']) + @pytest.mark.parametrize('dtype', [float, int, complex]) + def test_basic(self, byteorder, dtype): + dt = np.dtype(dtype).newbyteorder(byteorder) + x = (np.random.random((4, 7)) * 5).astype(dt) + buf = x.tobytes() + assert_array_equal(np.frombuffer(buf, dtype=dt), x.flat) + + def test_empty(self): + assert_array_equal(np.frombuffer(b''), np.array([])) + + +class TestFlat: + def setup(self): + a0 = np.arange(20.0) + a = a0.reshape(4, 5) + a0.shape = (4, 5) + a.flags.writeable = False + self.a = a + self.b = a[::2, ::2] + self.a0 = a0 + self.b0 = a0[::2, ::2] + + def test_contiguous(self): + testpassed = False + try: + self.a.flat[12] = 100.0 + except ValueError: + testpassed = True + assert_(testpassed) + assert_(self.a.flat[12] == 12.0) + + def test_discontiguous(self): + testpassed = False + try: + self.b.flat[4] = 100.0 + except ValueError: + testpassed = True + assert_(testpassed) + assert_(self.b.flat[4] == 12.0) + + def test___array__(self): + c = self.a.flat.__array__() + d = self.b.flat.__array__() + e = self.a0.flat.__array__() + f = self.b0.flat.__array__() + + assert_(c.flags.writeable is False) + assert_(d.flags.writeable is False) + # for 1.14 all are set to non-writeable on the way to replacing the + # UPDATEIFCOPY array returned for non-contiguous arrays. + assert_(e.flags.writeable is True) + assert_(f.flags.writeable is False) + with assert_warns(DeprecationWarning): + assert_(c.flags.updateifcopy is False) + with assert_warns(DeprecationWarning): + assert_(d.flags.updateifcopy is False) + with assert_warns(DeprecationWarning): + assert_(e.flags.updateifcopy is False) + with assert_warns(DeprecationWarning): + # UPDATEIFCOPY is removed. 
+ assert_(f.flags.updateifcopy is False) + assert_(c.flags.writebackifcopy is False) + assert_(d.flags.writebackifcopy is False) + assert_(e.flags.writebackifcopy is False) + assert_(f.flags.writebackifcopy is False) + + @pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") + def test_refcount(self): + # includes regression test for reference count error gh-13165 + inds = [np.intp(0), np.array([True]*self.a.size), np.array([0]), None] + indtype = np.dtype(np.intp) + rc_indtype = sys.getrefcount(indtype) + for ind in inds: + rc_ind = sys.getrefcount(ind) + for _ in range(100): + try: + self.a.flat[ind] + except IndexError: + pass + assert_(abs(sys.getrefcount(ind) - rc_ind) < 50) + assert_(abs(sys.getrefcount(indtype) - rc_indtype) < 50) + + def test_index_getset(self): + it = np.arange(10).reshape(2, 1, 5).flat + with pytest.raises(AttributeError): + it.index = 10 + + for _ in it: + pass + # Check the value of `.index` is updated correctly (see also gh-19153) + # If the type was incorrect, this would show up on big-endian machines + assert it.index == it.base.size + + +class TestResize: + + @_no_tracing + def test_basic(self): + x = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) + if IS_PYPY: + x.resize((5, 5), refcheck=False) + else: + x.resize((5, 5)) + assert_array_equal(x.flat[:9], + np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]).flat) + assert_array_equal(x[9:].flat, 0) + + def test_check_reference(self): + x = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) + y = x + assert_raises(ValueError, x.resize, (5, 1)) + del y # avoid pyflakes unused variable warning. + + @_no_tracing + def test_int_shape(self): + x = np.eye(3) + if IS_PYPY: + x.resize(3, refcheck=False) + else: + x.resize(3) + assert_array_equal(x, np.eye(3)[0,:]) + + def test_none_shape(self): + x = np.eye(3) + x.resize(None) + assert_array_equal(x, np.eye(3)) + x.resize() + assert_array_equal(x, np.eye(3)) + + def test_0d_shape(self): + # to it multiple times to test it does not break alloc cache gh-9216 + for i in range(10): + x = np.empty((1,)) + x.resize(()) + assert_equal(x.shape, ()) + assert_equal(x.size, 1) + x = np.empty(()) + x.resize((1,)) + assert_equal(x.shape, (1,)) + assert_equal(x.size, 1) + + def test_invalid_arguments(self): + assert_raises(TypeError, np.eye(3).resize, 'hi') + assert_raises(ValueError, np.eye(3).resize, -1) + assert_raises(TypeError, np.eye(3).resize, order=1) + assert_raises(TypeError, np.eye(3).resize, refcheck='hi') + + @_no_tracing + def test_freeform_shape(self): + x = np.eye(3) + if IS_PYPY: + x.resize(3, 2, 1, refcheck=False) + else: + x.resize(3, 2, 1) + assert_(x.shape == (3, 2, 1)) + + @_no_tracing + def test_zeros_appended(self): + x = np.eye(3) + if IS_PYPY: + x.resize(2, 3, 3, refcheck=False) + else: + x.resize(2, 3, 3) + assert_array_equal(x[0], np.eye(3)) + assert_array_equal(x[1], np.zeros((3, 3))) + + @_no_tracing + def test_obj_obj(self): + # check memory is initialized on resize, gh-4857 + a = np.ones(10, dtype=[('k', object, 2)]) + if IS_PYPY: + a.resize(15, refcheck=False) + else: + a.resize(15,) + assert_equal(a.shape, (15,)) + assert_array_equal(a['k'][-5:], 0) + assert_array_equal(a['k'][:-5], 1) + + def test_empty_view(self): + # check that sizes containing a zero don't trigger a reallocate for + # already empty arrays + x = np.zeros((10, 0), int) + x_view = x[...] 
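+        # (Editorial note: the view has zero elements, so the resizes below
+        # must succeed without reallocating the shared buffer -- the very
+        # condition this test guards.)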
+ x_view.resize((0, 10)) + x_view.resize((0, 100)) + + def test_check_weakref(self): + x = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) + xref = weakref.ref(x) + assert_raises(ValueError, x.resize, (5, 1)) + del xref # avoid pyflakes unused variable warning. + + +class TestRecord: + def test_field_rename(self): + dt = np.dtype([('f', float), ('i', int)]) + dt.names = ['p', 'q'] + assert_equal(dt.names, ['p', 'q']) + + def test_multiple_field_name_occurrence(self): + def test_dtype_init(): + np.dtype([("A", "f8"), ("B", "f8"), ("A", "f8")]) + + # Error raised when multiple fields have the same name + assert_raises(ValueError, test_dtype_init) + + def test_bytes_fields(self): + # Bytes are not allowed in field names and not recognized in titles + # on Py3 + assert_raises(TypeError, np.dtype, [(b'a', int)]) + assert_raises(TypeError, np.dtype, [(('b', b'a'), int)]) + + dt = np.dtype([((b'a', 'b'), int)]) + assert_raises(TypeError, dt.__getitem__, b'a') + + x = np.array([(1,), (2,), (3,)], dtype=dt) + assert_raises(IndexError, x.__getitem__, b'a') + + y = x[0] + assert_raises(IndexError, y.__getitem__, b'a') + + def test_multiple_field_name_unicode(self): + def test_dtype_unicode(): + np.dtype([("\u20B9", "f8"), ("B", "f8"), ("\u20B9", "f8")]) + + # Error raised when multiple fields have the same name(unicode included) + assert_raises(ValueError, test_dtype_unicode) + + def test_fromarrays_unicode(self): + # A single name string provided to fromarrays() is allowed to be unicode + # on both Python 2 and 3: + x = np.core.records.fromarrays([[0], [1]], names=u'a,b', formats=u'i4,i4') + assert_equal(x['a'][0], 0) + assert_equal(x['b'][0], 1) + + def test_unicode_order(self): + # Test that we can sort with order as a unicode field name in both Python 2 and + # 3: + name = u'b' + x = np.array([1, 3, 2], dtype=[(name, int)]) + x.sort(order=name) + assert_equal(x[u'b'], np.array([1, 2, 3])) + + def test_field_names(self): + # Test unicode and 8-bit / byte strings can be used + a = np.zeros((1,), dtype=[('f1', 'i4'), + ('f2', 'i4'), + ('f3', [('sf1', 'i4')])]) + # byte string indexing fails gracefully + assert_raises(IndexError, a.__setitem__, b'f1', 1) + assert_raises(IndexError, a.__getitem__, b'f1') + assert_raises(IndexError, a['f1'].__setitem__, b'sf1', 1) + assert_raises(IndexError, a['f1'].__getitem__, b'sf1') + b = a.copy() + fn1 = str('f1') + b[fn1] = 1 + assert_equal(b[fn1], 1) + fnn = str('not at all') + assert_raises(ValueError, b.__setitem__, fnn, 1) + assert_raises(ValueError, b.__getitem__, fnn) + b[0][fn1] = 2 + assert_equal(b[fn1], 2) + # Subfield + assert_raises(ValueError, b[0].__setitem__, fnn, 1) + assert_raises(ValueError, b[0].__getitem__, fnn) + # Subfield + fn3 = str('f3') + sfn1 = str('sf1') + b[fn3][sfn1] = 1 + assert_equal(b[fn3][sfn1], 1) + assert_raises(ValueError, b[fn3].__setitem__, fnn, 1) + assert_raises(ValueError, b[fn3].__getitem__, fnn) + # multiple subfields + fn2 = str('f2') + b[fn2] = 3 + + assert_equal(b[['f1', 'f2']][0].tolist(), (2, 3)) + assert_equal(b[['f2', 'f1']][0].tolist(), (3, 2)) + assert_equal(b[['f1', 'f3']][0].tolist(), (2, (1,))) + + # non-ascii unicode field indexing is well behaved + assert_raises(ValueError, a.__setitem__, u'\u03e0', 1) + assert_raises(ValueError, a.__getitem__, u'\u03e0') + + def test_record_hash(self): + a = np.array([(1, 2), (1, 2)], dtype='i1,i2') + a.flags.writeable = False + b = np.array([(1, 2), (3, 4)], dtype=[('num1', 'i1'), ('num2', 'i2')]) + b.flags.writeable = False + c = np.array([(1, 2), (3, 4)], dtype='i1,i2') + 
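+        # (Editorial note: all three arrays are made read-only first; void
+        # scalars are only hashable when the owning array is not writeable,
+        # as test_record_no_hash below confirms.)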
c.flags.writeable = False + assert_(hash(a[0]) == hash(a[1])) + assert_(hash(a[0]) == hash(b[0])) + assert_(hash(a[0]) != hash(b[1])) + assert_(hash(c[0]) == hash(a[0]) and c[0] == a[0]) + + def test_record_no_hash(self): + a = np.array([(1, 2), (1, 2)], dtype='i1,i2') + assert_raises(TypeError, hash, a[0]) + + def test_empty_structure_creation(self): + # make sure these do not raise errors (gh-5631) + np.array([()], dtype={'names': [], 'formats': [], + 'offsets': [], 'itemsize': 12}) + np.array([(), (), (), (), ()], dtype={'names': [], 'formats': [], + 'offsets': [], 'itemsize': 12}) + + def test_multifield_indexing_view(self): + a = np.ones(3, dtype=[('a', 'i4'), ('b', 'f4'), ('c', 'u4')]) + v = a[['a', 'c']] + assert_(v.base is a) + assert_(v.dtype == np.dtype({'names': ['a', 'c'], + 'formats': ['i4', 'u4'], + 'offsets': [0, 8]})) + v[:] = (4,5) + assert_equal(a[0].item(), (4, 1, 5)) + +class TestView: + def test_basic(self): + x = np.array([(1, 2, 3, 4), (5, 6, 7, 8)], + dtype=[('r', np.int8), ('g', np.int8), + ('b', np.int8), ('a', np.int8)]) + # We must be specific about the endianness here: + y = x.view(dtype=' 0) + assert_(issubclass(w[0].category, RuntimeWarning)) + + def test_empty(self): + A = np.zeros((0, 3)) + for f in self.funcs: + for axis in [0, None]: + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + assert_(np.isnan(f(A, axis=axis)).all()) + assert_(len(w) > 0) + assert_(issubclass(w[0].category, RuntimeWarning)) + for axis in [1]: + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + assert_equal(f(A, axis=axis), np.zeros([])) + + def test_mean_values(self): + for mat in [self.rmat, self.cmat, self.omat]: + for axis in [0, 1]: + tgt = mat.sum(axis=axis) + res = _mean(mat, axis=axis) * mat.shape[axis] + assert_almost_equal(res, tgt) + for axis in [None]: + tgt = mat.sum(axis=axis) + res = _mean(mat, axis=axis) * np.prod(mat.shape) + assert_almost_equal(res, tgt) + + def test_mean_float16(self): + # This fail if the sum inside mean is done in float16 instead + # of float32. + assert_(_mean(np.ones(100000, dtype='float16')) == 1) + + def test_mean_axis_error(self): + # Ensure that AxisError is raised instead of IndexError when axis is + # out of bounds, see gh-15817. 
+ with assert_raises(np.core._exceptions.AxisError): + np.arange(10).mean(axis=2) + + def test_mean_where(self): + a = np.arange(16).reshape((4, 4)) + wh_full = np.array([[False, True, False, True], + [True, False, True, False], + [True, True, False, False], + [False, False, True, True]]) + wh_partial = np.array([[False], + [True], + [True], + [False]]) + _cases = [(1, True, [1.5, 5.5, 9.5, 13.5]), + (0, wh_full, [6., 5., 10., 9.]), + (1, wh_full, [2., 5., 8.5, 14.5]), + (0, wh_partial, [6., 7., 8., 9.])] + for _ax, _wh, _res in _cases: + assert_allclose(a.mean(axis=_ax, where=_wh), + np.array(_res)) + assert_allclose(np.mean(a, axis=_ax, where=_wh), + np.array(_res)) + + a3d = np.arange(16).reshape((2, 2, 4)) + _wh_partial = np.array([False, True, True, False]) + _res = [[1.5, 5.5], [9.5, 13.5]] + assert_allclose(a3d.mean(axis=2, where=_wh_partial), + np.array(_res)) + assert_allclose(np.mean(a3d, axis=2, where=_wh_partial), + np.array(_res)) + + with pytest.warns(RuntimeWarning) as w: + assert_allclose(a.mean(axis=1, where=wh_partial), + np.array([np.nan, 5.5, 9.5, np.nan])) + with pytest.warns(RuntimeWarning) as w: + assert_equal(a.mean(where=False), np.nan) + with pytest.warns(RuntimeWarning) as w: + assert_equal(np.mean(a, where=False), np.nan) + + def test_var_values(self): + for mat in [self.rmat, self.cmat, self.omat]: + for axis in [0, 1, None]: + msqr = _mean(mat * mat.conj(), axis=axis) + mean = _mean(mat, axis=axis) + tgt = msqr - mean * mean.conjugate() + res = _var(mat, axis=axis) + assert_almost_equal(res, tgt) + + @pytest.mark.parametrize(('complex_dtype', 'ndec'), ( + ('complex64', 6), + ('complex128', 7), + ('clongdouble', 7), + )) + def test_var_complex_values(self, complex_dtype, ndec): + # Test fast-paths for every builtin complex type + for axis in [0, 1, None]: + mat = self.cmat.copy().astype(complex_dtype) + msqr = _mean(mat * mat.conj(), axis=axis) + mean = _mean(mat, axis=axis) + tgt = msqr - mean * mean.conjugate() + res = _var(mat, axis=axis) + assert_almost_equal(res, tgt, decimal=ndec) + + def test_var_dimensions(self): + # _var paths for complex number introduce additions on views that + # increase dimensions. Ensure this generalizes to higher dims + mat = np.stack([self.cmat]*3) + for axis in [0, 1, 2, -1, None]: + msqr = _mean(mat * mat.conj(), axis=axis) + mean = _mean(mat, axis=axis) + tgt = msqr - mean * mean.conjugate() + res = _var(mat, axis=axis) + assert_almost_equal(res, tgt) + + def test_var_complex_byteorder(self): + # Test that var fast-path does not cause failures for complex arrays + # with non-native byteorder + cmat = self.cmat.copy().astype('complex128') + cmat_swapped = cmat.astype(cmat.dtype.newbyteorder()) + assert_almost_equal(cmat.var(), cmat_swapped.var()) + + def test_var_axis_error(self): + # Ensure that AxisError is raised instead of IndexError when axis is + # out of bounds, see gh-15817. 
+ with assert_raises(np.core._exceptions.AxisError): + np.arange(10).var(axis=2) + + def test_var_where(self): + a = np.arange(25).reshape((5, 5)) + wh_full = np.array([[False, True, False, True, True], + [True, False, True, True, False], + [True, True, False, False, True], + [False, True, True, False, True], + [True, False, True, True, False]]) + wh_partial = np.array([[False], + [True], + [True], + [False], + [True]]) + _cases = [(0, True, [50., 50., 50., 50., 50.]), + (1, True, [2., 2., 2., 2., 2.])] + for _ax, _wh, _res in _cases: + assert_allclose(a.var(axis=_ax, where=_wh), + np.array(_res)) + assert_allclose(np.var(a, axis=_ax, where=_wh), + np.array(_res)) + + a3d = np.arange(16).reshape((2, 2, 4)) + _wh_partial = np.array([False, True, True, False]) + _res = [[0.25, 0.25], [0.25, 0.25]] + assert_allclose(a3d.var(axis=2, where=_wh_partial), + np.array(_res)) + assert_allclose(np.var(a3d, axis=2, where=_wh_partial), + np.array(_res)) + + assert_allclose(np.var(a, axis=1, where=wh_full), + np.var(a[wh_full].reshape((5, 3)), axis=1)) + assert_allclose(np.var(a, axis=0, where=wh_partial), + np.var(a[wh_partial[:,0]], axis=0)) + with pytest.warns(RuntimeWarning) as w: + assert_equal(a.var(where=False), np.nan) + with pytest.warns(RuntimeWarning) as w: + assert_equal(np.var(a, where=False), np.nan) + + def test_std_values(self): + for mat in [self.rmat, self.cmat, self.omat]: + for axis in [0, 1, None]: + tgt = np.sqrt(_var(mat, axis=axis)) + res = _std(mat, axis=axis) + assert_almost_equal(res, tgt) + + def test_std_where(self): + a = np.arange(25).reshape((5,5))[::-1] + whf = np.array([[False, True, False, True, True], + [True, False, True, False, True], + [True, True, False, True, False], + [True, False, True, True, False], + [False, True, False, True, True]]) + whp = np.array([[False], + [False], + [True], + [True], + [False]]) + _cases = [ + (0, True, 7.07106781*np.ones((5))), + (1, True, 1.41421356*np.ones((5))), + (0, whf, + np.array([4.0824829 , 8.16496581, 5., 7.39509973, 8.49836586])), + (0, whp, 2.5*np.ones((5))) + ] + for _ax, _wh, _res in _cases: + assert_allclose(a.std(axis=_ax, where=_wh), _res) + assert_allclose(np.std(a, axis=_ax, where=_wh), _res) + + a3d = np.arange(16).reshape((2, 2, 4)) + _wh_partial = np.array([False, True, True, False]) + _res = [[0.5, 0.5], [0.5, 0.5]] + assert_allclose(a3d.std(axis=2, where=_wh_partial), + np.array(_res)) + assert_allclose(np.std(a3d, axis=2, where=_wh_partial), + np.array(_res)) + + assert_allclose(a.std(axis=1, where=whf), + np.std(a[whf].reshape((5,3)), axis=1)) + assert_allclose(np.std(a, axis=1, where=whf), + (a[whf].reshape((5,3))).std(axis=1)) + assert_allclose(a.std(axis=0, where=whp), + np.std(a[whp[:,0]], axis=0)) + assert_allclose(np.std(a, axis=0, where=whp), + (a[whp[:,0]]).std(axis=0)) + with pytest.warns(RuntimeWarning) as w: + assert_equal(a.std(where=False), np.nan) + with pytest.warns(RuntimeWarning) as w: + assert_equal(np.std(a, where=False), np.nan) + + def test_subclass(self): + class TestArray(np.ndarray): + def __new__(cls, data, info): + result = np.array(data) + result = result.view(cls) + result.info = info + return result + + def __array_finalize__(self, obj): + self.info = getattr(obj, "info", '') + + dat = TestArray([[1, 2, 3, 4], [5, 6, 7, 8]], 'jubba') + res = dat.mean(1) + assert_(res.info == dat.info) + res = dat.std(1) + assert_(res.info == dat.info) + res = dat.var(1) + assert_(res.info == dat.info) + + +class TestVdot: + def test_basic(self): + dt_numeric = np.typecodes['AllFloat'] + 
np.typecodes['AllInteger'] + dt_complex = np.typecodes['Complex'] + + # test real + a = np.eye(3) + for dt in dt_numeric + 'O': + b = a.astype(dt) + res = np.vdot(b, b) + assert_(np.isscalar(res)) + assert_equal(np.vdot(b, b), 3) + + # test complex + a = np.eye(3) * 1j + for dt in dt_complex + 'O': + b = a.astype(dt) + res = np.vdot(b, b) + assert_(np.isscalar(res)) + assert_equal(np.vdot(b, b), 3) + + # test boolean + b = np.eye(3, dtype=bool) + res = np.vdot(b, b) + assert_(np.isscalar(res)) + assert_equal(np.vdot(b, b), True) + + def test_vdot_array_order(self): + a = np.array([[1, 2], [3, 4]], order='C') + b = np.array([[1, 2], [3, 4]], order='F') + res = np.vdot(a, a) + + # integer arrays are exact + assert_equal(np.vdot(a, b), res) + assert_equal(np.vdot(b, a), res) + assert_equal(np.vdot(b, b), res) + + def test_vdot_uncontiguous(self): + for size in [2, 1000]: + # Different sizes match different branches in vdot. + a = np.zeros((size, 2, 2)) + b = np.zeros((size, 2, 2)) + a[:, 0, 0] = np.arange(size) + b[:, 0, 0] = np.arange(size) + 1 + # Make a and b uncontiguous: + a = a[..., 0] + b = b[..., 0] + + assert_equal(np.vdot(a, b), + np.vdot(a.flatten(), b.flatten())) + assert_equal(np.vdot(a, b.copy()), + np.vdot(a.flatten(), b.flatten())) + assert_equal(np.vdot(a.copy(), b), + np.vdot(a.flatten(), b.flatten())) + assert_equal(np.vdot(a.copy('F'), b), + np.vdot(a.flatten(), b.flatten())) + assert_equal(np.vdot(a, b.copy('F')), + np.vdot(a.flatten(), b.flatten())) + + +class TestDot: + def setup(self): + np.random.seed(128) + self.A = np.random.rand(4, 2) + self.b1 = np.random.rand(2, 1) + self.b2 = np.random.rand(2) + self.b3 = np.random.rand(1, 2) + self.b4 = np.random.rand(4) + self.N = 7 + + def test_dotmatmat(self): + A = self.A + res = np.dot(A.transpose(), A) + tgt = np.array([[1.45046013, 0.86323640], + [0.86323640, 0.84934569]]) + assert_almost_equal(res, tgt, decimal=self.N) + + def test_dotmatvec(self): + A, b1 = self.A, self.b1 + res = np.dot(A, b1) + tgt = np.array([[0.32114320], [0.04889721], + [0.15696029], [0.33612621]]) + assert_almost_equal(res, tgt, decimal=self.N) + + def test_dotmatvec2(self): + A, b2 = self.A, self.b2 + res = np.dot(A, b2) + tgt = np.array([0.29677940, 0.04518649, 0.14468333, 0.31039293]) + assert_almost_equal(res, tgt, decimal=self.N) + + def test_dotvecmat(self): + A, b4 = self.A, self.b4 + res = np.dot(b4, A) + tgt = np.array([1.23495091, 1.12222648]) + assert_almost_equal(res, tgt, decimal=self.N) + + def test_dotvecmat2(self): + b3, A = self.b3, self.A + res = np.dot(b3, A.transpose()) + tgt = np.array([[0.58793804, 0.08957460, 0.30605758, 0.62716383]]) + assert_almost_equal(res, tgt, decimal=self.N) + + def test_dotvecmat3(self): + A, b4 = self.A, self.b4 + res = np.dot(A.transpose(), b4) + tgt = np.array([1.23495091, 1.12222648]) + assert_almost_equal(res, tgt, decimal=self.N) + + def test_dotvecvecouter(self): + b1, b3 = self.b1, self.b3 + res = np.dot(b1, b3) + tgt = np.array([[0.20128610, 0.08400440], [0.07190947, 0.03001058]]) + assert_almost_equal(res, tgt, decimal=self.N) + + def test_dotvecvecinner(self): + b1, b3 = self.b1, self.b3 + res = np.dot(b3, b1) + tgt = np.array([[ 0.23129668]]) + assert_almost_equal(res, tgt, decimal=self.N) + + def test_dotcolumnvect1(self): + b1 = np.ones((3, 1)) + b2 = [5.3] + res = np.dot(b1, b2) + tgt = np.array([5.3, 5.3, 5.3]) + assert_almost_equal(res, tgt, decimal=self.N) + + def test_dotcolumnvect2(self): + b1 = np.ones((3, 1)).transpose() + b2 = [6.2] + res = np.dot(b2, b1) + tgt = 
np.array([6.2, 6.2, 6.2]) + assert_almost_equal(res, tgt, decimal=self.N) + + def test_dotvecscalar(self): + np.random.seed(100) + b1 = np.random.rand(1, 1) + b2 = np.random.rand(1, 4) + res = np.dot(b1, b2) + tgt = np.array([[0.15126730, 0.23068496, 0.45905553, 0.00256425]]) + assert_almost_equal(res, tgt, decimal=self.N) + + def test_dotvecscalar2(self): + np.random.seed(100) + b1 = np.random.rand(4, 1) + b2 = np.random.rand(1, 1) + res = np.dot(b1, b2) + tgt = np.array([[0.00256425],[0.00131359],[0.00200324],[ 0.00398638]]) + assert_almost_equal(res, tgt, decimal=self.N) + + def test_all(self): + dims = [(), (1,), (1, 1)] + dout = [(), (1,), (1, 1), (1,), (), (1,), (1, 1), (1,), (1, 1)] + for dim, (dim1, dim2) in zip(dout, itertools.product(dims, dims)): + b1 = np.zeros(dim1) + b2 = np.zeros(dim2) + res = np.dot(b1, b2) + tgt = np.zeros(dim) + assert_(res.shape == tgt.shape) + assert_almost_equal(res, tgt, decimal=self.N) + + def test_vecobject(self): + class Vec: + def __init__(self, sequence=None): + if sequence is None: + sequence = [] + self.array = np.array(sequence) + + def __add__(self, other): + out = Vec() + out.array = self.array + other.array + return out + + def __sub__(self, other): + out = Vec() + out.array = self.array - other.array + return out + + def __mul__(self, other): # with scalar + out = Vec(self.array.copy()) + out.array *= other + return out + + def __rmul__(self, other): + return self*other + + U_non_cont = np.transpose([[1., 1.], [1., 2.]]) + U_cont = np.ascontiguousarray(U_non_cont) + x = np.array([Vec([1., 0.]), Vec([0., 1.])]) + zeros = np.array([Vec([0., 0.]), Vec([0., 0.])]) + zeros_test = np.dot(U_cont, x) - np.dot(U_non_cont, x) + assert_equal(zeros[0].array, zeros_test[0].array) + assert_equal(zeros[1].array, zeros_test[1].array) + + def test_dot_2args(self): + from numpy.core.multiarray import dot + + a = np.array([[1, 2], [3, 4]], dtype=float) + b = np.array([[1, 0], [1, 1]], dtype=float) + c = np.array([[3, 2], [7, 4]], dtype=float) + + d = dot(a, b) + assert_allclose(c, d) + + def test_dot_3args(self): + from numpy.core.multiarray import dot + + np.random.seed(22) + f = np.random.random_sample((1024, 16)) + v = np.random.random_sample((16, 32)) + + r = np.empty((1024, 32)) + for i in range(12): + dot(f, v, r) + if HAS_REFCOUNT: + assert_equal(sys.getrefcount(r), 2) + r2 = dot(f, v, out=None) + assert_array_equal(r2, r) + assert_(r is dot(f, v, out=r)) + + v = v[:, 0].copy() # v.shape == (16,) + r = r[:, 0].copy() # r.shape == (1024,) + r2 = dot(f, v) + assert_(r is dot(f, v, r)) + assert_array_equal(r2, r) + + def test_dot_3args_errors(self): + from numpy.core.multiarray import dot + + np.random.seed(22) + f = np.random.random_sample((1024, 16)) + v = np.random.random_sample((16, 32)) + + r = np.empty((1024, 31)) + assert_raises(ValueError, dot, f, v, r) + + r = np.empty((1024,)) + assert_raises(ValueError, dot, f, v, r) + + r = np.empty((32,)) + assert_raises(ValueError, dot, f, v, r) + + r = np.empty((32, 1024)) + assert_raises(ValueError, dot, f, v, r) + assert_raises(ValueError, dot, f, v, r.T) + + r = np.empty((1024, 64)) + assert_raises(ValueError, dot, f, v, r[:, ::2]) + assert_raises(ValueError, dot, f, v, r[:, :32]) + + r = np.empty((1024, 32), dtype=np.float32) + assert_raises(ValueError, dot, f, v, r) + + r = np.empty((1024, 32), dtype=int) + assert_raises(ValueError, dot, f, v, r) + + def test_dot_array_order(self): + a = np.array([[1, 2], [3, 4]], order='C') + b = np.array([[1, 2], [3, 4]], order='F') + res = np.dot(a, a) + + # 
integer arrays are exact + assert_equal(np.dot(a, b), res) + assert_equal(np.dot(b, a), res) + assert_equal(np.dot(b, b), res) + + def test_accelerate_framework_sgemv_fix(self): + + def aligned_array(shape, align, dtype, order='C'): + d = dtype(0) + N = np.prod(shape) + tmp = np.zeros(N * d.nbytes + align, dtype=np.uint8) + address = tmp.__array_interface__["data"][0] + for offset in range(align): + if (address + offset) % align == 0: + break + tmp = tmp[offset:offset+N*d.nbytes].view(dtype=dtype) + return tmp.reshape(shape, order=order) + + def as_aligned(arr, align, dtype, order='C'): + aligned = aligned_array(arr.shape, align, dtype, order) + aligned[:] = arr[:] + return aligned + + def assert_dot_close(A, X, desired): + assert_allclose(np.dot(A, X), desired, rtol=1e-5, atol=1e-7) + + m = aligned_array(100, 15, np.float32) + s = aligned_array((100, 100), 15, np.float32) + np.dot(s, m) # this will always segfault if the bug is present + + testdata = itertools.product((15, 32), (10000,), (200, 89), ('C', 'F')) + for align, m, n, a_order in testdata: + # Calculation in double precision + A_d = np.random.rand(m, n) + X_d = np.random.rand(n) + desired = np.dot(A_d, X_d) + # Calculation with aligned single precision + A_f = as_aligned(A_d, align, np.float32, order=a_order) + X_f = as_aligned(X_d, align, np.float32) + assert_dot_close(A_f, X_f, desired) + # Strided A rows + A_d_2 = A_d[::2] + desired = np.dot(A_d_2, X_d) + A_f_2 = A_f[::2] + assert_dot_close(A_f_2, X_f, desired) + # Strided A columns, strided X vector + A_d_22 = A_d_2[:, ::2] + X_d_2 = X_d[::2] + desired = np.dot(A_d_22, X_d_2) + A_f_22 = A_f_2[:, ::2] + X_f_2 = X_f[::2] + assert_dot_close(A_f_22, X_f_2, desired) + # Check the strides are as expected + if a_order == 'F': + assert_equal(A_f_22.strides, (8, 8 * m)) + else: + assert_equal(A_f_22.strides, (8 * n, 8)) + assert_equal(X_f_2.strides, (8,)) + # Strides in A rows + cols only + X_f_2c = as_aligned(X_f_2, align, np.float32) + assert_dot_close(A_f_22, X_f_2c, desired) + # Strides just in A cols + A_d_12 = A_d[:, ::2] + desired = np.dot(A_d_12, X_d_2) + A_f_12 = A_f[:, ::2] + assert_dot_close(A_f_12, X_f_2c, desired) + # Strides in A cols and X + assert_dot_close(A_f_12, X_f_2, desired) + + + +class MatmulCommon: + """Common tests for '@' operator and numpy.matmul. + + """ + # Should work with these types. Will want to add + # "O" at some point + types = "?bhilqBHILQefdgFDGO" + + def test_exceptions(self): + dims = [ + ((1,), (2,)), # mismatched vector vector + ((2, 1,), (2,)), # mismatched matrix vector + ((2,), (1, 2)), # mismatched vector matrix + ((1, 2), (3, 1)), # mismatched matrix matrix + ((1,), ()), # vector scalar + ((), (1)), # scalar vector + ((1, 1), ()), # matrix scalar + ((), (1, 1)), # scalar matrix + ((2, 2, 1), (3, 1, 2)), # cannot broadcast + ] + + for dt, (dm1, dm2) in itertools.product(self.types, dims): + a = np.ones(dm1, dtype=dt) + b = np.ones(dm2, dtype=dt) + assert_raises(ValueError, self.matmul, a, b) + + def test_shapes(self): + dims = [ + ((1, 1), (2, 1, 1)), # broadcast first argument + ((2, 1, 1), (1, 1)), # broadcast second argument + ((2, 1, 1), (2, 1, 1)), # matrix stack sizes match + ] + + for dt, (dm1, dm2) in itertools.product(self.types, dims): + a = np.ones(dm1, dtype=dt) + b = np.ones(dm2, dtype=dt) + res = self.matmul(a, b) + assert_(res.shape == (2, 1, 1)) + + # vector vector returns scalars. 
+ for dt in self.types: + a = np.ones((2,), dtype=dt) + b = np.ones((2,), dtype=dt) + c = self.matmul(a, b) + assert_(np.array(c).shape == ()) + + def test_result_types(self): + mat = np.ones((1,1)) + vec = np.ones((1,)) + for dt in self.types: + m = mat.astype(dt) + v = vec.astype(dt) + for arg in [(m, v), (v, m), (m, m)]: + res = self.matmul(*arg) + assert_(res.dtype == dt) + + # vector vector returns scalars + if dt != "O": + res = self.matmul(v, v) + assert_(type(res) is np.dtype(dt).type) + + def test_scalar_output(self): + vec1 = np.array([2]) + vec2 = np.array([3, 4]).reshape(1, -1) + tgt = np.array([6, 8]) + for dt in self.types[1:]: + v1 = vec1.astype(dt) + v2 = vec2.astype(dt) + res = self.matmul(v1, v2) + assert_equal(res, tgt) + res = self.matmul(v2.T, v1) + assert_equal(res, tgt) + + # boolean type + vec = np.array([True, True], dtype='?').reshape(1, -1) + res = self.matmul(vec[:, 0], vec) + assert_equal(res, True) + + def test_vector_vector_values(self): + vec1 = np.array([1, 2]) + vec2 = np.array([3, 4]).reshape(-1, 1) + tgt1 = np.array([11]) + tgt2 = np.array([[3, 6], [4, 8]]) + for dt in self.types[1:]: + v1 = vec1.astype(dt) + v2 = vec2.astype(dt) + res = self.matmul(v1, v2) + assert_equal(res, tgt1) + # no broadcast, we must make v1 into a 2d ndarray + res = self.matmul(v2, v1.reshape(1, -1)) + assert_equal(res, tgt2) + + # boolean type + vec = np.array([True, True], dtype='?') + res = self.matmul(vec, vec) + assert_equal(res, True) + + def test_vector_matrix_values(self): + vec = np.array([1, 2]) + mat1 = np.array([[1, 2], [3, 4]]) + mat2 = np.stack([mat1]*2, axis=0) + tgt1 = np.array([7, 10]) + tgt2 = np.stack([tgt1]*2, axis=0) + for dt in self.types[1:]: + v = vec.astype(dt) + m1 = mat1.astype(dt) + m2 = mat2.astype(dt) + res = self.matmul(v, m1) + assert_equal(res, tgt1) + res = self.matmul(v, m2) + assert_equal(res, tgt2) + + # boolean type + vec = np.array([True, False]) + mat1 = np.array([[True, False], [False, True]]) + mat2 = np.stack([mat1]*2, axis=0) + tgt1 = np.array([True, False]) + tgt2 = np.stack([tgt1]*2, axis=0) + + res = self.matmul(vec, mat1) + assert_equal(res, tgt1) + res = self.matmul(vec, mat2) + assert_equal(res, tgt2) + + def test_matrix_vector_values(self): + vec = np.array([1, 2]) + mat1 = np.array([[1, 2], [3, 4]]) + mat2 = np.stack([mat1]*2, axis=0) + tgt1 = np.array([5, 11]) + tgt2 = np.stack([tgt1]*2, axis=0) + for dt in self.types[1:]: + v = vec.astype(dt) + m1 = mat1.astype(dt) + m2 = mat2.astype(dt) + res = self.matmul(m1, v) + assert_equal(res, tgt1) + res = self.matmul(m2, v) + assert_equal(res, tgt2) + + # boolean type + vec = np.array([True, False]) + mat1 = np.array([[True, False], [False, True]]) + mat2 = np.stack([mat1]*2, axis=0) + tgt1 = np.array([True, False]) + tgt2 = np.stack([tgt1]*2, axis=0) + + res = self.matmul(vec, mat1) + assert_equal(res, tgt1) + res = self.matmul(vec, mat2) + assert_equal(res, tgt2) + + def test_matrix_matrix_values(self): + mat1 = np.array([[1, 2], [3, 4]]) + mat2 = np.array([[1, 0], [1, 1]]) + mat12 = np.stack([mat1, mat2], axis=0) + mat21 = np.stack([mat2, mat1], axis=0) + tgt11 = np.array([[7, 10], [15, 22]]) + tgt12 = np.array([[3, 2], [7, 4]]) + tgt21 = np.array([[1, 2], [4, 6]]) + tgt12_21 = np.stack([tgt12, tgt21], axis=0) + tgt11_12 = np.stack((tgt11, tgt12), axis=0) + tgt11_21 = np.stack((tgt11, tgt21), axis=0) + for dt in self.types[1:]: + m1 = mat1.astype(dt) + m2 = mat2.astype(dt) + m12 = mat12.astype(dt) + m21 = mat21.astype(dt) + + # matrix @ matrix + res = self.matmul(m1, m2) + 
assert_equal(res, tgt12) + res = self.matmul(m2, m1) + assert_equal(res, tgt21) + + # stacked @ matrix + res = self.matmul(m12, m1) + assert_equal(res, tgt11_21) + + # matrix @ stacked + res = self.matmul(m1, m12) + assert_equal(res, tgt11_12) + + # stacked @ stacked + res = self.matmul(m12, m21) + assert_equal(res, tgt12_21) + + # boolean type + m1 = np.array([[1, 1], [0, 0]], dtype=np.bool_) + m2 = np.array([[1, 0], [1, 1]], dtype=np.bool_) + m12 = np.stack([m1, m2], axis=0) + m21 = np.stack([m2, m1], axis=0) + tgt11 = m1 + tgt12 = m1 + tgt21 = np.array([[1, 1], [1, 1]], dtype=np.bool_) + tgt12_21 = np.stack([tgt12, tgt21], axis=0) + tgt11_12 = np.stack((tgt11, tgt12), axis=0) + tgt11_21 = np.stack((tgt11, tgt21), axis=0) + + # matrix @ matrix + res = self.matmul(m1, m2) + assert_equal(res, tgt12) + res = self.matmul(m2, m1) + assert_equal(res, tgt21) + + # stacked @ matrix + res = self.matmul(m12, m1) + assert_equal(res, tgt11_21) + + # matrix @ stacked + res = self.matmul(m1, m12) + assert_equal(res, tgt11_12) + + # stacked @ stacked + res = self.matmul(m12, m21) + assert_equal(res, tgt12_21) + + +class TestMatmul(MatmulCommon): + matmul = np.matmul + + def test_out_arg(self): + a = np.ones((5, 2), dtype=float) + b = np.array([[1, 3], [5, 7]], dtype=float) + tgt = np.dot(a, b) + + # test as positional argument + msg = "out positional argument" + out = np.zeros((5, 2), dtype=float) + self.matmul(a, b, out) + assert_array_equal(out, tgt, err_msg=msg) + + # test as keyword argument + msg = "out keyword argument" + out = np.zeros((5, 2), dtype=float) + self.matmul(a, b, out=out) + assert_array_equal(out, tgt, err_msg=msg) + + # test out with not allowed type cast (safe casting) + msg = "Cannot cast ufunc .* output" + out = np.zeros((5, 2), dtype=np.int32) + assert_raises_regex(TypeError, msg, self.matmul, a, b, out=out) + + # test out with type upcast to complex + out = np.zeros((5, 2), dtype=np.complex128) + c = self.matmul(a, b, out=out) + assert_(c is out) + with suppress_warnings() as sup: + sup.filter(np.ComplexWarning, '') + c = c.astype(tgt.dtype) + assert_array_equal(c, tgt) + + def test_empty_out(self): + # Check that the output cannot be broadcast, so that it cannot be + # size zero when the outer dimensions (iterator size) has size zero. + arr = np.ones((0, 1, 1)) + out = np.ones((1, 1, 1)) + assert self.matmul(arr, arr).shape == (0, 1, 1) + + with pytest.raises(ValueError, match=r"non-broadcastable"): + self.matmul(arr, arr, out=out) + + def test_out_contiguous(self): + a = np.ones((5, 2), dtype=float) + b = np.array([[1, 3], [5, 7]], dtype=float) + v = np.array([1, 3], dtype=float) + tgt = np.dot(a, b) + tgt_mv = np.dot(a, v) + + # test out non-contiguous + out = np.ones((5, 2, 2), dtype=float) + c = self.matmul(a, b, out=out[..., 0]) + assert c.base is out + assert_array_equal(c, tgt) + c = self.matmul(a, v, out=out[:, 0, 0]) + assert_array_equal(c, tgt_mv) + c = self.matmul(v, a.T, out=out[:, 0, 0]) + assert_array_equal(c, tgt_mv) + + # test out contiguous in only last dim + out = np.ones((10, 2), dtype=float) + c = self.matmul(a, b, out=out[::2, :]) + assert_array_equal(c, tgt) + + # test transposes of out, args + out = np.ones((5, 2), dtype=float) + c = self.matmul(b.T, a.T, out=out.T) + assert_array_equal(out, tgt) + + m1 = np.arange(15.).reshape(5, 3) + m2 = np.arange(21.).reshape(3, 7) + m3 = np.arange(30.).reshape(5, 6)[:, ::2] # non-contiguous + vc = np.arange(10.) + vr = np.arange(6.) 
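+ # m3 above is a strided, non-contiguous view; the parametrized cases below
+ # check that np.matmul agrees with np.dot across such layouts. A minimal
+ # standalone sketch of the same property (illustrative only):
+ #
+ # >>> m = np.arange(30.).reshape(5, 6)[:, ::2]   # non-contiguous view
+ # >>> m.flags.c_contiguous
+ # False
+ # >>> np.allclose(np.matmul(m, m.T), np.dot(m, m.T))
+ # True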
+ m0 = np.zeros((3, 0)) + @pytest.mark.parametrize('args', ( + # matrix-matrix + (m1, m2), (m2.T, m1.T), (m2.T.copy(), m1.T), (m2.T, m1.T.copy()), + # matrix-matrix-transpose, contiguous and non + (m1, m1.T), (m1.T, m1), (m1, m3.T), (m3, m1.T), + (m3, m3.T), (m3.T, m3), + # matrix-matrix non-contiguous + (m3, m2), (m2.T, m3.T), (m2.T.copy(), m3.T), + # vector-matrix, matrix-vector, contiguous + (m1, vr[:3]), (vc[:5], m1), (m1.T, vc[:5]), (vr[:3], m1.T), + # vector-matrix, matrix-vector, vector non-contiguous + (m1, vr[::2]), (vc[::2], m1), (m1.T, vc[::2]), (vr[::2], m1.T), + # vector-matrix, matrix-vector, matrix non-contiguous + (m3, vr[:3]), (vc[:5], m3), (m3.T, vc[:5]), (vr[:3], m3.T), + # vector-matrix, matrix-vector, both non-contiguous + (m3, vr[::2]), (vc[::2], m3), (m3.T, vc[::2]), (vr[::2], m3.T), + # size == 0 + (m0, m0.T), (m0.T, m0), (m1, m0), (m0.T, m1.T), + )) + def test_dot_equivalent(self, args): + r1 = np.matmul(*args) + r2 = np.dot(*args) + assert_equal(r1, r2) + + r3 = np.matmul(args[0].copy(), args[1].copy()) + assert_equal(r1, r3) + + def test_matmul_object(self): + import fractions + + f = np.vectorize(fractions.Fraction) + def random_ints(): + return np.random.randint(1, 1000, size=(10, 3, 3)) + M1 = f(random_ints(), random_ints()) + M2 = f(random_ints(), random_ints()) + + M3 = self.matmul(M1, M2) + + [N1, N2, N3] = [a.astype(float) for a in [M1, M2, M3]] + + assert_allclose(N3, self.matmul(N1, N2)) + + def test_matmul_object_type_scalar(self): + from fractions import Fraction as F + v = np.array([F(2,3), F(5,7)]) + res = self.matmul(v, v) + assert_(type(res) is F) + + def test_matmul_empty(self): + a = np.empty((3, 0), dtype=object) + b = np.empty((0, 3), dtype=object) + c = np.zeros((3, 3)) + assert_array_equal(np.matmul(a, b), c) + + def test_matmul_exception_multiply(self): + # test that matmul fails if `__mul__` is missing + class add_not_multiply(): + def __add__(self, other): + return self + a = np.full((3,3), add_not_multiply()) + with assert_raises(TypeError): + b = np.matmul(a, a) + + def test_matmul_exception_add(self): + # test that matmul fails if `__add__` is missing + class multiply_not_add(): + def __mul__(self, other): + return self + a = np.full((3,3), multiply_not_add()) + with assert_raises(TypeError): + b = np.matmul(a, a) + + def test_matmul_bool(self): + # gh-14439 + a = np.array([[1, 0],[1, 1]], dtype=bool) + assert np.max(a.view(np.uint8)) == 1 + b = np.matmul(a, a) + # matmul with boolean output should always be 0, 1 + assert np.max(b.view(np.uint8)) == 1 + + rg = np.random.default_rng(np.random.PCG64(43)) + d = rg.integers(2, size=4*5, dtype=np.int8) + d = d.reshape(4, 5) > 0 + out1 = np.matmul(d, d.reshape(5, 4)) + out2 = np.dot(d, d.reshape(5, 4)) + assert_equal(out1, out2) + + c = np.matmul(np.zeros((2, 0), dtype=bool), np.zeros(0, dtype=bool)) + assert not np.any(c) + + +class TestMatmulOperator(MatmulCommon): + import operator + matmul = operator.matmul + + def test_array_priority_override(self): + + class A: + __array_priority__ = 1000 + + def __matmul__(self, other): + return "A" + + def __rmatmul__(self, other): + return "A" + + a = A() + b = np.ones(2) + assert_equal(self.matmul(a, b), "A") + assert_equal(self.matmul(b, a), "A") + + def test_matmul_raises(self): + assert_raises(TypeError, self.matmul, np.int8(5), np.int8(5)) + assert_raises(TypeError, self.matmul, np.void(b'abc'), np.void(b'abc')) + assert_raises(TypeError, self.matmul, np.arange(10), np.void(b'abc')) + +def test_matmul_inplace(): + # It would be nice to support 
in-place matmul eventually, but for now
+ # we don't have a working implementation, so better just to error out
+ # and nudge people to writing "a = a @ b".
+ a = np.eye(3)
+ b = np.eye(3)
+ assert_raises(TypeError, a.__imatmul__, b)
+ import operator
+ assert_raises(TypeError, operator.imatmul, a, b)
+ assert_raises(TypeError, exec, "a @= b", globals(), locals())
+
+def test_matmul_axes():
+ a = np.arange(3*4*5).reshape(3, 4, 5)
+ c = np.matmul(a, a, axes=[(-2, -1), (-1, -2), (1, 2)])
+ assert c.shape == (3, 4, 4)
+ d = np.matmul(a, a, axes=[(-2, -1), (-1, -2), (0, 1)])
+ assert d.shape == (4, 4, 3)
+ e = np.swapaxes(d, 0, 2)
+ assert_array_equal(e, c)
+ f = np.matmul(a, np.arange(3), axes=[(1, 0), (0), (0)])
+ assert f.shape == (4, 5)
+
+
+class TestInner:
+
+ def test_inner_type_mismatch(self):
+ c = 1.
+ A = np.array((1,1), dtype='i,i')
+
+ assert_raises(TypeError, np.inner, c, A)
+ assert_raises(TypeError, np.inner, A, c)
+
+ def test_inner_scalar_and_vector(self):
+ for dt in np.typecodes['AllInteger'] + np.typecodes['AllFloat'] + '?':
+ sca = np.array(3, dtype=dt)[()]
+ vec = np.array([1, 2], dtype=dt)
+ desired = np.array([3, 6], dtype=dt)
+ assert_equal(np.inner(vec, sca), desired)
+ assert_equal(np.inner(sca, vec), desired)
+
+ def test_vecself(self):
+ # Ticket 844.
+ # Inner product of a vector with itself used to segfault or give a
+ # meaningless result
+ a = np.zeros(shape=(1, 80), dtype=np.float64)
+ p = np.inner(a, a)
+ assert_almost_equal(p, 0, decimal=14)
+
+ def test_inner_product_with_various_contiguities(self):
+ # github issue 6532
+ for dt in np.typecodes['AllInteger'] + np.typecodes['AllFloat'] + '?':
+ # check an inner product involving a matrix transpose
+ A = np.array([[1, 2], [3, 4]], dtype=dt)
+ B = np.array([[1, 3], [2, 4]], dtype=dt)
+ C = np.array([1, 1], dtype=dt)
+ desired = np.array([4, 6], dtype=dt)
+ assert_equal(np.inner(A.T, C), desired)
+ assert_equal(np.inner(C, A.T), desired)
+ assert_equal(np.inner(B, C), desired)
+ assert_equal(np.inner(C, B), desired)
+ # check a matrix product
+ desired = np.array([[7, 10], [15, 22]], dtype=dt)
+ assert_equal(np.inner(A, B), desired)
+ # check the syrk vs. 
gemm paths + desired = np.array([[5, 11], [11, 25]], dtype=dt) + assert_equal(np.inner(A, A), desired) + assert_equal(np.inner(A, A.copy()), desired) + # check an inner product involving an aliased and reversed view + a = np.arange(5).astype(dt) + b = a[::-1] + desired = np.array(10, dtype=dt).item() + assert_equal(np.inner(b, a), desired) + + def test_3d_tensor(self): + for dt in np.typecodes['AllInteger'] + np.typecodes['AllFloat'] + '?': + a = np.arange(24).reshape(2,3,4).astype(dt) + b = np.arange(24, 48).reshape(2,3,4).astype(dt) + desired = np.array( + [[[[ 158, 182, 206], + [ 230, 254, 278]], + + [[ 566, 654, 742], + [ 830, 918, 1006]], + + [[ 974, 1126, 1278], + [1430, 1582, 1734]]], + + [[[1382, 1598, 1814], + [2030, 2246, 2462]], + + [[1790, 2070, 2350], + [2630, 2910, 3190]], + + [[2198, 2542, 2886], + [3230, 3574, 3918]]]], + dtype=dt + ) + assert_equal(np.inner(a, b), desired) + assert_equal(np.inner(b, a).transpose(2,3,0,1), desired) + + +class TestAlen: + def test_basic(self): + with pytest.warns(DeprecationWarning): + m = np.array([1, 2, 3]) + assert_equal(np.alen(m), 3) + + m = np.array([[1, 2, 3], [4, 5, 7]]) + assert_equal(np.alen(m), 2) + + m = [1, 2, 3] + assert_equal(np.alen(m), 3) + + m = [[1, 2, 3], [4, 5, 7]] + assert_equal(np.alen(m), 2) + + def test_singleton(self): + with pytest.warns(DeprecationWarning): + assert_equal(np.alen(5), 1) + + +class TestChoose: + def setup(self): + self.x = 2*np.ones((3,), dtype=int) + self.y = 3*np.ones((3,), dtype=int) + self.x2 = 2*np.ones((2, 3), dtype=int) + self.y2 = 3*np.ones((2, 3), dtype=int) + self.ind = [0, 0, 1] + + def test_basic(self): + A = np.choose(self.ind, (self.x, self.y)) + assert_equal(A, [2, 2, 3]) + + def test_broadcast1(self): + A = np.choose(self.ind, (self.x2, self.y2)) + assert_equal(A, [[2, 2, 3], [2, 2, 3]]) + + def test_broadcast2(self): + A = np.choose(self.ind, (self.x, self.y2)) + assert_equal(A, [[2, 2, 3], [2, 2, 3]]) + + @pytest.mark.parametrize("ops", + [(1000, np.array([1], dtype=np.uint8)), + (-1, np.array([1], dtype=np.uint8)), + (1., np.float32(3)), + (1., np.array([3], dtype=np.float32))],) + def test_output_dtype(self, ops): + expected_dt = np.result_type(*ops) + assert(np.choose([0], ops).dtype == expected_dt) + + +class TestRepeat: + def setup(self): + self.m = np.array([1, 2, 3, 4, 5, 6]) + self.m_rect = self.m.reshape((2, 3)) + + def test_basic(self): + A = np.repeat(self.m, [1, 3, 2, 1, 1, 2]) + assert_equal(A, [1, 2, 2, 2, 3, + 3, 4, 5, 6, 6]) + + def test_broadcast1(self): + A = np.repeat(self.m, 2) + assert_equal(A, [1, 1, 2, 2, 3, 3, + 4, 4, 5, 5, 6, 6]) + + def test_axis_spec(self): + A = np.repeat(self.m_rect, [2, 1], axis=0) + assert_equal(A, [[1, 2, 3], + [1, 2, 3], + [4, 5, 6]]) + + A = np.repeat(self.m_rect, [1, 3, 2], axis=1) + assert_equal(A, [[1, 2, 2, 2, 3, 3], + [4, 5, 5, 5, 6, 6]]) + + def test_broadcast2(self): + A = np.repeat(self.m_rect, 2, axis=0) + assert_equal(A, [[1, 2, 3], + [1, 2, 3], + [4, 5, 6], + [4, 5, 6]]) + + A = np.repeat(self.m_rect, 2, axis=1) + assert_equal(A, [[1, 1, 2, 2, 3, 3], + [4, 4, 5, 5, 6, 6]]) + + +# TODO: test for multidimensional +NEIGH_MODE = {'zero': 0, 'one': 1, 'constant': 2, 'circular': 3, 'mirror': 4} + + +@pytest.mark.parametrize('dt', [float, Decimal], ids=['float', 'object']) +class TestNeighborhoodIter: + # Simple, 2d tests + def test_simple2d(self, dt): + # Test zero and one padding for simple data type + x = np.array([[0, 1], [2, 3]], dtype=dt) + r = [np.array([[0, 0, 0], [0, 0, 1]], dtype=dt), + np.array([[0, 0, 0], [0, 1, 
0]], dtype=dt), + np.array([[0, 0, 1], [0, 2, 3]], dtype=dt), + np.array([[0, 1, 0], [2, 3, 0]], dtype=dt)] + l = _multiarray_tests.test_neighborhood_iterator( + x, [-1, 0, -1, 1], x[0], NEIGH_MODE['zero']) + assert_array_equal(l, r) + + r = [np.array([[1, 1, 1], [1, 0, 1]], dtype=dt), + np.array([[1, 1, 1], [0, 1, 1]], dtype=dt), + np.array([[1, 0, 1], [1, 2, 3]], dtype=dt), + np.array([[0, 1, 1], [2, 3, 1]], dtype=dt)] + l = _multiarray_tests.test_neighborhood_iterator( + x, [-1, 0, -1, 1], x[0], NEIGH_MODE['one']) + assert_array_equal(l, r) + + r = [np.array([[4, 4, 4], [4, 0, 1]], dtype=dt), + np.array([[4, 4, 4], [0, 1, 4]], dtype=dt), + np.array([[4, 0, 1], [4, 2, 3]], dtype=dt), + np.array([[0, 1, 4], [2, 3, 4]], dtype=dt)] + l = _multiarray_tests.test_neighborhood_iterator( + x, [-1, 0, -1, 1], 4, NEIGH_MODE['constant']) + assert_array_equal(l, r) + + # Test with start in the middle + r = [np.array([[4, 0, 1], [4, 2, 3]], dtype=dt), + np.array([[0, 1, 4], [2, 3, 4]], dtype=dt)] + l = _multiarray_tests.test_neighborhood_iterator( + x, [-1, 0, -1, 1], 4, NEIGH_MODE['constant'], 2) + assert_array_equal(l, r) + + def test_mirror2d(self, dt): + x = np.array([[0, 1], [2, 3]], dtype=dt) + r = [np.array([[0, 0, 1], [0, 0, 1]], dtype=dt), + np.array([[0, 1, 1], [0, 1, 1]], dtype=dt), + np.array([[0, 0, 1], [2, 2, 3]], dtype=dt), + np.array([[0, 1, 1], [2, 3, 3]], dtype=dt)] + l = _multiarray_tests.test_neighborhood_iterator( + x, [-1, 0, -1, 1], x[0], NEIGH_MODE['mirror']) + assert_array_equal(l, r) + + # Simple, 1d tests + def test_simple(self, dt): + # Test padding with constant values + x = np.linspace(1, 5, 5).astype(dt) + r = [[0, 1, 2], [1, 2, 3], [2, 3, 4], [3, 4, 5], [4, 5, 0]] + l = _multiarray_tests.test_neighborhood_iterator( + x, [-1, 1], x[0], NEIGH_MODE['zero']) + assert_array_equal(l, r) + + r = [[1, 1, 2], [1, 2, 3], [2, 3, 4], [3, 4, 5], [4, 5, 1]] + l = _multiarray_tests.test_neighborhood_iterator( + x, [-1, 1], x[0], NEIGH_MODE['one']) + assert_array_equal(l, r) + + r = [[x[4], 1, 2], [1, 2, 3], [2, 3, 4], [3, 4, 5], [4, 5, x[4]]] + l = _multiarray_tests.test_neighborhood_iterator( + x, [-1, 1], x[4], NEIGH_MODE['constant']) + assert_array_equal(l, r) + + # Test mirror modes + def test_mirror(self, dt): + x = np.linspace(1, 5, 5).astype(dt) + r = np.array([[2, 1, 1, 2, 3], [1, 1, 2, 3, 4], [1, 2, 3, 4, 5], + [2, 3, 4, 5, 5], [3, 4, 5, 5, 4]], dtype=dt) + l = _multiarray_tests.test_neighborhood_iterator( + x, [-2, 2], x[1], NEIGH_MODE['mirror']) + assert_([i.dtype == dt for i in l]) + assert_array_equal(l, r) + + # Circular mode + def test_circular(self, dt): + x = np.linspace(1, 5, 5).astype(dt) + r = np.array([[4, 5, 1, 2, 3], [5, 1, 2, 3, 4], [1, 2, 3, 4, 5], + [2, 3, 4, 5, 1], [3, 4, 5, 1, 2]], dtype=dt) + l = _multiarray_tests.test_neighborhood_iterator( + x, [-2, 2], x[0], NEIGH_MODE['circular']) + assert_array_equal(l, r) + + +# Test stacking neighborhood iterators +class TestStackedNeighborhoodIter: + # Simple, 1d test: stacking 2 constant-padded neigh iterators + def test_simple_const(self): + dt = np.float64 + # Test zero and one padding for simple data type + x = np.array([1, 2, 3], dtype=dt) + r = [np.array([0], dtype=dt), + np.array([0], dtype=dt), + np.array([1], dtype=dt), + np.array([2], dtype=dt), + np.array([3], dtype=dt), + np.array([0], dtype=dt), + np.array([0], dtype=dt)] + l = _multiarray_tests.test_neighborhood_iterator_oob( + x, [-2, 4], NEIGH_MODE['zero'], [0, 0], NEIGH_MODE['zero']) + assert_array_equal(l, r) + + r = [np.array([1, 0, 1], 
dtype=dt), + np.array([0, 1, 2], dtype=dt), + np.array([1, 2, 3], dtype=dt), + np.array([2, 3, 0], dtype=dt), + np.array([3, 0, 1], dtype=dt)] + l = _multiarray_tests.test_neighborhood_iterator_oob( + x, [-1, 3], NEIGH_MODE['zero'], [-1, 1], NEIGH_MODE['one']) + assert_array_equal(l, r) + + # 2nd simple, 1d test: stacking 2 neigh iterators, mixing const padding and + # mirror padding + def test_simple_mirror(self): + dt = np.float64 + # Stacking zero on top of mirror + x = np.array([1, 2, 3], dtype=dt) + r = [np.array([0, 1, 1], dtype=dt), + np.array([1, 1, 2], dtype=dt), + np.array([1, 2, 3], dtype=dt), + np.array([2, 3, 3], dtype=dt), + np.array([3, 3, 0], dtype=dt)] + l = _multiarray_tests.test_neighborhood_iterator_oob( + x, [-1, 3], NEIGH_MODE['mirror'], [-1, 1], NEIGH_MODE['zero']) + assert_array_equal(l, r) + + # Stacking mirror on top of zero + x = np.array([1, 2, 3], dtype=dt) + r = [np.array([1, 0, 0], dtype=dt), + np.array([0, 0, 1], dtype=dt), + np.array([0, 1, 2], dtype=dt), + np.array([1, 2, 3], dtype=dt), + np.array([2, 3, 0], dtype=dt)] + l = _multiarray_tests.test_neighborhood_iterator_oob( + x, [-1, 3], NEIGH_MODE['zero'], [-2, 0], NEIGH_MODE['mirror']) + assert_array_equal(l, r) + + # Stacking mirror on top of zero: 2nd + x = np.array([1, 2, 3], dtype=dt) + r = [np.array([0, 1, 2], dtype=dt), + np.array([1, 2, 3], dtype=dt), + np.array([2, 3, 0], dtype=dt), + np.array([3, 0, 0], dtype=dt), + np.array([0, 0, 3], dtype=dt)] + l = _multiarray_tests.test_neighborhood_iterator_oob( + x, [-1, 3], NEIGH_MODE['zero'], [0, 2], NEIGH_MODE['mirror']) + assert_array_equal(l, r) + + # Stacking mirror on top of zero: 3rd + x = np.array([1, 2, 3], dtype=dt) + r = [np.array([1, 0, 0, 1, 2], dtype=dt), + np.array([0, 0, 1, 2, 3], dtype=dt), + np.array([0, 1, 2, 3, 0], dtype=dt), + np.array([1, 2, 3, 0, 0], dtype=dt), + np.array([2, 3, 0, 0, 3], dtype=dt)] + l = _multiarray_tests.test_neighborhood_iterator_oob( + x, [-1, 3], NEIGH_MODE['zero'], [-2, 2], NEIGH_MODE['mirror']) + assert_array_equal(l, r) + + # 3rd simple, 1d test: stacking 2 neigh iterators, mixing const padding and + # circular padding + def test_simple_circular(self): + dt = np.float64 + # Stacking zero on top of mirror + x = np.array([1, 2, 3], dtype=dt) + r = [np.array([0, 3, 1], dtype=dt), + np.array([3, 1, 2], dtype=dt), + np.array([1, 2, 3], dtype=dt), + np.array([2, 3, 1], dtype=dt), + np.array([3, 1, 0], dtype=dt)] + l = _multiarray_tests.test_neighborhood_iterator_oob( + x, [-1, 3], NEIGH_MODE['circular'], [-1, 1], NEIGH_MODE['zero']) + assert_array_equal(l, r) + + # Stacking mirror on top of zero + x = np.array([1, 2, 3], dtype=dt) + r = [np.array([3, 0, 0], dtype=dt), + np.array([0, 0, 1], dtype=dt), + np.array([0, 1, 2], dtype=dt), + np.array([1, 2, 3], dtype=dt), + np.array([2, 3, 0], dtype=dt)] + l = _multiarray_tests.test_neighborhood_iterator_oob( + x, [-1, 3], NEIGH_MODE['zero'], [-2, 0], NEIGH_MODE['circular']) + assert_array_equal(l, r) + + # Stacking mirror on top of zero: 2nd + x = np.array([1, 2, 3], dtype=dt) + r = [np.array([0, 1, 2], dtype=dt), + np.array([1, 2, 3], dtype=dt), + np.array([2, 3, 0], dtype=dt), + np.array([3, 0, 0], dtype=dt), + np.array([0, 0, 1], dtype=dt)] + l = _multiarray_tests.test_neighborhood_iterator_oob( + x, [-1, 3], NEIGH_MODE['zero'], [0, 2], NEIGH_MODE['circular']) + assert_array_equal(l, r) + + # Stacking mirror on top of zero: 3rd + x = np.array([1, 2, 3], dtype=dt) + r = [np.array([3, 0, 0, 1, 2], dtype=dt), + np.array([0, 0, 1, 2, 3], dtype=dt), + np.array([0, 1, 2, 
3, 0], dtype=dt), + np.array([1, 2, 3, 0, 0], dtype=dt), + np.array([2, 3, 0, 0, 1], dtype=dt)] + l = _multiarray_tests.test_neighborhood_iterator_oob( + x, [-1, 3], NEIGH_MODE['zero'], [-2, 2], NEIGH_MODE['circular']) + assert_array_equal(l, r) + + # 4th simple, 1d test: stacking 2 neigh iterators, but with lower iterator + # being strictly within the array + def test_simple_strict_within(self): + dt = np.float64 + # Stacking zero on top of zero, first neighborhood strictly inside the + # array + x = np.array([1, 2, 3], dtype=dt) + r = [np.array([1, 2, 3, 0], dtype=dt)] + l = _multiarray_tests.test_neighborhood_iterator_oob( + x, [1, 1], NEIGH_MODE['zero'], [-1, 2], NEIGH_MODE['zero']) + assert_array_equal(l, r) + + # Stacking mirror on top of zero, first neighborhood strictly inside the + # array + x = np.array([1, 2, 3], dtype=dt) + r = [np.array([1, 2, 3, 3], dtype=dt)] + l = _multiarray_tests.test_neighborhood_iterator_oob( + x, [1, 1], NEIGH_MODE['zero'], [-1, 2], NEIGH_MODE['mirror']) + assert_array_equal(l, r) + + # Stacking mirror on top of zero, first neighborhood strictly inside the + # array + x = np.array([1, 2, 3], dtype=dt) + r = [np.array([1, 2, 3, 1], dtype=dt)] + l = _multiarray_tests.test_neighborhood_iterator_oob( + x, [1, 1], NEIGH_MODE['zero'], [-1, 2], NEIGH_MODE['circular']) + assert_array_equal(l, r) + +class TestWarnings: + + def test_complex_warning(self): + x = np.array([1, 2]) + y = np.array([1-2j, 1+2j]) + + with warnings.catch_warnings(): + warnings.simplefilter("error", np.ComplexWarning) + assert_raises(np.ComplexWarning, x.__setitem__, slice(None), y) + assert_equal(x, [1, 2]) + + +class TestMinScalarType: + + def test_usigned_shortshort(self): + dt = np.min_scalar_type(2**8-1) + wanted = np.dtype('uint8') + assert_equal(wanted, dt) + + def test_usigned_short(self): + dt = np.min_scalar_type(2**16-1) + wanted = np.dtype('uint16') + assert_equal(wanted, dt) + + def test_usigned_int(self): + dt = np.min_scalar_type(2**32-1) + wanted = np.dtype('uint32') + assert_equal(wanted, dt) + + def test_usigned_longlong(self): + dt = np.min_scalar_type(2**63-1) + wanted = np.dtype('uint64') + assert_equal(wanted, dt) + + def test_object(self): + dt = np.min_scalar_type(2**64) + wanted = np.dtype('O') + assert_equal(wanted, dt) + + +from numpy.core._internal import _dtype_from_pep3118 + + +class TestPEP3118Dtype: + def _check(self, spec, wanted): + dt = np.dtype(wanted) + actual = _dtype_from_pep3118(spec) + assert_equal(actual, dt, + err_msg="spec %r != dtype %r" % (spec, wanted)) + + def test_native_padding(self): + align = np.dtype('i').alignment + for j in range(8): + if j == 0: + s = 'bi' + else: + s = 'b%dxi' % j + self._check('@'+s, {'f0': ('i1', 0), + 'f1': ('i', align*(1 + j//align))}) + self._check('='+s, {'f0': ('i1', 0), + 'f1': ('i', 1+j)}) + + def test_native_padding_2(self): + # Native padding should work also for structs and sub-arrays + self._check('x3T{xi}', {'f0': (({'f0': ('i', 4)}, (3,)), 4)}) + self._check('^x3T{xi}', {'f0': (({'f0': ('i', 1)}, (3,)), 1)}) + + def test_trailing_padding(self): + # Trailing padding should be included, *and*, the item size + # should match the alignment if in aligned mode + align = np.dtype('i').alignment + size = np.dtype('i').itemsize + + def aligned(n): + return align*(1 + (n-1)//align) + + base = dict(formats=['i'], names=['f0']) + + self._check('ix', dict(itemsize=aligned(size + 1), **base)) + self._check('ixx', dict(itemsize=aligned(size + 2), **base)) + self._check('ixxx', dict(itemsize=aligned(size + 3), 
**base)) + self._check('ixxxx', dict(itemsize=aligned(size + 4), **base)) + self._check('i7x', dict(itemsize=aligned(size + 7), **base)) + + self._check('^ix', dict(itemsize=size + 1, **base)) + self._check('^ixx', dict(itemsize=size + 2, **base)) + self._check('^ixxx', dict(itemsize=size + 3, **base)) + self._check('^ixxxx', dict(itemsize=size + 4, **base)) + self._check('^i7x', dict(itemsize=size + 7, **base)) + + def test_native_padding_3(self): + dt = np.dtype( + [('a', 'b'), ('b', 'i'), + ('sub', np.dtype('b,i')), ('c', 'i')], + align=True) + self._check("T{b:a:xxxi:b:T{b:f0:=i:f1:}:sub:xxxi:c:}", dt) + + dt = np.dtype( + [('a', 'b'), ('b', 'i'), ('c', 'b'), ('d', 'b'), + ('e', 'b'), ('sub', np.dtype('b,i', align=True))]) + self._check("T{b:a:=i:b:b:c:b:d:b:e:T{b:f0:xxxi:f1:}:sub:}", dt) + + def test_padding_with_array_inside_struct(self): + dt = np.dtype( + [('a', 'b'), ('b', 'i'), ('c', 'b', (3,)), + ('d', 'i')], + align=True) + self._check("T{b:a:xxxi:b:3b:c:xi:d:}", dt) + + def test_byteorder_inside_struct(self): + # The byte order after @T{=i} should be '=', not '@'. + # Check this by noting the absence of native alignment. + self._check('@T{^i}xi', {'f0': ({'f0': ('i', 0)}, 0), + 'f1': ('i', 5)}) + + def test_intra_padding(self): + # Natively aligned sub-arrays may require some internal padding + align = np.dtype('i').alignment + size = np.dtype('i').itemsize + + def aligned(n): + return (align*(1 + (n-1)//align)) + + self._check('(3)T{ix}', (dict( + names=['f0'], + formats=['i'], + offsets=[0], + itemsize=aligned(size + 1) + ), (3,))) + + def test_char_vs_string(self): + dt = np.dtype('c') + self._check('c', dt) + + dt = np.dtype([('f0', 'S1', (4,)), ('f1', 'S4')]) + self._check('4c4s', dt) + + def test_field_order(self): + # gh-9053 - previously, we relied on dictionary key order + self._check("(0)I:a:f:b:", [('a', 'I', (0,)), ('b', 'f')]) + self._check("(0)I:b:f:a:", [('b', 'I', (0,)), ('a', 'f')]) + + def test_unnamed_fields(self): + self._check('ii', [('f0', 'i'), ('f1', 'i')]) + self._check('ii:f0:', [('f1', 'i'), ('f0', 'i')]) + + self._check('i', 'i') + self._check('i:f0:', [('f0', 'i')]) + + +class TestNewBufferProtocol: + """ Test PEP3118 buffers """ + + def _check_roundtrip(self, obj): + obj = np.asarray(obj) + x = memoryview(obj) + y = np.asarray(x) + y2 = np.array(x) + assert_(not y.flags.owndata) + assert_(y2.flags.owndata) + + assert_equal(y.dtype, obj.dtype) + assert_equal(y.shape, obj.shape) + assert_array_equal(obj, y) + + assert_equal(y2.dtype, obj.dtype) + assert_equal(y2.shape, obj.shape) + assert_array_equal(obj, y2) + + def test_roundtrip(self): + x = np.array([1, 2, 3, 4, 5], dtype='i4') + self._check_roundtrip(x) + + x = np.array([[1, 2], [3, 4]], dtype=np.float64) + self._check_roundtrip(x) + + x = np.zeros((3, 3, 3), dtype=np.float32)[:, 0,:] + self._check_roundtrip(x) + + dt = [('a', 'b'), + ('b', 'h'), + ('c', 'i'), + ('d', 'l'), + ('dx', 'q'), + ('e', 'B'), + ('f', 'H'), + ('g', 'I'), + ('h', 'L'), + ('hx', 'Q'), + ('i', np.single), + ('j', np.double), + ('k', np.longdouble), + ('ix', np.csingle), + ('jx', np.cdouble), + ('kx', np.clongdouble), + ('l', 'S4'), + ('m', 'U4'), + ('n', 'V3'), + ('o', '?'), + ('p', np.half), + ] + x = np.array( + [(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + b'aaaa', 'bbbb', b'xxx', True, 1.0)], + dtype=dt) + self._check_roundtrip(x) + + x = np.array(([[1, 2], [3, 4]],), dtype=[('a', (int, (2, 2)))]) + self._check_roundtrip(x) + + x = np.array([1, 2, 3], dtype='>i2') + self._check_roundtrip(x) + + x = np.array([1, 
2, 3], dtype='') + x = np.zeros(4, dtype=dt) + self._check_roundtrip(x) + + def test_roundtrip_scalar(self): + # Issue #4015. + self._check_roundtrip(0) + + def test_invalid_buffer_format(self): + # datetime64 cannot be used fully in a buffer yet + # Should be fixed in the next Numpy major release + dt = np.dtype([('a', 'uint16'), ('b', 'M8[s]')]) + a = np.empty(3, dt) + assert_raises((ValueError, BufferError), memoryview, a) + assert_raises((ValueError, BufferError), memoryview, np.array((3), 'M8[D]')) + + def test_export_simple_1d(self): + x = np.array([1, 2, 3, 4, 5], dtype='i') + y = memoryview(x) + assert_equal(y.format, 'i') + assert_equal(y.shape, (5,)) + assert_equal(y.ndim, 1) + assert_equal(y.strides, (4,)) + assert_equal(y.suboffsets, ()) + assert_equal(y.itemsize, 4) + + def test_export_simple_nd(self): + x = np.array([[1, 2], [3, 4]], dtype=np.float64) + y = memoryview(x) + assert_equal(y.format, 'd') + assert_equal(y.shape, (2, 2)) + assert_equal(y.ndim, 2) + assert_equal(y.strides, (16, 8)) + assert_equal(y.suboffsets, ()) + assert_equal(y.itemsize, 8) + + def test_export_discontiguous(self): + x = np.zeros((3, 3, 3), dtype=np.float32)[:, 0,:] + y = memoryview(x) + assert_equal(y.format, 'f') + assert_equal(y.shape, (3, 3)) + assert_equal(y.ndim, 2) + assert_equal(y.strides, (36, 4)) + assert_equal(y.suboffsets, ()) + assert_equal(y.itemsize, 4) + + def test_export_record(self): + dt = [('a', 'b'), + ('b', 'h'), + ('c', 'i'), + ('d', 'l'), + ('dx', 'q'), + ('e', 'B'), + ('f', 'H'), + ('g', 'I'), + ('h', 'L'), + ('hx', 'Q'), + ('i', np.single), + ('j', np.double), + ('k', np.longdouble), + ('ix', np.csingle), + ('jx', np.cdouble), + ('kx', np.clongdouble), + ('l', 'S4'), + ('m', 'U4'), + ('n', 'V3'), + ('o', '?'), + ('p', np.half), + ] + x = np.array( + [(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + b'aaaa', 'bbbb', b' ', True, 1.0)], + dtype=dt) + y = memoryview(x) + assert_equal(y.shape, (1,)) + assert_equal(y.ndim, 1) + assert_equal(y.suboffsets, ()) + + sz = sum([np.dtype(b).itemsize for a, b in dt]) + if np.dtype('l').itemsize == 4: + assert_equal(y.format, 'T{b:a:=h:b:i:c:l:d:q:dx:B:e:@H:f:=I:g:L:h:Q:hx:f:i:d:j:^g:k:=Zf:ix:Zd:jx:^Zg:kx:4s:l:=4w:m:3x:n:?:o:@e:p:}') + else: + assert_equal(y.format, 'T{b:a:=h:b:i:c:q:d:q:dx:B:e:@H:f:=I:g:Q:h:Q:hx:f:i:d:j:^g:k:=Zf:ix:Zd:jx:^Zg:kx:4s:l:=4w:m:3x:n:?:o:@e:p:}') + # Cannot test if NPY_RELAXED_STRIDES_CHECKING changes the strides + if not (np.ones(1).strides[0] == np.iinfo(np.intp).max): + assert_equal(y.strides, (sz,)) + assert_equal(y.itemsize, sz) + + def test_export_subarray(self): + x = np.array(([[1, 2], [3, 4]],), dtype=[('a', ('i', (2, 2)))]) + y = memoryview(x) + assert_equal(y.format, 'T{(2,2)i:a:}') + assert_equal(y.shape, ()) + assert_equal(y.ndim, 0) + assert_equal(y.strides, ()) + assert_equal(y.suboffsets, ()) + assert_equal(y.itemsize, 16) + + def test_export_endian(self): + x = np.array([1, 2, 3], dtype='>i') + y = memoryview(x) + if sys.byteorder == 'little': + assert_equal(y.format, '>i') + else: + assert_equal(y.format, 'i') + + x = np.array([1, 2, 3], dtype=' np.array(0, dtype=dt1), "type %s failed" % (dt1,)) + assert_(not 1 < np.array(0, dtype=dt1), "type %s failed" % (dt1,)) + + for dt2 in np.typecodes['AllInteger']: + assert_(np.array(1, dtype=dt1) > np.array(0, dtype=dt2), + "type %s and %s failed" % (dt1, dt2)) + assert_(not np.array(1, dtype=dt1) < np.array(0, dtype=dt2), + "type %s and %s failed" % (dt1, dt2)) + + # Unsigned integers + for dt1 in 'BHILQP': + assert_(-1 < np.array(1, dtype=dt1), 
"type %s failed" % (dt1,)) + assert_(not -1 > np.array(1, dtype=dt1), "type %s failed" % (dt1,)) + assert_(-1 != np.array(1, dtype=dt1), "type %s failed" % (dt1,)) + + # Unsigned vs signed + for dt2 in 'bhilqp': + assert_(np.array(1, dtype=dt1) > np.array(-1, dtype=dt2), + "type %s and %s failed" % (dt1, dt2)) + assert_(not np.array(1, dtype=dt1) < np.array(-1, dtype=dt2), + "type %s and %s failed" % (dt1, dt2)) + assert_(np.array(1, dtype=dt1) != np.array(-1, dtype=dt2), + "type %s and %s failed" % (dt1, dt2)) + + # Signed integers and floats + for dt1 in 'bhlqp' + np.typecodes['Float']: + assert_(1 > np.array(-1, dtype=dt1), "type %s failed" % (dt1,)) + assert_(not 1 < np.array(-1, dtype=dt1), "type %s failed" % (dt1,)) + assert_(-1 == np.array(-1, dtype=dt1), "type %s failed" % (dt1,)) + + for dt2 in 'bhlqp' + np.typecodes['Float']: + assert_(np.array(1, dtype=dt1) > np.array(-1, dtype=dt2), + "type %s and %s failed" % (dt1, dt2)) + assert_(not np.array(1, dtype=dt1) < np.array(-1, dtype=dt2), + "type %s and %s failed" % (dt1, dt2)) + assert_(np.array(-1, dtype=dt1) == np.array(-1, dtype=dt2), + "type %s and %s failed" % (dt1, dt2)) + + def test_to_bool_scalar(self): + assert_equal(bool(np.array([False])), False) + assert_equal(bool(np.array([True])), True) + assert_equal(bool(np.array([[42]])), True) + assert_raises(ValueError, bool, np.array([1, 2])) + + class NotConvertible: + def __bool__(self): + raise NotImplementedError + + assert_raises(NotImplementedError, bool, np.array(NotConvertible())) + assert_raises(NotImplementedError, bool, np.array([NotConvertible()])) + if IS_PYSTON: + pytest.skip("Pyston disables recursion checking") + + self_containing = np.array([None]) + self_containing[0] = self_containing + try: + Error = RecursionError + except NameError: + Error = RuntimeError # python < 3.5 + assert_raises(Error, bool, self_containing) # previously stack overflow + self_containing[0] = None # resolve circular reference + + def test_to_int_scalar(self): + # gh-9972 means that these aren't always the same + int_funcs = (int, lambda x: x.__int__()) + for int_func in int_funcs: + assert_equal(int_func(np.array(0)), 0) + assert_equal(int_func(np.array([1])), 1) + assert_equal(int_func(np.array([[42]])), 42) + assert_raises(TypeError, int_func, np.array([1, 2])) + + # gh-9972 + assert_equal(4, int_func(np.array('4'))) + assert_equal(5, int_func(np.bytes_(b'5'))) + assert_equal(6, int_func(np.unicode_(u'6'))) + + class HasTrunc: + def __trunc__(self): + return 3 + assert_equal(3, int_func(np.array(HasTrunc()))) + assert_equal(3, int_func(np.array([HasTrunc()]))) + + class NotConvertible: + def __int__(self): + raise NotImplementedError + assert_raises(NotImplementedError, + int_func, np.array(NotConvertible())) + assert_raises(NotImplementedError, + int_func, np.array([NotConvertible()])) + + +class TestWhere: + def test_basic(self): + dts = [bool, np.int16, np.int32, np.int64, np.double, np.complex128, + np.longdouble, np.clongdouble] + for dt in dts: + c = np.ones(53, dtype=bool) + assert_equal(np.where( c, dt(0), dt(1)), dt(0)) + assert_equal(np.where(~c, dt(0), dt(1)), dt(1)) + assert_equal(np.where(True, dt(0), dt(1)), dt(0)) + assert_equal(np.where(False, dt(0), dt(1)), dt(1)) + d = np.ones_like(c).astype(dt) + e = np.zeros_like(d) + r = d.astype(dt) + c[7] = False + r[7] = e[7] + assert_equal(np.where(c, e, e), e) + assert_equal(np.where(c, d, e), r) + assert_equal(np.where(c, d, e[0]), r) + assert_equal(np.where(c, d[0], e), r) + assert_equal(np.where(c[::2], d[::2], 
e[::2]), r[::2]) + assert_equal(np.where(c[1::2], d[1::2], e[1::2]), r[1::2]) + assert_equal(np.where(c[::3], d[::3], e[::3]), r[::3]) + assert_equal(np.where(c[1::3], d[1::3], e[1::3]), r[1::3]) + assert_equal(np.where(c[::-2], d[::-2], e[::-2]), r[::-2]) + assert_equal(np.where(c[::-3], d[::-3], e[::-3]), r[::-3]) + assert_equal(np.where(c[1::-3], d[1::-3], e[1::-3]), r[1::-3]) + + def test_exotic(self): + # object + assert_array_equal(np.where(True, None, None), np.array(None)) + # zero sized + m = np.array([], dtype=bool).reshape(0, 3) + b = np.array([], dtype=np.float64).reshape(0, 3) + assert_array_equal(np.where(m, 0, b), np.array([]).reshape(0, 3)) + + # object cast + d = np.array([-1.34, -0.16, -0.54, -0.31, -0.08, -0.95, 0.000, 0.313, + 0.547, -0.18, 0.876, 0.236, 1.969, 0.310, 0.699, 1.013, + 1.267, 0.229, -1.39, 0.487]) + nan = float('NaN') + e = np.array(['5z', '0l', nan, 'Wz', nan, nan, 'Xq', 'cs', nan, nan, + 'QN', nan, nan, 'Fd', nan, nan, 'kp', nan, '36', 'i1'], + dtype=object) + m = np.array([0, 0, 1, 0, 1, 1, 0, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 0, 0], dtype=bool) + + r = e[:] + r[np.where(m)] = d[np.where(m)] + assert_array_equal(np.where(m, d, e), r) + + r = e[:] + r[np.where(~m)] = d[np.where(~m)] + assert_array_equal(np.where(m, e, d), r) + + assert_array_equal(np.where(m, e, e), e) + + # minimal dtype result with NaN scalar (e.g required by pandas) + d = np.array([1., 2.], dtype=np.float32) + e = float('NaN') + assert_equal(np.where(True, d, e).dtype, np.float32) + e = float('Infinity') + assert_equal(np.where(True, d, e).dtype, np.float32) + e = float('-Infinity') + assert_equal(np.where(True, d, e).dtype, np.float32) + # also check upcast + e = float(1e150) + assert_equal(np.where(True, d, e).dtype, np.float64) + + def test_ndim(self): + c = [True, False] + a = np.zeros((2, 25)) + b = np.ones((2, 25)) + r = np.where(np.array(c)[:,np.newaxis], a, b) + assert_array_equal(r[0], a[0]) + assert_array_equal(r[1], b[0]) + + a = a.T + b = b.T + r = np.where(c, a, b) + assert_array_equal(r[:,0], a[:,0]) + assert_array_equal(r[:,1], b[:,0]) + + def test_dtype_mix(self): + c = np.array([False, True, False, False, False, False, True, False, + False, False, True, False]) + a = np.uint32(1) + b = np.array([5., 0., 3., 2., -1., -4., 0., -10., 10., 1., 0., 3.], + dtype=np.float64) + r = np.array([5., 1., 3., 2., -1., -4., 1., -10., 10., 1., 1., 3.], + dtype=np.float64) + assert_equal(np.where(c, a, b), r) + + a = a.astype(np.float32) + b = b.astype(np.int64) + assert_equal(np.where(c, a, b), r) + + # non bool mask + c = c.astype(int) + c[c != 0] = 34242324 + assert_equal(np.where(c, a, b), r) + # invert + tmpmask = c != 0 + c[c == 0] = 41247212 + c[tmpmask] = 0 + assert_equal(np.where(c, b, a), r) + + def test_foreign(self): + c = np.array([False, True, False, False, False, False, True, False, + False, False, True, False]) + r = np.array([5., 1., 3., 2., -1., -4., 1., -10., 10., 1., 1., 3.], + dtype=np.float64) + a = np.ones(1, dtype='>i4') + b = np.array([5., 0., 3., 2., -1., -4., 0., -10., 10., 1., 0., 3.], + dtype=np.float64) + assert_equal(np.where(c, a, b), r) + + b = b.astype('>f8') + assert_equal(np.where(c, a, b), r) + + a = a.astype('i4') + assert_equal(np.where(c, a, b), r) + + def test_error(self): + c = [True, True] + a = np.ones((4, 5)) + b = np.ones((5, 5)) + assert_raises(ValueError, np.where, c, a, a) + assert_raises(ValueError, np.where, c[0], a, b) + + def test_string(self): + # gh-4778 check strings are properly filled with nulls + a = np.array("abc") + b = 
np.array("x" * 753) + assert_equal(np.where(True, a, b), "abc") + assert_equal(np.where(False, b, a), "abc") + + # check native datatype sized strings + a = np.array("abcd") + b = np.array("x" * 8) + assert_equal(np.where(True, a, b), "abcd") + assert_equal(np.where(False, b, a), "abcd") + + def test_empty_result(self): + # pass empty where result through an assignment which reads the data of + # empty arrays, error detectable with valgrind, see gh-8922 + x = np.zeros((1, 1)) + ibad = np.vstack(np.where(x == 99.)) + assert_array_equal(ibad, + np.atleast_2d(np.array([[],[]], dtype=np.intp))) + + def test_largedim(self): + # invalid read regression gh-9304 + shape = [10, 2, 3, 4, 5, 6] + np.random.seed(2) + array = np.random.rand(*shape) + + for i in range(10): + benchmark = array.nonzero() + result = array.nonzero() + assert_array_equal(benchmark, result) + + +if not IS_PYPY: + # sys.getsizeof() is not valid on PyPy + class TestSizeOf: + + def test_empty_array(self): + x = np.array([]) + assert_(sys.getsizeof(x) > 0) + + def check_array(self, dtype): + elem_size = dtype(0).itemsize + + for length in [10, 50, 100, 500]: + x = np.arange(length, dtype=dtype) + assert_(sys.getsizeof(x) > length * elem_size) + + def test_array_int32(self): + self.check_array(np.int32) + + def test_array_int64(self): + self.check_array(np.int64) + + def test_array_float32(self): + self.check_array(np.float32) + + def test_array_float64(self): + self.check_array(np.float64) + + def test_view(self): + d = np.ones(100) + assert_(sys.getsizeof(d[...]) < sys.getsizeof(d)) + + def test_reshape(self): + d = np.ones(100) + assert_(sys.getsizeof(d) < sys.getsizeof(d.reshape(100, 1, 1).copy())) + + @_no_tracing + def test_resize(self): + d = np.ones(100) + old = sys.getsizeof(d) + d.resize(50) + assert_(old > sys.getsizeof(d)) + d.resize(150) + assert_(old < sys.getsizeof(d)) + + def test_error(self): + d = np.ones(100) + assert_raises(TypeError, d.__sizeof__, "a") + + +class TestHashing: + + def test_arrays_not_hashable(self): + x = np.ones(3) + assert_raises(TypeError, hash, x) + + def test_collections_hashable(self): + x = np.array([]) + assert_(not isinstance(x, collections.abc.Hashable)) + + +class TestArrayPriority: + # This will go away when __array_priority__ is settled, meanwhile + # it serves to check unintended changes. + op = operator + binary_ops = [ + op.pow, op.add, op.sub, op.mul, op.floordiv, op.truediv, op.mod, + op.and_, op.or_, op.xor, op.lshift, op.rshift, op.mod, op.gt, + op.ge, op.lt, op.le, op.ne, op.eq + ] + + class Foo(np.ndarray): + __array_priority__ = 100. + + def __new__(cls, *args, **kwargs): + return np.array(*args, **kwargs).view(cls) + + class Bar(np.ndarray): + __array_priority__ = 101. + + def __new__(cls, *args, **kwargs): + return np.array(*args, **kwargs).view(cls) + + class Other: + __array_priority__ = 1000. 
+ + def _all(self, other): + return self.__class__() + + __add__ = __radd__ = _all + __sub__ = __rsub__ = _all + __mul__ = __rmul__ = _all + __pow__ = __rpow__ = _all + __div__ = __rdiv__ = _all + __mod__ = __rmod__ = _all + __truediv__ = __rtruediv__ = _all + __floordiv__ = __rfloordiv__ = _all + __and__ = __rand__ = _all + __xor__ = __rxor__ = _all + __or__ = __ror__ = _all + __lshift__ = __rlshift__ = _all + __rshift__ = __rrshift__ = _all + __eq__ = _all + __ne__ = _all + __gt__ = _all + __ge__ = _all + __lt__ = _all + __le__ = _all + + def test_ndarray_subclass(self): + a = np.array([1, 2]) + b = self.Bar([1, 2]) + for f in self.binary_ops: + msg = repr(f) + assert_(isinstance(f(a, b), self.Bar), msg) + assert_(isinstance(f(b, a), self.Bar), msg) + + def test_ndarray_other(self): + a = np.array([1, 2]) + b = self.Other() + for f in self.binary_ops: + msg = repr(f) + assert_(isinstance(f(a, b), self.Other), msg) + assert_(isinstance(f(b, a), self.Other), msg) + + def test_subclass_subclass(self): + a = self.Foo([1, 2]) + b = self.Bar([1, 2]) + for f in self.binary_ops: + msg = repr(f) + assert_(isinstance(f(a, b), self.Bar), msg) + assert_(isinstance(f(b, a), self.Bar), msg) + + def test_subclass_other(self): + a = self.Foo([1, 2]) + b = self.Other() + for f in self.binary_ops: + msg = repr(f) + assert_(isinstance(f(a, b), self.Other), msg) + assert_(isinstance(f(b, a), self.Other), msg) + + +class TestBytestringArrayNonzero: + + def test_empty_bstring_array_is_falsey(self): + assert_(not np.array([''], dtype=str)) + + def test_whitespace_bstring_array_is_falsey(self): + a = np.array(['spam'], dtype=str) + a[0] = ' \0\0' + assert_(not a) + + def test_all_null_bstring_array_is_falsey(self): + a = np.array(['spam'], dtype=str) + a[0] = '\0\0\0\0' + assert_(not a) + + def test_null_inside_bstring_array_is_truthy(self): + a = np.array(['spam'], dtype=str) + a[0] = ' \0 \0' + assert_(a) + + +class TestUnicodeEncoding: + """ + Tests for encoding related bugs, such as UCS2 vs UCS4, round-tripping + issues, etc + """ + def test_round_trip(self): + """ Tests that GETITEM, SETITEM, and PyArray_Scalar roundtrip """ + # gh-15363 + arr = np.zeros(shape=(), dtype="U1") + for i in range(1, sys.maxunicode + 1): + expected = chr(i) + arr[()] = expected + assert arr[()] == expected + assert arr.item() == expected + + def test_assign_scalar(self): + # gh-3258 + l = np.array(['aa', 'bb']) + l[:] = np.unicode_('cc') + assert_equal(l, ['cc', 'cc']) + + def test_fill_scalar(self): + # gh-7227 + l = np.array(['aa', 'bb']) + l.fill(np.unicode_('cc')) + assert_equal(l, ['cc', 'cc']) + + +class TestUnicodeArrayNonzero: + + def test_empty_ustring_array_is_falsey(self): + assert_(not np.array([''], dtype=np.unicode_)) + + def test_whitespace_ustring_array_is_falsey(self): + a = np.array(['eggs'], dtype=np.unicode_) + a[0] = ' \0\0' + assert_(not a) + + def test_all_null_ustring_array_is_falsey(self): + a = np.array(['eggs'], dtype=np.unicode_) + a[0] = '\0\0\0\0' + assert_(not a) + + def test_null_inside_ustring_array_is_truthy(self): + a = np.array(['eggs'], dtype=np.unicode_) + a[0] = ' \0 \0' + assert_(a) + + +class TestFormat: + + def test_0d(self): + a = np.array(np.pi) + assert_equal('{:0.3g}'.format(a), '3.14') + assert_equal('{:0.3g}'.format(a[()]), '3.14') + + def test_1d_no_format(self): + a = np.array([np.pi]) + assert_equal('{}'.format(a), str(a)) + + def test_1d_format(self): + # until gh-5543, ensure that the behaviour matches what it used to be + a = np.array([np.pi]) + assert_raises(TypeError, 
'{:30}'.format, a) + +from numpy.testing import IS_PYPY + +class TestCTypes: + + def test_ctypes_is_available(self): + test_arr = np.array([[1, 2, 3], [4, 5, 6]]) + + assert_equal(ctypes, test_arr.ctypes._ctypes) + assert_equal(tuple(test_arr.ctypes.shape), (2, 3)) + + def test_ctypes_is_not_available(self): + from numpy.core import _internal + _internal.ctypes = None + try: + test_arr = np.array([[1, 2, 3], [4, 5, 6]]) + + assert_(isinstance(test_arr.ctypes._ctypes, + _internal._missing_ctypes)) + assert_equal(tuple(test_arr.ctypes.shape), (2, 3)) + finally: + _internal.ctypes = ctypes + + def _make_readonly(x): + x.flags.writeable = False + return x + + @pytest.mark.parametrize('arr', [ + np.array([1, 2, 3]), + np.array([['one', 'two'], ['three', 'four']]), + np.array((1, 2), dtype='i4,i4'), + np.zeros((2,), dtype= + np.dtype(dict( + formats=['2, [44, 55]) + assert_equal(a, np.array([[0, 44], [1, 55], [2, 44]])) + # hit one of the failing paths + assert_raises(ValueError, np.place, a, a>20, []) + + def test_put_noncontiguous(self): + a = np.arange(6).reshape(2,3).T # force non-c-contiguous + np.put(a, [0, 2], [44, 55]) + assert_equal(a, np.array([[44, 3], [55, 4], [2, 5]])) + + def test_putmask_noncontiguous(self): + a = np.arange(6).reshape(2,3).T # force non-c-contiguous + # uses arr_putmask + np.putmask(a, a>2, a**2) + assert_equal(a, np.array([[0, 9], [1, 16], [2, 25]])) + + def test_take_mode_raise(self): + a = np.arange(6, dtype='int') + out = np.empty(2, dtype='int') + np.take(a, [0, 2], out=out, mode='raise') + assert_equal(out, np.array([0, 2])) + + def test_choose_mod_raise(self): + a = np.array([[1, 0, 1], [0, 1, 0], [1, 0, 1]]) + out = np.empty((3,3), dtype='int') + choices = [-10, 10] + np.choose(a, choices, out=out, mode='raise') + assert_equal(out, np.array([[ 10, -10, 10], + [-10, 10, -10], + [ 10, -10, 10]])) + + def test_flatiter__array__(self): + a = np.arange(9).reshape(3,3) + b = a.T.flat + c = b.__array__() + # triggers the WRITEBACKIFCOPY resolution, assuming refcount semantics + del c + + def test_dot_out(self): + # if HAVE_CBLAS, will use WRITEBACKIFCOPY + a = np.arange(9, dtype=float).reshape(3,3) + b = np.dot(a, a, out=a) + assert_equal(b, np.array([[15, 18, 21], [42, 54, 66], [69, 90, 111]])) + + def test_view_assign(self): + from numpy.core._multiarray_tests import npy_create_writebackifcopy, npy_resolve + + arr = np.arange(9).reshape(3, 3).T + arr_wb = npy_create_writebackifcopy(arr) + assert_(arr_wb.flags.writebackifcopy) + assert_(arr_wb.base is arr) + arr_wb[...] = -100 + npy_resolve(arr_wb) + # arr changes after resolve, even though we assigned to arr_wb + assert_equal(arr, -100) + # after resolve, the two arrays no longer reference each other + assert_(arr_wb.ctypes.data != 0) + assert_equal(arr_wb.base, None) + # assigning to arr_wb does not get transferred to arr + arr_wb[...] 
= 100 + assert_equal(arr, -100) + + @pytest.mark.leaks_references( + reason="increments self in dealloc; ignore since deprecated path.") + def test_dealloc_warning(self): + with suppress_warnings() as sup: + sup.record(RuntimeWarning) + arr = np.arange(9).reshape(3, 3) + v = arr.T + _multiarray_tests.npy_abuse_writebackifcopy(v) + assert len(sup.log) == 1 + + def test_view_discard_refcount(self): + from numpy.core._multiarray_tests import npy_create_writebackifcopy, npy_discard + + arr = np.arange(9).reshape(3, 3).T + orig = arr.copy() + if HAS_REFCOUNT: + arr_cnt = sys.getrefcount(arr) + arr_wb = npy_create_writebackifcopy(arr) + assert_(arr_wb.flags.writebackifcopy) + assert_(arr_wb.base is arr) + arr_wb[...] = -100 + npy_discard(arr_wb) + # arr remains unchanged after discard + assert_equal(arr, orig) + # after discard, the two arrays no longer reference each other + assert_(arr_wb.ctypes.data != 0) + assert_equal(arr_wb.base, None) + if HAS_REFCOUNT: + assert_equal(arr_cnt, sys.getrefcount(arr)) + # assigning to arr_wb does not get transferred to arr + arr_wb[...] = 100 + assert_equal(arr, orig) + + +class TestArange: + def test_infinite(self): + assert_raises_regex( + ValueError, "size exceeded", + np.arange, 0, np.inf + ) + + def test_nan_step(self): + assert_raises_regex( + ValueError, "cannot compute length", + np.arange, 0, 1, np.nan + ) + + def test_zero_step(self): + assert_raises(ZeroDivisionError, np.arange, 0, 10, 0) + assert_raises(ZeroDivisionError, np.arange, 0.0, 10.0, 0.0) + + # empty range + assert_raises(ZeroDivisionError, np.arange, 0, 0, 0) + assert_raises(ZeroDivisionError, np.arange, 0.0, 0.0, 0.0) + + def test_require_range(self): + assert_raises(TypeError, np.arange) + assert_raises(TypeError, np.arange, step=3) + assert_raises(TypeError, np.arange, dtype='int64') + assert_raises(TypeError, np.arange, start=4) + + def test_start_stop_kwarg(self): + keyword_stop = np.arange(stop=3) + keyword_zerotostop = np.arange(start=0, stop=3) + keyword_start_stop = np.arange(start=3, stop=9) + + assert len(keyword_stop) == 3 + assert len(keyword_zerotostop) == 3 + assert len(keyword_start_stop) == 6 + assert_array_equal(keyword_stop, keyword_zerotostop) + + +class TestArrayFinalize: + """ Tests __array_finalize__ """ + + def test_receives_base(self): + # gh-11237 + class SavesBase(np.ndarray): + def __array_finalize__(self, obj): + self.saved_base = self.base + + a = np.array(1).view(SavesBase) + assert_(a.saved_base is a.base) + + def test_bad_finalize(self): + class BadAttributeArray(np.ndarray): + @property + def __array_finalize__(self): + raise RuntimeError("boohoo!") + + with pytest.raises(RuntimeError, match="boohoo!"): + np.arange(10).view(BadAttributeArray) + + def test_lifetime_on_error(self): + # gh-11237 + class RaisesInFinalize(np.ndarray): + def __array_finalize__(self, obj): + # crash, but keep this object alive + raise Exception(self) + + # a plain object can't be weakref'd + class Dummy: pass + + # get a weak reference to an object within an array + obj_arr = np.array(Dummy()) + obj_ref = weakref.ref(obj_arr[()]) + + # get an array that crashed in __array_finalize__ + with assert_raises(Exception) as e: + obj_arr.view(RaisesInFinalize) + + obj_subarray = e.exception.args[0] + del e + assert_(isinstance(obj_subarray, RaisesInFinalize)) + + # reference should still be held by obj_arr + break_cycles() + assert_(obj_ref() is not None, "object should not already be dead") + + del obj_arr + break_cycles() + assert_(obj_ref() is not None, "obj_arr should not hold 
the last reference")
+
+ del obj_subarray
+ break_cycles()
+ assert_(obj_ref() is None, "no references should remain")
+
+
+def test_orderconverter_with_nonASCII_unicode_ordering():
+ # gh-7475
+ a = np.arange(5)
+ assert_raises(ValueError, a.flatten, order=u'\xe2')
+
+
+def test_equal_override():
+ # gh-9153: ndarray.__eq__ uses special logic for structured arrays, which
+ # did not respect overrides with __array_priority__ or __array_ufunc__.
+ # The PR fixed this for __array_priority__ and __array_ufunc__ = None.
+ class MyAlwaysEqual:
+ def __eq__(self, other):
+ return "eq"
+
+ def __ne__(self, other):
+ return "ne"
+
+ class MyAlwaysEqualOld(MyAlwaysEqual):
+ __array_priority__ = 10000
+
+ class MyAlwaysEqualNew(MyAlwaysEqual):
+ __array_ufunc__ = None
+
+ array = np.array([(0, 1), (2, 3)], dtype='i4,i4')
+ for my_always_equal_cls in MyAlwaysEqualOld, MyAlwaysEqualNew:
+ my_always_equal = my_always_equal_cls()
+ assert_equal(my_always_equal == array, 'eq')
+ assert_equal(array == my_always_equal, 'eq')
+ assert_equal(my_always_equal != array, 'ne')
+ assert_equal(array != my_always_equal, 'ne')
+
+
+@pytest.mark.parametrize(
+ ["fun", "npfun"],
+ [
+ (_multiarray_tests.npy_cabs, np.absolute),
+ (_multiarray_tests.npy_carg, np.angle)
+ ]
+)
+@pytest.mark.parametrize("x", [1, np.inf, -np.inf, np.nan])
+@pytest.mark.parametrize("y", [1, np.inf, -np.inf, np.nan])
+@pytest.mark.parametrize("test_dtype", np.complexfloating.__subclasses__())
+def test_npymath_complex(fun, npfun, x, y, test_dtype):
+ # Smoketest npymath functions
+ z = test_dtype(complex(x, y))
+ got = fun(z)
+ expected = npfun(z)
+ assert_allclose(got, expected)
+
+
+def test_npymath_real():
+ # Smoketest npymath functions
+ from numpy.core._multiarray_tests import (
+ npy_log10, npy_cosh, npy_sinh, npy_tan, npy_tanh)
+
+ funcs = {npy_log10: np.log10,
+ npy_cosh: np.cosh,
+ npy_sinh: np.sinh,
+ npy_tan: np.tan,
+ npy_tanh: np.tanh}
+ vals = (1, np.inf, -np.inf, np.nan)
+ types = (np.float32, np.float64, np.longdouble)
+
+ with np.errstate(all='ignore'):
+ for fun, npfun in funcs.items():
+ for x, t in itertools.product(vals, types):
+ z = t(x)
+ got = fun(z)
+ expected = npfun(z)
+ assert_allclose(got, expected)
+
+def test_uintalignment_and_alignment():
+ # alignment code needs to satisfy these requirements:
+ # 1. numpy structs match C struct layout
+ # 2. ufuncs/casting is safe wrt aligned access
+ # 3. copy code is safe wrt "uint aligned" access
+ #
+ # Complex types are the main problem, whose alignment may not be the same
+ # as their "uint alignment".
+ #
+ # This test might only fail on certain platforms, where uint64 alignment is
+ # not equal to complex64 alignment. The last two checks will only fail
+ # when numpy is built with DEBUG=1.
+
+ d1 = np.dtype('u1,c8', align=True)
+ d2 = np.dtype('u4,c8', align=True)
+ d3 = np.dtype({'names': ['a', 'b'], 'formats': ['u1', d1]}, align=True)
+
+ assert_equal(np.zeros(1, dtype=d1)['f1'].flags['ALIGNED'], True)
+ assert_equal(np.zeros(1, dtype=d2)['f1'].flags['ALIGNED'], True)
+ assert_equal(np.zeros(1, dtype='u1,c8')['f1'].flags['ALIGNED'], False)
+
+ # check that C struct matches numpy struct size
+ s = _multiarray_tests.get_struct_alignments()
+ for d, (alignment, size) in zip([d1,d2,d3], s):
+ assert_equal(d.alignment, alignment)
+ assert_equal(d.itemsize, size)
+
+ # check that ufuncs don't complain in debug mode
+ # (this is probably OK if the aligned flag is true above)
+ src = np.zeros((2,2), dtype=d1)['f1'] # 4-byte aligned, often
+ np.exp(src) # assert fails? 
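+ # Rough illustration of the "uint alignment" issue above, assuming a
+ # typical platform where complex64 aligns to 4 bytes (its float32 halves):
+ #
+ # >>> np.dtype('u1,c8', align=True).fields['f1'][1] # aligned offset of c8
+ # 4
+ # >>> np.dtype('u1,c8').fields['f1'][1] # packed offset
+ # 1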
+ + # check that copy code doesn't complain in debug mode + dst = np.zeros((2,2), dtype='c8') + dst[:,1] = src[:,1] # assert in lowlevel_strided_loops fails? + +class TestAlignment: + # adapted from scipy._lib.tests.test__util.test__aligned_zeros + # Checks that unusual memory alignments don't trip up numpy. + # In particular, check RELAXED_STRIDES don't trip alignment assertions in + # NDEBUG mode for size-0 arrays (gh-12503) + + def check(self, shape, dtype, order, align): + err_msg = repr((shape, dtype, order, align)) + x = _aligned_zeros(shape, dtype, order, align=align) + if align is None: + align = np.dtype(dtype).alignment + assert_equal(x.__array_interface__['data'][0] % align, 0) + if hasattr(shape, '__len__'): + assert_equal(x.shape, shape, err_msg) + else: + assert_equal(x.shape, (shape,), err_msg) + assert_equal(x.dtype, dtype) + if order == "C": + assert_(x.flags.c_contiguous, err_msg) + elif order == "F": + if x.size > 0: + assert_(x.flags.f_contiguous, err_msg) + elif order is None: + assert_(x.flags.c_contiguous, err_msg) + else: + raise ValueError() + + def test_various_alignments(self): + for align in [1, 2, 3, 4, 8, 12, 16, 32, 64, None]: + for n in [0, 1, 3, 11]: + for order in ["C", "F", None]: + for dtype in list(np.typecodes["All"]) + ['i4,i4,i4']: + if dtype == 'O': + # object dtype can't be misaligned + continue + for shape in [n, (1, 2, 3, n)]: + self.check(shape, np.dtype(dtype), order, align) + + def test_strided_loop_alignments(self): + # particularly test that complex64 and float128 use right alignment + # code-paths, since these are particularly problematic. It is useful to + # turn on USE_DEBUG for this test, so lowlevel-loop asserts are run. + for align in [1, 2, 4, 8, 12, 16, None]: + xf64 = _aligned_zeros(3, np.float64) + + xc64 = _aligned_zeros(3, np.complex64, align=align) + xf128 = _aligned_zeros(3, np.longdouble, align=align) + + # test casting, both to and from misaligned + with suppress_warnings() as sup: + sup.filter(np.ComplexWarning, "Casting complex values") + xc64.astype('f8') + xf64.astype(np.complex64) + test = xc64 + xf64 + + xf128.astype('f8') + xf64.astype(np.longdouble) + test = xf128 + xf64 + + test = xf128 + xc64 + + # test copy, both to and from misaligned + # contig copy + xf64[:] = xf64.copy() + xc64[:] = xc64.copy() + xf128[:] = xf128.copy() + # strided copy + xf64[::2] = xf64[::2].copy() + xc64[::2] = xc64[::2].copy() + xf128[::2] = xf128[::2].copy() + +def test_getfield(): + a = np.arange(32, dtype='uint16') + if sys.byteorder == 'little': + i = 0 + j = 1 + else: + i = 1 + j = 0 + b = a.getfield('int8', i) + assert_equal(b, a) + b = a.getfield('int8', j) + assert_equal(b, 0) + pytest.raises(ValueError, a.getfield, 'uint8', -1) + pytest.raises(ValueError, a.getfield, 'uint8', 16) + pytest.raises(ValueError, a.getfield, 'uint64', 0) diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_nditer.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_nditer.py new file mode 100644 index 0000000..ed775ca --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_nditer.py @@ -0,0 +1,3280 @@ +import sys +import pytest + +import textwrap +import subprocess + +import numpy as np +import numpy.core._multiarray_tests as _multiarray_tests +from numpy import array, arange, nditer, all +from numpy.testing import ( + assert_, assert_equal, assert_array_equal, assert_raises, + HAS_REFCOUNT, suppress_warnings, break_cycles + ) + + +def iter_multi_index(i): + ret = [] + while not i.finished: + 
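        # record the multi-index at each position; iternext() advances the
+        # iterator and returns False once iteration is finished
+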
ret.append(i.multi_index) + i.iternext() + return ret + +def iter_indices(i): + ret = [] + while not i.finished: + ret.append(i.index) + i.iternext() + return ret + +def iter_iterindices(i): + ret = [] + while not i.finished: + ret.append(i.iterindex) + i.iternext() + return ret + +@pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") +def test_iter_refcount(): + # Make sure the iterator doesn't leak + + # Basic + a = arange(6) + dt = np.dtype('f4').newbyteorder() + rc_a = sys.getrefcount(a) + rc_dt = sys.getrefcount(dt) + with nditer(a, [], + [['readwrite', 'updateifcopy']], + casting='unsafe', + op_dtypes=[dt]) as it: + assert_(not it.iterationneedsapi) + assert_(sys.getrefcount(a) > rc_a) + assert_(sys.getrefcount(dt) > rc_dt) + # del 'it' + it = None + assert_equal(sys.getrefcount(a), rc_a) + assert_equal(sys.getrefcount(dt), rc_dt) + + # With a copy + a = arange(6, dtype='f4') + dt = np.dtype('f4') + rc_a = sys.getrefcount(a) + rc_dt = sys.getrefcount(dt) + it = nditer(a, [], + [['readwrite']], + op_dtypes=[dt]) + rc2_a = sys.getrefcount(a) + rc2_dt = sys.getrefcount(dt) + it2 = it.copy() + assert_(sys.getrefcount(a) > rc2_a) + assert_(sys.getrefcount(dt) > rc2_dt) + it = None + assert_equal(sys.getrefcount(a), rc2_a) + assert_equal(sys.getrefcount(dt), rc2_dt) + it2 = None + assert_equal(sys.getrefcount(a), rc_a) + assert_equal(sys.getrefcount(dt), rc_dt) + + del it2 # avoid pyflakes unused variable warning + +def test_iter_best_order(): + # The iterator should always find the iteration order + # with increasing memory addresses + + # Test the ordering for 1-D to 5-D shapes + for shape in [(5,), (3, 4), (2, 3, 4), (2, 3, 4, 3), (2, 3, 2, 2, 3)]: + a = arange(np.prod(shape)) + # Test each combination of positive and negative strides + for dirs in range(2**len(shape)): + dirs_index = [slice(None)]*len(shape) + for bit in range(len(shape)): + if ((2**bit) & dirs): + dirs_index[bit] = slice(None, None, -1) + dirs_index = tuple(dirs_index) + + aview = a.reshape(shape)[dirs_index] + # C-order + i = nditer(aview, [], [['readonly']]) + assert_equal([x for x in i], a) + # Fortran-order + i = nditer(aview.T, [], [['readonly']]) + assert_equal([x for x in i], a) + # Other order + if len(shape) > 2: + i = nditer(aview.swapaxes(0, 1), [], [['readonly']]) + assert_equal([x for x in i], a) + +def test_iter_c_order(): + # Test forcing C order + + # Test the ordering for 1-D to 5-D shapes + for shape in [(5,), (3, 4), (2, 3, 4), (2, 3, 4, 3), (2, 3, 2, 2, 3)]: + a = arange(np.prod(shape)) + # Test each combination of positive and negative strides + for dirs in range(2**len(shape)): + dirs_index = [slice(None)]*len(shape) + for bit in range(len(shape)): + if ((2**bit) & dirs): + dirs_index[bit] = slice(None, None, -1) + dirs_index = tuple(dirs_index) + + aview = a.reshape(shape)[dirs_index] + # C-order + i = nditer(aview, order='C') + assert_equal([x for x in i], aview.ravel(order='C')) + # Fortran-order + i = nditer(aview.T, order='C') + assert_equal([x for x in i], aview.T.ravel(order='C')) + # Other order + if len(shape) > 2: + i = nditer(aview.swapaxes(0, 1), order='C') + assert_equal([x for x in i], + aview.swapaxes(0, 1).ravel(order='C')) + +def test_iter_f_order(): + # Test forcing F order + + # Test the ordering for 1-D to 5-D shapes + for shape in [(5,), (3, 4), (2, 3, 4), (2, 3, 4, 3), (2, 3, 2, 2, 3)]: + a = arange(np.prod(shape)) + # Test each combination of positive and negative strides + for dirs in range(2**len(shape)): + dirs_index = [slice(None)]*len(shape) + for bit 
in range(len(shape)): + if ((2**bit) & dirs): + dirs_index[bit] = slice(None, None, -1) + dirs_index = tuple(dirs_index) + + aview = a.reshape(shape)[dirs_index] + # C-order + i = nditer(aview, order='F') + assert_equal([x for x in i], aview.ravel(order='F')) + # Fortran-order + i = nditer(aview.T, order='F') + assert_equal([x for x in i], aview.T.ravel(order='F')) + # Other order + if len(shape) > 2: + i = nditer(aview.swapaxes(0, 1), order='F') + assert_equal([x for x in i], + aview.swapaxes(0, 1).ravel(order='F')) + +def test_iter_c_or_f_order(): + # Test forcing any contiguous (C or F) order + + # Test the ordering for 1-D to 5-D shapes + for shape in [(5,), (3, 4), (2, 3, 4), (2, 3, 4, 3), (2, 3, 2, 2, 3)]: + a = arange(np.prod(shape)) + # Test each combination of positive and negative strides + for dirs in range(2**len(shape)): + dirs_index = [slice(None)]*len(shape) + for bit in range(len(shape)): + if ((2**bit) & dirs): + dirs_index[bit] = slice(None, None, -1) + dirs_index = tuple(dirs_index) + + aview = a.reshape(shape)[dirs_index] + # C-order + i = nditer(aview, order='A') + assert_equal([x for x in i], aview.ravel(order='A')) + # Fortran-order + i = nditer(aview.T, order='A') + assert_equal([x for x in i], aview.T.ravel(order='A')) + # Other order + if len(shape) > 2: + i = nditer(aview.swapaxes(0, 1), order='A') + assert_equal([x for x in i], + aview.swapaxes(0, 1).ravel(order='A')) + +def test_nditer_multi_index_set(): + # Test the multi_index set + a = np.arange(6).reshape(2, 3) + it = np.nditer(a, flags=['multi_index']) + + # Removes the iteration on two first elements of a[0] + it.multi_index = (0, 2,) + + assert_equal([i for i in it], [2, 3, 4, 5]) + +@pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") +def test_nditer_multi_index_set_refcount(): + # Test if the reference count on index variable is decreased + + index = 0 + i = np.nditer(np.array([111, 222, 333, 444]), flags=['multi_index']) + + start_count = sys.getrefcount(index) + i.multi_index = (index,) + end_count = sys.getrefcount(index) + + assert_equal(start_count, end_count) + +def test_iter_best_order_multi_index_1d(): + # The multi-indices should be correct with any reordering + + a = arange(4) + # 1D order + i = nditer(a, ['multi_index'], [['readonly']]) + assert_equal(iter_multi_index(i), [(0,), (1,), (2,), (3,)]) + # 1D reversed order + i = nditer(a[::-1], ['multi_index'], [['readonly']]) + assert_equal(iter_multi_index(i), [(3,), (2,), (1,), (0,)]) + +def test_iter_best_order_multi_index_2d(): + # The multi-indices should be correct with any reordering + + a = arange(6) + # 2D C-order + i = nditer(a.reshape(2, 3), ['multi_index'], [['readonly']]) + assert_equal(iter_multi_index(i), [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)]) + # 2D Fortran-order + i = nditer(a.reshape(2, 3).copy(order='F'), ['multi_index'], [['readonly']]) + assert_equal(iter_multi_index(i), [(0, 0), (1, 0), (0, 1), (1, 1), (0, 2), (1, 2)]) + # 2D reversed C-order + i = nditer(a.reshape(2, 3)[::-1], ['multi_index'], [['readonly']]) + assert_equal(iter_multi_index(i), [(1, 0), (1, 1), (1, 2), (0, 0), (0, 1), (0, 2)]) + i = nditer(a.reshape(2, 3)[:, ::-1], ['multi_index'], [['readonly']]) + assert_equal(iter_multi_index(i), [(0, 2), (0, 1), (0, 0), (1, 2), (1, 1), (1, 0)]) + i = nditer(a.reshape(2, 3)[::-1, ::-1], ['multi_index'], [['readonly']]) + assert_equal(iter_multi_index(i), [(1, 2), (1, 1), (1, 0), (0, 2), (0, 1), (0, 0)]) + # 2D reversed Fortran-order + i = nditer(a.reshape(2, 3).copy(order='F')[::-1], 
['multi_index'], [['readonly']]) + assert_equal(iter_multi_index(i), [(1, 0), (0, 0), (1, 1), (0, 1), (1, 2), (0, 2)]) + i = nditer(a.reshape(2, 3).copy(order='F')[:, ::-1], + ['multi_index'], [['readonly']]) + assert_equal(iter_multi_index(i), [(0, 2), (1, 2), (0, 1), (1, 1), (0, 0), (1, 0)]) + i = nditer(a.reshape(2, 3).copy(order='F')[::-1, ::-1], + ['multi_index'], [['readonly']]) + assert_equal(iter_multi_index(i), [(1, 2), (0, 2), (1, 1), (0, 1), (1, 0), (0, 0)]) + +def test_iter_best_order_multi_index_3d(): + # The multi-indices should be correct with any reordering + + a = arange(12) + # 3D C-order + i = nditer(a.reshape(2, 3, 2), ['multi_index'], [['readonly']]) + assert_equal(iter_multi_index(i), + [(0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1), (0, 2, 0), (0, 2, 1), + (1, 0, 0), (1, 0, 1), (1, 1, 0), (1, 1, 1), (1, 2, 0), (1, 2, 1)]) + # 3D Fortran-order + i = nditer(a.reshape(2, 3, 2).copy(order='F'), ['multi_index'], [['readonly']]) + assert_equal(iter_multi_index(i), + [(0, 0, 0), (1, 0, 0), (0, 1, 0), (1, 1, 0), (0, 2, 0), (1, 2, 0), + (0, 0, 1), (1, 0, 1), (0, 1, 1), (1, 1, 1), (0, 2, 1), (1, 2, 1)]) + # 3D reversed C-order + i = nditer(a.reshape(2, 3, 2)[::-1], ['multi_index'], [['readonly']]) + assert_equal(iter_multi_index(i), + [(1, 0, 0), (1, 0, 1), (1, 1, 0), (1, 1, 1), (1, 2, 0), (1, 2, 1), + (0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1), (0, 2, 0), (0, 2, 1)]) + i = nditer(a.reshape(2, 3, 2)[:, ::-1], ['multi_index'], [['readonly']]) + assert_equal(iter_multi_index(i), + [(0, 2, 0), (0, 2, 1), (0, 1, 0), (0, 1, 1), (0, 0, 0), (0, 0, 1), + (1, 2, 0), (1, 2, 1), (1, 1, 0), (1, 1, 1), (1, 0, 0), (1, 0, 1)]) + i = nditer(a.reshape(2, 3, 2)[:,:, ::-1], ['multi_index'], [['readonly']]) + assert_equal(iter_multi_index(i), + [(0, 0, 1), (0, 0, 0), (0, 1, 1), (0, 1, 0), (0, 2, 1), (0, 2, 0), + (1, 0, 1), (1, 0, 0), (1, 1, 1), (1, 1, 0), (1, 2, 1), (1, 2, 0)]) + # 3D reversed Fortran-order + i = nditer(a.reshape(2, 3, 2).copy(order='F')[::-1], + ['multi_index'], [['readonly']]) + assert_equal(iter_multi_index(i), + [(1, 0, 0), (0, 0, 0), (1, 1, 0), (0, 1, 0), (1, 2, 0), (0, 2, 0), + (1, 0, 1), (0, 0, 1), (1, 1, 1), (0, 1, 1), (1, 2, 1), (0, 2, 1)]) + i = nditer(a.reshape(2, 3, 2).copy(order='F')[:, ::-1], + ['multi_index'], [['readonly']]) + assert_equal(iter_multi_index(i), + [(0, 2, 0), (1, 2, 0), (0, 1, 0), (1, 1, 0), (0, 0, 0), (1, 0, 0), + (0, 2, 1), (1, 2, 1), (0, 1, 1), (1, 1, 1), (0, 0, 1), (1, 0, 1)]) + i = nditer(a.reshape(2, 3, 2).copy(order='F')[:,:, ::-1], + ['multi_index'], [['readonly']]) + assert_equal(iter_multi_index(i), + [(0, 0, 1), (1, 0, 1), (0, 1, 1), (1, 1, 1), (0, 2, 1), (1, 2, 1), + (0, 0, 0), (1, 0, 0), (0, 1, 0), (1, 1, 0), (0, 2, 0), (1, 2, 0)]) + +def test_iter_best_order_c_index_1d(): + # The C index should be correct with any reordering + + a = arange(4) + # 1D order + i = nditer(a, ['c_index'], [['readonly']]) + assert_equal(iter_indices(i), [0, 1, 2, 3]) + # 1D reversed order + i = nditer(a[::-1], ['c_index'], [['readonly']]) + assert_equal(iter_indices(i), [3, 2, 1, 0]) + +def test_iter_best_order_c_index_2d(): + # The C index should be correct with any reordering + + a = arange(6) + # 2D C-order + i = nditer(a.reshape(2, 3), ['c_index'], [['readonly']]) + assert_equal(iter_indices(i), [0, 1, 2, 3, 4, 5]) + # 2D Fortran-order + i = nditer(a.reshape(2, 3).copy(order='F'), + ['c_index'], [['readonly']]) + assert_equal(iter_indices(i), [0, 3, 1, 4, 2, 5]) + # 2D reversed C-order + i = nditer(a.reshape(2, 3)[::-1], ['c_index'], [['readonly']]) + 
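    # iteration still walks memory forward, so the first elements visited
+    # lie in row 1 of the reversed view (C indices 3, 4, 5), then row 0
+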
assert_equal(iter_indices(i), [3, 4, 5, 0, 1, 2]) + i = nditer(a.reshape(2, 3)[:, ::-1], ['c_index'], [['readonly']]) + assert_equal(iter_indices(i), [2, 1, 0, 5, 4, 3]) + i = nditer(a.reshape(2, 3)[::-1, ::-1], ['c_index'], [['readonly']]) + assert_equal(iter_indices(i), [5, 4, 3, 2, 1, 0]) + # 2D reversed Fortran-order + i = nditer(a.reshape(2, 3).copy(order='F')[::-1], + ['c_index'], [['readonly']]) + assert_equal(iter_indices(i), [3, 0, 4, 1, 5, 2]) + i = nditer(a.reshape(2, 3).copy(order='F')[:, ::-1], + ['c_index'], [['readonly']]) + assert_equal(iter_indices(i), [2, 5, 1, 4, 0, 3]) + i = nditer(a.reshape(2, 3).copy(order='F')[::-1, ::-1], + ['c_index'], [['readonly']]) + assert_equal(iter_indices(i), [5, 2, 4, 1, 3, 0]) + +def test_iter_best_order_c_index_3d(): + # The C index should be correct with any reordering + + a = arange(12) + # 3D C-order + i = nditer(a.reshape(2, 3, 2), ['c_index'], [['readonly']]) + assert_equal(iter_indices(i), + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]) + # 3D Fortran-order + i = nditer(a.reshape(2, 3, 2).copy(order='F'), + ['c_index'], [['readonly']]) + assert_equal(iter_indices(i), + [0, 6, 2, 8, 4, 10, 1, 7, 3, 9, 5, 11]) + # 3D reversed C-order + i = nditer(a.reshape(2, 3, 2)[::-1], ['c_index'], [['readonly']]) + assert_equal(iter_indices(i), + [6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5]) + i = nditer(a.reshape(2, 3, 2)[:, ::-1], ['c_index'], [['readonly']]) + assert_equal(iter_indices(i), + [4, 5, 2, 3, 0, 1, 10, 11, 8, 9, 6, 7]) + i = nditer(a.reshape(2, 3, 2)[:,:, ::-1], ['c_index'], [['readonly']]) + assert_equal(iter_indices(i), + [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10]) + # 3D reversed Fortran-order + i = nditer(a.reshape(2, 3, 2).copy(order='F')[::-1], + ['c_index'], [['readonly']]) + assert_equal(iter_indices(i), + [6, 0, 8, 2, 10, 4, 7, 1, 9, 3, 11, 5]) + i = nditer(a.reshape(2, 3, 2).copy(order='F')[:, ::-1], + ['c_index'], [['readonly']]) + assert_equal(iter_indices(i), + [4, 10, 2, 8, 0, 6, 5, 11, 3, 9, 1, 7]) + i = nditer(a.reshape(2, 3, 2).copy(order='F')[:,:, ::-1], + ['c_index'], [['readonly']]) + assert_equal(iter_indices(i), + [1, 7, 3, 9, 5, 11, 0, 6, 2, 8, 4, 10]) + +def test_iter_best_order_f_index_1d(): + # The Fortran index should be correct with any reordering + + a = arange(4) + # 1D order + i = nditer(a, ['f_index'], [['readonly']]) + assert_equal(iter_indices(i), [0, 1, 2, 3]) + # 1D reversed order + i = nditer(a[::-1], ['f_index'], [['readonly']]) + assert_equal(iter_indices(i), [3, 2, 1, 0]) + +def test_iter_best_order_f_index_2d(): + # The Fortran index should be correct with any reordering + + a = arange(6) + # 2D C-order + i = nditer(a.reshape(2, 3), ['f_index'], [['readonly']]) + assert_equal(iter_indices(i), [0, 2, 4, 1, 3, 5]) + # 2D Fortran-order + i = nditer(a.reshape(2, 3).copy(order='F'), + ['f_index'], [['readonly']]) + assert_equal(iter_indices(i), [0, 1, 2, 3, 4, 5]) + # 2D reversed C-order + i = nditer(a.reshape(2, 3)[::-1], ['f_index'], [['readonly']]) + assert_equal(iter_indices(i), [1, 3, 5, 0, 2, 4]) + i = nditer(a.reshape(2, 3)[:, ::-1], ['f_index'], [['readonly']]) + assert_equal(iter_indices(i), [4, 2, 0, 5, 3, 1]) + i = nditer(a.reshape(2, 3)[::-1, ::-1], ['f_index'], [['readonly']]) + assert_equal(iter_indices(i), [5, 3, 1, 4, 2, 0]) + # 2D reversed Fortran-order + i = nditer(a.reshape(2, 3).copy(order='F')[::-1], + ['f_index'], [['readonly']]) + assert_equal(iter_indices(i), [1, 0, 3, 2, 5, 4]) + i = nditer(a.reshape(2, 3).copy(order='F')[:, ::-1], + ['f_index'], [['readonly']]) + 
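    # memory order is unchanged by the view, so the first element visited
+    # sits in the last column of the column-reversed copy: f_index 4, then 5
+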
assert_equal(iter_indices(i), [4, 5, 2, 3, 0, 1]) + i = nditer(a.reshape(2, 3).copy(order='F')[::-1, ::-1], + ['f_index'], [['readonly']]) + assert_equal(iter_indices(i), [5, 4, 3, 2, 1, 0]) + +def test_iter_best_order_f_index_3d(): + # The Fortran index should be correct with any reordering + + a = arange(12) + # 3D C-order + i = nditer(a.reshape(2, 3, 2), ['f_index'], [['readonly']]) + assert_equal(iter_indices(i), + [0, 6, 2, 8, 4, 10, 1, 7, 3, 9, 5, 11]) + # 3D Fortran-order + i = nditer(a.reshape(2, 3, 2).copy(order='F'), + ['f_index'], [['readonly']]) + assert_equal(iter_indices(i), + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]) + # 3D reversed C-order + i = nditer(a.reshape(2, 3, 2)[::-1], ['f_index'], [['readonly']]) + assert_equal(iter_indices(i), + [1, 7, 3, 9, 5, 11, 0, 6, 2, 8, 4, 10]) + i = nditer(a.reshape(2, 3, 2)[:, ::-1], ['f_index'], [['readonly']]) + assert_equal(iter_indices(i), + [4, 10, 2, 8, 0, 6, 5, 11, 3, 9, 1, 7]) + i = nditer(a.reshape(2, 3, 2)[:,:, ::-1], ['f_index'], [['readonly']]) + assert_equal(iter_indices(i), + [6, 0, 8, 2, 10, 4, 7, 1, 9, 3, 11, 5]) + # 3D reversed Fortran-order + i = nditer(a.reshape(2, 3, 2).copy(order='F')[::-1], + ['f_index'], [['readonly']]) + assert_equal(iter_indices(i), + [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10]) + i = nditer(a.reshape(2, 3, 2).copy(order='F')[:, ::-1], + ['f_index'], [['readonly']]) + assert_equal(iter_indices(i), + [4, 5, 2, 3, 0, 1, 10, 11, 8, 9, 6, 7]) + i = nditer(a.reshape(2, 3, 2).copy(order='F')[:,:, ::-1], + ['f_index'], [['readonly']]) + assert_equal(iter_indices(i), + [6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5]) + +def test_iter_no_inner_full_coalesce(): + # Check no_inner iterators which coalesce into a single inner loop + + for shape in [(5,), (3, 4), (2, 3, 4), (2, 3, 4, 3), (2, 3, 2, 2, 3)]: + size = np.prod(shape) + a = arange(size) + # Test each combination of forward and backwards indexing + for dirs in range(2**len(shape)): + dirs_index = [slice(None)]*len(shape) + for bit in range(len(shape)): + if ((2**bit) & dirs): + dirs_index[bit] = slice(None, None, -1) + dirs_index = tuple(dirs_index) + + aview = a.reshape(shape)[dirs_index] + # C-order + i = nditer(aview, ['external_loop'], [['readonly']]) + assert_equal(i.ndim, 1) + assert_equal(i[0].shape, (size,)) + # Fortran-order + i = nditer(aview.T, ['external_loop'], [['readonly']]) + assert_equal(i.ndim, 1) + assert_equal(i[0].shape, (size,)) + # Other order + if len(shape) > 2: + i = nditer(aview.swapaxes(0, 1), + ['external_loop'], [['readonly']]) + assert_equal(i.ndim, 1) + assert_equal(i[0].shape, (size,)) + +def test_iter_no_inner_dim_coalescing(): + # Check no_inner iterators whose dimensions may not coalesce completely + + # Skipping the last element in a dimension prevents coalescing + # with the next-bigger dimension + a = arange(24).reshape(2, 3, 4)[:,:, :-1] + i = nditer(a, ['external_loop'], [['readonly']]) + assert_equal(i.ndim, 2) + assert_equal(i[0].shape, (3,)) + a = arange(24).reshape(2, 3, 4)[:, :-1,:] + i = nditer(a, ['external_loop'], [['readonly']]) + assert_equal(i.ndim, 2) + assert_equal(i[0].shape, (8,)) + a = arange(24).reshape(2, 3, 4)[:-1,:,:] + i = nditer(a, ['external_loop'], [['readonly']]) + assert_equal(i.ndim, 1) + assert_equal(i[0].shape, (12,)) + + # Even with lots of 1-sized dimensions, should still coalesce + a = arange(24).reshape(1, 1, 2, 1, 1, 3, 1, 1, 4, 1, 1) + i = nditer(a, ['external_loop'], [['readonly']]) + assert_equal(i.ndim, 1) + assert_equal(i[0].shape, (24,)) + +def test_iter_dim_coalescing(): + # Check 
that the correct number of dimensions are coalesced + + # Tracking a multi-index disables coalescing + a = arange(24).reshape(2, 3, 4) + i = nditer(a, ['multi_index'], [['readonly']]) + assert_equal(i.ndim, 3) + + # A tracked index can allow coalescing if it's compatible with the array + a3d = arange(24).reshape(2, 3, 4) + i = nditer(a3d, ['c_index'], [['readonly']]) + assert_equal(i.ndim, 1) + i = nditer(a3d.swapaxes(0, 1), ['c_index'], [['readonly']]) + assert_equal(i.ndim, 3) + i = nditer(a3d.T, ['c_index'], [['readonly']]) + assert_equal(i.ndim, 3) + i = nditer(a3d.T, ['f_index'], [['readonly']]) + assert_equal(i.ndim, 1) + i = nditer(a3d.T.swapaxes(0, 1), ['f_index'], [['readonly']]) + assert_equal(i.ndim, 3) + + # When C or F order is forced, coalescing may still occur + a3d = arange(24).reshape(2, 3, 4) + i = nditer(a3d, order='C') + assert_equal(i.ndim, 1) + i = nditer(a3d.T, order='C') + assert_equal(i.ndim, 3) + i = nditer(a3d, order='F') + assert_equal(i.ndim, 3) + i = nditer(a3d.T, order='F') + assert_equal(i.ndim, 1) + i = nditer(a3d, order='A') + assert_equal(i.ndim, 1) + i = nditer(a3d.T, order='A') + assert_equal(i.ndim, 1) + +def test_iter_broadcasting(): + # Standard NumPy broadcasting rules + + # 1D with scalar + i = nditer([arange(6), np.int32(2)], ['multi_index'], [['readonly']]*2) + assert_equal(i.itersize, 6) + assert_equal(i.shape, (6,)) + + # 2D with scalar + i = nditer([arange(6).reshape(2, 3), np.int32(2)], + ['multi_index'], [['readonly']]*2) + assert_equal(i.itersize, 6) + assert_equal(i.shape, (2, 3)) + # 2D with 1D + i = nditer([arange(6).reshape(2, 3), arange(3)], + ['multi_index'], [['readonly']]*2) + assert_equal(i.itersize, 6) + assert_equal(i.shape, (2, 3)) + i = nditer([arange(2).reshape(2, 1), arange(3)], + ['multi_index'], [['readonly']]*2) + assert_equal(i.itersize, 6) + assert_equal(i.shape, (2, 3)) + # 2D with 2D + i = nditer([arange(2).reshape(2, 1), arange(3).reshape(1, 3)], + ['multi_index'], [['readonly']]*2) + assert_equal(i.itersize, 6) + assert_equal(i.shape, (2, 3)) + + # 3D with scalar + i = nditer([np.int32(2), arange(24).reshape(4, 2, 3)], + ['multi_index'], [['readonly']]*2) + assert_equal(i.itersize, 24) + assert_equal(i.shape, (4, 2, 3)) + # 3D with 1D + i = nditer([arange(3), arange(24).reshape(4, 2, 3)], + ['multi_index'], [['readonly']]*2) + assert_equal(i.itersize, 24) + assert_equal(i.shape, (4, 2, 3)) + i = nditer([arange(3), arange(8).reshape(4, 2, 1)], + ['multi_index'], [['readonly']]*2) + assert_equal(i.itersize, 24) + assert_equal(i.shape, (4, 2, 3)) + # 3D with 2D + i = nditer([arange(6).reshape(2, 3), arange(24).reshape(4, 2, 3)], + ['multi_index'], [['readonly']]*2) + assert_equal(i.itersize, 24) + assert_equal(i.shape, (4, 2, 3)) + i = nditer([arange(2).reshape(2, 1), arange(24).reshape(4, 2, 3)], + ['multi_index'], [['readonly']]*2) + assert_equal(i.itersize, 24) + assert_equal(i.shape, (4, 2, 3)) + i = nditer([arange(3).reshape(1, 3), arange(8).reshape(4, 2, 1)], + ['multi_index'], [['readonly']]*2) + assert_equal(i.itersize, 24) + assert_equal(i.shape, (4, 2, 3)) + # 3D with 3D + i = nditer([arange(2).reshape(1, 2, 1), arange(3).reshape(1, 1, 3), + arange(4).reshape(4, 1, 1)], + ['multi_index'], [['readonly']]*3) + assert_equal(i.itersize, 24) + assert_equal(i.shape, (4, 2, 3)) + i = nditer([arange(6).reshape(1, 2, 3), arange(4).reshape(4, 1, 1)], + ['multi_index'], [['readonly']]*2) + assert_equal(i.itersize, 24) + assert_equal(i.shape, (4, 2, 3)) + i = nditer([arange(24).reshape(4, 2, 3), arange(12).reshape(4, 1, 
3)], + ['multi_index'], [['readonly']]*2) + assert_equal(i.itersize, 24) + assert_equal(i.shape, (4, 2, 3)) + +def test_iter_itershape(): + # Check that allocated outputs work with a specified shape + a = np.arange(6, dtype='i2').reshape(2, 3) + i = nditer([a, None], [], [['readonly'], ['writeonly', 'allocate']], + op_axes=[[0, 1, None], None], + itershape=(-1, -1, 4)) + assert_equal(i.operands[1].shape, (2, 3, 4)) + assert_equal(i.operands[1].strides, (24, 8, 2)) + + i = nditer([a.T, None], [], [['readonly'], ['writeonly', 'allocate']], + op_axes=[[0, 1, None], None], + itershape=(-1, -1, 4)) + assert_equal(i.operands[1].shape, (3, 2, 4)) + assert_equal(i.operands[1].strides, (8, 24, 2)) + + i = nditer([a.T, None], [], [['readonly'], ['writeonly', 'allocate']], + order='F', + op_axes=[[0, 1, None], None], + itershape=(-1, -1, 4)) + assert_equal(i.operands[1].shape, (3, 2, 4)) + assert_equal(i.operands[1].strides, (2, 6, 12)) + + # If we specify 1 in the itershape, it shouldn't allow broadcasting + # of that dimension to a bigger value + assert_raises(ValueError, nditer, [a, None], [], + [['readonly'], ['writeonly', 'allocate']], + op_axes=[[0, 1, None], None], + itershape=(-1, 1, 4)) + # Test bug that for no op_axes but itershape, they are NULLed correctly + i = np.nditer([np.ones(2), None, None], itershape=(2,)) + +def test_iter_broadcasting_errors(): + # Check that errors are thrown for bad broadcasting shapes + + # 1D with 1D + assert_raises(ValueError, nditer, [arange(2), arange(3)], + [], [['readonly']]*2) + # 2D with 1D + assert_raises(ValueError, nditer, + [arange(6).reshape(2, 3), arange(2)], + [], [['readonly']]*2) + # 2D with 2D + assert_raises(ValueError, nditer, + [arange(6).reshape(2, 3), arange(9).reshape(3, 3)], + [], [['readonly']]*2) + assert_raises(ValueError, nditer, + [arange(6).reshape(2, 3), arange(4).reshape(2, 2)], + [], [['readonly']]*2) + # 3D with 3D + assert_raises(ValueError, nditer, + [arange(36).reshape(3, 3, 4), arange(24).reshape(2, 3, 4)], + [], [['readonly']]*2) + assert_raises(ValueError, nditer, + [arange(8).reshape(2, 4, 1), arange(24).reshape(2, 3, 4)], + [], [['readonly']]*2) + + # Verify that the error message mentions the right shapes + try: + nditer([arange(2).reshape(1, 2, 1), + arange(3).reshape(1, 3), + arange(6).reshape(2, 3)], + [], + [['readonly'], ['readonly'], ['writeonly', 'no_broadcast']]) + raise AssertionError('Should have raised a broadcast error') + except ValueError as e: + msg = str(e) + # The message should contain the shape of the 3rd operand + assert_(msg.find('(2,3)') >= 0, + 'Message "%s" doesn\'t contain operand shape (2,3)' % msg) + # The message should contain the broadcast shape + assert_(msg.find('(1,2,3)') >= 0, + 'Message "%s" doesn\'t contain broadcast shape (1,2,3)' % msg) + + try: + nditer([arange(6).reshape(2, 3), arange(2)], + [], + [['readonly'], ['readonly']], + op_axes=[[0, 1], [0, np.newaxis]], + itershape=(4, 3)) + raise AssertionError('Should have raised a broadcast error') + except ValueError as e: + msg = str(e) + # The message should contain "shape->remappedshape" for each operand + assert_(msg.find('(2,3)->(2,3)') >= 0, + 'Message "%s" doesn\'t contain operand shape (2,3)->(2,3)' % msg) + assert_(msg.find('(2,)->(2,newaxis)') >= 0, + ('Message "%s" doesn\'t contain remapped operand shape' + + '(2,)->(2,newaxis)') % msg) + # The message should contain the itershape parameter + assert_(msg.find('(4,3)') >= 0, + 'Message "%s" doesn\'t contain itershape parameter (4,3)' % msg) + + try: + nditer([np.zeros((2, 
1, 1)), np.zeros((2,))], + [], + [['writeonly', 'no_broadcast'], ['readonly']]) + raise AssertionError('Should have raised a broadcast error') + except ValueError as e: + msg = str(e) + # The message should contain the shape of the bad operand + assert_(msg.find('(2,1,1)') >= 0, + 'Message "%s" doesn\'t contain operand shape (2,1,1)' % msg) + # The message should contain the broadcast shape + assert_(msg.find('(2,1,2)') >= 0, + 'Message "%s" doesn\'t contain the broadcast shape (2,1,2)' % msg) + +def test_iter_flags_errors(): + # Check that bad combinations of flags produce errors + + a = arange(6) + + # Not enough operands + assert_raises(ValueError, nditer, [], [], []) + # Too many operands + assert_raises(ValueError, nditer, [a]*100, [], [['readonly']]*100) + # Bad global flag + assert_raises(ValueError, nditer, [a], ['bad flag'], [['readonly']]) + # Bad op flag + assert_raises(ValueError, nditer, [a], [], [['readonly', 'bad flag']]) + # Bad order parameter + assert_raises(ValueError, nditer, [a], [], [['readonly']], order='G') + # Bad casting parameter + assert_raises(ValueError, nditer, [a], [], [['readonly']], casting='noon') + # op_flags must match ops + assert_raises(ValueError, nditer, [a]*3, [], [['readonly']]*2) + # Cannot track both a C and an F index + assert_raises(ValueError, nditer, a, + ['c_index', 'f_index'], [['readonly']]) + # Inner iteration and multi-indices/indices are incompatible + assert_raises(ValueError, nditer, a, + ['external_loop', 'multi_index'], [['readonly']]) + assert_raises(ValueError, nditer, a, + ['external_loop', 'c_index'], [['readonly']]) + assert_raises(ValueError, nditer, a, + ['external_loop', 'f_index'], [['readonly']]) + # Must specify exactly one of readwrite/readonly/writeonly per operand + assert_raises(ValueError, nditer, a, [], [[]]) + assert_raises(ValueError, nditer, a, [], [['readonly', 'writeonly']]) + assert_raises(ValueError, nditer, a, [], [['readonly', 'readwrite']]) + assert_raises(ValueError, nditer, a, [], [['writeonly', 'readwrite']]) + assert_raises(ValueError, nditer, a, + [], [['readonly', 'writeonly', 'readwrite']]) + # Python scalars are always readonly + assert_raises(TypeError, nditer, 1.5, [], [['writeonly']]) + assert_raises(TypeError, nditer, 1.5, [], [['readwrite']]) + # Array scalars are always readonly + assert_raises(TypeError, nditer, np.int32(1), [], [['writeonly']]) + assert_raises(TypeError, nditer, np.int32(1), [], [['readwrite']]) + # Check readonly array + a.flags.writeable = False + assert_raises(ValueError, nditer, a, [], [['writeonly']]) + assert_raises(ValueError, nditer, a, [], [['readwrite']]) + a.flags.writeable = True + # Multi-indices available only with the multi_index flag + i = nditer(arange(6), [], [['readonly']]) + assert_raises(ValueError, lambda i:i.multi_index, i) + # Index available only with an index flag + assert_raises(ValueError, lambda i:i.index, i) + # GotoCoords and GotoIndex incompatible with buffering or no_inner + + def assign_multi_index(i): + i.multi_index = (0,) + + def assign_index(i): + i.index = 0 + + def assign_iterindex(i): + i.iterindex = 0 + + def assign_iterrange(i): + i.iterrange = (0, 1) + i = nditer(arange(6), ['external_loop']) + assert_raises(ValueError, assign_multi_index, i) + assert_raises(ValueError, assign_index, i) + assert_raises(ValueError, assign_iterindex, i) + assert_raises(ValueError, assign_iterrange, i) + i = nditer(arange(6), ['buffered']) + assert_raises(ValueError, assign_multi_index, i) + assert_raises(ValueError, assign_index, i) + 
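    # note: assigning 'iterindex' is not tested here -- buffered iterators
+    # still allow seeking via iterindex (exercised in test_iter_iterindex)
+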
assert_raises(ValueError, assign_iterrange, i)
+    # Can't iterate if size is zero
+    assert_raises(ValueError, nditer, np.array([]))
+
+def test_iter_slice():
+    a, b, c = np.arange(3), np.arange(3), np.arange(3.)
+    i = nditer([a, b, c], [], ['readwrite'])
+    with i:
+        i[0:2] = (3, 3)
+        assert_equal(a, [3, 1, 2])
+        assert_equal(b, [3, 1, 2])
+        assert_equal(c, [0, 1, 2])
+        i[1] = 12
+        assert_equal(i[0:2], [3, 12])
+
+def test_iter_assign_mapping():
+    a = np.arange(24, dtype='f8').reshape(2, 3, 4).T
+    it = np.nditer(a, [], [['readwrite', 'updateifcopy']],
+                   casting='same_kind', op_dtypes=[np.dtype('f4')])
+    with it:
+        it.operands[0][...] = 3
+        it.operands[0][...] = 14
+    assert_equal(a, 14)
+    it = np.nditer(a, [], [['readwrite', 'updateifcopy']],
+                   casting='same_kind', op_dtypes=[np.dtype('f4')])
+    with it:
+        x = it.operands[0][-1:1]
+        x[...] = 14
+        it.operands[0][...] = -1234
+    assert_equal(a, -1234)
+    # check for no warnings on dealloc
+    x = None
+    it = None
+
+def test_iter_nbo_align_contig():
+    # Check that byte order, alignment, and contig changes work
+
+    # Byte order change by requesting a specific dtype
+    a = np.arange(6, dtype='f4')
+    au = a.byteswap().newbyteorder()
+    assert_(a.dtype.byteorder != au.dtype.byteorder)
+    i = nditer(au, [], [['readwrite', 'updateifcopy']],
+               casting='equiv',
+               op_dtypes=[np.dtype('f4')])
+    with i:
+        # context manager triggers UPDATEIFCOPY on i at exit
+        assert_equal(i.dtypes[0].byteorder, a.dtype.byteorder)
+        assert_equal(i.operands[0].dtype.byteorder, a.dtype.byteorder)
+        assert_equal(i.operands[0], a)
+        i.operands[0][:] = 2
+    assert_equal(au, [2]*6)
+    del i  # should not raise a warning
+    # Byte order change by requesting NBO
+    a = np.arange(6, dtype='f4')
+    au = a.byteswap().newbyteorder()
+    assert_(a.dtype.byteorder != au.dtype.byteorder)
+    with nditer(au, [], [['readwrite', 'updateifcopy', 'nbo']],
+                casting='equiv') as i:
+        # context manager triggers UPDATEIFCOPY on i at exit
+        assert_equal(i.dtypes[0].byteorder, a.dtype.byteorder)
+        assert_equal(i.operands[0].dtype.byteorder, a.dtype.byteorder)
+        assert_equal(i.operands[0], a)
+        i.operands[0][:] = 12345
+        i.operands[0][:] = 2
+    assert_equal(au, [2]*6)
+
+    # Unaligned input
+    a = np.zeros((6*4+1,), dtype='i1')[1:]
+    a.dtype = 'f4'
+    a[:] = np.arange(6, dtype='f4')
+    assert_(not a.flags.aligned)
+    # Without 'aligned', shouldn't copy
+    i = nditer(a, [], [['readonly']])
+    assert_(not i.operands[0].flags.aligned)
+    assert_equal(i.operands[0], a)
+    # With 'aligned', should make a copy
+    with nditer(a, [], [['readwrite', 'updateifcopy', 'aligned']]) as i:
+        assert_(i.operands[0].flags.aligned)
+        # context manager triggers UPDATEIFCOPY on i at exit
+        assert_equal(i.operands[0], a)
+        i.operands[0][:] = 3
+    assert_equal(a, [3]*6)
+
+    # Discontiguous input
+    a = arange(12)
+    # If it is contiguous, shouldn't copy
+    i = nditer(a[:6], [], [['readonly']])
+    assert_(i.operands[0].flags.contiguous)
+    assert_equal(i.operands[0], a[:6])
+    # If it isn't contiguous, should buffer
+    i = nditer(a[::2], ['buffered', 'external_loop'],
+               [['readonly', 'contig']],
+               buffersize=10)
+    assert_(i[0].flags.contiguous)
+    assert_equal(i[0], a[::2])
+
+def test_iter_array_cast():
+    # Check that arrays are cast as requested
+
+    # No cast 'f4' -> 'f4'
+    a = np.arange(6, dtype='f4').reshape(2, 3)
+    i = nditer(a, [], [['readwrite']], op_dtypes=[np.dtype('f4')])
+    with i:
+        assert_equal(i.operands[0], a)
+        assert_equal(i.operands[0].dtype, np.dtype('f4'))
+
+    # Byte-order cast '<f4' -> '>f4'
+    a = np.arange(6, dtype='<f4').reshape(2, 3)
+    with nditer(a, [], [['readwrite', 'updateifcopy']],
+                casting='equiv',
+                op_dtypes=[np.dtype('>f4')]) as i:
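+        # the operand is a byte-swapped temporary here; WRITEBACKIFCOPY
+        # copies it back into 'a' when the 'with' block exits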
+        assert_equal(i.operands[0], a)
+        assert_equal(i.operands[0].dtype, np.dtype('>f4'))
+
+    # Safe case 'f4' -> 'f8'
+    a = np.arange(24, dtype='f4').reshape(2, 3, 4).swapaxes(1, 2)
+    i = nditer(a, [], [['readonly', 'copy']],
+               casting='safe',
+               op_dtypes=[np.dtype('f8')])
+    assert_equal(i.operands[0], a)
+    assert_equal(i.operands[0].dtype, np.dtype('f8'))
+    # The memory layout of the temporary should match a (a is (48,4,16))
+    # except negative strides get flipped to positive strides.
+    assert_equal(i.operands[0].strides, (96, 8, 32))
+    a = a[::-1,:, ::-1]
+    i = nditer(a, [], [['readonly', 'copy']],
+               casting='safe',
+               op_dtypes=[np.dtype('f8')])
+    assert_equal(i.operands[0], a)
+    assert_equal(i.operands[0].dtype, np.dtype('f8'))
+    assert_equal(i.operands[0].strides, (96, 8, 32))
+
+    # Same-kind cast 'f8' -> 'f4' -> 'f8'
+    a = np.arange(24, dtype='f8').reshape(2, 3, 4).T
+    with nditer(a, [],
+                [['readwrite', 'updateifcopy']],
+                casting='same_kind',
+                op_dtypes=[np.dtype('f4')]) as i:
+        assert_equal(i.operands[0], a)
+        assert_equal(i.operands[0].dtype, np.dtype('f4'))
+        assert_equal(i.operands[0].strides, (4, 16, 48))
+        # Check that WRITEBACKIFCOPY is activated at exit
+        i.operands[0][2, 1, 1] = -12.5
+        assert_(a[2, 1, 1] != -12.5)
+    assert_equal(a[2, 1, 1], -12.5)
+
+    a = np.arange(6, dtype='i4')[::-2]
+    with nditer(a, [],
+                [['writeonly', 'updateifcopy']],
+                casting='unsafe',
+                op_dtypes=[np.dtype('f4')]) as i:
+        assert_equal(i.operands[0].dtype, np.dtype('f4'))
+        # Even though the stride was negative in 'a', it
+        # becomes positive in the temporary
+        assert_equal(i.operands[0].strides, (4,))
+        i.operands[0][:] = [1, 2, 3]
+    assert_equal(a, [1, 2, 3])
+
+def test_iter_array_cast_errors():
+    # Check that invalid casts are caught
+
+    # Need to enable copying for casts to occur
+    assert_raises(TypeError, nditer, arange(2, dtype='f4'), [],
+                  [['readonly']], op_dtypes=[np.dtype('f8')])
+    # Also need to allow casting for casts to occur
+    assert_raises(TypeError, nditer, arange(2, dtype='f4'), [],
+                  [['readonly', 'copy']], casting='no',
+                  op_dtypes=[np.dtype('f8')])
+    assert_raises(TypeError, nditer, arange(2, dtype='f4'), [],
+                  [['readonly', 'copy']], casting='equiv',
+                  op_dtypes=[np.dtype('f8')])
+    assert_raises(TypeError, nditer, arange(2, dtype='f8'), [],
+                  [['writeonly', 'updateifcopy']],
+                  casting='no',
+                  op_dtypes=[np.dtype('f4')])
+    assert_raises(TypeError, nditer, arange(2, dtype='f8'), [],
+                  [['writeonly', 'updateifcopy']],
+                  casting='equiv',
+                  op_dtypes=[np.dtype('f4')])
+    # '<f4' -> '>f4' should not work with casting='no'
+    assert_raises(TypeError, nditer, arange(2, dtype='<f4'), [],
+                  [['readonly', 'copy']], casting='no',
+                  op_dtypes=[np.dtype('>f4')])
+    # 'f4' -> 'f8' is a safe cast, but 'f8' -> 'f4' isn't
+    assert_raises(TypeError, nditer, arange(2, dtype='f4'), [],
+                  [['readwrite', 'updateifcopy']],
+                  casting='safe',
+                  op_dtypes=[np.dtype('f8')])
+    assert_raises(TypeError, nditer, arange(2, dtype='f8'), [],
+                  [['readwrite', 'updateifcopy']],
+                  casting='safe',
+                  op_dtypes=[np.dtype('f4')])
+    # 'f4' -> 'i4' is neither a safe nor a same-kind cast
+    assert_raises(TypeError, nditer, arange(2, dtype='f4'), [],
+                  [['readonly', 'copy']],
+                  casting='same_kind',
+                  op_dtypes=[np.dtype('i4')])
+    assert_raises(TypeError, nditer, arange(2, dtype='i4'), [],
+                  [['writeonly', 'updateifcopy']],
+                  casting='same_kind',
+                  op_dtypes=[np.dtype('f4')])
+
+def test_iter_scalar_cast():
+    # Check that scalars are cast as requested
+
+    # No cast 'f4' -> 'f4'
+    i = nditer(np.float32(2.5), [], [['readonly']],
+               op_dtypes=[np.dtype('f4')])
+    assert_equal(i.dtypes[0], np.dtype('f4'))
+
assert_equal(i.value.dtype, np.dtype('f4')) + assert_equal(i.value, 2.5) + # Safe cast 'f4' -> 'f8' + i = nditer(np.float32(2.5), [], + [['readonly', 'copy']], + casting='safe', + op_dtypes=[np.dtype('f8')]) + assert_equal(i.dtypes[0], np.dtype('f8')) + assert_equal(i.value.dtype, np.dtype('f8')) + assert_equal(i.value, 2.5) + # Same-kind cast 'f8' -> 'f4' + i = nditer(np.float64(2.5), [], + [['readonly', 'copy']], + casting='same_kind', + op_dtypes=[np.dtype('f4')]) + assert_equal(i.dtypes[0], np.dtype('f4')) + assert_equal(i.value.dtype, np.dtype('f4')) + assert_equal(i.value, 2.5) + # Unsafe cast 'f8' -> 'i4' + i = nditer(np.float64(3.0), [], + [['readonly', 'copy']], + casting='unsafe', + op_dtypes=[np.dtype('i4')]) + assert_equal(i.dtypes[0], np.dtype('i4')) + assert_equal(i.value.dtype, np.dtype('i4')) + assert_equal(i.value, 3) + # Readonly scalars may be cast even without setting COPY or BUFFERED + i = nditer(3, [], [['readonly']], op_dtypes=[np.dtype('f8')]) + assert_equal(i[0].dtype, np.dtype('f8')) + assert_equal(i[0], 3.) + +def test_iter_scalar_cast_errors(): + # Check that invalid casts are caught + + # Need to allow copying/buffering for write casts of scalars to occur + assert_raises(TypeError, nditer, np.float32(2), [], + [['readwrite']], op_dtypes=[np.dtype('f8')]) + assert_raises(TypeError, nditer, 2.5, [], + [['readwrite']], op_dtypes=[np.dtype('f4')]) + # 'f8' -> 'f4' isn't a safe cast if the value would overflow + assert_raises(TypeError, nditer, np.float64(1e60), [], + [['readonly']], + casting='safe', + op_dtypes=[np.dtype('f4')]) + # 'f4' -> 'i4' is neither a safe nor a same-kind cast + assert_raises(TypeError, nditer, np.float32(2), [], + [['readonly']], + casting='same_kind', + op_dtypes=[np.dtype('i4')]) + +def test_iter_object_arrays_basic(): + # Check that object arrays work + + obj = {'a':3,'b':'d'} + a = np.array([[1, 2, 3], None, obj, None], dtype='O') + if HAS_REFCOUNT: + rc = sys.getrefcount(obj) + + # Need to allow references for object arrays + assert_raises(TypeError, nditer, a) + if HAS_REFCOUNT: + assert_equal(sys.getrefcount(obj), rc) + + i = nditer(a, ['refs_ok'], ['readonly']) + vals = [x_[()] for x_ in i] + assert_equal(np.array(vals, dtype='O'), a) + vals, i, x = [None]*3 + if HAS_REFCOUNT: + assert_equal(sys.getrefcount(obj), rc) + + i = nditer(a.reshape(2, 2).T, ['refs_ok', 'buffered'], + ['readonly'], order='C') + assert_(i.iterationneedsapi) + vals = [x_[()] for x_ in i] + assert_equal(np.array(vals, dtype='O'), a.reshape(2, 2).ravel(order='F')) + vals, i, x = [None]*3 + if HAS_REFCOUNT: + assert_equal(sys.getrefcount(obj), rc) + + i = nditer(a.reshape(2, 2).T, ['refs_ok', 'buffered'], + ['readwrite'], order='C') + with i: + for x in i: + x[...] = None + vals, i, x = [None]*3 + if HAS_REFCOUNT: + assert_(sys.getrefcount(obj) == rc-1) + assert_equal(a, np.array([None]*4, dtype='O')) + +def test_iter_object_arrays_conversions(): + # Conversions to/from objects + a = np.arange(6, dtype='O') + i = nditer(a, ['refs_ok', 'buffered'], ['readwrite'], + casting='unsafe', op_dtypes='i4') + with i: + for x in i: + x[...] += 1 + assert_equal(a, np.arange(6)+1) + + a = np.arange(6, dtype='i4') + i = nditer(a, ['refs_ok', 'buffered'], ['readwrite'], + casting='unsafe', op_dtypes='O') + with i: + for x in i: + x[...] 
+= 1 + assert_equal(a, np.arange(6)+1) + + # Non-contiguous object array + a = np.zeros((6,), dtype=[('p', 'i1'), ('a', 'O')]) + a = a['a'] + a[:] = np.arange(6) + i = nditer(a, ['refs_ok', 'buffered'], ['readwrite'], + casting='unsafe', op_dtypes='i4') + with i: + for x in i: + x[...] += 1 + assert_equal(a, np.arange(6)+1) + + #Non-contiguous value array + a = np.zeros((6,), dtype=[('p', 'i1'), ('a', 'i4')]) + a = a['a'] + a[:] = np.arange(6) + 98172488 + i = nditer(a, ['refs_ok', 'buffered'], ['readwrite'], + casting='unsafe', op_dtypes='O') + with i: + ob = i[0][()] + if HAS_REFCOUNT: + rc = sys.getrefcount(ob) + for x in i: + x[...] += 1 + if HAS_REFCOUNT: + assert_(sys.getrefcount(ob) == rc-1) + assert_equal(a, np.arange(6)+98172489) + +def test_iter_common_dtype(): + # Check that the iterator finds a common data type correctly + + i = nditer([array([3], dtype='f4'), array([0], dtype='f8')], + ['common_dtype'], + [['readonly', 'copy']]*2, + casting='safe') + assert_equal(i.dtypes[0], np.dtype('f8')) + assert_equal(i.dtypes[1], np.dtype('f8')) + i = nditer([array([3], dtype='i4'), array([0], dtype='f4')], + ['common_dtype'], + [['readonly', 'copy']]*2, + casting='safe') + assert_equal(i.dtypes[0], np.dtype('f8')) + assert_equal(i.dtypes[1], np.dtype('f8')) + i = nditer([array([3], dtype='f4'), array(0, dtype='f8')], + ['common_dtype'], + [['readonly', 'copy']]*2, + casting='same_kind') + assert_equal(i.dtypes[0], np.dtype('f4')) + assert_equal(i.dtypes[1], np.dtype('f4')) + i = nditer([array([3], dtype='u4'), array(0, dtype='i4')], + ['common_dtype'], + [['readonly', 'copy']]*2, + casting='safe') + assert_equal(i.dtypes[0], np.dtype('u4')) + assert_equal(i.dtypes[1], np.dtype('u4')) + i = nditer([array([3], dtype='u4'), array(-12, dtype='i4')], + ['common_dtype'], + [['readonly', 'copy']]*2, + casting='safe') + assert_equal(i.dtypes[0], np.dtype('i8')) + assert_equal(i.dtypes[1], np.dtype('i8')) + i = nditer([array([3], dtype='u4'), array(-12, dtype='i4'), + array([2j], dtype='c8'), array([9], dtype='f8')], + ['common_dtype'], + [['readonly', 'copy']]*4, + casting='safe') + assert_equal(i.dtypes[0], np.dtype('c16')) + assert_equal(i.dtypes[1], np.dtype('c16')) + assert_equal(i.dtypes[2], np.dtype('c16')) + assert_equal(i.dtypes[3], np.dtype('c16')) + assert_equal(i.value, (3, -12, 2j, 9)) + + # When allocating outputs, other outputs aren't factored in + i = nditer([array([3], dtype='i4'), None, array([2j], dtype='c16')], [], + [['readonly', 'copy'], + ['writeonly', 'allocate'], + ['writeonly']], + casting='safe') + assert_equal(i.dtypes[0], np.dtype('i4')) + assert_equal(i.dtypes[1], np.dtype('i4')) + assert_equal(i.dtypes[2], np.dtype('c16')) + # But, if common data types are requested, they are + i = nditer([array([3], dtype='i4'), None, array([2j], dtype='c16')], + ['common_dtype'], + [['readonly', 'copy'], + ['writeonly', 'allocate'], + ['writeonly']], + casting='safe') + assert_equal(i.dtypes[0], np.dtype('c16')) + assert_equal(i.dtypes[1], np.dtype('c16')) + assert_equal(i.dtypes[2], np.dtype('c16')) + +def test_iter_copy_if_overlap(): + # Ensure the iterator makes copies on read/write overlap, if requested + + # Copy not needed, 1 op + for flag in ['readonly', 'writeonly', 'readwrite']: + a = arange(10) + i = nditer([a], ['copy_if_overlap'], [[flag]]) + with i: + assert_(i.operands[0] is a) + + # Copy needed, 2 ops, read-write overlap + x = arange(10) + a = x[1:] + b = x[:-1] + with nditer([a, b], ['copy_if_overlap'], [['readonly'], ['readwrite']]) as i: + assert_(not 
np.shares_memory(*i.operands)) + + # Copy not needed with elementwise, 2 ops, exactly same arrays + x = arange(10) + a = x + b = x + i = nditer([a, b], ['copy_if_overlap'], [['readonly', 'overlap_assume_elementwise'], + ['readwrite', 'overlap_assume_elementwise']]) + with i: + assert_(i.operands[0] is a and i.operands[1] is b) + with nditer([a, b], ['copy_if_overlap'], [['readonly'], ['readwrite']]) as i: + assert_(i.operands[0] is a and not np.shares_memory(i.operands[1], b)) + + # Copy not needed, 2 ops, no overlap + x = arange(10) + a = x[::2] + b = x[1::2] + i = nditer([a, b], ['copy_if_overlap'], [['readonly'], ['writeonly']]) + assert_(i.operands[0] is a and i.operands[1] is b) + + # Copy needed, 2 ops, read-write overlap + x = arange(4, dtype=np.int8) + a = x[3:] + b = x.view(np.int32)[:1] + with nditer([a, b], ['copy_if_overlap'], [['readonly'], ['writeonly']]) as i: + assert_(not np.shares_memory(*i.operands)) + + # Copy needed, 3 ops, read-write overlap + for flag in ['writeonly', 'readwrite']: + x = np.ones([10, 10]) + a = x + b = x.T + c = x + with nditer([a, b, c], ['copy_if_overlap'], + [['readonly'], ['readonly'], [flag]]) as i: + a2, b2, c2 = i.operands + assert_(not np.shares_memory(a2, c2)) + assert_(not np.shares_memory(b2, c2)) + + # Copy not needed, 3 ops, read-only overlap + x = np.ones([10, 10]) + a = x + b = x.T + c = x + i = nditer([a, b, c], ['copy_if_overlap'], + [['readonly'], ['readonly'], ['readonly']]) + a2, b2, c2 = i.operands + assert_(a is a2) + assert_(b is b2) + assert_(c is c2) + + # Copy not needed, 3 ops, read-only overlap + x = np.ones([10, 10]) + a = x + b = np.ones([10, 10]) + c = x.T + i = nditer([a, b, c], ['copy_if_overlap'], + [['readonly'], ['writeonly'], ['readonly']]) + a2, b2, c2 = i.operands + assert_(a is a2) + assert_(b is b2) + assert_(c is c2) + + # Copy not needed, 3 ops, write-only overlap + x = np.arange(7) + a = x[:3] + b = x[3:6] + c = x[4:7] + i = nditer([a, b, c], ['copy_if_overlap'], + [['readonly'], ['writeonly'], ['writeonly']]) + a2, b2, c2 = i.operands + assert_(a is a2) + assert_(b is b2) + assert_(c is c2) + +def test_iter_op_axes(): + # Check that custom axes work + + # Reverse the axes + a = arange(6).reshape(2, 3) + i = nditer([a, a.T], [], [['readonly']]*2, op_axes=[[0, 1], [1, 0]]) + assert_(all([x == y for (x, y) in i])) + a = arange(24).reshape(2, 3, 4) + i = nditer([a.T, a], [], [['readonly']]*2, op_axes=[[2, 1, 0], None]) + assert_(all([x == y for (x, y) in i])) + + # Broadcast 1D to any dimension + a = arange(1, 31).reshape(2, 3, 5) + b = arange(1, 3) + i = nditer([a, b], [], [['readonly']]*2, op_axes=[None, [0, -1, -1]]) + assert_equal([x*y for (x, y) in i], (a*b.reshape(2, 1, 1)).ravel()) + b = arange(1, 4) + i = nditer([a, b], [], [['readonly']]*2, op_axes=[None, [-1, 0, -1]]) + assert_equal([x*y for (x, y) in i], (a*b.reshape(1, 3, 1)).ravel()) + b = arange(1, 6) + i = nditer([a, b], [], [['readonly']]*2, + op_axes=[None, [np.newaxis, np.newaxis, 0]]) + assert_equal([x*y for (x, y) in i], (a*b.reshape(1, 1, 5)).ravel()) + + # Inner product-style broadcasting + a = arange(24).reshape(2, 3, 4) + b = arange(40).reshape(5, 2, 4) + i = nditer([a, b], ['multi_index'], [['readonly']]*2, + op_axes=[[0, 1, -1, -1], [-1, -1, 0, 1]]) + assert_equal(i.shape, (2, 3, 5, 2)) + + # Matrix product-style broadcasting + a = arange(12).reshape(3, 4) + b = arange(20).reshape(4, 5) + i = nditer([a, b], ['multi_index'], [['readonly']]*2, + op_axes=[[0, -1], [-1, 1]]) + assert_equal(i.shape, (3, 5)) + +def 
test_iter_op_axes_errors():
+    # Check that custom axes throw errors for bad inputs
+
+    # Wrong number of items in op_axes
+    a = arange(6).reshape(2, 3)
+    assert_raises(ValueError, nditer, [a, a], [], [['readonly']]*2,
+                  op_axes=[[0], [1], [0]])
+    # Out of bounds items in op_axes
+    assert_raises(ValueError, nditer, [a, a], [], [['readonly']]*2,
+                  op_axes=[[2, 1], [0, 1]])
+    assert_raises(ValueError, nditer, [a, a], [], [['readonly']]*2,
+                  op_axes=[[0, 1], [2, -1]])
+    # Duplicate items in op_axes
+    assert_raises(ValueError, nditer, [a, a], [], [['readonly']]*2,
+                  op_axes=[[0, 0], [0, 1]])
+    assert_raises(ValueError, nditer, [a, a], [], [['readonly']]*2,
+                  op_axes=[[0, 1], [1, 1]])
+
+    # Different sized arrays in op_axes
+    assert_raises(ValueError, nditer, [a, a], [], [['readonly']]*2,
+                  op_axes=[[0, 1], [0, 1, 0]])
+
+    # Non-broadcastable dimensions in the result
+    assert_raises(ValueError, nditer, [a, a], [], [['readonly']]*2,
+                  op_axes=[[0, 1], [1, 0]])
+
+def test_iter_copy():
+    # Check that copying the iterator works correctly
+    a = arange(24).reshape(2, 3, 4)
+
+    # Simple iterator
+    i = nditer(a)
+    j = i.copy()
+    assert_equal([x[()] for x in i], [x[()] for x in j])
+
+    i.iterindex = 3
+    j = i.copy()
+    assert_equal([x[()] for x in i], [x[()] for x in j])
+
+    # Buffered iterator
+    i = nditer(a, ['buffered', 'ranged'], order='F', buffersize=3)
+    j = i.copy()
+    assert_equal([x[()] for x in i], [x[()] for x in j])
+
+    i.iterindex = 3
+    j = i.copy()
+    assert_equal([x[()] for x in i], [x[()] for x in j])
+
+    i.iterrange = (3, 9)
+    j = i.copy()
+    assert_equal([x[()] for x in i], [x[()] for x in j])
+
+    i.iterrange = (2, 18)
+    next(i)
+    next(i)
+    j = i.copy()
+    assert_equal([x[()] for x in i], [x[()] for x in j])
+
+    # Casting iterator
+    with nditer(a, ['buffered'], order='F', casting='unsafe',
+                op_dtypes='f8', buffersize=5) as i:
+        j = i.copy()
+        assert_equal([x[()] for x in j], a.ravel(order='F'))
+
+    a = arange(24, dtype='<i4').reshape(2, 3, 4)
+    with nditer(a, ['buffered'], order='F', casting='unsafe',
+                op_dtypes='f8', buffersize=5) as i:
+        j = i.copy()
+        assert_equal([x[()] for x in j], a.ravel(order='F'))
+
+def test_iter_copy_casts_structured():
+    # Test structured arrays which include a cast with references
+    # (structured -> unstructured (any to object), and many other
+    # casts, which cause this to require all steps in the casting machinery
+    # one level down as well as the iterator copy (which uses NpyAuxData clone)
+    in_dtype = np.dtype([("a", np.dtype("i,")),
+                         ("b", np.dtype(">i,<i,>d,S17,>d,(3)f,O,i1"))])
+    out_dtype = np.dtype([("a", np.dtype("O")),
+                          ("b", np.dtype(">i,>i,S17,>d,>U3,(3)d,i1,O"))])
+    arr = np.ones(1000, dtype=in_dtype)
+
+    it = np.nditer((arr,), ["buffered", "external_loop", "refs_ok"],
+                   op_dtypes=[out_dtype], casting="unsafe")
+    it_copy = it.copy()
+
+    res1 = next(it)
+    del it
+    res2 = next(it_copy)
+    del it_copy
+
+    expected = arr["a"].astype(out_dtype["a"])
+    assert_array_equal(res1["a"], expected)
+    assert_array_equal(res2["a"], expected)
+
+    for field in in_dtype["b"].names:
+        # Note that the .base avoids the subarray field
+        expected = arr["b"][field].astype(out_dtype["b"][field].base)
+        assert_array_equal(res1["b"][field], expected)
+        assert_array_equal(res2["b"][field], expected)
+
+
+def test_iter_allocate_output_simple():
+    # Check that the iterator will properly allocate outputs
+
+    # Simple case
+    a = arange(6)
+    i = nditer([a, None], [], [['readonly'], ['writeonly', 'allocate']],
+               op_dtypes=[None, np.dtype('f4')])
+    assert_equal(i.operands[1].shape, a.shape)
+    assert_equal(i.operands[1].dtype, np.dtype('f4'))
+
+def test_iter_allocate_output_buffered_readwrite():
+    # Allocated output with buffering + delay_bufalloc
+
+    a = arange(6)
+    i = nditer([a, None], ['buffered', 'delay_bufalloc'],
+               [['readonly'], ['allocate', 'readwrite']])
+    with i:
+
i.operands[1][:] = 1 + i.reset() + for x in i: + x[1][...] += x[0][...] + assert_equal(i.operands[1], a+1) + +def test_iter_allocate_output_itorder(): + # The allocated output should match the iteration order + + # C-order input, best iteration order + a = arange(6, dtype='i4').reshape(2, 3) + i = nditer([a, None], [], [['readonly'], ['writeonly', 'allocate']], + op_dtypes=[None, np.dtype('f4')]) + assert_equal(i.operands[1].shape, a.shape) + assert_equal(i.operands[1].strides, a.strides) + assert_equal(i.operands[1].dtype, np.dtype('f4')) + # F-order input, best iteration order + a = arange(24, dtype='i4').reshape(2, 3, 4).T + i = nditer([a, None], [], [['readonly'], ['writeonly', 'allocate']], + op_dtypes=[None, np.dtype('f4')]) + assert_equal(i.operands[1].shape, a.shape) + assert_equal(i.operands[1].strides, a.strides) + assert_equal(i.operands[1].dtype, np.dtype('f4')) + # Non-contiguous input, C iteration order + a = arange(24, dtype='i4').reshape(2, 3, 4).swapaxes(0, 1) + i = nditer([a, None], [], + [['readonly'], ['writeonly', 'allocate']], + order='C', + op_dtypes=[None, np.dtype('f4')]) + assert_equal(i.operands[1].shape, a.shape) + assert_equal(i.operands[1].strides, (32, 16, 4)) + assert_equal(i.operands[1].dtype, np.dtype('f4')) + +def test_iter_allocate_output_opaxes(): + # Specifying op_axes should work + + a = arange(24, dtype='i4').reshape(2, 3, 4) + i = nditer([None, a], [], [['writeonly', 'allocate'], ['readonly']], + op_dtypes=[np.dtype('u4'), None], + op_axes=[[1, 2, 0], None]) + assert_equal(i.operands[0].shape, (4, 2, 3)) + assert_equal(i.operands[0].strides, (4, 48, 16)) + assert_equal(i.operands[0].dtype, np.dtype('u4')) + +def test_iter_allocate_output_types_promotion(): + # Check type promotion of automatic outputs + + i = nditer([array([3], dtype='f4'), array([0], dtype='f8'), None], [], + [['readonly']]*2+[['writeonly', 'allocate']]) + assert_equal(i.dtypes[2], np.dtype('f8')) + i = nditer([array([3], dtype='i4'), array([0], dtype='f4'), None], [], + [['readonly']]*2+[['writeonly', 'allocate']]) + assert_equal(i.dtypes[2], np.dtype('f8')) + i = nditer([array([3], dtype='f4'), array(0, dtype='f8'), None], [], + [['readonly']]*2+[['writeonly', 'allocate']]) + assert_equal(i.dtypes[2], np.dtype('f4')) + i = nditer([array([3], dtype='u4'), array(0, dtype='i4'), None], [], + [['readonly']]*2+[['writeonly', 'allocate']]) + assert_equal(i.dtypes[2], np.dtype('u4')) + i = nditer([array([3], dtype='u4'), array(-12, dtype='i4'), None], [], + [['readonly']]*2+[['writeonly', 'allocate']]) + assert_equal(i.dtypes[2], np.dtype('i8')) + +def test_iter_allocate_output_types_byte_order(): + # Verify the rules for byte order changes + + # When there's just one input, the output type exactly matches + a = array([3], dtype='u4').newbyteorder() + i = nditer([a, None], [], + [['readonly'], ['writeonly', 'allocate']]) + assert_equal(i.dtypes[0], i.dtypes[1]) + # With two or more inputs, the output type is in native byte order + i = nditer([a, a, None], [], + [['readonly'], ['readonly'], ['writeonly', 'allocate']]) + assert_(i.dtypes[0] != i.dtypes[2]) + assert_equal(i.dtypes[0].newbyteorder('='), i.dtypes[2]) + +def test_iter_allocate_output_types_scalar(): + # If the inputs are all scalars, the output should be a scalar + + i = nditer([None, 1, 2.3, np.float32(12), np.complex128(3)], [], + [['writeonly', 'allocate']] + [['readonly']]*4) + assert_equal(i.operands[0].dtype, np.dtype('complex128')) + assert_equal(i.operands[0].ndim, 0) + +def test_iter_allocate_output_subtype(): + # 
Make sure that the subtype with priority wins + class MyNDArray(np.ndarray): + __array_priority__ = 15 + + # subclass vs ndarray + a = np.array([[1, 2], [3, 4]]).view(MyNDArray) + b = np.arange(4).reshape(2, 2).T + i = nditer([a, b, None], [], + [['readonly'], ['readonly'], ['writeonly', 'allocate']]) + assert_equal(type(a), type(i.operands[2])) + assert_(type(b) is not type(i.operands[2])) + assert_equal(i.operands[2].shape, (2, 2)) + + # If subtypes are disabled, we should get back an ndarray. + i = nditer([a, b, None], [], + [['readonly'], ['readonly'], + ['writeonly', 'allocate', 'no_subtype']]) + assert_equal(type(b), type(i.operands[2])) + assert_(type(a) is not type(i.operands[2])) + assert_equal(i.operands[2].shape, (2, 2)) + +def test_iter_allocate_output_errors(): + # Check that the iterator will throw errors for bad output allocations + + # Need an input if no output data type is specified + a = arange(6) + assert_raises(TypeError, nditer, [a, None], [], + [['writeonly'], ['writeonly', 'allocate']]) + # Allocated output should be flagged for writing + assert_raises(ValueError, nditer, [a, None], [], + [['readonly'], ['allocate', 'readonly']]) + # Allocated output can't have buffering without delayed bufalloc + assert_raises(ValueError, nditer, [a, None], ['buffered'], + ['allocate', 'readwrite']) + # Must specify at least one input + assert_raises(ValueError, nditer, [None, None], [], + [['writeonly', 'allocate'], + ['writeonly', 'allocate']], + op_dtypes=[np.dtype('f4'), np.dtype('f4')]) + # If using op_axes, must specify all the axes + a = arange(24, dtype='i4').reshape(2, 3, 4) + assert_raises(ValueError, nditer, [a, None], [], + [['readonly'], ['writeonly', 'allocate']], + op_dtypes=[None, np.dtype('f4')], + op_axes=[None, [0, np.newaxis, 1]]) + # If using op_axes, the axes must be within bounds + assert_raises(ValueError, nditer, [a, None], [], + [['readonly'], ['writeonly', 'allocate']], + op_dtypes=[None, np.dtype('f4')], + op_axes=[None, [0, 3, 1]]) + # If using op_axes, there can't be duplicates + assert_raises(ValueError, nditer, [a, None], [], + [['readonly'], ['writeonly', 'allocate']], + op_dtypes=[None, np.dtype('f4')], + op_axes=[None, [0, 2, 1, 0]]) + # Not all axes may be specified if a reduction. If there is a hole + # in op_axes, this is an error. 
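+    # (here op_axes=[None, [0, np.newaxis, 2]] maps output axes 0 and 2 of a
+    #  3-d operand but skips axis 1 -- that gap is the "hole")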
+ a = arange(24, dtype='i4').reshape(2, 3, 4) + assert_raises(ValueError, nditer, [a, None], ["reduce_ok"], + [['readonly'], ['readwrite', 'allocate']], + op_dtypes=[None, np.dtype('f4')], + op_axes=[None, [0, np.newaxis, 2]]) + +def test_iter_remove_axis(): + a = arange(24).reshape(2, 3, 4) + + i = nditer(a, ['multi_index']) + i.remove_axis(1) + assert_equal([x for x in i], a[:, 0,:].ravel()) + + a = a[::-1,:,:] + i = nditer(a, ['multi_index']) + i.remove_axis(0) + assert_equal([x for x in i], a[0,:,:].ravel()) + +def test_iter_remove_multi_index_inner_loop(): + # Check that removing multi-index support works + + a = arange(24).reshape(2, 3, 4) + + i = nditer(a, ['multi_index']) + assert_equal(i.ndim, 3) + assert_equal(i.shape, (2, 3, 4)) + assert_equal(i.itviews[0].shape, (2, 3, 4)) + + # Removing the multi-index tracking causes all dimensions to coalesce + before = [x for x in i] + i.remove_multi_index() + after = [x for x in i] + + assert_equal(before, after) + assert_equal(i.ndim, 1) + assert_raises(ValueError, lambda i:i.shape, i) + assert_equal(i.itviews[0].shape, (24,)) + + # Removing the inner loop means there's just one iteration + i.reset() + assert_equal(i.itersize, 24) + assert_equal(i[0].shape, tuple()) + i.enable_external_loop() + assert_equal(i.itersize, 24) + assert_equal(i[0].shape, (24,)) + assert_equal(i.value, arange(24)) + +def test_iter_iterindex(): + # Make sure iterindex works + + buffersize = 5 + a = arange(24).reshape(4, 3, 2) + for flags in ([], ['buffered']): + i = nditer(a, flags, buffersize=buffersize) + assert_equal(iter_iterindices(i), list(range(24))) + i.iterindex = 2 + assert_equal(iter_iterindices(i), list(range(2, 24))) + + i = nditer(a, flags, order='F', buffersize=buffersize) + assert_equal(iter_iterindices(i), list(range(24))) + i.iterindex = 5 + assert_equal(iter_iterindices(i), list(range(5, 24))) + + i = nditer(a[::-1], flags, order='F', buffersize=buffersize) + assert_equal(iter_iterindices(i), list(range(24))) + i.iterindex = 9 + assert_equal(iter_iterindices(i), list(range(9, 24))) + + i = nditer(a[::-1, ::-1], flags, order='C', buffersize=buffersize) + assert_equal(iter_iterindices(i), list(range(24))) + i.iterindex = 13 + assert_equal(iter_iterindices(i), list(range(13, 24))) + + i = nditer(a[::1, ::-1], flags, buffersize=buffersize) + assert_equal(iter_iterindices(i), list(range(24))) + i.iterindex = 23 + assert_equal(iter_iterindices(i), list(range(23, 24))) + i.reset() + i.iterindex = 2 + assert_equal(iter_iterindices(i), list(range(2, 24))) + +def test_iter_iterrange(): + # Make sure getting and resetting the iterrange works + + buffersize = 5 + a = arange(24, dtype='i4').reshape(4, 3, 2) + a_fort = a.ravel(order='F') + + i = nditer(a, ['ranged'], ['readonly'], order='F', + buffersize=buffersize) + assert_equal(i.iterrange, (0, 24)) + assert_equal([x[()] for x in i], a_fort) + for r in [(0, 24), (1, 2), (3, 24), (5, 5), (0, 20), (23, 24)]: + i.iterrange = r + assert_equal(i.iterrange, r) + assert_equal([x[()] for x in i], a_fort[r[0]:r[1]]) + + i = nditer(a, ['ranged', 'buffered'], ['readonly'], order='F', + op_dtypes='f8', buffersize=buffersize) + assert_equal(i.iterrange, (0, 24)) + assert_equal([x[()] for x in i], a_fort) + for r in [(0, 24), (1, 2), (3, 24), (5, 5), (0, 20), (23, 24)]: + i.iterrange = r + assert_equal(i.iterrange, r) + assert_equal([x[()] for x in i], a_fort[r[0]:r[1]]) + + def get_array(i): + val = np.array([], dtype='f8') + for x in i: + val = np.concatenate((val, x)) + return val + + i = nditer(a, ['ranged', 
'buffered', 'external_loop'], + ['readonly'], order='F', + op_dtypes='f8', buffersize=buffersize) + assert_equal(i.iterrange, (0, 24)) + assert_equal(get_array(i), a_fort) + for r in [(0, 24), (1, 2), (3, 24), (5, 5), (0, 20), (23, 24)]: + i.iterrange = r + assert_equal(i.iterrange, r) + assert_equal(get_array(i), a_fort[r[0]:r[1]]) + +def test_iter_buffering(): + # Test buffering with several buffer sizes and types + arrays = [] + # F-order swapped array + arrays.append(np.arange(24, + dtype='c16').reshape(2, 3, 4).T.newbyteorder().byteswap()) + # Contiguous 1-dimensional array + arrays.append(np.arange(10, dtype='f4')) + # Unaligned array + a = np.zeros((4*16+1,), dtype='i1')[1:] + a.dtype = 'i4' + a[:] = np.arange(16, dtype='i4') + arrays.append(a) + # 4-D F-order array + arrays.append(np.arange(120, dtype='i4').reshape(5, 3, 2, 4).T) + for a in arrays: + for buffersize in (1, 2, 3, 5, 8, 11, 16, 1024): + vals = [] + i = nditer(a, ['buffered', 'external_loop'], + [['readonly', 'nbo', 'aligned']], + order='C', + casting='equiv', + buffersize=buffersize) + while not i.finished: + assert_(i[0].size <= buffersize) + vals.append(i[0].copy()) + i.iternext() + assert_equal(np.concatenate(vals), a.ravel(order='C')) + +def test_iter_write_buffering(): + # Test that buffering of writes is working + + # F-order swapped array + a = np.arange(24).reshape(2, 3, 4).T.newbyteorder().byteswap() + i = nditer(a, ['buffered'], + [['readwrite', 'nbo', 'aligned']], + casting='equiv', + order='C', + buffersize=16) + x = 0 + with i: + while not i.finished: + i[0] = x + x += 1 + i.iternext() + assert_equal(a.ravel(order='C'), np.arange(24)) + +def test_iter_buffering_delayed_alloc(): + # Test that delaying buffer allocation works + + a = np.arange(6) + b = np.arange(1, dtype='f4') + i = nditer([a, b], ['buffered', 'delay_bufalloc', 'multi_index', 'reduce_ok'], + ['readwrite'], + casting='unsafe', + op_dtypes='f4') + assert_(i.has_delayed_bufalloc) + assert_raises(ValueError, lambda i:i.multi_index, i) + assert_raises(ValueError, lambda i:i[0], i) + assert_raises(ValueError, lambda i:i[0:2], i) + + def assign_iter(i): + i[0] = 0 + assert_raises(ValueError, assign_iter, i) + + i.reset() + assert_(not i.has_delayed_bufalloc) + assert_equal(i.multi_index, (0,)) + with i: + assert_equal(i[0], 0) + i[1] = 1 + assert_equal(i[0:2], [0, 1]) + assert_equal([[x[0][()], x[1][()]] for x in i], list(zip(range(6), [1]*6))) + +def test_iter_buffered_cast_simple(): + # Test that buffering can handle a simple cast + + a = np.arange(10, dtype='f4') + i = nditer(a, ['buffered', 'external_loop'], + [['readwrite', 'nbo', 'aligned']], + casting='same_kind', + op_dtypes=[np.dtype('f8')], + buffersize=3) + with i: + for v in i: + v[...] *= 2 + + assert_equal(a, 2*np.arange(10, dtype='f4')) + +def test_iter_buffered_cast_byteswapped(): + # Test that buffering can handle a cast which requires swap->cast->swap + + a = np.arange(10, dtype='f4').newbyteorder().byteswap() + i = nditer(a, ['buffered', 'external_loop'], + [['readwrite', 'nbo', 'aligned']], + casting='same_kind', + op_dtypes=[np.dtype('f8').newbyteorder()], + buffersize=3) + with i: + for v in i: + v[...] 
*= 2 + + assert_equal(a, 2*np.arange(10, dtype='f4')) + + with suppress_warnings() as sup: + sup.filter(np.ComplexWarning) + + a = np.arange(10, dtype='f8').newbyteorder().byteswap() + i = nditer(a, ['buffered', 'external_loop'], + [['readwrite', 'nbo', 'aligned']], + casting='unsafe', + op_dtypes=[np.dtype('c8').newbyteorder()], + buffersize=3) + with i: + for v in i: + v[...] *= 2 + + assert_equal(a, 2*np.arange(10, dtype='f8')) + +def test_iter_buffered_cast_byteswapped_complex(): + # Test that buffering can handle a cast which requires swap->cast->copy + + a = np.arange(10, dtype='c8').newbyteorder().byteswap() + a += 2j + i = nditer(a, ['buffered', 'external_loop'], + [['readwrite', 'nbo', 'aligned']], + casting='same_kind', + op_dtypes=[np.dtype('c16')], + buffersize=3) + with i: + for v in i: + v[...] *= 2 + assert_equal(a, 2*np.arange(10, dtype='c8') + 4j) + + a = np.arange(10, dtype='c8') + a += 2j + i = nditer(a, ['buffered', 'external_loop'], + [['readwrite', 'nbo', 'aligned']], + casting='same_kind', + op_dtypes=[np.dtype('c16').newbyteorder()], + buffersize=3) + with i: + for v in i: + v[...] *= 2 + assert_equal(a, 2*np.arange(10, dtype='c8') + 4j) + + a = np.arange(10, dtype=np.clongdouble).newbyteorder().byteswap() + a += 2j + i = nditer(a, ['buffered', 'external_loop'], + [['readwrite', 'nbo', 'aligned']], + casting='same_kind', + op_dtypes=[np.dtype('c16')], + buffersize=3) + with i: + for v in i: + v[...] *= 2 + assert_equal(a, 2*np.arange(10, dtype=np.clongdouble) + 4j) + + a = np.arange(10, dtype=np.longdouble).newbyteorder().byteswap() + i = nditer(a, ['buffered', 'external_loop'], + [['readwrite', 'nbo', 'aligned']], + casting='same_kind', + op_dtypes=[np.dtype('f4')], + buffersize=7) + with i: + for v in i: + v[...] *= 2 + assert_equal(a, 2*np.arange(10, dtype=np.longdouble)) + +def test_iter_buffered_cast_structured_type(): + # Tests buffering of structured types + + # simple -> struct type (duplicates the value) + sdt = [('a', 'f4'), ('b', 'i8'), ('c', 'c8', (2, 3)), ('d', 'O')] + a = np.arange(3, dtype='f4') + 0.5 + i = nditer(a, ['buffered', 'refs_ok'], ['readonly'], + casting='unsafe', + op_dtypes=sdt) + vals = [np.array(x) for x in i] + assert_equal(vals[0]['a'], 0.5) + assert_equal(vals[0]['b'], 0) + assert_equal(vals[0]['c'], [[(0.5)]*3]*2) + assert_equal(vals[0]['d'], 0.5) + assert_equal(vals[1]['a'], 1.5) + assert_equal(vals[1]['b'], 1) + assert_equal(vals[1]['c'], [[(1.5)]*3]*2) + assert_equal(vals[1]['d'], 1.5) + assert_equal(vals[0].dtype, np.dtype(sdt)) + + # object -> struct type + sdt = [('a', 'f4'), ('b', 'i8'), ('c', 'c8', (2, 3)), ('d', 'O')] + a = np.zeros((3,), dtype='O') + a[0] = (0.5, 0.5, [[0.5, 0.5, 0.5], [0.5, 0.5, 0.5]], 0.5) + a[1] = (1.5, 1.5, [[1.5, 1.5, 1.5], [1.5, 1.5, 1.5]], 1.5) + a[2] = (2.5, 2.5, [[2.5, 2.5, 2.5], [2.5, 2.5, 2.5]], 2.5) + if HAS_REFCOUNT: + rc = sys.getrefcount(a[0]) + i = nditer(a, ['buffered', 'refs_ok'], ['readonly'], + casting='unsafe', + op_dtypes=sdt) + vals = [x.copy() for x in i] + assert_equal(vals[0]['a'], 0.5) + assert_equal(vals[0]['b'], 0) + assert_equal(vals[0]['c'], [[(0.5)]*3]*2) + assert_equal(vals[0]['d'], 0.5) + assert_equal(vals[1]['a'], 1.5) + assert_equal(vals[1]['b'], 1) + assert_equal(vals[1]['c'], [[(1.5)]*3]*2) + assert_equal(vals[1]['d'], 1.5) + assert_equal(vals[0].dtype, np.dtype(sdt)) + vals, i, x = [None]*3 + if HAS_REFCOUNT: + assert_equal(sys.getrefcount(a[0]), rc) + + # single-field struct type -> simple + sdt = [('a', 'f4')] + a = np.array([(5.5,), (8,)], dtype=sdt) + i = 
nditer(a, ['buffered', 'refs_ok'], ['readonly'], + casting='unsafe', + op_dtypes='i4') + assert_equal([x_[()] for x_ in i], [5, 8]) + + # make sure multi-field struct type -> simple doesn't work + sdt = [('a', 'f4'), ('b', 'i8'), ('d', 'O')] + a = np.array([(5.5, 7, 'test'), (8, 10, 11)], dtype=sdt) + assert_raises(TypeError, lambda: ( + nditer(a, ['buffered', 'refs_ok'], ['readonly'], + casting='unsafe', + op_dtypes='i4'))) + + # struct type -> struct type (field-wise copy) + sdt1 = [('a', 'f4'), ('b', 'i8'), ('d', 'O')] + sdt2 = [('d', 'u2'), ('a', 'O'), ('b', 'f8')] + a = np.array([(1, 2, 3), (4, 5, 6)], dtype=sdt1) + i = nditer(a, ['buffered', 'refs_ok'], ['readonly'], + casting='unsafe', + op_dtypes=sdt2) + assert_equal(i[0].dtype, np.dtype(sdt2)) + assert_equal([np.array(x_) for x_ in i], + [np.array((1, 2, 3), dtype=sdt2), + np.array((4, 5, 6), dtype=sdt2)]) + + +def test_iter_buffered_cast_structured_type_failure_with_cleanup(): + # make sure struct type -> struct type with different + # number of fields fails + sdt1 = [('a', 'f4'), ('b', 'i8'), ('d', 'O')] + sdt2 = [('b', 'O'), ('a', 'f8')] + a = np.array([(1, 2, 3), (4, 5, 6)], dtype=sdt1) + + for intent in ["readwrite", "readonly", "writeonly"]: + # If the following assert fails, the place where the error is raised + # within nditer may change. That is fine, but it may make sense for + # a new (hard to design) test to replace it. The `simple_arr` is + # designed to require a multi-step cast (due to having fields). + assert np.can_cast(a.dtype, sdt2, casting="unsafe") + simple_arr = np.array([1, 2], dtype="i,i") # requires clean up + with pytest.raises(ValueError): + nditer((simple_arr, a), ['buffered', 'refs_ok'], [intent, intent], + casting='unsafe', op_dtypes=["f,f", sdt2]) + + +def test_buffered_cast_error_paths(): + with pytest.raises(ValueError): + # The input is cast into an `S3` buffer + np.nditer((np.array("a", dtype="S1"),), op_dtypes=["i"], + casting="unsafe", flags=["buffered"]) + + # The `M8[ns]` is cast into the `S3` output + it = np.nditer((np.array(1, dtype="i"),), op_dtypes=["S1"], + op_flags=["writeonly"], casting="unsafe", flags=["buffered"]) + with pytest.raises(ValueError): + with it: + buf = next(it) + buf[...] = "a" # cannot be converted to int. + +@pytest.mark.skipif(not HAS_REFCOUNT, reason="PyPy seems to not hit this.") +def test_buffered_cast_error_paths_unraisable(): + # The following gives an unraisable error. Pytest sometimes captures that + # (depending python and/or pytest version). So with Python>=3.8 this can + # probably be cleaned out in the future to check for + # pytest.PytestUnraisableExceptionWarning: + code = textwrap.dedent(""" + import numpy as np + + it = np.nditer((np.array(1, dtype="i"),), op_dtypes=["S1"], + op_flags=["writeonly"], casting="unsafe", flags=["buffered"]) + buf = next(it) + buf[...] = "a" + del buf, it # Flushing only happens during deallocate right now. 
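 + # The cast error raised while flushing the buffer cannot propagate out + # of the deallocator; Python reports it as an unraisable error on stderr.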
+ """) + res = subprocess.check_output([sys.executable, "-c", code], + stderr=subprocess.STDOUT, text=True) + assert "ValueError" in res + + +def test_iter_buffered_cast_subarray(): + # Tests buffering of subarrays + + # one element -> many (copies it to all) + sdt1 = [('a', 'f4')] + sdt2 = [('a', 'f8', (3, 2, 2))] + a = np.zeros((6,), dtype=sdt1) + a['a'] = np.arange(6) + i = nditer(a, ['buffered', 'refs_ok'], ['readonly'], + casting='unsafe', + op_dtypes=sdt2) + assert_equal(i[0].dtype, np.dtype(sdt2)) + for x, count in zip(i, list(range(6))): + assert_(np.all(x['a'] == count)) + + # one element -> many -> back (copies it to all) + sdt1 = [('a', 'O', (1, 1))] + sdt2 = [('a', 'O', (3, 2, 2))] + a = np.zeros((6,), dtype=sdt1) + a['a'][:, 0, 0] = np.arange(6) + i = nditer(a, ['buffered', 'refs_ok'], ['readwrite'], + casting='unsafe', + op_dtypes=sdt2) + with i: + assert_equal(i[0].dtype, np.dtype(sdt2)) + count = 0 + for x in i: + assert_(np.all(x['a'] == count)) + x['a'][0] += 2 + count += 1 + assert_equal(a['a'], np.arange(6).reshape(6, 1, 1)+2) + + # many -> one element -> back (copies just element 0) + sdt1 = [('a', 'O', (3, 2, 2))] + sdt2 = [('a', 'O', (1,))] + a = np.zeros((6,), dtype=sdt1) + a['a'][:, 0, 0, 0] = np.arange(6) + i = nditer(a, ['buffered', 'refs_ok'], ['readwrite'], + casting='unsafe', + op_dtypes=sdt2) + with i: + assert_equal(i[0].dtype, np.dtype(sdt2)) + count = 0 + for x in i: + assert_equal(x['a'], count) + x['a'] += 2 + count += 1 + assert_equal(a['a'], np.arange(6).reshape(6, 1, 1, 1)*np.ones((1, 3, 2, 2))+2) + + # many -> one element -> back (copies just element 0) + sdt1 = [('a', 'f8', (3, 2, 2))] + sdt2 = [('a', 'O', (1,))] + a = np.zeros((6,), dtype=sdt1) + a['a'][:, 0, 0, 0] = np.arange(6) + i = nditer(a, ['buffered', 'refs_ok'], ['readonly'], + casting='unsafe', + op_dtypes=sdt2) + assert_equal(i[0].dtype, np.dtype(sdt2)) + count = 0 + for x in i: + assert_equal(x['a'], count) + count += 1 + + # many -> one element (copies just element 0) + sdt1 = [('a', 'O', (3, 2, 2))] + sdt2 = [('a', 'f4', (1,))] + a = np.zeros((6,), dtype=sdt1) + a['a'][:, 0, 0, 0] = np.arange(6) + i = nditer(a, ['buffered', 'refs_ok'], ['readonly'], + casting='unsafe', + op_dtypes=sdt2) + assert_equal(i[0].dtype, np.dtype(sdt2)) + count = 0 + for x in i: + assert_equal(x['a'], count) + count += 1 + + # many -> matching shape (straightforward copy) + sdt1 = [('a', 'O', (3, 2, 2))] + sdt2 = [('a', 'f4', (3, 2, 2))] + a = np.zeros((6,), dtype=sdt1) + a['a'] = np.arange(6*3*2*2).reshape(6, 3, 2, 2) + i = nditer(a, ['buffered', 'refs_ok'], ['readonly'], + casting='unsafe', + op_dtypes=sdt2) + assert_equal(i[0].dtype, np.dtype(sdt2)) + count = 0 + for x in i: + assert_equal(x['a'], a[count]['a']) + count += 1 + + # vector -> smaller vector (truncates) + sdt1 = [('a', 'f8', (6,))] + sdt2 = [('a', 'f4', (2,))] + a = np.zeros((6,), dtype=sdt1) + a['a'] = np.arange(6*6).reshape(6, 6) + i = nditer(a, ['buffered', 'refs_ok'], ['readonly'], + casting='unsafe', + op_dtypes=sdt2) + assert_equal(i[0].dtype, np.dtype(sdt2)) + count = 0 + for x in i: + assert_equal(x['a'], a[count]['a'][:2]) + count += 1 + + # vector -> bigger vector (pads with zeros) + sdt1 = [('a', 'f8', (2,))] + sdt2 = [('a', 'f4', (6,))] + a = np.zeros((6,), dtype=sdt1) + a['a'] = np.arange(6*2).reshape(6, 2) + i = nditer(a, ['buffered', 'refs_ok'], ['readonly'], + casting='unsafe', + op_dtypes=sdt2) + assert_equal(i[0].dtype, np.dtype(sdt2)) + count = 0 + for x in i: + assert_equal(x['a'][:2], a[count]['a']) + 
assert_equal(x['a'][2:], [0, 0, 0, 0]) + count += 1 + + # vector -> matrix (broadcasts) + sdt1 = [('a', 'f8', (2,))] + sdt2 = [('a', 'f4', (2, 2))] + a = np.zeros((6,), dtype=sdt1) + a['a'] = np.arange(6*2).reshape(6, 2) + i = nditer(a, ['buffered', 'refs_ok'], ['readonly'], + casting='unsafe', + op_dtypes=sdt2) + assert_equal(i[0].dtype, np.dtype(sdt2)) + count = 0 + for x in i: + assert_equal(x['a'][0], a[count]['a']) + assert_equal(x['a'][1], a[count]['a']) + count += 1 + + # vector -> matrix (broadcasts and zero-pads) + sdt1 = [('a', 'f8', (2, 1))] + sdt2 = [('a', 'f4', (3, 2))] + a = np.zeros((6,), dtype=sdt1) + a['a'] = np.arange(6*2).reshape(6, 2, 1) + i = nditer(a, ['buffered', 'refs_ok'], ['readonly'], + casting='unsafe', + op_dtypes=sdt2) + assert_equal(i[0].dtype, np.dtype(sdt2)) + count = 0 + for x in i: + assert_equal(x['a'][:2, 0], a[count]['a'][:, 0]) + assert_equal(x['a'][:2, 1], a[count]['a'][:, 0]) + assert_equal(x['a'][2,:], [0, 0]) + count += 1 + + # matrix -> matrix (truncates and zero-pads) + sdt1 = [('a', 'f8', (2, 3))] + sdt2 = [('a', 'f4', (3, 2))] + a = np.zeros((6,), dtype=sdt1) + a['a'] = np.arange(6*2*3).reshape(6, 2, 3) + i = nditer(a, ['buffered', 'refs_ok'], ['readonly'], + casting='unsafe', + op_dtypes=sdt2) + assert_equal(i[0].dtype, np.dtype(sdt2)) + count = 0 + for x in i: + assert_equal(x['a'][:2, 0], a[count]['a'][:, 0]) + assert_equal(x['a'][:2, 1], a[count]['a'][:, 1]) + assert_equal(x['a'][2,:], [0, 0]) + count += 1 + +def test_iter_buffering_badwriteback(): + # Writing back from a buffer cannot combine elements + + # a needs write buffering, but had a broadcast dimension + a = np.arange(6).reshape(2, 3, 1) + b = np.arange(12).reshape(2, 3, 2) + assert_raises(ValueError, nditer, [a, b], + ['buffered', 'external_loop'], + [['readwrite'], ['writeonly']], + order='C') + + # But if a is readonly, it's fine + nditer([a, b], ['buffered', 'external_loop'], + [['readonly'], ['writeonly']], + order='C') + + # If a has just one element, it's fine too (constant 0 stride, a reduction) + a = np.arange(1).reshape(1, 1, 1) + nditer([a, b], ['buffered', 'external_loop', 'reduce_ok'], + [['readwrite'], ['writeonly']], + order='C') + + # check that it fails on other dimensions too + a = np.arange(6).reshape(1, 3, 2) + assert_raises(ValueError, nditer, [a, b], + ['buffered', 'external_loop'], + [['readwrite'], ['writeonly']], + order='C') + a = np.arange(4).reshape(2, 1, 2) + assert_raises(ValueError, nditer, [a, b], + ['buffered', 'external_loop'], + [['readwrite'], ['writeonly']], + order='C') + +def test_iter_buffering_string(): + # Safe casting disallows shrinking strings + a = np.array(['abc', 'a', 'abcd'], dtype=np.bytes_) + assert_equal(a.dtype, np.dtype('S4')) + assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'], + op_dtypes='S2') + i = nditer(a, ['buffered'], ['readonly'], op_dtypes='S6') + assert_equal(i[0], b'abc') + assert_equal(i[0].dtype, np.dtype('S6')) + + a = np.array(['abc', 'a', 'abcd'], dtype=np.unicode_) + assert_equal(a.dtype, np.dtype('U4')) + assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'], + op_dtypes='U2') + i = nditer(a, ['buffered'], ['readonly'], op_dtypes='U6') + assert_equal(i[0], u'abc') + assert_equal(i[0].dtype, np.dtype('U6')) + +def test_iter_buffering_growinner(): + # Test that the inner loop grows when no buffering is needed + a = np.arange(30) + i = nditer(a, ['buffered', 'growinner', 'external_loop'], + buffersize=5) + # Should end up with just one inner loop here + assert_equal(i[0].size, a.size) + + 
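The slow test below hammers how buffered reductions reuse their inner loops. A minimal sketch of the pattern it builds on may help; the array `arr`, its shape, and its dtype here are illustrative choices, not taken from the test file:

    import numpy as np

    arr = np.arange(24.0).reshape(4, 6)
    # Allocate a 0-d output (op_axes of all -1) and buffer the iteration;
    # 'delay_bufalloc' lets us initialize the output before buffering starts.
    it = np.nditer([arr, None],
                   ['reduce_ok', 'buffered', 'delay_bufalloc'],
                   [['readonly'], ['readwrite', 'allocate']],
                   op_axes=[[0, 1], [-1, -1]])
    with it:
        it.operands[1][...] = 0  # a sum must start from the additive identity
        it.reset()               # allocates the delayed buffers
        for x, y in it:
            y[...] += x          # every element accumulates into the same output
        assert it.operands[1] == arr.sum()

The reuse test then checks that this accumulation gives identical results for every buffer size, stride sign, and starting iterindex.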
+@pytest.mark.slow +def test_iter_buffered_reduce_reuse(): + # large enough array for all views, including negative strides. + a = np.arange(2*3**5)[3**5:3**5+1] + flags = ['buffered', 'delay_bufalloc', 'multi_index', 'reduce_ok', 'refs_ok'] + op_flags = [('readonly',), ('readwrite', 'allocate')] + op_axes_list = [[(0, 1, 2), (0, 1, -1)], [(0, 1, 2), (0, -1, -1)]] + # wrong dtype to force buffering + op_dtypes = [float, a.dtype] + + def get_params(): + for xs in range(-3**2, 3**2 + 1): + for ys in range(xs, 3**2 + 1): + for op_axes in op_axes_list: + # last stride is reduced and because of that not + # important for this test, as it is the inner stride. + strides = (xs * a.itemsize, ys * a.itemsize, a.itemsize) + arr = np.lib.stride_tricks.as_strided(a, (3, 3, 3), strides) + + for skip in [0, 1]: + yield arr, op_axes, skip + + for arr, op_axes, skip in get_params(): + nditer2 = np.nditer([arr.copy(), None], + op_axes=op_axes, flags=flags, op_flags=op_flags, + op_dtypes=op_dtypes) + with nditer2: + nditer2.operands[-1][...] = 0 + nditer2.reset() + nditer2.iterindex = skip + + for (a2_in, b2_in) in nditer2: + b2_in += a2_in.astype(np.int_) + + comp_res = nditer2.operands[-1] + + for bufsize in range(0, 3**3): + nditer1 = np.nditer([arr, None], + op_axes=op_axes, flags=flags, op_flags=op_flags, + buffersize=bufsize, op_dtypes=op_dtypes) + with nditer1: + nditer1.operands[-1][...] = 0 + nditer1.reset() + nditer1.iterindex = skip + + for (a1_in, b1_in) in nditer1: + b1_in += a1_in.astype(np.int_) + + res = nditer1.operands[-1] + assert_array_equal(res, comp_res) + + +def test_iter_no_broadcast(): + # Test that the no_broadcast flag works + a = np.arange(24).reshape(2, 3, 4) + b = np.arange(6).reshape(2, 3, 1) + c = np.arange(12).reshape(3, 4) + + nditer([a, b, c], [], + [['readonly', 'no_broadcast'], + ['readonly'], ['readonly']]) + assert_raises(ValueError, nditer, [a, b, c], [], + [['readonly'], ['readonly', 'no_broadcast'], ['readonly']]) + assert_raises(ValueError, nditer, [a, b, c], [], + [['readonly'], ['readonly'], ['readonly', 'no_broadcast']]) + + +class TestIterNested: + + def test_basic(self): + # Test nested iteration basic usage + a = arange(12).reshape(2, 3, 2) + + i, j = np.nested_iters(a, [[0], [1, 2]]) + vals = [list(j) for _ in i] + assert_equal(vals, [[0, 1, 2, 3, 4, 5], [6, 7, 8, 9, 10, 11]]) + + i, j = np.nested_iters(a, [[0, 1], [2]]) + vals = [list(j) for _ in i] + assert_equal(vals, [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9], [10, 11]]) + + i, j = np.nested_iters(a, [[0, 2], [1]]) + vals = [list(j) for _ in i] + assert_equal(vals, [[0, 2, 4], [1, 3, 5], [6, 8, 10], [7, 9, 11]]) + + def test_reorder(self): + # Test nested iteration basic usage + a = arange(12).reshape(2, 3, 2) + + # In 'K' order (default), it gets reordered + i, j = np.nested_iters(a, [[0], [2, 1]]) + vals = [list(j) for _ in i] + assert_equal(vals, [[0, 1, 2, 3, 4, 5], [6, 7, 8, 9, 10, 11]]) + + i, j = np.nested_iters(a, [[1, 0], [2]]) + vals = [list(j) for _ in i] + assert_equal(vals, [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9], [10, 11]]) + + i, j = np.nested_iters(a, [[2, 0], [1]]) + vals = [list(j) for _ in i] + assert_equal(vals, [[0, 2, 4], [1, 3, 5], [6, 8, 10], [7, 9, 11]]) + + # In 'C' order, it doesn't + i, j = np.nested_iters(a, [[0], [2, 1]], order='C') + vals = [list(j) for _ in i] + assert_equal(vals, [[0, 2, 4, 1, 3, 5], [6, 8, 10, 7, 9, 11]]) + + i, j = np.nested_iters(a, [[1, 0], [2]], order='C') + vals = [list(j) for _ in i] + assert_equal(vals, [[0, 1], [6, 7], [2, 3], [8, 9], [4, 5], [10, 
11]]) + + i, j = np.nested_iters(a, [[2, 0], [1]], order='C') + vals = [list(j) for _ in i] + assert_equal(vals, [[0, 2, 4], [6, 8, 10], [1, 3, 5], [7, 9, 11]]) + + def test_flip_axes(self): + # Test nested iteration with negative axes + a = arange(12).reshape(2, 3, 2)[::-1, ::-1, ::-1] + + # In 'K' order (default), the axes all get flipped + i, j = np.nested_iters(a, [[0], [1, 2]]) + vals = [list(j) for _ in i] + assert_equal(vals, [[0, 1, 2, 3, 4, 5], [6, 7, 8, 9, 10, 11]]) + + i, j = np.nested_iters(a, [[0, 1], [2]]) + vals = [list(j) for _ in i] + assert_equal(vals, [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9], [10, 11]]) + + i, j = np.nested_iters(a, [[0, 2], [1]]) + vals = [list(j) for _ in i] + assert_equal(vals, [[0, 2, 4], [1, 3, 5], [6, 8, 10], [7, 9, 11]]) + + # In 'C' order, flipping axes is disabled + i, j = np.nested_iters(a, [[0], [1, 2]], order='C') + vals = [list(j) for _ in i] + assert_equal(vals, [[11, 10, 9, 8, 7, 6], [5, 4, 3, 2, 1, 0]]) + + i, j = np.nested_iters(a, [[0, 1], [2]], order='C') + vals = [list(j) for _ in i] + assert_equal(vals, [[11, 10], [9, 8], [7, 6], [5, 4], [3, 2], [1, 0]]) + + i, j = np.nested_iters(a, [[0, 2], [1]], order='C') + vals = [list(j) for _ in i] + assert_equal(vals, [[11, 9, 7], [10, 8, 6], [5, 3, 1], [4, 2, 0]]) + + def test_broadcast(self): + # Test nested iteration with broadcasting + a = arange(2).reshape(2, 1) + b = arange(3).reshape(1, 3) + + i, j = np.nested_iters([a, b], [[0], [1]]) + vals = [list(j) for _ in i] + assert_equal(vals, [[[0, 0], [0, 1], [0, 2]], [[1, 0], [1, 1], [1, 2]]]) + + i, j = np.nested_iters([a, b], [[1], [0]]) + vals = [list(j) for _ in i] + assert_equal(vals, [[[0, 0], [1, 0]], [[0, 1], [1, 1]], [[0, 2], [1, 2]]]) + + def test_dtype_copy(self): + # Test nested iteration with a copy to change dtype + + # copy + a = arange(6, dtype='i4').reshape(2, 3) + i, j = np.nested_iters(a, [[0], [1]], + op_flags=['readonly', 'copy'], + op_dtypes='f8') + assert_equal(j[0].dtype, np.dtype('f8')) + vals = [list(j) for _ in i] + assert_equal(vals, [[0, 1, 2], [3, 4, 5]]) + vals = None + + # writebackifcopy - using context manager + a = arange(6, dtype='f4').reshape(2, 3) + i, j = np.nested_iters(a, [[0], [1]], + op_flags=['readwrite', 'updateifcopy'], + casting='same_kind', + op_dtypes='f8') + with i, j: + assert_equal(j[0].dtype, np.dtype('f8')) + for x in i: + for y in j: + y[...] += 1 + assert_equal(a, [[0, 1, 2], [3, 4, 5]]) + assert_equal(a, [[1, 2, 3], [4, 5, 6]]) + + # writebackifcopy - using close() + a = arange(6, dtype='f4').reshape(2, 3) + i, j = np.nested_iters(a, [[0], [1]], + op_flags=['readwrite', 'updateifcopy'], + casting='same_kind', + op_dtypes='f8') + assert_equal(j[0].dtype, np.dtype('f8')) + for x in i: + for y in j: + y[...] += 1 + assert_equal(a, [[0, 1, 2], [3, 4, 5]]) + i.close() + j.close() + assert_equal(a, [[1, 2, 3], [4, 5, 6]]) + + def test_dtype_buffered(self): + # Test nested iteration with buffering to change dtype + + a = arange(6, dtype='f4').reshape(2, 3) + i, j = np.nested_iters(a, [[0], [1]], + flags=['buffered'], + op_flags=['readwrite'], + casting='same_kind', + op_dtypes='f8') + assert_equal(j[0].dtype, np.dtype('f8')) + for x in i: + for y in j: + y[...] 
+= 1 + assert_equal(a, [[1, 2, 3], [4, 5, 6]]) + + def test_0d(self): + a = np.arange(12).reshape(2, 3, 2) + i, j = np.nested_iters(a, [[], [1, 0, 2]]) + vals = [list(j) for _ in i] + assert_equal(vals, [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]]) + + i, j = np.nested_iters(a, [[1, 0, 2], []]) + vals = [list(j) for _ in i] + assert_equal(vals, [[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11]]) + + i, j, k = np.nested_iters(a, [[2, 0], [], [1]]) + vals = [] + for x in i: + for y in j: + vals.append([z for z in k]) + assert_equal(vals, [[0, 2, 4], [1, 3, 5], [6, 8, 10], [7, 9, 11]]) + + def test_iter_nested_iters_dtype_buffered(self): + # Test nested iteration with buffering to change dtype + + a = arange(6, dtype='f4').reshape(2, 3) + i, j = np.nested_iters(a, [[0], [1]], + flags=['buffered'], + op_flags=['readwrite'], + casting='same_kind', + op_dtypes='f8') + with i, j: + assert_equal(j[0].dtype, np.dtype('f8')) + for x in i: + for y in j: + y[...] += 1 + assert_equal(a, [[1, 2, 3], [4, 5, 6]]) + +def test_iter_reduction_error(): + + a = np.arange(6) + assert_raises(ValueError, nditer, [a, None], [], + [['readonly'], ['readwrite', 'allocate']], + op_axes=[[0], [-1]]) + + a = np.arange(6).reshape(2, 3) + assert_raises(ValueError, nditer, [a, None], ['external_loop'], + [['readonly'], ['readwrite', 'allocate']], + op_axes=[[0, 1], [-1, -1]]) + +def test_iter_reduction(): + # Test doing reductions with the iterator + + a = np.arange(6) + i = nditer([a, None], ['reduce_ok'], + [['readonly'], ['readwrite', 'allocate']], + op_axes=[[0], [-1]]) + # Need to initialize the output operand to the addition unit + with i: + i.operands[1][...] = 0 + # Do the reduction + for x, y in i: + y[...] += x + # Since no axes were specified, should have allocated a scalar + assert_equal(i.operands[1].ndim, 0) + assert_equal(i.operands[1], np.sum(a)) + + a = np.arange(6).reshape(2, 3) + i = nditer([a, None], ['reduce_ok', 'external_loop'], + [['readonly'], ['readwrite', 'allocate']], + op_axes=[[0, 1], [-1, -1]]) + # Need to initialize the output operand to the addition unit + with i: + i.operands[1][...] = 0 + # Reduction shape/strides for the output + assert_equal(i[1].shape, (6,)) + assert_equal(i[1].strides, (0,)) + # Do the reduction + for x, y in i: + # Use a for loop instead of ``y[...] += x`` + # (equivalent to ``y[...] = y[...].copy() + x``), + # because y has zero strides we use for the reduction + for j in range(len(y)): + y[j] += x[j] + # Since no axes were specified, should have allocated a scalar + assert_equal(i.operands[1].ndim, 0) + assert_equal(i.operands[1], np.sum(a)) + + # This is a tricky reduction case for the buffering double loop + # to handle + a = np.ones((2, 3, 5)) + it1 = nditer([a, None], ['reduce_ok', 'external_loop'], + [['readonly'], ['readwrite', 'allocate']], + op_axes=[None, [0, -1, 1]]) + it2 = nditer([a, None], ['reduce_ok', 'external_loop', + 'buffered', 'delay_bufalloc'], + [['readonly'], ['readwrite', 'allocate']], + op_axes=[None, [0, -1, 1]], buffersize=10) + with it1, it2: + it1.operands[1].fill(0) + it2.operands[1].fill(0) + it2.reset() + for x in it1: + x[1][...] += x[0] + for x in it2: + x[1][...] 
+= x[0] + assert_equal(it1.operands[1], it2.operands[1]) + assert_equal(it2.operands[1].sum(), a.size) + +def test_iter_buffering_reduction(): + # Test doing buffered reductions with the iterator + + a = np.arange(6) + b = np.array(0., dtype='f8').byteswap().newbyteorder() + i = nditer([a, b], ['reduce_ok', 'buffered'], + [['readonly'], ['readwrite', 'nbo']], + op_axes=[[0], [-1]]) + with i: + assert_equal(i[1].dtype, np.dtype('f8')) + assert_(i[1].dtype != b.dtype) + # Do the reduction + for x, y in i: + y[...] += x + # Since no axes were specified, should have allocated a scalar + assert_equal(b, np.sum(a)) + + a = np.arange(6).reshape(2, 3) + b = np.array([0, 0], dtype='f8').byteswap().newbyteorder() + i = nditer([a, b], ['reduce_ok', 'external_loop', 'buffered'], + [['readonly'], ['readwrite', 'nbo']], + op_axes=[[0, 1], [0, -1]]) + # Reduction shape/strides for the output + with i: + assert_equal(i[1].shape, (3,)) + assert_equal(i[1].strides, (0,)) + # Do the reduction + for x, y in i: + # Use a for loop instead of ``y[...] += x`` + # (equivalent to ``y[...] = y[...].copy() + x``), + # because y has zero strides we use for the reduction + for j in range(len(y)): + y[j] += x[j] + assert_equal(b, np.sum(a, axis=1)) + + # Iterator inner double loop was wrong on this one + p = np.arange(2) + 1 + it = np.nditer([p, None], + ['delay_bufalloc', 'reduce_ok', 'buffered', 'external_loop'], + [['readonly'], ['readwrite', 'allocate']], + op_axes=[[-1, 0], [-1, -1]], + itershape=(2, 2)) + with it: + it.operands[1].fill(0) + it.reset() + assert_equal(it[0], [1, 2, 1, 2]) + + # Iterator inner loop should take argument contiguity into account + x = np.ones((7, 13, 8), np.int8)[4:6,1:11:6,1:5].transpose(1, 2, 0) + x[...] = np.arange(x.size).reshape(x.shape) + y_base = np.arange(4*4, dtype=np.int8).reshape(4, 4) + y_base_copy = y_base.copy() + y = y_base[::2,:,None] + + it = np.nditer([y, x], + ['buffered', 'external_loop', 'reduce_ok'], + [['readwrite'], ['readonly']]) + with it: + for a, b in it: + a.fill(2) + + assert_equal(y_base[1::2], y_base_copy[1::2]) + assert_equal(y_base[::2], 2) + +def test_iter_buffering_reduction_reuse_reduce_loops(): + # There was a bug triggering reuse of the reduce loop inappropriately, + # which caused processing to happen in unnecessarily small chunks + # and overran the buffer. 
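 + # With buffersize=5 and rows of 7 elements, each of the two reduce passes + # should arrive as chunks of 5 and 2 elements, i.e. bufsizes == [5, 2, 5, 2].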
+ + a = np.zeros((2, 7)) + b = np.zeros((1, 7)) + it = np.nditer([a, b], flags=['reduce_ok', 'external_loop', 'buffered'], + op_flags=[['readonly'], ['readwrite']], + buffersize=5) + + with it: + bufsizes = [x.shape[0] for x, y in it] + assert_equal(bufsizes, [5, 2, 5, 2]) + assert_equal(sum(bufsizes), a.size) + +def test_iter_writemasked_badinput(): + a = np.zeros((2, 3)) + b = np.zeros((3,)) + m = np.array([[True, True, False], [False, True, False]]) + m2 = np.array([True, True, False]) + m3 = np.array([0, 1, 1], dtype='u1') + mbad1 = np.array([0, 1, 1], dtype='i1') + mbad2 = np.array([0, 1, 1], dtype='f4') + + # Need an 'arraymask' if any operand is 'writemasked' + assert_raises(ValueError, nditer, [a, m], [], + [['readwrite', 'writemasked'], ['readonly']]) + + # A 'writemasked' operand must not be readonly + assert_raises(ValueError, nditer, [a, m], [], + [['readonly', 'writemasked'], ['readonly', 'arraymask']]) + + # 'writemasked' and 'arraymask' may not be used together + assert_raises(ValueError, nditer, [a, m], [], + [['readonly'], ['readwrite', 'arraymask', 'writemasked']]) + + # 'arraymask' may only be specified once + assert_raises(ValueError, nditer, [a, m, m2], [], + [['readwrite', 'writemasked'], + ['readonly', 'arraymask'], + ['readonly', 'arraymask']]) + + # An 'arraymask' with nothing 'writemasked' also doesn't make sense + assert_raises(ValueError, nditer, [a, m], [], + [['readwrite'], ['readonly', 'arraymask']]) + + # A writemasked reduction requires a similarly smaller mask + assert_raises(ValueError, nditer, [a, b, m], ['reduce_ok'], + [['readonly'], + ['readwrite', 'writemasked'], + ['readonly', 'arraymask']]) + # But this should work with a smaller/equal mask to the reduction operand + np.nditer([a, b, m2], ['reduce_ok'], + [['readonly'], + ['readwrite', 'writemasked'], + ['readonly', 'arraymask']]) + # The arraymask itself cannot be a reduction + assert_raises(ValueError, nditer, [a, b, m2], ['reduce_ok'], + [['readonly'], + ['readwrite', 'writemasked'], + ['readwrite', 'arraymask']]) + + # A uint8 mask is ok too + np.nditer([a, m3], ['buffered'], + [['readwrite', 'writemasked'], + ['readonly', 'arraymask']], + op_dtypes=['f4', None], + casting='same_kind') + # An int8 mask isn't ok + assert_raises(TypeError, np.nditer, [a, mbad1], ['buffered'], + [['readwrite', 'writemasked'], + ['readonly', 'arraymask']], + op_dtypes=['f4', None], + casting='same_kind') + # A float32 mask isn't ok + assert_raises(TypeError, np.nditer, [a, mbad2], ['buffered'], + [['readwrite', 'writemasked'], + ['readonly', 'arraymask']], + op_dtypes=['f4', None], + casting='same_kind') + +def _is_buffered(iterator): + try: + iterator.itviews + except ValueError: + return True + return False + +@pytest.mark.parametrize("a", + [np.zeros((3,), dtype='f8'), + np.zeros((9876, 3*5), dtype='f8')[::2, :], + np.zeros((4, 312, 124, 3), dtype='f8')[::2, :, ::2, :], + # Also test with the last dimension strided (so it does not fit if + # there is repeated access) + np.zeros((9,), dtype='f8')[::3], + np.zeros((9876, 3*10), dtype='f8')[::2, ::5], + np.zeros((4, 312, 124, 3), dtype='f8')[::2, :, ::2, ::-1]]) +def test_iter_writemasked(a): + # Note, the slicing above is to ensure that nditer cannot combine multiple + # axes into one. The repetition is just to make things a bit more + # interesting. + shape = a.shape + reps = shape[-1] // 3 + msk = np.empty(shape, dtype=bool) + msk[...] = [True, True, False] * reps + + # When buffering is unused, 'writemasked' effectively does nothing. 
+ # It's up to the user of the iterator to obey the requested semantics. + it = np.nditer([a, msk], [], + [['readwrite', 'writemasked'], + ['readonly', 'arraymask']]) + with it: + for x, m in it: + x[...] = 1 + # Because we violated the semantics, all the values became 1 + assert_equal(a, np.broadcast_to([1, 1, 1] * reps, shape)) + + # Even if buffering is enabled, we still may be accessing the array + # directly. + it = np.nditer([a, msk], ['buffered'], + [['readwrite', 'writemasked'], + ['readonly', 'arraymask']]) + # @seberg: I honestly don't currently understand why a "buffered" iterator + # would end up not using a buffer for the small array here at least when + # "writemasked" is used, that seems confusing... Check by testing for + # actual memory overlap! + is_buffered = True + with it: + for x, m in it: + x[...] = 2.5 + if np.may_share_memory(x, a): + is_buffered = False + + if not is_buffered: + # Because we violated the semantics, all the values became 2.5 + assert_equal(a, np.broadcast_to([2.5, 2.5, 2.5] * reps, shape)) + else: + # For large sizes, the iterator may be buffered: + assert_equal(a, np.broadcast_to([2.5, 2.5, 1] * reps, shape)) + a[...] = 2.5 + + # If buffering will definitely happen, for instance because of + # a cast, only the items selected by the mask will be copied back from + # the buffer. + it = np.nditer([a, msk], ['buffered'], + [['readwrite', 'writemasked'], + ['readonly', 'arraymask']], + op_dtypes=['i8', None], + casting='unsafe') + with it: + for x, m in it: + x[...] = 3 + # Even though we violated the semantics, only the selected values + # were copied back + assert_equal(a, np.broadcast_to([3, 3, 2.5] * reps, shape)) + +def test_iter_writemasked_decref(): + # force casting (to make it interesting) by using a structured dtype. 
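 + # The big-endian ">i,O" operand must be byteswap-cast into the "<i,O" + # buffer, and the object field means each buffered element holds a + # reference that has to be released correctly when the buffer is flushed.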
+ arr = np.arange(10000).astype(">i,O") + original = arr.copy() + mask = np.random.randint(0, 2, size=10000).astype(bool) + + it = np.nditer([arr, mask], ['buffered', "refs_ok"], + [['readwrite', 'writemasked'], + ['readonly', 'arraymask']], + op_dtypes=["<i,O", None]) + + +class TestBoolCmp: + def test_float(self): + # offset for alignment test + for i in range(4): + assert_array_equal(self.f[i:] > 0, self.ef[i:]) + assert_array_equal(self.f[i:] - 1 >= 0, self.ef[i:]) + assert_array_equal(self.f[i:] == 0, ~self.ef[i:]) + assert_array_equal(-self.f[i:] < 0, self.ef[i:]) + assert_array_equal(-self.f[i:] + 1 <= 0, self.ef[i:]) + r = self.f[i:] != 0 + assert_array_equal(r, self.ef[i:]) + r2 = self.f[i:] != np.zeros_like(self.f[i:]) + r3 = 0 != self.f[i:] + assert_array_equal(r, r2) + assert_array_equal(r, r3) + # check bool == 0x1 + assert_array_equal(r.view(np.int8), r.astype(np.int8)) + assert_array_equal(r2.view(np.int8), r2.astype(np.int8)) + assert_array_equal(r3.view(np.int8), r3.astype(np.int8)) + + # isnan on amd64 takes the same code path + assert_array_equal(np.isnan(self.nf[i:]), self.ef[i:]) + assert_array_equal(np.isfinite(self.nf[i:]), ~self.ef[i:]) + assert_array_equal(np.isfinite(self.inff[i:]), ~self.ef[i:]) + assert_array_equal(np.isinf(self.inff[i:]), self.efnonan[i:]) + assert_array_equal(np.signbit(self.signf[i:]), self.ef[i:]) + + def test_double(self): + # offset for alignment test + for i in range(2): + assert_array_equal(self.d[i:] > 0, self.ed[i:]) + assert_array_equal(self.d[i:] - 1 >= 0, self.ed[i:]) + assert_array_equal(self.d[i:] == 0, ~self.ed[i:]) + assert_array_equal(-self.d[i:] < 0, self.ed[i:]) + assert_array_equal(-self.d[i:] + 1 <= 0, self.ed[i:]) + r = self.d[i:] != 0 + assert_array_equal(r, self.ed[i:]) + r2 = self.d[i:] != np.zeros_like(self.d[i:]) + r3 = 0 != self.d[i:] + assert_array_equal(r, r2) + assert_array_equal(r, r3) + # check bool == 0x1 + assert_array_equal(r.view(np.int8), r.astype(np.int8)) + assert_array_equal(r2.view(np.int8), r2.astype(np.int8)) + assert_array_equal(r3.view(np.int8), r3.astype(np.int8)) + + # isnan on amd64 takes the same code path + assert_array_equal(np.isnan(self.nd[i:]), self.ed[i:]) + assert_array_equal(np.isfinite(self.nd[i:]), ~self.ed[i:]) + assert_array_equal(np.isfinite(self.infd[i:]), ~self.ed[i:]) + assert_array_equal(np.isinf(self.infd[i:]), self.ednonan[i:]) + assert_array_equal(np.signbit(self.signd[i:]), self.ed[i:]) + + +class TestSeterr: + def test_default(self): + err = np.geterr() + assert_equal(err, + dict(divide='warn', + invalid='warn', + over='warn', + under='ignore') + ) + + def test_set(self): + with np.errstate(): + err = np.seterr() + old = np.seterr(divide='print') + assert_(err == old) + new = np.seterr() + assert_(new['divide'] == 'print') + np.seterr(over='raise') + assert_(np.geterr()['over'] == 'raise') + assert_(new['divide'] == 'print') + np.seterr(**old) + assert_(np.geterr() == old) + + @pytest.mark.skipif(platform.machine() == "armv5tel", reason="See gh-413.") + def test_divide_err(self): + with np.errstate(divide='raise'): + with assert_raises(FloatingPointError): + np.array([1.]) / np.array([0.]) + + np.seterr(divide='ignore') + np.array([1.]) / np.array([0.]) + + def test_errobj(self): + olderrobj = np.geterrobj() + self.called = 0 + try: + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + with np.errstate(divide='warn'): + np.seterrobj([20000, 1, None]) + np.array([1.]) / np.array([0.]) + assert_equal(len(w), 1) + + def log_err(*args): + self.called += 1 + extobj_err = args + assert_(len(extobj_err) == 2) + assert_("divide" in extobj_err[0]) + + with np.errstate(divide='ignore'): + 
np.seterrobj([20000, 3, log_err]) + np.array([1.]) / np.array([0.]) + assert_equal(self.called, 1) + + np.seterrobj(olderrobj) + with np.errstate(divide='ignore'): + np.divide(1., 0., extobj=[20000, 3, log_err]) + assert_equal(self.called, 2) + finally: + np.seterrobj(olderrobj) + del self.called + + def test_errobj_noerrmask(self): + # errmask = 0 has a special code path for the default + olderrobj = np.geterrobj() + try: + # set errobj to something non default + np.seterrobj([umath.UFUNC_BUFSIZE_DEFAULT, + umath.ERR_DEFAULT + 1, None]) + # call a ufunc + np.isnan(np.array([6])) + # same with the default, lots of times to get rid of possible + # pre-existing stack in the code + for i in range(10000): + np.seterrobj([umath.UFUNC_BUFSIZE_DEFAULT, umath.ERR_DEFAULT, + None]) + np.isnan(np.array([6])) + finally: + np.seterrobj(olderrobj) + + +class TestFloatExceptions: + def assert_raises_fpe(self, fpeerr, flop, x, y): + ftype = type(x) + try: + flop(x, y) + assert_(False, + "Type %s did not raise fpe error '%s'." % (ftype, fpeerr)) + except FloatingPointError as exc: + assert_(str(exc).find(fpeerr) >= 0, + "Type %s raised wrong fpe error '%s'." % (ftype, exc)) + + def assert_op_raises_fpe(self, fpeerr, flop, sc1, sc2): + # Check that fpe exception is raised. + # + # Given a floating operation `flop` and two scalar values, check that + # the operation raises the floating point exception specified by + # `fpeerr`. Tests all variants with 0-d array scalars as well. + + self.assert_raises_fpe(fpeerr, flop, sc1, sc2) + self.assert_raises_fpe(fpeerr, flop, sc1[()], sc2) + self.assert_raises_fpe(fpeerr, flop, sc1, sc2[()]) + self.assert_raises_fpe(fpeerr, flop, sc1[()], sc2[()]) + + # Test for all real and complex float types + @pytest.mark.parametrize("typecode", np.typecodes["AllFloat"]) + def test_floating_exceptions(self, typecode): + # Test basic arithmetic function errors + with np.errstate(all='raise'): + ftype = np.obj2sctype(typecode) + if np.dtype(ftype).kind == 'f': + # Get some extreme values for the type + fi = np.finfo(ftype) + ft_tiny = fi._machar.tiny + ft_max = fi.max + ft_eps = fi.eps + underflow = 'underflow' + divbyzero = 'divide by zero' + else: + # 'c', complex, corresponding real dtype + rtype = type(ftype(0).real) + fi = np.finfo(rtype) + ft_tiny = ftype(fi._machar.tiny) + ft_max = ftype(fi.max) + ft_eps = ftype(fi.eps) + # The complex types raise different exceptions + underflow = '' + divbyzero = '' + overflow = 'overflow' + invalid = 'invalid' + + # The value of tiny for double double is NaN, so we need to + # pass the assert + if not np.isnan(ft_tiny): + self.assert_raises_fpe(underflow, + lambda a, b: a/b, ft_tiny, ft_max) + self.assert_raises_fpe(underflow, + lambda a, b: a*b, ft_tiny, ft_tiny) + self.assert_raises_fpe(overflow, + lambda a, b: a*b, ft_max, ftype(2)) + self.assert_raises_fpe(overflow, + lambda a, b: a/b, ft_max, ftype(0.5)) + self.assert_raises_fpe(overflow, + lambda a, b: a+b, ft_max, ft_max*ft_eps) + self.assert_raises_fpe(overflow, + lambda a, b: a-b, -ft_max, ft_max*ft_eps) + self.assert_raises_fpe(overflow, + np.power, ftype(2), ftype(2**fi.nexp)) + self.assert_raises_fpe(divbyzero, + lambda a, b: a/b, ftype(1), ftype(0)) + self.assert_raises_fpe( + invalid, lambda a, b: a/b, ftype(np.inf), ftype(np.inf) + ) + self.assert_raises_fpe(invalid, + lambda a, b: a/b, ftype(0), ftype(0)) + self.assert_raises_fpe( + invalid, lambda a, b: a-b, ftype(np.inf), ftype(np.inf) + ) + self.assert_raises_fpe( + invalid, lambda a, b: a+b, ftype(np.inf), 
ftype(-np.inf) + ) + self.assert_raises_fpe(invalid, + lambda a, b: a*b, ftype(0), ftype(np.inf)) + + def test_warnings(self): + # test warning code path + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + with np.errstate(all="warn"): + np.divide(1, 0.) + assert_equal(len(w), 1) + assert_("divide by zero" in str(w[0].message)) + np.array(1e300) * np.array(1e300) + assert_equal(len(w), 2) + assert_("overflow" in str(w[-1].message)) + np.array(np.inf) - np.array(np.inf) + assert_equal(len(w), 3) + assert_("invalid value" in str(w[-1].message)) + np.array(1e-300) * np.array(1e-300) + assert_equal(len(w), 4) + assert_("underflow" in str(w[-1].message)) + + +class TestTypes: + def check_promotion_cases(self, promote_func): + # tests that the scalars get coerced correctly. + b = np.bool_(0) + i8, i16, i32, i64 = np.int8(0), np.int16(0), np.int32(0), np.int64(0) + u8, u16, u32, u64 = np.uint8(0), np.uint16(0), np.uint32(0), np.uint64(0) + f32, f64, fld = np.float32(0), np.float64(0), np.longdouble(0) + c64, c128, cld = np.complex64(0), np.complex128(0), np.clongdouble(0) + + # coercion within the same kind + assert_equal(promote_func(i8, i16), np.dtype(np.int16)) + assert_equal(promote_func(i32, i8), np.dtype(np.int32)) + assert_equal(promote_func(i16, i64), np.dtype(np.int64)) + assert_equal(promote_func(u8, u32), np.dtype(np.uint32)) + assert_equal(promote_func(f32, f64), np.dtype(np.float64)) + assert_equal(promote_func(fld, f32), np.dtype(np.longdouble)) + assert_equal(promote_func(f64, fld), np.dtype(np.longdouble)) + assert_equal(promote_func(c128, c64), np.dtype(np.complex128)) + assert_equal(promote_func(cld, c128), np.dtype(np.clongdouble)) + assert_equal(promote_func(c64, fld), np.dtype(np.clongdouble)) + + # coercion between kinds + assert_equal(promote_func(b, i32), np.dtype(np.int32)) + assert_equal(promote_func(b, u8), np.dtype(np.uint8)) + assert_equal(promote_func(i8, u8), np.dtype(np.int16)) + assert_equal(promote_func(u8, i32), np.dtype(np.int32)) + assert_equal(promote_func(i64, u32), np.dtype(np.int64)) + assert_equal(promote_func(u64, i32), np.dtype(np.float64)) + assert_equal(promote_func(i32, f32), np.dtype(np.float64)) + assert_equal(promote_func(i64, f32), np.dtype(np.float64)) + assert_equal(promote_func(f32, i16), np.dtype(np.float32)) + assert_equal(promote_func(f32, u32), np.dtype(np.float64)) + assert_equal(promote_func(f32, c64), np.dtype(np.complex64)) + assert_equal(promote_func(c128, f32), np.dtype(np.complex128)) + assert_equal(promote_func(cld, f64), np.dtype(np.clongdouble)) + + # coercion between scalars and 1-D arrays + assert_equal(promote_func(np.array([b]), i8), np.dtype(np.int8)) + assert_equal(promote_func(np.array([b]), u8), np.dtype(np.uint8)) + assert_equal(promote_func(np.array([b]), i32), np.dtype(np.int32)) + assert_equal(promote_func(np.array([b]), u32), np.dtype(np.uint32)) + assert_equal(promote_func(np.array([i8]), i64), np.dtype(np.int8)) + assert_equal(promote_func(u64, np.array([i32])), np.dtype(np.int32)) + assert_equal(promote_func(i64, np.array([u32])), np.dtype(np.uint32)) + assert_equal(promote_func(np.int32(-1), np.array([u64])), + np.dtype(np.float64)) + assert_equal(promote_func(f64, np.array([f32])), np.dtype(np.float32)) + assert_equal(promote_func(fld, np.array([f32])), np.dtype(np.float32)) + assert_equal(promote_func(np.array([f64]), fld), np.dtype(np.float64)) + assert_equal(promote_func(fld, np.array([c64])), + np.dtype(np.complex64)) + assert_equal(promote_func(c64, np.array([f64])), + 
np.dtype(np.complex128)) + assert_equal(promote_func(np.complex64(3j), np.array([f64])), + np.dtype(np.complex128)) + + # coercion between scalars and 1-D arrays, where + # the scalar has greater kind than the array + assert_equal(promote_func(np.array([b]), f64), np.dtype(np.float64)) + assert_equal(promote_func(np.array([b]), i64), np.dtype(np.int64)) + assert_equal(promote_func(np.array([b]), u64), np.dtype(np.uint64)) + assert_equal(promote_func(np.array([i8]), f64), np.dtype(np.float64)) + assert_equal(promote_func(np.array([u16]), f64), np.dtype(np.float64)) + + # uint and int are treated as the same "kind" for + # the purposes of array-scalar promotion. + assert_equal(promote_func(np.array([u16]), i32), np.dtype(np.uint16)) + + # float and complex are treated as the same "kind" for + # the purposes of array-scalar promotion, so that you can do + # (0j + float32array) to get a complex64 array instead of + # a complex128 array. + assert_equal(promote_func(np.array([f32]), c128), + np.dtype(np.complex64)) + + def test_coercion(self): + def res_type(a, b): + return np.add(a, b).dtype + + self.check_promotion_cases(res_type) + + # Use-case: float/complex scalar * bool/int8 array + # shouldn't narrow the float/complex type + for a in [np.array([True, False]), np.array([-3, 12], dtype=np.int8)]: + b = 1.234 * a + assert_equal(b.dtype, np.dtype('f8'), "array type %s" % a.dtype) + b = np.longdouble(1.234) * a + assert_equal(b.dtype, np.dtype(np.longdouble), + "array type %s" % a.dtype) + b = np.float64(1.234) * a + assert_equal(b.dtype, np.dtype('f8'), "array type %s" % a.dtype) + b = np.float32(1.234) * a + assert_equal(b.dtype, np.dtype('f4'), "array type %s" % a.dtype) + b = np.float16(1.234) * a + assert_equal(b.dtype, np.dtype('f2'), "array type %s" % a.dtype) + + b = 1.234j * a + assert_equal(b.dtype, np.dtype('c16'), "array type %s" % a.dtype) + b = np.clongdouble(1.234j) * a + assert_equal(b.dtype, np.dtype(np.clongdouble), + "array type %s" % a.dtype) + b = np.complex128(1.234j) * a + assert_equal(b.dtype, np.dtype('c16'), "array type %s" % a.dtype) + b = np.complex64(1.234j) * a + assert_equal(b.dtype, np.dtype('c8'), "array type %s" % a.dtype) + + # The following use-case is problematic, and to resolve its + # tricky side-effects requires more changes. 
+ # + # Use-case: (1-t)*a, where 't' is a boolean array and 'a' is + # a float32, shouldn't promote to float64 + # + # a = np.array([1.0, 1.5], dtype=np.float32) + # t = np.array([True, False]) + # b = t*a + # assert_equal(b, [1.0, 0.0]) + # assert_equal(b.dtype, np.dtype('f4')) + # b = (1-t)*a + # assert_equal(b, [0.0, 1.5]) + # assert_equal(b.dtype, np.dtype('f4')) + # + # Probably ~t (bitwise negation) is more proper to use here, + # but this is arguably less intuitive to understand at a glance, and + # would fail if 't' is actually an integer array instead of boolean: + # + # b = (~t)*a + # assert_equal(b, [0.0, 1.5]) + # assert_equal(b.dtype, np.dtype('f4')) + + def test_result_type(self): + self.check_promotion_cases(np.result_type) + assert_(np.result_type(None) == np.dtype(None)) + + def test_promote_types_endian(self): + # promote_types should always return native-endian types + assert_equal(np.promote_types('<i8', '>i8'), np.dtype('i8')) + + assert_equal(np.promote_types('>i8', '>U16'), np.dtype('U21')) + assert_equal(np.promote_types('<U16', '>i8'), np.dtype('U21')) + assert_equal(np.promote_types('<S5', '>U8'), np.dtype('U8')) + assert_equal(np.promote_types('<U8', '>S5'), np.dtype('U8')) + assert_equal(np.promote_types('>U8', '>U5'), np.dtype('U8')) + + assert_equal(np.promote_types('<M8', '>M8'), np.dtype('M8')) + assert_equal(np.promote_types('<m8', '>m8'), np.dtype('m8')) + + def test_can_cast_and_promote_usertypes(self): + # The rational type defines safe casting for signed integers, + # boolean. Rational itself *does* cast safely to double. + # (rational does not actually cast to all signed integers, e.g. + # int64 can be both long and longlong and it registers only the first) + valid_types = ["int8", "int16", "int32", "int64", "bool"] + invalid_types = "BHILQP" + "FDG" + "mM" + "f" + "V" + + rational_dt = np.dtype(rational) + for numpy_dtype in valid_types: + numpy_dtype = np.dtype(numpy_dtype) + assert np.can_cast(numpy_dtype, rational_dt) + assert np.promote_types(numpy_dtype, rational_dt) is rational_dt + + for numpy_dtype in invalid_types: + numpy_dtype = np.dtype(numpy_dtype) + assert not np.can_cast(numpy_dtype, rational_dt) + with pytest.raises(TypeError): + np.promote_types(numpy_dtype, rational_dt) + + double_dt = np.dtype("double") + assert np.can_cast(rational_dt, double_dt) + assert np.promote_types(double_dt, rational_dt) is double_dt + + @pytest.mark.parametrize("swap", ["", "swap"]) + @pytest.mark.parametrize("string_dtype", ["U", "S"]) + def test_promote_types_strings(self, swap, string_dtype): + if swap == "swap": + promote_types = lambda a, b: np.promote_types(b, a) + else: + promote_types = np.promote_types + + S = string_dtype + + # Promote numeric with unsized string: + assert_equal(promote_types('bool', S), np.dtype(S+'5')) + assert_equal(promote_types('b', S), np.dtype(S+'4')) + assert_equal(promote_types('u1', S), np.dtype(S+'3')) + assert_equal(promote_types('u2', S), np.dtype(S+'5')) + assert_equal(promote_types('u4', S), np.dtype(S+'10')) + assert_equal(promote_types('u8', S), np.dtype(S+'20')) + assert_equal(promote_types('i1', S), np.dtype(S+'4')) + assert_equal(promote_types('i2', S), np.dtype(S+'6')) + assert_equal(promote_types('i4', S), np.dtype(S+'11')) + assert_equal(promote_types('i8', S), np.dtype(S+'21')) + # Promote numeric with sized string: + assert_equal(promote_types('bool', S+'1'), np.dtype(S+'5')) + assert_equal(promote_types('bool', S+'30'), np.dtype(S+'30')) + assert_equal(promote_types('b', S+'1'), np.dtype(S+'4')) + 
assert_equal(promote_types('b', S+'30'), np.dtype(S+'30')) + assert_equal(promote_types('u1', S+'1'), np.dtype(S+'3')) + assert_equal(promote_types('u1', S+'30'), np.dtype(S+'30')) + assert_equal(promote_types('u2', S+'1'), np.dtype(S+'5')) + assert_equal(promote_types('u2', S+'30'), np.dtype(S+'30')) + assert_equal(promote_types('u4', S+'1'), np.dtype(S+'10')) + assert_equal(promote_types('u4', S+'30'), np.dtype(S+'30')) + assert_equal(promote_types('u8', S+'1'), np.dtype(S+'20')) + assert_equal(promote_types('u8', S+'30'), np.dtype(S+'30')) + # Promote with object: + assert_equal(promote_types('O', S+'30'), np.dtype('O')) + + @pytest.mark.parametrize("dtype", + list(np.typecodes["All"]) + + ["i,i", "S3", "S100", "U3", "U100", rational]) + def test_promote_identical_types_metadata(self, dtype): + # The same type passed in twice to promote types always + # preserves metadata + metadata = {1: 1} + dtype = np.dtype(dtype, metadata=metadata) + + res = np.promote_types(dtype, dtype) + assert res.metadata == dtype.metadata + + # byte-swapping preserves and makes the dtype native: + dtype = dtype.newbyteorder() + if dtype.isnative: + # The type does not have byte swapping + return + + res = np.promote_types(dtype, dtype) + if res.char in "?bhilqpBHILQPefdgFDGOmM" or dtype.type is rational: + # Metadata is lost for simple promotions (they create a new dtype) + assert res.metadata is None + else: + assert res.metadata == metadata + if dtype.kind != "V": + # the result is native (except for structured void) + assert res.isnative + + @pytest.mark.slow + @pytest.mark.filterwarnings('ignore:Promotion of numbers:FutureWarning') + @pytest.mark.parametrize(["dtype1", "dtype2"], + itertools.product( + list(np.typecodes["All"]) + + ["i,i", "S3", "S100", "U3", "U100", rational], + repeat=2)) + def test_promote_types_metadata(self, dtype1, dtype2): + """Metadata handling in promotion does not appear formalized + right now in NumPy. This test should thus be considered to + document behaviour, rather than test the correct definition of it. + + This test is very ugly, it was useful for rewriting part of the + promotion, but probably should eventually be replaced/deleted + (i.e. when metadata handling in promotion is better defined). + """ + metadata1 = {1: 1} + metadata2 = {2: 2} + dtype1 = np.dtype(dtype1, metadata=metadata1) + dtype2 = np.dtype(dtype2, metadata=metadata2) + + try: + res = np.promote_types(dtype1, dtype2) + except TypeError: + # Promotion failed, this test only checks metadata + return + + if res.char in "?bhilqpBHILQPefdgFDGOmM" or res.type is rational: + # All simple types lose metadata (due to using promotion table): + assert res.metadata is None + elif res == dtype1: + # If one result is the result, it is usually returned unchanged: + assert res is dtype1 + elif res == dtype2: + # dtype1 may have been cast to the same type/kind as dtype2. 
+            # If the resulting dtype is identical we currently pick the cast
+            # version of dtype1, which lost the metadata:
+            if np.promote_types(dtype1, dtype2.kind) == dtype2:
+                assert res.metadata is None
+            else:
+                assert res.metadata == metadata2
+        else:
+            assert res.metadata is None
+
+        # Try again for byteswapped version
+        dtype1 = dtype1.newbyteorder()
+        assert dtype1.metadata == metadata1
+        res_bs = np.promote_types(dtype1, dtype2)
+        if res_bs.names is not None:
+            # Structured promotion doesn't remove byteswap:
+            assert res_bs.newbyteorder() == res
+        else:
+            assert res_bs == res
+        assert res_bs.metadata == res.metadata
+
+    @pytest.mark.parametrize(["dtype1", "dtype2"],
+            [[np.dtype("V6"), np.dtype("V10")],
+             [np.dtype([("name1", "i8")]), np.dtype([("name2", "i8")])],
+             [np.dtype("i8,i8"), np.dtype("i4,i4")],
+             ])
+    def test_invalid_void_promotion(self, dtype1, dtype2):
+        # Mainly test structured void promotion, which currently allows
+        # byte-swapping, but nothing else:
+        with pytest.raises(TypeError):
+            np.promote_types(dtype1, dtype2)
+
+    @pytest.mark.parametrize(["dtype1", "dtype2"],
+            [[np.dtype("V10"), np.dtype("V10")],
+             [np.dtype([("name1", "<i8")]), np.dtype([("name1", ">i8")])],
+             [np.dtype("i8,i8"), np.dtype("i8,>i8")],
+             ])
+    def test_valid_void_promotion(self, dtype1, dtype2):
+        assert np.promote_types(dtype1, dtype2) is dtype1
+
+    def test_can_cast(self):
+        assert_(np.can_cast(np.int32, np.int64))
+        assert_(np.can_cast(np.float64, complex))
+        assert_(not np.can_cast(complex, float))
+
+        assert_(np.can_cast('i8', 'f8'))
+        assert_(not np.can_cast('i8', 'f4'))
+        assert_(np.can_cast('i4', 'S11'))
+
+        # Byte-order prefixes restored below; they were mangled in transit.
+        assert_(np.can_cast('i8', 'i8', 'no'))
+        assert_(not np.can_cast('<i8', '>i8', 'no'))
+
+        assert_(np.can_cast('<i8', '>i8', 'equiv'))
+        assert_(not np.can_cast('<i4', '>i8', 'equiv'))
+
+        assert_(np.can_cast('<i4', '>i8', 'safe'))
+        assert_(not np.can_cast('<i8', '>i4', 'safe'))
+
+        assert_(np.can_cast('<i8', '>i4', 'same_kind'))
+        assert_(not np.can_cast('<i8', '>u4', 'same_kind'))
+
+        assert_(np.can_cast('<i8', '>u4', 'unsafe'))
+
+        assert_(np.can_cast('bool', 'S5'))
+        assert_(not np.can_cast('bool', 'S4'))
+
+        assert_(np.can_cast('b', 'S4'))
+        assert_(not np.can_cast('b', 'S3'))
+
+        assert_(np.can_cast('u1', 'S3'))
+        assert_(not np.can_cast('u1', 'S2'))
+        assert_(np.can_cast('u2', 'S5'))
+        assert_(not np.can_cast('u2', 'S4'))
+        assert_(np.can_cast('u4', 'S10'))
+        assert_(not np.can_cast('u4', 'S9'))
+        assert_(np.can_cast('u8', 'S20'))
+        assert_(not np.can_cast('u8', 'S19'))
+
+        assert_(np.can_cast('i1', 'S4'))
+        assert_(not np.can_cast('i1', 'S3'))
+        assert_(np.can_cast('i2', 'S6'))
+        assert_(not np.can_cast('i2', 'S5'))
+        assert_(np.can_cast('i4', 'S11'))
+        assert_(not np.can_cast('i4', 'S10'))
+        assert_(np.can_cast('i8', 'S21'))
+        assert_(not np.can_cast('i8', 'S20'))
+
+        assert_(np.can_cast('bool', 'U5'))
+        assert_(not np.can_cast('bool', 'U4'))
+
+        assert_(np.can_cast('b', 'U4'))
+        assert_(not np.can_cast('b', 'U3'))
+
+        assert_(np.can_cast('u1', 'U3'))
+        assert_(not np.can_cast('u1', 'U2'))
+        assert_(np.can_cast('u2', 'U5'))
+        assert_(not np.can_cast('u2', 'U4'))
+        assert_(np.can_cast('u4', 'U10'))
+        assert_(not np.can_cast('u4', 'U9'))
+        assert_(np.can_cast('u8', 'U20'))
+        assert_(not np.can_cast('u8', 'U19'))
+
+        assert_(np.can_cast('i1', 'U4'))
+        assert_(not np.can_cast('i1', 'U3'))
+        assert_(np.can_cast('i2', 'U6'))
+        assert_(not np.can_cast('i2', 'U5'))
+        assert_(np.can_cast('i4', 'U11'))
+        assert_(not np.can_cast('i4', 'U10'))
+        assert_(np.can_cast('i8', 'U21'))
+        assert_(not np.can_cast('i8', 'U20'))
+
+        assert_raises(TypeError, np.can_cast, 'i4', None)
assert_raises(TypeError, np.can_cast, None, 'i4') + + # Also test keyword arguments + assert_(np.can_cast(from_=np.int32, to=np.int64)) + + def test_can_cast_simple_to_structured(self): + # Non-structured can only be cast to structured in 'unsafe' mode. + assert_(not np.can_cast('i4', 'i4,i4')) + assert_(not np.can_cast('i4', 'i4,i2')) + assert_(np.can_cast('i4', 'i4,i4', casting='unsafe')) + assert_(np.can_cast('i4', 'i4,i2', casting='unsafe')) + # Even if there is just a single field which is OK. + assert_(not np.can_cast('i2', [('f1', 'i4')])) + assert_(not np.can_cast('i2', [('f1', 'i4')], casting='same_kind')) + assert_(np.can_cast('i2', [('f1', 'i4')], casting='unsafe')) + # It should be the same for recursive structured or subarrays. + assert_(not np.can_cast('i2', [('f1', 'i4,i4')])) + assert_(np.can_cast('i2', [('f1', 'i4,i4')], casting='unsafe')) + assert_(not np.can_cast('i2', [('f1', '(2,3)i4')])) + assert_(np.can_cast('i2', [('f1', '(2,3)i4')], casting='unsafe')) + + def test_can_cast_structured_to_simple(self): + # Need unsafe casting for structured to simple. + assert_(not np.can_cast([('f1', 'i4')], 'i4')) + assert_(np.can_cast([('f1', 'i4')], 'i4', casting='unsafe')) + assert_(np.can_cast([('f1', 'i4')], 'i2', casting='unsafe')) + # Since it is unclear what is being cast, multiple fields to + # single should not work even for unsafe casting. + assert_(not np.can_cast('i4,i4', 'i4', casting='unsafe')) + # But a single field inside a single field is OK. + assert_(not np.can_cast([('f1', [('x', 'i4')])], 'i4')) + assert_(np.can_cast([('f1', [('x', 'i4')])], 'i4', casting='unsafe')) + # And a subarray is fine too - it will just take the first element + # (arguably not very consistently; might also take the first field). + assert_(not np.can_cast([('f0', '(3,)i4')], 'i4')) + assert_(np.can_cast([('f0', '(3,)i4')], 'i4', casting='unsafe')) + # But a structured subarray with multiple fields should fail. + assert_(not np.can_cast([('f0', ('i4,i4'), (2,))], 'i4', + casting='unsafe')) + + def test_can_cast_values(self): + # gh-5917 + for dt in np.sctypes['int'] + np.sctypes['uint']: + ii = np.iinfo(dt) + assert_(np.can_cast(ii.min, dt)) + assert_(np.can_cast(ii.max, dt)) + assert_(not np.can_cast(ii.min - 1, dt)) + assert_(not np.can_cast(ii.max + 1, dt)) + + for dt in np.sctypes['float']: + fi = np.finfo(dt) + assert_(np.can_cast(fi.min, dt)) + assert_(np.can_cast(fi.max, dt)) + + +# Custom exception class to test exception propagation in fromiter +class NIterError(Exception): + pass + + +class TestFromiter: + def makegen(self): + return (x**2 for x in range(24)) + + def test_types(self): + ai32 = np.fromiter(self.makegen(), np.int32) + ai64 = np.fromiter(self.makegen(), np.int64) + af = np.fromiter(self.makegen(), float) + assert_(ai32.dtype == np.dtype(np.int32)) + assert_(ai64.dtype == np.dtype(np.int64)) + assert_(af.dtype == np.dtype(float)) + + def test_lengths(self): + expected = np.array(list(self.makegen())) + a = np.fromiter(self.makegen(), int) + a20 = np.fromiter(self.makegen(), int, 20) + assert_(len(a) == len(expected)) + assert_(len(a20) == 20) + assert_raises(ValueError, np.fromiter, + self.makegen(), int, len(expected) + 10) + + def test_values(self): + expected = np.array(list(self.makegen())) + a = np.fromiter(self.makegen(), int) + a20 = np.fromiter(self.makegen(), int, 20) + assert_(np.alltrue(a == expected, axis=0)) + assert_(np.alltrue(a20 == expected[:20], axis=0)) + + def load_data(self, n, eindex): + # Utility method for the issue 2592 tests. 
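+        # np.fromiter consumes the iterator in C code; gh-2592 is about an
+        # exception raised partway through propagating to the caller intact
+        # instead of being swallowed (exercised by the two tests below).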
+ # Raise an exception at the desired index in the iterator. + for e in range(n): + if e == eindex: + raise NIterError('error at index %s' % eindex) + yield e + + def test_2592(self): + # Test iteration exceptions are correctly raised. + count, eindex = 10, 5 + assert_raises(NIterError, np.fromiter, + self.load_data(count, eindex), dtype=int, count=count) + + def test_2592_edge(self): + # Test iter. exceptions, edge case (exception at end of iterator). + count = 10 + eindex = count-1 + assert_raises(NIterError, np.fromiter, + self.load_data(count, eindex), dtype=int, count=count) + + +class TestNonzero: + def test_nonzero_trivial(self): + assert_equal(np.count_nonzero(np.array([])), 0) + assert_equal(np.count_nonzero(np.array([], dtype='?')), 0) + assert_equal(np.nonzero(np.array([])), ([],)) + + assert_equal(np.count_nonzero(np.array([0])), 0) + assert_equal(np.count_nonzero(np.array([0], dtype='?')), 0) + assert_equal(np.nonzero(np.array([0])), ([],)) + + assert_equal(np.count_nonzero(np.array([1])), 1) + assert_equal(np.count_nonzero(np.array([1], dtype='?')), 1) + assert_equal(np.nonzero(np.array([1])), ([0],)) + + def test_nonzero_zerod(self): + assert_equal(np.count_nonzero(np.array(0)), 0) + assert_equal(np.count_nonzero(np.array(0, dtype='?')), 0) + with assert_warns(DeprecationWarning): + assert_equal(np.nonzero(np.array(0)), ([],)) + + assert_equal(np.count_nonzero(np.array(1)), 1) + assert_equal(np.count_nonzero(np.array(1, dtype='?')), 1) + with assert_warns(DeprecationWarning): + assert_equal(np.nonzero(np.array(1)), ([0],)) + + def test_nonzero_onedim(self): + x = np.array([1, 0, 2, -1, 0, 0, 8]) + assert_equal(np.count_nonzero(x), 4) + assert_equal(np.count_nonzero(x), 4) + assert_equal(np.nonzero(x), ([0, 2, 3, 6],)) + + # x = np.array([(1, 2), (0, 0), (1, 1), (-1, 3), (0, 7)], + # dtype=[('a', 'i4'), ('b', 'i2')]) + x = np.array([(1, 2, -5, -3), (0, 0, 2, 7), (1, 1, 0, 1), (-1, 3, 1, 0), (0, 7, 0, 4)], + dtype=[('a', 'i4'), ('b', 'i2'), ('c', 'i1'), ('d', 'i8')]) + assert_equal(np.count_nonzero(x['a']), 3) + assert_equal(np.count_nonzero(x['b']), 4) + assert_equal(np.count_nonzero(x['c']), 3) + assert_equal(np.count_nonzero(x['d']), 4) + assert_equal(np.nonzero(x['a']), ([0, 2, 3],)) + assert_equal(np.nonzero(x['b']), ([0, 2, 3, 4],)) + + def test_nonzero_twodim(self): + x = np.array([[0, 1, 0], [2, 0, 3]]) + assert_equal(np.count_nonzero(x.astype('i1')), 3) + assert_equal(np.count_nonzero(x.astype('i2')), 3) + assert_equal(np.count_nonzero(x.astype('i4')), 3) + assert_equal(np.count_nonzero(x.astype('i8')), 3) + assert_equal(np.nonzero(x), ([0, 1, 1], [1, 0, 2])) + + x = np.eye(3) + assert_equal(np.count_nonzero(x.astype('i1')), 3) + assert_equal(np.count_nonzero(x.astype('i2')), 3) + assert_equal(np.count_nonzero(x.astype('i4')), 3) + assert_equal(np.count_nonzero(x.astype('i8')), 3) + assert_equal(np.nonzero(x), ([0, 1, 2], [0, 1, 2])) + + x = np.array([[(0, 1), (0, 0), (1, 11)], + [(1, 1), (1, 0), (0, 0)], + [(0, 0), (1, 5), (0, 1)]], dtype=[('a', 'f4'), ('b', 'u1')]) + assert_equal(np.count_nonzero(x['a']), 4) + assert_equal(np.count_nonzero(x['b']), 5) + assert_equal(np.nonzero(x['a']), ([0, 1, 1, 2], [2, 0, 1, 1])) + assert_equal(np.nonzero(x['b']), ([0, 0, 1, 2, 2], [0, 2, 0, 1, 2])) + + assert_(not x['a'].T.flags.aligned) + assert_equal(np.count_nonzero(x['a'].T), 4) + assert_equal(np.count_nonzero(x['b'].T), 5) + assert_equal(np.nonzero(x['a'].T), ([0, 1, 1, 2], [1, 1, 2, 0])) + assert_equal(np.nonzero(x['b'].T), ([0, 0, 1, 2, 2], [0, 1, 2, 0, 2])) + + def 
test_sparse(self): + # test special sparse condition boolean code path + for i in range(20): + c = np.zeros(200, dtype=bool) + c[i::20] = True + assert_equal(np.nonzero(c)[0], np.arange(i, 200 + i, 20)) + + c = np.zeros(400, dtype=bool) + c[10 + i:20 + i] = True + c[20 + i*2] = True + assert_equal(np.nonzero(c)[0], + np.concatenate((np.arange(10 + i, 20 + i), [20 + i*2]))) + + def test_return_type(self): + class C(np.ndarray): + pass + + for view in (C, np.ndarray): + for nd in range(1, 4): + shape = tuple(range(2, 2+nd)) + x = np.arange(np.prod(shape)).reshape(shape).view(view) + for nzx in (np.nonzero(x), x.nonzero()): + for nzx_i in nzx: + assert_(type(nzx_i) is np.ndarray) + assert_(nzx_i.flags.writeable) + + def test_count_nonzero_axis(self): + # Basic check of functionality + m = np.array([[0, 1, 7, 0, 0], [3, 0, 0, 2, 19]]) + + expected = np.array([1, 1, 1, 1, 1]) + assert_equal(np.count_nonzero(m, axis=0), expected) + + expected = np.array([2, 3]) + assert_equal(np.count_nonzero(m, axis=1), expected) + + assert_raises(ValueError, np.count_nonzero, m, axis=(1, 1)) + assert_raises(TypeError, np.count_nonzero, m, axis='foo') + assert_raises(np.AxisError, np.count_nonzero, m, axis=3) + assert_raises(TypeError, np.count_nonzero, + m, axis=np.array([[1], [2]])) + + def test_count_nonzero_axis_all_dtypes(self): + # More thorough test that the axis argument is respected + # for all dtypes and responds correctly when presented with + # either integer or tuple arguments for axis + msg = "Mismatch for dtype: %s" + + def assert_equal_w_dt(a, b, err_msg): + assert_equal(a.dtype, b.dtype, err_msg=err_msg) + assert_equal(a, b, err_msg=err_msg) + + for dt in np.typecodes['All']: + err_msg = msg % (np.dtype(dt).name,) + + if dt != 'V': + if dt != 'M': + m = np.zeros((3, 3), dtype=dt) + n = np.ones(1, dtype=dt) + + m[0, 0] = n[0] + m[1, 0] = n[0] + + else: # np.zeros doesn't work for np.datetime64 + m = np.array(['1970-01-01'] * 9) + m = m.reshape((3, 3)) + + m[0, 0] = '1970-01-12' + m[1, 0] = '1970-01-12' + m = m.astype(dt) + + expected = np.array([2, 0, 0], dtype=np.intp) + assert_equal_w_dt(np.count_nonzero(m, axis=0), + expected, err_msg=err_msg) + + expected = np.array([1, 1, 0], dtype=np.intp) + assert_equal_w_dt(np.count_nonzero(m, axis=1), + expected, err_msg=err_msg) + + expected = np.array(2) + assert_equal(np.count_nonzero(m, axis=(0, 1)), + expected, err_msg=err_msg) + assert_equal(np.count_nonzero(m, axis=None), + expected, err_msg=err_msg) + assert_equal(np.count_nonzero(m), + expected, err_msg=err_msg) + + if dt == 'V': + # There are no 'nonzero' objects for np.void, so the testing + # setup is slightly different for this dtype + m = np.array([np.void(1)] * 6).reshape((2, 3)) + + expected = np.array([0, 0, 0], dtype=np.intp) + assert_equal_w_dt(np.count_nonzero(m, axis=0), + expected, err_msg=err_msg) + + expected = np.array([0, 0], dtype=np.intp) + assert_equal_w_dt(np.count_nonzero(m, axis=1), + expected, err_msg=err_msg) + + expected = np.array(0) + assert_equal(np.count_nonzero(m, axis=(0, 1)), + expected, err_msg=err_msg) + assert_equal(np.count_nonzero(m, axis=None), + expected, err_msg=err_msg) + assert_equal(np.count_nonzero(m), + expected, err_msg=err_msg) + + def test_count_nonzero_axis_consistent(self): + # Check that the axis behaviour for valid axes in + # non-special cases is consistent (and therefore + # correct) by checking it against an integer array + # that is then casted to the generic object dtype + from itertools import combinations, permutations + + axis = (0, 
1, 2, 3) + size = (5, 5, 5, 5) + msg = "Mismatch for axis: %s" + + rng = np.random.RandomState(1234) + m = rng.randint(-100, 100, size=size) + n = m.astype(object) + + for length in range(len(axis)): + for combo in combinations(axis, length): + for perm in permutations(combo): + assert_equal( + np.count_nonzero(m, axis=perm), + np.count_nonzero(n, axis=perm), + err_msg=msg % (perm,)) + + def test_countnonzero_axis_empty(self): + a = np.array([[0, 0, 1], [1, 0, 1]]) + assert_equal(np.count_nonzero(a, axis=()), a.astype(bool)) + + def test_countnonzero_keepdims(self): + a = np.array([[0, 0, 1, 0], + [0, 3, 5, 0], + [7, 9, 2, 0]]) + assert_equal(np.count_nonzero(a, axis=0, keepdims=True), + [[1, 2, 3, 0]]) + assert_equal(np.count_nonzero(a, axis=1, keepdims=True), + [[1], [2], [3]]) + assert_equal(np.count_nonzero(a, keepdims=True), + [[6]]) + + def test_array_method(self): + # Tests that the array method + # call to nonzero works + m = np.array([[1, 0, 0], [4, 0, 6]]) + tgt = [[0, 1, 1], [0, 0, 2]] + + assert_equal(m.nonzero(), tgt) + + def test_nonzero_invalid_object(self): + # gh-9295 + a = np.array([np.array([1, 2]), 3], dtype=object) + assert_raises(ValueError, np.nonzero, a) + + class BoolErrors: + def __bool__(self): + raise ValueError("Not allowed") + + assert_raises(ValueError, np.nonzero, np.array([BoolErrors()])) + + def test_nonzero_sideeffect_safety(self): + # gh-13631 + class FalseThenTrue: + _val = False + def __bool__(self): + try: + return self._val + finally: + self._val = True + + class TrueThenFalse: + _val = True + def __bool__(self): + try: + return self._val + finally: + self._val = False + + # result grows on the second pass + a = np.array([True, FalseThenTrue()]) + assert_raises(RuntimeError, np.nonzero, a) + + a = np.array([[True], [FalseThenTrue()]]) + assert_raises(RuntimeError, np.nonzero, a) + + # result shrinks on the second pass + a = np.array([False, TrueThenFalse()]) + assert_raises(RuntimeError, np.nonzero, a) + + a = np.array([[False], [TrueThenFalse()]]) + assert_raises(RuntimeError, np.nonzero, a) + + def test_nonzero_sideffects_structured_void(self): + # Checks that structured void does not mutate alignment flag of + # original array. + arr = np.zeros(5, dtype="i1,i8,i8") # `ones` may short-circuit + assert arr.flags.aligned # structs are considered "aligned" + assert not arr["f2"].flags.aligned + # make sure that nonzero/count_nonzero do not flip the flag: + np.nonzero(arr) + assert arr.flags.aligned + np.count_nonzero(arr) + assert arr.flags.aligned + + def test_nonzero_exception_safe(self): + # gh-13930 + + class ThrowsAfter: + def __init__(self, iters): + self.iters_left = iters + + def __bool__(self): + if self.iters_left == 0: + raise ValueError("called `iters` times") + + self.iters_left -= 1 + return True + + """ + Test that a ValueError is raised instead of a SystemError + + If the __bool__ function is called after the error state is set, + Python (cpython) will raise a SystemError. 
+ """ + + # assert that an exception in first pass is handled correctly + a = np.array([ThrowsAfter(5)]*10) + assert_raises(ValueError, np.nonzero, a) + + # raise exception in second pass for 1-dimensional loop + a = np.array([ThrowsAfter(15)]*10) + assert_raises(ValueError, np.nonzero, a) + + # raise exception in second pass for n-dimensional loop + a = np.array([[ThrowsAfter(15)]]*10) + assert_raises(ValueError, np.nonzero, a) + + def test_structured_threadsafety(self): + # Nonzero (and some other functions) should be threadsafe for + # structured datatypes, see gh-15387. This test can behave randomly. + from concurrent.futures import ThreadPoolExecutor + + # Create a deeply nested dtype to make a failure more likely: + dt = np.dtype([("", "f8")]) + dt = np.dtype([("", dt)]) + dt = np.dtype([("", dt)] * 2) + # The array should be large enough to likely run into threading issues + arr = np.random.uniform(size=(5000, 4)).view(dt)[:, 0] + def func(arr): + arr.nonzero() + + tpe = ThreadPoolExecutor(max_workers=8) + futures = [tpe.submit(func, arr) for _ in range(10)] + for f in futures: + f.result() + + assert arr.dtype is dt + + +class TestIndex: + def test_boolean(self): + a = rand(3, 5, 8) + V = rand(5, 8) + g1 = randint(0, 5, size=15) + g2 = randint(0, 8, size=15) + V[g1, g2] = -V[g1, g2] + assert_((np.array([a[0][V > 0], a[1][V > 0], a[2][V > 0]]) == a[:, V > 0]).all()) + + def test_boolean_edgecase(self): + a = np.array([], dtype='int32') + b = np.array([], dtype='bool') + c = a[b] + assert_equal(c, []) + assert_equal(c.dtype, np.dtype('int32')) + + +class TestBinaryRepr: + def test_zero(self): + assert_equal(np.binary_repr(0), '0') + + def test_positive(self): + assert_equal(np.binary_repr(10), '1010') + assert_equal(np.binary_repr(12522), + '11000011101010') + assert_equal(np.binary_repr(10736848), + '101000111101010011010000') + + def test_negative(self): + assert_equal(np.binary_repr(-1), '-1') + assert_equal(np.binary_repr(-10), '-1010') + assert_equal(np.binary_repr(-12522), + '-11000011101010') + assert_equal(np.binary_repr(-10736848), + '-101000111101010011010000') + + def test_sufficient_width(self): + assert_equal(np.binary_repr(0, width=5), '00000') + assert_equal(np.binary_repr(10, width=7), '0001010') + assert_equal(np.binary_repr(-5, width=7), '1111011') + + def test_neg_width_boundaries(self): + # see gh-8670 + + # Ensure that the example in the issue does not + # break before proceeding to a more thorough test. + assert_equal(np.binary_repr(-128, width=8), '10000000') + + for width in range(1, 11): + num = -2**(width - 1) + exp = '1' + (width - 1) * '0' + assert_equal(np.binary_repr(num, width=width), exp) + + def test_large_neg_int64(self): + # See gh-14289. 
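+        # Arithmetic check: -2**62 in 64-bit two's complement is
+        # 2**64 - 2**62 = 3 * 2**62, i.e. '11' followed by 62 zero bits,
+        # matching the expected string below.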
+ assert_equal(np.binary_repr(np.int64(-2**62), width=64), + '11' + '0'*62) + + +class TestBaseRepr: + def test_base3(self): + assert_equal(np.base_repr(3**5, 3), '100000') + + def test_positive(self): + assert_equal(np.base_repr(12, 10), '12') + assert_equal(np.base_repr(12, 10, 4), '000012') + assert_equal(np.base_repr(12, 4), '30') + assert_equal(np.base_repr(3731624803700888, 36), '10QR0ROFCEW') + + def test_negative(self): + assert_equal(np.base_repr(-12, 10), '-12') + assert_equal(np.base_repr(-12, 10, 4), '-000012') + assert_equal(np.base_repr(-12, 4), '-30') + + def test_base_range(self): + with assert_raises(ValueError): + np.base_repr(1, 1) + with assert_raises(ValueError): + np.base_repr(1, 37) + + +class TestArrayComparisons: + def test_array_equal(self): + res = np.array_equal(np.array([1, 2]), np.array([1, 2])) + assert_(res) + assert_(type(res) is bool) + res = np.array_equal(np.array([1, 2]), np.array([1, 2, 3])) + assert_(not res) + assert_(type(res) is bool) + res = np.array_equal(np.array([1, 2]), np.array([3, 4])) + assert_(not res) + assert_(type(res) is bool) + res = np.array_equal(np.array([1, 2]), np.array([1, 3])) + assert_(not res) + assert_(type(res) is bool) + res = np.array_equal(np.array(['a'], dtype='S1'), np.array(['a'], dtype='S1')) + assert_(res) + assert_(type(res) is bool) + res = np.array_equal(np.array([('a', 1)], dtype='S1,u4'), + np.array([('a', 1)], dtype='S1,u4')) + assert_(res) + assert_(type(res) is bool) + + def test_array_equal_equal_nan(self): + # Test array_equal with equal_nan kwarg + a1 = np.array([1, 2, np.nan]) + a2 = np.array([1, np.nan, 2]) + a3 = np.array([1, 2, np.inf]) + + # equal_nan=False by default + assert_(not np.array_equal(a1, a1)) + assert_(np.array_equal(a1, a1, equal_nan=True)) + assert_(not np.array_equal(a1, a2, equal_nan=True)) + # nan's not conflated with inf's + assert_(not np.array_equal(a1, a3, equal_nan=True)) + # 0-D arrays + a = np.array(np.nan) + assert_(not np.array_equal(a, a)) + assert_(np.array_equal(a, a, equal_nan=True)) + # Non-float dtype - equal_nan should have no effect + a = np.array([1, 2, 3], dtype=int) + assert_(np.array_equal(a, a)) + assert_(np.array_equal(a, a, equal_nan=True)) + # Multi-dimensional array + a = np.array([[0, 1], [np.nan, 1]]) + assert_(not np.array_equal(a, a)) + assert_(np.array_equal(a, a, equal_nan=True)) + # Complex values + a, b = [np.array([1 + 1j])]*2 + a.real, b.imag = np.nan, np.nan + assert_(not np.array_equal(a, b, equal_nan=False)) + assert_(np.array_equal(a, b, equal_nan=True)) + + def test_none_compares_elementwise(self): + a = np.array([None, 1, None], dtype=object) + assert_equal(a == None, [True, False, True]) + assert_equal(a != None, [False, True, False]) + + a = np.ones(3) + assert_equal(a == None, [False, False, False]) + assert_equal(a != None, [True, True, True]) + + def test_array_equiv(self): + res = np.array_equiv(np.array([1, 2]), np.array([1, 2])) + assert_(res) + assert_(type(res) is bool) + res = np.array_equiv(np.array([1, 2]), np.array([1, 2, 3])) + assert_(not res) + assert_(type(res) is bool) + res = np.array_equiv(np.array([1, 2]), np.array([3, 4])) + assert_(not res) + assert_(type(res) is bool) + res = np.array_equiv(np.array([1, 2]), np.array([1, 3])) + assert_(not res) + assert_(type(res) is bool) + + res = np.array_equiv(np.array([1, 1]), np.array([1])) + assert_(res) + assert_(type(res) is bool) + res = np.array_equiv(np.array([1, 1]), np.array([[1], [1]])) + assert_(res) + assert_(type(res) is bool) + res = np.array_equiv(np.array([1, 
2]), np.array([2])) + assert_(not res) + assert_(type(res) is bool) + res = np.array_equiv(np.array([1, 2]), np.array([[1], [2]])) + assert_(not res) + assert_(type(res) is bool) + res = np.array_equiv(np.array([1, 2]), np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])) + assert_(not res) + assert_(type(res) is bool) + + @pytest.mark.parametrize("dtype", ["V0", "V3", "V10"]) + def test_compare_unstructured_voids(self, dtype): + zeros = np.zeros(3, dtype=dtype) + + assert_array_equal(zeros, zeros) + assert not (zeros != zeros).any() + + if dtype == "V0": + # Can't test != of actually different data + return + + nonzeros = np.array([b"1", b"2", b"3"], dtype=dtype) + + assert not (zeros == nonzeros).any() + assert (zeros != nonzeros).all() + + +def assert_array_strict_equal(x, y): + assert_array_equal(x, y) + # Check flags, 32 bit arches typically don't provide 16 byte alignment + if ((x.dtype.alignment <= 8 or + np.intp().dtype.itemsize != 4) and + sys.platform != 'win32'): + assert_(x.flags == y.flags) + else: + assert_(x.flags.owndata == y.flags.owndata) + assert_(x.flags.writeable == y.flags.writeable) + assert_(x.flags.c_contiguous == y.flags.c_contiguous) + assert_(x.flags.f_contiguous == y.flags.f_contiguous) + assert_(x.flags.writebackifcopy == y.flags.writebackifcopy) + # check endianness + assert_(x.dtype.isnative == y.dtype.isnative) + + +class TestClip: + def setup(self): + self.nr = 5 + self.nc = 3 + + def fastclip(self, a, m, M, out=None, casting=None): + if out is None: + if casting is None: + return a.clip(m, M) + else: + return a.clip(m, M, casting=casting) + else: + if casting is None: + return a.clip(m, M, out) + else: + return a.clip(m, M, out, casting=casting) + + def clip(self, a, m, M, out=None): + # use slow-clip + selector = np.less(a, m) + 2*np.greater(a, M) + return selector.choose((a, m, M), out=out) + + # Handy functions + def _generate_data(self, n, m): + return randn(n, m) + + def _generate_data_complex(self, n, m): + return randn(n, m) + 1.j * rand(n, m) + + def _generate_flt_data(self, n, m): + return (randn(n, m)).astype(np.float32) + + def _neg_byteorder(self, a): + a = np.asarray(a) + if sys.byteorder == 'little': + a = a.astype(a.dtype.newbyteorder('>')) + else: + a = a.astype(a.dtype.newbyteorder('<')) + return a + + def _generate_non_native_data(self, n, m): + data = randn(n, m) + data = self._neg_byteorder(data) + assert_(not data.dtype.isnative) + return data + + def _generate_int_data(self, n, m): + return (10 * rand(n, m)).astype(np.int64) + + def _generate_int32_data(self, n, m): + return (10 * rand(n, m)).astype(np.int32) + + # Now the real test cases + + @pytest.mark.parametrize("dtype", '?bhilqpBHILQPefdgFDGO') + def test_ones_pathological(self, dtype): + # for preservation of behavior described in + # gh-12519; amin > amax behavior may still change + # in the future + arr = np.ones(10, dtype=dtype) + expected = np.zeros(10, dtype=dtype) + actual = np.clip(arr, 1, 0) + if dtype == 'O': + assert actual.tolist() == expected.tolist() + else: + assert_equal(actual, expected) + + def test_simple_double(self): + # Test native double input with scalar min/max. + a = self._generate_data(self.nr, self.nc) + m = 0.1 + M = 0.6 + ac = self.fastclip(a, m, M) + act = self.clip(a, m, M) + assert_array_strict_equal(ac, act) + + def test_simple_int(self): + # Test native int input with scalar min/max. 
+ a = self._generate_int_data(self.nr, self.nc) + a = a.astype(int) + m = -2 + M = 4 + ac = self.fastclip(a, m, M) + act = self.clip(a, m, M) + assert_array_strict_equal(ac, act) + + def test_array_double(self): + # Test native double input with array min/max. + a = self._generate_data(self.nr, self.nc) + m = np.zeros(a.shape) + M = m + 0.5 + ac = self.fastclip(a, m, M) + act = self.clip(a, m, M) + assert_array_strict_equal(ac, act) + + def test_simple_nonnative(self): + # Test non native double input with scalar min/max. + # Test native double input with non native double scalar min/max. + a = self._generate_non_native_data(self.nr, self.nc) + m = -0.5 + M = 0.6 + ac = self.fastclip(a, m, M) + act = self.clip(a, m, M) + assert_array_equal(ac, act) + + # Test native double input with non native double scalar min/max. + a = self._generate_data(self.nr, self.nc) + m = -0.5 + M = self._neg_byteorder(0.6) + assert_(not M.dtype.isnative) + ac = self.fastclip(a, m, M) + act = self.clip(a, m, M) + assert_array_equal(ac, act) + + def test_simple_complex(self): + # Test native complex input with native double scalar min/max. + # Test native input with complex double scalar min/max. + a = 3 * self._generate_data_complex(self.nr, self.nc) + m = -0.5 + M = 1. + ac = self.fastclip(a, m, M) + act = self.clip(a, m, M) + assert_array_strict_equal(ac, act) + + # Test native input with complex double scalar min/max. + a = 3 * self._generate_data(self.nr, self.nc) + m = -0.5 + 1.j + M = 1. + 2.j + ac = self.fastclip(a, m, M) + act = self.clip(a, m, M) + assert_array_strict_equal(ac, act) + + def test_clip_complex(self): + # Address Issue gh-5354 for clipping complex arrays + # Test native complex input without explicit min/max + # ie, either min=None or max=None + a = np.ones(10, dtype=complex) + m = a.min() + M = a.max() + am = self.fastclip(a, m, None) + aM = self.fastclip(a, None, M) + assert_array_strict_equal(am, a) + assert_array_strict_equal(aM, a) + + def test_clip_non_contig(self): + # Test clip for non contiguous native input and native scalar min/max. + a = self._generate_data(self.nr * 2, self.nc * 3) + a = a[::2, ::3] + assert_(not a.flags['F_CONTIGUOUS']) + assert_(not a.flags['C_CONTIGUOUS']) + ac = self.fastclip(a, -1.6, 1.7) + act = self.clip(a, -1.6, 1.7) + assert_array_strict_equal(ac, act) + + def test_simple_out(self): + # Test native double input with scalar min/max. + a = self._generate_data(self.nr, self.nc) + m = -0.5 + M = 0.6 + ac = np.zeros(a.shape) + act = np.zeros(a.shape) + self.fastclip(a, m, M, ac) + self.clip(a, m, M, act) + assert_array_strict_equal(ac, act) + + @pytest.mark.parametrize("casting", [None, "unsafe"]) + def test_simple_int32_inout(self, casting): + # Test native int32 input with double min/max and int32 out. + a = self._generate_int32_data(self.nr, self.nc) + m = np.float64(0) + M = np.float64(2) + ac = np.zeros(a.shape, dtype=np.int32) + act = ac.copy() + if casting is None: + with assert_warns(DeprecationWarning): + # NumPy 1.17.0, 2018-02-24 - casting is unsafe + self.fastclip(a, m, M, ac, casting=casting) + else: + # explicitly passing "unsafe" will silence warning + self.fastclip(a, m, M, ac, casting=casting) + self.clip(a, m, M, act) + assert_array_strict_equal(ac, act) + + def test_simple_int64_out(self): + # Test native int32 input with int32 scalar min/max and int64 out. 
+ a = self._generate_int32_data(self.nr, self.nc) + m = np.int32(-1) + M = np.int32(1) + ac = np.zeros(a.shape, dtype=np.int64) + act = ac.copy() + self.fastclip(a, m, M, ac) + self.clip(a, m, M, act) + assert_array_strict_equal(ac, act) + + def test_simple_int64_inout(self): + # Test native int32 input with double array min/max and int32 out. + a = self._generate_int32_data(self.nr, self.nc) + m = np.zeros(a.shape, np.float64) + M = np.float64(1) + ac = np.zeros(a.shape, dtype=np.int32) + act = ac.copy() + with assert_warns(DeprecationWarning): + # NumPy 1.17.0, 2018-02-24 - casting is unsafe + self.fastclip(a, m, M, ac) + self.clip(a, m, M, act) + assert_array_strict_equal(ac, act) + + def test_simple_int32_out(self): + # Test native double input with scalar min/max and int out. + a = self._generate_data(self.nr, self.nc) + m = -1.0 + M = 2.0 + ac = np.zeros(a.shape, dtype=np.int32) + act = ac.copy() + with assert_warns(DeprecationWarning): + # NumPy 1.17.0, 2018-02-24 - casting is unsafe + self.fastclip(a, m, M, ac) + self.clip(a, m, M, act) + assert_array_strict_equal(ac, act) + + def test_simple_inplace_01(self): + # Test native double input with array min/max in-place. + a = self._generate_data(self.nr, self.nc) + ac = a.copy() + m = np.zeros(a.shape) + M = 1.0 + self.fastclip(a, m, M, a) + self.clip(a, m, M, ac) + assert_array_strict_equal(a, ac) + + def test_simple_inplace_02(self): + # Test native double input with scalar min/max in-place. + a = self._generate_data(self.nr, self.nc) + ac = a.copy() + m = -0.5 + M = 0.6 + self.fastclip(a, m, M, a) + self.clip(ac, m, M, ac) + assert_array_strict_equal(a, ac) + + def test_noncontig_inplace(self): + # Test non contiguous double input with double scalar min/max in-place. + a = self._generate_data(self.nr * 2, self.nc * 3) + a = a[::2, ::3] + assert_(not a.flags['F_CONTIGUOUS']) + assert_(not a.flags['C_CONTIGUOUS']) + ac = a.copy() + m = -0.5 + M = 0.6 + self.fastclip(a, m, M, a) + self.clip(ac, m, M, ac) + assert_array_equal(a, ac) + + def test_type_cast_01(self): + # Test native double input with scalar min/max. + a = self._generate_data(self.nr, self.nc) + m = -0.5 + M = 0.6 + ac = self.fastclip(a, m, M) + act = self.clip(a, m, M) + assert_array_strict_equal(ac, act) + + def test_type_cast_02(self): + # Test native int32 input with int32 scalar min/max. + a = self._generate_int_data(self.nr, self.nc) + a = a.astype(np.int32) + m = -2 + M = 4 + ac = self.fastclip(a, m, M) + act = self.clip(a, m, M) + assert_array_strict_equal(ac, act) + + def test_type_cast_03(self): + # Test native int32 input with float64 scalar min/max. + a = self._generate_int32_data(self.nr, self.nc) + m = -2 + M = 4 + ac = self.fastclip(a, np.float64(m), np.float64(M)) + act = self.clip(a, np.float64(m), np.float64(M)) + assert_array_strict_equal(ac, act) + + def test_type_cast_04(self): + # Test native int32 input with float32 scalar min/max. + a = self._generate_int32_data(self.nr, self.nc) + m = np.float32(-2) + M = np.float32(4) + act = self.fastclip(a, m, M) + ac = self.clip(a, m, M) + assert_array_strict_equal(ac, act) + + def test_type_cast_05(self): + # Test native int32 with double arrays min/max. + a = self._generate_int_data(self.nr, self.nc) + m = -0.5 + M = 1. + ac = self.fastclip(a, m * np.zeros(a.shape), M) + act = self.clip(a, m * np.zeros(a.shape), M) + assert_array_strict_equal(ac, act) + + def test_type_cast_06(self): + # Test native with NON native scalar min/max. 
+ a = self._generate_data(self.nr, self.nc) + m = 0.5 + m_s = self._neg_byteorder(m) + M = 1. + act = self.clip(a, m_s, M) + ac = self.fastclip(a, m_s, M) + assert_array_strict_equal(ac, act) + + def test_type_cast_07(self): + # Test NON native with native array min/max. + a = self._generate_data(self.nr, self.nc) + m = -0.5 * np.ones(a.shape) + M = 1. + a_s = self._neg_byteorder(a) + assert_(not a_s.dtype.isnative) + act = a_s.clip(m, M) + ac = self.fastclip(a_s, m, M) + assert_array_strict_equal(ac, act) + + def test_type_cast_08(self): + # Test NON native with native scalar min/max. + a = self._generate_data(self.nr, self.nc) + m = -0.5 + M = 1. + a_s = self._neg_byteorder(a) + assert_(not a_s.dtype.isnative) + ac = self.fastclip(a_s, m, M) + act = a_s.clip(m, M) + assert_array_strict_equal(ac, act) + + def test_type_cast_09(self): + # Test native with NON native array min/max. + a = self._generate_data(self.nr, self.nc) + m = -0.5 * np.ones(a.shape) + M = 1. + m_s = self._neg_byteorder(m) + assert_(not m_s.dtype.isnative) + ac = self.fastclip(a, m_s, M) + act = self.clip(a, m_s, M) + assert_array_strict_equal(ac, act) + + def test_type_cast_10(self): + # Test native int32 with float min/max and float out for output argument. + a = self._generate_int_data(self.nr, self.nc) + b = np.zeros(a.shape, dtype=np.float32) + m = np.float32(-0.5) + M = np.float32(1) + act = self.clip(a, m, M, out=b) + ac = self.fastclip(a, m, M, out=b) + assert_array_strict_equal(ac, act) + + def test_type_cast_11(self): + # Test non native with native scalar, min/max, out non native + a = self._generate_non_native_data(self.nr, self.nc) + b = a.copy() + b = b.astype(b.dtype.newbyteorder('>')) + bt = b.copy() + m = -0.5 + M = 1. + self.fastclip(a, m, M, out=b) + self.clip(a, m, M, out=bt) + assert_array_strict_equal(b, bt) + + def test_type_cast_12(self): + # Test native int32 input and min/max and float out + a = self._generate_int_data(self.nr, self.nc) + b = np.zeros(a.shape, dtype=np.float32) + m = np.int32(0) + M = np.int32(1) + act = self.clip(a, m, M, out=b) + ac = self.fastclip(a, m, M, out=b) + assert_array_strict_equal(ac, act) + + def test_clip_with_out_simple(self): + # Test native double input with scalar min/max + a = self._generate_data(self.nr, self.nc) + m = -0.5 + M = 0.6 + ac = np.zeros(a.shape) + act = np.zeros(a.shape) + self.fastclip(a, m, M, ac) + self.clip(a, m, M, act) + assert_array_strict_equal(ac, act) + + def test_clip_with_out_simple2(self): + # Test native int32 input with double min/max and int32 out + a = self._generate_int32_data(self.nr, self.nc) + m = np.float64(0) + M = np.float64(2) + ac = np.zeros(a.shape, dtype=np.int32) + act = ac.copy() + with assert_warns(DeprecationWarning): + # NumPy 1.17.0, 2018-02-24 - casting is unsafe + self.fastclip(a, m, M, ac) + self.clip(a, m, M, act) + assert_array_strict_equal(ac, act) + + def test_clip_with_out_simple_int32(self): + # Test native int32 input with int32 scalar min/max and int64 out + a = self._generate_int32_data(self.nr, self.nc) + m = np.int32(-1) + M = np.int32(1) + ac = np.zeros(a.shape, dtype=np.int64) + act = ac.copy() + self.fastclip(a, m, M, ac) + self.clip(a, m, M, act) + assert_array_strict_equal(ac, act) + + def test_clip_with_out_array_int32(self): + # Test native int32 input with double array min/max and int32 out + a = self._generate_int32_data(self.nr, self.nc) + m = np.zeros(a.shape, np.float64) + M = np.float64(1) + ac = np.zeros(a.shape, dtype=np.int32) + act = ac.copy() + with 
assert_warns(DeprecationWarning): + # NumPy 1.17.0, 2018-02-24 - casting is unsafe + self.fastclip(a, m, M, ac) + self.clip(a, m, M, act) + assert_array_strict_equal(ac, act) + + def test_clip_with_out_array_outint32(self): + # Test native double input with scalar min/max and int out + a = self._generate_data(self.nr, self.nc) + m = -1.0 + M = 2.0 + ac = np.zeros(a.shape, dtype=np.int32) + act = ac.copy() + with assert_warns(DeprecationWarning): + # NumPy 1.17.0, 2018-02-24 - casting is unsafe + self.fastclip(a, m, M, ac) + self.clip(a, m, M, act) + assert_array_strict_equal(ac, act) + + def test_clip_with_out_transposed(self): + # Test that the out argument works when transposed + a = np.arange(16).reshape(4, 4) + out = np.empty_like(a).T + a.clip(4, 10, out=out) + expected = self.clip(a, 4, 10) + assert_array_equal(out, expected) + + def test_clip_with_out_memory_overlap(self): + # Test that the out argument works when it has memory overlap + a = np.arange(16).reshape(4, 4) + ac = a.copy() + a[:-1].clip(4, 10, out=a[1:]) + expected = self.clip(ac[:-1], 4, 10) + assert_array_equal(a[1:], expected) + + def test_clip_inplace_array(self): + # Test native double input with array min/max + a = self._generate_data(self.nr, self.nc) + ac = a.copy() + m = np.zeros(a.shape) + M = 1.0 + self.fastclip(a, m, M, a) + self.clip(a, m, M, ac) + assert_array_strict_equal(a, ac) + + def test_clip_inplace_simple(self): + # Test native double input with scalar min/max + a = self._generate_data(self.nr, self.nc) + ac = a.copy() + m = -0.5 + M = 0.6 + self.fastclip(a, m, M, a) + self.clip(a, m, M, ac) + assert_array_strict_equal(a, ac) + + def test_clip_func_takes_out(self): + # Ensure that the clip() function takes an out=argument. + a = self._generate_data(self.nr, self.nc) + ac = a.copy() + m = -0.5 + M = 0.6 + a2 = np.clip(a, m, M, out=a) + self.clip(a, m, M, ac) + assert_array_strict_equal(a2, ac) + assert_(a2 is a) + + def test_clip_nan(self): + d = np.arange(7.) 
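+        # Deprecated behaviour: a nan bound is effectively ignored by the
+        # old comparison-based clip, so each call below returns d unchanged
+        # (while still emitting the DeprecationWarning).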
+ with assert_warns(DeprecationWarning): + assert_equal(d.clip(min=np.nan), d) + with assert_warns(DeprecationWarning): + assert_equal(d.clip(max=np.nan), d) + with assert_warns(DeprecationWarning): + assert_equal(d.clip(min=np.nan, max=np.nan), d) + with assert_warns(DeprecationWarning): + assert_equal(d.clip(min=-2, max=np.nan), d) + with assert_warns(DeprecationWarning): + assert_equal(d.clip(min=np.nan, max=10), d) + + def test_object_clip(self): + a = np.arange(10, dtype=object) + actual = np.clip(a, 1, 5) + expected = np.array([1, 1, 2, 3, 4, 5, 5, 5, 5, 5]) + assert actual.tolist() == expected.tolist() + + def test_clip_all_none(self): + a = np.arange(10, dtype=object) + with assert_raises_regex(ValueError, 'max or min'): + np.clip(a, None, None) + + def test_clip_invalid_casting(self): + a = np.arange(10, dtype=object) + with assert_raises_regex(ValueError, + 'casting must be one of'): + self.fastclip(a, 1, 8, casting="garbage") + + @pytest.mark.parametrize("amin, amax", [ + # two scalars + (1, 0), + # mix scalar and array + (1, np.zeros(10)), + # two arrays + (np.ones(10), np.zeros(10)), + ]) + def test_clip_value_min_max_flip(self, amin, amax): + a = np.arange(10, dtype=np.int64) + # requirement from ufunc_docstrings.py + expected = np.minimum(np.maximum(a, amin), amax) + actual = np.clip(a, amin, amax) + assert_equal(actual, expected) + + @pytest.mark.parametrize("arr, amin, amax, exp", [ + # for a bug in npy_ObjectClip, based on a + # case produced by hypothesis + (np.zeros(10, dtype=np.int64), + 0, + -2**64+1, + np.full(10, -2**64+1, dtype=object)), + # for bugs in NPY_TIMEDELTA_MAX, based on a case + # produced by hypothesis + (np.zeros(10, dtype='m8') - 1, + 0, + 0, + np.zeros(10, dtype='m8')), + ]) + def test_clip_problem_cases(self, arr, amin, amax, exp): + actual = np.clip(arr, amin, amax) + assert_equal(actual, exp) + + @pytest.mark.xfail(reason="no scalar nan propagation yet", + raises=AssertionError, + strict=True) + @pytest.mark.parametrize("arr, amin, amax", [ + # problematic scalar nan case from hypothesis + (np.zeros(10, dtype=np.int64), + np.array(np.nan), + np.zeros(10, dtype=np.int32)), + ]) + @pytest.mark.filterwarnings("ignore::DeprecationWarning") + def test_clip_scalar_nan_propagation(self, arr, amin, amax): + # enforcement of scalar nan propagation for comparisons + # called through clip() + expected = np.minimum(np.maximum(arr, amin), amax) + actual = np.clip(arr, amin, amax) + assert_equal(actual, expected) + + @pytest.mark.xfail(reason="propagation doesn't match spec") + @pytest.mark.parametrize("arr, amin, amax", [ + (np.array([1] * 10, dtype='m8'), + np.timedelta64('NaT'), + np.zeros(10, dtype=np.int32)), + ]) + @pytest.mark.filterwarnings("ignore::DeprecationWarning") + def test_NaT_propagation(self, arr, amin, amax): + # NOTE: the expected function spec doesn't + # propagate NaT, but clip() now does + expected = np.minimum(np.maximum(arr, amin), amax) + actual = np.clip(arr, amin, amax) + assert_equal(actual, expected) + + @given( + data=st.data(), + arr=hynp.arrays( + dtype=hynp.integer_dtypes() | hynp.floating_dtypes(), + shape=hynp.array_shapes() + ) + ) + def test_clip_property(self, data, arr): + """A property-based test using Hypothesis. + + This aims for maximum generality: it could in principle generate *any* + valid inputs to np.clip, and in practice generates much more varied + inputs than human testers come up with. 
+
+        Because many of the inputs have tricky dependencies - compatible dtypes
+        and mutually-broadcastable shapes - we use the `st.data()` strategy to
+        draw values *inside* the test function, from strategies we construct
+        based on previous values. An alternative would be to define a custom
+        strategy with `@st.composite`, but until we have duplicated code,
+        inline is fine.
+
+        That accounts for most of the function; the actual test is just three
+        lines to calculate and compare actual vs expected results!
+        """
+        numeric_dtypes = hynp.integer_dtypes() | hynp.floating_dtypes()
+        # Generate shapes for the bounds which can be broadcast with each other
+        # and with the base shape. Below, we might decide to use scalar bounds,
+        # but it's clearer to generate these shapes unconditionally in advance.
+        in_shapes, result_shape = data.draw(
+            hynp.mutually_broadcastable_shapes(
+                num_shapes=2, base_shape=arr.shape
+            )
+        )
+        # Scalar `nan` is deprecated due to the differing behaviour it shows.
+        s = numeric_dtypes.flatmap(
+            lambda x: hynp.from_dtype(x, allow_nan=False))
+        amin = data.draw(s | hynp.arrays(dtype=numeric_dtypes,
+            shape=in_shapes[0], elements={"allow_nan": False}))
+        amax = data.draw(s | hynp.arrays(dtype=numeric_dtypes,
+            shape=in_shapes[1], elements={"allow_nan": False}))
+
+        # Then calculate our result and expected result and check that they're
+        # equal! See gh-12519 and gh-19457 for discussion deciding on this
+        # property and the result_type argument.
+        result = np.clip(arr, amin, amax)
+        t = np.result_type(arr, amin, amax)
+        expected = np.minimum(amax, np.maximum(arr, amin, dtype=t), dtype=t)
+        assert result.dtype == t
+        assert_array_equal(result, expected)
+
+
+class TestAllclose:
+    rtol = 1e-5
+    atol = 1e-8
+
+    def setup(self):
+        self.olderr = np.seterr(invalid='ignore')
+
+    def teardown(self):
+        np.seterr(**self.olderr)
+
+    def tst_allclose(self, x, y):
+        assert_(np.allclose(x, y), "%s and %s not close" % (x, y))
+
+    def tst_not_allclose(self, x, y):
+        assert_(not np.allclose(x, y), "%s and %s shouldn't be close" % (x, y))
+
+    def test_ip_allclose(self):
+        # Parametric test factory.
+        arr = np.array([100, 1000])
+        aran = np.arange(125).reshape((5, 5, 5))
+
+        atol = self.atol
+        rtol = self.rtol
+
+        data = [([1, 0], [1, 0]),
+                ([atol], [0]),
+                ([1], [1+rtol+atol]),
+                (arr, arr + arr*rtol),
+                (arr, arr + arr*rtol + atol*2),
+                (aran, aran + aran*rtol),
+                (np.inf, np.inf),
+                (np.inf, [np.inf])]
+
+        for (x, y) in data:
+            self.tst_allclose(x, y)
+
+    def test_ip_not_allclose(self):
+        # Parametric test factory.
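+        # np.allclose is |x - y| <= atol + rtol*|y| elementwise, which is why
+        # ([atol*2], [0]) below must fail while ([atol], [0]) passes in
+        # test_ip_allclose above.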
+ aran = np.arange(125).reshape((5, 5, 5)) + + atol = self.atol + rtol = self.rtol + + data = [([np.inf, 0], [1, np.inf]), + ([np.inf, 0], [1, 0]), + ([np.inf, np.inf], [1, np.inf]), + ([np.inf, np.inf], [1, 0]), + ([-np.inf, 0], [np.inf, 0]), + ([np.nan, 0], [np.nan, 0]), + ([atol*2], [0]), + ([1], [1+rtol+atol*2]), + (aran, aran + aran*atol + atol*2), + (np.array([np.inf, 1]), np.array([0, np.inf]))] + + for (x, y) in data: + self.tst_not_allclose(x, y) + + def test_no_parameter_modification(self): + x = np.array([np.inf, 1]) + y = np.array([0, np.inf]) + np.allclose(x, y) + assert_array_equal(x, np.array([np.inf, 1])) + assert_array_equal(y, np.array([0, np.inf])) + + def test_min_int(self): + # Could make problems because of abs(min_int) == min_int + min_int = np.iinfo(np.int_).min + a = np.array([min_int], dtype=np.int_) + assert_(np.allclose(a, a)) + + def test_equalnan(self): + x = np.array([1.0, np.nan]) + assert_(np.allclose(x, x, equal_nan=True)) + + def test_return_class_is_ndarray(self): + # Issue gh-6475 + # Check that allclose does not preserve subtypes + class Foo(np.ndarray): + def __new__(cls, *args, **kwargs): + return np.array(*args, **kwargs).view(cls) + + a = Foo([1]) + assert_(type(np.allclose(a, a)) is bool) + + +class TestIsclose: + rtol = 1e-5 + atol = 1e-8 + + def setup(self): + atol = self.atol + rtol = self.rtol + arr = np.array([100, 1000]) + aran = np.arange(125).reshape((5, 5, 5)) + + self.all_close_tests = [ + ([1, 0], [1, 0]), + ([atol], [0]), + ([1], [1 + rtol + atol]), + (arr, arr + arr*rtol), + (arr, arr + arr*rtol + atol), + (aran, aran + aran*rtol), + (np.inf, np.inf), + (np.inf, [np.inf]), + ([np.inf, -np.inf], [np.inf, -np.inf]), + ] + self.none_close_tests = [ + ([np.inf, 0], [1, np.inf]), + ([np.inf, -np.inf], [1, 0]), + ([np.inf, np.inf], [1, -np.inf]), + ([np.inf, np.inf], [1, 0]), + ([np.nan, 0], [np.nan, -np.inf]), + ([atol*2], [0]), + ([1], [1 + rtol + atol*2]), + (aran, aran + rtol*1.1*aran + atol*1.1), + (np.array([np.inf, 1]), np.array([0, np.inf])), + ] + self.some_close_tests = [ + ([np.inf, 0], [np.inf, atol*2]), + ([atol, 1, 1e6*(1 + 2*rtol) + atol], [0, np.nan, 1e6]), + (np.arange(3), [0, 1, 2.1]), + (np.nan, [np.nan, np.nan, np.nan]), + ([0], [atol, np.inf, -np.inf, np.nan]), + (0, [atol, np.inf, -np.inf, np.nan]), + ] + self.some_close_results = [ + [True, False], + [True, False, False], + [True, True, False], + [False, False, False], + [True, False, False, False], + [True, False, False, False], + ] + + def test_ip_isclose(self): + self.setup() + tests = self.some_close_tests + results = self.some_close_results + for (x, y), result in zip(tests, results): + assert_array_equal(np.isclose(x, y), result) + + def tst_all_isclose(self, x, y): + assert_(np.all(np.isclose(x, y)), "%s and %s not close" % (x, y)) + + def tst_none_isclose(self, x, y): + msg = "%s and %s shouldn't be close" + assert_(not np.any(np.isclose(x, y)), msg % (x, y)) + + def tst_isclose_allclose(self, x, y): + msg = "isclose.all() and allclose aren't same for %s and %s" + msg2 = "isclose and allclose aren't same for %s and %s" + if np.isscalar(x) and np.isscalar(y): + assert_(np.isclose(x, y) == np.allclose(x, y), msg=msg2 % (x, y)) + else: + assert_array_equal(np.isclose(x, y).all(), np.allclose(x, y), msg % (x, y)) + + def test_ip_all_isclose(self): + self.setup() + for (x, y) in self.all_close_tests: + self.tst_all_isclose(x, y) + + def test_ip_none_isclose(self): + self.setup() + for (x, y) in self.none_close_tests: + self.tst_none_isclose(x, y) + + def 
test_ip_isclose_allclose(self): + self.setup() + tests = (self.all_close_tests + self.none_close_tests + + self.some_close_tests) + for (x, y) in tests: + self.tst_isclose_allclose(x, y) + + def test_equal_nan(self): + assert_array_equal(np.isclose(np.nan, np.nan, equal_nan=True), [True]) + arr = np.array([1.0, np.nan]) + assert_array_equal(np.isclose(arr, arr, equal_nan=True), [True, True]) + + def test_masked_arrays(self): + # Make sure to test the output type when arguments are interchanged. + + x = np.ma.masked_where([True, True, False], np.arange(3)) + assert_(type(x) is type(np.isclose(2, x))) + assert_(type(x) is type(np.isclose(x, 2))) + + x = np.ma.masked_where([True, True, False], [np.nan, np.inf, np.nan]) + assert_(type(x) is type(np.isclose(np.inf, x))) + assert_(type(x) is type(np.isclose(x, np.inf))) + + x = np.ma.masked_where([True, True, False], [np.nan, np.nan, np.nan]) + y = np.isclose(np.nan, x, equal_nan=True) + assert_(type(x) is type(y)) + # Ensure that the mask isn't modified... + assert_array_equal([True, True, False], y.mask) + y = np.isclose(x, np.nan, equal_nan=True) + assert_(type(x) is type(y)) + # Ensure that the mask isn't modified... + assert_array_equal([True, True, False], y.mask) + + x = np.ma.masked_where([True, True, False], [np.nan, np.nan, np.nan]) + y = np.isclose(x, x, equal_nan=True) + assert_(type(x) is type(y)) + # Ensure that the mask isn't modified... + assert_array_equal([True, True, False], y.mask) + + def test_scalar_return(self): + assert_(np.isscalar(np.isclose(1, 1))) + + def test_no_parameter_modification(self): + x = np.array([np.inf, 1]) + y = np.array([0, np.inf]) + np.isclose(x, y) + assert_array_equal(x, np.array([np.inf, 1])) + assert_array_equal(y, np.array([0, np.inf])) + + def test_non_finite_scalar(self): + # GH7014, when two scalars are compared the output should also be a + # scalar + assert_(np.isclose(np.inf, -np.inf) is np.False_) + assert_(np.isclose(0, np.inf) is np.False_) + assert_(type(np.isclose(0, np.inf)) is np.bool_) + + def test_timedelta(self): + # Allclose currently works for timedelta64 as long as `atol` is + # an integer or also a timedelta64 + a = np.array([[1, 2, 3, "NaT"]], dtype="m8[ns]") + assert np.isclose(a, a, atol=0, equal_nan=True).all() + assert np.isclose(a, a, atol=np.timedelta64(1, "ns"), equal_nan=True).all() + assert np.allclose(a, a, atol=0, equal_nan=True) + assert np.allclose(a, a, atol=np.timedelta64(1, "ns"), equal_nan=True) + + +class TestStdVar: + def setup(self): + self.A = np.array([1, -1, 1, -1]) + self.real_var = 1 + + def test_basic(self): + assert_almost_equal(np.var(self.A), self.real_var) + assert_almost_equal(np.std(self.A)**2, self.real_var) + + def test_scalars(self): + assert_equal(np.var(1), 0) + assert_equal(np.std(1), 0) + + def test_ddof1(self): + assert_almost_equal(np.var(self.A, ddof=1), + self.real_var * len(self.A) / (len(self.A) - 1)) + assert_almost_equal(np.std(self.A, ddof=1)**2, + self.real_var*len(self.A) / (len(self.A) - 1)) + + def test_ddof2(self): + assert_almost_equal(np.var(self.A, ddof=2), + self.real_var * len(self.A) / (len(self.A) - 2)) + assert_almost_equal(np.std(self.A, ddof=2)**2, + self.real_var * len(self.A) / (len(self.A) - 2)) + + def test_out_scalar(self): + d = np.arange(10) + out = np.array(0.) 
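+        # With a 0-d out array, std/var/mean must write into and return the
+        # very same object (identity, not just equality, is asserted below).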
+ r = np.std(d, out=out) + assert_(r is out) + assert_array_equal(r, out) + r = np.var(d, out=out) + assert_(r is out) + assert_array_equal(r, out) + r = np.mean(d, out=out) + assert_(r is out) + assert_array_equal(r, out) + + +class TestStdVarComplex: + def test_basic(self): + A = np.array([1, 1.j, -1, -1.j]) + real_var = 1 + assert_almost_equal(np.var(A), real_var) + assert_almost_equal(np.std(A)**2, real_var) + + def test_scalars(self): + assert_equal(np.var(1j), 0) + assert_equal(np.std(1j), 0) + + +class TestCreationFuncs: + # Test ones, zeros, empty and full. + + def setup(self): + dtypes = {np.dtype(tp) for tp in itertools.chain(*np.sctypes.values())} + # void, bytes, str + variable_sized = {tp for tp in dtypes if tp.str.endswith('0')} + self.dtypes = sorted(dtypes - variable_sized | + {np.dtype(tp.str.replace("0", str(i))) + for tp in variable_sized for i in range(1, 10)}, + key=lambda dtype: dtype.str) + self.orders = {'C': 'c_contiguous', 'F': 'f_contiguous'} + self.ndims = 10 + + def check_function(self, func, fill_value=None): + par = ((0, 1, 2), + range(self.ndims), + self.orders, + self.dtypes) + fill_kwarg = {} + if fill_value is not None: + fill_kwarg = {'fill_value': fill_value} + + for size, ndims, order, dtype in itertools.product(*par): + shape = ndims * [size] + + # do not fill void type + if fill_kwarg and dtype.str.startswith('|V'): + continue + + arr = func(shape, order=order, dtype=dtype, + **fill_kwarg) + + assert_equal(arr.dtype, dtype) + assert_(getattr(arr.flags, self.orders[order])) + + if fill_value is not None: + if dtype.str.startswith('|S'): + val = str(fill_value) + else: + val = fill_value + assert_equal(arr, dtype.type(val)) + + def test_zeros(self): + self.check_function(np.zeros) + + def test_ones(self): + self.check_function(np.ones) + + def test_empty(self): + self.check_function(np.empty) + + def test_full(self): + self.check_function(np.full, 0) + self.check_function(np.full, 1) + + @pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") + def test_for_reference_leak(self): + # Make sure we have an object for reference + dim = 1 + beg = sys.getrefcount(dim) + np.zeros([dim]*10) + assert_(sys.getrefcount(dim) == beg) + np.ones([dim]*10) + assert_(sys.getrefcount(dim) == beg) + np.empty([dim]*10) + assert_(sys.getrefcount(dim) == beg) + np.full([dim]*10, 0) + assert_(sys.getrefcount(dim) == beg) + + +class TestLikeFuncs: + '''Test ones_like, zeros_like, empty_like and full_like''' + + def setup(self): + self.data = [ + # Array scalars + (np.array(3.), None), + (np.array(3), 'f8'), + # 1D arrays + (np.arange(6, dtype='f4'), None), + (np.arange(6), 'c16'), + # 2D C-layout arrays + (np.arange(6).reshape(2, 3), None), + (np.arange(6).reshape(3, 2), 'i1'), + # 2D F-layout arrays + (np.arange(6).reshape((2, 3), order='F'), None), + (np.arange(6).reshape((3, 2), order='F'), 'i1'), + # 3D C-layout arrays + (np.arange(24).reshape(2, 3, 4), None), + (np.arange(24).reshape(4, 3, 2), 'f4'), + # 3D F-layout arrays + (np.arange(24).reshape((2, 3, 4), order='F'), None), + (np.arange(24).reshape((4, 3, 2), order='F'), 'f4'), + # 3D non-C/F-layout arrays + (np.arange(24).reshape(2, 3, 4).swapaxes(0, 1), None), + (np.arange(24).reshape(4, 3, 2).swapaxes(0, 1), '?'), + ] + self.shapes = [(), (5,), (5,6,), (5,6,7,)] + + def compare_array_value(self, dz, value, fill_value): + if value is not None: + if fill_value: + try: + z = dz.dtype.type(value) + except OverflowError: + pass + else: + assert_(np.all(dz == z)) + else: + assert_(np.all(dz == value)) + + 
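+    # check_like_function below exercises the *_like constructors over every
+    # array in self.data: layout preservation for order 'C'/'F'/'A'/'K',
+    # dtype overriding, the 'shape' parameter, and 'subok' subclass handling.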
def check_like_function(self, like_function, value, fill_value=False): + if fill_value: + fill_kwarg = {'fill_value': value} + else: + fill_kwarg = {} + for d, dtype in self.data: + # default (K) order, dtype + dz = like_function(d, dtype=dtype, **fill_kwarg) + assert_equal(dz.shape, d.shape) + assert_equal(np.array(dz.strides)*d.dtype.itemsize, + np.array(d.strides)*dz.dtype.itemsize) + assert_equal(d.flags.c_contiguous, dz.flags.c_contiguous) + assert_equal(d.flags.f_contiguous, dz.flags.f_contiguous) + if dtype is None: + assert_equal(dz.dtype, d.dtype) + else: + assert_equal(dz.dtype, np.dtype(dtype)) + self.compare_array_value(dz, value, fill_value) + + # C order, default dtype + dz = like_function(d, order='C', dtype=dtype, **fill_kwarg) + assert_equal(dz.shape, d.shape) + assert_(dz.flags.c_contiguous) + if dtype is None: + assert_equal(dz.dtype, d.dtype) + else: + assert_equal(dz.dtype, np.dtype(dtype)) + self.compare_array_value(dz, value, fill_value) + + # F order, default dtype + dz = like_function(d, order='F', dtype=dtype, **fill_kwarg) + assert_equal(dz.shape, d.shape) + assert_(dz.flags.f_contiguous) + if dtype is None: + assert_equal(dz.dtype, d.dtype) + else: + assert_equal(dz.dtype, np.dtype(dtype)) + self.compare_array_value(dz, value, fill_value) + + # A order + dz = like_function(d, order='A', dtype=dtype, **fill_kwarg) + assert_equal(dz.shape, d.shape) + if d.flags.f_contiguous: + assert_(dz.flags.f_contiguous) + else: + assert_(dz.flags.c_contiguous) + if dtype is None: + assert_equal(dz.dtype, d.dtype) + else: + assert_equal(dz.dtype, np.dtype(dtype)) + self.compare_array_value(dz, value, fill_value) + + # Test the 'shape' parameter + for s in self.shapes: + for o in 'CFA': + sz = like_function(d, dtype=dtype, shape=s, order=o, + **fill_kwarg) + assert_equal(sz.shape, s) + if dtype is None: + assert_equal(sz.dtype, d.dtype) + else: + assert_equal(sz.dtype, np.dtype(dtype)) + if o == 'C' or (o == 'A' and d.flags.c_contiguous): + assert_(sz.flags.c_contiguous) + elif o == 'F' or (o == 'A' and d.flags.f_contiguous): + assert_(sz.flags.f_contiguous) + self.compare_array_value(sz, value, fill_value) + + if (d.ndim != len(s)): + assert_equal(np.argsort(like_function(d, dtype=dtype, + shape=s, order='K', + **fill_kwarg).strides), + np.argsort(np.empty(s, dtype=dtype, + order='C').strides)) + else: + assert_equal(np.argsort(like_function(d, dtype=dtype, + shape=s, order='K', + **fill_kwarg).strides), + np.argsort(d.strides)) + + # Test the 'subok' parameter + class MyNDArray(np.ndarray): + pass + + a = np.array([[1, 2], [3, 4]]).view(MyNDArray) + + b = like_function(a, **fill_kwarg) + assert_(type(b) is MyNDArray) + + b = like_function(a, subok=False, **fill_kwarg) + assert_(type(b) is not MyNDArray) + + def test_ones_like(self): + self.check_like_function(np.ones_like, 1) + + def test_zeros_like(self): + self.check_like_function(np.zeros_like, 0) + + def test_empty_like(self): + self.check_like_function(np.empty_like, None) + + def test_filled_like(self): + self.check_like_function(np.full_like, 0, True) + self.check_like_function(np.full_like, 1, True) + self.check_like_function(np.full_like, 1000, True) + self.check_like_function(np.full_like, 123.456, True) + self.check_like_function(np.full_like, np.inf, True) + + @pytest.mark.parametrize('likefunc', [np.empty_like, np.full_like, + np.zeros_like, np.ones_like]) + @pytest.mark.parametrize('dtype', [str, bytes]) + def test_dtype_str_bytes(self, likefunc, dtype): + # Regression test for gh-19860 + a = 
np.arange(16).reshape(2, 8) + b = a[:, ::2] # Ensure b is not contiguous. + kwargs = {'fill_value': ''} if likefunc == np.full_like else {} + result = likefunc(b, dtype=dtype, **kwargs) + if dtype == str: + assert result.strides == (16, 4) + else: + # dtype is bytes + assert result.strides == (4, 1) + + +class TestCorrelate: + def _setup(self, dt): + self.x = np.array([1, 2, 3, 4, 5], dtype=dt) + self.xs = np.arange(1, 20)[::3] + self.y = np.array([-1, -2, -3], dtype=dt) + self.z1 = np.array([-3., -8., -14., -20., -26., -14., -5.], dtype=dt) + self.z1_4 = np.array([-2., -5., -8., -11., -14., -5.], dtype=dt) + self.z1r = np.array([-15., -22., -22., -16., -10., -4., -1.], dtype=dt) + self.z2 = np.array([-5., -14., -26., -20., -14., -8., -3.], dtype=dt) + self.z2r = np.array([-1., -4., -10., -16., -22., -22., -15.], dtype=dt) + self.zs = np.array([-3., -14., -30., -48., -66., -84., + -102., -54., -19.], dtype=dt) + + def test_float(self): + self._setup(float) + z = np.correlate(self.x, self.y, 'full') + assert_array_almost_equal(z, self.z1) + z = np.correlate(self.x, self.y[:-1], 'full') + assert_array_almost_equal(z, self.z1_4) + z = np.correlate(self.y, self.x, 'full') + assert_array_almost_equal(z, self.z2) + z = np.correlate(self.x[::-1], self.y, 'full') + assert_array_almost_equal(z, self.z1r) + z = np.correlate(self.y, self.x[::-1], 'full') + assert_array_almost_equal(z, self.z2r) + z = np.correlate(self.xs, self.y, 'full') + assert_array_almost_equal(z, self.zs) + + def test_object(self): + self._setup(Decimal) + z = np.correlate(self.x, self.y, 'full') + assert_array_almost_equal(z, self.z1) + z = np.correlate(self.y, self.x, 'full') + assert_array_almost_equal(z, self.z2) + + def test_no_overwrite(self): + d = np.ones(100) + k = np.ones(3) + np.correlate(d, k) + assert_array_equal(d, np.ones(100)) + assert_array_equal(k, np.ones(3)) + + def test_complex(self): + x = np.array([1, 2, 3, 4+1j], dtype=complex) + y = np.array([-1, -2j, 3+1j], dtype=complex) + r_z = np.array([3-1j, 6, 8+1j, 11+5j, -5+8j, -4-1j], dtype=complex) + r_z = r_z[::-1].conjugate() + z = np.correlate(y, x, mode='full') + assert_array_almost_equal(z, r_z) + + def test_zero_size(self): + with pytest.raises(ValueError): + np.correlate(np.array([]), np.ones(1000), mode='full') + with pytest.raises(ValueError): + np.correlate(np.ones(1000), np.array([]), mode='full') + + def test_mode(self): + d = np.ones(100) + k = np.ones(3) + default_mode = np.correlate(d, k, mode='valid') + with assert_warns(DeprecationWarning): + valid_mode = np.correlate(d, k, mode='v') + assert_array_equal(valid_mode, default_mode) + # integer mode + with assert_raises(ValueError): + np.correlate(d, k, mode=-1) + assert_array_equal(np.correlate(d, k, mode=0), valid_mode) + # illegal arguments + with assert_raises(TypeError): + np.correlate(d, k, mode=None) + + +class TestConvolve: + def test_object(self): + d = [1.] * 100 + k = [1.] 
* 3 + assert_array_almost_equal(np.convolve(d, k)[2:-2], np.full(98, 3)) + + def test_no_overwrite(self): + d = np.ones(100) + k = np.ones(3) + np.convolve(d, k) + assert_array_equal(d, np.ones(100)) + assert_array_equal(k, np.ones(3)) + + def test_mode(self): + d = np.ones(100) + k = np.ones(3) + default_mode = np.convolve(d, k, mode='full') + with assert_warns(DeprecationWarning): + full_mode = np.convolve(d, k, mode='f') + assert_array_equal(full_mode, default_mode) + # integer mode + with assert_raises(ValueError): + np.convolve(d, k, mode=-1) + assert_array_equal(np.convolve(d, k, mode=2), full_mode) + # illegal arguments + with assert_raises(TypeError): + np.convolve(d, k, mode=None) + + +class TestArgwhere: + + @pytest.mark.parametrize('nd', [0, 1, 2]) + def test_nd(self, nd): + # get an nd array with multiple elements in every dimension + x = np.empty((2,)*nd, bool) + + # none + x[...] = False + assert_equal(np.argwhere(x).shape, (0, nd)) + + # only one + x[...] = False + x.flat[0] = True + assert_equal(np.argwhere(x).shape, (1, nd)) + + # all but one + x[...] = True + x.flat[0] = False + assert_equal(np.argwhere(x).shape, (x.size - 1, nd)) + + # all + x[...] = True + assert_equal(np.argwhere(x).shape, (x.size, nd)) + + def test_2D(self): + x = np.arange(6).reshape((2, 3)) + assert_array_equal(np.argwhere(x > 1), + [[0, 2], + [1, 0], + [1, 1], + [1, 2]]) + + def test_list(self): + assert_equal(np.argwhere([4, 0, 2, 1, 3]), [[0], [2], [3], [4]]) + + +class TestStringFunction: + + def test_set_string_function(self): + a = np.array([1]) + np.set_string_function(lambda x: "FOO", repr=True) + assert_equal(repr(a), "FOO") + np.set_string_function(None, repr=True) + assert_equal(repr(a), "array([1])") + + np.set_string_function(lambda x: "FOO", repr=False) + assert_equal(str(a), "FOO") + np.set_string_function(None, repr=False) + assert_equal(str(a), "[1]") + + +class TestRoll: + def test_roll1d(self): + x = np.arange(10) + xr = np.roll(x, 2) + assert_equal(xr, np.array([8, 9, 0, 1, 2, 3, 4, 5, 6, 7])) + + def test_roll2d(self): + x2 = np.reshape(np.arange(10), (2, 5)) + x2r = np.roll(x2, 1) + assert_equal(x2r, np.array([[9, 0, 1, 2, 3], [4, 5, 6, 7, 8]])) + + x2r = np.roll(x2, 1, axis=0) + assert_equal(x2r, np.array([[5, 6, 7, 8, 9], [0, 1, 2, 3, 4]])) + + x2r = np.roll(x2, 1, axis=1) + assert_equal(x2r, np.array([[4, 0, 1, 2, 3], [9, 5, 6, 7, 8]])) + + # Roll multiple axes at once. + x2r = np.roll(x2, 1, axis=(0, 1)) + assert_equal(x2r, np.array([[9, 5, 6, 7, 8], [4, 0, 1, 2, 3]])) + + x2r = np.roll(x2, (1, 0), axis=(0, 1)) + assert_equal(x2r, np.array([[5, 6, 7, 8, 9], [0, 1, 2, 3, 4]])) + + x2r = np.roll(x2, (-1, 0), axis=(0, 1)) + assert_equal(x2r, np.array([[5, 6, 7, 8, 9], [0, 1, 2, 3, 4]])) + + x2r = np.roll(x2, (0, 1), axis=(0, 1)) + assert_equal(x2r, np.array([[4, 0, 1, 2, 3], [9, 5, 6, 7, 8]])) + + x2r = np.roll(x2, (0, -1), axis=(0, 1)) + assert_equal(x2r, np.array([[1, 2, 3, 4, 0], [6, 7, 8, 9, 5]])) + + x2r = np.roll(x2, (1, 1), axis=(0, 1)) + assert_equal(x2r, np.array([[9, 5, 6, 7, 8], [4, 0, 1, 2, 3]])) + + x2r = np.roll(x2, (-1, -1), axis=(0, 1)) + assert_equal(x2r, np.array([[6, 7, 8, 9, 5], [1, 2, 3, 4, 0]])) + + # Roll the same axis multiple times. + x2r = np.roll(x2, 1, axis=(0, 0)) + assert_equal(x2r, np.array([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]])) + + x2r = np.roll(x2, 1, axis=(1, 1)) + assert_equal(x2r, np.array([[3, 4, 0, 1, 2], [8, 9, 5, 6, 7]])) + + # Roll more than one turn in either direction. 
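+        # (Along this length-5 axis, shifts of 6 and -4 are both congruent
+        # to 1 mod 5, so each reproduces the roll-by-1 result checked above.)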
+        x2r = np.roll(x2, 6, axis=1)
+        assert_equal(x2r, np.array([[4, 0, 1, 2, 3], [9, 5, 6, 7, 8]]))
+
+        x2r = np.roll(x2, -4, axis=1)
+        assert_equal(x2r, np.array([[4, 0, 1, 2, 3], [9, 5, 6, 7, 8]]))
+
+    def test_roll_empty(self):
+        x = np.array([])
+        assert_equal(np.roll(x, 1), np.array([]))
+
+
+class TestRollaxis:
+
+    # expected shape indexed by (axis, start) for array of
+    # shape (1, 2, 3, 4)
+    tgtshape = {(0, 0): (1, 2, 3, 4), (0, 1): (1, 2, 3, 4),
+                (0, 2): (2, 1, 3, 4), (0, 3): (2, 3, 1, 4),
+                (0, 4): (2, 3, 4, 1),
+                (1, 0): (2, 1, 3, 4), (1, 1): (1, 2, 3, 4),
+                (1, 2): (1, 2, 3, 4), (1, 3): (1, 3, 2, 4),
+                (1, 4): (1, 3, 4, 2),
+                (2, 0): (3, 1, 2, 4), (2, 1): (1, 3, 2, 4),
+                (2, 2): (1, 2, 3, 4), (2, 3): (1, 2, 3, 4),
+                (2, 4): (1, 2, 4, 3),
+                (3, 0): (4, 1, 2, 3), (3, 1): (1, 4, 2, 3),
+                (3, 2): (1, 2, 4, 3), (3, 3): (1, 2, 3, 4),
+                (3, 4): (1, 2, 3, 4)}
+
+    def test_exceptions(self):
+        a = np.arange(1*2*3*4).reshape(1, 2, 3, 4)
+        assert_raises(np.AxisError, np.rollaxis, a, -5, 0)
+        assert_raises(np.AxisError, np.rollaxis, a, 0, -5)
+        assert_raises(np.AxisError, np.rollaxis, a, 4, 0)
+        assert_raises(np.AxisError, np.rollaxis, a, 0, 5)
+
+    def test_results(self):
+        a = np.arange(1*2*3*4).reshape(1, 2, 3, 4).copy()
+        aind = np.indices(a.shape)
+        assert_(a.flags['OWNDATA'])
+        for (i, j) in self.tgtshape:
+            # positive axis, positive start
+            res = np.rollaxis(a, axis=i, start=j)
+            i0, i1, i2, i3 = aind[np.array(res.shape) - 1]
+            assert_(np.all(res[i0, i1, i2, i3] == a))
+            assert_(res.shape == self.tgtshape[(i, j)], str((i,j)))
+            assert_(not res.flags['OWNDATA'])
+
+            # negative axis, positive start
+            ip = i + 1
+            res = np.rollaxis(a, axis=-ip, start=j)
+            i0, i1, i2, i3 = aind[np.array(res.shape) - 1]
+            assert_(np.all(res[i0, i1, i2, i3] == a))
+            assert_(res.shape == self.tgtshape[(4 - ip, j)])
+            assert_(not res.flags['OWNDATA'])
+
+            # positive axis, negative start
+            jp = j + 1 if j < 4 else j
+            res = np.rollaxis(a, axis=i, start=-jp)
+            i0, i1, i2, i3 = aind[np.array(res.shape) - 1]
+            assert_(np.all(res[i0, i1, i2, i3] == a))
+            assert_(res.shape == self.tgtshape[(i, 4 - jp)])
+            assert_(not res.flags['OWNDATA'])
+
+            # negative axis, negative start
+            ip = i + 1
+            jp = j + 1 if j < 4 else j
+            res = np.rollaxis(a, axis=-ip, start=-jp)
+            i0, i1, i2, i3 = aind[np.array(res.shape) - 1]
+            assert_(np.all(res[i0, i1, i2, i3] == a))
+            assert_(res.shape == self.tgtshape[(4 - ip, 4 - jp)])
+            assert_(not res.flags['OWNDATA'])
+
+
+class TestMoveaxis:
+    def test_move_to_end(self):
+        x = np.random.randn(5, 6, 7)
+        for source, expected in [(0, (6, 7, 5)),
+                                 (1, (5, 7, 6)),
+                                 (2, (5, 6, 7)),
+                                 (-1, (5, 6, 7))]:
+            actual = np.moveaxis(x, source, -1).shape
+            assert_equal(actual, expected)
+
+    def test_move_new_position(self):
+        x = np.random.randn(1, 2, 3, 4)
+        for source, destination, expected in [
+                (0, 1, (2, 1, 3, 4)),
+                (1, 2, (1, 3, 2, 4)),
+                (1, -1, (1, 3, 4, 2)),
+                ]:
+            actual = np.moveaxis(x, source, destination).shape
+            assert_equal(actual, expected)
+
+    def test_preserve_order(self):
+        x = np.zeros((1, 2, 3, 4))
+        for source, destination in [
+                (0, 0),
+                (3, -1),
+                (-1, 3),
+                ([0, -1], [0, -1]),
+                ([2, 0], [2, 0]),
+                (range(4), range(4)),
+                ]:
+            actual = np.moveaxis(x, source, destination).shape
+            assert_equal(actual, (1, 2, 3, 4))
+
+    def test_move_multiples(self):
+        x = np.zeros((0, 1, 2, 3))
+        for source, destination, expected in [
+                ([0, 1], [2, 3], (2, 3, 0, 1)),
+                ([2, 3], [0, 1], (2, 3, 0, 1)),
+                ([0, 1, 2], [2, 3, 0], (2, 3, 0, 1)),
+                ([3, 0], [1, 0], (0, 3, 1, 2)),
+                ([0, 3], [0, 1], (0, 3, 1, 2)),
+                ]:
+            actual = np.moveaxis(x, source, destination).shape
+            assert_equal(actual, expected)
+
+    def test_errors(self):
+        x = np.random.randn(1, 2, 3)
+        assert_raises_regex(np.AxisError, 'source.*out of bounds',
+                            np.moveaxis, x, 3, 0)
+        assert_raises_regex(np.AxisError, 'source.*out of bounds',
+                            np.moveaxis, x, -4, 0)
+        assert_raises_regex(np.AxisError, 'destination.*out of bounds',
+                            np.moveaxis, x, 0, 5)
+        assert_raises_regex(ValueError, 'repeated axis in `source`',
+                            np.moveaxis, x, [0, 0], [0, 1])
+        assert_raises_regex(ValueError, 'repeated axis in `destination`',
+                            np.moveaxis, x, [0, 1], [1, 1])
+        assert_raises_regex(ValueError, 'must have the same number',
+                            np.moveaxis, x, 0, [0, 1])
+        assert_raises_regex(ValueError, 'must have the same number',
+                            np.moveaxis, x, [0, 1], [0])
+
+    def test_array_likes(self):
+        x = np.ma.zeros((1, 2, 3))
+        result = np.moveaxis(x, 0, 0)
+        assert_equal(x.shape, result.shape)
+        assert_(isinstance(result, np.ma.MaskedArray))
+
+        x = [1, 2, 3]
+        result = np.moveaxis(x, 0, 0)
+        assert_equal(x, list(result))
+        assert_(isinstance(result, np.ndarray))
+
+
+class TestCross:
+    def test_2x2(self):
+        u = [1, 2]
+        v = [3, 4]
+        z = -2
+        cp = np.cross(u, v)
+        assert_equal(cp, z)
+        cp = np.cross(v, u)
+        assert_equal(cp, -z)
+
+    def test_2x3(self):
+        u = [1, 2]
+        v = [3, 4, 5]
+        z = np.array([10, -5, -2])
+        cp = np.cross(u, v)
+        assert_equal(cp, z)
+        cp = np.cross(v, u)
+        assert_equal(cp, -z)
+
+    def test_3x3(self):
+        u = [1, 2, 3]
+        v = [4, 5, 6]
+        z = np.array([-3, 6, -3])
+        cp = np.cross(u, v)
+        assert_equal(cp, z)
+        cp = np.cross(v, u)
+        assert_equal(cp, -z)
+
+    def test_broadcasting(self):
+        # Ticket #2624 (Trac #2032)
+        u = np.tile([1, 2], (11, 1))
+        v = np.tile([3, 4], (11, 1))
+        z = -2
+        assert_equal(np.cross(u, v), z)
+        assert_equal(np.cross(v, u), -z)
+        assert_equal(np.cross(u, u), 0)
+
+        u = np.tile([1, 2], (11, 1)).T
+        v = np.tile([3, 4, 5], (11, 1))
+        z = np.tile([10, -5, -2], (11, 1))
+        assert_equal(np.cross(u, v, axisa=0), z)
+        assert_equal(np.cross(v, u.T), -z)
+        assert_equal(np.cross(v, v), 0)
+
+        u = np.tile([1, 2, 3], (11, 1)).T
+        v = np.tile([3, 4], (11, 1)).T
+        z = np.tile([-12, 9, -2], (11, 1))
+        assert_equal(np.cross(u, v, axisa=0, axisb=0), z)
+        assert_equal(np.cross(v.T, u.T), -z)
+        assert_equal(np.cross(u.T, u.T), 0)
+
+        u = np.tile([1, 2, 3], (5, 1))
+        v = np.tile([4, 5, 6], (5, 1)).T
+        z = np.tile([-3, 6, -3], (5, 1))
+        assert_equal(np.cross(u, v, axisb=0), z)
+        assert_equal(np.cross(v.T, u), -z)
+        assert_equal(np.cross(u, u), 0)
+
+    def test_broadcasting_shapes(self):
+        u = np.ones((2, 1, 3))
+        v = np.ones((5, 3))
+        assert_equal(np.cross(u, v).shape, (2, 5, 3))
+        u = np.ones((10, 3, 5))
+        v = np.ones((2, 5))
+        assert_equal(np.cross(u, v, axisa=1, axisb=0).shape, (10, 5, 3))
+        assert_raises(np.AxisError, np.cross, u, v, axisa=1, axisb=2)
+        assert_raises(np.AxisError, np.cross, u, v, axisa=3, axisb=0)
+        u = np.ones((10, 3, 5, 7))
+        v = np.ones((5, 7, 2))
+        assert_equal(np.cross(u, v, axisa=1, axisc=2).shape, (10, 5, 3, 7))
+        assert_raises(np.AxisError, np.cross, u, v, axisa=-5, axisb=2)
+        assert_raises(np.AxisError, np.cross, u, v, axisa=1, axisb=-4)
+        # gh-5885
+        u = np.ones((3, 4, 2))
+        for axisc in range(-2, 2):
+            assert_equal(np.cross(u, u, axisc=axisc).shape, (3, 4))
+
+
+def test_outer_out_param():
+    arr1 = np.ones((5,))
+    arr2 = np.ones((2,))
+    arr3 = np.linspace(-2, 2, 5)
+    out1 = np.ndarray(shape=(5,5))
+    out2 = np.ndarray(shape=(2, 5))
+    res1 = np.outer(arr1, arr3, out1)
+    assert_equal(res1, out1)
+    assert_equal(np.outer(arr2, arr3, out2), out2)
+
+
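+# Illustrative sketch (editorial, not part of the upstream file) of the
+# `out=` contract exercised by test_outer_out_param above: np.outer both
+# fills the preallocated buffer and returns that same object, e.g.
+#
+#     out = np.empty((2, 5))
+#     res = np.outer(np.ones(2), np.linspace(-2, 2, 5), out)
+#     res is out        # True: the result is written in place
+
+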
+class TestIndices: + + def test_simple(self): + [x, y] = np.indices((4, 3)) + assert_array_equal(x, np.array([[0, 0, 0], + [1, 1, 1], + [2, 2, 2], + [3, 3, 3]])) + assert_array_equal(y, np.array([[0, 1, 2], + [0, 1, 2], + [0, 1, 2], + [0, 1, 2]])) + + def test_single_input(self): + [x] = np.indices((4,)) + assert_array_equal(x, np.array([0, 1, 2, 3])) + + [x] = np.indices((4,), sparse=True) + assert_array_equal(x, np.array([0, 1, 2, 3])) + + def test_scalar_input(self): + assert_array_equal([], np.indices(())) + assert_array_equal([], np.indices((), sparse=True)) + assert_array_equal([[]], np.indices((0,))) + assert_array_equal([[]], np.indices((0,), sparse=True)) + + def test_sparse(self): + [x, y] = np.indices((4,3), sparse=True) + assert_array_equal(x, np.array([[0], [1], [2], [3]])) + assert_array_equal(y, np.array([[0, 1, 2]])) + + @pytest.mark.parametrize("dtype", [np.int32, np.int64, np.float32, np.float64]) + @pytest.mark.parametrize("dims", [(), (0,), (4, 3)]) + def test_return_type(self, dtype, dims): + inds = np.indices(dims, dtype=dtype) + assert_(inds.dtype == dtype) + + for arr in np.indices(dims, dtype=dtype, sparse=True): + assert_(arr.dtype == dtype) + + +class TestRequire: + flag_names = ['C', 'C_CONTIGUOUS', 'CONTIGUOUS', + 'F', 'F_CONTIGUOUS', 'FORTRAN', + 'A', 'ALIGNED', + 'W', 'WRITEABLE', + 'O', 'OWNDATA'] + + def generate_all_false(self, dtype): + arr = np.zeros((2, 2), [('junk', 'i1'), ('a', dtype)]) + arr.setflags(write=False) + a = arr['a'] + assert_(not a.flags['C']) + assert_(not a.flags['F']) + assert_(not a.flags['O']) + assert_(not a.flags['W']) + assert_(not a.flags['A']) + return a + + def set_and_check_flag(self, flag, dtype, arr): + if dtype is None: + dtype = arr.dtype + b = np.require(arr, dtype, [flag]) + assert_(b.flags[flag]) + assert_(b.dtype == dtype) + + # a further call to np.require ought to return the same array + # unless OWNDATA is specified. 
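+        # Illustrative sketch of that contract (hypothetical flags; assumes b
+        # already satisfies the requested one):
+        #     np.require(b, None, ['C']) is b   # True -- no copy for non-'O' flags
+        #     np.require(b, None, ['O'])        # may copy so the result owns its data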
+ c = np.require(b, None, [flag]) + if flag[0] != 'O': + assert_(c is b) + else: + assert_(c.flags[flag]) + + def test_require_each(self): + + id = ['f8', 'i4'] + fd = [None, 'f8', 'c16'] + for idtype, fdtype, flag in itertools.product(id, fd, self.flag_names): + a = self.generate_all_false(idtype) + self.set_and_check_flag(flag, fdtype, a) + + def test_unknown_requirement(self): + a = self.generate_all_false('f8') + assert_raises(KeyError, np.require, a, None, 'Q') + + def test_non_array_input(self): + a = np.require([1, 2, 3, 4], 'i4', ['C', 'A', 'O']) + assert_(a.flags['O']) + assert_(a.flags['C']) + assert_(a.flags['A']) + assert_(a.dtype == 'i4') + assert_equal(a, [1, 2, 3, 4]) + + def test_C_and_F_simul(self): + a = self.generate_all_false('f8') + assert_raises(ValueError, np.require, a, None, ['C', 'F']) + + def test_ensure_array(self): + class ArraySubclass(np.ndarray): + pass + + a = ArraySubclass((2, 2)) + b = np.require(a, None, ['E']) + assert_(type(b) is np.ndarray) + + def test_preserve_subtype(self): + class ArraySubclass(np.ndarray): + pass + + for flag in self.flag_names: + a = ArraySubclass((2, 2)) + self.set_and_check_flag(flag, None, a) + + +class TestBroadcast: + def test_broadcast_in_args(self): + # gh-5881 + arrs = [np.empty((6, 7)), np.empty((5, 6, 1)), np.empty((7,)), + np.empty((5, 1, 7))] + mits = [np.broadcast(*arrs), + np.broadcast(np.broadcast(*arrs[:0]), np.broadcast(*arrs[0:])), + np.broadcast(np.broadcast(*arrs[:1]), np.broadcast(*arrs[1:])), + np.broadcast(np.broadcast(*arrs[:2]), np.broadcast(*arrs[2:])), + np.broadcast(arrs[0], np.broadcast(*arrs[1:-1]), arrs[-1])] + for mit in mits: + assert_equal(mit.shape, (5, 6, 7)) + assert_equal(mit.ndim, 3) + assert_equal(mit.nd, 3) + assert_equal(mit.numiter, 4) + for a, ia in zip(arrs, mit.iters): + assert_(a is ia.base) + + def test_broadcast_single_arg(self): + # gh-6899 + arrs = [np.empty((5, 6, 7))] + mit = np.broadcast(*arrs) + assert_equal(mit.shape, (5, 6, 7)) + assert_equal(mit.ndim, 3) + assert_equal(mit.nd, 3) + assert_equal(mit.numiter, 1) + assert_(arrs[0] is mit.iters[0].base) + + def test_number_of_arguments(self): + arr = np.empty((5,)) + for j in range(35): + arrs = [arr] * j + if j > 32: + assert_raises(ValueError, np.broadcast, *arrs) + else: + mit = np.broadcast(*arrs) + assert_equal(mit.numiter, j) + + def test_broadcast_error_kwargs(self): + #gh-13455 + arrs = [np.empty((5, 6, 7))] + mit = np.broadcast(*arrs) + mit2 = np.broadcast(*arrs, **{}) + assert_equal(mit.shape, mit2.shape) + assert_equal(mit.ndim, mit2.ndim) + assert_equal(mit.nd, mit2.nd) + assert_equal(mit.numiter, mit2.numiter) + assert_(mit.iters[0].base is mit2.iters[0].base) + + assert_raises(ValueError, np.broadcast, 1, **{'x': 1}) + + def test_shape_mismatch_error_message(self): + with pytest.raises(ValueError, match=r"arg 0 with shape \(1, 3\) and " + r"arg 2 with shape \(2,\)"): + np.broadcast([[1, 2, 3]], [[4], [5]], [6, 7]) + + +class TestKeepdims: + + class sub_array(np.ndarray): + def sum(self, axis=None, dtype=None, out=None): + return np.ndarray.sum(self, axis, dtype, out, keepdims=True) + + def test_raise(self): + sub_class = self.sub_array + x = np.arange(30).view(sub_class) + assert_raises(TypeError, np.sum, x, keepdims=True) + + +class TestTensordot: + + def test_zero_dimension(self): + # Test resolution to issue #5663 + a = np.ndarray((3,0)) + b = np.ndarray((0,4)) + td = np.tensordot(a, b, (1, 0)) + assert_array_equal(td, np.dot(a, b)) + assert_array_equal(td, np.einsum('ij,jk', a, b)) + + def 
test_zero_dimensional(self): + # gh-12130 + arr_0d = np.array(1) + ret = np.tensordot(arr_0d, arr_0d, ([], [])) # contracting no axes is well defined + assert_array_equal(ret, arr_0d) diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_numerictypes.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_numerictypes.py new file mode 100644 index 0000000..9cb0034 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_numerictypes.py @@ -0,0 +1,554 @@ +import sys +import itertools + +import pytest +import numpy as np +from numpy.testing import assert_, assert_equal, assert_raises, IS_PYPY + +# This is the structure of the table used for plain objects: +# +# +-+-+-+ +# |x|y|z| +# +-+-+-+ + +# Structure of a plain array description: +Pdescr = [ + ('x', 'i4', (2,)), + ('y', 'f8', (2, 2)), + ('z', 'u1')] + +# A plain list of tuples with values for testing: +PbufferT = [ + # x y z + ([3, 2], [[6., 4.], [6., 4.]], 8), + ([4, 3], [[7., 5.], [7., 5.]], 9), + ] + + +# This is the structure of the table used for nested objects (DON'T PANIC!): +# +# +-+---------------------------------+-----+----------+-+-+ +# |x|Info |color|info |y|z| +# | +-----+--+----------------+----+--+ +----+-----+ | | +# | |value|y2|Info2 |name|z2| |Name|Value| | | +# | | | +----+-----+--+--+ | | | | | | | +# | | | |name|value|y3|z3| | | | | | | | +# +-+-----+--+----+-----+--+--+----+--+-----+----+-----+-+-+ +# + +# The corresponding nested array description: +Ndescr = [ + ('x', 'i4', (2,)), + ('Info', [ + ('value', 'c16'), + ('y2', 'f8'), + ('Info2', [ + ('name', 'S2'), + ('value', 'c16', (2,)), + ('y3', 'f8', (2,)), + ('z3', 'u4', (2,))]), + ('name', 'S2'), + ('z2', 'b1')]), + ('color', 'S2'), + ('info', [ + ('Name', 'U8'), + ('Value', 'c16')]), + ('y', 'f8', (2, 2)), + ('z', 'u1')] + +NbufferT = [ + # x Info color info y z + # value y2 Info2 name z2 Name Value + # name value y3 z3 + ([3, 2], (6j, 6., (b'nn', [6j, 4j], [6., 4.], [1, 2]), b'NN', True), b'cc', (u'NN', 6j), [[6., 4.], [6., 4.]], 8), + ([4, 3], (7j, 7., (b'oo', [7j, 5j], [7., 5.], [2, 1]), b'OO', False), b'dd', (u'OO', 7j), [[7., 5.], [7., 5.]], 9), + ] + + +byteorder = {'little':'<', 'big':'>'}[sys.byteorder] + +def normalize_descr(descr): + "Normalize a description adding the platform byteorder." 
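+    # Illustrative examples (assuming a little-endian platform):
+    #     ('x', 'i4', (2,))  ->  ('x', '<i4', (2,))   # multi-byte: prepend byteorder
+    #     ('z', 'u1')        ->  ('z', '|u1')         # one-byte: byteorder not applicable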
+ + out = [] + for item in descr: + dtype = item[1] + if isinstance(dtype, str): + if dtype[0] not in ['|', '<', '>']: + onebyte = dtype[1:] == "1" + if onebyte or dtype[0] in ['S', 'V', 'b']: + dtype = "|" + dtype + else: + dtype = byteorder + dtype + if len(item) > 2 and np.prod(item[2]) > 1: + nitem = (item[0], dtype, item[2]) + else: + nitem = (item[0], dtype) + out.append(nitem) + elif isinstance(dtype, list): + l = normalize_descr(dtype) + out.append((item[0], l)) + else: + raise ValueError("Expected a str or list and got %s" % + (type(item))) + return out + + +############################################################ +# Creation tests +############################################################ + +class CreateZeros: + """Check the creation of heterogeneous arrays zero-valued""" + + def test_zeros0D(self): + """Check creation of 0-dimensional objects""" + h = np.zeros((), dtype=self._descr) + assert_(normalize_descr(self._descr) == h.dtype.descr) + assert_(h.dtype.fields['x'][0].name[:4] == 'void') + assert_(h.dtype.fields['x'][0].char == 'V') + assert_(h.dtype.fields['x'][0].type == np.void) + # A small check that data is ok + assert_equal(h['z'], np.zeros((), dtype='u1')) + + def test_zerosSD(self): + """Check creation of single-dimensional objects""" + h = np.zeros((2,), dtype=self._descr) + assert_(normalize_descr(self._descr) == h.dtype.descr) + assert_(h.dtype['y'].name[:4] == 'void') + assert_(h.dtype['y'].char == 'V') + assert_(h.dtype['y'].type == np.void) + # A small check that data is ok + assert_equal(h['z'], np.zeros((2,), dtype='u1')) + + def test_zerosMD(self): + """Check creation of multi-dimensional objects""" + h = np.zeros((2, 3), dtype=self._descr) + assert_(normalize_descr(self._descr) == h.dtype.descr) + assert_(h.dtype['z'].name == 'uint8') + assert_(h.dtype['z'].char == 'B') + assert_(h.dtype['z'].type == np.uint8) + # A small check that data is ok + assert_equal(h['z'], np.zeros((2, 3), dtype='u1')) + + +class TestCreateZerosPlain(CreateZeros): + """Check the creation of heterogeneous arrays zero-valued (plain)""" + _descr = Pdescr + +class TestCreateZerosNested(CreateZeros): + """Check the creation of heterogeneous arrays zero-valued (nested)""" + _descr = Ndescr + + +class CreateValues: + """Check the creation of heterogeneous arrays with values""" + + def test_tuple(self): + """Check creation from tuples""" + h = np.array(self._buffer, dtype=self._descr) + assert_(normalize_descr(self._descr) == h.dtype.descr) + if self.multiple_rows: + assert_(h.shape == (2,)) + else: + assert_(h.shape == ()) + + def test_list_of_tuple(self): + """Check creation from list of tuples""" + h = np.array([self._buffer], dtype=self._descr) + assert_(normalize_descr(self._descr) == h.dtype.descr) + if self.multiple_rows: + assert_(h.shape == (1, 2)) + else: + assert_(h.shape == (1,)) + + def test_list_of_list_of_tuple(self): + """Check creation from list of list of tuples""" + h = np.array([[self._buffer]], dtype=self._descr) + assert_(normalize_descr(self._descr) == h.dtype.descr) + if self.multiple_rows: + assert_(h.shape == (1, 1, 2)) + else: + assert_(h.shape == (1, 1)) + + +class TestCreateValuesPlainSingle(CreateValues): + """Check the creation of heterogeneous arrays (plain, single row)""" + _descr = Pdescr + multiple_rows = 0 + _buffer = PbufferT[0] + +class TestCreateValuesPlainMultiple(CreateValues): + """Check the creation of heterogeneous arrays (plain, multiple rows)""" + _descr = Pdescr + multiple_rows = 1 + _buffer = PbufferT + +class 
TestCreateValuesNestedSingle(CreateValues): + """Check the creation of heterogeneous arrays (nested, single row)""" + _descr = Ndescr + multiple_rows = 0 + _buffer = NbufferT[0] + +class TestCreateValuesNestedMultiple(CreateValues): + """Check the creation of heterogeneous arrays (nested, multiple rows)""" + _descr = Ndescr + multiple_rows = 1 + _buffer = NbufferT + + +############################################################ +# Reading tests +############################################################ + +class ReadValuesPlain: + """Check the reading of values in heterogeneous arrays (plain)""" + + def test_access_fields(self): + h = np.array(self._buffer, dtype=self._descr) + if not self.multiple_rows: + assert_(h.shape == ()) + assert_equal(h['x'], np.array(self._buffer[0], dtype='i4')) + assert_equal(h['y'], np.array(self._buffer[1], dtype='f8')) + assert_equal(h['z'], np.array(self._buffer[2], dtype='u1')) + else: + assert_(len(h) == 2) + assert_equal(h['x'], np.array([self._buffer[0][0], + self._buffer[1][0]], dtype='i4')) + assert_equal(h['y'], np.array([self._buffer[0][1], + self._buffer[1][1]], dtype='f8')) + assert_equal(h['z'], np.array([self._buffer[0][2], + self._buffer[1][2]], dtype='u1')) + + +class TestReadValuesPlainSingle(ReadValuesPlain): + """Check the creation of heterogeneous arrays (plain, single row)""" + _descr = Pdescr + multiple_rows = 0 + _buffer = PbufferT[0] + +class TestReadValuesPlainMultiple(ReadValuesPlain): + """Check the values of heterogeneous arrays (plain, multiple rows)""" + _descr = Pdescr + multiple_rows = 1 + _buffer = PbufferT + +class ReadValuesNested: + """Check the reading of values in heterogeneous arrays (nested)""" + + def test_access_top_fields(self): + """Check reading the top fields of a nested array""" + h = np.array(self._buffer, dtype=self._descr) + if not self.multiple_rows: + assert_(h.shape == ()) + assert_equal(h['x'], np.array(self._buffer[0], dtype='i4')) + assert_equal(h['y'], np.array(self._buffer[4], dtype='f8')) + assert_equal(h['z'], np.array(self._buffer[5], dtype='u1')) + else: + assert_(len(h) == 2) + assert_equal(h['x'], np.array([self._buffer[0][0], + self._buffer[1][0]], dtype='i4')) + assert_equal(h['y'], np.array([self._buffer[0][4], + self._buffer[1][4]], dtype='f8')) + assert_equal(h['z'], np.array([self._buffer[0][5], + self._buffer[1][5]], dtype='u1')) + + def test_nested1_acessors(self): + """Check reading the nested fields of a nested array (1st level)""" + h = np.array(self._buffer, dtype=self._descr) + if not self.multiple_rows: + assert_equal(h['Info']['value'], + np.array(self._buffer[1][0], dtype='c16')) + assert_equal(h['Info']['y2'], + np.array(self._buffer[1][1], dtype='f8')) + assert_equal(h['info']['Name'], + np.array(self._buffer[3][0], dtype='U2')) + assert_equal(h['info']['Value'], + np.array(self._buffer[3][1], dtype='c16')) + else: + assert_equal(h['Info']['value'], + np.array([self._buffer[0][1][0], + self._buffer[1][1][0]], + dtype='c16')) + assert_equal(h['Info']['y2'], + np.array([self._buffer[0][1][1], + self._buffer[1][1][1]], + dtype='f8')) + assert_equal(h['info']['Name'], + np.array([self._buffer[0][3][0], + self._buffer[1][3][0]], + dtype='U2')) + assert_equal(h['info']['Value'], + np.array([self._buffer[0][3][1], + self._buffer[1][3][1]], + dtype='c16')) + + def test_nested2_acessors(self): + """Check reading the nested fields of a nested array (2nd level)""" + h = np.array(self._buffer, dtype=self._descr) + if not self.multiple_rows: + assert_equal(h['Info']['Info2']['value'], + 
np.array(self._buffer[1][2][1], dtype='c16')) + assert_equal(h['Info']['Info2']['z3'], + np.array(self._buffer[1][2][3], dtype='u4')) + else: + assert_equal(h['Info']['Info2']['value'], + np.array([self._buffer[0][1][2][1], + self._buffer[1][1][2][1]], + dtype='c16')) + assert_equal(h['Info']['Info2']['z3'], + np.array([self._buffer[0][1][2][3], + self._buffer[1][1][2][3]], + dtype='u4')) + + def test_nested1_descriptor(self): + """Check access nested descriptors of a nested array (1st level)""" + h = np.array(self._buffer, dtype=self._descr) + assert_(h.dtype['Info']['value'].name == 'complex128') + assert_(h.dtype['Info']['y2'].name == 'float64') + assert_(h.dtype['info']['Name'].name == 'str256') + assert_(h.dtype['info']['Value'].name == 'complex128') + + def test_nested2_descriptor(self): + """Check access nested descriptors of a nested array (2nd level)""" + h = np.array(self._buffer, dtype=self._descr) + assert_(h.dtype['Info']['Info2']['value'].name == 'void256') + assert_(h.dtype['Info']['Info2']['z3'].name == 'void64') + + +class TestReadValuesNestedSingle(ReadValuesNested): + """Check the values of heterogeneous arrays (nested, single row)""" + _descr = Ndescr + multiple_rows = False + _buffer = NbufferT[0] + +class TestReadValuesNestedMultiple(ReadValuesNested): + """Check the values of heterogeneous arrays (nested, multiple rows)""" + _descr = Ndescr + multiple_rows = True + _buffer = NbufferT + +class TestEmptyField: + def test_assign(self): + a = np.arange(10, dtype=np.float32) + a.dtype = [("int", "<0i4"), ("float", "<2f4")] + assert_(a['int'].shape == (5, 0)) + assert_(a['float'].shape == (5, 2)) + +class TestCommonType: + def test_scalar_loses1(self): + res = np.find_common_type(['f4', 'f4', 'i2'], ['f8']) + assert_(res == 'f4') + + def test_scalar_loses2(self): + res = np.find_common_type(['f4', 'f4'], ['i8']) + assert_(res == 'f4') + + def test_scalar_wins(self): + res = np.find_common_type(['f4', 'f4', 'i2'], ['c8']) + assert_(res == 'c8') + + def test_scalar_wins2(self): + res = np.find_common_type(['u4', 'i4', 'i4'], ['f4']) + assert_(res == 'f8') + + def test_scalar_wins3(self): # doesn't go up to 'f16' on purpose + res = np.find_common_type(['u8', 'i8', 'i8'], ['f8']) + assert_(res == 'f8') + +class TestMultipleFields: + def setup(self): + self.ary = np.array([(1, 2, 3, 4), (5, 6, 7, 8)], dtype='i4,f4,i2,c8') + + def _bad_call(self): + return self.ary['f0', 'f1'] + + def test_no_tuple(self): + assert_raises(IndexError, self._bad_call) + + def test_return(self): + res = self.ary[['f0', 'f2']].tolist() + assert_(res == [(1, 3), (5, 7)]) + + +class TestIsSubDType: + # scalar types can be promoted into dtypes + wrappers = [np.dtype, lambda x: x] + + def test_both_abstract(self): + assert_(np.issubdtype(np.floating, np.inexact)) + assert_(not np.issubdtype(np.inexact, np.floating)) + + def test_same(self): + for cls in (np.float32, np.int32): + for w1, w2 in itertools.product(self.wrappers, repeat=2): + assert_(np.issubdtype(w1(cls), w2(cls))) + + def test_subclass(self): + # note we cannot promote floating to a dtype, as it would turn into a + # concrete type + for w in self.wrappers: + assert_(np.issubdtype(w(np.float32), np.floating)) + assert_(np.issubdtype(w(np.float64), np.floating)) + + def test_subclass_backwards(self): + for w in self.wrappers: + assert_(not np.issubdtype(np.floating, w(np.float32))) + assert_(not np.issubdtype(np.floating, w(np.float64))) + + def test_sibling_class(self): + for w1, w2 in itertools.product(self.wrappers, repeat=2): + 
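+            # float32 and float64 are siblings under np.floating, so neither
+            # is a sub-dtype of the other.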
assert_(not np.issubdtype(w1(np.float32), w2(np.float64))) + assert_(not np.issubdtype(w1(np.float64), w2(np.float32))) + + def test_nondtype_nonscalartype(self): + # See gh-14619 and gh-9505 which introduced the deprecation to fix + # this. These tests are directly taken from gh-9505 + assert not np.issubdtype(np.float32, 'float64') + assert not np.issubdtype(np.float32, 'f8') + assert not np.issubdtype(np.int32, str) + assert not np.issubdtype(np.int32, 'int64') + assert not np.issubdtype(np.str_, 'void') + # for the following the correct spellings are + # np.integer, np.floating, or np.complexfloating respectively: + assert not np.issubdtype(np.int8, int) # np.int8 is never np.int_ + assert not np.issubdtype(np.float32, float) + assert not np.issubdtype(np.complex64, complex) + assert not np.issubdtype(np.float32, "float") + assert not np.issubdtype(np.float64, "f") + + # Test the same for the correct first datatype and abstract one + # in the case of int, float, complex: + assert np.issubdtype(np.float64, 'float64') + assert np.issubdtype(np.float64, 'f8') + assert np.issubdtype(np.str_, str) + assert np.issubdtype(np.int64, 'int64') + assert np.issubdtype(np.void, 'void') + assert np.issubdtype(np.int8, np.integer) + assert np.issubdtype(np.float32, np.floating) + assert np.issubdtype(np.complex64, np.complexfloating) + assert np.issubdtype(np.float64, "float") + assert np.issubdtype(np.float32, "f") + + +class TestSctypeDict: + def test_longdouble(self): + assert_(np.sctypeDict['f8'] is not np.longdouble) + assert_(np.sctypeDict['c16'] is not np.clongdouble) + + +class TestBitName: + def test_abstract(self): + assert_raises(ValueError, np.core.numerictypes.bitname, np.floating) + + +class TestMaximumSctype: + + # note that parametrizing with sctype['int'] and similar would skip types + # with the same size (gh-11923) + + @pytest.mark.parametrize('t', [np.byte, np.short, np.intc, np.int_, np.longlong]) + def test_int(self, t): + assert_equal(np.maximum_sctype(t), np.sctypes['int'][-1]) + + @pytest.mark.parametrize('t', [np.ubyte, np.ushort, np.uintc, np.uint, np.ulonglong]) + def test_uint(self, t): + assert_equal(np.maximum_sctype(t), np.sctypes['uint'][-1]) + + @pytest.mark.parametrize('t', [np.half, np.single, np.double, np.longdouble]) + def test_float(self, t): + assert_equal(np.maximum_sctype(t), np.sctypes['float'][-1]) + + @pytest.mark.parametrize('t', [np.csingle, np.cdouble, np.clongdouble]) + def test_complex(self, t): + assert_equal(np.maximum_sctype(t), np.sctypes['complex'][-1]) + + @pytest.mark.parametrize('t', [np.bool_, np.object_, np.unicode_, np.bytes_, np.void]) + def test_other(self, t): + assert_equal(np.maximum_sctype(t), t) + + +class Test_sctype2char: + # This function is old enough that we're really just documenting the quirks + # at this point. 
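+    # Illustrative summary of the quirks checked below: concrete scalar types
+    # map to their dtype character (np.double -> 'd'), Python types coerce
+    # first (list -> 'O'), abstract types raise KeyError, non-types ValueError.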
+ + def test_scalar_type(self): + assert_equal(np.sctype2char(np.double), 'd') + assert_equal(np.sctype2char(np.int_), 'l') + assert_equal(np.sctype2char(np.unicode_), 'U') + assert_equal(np.sctype2char(np.bytes_), 'S') + + def test_other_type(self): + assert_equal(np.sctype2char(float), 'd') + assert_equal(np.sctype2char(list), 'O') + assert_equal(np.sctype2char(np.ndarray), 'O') + + def test_third_party_scalar_type(self): + from numpy.core._rational_tests import rational + assert_raises(KeyError, np.sctype2char, rational) + assert_raises(KeyError, np.sctype2char, rational(1)) + + def test_array_instance(self): + assert_equal(np.sctype2char(np.array([1.0, 2.0])), 'd') + + def test_abstract_type(self): + assert_raises(KeyError, np.sctype2char, np.floating) + + def test_non_type(self): + assert_raises(ValueError, np.sctype2char, 1) + +@pytest.mark.parametrize("rep, expected", [ + (np.int32, True), + (list, False), + (1.1, False), + (str, True), + (np.dtype(np.float64), True), + (np.dtype((np.int16, (3, 4))), True), + (np.dtype([('a', np.int8)]), True), + ]) +def test_issctype(rep, expected): + # ensure proper identification of scalar + # data-types by issctype() + actual = np.issctype(rep) + assert_equal(actual, expected) + + +@pytest.mark.skipif(sys.flags.optimize > 1, + reason="no docstrings present to inspect when PYTHONOPTIMIZE/Py_OptimizeFlag > 1") +@pytest.mark.xfail(IS_PYPY, + reason="PyPy cannot modify tp_doc after PyType_Ready") +class TestDocStrings: + def test_platform_dependent_aliases(self): + if np.int64 is np.int_: + assert_('int64' in np.int_.__doc__) + elif np.int64 is np.longlong: + assert_('int64' in np.longlong.__doc__) + + +class TestScalarTypeNames: + # gh-9799 + + numeric_types = [ + np.byte, np.short, np.intc, np.int_, np.longlong, + np.ubyte, np.ushort, np.uintc, np.uint, np.ulonglong, + np.half, np.single, np.double, np.longdouble, + np.csingle, np.cdouble, np.clongdouble, + ] + + def test_names_are_unique(self): + # none of the above may be aliases for each other + assert len(set(self.numeric_types)) == len(self.numeric_types) + + # names must be unique + names = [t.__name__ for t in self.numeric_types] + assert len(set(names)) == len(names) + + @pytest.mark.parametrize('t', numeric_types) + def test_names_reflect_attributes(self, t): + """ Test that names correspond to where the type is under ``np.`` """ + assert getattr(np, t.__name__) is t + + @pytest.mark.parametrize('t', numeric_types) + def test_names_are_undersood_by_dtype(self, t): + """ Test the dtype constructor maps names back to the type """ + assert np.dtype(t.__name__).type is t diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_overrides.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_overrides.py new file mode 100644 index 0000000..9216a3f --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_overrides.py @@ -0,0 +1,584 @@ +import inspect +import sys +import os +import tempfile +from io import StringIO +from unittest import mock + +import numpy as np +from numpy.testing import ( + assert_, assert_equal, assert_raises, assert_raises_regex) +from numpy.core.overrides import ( + _get_implementing_args, array_function_dispatch, + verify_matching_signatures, ARRAY_FUNCTION_ENABLED) +from numpy.compat import pickle +import pytest + + +requires_array_function = pytest.mark.skipif( + not ARRAY_FUNCTION_ENABLED, + reason="__array_function__ dispatch not enabled.") + + +def _return_not_implemented(self, *args, **kwargs): + return NotImplemented + + +# 
need to define this at the top level to test pickling +@array_function_dispatch(lambda array: (array,)) +def dispatched_one_arg(array): + """Docstring.""" + return 'original' + + +@array_function_dispatch(lambda array1, array2: (array1, array2)) +def dispatched_two_arg(array1, array2): + """Docstring.""" + return 'original' + + +class TestGetImplementingArgs: + + def test_ndarray(self): + array = np.array(1) + + args = _get_implementing_args([array]) + assert_equal(list(args), [array]) + + args = _get_implementing_args([array, array]) + assert_equal(list(args), [array]) + + args = _get_implementing_args([array, 1]) + assert_equal(list(args), [array]) + + args = _get_implementing_args([1, array]) + assert_equal(list(args), [array]) + + def test_ndarray_subclasses(self): + + class OverrideSub(np.ndarray): + __array_function__ = _return_not_implemented + + class NoOverrideSub(np.ndarray): + pass + + array = np.array(1).view(np.ndarray) + override_sub = np.array(1).view(OverrideSub) + no_override_sub = np.array(1).view(NoOverrideSub) + + args = _get_implementing_args([array, override_sub]) + assert_equal(list(args), [override_sub, array]) + + args = _get_implementing_args([array, no_override_sub]) + assert_equal(list(args), [no_override_sub, array]) + + args = _get_implementing_args( + [override_sub, no_override_sub]) + assert_equal(list(args), [override_sub, no_override_sub]) + + def test_ndarray_and_duck_array(self): + + class Other: + __array_function__ = _return_not_implemented + + array = np.array(1) + other = Other() + + args = _get_implementing_args([other, array]) + assert_equal(list(args), [other, array]) + + args = _get_implementing_args([array, other]) + assert_equal(list(args), [array, other]) + + def test_ndarray_subclass_and_duck_array(self): + + class OverrideSub(np.ndarray): + __array_function__ = _return_not_implemented + + class Other: + __array_function__ = _return_not_implemented + + array = np.array(1) + subarray = np.array(1).view(OverrideSub) + other = Other() + + assert_equal(_get_implementing_args([array, subarray, other]), + [subarray, array, other]) + assert_equal(_get_implementing_args([array, other, subarray]), + [subarray, array, other]) + + def test_many_duck_arrays(self): + + class A: + __array_function__ = _return_not_implemented + + class B(A): + __array_function__ = _return_not_implemented + + class C(A): + __array_function__ = _return_not_implemented + + class D: + __array_function__ = _return_not_implemented + + a = A() + b = B() + c = C() + d = D() + + assert_equal(_get_implementing_args([1]), []) + assert_equal(_get_implementing_args([a]), [a]) + assert_equal(_get_implementing_args([a, 1]), [a]) + assert_equal(_get_implementing_args([a, a, a]), [a]) + assert_equal(_get_implementing_args([a, d, a]), [a, d]) + assert_equal(_get_implementing_args([a, b]), [b, a]) + assert_equal(_get_implementing_args([b, a]), [b, a]) + assert_equal(_get_implementing_args([a, b, c]), [b, c, a]) + assert_equal(_get_implementing_args([a, c, b]), [c, b, a]) + + def test_too_many_duck_arrays(self): + namespace = dict(__array_function__=_return_not_implemented) + types = [type('A' + str(i), (object,), namespace) for i in range(33)] + relevant_args = [t() for t in types] + + actual = _get_implementing_args(relevant_args[:32]) + assert_equal(actual, relevant_args[:32]) + + with assert_raises_regex(TypeError, 'distinct argument types'): + _get_implementing_args(relevant_args) + + +class TestNDArrayArrayFunction: + + @requires_array_function + def test_method(self): + + class Other: + 
__array_function__ = _return_not_implemented + + class NoOverrideSub(np.ndarray): + pass + + class OverrideSub(np.ndarray): + __array_function__ = _return_not_implemented + + array = np.array([1]) + other = Other() + no_override_sub = array.view(NoOverrideSub) + override_sub = array.view(OverrideSub) + + result = array.__array_function__(func=dispatched_two_arg, + types=(np.ndarray,), + args=(array, 1.), kwargs={}) + assert_equal(result, 'original') + + result = array.__array_function__(func=dispatched_two_arg, + types=(np.ndarray, Other), + args=(array, other), kwargs={}) + assert_(result is NotImplemented) + + result = array.__array_function__(func=dispatched_two_arg, + types=(np.ndarray, NoOverrideSub), + args=(array, no_override_sub), + kwargs={}) + assert_equal(result, 'original') + + result = array.__array_function__(func=dispatched_two_arg, + types=(np.ndarray, OverrideSub), + args=(array, override_sub), + kwargs={}) + assert_equal(result, 'original') + + with assert_raises_regex(TypeError, 'no implementation found'): + np.concatenate((array, other)) + + expected = np.concatenate((array, array)) + result = np.concatenate((array, no_override_sub)) + assert_equal(result, expected.view(NoOverrideSub)) + result = np.concatenate((array, override_sub)) + assert_equal(result, expected.view(OverrideSub)) + + def test_no_wrapper(self): + # This shouldn't happen unless a user intentionally calls + # __array_function__ with invalid arguments, but check that we raise + # an appropriate error all the same. + array = np.array(1) + func = lambda x: x + with assert_raises_regex(AttributeError, '_implementation'): + array.__array_function__(func=func, types=(np.ndarray,), + args=(array,), kwargs={}) + + +@requires_array_function +class TestArrayFunctionDispatch: + + def test_pickle(self): + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + roundtripped = pickle.loads( + pickle.dumps(dispatched_one_arg, protocol=proto)) + assert_(roundtripped is dispatched_one_arg) + + def test_name_and_docstring(self): + assert_equal(dispatched_one_arg.__name__, 'dispatched_one_arg') + if sys.flags.optimize < 2: + assert_equal(dispatched_one_arg.__doc__, 'Docstring.') + + def test_interface(self): + + class MyArray: + def __array_function__(self, func, types, args, kwargs): + return (self, func, types, args, kwargs) + + original = MyArray() + (obj, func, types, args, kwargs) = dispatched_one_arg(original) + assert_(obj is original) + assert_(func is dispatched_one_arg) + assert_equal(set(types), {MyArray}) + # assert_equal uses the overloaded np.iscomplexobj() internally + assert_(args == (original,)) + assert_equal(kwargs, {}) + + def test_not_implemented(self): + + class MyArray: + def __array_function__(self, func, types, args, kwargs): + return NotImplemented + + array = MyArray() + with assert_raises_regex(TypeError, 'no implementation found'): + dispatched_one_arg(array) + + +@requires_array_function +class TestVerifyMatchingSignatures: + + def test_verify_matching_signatures(self): + + verify_matching_signatures(lambda x: 0, lambda x: 0) + verify_matching_signatures(lambda x=None: 0, lambda x=None: 0) + verify_matching_signatures(lambda x=1: 0, lambda x=None: 0) + + with assert_raises(RuntimeError): + verify_matching_signatures(lambda a: 0, lambda b: 0) + with assert_raises(RuntimeError): + verify_matching_signatures(lambda x: 0, lambda x=None: 0) + with assert_raises(RuntimeError): + verify_matching_signatures(lambda x=None: 0, lambda y=None: 0) + with assert_raises(RuntimeError): + 
verify_matching_signatures(lambda x=1: 0, lambda y=1: 0) + + def test_array_function_dispatch(self): + + with assert_raises(RuntimeError): + @array_function_dispatch(lambda x: (x,)) + def f(y): + pass + + # should not raise + @array_function_dispatch(lambda x: (x,), verify=False) + def f(y): + pass + + +def _new_duck_type_and_implements(): + """Create a duck array type and implements functions.""" + HANDLED_FUNCTIONS = {} + + class MyArray: + def __array_function__(self, func, types, args, kwargs): + if func not in HANDLED_FUNCTIONS: + return NotImplemented + if not all(issubclass(t, MyArray) for t in types): + return NotImplemented + return HANDLED_FUNCTIONS[func](*args, **kwargs) + + def implements(numpy_function): + """Register an __array_function__ implementations.""" + def decorator(func): + HANDLED_FUNCTIONS[numpy_function] = func + return func + return decorator + + return (MyArray, implements) + + +@requires_array_function +class TestArrayFunctionImplementation: + + def test_one_arg(self): + MyArray, implements = _new_duck_type_and_implements() + + @implements(dispatched_one_arg) + def _(array): + return 'myarray' + + assert_equal(dispatched_one_arg(1), 'original') + assert_equal(dispatched_one_arg(MyArray()), 'myarray') + + def test_optional_args(self): + MyArray, implements = _new_duck_type_and_implements() + + @array_function_dispatch(lambda array, option=None: (array,)) + def func_with_option(array, option='default'): + return option + + @implements(func_with_option) + def my_array_func_with_option(array, new_option='myarray'): + return new_option + + # we don't need to implement every option on __array_function__ + # implementations + assert_equal(func_with_option(1), 'default') + assert_equal(func_with_option(1, option='extra'), 'extra') + assert_equal(func_with_option(MyArray()), 'myarray') + with assert_raises(TypeError): + func_with_option(MyArray(), option='extra') + + # but new options on implementations can't be used + result = my_array_func_with_option(MyArray(), new_option='yes') + assert_equal(result, 'yes') + with assert_raises(TypeError): + func_with_option(MyArray(), new_option='no') + + def test_not_implemented(self): + MyArray, implements = _new_duck_type_and_implements() + + @array_function_dispatch(lambda array: (array,), module='my') + def func(array): + return array + + array = np.array(1) + assert_(func(array) is array) + assert_equal(func.__module__, 'my') + + with assert_raises_regex( + TypeError, "no implementation found for 'my.func'"): + func(MyArray()) + + +class TestNDArrayMethods: + + def test_repr(self): + # gh-12162: should still be defined even if __array_function__ doesn't + # implement np.array_repr() + + class MyArray(np.ndarray): + def __array_function__(*args, **kwargs): + return NotImplemented + + array = np.array(1).view(MyArray) + assert_equal(repr(array), 'MyArray(1)') + assert_equal(str(array), '1') + + +class TestNumPyFunctions: + + def test_set_module(self): + assert_equal(np.sum.__module__, 'numpy') + assert_equal(np.char.equal.__module__, 'numpy.char') + assert_equal(np.fft.fft.__module__, 'numpy.fft') + assert_equal(np.linalg.solve.__module__, 'numpy.linalg') + + def test_inspect_sum(self): + signature = inspect.signature(np.sum) + assert_('axis' in signature.parameters) + + @requires_array_function + def test_override_sum(self): + MyArray, implements = _new_duck_type_and_implements() + + @implements(np.sum) + def _(array): + return 'yes' + + assert_equal(np.sum(MyArray()), 'yes') + + @requires_array_function + def 
test_sum_on_mock_array(self): + + # We need a proxy for mocks because __array_function__ is only looked + # up in the class dict + class ArrayProxy: + def __init__(self, value): + self.value = value + def __array_function__(self, *args, **kwargs): + return self.value.__array_function__(*args, **kwargs) + def __array__(self, *args, **kwargs): + return self.value.__array__(*args, **kwargs) + + proxy = ArrayProxy(mock.Mock(spec=ArrayProxy)) + proxy.value.__array_function__.return_value = 1 + result = np.sum(proxy) + assert_equal(result, 1) + proxy.value.__array_function__.assert_called_once_with( + np.sum, (ArrayProxy,), (proxy,), {}) + proxy.value.__array__.assert_not_called() + + @requires_array_function + def test_sum_forwarding_implementation(self): + + class MyArray(np.ndarray): + + def sum(self, axis, out): + return 'summed' + + def __array_function__(self, func, types, args, kwargs): + return super().__array_function__(func, types, args, kwargs) + + # note: the internal implementation of np.sum() calls the .sum() method + array = np.array(1).view(MyArray) + assert_equal(np.sum(array), 'summed') + + +class TestArrayLike: + def setup(self): + class MyArray(): + def __init__(self, function=None): + self.function = function + + def __array_function__(self, func, types, args, kwargs): + try: + my_func = getattr(self, func.__name__) + except AttributeError: + return NotImplemented + return my_func(*args, **kwargs) + + self.MyArray = MyArray + + class MyNoArrayFunctionArray(): + def __init__(self, function=None): + self.function = function + + self.MyNoArrayFunctionArray = MyNoArrayFunctionArray + + def add_method(self, name, arr_class, enable_value_error=False): + def _definition(*args, **kwargs): + # Check that `like=` isn't propagated downstream + assert 'like' not in kwargs + + if enable_value_error and 'value_error' in kwargs: + raise ValueError + + return arr_class(getattr(arr_class, name)) + setattr(arr_class, name, _definition) + + def func_args(*args, **kwargs): + return args, kwargs + + @requires_array_function + def test_array_like_not_implemented(self): + self.add_method('array', self.MyArray) + + ref = self.MyArray.array() + + with assert_raises_regex(TypeError, 'no implementation found'): + array_like = np.asarray(1, like=ref) + + _array_tests = [ + ('array', *func_args((1,))), + ('asarray', *func_args((1,))), + ('asanyarray', *func_args((1,))), + ('ascontiguousarray', *func_args((2, 3))), + ('asfortranarray', *func_args((2, 3))), + ('require', *func_args((np.arange(6).reshape(2, 3),), + requirements=['A', 'F'])), + ('empty', *func_args((1,))), + ('full', *func_args((1,), 2)), + ('ones', *func_args((1,))), + ('zeros', *func_args((1,))), + ('arange', *func_args(3)), + ('frombuffer', *func_args(b'\x00' * 8, dtype=int)), + ('fromiter', *func_args(range(3), dtype=int)), + ('fromstring', *func_args('1,2', dtype=int, sep=',')), + ('loadtxt', *func_args(lambda: StringIO('0 1\n2 3'))), + ('genfromtxt', *func_args(lambda: StringIO(u'1,2.1'), + dtype=[('int', 'i8'), ('float', 'f8')], + delimiter=',')), + ] + + @pytest.mark.parametrize('function, args, kwargs', _array_tests) + @pytest.mark.parametrize('numpy_ref', [True, False]) + @requires_array_function + def test_array_like(self, function, args, kwargs, numpy_ref): + self.add_method('array', self.MyArray) + self.add_method(function, self.MyArray) + np_func = getattr(np, function) + my_func = getattr(self.MyArray, function) + + if numpy_ref is True: + ref = np.array(1) + else: + ref = self.MyArray.array() + + like_args = tuple(a() if 
callable(a) else a for a in args) + array_like = np_func(*like_args, **kwargs, like=ref) + + if numpy_ref is True: + assert type(array_like) is np.ndarray + + np_args = tuple(a() if callable(a) else a for a in args) + np_arr = np_func(*np_args, **kwargs) + + # Special-case np.empty to ensure values match + if function == "empty": + np_arr.fill(1) + array_like.fill(1) + + assert_equal(array_like, np_arr) + else: + assert type(array_like) is self.MyArray + assert array_like.function is my_func + + @pytest.mark.parametrize('function, args, kwargs', _array_tests) + @pytest.mark.parametrize('ref', [1, [1], "MyNoArrayFunctionArray"]) + @requires_array_function + def test_no_array_function_like(self, function, args, kwargs, ref): + self.add_method('array', self.MyNoArrayFunctionArray) + self.add_method(function, self.MyNoArrayFunctionArray) + np_func = getattr(np, function) + + # Instantiate ref if it's the MyNoArrayFunctionArray class + if ref == "MyNoArrayFunctionArray": + ref = self.MyNoArrayFunctionArray.array() + + like_args = tuple(a() if callable(a) else a for a in args) + + with assert_raises_regex(TypeError, + 'The `like` argument must be an array-like that implements'): + np_func(*like_args, **kwargs, like=ref) + + @pytest.mark.parametrize('numpy_ref', [True, False]) + def test_array_like_fromfile(self, numpy_ref): + self.add_method('array', self.MyArray) + self.add_method("fromfile", self.MyArray) + + if numpy_ref is True: + ref = np.array(1) + else: + ref = self.MyArray.array() + + data = np.random.random(5) + + with tempfile.TemporaryDirectory() as tmpdir: + fname = os.path.join(tmpdir, "testfile") + data.tofile(fname) + + array_like = np.fromfile(fname, like=ref) + if numpy_ref is True: + assert type(array_like) is np.ndarray + np_res = np.fromfile(fname, like=ref) + assert_equal(np_res, data) + assert_equal(array_like, np_res) + else: + assert type(array_like) is self.MyArray + assert array_like.function is self.MyArray.fromfile + + @requires_array_function + def test_exception_handling(self): + self.add_method('array', self.MyArray, enable_value_error=True) + + ref = self.MyArray.array() + + with assert_raises(TypeError): + # Raises the error about `value_error` being invalid first + np.array(1, value_error=True, like=ref) diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_print.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_print.py new file mode 100644 index 0000000..89a8b48 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_print.py @@ -0,0 +1,200 @@ +import sys + +import pytest + +import numpy as np +from numpy.testing import assert_, assert_equal +from numpy.core.tests._locales import CommaDecimalPointLocale + + +from io import StringIO + +_REF = {np.inf: 'inf', -np.inf: '-inf', np.nan: 'nan'} + + +@pytest.mark.parametrize('tp', [np.float32, np.double, np.longdouble]) +def test_float_types(tp): + """ Check formatting. + + This is only for the str function, and only for simple types. + The precision of np.float32 and np.longdouble aren't the same as the + python float precision. 
+ + """ + for x in [0, 1, -1, 1e20]: + assert_equal(str(tp(x)), str(float(x)), + err_msg='Failed str formatting for type %s' % tp) + + if tp(1e16).itemsize > 4: + assert_equal(str(tp(1e16)), str(float('1e16')), + err_msg='Failed str formatting for type %s' % tp) + else: + ref = '1e+16' + assert_equal(str(tp(1e16)), ref, + err_msg='Failed str formatting for type %s' % tp) + + +@pytest.mark.parametrize('tp', [np.float32, np.double, np.longdouble]) +def test_nan_inf_float(tp): + """ Check formatting of nan & inf. + + This is only for the str function, and only for simple types. + The precision of np.float32 and np.longdouble aren't the same as the + python float precision. + + """ + for x in [np.inf, -np.inf, np.nan]: + assert_equal(str(tp(x)), _REF[x], + err_msg='Failed str formatting for type %s' % tp) + + +@pytest.mark.parametrize('tp', [np.complex64, np.cdouble, np.clongdouble]) +def test_complex_types(tp): + """Check formatting of complex types. + + This is only for the str function, and only for simple types. + The precision of np.float32 and np.longdouble aren't the same as the + python float precision. + + """ + for x in [0, 1, -1, 1e20]: + assert_equal(str(tp(x)), str(complex(x)), + err_msg='Failed str formatting for type %s' % tp) + assert_equal(str(tp(x*1j)), str(complex(x*1j)), + err_msg='Failed str formatting for type %s' % tp) + assert_equal(str(tp(x + x*1j)), str(complex(x + x*1j)), + err_msg='Failed str formatting for type %s' % tp) + + if tp(1e16).itemsize > 8: + assert_equal(str(tp(1e16)), str(complex(1e16)), + err_msg='Failed str formatting for type %s' % tp) + else: + ref = '(1e+16+0j)' + assert_equal(str(tp(1e16)), ref, + err_msg='Failed str formatting for type %s' % tp) + + +@pytest.mark.parametrize('dtype', [np.complex64, np.cdouble, np.clongdouble]) +def test_complex_inf_nan(dtype): + """Check inf/nan formatting of complex types.""" + TESTS = { + complex(np.inf, 0): "(inf+0j)", + complex(0, np.inf): "infj", + complex(-np.inf, 0): "(-inf+0j)", + complex(0, -np.inf): "-infj", + complex(np.inf, 1): "(inf+1j)", + complex(1, np.inf): "(1+infj)", + complex(-np.inf, 1): "(-inf+1j)", + complex(1, -np.inf): "(1-infj)", + complex(np.nan, 0): "(nan+0j)", + complex(0, np.nan): "nanj", + complex(-np.nan, 0): "(nan+0j)", + complex(0, -np.nan): "nanj", + complex(np.nan, 1): "(nan+1j)", + complex(1, np.nan): "(1+nanj)", + complex(-np.nan, 1): "(nan+1j)", + complex(1, -np.nan): "(1+nanj)", + } + for c, s in TESTS.items(): + assert_equal(str(dtype(c)), s) + + +# print tests +def _test_redirected_print(x, tp, ref=None): + file = StringIO() + file_tp = StringIO() + stdout = sys.stdout + try: + sys.stdout = file_tp + print(tp(x)) + sys.stdout = file + if ref: + print(ref) + else: + print(x) + finally: + sys.stdout = stdout + + assert_equal(file.getvalue(), file_tp.getvalue(), + err_msg='print failed for type%s' % tp) + + +@pytest.mark.parametrize('tp', [np.float32, np.double, np.longdouble]) +def test_float_type_print(tp): + """Check formatting when using print """ + for x in [0, 1, -1, 1e20]: + _test_redirected_print(float(x), tp) + + for x in [np.inf, -np.inf, np.nan]: + _test_redirected_print(float(x), tp, _REF[x]) + + if tp(1e16).itemsize > 4: + _test_redirected_print(float(1e16), tp) + else: + ref = '1e+16' + _test_redirected_print(float(1e16), tp, ref) + + +@pytest.mark.parametrize('tp', [np.complex64, np.cdouble, np.clongdouble]) +def test_complex_type_print(tp): + """Check formatting when using print """ + # We do not create complex with inf/nan directly because the feature is + # 
missing in python < 2.6
+    for x in [0, 1, -1, 1e20]:
+        _test_redirected_print(complex(x), tp)
+
+    if tp(1e16).itemsize > 8:
+        _test_redirected_print(complex(1e16), tp)
+    else:
+        ref = '(1e+16+0j)'
+        _test_redirected_print(complex(1e16), tp, ref)
+
+    _test_redirected_print(complex(np.inf, 1), tp, '(inf+1j)')
+    _test_redirected_print(complex(-np.inf, 1), tp, '(-inf+1j)')
+    _test_redirected_print(complex(-np.nan, 1), tp, '(nan+1j)')
+
+
+def test_scalar_format():
+    """Test the str.format method with NumPy scalar types"""
+    tests = [('{0}', True, np.bool_),
+             ('{0}', False, np.bool_),
+             ('{0:d}', 130, np.uint8),
+             ('{0:d}', 50000, np.uint16),
+             ('{0:d}', 3000000000, np.uint32),
+             ('{0:d}', 15000000000000000000, np.uint64),
+             ('{0:d}', -120, np.int8),
+             ('{0:d}', -30000, np.int16),
+             ('{0:d}', -2000000000, np.int32),
+             ('{0:d}', -7000000000000000000, np.int64),
+             ('{0:g}', 1.5, np.float16),
+             ('{0:g}', 1.5, np.float32),
+             ('{0:g}', 1.5, np.float64),
+             ('{0:g}', 1.5, np.longdouble),
+             ('{0:g}', 1.5+0.5j, np.complex64),
+             ('{0:g}', 1.5+0.5j, np.complex128),
+             ('{0:g}', 1.5+0.5j, np.clongdouble)]
+
+    for (fmat, val, valtype) in tests:
+        try:
+            assert_equal(fmat.format(val), fmat.format(valtype(val)),
+                         "failed with val %s, type %s" % (val, valtype))
+        except ValueError as e:
+            assert_(False,
+                    "format raised exception (fmt='%s', val=%s, type=%s, exc='%s')" %
+                    (fmat, repr(val), repr(valtype), str(e)))
+
+
+#
+# Locale tests: scalar types formatting should be independent of the locale
+#
+
+class TestCommaDecimalPointLocale(CommaDecimalPointLocale):
+
+    def test_locale_single(self):
+        assert_equal(str(np.float32(1.2)), str(float(1.2)))
+
+    def test_locale_double(self):
+        assert_equal(str(np.double(1.2)), str(float(1.2)))
+
+    def test_locale_longdouble(self):
+        assert_equal(str(np.longdouble('1.2')), str(float(1.2)))
diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_protocols.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_protocols.py
new file mode 100644
index 0000000..55a2bcf
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_protocols.py
@@ -0,0 +1,44 @@
+import pytest
+import warnings
+import numpy as np
+
+
+@pytest.mark.filterwarnings("error")
+def test_getattr_warning():
+    # issue gh-14735: make sure we clear only getattr errors, and let warnings
+    # through
+    class Wrapper:
+        def __init__(self, array):
+            self.array = array
+
+        def __len__(self):
+            return len(self.array)
+
+        def __getitem__(self, item):
+            return type(self)(self.array[item])
+
+        def __getattr__(self, name):
+            if name.startswith("__array_"):
+                warnings.warn("object got converted", UserWarning, stacklevel=1)
+
+            return getattr(self.array, name)
+
+        def __repr__(self):
+            return "<Wrapper({self.array})>".format(self=self)
+
+    array = Wrapper(np.arange(10))
+    with pytest.raises(UserWarning, match="object got converted"):
+        np.asarray(array)
+
+
+def test_array_called():
+    class Wrapper:
+        val = '0' * 100
+        def __array__(self, result=None):
+            return np.array([self.val], dtype=object)
+
+
+    wrapped = Wrapper()
+    arr = np.array(wrapped, dtype=str)
+    assert arr.dtype == 'U100'
+    assert arr[0] == Wrapper.val
diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_records.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_records.py
new file mode 100644
index 0000000..4d4b4b5
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_records.py
@@ -0,0 +1,520 @@
+import collections.abc
+import textwrap
+from io import BytesIO
+from os import path
+from
pathlib import Path +import pytest + +import numpy as np +from numpy.testing import ( + assert_, assert_equal, assert_array_equal, assert_array_almost_equal, + assert_raises, temppath, + ) +from numpy.compat import pickle + + +class TestFromrecords: + def test_fromrecords(self): + r = np.rec.fromrecords([[456, 'dbe', 1.2], [2, 'de', 1.3]], + names='col1,col2,col3') + assert_equal(r[0].item(), (456, 'dbe', 1.2)) + assert_equal(r['col1'].dtype.kind, 'i') + assert_equal(r['col2'].dtype.kind, 'U') + assert_equal(r['col2'].dtype.itemsize, 12) + assert_equal(r['col3'].dtype.kind, 'f') + + def test_fromrecords_0len(self): + """ Verify fromrecords works with a 0-length input """ + dtype = [('a', float), ('b', float)] + r = np.rec.fromrecords([], dtype=dtype) + assert_equal(r.shape, (0,)) + + def test_fromrecords_2d(self): + data = [ + [(1, 2), (3, 4), (5, 6)], + [(6, 5), (4, 3), (2, 1)] + ] + expected_a = [[1, 3, 5], [6, 4, 2]] + expected_b = [[2, 4, 6], [5, 3, 1]] + + # try with dtype + r1 = np.rec.fromrecords(data, dtype=[('a', int), ('b', int)]) + assert_equal(r1['a'], expected_a) + assert_equal(r1['b'], expected_b) + + # try with names + r2 = np.rec.fromrecords(data, names=['a', 'b']) + assert_equal(r2['a'], expected_a) + assert_equal(r2['b'], expected_b) + + assert_equal(r1, r2) + + def test_method_array(self): + r = np.rec.array(b'abcdefg' * 100, formats='i2,a3,i4', shape=3, byteorder='big') + assert_equal(r[1].item(), (25444, b'efg', 1633837924)) + + def test_method_array2(self): + r = np.rec.array([(1, 11, 'a'), (2, 22, 'b'), (3, 33, 'c'), (4, 44, 'd'), (5, 55, 'ex'), + (6, 66, 'f'), (7, 77, 'g')], formats='u1,f4,a1') + assert_equal(r[1].item(), (2, 22.0, b'b')) + + def test_recarray_slices(self): + r = np.rec.array([(1, 11, 'a'), (2, 22, 'b'), (3, 33, 'c'), (4, 44, 'd'), (5, 55, 'ex'), + (6, 66, 'f'), (7, 77, 'g')], formats='u1,f4,a1') + assert_equal(r[1::2][1].item(), (4, 44.0, b'd')) + + def test_recarray_fromarrays(self): + x1 = np.array([1, 2, 3, 4]) + x2 = np.array(['a', 'dd', 'xyz', '12']) + x3 = np.array([1.1, 2, 3, 4]) + r = np.rec.fromarrays([x1, x2, x3], names='a,b,c') + assert_equal(r[1].item(), (2, 'dd', 2.0)) + x1[1] = 34 + assert_equal(r.a, np.array([1, 2, 3, 4])) + + def test_recarray_fromfile(self): + data_dir = path.join(path.dirname(__file__), 'data') + filename = path.join(data_dir, 'recarray_from_file.fits') + fd = open(filename, 'rb') + fd.seek(2880 * 2) + r1 = np.rec.fromfile(fd, formats='f8,i4,a5', shape=3, byteorder='big') + fd.seek(2880 * 2) + r2 = np.rec.array(fd, formats='f8,i4,a5', shape=3, byteorder='big') + fd.seek(2880 * 2) + bytes_array = BytesIO() + bytes_array.write(fd.read()) + bytes_array.seek(0) + r3 = np.rec.fromfile(bytes_array, formats='f8,i4,a5', shape=3, byteorder='big') + fd.close() + assert_equal(r1, r2) + assert_equal(r2, r3) + + def test_recarray_from_obj(self): + count = 10 + a = np.zeros(count, dtype='O') + b = np.zeros(count, dtype='f8') + c = np.zeros(count, dtype='f8') + for i in range(len(a)): + a[i] = list(range(1, 10)) + + mine = np.rec.fromarrays([a, b, c], names='date,data1,data2') + for i in range(len(a)): + assert_((mine.date[i] == list(range(1, 10)))) + assert_((mine.data1[i] == 0.0)) + assert_((mine.data2[i] == 0.0)) + + def test_recarray_repr(self): + a = np.array([(1, 0.1), (2, 0.2)], + dtype=[('foo', ' 2) & (a < 6)) + xb = np.where((b > 2) & (b < 6)) + ya = ((a > 2) & (a < 6)) + yb = ((b > 2) & (b < 6)) + assert_array_almost_equal(xa, ya.nonzero()) + assert_array_almost_equal(xb, yb.nonzero()) + assert_(np.all(a[ya] > 0.5)) + 
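+# Aside: a minimal standalone sketch of the equivalence this test leans on
+# (plain NumPy, independent of the test suite) -- np.where with a single
+# boolean argument is the same as calling .nonzero() on that mask.
+import numpy as np
+mask = (np.arange(10) > 2) & (np.arange(10) < 6)
+assert np.array_equal(np.where(mask)[0], mask.nonzero()[0])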
assert_(np.all(b[yb] > 0.5)) + + def test_endian_where(self): + # GitHub issue #369 + net = np.zeros(3, dtype='>f4') + net[1] = 0.00458849 + net[2] = 0.605202 + max_net = net.max() + test = np.where(net <= 0., max_net, net) + correct = np.array([ 0.60520202, 0.00458849, 0.60520202]) + assert_array_almost_equal(test, correct) + + def test_endian_recarray(self): + # Ticket #2185 + dt = np.dtype([ + ('head', '>u4'), + ('data', '>u4', 2), + ]) + buf = np.recarray(1, dtype=dt) + buf[0]['head'] = 1 + buf[0]['data'][:] = [1, 1] + + h = buf[0]['head'] + d = buf[0]['data'][0] + buf[0]['head'] = h + buf[0]['data'][0] = d + assert_(buf[0]['head'] == 1) + + def test_mem_dot(self): + # Ticket #106 + x = np.random.randn(0, 1) + y = np.random.randn(10, 1) + # Dummy array to detect bad memory access: + _z = np.ones(10) + _dummy = np.empty((0, 10)) + z = np.lib.stride_tricks.as_strided(_z, _dummy.shape, _dummy.strides) + np.dot(x, np.transpose(y), out=z) + assert_equal(_z, np.ones(10)) + # Do the same for the built-in dot: + np.core.multiarray.dot(x, np.transpose(y), out=z) + assert_equal(_z, np.ones(10)) + + def test_arange_endian(self): + # Ticket #111 + ref = np.arange(10) + x = np.arange(10, dtype=' 1 and x['two'] > 2) + + def test_method_args(self): + # Make sure methods and functions have same default axis + # keyword and arguments + funcs1 = ['argmax', 'argmin', 'sum', ('product', 'prod'), + ('sometrue', 'any'), + ('alltrue', 'all'), 'cumsum', ('cumproduct', 'cumprod'), + 'ptp', 'cumprod', 'prod', 'std', 'var', 'mean', + 'round', 'min', 'max', 'argsort', 'sort'] + funcs2 = ['compress', 'take', 'repeat'] + + for func in funcs1: + arr = np.random.rand(8, 7) + arr2 = arr.copy() + if isinstance(func, tuple): + func_meth = func[1] + func = func[0] + else: + func_meth = func + res1 = getattr(arr, func_meth)() + res2 = getattr(np, func)(arr2) + if res1 is None: + res1 = arr + + if res1.dtype.kind in 'uib': + assert_((res1 == res2).all(), func) + else: + assert_(abs(res1-res2).max() < 1e-8, func) + + for func in funcs2: + arr1 = np.random.rand(8, 7) + arr2 = np.random.rand(8, 7) + res1 = None + if func == 'compress': + arr1 = arr1.ravel() + res1 = getattr(arr2, func)(arr1) + else: + arr2 = (15*arr2).astype(int).ravel() + if res1 is None: + res1 = getattr(arr1, func)(arr2) + res2 = getattr(np, func)(arr1, arr2) + assert_(abs(res1-res2).max() < 1e-8, func) + + def test_mem_lexsort_strings(self): + # Ticket #298 + lst = ['abc', 'cde', 'fgh'] + np.lexsort((lst,)) + + def test_fancy_index(self): + # Ticket #302 + x = np.array([1, 2])[np.array([0])] + assert_equal(x.shape, (1,)) + + def test_recarray_copy(self): + # Ticket #312 + dt = [('x', np.int16), ('y', np.float64)] + ra = np.array([(1, 2.3)], dtype=dt) + rb = np.rec.array(ra, dtype=dt) + rb['x'] = 2. 
+ assert_(ra['x'] != rb['x']) + + def test_rec_fromarray(self): + # Ticket #322 + x1 = np.array([[1, 2], [3, 4], [5, 6]]) + x2 = np.array(['a', 'dd', 'xyz']) + x3 = np.array([1.1, 2, 3]) + np.rec.fromarrays([x1, x2, x3], formats="(2,)i4,a3,f8") + + def test_object_array_assign(self): + x = np.empty((2, 2), object) + x.flat[2] = (1, 2, 3) + assert_equal(x.flat[2], (1, 2, 3)) + + def test_ndmin_float64(self): + # Ticket #324 + x = np.array([1, 2, 3], dtype=np.float64) + assert_equal(np.array(x, dtype=np.float32, ndmin=2).ndim, 2) + assert_equal(np.array(x, dtype=np.float64, ndmin=2).ndim, 2) + + def test_ndmin_order(self): + # Issue #465 and related checks + assert_(np.array([1, 2], order='C', ndmin=3).flags.c_contiguous) + assert_(np.array([1, 2], order='F', ndmin=3).flags.f_contiguous) + assert_(np.array(np.ones((2, 2), order='F'), ndmin=3).flags.f_contiguous) + assert_(np.array(np.ones((2, 2), order='C'), ndmin=3).flags.c_contiguous) + + def test_mem_axis_minimization(self): + # Ticket #327 + data = np.arange(5) + data = np.add.outer(data, data) + + def test_mem_float_imag(self): + # Ticket #330 + np.float64(1.0).imag + + def test_dtype_tuple(self): + # Ticket #334 + assert_(np.dtype('i4') == np.dtype(('i4', ()))) + + def test_dtype_posttuple(self): + # Ticket #335 + np.dtype([('col1', '()i4')]) + + def test_numeric_carray_compare(self): + # Ticket #341 + assert_equal(np.array(['X'], 'c'), b'X') + + def test_string_array_size(self): + # Ticket #342 + assert_raises(ValueError, + np.array, [['X'], ['X', 'X', 'X']], '|S1') + + def test_dtype_repr(self): + # Ticket #344 + dt1 = np.dtype(('uint32', 2)) + dt2 = np.dtype(('uint32', (2,))) + assert_equal(dt1.__repr__(), dt2.__repr__()) + + def test_reshape_order(self): + # Make sure reshape order works. + a = np.arange(6).reshape(2, 3, order='F') + assert_equal(a, [[0, 2, 4], [1, 3, 5]]) + a = np.array([[1, 2], [3, 4], [5, 6], [7, 8]]) + b = a[:, 1] + assert_equal(b.reshape(2, 2, order='F'), [[2, 6], [4, 8]]) + + def test_reshape_zero_strides(self): + # Issue #380, test reshaping of zero strided arrays + a = np.ones(1) + a = np.lib.stride_tricks.as_strided(a, shape=(5,), strides=(0,)) + assert_(a.reshape(5, 1).strides[0] == 0) + + def test_reshape_zero_size(self): + # GitHub Issue #2700, setting shape failed for 0-sized arrays + a = np.ones((0, 2)) + a.shape = (-1, 2) + + # Cannot test if NPY_RELAXED_STRIDES_CHECKING changes the strides. + # With NPY_RELAXED_STRIDES_CHECKING the test becomes superfluous. + @pytest.mark.skipif(np.ones(1).strides[0] == np.iinfo(np.intp).max, + reason="Using relaxed stride checking") + def test_reshape_trailing_ones_strides(self): + # GitHub issue gh-2949, bad strides for trailing ones of new shape + a = np.zeros(12, dtype=np.int32)[::2] # not contiguous + strides_c = (16, 8, 8, 8) + strides_f = (8, 24, 48, 48) + assert_equal(a.reshape(3, 2, 1, 1).strides, strides_c) + assert_equal(a.reshape(3, 2, 1, 1, order='F').strides, strides_f) + assert_equal(np.array(0, dtype=np.int32).reshape(1, 1).strides, (4, 4)) + + def test_repeat_discont(self): + # Ticket #352 + a = np.arange(12).reshape(4, 3)[:, 2] + assert_equal(a.repeat(3), [2, 2, 2, 5, 5, 5, 8, 8, 8, 11, 11, 11]) + + def test_array_index(self): + # Make sure optimization is not called in this case. 
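+# Aside: a minimal standalone illustration of the order= semantics the
+# reshape tests above exercise (plain NumPy, independent of the test suite):
+# 'C' fills the last axis fastest, 'F' fills the first axis fastest.
+import numpy as np
+assert np.arange(6).reshape(2, 3, order='C').tolist() == [[0, 1, 2], [3, 4, 5]]
+assert np.arange(6).reshape(2, 3, order='F').tolist() == [[0, 2, 4], [1, 3, 5]]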
+ a = np.array([1, 2, 3]) + a2 = np.array([[1, 2, 3]]) + assert_equal(a[np.where(a == 3)], a2[np.where(a2 == 3)]) + + def test_object_argmax(self): + a = np.array([1, 2, 3], dtype=object) + assert_(a.argmax() == 2) + + def test_recarray_fields(self): + # Ticket #372 + dt0 = np.dtype([('f0', 'i4'), ('f1', 'i4')]) + dt1 = np.dtype([('f0', 'i8'), ('f1', 'i8')]) + for a in [np.array([(1, 2), (3, 4)], "i4,i4"), + np.rec.array([(1, 2), (3, 4)], "i4,i4"), + np.rec.array([(1, 2), (3, 4)]), + np.rec.fromarrays([(1, 2), (3, 4)], "i4,i4"), + np.rec.fromarrays([(1, 2), (3, 4)])]: + assert_(a.dtype in [dt0, dt1]) + + def test_random_shuffle(self): + # Ticket #374 + a = np.arange(5).reshape((5, 1)) + b = a.copy() + np.random.shuffle(b) + assert_equal(np.sort(b, axis=0), a) + + def test_refcount_vdot(self): + # Changeset #3443 + _assert_valid_refcount(np.vdot) + + def test_startswith(self): + ca = np.char.array(['Hi', 'There']) + assert_equal(ca.startswith('H'), [True, False]) + + def test_noncommutative_reduce_accumulate(self): + # Ticket #413 + tosubtract = np.arange(5) + todivide = np.array([2.0, 0.5, 0.25]) + assert_equal(np.subtract.reduce(tosubtract), -10) + assert_equal(np.divide.reduce(todivide), 16.0) + assert_array_equal(np.subtract.accumulate(tosubtract), + np.array([0, -1, -3, -6, -10])) + assert_array_equal(np.divide.accumulate(todivide), + np.array([2., 4., 16.])) + + def test_convolve_empty(self): + # Convolve should raise an error for empty input array. + assert_raises(ValueError, np.convolve, [], [1]) + assert_raises(ValueError, np.convolve, [1], []) + + def test_multidim_byteswap(self): + # Ticket #449 + r = np.array([(1, (0, 1, 2))], dtype="i2,3i2") + assert_array_equal(r.byteswap(), + np.array([(256, (0, 256, 512))], r.dtype)) + + def test_string_NULL(self): + # Changeset 3557 + assert_equal(np.array("a\x00\x0b\x0c\x00").item(), + 'a\x00\x0b\x0c') + + def test_junk_in_string_fields_of_recarray(self): + # Ticket #483 + r = np.array([[b'abc']], dtype=[('var1', '|S20')]) + assert_(asbytes(r['var1'][0][0]) == b'abc') + + def test_take_output(self): + # Ensure that 'take' honours output parameter. + x = np.arange(12).reshape((3, 4)) + a = np.take(x, [0, 2], axis=1) + b = np.zeros_like(a) + np.take(x, [0, 2], axis=1, out=b) + assert_array_equal(a, b) + + def test_take_object_fail(self): + # Issue gh-3001 + d = 123. 
+ a = np.array([d, 1], dtype=object) + if HAS_REFCOUNT: + ref_d = sys.getrefcount(d) + try: + a.take([0, 100]) + except IndexError: + pass + if HAS_REFCOUNT: + assert_(ref_d == sys.getrefcount(d)) + + def test_array_str_64bit(self): + # Ticket #501 + s = np.array([1, np.nan], dtype=np.float64) + with np.errstate(all='raise'): + np.array_str(s) # Should succeed + + def test_frompyfunc_endian(self): + # Ticket #503 + from math import radians + uradians = np.frompyfunc(radians, 1, 1) + big_endian = np.array([83.4, 83.5], dtype='>f8') + little_endian = np.array([83.4, 83.5], dtype=' object + # casting succeeds + def rs(): + x = np.ones([484, 286]) + y = np.zeros([484, 286]) + x |= y + + assert_raises(TypeError, rs) + + def test_unicode_scalar(self): + # Ticket #600 + x = np.array(["DROND", "DROND1"], dtype="U6") + el = x[1] + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + new = pickle.loads(pickle.dumps(el, protocol=proto)) + assert_equal(new, el) + + def test_arange_non_native_dtype(self): + # Ticket #616 + for T in ('>f4', ' 0)] = v + + assert_raises(IndexError, ia, x, s, np.zeros(9, dtype=float)) + assert_raises(IndexError, ia, x, s, np.zeros(11, dtype=float)) + + # Old special case (different code path): + assert_raises(ValueError, ia, x.flat, s, np.zeros(9, dtype=float)) + assert_raises(ValueError, ia, x.flat, s, np.zeros(11, dtype=float)) + + def test_mem_scalar_indexing(self): + # Ticket #603 + x = np.array([0], dtype=float) + index = np.array(0, dtype=np.int32) + x[index] + + def test_binary_repr_0_width(self): + assert_equal(np.binary_repr(0, width=3), '000') + + def test_fromstring(self): + assert_equal(np.fromstring("12:09:09", dtype=int, sep=":"), + [12, 9, 9]) + + def test_searchsorted_variable_length(self): + x = np.array(['a', 'aa', 'b']) + y = np.array(['d', 'e']) + assert_equal(x.searchsorted(y), [3, 3]) + + def test_string_argsort_with_zeros(self): + # Check argsort for strings containing zeros. + x = np.frombuffer(b"\x00\x02\x00\x01", dtype="|S2") + assert_array_equal(x.argsort(kind='m'), np.array([1, 0])) + assert_array_equal(x.argsort(kind='q'), np.array([1, 0])) + + def test_string_sort_with_zeros(self): + # Check sort for strings containing zeros. + x = np.frombuffer(b"\x00\x02\x00\x01", dtype="|S2") + y = np.frombuffer(b"\x00\x01\x00\x02", dtype="|S2") + assert_array_equal(np.sort(x, kind="q"), y) + + def test_copy_detection_zero_dim(self): + # Ticket #658 + np.indices((0, 3, 4)).T.reshape(-1, 3) + + def test_flat_byteorder(self): + # Ticket #657 + x = np.arange(10) + assert_array_equal(x.astype('>i4'), x.astype('i4').flat[:], x.astype('i4')): + x = np.array([-1, 0, 1], dtype=dt) + assert_equal(x.flat[0].dtype, x[0].dtype) + + def test_copy_detection_corner_case(self): + # Ticket #658 + np.indices((0, 3, 4)).T.reshape(-1, 3) + + # Cannot test if NPY_RELAXED_STRIDES_CHECKING changes the strides. + # With NPY_RELAXED_STRIDES_CHECKING the test becomes superfluous, + # 0-sized reshape itself is tested elsewhere. + @pytest.mark.skipif(np.ones(1).strides[0] == np.iinfo(np.intp).max, + reason="Using relaxed stride checking") + def test_copy_detection_corner_case2(self): + # Ticket #771: strides are not set correctly when reshaping 0-sized + # arrays + b = np.indices((0, 3, 4)).T.reshape(-1, 3) + assert_equal(b.strides, (3 * b.itemsize, b.itemsize)) + + def test_object_array_refcounting(self): + # Ticket #633 + if not hasattr(sys, 'getrefcount'): + return + + # NB. 
this is probably CPython-specific + + cnt = sys.getrefcount + + a = object() + b = object() + c = object() + + cnt0_a = cnt(a) + cnt0_b = cnt(b) + cnt0_c = cnt(c) + + # -- 0d -> 1-d broadcast slice assignment + + arr = np.zeros(5, dtype=np.object_) + + arr[:] = a + assert_equal(cnt(a), cnt0_a + 5) + + arr[:] = b + assert_equal(cnt(a), cnt0_a) + assert_equal(cnt(b), cnt0_b + 5) + + arr[:2] = c + assert_equal(cnt(b), cnt0_b + 3) + assert_equal(cnt(c), cnt0_c + 2) + + del arr + + # -- 1-d -> 2-d broadcast slice assignment + + arr = np.zeros((5, 2), dtype=np.object_) + arr0 = np.zeros(2, dtype=np.object_) + + arr0[0] = a + assert_(cnt(a) == cnt0_a + 1) + arr0[1] = b + assert_(cnt(b) == cnt0_b + 1) + + arr[:, :] = arr0 + assert_(cnt(a) == cnt0_a + 6) + assert_(cnt(b) == cnt0_b + 6) + + arr[:, 0] = None + assert_(cnt(a) == cnt0_a + 1) + + del arr, arr0 + + # -- 2-d copying + flattening + + arr = np.zeros((5, 2), dtype=np.object_) + + arr[:, 0] = a + arr[:, 1] = b + assert_(cnt(a) == cnt0_a + 5) + assert_(cnt(b) == cnt0_b + 5) + + arr2 = arr.copy() + assert_(cnt(a) == cnt0_a + 10) + assert_(cnt(b) == cnt0_b + 10) + + arr2 = arr[:, 0].copy() + assert_(cnt(a) == cnt0_a + 10) + assert_(cnt(b) == cnt0_b + 5) + + arr2 = arr.flatten() + assert_(cnt(a) == cnt0_a + 10) + assert_(cnt(b) == cnt0_b + 10) + + del arr, arr2 + + # -- concatenate, repeat, take, choose + + arr1 = np.zeros((5, 1), dtype=np.object_) + arr2 = np.zeros((5, 1), dtype=np.object_) + + arr1[...] = a + arr2[...] = b + assert_(cnt(a) == cnt0_a + 5) + assert_(cnt(b) == cnt0_b + 5) + + tmp = np.concatenate((arr1, arr2)) + assert_(cnt(a) == cnt0_a + 5 + 5) + assert_(cnt(b) == cnt0_b + 5 + 5) + + tmp = arr1.repeat(3, axis=0) + assert_(cnt(a) == cnt0_a + 5 + 3*5) + + tmp = arr1.take([1, 2, 3], axis=0) + assert_(cnt(a) == cnt0_a + 5 + 3) + + x = np.array([[0], [1], [0], [1], [1]], int) + tmp = x.choose(arr1, arr2) + assert_(cnt(a) == cnt0_a + 5 + 2) + assert_(cnt(b) == cnt0_b + 5 + 3) + + del tmp # Avoid pyflakes unused variable warning + + def test_mem_custom_float_to_array(self): + # Ticket 702 + class MyFloat: + def __float__(self): + return 1.0 + + tmp = np.atleast_1d([MyFloat()]) + tmp.astype(float) # Should succeed + + def test_object_array_refcount_self_assign(self): + # Ticket #711 + class VictimObject: + deleted = False + + def __del__(self): + self.deleted = True + + d = VictimObject() + arr = np.zeros(5, dtype=np.object_) + arr[:] = d + del d + arr[:] = arr # refcount of 'd' might hit zero here + assert_(not arr[0].deleted) + arr[:] = arr # trying to induce a segfault by doing it again... + assert_(not arr[0].deleted) + + def test_mem_fromiter_invalid_dtype_string(self): + x = [1, 2, 3] + assert_raises(ValueError, + np.fromiter, [xi for xi in x], dtype='S') + + def test_reduce_big_object_array(self): + # Ticket #713 + oldsize = np.setbufsize(10*16) + a = np.array([None]*161, object) + assert_(not np.any(a)) + np.setbufsize(oldsize) + + def test_mem_0d_array_index(self): + # Ticket #714 + np.zeros(10)[np.array(0)] + + def test_nonnative_endian_fill(self): + # Non-native endian arrays were incorrectly filled with scalars + # before r5034. 
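+# Aside: a minimal sketch of the byte-order machinery this test targets
+# (plain NumPy, independent of the test suite). '>i4' and '<i4' hold equal
+# values; only the storage order of the four bytes differs.
+import numpy as np
+big = np.array([1], dtype='>i4')
+little = big.astype('<i4')
+assert big[0] == little[0] == 1
+assert big.tobytes() == little.tobytes()[::-1]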
+ if sys.byteorder == 'little': + dtype = np.dtype('>i4') + else: + dtype = np.dtype('data contains non-zero floats + x = np.array([123456789e199], dtype=np.float64) + if IS_PYPY: + x.resize((m, 0), refcheck=False) + else: + x.resize((m, 0)) + y = np.array([123456789e199], dtype=np.float64) + if IS_PYPY: + y.resize((0, n), refcheck=False) + else: + y.resize((0, n)) + + # `dot` should just return zero (m, n) matrix + z = np.dot(x, y) + assert_(np.all(z == 0)) + assert_(z.shape == (m, n)) + + def test_zeros(self): + # Regression test for #1061. + # Set a size which cannot fit into a 64 bits signed integer + sz = 2 ** 64 + with assert_raises_regex(ValueError, + 'Maximum allowed dimension exceeded'): + np.empty(sz) + + def test_huge_arange(self): + # Regression test for #1062. + # Set a size which cannot fit into a 64 bits signed integer + sz = 2 ** 64 + with assert_raises_regex(ValueError, + 'Maximum allowed size exceeded'): + np.arange(sz) + assert_(np.size == sz) + + def test_fromiter_bytes(self): + # Ticket #1058 + a = np.fromiter(list(range(10)), dtype='b') + b = np.fromiter(list(range(10)), dtype='B') + assert_(np.alltrue(a == np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))) + assert_(np.alltrue(b == np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))) + + def test_array_from_sequence_scalar_array(self): + # Ticket #1078: segfaults when creating an array with a sequence of + # 0d arrays. + a = np.array((np.ones(2), np.array(2)), dtype=object) + assert_equal(a.shape, (2,)) + assert_equal(a.dtype, np.dtype(object)) + assert_equal(a[0], np.ones(2)) + assert_equal(a[1], np.array(2)) + + a = np.array(((1,), np.array(1)), dtype=object) + assert_equal(a.shape, (2,)) + assert_equal(a.dtype, np.dtype(object)) + assert_equal(a[0], (1,)) + assert_equal(a[1], np.array(1)) + + def test_array_from_sequence_scalar_array2(self): + # Ticket #1081: weird array with strange input... + t = np.array([np.array([]), np.array(0, object)], dtype=object) + assert_equal(t.shape, (2,)) + assert_equal(t.dtype, np.dtype(object)) + + def test_array_too_big(self): + # Ticket #1080. + assert_raises(ValueError, np.zeros, [975]*7, np.int8) + assert_raises(ValueError, np.zeros, [26244]*5, np.int8) + + def test_dtype_keyerrors_(self): + # Ticket #1106. + dt = np.dtype([('f1', np.uint)]) + assert_raises(KeyError, dt.__getitem__, "f2") + assert_raises(IndexError, dt.__getitem__, 1) + assert_raises(TypeError, dt.__getitem__, 0.0) + + def test_lexsort_buffer_length(self): + # Ticket #1217, don't segfault. + a = np.ones(100, dtype=np.int8) + b = np.ones(100, dtype=np.int32) + i = np.lexsort((a[::-1], b)) + assert_equal(i, np.arange(100, dtype=int)) + + def test_object_array_to_fixed_string(self): + # Ticket #1235. + a = np.array(['abcdefgh', 'ijklmnop'], dtype=np.object_) + b = np.array(a, dtype=(np.str_, 8)) + assert_equal(a, b) + c = np.array(a, dtype=(np.str_, 5)) + assert_equal(c, np.array(['abcde', 'ijklm'])) + d = np.array(a, dtype=(np.str_, 12)) + assert_equal(a, d) + e = np.empty((2, ), dtype=(np.str_, 8)) + e[:] = a[:] + assert_equal(a, e) + + def test_unicode_to_string_cast(self): + # Ticket #1240. 
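+# Aside: the failure mode checked below, in miniature (plain NumPy,
+# independent of the test suite) -- non-ASCII text cannot be encoded into
+# the bytes dtype 'S', so the cast raises UnicodeEncodeError.
+import numpy as np
+try:
+    np.array([u'\u03a3'], dtype='U4').astype('S4')
+except UnicodeEncodeError:
+    pass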
+ a = np.array([[u'abc', u'\u03a3'], + [u'asdf', u'erw']], + dtype='U') + assert_raises(UnicodeEncodeError, np.array, a, 'S4') + + def test_unicode_to_string_cast_error(self): + # gh-15790 + a = np.array([u'\x80'] * 129, dtype='U3') + assert_raises(UnicodeEncodeError, np.array, a, 'S') + b = a.reshape(3, 43)[:-1, :-1] + assert_raises(UnicodeEncodeError, np.array, b, 'S') + + def test_mixed_string_unicode_array_creation(self): + a = np.array(['1234', u'123']) + assert_(a.itemsize == 16) + a = np.array([u'123', '1234']) + assert_(a.itemsize == 16) + a = np.array(['1234', u'123', '12345']) + assert_(a.itemsize == 20) + a = np.array([u'123', '1234', u'12345']) + assert_(a.itemsize == 20) + a = np.array([u'123', '1234', u'1234']) + assert_(a.itemsize == 16) + + def test_misaligned_objects_segfault(self): + # Ticket #1198 and #1267 + a1 = np.zeros((10,), dtype='O,c') + a2 = np.array(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'], 'S10') + a1['f0'] = a2 + repr(a1) + np.argmax(a1['f0']) + a1['f0'][1] = "FOO" + a1['f0'] = "FOO" + np.array(a1['f0'], dtype='S') + np.nonzero(a1['f0']) + a1.sort() + copy.deepcopy(a1) + + def test_misaligned_scalars_segfault(self): + # Ticket #1267 + s1 = np.array(('a', 'Foo'), dtype='c,O') + s2 = np.array(('b', 'Bar'), dtype='c,O') + s1['f1'] = s2['f1'] + s1['f1'] = 'Baz' + + def test_misaligned_dot_product_objects(self): + # Ticket #1267 + # This didn't require a fix, but it's worth testing anyway, because + # it may fail if .dot stops enforcing the arrays to be BEHAVED + a = np.array([[(1, 'a'), (0, 'a')], [(0, 'a'), (1, 'a')]], dtype='O,c') + b = np.array([[(4, 'a'), (1, 'a')], [(2, 'a'), (2, 'a')]], dtype='O,c') + np.dot(a['f0'], b['f0']) + + def test_byteswap_complex_scalar(self): + # Ticket #1259 and gh-441 + for dtype in [np.dtype('<'+t) for t in np.typecodes['Complex']]: + z = np.array([2.2-1.1j], dtype) + x = z[0] # always native-endian + y = x.byteswap() + if x.dtype.byteorder == z.dtype.byteorder: + # little-endian machine + assert_equal(x, np.frombuffer(y.tobytes(), dtype=dtype.newbyteorder())) + else: + # big-endian machine + assert_equal(x, np.frombuffer(y.tobytes(), dtype=dtype)) + # double check real and imaginary parts: + assert_equal(x.real, y.real.byteswap()) + assert_equal(x.imag, y.imag.byteswap()) + + def test_structured_arrays_with_objects1(self): + # Ticket #1299 + stra = 'aaaa' + strb = 'bbbb' + x = np.array([[(0, stra), (1, strb)]], 'i8,O') + x[x.nonzero()] = x.ravel()[:1] + assert_(x[0, 1] == x[0, 0]) + + @pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") + def test_structured_arrays_with_objects2(self): + # Ticket #1299 second test + stra = 'aaaa' + strb = 'bbbb' + numb = sys.getrefcount(strb) + numa = sys.getrefcount(stra) + x = np.array([[(0, stra), (1, strb)]], 'i8,O') + x[x.nonzero()] = x.ravel()[:1] + assert_(sys.getrefcount(strb) == numb) + assert_(sys.getrefcount(stra) == numa + 2) + + def test_duplicate_title_and_name(self): + # Ticket #1254 + dtspec = [(('a', 'a'), 'i'), ('b', 'i')] + assert_raises(ValueError, np.dtype, dtspec) + + def test_signed_integer_division_overflow(self): + # Ticket #1317. + def test_type(t): + min = np.array([np.iinfo(t).min]) + min //= -1 + + with np.errstate(divide="ignore"): + for t in (np.int8, np.int16, np.int32, np.int64, int): + test_type(t) + + def test_buffer_hashlib(self): + from hashlib import sha256 + + x = np.array([1, 2, 3], dtype=np.dtype('c') + + def test_log1p_compiler_shenanigans(self): + # Check if log1p is behaving on 32 bit intel systems. 
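+# Aside: why log1p is the thing to check (a minimal sketch, plain NumPy).
+# For x = 2**-53, 1 + x rounds back to exactly 1.0 in double precision, so
+# np.log(1 + x) collapses to 0.0 while np.log1p(x) stays finite and non-zero.
+import numpy as np
+x = np.exp2(-53)
+assert np.log(1.0 + x) == 0.0
+assert 0.0 < np.log1p(x) < 1e-15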
+ assert_(np.isfinite(np.log1p(np.exp2(-53)))) + + def test_fromiter_comparison(self): + a = np.fromiter(list(range(10)), dtype='b') + b = np.fromiter(list(range(10)), dtype='B') + assert_(np.alltrue(a == np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))) + assert_(np.alltrue(b == np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))) + + def test_fromstring_crash(self): + # Ticket #1345: the following should not cause a crash + with assert_warns(DeprecationWarning): + np.fromstring(b'aa, aa, 1.0', sep=',') + + def test_ticket_1539(self): + dtypes = [x for x in np.sctypeDict.values() + if (issubclass(x, np.number) + and not issubclass(x, np.timedelta64))] + a = np.array([], np.bool_) # not x[0] because it is unordered + failures = [] + + for x in dtypes: + b = a.astype(x) + for y in dtypes: + c = a.astype(y) + try: + np.dot(b, c) + except TypeError: + failures.append((x, y)) + if failures: + raise AssertionError("Failures: %r" % failures) + + def test_ticket_1538(self): + x = np.finfo(np.float32) + for name in 'eps epsneg max min resolution tiny'.split(): + assert_equal(type(getattr(x, name)), np.float32, + err_msg=name) + + def test_ticket_1434(self): + # Check that the out= argument in var and std has an effect + data = np.array(((1, 2, 3), (4, 5, 6), (7, 8, 9))) + out = np.zeros((3,)) + + ret = data.var(axis=1, out=out) + assert_(ret is out) + assert_array_equal(ret, data.var(axis=1)) + + ret = data.std(axis=1, out=out) + assert_(ret is out) + assert_array_equal(ret, data.std(axis=1)) + + def test_complex_nan_maximum(self): + cnan = complex(0, np.nan) + assert_equal(np.maximum(1, cnan), cnan) + + def test_subclass_int_tuple_assignment(self): + # ticket #1563 + class Subclass(np.ndarray): + def __new__(cls, i): + return np.ones((i,)).view(cls) + + x = Subclass(5) + x[(0,)] = 2 # shouldn't raise an exception + assert_equal(x[0], 2) + + def test_ufunc_no_unnecessary_views(self): + # ticket #1548 + class Subclass(np.ndarray): + pass + x = np.array([1, 2, 3]).view(Subclass) + y = np.add(x, x, x) + assert_equal(id(x), id(y)) + + @pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") + def test_take_refcount(self): + # ticket #939 + a = np.arange(16, dtype=float) + a.shape = (4, 4) + lut = np.ones((5 + 3, 4), float) + rgba = np.empty(shape=a.shape + (4,), dtype=lut.dtype) + c1 = sys.getrefcount(rgba) + try: + lut.take(a, axis=0, mode='clip', out=rgba) + except TypeError: + pass + c2 = sys.getrefcount(rgba) + assert_equal(c1, c2) + + def test_fromfile_tofile_seeks(self): + # On Python 3, tofile/fromfile used to get (#1610) the Python + # file handle out of sync + f0 = tempfile.NamedTemporaryFile() + f = f0.file + f.write(np.arange(255, dtype='u1').tobytes()) + + f.seek(20) + ret = np.fromfile(f, count=4, dtype='u1') + assert_equal(ret, np.array([20, 21, 22, 23], dtype='u1')) + assert_equal(f.tell(), 24) + + f.seek(40) + np.array([1, 2, 3], dtype='u1').tofile(f) + assert_equal(f.tell(), 43) + + f.seek(40) + data = f.read(3) + assert_equal(data, b"\x01\x02\x03") + + f.seek(80) + f.read(4) + data = np.fromfile(f, dtype='u1', count=4) + assert_equal(data, np.array([84, 85, 86, 87], dtype='u1')) + + f.close() + + def test_complex_scalar_warning(self): + for tp in [np.csingle, np.cdouble, np.clongdouble]: + x = tp(1+2j) + assert_warns(np.ComplexWarning, float, x) + with suppress_warnings() as sup: + sup.filter(np.ComplexWarning) + assert_equal(float(x), float(x.real)) + + def test_complex_scalar_complex_cast(self): + for tp in [np.csingle, np.cdouble, np.clongdouble]: + x = tp(1+2j) + 
assert_equal(complex(x), 1+2j) + + def test_complex_boolean_cast(self): + # Ticket #2218 + for tp in [np.csingle, np.cdouble, np.clongdouble]: + x = np.array([0, 0+0.5j, 0.5+0j], dtype=tp) + assert_equal(x.astype(bool), np.array([0, 1, 1], dtype=bool)) + assert_(np.any(x)) + assert_(np.all(x[1:])) + + def test_uint_int_conversion(self): + x = 2**64 - 1 + assert_equal(int(np.uint64(x)), x) + + def test_duplicate_field_names_assign(self): + ra = np.fromiter(((i*3, i*2) for i in range(10)), dtype='i8,f8') + ra.dtype.names = ('f1', 'f2') + repr(ra) # should not cause a segmentation fault + assert_raises(ValueError, setattr, ra.dtype, 'names', ('f1', 'f1')) + + def test_eq_string_and_object_array(self): + # From e-mail thread "__eq__ with str and object" (Keith Goodman) + a1 = np.array(['a', 'b'], dtype=object) + a2 = np.array(['a', 'c']) + assert_array_equal(a1 == a2, [True, False]) + assert_array_equal(a2 == a1, [True, False]) + + def test_nonzero_byteswap(self): + a = np.array([0x80000000, 0x00000080, 0], dtype=np.uint32) + a.dtype = np.float32 + assert_equal(a.nonzero()[0], [1]) + a = a.byteswap().newbyteorder() + assert_equal(a.nonzero()[0], [1]) # [0] if nonzero() ignores swap + + def test_find_common_type_boolean(self): + # Ticket #1695 + assert_(np.find_common_type([], ['?', '?']) == '?') + + def test_empty_mul(self): + a = np.array([1.]) + a[1:1] *= 2 + assert_equal(a, [1.]) + + def test_array_side_effect(self): + # The second use of itemsize was throwing an exception because in + # ctors.c, discover_itemsize was calling PyObject_Length without + # checking the return code. This failed to get the length of the + # number 2, and the exception hung around until something checked + # PyErr_Occurred() and returned an error. + assert_equal(np.dtype('S10').itemsize, 10) + np.array([['abc', 2], ['long ', '0123456789']], dtype=np.string_) + assert_equal(np.dtype('S10').itemsize, 10) + + def test_any_float(self): + # all and any for floats + a = np.array([0.1, 0.9]) + assert_(np.any(a)) + assert_(np.all(a)) + + def test_large_float_sum(self): + a = np.arange(10000, dtype='f') + assert_equal(a.sum(dtype='d'), a.astype('d').sum()) + + def test_ufunc_casting_out(self): + a = np.array(1.0, dtype=np.float32) + b = np.array(1.0, dtype=np.float64) + c = np.array(1.0, dtype=np.float32) + np.add(a, b, out=c) + assert_equal(c, 2.0) + + def test_array_scalar_contiguous(self): + # Array scalars are both C and Fortran contiguous + assert_(np.array(1.0).flags.c_contiguous) + assert_(np.array(1.0).flags.f_contiguous) + assert_(np.array(np.float32(1.0)).flags.c_contiguous) + assert_(np.array(np.float32(1.0)).flags.f_contiguous) + + def test_squeeze_contiguous(self): + # Similar to GitHub issue #387 + a = np.zeros((1, 2)).squeeze() + b = np.zeros((2, 2, 2), order='F')[:, :, ::2].squeeze() + assert_(a.flags.c_contiguous) + assert_(a.flags.f_contiguous) + assert_(b.flags.f_contiguous) + + def test_squeeze_axis_handling(self): + # Issue #10779 + # Ensure proper handling of objects + # that don't support axis specification + # when squeezing + + class OldSqueeze(np.ndarray): + + def __new__(cls, + input_array): + obj = np.asarray(input_array).view(cls) + return obj + + # it is perfectly reasonable that prior + # to numpy version 1.7.0 a subclass of ndarray + # might have been created that did not expect + # squeeze to have an axis argument + # NOTE: this example is somewhat artificial; + # it is designed to simulate an old API + # expectation to guard against regression + def squeeze(self): + return 
super().squeeze() + + oldsqueeze = OldSqueeze(np.array([[1],[2],[3]])) + + # if no axis argument is specified the old API + # expectation should give the correct result + assert_equal(np.squeeze(oldsqueeze), + np.array([1,2,3])) + + # likewise, axis=None should work perfectly well + # with the old API expectation + assert_equal(np.squeeze(oldsqueeze, axis=None), + np.array([1,2,3])) + + # however, specification of any particular axis + # should raise a TypeError in the context of the + # old API specification, even when using a valid + # axis specification like 1 for this array + with assert_raises(TypeError): + # this would silently succeed for array + # subclasses / objects that did not support + # squeeze axis argument handling before fixing + # Issue #10779 + np.squeeze(oldsqueeze, axis=1) + + # check for the same behavior when using an invalid + # axis specification -- in this case axis=0 does not + # have size 1, but the priority should be to raise + # a TypeError for the axis argument and NOT a + # ValueError for squeezing a non-empty dimension + with assert_raises(TypeError): + np.squeeze(oldsqueeze, axis=0) + + # the new API knows how to handle the axis + # argument and will return a ValueError if + # attempting to squeeze an axis that is not + # of length 1 + with assert_raises(ValueError): + np.squeeze(np.array([[1],[2],[3]]), axis=0) + + def test_reduce_contiguous(self): + # GitHub issue #387 + a = np.add.reduce(np.zeros((2, 1, 2)), (0, 1)) + b = np.add.reduce(np.zeros((2, 1, 2)), 1) + assert_(a.flags.c_contiguous) + assert_(a.flags.f_contiguous) + assert_(b.flags.c_contiguous) + + @pytest.mark.skipif(IS_PYSTON, reason="Pyston disables recursion checking") + def test_object_array_self_reference(self): + # Object arrays with references to themselves can cause problems + a = np.array(0, dtype=object) + a[()] = a + assert_raises(RecursionError, int, a) + assert_raises(RecursionError, float, a) + a[()] = None + + @pytest.mark.skipif(IS_PYSTON, reason="Pyston disables recursion checking") + def test_object_array_circular_reference(self): + # Test the same for a circular reference. + a = np.array(0, dtype=object) + b = np.array(0, dtype=object) + a[()] = b + b[()] = a + assert_raises(RecursionError, int, a) + # NumPy has no tp_traverse currently, so circular references + # cannot be detected. So resolve it: + a[()] = None + + # This was causing a to become like the above + a = np.array(0, dtype=object) + a[...] 
+= 1 + assert_equal(a, 1) + + def test_object_array_nested(self): + # but is fine with a reference to a different array + a = np.array(0, dtype=object) + b = np.array(0, dtype=object) + a[()] = b + assert_equal(int(a), int(0)) + assert_equal(float(a), float(0)) + + def test_object_array_self_copy(self): + # An object array being copied into itself DECREF'ed before INCREF'ing + # causing segmentation faults (gh-3787) + a = np.array(object(), dtype=object) + np.copyto(a, a) + if HAS_REFCOUNT: + assert_(sys.getrefcount(a[()]) == 2) + a[()].__class__ # will segfault if object was deleted + + def test_zerosize_accumulate(self): + "Ticket #1733" + x = np.array([[42, 0]], dtype=np.uint32) + assert_equal(np.add.accumulate(x[:-1, 0]), []) + + def test_objectarray_setfield(self): + # Setfield should not overwrite Object fields with non-Object data + x = np.array([1, 2, 3], dtype=object) + assert_raises(TypeError, x.setfield, 4, np.int32, 0) + + def test_setting_rank0_string(self): + "Ticket #1736" + s1 = b"hello1" + s2 = b"hello2" + a = np.zeros((), dtype="S10") + a[()] = s1 + assert_equal(a, np.array(s1)) + a[()] = np.array(s2) + assert_equal(a, np.array(s2)) + + a = np.zeros((), dtype='f4') + a[()] = 3 + assert_equal(a, np.array(3)) + a[()] = np.array(4) + assert_equal(a, np.array(4)) + + def test_string_astype(self): + "Ticket #1748" + s1 = b'black' + s2 = b'white' + s3 = b'other' + a = np.array([[s1], [s2], [s3]]) + assert_equal(a.dtype, np.dtype('S5')) + b = a.astype(np.dtype('S0')) + assert_equal(b.dtype, np.dtype('S5')) + + def test_ticket_1756(self): + # Ticket #1756 + s = b'0123456789abcdef' + a = np.array([s]*5) + for i in range(1, 17): + a1 = np.array(a, "|S%d" % i) + a2 = np.array([s[:i]]*5) + assert_equal(a1, a2) + + def test_fields_strides(self): + "gh-2355" + r = np.frombuffer(b'abcdefghijklmnop'*4*3, dtype='i4,(2,3)u2') + assert_equal(r[0:3:2]['f1'], r['f1'][0:3:2]) + assert_equal(r[0:3:2]['f1'][0], r[0:3:2][0]['f1']) + assert_equal(r[0:3:2]['f1'][0][()], r[0:3:2][0]['f1'][()]) + assert_equal(r[0:3:2]['f1'][0].strides, r[0:3:2][0]['f1'].strides) + + def test_alignment_update(self): + # Check that alignment flag is updated on stride setting + a = np.arange(10) + assert_(a.flags.aligned) + a.strides = 3 + assert_(not a.flags.aligned) + + def test_ticket_1770(self): + "Should not segfault on python 3k" + import numpy as np + try: + a = np.zeros((1,), dtype=[('f1', 'f')]) + a['f1'] = 1 + a['f2'] = 1 + except ValueError: + pass + except Exception: + raise AssertionError + + def test_ticket_1608(self): + "x.flat shouldn't modify data" + x = np.array([[1, 2], [3, 4]]).T + np.array(x.flat) + assert_equal(x, [[1, 3], [2, 4]]) + + def test_pickle_string_overwrite(self): + import re + + data = np.array([1], dtype='b') + blob = pickle.dumps(data, protocol=1) + data = pickle.loads(blob) + + # Check that loads does not clobber interned strings + s = re.sub("a(.)", "\x01\\1", "a_") + assert_equal(s[0], "\x01") + data[0] = 0xbb + s = re.sub("a(.)", "\x01\\1", "a_") + assert_equal(s[0], "\x01") + + def test_pickle_bytes_overwrite(self): + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + data = np.array([1], dtype='b') + data = pickle.loads(pickle.dumps(data, protocol=proto)) + data[0] = 0xdd + bytestring = "\x01 ".encode('ascii') + assert_equal(bytestring[0:1], '\x01'.encode('ascii')) + + def test_pickle_py2_array_latin1_hack(self): + # Check that unpickling hacks in Py3 that support + # encoding='latin1' work correctly. 
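+# Aside: background for the hack below (a sketch, plain NumPy). Python 2
+# pickles store raw bytes in str objects, and only encoding='latin1' maps
+# all byte values 0-255 back losslessly when Python 3 reloads them; a
+# same-interpreter round-trip, as here, needs no encoding argument at all.
+import pickle
+import numpy as np
+arr = np.array([-127], dtype='b')
+for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+    assert np.array_equal(pickle.loads(pickle.dumps(arr, protocol=proto)), arr)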
+ + # Python2 output for pickle.dumps(numpy.array([129], dtype='b')) + data = (b"cnumpy.core.multiarray\n_reconstruct\np0\n(cnumpy\nndarray\np1\n(I0\n" + b"tp2\nS'b'\np3\ntp4\nRp5\n(I1\n(I1\ntp6\ncnumpy\ndtype\np7\n(S'i1'\np8\n" + b"I0\nI1\ntp9\nRp10\n(I3\nS'|'\np11\nNNNI-1\nI-1\nI0\ntp12\nbI00\nS'\\x81'\n" + b"p13\ntp14\nb.") + # This should work: + result = pickle.loads(data, encoding='latin1') + assert_array_equal(result, np.array([129], dtype='b')) + # Should not segfault: + assert_raises(Exception, pickle.loads, data, encoding='koi8-r') + + def test_pickle_py2_scalar_latin1_hack(self): + # Check that scalar unpickling hack in Py3 that supports + # encoding='latin1' work correctly. + + # Python2 output for pickle.dumps(...) + datas = [ + # (original, python2_pickle, koi8r_validity) + (np.unicode_('\u6bd2'), + (b"cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n" + b"(S'U1'\np2\nI0\nI1\ntp3\nRp4\n(I3\nS'<'\np5\nNNNI4\nI4\nI0\n" + b"tp6\nbS'\\xd2k\\x00\\x00'\np7\ntp8\nRp9\n."), + 'invalid'), + + (np.float64(9e123), + (b"cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n(S'f8'\n" + b"p2\nI0\nI1\ntp3\nRp4\n(I3\nS'<'\np5\nNNNI-1\nI-1\nI0\ntp6\n" + b"bS'O\\x81\\xb7Z\\xaa:\\xabY'\np7\ntp8\nRp9\n."), + 'invalid'), + + (np.bytes_(b'\x9c'), # different 8-bit code point in KOI8-R vs latin1 + (b"cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n(S'S1'\np2\n" + b"I0\nI1\ntp3\nRp4\n(I3\nS'|'\np5\nNNNI1\nI1\nI0\ntp6\nbS'\\x9c'\np7\n" + b"tp8\nRp9\n."), + 'different'), + ] + for original, data, koi8r_validity in datas: + result = pickle.loads(data, encoding='latin1') + assert_equal(result, original) + + # Decoding under non-latin1 encoding (e.g.) KOI8-R can + # produce bad results, but should not segfault. + if koi8r_validity == 'different': + # Unicode code points happen to lie within latin1, + # but are different in koi8-r, resulting to silent + # bogus results + result = pickle.loads(data, encoding='koi8-r') + assert_(result != original) + elif koi8r_validity == 'invalid': + # Unicode code points outside latin1, so results + # to an encoding exception + assert_raises(ValueError, pickle.loads, data, encoding='koi8-r') + else: + raise ValueError(koi8r_validity) + + def test_structured_type_to_object(self): + a_rec = np.array([(0, 1), (3, 2)], dtype='i4,i8') + a_obj = np.empty((2,), dtype=object) + a_obj[0] = (0, 1) + a_obj[1] = (3, 2) + # astype records -> object + assert_equal(a_rec.astype(object), a_obj) + # '=' records -> object + b = np.empty_like(a_obj) + b[...] = a_rec + assert_equal(b, a_obj) + # '=' object -> records + b = np.empty_like(a_rec) + b[...] = a_obj + assert_equal(b, a_rec) + + def test_assign_obj_listoflists(self): + # Ticket # 1870 + # The inner list should get assigned to the object elements + a = np.zeros(4, dtype=object) + b = a.copy() + a[0] = [1] + a[1] = [2] + a[2] = [3] + a[3] = [4] + b[...] = [[1], [2], [3], [4]] + assert_equal(a, b) + # The first dimension should get broadcast + a = np.zeros((2, 2), dtype=object) + a[...] 
= [[1, 2]] + assert_equal(a, [[1, 2], [1, 2]]) + + @pytest.mark.slow_pypy + def test_memoryleak(self): + # Ticket #1917 - ensure that array data doesn't leak + for i in range(1000): + # 100MB times 1000 would give 100GB of memory usage if it leaks + a = np.empty((100000000,), dtype='i1') + del a + + @pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") + def test_ufunc_reduce_memoryleak(self): + a = np.arange(6) + acnt = sys.getrefcount(a) + np.add.reduce(a) + assert_equal(sys.getrefcount(a), acnt) + + def test_search_sorted_invalid_arguments(self): + # Ticket #2021, should not segfault. + x = np.arange(0, 4, dtype='datetime64[D]') + assert_raises(TypeError, x.searchsorted, 1) + + def test_string_truncation(self): + # Ticket #1990 - Data can be truncated in creation of an array from a + # mixed sequence of numeric values and strings (gh-2583) + for val in [True, 1234, 123.4, complex(1, 234)]: + for tostr, dtype in [(asunicode, "U"), (asbytes, "S")]: + b = np.array([val, tostr('xx')], dtype=dtype) + assert_equal(tostr(b[0]), tostr(val)) + b = np.array([tostr('xx'), val], dtype=dtype) + assert_equal(tostr(b[1]), tostr(val)) + + # test also with longer strings + b = np.array([val, tostr('xxxxxxxxxx')], dtype=dtype) + assert_equal(tostr(b[0]), tostr(val)) + b = np.array([tostr('xxxxxxxxxx'), val], dtype=dtype) + assert_equal(tostr(b[1]), tostr(val)) + + def test_string_truncation_ucs2(self): + # Ticket #2081. Python compiled with two byte unicode + # can lead to truncation if itemsize is not properly + # adjusted for NumPy's four byte unicode. + a = np.array(['abcd']) + assert_equal(a.dtype.itemsize, 16) + + def test_unique_stable(self): + # Ticket #2063 must always choose stable sort for argsort to + # get consistent results + v = np.array(([0]*5 + [1]*6 + [2]*6)*4) + res = np.unique(v, return_index=True) + tgt = (np.array([0, 1, 2]), np.array([ 0, 5, 11])) + assert_equal(res, tgt) + + def test_unicode_alloc_dealloc_match(self): + # Ticket #1578, the mismatch only showed up when running + # python-debug for python versions >= 2.7, and then as + # a core dump and error message. + a = np.array(['abc'], dtype=np.unicode_)[0] + del a + + def test_refcount_error_in_clip(self): + # Ticket #1588 + a = np.zeros((2,), dtype='>i2').clip(min=0) + x = a + a + # This used to segfault: + y = str(x) + # Check the final string: + assert_(y == "[0 0]") + + def test_searchsorted_wrong_dtype(self): + # Ticket #2189, it used to segfault, so we check that it raises the + # proper exception. + a = np.array([('a', 1)], dtype='S1, int') + assert_raises(TypeError, np.searchsorted, a, 1.2) + # Ticket #2066, similar problem: + dtype = np.format_parser(['i4', 'i4'], [], []) + a = np.recarray((2,), dtype) + a[...] 
= [(1, 2), (3, 4)] + assert_raises(TypeError, np.searchsorted, a, 1) + + def test_complex64_alignment(self): + # Issue gh-2668 (trac 2076), segfault on sparc due to misalignment + dtt = np.complex64 + arr = np.arange(10, dtype=dtt) + # 2D array + arr2 = np.reshape(arr, (2, 5)) + # Fortran write followed by (C or F) read caused bus error + data_str = arr2.tobytes('F') + data_back = np.ndarray(arr2.shape, + arr2.dtype, + buffer=data_str, + order='F') + assert_array_equal(arr2, data_back) + + def test_structured_count_nonzero(self): + arr = np.array([0, 1]).astype('i4, (2)i4')[:1] + count = np.count_nonzero(arr) + assert_equal(count, 0) + + def test_copymodule_preserves_f_contiguity(self): + a = np.empty((2, 2), order='F') + b = copy.copy(a) + c = copy.deepcopy(a) + assert_(b.flags.fortran) + assert_(b.flags.f_contiguous) + assert_(c.flags.fortran) + assert_(c.flags.f_contiguous) + + def test_fortran_order_buffer(self): + import numpy as np + a = np.array([['Hello', 'Foob']], dtype='U5', order='F') + arr = np.ndarray(shape=[1, 2, 5], dtype='U1', buffer=a) + arr2 = np.array([[[u'H', u'e', u'l', u'l', u'o'], + [u'F', u'o', u'o', u'b', u'']]]) + assert_array_equal(arr, arr2) + + def test_assign_from_sequence_error(self): + # Ticket #4024. + arr = np.array([1, 2, 3]) + assert_raises(ValueError, arr.__setitem__, slice(None), [9, 9]) + arr.__setitem__(slice(None), [9]) + assert_equal(arr, [9, 9, 9]) + + def test_format_on_flex_array_element(self): + # Ticket #4369. + dt = np.dtype([('date', ' 0: + # unpickling ndarray goes through _frombuffer for protocol 5 + assert b'numpy.core.numeric' in s + else: + assert b'numpy.core.multiarray' in s + + def test_object_casting_errors(self): + # gh-11993 update to ValueError (see gh-16909), since strings can in + # principle be converted to complex, but this string cannot. + arr = np.array(['AAAAA', 18465886.0, 18465886.0], dtype=object) + assert_raises(ValueError, arr.astype, 'c8') + + def test_eff1d_casting(self): + # gh-12711 + x = np.array([1, 2, 4, 7, 0], dtype=np.int16) + res = np.ediff1d(x, to_begin=-99, to_end=np.array([88, 99])) + assert_equal(res, [-99, 1, 2, 3, -7, 88, 99]) + + # The use of safe casting means, that 1<<20 is cast unsafely, an + # error may be better, but currently there is no mechanism for it. + res = np.ediff1d(x, to_begin=(1<<20), to_end=(1<<20)) + assert_equal(res, [0, 1, 2, 3, -7, 0]) + + def test_pickle_datetime64_array(self): + # gh-12745 (would fail with pickle5 installed) + d = np.datetime64('2015-07-04 12:59:59.50', 'ns') + arr = np.array([d]) + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + dumped = pickle.dumps(arr, protocol=proto) + assert_equal(pickle.loads(dumped), arr) + + def test_bad_array_interface(self): + class T: + __array_interface__ = {} + + with assert_raises(ValueError): + np.array([T()]) + + def test_2d__array__shape(self): + class T: + def __array__(self): + return np.ndarray(shape=(0,0)) + + # Make sure __array__ is used instead of Sequence methods. 
+            def __iter__(self):
+                return iter([])
+
+            def __getitem__(self, idx):
+                raise AssertionError("__getitem__ was called")
+
+            def __len__(self):
+                return 0
+
+
+        t = T()
+        # gh-13659, would raise in broadcasting [x=t for x in result]
+        arr = np.array([t])
+        assert arr.shape == (1, 0, 0)
+
+    @pytest.mark.skipif(sys.maxsize < 2 ** 31 + 1, reason='overflows 32-bit python')
+    @pytest.mark.skipif(sys.platform == 'win32' and sys.version_info[:2] < (3, 8),
+                        reason='overflows on windows, fixed in bpo-16865')
+    def test_to_ctypes(self):
+        #gh-14214
+        arr = np.zeros((2 ** 31 + 1,), 'b')
+        assert arr.size * arr.itemsize > 2 ** 31
+        c_arr = np.ctypeslib.as_ctypes(arr)
+        assert_equal(c_arr._length_, arr.size)
+
+    def test_complex_conversion_error(self):
+        # gh-17068
+        with pytest.raises(TypeError, match=r"Unable to convert dtype.*"):
+            complex(np.array("now", np.datetime64))
+
+    def test__array_interface__descr(self):
+        # gh-17068
+        dt = np.dtype(dict(names=['a', 'b'],
+                           offsets=[0, 0],
+                           formats=[np.int64, np.int64]))
+        descr = np.array((1, 1), dtype=dt).__array_interface__['descr']
+        assert descr == [('', '|V8')] # instead of [(b'', '|V8')]
+
+    @pytest.mark.skipif(sys.maxsize < 2 ** 31 + 1, reason='overflows 32-bit python')
+    @requires_memory(free_bytes=9e9)
+    def test_dot_big_stride(self):
+        # gh-17111
+        # blas stride = stride//itemsize > int32 max
+        int32_max = np.iinfo(np.int32).max
+        n = int32_max + 3
+        a = np.empty([n], dtype=np.float32)
+        b = a[::n-1]
+        b[...] = 1
+        assert b.strides[0] > int32_max * b.dtype.itemsize
+        assert np.dot(b, b) == 2.0
+
+    def test_frompyfunc_name(self):
+        # name conversion was failing for python 3 strings
+        # resulting in the default '?' name. Also test utf-8
+        # encoding using non-ascii name.
+        def cassé(x):
+            return x
+
+        f = np.frompyfunc(cassé, 1, 1)
+        assert str(f) == "<ufunc 'cassé (vectorized)'>"
+
+    @pytest.mark.parametrize("operation", [
+        'add', 'subtract', 'multiply', 'floor_divide',
+        'conjugate', 'fmod', 'square', 'reciprocal',
+        'power', 'absolute', 'negative', 'positive',
+        'greater', 'greater_equal', 'less',
+        'less_equal', 'equal', 'not_equal', 'logical_and',
+        'logical_not', 'logical_or', 'bitwise_and', 'bitwise_or',
+        'bitwise_xor', 'invert', 'left_shift', 'right_shift',
+        'gcd', 'lcm'
+        ]
+    )
+    @pytest.mark.parametrize("order", [
+        ('b->', 'B->'),
+        ('h->', 'H->'),
+        ('i->', 'I->'),
+        ('l->', 'L->'),
+        ('q->', 'Q->'),
+        ]
+    )
+    def test_ufunc_order(self, operation, order):
+        # gh-18075
+        # Ensure signed types before unsigned
+        def get_idx(string, str_lst):
+            for i, s in enumerate(str_lst):
+                if string in s:
+                    return i
+            raise ValueError(f"{string} not in list")
+        types = getattr(np, operation).types
+        assert get_idx(order[0], types) < get_idx(order[1], types), (
+            f"Unexpected types order of ufunc in {operation}"
+            f"for {order}.
Possible fix: Use signed before unsigned" + "in generate_umath.py") diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_scalar_ctors.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_scalar_ctors.py new file mode 100644 index 0000000..7e93353 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_scalar_ctors.py @@ -0,0 +1,115 @@ +""" +Test the scalar constructors, which also do type-coercion +""" +import pytest + +import numpy as np +from numpy.testing import ( + assert_equal, assert_almost_equal, assert_warns, + ) + +class TestFromString: + def test_floating(self): + # Ticket #640, floats from string + fsingle = np.single('1.234') + fdouble = np.double('1.234') + flongdouble = np.longdouble('1.234') + assert_almost_equal(fsingle, 1.234) + assert_almost_equal(fdouble, 1.234) + assert_almost_equal(flongdouble, 1.234) + + def test_floating_overflow(self): + """ Strings containing an unrepresentable float overflow """ + fhalf = np.half('1e10000') + assert_equal(fhalf, np.inf) + fsingle = np.single('1e10000') + assert_equal(fsingle, np.inf) + fdouble = np.double('1e10000') + assert_equal(fdouble, np.inf) + flongdouble = assert_warns(RuntimeWarning, np.longdouble, '1e10000') + assert_equal(flongdouble, np.inf) + + fhalf = np.half('-1e10000') + assert_equal(fhalf, -np.inf) + fsingle = np.single('-1e10000') + assert_equal(fsingle, -np.inf) + fdouble = np.double('-1e10000') + assert_equal(fdouble, -np.inf) + flongdouble = assert_warns(RuntimeWarning, np.longdouble, '-1e10000') + assert_equal(flongdouble, -np.inf) + + +class TestExtraArgs: + def test_superclass(self): + # try both positional and keyword arguments + s = np.str_(b'\\x61', encoding='unicode-escape') + assert s == 'a' + s = np.str_(b'\\x61', 'unicode-escape') + assert s == 'a' + + # previously this would return '\\xx' + with pytest.raises(UnicodeDecodeError): + np.str_(b'\\xx', encoding='unicode-escape') + with pytest.raises(UnicodeDecodeError): + np.str_(b'\\xx', 'unicode-escape') + + # superclass fails, but numpy succeeds + assert np.bytes_(-2) == b'-2' + + def test_datetime(self): + dt = np.datetime64('2000-01', ('M', 2)) + assert np.datetime_data(dt) == ('M', 2) + + with pytest.raises(TypeError): + np.datetime64('2000', garbage=True) + + def test_bool(self): + with pytest.raises(TypeError): + np.bool_(False, garbage=True) + + def test_void(self): + with pytest.raises(TypeError): + np.void(b'test', garbage=True) + + +class TestFromInt: + def test_intp(self): + # Ticket #99 + assert_equal(1024, np.intp(1024)) + + def test_uint64_from_negative(self): + assert_equal(np.uint64(-2), np.uint64(18446744073709551614)) + + +int_types = [np.byte, np.short, np.intc, np.int_, np.longlong] +uint_types = [np.ubyte, np.ushort, np.uintc, np.uint, np.ulonglong] +float_types = [np.half, np.single, np.double, np.longdouble] +cfloat_types = [np.csingle, np.cdouble, np.clongdouble] + + +class TestArrayFromScalar: + """ gh-15467 """ + + def _do_test(self, t1, t2): + x = t1(2) + arr = np.array(x, dtype=t2) + # type should be preserved exactly + if t2 is None: + assert arr.dtype.type is t1 + else: + assert arr.dtype.type is t2 + + @pytest.mark.parametrize('t1', int_types + uint_types) + @pytest.mark.parametrize('t2', int_types + uint_types + [None]) + def test_integers(self, t1, t2): + return self._do_test(t1, t2) + + @pytest.mark.parametrize('t1', float_types) + @pytest.mark.parametrize('t2', float_types + [None]) + def test_reals(self, t1, t2): + return self._do_test(t1, t2) + + 
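+# Aside: the rule _do_test encodes, in miniature (plain NumPy) -- without an
+# explicit dtype, np.array() preserves a NumPy scalar's exact type, and an
+# explicit dtype wins otherwise.
+import numpy as np
+assert np.array(np.float32(2)).dtype.type is np.float32
+assert np.array(np.float32(2), dtype=np.float64).dtype.type is np.float64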
@pytest.mark.parametrize('t1', cfloat_types) + @pytest.mark.parametrize('t2', cfloat_types + [None]) + def test_complex(self, t1, t2): + return self._do_test(t1, t2) diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_scalar_methods.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_scalar_methods.py new file mode 100644 index 0000000..eef4c14 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_scalar_methods.py @@ -0,0 +1,203 @@ +""" +Test the scalar constructors, which also do type-coercion +""" +import sys +import fractions +import platform +import types +from typing import Any, Type + +import pytest +import numpy as np + +from numpy.testing import assert_equal, assert_raises + + +class TestAsIntegerRatio: + # derived in part from the cpython test "test_floatasratio" + + @pytest.mark.parametrize("ftype", [ + np.half, np.single, np.double, np.longdouble]) + @pytest.mark.parametrize("f, ratio", [ + (0.875, (7, 8)), + (-0.875, (-7, 8)), + (0.0, (0, 1)), + (11.5, (23, 2)), + ]) + def test_small(self, ftype, f, ratio): + assert_equal(ftype(f).as_integer_ratio(), ratio) + + @pytest.mark.parametrize("ftype", [ + np.half, np.single, np.double, np.longdouble]) + def test_simple_fractions(self, ftype): + R = fractions.Fraction + assert_equal(R(0, 1), + R(*ftype(0.0).as_integer_ratio())) + assert_equal(R(5, 2), + R(*ftype(2.5).as_integer_ratio())) + assert_equal(R(1, 2), + R(*ftype(0.5).as_integer_ratio())) + assert_equal(R(-2100, 1), + R(*ftype(-2100.0).as_integer_ratio())) + + @pytest.mark.parametrize("ftype", [ + np.half, np.single, np.double, np.longdouble]) + def test_errors(self, ftype): + assert_raises(OverflowError, ftype('inf').as_integer_ratio) + assert_raises(OverflowError, ftype('-inf').as_integer_ratio) + assert_raises(ValueError, ftype('nan').as_integer_ratio) + + def test_against_known_values(self): + R = fractions.Fraction + assert_equal(R(1075, 512), + R(*np.half(2.1).as_integer_ratio())) + assert_equal(R(-1075, 512), + R(*np.half(-2.1).as_integer_ratio())) + assert_equal(R(4404019, 2097152), + R(*np.single(2.1).as_integer_ratio())) + assert_equal(R(-4404019, 2097152), + R(*np.single(-2.1).as_integer_ratio())) + assert_equal(R(4728779608739021, 2251799813685248), + R(*np.double(2.1).as_integer_ratio())) + assert_equal(R(-4728779608739021, 2251799813685248), + R(*np.double(-2.1).as_integer_ratio())) + # longdouble is platform dependent + + @pytest.mark.parametrize("ftype, frac_vals, exp_vals", [ + # dtype test cases generated using hypothesis + # first five generated cases per dtype + (np.half, [0.0, 0.01154830649280303, 0.31082276347447274, + 0.527350517124794, 0.8308562335072596], + [0, 1, 0, -8, 12]), + (np.single, [0.0, 0.09248576989263226, 0.8160498218131407, + 0.17389442853722373, 0.7956044195067877], + [0, 12, 10, 17, -26]), + (np.double, [0.0, 0.031066908499895136, 0.5214135908877832, + 0.45780736035689296, 0.5906586745934036], + [0, -801, 51, 194, -653]), + pytest.param( + np.longdouble, + [0.0, 0.20492557202724854, 0.4277180662199366, 0.9888085019891495, + 0.9620175814461964], + [0, -7400, 14266, -7822, -8721], + marks=[ + pytest.mark.skipif( + np.finfo(np.double) == np.finfo(np.longdouble), + reason="long double is same as double"), + pytest.mark.skipif( + platform.machine().startswith("ppc"), + reason="IBM double double"), + ] + ) + ]) + def test_roundtrip(self, ftype, frac_vals, exp_vals): + for frac, exp in zip(frac_vals, exp_vals): + f = np.ldexp(ftype(frac), exp) + assert f.dtype == ftype + n, d = 
f.as_integer_ratio() + + try: + # workaround for gh-9968 + nf = np.longdouble(str(n)) + df = np.longdouble(str(d)) + except (OverflowError, RuntimeWarning): + # the values may not fit in any float type + pytest.skip("longdouble too small on this platform") + + assert_equal(nf / df, f, "{}/{}".format(n, d)) + + +class TestIsInteger: + @pytest.mark.parametrize("str_value", ["inf", "nan"]) + @pytest.mark.parametrize("code", np.typecodes["Float"]) + def test_special(self, code: str, str_value: str) -> None: + cls = np.dtype(code).type + value = cls(str_value) + assert not value.is_integer() + + @pytest.mark.parametrize( + "code", np.typecodes["Float"] + np.typecodes["AllInteger"] + ) + def test_true(self, code: str) -> None: + float_array = np.arange(-5, 5).astype(code) + for value in float_array: + assert value.is_integer() + + @pytest.mark.parametrize("code", np.typecodes["Float"]) + def test_false(self, code: str) -> None: + float_array = np.arange(-5, 5).astype(code) + float_array *= 1.1 + for value in float_array: + if value == 0: + continue + assert not value.is_integer() + + +@pytest.mark.skipif(sys.version_info < (3, 9), reason="Requires python 3.9") +class TestClassGetItem: + @pytest.mark.parametrize("cls", [ + np.number, + np.integer, + np.inexact, + np.unsignedinteger, + np.signedinteger, + np.floating, + ]) + def test_abc(self, cls: Type[np.number]) -> None: + alias = cls[Any] + assert isinstance(alias, types.GenericAlias) + assert alias.__origin__ is cls + + def test_abc_complexfloating(self) -> None: + alias = np.complexfloating[Any, Any] + assert isinstance(alias, types.GenericAlias) + assert alias.__origin__ is np.complexfloating + + @pytest.mark.parametrize("cls", [np.generic, np.flexible, np.character]) + def test_abc_non_numeric(self, cls: Type[np.generic]) -> None: + with pytest.raises(TypeError): + cls[Any] + + @pytest.mark.parametrize("code", np.typecodes["All"]) + def test_concrete(self, code: str) -> None: + cls = np.dtype(code).type + with pytest.raises(TypeError): + cls[Any] + + @pytest.mark.parametrize("arg_len", range(4)) + def test_subscript_tuple(self, arg_len: int) -> None: + arg_tup = (Any,) * arg_len + if arg_len == 1: + assert np.number[arg_tup] + else: + with pytest.raises(TypeError): + np.number[arg_tup] + + def test_subscript_scalar(self) -> None: + assert np.number[Any] + + +@pytest.mark.skipif(sys.version_info >= (3, 9), reason="Requires python 3.8") +@pytest.mark.parametrize("cls", [np.number, np.complexfloating, np.int64]) +def test_class_getitem_38(cls: Type[np.number]) -> None: + match = "Type subscription requires python >= 3.9" + with pytest.raises(TypeError, match=match): + cls[Any] + + +class TestBitCount: + # derived in part from the cpython test "test_bit_count" + + @pytest.mark.parametrize("itype", np.sctypes['int']+np.sctypes['uint']) + def test_small(self, itype): + for a in range(max(np.iinfo(itype).min, 0), 128): + msg = f"Smoke test for {itype}({a}).bit_count()" + assert itype(a).bit_count() == bin(a).count("1"), msg + + def test_bit_count(self): + for exp in [10, 17, 63]: + a = 2**exp + assert np.uint64(a).bit_count() == 1 + assert np.uint64(a - 1).bit_count() == exp + assert np.uint64(a ^ 63).bit_count() == 7 + assert np.uint64((a - 1) ^ 510).bit_count() == exp - 8 diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_scalarbuffer.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_scalarbuffer.py new file mode 100644 index 0000000..851cd30 --- /dev/null +++ 
b/.local/lib/python3.8/site-packages/numpy/core/tests/test_scalarbuffer.py @@ -0,0 +1,154 @@ +""" +Test scalar buffer interface adheres to PEP 3118 +""" +import numpy as np +from numpy.core._rational_tests import rational +from numpy.core._multiarray_tests import get_buffer_info +import pytest + +from numpy.testing import assert_, assert_equal, assert_raises + +# PEP3118 format strings for native (standard alignment and byteorder) types +scalars_and_codes = [ + (np.bool_, '?'), + (np.byte, 'b'), + (np.short, 'h'), + (np.intc, 'i'), + (np.int_, 'l'), + (np.longlong, 'q'), + (np.ubyte, 'B'), + (np.ushort, 'H'), + (np.uintc, 'I'), + (np.uint, 'L'), + (np.ulonglong, 'Q'), + (np.half, 'e'), + (np.single, 'f'), + (np.double, 'd'), + (np.longdouble, 'g'), + (np.csingle, 'Zf'), + (np.cdouble, 'Zd'), + (np.clongdouble, 'Zg'), +] +scalars_only, codes_only = zip(*scalars_and_codes) + + +class TestScalarPEP3118: + + @pytest.mark.parametrize('scalar', scalars_only, ids=codes_only) + def test_scalar_match_array(self, scalar): + x = scalar() + a = np.array([], dtype=np.dtype(scalar)) + mv_x = memoryview(x) + mv_a = memoryview(a) + assert_equal(mv_x.format, mv_a.format) + + @pytest.mark.parametrize('scalar', scalars_only, ids=codes_only) + def test_scalar_dim(self, scalar): + x = scalar() + mv_x = memoryview(x) + assert_equal(mv_x.itemsize, np.dtype(scalar).itemsize) + assert_equal(mv_x.ndim, 0) + assert_equal(mv_x.shape, ()) + assert_equal(mv_x.strides, ()) + assert_equal(mv_x.suboffsets, ()) + + @pytest.mark.parametrize('scalar, code', scalars_and_codes, ids=codes_only) + def test_scalar_code_and_properties(self, scalar, code): + x = scalar() + expected = dict(strides=(), itemsize=x.dtype.itemsize, ndim=0, + shape=(), format=code, readonly=True) + + mv_x = memoryview(x) + print(mv_x.readonly, self._as_dict(mv_x)) + assert self._as_dict(mv_x) == expected + + @pytest.mark.parametrize('scalar', scalars_only, ids=codes_only) + def test_scalar_buffers_readonly(self, scalar): + x = scalar() + with pytest.raises(BufferError, match="scalar buffer is readonly"): + get_buffer_info(x, ["WRITABLE"]) + + def test_void_scalar_structured_data(self): + dt = np.dtype([('name', np.unicode_, 16), ('grades', np.float64, (2,))]) + x = np.array(('ndarray_scalar', (1.2, 3.0)), dtype=dt)[()] + assert_(isinstance(x, np.void)) + mv_x = memoryview(x) + expected_size = 16 * np.dtype((np.unicode_, 1)).itemsize + expected_size += 2 * np.dtype(np.float64).itemsize + assert_equal(mv_x.itemsize, expected_size) + assert_equal(mv_x.ndim, 0) + assert_equal(mv_x.shape, ()) + assert_equal(mv_x.strides, ()) + assert_equal(mv_x.suboffsets, ()) + + # check scalar format string against ndarray format string + a = np.array([('Sarah', (8.0, 7.0)), ('John', (6.0, 7.0))], dtype=dt) + assert_(isinstance(a, np.ndarray)) + mv_a = memoryview(a) + assert_equal(mv_x.itemsize, mv_a.itemsize) + assert_equal(mv_x.format, mv_a.format) + + # Check that we do not allow writeable buffer export (technically + # we could allow it sometimes here...) 
+ with pytest.raises(BufferError, match="scalar buffer is readonly"): + get_buffer_info(x, ["WRITABLE"]) + + def _as_dict(self, m): + return dict(strides=m.strides, shape=m.shape, itemsize=m.itemsize, + ndim=m.ndim, format=m.format, readonly=m.readonly) + + def test_datetime_memoryview(self): + # gh-11656 + # Values verified with v1.13.3, shape is not () as in test_scalar_dim + + dt1 = np.datetime64('2016-01-01') + dt2 = np.datetime64('2017-01-01') + expected = dict(strides=(1,), itemsize=1, ndim=1, shape=(8,), + format='B', readonly=True) + v = memoryview(dt1) + assert self._as_dict(v) == expected + + v = memoryview(dt2 - dt1) + assert self._as_dict(v) == expected + + dt = np.dtype([('a', 'uint16'), ('b', 'M8[s]')]) + a = np.empty(1, dt) + # Fails to create a PEP 3118 valid buffer + assert_raises((ValueError, BufferError), memoryview, a[0]) + + # Check that we do not allow writeable buffer export + with pytest.raises(BufferError, match="scalar buffer is readonly"): + get_buffer_info(dt1, ["WRITABLE"]) + + @pytest.mark.parametrize('s', [ + pytest.param("\x32\x32", id="ascii"), + pytest.param("\uFE0F\uFE0F", id="basic multilingual"), + pytest.param("\U0001f4bb\U0001f4bb", id="non-BMP"), + ]) + def test_str_ucs4(self, s): + s = np.str_(s) # only our subclass implements the buffer protocol + + # all the same, characters always encode as ucs4 + expected = dict(strides=(), itemsize=8, ndim=0, shape=(), format='2w', + readonly=True) + + v = memoryview(s) + assert self._as_dict(v) == expected + + # integers of the platform-appropriate endianness + code_points = np.frombuffer(v, dtype='i4') + + assert_equal(code_points, [ord(c) for c in s]) + + # Check that we do not allow writeable buffer export + with pytest.raises(BufferError, match="scalar buffer is readonly"): + get_buffer_info(s, ["WRITABLE"]) + + def test_user_scalar_fails_buffer(self): + r = rational(1) + with assert_raises(TypeError): + memoryview(r) + + # Check that we do not allow writeable buffer export + with pytest.raises(BufferError, match="scalar buffer is readonly"): + get_buffer_info(r, ["WRITABLE"]) \ No newline at end of file diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_scalarinherit.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_scalarinherit.py new file mode 100644 index 0000000..98d7f7c --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_scalarinherit.py @@ -0,0 +1,98 @@ +""" Test inheritance and subclassing of scalar types. + +""" +import pytest + +import numpy as np +from numpy.testing import assert_, assert_raises + + +class A: + pass +class B(A, np.float64): + pass + +class C(B): + pass +class D(C, B): + pass + +class B0(np.float64, A): + pass +class C0(B0): + pass + +class HasNew: + def __new__(cls, *args, **kwargs): + return cls, args, kwargs + +class B1(np.float64, HasNew): + pass + + +class TestInherit: + def test_init(self): + x = B(1.0) + assert_(str(x) == '1.0') + y = C(2.0) + assert_(str(y) == '2.0') + z = D(3.0) + assert_(str(z) == '3.0') + + def test_init2(self): + x = B0(1.0) + assert_(str(x) == '1.0') + y = C0(2.0) + assert_(str(y) == '2.0') + + def test_gh_15395(self): + # HasNew is the second base, so `np.float64` should have priority + x = B1(1.0) + assert_(str(x) == '1.0') + + # previously caused RecursionError!?
+ with pytest.raises(TypeError): + B1(1.0, 2.0) + + +class TestCharacter: + def test_char_radd(self): + # GH issue 9620, reached gentype_add and raise TypeError + np_s = np.string_('abc') + np_u = np.unicode_('abc') + s = b'def' + u = u'def' + assert_(np_s.__radd__(np_s) is NotImplemented) + assert_(np_s.__radd__(np_u) is NotImplemented) + assert_(np_s.__radd__(s) is NotImplemented) + assert_(np_s.__radd__(u) is NotImplemented) + assert_(np_u.__radd__(np_s) is NotImplemented) + assert_(np_u.__radd__(np_u) is NotImplemented) + assert_(np_u.__radd__(s) is NotImplemented) + assert_(np_u.__radd__(u) is NotImplemented) + assert_(s + np_s == b'defabc') + assert_(u + np_u == u'defabc') + + class MyStr(str, np.generic): + # would segfault + pass + + with assert_raises(TypeError): + # Previously worked, but gave completely wrong result + ret = s + MyStr('abc') + + class MyBytes(bytes, np.generic): + # would segfault + pass + + ret = s + MyBytes(b'abc') + assert(type(ret) is type(s)) + assert ret == b"defabc" + + def test_char_repeat(self): + np_s = np.string_('abc') + np_u = np.unicode_('abc') + res_s = b'abc' * 5 + res_u = u'abc' * 5 + assert_(np_s * 5 == res_s) + assert_(np_u * 5 == res_u) diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_scalarmath.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_scalarmath.py new file mode 100644 index 0000000..90078a2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_scalarmath.py @@ -0,0 +1,834 @@ +import contextlib +import sys +import warnings +import itertools +import operator +import platform +import pytest +from hypothesis import given, settings, Verbosity +from hypothesis.strategies import sampled_from + +import numpy as np +from numpy.testing import ( + assert_, assert_equal, assert_raises, assert_almost_equal, + assert_array_equal, IS_PYPY, suppress_warnings, _gen_alignment_data, + assert_warns, + ) + +types = [np.bool_, np.byte, np.ubyte, np.short, np.ushort, np.intc, np.uintc, + np.int_, np.uint, np.longlong, np.ulonglong, + np.single, np.double, np.longdouble, np.csingle, + np.cdouble, np.clongdouble] + +floating_types = np.floating.__subclasses__() +complex_floating_types = np.complexfloating.__subclasses__() + + +# This compares scalarmath against ufuncs. + +class TestTypes: + def test_types(self): + for atype in types: + a = atype(1) + assert_(a == 1, "error with %r: got %r" % (atype, a)) + + def test_type_add(self): + # list of types + for k, atype in enumerate(types): + a_scalar = atype(3) + a_array = np.array([3], dtype=atype) + for l, btype in enumerate(types): + b_scalar = btype(1) + b_array = np.array([1], dtype=btype) + c_scalar = a_scalar + b_scalar + c_array = a_array + b_array + # It was comparing the type numbers, but the new ufunc + # function-finding mechanism finds the lowest function + # to which both inputs can be cast - which produces 'l' + # when you do 'q' + 'b'. The old function finding mechanism + # skipped ahead based on the first argument, but that + # does not produce properly symmetric results... 
+ assert_equal(c_scalar.dtype, c_array.dtype, + "error with types (%d/'%c' + %d/'%c')" % + (k, np.dtype(atype).char, l, np.dtype(btype).char)) + + def test_type_create(self): + for k, atype in enumerate(types): + a = np.array([1, 2, 3], atype) + b = atype([1, 2, 3]) + assert_equal(a, b) + + def test_leak(self): + # test leak of scalar objects + # a leak would show up in valgrind as still-reachable of ~2.6MB + for i in range(200000): + np.add(1, 1) + + +class TestBaseMath: + def test_blocked(self): + # test alignments offsets for simd instructions + # alignments for vz + 2 * (vs - 1) + 1 + for dt, sz in [(np.float32, 11), (np.float64, 7), (np.int32, 11)]: + for out, inp1, inp2, msg in _gen_alignment_data(dtype=dt, + type='binary', + max_size=sz): + exp1 = np.ones_like(inp1) + inp1[...] = np.ones_like(inp1) + inp2[...] = np.zeros_like(inp2) + assert_almost_equal(np.add(inp1, inp2), exp1, err_msg=msg) + assert_almost_equal(np.add(inp1, 2), exp1 + 2, err_msg=msg) + assert_almost_equal(np.add(1, inp2), exp1, err_msg=msg) + + np.add(inp1, inp2, out=out) + assert_almost_equal(out, exp1, err_msg=msg) + + inp2[...] += np.arange(inp2.size, dtype=dt) + 1 + assert_almost_equal(np.square(inp2), + np.multiply(inp2, inp2), err_msg=msg) + # skip true divide for ints + if dt != np.int32: + assert_almost_equal(np.reciprocal(inp2), + np.divide(1, inp2), err_msg=msg) + + inp1[...] = np.ones_like(inp1) + np.add(inp1, 2, out=out) + assert_almost_equal(out, exp1 + 2, err_msg=msg) + inp2[...] = np.ones_like(inp2) + np.add(2, inp2, out=out) + assert_almost_equal(out, exp1 + 2, err_msg=msg) + + def test_lower_align(self): + # check data that is not aligned to element size + # i.e doubles are aligned to 4 bytes on i386 + d = np.zeros(23 * 8, dtype=np.int8)[4:-4].view(np.float64) + o = np.zeros(23 * 8, dtype=np.int8)[4:-4].view(np.float64) + assert_almost_equal(d + d, d * 2) + np.add(d, d, out=o) + np.add(np.ones_like(d), d, out=o) + np.add(d, np.ones_like(d), out=o) + np.add(np.ones_like(d), d) + np.add(d, np.ones_like(d)) + + +class TestPower: + def test_small_types(self): + for t in [np.int8, np.int16, np.float16]: + a = t(3) + b = a ** 4 + assert_(b == 81, "error with %r: got %r" % (t, b)) + + def test_large_types(self): + for t in [np.int32, np.int64, np.float32, np.float64, np.longdouble]: + a = t(51) + b = a ** 4 + msg = "error with %r: got %r" % (t, b) + if np.issubdtype(t, np.integer): + assert_(b == 6765201, msg) + else: + assert_almost_equal(b, 6765201, err_msg=msg) + + def test_integers_to_negative_integer_power(self): + # Note that the combination of uint64 with a signed integer + # has common type np.float64. The other combinations should all + # raise a ValueError for integer ** negative integer. + exp = [np.array(-1, dt)[()] for dt in 'bhilq'] + + # 1 ** -1 possible special case + base = [np.array(1, dt)[()] for dt in 'bhilqBHILQ'] + for i1, i2 in itertools.product(base, exp): + if i1.dtype != np.uint64: + assert_raises(ValueError, operator.pow, i1, i2) + else: + res = operator.pow(i1, i2) + assert_(res.dtype.type is np.float64) + assert_almost_equal(res, 1.) + + # -1 ** -1 possible special case + base = [np.array(-1, dt)[()] for dt in 'bhilq'] + for i1, i2 in itertools.product(base, exp): + if i1.dtype != np.uint64: + assert_raises(ValueError, operator.pow, i1, i2) + else: + res = operator.pow(i1, i2) + assert_(res.dtype.type is np.float64) + assert_almost_equal(res, -1.) 
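# --- Minimal standalone sketch (added illustration, not part of the vendored
# numpy file) of the rule these stanzas exercise: a NumPy integer raised to a
# negative integer power raises ValueError, except that uint64 combined with
# a signed exponent promotes to float64 and succeeds.
import operator
import numpy as np

try:
    operator.pow(np.int64(2), np.int64(-1))
except ValueError:
    pass  # "Integers to negative integer powers are not allowed."
res = operator.pow(np.uint64(2), np.int64(-1))  # common type is float64
assert res.dtype.type is np.float64 and res == 0.5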
+ + # 2 ** -1 perhaps generic + base = [np.array(2, dt)[()] for dt in 'bhilqBHILQ'] + for i1, i2 in itertools.product(base, exp): + if i1.dtype != np.uint64: + assert_raises(ValueError, operator.pow, i1, i2) + else: + res = operator.pow(i1, i2) + assert_(res.dtype.type is np.float64) + assert_almost_equal(res, .5) + + def test_mixed_types(self): + typelist = [np.int8, np.int16, np.float16, + np.float32, np.float64, np.int8, + np.int16, np.int32, np.int64] + for t1 in typelist: + for t2 in typelist: + a = t1(3) + b = t2(2) + result = a**b + msg = ("error with %r and %r:" + "got %r, expected %r") % (t1, t2, result, 9) + if np.issubdtype(np.dtype(result), np.integer): + assert_(result == 9, msg) + else: + assert_almost_equal(result, 9, err_msg=msg) + + def test_modular_power(self): + # modular power is not implemented, so ensure it errors + a = 5 + b = 4 + c = 10 + expected = pow(a, b, c) # noqa: F841 + for t in (np.int32, np.float32, np.complex64): + # note that 3-operand power only dispatches on the first argument + assert_raises(TypeError, operator.pow, t(a), b, c) + assert_raises(TypeError, operator.pow, np.array(t(a)), b, c) + + +def floordiv_and_mod(x, y): + return (x // y, x % y) + + +def _signs(dt): + if dt in np.typecodes['UnsignedInteger']: + return (+1,) + else: + return (+1, -1) + + +class TestModulus: + + def test_modulus_basic(self): + dt = np.typecodes['AllInteger'] + np.typecodes['Float'] + for op in [floordiv_and_mod, divmod]: + for dt1, dt2 in itertools.product(dt, dt): + for sg1, sg2 in itertools.product(_signs(dt1), _signs(dt2)): + fmt = 'op: %s, dt1: %s, dt2: %s, sg1: %s, sg2: %s' + msg = fmt % (op.__name__, dt1, dt2, sg1, sg2) + a = np.array(sg1*71, dtype=dt1)[()] + b = np.array(sg2*19, dtype=dt2)[()] + div, rem = op(a, b) + assert_equal(div*b + rem, a, err_msg=msg) + if sg2 == -1: + assert_(b < rem <= 0, msg) + else: + assert_(b > rem >= 0, msg) + + def test_float_modulus_exact(self): + # test that float results are exact for small integers. This also + # holds for the same integers scaled by powers of two. + nlst = list(range(-127, 0)) + plst = list(range(1, 128)) + dividend = nlst + [0] + plst + divisor = nlst + plst + arg = list(itertools.product(dividend, divisor)) + tgt = list(divmod(*t) for t in arg) + + a, b = np.array(arg, dtype=int).T + # convert exact integer results from Python to float so that + # signed zero can be used, it is checked. 
+ tgtdiv, tgtrem = np.array(tgt, dtype=float).T + tgtdiv = np.where((tgtdiv == 0.0) & ((b < 0) ^ (a < 0)), -0.0, tgtdiv) + tgtrem = np.where((tgtrem == 0.0) & (b < 0), -0.0, tgtrem) + + for op in [floordiv_and_mod, divmod]: + for dt in np.typecodes['Float']: + msg = 'op: %s, dtype: %s' % (op.__name__, dt) + fa = a.astype(dt) + fb = b.astype(dt) + # use list comprehension so a_ and b_ are scalars + div, rem = zip(*[op(a_, b_) for a_, b_ in zip(fa, fb)]) + assert_equal(div, tgtdiv, err_msg=msg) + assert_equal(rem, tgtrem, err_msg=msg) + + def test_float_modulus_roundoff(self): + # gh-6127 + dt = np.typecodes['Float'] + for op in [floordiv_and_mod, divmod]: + for dt1, dt2 in itertools.product(dt, dt): + for sg1, sg2 in itertools.product((+1, -1), (+1, -1)): + fmt = 'op: %s, dt1: %s, dt2: %s, sg1: %s, sg2: %s' + msg = fmt % (op.__name__, dt1, dt2, sg1, sg2) + a = np.array(sg1*78*6e-8, dtype=dt1)[()] + b = np.array(sg2*6e-8, dtype=dt2)[()] + div, rem = op(a, b) + # Equal assertion should hold when fmod is used + assert_equal(div*b + rem, a, err_msg=msg) + if sg2 == -1: + assert_(b < rem <= 0, msg) + else: + assert_(b > rem >= 0, msg) + + def test_float_modulus_corner_cases(self): + # Check remainder magnitude. + for dt in np.typecodes['Float']: + b = np.array(1.0, dtype=dt) + a = np.nextafter(np.array(0.0, dtype=dt), -b) + rem = operator.mod(a, b) + assert_(rem <= b, 'dt: %s' % dt) + rem = operator.mod(-a, -b) + assert_(rem >= -b, 'dt: %s' % dt) + + # Check nans, inf + with suppress_warnings() as sup: + sup.filter(RuntimeWarning, "invalid value encountered in remainder") + sup.filter(RuntimeWarning, "divide by zero encountered in remainder") + sup.filter(RuntimeWarning, "divide by zero encountered in floor_divide") + sup.filter(RuntimeWarning, "divide by zero encountered in divmod") + sup.filter(RuntimeWarning, "invalid value encountered in divmod") + for dt in np.typecodes['Float']: + fone = np.array(1.0, dtype=dt) + fzer = np.array(0.0, dtype=dt) + finf = np.array(np.inf, dtype=dt) + fnan = np.array(np.nan, dtype=dt) + rem = operator.mod(fone, fzer) + assert_(np.isnan(rem), 'dt: %s' % dt) + # MSVC 2008 returns NaN here, so disable the check. + #rem = operator.mod(fone, finf) + #assert_(rem == fone, 'dt: %s' % dt) + rem = operator.mod(fone, fnan) + assert_(np.isnan(rem), 'dt: %s' % dt) + rem = operator.mod(finf, fone) + assert_(np.isnan(rem), 'dt: %s' % dt) + for op in [floordiv_and_mod, divmod]: + div, mod = op(fone, fzer) + assert_(np.isinf(div)) + assert_(np.isnan(mod)) + + def test_inplace_floordiv_handling(self): + # issue gh-12927 + # this only applies to in-place floordiv //=, because the output type + # promotes to float which does not fit + a = np.array([1, 2], np.int64) + b = np.array([1, 2], np.uint64) + with pytest.raises(TypeError, + match=r"Cannot cast ufunc 'floor_divide' output from"): + a //= b + + +class TestComplexDivision: + def test_zero_division(self): + with np.errstate(all="ignore"): + for t in [np.complex64, np.complex128]: + a = t(0.0) + b = t(1.0) + assert_(np.isinf(b/a)) + b = t(complex(np.inf, np.inf)) + assert_(np.isinf(b/a)) + b = t(complex(np.inf, np.nan)) + assert_(np.isinf(b/a)) + b = t(complex(np.nan, np.inf)) + assert_(np.isinf(b/a)) + b = t(complex(np.nan, np.nan)) + assert_(np.isnan(b/a)) + b = t(0.)
+ assert_(np.isnan(b/a)) + + def test_signed_zeros(self): + with np.errstate(all="ignore"): + for t in [np.complex64, np.complex128]: + # tupled (numerator, denominator, expected) + # for testing as expected == numerator/denominator + data = ( + (( 0.0,-1.0), ( 0.0, 1.0), (-1.0,-0.0)), + (( 0.0,-1.0), ( 0.0,-1.0), ( 1.0,-0.0)), + (( 0.0,-1.0), (-0.0,-1.0), ( 1.0, 0.0)), + (( 0.0,-1.0), (-0.0, 1.0), (-1.0, 0.0)), + (( 0.0, 1.0), ( 0.0,-1.0), (-1.0, 0.0)), + (( 0.0,-1.0), ( 0.0,-1.0), ( 1.0,-0.0)), + ((-0.0,-1.0), ( 0.0,-1.0), ( 1.0,-0.0)), + ((-0.0, 1.0), ( 0.0,-1.0), (-1.0,-0.0)) + ) + for cases in data: + n = cases[0] + d = cases[1] + ex = cases[2] + result = t(complex(n[0], n[1])) / t(complex(d[0], d[1])) + # check real and imag parts separately to avoid comparison + # in array context, which does not account for signed zeros + assert_equal(result.real, ex[0]) + assert_equal(result.imag, ex[1]) + + def test_branches(self): + with np.errstate(all="ignore"): + for t in [np.complex64, np.complex128]: + # tupled (numerator, denominator, expected) + # for testing as expected == numerator/denominator + data = list() + + # trigger branch: real(fabs(denom)) > imag(fabs(denom)) + # followed by else condition as neither are == 0 + data.append((( 2.0, 1.0), ( 2.0, 1.0), (1.0, 0.0))) + + # trigger branch: real(fabs(denom)) > imag(fabs(denom)) + # followed by if condition as both are == 0 + # is performed in test_zero_division(), so this is skipped + + # trigger else if branch: real(fabs(denom)) < imag(fabs(denom)) + data.append((( 1.0, 2.0), ( 1.0, 2.0), (1.0, 0.0))) + + for cases in data: + n = cases[0] + d = cases[1] + ex = cases[2] + result = t(complex(n[0], n[1])) / t(complex(d[0], d[1])) + # check real and imag parts separately to avoid comparison + # in array context, which does not account for signed zeros + assert_equal(result.real, ex[0]) + assert_equal(result.imag, ex[1]) + + +class TestConversion: + def test_int_from_long(self): + l = [1e6, 1e12, 1e18, -1e6, -1e12, -1e18] + li = [10**6, 10**12, 10**18, -10**6, -10**12, -10**18] + for T in [None, np.float64, np.int64]: + a = np.array(l, dtype=T) + assert_equal([int(_m) for _m in a], li) + + a = np.array(l[:3], dtype=np.uint64) + assert_equal([int(_m) for _m in a], li[:3]) + + def test_iinfo_long_values(self): + for code in 'bBhH': + res = np.array(np.iinfo(code).max + 1, dtype=code) + tgt = np.iinfo(code).min + assert_(res == tgt) + + for code in np.typecodes['AllInteger']: + res = np.array(np.iinfo(code).max, dtype=code) + tgt = np.iinfo(code).max + assert_(res == tgt) + + for code in np.typecodes['AllInteger']: + res = np.dtype(code).type(np.iinfo(code).max) + tgt = np.iinfo(code).max + assert_(res == tgt) + + def test_int_raise_behaviour(self): + def overflow_error_func(dtype): + dtype(np.iinfo(dtype).max + 1) + + for code in [np.int_, np.uint, np.longlong, np.ulonglong]: + assert_raises(OverflowError, overflow_error_func, code) + + def test_int_from_infinite_longdouble(self): + # gh-627 + x = np.longdouble(np.inf) + assert_raises(OverflowError, int, x) + with suppress_warnings() as sup: + sup.record(np.ComplexWarning) + x = np.clongdouble(np.inf) + assert_raises(OverflowError, int, x) + assert_equal(len(sup.log), 1) + + @pytest.mark.skipif(not IS_PYPY, reason="Test is PyPy only (gh-9972)") + def test_int_from_infinite_longdouble___int__(self): + x = np.longdouble(np.inf) + assert_raises(OverflowError, x.__int__) + with suppress_warnings() as sup: + sup.record(np.ComplexWarning) + x = np.clongdouble(np.inf) + 
assert_raises(OverflowError, x.__int__) + assert_equal(len(sup.log), 1) + + @pytest.mark.skipif(np.finfo(np.double) == np.finfo(np.longdouble), + reason="long double is same as double") + @pytest.mark.skipif(platform.machine().startswith("ppc"), + reason="IBM double double") + def test_int_from_huge_longdouble(self): + # Produce a longdouble that would overflow a double, + # use exponent that avoids bug in Darwin pow function. + exp = np.finfo(np.double).maxexp - 1 + huge_ld = 2 * 1234 * np.longdouble(2) ** exp + huge_i = 2 * 1234 * 2 ** exp + assert_(huge_ld != np.inf) + assert_equal(int(huge_ld), huge_i) + + def test_int_from_longdouble(self): + x = np.longdouble(1.5) + assert_equal(int(x), 1) + x = np.longdouble(-10.5) + assert_equal(int(x), -10) + + def test_numpy_scalar_relational_operators(self): + # All integer + for dt1 in np.typecodes['AllInteger']: + assert_(1 > np.array(0, dtype=dt1)[()], "type %s failed" % (dt1,)) + assert_(not 1 < np.array(0, dtype=dt1)[()], "type %s failed" % (dt1,)) + + for dt2 in np.typecodes['AllInteger']: + assert_(np.array(1, dtype=dt1)[()] > np.array(0, dtype=dt2)[()], + "type %s and %s failed" % (dt1, dt2)) + assert_(not np.array(1, dtype=dt1)[()] < np.array(0, dtype=dt2)[()], + "type %s and %s failed" % (dt1, dt2)) + + #Unsigned integers + for dt1 in 'BHILQP': + assert_(-1 < np.array(1, dtype=dt1)[()], "type %s failed" % (dt1,)) + assert_(not -1 > np.array(1, dtype=dt1)[()], "type %s failed" % (dt1,)) + assert_(-1 != np.array(1, dtype=dt1)[()], "type %s failed" % (dt1,)) + + #unsigned vs signed + for dt2 in 'bhilqp': + assert_(np.array(1, dtype=dt1)[()] > np.array(-1, dtype=dt2)[()], + "type %s and %s failed" % (dt1, dt2)) + assert_(not np.array(1, dtype=dt1)[()] < np.array(-1, dtype=dt2)[()], + "type %s and %s failed" % (dt1, dt2)) + assert_(np.array(1, dtype=dt1)[()] != np.array(-1, dtype=dt2)[()], + "type %s and %s failed" % (dt1, dt2)) + + #Signed integers and floats + for dt1 in 'bhlqp' + np.typecodes['Float']: + assert_(1 > np.array(-1, dtype=dt1)[()], "type %s failed" % (dt1,)) + assert_(not 1 < np.array(-1, dtype=dt1)[()], "type %s failed" % (dt1,)) + assert_(-1 == np.array(-1, dtype=dt1)[()], "type %s failed" % (dt1,)) + + for dt2 in 'bhlqp' + np.typecodes['Float']: + assert_(np.array(1, dtype=dt1)[()] > np.array(-1, dtype=dt2)[()], + "type %s and %s failed" % (dt1, dt2)) + assert_(not np.array(1, dtype=dt1)[()] < np.array(-1, dtype=dt2)[()], + "type %s and %s failed" % (dt1, dt2)) + assert_(np.array(-1, dtype=dt1)[()] == np.array(-1, dtype=dt2)[()], + "type %s and %s failed" % (dt1, dt2)) + + def test_scalar_comparison_to_none(self): + # Scalars should just return False and not give a warnings. + # The comparisons are flagged by pep8, ignore that. + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', FutureWarning) + assert_(not np.float32(1) == None) + assert_(not np.str_('test') == None) + # This is dubious (see below): + assert_(not np.datetime64('NaT') == None) + + assert_(np.float32(1) != None) + assert_(np.str_('test') != None) + # This is dubious (see below): + assert_(np.datetime64('NaT') != None) + assert_(len(w) == 0) + + # For documentation purposes, this is why the datetime is dubious. + # At the time of deprecation this was no behaviour change, but + # it has to be considered when the deprecations are done. 
+ assert_(np.equal(np.datetime64('NaT'), None)) + + +#class TestRepr: +# def test_repr(self): +# for t in types: +# val = t(1197346475.0137341) +# val_repr = repr(val) +# val2 = eval(val_repr) +# assert_equal( val, val2 ) + + +class TestRepr: + def _test_type_repr(self, t): + finfo = np.finfo(t) + last_fraction_bit_idx = finfo.nexp + finfo.nmant + last_exponent_bit_idx = finfo.nexp + storage_bytes = np.dtype(t).itemsize*8 + # could add some more types to the list below + for which in ['small denorm', 'small norm']: + # Values from https://en.wikipedia.org/wiki/IEEE_754 + constr = np.array([0x00]*storage_bytes, dtype=np.uint8) + if which == 'small denorm': + byte = last_fraction_bit_idx // 8 + bytebit = 7-(last_fraction_bit_idx % 8) + constr[byte] = 1 << bytebit + elif which == 'small norm': + byte = last_exponent_bit_idx // 8 + bytebit = 7-(last_exponent_bit_idx % 8) + constr[byte] = 1 << bytebit + else: + raise ValueError('hmm') + val = constr.view(t)[0] + val_repr = repr(val) + val2 = t(eval(val_repr)) + if not (val2 == 0 and val < 1e-100): + assert_equal(val, val2) + + def test_float_repr(self): + # long double test cannot work, because eval goes through a python + # float + for t in [np.float32, np.float64]: + self._test_type_repr(t) + + +if not IS_PYPY: + # sys.getsizeof() is not valid on PyPy + class TestSizeOf: + + def test_equal_nbytes(self): + for type in types: + x = type(0) + assert_(sys.getsizeof(x) > x.nbytes) + + def test_error(self): + d = np.float32() + assert_raises(TypeError, d.__sizeof__, "a") + + +class TestMultiply: + def test_seq_repeat(self): + # Test that basic sequences get repeated when multiplied with + # numpy integers. And errors are raised when multiplied with others. + # Some of this behaviour may be controversial and could be open for + # change. + accepted_types = set(np.typecodes["AllInteger"]) + deprecated_types = {'?'} + forbidden_types = ( + set(np.typecodes["All"]) - accepted_types - deprecated_types) + forbidden_types -= {'V'} # can't default-construct void scalars + + for seq_type in (list, tuple): + seq = seq_type([1, 2, 3]) + for numpy_type in accepted_types: + i = np.dtype(numpy_type).type(2) + assert_equal(seq * i, seq * int(i)) + assert_equal(i * seq, int(i) * seq) + + for numpy_type in deprecated_types: + i = np.dtype(numpy_type).type() + assert_equal( + assert_warns(DeprecationWarning, operator.mul, seq, i), + seq * int(i)) + assert_equal( + assert_warns(DeprecationWarning, operator.mul, i, seq), + int(i) * seq) + + for numpy_type in forbidden_types: + i = np.dtype(numpy_type).type() + assert_raises(TypeError, operator.mul, seq, i) + assert_raises(TypeError, operator.mul, i, seq) + + def test_no_seq_repeat_basic_array_like(self): + # Test that an array-like which does not know how to be multiplied + # does not attempt sequence repeat (raise TypeError). + # See also gh-7428. + class ArrayLike: + def __init__(self, arr): + self.arr = arr + def __array__(self): + return self.arr + + # Test for simple ArrayLike above and memoryviews (original report) + for arr_like in (ArrayLike(np.ones(3)), memoryview(np.ones(3))): + assert_array_equal(arr_like * np.float32(3.), np.full(3, 3.)) + assert_array_equal(np.float32(3.) 
* arr_like, np.full(3, 3.)) + assert_array_equal(arr_like * np.int_(3), np.full(3, 3)) + assert_array_equal(np.int_(3) * arr_like, np.full(3, 3)) + + +class TestNegative: + def test_exceptions(self): + a = np.ones((), dtype=np.bool_)[()] + assert_raises(TypeError, operator.neg, a) + + def test_result(self): + types = np.typecodes['AllInteger'] + np.typecodes['AllFloat'] + with suppress_warnings() as sup: + sup.filter(RuntimeWarning) + for dt in types: + a = np.ones((), dtype=dt)[()] + assert_equal(operator.neg(a) + a, 0) + + +class TestSubtract: + def test_exceptions(self): + a = np.ones((), dtype=np.bool_)[()] + assert_raises(TypeError, operator.sub, a, a) + + def test_result(self): + types = np.typecodes['AllInteger'] + np.typecodes['AllFloat'] + with suppress_warnings() as sup: + sup.filter(RuntimeWarning) + for dt in types: + a = np.ones((), dtype=dt)[()] + assert_equal(operator.sub(a, a), 0) + + +class TestAbs: + def _test_abs_func(self, absfunc, test_dtype): + x = test_dtype(-1.5) + assert_equal(absfunc(x), 1.5) + x = test_dtype(0.0) + res = absfunc(x) + # assert_equal() checks zero signedness + assert_equal(res, 0.0) + x = test_dtype(-0.0) + res = absfunc(x) + assert_equal(res, 0.0) + + x = test_dtype(np.finfo(test_dtype).max) + assert_equal(absfunc(x), x.real) + + with suppress_warnings() as sup: + sup.filter(UserWarning) + x = test_dtype(np.finfo(test_dtype).tiny) + assert_equal(absfunc(x), x.real) + + x = test_dtype(np.finfo(test_dtype).min) + assert_equal(absfunc(x), -x.real) + + @pytest.mark.parametrize("dtype", floating_types + complex_floating_types) + def test_builtin_abs(self, dtype): + if sys.platform == "cygwin" and dtype == np.clongdouble: + pytest.xfail( + reason="absl is computed in double precision on cygwin" + ) + self._test_abs_func(abs, dtype) + + @pytest.mark.parametrize("dtype", floating_types + complex_floating_types) + def test_numpy_abs(self, dtype): + if sys.platform == "cygwin" and dtype == np.clongdouble: + pytest.xfail( + reason="absl is computed in double precision on cygwin" + ) + self._test_abs_func(np.abs, dtype) + +class TestBitShifts: + + @pytest.mark.parametrize('type_code', np.typecodes['AllInteger']) + @pytest.mark.parametrize('op', + [operator.rshift, operator.lshift], ids=['>>', '<<']) + def test_shift_all_bits(self, type_code, op): + """ Shifts where the shift amount is the width of the type or wider """ + # gh-2449 + dt = np.dtype(type_code) + nbits = dt.itemsize * 8 + for val in [5, -5]: + for shift in [nbits, nbits + 4]: + val_scl = dt.type(val) + shift_scl = dt.type(shift) + res_scl = op(val_scl, shift_scl) + if val_scl < 0 and op is operator.rshift: + # sign bit is preserved + assert_equal(res_scl, -1) + else: + assert_equal(res_scl, 0) + + # Result on scalars should be the same as on arrays + val_arr = np.array([val]*32, dtype=dt) + shift_arr = np.array([shift]*32, dtype=dt) + res_arr = op(val_arr, shift_arr) + assert_equal(res_arr, res_scl) + + +class TestHash: + @pytest.mark.parametrize("type_code", np.typecodes['AllInteger']) + def test_integer_hashes(self, type_code): + scalar = np.dtype(type_code).type + for i in range(128): + assert hash(i) == hash(scalar(i)) + + @pytest.mark.parametrize("type_code", np.typecodes['AllFloat']) + def test_float_and_complex_hashes(self, type_code): + scalar = np.dtype(type_code).type + for val in [np.pi, np.inf, 3, 6.]: + numpy_val = scalar(val) + # Cast back to Python, in case the NumPy scalar has less precision + if numpy_val.dtype.kind == 'c': + val = complex(numpy_val) + else: + val = 
float(numpy_val) + assert val == numpy_val + assert hash(val) == hash(numpy_val) + + if hash(float(np.nan)) != hash(float(np.nan)): + # If Python distinguishes different NaNs we do so too (gh-18833) + assert hash(scalar(np.nan)) != hash(scalar(np.nan)) + + @pytest.mark.parametrize("type_code", np.typecodes['Complex']) + def test_complex_hashes(self, type_code): + # Test some complex valued hashes specifically: + scalar = np.dtype(type_code).type + for val in [np.pi+1j, np.inf-3j, 3j, 6.+1j]: + numpy_val = scalar(val) + assert hash(complex(numpy_val)) == hash(numpy_val) + + +@contextlib.contextmanager +def recursionlimit(n): + o = sys.getrecursionlimit() + try: + sys.setrecursionlimit(n) + yield + finally: + sys.setrecursionlimit(o) + + +objecty_things = [object(), None] +reasonable_operators_for_scalars = [ + operator.lt, operator.le, operator.eq, operator.ne, operator.ge, + operator.gt, operator.add, operator.floordiv, operator.mod, + operator.mul, operator.matmul, operator.pow, operator.sub, + operator.truediv, +] + + +@given(sampled_from(objecty_things), + sampled_from(reasonable_operators_for_scalars), + sampled_from(types)) +@settings(verbosity=Verbosity.verbose) +def test_operator_object_left(o, op, type_): + try: + with recursionlimit(200): + op(o, type_(1)) + except TypeError: + pass + + +@given(sampled_from(objecty_things), + sampled_from(reasonable_operators_for_scalars), + sampled_from(types)) +def test_operator_object_right(o, op, type_): + try: + with recursionlimit(200): + op(type_(1), o) + except TypeError: + pass + + +@given(sampled_from(reasonable_operators_for_scalars), + sampled_from(types), + sampled_from(types)) +def test_operator_scalars(op, type1, type2): + try: + op(type1(1), type2(1)) + except TypeError: + pass + + +@pytest.mark.parametrize("op", reasonable_operators_for_scalars) +def test_longdouble_inf_loop(op): + try: + op(np.longdouble(3), None) + except TypeError: + pass + try: + op(None, np.longdouble(3)) + except TypeError: + pass + + +@pytest.mark.parametrize("op", reasonable_operators_for_scalars) +def test_clongdouble_inf_loop(op): + if op in {operator.mod} and False: + pytest.xfail("The modulo operator is known to be broken") + try: + op(np.clongdouble(3), None) + except TypeError: + pass + try: + op(None, np.clongdouble(3)) + except TypeError: + pass diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_scalarprint.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_scalarprint.py new file mode 100644 index 0000000..ee21d4a --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_scalarprint.py @@ -0,0 +1,381 @@ +""" Test printing of scalar types.
+ +""" +import code +import platform +import pytest +import sys + +from tempfile import TemporaryFile +import numpy as np +from numpy.testing import assert_, assert_equal, assert_raises + +class TestRealScalars: + def test_str(self): + svals = [0.0, -0.0, 1, -1, np.inf, -np.inf, np.nan] + styps = [np.float16, np.float32, np.float64, np.longdouble] + wanted = [ + ['0.0', '0.0', '0.0', '0.0' ], + ['-0.0', '-0.0', '-0.0', '-0.0'], + ['1.0', '1.0', '1.0', '1.0' ], + ['-1.0', '-1.0', '-1.0', '-1.0'], + ['inf', 'inf', 'inf', 'inf' ], + ['-inf', '-inf', '-inf', '-inf'], + ['nan', 'nan', 'nan', 'nan']] + + for wants, val in zip(wanted, svals): + for want, styp in zip(wants, styps): + msg = 'for str({}({}))'.format(np.dtype(styp).name, repr(val)) + assert_equal(str(styp(val)), want, err_msg=msg) + + def test_scalar_cutoffs(self): + # test that both the str and repr of np.float64 behaves + # like python floats in python3. + def check(v): + assert_equal(str(np.float64(v)), str(v)) + assert_equal(str(np.float64(v)), repr(v)) + assert_equal(repr(np.float64(v)), repr(v)) + assert_equal(repr(np.float64(v)), str(v)) + + # check we use the same number of significant digits + check(1.12345678901234567890) + check(0.0112345678901234567890) + + # check switch from scientific output to positional and back + check(1e-5) + check(1e-4) + check(1e15) + check(1e16) + + def test_py2_float_print(self): + # gh-10753 + # In python2, the python float type implements an obsolete method + # tp_print, which overrides tp_repr and tp_str when using "print" to + # output to a "real file" (ie, not a StringIO). Make sure we don't + # inherit it. + x = np.double(0.1999999999999) + with TemporaryFile('r+t') as f: + print(x, file=f) + f.seek(0) + output = f.read() + assert_equal(output, str(x) + '\n') + # In python2 the value float('0.1999999999999') prints with reduced + # precision as '0.2', but we want numpy's np.double('0.1999999999999') + # to print the unique value, '0.1999999999999'. + + # gh-11031 + # Only in the python2 interactive shell and when stdout is a "real" + # file, the output of the last command is printed to stdout without + # Py_PRINT_RAW (unlike the print statement) so `>>> x` and `>>> print + # x` are potentially different. Make sure they are the same. The only + # way I found to get prompt-like output is using an actual prompt from + # the 'code' module. Again, must use tempfile to get a "real" file. + + # dummy user-input which enters one line and then ctrl-Ds. + def userinput(): + yield 'np.sqrt(2)' + raise EOFError + gen = userinput() + input_func = lambda prompt="": next(gen) + + with TemporaryFile('r+t') as fo, TemporaryFile('r+t') as fe: + orig_stdout, orig_stderr = sys.stdout, sys.stderr + sys.stdout, sys.stderr = fo, fe + + code.interact(local={'np': np}, readfunc=input_func, banner='') + + sys.stdout, sys.stderr = orig_stdout, orig_stderr + + fo.seek(0) + capture = fo.read().strip() + + assert_equal(capture, repr(np.sqrt(2))) + + def test_dragon4(self): + # these tests are adapted from Ryan Juckett's dragon4 implementation, + # see dragon4.c for details. 
+ + fpos32 = lambda x, **k: np.format_float_positional(np.float32(x), **k) + fsci32 = lambda x, **k: np.format_float_scientific(np.float32(x), **k) + fpos64 = lambda x, **k: np.format_float_positional(np.float64(x), **k) + fsci64 = lambda x, **k: np.format_float_scientific(np.float64(x), **k) + + preckwd = lambda prec: {'unique': False, 'precision': prec} + + assert_equal(fpos32('1.0'), "1.") + assert_equal(fsci32('1.0'), "1.e+00") + assert_equal(fpos32('10.234'), "10.234") + assert_equal(fpos32('-10.234'), "-10.234") + assert_equal(fsci32('10.234'), "1.0234e+01") + assert_equal(fsci32('-10.234'), "-1.0234e+01") + assert_equal(fpos32('1000.0'), "1000.") + assert_equal(fpos32('1.0', precision=0), "1.") + assert_equal(fsci32('1.0', precision=0), "1.e+00") + assert_equal(fpos32('10.234', precision=0), "10.") + assert_equal(fpos32('-10.234', precision=0), "-10.") + assert_equal(fsci32('10.234', precision=0), "1.e+01") + assert_equal(fsci32('-10.234', precision=0), "-1.e+01") + assert_equal(fpos32('10.234', precision=2), "10.23") + assert_equal(fsci32('-10.234', precision=2), "-1.02e+01") + assert_equal(fsci64('9.9999999999999995e-08', **preckwd(16)), + '9.9999999999999995e-08') + assert_equal(fsci64('9.8813129168249309e-324', **preckwd(16)), + '9.8813129168249309e-324') + assert_equal(fsci64('9.9999999999999694e-311', **preckwd(16)), + '9.9999999999999694e-311') + + + # test rounding + # 3.1415927410 is closest float32 to np.pi + assert_equal(fpos32('3.14159265358979323846', **preckwd(10)), + "3.1415927410") + assert_equal(fsci32('3.14159265358979323846', **preckwd(10)), + "3.1415927410e+00") + assert_equal(fpos64('3.14159265358979323846', **preckwd(10)), + "3.1415926536") + assert_equal(fsci64('3.14159265358979323846', **preckwd(10)), + "3.1415926536e+00") + # 299792448 is closest float32 to 299792458 + assert_equal(fpos32('299792458.0', **preckwd(5)), "299792448.00000") + assert_equal(fsci32('299792458.0', **preckwd(5)), "2.99792e+08") + assert_equal(fpos64('299792458.0', **preckwd(5)), "299792458.00000") + assert_equal(fsci64('299792458.0', **preckwd(5)), "2.99792e+08") + + assert_equal(fpos32('3.14159265358979323846', **preckwd(25)), + "3.1415927410125732421875000") + assert_equal(fpos64('3.14159265358979323846', **preckwd(50)), + "3.14159265358979311599796346854418516159057617187500") + assert_equal(fpos64('3.14159265358979323846'), "3.141592653589793") + + + # smallest numbers + assert_equal(fpos32(0.5**(126 + 23), unique=False, precision=149), + "0.00000000000000000000000000000000000000000000140129846432" + "4817070923729583289916131280261941876515771757068283889791" + "08268586060148663818836212158203125") + + assert_equal(fpos64(5e-324, unique=False, precision=1074), + "0.00000000000000000000000000000000000000000000000000000000" + "0000000000000000000000000000000000000000000000000000000000" + "0000000000000000000000000000000000000000000000000000000000" + "0000000000000000000000000000000000000000000000000000000000" + "0000000000000000000000000000000000000000000000000000000000" + "0000000000000000000000000000000000049406564584124654417656" + "8792868221372365059802614324764425585682500675507270208751" + "8652998363616359923797965646954457177309266567103559397963" + "9877479601078187812630071319031140452784581716784898210368" + "8718636056998730723050006387409153564984387312473397273169" + "6151400317153853980741262385655911710266585566867681870395" + "6031062493194527159149245532930545654440112748012970999954" + "1931989409080416563324524757147869014726780159355238611550" + 
"1348035264934720193790268107107491703332226844753335720832" + "4319360923828934583680601060115061698097530783422773183292" + "4790498252473077637592724787465608477820373446969953364701" + "7972677717585125660551199131504891101451037862738167250955" + "8373897335989936648099411642057026370902792427675445652290" + "87538682506419718265533447265625") + + # largest numbers + f32x = np.finfo(np.float32).max + assert_equal(fpos32(f32x, **preckwd(0)), + "340282346638528859811704183484516925440.") + assert_equal(fpos64(np.finfo(np.float64).max, **preckwd(0)), + "1797693134862315708145274237317043567980705675258449965989" + "1747680315726078002853876058955863276687817154045895351438" + "2464234321326889464182768467546703537516986049910576551282" + "0762454900903893289440758685084551339423045832369032229481" + "6580855933212334827479782620414472316873817718091929988125" + "0404026184124858368.") + # Warning: In unique mode only the integer digits necessary for + # uniqueness are computed, the rest are 0. + assert_equal(fpos32(f32x), + "340282350000000000000000000000000000000.") + + # Further tests of zero-padding vs rounding in different combinations + # of unique, fractional, precision, min_digits + # precision can only reduce digits, not add them. + # min_digits can only extend digits, not reduce them. + assert_equal(fpos32(f32x, unique=True, fractional=True, precision=0), + "340282350000000000000000000000000000000.") + assert_equal(fpos32(f32x, unique=True, fractional=True, precision=4), + "340282350000000000000000000000000000000.") + assert_equal(fpos32(f32x, unique=True, fractional=True, min_digits=0), + "340282346638528859811704183484516925440.") + assert_equal(fpos32(f32x, unique=True, fractional=True, min_digits=4), + "340282346638528859811704183484516925440.0000") + assert_equal(fpos32(f32x, unique=True, fractional=True, + min_digits=4, precision=4), + "340282346638528859811704183484516925440.0000") + assert_raises(ValueError, fpos32, f32x, unique=True, fractional=False, + precision=0) + assert_equal(fpos32(f32x, unique=True, fractional=False, precision=4), + "340300000000000000000000000000000000000.") + assert_equal(fpos32(f32x, unique=True, fractional=False, precision=20), + "340282350000000000000000000000000000000.") + assert_equal(fpos32(f32x, unique=True, fractional=False, min_digits=4), + "340282350000000000000000000000000000000.") + assert_equal(fpos32(f32x, unique=True, fractional=False, + min_digits=20), + "340282346638528859810000000000000000000.") + assert_equal(fpos32(f32x, unique=True, fractional=False, + min_digits=15), + "340282346638529000000000000000000000000.") + assert_equal(fpos32(f32x, unique=False, fractional=False, precision=4), + "340300000000000000000000000000000000000.") + # test that unique rounding is preserved when precision is supplied + # but no extra digits need to be printed (gh-18609) + a = np.float64.fromhex('-1p-97') + assert_equal(fsci64(a, unique=True), '-6.310887241768095e-30') + assert_equal(fsci64(a, unique=False, precision=15), + '-6.310887241768094e-30') + assert_equal(fsci64(a, unique=True, precision=15), + '-6.310887241768095e-30') + assert_equal(fsci64(a, unique=True, min_digits=15), + '-6.310887241768095e-30') + assert_equal(fsci64(a, unique=True, precision=15, min_digits=15), + '-6.310887241768095e-30') + # adds/remove digits in unique mode with unbiased rnding + assert_equal(fsci64(a, unique=True, precision=14), + '-6.31088724176809e-30') + assert_equal(fsci64(a, unique=True, min_digits=16), + '-6.3108872417680944e-30') + 
assert_equal(fsci64(a, unique=True, precision=16), + '-6.310887241768095e-30') + assert_equal(fsci64(a, unique=True, min_digits=14), + '-6.310887241768095e-30') + # test min_digits in unique mode with different rounding cases + assert_equal(fsci64('1e120', min_digits=3), '1.000e+120') + assert_equal(fsci64('1e100', min_digits=3), '1.000e+100') + + # test trailing zeros + assert_equal(fpos32('1.0', unique=False, precision=3), "1.000") + assert_equal(fpos64('1.0', unique=False, precision=3), "1.000") + assert_equal(fsci32('1.0', unique=False, precision=3), "1.000e+00") + assert_equal(fsci64('1.0', unique=False, precision=3), "1.000e+00") + assert_equal(fpos32('1.5', unique=False, precision=3), "1.500") + assert_equal(fpos64('1.5', unique=False, precision=3), "1.500") + assert_equal(fsci32('1.5', unique=False, precision=3), "1.500e+00") + assert_equal(fsci64('1.5', unique=False, precision=3), "1.500e+00") + # gh-10713 + assert_equal(fpos64('324', unique=False, precision=5, + fractional=False), "324.00") + + + def test_dragon4_interface(self): + tps = [np.float16, np.float32, np.float64] + if hasattr(np, 'float128'): + tps.append(np.float128) + + fpos = np.format_float_positional + fsci = np.format_float_scientific + + for tp in tps: + # test padding + assert_equal(fpos(tp('1.0'), pad_left=4, pad_right=4), " 1. ") + assert_equal(fpos(tp('-1.0'), pad_left=4, pad_right=4), " -1. ") + assert_equal(fpos(tp('-10.2'), + pad_left=4, pad_right=4), " -10.2 ") + + # test exp_digits + assert_equal(fsci(tp('1.23e1'), exp_digits=5), "1.23e+00001") + + # test fixed (non-unique) mode + assert_equal(fpos(tp('1.0'), unique=False, precision=4), "1.0000") + assert_equal(fsci(tp('1.0'), unique=False, precision=4), + "1.0000e+00") + + # test trimming + # trim of 'k' or '.' only affects non-unique mode, since unique + # mode will not output trailing 0s. + assert_equal(fpos(tp('1.'), unique=False, precision=4, trim='k'), + "1.0000") + + assert_equal(fpos(tp('1.'), unique=False, precision=4, trim='.'), + "1.") + assert_equal(fpos(tp('1.2'), unique=False, precision=4, trim='.'), + "1.2" if tp != np.float16 else "1.2002") + + assert_equal(fpos(tp('1.'), unique=False, precision=4, trim='0'), + "1.0") + assert_equal(fpos(tp('1.2'), unique=False, precision=4, trim='0'), + "1.2" if tp != np.float16 else "1.2002") + assert_equal(fpos(tp('1.'), trim='0'), "1.0") + + assert_equal(fpos(tp('1.'), unique=False, precision=4, trim='-'), + "1") + assert_equal(fpos(tp('1.2'), unique=False, precision=4, trim='-'), + "1.2" if tp != np.float16 else "1.2002") + assert_equal(fpos(tp('1.'), trim='-'), "1") + + @pytest.mark.skipif(not platform.machine().startswith("ppc64"), + reason="only applies to ppc float128 values") + def test_ppc64_ibm_double_double128(self): + # check that the precision decreases once we get into the subnormal + # range. Unlike float64, this starts around 1e-292 instead of 1e-308, + # which happens when the first double is normal and the second is + # subnormal. 
+ x = np.float128('2.123123123123123123123123123123123e-286') + got = [str(x/np.float128('2e' + str(i))) for i in range(0,40)] + expected = [ + "1.06156156156156156156156156156157e-286", + "1.06156156156156156156156156156158e-287", + "1.06156156156156156156156156156159e-288", + "1.0615615615615615615615615615616e-289", + "1.06156156156156156156156156156157e-290", + "1.06156156156156156156156156156156e-291", + "1.0615615615615615615615615615616e-292", + "1.0615615615615615615615615615615e-293", + "1.061561561561561561561561561562e-294", + "1.06156156156156156156156156155e-295", + "1.0615615615615615615615615616e-296", + "1.06156156156156156156156156e-297", + "1.06156156156156156156156157e-298", + "1.0615615615615615615615616e-299", + "1.06156156156156156156156e-300", + "1.06156156156156156156155e-301", + "1.0615615615615615615616e-302", + "1.061561561561561561562e-303", + "1.06156156156156156156e-304", + "1.0615615615615615618e-305", + "1.06156156156156156e-306", + "1.06156156156156157e-307", + "1.0615615615615616e-308", + "1.06156156156156e-309", + "1.06156156156157e-310", + "1.0615615615616e-311", + "1.06156156156e-312", + "1.06156156154e-313", + "1.0615615616e-314", + "1.06156156e-315", + "1.06156155e-316", + "1.061562e-317", + "1.06156e-318", + "1.06155e-319", + "1.0617e-320", + "1.06e-321", + "1.04e-322", + "1e-323", + "0.0", + "0.0"] + assert_equal(got, expected) + + # Note: we follow glibc behavior, but it (or gcc) might not be right. + # In particular we can get two values that print the same but are not + # equal: + a = np.float128('2')/np.float128('3') + b = np.float128(str(a)) + assert_equal(str(a), str(b)) + assert_(a != b) + + def float32_roundtrip(self): + # gh-9360 + x = np.float32(1024 - 2**-14) + y = np.float32(1024 - 2**-13) + assert_(repr(x) != repr(y)) + assert_equal(np.float32(repr(x)), x) + assert_equal(np.float32(repr(y)), y) + + def float64_vs_python(self): + # gh-2643, gh-6136, gh-6908 + assert_equal(repr(np.float64(0.1)), repr(0.1)) + assert_(repr(np.float64(0.20000000000000004)) != repr(0.2)) diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_shape_base.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_shape_base.py new file mode 100644 index 0000000..679e3c0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_shape_base.py @@ -0,0 +1,762 @@ +import pytest +import numpy as np +from numpy.core import ( + array, arange, atleast_1d, atleast_2d, atleast_3d, block, vstack, hstack, + newaxis, concatenate, stack + ) +from numpy.core.shape_base import (_block_dispatcher, _block_setup, + _block_concatenate, _block_slicing) +from numpy.testing import ( + assert_, assert_raises, assert_array_equal, assert_equal, + assert_raises_regex, assert_warns, IS_PYPY + ) + + +class TestAtleast1d: + def test_0D_array(self): + a = array(1) + b = array(2) + res = [atleast_1d(a), atleast_1d(b)] + desired = [array([1]), array([2])] + assert_array_equal(res, desired) + + def test_1D_array(self): + a = array([1, 2]) + b = array([2, 3]) + res = [atleast_1d(a), atleast_1d(b)] + desired = [array([1, 2]), array([2, 3])] + assert_array_equal(res, desired) + + def test_2D_array(self): + a = array([[1, 2], [1, 2]]) + b = array([[2, 3], [2, 3]]) + res = [atleast_1d(a), atleast_1d(b)] + desired = [a, b] + assert_array_equal(res, desired) + + def test_3D_array(self): + a = array([[1, 2], [1, 2]]) + b = array([[2, 3], [2, 3]]) + a = array([a, a]) + b = array([b, b]) + res = [atleast_1d(a), atleast_1d(b)] + desired = [a, b] + assert_array_equal(res, 
desired) + + def test_r1array(self): + """ Test to make sure it is equivalent to Travis O's r1array function + """ + assert_(atleast_1d(3).shape == (1,)) + assert_(atleast_1d(3j).shape == (1,)) + assert_(atleast_1d(3.0).shape == (1,)) + assert_(atleast_1d([[2, 3], [4, 5]]).shape == (2, 2)) + + +class TestAtleast2d: + def test_0D_array(self): + a = array(1) + b = array(2) + res = [atleast_2d(a), atleast_2d(b)] + desired = [array([[1]]), array([[2]])] + assert_array_equal(res, desired) + + def test_1D_array(self): + a = array([1, 2]) + b = array([2, 3]) + res = [atleast_2d(a), atleast_2d(b)] + desired = [array([[1, 2]]), array([[2, 3]])] + assert_array_equal(res, desired) + + def test_2D_array(self): + a = array([[1, 2], [1, 2]]) + b = array([[2, 3], [2, 3]]) + res = [atleast_2d(a), atleast_2d(b)] + desired = [a, b] + assert_array_equal(res, desired) + + def test_3D_array(self): + a = array([[1, 2], [1, 2]]) + b = array([[2, 3], [2, 3]]) + a = array([a, a]) + b = array([b, b]) + res = [atleast_2d(a), atleast_2d(b)] + desired = [a, b] + assert_array_equal(res, desired) + + def test_r2array(self): + """ Test to make sure it is equivalent to Travis O's r2array function + """ + assert_(atleast_2d(3).shape == (1, 1)) + assert_(atleast_2d([3j, 1]).shape == (1, 2)) + assert_(atleast_2d([[[3, 1], [4, 5]], [[3, 5], [1, 2]]]).shape == (2, 2, 2)) + + +class TestAtleast3d: + def test_0D_array(self): + a = array(1) + b = array(2) + res = [atleast_3d(a), atleast_3d(b)] + desired = [array([[[1]]]), array([[[2]]])] + assert_array_equal(res, desired) + + def test_1D_array(self): + a = array([1, 2]) + b = array([2, 3]) + res = [atleast_3d(a), atleast_3d(b)] + desired = [array([[[1], [2]]]), array([[[2], [3]]])] + assert_array_equal(res, desired) + + def test_2D_array(self): + a = array([[1, 2], [1, 2]]) + b = array([[2, 3], [2, 3]]) + res = [atleast_3d(a), atleast_3d(b)] + desired = [a[:,:, newaxis], b[:,:, newaxis]] + assert_array_equal(res, desired) + + def test_3D_array(self): + a = array([[1, 2], [1, 2]]) + b = array([[2, 3], [2, 3]]) + a = array([a, a]) + b = array([b, b]) + res = [atleast_3d(a), atleast_3d(b)] + desired = [a, b] + assert_array_equal(res, desired) + + +class TestHstack: + def test_non_iterable(self): + assert_raises(TypeError, hstack, 1) + + def test_empty_input(self): + assert_raises(ValueError, hstack, ()) + + def test_0D_array(self): + a = array(1) + b = array(2) + res = hstack([a, b]) + desired = array([1, 2]) + assert_array_equal(res, desired) + + def test_1D_array(self): + a = array([1]) + b = array([2]) + res = hstack([a, b]) + desired = array([1, 2]) + assert_array_equal(res, desired) + + def test_2D_array(self): + a = array([[1], [2]]) + b = array([[1], [2]]) + res = hstack([a, b]) + desired = array([[1, 1], [2, 2]]) + assert_array_equal(res, desired) + + def test_generator(self): + with assert_warns(FutureWarning): + hstack((np.arange(3) for _ in range(2))) + with assert_warns(FutureWarning): + hstack(map(lambda x: x, np.ones((3, 2)))) + + +class TestVstack: + def test_non_iterable(self): + assert_raises(TypeError, vstack, 1) + + def test_empty_input(self): + assert_raises(ValueError, vstack, ()) + + def test_0D_array(self): + a = array(1) + b = array(2) + res = vstack([a, b]) + desired = array([[1], [2]]) + assert_array_equal(res, desired) + + def test_1D_array(self): + a = array([1]) + b = array([2]) + res = vstack([a, b]) + desired = array([[1], [2]]) + assert_array_equal(res, desired) + + def test_2D_array(self): + a = array([[1], [2]]) + b = array([[1], [2]]) + res = vstack([a, b]) +
desired = array([[1], [2], [1], [2]]) + assert_array_equal(res, desired) + + def test_2D_array2(self): + a = array([1, 2]) + b = array([1, 2]) + res = vstack([a, b]) + desired = array([[1, 2], [1, 2]]) + assert_array_equal(res, desired) + + def test_generator(self): + with assert_warns(FutureWarning): + vstack((np.arange(3) for _ in range(2))) + + +class TestConcatenate: + def test_returns_copy(self): + a = np.eye(3) + b = np.concatenate([a]) + b[0, 0] = 2 + assert b[0, 0] != a[0, 0] + + def test_exceptions(self): + # test axis must be in bounds + for ndim in [1, 2, 3]: + a = np.ones((1,)*ndim) + np.concatenate((a, a), axis=0) # OK + assert_raises(np.AxisError, np.concatenate, (a, a), axis=ndim) + assert_raises(np.AxisError, np.concatenate, (a, a), axis=-(ndim + 1)) + + # Scalars cannot be concatenated + assert_raises(ValueError, concatenate, (0,)) + assert_raises(ValueError, concatenate, (np.array(0),)) + + # dimensionality must match + assert_raises_regex( + ValueError, + r"all the input arrays must have same number of dimensions, but " + r"the array at index 0 has 1 dimension\(s\) and the array at " + r"index 1 has 2 dimension\(s\)", + np.concatenate, (np.zeros(1), np.zeros((1, 1)))) + + # test shapes must match except for concatenation axis + a = np.ones((1, 2, 3)) + b = np.ones((2, 2, 3)) + axis = list(range(3)) + for i in range(3): + np.concatenate((a, b), axis=axis[0]) # OK + assert_raises_regex( + ValueError, + "all the input array dimensions for the concatenation axis " + "must match exactly, but along dimension {}, the array at " + "index 0 has size 1 and the array at index 1 has size 2" + .format(i), + np.concatenate, (a, b), axis=axis[1]) + assert_raises(ValueError, np.concatenate, (a, b), axis=axis[2]) + a = np.moveaxis(a, -1, 0) + b = np.moveaxis(b, -1, 0) + axis.append(axis.pop(0)) + + # No arrays to concatenate raises ValueError + assert_raises(ValueError, concatenate, ()) + + def test_concatenate_axis_None(self): + a = np.arange(4, dtype=np.float64).reshape((2, 2)) + b = list(range(3)) + c = ['x'] + r = np.concatenate((a, a), axis=None) + assert_equal(r.dtype, a.dtype) + assert_equal(r.ndim, 1) + r = np.concatenate((a, b), axis=None) + assert_equal(r.size, a.size + len(b)) + assert_equal(r.dtype, a.dtype) + r = np.concatenate((a, b, c), axis=None, dtype="U") + d = array(['0.0', '1.0', '2.0', '3.0', + '0', '1', '2', 'x']) + assert_array_equal(r, d) + + out = np.zeros(a.size + len(b)) + r = np.concatenate((a, b), axis=None) + rout = np.concatenate((a, b), axis=None, out=out) + assert_(out is rout) + assert_equal(r, rout) + + def test_large_concatenate_axis_None(self): + # When no axis is given, concatenate uses flattened versions. + # This also had a bug with many arrays (see gh-5979). 
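+ # A minimal illustrative check (not part of the original suite; `a2d` is a
+ # hypothetical local name): with axis=None every input is flattened in C
+ # order before joining, so a 2-D array concatenates exactly like its ravel().
+ a2d = np.arange(4).reshape(2, 2)
+ assert_array_equal(np.concatenate((a2d, a2d), axis=None),
+ np.concatenate((a2d.ravel(), a2d.ravel())))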
+ x = np.arange(1, 100) + r = np.concatenate(x, None) + assert_array_equal(x, r) + + # This should probably be deprecated: + r = np.concatenate(x, 100) # axis is >= MAXDIMS + assert_array_equal(x, r) + + def test_concatenate(self): + # Test concatenate function + # One sequence returns unmodified (but as array) + r4 = list(range(4)) + assert_array_equal(concatenate((r4,)), r4) + # Any sequence + assert_array_equal(concatenate((tuple(r4),)), r4) + assert_array_equal(concatenate((array(r4),)), r4) + # 1D default concatenation + r3 = list(range(3)) + assert_array_equal(concatenate((r4, r3)), r4 + r3) + # Mixed sequence types + assert_array_equal(concatenate((tuple(r4), r3)), r4 + r3) + assert_array_equal(concatenate((array(r4), r3)), r4 + r3) + # Explicit axis specification + assert_array_equal(concatenate((r4, r3), 0), r4 + r3) + # Including negative + assert_array_equal(concatenate((r4, r3), -1), r4 + r3) + # 2D + a23 = array([[10, 11, 12], [13, 14, 15]]) + a13 = array([[0, 1, 2]]) + res = array([[10, 11, 12], [13, 14, 15], [0, 1, 2]]) + assert_array_equal(concatenate((a23, a13)), res) + assert_array_equal(concatenate((a23, a13), 0), res) + assert_array_equal(concatenate((a23.T, a13.T), 1), res.T) + assert_array_equal(concatenate((a23.T, a13.T), -1), res.T) + # Arrays much match shape + assert_raises(ValueError, concatenate, (a23.T, a13.T), 0) + # 3D + res = arange(2 * 3 * 7).reshape((2, 3, 7)) + a0 = res[..., :4] + a1 = res[..., 4:6] + a2 = res[..., 6:] + assert_array_equal(concatenate((a0, a1, a2), 2), res) + assert_array_equal(concatenate((a0, a1, a2), -1), res) + assert_array_equal(concatenate((a0.T, a1.T, a2.T), 0), res.T) + + out = res.copy() + rout = concatenate((a0, a1, a2), 2, out=out) + assert_(out is rout) + assert_equal(res, rout) + + @pytest.mark.skipif(IS_PYPY, reason="PYPY handles sq_concat, nb_add differently than cpython") + def test_operator_concat(self): + import operator + a = array([1, 2]) + b = array([3, 4]) + n = [1,2] + res = array([1, 2, 3, 4]) + assert_raises(TypeError, operator.concat, a, b) + assert_raises(TypeError, operator.concat, a, n) + assert_raises(TypeError, operator.concat, n, a) + assert_raises(TypeError, operator.concat, a, 1) + assert_raises(TypeError, operator.concat, 1, a) + + def test_bad_out_shape(self): + a = array([1, 2]) + b = array([3, 4]) + + assert_raises(ValueError, concatenate, (a, b), out=np.empty(5)) + assert_raises(ValueError, concatenate, (a, b), out=np.empty((4,1))) + assert_raises(ValueError, concatenate, (a, b), out=np.empty((1,4))) + concatenate((a, b), out=np.empty(4)) + + @pytest.mark.parametrize("axis", [None, 0]) + @pytest.mark.parametrize("out_dtype", ["c8", "f4", "f8", ">f8", "i8", "S4"]) + @pytest.mark.parametrize("casting", + ['no', 'equiv', 'safe', 'same_kind', 'unsafe']) + def test_out_and_dtype(self, axis, out_dtype, casting): + # Compare usage of `out=out` with `dtype=out.dtype` + out = np.empty(4, dtype=out_dtype) + to_concat = (array([1.1, 2.2]), array([3.3, 4.4])) + + if not np.can_cast(to_concat[0], out_dtype, casting=casting): + with assert_raises(TypeError): + concatenate(to_concat, out=out, axis=axis, casting=casting) + with assert_raises(TypeError): + concatenate(to_concat, dtype=out.dtype, + axis=axis, casting=casting) + else: + res_out = concatenate(to_concat, out=out, + axis=axis, casting=casting) + res_dtype = concatenate(to_concat, dtype=out.dtype, + axis=axis, casting=casting) + assert res_out is out + assert_array_equal(out, res_dtype) + assert res_dtype.dtype == out_dtype + + with 
assert_raises(TypeError): + concatenate(to_concat, out=out, dtype=out_dtype, axis=axis) + + @pytest.mark.parametrize("axis", [None, 0]) + @pytest.mark.parametrize("string_dt", ["S", "U", "S0", "U0"]) + @pytest.mark.parametrize("arrs", + [([0.],), ([0.], [1]), ([0], ["string"], [1.])]) + def test_dtype_with_promotion(self, arrs, string_dt, axis): + # Note that U0 and S0 should be deprecated eventually and changed to + # actually give the empty string result (together with `np.array`) + res = np.concatenate(arrs, axis=axis, dtype=string_dt, casting="unsafe") + # The actual dtype should be identical to a cast (of a double array): + assert res.dtype == np.array(1.).astype(string_dt).dtype + + @pytest.mark.parametrize("axis", [None, 0]) + def test_string_dtype_does_not_inspect(self, axis): + with pytest.raises(TypeError): + np.concatenate(([None], [1]), dtype="S", axis=axis) + with pytest.raises(TypeError): + np.concatenate(([None], [1]), dtype="U", axis=axis) + + @pytest.mark.parametrize("axis", [None, 0]) + def test_subarray_error(self, axis): + with pytest.raises(TypeError, match=".*subarray dtype"): + np.concatenate(([1], [1]), dtype="(2,)i", axis=axis) + + +def test_stack(): + # non-iterable input + assert_raises(TypeError, stack, 1) + + # 0d input + for input_ in [(1, 2, 3), + [np.int32(1), np.int32(2), np.int32(3)], + [np.array(1), np.array(2), np.array(3)]]: + assert_array_equal(stack(input_), [1, 2, 3]) + # 1d input examples + a = np.array([1, 2, 3]) + b = np.array([4, 5, 6]) + r1 = array([[1, 2, 3], [4, 5, 6]]) + assert_array_equal(np.stack((a, b)), r1) + assert_array_equal(np.stack((a, b), axis=1), r1.T) + # all input types + assert_array_equal(np.stack(list([a, b])), r1) + assert_array_equal(np.stack(array([a, b])), r1) + # all shapes for 1d input + arrays = [np.random.randn(3) for _ in range(10)] + axes = [0, 1, -1, -2] + expected_shapes = [(10, 3), (3, 10), (3, 10), (10, 3)] + for axis, expected_shape in zip(axes, expected_shapes): + assert_equal(np.stack(arrays, axis).shape, expected_shape) + assert_raises_regex(np.AxisError, 'out of bounds', stack, arrays, axis=2) + assert_raises_regex(np.AxisError, 'out of bounds', stack, arrays, axis=-3) + # all shapes for 2d input + arrays = [np.random.randn(3, 4) for _ in range(10)] + axes = [0, 1, 2, -1, -2, -3] + expected_shapes = [(10, 3, 4), (3, 10, 4), (3, 4, 10), + (3, 4, 10), (3, 10, 4), (10, 3, 4)] + for axis, expected_shape in zip(axes, expected_shapes): + assert_equal(np.stack(arrays, axis).shape, expected_shape) + # empty arrays + assert_(stack([[], [], []]).shape == (3, 0)) + assert_(stack([[], [], []], axis=1).shape == (0, 3)) + # out + out = np.zeros_like(r1) + np.stack((a, b), out=out) + assert_array_equal(out, r1) + # edge cases + assert_raises_regex(ValueError, 'need at least one array', stack, []) + assert_raises_regex(ValueError, 'must have the same shape', + stack, [1, np.arange(3)]) + assert_raises_regex(ValueError, 'must have the same shape', + stack, [np.arange(3), 1]) + assert_raises_regex(ValueError, 'must have the same shape', + stack, [np.arange(3), 1], axis=1) + assert_raises_regex(ValueError, 'must have the same shape', + stack, [np.zeros((3, 3)), np.zeros(3)], axis=1) + assert_raises_regex(ValueError, 'must have the same shape', + stack, [np.arange(2), np.arange(3)]) + # generator is deprecated + with assert_warns(FutureWarning): + result = stack((x for x in range(3))) + assert_array_equal(result, np.array([0, 1, 2])) + + +class TestBlock: + @pytest.fixture(params=['block', 'force_concatenate', 'force_slicing']) 
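+ # Note on the fixture below: pytest runs every TestBlock test once per
+ # param; `request.param` names the active variant and the fixture returns
+ # the matching callable, so the `block` argument each test receives is
+ # either np.block itself or a wrapper that forces one internal code path.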
+ def block(self, request): + # blocking small arrays and large arrays go through different paths. + # the algorithm is triggered depending on the number of element + # copies required. + # We define a test fixture that forces most tests to go through + # both code paths. + # Ultimately, this should be removed if a single algorithm is found + # to be faster for both small and large arrays. + def _block_force_concatenate(arrays): + arrays, list_ndim, result_ndim, _ = _block_setup(arrays) + return _block_concatenate(arrays, list_ndim, result_ndim) + + def _block_force_slicing(arrays): + arrays, list_ndim, result_ndim, _ = _block_setup(arrays) + return _block_slicing(arrays, list_ndim, result_ndim) + + if request.param == 'force_concatenate': + return _block_force_concatenate + elif request.param == 'force_slicing': + return _block_force_slicing + elif request.param == 'block': + return block + else: + raise ValueError('Unknown blocking request. There is a typo in the tests.') + + def test_returns_copy(self, block): + a = np.eye(3) + b = block(a) + b[0, 0] = 2 + assert b[0, 0] != a[0, 0] + + def test_block_total_size_estimate(self, block): + _, _, _, total_size = _block_setup([1]) + assert total_size == 1 + + _, _, _, total_size = _block_setup([[1]]) + assert total_size == 1 + + _, _, _, total_size = _block_setup([[1, 1]]) + assert total_size == 2 + + _, _, _, total_size = _block_setup([[1], [1]]) + assert total_size == 2 + + _, _, _, total_size = _block_setup([[1, 2], [3, 4]]) + assert total_size == 4 + + def test_block_simple_row_wise(self, block): + a_2d = np.ones((2, 2)) + b_2d = 2 * a_2d + desired = np.array([[1, 1, 2, 2], + [1, 1, 2, 2]]) + result = block([a_2d, b_2d]) + assert_equal(desired, result) + + def test_block_simple_column_wise(self, block): + a_2d = np.ones((2, 2)) + b_2d = 2 * a_2d + expected = np.array([[1, 1], + [1, 1], + [2, 2], + [2, 2]]) + result = block([[a_2d], [b_2d]]) + assert_equal(expected, result) + + def test_block_with_1d_arrays_row_wise(self, block): + # # # 1-D vectors are treated as row arrays + a = np.array([1, 2, 3]) + b = np.array([2, 3, 4]) + expected = np.array([1, 2, 3, 2, 3, 4]) + result = block([a, b]) + assert_equal(expected, result) + + def test_block_with_1d_arrays_multiple_rows(self, block): + a = np.array([1, 2, 3]) + b = np.array([2, 3, 4]) + expected = np.array([[1, 2, 3, 2, 3, 4], + [1, 2, 3, 2, 3, 4]]) + result = block([[a, b], [a, b]]) + assert_equal(expected, result) + + def test_block_with_1d_arrays_column_wise(self, block): + # # # 1-D vectors are treated as row arrays + a_1d = np.array([1, 2, 3]) + b_1d = np.array([2, 3, 4]) + expected = np.array([[1, 2, 3], + [2, 3, 4]]) + result = block([[a_1d], [b_1d]]) + assert_equal(expected, result) + + def test_block_mixed_1d_and_2d(self, block): + a_2d = np.ones((2, 2)) + b_1d = np.array([2, 2]) + result = block([[a_2d], [b_1d]]) + expected = np.array([[1, 1], + [1, 1], + [2, 2]]) + assert_equal(expected, result) + + def test_block_complicated(self, block): + # a bit more complicated + one_2d = np.array([[1, 1, 1]]) + two_2d = np.array([[2, 2, 2]]) + three_2d = np.array([[3, 3, 3, 3, 3, 3]]) + four_1d = np.array([4, 4, 4, 4, 4, 4]) + five_0d = np.array(5) + six_1d = np.array([6, 6, 6, 6, 6]) + zero_2d = np.zeros((2, 6)) + + expected = np.array([[1, 1, 1, 2, 2, 2], + [3, 3, 3, 3, 3, 3], + [4, 4, 4, 4, 4, 4], + [5, 6, 6, 6, 6, 6], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0]]) + + result = block([[one_2d, two_2d], + [three_2d], + [four_1d], + [five_0d, six_1d], + [zero_2d]]) + assert_equal(result, 
expected) + + def test_nested(self, block): + one = np.array([1, 1, 1]) + two = np.array([[2, 2, 2], [2, 2, 2], [2, 2, 2]]) + three = np.array([3, 3, 3]) + four = np.array([4, 4, 4]) + five = np.array(5) + six = np.array([6, 6, 6, 6, 6]) + zero = np.zeros((2, 6)) + + result = block([ + [ + block([ + [one], + [three], + [four] + ]), + two + ], + [five, six], + [zero] + ]) + expected = np.array([[1, 1, 1, 2, 2, 2], + [3, 3, 3, 2, 2, 2], + [4, 4, 4, 2, 2, 2], + [5, 6, 6, 6, 6, 6], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0]]) + + assert_equal(result, expected) + + def test_3d(self, block): + a000 = np.ones((2, 2, 2), int) * 1 + + a100 = np.ones((3, 2, 2), int) * 2 + a010 = np.ones((2, 3, 2), int) * 3 + a001 = np.ones((2, 2, 3), int) * 4 + + a011 = np.ones((2, 3, 3), int) * 5 + a101 = np.ones((3, 2, 3), int) * 6 + a110 = np.ones((3, 3, 2), int) * 7 + + a111 = np.ones((3, 3, 3), int) * 8 + + result = block([ + [ + [a000, a001], + [a010, a011], + ], + [ + [a100, a101], + [a110, a111], + ] + ]) + expected = array([[[1, 1, 4, 4, 4], + [1, 1, 4, 4, 4], + [3, 3, 5, 5, 5], + [3, 3, 5, 5, 5], + [3, 3, 5, 5, 5]], + + [[1, 1, 4, 4, 4], + [1, 1, 4, 4, 4], + [3, 3, 5, 5, 5], + [3, 3, 5, 5, 5], + [3, 3, 5, 5, 5]], + + [[2, 2, 6, 6, 6], + [2, 2, 6, 6, 6], + [7, 7, 8, 8, 8], + [7, 7, 8, 8, 8], + [7, 7, 8, 8, 8]], + + [[2, 2, 6, 6, 6], + [2, 2, 6, 6, 6], + [7, 7, 8, 8, 8], + [7, 7, 8, 8, 8], + [7, 7, 8, 8, 8]], + + [[2, 2, 6, 6, 6], + [2, 2, 6, 6, 6], + [7, 7, 8, 8, 8], + [7, 7, 8, 8, 8], + [7, 7, 8, 8, 8]]]) + + assert_array_equal(result, expected) + + def test_block_with_mismatched_shape(self, block): + a = np.array([0, 0]) + b = np.eye(2) + assert_raises(ValueError, block, [a, b]) + assert_raises(ValueError, block, [b, a]) + + to_block = [[np.ones((2,3)), np.ones((2,2))], + [np.ones((2,2)), np.ones((2,2))]] + assert_raises(ValueError, block, to_block) + def test_no_lists(self, block): + assert_equal(block(1), np.array(1)) + assert_equal(block(np.eye(3)), np.eye(3)) + + def test_invalid_nesting(self, block): + msg = 'depths are mismatched' + assert_raises_regex(ValueError, msg, block, [1, [2]]) + assert_raises_regex(ValueError, msg, block, [1, []]) + assert_raises_regex(ValueError, msg, block, [[1], 2]) + assert_raises_regex(ValueError, msg, block, [[], 2]) + assert_raises_regex(ValueError, msg, block, [ + [[1], [2]], + [[3, 4]], + [5] # missing brackets + ]) + + def test_empty_lists(self, block): + assert_raises_regex(ValueError, 'empty', block, []) + assert_raises_regex(ValueError, 'empty', block, [[]]) + assert_raises_regex(ValueError, 'empty', block, [[1], []]) + + def test_tuple(self, block): + assert_raises_regex(TypeError, 'tuple', block, ([1, 2], [3, 4])) + assert_raises_regex(TypeError, 'tuple', block, [(1, 2), (3, 4)]) + + def test_different_ndims(self, block): + a = 1. + b = 2 * np.ones((1, 2)) + c = 3 * np.ones((1, 1, 3)) + + result = block([a, b, c]) + expected = np.array([[[1., 2., 2., 3., 3., 3.]]]) + + assert_equal(result, expected) + + def test_different_ndims_depths(self, block): + a = 1. 
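+ # np.block promotes each leaf to the largest ndim present (3 here, from
+ # `c`) by prepending length-1 axes, so the scalar and the (1, 2) row below
+ # both land inside the single (1, 3, 3) output plane.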
+ b = 2 * np.ones((1, 2)) + c = 3 * np.ones((1, 2, 3)) + + result = block([[a, b], [c]]) + expected = np.array([[[1., 2., 2.], + [3., 3., 3.], + [3., 3., 3.]]]) + + assert_equal(result, expected) + + def test_block_memory_order(self, block): + # 3D + arr_c = np.zeros((3,)*3, order='C') + arr_f = np.zeros((3,)*3, order='F') + + b_c = [[[arr_c, arr_c], + [arr_c, arr_c]], + [[arr_c, arr_c], + [arr_c, arr_c]]] + + b_f = [[[arr_f, arr_f], + [arr_f, arr_f]], + [[arr_f, arr_f], + [arr_f, arr_f]]] + + assert block(b_c).flags['C_CONTIGUOUS'] + assert block(b_f).flags['F_CONTIGUOUS'] + + arr_c = np.zeros((3, 3), order='C') + arr_f = np.zeros((3, 3), order='F') + # 2D + b_c = [[arr_c, arr_c], + [arr_c, arr_c]] + + b_f = [[arr_f, arr_f], + [arr_f, arr_f]] + + assert block(b_c).flags['C_CONTIGUOUS'] + assert block(b_f).flags['F_CONTIGUOUS'] + + +def test_block_dispatcher(): + class ArrayLike: + pass + a = ArrayLike() + b = ArrayLike() + c = ArrayLike() + assert_equal(list(_block_dispatcher(a)), [a]) + assert_equal(list(_block_dispatcher([a])), [a]) + assert_equal(list(_block_dispatcher([a, b])), [a, b]) + assert_equal(list(_block_dispatcher([[a], [b, [c]]])), [a, b, c]) + # don't recurse into non-lists + assert_equal(list(_block_dispatcher((a, b))), [(a, b)]) diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_simd.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_simd.py new file mode 100644 index 0000000..12a67c4 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_simd.py @@ -0,0 +1,981 @@ +# NOTE: Please avoid the use of numpy.testing since NPYV intrinsics +# may be involved in their functionality. +import pytest, math, re +import itertools +from numpy.core._simd import targets +from numpy.core._multiarray_umath import __cpu_baseline__ + +class _Test_Utility: + # submodule of the desired SIMD extension, e.g. targets["AVX512F"] + npyv = None + # the current data type suffix e.g. 's8' + sfx = None + # target name can be 'baseline' or one or more of CPU features + target_name = None + + def __getattr__(self, attr): + """ + To call NPV intrinsics without the attribute 'npyv' and + auto suffixing intrinsics according to class attribute 'sfx' + """ + return getattr(self.npyv, attr + "_" + self.sfx) + + def _data(self, start=None, count=None, reverse=False): + """ + Create list of consecutive numbers according to number of vector's lanes. 
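+ For example, on a hypothetical 4-lane integer suffix, _data() gives
+ [1, 2, 3, 4] and _data(reverse=True) gives [4, 3, 2, 1]; for a float
+ suffix the same values are returned as floats.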
+ """ + if start is None: + start = 1 + if count is None: + count = self.nlanes + rng = range(start, start + count) + if reverse: + rng = reversed(rng) + if self._is_fp(): + return [x / 1.0 for x in rng] + return list(rng) + + def _is_unsigned(self): + return self.sfx[0] == 'u' + + def _is_signed(self): + return self.sfx[0] == 's' + + def _is_fp(self): + return self.sfx[0] == 'f' + + def _scalar_size(self): + return int(self.sfx[1:]) + + def _int_clip(self, seq): + if self._is_fp(): + return seq + max_int = self._int_max() + min_int = self._int_min() + return [min(max(v, min_int), max_int) for v in seq] + + def _int_max(self): + if self._is_fp(): + return None + max_u = self._to_unsigned(self.setall(-1))[0] + if self._is_signed(): + return max_u // 2 + return max_u + + def _int_min(self): + if self._is_fp(): + return None + if self._is_unsigned(): + return 0 + return -(self._int_max() + 1) + + def _true_mask(self): + max_unsig = getattr(self.npyv, "setall_u" + self.sfx[1:])(-1) + return max_unsig[0] + + def _to_unsigned(self, vector): + if isinstance(vector, (list, tuple)): + return getattr(self.npyv, "load_u" + self.sfx[1:])(vector) + else: + sfx = vector.__name__.replace("npyv_", "") + if sfx[0] == "b": + cvt_intrin = "cvt_u{0}_b{0}" + else: + cvt_intrin = "reinterpret_u{0}_{1}" + return getattr(self.npyv, cvt_intrin.format(sfx[1:], sfx))(vector) + + def _pinfinity(self): + v = self.npyv.setall_u32(0x7f800000) + return self.npyv.reinterpret_f32_u32(v)[0] + + def _ninfinity(self): + v = self.npyv.setall_u32(0xff800000) + return self.npyv.reinterpret_f32_u32(v)[0] + + def _nan(self): + v = self.npyv.setall_u32(0x7fc00000) + return self.npyv.reinterpret_f32_u32(v)[0] + + def _cpu_features(self): + target = self.target_name + if target == "baseline": + target = __cpu_baseline__ + else: + target = target.split('__') # multi-target separator + return ' '.join(target) + +class _SIMD_BOOL(_Test_Utility): + """ + To test all boolean vector types at once + """ + def _data(self, start=None, count=None, reverse=False): + nlanes = getattr(self.npyv, "nlanes_u" + self.sfx[1:]) + true_mask = self._true_mask() + rng = range(nlanes) + if reverse: + rng = reversed(rng) + return [true_mask if x % 2 else 0 for x in rng] + + def _load_b(self, data): + len_str = self.sfx[1:] + load = getattr(self.npyv, "load_u" + len_str) + cvt = getattr(self.npyv, f"cvt_b{len_str}_u{len_str}") + return cvt(load(data)) + + def test_operators_logical(self): + """ + Logical operations for boolean types. 
+ Test intrinsics: + npyv_xor_##SFX, npyv_and_##SFX, npyv_or_##SFX, npyv_not_##SFX + """ + data_a = self._data() + data_b = self._data(reverse=True) + vdata_a = self._load_b(data_a) + vdata_b = self._load_b(data_b) + + data_and = [a & b for a, b in zip(data_a, data_b)] + vand = getattr(self, "and")(vdata_a, vdata_b) + assert vand == data_and + + data_or = [a | b for a, b in zip(data_a, data_b)] + vor = getattr(self, "or")(vdata_a, vdata_b) + assert vor == data_or + + data_xor = [a ^ b for a, b in zip(data_a, data_b)] + vxor = getattr(self, "xor")(vdata_a, vdata_b) + assert vxor == data_xor + + vnot = getattr(self, "not")(vdata_a) + assert vnot == data_b + + def test_tobits(self): + data2bits = lambda data: sum([int(x != 0) << i for i, x in enumerate(data, 0)]) + for data in (self._data(), self._data(reverse=True)): + vdata = self._load_b(data) + data_bits = data2bits(data) + tobits = bin(self.tobits(vdata)) + assert tobits == bin(data_bits) + +class _SIMD_INT(_Test_Utility): + """ + To test all integer vector types at once + """ + def test_operators_shift(self): + if self.sfx in ("u8", "s8"): + return + + data_a = self._data(self._int_max() - self.nlanes) + data_b = self._data(self._int_min(), reverse=True) + vdata_a, vdata_b = self.load(data_a), self.load(data_b) + + for count in range(self._scalar_size()): + # load to cast + data_shl_a = self.load([a << count for a in data_a]) + # left shift + shl = self.shl(vdata_a, count) + assert shl == data_shl_a + # load to cast + data_shr_a = self.load([a >> count for a in data_a]) + # right shift + shr = self.shr(vdata_a, count) + assert shr == data_shr_a + + # shift by zero or max or out-range immediate constant is not applicable and illogical + for count in range(1, self._scalar_size()): + # load to cast + data_shl_a = self.load([a << count for a in data_a]) + # left shift by an immediate constant + shli = self.shli(vdata_a, count) + assert shli == data_shl_a + # load to cast + data_shr_a = self.load([a >> count for a in data_a]) + # right shift by an immediate constant + shri = self.shri(vdata_a, count) + assert shri == data_shr_a + + def test_arithmetic_subadd_saturated(self): + if self.sfx in ("u32", "s32", "u64", "s64"): + return + + data_a = self._data(self._int_max() - self.nlanes) + data_b = self._data(self._int_min(), reverse=True) + vdata_a, vdata_b = self.load(data_a), self.load(data_b) + + data_adds = self._int_clip([a + b for a, b in zip(data_a, data_b)]) + adds = self.adds(vdata_a, vdata_b) + assert adds == data_adds + + data_subs = self._int_clip([a - b for a, b in zip(data_a, data_b)]) + subs = self.subs(vdata_a, vdata_b) + assert subs == data_subs + + def test_math_max_min(self): + data_a = self._data() + data_b = self._data(self.nlanes) + vdata_a, vdata_b = self.load(data_a), self.load(data_b) + + data_max = [max(a, b) for a, b in zip(data_a, data_b)] + simd_max = self.max(vdata_a, vdata_b) + assert simd_max == data_max + + data_min = [min(a, b) for a, b in zip(data_a, data_b)] + simd_min = self.min(vdata_a, vdata_b) + assert simd_min == data_min + +class _SIMD_FP32(_Test_Utility): + """ + To only test single precision + """ + def test_conversions(self): + """ + Round to nearest even integer, assume CPU control register is set to rounding. + Test intrinsics: + npyv_round_s32_##SFX + """ + features = self._cpu_features() + if not self.npyv.simd_f64 and re.match(r".*(NEON|ASIMD)", features): + # very costly to emulate nearest even on Armv7 + # instead we round halves to up. e.g. 
0.5 -> 1, -0.5 -> -1 + _round = lambda v: int(v + (0.5 if v >= 0 else -0.5)) + else: + _round = round + vdata_a = self.load(self._data()) + vdata_a = self.sub(vdata_a, self.setall(0.5)) + data_round = [_round(x) for x in vdata_a] + vround = self.round_s32(vdata_a) + assert vround == data_round + +class _SIMD_FP64(_Test_Utility): + """ + To only test double precision + """ + def test_conversions(self): + """ + Round to nearest even integer, assume CPU control register is set to rounding. + Test intrinsics: + npyv_round_s32_##SFX + """ + vdata_a = self.load(self._data()) + vdata_a = self.sub(vdata_a, self.setall(0.5)) + vdata_b = self.mul(vdata_a, self.setall(-1.5)) + data_round = [round(x) for x in list(vdata_a) + list(vdata_b)] + vround = self.round_s32(vdata_a, vdata_b) + assert vround == data_round + +class _SIMD_FP(_Test_Utility): + """ + To test all float vector types at once + """ + def test_arithmetic_fused(self): + vdata_a, vdata_b, vdata_c = [self.load(self._data())]*3 + vdata_cx2 = self.add(vdata_c, vdata_c) + # multiply and add, a*b + c + data_fma = self.load([a * b + c for a, b, c in zip(vdata_a, vdata_b, vdata_c)]) + fma = self.muladd(vdata_a, vdata_b, vdata_c) + assert fma == data_fma + # multiply and subtract, a*b - c + fms = self.mulsub(vdata_a, vdata_b, vdata_c) + data_fms = self.sub(data_fma, vdata_cx2) + assert fms == data_fms + # negate multiply and add, -(a*b) + c + nfma = self.nmuladd(vdata_a, vdata_b, vdata_c) + data_nfma = self.sub(vdata_cx2, data_fma) + assert nfma == data_nfma + # negate multiply and subtract, -(a*b) - c + nfms = self.nmulsub(vdata_a, vdata_b, vdata_c) + data_nfms = self.mul(data_fma, self.setall(-1)) + assert nfms == data_nfms + + def test_abs(self): + pinf, ninf, nan = self._pinfinity(), self._ninfinity(), self._nan() + data = self._data() + vdata = self.load(self._data()) + + abs_cases = ((-0, 0), (ninf, pinf), (pinf, pinf), (nan, nan)) + for case, desired in abs_cases: + data_abs = [desired]*self.nlanes + vabs = self.abs(self.setall(case)) + assert vabs == pytest.approx(data_abs, nan_ok=True) + + vabs = self.abs(self.mul(vdata, self.setall(-1))) + assert vabs == data + + def test_sqrt(self): + pinf, ninf, nan = self._pinfinity(), self._ninfinity(), self._nan() + data = self._data() + vdata = self.load(self._data()) + + sqrt_cases = ((-0.0, -0.0), (0.0, 0.0), (-1.0, nan), (ninf, nan), (pinf, pinf)) + for case, desired in sqrt_cases: + data_sqrt = [desired]*self.nlanes + sqrt = self.sqrt(self.setall(case)) + assert sqrt == pytest.approx(data_sqrt, nan_ok=True) + + data_sqrt = self.load([math.sqrt(x) for x in data]) # load to truncate precision + sqrt = self.sqrt(vdata) + assert sqrt == data_sqrt + + def test_square(self): + pinf, ninf, nan = self._pinfinity(), self._ninfinity(), self._nan() + data = self._data() + vdata = self.load(self._data()) + # square + square_cases = ((nan, nan), (pinf, pinf), (ninf, pinf)) + for case, desired in square_cases: + data_square = [desired]*self.nlanes + square = self.square(self.setall(case)) + assert square == pytest.approx(data_square, nan_ok=True) + + data_square = [x*x for x in data] + square = self.square(vdata) + assert square == data_square + + @pytest.mark.parametrize("intrin, func", [("self.ceil", math.ceil), + ("self.trunc", math.trunc)]) + def test_rounding(self, intrin, func): + """ + Test intrinsics: + npyv_ceil_##SFX + npyv_trunc_##SFX + """ + intrin_name = intrin + intrin = eval(intrin) + pinf, ninf, nan = self._pinfinity(), self._ninfinity(), self._nan() + # special cases + round_cases = ((nan, 
nan), (pinf, pinf), (ninf, ninf)) + for case, desired in round_cases: + data_round = [desired]*self.nlanes + _round = intrin(self.setall(case)) + assert _round == pytest.approx(data_round, nan_ok=True) + for x in range(0, 2**20, 256**2): + for w in (-1.05, -1.10, -1.15, 1.05, 1.10, 1.15): + data = [x*w+a for a in range(self.nlanes)] + vdata = self.load(data) + data_round = [func(x) for x in data] + _round = intrin(vdata) + assert _round == data_round + # signed zero + if "ceil" in intrin_name or "trunc" in intrin_name: + for w in (-0.25, -0.30, -0.45): + _round = self._to_unsigned(intrin(self.setall(w))) + data_round = self._to_unsigned(self.setall(-0.0)) + assert _round == data_round + + def test_max(self): + """ + Test intrinsics: + npyv_max_##SFX + npyv_maxp_##SFX + """ + data_a = self._data() + data_b = self._data(self.nlanes) + vdata_a, vdata_b = self.load(data_a), self.load(data_b) + data_max = [max(a, b) for a, b in zip(data_a, data_b)] + _max = self.max(vdata_a, vdata_b) + assert _max == data_max + maxp = self.maxp(vdata_a, vdata_b) + assert maxp == data_max + # test IEEE standards + pinf, ninf, nan = self._pinfinity(), self._ninfinity(), self._nan() + max_cases = ((nan, nan, nan), (nan, 10, 10), (10, nan, 10), + (pinf, pinf, pinf), (pinf, 10, pinf), (10, pinf, pinf), + (ninf, ninf, ninf), (ninf, 10, 10), (10, ninf, 10), + (10, 0, 10), (10, -10, 10)) + for case_operand1, case_operand2, desired in max_cases: + data_max = [desired]*self.nlanes + vdata_a = self.setall(case_operand1) + vdata_b = self.setall(case_operand2) + maxp = self.maxp(vdata_a, vdata_b) + assert maxp == pytest.approx(data_max, nan_ok=True) + if nan in (case_operand1, case_operand2, desired): + continue + _max = self.max(vdata_a, vdata_b) + assert _max == data_max + + def test_min(self): + """ + Test intrinsics: + npyv_min_##SFX + npyv_minp_##SFX + """ + data_a = self._data() + data_b = self._data(self.nlanes) + vdata_a, vdata_b = self.load(data_a), self.load(data_b) + data_min = [min(a, b) for a, b in zip(data_a, data_b)] + _min = self.min(vdata_a, vdata_b) + assert _min == data_min + minp = self.minp(vdata_a, vdata_b) + assert minp == data_min + # test IEEE standards + pinf, ninf, nan = self._pinfinity(), self._ninfinity(), self._nan() + min_cases = ((nan, nan, nan), (nan, 10, 10), (10, nan, 10), + (pinf, pinf, pinf), (pinf, 10, 10), (10, pinf, 10), + (ninf, ninf, ninf), (ninf, 10, ninf), (10, ninf, ninf), + (10, 0, 0), (10, -10, -10)) + for case_operand1, case_operand2, desired in min_cases: + data_min = [desired]*self.nlanes + vdata_a = self.setall(case_operand1) + vdata_b = self.setall(case_operand2) + minp = self.minp(vdata_a, vdata_b) + assert minp == pytest.approx(data_min, nan_ok=True) + if nan in (case_operand1, case_operand2, desired): + continue + _min = self.min(vdata_a, vdata_b) + assert _min == data_min + + def test_reciprocal(self): + pinf, ninf, nan = self._pinfinity(), self._ninfinity(), self._nan() + data = self._data() + vdata = self.load(self._data()) + + recip_cases = ((nan, nan), (pinf, 0.0), (ninf, -0.0), (0.0, pinf), (-0.0, ninf)) + for case, desired in recip_cases: + data_recip = [desired]*self.nlanes + recip = self.recip(self.setall(case)) + assert recip == pytest.approx(data_recip, nan_ok=True) + + data_recip = self.load([1/x for x in data]) # load to truncate precision + recip = self.recip(vdata) + assert recip == data_recip + + def test_special_cases(self): + """ + Compare Not NaN. 
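+ npyv_notnan yields a boolean mask set only for lanes that are not
+ NaN, so an all-NaN input must come back as an all-zero mask.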
Test intrinsics: + npyv_notnan_##SFX + """ + nnan = self.notnan(self.setall(self._nan())) + assert nnan == [0]*self.nlanes + +class _SIMD_ALL(_Test_Utility): + """ + To test all vector types at once + """ + def test_memory_load(self): + data = self._data() + # unaligned load + load_data = self.load(data) + assert load_data == data + # aligned load + loada_data = self.loada(data) + assert loada_data == data + # stream load + loads_data = self.loads(data) + assert loads_data == data + # load lower part + loadl = self.loadl(data) + loadl_half = list(loadl)[:self.nlanes//2] + data_half = data[:self.nlanes//2] + assert loadl_half == data_half + assert loadl != data # detect overflow + + def test_memory_store(self): + data = self._data() + vdata = self.load(data) + # unaligned store + store = [0] * self.nlanes + self.store(store, vdata) + assert store == data + # aligned store + store_a = [0] * self.nlanes + self.storea(store_a, vdata) + assert store_a == data + # stream store + store_s = [0] * self.nlanes + self.stores(store_s, vdata) + assert store_s == data + # store lower part + store_l = [0] * self.nlanes + self.storel(store_l, vdata) + assert store_l[:self.nlanes//2] == data[:self.nlanes//2] + assert store_l != vdata # detect overflow + # store higher part + store_h = [0] * self.nlanes + self.storeh(store_h, vdata) + assert store_h[:self.nlanes//2] == data[self.nlanes//2:] + assert store_h != vdata # detect overflow + + def test_memory_partial_load(self): + if self.sfx in ("u8", "s8", "u16", "s16"): + return + + data = self._data() + lanes = list(range(1, self.nlanes + 1)) + lanes += [self.nlanes**2, self.nlanes**4] # test out of range + for n in lanes: + load_till = self.load_till(data, n, 15) + data_till = data[:n] + [15] * (self.nlanes-n) + assert load_till == data_till + load_tillz = self.load_tillz(data, n) + data_tillz = data[:n] + [0] * (self.nlanes-n) + assert load_tillz == data_tillz + + def test_memory_partial_store(self): + if self.sfx in ("u8", "s8", "u16", "s16"): + return + + data = self._data() + data_rev = self._data(reverse=True) + vdata = self.load(data) + lanes = list(range(1, self.nlanes + 1)) + lanes += [self.nlanes**2, self.nlanes**4] + for n in lanes: + data_till = data_rev.copy() + data_till[:n] = data[:n] + store_till = self._data(reverse=True) + self.store_till(store_till, n, vdata) + assert store_till == data_till + + def test_memory_noncont_load(self): + if self.sfx in ("u8", "s8", "u16", "s16"): + return + + for stride in range(1, 64): + data = self._data(count=stride*self.nlanes) + data_stride = data[::stride] + loadn = self.loadn(data, stride) + assert loadn == data_stride + + for stride in range(-64, 0): + data = self._data(stride, -stride*self.nlanes) + data_stride = self.load(data[::stride]) # cast unsigned + loadn = self.loadn(data, stride) + assert loadn == data_stride + + def test_memory_noncont_partial_load(self): + if self.sfx in ("u8", "s8", "u16", "s16"): + return + + lanes = list(range(1, self.nlanes + 1)) + lanes += [self.nlanes**2, self.nlanes**4] + for stride in range(1, 64): + data = self._data(count=stride*self.nlanes) + data_stride = data[::stride] + for n in lanes: + data_stride_till = data_stride[:n] + [15] * (self.nlanes-n) + loadn_till = self.loadn_till(data, stride, n, 15) + assert loadn_till == data_stride_till + data_stride_tillz = data_stride[:n] + [0] * (self.nlanes-n) + loadn_tillz = self.loadn_tillz(data, stride, n) + assert loadn_tillz == data_stride_tillz + + for stride in range(-64, 0): + data = self._data(stride, 
-stride*self.nlanes) + data_stride = list(self.load(data[::stride])) # cast unsigned + for n in lanes: + data_stride_till = data_stride[:n] + [15] * (self.nlanes-n) + loadn_till = self.loadn_till(data, stride, n, 15) + assert loadn_till == data_stride_till + data_stride_tillz = data_stride[:n] + [0] * (self.nlanes-n) + loadn_tillz = self.loadn_tillz(data, stride, n) + assert loadn_tillz == data_stride_tillz + + def test_memory_noncont_store(self): + if self.sfx in ("u8", "s8", "u16", "s16"): + return + + vdata = self.load(self._data()) + for stride in range(1, 64): + data = [15] * stride * self.nlanes + data[::stride] = vdata + storen = [15] * stride * self.nlanes + storen += [127]*64 + self.storen(storen, stride, vdata) + assert storen[:-64] == data + assert storen[-64:] == [127]*64 # detect overflow + + for stride in range(-64, 0): + data = [15] * -stride * self.nlanes + data[::stride] = vdata + storen = [127]*64 + storen += [15] * -stride * self.nlanes + self.storen(storen, stride, vdata) + assert storen[64:] == data + assert storen[:64] == [127]*64 # detect overflow + + def test_memory_noncont_partial_store(self): + if self.sfx in ("u8", "s8", "u16", "s16"): + return + + data = self._data() + vdata = self.load(data) + lanes = list(range(1, self.nlanes + 1)) + lanes += [self.nlanes**2, self.nlanes**4] + for stride in range(1, 64): + for n in lanes: + data_till = [15] * stride * self.nlanes + data_till[::stride] = data[:n] + [15] * (self.nlanes-n) + storen_till = [15] * stride * self.nlanes + storen_till += [127]*64 + self.storen_till(storen_till, stride, n, vdata) + assert storen_till[:-64] == data_till + assert storen_till[-64:] == [127]*64 # detect overflow + + for stride in range(-64, 0): + for n in lanes: + data_till = [15] * -stride * self.nlanes + data_till[::stride] = data[:n] + [15] * (self.nlanes-n) + storen_till = [127]*64 + storen_till += [15] * -stride * self.nlanes + self.storen_till(storen_till, stride, n, vdata) + assert storen_till[64:] == data_till + assert storen_till[:64] == [127]*64 # detect overflow + + def test_misc(self): + broadcast_zero = self.zero() + assert broadcast_zero == [0] * self.nlanes + for i in range(1, 10): + broadcasti = self.setall(i) + assert broadcasti == [i] * self.nlanes + + data_a, data_b = self._data(), self._data(reverse=True) + vdata_a, vdata_b = self.load(data_a), self.load(data_b) + + # py level of npyv_set_* don't support ignoring the extra specified lanes or + # fill non-specified lanes with zero. + vset = self.set(*data_a) + assert vset == data_a + # py level of npyv_setf_* don't support ignoring the extra specified lanes or + # fill non-specified lanes with the specified scalar. 
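+ # In the C API the first argument of npyv_setf_##SFX is the fill value for
+ # any lanes left unspecified; the Python shim requires every lane to be
+ # passed explicitly, so the fill (10) below is effectively unused.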
+ vsetf = self.setf(10, *data_a) + assert vsetf == data_a + + # We're testing the sanity of _simd's type-vector, + # reinterpret* intrinsics itself are tested via compiler + # during the build of _simd module + sfxes = ["u8", "s8", "u16", "s16", "u32", "s32", "u64", "s64", "f32"] + if self.npyv.simd_f64: + sfxes.append("f64") + for sfx in sfxes: + vec_name = getattr(self, "reinterpret_" + sfx)(vdata_a).__name__ + assert vec_name == "npyv_" + sfx + + # select & mask operations + select_a = self.select(self.cmpeq(self.zero(), self.zero()), vdata_a, vdata_b) + assert select_a == data_a + select_b = self.select(self.cmpneq(self.zero(), self.zero()), vdata_a, vdata_b) + assert select_b == data_b + + # cleanup intrinsic is only used with AVX for + # zeroing registers to avoid the AVX-SSE transition penalty, + # so nothing to test here + self.npyv.cleanup() + + def test_reorder(self): + data_a, data_b = self._data(), self._data(reverse=True) + vdata_a, vdata_b = self.load(data_a), self.load(data_b) + # lower half part + data_a_lo = data_a[:self.nlanes//2] + data_b_lo = data_b[:self.nlanes//2] + # higher half part + data_a_hi = data_a[self.nlanes//2:] + data_b_hi = data_b[self.nlanes//2:] + # combine two lower parts + combinel = self.combinel(vdata_a, vdata_b) + assert combinel == data_a_lo + data_b_lo + # combine two higher parts + combineh = self.combineh(vdata_a, vdata_b) + assert combineh == data_a_hi + data_b_hi + # combine x2 + combine = self.combine(vdata_a, vdata_b) + assert combine == (data_a_lo + data_b_lo, data_a_hi + data_b_hi) + # zip(interleave) + data_zipl = [v for p in zip(data_a_lo, data_b_lo) for v in p] + data_ziph = [v for p in zip(data_a_hi, data_b_hi) for v in p] + vzip = self.zip(vdata_a, vdata_b) + assert vzip == (data_zipl, data_ziph) + + def test_reorder_rev64(self): + # Reverse elements of each 64-bit lane + ssize = self._scalar_size() + if ssize == 64: + return + data_rev64 = [ + y for x in range(0, self.nlanes, 64//ssize) + for y in reversed(range(x, x + 64//ssize)) + ] + rev64 = self.rev64(self.load(range(self.nlanes))) + assert rev64 == data_rev64 + + def test_operators_comparison(self): + if self._is_fp(): + data_a = self._data() + else: + data_a = self._data(self._int_max() - self.nlanes) + data_b = self._data(self._int_min(), reverse=True) + vdata_a, vdata_b = self.load(data_a), self.load(data_b) + + mask_true = self._true_mask() + def to_bool(vector): + return [lane == mask_true for lane in vector] + # equal + data_eq = [a == b for a, b in zip(data_a, data_b)] + cmpeq = to_bool(self.cmpeq(vdata_a, vdata_b)) + assert cmpeq == data_eq + # not equal + data_neq = [a != b for a, b in zip(data_a, data_b)] + cmpneq = to_bool(self.cmpneq(vdata_a, vdata_b)) + assert cmpneq == data_neq + # greater than + data_gt = [a > b for a, b in zip(data_a, data_b)] + cmpgt = to_bool(self.cmpgt(vdata_a, vdata_b)) + assert cmpgt == data_gt + # greater than and equal + data_ge = [a >= b for a, b in zip(data_a, data_b)] + cmpge = to_bool(self.cmpge(vdata_a, vdata_b)) + assert cmpge == data_ge + # less than + data_lt = [a < b for a, b in zip(data_a, data_b)] + cmplt = to_bool(self.cmplt(vdata_a, vdata_b)) + assert cmplt == data_lt + # less than and equal + data_le = [a <= b for a, b in zip(data_a, data_b)] + cmple = to_bool(self.cmple(vdata_a, vdata_b)) + assert cmple == data_le + + def test_operators_logical(self): + if self._is_fp(): + data_a = self._data() + else: + data_a = self._data(self._int_max() - self.nlanes) + data_b = self._data(self._int_min(), reverse=True) + vdata_a, vdata_b 
= self.load(data_a), self.load(data_b) + + if self._is_fp(): + data_cast_a = self._to_unsigned(vdata_a) + data_cast_b = self._to_unsigned(vdata_b) + cast, cast_data = self._to_unsigned, self._to_unsigned + else: + data_cast_a, data_cast_b = data_a, data_b + cast, cast_data = lambda a: a, self.load + + data_xor = cast_data([a ^ b for a, b in zip(data_cast_a, data_cast_b)]) + vxor = cast(self.xor(vdata_a, vdata_b)) + assert vxor == data_xor + + data_or = cast_data([a | b for a, b in zip(data_cast_a, data_cast_b)]) + vor = cast(getattr(self, "or")(vdata_a, vdata_b)) + assert vor == data_or + + data_and = cast_data([a & b for a, b in zip(data_cast_a, data_cast_b)]) + vand = cast(getattr(self, "and")(vdata_a, vdata_b)) + assert vand == data_and + + data_not = cast_data([~a for a in data_cast_a]) + vnot = cast(getattr(self, "not")(vdata_a)) + assert vnot == data_not + + def test_conversion_boolean(self): + bsfx = "b" + self.sfx[1:] + to_boolean = getattr(self.npyv, "cvt_%s_%s" % (bsfx, self.sfx)) + from_boolean = getattr(self.npyv, "cvt_%s_%s" % (self.sfx, bsfx)) + + false_vb = to_boolean(self.setall(0)) + true_vb = self.cmpeq(self.setall(0), self.setall(0)) + assert false_vb != true_vb + + false_vsfx = from_boolean(false_vb) + true_vsfx = from_boolean(true_vb) + assert false_vsfx != true_vsfx + + def test_conversion_expand(self): + """ + Test expand intrinsics: + npyv_expand_u16_u8 + npyv_expand_u32_u16 + """ + if self.sfx not in ("u8", "u16"): + return + totype = self.sfx[0]+str(int(self.sfx[1:])*2) + expand = getattr(self.npyv, f"expand_{totype}_{self.sfx}") + # close enough from the edge to detect any deviation + data = self._data(self._int_max() - self.nlanes) + vdata = self.load(data) + edata = expand(vdata) + # lower half part + data_lo = data[:self.nlanes//2] + # higher half part + data_hi = data[self.nlanes//2:] + assert edata == (data_lo, data_hi) + + def test_arithmetic_subadd(self): + if self._is_fp(): + data_a = self._data() + else: + data_a = self._data(self._int_max() - self.nlanes) + data_b = self._data(self._int_min(), reverse=True) + vdata_a, vdata_b = self.load(data_a), self.load(data_b) + + # non-saturated + data_add = self.load([a + b for a, b in zip(data_a, data_b)]) # load to cast + add = self.add(vdata_a, vdata_b) + assert add == data_add + data_sub = self.load([a - b for a, b in zip(data_a, data_b)]) + sub = self.sub(vdata_a, vdata_b) + assert sub == data_sub + + def test_arithmetic_mul(self): + if self.sfx in ("u64", "s64"): + return + + if self._is_fp(): + data_a = self._data() + else: + data_a = self._data(self._int_max() - self.nlanes) + data_b = self._data(self._int_min(), reverse=True) + vdata_a, vdata_b = self.load(data_a), self.load(data_b) + + data_mul = self.load([a * b for a, b in zip(data_a, data_b)]) + mul = self.mul(vdata_a, vdata_b) + assert mul == data_mul + + def test_arithmetic_div(self): + if not self._is_fp(): + return + + data_a, data_b = self._data(), self._data(reverse=True) + vdata_a, vdata_b = self.load(data_a), self.load(data_b) + + # load to truncate f64 to precision of f32 + data_div = self.load([a / b for a, b in zip(data_a, data_b)]) + div = self.div(vdata_a, vdata_b) + assert div == data_div + + def test_arithmetic_intdiv(self): + """ + Test integer division intrinsics: + npyv_divisor_##sfx + npyv_divc_##sfx + """ + if self._is_fp(): + return + + int_min = self._int_min() + def trunc_div(a, d): + """ + Divide towards zero works with large integers > 2^53, + and wrap around overflow similar to what C does. 
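+ Worked example: trunc_div(-7, 2) == -3 (rounding toward zero), while
+ Python's floor division gives -7 // 2 == -4; the sign correction in
+ the last branch applies only when the operand signs differ.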
+ """ + if d == -1 and a == int_min: + return a + sign_a, sign_d = a < 0, d < 0 + if a == 0 or sign_a == sign_d: + return a // d + return (a + sign_d - sign_a) // d + 1 + + data = [1, -int_min] # to test overflow + data += range(0, 2**8, 2**5) + data += range(0, 2**8, 2**5-1) + bsize = self._scalar_size() + if bsize > 8: + data += range(2**8, 2**16, 2**13) + data += range(2**8, 2**16, 2**13-1) + if bsize > 16: + data += range(2**16, 2**32, 2**29) + data += range(2**16, 2**32, 2**29-1) + if bsize > 32: + data += range(2**32, 2**64, 2**61) + data += range(2**32, 2**64, 2**61-1) + # negate + data += [-x for x in data] + for dividend, divisor in itertools.product(data, data): + divisor = self.setall(divisor)[0] # cast + if divisor == 0: + continue + dividend = self.load(self._data(dividend)) + data_divc = [trunc_div(a, divisor) for a in dividend] + divisor_parms = self.divisor(divisor) + divc = self.divc(dividend, divisor_parms) + assert divc == data_divc + + def test_arithmetic_reduce_sum(self): + """ + Test reduce sum intrinsics: + npyv_sum_##sfx + """ + if self.sfx not in ("u32", "u64", "f32", "f64"): + return + # reduce sum + data = self._data() + vdata = self.load(data) + + data_sum = sum(data) + vsum = self.sum(vdata) + assert vsum == data_sum + + def test_arithmetic_reduce_sumup(self): + """ + Test extend reduce sum intrinsics: + npyv_sumup_##sfx + """ + if self.sfx not in ("u8", "u16"): + return + rdata = (0, self.nlanes, self._int_min(), self._int_max()-self.nlanes) + for r in rdata: + data = self._data(r) + vdata = self.load(data) + data_sum = sum(data) + vsum = self.sumup(vdata) + assert vsum == data_sum + + def test_mask_conditional(self): + """ + Conditional addition and subtraction for all supported data types. + Test intrinsics: + npyv_ifadd_##SFX, npyv_ifsub_##SFX + """ + vdata_a = self.load(self._data()) + vdata_b = self.load(self._data(reverse=True)) + true_mask = self.cmpeq(self.zero(), self.zero()) + false_mask = self.cmpneq(self.zero(), self.zero()) + + data_sub = self.sub(vdata_b, vdata_a) + ifsub = self.ifsub(true_mask, vdata_b, vdata_a, vdata_b) + assert ifsub == data_sub + ifsub = self.ifsub(false_mask, vdata_a, vdata_b, vdata_b) + assert ifsub == vdata_b + + data_add = self.add(vdata_b, vdata_a) + ifadd = self.ifadd(true_mask, vdata_b, vdata_a, vdata_b) + assert ifadd == data_add + ifadd = self.ifadd(false_mask, vdata_a, vdata_b, vdata_b) + assert ifadd == vdata_b + +bool_sfx = ("b8", "b16", "b32", "b64") +int_sfx = ("u8", "s8", "u16", "s16", "u32", "s32", "u64", "s64") +fp_sfx = ("f32", "f64") +all_sfx = int_sfx + fp_sfx +tests_registry = { + bool_sfx: _SIMD_BOOL, + int_sfx : _SIMD_INT, + fp_sfx : _SIMD_FP, + ("f32",): _SIMD_FP32, + ("f64",): _SIMD_FP64, + all_sfx : _SIMD_ALL +} +for target_name, npyv in targets.items(): + simd_width = npyv.simd if npyv else '' + pretty_name = target_name.split('__') # multi-target separator + if len(pretty_name) > 1: + # multi-target + pretty_name = f"({' '.join(pretty_name)})" + else: + pretty_name = pretty_name[0] + + skip = "" + skip_sfx = dict() + if not npyv: + skip = f"target '{pretty_name}' isn't supported by current machine" + elif not npyv.simd: + skip = f"target '{pretty_name}' isn't supported by NPYV" + elif not npyv.simd_f64: + skip_sfx["f64"] = f"target '{pretty_name}' doesn't support double-precision" + + for sfxes, cls in tests_registry.items(): + for sfx in sfxes: + skip_m = skip_sfx.get(sfx, skip) + inhr = (cls,) + attr = dict(npyv=targets[target_name], sfx=sfx, target_name=target_name) + tcls = 
type(f"Test{cls.__name__}_{simd_width}_{target_name}_{sfx}", inhr, attr) + if skip_m: + pytest.mark.skip(reason=skip_m)(tcls) + globals()[tcls.__name__] = tcls diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_simd_module.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_simd_module.py new file mode 100644 index 0000000..3d71088 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_simd_module.py @@ -0,0 +1,97 @@ +import pytest +from numpy.core._simd import targets +""" +This testing unit only for checking the sanity of common functionality, +therefore all we need is just to take one submodule that represents any +of enabled SIMD extensions to run the test on it and the second submodule +required to run only one check related to the possibility of mixing +the data types among each submodule. +""" +npyvs = [npyv_mod for npyv_mod in targets.values() if npyv_mod and npyv_mod.simd] +npyv, npyv2 = (npyvs + [None, None])[:2] + +unsigned_sfx = ["u8", "u16", "u32", "u64"] +signed_sfx = ["s8", "s16", "s32", "s64"] +fp_sfx = ["f32"] +if npyv and npyv.simd_f64: + fp_sfx.append("f64") + +int_sfx = unsigned_sfx + signed_sfx +all_sfx = unsigned_sfx + int_sfx + +@pytest.mark.skipif(not npyv, reason="could not find any SIMD extension with NPYV support") +class Test_SIMD_MODULE: + + @pytest.mark.parametrize('sfx', all_sfx) + def test_num_lanes(self, sfx): + nlanes = getattr(npyv, "nlanes_" + sfx) + vector = getattr(npyv, "setall_" + sfx)(1) + assert len(vector) == nlanes + + @pytest.mark.parametrize('sfx', all_sfx) + def test_type_name(self, sfx): + vector = getattr(npyv, "setall_" + sfx)(1) + assert vector.__name__ == "npyv_" + sfx + + def test_raises(self): + a, b = [npyv.setall_u32(1)]*2 + for sfx in all_sfx: + vcb = lambda intrin: getattr(npyv, f"{intrin}_{sfx}") + pytest.raises(TypeError, vcb("add"), a) + pytest.raises(TypeError, vcb("add"), a, b, a) + pytest.raises(TypeError, vcb("setall")) + pytest.raises(TypeError, vcb("setall"), [1]) + pytest.raises(TypeError, vcb("load"), 1) + pytest.raises(ValueError, vcb("load"), [1]) + pytest.raises(ValueError, vcb("store"), [1], getattr(npyv, f"reinterpret_{sfx}_u32")(a)) + + @pytest.mark.skipif(not npyv2, reason=( + "could not find a second SIMD extension with NPYV support" + )) + def test_nomix(self): + # mix among submodules isn't allowed + a = npyv.setall_u32(1) + a2 = npyv2.setall_u32(1) + pytest.raises(TypeError, npyv.add_u32, a2, a2) + pytest.raises(TypeError, npyv2.add_u32, a, a) + + @pytest.mark.parametrize('sfx', unsigned_sfx) + def test_unsigned_overflow(self, sfx): + nlanes = getattr(npyv, "nlanes_" + sfx) + maxu = (1 << int(sfx[1:])) - 1 + maxu_72 = (1 << 72) - 1 + lane = getattr(npyv, "setall_" + sfx)(maxu_72)[0] + assert lane == maxu + lanes = getattr(npyv, "load_" + sfx)([maxu_72] * nlanes) + assert lanes == [maxu] * nlanes + lane = getattr(npyv, "setall_" + sfx)(-1)[0] + assert lane == maxu + lanes = getattr(npyv, "load_" + sfx)([-1] * nlanes) + assert lanes == [maxu] * nlanes + + @pytest.mark.parametrize('sfx', signed_sfx) + def test_signed_overflow(self, sfx): + nlanes = getattr(npyv, "nlanes_" + sfx) + maxs_72 = (1 << 71) - 1 + lane = getattr(npyv, "setall_" + sfx)(maxs_72)[0] + assert lane == -1 + lanes = getattr(npyv, "load_" + sfx)([maxs_72] * nlanes) + assert lanes == [-1] * nlanes + mins_72 = -1 << 71 + lane = getattr(npyv, "setall_" + sfx)(mins_72)[0] + assert lane == 0 + lanes = getattr(npyv, "load_" + sfx)([mins_72] * nlanes) + assert lanes == [0] * nlanes + + def 
test_truncate_f32(self): + f32 = npyv.setall_f32(0.1)[0] + assert f32 != 0.1 + assert round(f32, 1) == 0.1 + + def test_compare(self): + data_range = range(0, npyv.nlanes_u32) + vdata = npyv.load_u32(data_range) + assert vdata == list(data_range) + assert vdata == tuple(data_range) + for i in data_range: + assert vdata[i] == data_range[i] diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_ufunc.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_ufunc.py new file mode 100644 index 0000000..6c56e63 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_ufunc.py @@ -0,0 +1,2510 @@ +import warnings +import itertools +import sys + +import pytest + +import numpy as np +import numpy.core._umath_tests as umt +import numpy.linalg._umath_linalg as uml +import numpy.core._operand_flag_tests as opflag_tests +import numpy.core._rational_tests as _rational_tests +from numpy.testing import ( + assert_, assert_equal, assert_raises, assert_array_equal, + assert_almost_equal, assert_array_almost_equal, assert_no_warnings, + assert_allclose, HAS_REFCOUNT, suppress_warnings + ) +from numpy.testing._private.utils import requires_memory +from numpy.compat import pickle + + +UNARY_UFUNCS = [obj for obj in np.core.umath.__dict__.values() + if isinstance(obj, np.ufunc)] +UNARY_OBJECT_UFUNCS = [uf for uf in UNARY_UFUNCS if "O->O" in uf.types] + + +class TestUfuncKwargs: + def test_kwarg_exact(self): + assert_raises(TypeError, np.add, 1, 2, castingx='safe') + assert_raises(TypeError, np.add, 1, 2, dtypex=int) + assert_raises(TypeError, np.add, 1, 2, extobjx=[4096]) + assert_raises(TypeError, np.add, 1, 2, outx=None) + assert_raises(TypeError, np.add, 1, 2, sigx='ii->i') + assert_raises(TypeError, np.add, 1, 2, signaturex='ii->i') + assert_raises(TypeError, np.add, 1, 2, subokx=False) + assert_raises(TypeError, np.add, 1, 2, wherex=[True]) + + def test_sig_signature(self): + assert_raises(TypeError, np.add, 1, 2, sig='ii->i', + signature='ii->i') + + def test_sig_dtype(self): + assert_raises(TypeError, np.add, 1, 2, sig='ii->i', + dtype=int) + assert_raises(TypeError, np.add, 1, 2, signature='ii->i', + dtype=int) + + def test_extobj_refcount(self): + # Should not segfault with USE_DEBUG. + assert_raises(TypeError, np.add, 1, 2, extobj=[4096], parrot=True) + + +class TestUfuncGenericLoops: + """Test generic loops. + + The loops to be tested are: + + PyUFunc_ff_f_As_dd_d + PyUFunc_ff_f + PyUFunc_dd_d + PyUFunc_gg_g + PyUFunc_FF_F_As_DD_D + PyUFunc_DD_D + PyUFunc_FF_F + PyUFunc_GG_G + PyUFunc_OO_O + PyUFunc_OO_O_method + PyUFunc_f_f_As_d_d + PyUFunc_d_d + PyUFunc_f_f + PyUFunc_g_g + PyUFunc_F_F_As_D_D + PyUFunc_F_F + PyUFunc_D_D + PyUFunc_G_G + PyUFunc_O_O + PyUFunc_O_O_method + PyUFunc_On_Om + + Where: + + f -- float + d -- double + g -- long double + F -- complex float + D -- complex double + G -- complex long double + O -- python object + + It is difficult to assure that each of these loops is entered from the + Python level as the special cased loops are a moving target and the + corresponding types are architecture dependent. We probably need to + define C level testing ufuncs to get at them. For the time being, I've + just looked at the signatures registered in the build directory to find + relevant functions. 
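+ Reading the naming scheme: PyUFunc_ff_f_As_dd_d, for instance, is a
+ binary loop that takes two floats and returns a float, computed by
+ calling the dd->d double-precision kernel and casting back down.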
+ + """ + np_dtypes = [ + (np.single, np.single), (np.single, np.double), + (np.csingle, np.csingle), (np.csingle, np.cdouble), + (np.double, np.double), (np.longdouble, np.longdouble), + (np.cdouble, np.cdouble), (np.clongdouble, np.clongdouble)] + + @pytest.mark.parametrize('input_dtype,output_dtype', np_dtypes) + def test_unary_PyUFunc(self, input_dtype, output_dtype, f=np.exp, x=0, y=1): + xs = np.full(10, input_dtype(x), dtype=output_dtype) + ys = f(xs)[::2] + assert_allclose(ys, y) + assert_equal(ys.dtype, output_dtype) + + def f2(x, y): + return x**y + + @pytest.mark.parametrize('input_dtype,output_dtype', np_dtypes) + def test_binary_PyUFunc(self, input_dtype, output_dtype, f=f2, x=0, y=1): + xs = np.full(10, input_dtype(x), dtype=output_dtype) + ys = f(xs, xs)[::2] + assert_allclose(ys, y) + assert_equal(ys.dtype, output_dtype) + + # class to use in testing object method loops + class foo: + def conjugate(self): + return np.bool_(1) + + def logical_xor(self, obj): + return np.bool_(1) + + def test_unary_PyUFunc_O_O(self): + x = np.ones(10, dtype=object) + assert_(np.all(np.abs(x) == 1)) + + def test_unary_PyUFunc_O_O_method_simple(self, foo=foo): + x = np.full(10, foo(), dtype=object) + assert_(np.all(np.conjugate(x) == True)) + + def test_binary_PyUFunc_OO_O(self): + x = np.ones(10, dtype=object) + assert_(np.all(np.add(x, x) == 2)) + + def test_binary_PyUFunc_OO_O_method(self, foo=foo): + x = np.full(10, foo(), dtype=object) + assert_(np.all(np.logical_xor(x, x))) + + def test_binary_PyUFunc_On_Om_method(self, foo=foo): + x = np.full((10, 2, 3), foo(), dtype=object) + assert_(np.all(np.logical_xor(x, x))) + + def test_python_complex_conjugate(self): + # The conjugate ufunc should fall back to calling the method: + arr = np.array([1+2j, 3-4j], dtype="O") + assert isinstance(arr[0], complex) + res = np.conjugate(arr) + assert res.dtype == np.dtype("O") + assert_array_equal(res, np.array([1-2j, 3+4j], dtype="O")) + + @pytest.mark.parametrize("ufunc", UNARY_OBJECT_UFUNCS) + def test_unary_PyUFunc_O_O_method_full(self, ufunc): + """Compare the result of the object loop with non-object one""" + val = np.float64(np.pi/4) + + class MyFloat(np.float64): + def __getattr__(self, attr): + try: + return super().__getattr__(attr) + except AttributeError: + return lambda: getattr(np.core.umath, attr)(val) + + # Use 0-D arrays, to ensure the same element call + num_arr = np.array(val, dtype=np.float64) + obj_arr = np.array(MyFloat(val), dtype="O") + + with np.errstate(all="raise"): + try: + res_num = ufunc(num_arr) + except Exception as exc: + with assert_raises(type(exc)): + ufunc(obj_arr) + else: + res_obj = ufunc(obj_arr) + assert_array_almost_equal(res_num.astype("O"), res_obj) + + +def _pickleable_module_global(): + pass + + +class TestUfunc: + def test_pickle(self): + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + assert_(pickle.loads(pickle.dumps(np.sin, + protocol=proto)) is np.sin) + + # Check that ufunc not defined in the top level numpy namespace + # such as numpy.core._rational_tests.test_add can also be pickled + res = pickle.loads(pickle.dumps(_rational_tests.test_add, + protocol=proto)) + assert_(res is _rational_tests.test_add) + + def test_pickle_withstring(self): + astring = (b"cnumpy.core\n_ufunc_reconstruct\np0\n" + b"(S'numpy.core.umath'\np1\nS'cos'\np2\ntp3\nRp4\n.") + assert_(pickle.loads(astring) is np.cos) + + def test_pickle_name_is_qualname(self): + # This tests that a simplification of our ufunc pickle code will + # lead to allowing qualnames as names. 
Future ufuncs should + # possible add a specific qualname, or a hook into pickling instead + # (dask+numba may benefit). + _pickleable_module_global.ufunc = umt._pickleable_module_global_ufunc + obj = pickle.loads(pickle.dumps(_pickleable_module_global.ufunc)) + assert obj is umt._pickleable_module_global_ufunc + + def test_reduceat_shifting_sum(self): + L = 6 + x = np.arange(L) + idx = np.array(list(zip(np.arange(L - 2), np.arange(L - 2) + 2))).ravel() + assert_array_equal(np.add.reduceat(x, idx)[::2], [1, 3, 5, 7]) + + def test_all_ufunc(self): + """Try to check presence and results of all ufuncs. + + The list of ufuncs comes from generate_umath.py and is as follows: + + ===== ==== ============= =============== ======================== + done args function types notes + ===== ==== ============= =============== ======================== + n 1 conjugate nums + O + n 1 absolute nums + O complex -> real + n 1 negative nums + O + n 1 sign nums + O -> int + n 1 invert bool + ints + O flts raise an error + n 1 degrees real + M cmplx raise an error + n 1 radians real + M cmplx raise an error + n 1 arccos flts + M + n 1 arccosh flts + M + n 1 arcsin flts + M + n 1 arcsinh flts + M + n 1 arctan flts + M + n 1 arctanh flts + M + n 1 cos flts + M + n 1 sin flts + M + n 1 tan flts + M + n 1 cosh flts + M + n 1 sinh flts + M + n 1 tanh flts + M + n 1 exp flts + M + n 1 expm1 flts + M + n 1 log flts + M + n 1 log10 flts + M + n 1 log1p flts + M + n 1 sqrt flts + M real x < 0 raises error + n 1 ceil real + M + n 1 trunc real + M + n 1 floor real + M + n 1 fabs real + M + n 1 rint flts + M + n 1 isnan flts -> bool + n 1 isinf flts -> bool + n 1 isfinite flts -> bool + n 1 signbit real -> bool + n 1 modf real -> (frac, int) + n 1 logical_not bool + nums + M -> bool + n 2 left_shift ints + O flts raise an error + n 2 right_shift ints + O flts raise an error + n 2 add bool + nums + O boolean + is || + n 2 subtract bool + nums + O boolean - is ^ + n 2 multiply bool + nums + O boolean * is & + n 2 divide nums + O + n 2 floor_divide nums + O + n 2 true_divide nums + O bBhH -> f, iIlLqQ -> d + n 2 fmod nums + M + n 2 power nums + O + n 2 greater bool + nums + O -> bool + n 2 greater_equal bool + nums + O -> bool + n 2 less bool + nums + O -> bool + n 2 less_equal bool + nums + O -> bool + n 2 equal bool + nums + O -> bool + n 2 not_equal bool + nums + O -> bool + n 2 logical_and bool + nums + M -> bool + n 2 logical_or bool + nums + M -> bool + n 2 logical_xor bool + nums + M -> bool + n 2 maximum bool + nums + O + n 2 minimum bool + nums + O + n 2 bitwise_and bool + ints + O flts raise an error + n 2 bitwise_or bool + ints + O flts raise an error + n 2 bitwise_xor bool + ints + O flts raise an error + n 2 arctan2 real + M + n 2 remainder ints + real + O + n 2 hypot real + M + ===== ==== ============= =============== ======================== + + Types other than those listed will be accepted, but they are cast to + the smallest compatible type for which the function is defined. 
The + casting rules are: + + bool -> int8 -> float32 + ints -> double + + """ + pass + + # from include/numpy/ufuncobject.h + size_inferred = 2 + can_ignore = 4 + def test_signature0(self): + # the arguments to test_signature are: nin, nout, core_signature + enabled, num_dims, ixs, flags, sizes = umt.test_signature( + 2, 1, "(i),(i)->()") + assert_equal(enabled, 1) + assert_equal(num_dims, (1, 1, 0)) + assert_equal(ixs, (0, 0)) + assert_equal(flags, (self.size_inferred,)) + assert_equal(sizes, (-1,)) + + def test_signature1(self): + # empty core signature; treat as plain ufunc (with trivial core) + enabled, num_dims, ixs, flags, sizes = umt.test_signature( + 2, 1, "(),()->()") + assert_equal(enabled, 0) + assert_equal(num_dims, (0, 0, 0)) + assert_equal(ixs, ()) + assert_equal(flags, ()) + assert_equal(sizes, ()) + + def test_signature2(self): + # more complicated names for variables + enabled, num_dims, ixs, flags, sizes = umt.test_signature( + 2, 1, "(i1,i2),(J_1)->(_kAB)") + assert_equal(enabled, 1) + assert_equal(num_dims, (2, 1, 1)) + assert_equal(ixs, (0, 1, 2, 3)) + assert_equal(flags, (self.size_inferred,)*4) + assert_equal(sizes, (-1, -1, -1, -1)) + + def test_signature3(self): + enabled, num_dims, ixs, flags, sizes = umt.test_signature( + 2, 1, u"(i1, i12), (J_1)->(i12, i2)") + assert_equal(enabled, 1) + assert_equal(num_dims, (2, 1, 2)) + assert_equal(ixs, (0, 1, 2, 1, 3)) + assert_equal(flags, (self.size_inferred,)*4) + assert_equal(sizes, (-1, -1, -1, -1)) + + def test_signature4(self): + # matrix_multiply signature from _umath_tests + enabled, num_dims, ixs, flags, sizes = umt.test_signature( + 2, 1, "(n,k),(k,m)->(n,m)") + assert_equal(enabled, 1) + assert_equal(num_dims, (2, 2, 2)) + assert_equal(ixs, (0, 1, 1, 2, 0, 2)) + assert_equal(flags, (self.size_inferred,)*3) + assert_equal(sizes, (-1, -1, -1)) + + def test_signature5(self): + # matmul signature from _umath_tests + enabled, num_dims, ixs, flags, sizes = umt.test_signature( + 2, 1, "(n?,k),(k,m?)->(n?,m?)") + assert_equal(enabled, 1) + assert_equal(num_dims, (2, 2, 2)) + assert_equal(ixs, (0, 1, 1, 2, 0, 2)) + assert_equal(flags, (self.size_inferred | self.can_ignore, + self.size_inferred, + self.size_inferred | self.can_ignore)) + assert_equal(sizes, (-1, -1, -1)) + + def test_signature6(self): + enabled, num_dims, ixs, flags, sizes = umt.test_signature( + 1, 1, "(3)->()") + assert_equal(enabled, 1) + assert_equal(num_dims, (1, 0)) + assert_equal(ixs, (0,)) + assert_equal(flags, (0,)) + assert_equal(sizes, (3,)) + + def test_signature7(self): + enabled, num_dims, ixs, flags, sizes = umt.test_signature( + 3, 1, "(3),(03,3),(n)->(9)") + assert_equal(enabled, 1) + assert_equal(num_dims, (1, 2, 1, 1)) + assert_equal(ixs, (0, 0, 0, 1, 2)) + assert_equal(flags, (0, self.size_inferred, 0)) + assert_equal(sizes, (3, -1, 9)) + + def test_signature8(self): + enabled, num_dims, ixs, flags, sizes = umt.test_signature( + 3, 1, "(3?),(3?,3?),(n)->(9)") + assert_equal(enabled, 1) + assert_equal(num_dims, (1, 2, 1, 1)) + assert_equal(ixs, (0, 0, 0, 1, 2)) + assert_equal(flags, (self.can_ignore, self.size_inferred, 0)) + assert_equal(sizes, (3, -1, 9)) + + def test_signature9(self): + enabled, num_dims, ixs, flags, sizes = umt.test_signature( + 1, 1, "( 3) -> ( )") + assert_equal(enabled, 1) + assert_equal(num_dims, (1, 0)) + assert_equal(ixs, (0,)) + assert_equal(flags, (0,)) + assert_equal(sizes, (3,)) + + def test_signature10(self): + enabled, num_dims, ixs, flags, sizes = umt.test_signature( + 3, 1, "( 3? ) , (3? , 3?) 
,(n )-> ( 9)") + assert_equal(enabled, 1) + assert_equal(num_dims, (1, 2, 1, 1)) + assert_equal(ixs, (0, 0, 0, 1, 2)) + assert_equal(flags, (self.can_ignore, self.size_inferred, 0)) + assert_equal(sizes, (3, -1, 9)) + + def test_signature_failure_extra_parenthesis(self): + with assert_raises(ValueError): + umt.test_signature(2, 1, "((i)),(i)->()") + + def test_signature_failure_mismatching_parenthesis(self): + with assert_raises(ValueError): + umt.test_signature(2, 1, "(i),)i(->()") + + def test_signature_failure_signature_missing_input_arg(self): + with assert_raises(ValueError): + umt.test_signature(2, 1, "(i),->()") + + def test_signature_failure_signature_missing_output_arg(self): + with assert_raises(ValueError): + umt.test_signature(2, 2, "(i),(i)->()") + + def test_get_signature(self): + assert_equal(umt.inner1d.signature, "(i),(i)->()") + + def test_forced_sig(self): + a = 0.5*np.arange(3, dtype='f8') + assert_equal(np.add(a, 0.5), [0.5, 1, 1.5]) + with pytest.warns(DeprecationWarning): + assert_equal(np.add(a, 0.5, sig='i', casting='unsafe'), [0, 0, 1]) + assert_equal(np.add(a, 0.5, sig='ii->i', casting='unsafe'), [0, 0, 1]) + with pytest.warns(DeprecationWarning): + assert_equal(np.add(a, 0.5, sig=('i4',), casting='unsafe'), + [0, 0, 1]) + assert_equal(np.add(a, 0.5, sig=('i4', 'i4', 'i4'), + casting='unsafe'), [0, 0, 1]) + + b = np.zeros((3,), dtype='f8') + np.add(a, 0.5, out=b) + assert_equal(b, [0.5, 1, 1.5]) + b[:] = 0 + with pytest.warns(DeprecationWarning): + np.add(a, 0.5, sig='i', out=b, casting='unsafe') + assert_equal(b, [0, 0, 1]) + b[:] = 0 + np.add(a, 0.5, sig='ii->i', out=b, casting='unsafe') + assert_equal(b, [0, 0, 1]) + b[:] = 0 + with pytest.warns(DeprecationWarning): + np.add(a, 0.5, sig=('i4',), out=b, casting='unsafe') + assert_equal(b, [0, 0, 1]) + b[:] = 0 + np.add(a, 0.5, sig=('i4', 'i4', 'i4'), out=b, casting='unsafe') + assert_equal(b, [0, 0, 1]) + + def test_signature_all_None(self): + # signature all None, is an acceptable alternative (since 1.21) + # to not providing a signature. + res1 = np.add([3], [4], sig=(None, None, None)) + res2 = np.add([3], [4]) + assert_array_equal(res1, res2) + res1 = np.maximum([3], [4], sig=(None, None, None)) + res2 = np.maximum([3], [4]) + assert_array_equal(res1, res2) + + with pytest.raises(TypeError): + # special case, that would be deprecated anyway, so errors: + np.add(3, 4, signature=(None,)) + + def test_signature_dtype_type(self): + # Since that will be the normal behaviour (past NumPy 1.21) + # we do support the types already: + float_dtype = type(np.dtype(np.float64)) + np.add(3, 4, signature=(float_dtype, float_dtype, None)) + + @pytest.mark.parametrize("casting", ["unsafe", "same_kind", "safe"]) + def test_partial_signature_mismatch(self, casting): + # If the second argument matches already, no need to specify it: + res = np.ldexp(np.float32(1.), np.int_(2), dtype="d") + assert res.dtype == "d" + res = np.ldexp(np.float32(1.), np.int_(2), signature=(None, None, "d")) + assert res.dtype == "d" + + # ldexp only has a loop for long input as second argument, overriding + # the output cannot help with that (no matter the casting) + with pytest.raises(TypeError): + np.ldexp(1., np.uint64(3), dtype="d") + with pytest.raises(TypeError): + np.ldexp(1., np.uint64(3), signature=(None, None, "d")) + + def test_use_output_signature_for_all_arguments(self): + # Test that providing only `dtype=` or `signature=(None, None, dtype)` + # is sufficient if falling back to a homogeneous signature works. 
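+ # (The fallback tries the homogeneous signature (dtype, dtype, dtype), + # casting every input to that dtype before the loop runs.)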
+ # In this case, the `intp, intp -> intp` loop is chosen. + res = np.power(1.5, 2.8, dtype=np.intp, casting="unsafe") + assert res == 1 # the cast happens first. + res = np.power(1.5, 2.8, signature=(None, None, np.intp), + casting="unsafe") + assert res == 1 + with pytest.raises(TypeError): + # the unsafe casting would normally cause errors though: + np.power(1.5, 2.8, dtype=np.intp) + + def test_signature_errors(self): + with pytest.raises(TypeError, + match="the signature object to ufunc must be a string or"): + np.add(3, 4, signature=123.) # neither a string nor a tuple + + with pytest.raises(ValueError): + # bad symbols that do not translate to dtypes + np.add(3, 4, signature="%^->#") + + with pytest.raises(ValueError): + np.add(3, 4, signature=b"ii-i") # incomplete and byte string + + with pytest.raises(ValueError): + np.add(3, 4, signature="ii>i") # incomplete string + + with pytest.raises(ValueError): + np.add(3, 4, signature=(None, "f8")) # bad length + + with pytest.raises(UnicodeDecodeError): + np.add(3, 4, signature=b"\xff\xff->i") + + def test_forced_dtype_times(self): + # Signatures only set the type numbers (not the actual loop dtypes) + # so using `M` in a signature/dtype should generally work: + a = np.array(['2010-01-02', '1999-03-14', '1833-03'], dtype='>M8[D]') + np.maximum(a, a, dtype="M") + np.maximum.reduce(a, dtype="M") + + arr = np.arange(10, dtype="m8[s]") + np.add(arr, arr, dtype="m") + np.maximum(arr, arr, dtype="m") + + @pytest.mark.parametrize("ufunc", [np.add, np.sqrt]) + def test_cast_safety(self, ufunc): + """Basic test for the safest casts, because ufuncs inner loops can + indicate a cast-safety as well (which is normally always "no"). + """ + def call_ufunc(arr, **kwargs): + return ufunc(*(arr,) * ufunc.nin, **kwargs) + + arr = np.array([1., 2., 3.], dtype=np.float32) + arr_bs = arr.astype(arr.dtype.newbyteorder()) + expected = call_ufunc(arr) + # Normally, a "no" cast: + res = call_ufunc(arr, casting="no") + assert_array_equal(expected, res) + # Byte-swapping is not allowed with "no" though: + with pytest.raises(TypeError): + call_ufunc(arr_bs, casting="no") + + # But is allowed with "equiv": + res = call_ufunc(arr_bs, casting="equiv") + assert_array_equal(expected, res) + + # Casting to float64 is safe, but not equiv: + with pytest.raises(TypeError): + call_ufunc(arr_bs, dtype=np.float64, casting="equiv") + + # but it is safe cast: + res = call_ufunc(arr_bs, dtype=np.float64, casting="safe") + expected = call_ufunc(arr.astype(np.float64)) # upcast + assert_array_equal(expected, res) + + def test_true_divide(self): + a = np.array(10) + b = np.array(20) + tgt = np.array(0.5) + + for tc in 'bhilqBHILQefdgFDG': + dt = np.dtype(tc) + aa = a.astype(dt) + bb = b.astype(dt) + + # Check result value and dtype. + for x, y in itertools.product([aa, -aa], [bb, -bb]): + + # Check with no output type specified + if tc in 'FDG': + tgt = complex(x)/complex(y) + else: + tgt = float(x)/float(y) + + res = np.true_divide(x, y) + rtol = max(np.finfo(res).resolution, 1e-15) + assert_allclose(res, tgt, rtol=rtol) + + if tc in 'bhilqBHILQ': + assert_(res.dtype.name == 'float64') + else: + assert_(res.dtype.name == dt.name ) + + # Check with output type specified. This also checks for the + # incorrect casts in issue gh-3484 because the unary '-' does + # not change types, even for unsigned types, Hence casts in the + # ufunc from signed to unsigned and vice versa will lead to + # errors in the values. 
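+ # For example, np.true_divide(1, 2, dtype=np.int64) raises TypeError: + # true_divide has no loop with an integer output type, so the request + # cannot be satisfied.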
+ for tcout in 'bhilqBHILQ': + dtout = np.dtype(tcout) + assert_raises(TypeError, np.true_divide, x, y, dtype=dtout) + + for tcout in 'efdg': + dtout = np.dtype(tcout) + if tc in 'FDG': + # Casting complex to float is not allowed + assert_raises(TypeError, np.true_divide, x, y, dtype=dtout) + else: + tgt = float(x)/float(y) + rtol = max(np.finfo(dtout).resolution, 1e-15) + # The value of tiny for double double is NaN + with suppress_warnings() as sup: + sup.filter(UserWarning) + if not np.isnan(np.finfo(dtout).tiny): + atol = max(np.finfo(dtout).tiny, 3e-308) + else: + atol = 3e-308 + # Some test values result in invalid for float16. + with np.errstate(invalid='ignore'): + res = np.true_divide(x, y, dtype=dtout) + if not np.isfinite(res) and tcout == 'e': + continue + assert_allclose(res, tgt, rtol=rtol, atol=atol) + assert_(res.dtype.name == dtout.name) + + for tcout in 'FDG': + dtout = np.dtype(tcout) + tgt = complex(x)/complex(y) + rtol = max(np.finfo(dtout).resolution, 1e-15) + # The value of tiny for double double is NaN + with suppress_warnings() as sup: + sup.filter(UserWarning) + if not np.isnan(np.finfo(dtout).tiny): + atol = max(np.finfo(dtout).tiny, 3e-308) + else: + atol = 3e-308 + res = np.true_divide(x, y, dtype=dtout) + if not np.isfinite(res): + continue + assert_allclose(res, tgt, rtol=rtol, atol=atol) + assert_(res.dtype.name == dtout.name) + + # Check booleans + a = np.ones((), dtype=np.bool_) + res = np.true_divide(a, a) + assert_(res == 1.0) + assert_(res.dtype.name == 'float64') + res = np.true_divide(~a, a) + assert_(res == 0.0) + assert_(res.dtype.name == 'float64') + + def test_sum_stability(self): + a = np.ones(500, dtype=np.float32) + assert_almost_equal((a / 10.).sum() - a.size / 10., 0, 4) + + a = np.ones(500, dtype=np.float64) + assert_almost_equal((a / 10.).sum() - a.size / 10., 0, 13) + + def test_sum(self): + for dt in (int, np.float16, np.float32, np.float64, np.longdouble): + for v in (0, 1, 2, 7, 8, 9, 15, 16, 19, 127, + 128, 1024, 1235): + tgt = dt(v * (v + 1) / 2) + d = np.arange(1, v + 1, dtype=dt) + + # warning if sum overflows, which it does in float16 + overflow = not np.isfinite(tgt) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + assert_almost_equal(np.sum(d), tgt) + assert_equal(len(w), 1 * overflow) + + assert_almost_equal(np.sum(d[::-1]), tgt) + assert_equal(len(w), 2 * overflow) + + d = np.ones(500, dtype=dt) + assert_almost_equal(np.sum(d[::2]), 250.) + assert_almost_equal(np.sum(d[1::2]), 250.) + assert_almost_equal(np.sum(d[::3]), 167.) + assert_almost_equal(np.sum(d[1::3]), 167.) + assert_almost_equal(np.sum(d[::-2]), 250.) + assert_almost_equal(np.sum(d[-1::-2]), 250.) + assert_almost_equal(np.sum(d[::-3]), 167.) + assert_almost_equal(np.sum(d[-1::-3]), 167.) + # sum with first reduction entry != 0 + d = np.ones((1,), dtype=dt) + d += d + assert_almost_equal(d, 2.) + + def test_sum_complex(self): + for dt in (np.complex64, np.complex128, np.clongdouble): + for v in (0, 1, 2, 7, 8, 9, 15, 16, 19, 127, + 128, 1024, 1235): + tgt = dt(v * (v + 1) / 2) - dt((v * (v + 1) / 2) * 1j) + d = np.empty(v, dtype=dt) + d.real = np.arange(1, v + 1) + d.imag = -np.arange(1, v + 1) + assert_almost_equal(np.sum(d), tgt) + assert_almost_equal(np.sum(d[::-1]), tgt) + + d = np.ones(500, dtype=dt) + 1j + assert_almost_equal(np.sum(d[::2]), 250. + 250j) + assert_almost_equal(np.sum(d[1::2]), 250. + 250j) + assert_almost_equal(np.sum(d[::3]), 167. + 167j) + assert_almost_equal(np.sum(d[1::3]), 167. 
+ 167j) + assert_almost_equal(np.sum(d[::-2]), 250. + 250j) + assert_almost_equal(np.sum(d[-1::-2]), 250. + 250j) + assert_almost_equal(np.sum(d[::-3]), 167. + 167j) + assert_almost_equal(np.sum(d[-1::-3]), 167. + 167j) + # sum with first reduction entry != 0 + d = np.ones((1,), dtype=dt) + 1j + d += d + assert_almost_equal(d, 2. + 2j) + + def test_sum_initial(self): + # Integer, single axis + assert_equal(np.sum([3], initial=2), 5) + + # Floating point + assert_almost_equal(np.sum([0.2], initial=0.1), 0.3) + + # Multiple non-adjacent axes + assert_equal(np.sum(np.ones((2, 3, 5), dtype=np.int64), axis=(0, 2), initial=2), + [12, 12, 12]) + + def test_sum_where(self): + # More extensive tests done in test_reduction_with_where. + assert_equal(np.sum([[1., 2.], [3., 4.]], where=[True, False]), 4.) + assert_equal(np.sum([[1., 2.], [3., 4.]], axis=0, initial=5., + where=[True, False]), [9., 5.]) + + def test_inner1d(self): + a = np.arange(6).reshape((2, 3)) + assert_array_equal(umt.inner1d(a, a), np.sum(a*a, axis=-1)) + a = np.arange(6) + assert_array_equal(umt.inner1d(a, a), np.sum(a*a)) + + def test_broadcast(self): + msg = "broadcast" + a = np.arange(4).reshape((2, 1, 2)) + b = np.arange(4).reshape((1, 2, 2)) + assert_array_equal(umt.inner1d(a, b), np.sum(a*b, axis=-1), err_msg=msg) + msg = "extend & broadcast loop dimensions" + b = np.arange(4).reshape((2, 2)) + assert_array_equal(umt.inner1d(a, b), np.sum(a*b, axis=-1), err_msg=msg) + # Broadcast in core dimensions should fail + a = np.arange(8).reshape((4, 2)) + b = np.arange(4).reshape((4, 1)) + assert_raises(ValueError, umt.inner1d, a, b) + # Extend core dimensions should fail + a = np.arange(8).reshape((4, 2)) + b = np.array(7) + assert_raises(ValueError, umt.inner1d, a, b) + # Broadcast should fail + a = np.arange(2).reshape((2, 1, 1)) + b = np.arange(3).reshape((3, 1, 1)) + assert_raises(ValueError, umt.inner1d, a, b) + + # Writing to a broadcasted array with overlap should warn, gh-2705 + a = np.arange(2) + b = np.arange(4).reshape((2, 2)) + u, v = np.broadcast_arrays(a, b) + assert_equal(u.strides[0], 0) + x = u + v + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + u += v + assert_equal(len(w), 1) + assert_(x[0, 0] != u[0, 0]) + + # Output reduction should not be allowed. + # See gh-15139 + a = np.arange(6).reshape(3, 2) + b = np.ones(2) + out = np.empty(()) + assert_raises(ValueError, umt.inner1d, a, b, out) + out2 = np.empty(3) + c = umt.inner1d(a, b, out2) + assert_(c is out2) + + def test_out_broadcasts(self): + # For ufuncs and gufuncs (not for reductions), we currently allow + # the output to cause broadcasting of the input arrays. + # both along dimensions with shape 1 and dimensions which do not + # exist at all in the inputs. 
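+ # A standalone sketch of the same rule (editor's illustration, not in + # the upstream file): np.add(np.ones(3), np.ones(3), out=np.empty((2, 3))) + # fills both rows with 2.0 -- the (3,)-shaped inputs are broadcast up to + # the shape of out.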
+ arr = np.arange(3).reshape(1, 3) + out = np.empty((5, 4, 3)) + np.add(arr, arr, out=out) + assert (out == np.arange(3) * 2).all() + + # The same holds for gufuncs (gh-16484) + umt.inner1d(arr, arr, out=out) + # the result would be just a scalar `5`, but is broadcast fully: + assert (out == 5).all() + + def test_type_cast(self): + msg = "type cast" + a = np.arange(6, dtype='short').reshape((2, 3)) + assert_array_equal(umt.inner1d(a, a), np.sum(a*a, axis=-1), + err_msg=msg) + msg = "type cast on one argument" + a = np.arange(6).reshape((2, 3)) + b = a + 0.1 + assert_array_almost_equal(umt.inner1d(a, b), np.sum(a*b, axis=-1), + err_msg=msg) + + def test_endian(self): + msg = "big endian" + a = np.arange(6, dtype='>i4').reshape((2, 3)) + assert_array_equal(umt.inner1d(a, a), np.sum(a*a, axis=-1), + err_msg=msg) + msg = "little endian" + a = np.arange(6, dtype='<i4').reshape((2, 3)) + assert_array_equal(umt.inner1d(a, a), np.sum(a*a, axis=-1), + err_msg=msg) + + def test_axes_argument(self): + # inner1d signature: '(i),(i)->()' + inner1d = umt.inner1d + a = np.arange(27.).reshape((3, 3, 3)) + b = np.arange(10., 19.).reshape((3, 1, 3)) + # basic tests on inputs (outputs tested below with matrix_multiply). + c = inner1d(a, b) + assert_array_equal(c, (a * b).sum(-1)) + # default + c = inner1d(a, b, axes=[(-1,), (-1,), ()]) + assert_array_equal(c, (a * b).sum(-1)) + # integers ok for single axis. + c = inner1d(a, b, axes=[-1, -1, ()]) + assert_array_equal(c, (a * b).sum(-1)) + # mix fine + c = inner1d(a, b, axes=[(-1,), -1, ()]) + assert_array_equal(c, (a * b).sum(-1)) + # can omit last axis. + c = inner1d(a, b, axes=[-1, -1]) + assert_array_equal(c, (a * b).sum(-1)) + # can pass in other types of integer (with __index__ protocol) + c = inner1d(a, b, axes=[np.int8(-1), np.array(-1, dtype=np.int32)]) + assert_array_equal(c, (a * b).sum(-1)) + # swap some axes + c = inner1d(a, b, axes=[0, 0]) + assert_array_equal(c, (a * b).sum(0)) + c = inner1d(a, b, axes=[0, 2]) + assert_array_equal(c, (a.transpose(1, 2, 0) * b).sum(-1)) + # Check errors for improperly constructed axes arguments. + # should have list. + assert_raises(TypeError, inner1d, a, b, axes=-1) + # needs enough elements + assert_raises(ValueError, inner1d, a, b, axes=[-1]) + # should pass in indices. + assert_raises(TypeError, inner1d, a, b, axes=[-1.0, -1.0]) + assert_raises(TypeError, inner1d, a, b, axes=[(-1.0,), -1]) + assert_raises(TypeError, inner1d, a, b, axes=[None, 1]) + # cannot pass an index unless there is only one dimension + # (output is wrong in this case) + assert_raises(TypeError, inner1d, a, b, axes=[-1, -1, -1]) + # or pass in generally the wrong number of axes + assert_raises(ValueError, inner1d, a, b, axes=[-1, -1, (-1,)]) + assert_raises(ValueError, inner1d, a, b, axes=[-1, (-2, -1), ()]) + # axes need to have same length. + assert_raises(ValueError, inner1d, a, b, axes=[0, 1]) + + # matrix_multiply signature: '(m,n),(n,p)->(m,p)' + mm = umt.matrix_multiply + a = np.arange(12).reshape((2, 3, 2)) + b = np.arange(8).reshape((2, 2, 2, 1)) + 1 + # Sanity check. + c = mm(a, b) + assert_array_equal(c, np.matmul(a, b)) + # Default axes. + c = mm(a, b, axes=[(-2, -1), (-2, -1), (-2, -1)]) + assert_array_equal(c, np.matmul(a, b)) + # Default with explicit axes. + c = mm(a, b, axes=[(1, 2), (2, 3), (2, 3)]) + assert_array_equal(c, np.matmul(a, b)) + # swap some axes. + c = mm(a, b, axes=[(0, -1), (1, 2), (-2, -1)]) + assert_array_equal(c, np.matmul(a.transpose(1, 0, 2), + b.transpose(0, 3, 1, 2))) + # Default with output array. 
+ c = np.empty((2, 2, 3, 1)) + d = mm(a, b, out=c, axes=[(1, 2), (2, 3), (2, 3)]) + assert_(c is d) + assert_array_equal(c, np.matmul(a, b)) + # Transposed output array + c = np.empty((1, 2, 2, 3)) + d = mm(a, b, out=c, axes=[(-2, -1), (-2, -1), (3, 0)]) + assert_(c is d) + assert_array_equal(c, np.matmul(a, b).transpose(3, 0, 1, 2)) + # Check errors for improperly constructed axes arguments. + # wrong argument + assert_raises(TypeError, mm, a, b, axis=1) + # axes should be list + assert_raises(TypeError, mm, a, b, axes=1) + assert_raises(TypeError, mm, a, b, axes=((-2, -1), (-2, -1), (-2, -1))) + # list needs to have right length + assert_raises(ValueError, mm, a, b, axes=[]) + assert_raises(ValueError, mm, a, b, axes=[(-2, -1)]) + # list should contain tuples for multiple axes + assert_raises(TypeError, mm, a, b, axes=[-1, -1, -1]) + assert_raises(TypeError, mm, a, b, axes=[(-2, -1), (-2, -1), -1]) + assert_raises(TypeError, + mm, a, b, axes=[[-2, -1], [-2, -1], [-2, -1]]) + assert_raises(TypeError, + mm, a, b, axes=[(-2, -1), (-2, -1), [-2, -1]]) + assert_raises(TypeError, mm, a, b, axes=[(-2, -1), (-2, -1), None]) + # tuples should not have duplicated values + assert_raises(ValueError, mm, a, b, axes=[(-2, -1), (-2, -1), (-2, -2)]) + # arrays should have enough axes. + z = np.zeros((2, 2)) + assert_raises(ValueError, mm, z, z[0]) + assert_raises(ValueError, mm, z, z, out=z[:, 0]) + assert_raises(ValueError, mm, z[1], z, axes=[0, 1]) + assert_raises(ValueError, mm, z, z, out=z[0], axes=[0, 1]) + # Regular ufuncs should not accept axes. + assert_raises(TypeError, np.add, 1., 1., axes=[0]) + # should be able to deal with bad unrelated kwargs. + assert_raises(TypeError, mm, z, z, axes=[0, 1], parrot=True) + + def test_axis_argument(self): + # inner1d signature: '(i),(i)->()' + inner1d = umt.inner1d + a = np.arange(27.).reshape((3, 3, 3)) + b = np.arange(10., 19.).reshape((3, 1, 3)) + c = inner1d(a, b) + assert_array_equal(c, (a * b).sum(-1)) + c = inner1d(a, b, axis=-1) + assert_array_equal(c, (a * b).sum(-1)) + out = np.zeros_like(c) + d = inner1d(a, b, axis=-1, out=out) + assert_(d is out) + assert_array_equal(d, c) + c = inner1d(a, b, axis=0) + assert_array_equal(c, (a * b).sum(0)) + # Sanity checks on innerwt and cumsum. + a = np.arange(6).reshape((2, 3)) + b = np.arange(10, 16).reshape((2, 3)) + w = np.arange(20, 26).reshape((2, 3)) + assert_array_equal(umt.innerwt(a, b, w, axis=0), + np.sum(a * b * w, axis=0)) + assert_array_equal(umt.cumsum(a, axis=0), np.cumsum(a, axis=0)) + assert_array_equal(umt.cumsum(a, axis=-1), np.cumsum(a, axis=-1)) + out = np.empty_like(a) + b = umt.cumsum(a, out=out, axis=0) + assert_(out is b) + assert_array_equal(b, np.cumsum(a, axis=0)) + b = umt.cumsum(a, out=out, axis=1) + assert_(out is b) + assert_array_equal(b, np.cumsum(a, axis=-1)) + # Check errors. + # Cannot pass in both axis and axes. + assert_raises(TypeError, inner1d, a, b, axis=0, axes=[0, 0]) + # Not an integer. + assert_raises(TypeError, inner1d, a, b, axis=[0]) + # more than 1 core dimensions. + mm = umt.matrix_multiply + assert_raises(TypeError, mm, a, b, axis=1) + # Output wrong size in axis. + out = np.empty((1, 2, 3), dtype=a.dtype) + assert_raises(ValueError, umt.cumsum, a, out=out, axis=0) + # Regular ufuncs should not accept axis. 
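+ # (np.add is not a gufunc -- np.add.signature is None -- so there are no + # core dimensions for axis= to refer to:)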
+ assert_raises(TypeError, np.add, 1., 1., axis=0) + + def test_keepdims_argument(self): + # inner1d signature: '(i),(i)->()' + inner1d = umt.inner1d + a = np.arange(27.).reshape((3, 3, 3)) + b = np.arange(10., 19.).reshape((3, 1, 3)) + c = inner1d(a, b) + assert_array_equal(c, (a * b).sum(-1)) + c = inner1d(a, b, keepdims=False) + assert_array_equal(c, (a * b).sum(-1)) + c = inner1d(a, b, keepdims=True) + assert_array_equal(c, (a * b).sum(-1, keepdims=True)) + out = np.zeros_like(c) + d = inner1d(a, b, keepdims=True, out=out) + assert_(d is out) + assert_array_equal(d, c) + # Now combined with axis and axes. + c = inner1d(a, b, axis=-1, keepdims=False) + assert_array_equal(c, (a * b).sum(-1, keepdims=False)) + c = inner1d(a, b, axis=-1, keepdims=True) + assert_array_equal(c, (a * b).sum(-1, keepdims=True)) + c = inner1d(a, b, axis=0, keepdims=False) + assert_array_equal(c, (a * b).sum(0, keepdims=False)) + c = inner1d(a, b, axis=0, keepdims=True) + assert_array_equal(c, (a * b).sum(0, keepdims=True)) + c = inner1d(a, b, axes=[(-1,), (-1,), ()], keepdims=False) + assert_array_equal(c, (a * b).sum(-1)) + c = inner1d(a, b, axes=[(-1,), (-1,), (-1,)], keepdims=True) + assert_array_equal(c, (a * b).sum(-1, keepdims=True)) + c = inner1d(a, b, axes=[0, 0], keepdims=False) + assert_array_equal(c, (a * b).sum(0)) + c = inner1d(a, b, axes=[0, 0, 0], keepdims=True) + assert_array_equal(c, (a * b).sum(0, keepdims=True)) + c = inner1d(a, b, axes=[0, 2], keepdims=False) + assert_array_equal(c, (a.transpose(1, 2, 0) * b).sum(-1)) + c = inner1d(a, b, axes=[0, 2], keepdims=True) + assert_array_equal(c, (a.transpose(1, 2, 0) * b).sum(-1, + keepdims=True)) + c = inner1d(a, b, axes=[0, 2, 2], keepdims=True) + assert_array_equal(c, (a.transpose(1, 2, 0) * b).sum(-1, + keepdims=True)) + c = inner1d(a, b, axes=[0, 2, 0], keepdims=True) + assert_array_equal(c, (a * b.transpose(2, 0, 1)).sum(0, keepdims=True)) + # Hardly useful, but should work. + c = inner1d(a, b, axes=[0, 2, 1], keepdims=True) + assert_array_equal(c, (a.transpose(1, 0, 2) * b.transpose(0, 2, 1)) + .sum(1, keepdims=True)) + # Check with two core dimensions. + a = np.eye(3) * np.arange(4.)[:, np.newaxis, np.newaxis] + expected = uml.det(a) + c = uml.det(a, keepdims=False) + assert_array_equal(c, expected) + c = uml.det(a, keepdims=True) + assert_array_equal(c, expected[:, np.newaxis, np.newaxis]) + a = np.eye(3) * np.arange(4.)[:, np.newaxis, np.newaxis] + expected_s, expected_l = uml.slogdet(a) + cs, cl = uml.slogdet(a, keepdims=False) + assert_array_equal(cs, expected_s) + assert_array_equal(cl, expected_l) + cs, cl = uml.slogdet(a, keepdims=True) + assert_array_equal(cs, expected_s[:, np.newaxis, np.newaxis]) + assert_array_equal(cl, expected_l[:, np.newaxis, np.newaxis]) + # Sanity check on innerwt. + a = np.arange(6).reshape((2, 3)) + b = np.arange(10, 16).reshape((2, 3)) + w = np.arange(20, 26).reshape((2, 3)) + assert_array_equal(umt.innerwt(a, b, w, keepdims=True), + np.sum(a * b * w, axis=-1, keepdims=True)) + assert_array_equal(umt.innerwt(a, b, w, axis=0, keepdims=True), + np.sum(a * b * w, axis=0, keepdims=True)) + # Check errors. + # Not a boolean + assert_raises(TypeError, inner1d, a, b, keepdims='true') + # More than 1 core dimension, and core output dimensions. + mm = umt.matrix_multiply + assert_raises(TypeError, mm, a, b, keepdims=True) + assert_raises(TypeError, mm, a, b, keepdims=False) + # Regular ufuncs should not accept keepdims. 
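+ # (keepdims is only well-defined when the core output is scalar, as in + # inner1d's '(i),(i)->()'; a plain ufunc has no core dimensions at all:)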
+ assert_raises(TypeError, np.add, 1., 1., keepdims=False) + + def test_innerwt(self): + a = np.arange(6).reshape((2, 3)) + b = np.arange(10, 16).reshape((2, 3)) + w = np.arange(20, 26).reshape((2, 3)) + assert_array_equal(umt.innerwt(a, b, w), np.sum(a*b*w, axis=-1)) + a = np.arange(100, 124).reshape((2, 3, 4)) + b = np.arange(200, 224).reshape((2, 3, 4)) + w = np.arange(300, 324).reshape((2, 3, 4)) + assert_array_equal(umt.innerwt(a, b, w), np.sum(a*b*w, axis=-1)) + + def test_innerwt_empty(self): + """Test generalized ufunc with zero-sized operands""" + a = np.array([], dtype='f8') + b = np.array([], dtype='f8') + w = np.array([], dtype='f8') + assert_array_equal(umt.innerwt(a, b, w), np.sum(a*b*w, axis=-1)) + + def test_cross1d(self): + """Test with fixed-sized signature.""" + a = np.eye(3) + assert_array_equal(umt.cross1d(a, a), np.zeros((3, 3))) + out = np.zeros((3, 3)) + result = umt.cross1d(a[0], a, out) + assert_(result is out) + assert_array_equal(result, np.vstack((np.zeros(3), a[2], -a[1]))) + assert_raises(ValueError, umt.cross1d, np.eye(4), np.eye(4)) + assert_raises(ValueError, umt.cross1d, a, np.arange(4.)) + # Wrong output core dimension. + assert_raises(ValueError, umt.cross1d, a, np.arange(3.), np.zeros((3, 4))) + # Wrong output broadcast dimension (see gh-15139). + assert_raises(ValueError, umt.cross1d, a, np.arange(3.), np.zeros(3)) + + def test_can_ignore_signature(self): + # Comparing the effects of ? in signature: + # matrix_multiply: (m,n),(n,p)->(m,p) # all must be there. + # matmul: (m?,n),(n,p?)->(m?,p?) # allow missing m, p. + mat = np.arange(12).reshape((2, 3, 2)) + single_vec = np.arange(2) + col_vec = single_vec[:, np.newaxis] + col_vec_array = np.arange(8).reshape((2, 2, 2, 1)) + 1 + # matrix @ single column vector with proper dimension + mm_col_vec = umt.matrix_multiply(mat, col_vec) + # matmul does the same thing + matmul_col_vec = umt.matmul(mat, col_vec) + assert_array_equal(matmul_col_vec, mm_col_vec) + # matrix @ vector without dimension making it a column vector. + # matrix multiply fails -> missing core dim. + assert_raises(ValueError, umt.matrix_multiply, mat, single_vec) + # matmul mimicker passes, and returns a vector. + matmul_col = umt.matmul(mat, single_vec) + assert_array_equal(matmul_col, mm_col_vec.squeeze()) + # Now with a column array: same as for column vector, + # broadcasting sensibly. 
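+ # (Shape check: mat is (2, 3, 2) and col_vec_array is (2, 2, 2, 1); the + # (3, 2) @ (2, 1) cores give (3, 1), the loop dimensions broadcast to + # (2, 2), so both results below have shape (2, 2, 3, 1).)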
+ mm_col_vec = umt.matrix_multiply(mat, col_vec_array) + matmul_col_vec = umt.matmul(mat, col_vec_array) + assert_array_equal(matmul_col_vec, mm_col_vec) + # As above, but for row vector + single_vec = np.arange(3) + row_vec = single_vec[np.newaxis, :] + row_vec_array = np.arange(24).reshape((4, 2, 1, 1, 3)) + 1 + # row vector @ matrix + mm_row_vec = umt.matrix_multiply(row_vec, mat) + matmul_row_vec = umt.matmul(row_vec, mat) + assert_array_equal(matmul_row_vec, mm_row_vec) + # single row vector @ matrix + assert_raises(ValueError, umt.matrix_multiply, single_vec, mat) + matmul_row = umt.matmul(single_vec, mat) + assert_array_equal(matmul_row, mm_row_vec.squeeze()) + # row vector array @ matrix + mm_row_vec = umt.matrix_multiply(row_vec_array, mat) + matmul_row_vec = umt.matmul(row_vec_array, mat) + assert_array_equal(matmul_row_vec, mm_row_vec) + # Now for vector combinations + # row vector @ column vector + col_vec = row_vec.T + col_vec_array = row_vec_array.swapaxes(-2, -1) + mm_row_col_vec = umt.matrix_multiply(row_vec, col_vec) + matmul_row_col_vec = umt.matmul(row_vec, col_vec) + assert_array_equal(matmul_row_col_vec, mm_row_col_vec) + # single row vector @ single col vector + assert_raises(ValueError, umt.matrix_multiply, single_vec, single_vec) + matmul_row_col = umt.matmul(single_vec, single_vec) + assert_array_equal(matmul_row_col, mm_row_col_vec.squeeze()) + # row vector array @ matrix + mm_row_col_array = umt.matrix_multiply(row_vec_array, col_vec_array) + matmul_row_col_array = umt.matmul(row_vec_array, col_vec_array) + assert_array_equal(matmul_row_col_array, mm_row_col_array) + # Finally, check that things are *not* squeezed if one gives an + # output. + out = np.zeros_like(mm_row_col_array) + out = umt.matrix_multiply(row_vec_array, col_vec_array, out=out) + assert_array_equal(out, mm_row_col_array) + out[:] = 0 + out = umt.matmul(row_vec_array, col_vec_array, out=out) + assert_array_equal(out, mm_row_col_array) + # And check one cannot put missing dimensions back. + out = np.zeros_like(mm_row_col_vec) + assert_raises(ValueError, umt.matrix_multiply, single_vec, single_vec, + out) + # But fine for matmul, since it is just a broadcast. 
+ out = umt.matmul(single_vec, single_vec, out) + assert_array_equal(out, mm_row_col_vec.squeeze()) + + def test_matrix_multiply(self): + self.compare_matrix_multiply_results(np.int64) + self.compare_matrix_multiply_results(np.double) + + def test_matrix_multiply_umath_empty(self): + res = umt.matrix_multiply(np.ones((0, 10)), np.ones((10, 0))) + assert_array_equal(res, np.zeros((0, 0))) + res = umt.matrix_multiply(np.ones((10, 0)), np.ones((0, 10))) + assert_array_equal(res, np.zeros((10, 10))) + + def compare_matrix_multiply_results(self, tp): + d1 = np.array(np.random.rand(2, 3, 4), dtype=tp) + d2 = np.array(np.random.rand(2, 3, 4), dtype=tp) + msg = "matrix multiply on type %s" % d1.dtype.name + + def permute_n(n): + if n == 1: + return ([0],) + ret = () + base = permute_n(n-1) + for perm in base: + for i in range(n): + new = perm + [n-1] + new[n-1] = new[i] + new[i] = n-1 + ret += (new,) + return ret + + def slice_n(n): + if n == 0: + return ((),) + ret = () + base = slice_n(n-1) + for sl in base: + ret += (sl+(slice(None),),) + ret += (sl+(slice(0, 1),),) + return ret + + def broadcastable(s1, s2): + return s1 == s2 or s1 == 1 or s2 == 1 + + permute_3 = permute_n(3) + slice_3 = slice_n(3) + ((slice(None, None, -1),)*3,) + + ref = True + for p1 in permute_3: + for p2 in permute_3: + for s1 in slice_3: + for s2 in slice_3: + a1 = d1.transpose(p1)[s1] + a2 = d2.transpose(p2)[s2] + ref = ref and a1.base is not None + ref = ref and a2.base is not None + if (a1.shape[-1] == a2.shape[-2] and + broadcastable(a1.shape[0], a2.shape[0])): + assert_array_almost_equal( + umt.matrix_multiply(a1, a2), + np.sum(a2[..., np.newaxis].swapaxes(-3, -1) * + a1[..., np.newaxis,:], axis=-1), + err_msg=msg + ' %s %s' % (str(a1.shape), + str(a2.shape))) + + assert_equal(ref, True, err_msg="reference check") + + def test_euclidean_pdist(self): + a = np.arange(12, dtype=float).reshape(4, 3) + out = np.empty((a.shape[0] * (a.shape[0] - 1) // 2,), dtype=a.dtype) + umt.euclidean_pdist(a, out) + b = np.sqrt(np.sum((a[:, None] - a)**2, axis=-1)) + b = b[~np.tri(a.shape[0], dtype=bool)] + assert_almost_equal(out, b) + # An output array is required to determine p with signature (n,d)->(p) + assert_raises(ValueError, umt.euclidean_pdist, a) + + def test_cumsum(self): + a = np.arange(10) + result = umt.cumsum(a) + assert_array_equal(result, a.cumsum()) + + def test_object_logical(self): + a = np.array([3, None, True, False, "test", ""], dtype=object) + assert_equal(np.logical_or(a, None), + np.array([x or None for x in a], dtype=object)) + assert_equal(np.logical_or(a, True), + np.array([x or True for x in a], dtype=object)) + assert_equal(np.logical_or(a, 12), + np.array([x or 12 for x in a], dtype=object)) + assert_equal(np.logical_or(a, "blah"), + np.array([x or "blah" for x in a], dtype=object)) + + assert_equal(np.logical_and(a, None), + np.array([x and None for x in a], dtype=object)) + assert_equal(np.logical_and(a, True), + np.array([x and True for x in a], dtype=object)) + assert_equal(np.logical_and(a, 12), + np.array([x and 12 for x in a], dtype=object)) + assert_equal(np.logical_and(a, "blah"), + np.array([x and "blah" for x in a], dtype=object)) + + assert_equal(np.logical_not(a), + np.array([not x for x in a], dtype=object)) + + assert_equal(np.logical_or.reduce(a), 3) + assert_equal(np.logical_and.reduce(a), None) + + def test_object_comparison(self): + class HasComparisons: + def __eq__(self, other): + return '==' + + arr0d = np.array(HasComparisons()) + assert_equal(arr0d == arr0d, True) + 
assert_equal(np.equal(arr0d, arr0d), True) # normal behavior is a cast + + arr1d = np.array([HasComparisons()]) + assert_equal(arr1d == arr1d, np.array([True])) + assert_equal(np.equal(arr1d, arr1d), np.array([True])) # normal behavior is a cast + assert_equal(np.equal(arr1d, arr1d, dtype=object), np.array(['=='])) + + def test_object_array_reduction(self): + # Reductions on object arrays + a = np.array(['a', 'b', 'c'], dtype=object) + assert_equal(np.sum(a), 'abc') + assert_equal(np.max(a), 'c') + assert_equal(np.min(a), 'a') + a = np.array([True, False, True], dtype=object) + assert_equal(np.sum(a), 2) + assert_equal(np.prod(a), 0) + assert_equal(np.any(a), True) + assert_equal(np.all(a), False) + assert_equal(np.max(a), True) + assert_equal(np.min(a), False) + assert_equal(np.array([[1]], dtype=object).sum(), 1) + assert_equal(np.array([[[1, 2]]], dtype=object).sum((0, 1)), [1, 2]) + assert_equal(np.array([1], dtype=object).sum(initial=1), 2) + assert_equal(np.array([[1], [2, 3]], dtype=object) + .sum(initial=[0], where=[False, True]), [0, 2, 3]) + + def test_object_array_accumulate_inplace(self): + # Checks that in-place accumulates work, see also gh-7402 + arr = np.ones(4, dtype=object) + arr[:] = [[1] for i in range(4)] + # Twice reproduced also for tuples: + np.add.accumulate(arr, out=arr) + np.add.accumulate(arr, out=arr) + assert_array_equal(arr, + np.array([[1]*i for i in [1, 3, 6, 10]], dtype=object), + ) + + # And the same if the axis argument is used + arr = np.ones((2, 4), dtype=object) + arr[0, :] = [[2] for i in range(4)] + np.add.accumulate(arr, out=arr, axis=-1) + np.add.accumulate(arr, out=arr, axis=-1) + assert_array_equal(arr[0, :], + np.array([[2]*i for i in [1, 3, 6, 10]], dtype=object), + ) + + def test_object_array_accumulate_failure(self): + # Typical accumulation on object works as expected: + res = np.add.accumulate(np.array([1, 0, 2], dtype=object)) + assert_array_equal(res, np.array([1, 1, 3], dtype=object)) + # But errors are propagated from the inner-loop if they occur: + with pytest.raises(TypeError): + np.add.accumulate([1, None, 2]) + + def test_object_array_reduceat_inplace(self): + # Checks that in-place reduceats work, see also gh-7465 + arr = np.empty(4, dtype=object) + arr[:] = [[1] for i in range(4)] + out = np.empty(4, dtype=object) + out[:] = [[1] for i in range(4)] + np.add.reduceat(arr, np.arange(4), out=arr) + np.add.reduceat(arr, np.arange(4), out=arr) + assert_array_equal(arr, out) + + # And the same if the axis argument is used + arr = np.ones((2, 4), dtype=object) + arr[0, :] = [[2] for i in range(4)] + out = np.ones((2, 4), dtype=object) + out[0, :] = [[2] for i in range(4)] + np.add.reduceat(arr, np.arange(4), out=arr, axis=-1) + np.add.reduceat(arr, np.arange(4), out=arr, axis=-1) + assert_array_equal(arr, out) + + def test_object_array_reduceat_failure(self): + # Reduceat works as expected when no invalid operation occurs (None is + # not involved in an operation here) + res = np.add.reduceat(np.array([1, None, 2], dtype=object), [1, 2]) + assert_array_equal(res, np.array([None, 2], dtype=object)) + # But errors when None would be involved in an operation: + with pytest.raises(TypeError): + np.add.reduceat([1, None, 2], [0, 2]) + + def test_zerosize_reduction(self): + # Test with default dtype and object dtype + for a in [[], np.array([], dtype=object)]: + assert_equal(np.sum(a), 0) + assert_equal(np.prod(a), 1) + assert_equal(np.any(a), False) + assert_equal(np.all(a), True) + assert_raises(ValueError, np.max, a) + 
assert_raises(ValueError, np.min, a) + + def test_axis_out_of_bounds(self): + a = np.array([False, False]) + assert_raises(np.AxisError, a.all, axis=1) + a = np.array([False, False]) + assert_raises(np.AxisError, a.all, axis=-2) + + a = np.array([False, False]) + assert_raises(np.AxisError, a.any, axis=1) + a = np.array([False, False]) + assert_raises(np.AxisError, a.any, axis=-2) + + def test_scalar_reduction(self): + # The functions 'sum', 'prod', etc allow specifying axis=0 + # even for scalars + assert_equal(np.sum(3, axis=0), 3) + assert_equal(np.prod(3.5, axis=0), 3.5) + assert_equal(np.any(True, axis=0), True) + assert_equal(np.all(False, axis=0), False) + assert_equal(np.max(3, axis=0), 3) + assert_equal(np.min(2.5, axis=0), 2.5) + + # Check scalar behaviour for ufuncs without an identity + assert_equal(np.power.reduce(3), 3) + + # Make sure that scalars are coming out from this operation + assert_(type(np.prod(np.float32(2.5), axis=0)) is np.float32) + assert_(type(np.sum(np.float32(2.5), axis=0)) is np.float32) + assert_(type(np.max(np.float32(2.5), axis=0)) is np.float32) + assert_(type(np.min(np.float32(2.5), axis=0)) is np.float32) + + # check if scalars/0-d arrays get cast + assert_(type(np.any(0, axis=0)) is np.bool_) + + # assert that 0-d arrays get wrapped + class MyArray(np.ndarray): + pass + a = np.array(1).view(MyArray) + assert_(type(np.any(a)) is MyArray) + + def test_casting_out_param(self): + # Test that it's possible to do casts on output + a = np.ones((200, 100), np.int64) + b = np.ones((200, 100), np.int64) + c = np.ones((200, 100), np.float64) + np.add(a, b, out=c) + assert_equal(c, 2) + + a = np.zeros(65536) + b = np.zeros(65536, dtype=np.float32) + np.subtract(a, 0, out=b) + assert_equal(b, 0) + + def test_where_param(self): + # Test that the where= ufunc parameter works with regular arrays + a = np.arange(7) + b = np.ones(7) + c = np.zeros(7) + np.add(a, b, out=c, where=(a % 2 == 1)) + assert_equal(c, [0, 2, 0, 4, 0, 6, 0]) + + a = np.arange(4).reshape(2, 2) + 2 + np.power(a, [2, 3], out=a, where=[[0, 1], [1, 0]]) + assert_equal(a, [[2, 27], [16, 5]]) + # Broadcasting the where= parameter + np.subtract(a, 2, out=a, where=[True, False]) + assert_equal(a, [[0, 27], [14, 5]]) + + def test_where_param_buffer_output(self): + # This test is temporarily skipped because it requires + # adding masking features to the nditer to work properly + + # With casting on output + a = np.ones(10, np.int64) + b = np.ones(10, np.int64) + c = 1.5 * np.ones(10, np.float64) + np.add(a, b, out=c, where=[1, 0, 0, 1, 0, 0, 1, 1, 1, 0]) + assert_equal(c, [2, 1.5, 1.5, 2, 1.5, 1.5, 2, 2, 2, 1.5]) + + def test_where_param_alloc(self): + # With casting and allocated output + a = np.array([1], dtype=np.int64) + m = np.array([True], dtype=bool) + assert_equal(np.sqrt(a, where=m), [1]) + + # No casting and allocated output + a = np.array([1], dtype=np.float64) + m = np.array([True], dtype=bool) + assert_equal(np.sqrt(a, where=m), [1]) + + def test_where_with_broadcasting(self): + # See gh-17198 + a = np.random.random((5000, 4)) + b = np.random.random((5000, 1)) + + where = a > 0.3 + out = np.full_like(a, 0) + np.less(a, b, where=where, out=out) + b_where = np.broadcast_to(b, a.shape)[where] + assert_array_equal((a[where] < b_where), out[where].astype(bool)) + assert not out[~where].any() # outside mask, out remains all 0 + + def check_identityless_reduction(self, a): + # np.minimum.reduce is an identityless reduction + + # Verify that it sees the zero at various positions + a[...] 
= 1 + a[1, 0, 0] = 0 + assert_equal(np.minimum.reduce(a, axis=None), 0) + assert_equal(np.minimum.reduce(a, axis=(0, 1)), [0, 1, 1, 1]) + assert_equal(np.minimum.reduce(a, axis=(0, 2)), [0, 1, 1]) + assert_equal(np.minimum.reduce(a, axis=(1, 2)), [1, 0]) + assert_equal(np.minimum.reduce(a, axis=0), + [[0, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]]) + assert_equal(np.minimum.reduce(a, axis=1), + [[1, 1, 1, 1], [0, 1, 1, 1]]) + assert_equal(np.minimum.reduce(a, axis=2), + [[1, 1, 1], [0, 1, 1]]) + assert_equal(np.minimum.reduce(a, axis=()), a) + + a[...] = 1 + a[0, 1, 0] = 0 + assert_equal(np.minimum.reduce(a, axis=None), 0) + assert_equal(np.minimum.reduce(a, axis=(0, 1)), [0, 1, 1, 1]) + assert_equal(np.minimum.reduce(a, axis=(0, 2)), [1, 0, 1]) + assert_equal(np.minimum.reduce(a, axis=(1, 2)), [0, 1]) + assert_equal(np.minimum.reduce(a, axis=0), + [[1, 1, 1, 1], [0, 1, 1, 1], [1, 1, 1, 1]]) + assert_equal(np.minimum.reduce(a, axis=1), + [[0, 1, 1, 1], [1, 1, 1, 1]]) + assert_equal(np.minimum.reduce(a, axis=2), + [[1, 0, 1], [1, 1, 1]]) + assert_equal(np.minimum.reduce(a, axis=()), a) + + a[...] = 1 + a[0, 0, 1] = 0 + assert_equal(np.minimum.reduce(a, axis=None), 0) + assert_equal(np.minimum.reduce(a, axis=(0, 1)), [1, 0, 1, 1]) + assert_equal(np.minimum.reduce(a, axis=(0, 2)), [0, 1, 1]) + assert_equal(np.minimum.reduce(a, axis=(1, 2)), [0, 1]) + assert_equal(np.minimum.reduce(a, axis=0), + [[1, 0, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]]) + assert_equal(np.minimum.reduce(a, axis=1), + [[1, 0, 1, 1], [1, 1, 1, 1]]) + assert_equal(np.minimum.reduce(a, axis=2), + [[0, 1, 1], [1, 1, 1]]) + assert_equal(np.minimum.reduce(a, axis=()), a) + + @requires_memory(6 * 1024**3) + def test_identityless_reduction_huge_array(self): + # Regression test for gh-20921 (copying identity incorrectly failed) + arr = np.zeros((2, 2**31), 'uint8') + arr[:, 0] = [1, 3] + arr[:, -1] = [4, 1] + res = np.maximum.reduce(arr, axis=0) + del arr + assert res[0] == 3 + assert res[-1] == 4 + + def test_identityless_reduction_corder(self): + a = np.empty((2, 3, 4), order='C') + self.check_identityless_reduction(a) + + def test_identityless_reduction_forder(self): + a = np.empty((2, 3, 4), order='F') + self.check_identityless_reduction(a) + + def test_identityless_reduction_otherorder(self): + a = np.empty((2, 4, 3), order='C').swapaxes(1, 2) + self.check_identityless_reduction(a) + + def test_identityless_reduction_noncontig(self): + a = np.empty((3, 5, 4), order='C').swapaxes(1, 2) + a = a[1:, 1:, 1:] + self.check_identityless_reduction(a) + + def test_identityless_reduction_noncontig_unaligned(self): + a = np.empty((3*4*5*8 + 1,), dtype='i1') + a = a[1:].view(dtype='f8') + a.shape = (3, 4, 5) + a = a[1:, 1:, 1:] + self.check_identityless_reduction(a) + + def test_initial_reduction(self): + # np.minimum.reduce is an identityless reduction + + # For cases like np.maximum(np.abs(...), initial=0) + # More generally, a supremum over non-negative numbers. + assert_equal(np.maximum.reduce([], initial=0), 0) + + # For cases like reduction of an empty array over the reals. 
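+ # (np.inf is the identity of minimum and -np.inf that of maximum over + # the extended reals, so the empty reductions below simply return the + # initial value:)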
+ assert_equal(np.minimum.reduce([], initial=np.inf), np.inf) + assert_equal(np.maximum.reduce([], initial=-np.inf), -np.inf) + + # Random tests + assert_equal(np.minimum.reduce([5], initial=4), 4) + assert_equal(np.maximum.reduce([4], initial=5), 5) + assert_equal(np.maximum.reduce([5], initial=4), 5) + assert_equal(np.minimum.reduce([4], initial=5), 4) + + # Check initial=None raises ValueError for both types of ufunc reductions + assert_raises(ValueError, np.minimum.reduce, [], initial=None) + assert_raises(ValueError, np.add.reduce, [], initial=None) + + # Check that np._NoValue gives default behavior. + assert_equal(np.add.reduce([], initial=np._NoValue), 0) + + # Check that initial kwarg behaves as intended for dtype=object + a = np.array([10], dtype=object) + res = np.add.reduce(a, initial=5) + assert_equal(res, 15) + + @pytest.mark.parametrize('axis', (0, 1, None)) + @pytest.mark.parametrize('where', (np.array([False, True, True]), + np.array([[True], [False], [True]]), + np.array([[True, False, False], + [False, True, False], + [False, True, True]]))) + def test_reduction_with_where(self, axis, where): + a = np.arange(9.).reshape(3, 3) + a_copy = a.copy() + a_check = np.zeros_like(a) + np.positive(a, out=a_check, where=where) + + res = np.add.reduce(a, axis=axis, where=where) + check = a_check.sum(axis) + assert_equal(res, check) + # Check we do not overwrite elements of a internally. + assert_array_equal(a, a_copy) + + @pytest.mark.parametrize(('axis', 'where'), + ((0, np.array([True, False, True])), + (1, [True, True, False]), + (None, True))) + @pytest.mark.parametrize('initial', (-np.inf, 5.)) + def test_reduction_with_where_and_initial(self, axis, where, initial): + a = np.arange(9.).reshape(3, 3) + a_copy = a.copy() + a_check = np.full(a.shape, -np.inf) + np.positive(a, out=a_check, where=where) + + res = np.maximum.reduce(a, axis=axis, where=where, initial=initial) + check = a_check.max(axis, initial=initial) + assert_equal(res, check) + + def test_reduction_where_initial_needed(self): + a = np.arange(9.).reshape(3, 3) + m = [False, True, False] + assert_raises(ValueError, np.maximum.reduce, a, where=m) + + def test_identityless_reduction_nonreorderable(self): + a = np.array([[8.0, 2.0, 2.0], [1.0, 0.5, 0.25]]) + + res = np.divide.reduce(a, axis=0) + assert_equal(res, [8.0, 4.0, 8.0]) + + res = np.divide.reduce(a, axis=1) + assert_equal(res, [2.0, 8.0]) + + res = np.divide.reduce(a, axis=()) + assert_equal(res, a) + + assert_raises(ValueError, np.divide.reduce, a, axis=(0, 1)) + + def test_reduce_zero_axis(self): + # If we have a n x m array and do a reduction with axis=1, then we are + # doing n reductions, and each reduction takes an m-element array. For + # a reduction operation without an identity, then: + # n > 0, m > 0: fine + # n = 0, m > 0: fine, doing 0 reductions of m-element arrays + # n > 0, m = 0: can't reduce a 0-element array, ValueError + # n = 0, m = 0: can't reduce a 0-element array, ValueError (for + # consistency with the above case) + # This test doesn't actually look at return values, it just checks to + # make sure that error we get an error in exactly those cases where we + # expect one, and assumes the calculations themselves are done + # correctly. 
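+ # For instance (editor's illustration): np.maximum.reduce(np.zeros((0, 3)), + # axis=1) gives an empty (0,)-shaped result, while axis=0 raises + # ValueError, since each of its 3 reductions would run over zero elements + # with no identity to fall back on.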
+ + def ok(f, *args, **kwargs): + f(*args, **kwargs) + + def err(f, *args, **kwargs): + assert_raises(ValueError, f, *args, **kwargs) + + def t(expect, func, n, m): + expect(func, np.zeros((n, m)), axis=1) + expect(func, np.zeros((m, n)), axis=0) + expect(func, np.zeros((n // 2, n // 2, m)), axis=2) + expect(func, np.zeros((n // 2, m, n // 2)), axis=1) + expect(func, np.zeros((n, m // 2, m // 2)), axis=(1, 2)) + expect(func, np.zeros((m // 2, n, m // 2)), axis=(0, 2)) + expect(func, np.zeros((m // 3, m // 3, m // 3, + n // 2, n // 2)), + axis=(0, 1, 2)) + # Check what happens if the inner (resp. outer) dimensions are a + # mix of zero and non-zero: + expect(func, np.zeros((10, m, n)), axis=(0, 1)) + expect(func, np.zeros((10, n, m)), axis=(0, 2)) + expect(func, np.zeros((m, 10, n)), axis=0) + expect(func, np.zeros((10, m, n)), axis=1) + expect(func, np.zeros((10, n, m)), axis=2) + + # np.maximum is just an arbitrary ufunc with no reduction identity + assert_equal(np.maximum.identity, None) + t(ok, np.maximum.reduce, 30, 30) + t(ok, np.maximum.reduce, 0, 30) + t(err, np.maximum.reduce, 30, 0) + t(err, np.maximum.reduce, 0, 0) + err(np.maximum.reduce, []) + np.maximum.reduce(np.zeros((0, 0)), axis=()) + + # all of the combinations are fine for a reduction that has an + # identity + t(ok, np.add.reduce, 30, 30) + t(ok, np.add.reduce, 0, 30) + t(ok, np.add.reduce, 30, 0) + t(ok, np.add.reduce, 0, 0) + np.add.reduce([]) + np.add.reduce(np.zeros((0, 0)), axis=()) + + # OTOH, accumulate always makes sense for any combination of n and m, + # because it maps an m-element array to an m-element array. These + # tests are simpler because accumulate doesn't accept multiple axes. + for uf in (np.maximum, np.add): + uf.accumulate(np.zeros((30, 0)), axis=0) + uf.accumulate(np.zeros((0, 30)), axis=0) + uf.accumulate(np.zeros((30, 30)), axis=0) + uf.accumulate(np.zeros((0, 0)), axis=0) + + def test_safe_casting(self): + # In old versions of numpy, in-place operations used the 'unsafe' + # casting rules. In versions >= 1.10, 'same_kind' is the + # default and an exception is raised instead of a warning. + # when 'same_kind' is not satisfied. + a = np.array([1, 2, 3], dtype=int) + # Non-in-place addition is fine + assert_array_equal(assert_no_warnings(np.add, a, 1.1), + [2.1, 3.1, 4.1]) + assert_raises(TypeError, np.add, a, 1.1, out=a) + + def add_inplace(a, b): + a += b + + assert_raises(TypeError, add_inplace, a, 1.1) + # Make sure that explicitly overriding the exception is allowed: + assert_no_warnings(np.add, a, 1.1, out=a, casting="unsafe") + assert_array_equal(a, [2, 3, 4]) + + def test_ufunc_custom_out(self): + # Test ufunc with built in input types and custom output type + + a = np.array([0, 1, 2], dtype='i8') + b = np.array([0, 1, 2], dtype='i8') + c = np.empty(3, dtype=_rational_tests.rational) + + # Output must be specified so numpy knows what + # ufunc signature to look for + result = _rational_tests.test_add(a, b, c) + target = np.array([0, 2, 4], dtype=_rational_tests.rational) + assert_equal(result, target) + + # The new resolution means that we can (usually) find custom loops + # as long as they match exactly: + result = _rational_tests.test_add(a, b) + assert_equal(result, target) + + # This works even more generally, so long the default common-dtype + # promoter works out: + result = _rational_tests.test_add(a, b.astype(np.uint16), out=c) + assert_equal(result, target) + + # But, it can be fooled, e.g. 
(use scalars, which forces legacy + # type resolution to kick in, which then fails): + with assert_raises(TypeError): + _rational_tests.test_add(a, np.uint16(2)) + + def test_operand_flags(self): + a = np.arange(16, dtype='l').reshape(4, 4) + b = np.arange(9, dtype='l').reshape(3, 3) + opflag_tests.inplace_add(a[:-1, :-1], b) + assert_equal(a, np.array([[0, 2, 4, 3], [7, 9, 11, 7], + [14, 16, 18, 11], [12, 13, 14, 15]], dtype='l')) + + a = np.array(0) + opflag_tests.inplace_add(a, 3) + assert_equal(a, 3) + opflag_tests.inplace_add(a, [3, 4]) + assert_equal(a, 10) + + def test_struct_ufunc(self): + import numpy.core._struct_ufunc_tests as struct_ufunc + + a = np.array([(1, 2, 3)], dtype='u8,u8,u8') + b = np.array([(1, 2, 3)], dtype='u8,u8,u8') + + result = struct_ufunc.add_triplet(a, b) + assert_equal(result, np.array([(2, 4, 6)], dtype='u8,u8,u8')) + assert_raises(RuntimeError, struct_ufunc.register_fail) + + def test_custom_ufunc(self): + a = np.array( + [_rational_tests.rational(1, 2), + _rational_tests.rational(1, 3), + _rational_tests.rational(1, 4)], + dtype=_rational_tests.rational) + b = np.array( + [_rational_tests.rational(1, 2), + _rational_tests.rational(1, 3), + _rational_tests.rational(1, 4)], + dtype=_rational_tests.rational) + + result = _rational_tests.test_add_rationals(a, b) + expected = np.array( + [_rational_tests.rational(1), + _rational_tests.rational(2, 3), + _rational_tests.rational(1, 2)], + dtype=_rational_tests.rational) + assert_equal(result, expected) + + def test_custom_ufunc_forced_sig(self): + # gh-9351 - looking for a non-first userloop would previously hang + with assert_raises(TypeError): + np.multiply(_rational_tests.rational(1), 1, + signature=(_rational_tests.rational, int, None)) + + def test_custom_array_like(self): + + class MyThing: + __array_priority__ = 1000 + + rmul_count = 0 + getitem_count = 0 + + def __init__(self, shape): + self.shape = shape + + def __len__(self): + return self.shape[0] + + def __getitem__(self, i): + MyThing.getitem_count += 1 + if not isinstance(i, tuple): + i = (i,) + if len(i) > self.ndim: + raise IndexError("boo") + + return MyThing(self.shape[len(i):]) + + def __rmul__(self, other): + MyThing.rmul_count += 1 + return self + + np.float64(5)*MyThing((3, 3)) + assert_(MyThing.rmul_count == 1, MyThing.rmul_count) + assert_(MyThing.getitem_count <= 2, MyThing.getitem_count) + + def test_inplace_fancy_indexing(self): + + a = np.arange(10) + np.add.at(a, [2, 5, 2], 1) + assert_equal(a, [0, 1, 4, 3, 4, 6, 6, 7, 8, 9]) + + a = np.arange(10) + b = np.array([100, 100, 100]) + np.add.at(a, [2, 5, 2], b) + assert_equal(a, [0, 1, 202, 3, 4, 105, 6, 7, 8, 9]) + + a = np.arange(9).reshape(3, 3) + b = np.array([[100, 100, 100], [200, 200, 200], [300, 300, 300]]) + np.add.at(a, (slice(None), [1, 2, 1]), b) + assert_equal(a, [[0, 201, 102], [3, 404, 205], [6, 607, 308]]) + + a = np.arange(27).reshape(3, 3, 3) + b = np.array([100, 200, 300]) + np.add.at(a, (slice(None), slice(None), [1, 2, 1]), b) + assert_equal(a, + [[[0, 401, 202], + [3, 404, 205], + [6, 407, 208]], + + [[9, 410, 211], + [12, 413, 214], + [15, 416, 217]], + + [[18, 419, 220], + [21, 422, 223], + [24, 425, 226]]]) + + a = np.arange(9).reshape(3, 3) + b = np.array([[100, 100, 100], [200, 200, 200], [300, 300, 300]]) + np.add.at(a, ([1, 2, 1], slice(None)), b) + assert_equal(a, [[0, 1, 2], [403, 404, 405], [206, 207, 208]]) + + a = np.arange(27).reshape(3, 3, 3) + b = np.array([100, 200, 300]) + np.add.at(a, (slice(None), [1, 2, 1], slice(None)), b) + assert_equal(a, + 
[[[0, 1, 2], + [203, 404, 605], + [106, 207, 308]], + + [[9, 10, 11], + [212, 413, 614], + [115, 216, 317]], + + [[18, 19, 20], + [221, 422, 623], + [124, 225, 326]]]) + + a = np.arange(9).reshape(3, 3) + b = np.array([100, 200, 300]) + np.add.at(a, (0, [1, 2, 1]), b) + assert_equal(a, [[0, 401, 202], [3, 4, 5], [6, 7, 8]]) + + a = np.arange(27).reshape(3, 3, 3) + b = np.array([100, 200, 300]) + np.add.at(a, ([1, 2, 1], 0, slice(None)), b) + assert_equal(a, + [[[0, 1, 2], + [3, 4, 5], + [6, 7, 8]], + + [[209, 410, 611], + [12, 13, 14], + [15, 16, 17]], + + [[118, 219, 320], + [21, 22, 23], + [24, 25, 26]]]) + + a = np.arange(27).reshape(3, 3, 3) + b = np.array([100, 200, 300]) + np.add.at(a, (slice(None), slice(None), slice(None)), b) + assert_equal(a, + [[[100, 201, 302], + [103, 204, 305], + [106, 207, 308]], + + [[109, 210, 311], + [112, 213, 314], + [115, 216, 317]], + + [[118, 219, 320], + [121, 222, 323], + [124, 225, 326]]]) + + a = np.arange(10) + np.negative.at(a, [2, 5, 2]) + assert_equal(a, [0, 1, 2, 3, 4, -5, 6, 7, 8, 9]) + + # Test 0-dim array + a = np.array(0) + np.add.at(a, (), 1) + assert_equal(a, 1) + + assert_raises(IndexError, np.add.at, a, 0, 1) + assert_raises(IndexError, np.add.at, a, [], 1) + + # Test mixed dtypes + a = np.arange(10) + np.power.at(a, [1, 2, 3, 2], 3.5) + assert_equal(a, np.array([0, 1, 4414, 46, 4, 5, 6, 7, 8, 9])) + + # Test boolean indexing and boolean ufuncs + a = np.arange(10) + index = a % 2 == 0 + np.equal.at(a, index, [0, 2, 4, 6, 8]) + assert_equal(a, [1, 1, 1, 3, 1, 5, 1, 7, 1, 9]) + + # Test unary operator + a = np.arange(10, dtype='u4') + np.invert.at(a, [2, 5, 2]) + assert_equal(a, [0, 1, 2, 3, 4, 5 ^ 0xffffffff, 6, 7, 8, 9]) + + # Test empty subspace + orig = np.arange(4) + a = orig[:, None][:, 0:0] + np.add.at(a, [0, 1], 3) + assert_array_equal(orig, np.arange(4)) + + # Test with swapped byte order + index = np.array([1, 2, 1], np.dtype('i').newbyteorder()) + values = np.array([1, 2, 3, 4], np.dtype('f').newbyteorder()) + np.add.at(values, index, 3) + assert_array_equal(values, [1, 8, 6, 4]) + + # Test exception thrown + values = np.array(['a', 1], dtype=object) + assert_raises(TypeError, np.add.at, values, [0, 1], 1) + assert_array_equal(values, np.array(['a', 1], dtype=object)) + + # Test multiple output ufuncs raise error, gh-5665 + assert_raises(ValueError, np.modf.at, np.arange(10), [1]) + + def test_reduce_arguments(self): + f = np.add.reduce + d = np.ones((5,2), dtype=int) + o = np.ones((2,), dtype=d.dtype) + r = o * 5 + assert_equal(f(d), r) + # a, axis=0, dtype=None, out=None, keepdims=False + assert_equal(f(d, axis=0), r) + assert_equal(f(d, 0), r) + assert_equal(f(d, 0, dtype=None), r) + assert_equal(f(d, 0, dtype='i'), r) + assert_equal(f(d, 0, 'i'), r) + assert_equal(f(d, 0, None), r) + assert_equal(f(d, 0, None, out=None), r) + assert_equal(f(d, 0, None, out=o), r) + assert_equal(f(d, 0, None, o), r) + assert_equal(f(d, 0, None, None), r) + assert_equal(f(d, 0, None, None, keepdims=False), r) + assert_equal(f(d, 0, None, None, True), r.reshape((1,) + r.shape)) + assert_equal(f(d, 0, None, None, False, 0), r) + assert_equal(f(d, 0, None, None, False, initial=0), r) + assert_equal(f(d, 0, None, None, False, 0, True), r) + assert_equal(f(d, 0, None, None, False, 0, where=True), r) + # multiple keywords + assert_equal(f(d, axis=0, dtype=None, out=None, keepdims=False), r) + assert_equal(f(d, 0, dtype=None, out=None, keepdims=False), r) + assert_equal(f(d, 0, None, out=None, keepdims=False), r) + assert_equal(f(d, 0, None, 
out=None, keepdims=False, initial=0, + where=True), r) + + # too little + assert_raises(TypeError, f) + # too much + assert_raises(TypeError, f, d, 0, None, None, False, 0, True, 1) + # invalid axis + assert_raises(TypeError, f, d, "invalid") + assert_raises(TypeError, f, d, axis="invalid") + assert_raises(TypeError, f, d, axis="invalid", dtype=None, + keepdims=True) + # invalid dtype + assert_raises(TypeError, f, d, 0, "invalid") + assert_raises(TypeError, f, d, dtype="invalid") + assert_raises(TypeError, f, d, dtype="invalid", out=None) + # invalid out + assert_raises(TypeError, f, d, 0, None, "invalid") + assert_raises(TypeError, f, d, out="invalid") + assert_raises(TypeError, f, d, out="invalid", dtype=None) + # keepdims boolean, no invalid value + # assert_raises(TypeError, f, d, 0, None, None, "invalid") + # assert_raises(TypeError, f, d, keepdims="invalid", axis=0, dtype=None) + # invalid mix + assert_raises(TypeError, f, d, 0, keepdims="invalid", dtype="invalid", + out=None) + + # invalid keyword + assert_raises(TypeError, f, d, axis=0, dtype=None, invalid=0) + assert_raises(TypeError, f, d, invalid=0) + assert_raises(TypeError, f, d, 0, keepdims=True, invalid="invalid", + out=None) + assert_raises(TypeError, f, d, axis=0, dtype=None, keepdims=True, + out=None, invalid=0) + assert_raises(TypeError, f, d, axis=0, dtype=None, + out=None, invalid=0) + + def test_structured_equal(self): + # https://github.com/numpy/numpy/issues/4855 + + class MyA(np.ndarray): + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + return getattr(ufunc, method)(*(input.view(np.ndarray) + for input in inputs), **kwargs) + a = np.arange(12.).reshape(4,3) + ra = a.view(dtype=('f8,f8,f8')).squeeze() + mra = ra.view(MyA) + + target = np.array([ True, False, False, False], dtype=bool) + assert_equal(np.all(target == (mra == ra[0])), True) + + def test_scalar_equal(self): + # Scalar comparisons should always work, without deprecation warnings. + # even when the ufunc fails. + a = np.array(0.) + b = np.array('a') + assert_(a != b) + assert_(b != a) + assert_(not (a == b)) + assert_(not (b == a)) + + def test_NotImplemented_not_returned(self): + # See gh-5964 and gh-2091. Some of these functions are not operator + # related and were fixed for other reasons in the past. 
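# [Editor's note: illustrative sketch, not part of the original patch.]
# The point of gh-5964/gh-2091 tested below: a ufunc called as a plain
# function must raise TypeError itself rather than return NotImplemented;
# NotImplemented only has meaning inside the binary-operator protocol
# (__add__/__radd__), where Python converts it into a TypeError for you.
import numpy as np

try:
    np.add(np.array('1'), 1)   # no add loop exists for (str, int)
except TypeError:
    pass                       # expected: an exception, not a return value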
+ binary_funcs = [ + np.power, np.add, np.subtract, np.multiply, np.divide, + np.true_divide, np.floor_divide, np.bitwise_and, np.bitwise_or, + np.bitwise_xor, np.left_shift, np.right_shift, np.fmax, + np.fmin, np.fmod, np.hypot, np.logaddexp, np.logaddexp2, + np.maximum, np.minimum, np.mod, + np.greater, np.greater_equal, np.less, np.less_equal, + np.equal, np.not_equal] + + a = np.array('1') + b = 1 + c = np.array([1., 2.]) + for f in binary_funcs: + assert_raises(TypeError, f, a, b) + assert_raises(TypeError, f, c, a) + + @pytest.mark.parametrize("ufunc", + [np.logical_and, np.logical_or]) # logical_xor object loop is bad + @pytest.mark.parametrize("signature", + [(None, None, object), (object, None, None), + (None, object, None)]) + def test_logical_ufuncs_object_signatures(self, ufunc, signature): + a = np.array([True, None, False], dtype=object) + res = ufunc(a, a, signature=signature) + assert res.dtype == object + + @pytest.mark.parametrize("ufunc", + [np.logical_and, np.logical_or, np.logical_xor]) + @pytest.mark.parametrize("signature", + [(bool, None, object), (object, None, bool), + (None, object, bool)]) + def test_logical_ufuncs_mixed_object_signatures(self, ufunc, signature): + # Most mixed signatures fail (except those with bool out, e.g. `OO->?`) + a = np.array([True, None, False]) + with pytest.raises(TypeError): + ufunc(a, a, signature=signature) + + @pytest.mark.parametrize("ufunc", + [np.logical_and, np.logical_or, np.logical_xor]) + def test_logical_ufuncs_support_anything(self, ufunc): + # The logical ufuncs support even input that can't be promoted: + a = np.array(b'1', dtype="V3") + c = np.array([1., 2.]) + assert_array_equal(ufunc(a, c), ufunc([True, True], True)) + assert ufunc.reduce(a) == True + # check that the output has no effect: + out = np.zeros(2, dtype=np.int32) + expected = ufunc([True, True], True).astype(out.dtype) + assert_array_equal(ufunc(a, c, out=out), expected) + out = np.zeros((), dtype=np.int32) + assert ufunc.reduce(a, out=out) == True + # Last check, test reduction when out and a match (the complexity here + # is that the "i,i->?" may seem right, but should not match. + a = np.array([3], dtype="i") + out = np.zeros((), dtype=a.dtype) + assert ufunc.reduce(a, out=out) == 1 + + @pytest.mark.parametrize("ufunc", + [np.logical_and, np.logical_or, np.logical_xor]) + def test_logical_ufuncs_reject_string(self, ufunc): + """ + Logical ufuncs are normally well defined by working with the boolean + equivalent, i.e. casting all inputs to bools should work. + + However, casting strings to bools is *currently* weird, because it + actually uses `bool(int(str))`. Thus we explicitly reject strings. + This test should succeed (and can probably just be removed) as soon as + string to bool casts are well defined in NumPy. 
+ """ + with pytest.raises(TypeError, match="contain a loop with signature"): + ufunc(["1"], ["3"]) + with pytest.raises(TypeError, match="contain a loop with signature"): + ufunc.reduce(["1", "2", "0"]) + + @pytest.mark.parametrize("ufunc", + [np.logical_and, np.logical_or, np.logical_xor]) + def test_logical_ufuncs_out_cast_check(self, ufunc): + a = np.array('1') + c = np.array([1., 2.]) + out = a.copy() + with pytest.raises(TypeError): + # It would be safe, but not equiv casting: + ufunc(a, c, out=out, casting="equiv") + + def test_reducelike_byteorder_resolution(self): + # See gh-20699, byte-order changes need some extra care in the type + # resolution to make the following succeed: + arr_be = np.arange(10, dtype=">i8") + arr_le = np.arange(10, dtype="i + if 'O' in typ or '?' in typ: + continue + inp, out = typ.split('->') + args = [np.ones((3, 3), t) for t in inp] + with warnings.catch_warnings(record=True): + warnings.filterwarnings("always") + res = ufunc(*args) + if isinstance(res, tuple): + outs = tuple(out) + assert len(res) == len(outs) + for r, t in zip(res, outs): + assert r.dtype == np.dtype(t) + else: + assert res.dtype == np.dtype(out) + +@pytest.mark.parametrize('ufunc', [getattr(np, x) for x in dir(np) + if isinstance(getattr(np, x), np.ufunc)]) +def test_ufunc_noncontiguous(ufunc): + ''' + Check that contiguous and non-contiguous calls to ufuncs + have the same results for values in range(9) + ''' + for typ in ufunc.types: + # types is a list of strings like ii->i + if any(set('O?mM') & set(typ)): + # bool, object, datetime are too irregular for this simple test + continue + inp, out = typ.split('->') + args_c = [np.empty(6, t) for t in inp] + args_n = [np.empty(18, t)[::3] for t in inp] + for a in args_c: + a.flat = range(1,7) + for a in args_n: + a.flat = range(1,7) + with warnings.catch_warnings(record=True): + warnings.filterwarnings("always") + res_c = ufunc(*args_c) + res_n = ufunc(*args_n) + if len(out) == 1: + res_c = (res_c,) + res_n = (res_n,) + for c_ar, n_ar in zip(res_c, res_n): + dt = c_ar.dtype + if np.issubdtype(dt, np.floating): + # for floating point results allow a small fuss in comparisons + # since different algorithms (libm vs. intrinsics) can be used + # for different input strides + res_eps = np.finfo(dt).eps + tol = 2*res_eps + assert_allclose(res_c, res_n, atol=tol, rtol=tol) + else: + assert_equal(c_ar, n_ar) + + +@pytest.mark.parametrize('ufunc', [np.sign, np.equal]) +def test_ufunc_warn_with_nan(ufunc): + # issue gh-15127 + # test that calling certain ufuncs with a non-standard `nan` value does not + # emit a warning + # `b` holds a 64 bit signaling nan: the most significant bit of the + # significand is zero. + b = np.array([0x7ff0000000000001], 'i8').view('f8') + assert np.isnan(b) + if ufunc.nin == 1: + ufunc(b) + elif ufunc.nin == 2: + ufunc(b, b.copy()) + else: + raise ValueError('ufunc with more than 2 inputs') + + +@pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") +def test_ufunc_casterrors(): + # Tests that casting errors are correctly reported and buffers are + # cleared. 
+ # The following array can be added to itself as an object array, but + # the result cannot be cast to an integer output: + value = 123 # relies on python cache (leak-check will still find it) + arr = np.array([value] * int(np.BUFSIZE * 1.5) + + ["string"] + + [value] * int(1.5 * np.BUFSIZE), dtype=object) + out = np.ones(len(arr), dtype=np.intp) + + count = sys.getrefcount(value) + with pytest.raises(ValueError): + # Output casting failure: + np.add(arr, arr, out=out, casting="unsafe") + + assert count == sys.getrefcount(value) + # output is unchanged after the error, this shows that the iteration + # was aborted (this is not necessarily defined behaviour) + assert out[-1] == 1 + + with pytest.raises(ValueError): + # Input casting failure: + np.add(arr, arr, out=out, dtype=np.intp, casting="unsafe") + + assert count == sys.getrefcount(value) + # output is unchanged after the error, this shows that the iteration + # was aborted (this is not necessarily defined behaviour) + assert out[-1] == 1 + + +def test_trivial_loop_invalid_cast(): + # This tests the fast-path "invalid cast", see gh-19904. + with pytest.raises(TypeError, + match="cast ufunc 'add' input 0"): + # the void dtype definitely cannot cast to double: + np.add(np.array(1, "i,i"), 3, signature="dd->d") + + +@pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") +@pytest.mark.parametrize("offset", + [0, np.BUFSIZE//2, int(1.5*np.BUFSIZE)]) +def test_reduce_casterrors(offset): + # Test reporting of casting errors in reductions, we test various + # offsets to where the casting error will occur, since these may occur + # at different places during the reduction procedure. For example + # the first item may be special. + value = 123 # relies on python cache (leak-check will still find it) + arr = np.array([value] * offset + + ["string"] + + [value] * int(1.5 * np.BUFSIZE), dtype=object) + out = np.array(-1, dtype=np.intp) + + count = sys.getrefcount(value) + with pytest.raises(ValueError, match="invalid literal"): + # This is an unsafe cast, but we currently always allow that. + # Note that the double loop is picked, but the cast fails. + np.add.reduce(arr, dtype=np.intp, out=out) + assert count == sys.getrefcount(value) + # If an error occurred during casting, the operation is done at most until + # the error occurs (the result of which would be `value * offset`) and -1 + # if the error happened immediately. 
+ # This does not define behaviour, the output is invalid and thus undefined + assert out[()] < value * offset + + +@pytest.mark.parametrize("method", + [np.add.accumulate, np.add.reduce, + pytest.param(lambda x: np.add.reduceat(x, [0]), id="reduceat"), + pytest.param(lambda x: np.log.at(x, [2]), id="at")]) +def test_ufunc_methods_floaterrors(method): + # adding inf and -inf (or log(-inf) creates an invalid float and warns + arr = np.array([np.inf, 0, -np.inf]) + with np.errstate(all="warn"): + with pytest.warns(RuntimeWarning, match="invalid value"): + method(arr) + + arr = np.array([np.inf, 0, -np.inf]) + with np.errstate(all="raise"): + with pytest.raises(FloatingPointError): + method(arr) diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_umath.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_umath.py new file mode 100644 index 0000000..c0b26e7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_umath.py @@ -0,0 +1,3891 @@ +import platform +import warnings +import fnmatch +import itertools +import pytest +import sys +import os +from fractions import Fraction +from functools import reduce + +import numpy.core.umath as ncu +from numpy.core import _umath_tests as ncu_tests +import numpy as np +from numpy.testing import ( + assert_, assert_equal, assert_raises, assert_raises_regex, + assert_array_equal, assert_almost_equal, assert_array_almost_equal, + assert_array_max_ulp, assert_allclose, assert_no_warnings, suppress_warnings, + _gen_alignment_data, assert_array_almost_equal_nulp + ) + +def get_glibc_version(): + try: + ver = os.confstr('CS_GNU_LIBC_VERSION').rsplit(' ')[1] + except Exception as inst: + ver = '0.0' + + return ver + + +glibcver = get_glibc_version() +glibc_older_than = lambda x: (glibcver != '0.0' and glibcver < x) + +def on_powerpc(): + """ True if we are running on a Power PC platform.""" + return platform.processor() == 'powerpc' or \ + platform.machine().startswith('ppc') + + +def bad_arcsinh(): + """The blocklisted trig functions are not accurate on aarch64 for + complex256. Rather than dig through the actual problem skip the + test. 
This should be fixed when we can move past glibc2.17 + which is the version in manylinux2014 + """ + x = 1.78e-10 + v1 = np.arcsinh(np.float128(x)) + v2 = np.arcsinh(np.complex256(x)).real + # The eps for float128 is 1-e33, so this is way bigger + return abs((v1 / v2) - 1.0) > 1e-23 + + +class _FilterInvalids: + def setup(self): + self.olderr = np.seterr(invalid='ignore') + + def teardown(self): + np.seterr(**self.olderr) + + +class TestConstants: + def test_pi(self): + assert_allclose(ncu.pi, 3.141592653589793, 1e-15) + + def test_e(self): + assert_allclose(ncu.e, 2.718281828459045, 1e-15) + + def test_euler_gamma(self): + assert_allclose(ncu.euler_gamma, 0.5772156649015329, 1e-15) + + +class TestOut: + def test_out_subok(self): + for subok in (True, False): + a = np.array(0.5) + o = np.empty(()) + + r = np.add(a, 2, o, subok=subok) + assert_(r is o) + r = np.add(a, 2, out=o, subok=subok) + assert_(r is o) + r = np.add(a, 2, out=(o,), subok=subok) + assert_(r is o) + + d = np.array(5.7) + o1 = np.empty(()) + o2 = np.empty((), dtype=np.int32) + + r1, r2 = np.frexp(d, o1, None, subok=subok) + assert_(r1 is o1) + r1, r2 = np.frexp(d, None, o2, subok=subok) + assert_(r2 is o2) + r1, r2 = np.frexp(d, o1, o2, subok=subok) + assert_(r1 is o1) + assert_(r2 is o2) + + r1, r2 = np.frexp(d, out=(o1, None), subok=subok) + assert_(r1 is o1) + r1, r2 = np.frexp(d, out=(None, o2), subok=subok) + assert_(r2 is o2) + r1, r2 = np.frexp(d, out=(o1, o2), subok=subok) + assert_(r1 is o1) + assert_(r2 is o2) + + with assert_raises(TypeError): + # Out argument must be tuple, since there are multiple outputs. + r1, r2 = np.frexp(d, out=o1, subok=subok) + + assert_raises(TypeError, np.add, a, 2, o, o, subok=subok) + assert_raises(TypeError, np.add, a, 2, o, out=o, subok=subok) + assert_raises(TypeError, np.add, a, 2, None, out=o, subok=subok) + assert_raises(ValueError, np.add, a, 2, out=(o, o), subok=subok) + assert_raises(ValueError, np.add, a, 2, out=(), subok=subok) + assert_raises(TypeError, np.add, a, 2, [], subok=subok) + assert_raises(TypeError, np.add, a, 2, out=[], subok=subok) + assert_raises(TypeError, np.add, a, 2, out=([],), subok=subok) + o.flags.writeable = False + assert_raises(ValueError, np.add, a, 2, o, subok=subok) + assert_raises(ValueError, np.add, a, 2, out=o, subok=subok) + assert_raises(ValueError, np.add, a, 2, out=(o,), subok=subok) + + def test_out_wrap_subok(self): + class ArrayWrap(np.ndarray): + __array_priority__ = 10 + + def __new__(cls, arr): + return np.asarray(arr).view(cls).copy() + + def __array_wrap__(self, arr, context): + return arr.view(type(self)) + + for subok in (True, False): + a = ArrayWrap([0.5]) + + r = np.add(a, 2, subok=subok) + if subok: + assert_(isinstance(r, ArrayWrap)) + else: + assert_(type(r) == np.ndarray) + + r = np.add(a, 2, None, subok=subok) + if subok: + assert_(isinstance(r, ArrayWrap)) + else: + assert_(type(r) == np.ndarray) + + r = np.add(a, 2, out=None, subok=subok) + if subok: + assert_(isinstance(r, ArrayWrap)) + else: + assert_(type(r) == np.ndarray) + + r = np.add(a, 2, out=(None,), subok=subok) + if subok: + assert_(isinstance(r, ArrayWrap)) + else: + assert_(type(r) == np.ndarray) + + d = ArrayWrap([5.7]) + o1 = np.empty((1,)) + o2 = np.empty((1,), dtype=np.int32) + + r1, r2 = np.frexp(d, o1, subok=subok) + if subok: + assert_(isinstance(r2, ArrayWrap)) + else: + assert_(type(r2) == np.ndarray) + + r1, r2 = np.frexp(d, o1, None, subok=subok) + if subok: + assert_(isinstance(r2, ArrayWrap)) + else: + assert_(type(r2) == np.ndarray) + + r1, 
r2 = np.frexp(d, None, o2, subok=subok) + if subok: + assert_(isinstance(r1, ArrayWrap)) + else: + assert_(type(r1) == np.ndarray) + + r1, r2 = np.frexp(d, out=(o1, None), subok=subok) + if subok: + assert_(isinstance(r2, ArrayWrap)) + else: + assert_(type(r2) == np.ndarray) + + r1, r2 = np.frexp(d, out=(None, o2), subok=subok) + if subok: + assert_(isinstance(r1, ArrayWrap)) + else: + assert_(type(r1) == np.ndarray) + + with assert_raises(TypeError): + # Out argument must be tuple, since there are multiple outputs. + r1, r2 = np.frexp(d, out=o1, subok=subok) + + +class TestComparisons: + def test_ignore_object_identity_in_equal(self): + # Check comparing identical objects whose comparison + # is not a simple boolean, e.g., arrays that are compared elementwise. + a = np.array([np.array([1, 2, 3]), None], dtype=object) + assert_raises(ValueError, np.equal, a, a) + + # Check error raised when comparing identical non-comparable objects. + class FunkyType: + def __eq__(self, other): + raise TypeError("I won't compare") + + a = np.array([FunkyType()]) + assert_raises(TypeError, np.equal, a, a) + + # Check identity doesn't override comparison mismatch. + a = np.array([np.nan], dtype=object) + assert_equal(np.equal(a, a), [False]) + + def test_ignore_object_identity_in_not_equal(self): + # Check comparing identical objects whose comparison + # is not a simple boolean, e.g., arrays that are compared elementwise. + a = np.array([np.array([1, 2, 3]), None], dtype=object) + assert_raises(ValueError, np.not_equal, a, a) + + # Check error raised when comparing identical non-comparable objects. + class FunkyType: + def __ne__(self, other): + raise TypeError("I won't compare") + + a = np.array([FunkyType()]) + assert_raises(TypeError, np.not_equal, a, a) + + # Check identity doesn't override comparison mismatch. + a = np.array([np.nan], dtype=object) + assert_equal(np.not_equal(a, a), [True]) + + +class TestAdd: + def test_reduce_alignment(self): + # gh-9876 + # make sure arrays with weird strides work with the optimizations in + # pairwise_sum_@TYPE@. On x86, the 'b' field will count as aligned at a + # 4 byte offset, even though its itemsize is 8. 
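# [Editor's note: illustrative sketch, not part of the original patch.]
# What "weird strides" means in gh-9876: in the structured dtype below,
# the float64 field 'b' starts at byte offset 4, so a view of it is not
# 8-byte aligned and the pairwise-sum fast path must still handle it.
import numpy as np

a = np.zeros(2, dtype=[('a', np.int32), ('b', np.float64)])
print(a.dtype.fields['b'][1])   # byte offset of 'b': 4
print(a['b'].flags.aligned)     # typically False for this layout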
+ a = np.zeros(2, dtype=[('a', np.int32), ('b', np.float64)]) + a['a'] = -1 + assert_equal(a['b'].sum(), 0) + + +class TestDivision: + def test_division_int(self): + # int division should follow Python + x = np.array([5, 10, 90, 100, -5, -10, -90, -100, -120]) + if 5 / 10 == 0.5: + assert_equal(x / 100, [0.05, 0.1, 0.9, 1, + -0.05, -0.1, -0.9, -1, -1.2]) + else: + assert_equal(x / 100, [0, 0, 0, 1, -1, -1, -1, -1, -2]) + assert_equal(x // 100, [0, 0, 0, 1, -1, -1, -1, -1, -2]) + assert_equal(x % 100, [5, 10, 90, 0, 95, 90, 10, 0, 80]) + + @pytest.mark.parametrize("dtype,ex_val", itertools.product( + np.sctypes['int'] + np.sctypes['uint'], ( + ( + # dividend + "np.arange(fo.max-lsize, fo.max, dtype=dtype)," + # divisors + "np.arange(lsize, dtype=dtype)," + # scalar divisors + "range(15)" + ), + ( + # dividend + "np.arange(fo.min, fo.min+lsize, dtype=dtype)," + # divisors + "np.arange(lsize//-2, lsize//2, dtype=dtype)," + # scalar divisors + "range(fo.min, fo.min + 15)" + ), ( + # dividend + "np.arange(fo.max-lsize, fo.max, dtype=dtype)," + # divisors + "np.arange(lsize, dtype=dtype)," + # scalar divisors + "[1,3,9,13,neg, fo.min+1, fo.min//2, fo.max//3, fo.max//4]" + ) + ) + )) + def test_division_int_boundary(self, dtype, ex_val): + fo = np.iinfo(dtype) + neg = -1 if fo.min < 0 else 1 + # Large enough to test SIMD loops and remainder elements + lsize = 512 + 7 + a, b, divisors = eval(ex_val) + a_lst, b_lst = a.tolist(), b.tolist() + + c_div = lambda n, d: ( + 0 if d == 0 or (n and n == fo.min and d == -1) else n//d + ) + with np.errstate(divide='ignore'): + ac = a.copy() + ac //= b + div_ab = a // b + div_lst = [c_div(x, y) for x, y in zip(a_lst, b_lst)] + + msg = "Integer arrays floor division check (//)" + assert all(div_ab == div_lst), msg + msg_eq = "Integer arrays floor division check (//=)" + assert all(ac == div_lst), msg_eq + + for divisor in divisors: + ac = a.copy() + with np.errstate(divide='ignore'): + div_a = a // divisor + ac //= divisor + div_lst = [c_div(i, divisor) for i in a_lst] + + assert all(div_a == div_lst), msg + assert all(ac == div_lst), msg_eq + + with np.errstate(divide='raise'): + if 0 in b or (fo.min and -1 in b and fo.min in a): + # Verify overflow case + with pytest.raises(FloatingPointError): + a // b + else: + a // b + if fo.min and fo.min in a: + with pytest.raises(FloatingPointError): + a // -1 + elif fo.min: + a // -1 + with pytest.raises(FloatingPointError): + a // 0 + with pytest.raises(FloatingPointError): + ac = a.copy() + ac //= 0 + + np.array([], dtype=dtype) // 0 + + @pytest.mark.parametrize("dtype,ex_val", itertools.product( + np.sctypes['int'] + np.sctypes['uint'], ( + "np.array([fo.max, 1, 2, 1, 1, 2, 3], dtype=dtype)", + "np.array([fo.min, 1, -2, 1, 1, 2, -3], dtype=dtype)", + "np.arange(fo.min, fo.min+(100*10), 10, dtype=dtype)", + "np.arange(fo.max-(100*7), fo.max, 7, dtype=dtype)", + ) + )) + def test_division_int_reduce(self, dtype, ex_val): + fo = np.iinfo(dtype) + a = eval(ex_val) + lst = a.tolist() + c_div = lambda n, d: ( + 0 if d == 0 or (n and n == fo.min and d == -1) else n//d + ) + + with np.errstate(divide='ignore'): + div_a = np.floor_divide.reduce(a) + div_lst = reduce(c_div, lst) + msg = "Reduce floor integer division check" + assert div_a == div_lst, msg + + with np.errstate(divide='raise'): + with pytest.raises(FloatingPointError): + np.floor_divide.reduce(np.arange(-100, 100, dtype=dtype)) + if fo.min: + with pytest.raises(FloatingPointError): + np.floor_divide.reduce( + np.array([fo.min, 1, -1], dtype=dtype) + ) + +
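# [Editor's note: illustrative sketch, not part of the original patch.]
# The overflow case the reduce test above guards against: the most
# negative value of a signed type has no positive counterpart, so
# fo.min // -1 is unrepresentable; per the tests, NumPy yields 0 and
# reports the condition through the 'divide' error state.
import numpy as np

lo = np.iinfo(np.int8).min               # -128
with np.errstate(divide='ignore'):
    print(np.int8(lo) // np.int8(-1))    # 0, the overflow result
with np.errstate(divide='raise'):
    try:
        np.int8(lo) // np.int8(-1)
    except FloatingPointError:
        pass                             # flagged, as the tests expect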
@pytest.mark.parametrize( + "dividend,divisor,quotient", + [(np.timedelta64(2,'Y'), np.timedelta64(2,'M'), 12), + (np.timedelta64(2,'Y'), np.timedelta64(-2,'M'), -12), + (np.timedelta64(-2,'Y'), np.timedelta64(2,'M'), -12), + (np.timedelta64(-2,'Y'), np.timedelta64(-2,'M'), 12), + (np.timedelta64(2,'M'), np.timedelta64(-2,'Y'), -1), + (np.timedelta64(2,'Y'), np.timedelta64(0,'M'), 0), + (np.timedelta64(2,'Y'), 2, np.timedelta64(1,'Y')), + (np.timedelta64(2,'Y'), -2, np.timedelta64(-1,'Y')), + (np.timedelta64(-2,'Y'), 2, np.timedelta64(-1,'Y')), + (np.timedelta64(-2,'Y'), -2, np.timedelta64(1,'Y')), + (np.timedelta64(-2,'Y'), -2, np.timedelta64(1,'Y')), + (np.timedelta64(-2,'Y'), -3, np.timedelta64(0,'Y')), + (np.timedelta64(-2,'Y'), 0, np.timedelta64('Nat','Y')), + ]) + def test_division_int_timedelta(self, dividend, divisor, quotient): + # If either divisor is 0 or quotient is Nat, check for division by 0 + if divisor and (isinstance(quotient, int) or not np.isnat(quotient)): + msg = "Timedelta floor division check" + assert dividend // divisor == quotient, msg + + # Test for arrays as well + msg = "Timedelta arrays floor division check" + dividend_array = np.array([dividend]*5) + quotient_array = np.array([quotient]*5) + assert all(dividend_array // divisor == quotient_array), msg + else: + with np.errstate(divide='raise', invalid='raise'): + with pytest.raises(FloatingPointError): + dividend // divisor + + def test_division_complex(self): + # check that implementation is correct + msg = "Complex division implementation check" + x = np.array([1. + 1.*1j, 1. + .5*1j, 1. + 2.*1j], dtype=np.complex128) + assert_almost_equal(x**2/x, x, err_msg=msg) + # check overflow, underflow + msg = "Complex division overflow/underflow check" + x = np.array([1.e+110, 1.e-110], dtype=np.complex128) + y = x**2/x + assert_almost_equal(y/x, [1, 1], err_msg=msg) + + def test_zero_division_complex(self): + with np.errstate(invalid="ignore", divide="ignore"): + x = np.array([0.0], dtype=np.complex128) + y = 1.0/x + assert_(np.isinf(y)[0]) + y = complex(np.inf, np.nan)/x + assert_(np.isinf(y)[0]) + y = complex(np.nan, np.inf)/x + assert_(np.isinf(y)[0]) + y = complex(np.inf, np.inf)/x + assert_(np.isinf(y)[0]) + y = 0.0/x + assert_(np.isnan(y)[0]) + + def test_floor_division_complex(self): + # check that floor division, divmod and remainder raises type errors + x = np.array([.9 + 1j, -.1 + 1j, .9 + .5*1j, .9 + 2.*1j], dtype=np.complex128) + with pytest.raises(TypeError): + x // 7 + with pytest.raises(TypeError): + np.divmod(x, 7) + with pytest.raises(TypeError): + np.remainder(x, 7) + + def test_floor_division_signed_zero(self): + # Check that the sign bit is correctly set when dividing positive and + # negative zero by one. 
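# [Editor's note: illustrative sketch, not part of the original patch.]
# Background for the signbit check below: -0.0 == 0.0 compares equal,
# so np.signbit is the reliable way to see which zero an operation
# produced.
import numpy as np

print(np.float64(-0.0) == 0.0)             # True: == cannot see the sign
print(np.signbit(np.float64(0.0) // 1))    # False
print(np.signbit(np.float64(-0.0) // 1))   # True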
+ x = np.zeros(10) + assert_equal(np.signbit(x//1), 0) + assert_equal(np.signbit((-x)//1), 1) + + @pytest.mark.parametrize('dtype', np.typecodes['Float']) + def test_floor_division_errors(self, dtype): + fnan = np.array(np.nan, dtype=dtype) + fone = np.array(1.0, dtype=dtype) + fzer = np.array(0.0, dtype=dtype) + finf = np.array(np.inf, dtype=dtype) + # divide by zero error check + with np.errstate(divide='raise', invalid='ignore'): + assert_raises(FloatingPointError, np.floor_divide, fone, fzer) + with np.errstate(divide='ignore', invalid='raise'): + np.floor_divide(fone, fzer) + + # The following already contain a NaN and should not warn + with np.errstate(all='raise'): + np.floor_divide(fnan, fone) + np.floor_divide(fone, fnan) + np.floor_divide(fnan, fzer) + np.floor_divide(fzer, fnan) + + @pytest.mark.parametrize('dtype', np.typecodes['Float']) + def test_floor_division_corner_cases(self, dtype): + # test corner cases like 1.0//0.0 for errors and return values + x = np.zeros(10, dtype=dtype) + y = np.ones(10, dtype=dtype) + fnan = np.array(np.nan, dtype=dtype) + fone = np.array(1.0, dtype=dtype) + fzer = np.array(0.0, dtype=dtype) + finf = np.array(np.inf, dtype=dtype) + with suppress_warnings() as sup: + sup.filter(RuntimeWarning, "invalid value encountered in floor_divide") + div = np.floor_divide(fnan, fone) + assert(np.isnan(div)), "dt: %s, div: %s" % (dtype, div) + div = np.floor_divide(fone, fnan) + assert(np.isnan(div)), "dt: %s, div: %s" % (dtype, div) + div = np.floor_divide(fnan, fzer) + assert(np.isnan(div)), "dt: %s, div: %s" % (dtype, div) + # verify 1.0//0.0 computations return inf + with np.errstate(divide='ignore'): + z = np.floor_divide(y, x) + assert_(np.isinf(z).all()) + +def floor_divide_and_remainder(x, y): + return (np.floor_divide(x, y), np.remainder(x, y)) + + +def _signs(dt): + if dt in np.typecodes['UnsignedInteger']: + return (+1,) + else: + return (+1, -1) + + +class TestRemainder: + + def test_remainder_basic(self): + dt = np.typecodes['AllInteger'] + np.typecodes['Float'] + for op in [floor_divide_and_remainder, np.divmod]: + for dt1, dt2 in itertools.product(dt, dt): + for sg1, sg2 in itertools.product(_signs(dt1), _signs(dt2)): + fmt = 'op: %s, dt1: %s, dt2: %s, sg1: %s, sg2: %s' + msg = fmt % (op.__name__, dt1, dt2, sg1, sg2) + a = np.array(sg1*71, dtype=dt1) + b = np.array(sg2*19, dtype=dt2) + div, rem = op(a, b) + assert_equal(div*b + rem, a, err_msg=msg) + if sg2 == -1: + assert_(b < rem <= 0, msg) + else: + assert_(b > rem >= 0, msg) + + def test_float_remainder_exact(self): + # test that float results are exact for small integers. This also + # holds for the same integers scaled by powers of two. + nlst = list(range(-127, 0)) + plst = list(range(1, 128)) + dividend = nlst + [0] + plst + divisor = nlst + plst + arg = list(itertools.product(dividend, divisor)) + tgt = list(divmod(*t) for t in arg) + + a, b = np.array(arg, dtype=int).T + # convert exact integer results from Python to float so that + # signed zero can be used; it is checked below.
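# [Editor's note: illustrative sketch, not part of the original patch.]
# Why exactness is a fair requirement in test_float_remainder_exact:
# every integer used there (|n| <= 127) is exactly representable in each
# float type, so divmod has a single correct floating-point answer.
import numpy as np

div, rem = np.divmod(np.float64(-71.0), np.float64(19.0))
assert (div, rem) == divmod(-71, 19)   # exactly (-4.0, 5.0)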
+ tgtdiv, tgtrem = np.array(tgt, dtype=float).T + tgtdiv = np.where((tgtdiv == 0.0) & ((b < 0) ^ (a < 0)), -0.0, tgtdiv) + tgtrem = np.where((tgtrem == 0.0) & (b < 0), -0.0, tgtrem) + + for op in [floor_divide_and_remainder, np.divmod]: + for dt in np.typecodes['Float']: + msg = 'op: %s, dtype: %s' % (op.__name__, dt) + fa = a.astype(dt) + fb = b.astype(dt) + div, rem = op(fa, fb) + assert_equal(div, tgtdiv, err_msg=msg) + assert_equal(rem, tgtrem, err_msg=msg) + + def test_float_remainder_roundoff(self): + # gh-6127 + dt = np.typecodes['Float'] + for op in [floor_divide_and_remainder, np.divmod]: + for dt1, dt2 in itertools.product(dt, dt): + for sg1, sg2 in itertools.product((+1, -1), (+1, -1)): + fmt = 'op: %s, dt1: %s, dt2: %s, sg1: %s, sg2: %s' + msg = fmt % (op.__name__, dt1, dt2, sg1, sg2) + a = np.array(sg1*78*6e-8, dtype=dt1) + b = np.array(sg2*6e-8, dtype=dt2) + div, rem = op(a, b) + # Equal assertion should hold when fmod is used + assert_equal(div*b + rem, a, err_msg=msg) + if sg2 == -1: + assert_(b < rem <= 0, msg) + else: + assert_(b > rem >= 0, msg) + + @pytest.mark.xfail(sys.platform.startswith("darwin"), + reason="MacOS seems to not give the correct 'invalid' warning for " + "`fmod`. Hopefully, others always do.") + @pytest.mark.parametrize('dtype', np.typecodes['Float']) + def test_float_divmod_errors(self, dtype): + # Check valid errors raised for divmod and remainder + fzero = np.array(0.0, dtype=dtype) + fone = np.array(1.0, dtype=dtype) + finf = np.array(np.inf, dtype=dtype) + fnan = np.array(np.nan, dtype=dtype) + # since divmod is combination of both remainder and divide + # ops it will set both dividebyzero and invalid flags + with np.errstate(divide='raise', invalid='ignore'): + assert_raises(FloatingPointError, np.divmod, fone, fzero) + with np.errstate(divide='ignore', invalid='raise'): + assert_raises(FloatingPointError, np.divmod, fone, fzero) + with np.errstate(invalid='raise'): + assert_raises(FloatingPointError, np.divmod, fzero, fzero) + with np.errstate(invalid='raise'): + assert_raises(FloatingPointError, np.divmod, finf, finf) + with np.errstate(divide='ignore', invalid='raise'): + assert_raises(FloatingPointError, np.divmod, finf, fzero) + with np.errstate(divide='raise', invalid='ignore'): + # inf / 0 does not set any flags, only the modulo creates a NaN + np.divmod(finf, fzero) + + @pytest.mark.xfail(sys.platform.startswith("darwin"), + reason="MacOS seems to not give the correct 'invalid' warning for " + "`fmod`. Hopefully, others always do.") + @pytest.mark.parametrize('dtype', np.typecodes['Float']) + @pytest.mark.parametrize('fn', [np.fmod, np.remainder]) + def test_float_remainder_errors(self, dtype, fn): + fzero = np.array(0.0, dtype=dtype) + fone = np.array(1.0, dtype=dtype) + finf = np.array(np.inf, dtype=dtype) + fnan = np.array(np.nan, dtype=dtype) + + # The following already contain a NaN and should not warn. 
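# [Editor's note: illustrative sketch, not part of the original patch.]
# The distinction behind "should not warn": an input NaN propagates
# quietly, whereas an operation that freshly creates a NaN (like 1 % 0)
# must set the 'invalid' flag.
import numpy as np

with np.errstate(all='raise'):
    np.fmod(np.float64(np.nan), 1.0)   # quiet: NaN in, NaN out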
+ with np.errstate(all='raise'): + with pytest.raises(FloatingPointError, + match="invalid value"): + fn(fone, fzero) + fn(fnan, fzero) + fn(fzero, fnan) + fn(fone, fnan) + fn(fnan, fone) + + def test_float_remainder_overflow(self): + a = np.finfo(np.float64).tiny + with np.errstate(over='ignore', invalid='ignore'): + div, mod = np.divmod(4, a) + assert_(np.isinf(div)) + assert_(mod == 0) + with np.errstate(over='raise', invalid='ignore'): + assert_raises(FloatingPointError, np.divmod, 4, a) + with np.errstate(invalid='raise', over='ignore'): + assert_raises(FloatingPointError, np.divmod, 4, a) + + def test_float_divmod_corner_cases(self): + # check nan cases + for dt in np.typecodes['Float']: + fnan = np.array(np.nan, dtype=dt) + fone = np.array(1.0, dtype=dt) + fzer = np.array(0.0, dtype=dt) + finf = np.array(np.inf, dtype=dt) + with suppress_warnings() as sup: + sup.filter(RuntimeWarning, "invalid value encountered in divmod") + sup.filter(RuntimeWarning, "divide by zero encountered in divmod") + div, rem = np.divmod(fone, fzer) + assert(np.isinf(div)), 'dt: %s, div: %s' % (dt, div) + assert(np.isnan(rem)), 'dt: %s, rem: %s' % (dt, rem) + div, rem = np.divmod(fzer, fzer) + assert(np.isnan(rem)), 'dt: %s, rem: %s' % (dt, rem) + assert_(np.isnan(div)), 'dt: %s, div: %s' % (dt, div) + div, rem = np.divmod(finf, finf) + assert(np.isnan(div)), 'dt: %s, div: %s' % (dt, div) + assert(np.isnan(rem)), 'dt: %s, rem: %s' % (dt, rem) + div, rem = np.divmod(finf, fzer) + assert(np.isinf(div)), 'dt: %s, div: %s' % (dt, div) + assert(np.isnan(rem)), 'dt: %s, rem: %s' % (dt, rem) + div, rem = np.divmod(fnan, fone) + assert(np.isnan(rem)), "dt: %s, rem: %s" % (dt, rem) + assert(np.isnan(div)), "dt: %s, div: %s" % (dt, div) + div, rem = np.divmod(fone, fnan) + assert(np.isnan(rem)), "dt: %s, rem: %s" % (dt, rem) + assert(np.isnan(div)), "dt: %s, div: %s" % (dt, div) + div, rem = np.divmod(fnan, fzer) + assert(np.isnan(rem)), "dt: %s, rem: %s" % (dt, rem) + assert(np.isnan(div)), "dt: %s, div: %s" % (dt, div) + + def test_float_remainder_corner_cases(self): + # Check remainder magnitude. + for dt in np.typecodes['Float']: + fone = np.array(1.0, dtype=dt) + fzer = np.array(0.0, dtype=dt) + fnan = np.array(np.nan, dtype=dt) + b = np.array(1.0, dtype=dt) + a = np.nextafter(np.array(0.0, dtype=dt), -b) + rem = np.remainder(a, b) + assert_(rem <= b, 'dt: %s' % dt) + rem = np.remainder(-a, -b) + assert_(rem >= -b, 'dt: %s' % dt) + + # Check nans, inf + with suppress_warnings() as sup: + sup.filter(RuntimeWarning, "invalid value encountered in remainder") + sup.filter(RuntimeWarning, "invalid value encountered in fmod") + for dt in np.typecodes['Float']: + fone = np.array(1.0, dtype=dt) + fzer = np.array(0.0, dtype=dt) + finf = np.array(np.inf, dtype=dt) + fnan = np.array(np.nan, dtype=dt) + rem = np.remainder(fone, fzer) + assert_(np.isnan(rem), 'dt: %s, rem: %s' % (dt, rem)) + # MSVC 2008 returns NaN here, so disable the check.
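# [Editor's note: illustrative sketch, not part of the original patch.]
# The disabled check below relies on IEEE 754 semantics: fmod(x, inf)
# returns x unchanged for finite x; old MSVC runtimes returned NaN
# instead, which is why the assertion is commented out.
import math

print(math.fmod(1.0, math.inf))   # 1.0 on conforming platforms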
+ #rem = np.remainder(fone, finf) + #assert_(rem == fone, 'dt: %s, rem: %s' % (dt, rem)) + rem = np.remainder(finf, fone) + fmod = np.fmod(finf, fone) + assert_(np.isnan(fmod), 'dt: %s, fmod: %s' % (dt, fmod)) + assert_(np.isnan(rem), 'dt: %s, rem: %s' % (dt, rem)) + rem = np.remainder(finf, finf) + fmod = np.fmod(finf, fone) + assert_(np.isnan(rem), 'dt: %s, rem: %s' % (dt, rem)) + assert_(np.isnan(fmod), 'dt: %s, fmod: %s' % (dt, fmod)) + rem = np.remainder(finf, fzer) + fmod = np.fmod(finf, fzer) + assert_(np.isnan(rem), 'dt: %s, rem: %s' % (dt, rem)) + assert_(np.isnan(fmod), 'dt: %s, fmod: %s' % (dt, fmod)) + rem = np.remainder(fone, fnan) + fmod = np.fmod(fone, fnan) + assert_(np.isnan(rem), 'dt: %s, rem: %s' % (dt, rem)) + assert_(np.isnan(fmod), 'dt: %s, fmod: %s' % (dt, fmod)) + rem = np.remainder(fnan, fzer) + fmod = np.fmod(fnan, fzer) + assert_(np.isnan(rem), 'dt: %s, rem: %s' % (dt, rem)) + assert_(np.isnan(fmod), 'dt: %s, fmod: %s' % (dt, rem)) + rem = np.remainder(fnan, fone) + fmod = np.fmod(fnan, fone) + assert_(np.isnan(rem), 'dt: %s, rem: %s' % (dt, rem)) + assert_(np.isnan(fmod), 'dt: %s, fmod: %s' % (dt, rem)) + + +class TestCbrt: + def test_cbrt_scalar(self): + assert_almost_equal((np.cbrt(np.float32(-2.5)**3)), -2.5) + + def test_cbrt(self): + x = np.array([1., 2., -3., np.inf, -np.inf]) + assert_almost_equal(np.cbrt(x**3), x) + + assert_(np.isnan(np.cbrt(np.nan))) + assert_equal(np.cbrt(np.inf), np.inf) + assert_equal(np.cbrt(-np.inf), -np.inf) + + +class TestPower: + def test_power_float(self): + x = np.array([1., 2., 3.]) + assert_equal(x**0, [1., 1., 1.]) + assert_equal(x**1, x) + assert_equal(x**2, [1., 4., 9.]) + y = x.copy() + y **= 2 + assert_equal(y, [1., 4., 9.]) + assert_almost_equal(x**(-1), [1., 0.5, 1./3]) + assert_almost_equal(x**(0.5), [1., ncu.sqrt(2), ncu.sqrt(3)]) + + for out, inp, msg in _gen_alignment_data(dtype=np.float32, + type='unary', + max_size=11): + exp = [ncu.sqrt(i) for i in inp] + assert_almost_equal(inp**(0.5), exp, err_msg=msg) + np.sqrt(inp, out=out) + assert_equal(out, exp, err_msg=msg) + + for out, inp, msg in _gen_alignment_data(dtype=np.float64, + type='unary', + max_size=7): + exp = [ncu.sqrt(i) for i in inp] + assert_almost_equal(inp**(0.5), exp, err_msg=msg) + np.sqrt(inp, out=out) + assert_equal(out, exp, err_msg=msg) + + def test_power_complex(self): + x = np.array([1+2j, 2+3j, 3+4j]) + assert_equal(x**0, [1., 1., 1.]) + assert_equal(x**1, x) + assert_almost_equal(x**2, [-3+4j, -5+12j, -7+24j]) + assert_almost_equal(x**3, [(1+2j)**3, (2+3j)**3, (3+4j)**3]) + assert_almost_equal(x**4, [(1+2j)**4, (2+3j)**4, (3+4j)**4]) + assert_almost_equal(x**(-1), [1/(1+2j), 1/(2+3j), 1/(3+4j)]) + assert_almost_equal(x**(-2), [1/(1+2j)**2, 1/(2+3j)**2, 1/(3+4j)**2]) + assert_almost_equal(x**(-3), [(-11+2j)/125, (-46-9j)/2197, + (-117-44j)/15625]) + assert_almost_equal(x**(0.5), [ncu.sqrt(1+2j), ncu.sqrt(2+3j), + ncu.sqrt(3+4j)]) + norm = 1./((x**14)[0]) + assert_almost_equal(x**14 * norm, + [i * norm for i in [-76443+16124j, 23161315+58317492j, + 5583548873 + 2465133864j]]) + + # Ticket #836 + def assert_complex_equal(x, y): + assert_array_equal(x.real, y.real) + assert_array_equal(x.imag, y.imag) + + for z in [complex(0, np.inf), complex(1, np.inf)]: + z = np.array([z], dtype=np.complex_) + with np.errstate(invalid="ignore"): + assert_complex_equal(z**1, z) + assert_complex_equal(z**2, z*z) + assert_complex_equal(z**3, z*z*z) + + def test_power_zero(self): + # ticket #1271 + zero = np.array([0j]) + one = np.array([1+0j]) + cnan = 
np.array([complex(np.nan, np.nan)]) + # FIXME cinf not tested. + #cinf = np.array([complex(np.inf, 0)]) + + def assert_complex_equal(x, y): + x, y = np.asarray(x), np.asarray(y) + assert_array_equal(x.real, y.real) + assert_array_equal(x.imag, y.imag) + + # positive powers + for p in [0.33, 0.5, 1, 1.5, 2, 3, 4, 5, 6.6]: + assert_complex_equal(np.power(zero, p), zero) + + # zero power + assert_complex_equal(np.power(zero, 0), one) + with np.errstate(invalid="ignore"): + assert_complex_equal(np.power(zero, 0+1j), cnan) + + # negative power + for p in [0.33, 0.5, 1, 1.5, 2, 3, 4, 5, 6.6]: + assert_complex_equal(np.power(zero, -p), cnan) + assert_complex_equal(np.power(zero, -1+0.2j), cnan) + + def test_fast_power(self): + x = np.array([1, 2, 3], np.int16) + res = x**2.0 + assert_((x**2.00001).dtype is res.dtype) + assert_array_equal(res, [1, 4, 9]) + # check the inplace operation on the casted copy doesn't mess with x + assert_(not np.may_share_memory(res, x)) + assert_array_equal(x, [1, 2, 3]) + + # Check that the fast path ignores 1-element not 0-d arrays + res = x ** np.array([[[2]]]) + assert_equal(res.shape, (1, 1, 3)) + + def test_integer_power(self): + a = np.array([15, 15], 'i8') + b = np.power(a, a) + assert_equal(b, [437893890380859375, 437893890380859375]) + + def test_integer_power_with_integer_zero_exponent(self): + dtypes = np.typecodes['Integer'] + for dt in dtypes: + arr = np.arange(-10, 10, dtype=dt) + assert_equal(np.power(arr, 0), np.ones_like(arr)) + + dtypes = np.typecodes['UnsignedInteger'] + for dt in dtypes: + arr = np.arange(10, dtype=dt) + assert_equal(np.power(arr, 0), np.ones_like(arr)) + + def test_integer_power_of_1(self): + dtypes = np.typecodes['AllInteger'] + for dt in dtypes: + arr = np.arange(10, dtype=dt) + assert_equal(np.power(1, arr), np.ones_like(arr)) + + def test_integer_power_of_zero(self): + dtypes = np.typecodes['AllInteger'] + for dt in dtypes: + arr = np.arange(1, 10, dtype=dt) + assert_equal(np.power(0, arr), np.zeros_like(arr)) + + def test_integer_to_negative_power(self): + dtypes = np.typecodes['Integer'] + for dt in dtypes: + a = np.array([0, 1, 2, 3], dtype=dt) + b = np.array([0, 1, 2, -3], dtype=dt) + one = np.array(1, dtype=dt) + minusone = np.array(-1, dtype=dt) + assert_raises(ValueError, np.power, a, b) + assert_raises(ValueError, np.power, a, minusone) + assert_raises(ValueError, np.power, one, b) + assert_raises(ValueError, np.power, one, minusone) + + +class TestFloat_power: + def test_type_conversion(self): + arg_type = '?bhilBHILefdgFDG' + res_type = 'ddddddddddddgDDG' + for dtin, dtout in zip(arg_type, res_type): + msg = "dtin: %s, dtout: %s" % (dtin, dtout) + arg = np.ones(1, dtype=dtin) + res = np.float_power(arg, arg) + assert_(res.dtype.name == np.dtype(dtout).name, msg) + + +class TestLog2: + @pytest.mark.parametrize('dt', ['f', 'd', 'g']) + def test_log2_values(self, dt): + x = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024] + y = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + xf = np.array(x, dtype=dt) + yf = np.array(y, dtype=dt) + assert_almost_equal(np.log2(xf), yf) + + @pytest.mark.parametrize("i", range(1, 65)) + def test_log2_ints(self, i): + # a good log2 implementation should provide this, + # might fail on OS with bad libm + v = np.log2(2.**i) + assert_equal(v, float(i), err_msg='at exponent %d' % i) + + def test_log2_special(self): + assert_equal(np.log2(1.), 0.) 
+ assert_equal(np.log2(np.inf), np.inf) + assert_(np.isnan(np.log2(np.nan))) + + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_(np.isnan(np.log2(-1.))) + assert_(np.isnan(np.log2(-np.inf))) + assert_equal(np.log2(0.), -np.inf) + assert_(w[0].category is RuntimeWarning) + assert_(w[1].category is RuntimeWarning) + assert_(w[2].category is RuntimeWarning) + + +class TestExp2: + def test_exp2_values(self): + x = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024] + y = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + for dt in ['f', 'd', 'g']: + xf = np.array(x, dtype=dt) + yf = np.array(y, dtype=dt) + assert_almost_equal(np.exp2(yf), xf) + + +class TestLogAddExp2(_FilterInvalids): + # Need test for intermediate precisions + def test_logaddexp2_values(self): + x = [1, 2, 3, 4, 5] + y = [5, 4, 3, 2, 1] + z = [6, 6, 6, 6, 6] + for dt, dec_ in zip(['f', 'd', 'g'], [6, 15, 15]): + xf = np.log2(np.array(x, dtype=dt)) + yf = np.log2(np.array(y, dtype=dt)) + zf = np.log2(np.array(z, dtype=dt)) + assert_almost_equal(np.logaddexp2(xf, yf), zf, decimal=dec_) + + def test_logaddexp2_range(self): + x = [1000000, -1000000, 1000200, -1000200] + y = [1000200, -1000200, 1000000, -1000000] + z = [1000200, -1000000, 1000200, -1000000] + for dt in ['f', 'd', 'g']: + logxf = np.array(x, dtype=dt) + logyf = np.array(y, dtype=dt) + logzf = np.array(z, dtype=dt) + assert_almost_equal(np.logaddexp2(logxf, logyf), logzf) + + def test_inf(self): + inf = np.inf + x = [inf, -inf, inf, -inf, inf, 1, -inf, 1] + y = [inf, inf, -inf, -inf, 1, inf, 1, -inf] + z = [inf, inf, inf, -inf, inf, inf, 1, 1] + with np.errstate(invalid='raise'): + for dt in ['f', 'd', 'g']: + logxf = np.array(x, dtype=dt) + logyf = np.array(y, dtype=dt) + logzf = np.array(z, dtype=dt) + assert_equal(np.logaddexp2(logxf, logyf), logzf) + + def test_nan(self): + assert_(np.isnan(np.logaddexp2(np.nan, np.inf))) + assert_(np.isnan(np.logaddexp2(np.inf, np.nan))) + assert_(np.isnan(np.logaddexp2(np.nan, 0))) + assert_(np.isnan(np.logaddexp2(0, np.nan))) + assert_(np.isnan(np.logaddexp2(np.nan, np.nan))) + + def test_reduce(self): + assert_equal(np.logaddexp2.identity, -np.inf) + assert_equal(np.logaddexp2.reduce([]), -np.inf) + assert_equal(np.logaddexp2.reduce([-np.inf]), -np.inf) + assert_equal(np.logaddexp2.reduce([-np.inf, 0]), 0) + + +class TestLog: + def test_log_values(self): + x = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024] + y = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + for dt in ['f', 'd', 'g']: + log2_ = 0.69314718055994530943 + xf = np.array(x, dtype=dt) + yf = np.array(y, dtype=dt)*log2_ + assert_almost_equal(np.log(xf), yf) + + # test aliasing(issue #17761) + x = np.array([2, 0.937500, 3, 0.947500, 1.054697]) + xf = np.log(x) + assert_almost_equal(np.log(x, out=x), xf) + + # test log() of max for dtype does not raise + for dt in ['f', 'd', 'g']: + with np.errstate(all='raise'): + x = np.finfo(dt).max + np.log(x) + + def test_log_strides(self): + np.random.seed(42) + strides = np.array([-4,-3,-2,-1,1,2,3,4]) + sizes = np.arange(2,100) + for ii in sizes: + x_f64 = np.float64(np.random.uniform(low=0.01, high=100.0,size=ii)) + x_special = x_f64.copy() + x_special[3:-1:4] = 1.0 + y_true = np.log(x_f64) + y_special = np.log(x_special) + for jj in strides: + assert_array_almost_equal_nulp(np.log(x_f64[::jj]), y_true[::jj], nulp=2) + assert_array_almost_equal_nulp(np.log(x_special[::jj]), y_special[::jj], nulp=2) + +class TestExp: + def test_exp_values(self): + x = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 
1024] + y = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + for dt in ['f', 'd', 'g']: + log2_ = 0.69314718055994530943 + xf = np.array(x, dtype=dt) + yf = np.array(y, dtype=dt)*log2_ + assert_almost_equal(np.exp(yf), xf) + + def test_exp_strides(self): + np.random.seed(42) + strides = np.array([-4,-3,-2,-1,1,2,3,4]) + sizes = np.arange(2,100) + for ii in sizes: + x_f64 = np.float64(np.random.uniform(low=0.01, high=709.1,size=ii)) + y_true = np.exp(x_f64) + for jj in strides: + assert_array_almost_equal_nulp(np.exp(x_f64[::jj]), y_true[::jj], nulp=2) + +class TestSpecialFloats: + def test_exp_values(self): + x = [np.nan, np.nan, np.inf, 0.] + y = [np.nan, -np.nan, np.inf, -np.inf] + for dt in ['f', 'd', 'g']: + xf = np.array(x, dtype=dt) + yf = np.array(y, dtype=dt) + assert_equal(np.exp(yf), xf) + + # See: https://github.com/numpy/numpy/issues/19192 + @pytest.mark.xfail( + glibc_older_than("2.17"), + reason="Older glibc versions may not raise appropriate FP exceptions" + ) + def test_exp_exceptions(self): + with np.errstate(over='raise'): + assert_raises(FloatingPointError, np.exp, np.float32(100.)) + assert_raises(FloatingPointError, np.exp, np.float32(1E19)) + assert_raises(FloatingPointError, np.exp, np.float64(800.)) + assert_raises(FloatingPointError, np.exp, np.float64(1E19)) + + with np.errstate(under='raise'): + assert_raises(FloatingPointError, np.exp, np.float32(-1000.)) + assert_raises(FloatingPointError, np.exp, np.float32(-1E19)) + assert_raises(FloatingPointError, np.exp, np.float64(-1000.)) + assert_raises(FloatingPointError, np.exp, np.float64(-1E19)) + + def test_log_values(self): + with np.errstate(all='ignore'): + x = [np.nan, np.nan, np.inf, np.nan, -np.inf, np.nan] + y = [np.nan, -np.nan, np.inf, -np.inf, 0.0, -1.0] + y1p = [np.nan, -np.nan, np.inf, -np.inf, -1.0, -2.0] + for dt in ['f', 'd', 'g']: + xf = np.array(x, dtype=dt) + yf = np.array(y, dtype=dt) + yf1p = np.array(y1p, dtype=dt) + assert_equal(np.log(yf), xf) + assert_equal(np.log2(yf), xf) + assert_equal(np.log10(yf), xf) + assert_equal(np.log1p(yf1p), xf) + + with np.errstate(divide='raise'): + for dt in ['f', 'd']: + assert_raises(FloatingPointError, np.log, + np.array(0.0, dtype=dt)) + assert_raises(FloatingPointError, np.log2, + np.array(0.0, dtype=dt)) + assert_raises(FloatingPointError, np.log10, + np.array(0.0, dtype=dt)) + assert_raises(FloatingPointError, np.log1p, + np.array(-1.0, dtype=dt)) + + with np.errstate(invalid='raise'): + for dt in ['f', 'd']: + assert_raises(FloatingPointError, np.log, + np.array(-np.inf, dtype=dt)) + assert_raises(FloatingPointError, np.log, + np.array(-1.0, dtype=dt)) + assert_raises(FloatingPointError, np.log2, + np.array(-np.inf, dtype=dt)) + assert_raises(FloatingPointError, np.log2, + np.array(-1.0, dtype=dt)) + assert_raises(FloatingPointError, np.log10, + np.array(-np.inf, dtype=dt)) + assert_raises(FloatingPointError, np.log10, + np.array(-1.0, dtype=dt)) + assert_raises(FloatingPointError, np.log1p, + np.array(-np.inf, dtype=dt)) + assert_raises(FloatingPointError, np.log1p, + np.array(-2.0, dtype=dt)) + + # See https://github.com/numpy/numpy/issues/18005 + with assert_no_warnings(): + a = np.array(1e9, dtype='float32') + np.log(a) + + def test_sincos_values(self): + with np.errstate(all='ignore'): + x = [np.nan, np.nan, np.nan, np.nan] + y = [np.nan, -np.nan, np.inf, -np.inf] + for dt in ['f', 'd', 'g']: + xf = np.array(x, dtype=dt) + yf = np.array(y, dtype=dt) + assert_equal(np.sin(yf), xf) + assert_equal(np.cos(yf), xf) + + with np.errstate(invalid='raise'): + 
assert_raises(FloatingPointError, np.sin, np.float32(-np.inf)) + assert_raises(FloatingPointError, np.sin, np.float32(np.inf)) + assert_raises(FloatingPointError, np.cos, np.float32(-np.inf)) + assert_raises(FloatingPointError, np.cos, np.float32(np.inf)) + + @pytest.mark.parametrize('dt', ['f', 'd', 'g']) + def test_sqrt_values(self, dt): + with np.errstate(all='ignore'): + x = [np.nan, np.nan, np.inf, np.nan, 0.] + y = [np.nan, -np.nan, np.inf, -np.inf, 0.] + xf = np.array(x, dtype=dt) + yf = np.array(y, dtype=dt) + assert_equal(np.sqrt(yf), xf) + + # with np.errstate(invalid='raise'): + # assert_raises( + # FloatingPointError, np.sqrt, np.array(-100., dtype=dt) + # ) + + def test_abs_values(self): + x = [np.nan, np.nan, np.inf, np.inf, 0., 0., 1.0, 1.0] + y = [np.nan, -np.nan, np.inf, -np.inf, 0., -0., -1.0, 1.0] + for dt in ['f', 'd', 'g']: + xf = np.array(x, dtype=dt) + yf = np.array(y, dtype=dt) + assert_equal(np.abs(yf), xf) + + def test_square_values(self): + x = [np.nan, np.nan, np.inf, np.inf] + y = [np.nan, -np.nan, np.inf, -np.inf] + with np.errstate(all='ignore'): + for dt in ['f', 'd', 'g']: + xf = np.array(x, dtype=dt) + yf = np.array(y, dtype=dt) + assert_equal(np.square(yf), xf) + + with np.errstate(over='raise'): + assert_raises(FloatingPointError, np.square, + np.array(1E32, dtype='f')) + assert_raises(FloatingPointError, np.square, + np.array(1E200, dtype='d')) + + def test_reciprocal_values(self): + with np.errstate(all='ignore'): + x = [np.nan, np.nan, 0.0, -0.0, np.inf, -np.inf] + y = [np.nan, -np.nan, np.inf, -np.inf, 0., -0.] + for dt in ['f', 'd', 'g']: + xf = np.array(x, dtype=dt) + yf = np.array(y, dtype=dt) + assert_equal(np.reciprocal(yf), xf) + + with np.errstate(divide='raise'): + for dt in ['f', 'd', 'g']: + assert_raises(FloatingPointError, np.reciprocal, + np.array(-0.0, dtype=dt)) + + def test_tan(self): + with np.errstate(all='ignore'): + in_ = [np.nan, -np.nan, 0.0, -0.0, np.inf, -np.inf] + out = [np.nan, np.nan, 0.0, -0.0, np.nan, np.nan] + for dt in ['f', 'd']: + in_arr = np.array(in_, dtype=dt) + out_arr = np.array(out, dtype=dt) + assert_equal(np.tan(in_arr), out_arr) + + with np.errstate(invalid='raise'): + for dt in ['f', 'd']: + assert_raises(FloatingPointError, np.tan, + np.array(np.inf, dtype=dt)) + assert_raises(FloatingPointError, np.tan, + np.array(-np.inf, dtype=dt)) + + def test_arcsincos(self): + with np.errstate(all='ignore'): + in_ = [np.nan, -np.nan, np.inf, -np.inf] + out = [np.nan, np.nan, np.nan, np.nan] + for dt in ['f', 'd']: + in_arr = np.array(in_, dtype=dt) + out_arr = np.array(out, dtype=dt) + assert_equal(np.arcsin(in_arr), out_arr) + assert_equal(np.arccos(in_arr), out_arr) + + for callable in [np.arcsin, np.arccos]: + for value in [np.inf, -np.inf, 2.0, -2.0]: + for dt in ['f', 'd']: + with np.errstate(invalid='raise'): + assert_raises(FloatingPointError, callable, + np.array(value, dtype=dt)) + + def test_arctan(self): + with np.errstate(all='ignore'): + in_ = [np.nan, -np.nan] + out = [np.nan, np.nan] + for dt in ['f', 'd']: + in_arr = np.array(in_, dtype=dt) + out_arr = np.array(out, dtype=dt) + assert_equal(np.arctan(in_arr), out_arr) + + def test_sinh(self): + in_ = [np.nan, -np.nan, np.inf, -np.inf] + out = [np.nan, np.nan, np.inf, -np.inf] + for dt in ['f', 'd']: + in_arr = np.array(in_, dtype=dt) + out_arr = np.array(out, dtype=dt) + assert_equal(np.sinh(in_arr), out_arr) + + with np.errstate(over='raise'): + assert_raises(FloatingPointError, np.sinh, + np.array(120.0, dtype='f')) + 
assert_raises(FloatingPointError, np.sinh, + np.array(1200.0, dtype='d')) + + def test_cosh(self): + in_ = [np.nan, -np.nan, np.inf, -np.inf] + out = [np.nan, np.nan, np.inf, np.inf] + for dt in ['f', 'd']: + in_arr = np.array(in_, dtype=dt) + out_arr = np.array(out, dtype=dt) + assert_equal(np.cosh(in_arr), out_arr) + + with np.errstate(over='raise'): + assert_raises(FloatingPointError, np.cosh, + np.array(120.0, dtype='f')) + assert_raises(FloatingPointError, np.cosh, + np.array(1200.0, dtype='d')) + + def test_tanh(self): + in_ = [np.nan, -np.nan, np.inf, -np.inf] + out = [np.nan, np.nan, 1.0, -1.0] + for dt in ['f', 'd']: + in_arr = np.array(in_, dtype=dt) + out_arr = np.array(out, dtype=dt) + assert_equal(np.tanh(in_arr), out_arr) + + def test_arcsinh(self): + in_ = [np.nan, -np.nan, np.inf, -np.inf] + out = [np.nan, np.nan, np.inf, -np.inf] + for dt in ['f', 'd']: + in_arr = np.array(in_, dtype=dt) + out_arr = np.array(out, dtype=dt) + assert_equal(np.arcsinh(in_arr), out_arr) + + def test_arccosh(self): + with np.errstate(all='ignore'): + in_ = [np.nan, -np.nan, np.inf, -np.inf, 1.0, 0.0] + out = [np.nan, np.nan, np.inf, np.nan, 0.0, np.nan] + for dt in ['f', 'd']: + in_arr = np.array(in_, dtype=dt) + out_arr = np.array(out, dtype=dt) + assert_equal(np.arccosh(in_arr), out_arr) + + for value in [0.0, -np.inf]: + with np.errstate(invalid='raise'): + for dt in ['f', 'd']: + assert_raises(FloatingPointError, np.arccosh, + np.array(value, dtype=dt)) + + def test_arctanh(self): + with np.errstate(all='ignore'): + in_ = [np.nan, -np.nan, np.inf, -np.inf, 1.0, -1.0, 2.0] + out = [np.nan, np.nan, np.nan, np.nan, np.inf, -np.inf, np.nan] + for dt in ['f', 'd']: + in_arr = np.array(in_, dtype=dt) + out_arr = np.array(out, dtype=dt) + assert_equal(np.arctanh(in_arr), out_arr) + + for value in [1.01, np.inf, -np.inf, 1.0, -1.0]: + with np.errstate(invalid='raise', divide='raise'): + for dt in ['f', 'd']: + assert_raises(FloatingPointError, np.arctanh, + np.array(value, dtype=dt)) + + def test_exp2(self): + with np.errstate(all='ignore'): + in_ = [np.nan, -np.nan, np.inf, -np.inf] + out = [np.nan, np.nan, np.inf, 0.0] + for dt in ['f', 'd']: + in_arr = np.array(in_, dtype=dt) + out_arr = np.array(out, dtype=dt) + assert_equal(np.exp2(in_arr), out_arr) + + for value in [2000.0, -2000.0]: + with np.errstate(over='raise', under='raise'): + for dt in ['f', 'd']: + assert_raises(FloatingPointError, np.exp2, + np.array(value, dtype=dt)) + + def test_expm1(self): + with np.errstate(all='ignore'): + in_ = [np.nan, -np.nan, np.inf, -np.inf] + out = [np.nan, np.nan, np.inf, -1.0] + for dt in ['f', 'd']: + in_arr = np.array(in_, dtype=dt) + out_arr = np.array(out, dtype=dt) + assert_equal(np.expm1(in_arr), out_arr) + + for value in [200.0, 2000.0]: + with np.errstate(over='raise'): + assert_raises(FloatingPointError, np.expm1, + np.array(value, dtype='f')) + +class TestFPClass: + @pytest.mark.parametrize("stride", [-4,-2,-1,1,2,4]) + def test_fpclass(self, stride): + arr_f64 = np.array([np.nan, -np.nan, np.inf, -np.inf, -1.0, 1.0, -0.0, 0.0, 2.2251e-308, -2.2251e-308], dtype='d') + arr_f32 = np.array([np.nan, -np.nan, np.inf, -np.inf, -1.0, 1.0, -0.0, 0.0, 1.4013e-045, -1.4013e-045], dtype='f') + nan = np.array([True, True, False, False, False, False, False, False, False, False]) + inf = np.array([False, False, True, True, False, False, False, False, False, False]) + sign = np.array([False, True, False, True, True, False, True, False, False, True]) + finite = np.array([False, False, False, False, True, 
True, True, True, True, True]) + assert_equal(np.isnan(arr_f32[::stride]), nan[::stride]) + assert_equal(np.isnan(arr_f64[::stride]), nan[::stride]) + assert_equal(np.isinf(arr_f32[::stride]), inf[::stride]) + assert_equal(np.isinf(arr_f64[::stride]), inf[::stride]) + assert_equal(np.signbit(arr_f32[::stride]), sign[::stride]) + assert_equal(np.signbit(arr_f64[::stride]), sign[::stride]) + assert_equal(np.isfinite(arr_f32[::stride]), finite[::stride]) + assert_equal(np.isfinite(arr_f64[::stride]), finite[::stride]) + +class TestLDExp: + @pytest.mark.parametrize("stride", [-4,-2,-1,1,2,4]) + @pytest.mark.parametrize("dtype", ['f', 'd']) + def test_ldexp(self, dtype, stride): + mant = np.array([0.125, 0.25, 0.5, 1., 1., 2., 4., 8.], dtype=dtype) + exp = np.array([3, 2, 1, 0, 0, -1, -2, -3], dtype='i') + out = np.zeros(8, dtype=dtype) + assert_equal(np.ldexp(mant[::stride], exp[::stride], out=out[::stride]), np.ones(8, dtype=dtype)[::stride]) + assert_equal(out[::stride], np.ones(8, dtype=dtype)[::stride]) + +class TestFRExp: + @pytest.mark.parametrize("stride", [-4,-2,-1,1,2,4]) + @pytest.mark.parametrize("dtype", ['f', 'd']) + @pytest.mark.skipif(not sys.platform.startswith('linux'), + reason="np.frexp gives different answers for NAN/INF on windows and linux") + def test_frexp(self, dtype, stride): + arr = np.array([np.nan, np.nan, np.inf, -np.inf, 0.0, -0.0, 1.0, -1.0], dtype=dtype) + mant_true = np.array([np.nan, np.nan, np.inf, -np.inf, 0.0, -0.0, 0.5, -0.5], dtype=dtype) + exp_true = np.array([0, 0, 0, 0, 0, 0, 1, 1], dtype='i') + out_mant = np.ones(8, dtype=dtype) + out_exp = 2*np.ones(8, dtype='i') + mant, exp = np.frexp(arr[::stride], out=(out_mant[::stride], out_exp[::stride])) + assert_equal(mant_true[::stride], mant) + assert_equal(exp_true[::stride], exp) + assert_equal(out_mant[::stride], mant_true[::stride]) + assert_equal(out_exp[::stride], exp_true[::stride]) + +# func : [maxulperror, low, high] +avx_ufuncs = {'sqrt' :[1, 0., 100.], + 'absolute' :[0, -100., 100.], + 'reciprocal' :[1, 1., 100.], + 'square' :[1, -100., 100.], + 'rint' :[0, -100., 100.], + 'floor' :[0, -100., 100.], + 'ceil' :[0, -100., 100.], + 'trunc' :[0, -100., 100.]} + +class TestAVXUfuncs: + def test_avx_based_ufunc(self): + strides = np.array([-4,-3,-2,-1,1,2,3,4]) + np.random.seed(42) + for func, prop in avx_ufuncs.items(): + maxulperr = prop[0] + minval = prop[1] + maxval = prop[2] + # various array sizes to ensure masking in AVX is tested + for size in range(1,32): + myfunc = getattr(np, func) + x_f32 = np.float32(np.random.uniform(low=minval, high=maxval, + size=size)) + x_f64 = np.float64(x_f32) + x_f128 = np.longdouble(x_f32) + y_true128 = myfunc(x_f128) + if maxulperr == 0: + assert_equal(myfunc(x_f32), np.float32(y_true128)) + assert_equal(myfunc(x_f64), np.float64(y_true128)) + else: + assert_array_max_ulp(myfunc(x_f32), np.float32(y_true128), + maxulp=maxulperr) + assert_array_max_ulp(myfunc(x_f64), np.float64(y_true128), + maxulp=maxulperr) + # various strides to test gather instruction + if size > 1: + y_true32 = myfunc(x_f32) + y_true64 = myfunc(x_f64) + for jj in strides: + assert_equal(myfunc(x_f64[::jj]), y_true64[::jj]) + assert_equal(myfunc(x_f32[::jj]), y_true32[::jj]) + +class TestAVXFloat32Transcendental: + def test_exp_float32(self): + np.random.seed(42) + x_f32 = np.float32(np.random.uniform(low=0.0,high=88.1,size=1000000)) + x_f64 = np.float64(x_f32) + assert_array_max_ulp(np.exp(x_f32), np.float32(np.exp(x_f64)), maxulp=3) + + def test_log_float32(self): + np.random.seed(42) + 
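# Same idea as test_exp_float32: run the float32 loop and compare it
+        # against a float64 reference demoted back to float32, allowing a
+        # few ULP of divergence. + 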
x_f32 = np.float32(np.random.uniform(low=0.0,high=1000,size=1000000)) + x_f64 = np.float64(x_f32) + assert_array_max_ulp(np.log(x_f32), np.float32(np.log(x_f64)), maxulp=4) + + def test_sincos_float32(self): + np.random.seed(42) + N = 1000000 + M = np.int_(N/20) + index = np.random.randint(low=0, high=N, size=M) + x_f32 = np.float32(np.random.uniform(low=-100.,high=100.,size=N)) + if not glibc_older_than("2.17"): + # test coverage for elements > 117435.992f for which glibc is used + # this is known to be problematic on old glibc, so skip it there + x_f32[index] = np.float32(10E+10*np.random.rand(M)) + x_f64 = np.float64(x_f32) + assert_array_max_ulp(np.sin(x_f32), np.float32(np.sin(x_f64)), maxulp=2) + assert_array_max_ulp(np.cos(x_f32), np.float32(np.cos(x_f64)), maxulp=2) + # test aliasing(issue #17761) + tx_f32 = x_f32.copy() + assert_array_max_ulp(np.sin(x_f32, out=x_f32), np.float32(np.sin(x_f64)), maxulp=2) + assert_array_max_ulp(np.cos(tx_f32, out=tx_f32), np.float32(np.cos(x_f64)), maxulp=2) + + def test_strided_float32(self): + np.random.seed(42) + strides = np.array([-4,-3,-2,-1,1,2,3,4]) + sizes = np.arange(2,100) + for ii in sizes: + x_f32 = np.float32(np.random.uniform(low=0.01,high=88.1,size=ii)) + x_f32_large = x_f32.copy() + x_f32_large[3:-1:4] = 120000.0 + exp_true = np.exp(x_f32) + log_true = np.log(x_f32) + sin_true = np.sin(x_f32_large) + cos_true = np.cos(x_f32_large) + for jj in strides: + assert_array_almost_equal_nulp(np.exp(x_f32[::jj]), exp_true[::jj], nulp=2) + assert_array_almost_equal_nulp(np.log(x_f32[::jj]), log_true[::jj], nulp=2) + assert_array_almost_equal_nulp(np.sin(x_f32_large[::jj]), sin_true[::jj], nulp=2) + assert_array_almost_equal_nulp(np.cos(x_f32_large[::jj]), cos_true[::jj], nulp=2) + +class TestLogAddExp(_FilterInvalids): + def test_logaddexp_values(self): + x = [1, 2, 3, 4, 5] + y = [5, 4, 3, 2, 1] + z = [6, 6, 6, 6, 6] + for dt, dec_ in zip(['f', 'd', 'g'], [6, 15, 15]): + xf = np.log(np.array(x, dtype=dt)) + yf = np.log(np.array(y, dtype=dt)) + zf = np.log(np.array(z, dtype=dt)) + assert_almost_equal(np.logaddexp(xf, yf), zf, decimal=dec_) + + def test_logaddexp_range(self): + x = [1000000, -1000000, 1000200, -1000200] + y = [1000200, -1000200, 1000000, -1000000] + z = [1000200, -1000000, 1000200, -1000000] + for dt in ['f', 'd', 'g']: + logxf = np.array(x, dtype=dt) + logyf = np.array(y, dtype=dt) + logzf = np.array(z, dtype=dt) + assert_almost_equal(np.logaddexp(logxf, logyf), logzf) + + def test_inf(self): + inf = np.inf + x = [inf, -inf, inf, -inf, inf, 1, -inf, 1] + y = [inf, inf, -inf, -inf, 1, inf, 1, -inf] + z = [inf, inf, inf, -inf, inf, inf, 1, 1] + with np.errstate(invalid='raise'): + for dt in ['f', 'd', 'g']: + logxf = np.array(x, dtype=dt) + logyf = np.array(y, dtype=dt) + logzf = np.array(z, dtype=dt) + assert_equal(np.logaddexp(logxf, logyf), logzf) + + def test_nan(self): + assert_(np.isnan(np.logaddexp(np.nan, np.inf))) + assert_(np.isnan(np.logaddexp(np.inf, np.nan))) + assert_(np.isnan(np.logaddexp(np.nan, 0))) + assert_(np.isnan(np.logaddexp(0, np.nan))) + assert_(np.isnan(np.logaddexp(np.nan, np.nan))) + + def test_reduce(self): + assert_equal(np.logaddexp.identity, -np.inf) + assert_equal(np.logaddexp.reduce([]), -np.inf) + + +class TestLog1p: + def test_log1p(self): + assert_almost_equal(ncu.log1p(0.2), ncu.log(1.2)) + assert_almost_equal(ncu.log1p(1e-6), ncu.log(1+1e-6)) + + def test_special(self): + with np.errstate(invalid="ignore", divide="ignore"): + assert_equal(ncu.log1p(np.nan), np.nan) + 
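# Remaining C99-style special values: log1p(+inf) -> +inf,
+            # log1p(-1.) -> -inf (a 'divide' error) and log1p(x < -1) -> nan
+            # (an 'invalid' error), hence both categories are ignored above. + 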
assert_equal(ncu.log1p(np.inf), np.inf) + assert_equal(ncu.log1p(-1.), -np.inf) + assert_equal(ncu.log1p(-2.), np.nan) + assert_equal(ncu.log1p(-np.inf), np.nan) + + +class TestExpm1: + def test_expm1(self): + assert_almost_equal(ncu.expm1(0.2), ncu.exp(0.2)-1) + assert_almost_equal(ncu.expm1(1e-6), ncu.exp(1e-6)-1) + + def test_special(self): + assert_equal(ncu.expm1(np.inf), np.inf) + assert_equal(ncu.expm1(0.), 0.) + assert_equal(ncu.expm1(-0.), -0.) + assert_equal(ncu.expm1(np.inf), np.inf) + assert_equal(ncu.expm1(-np.inf), -1.) + + def test_complex(self): + x = np.asarray(1e-12) + assert_allclose(x, ncu.expm1(x)) + x = x.astype(np.complex128) + assert_allclose(x, ncu.expm1(x)) + + +class TestHypot: + def test_simple(self): + assert_almost_equal(ncu.hypot(1, 1), ncu.sqrt(2)) + assert_almost_equal(ncu.hypot(0, 0), 0) + + def test_reduce(self): + assert_almost_equal(ncu.hypot.reduce([3.0, 4.0]), 5.0) + assert_almost_equal(ncu.hypot.reduce([3.0, 4.0, 0]), 5.0) + assert_almost_equal(ncu.hypot.reduce([9.0, 12.0, 20.0]), 25.0) + assert_equal(ncu.hypot.reduce([]), 0.0) + + +def assert_hypot_isnan(x, y): + with np.errstate(invalid='ignore'): + assert_(np.isnan(ncu.hypot(x, y)), + "hypot(%s, %s) is %s, not nan" % (x, y, ncu.hypot(x, y))) + + +def assert_hypot_isinf(x, y): + with np.errstate(invalid='ignore'): + assert_(np.isinf(ncu.hypot(x, y)), + "hypot(%s, %s) is %s, not inf" % (x, y, ncu.hypot(x, y))) + + +class TestHypotSpecialValues: + def test_nan_outputs(self): + assert_hypot_isnan(np.nan, np.nan) + assert_hypot_isnan(np.nan, 1) + + def test_nan_outputs2(self): + assert_hypot_isinf(np.nan, np.inf) + assert_hypot_isinf(np.inf, np.nan) + assert_hypot_isinf(np.inf, 0) + assert_hypot_isinf(0, np.inf) + assert_hypot_isinf(np.inf, np.inf) + assert_hypot_isinf(np.inf, 23.0) + + def test_no_fpe(self): + assert_no_warnings(ncu.hypot, np.inf, 0) + + +def assert_arctan2_isnan(x, y): + assert_(np.isnan(ncu.arctan2(x, y)), "arctan(%s, %s) is %s, not nan" % (x, y, ncu.arctan2(x, y))) + + +def assert_arctan2_ispinf(x, y): + assert_((np.isinf(ncu.arctan2(x, y)) and ncu.arctan2(x, y) > 0), "arctan(%s, %s) is %s, not +inf" % (x, y, ncu.arctan2(x, y))) + + +def assert_arctan2_isninf(x, y): + assert_((np.isinf(ncu.arctan2(x, y)) and ncu.arctan2(x, y) < 0), "arctan(%s, %s) is %s, not -inf" % (x, y, ncu.arctan2(x, y))) + + +def assert_arctan2_ispzero(x, y): + assert_((ncu.arctan2(x, y) == 0 and not np.signbit(ncu.arctan2(x, y))), "arctan(%s, %s) is %s, not +0" % (x, y, ncu.arctan2(x, y))) + + +def assert_arctan2_isnzero(x, y): + assert_((ncu.arctan2(x, y) == 0 and np.signbit(ncu.arctan2(x, y))), "arctan(%s, %s) is %s, not -0" % (x, y, ncu.arctan2(x, y))) + + +class TestArctan2SpecialValues: + def test_one_one(self): + # atan2(1, 1) returns pi/4. + assert_almost_equal(ncu.arctan2(1, 1), 0.25 * np.pi) + assert_almost_equal(ncu.arctan2(-1, 1), -0.25 * np.pi) + assert_almost_equal(ncu.arctan2(1, -1), 0.75 * np.pi) + + def test_zero_nzero(self): + # atan2(+-0, -0) returns +-pi. + assert_almost_equal(ncu.arctan2(np.PZERO, np.NZERO), np.pi) + assert_almost_equal(ncu.arctan2(np.NZERO, np.NZERO), -np.pi) + + def test_zero_pzero(self): + # atan2(+-0, +0) returns +-0. + assert_arctan2_ispzero(np.PZERO, np.PZERO) + assert_arctan2_isnzero(np.NZERO, np.PZERO) + + def test_zero_negative(self): + # atan2(+-0, x) returns +-pi for x < 0. + assert_almost_equal(ncu.arctan2(np.PZERO, -1), np.pi) + assert_almost_equal(ncu.arctan2(np.NZERO, -1), -np.pi) + + def test_zero_positive(self): + # atan2(+-0, x) returns +-0 for x > 0. 
+ assert_arctan2_ispzero(np.PZERO, 1) + assert_arctan2_isnzero(np.NZERO, 1) + + def test_positive_zero(self): + # atan2(y, +-0) returns +pi/2 for y > 0. + assert_almost_equal(ncu.arctan2(1, np.PZERO), 0.5 * np.pi) + assert_almost_equal(ncu.arctan2(1, np.NZERO), 0.5 * np.pi) + + def test_negative_zero(self): + # atan2(y, +-0) returns -pi/2 for y < 0. + assert_almost_equal(ncu.arctan2(-1, np.PZERO), -0.5 * np.pi) + assert_almost_equal(ncu.arctan2(-1, np.NZERO), -0.5 * np.pi) + + def test_any_ninf(self): + # atan2(+-y, -infinity) returns +-pi for finite y > 0. + assert_almost_equal(ncu.arctan2(1, np.NINF), np.pi) + assert_almost_equal(ncu.arctan2(-1, np.NINF), -np.pi) + + def test_any_pinf(self): + # atan2(+-y, +infinity) returns +-0 for finite y > 0. + assert_arctan2_ispzero(1, np.inf) + assert_arctan2_isnzero(-1, np.inf) + + def test_inf_any(self): + # atan2(+-infinity, x) returns +-pi/2 for finite x. + assert_almost_equal(ncu.arctan2( np.inf, 1), 0.5 * np.pi) + assert_almost_equal(ncu.arctan2(-np.inf, 1), -0.5 * np.pi) + + def test_inf_ninf(self): + # atan2(+-infinity, -infinity) returns +-3*pi/4. + assert_almost_equal(ncu.arctan2( np.inf, -np.inf), 0.75 * np.pi) + assert_almost_equal(ncu.arctan2(-np.inf, -np.inf), -0.75 * np.pi) + + def test_inf_pinf(self): + # atan2(+-infinity, +infinity) returns +-pi/4. + assert_almost_equal(ncu.arctan2( np.inf, np.inf), 0.25 * np.pi) + assert_almost_equal(ncu.arctan2(-np.inf, np.inf), -0.25 * np.pi) + + def test_nan_any(self): + # atan2(nan, x) returns nan for any x, including inf + assert_arctan2_isnan(np.nan, np.inf) + assert_arctan2_isnan(np.inf, np.nan) + assert_arctan2_isnan(np.nan, np.nan) + + +class TestLdexp: + def _check_ldexp(self, tp): + assert_almost_equal(ncu.ldexp(np.array(2., np.float32), + np.array(3, tp)), 16.) + assert_almost_equal(ncu.ldexp(np.array(2., np.float64), + np.array(3, tp)), 16.) + assert_almost_equal(ncu.ldexp(np.array(2., np.longdouble), + np.array(3, tp)), 16.) + + def test_ldexp(self): + # The default Python int type should work + assert_almost_equal(ncu.ldexp(2., 3), 16.) 
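+        # ldexp(m, e) scales by an exact power of two: 2.0 * 2**3 == 16.0.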
+ # The following int types should all be accepted + self._check_ldexp(np.int8) + self._check_ldexp(np.int16) + self._check_ldexp(np.int32) + self._check_ldexp('i') + self._check_ldexp('l') + + def test_ldexp_overflow(self): + # silence warning emitted on overflow + with np.errstate(over="ignore"): + imax = np.iinfo(np.dtype('l')).max + imin = np.iinfo(np.dtype('l')).min + assert_equal(ncu.ldexp(2., imax), np.inf) + assert_equal(ncu.ldexp(2., imin), 0) + + +class TestMaximum(_FilterInvalids): + def test_reduce(self): + dflt = np.typecodes['AllFloat'] + dint = np.typecodes['AllInteger'] + seq1 = np.arange(11) + seq2 = seq1[::-1] + func = np.maximum.reduce + for dt in dint: + tmp1 = seq1.astype(dt) + tmp2 = seq2.astype(dt) + assert_equal(func(tmp1), 10) + assert_equal(func(tmp2), 10) + for dt in dflt: + tmp1 = seq1.astype(dt) + tmp2 = seq2.astype(dt) + assert_equal(func(tmp1), 10) + assert_equal(func(tmp2), 10) + tmp1[::2] = np.nan + tmp2[::2] = np.nan + assert_equal(func(tmp1), np.nan) + assert_equal(func(tmp2), np.nan) + + def test_reduce_complex(self): + assert_equal(np.maximum.reduce([1, 2j]), 1) + assert_equal(np.maximum.reduce([1+3j, 2j]), 1+3j) + + def test_float_nans(self): + nan = np.nan + arg1 = np.array([0, nan, nan]) + arg2 = np.array([nan, 0, nan]) + out = np.array([nan, nan, nan]) + assert_equal(np.maximum(arg1, arg2), out) + + def test_object_nans(self): + # Multiple checks to give this a chance to + # fail if cmp is used instead of rich compare. + # Failure cannot be guaranteed. + for i in range(1): + x = np.array(float('nan'), object) + y = 1.0 + z = np.array(float('nan'), object) + assert_(np.maximum(x, y) == 1.0) + assert_(np.maximum(z, y) == 1.0) + + def test_complex_nans(self): + nan = np.nan + for cnan in [complex(nan, 0), complex(0, nan), complex(nan, nan)]: + arg1 = np.array([0, cnan, cnan], dtype=complex) + arg2 = np.array([cnan, 0, cnan], dtype=complex) + out = np.array([nan, nan, nan], dtype=complex) + assert_equal(np.maximum(arg1, arg2), out) + + def test_object_array(self): + arg1 = np.arange(5, dtype=object) + arg2 = arg1 + 1 + assert_equal(np.maximum(arg1, arg2), arg2) + + def test_strided_array(self): + arr1 = np.array([-4.0, 1.0, 10.0, 0.0, np.nan, -np.nan, np.inf, -np.inf]) + arr2 = np.array([-2.0,-1.0, np.nan, 1.0, 0.0, np.nan, 1.0, -3.0]) + maxtrue = np.array([-2.0, 1.0, np.nan, 1.0, np.nan, np.nan, np.inf, -3.0]) + out = np.ones(8) + out_maxtrue = np.array([-2.0, 1.0, 1.0, 10.0, 1.0, 1.0, np.nan, 1.0]) + assert_equal(np.maximum(arr1,arr2), maxtrue) + assert_equal(np.maximum(arr1[::2],arr2[::2]), maxtrue[::2]) + assert_equal(np.maximum(arr1[:4:], arr2[::2]), np.array([-2.0, np.nan, 10.0, 1.0])) + assert_equal(np.maximum(arr1[::3], arr2[:3:]), np.array([-2.0, 0.0, np.nan])) + assert_equal(np.maximum(arr1[:6:2], arr2[::3], out=out[::3]), np.array([-2.0, 10., np.nan])) + assert_equal(out, out_maxtrue) + + +class TestMinimum(_FilterInvalids): + def test_reduce(self): + dflt = np.typecodes['AllFloat'] + dint = np.typecodes['AllInteger'] + seq1 = np.arange(11) + seq2 = seq1[::-1] + func = np.minimum.reduce + for dt in dint: + tmp1 = seq1.astype(dt) + tmp2 = seq2.astype(dt) + assert_equal(func(tmp1), 0) + assert_equal(func(tmp2), 0) + for dt in dflt: + tmp1 = seq1.astype(dt) + tmp2 = seq2.astype(dt) + assert_equal(func(tmp1), 0) + assert_equal(func(tmp2), 0) + tmp1[::2] = np.nan + tmp2[::2] = np.nan + assert_equal(func(tmp1), np.nan) + assert_equal(func(tmp2), np.nan) + + def test_reduce_complex(self): + assert_equal(np.minimum.reduce([1, 2j]), 2j) + 
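# np.minimum orders complex values lexicographically on (real, imag):
+        # 2j < 1 because (2j).real == 0 < 1. + 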
assert_equal(np.minimum.reduce([1+3j, 2j]), 2j) + + def test_float_nans(self): + nan = np.nan + arg1 = np.array([0, nan, nan]) + arg2 = np.array([nan, 0, nan]) + out = np.array([nan, nan, nan]) + assert_equal(np.minimum(arg1, arg2), out) + + def test_object_nans(self): + # Multiple checks to give this a chance to + # fail if cmp is used instead of rich compare. + # Failure cannot be guaranteed. + for i in range(1): + x = np.array(float('nan'), object) + y = 1.0 + z = np.array(float('nan'), object) + assert_(np.minimum(x, y) == 1.0) + assert_(np.minimum(z, y) == 1.0) + + def test_complex_nans(self): + nan = np.nan + for cnan in [complex(nan, 0), complex(0, nan), complex(nan, nan)]: + arg1 = np.array([0, cnan, cnan], dtype=complex) + arg2 = np.array([cnan, 0, cnan], dtype=complex) + out = np.array([nan, nan, nan], dtype=complex) + assert_equal(np.minimum(arg1, arg2), out) + + def test_object_array(self): + arg1 = np.arange(5, dtype=object) + arg2 = arg1 + 1 + assert_equal(np.minimum(arg1, arg2), arg1) + + def test_strided_array(self): + arr1 = np.array([-4.0, 1.0, 10.0, 0.0, np.nan, -np.nan, np.inf, -np.inf]) + arr2 = np.array([-2.0,-1.0, np.nan, 1.0, 0.0, np.nan, 1.0, -3.0]) + mintrue = np.array([-4.0, -1.0, np.nan, 0.0, np.nan, np.nan, 1.0, -np.inf]) + out = np.ones(8) + out_mintrue = np.array([-4.0, 1.0, 1.0, 1.0, 1.0, 1.0, np.nan, 1.0]) + assert_equal(np.minimum(arr1,arr2), mintrue) + assert_equal(np.minimum(arr1[::2],arr2[::2]), mintrue[::2]) + assert_equal(np.minimum(arr1[:4:], arr2[::2]), np.array([-4.0, np.nan, 0.0, 0.0])) + assert_equal(np.minimum(arr1[::3], arr2[:3:]), np.array([-4.0, -1.0, np.nan])) + assert_equal(np.minimum(arr1[:6:2], arr2[::3], out=out[::3]), np.array([-4.0, 1.0, np.nan])) + assert_equal(out, out_mintrue) + +class TestFmax(_FilterInvalids): + def test_reduce(self): + dflt = np.typecodes['AllFloat'] + dint = np.typecodes['AllInteger'] + seq1 = np.arange(11) + seq2 = seq1[::-1] + func = np.fmax.reduce + for dt in dint: + tmp1 = seq1.astype(dt) + tmp2 = seq2.astype(dt) + assert_equal(func(tmp1), 10) + assert_equal(func(tmp2), 10) + for dt in dflt: + tmp1 = seq1.astype(dt) + tmp2 = seq2.astype(dt) + assert_equal(func(tmp1), 10) + assert_equal(func(tmp2), 10) + tmp1[::2] = np.nan + tmp2[::2] = np.nan + assert_equal(func(tmp1), 9) + assert_equal(func(tmp2), 9) + + def test_reduce_complex(self): + assert_equal(np.fmax.reduce([1, 2j]), 1) + assert_equal(np.fmax.reduce([1+3j, 2j]), 1+3j) + + def test_float_nans(self): + nan = np.nan + arg1 = np.array([0, nan, nan]) + arg2 = np.array([nan, 0, nan]) + out = np.array([0, 0, nan]) + assert_equal(np.fmax(arg1, arg2), out) + + def test_complex_nans(self): + nan = np.nan + for cnan in [complex(nan, 0), complex(0, nan), complex(nan, nan)]: + arg1 = np.array([0, cnan, cnan], dtype=complex) + arg2 = np.array([cnan, 0, cnan], dtype=complex) + out = np.array([0, 0, nan], dtype=complex) + assert_equal(np.fmax(arg1, arg2), out) + + +class TestFmin(_FilterInvalids): + def test_reduce(self): + dflt = np.typecodes['AllFloat'] + dint = np.typecodes['AllInteger'] + seq1 = np.arange(11) + seq2 = seq1[::-1] + func = np.fmin.reduce + for dt in dint: + tmp1 = seq1.astype(dt) + tmp2 = seq2.astype(dt) + assert_equal(func(tmp1), 0) + assert_equal(func(tmp2), 0) + for dt in dflt: + tmp1 = seq1.astype(dt) + tmp2 = seq2.astype(dt) + assert_equal(func(tmp1), 0) + assert_equal(func(tmp2), 0) + tmp1[::2] = np.nan + tmp2[::2] = np.nan + assert_equal(func(tmp1), 1) + assert_equal(func(tmp2), 1) + + def test_reduce_complex(self): + 
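# fmin uses the same lexicographic complex ordering as np.minimum and
+        # differs from it only in how NaNs are handled. + 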
assert_equal(np.fmin.reduce([1, 2j]), 2j)
+        assert_equal(np.fmin.reduce([1+3j, 2j]), 2j)
+
+    def test_float_nans(self):
+        nan = np.nan
+        arg1 = np.array([0, nan, nan])
+        arg2 = np.array([nan, 0, nan])
+        out = np.array([0, 0, nan])
+        assert_equal(np.fmin(arg1, arg2), out)
+
+    def test_complex_nans(self):
+        nan = np.nan
+        for cnan in [complex(nan, 0), complex(0, nan), complex(nan, nan)]:
+            arg1 = np.array([0, cnan, cnan], dtype=complex)
+            arg2 = np.array([cnan, 0, cnan], dtype=complex)
+            out = np.array([0, 0, nan], dtype=complex)
+            assert_equal(np.fmin(arg1, arg2), out)
+
+
+class TestBool:
+    def test_exceptions(self):
+        a = np.ones(1, dtype=np.bool_)
+        assert_raises(TypeError, np.negative, a)
+        assert_raises(TypeError, np.positive, a)
+        assert_raises(TypeError, np.subtract, a, a)
+
+    def test_truth_table_logical(self):
+        # 2, 3 and 4 serve as true values
+        input1 = [0, 0, 3, 2]
+        input2 = [0, 4, 0, 2]
+
+        typecodes = (np.typecodes['AllFloat']
+                     + np.typecodes['AllInteger']
+                     + '?')     # boolean
+        for dtype in map(np.dtype, typecodes):
+            arg1 = np.asarray(input1, dtype=dtype)
+            arg2 = np.asarray(input2, dtype=dtype)
+
+            # OR
+            out = [False, True, True, True]
+            for func in (np.logical_or, np.maximum):
+                assert_equal(func(arg1, arg2).astype(bool), out)
+            # AND
+            out = [False, False, False, True]
+            for func in (np.logical_and, np.minimum):
+                assert_equal(func(arg1, arg2).astype(bool), out)
+            # XOR
+            out = [False, True, True, False]
+            for func in (np.logical_xor, np.not_equal):
+                assert_equal(func(arg1, arg2).astype(bool), out)
+
+    def test_truth_table_bitwise(self):
+        arg1 = [False, False, True, True]
+        arg2 = [False, True, False, True]
+
+        out = [False, True, True, True]
+        assert_equal(np.bitwise_or(arg1, arg2), out)
+
+        out = [False, False, False, True]
+        assert_equal(np.bitwise_and(arg1, arg2), out)
+
+        out = [False, True, True, False]
+        assert_equal(np.bitwise_xor(arg1, arg2), out)
+
+    def test_reduce(self):
+        none = np.array([0, 0, 0, 0], bool)
+        some = np.array([1, 0, 1, 1], bool)
+        every = np.array([1, 1, 1, 1], bool)
+        empty = np.array([], bool)
+
+        arrs = [none, some, every, empty]
+
+        for arr in arrs:
+            assert_equal(np.logical_and.reduce(arr), all(arr))
+
+        for arr in arrs:
+            assert_equal(np.logical_or.reduce(arr), any(arr))
+
+        for arr in arrs:
+            assert_equal(np.logical_xor.reduce(arr), arr.sum() % 2 == 1)
+
+
+class TestBitwiseUFuncs:
+
+    bitwise_types = [np.dtype(c) for c in '?' 
+ 'bBhHiIlLqQ' + 'O'] + + def test_values(self): + for dt in self.bitwise_types: + zeros = np.array([0], dtype=dt) + ones = np.array([-1], dtype=dt) + msg = "dt = '%s'" % dt.char + + assert_equal(np.bitwise_not(zeros), ones, err_msg=msg) + assert_equal(np.bitwise_not(ones), zeros, err_msg=msg) + + assert_equal(np.bitwise_or(zeros, zeros), zeros, err_msg=msg) + assert_equal(np.bitwise_or(zeros, ones), ones, err_msg=msg) + assert_equal(np.bitwise_or(ones, zeros), ones, err_msg=msg) + assert_equal(np.bitwise_or(ones, ones), ones, err_msg=msg) + + assert_equal(np.bitwise_xor(zeros, zeros), zeros, err_msg=msg) + assert_equal(np.bitwise_xor(zeros, ones), ones, err_msg=msg) + assert_equal(np.bitwise_xor(ones, zeros), ones, err_msg=msg) + assert_equal(np.bitwise_xor(ones, ones), zeros, err_msg=msg) + + assert_equal(np.bitwise_and(zeros, zeros), zeros, err_msg=msg) + assert_equal(np.bitwise_and(zeros, ones), zeros, err_msg=msg) + assert_equal(np.bitwise_and(ones, zeros), zeros, err_msg=msg) + assert_equal(np.bitwise_and(ones, ones), ones, err_msg=msg) + + def test_types(self): + for dt in self.bitwise_types: + zeros = np.array([0], dtype=dt) + ones = np.array([-1], dtype=dt) + msg = "dt = '%s'" % dt.char + + assert_(np.bitwise_not(zeros).dtype == dt, msg) + assert_(np.bitwise_or(zeros, zeros).dtype == dt, msg) + assert_(np.bitwise_xor(zeros, zeros).dtype == dt, msg) + assert_(np.bitwise_and(zeros, zeros).dtype == dt, msg) + + def test_identity(self): + assert_(np.bitwise_or.identity == 0, 'bitwise_or') + assert_(np.bitwise_xor.identity == 0, 'bitwise_xor') + assert_(np.bitwise_and.identity == -1, 'bitwise_and') + + def test_reduction(self): + binary_funcs = (np.bitwise_or, np.bitwise_xor, np.bitwise_and) + + for dt in self.bitwise_types: + zeros = np.array([0], dtype=dt) + ones = np.array([-1], dtype=dt) + for f in binary_funcs: + msg = "dt: '%s', f: '%s'" % (dt, f) + assert_equal(f.reduce(zeros), zeros, err_msg=msg) + assert_equal(f.reduce(ones), ones, err_msg=msg) + + # Test empty reduction, no object dtype + for dt in self.bitwise_types[:-1]: + # No object array types + empty = np.array([], dtype=dt) + for f in binary_funcs: + msg = "dt: '%s', f: '%s'" % (dt, f) + tgt = np.array(f.identity, dtype=dt) + res = f.reduce(empty) + assert_equal(res, tgt, err_msg=msg) + assert_(res.dtype == tgt.dtype, msg) + + # Empty object arrays use the identity. Note that the types may + # differ, the actual type used is determined by the assign_identity + # function and is not the same as the type returned by the identity + # method. 
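+        # (For example, np.bitwise_or.reduce on an empty object array yields
+        # the identity value 0, though not necessarily as the same Python
+        # type that np.bitwise_or.identity reports.)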
+        for f in binary_funcs:
+            msg = "f: '%s'" % (f,)
+            empty = np.array([], dtype=object)
+            tgt = f.identity
+            res = f.reduce(empty)
+            assert_equal(res, tgt, err_msg=msg)
+
+        # Non-empty object arrays do not use the identity
+        for f in binary_funcs:
+            msg = "f: '%s'" % (f,)
+            btype = np.array([True], dtype=object)
+            assert_(type(f.reduce(btype)) is bool, msg)
+
+
+class TestInt:
+    def test_logical_not(self):
+        x = np.ones(10, dtype=np.int16)
+        o = np.ones(10 * 2, dtype=bool)
+        tgt = o.copy()
+        tgt[::2] = False
+        os = o[::2]
+        assert_array_equal(np.logical_not(x, out=os), False)
+        assert_array_equal(o, tgt)
+
+
+class TestFloatingPoint:
+    def test_floating_point(self):
+        assert_equal(ncu.FLOATING_POINT_SUPPORT, 1)
+
+
+class TestDegrees:
+    def test_degrees(self):
+        assert_almost_equal(ncu.degrees(np.pi), 180.0)
+        assert_almost_equal(ncu.degrees(-0.5*np.pi), -90.0)
+
+
+class TestRadians:
+    def test_radians(self):
+        assert_almost_equal(ncu.radians(180.0), np.pi)
+        assert_almost_equal(ncu.radians(-90.0), -0.5*np.pi)
+
+
+class TestHeaviside:
+    def test_heaviside(self):
+        x = np.array([[-30.0, -0.1, 0.0, 0.2], [7.5, np.nan, np.inf, -np.inf]])
+        expectedhalf = np.array([[0.0, 0.0, 0.5, 1.0], [1.0, np.nan, 1.0, 0.0]])
+        expected1 = expectedhalf.copy()
+        expected1[0, 2] = 1
+
+        h = ncu.heaviside(x, 0.5)
+        assert_equal(h, expectedhalf)
+
+        h = ncu.heaviside(x, 1.0)
+        assert_equal(h, expected1)
+
+        x = x.astype(np.float32)
+
+        h = ncu.heaviside(x, np.float32(0.5))
+        assert_equal(h, expectedhalf.astype(np.float32))
+
+        h = ncu.heaviside(x, np.float32(1.0))
+        assert_equal(h, expected1.astype(np.float32))
+
+
+class TestSign:
+    def test_sign(self):
+        a = np.array([np.inf, -np.inf, np.nan, 0.0, 3.0, -3.0])
+        out = np.zeros(a.shape)
+        tgt = np.array([1., -1., np.nan, 0.0, 1.0, -1.0])
+
+        with np.errstate(invalid='ignore'):
+            res = ncu.sign(a)
+            assert_equal(res, tgt)
+            res = ncu.sign(a, out)
+            assert_equal(res, tgt)
+            assert_equal(out, tgt)
+
+    def test_sign_dtype_object(self):
+        # In reference to github issue #6229
+
+        foo = np.array([-.1, 0, .1])
+        a = np.sign(foo.astype(object))
+        b = np.sign(foo)
+
+        assert_array_equal(a, b)
+
+    def test_sign_dtype_nan_object(self):
+        # In reference to github issue #6229
+        def test_nan():
+            foo = np.array([np.nan])
+            # FIXME: a not used
+            a = np.sign(foo.astype(object))
+
+        assert_raises(TypeError, test_nan)
+
+class TestMinMax:
+    def test_minmax_blocked(self):
+        # simd tests on max/min, test all alignments, slow but important
+        # for 2 * vz + 2 * (vs - 1) + 1 (unrolled once)
+        for dt, sz in [(np.float32, 15), (np.float64, 7)]:
+            for out, inp, msg in _gen_alignment_data(dtype=dt, type='unary',
+                                                     max_size=sz):
+                for i in range(inp.size):
+                    inp[:] = np.arange(inp.size, dtype=dt)
+                    inp[i] = np.nan
+                    emsg = lambda: '%r\n%s' % (inp, msg)
+                    with suppress_warnings() as sup:
+                        sup.filter(RuntimeWarning,
+                                   "invalid value encountered in reduce")
+                        assert_(np.isnan(inp.max()), msg=emsg)
+                        assert_(np.isnan(inp.min()), msg=emsg)
+
+                    inp[i] = 1e10
+                    assert_equal(inp.max(), 1e10, err_msg=msg)
+                    inp[i] = -1e10
+                    assert_equal(inp.min(), -1e10, err_msg=msg)
+
+    def test_lower_align(self):
+        # check data that is not aligned to element size
+        # i.e. doubles are aligned to 4 bytes on i386
+        d = np.zeros(23 * 8, dtype=np.int8)[4:-4].view(np.float64)
+        assert_equal(d.max(), d[0])
+        assert_equal(d.min(), d[0])
+
+    def test_reduce_reorder(self):
+        # gh 10370, 11029 Some compilers reorder the call to npy_getfloatstatus
+        # and put it before the call to an intrinsic function that causes
+        # 
invalid status to be set. Also make sure warnings are not emitted + for n in (2, 4, 8, 16, 32): + for dt in (np.float32, np.float16, np.complex64): + for r in np.diagflat(np.array([np.nan] * n, dtype=dt)): + assert_equal(np.min(r), np.nan) + + def test_minimize_no_warns(self): + a = np.minimum(np.nan, 1) + assert_equal(a, np.nan) + + +class TestAbsoluteNegative: + def test_abs_neg_blocked(self): + # simd tests on abs, test all alignments for vz + 2 * (vs - 1) + 1 + for dt, sz in [(np.float32, 11), (np.float64, 5)]: + for out, inp, msg in _gen_alignment_data(dtype=dt, type='unary', + max_size=sz): + tgt = [ncu.absolute(i) for i in inp] + np.absolute(inp, out=out) + assert_equal(out, tgt, err_msg=msg) + assert_((out >= 0).all()) + + tgt = [-1*(i) for i in inp] + np.negative(inp, out=out) + assert_equal(out, tgt, err_msg=msg) + + for v in [np.nan, -np.inf, np.inf]: + for i in range(inp.size): + d = np.arange(inp.size, dtype=dt) + inp[:] = -d + inp[i] = v + d[i] = -v if v == -np.inf else v + assert_array_equal(np.abs(inp), d, err_msg=msg) + np.abs(inp, out=out) + assert_array_equal(out, d, err_msg=msg) + + assert_array_equal(-inp, -1*inp, err_msg=msg) + d = -1 * inp + np.negative(inp, out=out) + assert_array_equal(out, d, err_msg=msg) + + def test_lower_align(self): + # check data that is not aligned to element size + # i.e doubles are aligned to 4 bytes on i386 + d = np.zeros(23 * 8, dtype=np.int8)[4:-4].view(np.float64) + assert_equal(np.abs(d), d) + assert_equal(np.negative(d), -d) + np.negative(d, out=d) + np.negative(np.ones_like(d), out=d) + np.abs(d, out=d) + np.abs(np.ones_like(d), out=d) + + +class TestPositive: + def test_valid(self): + valid_dtypes = [int, float, complex, object] + for dtype in valid_dtypes: + x = np.arange(5, dtype=dtype) + result = np.positive(x) + assert_equal(x, result, err_msg=str(dtype)) + + def test_invalid(self): + with assert_raises(TypeError): + np.positive(True) + with assert_raises(TypeError): + np.positive(np.datetime64('2000-01-01')) + with assert_raises(TypeError): + np.positive(np.array(['foo'], dtype=str)) + with assert_raises(TypeError): + np.positive(np.array(['bar'], dtype=object)) + + +class TestSpecialMethods: + def test_wrap(self): + + class with_wrap: + def __array__(self): + return np.zeros(1) + + def __array_wrap__(self, arr, context): + r = with_wrap() + r.arr = arr + r.context = context + return r + + a = with_wrap() + x = ncu.minimum(a, a) + assert_equal(x.arr, np.zeros(1)) + func, args, i = x.context + assert_(func is ncu.minimum) + assert_equal(len(args), 2) + assert_equal(args[0], a) + assert_equal(args[1], a) + assert_equal(i, 0) + + def test_wrap_and_prepare_out(self): + # Calling convention for out should not affect how special methods are + # called + + class StoreArrayPrepareWrap(np.ndarray): + _wrap_args = None + _prepare_args = None + def __new__(cls): + return np.zeros(()).view(cls) + def __array_wrap__(self, obj, context): + self._wrap_args = context[1] + return obj + def __array_prepare__(self, obj, context): + self._prepare_args = context[1] + return obj + @property + def args(self): + # We need to ensure these are fetched at the same time, before + # any other ufuncs are called by the assertions + return (self._prepare_args, self._wrap_args) + def __repr__(self): + return "a" # for short test output + + def do_test(f_call, f_expected): + a = StoreArrayPrepareWrap() + f_call(a) + p, w = a.args + expected = f_expected(a) + try: + assert_equal(p, expected) + assert_equal(w, expected) + except AssertionError as e: + # 
assert_equal produces truly useless error messages
+                raise AssertionError("\n".join([
+                    "Bad arguments passed in ufunc call",
+                    " expected: {}".format(expected),
+                    " __array_prepare__ got: {}".format(p),
+                    " __array_wrap__ got: {}".format(w)
+                ]))
+
+        # method not on the out argument
+        do_test(lambda a: np.add(a, 0), lambda a: (a, 0))
+        do_test(lambda a: np.add(a, 0, None), lambda a: (a, 0))
+        do_test(lambda a: np.add(a, 0, out=None), lambda a: (a, 0))
+        do_test(lambda a: np.add(a, 0, out=(None,)), lambda a: (a, 0))
+
+        # method on the out argument
+        do_test(lambda a: np.add(0, 0, a), lambda a: (0, 0, a))
+        do_test(lambda a: np.add(0, 0, out=a), lambda a: (0, 0, a))
+        do_test(lambda a: np.add(0, 0, out=(a,)), lambda a: (0, 0, a))
+
+        # Also check the where mask handling:
+        do_test(lambda a: np.add(a, 0, where=False), lambda a: (a, 0))
+        do_test(lambda a: np.add(0, 0, a, where=False), lambda a: (0, 0, a))
+
+    def test_wrap_with_iterable(self):
+        # test fix for bug #1026:
+
+        class with_wrap(np.ndarray):
+            __array_priority__ = 10
+
+            def __new__(cls):
+                return np.asarray(1).view(cls).copy()
+
+            def __array_wrap__(self, arr, context):
+                return arr.view(type(self))
+
+        a = with_wrap()
+        x = ncu.multiply(a, (1, 2, 3))
+        assert_(isinstance(x, with_wrap))
+        assert_array_equal(x, np.array((1, 2, 3)))
+
+    def test_priority_with_scalar(self):
+        # test fix for bug #826:
+
+        class A(np.ndarray):
+            __array_priority__ = 10
+
+            def __new__(cls):
+                return np.asarray(1.0, 'float64').view(cls).copy()
+
+        a = A()
+        x = np.float64(1)*a
+        assert_(isinstance(x, A))
+        assert_array_equal(x, np.array(1))
+
+    def test_old_wrap(self):
+
+        class with_wrap:
+            def __array__(self):
+                return np.zeros(1)
+
+            def __array_wrap__(self, arr):
+                r = with_wrap()
+                r.arr = arr
+                return r
+
+        a = with_wrap()
+        x = ncu.minimum(a, a)
+        assert_equal(x.arr, np.zeros(1))
+
+    def test_priority(self):
+
+        class A:
+            def __array__(self):
+                return np.zeros(1)
+
+            def __array_wrap__(self, arr, context):
+                r = type(self)()
+                r.arr = arr
+                r.context = context
+                return r
+
+        class B(A):
+            __array_priority__ = 20.
+
+        class C(A):
+            __array_priority__ = 40.
+
+        x = np.zeros(1)
+        a = A()
+        b = B()
+        c = C()
+        f = ncu.minimum
+        assert_(type(f(x, x)) is np.ndarray)
+        assert_(type(f(x, a)) is A)
+        assert_(type(f(x, b)) is B)
+        assert_(type(f(x, c)) is C)
+        assert_(type(f(a, x)) is A)
+        assert_(type(f(b, x)) is B)
+        assert_(type(f(c, x)) is C)
+
+        assert_(type(f(a, a)) is A)
+        assert_(type(f(a, b)) is B)
+        assert_(type(f(b, a)) is B)
+        assert_(type(f(b, b)) is B)
+        assert_(type(f(b, c)) is C)
+        assert_(type(f(c, b)) is C)
+        assert_(type(f(c, c)) is C)
+
+        assert_(type(ncu.exp(a)) is A)
+        assert_(type(ncu.exp(b)) is B)
+        assert_(type(ncu.exp(c)) is C)
+
+    def test_failing_wrap(self):
+
+        class A:
+            def __array__(self):
+                return np.zeros(2)
+
+            def __array_wrap__(self, arr, context):
+                raise RuntimeError
+
+        a = A()
+        assert_raises(RuntimeError, ncu.maximum, a, a)
+        assert_raises(RuntimeError, ncu.maximum.reduce, a)
+
+    def test_failing_out_wrap(self):
+
+        singleton = np.array([1.0])
+
+        class Ok(np.ndarray):
+            def __array_wrap__(self, obj):
+                return singleton
+
+        class Bad(np.ndarray):
+            def __array_wrap__(self, obj):
+                raise RuntimeError
+
+        ok = np.empty(1).view(Ok)
+        bad = np.empty(1).view(Bad)
+        # double-free (segfault) of "ok" if "bad" raises an exception
+        for i in range(10):
+            assert_raises(RuntimeError, ncu.frexp, 1, ok, bad)
+
+    def test_none_wrap(self):
+        # Tests that issue #8507 is resolved. 
Previously, this would segfault + + class A: + def __array__(self): + return np.zeros(1) + + def __array_wrap__(self, arr, context=None): + return None + + a = A() + assert_equal(ncu.maximum(a, a), None) + + def test_default_prepare(self): + + class with_wrap: + __array_priority__ = 10 + + def __array__(self): + return np.zeros(1) + + def __array_wrap__(self, arr, context): + return arr + + a = with_wrap() + x = ncu.minimum(a, a) + assert_equal(x, np.zeros(1)) + assert_equal(type(x), np.ndarray) + + @pytest.mark.parametrize("use_where", [True, False]) + def test_prepare(self, use_where): + + class with_prepare(np.ndarray): + __array_priority__ = 10 + + def __array_prepare__(self, arr, context): + # make sure we can return a new + return np.array(arr).view(type=with_prepare) + + a = np.array(1).view(type=with_prepare) + if use_where: + x = np.add(a, a, where=np.array(True)) + else: + x = np.add(a, a) + assert_equal(x, np.array(2)) + assert_equal(type(x), with_prepare) + + @pytest.mark.parametrize("use_where", [True, False]) + def test_prepare_out(self, use_where): + + class with_prepare(np.ndarray): + __array_priority__ = 10 + + def __array_prepare__(self, arr, context): + return np.array(arr).view(type=with_prepare) + + a = np.array([1]).view(type=with_prepare) + if use_where: + x = np.add(a, a, a, where=[True]) + else: + x = np.add(a, a, a) + # Returned array is new, because of the strange + # __array_prepare__ above + assert_(not np.shares_memory(x, a)) + assert_equal(x, np.array([2])) + assert_equal(type(x), with_prepare) + + def test_failing_prepare(self): + + class A: + def __array__(self): + return np.zeros(1) + + def __array_prepare__(self, arr, context=None): + raise RuntimeError + + a = A() + assert_raises(RuntimeError, ncu.maximum, a, a) + assert_raises(RuntimeError, ncu.maximum, a, a, where=False) + + def test_array_too_many_args(self): + + class A: + def __array__(self, dtype, context): + return np.zeros(1) + + a = A() + assert_raises_regex(TypeError, '2 required positional', np.sum, a) + + def test_ufunc_override(self): + # check override works even with instance with high priority. + class A: + def __array_ufunc__(self, func, method, *inputs, **kwargs): + return self, func, method, inputs, kwargs + + class MyNDArray(np.ndarray): + __array_priority__ = 100 + + a = A() + b = np.array([1]).view(MyNDArray) + res0 = np.multiply(a, b) + res1 = np.multiply(b, b, out=a) + + # self + assert_equal(res0[0], a) + assert_equal(res1[0], a) + assert_equal(res0[1], np.multiply) + assert_equal(res1[1], np.multiply) + assert_equal(res0[2], '__call__') + assert_equal(res1[2], '__call__') + assert_equal(res0[3], (a, b)) + assert_equal(res1[3], (b, b)) + assert_equal(res0[4], {}) + assert_equal(res1[4], {'out': (a,)}) + + def test_ufunc_override_mro(self): + + # Some multi arg functions for testing. + def tres_mul(a, b, c): + return a * b * c + + def quatro_mul(a, b, c, d): + return a * b * c * d + + # Make these into ufuncs. 
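+        # np.frompyfunc(func, nin, nout) wraps a Python callable as an
+        # object-dtype ufunc taking nin inputs and returning nout outputs.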
+ three_mul_ufunc = np.frompyfunc(tres_mul, 3, 1) + four_mul_ufunc = np.frompyfunc(quatro_mul, 4, 1) + + class A: + def __array_ufunc__(self, func, method, *inputs, **kwargs): + return "A" + + class ASub(A): + def __array_ufunc__(self, func, method, *inputs, **kwargs): + return "ASub" + + class B: + def __array_ufunc__(self, func, method, *inputs, **kwargs): + return "B" + + class C: + def __init__(self): + self.count = 0 + + def __array_ufunc__(self, func, method, *inputs, **kwargs): + self.count += 1 + return NotImplemented + + class CSub(C): + def __array_ufunc__(self, func, method, *inputs, **kwargs): + self.count += 1 + return NotImplemented + + a = A() + a_sub = ASub() + b = B() + c = C() + + # Standard + res = np.multiply(a, a_sub) + assert_equal(res, "ASub") + res = np.multiply(a_sub, b) + assert_equal(res, "ASub") + + # With 1 NotImplemented + res = np.multiply(c, a) + assert_equal(res, "A") + assert_equal(c.count, 1) + # Check our counter works, so we can trust tests below. + res = np.multiply(c, a) + assert_equal(c.count, 2) + + # Both NotImplemented. + c = C() + c_sub = CSub() + assert_raises(TypeError, np.multiply, c, c_sub) + assert_equal(c.count, 1) + assert_equal(c_sub.count, 1) + c.count = c_sub.count = 0 + assert_raises(TypeError, np.multiply, c_sub, c) + assert_equal(c.count, 1) + assert_equal(c_sub.count, 1) + c.count = 0 + assert_raises(TypeError, np.multiply, c, c) + assert_equal(c.count, 1) + c.count = 0 + assert_raises(TypeError, np.multiply, 2, c) + assert_equal(c.count, 1) + + # Ternary testing. + assert_equal(three_mul_ufunc(a, 1, 2), "A") + assert_equal(three_mul_ufunc(1, a, 2), "A") + assert_equal(three_mul_ufunc(1, 2, a), "A") + + assert_equal(three_mul_ufunc(a, a, 6), "A") + assert_equal(three_mul_ufunc(a, 2, a), "A") + assert_equal(three_mul_ufunc(a, 2, b), "A") + assert_equal(three_mul_ufunc(a, 2, a_sub), "ASub") + assert_equal(three_mul_ufunc(a, a_sub, 3), "ASub") + c.count = 0 + assert_equal(three_mul_ufunc(c, a_sub, 3), "ASub") + assert_equal(c.count, 1) + c.count = 0 + assert_equal(three_mul_ufunc(1, a_sub, c), "ASub") + assert_equal(c.count, 0) + + c.count = 0 + assert_equal(three_mul_ufunc(a, b, c), "A") + assert_equal(c.count, 0) + c_sub.count = 0 + assert_equal(three_mul_ufunc(a, b, c_sub), "A") + assert_equal(c_sub.count, 0) + assert_equal(three_mul_ufunc(1, 2, b), "B") + + assert_raises(TypeError, three_mul_ufunc, 1, 2, c) + assert_raises(TypeError, three_mul_ufunc, c_sub, 2, c) + assert_raises(TypeError, three_mul_ufunc, c_sub, 2, 3) + + # Quaternary testing. 
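+        # As in the ternary cases, dispatch consults each operand's
+        # __array_ufunc__ from left to right, except that subclass instances
+        # are consulted before instances of their parent classes.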
+ assert_equal(four_mul_ufunc(a, 1, 2, 3), "A") + assert_equal(four_mul_ufunc(1, a, 2, 3), "A") + assert_equal(four_mul_ufunc(1, 1, a, 3), "A") + assert_equal(four_mul_ufunc(1, 1, 2, a), "A") + + assert_equal(four_mul_ufunc(a, b, 2, 3), "A") + assert_equal(four_mul_ufunc(1, a, 2, b), "A") + assert_equal(four_mul_ufunc(b, 1, a, 3), "B") + assert_equal(four_mul_ufunc(a_sub, 1, 2, a), "ASub") + assert_equal(four_mul_ufunc(a, 1, 2, a_sub), "ASub") + + c = C() + c_sub = CSub() + assert_raises(TypeError, four_mul_ufunc, 1, 2, 3, c) + assert_equal(c.count, 1) + c.count = 0 + assert_raises(TypeError, four_mul_ufunc, 1, 2, c_sub, c) + assert_equal(c_sub.count, 1) + assert_equal(c.count, 1) + c2 = C() + c.count = c_sub.count = 0 + assert_raises(TypeError, four_mul_ufunc, 1, c, c_sub, c2) + assert_equal(c_sub.count, 1) + assert_equal(c.count, 1) + assert_equal(c2.count, 0) + c.count = c2.count = c_sub.count = 0 + assert_raises(TypeError, four_mul_ufunc, c2, c, c_sub, c) + assert_equal(c_sub.count, 1) + assert_equal(c.count, 0) + assert_equal(c2.count, 1) + + def test_ufunc_override_methods(self): + + class A: + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + return self, ufunc, method, inputs, kwargs + + # __call__ + a = A() + with assert_raises(TypeError): + np.multiply.__call__(1, a, foo='bar', answer=42) + res = np.multiply.__call__(1, a, subok='bar', where=42) + assert_equal(res[0], a) + assert_equal(res[1], np.multiply) + assert_equal(res[2], '__call__') + assert_equal(res[3], (1, a)) + assert_equal(res[4], {'subok': 'bar', 'where': 42}) + + # __call__, wrong args + assert_raises(TypeError, np.multiply, a) + assert_raises(TypeError, np.multiply, a, a, a, a) + assert_raises(TypeError, np.multiply, a, a, sig='a', signature='a') + assert_raises(TypeError, ncu_tests.inner1d, a, a, axis=0, axes=[0, 0]) + + # reduce, positional args + res = np.multiply.reduce(a, 'axis0', 'dtype0', 'out0', 'keep0') + assert_equal(res[0], a) + assert_equal(res[1], np.multiply) + assert_equal(res[2], 'reduce') + assert_equal(res[3], (a,)) + assert_equal(res[4], {'dtype':'dtype0', + 'out': ('out0',), + 'keepdims': 'keep0', + 'axis': 'axis0'}) + + # reduce, kwargs + res = np.multiply.reduce(a, axis='axis0', dtype='dtype0', out='out0', + keepdims='keep0', initial='init0', + where='where0') + assert_equal(res[0], a) + assert_equal(res[1], np.multiply) + assert_equal(res[2], 'reduce') + assert_equal(res[3], (a,)) + assert_equal(res[4], {'dtype':'dtype0', + 'out': ('out0',), + 'keepdims': 'keep0', + 'axis': 'axis0', + 'initial': 'init0', + 'where': 'where0'}) + + # reduce, output equal to None removed, but not other explicit ones, + # even if they are at their default value. + res = np.multiply.reduce(a, 0, None, None, False) + assert_equal(res[4], {'axis': 0, 'dtype': None, 'keepdims': False}) + res = np.multiply.reduce(a, out=None, axis=0, keepdims=True) + assert_equal(res[4], {'axis': 0, 'keepdims': True}) + res = np.multiply.reduce(a, None, out=(None,), dtype=None) + assert_equal(res[4], {'axis': None, 'dtype': None}) + res = np.multiply.reduce(a, 0, None, None, False, 2, True) + assert_equal(res[4], {'axis': 0, 'dtype': None, 'keepdims': False, + 'initial': 2, 'where': True}) + # np._NoValue ignored for initial + res = np.multiply.reduce(a, 0, None, None, False, + np._NoValue, True) + assert_equal(res[4], {'axis': 0, 'dtype': None, 'keepdims': False, + 'where': True}) + # None kept for initial, True for where. 
+ res = np.multiply.reduce(a, 0, None, None, False, None, True) + assert_equal(res[4], {'axis': 0, 'dtype': None, 'keepdims': False, + 'initial': None, 'where': True}) + + # reduce, wrong args + assert_raises(ValueError, np.multiply.reduce, a, out=()) + assert_raises(ValueError, np.multiply.reduce, a, out=('out0', 'out1')) + assert_raises(TypeError, np.multiply.reduce, a, 'axis0', axis='axis0') + + # accumulate, pos args + res = np.multiply.accumulate(a, 'axis0', 'dtype0', 'out0') + assert_equal(res[0], a) + assert_equal(res[1], np.multiply) + assert_equal(res[2], 'accumulate') + assert_equal(res[3], (a,)) + assert_equal(res[4], {'dtype':'dtype0', + 'out': ('out0',), + 'axis': 'axis0'}) + + # accumulate, kwargs + res = np.multiply.accumulate(a, axis='axis0', dtype='dtype0', + out='out0') + assert_equal(res[0], a) + assert_equal(res[1], np.multiply) + assert_equal(res[2], 'accumulate') + assert_equal(res[3], (a,)) + assert_equal(res[4], {'dtype':'dtype0', + 'out': ('out0',), + 'axis': 'axis0'}) + + # accumulate, output equal to None removed. + res = np.multiply.accumulate(a, 0, None, None) + assert_equal(res[4], {'axis': 0, 'dtype': None}) + res = np.multiply.accumulate(a, out=None, axis=0, dtype='dtype1') + assert_equal(res[4], {'axis': 0, 'dtype': 'dtype1'}) + res = np.multiply.accumulate(a, None, out=(None,), dtype=None) + assert_equal(res[4], {'axis': None, 'dtype': None}) + + # accumulate, wrong args + assert_raises(ValueError, np.multiply.accumulate, a, out=()) + assert_raises(ValueError, np.multiply.accumulate, a, + out=('out0', 'out1')) + assert_raises(TypeError, np.multiply.accumulate, a, + 'axis0', axis='axis0') + + # reduceat, pos args + res = np.multiply.reduceat(a, [4, 2], 'axis0', 'dtype0', 'out0') + assert_equal(res[0], a) + assert_equal(res[1], np.multiply) + assert_equal(res[2], 'reduceat') + assert_equal(res[3], (a, [4, 2])) + assert_equal(res[4], {'dtype':'dtype0', + 'out': ('out0',), + 'axis': 'axis0'}) + + # reduceat, kwargs + res = np.multiply.reduceat(a, [4, 2], axis='axis0', dtype='dtype0', + out='out0') + assert_equal(res[0], a) + assert_equal(res[1], np.multiply) + assert_equal(res[2], 'reduceat') + assert_equal(res[3], (a, [4, 2])) + assert_equal(res[4], {'dtype':'dtype0', + 'out': ('out0',), + 'axis': 'axis0'}) + + # reduceat, output equal to None removed. 
+        res = np.multiply.reduceat(a, [4, 2], 0, None, None)
+        assert_equal(res[4], {'axis': 0, 'dtype': None})
+        res = np.multiply.reduceat(a, [4, 2], axis=None, out=None, dtype='dt')
+        assert_equal(res[4], {'axis': None, 'dtype': 'dt'})
+        res = np.multiply.reduceat(a, [4, 2], None, None, out=(None,))
+        assert_equal(res[4], {'axis': None, 'dtype': None})
+
+        # reduceat, wrong args
+        assert_raises(ValueError, np.multiply.reduceat, a, [4, 2], out=())
+        assert_raises(ValueError, np.multiply.reduceat, a, [4, 2],
+                      out=('out0', 'out1'))
+        assert_raises(TypeError, np.multiply.reduceat, a, [4, 2],
+                      'axis0', axis='axis0')
+
+        # outer
+        res = np.multiply.outer(a, 42)
+        assert_equal(res[0], a)
+        assert_equal(res[1], np.multiply)
+        assert_equal(res[2], 'outer')
+        assert_equal(res[3], (a, 42))
+        assert_equal(res[4], {})
+
+        # outer, wrong args
+        assert_raises(TypeError, np.multiply.outer, a)
+        assert_raises(TypeError, np.multiply.outer, a, a, a, a)
+        assert_raises(TypeError, np.multiply.outer, a, a, sig='a', signature='a')
+
+        # at
+        res = np.multiply.at(a, [4, 2], 'b0')
+        assert_equal(res[0], a)
+        assert_equal(res[1], np.multiply)
+        assert_equal(res[2], 'at')
+        assert_equal(res[3], (a, [4, 2], 'b0'))
+
+        # at, wrong args
+        assert_raises(TypeError, np.multiply.at, a)
+        assert_raises(TypeError, np.multiply.at, a, a, a, a)
+
+    def test_ufunc_override_out(self):
+
+        class A:
+            def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
+                return kwargs
+
+        class B:
+            def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
+                return kwargs
+
+        a = A()
+        b = B()
+        res0 = np.multiply(a, b, 'out_arg')
+        res1 = np.multiply(a, b, out='out_arg')
+        res2 = np.multiply(2, b, 'out_arg')
+        res3 = np.multiply(3, b, out='out_arg')
+        res4 = np.multiply(a, 4, 'out_arg')
+        res5 = np.multiply(a, 5, out='out_arg')
+
+        assert_equal(res0['out'][0], 'out_arg')
+        assert_equal(res1['out'][0], 'out_arg')
+        assert_equal(res2['out'][0], 'out_arg')
+        assert_equal(res3['out'][0], 'out_arg')
+        assert_equal(res4['out'][0], 'out_arg')
+        assert_equal(res5['out'][0], 'out_arg')
+
+        # ufuncs with multiple outputs: modf and frexp.
+        res6 = np.modf(a, 'out0', 'out1')
+        res7 = np.frexp(a, 'out0', 'out1')
+        assert_equal(res6['out'][0], 'out0')
+        assert_equal(res6['out'][1], 'out1')
+        assert_equal(res7['out'][0], 'out0')
+        assert_equal(res7['out'][1], 'out1')
+
+        # While we're at it, check that default output is never passed on.
+        assert_(np.sin(a, None) == {})
+        assert_(np.sin(a, out=None) == {})
+        assert_(np.sin(a, out=(None,)) == {})
+        assert_(np.modf(a, None) == {})
+        assert_(np.modf(a, None, None) == {})
+        assert_(np.modf(a, out=(None, None)) == {})
+        with assert_raises(TypeError):
+            # Out argument must be tuple, since there are multiple outputs.
+            np.modf(a, out=None)
+
+        # don't give positional and output argument, or too many arguments.
+        # wrong number of arguments in the tuple is an error too. 
+        assert_raises(TypeError, np.multiply, a, b, 'one', out='two')
+        assert_raises(TypeError, np.multiply, a, b, 'one', 'two')
+        assert_raises(ValueError, np.multiply, a, b, out=('one', 'two'))
+        assert_raises(TypeError, np.multiply, a, out=())
+        assert_raises(TypeError, np.modf, a, 'one', out=('two', 'three'))
+        assert_raises(TypeError, np.modf, a, 'one', 'two', 'three')
+        assert_raises(ValueError, np.modf, a, out=('one', 'two', 'three'))
+        assert_raises(ValueError, np.modf, a, out=('one',))
+
+    def test_ufunc_override_exception(self):
+
+        class A:
+            def __array_ufunc__(self, *a, **kwargs):
+                raise ValueError("oops")
+
+        a = A()
+        assert_raises(ValueError, np.negative, 1, out=a)
+        assert_raises(ValueError, np.negative, a)
+        assert_raises(ValueError, np.divide, 1., a)
+
+    def test_ufunc_override_not_implemented(self):
+
+        class A:
+            def __array_ufunc__(self, *args, **kwargs):
+                return NotImplemented
+
+        msg = ("operand type(s) all returned NotImplemented from "
+               "__array_ufunc__(<ufunc 'negative'>, '__call__', <*>): 'A'")
+        with assert_raises_regex(TypeError, fnmatch.translate(msg)):
+            np.negative(A())
+
+        msg = ("operand type(s) all returned NotImplemented from "
+               "__array_ufunc__(<ufunc 'add'>, '__call__', <*>, <object *>, "
+               "out=(1,)): 'A', 'object', 'int'")
+        with assert_raises_regex(TypeError, fnmatch.translate(msg)):
+            np.add(A(), object(), out=1)
+
+    def test_ufunc_override_disabled(self):
+
+        class OptOut:
+            __array_ufunc__ = None
+
+        opt_out = OptOut()
+
+        # ufuncs always raise
+        msg = "operand 'OptOut' does not support ufuncs"
+        with assert_raises_regex(TypeError, msg):
+            np.add(opt_out, 1)
+        with assert_raises_regex(TypeError, msg):
+            np.add(1, opt_out)
+        with assert_raises_regex(TypeError, msg):
+            np.negative(opt_out)
+
+        # opt-outs still hold even when other arguments have pathological
+        # __array_ufunc__ implementations
+
+        class GreedyArray:
+            def __array_ufunc__(self, *args, **kwargs):
+                return self
+
+        greedy = GreedyArray()
+        assert_(np.negative(greedy) is greedy)
+        with assert_raises_regex(TypeError, msg):
+            np.add(greedy, opt_out)
+        with assert_raises_regex(TypeError, msg):
+            np.add(greedy, 1, out=opt_out)
+
+    def test_gufunc_override(self):
+        # gufuncs are just ufunc instances, but follow a different path,
+        # so check __array_ufunc__ overrides them properly.
+        class A:
+            def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
+                return self, ufunc, method, inputs, kwargs
+
+        inner1d = ncu_tests.inner1d
+        a = A()
+        res = inner1d(a, a)
+        assert_equal(res[0], a)
+        assert_equal(res[1], inner1d)
+        assert_equal(res[2], '__call__')
+        assert_equal(res[3], (a, a))
+        assert_equal(res[4], {})
+
+        res = inner1d(1, 1, out=a)
+        assert_equal(res[0], a)
+        assert_equal(res[1], inner1d)
+        assert_equal(res[2], '__call__')
+        assert_equal(res[3], (1, 1))
+        assert_equal(res[4], {'out': (a,)})
+
+        # wrong number of arguments in the tuple is an error too.
+        assert_raises(TypeError, inner1d, a, out='two')
+        assert_raises(TypeError, inner1d, a, a, 'one', out='two')
+        assert_raises(TypeError, inner1d, a, a, 'one', 'two')
+        assert_raises(ValueError, inner1d, a, a, out=('one', 'two'))
+        assert_raises(ValueError, inner1d, a, a, out=())
+
+    def test_ufunc_override_with_super(self):
+        # NOTE: this class is used in doc/source/user/basics.subclassing.rst
+        # if you make any changes here, do update it there too. 
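+        # The subclass below strips inputs and outputs down to plain ndarray
+        # views, defers the computation to super().__array_ufunc__, and then
+        # records which argument positions held A instances in an info dict.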
+ class A(np.ndarray): + def __array_ufunc__(self, ufunc, method, *inputs, out=None, **kwargs): + args = [] + in_no = [] + for i, input_ in enumerate(inputs): + if isinstance(input_, A): + in_no.append(i) + args.append(input_.view(np.ndarray)) + else: + args.append(input_) + + outputs = out + out_no = [] + if outputs: + out_args = [] + for j, output in enumerate(outputs): + if isinstance(output, A): + out_no.append(j) + out_args.append(output.view(np.ndarray)) + else: + out_args.append(output) + kwargs['out'] = tuple(out_args) + else: + outputs = (None,) * ufunc.nout + + info = {} + if in_no: + info['inputs'] = in_no + if out_no: + info['outputs'] = out_no + + results = super().__array_ufunc__(ufunc, method, + *args, **kwargs) + if results is NotImplemented: + return NotImplemented + + if method == 'at': + if isinstance(inputs[0], A): + inputs[0].info = info + return + + if ufunc.nout == 1: + results = (results,) + + results = tuple((np.asarray(result).view(A) + if output is None else output) + for result, output in zip(results, outputs)) + if results and isinstance(results[0], A): + results[0].info = info + + return results[0] if len(results) == 1 else results + + class B: + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + if any(isinstance(input_, A) for input_ in inputs): + return "A!" + else: + return NotImplemented + + d = np.arange(5.) + # 1 input, 1 output + a = np.arange(5.).view(A) + b = np.sin(a) + check = np.sin(d) + assert_(np.all(check == b)) + assert_equal(b.info, {'inputs': [0]}) + b = np.sin(d, out=(a,)) + assert_(np.all(check == b)) + assert_equal(b.info, {'outputs': [0]}) + assert_(b is a) + a = np.arange(5.).view(A) + b = np.sin(a, out=a) + assert_(np.all(check == b)) + assert_equal(b.info, {'inputs': [0], 'outputs': [0]}) + + # 1 input, 2 outputs + a = np.arange(5.).view(A) + b1, b2 = np.modf(a) + assert_equal(b1.info, {'inputs': [0]}) + b1, b2 = np.modf(d, out=(None, a)) + assert_(b2 is a) + assert_equal(b1.info, {'outputs': [1]}) + a = np.arange(5.).view(A) + b = np.arange(5.).view(A) + c1, c2 = np.modf(a, out=(a, b)) + assert_(c1 is a) + assert_(c2 is b) + assert_equal(c1.info, {'inputs': [0], 'outputs': [0, 1]}) + + # 2 input, 1 output + a = np.arange(5.).view(A) + b = np.arange(5.).view(A) + c = np.add(a, b, out=a) + assert_(c is a) + assert_equal(c.info, {'inputs': [0, 1], 'outputs': [0]}) + # some tests with a non-ndarray subclass + a = np.arange(5.) + b = B() + assert_(a.__array_ufunc__(np.add, '__call__', a, b) is NotImplemented) + assert_(b.__array_ufunc__(np.add, '__call__', a, b) is NotImplemented) + assert_raises(TypeError, np.add, a, b) + a = a.view(A) + assert_(a.__array_ufunc__(np.add, '__call__', a, b) is NotImplemented) + assert_(b.__array_ufunc__(np.add, '__call__', a, b) == "A!") + assert_(np.add(a, b) == "A!") + # regression check for gh-9102 -- tests ufunc.reduce implicitly. 
+        d = np.array([[1, 2, 3], [1, 2, 3]])
+        a = d.view(A)
+        c = a.any()
+        check = d.any()
+        assert_equal(c, check)
+        assert_(c.info, {'inputs': [0]})
+        c = a.max()
+        check = d.max()
+        assert_equal(c, check)
+        assert_(c.info, {'inputs': [0]})
+        b = np.array(0).view(A)
+        c = a.max(out=b)
+        assert_equal(c, check)
+        assert_(c is b)
+        assert_(c.info, {'inputs': [0], 'outputs': [0]})
+        check = a.max(axis=0)
+        b = np.zeros_like(check).view(A)
+        c = a.max(axis=0, out=b)
+        assert_equal(c, check)
+        assert_(c is b)
+        assert_(c.info, {'inputs': [0], 'outputs': [0]})
+        # simple explicit tests of reduce, accumulate, reduceat
+        check = np.add.reduce(d, axis=1)
+        c = np.add.reduce(a, axis=1)
+        assert_equal(c, check)
+        assert_(c.info, {'inputs': [0]})
+        b = np.zeros_like(c)
+        c = np.add.reduce(a, 1, None, b)
+        assert_equal(c, check)
+        assert_(c is b)
+        assert_(c.info, {'inputs': [0], 'outputs': [0]})
+        check = np.add.accumulate(d, axis=0)
+        c = np.add.accumulate(a, axis=0)
+        assert_equal(c, check)
+        assert_(c.info, {'inputs': [0]})
+        b = np.zeros_like(c)
+        c = np.add.accumulate(a, 0, None, b)
+        assert_equal(c, check)
+        assert_(c is b)
+        assert_(c.info, {'inputs': [0], 'outputs': [0]})
+        indices = [0, 2, 1]
+        check = np.add.reduceat(d, indices, axis=1)
+        c = np.add.reduceat(a, indices, axis=1)
+        assert_equal(c, check)
+        assert_(c.info, {'inputs': [0]})
+        b = np.zeros_like(c)
+        c = np.add.reduceat(a, indices, 1, None, b)
+        assert_equal(c, check)
+        assert_(c is b)
+        assert_(c.info, {'inputs': [0], 'outputs': [0]})
+        # and a few tests for at
+        d = np.array([[1, 2, 3], [1, 2, 3]])
+        check = d.copy()
+        a = d.copy().view(A)
+        np.add.at(check, ([0, 1], [0, 2]), 1.)
+        np.add.at(a, ([0, 1], [0, 2]), 1.)
+        assert_equal(a, check)
+        assert_(a.info, {'inputs': [0]})
+        b = np.array(1.).view(A)
+        a = d.copy().view(A)
+        np.add.at(a, ([0, 1], [0, 2]), b)
+        assert_equal(a, check)
+        assert_(a.info, {'inputs': [0, 2]})
+
+
+class TestChoose:
+    def test_mixed(self):
+        c = np.array([True, True])
+        a = np.array([True, True])
+        assert_equal(np.choose(c, (a, 1)), np.array([1, 1]))
+
+
+class TestRationalFunctions:
+    def test_lcm(self):
+        self._test_lcm_inner(np.int16)
+        self._test_lcm_inner(np.uint16)
+
+    def test_lcm_object(self):
+        self._test_lcm_inner(np.object_)
+
+    def test_gcd(self):
+        self._test_gcd_inner(np.int16)
+        self._test_gcd_inner(np.uint16)
+
+    def test_gcd_object(self):
+        self._test_gcd_inner(np.object_)
+
+    def _test_lcm_inner(self, dtype):
+        # basic use
+        a = np.array([12, 120], dtype=dtype)
+        b = np.array([20, 200], dtype=dtype)
+        assert_equal(np.lcm(a, b), [60, 600])
+
+        if not issubclass(dtype, np.unsignedinteger):
+            # negatives are ignored
+            a = np.array([12, -12, 12, -12], dtype=dtype)
+            b = np.array([20, 20, -20, -20], dtype=dtype)
+            assert_equal(np.lcm(a, b), [60]*4)
+
+        # reduce
+        a = np.array([3, 12, 20], dtype=dtype)
+        assert_equal(np.lcm.reduce(a), 60)
+
+        # broadcasting, and a test including 0
+        a = np.arange(6).astype(dtype)
+        b = 20
+        assert_equal(np.lcm(a, b), [0, 20, 20, 60, 20, 20])
+
+    def _test_gcd_inner(self, dtype):
+        # basic use
+        a = np.array([12, 120], dtype=dtype)
+        b = np.array([20, 200], dtype=dtype)
+        assert_equal(np.gcd(a, b), [4, 40])
+
+        if not issubclass(dtype, np.unsignedinteger):
+            # negatives are ignored
+            a = np.array([12, -12, 12, -12], dtype=dtype)
+            b = np.array([20, 20, -20, -20], dtype=dtype)
+            assert_equal(np.gcd(a, b), [4]*4)
+
+        # reduce
+        a = np.array([15, 25, 35], dtype=dtype)
+        assert_equal(np.gcd.reduce(a), 5)
+
+        # broadcasting, and a test including 0
+        a = 
np.arange(6).astype(dtype) + b = 20 + assert_equal(np.gcd(a, b), [20, 1, 2, 1, 4, 5]) + + def test_lcm_overflow(self): + # verify that we don't overflow when a*b does overflow + big = np.int32(np.iinfo(np.int32).max // 11) + a = 2*big + b = 5*big + assert_equal(np.lcm(a, b), 10*big) + + def test_gcd_overflow(self): + for dtype in (np.int32, np.int64): + # verify that we don't overflow when taking abs(x) + # not relevant for lcm, where the result is unrepresentable anyway + a = dtype(np.iinfo(dtype).min) # negative power of two + q = -(a // 4) + assert_equal(np.gcd(a, q*3), q) + assert_equal(np.gcd(a, -q*3), q) + + def test_decimal(self): + from decimal import Decimal + a = np.array([1, 1, -1, -1]) * Decimal('0.20') + b = np.array([1, -1, 1, -1]) * Decimal('0.12') + + assert_equal(np.gcd(a, b), 4*[Decimal('0.04')]) + assert_equal(np.lcm(a, b), 4*[Decimal('0.60')]) + + def test_float(self): + # not well-defined on float due to rounding errors + assert_raises(TypeError, np.gcd, 0.3, 0.4) + assert_raises(TypeError, np.lcm, 0.3, 0.4) + + def test_builtin_long(self): + # sanity check that array coercion is alright for builtin longs + assert_equal(np.array(2**200).item(), 2**200) + + # expressed as prime factors + a = np.array(2**100 * 3**5) + b = np.array([2**100 * 5**7, 2**50 * 3**10]) + assert_equal(np.gcd(a, b), [2**100, 2**50 * 3**5]) + assert_equal(np.lcm(a, b), [2**100 * 3**5 * 5**7, 2**100 * 3**10]) + + assert_equal(np.gcd(2**100, 3**100), 1) + + +class TestRoundingFunctions: + + def test_object_direct(self): + """ test direct implementation of these magic methods """ + class C: + def __floor__(self): + return 1 + def __ceil__(self): + return 2 + def __trunc__(self): + return 3 + + arr = np.array([C(), C()]) + assert_equal(np.floor(arr), [1, 1]) + assert_equal(np.ceil(arr), [2, 2]) + assert_equal(np.trunc(arr), [3, 3]) + + def test_object_indirect(self): + """ test implementations via __float__ """ + class C: + def __float__(self): + return -2.5 + + arr = np.array([C(), C()]) + assert_equal(np.floor(arr), [-3, -3]) + assert_equal(np.ceil(arr), [-2, -2]) + with pytest.raises(TypeError): + np.trunc(arr) # consistent with math.trunc + + def test_fraction(self): + f = Fraction(-4, 3) + assert_equal(np.floor(f), -2) + assert_equal(np.ceil(f), -1) + assert_equal(np.trunc(f), -1) + + +class TestComplexFunctions: + funcs = [np.arcsin, np.arccos, np.arctan, np.arcsinh, np.arccosh, + np.arctanh, np.sin, np.cos, np.tan, np.exp, + np.exp2, np.log, np.sqrt, np.log10, np.log2, + np.log1p] + + def test_it(self): + for f in self.funcs: + if f is np.arccosh: + x = 1.5 + else: + x = .5 + fr = f(x) + fz = f(complex(x)) + assert_almost_equal(fz.real, fr, err_msg='real part %s' % f) + assert_almost_equal(fz.imag, 0., err_msg='imag part %s' % f) + + def test_precisions_consistent(self): + z = 1 + 1j + for f in self.funcs: + fcf = f(np.csingle(z)) + fcd = f(np.cdouble(z)) + fcl = f(np.clongdouble(z)) + assert_almost_equal(fcf, fcd, decimal=6, err_msg='fch-fcd %s' % f) + assert_almost_equal(fcl, fcd, decimal=15, err_msg='fch-fcl %s' % f) + + def test_branch_cuts(self): + # check branch cuts and continuity on them + _check_branch_cut(np.log, -0.5, 1j, 1, -1, True) + _check_branch_cut(np.log2, -0.5, 1j, 1, -1, True) + _check_branch_cut(np.log10, -0.5, 1j, 1, -1, True) + _check_branch_cut(np.log1p, -1.5, 1j, 1, -1, True) + _check_branch_cut(np.sqrt, -0.5, 1j, 1, -1, True) + + _check_branch_cut(np.arcsin, [ -2, 2], [1j, 1j], 1, -1, True) + _check_branch_cut(np.arccos, [ -2, 2], [1j, 1j], 1, -1, True) + 
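# (In these checks x0 lies on the branch cut, dx is the direction from which + # continuity is probed, and the two signs say how the real and imaginary + # parts are expected to flip across the cut; see _check_branch_cut below.) +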
_check_branch_cut(np.arctan, [0-2j, 2j], [1, 1], -1, 1, True) + + _check_branch_cut(np.arcsinh, [0-2j, 2j], [1, 1], -1, 1, True) + _check_branch_cut(np.arccosh, [ -1, 0.5], [1j, 1j], 1, -1, True) + _check_branch_cut(np.arctanh, [ -2, 2], [1j, 1j], 1, -1, True) + + # check against bogus branch cuts: assert continuity between quadrants + _check_branch_cut(np.arcsin, [0-2j, 2j], [ 1, 1], 1, 1) + _check_branch_cut(np.arccos, [0-2j, 2j], [ 1, 1], 1, 1) + _check_branch_cut(np.arctan, [ -2, 2], [1j, 1j], 1, 1) + + _check_branch_cut(np.arcsinh, [ -2, 2, 0], [1j, 1j, 1], 1, 1) + _check_branch_cut(np.arccosh, [0-2j, 2j, 2], [1, 1, 1j], 1, 1) + _check_branch_cut(np.arctanh, [0-2j, 2j, 0], [1, 1, 1j], 1, 1) + + def test_branch_cuts_complex64(self): + # check branch cuts and continuity on them + _check_branch_cut(np.log, -0.5, 1j, 1, -1, True, np.complex64) + _check_branch_cut(np.log2, -0.5, 1j, 1, -1, True, np.complex64) + _check_branch_cut(np.log10, -0.5, 1j, 1, -1, True, np.complex64) + _check_branch_cut(np.log1p, -1.5, 1j, 1, -1, True, np.complex64) + _check_branch_cut(np.sqrt, -0.5, 1j, 1, -1, True, np.complex64) + + _check_branch_cut(np.arcsin, [ -2, 2], [1j, 1j], 1, -1, True, np.complex64) + _check_branch_cut(np.arccos, [ -2, 2], [1j, 1j], 1, -1, True, np.complex64) + _check_branch_cut(np.arctan, [0-2j, 2j], [1, 1], -1, 1, True, np.complex64) + + _check_branch_cut(np.arcsinh, [0-2j, 2j], [1, 1], -1, 1, True, np.complex64) + _check_branch_cut(np.arccosh, [ -1, 0.5], [1j, 1j], 1, -1, True, np.complex64) + _check_branch_cut(np.arctanh, [ -2, 2], [1j, 1j], 1, -1, True, np.complex64) + + # check against bogus branch cuts: assert continuity between quadrants + _check_branch_cut(np.arcsin, [0-2j, 2j], [ 1, 1], 1, 1, False, np.complex64) + _check_branch_cut(np.arccos, [0-2j, 2j], [ 1, 1], 1, 1, False, np.complex64) + _check_branch_cut(np.arctan, [ -2, 2], [1j, 1j], 1, 1, False, np.complex64) + + _check_branch_cut(np.arcsinh, [ -2, 2, 0], [1j, 1j, 1], 1, 1, False, np.complex64) + _check_branch_cut(np.arccosh, [0-2j, 2j, 2], [1, 1, 1j], 1, 1, False, np.complex64) + _check_branch_cut(np.arctanh, [0-2j, 2j, 0], [1, 1, 1j], 1, 1, False, np.complex64) + + def test_against_cmath(self): + import cmath + + points = [-1-1j, -1+1j, +1-1j, +1+1j] + name_map = {'arcsin': 'asin', 'arccos': 'acos', 'arctan': 'atan', + 'arcsinh': 'asinh', 'arccosh': 'acosh', 'arctanh': 'atanh'} + atol = 4*np.finfo(complex).eps + for func in self.funcs: + fname = func.__name__.split('.')[-1] + cname = name_map.get(fname, fname) + try: + cfunc = getattr(cmath, cname) + except AttributeError: + continue + for p in points: + a = complex(func(np.complex_(p))) + b = cfunc(p) + assert_(abs(a - b) < atol, "%s %s: %s; cmath: %s" % (fname, p, a, b)) + + @pytest.mark.parametrize('dtype', [np.complex64, np.complex_, np.longcomplex]) + def test_loss_of_precision(self, dtype): + """Check loss of precision in complex arc* functions""" + + # Check against known-good functions + + info = np.finfo(dtype) + real_dtype = dtype(0.).real.dtype + eps = info.eps + + def check(x, rtol): + x = x.astype(real_dtype) + + z = x.astype(dtype) + d = np.absolute(np.arcsinh(x)/np.arcsinh(z).real - 1) + assert_(np.all(d < rtol), (np.argmax(d), x[np.argmax(d)], d.max(), + 'arcsinh')) + + z = (1j*x).astype(dtype) + d = np.absolute(np.arcsinh(x)/np.arcsin(z).imag - 1) + assert_(np.all(d < rtol), (np.argmax(d), x[np.argmax(d)], d.max(), + 'arcsin')) + + z = x.astype(dtype) + d = np.absolute(np.arctanh(x)/np.arctanh(z).real - 1) + assert_(np.all(d < rtol), (np.argmax(d), 
x[np.argmax(d)], d.max(), + 'arctanh')) + + z = (1j*x).astype(dtype) + d = np.absolute(np.arctanh(x)/np.arctan(z).imag - 1) + assert_(np.all(d < rtol), (np.argmax(d), x[np.argmax(d)], d.max(), + 'arctan')) + + # The switchover was chosen as 1e-3; hence there can be up to + # ~eps/1e-3 of relative cancellation error before it + + x_series = np.logspace(-20, -3.001, 200) + x_basic = np.logspace(-2.999, 0, 10, endpoint=False) + + if dtype is np.longcomplex: + if (platform.machine() == 'aarch64' and bad_arcsinh()): + pytest.skip("Trig functions of np.longcomplex values known " + "to be inaccurate on aarch64 for some compilation " + "configurations.") + # It's not guaranteed that the system-provided arc functions + # are accurate down to a few epsilons. (Eg. on Linux 64-bit) + # So, give more leeway for long complex tests here: + check(x_series, 50.0*eps) + else: + check(x_series, 2.1*eps) + check(x_basic, 2.0*eps/1e-3) + + # Check a few points + + z = np.array([1e-5*(1+1j)], dtype=dtype) + p = 9.999999999333333333e-6 + 1.000000000066666666e-5j + d = np.absolute(1-np.arctanh(z)/p) + assert_(np.all(d < 1e-15)) + + p = 1.0000000000333333333e-5 + 9.999999999666666667e-6j + d = np.absolute(1-np.arcsinh(z)/p) + assert_(np.all(d < 1e-15)) + + p = 9.999999999333333333e-6j + 1.000000000066666666e-5 + d = np.absolute(1-np.arctan(z)/p) + assert_(np.all(d < 1e-15)) + + p = 1.0000000000333333333e-5j + 9.999999999666666667e-6 + d = np.absolute(1-np.arcsin(z)/p) + assert_(np.all(d < 1e-15)) + + # Check continuity across switchover points + + def check(func, z0, d=1): + z0 = np.asarray(z0, dtype=dtype) + zp = z0 + abs(z0) * d * eps * 2 + zm = z0 - abs(z0) * d * eps * 2 + assert_(np.all(zp != zm), (zp, zm)) + + # NB: the cancellation error at the switchover is at least eps + good = (abs(func(zp) - func(zm)) < 2*eps) + assert_(np.all(good), (func, z0[~good])) + + for func in (np.arcsinh, np.arcsinh, np.arcsin, np.arctanh, np.arctan): + pts = [rp+1j*ip for rp in (-1e-3, 0, 1e-3) for ip in(-1e-3, 0, 1e-3) + if rp != 0 or ip != 0] + check(func, pts, 1) + check(func, pts, 1j) + check(func, pts, 1+1j) + + +class TestAttributes: + def test_attributes(self): + add = ncu.add + assert_equal(add.__name__, 'add') + assert_(add.ntypes >= 18) # don't fail if types added + assert_('ii->i' in add.types) + assert_equal(add.nin, 2) + assert_equal(add.nout, 1) + assert_equal(add.identity, 0) + + def test_doc(self): + # don't bother checking the long list of kwargs, which are likely to + # change + assert_(ncu.add.__doc__.startswith( + "add(x1, x2, /, out=None, *, where=True")) + assert_(ncu.frexp.__doc__.startswith( + "frexp(x[, out1, out2], / [, out=(None, None)], *, where=True")) + + +class TestSubclass: + + def test_subclass_op(self): + + class simple(np.ndarray): + def __new__(subtype, shape): + self = np.ndarray.__new__(subtype, shape, dtype=object) + self.fill(0) + return self + + a = simple((3, 4)) + assert_equal(a+a, a) + + +class TestFrompyfunc: + + def test_identity(self): + def mul(a, b): + return a * b + + # with identity=value + mul_ufunc = np.frompyfunc(mul, nin=2, nout=1, identity=1) + assert_equal(mul_ufunc.reduce([2, 3, 4]), 24) + assert_equal(mul_ufunc.reduce(np.ones((2, 2)), axis=(0, 1)), 1) + assert_equal(mul_ufunc.reduce([]), 1) + + # with identity=None (reorderable) + mul_ufunc = np.frompyfunc(mul, nin=2, nout=1, identity=None) + assert_equal(mul_ufunc.reduce([2, 3, 4]), 24) + assert_equal(mul_ufunc.reduce(np.ones((2, 2)), axis=(0, 1)), 1) + assert_raises(ValueError, lambda: mul_ufunc.reduce([])) + + # with 
no identity (not reorderable) + mul_ufunc = np.frompyfunc(mul, nin=2, nout=1) + assert_equal(mul_ufunc.reduce([2, 3, 4]), 24) + assert_raises(ValueError, lambda: mul_ufunc.reduce(np.ones((2, 2)), axis=(0, 1))) + assert_raises(ValueError, lambda: mul_ufunc.reduce([])) + + +def _check_branch_cut(f, x0, dx, re_sign=1, im_sign=-1, sig_zero_ok=False, + dtype=complex): + """ + Check for a branch cut in a function. + + Assert that `x0` lies on a branch cut of function `f` and `f` is + continuous from the direction `dx`. + + Parameters + ---------- + f : func + Function to check + x0 : array-like + Point on branch cut + dx : array-like + Direction to check continuity in + re_sign, im_sign : {1, -1} + Change of sign of the real or imaginary part expected + sig_zero_ok : bool + Whether to check if the branch cut respects signed zero (if applicable) + dtype : dtype + Dtype to check (should be complex) + + """ + x0 = np.atleast_1d(x0).astype(dtype) + dx = np.atleast_1d(dx).astype(dtype) + + if np.dtype(dtype).char == 'F': + scale = np.finfo(dtype).eps * 1e2 + atol = np.float32(1e-2) + else: + scale = np.finfo(dtype).eps * 1e3 + atol = 1e-4 + + y0 = f(x0) + yp = f(x0 + dx*scale*np.absolute(x0)/np.absolute(dx)) + ym = f(x0 - dx*scale*np.absolute(x0)/np.absolute(dx)) + + assert_(np.all(np.absolute(y0.real - yp.real) < atol), (y0, yp)) + assert_(np.all(np.absolute(y0.imag - yp.imag) < atol), (y0, yp)) + assert_(np.all(np.absolute(y0.real - ym.real*re_sign) < atol), (y0, ym)) + assert_(np.all(np.absolute(y0.imag - ym.imag*im_sign) < atol), (y0, ym)) + + if sig_zero_ok: + # check that signed zeros also work as a displacement + jr = (x0.real == 0) & (dx.real != 0) + ji = (x0.imag == 0) & (dx.imag != 0) + if np.any(jr): + x = x0[jr] + x.real = np.NZERO + ym = f(x) + assert_(np.all(np.absolute(y0[jr].real - ym.real*re_sign) < atol), (y0[jr], ym)) + assert_(np.all(np.absolute(y0[jr].imag - ym.imag*im_sign) < atol), (y0[jr], ym)) + + if np.any(ji): + x = x0[ji] + x.imag = np.NZERO + ym = f(x) + assert_(np.all(np.absolute(y0[ji].real - ym.real*re_sign) < atol), (y0[ji], ym)) + assert_(np.all(np.absolute(y0[ji].imag - ym.imag*im_sign) < atol), (y0[ji], ym)) + +def test_copysign(): + assert_(np.copysign(1, -1) == -1) + with np.errstate(divide="ignore"): + assert_(1 / np.copysign(0, -1) < 0) + assert_(1 / np.copysign(0, 1) > 0) + assert_(np.signbit(np.copysign(np.nan, -1))) + assert_(not np.signbit(np.copysign(np.nan, 1))) + +def _test_nextafter(t): + one = t(1) + two = t(2) + zero = t(0) + eps = np.finfo(t).eps + assert_(np.nextafter(one, two) - one == eps) + assert_(np.nextafter(one, zero) - one < 0) + assert_(np.isnan(np.nextafter(np.nan, one))) + assert_(np.isnan(np.nextafter(one, np.nan))) + assert_(np.nextafter(one, one) == one) + +def test_nextafter(): + return _test_nextafter(np.float64) + + +def test_nextafterf(): + return _test_nextafter(np.float32) + + +@pytest.mark.skipif(np.finfo(np.double) == np.finfo(np.longdouble), + reason="long double is same as double") +@pytest.mark.xfail(condition=platform.machine().startswith("ppc64"), + reason="IBM double double") +def test_nextafterl(): + return _test_nextafter(np.longdouble) + + +def test_nextafter_0(): + for t, direction in itertools.product(np.sctypes['float'], (1, -1)): + # The value of tiny for double double is NaN, so we need to pass the + # assert + with suppress_warnings() as sup: + sup.filter(UserWarning) + if not np.isnan(np.finfo(t).tiny): + tiny = np.finfo(t).tiny + assert_( + 0. 
< direction * np.nextafter(t(0), t(direction)) < tiny) + assert_equal(np.nextafter(t(0), t(direction)) / t(2.1), direction * 0.0) + +def _test_spacing(t): + one = t(1) + eps = np.finfo(t).eps + nan = t(np.nan) + inf = t(np.inf) + with np.errstate(invalid='ignore'): + assert_(np.spacing(one) == eps) + assert_(np.isnan(np.spacing(nan))) + assert_(np.isnan(np.spacing(inf))) + assert_(np.isnan(np.spacing(-inf))) + assert_(np.spacing(t(1e30)) != 0) + +def test_spacing(): + return _test_spacing(np.float64) + +def test_spacingf(): + return _test_spacing(np.float32) + + +@pytest.mark.skipif(np.finfo(np.double) == np.finfo(np.longdouble), + reason="long double is same as double") +@pytest.mark.xfail(condition=platform.machine().startswith("ppc64"), + reason="IBM double double") +def test_spacingl(): + return _test_spacing(np.longdouble) + +def test_spacing_gfortran(): + # Reference from this fortran file, built with gfortran 4.3.3 on linux + # 32bits: + # PROGRAM test_spacing + # INTEGER, PARAMETER :: SGL = SELECTED_REAL_KIND(p=6, r=37) + # INTEGER, PARAMETER :: DBL = SELECTED_REAL_KIND(p=13, r=200) + # + # WRITE(*,*) spacing(0.00001_DBL) + # WRITE(*,*) spacing(1.0_DBL) + # WRITE(*,*) spacing(1000._DBL) + # WRITE(*,*) spacing(10500._DBL) + # + # WRITE(*,*) spacing(0.00001_SGL) + # WRITE(*,*) spacing(1.0_SGL) + # WRITE(*,*) spacing(1000._SGL) + # WRITE(*,*) spacing(10500._SGL) + # END PROGRAM + ref = {np.float64: [1.69406589450860068E-021, + 2.22044604925031308E-016, + 1.13686837721616030E-013, + 1.81898940354585648E-012], + np.float32: [9.09494702E-13, + 1.19209290E-07, + 6.10351563E-05, + 9.76562500E-04]} + + for dt, dec_ in zip([np.float32, np.float64], (10, 20)): + x = np.array([1e-5, 1, 1000, 10500], dtype=dt) + assert_array_almost_equal(np.spacing(x), ref[dt], decimal=dec_) + +def test_nextafter_vs_spacing(): + # XXX: spacing does not handle long double yet + for t in [np.float32, np.float64]: + for _f in [1, 1e-5, 1000]: + f = t(_f) + f1 = t(_f + 1) + assert_(np.nextafter(f, f1) - f == np.spacing(f)) + +def test_pos_nan(): + """Check np.nan is a positive nan.""" + assert_(np.signbit(np.nan) == 0) + +def test_reduceat(): + """Test bug in reduceat when structured arrays are not copied.""" + db = np.dtype([('name', 'S11'), ('time', np.int64), ('value', np.float32)]) + a = np.empty([100], dtype=db) + a['name'] = 'Simple' + a['time'] = 10 + a['value'] = 100 + indx = [0, 7, 15, 25] + + h2 = [] + val1 = indx[0] + for val2 in indx[1:]: + h2.append(np.add.reduce(a['value'][val1:val2])) + val1 = val2 + h2.append(np.add.reduce(a['value'][val1:])) + h2 = np.array(h2) + + # test buffered -- this should work + h1 = np.add.reduceat(a['value'], indx) + assert_array_almost_equal(h1, h2) + + # This is when the error occurs. 
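+ # (shrinking the ufunc buffer below the slice size forces reduceat to walk + # the strided structured field directly instead of a buffered contiguous + # copy, which is the code path that used to be broken)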
+ # test no buffer + np.setbufsize(32) + h1 = np.add.reduceat(a['value'], indx) + np.setbufsize(np.UFUNC_BUFSIZE_DEFAULT) + assert_array_almost_equal(h1, h2) + +def test_reduceat_empty(): + """Reduceat should work with empty arrays""" + indices = np.array([], 'i4') + x = np.array([], 'f8') + result = np.add.reduceat(x, indices) + assert_equal(result.dtype, x.dtype) + assert_equal(result.shape, (0,)) + # Another case with a slightly different zero-sized shape + x = np.ones((5, 2)) + result = np.add.reduceat(x, [], axis=0) + assert_equal(result.dtype, x.dtype) + assert_equal(result.shape, (0, 2)) + result = np.add.reduceat(x, [], axis=1) + assert_equal(result.dtype, x.dtype) + assert_equal(result.shape, (5, 0)) + +def test_complex_nan_comparisons(): + nans = [complex(np.nan, 0), complex(0, np.nan), complex(np.nan, np.nan)] + fins = [complex(1, 0), complex(-1, 0), complex(0, 1), complex(0, -1), + complex(1, 1), complex(-1, -1), complex(0, 0)] + + with np.errstate(invalid='ignore'): + for x in nans + fins: + x = np.array([x]) + for y in nans + fins: + y = np.array([y]) + + if np.isfinite(x) and np.isfinite(y): + continue + + assert_equal(x < y, False, err_msg="%r < %r" % (x, y)) + assert_equal(x > y, False, err_msg="%r > %r" % (x, y)) + assert_equal(x <= y, False, err_msg="%r <= %r" % (x, y)) + assert_equal(x >= y, False, err_msg="%r >= %r" % (x, y)) + assert_equal(x == y, False, err_msg="%r == %r" % (x, y)) + + +def test_rint_big_int(): + # np.rint bug for large integer values on Windows 32-bit and MKL + # https://github.com/numpy/numpy/issues/6685 + val = 4607998452777363968 + # This is exactly representable in floating point + assert_equal(val, int(float(val))) + # Rint should not change the value + assert_equal(val, np.rint(val)) + +@pytest.mark.parametrize('ftype', [np.float32, np.float64]) +def test_memoverlap_accumulate(ftype): + # Reproduces bug https://github.com/numpy/numpy/issues/15597 + arr = np.array([0.61, 0.60, 0.77, 0.41, 0.19], dtype=ftype) + out_max = np.array([0.61, 0.61, 0.77, 0.77, 0.77], dtype=ftype) + out_min = np.array([0.61, 0.60, 0.60, 0.41, 0.19], dtype=ftype) + assert_equal(np.maximum.accumulate(arr), out_max) + assert_equal(np.minimum.accumulate(arr), out_min) + +def test_signaling_nan_exceptions(): + with assert_no_warnings(): + a = np.ndarray(shape=(), dtype='float32', buffer=b'\x00\xe0\xbf\xff') + np.isnan(a) + +@pytest.mark.parametrize("arr", [ + np.arange(2), + np.matrix([0, 1]), + np.matrix([[0, 1], [2, 5]]), + ]) +def test_outer_subclass_preserve(arr): + # for gh-8661 + class foo(np.ndarray): pass + actual = np.multiply.outer(arr.view(foo), arr.view(foo)) + assert actual.__class__.__name__ == 'foo' + +def test_outer_bad_subclass(): + class BadArr1(np.ndarray): + def __array_finalize__(self, obj): + # The outer call reshapes to 3 dims, try to do a bad reshape. + if self.ndim == 3: + self.shape = self.shape + (1,) + + def __array_prepare__(self, obj, context=None): + return obj + + class BadArr2(np.ndarray): + def __array_finalize__(self, obj): + if isinstance(obj, BadArr2): + # outer inserts 1-sized dims. In that case disturb them. 
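+ # (e.g. np.add.outer(arr, [1, 2]) with arr of shape (2, 3) views the + # operands as (2, 3, 1) and (1, 1, 2); reversing the shape here makes that + # view inconsistent, which has to surface as an error)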
+ if self.shape[-1] == 1: + self.shape = self.shape[::-1] + + def __array_prepare__(self, obj, context=None): + return obj + + for cls in [BadArr1, BadArr2]: + arr = np.ones((2, 3)).view(cls) + with assert_raises(TypeError) as a: + # The first array gets reshaped (not the second one) + np.add.outer(arr, [1, 2]) + + # This actually works, since we only see the reshaping error: + arr = np.ones((2, 3)).view(cls) + assert type(np.add.outer([1, 2], arr)) is cls + +def test_outer_exceeds_maxdims(): + deep = np.ones((1,) * 17) + with assert_raises(ValueError): + np.add.outer(deep, deep) + +def test_bad_legacy_ufunc_silent_errors(): + # legacy ufuncs can't report errors and NumPy can't check if the GIL + # is released. So NumPy has to check after the GIL is released just to + # cover all bases. `np.power` uses/used to use this. + arr = np.arange(3).astype(np.float64) + + with pytest.raises(RuntimeError, match=r"How unexpected :\)!"): + ncu_tests.always_error(arr, arr) + + with pytest.raises(RuntimeError, match=r"How unexpected :\)!"): + # not contiguous means the fast-path cannot be taken + non_contig = arr.repeat(20).reshape(-1, 6)[:, ::2] + ncu_tests.always_error(non_contig, arr) + + with pytest.raises(RuntimeError, match=r"How unexpected :\)!"): + ncu_tests.always_error.outer(arr, arr) + + with pytest.raises(RuntimeError, match=r"How unexpected :\)!"): + ncu_tests.always_error.reduce(arr) + + with pytest.raises(RuntimeError, match=r"How unexpected :\)!"): + ncu_tests.always_error.reduceat(arr, [0, 1]) + + with pytest.raises(RuntimeError, match=r"How unexpected :\)!"): + ncu_tests.always_error.accumulate(arr) + + with pytest.raises(RuntimeError, match=r"How unexpected :\)!"): + ncu_tests.always_error.at(arr, [0, 1, 2], arr) + + +@pytest.mark.parametrize('x1', [np.arange(3.0), [0.0, 1.0, 2.0]]) +def test_bad_legacy_gufunc_silent_errors(x1): + # Verify that an exception raised in a gufunc loop propagates correctly. + # The signature of always_error_gufunc is '(i),()->()'. 
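+ # (x1 supplies the (i) core dimension and the scalar supplies (), so both + # the ndarray and the plain-list parametrizations exercise the same inner + # loop and must propagate the raised error)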
+ with pytest.raises(RuntimeError, match=r"How unexpected :\)!"): + ncu_tests.always_error_gufunc(x1, 0.0) diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_umath_accuracy.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_umath_accuracy.py new file mode 100644 index 0000000..32e2dca --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_umath_accuracy.py @@ -0,0 +1,58 @@ +import numpy as np +import os +from os import path +import sys +import pytest +from ctypes import c_longlong, c_double, c_float, c_int, cast, pointer, POINTER +from numpy.testing import assert_array_max_ulp +from numpy.core._multiarray_umath import __cpu_features__ + +IS_AVX = __cpu_features__.get('AVX512F', False) or \ + (__cpu_features__.get('FMA3', False) and __cpu_features__.get('AVX2', False)) +runtest = sys.platform.startswith('linux') and IS_AVX +platform_skip = pytest.mark.skipif(not runtest, + reason="avoid testing inconsistent platform " + "library implementations") + +# convert string to hex function taken from: +# https://stackoverflow.com/questions/1592158/convert-hex-to-float # +def convert(s, datatype="np.float32"): + i = int(s, 16) # convert from hex to a Python int + if (datatype == "np.float64"): + cp = pointer(c_longlong(i)) # make this into a c long long integer + fp = cast(cp, POINTER(c_double)) # cast the int pointer to a double pointer + else: + cp = pointer(c_int(i)) # make this into a c integer + fp = cast(cp, POINTER(c_float)) # cast the int pointer to a float pointer + + return fp.contents.value # dereference the pointer, get the float + +str_to_float = np.vectorize(convert) + +class TestAccuracy: + @platform_skip + def test_validate_transcendentals(self): + with np.errstate(all='ignore'): + data_dir = path.join(path.dirname(__file__), 'data') + files = os.listdir(data_dir) + files = list(filter(lambda f: f.endswith('.csv'), files)) + for filename in files: + filepath = path.join(data_dir, filename) + with open(filepath) as fid: + file_without_comments = (r for r in fid if not r[0] in ('$', '#')) + data = np.genfromtxt(file_without_comments, + dtype=('|S39','|S39','|S39',int), + names=('type','input','output','ulperr'), + delimiter=',', + skip_header=1) + npname = path.splitext(filename)[0].split('-')[3] + npfunc = getattr(np, npname) + for datatype in np.unique(data['type']): + data_subset = data[data['type'] == datatype] + inval = np.array(str_to_float(data_subset['input'].astype(str), data_subset['type'].astype(str)), dtype=eval(datatype)) + outval = np.array(str_to_float(data_subset['output'].astype(str), data_subset['type'].astype(str)), dtype=eval(datatype)) + perm = np.random.permutation(len(inval)) + inval = inval[perm] + outval = outval[perm] + maxulperr = data_subset['ulperr'].max() + assert_array_max_ulp(npfunc(inval), outval, maxulperr) diff --git a/.local/lib/python3.8/site-packages/numpy/core/tests/test_umath_complex.py b/.local/lib/python3.8/site-packages/numpy/core/tests/test_umath_complex.py new file mode 100644 index 0000000..af5bbe5 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/core/tests/test_umath_complex.py @@ -0,0 +1,622 @@ +import sys +import platform +import pytest + +import numpy as np +# import the c-extension module directly since _arg is not exported via umath +import numpy.core._multiarray_umath as ncu +from numpy.testing import ( + assert_raises, assert_equal, assert_array_equal, assert_almost_equal, assert_array_max_ulp + ) + +# TODO: branch cuts (use Pauli code) +# TODO: conj 'symmetry' +# TODO: 
FPU exceptions + +# At least on Windows the results of many complex functions are not conforming +# to the C99 standard. See ticket 1574. +# Ditto for Solaris (ticket 1642) and OS X on PowerPC. +#FIXME: this will probably change when we require full C99 compatibility +with np.errstate(all='ignore'): + functions_seem_flaky = ((np.exp(complex(np.inf, 0)).imag != 0) + or (np.log(complex(np.NZERO, 0)).imag != np.pi)) +# TODO: replace with a check on whether platform-provided C99 funcs are used +xfail_complex_tests = (not sys.platform.startswith('linux') or functions_seem_flaky) + +# TODO This can be xfail once the generator functions are gone. +platform_skip = pytest.mark.skipif(xfail_complex_tests, + reason="Inadequate C99 complex support") + + + +class TestCexp: + def test_simple(self): + check = check_complex_value + f = np.exp + + check(f, 1, 0, np.exp(1), 0, False) + check(f, 0, 1, np.cos(1), np.sin(1), False) + + ref = np.exp(1) * complex(np.cos(1), np.sin(1)) + check(f, 1, 1, ref.real, ref.imag, False) + + @platform_skip + def test_special_values(self): + # C99: Section G 6.3.1 + + check = check_complex_value + f = np.exp + + # cexp(+-0 + 0i) is 1 + 0i + check(f, np.PZERO, 0, 1, 0, False) + check(f, np.NZERO, 0, 1, 0, False) + + # cexp(x + infi) is nan + nani for finite x and raises 'invalid' FPU + # exception + check(f, 1, np.inf, np.nan, np.nan) + check(f, -1, np.inf, np.nan, np.nan) + check(f, 0, np.inf, np.nan, np.nan) + + # cexp(inf + 0i) is inf + 0i + check(f, np.inf, 0, np.inf, 0) + + # cexp(-inf + yi) is +0 * (cos(y) + i sin(y)) for finite y + check(f, -np.inf, 1, np.PZERO, np.PZERO) + check(f, -np.inf, 0.75 * np.pi, np.NZERO, np.PZERO) + + # cexp(inf + yi) is +inf * (cos(y) + i sin(y)) for finite y + check(f, np.inf, 1, np.inf, np.inf) + check(f, np.inf, 0.75 * np.pi, -np.inf, np.inf) + + # cexp(-inf + inf i) is +-0 +- 0i (signs unspecified) + def _check_ninf_inf(dummy): + msgform = "cexp(-inf, inf) is (%f, %f), expected (+-0, +-0)" + with np.errstate(invalid='ignore'): + z = f(np.array(complex(-np.inf, np.inf))) + if z.real != 0 or z.imag != 0: + raise AssertionError(msgform % (z.real, z.imag)) + + _check_ninf_inf(None) + + # cexp(inf + inf i) is +-inf + NaNi and raises 'invalid' FPU ex. 
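+ # (check_complex_value cannot be used for these cases: the signs of the + # results are unspecified, so the hand-rolled helpers below only test for + # zero/inf/nan-ness instead of comparing against one expected value)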
+ def _check_inf_inf(dummy): + msgform = "cexp(inf, inf) is (%f, %f), expected (+-inf, nan)" + with np.errstate(invalid='ignore'): + z = f(np.array(complex(np.inf, np.inf))) + if not np.isinf(z.real) or not np.isnan(z.imag): + raise AssertionError(msgform % (z.real, z.imag)) + + _check_inf_inf(None) + + # cexp(-inf + nan i) is +-0 +- 0i + def _check_ninf_nan(dummy): + msgform = "cexp(-inf, nan) is (%f, %f), expected (+-0, +-0)" + with np.errstate(invalid='ignore'): + z = f(np.array(complex(-np.inf, np.nan))) + if z.real != 0 or z.imag != 0: + raise AssertionError(msgform % (z.real, z.imag)) + + _check_ninf_nan(None) + + # cexp(inf + nan i) is +-inf + nani + def _check_inf_nan(dummy): + msgform = "cexp(inf, nan) is (%f, %f), expected (+-inf, nan)" + with np.errstate(invalid='ignore'): + z = f(np.array(complex(np.inf, np.nan))) + if not np.isinf(z.real) or not np.isnan(z.imag): + raise AssertionError(msgform % (z.real, z.imag)) + + _check_inf_nan(None) + + # cexp(nan + yi) is nan + nani for y != 0 (optional: raises invalid FPU + # ex) + check(f, np.nan, 1, np.nan, np.nan) + check(f, np.nan, -1, np.nan, np.nan) + + check(f, np.nan, np.inf, np.nan, np.nan) + check(f, np.nan, -np.inf, np.nan, np.nan) + + # cexp(nan + nani) is nan + nani + check(f, np.nan, np.nan, np.nan, np.nan) + + # TODO This can be xfail once the generator functions are gone. + @pytest.mark.skip(reason="cexp(nan + 0I) is wrong on most platforms") + def test_special_values2(self): + # XXX: most implementations get it wrong here (including glibc <= 2.10) + # cexp(nan + 0i) is nan + 0i + check = check_complex_value + f = np.exp + + check(f, np.nan, 0, np.nan, 0) + +class TestClog: + def test_simple(self): + x = np.array([1+0j, 1+2j]) + y_r = np.log(np.abs(x)) + 1j * np.angle(x) + y = np.log(x) + assert_almost_equal(y, y_r) + + @platform_skip + @pytest.mark.skipif(platform.machine() == "armv5tel", reason="See gh-413.") + def test_special_values(self): + xl = [] + yl = [] + + # From C99 std (Sec 6.3.2) + # XXX: check exceptions raised + # --- raise for invalid fails. + + # clog(-0 + i0) returns -inf + i pi and raises the 'divide-by-zero' + # floating-point exception. + with np.errstate(divide='raise'): + x = np.array([np.NZERO], dtype=complex) + y = complex(-np.inf, np.pi) + assert_raises(FloatingPointError, np.log, x) + with np.errstate(divide='ignore'): + assert_almost_equal(np.log(x), y) + + xl.append(x) + yl.append(y) + + # clog(+0 + i0) returns -inf + i0 and raises the 'divide-by-zero' + # floating-point exception. + with np.errstate(divide='raise'): + x = np.array([0], dtype=complex) + y = complex(-np.inf, 0) + assert_raises(FloatingPointError, np.log, x) + with np.errstate(divide='ignore'): + assert_almost_equal(np.log(x), y) + + xl.append(x) + yl.append(y) + + # clog(x + i inf) returns +inf + i pi /2, for finite x. + x = np.array([complex(1, np.inf)], dtype=complex) + y = complex(np.inf, 0.5 * np.pi) + assert_almost_equal(np.log(x), y) + xl.append(x) + yl.append(y) + + x = np.array([complex(-1, np.inf)], dtype=complex) + assert_almost_equal(np.log(x), y) + xl.append(x) + yl.append(y) + + # clog(x + iNaN) returns NaN + iNaN and optionally raises the + # 'invalid' floating-point exception, for finite x. 
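+ # (the 'invalid' exception is optional per C99, which is why the + # assert_raises calls in the next two blocks are commented out and only + # the nan + nanj result itself is verified)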
+ with np.errstate(invalid='raise'): + x = np.array([complex(1., np.nan)], dtype=complex) + y = complex(np.nan, np.nan) + #assert_raises(FloatingPointError, np.log, x) + with np.errstate(invalid='ignore'): + assert_almost_equal(np.log(x), y) + + xl.append(x) + yl.append(y) + + with np.errstate(invalid='raise'): + x = np.array([np.inf + 1j * np.nan], dtype=complex) + #assert_raises(FloatingPointError, np.log, x) + with np.errstate(invalid='ignore'): + assert_almost_equal(np.log(x), y) + + xl.append(x) + yl.append(y) + + # clog(- inf + iy) returns +inf + ipi , for finite positive-signed y. + x = np.array([-np.inf + 1j], dtype=complex) + y = complex(np.inf, np.pi) + assert_almost_equal(np.log(x), y) + xl.append(x) + yl.append(y) + + # clog(+ inf + iy) returns +inf + i0, for finite positive-signed y. + x = np.array([np.inf + 1j], dtype=complex) + y = complex(np.inf, 0) + assert_almost_equal(np.log(x), y) + xl.append(x) + yl.append(y) + + # clog(- inf + i inf) returns +inf + i3pi /4. + x = np.array([complex(-np.inf, np.inf)], dtype=complex) + y = complex(np.inf, 0.75 * np.pi) + assert_almost_equal(np.log(x), y) + xl.append(x) + yl.append(y) + + # clog(+ inf + i inf) returns +inf + ipi /4. + x = np.array([complex(np.inf, np.inf)], dtype=complex) + y = complex(np.inf, 0.25 * np.pi) + assert_almost_equal(np.log(x), y) + xl.append(x) + yl.append(y) + + # clog(+/- inf + iNaN) returns +inf + iNaN. + x = np.array([complex(np.inf, np.nan)], dtype=complex) + y = complex(np.inf, np.nan) + assert_almost_equal(np.log(x), y) + xl.append(x) + yl.append(y) + + x = np.array([complex(-np.inf, np.nan)], dtype=complex) + assert_almost_equal(np.log(x), y) + xl.append(x) + yl.append(y) + + # clog(NaN + iy) returns NaN + iNaN and optionally raises the + # 'invalid' floating-point exception, for finite y. + x = np.array([complex(np.nan, 1)], dtype=complex) + y = complex(np.nan, np.nan) + assert_almost_equal(np.log(x), y) + xl.append(x) + yl.append(y) + + # clog(NaN + i inf) returns +inf + iNaN. + x = np.array([complex(np.nan, np.inf)], dtype=complex) + y = complex(np.inf, np.nan) + assert_almost_equal(np.log(x), y) + xl.append(x) + yl.append(y) + + # clog(NaN + iNaN) returns NaN + iNaN. + x = np.array([complex(np.nan, np.nan)], dtype=complex) + y = complex(np.nan, np.nan) + assert_almost_equal(np.log(x), y) + xl.append(x) + yl.append(y) + + # clog(conj(z)) = conj(clog(z)). 
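+ # (every special value collected in xl/yl above is re-used here to verify + # the conjugation symmetry pointwise)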
+ xa = np.array(xl, dtype=complex) + ya = np.array(yl, dtype=complex) + with np.errstate(divide='ignore'): + for i in range(len(xa)): + assert_almost_equal(np.log(xa[i].conj()), ya[i].conj()) + + +class TestCsqrt: + + def test_simple(self): + # sqrt(1) + check_complex_value(np.sqrt, 1, 0, 1, 0) + + # sqrt(1i) + rres = 0.5*np.sqrt(2) + ires = rres + check_complex_value(np.sqrt, 0, 1, rres, ires, False) + + # sqrt(-1) + check_complex_value(np.sqrt, -1, 0, 0, 1) + + def test_simple_conjugate(self): + ref = np.conj(np.sqrt(complex(1, 1))) + + def f(z): + return np.sqrt(np.conj(z)) + + check_complex_value(f, 1, 1, ref.real, ref.imag, False) + + #def test_branch_cut(self): + # _check_branch_cut(f, -1, 0, 1, -1) + + @platform_skip + def test_special_values(self): + # C99: Sec G 6.4.2 + + check = check_complex_value + f = np.sqrt + + # csqrt(+-0 + 0i) is 0 + 0i + check(f, np.PZERO, 0, 0, 0) + check(f, np.NZERO, 0, 0, 0) + + # csqrt(x + infi) is inf + infi for any x (including NaN) + check(f, 1, np.inf, np.inf, np.inf) + check(f, -1, np.inf, np.inf, np.inf) + + check(f, np.PZERO, np.inf, np.inf, np.inf) + check(f, np.NZERO, np.inf, np.inf, np.inf) + check(f, np.inf, np.inf, np.inf, np.inf) + check(f, -np.inf, np.inf, np.inf, np.inf) + check(f, -np.nan, np.inf, np.inf, np.inf) + + # csqrt(x + nani) is nan + nani for any finite x + check(f, 1, np.nan, np.nan, np.nan) + check(f, -1, np.nan, np.nan, np.nan) + check(f, 0, np.nan, np.nan, np.nan) + + # csqrt(-inf + yi) is +0 + infi for any finite y > 0 + check(f, -np.inf, 1, np.PZERO, np.inf) + + # csqrt(inf + yi) is +inf + 0i for any finite y > 0 + check(f, np.inf, 1, np.inf, np.PZERO) + + # csqrt(-inf + nani) is nan +- infi (both +i infi are valid) + def _check_ninf_nan(dummy): + msgform = "csqrt(-inf, nan) is (%f, %f), expected (nan, +-inf)" + z = np.sqrt(np.array(complex(-np.inf, np.nan))) + #Fixme: ugly workaround for isinf bug. 
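+ # (np.isnan/np.isinf on these inputs can themselves raise a spurious + # 'invalid' FPU warning, hence the errstate guard below)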
+ with np.errstate(invalid='ignore'): + if not (np.isnan(z.real) and np.isinf(z.imag)): + raise AssertionError(msgform % (z.real, z.imag)) + + _check_ninf_nan(None) + + # csqrt(+inf + nani) is inf + nani + check(f, np.inf, np.nan, np.inf, np.nan) + + # csqrt(nan + yi) is nan + nani for any finite y (infinite handled in x + # + nani) + check(f, np.nan, 0, np.nan, np.nan) + check(f, np.nan, 1, np.nan, np.nan) + check(f, np.nan, np.nan, np.nan, np.nan) + + # XXX: check for conj(csqrt(z)) == csqrt(conj(z)) (need to fix branch + # cuts first) + +class TestCpow: + def setup(self): + self.olderr = np.seterr(invalid='ignore') + + def teardown(self): + np.seterr(**self.olderr) + + def test_simple(self): + x = np.array([1+1j, 0+2j, 1+2j, np.inf, np.nan]) + y_r = x ** 2 + y = np.power(x, 2) + assert_almost_equal(y, y_r) + + def test_scalar(self): + x = np.array([1, 1j, 2, 2.5+.37j, np.inf, np.nan]) + y = np.array([1, 1j, -0.5+1.5j, -0.5+1.5j, 2, 3]) + lx = list(range(len(x))) + + # Hardcode the expected `builtins.complex` values, + # as complex exponentiation is broken as of bpo-44698 + p_r = [ + 1+0j, + 0.20787957635076193+0j, + 0.35812203996480685+0.6097119028618724j, + 0.12659112128185032+0.48847676699581527j, + complex(np.inf, np.nan), + complex(np.nan, np.nan), + ] + + n_r = [x[i] ** y[i] for i in lx] + for i in lx: + assert_almost_equal(n_r[i], p_r[i], err_msg='Loop %d\n' % i) + + def test_array(self): + x = np.array([1, 1j, 2, 2.5+.37j, np.inf, np.nan]) + y = np.array([1, 1j, -0.5+1.5j, -0.5+1.5j, 2, 3]) + lx = list(range(len(x))) + + # Hardcode the expected `builtins.complex` values, + # as complex exponentiation is broken as of bpo-44698 + p_r = [ + 1+0j, + 0.20787957635076193+0j, + 0.35812203996480685+0.6097119028618724j, + 0.12659112128185032+0.48847676699581527j, + complex(np.inf, np.nan), + complex(np.nan, np.nan), + ] + + n_r = x ** y + for i in lx: + assert_almost_equal(n_r[i], p_r[i], err_msg='Loop %d\n' % i) + +class TestCabs: + def setup(self): + self.olderr = np.seterr(invalid='ignore') + + def teardown(self): + np.seterr(**self.olderr) + + def test_simple(self): + x = np.array([1+1j, 0+2j, 1+2j, np.inf, np.nan]) + y_r = np.array([np.sqrt(2.), 2, np.sqrt(5), np.inf, np.nan]) + y = np.abs(x) + assert_almost_equal(y, y_r) + + def test_fabs(self): + # Test that np.abs(x +- 0j) == np.abs(x) (as mandated by C99 for cabs) + x = np.array([1+0j], dtype=complex) + assert_array_equal(np.abs(x), np.real(x)) + + x = np.array([complex(1, np.NZERO)], dtype=complex) + assert_array_equal(np.abs(x), np.real(x)) + + x = np.array([complex(np.inf, np.NZERO)], dtype=complex) + assert_array_equal(np.abs(x), np.real(x)) + + x = np.array([complex(np.nan, np.NZERO)], dtype=complex) + assert_array_equal(np.abs(x), np.real(x)) + + def test_cabs_inf_nan(self): + x, y = [], [] + + # cabs(+-nan + nani) returns nan + x.append(np.nan) + y.append(np.nan) + check_real_value(np.abs, np.nan, np.nan, np.nan) + + x.append(np.nan) + y.append(-np.nan) + check_real_value(np.abs, -np.nan, np.nan, np.nan) + + # According to C99 standard, if exactly one of the real/part is inf and + # the other nan, then cabs should return inf + x.append(np.inf) + y.append(np.nan) + check_real_value(np.abs, np.inf, np.nan, np.inf) + + x.append(-np.inf) + y.append(np.nan) + check_real_value(np.abs, -np.inf, np.nan, np.inf) + + # cabs(conj(z)) == conj(cabs(z)) (= cabs(z)) + def f(a): + return np.abs(np.conj(a)) + + def g(a, b): + return np.abs(complex(a, b)) + + xa = np.array(x, dtype=complex) + assert len(xa) == len(x) == len(y) + for xi, yi 
in zip(x, y): + ref = g(xi, yi) + check_real_value(f, xi, yi, ref) + +class TestCarg: + def test_simple(self): + check_real_value(ncu._arg, 1, 0, 0, False) + check_real_value(ncu._arg, 0, 1, 0.5*np.pi, False) + + check_real_value(ncu._arg, 1, 1, 0.25*np.pi, False) + check_real_value(ncu._arg, np.PZERO, np.PZERO, np.PZERO) + + # TODO This can be xfail once the generator functions are gone. + @pytest.mark.skip( + reason="Complex arithmetic with signed zero fails on most platforms") + def test_zero(self): + # carg(-0 +- 0i) returns +- pi + check_real_value(ncu._arg, np.NZERO, np.PZERO, np.pi, False) + check_real_value(ncu._arg, np.NZERO, np.NZERO, -np.pi, False) + + # carg(+0 +- 0i) returns +- 0 + check_real_value(ncu._arg, np.PZERO, np.PZERO, np.PZERO) + check_real_value(ncu._arg, np.PZERO, np.NZERO, np.NZERO) + + # carg(x +- 0i) returns +- 0 for x > 0 + check_real_value(ncu._arg, 1, np.PZERO, np.PZERO, False) + check_real_value(ncu._arg, 1, np.NZERO, np.NZERO, False) + + # carg(x +- 0i) returns +- pi for x < 0 + check_real_value(ncu._arg, -1, np.PZERO, np.pi, False) + check_real_value(ncu._arg, -1, np.NZERO, -np.pi, False) + + # carg(+- 0 + yi) returns pi/2 for y > 0 + check_real_value(ncu._arg, np.PZERO, 1, 0.5 * np.pi, False) + check_real_value(ncu._arg, np.NZERO, 1, 0.5 * np.pi, False) + + # carg(+- 0 + yi) returns -pi/2 for y < 0 + check_real_value(ncu._arg, np.PZERO, -1, -0.5 * np.pi, False) + check_real_value(ncu._arg, np.NZERO, -1, -0.5 * np.pi, False) + + #def test_branch_cuts(self): + # _check_branch_cut(ncu._arg, -1, 1j, -1, 1) + + def test_special_values(self): + # carg(-np.inf +- yi) returns +-pi for finite y > 0 + check_real_value(ncu._arg, -np.inf, 1, np.pi, False) + check_real_value(ncu._arg, -np.inf, -1, -np.pi, False) + + # carg(np.inf +- yi) returns +-0 for finite y > 0 + check_real_value(ncu._arg, np.inf, 1, np.PZERO, False) + check_real_value(ncu._arg, np.inf, -1, np.NZERO, False) + + # carg(x +- np.infi) returns +-pi/2 for finite x + check_real_value(ncu._arg, 1, np.inf, 0.5 * np.pi, False) + check_real_value(ncu._arg, 1, -np.inf, -0.5 * np.pi, False) + + # carg(-np.inf +- np.infi) returns +-3pi/4 + check_real_value(ncu._arg, -np.inf, np.inf, 0.75 * np.pi, False) + check_real_value(ncu._arg, -np.inf, -np.inf, -0.75 * np.pi, False) + + # carg(np.inf +- np.infi) returns +-pi/4 + check_real_value(ncu._arg, np.inf, np.inf, 0.25 * np.pi, False) + check_real_value(ncu._arg, np.inf, -np.inf, -0.25 * np.pi, False) + + # carg(x + yi) returns np.nan if x or y is nan + check_real_value(ncu._arg, np.nan, 0, np.nan, False) + check_real_value(ncu._arg, 0, np.nan, np.nan, False) + + check_real_value(ncu._arg, np.nan, np.inf, np.nan, False) + check_real_value(ncu._arg, np.inf, np.nan, np.nan, False) + + +def check_real_value(f, x1, y1, x, exact=True): + z1 = np.array([complex(x1, y1)]) + if exact: + assert_equal(f(z1), x) + else: + assert_almost_equal(f(z1), x) + + +def check_complex_value(f, x1, y1, x2, y2, exact=True): + z1 = np.array([complex(x1, y1)]) + z2 = complex(x2, y2) + with np.errstate(invalid='ignore'): + if exact: + assert_equal(f(z1), z2) + else: + assert_almost_equal(f(z1), z2) + +class TestSpecialComplexAVX: + @pytest.mark.parametrize("stride", [-4,-2,-1,1,2,4]) + @pytest.mark.parametrize("astype", [np.complex64, np.complex128]) + def test_array(self, stride, astype): + arr = np.array([complex(np.nan , np.nan), + complex(np.nan , np.inf), + complex(np.inf , np.nan), + complex(np.inf , np.inf), + complex(0. , np.inf), + complex(np.inf , 0.), + complex(0. 
, 0.), + complex(0. , np.nan), + complex(np.nan , 0.)], dtype=astype) + abs_true = np.array([np.nan, np.inf, np.inf, np.inf, np.inf, np.inf, 0., np.nan, np.nan], dtype=arr.real.dtype) + sq_true = np.array([complex(np.nan, np.nan), + complex(np.nan, np.nan), + complex(np.nan, np.nan), + complex(np.nan, np.inf), + complex(-np.inf, np.nan), + complex(np.inf, np.nan), + complex(0., 0.), + complex(np.nan, np.nan), + complex(np.nan, np.nan)], dtype=astype) + assert_equal(np.abs(arr[::stride]), abs_true[::stride]) + with np.errstate(invalid='ignore'): + assert_equal(np.square(arr[::stride]), sq_true[::stride]) + +class TestComplexAbsoluteAVX: + @pytest.mark.parametrize("arraysize", [1,2,3,4,5,6,7,8,9,10,11,13,15,17,18,19]) + @pytest.mark.parametrize("stride", [-4,-3,-2,-1,1,2,3,4]) + @pytest.mark.parametrize("astype", [np.complex64, np.complex128]) + # test to ensure masking and strides work as intended in the AVX implementation + def test_array(self, arraysize, stride, astype): + arr = np.ones(arraysize, dtype=astype) + abs_true = np.ones(arraysize, dtype=arr.real.dtype) + assert_equal(np.abs(arr[::stride]), abs_true[::stride]) + +# Testcase taken as is from https://github.com/numpy/numpy/issues/16660 +class TestComplexAbsoluteMixedDTypes: + @pytest.mark.parametrize("stride", [-4,-3,-2,-1,1,2,3,4]) + @pytest.mark.parametrize("astype", [np.complex64, np.complex128]) + @pytest.mark.parametrize("func", ['abs', 'square', 'conjugate']) + + def test_array(self, stride, astype, func): + dtype = [('template_id', 'U') + uni_arr2 = str_arr.astype('>> _lib = np.ctypeslib.load_library('libmystuff', '.') #doctest: +SKIP + +Our result type, an ndarray that must be of type double, 1-dimensional, +and C-contiguous in memory: + +>>> array_1d_double = np.ctypeslib.ndpointer( +... dtype=np.double, +... ndim=1, flags='CONTIGUOUS') #doctest: +SKIP + +Our C-function typically takes an array and updates its values +in-place. For example:: + + void foo_func(double* x, int length) + { + int i; + for (i = 0; i < length; i++) { + x[i] = i*i; + } + } + +We wrap it using: + +>>> _lib.foo_func.restype = None #doctest: +SKIP +>>> _lib.foo_func.argtypes = [array_1d_double, c_int] #doctest: +SKIP + +Then, we're ready to call ``foo_func``: + +>>> out = np.empty(15, dtype=np.double) +>>> _lib.foo_func(out, len(out)) #doctest: +SKIP + +""" +__all__ = ['load_library', 'ndpointer', 'c_intp', 'as_ctypes', 'as_array', + 'as_ctypes_type'] + +import os +from numpy import ( + integer, ndarray, dtype as _dtype, asarray, frombuffer +) +from numpy.core.multiarray import _flagdict, flagsobj + +try: + import ctypes +except ImportError: + ctypes = None + +if ctypes is None: + def _dummy(*args, **kwds): + """ + Dummy object that raises an ImportError if ctypes is not available. + + Raises + ------ + ImportError + If ctypes is not available. + + """ + raise ImportError("ctypes is not available.") + load_library = _dummy + as_ctypes = _dummy + as_array = _dummy + from numpy import intp as c_intp + _ndptr_base = object +else: + import numpy.core._internal as nic + c_intp = nic._getintp_ctype() + del nic + _ndptr_base = ctypes.c_void_p + + # Adapted from Albert Strasheim + def load_library(libname, loader_path): + """ + It is possible to load a library using + + >>> lib = ctypes.cdll[<full_path_name>] # doctest: +SKIP + + But there are cross-platform considerations, such as library file extensions, + plus the fact Windows will just load the first library it finds with that name. + NumPy supplies the load_library function as a convenience. + + .. 
versionchanged:: 1.20.0 + Allow libname and loader_path to take any + :term:`python:path-like object`. + + Parameters + ---------- + libname : path-like + Name of the library, which can have 'lib' as a prefix, + but without an extension. + loader_path : path-like + Where the library can be found. + + Returns + ------- + ctypes.cdll[libpath] : library object + A ctypes library object + + Raises + ------ + OSError + If there is no library with the expected extension, or the + library is defective and cannot be loaded. + """ + if ctypes.__version__ < '1.0.1': + import warnings + warnings.warn("All features of ctypes interface may not work " + "with ctypes < 1.0.1", stacklevel=2) + + # Convert path-like objects into strings + libname = os.fsdecode(libname) + loader_path = os.fsdecode(loader_path) + + ext = os.path.splitext(libname)[1] + if not ext: + # Try to load library with platform-specific name, otherwise + # default to libname.[so|pyd]. Sometimes, these files are built + # erroneously on non-linux platforms. + from numpy.distutils.misc_util import get_shared_lib_extension + so_ext = get_shared_lib_extension() + libname_ext = [libname + so_ext] + # mac, windows and linux >= py3.2 shared library and loadable + # module have different extensions so try both + so_ext2 = get_shared_lib_extension(is_python_ext=True) + if not so_ext2 == so_ext: + libname_ext.insert(0, libname + so_ext2) + else: + libname_ext = [libname] + + loader_path = os.path.abspath(loader_path) + if not os.path.isdir(loader_path): + libdir = os.path.dirname(loader_path) + else: + libdir = loader_path + + for ln in libname_ext: + libpath = os.path.join(libdir, ln) + if os.path.exists(libpath): + try: + return ctypes.cdll[libpath] + except OSError: + ## defective lib file + raise + ## if no successful return in the libname_ext loop: + raise OSError("no file with expected extension") + + +def _num_fromflags(flaglist): + num = 0 + for val in flaglist: + num += _flagdict[val] + return num + +_flagnames = ['C_CONTIGUOUS', 'F_CONTIGUOUS', 'ALIGNED', 'WRITEABLE', + 'OWNDATA', 'UPDATEIFCOPY', 'WRITEBACKIFCOPY'] +def _flags_fromnum(num): + res = [] + for key in _flagnames: + value = _flagdict[key] + if (num & value): + res.append(key) + return res + + +class _ndptr(_ndptr_base): + @classmethod + def from_param(cls, obj): + if not isinstance(obj, ndarray): + raise TypeError("argument must be an ndarray") + if cls._dtype_ is not None \ + and obj.dtype != cls._dtype_: + raise TypeError("array must have data type %s" % cls._dtype_) + if cls._ndim_ is not None \ + and obj.ndim != cls._ndim_: + raise TypeError("array must have %d dimension(s)" % cls._ndim_) + if cls._shape_ is not None \ + and obj.shape != cls._shape_: + raise TypeError("array must have shape %s" % str(cls._shape_)) + if cls._flags_ is not None \ + and ((obj.flags.num & cls._flags_) != cls._flags_): + raise TypeError("array must have flags %s" % + _flags_fromnum(cls._flags_)) + return obj.ctypes + + +class _concrete_ndptr(_ndptr): + """ + Like _ndptr, but with `_shape_` and `_dtype_` specified. + + Notably, this means the pointer has enough information to reconstruct + the array, which is not generally true. + """ + def _check_retval_(self): + """ + This method is called when this class is used as the .restype + attribute for a shared-library function, to automatically wrap the + pointer into an array. + """ + return self.contents + + @property + def contents(self): + """ + Get an ndarray viewing the data pointed to by this pointer. 
+ + This mirrors the `contents` attribute of a normal ctypes pointer + """ + full_dtype = _dtype((self._dtype_, self._shape_)) + full_ctype = ctypes.c_char * full_dtype.itemsize + buffer = ctypes.cast(self, ctypes.POINTER(full_ctype)).contents + return frombuffer(buffer, dtype=full_dtype).squeeze(axis=0) + + +# Factory for an array-checking class with from_param defined for +# use with ctypes argtypes mechanism +_pointer_type_cache = {} +def ndpointer(dtype=None, ndim=None, shape=None, flags=None): + """ + Array-checking restype/argtypes. + + An ndpointer instance is used to describe an ndarray in restypes + and argtypes specifications. This approach is more flexible than + using, for example, ``POINTER(c_double)``, since several restrictions + can be specified, which are verified upon calling the ctypes function. + These include data type, number of dimensions, shape and flags. If a + given array does not satisfy the specified restrictions, + a ``TypeError`` is raised. + + Parameters + ---------- + dtype : data-type, optional + Array data-type. + ndim : int, optional + Number of array dimensions. + shape : tuple of ints, optional + Array shape. + flags : str or tuple of str + Array flags; may be one or more of: + + - C_CONTIGUOUS / C / CONTIGUOUS + - F_CONTIGUOUS / F / FORTRAN + - OWNDATA / O + - WRITEABLE / W + - ALIGNED / A + - WRITEBACKIFCOPY / X + - UPDATEIFCOPY / U + + Returns + ------- + klass : ndpointer type object + A type object, which is an ``_ndptr`` instance containing + dtype, ndim, shape and flags information. + + Raises + ------ + TypeError + If a given array does not satisfy the specified restrictions. + + Examples + -------- + >>> clib.somefunc.argtypes = [np.ctypeslib.ndpointer(dtype=np.float64, + ... ndim=1, + ... flags='C_CONTIGUOUS')] + ... #doctest: +SKIP + >>> clib.somefunc(np.array([1, 2, 3], dtype=np.float64)) + ... 
#doctest: +SKIP + + """ + + # normalize dtype to an Optional[dtype] + if dtype is not None: + dtype = _dtype(dtype) + + # normalize flags to an Optional[int] + num = None + if flags is not None: + if isinstance(flags, str): + flags = flags.split(',') + elif isinstance(flags, (int, integer)): + num = flags + flags = _flags_fromnum(num) + elif isinstance(flags, flagsobj): + num = flags.num + flags = _flags_fromnum(num) + if num is None: + try: + flags = [x.strip().upper() for x in flags] + except Exception as e: + raise TypeError("invalid flags specification") from e + num = _num_fromflags(flags) + + # normalize shape to an Optional[tuple] + if shape is not None: + try: + shape = tuple(shape) + except TypeError: + # single integer -> 1-tuple + shape = (shape,) + + cache_key = (dtype, ndim, shape, num) + + try: + return _pointer_type_cache[cache_key] + except KeyError: + pass + + # produce a name for the new type + if dtype is None: + name = 'any' + elif dtype.names is not None: + name = str(id(dtype)) + else: + name = dtype.str + if ndim is not None: + name += "_%dd" % ndim + if shape is not None: + name += "_"+"x".join(str(x) for x in shape) + if flags is not None: + name += "_"+"_".join(flags) + + if dtype is not None and shape is not None: + base = _concrete_ndptr + else: + base = _ndptr + + klass = type("ndpointer_%s"%name, (base,), + {"_dtype_": dtype, + "_shape_" : shape, + "_ndim_" : ndim, + "_flags_" : num}) + _pointer_type_cache[cache_key] = klass + return klass + + +if ctypes is not None: + def _ctype_ndarray(element_type, shape): + """ Create an ndarray of the given element type and shape """ + for dim in shape[::-1]: + element_type = dim * element_type + # prevent the type name include np.ctypeslib + element_type.__module__ = None + return element_type + + + def _get_scalar_type_map(): + """ + Return a dictionary mapping native endian scalar dtype to ctypes types + """ + ct = ctypes + simple_types = [ + ct.c_byte, ct.c_short, ct.c_int, ct.c_long, ct.c_longlong, + ct.c_ubyte, ct.c_ushort, ct.c_uint, ct.c_ulong, ct.c_ulonglong, + ct.c_float, ct.c_double, + ct.c_bool, + ] + return {_dtype(ctype): ctype for ctype in simple_types} + + + _scalar_type_map = _get_scalar_type_map() + + + def _ctype_from_dtype_scalar(dtype): + # swapping twice ensure that `=` is promoted to <, >, or | + dtype_with_endian = dtype.newbyteorder('S').newbyteorder('S') + dtype_native = dtype.newbyteorder('=') + try: + ctype = _scalar_type_map[dtype_native] + except KeyError as e: + raise NotImplementedError( + "Converting {!r} to a ctypes type".format(dtype) + ) from None + + if dtype_with_endian.byteorder == '>': + ctype = ctype.__ctype_be__ + elif dtype_with_endian.byteorder == '<': + ctype = ctype.__ctype_le__ + + return ctype + + + def _ctype_from_dtype_subarray(dtype): + element_dtype, shape = dtype.subdtype + ctype = _ctype_from_dtype(element_dtype) + return _ctype_ndarray(ctype, shape) + + + def _ctype_from_dtype_structured(dtype): + # extract offsets of each field + field_data = [] + for name in dtype.names: + field_dtype, offset = dtype.fields[name][:2] + field_data.append((offset, name, _ctype_from_dtype(field_dtype))) + + # ctypes doesn't care about field order + field_data = sorted(field_data, key=lambda f: f[0]) + + if len(field_data) > 1 and all(offset == 0 for offset, name, ctype in field_data): + # union, if multiple fields all at address 0 + size = 0 + _fields_ = [] + for offset, name, ctype in field_data: + _fields_.append((name, ctype)) + size = max(size, ctypes.sizeof(ctype)) + + # pad to 
the right size + if dtype.itemsize != size: + _fields_.append(('', ctypes.c_char * dtype.itemsize)) + + # we inserted manual padding, so always `_pack_` + return type('union', (ctypes.Union,), dict( + _fields_=_fields_, + _pack_=1, + __module__=None, + )) + else: + last_offset = 0 + _fields_ = [] + for offset, name, ctype in field_data: + padding = offset - last_offset + if padding < 0: + raise NotImplementedError("Overlapping fields") + if padding > 0: + _fields_.append(('', ctypes.c_char * padding)) + + _fields_.append((name, ctype)) + last_offset = offset + ctypes.sizeof(ctype) + + + padding = dtype.itemsize - last_offset + if padding > 0: + _fields_.append(('', ctypes.c_char * padding)) + + # we inserted manual padding, so always `_pack_` + return type('struct', (ctypes.Structure,), dict( + _fields_=_fields_, + _pack_=1, + __module__=None, + )) + + + def _ctype_from_dtype(dtype): + if dtype.fields is not None: + return _ctype_from_dtype_structured(dtype) + elif dtype.subdtype is not None: + return _ctype_from_dtype_subarray(dtype) + else: + return _ctype_from_dtype_scalar(dtype) + + + def as_ctypes_type(dtype): + r""" + Convert a dtype into a ctypes type. + + Parameters + ---------- + dtype : dtype + The dtype to convert + + Returns + ------- + ctype + A ctype scalar, union, array, or struct + + Raises + ------ + NotImplementedError + If the conversion is not possible + + Notes + ----- + This function does not losslessly round-trip in either direction. + + ``np.dtype(as_ctypes_type(dt))`` will: + + - insert padding fields + - reorder fields to be sorted by offset + - discard field titles + + ``as_ctypes_type(np.dtype(ctype))`` will: + + - discard the class names of `ctypes.Structure`\ s and + `ctypes.Union`\ s + - convert single-element `ctypes.Union`\ s into single-element + `ctypes.Structure`\ s + - insert padding fields + + """ + return _ctype_from_dtype(_dtype(dtype)) + + + def as_array(obj, shape=None): + """ + Create a numpy array from a ctypes array or POINTER. + + The numpy array shares the memory with the ctypes object. + + The shape parameter must be given if converting from a ctypes POINTER. + The shape parameter is ignored if converting from a ctypes array + """ + if isinstance(obj, ctypes._Pointer): + # convert pointers to an array of the desired shape + if shape is None: + raise TypeError( + 'as_array() requires a shape argument when called on a ' + 'pointer') + p_arr_type = ctypes.POINTER(_ctype_ndarray(obj._type_, shape)) + obj = ctypes.cast(obj, p_arr_type).contents + + return asarray(obj) + + + def as_ctypes(obj): + """Create and return a ctypes object from a numpy array. 
Actually + anything that exposes the __array_interface__ is accepted.""" + ai = obj.__array_interface__ + if ai["strides"]: + raise TypeError("strided arrays not supported") + if ai["version"] != 3: + raise TypeError("only __array_interface__ version 3 supported") + addr, readonly = ai["data"] + if readonly: + raise TypeError("readonly arrays unsupported") + + # can't use `_dtype((ai["typestr"], ai["shape"]))` here, as it overflows + # dtype.itemsize (gh-14214) + ctype_scalar = as_ctypes_type(ai["typestr"]) + result_type = _ctype_ndarray(ctype_scalar, ai["shape"]) + result = result_type.from_address(addr) + result.__keep = obj + return result diff --git a/.local/lib/python3.8/site-packages/numpy/ctypeslib.pyi b/.local/lib/python3.8/site-packages/numpy/ctypeslib.pyi new file mode 100644 index 0000000..1c396d2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/ctypeslib.pyi @@ -0,0 +1,268 @@ +# NOTE: Numpy's mypy plugin is used for importing the correct +# platform-specific `ctypes._SimpleCData[int]` sub-type +from ctypes import c_int64 as _c_intp + +import os +import sys +import ctypes +from typing import ( + Literal as L, + Any, + List, + Union, + TypeVar, + Type, + Generic, + Optional, + overload, + Iterable, + ClassVar, + Tuple, + Sequence, + Dict, +) + +from numpy import ( + ndarray, + dtype, + generic, + bool_, + byte, + short, + intc, + int_, + longlong, + ubyte, + ushort, + uintc, + uint, + ulonglong, + single, + double, + float_, + longdouble, + void, +) +from numpy.core._internal import _ctypes +from numpy.core.multiarray import flagsobj +from numpy.typing import ( + # Arrays + ArrayLike, + NDArray, + _FiniteNestedSequence, + _SupportsArray, + + # Shapes + _ShapeLike, + + # DTypes + DTypeLike, + _SupportsDType, + _VoidDTypeLike, + _BoolCodes, + _UByteCodes, + _UShortCodes, + _UIntCCodes, + _UIntCodes, + _ULongLongCodes, + _ByteCodes, + _ShortCodes, + _IntCCodes, + _IntCodes, + _LongLongCodes, + _SingleCodes, + _DoubleCodes, + _LongDoubleCodes, +) + +# TODO: Add a proper `_Shape` bound once we've got variadic typevars +_DType = TypeVar("_DType", bound=dtype[Any]) +_DTypeOptional = TypeVar("_DTypeOptional", bound=Optional[dtype[Any]]) +_SCT = TypeVar("_SCT", bound=generic) + +_DTypeLike = Union[ + dtype[_SCT], + Type[_SCT], + _SupportsDType[dtype[_SCT]], +] +_ArrayLike = _FiniteNestedSequence[_SupportsArray[dtype[_SCT]]] + +_FlagsKind = L[ + 'C_CONTIGUOUS', 'CONTIGUOUS', 'C', + 'F_CONTIGUOUS', 'FORTRAN', 'F', + 'ALIGNED', 'A', + 'WRITEABLE', 'W', + 'OWNDATA', 'O', + 'UPDATEIFCOPY', 'U', + 'WRITEBACKIFCOPY', 'X', +] + +# TODO: Add a shape typevar once we have variadic typevars (PEP 646) +class _ndptr(ctypes.c_void_p, Generic[_DTypeOptional]): + # In practice these 4 classvars are defined in the dynamic class + # returned by `ndpointer` + _dtype_: ClassVar[_DTypeOptional] + _shape_: ClassVar[None] + _ndim_: ClassVar[None | int] + _flags_: ClassVar[None | List[_FlagsKind]] + + @overload + @classmethod + def from_param(cls: Type[_ndptr[None]], obj: ndarray[Any, Any]) -> _ctypes: ... + @overload + @classmethod + def from_param(cls: Type[_ndptr[_DType]], obj: ndarray[Any, _DType]) -> _ctypes: ... + +class _concrete_ndptr(_ndptr[_DType]): + _dtype_: ClassVar[_DType] + _shape_: ClassVar[Tuple[int, ...]] + @property + def contents(self) -> ndarray[Any, _DType]: ... + +def load_library( + libname: str | bytes | os.PathLike[str] | os.PathLike[bytes], + loader_path: str | bytes | os.PathLike[str] | os.PathLike[bytes], +) -> ctypes.CDLL: ... 
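+# A minimal usage sketch of `load_library` and `ndpointer` (illustrative
+# only -- `libexample` and its `scale` function are hypothetical, not part
+# of numpy):
+#
+#     import ctypes
+#     import numpy as np
+#     from numpy import ctypeslib
+#
+#     lib = ctypeslib.load_library('libexample', '.')
+#     lib.scale.restype = None
+#     lib.scale.argtypes = [
+#         ctypeslib.ndpointer(dtype=np.float64, ndim=1,
+#                             flags='C_CONTIGUOUS'),
+#         ctypes.c_int,
+#     ]
+#     arr = np.arange(5.0)
+#     lib.scale(arr, arr.size)  # ndpointer checks dtype/ndim/flags here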
+ +__all__: List[str] + +c_intp = _c_intp + +@overload +def ndpointer( + dtype: None = ..., + ndim: int = ..., + shape: None | _ShapeLike = ..., + flags: None | _FlagsKind | Iterable[_FlagsKind] | int | flagsobj = ..., +) -> Type[_ndptr[None]]: ... +@overload +def ndpointer( + dtype: _DTypeLike[_SCT], + ndim: int = ..., + *, + shape: _ShapeLike, + flags: None | _FlagsKind | Iterable[_FlagsKind] | int | flagsobj = ..., +) -> Type[_concrete_ndptr[dtype[_SCT]]]: ... +@overload +def ndpointer( + dtype: DTypeLike, + ndim: int = ..., + *, + shape: _ShapeLike, + flags: None | _FlagsKind | Iterable[_FlagsKind] | int | flagsobj = ..., +) -> Type[_concrete_ndptr[dtype[Any]]]: ... +@overload +def ndpointer( + dtype: _DTypeLike[_SCT], + ndim: int = ..., + shape: None = ..., + flags: None | _FlagsKind | Iterable[_FlagsKind] | int | flagsobj = ..., +) -> Type[_ndptr[dtype[_SCT]]]: ... +@overload +def ndpointer( + dtype: DTypeLike, + ndim: int = ..., + shape: None = ..., + flags: None | _FlagsKind | Iterable[_FlagsKind] | int | flagsobj = ..., +) -> Type[_ndptr[dtype[Any]]]: ... + +@overload +def as_ctypes_type(dtype: _BoolCodes | _DTypeLike[bool_] | Type[ctypes.c_bool]) -> Type[ctypes.c_bool]: ... +@overload +def as_ctypes_type(dtype: _ByteCodes | _DTypeLike[byte] | Type[ctypes.c_byte]) -> Type[ctypes.c_byte]: ... +@overload +def as_ctypes_type(dtype: _ShortCodes | _DTypeLike[short] | Type[ctypes.c_short]) -> Type[ctypes.c_short]: ... +@overload +def as_ctypes_type(dtype: _IntCCodes | _DTypeLike[intc] | Type[ctypes.c_int]) -> Type[ctypes.c_int]: ... +@overload +def as_ctypes_type(dtype: _IntCodes | _DTypeLike[int_] | Type[int | ctypes.c_long]) -> Type[ctypes.c_long]: ... +@overload +def as_ctypes_type(dtype: _LongLongCodes | _DTypeLike[longlong] | Type[ctypes.c_longlong]) -> Type[ctypes.c_longlong]: ... +@overload +def as_ctypes_type(dtype: _UByteCodes | _DTypeLike[ubyte] | Type[ctypes.c_ubyte]) -> Type[ctypes.c_ubyte]: ... +@overload +def as_ctypes_type(dtype: _UShortCodes | _DTypeLike[ushort] | Type[ctypes.c_ushort]) -> Type[ctypes.c_ushort]: ... +@overload +def as_ctypes_type(dtype: _UIntCCodes | _DTypeLike[uintc] | Type[ctypes.c_uint]) -> Type[ctypes.c_uint]: ... +@overload +def as_ctypes_type(dtype: _UIntCodes | _DTypeLike[uint] | Type[ctypes.c_ulong]) -> Type[ctypes.c_ulong]: ... +@overload +def as_ctypes_type(dtype: _ULongLongCodes | _DTypeLike[ulonglong] | Type[ctypes.c_ulonglong]) -> Type[ctypes.c_ulonglong]: ... +@overload +def as_ctypes_type(dtype: _SingleCodes | _DTypeLike[single] | Type[ctypes.c_float]) -> Type[ctypes.c_float]: ... +@overload +def as_ctypes_type(dtype: _DoubleCodes | _DTypeLike[double] | Type[float | ctypes.c_double]) -> Type[ctypes.c_double]: ... +@overload +def as_ctypes_type(dtype: _LongDoubleCodes | _DTypeLike[longdouble] | Type[ctypes.c_longdouble]) -> Type[ctypes.c_longdouble]: ... +@overload +def as_ctypes_type(dtype: _VoidDTypeLike) -> Type[Any]: ... # `ctypes.Union` or `ctypes.Structure` +@overload +def as_ctypes_type(dtype: str) -> Type[Any]: ... + +@overload +def as_array(obj: ctypes._PointerLike, shape: Sequence[int]) -> NDArray[Any]: ... +@overload +def as_array(obj: _ArrayLike[_SCT], shape: None | _ShapeLike = ...) -> NDArray[_SCT]: ... +@overload +def as_array(obj: object, shape: None | _ShapeLike = ...) -> NDArray[Any]: ... + +@overload +def as_ctypes(obj: bool_) -> ctypes.c_bool: ... +@overload +def as_ctypes(obj: byte) -> ctypes.c_byte: ... +@overload +def as_ctypes(obj: short) -> ctypes.c_short: ... 
+@overload +def as_ctypes(obj: intc) -> ctypes.c_int: ... +@overload +def as_ctypes(obj: int_) -> ctypes.c_long: ... +@overload +def as_ctypes(obj: longlong) -> ctypes.c_longlong: ... +@overload +def as_ctypes(obj: ubyte) -> ctypes.c_ubyte: ... +@overload +def as_ctypes(obj: ushort) -> ctypes.c_ushort: ... +@overload +def as_ctypes(obj: uintc) -> ctypes.c_uint: ... +@overload +def as_ctypes(obj: uint) -> ctypes.c_ulong: ... +@overload +def as_ctypes(obj: ulonglong) -> ctypes.c_ulonglong: ... +@overload +def as_ctypes(obj: single) -> ctypes.c_float: ... +@overload +def as_ctypes(obj: double) -> ctypes.c_double: ... +@overload +def as_ctypes(obj: longdouble) -> ctypes.c_longdouble: ... +@overload +def as_ctypes(obj: void) -> Any: ... # `ctypes.Union` or `ctypes.Structure` +@overload +def as_ctypes(obj: NDArray[bool_]) -> ctypes.Array[ctypes.c_bool]: ... +@overload +def as_ctypes(obj: NDArray[byte]) -> ctypes.Array[ctypes.c_byte]: ... +@overload +def as_ctypes(obj: NDArray[short]) -> ctypes.Array[ctypes.c_short]: ... +@overload +def as_ctypes(obj: NDArray[intc]) -> ctypes.Array[ctypes.c_int]: ... +@overload +def as_ctypes(obj: NDArray[int_]) -> ctypes.Array[ctypes.c_long]: ... +@overload +def as_ctypes(obj: NDArray[longlong]) -> ctypes.Array[ctypes.c_longlong]: ... +@overload +def as_ctypes(obj: NDArray[ubyte]) -> ctypes.Array[ctypes.c_ubyte]: ... +@overload +def as_ctypes(obj: NDArray[ushort]) -> ctypes.Array[ctypes.c_ushort]: ... +@overload +def as_ctypes(obj: NDArray[uintc]) -> ctypes.Array[ctypes.c_uint]: ... +@overload +def as_ctypes(obj: NDArray[uint]) -> ctypes.Array[ctypes.c_ulong]: ... +@overload +def as_ctypes(obj: NDArray[ulonglong]) -> ctypes.Array[ctypes.c_ulonglong]: ... +@overload +def as_ctypes(obj: NDArray[single]) -> ctypes.Array[ctypes.c_float]: ... +@overload +def as_ctypes(obj: NDArray[double]) -> ctypes.Array[ctypes.c_double]: ... +@overload +def as_ctypes(obj: NDArray[longdouble]) -> ctypes.Array[ctypes.c_longdouble]: ... +@overload +def as_ctypes(obj: NDArray[void]) -> ctypes.Array[Any]: ... # `ctypes.Union` or `ctypes.Structure` diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/__config__.py b/.local/lib/python3.8/site-packages/numpy/distutils/__config__.py new file mode 100644 index 0000000..7b3f196 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/__config__.py @@ -0,0 +1,119 @@ +# This file is generated by numpy's setup.py +# It contains system_info results at the time of building this package. 
+__all__ = ["get_info","show"] + + +import os +import sys + +extra_dll_dir = os.path.join(os.path.dirname(__file__), '.libs') + +if sys.platform == 'win32' and os.path.isdir(extra_dll_dir): + if sys.version_info >= (3, 8): + os.add_dll_directory(extra_dll_dir) + else: + os.environ.setdefault('PATH', '') + os.environ['PATH'] += os.pathsep + extra_dll_dir + +openblas64__info={'libraries': ['openblas64_', 'openblas64_'], 'library_dirs': ['/usr/local/lib'], 'language': 'c', 'define_macros': [('HAVE_CBLAS', None), ('BLAS_SYMBOL_SUFFIX', '64_'), ('HAVE_BLAS_ILP64', None)], 'runtime_library_dirs': ['/usr/local/lib']} +blas_ilp64_opt_info={'libraries': ['openblas64_', 'openblas64_'], 'library_dirs': ['/usr/local/lib'], 'language': 'c', 'define_macros': [('HAVE_CBLAS', None), ('BLAS_SYMBOL_SUFFIX', '64_'), ('HAVE_BLAS_ILP64', None)], 'runtime_library_dirs': ['/usr/local/lib']} +openblas64__lapack_info={'libraries': ['openblas64_', 'openblas64_'], 'library_dirs': ['/usr/local/lib'], 'language': 'c', 'define_macros': [('HAVE_CBLAS', None), ('BLAS_SYMBOL_SUFFIX', '64_'), ('HAVE_BLAS_ILP64', None), ('HAVE_LAPACKE', None)], 'runtime_library_dirs': ['/usr/local/lib']} +lapack_ilp64_opt_info={'libraries': ['openblas64_', 'openblas64_'], 'library_dirs': ['/usr/local/lib'], 'language': 'c', 'define_macros': [('HAVE_CBLAS', None), ('BLAS_SYMBOL_SUFFIX', '64_'), ('HAVE_BLAS_ILP64', None), ('HAVE_LAPACKE', None)], 'runtime_library_dirs': ['/usr/local/lib']} + +def get_info(name): + g = globals() + return g.get(name, g.get(name + "_info", {})) + +def show(): + """ + Show libraries in the system on which NumPy was built. + + Print information about various resources (libraries, library + directories, include directories, etc.) in the system on which + NumPy was built. + + See Also + -------- + get_include : Returns the directory containing NumPy C + header files. + + Notes + ----- + 1. Classes specifying the information to be printed are defined + in the `numpy.distutils.system_info` module. + + Information may include: + + * ``language``: language used to write the libraries (mostly + C or f77) + * ``libraries``: names of libraries found in the system + * ``library_dirs``: directories containing the libraries + * ``include_dirs``: directories containing library header files + * ``src_dirs``: directories containing library source files + * ``define_macros``: preprocessor macros used by + ``distutils.setup`` + * ``baseline``: minimum CPU features required + * ``found``: dispatched features supported in the system + * ``not found``: dispatched features that are not supported + in the system + + 2. NumPy BLAS/LAPACK Installation Notes + + Installing a numpy wheel (``pip install numpy`` or force it + via ``pip install numpy --only-binary :numpy: numpy``) includes + an OpenBLAS implementation of the BLAS and LAPACK linear algebra + APIs. In this case, ``library_dirs`` reports the original build + time configuration as compiled with gcc/gfortran; at run time + the OpenBLAS library is in + ``site-packages/numpy.libs/`` (linux), or + ``site-packages/numpy/.dylibs/`` (macOS), or + ``site-packages/numpy/.libs/`` (windows). + + Installing numpy from source + (``pip install numpy --no-binary numpy``) searches for BLAS and + LAPACK dynamic link libraries at build time as influenced by + environment variables NPY_BLAS_LIBS, NPY_CBLAS_LIBS, and + NPY_LAPACK_LIBS; or NPY_BLAS_ORDER and NPY_LAPACK_ORDER; + or the optional file ``~/.numpy-site.cfg``. 
+ NumPy remembers those locations and expects to load the same + libraries at run-time. + In NumPy 1.21+ on macOS, 'accelerate' (Apple's Accelerate BLAS + library) is in the default build-time search order after + 'openblas'. + + Examples + -------- + >>> import numpy as np + >>> np.show_config() + blas_opt_info: + language = c + define_macros = [('HAVE_CBLAS', None)] + libraries = ['openblas', 'openblas'] + library_dirs = ['/usr/local/lib'] + """ + from numpy.core._multiarray_umath import ( + __cpu_features__, __cpu_baseline__, __cpu_dispatch__ + ) + for name,info_dict in globals().items(): + if name[0] == "_" or type(info_dict) is not type({}): continue + print(name + ":") + if not info_dict: + print(" NOT AVAILABLE") + for k,v in info_dict.items(): + v = str(v) + if k == "sources" and len(v) > 200: + v = v[:60] + " ...\n... " + v[-60:] + print(" %s = %s" % (k,v)) + + features_found, features_not_found = [], [] + for feature in __cpu_dispatch__: + if __cpu_features__[feature]: + features_found.append(feature) + else: + features_not_found.append(feature) + + print("Supported SIMD extensions in this NumPy install:") + print(" baseline = %s" % (','.join(__cpu_baseline__))) + print(" found = %s" % (','.join(features_found))) + print(" not found = %s" % (','.join(features_not_found))) + diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/__init__.py b/.local/lib/python3.8/site-packages/numpy/distutils/__init__.py new file mode 100644 index 0000000..79974d1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/__init__.py @@ -0,0 +1,51 @@ +""" +An enhanced distutils, providing support for Fortran compilers, for BLAS, +LAPACK and other common libraries for numerical computing, and more. + +Public submodules are:: + + misc_util + system_info + cpu_info + log + exec_command + +For details, please see the *Packaging* and *NumPy Distutils User Guide* +sections of the NumPy Reference Guide. + +For configuring the preference for and location of libraries like BLAS and +LAPACK, and for setting include paths and similar build options, please see +``site.cfg.example`` in the root of the NumPy repository or sdist. + +""" + +# Must import local ccompiler ASAP in order to get +# customized CCompiler.spawn effective. +from . import ccompiler +from . import unixccompiler + +from .npy_pkg_config import * + +# If numpy is installed, add distutils.test() +try: + from . import __config__ + # Normally numpy is installed if the above import works, but an interrupted + # in-place build could also have left a __config__.py. In that case the + # next import may still fail, so keep it inside the try block. + from numpy._pytesttester import PytestTester + test = PytestTester(__name__) + del PytestTester +except ImportError: + pass + + +def customized_fcompiler(plat=None, compiler=None): + from numpy.distutils.fcompiler import new_fcompiler + c = new_fcompiler(plat=plat, compiler=compiler) + c.customize() + return c + +def customized_ccompiler(plat=None, compiler=None, verbose=1): + c = ccompiler.new_compiler(plat=plat, compiler=compiler, verbose=verbose) + c.customize('') + return c diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/__init__.pyi b/.local/lib/python3.8/site-packages/numpy/distutils/__init__.pyi new file mode 100644 index 0000000..3938d68 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/__init__.pyi @@ -0,0 +1,4 @@ +from typing import Any + +# TODO: remove when the full numpy namespace is defined +def __getattr__(name: str) -> Any: ... 
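+# A short introspection sketch against the modules above; the exact output
+# depends on how this particular NumPy build was configured:
+#
+#     from numpy.distutils import customized_ccompiler
+#     from numpy.distutils.__config__ import get_info, show
+#
+#     print(get_info('blas_ilp64_opt'))  # build-time BLAS details, or {}
+#     show()                             # same data plus the SIMD report
+#     cc = customized_ccompiler()        # platform-customized CCompiler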
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/__config__.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/__config__.cpython-38.pyc new file mode 100644 index 0000000..4863d64 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/__config__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..54bfec8 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/_shell_utils.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/_shell_utils.cpython-38.pyc new file mode 100644 index 0000000..7f7d464 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/_shell_utils.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/armccompiler.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/armccompiler.cpython-38.pyc new file mode 100644 index 0000000..0c452b8 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/armccompiler.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/ccompiler.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/ccompiler.cpython-38.pyc new file mode 100644 index 0000000..e78ad89 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/ccompiler.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/ccompiler_opt.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/ccompiler_opt.cpython-38.pyc new file mode 100644 index 0000000..46edce6 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/ccompiler_opt.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/conv_template.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/conv_template.cpython-38.pyc new file mode 100644 index 0000000..ef0dd7b Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/conv_template.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/core.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/core.cpython-38.pyc new file mode 100644 index 0000000..b09ab91 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/core.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/cpuinfo.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/cpuinfo.cpython-38.pyc new file mode 100644 index 0000000..426e40a Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/cpuinfo.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/exec_command.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/exec_command.cpython-38.pyc new file mode 100644 index 0000000..13397b0 Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/exec_command.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/extension.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/extension.cpython-38.pyc new file mode 100644 index 0000000..edcce69 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/extension.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/from_template.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/from_template.cpython-38.pyc new file mode 100644 index 0000000..41c9717 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/from_template.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/intelccompiler.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/intelccompiler.cpython-38.pyc new file mode 100644 index 0000000..b89879e Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/intelccompiler.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/lib2def.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/lib2def.cpython-38.pyc new file mode 100644 index 0000000..0cb47df Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/lib2def.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/line_endings.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/line_endings.cpython-38.pyc new file mode 100644 index 0000000..ca5bf1b Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/line_endings.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/log.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/log.cpython-38.pyc new file mode 100644 index 0000000..e1dd6e5 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/log.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/mingw32ccompiler.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/mingw32ccompiler.cpython-38.pyc new file mode 100644 index 0000000..62bc6de Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/mingw32ccompiler.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/misc_util.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/misc_util.cpython-38.pyc new file mode 100644 index 0000000..cbd647d Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/misc_util.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/msvc9compiler.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/msvc9compiler.cpython-38.pyc new file mode 100644 index 0000000..feb57b7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/msvc9compiler.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/msvccompiler.cpython-38.pyc 
b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/msvccompiler.cpython-38.pyc
new file mode 100644
index 0000000..2f5b058
Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/msvccompiler.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/npy_pkg_config.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/npy_pkg_config.cpython-38.pyc
new file mode 100644
index 0000000..64aa46a
Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/npy_pkg_config.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/numpy_distribution.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/numpy_distribution.cpython-38.pyc
new file mode 100644
index 0000000..ac988a3
Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/numpy_distribution.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/pathccompiler.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/pathccompiler.cpython-38.pyc
new file mode 100644
index 0000000..e89feb0
Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/pathccompiler.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/setup.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/setup.cpython-38.pyc
new file mode 100644
index 0000000..aaa70bb
Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/setup.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/system_info.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/system_info.cpython-38.pyc
new file mode 100644
index 0000000..a5a3cf9
Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/system_info.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/unixccompiler.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/unixccompiler.cpython-38.pyc
new file mode 100644
index 0000000..9fdebb2
Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/__pycache__/unixccompiler.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/_shell_utils.py b/.local/lib/python3.8/site-packages/numpy/distutils/_shell_utils.py
new file mode 100644
index 0000000..82abd5f
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/_shell_utils.py
@@ -0,0 +1,91 @@
+"""
+Helper functions for interacting with the shell, and consuming shell-style
+parameters provided in config files.
+"""
+import os
+import shlex
+import subprocess
+try:
+    from shlex import quote
+except ImportError:
+    from pipes import quote
+
+__all__ = ['WindowsParser', 'PosixParser', 'NativeParser']
+
+
+class CommandLineParser:
+    """
+    An object that knows how to split and join command-line arguments.
+
+    It must be true that ``argv == split(join(argv))`` for all ``argv``.
+    The reverse needn't be true - `join(split(cmd))` may result in the addition
+    or removal of unnecessary escaping.
+ """ + @staticmethod + def join(argv): + """ Join a list of arguments into a command line string """ + raise NotImplementedError + + @staticmethod + def split(cmd): + """ Split a command line string into a list of arguments """ + raise NotImplementedError + + +class WindowsParser: + """ + The parsing behavior used by `subprocess.call("string")` on Windows, which + matches the Microsoft C/C++ runtime. + + Note that this is _not_ the behavior of cmd. + """ + @staticmethod + def join(argv): + # note that list2cmdline is specific to the windows syntax + return subprocess.list2cmdline(argv) + + @staticmethod + def split(cmd): + import ctypes # guarded import for systems without ctypes + try: + ctypes.windll + except AttributeError: + raise NotImplementedError + + # Windows has special parsing rules for the executable (no quotes), + # that we do not care about - insert a dummy element + if not cmd: + return [] + cmd = 'dummy ' + cmd + + CommandLineToArgvW = ctypes.windll.shell32.CommandLineToArgvW + CommandLineToArgvW.restype = ctypes.POINTER(ctypes.c_wchar_p) + CommandLineToArgvW.argtypes = (ctypes.c_wchar_p, ctypes.POINTER(ctypes.c_int)) + + nargs = ctypes.c_int() + lpargs = CommandLineToArgvW(cmd, ctypes.byref(nargs)) + args = [lpargs[i] for i in range(nargs.value)] + assert not ctypes.windll.kernel32.LocalFree(lpargs) + + # strip the element we inserted + assert args[0] == "dummy" + return args[1:] + + +class PosixParser: + """ + The parsing behavior used by `subprocess.call("string", shell=True)` on Posix. + """ + @staticmethod + def join(argv): + return ' '.join(quote(arg) for arg in argv) + + @staticmethod + def split(cmd): + return shlex.split(cmd, posix=True) + + +if os.name == 'nt': + NativeParser = WindowsParser +elif os.name == 'posix': + NativeParser = PosixParser diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/armccompiler.py b/.local/lib/python3.8/site-packages/numpy/distutils/armccompiler.py new file mode 100644 index 0000000..968504c --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/armccompiler.py @@ -0,0 +1,28 @@ +from __future__ import division, absolute_import, print_function + +from distutils.unixccompiler import UnixCCompiler + +class ArmCCompiler(UnixCCompiler): + + """ + Arm compiler. 
+    """
+
+    compiler_type = 'arm'
+    cc_exe = 'armclang'
+    cxx_exe = 'armclang++'
+
+    def __init__(self, verbose=0, dry_run=0, force=0):
+        UnixCCompiler.__init__(self, verbose, dry_run, force)
+        cc_compiler = self.cc_exe
+        cxx_compiler = self.cxx_exe
+        self.set_executables(compiler=cc_compiler +
+                                      ' -O3 -fPIC',
+                             compiler_so=cc_compiler +
+                                         ' -O3 -fPIC',
+                             compiler_cxx=cxx_compiler +
+                                          ' -O3 -fPIC',
+                             linker_exe=cc_compiler +
+                                        ' -lamath',
+                             linker_so=cc_compiler +
+                                       ' -lamath -shared')
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/ccompiler.py b/.local/lib/python3.8/site-packages/numpy/distutils/ccompiler.py
new file mode 100644
index 0000000..74e2c51
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/ccompiler.py
@@ -0,0 +1,806 @@
+import os
+import re
+import sys
+import shlex
+import time
+import subprocess
+from copy import copy
+from distutils import ccompiler
+from distutils.ccompiler import (
+    compiler_class, gen_lib_options, get_default_compiler, new_compiler,
+    CCompiler
+)
+from distutils.errors import (
+    DistutilsExecError, DistutilsModuleError, DistutilsPlatformError,
+    CompileError, UnknownFileError
+)
+from distutils.sysconfig import customize_compiler
+from distutils.version import LooseVersion
+
+from numpy.distutils import log
+from numpy.distutils.exec_command import (
+    filepath_from_subprocess_output, forward_bytes_to_stdout
+)
+from numpy.distutils.misc_util import cyg2win32, is_sequence, mingw32, \
+                                      get_num_build_jobs, \
+                                      _commandline_dep_string, \
+                                      sanitize_cxx_flags
+
+# globals for parallel build management
+import threading
+
+_job_semaphore = None
+_global_lock = threading.Lock()
+_processing_files = set()
+
+
+def _needs_build(obj, cc_args, extra_postargs, pp_opts):
+    """
+    Check if an object needs to be rebuilt based on its dependencies
+
+    Parameters
+    ----------
+    obj : str
+        object file
+
+    Returns
+    -------
+    bool
+    """
+    # defined in unixccompiler.py
+    dep_file = obj + '.d'
+    if not os.path.exists(dep_file):
+        return True
+
+    # dep_file is a makefile containing 'object: dependencies'
+    # formatted like posix shell (spaces escaped, \ line continuations)
+    # the last line contains the compiler commandline arguments as some
+    # projects may compile an extension multiple times with different
+    # arguments
+    with open(dep_file, "r") as f:
+        lines = f.readlines()
+
+    cmdline = _commandline_dep_string(cc_args, extra_postargs, pp_opts)
+    last_cmdline = lines[-1]
+    if last_cmdline != cmdline:
+        return True
+
+    contents = ''.join(lines[:-1])
+    deps = [x for x in shlex.split(contents, posix=True)
+            if x != "\n" and not x.endswith(":")]
+
+    try:
+        t_obj = os.stat(obj).st_mtime
+
+        # check if any of the dependencies is newer than the object;
+        # the dependencies include the source used to create the object
+        for f in deps:
+            if os.stat(f).st_mtime > t_obj:
+                return True
+    except OSError:
+        # no object counts as newer (shouldn't happen if dep_file exists)
+        return True
+
+    return False
+
+
+def replace_method(klass, method_name, func):
+    # Py3k does not have unbound methods anymore, MethodType does not work
+    m = lambda self, *args, **kw: func(self, *args, **kw)
+    setattr(klass, method_name, m)
+
+
+######################################################################
+## Method that subclasses may redefine. But don't call this method,
+## it is private to the CCompiler class and may return unexpected
+## results if used elsewhere. So, you have been warned.
+ +def CCompiler_find_executables(self): + """ + Does nothing here, but is called by the get_version method and can be + overridden by subclasses. In particular it is redefined in the `FCompiler` + class where more documentation can be found. + + """ + pass + + +replace_method(CCompiler, 'find_executables', CCompiler_find_executables) + + +# Using customized CCompiler.spawn. +def CCompiler_spawn(self, cmd, display=None, env=None): + """ + Execute a command in a sub-process. + + Parameters + ---------- + cmd : str + The command to execute. + display : str or sequence of str, optional + The text to add to the log file kept by `numpy.distutils`. + If not given, `display` is equal to `cmd`. + env: a dictionary for environment variables, optional + + Returns + ------- + None + + Raises + ------ + DistutilsExecError + If the command failed, i.e. the exit status was not 0. + + """ + env = env if env is not None else dict(os.environ) + if display is None: + display = cmd + if is_sequence(display): + display = ' '.join(list(display)) + log.info(display) + try: + if self.verbose: + subprocess.check_output(cmd, env=env) + else: + subprocess.check_output(cmd, stderr=subprocess.STDOUT, env=env) + except subprocess.CalledProcessError as exc: + o = exc.output + s = exc.returncode + except OSError as e: + # OSError doesn't have the same hooks for the exception + # output, but exec_command() historically would use an + # empty string for EnvironmentError (base class for + # OSError) + # o = b'' + # still that would make the end-user lost in translation! + o = f"\n\n{e}\n\n\n" + try: + o = o.encode(sys.stdout.encoding) + except AttributeError: + o = o.encode('utf8') + # status previously used by exec_command() for parent + # of OSError + s = 127 + else: + # use a convenience return here so that any kind of + # caught exception will execute the default code after the + # try / except block, which handles various exceptions + return None + + if is_sequence(cmd): + cmd = ' '.join(list(cmd)) + + if self.verbose: + forward_bytes_to_stdout(o) + + if re.search(b'Too many open files', o): + msg = '\nTry rerunning setup command until build succeeds.' + else: + msg = '' + raise DistutilsExecError('Command "%s" failed with exit status %d%s' % + (cmd, s, msg)) + +replace_method(CCompiler, 'spawn', CCompiler_spawn) + +def CCompiler_object_filenames(self, source_filenames, strip_dir=0, output_dir=''): + """ + Return the name of the object files for the given source files. + + Parameters + ---------- + source_filenames : list of str + The list of paths to source files. Paths can be either relative or + absolute, this is handled transparently. + strip_dir : bool, optional + Whether to strip the directory from the returned paths. If True, + the file name prepended by `output_dir` is returned. Default is False. + output_dir : str, optional + If given, this path is prepended to the returned paths to the + object files. + + Returns + ------- + obj_names : list of str + The list of paths to the object files corresponding to the source + files in `source_filenames`. + + """ + if output_dir is None: + output_dir = '' + obj_names = [] + for src_name in source_filenames: + base, ext = os.path.splitext(os.path.normpath(src_name)) + base = os.path.splitdrive(base)[1] # Chop off the drive + base = base[os.path.isabs(base):] # If abs, chop off leading / + if base.startswith('..'): + # Resolve starting relative path components, middle ones + # (if any) have been handled by os.path.normpath above. 
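+            # e.g. a leading '../..' is resolved against the current
+            # directory and replaced by that directory's basename, so the
+            # object file still lands inside output_dir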
+ i = base.rfind('..')+2 + d = base[:i] + d = os.path.basename(os.path.abspath(d)) + base = d + base[i:] + if ext not in self.src_extensions: + raise UnknownFileError("unknown file type '%s' (from '%s')" % (ext, src_name)) + if strip_dir: + base = os.path.basename(base) + obj_name = os.path.join(output_dir, base + self.obj_extension) + obj_names.append(obj_name) + return obj_names + +replace_method(CCompiler, 'object_filenames', CCompiler_object_filenames) + +def CCompiler_compile(self, sources, output_dir=None, macros=None, + include_dirs=None, debug=0, extra_preargs=None, + extra_postargs=None, depends=None): + """ + Compile one or more source files. + + Please refer to the Python distutils API reference for more details. + + Parameters + ---------- + sources : list of str + A list of filenames + output_dir : str, optional + Path to the output directory. + macros : list of tuples + A list of macro definitions. + include_dirs : list of str, optional + The directories to add to the default include file search path for + this compilation only. + debug : bool, optional + Whether or not to output debug symbols in or alongside the object + file(s). + extra_preargs, extra_postargs : ? + Extra pre- and post-arguments. + depends : list of str, optional + A list of file names that all targets depend on. + + Returns + ------- + objects : list of str + A list of object file names, one per source file `sources`. + + Raises + ------ + CompileError + If compilation fails. + + """ + global _job_semaphore + + jobs = get_num_build_jobs() + + # setup semaphore to not exceed number of compile jobs when parallelized at + # extension level (python >= 3.5) + with _global_lock: + if _job_semaphore is None: + _job_semaphore = threading.Semaphore(jobs) + + if not sources: + return [] + from numpy.distutils.fcompiler import (FCompiler, is_f_file, + has_f90_header) + if isinstance(self, FCompiler): + display = [] + for fc in ['f77', 'f90', 'fix']: + fcomp = getattr(self, 'compiler_'+fc) + if fcomp is None: + continue + display.append("Fortran %s compiler: %s" % (fc, ' '.join(fcomp))) + display = '\n'.join(display) + else: + ccomp = self.compiler_so + display = "C compiler: %s\n" % (' '.join(ccomp),) + log.info(display) + macros, objects, extra_postargs, pp_opts, build = \ + self._setup_compile(output_dir, macros, include_dirs, sources, + depends, extra_postargs) + cc_args = self._get_cc_args(pp_opts, debug, extra_preargs) + display = "compile options: '%s'" % (' '.join(cc_args)) + if extra_postargs: + display += "\nextra options: '%s'" % (' '.join(extra_postargs)) + log.info(display) + + def single_compile(args): + obj, (src, ext) = args + if not _needs_build(obj, cc_args, extra_postargs, pp_opts): + return + + # check if we are currently already processing the same object + # happens when using the same source in multiple extensions + while True: + # need explicit lock as there is no atomic check and add with GIL + with _global_lock: + # file not being worked on, start working + if obj not in _processing_files: + _processing_files.add(obj) + break + # wait for the processing to end + time.sleep(0.1) + + try: + # retrieve slot from our #job semaphore and build + with _job_semaphore: + self._compile(obj, src, ext, cc_args, extra_postargs, pp_opts) + finally: + # register being done processing + with _global_lock: + _processing_files.remove(obj) + + + if isinstance(self, FCompiler): + objects_to_build = list(build.keys()) + f77_objects, other_objects = [], [] + for obj in objects: + if obj in objects_to_build: + src, 
ext = build[obj] + if self.compiler_type=='absoft': + obj = cyg2win32(obj) + src = cyg2win32(src) + if is_f_file(src) and not has_f90_header(src): + f77_objects.append((obj, (src, ext))) + else: + other_objects.append((obj, (src, ext))) + + # f77 objects can be built in parallel + build_items = f77_objects + # build f90 modules serial, module files are generated during + # compilation and may be used by files later in the list so the + # ordering is important + for o in other_objects: + single_compile(o) + else: + build_items = build.items() + + if len(build) > 1 and jobs > 1: + # build parallel + from concurrent.futures import ThreadPoolExecutor + with ThreadPoolExecutor(jobs) as pool: + pool.map(single_compile, build_items) + else: + # build serial + for o in build_items: + single_compile(o) + + # Return *all* object filenames, not just the ones we just built. + return objects + +replace_method(CCompiler, 'compile', CCompiler_compile) + +def CCompiler_customize_cmd(self, cmd, ignore=()): + """ + Customize compiler using distutils command. + + Parameters + ---------- + cmd : class instance + An instance inheriting from `distutils.cmd.Command`. + ignore : sequence of str, optional + List of `CCompiler` commands (without ``'set_'``) that should not be + altered. Strings that are checked for are: + ``('include_dirs', 'define', 'undef', 'libraries', 'library_dirs', + 'rpath', 'link_objects')``. + + Returns + ------- + None + + """ + log.info('customize %s using %s' % (self.__class__.__name__, + cmd.__class__.__name__)) + + if hasattr(self, 'compiler') and 'clang' in self.compiler[0]: + # clang defaults to a non-strict floating error point model. + # Since NumPy and most Python libs give warnings for these, override: + self.compiler.append('-ftrapping-math') + self.compiler_so.append('-ftrapping-math') + + def allow(attr): + return getattr(cmd, attr, None) is not None and attr not in ignore + + if allow('include_dirs'): + self.set_include_dirs(cmd.include_dirs) + if allow('define'): + for (name, value) in cmd.define: + self.define_macro(name, value) + if allow('undef'): + for macro in cmd.undef: + self.undefine_macro(macro) + if allow('libraries'): + self.set_libraries(self.libraries + cmd.libraries) + if allow('library_dirs'): + self.set_library_dirs(self.library_dirs + cmd.library_dirs) + if allow('rpath'): + self.set_runtime_library_dirs(cmd.rpath) + if allow('link_objects'): + self.set_link_objects(cmd.link_objects) + +replace_method(CCompiler, 'customize_cmd', CCompiler_customize_cmd) + +def _compiler_to_string(compiler): + props = [] + mx = 0 + keys = list(compiler.executables.keys()) + for key in ['version', 'libraries', 'library_dirs', + 'object_switch', 'compile_switch', + 'include_dirs', 'define', 'undef', 'rpath', 'link_objects']: + if key not in keys: + keys.append(key) + for key in keys: + if hasattr(compiler, key): + v = getattr(compiler, key) + mx = max(mx, len(key)) + props.append((key, repr(v))) + fmt = '%-' + repr(mx+1) + 's = %s' + lines = [fmt % prop for prop in props] + return '\n'.join(lines) + +def CCompiler_show_customization(self): + """ + Print the compiler customizations to stdout. + + Parameters + ---------- + None + + Returns + ------- + None + + Notes + ----- + Printing is only done if the distutils log threshold is < 2. 
+ + """ + try: + self.get_version() + except Exception: + pass + if log._global_log.threshold<2: + print('*'*80) + print(self.__class__) + print(_compiler_to_string(self)) + print('*'*80) + +replace_method(CCompiler, 'show_customization', CCompiler_show_customization) + +def CCompiler_customize(self, dist, need_cxx=0): + """ + Do any platform-specific customization of a compiler instance. + + This method calls `distutils.sysconfig.customize_compiler` for + platform-specific customization, as well as optionally remove a flag + to suppress spurious warnings in case C++ code is being compiled. + + Parameters + ---------- + dist : object + This parameter is not used for anything. + need_cxx : bool, optional + Whether or not C++ has to be compiled. If so (True), the + ``"-Wstrict-prototypes"`` option is removed to prevent spurious + warnings. Default is False. + + Returns + ------- + None + + Notes + ----- + All the default options used by distutils can be extracted with:: + + from distutils import sysconfig + sysconfig.get_config_vars('CC', 'CXX', 'OPT', 'BASECFLAGS', + 'CCSHARED', 'LDSHARED', 'SO') + + """ + # See FCompiler.customize for suggested usage. + log.info('customize %s' % (self.__class__.__name__)) + customize_compiler(self) + if need_cxx: + # In general, distutils uses -Wstrict-prototypes, but this option is + # not valid for C++ code, only for C. Remove it if it's there to + # avoid a spurious warning on every compilation. + try: + self.compiler_so.remove('-Wstrict-prototypes') + except (AttributeError, ValueError): + pass + + if hasattr(self, 'compiler') and 'cc' in self.compiler[0]: + if not self.compiler_cxx: + if self.compiler[0].startswith('gcc'): + a, b = 'gcc', 'g++' + else: + a, b = 'cc', 'c++' + self.compiler_cxx = [self.compiler[0].replace(a, b)]\ + + self.compiler[1:] + else: + if hasattr(self, 'compiler'): + log.warn("#### %s #######" % (self.compiler,)) + if not hasattr(self, 'compiler_cxx'): + log.warn('Missing compiler_cxx fix for ' + self.__class__.__name__) + + + # check if compiler supports gcc style automatic dependencies + # run on every extension so skip for known good compilers + if hasattr(self, 'compiler') and ('gcc' in self.compiler[0] or + 'g++' in self.compiler[0] or + 'clang' in self.compiler[0]): + self._auto_depends = True + elif os.name == 'posix': + import tempfile + import shutil + tmpdir = tempfile.mkdtemp() + try: + fn = os.path.join(tmpdir, "file.c") + with open(fn, "w") as f: + f.write("int a;\n") + self.compile([fn], output_dir=tmpdir, + extra_preargs=['-MMD', '-MF', fn + '.d']) + self._auto_depends = True + except CompileError: + self._auto_depends = False + finally: + shutil.rmtree(tmpdir) + + return + +replace_method(CCompiler, 'customize', CCompiler_customize) + +def simple_version_match(pat=r'[-.\d]+', ignore='', start=''): + """ + Simple matching of version numbers, for use in CCompiler and FCompiler. + + Parameters + ---------- + pat : str, optional + A regular expression matching version numbers. + Default is ``r'[-.\\d]+'``. + ignore : str, optional + A regular expression matching patterns to skip. + Default is ``''``, in which case nothing is skipped. + start : str, optional + A regular expression matching the start of where to start looking + for version numbers. + Default is ``''``, in which case searching is started at the + beginning of the version string given to `matcher`. + + Returns + ------- + matcher : callable + A function that is appropriate to use as the ``.version_match`` + attribute of a `CCompiler` class. 
`matcher` takes a single parameter, + a version string. + + """ + def matcher(self, version_string): + # version string may appear in the second line, so getting rid + # of new lines: + version_string = version_string.replace('\n', ' ') + pos = 0 + if start: + m = re.match(start, version_string) + if not m: + return None + pos = m.end() + while True: + m = re.search(pat, version_string[pos:]) + if not m: + return None + if ignore and re.match(ignore, m.group(0)): + pos = m.end() + continue + break + return m.group(0) + return matcher + +def CCompiler_get_version(self, force=False, ok_status=[0]): + """ + Return compiler version, or None if compiler is not available. + + Parameters + ---------- + force : bool, optional + If True, force a new determination of the version, even if the + compiler already has a version attribute. Default is False. + ok_status : list of int, optional + The list of status values returned by the version look-up process + for which a version string is returned. If the status value is not + in `ok_status`, None is returned. Default is ``[0]``. + + Returns + ------- + version : str or None + Version string, in the format of `distutils.version.LooseVersion`. + + """ + if not force and hasattr(self, 'version'): + return self.version + self.find_executables() + try: + version_cmd = self.version_cmd + except AttributeError: + return None + if not version_cmd or not version_cmd[0]: + return None + try: + matcher = self.version_match + except AttributeError: + try: + pat = self.version_pattern + except AttributeError: + return None + def matcher(version_string): + m = re.match(pat, version_string) + if not m: + return None + version = m.group('version') + return version + + try: + output = subprocess.check_output(version_cmd, stderr=subprocess.STDOUT) + except subprocess.CalledProcessError as exc: + output = exc.output + status = exc.returncode + except OSError: + # match the historical returns for a parent + # exception class caught by exec_command() + status = 127 + output = b'' + else: + # output isn't actually a filepath but we do this + # for now to match previous distutils behavior + output = filepath_from_subprocess_output(output) + status = 0 + + version = None + if status in ok_status: + version = matcher(output) + if version: + version = LooseVersion(version) + self.version = version + return version + +replace_method(CCompiler, 'get_version', CCompiler_get_version) + +def CCompiler_cxx_compiler(self): + """ + Return the C++ compiler. + + Parameters + ---------- + None + + Returns + ------- + cxx : class instance + The C++ compiler, as a `CCompiler` instance. 
+ + """ + if self.compiler_type in ('msvc', 'intelw', 'intelemw'): + return self + + cxx = copy(self) + cxx.compiler_cxx = cxx.compiler_cxx + cxx.compiler_so = [cxx.compiler_cxx[0]] + \ + sanitize_cxx_flags(cxx.compiler_so[1:]) + if sys.platform.startswith('aix') and 'ld_so_aix' in cxx.linker_so[0]: + # AIX needs the ld_so_aix script included with Python + cxx.linker_so = [cxx.linker_so[0], cxx.compiler_cxx[0]] \ + + cxx.linker_so[2:] + else: + cxx.linker_so = [cxx.compiler_cxx[0]] + cxx.linker_so[1:] + return cxx + +replace_method(CCompiler, 'cxx_compiler', CCompiler_cxx_compiler) + +compiler_class['intel'] = ('intelccompiler', 'IntelCCompiler', + "Intel C Compiler for 32-bit applications") +compiler_class['intele'] = ('intelccompiler', 'IntelItaniumCCompiler', + "Intel C Itanium Compiler for Itanium-based applications") +compiler_class['intelem'] = ('intelccompiler', 'IntelEM64TCCompiler', + "Intel C Compiler for 64-bit applications") +compiler_class['intelw'] = ('intelccompiler', 'IntelCCompilerW', + "Intel C Compiler for 32-bit applications on Windows") +compiler_class['intelemw'] = ('intelccompiler', 'IntelEM64TCCompilerW', + "Intel C Compiler for 64-bit applications on Windows") +compiler_class['pathcc'] = ('pathccompiler', 'PathScaleCCompiler', + "PathScale Compiler for SiCortex-based applications") +compiler_class['arm'] = ('armccompiler', 'ArmCCompiler', + "Arm C Compiler") + +ccompiler._default_compilers += (('linux.*', 'intel'), + ('linux.*', 'intele'), + ('linux.*', 'intelem'), + ('linux.*', 'pathcc'), + ('nt', 'intelw'), + ('nt', 'intelemw')) + +if sys.platform == 'win32': + compiler_class['mingw32'] = ('mingw32ccompiler', 'Mingw32CCompiler', + "Mingw32 port of GNU C Compiler for Win32"\ + "(for MSC built Python)") + if mingw32(): + # On windows platforms, we want to default to mingw32 (gcc) + # because msvc can't build blitz stuff. + log.info('Setting mingw32 as default compiler for nt.') + ccompiler._default_compilers = (('nt', 'mingw32'),) \ + + ccompiler._default_compilers + + +_distutils_new_compiler = new_compiler +def new_compiler (plat=None, + compiler=None, + verbose=None, + dry_run=0, + force=0): + # Try first C compilers from numpy.distutils. + if verbose is None: + verbose = log.get_threshold() <= log.INFO + if plat is None: + plat = os.name + try: + if compiler is None: + compiler = get_default_compiler(plat) + (module_name, class_name, long_description) = compiler_class[compiler] + except KeyError: + msg = "don't know how to compile C/C++ code on platform '%s'" % plat + if compiler is not None: + msg = msg + " with '%s' compiler" % compiler + raise DistutilsPlatformError(msg) + module_name = "numpy.distutils." 
+ module_name + try: + __import__ (module_name) + except ImportError as e: + msg = str(e) + log.info('%s in numpy.distutils; trying from distutils', + str(msg)) + module_name = module_name[6:] + try: + __import__(module_name) + except ImportError as e: + msg = str(e) + raise DistutilsModuleError("can't compile C/C++ code: unable to load module '%s'" % \ + module_name) + try: + module = sys.modules[module_name] + klass = vars(module)[class_name] + except KeyError: + raise DistutilsModuleError(("can't compile C/C++ code: unable to find class '%s' " + + "in module '%s'") % (class_name, module_name)) + compiler = klass(None, dry_run, force) + compiler.verbose = verbose + log.debug('new_compiler returns %s' % (klass)) + return compiler + +ccompiler.new_compiler = new_compiler + +_distutils_gen_lib_options = gen_lib_options +def gen_lib_options(compiler, library_dirs, runtime_library_dirs, libraries): + # the version of this function provided by CPython allows the following + # to return lists, which are unpacked automatically: + # - compiler.runtime_library_dir_option + # our version extends the behavior to: + # - compiler.library_dir_option + # - compiler.library_option + # - compiler.find_library_file + r = _distutils_gen_lib_options(compiler, library_dirs, + runtime_library_dirs, libraries) + lib_opts = [] + for i in r: + if is_sequence(i): + lib_opts.extend(list(i)) + else: + lib_opts.append(i) + return lib_opts +ccompiler.gen_lib_options = gen_lib_options + +# Also fix up the various compiler modules, which do +# from distutils.ccompiler import gen_lib_options +# Don't bother with mwerks, as we don't support Classic Mac. +for _cc in ['msvc9', 'msvc', '_msvc', 'bcpp', 'cygwinc', 'emxc', 'unixc']: + _m = sys.modules.get('distutils.' + _cc + 'compiler') + if _m is not None: + setattr(_m, 'gen_lib_options', gen_lib_options) + diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/ccompiler_opt.py b/.local/lib/python3.8/site-packages/numpy/distutils/ccompiler_opt.py new file mode 100644 index 0000000..b38e47c --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/ccompiler_opt.py @@ -0,0 +1,2580 @@ +"""Provides the `CCompilerOpt` class, used for handling the CPU/hardware +optimization, starting from parsing the command arguments, to managing the +relation between the CPU baseline and dispatch-able features, +also generating the required C headers and ending with compiling +the sources with proper compiler's flags. + +`CCompilerOpt` doesn't provide runtime detection for the CPU features, +instead only focuses on the compiler side, but it creates abstract C headers +that can be used later for the final runtime dispatching process.""" + +import atexit +import inspect +import os +import pprint +import re +import subprocess +import textwrap + + +class _Config: + """An abstract class holds all configurable attributes of `CCompilerOpt`, + these class attributes can be used to change the default behavior + of `CCompilerOpt` in order to fit other requirements. + + Attributes + ---------- + conf_nocache : bool + Set True to disable memory and file cache. + Default is False. + + conf_noopt : bool + Set True to forces the optimization to be disabled, + in this case `CCompilerOpt` tends to generate all + expected headers in order to 'not' break the build. + Default is False. + + conf_cache_factors : list + Add extra factors to the primary caching factors. 
The caching factors + are utilized to determine if there are changes had happened that + requires to discard the cache and re-updating it. The primary factors + are the arguments of `CCompilerOpt` and `CCompiler`'s properties(type, flags, etc). + Default is list of two items, containing the time of last modification + of `ccompiler_opt` and value of attribute "conf_noopt" + + conf_tmp_path : str, + The path of temporary directory. Default is auto-created + temporary directory via ``tempfile.mkdtemp()``. + + conf_check_path : str + The path of testing files. Each added CPU feature must have a + **C** source file contains at least one intrinsic or instruction that + related to this feature, so it can be tested against the compiler. + Default is ``./distutils/checks``. + + conf_target_groups : dict + Extra tokens that can be reached from dispatch-able sources through + the special mark ``@targets``. Default is an empty dictionary. + + **Notes**: + - case-insensitive for tokens and group names + - sign '#' must stick in the begin of group name and only within ``@targets`` + + **Example**: + .. code-block:: console + + $ "@targets #avx_group other_tokens" > group_inside.c + + >>> CCompilerOpt.conf_target_groups["avx_group"] = \\ + "$werror $maxopt avx2 avx512f avx512_skx" + >>> cco = CCompilerOpt(cc_instance) + >>> cco.try_dispatch(["group_inside.c"]) + + conf_c_prefix : str + The prefix of public C definitions. Default is ``"NPY_"``. + + conf_c_prefix_ : str + The prefix of internal C definitions. Default is ``"NPY__"``. + + conf_cc_flags : dict + Nested dictionaries defining several compiler flags + that linked to some major functions, the main key + represent the compiler name and sub-keys represent + flags names. Default is already covers all supported + **C** compilers. + + Sub-keys explained as follows: + + "native": str or None + used by argument option `native`, to detect the current + machine support via the compiler. + "werror": str or None + utilized to treat warning as errors during testing CPU features + against the compiler and also for target's policy `$werror` + via dispatch-able sources. + "maxopt": str or None + utilized for target's policy '$maxopt' and the value should + contains the maximum acceptable optimization by the compiler. + e.g. in gcc `'-O3'` + + **Notes**: + * case-sensitive for compiler names and flags + * use space to separate multiple flags + * any flag will tested against the compiler and it will skipped + if it's not applicable. + + conf_min_features : dict + A dictionary defines the used CPU features for + argument option `'min'`, the key represent the CPU architecture + name e.g. `'x86'`. Default values provide the best effort + on wide range of users platforms. + + **Note**: case-sensitive for architecture names. + + conf_features : dict + Nested dictionaries used for identifying the CPU features. + the primary key is represented as a feature name or group name + that gathers several features. Default values covers all + supported features but without the major options like "flags", + these undefined options handle it by method `conf_features_partial()`. + Default value is covers almost all CPU features for *X86*, *IBM/Power64* + and *ARM 7/8*. + + Sub-keys explained as follows: + + "implies" : str or list, optional, + List of CPU feature names to be implied by it, + the feature name must be defined within `conf_features`. + Default is None. + + "flags": str or list, optional + List of compiler flags. Default is None. 
+ + "detect": str or list, optional + List of CPU feature names that required to be detected + in runtime. By default, its the feature name or features + in "group" if its specified. + + "implies_detect": bool, optional + If True, all "detect" of implied features will be combined. + Default is True. see `feature_detect()`. + + "group": str or list, optional + Same as "implies" but doesn't require the feature name to be + defined within `conf_features`. + + "interest": int, required + a key for sorting CPU features + + "headers": str or list, optional + intrinsics C header file + + "disable": str, optional + force disable feature, the string value should contains the + reason of disabling. + + "autovec": bool or None, optional + True or False to declare that CPU feature can be auto-vectorized + by the compiler. + By default(None), treated as True if the feature contains at + least one applicable flag. see `feature_can_autovec()` + + "extra_checks": str or list, optional + Extra test case names for the CPU feature that need to be tested + against the compiler. + + Each test case must have a C file named ``extra_xxxx.c``, where + ``xxxx`` is the case name in lower case, under 'conf_check_path'. + It should contain at least one intrinsic or function related to the test case. + + If the compiler able to successfully compile the C file then `CCompilerOpt` + will add a C ``#define`` for it into the main dispatch header, e.g. + ```#define {conf_c_prefix}_XXXX`` where ``XXXX`` is the case name in upper case. + + **NOTES**: + * space can be used as separator with options that supports "str or list" + * case-sensitive for all values and feature name must be in upper-case. + * if flags aren't applicable, its will skipped rather than disable the + CPU feature + * the CPU feature will disabled if the compiler fail to compile + the test file + """ + conf_nocache = False + conf_noopt = False + conf_cache_factors = None + conf_tmp_path = None + conf_check_path = os.path.join( + os.path.dirname(os.path.realpath(__file__)), "checks" + ) + conf_target_groups = {} + conf_c_prefix = 'NPY_' + conf_c_prefix_ = 'NPY__' + conf_cc_flags = dict( + gcc = dict( + # native should always fail on arm and ppc64, + # native usually works only with x86 + native = '-march=native', + opt = '-O3', + werror = '-Werror', + ), + clang = dict( + native = '-march=native', + opt = "-O3", + # One of the following flags needs to be applicable for Clang to + # guarantee the sanity of the testing process, however in certain + # cases `-Werror` gets skipped during the availability test due to + # "unused arguments" warnings. 
+ # see https://github.com/numpy/numpy/issues/19624 + werror = '-Werror=switch -Werror', + ), + icc = dict( + native = '-xHost', + opt = '-O3', + werror = '-Werror', + ), + iccw = dict( + native = '/QxHost', + opt = '/O3', + werror = '/Werror', + ), + msvc = dict( + native = None, + opt = '/O2', + werror = '/WX', + ) + ) + conf_min_features = dict( + x86 = "SSE SSE2", + x64 = "SSE SSE2 SSE3", + ppc64 = '', # play it safe + ppc64le = "VSX VSX2", + armhf = '', # play it safe + aarch64 = "NEON NEON_FP16 NEON_VFPV4 ASIMD" + ) + conf_features = dict( + # X86 + SSE = dict( + interest=1, headers="xmmintrin.h", + # enabling SSE without SSE2 is useless also + # it's non-optional for x86_64 + implies="SSE2" + ), + SSE2 = dict(interest=2, implies="SSE", headers="emmintrin.h"), + SSE3 = dict(interest=3, implies="SSE2", headers="pmmintrin.h"), + SSSE3 = dict(interest=4, implies="SSE3", headers="tmmintrin.h"), + SSE41 = dict(interest=5, implies="SSSE3", headers="smmintrin.h"), + POPCNT = dict(interest=6, implies="SSE41", headers="popcntintrin.h"), + SSE42 = dict(interest=7, implies="POPCNT"), + AVX = dict( + interest=8, implies="SSE42", headers="immintrin.h", + implies_detect=False + ), + XOP = dict(interest=9, implies="AVX", headers="x86intrin.h"), + FMA4 = dict(interest=10, implies="AVX", headers="x86intrin.h"), + F16C = dict(interest=11, implies="AVX"), + FMA3 = dict(interest=12, implies="F16C"), + AVX2 = dict(interest=13, implies="F16C"), + AVX512F = dict( + interest=20, implies="FMA3 AVX2", implies_detect=False, + extra_checks="AVX512F_REDUCE" + ), + AVX512CD = dict(interest=21, implies="AVX512F"), + AVX512_KNL = dict( + interest=40, implies="AVX512CD", group="AVX512ER AVX512PF", + detect="AVX512_KNL", implies_detect=False + ), + AVX512_KNM = dict( + interest=41, implies="AVX512_KNL", + group="AVX5124FMAPS AVX5124VNNIW AVX512VPOPCNTDQ", + detect="AVX512_KNM", implies_detect=False + ), + AVX512_SKX = dict( + interest=42, implies="AVX512CD", group="AVX512VL AVX512BW AVX512DQ", + detect="AVX512_SKX", implies_detect=False, + extra_checks="AVX512BW_MASK AVX512DQ_MASK" + ), + AVX512_CLX = dict( + interest=43, implies="AVX512_SKX", group="AVX512VNNI", + detect="AVX512_CLX" + ), + AVX512_CNL = dict( + interest=44, implies="AVX512_SKX", group="AVX512IFMA AVX512VBMI", + detect="AVX512_CNL", implies_detect=False + ), + AVX512_ICL = dict( + interest=45, implies="AVX512_CLX AVX512_CNL", + group="AVX512VBMI2 AVX512BITALG AVX512VPOPCNTDQ", + detect="AVX512_ICL", implies_detect=False + ), + # IBM/Power + ## Power7/ISA 2.06 + VSX = dict(interest=1, headers="altivec.h", extra_checks="VSX_ASM"), + ## Power8/ISA 2.07 + VSX2 = dict(interest=2, implies="VSX", implies_detect=False), + ## Power9/ISA 3.00 + VSX3 = dict(interest=3, implies="VSX2", implies_detect=False), + # ARM + NEON = dict(interest=1, headers="arm_neon.h"), + NEON_FP16 = dict(interest=2, implies="NEON"), + ## FMA + NEON_VFPV4 = dict(interest=3, implies="NEON_FP16"), + ## Advanced SIMD + ASIMD = dict(interest=4, implies="NEON_FP16 NEON_VFPV4", implies_detect=False), + ## ARMv8.2 half-precision & vector arithm + ASIMDHP = dict(interest=5, implies="ASIMD"), + ## ARMv8.2 dot product + ASIMDDP = dict(interest=6, implies="ASIMD"), + ## ARMv8.2 Single & half-precision Multiply + ASIMDFHM = dict(interest=7, implies="ASIMDHP"), + ) + def conf_features_partial(self): + """Return a dictionary of supported CPU features by the platform, + and accumulate the rest of undefined options in `conf_features`, + the returned dict has same rules and notes in + class attribute 
`conf_features`, also its override + any options that been set in 'conf_features'. + """ + if self.cc_noopt: + # optimization is disabled + return {} + + on_x86 = self.cc_on_x86 or self.cc_on_x64 + is_unix = self.cc_is_gcc or self.cc_is_clang + + if on_x86 and is_unix: return dict( + SSE = dict(flags="-msse"), + SSE2 = dict(flags="-msse2"), + SSE3 = dict(flags="-msse3"), + SSSE3 = dict(flags="-mssse3"), + SSE41 = dict(flags="-msse4.1"), + POPCNT = dict(flags="-mpopcnt"), + SSE42 = dict(flags="-msse4.2"), + AVX = dict(flags="-mavx"), + F16C = dict(flags="-mf16c"), + XOP = dict(flags="-mxop"), + FMA4 = dict(flags="-mfma4"), + FMA3 = dict(flags="-mfma"), + AVX2 = dict(flags="-mavx2"), + AVX512F = dict(flags="-mavx512f"), + AVX512CD = dict(flags="-mavx512cd"), + AVX512_KNL = dict(flags="-mavx512er -mavx512pf"), + AVX512_KNM = dict( + flags="-mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq" + ), + AVX512_SKX = dict(flags="-mavx512vl -mavx512bw -mavx512dq"), + AVX512_CLX = dict(flags="-mavx512vnni"), + AVX512_CNL = dict(flags="-mavx512ifma -mavx512vbmi"), + AVX512_ICL = dict( + flags="-mavx512vbmi2 -mavx512bitalg -mavx512vpopcntdq" + ) + ) + if on_x86 and self.cc_is_icc: return dict( + SSE = dict(flags="-msse"), + SSE2 = dict(flags="-msse2"), + SSE3 = dict(flags="-msse3"), + SSSE3 = dict(flags="-mssse3"), + SSE41 = dict(flags="-msse4.1"), + POPCNT = {}, + SSE42 = dict(flags="-msse4.2"), + AVX = dict(flags="-mavx"), + F16C = {}, + XOP = dict(disable="Intel Compiler doesn't support it"), + FMA4 = dict(disable="Intel Compiler doesn't support it"), + # Intel Compiler doesn't support AVX2 or FMA3 independently + FMA3 = dict( + implies="F16C AVX2", flags="-march=core-avx2" + ), + AVX2 = dict(implies="FMA3", flags="-march=core-avx2"), + # Intel Compiler doesn't support AVX512F or AVX512CD independently + AVX512F = dict( + implies="AVX2 AVX512CD", flags="-march=common-avx512" + ), + AVX512CD = dict( + implies="AVX2 AVX512F", flags="-march=common-avx512" + ), + AVX512_KNL = dict(flags="-xKNL"), + AVX512_KNM = dict(flags="-xKNM"), + AVX512_SKX = dict(flags="-xSKYLAKE-AVX512"), + AVX512_CLX = dict(flags="-xCASCADELAKE"), + AVX512_CNL = dict(flags="-xCANNONLAKE"), + AVX512_ICL = dict(flags="-xICELAKE-CLIENT"), + ) + if on_x86 and self.cc_is_iccw: return dict( + SSE = dict(flags="/arch:SSE"), + SSE2 = dict(flags="/arch:SSE2"), + SSE3 = dict(flags="/arch:SSE3"), + SSSE3 = dict(flags="/arch:SSSE3"), + SSE41 = dict(flags="/arch:SSE4.1"), + POPCNT = {}, + SSE42 = dict(flags="/arch:SSE4.2"), + AVX = dict(flags="/arch:AVX"), + F16C = {}, + XOP = dict(disable="Intel Compiler doesn't support it"), + FMA4 = dict(disable="Intel Compiler doesn't support it"), + # Intel Compiler doesn't support FMA3 or AVX2 independently + FMA3 = dict( + implies="F16C AVX2", flags="/arch:CORE-AVX2" + ), + AVX2 = dict( + implies="FMA3", flags="/arch:CORE-AVX2" + ), + # Intel Compiler doesn't support AVX512F or AVX512CD independently + AVX512F = dict( + implies="AVX2 AVX512CD", flags="/Qx:COMMON-AVX512" + ), + AVX512CD = dict( + implies="AVX2 AVX512F", flags="/Qx:COMMON-AVX512" + ), + AVX512_KNL = dict(flags="/Qx:KNL"), + AVX512_KNM = dict(flags="/Qx:KNM"), + AVX512_SKX = dict(flags="/Qx:SKYLAKE-AVX512"), + AVX512_CLX = dict(flags="/Qx:CASCADELAKE"), + AVX512_CNL = dict(flags="/Qx:CANNONLAKE"), + AVX512_ICL = dict(flags="/Qx:ICELAKE-CLIENT") + ) + if on_x86 and self.cc_is_msvc: return dict( + SSE = dict(flags="/arch:SSE") if self.cc_on_x86 else {}, + SSE2 = dict(flags="/arch:SSE2") if self.cc_on_x86 else {}, + SSE3 = {}, + SSSE3 = {}, + 
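+            # The empty dicts here are deliberate: MSVC exposes no dedicated
+            # /arch switches for SSE3 through SSE4.2, so an empty dict means
+            # "supported with the default flags"; only header/detect options
+            # are overridden where needed (e.g. POPCNT below).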
SSE41 = {}, + POPCNT = dict(headers="nmmintrin.h"), + SSE42 = {}, + AVX = dict(flags="/arch:AVX"), + F16C = {}, + XOP = dict(headers="ammintrin.h"), + FMA4 = dict(headers="ammintrin.h"), + # MSVC doesn't support FMA3 or AVX2 independently + FMA3 = dict( + implies="F16C AVX2", flags="/arch:AVX2" + ), + AVX2 = dict( + implies="F16C FMA3", flags="/arch:AVX2" + ), + # MSVC doesn't support AVX512F or AVX512CD independently, + # always generate instructions belong to (VL/VW/DQ) + AVX512F = dict( + implies="AVX2 AVX512CD AVX512_SKX", flags="/arch:AVX512" + ), + AVX512CD = dict( + implies="AVX512F AVX512_SKX", flags="/arch:AVX512" + ), + AVX512_KNL = dict( + disable="MSVC compiler doesn't support it" + ), + AVX512_KNM = dict( + disable="MSVC compiler doesn't support it" + ), + AVX512_SKX = dict(flags="/arch:AVX512"), + AVX512_CLX = {}, + AVX512_CNL = {}, + AVX512_ICL = {} + ) + + on_power = self.cc_on_ppc64le or self.cc_on_ppc64 + if on_power: + partial = dict( + VSX = dict( + implies=("VSX2" if self.cc_on_ppc64le else ""), + flags="-mvsx" + ), + VSX2 = dict( + flags="-mcpu=power8", implies_detect=False + ), + VSX3 = dict( + flags="-mcpu=power9 -mtune=power9", implies_detect=False + ) + ) + if self.cc_is_clang: + partial["VSX"]["flags"] = "-maltivec -mvsx" + partial["VSX2"]["flags"] = "-mpower8-vector" + partial["VSX3"]["flags"] = "-mpower9-vector" + + return partial + + if self.cc_on_aarch64 and is_unix: return dict( + NEON = dict( + implies="NEON_FP16 NEON_VFPV4 ASIMD", autovec=True + ), + NEON_FP16 = dict( + implies="NEON NEON_VFPV4 ASIMD", autovec=True + ), + NEON_VFPV4 = dict( + implies="NEON NEON_FP16 ASIMD", autovec=True + ), + ASIMD = dict( + implies="NEON NEON_FP16 NEON_VFPV4", autovec=True + ), + ASIMDHP = dict( + flags="-march=armv8.2-a+fp16" + ), + ASIMDDP = dict( + flags="-march=armv8.2-a+dotprod" + ), + ASIMDFHM = dict( + flags="-march=armv8.2-a+fp16fml" + ), + ) + if self.cc_on_armhf and is_unix: return dict( + NEON = dict( + flags="-mfpu=neon" + ), + NEON_FP16 = dict( + flags="-mfpu=neon-fp16 -mfp16-format=ieee" + ), + NEON_VFPV4 = dict( + flags="-mfpu=neon-vfpv4", + ), + ASIMD = dict( + flags="-mfpu=neon-fp-armv8 -march=armv8-a+simd", + ), + ASIMDHP = dict( + flags="-march=armv8.2-a+fp16" + ), + ASIMDDP = dict( + flags="-march=armv8.2-a+dotprod", + ), + ASIMDFHM = dict( + flags="-march=armv8.2-a+fp16fml" + ) + ) + # TODO: ARM MSVC + return {} + + def __init__(self): + if self.conf_tmp_path is None: + import shutil + import tempfile + tmp = tempfile.mkdtemp() + def rm_temp(): + try: + shutil.rmtree(tmp) + except OSError: + pass + atexit.register(rm_temp) + self.conf_tmp_path = tmp + + if self.conf_cache_factors is None: + self.conf_cache_factors = [ + os.path.getmtime(__file__), + self.conf_nocache + ] + +class _Distutils: + """A helper class that provides a collection of fundamental methods + implemented in a top of Python and NumPy Distutils. + + The idea behind this class is to gather all methods that it may + need to override in case of reuse 'CCompilerOpt' in environment + different than of what NumPy has. + + Parameters + ---------- + ccompiler : `CCompiler` + The generate instance that returned from `distutils.ccompiler.new_compiler()`. 
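+
+    Examples
+    --------
+    A minimal sketch (assuming a default-configured distutils compiler):
+
+    >>> from distutils.ccompiler import new_compiler
+    >>> helper = _Distutils(new_compiler())
+    >>> helper.dist_log("hello from", "ccompiler_opt")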
+ """ + def __init__(self, ccompiler): + self._ccompiler = ccompiler + + def dist_compile(self, sources, flags, ccompiler=None, **kwargs): + """Wrap CCompiler.compile()""" + assert(isinstance(sources, list)) + assert(isinstance(flags, list)) + flags = kwargs.pop("extra_postargs", []) + flags + if not ccompiler: + ccompiler = self._ccompiler + + return ccompiler.compile(sources, extra_postargs=flags, **kwargs) + + def dist_test(self, source, flags, macros=[]): + """Return True if 'CCompiler.compile()' able to compile + a source file with certain flags. + """ + assert(isinstance(source, str)) + from distutils.errors import CompileError + cc = self._ccompiler; + bk_spawn = getattr(cc, 'spawn', None) + if bk_spawn: + cc_type = getattr(self._ccompiler, "compiler_type", "") + if cc_type in ("msvc",): + setattr(cc, 'spawn', self._dist_test_spawn_paths) + else: + setattr(cc, 'spawn', self._dist_test_spawn) + test = False + try: + self.dist_compile( + [source], flags, macros=macros, output_dir=self.conf_tmp_path + ) + test = True + except CompileError as e: + self.dist_log(str(e), stderr=True) + if bk_spawn: + setattr(cc, 'spawn', bk_spawn) + return test + + def dist_info(self): + """ + Return a tuple containing info about (platform, compiler, extra_args), + required by the abstract class '_CCompiler' for discovering the + platform environment. This is also used as a cache factor in order + to detect any changes happening from outside. + """ + if hasattr(self, "_dist_info"): + return self._dist_info + + cc_type = getattr(self._ccompiler, "compiler_type", '') + if cc_type in ("intelem", "intelemw"): + platform = "x86_64" + elif cc_type in ("intel", "intelw", "intele"): + platform = "x86" + else: + from distutils.util import get_platform + platform = get_platform() + + cc_info = getattr(self._ccompiler, "compiler", getattr(self._ccompiler, "compiler_so", '')) + if not cc_type or cc_type == "unix": + if hasattr(cc_info, "__iter__"): + compiler = cc_info[0] + else: + compiler = str(cc_info) + else: + compiler = cc_type + + if hasattr(cc_info, "__iter__") and len(cc_info) > 1: + extra_args = ' '.join(cc_info[1:]) + else: + extra_args = os.environ.get("CFLAGS", "") + extra_args += os.environ.get("CPPFLAGS", "") + + self._dist_info = (platform, compiler, extra_args) + return self._dist_info + + @staticmethod + def dist_error(*args): + """Raise a compiler error""" + from distutils.errors import CompileError + raise CompileError(_Distutils._dist_str(*args)) + + @staticmethod + def dist_fatal(*args): + """Raise a distutils error""" + from distutils.errors import DistutilsError + raise DistutilsError(_Distutils._dist_str(*args)) + + @staticmethod + def dist_log(*args, stderr=False): + """Print a console message""" + from numpy.distutils import log + out = _Distutils._dist_str(*args) + if stderr: + log.warn(out) + else: + log.info(out) + + @staticmethod + def dist_load_module(name, path): + """Load a module from file, required by the abstract class '_Cache'.""" + from .misc_util import exec_mod_from_location + try: + return exec_mod_from_location(name, path) + except Exception as e: + _Distutils.dist_log(e, stderr=True) + return None + + @staticmethod + def _dist_str(*args): + """Return a string to print by log and errors.""" + def to_str(arg): + if not isinstance(arg, str) and hasattr(arg, '__iter__'): + ret = [] + for a in arg: + ret.append(to_str(a)) + return '('+ ' '.join(ret) + ')' + return str(arg) + + stack = inspect.stack()[2] + start = "CCompilerOpt.%s[%d] : " % (stack.function, stack.lineno) + out = 
' '.join([ + to_str(a) + for a in (*args,) + ]) + return start + out + + def _dist_test_spawn_paths(self, cmd, display=None): + """ + Fix msvc SDK ENV path same as distutils do + without it we get c1: fatal error C1356: unable to find mspdbcore.dll + """ + if not hasattr(self._ccompiler, "_paths"): + self._dist_test_spawn(cmd) + return + old_path = os.getenv("path") + try: + os.environ["path"] = self._ccompiler._paths + self._dist_test_spawn(cmd) + finally: + os.environ["path"] = old_path + + _dist_warn_regex = re.compile( + # intel and msvc compilers don't raise + # fatal errors when flags are wrong or unsupported + ".*(" + "warning D9002|" # msvc, it should be work with any language. + "invalid argument for option" # intel + ").*" + ) + @staticmethod + def _dist_test_spawn(cmd, display=None): + try: + o = subprocess.check_output(cmd, stderr=subprocess.STDOUT, + universal_newlines=True) + if o and re.match(_Distutils._dist_warn_regex, o): + _Distutils.dist_error( + "Flags in command", cmd ,"aren't supported by the compiler" + ", output -> \n%s" % o + ) + except subprocess.CalledProcessError as exc: + o = exc.output + s = exc.returncode + except OSError as e: + o = e + s = 127 + else: + return None + _Distutils.dist_error( + "Command", cmd, "failed with exit status %d output -> \n%s" % ( + s, o + )) + +_share_cache = {} +class _Cache: + """An abstract class handles caching functionality, provides two + levels of caching, in-memory by share instances attributes among + each other and by store attributes into files. + + **Note**: + any attributes that start with ``_`` or ``conf_`` will be ignored. + + Parameters + ---------- + cache_path: str or None + The path of cache file, if None then cache in file will disabled. + + *factors: + The caching factors that need to utilize next to `conf_cache_factors`. + + Attributes + ---------- + cache_private: set + Hold the attributes that need be skipped from "in-memory cache". + + cache_infile: bool + Utilized during initializing this class, to determine if the cache was able + to loaded from the specified cache path in 'cache_path'. 
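+
+    The written cache file itself is a plain Python module of roughly this
+    shape (a sketch; the hash and data values are illustrative)::
+
+        # AUTOGENERATED DON'T EDIT
+        hash = 3642327455
+        data = {'cc_march': 'x64', 'cc_name': 'msvc', ...}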
+ """ + + # skip attributes from cache + _cache_ignore = re.compile("^(_|conf_)") + + def __init__(self, cache_path=None, *factors): + self.cache_me = {} + self.cache_private = set() + self.cache_infile = False + self._cache_path = None + + if self.conf_nocache: + self.dist_log("cache is disabled by `Config`") + return + + self._cache_hash = self.cache_hash(*factors, *self.conf_cache_factors) + self._cache_path = cache_path + if cache_path: + if os.path.exists(cache_path): + self.dist_log("load cache from file ->", cache_path) + cache_mod = self.dist_load_module("cache", cache_path) + if not cache_mod: + self.dist_log( + "unable to load the cache file as a module", + stderr=True + ) + elif not hasattr(cache_mod, "hash") or \ + not hasattr(cache_mod, "data"): + self.dist_log("invalid cache file", stderr=True) + elif self._cache_hash == cache_mod.hash: + self.dist_log("hit the file cache") + for attr, val in cache_mod.data.items(): + setattr(self, attr, val) + self.cache_infile = True + else: + self.dist_log("miss the file cache") + + if not self.cache_infile: + other_cache = _share_cache.get(self._cache_hash) + if other_cache: + self.dist_log("hit the memory cache") + for attr, val in other_cache.__dict__.items(): + if attr in other_cache.cache_private or \ + re.match(self._cache_ignore, attr): + continue + setattr(self, attr, val) + + _share_cache[self._cache_hash] = self + atexit.register(self.cache_flush) + + def __del__(self): + for h, o in _share_cache.items(): + if o == self: + _share_cache.pop(h) + break + + def cache_flush(self): + """ + Force update the cache. + """ + if not self._cache_path: + return + # TODO: don't write if the cache doesn't change + self.dist_log("write cache to path ->", self._cache_path) + cdict = self.__dict__.copy() + for attr in self.__dict__.keys(): + if re.match(self._cache_ignore, attr): + cdict.pop(attr) + + d = os.path.dirname(self._cache_path) + if not os.path.exists(d): + os.makedirs(d) + + repr_dict = pprint.pformat(cdict, compact=True) + with open(self._cache_path, "w") as f: + f.write(textwrap.dedent("""\ + # AUTOGENERATED DON'T EDIT + # Please make changes to the code generator \ + (distutils/ccompiler_opt.py) + hash = {} + data = \\ + """).format(self._cache_hash)) + f.write(repr_dict) + + def cache_hash(self, *factors): + # is there a built-in non-crypto hash? + # sdbm + chash = 0 + for f in factors: + for char in str(f): + chash = ord(char) + (chash << 6) + (chash << 16) - chash + chash &= 0xFFFFFFFF + return chash + + @staticmethod + def me(cb): + """ + A static method that can be treated as a decorator to + dynamically cache certain methods. + """ + def cache_wrap_me(self, *args, **kwargs): + # good for normal args + cache_key = str(( + cb.__name__, *args, *kwargs.keys(), *kwargs.values() + )) + if cache_key in self.cache_me: + return self.cache_me[cache_key] + ccb = cb(self, *args, **kwargs) + self.cache_me[cache_key] = ccb + return ccb + return cache_wrap_me + +class _CCompiler: + """A helper class for `CCompilerOpt` containing all utilities that + related to the fundamental compiler's functions. 
+ + Attributes + ---------- + cc_on_x86 : bool + True when the target architecture is 32-bit x86 + cc_on_x64 : bool + True when the target architecture is 64-bit x86 + cc_on_ppc64 : bool + True when the target architecture is 64-bit big-endian PowerPC + cc_on_armhf : bool + True when the target architecture is 32-bit ARMv7+ + cc_on_aarch64 : bool + True when the target architecture is 64-bit Armv8-a+ + cc_on_noarch : bool + True when the target architecture is unknown or not supported + cc_is_gcc : bool + True if the compiler is GNU or + if the compiler is unknown + cc_is_clang : bool + True if the compiler is Clang + cc_is_icc : bool + True if the compiler is Intel compiler (unix like) + cc_is_iccw : bool + True if the compiler is Intel compiler (msvc like) + cc_is_nocc : bool + True if the compiler isn't supported directly, + Note: that cause a fail-back to gcc + cc_has_debug : bool + True if the compiler has debug flags + cc_has_native : bool + True if the compiler has native flags + cc_noopt : bool + True if the compiler has definition 'DISABLE_OPT*', + or 'cc_on_noarch' is True + cc_march : str + The target architecture name, or "unknown" if + the architecture isn't supported + cc_name : str + The compiler name, or "unknown" if the compiler isn't supported + cc_flags : dict + Dictionary containing the initialized flags of `_Config.conf_cc_flags` + """ + def __init__(self): + if hasattr(self, "cc_is_cached"): + return + # attr regex + detect_arch = ( + ("cc_on_x64", ".*(x|x86_|amd)64.*"), + ("cc_on_x86", ".*(win32|x86|i386|i686).*"), + ("cc_on_ppc64le", ".*(powerpc|ppc)64(el|le).*"), + ("cc_on_ppc64", ".*(powerpc|ppc)64.*"), + ("cc_on_aarch64", ".*(aarch64|arm64).*"), + ("cc_on_armhf", ".*arm.*"), + # undefined platform + ("cc_on_noarch", ""), + ) + detect_compiler = ( + ("cc_is_gcc", r".*(gcc|gnu\-g).*"), + ("cc_is_clang", ".*clang.*"), + ("cc_is_iccw", ".*(intelw|intelemw|iccw).*"), # intel msvc like + ("cc_is_icc", ".*(intel|icc).*"), # intel unix like + ("cc_is_msvc", ".*msvc.*"), + # undefined compiler will be treat it as gcc + ("cc_is_nocc", ""), + ) + detect_args = ( + ("cc_has_debug", ".*(O0|Od|ggdb|coverage|debug:full).*"), + ("cc_has_native", ".*(-march=native|-xHost|/QxHost).*"), + # in case if the class run with -DNPY_DISABLE_OPTIMIZATION + ("cc_noopt", ".*DISABLE_OPT.*"), + ) + + dist_info = self.dist_info() + platform, compiler_info, extra_args = dist_info + # set False to all attrs + for section in (detect_arch, detect_compiler, detect_args): + for attr, rgex in section: + setattr(self, attr, False) + + for detect, searchin in ((detect_arch, platform), (detect_compiler, compiler_info)): + for attr, rgex in detect: + if rgex and not re.match(rgex, searchin, re.IGNORECASE): + continue + setattr(self, attr, True) + break + + for attr, rgex in detect_args: + if rgex and not re.match(rgex, extra_args, re.IGNORECASE): + continue + setattr(self, attr, True) + + if self.cc_on_noarch: + self.dist_log( + "unable to detect CPU architecture which lead to disable the optimization. " + f"check dist_info:<<\n{dist_info}\n>>", + stderr=True + ) + self.cc_noopt = True + + if self.conf_noopt: + self.dist_log("Optimization is disabled by the Config", stderr=True) + self.cc_noopt = True + + if self.cc_is_nocc: + """ + mingw can be treated as a gcc, and also xlc even if it based on clang, + but still has the same gcc optimization flags. + """ + self.dist_log( + "unable to detect compiler type which leads to treating it as GCC. 
" + "this is a normal behavior if you're using gcc-like compiler such as MinGW or IBM/XLC." + f"check dist_info:<<\n{dist_info}\n>>", + stderr=True + ) + self.cc_is_gcc = True + + self.cc_march = "unknown" + for arch in ("x86", "x64", "ppc64", "ppc64le", "armhf", "aarch64"): + if getattr(self, "cc_on_" + arch): + self.cc_march = arch + break + + self.cc_name = "unknown" + for name in ("gcc", "clang", "iccw", "icc", "msvc"): + if getattr(self, "cc_is_" + name): + self.cc_name = name + break + + self.cc_flags = {} + compiler_flags = self.conf_cc_flags.get(self.cc_name) + if compiler_flags is None: + self.dist_fatal( + "undefined flag for compiler '%s', " + "leave an empty dict instead" % self.cc_name + ) + for name, flags in compiler_flags.items(): + self.cc_flags[name] = nflags = [] + if flags: + assert(isinstance(flags, str)) + flags = flags.split() + for f in flags: + if self.cc_test_flags([f]): + nflags.append(f) + + self.cc_is_cached = True + + @_Cache.me + def cc_test_flags(self, flags): + """ + Returns True if the compiler supports 'flags'. + """ + assert(isinstance(flags, list)) + self.dist_log("testing flags", flags) + test_path = os.path.join(self.conf_check_path, "test_flags.c") + test = self.dist_test(test_path, flags) + if not test: + self.dist_log("testing failed", stderr=True) + return test + + def cc_normalize_flags(self, flags): + """ + Remove the conflicts that caused due gathering implied features flags. + + Parameters + ---------- + 'flags' list, compiler flags + flags should be sorted from the lowest to the highest interest. + + Returns + ------- + list, filtered from any conflicts. + + Examples + -------- + >>> self.cc_normalize_flags(['-march=armv8.2-a+fp16', '-march=armv8.2-a+dotprod']) + ['armv8.2-a+fp16+dotprod'] + + >>> self.cc_normalize_flags( + ['-msse', '-msse2', '-msse3', '-mssse3', '-msse4.1', '-msse4.2', '-mavx', '-march=core-avx2'] + ) + ['-march=core-avx2'] + """ + assert(isinstance(flags, list)) + if self.cc_is_gcc or self.cc_is_clang or self.cc_is_icc: + return self._cc_normalize_unix(flags) + + if self.cc_is_msvc or self.cc_is_iccw: + return self._cc_normalize_win(flags) + return flags + + _cc_normalize_unix_mrgx = re.compile( + # 1- to check the highest of + r"^(-mcpu=|-march=|-x[A-Z0-9\-])" + ) + _cc_normalize_unix_frgx = re.compile( + # 2- to remove any flags starts with + # -march, -mcpu, -x(INTEL) and '-m' without '=' + r"^(?!(-mcpu=|-march=|-x[A-Z0-9\-]))(?!-m[a-z0-9\-\.]*.$)" + ) + _cc_normalize_unix_krgx = re.compile( + # 3- keep only the highest of + r"^(-mfpu|-mtune)" + ) + _cc_normalize_arch_ver = re.compile( + r"[0-9.]" + ) + def _cc_normalize_unix(self, flags): + def ver_flags(f): + # arch ver subflag + # -march=armv8.2-a+fp16fml + tokens = f.split('+') + ver = float('0' + ''.join( + re.findall(self._cc_normalize_arch_ver, tokens[0]) + )) + return ver, tokens[0], tokens[1:] + + if len(flags) <= 1: + return flags + # get the highest matched flag + for i, cur_flag in enumerate(reversed(flags)): + if not re.match(self._cc_normalize_unix_mrgx, cur_flag): + continue + lower_flags = flags[:-(i+1)] + upper_flags = flags[-i:] + filterd = list(filter( + self._cc_normalize_unix_frgx.search, lower_flags + )) + # gather subflags + ver, arch, subflags = ver_flags(cur_flag) + if ver > 0 and len(subflags) > 0: + for xflag in lower_flags: + xver, _, xsubflags = ver_flags(xflag) + if ver == xver: + subflags = xsubflags + subflags + cur_flag = arch + '+' + '+'.join(subflags) + + flags = filterd + [cur_flag] + if i > 0: + flags += upper_flags + break + + # to 
remove overridable flags + final_flags = [] + matched = set() + for f in reversed(flags): + match = re.match(self._cc_normalize_unix_krgx, f) + if not match: + pass + elif match[0] in matched: + continue + else: + matched.add(match[0]) + final_flags.insert(0, f) + return final_flags + + _cc_normalize_win_frgx = re.compile( + r"^(?!(/arch\:|/Qx\:))" + ) + _cc_normalize_win_mrgx = re.compile( + r"^(/arch|/Qx:)" + ) + def _cc_normalize_win(self, flags): + for i, f in enumerate(reversed(flags)): + if not re.match(self._cc_normalize_win_mrgx, f): + continue + i += 1 + return list(filter( + self._cc_normalize_win_frgx.search, flags[:-i] + )) + flags[-i:] + return flags + +class _Feature: + """A helper class for `CCompilerOpt` that managing CPU features. + + Attributes + ---------- + feature_supported : dict + Dictionary containing all CPU features that supported + by the platform, according to the specified values in attribute + `_Config.conf_features` and `_Config.conf_features_partial()` + + feature_min : set + The minimum support of CPU features, according to + the specified values in attribute `_Config.conf_min_features`. + """ + def __init__(self): + if hasattr(self, "feature_is_cached"): + return + self.feature_supported = pfeatures = self.conf_features_partial() + for feature_name in list(pfeatures.keys()): + feature = pfeatures[feature_name] + cfeature = self.conf_features[feature_name] + feature.update({ + k:v for k,v in cfeature.items() if k not in feature + }) + disabled = feature.get("disable") + if disabled is not None: + pfeatures.pop(feature_name) + self.dist_log( + "feature '%s' is disabled," % feature_name, + disabled, stderr=True + ) + continue + # list is used internally for these options + for option in ( + "implies", "group", "detect", "headers", "flags", "extra_checks" + ) : + oval = feature.get(option) + if isinstance(oval, str): + feature[option] = oval.split() + + self.feature_min = set() + min_f = self.conf_min_features.get(self.cc_march, "") + for F in min_f.upper().split(): + if F in self.feature_supported: + self.feature_min.add(F) + + self.feature_is_cached = True + + def feature_names(self, names=None, force_flags=None, macros=[]): + """ + Returns a set of CPU feature names that supported by platform and the **C** compiler. + + Parameters + ---------- + names: sequence or None, optional + Specify certain CPU features to test it against the **C** compiler. + if None(default), it will test all current supported features. + **Note**: feature names must be in upper-case. + + force_flags: list or None, optional + If None(default), default compiler flags for every CPU feature will + be used during the test. + + macros : list of tuples, optional + A list of C macro definitions. + """ + assert( + names is None or ( + not isinstance(names, str) and + hasattr(names, "__iter__") + ) + ) + assert(force_flags is None or isinstance(force_flags, list)) + if names is None: + names = self.feature_supported.keys() + supported_names = set() + for f in names: + if self.feature_is_supported( + f, force_flags=force_flags, macros=macros + ): + supported_names.add(f) + return supported_names + + def feature_is_exist(self, name): + """ + Returns True if a certain feature is exist and covered within + `_Config.conf_features`. + + Parameters + ---------- + 'name': str + feature name in uppercase. + """ + assert(name.isupper()) + return name in self.conf_features + + def feature_sorted(self, names, reverse=False): + """ + Sort a list of CPU features ordered by the lowest interest. 
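+
+        For instance (illustrative, using the default x86 interest values
+        where SSE2=2, AVX2=13 and AVX512F=20):
+
+        >>> self.feature_sorted(["AVX512F", "SSE2", "AVX2"])
+        ['SSE2', 'AVX2', 'AVX512F']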
+ + Parameters + ---------- + 'names': sequence + sequence of supported feature names in uppercase. + 'reverse': bool, optional + If true, the sorted features is reversed. (highest interest) + + Returns + ------- + list, sorted CPU features + """ + def sort_cb(k): + if isinstance(k, str): + return self.feature_supported[k]["interest"] + # multiple features + rank = max([self.feature_supported[f]["interest"] for f in k]) + # FIXME: that's not a safe way to increase the rank for + # multi targets + rank += len(k) -1 + return rank + return sorted(names, reverse=reverse, key=sort_cb) + + def feature_implies(self, names, keep_origins=False): + """ + Return a set of CPU features that implied by 'names' + + Parameters + ---------- + names: str or sequence of str + CPU feature name(s) in uppercase. + + keep_origins: bool + if False(default) then the returned set will not contain any + features from 'names'. This case happens only when two features + imply each other. + + Examples + -------- + >>> self.feature_implies("SSE3") + {'SSE', 'SSE2'} + >>> self.feature_implies("SSE2") + {'SSE'} + >>> self.feature_implies("SSE2", keep_origins=True) + # 'SSE2' found here since 'SSE' and 'SSE2' imply each other + {'SSE', 'SSE2'} + """ + def get_implies(name, _caller=set()): + implies = set() + d = self.feature_supported[name] + for i in d.get("implies", []): + implies.add(i) + if i in _caller: + # infinity recursive guard since + # features can imply each other + continue + _caller.add(name) + implies = implies.union(get_implies(i, _caller)) + return implies + + if isinstance(names, str): + implies = get_implies(names) + names = [names] + else: + assert(hasattr(names, "__iter__")) + implies = set() + for n in names: + implies = implies.union(get_implies(n)) + if not keep_origins: + implies.difference_update(names) + return implies + + def feature_implies_c(self, names): + """same as feature_implies() but combining 'names'""" + if isinstance(names, str): + names = set((names,)) + else: + names = set(names) + return names.union(self.feature_implies(names)) + + def feature_ahead(self, names): + """ + Return list of features in 'names' after remove any + implied features and keep the origins. + + Parameters + ---------- + 'names': sequence + sequence of CPU feature names in uppercase. + + Returns + ------- + list of CPU features sorted as-is 'names' + + Examples + -------- + >>> self.feature_ahead(["SSE2", "SSE3", "SSE41"]) + ["SSE41"] + # assume AVX2 and FMA3 implies each other and AVX2 + # is the highest interest + >>> self.feature_ahead(["SSE2", "SSE3", "SSE41", "AVX2", "FMA3"]) + ["AVX2"] + # assume AVX2 and FMA3 don't implies each other + >>> self.feature_ahead(["SSE2", "SSE3", "SSE41", "AVX2", "FMA3"]) + ["AVX2", "FMA3"] + """ + assert( + not isinstance(names, str) + and hasattr(names, '__iter__') + ) + implies = self.feature_implies(names, keep_origins=True) + ahead = [n for n in names if n not in implies] + if len(ahead) == 0: + # return the highest interested feature + # if all features imply each other + ahead = self.feature_sorted(names, reverse=True)[:1] + return ahead + + def feature_untied(self, names): + """ + same as 'feature_ahead()' but if both features implied each other + and keep the highest interest. + + Parameters + ---------- + 'names': sequence + sequence of CPU feature names in uppercase. 
+ + Returns + ------- + list of CPU features sorted as-is 'names' + + Examples + -------- + >>> self.feature_untied(["SSE2", "SSE3", "SSE41"]) + ["SSE2", "SSE3", "SSE41"] + # assume AVX2 and FMA3 implies each other + >>> self.feature_untied(["SSE2", "SSE3", "SSE41", "FMA3", "AVX2"]) + ["SSE2", "SSE3", "SSE41", "AVX2"] + """ + assert( + not isinstance(names, str) + and hasattr(names, '__iter__') + ) + final = [] + for n in names: + implies = self.feature_implies(n) + tied = [ + nn for nn in final + if nn in implies and n in self.feature_implies(nn) + ] + if tied: + tied = self.feature_sorted(tied + [n]) + if n not in tied[1:]: + continue + final.remove(tied[:1][0]) + final.append(n) + return final + + def feature_get_til(self, names, keyisfalse): + """ + same as `feature_implies_c()` but stop collecting implied + features when feature's option that provided through + parameter 'keyisfalse' is False, also sorting the returned + features. + """ + def til(tnames): + # sort from highest to lowest interest then cut if "key" is False + tnames = self.feature_implies_c(tnames) + tnames = self.feature_sorted(tnames, reverse=True) + for i, n in enumerate(tnames): + if not self.feature_supported[n].get(keyisfalse, True): + tnames = tnames[:i+1] + break + return tnames + + if isinstance(names, str) or len(names) <= 1: + names = til(names) + # normalize the sort + names.reverse() + return names + + names = self.feature_ahead(names) + names = {t for n in names for t in til(n)} + return self.feature_sorted(names) + + def feature_detect(self, names): + """ + Return a list of CPU features that required to be detected + sorted from the lowest to highest interest. + """ + names = self.feature_get_til(names, "implies_detect") + detect = [] + for n in names: + d = self.feature_supported[n] + detect += d.get("detect", d.get("group", [n])) + return detect + + @_Cache.me + def feature_flags(self, names): + """ + Return a list of CPU features flags sorted from the lowest + to highest interest. + """ + names = self.feature_sorted(self.feature_implies_c(names)) + flags = [] + for n in names: + d = self.feature_supported[n] + f = d.get("flags", []) + if not f or not self.cc_test_flags(f): + continue + flags += f + return self.cc_normalize_flags(flags) + + @_Cache.me + def feature_test(self, name, force_flags=None, macros=[]): + """ + Test a certain CPU feature against the compiler through its own + check file. + + Parameters + ---------- + name: str + Supported CPU feature name. + + force_flags: list or None, optional + If None(default), the returned flags from `feature_flags()` + will be used. + + macros : list of tuples, optional + A list of C macro definitions. + """ + if force_flags is None: + force_flags = self.feature_flags(name) + + self.dist_log( + "testing feature '%s' with flags (%s)" % ( + name, ' '.join(force_flags) + )) + # Each CPU feature must have C source code contains at + # least one intrinsic or instruction related to this feature. + test_path = os.path.join( + self.conf_check_path, "cpu_%s.c" % name.lower() + ) + if not os.path.exists(test_path): + self.dist_fatal("feature test file is not exist", test_path) + + test = self.dist_test( + test_path, force_flags + self.cc_flags["werror"], macros=macros + ) + if not test: + self.dist_log("testing failed", stderr=True) + return test + + @_Cache.me + def feature_is_supported(self, name, force_flags=None, macros=[]): + """ + Check if a certain CPU feature is supported by the platform and compiler. 
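+
+        The check is transitive: every feature implied by 'name' is test
+        compiled first through `feature_test()`, so e.g. probing "AVX2"
+        also compiles the checks of its implied chain down to "SSE".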
+ + Parameters + ---------- + name: str + CPU feature name in uppercase. + + force_flags: list or None, optional + If None(default), default compiler flags for every CPU feature will + be used during test. + + macros : list of tuples, optional + A list of C macro definitions. + """ + assert(name.isupper()) + assert(force_flags is None or isinstance(force_flags, list)) + + supported = name in self.feature_supported + if supported: + for impl in self.feature_implies(name): + if not self.feature_test(impl, force_flags, macros=macros): + return False + if not self.feature_test(name, force_flags, macros=macros): + return False + return supported + + @_Cache.me + def feature_can_autovec(self, name): + """ + check if the feature can be auto-vectorized by the compiler + """ + assert(isinstance(name, str)) + d = self.feature_supported[name] + can = d.get("autovec", None) + if can is None: + valid_flags = [ + self.cc_test_flags([f]) for f in d.get("flags", []) + ] + can = valid_flags and any(valid_flags) + return can + + @_Cache.me + def feature_extra_checks(self, name): + """ + Return a list of supported extra checks after testing them against + the compiler. + + Parameters + ---------- + names: str + CPU feature name in uppercase. + """ + assert isinstance(name, str) + d = self.feature_supported[name] + extra_checks = d.get("extra_checks", []) + if not extra_checks: + return [] + + self.dist_log("Testing extra checks for feature '%s'" % name, extra_checks) + flags = self.feature_flags(name) + available = [] + not_available = [] + for chk in extra_checks: + test_path = os.path.join( + self.conf_check_path, "extra_%s.c" % chk.lower() + ) + if not os.path.exists(test_path): + self.dist_fatal("extra check file does not exist", test_path) + + is_supported = self.dist_test(test_path, flags + self.cc_flags["werror"]) + if is_supported: + available.append(chk) + else: + not_available.append(chk) + + if not_available: + self.dist_log("testing failed for checks", not_available, stderr=True) + return available + + + def feature_c_preprocessor(self, feature_name, tabs=0): + """ + Generate C preprocessor definitions and include headers of a CPU feature. + + Parameters + ---------- + 'feature_name': str + CPU feature name in uppercase. + 'tabs': int + if > 0, align the generated strings to the right depend on number of tabs. + + Returns + ------- + str, generated C preprocessor + + Examples + -------- + >>> self.feature_c_preprocessor("SSE3") + /** SSE3 **/ + #define NPY_HAVE_SSE3 1 + #include + """ + assert(feature_name.isupper()) + feature = self.feature_supported.get(feature_name) + assert(feature is not None) + + prepr = [ + "/** %s **/" % feature_name, + "#define %sHAVE_%s 1" % (self.conf_c_prefix, feature_name) + ] + prepr += [ + "#include <%s>" % h for h in feature.get("headers", []) + ] + + extra_defs = feature.get("group", []) + extra_defs += self.feature_extra_checks(feature_name) + for edef in extra_defs: + # Guard extra definitions in case of duplicate with + # another feature + prepr += [ + "#ifndef %sHAVE_%s" % (self.conf_c_prefix, edef), + "\t#define %sHAVE_%s 1" % (self.conf_c_prefix, edef), + "#endif", + ] + + if tabs > 0: + prepr = [('\t'*tabs) + l for l in prepr] + return '\n'.join(prepr) + +class _Parse: + """A helper class that parsing main arguments of `CCompilerOpt`, + also parsing configuration statements in dispatch-able sources. + + Parameters + ---------- + cpu_baseline: str or None + minimal set of required CPU features or special options. 
+ + cpu_dispatch: str or None + dispatched set of additional CPU features or special options. + + Special options can be: + - **MIN**: Enables the minimum CPU features that utilized via `_Config.conf_min_features` + - **MAX**: Enables all supported CPU features by the Compiler and platform. + - **NATIVE**: Enables all CPU features that supported by the current machine. + - **NONE**: Enables nothing + - **Operand +/-**: remove or add features, useful with options **MAX**, **MIN** and **NATIVE**. + NOTE: operand + is only added for nominal reason. + + NOTES: + - Case-insensitive among all CPU features and special options. + - Comma or space can be used as a separator. + - If the CPU feature is not supported by the user platform or compiler, + it will be skipped rather than raising a fatal error. + - Any specified CPU features to 'cpu_dispatch' will be skipped if its part of CPU baseline features + - 'cpu_baseline' force enables implied features. + + Attributes + ---------- + parse_baseline_names : list + Final CPU baseline's feature names(sorted from low to high) + parse_baseline_flags : list + Compiler flags of baseline features + parse_dispatch_names : list + Final CPU dispatch-able feature names(sorted from low to high) + parse_target_groups : dict + Dictionary containing initialized target groups that configured + through class attribute `conf_target_groups`. + + The key is represent the group name and value is a tuple + contains three items : + - bool, True if group has the 'baseline' option. + - list, list of CPU features. + - list, list of extra compiler flags. + + """ + def __init__(self, cpu_baseline, cpu_dispatch): + self._parse_policies = dict( + # POLICY NAME, (HAVE, NOT HAVE, [DEB]) + KEEP_BASELINE = ( + None, self._parse_policy_not_keepbase, + [] + ), + KEEP_SORT = ( + self._parse_policy_keepsort, + self._parse_policy_not_keepsort, + [] + ), + MAXOPT = ( + self._parse_policy_maxopt, None, + [] + ), + WERROR = ( + self._parse_policy_werror, None, + [] + ), + AUTOVEC = ( + self._parse_policy_autovec, None, + ["MAXOPT"] + ) + ) + if hasattr(self, "parse_is_cached"): + return + + self.parse_baseline_names = [] + self.parse_baseline_flags = [] + self.parse_dispatch_names = [] + self.parse_target_groups = {} + + if self.cc_noopt: + # skip parsing baseline and dispatch args and keep parsing target groups + cpu_baseline = cpu_dispatch = None + + self.dist_log("check requested baseline") + if cpu_baseline is not None: + cpu_baseline = self._parse_arg_features("cpu_baseline", cpu_baseline) + baseline_names = self.feature_names(cpu_baseline) + self.parse_baseline_flags = self.feature_flags(baseline_names) + self.parse_baseline_names = self.feature_sorted( + self.feature_implies_c(baseline_names) + ) + + self.dist_log("check requested dispatch-able features") + if cpu_dispatch is not None: + cpu_dispatch_ = self._parse_arg_features("cpu_dispatch", cpu_dispatch) + cpu_dispatch = { + f for f in cpu_dispatch_ + if f not in self.parse_baseline_names + } + conflict_baseline = cpu_dispatch_.difference(cpu_dispatch) + self.parse_dispatch_names = self.feature_sorted( + self.feature_names(cpu_dispatch) + ) + if len(conflict_baseline) > 0: + self.dist_log( + "skip features", conflict_baseline, "since its part of baseline" + ) + + self.dist_log("initialize targets groups") + for group_name, tokens in self.conf_target_groups.items(): + self.dist_log("parse target group", group_name) + GROUP_NAME = group_name.upper() + if not tokens or not tokens.strip(): + # allow empty groups, useful in case if 
there's a need + # to disable certain group since '_parse_target_tokens()' + # requires at least one valid target + self.parse_target_groups[GROUP_NAME] = ( + False, [], [] + ) + continue + has_baseline, features, extra_flags = \ + self._parse_target_tokens(tokens) + self.parse_target_groups[GROUP_NAME] = ( + has_baseline, features, extra_flags + ) + + self.parse_is_cached = True + + def parse_targets(self, source): + """ + Fetch and parse configuration statements that required for + defining the targeted CPU features, statements should be declared + in the top of source in between **C** comment and start + with a special mark **@targets**. + + Configuration statements are sort of keywords representing + CPU features names, group of statements and policies, combined + together to determine the required optimization. + + Parameters + ---------- + source: str + the path of **C** source file. + + Returns + ------- + - bool, True if group has the 'baseline' option + - list, list of CPU features + - list, list of extra compiler flags + """ + self.dist_log("looking for '@targets' inside -> ", source) + # get lines between /*@targets and */ + with open(source) as fd: + tokens = "" + max_to_reach = 1000 # good enough, isn't? + start_with = "@targets" + start_pos = -1 + end_with = "*/" + end_pos = -1 + for current_line, line in enumerate(fd): + if current_line == max_to_reach: + self.dist_fatal("reached the max of lines") + break + if start_pos == -1: + start_pos = line.find(start_with) + if start_pos == -1: + continue + start_pos += len(start_with) + tokens += line + end_pos = line.find(end_with) + if end_pos != -1: + end_pos += len(tokens) - len(line) + break + + if start_pos == -1: + self.dist_fatal("expected to find '%s' within a C comment" % start_with) + if end_pos == -1: + self.dist_fatal("expected to end with '%s'" % end_with) + + tokens = tokens[start_pos:end_pos] + return self._parse_target_tokens(tokens) + + _parse_regex_arg = re.compile(r'\s|,|([+-])') + def _parse_arg_features(self, arg_name, req_features): + if not isinstance(req_features, str): + self.dist_fatal("expected a string in '%s'" % arg_name) + + final_features = set() + # space and comma can be used as a separator + tokens = list(filter(None, re.split(self._parse_regex_arg, req_features))) + append = True # append is the default + for tok in tokens: + if tok[0] in ("#", "$"): + self.dist_fatal( + arg_name, "target groups and policies " + "aren't allowed from arguments, " + "only from dispatch-able sources" + ) + if tok == '+': + append = True + continue + if tok == '-': + append = False + continue + + TOK = tok.upper() # we use upper-case internally + features_to = set() + if TOK == "NONE": + pass + elif TOK == "NATIVE": + native = self.cc_flags["native"] + if not native: + self.dist_fatal(arg_name, + "native option isn't supported by the compiler" + ) + features_to = self.feature_names( + force_flags=native, macros=[("DETECT_FEATURES", 1)] + ) + elif TOK == "MAX": + features_to = self.feature_supported.keys() + elif TOK == "MIN": + features_to = self.feature_min + else: + if TOK in self.feature_supported: + features_to.add(TOK) + else: + if not self.feature_is_exist(TOK): + self.dist_fatal(arg_name, + ", '%s' isn't a known feature or option" % tok + ) + if append: + final_features = final_features.union(features_to) + else: + final_features = final_features.difference(features_to) + + append = True # back to default + + return final_features + + _parse_regex_target = re.compile(r'\s|[*,/]|([()])') + def 
_parse_target_tokens(self, tokens): + assert(isinstance(tokens, str)) + final_targets = [] # to keep it sorted as specified + extra_flags = [] + has_baseline = False + + skipped = set() + policies = set() + multi_target = None + + tokens = list(filter(None, re.split(self._parse_regex_target, tokens))) + if not tokens: + self.dist_fatal("expected one token at least") + + for tok in tokens: + TOK = tok.upper() + ch = tok[0] + if ch in ('+', '-'): + self.dist_fatal( + "+/- are 'not' allowed from target's groups or @targets, " + "only from cpu_baseline and cpu_dispatch parms" + ) + elif ch == '$': + if multi_target is not None: + self.dist_fatal( + "policies aren't allowed inside multi-target '()'" + ", only CPU features" + ) + policies.add(self._parse_token_policy(TOK)) + elif ch == '#': + if multi_target is not None: + self.dist_fatal( + "target groups aren't allowed inside multi-target '()'" + ", only CPU features" + ) + has_baseline, final_targets, extra_flags = \ + self._parse_token_group(TOK, has_baseline, final_targets, extra_flags) + elif ch == '(': + if multi_target is not None: + self.dist_fatal("unclosed multi-target, missing ')'") + multi_target = set() + elif ch == ')': + if multi_target is None: + self.dist_fatal("multi-target opener '(' wasn't found") + targets = self._parse_multi_target(multi_target) + if targets is None: + skipped.add(tuple(multi_target)) + else: + if len(targets) == 1: + targets = targets[0] + if targets and targets not in final_targets: + final_targets.append(targets) + multi_target = None # back to default + else: + if TOK == "BASELINE": + if multi_target is not None: + self.dist_fatal("baseline isn't allowed inside multi-target '()'") + has_baseline = True + continue + + if multi_target is not None: + multi_target.add(TOK) + continue + + if not self.feature_is_exist(TOK): + self.dist_fatal("invalid target name '%s'" % TOK) + + is_enabled = ( + TOK in self.parse_baseline_names or + TOK in self.parse_dispatch_names + ) + if is_enabled: + if TOK not in final_targets: + final_targets.append(TOK) + continue + + skipped.add(TOK) + + if multi_target is not None: + self.dist_fatal("unclosed multi-target, missing ')'") + if skipped: + self.dist_log( + "skip targets", skipped, + "not part of baseline or dispatch-able features" + ) + + final_targets = self.feature_untied(final_targets) + + # add polices dependencies + for p in list(policies): + _, _, deps = self._parse_policies[p] + for d in deps: + if d in policies: + continue + self.dist_log( + "policy '%s' force enables '%s'" % ( + p, d + )) + policies.add(d) + + # release policies filtrations + for p, (have, nhave, _) in self._parse_policies.items(): + func = None + if p in policies: + func = have + self.dist_log("policy '%s' is ON" % p) + else: + func = nhave + if not func: + continue + has_baseline, final_targets, extra_flags = func( + has_baseline, final_targets, extra_flags + ) + + return has_baseline, final_targets, extra_flags + + def _parse_token_policy(self, token): + """validate policy token""" + if len(token) <= 1 or token[-1:] == token[0]: + self.dist_fatal("'$' must stuck in the begin of policy name") + token = token[1:] + if token not in self._parse_policies: + self.dist_fatal( + "'%s' is an invalid policy name, available policies are" % token, + self._parse_policies.keys() + ) + return token + + def _parse_token_group(self, token, has_baseline, final_targets, extra_flags): + """validate group token""" + if len(token) <= 1 or token[-1:] == token[0]: + self.dist_fatal("'#' must stuck in the begin of 
group name") + + token = token[1:] + ghas_baseline, gtargets, gextra_flags = self.parse_target_groups.get( + token, (False, None, []) + ) + if gtargets is None: + self.dist_fatal( + "'%s' is an invalid target group name, " % token + \ + "available target groups are", + self.parse_target_groups.keys() + ) + if ghas_baseline: + has_baseline = True + # always keep sorting as specified + final_targets += [f for f in gtargets if f not in final_targets] + extra_flags += [f for f in gextra_flags if f not in extra_flags] + return has_baseline, final_targets, extra_flags + + def _parse_multi_target(self, targets): + """validate multi targets that defined between parentheses()""" + # remove any implied features and keep the origins + if not targets: + self.dist_fatal("empty multi-target '()'") + if not all([ + self.feature_is_exist(tar) for tar in targets + ]) : + self.dist_fatal("invalid target name in multi-target", targets) + if not all([ + ( + tar in self.parse_baseline_names or + tar in self.parse_dispatch_names + ) + for tar in targets + ]) : + return None + targets = self.feature_ahead(targets) + if not targets: + return None + # force sort multi targets, so it can be comparable + targets = self.feature_sorted(targets) + targets = tuple(targets) # hashable + return targets + + def _parse_policy_not_keepbase(self, has_baseline, final_targets, extra_flags): + """skip all baseline features""" + skipped = [] + for tar in final_targets[:]: + is_base = False + if isinstance(tar, str): + is_base = tar in self.parse_baseline_names + else: + # multi targets + is_base = all([ + f in self.parse_baseline_names + for f in tar + ]) + if is_base: + skipped.append(tar) + final_targets.remove(tar) + + if skipped: + self.dist_log("skip baseline features", skipped) + + return has_baseline, final_targets, extra_flags + + def _parse_policy_keepsort(self, has_baseline, final_targets, extra_flags): + """leave a notice that $keep_sort is on""" + self.dist_log( + "policy 'keep_sort' is on, dispatch-able targets", final_targets, "\n" + "are 'not' sorted depend on the highest interest but" + "as specified in the dispatch-able source or the extra group" + ) + return has_baseline, final_targets, extra_flags + + def _parse_policy_not_keepsort(self, has_baseline, final_targets, extra_flags): + """sorted depend on the highest interest""" + final_targets = self.feature_sorted(final_targets, reverse=True) + return has_baseline, final_targets, extra_flags + + def _parse_policy_maxopt(self, has_baseline, final_targets, extra_flags): + """append the compiler optimization flags""" + if self.cc_has_debug: + self.dist_log("debug mode is detected, policy 'maxopt' is skipped.") + elif self.cc_noopt: + self.dist_log("optimization is disabled, policy 'maxopt' is skipped.") + else: + flags = self.cc_flags["opt"] + if not flags: + self.dist_log( + "current compiler doesn't support optimization flags, " + "policy 'maxopt' is skipped", stderr=True + ) + else: + extra_flags += flags + return has_baseline, final_targets, extra_flags + + def _parse_policy_werror(self, has_baseline, final_targets, extra_flags): + """force warnings to treated as errors""" + flags = self.cc_flags["werror"] + if not flags: + self.dist_log( + "current compiler doesn't support werror flags, " + "warnings will 'not' treated as errors", stderr=True + ) + else: + self.dist_log("compiler warnings are treated as errors") + extra_flags += flags + return has_baseline, final_targets, extra_flags + + def _parse_policy_autovec(self, has_baseline, final_targets, extra_flags): 
+ """skip features that has no auto-vectorized support by compiler""" + skipped = [] + for tar in final_targets[:]: + if isinstance(tar, str): + can = self.feature_can_autovec(tar) + else: # multiple target + can = all([ + self.feature_can_autovec(t) + for t in tar + ]) + if not can: + final_targets.remove(tar) + skipped.append(tar) + + if skipped: + self.dist_log("skip non auto-vectorized features", skipped) + + return has_baseline, final_targets, extra_flags + +class CCompilerOpt(_Config, _Distutils, _Cache, _CCompiler, _Feature, _Parse): + """ + A helper class for `CCompiler` aims to provide extra build options + to effectively control of compiler optimizations that are directly + related to CPU features. + """ + def __init__(self, ccompiler, cpu_baseline="min", cpu_dispatch="max", cache_path=None): + _Config.__init__(self) + _Distutils.__init__(self, ccompiler) + _Cache.__init__(self, cache_path, self.dist_info(), cpu_baseline, cpu_dispatch) + _CCompiler.__init__(self) + _Feature.__init__(self) + if not self.cc_noopt and self.cc_has_native: + self.dist_log( + "native flag is specified through environment variables. " + "force cpu-baseline='native'" + ) + cpu_baseline = "native" + _Parse.__init__(self, cpu_baseline, cpu_dispatch) + # keep the requested features untouched, need it later for report + # and trace purposes + self._requested_baseline = cpu_baseline + self._requested_dispatch = cpu_dispatch + # key is the dispatch-able source and value is a tuple + # contains two items (has_baseline[boolean], dispatched-features[list]) + self.sources_status = getattr(self, "sources_status", {}) + # every instance should has a separate one + self.cache_private.add("sources_status") + # set it at the end to make sure the cache writing was done after init + # this class + self.hit_cache = hasattr(self, "hit_cache") + + def is_cached(self): + """ + Returns True if the class loaded from the cache file + """ + return self.cache_infile and self.hit_cache + + def cpu_baseline_flags(self): + """ + Returns a list of final CPU baseline compiler flags + """ + return self.parse_baseline_flags + + def cpu_baseline_names(self): + """ + return a list of final CPU baseline feature names + """ + return self.parse_baseline_names + + def cpu_dispatch_names(self): + """ + return a list of final CPU dispatch feature names + """ + return self.parse_dispatch_names + + def try_dispatch(self, sources, src_dir=None, ccompiler=None, **kwargs): + """ + Compile one or more dispatch-able sources and generates object files, + also generates abstract C config headers and macros that + used later for the final runtime dispatching process. + + The mechanism behind it is to takes each source file that specified + in 'sources' and branching it into several files depend on + special configuration statements that must be declared in the + top of each source which contains targeted CPU features, + then it compiles every branched source with the proper compiler flags. + + Parameters + ---------- + sources : list + Must be a list of dispatch-able sources file paths, + and configuration statements must be declared inside + each file. + + src_dir : str + Path of parent directory for the generated headers and wrapped sources. + If None(default) the files will generated in-place. + + ccompiler: CCompiler + Distutils `CCompiler` instance to be used for compilation. + If None (default), the provided instance during the initialization + will be used instead. 
+    def try_dispatch(self, sources, src_dir=None, ccompiler=None, **kwargs):
+        """
+        Compile one or more dispatch-able sources and generate object files,
+        also generate abstract C config headers and macros that are used
+        later for the final runtime dispatching process.
+
+        The mechanism behind it is to take each source file specified in
+        'sources', branch it into several files depending on special
+        configuration statements that must be declared at the top of each
+        source (containing the targeted CPU features), and then compile
+        every branched source with the proper compiler flags.
+
+        Parameters
+        ----------
+        sources : list
+            Must be a list of dispatch-able source file paths,
+            and configuration statements must be declared inside
+            each file.
+
+        src_dir : str
+            Path of the parent directory for the generated headers and
+            wrapped sources. If None (default), the files are generated
+            in-place.
+
+        ccompiler : CCompiler
+            Distutils `CCompiler` instance to be used for compilation.
+            If None (default), the instance provided during initialization
+            is used instead.
+
+        **kwargs : any
+            Arguments to pass on to `CCompiler.compile()`
+
+        Returns
+        -------
+        list : generated object files
+
+        Raises
+        ------
+        CompileError
+            Raised by `CCompiler.compile()` on compile failure.
+        DistutilsError
+            Raised on errors while checking the sanity of the
+            configuration statements.
+
+        See Also
+        --------
+        parse_targets :
+            Parsing the configuration statements of dispatch-able sources.
+        """
+        to_compile = {}
+        baseline_flags = self.cpu_baseline_flags()
+        include_dirs = kwargs.setdefault("include_dirs", [])
+
+        for src in sources:
+            output_dir = os.path.dirname(src)
+            if src_dir:
+                if not output_dir.startswith(src_dir):
+                    output_dir = os.path.join(src_dir, output_dir)
+                if output_dir not in include_dirs:
+                    # To allow including the generated config header (*.dispatch.h)
+                    # by the dispatch-able sources
+                    include_dirs.append(output_dir)
+
+            has_baseline, targets, extra_flags = self.parse_targets(src)
+            nochange = self._generate_config(output_dir, src, targets, has_baseline)
+            for tar in targets:
+                tar_src = self._wrap_target(output_dir, src, tar, nochange=nochange)
+                flags = tuple(extra_flags + self.feature_flags(tar))
+                to_compile.setdefault(flags, []).append(tar_src)
+
+            if has_baseline:
+                flags = tuple(extra_flags + baseline_flags)
+                to_compile.setdefault(flags, []).append(src)
+
+            self.sources_status[src] = (has_baseline, targets)
+
+        # For these reasons, the sources are compiled in a separate loop:
+        # - Gathering all sources with the same flags to benefit from
+        #   parallel compiling as much as possible.
+        # - To generate all config headers of the dispatch-able sources
+        #   before the compilation, in case there are dependency relationships
+        #   among them.
+        objects = []
+        for flags, srcs in to_compile.items():
+            objects += self.dist_compile(
+                srcs, list(flags), ccompiler=ccompiler, **kwargs
+            )
+        return objects
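+    # To make the configuration-statement contract concrete, a hedged sketch;
+    # the file name and targets below are assumptions, not upstream fixtures:
+    #
+    #     with open("hello.dispatch.c", "w") as f:
+    #         f.write("/*@targets $maxopt baseline sse42 avx2 */\n"
+    #                 "int hello(void) { return 0; }\n")
+    #     objects = opt.try_dispatch(["hello.dispatch.c"])
+    #
+    # This branches the source into one wrapped copy per enabled target,
+    # compiles each copy with the matching flags, and emits the config
+    # header 'hello.dispatch.h' next to the source.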
+    def generate_dispatch_header(self, header_path):
+        """
+        Generate the dispatch header which contains the #definitions and headers
+        of the platform-specific instruction sets for the enabled CPU baseline
+        and dispatch-able features.
+
+        It's highly recommended to take a look at the generated header and
+        at the source files generated via `try_dispatch()`
+        in order to get the full picture.
+        """
+        self.dist_log("generate CPU dispatch header: (%s)" % header_path)
+
+        baseline_names = self.cpu_baseline_names()
+        dispatch_names = self.cpu_dispatch_names()
+        baseline_len = len(baseline_names)
+        dispatch_len = len(dispatch_names)
+
+        header_dir = os.path.dirname(header_path)
+        if not os.path.exists(header_dir):
+            self.dist_log(
+                f"dispatch header dir {header_dir} does not exist, creating it",
+                stderr=True
+            )
+            os.makedirs(header_dir)
+
+        with open(header_path, 'w') as f:
+            baseline_calls = ' \\\n'.join([
+                (
+                    "\t%sWITH_CPU_EXPAND_(MACRO_TO_CALL(%s, __VA_ARGS__))"
+                ) % (self.conf_c_prefix, f)
+                for f in baseline_names
+            ])
+            dispatch_calls = ' \\\n'.join([
+                (
+                    "\t%sWITH_CPU_EXPAND_(MACRO_TO_CALL(%s, __VA_ARGS__))"
+                ) % (self.conf_c_prefix, f)
+                for f in dispatch_names
+            ])
+            f.write(textwrap.dedent("""\
+                /*
+                 * AUTOGENERATED DON'T EDIT
+                 * Please make changes to the code generator (distutils/ccompiler_opt.py)
+                 */
+                #define {pfx}WITH_CPU_BASELINE "{baseline_str}"
+                #define {pfx}WITH_CPU_DISPATCH "{dispatch_str}"
+                #define {pfx}WITH_CPU_BASELINE_N {baseline_len}
+                #define {pfx}WITH_CPU_DISPATCH_N {dispatch_len}
+                #define {pfx}WITH_CPU_EXPAND_(X) X
+                #define {pfx}WITH_CPU_BASELINE_CALL(MACRO_TO_CALL, ...) \\
+                {baseline_calls}
+                #define {pfx}WITH_CPU_DISPATCH_CALL(MACRO_TO_CALL, ...) \\
+                {dispatch_calls}
+            """).format(
+                pfx=self.conf_c_prefix, baseline_str=" ".join(baseline_names),
+                dispatch_str=" ".join(dispatch_names), baseline_len=baseline_len,
+                dispatch_len=dispatch_len, baseline_calls=baseline_calls,
+                dispatch_calls=dispatch_calls
+            ))
+            baseline_pre = ''
+            for name in baseline_names:
+                baseline_pre += self.feature_c_preprocessor(name, tabs=1) + '\n'
+
+            dispatch_pre = ''
+            for name in dispatch_names:
+                dispatch_pre += textwrap.dedent("""\
+                #ifdef {pfx}CPU_TARGET_{name}
+                {pre}
+                #endif /*{pfx}CPU_TARGET_{name}*/
+                """).format(
+                    pfx=self.conf_c_prefix_, name=name,
+                    pre=self.feature_c_preprocessor(name, tabs=1)
+                )
+
+            f.write(textwrap.dedent("""\
+            /******* baseline features *******/
+            {baseline_pre}
+            /******* dispatch features *******/
+            {dispatch_pre}
+            """).format(
+                pfx=self.conf_c_prefix_, baseline_pre=baseline_pre,
+                dispatch_pre=dispatch_pre
+            ))
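+    # The generated header is consumed from C through the emitted macros;
+    # roughly, and assuming the common 'NPY_' prefix for illustration:
+    #
+    #     #define DECLARE_INIT(FEATURE, ...) extern int init_##FEATURE(void);
+    #     NPY_WITH_CPU_DISPATCH_CALL(DECLARE_INIT, void)
+    #
+    # expands DECLARE_INIT(...) once per enabled dispatch feature.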
+    def report(self, full=False):
+        report = []
+        platform_rows = []
+        baseline_rows = []
+        dispatch_rows = []
+        report.append(("Platform", platform_rows))
+        report.append(("", ""))
+        report.append(("CPU baseline", baseline_rows))
+        report.append(("", ""))
+        report.append(("CPU dispatch", dispatch_rows))
+
+        ########## platform ##########
+        platform_rows.append(("Architecture", (
+            "unsupported" if self.cc_on_noarch else self.cc_march)
+        ))
+        platform_rows.append(("Compiler", (
+            "unix-like" if self.cc_is_nocc else self.cc_name)
+        ))
+        ########## baseline ##########
+        if self.cc_noopt:
+            baseline_rows.append(("Requested", "optimization disabled"))
+        else:
+            baseline_rows.append(("Requested", repr(self._requested_baseline)))
+
+        baseline_names = self.cpu_baseline_names()
+        baseline_rows.append((
+            "Enabled", (' '.join(baseline_names) if baseline_names else "none")
+        ))
+        baseline_flags = self.cpu_baseline_flags()
+        baseline_rows.append((
+            "Flags", (' '.join(baseline_flags) if baseline_flags else "none")
+        ))
+        extra_checks = []
+        for name in baseline_names:
+            extra_checks += self.feature_extra_checks(name)
+        baseline_rows.append((
+            "Extra checks", (' '.join(extra_checks) if extra_checks else "none")
+        ))
+
+        ########## dispatch ##########
+        if self.cc_noopt:
+            dispatch_rows.append(("Requested", "optimization disabled"))
+        else:
+            dispatch_rows.append(("Requested", repr(self._requested_dispatch)))
+
+        dispatch_names = self.cpu_dispatch_names()
+        dispatch_rows.append((
+            "Enabled", (' '.join(dispatch_names) if dispatch_names else "none")
+        ))
+        ########## Generated ##########
+        # TODO:
+        # - collect object names from 'try_dispatch()',
+        #   then get the size of each object and print it
+        # - give more details about the features that were not
+        #   generated due to missing compiler support
+        # - find a better design for the output
+        #
+        target_sources = {}
+        for source, (_, targets) in self.sources_status.items():
+            for tar in targets:
+                target_sources.setdefault(tar, []).append(source)
+
+        if not full or not target_sources:
+            generated = ""
+            for tar in self.feature_sorted(target_sources):
+                sources = target_sources[tar]
+                name = tar if isinstance(tar, str) else '(%s)' % ' '.join(tar)
+                generated += name + "[%d] " % len(sources)
+            dispatch_rows.append(("Generated", generated[:-1] if generated else "none"))
+        else:
+            dispatch_rows.append(("Generated", ''))
+            for tar in self.feature_sorted(target_sources):
+                sources = target_sources[tar]
+                pretty_name = tar if isinstance(tar, str) else '(%s)' % ' '.join(tar)
+                flags = ' '.join(self.feature_flags(tar))
+                implies = ' '.join(self.feature_sorted(self.feature_implies(tar)))
+                detect = ' '.join(self.feature_detect(tar))
+                extra_checks = []
+                for name in ((tar,) if isinstance(tar, str) else tar):
+                    extra_checks += self.feature_extra_checks(name)
+                extra_checks = (' '.join(extra_checks) if extra_checks else "none")
+
+                dispatch_rows.append(('', ''))
+                dispatch_rows.append((pretty_name, implies))
+                dispatch_rows.append(("Flags", flags))
+                dispatch_rows.append(("Extra checks", extra_checks))
+                dispatch_rows.append(("Detect", detect))
+                for src in sources:
+                    dispatch_rows.append(("", src))
+
+        ###############################
+        # TODO: add support for 'markdown' format
+        text = []
+        secs_len = [len(secs) for secs, _ in report]
+        cols_len = [len(col) for _, rows in report for col, _ in rows]
+        tab = ' ' * 2
+        pad = max(max(secs_len), max(cols_len))
+        for sec, rows in report:
+            if not sec:
+                text.append("") # empty line
+                continue
+            sec += ' ' * (pad - len(sec))
+            text.append(sec + tab + ': ')
+            for col, val in rows:
+                col += ' ' * (pad - len(col))
+                text.append(tab + col + ': ' + val)
+
+        return '\n'.join(text)
+
+    def _wrap_target(self, output_dir, dispatch_src, target, nochange=False):
+        assert(isinstance(target, (str, tuple)))
+        if isinstance(target, str):
+            ext_name = target_name = target
+        else:
+            # multi-target
+            ext_name = '.'.join(target)
+            target_name = '__'.join(target)
+
+        wrap_path = os.path.join(output_dir, os.path.basename(dispatch_src))
+        wrap_path = "{0}.{2}{1}".format(*os.path.splitext(wrap_path), ext_name.lower())
+        if nochange and os.path.exists(wrap_path):
+            return wrap_path
+
+        self.dist_log("wrap dispatch-able target -> ", wrap_path)
+        # sorting for readability
+        features = self.feature_sorted(self.feature_implies_c(target))
+        target_join = "#define %sCPU_TARGET_" % self.conf_c_prefix_
+        target_defs = [target_join + f for f in features]
+        target_defs = '\n'.join(target_defs)
+
+        with open(wrap_path, "w") as fd:
+            fd.write(textwrap.dedent("""\
+            /**
+             * AUTOGENERATED DON'T EDIT
+             * Please make changes to the code generator \
+             (distutils/ccompiler_opt.py)
+             */
+            #define {pfx}CPU_TARGET_MODE
+            #define {pfx}CPU_TARGET_CURRENT {target_name}
+            {target_defs}
+            #include "{path}"
+            """).format(
+                pfx=self.conf_c_prefix_, target_name=target_name,
+                path=os.path.abspath(dispatch_src), target_defs=target_defs
+            ))
+        return wrap_path
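+    # Naming sketch for the wrapped sources produced by _wrap_target()
+    # (file names are illustrative):
+    #
+    #     hello.dispatch.c + target AVX2        -> hello.dispatch.avx2.c
+    #     hello.dispatch.c + target (FMA3 AVX2) -> hello.dispatch.fma3.avx2.c
+    #
+    # Each wrapper #defines CPU_TARGET_CURRENT plus one macro per implied
+    # feature, then #includes the original dispatch-able source.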
+    def _generate_config(self, output_dir, dispatch_src, targets, has_baseline=False):
+        config_path = os.path.basename(dispatch_src)
+        config_path = os.path.splitext(config_path)[0] + '.h'
+        config_path = os.path.join(output_dir, config_path)
+        # check if the targets didn't change, to avoid recompiling
+        cache_hash = self.cache_hash(targets, has_baseline)
+        try:
+            with open(config_path) as f:
+                last_hash = f.readline().split("cache_hash:")
+                if len(last_hash) == 2 and int(last_hash[1]) == cache_hash:
+                    return True
+        except OSError:
+            pass
+
+        self.dist_log("generate dispatched config -> ", config_path)
+        dispatch_calls = []
+        for tar in targets:
+            if isinstance(tar, str):
+                target_name = tar
+            else: # multi target
+                target_name = '__'.join([t for t in tar])
+            req_detect = self.feature_detect(tar)
+            req_detect = '&&'.join([
+                "CHK(%s)" % f for f in req_detect
+            ])
+            dispatch_calls.append(
+                "\t%sCPU_DISPATCH_EXPAND_(CB((%s), %s, __VA_ARGS__))" % (
+                self.conf_c_prefix_, req_detect, target_name
+            ))
+        dispatch_calls = ' \\\n'.join(dispatch_calls)
+
+        if has_baseline:
+            baseline_calls = (
+                "\t%sCPU_DISPATCH_EXPAND_(CB(__VA_ARGS__))"
+            ) % self.conf_c_prefix_
+        else:
+            baseline_calls = ''
+
+        with open(config_path, "w") as fd:
+            fd.write(textwrap.dedent("""\
+            // cache_hash:{cache_hash}
+            /**
+             * AUTOGENERATED DON'T EDIT
+             * Please make changes to the code generator (distutils/ccompiler_opt.py)
+             */
+            #ifndef {pfx}CPU_DISPATCH_EXPAND_
+                #define {pfx}CPU_DISPATCH_EXPAND_(X) X
+            #endif
+            #undef {pfx}CPU_DISPATCH_BASELINE_CALL
+            #undef {pfx}CPU_DISPATCH_CALL
+            #define {pfx}CPU_DISPATCH_BASELINE_CALL(CB, ...) \\
+            {baseline_calls}
+            #define {pfx}CPU_DISPATCH_CALL(CHK, CB, ...) \\
+            {dispatch_calls}
+            """).format(
+                pfx=self.conf_c_prefix_, baseline_calls=baseline_calls,
+                dispatch_calls=dispatch_calls, cache_hash=cache_hash
+            ))
+        return False
+
+def new_ccompiler_opt(compiler, dispatch_hpath, **kwargs):
+    """
+    Create a new instance of 'CCompilerOpt' and generate the dispatch header
+    which contains the #definitions and headers of the platform-specific
+    instruction sets for the enabled CPU baseline and dispatch-able features.
+
+    Parameters
+    ----------
+    compiler : CCompiler instance
+    dispatch_hpath : str
+        path of the dispatch header
+
+    **kwargs: passed as-is to `CCompilerOpt(...)`
+
+    Returns
+    -------
+    new instance of CCompilerOpt
+    """
+    opt = CCompilerOpt(compiler, **kwargs)
+    if not os.path.exists(dispatch_hpath) or not opt.is_cached():
+        opt.generate_dispatch_header(dispatch_hpath)
+    return opt
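+if __name__ == "__main__":
+    # Hedged, self-contained usage sketch (not part of the public API):
+    # resolve the CPU features for the host compiler, emit the dispatch
+    # header, and print the human-readable report. The header path is an
+    # assumption for demonstration purposes.
+    from distutils.ccompiler import new_compiler
+    from distutils.sysconfig import customize_compiler
+    _cc = new_compiler()
+    customize_compiler(_cc)
+    _opt = new_ccompiler_opt(_cc, dispatch_hpath="build/_cpu_dispatch.h")
+    print(_opt.report(full=True))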
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_asimd.c b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_asimd.c
new file mode 100644
index 0000000..8df556b
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_asimd.c
@@ -0,0 +1,25 @@
+#ifdef _MSC_VER
+    #include <Intrin.h>
+#endif
+#include <arm_neon.h>
+
+int main(void)
+{
+    float32x4_t v1 = vdupq_n_f32(1.0f), v2 = vdupq_n_f32(2.0f);
+    /* MAXMIN */
+    int ret = (int)vgetq_lane_f32(vmaxnmq_f32(v1, v2), 0);
+    ret += (int)vgetq_lane_f32(vminnmq_f32(v1, v2), 0);
+    /* ROUNDING */
+    ret += (int)vgetq_lane_f32(vrndq_f32(v1), 0);
+#ifdef __aarch64__
+    {
+        float64x2_t vd1 = vdupq_n_f64(1.0), vd2 = vdupq_n_f64(2.0);
+        /* MAXMIN */
+        ret += (int)vgetq_lane_f64(vmaxnmq_f64(vd1, vd2), 0);
+        ret += (int)vgetq_lane_f64(vminnmq_f64(vd1, vd2), 0);
+        /* ROUNDING */
+        ret += (int)vgetq_lane_f64(vrndq_f64(vd1), 0);
+    }
+#endif
+    return ret;
+}
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_asimddp.c b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_asimddp.c
new file mode 100644
index 0000000..0158d13
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_asimddp.c
@@ -0,0 +1,15 @@
+#ifdef _MSC_VER
+    #include <Intrin.h>
+#endif
+#include <arm_neon.h>
+
+int main(void)
+{
+    uint8x16_t v1 = vdupq_n_u8((unsigned char)1), v2 = vdupq_n_u8((unsigned char)2);
+    uint32x4_t va = vdupq_n_u32(3);
+    int ret = (int)vgetq_lane_u32(vdotq_u32(va, v1, v2), 0);
+#ifdef __aarch64__
+    ret += (int)vgetq_lane_u32(vdotq_laneq_u32(va, v1, v2, 0), 0);
+#endif
+    return ret;
+}
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_asimdfhm.c b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_asimdfhm.c
new file mode 100644
index 0000000..bb437aa
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_asimdfhm.c
@@ -0,0 +1,17 @@
+#ifdef _MSC_VER
+    #include <Intrin.h>
+#endif
+#include <arm_neon.h>
+
+int main(void)
+{
+    float16x8_t vhp = vdupq_n_f16((float16_t)1);
+    float16x4_t vlhp = vdup_n_f16((float16_t)1);
+    float32x4_t vf = vdupq_n_f32(1.0f);
+    float32x2_t vlf = vdup_n_f32(1.0f);
+
+    int ret = (int)vget_lane_f32(vfmlal_low_u32(vlf, vlhp, vlhp), 0);
+    ret += (int)vgetq_lane_f32(vfmlslq_high_u32(vf, vhp, vhp), 0);
+
+    return ret;
+}
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_asimdhp.c b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_asimdhp.c
new file mode 100644
index 0000000..80b9400
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_asimdhp.c
@@ -0,0 +1,14 @@
+#ifdef _MSC_VER
+    #include <Intrin.h>
+#endif
+#include <arm_neon.h>
+
+int main(void)
+{
+    float16x8_t vhp = vdupq_n_f16((float16_t)-1);
+    float16x4_t vlhp = vdup_n_f16((float16_t)-1);
+
+    int ret = (int)vgetq_lane_f16(vabdq_f16(vhp, vhp), 0);
+    ret += (int)vget_lane_f16(vabd_f16(vlhp, vlhp), 0);
+    return ret;
+}
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_avx.c b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_avx.c
new file mode 100644
index 0000000..26ae184
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_avx.c
@@ -0,0 +1,20 @@
+#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
+    /*
+     * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
+     * whether or not the build options for those features are specified.
+     * Therefore, we must test the #definitions of CPU features when option native/host
+     * is enabled via `--cpu-baseline` or through the env var `CFLAGS`, otherwise
+     * the test will be broken and lead to enabling all possible features.
+     */
+    #ifndef __AVX__
+        #error "HOST/ARCH doesn't support AVX"
+    #endif
+#endif
+
+#include <immintrin.h>
+
+int main(int argc, char **argv)
+{
+    __m256 a = _mm256_add_ps(_mm256_loadu_ps((const float*)argv[argc-1]), _mm256_loadu_ps((const float*)argv[1]));
+    return (int)_mm_cvtss_f32(_mm256_castps256_ps128(a));
+}
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_avx2.c b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_avx2.c
new file mode 100644
index 0000000..ddde868
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_avx2.c
@@ -0,0 +1,20 @@
+#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
+    /*
+     * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
+     * whether or not the build options for those features are specified.
+     * Therefore, we must test the #definitions of CPU features when option native/host
+     * is enabled via `--cpu-baseline` or through the env var `CFLAGS`, otherwise
+     * the test will be broken and lead to enabling all possible features.
+     */
+    #ifndef __AVX2__
+        #error "HOST/ARCH doesn't support AVX2"
+    #endif
+#endif
+
+#include <immintrin.h>
+
+int main(int argc, char **argv)
+{
+    __m256i a = _mm256_abs_epi16(_mm256_loadu_si256((const __m256i*)argv[argc-1]));
+    return _mm_cvtsi128_si32(_mm256_castsi256_si128(a));
+}
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_avx512_clx.c b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_avx512_clx.c
new file mode 100644
index 0000000..81edcd0
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_avx512_clx.c
@@ -0,0 +1,22 @@
+#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
+    /*
+     * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
+     * whether or not the build options for those features are specified.
+     * Therefore, we must test the #definitions of CPU features when option native/host
+     * is enabled via `--cpu-baseline` or through the env var `CFLAGS`, otherwise
+     * the test will be broken and lead to enabling all possible features.
+     */
+    #ifndef __AVX512VNNI__
+        #error "HOST/ARCH doesn't support CascadeLake AVX512 features"
+    #endif
+#endif
+
+#include <immintrin.h>
+
+int main(int argc, char **argv)
+{
+    /* VNNI */
+    __m512i a = _mm512_loadu_si512((const __m512i*)argv[argc-1]);
+    a = _mm512_dpbusd_epi32(a, _mm512_setzero_si512(), a);
+    return _mm_cvtsi128_si32(_mm512_castsi512_si128(a));
+}
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_avx512_cnl.c b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_avx512_cnl.c
new file mode 100644
index 0000000..5799f12
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_avx512_cnl.c
@@ -0,0 +1,24 @@
+#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
+    /*
+     * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
+     * whether or not the build options for those features are specified.
+     * Therefore, we must test the #definitions of CPU features when option native/host
+     * is enabled via `--cpu-baseline` or through the env var `CFLAGS`, otherwise
+     * the test will be broken and lead to enabling all possible features.
+     */
+    #if !defined(__AVX512VBMI__) || !defined(__AVX512IFMA__)
+        #error "HOST/ARCH doesn't support CannonLake AVX512 features"
+    #endif
+#endif
+
+#include <immintrin.h>
+
+int main(int argc, char **argv)
+{
+    __m512i a = _mm512_loadu_si512((const __m512i*)argv[argc-1]);
+    /* IFMA */
+    a = _mm512_madd52hi_epu64(a, a, _mm512_setzero_si512());
+    /* VBMI */
+    a = _mm512_permutex2var_epi8(a, _mm512_setzero_si512(), a);
+    return _mm_cvtsi128_si32(_mm512_castsi512_si128(a));
+}
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_avx512_icl.c b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_avx512_icl.c
new file mode 100644
index 0000000..3cf44d7
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_avx512_icl.c
@@ -0,0 +1,26 @@
+#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
+    /*
+     * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
+     * whether or not the build options for those features are specified.
+     * Therefore, we must test the #definitions of CPU features when option native/host
+     * is enabled via `--cpu-baseline` or through the env var `CFLAGS`, otherwise
+     * the test will be broken and lead to enabling all possible features.
+     */
+    #if !defined(__AVX512VBMI2__) || !defined(__AVX512BITALG__) || !defined(__AVX512VPOPCNTDQ__)
+        #error "HOST/ARCH doesn't support IceLake AVX512 features"
+    #endif
+#endif
+
+#include <immintrin.h>
+
+int main(int argc, char **argv)
+{
+    __m512i a = _mm512_loadu_si512((const __m512i*)argv[argc-1]);
+    /* VBMI2 */
+    a = _mm512_shrdv_epi64(a, a, _mm512_setzero_si512());
+    /* BITALG */
+    a = _mm512_popcnt_epi8(a);
+    /* VPOPCNTDQ */
+    a = _mm512_popcnt_epi64(a);
+    return _mm_cvtsi128_si32(_mm512_castsi512_si128(a));
+}
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_avx512_knl.c b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_avx512_knl.c
new file mode 100644
index 0000000..b3f4f69
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_avx512_knl.c
@@ -0,0 +1,25 @@
+#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
+    /*
+     * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
+     * whether or not the build options for those features are specified.
+     * Therefore, we must test the #definitions of CPU features when option native/host
+     * is enabled via `--cpu-baseline` or through the env var `CFLAGS`, otherwise
+     * the test will be broken and lead to enabling all possible features.
+     */
+    #if !defined(__AVX512ER__) || !defined(__AVX512PF__)
+        #error "HOST/ARCH doesn't support Knights Landing AVX512 features"
+    #endif
+#endif
+
+#include <immintrin.h>
+
+int main(int argc, char **argv)
+{
+    int base[128];
+    __m512d ad = _mm512_loadu_pd((const __m512d*)argv[argc-1]);
+    /* ER */
+    __m512i a = _mm512_castpd_si512(_mm512_exp2a23_pd(ad));
+    /* PF */
+    _mm512_mask_prefetch_i64scatter_pd(base, _mm512_cmpeq_epi64_mask(a, a), a, 1, _MM_HINT_T1);
+    return base[0];
+}
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_avx512_knm.c b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_avx512_knm.c
new file mode 100644
index 0000000..2c42646
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_avx512_knm.c
@@ -0,0 +1,30 @@
+#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
+    /*
+     * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
+     * whether or not the build options for those features are specified.
+     * Therefore, we must test the #definitions of CPU features when option native/host
+     * is enabled via `--cpu-baseline` or through the env var `CFLAGS`, otherwise
+     * the test will be broken and lead to enabling all possible features.
+     */
+    #if !defined(__AVX5124FMAPS__) || !defined(__AVX5124VNNIW__) || !defined(__AVX512VPOPCNTDQ__)
+        #error "HOST/ARCH doesn't support Knights Mill AVX512 features"
+    #endif
+#endif
+
+#include <immintrin.h>
+
+int main(int argc, char **argv)
+{
+    __m512i a = _mm512_loadu_si512((const __m512i*)argv[argc-1]);
+    __m512 b = _mm512_loadu_ps((const __m512*)argv[argc-2]);
+
+    /* 4FMAPS */
+    b = _mm512_4fmadd_ps(b, b, b, b, b, NULL);
+    /* 4VNNIW */
+    a = _mm512_4dpwssd_epi32(a, a, a, a, a, NULL);
+    /* VPOPCNTDQ */
+    a = _mm512_popcnt_epi64(a);
+
+    a = _mm512_add_epi32(a, _mm512_castps_si512(b));
+    return _mm_cvtsi128_si32(_mm512_castsi512_si128(a));
+}
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_avx512_skx.c b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_avx512_skx.c
new file mode 100644
index 0000000..8840efb
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_avx512_skx.c
@@ -0,0 +1,26 @@
+#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
+    /*
+     * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
+     * whether or not the build options for those features are specified.
+     * Therefore, we must test the #definitions of CPU features when option native/host
+     * is enabled via `--cpu-baseline` or through the env var `CFLAGS`, otherwise
+     * the test will be broken and lead to enabling all possible features.
+     */
+    #if !defined(__AVX512VL__) || !defined(__AVX512BW__) || !defined(__AVX512DQ__)
+        #error "HOST/ARCH doesn't support SkyLake AVX512 features"
+    #endif
+#endif
+
+#include <immintrin.h>
+
+int main(int argc, char **argv)
+{
+    __m512i aa = _mm512_abs_epi32(_mm512_loadu_si512((const __m512i*)argv[argc-1]));
+    /* VL */
+    __m256i a = _mm256_abs_epi64(_mm512_extracti64x4_epi64(aa, 1));
+    /* DQ */
+    __m512i b = _mm512_broadcast_i32x8(a);
+    /* BW */
+    b = _mm512_abs_epi16(b);
+    return _mm_cvtsi128_si32(_mm512_castsi512_si128(b));
+}
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_avx512cd.c b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_avx512cd.c
new file mode 100644
index 0000000..5e29c79
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_avx512cd.c
@@ -0,0 +1,20 @@
+#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
+    /*
+     * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
+     * whether or not the build options for those features are specified.
+     * Therefore, we must test the #definitions of CPU features when option native/host
+     * is enabled via `--cpu-baseline` or through the env var `CFLAGS`, otherwise
+     * the test will be broken and lead to enabling all possible features.
+     */
+    #ifndef __AVX512CD__
+        #error "HOST/ARCH doesn't support AVX512CD"
+    #endif
+#endif
+
+#include <immintrin.h>
+
+int main(int argc, char **argv)
+{
+    __m512i a = _mm512_lzcnt_epi32(_mm512_loadu_si512((const __m512i*)argv[argc-1]));
+    return _mm_cvtsi128_si32(_mm512_castsi512_si128(a));
+}
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_avx512f.c b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_avx512f.c
new file mode 100644
index 0000000..d0eb7b1
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_avx512f.c
@@ -0,0 +1,20 @@
+#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
+    /*
+     * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
+     * whether or not the build options for those features are specified.
+     * Therefore, we must test the #definitions of CPU features when option native/host
+     * is enabled via `--cpu-baseline` or through the env var `CFLAGS`, otherwise
+     * the test will be broken and lead to enabling all possible features.
+     */
+    #ifndef __AVX512F__
+        #error "HOST/ARCH doesn't support AVX512F"
+    #endif
+#endif
+
+#include <immintrin.h>
+
+int main(int argc, char **argv)
+{
+    __m512i a = _mm512_abs_epi32(_mm512_loadu_si512((const __m512i*)argv[argc-1]));
+    return _mm_cvtsi128_si32(_mm512_castsi512_si128(a));
+}
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_f16c.c b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_f16c.c
new file mode 100644
index 0000000..fdf36ce
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_f16c.c
@@ -0,0 +1,22 @@
+#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
+    /*
+     * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
+     * whether or not the build options for those features are specified.
+     * Therefore, we must test the #definitions of CPU features when option native/host
+     * is enabled via `--cpu-baseline` or through the env var `CFLAGS`, otherwise
+     * the test will be broken and lead to enabling all possible features.
+     */
+    #ifndef __F16C__
+        #error "HOST/ARCH doesn't support F16C"
+    #endif
+#endif
+
+#include <emmintrin.h>
+#include <immintrin.h>
+
+int main(int argc, char **argv)
+{
+    __m128 a  = _mm_cvtph_ps(_mm_loadu_si128((const __m128i*)argv[argc-1]));
+    __m256 a8 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*)argv[argc-2]));
+    return (int)(_mm_cvtss_f32(a) + _mm_cvtss_f32(_mm256_castps256_ps128(a8)));
+}
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_fma3.c b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_fma3.c
new file mode 100644
index 0000000..bfeef22
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_fma3.c
@@ -0,0 +1,22 @@
+#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
+    /*
+     * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
+     * whether or not the build options for those features are specified.
+     * Therefore, we must test the #definitions of CPU features when option native/host
+     * is enabled via `--cpu-baseline` or through the env var `CFLAGS`, otherwise
+     * the test will be broken and lead to enabling all possible features.
+     */
+    #if !defined(__FMA__) && !defined(__AVX2__)
+        #error "HOST/ARCH doesn't support FMA3"
+    #endif
+#endif
+
+#include <xmmintrin.h>
+#include <immintrin.h>
+
+int main(int argc, char **argv)
+{
+    __m256 a = _mm256_loadu_ps((const float*)argv[argc-1]);
+    a = _mm256_fmadd_ps(a, a, a);
+    return (int)_mm_cvtss_f32(_mm256_castps256_ps128(a));
+}
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_fma4.c b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_fma4.c
new file mode 100644
index 0000000..0ff17a4
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_fma4.c
@@ -0,0 +1,13 @@
+#include <immintrin.h>
+#ifdef _MSC_VER
+    #include <ammintrin.h>
+#else
+    #include <x86intrin.h>
+#endif
+
+int main(int argc, char **argv)
+{
+    __m256 a = _mm256_loadu_ps((const float*)argv[argc-1]);
+    a = _mm256_macc_ps(a, a, a);
+    return (int)_mm_cvtss_f32(_mm256_castps256_ps128(a));
+}
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_neon.c b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_neon.c
new file mode 100644
index 0000000..4eab1f3
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_neon.c
@@ -0,0 +1,15 @@
+#ifdef _MSC_VER
+    #include <Intrin.h>
+#endif
+#include <arm_neon.h>
+
+int main(void)
+{
+    float32x4_t v1 = vdupq_n_f32(1.0f), v2 = vdupq_n_f32(2.0f);
+    int ret = (int)vgetq_lane_f32(vmulq_f32(v1, v2), 0);
+#ifdef __aarch64__
+    float64x2_t vd1 = vdupq_n_f64(1.0), vd2 = vdupq_n_f64(2.0);
+    ret += (int)vgetq_lane_f64(vmulq_f64(vd1, vd2), 0);
+#endif
+    return ret;
+}
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_neon_fp16.c b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_neon_fp16.c
new file mode 100644
index 0000000..745d2e7
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_neon_fp16.c
@@ -0,0 +1,11 @@
+#ifdef _MSC_VER
+    #include <Intrin.h>
+#endif
+#include <arm_neon.h>
+
+int main(void)
+{
+    short z4[] = {0, 0, 0, 0, 0, 0, 0, 0};
+    float32x4_t v_z4 = vcvt_f32_f16((float16x4_t)vld1_s16((const short*)z4));
+    return (int)vgetq_lane_f32(v_z4, 0);
+}
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_neon_vfpv4.c b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_neon_vfpv4.c
new file mode 100644
index 0000000..45f7b5d
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_neon_vfpv4.c
@@ -0,0 +1,19 @@
+#ifdef _MSC_VER
+    #include <Intrin.h>
+#endif
+#include <arm_neon.h>
+
+int main(void)
+{
+    float32x4_t v1 = vdupq_n_f32(1.0f);
+    float32x4_t v2 = vdupq_n_f32(2.0f);
+    float32x4_t v3 = vdupq_n_f32(3.0f);
+    int ret = (int)vgetq_lane_f32(vfmaq_f32(v1, v2, v3), 0);
+#ifdef __aarch64__
+    float64x2_t vd1 = vdupq_n_f64(1.0);
+    float64x2_t vd2 = vdupq_n_f64(2.0);
+    float64x2_t vd3 = vdupq_n_f64(3.0);
+    ret += (int)vgetq_lane_f64(vfmaq_f64(vd1, vd2, vd3), 0);
+#endif
+    return ret;
+}
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_popcnt.c b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_popcnt.c
new file mode 100644
index 0000000..813c461
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_popcnt.c
@@ -0,0 +1,32 @@
+#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
+    /*
+     * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
+     * whether or not the build options for those features are specified.
+     * Therefore, we must test the #definitions of CPU features when option native/host
+     * is enabled via `--cpu-baseline` or through the env var `CFLAGS`, otherwise
+     * the test will be broken and lead to enabling all possible features.
+     */
+    #if !defined(__SSE4_2__) && !defined(__POPCNT__)
+        #error "HOST/ARCH doesn't support POPCNT"
+    #endif
+#endif
+
+#ifdef _MSC_VER
+    #include <nmmintrin.h>
+#else
+    #include <popcntintrin.h>
+#endif
+
+int main(int argc, char **argv)
+{
+    // To make sure popcnt instructions are generated
+    // and tested against the assembler
+    unsigned long long a = *((unsigned long long*)argv[argc-1]);
+    unsigned int b = *((unsigned int*)argv[argc-2]);
+
+#if defined(_M_X64) || defined(__x86_64__)
+    a = _mm_popcnt_u64(a);
+#endif
+    b = _mm_popcnt_u32(b);
+    return (int)a + b;
+}
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_sse.c b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_sse.c
new file mode 100644
index 0000000..602b74e
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_sse.c
@@ -0,0 +1,20 @@
+#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
+    /*
+     * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
+     * whether or not the build options for those features are specified.
+     * Therefore, we must test the #definitions of CPU features when option native/host
+     * is enabled via `--cpu-baseline` or through the env var `CFLAGS`, otherwise
+     * the test will be broken and lead to enabling all possible features.
+     */
+    #ifndef __SSE__
+        #error "HOST/ARCH doesn't support SSE"
+    #endif
+#endif
+
+#include <xmmintrin.h>
+
+int main(void)
+{
+    __m128 a = _mm_add_ps(_mm_setzero_ps(), _mm_setzero_ps());
+    return (int)_mm_cvtss_f32(a);
+}
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_sse2.c b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_sse2.c
new file mode 100644
index 0000000..33826a9
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_sse2.c
@@ -0,0 +1,20 @@
+#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
+    /*
+     * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
+     * whether or not the build options for those features are specified.
+     * Therefore, we must test the #definitions of CPU features when option native/host
+     * is enabled via `--cpu-baseline` or through the env var `CFLAGS`, otherwise
+     * the test will be broken and lead to enabling all possible features.
+     */
+    #ifndef __SSE2__
+        #error "HOST/ARCH doesn't support SSE2"
+    #endif
+#endif
+
+#include <emmintrin.h>
+
+int main(void)
+{
+    __m128i a = _mm_add_epi16(_mm_setzero_si128(), _mm_setzero_si128());
+    return _mm_cvtsi128_si32(a);
+}
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_sse3.c b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_sse3.c
new file mode 100644
index 0000000..d47c20f
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_sse3.c
@@ -0,0 +1,20 @@
+#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
+    /*
+     * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
+     * whether or not the build options for those features are specified.
+     * Therefore, we must test the #definitions of CPU features when option native/host
+     * is enabled via `--cpu-baseline` or through the env var `CFLAGS`, otherwise
+     * the test will be broken and lead to enabling all possible features.
+     */
+    #ifndef __SSE3__
+        #error "HOST/ARCH doesn't support SSE3"
+    #endif
+#endif
+
+#include <pmmintrin.h>
+
+int main(void)
+{
+    __m128 a = _mm_hadd_ps(_mm_setzero_ps(), _mm_setzero_ps());
+    return (int)_mm_cvtss_f32(a);
+}
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_sse41.c b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_sse41.c
new file mode 100644
index 0000000..7c80238
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_sse41.c
@@ -0,0 +1,20 @@
+#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
+    /*
+     * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
+     * whether or not the build options for those features are specified.
+     * Therefore, we must test the #definitions of CPU features when option native/host
+     * is enabled via `--cpu-baseline` or through the env var `CFLAGS`, otherwise
+     * the test will be broken and lead to enabling all possible features.
+     */
+    #ifndef __SSE4_1__
+        #error "HOST/ARCH doesn't support SSE41"
+    #endif
+#endif
+
+#include <smmintrin.h>
+
+int main(void)
+{
+    __m128 a = _mm_floor_ps(_mm_setzero_ps());
+    return (int)_mm_cvtss_f32(a);
+}
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_sse42.c b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_sse42.c
new file mode 100644
index 0000000..f60e18f
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_sse42.c
@@ -0,0 +1,20 @@
+#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
+    /*
+     * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
+     * whether or not the build options for those features are specified.
+     * Therefore, we must test the #definitions of CPU features when option native/host
+     * is enabled via `--cpu-baseline` or through the env var `CFLAGS`, otherwise
+     * the test will be broken and lead to enabling all possible features.
+     */
+    #ifndef __SSE4_2__
+        #error "HOST/ARCH doesn't support SSE42"
+    #endif
+#endif
+
+#include <smmintrin.h>
+
+int main(void)
+{
+    __m128 a = _mm_hadd_ps(_mm_setzero_ps(), _mm_setzero_ps());
+    return (int)_mm_cvtss_f32(a);
+}
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_ssse3.c b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_ssse3.c
new file mode 100644
index 0000000..fde390d
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_ssse3.c
@@ -0,0 +1,20 @@
+#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
+    /*
+     * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
+     * whether or not the build options for those features are specified.
+     * Therefore, we must test the #definitions of CPU features when option native/host
+     * is enabled via `--cpu-baseline` or through the env var `CFLAGS`, otherwise
+     * the test will be broken and lead to enabling all possible features.
+     */
+    #ifndef __SSSE3__
+        #error "HOST/ARCH doesn't support SSSE3"
+    #endif
+#endif
+
+#include <tmmintrin.h>
+
+int main(void)
+{
+    __m128i a = _mm_hadd_epi16(_mm_setzero_si128(), _mm_setzero_si128());
+    return (int)_mm_cvtsi128_si32(a);
+}
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_vsx.c b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_vsx.c
new file mode 100644
index 0000000..0b3f30d
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_vsx.c
@@ -0,0 +1,21 @@
+#ifndef __VSX__
+    #error "VSX is not supported"
+#endif
+#include <altivec.h>
+
+#if (defined(__GNUC__) && !defined(vec_xl)) || (defined(__clang__) && !defined(__IBMC__))
+    #define vsx_ld vec_vsx_ld
+    #define vsx_st vec_vsx_st
+#else
+    #define vsx_ld vec_xl
+    #define vsx_st vec_xst
+#endif
+
+int main(void)
+{
+    unsigned int zout[4];
+    unsigned int z4[] = {0, 0, 0, 0};
+    __vector unsigned int v_z4 = vsx_ld(0, z4);
+    vsx_st(v_z4, 0, zout);
+    return zout[0];
+}
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_vsx2.c b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_vsx2.c
new file mode 100644
index 0000000..410fb29
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_vsx2.c
@@ -0,0 +1,13 @@
+#ifndef __VSX__
+    #error "VSX is not supported"
+#endif
+#include <altivec.h>
+
+typedef __vector unsigned long long v_uint64x2;
+
+int main(void)
+{
+    v_uint64x2 z2 = (v_uint64x2){0, 0};
+    z2 = (v_uint64x2)vec_cmpeq(z2, z2);
+    return (int)vec_extract(z2, 0);
+}
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_vsx3.c b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_vsx3.c
new file mode 100644
index 0000000..8575265
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_vsx3.c
@@ -0,0 +1,13 @@
+#ifndef __VSX__
+    #error "VSX is not supported"
+#endif
+#include <altivec.h>
+
+typedef __vector unsigned int v_uint32x4;
+
+int main(void)
+{
+    v_uint32x4 z4 = (v_uint32x4){0, 0, 0, 0};
+    z4 = vec_absd(z4, z4);
+    return (int)vec_extract(z4, 0);
+}
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_xop.c b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_xop.c
new file mode 100644
index 0000000..51d70cf
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/checks/cpu_xop.c
@@ -0,0 +1,12 @@
+#include <immintrin.h>
+#ifdef _MSC_VER
+    #include <ammintrin.h>
+#else
+    #include <x86intrin.h>
+#endif
+
+int main(void)
+{
+    __m128i a = _mm_comge_epu32(_mm_setzero_si128(), _mm_setzero_si128());
+    return _mm_cvtsi128_si32(a);
+}
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/checks/extra_avx512bw_mask.c b/.local/lib/python3.8/site-packages/numpy/distutils/checks/extra_avx512bw_mask.c
new file mode 100644
index 0000000..9cfd0c2
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/checks/extra_avx512bw_mask.c
@@ -0,0 +1,18 @@
+#include <immintrin.h>
+/**
+ * Test BW mask operations due to:
+ *  - MSVC has supported them since vs2019, see
+ *    https://developercommunity.visualstudio.com/content/problem/518298/missing-avx512bw-mask-intrinsics.html
+ *  - Clang >= v8.0
+ *  - GCC >= v7.1
+ */
+int main(void)
+{
+    __mmask64 m64 = _mm512_cmpeq_epi8_mask(_mm512_set1_epi8((char)1), _mm512_set1_epi8((char)1));
+    m64 = _kor_mask64(m64, m64);
+    m64 = _kxor_mask64(m64, m64);
+    m64 = _cvtu64_mask64(_cvtmask64_u64(m64));
+    m64 = _mm512_kunpackd(m64, m64);
+    m64 = (__mmask64)_mm512_kunpackw((__mmask32)m64, (__mmask32)m64);
+    return (int)_cvtmask64_u64(m64);
+}
diff --git
a/.local/lib/python3.8/site-packages/numpy/distutils/checks/extra_avx512dq_mask.c b/.local/lib/python3.8/site-packages/numpy/distutils/checks/extra_avx512dq_mask.c
new file mode 100644
index 0000000..f0dc88b
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/checks/extra_avx512dq_mask.c
@@ -0,0 +1,16 @@
+#include <immintrin.h>
+/**
+ * Test DQ mask operations due to:
+ *  - MSVC has supported them since vs2019, see
+ *    https://developercommunity.visualstudio.com/content/problem/518298/missing-avx512bw-mask-intrinsics.html
+ *  - Clang >= v8.0
+ *  - GCC >= v7.1
+ */
+int main(void)
+{
+    __mmask8 m8 = _mm512_cmpeq_epi64_mask(_mm512_set1_epi64(1), _mm512_set1_epi64(1));
+    m8 = _kor_mask8(m8, m8);
+    m8 = _kxor_mask8(m8, m8);
+    m8 = _cvtu32_mask8(_cvtmask8_u32(m8));
+    return (int)_cvtmask8_u32(m8);
+}
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/checks/extra_avx512f_reduce.c b/.local/lib/python3.8/site-packages/numpy/distutils/checks/extra_avx512f_reduce.c
new file mode 100644
index 0000000..db01aae
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/checks/extra_avx512f_reduce.c
@@ -0,0 +1,41 @@
+#include <immintrin.h>
+/**
+ * The following intrinsics don't have direct native support but compilers
+ * tend to emulate them.
+ * They're usually supported by gcc >= 7.1, clang >= 4 and icc >= 19
+ */
+int main(void)
+{
+    __m512  one_ps = _mm512_set1_ps(1.0f);
+    __m512d one_pd = _mm512_set1_pd(1.0);
+    __m512i one_i64 = _mm512_set1_epi64(1);
+    // add
+    float sum_ps  = _mm512_reduce_add_ps(one_ps);
+    double sum_pd = _mm512_reduce_add_pd(one_pd);
+    int sum_int   = (int)_mm512_reduce_add_epi64(one_i64);
+    sum_int  += (int)_mm512_reduce_add_epi32(one_i64);
+    // mul
+    sum_ps  += _mm512_reduce_mul_ps(one_ps);
+    sum_pd  += _mm512_reduce_mul_pd(one_pd);
+    sum_int += (int)_mm512_reduce_mul_epi64(one_i64);
+    sum_int += (int)_mm512_reduce_mul_epi32(one_i64);
+    // min
+    sum_ps  += _mm512_reduce_min_ps(one_ps);
+    sum_pd  += _mm512_reduce_min_pd(one_pd);
+    sum_int += (int)_mm512_reduce_min_epi32(one_i64);
+    sum_int += (int)_mm512_reduce_min_epu32(one_i64);
+    sum_int += (int)_mm512_reduce_min_epi64(one_i64);
+    // max
+    sum_ps  += _mm512_reduce_max_ps(one_ps);
+    sum_pd  += _mm512_reduce_max_pd(one_pd);
+    sum_int += (int)_mm512_reduce_max_epi32(one_i64);
+    sum_int += (int)_mm512_reduce_max_epu32(one_i64);
+    sum_int += (int)_mm512_reduce_max_epi64(one_i64);
+    // and
+    sum_int += (int)_mm512_reduce_and_epi32(one_i64);
+    sum_int += (int)_mm512_reduce_and_epi64(one_i64);
+    // or
+    sum_int += (int)_mm512_reduce_or_epi32(one_i64);
+    sum_int += (int)_mm512_reduce_or_epi64(one_i64);
+    return (int)sum_ps + (int)sum_pd + sum_int;
+}
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/checks/extra_vsx_asm.c b/.local/lib/python3.8/site-packages/numpy/distutils/checks/extra_vsx_asm.c
new file mode 100644
index 0000000..b73a6f4
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/checks/extra_vsx_asm.c
@@ -0,0 +1,36 @@
+/**
+ * Testing ASM VSX register number fixer '%x<n>'
+ *
+ * Older versions of Clang don't support '%x<n>' in the inline asm template,
+ * which fixes the register number when using any of the register constraints wa, wd, wf.
+ *
+ * xref:
+ * - https://bugs.llvm.org/show_bug.cgi?id=31837
+ * - https://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html
+ */
+#ifndef __VSX__
+    #error "VSX is not supported"
+#endif
+#include <altivec.h>
+
+#if (defined(__GNUC__) && !defined(vec_xl)) || (defined(__clang__) && !defined(__IBMC__))
+    #define vsx_ld vec_vsx_ld
+    #define vsx_st vec_vsx_st
+#else
+    #define vsx_ld vec_xl
+    #define vsx_st vec_xst
+#endif
+
+int main(void)
+{
+    float z4[] = {0, 0, 0, 0};
+    signed int zout[] = {0, 0, 0, 0};
+
+    __vector float vz4 = vsx_ld(0, z4);
+    __vector signed int asm_ret = vsx_ld(0, zout);
+
+    __asm__ ("xvcvspsxws %x0,%x1" : "=wa" (vz4) : "wa" (asm_ret));
+
+    vsx_st(asm_ret, 0, zout);
+    return zout[0];
+}
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/checks/test_flags.c b/.local/lib/python3.8/site-packages/numpy/distutils/checks/test_flags.c
new file mode 100644
index 0000000..4cd09d4
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/checks/test_flags.c
@@ -0,0 +1 @@
+int test_flags;
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/command/__init__.py b/.local/lib/python3.8/site-packages/numpy/distutils/command/__init__.py
new file mode 100644
index 0000000..3ba501d
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/command/__init__.py
@@ -0,0 +1,41 @@
+"""distutils.command
+
+Package containing implementation of all the standard Distutils
+commands.
+
+"""
+def test_na_writable_attributes_deletion():
+    a = np.NA(2)
+    attr =  ['payload', 'dtype']
+    for s in attr:
+        assert_raises(AttributeError, delattr, a, s)
+
+
+__revision__ = "$Id: __init__.py,v 1.3 2005/05/16 11:08:49 pearu Exp $"
+
+distutils_all = [  #'build_py',
+                   'clean',
+                   'install_clib',
+                   'install_scripts',
+                   'bdist',
+                   'bdist_dumb',
+                   'bdist_wininst',
+                ]
+
+__import__('distutils.command', globals(), locals(), distutils_all)
+
+__all__ = ['build',
+           'config_compiler',
+           'config',
+           'build_src',
+           'build_py',
+           'build_ext',
+           'build_clib',
+           'build_scripts',
+           'install',
+           'install_data',
+           'install_headers',
+           'install_lib',
+           'bdist_rpm',
+           'sdist',
+          ] + distutils_all
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000..df37bf2
Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/__init__.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/autodist.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/autodist.cpython-38.pyc
new file mode 100644
index 0000000..5ee8c37
Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/autodist.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/bdist_rpm.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/bdist_rpm.cpython-38.pyc
new file mode 100644
index 0000000..30c85ac
Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/bdist_rpm.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/build.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/build.cpython-38.pyc
new file mode 100644
index 0000000..05df0e6
Binary files /dev/null and
b/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/build.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/build_clib.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/build_clib.cpython-38.pyc new file mode 100644 index 0000000..1ee9e81 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/build_clib.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/build_ext.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/build_ext.cpython-38.pyc new file mode 100644 index 0000000..9b4af32 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/build_ext.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/build_py.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/build_py.cpython-38.pyc new file mode 100644 index 0000000..e6eed0d Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/build_py.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/build_scripts.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/build_scripts.cpython-38.pyc new file mode 100644 index 0000000..48b15bc Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/build_scripts.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/build_src.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/build_src.cpython-38.pyc new file mode 100644 index 0000000..87ea488 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/build_src.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/config.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/config.cpython-38.pyc new file mode 100644 index 0000000..3144a09 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/config.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/config_compiler.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/config_compiler.cpython-38.pyc new file mode 100644 index 0000000..14e2390 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/config_compiler.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/develop.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/develop.cpython-38.pyc new file mode 100644 index 0000000..5574030 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/develop.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/egg_info.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/egg_info.cpython-38.pyc new file mode 100644 index 0000000..6913e19 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/egg_info.cpython-38.pyc 
differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/install.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/install.cpython-38.pyc new file mode 100644 index 0000000..1f11ee1 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/install.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/install_clib.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/install_clib.cpython-38.pyc new file mode 100644 index 0000000..df84a74 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/install_clib.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/install_data.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/install_data.cpython-38.pyc new file mode 100644 index 0000000..7290fc5 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/install_data.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/install_headers.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/install_headers.cpython-38.pyc new file mode 100644 index 0000000..960d3e1 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/install_headers.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/sdist.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/sdist.cpython-38.pyc new file mode 100644 index 0000000..a8bcfdb Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/command/__pycache__/sdist.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/command/autodist.py b/.local/lib/python3.8/site-packages/numpy/distutils/command/autodist.py new file mode 100644 index 0000000..b72d0ca --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/command/autodist.py @@ -0,0 +1,148 @@ +"""This module implements additional tests ala autoconf which can be useful. + +""" +import textwrap + +# We put them here since they could be easily reused outside numpy.distutils + +def check_inline(cmd): + """Return the inline identifier (may be empty).""" + cmd._check_compiler() + body = textwrap.dedent(""" + #ifndef __cplusplus + static %(inline)s int static_func (void) + { + return 0; + } + %(inline)s int nostatic_func (void) + { + return 0; + } + #endif""") + + for kw in ['inline', '__inline__', '__inline']: + st = cmd.try_compile(body % {'inline': kw}, None, None) + if st: + return kw + + return '' + + +def check_restrict(cmd): + """Return the restrict identifier (may be empty).""" + cmd._check_compiler() + body = textwrap.dedent(""" + static int static_func (char * %(restrict)s a) + { + return 0; + } + """) + + for kw in ['restrict', '__restrict__', '__restrict']: + st = cmd.try_compile(body % {'restrict': kw}, None, None) + if st: + return kw + + return '' + + +def check_compiler_gcc(cmd): + """Check if the compiler is GCC.""" + + cmd._check_compiler() + body = textwrap.dedent(""" + int + main() + { + #if (! 
defined __GNUC__) + #error gcc required + #endif + return 0; + } + """) + return cmd.try_compile(body, None, None) + + +def check_gcc_version_at_least(cmd, major, minor=0, patchlevel=0): + """ + Check that the gcc version is at least the specified version.""" + + cmd._check_compiler() + version = '.'.join([str(major), str(minor), str(patchlevel)]) + body = textwrap.dedent(""" + int + main() + { + #if (! defined __GNUC__) || (__GNUC__ < %(major)d) || \\ + (__GNUC_MINOR__ < %(minor)d) || \\ + (__GNUC_PATCHLEVEL__ < %(patchlevel)d) + #error gcc >= %(version)s required + #endif + return 0; + } + """) + kw = {'version': version, 'major': major, 'minor': minor, + 'patchlevel': patchlevel} + + return cmd.try_compile(body % kw, None, None) + + +def check_gcc_function_attribute(cmd, attribute, name): + """Return True if the given function attribute is supported.""" + cmd._check_compiler() + body = textwrap.dedent(""" + #pragma GCC diagnostic error "-Wattributes" + #pragma clang diagnostic error "-Wattributes" + + int %s %s(void* unused) + { + return 0; + } + + int + main() + { + return 0; + } + """) % (attribute, name) + return cmd.try_compile(body, None, None) != 0 + + +def check_gcc_function_attribute_with_intrinsics(cmd, attribute, name, code, + include): + """Return True if the given function attribute is supported with + intrinsics.""" + cmd._check_compiler() + body = textwrap.dedent(""" + #include<%s> + int %s %s(void) + { + %s; + return 0; + } + + int + main() + { + return 0; + } + """) % (include, attribute, name, code) + return cmd.try_compile(body, None, None) != 0 + + +def check_gcc_variable_attribute(cmd, attribute): + """Return True if the given variable attribute is supported.""" + cmd._check_compiler() + body = textwrap.dedent(""" + #pragma GCC diagnostic error "-Wattributes" + #pragma clang diagnostic error "-Wattributes" + + int %s foo; + + int + main() + { + return 0; + } + """) % (attribute, ) + return cmd.try_compile(body, None, None) != 0 diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/command/bdist_rpm.py b/.local/lib/python3.8/site-packages/numpy/distutils/command/bdist_rpm.py new file mode 100644 index 0000000..682e7a8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/command/bdist_rpm.py @@ -0,0 +1,22 @@ +import os +import sys +if 'setuptools' in sys.modules: + from setuptools.command.bdist_rpm import bdist_rpm as old_bdist_rpm +else: + from distutils.command.bdist_rpm import bdist_rpm as old_bdist_rpm + +class bdist_rpm(old_bdist_rpm): + + def _make_spec_file(self): + spec_file = old_bdist_rpm._make_spec_file(self) + + # Replace hardcoded setup.py script name + # with the real setup script name. 
+ setup_py = os.path.basename(sys.argv[0]) + if setup_py == 'setup.py': + return spec_file + new_spec_file = [] + for line in spec_file: + line = line.replace('setup.py', setup_py) + new_spec_file.append(line) + return new_spec_file diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/command/build.py b/.local/lib/python3.8/site-packages/numpy/distutils/command/build.py new file mode 100644 index 0000000..a4fda53 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/command/build.py @@ -0,0 +1,61 @@ +import os +import sys +from distutils.command.build import build as old_build +from distutils.util import get_platform +from numpy.distutils.command.config_compiler import show_fortran_compilers + +class build(old_build): + + sub_commands = [('config_cc', lambda *args: True), + ('config_fc', lambda *args: True), + ('build_src', old_build.has_ext_modules), + ] + old_build.sub_commands + + user_options = old_build.user_options + [ + ('fcompiler=', None, + "specify the Fortran compiler type"), + ('warn-error', None, + "turn all warnings into errors (-Werror)"), + ('cpu-baseline=', None, + "specify a list of enabled baseline CPU optimizations"), + ('cpu-dispatch=', None, + "specify a list of dispatched CPU optimizations"), + ('disable-optimization', None, + "disable CPU optimized code(dispatch,simd,fast...)"), + ('simd-test=', None, + "specify a list of CPU optimizations to be tested against NumPy SIMD interface"), + ] + + help_options = old_build.help_options + [ + ('help-fcompiler', None, "list available Fortran compilers", + show_fortran_compilers), + ] + + def initialize_options(self): + old_build.initialize_options(self) + self.fcompiler = None + self.warn_error = False + self.cpu_baseline = "min" + self.cpu_dispatch = "max -xop -fma4" # drop AMD legacy features by default + self.disable_optimization = False + """ + the '_simd' module is a very large. Adding more dispatched features + will increase binary size and compile time. By default we minimize + the targeted features to those most commonly used by the NumPy SIMD interface(NPYV), + NOTE: any specified features will be ignored if they're: + - part of the baseline(--cpu-baseline) + - not part of dispatch-able features(--cpu-dispatch) + - not supported by compiler or platform + """ + self.simd_test = "BASELINE SSE2 SSE42 XOP FMA4 (FMA3 AVX2) AVX512F AVX512_SKX VSX VSX2 VSX3 NEON ASIMD" + + def finalize_options(self): + build_scripts = self.build_scripts + old_build.finalize_options(self) + plat_specifier = ".{}-{}.{}".format(get_platform(), *sys.version_info[:2]) + if build_scripts is None: + self.build_scripts = os.path.join(self.build_base, + 'scripts' + plat_specifier) + + def run(self): + old_build.run(self) diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/command/build_clib.py b/.local/lib/python3.8/site-packages/numpy/distutils/command/build_clib.py new file mode 100644 index 0000000..45201f9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/command/build_clib.py @@ -0,0 +1,468 @@ +""" Modified version of build_clib that handles fortran source files. 
+""" +import os +from glob import glob +import shutil +from distutils.command.build_clib import build_clib as old_build_clib +from distutils.errors import DistutilsSetupError, DistutilsError, \ + DistutilsFileError + +from numpy.distutils import log +from distutils.dep_util import newer_group +from numpy.distutils.misc_util import ( + filter_sources, get_lib_source_files, get_numpy_include_dirs, + has_cxx_sources, has_f_sources, is_sequence +) +from numpy.distutils.ccompiler_opt import new_ccompiler_opt + +# Fix Python distutils bug sf #1718574: +_l = old_build_clib.user_options +for _i in range(len(_l)): + if _l[_i][0] in ['build-clib', 'build-temp']: + _l[_i] = (_l[_i][0] + '=',) + _l[_i][1:] +# + + +class build_clib(old_build_clib): + + description = "build C/C++/F libraries used by Python extensions" + + user_options = old_build_clib.user_options + [ + ('fcompiler=', None, + "specify the Fortran compiler type"), + ('inplace', 'i', 'Build in-place'), + ('parallel=', 'j', + "number of parallel jobs"), + ('warn-error', None, + "turn all warnings into errors (-Werror)"), + ('cpu-baseline=', None, + "specify a list of enabled baseline CPU optimizations"), + ('cpu-dispatch=', None, + "specify a list of dispatched CPU optimizations"), + ('disable-optimization', None, + "disable CPU optimized code(dispatch,simd,fast...)"), + ] + + boolean_options = old_build_clib.boolean_options + \ + ['inplace', 'warn-error', 'disable-optimization'] + + def initialize_options(self): + old_build_clib.initialize_options(self) + self.fcompiler = None + self.inplace = 0 + self.parallel = None + self.warn_error = None + self.cpu_baseline = None + self.cpu_dispatch = None + self.disable_optimization = None + + + def finalize_options(self): + if self.parallel: + try: + self.parallel = int(self.parallel) + except ValueError as e: + raise ValueError("--parallel/-j argument must be an integer") from e + old_build_clib.finalize_options(self) + self.set_undefined_options('build', + ('parallel', 'parallel'), + ('warn_error', 'warn_error'), + ('cpu_baseline', 'cpu_baseline'), + ('cpu_dispatch', 'cpu_dispatch'), + ('disable_optimization', 'disable_optimization') + ) + + def have_f_sources(self): + for (lib_name, build_info) in self.libraries: + if has_f_sources(build_info.get('sources', [])): + return True + return False + + def have_cxx_sources(self): + for (lib_name, build_info) in self.libraries: + if has_cxx_sources(build_info.get('sources', [])): + return True + return False + + def run(self): + if not self.libraries: + return + + # Make sure that library sources are complete. + languages = [] + + # Make sure that extension sources are complete. 
+ self.run_command('build_src') + + for (lib_name, build_info) in self.libraries: + l = build_info.get('language', None) + if l and l not in languages: + languages.append(l) + + from distutils.ccompiler import new_compiler + self.compiler = new_compiler(compiler=self.compiler, + dry_run=self.dry_run, + force=self.force) + self.compiler.customize(self.distribution, + need_cxx=self.have_cxx_sources()) + + if self.warn_error: + self.compiler.compiler.append('-Werror') + self.compiler.compiler_so.append('-Werror') + + libraries = self.libraries + self.libraries = None + self.compiler.customize_cmd(self) + self.libraries = libraries + + self.compiler.show_customization() + + if not self.disable_optimization: + dispatch_hpath = os.path.join("numpy", "distutils", "include", "npy_cpu_dispatch_config.h") + dispatch_hpath = os.path.join(self.get_finalized_command("build_src").build_src, dispatch_hpath) + opt_cache_path = os.path.abspath( + os.path.join(self.build_temp, 'ccompiler_opt_cache_clib.py') + ) + if hasattr(self, "compiler_opt"): + # By default `CCompilerOpt` update the cache at the exit of + # the process, which may lead to duplicate building + # (see build_extension()/force_rebuild) if run() called + # multiple times within the same os process/thread without + # giving the chance the previous instances of `CCompilerOpt` + # to update the cache. + self.compiler_opt.cache_flush() + + self.compiler_opt = new_ccompiler_opt( + compiler=self.compiler, dispatch_hpath=dispatch_hpath, + cpu_baseline=self.cpu_baseline, cpu_dispatch=self.cpu_dispatch, + cache_path=opt_cache_path + ) + def report(copt): + log.info("\n########### CLIB COMPILER OPTIMIZATION ###########") + log.info(copt.report(full=True)) + + import atexit + atexit.register(report, self.compiler_opt) + + if self.have_f_sources(): + from numpy.distutils.fcompiler import new_fcompiler + self._f_compiler = new_fcompiler(compiler=self.fcompiler, + verbose=self.verbose, + dry_run=self.dry_run, + force=self.force, + requiref90='f90' in languages, + c_compiler=self.compiler) + if self._f_compiler is not None: + self._f_compiler.customize(self.distribution) + + libraries = self.libraries + self.libraries = None + self._f_compiler.customize_cmd(self) + self.libraries = libraries + + self._f_compiler.show_customization() + else: + self._f_compiler = None + + self.build_libraries(self.libraries) + + if self.inplace: + for l in self.distribution.installed_libraries: + libname = self.compiler.library_filename(l.name) + source = os.path.join(self.build_clib, libname) + target = os.path.join(l.target_dir, libname) + self.mkpath(l.target_dir) + shutil.copy(source, target) + + def get_source_files(self): + self.check_library_list(self.libraries) + filenames = [] + for lib in self.libraries: + filenames.extend(get_lib_source_files(lib)) + return filenames + + def build_libraries(self, libraries): + for (lib_name, build_info) in libraries: + self.build_a_library(build_info, lib_name, libraries) + + def assemble_flags(self, in_flags): + """ Assemble flags from flag list + + Parameters + ---------- + in_flags : None or sequence + None corresponds to empty list. Sequence elements can be strings + or callables that return lists of strings. Callable takes `self` as + single parameter. 
+ + Returns + ------- + out_flags : list + """ + if in_flags is None: + return [] + out_flags = [] + for in_flag in in_flags: + if callable(in_flag): + out_flags += in_flag(self) + else: + out_flags.append(in_flag) + return out_flags + + def build_a_library(self, build_info, lib_name, libraries): + # default compilers + compiler = self.compiler + fcompiler = self._f_compiler + + sources = build_info.get('sources') + if sources is None or not is_sequence(sources): + raise DistutilsSetupError(("in 'libraries' option (library '%s'), " + + "'sources' must be present and must be " + + "a list of source filenames") % lib_name) + sources = list(sources) + + c_sources, cxx_sources, f_sources, fmodule_sources \ + = filter_sources(sources) + requiref90 = not not fmodule_sources or \ + build_info.get('language', 'c') == 'f90' + + # save source type information so that build_ext can use it. + source_languages = [] + if c_sources: + source_languages.append('c') + if cxx_sources: + source_languages.append('c++') + if requiref90: + source_languages.append('f90') + elif f_sources: + source_languages.append('f77') + build_info['source_languages'] = source_languages + + lib_file = compiler.library_filename(lib_name, + output_dir=self.build_clib) + depends = sources + build_info.get('depends', []) + + force_rebuild = self.force + if not self.disable_optimization and not self.compiler_opt.is_cached(): + log.debug("Detected changes on compiler optimizations") + force_rebuild = True + if not (force_rebuild or newer_group(depends, lib_file, 'newer')): + log.debug("skipping '%s' library (up-to-date)", lib_name) + return + else: + log.info("building '%s' library", lib_name) + + config_fc = build_info.get('config_fc', {}) + if fcompiler is not None and config_fc: + log.info('using additional config_fc from setup script ' + 'for fortran compiler: %s' + % (config_fc,)) + from numpy.distutils.fcompiler import new_fcompiler + fcompiler = new_fcompiler(compiler=fcompiler.compiler_type, + verbose=self.verbose, + dry_run=self.dry_run, + force=self.force, + requiref90=requiref90, + c_compiler=self.compiler) + if fcompiler is not None: + dist = self.distribution + base_config_fc = dist.get_option_dict('config_fc').copy() + base_config_fc.update(config_fc) + fcompiler.customize(base_config_fc) + + # check availability of Fortran compilers + if (f_sources or fmodule_sources) and fcompiler is None: + raise DistutilsError("library %s has Fortran sources" + " but no Fortran compiler found" % (lib_name)) + + if fcompiler is not None: + fcompiler.extra_f77_compile_args = build_info.get( + 'extra_f77_compile_args') or [] + fcompiler.extra_f90_compile_args = build_info.get( + 'extra_f90_compile_args') or [] + + macros = build_info.get('macros') + if macros is None: + macros = [] + include_dirs = build_info.get('include_dirs') + if include_dirs is None: + include_dirs = [] + # Flags can be strings, or callables that return a list of strings. 
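The comment above is the whole contract of assemble_flags(): literal flags pass through, callables are invoked with the command object so flags can be computed at build time. A self-contained rendition with hypothetical flags (no distutils required):

    def assemble_flags(in_flags, cmd=None):
        # None -> []; strings pass through; callables receive the command
        if in_flags is None:
            return []
        out_flags = []
        for in_flag in in_flags:
            if callable(in_flag):
                out_flags += in_flag(cmd)
            else:
                out_flags.append(in_flag)
        return out_flags

    print(assemble_flags(['-O2', lambda cmd: ['-fno-strict-aliasing']]))
    # -> ['-O2', '-fno-strict-aliasing']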
+ extra_postargs = self.assemble_flags( + build_info.get('extra_compiler_args')) + extra_cflags = self.assemble_flags( + build_info.get('extra_cflags')) + extra_cxxflags = self.assemble_flags( + build_info.get('extra_cxxflags')) + + include_dirs.extend(get_numpy_include_dirs()) + # where compiled F90 module files are: + module_dirs = build_info.get('module_dirs') or [] + module_build_dir = os.path.dirname(lib_file) + if requiref90: + self.mkpath(module_build_dir) + + if compiler.compiler_type == 'msvc': + # this hack works around the msvc compiler attributes + # problem, msvc uses its own convention :( + c_sources += cxx_sources + cxx_sources = [] + + # filtering C dispatch-table sources when optimization is not disabled, + # otherwise treated as normal sources. + copt_c_sources = [] + copt_cxx_sources = [] + copt_baseline_flags = [] + copt_macros = [] + if not self.disable_optimization: + bsrc_dir = self.get_finalized_command("build_src").build_src + dispatch_hpath = os.path.join("numpy", "distutils", "include") + dispatch_hpath = os.path.join(bsrc_dir, dispatch_hpath) + include_dirs.append(dispatch_hpath) + + copt_build_src = None if self.inplace else bsrc_dir + for _srcs, _dst, _ext in ( + ((c_sources,), copt_c_sources, ('.dispatch.c',)), + ((c_sources, cxx_sources), copt_cxx_sources, + ('.dispatch.cpp', '.dispatch.cxx')) + ): + for _src in _srcs: + _dst += [ + _src.pop(_src.index(s)) + for s in _src[:] if s.endswith(_ext) + ] + copt_baseline_flags = self.compiler_opt.cpu_baseline_flags() + else: + copt_macros.append(("NPY_DISABLE_OPTIMIZATION", 1)) + + # the C++ compiler is shared by the dispatch-able C++ sources here + # and the plain C++ sources below, so obtain it before either block + cxx_compiler = compiler.cxx_compiler() + objects = [] + if copt_cxx_sources: + log.info("compiling C++ dispatch-able sources") + objects += self.compiler_opt.try_dispatch( + copt_cxx_sources, + output_dir=self.build_temp, + src_dir=copt_build_src, + macros=macros + copt_macros, + include_dirs=include_dirs, + debug=self.debug, + extra_postargs=extra_postargs + extra_cxxflags, + ccompiler=cxx_compiler + ) + + if copt_c_sources: + log.info("compiling C dispatch-able sources") + objects += self.compiler_opt.try_dispatch( + copt_c_sources, + output_dir=self.build_temp, + src_dir=copt_build_src, + macros=macros + copt_macros, + include_dirs=include_dirs, + debug=self.debug, + extra_postargs=extra_postargs + extra_cflags) + + if c_sources: + log.info("compiling C sources") + objects += compiler.compile( + c_sources, + output_dir=self.build_temp, + macros=macros + copt_macros, + include_dirs=include_dirs, + debug=self.debug, + extra_postargs=(extra_postargs + + copt_baseline_flags + + extra_cflags)) + + if cxx_sources: + log.info("compiling C++ sources") + cxx_objects = cxx_compiler.compile( + cxx_sources, + output_dir=self.build_temp, + macros=macros + copt_macros, + include_dirs=include_dirs, + debug=self.debug, + extra_postargs=(extra_postargs + + copt_baseline_flags + + extra_cxxflags)) + objects.extend(cxx_objects) + + if f_sources or fmodule_sources: + extra_postargs = [] + f_objects = [] + + if requiref90: + if fcompiler.module_dir_switch is None: + existing_modules = glob('*.mod') + extra_postargs += fcompiler.module_options( + module_dirs, module_build_dir) + + if fmodule_sources: + log.info("compiling Fortran 90 module sources") + f_objects += fcompiler.compile(fmodule_sources, + output_dir=self.build_temp, + macros=macros, + include_dirs=include_dirs, + debug=self.debug, + extra_postargs=extra_postargs) + + if requiref90 and self._f_compiler.module_dir_switch is None: + # move new compiled F90 module files to module_build_dir + for f in
glob('*.mod'): + if f in existing_modules: + continue + t = os.path.join(module_build_dir, f) + if os.path.abspath(f) == os.path.abspath(t): + continue + if os.path.isfile(t): + os.remove(t) + try: + self.move_file(f, module_build_dir) + except DistutilsFileError: + log.warn('failed to move %r to %r' + % (f, module_build_dir)) + + if f_sources: + log.info("compiling Fortran sources") + f_objects += fcompiler.compile(f_sources, + output_dir=self.build_temp, + macros=macros, + include_dirs=include_dirs, + debug=self.debug, + extra_postargs=extra_postargs) + else: + f_objects = [] + + if f_objects and not fcompiler.can_ccompiler_link(compiler): + # Default linker cannot link Fortran object files, and results + # need to be wrapped later. Instead of creating a real static + # library, just keep track of the object files. + listfn = os.path.join(self.build_clib, + lib_name + '.fobjects') + with open(listfn, 'w') as f: + f.write("\n".join(os.path.abspath(obj) for obj in f_objects)) + + listfn = os.path.join(self.build_clib, + lib_name + '.cobjects') + with open(listfn, 'w') as f: + f.write("\n".join(os.path.abspath(obj) for obj in objects)) + + # create empty "library" file for dependency tracking + lib_fname = os.path.join(self.build_clib, + lib_name + compiler.static_lib_extension) + with open(lib_fname, 'wb') as f: + pass + else: + # assume that default linker is suitable for + # linking Fortran object files + objects.extend(f_objects) + compiler.create_static_lib(objects, lib_name, + output_dir=self.build_clib, + debug=self.debug) + + # fix library dependencies + clib_libraries = build_info.get('libraries', []) + for lname, binfo in libraries: + if lname in clib_libraries: + clib_libraries.extend(binfo.get('libraries', [])) + if clib_libraries: + build_info['libraries'] = clib_libraries diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/command/build_ext.py b/.local/lib/python3.8/site-packages/numpy/distutils/command/build_ext.py new file mode 100644 index 0000000..8b568c1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/command/build_ext.py @@ -0,0 +1,733 @@ +""" Modified version of build_ext that handles fortran source files. 
+ +""" +import os +import subprocess +from glob import glob + +from distutils.dep_util import newer_group +from distutils.command.build_ext import build_ext as old_build_ext +from distutils.errors import DistutilsFileError, DistutilsSetupError,\ + DistutilsError +from distutils.file_util import copy_file + +from numpy.distutils import log +from numpy.distutils.exec_command import filepath_from_subprocess_output +from numpy.distutils.system_info import combine_paths +from numpy.distutils.misc_util import ( + filter_sources, get_ext_source_files, get_numpy_include_dirs, + has_cxx_sources, has_f_sources, is_sequence +) +from numpy.distutils.command.config_compiler import show_fortran_compilers +from numpy.distutils.ccompiler_opt import new_ccompiler_opt, CCompilerOpt + +class build_ext (old_build_ext): + + description = "build C/C++/F extensions (compile/link to build directory)" + + user_options = old_build_ext.user_options + [ + ('fcompiler=', None, + "specify the Fortran compiler type"), + ('parallel=', 'j', + "number of parallel jobs"), + ('warn-error', None, + "turn all warnings into errors (-Werror)"), + ('cpu-baseline=', None, + "specify a list of enabled baseline CPU optimizations"), + ('cpu-dispatch=', None, + "specify a list of dispatched CPU optimizations"), + ('disable-optimization', None, + "disable CPU optimized code(dispatch,simd,fast...)"), + ('simd-test=', None, + "specify a list of CPU optimizations to be tested against NumPy SIMD interface"), + ] + + help_options = old_build_ext.help_options + [ + ('help-fcompiler', None, "list available Fortran compilers", + show_fortran_compilers), + ] + + boolean_options = old_build_ext.boolean_options + ['warn-error', 'disable-optimization'] + + def initialize_options(self): + old_build_ext.initialize_options(self) + self.fcompiler = None + self.parallel = None + self.warn_error = None + self.cpu_baseline = None + self.cpu_dispatch = None + self.disable_optimization = None + self.simd_test = None + + def finalize_options(self): + if self.parallel: + try: + self.parallel = int(self.parallel) + except ValueError as e: + raise ValueError("--parallel/-j argument must be an integer") from e + + # Ensure that self.include_dirs and self.distribution.include_dirs + # refer to the same list object. finalize_options will modify + # self.include_dirs, but self.distribution.include_dirs is used + # during the actual build. + # self.include_dirs is None unless paths are specified with + # --include-dirs. + # The include paths will be passed to the compiler in the order: + # numpy paths, --include-dirs paths, Python include path. + if isinstance(self.include_dirs, str): + self.include_dirs = self.include_dirs.split(os.pathsep) + incl_dirs = self.include_dirs or [] + if self.distribution.include_dirs is None: + self.distribution.include_dirs = [] + self.include_dirs = self.distribution.include_dirs + self.include_dirs.extend(incl_dirs) + + old_build_ext.finalize_options(self) + self.set_undefined_options('build', + ('parallel', 'parallel'), + ('warn_error', 'warn_error'), + ('cpu_baseline', 'cpu_baseline'), + ('cpu_dispatch', 'cpu_dispatch'), + ('disable_optimization', 'disable_optimization'), + ('simd_test', 'simd_test') + ) + CCompilerOpt.conf_target_groups["simd_test"] = self.simd_test + + def run(self): + if not self.extensions: + return + + # Make sure that extension sources are complete. 
+ self.run_command('build_src') + + if self.distribution.has_c_libraries(): + if self.inplace: + if self.distribution.have_run.get('build_clib'): + log.warn('build_clib already run, it is too late to ' + 'ensure in-place build of build_clib') + build_clib = self.distribution.get_command_obj( + 'build_clib') + else: + build_clib = self.distribution.get_command_obj( + 'build_clib') + build_clib.inplace = 1 + build_clib.ensure_finalized() + build_clib.run() + self.distribution.have_run['build_clib'] = 1 + + else: + self.run_command('build_clib') + build_clib = self.get_finalized_command('build_clib') + self.library_dirs.append(build_clib.build_clib) + else: + build_clib = None + + # Not including C libraries to the list of + # extension libraries automatically to prevent + # bogus linking commands. Extensions must + # explicitly specify the C libraries that they use. + + from distutils.ccompiler import new_compiler + from numpy.distutils.fcompiler import new_fcompiler + + compiler_type = self.compiler + # Initialize C compiler: + self.compiler = new_compiler(compiler=compiler_type, + verbose=self.verbose, + dry_run=self.dry_run, + force=self.force) + self.compiler.customize(self.distribution) + self.compiler.customize_cmd(self) + + if self.warn_error: + self.compiler.compiler.append('-Werror') + self.compiler.compiler_so.append('-Werror') + + self.compiler.show_customization() + + if not self.disable_optimization: + dispatch_hpath = os.path.join("numpy", "distutils", "include", "npy_cpu_dispatch_config.h") + dispatch_hpath = os.path.join(self.get_finalized_command("build_src").build_src, dispatch_hpath) + opt_cache_path = os.path.abspath( + os.path.join(self.build_temp, 'ccompiler_opt_cache_ext.py') + ) + if hasattr(self, "compiler_opt"): + # By default `CCompilerOpt` update the cache at the exit of + # the process, which may lead to duplicate building + # (see build_extension()/force_rebuild) if run() called + # multiple times within the same os process/thread without + # giving the chance the previous instances of `CCompilerOpt` + # to update the cache. + self.compiler_opt.cache_flush() + + self.compiler_opt = new_ccompiler_opt( + compiler=self.compiler, dispatch_hpath=dispatch_hpath, + cpu_baseline=self.cpu_baseline, cpu_dispatch=self.cpu_dispatch, + cache_path=opt_cache_path + ) + def report(copt): + log.info("\n########### EXT COMPILER OPTIMIZATION ###########") + log.info(copt.report(full=True)) + + import atexit + atexit.register(report, self.compiler_opt) + + # Setup directory for storing generated extra DLL files on Windows + self.extra_dll_dir = os.path.join(self.build_temp, '.libs') + if not os.path.isdir(self.extra_dll_dir): + os.makedirs(self.extra_dll_dir) + + # Create mapping of libraries built by build_clib: + clibs = {} + if build_clib is not None: + for libname, build_info in build_clib.libraries or []: + if libname in clibs and clibs[libname] != build_info: + log.warn('library %r defined more than once,' + ' overwriting build_info\n%s... \nwith\n%s...' + % (libname, repr(clibs[libname])[:300], repr(build_info)[:300])) + clibs[libname] = build_info + # .. and distribution libraries: + for libname, build_info in self.distribution.libraries or []: + if libname in clibs: + # build_clib libraries have a precedence before distribution ones + continue + clibs[libname] = build_info + + # Determine if C++/Fortran 77/Fortran 90 compilers are needed. + # Update extension libraries, library_dirs, and macros. 
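The clibs mapping built just above encodes a precedence rule: a library built by build_clib shadows a same-named entry from the distribution. The merge in isolation (library names and build_info dicts are hypothetical):

    def merge_clibs(build_clib_libs, dist_libs):
        clibs = {}
        for libname, build_info in build_clib_libs or []:
            clibs[libname] = build_info
        for libname, build_info in dist_libs or []:
            # build_clib libraries take precedence over distribution ones
            clibs.setdefault(libname, build_info)
        return clibs

    print(merge_clibs([('npymath', {'sources': ['a.c']})],
                      [('npymath', {'sources': ['b.c']}),
                       ('npyrandom', {'sources': ['r.c']})]))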
+ all_languages = set() + for ext in self.extensions: + ext_languages = set() + c_libs = [] + c_lib_dirs = [] + macros = [] + for libname in ext.libraries: + if libname in clibs: + binfo = clibs[libname] + c_libs += binfo.get('libraries', []) + c_lib_dirs += binfo.get('library_dirs', []) + for m in binfo.get('macros', []): + if m not in macros: + macros.append(m) + + for l in clibs.get(libname, {}).get('source_languages', []): + ext_languages.add(l) + if c_libs: + new_c_libs = ext.libraries + c_libs + log.info('updating extension %r libraries from %r to %r' + % (ext.name, ext.libraries, new_c_libs)) + ext.libraries = new_c_libs + ext.library_dirs = ext.library_dirs + c_lib_dirs + if macros: + log.info('extending extension %r defined_macros with %r' + % (ext.name, macros)) + ext.define_macros = ext.define_macros + macros + + # determine extension languages + if has_f_sources(ext.sources): + ext_languages.add('f77') + if has_cxx_sources(ext.sources): + ext_languages.add('c++') + l = ext.language or self.compiler.detect_language(ext.sources) + if l: + ext_languages.add(l) + + # reset language attribute for choosing proper linker + # + # When we build extensions with multiple languages, we have to + # choose a linker. The rules here are: + # 1. if there is Fortran code, always prefer the Fortran linker, + # 2. otherwise prefer C++ over C, + # 3. Users can force a particular linker by using + # `language='c'` # or 'c++', 'f90', 'f77' + # in their config.add_extension() calls. + if 'c++' in ext_languages: + ext_language = 'c++' + else: + ext_language = 'c' # default + + has_fortran = False + if 'f90' in ext_languages: + ext_language = 'f90' + has_fortran = True + elif 'f77' in ext_languages: + ext_language = 'f77' + has_fortran = True + + if not ext.language or has_fortran: + if l and l != ext_language and ext.language: + log.warn('resetting extension %r language from %r to %r.' % + (ext.name, l, ext_language)) + + ext.language = ext_language + + # global language + all_languages.update(ext_languages) + + need_f90_compiler = 'f90' in all_languages + need_f77_compiler = 'f77' in all_languages + need_cxx_compiler = 'c++' in all_languages + + # Initialize C++ compiler: + if need_cxx_compiler: + self._cxx_compiler = new_compiler(compiler=compiler_type, + verbose=self.verbose, + dry_run=self.dry_run, + force=self.force) + compiler = self._cxx_compiler + compiler.customize(self.distribution, need_cxx=need_cxx_compiler) + compiler.customize_cmd(self) + compiler.show_customization() + self._cxx_compiler = compiler.cxx_compiler() + else: + self._cxx_compiler = None + + # Initialize Fortran 77 compiler: + if need_f77_compiler: + ctype = self.fcompiler + self._f77_compiler = new_fcompiler(compiler=self.fcompiler, + verbose=self.verbose, + dry_run=self.dry_run, + force=self.force, + requiref90=False, + c_compiler=self.compiler) + fcompiler = self._f77_compiler + if fcompiler: + ctype = fcompiler.compiler_type + fcompiler.customize(self.distribution) + if fcompiler and fcompiler.get_version(): + fcompiler.customize_cmd(self) + fcompiler.show_customization() + else: + self.warn('f77_compiler=%s is not available.' 
% + (ctype)) + self._f77_compiler = None + else: + self._f77_compiler = None + + # Initialize Fortran 90 compiler: + if need_f90_compiler: + ctype = self.fcompiler + self._f90_compiler = new_fcompiler(compiler=self.fcompiler, + verbose=self.verbose, + dry_run=self.dry_run, + force=self.force, + requiref90=True, + c_compiler=self.compiler) + fcompiler = self._f90_compiler + if fcompiler: + ctype = fcompiler.compiler_type + fcompiler.customize(self.distribution) + if fcompiler and fcompiler.get_version(): + fcompiler.customize_cmd(self) + fcompiler.show_customization() + else: + self.warn('f90_compiler=%s is not available.' % + (ctype)) + self._f90_compiler = None + else: + self._f90_compiler = None + + # Build extensions + self.build_extensions() + + # Copy over any extra DLL files + # FIXME: In the case where there are more than two packages, + # we blindly assume that both packages need all of the libraries, + # resulting in a larger wheel than is required. This should be fixed, + # but it's so rare that I won't bother to handle it. + pkg_roots = { + self.get_ext_fullname(ext.name).split('.')[0] + for ext in self.extensions + } + for pkg_root in pkg_roots: + shared_lib_dir = os.path.join(pkg_root, '.libs') + if not self.inplace: + shared_lib_dir = os.path.join(self.build_lib, shared_lib_dir) + for fn in os.listdir(self.extra_dll_dir): + if not os.path.isdir(shared_lib_dir): + os.makedirs(shared_lib_dir) + if not fn.lower().endswith('.dll'): + continue + runtime_lib = os.path.join(self.extra_dll_dir, fn) + copy_file(runtime_lib, shared_lib_dir) + + def swig_sources(self, sources, extensions=None): + # Do nothing. Swig sources have been handled in build_src command. + return sources + + def build_extension(self, ext): + sources = ext.sources + if sources is None or not is_sequence(sources): + raise DistutilsSetupError( + ("in 'ext_modules' option (extension '%s'), " + + "'sources' must be present and must be " + + "a list of source filenames") % ext.name) + sources = list(sources) + + if not sources: + return + + fullname = self.get_ext_fullname(ext.name) + if self.inplace: + modpath = fullname.split('.') + package = '.'.join(modpath[0:-1]) + base = modpath[-1] + build_py = self.get_finalized_command('build_py') + package_dir = build_py.get_package_dir(package) + ext_filename = os.path.join(package_dir, + self.get_ext_filename(base)) + else: + ext_filename = os.path.join(self.build_lib, + self.get_ext_filename(fullname)) + depends = sources + ext.depends + + force_rebuild = self.force + if not self.disable_optimization and not self.compiler_opt.is_cached(): + log.debug("Detected changes on compiler optimizations") + force_rebuild = True + if not (force_rebuild or newer_group(depends, ext_filename, 'newer')): + log.debug("skipping '%s' extension (up-to-date)", ext.name) + return + else: + log.info("building '%s' extension", ext.name) + + extra_args = ext.extra_compile_args or [] + extra_cflags = getattr(ext, 'extra_c_compile_args', None) or [] + extra_cxxflags = getattr(ext, 'extra_cxx_compile_args', None) or [] + + macros = ext.define_macros[:] + for undef in ext.undef_macros: + macros.append((undef,)) + + c_sources, cxx_sources, f_sources, fmodule_sources = \ + filter_sources(ext.sources) + + if self.compiler.compiler_type == 'msvc': + if cxx_sources: + # Needed to compile kiva.agg._agg extension. 
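build_extension() above folds three signals into its rebuild decision: --force, a stale compiler-optimization cache, and dependency timestamps. Condensed into one predicate (distutils.dep_util is the module the real code uses; the stale-cache signal is reduced to a boolean here):

    from distutils.dep_util import newer_group

    def needs_rebuild(depends, target, force=False, opt_cache_stale=False):
        # 'newer' makes a missing dependency count as newer instead of
        # raising, matching the newer_group() calls above
        return force or opt_cache_stale or newer_group(
            depends, target, 'newer')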
+ extra_args.append('/Zm1000') + # this hack works around the msvc compiler attributes + # problem, msvc uses its own convention :( + c_sources += cxx_sources + cxx_sources = [] + + # Set Fortran/C++ compilers for compilation and linking. + if ext.language == 'f90': + fcompiler = self._f90_compiler + elif ext.language == 'f77': + fcompiler = self._f77_compiler + else: # in case ext.language is c++, for instance + fcompiler = self._f90_compiler or self._f77_compiler + if fcompiler is not None: + fcompiler.extra_f77_compile_args = (ext.extra_f77_compile_args or []) if hasattr( + ext, 'extra_f77_compile_args') else [] + fcompiler.extra_f90_compile_args = (ext.extra_f90_compile_args or []) if hasattr( + ext, 'extra_f90_compile_args') else [] + cxx_compiler = self._cxx_compiler + + # check for the availability of required compilers + if cxx_sources and cxx_compiler is None: + raise DistutilsError("extension %r has C++ sources" + "but no C++ compiler found" % (ext.name)) + if (f_sources or fmodule_sources) and fcompiler is None: + raise DistutilsError("extension %r has Fortran sources " + "but no Fortran compiler found" % (ext.name)) + if ext.language in ['f77', 'f90'] and fcompiler is None: + self.warn("extension %r has Fortran libraries " + "but no Fortran linker found, using default linker" % (ext.name)) + if ext.language == 'c++' and cxx_compiler is None: + self.warn("extension %r has C++ libraries " + "but no C++ linker found, using default linker" % (ext.name)) + + kws = {'depends': ext.depends} + output_dir = self.build_temp + + include_dirs = ext.include_dirs + get_numpy_include_dirs() + + # filtering C dispatch-table sources when optimization is not disabled, + # otherwise treated as normal sources. + copt_c_sources = [] + copt_cxx_sources = [] + copt_baseline_flags = [] + copt_macros = [] + if not self.disable_optimization: + bsrc_dir = self.get_finalized_command("build_src").build_src + dispatch_hpath = os.path.join("numpy", "distutils", "include") + dispatch_hpath = os.path.join(bsrc_dir, dispatch_hpath) + include_dirs.append(dispatch_hpath) + + copt_build_src = None if self.inplace else bsrc_dir + for _srcs, _dst, _ext in ( + ((c_sources,), copt_c_sources, ('.dispatch.c',)), + ((c_sources, cxx_sources), copt_cxx_sources, + ('.dispatch.cpp', '.dispatch.cxx')) + ): + for _src in _srcs: + _dst += [ + _src.pop(_src.index(s)) + for s in _src[:] if s.endswith(_ext) + ] + copt_baseline_flags = self.compiler_opt.cpu_baseline_flags() + else: + copt_macros.append(("NPY_DISABLE_OPTIMIZATION", 1)) + + c_objects = [] + if copt_cxx_sources: + log.info("compiling C++ dispatch-able sources") + c_objects += self.compiler_opt.try_dispatch( + copt_cxx_sources, + output_dir=output_dir, + src_dir=copt_build_src, + macros=macros + copt_macros, + include_dirs=include_dirs, + debug=self.debug, + extra_postargs=extra_args + extra_cxxflags, + ccompiler=cxx_compiler, + **kws + ) + if copt_c_sources: + log.info("compiling C dispatch-able sources") + c_objects += self.compiler_opt.try_dispatch( + copt_c_sources, + output_dir=output_dir, + src_dir=copt_build_src, + macros=macros + copt_macros, + include_dirs=include_dirs, + debug=self.debug, + extra_postargs=extra_args + extra_cflags, + **kws) + if c_sources: + log.info("compiling C sources") + c_objects += self.compiler.compile( + c_sources, + output_dir=output_dir, + macros=macros + copt_macros, + include_dirs=include_dirs, + debug=self.debug, + extra_postargs=(extra_args + copt_baseline_flags + + extra_cflags), + **kws) + if cxx_sources: + log.info("compiling 
C++ sources") + c_objects += cxx_compiler.compile( + cxx_sources, + output_dir=output_dir, + macros=macros + copt_macros, + include_dirs=include_dirs, + debug=self.debug, + extra_postargs=(extra_args + copt_baseline_flags + + extra_cxxflags), + **kws) + + extra_postargs = [] + f_objects = [] + if fmodule_sources: + log.info("compiling Fortran 90 module sources") + module_dirs = ext.module_dirs[:] + module_build_dir = os.path.join( + self.build_temp, os.path.dirname( + self.get_ext_filename(fullname))) + + self.mkpath(module_build_dir) + if fcompiler.module_dir_switch is None: + existing_modules = glob('*.mod') + extra_postargs += fcompiler.module_options( + module_dirs, module_build_dir) + f_objects += fcompiler.compile(fmodule_sources, + output_dir=self.build_temp, + macros=macros, + include_dirs=include_dirs, + debug=self.debug, + extra_postargs=extra_postargs, + depends=ext.depends) + + if fcompiler.module_dir_switch is None: + for f in glob('*.mod'): + if f in existing_modules: + continue + t = os.path.join(module_build_dir, f) + if os.path.abspath(f) == os.path.abspath(t): + continue + if os.path.isfile(t): + os.remove(t) + try: + self.move_file(f, module_build_dir) + except DistutilsFileError: + log.warn('failed to move %r to %r' % + (f, module_build_dir)) + if f_sources: + log.info("compiling Fortran sources") + f_objects += fcompiler.compile(f_sources, + output_dir=self.build_temp, + macros=macros, + include_dirs=include_dirs, + debug=self.debug, + extra_postargs=extra_postargs, + depends=ext.depends) + + if f_objects and not fcompiler.can_ccompiler_link(self.compiler): + unlinkable_fobjects = f_objects + objects = c_objects + else: + unlinkable_fobjects = [] + objects = c_objects + f_objects + + if ext.extra_objects: + objects.extend(ext.extra_objects) + extra_args = ext.extra_link_args or [] + libraries = self.get_libraries(ext)[:] + library_dirs = ext.library_dirs[:] + + linker = self.compiler.link_shared_object + # Always use system linker when using MSVC compiler. 
+ if self.compiler.compiler_type in ('msvc', 'intelw', 'intelemw'): + # expand libraries with fcompiler libraries as we are + # not using fcompiler linker + self._libs_with_msvc_and_fortran( + fcompiler, libraries, library_dirs) + + elif ext.language in ['f77', 'f90'] and fcompiler is not None: + linker = fcompiler.link_shared_object + if ext.language == 'c++' and cxx_compiler is not None: + linker = cxx_compiler.link_shared_object + + if fcompiler is not None: + objects, libraries = self._process_unlinkable_fobjects( + objects, libraries, + fcompiler, library_dirs, + unlinkable_fobjects) + + linker(objects, ext_filename, + libraries=libraries, + library_dirs=library_dirs, + runtime_library_dirs=ext.runtime_library_dirs, + extra_postargs=extra_args, + export_symbols=self.get_export_symbols(ext), + debug=self.debug, + build_temp=self.build_temp, + target_lang=ext.language) + + def _add_dummy_mingwex_sym(self, c_sources): + build_src = self.get_finalized_command("build_src").build_src + build_clib = self.get_finalized_command("build_clib").build_clib + objects = self.compiler.compile([os.path.join(build_src, + "gfortran_vs2003_hack.c")], + output_dir=self.build_temp) + self.compiler.create_static_lib( + objects, "_gfortran_workaround", output_dir=build_clib, debug=self.debug) + + def _process_unlinkable_fobjects(self, objects, libraries, + fcompiler, library_dirs, + unlinkable_fobjects): + libraries = list(libraries) + objects = list(objects) + unlinkable_fobjects = list(unlinkable_fobjects) + + # Expand possible fake static libraries to objects; + # make sure to iterate over a copy of the list as + # "fake" libraries will be removed as they are + # encountered + for lib in libraries[:]: + for libdir in library_dirs: + fake_lib = os.path.join(libdir, lib + '.fobjects') + if os.path.isfile(fake_lib): + # Replace fake static library + libraries.remove(lib) + with open(fake_lib, 'r') as f: + unlinkable_fobjects.extend(f.read().splitlines()) + + # Expand C objects + c_lib = os.path.join(libdir, lib + '.cobjects') + with open(c_lib, 'r') as f: + objects.extend(f.read().splitlines()) + + # Wrap unlinkable objects to a linkable one + if unlinkable_fobjects: + fobjects = [os.path.abspath(obj) for obj in unlinkable_fobjects] + wrapped = fcompiler.wrap_unlinkable_objects( + fobjects, output_dir=self.build_temp, + extra_dll_dir=self.extra_dll_dir) + objects.extend(wrapped) + + return objects, libraries + + def _libs_with_msvc_and_fortran(self, fcompiler, c_libraries, + c_library_dirs): + if fcompiler is None: + return + + for libname in c_libraries: + if libname.startswith('msvc'): + continue + fileexists = False + for libdir in c_library_dirs or []: + libfile = os.path.join(libdir, '%s.lib' % (libname)) + if os.path.isfile(libfile): + fileexists = True + break + if fileexists: + continue + # make g77-compiled static libs available to MSVC + fileexists = False + for libdir in c_library_dirs: + libfile = os.path.join(libdir, 'lib%s.a' % (libname)) + if os.path.isfile(libfile): + # copy libname.a file to name.lib so that MSVC linker + # can find it + libfile2 = os.path.join(self.build_temp, libname + '.lib') + copy_file(libfile, libfile2) + if self.build_temp not in c_library_dirs: + c_library_dirs.append(self.build_temp) + fileexists = True + break + if fileexists: + continue + log.warn('could not find library %r in directories %s' + % (libname, c_library_dirs)) + + # Always use system linker when using MSVC compiler. 
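_libs_with_msvc_and_fortran() mostly bridges naming conventions: MSVC's linker resolves foo.lib, while g77/gfortran produce libfoo.a. The copy trick on its own, with hypothetical directories:

    import os
    from distutils.file_util import copy_file

    def expose_gnu_archive_to_msvc(libdir, libname, build_temp):
        # copy libfoo.a to <build_temp>/foo.lib so MSVC finds it by name
        src = os.path.join(libdir, 'lib%s.a' % libname)
        if not os.path.isfile(src):
            return None
        dst = os.path.join(build_temp, '%s.lib' % libname)
        if not os.path.isfile(dst):
            copy_file(src, dst)
        return dst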
+ f_lib_dirs = [] + for dir in fcompiler.library_dirs: + # correct path when compiling in Cygwin but with normal Win + # Python + if dir.startswith('/usr/lib'): + try: + dir = subprocess.check_output(['cygpath', '-w', dir]) + except (OSError, subprocess.CalledProcessError): + pass + else: + dir = filepath_from_subprocess_output(dir) + f_lib_dirs.append(dir) + c_library_dirs.extend(f_lib_dirs) + + # make g77-compiled static libs available to MSVC + for lib in fcompiler.libraries: + if not lib.startswith('msvc'): + c_libraries.append(lib) + p = combine_paths(f_lib_dirs, 'lib' + lib + '.a') + if p: + dst_name = os.path.join(self.build_temp, lib + '.lib') + if not os.path.isfile(dst_name): + copy_file(p[0], dst_name) + if self.build_temp not in c_library_dirs: + c_library_dirs.append(self.build_temp) + + def get_source_files(self): + self.check_extensions_list(self.extensions) + filenames = [] + for ext in self.extensions: + filenames.extend(get_ext_source_files(ext)) + return filenames + + def get_outputs(self): + self.check_extensions_list(self.extensions) + + outputs = [] + for ext in self.extensions: + if not ext.sources: + continue + fullname = self.get_ext_fullname(ext.name) + outputs.append(os.path.join(self.build_lib, + self.get_ext_filename(fullname))) + return outputs diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/command/build_py.py b/.local/lib/python3.8/site-packages/numpy/distutils/command/build_py.py new file mode 100644 index 0000000..d30dc5b --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/command/build_py.py @@ -0,0 +1,31 @@ +from distutils.command.build_py import build_py as old_build_py +from numpy.distutils.misc_util import is_string + +class build_py(old_build_py): + + def run(self): + build_src = self.get_finalized_command('build_src') + if build_src.py_modules_dict and self.packages is None: + self.packages = list(build_src.py_modules_dict.keys ()) + old_build_py.run(self) + + def find_package_modules(self, package, package_dir): + modules = old_build_py.find_package_modules(self, package, package_dir) + + # Find build_src generated *.py files. + build_src = self.get_finalized_command('build_src') + modules += build_src.py_modules_dict.get(package, []) + + return modules + + def find_modules(self): + old_py_modules = self.py_modules[:] + new_py_modules = [_m for _m in self.py_modules if is_string(_m)] + self.py_modules[:] = new_py_modules + modules = old_build_py.find_modules(self) + self.py_modules[:] = old_py_modules + + return modules + + # XXX: Fix find_source_files for item in py_modules such that item is 3-tuple + # and item[2] is source file. diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/command/build_scripts.py b/.local/lib/python3.8/site-packages/numpy/distutils/command/build_scripts.py new file mode 100644 index 0000000..d5cadb2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/command/build_scripts.py @@ -0,0 +1,49 @@ +""" Modified version of build_scripts that handles building scripts from functions. 
+ +""" +from distutils.command.build_scripts import build_scripts as old_build_scripts +from numpy.distutils import log +from numpy.distutils.misc_util import is_string + +class build_scripts(old_build_scripts): + + def generate_scripts(self, scripts): + new_scripts = [] + func_scripts = [] + for script in scripts: + if is_string(script): + new_scripts.append(script) + else: + func_scripts.append(script) + if not func_scripts: + return new_scripts + + build_dir = self.build_dir + self.mkpath(build_dir) + for func in func_scripts: + script = func(build_dir) + if not script: + continue + if is_string(script): + log.info(" adding '%s' to scripts" % (script,)) + new_scripts.append(script) + else: + [log.info(" adding '%s' to scripts" % (s,)) for s in script] + new_scripts.extend(list(script)) + return new_scripts + + def run (self): + if not self.scripts: + return + + self.scripts = self.generate_scripts(self.scripts) + # Now make sure that the distribution object has this list of scripts. + # setuptools' develop command requires that this be a list of filenames, + # not functions. + self.distribution.scripts = self.scripts + + return old_build_scripts.run(self) + + def get_source_files(self): + from numpy.distutils.misc_util import get_script_files + return get_script_files(self.scripts) diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/command/build_src.py b/.local/lib/python3.8/site-packages/numpy/distutils/command/build_src.py new file mode 100644 index 0000000..5581011 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/command/build_src.py @@ -0,0 +1,773 @@ +""" Build swig and f2py sources. +""" +import os +import re +import sys +import shlex +import copy + +from distutils.command import build_ext +from distutils.dep_util import newer_group, newer +from distutils.util import get_platform +from distutils.errors import DistutilsError, DistutilsSetupError + + +# this import can't be done here, as it uses numpy stuff only available +# after it's installed +#import numpy.f2py +from numpy.distutils import log +from numpy.distutils.misc_util import ( + fortran_ext_match, appendpath, is_string, is_sequence, get_cmd + ) +from numpy.distutils.from_template import process_file as process_f_file +from numpy.distutils.conv_template import process_file as process_c_file + +def subst_vars(target, source, d): + """Substitute any occurrence of @foo@ by d['foo'] from source file into + target.""" + var = re.compile('@([a-zA-Z_]+)@') + with open(source, 'r') as fs: + with open(target, 'w') as ft: + for l in fs: + m = var.search(l) + if m: + ft.write(l.replace('@%s@' % m.group(1), d[m.group(1)])) + else: + ft.write(l) + +class build_src(build_ext.build_ext): + + description = "build sources from SWIG, F2PY files or a function" + + user_options = [ + ('build-src=', 'd', "directory to \"build\" sources to"), + ('f2py-opts=', None, "list of f2py command line options"), + ('swig=', None, "path to the SWIG executable"), + ('swig-opts=', None, "list of SWIG command line options"), + ('swig-cpp', None, "make SWIG create C++ files (default is autodetected from sources)"), + ('f2pyflags=', None, "additional flags to f2py (use --f2py-opts= instead)"), # obsolete + ('swigflags=', None, "additional flags to swig (use --swig-opts= instead)"), # obsolete + ('force', 'f', "forcibly build everything (ignore file timestamps)"), + ('inplace', 'i', + "ignore build-lib and put compiled extensions into the source " + + "directory alongside your pure Python modules"), + ('verbose-cfg', None, + 
"change logging level from WARN to INFO which will show all " + + "compiler output") + ] + + boolean_options = ['force', 'inplace', 'verbose-cfg'] + + help_options = [] + + def initialize_options(self): + self.extensions = None + self.package = None + self.py_modules = None + self.py_modules_dict = None + self.build_src = None + self.build_lib = None + self.build_base = None + self.force = None + self.inplace = None + self.package_dir = None + self.f2pyflags = None # obsolete + self.f2py_opts = None + self.swigflags = None # obsolete + self.swig_opts = None + self.swig_cpp = None + self.swig = None + self.verbose_cfg = None + + def finalize_options(self): + self.set_undefined_options('build', + ('build_base', 'build_base'), + ('build_lib', 'build_lib'), + ('force', 'force')) + if self.package is None: + self.package = self.distribution.ext_package + self.extensions = self.distribution.ext_modules + self.libraries = self.distribution.libraries or [] + self.py_modules = self.distribution.py_modules or [] + self.data_files = self.distribution.data_files or [] + + if self.build_src is None: + plat_specifier = ".{}-{}.{}".format(get_platform(), *sys.version_info[:2]) + self.build_src = os.path.join(self.build_base, 'src'+plat_specifier) + + # py_modules_dict is used in build_py.find_package_modules + self.py_modules_dict = {} + + if self.f2pyflags: + if self.f2py_opts: + log.warn('ignoring --f2pyflags as --f2py-opts already used') + else: + self.f2py_opts = self.f2pyflags + self.f2pyflags = None + if self.f2py_opts is None: + self.f2py_opts = [] + else: + self.f2py_opts = shlex.split(self.f2py_opts) + + if self.swigflags: + if self.swig_opts: + log.warn('ignoring --swigflags as --swig-opts already used') + else: + self.swig_opts = self.swigflags + self.swigflags = None + + if self.swig_opts is None: + self.swig_opts = [] + else: + self.swig_opts = shlex.split(self.swig_opts) + + # use options from build_ext command + build_ext = self.get_finalized_command('build_ext') + if self.inplace is None: + self.inplace = build_ext.inplace + if self.swig_cpp is None: + self.swig_cpp = build_ext.swig_cpp + for c in ['swig', 'swig_opt']: + o = '--'+c.replace('_', '-') + v = getattr(build_ext, c, None) + if v: + if getattr(self, c): + log.warn('both build_src and build_ext define %s option' % (o)) + else: + log.info('using "%s=%s" option from build_ext command' % (o, v)) + setattr(self, c, v) + + def run(self): + log.info("build_src") + if not (self.extensions or self.libraries): + return + self.build_sources() + + def build_sources(self): + + if self.inplace: + self.get_package_dir = \ + self.get_finalized_command('build_py').get_package_dir + + self.build_py_modules_sources() + + for libname_info in self.libraries: + self.build_library_sources(*libname_info) + + if self.extensions: + self.check_extensions_list(self.extensions) + + for ext in self.extensions: + self.build_extension_sources(ext) + + self.build_data_files_sources() + self.build_npy_pkg_config() + + def build_data_files_sources(self): + if not self.data_files: + return + log.info('building data_files sources') + from numpy.distutils.misc_util import get_data_files + new_data_files = [] + for data in self.data_files: + if isinstance(data, str): + new_data_files.append(data) + elif isinstance(data, tuple): + d, files = data + if self.inplace: + build_dir = self.get_package_dir('.'.join(d.split(os.sep))) + else: + build_dir = os.path.join(self.build_src, d) + funcs = [f for f in files if hasattr(f, '__call__')] + files = [f for f in files if not 
hasattr(f, '__call__')] + for f in funcs: + if f.__code__.co_argcount==1: + s = f(build_dir) + else: + s = f() + if s is not None: + if isinstance(s, list): + files.extend(s) + elif isinstance(s, str): + files.append(s) + else: + raise TypeError(repr(s)) + filenames = get_data_files((d, files)) + new_data_files.append((d, filenames)) + else: + raise TypeError(repr(data)) + self.data_files[:] = new_data_files + + + def _build_npy_pkg_config(self, info, gd): + template, install_dir, subst_dict = info + template_dir = os.path.dirname(template) + for k, v in gd.items(): + subst_dict[k] = v + + if self.inplace == 1: + generated_dir = os.path.join(template_dir, install_dir) + else: + generated_dir = os.path.join(self.build_src, template_dir, + install_dir) + generated = os.path.basename(os.path.splitext(template)[0]) + generated_path = os.path.join(generated_dir, generated) + if not os.path.exists(generated_dir): + os.makedirs(generated_dir) + + subst_vars(generated_path, template, subst_dict) + + # Where to install relatively to install prefix + full_install_dir = os.path.join(template_dir, install_dir) + return full_install_dir, generated_path + + def build_npy_pkg_config(self): + log.info('build_src: building npy-pkg config files') + + # XXX: another ugly workaround to circumvent distutils brain damage. We + # need the install prefix here, but finalizing the options of the + # install command when only building sources cause error. Instead, we + # copy the install command instance, and finalize the copy so that it + # does not disrupt how distutils want to do things when with the + # original install command instance. + install_cmd = copy.copy(get_cmd('install')) + if not install_cmd.finalized == 1: + install_cmd.finalize_options() + build_npkg = False + if self.inplace == 1: + top_prefix = '.' 
+ build_npkg = True + elif hasattr(install_cmd, 'install_libbase'): + top_prefix = install_cmd.install_libbase + build_npkg = True + + if build_npkg: + for pkg, infos in self.distribution.installed_pkg_config.items(): + pkg_path = self.distribution.package_dir[pkg] + prefix = os.path.join(os.path.abspath(top_prefix), pkg_path) + d = {'prefix': prefix} + for info in infos: + install_dir, generated = self._build_npy_pkg_config(info, d) + self.distribution.data_files.append((install_dir, + [generated])) + + def build_py_modules_sources(self): + if not self.py_modules: + return + log.info('building py_modules sources') + new_py_modules = [] + for source in self.py_modules: + if is_sequence(source) and len(source)==3: + package, module_base, source = source + if self.inplace: + build_dir = self.get_package_dir(package) + else: + build_dir = os.path.join(self.build_src, + os.path.join(*package.split('.'))) + if hasattr(source, '__call__'): + target = os.path.join(build_dir, module_base + '.py') + source = source(target) + if source is None: + continue + modules = [(package, module_base, source)] + if package not in self.py_modules_dict: + self.py_modules_dict[package] = [] + self.py_modules_dict[package] += modules + else: + new_py_modules.append(source) + self.py_modules[:] = new_py_modules + + def build_library_sources(self, lib_name, build_info): + sources = list(build_info.get('sources', [])) + + if not sources: + return + + log.info('building library "%s" sources' % (lib_name)) + + sources = self.generate_sources(sources, (lib_name, build_info)) + + sources = self.template_sources(sources, (lib_name, build_info)) + + sources, h_files = self.filter_h_files(sources) + + if h_files: + log.info('%s - nothing done with h_files = %s', + self.package, h_files) + + #for f in h_files: + # self.distribution.headers.append((lib_name,f)) + + build_info['sources'] = sources + return + + def build_extension_sources(self, ext): + + sources = list(ext.sources) + + log.info('building extension "%s" sources' % (ext.name)) + + fullname = self.get_ext_fullname(ext.name) + + modpath = fullname.split('.') + package = '.'.join(modpath[0:-1]) + + if self.inplace: + self.ext_target_dir = self.get_package_dir(package) + + sources = self.generate_sources(sources, ext) + sources = self.template_sources(sources, ext) + sources = self.swig_sources(sources, ext) + sources = self.f2py_sources(sources, ext) + sources = self.pyrex_sources(sources, ext) + + sources, py_files = self.filter_py_files(sources) + + if package not in self.py_modules_dict: + self.py_modules_dict[package] = [] + modules = [] + for f in py_files: + module = os.path.splitext(os.path.basename(f))[0] + modules.append((package, module, f)) + self.py_modules_dict[package] += modules + + sources, h_files = self.filter_h_files(sources) + + if h_files: + log.info('%s - nothing done with h_files = %s', + package, h_files) + #for f in h_files: + # self.distribution.headers.append((package,f)) + + ext.sources = sources + + def generate_sources(self, sources, extension): + new_sources = [] + func_sources = [] + for source in sources: + if is_string(source): + new_sources.append(source) + else: + func_sources.append(source) + if not func_sources: + return new_sources + if self.inplace and not is_sequence(extension): + build_dir = self.ext_target_dir + else: + if is_sequence(extension): + name = extension[0] + # if 'include_dirs' not in extension[1]: + # extension[1]['include_dirs'] = [] + # incl_dirs = extension[1]['include_dirs'] + else: + name = extension.name 
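generate_sources(), whose body continues below, treats any non-string entry in a sources list as a factory: it is called with the extension and a build directory and returns zero or more generated files. A stripped-down sketch of that protocol; write_version and the file it writes are hypothetical:

    import os

    def write_version(extension, build_dir):
        # a callable 'source': generate a file, return its path
        path = os.path.join(build_dir, 'version.py')
        with open(path, 'w') as f:
            f.write("version = '1.0.0'\n")
        return path

    def expand_sources(sources, extension, build_dir):
        new_sources = []
        for source in sources:
            if callable(source):
                generated = source(extension, build_dir)
                if generated:
                    new_sources.append(generated)
            else:
                new_sources.append(source)
        return new_sources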
+ # incl_dirs = extension.include_dirs + #if self.build_src not in incl_dirs: + # incl_dirs.append(self.build_src) + build_dir = os.path.join(*([self.build_src] + +name.split('.')[:-1])) + self.mkpath(build_dir) + + if self.verbose_cfg: + new_level = log.INFO + else: + new_level = log.WARN + old_level = log.set_threshold(new_level) + + for func in func_sources: + source = func(extension, build_dir) + if not source: + continue + if is_sequence(source): + [log.info(" adding '%s' to sources." % (s,)) for s in source] + new_sources.extend(source) + else: + log.info(" adding '%s' to sources." % (source,)) + new_sources.append(source) + log.set_threshold(old_level) + return new_sources + + def filter_py_files(self, sources): + return self.filter_files(sources, ['.py']) + + def filter_h_files(self, sources): + return self.filter_files(sources, ['.h', '.hpp', '.inc']) + + def filter_files(self, sources, exts = []): + new_sources = [] + files = [] + for source in sources: + (base, ext) = os.path.splitext(source) + if ext in exts: + files.append(source) + else: + new_sources.append(source) + return new_sources, files + + def template_sources(self, sources, extension): + new_sources = [] + if is_sequence(extension): + depends = extension[1].get('depends') + include_dirs = extension[1].get('include_dirs') + else: + depends = extension.depends + include_dirs = extension.include_dirs + for source in sources: + (base, ext) = os.path.splitext(source) + if ext == '.src': # Template file + if self.inplace: + target_dir = os.path.dirname(base) + else: + target_dir = appendpath(self.build_src, os.path.dirname(base)) + self.mkpath(target_dir) + target_file = os.path.join(target_dir, os.path.basename(base)) + if (self.force or newer_group([source] + depends, target_file)): + if _f_pyf_ext_match(base): + log.info("from_template:> %s" % (target_file)) + outstr = process_f_file(source) + else: + log.info("conv_template:> %s" % (target_file)) + outstr = process_c_file(source) + with open(target_file, 'w') as fid: + fid.write(outstr) + if _header_ext_match(target_file): + d = os.path.dirname(target_file) + if d not in include_dirs: + log.info(" adding '%s' to include_dirs." % (d)) + include_dirs.append(d) + new_sources.append(target_file) + else: + new_sources.append(source) + return new_sources + + def pyrex_sources(self, sources, extension): + """Pyrex not supported; this remains for Cython support (see below)""" + new_sources = [] + ext_name = extension.name.split('.')[-1] + for source in sources: + (base, ext) = os.path.splitext(source) + if ext == '.pyx': + target_file = self.generate_a_pyrex_source(base, ext_name, + source, + extension) + new_sources.append(target_file) + else: + new_sources.append(source) + return new_sources + + def generate_a_pyrex_source(self, base, ext_name, source, extension): + """Pyrex is not supported, but some projects monkeypatch this method. + + That allows compiling Cython code, see gh-6955. + This method will remain here for compatibility reasons. 
+ """ + return [] + + def f2py_sources(self, sources, extension): + new_sources = [] + f2py_sources = [] + f_sources = [] + f2py_targets = {} + target_dirs = [] + ext_name = extension.name.split('.')[-1] + skip_f2py = 0 + + for source in sources: + (base, ext) = os.path.splitext(source) + if ext == '.pyf': # F2PY interface file + if self.inplace: + target_dir = os.path.dirname(base) + else: + target_dir = appendpath(self.build_src, os.path.dirname(base)) + if os.path.isfile(source): + name = get_f2py_modulename(source) + if name != ext_name: + raise DistutilsSetupError('mismatch of extension names: %s ' + 'provides %r but expected %r' % ( + source, name, ext_name)) + target_file = os.path.join(target_dir, name+'module.c') + else: + log.debug(' source %s does not exist: skipping f2py\'ing.' \ + % (source)) + name = ext_name + skip_f2py = 1 + target_file = os.path.join(target_dir, name+'module.c') + if not os.path.isfile(target_file): + log.warn(' target %s does not exist:\n '\ + 'Assuming %smodule.c was generated with '\ + '"build_src --inplace" command.' \ + % (target_file, name)) + target_dir = os.path.dirname(base) + target_file = os.path.join(target_dir, name+'module.c') + if not os.path.isfile(target_file): + raise DistutilsSetupError("%r missing" % (target_file,)) + log.info(' Yes! Using %r as up-to-date target.' \ + % (target_file)) + target_dirs.append(target_dir) + f2py_sources.append(source) + f2py_targets[source] = target_file + new_sources.append(target_file) + elif fortran_ext_match(ext): + f_sources.append(source) + else: + new_sources.append(source) + + if not (f2py_sources or f_sources): + return new_sources + + for d in target_dirs: + self.mkpath(d) + + f2py_options = extension.f2py_options + self.f2py_opts + + if self.distribution.libraries: + for name, build_info in self.distribution.libraries: + if name in extension.libraries: + f2py_options.extend(build_info.get('f2py_options', [])) + + log.info("f2py options: %s" % (f2py_options)) + + if f2py_sources: + if len(f2py_sources) != 1: + raise DistutilsSetupError( + 'only one .pyf file is allowed per extension module but got'\ + ' more: %r' % (f2py_sources,)) + source = f2py_sources[0] + target_file = f2py_targets[source] + target_dir = os.path.dirname(target_file) or '.' 
+ depends = [source] + extension.depends + if (self.force or newer_group(depends, target_file, 'newer')) \ + and not skip_f2py: + log.info("f2py: %s" % (source)) + import numpy.f2py + numpy.f2py.run_main(f2py_options + + ['--build-dir', target_dir, source]) + else: + log.debug(" skipping '%s' f2py interface (up-to-date)" % (source)) + else: + #XXX TODO: --inplace support for sdist command + if is_sequence(extension): + name = extension[0] + else: name = extension.name + target_dir = os.path.join(*([self.build_src] + +name.split('.')[:-1])) + target_file = os.path.join(target_dir, ext_name + 'module.c') + new_sources.append(target_file) + depends = f_sources + extension.depends + if (self.force or newer_group(depends, target_file, 'newer')) \ + and not skip_f2py: + log.info("f2py:> %s" % (target_file)) + self.mkpath(target_dir) + import numpy.f2py + numpy.f2py.run_main(f2py_options + ['--lower', + '--build-dir', target_dir]+\ + ['-m', ext_name]+f_sources) + else: + log.debug(" skipping f2py fortran files for '%s' (up-to-date)"\ + % (target_file)) + + if not os.path.isfile(target_file): + raise DistutilsError("f2py target file %r not generated" % (target_file,)) + + build_dir = os.path.join(self.build_src, target_dir) + target_c = os.path.join(build_dir, 'fortranobject.c') + target_h = os.path.join(build_dir, 'fortranobject.h') + log.info(" adding '%s' to sources." % (target_c)) + new_sources.append(target_c) + if build_dir not in extension.include_dirs: + log.info(" adding '%s' to include_dirs." % (build_dir)) + extension.include_dirs.append(build_dir) + + if not skip_f2py: + import numpy.f2py + d = os.path.dirname(numpy.f2py.__file__) + source_c = os.path.join(d, 'src', 'fortranobject.c') + source_h = os.path.join(d, 'src', 'fortranobject.h') + if newer(source_c, target_c) or newer(source_h, target_h): + self.mkpath(os.path.dirname(target_c)) + self.copy_file(source_c, target_c) + self.copy_file(source_h, target_h) + else: + if not os.path.isfile(target_c): + raise DistutilsSetupError("f2py target_c file %r not found" % (target_c,)) + if not os.path.isfile(target_h): + raise DistutilsSetupError("f2py target_h file %r not found" % (target_h,)) + + for name_ext in ['-f2pywrappers.f', '-f2pywrappers2.f90']: + filename = os.path.join(target_dir, ext_name + name_ext) + if os.path.isfile(filename): + log.info(" adding '%s' to sources." % (filename)) + f_sources.append(filename) + + return new_sources + f_sources + + def swig_sources(self, sources, extension): + # Assuming SWIG 1.3.14 or later. 
See compatibility note in + # http://www.swig.org/Doc1.3/Python.html#Python_nn6 + + new_sources = [] + swig_sources = [] + swig_targets = {} + target_dirs = [] + py_files = [] # swig generated .py files + target_ext = '.c' + if '-c++' in extension.swig_opts: + typ = 'c++' + is_cpp = True + extension.swig_opts.remove('-c++') + elif self.swig_cpp: + typ = 'c++' + is_cpp = True + else: + typ = None + is_cpp = False + skip_swig = 0 + ext_name = extension.name.split('.')[-1] + + for source in sources: + (base, ext) = os.path.splitext(source) + if ext == '.i': # SWIG interface file + # the code below assumes that the sources list + # contains not more than one .i SWIG interface file + if self.inplace: + target_dir = os.path.dirname(base) + py_target_dir = self.ext_target_dir + else: + target_dir = appendpath(self.build_src, os.path.dirname(base)) + py_target_dir = target_dir + if os.path.isfile(source): + name = get_swig_modulename(source) + if name != ext_name[1:]: + raise DistutilsSetupError( + 'mismatch of extension names: %s provides %r' + ' but expected %r' % (source, name, ext_name[1:])) + if typ is None: + typ = get_swig_target(source) + is_cpp = typ=='c++' + else: + typ2 = get_swig_target(source) + if typ2 is None: + log.warn('source %r does not define swig target, assuming %s swig target' \ + % (source, typ)) + elif typ!=typ2: + log.warn('expected %r but source %r defines %r swig target' \ + % (typ, source, typ2)) + if typ2=='c++': + log.warn('resetting swig target to c++ (some targets may have .c extension)') + is_cpp = True + else: + log.warn('assuming that %r has c++ swig target' % (source)) + if is_cpp: + target_ext = '.cpp' + target_file = os.path.join(target_dir, '%s_wrap%s' \ + % (name, target_ext)) + else: + log.warn(' source %s does not exist: skipping swig\'ing.' \ + % (source)) + name = ext_name[1:] + skip_swig = 1 + target_file = _find_swig_target(target_dir, name) + if not os.path.isfile(target_file): + log.warn(' target %s does not exist:\n '\ + 'Assuming %s_wrap.{c,cpp} was generated with '\ + '"build_src --inplace" command.' \ + % (target_file, name)) + target_dir = os.path.dirname(base) + target_file = _find_swig_target(target_dir, name) + if not os.path.isfile(target_file): + raise DistutilsSetupError("%r missing" % (target_file,)) + log.warn(' Yes! Using %r as up-to-date target.' 
\
+                            % (target_file))
+                target_dirs.append(target_dir)
+                new_sources.append(target_file)
+                py_files.append(os.path.join(py_target_dir, name+'.py'))
+                swig_sources.append(source)
+                swig_targets[source] = new_sources[-1]
+            else:
+                new_sources.append(source)
+
+        if not swig_sources:
+            return new_sources
+
+        if skip_swig:
+            return new_sources + py_files
+
+        for d in target_dirs:
+            self.mkpath(d)
+
+        swig = self.swig or self.find_swig()
+        swig_cmd = [swig, "-python"] + extension.swig_opts
+        if is_cpp:
+            swig_cmd.append('-c++')
+        for d in extension.include_dirs:
+            swig_cmd.append('-I'+d)
+        for source in swig_sources:
+            target = swig_targets[source]
+            depends = [source] + extension.depends
+            if self.force or newer_group(depends, target, 'newer'):
+                log.info("%s: %s" % (os.path.basename(swig) \
+                                     + (is_cpp and '++' or ''), source))
+                self.spawn(swig_cmd + self.swig_opts \
+                           + ["-o", target, '-outdir', py_target_dir, source])
+            else:
+                log.debug("  skipping '%s' swig interface (up-to-date)" \
+                          % (source))
+
+        return new_sources + py_files
+
+_f_pyf_ext_match = re.compile(r'.*\.(f90|f95|f77|for|ftn|f|pyf)\Z', re.I).match
+_header_ext_match = re.compile(r'.*\.(inc|h|hpp)\Z', re.I).match
+
+#### SWIG related auxiliary functions ####
+_swig_module_name_match = re.compile(r'\s*%module\s*(.*\(\s*package\s*=\s*"(?P<package>[\w_]+)".*\)|)\s*(?P<name>[\w_]+)',
+                                     re.I).match
+_has_c_header = re.compile(r'-\*-\s*c\s*-\*-', re.I).search
+_has_cpp_header = re.compile(r'-\*-\s*c\+\+\s*-\*-', re.I).search
+
+def get_swig_target(source):
+    with open(source, 'r') as f:
+        result = None
+        line = f.readline()
+        if _has_cpp_header(line):
+            result = 'c++'
+        if _has_c_header(line):
+            result = 'c'
+    return result
+
+def get_swig_modulename(source):
+    with open(source, 'r') as f:
+        name = None
+        for line in f:
+            m = _swig_module_name_match(line)
+            if m:
+                name = m.group('name')
+                break
+    return name
+
+def _find_swig_target(target_dir, name):
+    for ext in ['.cpp', '.c']:
+        target = os.path.join(target_dir, '%s_wrap%s' % (name, ext))
+        if os.path.isfile(target):
+            break
+    return target
+
+#### F2PY related auxiliary functions ####
+
+_f2py_module_name_match = re.compile(r'\s*python\s*module\s*(?P<name>[\w_]+)',
+                                     re.I).match
+_f2py_user_module_name_match = re.compile(r'\s*python\s*module\s*(?P<name>[\w_]*?'
+                                          r'__user__[\w_]*)', re.I).match
+
+def get_f2py_modulename(source):
+    name = None
+    with open(source) as f:
+        for line in f:
+            m = _f2py_module_name_match(line)
+            if m:
+                if _f2py_user_module_name_match(line): # skip *__user__* names
+                    continue
+                name = m.group('name')
+                break
+    return name
+
+##########################################
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/command/config.py b/.local/lib/python3.8/site-packages/numpy/distutils/command/config.py
new file mode 100644
index 0000000..1f4037b
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/command/config.py
@@ -0,0 +1,517 @@
+# Added Fortran compiler support to config. Currently useful only for
+# try_compile call. try_run works but is untested for most of Fortran
+# compilers (they must define linker_exe first).
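+#
+# A usage sketch (assuming `dist` is an already-configured Distribution;
+# the Fortran body is illustrative):
+#
+#     cfg = dist.get_command_obj('config')
+#     has_f77 = cfg.try_compile('      program t\n      end\n', lang='f77')
+#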
+# Pearu Peterson
+import os
+import signal
+import subprocess
+import sys
+import textwrap
+import warnings
+
+from distutils.command.config import config as old_config
+from distutils.command.config import LANG_EXT
+from distutils import log
+from distutils.file_util import copy_file
+from distutils.ccompiler import CompileError, LinkError
+import distutils
+from numpy.distutils.exec_command import filepath_from_subprocess_output
+from numpy.distutils.mingw32ccompiler import generate_manifest
+from numpy.distutils.command.autodist import (check_gcc_function_attribute,
+                                              check_gcc_function_attribute_with_intrinsics,
+                                              check_gcc_variable_attribute,
+                                              check_gcc_version_at_least,
+                                              check_inline,
+                                              check_restrict,
+                                              check_compiler_gcc)
+
+LANG_EXT['f77'] = '.f'
+LANG_EXT['f90'] = '.f90'
+
+class config(old_config):
+    old_config.user_options += [
+        ('fcompiler=', None, "specify the Fortran compiler type"),
+        ]
+
+    def initialize_options(self):
+        self.fcompiler = None
+        old_config.initialize_options(self)
+
+    def _check_compiler (self):
+        old_config._check_compiler(self)
+        from numpy.distutils.fcompiler import FCompiler, new_fcompiler
+
+        if sys.platform == 'win32' and (self.compiler.compiler_type in
+                                        ('msvc', 'intelw', 'intelemw')):
+            # XXX: hack to circumvent a python 2.6 bug with msvc9compiler:
+            # initialize() calls query_vcvarsall, which throws an IOError, and
+            # causes an error along the way without much information. We try to
+            # catch it here, hoping it is early enough, and print a helpful
+            # message instead of Error: None.
+            if not self.compiler.initialized:
+                try:
+                    self.compiler.initialize()
+                except IOError as e:
+                    msg = textwrap.dedent("""\
+                        Could not initialize compiler instance: do you have Visual Studio
+                        installed? If you are trying to build with MinGW, please use "python setup.py
+                        build -c mingw32" instead. If you have Visual Studio installed, check it is
+                        correctly installed, and the right version (VS 2008 for python 2.6, 2.7 and 3.2,
+                        VS 2010 for >= 3.3).
+
+                        Original exception was: %s, and the Compiler class was %s
+                        ============================================================================""") \
+                        % (e, self.compiler.__class__.__name__)
+                    print(textwrap.dedent("""\
+                        ============================================================================"""))
+                    raise distutils.errors.DistutilsPlatformError(msg) from e
+
+            # After MSVC is initialized, add an explicit /MANIFEST to linker
+            # flags. See issues gh-4245 and gh-4101 for details. Also
+            # relevant are issues 4431 and 16296 on the Python bug tracker.
+ from distutils import msvc9compiler + if msvc9compiler.get_build_version() >= 10: + for ldflags in [self.compiler.ldflags_shared, + self.compiler.ldflags_shared_debug]: + if '/MANIFEST' not in ldflags: + ldflags.append('/MANIFEST') + + if not isinstance(self.fcompiler, FCompiler): + self.fcompiler = new_fcompiler(compiler=self.fcompiler, + dry_run=self.dry_run, force=1, + c_compiler=self.compiler) + if self.fcompiler is not None: + self.fcompiler.customize(self.distribution) + if self.fcompiler.get_version(): + self.fcompiler.customize_cmd(self) + self.fcompiler.show_customization() + + def _wrap_method(self, mth, lang, args): + from distutils.ccompiler import CompileError + from distutils.errors import DistutilsExecError + save_compiler = self.compiler + if lang in ['f77', 'f90']: + self.compiler = self.fcompiler + if self.compiler is None: + raise CompileError('%s compiler is not set' % (lang,)) + try: + ret = mth(*((self,)+args)) + except (DistutilsExecError, CompileError) as e: + self.compiler = save_compiler + raise CompileError from e + self.compiler = save_compiler + return ret + + def _compile (self, body, headers, include_dirs, lang): + src, obj = self._wrap_method(old_config._compile, lang, + (body, headers, include_dirs, lang)) + # _compile in unixcompiler.py sometimes creates .d dependency files. + # Clean them up. + self.temp_files.append(obj + '.d') + return src, obj + + def _link (self, body, + headers, include_dirs, + libraries, library_dirs, lang): + if self.compiler.compiler_type=='msvc': + libraries = (libraries or [])[:] + library_dirs = (library_dirs or [])[:] + if lang in ['f77', 'f90']: + lang = 'c' # always use system linker when using MSVC compiler + if self.fcompiler: + for d in self.fcompiler.library_dirs or []: + # correct path when compiling in Cygwin but with + # normal Win Python + if d.startswith('/usr/lib'): + try: + d = subprocess.check_output(['cygpath', + '-w', d]) + except (OSError, subprocess.CalledProcessError): + pass + else: + d = filepath_from_subprocess_output(d) + library_dirs.append(d) + for libname in self.fcompiler.libraries or []: + if libname not in libraries: + libraries.append(libname) + for libname in libraries: + if libname.startswith('msvc'): continue + fileexists = False + for libdir in library_dirs or []: + libfile = os.path.join(libdir, '%s.lib' % (libname)) + if os.path.isfile(libfile): + fileexists = True + break + if fileexists: continue + # make g77-compiled static libs available to MSVC + fileexists = False + for libdir in library_dirs: + libfile = os.path.join(libdir, 'lib%s.a' % (libname)) + if os.path.isfile(libfile): + # copy libname.a file to name.lib so that MSVC linker + # can find it + libfile2 = os.path.join(libdir, '%s.lib' % (libname)) + copy_file(libfile, libfile2) + self.temp_files.append(libfile2) + fileexists = True + break + if fileexists: continue + log.warn('could not find library %r in directories %s' \ + % (libname, library_dirs)) + elif self.compiler.compiler_type == 'mingw32': + generate_manifest(self) + return self._wrap_method(old_config._link, lang, + (body, headers, include_dirs, + libraries, library_dirs, lang)) + + def check_header(self, header, include_dirs=None, library_dirs=None, lang='c'): + self._check_compiler() + return self.try_compile( + "/* we need a dummy line to make distutils happy */", + [header], include_dirs) + + def check_decl(self, symbol, + headers=None, include_dirs=None): + self._check_compiler() + body = textwrap.dedent(""" + int main(void) + { + #ifndef %s + (void) %s; + 
#endif + ; + return 0; + }""") % (symbol, symbol) + + return self.try_compile(body, headers, include_dirs) + + def check_macro_true(self, symbol, + headers=None, include_dirs=None): + self._check_compiler() + body = textwrap.dedent(""" + int main(void) + { + #if %s + #else + #error false or undefined macro + #endif + ; + return 0; + }""") % (symbol,) + + return self.try_compile(body, headers, include_dirs) + + def check_type(self, type_name, headers=None, include_dirs=None, + library_dirs=None): + """Check type availability. Return True if the type can be compiled, + False otherwise""" + self._check_compiler() + + # First check the type can be compiled + body = textwrap.dedent(r""" + int main(void) { + if ((%(name)s *) 0) + return 0; + if (sizeof (%(name)s)) + return 0; + } + """) % {'name': type_name} + + st = False + try: + try: + self._compile(body % {'type': type_name}, + headers, include_dirs, 'c') + st = True + except distutils.errors.CompileError: + st = False + finally: + self._clean() + + return st + + def check_type_size(self, type_name, headers=None, include_dirs=None, library_dirs=None, expected=None): + """Check size of a given type.""" + self._check_compiler() + + # First check the type can be compiled + body = textwrap.dedent(r""" + typedef %(type)s npy_check_sizeof_type; + int main (void) + { + static int test_array [1 - 2 * !(((long) (sizeof (npy_check_sizeof_type))) >= 0)]; + test_array [0] = 0 + + ; + return 0; + } + """) + self._compile(body % {'type': type_name}, + headers, include_dirs, 'c') + self._clean() + + if expected: + body = textwrap.dedent(r""" + typedef %(type)s npy_check_sizeof_type; + int main (void) + { + static int test_array [1 - 2 * !(((long) (sizeof (npy_check_sizeof_type))) == %(size)s)]; + test_array [0] = 0 + + ; + return 0; + } + """) + for size in expected: + try: + self._compile(body % {'type': type_name, 'size': size}, + headers, include_dirs, 'c') + self._clean() + return size + except CompileError: + pass + + # this fails to *compile* if size > sizeof(type) + body = textwrap.dedent(r""" + typedef %(type)s npy_check_sizeof_type; + int main (void) + { + static int test_array [1 - 2 * !(((long) (sizeof (npy_check_sizeof_type))) <= %(size)s)]; + test_array [0] = 0 + + ; + return 0; + } + """) + + # The principle is simple: we first find low and high bounds of size + # for the type, where low/high are looked up on a log scale. Then, we + # do a binary search to find the exact size between low and high + low = 0 + mid = 0 + while True: + try: + self._compile(body % {'type': type_name, 'size': mid}, + headers, include_dirs, 'c') + self._clean() + break + except CompileError: + #log.info("failure to test for bound %d" % mid) + low = mid + 1 + mid = 2 * mid + 1 + + high = mid + # Binary search: + while low != high: + mid = (high - low) // 2 + low + try: + self._compile(body % {'type': type_name, 'size': mid}, + headers, include_dirs, 'c') + self._clean() + high = mid + except CompileError: + low = mid + 1 + return low + + def check_func(self, func, + headers=None, include_dirs=None, + libraries=None, library_dirs=None, + decl=False, call=False, call_args=None): + # clean up distutils's config a bit: add void to main(), and + # return a value. + self._check_compiler() + body = [] + if decl: + if type(decl) == str: + body.append(decl) + else: + body.append("int %s (void);" % func) + # Handle MSVC intrinsics: force MS compiler to make a function call. 
+ # Useful to test for some functions when built with optimization on, to + # avoid build error because the intrinsic and our 'fake' test + # declaration do not match. + body.append("#ifdef _MSC_VER") + body.append("#pragma function(%s)" % func) + body.append("#endif") + body.append("int main (void) {") + if call: + if call_args is None: + call_args = '' + body.append(" %s(%s);" % (func, call_args)) + else: + body.append(" %s;" % func) + body.append(" return 0;") + body.append("}") + body = '\n'.join(body) + "\n" + + return self.try_link(body, headers, include_dirs, + libraries, library_dirs) + + def check_funcs_once(self, funcs, + headers=None, include_dirs=None, + libraries=None, library_dirs=None, + decl=False, call=False, call_args=None): + """Check a list of functions at once. + + This is useful to speed up things, since all the functions in the funcs + list will be put in one compilation unit. + + Arguments + --------- + funcs : seq + list of functions to test + include_dirs : seq + list of header paths + libraries : seq + list of libraries to link the code snippet to + library_dirs : seq + list of library paths + decl : dict + for every (key, value), the declaration in the value will be + used for function in key. If a function is not in the + dictionary, no declaration will be used. + call : dict + for every item (f, value), if the value is True, a call will be + done to the function f. + """ + self._check_compiler() + body = [] + if decl: + for f, v in decl.items(): + if v: + body.append("int %s (void);" % f) + + # Handle MS intrinsics. See check_func for more info. + body.append("#ifdef _MSC_VER") + for func in funcs: + body.append("#pragma function(%s)" % func) + body.append("#endif") + + body.append("int main (void) {") + if call: + for f in funcs: + if f in call and call[f]: + if not (call_args and f in call_args and call_args[f]): + args = '' + else: + args = call_args[f] + body.append(" %s(%s);" % (f, args)) + else: + body.append(" %s;" % f) + else: + for f in funcs: + body.append(" %s;" % f) + body.append(" return 0;") + body.append("}") + body = '\n'.join(body) + "\n" + + return self.try_link(body, headers, include_dirs, + libraries, library_dirs) + + def check_inline(self): + """Return the inline keyword recognized by the compiler, empty string + otherwise.""" + return check_inline(self) + + def check_restrict(self): + """Return the restrict keyword recognized by the compiler, empty string + otherwise.""" + return check_restrict(self) + + def check_compiler_gcc(self): + """Return True if the C compiler is gcc""" + return check_compiler_gcc(self) + + def check_gcc_function_attribute(self, attribute, name): + return check_gcc_function_attribute(self, attribute, name) + + def check_gcc_function_attribute_with_intrinsics(self, attribute, name, + code, include): + return check_gcc_function_attribute_with_intrinsics(self, attribute, + name, code, include) + + def check_gcc_variable_attribute(self, attribute): + return check_gcc_variable_attribute(self, attribute) + + def check_gcc_version_at_least(self, major, minor=0, patchlevel=0): + """Return True if the GCC version is greater than or equal to the + specified version.""" + return check_gcc_version_at_least(self, major, minor, patchlevel) + + def get_output(self, body, headers=None, include_dirs=None, + libraries=None, library_dirs=None, + lang="c", use_tee=None): + """Try to compile, link to an executable, and run a program + built from 'body' and 'headers'. Returns the exit status code + of the program and its output. 
+ """ + # 2008-11-16, RemoveMe + warnings.warn("\n+++++++++++++++++++++++++++++++++++++++++++++++++\n" + "Usage of get_output is deprecated: please do not \n" + "use it anymore, and avoid configuration checks \n" + "involving running executable on the target machine.\n" + "+++++++++++++++++++++++++++++++++++++++++++++++++\n", + DeprecationWarning, stacklevel=2) + self._check_compiler() + exitcode, output = 255, '' + try: + grabber = GrabStdout() + try: + src, obj, exe = self._link(body, headers, include_dirs, + libraries, library_dirs, lang) + grabber.restore() + except Exception: + output = grabber.data + grabber.restore() + raise + exe = os.path.join('.', exe) + try: + # specify cwd arg for consistency with + # historic usage pattern of exec_command() + # also, note that exe appears to be a string, + # which exec_command() handled, but we now + # use a list for check_output() -- this assumes + # that exe is always a single command + output = subprocess.check_output([exe], cwd='.') + except subprocess.CalledProcessError as exc: + exitstatus = exc.returncode + output = '' + except OSError: + # preserve the EnvironmentError exit status + # used historically in exec_command() + exitstatus = 127 + output = '' + else: + output = filepath_from_subprocess_output(output) + if hasattr(os, 'WEXITSTATUS'): + exitcode = os.WEXITSTATUS(exitstatus) + if os.WIFSIGNALED(exitstatus): + sig = os.WTERMSIG(exitstatus) + log.error('subprocess exited with signal %d' % (sig,)) + if sig == signal.SIGINT: + # control-C + raise KeyboardInterrupt + else: + exitcode = exitstatus + log.info("success!") + except (CompileError, LinkError): + log.info("failure.") + self._clean() + return exitcode, output + +class GrabStdout: + + def __init__(self): + self.sys_stdout = sys.stdout + self.data = '' + sys.stdout = self + + def write (self, data): + self.sys_stdout.write(data) + self.data += data + + def flush (self): + self.sys_stdout.flush() + + def restore(self): + sys.stdout = self.sys_stdout diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/command/config_compiler.py b/.local/lib/python3.8/site-packages/numpy/distutils/command/config_compiler.py new file mode 100644 index 0000000..44265bf --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/command/config_compiler.py @@ -0,0 +1,126 @@ +from distutils.core import Command +from numpy.distutils import log + +#XXX: Linker flags + +def show_fortran_compilers(_cache=None): + # Using cache to prevent infinite recursion. + if _cache: + return + elif _cache is None: + _cache = [] + _cache.append(1) + from numpy.distutils.fcompiler import show_fcompilers + import distutils.core + dist = distutils.core._setup_distribution + show_fcompilers(dist) + +class config_fc(Command): + """ Distutils command to hold user specified options + to Fortran compilers. + + config_fc command is used by the FCompiler.customize() method. 
+ """ + + description = "specify Fortran 77/Fortran 90 compiler information" + + user_options = [ + ('fcompiler=', None, "specify Fortran compiler type"), + ('f77exec=', None, "specify F77 compiler command"), + ('f90exec=', None, "specify F90 compiler command"), + ('f77flags=', None, "specify F77 compiler flags"), + ('f90flags=', None, "specify F90 compiler flags"), + ('opt=', None, "specify optimization flags"), + ('arch=', None, "specify architecture specific optimization flags"), + ('debug', 'g', "compile with debugging information"), + ('noopt', None, "compile without optimization"), + ('noarch', None, "compile without arch-dependent optimization"), + ] + + help_options = [ + ('help-fcompiler', None, "list available Fortran compilers", + show_fortran_compilers), + ] + + boolean_options = ['debug', 'noopt', 'noarch'] + + def initialize_options(self): + self.fcompiler = None + self.f77exec = None + self.f90exec = None + self.f77flags = None + self.f90flags = None + self.opt = None + self.arch = None + self.debug = None + self.noopt = None + self.noarch = None + + def finalize_options(self): + log.info('unifing config_fc, config, build_clib, build_ext, build commands --fcompiler options') + build_clib = self.get_finalized_command('build_clib') + build_ext = self.get_finalized_command('build_ext') + config = self.get_finalized_command('config') + build = self.get_finalized_command('build') + cmd_list = [self, config, build_clib, build_ext, build] + for a in ['fcompiler']: + l = [] + for c in cmd_list: + v = getattr(c, a) + if v is not None: + if not isinstance(v, str): v = v.compiler_type + if v not in l: l.append(v) + if not l: v1 = None + else: v1 = l[0] + if len(l)>1: + log.warn(' commands have different --%s options: %s'\ + ', using first in list as default' % (a, l)) + if v1: + for c in cmd_list: + if getattr(c, a) is None: setattr(c, a, v1) + + def run(self): + # Do nothing. + return + +class config_cc(Command): + """ Distutils command to hold user specified options + to C/C++ compilers. + """ + + description = "specify C/C++ compiler information" + + user_options = [ + ('compiler=', None, "specify C/C++ compiler type"), + ] + + def initialize_options(self): + self.compiler = None + + def finalize_options(self): + log.info('unifing config_cc, config, build_clib, build_ext, build commands --compiler options') + build_clib = self.get_finalized_command('build_clib') + build_ext = self.get_finalized_command('build_ext') + config = self.get_finalized_command('config') + build = self.get_finalized_command('build') + cmd_list = [self, config, build_clib, build_ext, build] + for a in ['compiler']: + l = [] + for c in cmd_list: + v = getattr(c, a) + if v is not None: + if not isinstance(v, str): v = v.compiler_type + if v not in l: l.append(v) + if not l: v1 = None + else: v1 = l[0] + if len(l)>1: + log.warn(' commands have different --%s options: %s'\ + ', using first in list as default' % (a, l)) + if v1: + for c in cmd_list: + if getattr(c, a) is None: setattr(c, a, v1) + return + + def run(self): + # Do nothing. 
+        return
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/command/develop.py b/.local/lib/python3.8/site-packages/numpy/distutils/command/develop.py
new file mode 100644
index 0000000..af24baf
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/command/develop.py
@@ -0,0 +1,15 @@
+""" Override the develop command from setuptools so we can ensure that our
+generated files (from build_src or build_scripts) are properly converted to real
+files with filenames.
+
+"""
+from setuptools.command.develop import develop as old_develop
+
+class develop(old_develop):
+    __doc__ = old_develop.__doc__
+    def install_for_development(self):
+        # Build sources in-place, too.
+        self.reinitialize_command('build_src', inplace=1)
+        # Make sure scripts are built.
+        self.run_command('build_scripts')
+        old_develop.install_for_development(self)
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/command/egg_info.py b/.local/lib/python3.8/site-packages/numpy/distutils/command/egg_info.py
new file mode 100644
index 0000000..14c62b4
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/command/egg_info.py
@@ -0,0 +1,25 @@
+import sys
+
+from setuptools.command.egg_info import egg_info as _egg_info
+
+class egg_info(_egg_info):
+    def run(self):
+        if 'sdist' in sys.argv:
+            import warnings
+            import textwrap
+            msg = textwrap.dedent("""
+                `build_src` is being run; this may lead to missing
+                files in your sdist!  You want to use distutils.sdist
+                instead of the setuptools version:
+
+                    from distutils.command.sdist import sdist
+                    cmdclass={'sdist': sdist}
+
+                See numpy's setup.py or gh-7131 for details.""")
+            warnings.warn(msg, UserWarning, stacklevel=2)
+
+        # We need to ensure that build_src has been executed in order to give
+        # setuptools' egg_info command real filenames instead of functions which
+        # generate files.
+        self.run_command("build_src")
+        _egg_info.run(self)
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/command/install.py b/.local/lib/python3.8/site-packages/numpy/distutils/command/install.py
new file mode 100644
index 0000000..2eff2d1
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/command/install.py
@@ -0,0 +1,79 @@
+import sys
+if 'setuptools' in sys.modules:
+    import setuptools.command.install as old_install_mod
+    have_setuptools = True
+else:
+    import distutils.command.install as old_install_mod
+    have_setuptools = False
+from distutils.file_util import write_file
+
+old_install = old_install_mod.install
+
+class install(old_install):
+
+    # Always run install_clib - the command is cheap, so no need to bypass it;
+    # but it's not run by setuptools -- so it's run again in install_data
+    sub_commands = old_install.sub_commands + [
+        ('install_clib', lambda x: True)
+    ]
+
+    def finalize_options (self):
+        old_install.finalize_options(self)
+        self.install_lib = self.install_libbase
+
+    def setuptools_run(self):
+        """ The setuptools version of the .run() method.
+
+        We must pull in the entire code so we can override the level used in the
+        _getframe() call since we wrap this call by one more level.
+        """
+        from distutils.command.install import install as distutils_install
+
+        # Explicit request for old-style install?  Just do it
+        if self.old_and_unmanageable or self.single_version_externally_managed:
+            return distutils_install.run(self)
+
+        # Attempt to detect whether we were called from setup() or by another
+        # command.
If we were called by setup(), our caller will be the + # 'run_command' method in 'distutils.dist', and *its* caller will be + # the 'run_commands' method. If we were called any other way, our + # immediate caller *might* be 'run_command', but it won't have been + # called by 'run_commands'. This is slightly kludgy, but seems to + # work. + # + caller = sys._getframe(3) + caller_module = caller.f_globals.get('__name__', '') + caller_name = caller.f_code.co_name + + if caller_module != 'distutils.dist' or caller_name!='run_commands': + # We weren't called from the command line or setup(), so we + # should run in backward-compatibility mode to support bdist_* + # commands. + distutils_install.run(self) + else: + self.do_egg_install() + + def run(self): + if not have_setuptools: + r = old_install.run(self) + else: + r = self.setuptools_run() + if self.record: + # bdist_rpm fails when INSTALLED_FILES contains + # paths with spaces. Such paths must be enclosed + # with double-quotes. + with open(self.record, 'r') as f: + lines = [] + need_rewrite = False + for l in f: + l = l.rstrip() + if ' ' in l: + need_rewrite = True + l = '"%s"' % (l) + lines.append(l) + if need_rewrite: + self.execute(write_file, + (self.record, lines), + "re-writing list of installed files to '%s'" % + self.record) + return r diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/command/install_clib.py b/.local/lib/python3.8/site-packages/numpy/distutils/command/install_clib.py new file mode 100644 index 0000000..aa2e559 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/command/install_clib.py @@ -0,0 +1,40 @@ +import os +from distutils.core import Command +from distutils.ccompiler import new_compiler +from numpy.distutils.misc_util import get_cmd + +class install_clib(Command): + description = "Command to install installable C libraries" + + user_options = [] + + def initialize_options(self): + self.install_dir = None + self.outfiles = [] + + def finalize_options(self): + self.set_undefined_options('install', ('install_lib', 'install_dir')) + + def run (self): + build_clib_cmd = get_cmd("build_clib") + if not build_clib_cmd.build_clib: + # can happen if the user specified `--skip-build` + build_clib_cmd.finalize_options() + build_dir = build_clib_cmd.build_clib + + # We need the compiler to get the library name -> filename association + if not build_clib_cmd.compiler: + compiler = new_compiler(compiler=None) + compiler.customize(self.distribution) + else: + compiler = build_clib_cmd.compiler + + for l in self.distribution.installed_libraries: + target_dir = os.path.join(self.install_dir, l.target_dir) + name = compiler.library_filename(l.name) + source = os.path.join(build_dir, name) + self.mkpath(target_dir) + self.outfiles.append(self.copy_file(source, target_dir)[0]) + + def get_outputs(self): + return self.outfiles diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/command/install_data.py b/.local/lib/python3.8/site-packages/numpy/distutils/command/install_data.py new file mode 100644 index 0000000..0a2e68a --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/command/install_data.py @@ -0,0 +1,24 @@ +import sys +have_setuptools = ('setuptools' in sys.modules) + +from distutils.command.install_data import install_data as old_install_data + +#data installer with improved intelligence over distutils +#data files are copied into the project directory instead +#of willy-nilly +class install_data (old_install_data): + + def run(self): + 
old_install_data.run(self)
+
+        if have_setuptools:
+            # Run install_clib again, since setuptools does not run sub-commands
+            # of install automatically
+            self.run_command('install_clib')
+
+    def finalize_options (self):
+        self.set_undefined_options('install',
+                                   ('install_lib', 'install_dir'),
+                                   ('root', 'root'),
+                                   ('force', 'force'),
+                                  )
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/command/install_headers.py b/.local/lib/python3.8/site-packages/numpy/distutils/command/install_headers.py
new file mode 100644
index 0000000..bb4ad56
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/command/install_headers.py
@@ -0,0 +1,25 @@
+import os
+from distutils.command.install_headers import install_headers as old_install_headers
+
+class install_headers (old_install_headers):
+
+    def run (self):
+        headers = self.distribution.headers
+        if not headers:
+            return
+
+        prefix = os.path.dirname(self.install_dir)
+        for header in headers:
+            if isinstance(header, tuple):
+                # Kind of a hack, but I don't know where else to change this...
+                if header[0] == 'numpy.core':
+                    header = ('numpy', header[1])
+                    if os.path.splitext(header[1])[1] == '.inc':
+                        continue
+                d = os.path.join(*([prefix]+header[0].split('.')))
+                header = header[1]
+            else:
+                d = self.install_dir
+            self.mkpath(d)
+            (out, _) = self.copy_file(header, d)
+            self.outfiles.append(out)
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/command/sdist.py b/.local/lib/python3.8/site-packages/numpy/distutils/command/sdist.py
new file mode 100644
index 0000000..e341938
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/command/sdist.py
@@ -0,0 +1,27 @@
+import sys
+if 'setuptools' in sys.modules:
+    from setuptools.command.sdist import sdist as old_sdist
+else:
+    from distutils.command.sdist import sdist as old_sdist
+
+from numpy.distutils.misc_util import get_data_files
+
+class sdist(old_sdist):
+
+    def add_defaults (self):
+        old_sdist.add_defaults(self)
+
+        dist = self.distribution
+
+        if dist.has_data_files():
+            for data in dist.data_files:
+                self.filelist.extend(get_data_files(data))
+
+        if dist.has_headers():
+            headers = []
+            for h in dist.headers:
+                if isinstance(h, str): headers.append(h)
+                else: headers.append(h[1])
+            self.filelist.extend(headers)
+
+        return
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/conv_template.py b/.local/lib/python3.8/site-packages/numpy/distutils/conv_template.py
new file mode 100644
index 0000000..c8933d1
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/conv_template.py
@@ -0,0 +1,329 @@
+#!/usr/bin/env python3
+"""
+takes templated file .xxx.src and produces .xxx file where .xxx is
+.i or .c or .h, using the following template rules
+
+/**begin repeat  -- on a line by itself marks the start of a repeated code
+                    segment
+/**end repeat**/ -- on a line by itself marks its end
+
+After the /**begin repeat and before the */, all the named templates are placed;
+these should all have the same number of replacements
+
+Repeat blocks can be nested, with each nested block labeled with its depth,
+i.e.
+/**begin repeat1
+ *....
+ */
+/**end repeat1**/
+
+When using nested loops, you can optionally exclude particular
+combinations of the variables using (inside the comment portion of the inner loop):
+
+ :exclude: var1=value1, var2=value2, ...
+
+This will exclude the pattern where var1 is value1 and var2 is value2 when
+the result is being generated.
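+
+For example, an inner-loop header using the exclusion syntax above
+(a sketch only):
+
+    /**begin repeat1
+     * #a = 1,2#
+     * #b = 1,2#
+     * :exclude: a=2, b=2
+     */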
+ + +In the main body each replace will use one entry from the list of named replacements + + Note that all #..# forms in a block must have the same number of + comma-separated entries. + +Example: + + An input file containing + + /**begin repeat + * #a = 1,2,3# + * #b = 1,2,3# + */ + + /**begin repeat1 + * #c = ted, jim# + */ + @a@, @b@, @c@ + /**end repeat1**/ + + /**end repeat**/ + + produces + + line 1 "template.c.src" + + /* + ********************************************************************* + ** This file was autogenerated from a template DO NOT EDIT!!** + ** Changes should be made to the original source (.src) file ** + ********************************************************************* + */ + + #line 9 + 1, 1, ted + + #line 9 + 1, 1, jim + + #line 9 + 2, 2, ted + + #line 9 + 2, 2, jim + + #line 9 + 3, 3, ted + + #line 9 + 3, 3, jim + +""" + +__all__ = ['process_str', 'process_file'] + +import os +import sys +import re + +# names for replacement that are already global. +global_names = {} + +# header placed at the front of head processed file +header =\ +""" +/* + ***************************************************************************** + ** This file was autogenerated from a template DO NOT EDIT!!!! ** + ** Changes should be made to the original source (.src) file ** + ***************************************************************************** + */ + +""" +# Parse string for repeat loops +def parse_structure(astr, level): + """ + The returned line number is from the beginning of the string, starting + at zero. Returns an empty list if no loops found. + + """ + if level == 0 : + loopbeg = "/**begin repeat" + loopend = "/**end repeat**/" + else : + loopbeg = "/**begin repeat%d" % level + loopend = "/**end repeat%d**/" % level + + ind = 0 + line = 0 + spanlist = [] + while True: + start = astr.find(loopbeg, ind) + if start == -1: + break + start2 = astr.find("*/", start) + start2 = astr.find("\n", start2) + fini1 = astr.find(loopend, start2) + fini2 = astr.find("\n", fini1) + line += astr.count("\n", ind, start2+1) + spanlist.append((start, start2+1, fini1, fini2+1, line)) + line += astr.count("\n", start2+1, fini2) + ind = fini2 + spanlist.sort() + return spanlist + + +def paren_repl(obj): + torep = obj.group(1) + numrep = obj.group(2) + return ','.join([torep]*int(numrep)) + +parenrep = re.compile(r"\(([^)]*)\)\*(\d+)") +plainrep = re.compile(r"([^*]+)\*(\d+)") +def parse_values(astr): + # replaces all occurrences of '(a,b,c)*4' in astr + # with 'a,b,c,a,b,c,a,b,c,a,b,c'. Empty braces generate + # empty values, i.e., ()*4 yields ',,,'. The result is + # split at ',' and a list of values returned. + astr = parenrep.sub(paren_repl, astr) + # replaces occurrences of xxx*3 with xxx, xxx, xxx + astr = ','.join([plainrep.sub(paren_repl, x.strip()) + for x in astr.split(',')]) + return astr.split(',') + + +stripast = re.compile(r"\n\s*\*?") +named_re = re.compile(r"#\s*(\w*)\s*=([^#]*)#") +exclude_vars_re = re.compile(r"(\w*)=(\w*)") +exclude_re = re.compile(":exclude:") +def parse_loop_header(loophead) : + """Find all named replacements in the header + + Returns a list of dictionaries, one for each loop iteration, + where each key is a name to be substituted and the corresponding + value is the replacement string. + + Also return a list of exclusions. The exclusions are dictionaries + of key value pairs. There can be more than one exclusion. + [{'var1':'value1', 'var2', 'value2'[,...]}, ...] + + """ + # Strip out '\n' and leading '*', if any, in continuation lines. 
+    # This should not affect code previous to this change as
+    # continuation lines were not allowed.
+    loophead = stripast.sub("", loophead)
+    # parse out the names and lists of values
+    names = []
+    reps = named_re.findall(loophead)
+    nsub = None
+    for rep in reps:
+        name = rep[0]
+        vals = parse_values(rep[1])
+        size = len(vals)
+        if nsub is None :
+            nsub = size
+        elif nsub != size :
+            msg = "Mismatch in number of values, %d != %d\n%s = %s"
+            raise ValueError(msg % (nsub, size, name, vals))
+        names.append((name, vals))
+
+
+    # Find any exclude variables
+    excludes = []
+
+    for obj in exclude_re.finditer(loophead):
+        span = obj.span()
+        # find next newline
+        endline = loophead.find('\n', span[1])
+        substr = loophead[span[1]:endline]
+        ex_names = exclude_vars_re.findall(substr)
+        excludes.append(dict(ex_names))
+
+    # generate list of dictionaries, one for each template iteration
+    dlist = []
+    if nsub is None :
+        raise ValueError("No substitution variables found")
+    for i in range(nsub):
+        tmp = {name: vals[i] for name, vals in names}
+        dlist.append(tmp)
+    return dlist
+
+replace_re = re.compile(r"@(\w+)@")
+def parse_string(astr, env, level, line) :
+    lineno = "#line %d\n" % line
+
+    # local function for string replacement, uses env
+    def replace(match):
+        name = match.group(1)
+        try :
+            val = env[name]
+        except KeyError:
+            msg = 'line %d: no definition of key "%s"'%(line, name)
+            raise ValueError(msg) from None
+        return val
+
+    code = [lineno]
+    struct = parse_structure(astr, level)
+    if struct :
+        # recurse over inner loops
+        oldend = 0
+        newlevel = level + 1
+        for sub in struct:
+            pref = astr[oldend:sub[0]]
+            head = astr[sub[0]:sub[1]]
+            text = astr[sub[1]:sub[2]]
+            oldend = sub[3]
+            newline = line + sub[4]
+            code.append(replace_re.sub(replace, pref))
+            try :
+                envlist = parse_loop_header(head)
+            except ValueError as e:
+                msg = "line %d: %s" % (newline, e)
+                raise ValueError(msg)
+            for newenv in envlist :
+                newenv.update(env)
+                newcode = parse_string(text, newenv, newlevel, newline)
+                code.extend(newcode)
+        suff = astr[oldend:]
+        code.append(replace_re.sub(replace, suff))
+    else :
+        # replace keys
+        code.append(replace_re.sub(replace, astr))
+    code.append('\n')
+    return ''.join(code)
+
+def process_str(astr):
+    code = [header]
+    code.extend(parse_string(astr, global_names, 0, 1))
+    return ''.join(code)
+
+
+include_src_re = re.compile(r"(\n|\A)#include\s*['\"]"
+                            r"(?P<name>[\w\d./\\]+[.]src)['\"]", re.I)
+
+def resolve_includes(source):
+    d = os.path.dirname(source)
+    with open(source) as fid:
+        lines = []
+        for line in fid:
+            m = include_src_re.match(line)
+            if m:
+                fn = m.group('name')
+                if not os.path.isabs(fn):
+                    fn = os.path.join(d, fn)
+                if os.path.isfile(fn):
+                    lines.extend(resolve_includes(fn))
+                else:
+                    lines.append(line)
+            else:
+                lines.append(line)
+    return lines
+
+def process_file(source):
+    lines = resolve_includes(source)
+    sourcefile = os.path.normcase(source).replace("\\", "\\\\")
+    try:
+        code = process_str(''.join(lines))
+    except ValueError as e:
+        raise ValueError('In "%s" loop at %s' % (sourcefile, e)) from None
+    return '#line 1 "%s"\n%s' % (sourcefile, code)
+
+
+def unique_key(adict):
+    # this obtains a unique key given a dictionary
+    # currently it works by appending together n of the letters of the
+    # current keys and increasing n until a unique key is found
+    # -- not particularly quick
+    allkeys = list(adict.keys())
+    done = False
+    n = 1
+    while not done:
+        newkey = "".join([x[:n] for x in allkeys])
+        if newkey in allkeys:
+            n += 1
+        else:
+            done = True
+    return
newkey + + +def main(): + try: + file = sys.argv[1] + except IndexError: + fid = sys.stdin + outfile = sys.stdout + else: + fid = open(file, 'r') + (base, ext) = os.path.splitext(file) + newname = base + outfile = open(newname, 'w') + + allstr = fid.read() + try: + writestr = process_str(allstr) + except ValueError as e: + raise ValueError("In %s loop at %s" % (file, e)) from None + + outfile.write(writestr) + +if __name__ == "__main__": + main() diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/core.py b/.local/lib/python3.8/site-packages/numpy/distutils/core.py new file mode 100644 index 0000000..c4a14e5 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/core.py @@ -0,0 +1,215 @@ +import sys +from distutils.core import Distribution + +if 'setuptools' in sys.modules: + have_setuptools = True + from setuptools import setup as old_setup + # easy_install imports math, it may be picked up from cwd + from setuptools.command import easy_install + try: + # very old versions of setuptools don't have this + from setuptools.command import bdist_egg + except ImportError: + have_setuptools = False +else: + from distutils.core import setup as old_setup + have_setuptools = False + +import warnings +import distutils.core +import distutils.dist + +from numpy.distutils.extension import Extension # noqa: F401 +from numpy.distutils.numpy_distribution import NumpyDistribution +from numpy.distutils.command import config, config_compiler, \ + build, build_py, build_ext, build_clib, build_src, build_scripts, \ + sdist, install_data, install_headers, install, bdist_rpm, \ + install_clib +from numpy.distutils.misc_util import is_sequence, is_string + +numpy_cmdclass = {'build': build.build, + 'build_src': build_src.build_src, + 'build_scripts': build_scripts.build_scripts, + 'config_cc': config_compiler.config_cc, + 'config_fc': config_compiler.config_fc, + 'config': config.config, + 'build_ext': build_ext.build_ext, + 'build_py': build_py.build_py, + 'build_clib': build_clib.build_clib, + 'sdist': sdist.sdist, + 'install_data': install_data.install_data, + 'install_headers': install_headers.install_headers, + 'install_clib': install_clib.install_clib, + 'install': install.install, + 'bdist_rpm': bdist_rpm.bdist_rpm, + } +if have_setuptools: + # Use our own versions of develop and egg_info to ensure that build_src is + # handled appropriately. + from numpy.distutils.command import develop, egg_info + numpy_cmdclass['bdist_egg'] = bdist_egg.bdist_egg + numpy_cmdclass['develop'] = develop.develop + numpy_cmdclass['easy_install'] = easy_install.easy_install + numpy_cmdclass['egg_info'] = egg_info.egg_info + +def _dict_append(d, **kws): + for k, v in kws.items(): + if k not in d: + d[k] = v + continue + dv = d[k] + if isinstance(dv, tuple): + d[k] = dv + tuple(v) + elif isinstance(dv, list): + d[k] = dv + list(v) + elif isinstance(dv, dict): + _dict_append(dv, **v) + elif is_string(dv): + d[k] = dv + v + else: + raise TypeError(repr(type(dv))) + +def _command_line_ok(_cache=None): + """ Return True if command line does not contain any + help or display requests. 
+ """ + if _cache: + return _cache[0] + elif _cache is None: + _cache = [] + ok = True + display_opts = ['--'+n for n in Distribution.display_option_names] + for o in Distribution.display_options: + if o[1]: + display_opts.append('-'+o[1]) + for arg in sys.argv: + if arg.startswith('--help') or arg=='-h' or arg in display_opts: + ok = False + break + _cache.append(ok) + return ok + +def get_distribution(always=False): + dist = distutils.core._setup_distribution + # XXX Hack to get numpy installable with easy_install. + # The problem is easy_install runs it's own setup(), which + # sets up distutils.core._setup_distribution. However, + # when our setup() runs, that gets overwritten and lost. + # We can't use isinstance, as the DistributionWithoutHelpCommands + # class is local to a function in setuptools.command.easy_install + if dist is not None and \ + 'DistributionWithoutHelpCommands' in repr(dist): + dist = None + if always and dist is None: + dist = NumpyDistribution() + return dist + +def setup(**attr): + + cmdclass = numpy_cmdclass.copy() + + new_attr = attr.copy() + if 'cmdclass' in new_attr: + cmdclass.update(new_attr['cmdclass']) + new_attr['cmdclass'] = cmdclass + + if 'configuration' in new_attr: + # To avoid calling configuration if there are any errors + # or help request in command in the line. + configuration = new_attr.pop('configuration') + + old_dist = distutils.core._setup_distribution + old_stop = distutils.core._setup_stop_after + distutils.core._setup_distribution = None + distutils.core._setup_stop_after = "commandline" + try: + dist = setup(**new_attr) + finally: + distutils.core._setup_distribution = old_dist + distutils.core._setup_stop_after = old_stop + if dist.help or not _command_line_ok(): + # probably displayed help, skip running any commands + return dist + + # create setup dictionary and append to new_attr + config = configuration() + if hasattr(config, 'todict'): + config = config.todict() + _dict_append(new_attr, **config) + + # Move extension source libraries to libraries + libraries = [] + for ext in new_attr.get('ext_modules', []): + new_libraries = [] + for item in ext.libraries: + if is_sequence(item): + lib_name, build_info = item + _check_append_ext_library(libraries, lib_name, build_info) + new_libraries.append(lib_name) + elif is_string(item): + new_libraries.append(item) + else: + raise TypeError("invalid description of extension module " + "library %r" % (item,)) + ext.libraries = new_libraries + if libraries: + if 'libraries' not in new_attr: + new_attr['libraries'] = [] + for item in libraries: + _check_append_library(new_attr['libraries'], item) + + # sources in ext_modules or libraries may contain header files + if ('ext_modules' in new_attr or 'libraries' in new_attr) \ + and 'headers' not in new_attr: + new_attr['headers'] = [] + + # Use our custom NumpyDistribution class instead of distutils' one + new_attr['distclass'] = NumpyDistribution + + return old_setup(**new_attr) + +def _check_append_library(libraries, item): + for libitem in libraries: + if is_sequence(libitem): + if is_sequence(item): + if item[0]==libitem[0]: + if item[1] is libitem[1]: + return + warnings.warn("[0] libraries list contains %r with" + " different build_info" % (item[0],), + stacklevel=2) + break + else: + if item==libitem[0]: + warnings.warn("[1] libraries list contains %r with" + " no build_info" % (item[0],), + stacklevel=2) + break + else: + if is_sequence(item): + if item[0]==libitem: + warnings.warn("[2] libraries list contains %r with" + " no 
build_info" % (item[0],), + stacklevel=2) + break + else: + if item==libitem: + return + libraries.append(item) + +def _check_append_ext_library(libraries, lib_name, build_info): + for item in libraries: + if is_sequence(item): + if item[0]==lib_name: + if item[1] is build_info: + return + warnings.warn("[3] libraries list contains %r with" + " different build_info" % (lib_name,), + stacklevel=2) + break + elif item==lib_name: + warnings.warn("[4] libraries list contains %r with" + " no build_info" % (lib_name,), + stacklevel=2) + break + libraries.append((lib_name, build_info)) diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/cpuinfo.py b/.local/lib/python3.8/site-packages/numpy/distutils/cpuinfo.py new file mode 100644 index 0000000..7762021 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/cpuinfo.py @@ -0,0 +1,683 @@ +#!/usr/bin/env python3 +""" +cpuinfo + +Copyright 2002 Pearu Peterson all rights reserved, +Pearu Peterson +Permission to use, modify, and distribute this software is given under the +terms of the NumPy (BSD style) license. See LICENSE.txt that came with +this distribution for specifics. + +NO WARRANTY IS EXPRESSED OR IMPLIED. USE AT YOUR OWN RISK. +Pearu Peterson + +""" +__all__ = ['cpu'] + +import os +import platform +import re +import sys +import types +import warnings + +from subprocess import getstatusoutput + + +def getoutput(cmd, successful_status=(0,), stacklevel=1): + try: + status, output = getstatusoutput(cmd) + except OSError as e: + warnings.warn(str(e), UserWarning, stacklevel=stacklevel) + return False, "" + if os.WIFEXITED(status) and os.WEXITSTATUS(status) in successful_status: + return True, output + return False, output + +def command_info(successful_status=(0,), stacklevel=1, **kw): + info = {} + for key in kw: + ok, output = getoutput(kw[key], successful_status=successful_status, + stacklevel=stacklevel+1) + if ok: + info[key] = output.strip() + return info + +def command_by_line(cmd, successful_status=(0,), stacklevel=1): + ok, output = getoutput(cmd, successful_status=successful_status, + stacklevel=stacklevel+1) + if not ok: + return + for line in output.splitlines(): + yield line.strip() + +def key_value_from_command(cmd, sep, successful_status=(0,), + stacklevel=1): + d = {} + for line in command_by_line(cmd, successful_status=successful_status, + stacklevel=stacklevel+1): + l = [s.strip() for s in line.split(sep, 1)] + if len(l) == 2: + d[l[0]] = l[1] + return d + +class CPUInfoBase: + """Holds CPU information and provides methods for requiring + the availability of various CPU features. 
+ """ + + def _try_call(self, func): + try: + return func() + except Exception: + pass + + def __getattr__(self, name): + if not name.startswith('_'): + if hasattr(self, '_'+name): + attr = getattr(self, '_'+name) + if isinstance(attr, types.MethodType): + return lambda func=self._try_call,attr=attr : func(attr) + else: + return lambda : None + raise AttributeError(name) + + def _getNCPUs(self): + return 1 + + def __get_nbits(self): + abits = platform.architecture()[0] + nbits = re.compile(r'(\d+)bit').search(abits).group(1) + return nbits + + def _is_32bit(self): + return self.__get_nbits() == '32' + + def _is_64bit(self): + return self.__get_nbits() == '64' + +class LinuxCPUInfo(CPUInfoBase): + + info = None + + def __init__(self): + if self.info is not None: + return + info = [ {} ] + ok, output = getoutput('uname -m') + if ok: + info[0]['uname_m'] = output.strip() + try: + fo = open('/proc/cpuinfo') + except OSError as e: + warnings.warn(str(e), UserWarning, stacklevel=2) + else: + for line in fo: + name_value = [s.strip() for s in line.split(':', 1)] + if len(name_value) != 2: + continue + name, value = name_value + if not info or name in info[-1]: # next processor + info.append({}) + info[-1][name] = value + fo.close() + self.__class__.info = info + + def _not_impl(self): pass + + # Athlon + + def _is_AMD(self): + return self.info[0]['vendor_id']=='AuthenticAMD' + + def _is_AthlonK6_2(self): + return self._is_AMD() and self.info[0]['model'] == '2' + + def _is_AthlonK6_3(self): + return self._is_AMD() and self.info[0]['model'] == '3' + + def _is_AthlonK6(self): + return re.match(r'.*?AMD-K6', self.info[0]['model name']) is not None + + def _is_AthlonK7(self): + return re.match(r'.*?AMD-K7', self.info[0]['model name']) is not None + + def _is_AthlonMP(self): + return re.match(r'.*?Athlon\(tm\) MP\b', + self.info[0]['model name']) is not None + + def _is_AMD64(self): + return self.is_AMD() and self.info[0]['family'] == '15' + + def _is_Athlon64(self): + return re.match(r'.*?Athlon\(tm\) 64\b', + self.info[0]['model name']) is not None + + def _is_AthlonHX(self): + return re.match(r'.*?Athlon HX\b', + self.info[0]['model name']) is not None + + def _is_Opteron(self): + return re.match(r'.*?Opteron\b', + self.info[0]['model name']) is not None + + def _is_Hammer(self): + return re.match(r'.*?Hammer\b', + self.info[0]['model name']) is not None + + # Alpha + + def _is_Alpha(self): + return self.info[0]['cpu']=='Alpha' + + def _is_EV4(self): + return self.is_Alpha() and self.info[0]['cpu model'] == 'EV4' + + def _is_EV5(self): + return self.is_Alpha() and self.info[0]['cpu model'] == 'EV5' + + def _is_EV56(self): + return self.is_Alpha() and self.info[0]['cpu model'] == 'EV56' + + def _is_PCA56(self): + return self.is_Alpha() and self.info[0]['cpu model'] == 'PCA56' + + # Intel + + #XXX + _is_i386 = _not_impl + + def _is_Intel(self): + return self.info[0]['vendor_id']=='GenuineIntel' + + def _is_i486(self): + return self.info[0]['cpu']=='i486' + + def _is_i586(self): + return self.is_Intel() and self.info[0]['cpu family'] == '5' + + def _is_i686(self): + return self.is_Intel() and self.info[0]['cpu family'] == '6' + + def _is_Celeron(self): + return re.match(r'.*?Celeron', + self.info[0]['model name']) is not None + + def _is_Pentium(self): + return re.match(r'.*?Pentium', + self.info[0]['model name']) is not None + + def _is_PentiumII(self): + return re.match(r'.*?Pentium.*?II\b', + self.info[0]['model name']) is not None + + def _is_PentiumPro(self): + return re.match(r'.*?PentiumPro\b', 
+ self.info[0]['model name']) is not None + + def _is_PentiumMMX(self): + return re.match(r'.*?Pentium.*?MMX\b', + self.info[0]['model name']) is not None + + def _is_PentiumIII(self): + return re.match(r'.*?Pentium.*?III\b', + self.info[0]['model name']) is not None + + def _is_PentiumIV(self): + return re.match(r'.*?Pentium.*?(IV|4)\b', + self.info[0]['model name']) is not None + + def _is_PentiumM(self): + return re.match(r'.*?Pentium.*?M\b', + self.info[0]['model name']) is not None + + def _is_Prescott(self): + return self.is_PentiumIV() and self.has_sse3() + + def _is_Nocona(self): + return (self.is_Intel() + and (self.info[0]['cpu family'] == '6' + or self.info[0]['cpu family'] == '15') + and (self.has_sse3() and not self.has_ssse3()) + and re.match(r'.*?\blm\b', self.info[0]['flags']) is not None) + + def _is_Core2(self): + return (self.is_64bit() and self.is_Intel() and + re.match(r'.*?Core\(TM\)2\b', + self.info[0]['model name']) is not None) + + def _is_Itanium(self): + return re.match(r'.*?Itanium\b', + self.info[0]['family']) is not None + + def _is_XEON(self): + return re.match(r'.*?XEON\b', + self.info[0]['model name'], re.IGNORECASE) is not None + + _is_Xeon = _is_XEON + + # Varia + + def _is_singleCPU(self): + return len(self.info) == 1 + + def _getNCPUs(self): + return len(self.info) + + def _has_fdiv_bug(self): + return self.info[0]['fdiv_bug']=='yes' + + def _has_f00f_bug(self): + return self.info[0]['f00f_bug']=='yes' + + def _has_mmx(self): + return re.match(r'.*?\bmmx\b', self.info[0]['flags']) is not None + + def _has_sse(self): + return re.match(r'.*?\bsse\b', self.info[0]['flags']) is not None + + def _has_sse2(self): + return re.match(r'.*?\bsse2\b', self.info[0]['flags']) is not None + + def _has_sse3(self): + return re.match(r'.*?\bpni\b', self.info[0]['flags']) is not None + + def _has_ssse3(self): + return re.match(r'.*?\bssse3\b', self.info[0]['flags']) is not None + + def _has_3dnow(self): + return re.match(r'.*?\b3dnow\b', self.info[0]['flags']) is not None + + def _has_3dnowext(self): + return re.match(r'.*?\b3dnowext\b', self.info[0]['flags']) is not None + +class IRIXCPUInfo(CPUInfoBase): + info = None + + def __init__(self): + if self.info is not None: + return + info = key_value_from_command('sysconf', sep=' ', + successful_status=(0, 1)) + self.__class__.info = info + + def _not_impl(self): pass + + def _is_singleCPU(self): + return self.info.get('NUM_PROCESSORS') == '1' + + def _getNCPUs(self): + return int(self.info.get('NUM_PROCESSORS', 1)) + + def __cputype(self, n): + return self.info.get('PROCESSORS').split()[0].lower() == 'r%s' % (n) + def _is_r2000(self): return self.__cputype(2000) + def _is_r3000(self): return self.__cputype(3000) + def _is_r3900(self): return self.__cputype(3900) + def _is_r4000(self): return self.__cputype(4000) + def _is_r4100(self): return self.__cputype(4100) + def _is_r4300(self): return self.__cputype(4300) + def _is_r4400(self): return self.__cputype(4400) + def _is_r4600(self): return self.__cputype(4600) + def _is_r4650(self): return self.__cputype(4650) + def _is_r5000(self): return self.__cputype(5000) + def _is_r6000(self): return self.__cputype(6000) + def _is_r8000(self): return self.__cputype(8000) + def _is_r10000(self): return self.__cputype(10000) + def _is_r12000(self): return self.__cputype(12000) + def _is_rorion(self): return self.__cputype('orion') + + def get_ip(self): + try: return self.info.get('MACHINE') + except Exception: pass + def __machine(self, n): + return self.info.get('MACHINE').lower() 
== 'ip%s' % (n) + def _is_IP19(self): return self.__machine(19) + def _is_IP20(self): return self.__machine(20) + def _is_IP21(self): return self.__machine(21) + def _is_IP22(self): return self.__machine(22) + def _is_IP22_4k(self): return self.__machine(22) and self._is_r4000() + def _is_IP22_5k(self): return self.__machine(22) and self._is_r5000() + def _is_IP24(self): return self.__machine(24) + def _is_IP25(self): return self.__machine(25) + def _is_IP26(self): return self.__machine(26) + def _is_IP27(self): return self.__machine(27) + def _is_IP28(self): return self.__machine(28) + def _is_IP30(self): return self.__machine(30) + def _is_IP32(self): return self.__machine(32) + def _is_IP32_5k(self): return self.__machine(32) and self._is_r5000() + def _is_IP32_10k(self): return self.__machine(32) and self._is_r10000() + + +class DarwinCPUInfo(CPUInfoBase): + info = None + + def __init__(self): + if self.info is not None: + return + info = command_info(arch='arch', + machine='machine') + info['sysctl_hw'] = key_value_from_command('sysctl hw', sep='=') + self.__class__.info = info + + def _not_impl(self): pass + + def _getNCPUs(self): + return int(self.info['sysctl_hw'].get('hw.ncpu', 1)) + + def _is_Power_Macintosh(self): + return self.info['sysctl_hw']['hw.machine']=='Power Macintosh' + + def _is_i386(self): + return self.info['arch']=='i386' + def _is_ppc(self): + return self.info['arch']=='ppc' + + def __machine(self, n): + return self.info['machine'] == 'ppc%s'%n + def _is_ppc601(self): return self.__machine(601) + def _is_ppc602(self): return self.__machine(602) + def _is_ppc603(self): return self.__machine(603) + def _is_ppc603e(self): return self.__machine('603e') + def _is_ppc604(self): return self.__machine(604) + def _is_ppc604e(self): return self.__machine('604e') + def _is_ppc620(self): return self.__machine(620) + def _is_ppc630(self): return self.__machine(630) + def _is_ppc740(self): return self.__machine(740) + def _is_ppc7400(self): return self.__machine(7400) + def _is_ppc7450(self): return self.__machine(7450) + def _is_ppc750(self): return self.__machine(750) + def _is_ppc403(self): return self.__machine(403) + def _is_ppc505(self): return self.__machine(505) + def _is_ppc801(self): return self.__machine(801) + def _is_ppc821(self): return self.__machine(821) + def _is_ppc823(self): return self.__machine(823) + def _is_ppc860(self): return self.__machine(860) + + +class SunOSCPUInfo(CPUInfoBase): + + info = None + + def __init__(self): + if self.info is not None: + return + info = command_info(arch='arch', + mach='mach', + uname_i='uname_i', + isainfo_b='isainfo -b', + isainfo_n='isainfo -n', + ) + info['uname_X'] = key_value_from_command('uname -X', sep='=') + for line in command_by_line('psrinfo -v 0'): + m = re.match(r'\s*The (?P
<p>[\w\d]+) processor operates at', line)
+            if m:
+                info['processor'] = m.group('p')
+                break
+        self.__class__.info = info
+
+    def _not_impl(self): pass
+
+    def _is_i386(self):
+        return self.info['isainfo_n']=='i386'
+    def _is_sparc(self):
+        return self.info['isainfo_n']=='sparc'
+    def _is_sparcv9(self):
+        return self.info['isainfo_n']=='sparcv9'
+
+    def _getNCPUs(self):
+        return int(self.info['uname_X'].get('NumCPU', 1))
+
+    def _is_sun4(self):
+        return self.info['arch']=='sun4'
+
+    def _is_SUNW(self):
+        return re.match(r'SUNW', self.info['uname_i']) is not None
+    def _is_sparcstation5(self):
+        return re.match(r'.*SPARCstation-5', self.info['uname_i']) is not None
+    def _is_ultra1(self):
+        return re.match(r'.*Ultra-1', self.info['uname_i']) is not None
+    def _is_ultra250(self):
+        return re.match(r'.*Ultra-250', self.info['uname_i']) is not None
+    def _is_ultra2(self):
+        return re.match(r'.*Ultra-2', self.info['uname_i']) is not None
+    def _is_ultra30(self):
+        return re.match(r'.*Ultra-30', self.info['uname_i']) is not None
+    def _is_ultra4(self):
+        return re.match(r'.*Ultra-4', self.info['uname_i']) is not None
+    def _is_ultra5_10(self):
+        return re.match(r'.*Ultra-5_10', self.info['uname_i']) is not None
+    def _is_ultra5(self):
+        return re.match(r'.*Ultra-5', self.info['uname_i']) is not None
+    def _is_ultra60(self):
+        return re.match(r'.*Ultra-60', self.info['uname_i']) is not None
+    def _is_ultra80(self):
+        return re.match(r'.*Ultra-80', self.info['uname_i']) is not None
+    def _is_ultraenterprice(self):
+        return re.match(r'.*Ultra-Enterprise', self.info['uname_i']) is not None
+    def _is_ultraenterprice10k(self):
+        return re.match(r'.*Ultra-Enterprise-10000', self.info['uname_i']) is not None
+    def _is_sunfire(self):
+        return re.match(r'.*Sun-Fire', self.info['uname_i']) is not None
+    def _is_ultra(self):
+        return re.match(r'.*Ultra', self.info['uname_i']) is not None
+
+    def _is_cpusparcv7(self):
+        return self.info['processor']=='sparcv7'
+    def _is_cpusparcv8(self):
+        return self.info['processor']=='sparcv8'
+    def _is_cpusparcv9(self):
+        return self.info['processor']=='sparcv9'
+
+class Win32CPUInfo(CPUInfoBase):
+
+    info = None
+    pkey = r"HARDWARE\DESCRIPTION\System\CentralProcessor"
+    # XXX: what does the value of
+    #   HKEY_LOCAL_MACHINE\HARDWARE\DESCRIPTION\System\CentralProcessor\0
+    # mean?
+
+    def __init__(self):
+        if self.info is not None:
+            return
+        info = []
+        try:
+            #XXX: Bad style to use so long `try:...except:...`. Fix it!
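+            # Each subkey of pkey above is one logical processor ('0', '1',
+            # ...); its values typically include, e.g. (illustrative values,
+            # not read from a real machine):
+            #     Identifier       = 'Intel64 Family 6 Model 158 Stepping 10'
+            #     VendorIdentifier = 'GenuineIntel'
+            # The Identifier string is what the regular expression below
+            # parses into the Family/Model/Stepping fields.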
+            import winreg
+
+            prgx = re.compile(r"family\s+(?P<FML>\d+)\s+model\s+(?P<MDL>\d+)"
+                              r"\s+stepping\s+(?P<STP>\d+)", re.IGNORECASE)
+            chnd=winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, self.pkey)
+            pnum=0
+            while True:
+                try:
+                    proc=winreg.EnumKey(chnd, pnum)
+                except winreg.error:
+                    break
+                else:
+                    pnum+=1
+                    info.append({"Processor":proc})
+                    phnd=winreg.OpenKey(chnd, proc)
+                    pidx=0
+                    while True:
+                        try:
+                            name, value, vtpe=winreg.EnumValue(phnd, pidx)
+                        except winreg.error:
+                            break
+                        else:
+                            pidx=pidx+1
+                            info[-1][name]=value
+                            if name=="Identifier":
+                                srch=prgx.search(value)
+                                if srch:
+                                    info[-1]["Family"]=int(srch.group("FML"))
+                                    info[-1]["Model"]=int(srch.group("MDL"))
+                                    info[-1]["Stepping"]=int(srch.group("STP"))
+        except Exception as e:
+            print(e, '(ignoring)')
+        self.__class__.info = info
+
+    def _not_impl(self): pass
+
+    # Athlon
+
+    def _is_AMD(self):
+        return self.info[0]['VendorIdentifier']=='AuthenticAMD'
+
+    def _is_Am486(self):
+        return self.is_AMD() and self.info[0]['Family']==4
+
+    def _is_Am5x86(self):
+        return self.is_AMD() and self.info[0]['Family']==4
+
+    def _is_AMDK5(self):
+        return self.is_AMD() and self.info[0]['Family']==5 \
+               and self.info[0]['Model'] in [0, 1, 2, 3]
+
+    def _is_AMDK6(self):
+        return self.is_AMD() and self.info[0]['Family']==5 \
+               and self.info[0]['Model'] in [6, 7]
+
+    def _is_AMDK6_2(self):
+        return self.is_AMD() and self.info[0]['Family']==5 \
+               and self.info[0]['Model']==8
+
+    def _is_AMDK6_3(self):
+        return self.is_AMD() and self.info[0]['Family']==5 \
+               and self.info[0]['Model']==9
+
+    def _is_AMDK7(self):
+        return self.is_AMD() and self.info[0]['Family'] == 6
+
+    # To reliably distinguish between the different types of AMD64 chips
+    # (Athlon64, Opteron, Athlon64 X2, Sempron, Turion 64, etc.) would
+    # require looking at the 'brand' from cpuid
+
+    def _is_AMD64(self):
+        return self.is_AMD() and self.info[0]['Family'] == 15
+
+    # Intel
+
+    def _is_Intel(self):
+        return self.info[0]['VendorIdentifier']=='GenuineIntel'
+
+    def _is_i386(self):
+        return self.info[0]['Family']==3
+
+    def _is_i486(self):
+        return self.info[0]['Family']==4
+
+    def _is_i586(self):
+        return self.is_Intel() and self.info[0]['Family']==5
+
+    def _is_i686(self):
+        return self.is_Intel() and self.info[0]['Family']==6
+
+    def _is_Pentium(self):
+        return self.is_Intel() and self.info[0]['Family']==5
+
+    def _is_PentiumMMX(self):
+        return self.is_Intel() and self.info[0]['Family']==5 \
+               and self.info[0]['Model']==4
+
+    def _is_PentiumPro(self):
+        return self.is_Intel() and self.info[0]['Family']==6 \
+               and self.info[0]['Model']==1
+
+    def _is_PentiumII(self):
+        return self.is_Intel() and self.info[0]['Family']==6 \
+               and self.info[0]['Model'] in [3, 5, 6]
+
+    def _is_PentiumIII(self):
+        return self.is_Intel() and self.info[0]['Family']==6 \
+               and self.info[0]['Model'] in [7, 8, 9, 10, 11]
+
+    def _is_PentiumIV(self):
+        return self.is_Intel() and self.info[0]['Family']==15
+
+    def _is_PentiumM(self):
+        return self.is_Intel() and self.info[0]['Family'] == 6 \
+               and self.info[0]['Model'] in [9, 13, 14]
+
+    def _is_Core2(self):
+        return self.is_Intel() and self.info[0]['Family'] == 6 \
+               and self.info[0]['Model'] in [15, 16, 17]
+
+    # Varia
+
+    def _is_singleCPU(self):
+        return len(self.info) == 1
+
+    def _getNCPUs(self):
+        return len(self.info)
+
+    def _has_mmx(self):
+        if self.is_Intel():
+            return (self.info[0]['Family']==5 and self.info[0]['Model']==4) \
+                   or (self.info[0]['Family'] in [6, 15])
+        elif self.is_AMD():
+            return self.info[0]['Family'] in [5, 6, 15]
+        else:
+            return False
+
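+    # Note: as in the other *CPUInfo classes above, these private
+    # _is_*/_has_* predicates are exposed by CPUInfoBase.__getattr__ as
+    # public is_*()/has_*() callables that swallow exceptions and return a
+    # false-y value instead of raising.  A minimal usage sketch (results
+    # depend on the host machine):
+    #
+    #     from numpy.distutils.cpuinfo import cpu
+    #     if cpu.is_Intel() and cpu.has_sse2():
+    #         pass  # e.g. enable SSE2-specific build flags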
+    def _has_sse(self):
+        if self.is_Intel():
+            return ((self.info[0]['Family']==6 and
+                     self.info[0]['Model'] in [7, 8, 9, 10, 11])
+                    or self.info[0]['Family']==15)
+        elif self.is_AMD():
+            return ((self.info[0]['Family']==6 and
+                     self.info[0]['Model'] in [6, 7, 8, 10])
+                    or self.info[0]['Family']==15)
+        else:
+            return False
+
+    def _has_sse2(self):
+        if self.is_Intel():
+            return self.is_PentiumIV() or self.is_PentiumM() \
+                   or self.is_Core2()
+        elif self.is_AMD():
+            return self.is_AMD64()
+        else:
+            return False
+
+    def _has_3dnow(self):
+        return self.is_AMD() and self.info[0]['Family'] in [5, 6, 15]
+
+    def _has_3dnowext(self):
+        return self.is_AMD() and self.info[0]['Family'] in [6, 15]
+
+if sys.platform.startswith('linux'): # variations: linux2,linux-i386 (any others?)
+    cpuinfo = LinuxCPUInfo
+elif sys.platform.startswith('irix'):
+    cpuinfo = IRIXCPUInfo
+elif sys.platform == 'darwin':
+    cpuinfo = DarwinCPUInfo
+elif sys.platform.startswith('sunos'):
+    cpuinfo = SunOSCPUInfo
+elif sys.platform.startswith('win32'):
+    cpuinfo = Win32CPUInfo
+elif sys.platform.startswith('cygwin'):
+    cpuinfo = LinuxCPUInfo
+#XXX: other OS's. Eg. use _winreg on Win32. Or os.uname on unices.
+else:
+    cpuinfo = CPUInfoBase
+
+cpu = cpuinfo()
+
+#if __name__ == "__main__":
+#
+#    cpu.is_blaa()
+#    cpu.is_Intel()
+#    cpu.is_Alpha()
+#
+#    print('CPU information:'),
+#    for name in dir(cpuinfo):
+#        if name[0]=='_' and name[1]!='_':
+#            r = getattr(cpu,name[1:])()
+#            if r:
+#                if r!=1:
+#                    print('%s=%s' %(name[1:],r))
+#                else:
+#                    print(name[1:]),
+#    print()
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/exec_command.py b/.local/lib/python3.8/site-packages/numpy/distutils/exec_command.py
new file mode 100644
index 0000000..79998cf
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/exec_command.py
@@ -0,0 +1,316 @@
+"""
+exec_command
+
+Implements exec_command function that is (almost) equivalent to
+commands.getstatusoutput function but on NT, DOS systems the
+returned status is actually correct (though, the returned status
+values may be different by a factor). In addition, exec_command
+takes keyword arguments for (re-)defining environment variables.
+
+Provides functions:
+
+  exec_command  --- execute command in a specified directory and
+                    in the modified environment.
+  find_executable --- locate a command using info from environment
+                    variable PATH. Equivalent to posix `which`
+                    command.
+
+Author: Pearu Peterson <pearu@cens.ioc.ee>
+Created: 11 January 2003
+
+Requires: Python 2.x
+
+Successfully tested on:
+
+========  ============  =================================================
+os.name   sys.platform  comments
+========  ============  =================================================
+posix     linux2        Debian (sid) Linux, Python 2.1.3+, 2.2.3+, 2.3.3
+                        PyCrust 0.9.3, Idle 1.0.2
+posix     linux2        Red Hat 9 Linux, Python 2.1.3, 2.2.2, 2.3.2
+posix     sunos5        SunOS 5.9, Python 2.2, 2.3.2
+posix     darwin        Darwin 7.2.0, Python 2.3
+nt        win32         Windows Me
+                        Python 2.3(EE), Idle 1.0, PyCrust 0.7.2
+                        Python 2.1.1 Idle 0.8
+nt        win32         Windows 98, Python 2.1.1. Idle 0.8
+nt        win32         Cygwin 98-4.10, Python 2.1.1(MSC) - echo tests
+                        fail i.e. redefining environment variables may
+                        not work. FIXED: don't use cygwin echo!
+                        Comment: also `cmd /c echo` will not work
+                        but redefining environment variables do work.
+posix     cygwin        Cygwin 98-4.10, Python 2.3.3(cygming special)
+nt        win32         Windows XP, Python 2.3.3
+========  ============  =================================================
+
+Known bugs:
+
+* Tests, that send messages to stderr, fail when executed from MSYS prompt
+  because the messages are lost at some point.
+
+"""
+__all__ = ['exec_command', 'find_executable']
+
+import os
+import sys
+import subprocess
+import locale
+import warnings
+
+from numpy.distutils.misc_util import is_sequence, make_temp_file
+from numpy.distutils import log
+
+def filepath_from_subprocess_output(output):
+    """
+    Convert `bytes` in the encoding used by a subprocess into a filesystem-appropriate `str`.
+
+    Inherited from `exec_command`, and possibly incorrect.
+    """
+    mylocale = locale.getpreferredencoding(False)
+    if mylocale is None:
+        mylocale = 'ascii'
+    output = output.decode(mylocale, errors='replace')
+    output = output.replace('\r\n', '\n')
+    # Another historical oddity
+    if output[-1:] == '\n':
+        output = output[:-1]
+    return output
+
+
+def forward_bytes_to_stdout(val):
+    """
+    Forward bytes from a subprocess call to the console, without attempting to
+    decode them.
+
+    The assumption is that the subprocess call already returned bytes in
+    a suitable encoding.
+    """
+    if hasattr(sys.stdout, 'buffer'):
+        # use the underlying binary output if there is one
+        sys.stdout.buffer.write(val)
+    elif hasattr(sys.stdout, 'encoding'):
+        # round-trip the encoding if necessary
+        sys.stdout.write(val.decode(sys.stdout.encoding))
+    else:
+        # make a best-guess at the encoding
+        sys.stdout.write(val.decode('utf8', errors='replace'))
+
+
+def temp_file_name():
+    # 2019-01-30, 1.17
+    warnings.warn('temp_file_name is deprecated since NumPy v1.17, use '
+                  'tempfile.mkstemp instead', DeprecationWarning, stacklevel=1)
+    fo, name = make_temp_file()
+    fo.close()
+    return name
+
+def get_pythonexe():
+    pythonexe = sys.executable
+    if os.name in ['nt', 'dos']:
+        fdir, fn = os.path.split(pythonexe)
+        fn = fn.upper().replace('PYTHONW', 'PYTHON')
+        pythonexe = os.path.join(fdir, fn)
+        assert os.path.isfile(pythonexe), '%r is not a file' % (pythonexe,)
+    return pythonexe
+
+def find_executable(exe, path=None, _cache={}):
+    """Return full path of an executable or None.
+
+    Symbolic links are not followed.
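+
+    A minimal usage sketch (the resolved path is machine-dependent, and the
+    result is None when nothing suitable is found on PATH):
+
+        >>> find_executable('gfortran')                    # doctest: +SKIP
+        '/usr/bin/gfortran'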
+ """ + key = exe, path + try: + return _cache[key] + except KeyError: + pass + log.debug('find_executable(%r)' % exe) + orig_exe = exe + + if path is None: + path = os.environ.get('PATH', os.defpath) + if os.name=='posix': + realpath = os.path.realpath + else: + realpath = lambda a:a + + if exe.startswith('"'): + exe = exe[1:-1] + + suffixes = [''] + if os.name in ['nt', 'dos', 'os2']: + fn, ext = os.path.splitext(exe) + extra_suffixes = ['.exe', '.com', '.bat'] + if ext.lower() not in extra_suffixes: + suffixes = extra_suffixes + + if os.path.isabs(exe): + paths = [''] + else: + paths = [ os.path.abspath(p) for p in path.split(os.pathsep) ] + + for path in paths: + fn = os.path.join(path, exe) + for s in suffixes: + f_ext = fn+s + if not os.path.islink(f_ext): + f_ext = realpath(f_ext) + if os.path.isfile(f_ext) and os.access(f_ext, os.X_OK): + log.info('Found executable %s' % f_ext) + _cache[key] = f_ext + return f_ext + + log.warn('Could not locate executable %s' % orig_exe) + return None + +############################################################ + +def _preserve_environment( names ): + log.debug('_preserve_environment(%r)' % (names)) + env = {name: os.environ.get(name) for name in names} + return env + +def _update_environment( **env ): + log.debug('_update_environment(...)') + for name, value in env.items(): + os.environ[name] = value or '' + +def exec_command(command, execute_in='', use_shell=None, use_tee=None, + _with_python = 1, **env ): + """ + Return (status,output) of executed command. + + .. deprecated:: 1.17 + Use subprocess.Popen instead + + Parameters + ---------- + command : str + A concatenated string of executable and arguments. + execute_in : str + Before running command ``cd execute_in`` and after ``cd -``. + use_shell : {bool, None}, optional + If True, execute ``sh -c command``. Default None (True) + use_tee : {bool, None}, optional + If True use tee. Default None (True) + + + Returns + ------- + res : str + Both stdout and stderr messages. + + Notes + ----- + On NT, DOS systems the returned status is correct for external commands. + Wild cards will not work for non-posix systems or when use_shell=0. + + """ + # 2019-01-30, 1.17 + warnings.warn('exec_command is deprecated since NumPy v1.17, use ' + 'subprocess.Popen instead', DeprecationWarning, stacklevel=1) + log.debug('exec_command(%r,%s)' % (command, + ','.join(['%s=%r'%kv for kv in env.items()]))) + + if use_tee is None: + use_tee = os.name=='posix' + if use_shell is None: + use_shell = os.name=='posix' + execute_in = os.path.abspath(execute_in) + oldcwd = os.path.abspath(os.getcwd()) + + if __name__[-12:] == 'exec_command': + exec_dir = os.path.dirname(os.path.abspath(__file__)) + elif os.path.isfile('exec_command.py'): + exec_dir = os.path.abspath('.') + else: + exec_dir = os.path.abspath(sys.argv[0]) + if os.path.isfile(exec_dir): + exec_dir = os.path.dirname(exec_dir) + + if oldcwd!=execute_in: + os.chdir(execute_in) + log.debug('New cwd: %s' % execute_in) + else: + log.debug('Retaining cwd: %s' % oldcwd) + + oldenv = _preserve_environment( list(env.keys()) ) + _update_environment( **env ) + + try: + st = _exec_command(command, + use_shell=use_shell, + use_tee=use_tee, + **env) + finally: + if oldcwd!=execute_in: + os.chdir(oldcwd) + log.debug('Restored cwd to %s' % oldcwd) + _update_environment(**oldenv) + + return st + + +def _exec_command(command, use_shell=None, use_tee = None, **env): + """ + Internal workhorse for exec_command(). 
+ """ + if use_shell is None: + use_shell = os.name=='posix' + if use_tee is None: + use_tee = os.name=='posix' + + if os.name == 'posix' and use_shell: + # On POSIX, subprocess always uses /bin/sh, override + sh = os.environ.get('SHELL', '/bin/sh') + if is_sequence(command): + command = [sh, '-c', ' '.join(command)] + else: + command = [sh, '-c', command] + use_shell = False + + elif os.name == 'nt' and is_sequence(command): + # On Windows, join the string for CreateProcess() ourselves as + # subprocess does it a bit differently + command = ' '.join(_quote_arg(arg) for arg in command) + + # Inherit environment by default + env = env or None + try: + # universal_newlines is set to False so that communicate() + # will return bytes. We need to decode the output ourselves + # so that Python will not raise a UnicodeDecodeError when + # it encounters an invalid character; rather, we simply replace it + proc = subprocess.Popen(command, shell=use_shell, env=env, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + universal_newlines=False) + except OSError: + # Return 127, as os.spawn*() and /bin/sh do + return 127, '' + + text, err = proc.communicate() + mylocale = locale.getpreferredencoding(False) + if mylocale is None: + mylocale = 'ascii' + text = text.decode(mylocale, errors='replace') + text = text.replace('\r\n', '\n') + # Another historical oddity + if text[-1:] == '\n': + text = text[:-1] + + if use_tee and text: + print(text) + return proc.returncode, text + + +def _quote_arg(arg): + """ + Quote the argument for safe use in a shell command line. + """ + # If there is a quote in the string, assume relevants parts of the + # string are already quoted (e.g. '-I"C:\\Program Files\\..."') + if '"' not in arg and ' ' in arg: + return '"%s"' % arg + return arg + +############################################################ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/extension.py b/.local/lib/python3.8/site-packages/numpy/distutils/extension.py new file mode 100644 index 0000000..3ede013 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/extension.py @@ -0,0 +1,107 @@ +"""distutils.extension + +Provides the Extension class, used to describe C/C++ extension +modules in setup scripts. + +Overridden to support f2py. + +""" +import re +from distutils.extension import Extension as old_Extension + + +cxx_ext_re = re.compile(r'.*\.(cpp|cxx|cc)\Z', re.I).match +fortran_pyf_ext_re = re.compile(r'.*\.(f90|f95|f77|for|ftn|f|pyf)\Z', re.I).match + + +class Extension(old_Extension): + """ + Parameters + ---------- + name : str + Extension name. + sources : list of str + List of source file locations relative to the top directory of + the package. + extra_compile_args : list of str + Extra command line arguments to pass to the compiler. + extra_f77_compile_args : list of str + Extra command line arguments to pass to the fortran77 compiler. + extra_f90_compile_args : list of str + Extra command line arguments to pass to the fortran90 compiler. 
+ """ + def __init__( + self, name, sources, + include_dirs=None, + define_macros=None, + undef_macros=None, + library_dirs=None, + libraries=None, + runtime_library_dirs=None, + extra_objects=None, + extra_compile_args=None, + extra_link_args=None, + export_symbols=None, + swig_opts=None, + depends=None, + language=None, + f2py_options=None, + module_dirs=None, + extra_c_compile_args=None, + extra_cxx_compile_args=None, + extra_f77_compile_args=None, + extra_f90_compile_args=None,): + + old_Extension.__init__( + self, name, [], + include_dirs=include_dirs, + define_macros=define_macros, + undef_macros=undef_macros, + library_dirs=library_dirs, + libraries=libraries, + runtime_library_dirs=runtime_library_dirs, + extra_objects=extra_objects, + extra_compile_args=extra_compile_args, + extra_link_args=extra_link_args, + export_symbols=export_symbols) + + # Avoid assert statements checking that sources contains strings: + self.sources = sources + + # Python 2.4 distutils new features + self.swig_opts = swig_opts or [] + # swig_opts is assumed to be a list. Here we handle the case where it + # is specified as a string instead. + if isinstance(self.swig_opts, str): + import warnings + msg = "swig_opts is specified as a string instead of a list" + warnings.warn(msg, SyntaxWarning, stacklevel=2) + self.swig_opts = self.swig_opts.split() + + # Python 2.3 distutils new features + self.depends = depends or [] + self.language = language + + # numpy_distutils features + self.f2py_options = f2py_options or [] + self.module_dirs = module_dirs or [] + self.extra_c_compile_args = extra_c_compile_args or [] + self.extra_cxx_compile_args = extra_cxx_compile_args or [] + self.extra_f77_compile_args = extra_f77_compile_args or [] + self.extra_f90_compile_args = extra_f90_compile_args or [] + + return + + def has_cxx_sources(self): + for source in self.sources: + if cxx_ext_re(str(source)): + return True + return False + + def has_f2py_sources(self): + for source in self.sources: + if fortran_pyf_ext_re(source): + return True + return False + +# class Extension diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__init__.py b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__init__.py new file mode 100644 index 0000000..d8dcfa8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__init__.py @@ -0,0 +1,1024 @@ +"""numpy.distutils.fcompiler + +Contains FCompiler, an abstract base class that defines the interface +for the numpy.distutils Fortran compiler abstraction model. + +Terminology: + +To be consistent, where the term 'executable' is used, it means the single +file, like 'gcc', that is executed, and should be a string. In contrast, +'command' means the entire command line, like ['gcc', '-c', 'file.c'], and +should be a list. + +But note that FCompiler.executables is actually a dictionary of commands. 
+ +""" +__all__ = ['FCompiler', 'new_fcompiler', 'show_fcompilers', + 'dummy_fortran_file'] + +import os +import sys +import re + +from distutils.sysconfig import get_python_lib +from distutils.fancy_getopt import FancyGetopt +from distutils.errors import DistutilsModuleError, \ + DistutilsExecError, CompileError, LinkError, DistutilsPlatformError +from distutils.util import split_quoted, strtobool + +from numpy.distutils.ccompiler import CCompiler, gen_lib_options +from numpy.distutils import log +from numpy.distutils.misc_util import is_string, all_strings, is_sequence, \ + make_temp_file, get_shared_lib_extension +from numpy.distutils.exec_command import find_executable +from numpy.distutils import _shell_utils + +from .environment import EnvironmentConfig + +__metaclass__ = type + +class CompilerNotFound(Exception): + pass + +def flaglist(s): + if is_string(s): + return split_quoted(s) + else: + return s + +def str2bool(s): + if is_string(s): + return strtobool(s) + return bool(s) + +def is_sequence_of_strings(seq): + return is_sequence(seq) and all_strings(seq) + +class FCompiler(CCompiler): + """Abstract base class to define the interface that must be implemented + by real Fortran compiler classes. + + Methods that subclasses may redefine: + + update_executables(), find_executables(), get_version() + get_flags(), get_flags_opt(), get_flags_arch(), get_flags_debug() + get_flags_f77(), get_flags_opt_f77(), get_flags_arch_f77(), + get_flags_debug_f77(), get_flags_f90(), get_flags_opt_f90(), + get_flags_arch_f90(), get_flags_debug_f90(), + get_flags_fix(), get_flags_linker_so() + + DON'T call these methods (except get_version) after + constructing a compiler instance or inside any other method. + All methods, except update_executables() and find_executables(), + may call the get_version() method. + + After constructing a compiler instance, always call customize(dist=None) + method that finalizes compiler construction and makes the following + attributes available: + compiler_f77 + compiler_f90 + compiler_fix + linker_so + archiver + ranlib + libraries + library_dirs + """ + + # These are the environment variables and distutils keys used. + # Each configuration description is + # (, , , , ) + # The hook names are handled by the self._environment_hook method. + # - names starting with 'self.' call methods in this class + # - names starting with 'exe.' return the key in the executables dict + # - names like 'flags.YYY' return self.get_flag_YYY() + # convert is either None or a function to convert a string to the + # appropriate type used. 
+ + distutils_vars = EnvironmentConfig( + distutils_section='config_fc', + noopt = (None, None, 'noopt', str2bool, False), + noarch = (None, None, 'noarch', str2bool, False), + debug = (None, None, 'debug', str2bool, False), + verbose = (None, None, 'verbose', str2bool, False), + ) + + command_vars = EnvironmentConfig( + distutils_section='config_fc', + compiler_f77 = ('exe.compiler_f77', 'F77', 'f77exec', None, False), + compiler_f90 = ('exe.compiler_f90', 'F90', 'f90exec', None, False), + compiler_fix = ('exe.compiler_fix', 'F90', 'f90exec', None, False), + version_cmd = ('exe.version_cmd', None, None, None, False), + linker_so = ('exe.linker_so', 'LDSHARED', 'ldshared', None, False), + linker_exe = ('exe.linker_exe', 'LD', 'ld', None, False), + archiver = (None, 'AR', 'ar', None, False), + ranlib = (None, 'RANLIB', 'ranlib', None, False), + ) + + flag_vars = EnvironmentConfig( + distutils_section='config_fc', + f77 = ('flags.f77', 'F77FLAGS', 'f77flags', flaglist, True), + f90 = ('flags.f90', 'F90FLAGS', 'f90flags', flaglist, True), + free = ('flags.free', 'FREEFLAGS', 'freeflags', flaglist, True), + fix = ('flags.fix', None, None, flaglist, False), + opt = ('flags.opt', 'FOPT', 'opt', flaglist, True), + opt_f77 = ('flags.opt_f77', None, None, flaglist, False), + opt_f90 = ('flags.opt_f90', None, None, flaglist, False), + arch = ('flags.arch', 'FARCH', 'arch', flaglist, False), + arch_f77 = ('flags.arch_f77', None, None, flaglist, False), + arch_f90 = ('flags.arch_f90', None, None, flaglist, False), + debug = ('flags.debug', 'FDEBUG', 'fdebug', flaglist, True), + debug_f77 = ('flags.debug_f77', None, None, flaglist, False), + debug_f90 = ('flags.debug_f90', None, None, flaglist, False), + flags = ('self.get_flags', 'FFLAGS', 'fflags', flaglist, True), + linker_so = ('flags.linker_so', 'LDFLAGS', 'ldflags', flaglist, True), + linker_exe = ('flags.linker_exe', 'LDFLAGS', 'ldflags', flaglist, True), + ar = ('flags.ar', 'ARFLAGS', 'arflags', flaglist, True), + ) + + language_map = {'.f': 'f77', + '.for': 'f77', + '.F': 'f77', # XXX: needs preprocessor + '.ftn': 'f77', + '.f77': 'f77', + '.f90': 'f90', + '.F90': 'f90', # XXX: needs preprocessor + '.f95': 'f90', + } + language_order = ['f90', 'f77'] + + + # These will be set by the subclass + + compiler_type = None + compiler_aliases = () + version_pattern = None + + possible_executables = [] + executables = { + 'version_cmd': ["f77", "-v"], + 'compiler_f77': ["f77"], + 'compiler_f90': ["f90"], + 'compiler_fix': ["f90", "-fixed"], + 'linker_so': ["f90", "-shared"], + 'linker_exe': ["f90"], + 'archiver': ["ar", "-cr"], + 'ranlib': None, + } + + # If compiler does not support compiling Fortran 90 then it can + # suggest using another compiler. For example, gnu would suggest + # gnu95 compiler type when there are F90 sources. + suggested_f90_compiler = None + + compile_switch = "-c" + object_switch = "-o " # Ending space matters! It will be stripped + # but if it is missing then object_switch + # will be prefixed to object file name by + # string concatenation. + library_switch = "-o " # Ditto! + + # Switch to specify where module files are created and searched + # for USE statement. Normally it is a string and also here ending + # space matters. See above. + module_dir_switch = None + + # Switch to specify where module files are searched for USE statement. 
+ module_include_switch = '-I' + + pic_flags = [] # Flags to create position-independent code + + src_extensions = ['.for', '.ftn', '.f77', '.f', '.f90', '.f95', '.F', '.F90', '.FOR'] + obj_extension = ".o" + + shared_lib_extension = get_shared_lib_extension() + static_lib_extension = ".a" # or .lib + static_lib_format = "lib%s%s" # or %s%s + shared_lib_format = "%s%s" + exe_extension = "" + + _exe_cache = {} + + _executable_keys = ['version_cmd', 'compiler_f77', 'compiler_f90', + 'compiler_fix', 'linker_so', 'linker_exe', 'archiver', + 'ranlib'] + + # This will be set by new_fcompiler when called in + # command/{build_ext.py, build_clib.py, config.py} files. + c_compiler = None + + # extra_{f77,f90}_compile_args are set by build_ext.build_extension method + extra_f77_compile_args = [] + extra_f90_compile_args = [] + + def __init__(self, *args, **kw): + CCompiler.__init__(self, *args, **kw) + self.distutils_vars = self.distutils_vars.clone(self._environment_hook) + self.command_vars = self.command_vars.clone(self._environment_hook) + self.flag_vars = self.flag_vars.clone(self._environment_hook) + self.executables = self.executables.copy() + for e in self._executable_keys: + if e not in self.executables: + self.executables[e] = None + + # Some methods depend on .customize() being called first, so + # this keeps track of whether that's happened yet. + self._is_customised = False + + def __copy__(self): + obj = self.__new__(self.__class__) + obj.__dict__.update(self.__dict__) + obj.distutils_vars = obj.distutils_vars.clone(obj._environment_hook) + obj.command_vars = obj.command_vars.clone(obj._environment_hook) + obj.flag_vars = obj.flag_vars.clone(obj._environment_hook) + obj.executables = obj.executables.copy() + return obj + + def copy(self): + return self.__copy__() + + # Use properties for the attributes used by CCompiler. Setting them + # as attributes from the self.executables dictionary is error-prone, + # so we get them from there each time. + def _command_property(key): + def fget(self): + assert self._is_customised + return self.executables[key] + return property(fget=fget) + version_cmd = _command_property('version_cmd') + compiler_f77 = _command_property('compiler_f77') + compiler_f90 = _command_property('compiler_f90') + compiler_fix = _command_property('compiler_fix') + linker_so = _command_property('linker_so') + linker_exe = _command_property('linker_exe') + archiver = _command_property('archiver') + ranlib = _command_property('ranlib') + + # Make our terminology consistent. + def set_executable(self, key, value): + self.set_command(key, value) + + def set_commands(self, **kw): + for k, v in kw.items(): + self.set_command(k, v) + + def set_command(self, key, value): + if not key in self._executable_keys: + raise ValueError( + "unknown executable '%s' for class %s" % + (key, self.__class__.__name__)) + if is_string(value): + value = split_quoted(value) + assert value is None or is_sequence_of_strings(value[1:]), (key, value) + self.executables[key] = value + + ###################################################################### + ## Methods that subclasses may redefine. But don't call these methods! + ## They are private to FCompiler class and may return unexpected + ## results if used elsewhere. So, you have been warned.. + + def find_executables(self): + """Go through the self.executables dictionary, and attempt to + find and assign appropriate executables. 
+
+        Executable names are looked for in the environment (environment
+        variables, the distutils.cfg, and command line), the 0th-element of
+        the command list, and the self.possible_executables list.
+
+        Also, if the 0th element is "<F77>" or "<F90>", the Fortran 77
+        or the Fortran 90 compiler executable is used, unless overridden
+        by an environment setting.
+
+        Subclasses should call this if overridden.
+        """
+        assert self._is_customised
+        exe_cache = self._exe_cache
+        def cached_find_executable(exe):
+            if exe in exe_cache:
+                return exe_cache[exe]
+            fc_exe = find_executable(exe)
+            exe_cache[exe] = exe_cache[fc_exe] = fc_exe
+            return fc_exe
+        def verify_command_form(name, value):
+            if value is not None and not is_sequence_of_strings(value):
+                raise ValueError(
+                    "%s value %r is invalid in class %s" %
+                    (name, value, self.__class__.__name__))
+        def set_exe(exe_key, f77=None, f90=None):
+            cmd = self.executables.get(exe_key, None)
+            if not cmd:
+                return None
+            # Note that we get cmd[0] here if the environment doesn't
+            # have anything set
+            exe_from_environ = getattr(self.command_vars, exe_key)
+            if not exe_from_environ:
+                possibles = [f90, f77] + self.possible_executables
+            else:
+                possibles = [exe_from_environ] + self.possible_executables
+
+            seen = set()
+            unique_possibles = []
+            for e in possibles:
+                if e == '<F77>':
+                    e = f77
+                elif e == '<F90>':
+                    e = f90
+                if not e or e in seen:
+                    continue
+                seen.add(e)
+                unique_possibles.append(e)
+
+            for exe in unique_possibles:
+                fc_exe = cached_find_executable(exe)
+                if fc_exe:
+                    cmd[0] = fc_exe
+                    return fc_exe
+            self.set_command(exe_key, None)
+            return None
+
+        ctype = self.compiler_type
+        f90 = set_exe('compiler_f90')
+        if not f90:
+            f77 = set_exe('compiler_f77')
+            if f77:
+                log.warn('%s: no Fortran 90 compiler found' % ctype)
+            else:
+                raise CompilerNotFound('%s: f90 nor f77' % ctype)
+        else:
+            f77 = set_exe('compiler_f77', f90=f90)
+            if not f77:
+                log.warn('%s: no Fortran 77 compiler found' % ctype)
+            set_exe('compiler_fix', f90=f90)
+
+        set_exe('linker_so', f77=f77, f90=f90)
+        set_exe('linker_exe', f77=f77, f90=f90)
+        set_exe('version_cmd', f77=f77, f90=f90)
+        set_exe('archiver')
+        set_exe('ranlib')
+
+    def update_executables(self):
+        """Called at the beginning of customisation. Subclasses should
+        override this if they need to set up the executables dictionary.
+
+        Note that self.find_executables() is run afterwards, so the
+        self.executables dictionary values can contain <F77> or <F90> as
+        the command, which will be replaced by the found F77 or F90
+        compiler.
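+
+        A minimal override sketch (the flag is illustrative; <F90> is
+        replaced later by the compiler found by find_executables()):
+
+            def update_executables(self):
+                self.executables['compiler_f90'] = ['<F90>', '-some-flag']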
+ """ + pass + + def get_flags(self): + """List of flags common to all compiler types.""" + return [] + self.pic_flags + + def _get_command_flags(self, key): + cmd = self.executables.get(key, None) + if cmd is None: + return [] + return cmd[1:] + + def get_flags_f77(self): + """List of Fortran 77 specific flags.""" + return self._get_command_flags('compiler_f77') + def get_flags_f90(self): + """List of Fortran 90 specific flags.""" + return self._get_command_flags('compiler_f90') + def get_flags_free(self): + """List of Fortran 90 free format specific flags.""" + return [] + def get_flags_fix(self): + """List of Fortran 90 fixed format specific flags.""" + return self._get_command_flags('compiler_fix') + def get_flags_linker_so(self): + """List of linker flags to build a shared library.""" + return self._get_command_flags('linker_so') + def get_flags_linker_exe(self): + """List of linker flags to build an executable.""" + return self._get_command_flags('linker_exe') + def get_flags_ar(self): + """List of archiver flags. """ + return self._get_command_flags('archiver') + def get_flags_opt(self): + """List of architecture independent compiler flags.""" + return [] + def get_flags_arch(self): + """List of architecture dependent compiler flags.""" + return [] + def get_flags_debug(self): + """List of compiler flags to compile with debugging information.""" + return [] + + get_flags_opt_f77 = get_flags_opt_f90 = get_flags_opt + get_flags_arch_f77 = get_flags_arch_f90 = get_flags_arch + get_flags_debug_f77 = get_flags_debug_f90 = get_flags_debug + + def get_libraries(self): + """List of compiler libraries.""" + return self.libraries[:] + def get_library_dirs(self): + """List of compiler library directories.""" + return self.library_dirs[:] + + def get_version(self, force=False, ok_status=[0]): + assert self._is_customised + version = CCompiler.get_version(self, force=force, ok_status=ok_status) + if version is None: + raise CompilerNotFound() + return version + + + ############################################################ + + ## Public methods: + + def customize(self, dist = None): + """Customize Fortran compiler. + + This method gets Fortran compiler specific information from + (i) class definition, (ii) environment, (iii) distutils config + files, and (iv) command line (later overrides earlier). + + This method should be always called after constructing a + compiler instance. But not in __init__ because Distribution + instance is needed for (iii) and (iv). + """ + log.info('customize %s' % (self.__class__.__name__)) + + self._is_customised = True + + self.distutils_vars.use_distribution(dist) + self.command_vars.use_distribution(dist) + self.flag_vars.use_distribution(dist) + + self.update_executables() + + # find_executables takes care of setting the compiler commands, + # version_cmd, linker_so, linker_exe, ar, and ranlib + self.find_executables() + + noopt = self.distutils_vars.get('noopt', False) + noarch = self.distutils_vars.get('noarch', noopt) + debug = self.distutils_vars.get('debug', False) + + f77 = self.command_vars.compiler_f77 + f90 = self.command_vars.compiler_f90 + + f77flags = [] + f90flags = [] + freeflags = [] + fixflags = [] + + if f77: + f77 = _shell_utils.NativeParser.split(f77) + f77flags = self.flag_vars.f77 + if f90: + f90 = _shell_utils.NativeParser.split(f90) + f90flags = self.flag_vars.f90 + freeflags = self.flag_vars.free + # XXX Assuming that free format is default for f90 compiler. 
+        fix = self.command_vars.compiler_fix
+        # NOTE: this and similar examples are probably just
+        # excluding --coverage flag when F90 = gfortran --coverage
+        # instead of putting that flag somewhere more appropriate
+        # this and similar examples where a Fortran compiler
+        # environment variable has been customized by CI or a user
+        # should perhaps eventually be more thoroughly tested and more
+        # robustly handled
+        if fix:
+            fix = _shell_utils.NativeParser.split(fix)
+            fixflags = self.flag_vars.fix + f90flags
+
+        oflags, aflags, dflags = [], [], []
+        # examine get_flags_<tag>_<lang> for extra flags
+        # only add them if the method is different from get_flags_<tag>
+        def get_flags(tag, flags):
+            # note that self.flag_vars.<tag> calls self.get_flags_<tag>()
+            flags.extend(getattr(self.flag_vars, tag))
+            this_get = getattr(self, 'get_flags_' + tag)
+            for name, c, flagvar in [('f77', f77, f77flags),
+                                     ('f90', f90, f90flags),
+                                     ('f90', fix, fixflags)]:
+                t = '%s_%s' % (tag, name)
+                if c and this_get is not getattr(self, 'get_flags_' + t):
+                    flagvar.extend(getattr(self.flag_vars, t))
+        if not noopt:
+            get_flags('opt', oflags)
+        if not noarch:
+            get_flags('arch', aflags)
+        if debug:
+            get_flags('debug', dflags)
+
+        fflags = self.flag_vars.flags + dflags + oflags + aflags
+
+        if f77:
+            self.set_commands(compiler_f77=f77+f77flags+fflags)
+        if f90:
+            self.set_commands(compiler_f90=f90+freeflags+f90flags+fflags)
+        if fix:
+            self.set_commands(compiler_fix=fix+fixflags+fflags)
+
+
+        #XXX: Do we need LDSHARED->SOSHARED, LDFLAGS->SOFLAGS
+        linker_so = self.linker_so
+        if linker_so:
+            linker_so_flags = self.flag_vars.linker_so
+            if sys.platform.startswith('aix'):
+                python_lib = get_python_lib(standard_lib=1)
+                ld_so_aix = os.path.join(python_lib, 'config', 'ld_so_aix')
+                python_exp = os.path.join(python_lib, 'config', 'python.exp')
+                linker_so = [ld_so_aix] + linker_so + ['-bI:'+python_exp]
+            self.set_commands(linker_so=linker_so+linker_so_flags)
+
+        linker_exe = self.linker_exe
+        if linker_exe:
+            linker_exe_flags = self.flag_vars.linker_exe
+            self.set_commands(linker_exe=linker_exe+linker_exe_flags)
+
+        ar = self.command_vars.archiver
+        if ar:
+            arflags = self.flag_vars.ar
+            self.set_commands(archiver=[ar]+arflags)
+
+        self.set_library_dirs(self.get_library_dirs())
+        self.set_libraries(self.get_libraries())
+
+    def dump_properties(self):
+        """Print out the attributes of a compiler instance."""
+        props = []
+        for key in list(self.executables.keys()) + \
+                ['version', 'libraries', 'library_dirs',
+                 'object_switch', 'compile_switch']:
+            if hasattr(self, key):
+                v = getattr(self, key)
+                props.append((key, None, '= '+repr(v)))
+        props.sort()
+
+        pretty_printer = FancyGetopt(props)
+        for l in pretty_printer.generate_help("%s instance properties:" \
+                                              % (self.__class__.__name__)):
+            if l[:4]=='  --':
+                l = '  ' + l[4:]
+            print(l)
+
+    ###################
+
+    def _compile(self, obj, src, ext, cc_args, extra_postargs, pp_opts):
+        """Compile 'src' to product 'obj'."""
+        src_flags = {}
+        if is_f_file(src) and not has_f90_header(src):
+            flavor = ':f77'
+            compiler = self.compiler_f77
+            src_flags = get_f77flags(src)
+            extra_compile_args = self.extra_f77_compile_args or []
+        elif is_free_format(src):
+            flavor = ':f90'
+            compiler = self.compiler_f90
+            if compiler is None:
+                raise DistutilsExecError('f90 not supported by %s needed for %s'\
+                      % (self.__class__.__name__, src))
+            extra_compile_args = self.extra_f90_compile_args or []
+        else:
+            flavor = ':fix'
+            compiler = self.compiler_fix
+            if compiler is None:
+                raise DistutilsExecError('f90 (fixed)
not supported by %s needed for %s'\ + % (self.__class__.__name__, src)) + extra_compile_args = self.extra_f90_compile_args or [] + if self.object_switch[-1]==' ': + o_args = [self.object_switch.strip(), obj] + else: + o_args = [self.object_switch.strip()+obj] + + assert self.compile_switch.strip() + s_args = [self.compile_switch, src] + + if extra_compile_args: + log.info('extra %s options: %r' \ + % (flavor[1:], ' '.join(extra_compile_args))) + + extra_flags = src_flags.get(self.compiler_type, []) + if extra_flags: + log.info('using compile options from source: %r' \ + % ' '.join(extra_flags)) + + command = compiler + cc_args + extra_flags + s_args + o_args \ + + extra_postargs + extra_compile_args + + display = '%s: %s' % (os.path.basename(compiler[0]) + flavor, + src) + try: + self.spawn(command, display=display) + except DistutilsExecError as e: + msg = str(e) + raise CompileError(msg) from None + + def module_options(self, module_dirs, module_build_dir): + options = [] + if self.module_dir_switch is not None: + if self.module_dir_switch[-1]==' ': + options.extend([self.module_dir_switch.strip(), module_build_dir]) + else: + options.append(self.module_dir_switch.strip()+module_build_dir) + else: + print('XXX: module_build_dir=%r option ignored' % (module_build_dir)) + print('XXX: Fix module_dir_switch for ', self.__class__.__name__) + if self.module_include_switch is not None: + for d in [module_build_dir]+module_dirs: + options.append('%s%s' % (self.module_include_switch, d)) + else: + print('XXX: module_dirs=%r option ignored' % (module_dirs)) + print('XXX: Fix module_include_switch for ', self.__class__.__name__) + return options + + def library_option(self, lib): + return "-l" + lib + def library_dir_option(self, dir): + return "-L" + dir + + def link(self, target_desc, objects, + output_filename, output_dir=None, libraries=None, + library_dirs=None, runtime_library_dirs=None, + export_symbols=None, debug=0, extra_preargs=None, + extra_postargs=None, build_temp=None, target_lang=None): + objects, output_dir = self._fix_object_args(objects, output_dir) + libraries, library_dirs, runtime_library_dirs = \ + self._fix_lib_args(libraries, library_dirs, runtime_library_dirs) + + lib_opts = gen_lib_options(self, library_dirs, runtime_library_dirs, + libraries) + if is_string(output_dir): + output_filename = os.path.join(output_dir, output_filename) + elif output_dir is not None: + raise TypeError("'output_dir' must be a string or None") + + if self._need_link(objects, output_filename): + if self.library_switch[-1]==' ': + o_args = [self.library_switch.strip(), output_filename] + else: + o_args = [self.library_switch.strip()+output_filename] + + if is_string(self.objects): + ld_args = objects + [self.objects] + else: + ld_args = objects + self.objects + ld_args = ld_args + lib_opts + o_args + if debug: + ld_args[:0] = ['-g'] + if extra_preargs: + ld_args[:0] = extra_preargs + if extra_postargs: + ld_args.extend(extra_postargs) + self.mkpath(os.path.dirname(output_filename)) + if target_desc == CCompiler.EXECUTABLE: + linker = self.linker_exe[:] + else: + linker = self.linker_so[:] + command = linker + ld_args + try: + self.spawn(command) + except DistutilsExecError as e: + msg = str(e) + raise LinkError(msg) from None + else: + log.debug("skipping %s (up-to-date)", output_filename) + + def _environment_hook(self, name, hook_name): + if hook_name is None: + return None + if is_string(hook_name): + if hook_name.startswith('self.'): + hook_name = hook_name[5:] + hook = getattr(self, 
hook_name) + return hook() + elif hook_name.startswith('exe.'): + hook_name = hook_name[4:] + var = self.executables[hook_name] + if var: + return var[0] + else: + return None + elif hook_name.startswith('flags.'): + hook_name = hook_name[6:] + hook = getattr(self, 'get_flags_' + hook_name) + return hook() + else: + return hook_name() + + def can_ccompiler_link(self, ccompiler): + """ + Check if the given C compiler can link objects produced by + this compiler. + """ + return True + + def wrap_unlinkable_objects(self, objects, output_dir, extra_dll_dir): + """ + Convert a set of object files that are not compatible with the default + linker, to a file that is compatible. + + Parameters + ---------- + objects : list + List of object files to include. + output_dir : str + Output directory to place generated object files. + extra_dll_dir : str + Output directory to place extra DLL files that need to be + included on Windows. + + Returns + ------- + converted_objects : list of str + List of converted object files. + Note that the number of output files is not necessarily + the same as inputs. + + """ + raise NotImplementedError() + + ## class FCompiler + +_default_compilers = ( + # sys.platform mappings + ('win32', ('gnu', 'intelv', 'absoft', 'compaqv', 'intelev', 'gnu95', 'g95', + 'intelvem', 'intelem', 'flang')), + ('cygwin.*', ('gnu', 'intelv', 'absoft', 'compaqv', 'intelev', 'gnu95', 'g95')), + ('linux.*', ('arm', 'gnu95', 'intel', 'lahey', 'pg', 'nv', 'absoft', 'nag', + 'vast', 'compaq', 'intele', 'intelem', 'gnu', 'g95', + 'pathf95', 'nagfor', 'fujitsu')), + ('darwin.*', ('gnu95', 'nag', 'nagfor', 'absoft', 'ibm', 'intel', 'gnu', + 'g95', 'pg')), + ('sunos.*', ('sun', 'gnu', 'gnu95', 'g95')), + ('irix.*', ('mips', 'gnu', 'gnu95',)), + ('aix.*', ('ibm', 'gnu', 'gnu95',)), + # os.name mappings + ('posix', ('gnu', 'gnu95',)), + ('nt', ('gnu', 'gnu95',)), + ('mac', ('gnu95', 'gnu', 'pg')), + ) + +fcompiler_class = None +fcompiler_aliases = None + +def load_all_fcompiler_classes(): + """Cache all the FCompiler classes found in modules in the + numpy.distutils.fcompiler package. + """ + from glob import glob + global fcompiler_class, fcompiler_aliases + if fcompiler_class is not None: + return + pys = os.path.join(os.path.dirname(__file__), '*.py') + fcompiler_class = {} + fcompiler_aliases = {} + for fname in glob(pys): + module_name, ext = os.path.splitext(os.path.basename(fname)) + module_name = 'numpy.distutils.fcompiler.' 
+ module_name + __import__ (module_name) + module = sys.modules[module_name] + if hasattr(module, 'compilers'): + for cname in module.compilers: + klass = getattr(module, cname) + desc = (klass.compiler_type, klass, klass.description) + fcompiler_class[klass.compiler_type] = desc + for alias in klass.compiler_aliases: + if alias in fcompiler_aliases: + raise ValueError("alias %r defined for both %s and %s" + % (alias, klass.__name__, + fcompiler_aliases[alias][1].__name__)) + fcompiler_aliases[alias] = desc + +def _find_existing_fcompiler(compiler_types, + osname=None, platform=None, + requiref90=False, + c_compiler=None): + from numpy.distutils.core import get_distribution + dist = get_distribution(always=True) + for compiler_type in compiler_types: + v = None + try: + c = new_fcompiler(plat=platform, compiler=compiler_type, + c_compiler=c_compiler) + c.customize(dist) + v = c.get_version() + if requiref90 and c.compiler_f90 is None: + v = None + new_compiler = c.suggested_f90_compiler + if new_compiler: + log.warn('Trying %r compiler as suggested by %r ' + 'compiler for f90 support.' % (compiler_type, + new_compiler)) + c = new_fcompiler(plat=platform, compiler=new_compiler, + c_compiler=c_compiler) + c.customize(dist) + v = c.get_version() + if v is not None: + compiler_type = new_compiler + if requiref90 and c.compiler_f90 is None: + raise ValueError('%s does not support compiling f90 codes, ' + 'skipping.' % (c.__class__.__name__)) + except DistutilsModuleError: + log.debug("_find_existing_fcompiler: compiler_type='%s' raised DistutilsModuleError", compiler_type) + except CompilerNotFound: + log.debug("_find_existing_fcompiler: compiler_type='%s' not found", compiler_type) + if v is not None: + return compiler_type + return None + +def available_fcompilers_for_platform(osname=None, platform=None): + if osname is None: + osname = os.name + if platform is None: + platform = sys.platform + matching_compiler_types = [] + for pattern, compiler_type in _default_compilers: + if re.match(pattern, platform) or re.match(pattern, osname): + for ct in compiler_type: + if ct not in matching_compiler_types: + matching_compiler_types.append(ct) + if not matching_compiler_types: + matching_compiler_types.append('gnu') + return matching_compiler_types + +def get_default_fcompiler(osname=None, platform=None, requiref90=False, + c_compiler=None): + """Determine the default Fortran compiler to use for the given + platform.""" + matching_compiler_types = available_fcompilers_for_platform(osname, + platform) + log.info("get_default_fcompiler: matching types: '%s'", + matching_compiler_types) + compiler_type = _find_existing_fcompiler(matching_compiler_types, + osname=osname, + platform=platform, + requiref90=requiref90, + c_compiler=c_compiler) + return compiler_type + +# Flag to avoid rechecking for Fortran compiler every time +failed_fcompilers = set() + +def new_fcompiler(plat=None, + compiler=None, + verbose=0, + dry_run=0, + force=0, + requiref90=False, + c_compiler = None): + """Generate an instance of some FCompiler subclass for the supplied + platform/compiler combination. 
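+
+    A minimal usage sketch (the version string is illustrative; the result
+    is None when no matching Fortran compiler is installed):
+
+        >>> fc = new_fcompiler(compiler='gnu95')           # doctest: +SKIP
+        >>> fc.customize()                                 # doctest: +SKIP
+        >>> fc.get_version()                               # doctest: +SKIP
+        '9.4.0'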
+ """ + global failed_fcompilers + fcompiler_key = (plat, compiler) + if fcompiler_key in failed_fcompilers: + return None + + load_all_fcompiler_classes() + if plat is None: + plat = os.name + if compiler is None: + compiler = get_default_fcompiler(plat, requiref90=requiref90, + c_compiler=c_compiler) + if compiler in fcompiler_class: + module_name, klass, long_description = fcompiler_class[compiler] + elif compiler in fcompiler_aliases: + module_name, klass, long_description = fcompiler_aliases[compiler] + else: + msg = "don't know how to compile Fortran code on platform '%s'" % plat + if compiler is not None: + msg = msg + " with '%s' compiler." % compiler + msg = msg + " Supported compilers are: %s)" \ + % (','.join(fcompiler_class.keys())) + log.warn(msg) + failed_fcompilers.add(fcompiler_key) + return None + + compiler = klass(verbose=verbose, dry_run=dry_run, force=force) + compiler.c_compiler = c_compiler + return compiler + +def show_fcompilers(dist=None): + """Print list of available compilers (used by the "--help-fcompiler" + option to "config_fc"). + """ + if dist is None: + from distutils.dist import Distribution + from numpy.distutils.command.config_compiler import config_fc + dist = Distribution() + dist.script_name = os.path.basename(sys.argv[0]) + dist.script_args = ['config_fc'] + sys.argv[1:] + try: + dist.script_args.remove('--help-fcompiler') + except ValueError: + pass + dist.cmdclass['config_fc'] = config_fc + dist.parse_config_files() + dist.parse_command_line() + compilers = [] + compilers_na = [] + compilers_ni = [] + if not fcompiler_class: + load_all_fcompiler_classes() + platform_compilers = available_fcompilers_for_platform() + for compiler in platform_compilers: + v = None + log.set_verbosity(-2) + try: + c = new_fcompiler(compiler=compiler, verbose=dist.verbose) + c.customize(dist) + v = c.get_version() + except (DistutilsModuleError, CompilerNotFound) as e: + log.debug("show_fcompilers: %s not found" % (compiler,)) + log.debug(repr(e)) + + if v is None: + compilers_na.append(("fcompiler="+compiler, None, + fcompiler_class[compiler][2])) + else: + c.dump_properties() + compilers.append(("fcompiler="+compiler, None, + fcompiler_class[compiler][2] + ' (%s)' % v)) + + compilers_ni = list(set(fcompiler_class.keys()) - set(platform_compilers)) + compilers_ni = [("fcompiler="+fc, None, fcompiler_class[fc][2]) + for fc in compilers_ni] + + compilers.sort() + compilers_na.sort() + compilers_ni.sort() + pretty_printer = FancyGetopt(compilers) + pretty_printer.print_help("Fortran compilers found:") + pretty_printer = FancyGetopt(compilers_na) + pretty_printer.print_help("Compilers available for this " + "platform, but not found:") + if compilers_ni: + pretty_printer = FancyGetopt(compilers_ni) + pretty_printer.print_help("Compilers not available on this platform:") + print("For compiler details, run 'config_fc --verbose' setup command.") + + +def dummy_fortran_file(): + fo, name = make_temp_file(suffix='.f') + fo.write(" subroutine dummy()\n end\n") + fo.close() + return name[:-2] + + +is_f_file = re.compile(r'.*\.(for|ftn|f77|f)\Z', re.I).match +_has_f_header = re.compile(r'-\*-\s*fortran\s*-\*-', re.I).search +_has_f90_header = re.compile(r'-\*-\s*f90\s*-\*-', re.I).search +_has_fix_header = re.compile(r'-\*-\s*fix\s*-\*-', re.I).search +_free_f90_start = re.compile(r'[^c*!]\s*[^\s\d\t]', re.I).match + +def is_free_format(file): + """Check if file is in free format Fortran.""" + # f90 allows both fixed and free format, assuming fixed unless + # signs of free format 
are detected. + result = 0 + with open(file, encoding='latin1') as f: + line = f.readline() + n = 10000 # the number of non-comment lines to scan for hints + if _has_f_header(line) or _has_fix_header(line): + n = 0 + elif _has_f90_header(line): + n = 0 + result = 1 + while n>0 and line: + line = line.rstrip() + if line and line[0]!='!': + n -= 1 + if (line[0]!='\t' and _free_f90_start(line[:5])) or line[-1:]=='&': + result = 1 + break + line = f.readline() + return result + +def has_f90_header(src): + with open(src, encoding='latin1') as f: + line = f.readline() + return _has_f90_header(line) or _has_fix_header(line) + +_f77flags_re = re.compile(r'(c|)f77flags\s*\(\s*(?P\w+)\s*\)\s*=\s*(?P.*)', re.I) +def get_f77flags(src): + """ + Search the first 20 lines of fortran 77 code for line pattern + `CF77FLAGS()=` + Return a dictionary {:}. + """ + flags = {} + with open(src, encoding='latin1') as f: + i = 0 + for line in f: + i += 1 + if i>20: break + m = _f77flags_re.match(line) + if not m: continue + fcname = m.group('fcname').strip() + fflags = m.group('fflags').strip() + flags[fcname] = split_quoted(fflags) + return flags + +# TODO: implement get_f90flags and use it in _compile similarly to get_f77flags + +if __name__ == '__main__': + show_fcompilers() diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..0ed9541 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/absoft.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/absoft.cpython-38.pyc new file mode 100644 index 0000000..a2f2768 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/absoft.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/arm.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/arm.cpython-38.pyc new file mode 100644 index 0000000..c246a53 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/arm.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/compaq.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/compaq.cpython-38.pyc new file mode 100644 index 0000000..7d69829 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/compaq.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/environment.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/environment.cpython-38.pyc new file mode 100644 index 0000000..ed93de1 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/environment.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/fujitsu.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/fujitsu.cpython-38.pyc new file mode 100644 index 0000000..e226e63 Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/fujitsu.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/g95.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/g95.cpython-38.pyc new file mode 100644 index 0000000..f024aec Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/g95.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/gnu.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/gnu.cpython-38.pyc new file mode 100644 index 0000000..1014d52 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/gnu.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/hpux.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/hpux.cpython-38.pyc new file mode 100644 index 0000000..47af092 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/hpux.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/ibm.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/ibm.cpython-38.pyc new file mode 100644 index 0000000..7806f04 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/ibm.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/intel.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/intel.cpython-38.pyc new file mode 100644 index 0000000..79cf077 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/intel.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/lahey.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/lahey.cpython-38.pyc new file mode 100644 index 0000000..1de1a28 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/lahey.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/mips.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/mips.cpython-38.pyc new file mode 100644 index 0000000..eba8d01 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/mips.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/nag.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/nag.cpython-38.pyc new file mode 100644 index 0000000..de10483 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/nag.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/none.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/none.cpython-38.pyc new file mode 100644 index 0000000..c91b823 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/none.cpython-38.pyc differ diff --git 
a/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/nv.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/nv.cpython-38.pyc
new file mode 100644
index 0000000..43d24eb
Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/nv.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/pathf95.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/pathf95.cpython-38.pyc
new file mode 100644
index 0000000..552892d
Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/pathf95.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/pg.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/pg.cpython-38.pyc
new file mode 100644
index 0000000..c08fba8
Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/pg.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/sun.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/sun.cpython-38.pyc
new file mode 100644
index 0000000..672ed40
Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/sun.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/vast.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/vast.cpython-38.pyc
new file mode 100644
index 0000000..d12dd3e
Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/__pycache__/vast.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/absoft.py b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/absoft.py
new file mode 100644
index 0000000..efe3a4c
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/absoft.py
@@ -0,0 +1,156 @@
+
+# http://www.absoft.com/literature/osxuserguide.pdf
+# http://www.absoft.com/documentation.html
+
+# Notes:
+# - when using -g77 then use -DUNDERSCORE_G77 to compile f2py
+#   generated extension modules (works for f2py v2.45.241_1936 and up)
+import os
+
+from numpy.distutils.cpuinfo import cpu
+from numpy.distutils.fcompiler import FCompiler, dummy_fortran_file
+from numpy.distutils.misc_util import cyg2win32
+
+compilers = ['AbsoftFCompiler']
+
+class AbsoftFCompiler(FCompiler):
+
+    compiler_type = 'absoft'
+    description = 'Absoft Corp Fortran Compiler'
+    #version_pattern = r'FORTRAN 77 Compiler (?P<version>[^\s*,]*).*?Absoft Corp'
+    version_pattern = r'(f90:.*?(Absoft Pro FORTRAN Version|FORTRAN 77 Compiler|Absoft Fortran Compiler Version|Copyright Absoft Corporation.*?Version))'+\
+                      r' (?P<version>[^\s*,]*)(.*?Absoft Corp|)'
+
+    # on windows: f90 -V -c dummy.f
+    # f90: Copyright Absoft Corporation 1994-1998 mV2; Cray Research, Inc. 1994-1996 CF90 (2.x.x.x f36t87) Version 2.3 Wed Apr 19, 2006  13:05:16
+
+    # samt5735(8)$ f90 -V -c dummy.f
+    # f90: Copyright Absoft Corporation 1994-2002; Absoft Pro FORTRAN Version 8.0
+    # Note that fink installs g77 as f77, so need to use f90 for detection.
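The version_pattern above is a regular expression with a named group, version; numpy.distutils runs the command in version_cmd (where the <F90> placeholder is substituted with the resolved Fortran executable) and applies version_pattern to the command's output. A minimal, self-contained sketch of that matching step, using a simplified pattern and a hypothetical banner string (not numpy's own code):

import re

# Simplified illustration of FCompiler's version matching.
pattern = r'Absoft Pro FORTRAN Version (?P<version>[^\s*,]*)'
banner = 'f90: Copyright Absoft Corporation 1994-2002; Absoft Pro FORTRAN Version 8.0'
m = re.search(pattern, banner)
if m:
    print(m.group('version'))  # -> 8.0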
+
+    executables = {
+        'version_cmd'  : None,          # set by update_executables
+        'compiler_f77' : ["f77"],
+        'compiler_fix' : ["f90"],
+        'compiler_f90' : ["f90"],
+        'linker_so'    : ["<F90>"],
+        'archiver'     : ["ar", "-cr"],
+        'ranlib'       : ["ranlib"]
+        }
+
+    if os.name=='nt':
+        library_switch = '/out:'      #No space after /out:!
+
+    module_dir_switch = None
+    module_include_switch = '-p'
+
+    def update_executables(self):
+        f = cyg2win32(dummy_fortran_file())
+        self.executables['version_cmd'] = ['<F90>', '-V', '-c',
+                                           f+'.f', '-o', f+'.o']
+
+    def get_flags_linker_so(self):
+        if os.name=='nt':
+            opt = ['/dll']
+        # The "-K shared" switches are being left in for pre-9.0 versions
+        # of Absoft though I don't think versions earlier than 9 can
+        # actually be used to build shared libraries.  In fact, version
+        # 8 of Absoft doesn't recognize "-K shared" and will fail.
+        elif self.get_version() >= '9.0':
+            opt = ['-shared']
+        else:
+            opt = ["-K", "shared"]
+        return opt
+
+    def library_dir_option(self, dir):
+        if os.name=='nt':
+            return ['-link', '/PATH:%s' % (dir)]
+        return "-L" + dir
+
+    def library_option(self, lib):
+        if os.name=='nt':
+            return '%s.lib' % (lib)
+        return "-l" + lib
+
+    def get_library_dirs(self):
+        opt = FCompiler.get_library_dirs(self)
+        d = os.environ.get('ABSOFT')
+        if d:
+            if self.get_version() >= '10.0':
+                # use shared libraries, the static libraries were not compiled -fPIC
+                prefix = 'sh'
+            else:
+                prefix = ''
+            if cpu.is_64bit():
+                suffix = '64'
+            else:
+                suffix = ''
+            opt.append(os.path.join(d, '%slib%s' % (prefix, suffix)))
+        return opt
+
+    def get_libraries(self):
+        opt = FCompiler.get_libraries(self)
+        if self.get_version() >= '11.0':
+            opt.extend(['af90math', 'afio', 'af77math', 'amisc'])
+        elif self.get_version() >= '10.0':
+            opt.extend(['af90math', 'afio', 'af77math', 'U77'])
+        elif self.get_version() >= '8.0':
+            opt.extend(['f90math', 'fio', 'f77math', 'U77'])
+        else:
+            opt.extend(['fio', 'f90math', 'fmath', 'U77'])
+        if os.name =='nt':
+            opt.append('COMDLG32')
+        return opt
+
+    def get_flags(self):
+        opt = FCompiler.get_flags(self)
+        if os.name != 'nt':
+            opt.extend(['-s'])
+            if self.get_version():
+                if self.get_version()>='8.2':
+                    opt.append('-fpic')
+        return opt
+
+    def get_flags_f77(self):
+        opt = FCompiler.get_flags_f77(self)
+        opt.extend(['-N22', '-N90', '-N110'])
+        v = self.get_version()
+        if os.name == 'nt':
+            if v and v>='8.0':
+                opt.extend(['-f', '-N15'])
+        else:
+            opt.append('-f')
+            if v:
+                if v<='4.6':
+                    opt.append('-B108')
+                else:
+                    # Though -N15 is undocumented, it works with
+                    # Absoft 8.0 on Linux
+                    opt.append('-N15')
+        return opt
+
+    def get_flags_f90(self):
+        opt = FCompiler.get_flags_f90(self)
+        opt.extend(["-YCFRL=1", "-YCOM_NAMES=LCS", "-YCOM_PFX", "-YEXT_PFX",
+                    "-YCOM_SFX=_", "-YEXT_SFX=_", "-YEXT_NAMES=LCS"])
+        if self.get_version():
+            if self.get_version()>'4.6':
+                opt.extend(["-YDEALLOC=ALL"])
+        return opt
+
+    def get_flags_fix(self):
+        opt = FCompiler.get_flags_fix(self)
+        opt.extend(["-YCFRL=1", "-YCOM_NAMES=LCS", "-YCOM_PFX", "-YEXT_PFX",
+                    "-YCOM_SFX=_", "-YEXT_SFX=_", "-YEXT_NAMES=LCS"])
+        opt.extend(["-f", "fixed"])
+        return opt
+
+    def get_flags_opt(self):
+        opt = ['-O']
+        return opt
+
+if __name__ == '__main__':
+    from distutils import log
+    log.set_verbosity(2)
+    from numpy.distutils import customized_fcompiler
+    print(customized_fcompiler(compiler='absoft').get_version())
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/arm.py b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/arm.py
new file mode 100644
index 0000000..bc491d9
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/arm.py
@@ -0,0 +1,73 @@
+from __future__ import division, absolute_import, print_function
+
+import sys
+
+from numpy.distutils.fcompiler import FCompiler, dummy_fortran_file
+from sys import platform
+from os.path import join, dirname, normpath
+
+compilers = ['ArmFlangCompiler']
+
+import functools
+
+class ArmFlangCompiler(FCompiler):
+    compiler_type = 'arm'
+    description = 'Arm Compiler'
+    version_pattern = r'\s*Arm.*version (?P<version>[\d.-]+).*'
+
+    ar_exe = 'lib.exe'
+    possible_executables = ['armflang']
+
+    executables = {
+        'version_cmd': ["<F90>", "--version"],
+        'compiler_f77': ["armflang", "-fPIC"],
+        'compiler_fix': ["armflang", "-fPIC", "-ffixed-form"],
+        'compiler_f90': ["armflang", "-fPIC"],
+        'linker_so': ["armflang", "-fPIC", "-shared"],
+        'archiver': ["ar", "-cr"],
+        'ranlib': None
+    }
+
+    pic_flags = ["-fPIC", "-DPIC"]
+    c_compiler = 'arm'
+    module_dir_switch = '-module '  # Don't remove ending space!
+
+    def get_libraries(self):
+        opt = FCompiler.get_libraries(self)
+        opt.extend(['flang', 'flangrti', 'ompstub'])
+        return opt
+
+    @functools.lru_cache(maxsize=128)
+    def get_library_dirs(self):
+        """List of compiler library directories."""
+        opt = FCompiler.get_library_dirs(self)
+        flang_dir = dirname(self.executables['compiler_f77'][0])
+        opt.append(normpath(join(flang_dir, '..', 'lib')))
+
+        return opt
+
+    def get_flags(self):
+        return []
+
+    def get_flags_free(self):
+        return []
+
+    def get_flags_debug(self):
+        return ['-g']
+
+    def get_flags_opt(self):
+        return ['-O3']
+
+    def get_flags_arch(self):
+        return []
+
+    def runtime_library_dir_option(self, dir):
+        return '-Wl,-rpath=%s' % dir
+
+
+if __name__ == '__main__':
+    from distutils import log
+    log.set_verbosity(2)
+    from numpy.distutils import customized_fcompiler
+    print(customized_fcompiler(compiler='armflang').get_version())
+
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/compaq.py b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/compaq.py
new file mode 100644
index 0000000..01314c1
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/compaq.py
@@ -0,0 +1,120 @@
+
+#http://www.compaq.com/fortran/docs/
+import os
+import sys
+
+from numpy.distutils.fcompiler import FCompiler
+from distutils.errors import DistutilsPlatformError
+
+compilers = ['CompaqFCompiler']
+if os.name != 'posix' or sys.platform[:6] == 'cygwin' :
+    # Otherwise we'd get a false positive on posix systems with
+    # case-insensitive filesystems (like darwin), because we'll pick
+    # up /bin/df
+    compilers.append('CompaqVisualFCompiler')
+
+class CompaqFCompiler(FCompiler):
+
+    compiler_type = 'compaq'
+    description = 'Compaq Fortran Compiler'
+    version_pattern = r'Compaq Fortran (?P<version>[^\s]*).*'
+
+    if sys.platform[:5]=='linux':
+        fc_exe = 'fort'
+    else:
+        fc_exe = 'f90'
+
+    executables = {
+        'version_cmd'  : ['<F90>', "-version"],
+        'compiler_f77' : [fc_exe, "-f77rtl", "-fixed"],
+        'compiler_fix' : [fc_exe, "-fixed"],
+        'compiler_f90' : [fc_exe],
+        'linker_so'    : ['<F90>'],
+        'archiver'     : ["ar", "-cr"],
+        'ranlib'       : ["ranlib"]
+        }
+
+    module_dir_switch = '-module ' # not tested
+    module_include_switch = '-I'
+
+    def get_flags(self):
+        return ['-assume no2underscore', '-nomixed_str_len_arg']
+    def get_flags_debug(self):
+        return ['-g', '-check bounds']
+    def get_flags_opt(self):
+        return ['-O4', '-align dcommons', '-assume bigarrays',
+                '-assume nozsize', '-math_library fast']
+    def get_flags_arch(self):
+        return ['-arch host', '-tune host']
+    def get_flags_linker_so(self):
+        if sys.platform[:5]=='linux':
+            return ['-shared']
+        return ['-shared', '-Wl,-expect_unresolved,*']
+
+class CompaqVisualFCompiler(FCompiler):
+
+    compiler_type = 'compaqv'
+    description = 'DIGITAL or Compaq Visual Fortran Compiler'
+    version_pattern = (r'(DIGITAL|Compaq) Visual Fortran Optimizing Compiler'
+                       r' Version (?P<version>[^\s]*).*')
+
+    compile_switch = '/compile_only'
+    object_switch = '/object:'
+    library_switch = '/OUT:'      #No space after /OUT:!
+
+    static_lib_extension = ".lib"
+    static_lib_format = "%s%s"
+    module_dir_switch = '/module:'
+    module_include_switch = '/I'
+
+    ar_exe = 'lib.exe'
+    fc_exe = 'DF'
+
+    if sys.platform=='win32':
+        from numpy.distutils.msvccompiler import MSVCCompiler
+
+        try:
+            m = MSVCCompiler()
+            m.initialize()
+            ar_exe = m.lib
+        except DistutilsPlatformError:
+            pass
+        except AttributeError as e:
+            if '_MSVCCompiler__root' in str(e):
+                print('Ignoring "%s" (I think it is msvccompiler.py bug)' % (e))
+            else:
+                raise
+        except OSError as e:
+            if not "vcvarsall.bat" in str(e):
+                print("Unexpected OSError in", __file__)
+                raise
+        except ValueError as e:
+            if not "'path'" in str(e):
+                print("Unexpected ValueError in", __file__)
+                raise
+
+    executables = {
+        'version_cmd'  : ['<F90>', "/what"],
+        'compiler_f77' : [fc_exe, "/f77rtl", "/fixed"],
+        'compiler_fix' : [fc_exe, "/fixed"],
+        'compiler_f90' : [fc_exe],
+        'linker_so'    : ['<F90>'],
+        'archiver'     : [ar_exe, "/OUT:"],
+        'ranlib'       : None
+        }
+
+    def get_flags(self):
+        return ['/nologo', '/MD', '/WX', '/iface=(cref,nomixed_str_len_arg)',
+                '/names:lowercase', '/assume:underscore']
+    def get_flags_opt(self):
+        return ['/Ox', '/fast', '/optimize:5', '/unroll:0', '/math_library:fast']
+    def get_flags_arch(self):
+        return ['/threads']
+    def get_flags_debug(self):
+        return ['/debug']
+
+if __name__ == '__main__':
+    from distutils import log
+    log.set_verbosity(2)
+    from numpy.distutils import customized_fcompiler
+    print(customized_fcompiler(compiler='compaq').get_version())
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/environment.py b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/environment.py
new file mode 100644
index 0000000..ecd4d99
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/environment.py
@@ -0,0 +1,88 @@
+import os
+from distutils.dist import Distribution
+
+__metaclass__ = type
+
+class EnvironmentConfig:
+    def __init__(self, distutils_section='ALL', **kw):
+        self._distutils_section = distutils_section
+        self._conf_keys = kw
+        self._conf = None
+        self._hook_handler = None
+
+    def dump_variable(self, name):
+        conf_desc = self._conf_keys[name]
+        hook, envvar, confvar, convert, append = conf_desc
+        if not convert:
+            convert = lambda x : x
+        print('%s.%s:' % (self._distutils_section, name))
+        v = self._hook_handler(name, hook)
+        print('  hook   : %s' % (convert(v),))
+        if envvar:
+            v = os.environ.get(envvar, None)
+            print('  environ: %s' % (convert(v),))
+        if confvar and self._conf:
+            v = self._conf.get(confvar, (None, None))[1]
+            print('  config : %s' % (convert(v),))
+
+    def dump_variables(self):
+        for name in self._conf_keys:
+            self.dump_variable(name)
+
+    def __getattr__(self, name):
+        try:
+            conf_desc = self._conf_keys[name]
+        except KeyError:
+            raise AttributeError(
+                f"'EnvironmentConfig' object has no attribute '{name}'"
+            ) from None
+
+        return self._get_var(name, conf_desc)
+
+    def get(self, name, default=None):
+        try:
+            conf_desc = self._conf_keys[name]
+        except KeyError:
+            return default
+        var = self._get_var(name, conf_desc)
+        if var is None:
+            var = default
+        return var
+
+    def _get_var(self, name, conf_desc):
+        hook, envvar, confvar, convert, append = conf_desc
+        if convert is None:
+            convert = lambda x: x
+        var = self._hook_handler(name, hook)
+        if envvar is not None:
+            envvar_contents = os.environ.get(envvar)
+            if envvar_contents is not None:
+                envvar_contents = convert(envvar_contents)
+                if var and append:
+                    if os.environ.get('NPY_DISTUTILS_APPEND_FLAGS', '1') == '1':
+                        var.extend(envvar_contents)
+                    else:
+                        # NPY_DISTUTILS_APPEND_FLAGS was explicitly set to 0
+                        # to keep old (overwrite flags rather than append to
+                        # them) behavior
+                        var = envvar_contents
+                else:
+                    var = envvar_contents
+        if confvar is not None and self._conf:
+            if confvar in self._conf:
+                source, confvar_contents = self._conf[confvar]
+                var = convert(confvar_contents)
+        return var
+
+
+    def clone(self, hook_handler):
+        ec = self.__class__(distutils_section=self._distutils_section,
+                            **self._conf_keys)
+        ec._hook_handler = hook_handler
+        return ec
+
+    def use_distribution(self, dist):
+        if isinstance(dist, Distribution):
+            self._conf = dist.get_option_dict(self._distutils_section)
+        else:
+            self._conf = dist
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/fujitsu.py b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/fujitsu.py
new file mode 100644
index 0000000..ddce674
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/fujitsu.py
@@ -0,0 +1,46 @@
+"""
+fujitsu
+
+Supports Fujitsu compiler function.
+This compiler is developed by Fujitsu and is used in A64FX on Fugaku.
+"""
+from numpy.distutils.fcompiler import FCompiler
+
+compilers = ['FujitsuFCompiler']
+
+class FujitsuFCompiler(FCompiler):
+    compiler_type = 'fujitsu'
+    description = 'Fujitsu Fortran Compiler'
+
+    possible_executables = ['frt']
+    version_pattern = r'frt \(FRT\) (?P<version>[a-z\d.]+)'
+    # $ frt --version
+    # frt (FRT) x.x.x yyyymmdd
+
+    executables = {
+        'version_cmd'  : ["<F90>", "--version"],
+        'compiler_f77' : ["frt", "-Fixed"],
+        'compiler_fix' : ["frt", "-Fixed"],
+        'compiler_f90' : ["frt"],
+        'linker_so'    : ["frt", "-shared"],
+        'archiver'     : ["ar", "-cr"],
+        'ranlib'       : ["ranlib"]
+        }
+    pic_flags = ['-KPIC']
+    module_dir_switch = '-M'
+    module_include_switch = '-I'
+
+    def get_flags_opt(self):
+        return ['-O3']
+    def get_flags_debug(self):
+        return ['-g']
+    def runtime_library_dir_option(self, dir):
+        return f'-Wl,-rpath={dir}'
+    def get_libraries(self):
+        return ['fj90f', 'fj90i', 'fjsrcinfo']
+
+if __name__ == '__main__':
+    from distutils import log
+    from numpy.distutils import customized_fcompiler
+    log.set_verbosity(2)
+    print(customized_fcompiler('fujitsu').get_version())
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/g95.py b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/g95.py
new file mode 100644
index 0000000..e109a97
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/g95.py
@@ -0,0 +1,42 @@
+# http://g95.sourceforge.net/
+from numpy.distutils.fcompiler import FCompiler
+
+compilers = ['G95FCompiler']
+
+class G95FCompiler(FCompiler):
+    compiler_type = 'g95'
+    description = 'G95 Fortran Compiler'
+
+#    version_pattern = r'G95 \((GCC (?P<gccversion>[\d.]+)|.*?) \(g95!\) (?P<version>.*)\).*'
+    # $ g95 --version
+    # G95 (GCC 4.0.3 (g95!) May 22 2006)
+
+    version_pattern = r'G95 \((GCC (?P<gccversion>[\d.]+)|.*?) \(g95 (?P<version>.*)!\) (?P<date>.*)\).*'
+    # $ g95 --version
+    # G95 (GCC 4.0.3 (g95 0.90!) Aug 22 2006)
+
+    executables = {
+        'version_cmd'  : ["<F90>", "--version"],
+        'compiler_f77' : ["g95", "-ffixed-form"],
+        'compiler_fix' : ["g95", "-ffixed-form"],
+        'compiler_f90' : ["g95"],
+        'linker_so'    : ["<F90>", "-shared"],
+        'archiver'     : ["ar", "-cr"],
+        'ranlib'       : ["ranlib"]
+        }
+    pic_flags = ['-fpic']
+    module_dir_switch = '-fmod='
+    module_include_switch = '-I'
+
+    def get_flags(self):
+        return ['-fno-second-underscore']
+    def get_flags_opt(self):
+        return ['-O']
+    def get_flags_debug(self):
+        return ['-g']
+
+if __name__ == '__main__':
+    from distutils import log
+    from numpy.distutils import customized_fcompiler
+    log.set_verbosity(2)
+    print(customized_fcompiler('g95').get_version())
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/gnu.py b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/gnu.py
new file mode 100644
index 0000000..3917807
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/gnu.py
@@ -0,0 +1,549 @@
+import re
+import os
+import sys
+import warnings
+import platform
+import tempfile
+import hashlib
+import base64
+import subprocess
+from subprocess import Popen, PIPE, STDOUT
+from numpy.distutils.exec_command import filepath_from_subprocess_output
+from numpy.distutils.fcompiler import FCompiler
+from distutils.version import LooseVersion
+
+compilers = ['GnuFCompiler', 'Gnu95FCompiler']
+
+TARGET_R = re.compile(r"Target: ([a-zA-Z0-9_\-]*)")
+
+# XXX: handle cross compilation
+
+
+def is_win64():
+    return sys.platform == "win32" and platform.architecture()[0] == "64bit"
+
+
+class GnuFCompiler(FCompiler):
+    compiler_type = 'gnu'
+    compiler_aliases = ('g77', )
+    description = 'GNU Fortran 77 compiler'
+
+    def gnu_version_match(self, version_string):
+        """Handle the different versions of GNU fortran compilers"""
+        # Strip warning(s) that may be emitted by gfortran
+        while version_string.startswith('gfortran: warning'):
+            version_string =\
+                version_string[version_string.find('\n') + 1:].strip()
+
+        # Gfortran versions from after 2010 will output a simple string
+        # (usually "x.y", "x.y.z" or "x.y.z-q") for ``-dumpversion``; older
+        # gfortrans may still return long version strings (``-dumpversion`` was
+        # an alias for ``--version``)
+        if len(version_string) <= 20:
+            # Try to find a valid version string
+            m = re.search(r'([0-9.]+)', version_string)
+            if m:
+                # g77 provides a longer version string that starts with GNU
+                # Fortran
+                if version_string.startswith('GNU Fortran'):
+                    return ('g77', m.group(1))
+
+                # gfortran only outputs a version string such as #.#.#, so check
+                # if the match is at the start of the string
+                elif m.start() == 0:
+                    return ('gfortran', m.group(1))
+        else:
+            # Output probably from --version, try harder:
+            m = re.search(r'GNU Fortran\s+95.*?([0-9-.]+)', version_string)
+            if m:
+                return ('gfortran', m.group(1))
+            m = re.search(
+                r'GNU Fortran.*?\-?([0-9-.]+\.[0-9-.]+)', version_string)
+            if m:
+                v = m.group(1)
+                if v.startswith('0') or v.startswith('2') or v.startswith('3'):
+                    # the '0' is for early g77's
+                    return ('g77', v)
+                else:
+                    # at some point in the 4.x series, the ' 95' was dropped
+                    # from the version string
+                    return ('gfortran', v)
+
+        # If still nothing, raise an error to make the problem easy to find.
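Two illustrative inputs for the branch logic above, fed through gnu_version_match; the strings are hypothetical but follow the two output shapes the comments describe (a short -dumpversion answer versus a long --version banner). This assumes a numpy version that still ships numpy.distutils:

from numpy.distutils.fcompiler.gnu import GnuFCompiler

# Illustration only; not part of gnu.py itself.
fc = GnuFCompiler()
print(fc.gnu_version_match('4.8.5'))                    # ('gfortran', '4.8.5')
print(fc.gnu_version_match('GNU Fortran (GCC) 3.3.3'))  # ('g77', '3.3.3')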
+ err = 'A valid Fortran version was not found in this string:\n' + raise ValueError(err + version_string) + + def version_match(self, version_string): + v = self.gnu_version_match(version_string) + if not v or v[0] != 'g77': + return None + return v[1] + + possible_executables = ['g77', 'f77'] + executables = { + 'version_cmd' : [None, "-dumpversion"], + 'compiler_f77' : [None, "-g", "-Wall", "-fno-second-underscore"], + 'compiler_f90' : None, # Use --fcompiler=gnu95 for f90 codes + 'compiler_fix' : None, + 'linker_so' : [None, "-g", "-Wall"], + 'archiver' : ["ar", "-cr"], + 'ranlib' : ["ranlib"], + 'linker_exe' : [None, "-g", "-Wall"] + } + module_dir_switch = None + module_include_switch = None + + # Cygwin: f771: warning: -fPIC ignored for target (all code is + # position independent) + if os.name != 'nt' and sys.platform != 'cygwin': + pic_flags = ['-fPIC'] + + # use -mno-cygwin for g77 when Python is not Cygwin-Python + if sys.platform == 'win32': + for key in ['version_cmd', 'compiler_f77', 'linker_so', 'linker_exe']: + executables[key].append('-mno-cygwin') + + g2c = 'g2c' + suggested_f90_compiler = 'gnu95' + + def get_flags_linker_so(self): + opt = self.linker_so[1:] + if sys.platform == 'darwin': + target = os.environ.get('MACOSX_DEPLOYMENT_TARGET', None) + # If MACOSX_DEPLOYMENT_TARGET is set, we simply trust the value + # and leave it alone. But, distutils will complain if the + # environment's value is different from the one in the Python + # Makefile used to build Python. We let distutils handle this + # error checking. + if not target: + # If MACOSX_DEPLOYMENT_TARGET is not set in the environment, + # we try to get it first from sysconfig and then + # fall back to setting it to 10.9 This is a reasonable default + # even when using the official Python dist and those derived + # from it. + import sysconfig + target = sysconfig.get_config_var('MACOSX_DEPLOYMENT_TARGET') + if not target: + target = '10.9' + s = f'Env. variable MACOSX_DEPLOYMENT_TARGET set to {target}' + warnings.warn(s, stacklevel=2) + os.environ['MACOSX_DEPLOYMENT_TARGET'] = str(target) + opt.extend(['-undefined', 'dynamic_lookup', '-bundle']) + else: + opt.append("-shared") + if sys.platform.startswith('sunos'): + # SunOS often has dynamically loaded symbols defined in the + # static library libg2c.a The linker doesn't like this. To + # ignore the problem, use the -mimpure-text flag. It isn't + # the safest thing, but seems to work. 'man gcc' says: + # ".. Instead of using -mimpure-text, you should compile all + # source code with -fpic or -fPIC." 
+ opt.append('-mimpure-text') + return opt + + def get_libgcc_dir(self): + try: + output = subprocess.check_output(self.compiler_f77 + + ['-print-libgcc-file-name']) + except (OSError, subprocess.CalledProcessError): + pass + else: + output = filepath_from_subprocess_output(output) + return os.path.dirname(output) + return None + + def get_libgfortran_dir(self): + if sys.platform[:5] == 'linux': + libgfortran_name = 'libgfortran.so' + elif sys.platform == 'darwin': + libgfortran_name = 'libgfortran.dylib' + else: + libgfortran_name = None + + libgfortran_dir = None + if libgfortran_name: + find_lib_arg = ['-print-file-name={0}'.format(libgfortran_name)] + try: + output = subprocess.check_output( + self.compiler_f77 + find_lib_arg) + except (OSError, subprocess.CalledProcessError): + pass + else: + output = filepath_from_subprocess_output(output) + libgfortran_dir = os.path.dirname(output) + return libgfortran_dir + + def get_library_dirs(self): + opt = [] + if sys.platform[:5] != 'linux': + d = self.get_libgcc_dir() + if d: + # if windows and not cygwin, libg2c lies in a different folder + if sys.platform == 'win32' and not d.startswith('/usr/lib'): + d = os.path.normpath(d) + path = os.path.join(d, "lib%s.a" % self.g2c) + if not os.path.exists(path): + root = os.path.join(d, *((os.pardir, ) * 4)) + d2 = os.path.abspath(os.path.join(root, 'lib')) + path = os.path.join(d2, "lib%s.a" % self.g2c) + if os.path.exists(path): + opt.append(d2) + opt.append(d) + # For Macports / Linux, libgfortran and libgcc are not co-located + lib_gfortran_dir = self.get_libgfortran_dir() + if lib_gfortran_dir: + opt.append(lib_gfortran_dir) + return opt + + def get_libraries(self): + opt = [] + d = self.get_libgcc_dir() + if d is not None: + g2c = self.g2c + '-pic' + f = self.static_lib_format % (g2c, self.static_lib_extension) + if not os.path.isfile(os.path.join(d, f)): + g2c = self.g2c + else: + g2c = self.g2c + + if g2c is not None: + opt.append(g2c) + c_compiler = self.c_compiler + if sys.platform == 'win32' and c_compiler and \ + c_compiler.compiler_type == 'msvc': + opt.append('gcc') + if sys.platform == 'darwin': + opt.append('cc_dynamic') + return opt + + def get_flags_debug(self): + return ['-g'] + + def get_flags_opt(self): + v = self.get_version() + if v and v <= '3.3.3': + # With this compiler version building Fortran BLAS/LAPACK + # with -O3 caused failures in lib.lapack heevr,syevr tests. 
+            opt = ['-O2']
+        else:
+            opt = ['-O3']
+        opt.append('-funroll-loops')
+        return opt
+
+    def _c_arch_flags(self):
+        """ Return detected arch flags from CFLAGS """
+        import sysconfig
+        try:
+            cflags = sysconfig.get_config_vars()['CFLAGS']
+        except KeyError:
+            return []
+        arch_re = re.compile(r"-arch\s+(\w+)")
+        arch_flags = []
+        for arch in arch_re.findall(cflags):
+            arch_flags += ['-arch', arch]
+        return arch_flags
+
+    def get_flags_arch(self):
+        return []
+
+    def runtime_library_dir_option(self, dir):
+        if sys.platform == 'win32' or sys.platform == 'cygwin':
+            # Linux/Solaris/Unix support RPATH, Windows does not
+            raise NotImplementedError
+
+        # TODO: could use -Xlinker here, if it's supported
+        assert "," not in dir
+
+        if sys.platform == 'darwin':
+            return f'-Wl,-rpath,{dir}'
+        elif sys.platform[:3] == 'aix':
+            # AIX RPATH is called LIBPATH
+            return f'-Wl,-blibpath:{dir}'
+        else:
+            return f'-Wl,-rpath={dir}'
+
+
+class Gnu95FCompiler(GnuFCompiler):
+    compiler_type = 'gnu95'
+    compiler_aliases = ('gfortran', )
+    description = 'GNU Fortran 95 compiler'
+
+    def version_match(self, version_string):
+        v = self.gnu_version_match(version_string)
+        if not v or v[0] != 'gfortran':
+            return None
+        v = v[1]
+        if LooseVersion(v) >= "4":
+            # gcc-4 series releases do not support -mno-cygwin option
+            pass
+        else:
+            # use -mno-cygwin flag for gfortran when Python is not
+            # Cygwin-Python
+            if sys.platform == 'win32':
+                for key in [
+                        'version_cmd', 'compiler_f77', 'compiler_f90',
+                        'compiler_fix', 'linker_so', 'linker_exe'
+                ]:
+                    self.executables[key].append('-mno-cygwin')
+        return v
+
+    possible_executables = ['gfortran', 'f95']
+    executables = {
+        'version_cmd'  : ["<F90>", "-dumpversion"],
+        'compiler_f77' : [None, "-Wall", "-g", "-ffixed-form",
+                          "-fno-second-underscore"],
+        'compiler_f90' : [None, "-Wall", "-g",
+                          "-fno-second-underscore"],
+        'compiler_fix' : [None, "-Wall", "-g","-ffixed-form",
+                          "-fno-second-underscore"],
+        'linker_so'    : ["<F90>", "-Wall", "-g"],
+        'archiver'     : ["ar", "-cr"],
+        'ranlib'       : ["ranlib"],
+        'linker_exe'   : [None, "-Wall"]
+    }
+
+    module_dir_switch = '-J'
+    module_include_switch = '-I'
+
+    if sys.platform[:3] == 'aix':
+        executables['linker_so'].append('-lpthread')
+        if platform.architecture()[0][:2] == '64':
+            for key in ['compiler_f77', 'compiler_f90','compiler_fix','linker_so', 'linker_exe']:
+                executables[key].append('-maix64')
+
+    g2c = 'gfortran'
+
+    def _universal_flags(self, cmd):
+        """Return a list of -arch flags for every supported architecture."""
+        if not sys.platform == 'darwin':
+            return []
+        arch_flags = []
+        # get arches the C compiler gets.
+ c_archs = self._c_arch_flags() + if "i386" in c_archs: + c_archs[c_archs.index("i386")] = "i686" + # check the arches the Fortran compiler supports, and compare with + # arch flags from C compiler + for arch in ["ppc", "i686", "x86_64", "ppc64"]: + if _can_target(cmd, arch) and arch in c_archs: + arch_flags.extend(["-arch", arch]) + return arch_flags + + def get_flags(self): + flags = GnuFCompiler.get_flags(self) + arch_flags = self._universal_flags(self.compiler_f90) + if arch_flags: + flags[:0] = arch_flags + return flags + + def get_flags_linker_so(self): + flags = GnuFCompiler.get_flags_linker_so(self) + arch_flags = self._universal_flags(self.linker_so) + if arch_flags: + flags[:0] = arch_flags + return flags + + def get_library_dirs(self): + opt = GnuFCompiler.get_library_dirs(self) + if sys.platform == 'win32': + c_compiler = self.c_compiler + if c_compiler and c_compiler.compiler_type == "msvc": + target = self.get_target() + if target: + d = os.path.normpath(self.get_libgcc_dir()) + root = os.path.join(d, *((os.pardir, ) * 4)) + path = os.path.join(root, "lib") + mingwdir = os.path.normpath(path) + if os.path.exists(os.path.join(mingwdir, "libmingwex.a")): + opt.append(mingwdir) + # For Macports / Linux, libgfortran and libgcc are not co-located + lib_gfortran_dir = self.get_libgfortran_dir() + if lib_gfortran_dir: + opt.append(lib_gfortran_dir) + return opt + + def get_libraries(self): + opt = GnuFCompiler.get_libraries(self) + if sys.platform == 'darwin': + opt.remove('cc_dynamic') + if sys.platform == 'win32': + c_compiler = self.c_compiler + if c_compiler and c_compiler.compiler_type == "msvc": + if "gcc" in opt: + i = opt.index("gcc") + opt.insert(i + 1, "mingwex") + opt.insert(i + 1, "mingw32") + c_compiler = self.c_compiler + if c_compiler and c_compiler.compiler_type == "msvc": + return [] + else: + pass + return opt + + def get_target(self): + try: + output = subprocess.check_output(self.compiler_f77 + ['-v']) + except (OSError, subprocess.CalledProcessError): + pass + else: + output = filepath_from_subprocess_output(output) + m = TARGET_R.search(output) + if m: + return m.group(1) + return "" + + def _hash_files(self, filenames): + h = hashlib.sha1() + for fn in filenames: + with open(fn, 'rb') as f: + while True: + block = f.read(131072) + if not block: + break + h.update(block) + text = base64.b32encode(h.digest()) + text = text.decode('ascii') + return text.rstrip('=') + + def _link_wrapper_lib(self, objects, output_dir, extra_dll_dir, + chained_dlls, is_archive): + """Create a wrapper shared library for the given objects + + Return an MSVC-compatible lib + """ + + c_compiler = self.c_compiler + if c_compiler.compiler_type != "msvc": + raise ValueError("This method only supports MSVC") + + object_hash = self._hash_files(list(objects) + list(chained_dlls)) + + if is_win64(): + tag = 'win_amd64' + else: + tag = 'win32' + + basename = 'lib' + os.path.splitext( + os.path.basename(objects[0]))[0][:8] + root_name = basename + '.' 
+ object_hash + '.gfortran-' + tag + dll_name = root_name + '.dll' + def_name = root_name + '.def' + lib_name = root_name + '.lib' + dll_path = os.path.join(extra_dll_dir, dll_name) + def_path = os.path.join(output_dir, def_name) + lib_path = os.path.join(output_dir, lib_name) + + if os.path.isfile(lib_path): + # Nothing to do + return lib_path, dll_path + + if is_archive: + objects = (["-Wl,--whole-archive"] + list(objects) + + ["-Wl,--no-whole-archive"]) + self.link_shared_object( + objects, + dll_name, + output_dir=extra_dll_dir, + extra_postargs=list(chained_dlls) + [ + '-Wl,--allow-multiple-definition', + '-Wl,--output-def,' + def_path, + '-Wl,--export-all-symbols', + '-Wl,--enable-auto-import', + '-static', + '-mlong-double-64', + ]) + + # No PowerPC! + if is_win64(): + specifier = '/MACHINE:X64' + else: + specifier = '/MACHINE:X86' + + # MSVC specific code + lib_args = ['/def:' + def_path, '/OUT:' + lib_path, specifier] + if not c_compiler.initialized: + c_compiler.initialize() + c_compiler.spawn([c_compiler.lib] + lib_args) + + return lib_path, dll_path + + def can_ccompiler_link(self, compiler): + # MSVC cannot link objects compiled by GNU fortran + return compiler.compiler_type not in ("msvc", ) + + def wrap_unlinkable_objects(self, objects, output_dir, extra_dll_dir): + """ + Convert a set of object files that are not compatible with the default + linker, to a file that is compatible. + """ + if self.c_compiler.compiler_type == "msvc": + # Compile a DLL and return the lib for the DLL as + # the object. Also keep track of previous DLLs that + # we have compiled so that we can link against them. + + # If there are .a archives, assume they are self-contained + # static libraries, and build separate DLLs for each + archives = [] + plain_objects = [] + for obj in objects: + if obj.lower().endswith('.a'): + archives.append(obj) + else: + plain_objects.append(obj) + + chained_libs = [] + chained_dlls = [] + for archive in archives[::-1]: + lib, dll = self._link_wrapper_lib( + [archive], + output_dir, + extra_dll_dir, + chained_dlls=chained_dlls, + is_archive=True) + chained_libs.insert(0, lib) + chained_dlls.insert(0, dll) + + if not plain_objects: + return chained_libs + + lib, dll = self._link_wrapper_lib( + plain_objects, + output_dir, + extra_dll_dir, + chained_dlls=chained_dlls, + is_archive=False) + return [lib] + chained_libs + else: + raise ValueError("Unsupported C compiler") + + +def _can_target(cmd, arch): + """Return true if the architecture supports the -arch flag""" + newcmd = cmd[:] + fid, filename = tempfile.mkstemp(suffix=".f") + os.close(fid) + try: + d = os.path.dirname(filename) + output = os.path.splitext(filename)[0] + ".o" + try: + newcmd.extend(["-arch", arch, "-c", filename]) + p = Popen(newcmd, stderr=STDOUT, stdout=PIPE, cwd=d) + p.communicate() + return p.returncode == 0 + finally: + if os.path.exists(output): + os.remove(output) + finally: + os.remove(filename) + + +if __name__ == '__main__': + from distutils import log + from numpy.distutils import customized_fcompiler + log.set_verbosity(2) + + print(customized_fcompiler('gnu').get_version()) + try: + print(customized_fcompiler('g95').get_version()) + except Exception as e: + print(e) diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/hpux.py b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/hpux.py new file mode 100644 index 0000000..09e6483 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/hpux.py @@ -0,0 +1,41 @@ +from 
numpy.distutils.fcompiler import FCompiler
+
+compilers = ['HPUXFCompiler']
+
+class HPUXFCompiler(FCompiler):
+
+    compiler_type = 'hpux'
+    description = 'HP Fortran 90 Compiler'
+    version_pattern = r'HP F90 (?P<version>[^\s*,]*)'
+
+    executables = {
+        'version_cmd'  : ["f90", "+version"],
+        'compiler_f77' : ["f90"],
+        'compiler_fix' : ["f90"],
+        'compiler_f90' : ["f90"],
+        'linker_so'    : ["ld", "-b"],
+        'archiver'     : ["ar", "-cr"],
+        'ranlib'       : ["ranlib"]
+        }
+    module_dir_switch = None #XXX: fix me
+    module_include_switch = None #XXX: fix me
+    pic_flags = ['+Z']
+    def get_flags(self):
+        return self.pic_flags + ['+ppu', '+DD64']
+    def get_flags_opt(self):
+        return ['-O3']
+    def get_libraries(self):
+        return ['m']
+    def get_library_dirs(self):
+        opt = ['/usr/lib/hpux64']
+        return opt
+    def get_version(self, force=0, ok_status=[256, 0, 1]):
+        # XXX status==256 may indicate 'unrecognized option' or
+        # 'no input file'. So, version_cmd needs more work.
+        return FCompiler.get_version(self, force, ok_status)
+
+if __name__ == '__main__':
+    from distutils import log
+    log.set_verbosity(10)
+    from numpy.distutils import customized_fcompiler
+    print(customized_fcompiler(compiler='hpux').get_version())
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/ibm.py b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/ibm.py
new file mode 100644
index 0000000..eff2440
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/ibm.py
@@ -0,0 +1,97 @@
+import os
+import re
+import sys
+import subprocess
+
+from numpy.distutils.fcompiler import FCompiler
+from numpy.distutils.exec_command import find_executable
+from numpy.distutils.misc_util import make_temp_file
+from distutils import log
+
+compilers = ['IBMFCompiler']
+
+class IBMFCompiler(FCompiler):
+    compiler_type = 'ibm'
+    description = 'IBM XL Fortran Compiler'
+    version_pattern = r'(xlf\(1\)\s*|)IBM XL Fortran ((Advanced Edition |)Version |Enterprise Edition V|for AIX, V)(?P<version>[^\s*]*)'
+    #IBM XL Fortran Enterprise Edition V10.1 for AIX \nVersion: 10.01.0000.0004
+
+    executables = {
+        'version_cmd'  : ["<F77>", "-qversion"],
+        'compiler_f77' : ["xlf"],
+        'compiler_fix' : ["xlf90", "-qfixed"],
+        'compiler_f90' : ["xlf90"],
+        'linker_so'    : ["xlf95"],
+        'archiver'     : ["ar", "-cr"],
+        'ranlib'       : ["ranlib"]
+        }
+
+    def get_version(self,*args,**kwds):
+        version = FCompiler.get_version(self,*args,**kwds)
+
+        if version is None and sys.platform.startswith('aix'):
+            # use lslpp to find out xlf version
+            lslpp = find_executable('lslpp')
+            xlf = find_executable('xlf')
+            if os.path.exists(xlf) and os.path.exists(lslpp):
+                try:
+                    o = subprocess.check_output([lslpp, '-Lc', 'xlfcmp'])
+                except (OSError, subprocess.CalledProcessError):
+                    pass
+                else:
+                    m = re.search(r'xlfcmp:(?P<version>\d+([.]\d+)+)', o)
+                    if m: version = m.group('version')
+
+        xlf_dir = '/etc/opt/ibmcmp/xlf'
+        if version is None and os.path.isdir(xlf_dir):
+            # linux:
+            # If the output of xlf does not contain version info
+            # (that's the case with xlf 8.1, for instance) then
+            # let's try another method:
+            l = sorted(os.listdir(xlf_dir))
+            l.reverse()
+            l = [d for d in l if os.path.isfile(os.path.join(xlf_dir, d, 'xlf.cfg'))]
+            if l:
+                from distutils.version import LooseVersion
+                self.version = version = LooseVersion(l[0])
+        return version
+
+    def get_flags(self):
+        return ['-qextname']
+
+    def get_flags_debug(self):
+        return ['-g']
+
+    def get_flags_linker_so(self):
+        opt = []
+        if sys.platform=='darwin':
+            opt.append('-Wl,-bundle,-flat_namespace,-undefined,suppress')
+        else:
+            opt.append('-bshared')
+        version = self.get_version(ok_status=[0, 40])
+        if version is not None:
+            if sys.platform.startswith('aix'):
+                xlf_cfg = '/etc/xlf.cfg'
+            else:
+                xlf_cfg = '/etc/opt/ibmcmp/xlf/%s/xlf.cfg' % version
+            fo, new_cfg = make_temp_file(suffix='_xlf.cfg')
+            log.info('Creating '+new_cfg)
+            with open(xlf_cfg, 'r') as fi:
+                crt1_match = re.compile(r'\s*crt\s*=\s*(?P<path>.*)/crt1.o').match
+                for line in fi:
+                    m = crt1_match(line)
+                    if m:
+                        fo.write('crt = %s/bundle1.o\n' % (m.group('path')))
+                    else:
+                        fo.write(line)
+            fo.close()
+            opt.append('-F'+new_cfg)
+        return opt
+
+    def get_flags_opt(self):
+        return ['-O3']
+
+if __name__ == '__main__':
+    from numpy.distutils import customized_fcompiler
+    log.set_verbosity(2)
+    print(customized_fcompiler(compiler='ibm').get_version())
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/intel.py b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/intel.py
new file mode 100644
index 0000000..f97c5b3
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/intel.py
@@ -0,0 +1,210 @@
+# http://developer.intel.com/software/products/compilers/flin/
+import sys
+
+from numpy.distutils.ccompiler import simple_version_match
+from numpy.distutils.fcompiler import FCompiler, dummy_fortran_file
+
+compilers = ['IntelFCompiler', 'IntelVisualFCompiler',
+             'IntelItaniumFCompiler', 'IntelItaniumVisualFCompiler',
+             'IntelEM64VisualFCompiler', 'IntelEM64TFCompiler']
+
+
+def intel_version_match(type):
+    # Match against the important stuff in the version string
+    return simple_version_match(start=r'Intel.*?Fortran.*?(?:%s).*?Version' % (type,))
+
+
+class BaseIntelFCompiler(FCompiler):
+    def update_executables(self):
+        f = dummy_fortran_file()
+        self.executables['version_cmd'] = ['<F77>', '-FI', '-V', '-c',
+                                           f + '.f', '-o', f + '.o']
+
+    def runtime_library_dir_option(self, dir):
+        # TODO: could use -Xlinker here, if it's supported
+        assert "," not in dir
+
+        return '-Wl,-rpath=%s' % dir
+
+
+class IntelFCompiler(BaseIntelFCompiler):
+
+    compiler_type = 'intel'
+    compiler_aliases = ('ifort',)
+    description = 'Intel Fortran Compiler for 32-bit apps'
+    version_match = intel_version_match('32-bit|IA-32')
+
+    possible_executables = ['ifort', 'ifc']
+
+    executables = {
+        'version_cmd'  : None,          # set by update_executables
+        'compiler_f77' : [None, "-72", "-w90", "-w95"],
+        'compiler_f90' : [None],
+        'compiler_fix' : [None, "-FI"],
+        'linker_so'    : ["<F90>", "-shared"],
+        'archiver'     : ["ar", "-cr"],
+        'ranlib'       : ["ranlib"]
+        }
+
+    pic_flags = ['-fPIC']
+    module_dir_switch = '-module '  # Don't remove ending space!
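The trailing space in module_dir_switch is significant: when the switch ends with a space, numpy.distutils emits the switch and the directory as two separate argv items, and otherwise it fuses them into a single token (as gfortran's '-J' is). A small sketch of that composition rule, mirroring the behavior of FCompiler's module-option handling (not numpy's own code):

def module_option(switch, build_dir):
    # Trailing space -> two argv items; no space -> one fused token.
    if switch.endswith(' '):
        return [switch.strip(), build_dir]
    return [switch.strip() + build_dir]

print(module_option('-module ', '/tmp/mods'))  # ['-module', '/tmp/mods']
print(module_option('-J', '/tmp/mods'))        # ['-J/tmp/mods']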
+    module_include_switch = '-I'
+
+    def get_flags_free(self):
+        return ['-FR']
+
+    def get_flags(self):
+        return ['-fPIC']
+
+    def get_flags_opt(self):  # Scipy test failures with -O2
+        v = self.get_version()
+        mpopt = 'openmp' if v and v < '15' else 'qopenmp'
+        return ['-fp-model', 'strict', '-O1',
+                '-assume', 'minus0', '-{}'.format(mpopt)]
+
+    def get_flags_arch(self):
+        return []
+
+    def get_flags_linker_so(self):
+        opt = FCompiler.get_flags_linker_so(self)
+        v = self.get_version()
+        if v and v >= '8.0':
+            opt.append('-nofor_main')
+        if sys.platform == 'darwin':
+            # Here, it's -dynamiclib
+            try:
+                idx = opt.index('-shared')
+                opt.remove('-shared')
+            except ValueError:
+                idx = 0
+            opt[idx:idx] = ['-dynamiclib', '-Wl,-undefined,dynamic_lookup']
+        return opt
+
+
+class IntelItaniumFCompiler(IntelFCompiler):
+    compiler_type = 'intele'
+    compiler_aliases = ()
+    description = 'Intel Fortran Compiler for Itanium apps'
+
+    version_match = intel_version_match('Itanium|IA-64')
+
+    possible_executables = ['ifort', 'efort', 'efc']
+
+    executables = {
+        'version_cmd'  : None,
+        'compiler_f77' : [None, "-FI", "-w90", "-w95"],
+        'compiler_fix' : [None, "-FI"],
+        'compiler_f90' : [None],
+        'linker_so'    : ['<F90>', "-shared"],
+        'archiver'     : ["ar", "-cr"],
+        'ranlib'       : ["ranlib"]
+        }
+
+
+class IntelEM64TFCompiler(IntelFCompiler):
+    compiler_type = 'intelem'
+    compiler_aliases = ()
+    description = 'Intel Fortran Compiler for 64-bit apps'
+
+    version_match = intel_version_match('EM64T-based|Intel\\(R\\) 64|64|IA-64|64-bit')
+
+    possible_executables = ['ifort', 'efort', 'efc']
+
+    executables = {
+        'version_cmd'  : None,
+        'compiler_f77' : [None, "-FI"],
+        'compiler_fix' : [None, "-FI"],
+        'compiler_f90' : [None],
+        'linker_so'    : ['<F90>', "-shared"],
+        'archiver'     : ["ar", "-cr"],
+        'ranlib'       : ["ranlib"]
+        }
+
+# Is there no difference in the version string between the above compilers
+# and the Visual compilers?
+
+
+class IntelVisualFCompiler(BaseIntelFCompiler):
+    compiler_type = 'intelv'
+    description = 'Intel Visual Fortran Compiler for 32-bit apps'
+    version_match = intel_version_match('32-bit|IA-32')
+
+    def update_executables(self):
+        f = dummy_fortran_file()
+        self.executables['version_cmd'] = ['<F77>', '/FI', '/c',
+                                           f + '.f', '/o', f + '.o']
+
+    ar_exe = 'lib.exe'
+    possible_executables = ['ifort', 'ifl']
+
+    executables = {
+        'version_cmd'  : None,
+        'compiler_f77' : [None],
+        'compiler_fix' : [None],
+        'compiler_f90' : [None],
+        'linker_so'    : [None],
+        'archiver'     : [ar_exe, "/verbose", "/OUT:"],
+        'ranlib'       : None
+        }
+
+    compile_switch = '/c '
+    object_switch = '/Fo'     # No space after /Fo!
+    library_switch = '/OUT:'  # No space after /OUT:!
+    module_dir_switch = '/module:'  # No space after /module:
+    module_include_switch = '/I'
+
+    def get_flags(self):
+        opt = ['/nologo', '/MD', '/nbs', '/names:lowercase', '/assume:underscore']
+        return opt
+
+    def get_flags_free(self):
+        return []
+
+    def get_flags_debug(self):
+        return ['/4Yb', '/d2']
+
+    def get_flags_opt(self):
+        return ['/O1', '/assume:minus0']  # Scipy test failures with /O2
+
+    def get_flags_arch(self):
+        return ["/arch:IA32", "/QaxSSE3"]
+
+    def runtime_library_dir_option(self, dir):
+        raise NotImplementedError
+
+
+class IntelItaniumVisualFCompiler(IntelVisualFCompiler):
+    compiler_type = 'intelev'
+    description = 'Intel Visual Fortran Compiler for Itanium apps'
+
+    version_match = intel_version_match('Itanium')
+
+    possible_executables = ['efl']  # XXX this is a wild guess
+    ar_exe = IntelVisualFCompiler.ar_exe
+
+    executables = {
+        'version_cmd'  : None,
+        'compiler_f77' : [None, "-FI", "-w90", "-w95"],
+        'compiler_fix' : [None, "-FI", "-4L72", "-w"],
+        'compiler_f90' : [None],
+        'linker_so'    : ['<F90>', "-shared"],
+        'archiver'     : [ar_exe, "/verbose", "/OUT:"],
+        'ranlib'       : None
+        }
+
+
+class IntelEM64VisualFCompiler(IntelVisualFCompiler):
+    compiler_type = 'intelvem'
+    description = 'Intel Visual Fortran Compiler for 64-bit apps'
+
+    version_match = simple_version_match(start=r'Intel\(R\).*?64,')
+
+    def get_flags_arch(self):
+        return []
+
+
+if __name__ == '__main__':
+    from distutils import log
+    log.set_verbosity(2)
+    from numpy.distutils import customized_fcompiler
+    print(customized_fcompiler(compiler='intel').get_version())
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/lahey.py b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/lahey.py
new file mode 100644
index 0000000..e925838
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/lahey.py
@@ -0,0 +1,45 @@
+import os
+
+from numpy.distutils.fcompiler import FCompiler
+
+compilers = ['LaheyFCompiler']
+
+class LaheyFCompiler(FCompiler):
+
+    compiler_type = 'lahey'
+    description = 'Lahey/Fujitsu Fortran 95 Compiler'
+    version_pattern = r'Lahey/Fujitsu Fortran 95 Compiler Release (?P<version>[^\s*]*)'
+
+    executables = {
+        'version_cmd'  : ["<F90>", "--version"],
+        'compiler_f77' : ["lf95", "--fix"],
+        'compiler_fix' : ["lf95", "--fix"],
+        'compiler_f90' : ["lf95"],
+        'linker_so'    : ["lf95", "-shared"],
+        'archiver'     : ["ar", "-cr"],
+        'ranlib'       : ["ranlib"]
+        }
+
+    module_dir_switch = None  #XXX Fix me
+    module_include_switch = None  #XXX Fix me
+
+    def get_flags_opt(self):
+        return ['-O']
+    def get_flags_debug(self):
+        return ['-g', '--chk', '--chkglobal']
+    def get_library_dirs(self):
+        opt = []
+        d = os.environ.get('LAHEY')
+        if d:
+            opt.append(os.path.join(d, 'lib'))
+        return opt
+    def get_libraries(self):
+        opt = []
+        opt.extend(['fj9f6', 'fj9i6', 'fj9ipp', 'fj9e6'])
+        return opt
+
+if __name__ == '__main__':
+    from distutils import log
+    log.set_verbosity(2)
+    from numpy.distutils import customized_fcompiler
+    print(customized_fcompiler(compiler='lahey').get_version())
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/mips.py b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/mips.py
new file mode 100644
index 0000000..a097380
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/mips.py
@@ -0,0 +1,54 @@
+from numpy.distutils.cpuinfo import cpu
+from numpy.distutils.fcompiler import FCompiler
+
+compilers = ['MIPSFCompiler']
+
+class MIPSFCompiler(FCompiler):
+
+    compiler_type = 'mips'
+    description = 'MIPSpro Fortran Compiler'
+    version_pattern = r'MIPSpro Compilers: Version (?P<version>[^\s*,]*)'
+
+    executables = {
+        'version_cmd'  : ["<F90>", "-version"],
+        'compiler_f77' : ["f77", "-f77"],
+        'compiler_fix' : ["f90", "-fixedform"],
+        'compiler_f90' : ["f90"],
+        'linker_so'    : ["f90", "-shared"],
+        'archiver'     : ["ar", "-cr"],
+        'ranlib'       : None
+        }
+    module_dir_switch = None  #XXX: fix me
+    module_include_switch = None  #XXX: fix me
+    pic_flags = ['-KPIC']
+
+    def get_flags(self):
+        return self.pic_flags + ['-n32']
+    def get_flags_opt(self):
+        return ['-O3']
+    def get_flags_arch(self):
+        opt = []
+        for a in '19 20 21 22_4k 22_5k 24 25 26 27 28 30 32_5k 32_10k'.split():
+            if getattr(cpu, 'is_IP%s'%a)():
+                opt.append('-TARG:platform=IP%s' % a)
+                break
+        return opt
+    def get_flags_arch_f77(self):
+        r = None
+        if cpu.is_r10000(): r = 10000
+        elif cpu.is_r12000(): r = 12000
+        elif cpu.is_r8000(): r = 8000
+        elif cpu.is_r5000(): r = 5000
+        elif cpu.is_r4000(): r = 4000
+        if r is not None:
+            return ['r%s' % (r)]
+        return []
+    def get_flags_arch_f90(self):
+        r = self.get_flags_arch_f77()
+        if r:
+            r[0] = '-' + r[0]
+        return r
+
+if __name__ == '__main__':
+    from numpy.distutils import customized_fcompiler
+    print(customized_fcompiler(compiler='mips').get_version())
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/nag.py b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/nag.py
new file mode 100644
index 0000000..939201f
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/nag.py
@@ -0,0 +1,87 @@
+import sys
+import re
+from numpy.distutils.fcompiler import FCompiler
+
+compilers = ['NAGFCompiler', 'NAGFORCompiler']
+
+class BaseNAGFCompiler(FCompiler):
+    version_pattern = r'NAG.* Release (?P<version>[^(\s]*)'
+
+    def version_match(self, version_string):
+        m = re.search(self.version_pattern, version_string)
+        if m:
+            return m.group('version')
+        else:
+            return None
+
+    def get_flags_linker_so(self):
+        return ["-Wl,-shared"]
+    def get_flags_opt(self):
+        return ['-O4']
+    def get_flags_arch(self):
+        return []
+
+class NAGFCompiler(BaseNAGFCompiler):
+
+    compiler_type = 'nag'
+    description = 'NAGWare Fortran 95 Compiler'
+
+    executables = {
+        'version_cmd'  : ["<F90>", "-V"],
+        'compiler_f77' : ["f95", "-fixed"],
+        'compiler_fix' : ["f95", "-fixed"],
+        'compiler_f90' : ["f95"],
+        'linker_so'    : ["<F90>"],
+        'archiver'     : ["ar", "-cr"],
+        'ranlib'       : ["ranlib"]
+        }
+
+    def get_flags_linker_so(self):
+        if sys.platform == 'darwin':
+            return ['-unsharedf95', '-Wl,-bundle,-flat_namespace,-undefined,suppress']
+        return BaseNAGFCompiler.get_flags_linker_so(self)
+    def get_flags_arch(self):
+        version = self.get_version()
+        if version and version < '5.1':
+            return ['-target=native']
+        else:
+            return BaseNAGFCompiler.get_flags_arch(self)
+    def get_flags_debug(self):
+        return ['-g', '-gline', '-g90', '-nan', '-C']
+
+class NAGFORCompiler(BaseNAGFCompiler):
+
+    compiler_type = 'nagfor'
+    description = 'NAG Fortran Compiler'
+
+    executables = {
+        'version_cmd'  : ["nagfor", "-V"],
+        'compiler_f77' : ["nagfor", "-fixed"],
+        'compiler_fix' : ["nagfor", "-fixed"],
+        'compiler_f90' : ["nagfor"],
+        'linker_so'    : ["nagfor"],
+        'archiver'     : ["ar", "-cr"],
+        'ranlib'       : ["ranlib"]
+        }
+
+    def get_flags_linker_so(self):
+        if sys.platform == 'darwin':
+            return ['-unsharedrts',
+                    '-Wl,-bundle,-flat_namespace,-undefined,suppress']
+        return BaseNAGFCompiler.get_flags_linker_so(self)
+    def get_flags_debug(self):
+        version = self.get_version()
+        if version and version > '6.1':
+            return ['-g', '-u',
'-nan', '-C=all', '-thread_safe', + '-kind=unique', '-Warn=allocation', '-Warn=subnormal'] + else: + return ['-g', '-nan', '-C=all', '-u', '-thread_safe'] + + +if __name__ == '__main__': + from distutils import log + log.set_verbosity(2) + from numpy.distutils import customized_fcompiler + compiler = customized_fcompiler(compiler='nagfor') + print(compiler.get_version()) + print(compiler.get_flags_debug()) diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/none.py b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/none.py new file mode 100644 index 0000000..ef411ff --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/none.py @@ -0,0 +1,28 @@ +from numpy.distutils.fcompiler import FCompiler +from numpy.distutils import customized_fcompiler + +compilers = ['NoneFCompiler'] + +class NoneFCompiler(FCompiler): + + compiler_type = 'none' + description = 'Fake Fortran compiler' + + executables = {'compiler_f77': None, + 'compiler_f90': None, + 'compiler_fix': None, + 'linker_so': None, + 'linker_exe': None, + 'archiver': None, + 'ranlib': None, + 'version_cmd': None, + } + + def find_executables(self): + pass + + +if __name__ == '__main__': + from distutils import log + log.set_verbosity(2) + print(customized_fcompiler(compiler='none').get_version()) diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/nv.py b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/nv.py new file mode 100644 index 0000000..212f348 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/nv.py @@ -0,0 +1,53 @@ +from numpy.distutils.fcompiler import FCompiler + +compilers = ['NVHPCFCompiler'] + +class NVHPCFCompiler(FCompiler): + """ NVIDIA High Performance Computing (HPC) SDK Fortran Compiler + + https://developer.nvidia.com/hpc-sdk + + Since august 2020 the NVIDIA HPC SDK includes the compilers formerly known as The Portland Group compilers, + https://www.pgroup.com/index.htm. + See also `numpy.distutils.fcompiler.pg`. 
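    A minimal usage sketch, assuming the HPC SDK is installed and nvfortran
    is on PATH (version detection fails otherwise); this mirrors the
    __main__ block at the bottom of this file:

        from numpy.distutils import customized_fcompiler
        fc = customized_fcompiler(compiler='nv')
        print(fc.get_version())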
+ """ + + compiler_type = 'nv' + description = 'NVIDIA HPC SDK' + version_pattern = r'\s*(nvfortran|(pg(f77|f90|fortran)) \(aka nvfortran\)) (?P[\d.-]+).*' + + executables = { + 'version_cmd': ["", "-V"], + 'compiler_f77': ["nvfortran"], + 'compiler_fix': ["nvfortran", "-Mfixed"], + 'compiler_f90': ["nvfortran"], + 'linker_so': [""], + 'archiver': ["ar", "-cr"], + 'ranlib': ["ranlib"] + } + pic_flags = ['-fpic'] + + module_dir_switch = '-module ' + module_include_switch = '-I' + + def get_flags(self): + opt = ['-Minform=inform', '-Mnosecond_underscore'] + return self.pic_flags + opt + + def get_flags_opt(self): + return ['-fast'] + + def get_flags_debug(self): + return ['-g'] + + def get_flags_linker_so(self): + return ["-shared", '-fpic'] + + def runtime_library_dir_option(self, dir): + return '-R%s' % dir + +if __name__ == '__main__': + from distutils import log + log.set_verbosity(2) + from numpy.distutils import customized_fcompiler + print(customized_fcompiler(compiler='nv').get_version()) diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/pathf95.py b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/pathf95.py new file mode 100644 index 0000000..0768cb1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/pathf95.py @@ -0,0 +1,33 @@ +from numpy.distutils.fcompiler import FCompiler + +compilers = ['PathScaleFCompiler'] + +class PathScaleFCompiler(FCompiler): + + compiler_type = 'pathf95' + description = 'PathScale Fortran Compiler' + version_pattern = r'PathScale\(TM\) Compiler Suite: Version (?P[\d.]+)' + + executables = { + 'version_cmd' : ["pathf95", "-version"], + 'compiler_f77' : ["pathf95", "-fixedform"], + 'compiler_fix' : ["pathf95", "-fixedform"], + 'compiler_f90' : ["pathf95"], + 'linker_so' : ["pathf95", "-shared"], + 'archiver' : ["ar", "-cr"], + 'ranlib' : ["ranlib"] + } + pic_flags = ['-fPIC'] + module_dir_switch = '-module ' # Don't remove ending space! 
+ module_include_switch = '-I' + + def get_flags_opt(self): + return ['-O3'] + def get_flags_debug(self): + return ['-g'] + +if __name__ == '__main__': + from distutils import log + log.set_verbosity(2) + from numpy.distutils import customized_fcompiler + print(customized_fcompiler(compiler='pathf95').get_version()) diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/pg.py b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/pg.py new file mode 100644 index 0000000..72442c4 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/pg.py @@ -0,0 +1,128 @@ +# http://www.pgroup.com +import sys + +from numpy.distutils.fcompiler import FCompiler +from sys import platform +from os.path import join, dirname, normpath + +compilers = ['PGroupFCompiler', 'PGroupFlangCompiler'] + + +class PGroupFCompiler(FCompiler): + + compiler_type = 'pg' + description = 'Portland Group Fortran Compiler' + version_pattern = r'\s*pg(f77|f90|hpf|fortran) (?P[\d.-]+).*' + + if platform == 'darwin': + executables = { + 'version_cmd': ["", "-V"], + 'compiler_f77': ["pgfortran", "-dynamiclib"], + 'compiler_fix': ["pgfortran", "-Mfixed", "-dynamiclib"], + 'compiler_f90': ["pgfortran", "-dynamiclib"], + 'linker_so': ["libtool"], + 'archiver': ["ar", "-cr"], + 'ranlib': ["ranlib"] + } + pic_flags = [''] + else: + executables = { + 'version_cmd': ["", "-V"], + 'compiler_f77': ["pgfortran"], + 'compiler_fix': ["pgfortran", "-Mfixed"], + 'compiler_f90': ["pgfortran"], + 'linker_so': [""], + 'archiver': ["ar", "-cr"], + 'ranlib': ["ranlib"] + } + pic_flags = ['-fpic'] + + module_dir_switch = '-module ' + module_include_switch = '-I' + + def get_flags(self): + opt = ['-Minform=inform', '-Mnosecond_underscore'] + return self.pic_flags + opt + + def get_flags_opt(self): + return ['-fast'] + + def get_flags_debug(self): + return ['-g'] + + if platform == 'darwin': + def get_flags_linker_so(self): + return ["-dynamic", '-undefined', 'dynamic_lookup'] + + else: + def get_flags_linker_so(self): + return ["-shared", '-fpic'] + + def runtime_library_dir_option(self, dir): + return '-R%s' % dir + + +import functools + +class PGroupFlangCompiler(FCompiler): + compiler_type = 'flang' + description = 'Portland Group Fortran LLVM Compiler' + version_pattern = r'\s*(flang|clang) version (?P[\d.-]+).*' + + ar_exe = 'lib.exe' + possible_executables = ['flang'] + + executables = { + 'version_cmd': ["", "--version"], + 'compiler_f77': ["flang"], + 'compiler_fix': ["flang"], + 'compiler_f90': ["flang"], + 'linker_so': [None], + 'archiver': [ar_exe, "/verbose", "/OUT:"], + 'ranlib': None + } + + library_switch = '/OUT:' # No space after /OUT:! + module_dir_switch = '-module ' # Don't remove ending space! 
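Note that the `(?P` fragments in the version patterns throughout these files
have lost their group names (angle-bracketed text such as <version> appears to
have been stripped from this dump); upstream, each pattern defines a named
group, for example:

    import re
    # PGroupFCompiler.version_pattern with the <version> group restored
    version_pattern = r'\s*pg(f77|f90|hpf|fortran) (?P<version>[\d.-]+).*'
    m = re.match(version_pattern, ' pgfortran 19.10-0 LLVM 64-bit target')
    assert m and m.group('version') == '19.10-0'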
+ + def get_libraries(self): + opt = FCompiler.get_libraries(self) + opt.extend(['flang', 'flangrti', 'ompstub']) + return opt + + @functools.lru_cache(maxsize=128) + def get_library_dirs(self): + """List of compiler library directories.""" + opt = FCompiler.get_library_dirs(self) + flang_dir = dirname(self.executables['compiler_f77'][0]) + opt.append(normpath(join(flang_dir, '..', 'lib'))) + + return opt + + def get_flags(self): + return [] + + def get_flags_free(self): + return [] + + def get_flags_debug(self): + return ['-g'] + + def get_flags_opt(self): + return ['-O3'] + + def get_flags_arch(self): + return [] + + def runtime_library_dir_option(self, dir): + raise NotImplementedError + + +if __name__ == '__main__': + from distutils import log + log.set_verbosity(2) + from numpy.distutils import customized_fcompiler + if 'flang' in sys.argv: + print(customized_fcompiler(compiler='flang').get_version()) + else: + print(customized_fcompiler(compiler='pg').get_version()) diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/sun.py b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/sun.py new file mode 100644 index 0000000..d039f0b --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/sun.py @@ -0,0 +1,51 @@ +from numpy.distutils.ccompiler import simple_version_match +from numpy.distutils.fcompiler import FCompiler + +compilers = ['SunFCompiler'] + +class SunFCompiler(FCompiler): + + compiler_type = 'sun' + description = 'Sun or Forte Fortran 95 Compiler' + # ex: + # f90: Sun WorkShop 6 update 2 Fortran 95 6.2 Patch 111690-10 2003/08/28 + version_match = simple_version_match( + start=r'f9[05]: (Sun|Forte|WorkShop).*Fortran 95') + + executables = { + 'version_cmd' : ["", "-V"], + 'compiler_f77' : ["f90"], + 'compiler_fix' : ["f90", "-fixed"], + 'compiler_f90' : ["f90"], + 'linker_so' : ["", "-Bdynamic", "-G"], + 'archiver' : ["ar", "-cr"], + 'ranlib' : ["ranlib"] + } + module_dir_switch = '-moddir=' + module_include_switch = '-M' + pic_flags = ['-xcode=pic32'] + + def get_flags_f77(self): + ret = ["-ftrap=%none"] + if (self.get_version() or '') >= '7': + ret.append("-f77") + else: + ret.append("-fixed") + return ret + def get_opt(self): + return ['-fast', '-dalign'] + def get_arch(self): + return ['-xtarget=generic'] + def get_libraries(self): + opt = [] + opt.extend(['fsu', 'sunmath', 'mvec']) + return opt + + def runtime_library_dir_option(self, dir): + return '-R%s' % dir + +if __name__ == '__main__': + from distutils import log + log.set_verbosity(2) + from numpy.distutils import customized_fcompiler + print(customized_fcompiler(compiler='sun').get_version()) diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/vast.py b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/vast.py new file mode 100644 index 0000000..92a1647 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/fcompiler/vast.py @@ -0,0 +1,52 @@ +import os + +from numpy.distutils.fcompiler.gnu import GnuFCompiler + +compilers = ['VastFCompiler'] + +class VastFCompiler(GnuFCompiler): + compiler_type = 'vast' + compiler_aliases = () + description = 'Pacific-Sierra Research Fortran 90 Compiler' + version_pattern = (r'\s*Pacific-Sierra Research vf90 ' + r'(Personal|Professional)\s+(?P[^\s]*)') + + # VAST f90 does not support -o with -c. 
So, object files are created + # to the current directory and then moved to build directory + object_switch = ' && function _mvfile { mv -v `basename $1` $1 ; } && _mvfile ' + + executables = { + 'version_cmd' : ["vf90", "-v"], + 'compiler_f77' : ["g77"], + 'compiler_fix' : ["f90", "-Wv,-ya"], + 'compiler_f90' : ["f90"], + 'linker_so' : [""], + 'archiver' : ["ar", "-cr"], + 'ranlib' : ["ranlib"] + } + module_dir_switch = None #XXX Fix me + module_include_switch = None #XXX Fix me + + def find_executables(self): + pass + + def get_version_cmd(self): + f90 = self.compiler_f90[0] + d, b = os.path.split(f90) + vf90 = os.path.join(d, 'v'+b) + return vf90 + + def get_flags_arch(self): + vast_version = self.get_version() + gnu = GnuFCompiler() + gnu.customize(None) + self.version = gnu.get_version() + opt = GnuFCompiler.get_flags_arch(self) + self.version = vast_version + return opt + +if __name__ == '__main__': + from distutils import log + log.set_verbosity(2) + from numpy.distutils import customized_fcompiler + print(customized_fcompiler(compiler='vast').get_version()) diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/from_template.py b/.local/lib/python3.8/site-packages/numpy/distutils/from_template.py new file mode 100644 index 0000000..90d1f4c --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/from_template.py @@ -0,0 +1,261 @@ +#!/usr/bin/env python3 +""" + +process_file(filename) + + takes templated file .xxx.src and produces .xxx file where .xxx + is .pyf .f90 or .f using the following template rules: + + '<..>' denotes a template. + + All function and subroutine blocks in a source file with names that + contain '<..>' will be replicated according to the rules in '<..>'. + + The number of comma-separated words in '<..>' will determine the number of + replicates. + + '<..>' may have two different forms, named and short. For example, + + named: + where anywhere inside a block '
<p>
' will be replaced with + 'd', 's', 'z', and 'c' for each replicate of the block. + + <_c> is already defined: <_c=s,d,c,z> + <_t> is already defined: <_t=real,double precision,complex,double complex> + + short: + , a short form of the named, useful when no
<p>
appears inside + a block. + + In general, '<..>' contains a comma separated list of arbitrary + expressions. If these expression must contain a comma|leftarrow|rightarrow, + then prepend the comma|leftarrow|rightarrow with a backslash. + + If an expression matches '\\' then it will be replaced + by -th expression. + + Note that all '<..>' forms in a block must have the same number of + comma-separated entries. + + Predefined named template rules: + + + + + + +""" +__all__ = ['process_str', 'process_file'] + +import os +import sys +import re + +routine_start_re = re.compile(r'(\n|\A)(( (\$|\*))|)\s*(subroutine|function)\b', re.I) +routine_end_re = re.compile(r'\n\s*end\s*(subroutine|function)\b.*(\n|\Z)', re.I) +function_start_re = re.compile(r'\n (\$|\*)\s*function\b', re.I) + +def parse_structure(astr): + """ Return a list of tuples for each function or subroutine each + tuple is the start and end of a subroutine or function to be + expanded. + """ + + spanlist = [] + ind = 0 + while True: + m = routine_start_re.search(astr, ind) + if m is None: + break + start = m.start() + if function_start_re.match(astr, start, m.end()): + while True: + i = astr.rfind('\n', ind, start) + if i==-1: + break + start = i + if astr[i:i+7]!='\n $': + break + start += 1 + m = routine_end_re.search(astr, m.end()) + ind = end = m and m.end()-1 or len(astr) + spanlist.append((start, end)) + return spanlist + +template_re = re.compile(r"<\s*(\w[\w\d]*)\s*>") +named_re = re.compile(r"<\s*(\w[\w\d]*)\s*=\s*(.*?)\s*>") +list_re = re.compile(r"<\s*((.*?))\s*>") + +def find_repl_patterns(astr): + reps = named_re.findall(astr) + names = {} + for rep in reps: + name = rep[0].strip() or unique_key(names) + repl = rep[1].replace(r'\,', '@comma@') + thelist = conv(repl) + names[name] = thelist + return names + +def find_and_remove_repl_patterns(astr): + names = find_repl_patterns(astr) + astr = re.subn(named_re, '', astr)[0] + return astr, names + +item_re = re.compile(r"\A\\(?P\d+)\Z") +def conv(astr): + b = astr.split(',') + l = [x.strip() for x in b] + for i in range(len(l)): + m = item_re.match(l[i]) + if m: + j = int(m.group('index')) + l[i] = l[j] + return ','.join(l) + +def unique_key(adict): + """ Obtain a unique key given a dictionary.""" + allkeys = list(adict.keys()) + done = False + n = 1 + while not done: + newkey = '__l%s' % (n) + if newkey in allkeys: + n += 1 + else: + done = True + return newkey + + +template_name_re = re.compile(r'\A\s*(\w[\w\d]*)\s*\Z') +def expand_sub(substr, names): + substr = substr.replace(r'\>', '@rightarrow@') + substr = substr.replace(r'\<', '@leftarrow@') + lnames = find_repl_patterns(substr) + substr = named_re.sub(r"<\1>", substr) # get rid of definition templates + + def listrepl(mobj): + thelist = conv(mobj.group(1).replace(r'\,', '@comma@')) + if template_name_re.match(thelist): + return "<%s>" % (thelist) + name = None + for key in lnames.keys(): # see if list is already in dictionary + if lnames[key] == thelist: + name = key + if name is None: # this list is not in the dictionary yet + name = unique_key(lnames) + lnames[name] = thelist + return "<%s>" % name + + substr = list_re.sub(listrepl, substr) # convert all lists to named templates + # newnames are constructed as needed + + numsubs = None + base_rule = None + rules = {} + for r in template_re.findall(substr): + if r not in rules: + thelist = lnames.get(r, names.get(r, None)) + if thelist is None: + raise ValueError('No replicates found for <%s>' % (r)) + if r not in names and not thelist.startswith('_'): + names[r] 
= thelist + rule = [i.replace('@comma@', ',') for i in thelist.split(',')] + num = len(rule) + + if numsubs is None: + numsubs = num + rules[r] = rule + base_rule = r + elif num == numsubs: + rules[r] = rule + else: + print("Mismatch in number of replacements (base <%s=%s>)" + " for <%s=%s>. Ignoring." % + (base_rule, ','.join(rules[base_rule]), r, thelist)) + if not rules: + return substr + + def namerepl(mobj): + name = mobj.group(1) + return rules.get(name, (k+1)*[name])[k] + + newstr = '' + for k in range(numsubs): + newstr += template_re.sub(namerepl, substr) + '\n\n' + + newstr = newstr.replace('@rightarrow@', '>') + newstr = newstr.replace('@leftarrow@', '<') + return newstr + +def process_str(allstr): + newstr = allstr + writestr = '' + + struct = parse_structure(newstr) + + oldend = 0 + names = {} + names.update(_special_names) + for sub in struct: + cleanedstr, defs = find_and_remove_repl_patterns(newstr[oldend:sub[0]]) + writestr += cleanedstr + names.update(defs) + writestr += expand_sub(newstr[sub[0]:sub[1]], names) + oldend = sub[1] + writestr += newstr[oldend:] + + return writestr + +include_src_re = re.compile(r"(\n|\A)\s*include\s*['\"](?P[\w\d./\\]+\.src)['\"]", re.I) + +def resolve_includes(source): + d = os.path.dirname(source) + with open(source) as fid: + lines = [] + for line in fid: + m = include_src_re.match(line) + if m: + fn = m.group('name') + if not os.path.isabs(fn): + fn = os.path.join(d, fn) + if os.path.isfile(fn): + lines.extend(resolve_includes(fn)) + else: + lines.append(line) + else: + lines.append(line) + return lines + +def process_file(source): + lines = resolve_includes(source) + return process_str(''.join(lines)) + +_special_names = find_repl_patterns(''' +<_c=s,d,c,z> +<_t=real,double precision,complex,double complex> + + + + + +''') + +def main(): + try: + file = sys.argv[1] + except IndexError: + fid = sys.stdin + outfile = sys.stdout + else: + fid = open(file, 'r') + (base, ext) = os.path.splitext(file) + newname = base + outfile = open(newname, 'w') + + allstr = fid.read() + writestr = process_str(allstr) + outfile.write(writestr) + + +if __name__ == "__main__": + main() diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/intelccompiler.py b/.local/lib/python3.8/site-packages/numpy/distutils/intelccompiler.py new file mode 100644 index 0000000..0fa1c11 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/intelccompiler.py @@ -0,0 +1,111 @@ +import platform + +from distutils.unixccompiler import UnixCCompiler +from numpy.distutils.exec_command import find_executable +from numpy.distutils.ccompiler import simple_version_match +if platform.system() == 'Windows': + from numpy.distutils.msvc9compiler import MSVCCompiler + + +class IntelCCompiler(UnixCCompiler): + """A modified Intel compiler compatible with a GCC-built Python.""" + compiler_type = 'intel' + cc_exe = 'icc' + cc_args = 'fPIC' + + def __init__(self, verbose=0, dry_run=0, force=0): + UnixCCompiler.__init__(self, verbose, dry_run, force) + + v = self.get_version() + mpopt = 'openmp' if v and v < '15' else 'qopenmp' + self.cc_exe = ('icc -fPIC -fp-model strict -O3 ' + '-fomit-frame-pointer -{}').format(mpopt) + compiler = self.cc_exe + + if platform.system() == 'Darwin': + shared_flag = '-Wl,-undefined,dynamic_lookup' + else: + shared_flag = '-shared' + self.set_executables(compiler=compiler, + compiler_so=compiler, + compiler_cxx=compiler, + archiver='xiar' + ' cru', + linker_exe=compiler + ' -shared-intel', + linker_so=compiler + ' ' + shared_flag + + ' 
-shared-intel') + + +class IntelItaniumCCompiler(IntelCCompiler): + compiler_type = 'intele' + + # On Itanium, the Intel Compiler used to be called ecc, let's search for + # it (now it's also icc, so ecc is last in the search). + for cc_exe in map(find_executable, ['icc', 'ecc']): + if cc_exe: + break + + +class IntelEM64TCCompiler(UnixCCompiler): + """ + A modified Intel x86_64 compiler compatible with a 64bit GCC-built Python. + """ + compiler_type = 'intelem' + cc_exe = 'icc -m64' + cc_args = '-fPIC' + + def __init__(self, verbose=0, dry_run=0, force=0): + UnixCCompiler.__init__(self, verbose, dry_run, force) + + v = self.get_version() + mpopt = 'openmp' if v and v < '15' else 'qopenmp' + self.cc_exe = ('icc -std=c99 -m64 -fPIC -fp-model strict -O3 ' + '-fomit-frame-pointer -{}').format(mpopt) + compiler = self.cc_exe + + if platform.system() == 'Darwin': + shared_flag = '-Wl,-undefined,dynamic_lookup' + else: + shared_flag = '-shared' + self.set_executables(compiler=compiler, + compiler_so=compiler, + compiler_cxx=compiler, + archiver='xiar' + ' cru', + linker_exe=compiler + ' -shared-intel', + linker_so=compiler + ' ' + shared_flag + + ' -shared-intel') + + +if platform.system() == 'Windows': + class IntelCCompilerW(MSVCCompiler): + """ + A modified Intel compiler compatible with an MSVC-built Python. + """ + compiler_type = 'intelw' + compiler_cxx = 'icl' + + def __init__(self, verbose=0, dry_run=0, force=0): + MSVCCompiler.__init__(self, verbose, dry_run, force) + version_match = simple_version_match(start=r'Intel\(R\).*?32,') + self.__version = version_match + + def initialize(self, plat_name=None): + MSVCCompiler.initialize(self, plat_name) + self.cc = self.find_exe('icl.exe') + self.lib = self.find_exe('xilib') + self.linker = self.find_exe('xilink') + self.compile_options = ['/nologo', '/O3', '/MD', '/W3', + '/Qstd=c99'] + self.compile_options_debug = ['/nologo', '/Od', '/MDd', '/W3', + '/Qstd=c99', '/Z7', '/D_DEBUG'] + + class IntelEM64TCCompilerW(IntelCCompilerW): + """ + A modified Intel x86_64 compiler compatible with + a 64bit MSVC-built Python. + """ + compiler_type = 'intelemw' + + def __init__(self, verbose=0, dry_run=0, force=0): + MSVCCompiler.__init__(self, verbose, dry_run, force) + version_match = simple_version_match(start=r'Intel\(R\).*?64,') + self.__version = version_match diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/lib2def.py b/.local/lib/python3.8/site-packages/numpy/distutils/lib2def.py new file mode 100644 index 0000000..820ed71 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/lib2def.py @@ -0,0 +1,116 @@ +import re +import sys +import subprocess + +__doc__ = """This module generates a DEF file from the symbols in +an MSVC-compiled DLL import library. It correctly discriminates between +data and functions. The data is collected from the output of the program +nm(1). 
+ +Usage: + python lib2def.py [libname.lib] [output.def] +or + python lib2def.py [libname.lib] > output.def + +libname.lib defaults to python.lib and output.def defaults to stdout + +Author: Robert Kern +Last Update: April 30, 1999 +""" + +__version__ = '0.1a' + +py_ver = "%d%d" % tuple(sys.version_info[:2]) + +DEFAULT_NM = ['nm', '-Cs'] + +DEF_HEADER = """LIBRARY python%s.dll +;CODE PRELOAD MOVEABLE DISCARDABLE +;DATA PRELOAD SINGLE + +EXPORTS +""" % py_ver +# the header of the DEF file + +FUNC_RE = re.compile(r"^(.*) in python%s\.dll" % py_ver, re.MULTILINE) +DATA_RE = re.compile(r"^_imp__(.*) in python%s\.dll" % py_ver, re.MULTILINE) + +def parse_cmd(): + """Parses the command-line arguments. + +libfile, deffile = parse_cmd()""" + if len(sys.argv) == 3: + if sys.argv[1][-4:] == '.lib' and sys.argv[2][-4:] == '.def': + libfile, deffile = sys.argv[1:] + elif sys.argv[1][-4:] == '.def' and sys.argv[2][-4:] == '.lib': + deffile, libfile = sys.argv[1:] + else: + print("I'm assuming that your first argument is the library") + print("and the second is the DEF file.") + elif len(sys.argv) == 2: + if sys.argv[1][-4:] == '.def': + deffile = sys.argv[1] + libfile = 'python%s.lib' % py_ver + elif sys.argv[1][-4:] == '.lib': + deffile = None + libfile = sys.argv[1] + else: + libfile = 'python%s.lib' % py_ver + deffile = None + return libfile, deffile + +def getnm(nm_cmd=['nm', '-Cs', 'python%s.lib' % py_ver], shell=True): + """Returns the output of nm_cmd via a pipe. + +nm_output = getnm(nm_cmd = 'nm -Cs py_lib')""" + p = subprocess.Popen(nm_cmd, shell=shell, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, universal_newlines=True) + nm_output, nm_err = p.communicate() + if p.returncode != 0: + raise RuntimeError('failed to run "%s": "%s"' % ( + ' '.join(nm_cmd), nm_err)) + return nm_output + +def parse_nm(nm_output): + """Returns a tuple of lists: dlist for the list of data +symbols and flist for the list of function symbols. + +dlist, flist = parse_nm(nm_output)""" + data = DATA_RE.findall(nm_output) + func = FUNC_RE.findall(nm_output) + + flist = [] + for sym in data: + if sym in func and (sym[:2] == 'Py' or sym[:3] == '_Py' or sym[:4] == 'init'): + flist.append(sym) + + dlist = [] + for sym in data: + if sym not in flist and (sym[:2] == 'Py' or sym[:3] == '_Py'): + dlist.append(sym) + + dlist.sort() + flist.sort() + return dlist, flist + +def output_def(dlist, flist, header, file = sys.stdout): + """Outputs the final DEF file to a file defaulting to stdout. + +output_def(dlist, flist, header, file = sys.stdout)""" + for data_sym in dlist: + header = header + '\t%s DATA\n' % data_sym + header = header + '\n' # blank line + for func_sym in flist: + header = header + '\t%s\n' % func_sym + file.write(header) + +if __name__ == '__main__': + libfile, deffile = parse_cmd() + if deffile is None: + deffile = sys.stdout + else: + deffile = open(deffile, 'w') + nm_cmd = DEFAULT_NM + [str(libfile)] + nm_output = getnm(nm_cmd, shell=False) + dlist, flist = parse_nm(nm_output) + output_def(dlist, flist, DEF_HEADER, deffile) diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/line_endings.py b/.local/lib/python3.8/site-packages/numpy/distutils/line_endings.py new file mode 100644 index 0000000..686e5eb --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/line_endings.py @@ -0,0 +1,77 @@ +""" Functions for converting from DOS to UNIX line endings + +""" +import os +import re +import sys + + +def dos2unix(file): + "Replace CRLF with LF in argument files. 
Print names of changed files." + if os.path.isdir(file): + print(file, "Directory!") + return + + with open(file, "rb") as fp: + data = fp.read() + if '\0' in data: + print(file, "Binary!") + return + + newdata = re.sub("\r\n", "\n", data) + if newdata != data: + print('dos2unix:', file) + with open(file, "wb") as f: + f.write(newdata) + return file + else: + print(file, 'ok') + +def dos2unix_one_dir(modified_files, dir_name, file_names): + for file in file_names: + full_path = os.path.join(dir_name, file) + file = dos2unix(full_path) + if file is not None: + modified_files.append(file) + +def dos2unix_dir(dir_name): + modified_files = [] + os.path.walk(dir_name, dos2unix_one_dir, modified_files) + return modified_files +#---------------------------------- + +def unix2dos(file): + "Replace LF with CRLF in argument files. Print names of changed files." + if os.path.isdir(file): + print(file, "Directory!") + return + + with open(file, "rb") as fp: + data = fp.read() + if '\0' in data: + print(file, "Binary!") + return + newdata = re.sub("\r\n", "\n", data) + newdata = re.sub("\n", "\r\n", newdata) + if newdata != data: + print('unix2dos:', file) + with open(file, "wb") as f: + f.write(newdata) + return file + else: + print(file, 'ok') + +def unix2dos_one_dir(modified_files, dir_name, file_names): + for file in file_names: + full_path = os.path.join(dir_name, file) + unix2dos(full_path) + if file is not None: + modified_files.append(file) + +def unix2dos_dir(dir_name): + modified_files = [] + os.path.walk(dir_name, unix2dos_one_dir, modified_files) + return modified_files + +if __name__ == "__main__": + dos2unix_dir(sys.argv[1]) diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/log.py b/.local/lib/python3.8/site-packages/numpy/distutils/log.py new file mode 100644 index 0000000..3347f56 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/log.py @@ -0,0 +1,111 @@ +# Colored log +import sys +from distutils.log import * # noqa: F403 +from distutils.log import Log as old_Log +from distutils.log import _global_log + +from numpy.distutils.misc_util import (red_text, default_text, cyan_text, + green_text, is_sequence, is_string) + + +def _fix_args(args,flag=1): + if is_string(args): + return args.replace('%', '%%') + if flag and is_sequence(args): + return tuple([_fix_args(a, flag=0) for a in args]) + return args + + +class Log(old_Log): + def _log(self, level, msg, args): + if level >= self.threshold: + if args: + msg = msg % _fix_args(args) + if 0: + if msg.startswith('copying ') and msg.find(' -> ') != -1: + return + if msg.startswith('byte-compiling '): + return + print(_global_color_map[level](msg)) + sys.stdout.flush() + + def good(self, msg, *args): + """ + If we log WARN messages, log this message as a 'nice' anti-warn + message. + + """ + if WARN >= self.threshold: + if args: + print(green_text(msg % _fix_args(args))) + else: + print(green_text(msg)) + sys.stdout.flush() + + +_global_log.__class__ = Log + +good = _global_log.good + +def set_threshold(level, force=False): + prev_level = _global_log.threshold + if prev_level > DEBUG or force: + # If we're running at DEBUG, don't change the threshold, as there's + # likely a good reason why we're running at this level. 
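For orientation, the verbosity-to-threshold mapping implemented by
set_verbosity a little further down (a usage sketch of this module's public
API):

    from numpy.distutils import log
    log.set_verbosity(2)   # v >= 2  -> DEBUG
    log.set_verbosity(1)   # v == 1  -> INFO
    log.set_verbosity(0)   # v == 0  -> WARN (the default forced at import)
    log.info('printed while the threshold is INFO or DEBUG')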
+ _global_log.threshold = level + if level <= DEBUG: + info('set_threshold: setting threshold to DEBUG level,' + ' it can be changed only with force argument') + else: + info('set_threshold: not changing threshold from DEBUG level' + ' %s to %s' % (prev_level, level)) + return prev_level + +def get_threshold(): + return _global_log.threshold + +def set_verbosity(v, force=False): + prev_level = _global_log.threshold + if v < 0: + set_threshold(ERROR, force) + elif v == 0: + set_threshold(WARN, force) + elif v == 1: + set_threshold(INFO, force) + elif v >= 2: + set_threshold(DEBUG, force) + return {FATAL:-2,ERROR:-1,WARN:0,INFO:1,DEBUG:2}.get(prev_level, 1) + + +_global_color_map = { + DEBUG:cyan_text, + INFO:default_text, + WARN:red_text, + ERROR:red_text, + FATAL:red_text +} + +# don't use INFO,.. flags in set_verbosity, these flags are for set_threshold. +set_verbosity(0, force=True) + + +_error = error +_warn = warn +_info = info +_debug = debug + + +def error(msg, *a, **kw): + _error(f"ERROR: {msg}", *a, **kw) + + +def warn(msg, *a, **kw): + _warn(f"WARN: {msg}", *a, **kw) + + +def info(msg, *a, **kw): + _info(f"INFO: {msg}", *a, **kw) + + +def debug(msg, *a, **kw): + _debug(f"DEBUG: {msg}", *a, **kw) diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/mingw/gfortran_vs2003_hack.c b/.local/lib/python3.8/site-packages/numpy/distutils/mingw/gfortran_vs2003_hack.c new file mode 100644 index 0000000..485a675 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/mingw/gfortran_vs2003_hack.c @@ -0,0 +1,6 @@ +int _get_output_format(void) +{ + return 0; +} + +int _imp____lc_codepage = 0; diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/mingw32ccompiler.py b/.local/lib/python3.8/site-packages/numpy/distutils/mingw32ccompiler.py new file mode 100644 index 0000000..fbe3655 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/mingw32ccompiler.py @@ -0,0 +1,598 @@ +""" +Support code for building Python extensions on Windows. + + # NT stuff + # 1. Make sure libpython.a exists for gcc. If not, build it. + # 2. Force windows to use gcc (we're struggling with MSVC and g77 support) + # 3. Force windows to use g77 + +""" +import os +import platform +import sys +import subprocess +import re +import textwrap + +# Overwrite certain distutils.ccompiler functions: +import numpy.distutils.ccompiler # noqa: F401 +from numpy.distutils import log +# NT stuff +# 1. Make sure libpython.a exists for gcc. If not, build it. +# 2. Force windows to use gcc (we're struggling with MSVC and g77 support) +# --> this is done in numpy/distutils/ccompiler.py +# 3. 
Force windows to use g77 + +import distutils.cygwinccompiler +from distutils.unixccompiler import UnixCCompiler +from distutils.msvccompiler import get_build_version as get_build_msvc_version +from distutils.errors import UnknownFileError +from numpy.distutils.misc_util import (msvc_runtime_library, + msvc_runtime_version, + msvc_runtime_major, + get_build_architecture) + +def get_msvcr_replacement(): + """Replacement for outdated version of get_msvcr from cygwinccompiler""" + msvcr = msvc_runtime_library() + return [] if msvcr is None else [msvcr] + +# monkey-patch cygwinccompiler with our updated version from misc_util +# to avoid getting an exception raised on Python 3.5 +distutils.cygwinccompiler.get_msvcr = get_msvcr_replacement + +# Useful to generate table of symbols from a dll +_START = re.compile(r'\[Ordinal/Name Pointer\] Table') +_TABLE = re.compile(r'^\s+\[([\s*[0-9]*)\] ([a-zA-Z0-9_]*)') + +# the same as cygwin plus some additional parameters +class Mingw32CCompiler(distutils.cygwinccompiler.CygwinCCompiler): + """ A modified MingW32 compiler compatible with an MSVC built Python. + + """ + + compiler_type = 'mingw32' + + def __init__ (self, + verbose=0, + dry_run=0, + force=0): + + distutils.cygwinccompiler.CygwinCCompiler.__init__ (self, verbose, + dry_run, force) + + # **changes: eric jones 4/11/01 + # 1. Check for import library on Windows. Build if it doesn't exist. + + build_import_library() + + # Check for custom msvc runtime library on Windows. Build if it doesn't exist. + msvcr_success = build_msvcr_library() + msvcr_dbg_success = build_msvcr_library(debug=True) + if msvcr_success or msvcr_dbg_success: + # add preprocessor statement for using customized msvcr lib + self.define_macro('NPY_MINGW_USE_CUSTOM_MSVCR') + + # Define the MSVC version as hint for MinGW + msvcr_version = msvc_runtime_version() + if msvcr_version: + self.define_macro('__MSVCRT_VERSION__', '0x%04i' % msvcr_version) + + # MS_WIN64 should be defined when building for amd64 on windows, + # but python headers define it only for MS compilers, which has all + # kind of bad consequences, like using Py_ModuleInit4 instead of + # Py_ModuleInit4_64, etc... 
So we add it here + if get_build_architecture() == 'AMD64': + self.set_executables( + compiler='gcc -g -DDEBUG -DMS_WIN64 -O0 -Wall', + compiler_so='gcc -g -DDEBUG -DMS_WIN64 -O0 -Wall ' + '-Wstrict-prototypes', + linker_exe='gcc -g', + linker_so='gcc -g -shared') + else: + self.set_executables( + compiler='gcc -O2 -Wall', + compiler_so='gcc -O2 -Wall -Wstrict-prototypes', + linker_exe='g++ ', + linker_so='g++ -shared') + # added for python2.3 support + # we can't pass it through set_executables because pre 2.2 would fail + self.compiler_cxx = ['g++'] + + # Maybe we should also append -mthreads, but then the finished dlls + # need another dll (mingwm10.dll see Mingw32 docs) (-mthreads: Support + # thread-safe exception handling on `Mingw32') + + # no additional libraries needed + #self.dll_libraries=[] + return + + # __init__ () + + def link(self, + target_desc, + objects, + output_filename, + output_dir, + libraries, + library_dirs, + runtime_library_dirs, + export_symbols = None, + debug=0, + extra_preargs=None, + extra_postargs=None, + build_temp=None, + target_lang=None): + # Include the appropriate MSVC runtime library if Python was built + # with MSVC >= 7.0 (MinGW standard is msvcrt) + runtime_library = msvc_runtime_library() + if runtime_library: + if not libraries: + libraries = [] + libraries.append(runtime_library) + args = (self, + target_desc, + objects, + output_filename, + output_dir, + libraries, + library_dirs, + runtime_library_dirs, + None, #export_symbols, we do this in our def-file + debug, + extra_preargs, + extra_postargs, + build_temp, + target_lang) + func = UnixCCompiler.link + func(*args[:func.__code__.co_argcount]) + return + + def object_filenames (self, + source_filenames, + strip_dir=0, + output_dir=''): + if output_dir is None: output_dir = '' + obj_names = [] + for src_name in source_filenames: + # use normcase to make sure '.rc' is really '.rc' and not '.RC' + (base, ext) = os.path.splitext (os.path.normcase(src_name)) + + # added these lines to strip off windows drive letters + # without it, .o files are placed next to .c files + # instead of the build directory + drv, base = os.path.splitdrive(base) + if drv: + base = base[1:] + + if ext not in (self.src_extensions + ['.rc', '.res']): + raise UnknownFileError( + "unknown file type '%s' (from '%s')" % \ + (ext, src_name)) + if strip_dir: + base = os.path.basename (base) + if ext == '.res' or ext == '.rc': + # these need to be compiled to object files + obj_names.append (os.path.join (output_dir, + base + ext + self.obj_extension)) + else: + obj_names.append (os.path.join (output_dir, + base + self.obj_extension)) + return obj_names + + # object_filenames () + + +def find_python_dll(): + # We can't do much here: + # - find it in the virtualenv (sys.prefix) + # - find it in python main dir (sys.base_prefix, if in a virtualenv) + # - sys.real_prefix is main dir for virtualenvs in Python 2.7 + # - in system32, + # - ortherwise (Sxs), I don't know how to get it. 
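The sys.base_prefix / sys.real_prefix distinction relied on below:
base_prefix is the PEP 405 (built-in venv) attribute, real_prefix the older
virtualenv package's; a sketch of the "are we in a virtual environment" test:

    import sys
    in_venv = (getattr(sys, 'base_prefix', sys.prefix) != sys.prefix
               or hasattr(sys, 'real_prefix'))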
+ stems = [sys.prefix] + if hasattr(sys, 'base_prefix') and sys.base_prefix != sys.prefix: + stems.append(sys.base_prefix) + elif hasattr(sys, 'real_prefix') and sys.real_prefix != sys.prefix: + stems.append(sys.real_prefix) + + sub_dirs = ['', 'lib', 'bin'] + # generate possible combinations of directory trees and sub-directories + lib_dirs = [] + for stem in stems: + for folder in sub_dirs: + lib_dirs.append(os.path.join(stem, folder)) + + # add system directory as well + if 'SYSTEMROOT' in os.environ: + lib_dirs.append(os.path.join(os.environ['SYSTEMROOT'], 'System32')) + + # search in the file system for possible candidates + major_version, minor_version = tuple(sys.version_info[:2]) + implementation = platform.python_implementation() + if implementation == 'CPython': + dllname = f'python{major_version}{minor_version}.dll' + elif implementation == 'PyPy': + dllname = f'libpypy{major_version}-c.dll' + else: + dllname = 'Unknown platform {implementation}' + print("Looking for %s" % dllname) + for folder in lib_dirs: + dll = os.path.join(folder, dllname) + if os.path.exists(dll): + return dll + + raise ValueError("%s not found in %s" % (dllname, lib_dirs)) + +def dump_table(dll): + st = subprocess.check_output(["objdump.exe", "-p", dll]) + return st.split(b'\n') + +def generate_def(dll, dfile): + """Given a dll file location, get all its exported symbols and dump them + into the given def file. + + The .def file will be overwritten""" + dump = dump_table(dll) + for i in range(len(dump)): + if _START.match(dump[i].decode()): + break + else: + raise ValueError("Symbol table not found") + + syms = [] + for j in range(i+1, len(dump)): + m = _TABLE.match(dump[j].decode()) + if m: + syms.append((int(m.group(1).strip()), m.group(2))) + else: + break + + if len(syms) == 0: + log.warn('No symbols found in %s' % dll) + + with open(dfile, 'w') as d: + d.write('LIBRARY %s\n' % os.path.basename(dll)) + d.write(';CODE PRELOAD MOVEABLE DISCARDABLE\n') + d.write(';DATA PRELOAD SINGLE\n') + d.write('\nEXPORTS\n') + for s in syms: + #d.write('@%d %s\n' % (s[0], s[1])) + d.write('%s\n' % s[1]) + +def find_dll(dll_name): + + arch = {'AMD64' : 'amd64', + 'Intel' : 'x86'}[get_build_architecture()] + + def _find_dll_in_winsxs(dll_name): + # Walk through the WinSxS directory to find the dll. + winsxs_path = os.path.join(os.environ.get('WINDIR', r'C:\WINDOWS'), + 'winsxs') + if not os.path.exists(winsxs_path): + return None + for root, dirs, files in os.walk(winsxs_path): + if dll_name in files and arch in root: + return os.path.join(root, dll_name) + return None + + def _find_dll_in_path(dll_name): + # First, look in the Python directory, then scan PATH for + # the given dll name. + for path in [sys.prefix] + os.environ['PATH'].split(';'): + filepath = os.path.join(path, dll_name) + if os.path.exists(filepath): + return os.path.abspath(filepath) + + return _find_dll_in_winsxs(dll_name) or _find_dll_in_path(dll_name) + +def build_msvcr_library(debug=False): + if os.name != 'nt': + return False + + # If the version number is None, then we couldn't find the MSVC runtime at + # all, because we are running on a Python distribution which is customed + # compiled; trust that the compiler is the same as the one available to us + # now, and that it is capable of linking with the correct runtime without + # any extra options. 
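For orientation, the names this function manipulates (a sketch; the exact
strings come from msvc_runtime_library()/msvc_runtime_major() in misc_util,
and 'msvcr90' here stands for the VS 2008 runtime as an example):

    msvcr_name = 'msvcr90'          # runtime library name, major version 90
    debug_name = msvcr_name + 'd'   # the debug runtime just appends 'd'
    dll_name = debug_name + '.dll'          # DLL to locate:  msvcr90d.dll
    out_name = 'lib%s.a' % debug_name       # import library: libmsvcr90d.a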
+ msvcr_ver = msvc_runtime_major() + if msvcr_ver is None: + log.debug('Skip building import library: ' + 'Runtime is not compiled with MSVC') + return False + + # Skip using a custom library for versions < MSVC 8.0 + if msvcr_ver < 80: + log.debug('Skip building msvcr library:' + ' custom functionality not present') + return False + + msvcr_name = msvc_runtime_library() + if debug: + msvcr_name += 'd' + + # Skip if custom library already exists + out_name = "lib%s.a" % msvcr_name + out_file = os.path.join(sys.prefix, 'libs', out_name) + if os.path.isfile(out_file): + log.debug('Skip building msvcr library: "%s" exists' % + (out_file,)) + return True + + # Find the msvcr dll + msvcr_dll_name = msvcr_name + '.dll' + dll_file = find_dll(msvcr_dll_name) + if not dll_file: + log.warn('Cannot build msvcr library: "%s" not found' % + msvcr_dll_name) + return False + + def_name = "lib%s.def" % msvcr_name + def_file = os.path.join(sys.prefix, 'libs', def_name) + + log.info('Building msvcr library: "%s" (from %s)' \ + % (out_file, dll_file)) + + # Generate a symbol definition file from the msvcr dll + generate_def(dll_file, def_file) + + # Create a custom mingw library for the given symbol definitions + cmd = ['dlltool', '-d', def_file, '-l', out_file] + retcode = subprocess.call(cmd) + + # Clean up symbol definitions + os.remove(def_file) + + return (not retcode) + +def build_import_library(): + if os.name != 'nt': + return + + arch = get_build_architecture() + if arch == 'AMD64': + return _build_import_library_amd64() + elif arch == 'Intel': + return _build_import_library_x86() + else: + raise ValueError("Unhandled arch %s" % arch) + +def _check_for_import_lib(): + """Check if an import library for the Python runtime already exists.""" + major_version, minor_version = tuple(sys.version_info[:2]) + + # patterns for the file name of the library itself + patterns = ['libpython%d%d.a', + 'libpython%d%d.dll.a', + 'libpython%d.%d.dll.a'] + + # directory trees that may contain the library + stems = [sys.prefix] + if hasattr(sys, 'base_prefix') and sys.base_prefix != sys.prefix: + stems.append(sys.base_prefix) + elif hasattr(sys, 'real_prefix') and sys.real_prefix != sys.prefix: + stems.append(sys.real_prefix) + + # possible subdirectories within those trees where it is placed + sub_dirs = ['libs', 'lib'] + + # generate a list of candidate locations + candidates = [] + for pat in patterns: + filename = pat % (major_version, minor_version) + for stem_dir in stems: + for folder in sub_dirs: + candidates.append(os.path.join(stem_dir, folder, filename)) + + # test the filesystem to see if we can find any of these + for fullname in candidates: + if os.path.isfile(fullname): + # already exists, in location given + return (True, fullname) + + # needs to be built, preferred location given first + return (False, candidates[0]) + +def _build_import_library_amd64(): + out_exists, out_file = _check_for_import_lib() + if out_exists: + log.debug('Skip building import library: "%s" exists', out_file) + return + + # get the runtime dll for which we are building import library + dll_file = find_python_dll() + log.info('Building import library (arch=AMD64): "%s" (from %s)' % + (out_file, dll_file)) + + # generate symbol list from this library + def_name = "python%d%d.def" % tuple(sys.version_info[:2]) + def_file = os.path.join(sys.prefix, 'libs', def_name) + generate_def(dll_file, def_file) + + # generate import library from this symbol list + cmd = ['dlltool', '-d', def_file, '-l', out_file] + subprocess.check_call(cmd) 
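Both import-library builders funnel into the same dlltool step; a condensed
sketch of the amd64 path above ('python38.def' is an illustrative file name,
and dlltool ships with binutils/MinGW):

    from numpy.distutils.mingw32ccompiler import find_python_dll, generate_def
    dll_file = find_python_dll()             # e.g. ...\python38.dll (Windows only)
    generate_def(dll_file, 'python38.def')   # objdump -p + export-table parsing
    # dlltool then turns the .def file into a GNU import library:
    #   dlltool -d python38.def -l libpython38.a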
+ +def _build_import_library_x86(): + """ Build the import libraries for Mingw32-gcc on Windows + """ + out_exists, out_file = _check_for_import_lib() + if out_exists: + log.debug('Skip building import library: "%s" exists', out_file) + return + + lib_name = "python%d%d.lib" % tuple(sys.version_info[:2]) + lib_file = os.path.join(sys.prefix, 'libs', lib_name) + if not os.path.isfile(lib_file): + # didn't find library file in virtualenv, try base distribution, too, + # and use that instead if found there. for Python 2.7 venvs, the base + # directory is in attribute real_prefix instead of base_prefix. + if hasattr(sys, 'base_prefix'): + base_lib = os.path.join(sys.base_prefix, 'libs', lib_name) + elif hasattr(sys, 'real_prefix'): + base_lib = os.path.join(sys.real_prefix, 'libs', lib_name) + else: + base_lib = '' # os.path.isfile('') == False + + if os.path.isfile(base_lib): + lib_file = base_lib + else: + log.warn('Cannot build import library: "%s" not found', lib_file) + return + log.info('Building import library (ARCH=x86): "%s"', out_file) + + from numpy.distutils import lib2def + + def_name = "python%d%d.def" % tuple(sys.version_info[:2]) + def_file = os.path.join(sys.prefix, 'libs', def_name) + nm_output = lib2def.getnm( + lib2def.DEFAULT_NM + [lib_file], shell=False) + dlist, flist = lib2def.parse_nm(nm_output) + with open(def_file, 'w') as fid: + lib2def.output_def(dlist, flist, lib2def.DEF_HEADER, fid) + + dll_name = find_python_dll () + + cmd = ["dlltool", + "--dllname", dll_name, + "--def", def_file, + "--output-lib", out_file] + status = subprocess.check_output(cmd) + if status: + log.warn('Failed to build import library for gcc. Linking will fail.') + return + +#===================================== +# Dealing with Visual Studio MANIFESTS +#===================================== + +# Functions to deal with visual studio manifests. Manifest are a mechanism to +# enforce strong DLL versioning on windows, and has nothing to do with +# distutils MANIFEST. manifests are XML files with version info, and used by +# the OS loader; they are necessary when linking against a DLL not in the +# system path; in particular, official python 2.6 binary is built against the +# MS runtime 9 (the one from VS 2008), which is not available on most windows +# systems; python 2.6 installer does install it in the Win SxS (Side by side) +# directory, but this requires the manifest for this to work. This is a big +# mess, thanks MS for a wonderful system. + +# XXX: ideally, we should use exactly the same version as used by python. I +# submitted a patch to get this version, but it was only included for python +# 2.6.1 and above. So for versions below, we use a "best guess". +_MSVCRVER_TO_FULLVER = {} +if sys.platform == 'win32': + try: + import msvcrt + # I took one version in my SxS directory: no idea if it is the good + # one, and we can't retrieve it from python + _MSVCRVER_TO_FULLVER['80'] = "8.0.50727.42" + _MSVCRVER_TO_FULLVER['90'] = "9.0.21022.8" + # Value from msvcrt.CRT_ASSEMBLY_VERSION under Python 3.3.0 + # on Windows XP: + _MSVCRVER_TO_FULLVER['100'] = "10.0.30319.460" + crt_ver = getattr(msvcrt, 'CRT_ASSEMBLY_VERSION', None) + if crt_ver is not None: # Available at least back to Python 3.3 + maj, min = re.match(r'(\d+)\.(\d)', crt_ver).groups() + _MSVCRVER_TO_FULLVER[maj + min] = crt_ver + del maj, min + del crt_ver + except ImportError: + # If we are here, means python was not built with MSVC. 
Not sure what + # to do in that case: manifest building will fail, but it should not be + # used in that case anyway + log.warn('Cannot import msvcrt: using manifest will not be possible') + +def msvc_manifest_xml(maj, min): + """Given a major and minor version of the MSVCR, returns the + corresponding XML file.""" + try: + fullver = _MSVCRVER_TO_FULLVER[str(maj * 10 + min)] + except KeyError: + raise ValueError("Version %d,%d of MSVCRT not supported yet" % + (maj, min)) from None + # Don't be fooled, it looks like an XML, but it is not. In particular, it + # should not have any space before starting, and its size should be + # divisible by 4, most likely for alignment constraints when the xml is + # embedded in the binary... + # This template was copied directly from the python 2.6 binary (using + # strings.exe from mingw on python.exe). + template = textwrap.dedent("""\ + + + + + + + + + + + + + + """) + + return template % {'fullver': fullver, 'maj': maj, 'min': min} + +def manifest_rc(name, type='dll'): + """Return the rc file used to generate the res file which will be embedded + as manifest for given manifest file name, of given type ('dll' or + 'exe'). + + Parameters + ---------- + name : str + name of the manifest file to embed + type : str {'dll', 'exe'} + type of the binary which will embed the manifest + + """ + if type == 'dll': + rctype = 2 + elif type == 'exe': + rctype = 1 + else: + raise ValueError("Type %s not supported" % type) + + return """\ +#include "winuser.h" +%d RT_MANIFEST %s""" % (rctype, name) + +def check_embedded_msvcr_match_linked(msver): + """msver is the ms runtime version used for the MANIFEST.""" + # check msvcr major version are the same for linking and + # embedding + maj = msvc_runtime_major() + if maj: + if not maj == int(msver): + raise ValueError( + "Discrepancy between linked msvcr " \ + "(%d) and the one about to be embedded " \ + "(%d)" % (int(msver), maj)) + +def configtest_name(config): + base = os.path.basename(config._gen_temp_sourcefile("yo", [], "c")) + return os.path.splitext(base)[0] + +def manifest_name(config): + # Get configest name (including suffix) + root = configtest_name(config) + exext = config.compiler.exe_extension + return root + exext + ".manifest" + +def rc_name(config): + # Get configtest name (including suffix) + root = configtest_name(config) + return root + ".rc" + +def generate_manifest(config): + msver = get_build_msvc_version() + if msver is not None: + if msver >= 8: + check_embedded_msvcr_match_linked(msver) + ma_str, mi_str = str(msver).split('.') + # Write the manifest file + manxml = msvc_manifest_xml(int(ma_str), int(mi_str)) + with open(manifest_name(config), "w") as man: + config.temp_files.append(manifest_name(config)) + man.write(manxml) diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/misc_util.py b/.local/lib/python3.8/site-packages/numpy/distutils/misc_util.py new file mode 100644 index 0000000..513be75 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/misc_util.py @@ -0,0 +1,2502 @@ +import os +import re +import sys +import copy +import glob +import atexit +import tempfile +import subprocess +import shutil +import multiprocessing +import textwrap +import importlib.util +from threading import local as tlocal +from functools import reduce + +import distutils +from distutils.errors import DistutilsError + +# stores temporary directory of each thread to only create one per thread +_tdata = tlocal() + +# store all created temporary directories so they can be deleted on exit 
+_tmpdirs = [] +def clean_up_temporary_directory(): + if _tmpdirs is not None: + for d in _tmpdirs: + try: + shutil.rmtree(d) + except OSError: + pass + +atexit.register(clean_up_temporary_directory) + +__all__ = ['Configuration', 'get_numpy_include_dirs', 'default_config_dict', + 'dict_append', 'appendpath', 'generate_config_py', + 'get_cmd', 'allpath', 'get_mathlibs', + 'terminal_has_colors', 'red_text', 'green_text', 'yellow_text', + 'blue_text', 'cyan_text', 'cyg2win32', 'mingw32', 'all_strings', + 'has_f_sources', 'has_cxx_sources', 'filter_sources', + 'get_dependencies', 'is_local_src_dir', 'get_ext_source_files', + 'get_script_files', 'get_lib_source_files', 'get_data_files', + 'dot_join', 'get_frame', 'minrelpath', 'njoin', + 'is_sequence', 'is_string', 'as_list', 'gpaths', 'get_language', + 'get_build_architecture', 'get_info', 'get_pkg_info', + 'get_num_build_jobs', 'sanitize_cxx_flags', + 'exec_mod_from_location'] + +class InstallableLib: + """ + Container to hold information on an installable library. + + Parameters + ---------- + name : str + Name of the installed library. + build_info : dict + Dictionary holding build information. + target_dir : str + Absolute path specifying where to install the library. + + See Also + -------- + Configuration.add_installed_library + + Notes + ----- + The three parameters are stored as attributes with the same names. + + """ + def __init__(self, name, build_info, target_dir): + self.name = name + self.build_info = build_info + self.target_dir = target_dir + + +def get_num_build_jobs(): + """ + Get number of parallel build jobs set by the --parallel command line + argument of setup.py + If the command did not receive a setting the environment variable + NPY_NUM_BUILD_JOBS is checked. If that is unset, return the number of + processors on the system, with a maximum of 8 (to prevent + overloading the system if there a lot of CPUs). + + Returns + ------- + out : int + number of parallel jobs that can be run + + """ + from numpy.distutils.core import get_distribution + try: + cpu_count = len(os.sched_getaffinity(0)) + except AttributeError: + cpu_count = multiprocessing.cpu_count() + cpu_count = min(cpu_count, 8) + envjobs = int(os.environ.get("NPY_NUM_BUILD_JOBS", cpu_count)) + dist = get_distribution() + # may be None during configuration + if dist is None: + return envjobs + + # any of these three may have the job set, take the largest + cmdattr = (getattr(dist.get_command_obj('build'), 'parallel', None), + getattr(dist.get_command_obj('build_ext'), 'parallel', None), + getattr(dist.get_command_obj('build_clib'), 'parallel', None)) + if all(x is None for x in cmdattr): + return envjobs + else: + return max(x for x in cmdattr if x is not None) + +def quote_args(args): + """Quote list of arguments. + + .. deprecated:: 1.22. + """ + import warnings + warnings.warn('"quote_args" is deprecated.', + DeprecationWarning, stacklevel=2) + # don't used _nt_quote_args as it does not check if + # args items already have quotes or not. + args = list(args) + for i in range(len(args)): + a = args[i] + if ' ' in a and a[0] not in '"\'': + args[i] = '"%s"' % (a) + return args + +def allpath(name): + "Convert a /-separated pathname to one using the OS's path separator." 
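A few behaviours of the path helpers in this module, as usage sketches
(results follow the docstrings and code here; njoin and minrelpath are
defined further down):

    allpath('a/b/c')         # os.path.join('a', 'b', 'c'); 'a\\b\\c' on Windows
    njoin('a', ['b', 'c'])   # sequences are joined recursively -> 'a/b/c'
    minrelpath('a/b/../c')   # '.' and '..' segments resolved   -> 'a/c'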
+ split = name.split('/') + return os.path.join(*split) + +def rel_path(path, parent_path): + """Return path relative to parent_path.""" + # Use realpath to avoid issues with symlinked dirs (see gh-7707) + pd = os.path.realpath(os.path.abspath(parent_path)) + apath = os.path.realpath(os.path.abspath(path)) + if len(apath) < len(pd): + return path + if apath == pd: + return '' + if pd == apath[:len(pd)]: + assert apath[len(pd)] in [os.sep], repr((path, apath[len(pd)])) + path = apath[len(pd)+1:] + return path + +def get_path_from_frame(frame, parent_path=None): + """Return path of the module given a frame object from the call stack. + + Returned path is relative to parent_path when given, + otherwise it is absolute path. + """ + + # First, try to find if the file name is in the frame. + try: + caller_file = eval('__file__', frame.f_globals, frame.f_locals) + d = os.path.dirname(os.path.abspath(caller_file)) + except NameError: + # __file__ is not defined, so let's try __name__. We try this second + # because setuptools spoofs __name__ to be '__main__' even though + # sys.modules['__main__'] might be something else, like easy_install(1). + caller_name = eval('__name__', frame.f_globals, frame.f_locals) + __import__(caller_name) + mod = sys.modules[caller_name] + if hasattr(mod, '__file__'): + d = os.path.dirname(os.path.abspath(mod.__file__)) + else: + # we're probably running setup.py as execfile("setup.py") + # (likely we're building an egg) + d = os.path.abspath('.') + + if parent_path is not None: + d = rel_path(d, parent_path) + + return d or '.' + +def njoin(*path): + """Join two or more pathname components + + - convert a /-separated pathname to one using the OS's path separator. + - resolve `..` and `.` from path. + + Either passing n arguments as in njoin('a','b'), or a sequence + of n names as in njoin(['a','b']) is handled, or a mixture of such arguments. + """ + paths = [] + for p in path: + if is_sequence(p): + # njoin(['a', 'b'], 'c') + paths.append(njoin(*p)) + else: + assert is_string(p) + paths.append(p) + path = paths + if not path: + # njoin() + joined = '' + else: + # njoin('a', 'b') + joined = os.path.join(*path) + if os.path.sep != '/': + joined = joined.replace('/', os.path.sep) + return minrelpath(joined) + +def get_mathlibs(path=None): + """Return the MATHLIB line from numpyconfig.h + """ + if path is not None: + config_file = os.path.join(path, '_numpyconfig.h') + else: + # Look for the file in each of the numpy include directories. + dirs = get_numpy_include_dirs() + for path in dirs: + fn = os.path.join(path, '_numpyconfig.h') + if os.path.exists(fn): + config_file = fn + break + else: + raise DistutilsError('_numpyconfig.h not found in numpy include ' + 'dirs %r' % (dirs,)) + + with open(config_file) as fid: + mathlibs = [] + s = '#define MATHLIB' + for line in fid: + if line.startswith(s): + value = line[len(s):].strip() + if value: + mathlibs.extend(value.split(',')) + return mathlibs + +def minrelpath(path): + """Resolve `..` and '.' from path. + """ + if not is_string(path): + return path + if '.' 
not in path: + return path + l = path.split(os.sep) + while l: + try: + i = l.index('.', 1) + except ValueError: + break + del l[i] + j = 1 + while l: + try: + i = l.index('..', j) + except ValueError: + break + if l[i-1]=='..': + j += 1 + else: + del l[i], l[i-1] + j = 1 + if not l: + return '' + return os.sep.join(l) + +def sorted_glob(fileglob): + """sorts output of python glob for https://bugs.python.org/issue30461 + to allow extensions to have reproducible build results""" + return sorted(glob.glob(fileglob)) + +def _fix_paths(paths, local_path, include_non_existing): + assert is_sequence(paths), repr(type(paths)) + new_paths = [] + assert not is_string(paths), repr(paths) + for n in paths: + if is_string(n): + if '*' in n or '?' in n: + p = sorted_glob(n) + p2 = sorted_glob(njoin(local_path, n)) + if p2: + new_paths.extend(p2) + elif p: + new_paths.extend(p) + else: + if include_non_existing: + new_paths.append(n) + print('could not resolve pattern in %r: %r' % + (local_path, n)) + else: + n2 = njoin(local_path, n) + if os.path.exists(n2): + new_paths.append(n2) + else: + if os.path.exists(n): + new_paths.append(n) + elif include_non_existing: + new_paths.append(n) + if not os.path.exists(n): + print('non-existing path in %r: %r' % + (local_path, n)) + + elif is_sequence(n): + new_paths.extend(_fix_paths(n, local_path, include_non_existing)) + else: + new_paths.append(n) + return [minrelpath(p) for p in new_paths] + +def gpaths(paths, local_path='', include_non_existing=True): + """Apply glob to paths and prepend local_path if needed. + """ + if is_string(paths): + paths = (paths,) + return _fix_paths(paths, local_path, include_non_existing) + +def make_temp_file(suffix='', prefix='', text=True): + if not hasattr(_tdata, 'tempdir'): + _tdata.tempdir = tempfile.mkdtemp() + _tmpdirs.append(_tdata.tempdir) + fid, name = tempfile.mkstemp(suffix=suffix, + prefix=prefix, + dir=_tdata.tempdir, + text=text) + fo = os.fdopen(fid, 'w') + return fo, name + +# Hooks for colored terminal output. 
+# See also https://web.archive.org/web/20100314204946/http://www.livinglogic.de/Python/ansistyle
+def terminal_has_colors():
+    if sys.platform=='cygwin' and 'USE_COLOR' not in os.environ:
+        # Avoid importing curses that causes illegal operation
+        # with a message:
+        #  PYTHON2 caused an invalid page fault in
+        #  module CYGNURSES7.DLL as 015f:18bbfc28
+        # Details: Python 2.3.3 [GCC 3.3.1 (cygming special)]
+        #          ssh to Win32 machine from debian
+        #          curses.version is 2.2
+        #          CYGWIN_98-4.10, release 1.5.7(0.109/3/2))
+        return 0
+    if hasattr(sys.stdout, 'isatty') and sys.stdout.isatty():
+        try:
+            import curses
+            curses.setupterm()
+            if (curses.tigetnum("colors") >= 0
+                and curses.tigetnum("pairs") >= 0
+                and ((curses.tigetstr("setf") is not None
+                      and curses.tigetstr("setb") is not None)
+                     or (curses.tigetstr("setaf") is not None
+                         and curses.tigetstr("setab") is not None)
+                     or curses.tigetstr("scp") is not None)):
+                return 1
+        except Exception:
+            pass
+    return 0
+
+if terminal_has_colors():
+    _colour_codes = dict(black=0, red=1, green=2, yellow=3,
+                         blue=4, magenta=5, cyan=6, white=7, default=9)
+    def colour_text(s, fg=None, bg=None, bold=False):
+        seq = []
+        if bold:
+            seq.append('1')
+        if fg:
+            fgcode = 30 + _colour_codes.get(fg.lower(), 0)
+            seq.append(str(fgcode))
+        if bg:
+            # use the bg argument here; the original looked up fg again,
+            # which made the background colour follow the foreground
+            bgcode = 40 + _colour_codes.get(bg.lower(), 7)
+            seq.append(str(bgcode))
+        if seq:
+            return '\x1b[%sm%s\x1b[0m' % (';'.join(seq), s)
+        else:
+            return s
+else:
+    def colour_text(s, fg=None, bg=None, bold=False):
+        return s
+
+def default_text(s):
+    return colour_text(s, 'default')
+def red_text(s):
+    return colour_text(s, 'red')
+def green_text(s):
+    return colour_text(s, 'green')
+def yellow_text(s):
+    return colour_text(s, 'yellow')
+def cyan_text(s):
+    return colour_text(s, 'cyan')
+def blue_text(s):
+    return colour_text(s, 'blue')
+
+#########################
+
+def cyg2win32(path: str) -> str:
+    """Convert a path from Cygwin-native to Windows-native.
+
+    Uses the cygpath utility (part of the Base install) to do the
+    actual conversion.  Falls back to returning the original path if
+    this fails.
+
+    Handles the default ``/cygdrive`` mount prefix as well as the
+    ``/proc/cygdrive`` portable prefix, custom cygdrive prefixes such
+    as ``/`` or ``/mnt``, and absolute paths such as ``/usr/src/`` or
+    ``/home/username``
+
+    Parameters
+    ----------
+    path : str
+       The path to convert
+
+    Returns
+    -------
+    converted_path : str
+        The converted path
+
+    Notes
+    -----
+    Documentation for cygpath utility:
+    https://cygwin.com/cygwin-ug-net/cygpath.html
+    Documentation for the C function it wraps:
+    https://cygwin.com/cygwin-api/func-cygwin-conv-path.html
+
+    """
+    if sys.platform != "cygwin":
+        return path
+    return subprocess.check_output(
+        ["/usr/bin/cygpath", "--windows", path], universal_newlines=True
+    )
+
+
+def mingw32():
+    """Return true when using mingw32 environment.
+ """ + if sys.platform=='win32': + if os.environ.get('OSTYPE', '')=='msys': + return True + if os.environ.get('MSYSTEM', '')=='MINGW32': + return True + return False + +def msvc_runtime_version(): + "Return version of MSVC runtime library, as defined by __MSC_VER__ macro" + msc_pos = sys.version.find('MSC v.') + if msc_pos != -1: + msc_ver = int(sys.version[msc_pos+6:msc_pos+10]) + else: + msc_ver = None + return msc_ver + +def msvc_runtime_library(): + "Return name of MSVC runtime library if Python was built with MSVC >= 7" + ver = msvc_runtime_major () + if ver: + if ver < 140: + return "msvcr%i" % ver + else: + return "vcruntime%i" % ver + else: + return None + +def msvc_runtime_major(): + "Return major version of MSVC runtime coded like get_build_msvc_version" + major = {1300: 70, # MSVC 7.0 + 1310: 71, # MSVC 7.1 + 1400: 80, # MSVC 8 + 1500: 90, # MSVC 9 (aka 2008) + 1600: 100, # MSVC 10 (aka 2010) + 1900: 140, # MSVC 14 (aka 2015) + }.get(msvc_runtime_version(), None) + return major + +######################### + +#XXX need support for .C that is also C++ +cxx_ext_match = re.compile(r'.*\.(cpp|cxx|cc)\Z', re.I).match +fortran_ext_match = re.compile(r'.*\.(f90|f95|f77|for|ftn|f)\Z', re.I).match +f90_ext_match = re.compile(r'.*\.(f90|f95)\Z', re.I).match +f90_module_name_match = re.compile(r'\s*module\s*(?P[\w_]+)', re.I).match +def _get_f90_modules(source): + """Return a list of Fortran f90 module names that + given source file defines. + """ + if not f90_ext_match(source): + return [] + modules = [] + with open(source, 'r') as f: + for line in f: + m = f90_module_name_match(line) + if m: + name = m.group('name') + modules.append(name) + # break # XXX can we assume that there is one module per file? + return modules + +def is_string(s): + return isinstance(s, str) + +def all_strings(lst): + """Return True if all items in lst are string objects. """ + for item in lst: + if not is_string(item): + return False + return True + +def is_sequence(seq): + if is_string(seq): + return False + try: + len(seq) + except Exception: + return False + return True + +def is_glob_pattern(s): + return is_string(s) and ('*' in s or '?' in s) + +def as_list(seq): + if is_sequence(seq): + return list(seq) + else: + return [seq] + +def get_language(sources): + # not used in numpy/scipy packages, use build_ext.detect_language instead + """Determine language value (c,f77,f90) from sources """ + language = None + for source in sources: + if isinstance(source, str): + if f90_ext_match(source): + language = 'f90' + break + elif fortran_ext_match(source): + language = 'f77' + return language + +def has_f_sources(sources): + """Return True if sources contains Fortran files """ + for source in sources: + if fortran_ext_match(source): + return True + return False + +def has_cxx_sources(sources): + """Return True if sources contains C++ files """ + for source in sources: + if cxx_ext_match(source): + return True + return False + +def filter_sources(sources): + """Return four lists of filenames containing + C, C++, Fortran, and Fortran 90 module sources, + respectively. 
+ """ + c_sources = [] + cxx_sources = [] + f_sources = [] + fmodule_sources = [] + for source in sources: + if fortran_ext_match(source): + modules = _get_f90_modules(source) + if modules: + fmodule_sources.append(source) + else: + f_sources.append(source) + elif cxx_ext_match(source): + cxx_sources.append(source) + else: + c_sources.append(source) + return c_sources, cxx_sources, f_sources, fmodule_sources + + +def _get_headers(directory_list): + # get *.h files from list of directories + headers = [] + for d in directory_list: + head = sorted_glob(os.path.join(d, "*.h")) #XXX: *.hpp files?? + headers.extend(head) + return headers + +def _get_directories(list_of_sources): + # get unique directories from list of sources. + direcs = [] + for f in list_of_sources: + d = os.path.split(f) + if d[0] != '' and not d[0] in direcs: + direcs.append(d[0]) + return direcs + +def _commandline_dep_string(cc_args, extra_postargs, pp_opts): + """ + Return commandline representation used to determine if a file needs + to be recompiled + """ + cmdline = 'commandline: ' + cmdline += ' '.join(cc_args) + cmdline += ' '.join(extra_postargs) + cmdline += ' '.join(pp_opts) + '\n' + return cmdline + + +def get_dependencies(sources): + #XXX scan sources for include statements + return _get_headers(_get_directories(sources)) + +def is_local_src_dir(directory): + """Return true if directory is local directory. + """ + if not is_string(directory): + return False + abs_dir = os.path.abspath(directory) + c = os.path.commonprefix([os.getcwd(), abs_dir]) + new_dir = abs_dir[len(c):].split(os.sep) + if new_dir and not new_dir[0]: + new_dir = new_dir[1:] + if new_dir and new_dir[0]=='build': + return False + new_dir = os.sep.join(new_dir) + return os.path.isdir(new_dir) + +def general_source_files(top_path): + pruned_directories = {'CVS':1, '.svn':1, 'build':1} + prune_file_pat = re.compile(r'(?:[~#]|\.py[co]|\.o)$') + for dirpath, dirnames, filenames in os.walk(top_path, topdown=True): + pruned = [ d for d in dirnames if d not in pruned_directories ] + dirnames[:] = pruned + for f in filenames: + if not prune_file_pat.search(f): + yield os.path.join(dirpath, f) + +def general_source_directories_files(top_path): + """Return a directory name relative to top_path and + files contained. + """ + pruned_directories = ['CVS', '.svn', 'build'] + prune_file_pat = re.compile(r'(?:[~#]|\.py[co]|\.o)$') + for dirpath, dirnames, filenames in os.walk(top_path, topdown=True): + pruned = [ d for d in dirnames if d not in pruned_directories ] + dirnames[:] = pruned + for d in dirnames: + dpath = os.path.join(dirpath, d) + rpath = rel_path(dpath, top_path) + files = [] + for f in os.listdir(dpath): + fn = os.path.join(dpath, f) + if os.path.isfile(fn) and not prune_file_pat.search(fn): + files.append(fn) + yield rpath, files + dpath = top_path + rpath = rel_path(dpath, top_path) + filenames = [os.path.join(dpath, f) for f in os.listdir(dpath) \ + if not prune_file_pat.search(f)] + files = [f for f in filenames if os.path.isfile(f)] + yield rpath, files + + +def get_ext_source_files(ext): + # Get sources and any include files in the same directory. 
+ filenames = [] + sources = [_m for _m in ext.sources if is_string(_m)] + filenames.extend(sources) + filenames.extend(get_dependencies(sources)) + for d in ext.depends: + if is_local_src_dir(d): + filenames.extend(list(general_source_files(d))) + elif os.path.isfile(d): + filenames.append(d) + return filenames + +def get_script_files(scripts): + scripts = [_m for _m in scripts if is_string(_m)] + return scripts + +def get_lib_source_files(lib): + filenames = [] + sources = lib[1].get('sources', []) + sources = [_m for _m in sources if is_string(_m)] + filenames.extend(sources) + filenames.extend(get_dependencies(sources)) + depends = lib[1].get('depends', []) + for d in depends: + if is_local_src_dir(d): + filenames.extend(list(general_source_files(d))) + elif os.path.isfile(d): + filenames.append(d) + return filenames + +def get_shared_lib_extension(is_python_ext=False): + """Return the correct file extension for shared libraries. + + Parameters + ---------- + is_python_ext : bool, optional + Whether the shared library is a Python extension. Default is False. + + Returns + ------- + so_ext : str + The shared library extension. + + Notes + ----- + For Python shared libs, `so_ext` will typically be '.so' on Linux and OS X, + and '.pyd' on Windows. For Python >= 3.2 `so_ext` has a tag prepended on + POSIX systems according to PEP 3149. For Python 3.2 this is implemented on + Linux, but not on OS X. + + """ + confvars = distutils.sysconfig.get_config_vars() + # SO is deprecated in 3.3.1, use EXT_SUFFIX instead + so_ext = confvars.get('EXT_SUFFIX', None) + if so_ext is None: + so_ext = confvars.get('SO', '') + + if not is_python_ext: + # hardcode known values, config vars (including SHLIB_SUFFIX) are + # unreliable (see #3182) + # darwin, windows and debug linux are wrong in 3.3.1 and older + if (sys.platform.startswith('linux') or + sys.platform.startswith('gnukfreebsd')): + so_ext = '.so' + elif sys.platform.startswith('darwin'): + so_ext = '.dylib' + elif sys.platform.startswith('win'): + so_ext = '.dll' + else: + # fall back to config vars for unknown platforms + # fix long extension for Python >=3.2, see PEP 3149. + if 'SOABI' in confvars: + # Does nothing unless SOABI config var exists + so_ext = so_ext.replace('.' + confvars.get('SOABI'), '', 1) + + return so_ext + +def get_data_files(data): + if is_string(data): + return [data] + sources = data[1] + filenames = [] + for s in sources: + if hasattr(s, '__call__'): + continue + if is_local_src_dir(s): + filenames.extend(list(general_source_files(s))) + elif is_string(s): + if os.path.isfile(s): + filenames.append(s) + else: + print('Not existing data file:', s) + else: + raise TypeError(repr(s)) + return filenames + +def dot_join(*args): + return '.'.join([a for a in args if a]) + +def get_frame(level=0): + """Return frame object from call stack with given level. 
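+
+    (Hedged illustration, not in the original docstring: get_frame(0)
+    returns the caller's own frame, get_frame(1) the caller's caller.)
+
+    >>> def f(): return get_frame(0).f_code.co_name
+    >>> f() #doctest: +SKIP
+    'f'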
+ """ + try: + return sys._getframe(level+1) + except AttributeError: + frame = sys.exc_info()[2].tb_frame + for _ in range(level+1): + frame = frame.f_back + return frame + + +###################### + +class Configuration: + + _list_keys = ['packages', 'ext_modules', 'data_files', 'include_dirs', + 'libraries', 'headers', 'scripts', 'py_modules', + 'installed_libraries', 'define_macros'] + _dict_keys = ['package_dir', 'installed_pkg_config'] + _extra_keys = ['name', 'version'] + + numpy_include_dirs = [] + + def __init__(self, + package_name=None, + parent_name=None, + top_path=None, + package_path=None, + caller_level=1, + setup_name='setup.py', + **attrs): + """Construct configuration instance of a package. + + package_name -- name of the package + Ex.: 'distutils' + parent_name -- name of the parent package + Ex.: 'numpy' + top_path -- directory of the toplevel package + Ex.: the directory where the numpy package source sits + package_path -- directory of package. Will be computed by magic from the + directory of the caller module if not specified + Ex.: the directory where numpy.distutils is + caller_level -- frame level to caller namespace, internal parameter. + """ + self.name = dot_join(parent_name, package_name) + self.version = None + + caller_frame = get_frame(caller_level) + self.local_path = get_path_from_frame(caller_frame, top_path) + # local_path -- directory of a file (usually setup.py) that + # defines a configuration() function. + # local_path -- directory of a file (usually setup.py) that + # defines a configuration() function. + if top_path is None: + top_path = self.local_path + self.local_path = '' + if package_path is None: + package_path = self.local_path + elif os.path.isdir(njoin(self.local_path, package_path)): + package_path = njoin(self.local_path, package_path) + if not os.path.isdir(package_path or '.'): + raise ValueError("%r is not a directory" % (package_path,)) + self.top_path = top_path + self.package_path = package_path + # this is the relative path in the installed package + self.path_in_package = os.path.join(*self.name.split('.')) + + self.list_keys = self._list_keys[:] + self.dict_keys = self._dict_keys[:] + + for n in self.list_keys: + v = copy.copy(attrs.get(n, [])) + setattr(self, n, as_list(v)) + + for n in self.dict_keys: + v = copy.copy(attrs.get(n, {})) + setattr(self, n, v) + + known_keys = self.list_keys + self.dict_keys + self.extra_keys = self._extra_keys[:] + for n in attrs.keys(): + if n in known_keys: + continue + a = attrs[n] + setattr(self, n, a) + if isinstance(a, list): + self.list_keys.append(n) + elif isinstance(a, dict): + self.dict_keys.append(n) + else: + self.extra_keys.append(n) + + if os.path.exists(njoin(package_path, '__init__.py')): + self.packages.append(self.name) + self.package_dir[self.name] = package_path + + self.options = dict( + ignore_setup_xxx_py = False, + assume_default_configuration = False, + delegate_options_to_subpackages = False, + quiet = False, + ) + + caller_instance = None + for i in range(1, 3): + try: + f = get_frame(i) + except ValueError: + break + try: + caller_instance = eval('self', f.f_globals, f.f_locals) + break + except NameError: + pass + if isinstance(caller_instance, self.__class__): + if caller_instance.options['delegate_options_to_subpackages']: + self.set_options(**caller_instance.options) + + self.setup_name = setup_name + + def todict(self): + """ + Return a dictionary compatible with the keyword arguments of distutils + setup function. 
+ + Examples + -------- + >>> setup(**config.todict()) #doctest: +SKIP + """ + + self._optimize_data_files() + d = {} + known_keys = self.list_keys + self.dict_keys + self.extra_keys + for n in known_keys: + a = getattr(self, n) + if a: + d[n] = a + return d + + def info(self, message): + if not self.options['quiet']: + print(message) + + def warn(self, message): + sys.stderr.write('Warning: %s\n' % (message,)) + + def set_options(self, **options): + """ + Configure Configuration instance. + + The following options are available: + - ignore_setup_xxx_py + - assume_default_configuration + - delegate_options_to_subpackages + - quiet + + """ + for key, value in options.items(): + if key in self.options: + self.options[key] = value + else: + raise ValueError('Unknown option: '+key) + + def get_distribution(self): + """Return the distutils distribution object for self.""" + from numpy.distutils.core import get_distribution + return get_distribution() + + def _wildcard_get_subpackage(self, subpackage_name, + parent_name, + caller_level = 1): + l = subpackage_name.split('.') + subpackage_path = njoin([self.local_path]+l) + dirs = [_m for _m in sorted_glob(subpackage_path) if os.path.isdir(_m)] + config_list = [] + for d in dirs: + if not os.path.isfile(njoin(d, '__init__.py')): + continue + if 'build' in d.split(os.sep): + continue + n = '.'.join(d.split(os.sep)[-len(l):]) + c = self.get_subpackage(n, + parent_name = parent_name, + caller_level = caller_level+1) + config_list.extend(c) + return config_list + + def _get_configuration_from_setup_py(self, setup_py, + subpackage_name, + subpackage_path, + parent_name, + caller_level = 1): + # In case setup_py imports local modules: + sys.path.insert(0, os.path.dirname(setup_py)) + try: + setup_name = os.path.splitext(os.path.basename(setup_py))[0] + n = dot_join(self.name, subpackage_name, setup_name) + setup_module = exec_mod_from_location( + '_'.join(n.split('.')), setup_py) + if not hasattr(setup_module, 'configuration'): + if not self.options['assume_default_configuration']: + self.warn('Assuming default configuration '\ + '(%s does not define configuration())'\ + % (setup_module)) + config = Configuration(subpackage_name, parent_name, + self.top_path, subpackage_path, + caller_level = caller_level + 1) + else: + pn = dot_join(*([parent_name] + subpackage_name.split('.')[:-1])) + args = (pn,) + if setup_module.configuration.__code__.co_argcount > 1: + args = args + (self.top_path,) + config = setup_module.configuration(*args) + if config.name!=dot_join(parent_name, subpackage_name): + self.warn('Subpackage %r configuration returned as %r' % \ + (dot_join(parent_name, subpackage_name), config.name)) + finally: + del sys.path[0] + return config + + def get_subpackage(self,subpackage_name, + subpackage_path=None, + parent_name=None, + caller_level = 1): + """Return list of subpackage configurations. + + Parameters + ---------- + subpackage_name : str or None + Name of the subpackage to get the configuration. '*' in + subpackage_name is handled as a wildcard. + subpackage_path : str + If None, then the path is assumed to be the local path plus the + subpackage_name. If a setup.py file is not found in the + subpackage_path, then a default configuration is used. + parent_name : str + Parent name. 
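+
+        (Hedged example, not in the original docstring; 'mysubpkg' is a
+        hypothetical subpackage directory with its own setup.py:)
+
+        >>> config.get_subpackage('mysubpkg') #doctest: +SKIP
+        [<numpy.distutils.misc_util.Configuration object at 0x...>]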
+ """ + if subpackage_name is None: + if subpackage_path is None: + raise ValueError( + "either subpackage_name or subpackage_path must be specified") + subpackage_name = os.path.basename(subpackage_path) + + # handle wildcards + l = subpackage_name.split('.') + if subpackage_path is None and '*' in subpackage_name: + return self._wildcard_get_subpackage(subpackage_name, + parent_name, + caller_level = caller_level+1) + assert '*' not in subpackage_name, repr((subpackage_name, subpackage_path, parent_name)) + if subpackage_path is None: + subpackage_path = njoin([self.local_path] + l) + else: + subpackage_path = njoin([subpackage_path] + l[:-1]) + subpackage_path = self.paths([subpackage_path])[0] + setup_py = njoin(subpackage_path, self.setup_name) + if not self.options['ignore_setup_xxx_py']: + if not os.path.isfile(setup_py): + setup_py = njoin(subpackage_path, + 'setup_%s.py' % (subpackage_name)) + if not os.path.isfile(setup_py): + if not self.options['assume_default_configuration']: + self.warn('Assuming default configuration '\ + '(%s/{setup_%s,setup}.py was not found)' \ + % (os.path.dirname(setup_py), subpackage_name)) + config = Configuration(subpackage_name, parent_name, + self.top_path, subpackage_path, + caller_level = caller_level+1) + else: + config = self._get_configuration_from_setup_py( + setup_py, + subpackage_name, + subpackage_path, + parent_name, + caller_level = caller_level + 1) + if config: + return [config] + else: + return [] + + def add_subpackage(self,subpackage_name, + subpackage_path=None, + standalone = False): + """Add a sub-package to the current Configuration instance. + + This is useful in a setup.py script for adding sub-packages to a + package. + + Parameters + ---------- + subpackage_name : str + name of the subpackage + subpackage_path : str + if given, the subpackage path such as the subpackage is in + subpackage_path / subpackage_name. If None,the subpackage is + assumed to be located in the local path / subpackage_name. + standalone : bool + """ + + if standalone: + parent_name = None + else: + parent_name = self.name + config_list = self.get_subpackage(subpackage_name, subpackage_path, + parent_name = parent_name, + caller_level = 2) + if not config_list: + self.warn('No configuration returned, assuming unavailable.') + for config in config_list: + d = config + if isinstance(config, Configuration): + d = config.todict() + assert isinstance(d, dict), repr(type(d)) + + self.info('Appending %s configuration to %s' \ + % (d.get('name'), self.name)) + self.dict_append(**d) + + dist = self.get_distribution() + if dist is not None: + self.warn('distutils distribution has been initialized,'\ + ' it may be too late to add a subpackage '+ subpackage_name) + + def add_data_dir(self, data_path): + """Recursively add files under data_path to data_files list. + + Recursively add files under data_path to the list of data_files to be + installed (and distributed). The data_path can be either a relative + path-name, or an absolute path-name, or a 2-tuple where the first + argument shows where in the install directory the data directory + should be installed to. + + Parameters + ---------- + data_path : seq or str + Argument can be either + + * 2-sequence (, ) + * path to data directory where python datadir suffix defaults + to package dir. 
+
+        Notes
+        -----
+        Rules for installation paths::
+
+            foo/bar -> (foo/bar, foo/bar) -> parent/foo/bar
+            (gun, foo/bar) -> parent/gun
+            foo/* -> (foo/a, foo/a), (foo/b, foo/b) -> parent/foo/a, parent/foo/b
+            (gun, foo/*) -> (gun, foo/a), (gun, foo/b) -> gun
+            (gun/*, foo/*) -> parent/gun/a, parent/gun/b
+            /foo/bar -> (bar, /foo/bar) -> parent/bar
+            (gun, /foo/bar) -> parent/gun
+            (fun/*/gun/*, sun/foo/bar) -> parent/fun/foo/gun/bar
+
+        Examples
+        --------
+        For example suppose the source directory contains fun/foo.dat and
+        fun/bar/car.dat:
+
+        >>> self.add_data_dir('fun')                       #doctest: +SKIP
+        >>> self.add_data_dir(('sun', 'fun'))              #doctest: +SKIP
+        >>> self.add_data_dir(('gun', '/full/path/to/fun'))#doctest: +SKIP
+
+        Will install data-files to the locations::
+
+            <package install directory>/
+              fun/
+                foo.dat
+                bar/
+                  car.dat
+              sun/
+                foo.dat
+                bar/
+                  car.dat
+              gun/
+                foo.dat
+                car.dat
+
+        """
+        if is_sequence(data_path):
+            d, data_path = data_path
+        else:
+            d = None
+        if is_sequence(data_path):
+            [self.add_data_dir((d, p)) for p in data_path]
+            return
+        if not is_string(data_path):
+            raise TypeError("not a string: %r" % (data_path,))
+        if d is None:
+            if os.path.isabs(data_path):
+                return self.add_data_dir((os.path.basename(data_path), data_path))
+            return self.add_data_dir((data_path, data_path))
+        paths = self.paths(data_path, include_non_existing=False)
+        if is_glob_pattern(data_path):
+            if is_glob_pattern(d):
+                pattern_list = allpath(d).split(os.sep)
+                pattern_list.reverse()
+                # /a/*//b/ -> /a/*/b
+                rl = list(range(len(pattern_list)-1)); rl.reverse()
+                for i in rl:
+                    if not pattern_list[i]:
+                        del pattern_list[i]
+                #
+                for path in paths:
+                    if not os.path.isdir(path):
+                        print('Not a directory, skipping', path)
+                        continue
+                    rpath = rel_path(path, self.local_path)
+                    path_list = rpath.split(os.sep)
+                    path_list.reverse()
+                    target_list = []
+                    i = 0
+                    for s in pattern_list:
+                        if is_glob_pattern(s):
+                            if i>=len(path_list):
+                                raise ValueError('cannot fill pattern %r with %r' \
+                                      % (d, path))
+                            target_list.append(path_list[i])
+                        else:
+                            assert s==path_list[i], repr((s, path_list[i], data_path, d, path, rpath))
+                            target_list.append(s)
+                        i += 1
+                    if path_list[i:]:
+                        self.warn('mismatch of pattern_list=%s and path_list=%s'\
+                                  % (pattern_list, path_list))
+                    target_list.reverse()
+                    self.add_data_dir((os.sep.join(target_list), path))
+            else:
+                for path in paths:
+                    self.add_data_dir((d, path))
+            return
+        assert not is_glob_pattern(d), repr(d)
+
+        dist = self.get_distribution()
+        if dist is not None and dist.data_files is not None:
+            data_files = dist.data_files
+        else:
+            data_files = self.data_files
+
+        for path in paths:
+            for d1, f in list(general_source_directories_files(path)):
+                target_path = os.path.join(self.path_in_package, d, d1)
+                data_files.append((target_path, f))
+
+    def _optimize_data_files(self):
+        data_dict = {}
+        for p, files in self.data_files:
+            if p not in data_dict:
+                data_dict[p] = set()
+            for f in files:
+                data_dict[p].add(f)
+        self.data_files[:] = [(p, list(files)) for p, files in data_dict.items()]
+
+    def add_data_files(self,*files):
+        """Add data files to configuration data_files.
+
+        Parameters
+        ----------
+        files : sequence
+            Argument(s) can be either
+
+                * 2-sequence (<datadir prefix>,<path to data file(s)>)
+                * paths to data files where python datadir prefix defaults
+                  to package dir.
+
+        Notes
+        -----
+        The form of each element of the files sequence is very flexible
+        allowing many combinations of where to get the files from the package
+        and where they should ultimately be installed on the system.
+        The most basic usage is for an element of the files argument sequence
+        to be a simple filename. This will cause that file from the local path
+        to be installed to the installation path of the self.name package
+        (package path). The file argument can also be a relative path in which
+        case the entire relative path will be installed into the package
+        directory. Finally, the file can be an absolute path name in which
+        case the file will be found at the absolute path name but installed
+        to the package path.
+
+        This basic behavior can be augmented by passing a 2-tuple in as the
+        file argument. The first element of the tuple should specify the
+        relative path (under the package install directory) where the
+        remaining sequence of files should be installed to (it has nothing to
+        do with the file-names in the source distribution). The second element
+        of the tuple is the sequence of files that should be installed. The
+        files in this sequence can be filenames, relative paths, or absolute
+        paths. For absolute paths the file will be installed in the top-level
+        package installation directory (regardless of the first argument).
+        Filenames and relative path names will be installed in the package
+        install directory under the path name given as the first element of
+        the tuple.
+
+        Rules for installation paths:
+
+        #. file.txt -> (., file.txt)-> parent/file.txt
+        #. foo/file.txt -> (foo, foo/file.txt) -> parent/foo/file.txt
+        #. /foo/bar/file.txt -> (., /foo/bar/file.txt) -> parent/file.txt
+        #. ``*``.txt -> parent/a.txt, parent/b.txt
+        #. foo/``*``.txt -> parent/foo/a.txt, parent/foo/b.txt
+        #. ``*/*.txt`` -> (``*``, ``*``/``*``.txt) -> parent/c/a.txt, parent/d/b.txt
+        #. (sun, file.txt) -> parent/sun/file.txt
+        #. (sun, bar/file.txt) -> parent/sun/file.txt
+        #. (sun, /foo/bar/file.txt) -> parent/sun/file.txt
+        #. (sun, ``*``.txt) -> parent/sun/a.txt, parent/sun/b.txt
+        #. (sun, bar/``*``.txt) -> parent/sun/a.txt, parent/sun/b.txt
+        #. (sun/``*``, ``*``/``*``.txt) -> parent/sun/c/a.txt, parent/d/b.txt
+
+        An additional feature is that the path to a data-file can actually be
+        a function that takes no arguments and returns the actual path(s) to
+        the data-files. This is useful when the data files are generated while
+        building the package.
+
+        Examples
+        --------
+        Add files to the list of data_files to be included with the package.
+
+            >>> self.add_data_files('foo.dat',
+            ...     ('fun', ['gun.dat', 'nun/pun.dat', '/tmp/sun.dat']),
+            ...     'bar/cat.dat',
+            ...     '/full/path/to/can.dat') #doctest: +SKIP
+
+        will install these data files to::
+
+            <package install directory>/
+             foo.dat
+             fun/
+               gun.dat
+               nun/
+                 pun.dat
+             sun.dat
+             bar/
+               cat.dat
+             can.dat
+
+        where <package install directory> is the package (or sub-package)
+        directory such as '/usr/lib/python2.4/site-packages/mypackage'
+        ('C:\\Python2.4\\Lib\\site-packages\\mypackage') or
+        '/usr/lib/python2.4/site-packages/mypackage/mysubpackage'
+        ('C:\\Python2.4\\Lib\\site-packages\\mypackage\\mysubpackage').
+ """ + + if len(files)>1: + for f in files: + self.add_data_files(f) + return + assert len(files)==1 + if is_sequence(files[0]): + d, files = files[0] + else: + d = None + if is_string(files): + filepat = files + elif is_sequence(files): + if len(files)==1: + filepat = files[0] + else: + for f in files: + self.add_data_files((d, f)) + return + else: + raise TypeError(repr(type(files))) + + if d is None: + if hasattr(filepat, '__call__'): + d = '' + elif os.path.isabs(filepat): + d = '' + else: + d = os.path.dirname(filepat) + self.add_data_files((d, files)) + return + + paths = self.paths(filepat, include_non_existing=False) + if is_glob_pattern(filepat): + if is_glob_pattern(d): + pattern_list = d.split(os.sep) + pattern_list.reverse() + for path in paths: + path_list = path.split(os.sep) + path_list.reverse() + path_list.pop() # filename + target_list = [] + i = 0 + for s in pattern_list: + if is_glob_pattern(s): + target_list.append(path_list[i]) + i += 1 + else: + target_list.append(s) + target_list.reverse() + self.add_data_files((os.sep.join(target_list), path)) + else: + self.add_data_files((d, paths)) + return + assert not is_glob_pattern(d), repr((d, filepat)) + + dist = self.get_distribution() + if dist is not None and dist.data_files is not None: + data_files = dist.data_files + else: + data_files = self.data_files + + data_files.append((os.path.join(self.path_in_package, d), paths)) + + ### XXX Implement add_py_modules + + def add_define_macros(self, macros): + """Add define macros to configuration + + Add the given sequence of macro name and value duples to the beginning + of the define_macros list This list will be visible to all extension + modules of the current package. + """ + dist = self.get_distribution() + if dist is not None: + if not hasattr(dist, 'define_macros'): + dist.define_macros = [] + dist.define_macros.extend(macros) + else: + self.define_macros.extend(macros) + + + def add_include_dirs(self,*paths): + """Add paths to configuration include directories. + + Add the given sequence of paths to the beginning of the include_dirs + list. This list will be visible to all extension modules of the + current package. + """ + include_dirs = self.paths(paths) + dist = self.get_distribution() + if dist is not None: + if dist.include_dirs is None: + dist.include_dirs = [] + dist.include_dirs.extend(include_dirs) + else: + self.include_dirs.extend(include_dirs) + + def add_headers(self,*files): + """Add installable headers to configuration. + + Add the given sequence of files to the beginning of the headers list. + By default, headers will be installed under // directory. If an item of files + is a tuple, then its first argument specifies the actual installation + location relative to the path. + + Parameters + ---------- + files : str or seq + Argument(s) can be either: + + * 2-sequence (,) + * path(s) to header file(s) where python includedir suffix will + default to package name. + """ + headers = [] + for path in files: + if is_string(path): + [headers.append((self.name, p)) for p in self.paths(path)] + else: + if not isinstance(path, (tuple, list)) or len(path) != 2: + raise TypeError(repr(path)) + [headers.append((path[0], p)) for p in self.paths(path[1])] + dist = self.get_distribution() + if dist is not None: + if dist.headers is None: + dist.headers = [] + dist.headers.extend(headers) + else: + self.headers.extend(headers) + + def paths(self,*paths,**kws): + """Apply glob to paths and prepend local_path if needed. + + Applies glob.glob(...) 
to each path in the sequence (if needed) and + pre-pends the local_path if needed. Because this is called on all + source lists, this allows wildcard characters to be specified in lists + of sources for extension modules and libraries and scripts and allows + path-names be relative to the source directory. + + """ + include_non_existing = kws.get('include_non_existing', True) + return gpaths(paths, + local_path = self.local_path, + include_non_existing=include_non_existing) + + def _fix_paths_dict(self, kw): + for k in kw.keys(): + v = kw[k] + if k in ['sources', 'depends', 'include_dirs', 'library_dirs', + 'module_dirs', 'extra_objects']: + new_v = self.paths(v) + kw[k] = new_v + + def add_extension(self,name,sources,**kw): + """Add extension to configuration. + + Create and add an Extension instance to the ext_modules list. This + method also takes the following optional keyword arguments that are + passed on to the Extension constructor. + + Parameters + ---------- + name : str + name of the extension + sources : seq + list of the sources. The list of sources may contain functions + (called source generators) which must take an extension instance + and a build directory as inputs and return a source file or list of + source files or None. If None is returned then no sources are + generated. If the Extension instance has no sources after + processing all source generators, then no extension module is + built. + include_dirs : + define_macros : + undef_macros : + library_dirs : + libraries : + runtime_library_dirs : + extra_objects : + extra_compile_args : + extra_link_args : + extra_f77_compile_args : + extra_f90_compile_args : + export_symbols : + swig_opts : + depends : + The depends list contains paths to files or directories that the + sources of the extension module depend on. If any path in the + depends list is newer than the extension module, then the module + will be rebuilt. + language : + f2py_options : + module_dirs : + extra_info : dict or list + dict or list of dict of keywords to be appended to keywords. + + Notes + ----- + The self.paths(...) method is applied to all lists that may contain + paths. 
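+
+        Examples
+        --------
+        (A hedged sketch, not from the original docstring; 'spam' and
+        'spam.c' are hypothetical names:)
+
+        >>> config.add_extension('spam', sources=['spam.c'],
+        ...                      define_macros=[('NDEBUG', None)],
+        ...                      libraries=['m']) #doctest: +SKIP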
+ """ + ext_args = copy.copy(kw) + ext_args['name'] = dot_join(self.name, name) + ext_args['sources'] = sources + + if 'extra_info' in ext_args: + extra_info = ext_args['extra_info'] + del ext_args['extra_info'] + if isinstance(extra_info, dict): + extra_info = [extra_info] + for info in extra_info: + assert isinstance(info, dict), repr(info) + dict_append(ext_args,**info) + + self._fix_paths_dict(ext_args) + + # Resolve out-of-tree dependencies + libraries = ext_args.get('libraries', []) + libnames = [] + ext_args['libraries'] = [] + for libname in libraries: + if isinstance(libname, tuple): + self._fix_paths_dict(libname[1]) + + # Handle library names of the form libname@relative/path/to/library + if '@' in libname: + lname, lpath = libname.split('@', 1) + lpath = os.path.abspath(njoin(self.local_path, lpath)) + if os.path.isdir(lpath): + c = self.get_subpackage(None, lpath, + caller_level = 2) + if isinstance(c, Configuration): + c = c.todict() + for l in [l[0] for l in c.get('libraries', [])]: + llname = l.split('__OF__', 1)[0] + if llname == lname: + c.pop('name', None) + dict_append(ext_args,**c) + break + continue + libnames.append(libname) + + ext_args['libraries'] = libnames + ext_args['libraries'] + ext_args['define_macros'] = \ + self.define_macros + ext_args.get('define_macros', []) + + from numpy.distutils.core import Extension + ext = Extension(**ext_args) + self.ext_modules.append(ext) + + dist = self.get_distribution() + if dist is not None: + self.warn('distutils distribution has been initialized,'\ + ' it may be too late to add an extension '+name) + return ext + + def add_library(self,name,sources,**build_info): + """ + Add library to configuration. + + Parameters + ---------- + name : str + Name of the extension. + sources : sequence + List of the sources. The list of sources may contain functions + (called source generators) which must take an extension instance + and a build directory as inputs and return a source file or list of + source files or None. If None is returned then no sources are + generated. If the Extension instance has no sources after + processing all source generators, then no extension module is + built. + build_info : dict, optional + The following keys are allowed: + + * depends + * macros + * include_dirs + * extra_compiler_args + * extra_f77_compile_args + * extra_f90_compile_args + * f2py_options + * language + + """ + self._add_library(name, sources, None, build_info) + + dist = self.get_distribution() + if dist is not None: + self.warn('distutils distribution has been initialized,'\ + ' it may be too late to add a library '+ name) + + def _add_library(self, name, sources, install_dir, build_info): + """Common implementation for add_library and add_installed_library. Do + not use directly""" + build_info = copy.copy(build_info) + build_info['sources'] = sources + + # Sometimes, depends is not set up to an empty list by default, and if + # depends is not given to add_library, distutils barfs (#1134) + if not 'depends' in build_info: + build_info['depends'] = [] + + self._fix_paths_dict(build_info) + + # Add to libraries list so that it is build with build_clib + self.libraries.append((name, build_info)) + + def add_installed_library(self, name, sources, install_dir, build_info=None): + """ + Similar to add_library, but the specified library is installed. 
+
+        Most C libraries used with `distutils` are only used to build python
+        extensions, but libraries built through this method will be installed
+        so that they can be reused by third-party packages.
+
+        Parameters
+        ----------
+        name : str
+            Name of the installed library.
+        sources : sequence
+            List of the library's source files. See `add_library` for details.
+        install_dir : str
+            Path to install the library, relative to the current sub-package.
+        build_info : dict, optional
+            The following keys are allowed:
+
+                * depends
+                * macros
+                * include_dirs
+                * extra_compiler_args
+                * extra_f77_compile_args
+                * extra_f90_compile_args
+                * f2py_options
+                * language
+
+        Returns
+        -------
+        None
+
+        See Also
+        --------
+        add_library, add_npy_pkg_config, get_info
+
+        Notes
+        -----
+        The best way to encode the options required to link against the specified
+        C libraries is to use a "libname.ini" file, and use `get_info` to
+        retrieve the required options (see `add_npy_pkg_config` for more
+        information).
+
+        """
+        if not build_info:
+            build_info = {}
+
+        install_dir = os.path.join(self.package_path, install_dir)
+        self._add_library(name, sources, install_dir, build_info)
+        self.installed_libraries.append(InstallableLib(name, build_info, install_dir))
+
+    def add_npy_pkg_config(self, template, install_dir, subst_dict=None):
+        """
+        Generate and install a npy-pkg config file from a template.
+
+        The config file generated from `template` is installed in the
+        given install directory, using `subst_dict` for variable substitution.
+
+        Parameters
+        ----------
+        template : str
+            The path of the template, relative to the current package path.
+        install_dir : str
+            Where to install the npy-pkg config file, relative to the current
+            package path.
+        subst_dict : dict, optional
+            If given, any string of the form ``@key@`` will be replaced by
+            ``subst_dict[key]`` in the template file when installed. The install
+            prefix is always available through the variable ``@prefix@``, since the
+            install prefix is not easy to get reliably from setup.py.
+
+        See also
+        --------
+        add_installed_library, get_info
+
+        Notes
+        -----
+        This works for both standard installs and in-place builds, i.e. the
+        ``@prefix@`` refers to the source directory for in-place builds.
+
+        Examples
+        --------
+        ::
+
+            config.add_npy_pkg_config('foo.ini.in', 'lib', {'foo': bar})
+
+        Assuming the foo.ini.in file has the following content::
+
+            [meta]
+            Name=@foo@
+            Version=1.0
+            Description=dummy description
+
+            [default]
+            Cflags=-I@prefix@/include
+            Libs=
+
+        The generated file will have the following content::
+
+            [meta]
+            Name=bar
+            Version=1.0
+            Description=dummy description
+
+            [default]
+            Cflags=-Iprefix_dir/include
+            Libs=
+
+        and will be installed as foo.ini in the 'lib' subpath.
+
+        When cross-compiling with numpy distutils, it might be necessary to
+        use modified npy-pkg-config files. Using the default/generated files
+        will link with the host libraries (i.e. libnpymath.a). For
+        cross-compilation you of course need to link with target libraries,
+        while using the host Python installation.
+
+        You can copy out the numpy/core/lib/npy-pkg-config directory, add a
+        pkgdir value to the .ini files and set the NPY_PKG_CONFIG_PATH
+        environment variable to point to the directory with the modified
+        npy-pkg-config files.
+
+        Example npymath.ini modified for cross-compilation::
+
+            [meta]
+            Name=npymath
+            Description=Portable, core math library implementing C99 standard
+            Version=0.1
+
+            [variables]
+            pkgname=numpy.core
+            pkgdir=/build/arm-linux-gnueabi/sysroot/usr/lib/python3.7/site-packages/numpy/core
+            prefix=${pkgdir}
+            libdir=${prefix}/lib
+            includedir=${prefix}/include
+
+            [default]
+            Libs=-L${libdir} -lnpymath
+            Cflags=-I${includedir}
+            Requires=mlib
+
+            [msvc]
+            Libs=/LIBPATH:${libdir} npymath.lib
+            Cflags=/INCLUDE:${includedir}
+            Requires=mlib
+
+        """
+        if subst_dict is None:
+            subst_dict = {}
+        template = os.path.join(self.package_path, template)
+
+        if self.name in self.installed_pkg_config:
+            self.installed_pkg_config[self.name].append((template, install_dir,
+                                                         subst_dict))
+        else:
+            self.installed_pkg_config[self.name] = [(template, install_dir,
+                                                     subst_dict)]
+
+
+    def add_scripts(self,*files):
+        """Add scripts to configuration.
+
+        Add the sequence of files to the beginning of the scripts list.
+        Scripts will be installed under the <prefix>/bin/ directory.
+
+        """
+        scripts = self.paths(files)
+        dist = self.get_distribution()
+        if dist is not None:
+            if dist.scripts is None:
+                dist.scripts = []
+            dist.scripts.extend(scripts)
+        else:
+            self.scripts.extend(scripts)
+
+    def dict_append(self,**dict):
+        for key in self.list_keys:
+            a = getattr(self, key)
+            a.extend(dict.get(key, []))
+        for key in self.dict_keys:
+            a = getattr(self, key)
+            a.update(dict.get(key, {}))
+        known_keys = self.list_keys + self.dict_keys + self.extra_keys
+        for key in dict.keys():
+            if key not in known_keys:
+                a = getattr(self, key, None)
+                if a and a==dict[key]: continue
+                self.warn('Inheriting attribute %r=%r from %r' \
+                          % (key, dict[key], dict.get('name', '?')))
+                setattr(self, key, dict[key])
+                self.extra_keys.append(key)
+            elif key in self.extra_keys:
+                self.info('Ignoring attempt to set %r (from %r to %r)' \
+                          % (key, getattr(self, key), dict[key]))
+            elif key in known_keys:
+                # key is already processed above
+                pass
+            else:
+                raise ValueError("Don't know about key=%r" % (key))
+
+    def __str__(self):
+        from pprint import pformat
+        known_keys = self.list_keys + self.dict_keys + self.extra_keys
+        s = '<'+5*'-' + '\n'
+        s += 'Configuration of '+self.name+':\n'
+        known_keys.sort()
+        for k in known_keys:
+            a = getattr(self, k, None)
+            if a:
+                s += '%s = %s\n' % (k, pformat(a))
+        s += 5*'-' + '>'
+        return s
+
+    def get_config_cmd(self):
+        """
+        Returns the numpy.distutils config command instance.
+        """
+        cmd = get_cmd('config')
+        cmd.ensure_finalized()
+        cmd.dump_source = 0
+        cmd.noisy = 0
+        old_path = os.environ.get('PATH')
+        if old_path:
+            path = os.pathsep.join(['.', old_path])
+            os.environ['PATH'] = path
+        return cmd
+
+    def get_build_temp_dir(self):
+        """
+        Return a path to a temporary directory where temporary files should be
+        placed.
+        """
+        cmd = get_cmd('build')
+        cmd.ensure_finalized()
+        return cmd.build_temp
+
+    def have_f77c(self):
+        """Check for availability of Fortran 77 compiler.
+
+        Use it inside source generating function to ensure that
+        setup distribution instance has been initialized.
+
+        Notes
+        -----
+        True if a Fortran 77 compiler is available (because a simple Fortran 77
+        code was able to be compiled successfully).
+        """
+        simple_fortran_subroutine = '''
+        subroutine simple
+        end
+        '''
+        config_cmd = self.get_config_cmd()
+        flag = config_cmd.try_compile(simple_fortran_subroutine, lang='f77')
+        return flag
+
+    def have_f90c(self):
+        """Check for availability of Fortran 90 compiler.
+
+        Use it inside source generating function to ensure that
+        setup distribution instance has been initialized.
+
+        Notes
+        -----
+        True if a Fortran 90 compiler is available (because a simple Fortran
+        90 code was able to be compiled successfully)
+        """
+        simple_fortran_subroutine = '''
+        subroutine simple
+        end
+        '''
+        config_cmd = self.get_config_cmd()
+        flag = config_cmd.try_compile(simple_fortran_subroutine, lang='f90')
+        return flag
+
+    def append_to(self, extlib):
+        """Append libraries, include_dirs to extension or library item.
+        """
+        if is_sequence(extlib):
+            lib_name, build_info = extlib
+            dict_append(build_info,
+                        libraries=self.libraries,
+                        include_dirs=self.include_dirs)
+        else:
+            from numpy.distutils.core import Extension
+            assert isinstance(extlib, Extension), repr(extlib)
+            extlib.libraries.extend(self.libraries)
+            extlib.include_dirs.extend(self.include_dirs)
+
+    def _get_svn_revision(self, path):
+        """Return path's SVN revision number.
+        """
+        try:
+            output = subprocess.check_output(['svnversion'], cwd=path)
+        except (subprocess.CalledProcessError, OSError):
+            pass
+        else:
+            m = re.match(rb'(?P<revision>\d+)', output)
+            if m:
+                return int(m.group('revision'))
+
+        if sys.platform=='win32' and os.environ.get('SVN_ASP_DOT_NET_HACK', None):
+            entries = njoin(path, '_svn', 'entries')
+        else:
+            entries = njoin(path, '.svn', 'entries')
+        if os.path.isfile(entries):
+            with open(entries) as f:
+                fstr = f.read()
+            if fstr[:5] == '<?xml':  # pre 1.4
+                m = re.search(r'revision="(?P<revision>\d+)"', fstr)
+                if m:
+                    return int(m.group('revision'))
+            else:  # non-xml entries file --- check to be sure that
+                m = re.search(r'dir[\n\r]+(?P<revision>\d+)', fstr)
+                if m:
+                    return int(m.group('revision'))
+        return None
+
+    def _get_hg_revision(self, path):
+        """Return path's Mercurial revision number.
+        """
+        try:
+            output = subprocess.check_output(
+                ['hg', 'identify', '--num'], cwd=path)
+        except (subprocess.CalledProcessError, OSError):
+            pass
+        else:
+            m = re.match(rb'(?P<revision>\d+)', output)
+            if m:
+                return int(m.group('revision'))
+
+        branch_fn = njoin(path, '.hg', 'branch')
+        branch_cache_fn = njoin(path, '.hg', 'branch.cache')
+
+        if os.path.isfile(branch_fn):
+            branch0 = None
+            with open(branch_fn) as f:
+                revision0 = f.read().strip()
+
+            branch_map = {}
+            with open(branch_cache_fn, 'r') as f:
+                for line in f:
+                    branch1, revision1 = line.split()[:2]
+                    if revision1==revision0:
+                        branch0 = branch1
+                    try:
+                        revision1 = int(revision1)
+                    except ValueError:
+                        continue
+                    branch_map[branch1] = revision1
+
+            return branch_map.get(branch0)
+
+        return None
+
+
+    def get_version(self, version_file=None, version_variable=None):
+        """Try to get version string of a package.
+
+        Return a version string of the current package or None if the version
+        information could not be detected.
+
+        Notes
+        -----
+        This method scans files named
+        __version__.py, <packagename>_version.py, version.py, and
+        __svn_version__.py for string variables version, __version__, and
+        <packagename>_version, until a version number is found.
+        """
+        version = getattr(self, 'version', None)
+        if version is not None:
+            return version
+
+        # Get version from version file.
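+        # (Illustrative note, not in the original: for a package named
+        # 'mypkg' the default candidates below are __version__.py,
+        # mypkg_version.py, version.py, __svn_version__.py and
+        # __hg_version__.py, searched in that order.)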
+ if version_file is None: + files = ['__version__.py', + self.name.split('.')[-1]+'_version.py', + 'version.py', + '__svn_version__.py', + '__hg_version__.py'] + else: + files = [version_file] + if version_variable is None: + version_vars = ['version', + '__version__', + self.name.split('.')[-1]+'_version'] + else: + version_vars = [version_variable] + for f in files: + fn = njoin(self.local_path, f) + if os.path.isfile(fn): + info = ('.py', 'U', 1) + name = os.path.splitext(os.path.basename(fn))[0] + n = dot_join(self.name, name) + try: + version_module = exec_mod_from_location( + '_'.join(n.split('.')), fn) + except ImportError as e: + self.warn(str(e)) + version_module = None + if version_module is None: + continue + + for a in version_vars: + version = getattr(version_module, a, None) + if version is not None: + break + + # Try if versioneer module + try: + version = version_module.get_versions()['version'] + except AttributeError: + pass + + if version is not None: + break + + if version is not None: + self.version = version + return version + + # Get version as SVN or Mercurial revision number + revision = self._get_svn_revision(self.local_path) + if revision is None: + revision = self._get_hg_revision(self.local_path) + + if revision is not None: + version = str(revision) + self.version = version + + return version + + def make_svn_version_py(self, delete=True): + """Appends a data function to the data_files list that will generate + __svn_version__.py file to the current package directory. + + Generate package __svn_version__.py file from SVN revision number, + it will be removed after python exits but will be available + when sdist, etc commands are executed. + + Notes + ----- + If __svn_version__.py existed before, nothing is done. + + This is + intended for working with source directories that are in an SVN + repository. + """ + target = njoin(self.local_path, '__svn_version__.py') + revision = self._get_svn_revision(self.local_path) + if os.path.isfile(target) or revision is None: + return + else: + def generate_svn_version_py(): + if not os.path.isfile(target): + version = str(revision) + self.info('Creating %s (version=%r)' % (target, version)) + with open(target, 'w') as f: + f.write('version = %r\n' % (version)) + + def rm_file(f=target,p=self.info): + if delete: + try: os.remove(f); p('removed '+f) + except OSError: pass + try: os.remove(f+'c'); p('removed '+f+'c') + except OSError: pass + + atexit.register(rm_file) + + return target + + self.add_data_files(('', generate_svn_version_py())) + + def make_hg_version_py(self, delete=True): + """Appends a data function to the data_files list that will generate + __hg_version__.py file to the current package directory. + + Generate package __hg_version__.py file from Mercurial revision, + it will be removed after python exits but will be available + when sdist, etc commands are executed. + + Notes + ----- + If __hg_version__.py existed before, nothing is done. + + This is intended for working with source directories that are + in an Mercurial repository. 
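+
+        (Hedged usage sketch, not in the original docstring:)
+
+        >>> config.make_hg_version_py() #doctest: +SKIP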
+ """ + target = njoin(self.local_path, '__hg_version__.py') + revision = self._get_hg_revision(self.local_path) + if os.path.isfile(target) or revision is None: + return + else: + def generate_hg_version_py(): + if not os.path.isfile(target): + version = str(revision) + self.info('Creating %s (version=%r)' % (target, version)) + with open(target, 'w') as f: + f.write('version = %r\n' % (version)) + + def rm_file(f=target,p=self.info): + if delete: + try: os.remove(f); p('removed '+f) + except OSError: pass + try: os.remove(f+'c'); p('removed '+f+'c') + except OSError: pass + + atexit.register(rm_file) + + return target + + self.add_data_files(('', generate_hg_version_py())) + + def make_config_py(self,name='__config__'): + """Generate package __config__.py file containing system_info + information used during building the package. + + This file is installed to the + package installation directory. + + """ + self.py_modules.append((self.name, name, generate_config_py)) + + def get_info(self,*names): + """Get resources information. + + Return information (from system_info.get_info) for all of the names in + the argument list in a single dictionary. + """ + from .system_info import get_info, dict_append + info_dict = {} + for a in names: + dict_append(info_dict,**get_info(a)) + return info_dict + + +def get_cmd(cmdname, _cache={}): + if cmdname not in _cache: + import distutils.core + dist = distutils.core._setup_distribution + if dist is None: + from distutils.errors import DistutilsInternalError + raise DistutilsInternalError( + 'setup distribution instance not initialized') + cmd = dist.get_command_obj(cmdname) + _cache[cmdname] = cmd + return _cache[cmdname] + +def get_numpy_include_dirs(): + # numpy_include_dirs are set by numpy/core/setup.py, otherwise [] + include_dirs = Configuration.numpy_include_dirs[:] + if not include_dirs: + import numpy + include_dirs = [ numpy.get_include() ] + # else running numpy/core/setup.py + return include_dirs + +def get_npy_pkg_dir(): + """Return the path where to find the npy-pkg-config directory. + + If the NPY_PKG_CONFIG_PATH environment variable is set, the value of that + is returned. Otherwise, a path inside the location of the numpy module is + returned. + + The NPY_PKG_CONFIG_PATH can be useful when cross-compiling, maintaining + customized npy-pkg-config .ini files for the cross-compilation + environment, and using them when cross-compiling. + + """ + d = os.environ.get('NPY_PKG_CONFIG_PATH') + if d is not None: + return d + spec = importlib.util.find_spec('numpy') + d = os.path.join(os.path.dirname(spec.origin), + 'core', 'lib', 'npy-pkg-config') + return d + +def get_pkg_info(pkgname, dirs=None): + """ + Return library info for the given package. + + Parameters + ---------- + pkgname : str + Name of the package (should match the name of the .ini file, without + the extension, e.g. foo for the file foo.ini). + dirs : sequence, optional + If given, should be a sequence of additional directories where to look + for npy-pkg-config files. Those directories are searched prior to the + NumPy directory. + + Returns + ------- + pkginfo : class instance + The `LibraryInfo` instance containing the build information. + + Raises + ------ + PkgNotFound + If the package is not found. 
+
+    See Also
+    --------
+    Configuration.add_npy_pkg_config, Configuration.add_installed_library,
+    get_info
+
+    """
+    from numpy.distutils.npy_pkg_config import read_config
+
+    if dirs:
+        dirs.append(get_npy_pkg_dir())
+    else:
+        dirs = [get_npy_pkg_dir()]
+    return read_config(pkgname, dirs)
+
+def get_info(pkgname, dirs=None):
+    """
+    Return an info dict for a given C library.
+
+    The info dict contains the necessary options to use the C library.
+
+    Parameters
+    ----------
+    pkgname : str
+        Name of the package (should match the name of the .ini file, without
+        the extension, e.g. foo for the file foo.ini).
+    dirs : sequence, optional
+        If given, should be a sequence of additional directories where to look
+        for npy-pkg-config files. Those directories are searched prior to the
+        NumPy directory.
+
+    Returns
+    -------
+    info : dict
+        The dictionary with build information.
+
+    Raises
+    ------
+    PkgNotFound
+        If the package is not found.
+
+    See Also
+    --------
+    Configuration.add_npy_pkg_config, Configuration.add_installed_library,
+    get_pkg_info
+
+    Examples
+    --------
+    To get the necessary information for the npymath library from NumPy:
+
+    >>> npymath_info = np.distutils.misc_util.get_info('npymath')
+    >>> npymath_info #doctest: +SKIP
+    {'define_macros': [], 'libraries': ['npymath'], 'library_dirs':
+    ['.../numpy/core/lib'], 'include_dirs': ['.../numpy/core/include']}
+
+    This info dict can then be used as input to a `Configuration` instance::
+
+      config.add_extension('foo', sources=['foo.c'], extra_info=npymath_info)
+
+    """
+    from numpy.distutils.npy_pkg_config import parse_flags
+    pkg_info = get_pkg_info(pkgname, dirs)
+
+    # Translate LibraryInfo instance into a build_info dict
+    info = parse_flags(pkg_info.cflags())
+    for k, v in parse_flags(pkg_info.libs()).items():
+        info[k].extend(v)
+
+    # add_extension extra_info argument is ANAL
+    info['define_macros'] = info['macros']
+    del info['macros']
+    del info['ignored']
+
+    return info
+
+def is_bootstrapping():
+    import builtins
+
+    try:
+        builtins.__NUMPY_SETUP__
+        return True
+    except AttributeError:
+        return False
+
+
+#########################
+
+def default_config_dict(name = None, parent_name = None, local_path=None):
+    """Return a configuration dictionary for usage in
+    configuration() function defined in file setup_<name>.py.
+ """ + import warnings + warnings.warn('Use Configuration(%r,%r,top_path=%r) instead of '\ + 'deprecated default_config_dict(%r,%r,%r)' + % (name, parent_name, local_path, + name, parent_name, local_path, + ), stacklevel=2) + c = Configuration(name, parent_name, local_path) + return c.todict() + + +def dict_append(d, **kws): + for k, v in kws.items(): + if k in d: + ov = d[k] + if isinstance(ov, str): + d[k] = v + else: + d[k].extend(v) + else: + d[k] = v + +def appendpath(prefix, path): + if os.path.sep != '/': + prefix = prefix.replace('/', os.path.sep) + path = path.replace('/', os.path.sep) + drive = '' + if os.path.isabs(path): + drive = os.path.splitdrive(prefix)[0] + absprefix = os.path.splitdrive(os.path.abspath(prefix))[1] + pathdrive, path = os.path.splitdrive(path) + d = os.path.commonprefix([absprefix, path]) + if os.path.join(absprefix[:len(d)], absprefix[len(d):]) != absprefix \ + or os.path.join(path[:len(d)], path[len(d):]) != path: + # Handle invalid paths + d = os.path.dirname(d) + subpath = path[len(d):] + if os.path.isabs(subpath): + subpath = subpath[1:] + else: + subpath = path + return os.path.normpath(njoin(drive + prefix, subpath)) + +def generate_config_py(target): + """Generate config.py file containing system_info information + used during building the package. + + Usage: + config['py_modules'].append((packagename, '__config__',generate_config_py)) + """ + from numpy.distutils.system_info import system_info + from distutils.dir_util import mkpath + mkpath(os.path.dirname(target)) + with open(target, 'w') as f: + f.write('# This file is generated by numpy\'s %s\n' % (os.path.basename(sys.argv[0]))) + f.write('# It contains system_info results at the time of building this package.\n') + f.write('__all__ = ["get_info","show"]\n\n') + + # For gfortran+msvc combination, extra shared libraries may exist + f.write(textwrap.dedent(""" + import os + import sys + + extra_dll_dir = os.path.join(os.path.dirname(__file__), '.libs') + + if sys.platform == 'win32' and os.path.isdir(extra_dll_dir): + if sys.version_info >= (3, 8): + os.add_dll_directory(extra_dll_dir) + else: + os.environ.setdefault('PATH', '') + os.environ['PATH'] += os.pathsep + extra_dll_dir + + """)) + + for k, i in system_info.saved_results.items(): + f.write('%s=%r\n' % (k, i)) + f.write(textwrap.dedent(r''' + def get_info(name): + g = globals() + return g.get(name, g.get(name + "_info", {})) + + def show(): + """ + Show libraries in the system on which NumPy was built. + + Print information about various resources (libraries, library + directories, include directories, etc.) in the system on which + NumPy was built. + + See Also + -------- + get_include : Returns the directory containing NumPy C + header files. + + Notes + ----- + 1. Classes specifying the information to be printed are defined + in the `numpy.distutils.system_info` module. + + Information may include: + + * ``language``: language used to write the libraries (mostly + C or f77) + * ``libraries``: names of libraries found in the system + * ``library_dirs``: directories containing the libraries + * ``include_dirs``: directories containing library header files + * ``src_dirs``: directories containing library source files + * ``define_macros``: preprocessor macros used by + ``distutils.setup`` + * ``baseline``: minimum CPU features required + * ``found``: dispatched features supported in the system + * ``not found``: dispatched features that are not supported + in the system + + 2. 
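Worked examples for the two helpers defined above (POSIX path separator assumed; the second appendpath case mirrors numpy's own test suite):

    from numpy.distutils.misc_util import dict_append, appendpath

    d = {'libraries': ['blas'], 'language': 'f77'}
    dict_append(d, libraries=['lapack'], define_macros=[('X', None)], language='c')
    # list values are extended, string values are replaced, new keys are set:
    # d == {'libraries': ['blas', 'lapack'], 'language': 'c',
    #       'define_macros': [('X', None)]}

    appendpath('prefix', 'name')               # -> 'prefix/name'
    appendpath('/prefix/sub', '/prefix/name')  # shared '/prefix/' stripped -> '/prefix/sub/name'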
NumPy BLAS/LAPACK Installation Notes + + Installing a numpy wheel (``pip install numpy`` or force it + via ``pip install numpy --only-binary :numpy: numpy``) includes + an OpenBLAS implementation of the BLAS and LAPACK linear algebra + APIs. In this case, ``library_dirs`` reports the original build + time configuration as compiled with gcc/gfortran; at run time + the OpenBLAS library is in + ``site-packages/numpy.libs/`` (linux), or + ``site-packages/numpy/.dylibs/`` (macOS), or + ``site-packages/numpy/.libs/`` (windows). + + Installing numpy from source + (``pip install numpy --no-binary numpy``) searches for BLAS and + LAPACK dynamic link libraries at build time as influenced by + environment variables NPY_BLAS_LIBS, NPY_CBLAS_LIBS, and + NPY_LAPACK_LIBS; or NPY_BLAS_ORDER and NPY_LAPACK_ORDER; + or the optional file ``~/.numpy-site.cfg``. + NumPy remembers those locations and expects to load the same + libraries at run-time. + In NumPy 1.21+ on macOS, 'accelerate' (Apple's Accelerate BLAS + library) is in the default build-time search order after + 'openblas'. + + Examples + -------- + >>> import numpy as np + >>> np.show_config() + blas_opt_info: + language = c + define_macros = [('HAVE_CBLAS', None)] + libraries = ['openblas', 'openblas'] + library_dirs = ['/usr/local/lib'] + """ + from numpy.core._multiarray_umath import ( + __cpu_features__, __cpu_baseline__, __cpu_dispatch__ + ) + for name,info_dict in globals().items(): + if name[0] == "_" or type(info_dict) is not type({}): continue + print(name + ":") + if not info_dict: + print(" NOT AVAILABLE") + for k,v in info_dict.items(): + v = str(v) + if k == "sources" and len(v) > 200: + v = v[:60] + " ...\n... " + v[-60:] + print(" %s = %s" % (k,v)) + + features_found, features_not_found = [], [] + for feature in __cpu_dispatch__: + if __cpu_features__[feature]: + features_found.append(feature) + else: + features_not_found.append(feature) + + print("Supported SIMD extensions in this NumPy install:") + print(" baseline = %s" % (','.join(__cpu_baseline__))) + print(" found = %s" % (','.join(features_found))) + print(" not found = %s" % (','.join(features_not_found))) + + ''')) + + return target + +def msvc_version(compiler): + """Return version major and minor of compiler instance if it is + MSVC, raise an exception otherwise.""" + if not compiler.compiler_type == "msvc": + raise ValueError("Compiler instance is not msvc (%s)"\ + % compiler.compiler_type) + return compiler._MSVCCompiler__version + +def get_build_architecture(): + # Importing distutils.msvccompiler triggers a warning on non-Windows + # systems, so delay the import to here. + from distutils.msvccompiler import get_build_architecture + return get_build_architecture() + + +_cxx_ignore_flags = {'-Werror=implicit-function-declaration', '-std=c99'} + + +def sanitize_cxx_flags(cxxflags): + ''' + Some flags are valid for C but not C++. Prune them. + ''' + return [flag for flag in cxxflags if flag not in _cxx_ignore_flags] + + +def exec_mod_from_location(modname, modfile): + ''' + Use importlib machinery to import a module `modname` from the file + `modfile`. Depending on the `spec.loader`, the module may not be + registered in sys.modules. 
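A quick doctest-style check of sanitize_cxx_flags above; the input list is illustrative:

    >>> sanitize_cxx_flags(['-O2', '-std=c99', '-Wall',
    ...                     '-Werror=implicit-function-declaration'])
    ['-O2', '-Wall']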
+ ''' + spec = importlib.util.spec_from_file_location(modname, modfile) + foo = importlib.util.module_from_spec(spec) + spec.loader.exec_module(foo) + return foo + diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/msvc9compiler.py b/.local/lib/python3.8/site-packages/numpy/distutils/msvc9compiler.py new file mode 100644 index 0000000..6823949 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/msvc9compiler.py @@ -0,0 +1,63 @@ +import os +from distutils.msvc9compiler import MSVCCompiler as _MSVCCompiler + +from .system_info import platform_bits + + +def _merge(old, new): + """Concatenate two environment paths avoiding repeats. + + Here `old` is the environment string before the base class initialize + function is called and `new` is the string after the call. The new string + will be a fixed string if it is not obtained from the current environment, + or the same as the old string if obtained from the same environment. The aim + here is not to append the new string if it is already contained in the old + string so as to limit the growth of the environment string. + + Parameters + ---------- + old : string + Previous environment string. + new : string + New environment string. + + Returns + ------- + ret : string + Updated environment string. + + """ + if not old: + return new + if new in old: + return old + + # Neither new nor old is empty. Give old priority. + return ';'.join([old, new]) + + +class MSVCCompiler(_MSVCCompiler): + def __init__(self, verbose=0, dry_run=0, force=0): + _MSVCCompiler.__init__(self, verbose, dry_run, force) + + def initialize(self, plat_name=None): + # The 'lib' and 'include' variables may be overwritten + # by MSVCCompiler.initialize, so save them for later merge. + environ_lib = os.getenv('lib') + environ_include = os.getenv('include') + _MSVCCompiler.initialize(self, plat_name) + + # Merge current and previous values of 'lib' and 'include' + os.environ['lib'] = _merge(environ_lib, os.environ['lib']) + os.environ['include'] = _merge(environ_include, os.environ['include']) + + # msvc9 building for 32 bits requires SSE2 to work around a + # compiler bug. + if platform_bits == 32: + self.compile_options += ['/arch:SSE2'] + self.compile_options_debug += ['/arch:SSE2'] + + def manifest_setup_ldargs(self, output_filename, build_temp, ld_args): + ld_args.append('/MANIFEST') + _MSVCCompiler.manifest_setup_ldargs(self, output_filename, + build_temp, ld_args) diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/msvccompiler.py b/.local/lib/python3.8/site-packages/numpy/distutils/msvccompiler.py new file mode 100644 index 0000000..681a254 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/msvccompiler.py @@ -0,0 +1,58 @@ +import os +from distutils.msvccompiler import MSVCCompiler as _MSVCCompiler + +from .system_info import platform_bits + + +def _merge(old, new): + """Concatenate two environment paths avoiding repeats. + + Here `old` is the environment string before the base class initialize + function is called and `new` is the string after the call. The new string + will be a fixed string if it is not obtained from the current environment, + or the same as the old string if obtained from the same environment. The aim + here is not to append the new string if it is already contained in the old + string so as to limit the growth of the environment string. + + Parameters + ---------- + old : string + Previous environment string. + new : string + New environment string. 
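The merge behaviour of _merge above, spelled out (the path values are illustrative):

    _merge('', r'C:\VS\lib')              # old is empty              -> 'C:\VS\lib'
    _merge(r'C:\VS\lib', r'C:\VS\lib')    # new already contained     -> 'C:\VS\lib'
    _merge(r'C:\mine\lib', r'C:\VS\lib')  # both non-empty, old first -> 'C:\mine\lib;C:\VS\lib'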
+ + Returns + ------- + ret : string + Updated environment string. + + """ + if new in old: + return old + if not old: + return new + + # Neither new nor old is empty. Give old priority. + return ';'.join([old, new]) + + +class MSVCCompiler(_MSVCCompiler): + def __init__(self, verbose=0, dry_run=0, force=0): + _MSVCCompiler.__init__(self, verbose, dry_run, force) + + def initialize(self): + # The 'lib' and 'include' variables may be overwritten + # by MSVCCompiler.initialize, so save them for later merge. + environ_lib = os.getenv('lib', '') + environ_include = os.getenv('include', '') + _MSVCCompiler.initialize(self) + + # Merge current and previous values of 'lib' and 'include' + os.environ['lib'] = _merge(environ_lib, os.environ['lib']) + os.environ['include'] = _merge(environ_include, os.environ['include']) + + # msvc9 building for 32 bits requires SSE2 to work around a + # compiler bug. + if platform_bits == 32: + self.compile_options += ['/arch:SSE2'] + self.compile_options_debug += ['/arch:SSE2'] diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/npy_pkg_config.py b/.local/lib/python3.8/site-packages/numpy/distutils/npy_pkg_config.py new file mode 100644 index 0000000..f6e3ad3 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/npy_pkg_config.py @@ -0,0 +1,437 @@ +import sys +import re +import os + +from configparser import RawConfigParser + +__all__ = ['FormatError', 'PkgNotFound', 'LibraryInfo', 'VariableSet', + 'read_config', 'parse_flags'] + +_VAR = re.compile(r'\$\{([a-zA-Z0-9_-]+)\}') + +class FormatError(OSError): + """ + Exception thrown when there is a problem parsing a configuration file. + + """ + def __init__(self, msg): + self.msg = msg + + def __str__(self): + return self.msg + +class PkgNotFound(OSError): + """Exception raised when a package can not be located.""" + def __init__(self, msg): + self.msg = msg + + def __str__(self): + return self.msg + +def parse_flags(line): + """ + Parse a line from a config file containing compile flags. + + Parameters + ---------- + line : str + A single line containing one or more compile flags. + + Returns + ------- + d : dict + Dictionary of parsed flags, split into relevant categories. + These categories are the keys of `d`: + + * 'include_dirs' + * 'library_dirs' + * 'libraries' + * 'macros' + * 'ignored' + + """ + d = {'include_dirs': [], 'library_dirs': [], 'libraries': [], + 'macros': [], 'ignored': []} + + flags = (' ' + line).split(' -') + for flag in flags: + flag = '-' + flag + if len(flag) > 0: + if flag.startswith('-I'): + d['include_dirs'].append(flag[2:].strip()) + elif flag.startswith('-L'): + d['library_dirs'].append(flag[2:].strip()) + elif flag.startswith('-l'): + d['libraries'].append(flag[2:].strip()) + elif flag.startswith('-D'): + d['macros'].append(flag[2:].strip()) + else: + d['ignored'].append(flag) + + return d + +def _escape_backslash(val): + return val.replace('\\', '\\\\') + +class LibraryInfo: + """ + Object containing build information about a library. + + Parameters + ---------- + name : str + The library name. + description : str + Description of the library. + version : str + Version string. + sections : dict + The sections of the configuration file for the library. The keys are + the section headers, the values the text under each header. + vars : class instance + A `VariableSet` instance, which contains ``(name, value)`` pairs for + variables defined in the configuration file for the library. 
+ requires : sequence, optional + The required libraries for the library to be installed. + + Notes + ----- + All input parameters (except "sections" which is a method) are available as + attributes of the same name. + + """ + def __init__(self, name, description, version, sections, vars, requires=None): + self.name = name + self.description = description + if requires: + self.requires = requires + else: + self.requires = [] + self.version = version + self._sections = sections + self.vars = vars + + def sections(self): + """ + Return the section headers of the config file. + + Parameters + ---------- + None + + Returns + ------- + keys : list of str + The list of section headers. + + """ + return list(self._sections.keys()) + + def cflags(self, section="default"): + val = self.vars.interpolate(self._sections[section]['cflags']) + return _escape_backslash(val) + + def libs(self, section="default"): + val = self.vars.interpolate(self._sections[section]['libs']) + return _escape_backslash(val) + + def __str__(self): + m = ['Name: %s' % self.name, 'Description: %s' % self.description] + if self.requires: + m.append('Requires:') + else: + m.append('Requires: %s' % ",".join(self.requires)) + m.append('Version: %s' % self.version) + + return "\n".join(m) + +class VariableSet: + """ + Container object for the variables defined in a config file. + + `VariableSet` can be used as a plain dictionary, with the variable names + as keys. + + Parameters + ---------- + d : dict + Dict of items in the "variables" section of the configuration file. + + """ + def __init__(self, d): + self._raw_data = dict([(k, v) for k, v in d.items()]) + + self._re = {} + self._re_sub = {} + + self._init_parse() + + def _init_parse(self): + for k, v in self._raw_data.items(): + self._init_parse_var(k, v) + + def _init_parse_var(self, name, value): + self._re[name] = re.compile(r'\$\{%s\}' % name) + self._re_sub[name] = value + + def interpolate(self, value): + # Brute force: we keep interpolating until there is no '${var}' anymore + # or until interpolated string is equal to input string + def _interpolate(value): + for k in self._re.keys(): + value = self._re[k].sub(self._re_sub[k], value) + return value + while _VAR.search(value): + nvalue = _interpolate(value) + if nvalue == value: + break + value = nvalue + + return value + + def variables(self): + """ + Return the list of variable names. + + Parameters + ---------- + None + + Returns + ------- + names : list of str + The names of all variables in the `VariableSet` instance. 
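A short sketch of the brute-force interpolation loop above; the variable names are illustrative:

    from numpy.distutils.npy_pkg_config import VariableSet

    vs = VariableSet({'prefix': '/usr/local', 'libdir': '${prefix}/lib'})
    vs.interpolate('-L${libdir} -lnpymath')
    # first pass:  '-L${prefix}/lib -lnpymath'
    # second pass: '-L/usr/local/lib -lnpymath'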
+ + """ + return list(self._raw_data.keys()) + + # Emulate a dict to set/get variables values + def __getitem__(self, name): + return self._raw_data[name] + + def __setitem__(self, name, value): + self._raw_data[name] = value + self._init_parse_var(name, value) + +def parse_meta(config): + if not config.has_section('meta'): + raise FormatError("No meta section found !") + + d = dict(config.items('meta')) + + for k in ['name', 'description', 'version']: + if not k in d: + raise FormatError("Option %s (section [meta]) is mandatory, " + "but not found" % k) + + if not 'requires' in d: + d['requires'] = [] + + return d + +def parse_variables(config): + if not config.has_section('variables'): + raise FormatError("No variables section found !") + + d = {} + + for name, value in config.items("variables"): + d[name] = value + + return VariableSet(d) + +def parse_sections(config): + return meta_d, r + +def pkg_to_filename(pkg_name): + return "%s.ini" % pkg_name + +def parse_config(filename, dirs=None): + if dirs: + filenames = [os.path.join(d, filename) for d in dirs] + else: + filenames = [filename] + + config = RawConfigParser() + + n = config.read(filenames) + if not len(n) >= 1: + raise PkgNotFound("Could not find file(s) %s" % str(filenames)) + + # Parse meta and variables sections + meta = parse_meta(config) + + vars = {} + if config.has_section('variables'): + for name, value in config.items("variables"): + vars[name] = _escape_backslash(value) + + # Parse "normal" sections + secs = [s for s in config.sections() if not s in ['meta', 'variables']] + sections = {} + + requires = {} + for s in secs: + d = {} + if config.has_option(s, "requires"): + requires[s] = config.get(s, 'requires') + + for name, value in config.items(s): + d[name] = value + sections[s] = d + + return meta, vars, sections, requires + +def _read_config_imp(filenames, dirs=None): + def _read_config(f): + meta, vars, sections, reqs = parse_config(f, dirs) + # recursively add sections and variables of required libraries + for rname, rvalue in reqs.items(): + nmeta, nvars, nsections, nreqs = _read_config(pkg_to_filename(rvalue)) + + # Update var dict for variables not in 'top' config file + for k, v in nvars.items(): + if not k in vars: + vars[k] = v + + # Update sec dict + for oname, ovalue in nsections[rname].items(): + if ovalue: + sections[rname][oname] += ' %s' % ovalue + + return meta, vars, sections, reqs + + meta, vars, sections, reqs = _read_config(filenames) + + # FIXME: document this. If pkgname is defined in the variables section, and + # there is no pkgdir variable defined, pkgdir is automatically defined to + # the path of pkgname. This requires the package to be imported to work + if not 'pkgdir' in vars and "pkgname" in vars: + pkgname = vars["pkgname"] + if not pkgname in sys.modules: + raise ValueError("You should import %s to get information on %s" % + (pkgname, meta["name"])) + + mod = sys.modules[pkgname] + vars["pkgdir"] = _escape_backslash(os.path.dirname(mod.__file__)) + + return LibraryInfo(name=meta["name"], description=meta["description"], + version=meta["version"], sections=sections, vars=VariableSet(vars)) + +# Trivial cache to cache LibraryInfo instances creation. To be really +# efficient, the cache should be handled in read_config, since a same file can +# be parsed many time outside LibraryInfo creation, but I doubt this will be a +# problem in practice +_CACHE = {} +def read_config(pkgname, dirs=None): + """ + Return library info for a package from its configuration file. 
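For reference, a hypothetical foo.ini in the layout parse_config above expects: [meta] must provide name, description and version, [variables] feeds the VariableSet, and other sections typically carry cflags/libs:

    [meta]
    Name = foo
    Description = An example library
    Version = 0.1

    [variables]
    prefix = /usr/local
    libdir = ${prefix}/lib

    [default]
    cflags = -I${prefix}/include
    libs = -L${libdir} -lfoo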
+
+    Parameters
+    ----------
+    pkgname : str
+        Name of the package (should match the name of the .ini file, without
+        the extension, e.g. foo for the file foo.ini).
+    dirs : sequence, optional
+        If given, should be a sequence of directories - usually including
+        the NumPy base directory - where to look for npy-pkg-config files.
+
+    Returns
+    -------
+    pkginfo : class instance
+        The `LibraryInfo` instance containing the build information.
+
+    Raises
+    ------
+    PkgNotFound
+        If the package is not found.
+
+    See Also
+    --------
+    misc_util.get_info, misc_util.get_pkg_info
+
+    Examples
+    --------
+    >>> npymath_info = np.distutils.npy_pkg_config.read_config('npymath')
+    >>> type(npymath_info)
+    <class 'numpy.distutils.npy_pkg_config.LibraryInfo'>
+    >>> print(npymath_info)
+    Name: npymath
+    Description: Portable, core math library implementing C99 standard
+    Requires:
+    Version: 0.1 #random
+
+    """
+    try:
+        return _CACHE[pkgname]
+    except KeyError:
+        v = _read_config_imp(pkg_to_filename(pkgname), dirs)
+        _CACHE[pkgname] = v
+        return v
+
+# TODO:
+#   - implements version comparison (modversion + atleast)
+
+# pkg-config simple emulator - useful for debugging, and maybe later to query
+# the system
+if __name__ == '__main__':
+    from optparse import OptionParser
+    import glob
+
+    parser = OptionParser()
+    parser.add_option("--cflags", dest="cflags", action="store_true",
+                      help="output all preprocessor and compiler flags")
+    parser.add_option("--libs", dest="libs", action="store_true",
+                      help="output all linker flags")
+    parser.add_option("--use-section", dest="section",
+                      help="use this section instead of default for options")
+    parser.add_option("--version", dest="version", action="store_true",
+                      help="output version")
+    parser.add_option("--atleast-version", dest="min_version",
+                      help="Minimal version")
+    parser.add_option("--list-all", dest="list_all", action="store_true",
+                      help="List all packages")
+    parser.add_option("--define-variable", dest="define_variable",
+                      help="Replace variable with the given value")
+
+    (options, args) = parser.parse_args(sys.argv)
+
+    if len(args) < 2:
+        raise ValueError("Expect package name on the command line:")
+
+    if options.list_all:
+        files = glob.glob("*.ini")
+        for f in files:
+            info = read_config(f)
+            print("%s\t%s - %s" % (info.name, info.name, info.description))
+
+    pkg_name = args[1]
+    d = os.environ.get('NPY_PKG_CONFIG_PATH')
+    if d:
+        info = read_config(pkg_name, ['numpy/core/lib/npy-pkg-config', '.', d])
+    else:
+        info = read_config(pkg_name, ['numpy/core/lib/npy-pkg-config', '.'])
+
+    if options.section:
+        section = options.section
+    else:
+        section = "default"
+
+    if options.define_variable:
+        m = re.search(r'([\S]+)=([\S]+)', options.define_variable)
+        if not m:
+            raise ValueError("--define-variable option should be of "
+                             "the form --define-variable=foo=bar")
+        else:
+            name = m.group(1)
+            value = m.group(2)
+            info.vars[name] = value
+
+    if options.cflags:
+        print(info.cflags(section))
+    if options.libs:
+        print(info.libs(section))
+    if options.version:
+        print(info.version)
+    if options.min_version:
+        print(info.version >= options.min_version)
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/numpy_distribution.py b/.local/lib/python3.8/site-packages/numpy/distutils/numpy_distribution.py
new file mode 100644
index 0000000..ea81826
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/numpy_distribution.py
@@ -0,0 +1,17 @@
+# XXX: Handle setuptools ?
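Stepping back to the npy_pkg_config.py __main__ block above: its option flags map onto the LibraryInfo API roughly as follows (a sketch; 'npymath' is the package NumPy ships, the version string is illustrative):

    from numpy.distutils.npy_pkg_config import read_config

    info = read_config('npymath')  # package named on the command line
    print(info.cflags('default'))  # --cflags [--use-section default]
    print(info.libs('default'))    # --libs
    print(info.version)            # --version
    print(info.version >= '0.1')   # --atleast-version 0.1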
+from distutils.core import Distribution
+
+# This class is used because we add new files (sconscripts, and so on) with the
+# scons command
+class NumpyDistribution(Distribution):
+    def __init__(self, attrs = None):
+        # A list of (sconscripts, pre_hook, post_hook, src, parent_names)
+        self.scons_data = []
+        # A list of installable libraries
+        self.installed_libraries = []
+        # A dict of pkg_config files to generate/install
+        self.installed_pkg_config = {}
+        Distribution.__init__(self, attrs)
+
+    def has_scons_scripts(self):
+        return bool(self.scons_data)
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/pathccompiler.py b/.local/lib/python3.8/site-packages/numpy/distutils/pathccompiler.py
new file mode 100644
index 0000000..4805181
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/pathccompiler.py
@@ -0,0 +1,21 @@
+from distutils.unixccompiler import UnixCCompiler
+
+class PathScaleCCompiler(UnixCCompiler):
+
+    """
+    PathScale compiler compatible with a gcc-built Python.
+    """
+
+    compiler_type = 'pathcc'
+    cc_exe = 'pathcc'
+    cxx_exe = 'pathCC'
+
+    def __init__ (self, verbose=0, dry_run=0, force=0):
+        UnixCCompiler.__init__ (self, verbose, dry_run, force)
+        cc_compiler = self.cc_exe
+        cxx_compiler = self.cxx_exe
+        self.set_executables(compiler=cc_compiler,
+                             compiler_so=cc_compiler,
+                             compiler_cxx=cxx_compiler,
+                             linker_exe=cc_compiler,
+                             linker_so=cc_compiler + ' -shared')
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/setup.py b/.local/lib/python3.8/site-packages/numpy/distutils/setup.py
new file mode 100644
index 0000000..522756f
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/setup.py
@@ -0,0 +1,17 @@
+#!/usr/bin/env python3
+def configuration(parent_package='',top_path=None):
+    from numpy.distutils.misc_util import Configuration
+    config = Configuration('distutils', parent_package, top_path)
+    config.add_subpackage('command')
+    config.add_subpackage('fcompiler')
+    config.add_subpackage('tests')
+    config.add_data_files('site.cfg')
+    config.add_data_files('mingw/gfortran_vs2003_hack.c')
+    config.add_data_dir('checks')
+    config.add_data_files('*.pyi')
+    config.make_config_py()
+    return config
+
+if __name__ == '__main__':
+    from numpy.distutils.core import setup
+    setup(configuration=configuration)
diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/system_info.py b/.local/lib/python3.8/site-packages/numpy/distutils/system_info.py
new file mode 100644
index 0000000..d5a1687
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/distutils/system_info.py
@@ -0,0 +1,3172 @@
+#!/usr/bin/env python3
+"""
+This file defines a set of system_info classes for getting
+information about various resources (libraries, library directories,
+include directories, etc.) in the system. Usage:
+    info_dict = get_info(<name>)
+  where <name> is a string 'atlas','x11','fftw','lapack','blas',
+  'lapack_src', 'blas_src', etc. For a complete list of allowed names,
+  see the definition of get_info() function below.
+
+  Returned info_dict is a dictionary which is compatible with
+  distutils.setup keyword arguments. If info_dict == {}, then the
+  asked resource is not available (system_info could not find it).
+
+  Several *_info classes specify an environment variable to specify
+  the locations of software. When setting the corresponding environment
+  variable to 'None' then the software will be ignored, even when it
+  is available in system.
+ +Global parameters: + system_info.search_static_first - search static libraries (.a) + in precedence to shared ones (.so, .sl) if enabled. + system_info.verbosity - output the results to stdout if enabled. + +The file 'site.cfg' is looked for in + +1) Directory of main setup.py file being run. +2) Home directory of user running the setup.py file as ~/.numpy-site.cfg +3) System wide directory (location of this file...) + +The first one found is used to get system configuration options The +format is that used by ConfigParser (i.e., Windows .INI style). The +section ALL is not intended for general use. + +Appropriate defaults are used if nothing is specified. + +The order of finding the locations of resources is the following: + 1. environment variable + 2. section in site.cfg + 3. DEFAULT section in site.cfg + 4. System default search paths (see ``default_*`` variables below). +Only the first complete match is returned. + +Currently, the following classes are available, along with their section names: + + Numeric_info:Numeric + _numpy_info:Numeric + _pkg_config_info:None + accelerate_info:accelerate + agg2_info:agg2 + amd_info:amd + atlas_3_10_blas_info:atlas + atlas_3_10_blas_threads_info:atlas + atlas_3_10_info:atlas + atlas_3_10_threads_info:atlas + atlas_blas_info:atlas + atlas_blas_threads_info:atlas + atlas_info:atlas + atlas_threads_info:atlas + blas64__opt_info:ALL # usage recommended (general ILP64 BLAS, 64_ symbol suffix) + blas_ilp64_opt_info:ALL # usage recommended (general ILP64 BLAS) + blas_ilp64_plain_opt_info:ALL # usage recommended (general ILP64 BLAS, no symbol suffix) + blas_info:blas + blas_mkl_info:mkl + blas_opt_info:ALL # usage recommended + blas_src_info:blas_src + blis_info:blis + boost_python_info:boost_python + dfftw_info:fftw + dfftw_threads_info:fftw + djbfft_info:djbfft + f2py_info:ALL + fft_opt_info:ALL + fftw2_info:fftw + fftw3_info:fftw3 + fftw_info:fftw + fftw_threads_info:fftw + flame_info:flame + freetype2_info:freetype2 + gdk_2_info:gdk_2 + gdk_info:gdk + gdk_pixbuf_2_info:gdk_pixbuf_2 + gdk_pixbuf_xlib_2_info:gdk_pixbuf_xlib_2 + gdk_x11_2_info:gdk_x11_2 + gtkp_2_info:gtkp_2 + gtkp_x11_2_info:gtkp_x11_2 + lapack64__opt_info:ALL # usage recommended (general ILP64 LAPACK, 64_ symbol suffix) + lapack_atlas_3_10_info:atlas + lapack_atlas_3_10_threads_info:atlas + lapack_atlas_info:atlas + lapack_atlas_threads_info:atlas + lapack_ilp64_opt_info:ALL # usage recommended (general ILP64 LAPACK) + lapack_ilp64_plain_opt_info:ALL # usage recommended (general ILP64 LAPACK, no symbol suffix) + lapack_info:lapack + lapack_mkl_info:mkl + lapack_opt_info:ALL # usage recommended + lapack_src_info:lapack_src + mkl_info:mkl + numarray_info:numarray + numerix_info:numerix + numpy_info:numpy + openblas64__info:openblas64_ + openblas64__lapack_info:openblas64_ + openblas_clapack_info:openblas + openblas_ilp64_info:openblas_ilp64 + openblas_ilp64_lapack_info:openblas_ilp64 + openblas_info:openblas + openblas_lapack_info:openblas + sfftw_info:fftw + sfftw_threads_info:fftw + system_info:ALL + umfpack_info:umfpack + wx_info:wx + x11_info:x11 + xft_info:xft + +Note that blas_opt_info and lapack_opt_info honor the NPY_BLAS_ORDER +and NPY_LAPACK_ORDER environment variables to determine the order in which +specific BLAS and LAPACK libraries are searched for. + +This search (or autodetection) can be bypassed by defining the environment +variables NPY_BLAS_LIBS and NPY_LAPACK_LIBS, which should then contain the +exact linker flags to use (language will be set to F77). 
This is useful when building against
+Netlib BLAS/LAPACK or stub files, in order to be able to switch BLAS and LAPACK
+implementations at runtime. If using this to build NumPy itself, it is
+recommended to also define NPY_CBLAS_LIBS (assuming your BLAS library has a
+CBLAS interface) to enable CBLAS usage for matrix multiplication (unoptimized
+otherwise).
+
+Example:
+----------
+[DEFAULT]
+# default section
+library_dirs = /usr/lib:/usr/local/lib:/opt/lib
+include_dirs = /usr/include:/usr/local/include:/opt/include
+src_dirs = /usr/local/src:/opt/src
+# search static libraries (.a) in preference to shared ones (.so)
+search_static_first = 0
+
+[fftw]
+libraries = rfftw, fftw
+
+[atlas]
+library_dirs = /usr/lib/3dnow:/usr/lib/3dnow/atlas
+# for overriding the names of the atlas libraries
+libraries = lapack, f77blas, cblas, atlas
+
+[x11]
+library_dirs = /usr/X11R6/lib
+include_dirs = /usr/X11R6/include
+----------
+
+Note that the ``libraries`` key is the default setting for libraries.
+
+Authors:
+  Pearu Peterson <pearu@cens.ioc.ee>, February 2002
+  David M. Cooke <cookedm@physics.mcmaster.ca>, April 2002
+
+Copyright 2002 Pearu Peterson all rights reserved,
+Pearu Peterson <pearu@cens.ioc.ee>
+Permission to use, modify, and distribute this software is given under the
+terms of the NumPy (BSD style) license. See LICENSE.txt that came with
+this distribution for specifics.
+
+NO WARRANTY IS EXPRESSED OR IMPLIED. USE AT YOUR OWN RISK.
+
+"""
+import sys
+import os
+import re
+import copy
+import warnings
+import subprocess
+import textwrap
+
+from glob import glob
+from functools import reduce
+from configparser import NoOptionError
+from configparser import RawConfigParser as ConfigParser
+# It seems that some people are importing ConfigParser from here so is
+# good to keep its class name. Use of RawConfigParser is needed in
+# order to be able to load path names with percent in them, like
+# `feature%2Fcool` which is common on git flow branch names.
+
+from distutils.errors import DistutilsError
+from distutils.dist import Distribution
+import sysconfig
+from numpy.distutils import log
+from distutils.util import get_platform
+
+from numpy.distutils.exec_command import (
+    find_executable, filepath_from_subprocess_output,
+    )
+from numpy.distutils.misc_util import (is_sequence, is_string,
+                                       get_shared_lib_extension)
+from numpy.distutils.command.config import config as cmd_config
+from numpy.distutils import customized_ccompiler as _customized_ccompiler
+from numpy.distutils import _shell_utils
+import distutils.ccompiler
+import tempfile
+import shutil
+
+__all__ = ['system_info']
+
+# Determine number of bits
+import platform
+_bits = {'32bit': 32, '64bit': 64}
+platform_bits = _bits[platform.architecture()[0]]
+
+
+global_compiler = None
+
+def customized_ccompiler():
+    global global_compiler
+    if not global_compiler:
+        global_compiler = _customized_ccompiler()
+    return global_compiler
+
+
+def _c_string_literal(s):
+    """
+    Convert a python string into a literal suitable for inclusion into C code
+    """
+    # only these three characters are forbidden in C strings
+    s = s.replace('\\', r'\\')
+    s = s.replace('"', r'\"')
+    s = s.replace('\n', r'\n')
+    return '"{}"'.format(s)
+
+
+def libpaths(paths, bits):
+    """Return a list of library paths valid on 32 or 64 bit systems.
+
+    Inputs:
+      paths : sequence
+        A sequence of strings (typically paths)
+      bits : int
+        An integer, the only valid values are 32 or 64. A ValueError exception
+        is raised otherwise.
+ + Examples: + + Consider a list of directories + >>> paths = ['/usr/X11R6/lib','/usr/X11/lib','/usr/lib'] + + For a 32-bit platform, this is already valid: + >>> np.distutils.system_info.libpaths(paths,32) + ['/usr/X11R6/lib', '/usr/X11/lib', '/usr/lib'] + + On 64 bits, we prepend the '64' postfix + >>> np.distutils.system_info.libpaths(paths,64) + ['/usr/X11R6/lib64', '/usr/X11R6/lib', '/usr/X11/lib64', '/usr/X11/lib', + '/usr/lib64', '/usr/lib'] + """ + if bits not in (32, 64): + raise ValueError("Invalid bit size in libpaths: 32 or 64 only") + + # Handle 32bit case + if bits == 32: + return paths + + # Handle 64bit case + out = [] + for p in paths: + out.extend([p + '64', p]) + + return out + + +if sys.platform == 'win32': + default_lib_dirs = ['C:\\', + os.path.join(sysconfig.get_config_var('exec_prefix'), + 'libs')] + default_runtime_dirs = [] + default_include_dirs = [] + default_src_dirs = ['.'] + default_x11_lib_dirs = [] + default_x11_include_dirs = [] + _include_dirs = [ + 'include', + 'include/suitesparse', + ] + _lib_dirs = [ + 'lib', + ] + + _include_dirs = [d.replace('/', os.sep) for d in _include_dirs] + _lib_dirs = [d.replace('/', os.sep) for d in _lib_dirs] + def add_system_root(library_root): + """Add a package manager root to the include directories""" + global default_lib_dirs + global default_include_dirs + + library_root = os.path.normpath(library_root) + + default_lib_dirs.extend( + os.path.join(library_root, d) for d in _lib_dirs) + default_include_dirs.extend( + os.path.join(library_root, d) for d in _include_dirs) + + # VCpkg is the de-facto package manager on windows for C/C++ + # libraries. If it is on the PATH, then we append its paths here. + vcpkg = shutil.which('vcpkg') + if vcpkg: + vcpkg_dir = os.path.dirname(vcpkg) + if platform.architecture()[0] == '32bit': + specifier = 'x86' + else: + specifier = 'x64' + + vcpkg_installed = os.path.join(vcpkg_dir, 'installed') + for vcpkg_root in [ + os.path.join(vcpkg_installed, specifier + '-windows'), + os.path.join(vcpkg_installed, specifier + '-windows-static'), + ]: + add_system_root(vcpkg_root) + + # Conda is another popular package manager that provides libraries + conda = shutil.which('conda') + if conda: + conda_dir = os.path.dirname(conda) + add_system_root(os.path.join(conda_dir, '..', 'Library')) + add_system_root(os.path.join(conda_dir, 'Library')) + +else: + default_lib_dirs = libpaths(['/usr/local/lib', '/opt/lib', '/usr/lib', + '/opt/local/lib', '/sw/lib'], platform_bits) + default_runtime_dirs = [] + default_include_dirs = ['/usr/local/include', + '/opt/include', + # path of umfpack under macports + '/opt/local/include/ufsparse', + '/opt/local/include', '/sw/include', + '/usr/include/suitesparse'] + default_src_dirs = ['.', '/usr/local/src', '/opt/src', '/sw/src'] + + default_x11_lib_dirs = libpaths(['/usr/X11R6/lib', '/usr/X11/lib', + '/usr/lib'], platform_bits) + default_x11_include_dirs = ['/usr/X11R6/include', '/usr/X11/include'] + + if os.path.exists('/usr/lib/X11'): + globbed_x11_dir = glob('/usr/lib/*/libX11.so') + if globbed_x11_dir: + x11_so_dir = os.path.split(globbed_x11_dir[0])[0] + default_x11_lib_dirs.extend([x11_so_dir, '/usr/lib/X11']) + default_x11_include_dirs.extend(['/usr/lib/X11/include', + '/usr/include/X11']) + + with open(os.devnull, 'w') as tmp: + try: + p = subprocess.Popen(["gcc", "-print-multiarch"], stdout=subprocess.PIPE, + stderr=tmp) + except (OSError, DistutilsError): + # OSError if gcc is not installed, or SandboxViolation (DistutilsError + # subclass) if an old 
setuptools bug is triggered (see gh-3160). + pass + else: + triplet = str(p.communicate()[0].decode().strip()) + if p.returncode == 0: + # gcc supports the "-print-multiarch" option + default_x11_lib_dirs += [os.path.join("/usr/lib/", triplet)] + default_lib_dirs += [os.path.join("/usr/lib/", triplet)] + + +if os.path.join(sys.prefix, 'lib') not in default_lib_dirs: + default_lib_dirs.insert(0, os.path.join(sys.prefix, 'lib')) + default_include_dirs.append(os.path.join(sys.prefix, 'include')) + default_src_dirs.append(os.path.join(sys.prefix, 'src')) + +default_lib_dirs = [_m for _m in default_lib_dirs if os.path.isdir(_m)] +default_runtime_dirs = [_m for _m in default_runtime_dirs if os.path.isdir(_m)] +default_include_dirs = [_m for _m in default_include_dirs if os.path.isdir(_m)] +default_src_dirs = [_m for _m in default_src_dirs if os.path.isdir(_m)] + +so_ext = get_shared_lib_extension() + + +def get_standard_file(fname): + """Returns a list of files named 'fname' from + 1) System-wide directory (directory-location of this module) + 2) Users HOME directory (os.environ['HOME']) + 3) Local directory + """ + # System-wide file + filenames = [] + try: + f = __file__ + except NameError: + f = sys.argv[0] + sysfile = os.path.join(os.path.split(os.path.abspath(f))[0], + fname) + if os.path.isfile(sysfile): + filenames.append(sysfile) + + # Home directory + # And look for the user config file + try: + f = os.path.expanduser('~') + except KeyError: + pass + else: + user_file = os.path.join(f, fname) + if os.path.isfile(user_file): + filenames.append(user_file) + + # Local file + if os.path.isfile(fname): + filenames.append(os.path.abspath(fname)) + + return filenames + + +def _parse_env_order(base_order, env): + """ Parse an environment variable `env` by splitting with "," and only returning elements from `base_order` + + This method will sequence the environment variable and check for their + individual elements in `base_order`. + + The items in the environment variable may be negated via '^item' or '!itema,itemb'. + It must start with ^/! to negate all options. 
+ + Raises + ------ + ValueError: for mixed negated and non-negated orders or multiple negated orders + + Parameters + ---------- + base_order : list of str + the base list of orders + env : str + the environment variable to be parsed, if none is found, `base_order` is returned + + Returns + ------- + allow_order : list of str + allowed orders in lower-case + unknown_order : list of str + for values not overlapping with `base_order` + """ + order_str = os.environ.get(env, None) + + # ensure all base-orders are lower-case (for easier comparison) + base_order = [order.lower() for order in base_order] + if order_str is None: + return base_order, [] + + neg = order_str.startswith('^') or order_str.startswith('!') + # Check format + order_str_l = list(order_str) + sum_neg = order_str_l.count('^') + order_str_l.count('!') + if neg: + if sum_neg > 1: + raise ValueError(f"Environment variable '{env}' may only contain a single (prefixed) negation: {order_str}") + # remove prefix + order_str = order_str[1:] + elif sum_neg > 0: + raise ValueError(f"Environment variable '{env}' may not mix negated an non-negated items: {order_str}") + + # Split and lower case + orders = order_str.lower().split(',') + + # to inform callee about non-overlapping elements + unknown_order = [] + + # if negated, we have to remove from the order + if neg: + allow_order = base_order.copy() + + for order in orders: + if not order: + continue + + if order not in base_order: + unknown_order.append(order) + continue + + if order in allow_order: + allow_order.remove(order) + + else: + allow_order = [] + + for order in orders: + if not order: + continue + + if order not in base_order: + unknown_order.append(order) + continue + + if order not in allow_order: + allow_order.append(order) + + return allow_order, unknown_order + + +def get_info(name, notfound_action=0): + """ + notfound_action: + 0 - do nothing + 1 - display warning message + 2 - raise error + """ + cl = {'armpl': armpl_info, + 'blas_armpl': blas_armpl_info, + 'lapack_armpl': lapack_armpl_info, + 'fftw3_armpl': fftw3_armpl_info, + 'atlas': atlas_info, # use lapack_opt or blas_opt instead + 'atlas_threads': atlas_threads_info, # ditto + 'atlas_blas': atlas_blas_info, + 'atlas_blas_threads': atlas_blas_threads_info, + 'lapack_atlas': lapack_atlas_info, # use lapack_opt instead + 'lapack_atlas_threads': lapack_atlas_threads_info, # ditto + 'atlas_3_10': atlas_3_10_info, # use lapack_opt or blas_opt instead + 'atlas_3_10_threads': atlas_3_10_threads_info, # ditto + 'atlas_3_10_blas': atlas_3_10_blas_info, + 'atlas_3_10_blas_threads': atlas_3_10_blas_threads_info, + 'lapack_atlas_3_10': lapack_atlas_3_10_info, # use lapack_opt instead + 'lapack_atlas_3_10_threads': lapack_atlas_3_10_threads_info, # ditto + 'flame': flame_info, # use lapack_opt instead + 'mkl': mkl_info, + # openblas which may or may not have embedded lapack + 'openblas': openblas_info, # use blas_opt instead + # openblas with embedded lapack + 'openblas_lapack': openblas_lapack_info, # use blas_opt instead + 'openblas_clapack': openblas_clapack_info, # use blas_opt instead + 'blis': blis_info, # use blas_opt instead + 'lapack_mkl': lapack_mkl_info, # use lapack_opt instead + 'blas_mkl': blas_mkl_info, # use blas_opt instead + 'accelerate': accelerate_info, # use blas_opt instead + 'openblas64_': openblas64__info, + 'openblas64__lapack': openblas64__lapack_info, + 'openblas_ilp64': openblas_ilp64_info, + 'openblas_ilp64_lapack': openblas_ilp64_lapack_info, + 'x11': x11_info, + 'fft_opt': fft_opt_info, + 
'fftw': fftw_info, + 'fftw2': fftw2_info, + 'fftw3': fftw3_info, + 'dfftw': dfftw_info, + 'sfftw': sfftw_info, + 'fftw_threads': fftw_threads_info, + 'dfftw_threads': dfftw_threads_info, + 'sfftw_threads': sfftw_threads_info, + 'djbfft': djbfft_info, + 'blas': blas_info, # use blas_opt instead + 'lapack': lapack_info, # use lapack_opt instead + 'lapack_src': lapack_src_info, + 'blas_src': blas_src_info, + 'numpy': numpy_info, + 'f2py': f2py_info, + 'Numeric': Numeric_info, + 'numeric': Numeric_info, + 'numarray': numarray_info, + 'numerix': numerix_info, + 'lapack_opt': lapack_opt_info, + 'lapack_ilp64_opt': lapack_ilp64_opt_info, + 'lapack_ilp64_plain_opt': lapack_ilp64_plain_opt_info, + 'lapack64__opt': lapack64__opt_info, + 'blas_opt': blas_opt_info, + 'blas_ilp64_opt': blas_ilp64_opt_info, + 'blas_ilp64_plain_opt': blas_ilp64_plain_opt_info, + 'blas64__opt': blas64__opt_info, + 'boost_python': boost_python_info, + 'agg2': agg2_info, + 'wx': wx_info, + 'gdk_pixbuf_xlib_2': gdk_pixbuf_xlib_2_info, + 'gdk-pixbuf-xlib-2.0': gdk_pixbuf_xlib_2_info, + 'gdk_pixbuf_2': gdk_pixbuf_2_info, + 'gdk-pixbuf-2.0': gdk_pixbuf_2_info, + 'gdk': gdk_info, + 'gdk_2': gdk_2_info, + 'gdk-2.0': gdk_2_info, + 'gdk_x11_2': gdk_x11_2_info, + 'gdk-x11-2.0': gdk_x11_2_info, + 'gtkp_x11_2': gtkp_x11_2_info, + 'gtk+-x11-2.0': gtkp_x11_2_info, + 'gtkp_2': gtkp_2_info, + 'gtk+-2.0': gtkp_2_info, + 'xft': xft_info, + 'freetype2': freetype2_info, + 'umfpack': umfpack_info, + 'amd': amd_info, + }.get(name.lower(), system_info) + return cl().get_info(notfound_action) + + +class NotFoundError(DistutilsError): + """Some third-party program or library is not found.""" + + +class AliasedOptionError(DistutilsError): + """ + Aliases entries in config files should not be existing. + In section '{section}' we found multiple appearances of options {options}.""" + + +class AtlasNotFoundError(NotFoundError): + """ + Atlas (http://github.com/math-atlas/math-atlas) libraries not found. + Directories to search for the libraries can be specified in the + numpy/distutils/site.cfg file (section [atlas]) or by setting + the ATLAS environment variable.""" + + +class FlameNotFoundError(NotFoundError): + """ + FLAME (http://www.cs.utexas.edu/~flame/web/) libraries not found. + Directories to search for the libraries can be specified in the + numpy/distutils/site.cfg file (section [flame]).""" + + +class LapackNotFoundError(NotFoundError): + """ + Lapack (http://www.netlib.org/lapack/) libraries not found. + Directories to search for the libraries can be specified in the + numpy/distutils/site.cfg file (section [lapack]) or by setting + the LAPACK environment variable.""" + + +class LapackSrcNotFoundError(LapackNotFoundError): + """ + Lapack (http://www.netlib.org/lapack/) sources not found. + Directories to search for the sources can be specified in the + numpy/distutils/site.cfg file (section [lapack_src]) or by setting + the LAPACK_SRC environment variable.""" + + +class LapackILP64NotFoundError(NotFoundError): + """ + 64-bit Lapack libraries not found. + Known libraries in numpy/distutils/site.cfg file are: + openblas64_, openblas_ilp64 + """ + +class BlasOptNotFoundError(NotFoundError): + """ + Optimized (vendor) Blas libraries are not found. + Falls back to netlib Blas library which has worse performance. + A better performance should be easily gained by switching + Blas library.""" + +class BlasNotFoundError(NotFoundError): + """ + Blas (http://www.netlib.org/blas/) libraries not found. 
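How the NPY_BLAS_ORDER/NPY_LAPACK_ORDER handling mentioned in the dispatch table above behaves, via _parse_env_order (the base order is shortened for the example):

    import os
    from numpy.distutils.system_info import _parse_env_order

    os.environ['NPY_BLAS_ORDER'] = 'mkl,unknown'
    _parse_env_order(['mkl', 'openblas', 'atlas'], 'NPY_BLAS_ORDER')
    # -> (['mkl'], ['unknown'])       only known names survive, in the order given

    os.environ['NPY_BLAS_ORDER'] = '^mkl'
    _parse_env_order(['mkl', 'openblas', 'atlas'], 'NPY_BLAS_ORDER')
    # -> (['openblas', 'atlas'], [])  a single leading ^ or ! negates the list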
+ Directories to search for the libraries can be specified in the + numpy/distutils/site.cfg file (section [blas]) or by setting + the BLAS environment variable.""" + +class BlasILP64NotFoundError(NotFoundError): + """ + 64-bit Blas libraries not found. + Known libraries in numpy/distutils/site.cfg file are: + openblas64_, openblas_ilp64 + """ + +class BlasSrcNotFoundError(BlasNotFoundError): + """ + Blas (http://www.netlib.org/blas/) sources not found. + Directories to search for the sources can be specified in the + numpy/distutils/site.cfg file (section [blas_src]) or by setting + the BLAS_SRC environment variable.""" + + +class FFTWNotFoundError(NotFoundError): + """ + FFTW (http://www.fftw.org/) libraries not found. + Directories to search for the libraries can be specified in the + numpy/distutils/site.cfg file (section [fftw]) or by setting + the FFTW environment variable.""" + + +class DJBFFTNotFoundError(NotFoundError): + """ + DJBFFT (https://cr.yp.to/djbfft.html) libraries not found. + Directories to search for the libraries can be specified in the + numpy/distutils/site.cfg file (section [djbfft]) or by setting + the DJBFFT environment variable.""" + + +class NumericNotFoundError(NotFoundError): + """ + Numeric (https://www.numpy.org/) module not found. + Get it from above location, install it, and retry setup.py.""" + + +class X11NotFoundError(NotFoundError): + """X11 libraries not found.""" + + +class UmfpackNotFoundError(NotFoundError): + """ + UMFPACK sparse solver (https://www.cise.ufl.edu/research/sparse/umfpack/) + not found. Directories to search for the libraries can be specified in the + numpy/distutils/site.cfg file (section [umfpack]) or by setting + the UMFPACK environment variable.""" + + +class system_info: + + """ get_info() is the only public method. Don't use others. + """ + dir_env_var = None + # XXX: search_static_first is disabled by default, may disappear in + # future unless it is proved to be useful. + search_static_first = 0 + # The base-class section name is a random word "ALL" and is not really + # intended for general use. It cannot be None nor can it be DEFAULT as + # these break the ConfigParser. 
See gh-15338 + section = 'ALL' + saved_results = {} + + notfounderror = NotFoundError + + def __init__(self, + default_lib_dirs=default_lib_dirs, + default_include_dirs=default_include_dirs, + ): + self.__class__.info = {} + self.local_prefixes = [] + defaults = {'library_dirs': os.pathsep.join(default_lib_dirs), + 'include_dirs': os.pathsep.join(default_include_dirs), + 'runtime_library_dirs': os.pathsep.join(default_runtime_dirs), + 'rpath': '', + 'src_dirs': os.pathsep.join(default_src_dirs), + 'search_static_first': str(self.search_static_first), + 'extra_compile_args': '', 'extra_link_args': ''} + self.cp = ConfigParser(defaults) + self.files = [] + self.files.extend(get_standard_file('.numpy-site.cfg')) + self.files.extend(get_standard_file('site.cfg')) + self.parse_config_files() + + if self.section is not None: + self.search_static_first = self.cp.getboolean( + self.section, 'search_static_first') + assert isinstance(self.search_static_first, int) + + def parse_config_files(self): + self.cp.read(self.files) + if not self.cp.has_section(self.section): + if self.section is not None: + self.cp.add_section(self.section) + + def calc_libraries_info(self): + libs = self.get_libraries() + dirs = self.get_lib_dirs() + # The extensions use runtime_library_dirs + r_dirs = self.get_runtime_lib_dirs() + # Intrinsic distutils use rpath, we simply append both entries + # as though they were one entry + r_dirs.extend(self.get_runtime_lib_dirs(key='rpath')) + info = {} + for lib in libs: + i = self.check_libs(dirs, [lib]) + if i is not None: + dict_append(info, **i) + else: + log.info('Library %s was not found. Ignoring' % (lib)) + + if r_dirs: + i = self.check_libs(r_dirs, [lib]) + if i is not None: + # Swap library keywords found to runtime_library_dirs + # the libraries are insisting on the user having defined + # them using the library_dirs, and not necessarily by + # runtime_library_dirs + del i['libraries'] + i['runtime_library_dirs'] = i.pop('library_dirs') + dict_append(info, **i) + else: + log.info('Runtime library %s was not found. 
Ignoring' % (lib)) + + return info + + def set_info(self, **info): + if info: + lib_info = self.calc_libraries_info() + dict_append(info, **lib_info) + # Update extra information + extra_info = self.calc_extra_info() + dict_append(info, **extra_info) + self.saved_results[self.__class__.__name__] = info + + def get_option_single(self, *options): + """ Ensure that only one of `options` are found in the section + + Parameters + ---------- + *options : list of str + a list of options to be found in the section (``self.section``) + + Returns + ------- + str : + the option that is uniquely found in the section + + Raises + ------ + AliasedOptionError : + in case more than one of the options are found + """ + found = [self.cp.has_option(self.section, opt) for opt in options] + if sum(found) == 1: + return options[found.index(True)] + elif sum(found) == 0: + # nothing is found anyways + return options[0] + + # Else we have more than 1 key found + if AliasedOptionError.__doc__ is None: + raise AliasedOptionError() + raise AliasedOptionError(AliasedOptionError.__doc__.format( + section=self.section, options='[{}]'.format(', '.join(options)))) + + + def has_info(self): + return self.__class__.__name__ in self.saved_results + + def calc_extra_info(self): + """ Updates the information in the current information with + respect to these flags: + extra_compile_args + extra_link_args + """ + info = {} + for key in ['extra_compile_args', 'extra_link_args']: + # Get values + opt = self.cp.get(self.section, key) + opt = _shell_utils.NativeParser.split(opt) + if opt: + tmp = {key: opt} + dict_append(info, **tmp) + return info + + def get_info(self, notfound_action=0): + """ Return a dictionary with items that are compatible + with numpy.distutils.setup keyword arguments. + """ + flag = 0 + if not self.has_info(): + flag = 1 + log.info(self.__class__.__name__ + ':') + if hasattr(self, 'calc_info'): + self.calc_info() + if notfound_action: + if not self.has_info(): + if notfound_action == 1: + warnings.warn(self.notfounderror.__doc__, stacklevel=2) + elif notfound_action == 2: + raise self.notfounderror(self.notfounderror.__doc__) + else: + raise ValueError(repr(notfound_action)) + + if not self.has_info(): + log.info(' NOT AVAILABLE') + self.set_info() + else: + log.info(' FOUND:') + + res = self.saved_results.get(self.__class__.__name__) + if log.get_threshold() <= log.INFO and flag: + for k, v in res.items(): + v = str(v) + if k in ['sources', 'libraries'] and len(v) > 270: + v = v[:120] + '...\n...\n...' 
+ v[-120:] + log.info(' %s = %s', k, v) + log.info('') + + return copy.deepcopy(res) + + def get_paths(self, section, key): + dirs = self.cp.get(section, key).split(os.pathsep) + env_var = self.dir_env_var + if env_var: + if is_sequence(env_var): + e0 = env_var[-1] + for e in env_var: + if e in os.environ: + e0 = e + break + if not env_var[0] == e0: + log.info('Setting %s=%s' % (env_var[0], e0)) + env_var = e0 + if env_var and env_var in os.environ: + d = os.environ[env_var] + if d == 'None': + log.info('Disabled %s: %s', + self.__class__.__name__, '(%s is None)' + % (env_var,)) + return [] + if os.path.isfile(d): + dirs = [os.path.dirname(d)] + dirs + l = getattr(self, '_lib_names', []) + if len(l) == 1: + b = os.path.basename(d) + b = os.path.splitext(b)[0] + if b[:3] == 'lib': + log.info('Replacing _lib_names[0]==%r with %r' \ + % (self._lib_names[0], b[3:])) + self._lib_names[0] = b[3:] + else: + ds = d.split(os.pathsep) + ds2 = [] + for d in ds: + if os.path.isdir(d): + ds2.append(d) + for dd in ['include', 'lib']: + d1 = os.path.join(d, dd) + if os.path.isdir(d1): + ds2.append(d1) + dirs = ds2 + dirs + default_dirs = self.cp.get(self.section, key).split(os.pathsep) + dirs.extend(default_dirs) + ret = [] + for d in dirs: + if len(d) > 0 and not os.path.isdir(d): + warnings.warn('Specified path %s is invalid.' % d, stacklevel=2) + continue + + if d not in ret: + ret.append(d) + + log.debug('( %s = %s )', key, ':'.join(ret)) + return ret + + def get_lib_dirs(self, key='library_dirs'): + return self.get_paths(self.section, key) + + def get_runtime_lib_dirs(self, key='runtime_library_dirs'): + path = self.get_paths(self.section, key) + if path == ['']: + path = [] + return path + + def get_include_dirs(self, key='include_dirs'): + return self.get_paths(self.section, key) + + def get_src_dirs(self, key='src_dirs'): + return self.get_paths(self.section, key) + + def get_libs(self, key, default): + try: + libs = self.cp.get(self.section, key) + except NoOptionError: + if not default: + return [] + if is_string(default): + return [default] + return default + return [b for b in [a.strip() for a in libs.split(',')] if b] + + def get_libraries(self, key='libraries'): + if hasattr(self, '_lib_names'): + return self.get_libs(key, default=self._lib_names) + else: + return self.get_libs(key, '') + + def library_extensions(self): + c = customized_ccompiler() + static_exts = [] + if c.compiler_type != 'msvc': + # MSVC doesn't understand binutils + static_exts.append('.a') + if sys.platform == 'win32': + static_exts.append('.lib') # .lib is used by MSVC and others + if self.search_static_first: + exts = static_exts + [so_ext] + else: + exts = [so_ext] + static_exts + if sys.platform == 'cygwin': + exts.append('.dll.a') + if sys.platform == 'darwin': + exts.append('.dylib') + return exts + + def check_libs(self, lib_dirs, libs, opt_libs=[]): + """If static or shared libraries are available then return + their info dictionary. + + Checks for all libraries as shared libraries first, then + static (or vice versa if self.search_static_first is True). + """ + exts = self.library_extensions() + info = None + for ext in exts: + info = self._check_libs(lib_dirs, libs, opt_libs, [ext]) + if info is not None: + break + if not info: + log.info(' libraries %s not found in %s', ','.join(libs), + lib_dirs) + return info + + def check_libs2(self, lib_dirs, libs, opt_libs=[]): + """If static or shared libraries are available then return + their info dictionary. + + Checks each library for shared or static. 
+ """ + exts = self.library_extensions() + info = self._check_libs(lib_dirs, libs, opt_libs, exts) + if not info: + log.info(' libraries %s not found in %s', ','.join(libs), + lib_dirs) + + return info + + def _find_lib(self, lib_dir, lib, exts): + assert is_string(lib_dir) + # under windows first try without 'lib' prefix + if sys.platform == 'win32': + lib_prefixes = ['', 'lib'] + else: + lib_prefixes = ['lib'] + # for each library name, see if we can find a file for it. + for ext in exts: + for prefix in lib_prefixes: + p = self.combine_paths(lib_dir, prefix + lib + ext) + if p: + break + if p: + assert len(p) == 1 + # ??? splitext on p[0] would do this for cygwin + # doesn't seem correct + if ext == '.dll.a': + lib += '.dll' + if ext == '.lib': + lib = prefix + lib + return lib + + return False + + def _find_libs(self, lib_dirs, libs, exts): + # make sure we preserve the order of libs, as it can be important + found_dirs, found_libs = [], [] + for lib in libs: + for lib_dir in lib_dirs: + found_lib = self._find_lib(lib_dir, lib, exts) + if found_lib: + found_libs.append(found_lib) + if lib_dir not in found_dirs: + found_dirs.append(lib_dir) + break + return found_dirs, found_libs + + def _check_libs(self, lib_dirs, libs, opt_libs, exts): + """Find mandatory and optional libs in expected paths. + + Missing optional libraries are silently forgotten. + """ + if not is_sequence(lib_dirs): + lib_dirs = [lib_dirs] + # First, try to find the mandatory libraries + found_dirs, found_libs = self._find_libs(lib_dirs, libs, exts) + if len(found_libs) > 0 and len(found_libs) == len(libs): + # Now, check for optional libraries + opt_found_dirs, opt_found_libs = self._find_libs(lib_dirs, opt_libs, exts) + found_libs.extend(opt_found_libs) + for lib_dir in opt_found_dirs: + if lib_dir not in found_dirs: + found_dirs.append(lib_dir) + info = {'libraries': found_libs, 'library_dirs': found_dirs} + return info + else: + return None + + def combine_paths(self, *args): + """Return a list of existing paths composed by all combinations + of items from the arguments. 
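Putting the pieces above together, a hypothetical minimal system_info subclass would follow the same pattern as the fftw/blas classes below (class and library names are illustrative):

    from numpy.distutils.system_info import system_info, NotFoundError
    from numpy.distutils.misc_util import dict_append

    class foo_info(system_info):
        section = 'foo'          # read from the [foo] section of site.cfg
        dir_env_var = 'FOO'      # FOO=/some/prefix overrides the search path
        notfounderror = NotFoundError
        _lib_names = ['foo']

        def calc_info(self):
            lib_dirs = self.get_lib_dirs()
            libs = self.get_libs('libraries', self._lib_names)
            info = self.check_libs(lib_dirs, libs)
            if info is not None:
                dict_append(info, include_dirs=self.get_include_dirs())
                self.set_info(**info)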
+ """ + return combine_paths(*args) + + +class fft_opt_info(system_info): + + def calc_info(self): + info = {} + fftw_info = get_info('fftw3') or get_info('fftw2') or get_info('dfftw') + djbfft_info = get_info('djbfft') + if fftw_info: + dict_append(info, **fftw_info) + if djbfft_info: + dict_append(info, **djbfft_info) + self.set_info(**info) + return + + +class fftw_info(system_info): + #variables to override + section = 'fftw' + dir_env_var = 'FFTW' + notfounderror = FFTWNotFoundError + ver_info = [{'name':'fftw3', + 'libs':['fftw3'], + 'includes':['fftw3.h'], + 'macros':[('SCIPY_FFTW3_H', None)]}, + {'name':'fftw2', + 'libs':['rfftw', 'fftw'], + 'includes':['fftw.h', 'rfftw.h'], + 'macros':[('SCIPY_FFTW_H', None)]}] + + def calc_ver_info(self, ver_param): + """Returns True on successful version detection, else False""" + lib_dirs = self.get_lib_dirs() + incl_dirs = self.get_include_dirs() + + opt = self.get_option_single(self.section + '_libs', 'libraries') + libs = self.get_libs(opt, ver_param['libs']) + info = self.check_libs(lib_dirs, libs) + if info is not None: + flag = 0 + for d in incl_dirs: + if len(self.combine_paths(d, ver_param['includes'])) \ + == len(ver_param['includes']): + dict_append(info, include_dirs=[d]) + flag = 1 + break + if flag: + dict_append(info, define_macros=ver_param['macros']) + else: + info = None + if info is not None: + self.set_info(**info) + return True + else: + log.info(' %s not found' % (ver_param['name'])) + return False + + def calc_info(self): + for i in self.ver_info: + if self.calc_ver_info(i): + break + + +class fftw2_info(fftw_info): + #variables to override + section = 'fftw' + dir_env_var = 'FFTW' + notfounderror = FFTWNotFoundError + ver_info = [{'name':'fftw2', + 'libs':['rfftw', 'fftw'], + 'includes':['fftw.h', 'rfftw.h'], + 'macros':[('SCIPY_FFTW_H', None)]} + ] + + +class fftw3_info(fftw_info): + #variables to override + section = 'fftw3' + dir_env_var = 'FFTW3' + notfounderror = FFTWNotFoundError + ver_info = [{'name':'fftw3', + 'libs':['fftw3'], + 'includes':['fftw3.h'], + 'macros':[('SCIPY_FFTW3_H', None)]}, + ] + + +class fftw3_armpl_info(fftw_info): + section = 'fftw3' + dir_env_var = 'ARMPL_DIR' + notfounderror = FFTWNotFoundError + ver_info = [{'name': 'fftw3', + 'libs': ['armpl_lp64_mp'], + 'includes': ['fftw3.h'], + 'macros': [('SCIPY_FFTW3_H', None)]}] + + +class dfftw_info(fftw_info): + section = 'fftw' + dir_env_var = 'FFTW' + ver_info = [{'name':'dfftw', + 'libs':['drfftw', 'dfftw'], + 'includes':['dfftw.h', 'drfftw.h'], + 'macros':[('SCIPY_DFFTW_H', None)]}] + + +class sfftw_info(fftw_info): + section = 'fftw' + dir_env_var = 'FFTW' + ver_info = [{'name':'sfftw', + 'libs':['srfftw', 'sfftw'], + 'includes':['sfftw.h', 'srfftw.h'], + 'macros':[('SCIPY_SFFTW_H', None)]}] + + +class fftw_threads_info(fftw_info): + section = 'fftw' + dir_env_var = 'FFTW' + ver_info = [{'name':'fftw threads', + 'libs':['rfftw_threads', 'fftw_threads'], + 'includes':['fftw_threads.h', 'rfftw_threads.h'], + 'macros':[('SCIPY_FFTW_THREADS_H', None)]}] + + +class dfftw_threads_info(fftw_info): + section = 'fftw' + dir_env_var = 'FFTW' + ver_info = [{'name':'dfftw threads', + 'libs':['drfftw_threads', 'dfftw_threads'], + 'includes':['dfftw_threads.h', 'drfftw_threads.h'], + 'macros':[('SCIPY_DFFTW_THREADS_H', None)]}] + + +class sfftw_threads_info(fftw_info): + section = 'fftw' + dir_env_var = 'FFTW' + ver_info = [{'name':'sfftw threads', + 'libs':['srfftw_threads', 'sfftw_threads'], + 'includes':['sfftw_threads.h', 'srfftw_threads.h'], + 
'macros':[('SCIPY_SFFTW_THREADS_H', None)]}] + + +class djbfft_info(system_info): + section = 'djbfft' + dir_env_var = 'DJBFFT' + notfounderror = DJBFFTNotFoundError + + def get_paths(self, section, key): + pre_dirs = system_info.get_paths(self, section, key) + dirs = [] + for d in pre_dirs: + dirs.extend(self.combine_paths(d, ['djbfft']) + [d]) + return [d for d in dirs if os.path.isdir(d)] + + def calc_info(self): + lib_dirs = self.get_lib_dirs() + incl_dirs = self.get_include_dirs() + info = None + for d in lib_dirs: + p = self.combine_paths(d, ['djbfft.a']) + if p: + info = {'extra_objects': p} + break + p = self.combine_paths(d, ['libdjbfft.a', 'libdjbfft' + so_ext]) + if p: + info = {'libraries': ['djbfft'], 'library_dirs': [d]} + break + if info is None: + return + for d in incl_dirs: + if len(self.combine_paths(d, ['fftc8.h', 'fftfreq.h'])) == 2: + dict_append(info, include_dirs=[d], + define_macros=[('SCIPY_DJBFFT_H', None)]) + self.set_info(**info) + return + return + + +class mkl_info(system_info): + section = 'mkl' + dir_env_var = 'MKLROOT' + _lib_mkl = ['mkl_rt'] + + def get_mkl_rootdir(self): + mklroot = os.environ.get('MKLROOT', None) + if mklroot is not None: + return mklroot + paths = os.environ.get('LD_LIBRARY_PATH', '').split(os.pathsep) + ld_so_conf = '/etc/ld.so.conf' + if os.path.isfile(ld_so_conf): + with open(ld_so_conf, 'r') as f: + for d in f: + d = d.strip() + if d: + paths.append(d) + intel_mkl_dirs = [] + for path in paths: + path_atoms = path.split(os.sep) + for m in path_atoms: + if m.startswith('mkl'): + d = os.sep.join(path_atoms[:path_atoms.index(m) + 2]) + intel_mkl_dirs.append(d) + break + for d in paths: + dirs = glob(os.path.join(d, 'mkl', '*')) + dirs += glob(os.path.join(d, 'mkl*')) + for sub_dir in dirs: + if os.path.isdir(os.path.join(sub_dir, 'lib')): + return sub_dir + return None + + def __init__(self): + mklroot = self.get_mkl_rootdir() + if mklroot is None: + system_info.__init__(self) + else: + from .cpuinfo import cpu + if cpu.is_Itanium(): + plt = '64' + elif cpu.is_Intel() and cpu.is_64bit(): + plt = 'intel64' + else: + plt = '32' + system_info.__init__( + self, + default_lib_dirs=[os.path.join(mklroot, 'lib', plt)], + default_include_dirs=[os.path.join(mklroot, 'include')]) + + def calc_info(self): + lib_dirs = self.get_lib_dirs() + incl_dirs = self.get_include_dirs() + opt = self.get_option_single('mkl_libs', 'libraries') + mkl_libs = self.get_libs(opt, self._lib_mkl) + info = self.check_libs2(lib_dirs, mkl_libs) + if info is None: + return + dict_append(info, + define_macros=[('SCIPY_MKL_H', None), + ('HAVE_CBLAS', None)], + include_dirs=incl_dirs) + if sys.platform == 'win32': + pass # win32 has no pthread library + else: + dict_append(info, libraries=['pthread']) + self.set_info(**info) + + +class lapack_mkl_info(mkl_info): + pass + + +class blas_mkl_info(mkl_info): + pass + + +class armpl_info(system_info): + section = 'armpl' + dir_env_var = 'ARMPL_DIR' + _lib_armpl = ['armpl_lp64_mp'] + + def calc_info(self): + lib_dirs = self.get_lib_dirs() + incl_dirs = self.get_include_dirs() + armpl_libs = self.get_libs('armpl_libs', self._lib_armpl) + info = self.check_libs2(lib_dirs, armpl_libs) + if info is None: + return + dict_append(info, + define_macros=[('SCIPY_MKL_H', None), + ('HAVE_CBLAS', None)], + include_dirs=incl_dirs) + self.set_info(**info) + +class lapack_armpl_info(armpl_info): + pass + +class blas_armpl_info(armpl_info): + pass + + +class atlas_info(system_info): + section = 'atlas' + dir_env_var = 'ATLAS' + _lib_names = 
['f77blas', 'cblas'] + if sys.platform[:7] == 'freebsd': + _lib_atlas = ['atlas_r'] + _lib_lapack = ['alapack_r'] + else: + _lib_atlas = ['atlas'] + _lib_lapack = ['lapack'] + + notfounderror = AtlasNotFoundError + + def get_paths(self, section, key): + pre_dirs = system_info.get_paths(self, section, key) + dirs = [] + for d in pre_dirs: + dirs.extend(self.combine_paths(d, ['atlas*', 'ATLAS*', + 'sse', '3dnow', 'sse2']) + [d]) + return [d for d in dirs if os.path.isdir(d)] + + def calc_info(self): + lib_dirs = self.get_lib_dirs() + info = {} + opt = self.get_option_single('atlas_libs', 'libraries') + atlas_libs = self.get_libs(opt, self._lib_names + self._lib_atlas) + lapack_libs = self.get_libs('lapack_libs', self._lib_lapack) + atlas = None + lapack = None + atlas_1 = None + for d in lib_dirs: + atlas = self.check_libs2(d, atlas_libs, []) + if atlas is not None: + lib_dirs2 = [d] + self.combine_paths(d, ['atlas*', 'ATLAS*']) + lapack = self.check_libs2(lib_dirs2, lapack_libs, []) + if lapack is not None: + break + if atlas: + atlas_1 = atlas + log.info(self.__class__) + if atlas is None: + atlas = atlas_1 + if atlas is None: + return + include_dirs = self.get_include_dirs() + h = (self.combine_paths(lib_dirs + include_dirs, 'cblas.h') or [None]) + h = h[0] + if h: + h = os.path.dirname(h) + dict_append(info, include_dirs=[h]) + info['language'] = 'c' + if lapack is not None: + dict_append(info, **lapack) + dict_append(info, **atlas) + elif 'lapack_atlas' in atlas['libraries']: + dict_append(info, **atlas) + dict_append(info, + define_macros=[('ATLAS_WITH_LAPACK_ATLAS', None)]) + self.set_info(**info) + return + else: + dict_append(info, **atlas) + dict_append(info, define_macros=[('ATLAS_WITHOUT_LAPACK', None)]) + message = textwrap.dedent(""" + ********************************************************************* + Could not find lapack library within the ATLAS installation. + ********************************************************************* + """) + warnings.warn(message, stacklevel=2) + self.set_info(**info) + return + + # Check if lapack library is complete, only warn if it is not. + lapack_dir = lapack['library_dirs'][0] + lapack_name = lapack['libraries'][0] + lapack_lib = None + lib_prefixes = ['lib'] + if sys.platform == 'win32': + lib_prefixes.append('') + for e in self.library_extensions(): + for prefix in lib_prefixes: + fn = os.path.join(lapack_dir, prefix + lapack_name + e) + if os.path.exists(fn): + lapack_lib = fn + break + if lapack_lib: + break + if lapack_lib is not None: + sz = os.stat(lapack_lib)[6] + if sz <= 4000 * 1024: + message = textwrap.dedent(""" + ********************************************************************* + Lapack library (from ATLAS) is probably incomplete: + size of %s is %sk (expected >4000k) + + Follow the instructions in the KNOWN PROBLEMS section of the file + numpy/INSTALL.txt. 
+ ********************************************************************* + """) % (lapack_lib, sz / 1024) + warnings.warn(message, stacklevel=2) + else: + info['language'] = 'f77' + + atlas_version, atlas_extra_info = get_atlas_version(**atlas) + dict_append(info, **atlas_extra_info) + + self.set_info(**info) + + +class atlas_blas_info(atlas_info): + _lib_names = ['f77blas', 'cblas'] + + def calc_info(self): + lib_dirs = self.get_lib_dirs() + info = {} + opt = self.get_option_single('atlas_libs', 'libraries') + atlas_libs = self.get_libs(opt, self._lib_names + self._lib_atlas) + atlas = self.check_libs2(lib_dirs, atlas_libs, []) + if atlas is None: + return + include_dirs = self.get_include_dirs() + h = (self.combine_paths(lib_dirs + include_dirs, 'cblas.h') or [None]) + h = h[0] + if h: + h = os.path.dirname(h) + dict_append(info, include_dirs=[h]) + info['language'] = 'c' + info['define_macros'] = [('HAVE_CBLAS', None)] + + atlas_version, atlas_extra_info = get_atlas_version(**atlas) + dict_append(atlas, **atlas_extra_info) + + dict_append(info, **atlas) + + self.set_info(**info) + return + + +class atlas_threads_info(atlas_info): + dir_env_var = ['PTATLAS', 'ATLAS'] + _lib_names = ['ptf77blas', 'ptcblas'] + + +class atlas_blas_threads_info(atlas_blas_info): + dir_env_var = ['PTATLAS', 'ATLAS'] + _lib_names = ['ptf77blas', 'ptcblas'] + + +class lapack_atlas_info(atlas_info): + _lib_names = ['lapack_atlas'] + atlas_info._lib_names + + +class lapack_atlas_threads_info(atlas_threads_info): + _lib_names = ['lapack_atlas'] + atlas_threads_info._lib_names + + +class atlas_3_10_info(atlas_info): + _lib_names = ['satlas'] + _lib_atlas = _lib_names + _lib_lapack = _lib_names + + +class atlas_3_10_blas_info(atlas_3_10_info): + _lib_names = ['satlas'] + + def calc_info(self): + lib_dirs = self.get_lib_dirs() + info = {} + opt = self.get_option_single('atlas_lib', 'libraries') + atlas_libs = self.get_libs(opt, self._lib_names) + atlas = self.check_libs2(lib_dirs, atlas_libs, []) + if atlas is None: + return + include_dirs = self.get_include_dirs() + h = (self.combine_paths(lib_dirs + include_dirs, 'cblas.h') or [None]) + h = h[0] + if h: + h = os.path.dirname(h) + dict_append(info, include_dirs=[h]) + info['language'] = 'c' + info['define_macros'] = [('HAVE_CBLAS', None)] + + atlas_version, atlas_extra_info = get_atlas_version(**atlas) + dict_append(atlas, **atlas_extra_info) + + dict_append(info, **atlas) + + self.set_info(**info) + return + + +class atlas_3_10_threads_info(atlas_3_10_info): + dir_env_var = ['PTATLAS', 'ATLAS'] + _lib_names = ['tatlas'] + _lib_atlas = _lib_names + _lib_lapack = _lib_names + + +class atlas_3_10_blas_threads_info(atlas_3_10_blas_info): + dir_env_var = ['PTATLAS', 'ATLAS'] + _lib_names = ['tatlas'] + + +class lapack_atlas_3_10_info(atlas_3_10_info): + pass + + +class lapack_atlas_3_10_threads_info(atlas_3_10_threads_info): + pass + + +class lapack_info(system_info): + section = 'lapack' + dir_env_var = 'LAPACK' + _lib_names = ['lapack'] + notfounderror = LapackNotFoundError + + def calc_info(self): + lib_dirs = self.get_lib_dirs() + + opt = self.get_option_single('lapack_libs', 'libraries') + lapack_libs = self.get_libs(opt, self._lib_names) + info = self.check_libs(lib_dirs, lapack_libs, []) + if info is None: + return + info['language'] = 'f77' + self.set_info(**info) + + +class lapack_src_info(system_info): + # LAPACK_SRC is deprecated, please do not use this! + # Build or install a BLAS library via your package manager or from + # source separately. 
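+    # If it must be used anyway, the source tree is located through the
+    # LAPACK_SRC environment variable or a [lapack_src] section with a
+    # src_dirs key in site.cfg, e.g. (path purely illustrative):
+    #
+    #     export LAPACK_SRC=/path/to/lapack-3.x/SRC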
+ section = 'lapack_src' + dir_env_var = 'LAPACK_SRC' + notfounderror = LapackSrcNotFoundError + + def get_paths(self, section, key): + pre_dirs = system_info.get_paths(self, section, key) + dirs = [] + for d in pre_dirs: + dirs.extend([d] + self.combine_paths(d, ['LAPACK*/SRC', 'SRC'])) + return [d for d in dirs if os.path.isdir(d)] + + def calc_info(self): + src_dirs = self.get_src_dirs() + src_dir = '' + for d in src_dirs: + if os.path.isfile(os.path.join(d, 'dgesv.f')): + src_dir = d + break + if not src_dir: + #XXX: Get sources from netlib. May be ask first. + return + # The following is extracted from LAPACK-3.0/SRC/Makefile. + # Added missing names from lapack-lite-3.1.1/SRC/Makefile + # while keeping removed names for Lapack-3.0 compatibility. + allaux = ''' + ilaenv ieeeck lsame lsamen xerbla + iparmq + ''' # *.f + laux = ''' + bdsdc bdsqr disna labad lacpy ladiv lae2 laebz laed0 laed1 + laed2 laed3 laed4 laed5 laed6 laed7 laed8 laed9 laeda laev2 + lagtf lagts lamch lamrg lanst lapy2 lapy3 larnv larrb larre + larrf lartg laruv las2 lascl lasd0 lasd1 lasd2 lasd3 lasd4 + lasd5 lasd6 lasd7 lasd8 lasd9 lasda lasdq lasdt laset lasq1 + lasq2 lasq3 lasq4 lasq5 lasq6 lasr lasrt lassq lasv2 pttrf + stebz stedc steqr sterf + + larra larrc larrd larr larrk larrj larrr laneg laisnan isnan + lazq3 lazq4 + ''' # [s|d]*.f + lasrc = ''' + gbbrd gbcon gbequ gbrfs gbsv gbsvx gbtf2 gbtrf gbtrs gebak + gebal gebd2 gebrd gecon geequ gees geesx geev geevx gegs gegv + gehd2 gehrd gelq2 gelqf gels gelsd gelss gelsx gelsy geql2 + geqlf geqp3 geqpf geqr2 geqrf gerfs gerq2 gerqf gesc2 gesdd + gesv gesvd gesvx getc2 getf2 getrf getri getrs ggbak ggbal + gges ggesx ggev ggevx ggglm gghrd gglse ggqrf ggrqf ggsvd + ggsvp gtcon gtrfs gtsv gtsvx gttrf gttrs gtts2 hgeqz hsein + hseqr labrd lacon laein lags2 lagtm lahqr lahrd laic1 lals0 + lalsa lalsd langb lange langt lanhs lansb lansp lansy lantb + lantp lantr lapll lapmt laqgb laqge laqp2 laqps laqsb laqsp + laqsy lar1v lar2v larf larfb larfg larft larfx largv larrv + lartv larz larzb larzt laswp lasyf latbs latdf latps latrd + latrs latrz latzm lauu2 lauum pbcon pbequ pbrfs pbstf pbsv + pbsvx pbtf2 pbtrf pbtrs pocon poequ porfs posv posvx potf2 + potrf potri potrs ppcon ppequ pprfs ppsv ppsvx pptrf pptri + pptrs ptcon pteqr ptrfs ptsv ptsvx pttrs ptts2 spcon sprfs + spsv spsvx sptrf sptri sptrs stegr stein sycon syrfs sysv + sysvx sytf2 sytrf sytri sytrs tbcon tbrfs tbtrs tgevc tgex2 + tgexc tgsen tgsja tgsna tgsy2 tgsyl tpcon tprfs tptri tptrs + trcon trevc trexc trrfs trsen trsna trsyl trti2 trtri trtrs + tzrqf tzrzf + + lacn2 lahr2 stemr laqr0 laqr1 laqr2 laqr3 laqr4 laqr5 + ''' # [s|c|d|z]*.f + sd_lasrc = ''' + laexc lag2 lagv2 laln2 lanv2 laqtr lasy2 opgtr opmtr org2l + org2r orgbr orghr orgl2 orglq orgql orgqr orgr2 orgrq orgtr + orm2l orm2r ormbr ormhr orml2 ormlq ormql ormqr ormr2 ormr3 + ormrq ormrz ormtr rscl sbev sbevd sbevx sbgst sbgv sbgvd sbgvx + sbtrd spev spevd spevx spgst spgv spgvd spgvx sptrd stev stevd + stevr stevx syev syevd syevr syevx sygs2 sygst sygv sygvd + sygvx sytd2 sytrd + ''' # [s|d]*.f + cz_lasrc = ''' + bdsqr hbev hbevd hbevx hbgst hbgv hbgvd hbgvx hbtrd hecon heev + heevd heevr heevx hegs2 hegst hegv hegvd hegvx herfs hesv + hesvx hetd2 hetf2 hetrd hetrf hetri hetrs hpcon hpev hpevd + hpevx hpgst hpgv hpgvd hpgvx hprfs hpsv hpsvx hptrd hptrf + hptri hptrs lacgv lacp2 lacpy lacrm lacrt ladiv laed0 laed7 + laed8 laesy laev2 lahef lanhb lanhe lanhp lanht laqhb laqhe + laqhp larcm larnv lartg lascl laset lasr lassq pttrf rot spmv + 
spr stedc steqr symv syr ung2l ung2r ungbr unghr ungl2 unglq + ungql ungqr ungr2 ungrq ungtr unm2l unm2r unmbr unmhr unml2 + unmlq unmql unmqr unmr2 unmr3 unmrq unmrz unmtr upgtr upmtr + ''' # [c|z]*.f + ####### + sclaux = laux + ' econd ' # s*.f + dzlaux = laux + ' secnd ' # d*.f + slasrc = lasrc + sd_lasrc # s*.f + dlasrc = lasrc + sd_lasrc # d*.f + clasrc = lasrc + cz_lasrc + ' srot srscl ' # c*.f + zlasrc = lasrc + cz_lasrc + ' drot drscl ' # z*.f + oclasrc = ' icmax1 scsum1 ' # *.f + ozlasrc = ' izmax1 dzsum1 ' # *.f + sources = ['s%s.f' % f for f in (sclaux + slasrc).split()] \ + + ['d%s.f' % f for f in (dzlaux + dlasrc).split()] \ + + ['c%s.f' % f for f in (clasrc).split()] \ + + ['z%s.f' % f for f in (zlasrc).split()] \ + + ['%s.f' % f for f in (allaux + oclasrc + ozlasrc).split()] + sources = [os.path.join(src_dir, f) for f in sources] + # Lapack 3.1: + src_dir2 = os.path.join(src_dir, '..', 'INSTALL') + sources += [os.path.join(src_dir2, p + 'lamch.f') for p in 'sdcz'] + # Lapack 3.2.1: + sources += [os.path.join(src_dir, p + 'larfp.f') for p in 'sdcz'] + sources += [os.path.join(src_dir, 'ila' + p + 'lr.f') for p in 'sdcz'] + sources += [os.path.join(src_dir, 'ila' + p + 'lc.f') for p in 'sdcz'] + # Should we check here actual existence of source files? + # Yes, the file listing is different between 3.0 and 3.1 + # versions. + sources = [f for f in sources if os.path.isfile(f)] + info = {'sources': sources, 'language': 'f77'} + self.set_info(**info) + +atlas_version_c_text = r''' +/* This file is generated from numpy/distutils/system_info.py */ +void ATL_buildinfo(void); +int main(void) { + ATL_buildinfo(); + return 0; +} +''' + +_cached_atlas_version = {} + + +def get_atlas_version(**config): + libraries = config.get('libraries', []) + library_dirs = config.get('library_dirs', []) + key = (tuple(libraries), tuple(library_dirs)) + if key in _cached_atlas_version: + return _cached_atlas_version[key] + c = cmd_config(Distribution()) + atlas_version = None + info = {} + try: + s, o = c.get_output(atlas_version_c_text, + libraries=libraries, library_dirs=library_dirs, + ) + if s and re.search(r'undefined reference to `_gfortran', o, re.M): + s, o = c.get_output(atlas_version_c_text, + libraries=libraries + ['gfortran'], + library_dirs=library_dirs, + ) + if not s: + warnings.warn(textwrap.dedent(""" + ***************************************************** + Linkage with ATLAS requires gfortran. Use + + python setup.py config_fc --fcompiler=gnu95 ... + + when building extension libraries that use ATLAS. + Make sure that -lgfortran is used for C++ extensions. 
+                    *****************************************************
+                    """), stacklevel=2)
+                dict_append(info, language='f90',
+                            define_macros=[('ATLAS_REQUIRES_GFORTRAN', None)])
+    except Exception:  # failed to get version from file -- maybe on Windows
+        # look at directory name
+        for o in library_dirs:
+            m = re.search(r'ATLAS_(?P<version>\d+[.]\d+[.]\d+)_', o)
+            if m:
+                atlas_version = m.group('version')
+            if atlas_version is not None:
+                break
+
+        # final choice --- look at ATLAS_VERSION environment
+        # variable
+        if atlas_version is None:
+            atlas_version = os.environ.get('ATLAS_VERSION', None)
+        if atlas_version:
+            dict_append(info, define_macros=[(
+                'ATLAS_INFO', _c_string_literal(atlas_version))
+            ])
+        else:
+            dict_append(info, define_macros=[('NO_ATLAS_INFO', -1)])
+        return atlas_version or '?.?.?', info
+
+    if not s:
+        m = re.search(r'ATLAS version (?P<version>\d+[.]\d+[.]\d+)', o)
+        if m:
+            atlas_version = m.group('version')
+        if atlas_version is None:
+            if re.search(r'undefined symbol: ATL_buildinfo', o, re.M):
+                atlas_version = '3.2.1_pre3.3.6'
+            else:
+                log.info('Status: %d', s)
+                log.info('Output: %s', o)
+
+    elif atlas_version == '3.2.1_pre3.3.6':
+        dict_append(info, define_macros=[('NO_ATLAS_INFO', -2)])
+    else:
+        dict_append(info, define_macros=[(
+            'ATLAS_INFO', _c_string_literal(atlas_version))
+        ])
+    result = _cached_atlas_version[key] = atlas_version, info
+    return result
+
+
+class lapack_opt_info(system_info):
+    notfounderror = LapackNotFoundError
+
+    # List of all known LAPACK libraries, in the default order
+    lapack_order = ['armpl', 'mkl', 'openblas', 'flame',
+                    'accelerate', 'atlas', 'lapack']
+    order_env_var_name = 'NPY_LAPACK_ORDER'
+
+    def _calc_info_armpl(self):
+        info = get_info('lapack_armpl')
+        if info:
+            self.set_info(**info)
+            return True
+        return False
+
+    def _calc_info_mkl(self):
+        info = get_info('lapack_mkl')
+        if info:
+            self.set_info(**info)
+            return True
+        return False
+
+    def _calc_info_openblas(self):
+        info = get_info('openblas_lapack')
+        if info:
+            self.set_info(**info)
+            return True
+        info = get_info('openblas_clapack')
+        if info:
+            self.set_info(**info)
+            return True
+        return False
+
+    def _calc_info_flame(self):
+        info = get_info('flame')
+        if info:
+            self.set_info(**info)
+            return True
+        return False
+
+    def _calc_info_atlas(self):
+        info = get_info('atlas_3_10_threads')
+        if not info:
+            info = get_info('atlas_3_10')
+        if not info:
+            info = get_info('atlas_threads')
+        if not info:
+            info = get_info('atlas')
+        if info:
+            # Figure out if ATLAS has lapack...
+            # If not we need the lapack library, but not BLAS!
+            l = info.get('define_macros', [])
+            if ('ATLAS_WITH_LAPACK_ATLAS', None) in l \
+               or ('ATLAS_WITHOUT_LAPACK', None) in l:
+                # Get LAPACK (with possible warnings)
+                # If not found we don't accept anything
+                # since we can't use ATLAS with LAPACK!
+ lapack_info = self._get_info_lapack() + if not lapack_info: + return False + dict_append(info, **lapack_info) + self.set_info(**info) + return True + return False + + def _calc_info_accelerate(self): + info = get_info('accelerate') + if info: + self.set_info(**info) + return True + return False + + def _get_info_blas(self): + # Default to get the optimized BLAS implementation + info = get_info('blas_opt') + if not info: + warnings.warn(BlasNotFoundError.__doc__ or '', stacklevel=3) + info_src = get_info('blas_src') + if not info_src: + warnings.warn(BlasSrcNotFoundError.__doc__ or '', stacklevel=3) + return {} + dict_append(info, libraries=[('fblas_src', info_src)]) + return info + + def _get_info_lapack(self): + info = get_info('lapack') + if not info: + warnings.warn(LapackNotFoundError.__doc__ or '', stacklevel=3) + info_src = get_info('lapack_src') + if not info_src: + warnings.warn(LapackSrcNotFoundError.__doc__ or '', stacklevel=3) + return {} + dict_append(info, libraries=[('flapack_src', info_src)]) + return info + + def _calc_info_lapack(self): + info = self._get_info_lapack() + if info: + info_blas = self._get_info_blas() + dict_append(info, **info_blas) + dict_append(info, define_macros=[('NO_ATLAS_INFO', 1)]) + self.set_info(**info) + return True + return False + + def _calc_info_from_envvar(self): + info = {} + info['language'] = 'f77' + info['libraries'] = [] + info['include_dirs'] = [] + info['define_macros'] = [] + info['extra_link_args'] = os.environ['NPY_LAPACK_LIBS'].split() + self.set_info(**info) + return True + + def _calc_info(self, name): + return getattr(self, '_calc_info_{}'.format(name))() + + def calc_info(self): + lapack_order, unknown_order = _parse_env_order(self.lapack_order, self.order_env_var_name) + if len(unknown_order) > 0: + raise ValueError("lapack_opt_info user defined " + "LAPACK order has unacceptable " + "values: {}".format(unknown_order)) + + if 'NPY_LAPACK_LIBS' in os.environ: + # Bypass autodetection, set language to F77 and use env var linker + # flags directly + self._calc_info_from_envvar() + return + + for lapack in lapack_order: + if self._calc_info(lapack): + return + + if 'lapack' not in lapack_order: + # Since the user may request *not* to use any library, we still need + # to raise warnings to signal missing packages! 
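+            # (e.g. NPY_LAPACK_ORDER='mkl,openblas' -- an illustrative
+            # value that deliberately omits 'lapack'.)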
+ warnings.warn(LapackNotFoundError.__doc__ or '', stacklevel=2) + warnings.warn(LapackSrcNotFoundError.__doc__ or '', stacklevel=2) + + +class _ilp64_opt_info_mixin: + symbol_suffix = None + symbol_prefix = None + + def _check_info(self, info): + macros = dict(info.get('define_macros', [])) + prefix = macros.get('BLAS_SYMBOL_PREFIX', '') + suffix = macros.get('BLAS_SYMBOL_SUFFIX', '') + + if self.symbol_prefix not in (None, prefix): + return False + + if self.symbol_suffix not in (None, suffix): + return False + + return bool(info) + + +class lapack_ilp64_opt_info(lapack_opt_info, _ilp64_opt_info_mixin): + notfounderror = LapackILP64NotFoundError + lapack_order = ['openblas64_', 'openblas_ilp64'] + order_env_var_name = 'NPY_LAPACK_ILP64_ORDER' + + def _calc_info(self, name): + info = get_info(name + '_lapack') + if self._check_info(info): + self.set_info(**info) + return True + return False + + +class lapack_ilp64_plain_opt_info(lapack_ilp64_opt_info): + # Same as lapack_ilp64_opt_info, but fix symbol names + symbol_prefix = '' + symbol_suffix = '' + + +class lapack64__opt_info(lapack_ilp64_opt_info): + symbol_prefix = '' + symbol_suffix = '64_' + + +class blas_opt_info(system_info): + notfounderror = BlasNotFoundError + # List of all known BLAS libraries, in the default order + + blas_order = ['armpl', 'mkl', 'blis', 'openblas', + 'accelerate', 'atlas', 'blas'] + order_env_var_name = 'NPY_BLAS_ORDER' + + def _calc_info_armpl(self): + info = get_info('blas_armpl') + if info: + self.set_info(**info) + return True + return False + + def _calc_info_mkl(self): + info = get_info('blas_mkl') + if info: + self.set_info(**info) + return True + return False + + def _calc_info_blis(self): + info = get_info('blis') + if info: + self.set_info(**info) + return True + return False + + def _calc_info_openblas(self): + info = get_info('openblas') + if info: + self.set_info(**info) + return True + return False + + def _calc_info_atlas(self): + info = get_info('atlas_3_10_blas_threads') + if not info: + info = get_info('atlas_3_10_blas') + if not info: + info = get_info('atlas_blas_threads') + if not info: + info = get_info('atlas_blas') + if info: + self.set_info(**info) + return True + return False + + def _calc_info_accelerate(self): + info = get_info('accelerate') + if info: + self.set_info(**info) + return True + return False + + def _calc_info_blas(self): + # Warn about a non-optimized BLAS library + warnings.warn(BlasOptNotFoundError.__doc__ or '', stacklevel=3) + info = {} + dict_append(info, define_macros=[('NO_ATLAS_INFO', 1)]) + + blas = get_info('blas') + if blas: + dict_append(info, **blas) + else: + # Not even BLAS was found! 
+ warnings.warn(BlasNotFoundError.__doc__ or '', stacklevel=3) + + blas_src = get_info('blas_src') + if not blas_src: + warnings.warn(BlasSrcNotFoundError.__doc__ or '', stacklevel=3) + return False + dict_append(info, libraries=[('fblas_src', blas_src)]) + + self.set_info(**info) + return True + + def _calc_info_from_envvar(self): + info = {} + info['language'] = 'f77' + info['libraries'] = [] + info['include_dirs'] = [] + info['define_macros'] = [] + info['extra_link_args'] = os.environ['NPY_BLAS_LIBS'].split() + if 'NPY_CBLAS_LIBS' in os.environ: + info['define_macros'].append(('HAVE_CBLAS', None)) + info['extra_link_args'].extend( + os.environ['NPY_CBLAS_LIBS'].split()) + self.set_info(**info) + return True + + def _calc_info(self, name): + return getattr(self, '_calc_info_{}'.format(name))() + + def calc_info(self): + blas_order, unknown_order = _parse_env_order(self.blas_order, self.order_env_var_name) + if len(unknown_order) > 0: + raise ValueError("blas_opt_info user defined BLAS order has unacceptable values: {}".format(unknown_order)) + + if 'NPY_BLAS_LIBS' in os.environ: + # Bypass autodetection, set language to F77 and use env var linker + # flags directly + self._calc_info_from_envvar() + return + + for blas in blas_order: + if self._calc_info(blas): + return + + if 'blas' not in blas_order: + # Since the user may request *not* to use any library, we still need + # to raise warnings to signal missing packages! + warnings.warn(BlasNotFoundError.__doc__ or '', stacklevel=2) + warnings.warn(BlasSrcNotFoundError.__doc__ or '', stacklevel=2) + + +class blas_ilp64_opt_info(blas_opt_info, _ilp64_opt_info_mixin): + notfounderror = BlasILP64NotFoundError + blas_order = ['openblas64_', 'openblas_ilp64'] + order_env_var_name = 'NPY_BLAS_ILP64_ORDER' + + def _calc_info(self, name): + info = get_info(name) + if self._check_info(info): + self.set_info(**info) + return True + return False + + +class blas_ilp64_plain_opt_info(blas_ilp64_opt_info): + symbol_prefix = '' + symbol_suffix = '' + + +class blas64__opt_info(blas_ilp64_opt_info): + symbol_prefix = '' + symbol_suffix = '64_' + + +class cblas_info(system_info): + section = 'cblas' + dir_env_var = 'CBLAS' + # No default as it's used only in blas_info + _lib_names = [] + notfounderror = BlasNotFoundError + + +class blas_info(system_info): + section = 'blas' + dir_env_var = 'BLAS' + _lib_names = ['blas'] + notfounderror = BlasNotFoundError + + def calc_info(self): + lib_dirs = self.get_lib_dirs() + opt = self.get_option_single('blas_libs', 'libraries') + blas_libs = self.get_libs(opt, self._lib_names) + info = self.check_libs(lib_dirs, blas_libs, []) + if info is None: + return + else: + info['include_dirs'] = self.get_include_dirs() + if platform.system() == 'Windows': + # The check for windows is needed because get_cblas_libs uses the + # same compiler that was used to compile Python and msvc is + # often not installed when mingw is being used. This rough + # treatment is not desirable, but windows is tricky. + info['language'] = 'f77' # XXX: is it generally true? 
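+            # A hypothetical site.cfg fragment that would exercise the
+            # cblas branch below (section contents are illustrative only):
+            #
+            #     [blas]
+            #     libraries = blas
+            #
+            #     [cblas]
+            #     libraries = cblas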
+            # If cblas is given as an option, use those
+            cblas_info_obj = cblas_info()
+            cblas_opt = cblas_info_obj.get_option_single('cblas_libs', 'libraries')
+            cblas_libs = cblas_info_obj.get_libs(cblas_opt, None)
+            if cblas_libs:
+                info['libraries'] = cblas_libs + blas_libs
+                info['define_macros'] = [('HAVE_CBLAS', None)]
+        else:
+            lib = self.get_cblas_libs(info)
+            if lib is not None:
+                info['language'] = 'c'
+                info['libraries'] = lib
+                info['define_macros'] = [('HAVE_CBLAS', None)]
+        self.set_info(**info)
+
+    def get_cblas_libs(self, info):
+        """ Check whether we can link with CBLAS interface
+
+        This method will search through several combinations of libraries
+        to check whether CBLAS is present:
+
+        1. Libraries in ``info['libraries']``, as is
+        2. As 1. but also explicitly adding ``'cblas'`` as a library
+        3. As 1. but also explicitly adding ``'blas'`` as a library
+        4. Check only library ``'cblas'``
+        5. Check only library ``'blas'``
+
+        Parameters
+        ----------
+        info : dict
+            system information dictionary for compilation and linking
+
+        Returns
+        -------
+        libraries : list of str or None
+            a list of libraries that enables the use of CBLAS interface.
+            Returns None if not found or a compilation error occurs.
+
+            Since 1.17 returns a list.
+        """
+        # primitive cblas check by looking for the header and trying to link
+        # cblas or blas
+        c = customized_ccompiler()
+        tmpdir = tempfile.mkdtemp()
+        s = textwrap.dedent("""\
+            #include <cblas.h>
+            int main(int argc, const char *argv[])
+            {
+                double a[4] = {1,2,3,4};
+                double b[4] = {5,6,7,8};
+                return cblas_ddot(4, a, 1, b, 1) > 10;
+            }""")
+        src = os.path.join(tmpdir, 'source.c')
+        try:
+            with open(src, 'wt') as f:
+                f.write(s)
+
+            try:
+                # check we can compile (find headers)
+                obj = c.compile([src], output_dir=tmpdir,
+                                include_dirs=self.get_include_dirs())
+            except (distutils.ccompiler.CompileError, distutils.ccompiler.LinkError):
+                return None
+
+            # check we can link (find library)
+            # some systems have separate cblas and blas libs.
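+            # tried in order: the libraries as given, then with 'cblas'
+            # or 'blas' prepended, then 'cblas' alone, then 'blas' alone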
+ for libs in [info['libraries'], ['cblas'] + info['libraries'], + ['blas'] + info['libraries'], ['cblas'], ['blas']]: + try: + c.link_executable(obj, os.path.join(tmpdir, "a.out"), + libraries=libs, + library_dirs=info['library_dirs'], + extra_postargs=info.get('extra_link_args', [])) + return libs + except distutils.ccompiler.LinkError: + pass + finally: + shutil.rmtree(tmpdir) + return None + + +class openblas_info(blas_info): + section = 'openblas' + dir_env_var = 'OPENBLAS' + _lib_names = ['openblas'] + _require_symbols = [] + notfounderror = BlasNotFoundError + + @property + def symbol_prefix(self): + try: + return self.cp.get(self.section, 'symbol_prefix') + except NoOptionError: + return '' + + @property + def symbol_suffix(self): + try: + return self.cp.get(self.section, 'symbol_suffix') + except NoOptionError: + return '' + + def _calc_info(self): + c = customized_ccompiler() + + lib_dirs = self.get_lib_dirs() + + # Prefer to use libraries over openblas_libs + opt = self.get_option_single('openblas_libs', 'libraries') + openblas_libs = self.get_libs(opt, self._lib_names) + + info = self.check_libs(lib_dirs, openblas_libs, []) + + if c.compiler_type == "msvc" and info is None: + from numpy.distutils.fcompiler import new_fcompiler + f = new_fcompiler(c_compiler=c) + if f and f.compiler_type == 'gnu95': + # Try gfortran-compatible library files + info = self.check_msvc_gfortran_libs(lib_dirs, openblas_libs) + # Skip lapack check, we'd need build_ext to do it + skip_symbol_check = True + elif info: + skip_symbol_check = False + info['language'] = 'c' + + if info is None: + return None + + # Add extra info for OpenBLAS + extra_info = self.calc_extra_info() + dict_append(info, **extra_info) + + if not (skip_symbol_check or self.check_symbols(info)): + return None + + info['define_macros'] = [('HAVE_CBLAS', None)] + if self.symbol_prefix: + info['define_macros'] += [('BLAS_SYMBOL_PREFIX', self.symbol_prefix)] + if self.symbol_suffix: + info['define_macros'] += [('BLAS_SYMBOL_SUFFIX', self.symbol_suffix)] + + return info + + def calc_info(self): + info = self._calc_info() + if info is not None: + self.set_info(**info) + + def check_msvc_gfortran_libs(self, library_dirs, libraries): + # First, find the full path to each library directory + library_paths = [] + for library in libraries: + for library_dir in library_dirs: + # MinGW static ext will be .a + fullpath = os.path.join(library_dir, library + '.a') + if os.path.isfile(fullpath): + library_paths.append(fullpath) + break + else: + return None + + # Generate numpy.distutils virtual static library file + basename = self.__class__.__name__ + tmpdir = os.path.join(os.getcwd(), 'build', basename) + if not os.path.isdir(tmpdir): + os.makedirs(tmpdir) + + info = {'library_dirs': [tmpdir], + 'libraries': [basename], + 'language': 'f77'} + + fake_lib_file = os.path.join(tmpdir, basename + '.fobjects') + fake_clib_file = os.path.join(tmpdir, basename + '.cobjects') + with open(fake_lib_file, 'w') as f: + f.write("\n".join(library_paths)) + with open(fake_clib_file, 'w') as f: + pass + + return info + + def check_symbols(self, info): + res = False + c = customized_ccompiler() + + tmpdir = tempfile.mkdtemp() + + prototypes = "\n".join("void %s%s%s();" % (self.symbol_prefix, + symbol_name, + self.symbol_suffix) + for symbol_name in self._require_symbols) + calls = "\n".join("%s%s%s();" % (self.symbol_prefix, + symbol_name, + self.symbol_suffix) + for symbol_name in self._require_symbols) + s = textwrap.dedent("""\ + %(prototypes)s + int main(int 
argc, const char *argv[]) + { + %(calls)s + return 0; + }""") % dict(prototypes=prototypes, calls=calls) + src = os.path.join(tmpdir, 'source.c') + out = os.path.join(tmpdir, 'a.out') + # Add the additional "extra" arguments + try: + extra_args = info['extra_link_args'] + except Exception: + extra_args = [] + try: + with open(src, 'wt') as f: + f.write(s) + obj = c.compile([src], output_dir=tmpdir) + try: + c.link_executable(obj, out, libraries=info['libraries'], + library_dirs=info['library_dirs'], + extra_postargs=extra_args) + res = True + except distutils.ccompiler.LinkError: + res = False + finally: + shutil.rmtree(tmpdir) + return res + +class openblas_lapack_info(openblas_info): + section = 'openblas' + dir_env_var = 'OPENBLAS' + _lib_names = ['openblas'] + _require_symbols = ['zungqr_'] + notfounderror = BlasNotFoundError + +class openblas_clapack_info(openblas_lapack_info): + _lib_names = ['openblas', 'lapack'] + +class openblas_ilp64_info(openblas_info): + section = 'openblas_ilp64' + dir_env_var = 'OPENBLAS_ILP64' + _lib_names = ['openblas64'] + _require_symbols = ['dgemm_', 'cblas_dgemm'] + notfounderror = BlasILP64NotFoundError + + def _calc_info(self): + info = super()._calc_info() + if info is not None: + info['define_macros'] += [('HAVE_BLAS_ILP64', None)] + return info + +class openblas_ilp64_lapack_info(openblas_ilp64_info): + _require_symbols = ['dgemm_', 'cblas_dgemm', 'zungqr_', 'LAPACKE_zungqr'] + + def _calc_info(self): + info = super()._calc_info() + if info: + info['define_macros'] += [('HAVE_LAPACKE', None)] + return info + +class openblas64__info(openblas_ilp64_info): + # ILP64 Openblas, with default symbol suffix + section = 'openblas64_' + dir_env_var = 'OPENBLAS64_' + _lib_names = ['openblas64_'] + symbol_suffix = '64_' + symbol_prefix = '' + +class openblas64__lapack_info(openblas_ilp64_lapack_info, openblas64__info): + pass + +class blis_info(blas_info): + section = 'blis' + dir_env_var = 'BLIS' + _lib_names = ['blis'] + notfounderror = BlasNotFoundError + + def calc_info(self): + lib_dirs = self.get_lib_dirs() + opt = self.get_option_single('blis_libs', 'libraries') + blis_libs = self.get_libs(opt, self._lib_names) + info = self.check_libs2(lib_dirs, blis_libs, []) + if info is None: + return + + # Add include dirs + incl_dirs = self.get_include_dirs() + dict_append(info, + language='c', + define_macros=[('HAVE_CBLAS', None)], + include_dirs=incl_dirs) + self.set_info(**info) + + +class flame_info(system_info): + """ Usage of libflame for LAPACK operations + + This requires libflame to be compiled with lapack wrappers: + + ./configure --enable-lapack2flame ... + + Be aware that libflame 5.1.0 has some missing names in the shared library, so + if you have problems, try the static flame library. 
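+    The check_embedded_lapack method below verifies the wrappers by
+    test-linking a tiny C program against ``zungqr_``.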
+ """ + section = 'flame' + _lib_names = ['flame'] + notfounderror = FlameNotFoundError + + def check_embedded_lapack(self, info): + """ libflame does not necessarily have a wrapper for fortran LAPACK, we need to check """ + c = customized_ccompiler() + + tmpdir = tempfile.mkdtemp() + s = textwrap.dedent("""\ + void zungqr_(); + int main(int argc, const char *argv[]) + { + zungqr_(); + return 0; + }""") + src = os.path.join(tmpdir, 'source.c') + out = os.path.join(tmpdir, 'a.out') + # Add the additional "extra" arguments + extra_args = info.get('extra_link_args', []) + try: + with open(src, 'wt') as f: + f.write(s) + obj = c.compile([src], output_dir=tmpdir) + try: + c.link_executable(obj, out, libraries=info['libraries'], + library_dirs=info['library_dirs'], + extra_postargs=extra_args) + return True + except distutils.ccompiler.LinkError: + return False + finally: + shutil.rmtree(tmpdir) + + def calc_info(self): + lib_dirs = self.get_lib_dirs() + flame_libs = self.get_libs('libraries', self._lib_names) + + info = self.check_libs2(lib_dirs, flame_libs, []) + if info is None: + return + + # Add the extra flag args to info + extra_info = self.calc_extra_info() + dict_append(info, **extra_info) + + if self.check_embedded_lapack(info): + # check if the user has supplied all information required + self.set_info(**info) + else: + # Try and get the BLAS lib to see if we can get it to work + blas_info = get_info('blas_opt') + if not blas_info: + # since we already failed once, this ain't going to work either + return + + # Now we need to merge the two dictionaries + for key in blas_info: + if isinstance(blas_info[key], list): + info[key] = info.get(key, []) + blas_info[key] + elif isinstance(blas_info[key], tuple): + info[key] = info.get(key, ()) + blas_info[key] + else: + info[key] = info.get(key, '') + blas_info[key] + + # Now check again + if self.check_embedded_lapack(info): + self.set_info(**info) + + +class accelerate_info(system_info): + section = 'accelerate' + _lib_names = ['accelerate', 'veclib'] + notfounderror = BlasNotFoundError + + def calc_info(self): + # Make possible to enable/disable from config file/env var + libraries = os.environ.get('ACCELERATE') + if libraries: + libraries = [libraries] + else: + libraries = self.get_libs('libraries', self._lib_names) + libraries = [lib.strip().lower() for lib in libraries] + + if (sys.platform == 'darwin' and + not os.getenv('_PYTHON_HOST_PLATFORM', None)): + # Use the system BLAS from Accelerate or vecLib under OSX + args = [] + link_args = [] + if get_platform()[-4:] == 'i386' or 'intel' in get_platform() or \ + 'x86_64' in get_platform() or \ + 'i386' in platform.platform(): + intel = 1 + else: + intel = 0 + if (os.path.exists('/System/Library/Frameworks' + '/Accelerate.framework/') and + 'accelerate' in libraries): + if intel: + args.extend(['-msse3']) + args.extend([ + '-I/System/Library/Frameworks/vecLib.framework/Headers']) + link_args.extend(['-Wl,-framework', '-Wl,Accelerate']) + elif (os.path.exists('/System/Library/Frameworks' + '/vecLib.framework/') and + 'veclib' in libraries): + if intel: + args.extend(['-msse3']) + args.extend([ + '-I/System/Library/Frameworks/vecLib.framework/Headers']) + link_args.extend(['-Wl,-framework', '-Wl,vecLib']) + + if args: + self.set_info(extra_compile_args=args, + extra_link_args=link_args, + define_macros=[('NO_ATLAS_INFO', 3), + ('HAVE_CBLAS', None)]) + + return + +class blas_src_info(system_info): + # BLAS_SRC is deprecated, please do not use this! 
+ # Build or install a BLAS library via your package manager or from + # source separately. + section = 'blas_src' + dir_env_var = 'BLAS_SRC' + notfounderror = BlasSrcNotFoundError + + def get_paths(self, section, key): + pre_dirs = system_info.get_paths(self, section, key) + dirs = [] + for d in pre_dirs: + dirs.extend([d] + self.combine_paths(d, ['blas'])) + return [d for d in dirs if os.path.isdir(d)] + + def calc_info(self): + src_dirs = self.get_src_dirs() + src_dir = '' + for d in src_dirs: + if os.path.isfile(os.path.join(d, 'daxpy.f')): + src_dir = d + break + if not src_dir: + #XXX: Get sources from netlib. May be ask first. + return + blas1 = ''' + caxpy csscal dnrm2 dzasum saxpy srotg zdotc ccopy cswap drot + dznrm2 scasum srotm zdotu cdotc dasum drotg icamax scnrm2 + srotmg zdrot cdotu daxpy drotm idamax scopy sscal zdscal crotg + dcabs1 drotmg isamax sdot sswap zrotg cscal dcopy dscal izamax + snrm2 zaxpy zscal csrot ddot dswap sasum srot zcopy zswap + scabs1 + ''' + blas2 = ''' + cgbmv chpmv ctrsv dsymv dtrsv sspr2 strmv zhemv ztpmv cgemv + chpr dgbmv dsyr lsame ssymv strsv zher ztpsv cgerc chpr2 dgemv + dsyr2 sgbmv ssyr xerbla zher2 ztrmv cgeru ctbmv dger dtbmv + sgemv ssyr2 zgbmv zhpmv ztrsv chbmv ctbsv dsbmv dtbsv sger + stbmv zgemv zhpr chemv ctpmv dspmv dtpmv ssbmv stbsv zgerc + zhpr2 cher ctpsv dspr dtpsv sspmv stpmv zgeru ztbmv cher2 + ctrmv dspr2 dtrmv sspr stpsv zhbmv ztbsv + ''' + blas3 = ''' + cgemm csymm ctrsm dsyrk sgemm strmm zhemm zsyr2k chemm csyr2k + dgemm dtrmm ssymm strsm zher2k zsyrk cher2k csyrk dsymm dtrsm + ssyr2k zherk ztrmm cherk ctrmm dsyr2k ssyrk zgemm zsymm ztrsm + ''' + sources = [os.path.join(src_dir, f + '.f') \ + for f in (blas1 + blas2 + blas3).split()] + #XXX: should we check here actual existence of source files? 
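+        # (the filter below does exactly that, keeping only files that
+        # exist, mirroring lapack_src_info above)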
+ sources = [f for f in sources if os.path.isfile(f)] + info = {'sources': sources, 'language': 'f77'} + self.set_info(**info) + + +class x11_info(system_info): + section = 'x11' + notfounderror = X11NotFoundError + _lib_names = ['X11'] + + def __init__(self): + system_info.__init__(self, + default_lib_dirs=default_x11_lib_dirs, + default_include_dirs=default_x11_include_dirs) + + def calc_info(self): + if sys.platform in ['win32']: + return + lib_dirs = self.get_lib_dirs() + include_dirs = self.get_include_dirs() + opt = self.get_option_single('x11_libs', 'libraries') + x11_libs = self.get_libs(opt, self._lib_names) + info = self.check_libs(lib_dirs, x11_libs, []) + if info is None: + return + inc_dir = None + for d in include_dirs: + if self.combine_paths(d, 'X11/X.h'): + inc_dir = d + break + if inc_dir is not None: + dict_append(info, include_dirs=[inc_dir]) + self.set_info(**info) + + +class _numpy_info(system_info): + section = 'Numeric' + modulename = 'Numeric' + notfounderror = NumericNotFoundError + + def __init__(self): + include_dirs = [] + try: + module = __import__(self.modulename) + prefix = [] + for name in module.__file__.split(os.sep): + if name == 'lib': + break + prefix.append(name) + + # Ask numpy for its own include path before attempting + # anything else + try: + include_dirs.append(getattr(module, 'get_include')()) + except AttributeError: + pass + + include_dirs.append(sysconfig.get_path('include')) + except ImportError: + pass + py_incl_dir = sysconfig.get_path('include') + include_dirs.append(py_incl_dir) + py_pincl_dir = sysconfig.get_path('platinclude') + if py_pincl_dir not in include_dirs: + include_dirs.append(py_pincl_dir) + for d in default_include_dirs: + d = os.path.join(d, os.path.basename(py_incl_dir)) + if d not in include_dirs: + include_dirs.append(d) + system_info.__init__(self, + default_lib_dirs=[], + default_include_dirs=include_dirs) + + def calc_info(self): + try: + module = __import__(self.modulename) + except ImportError: + return + info = {} + macros = [] + for v in ['__version__', 'version']: + vrs = getattr(module, v, None) + if vrs is None: + continue + macros = [(self.modulename.upper() + '_VERSION', + _c_string_literal(vrs)), + (self.modulename.upper(), None)] + break + dict_append(info, define_macros=macros) + include_dirs = self.get_include_dirs() + inc_dir = None + for d in include_dirs: + if self.combine_paths(d, + os.path.join(self.modulename, + 'arrayobject.h')): + inc_dir = d + break + if inc_dir is not None: + dict_append(info, include_dirs=[inc_dir]) + if info: + self.set_info(**info) + return + + +class numarray_info(_numpy_info): + section = 'numarray' + modulename = 'numarray' + + +class Numeric_info(_numpy_info): + section = 'Numeric' + modulename = 'Numeric' + + +class numpy_info(_numpy_info): + section = 'numpy' + modulename = 'numpy' + + +class numerix_info(system_info): + section = 'numerix' + + def calc_info(self): + which = None, None + if os.getenv("NUMERIX"): + which = os.getenv("NUMERIX"), "environment var" + # If all the above fail, default to numpy. 
+ if which[0] is None: + which = "numpy", "defaulted" + try: + import numpy # noqa: F401 + which = "numpy", "defaulted" + except ImportError as e: + msg1 = str(e) + try: + import Numeric # noqa: F401 + which = "numeric", "defaulted" + except ImportError as e: + msg2 = str(e) + try: + import numarray # noqa: F401 + which = "numarray", "defaulted" + except ImportError as e: + msg3 = str(e) + log.info(msg1) + log.info(msg2) + log.info(msg3) + which = which[0].strip().lower(), which[1] + if which[0] not in ["numeric", "numarray", "numpy"]: + raise ValueError("numerix selector must be either 'Numeric' " + "or 'numarray' or 'numpy' but the value obtained" + " from the %s was '%s'." % (which[1], which[0])) + os.environ['NUMERIX'] = which[0] + self.set_info(**get_info(which[0])) + + +class f2py_info(system_info): + def calc_info(self): + try: + import numpy.f2py as f2py + except ImportError: + return + f2py_dir = os.path.join(os.path.dirname(f2py.__file__), 'src') + self.set_info(sources=[os.path.join(f2py_dir, 'fortranobject.c')], + include_dirs=[f2py_dir]) + return + + +class boost_python_info(system_info): + section = 'boost_python' + dir_env_var = 'BOOST' + + def get_paths(self, section, key): + pre_dirs = system_info.get_paths(self, section, key) + dirs = [] + for d in pre_dirs: + dirs.extend([d] + self.combine_paths(d, ['boost*'])) + return [d for d in dirs if os.path.isdir(d)] + + def calc_info(self): + src_dirs = self.get_src_dirs() + src_dir = '' + for d in src_dirs: + if os.path.isfile(os.path.join(d, 'libs', 'python', 'src', + 'module.cpp')): + src_dir = d + break + if not src_dir: + return + py_incl_dirs = [sysconfig.get_path('include')] + py_pincl_dir = sysconfig.get_path('platinclude') + if py_pincl_dir not in py_incl_dirs: + py_incl_dirs.append(py_pincl_dir) + srcs_dir = os.path.join(src_dir, 'libs', 'python', 'src') + bpl_srcs = glob(os.path.join(srcs_dir, '*.cpp')) + bpl_srcs += glob(os.path.join(srcs_dir, '*', '*.cpp')) + info = {'libraries': [('boost_python_src', + {'include_dirs': [src_dir] + py_incl_dirs, + 'sources':bpl_srcs} + )], + 'include_dirs': [src_dir], + } + if info: + self.set_info(**info) + return + + +class agg2_info(system_info): + section = 'agg2' + dir_env_var = 'AGG2' + + def get_paths(self, section, key): + pre_dirs = system_info.get_paths(self, section, key) + dirs = [] + for d in pre_dirs: + dirs.extend([d] + self.combine_paths(d, ['agg2*'])) + return [d for d in dirs if os.path.isdir(d)] + + def calc_info(self): + src_dirs = self.get_src_dirs() + src_dir = '' + for d in src_dirs: + if os.path.isfile(os.path.join(d, 'src', 'agg_affine_matrix.cpp')): + src_dir = d + break + if not src_dir: + return + if sys.platform == 'win32': + agg2_srcs = glob(os.path.join(src_dir, 'src', 'platform', + 'win32', 'agg_win32_bmp.cpp')) + else: + agg2_srcs = glob(os.path.join(src_dir, 'src', '*.cpp')) + agg2_srcs += [os.path.join(src_dir, 'src', 'platform', + 'X11', + 'agg_platform_support.cpp')] + + info = {'libraries': + [('agg2_src', + {'sources': agg2_srcs, + 'include_dirs': [os.path.join(src_dir, 'include')], + } + )], + 'include_dirs': [os.path.join(src_dir, 'include')], + } + if info: + self.set_info(**info) + return + + +class _pkg_config_info(system_info): + section = None + config_env_var = 'PKG_CONFIG' + default_config_exe = 'pkg-config' + append_config_exe = '' + version_macro_name = None + release_macro_name = None + version_flag = '--modversion' + cflags_flag = '--cflags' + + def get_config_exe(self): + if self.config_env_var in os.environ: + return 
os.environ[self.config_env_var] + return self.default_config_exe + + def get_config_output(self, config_exe, option): + cmd = config_exe + ' ' + self.append_config_exe + ' ' + option + try: + o = subprocess.check_output(cmd) + except (OSError, subprocess.CalledProcessError): + pass + else: + o = filepath_from_subprocess_output(o) + return o + + def calc_info(self): + config_exe = find_executable(self.get_config_exe()) + if not config_exe: + log.warn('File not found: %s. Cannot determine %s info.' \ + % (config_exe, self.section)) + return + info = {} + macros = [] + libraries = [] + library_dirs = [] + include_dirs = [] + extra_link_args = [] + extra_compile_args = [] + version = self.get_config_output(config_exe, self.version_flag) + if version: + macros.append((self.__class__.__name__.split('.')[-1].upper(), + _c_string_literal(version))) + if self.version_macro_name: + macros.append((self.version_macro_name + '_%s' + % (version.replace('.', '_')), None)) + if self.release_macro_name: + release = self.get_config_output(config_exe, '--release') + if release: + macros.append((self.release_macro_name + '_%s' + % (release.replace('.', '_')), None)) + opts = self.get_config_output(config_exe, '--libs') + if opts: + for opt in opts.split(): + if opt[:2] == '-l': + libraries.append(opt[2:]) + elif opt[:2] == '-L': + library_dirs.append(opt[2:]) + else: + extra_link_args.append(opt) + opts = self.get_config_output(config_exe, self.cflags_flag) + if opts: + for opt in opts.split(): + if opt[:2] == '-I': + include_dirs.append(opt[2:]) + elif opt[:2] == '-D': + if '=' in opt: + n, v = opt[2:].split('=') + macros.append((n, v)) + else: + macros.append((opt[2:], None)) + else: + extra_compile_args.append(opt) + if macros: + dict_append(info, define_macros=macros) + if libraries: + dict_append(info, libraries=libraries) + if library_dirs: + dict_append(info, library_dirs=library_dirs) + if include_dirs: + dict_append(info, include_dirs=include_dirs) + if extra_link_args: + dict_append(info, extra_link_args=extra_link_args) + if extra_compile_args: + dict_append(info, extra_compile_args=extra_compile_args) + if info: + self.set_info(**info) + return + + +class wx_info(_pkg_config_info): + section = 'wx' + config_env_var = 'WX_CONFIG' + default_config_exe = 'wx-config' + append_config_exe = '' + version_macro_name = 'WX_VERSION' + release_macro_name = 'WX_RELEASE' + version_flag = '--version' + cflags_flag = '--cxxflags' + + +class gdk_pixbuf_xlib_2_info(_pkg_config_info): + section = 'gdk_pixbuf_xlib_2' + append_config_exe = 'gdk-pixbuf-xlib-2.0' + version_macro_name = 'GDK_PIXBUF_XLIB_VERSION' + + +class gdk_pixbuf_2_info(_pkg_config_info): + section = 'gdk_pixbuf_2' + append_config_exe = 'gdk-pixbuf-2.0' + version_macro_name = 'GDK_PIXBUF_VERSION' + + +class gdk_x11_2_info(_pkg_config_info): + section = 'gdk_x11_2' + append_config_exe = 'gdk-x11-2.0' + version_macro_name = 'GDK_X11_VERSION' + + +class gdk_2_info(_pkg_config_info): + section = 'gdk_2' + append_config_exe = 'gdk-2.0' + version_macro_name = 'GDK_VERSION' + + +class gdk_info(_pkg_config_info): + section = 'gdk' + append_config_exe = 'gdk' + version_macro_name = 'GDK_VERSION' + + +class gtkp_x11_2_info(_pkg_config_info): + section = 'gtkp_x11_2' + append_config_exe = 'gtk+-x11-2.0' + version_macro_name = 'GTK_X11_VERSION' + + +class gtkp_2_info(_pkg_config_info): + section = 'gtkp_2' + append_config_exe = 'gtk+-2.0' + version_macro_name = 'GTK_VERSION' + + +class xft_info(_pkg_config_info): + section = 'xft' + append_config_exe = 'xft' + 
version_macro_name = 'XFT_VERSION' + + +class freetype2_info(_pkg_config_info): + section = 'freetype2' + append_config_exe = 'freetype2' + version_macro_name = 'FREETYPE2_VERSION' + + +class amd_info(system_info): + section = 'amd' + dir_env_var = 'AMD' + _lib_names = ['amd'] + + def calc_info(self): + lib_dirs = self.get_lib_dirs() + + opt = self.get_option_single('amd_libs', 'libraries') + amd_libs = self.get_libs(opt, self._lib_names) + info = self.check_libs(lib_dirs, amd_libs, []) + if info is None: + return + + include_dirs = self.get_include_dirs() + + inc_dir = None + for d in include_dirs: + p = self.combine_paths(d, 'amd.h') + if p: + inc_dir = os.path.dirname(p[0]) + break + if inc_dir is not None: + dict_append(info, include_dirs=[inc_dir], + define_macros=[('SCIPY_AMD_H', None)], + swig_opts=['-I' + inc_dir]) + + self.set_info(**info) + return + + +class umfpack_info(system_info): + section = 'umfpack' + dir_env_var = 'UMFPACK' + notfounderror = UmfpackNotFoundError + _lib_names = ['umfpack'] + + def calc_info(self): + lib_dirs = self.get_lib_dirs() + + opt = self.get_option_single('umfpack_libs', 'libraries') + umfpack_libs = self.get_libs(opt, self._lib_names) + info = self.check_libs(lib_dirs, umfpack_libs, []) + if info is None: + return + + include_dirs = self.get_include_dirs() + + inc_dir = None + for d in include_dirs: + p = self.combine_paths(d, ['', 'umfpack'], 'umfpack.h') + if p: + inc_dir = os.path.dirname(p[0]) + break + if inc_dir is not None: + dict_append(info, include_dirs=[inc_dir], + define_macros=[('SCIPY_UMFPACK_H', None)], + swig_opts=['-I' + inc_dir]) + + dict_append(info, **get_info('amd')) + + self.set_info(**info) + return + + +def combine_paths(*args, **kws): + """ Return a list of existing paths composed by all combinations of + items from arguments. 
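+    For example (with purely illustrative paths),
+    combine_paths('/usr', ['lib', 'lib64']) returns whichever of
+    '/usr/lib' and '/usr/lib64' actually exist.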
+ """ + r = [] + for a in args: + if not a: + continue + if is_string(a): + a = [a] + r.append(a) + args = r + if not args: + return [] + if len(args) == 1: + result = reduce(lambda a, b: a + b, map(glob, args[0]), []) + elif len(args) == 2: + result = [] + for a0 in args[0]: + for a1 in args[1]: + result.extend(glob(os.path.join(a0, a1))) + else: + result = combine_paths(*(combine_paths(args[0], args[1]) + args[2:])) + log.debug('(paths: %s)', ','.join(result)) + return result + +language_map = {'c': 0, 'c++': 1, 'f77': 2, 'f90': 3} +inv_language_map = {0: 'c', 1: 'c++', 2: 'f77', 3: 'f90'} + + +def dict_append(d, **kws): + languages = [] + for k, v in kws.items(): + if k == 'language': + languages.append(v) + continue + if k in d: + if k in ['library_dirs', 'include_dirs', + 'extra_compile_args', 'extra_link_args', + 'runtime_library_dirs', 'define_macros']: + [d[k].append(vv) for vv in v if vv not in d[k]] + else: + d[k].extend(v) + else: + d[k] = v + if languages: + l = inv_language_map[max([language_map.get(l, 0) for l in languages])] + d['language'] = l + return + + +def parseCmdLine(argv=(None,)): + import optparse + parser = optparse.OptionParser("usage: %prog [-v] [info objs]") + parser.add_option('-v', '--verbose', action='store_true', dest='verbose', + default=False, + help='be verbose and print more messages') + + opts, args = parser.parse_args(args=argv[1:]) + return opts, args + + +def show_all(argv=None): + import inspect + if argv is None: + argv = sys.argv + opts, args = parseCmdLine(argv) + if opts.verbose: + log.set_threshold(log.DEBUG) + else: + log.set_threshold(log.INFO) + show_only = [] + for n in args: + if n[-5:] != '_info': + n = n + '_info' + show_only.append(n) + show_all = not show_only + _gdict_ = globals().copy() + for name, c in _gdict_.items(): + if not inspect.isclass(c): + continue + if not issubclass(c, system_info) or c is system_info: + continue + if not show_all: + if name not in show_only: + continue + del show_only[show_only.index(name)] + conf = c() + conf.verbosity = 2 + # we don't need the result, but we want + # the side effect of printing diagnostics + conf.get_info() + if show_only: + log.info('Info classes not defined: %s', ','.join(show_only)) + +if __name__ == "__main__": + show_all() diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/tests/__init__.py b/.local/lib/python3.8/site-packages/numpy/distutils/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..e6c40c7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_build_ext.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_build_ext.cpython-38.pyc new file mode 100644 index 0000000..2c635d7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_build_ext.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_ccompiler_opt.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_ccompiler_opt.cpython-38.pyc new file mode 100644 index 0000000..e4cf1bc Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_ccompiler_opt.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_ccompiler_opt_conf.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_ccompiler_opt_conf.cpython-38.pyc new file mode 100644 index 0000000..adc3587 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_ccompiler_opt_conf.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_exec_command.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_exec_command.cpython-38.pyc new file mode 100644 index 0000000..7c30aa5 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_exec_command.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_fcompiler.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_fcompiler.cpython-38.pyc new file mode 100644 index 0000000..fc28f57 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_fcompiler.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_fcompiler_gnu.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_fcompiler_gnu.cpython-38.pyc new file mode 100644 index 0000000..f6974b3 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_fcompiler_gnu.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_fcompiler_intel.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_fcompiler_intel.cpython-38.pyc new file mode 100644 index 0000000..03c2781 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_fcompiler_intel.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_fcompiler_nagfor.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_fcompiler_nagfor.cpython-38.pyc new file mode 100644 index 0000000..4ff8006 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_fcompiler_nagfor.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_from_template.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_from_template.cpython-38.pyc new file mode 100644 index 0000000..a80c0ca Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_from_template.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_log.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_log.cpython-38.pyc new file mode 100644 index 0000000..0aeb88a Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_log.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_mingw32ccompiler.cpython-38.pyc 
b/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_mingw32ccompiler.cpython-38.pyc new file mode 100644 index 0000000..d3ca12a Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_mingw32ccompiler.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_misc_util.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_misc_util.cpython-38.pyc new file mode 100644 index 0000000..44b3e7a Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_misc_util.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_npy_pkg_config.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_npy_pkg_config.cpython-38.pyc new file mode 100644 index 0000000..2c4568f Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_npy_pkg_config.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_shell_utils.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_shell_utils.cpython-38.pyc new file mode 100644 index 0000000..dd497d0 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_shell_utils.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_system_info.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_system_info.cpython-38.pyc new file mode 100644 index 0000000..4f715ea Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/distutils/tests/__pycache__/test_system_info.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_build_ext.py b/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_build_ext.py new file mode 100644 index 0000000..c007159 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_build_ext.py @@ -0,0 +1,72 @@ +'''Tests for numpy.distutils.build_ext.''' + +import os +import subprocess +import sys +from textwrap import indent, dedent +import pytest + +@pytest.mark.slow +def test_multi_fortran_libs_link(tmp_path): + ''' + Ensures multiple "fake" static libraries are correctly linked. + see gh-18295 + ''' + + # We need to make sure we actually have an f77 compiler. 
+ # This is nontrivial, so we'll borrow the utilities + # from f2py tests: + from numpy.f2py.tests.util import has_f77_compiler + if not has_f77_compiler(): + pytest.skip('No F77 compiler found') + + # make some dummy sources + with open(tmp_path / '_dummy1.f', 'w') as fid: + fid.write(indent(dedent('''\ + FUNCTION dummy_one() + RETURN + END FUNCTION'''), prefix=' '*6)) + with open(tmp_path / '_dummy2.f', 'w') as fid: + fid.write(indent(dedent('''\ + FUNCTION dummy_two() + RETURN + END FUNCTION'''), prefix=' '*6)) + with open(tmp_path / '_dummy.c', 'w') as fid: + # doesn't need to load - just needs to exist + fid.write('int PyInit_dummyext;') + + # make a setup file + with open(tmp_path / 'setup.py', 'w') as fid: + srctree = os.path.join(os.path.dirname(__file__), '..', '..', '..') + fid.write(dedent(f'''\ + def configuration(parent_package="", top_path=None): + from numpy.distutils.misc_util import Configuration + config = Configuration("", parent_package, top_path) + config.add_library("dummy1", sources=["_dummy1.f"]) + config.add_library("dummy2", sources=["_dummy2.f"]) + config.add_extension("dummyext", sources=["_dummy.c"], libraries=["dummy1", "dummy2"]) + return config + + + if __name__ == "__main__": + import sys + sys.path.insert(0, r"{srctree}") + from numpy.distutils.core import setup + setup(**configuration(top_path="").todict())''')) + + # build the test extension and "install" it into a temporary directory + build_dir = tmp_path + subprocess.check_call([sys.executable, 'setup.py', 'build', 'install', + '--prefix', str(tmp_path / 'installdir'), + '--record', str(tmp_path / 'tmp_install_log.txt'), + ], + cwd=str(build_dir), + ) + # get the path to the built shared object + so = None + with open(tmp_path / 'tmp_install_log.txt') as fid: + for line in fid: + if 'dummyext' in line: + so = line.strip() + break + assert so is not None diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_ccompiler_opt.py b/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_ccompiler_opt.py new file mode 100644 index 0000000..1b27ab0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_ccompiler_opt.py @@ -0,0 +1,789 @@ +import re, textwrap, os +from os import sys, path +from distutils.errors import DistutilsError + +is_standalone = __name__ == '__main__' and __package__ is None +if is_standalone: + import unittest, contextlib, tempfile, shutil + sys.path.append(path.abspath(path.join(path.dirname(__file__), ".."))) + from ccompiler_opt import CCompilerOpt + + # from numpy/testing/_private/utils.py + @contextlib.contextmanager + def tempdir(*args, **kwargs): + tmpdir = tempfile.mkdtemp(*args, **kwargs) + try: + yield tmpdir + finally: + shutil.rmtree(tmpdir) + + def assert_(expr, msg=''): + if not expr: + raise AssertionError(msg) +else: + from numpy.distutils.ccompiler_opt import CCompilerOpt + from numpy.testing import assert_, tempdir + +# architectures and compilers to test +arch_compilers = dict( + x86 = ("gcc", "clang", "icc", "iccw", "msvc"), + x64 = ("gcc", "clang", "icc", "iccw", "msvc"), + ppc64 = ("gcc", "clang"), + ppc64le = ("gcc", "clang"), + armhf = ("gcc", "clang"), + aarch64 = ("gcc", "clang"), + noarch = ("gcc",) +) + +class FakeCCompilerOpt(CCompilerOpt): + fake_info = "" + def __init__(self, trap_files="", trap_flags="", *args, **kwargs): + self.fake_trap_files = trap_files + self.fake_trap_flags = trap_flags + CCompilerOpt.__init__(self, None, **kwargs) + + def __repr__(self): + return textwrap.dedent("""\ + <<<< + march : {} + compiler : 
{} + ---------------- + {} + >>>> + """).format(self.cc_march, self.cc_name, self.report()) + + def dist_compile(self, sources, flags, **kwargs): + assert(isinstance(sources, list)) + assert(isinstance(flags, list)) + if self.fake_trap_files: + for src in sources: + if re.match(self.fake_trap_files, src): + self.dist_error("source is trapped by a fake interface") + if self.fake_trap_flags: + for f in flags: + if re.match(self.fake_trap_flags, f): + self.dist_error("flag is trapped by a fake interface") + # fake objects + return zip(sources, [' '.join(flags)] * len(sources)) + + def dist_info(self): + return FakeCCompilerOpt.fake_info + + @staticmethod + def dist_log(*args, stderr=False): + pass + +class _Test_CCompilerOpt: + arch = None # x86_64 + cc = None # gcc + + def setup(self): + FakeCCompilerOpt.conf_nocache = True + self._opt = None + + def nopt(self, *args, **kwargs): + FakeCCompilerOpt.fake_info = (self.arch, self.cc, "") + return FakeCCompilerOpt(*args, **kwargs) + + def opt(self): + if not self._opt: + self._opt = self.nopt() + return self._opt + + def march(self): + return self.opt().cc_march + + def cc_name(self): + return self.opt().cc_name + + def get_targets(self, targets, groups, **kwargs): + FakeCCompilerOpt.conf_target_groups = groups + opt = self.nopt( + cpu_baseline=kwargs.get("baseline", "min"), + cpu_dispatch=kwargs.get("dispatch", "max"), + trap_files=kwargs.get("trap_files", ""), + trap_flags=kwargs.get("trap_flags", "") + ) + with tempdir() as tmpdir: + file = os.path.join(tmpdir, "test_targets.c") + with open(file, 'w') as f: + f.write(targets) + gtargets = [] + gflags = {} + fake_objects = opt.try_dispatch([file]) + for source, flags in fake_objects: + gtar = path.basename(source).split('.')[1:-1] + glen = len(gtar) + if glen == 0: + gtar = "baseline" + elif glen == 1: + gtar = gtar[0].upper() + else: + # converting multi-target into parentheses str format to be equivalent + # to the configuration statements syntax. 
+ gtar = ('('+' '.join(gtar)+')').upper() + gtargets.append(gtar) + gflags[gtar] = flags + + has_baseline, targets = opt.sources_status[file] + targets = targets + ["baseline"] if has_baseline else targets + # convert tuples that represent multi-targets into parentheses str format + targets = [ + '('+' '.join(tar)+')' if isinstance(tar, tuple) else tar + for tar in targets + ] + if len(targets) != len(gtargets) or not all(t in gtargets for t in targets): + raise AssertionError( + "'sources_status' returns different targets than the compiled targets\n" + "%s != %s" % (targets, gtargets) + ) + # return targets from 'sources_status' since the order matters + return targets, gflags + + def arg_regex(self, **kwargs): + map2origin = dict( + x64 = "x86", + ppc64le = "ppc64", + aarch64 = "armhf", + clang = "gcc", + ) + march = self.march(); cc_name = self.cc_name() + map_march = map2origin.get(march, march) + map_cc = map2origin.get(cc_name, cc_name) + for key in ( + march, cc_name, map_march, map_cc, + march + '_' + cc_name, + map_march + '_' + cc_name, + march + '_' + map_cc, + map_march + '_' + map_cc, + ) : + regex = kwargs.pop(key, None) + if regex is not None: + break + if regex: + if isinstance(regex, dict): + for k, v in regex.items(): + if v[-1:] not in ')}$?\\.+*': + regex[k] = v + '$' + else: + assert(isinstance(regex, str)) + if regex[-1:] not in ')}$?\\.+*': + regex += '$' + return regex + + def expect(self, dispatch, baseline="", **kwargs): + match = self.arg_regex(**kwargs) + if match is None: + return + opt = self.nopt( + cpu_baseline=baseline, cpu_dispatch=dispatch, + trap_files=kwargs.get("trap_files", ""), + trap_flags=kwargs.get("trap_flags", "") + ) + features = ' '.join(opt.cpu_dispatch_names()) + if not match: + if len(features) != 0: + raise AssertionError( + 'expected empty features, not "%s"' % features + ) + return + if not re.match(match, features, re.IGNORECASE): + raise AssertionError( + 'dispatch features "%s" do not match "%s"' % (features, match) + ) + + def expect_baseline(self, baseline, dispatch="", **kwargs): + match = self.arg_regex(**kwargs) + if match is None: + return + opt = self.nopt( + cpu_baseline=baseline, cpu_dispatch=dispatch, + trap_files=kwargs.get("trap_files", ""), + trap_flags=kwargs.get("trap_flags", "") + ) + features = ' '.join(opt.cpu_baseline_names()) + if not match: + if len(features) != 0: + raise AssertionError( + 'expected empty features, not "%s"' % features + ) + return + if not re.match(match, features, re.IGNORECASE): + raise AssertionError( + 'baseline features "%s" do not match "%s"' % (features, match) + ) + + def expect_flags(self, baseline, dispatch="", **kwargs): + match = self.arg_regex(**kwargs) + if match is None: + return + opt = self.nopt( + cpu_baseline=baseline, cpu_dispatch=dispatch, + trap_files=kwargs.get("trap_files", ""), + trap_flags=kwargs.get("trap_flags", "") + ) + flags = ' '.join(opt.cpu_baseline_flags()) + if not match: + if len(flags) != 0: + raise AssertionError( + 'expected empty flags, not "%s"' % flags + ) + return + if not re.match(match, flags): + raise AssertionError( + 'flags "%s" do not match "%s"' % (flags, match) + ) + + def expect_targets(self, targets, groups={}, **kwargs): + match = self.arg_regex(**kwargs) + if match is None: + return + targets, _ = self.get_targets(targets=targets, groups=groups, **kwargs) + targets = ' '.join(targets) + if not match: + if len(targets) != 0: + raise AssertionError( + 'expected empty targets, not "%s"' % targets + ) + return + if not re.match(match, targets, 
re.IGNORECASE): + raise AssertionError( + 'targets "%s" do not match "%s"' % (targets, match) + ) + + def expect_target_flags(self, targets, groups={}, **kwargs): + match_dict = self.arg_regex(**kwargs) + if match_dict is None: + return + assert(isinstance(match_dict, dict)) + _, tar_flags = self.get_targets(targets=targets, groups=groups) + + for match_tar, match_flags in match_dict.items(): + if match_tar not in tar_flags: + raise AssertionError( + 'expected to find target "%s"' % match_tar + ) + flags = tar_flags[match_tar] + if not match_flags: + if len(flags) != 0: + raise AssertionError( + 'expected to find empty flags in target "%s"' % match_tar + ) + if not re.match(match_flags, flags): + raise AssertionError( + '"%s" flags "%s" do not match "%s"' % (match_tar, flags, match_flags) + ) + + def test_interface(self): + wrong_arch = "ppc64" if self.arch != "ppc64" else "x86" + wrong_cc = "clang" if self.cc != "clang" else "icc" + opt = self.opt() + assert_(getattr(opt, "cc_on_" + self.arch)) + assert_(not getattr(opt, "cc_on_" + wrong_arch)) + assert_(getattr(opt, "cc_is_" + self.cc)) + assert_(not getattr(opt, "cc_is_" + wrong_cc)) + + def test_args_empty(self): + for baseline, dispatch in ( + ("", "none"), + (None, ""), + ("none +none", "none - none"), + ("none -max", "min - max"), + ("+vsx2 -VSX2", "vsx avx2 avx512f -max"), + ("max -vsx - avx + avx512f neon -MAX ", + "min -min + max -max -vsx + avx2 -avx2 +NONE") + ) : + opt = self.nopt(cpu_baseline=baseline, cpu_dispatch=dispatch) + assert(len(opt.cpu_baseline_names()) == 0) + assert(len(opt.cpu_dispatch_names()) == 0) + + def test_args_validation(self): + if self.march() == "unknown": + return + # check the sanity of argument validation + for baseline, dispatch in ( + ("unkown_feature - max +min", "unknown max min"), # unknown feature names + ("#avx2", "$vsx") # groups and policies aren't acceptable + ) : + try: + self.nopt(cpu_baseline=baseline, cpu_dispatch=dispatch) + raise AssertionError("expected an exception for invalid arguments") + except DistutilsError: + pass + + def test_skip(self): + # only take what the platform supports and skip the others + # without causing exceptions + self.expect( + "sse vsx neon", + x86="sse", ppc64="vsx", armhf="neon", unknown="" + ) + self.expect( + "sse41 avx avx2 vsx2 vsx3 neon_vfpv4 asimd", + x86 = "sse41 avx avx2", + ppc64 = "vsx2 vsx3", + armhf = "neon_vfpv4 asimd", + unknown = "" + ) + # any feature in cpu_dispatch must be ignored if it's part of the baseline + self.expect( + "sse neon vsx", baseline="sse neon vsx", + x86="", ppc64="", armhf="" + ) + self.expect( + "avx2 vsx3 asimdhp", baseline="avx2 vsx3 asimdhp", + x86="", ppc64="", armhf="" + ) + + def test_implies(self): + # the baseline combines implied features, so we count + # on it instead of testing 'feature_implies()' directly + self.expect_baseline( + "fma3 avx2 asimd vsx3", + # .* between two spaces can validate features in between + x86 = "sse .* sse41 .* fma3.*avx2", + ppc64 = "vsx vsx2 vsx3", + armhf = "neon neon_fp16 neon_vfpv4 asimd" + ) + """ + special cases + """ + # in icc and msvc, FMA3 and AVX2 can't be separated + # both need to imply each other, same for avx512f & cd + for f0, f1 in ( + ("fma3", "avx2"), + ("avx512f", "avx512cd"), + ): + diff = ".* sse42 .* %s .*%s$" % (f0, f1) + self.expect_baseline(f0, + x86_gcc=".* sse42 .* %s$" % f0, + x86_icc=diff, x86_iccw=diff + ) + self.expect_baseline(f1, + x86_gcc=".* avx .* %s$" % f1, + x86_icc=diff, x86_iccw=diff + ) + # in msvc, the following features can't be separated either + for f in 
(("fma3", "avx2"), ("avx512f", "avx512cd", "avx512_skx")): + for ff in f: + self.expect_baseline(ff, + x86_msvc=".*%s" % ' '.join(f) + ) + + # in ppc64le VSX and VSX2 can't be separated + self.expect_baseline("vsx", ppc64le="vsx vsx2") + # in aarch64 following features can't be separated + for f in ("neon", "neon_fp16", "neon_vfpv4", "asimd"): + self.expect_baseline(f, aarch64="neon neon_fp16 neon_vfpv4 asimd") + + def test_args_options(self): + # max & native + for o in ("max", "native"): + if o == "native" and self.cc_name() == "msvc": + continue + self.expect(o, + trap_files=".*cpu_(sse|vsx|neon).c", + x86="", ppc64="", armhf="" + ) + self.expect(o, + trap_files=".*cpu_(sse3|vsx2|neon_vfpv4).c", + x86="sse sse2", ppc64="vsx", armhf="neon neon_fp16", + aarch64="", ppc64le="" + ) + self.expect(o, + trap_files=".*cpu_(popcnt|vsx3).c", + x86="sse .* sse41", ppc64="vsx vsx2", + armhf="neon neon_fp16 .* asimd .*" + ) + self.expect(o, + x86_gcc=".* xop fma4 .* avx512f .* avx512_knl avx512_knm avx512_skx .*", + # in icc, xop and fam4 aren't supported + x86_icc=".* avx512f .* avx512_knl avx512_knm avx512_skx .*", + x86_iccw=".* avx512f .* avx512_knl avx512_knm avx512_skx .*", + # in msvc, avx512_knl avx512_knm aren't supported + x86_msvc=".* xop fma4 .* avx512f .* avx512_skx .*", + armhf=".* asimd asimdhp asimddp .*", + ppc64="vsx vsx2 vsx3.*" + ) + # min + self.expect("min", + x86="sse sse2", x64="sse sse2 sse3", + armhf="", aarch64="neon neon_fp16 .* asimd", + ppc64="", ppc64le="vsx vsx2" + ) + self.expect( + "min", trap_files=".*cpu_(sse2|vsx2).c", + x86="", ppc64le="" + ) + # an exception must triggered if native flag isn't supported + # when option "native" is activated through the args + try: + self.expect("native", + trap_flags=".*(-march=native|-xHost|/QxHost).*", + x86=".*", ppc64=".*", armhf=".*" + ) + if self.march() != "unknown": + raise AssertionError( + "excepted an exception for %s" % self.march() + ) + except DistutilsError: + if self.march() == "unknown": + raise AssertionError("excepted no exceptions") + + def test_flags(self): + self.expect_flags( + "sse sse2 vsx vsx2 neon neon_fp16", + x86_gcc="-msse -msse2", x86_icc="-msse -msse2", + x86_iccw="/arch:SSE2", + x86_msvc="/arch:SSE2" if self.march() == "x86" else "", + ppc64_gcc= "-mcpu=power8", + ppc64_clang="-maltivec -mvsx -mpower8-vector", + armhf_gcc="-mfpu=neon-fp16 -mfp16-format=ieee", + aarch64="" + ) + # testing normalize -march + self.expect_flags( + "asimd", + aarch64="", + armhf_gcc=r"-mfp16-format=ieee -mfpu=neon-fp-armv8 -march=armv8-a\+simd" + ) + self.expect_flags( + "asimdhp", + aarch64_gcc=r"-march=armv8.2-a\+fp16", + armhf_gcc=r"-mfp16-format=ieee -mfpu=neon-fp-armv8 -march=armv8.2-a\+fp16" + ) + self.expect_flags( + "asimddp", aarch64_gcc=r"-march=armv8.2-a\+dotprod" + ) + self.expect_flags( + # asimdfhm implies asimdhp + "asimdfhm", aarch64_gcc=r"-march=armv8.2-a\+fp16\+fp16fml" + ) + self.expect_flags( + "asimddp asimdhp asimdfhm", + aarch64_gcc=r"-march=armv8.2-a\+dotprod\+fp16\+fp16fml" + ) + + def test_targets_exceptions(self): + for targets in ( + "bla bla", "/*@targets", + "/*@targets */", + "/*@targets unknown */", + "/*@targets $unknown_policy avx2 */", + "/*@targets #unknown_group avx2 */", + "/*@targets $ */", + "/*@targets # vsx */", + "/*@targets #$ vsx */", + "/*@targets vsx avx2 ) */", + "/*@targets vsx avx2 (avx2 */", + "/*@targets vsx avx2 () */", + "/*@targets vsx avx2 ($autovec) */", # no features + "/*@targets vsx avx2 (xxx) */", + "/*@targets vsx avx2 (baseline) */", + ) : + try: + 
self.expect_targets( + targets, + x86="", armhf="", ppc64="" + ) + if self.march() != "unknown": + raise AssertionError( + "expected an exception for %s" % self.march() + ) + except DistutilsError: + if self.march() == "unknown": + raise AssertionError("expected no exceptions") + + def test_targets_syntax(self): + for targets in ( + "/*@targets $keep_baseline sse vsx neon*/", + "/*@targets,$keep_baseline,sse,vsx,neon*/", + "/*@targets*$keep_baseline*sse*vsx*neon*/", + """ + /* + ** @targets + ** $keep_baseline, sse vsx,neon + */ + """, + """ + /* + ************@targets************* + ** $keep_baseline, sse vsx, neon + ********************************* + */ + """, + """ + /* + /////////////@targets///////////////// + //$keep_baseline//sse//vsx//neon + ///////////////////////////////////// + */ + """, + """ + /* + @targets + $keep_baseline + SSE VSX NEON*/ + """ + ) : + self.expect_targets(targets, + x86="sse", ppc64="vsx", armhf="neon", unknown="" + ) + + def test_targets(self): + # test skipping baseline features + self.expect_targets( + """ + /*@targets + sse sse2 sse41 avx avx2 avx512f + vsx vsx2 vsx3 + neon neon_fp16 asimdhp asimddp + */ + """, + baseline="avx vsx2 asimd", + x86="avx512f avx2", armhf="asimddp asimdhp", ppc64="vsx3" + ) + # test skipping non-dispatch features + self.expect_targets( + """ + /*@targets + sse41 avx avx2 avx512f + vsx2 vsx3 + asimd asimdhp asimddp + */ + """, + baseline="", dispatch="sse41 avx2 vsx2 asimd asimddp", + x86="avx2 sse41", armhf="asimddp asimd", ppc64="vsx2" + ) + # test skipping features that aren't supported + self.expect_targets( + """ + /*@targets + sse2 sse41 avx2 avx512f + vsx2 vsx3 + neon asimdhp asimddp + */ + """, + baseline="", + trap_files=".*(avx2|avx512f|vsx3|asimddp).c", + x86="sse41 sse2", ppc64="vsx2", armhf="asimdhp neon" + ) + # test skipping features that imply each other + self.expect_targets( + """ + /*@targets + sse sse2 avx fma3 avx2 avx512f avx512cd + vsx vsx2 vsx3 + neon neon_vfpv4 neon_fp16 neon_fp16 asimd asimdhp + asimddp asimdfhm + */ + """, + baseline="", + x86_gcc="avx512cd avx512f avx2 fma3 avx sse2", + x86_msvc="avx512cd avx2 avx sse2", + x86_icc="avx512cd avx2 avx sse2", + x86_iccw="avx512cd avx2 avx sse2", + ppc64="vsx3 vsx2 vsx", + ppc64le="vsx3 vsx2", + armhf="asimdfhm asimddp asimdhp asimd neon_vfpv4 neon_fp16 neon", + aarch64="asimdfhm asimddp asimdhp asimd" + ) + + def test_targets_policies(self): + # 'keep_baseline', generate objects for baseline features + self.expect_targets( + """ + /*@targets + $keep_baseline + sse2 sse42 avx2 avx512f + vsx2 vsx3 + neon neon_vfpv4 asimd asimddp + */ + """, + baseline="sse41 avx2 vsx2 asimd vsx3", + x86="avx512f avx2 sse42 sse2", + ppc64="vsx3 vsx2", + armhf="asimddp asimd neon_vfpv4 neon", + # neon, neon_vfpv4 and asimd imply each other + aarch64="asimddp asimd" + ) + # 'keep_sort', leave the sort as-is + self.expect_targets( + """ + /*@targets + $keep_baseline $keep_sort + avx512f sse42 avx2 sse2 + vsx2 vsx3 + asimd neon neon_vfpv4 asimddp + */ + """, + x86="avx512f sse42 avx2 sse2", + ppc64="vsx2 vsx3", + armhf="asimd neon neon_vfpv4 asimddp", + # neon, neon_vfpv4 and asimd imply each other + aarch64="asimd asimddp" + ) + # 'autovec', skipping features that can't be + # vectorized by the compiler + self.expect_targets( + """ + /*@targets + $keep_baseline $keep_sort $autovec + avx512f avx2 sse42 sse41 sse2 + vsx3 vsx2 + asimddp asimd neon_vfpv4 neon + */ + """, + x86_gcc="avx512f avx2 sse42 sse41 sse2", + x86_icc="avx512f avx2 sse42 sse41 sse2", + x86_iccw="avx512f avx2 
sse42 sse41 sse2", + x86_msvc="avx512f avx2 sse2" + if self.march() == 'x86' else "avx512f avx2", + ppc64="vsx3 vsx2", + armhf="asimddp asimd neon_vfpv4 neon", + # neon, neon_vfpv4, asimd implies each other + aarch64="asimddp asimd" + ) + for policy in ("$maxopt", "$autovec"): + # 'maxopt' and autovec set the max acceptable optimization flags + self.expect_target_flags( + "/*@targets baseline %s */" % policy, + gcc={"baseline":".*-O3.*"}, icc={"baseline":".*-O3.*"}, + iccw={"baseline":".*/O3.*"}, msvc={"baseline":".*/O2.*"}, + unknown={"baseline":".*"} + ) + + # 'werror', force compilers to treat warnings as errors + self.expect_target_flags( + "/*@targets baseline $werror */", + gcc={"baseline":".*-Werror.*"}, icc={"baseline":".*-Werror.*"}, + iccw={"baseline":".*/Werror.*"}, msvc={"baseline":".*/WX.*"}, + unknown={"baseline":".*"} + ) + + def test_targets_groups(self): + self.expect_targets( + """ + /*@targets $keep_baseline baseline #test_group */ + """, + groups=dict( + test_group=(""" + $keep_baseline + asimddp sse2 vsx2 avx2 vsx3 + avx512f asimdhp + """) + ), + x86="avx512f avx2 sse2 baseline", + ppc64="vsx3 vsx2 baseline", + armhf="asimddp asimdhp baseline" + ) + # test skip duplicating and sorting + self.expect_targets( + """ + /*@targets + * sse42 avx avx512f + * #test_group_1 + * vsx2 + * #test_group_2 + * asimddp asimdfhm + */ + """, + groups=dict( + test_group_1=(""" + VSX2 vsx3 asimd avx2 SSE41 + """), + test_group_2=(""" + vsx2 vsx3 asImd aVx2 sse41 + """) + ), + x86="avx512f avx2 avx sse42 sse41", + ppc64="vsx3 vsx2", + # vsx2 part of the default baseline of ppc64le, option ("min") + ppc64le="vsx3", + armhf="asimdfhm asimddp asimd", + # asimd part of the default baseline of aarch64, option ("min") + aarch64="asimdfhm asimddp" + ) + + def test_targets_multi(self): + self.expect_targets( + """ + /*@targets + (avx512_clx avx512_cnl) (asimdhp asimddp) + */ + """, + x86=r"\(avx512_clx avx512_cnl\)", + armhf=r"\(asimdhp asimddp\)", + ) + # test skipping implied features and auto-sort + self.expect_targets( + """ + /*@targets + f16c (sse41 avx sse42) (sse3 avx2 avx512f) + vsx2 (vsx vsx3 vsx2) + (neon neon_vfpv4 asimd asimdhp asimddp) + */ + """, + x86="avx512f f16c avx", + ppc64="vsx3 vsx2", + ppc64le="vsx3", # vsx2 part of baseline + armhf=r"\(asimdhp asimddp\)", + ) + # test skipping implied features and keep sort + self.expect_targets( + """ + /*@targets $keep_sort + (sse41 avx sse42) (sse3 avx2 avx512f) + (vsx vsx3 vsx2) + (asimddp neon neon_vfpv4 asimd asimdhp) + */ + """, + x86="avx avx512f", + ppc64="vsx3", + armhf=r"\(asimdhp asimddp\)", + ) + # test compiler variety and avoiding duplicating + self.expect_targets( + """ + /*@targets $keep_sort + fma3 avx2 (fma3 avx2) (avx2 fma3) avx2 fma3 + */ + """, + x86_gcc=r"fma3 avx2 \(fma3 avx2\)", + x86_icc="avx2", x86_iccw="avx2", + x86_msvc="avx2" + ) + +def new_test(arch, cc): + if is_standalone: return textwrap.dedent("""\ + class TestCCompilerOpt_{class_name}(_Test_CCompilerOpt, unittest.TestCase): + arch = '{arch}' + cc = '{cc}' + def __init__(self, methodName="runTest"): + unittest.TestCase.__init__(self, methodName) + self.setup() + """).format( + class_name=arch + '_' + cc, arch=arch, cc=cc + ) + return textwrap.dedent("""\ + class TestCCompilerOpt_{class_name}(_Test_CCompilerOpt): + arch = '{arch}' + cc = '{cc}' + """).format( + class_name=arch + '_' + cc, arch=arch, cc=cc + ) +""" +if 1 and is_standalone: + FakeCCompilerOpt.fake_info = "x86_icc" + cco = FakeCCompilerOpt(None, cpu_baseline="avx2") + print(' 
'.join(cco.cpu_baseline_names())) + print(cco.cpu_baseline_flags()) + unittest.main() + sys.exit() +""" +for arch, compilers in arch_compilers.items(): + for cc in compilers: + exec(new_test(arch, cc)) + +if is_standalone: + unittest.main() diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_ccompiler_opt_conf.py b/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_ccompiler_opt_conf.py new file mode 100644 index 0000000..09c1fad --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_ccompiler_opt_conf.py @@ -0,0 +1,176 @@ +import unittest +from os import sys, path + +is_standalone = __name__ == '__main__' and __package__ is None +if is_standalone: + sys.path.append(path.abspath(path.join(path.dirname(__file__), ".."))) + from ccompiler_opt import CCompilerOpt +else: + from numpy.distutils.ccompiler_opt import CCompilerOpt + +arch_compilers = dict( + x86 = ("gcc", "clang", "icc", "iccw", "msvc"), + x64 = ("gcc", "clang", "icc", "iccw", "msvc"), + ppc64 = ("gcc", "clang"), + ppc64le = ("gcc", "clang"), + armhf = ("gcc", "clang"), + aarch64 = ("gcc", "clang"), + narch = ("gcc",) +) + +class FakeCCompilerOpt(CCompilerOpt): + fake_info = ("arch", "compiler", "extra_args") + def __init__(self, *args, **kwargs): + CCompilerOpt.__init__(self, None, **kwargs) + def dist_compile(self, sources, flags, **kwargs): + return sources + def dist_info(self): + return FakeCCompilerOpt.fake_info + @staticmethod + def dist_log(*args, stderr=False): + pass + +class _TestConfFeatures(FakeCCompilerOpt): + """A hook to check the sanity of configured features +- before it called by the abstract class '_Feature' + """ + + def conf_features_partial(self): + conf_all = self.conf_features + for feature_name, feature in conf_all.items(): + self.test_feature( + "attribute conf_features", + conf_all, feature_name, feature + ) + + conf_partial = FakeCCompilerOpt.conf_features_partial(self) + for feature_name, feature in conf_partial.items(): + self.test_feature( + "conf_features_partial()", + conf_partial, feature_name, feature + ) + return conf_partial + + def test_feature(self, log, search_in, feature_name, feature_dict): + error_msg = ( + "during validate '{}' within feature '{}', " + "march '{}' and compiler '{}'\n>> " + ).format(log, feature_name, self.cc_march, self.cc_name) + + if not feature_name.isupper(): + raise AssertionError(error_msg + "feature name must be in uppercase") + + for option, val in feature_dict.items(): + self.test_option_types(error_msg, option, val) + self.test_duplicates(error_msg, option, val) + + self.test_implies(error_msg, search_in, feature_name, feature_dict) + self.test_group(error_msg, search_in, feature_name, feature_dict) + self.test_extra_checks(error_msg, search_in, feature_name, feature_dict) + + def test_option_types(self, error_msg, option, val): + for tp, available in ( + ((str, list), ( + "implies", "headers", "flags", "group", "detect", "extra_checks" + )), + ((str,), ("disable",)), + ((int,), ("interest",)), + ((bool,), ("implies_detect",)), + ((bool, type(None)), ("autovec",)), + ) : + found_it = option in available + if not found_it: + continue + if not isinstance(val, tp): + error_tp = [t.__name__ for t in (*tp,)] + error_tp = ' or '.join(error_tp) + raise AssertionError(error_msg + + "expected '%s' type for option '%s' not '%s'" % ( + error_tp, option, type(val).__name__ + )) + break + + if not found_it: + raise AssertionError(error_msg + "invalid option name '%s'" % option) + + def test_duplicates(self, 
error_msg, option, val): + if option not in ( + "implies", "headers", "flags", "group", "detect", "extra_checks" + ) : return + + if isinstance(val, str): + val = val.split() + + if len(val) != len(set(val)): + raise AssertionError(error_msg + "duplicated values in option '%s'" % option) + + def test_implies(self, error_msg, search_in, feature_name, feature_dict): + if feature_dict.get("disabled") is not None: + return + implies = feature_dict.get("implies", "") + if not implies: + return + if isinstance(implies, str): + implies = implies.split() + + if feature_name in implies: + raise AssertionError(error_msg + "feature implies itself") + + for impl in implies: + impl_dict = search_in.get(impl) + if impl_dict is not None: + if "disable" in impl_dict: + raise AssertionError(error_msg + "implies disabled feature '%s'" % impl) + continue + raise AssertionError(error_msg + "implies non-exist feature '%s'" % impl) + + def test_group(self, error_msg, search_in, feature_name, feature_dict): + if feature_dict.get("disabled") is not None: + return + group = feature_dict.get("group", "") + if not group: + return + if isinstance(group, str): + group = group.split() + + for f in group: + impl_dict = search_in.get(f) + if not impl_dict or "disable" in impl_dict: + continue + raise AssertionError(error_msg + + "in option 'group', '%s' already exists as a feature name" % f + ) + + def test_extra_checks(self, error_msg, search_in, feature_name, feature_dict): + if feature_dict.get("disabled") is not None: + return + extra_checks = feature_dict.get("extra_checks", "") + if not extra_checks: + return + if isinstance(extra_checks, str): + extra_checks = extra_checks.split() + + for f in extra_checks: + impl_dict = search_in.get(f) + if not impl_dict or "disable" in impl_dict: + continue + raise AssertionError(error_msg + + "in option 'extra_checks', extra test case '%s' already exists as a feature name" % f + ) + +class TestConfFeatures(unittest.TestCase): + def __init__(self, methodName="runTest"): + unittest.TestCase.__init__(self, methodName) + self.setup() + + def setup(self): + FakeCCompilerOpt.conf_nocache = True + + def test_features(self): + for arch, compilers in arch_compilers.items(): + for cc in compilers: + FakeCCompilerOpt.fake_info = (arch, cc, "") + _TestConfFeatures() + +if is_standalone: + unittest.main() diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_exec_command.py b/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_exec_command.py new file mode 100644 index 0000000..d6eb7d1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_exec_command.py @@ -0,0 +1,214 @@ +import os +import sys +from tempfile import TemporaryFile + +from numpy.distutils import exec_command +from numpy.distutils.exec_command import get_pythonexe +from numpy.testing import tempdir, assert_, assert_warns + +# In python 3 stdout, stderr are text (unicode compliant) devices, so to +# emulate them import StringIO from the io module. +from io import StringIO + +class redirect_stdout: + """Context manager to redirect stdout for exec_command test.""" + def __init__(self, stdout=None): + self._stdout = stdout or sys.stdout + + def __enter__(self): + self.old_stdout = sys.stdout + sys.stdout = self._stdout + + def __exit__(self, exc_type, exc_value, traceback): + self._stdout.flush() + sys.stdout = self.old_stdout + # note: closing sys.stdout won't close it. 
+ self._stdout.close() + +class redirect_stderr: + """Context manager to redirect stderr for exec_command test.""" + def __init__(self, stderr=None): + self._stderr = stderr or sys.stderr + + def __enter__(self): + self.old_stderr = sys.stderr + sys.stderr = self._stderr + + def __exit__(self, exc_type, exc_value, traceback): + self._stderr.flush() + sys.stderr = self.old_stderr + # note: closing sys.stderr won't close it. + self._stderr.close() + +class emulate_nonposix: + """Context manager to emulate os.name != 'posix' """ + def __init__(self, osname='non-posix'): + self._new_name = osname + + def __enter__(self): + self._old_name = os.name + os.name = self._new_name + + def __exit__(self, exc_type, exc_value, traceback): + os.name = self._old_name + + +def test_exec_command_stdout(): + # Regression test for gh-2999 and gh-2915. + # There are several packages (nose, scipy.weave.inline, Sage inline + # Fortran) that replace stdout, in which case it doesn't have a fileno + # method. This is tested here, with a do-nothing command that fails if the + # presence of fileno() is assumed in exec_command. + + # The code has a special case for posix systems, so if we are on posix test + # both that the special case works and that the generic code works. + + # Test posix version: + with redirect_stdout(StringIO()): + with redirect_stderr(TemporaryFile()): + with assert_warns(DeprecationWarning): + exec_command.exec_command("cd '.'") + + if os.name == 'posix': + # Test general (non-posix) version: + with emulate_nonposix(): + with redirect_stdout(StringIO()): + with redirect_stderr(TemporaryFile()): + with assert_warns(DeprecationWarning): + exec_command.exec_command("cd '.'") + +def test_exec_command_stderr(): + # Test posix version: + with redirect_stdout(TemporaryFile(mode='w+')): + with redirect_stderr(StringIO()): + with assert_warns(DeprecationWarning): + exec_command.exec_command("cd '.'") + + if os.name == 'posix': + # Test general (non-posix) version: + with emulate_nonposix(): + with redirect_stdout(TemporaryFile()): + with redirect_stderr(StringIO()): + with assert_warns(DeprecationWarning): + exec_command.exec_command("cd '.'") + + +class TestExecCommand: + def setup(self): + self.pyexe = get_pythonexe() + + def check_nt(self, **kws): + s, o = exec_command.exec_command('cmd /C echo path=%path%') + assert_(s == 0) + assert_(o != '') + + s, o = exec_command.exec_command( + '"%s" -c "import sys;sys.stderr.write(sys.platform)"' % self.pyexe) + assert_(s == 0) + assert_(o == 'win32') + + def check_posix(self, **kws): + s, o = exec_command.exec_command("echo Hello", **kws) + assert_(s == 0) + assert_(o == 'Hello') + + s, o = exec_command.exec_command('echo $AAA', **kws) + assert_(s == 0) + assert_(o == '') + + s, o = exec_command.exec_command('echo "$AAA"', AAA='Tere', **kws) + assert_(s == 0) + assert_(o == 'Tere') + + s, o = exec_command.exec_command('echo "$AAA"', **kws) + assert_(s == 0) + assert_(o == '') + + if 'BBB' not in os.environ: + os.environ['BBB'] = 'Hi' + s, o = exec_command.exec_command('echo "$BBB"', **kws) + assert_(s == 0) + assert_(o == 'Hi') + + s, o = exec_command.exec_command('echo "$BBB"', BBB='Hey', **kws) + assert_(s == 0) + assert_(o == 'Hey') + + s, o = exec_command.exec_command('echo "$BBB"', **kws) + assert_(s == 0) + assert_(o == 'Hi') + + del os.environ['BBB'] + + s, o = exec_command.exec_command('echo "$BBB"', **kws) + assert_(s == 0) + assert_(o == '') + + + s, o = exec_command.exec_command('this_is_not_a_command', **kws) + assert_(s != 0) + assert_(o != '') 
+ + s, o = exec_command.exec_command('echo path=$PATH', **kws) + assert_(s == 0) + assert_(o != '') + + s, o = exec_command.exec_command( + '"%s" -c "import sys,os;sys.stderr.write(os.name)"' % + self.pyexe, **kws) + assert_(s == 0) + assert_(o == 'posix') + + def check_basic(self, *kws): + s, o = exec_command.exec_command( + '"%s" -c "raise \'Ignore me.\'"' % self.pyexe, **kws) + assert_(s != 0) + assert_(o != '') + + s, o = exec_command.exec_command( + '"%s" -c "import sys;sys.stderr.write(\'0\');' + 'sys.stderr.write(\'1\');sys.stderr.write(\'2\')"' % + self.pyexe, **kws) + assert_(s == 0) + assert_(o == '012') + + s, o = exec_command.exec_command( + '"%s" -c "import sys;sys.exit(15)"' % self.pyexe, **kws) + assert_(s == 15) + assert_(o == '') + + s, o = exec_command.exec_command( + '"%s" -c "print(\'Heipa\'")' % self.pyexe, **kws) + assert_(s == 0) + assert_(o == 'Heipa') + + def check_execute_in(self, **kws): + with tempdir() as tmpdir: + fn = "file" + tmpfile = os.path.join(tmpdir, fn) + with open(tmpfile, 'w') as f: + f.write('Hello') + + s, o = exec_command.exec_command( + '"%s" -c "f = open(\'%s\', \'r\'); f.close()"' % + (self.pyexe, fn), **kws) + assert_(s != 0) + assert_(o != '') + s, o = exec_command.exec_command( + '"%s" -c "f = open(\'%s\', \'r\'); print(f.read()); ' + 'f.close()"' % (self.pyexe, fn), execute_in=tmpdir, **kws) + assert_(s == 0) + assert_(o == 'Hello') + + def test_basic(self): + with redirect_stdout(StringIO()): + with redirect_stderr(StringIO()): + with assert_warns(DeprecationWarning): + if os.name == "posix": + self.check_posix(use_tee=0) + self.check_posix(use_tee=1) + elif os.name == "nt": + self.check_nt(use_tee=0) + self.check_nt(use_tee=1) + self.check_execute_in(use_tee=0) + self.check_execute_in(use_tee=1) diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_fcompiler.py b/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_fcompiler.py new file mode 100644 index 0000000..dd97f1e --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_fcompiler.py @@ -0,0 +1,43 @@ +from numpy.testing import assert_ +import numpy.distutils.fcompiler + +customizable_flags = [ + ('f77', 'F77FLAGS'), + ('f90', 'F90FLAGS'), + ('free', 'FREEFLAGS'), + ('arch', 'FARCH'), + ('debug', 'FDEBUG'), + ('flags', 'FFLAGS'), + ('linker_so', 'LDFLAGS'), +] + + +def test_fcompiler_flags(monkeypatch): + monkeypatch.setenv('NPY_DISTUTILS_APPEND_FLAGS', '0') + fc = numpy.distutils.fcompiler.new_fcompiler(compiler='none') + flag_vars = fc.flag_vars.clone(lambda *args, **kwargs: None) + + for opt, envvar in customizable_flags: + new_flag = '-dummy-{}-flag'.format(opt) + prev_flags = getattr(flag_vars, opt) + + monkeypatch.setenv(envvar, new_flag) + new_flags = getattr(flag_vars, opt) + + monkeypatch.delenv(envvar) + assert_(new_flags == [new_flag]) + + monkeypatch.setenv('NPY_DISTUTILS_APPEND_FLAGS', '1') + + for opt, envvar in customizable_flags: + new_flag = '-dummy-{}-flag'.format(opt) + prev_flags = getattr(flag_vars, opt) + monkeypatch.setenv(envvar, new_flag) + new_flags = getattr(flag_vars, opt) + + monkeypatch.delenv(envvar) + if prev_flags is None: + assert_(new_flags == [new_flag]) + else: + assert_(new_flags == prev_flags + [new_flag]) + diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_fcompiler_gnu.py b/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_fcompiler_gnu.py new file mode 100644 index 0000000..0817ae5 --- /dev/null +++ 
b/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_fcompiler_gnu.py @@ -0,0 +1,55 @@ +from numpy.testing import assert_ + +import numpy.distutils.fcompiler + +g77_version_strings = [ + ('GNU Fortran 0.5.25 20010319 (prerelease)', '0.5.25'), + ('GNU Fortran (GCC 3.2) 3.2 20020814 (release)', '3.2'), + ('GNU Fortran (GCC) 3.3.3 20040110 (prerelease) (Debian)', '3.3.3'), + ('GNU Fortran (GCC) 3.3.3 (Debian 20040401)', '3.3.3'), + ('GNU Fortran (GCC 3.2.2 20030222 (Red Hat Linux 3.2.2-5)) 3.2.2' + ' 20030222 (Red Hat Linux 3.2.2-5)', '3.2.2'), +] + +gfortran_version_strings = [ + ('GNU Fortran 95 (GCC 4.0.3 20051023 (prerelease) (Debian 4.0.2-3))', + '4.0.3'), + ('GNU Fortran 95 (GCC) 4.1.0', '4.1.0'), + ('GNU Fortran 95 (GCC) 4.2.0 20060218 (experimental)', '4.2.0'), + ('GNU Fortran (GCC) 4.3.0 20070316 (experimental)', '4.3.0'), + ('GNU Fortran (rubenvb-4.8.0) 4.8.0', '4.8.0'), + ('4.8.0', '4.8.0'), + ('4.0.3-7', '4.0.3'), + ("gfortran: warning: couldn't understand kern.osversion '14.1.0\n4.9.1", + '4.9.1'), + ("gfortran: warning: couldn't understand kern.osversion '14.1.0\n" + "gfortran: warning: yet another warning\n4.9.1", + '4.9.1'), + ('GNU Fortran (crosstool-NG 8a21ab48) 7.2.0', '7.2.0') +] + +class TestG77Versions: + def test_g77_version(self): + fc = numpy.distutils.fcompiler.new_fcompiler(compiler='gnu') + for vs, version in g77_version_strings: + v = fc.version_match(vs) + assert_(v == version, (vs, v)) + + def test_not_g77(self): + fc = numpy.distutils.fcompiler.new_fcompiler(compiler='gnu') + for vs, _ in gfortran_version_strings: + v = fc.version_match(vs) + assert_(v is None, (vs, v)) + +class TestGFortranVersions: + def test_gfortran_version(self): + fc = numpy.distutils.fcompiler.new_fcompiler(compiler='gnu95') + for vs, version in gfortran_version_strings: + v = fc.version_match(vs) + assert_(v == version, (vs, v)) + + def test_not_gfortran(self): + fc = numpy.distutils.fcompiler.new_fcompiler(compiler='gnu95') + for vs, _ in g77_version_strings: + v = fc.version_match(vs) + assert_(v is None, (vs, v)) diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_fcompiler_intel.py b/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_fcompiler_intel.py new file mode 100644 index 0000000..45c9cda --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_fcompiler_intel.py @@ -0,0 +1,30 @@ +import numpy.distutils.fcompiler +from numpy.testing import assert_ + + +intel_32bit_version_strings = [ + ("Intel(R) Fortran Intel(R) 32-bit Compiler Professional for applications" + "running on Intel(R) 32, Version 11.1", '11.1'), +] + +intel_64bit_version_strings = [ + ("Intel(R) Fortran IA-64 Compiler Professional for applications" + "running on IA-64, Version 11.0", '11.0'), + ("Intel(R) Fortran Intel(R) 64 Compiler Professional for applications" + "running on Intel(R) 64, Version 11.1", '11.1') +] + +class TestIntelFCompilerVersions: + def test_32bit_version(self): + fc = numpy.distutils.fcompiler.new_fcompiler(compiler='intel') + for vs, version in intel_32bit_version_strings: + v = fc.version_match(vs) + assert_(v == version) + + +class TestIntelEM64TFCompilerVersions: + def test_64bit_version(self): + fc = numpy.distutils.fcompiler.new_fcompiler(compiler='intelem') + for vs, version in intel_64bit_version_strings: + v = fc.version_match(vs) + assert_(v == version) diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_fcompiler_nagfor.py 
b/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_fcompiler_nagfor.py new file mode 100644 index 0000000..2e04f52 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_fcompiler_nagfor.py @@ -0,0 +1,22 @@ +from numpy.testing import assert_ +import numpy.distutils.fcompiler + +nag_version_strings = [('nagfor', 'NAG Fortran Compiler Release ' + '6.2(Chiyoda) Build 6200', '6.2'), + ('nagfor', 'NAG Fortran Compiler Release ' + '6.1(Tozai) Build 6136', '6.1'), + ('nagfor', 'NAG Fortran Compiler Release ' + '6.0(Hibiya) Build 1021', '6.0'), + ('nagfor', 'NAG Fortran Compiler Release ' + '5.3.2(971)', '5.3.2'), + ('nag', 'NAGWare Fortran 95 compiler Release 5.1' + '(347,355-367,375,380-383,389,394,399,401-402,407,' + '431,435,437,446,459-460,463,472,494,496,503,508,' + '511,517,529,555,557,565)', '5.1')] + +class TestNagFCompilerVersions: + def test_version_match(self): + for comp, vs, version in nag_version_strings: + fc = numpy.distutils.fcompiler.new_fcompiler(compiler=comp) + v = fc.version_match(vs) + assert_(v == version) diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_from_template.py b/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_from_template.py new file mode 100644 index 0000000..5881754 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_from_template.py @@ -0,0 +1,44 @@ + +from numpy.distutils.from_template import process_str +from numpy.testing import assert_equal + + +pyf_src = """ +python module foo + <_rd=real,double precision> + interface + subroutine foosub(tol) + <_rd>, intent(in,out) :: tol + end subroutine foosub + end interface +end python module foo +""" + +expected_pyf = """ +python module foo + interface + subroutine sfoosub(tol) + real, intent(in,out) :: tol + end subroutine sfoosub + subroutine dfoosub(tol) + double precision, intent(in,out) :: tol + end subroutine dfoosub + end interface +end python module foo +""" + + +def normalize_whitespace(s): + """ + Remove leading and trailing whitespace, and convert internal + stretches of whitespace to a single space. + """ + return ' '.join(s.split()) + + +def test_from_template(): + """Regression test for gh-10712.""" + pyf = process_str(pyf_src) + normalized_pyf = normalize_whitespace(pyf) + normalized_expected_pyf = normalize_whitespace(expected_pyf) + assert_equal(normalized_pyf, normalized_expected_pyf) diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_log.py b/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_log.py new file mode 100644 index 0000000..36f49f5 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_log.py @@ -0,0 +1,32 @@ +import io +import re +from contextlib import redirect_stdout + +import pytest + +from numpy.distutils import log + + +def setup_module(): + log.set_verbosity(2, force=True) # i.e. 
DEBUG + + +def teardown_module(): + log.set_verbosity(0, force=True) # the default + + +r_ansi = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])") + + +@pytest.mark.parametrize("func_name", ["error", "warn", "info", "debug"]) +def test_log_prefix(func_name): + func = getattr(log, func_name) + msg = f"{func_name} message" + f = io.StringIO() + with redirect_stdout(f): + func(msg) + out = f.getvalue() + assert out # sanity check + clean_out = r_ansi.sub("", out) + line = next(line for line in clean_out.splitlines()) + assert line == f"{func_name.upper()}: {msg}" diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_mingw32ccompiler.py b/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_mingw32ccompiler.py new file mode 100644 index 0000000..ebedacb --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_mingw32ccompiler.py @@ -0,0 +1,42 @@ +import shutil +import subprocess +import sys +import pytest + +from numpy.distutils import mingw32ccompiler + + +@pytest.mark.skipif(sys.platform != 'win32', reason='win32 only test') +def test_build_import(): + '''Test the mingw32ccompiler.build_import_library, which builds a + `python.a` from the MSVC `python.lib` + ''' + + # make sure `nm.exe` exists and supports the current python version. This + # can get mixed up when the PATH has a 64-bit nm but the python is 32-bit + try: + out = subprocess.check_output(['nm.exe', '--help']) + except FileNotFoundError: + pytest.skip("'nm.exe' not on path, is mingw installed?") + supported = out[out.find(b'supported targets:'):] + if sys.maxsize < 2**32: + if b'pe-i386' not in supported: + raise ValueError("'nm.exe' found but it does not support 32-bit " + "dlls when using 32-bit python. Supported " + "formats: '%s'" % supported) + elif b'pe-x86-64' not in supported: + raise ValueError("'nm.exe' found but it does not support 64-bit " + "dlls when using 64-bit python. 
Supported " + "formats: '%s'" % supported) + # Hide the import library to force a build + has_import_lib, fullpath = mingw32ccompiler._check_for_import_lib() + if has_import_lib: + shutil.move(fullpath, fullpath + '.bak') + + try: + # Whew, now we can actually test the function + mingw32ccompiler.build_import_library() + + finally: + if has_import_lib: + shutil.move(fullpath + '.bak', fullpath) diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_misc_util.py b/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_misc_util.py new file mode 100644 index 0000000..605c804 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_misc_util.py @@ -0,0 +1,82 @@ +from os.path import join, sep, dirname + +from numpy.distutils.misc_util import ( + appendpath, minrelpath, gpaths, get_shared_lib_extension, get_info + ) +from numpy.testing import ( + assert_, assert_equal + ) + +ajoin = lambda *paths: join(*((sep,)+paths)) + +class TestAppendpath: + + def test_1(self): + assert_equal(appendpath('prefix', 'name'), join('prefix', 'name')) + assert_equal(appendpath('/prefix', 'name'), ajoin('prefix', 'name')) + assert_equal(appendpath('/prefix', '/name'), ajoin('prefix', 'name')) + assert_equal(appendpath('prefix', '/name'), join('prefix', 'name')) + + def test_2(self): + assert_equal(appendpath('prefix/sub', 'name'), + join('prefix', 'sub', 'name')) + assert_equal(appendpath('prefix/sub', 'sup/name'), + join('prefix', 'sub', 'sup', 'name')) + assert_equal(appendpath('/prefix/sub', '/prefix/name'), + ajoin('prefix', 'sub', 'name')) + + def test_3(self): + assert_equal(appendpath('/prefix/sub', '/prefix/sup/name'), + ajoin('prefix', 'sub', 'sup', 'name')) + assert_equal(appendpath('/prefix/sub/sub2', '/prefix/sup/sup2/name'), + ajoin('prefix', 'sub', 'sub2', 'sup', 'sup2', 'name')) + assert_equal(appendpath('/prefix/sub/sub2', '/prefix/sub/sup/name'), + ajoin('prefix', 'sub', 'sub2', 'sup', 'name')) + +class TestMinrelpath: + + def test_1(self): + n = lambda path: path.replace('/', sep) + assert_equal(minrelpath(n('aa/bb')), n('aa/bb')) + assert_equal(minrelpath('..'), '..') + assert_equal(minrelpath(n('aa/..')), '') + assert_equal(minrelpath(n('aa/../bb')), 'bb') + assert_equal(minrelpath(n('aa/bb/..')), 'aa') + assert_equal(minrelpath(n('aa/bb/../..')), '') + assert_equal(minrelpath(n('aa/bb/../cc/../dd')), n('aa/dd')) + assert_equal(minrelpath(n('.././..')), n('../..')) + assert_equal(minrelpath(n('aa/bb/.././../dd')), n('dd')) + +class TestGpaths: + + def test_gpaths(self): + local_path = minrelpath(join(dirname(__file__), '..')) + ls = gpaths('command/*.py', local_path) + assert_(join(local_path, 'command', 'build_src.py') in ls, repr(ls)) + f = gpaths('system_info.py', local_path) + assert_(join(local_path, 'system_info.py') == f[0], repr(f)) + +class TestSharedExtension: + + def test_get_shared_lib_extension(self): + import sys + ext = get_shared_lib_extension(is_python_ext=False) + if sys.platform.startswith('linux'): + assert_equal(ext, '.so') + elif sys.platform.startswith('gnukfreebsd'): + assert_equal(ext, '.so') + elif sys.platform.startswith('darwin'): + assert_equal(ext, '.dylib') + elif sys.platform.startswith('win'): + assert_equal(ext, '.dll') + # just check for no crash + assert_(get_shared_lib_extension(is_python_ext=True)) + + +def test_installed_npymath_ini(): + # Regression test for gh-7707. If npymath.ini wasn't installed, then this + # will give an error. 
+ info = get_info('npymath') + + assert isinstance(info, dict) + assert "define_macros" in info diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_npy_pkg_config.py b/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_npy_pkg_config.py new file mode 100644 index 0000000..b287ebe --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_npy_pkg_config.py @@ -0,0 +1,84 @@ +import os + +from numpy.distutils.npy_pkg_config import read_config, parse_flags +from numpy.testing import temppath, assert_ + +simple = """\ +[meta] +Name = foo +Description = foo lib +Version = 0.1 + +[default] +cflags = -I/usr/include +libs = -L/usr/lib +""" +simple_d = {'cflags': '-I/usr/include', 'libflags': '-L/usr/lib', + 'version': '0.1', 'name': 'foo'} + +simple_variable = """\ +[meta] +Name = foo +Description = foo lib +Version = 0.1 + +[variables] +prefix = /foo/bar +libdir = ${prefix}/lib +includedir = ${prefix}/include + +[default] +cflags = -I${includedir} +libs = -L${libdir} +""" +simple_variable_d = {'cflags': '-I/foo/bar/include', 'libflags': '-L/foo/bar/lib', + 'version': '0.1', 'name': 'foo'} + +class TestLibraryInfo: + def test_simple(self): + with temppath('foo.ini') as path: + with open(path, 'w') as f: + f.write(simple) + pkg = os.path.splitext(path)[0] + out = read_config(pkg) + + assert_(out.cflags() == simple_d['cflags']) + assert_(out.libs() == simple_d['libflags']) + assert_(out.name == simple_d['name']) + assert_(out.version == simple_d['version']) + + def test_simple_variable(self): + with temppath('foo.ini') as path: + with open(path, 'w') as f: + f.write(simple_variable) + pkg = os.path.splitext(path)[0] + out = read_config(pkg) + + assert_(out.cflags() == simple_variable_d['cflags']) + assert_(out.libs() == simple_variable_d['libflags']) + assert_(out.name == simple_variable_d['name']) + assert_(out.version == simple_variable_d['version']) + out.vars['prefix'] = '/Users/david' + assert_(out.cflags() == '-I/Users/david/include') + +class TestParseFlags: + def test_simple_cflags(self): + d = parse_flags("-I/usr/include") + assert_(d['include_dirs'] == ['/usr/include']) + + d = parse_flags("-I/usr/include -DFOO") + assert_(d['include_dirs'] == ['/usr/include']) + assert_(d['macros'] == ['FOO']) + + d = parse_flags("-I /usr/include -DFOO") + assert_(d['include_dirs'] == ['/usr/include']) + assert_(d['macros'] == ['FOO']) + + def test_simple_lflags(self): + d = parse_flags("-L/usr/lib -lfoo -L/usr/lib -lbar") + assert_(d['library_dirs'] == ['/usr/lib', '/usr/lib']) + assert_(d['libraries'] == ['foo', 'bar']) + + d = parse_flags("-L /usr/lib -lfoo -L/usr/lib -lbar") + assert_(d['library_dirs'] == ['/usr/lib', '/usr/lib']) + assert_(d['libraries'] == ['foo', 'bar']) diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_shell_utils.py b/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_shell_utils.py new file mode 100644 index 0000000..32bd283 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_shell_utils.py @@ -0,0 +1,76 @@ +import pytest +import subprocess +import json +import sys + +from numpy.distutils import _shell_utils + +argv_cases = [ + [r'exe'], + [r'path/exe'], + [r'path\exe'], + [r'\\server\path\exe'], + [r'path to/exe'], + [r'path to\exe'], + + [r'exe', '--flag'], + [r'path/exe', '--flag'], + [r'path\exe', '--flag'], + [r'path to/exe', '--flag'], + [r'path to\exe', '--flag'], + + # flags containing literal quotes in their name + [r'path to/exe', 
'--flag-"quoted"'], + [r'path to\exe', '--flag-"quoted"'], + [r'path to/exe', '"--flag-quoted"'], + [r'path to\exe', '"--flag-quoted"'], +] + + +@pytest.fixture(params=[ + _shell_utils.WindowsParser, + _shell_utils.PosixParser +]) +def Parser(request): + return request.param + + +@pytest.fixture +def runner(Parser): + if Parser != _shell_utils.NativeParser: + pytest.skip('Unable to run with non-native parser') + + if Parser == _shell_utils.WindowsParser: + return lambda cmd: subprocess.check_output(cmd) + elif Parser == _shell_utils.PosixParser: + # posix has no non-shell string parsing + return lambda cmd: subprocess.check_output(cmd, shell=True) + else: + raise NotImplementedError + + +@pytest.mark.parametrize('argv', argv_cases) +def test_join_matches_subprocess(Parser, runner, argv): + """ + Test that join produces strings understood by subprocess + """ + # invoke python to return its arguments as json + cmd = [ + sys.executable, '-c', + 'import json, sys; print(json.dumps(sys.argv[1:]))' + ] + joined = Parser.join(cmd + argv) + json_out = runner(joined).decode() + assert json.loads(json_out) == argv + + +@pytest.mark.parametrize('argv', argv_cases) +def test_roundtrip(Parser, argv): + """ + Test that split is the inverse operation of join + """ + try: + joined = Parser.join(argv) + assert argv == Parser.split(joined) + except NotImplementedError: + pytest.skip("Not implemented") diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_system_info.py b/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_system_info.py new file mode 100644 index 0000000..8c26271 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/tests/test_system_info.py @@ -0,0 +1,324 @@ +import os +import shutil +import pytest +from tempfile import mkstemp, mkdtemp +from subprocess import Popen, PIPE +from distutils.errors import DistutilsError + +from numpy.testing import assert_, assert_equal, assert_raises +from numpy.distutils import ccompiler, customized_ccompiler +from numpy.distutils.system_info import system_info, ConfigParser, mkl_info +from numpy.distutils.system_info import AliasedOptionError +from numpy.distutils.system_info import default_lib_dirs, default_include_dirs +from numpy.distutils import _shell_utils + + +def get_class(name, notfound_action=1): + """ + notfound_action: + 0 - do nothing + 1 - display warning message + 2 - raise error + """ + cl = {'temp1': Temp1Info, + 'temp2': Temp2Info, + 'duplicate_options': DuplicateOptionInfo, + }.get(name.lower(), _system_info) + return cl() + +simple_site = """ +[ALL] +library_dirs = {dir1:s}{pathsep:s}{dir2:s} +libraries = {lib1:s},{lib2:s} +extra_compile_args = -I/fake/directory -I"/path with/spaces" -Os +runtime_library_dirs = {dir1:s} + +[temp1] +library_dirs = {dir1:s} +libraries = {lib1:s} +runtime_library_dirs = {dir1:s} + +[temp2] +library_dirs = {dir2:s} +libraries = {lib2:s} +extra_link_args = -Wl,-rpath={lib2_escaped:s} +rpath = {dir2:s} + +[duplicate_options] +mylib_libs = {lib1:s} +libraries = {lib2:s} +""" +site_cfg = simple_site + +fakelib_c_text = """ +/* This file is generated from numpy/distutils/testing/test_system_info.py */ +#include <stdio.h> +void foo(void) { + printf("Hello foo"); +} +void bar(void) { + printf("Hello bar"); +} +""" + +def have_compiler(): + """ Return True if there appears to be an executable compiler + """ + compiler = customized_ccompiler() + try: + cmd = compiler.compiler # Unix compilers + except AttributeError: + try: + if not compiler.initialized: + compiler.initialize() #
MSVC is different + except (DistutilsError, ValueError): + return False + cmd = [compiler.cc] + try: + p = Popen(cmd, stdout=PIPE, stderr=PIPE) + p.stdout.close() + p.stderr.close() + p.wait() + except OSError: + return False + return True + + +HAVE_COMPILER = have_compiler() + + +class _system_info(system_info): + + def __init__(self, + default_lib_dirs=default_lib_dirs, + default_include_dirs=default_include_dirs, + verbosity=1, + ): + self.__class__.info = {} + self.local_prefixes = [] + defaults = {'library_dirs': '', + 'include_dirs': '', + 'runtime_library_dirs': '', + 'rpath': '', + 'src_dirs': '', + 'search_static_first': "0", + 'extra_compile_args': '', + 'extra_link_args': ''} + self.cp = ConfigParser(defaults) + # We have to parse the config files afterwards + # to have a consistent temporary filepath + + def _check_libs(self, lib_dirs, libs, opt_libs, exts): + """Override _check_libs to return with all dirs """ + info = {'libraries': libs, 'library_dirs': lib_dirs} + return info + + +class Temp1Info(_system_info): + """For testing purposes""" + section = 'temp1' + + +class Temp2Info(_system_info): + """For testing purposes""" + section = 'temp2' + +class DuplicateOptionInfo(_system_info): + """For testing purposes""" + section = 'duplicate_options' + + +class TestSystemInfoReading: + + def setup(self): + """ Create the libraries """ + # Create 2 sources and 2 libraries + self._dir1 = mkdtemp() + self._src1 = os.path.join(self._dir1, 'foo.c') + self._lib1 = os.path.join(self._dir1, 'libfoo.so') + self._dir2 = mkdtemp() + self._src2 = os.path.join(self._dir2, 'bar.c') + self._lib2 = os.path.join(self._dir2, 'libbar.so') + # Update local site.cfg + global simple_site, site_cfg + site_cfg = simple_site.format(**{ + 'dir1': self._dir1, + 'lib1': self._lib1, + 'dir2': self._dir2, + 'lib2': self._lib2, + 'pathsep': os.pathsep, + 'lib2_escaped': _shell_utils.NativeParser.join([self._lib2]) + }) + # Write site.cfg + fd, self._sitecfg = mkstemp() + os.close(fd) + with open(self._sitecfg, 'w') as fd: + fd.write(site_cfg) + # Write the sources + with open(self._src1, 'w') as fd: + fd.write(fakelib_c_text) + with open(self._src2, 'w') as fd: + fd.write(fakelib_c_text) + # We create all class-instances + + def site_and_parse(c, site_cfg): + c.files = [site_cfg] + c.parse_config_files() + return c + self.c_default = site_and_parse(get_class('default'), self._sitecfg) + self.c_temp1 = site_and_parse(get_class('temp1'), self._sitecfg) + self.c_temp2 = site_and_parse(get_class('temp2'), self._sitecfg) + self.c_dup_options = site_and_parse(get_class('duplicate_options'), + self._sitecfg) + + + def teardown(self): + # Do each removal separately + try: + shutil.rmtree(self._dir1) + except Exception: + pass + try: + shutil.rmtree(self._dir2) + except Exception: + pass + try: + os.remove(self._sitecfg) + except Exception: + pass + + def test_all(self): + # Read in all information in the ALL block + tsi = self.c_default + assert_equal(tsi.get_lib_dirs(), [self._dir1, self._dir2]) + assert_equal(tsi.get_libraries(), [self._lib1, self._lib2]) + assert_equal(tsi.get_runtime_lib_dirs(), [self._dir1]) + extra = tsi.calc_extra_info() + assert_equal(extra['extra_compile_args'], ['-I/fake/directory', '-I/path with/spaces', '-Os']) + + def test_temp1(self): + # Read in all information in the temp1 block + tsi = self.c_temp1 + assert_equal(tsi.get_lib_dirs(), [self._dir1]) + assert_equal(tsi.get_libraries(), [self._lib1]) + assert_equal(tsi.get_runtime_lib_dirs(), [self._dir1]) + + def test_temp2(self): + # 
Read in all information in the temp2 block + tsi = self.c_temp2 + assert_equal(tsi.get_lib_dirs(), [self._dir2]) + assert_equal(tsi.get_libraries(), [self._lib2]) + # Now from rpath and not runtime_library_dirs + assert_equal(tsi.get_runtime_lib_dirs(key='rpath'), [self._dir2]) + extra = tsi.calc_extra_info() + assert_equal(extra['extra_link_args'], ['-Wl,-rpath=' + self._lib2]) + + def test_duplicate_options(self): + # Ensure that duplicates are raising an AliasedOptionError + tsi = self.c_dup_options + assert_raises(AliasedOptionError, tsi.get_option_single, "mylib_libs", "libraries") + assert_equal(tsi.get_libs("mylib_libs", [self._lib1]), [self._lib1]) + assert_equal(tsi.get_libs("libraries", [self._lib2]), [self._lib2]) + + @pytest.mark.skipif(not HAVE_COMPILER, reason="Missing compiler") + def test_compile1(self): + # Compile source and link the first source + c = customized_ccompiler() + previousDir = os.getcwd() + try: + # Change directory to not screw up directories + os.chdir(self._dir1) + c.compile([os.path.basename(self._src1)], output_dir=self._dir1) + # Ensure that the object exists + assert_(os.path.isfile(self._src1.replace('.c', '.o')) or + os.path.isfile(self._src1.replace('.c', '.obj'))) + finally: + os.chdir(previousDir) + + @pytest.mark.skipif(not HAVE_COMPILER, reason="Missing compiler") + @pytest.mark.skipif('msvc' in repr(ccompiler.new_compiler()), + reason="Fails with MSVC compiler ") + def test_compile2(self): + # Compile source and link the second source + tsi = self.c_temp2 + c = customized_ccompiler() + extra_link_args = tsi.calc_extra_info()['extra_link_args'] + previousDir = os.getcwd() + try: + # Change directory to not screw up directories + os.chdir(self._dir2) + c.compile([os.path.basename(self._src2)], output_dir=self._dir2, + extra_postargs=extra_link_args) + # Ensure that the object exists + assert_(os.path.isfile(self._src2.replace('.c', '.o'))) + finally: + os.chdir(previousDir) + + HAS_MKL = "mkl_rt" in mkl_info().calc_libraries_info().get("libraries", []) + + @pytest.mark.xfail(HAS_MKL, reason=("`[DEFAULT]` override doesn't work if " + "numpy is built with MKL support")) + def test_overrides(self): + previousDir = os.getcwd() + cfg = os.path.join(self._dir1, 'site.cfg') + shutil.copy(self._sitecfg, cfg) + try: + os.chdir(self._dir1) + # Check that the '[ALL]' section does not override + # missing values from other sections + info = mkl_info() + lib_dirs = info.cp['ALL']['library_dirs'].split(os.pathsep) + assert info.get_lib_dirs() != lib_dirs + + # But if we copy the values to a '[mkl]' section the value + # is correct + with open(cfg, 'r') as fid: + mkl = fid.read().replace('[ALL]', '[mkl]', 1) + with open(cfg, 'w') as fid: + fid.write(mkl) + info = mkl_info() + assert info.get_lib_dirs() == lib_dirs + + # Also, the values will be taken from a section named '[DEFAULT]' + with open(cfg, 'r') as fid: + dflt = fid.read().replace('[mkl]', '[DEFAULT]', 1) + with open(cfg, 'w') as fid: + fid.write(dflt) + info = mkl_info() + assert info.get_lib_dirs() == lib_dirs + finally: + os.chdir(previousDir) + + +def test_distutils_parse_env_order(monkeypatch): + from numpy.distutils.system_info import _parse_env_order + env = 'NPY_TESTS_DISTUTILS_PARSE_ENV_ORDER' + + base_order = list('abcdef') + + monkeypatch.setenv(env, 'b,i,e,f') + order, unknown = _parse_env_order(base_order, env) + assert len(order) == 3 + assert order == list('bef') + assert len(unknown) == 1 + + # For when LAPACK/BLAS optimization is disabled + monkeypatch.setenv(env, '') + order, unknown 
= _parse_env_order(base_order, env) + assert len(order) == 0 + assert len(unknown) == 0 + + for prefix in '^!': + monkeypatch.setenv(env, f'{prefix}b,i,e') + order, unknown = _parse_env_order(base_order, env) + assert len(order) == 4 + assert order == list('acdf') + assert len(unknown) == 1 + + with pytest.raises(ValueError): + monkeypatch.setenv(env, 'b,^e,i') + _parse_env_order(base_order, env) + + with pytest.raises(ValueError): + monkeypatch.setenv(env, '!b,^e,i') + _parse_env_order(base_order, env) diff --git a/.local/lib/python3.8/site-packages/numpy/distutils/unixccompiler.py b/.local/lib/python3.8/site-packages/numpy/distutils/unixccompiler.py new file mode 100644 index 0000000..4884960 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/distutils/unixccompiler.py @@ -0,0 +1,141 @@ +""" +unixccompiler - can handle very long argument lists for ar. + +""" +import os +import sys +import subprocess +import shlex + +from distutils.errors import CompileError, DistutilsExecError, LibError +from distutils.unixccompiler import UnixCCompiler +from numpy.distutils.ccompiler import replace_method +from numpy.distutils.misc_util import _commandline_dep_string +from numpy.distutils import log + +# Note that UnixCCompiler._compile appeared in Python 2.3 +def UnixCCompiler__compile(self, obj, src, ext, cc_args, extra_postargs, pp_opts): + """Compile a single source file with a Unix-style compiler.""" + # HP ad-hoc fix, see ticket 1383 + ccomp = self.compiler_so + if ccomp[0] == 'aCC': + # remove flags that will trigger ANSI-C mode for aCC + if '-Ae' in ccomp: + ccomp.remove('-Ae') + if '-Aa' in ccomp: + ccomp.remove('-Aa') + # add flags for (almost) sane C++ handling + ccomp += ['-AA'] + self.compiler_so = ccomp + # ensure OPT environment variable is read + if 'OPT' in os.environ: + # XXX who uses this? + from sysconfig import get_config_vars + opt = shlex.join(shlex.split(os.environ['OPT'])) + gcv_opt = shlex.join(shlex.split(get_config_vars('OPT')[0])) + ccomp_s = shlex.join(self.compiler_so) + if opt not in ccomp_s: + ccomp_s = ccomp_s.replace(gcv_opt, opt) + self.compiler_so = shlex.split(ccomp_s) + llink_s = shlex.join(self.linker_so) + if opt not in llink_s: + self.linker_so = self.linker_so + shlex.split(opt) + + display = '%s: %s' % (os.path.basename(self.compiler_so[0]), src) + + # gcc style automatic dependencies, outputs a makefile (-MF) that lists + # all headers needed by a c file as a side effect of compilation (-MMD) + if getattr(self, '_auto_depends', False): + deps = ['-MMD', '-MF', obj + '.d'] + else: + deps = [] + + try: + self.spawn(self.compiler_so + cc_args + [src, '-o', obj] + deps + + extra_postargs, display = display) + except DistutilsExecError as e: + msg = str(e) + raise CompileError(msg) from None + + # add commandline flags to dependency file + if deps: + # After running the compiler, the file created will be in EBCDIC + # but will not be tagged as such. This tags it so the file does not + # have multiple different encodings being written to it + if sys.platform == 'zos': + subprocess.check_output(['chtag', '-tc', 'IBM1047', obj + '.d']) + with open(obj + '.d', 'a') as f: + f.write(_commandline_dep_string(cc_args, extra_postargs, pp_opts)) + +replace_method(UnixCCompiler, '_compile', UnixCCompiler__compile) + + +def UnixCCompiler_create_static_lib(self, objects, output_libname, + output_dir=None, debug=0, target_lang=None): + """ + Build a static library in a separate sub-process.
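+
+    Objects are handed to the archiver 50 at a time so that very long
+    object lists do not overflow the command line (see the chunking loop
+    below).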
+ + Parameters + ---------- + objects : list or tuple of str + List of paths to object files used to build the static library. + output_libname : str + The library name as an absolute or relative (if `output_dir` is used) + path. + output_dir : str, optional + The path to the output directory. Default is None, in which case + the ``output_dir`` attribute of the UnixCCompiler instance is used. + debug : bool, optional + This parameter is not used. + target_lang : str, optional + This parameter is not used. + + Returns + ------- + None + + """ + objects, output_dir = self._fix_object_args(objects, output_dir) + + output_filename = \ + self.library_filename(output_libname, output_dir=output_dir) + + if self._need_link(objects, output_filename): + try: + # previous .a may be screwed up; best to remove it first + # and recreate. + # Also, ar on OS X doesn't handle updating universal archives + os.unlink(output_filename) + except OSError: + pass + self.mkpath(os.path.dirname(output_filename)) + tmp_objects = objects + self.objects + while tmp_objects: + objects = tmp_objects[:50] + tmp_objects = tmp_objects[50:] + display = '%s: adding %d object files to %s' % ( + os.path.basename(self.archiver[0]), + len(objects), output_filename) + self.spawn(self.archiver + [output_filename] + objects, + display = display) + + # Not many Unices require ranlib anymore -- SunOS 4.x is, I + # think, the only major Unix that still does. Maybe we need some + # platform intelligence here to skip ranlib if it's not + # needed -- or maybe Python's configure script took care of + # it for us, hence the check for leading colon. + if self.ranlib: + display = '%s:@ %s' % (os.path.basename(self.ranlib[0]), + output_filename) + try: + self.spawn(self.ranlib + [output_filename], + display = display) + except DistutilsExecError as e: + msg = str(e) + raise LibError(msg) from None + else: + log.debug("skipping %s (up-to-date)", output_filename) + return + +replace_method(UnixCCompiler, 'create_static_lib', + UnixCCompiler_create_static_lib) diff --git a/.local/lib/python3.8/site-packages/numpy/doc/__init__.py b/.local/lib/python3.8/site-packages/numpy/doc/__init__.py new file mode 100644 index 0000000..8a944fe --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/doc/__init__.py @@ -0,0 +1,26 @@ +import os + +ref_dir = os.path.join(os.path.dirname(__file__)) + +__all__ = sorted(f[:-3] for f in os.listdir(ref_dir) if f.endswith('.py') and + not f.startswith('__')) + +for f in __all__: + __import__(__name__ + '.'
+ f) + +del f, ref_dir + +__doc__ = """\ +Topical documentation +===================== + +The following topics are available: +%s + +You can view them by + +>>> help(np.doc.TOPIC) #doctest: +SKIP + +""" % '\n- '.join([''] + __all__) + +__all__.extend(['__doc__']) diff --git a/.local/lib/python3.8/site-packages/numpy/doc/constants.py b/.local/lib/python3.8/site-packages/numpy/doc/constants.py new file mode 100644 index 0000000..4db5c63 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/doc/constants.py @@ -0,0 +1,412 @@ +""" +========= +Constants +========= + +.. currentmodule:: numpy + +NumPy includes several constants: + +%(constant_list)s +""" +# +# Note: the docstring is autogenerated. +# +import re +import textwrap + +# Maintain same format as in numpy.add_newdocs +constants = [] +def add_newdoc(module, name, doc): + constants.append((name, doc)) + +add_newdoc('numpy', 'pi', + """ + ``pi = 3.1415926535897932384626433...`` + + References + ---------- + https://en.wikipedia.org/wiki/Pi + + """) + +add_newdoc('numpy', 'e', + """ + Euler's constant, base of natural logarithms, Napier's constant. + + ``e = 2.71828182845904523536028747135266249775724709369995...`` + + See Also + -------- + exp : Exponential function + log : Natural logarithm + + References + ---------- + https://en.wikipedia.org/wiki/E_%28mathematical_constant%29 + + """) + +add_newdoc('numpy', 'euler_gamma', + """ + ``γ = 0.5772156649015328606065120900824024310421...`` + + References + ---------- + https://en.wikipedia.org/wiki/Euler-Mascheroni_constant + + """) + +add_newdoc('numpy', 'inf', + """ + IEEE 754 floating point representation of (positive) infinity. + + Returns + ------- + y : float + A floating point representation of positive infinity. + + See Also + -------- + isinf : Shows which elements are positive or negative infinity + + isposinf : Shows which elements are positive infinity + + isneginf : Shows which elements are negative infinity + + isnan : Shows which elements are Not a Number + + isfinite : Shows which elements are finite (not one of Not a Number, + positive infinity and negative infinity) + + Notes + ----- + NumPy uses the IEEE Standard for Binary Floating-Point for Arithmetic + (IEEE 754). This means that Not a Number is not equivalent to infinity. + Also that positive infinity is not equivalent to negative infinity. But + infinity is equivalent to positive infinity. + + `Inf`, `Infinity`, `PINF` and `infty` are aliases for `inf`. + + Examples + -------- + >>> np.inf + inf + >>> np.array([1]) / 0.
+ array([ Inf]) + + """) + +add_newdoc('numpy', 'nan', + """ + IEEE 754 floating point representation of Not a Number (NaN). + + Returns + ------- + y : A floating point representation of Not a Number. + + See Also + -------- + isnan : Shows which elements are Not a Number. + + isfinite : Shows which elements are finite (not one of + Not a Number, positive infinity and negative infinity) + + Notes + ----- + NumPy uses the IEEE Standard for Binary Floating-Point for Arithmetic + (IEEE 754). This means that Not a Number is not equivalent to infinity. + + `NaN` and `NAN` are aliases of `nan`. + + Examples + -------- + >>> np.nan + nan + >>> np.log(-1) + nan + >>> np.log([-1, 1, 2]) + array([ NaN, 0. , 0.69314718]) + + """) + +add_newdoc('numpy', 'newaxis', + """ + A convenient alias for None, useful for indexing arrays. + + Examples + -------- + >>> newaxis is None + True + >>> x = np.arange(3) + >>> x + array([0, 1, 2]) + >>> x[:, newaxis] + array([[0], + [1], + [2]]) + >>> x[:, newaxis, newaxis] + array([[[0]], + [[1]], + [[2]]]) + >>> x[:, newaxis] * x + array([[0, 0, 0], + [0, 1, 2], + [0, 2, 4]]) + + Outer product, same as ``outer(x, y)``: + + >>> y = np.arange(3, 6) + >>> x[:, newaxis] * y + array([[ 0, 0, 0], + [ 3, 4, 5], + [ 6, 8, 10]]) + + ``x[newaxis, :]`` is equivalent to ``x[newaxis]`` and ``x[None]``: + + >>> x[newaxis, :].shape + (1, 3) + >>> x[newaxis].shape + (1, 3) + >>> x[None].shape + (1, 3) + >>> x[:, newaxis].shape + (3, 1) + + """) + +add_newdoc('numpy', 'NZERO', + """ + IEEE 754 floating point representation of negative zero. + + Returns + ------- + y : float + A floating point representation of negative zero. + + See Also + -------- + PZERO : Defines positive zero. + + isinf : Shows which elements are positive or negative infinity. + + isposinf : Shows which elements are positive infinity. + + isneginf : Shows which elements are negative infinity. + + isnan : Shows which elements are Not a Number. + + isfinite : Shows which elements are finite - not one of + Not a Number, positive infinity and negative infinity. + + Notes + ----- + NumPy uses the IEEE Standard for Binary Floating-Point for Arithmetic + (IEEE 754). Negative zero is considered to be a finite number. + + Examples + -------- + >>> np.NZERO + -0.0 + >>> np.PZERO + 0.0 + + >>> np.isfinite([np.NZERO]) + array([ True]) + >>> np.isnan([np.NZERO]) + array([False]) + >>> np.isinf([np.NZERO]) + array([False]) + + """) + +add_newdoc('numpy', 'PZERO', + """ + IEEE 754 floating point representation of positive zero. + + Returns + ------- + y : float + A floating point representation of positive zero. + + See Also + -------- + NZERO : Defines negative zero. + + isinf : Shows which elements are positive or negative infinity. + + isposinf : Shows which elements are positive infinity. + + isneginf : Shows which elements are negative infinity. + + isnan : Shows which elements are Not a Number. + + isfinite : Shows which elements are finite - not one of + Not a Number, positive infinity and negative infinity. + + Notes + ----- + NumPy uses the IEEE Standard for Binary Floating-Point for Arithmetic + (IEEE 754). Positive zero is considered to be a finite number. + + Examples + -------- + >>> np.PZERO + 0.0 + >>> np.NZERO + -0.0 + + >>> np.isfinite([np.PZERO]) + array([ True]) + >>> np.isnan([np.PZERO]) + array([False]) + >>> np.isinf([np.PZERO]) + array([False]) + + """) + +add_newdoc('numpy', 'NAN', + """ + IEEE 754 floating point representation of Not a Number (NaN). 
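+
+    NaN compares unequal to every value, including itself; use ``isnan``
+    rather than ``==`` to test for it.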
+ + `NaN` and `NAN` are equivalent definitions of `nan`. Please use + `nan` instead of `NAN`. + + See Also + -------- + nan + + """) + +add_newdoc('numpy', 'NaN', + """ + IEEE 754 floating point representation of Not a Number (NaN). + + `NaN` and `NAN` are equivalent definitions of `nan`. Please use + `nan` instead of `NaN`. + + See Also + -------- + nan + + """) + +add_newdoc('numpy', 'NINF', + """ + IEEE 754 floating point representation of negative infinity. + + Returns + ------- + y : float + A floating point representation of negative infinity. + + See Also + -------- + isinf : Shows which elements are positive or negative infinity + + isposinf : Shows which elements are positive infinity + + isneginf : Shows which elements are negative infinity + + isnan : Shows which elements are Not a Number + + isfinite : Shows which elements are finite (not one of Not a Number, + positive infinity and negative infinity) + + Notes + ----- + NumPy uses the IEEE Standard for Binary Floating-Point for Arithmetic + (IEEE 754). This means that Not a Number is not equivalent to infinity. + Also that positive infinity is not equivalent to negative infinity. But + infinity is equivalent to positive infinity. + + Examples + -------- + >>> np.NINF + -inf + >>> np.log(0) + -inf + + """) + +add_newdoc('numpy', 'PINF', + """ + IEEE 754 floating point representation of (positive) infinity. + + Use `inf` because `Inf`, `Infinity`, `PINF` and `infty` are aliases for + `inf`. For more details, see `inf`. + + See Also + -------- + inf + + """) + +add_newdoc('numpy', 'infty', + """ + IEEE 754 floating point representation of (positive) infinity. + + Use `inf` because `Inf`, `Infinity`, `PINF` and `infty` are aliases for + `inf`. For more details, see `inf`. + + See Also + -------- + inf + + """) + +add_newdoc('numpy', 'Inf', + """ + IEEE 754 floating point representation of (positive) infinity. + + Use `inf` because `Inf`, `Infinity`, `PINF` and `infty` are aliases for + `inf`. For more details, see `inf`. + + See Also + -------- + inf + + """) + +add_newdoc('numpy', 'Infinity', + """ + IEEE 754 floating point representation of (positive) infinity. + + Use `inf` because `Inf`, `Infinity`, `PINF` and `infty` are aliases for + `inf`. For more details, see `inf`. + + See Also + -------- + inf + + """) + + +if __doc__: + constants_str = [] + constants.sort() + for name, doc in constants: + s = textwrap.dedent(doc).replace("\n", "\n ") + + # Replace sections by rubrics + lines = s.split("\n") + new_lines = [] + for line in lines: + m = re.match(r'^(\s+)[-=]+\s*$', line) + if m and new_lines: + prev = textwrap.dedent(new_lines.pop()) + new_lines.append('%s.. rubric:: %s' % (m.group(1), prev)) + new_lines.append('') + else: + new_lines.append(line) + s = "\n".join(new_lines) + + # Done. + constants_str.append(""".. data:: %s\n %s""" % (name, s)) + constants_str = "\n".join(constants_str) + + __doc__ = __doc__ % dict(constant_list=constants_str) + del constants_str, name, doc + del line, lines, new_lines, m, s, prev + +del constants, add_newdoc diff --git a/.local/lib/python3.8/site-packages/numpy/doc/ufuncs.py b/.local/lib/python3.8/site-packages/numpy/doc/ufuncs.py new file mode 100644 index 0000000..eecc150 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/doc/ufuncs.py @@ -0,0 +1,137 @@ +""" +=================== +Universal Functions +=================== + +Ufuncs are, generally speaking, mathematical functions or operations that are +applied element-by-element to the contents of an array. 
That is, the result +in each output array element only depends on the value in the corresponding +input array (or arrays) and on no other array elements. NumPy comes with a +large suite of ufuncs, and scipy extends that suite substantially. The simplest +example is the addition operator: :: + + >>> np.array([0,2,3,4]) + np.array([1,1,-1,2]) + array([1, 3, 2, 6]) + +The ufunc module lists all the available ufuncs in numpy. Documentation on +the specific ufuncs may be found in those modules. This documentation is +intended to address the more general aspects of ufuncs common to most of +them. All of the ufuncs that make use of Python operators (e.g., +, -, etc.) +have equivalent functions defined (e.g. add() for +). + +Type coercion +============= + +What happens when a binary operator (e.g., +,-,\\*,/, etc) deals with arrays of +two different types? What is the type of the result? Typically, the result is +the higher of the two types. For example: :: + + float32 + float64 -> float64 + int8 + int32 -> int32 + int16 + float32 -> float32 + float32 + complex64 -> complex64 + +There are some less obvious cases generally involving mixes of types +(e.g. uints, ints and floats) where equal bit sizes for each are not +capable of saving all the information in a different type of equivalent +bit size. Some examples are int32 vs float32 or uint32 vs int32. +Generally, the result is the higher type of larger size than both +(if available). So: :: + + int32 + float32 -> float64 + uint32 + int32 -> int64 + +Finally, the type coercion behavior when expressions involve Python +scalars is different than that seen for arrays. Since Python has a +limited number of types, combining a Python int with a dtype=np.int8 +array does not coerce to the higher type but instead, the type of the +array prevails. So the rule for Python scalars combined with arrays is +that the result will be that of the array type equivalent to the Python +scalar if the Python scalar is of a higher 'kind' than the array (e.g., +float vs. int); otherwise the resultant type will be that of the array. +For example: :: + + Python int + int8 -> int8 + Python float + int8 -> float64 + +ufunc methods +============= + +Binary ufuncs support 4 methods. + +**.reduce(arr)** applies the binary operator to elements of the array in + sequence. For example: :: + + >>> np.add.reduce(np.arange(10)) # adds all elements of array + 45 + +For multidimensional arrays, the first dimension is reduced by default: :: + + >>> np.add.reduce(np.arange(10).reshape(2,5)) + array([ 5, 7, 9, 11, 13]) + +The axis keyword can be used to specify different axes to reduce: :: + + >>> np.add.reduce(np.arange(10).reshape(2,5),axis=1) + array([10, 35]) + +**.accumulate(arr)** applies the binary operator and generates an +equivalently shaped array that includes the accumulated amount for each +element of the array. A couple of examples: :: + + >>> np.add.accumulate(np.arange(10)) + array([ 0, 1, 3, 6, 10, 15, 21, 28, 36, 45]) + >>> np.multiply.accumulate(np.arange(1,9)) + array([ 1, 2, 6, 24, 120, 720, 5040, 40320]) + +The behavior for multidimensional arrays is the same as for .reduce(), +as is the use of the axis keyword. + +**.reduceat(arr,indices)** allows one to apply reduce to selected parts + of an array. It is a difficult method to understand. See the + documentation of ``ufunc.reduceat`` for details. + +**.outer(arr1,arr2)** generates an outer operation on the two arrays arr1 and + arr2.
It will work on multidimensional arrays (the shape of the result is + the concatenation of the two input shapes): :: + + >>> np.multiply.outer(np.arange(3),np.arange(4)) + array([[0, 0, 0, 0], + [0, 1, 2, 3], + [0, 2, 4, 6]]) + +Output arguments +================ + +All ufuncs accept an optional output array. The array must be of the expected +output shape. Beware that if the output array is of a different (and lower) +type than the computed result, the values may be silently truncated +or otherwise corrupted in the downcast to the lower type. This is useful +when one wants to avoid creating large temporary arrays and instead reuse +the same array memory repeatedly (at the expense of not being able to +use more convenient operator notation in expressions). Note that when the +output argument is used, the ufunc still returns a reference to the result. + + >>> x = np.arange(2) + >>> np.add(np.arange(2),np.arange(2.),x) + array([0, 2]) + >>> x + array([0, 2]) + +and & or as ufuncs +================== + +Invariably people try to use the Python 'and' and 'or' as logical operators +(and quite understandably). But these operators cannot be overloaded with +array equivalents, since Python treats them specially. Thus using 'and' +or 'or' with an array results in an error. There are two alternatives: + + 1) use the ufunc functions logical_and() and logical_or(). + 2) use the bitwise operators & and \\|. The drawback of these is that if + the arguments to these operators are not boolean arrays, the result is + likely incorrect. On the other hand, most usages of logical_and and + logical_or are with boolean arrays. As long as one is careful, this is + a convenient way to apply these operators. + +""" diff --git a/.local/lib/python3.8/site-packages/numpy/dual.py b/.local/lib/python3.8/site-packages/numpy/dual.py new file mode 100644 index 0000000..eb7e61a --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/dual.py @@ -0,0 +1,83 @@ +""" +.. deprecated:: 1.20 + +*This module is deprecated. Instead of importing functions from* +``numpy.dual``, *the functions should be imported directly from NumPy +or SciPy*. + +Aliases for functions which may be accelerated by SciPy. + +SciPy_ can be built to use accelerated or otherwise improved libraries +for FFTs, linear algebra, and special functions. This module allows +developers to transparently support these accelerated functions when +SciPy is available but still support users who have only installed +NumPy. + +.. _SciPy : https://www.scipy.org + +""" +import warnings + + +warnings.warn('The module numpy.dual is deprecated. Instead of using dual, ' + 'use the functions directly from numpy or scipy.', + category=DeprecationWarning, + stacklevel=2) + +# This module should be used for functions both in numpy and scipy if +# you want to use the scipy version if available but the numpy version +# otherwise.
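+#
+# A minimal usage sketch (editorial addition; only standard numpy names):
+#
+#     import numpy as np
+#     from numpy.dual import fft, inv
+#     spectrum = fft(np.ones(8))   # scipy's fft if scipy registered one,
+#     identity = inv(np.eye(3))    # numpy's otherwise; likewise for inv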
+# Usage --- from numpy.dual import fft, inv + +__all__ = ['fft', 'ifft', 'fftn', 'ifftn', 'fft2', 'ifft2', + 'norm', 'inv', 'svd', 'solve', 'det', 'eig', 'eigvals', + 'eigh', 'eigvalsh', 'lstsq', 'pinv', 'cholesky', 'i0'] + +import numpy.linalg as linpkg +import numpy.fft as fftpkg +from numpy.lib import i0 +import sys + + +fft = fftpkg.fft +ifft = fftpkg.ifft +fftn = fftpkg.fftn +ifftn = fftpkg.ifftn +fft2 = fftpkg.fft2 +ifft2 = fftpkg.ifft2 + +norm = linpkg.norm +inv = linpkg.inv +svd = linpkg.svd +solve = linpkg.solve +det = linpkg.det +eig = linpkg.eig +eigvals = linpkg.eigvals +eigh = linpkg.eigh +eigvalsh = linpkg.eigvalsh +lstsq = linpkg.lstsq +pinv = linpkg.pinv +cholesky = linpkg.cholesky + +_restore_dict = {} + +def register_func(name, func): + if name not in __all__: + raise ValueError("{} not a dual function.".format(name)) + f = sys._getframe(0).f_globals + _restore_dict[name] = f[name] + f[name] = func + +def restore_func(name): + if name not in __all__: + raise ValueError("{} not a dual function.".format(name)) + try: + val = _restore_dict[name] + except KeyError: + return + else: + sys._getframe(0).f_globals[name] = val + +def restore_all(): + for name in _restore_dict.keys(): + restore_func(name) diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/__init__.py b/.local/lib/python3.8/site-packages/numpy/f2py/__init__.py new file mode 100644 index 0000000..f147f1b --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/__init__.py @@ -0,0 +1,200 @@ +#!/usr/bin/env python3 +"""Fortran to Python Interface Generator. + +""" +__all__ = ['run_main', 'compile', 'get_include'] + +import sys +import subprocess +import os + +from . import f2py2e +from . import diagnose + +run_main = f2py2e.run_main +main = f2py2e.main + + +def compile(source, + modulename='untitled', + extra_args='', + verbose=True, + source_fn=None, + extension='.f', + full_output=False + ): + """ + Build extension module from a Fortran 77 source string with f2py. + + Parameters + ---------- + source : str or bytes + Fortran source of module / subroutine to compile + + .. versionchanged:: 1.16.0 + Accept str as well as bytes + + modulename : str, optional + The name of the compiled python module + extra_args : str or list, optional + Additional parameters passed to f2py + + .. versionchanged:: 1.16.0 + A list of args may also be provided. + + verbose : bool, optional + Print f2py output to screen + source_fn : str, optional + Name of the file where the fortran source is written. + The default is to use a temporary file with the extension + provided by the `extension` parameter + extension : {'.f', '.f90'}, optional + Filename extension if `source_fn` is not provided. + The extension tells which fortran standard is used. + The default is `.f`, which implies F77 standard. + + .. versionadded:: 1.11.0 + + full_output : bool, optional + If True, return a `subprocess.CompletedProcess` containing + the stdout and stderr of the compile process, instead of just + the status code. + + .. versionadded:: 1.20.0 + + + Returns + ------- + result : int or `subprocess.CompletedProcess` + 0 on success, or a `subprocess.CompletedProcess` if + ``full_output=True`` + + Examples + -------- + .. 
literalinclude:: ../../source/f2py/code/results/compile_session.dat + :language: python + + """ + import tempfile + import shlex + + if source_fn is None: + f, fname = tempfile.mkstemp(suffix=extension) + # f is a file descriptor so we need to close it + # carefully -- not with .close() directly + os.close(f) + else: + fname = source_fn + + if not isinstance(source, str): + source = str(source, 'utf-8') + try: + with open(fname, 'w') as f: + f.write(source) + + args = ['-c', '-m', modulename, f.name] + + if isinstance(extra_args, str): + is_posix = (os.name == 'posix') + extra_args = shlex.split(extra_args, posix=is_posix) + + args.extend(extra_args) + + c = [sys.executable, + '-c', + 'import numpy.f2py as f2py2e;f2py2e.main()'] + args + try: + cp = subprocess.run(c, stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + except OSError: + # preserve historic status code used by exec_command() + cp = subprocess.CompletedProcess(c, 127, stdout=b'', stderr=b'') + else: + if verbose: + print(cp.stdout.decode()) + finally: + if source_fn is None: + os.remove(fname) + + if full_output: + return cp + else: + return cp.returncode + + +def get_include(): + """ + Return the directory that contains the fortranobject.c and .h files. + + .. note:: + + This function is not needed when building an extension with + `numpy.distutils` directly from ``.f`` and/or ``.pyf`` files + in one go. + + Python extension modules built with f2py-generated code need to use + ``fortranobject.c`` as a source file, and include the ``fortranobject.h`` + header. This function can be used to obtain the directory containing + both of these files. + + Returns + ------- + include_path : str + Absolute path to the directory containing ``fortranobject.c`` and + ``fortranobject.h``. + + Notes + ----- + .. versionadded:: 1.22.0 + + Unless the build system you are using has specific support for f2py, + building a Python extension using a ``.pyf`` signature file is a two-step + process. For a module ``mymod``: + + - Step 1: run ``python -m numpy.f2py mymod.pyf --quiet``. This + generates ``_mymodmodule.c`` and (if needed) + ``_mymod-f2pywrappers.f`` files next to ``mymod.pyf``. + - Step 2: build your Python extension module. This requires the + following source files: + + - ``_mymodmodule.c`` + - ``_mymod-f2pywrappers.f`` (if it was generated in step 1) + - ``fortranobject.c`` + + See Also + -------- + numpy.get_include : function that returns the numpy include directory + + """ + return os.path.join(os.path.dirname(__file__), 'src') + + +if sys.version_info[:2] >= (3, 7): + # module level getattr is only supported in 3.7 onwards + # https://www.python.org/dev/peps/pep-0562/ + def __getattr__(attr): + + # Avoid importing things that aren't needed for building + # which might import the main numpy module + if attr == "f2py_testing": + import numpy.f2py.f2py_testing as f2py_testing + return f2py_testing + + elif attr == "test": + from numpy._pytesttester import PytestTester + test = PytestTester(__name__) + return test + + else: + raise AttributeError("module {!r} has no attribute " + "{!r}".format(__name__, attr)) + + def __dir__(): + return list(globals().keys() | {"f2py_testing", "test"}) + +else: + from .
import f2py_testing + + from numpy._pytesttester import PytestTester + test = PytestTester(__name__) + del PytestTester diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/__init__.pyi b/.local/lib/python3.8/site-packages/numpy/f2py/__init__.pyi new file mode 100644 index 0000000..e52e12b --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/__init__.pyi @@ -0,0 +1,42 @@ +import os +import subprocess +from typing import Literal as L, Any, List, Iterable, Dict, overload, TypedDict + +from numpy._pytesttester import PytestTester + +class _F2PyDictBase(TypedDict): + csrc: List[str] + h: List[str] + +class _F2PyDict(_F2PyDictBase, total=False): + fsrc: List[str] + ltx: List[str] + +__all__: List[str] +__path__: List[str] +test: PytestTester + +def run_main(comline_list: Iterable[str]) -> Dict[str, _F2PyDict]: ... + +@overload +def compile( # type: ignore[misc] + source: str | bytes, + modulename: str = ..., + extra_args: str | List[str] = ..., + verbose: bool = ..., + source_fn: None | str | bytes | os.PathLike[Any] = ..., + extension: L[".f", ".f90"] = ..., + full_output: L[False] = ..., +) -> int: ... +@overload +def compile( + source: str | bytes, + modulename: str = ..., + extra_args: str | List[str] = ..., + verbose: bool = ..., + source_fn: None | str | bytes | os.PathLike[Any] = ..., + extension: L[".f", ".f90"] = ..., + full_output: L[True] = ..., +) -> subprocess.CompletedProcess[bytes]: ... + +def get_include() -> str: ... diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/__main__.py b/.local/lib/python3.8/site-packages/numpy/f2py/__main__.py new file mode 100644 index 0000000..936a753 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/__main__.py @@ -0,0 +1,5 @@ +# See: +# https://web.archive.org/web/20140822061353/http://cens.ioc.ee/projects/f2py2e +from numpy.f2py.f2py2e import main + +main() diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/__version__.py b/.local/lib/python3.8/site-packages/numpy/f2py/__version__.py new file mode 100644 index 0000000..e20d7c1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/__version__.py @@ -0,0 +1 @@ +from numpy.version import version diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/auxfuncs.py b/.local/lib/python3.8/site-packages/numpy/f2py/auxfuncs.py new file mode 100644 index 0000000..c8f2067 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/auxfuncs.py @@ -0,0 +1,857 @@ +#!/usr/bin/env python3 +""" + +Auxiliary functions for f2py2e. + +Copyright 1999,2000 Pearu Peterson all rights reserved, +Pearu Peterson +Permission to use, modify, and distribute this software is given under the +terms of the NumPy (BSD style) LICENSE. + + +NO WARRANTY IS EXPRESSED OR IMPLIED. USE AT YOUR OWN RISK. +$Date: 2005/07/24 19:01:55 $ +Pearu Peterson + +""" +import pprint +import sys +import types +from functools import reduce + +from . import __version__ +from .
import cfuncs + +__all__ = [ + 'applyrules', 'debugcapi', 'dictappend', 'errmess', 'gentitle', + 'getargs2', 'getcallprotoargument', 'getcallstatement', + 'getfortranname', 'getpymethoddef', 'getrestdoc', 'getusercode', + 'getusercode1', 'hasbody', 'hascallstatement', 'hascommon', + 'hasexternals', 'hasinitvalue', 'hasnote', 'hasresultnote', + 'isallocatable', 'isarray', 'isarrayofstrings', 'iscomplex', + 'iscomplexarray', 'iscomplexfunction', 'iscomplexfunction_warn', + 'isdouble', 'isdummyroutine', 'isexternal', 'isfunction', + 'isfunction_wrap', 'isint1array', 'isinteger', 'isintent_aux', + 'isintent_c', 'isintent_callback', 'isintent_copy', 'isintent_dict', + 'isintent_hide', 'isintent_in', 'isintent_inout', 'isintent_inplace', + 'isintent_nothide', 'isintent_out', 'isintent_overwrite', 'islogical', + 'islogicalfunction', 'islong_complex', 'islong_double', + 'islong_doublefunction', 'islong_long', 'islong_longfunction', + 'ismodule', 'ismoduleroutine', 'isoptional', 'isprivate', 'isrequired', + 'isroutine', 'isscalar', 'issigned_long_longarray', 'isstring', + 'isstringarray', 'isstringfunction', 'issubroutine', + 'issubroutine_wrap', 'isthreadsafe', 'isunsigned', 'isunsigned_char', + 'isunsigned_chararray', 'isunsigned_long_long', + 'isunsigned_long_longarray', 'isunsigned_short', + 'isunsigned_shortarray', 'l_and', 'l_not', 'l_or', 'outmess', + 'replace', 'show', 'stripcomma', 'throw_error', +] + + +f2py_version = __version__.version + + +errmess = sys.stderr.write +show = pprint.pprint + +options = {} +debugoptions = [] +wrapfuncs = 1 + + +def outmess(t): + if options.get('verbose', 1): + sys.stdout.write(t) + + +def debugcapi(var): + return 'capi' in debugoptions + + +def _isstring(var): + return 'typespec' in var and var['typespec'] == 'character' and \ + not isexternal(var) + + +def isstring(var): + return _isstring(var) and not isarray(var) + + +def ischaracter(var): + return isstring(var) and 'charselector' not in var + + +def isstringarray(var): + return isarray(var) and _isstring(var) + + +def isarrayofstrings(var): + # leaving out '*' for now so that `character*(*) a(m)` and `character + # a(m,*)` are treated differently. Luckily `character**` is illegal. 
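+    # i.e. an array whose last dimension is the assumed-size '(*)', e.g.
+    # `character a(m,*)`, counts as an array of strings here, while
+    # `character*(*) a(m)` remains a plain string array (editorial gloss
+    # of the check below).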
+ return isstringarray(var) and var['dimension'][-1] == '(*)' + + +def isarray(var): + return 'dimension' in var and not isexternal(var) + + +def isscalar(var): + return not (isarray(var) or isstring(var) or isexternal(var)) + + +def iscomplex(var): + return isscalar(var) and \ + var.get('typespec') in ['complex', 'double complex'] + + +def islogical(var): + return isscalar(var) and var.get('typespec') == 'logical' + + +def isinteger(var): + return isscalar(var) and var.get('typespec') == 'integer' + + +def isreal(var): + return isscalar(var) and var.get('typespec') == 'real' + + +def get_kind(var): + try: + return var['kindselector']['*'] + except KeyError: + try: + return var['kindselector']['kind'] + except KeyError: + pass + + +def islong_long(var): + if not isscalar(var): + return 0 + if var.get('typespec') not in ['integer', 'logical']: + return 0 + return get_kind(var) == '8' + + +def isunsigned_char(var): + if not isscalar(var): + return 0 + if var.get('typespec') != 'integer': + return 0 + return get_kind(var) == '-1' + + +def isunsigned_short(var): + if not isscalar(var): + return 0 + if var.get('typespec') != 'integer': + return 0 + return get_kind(var) == '-2' + + +def isunsigned(var): + if not isscalar(var): + return 0 + if var.get('typespec') != 'integer': + return 0 + return get_kind(var) == '-4' + + +def isunsigned_long_long(var): + if not isscalar(var): + return 0 + if var.get('typespec') != 'integer': + return 0 + return get_kind(var) == '-8' + + +def isdouble(var): + if not isscalar(var): + return 0 + if not var.get('typespec') == 'real': + return 0 + return get_kind(var) == '8' + + +def islong_double(var): + if not isscalar(var): + return 0 + if not var.get('typespec') == 'real': + return 0 + return get_kind(var) == '16' + + +def islong_complex(var): + if not iscomplex(var): + return 0 + return get_kind(var) == '32' + + +def iscomplexarray(var): + return isarray(var) and \ + var.get('typespec') in ['complex', 'double complex'] + + +def isint1array(var): + return isarray(var) and var.get('typespec') == 'integer' \ + and get_kind(var) == '1' + + +def isunsigned_chararray(var): + return isarray(var) and var.get('typespec') in ['integer', 'logical']\ + and get_kind(var) == '-1' + + +def isunsigned_shortarray(var): + return isarray(var) and var.get('typespec') in ['integer', 'logical']\ + and get_kind(var) == '-2' + + +def isunsignedarray(var): + return isarray(var) and var.get('typespec') in ['integer', 'logical']\ + and get_kind(var) == '-4' + + +def isunsigned_long_longarray(var): + return isarray(var) and var.get('typespec') in ['integer', 'logical']\ + and get_kind(var) == '-8' + + +def issigned_chararray(var): + return isarray(var) and var.get('typespec') in ['integer', 'logical']\ + and get_kind(var) == '1' + + +def issigned_shortarray(var): + return isarray(var) and var.get('typespec') in ['integer', 'logical']\ + and get_kind(var) == '2' + + +def issigned_array(var): + return isarray(var) and var.get('typespec') in ['integer', 'logical']\ + and get_kind(var) == '4' + + +def issigned_long_longarray(var): + return isarray(var) and var.get('typespec') in ['integer', 'logical']\ + and get_kind(var) == '8' + + +def isallocatable(var): + return 'attrspec' in var and 'allocatable' in var['attrspec'] + + +def ismutable(var): + return not ('dimension' not in var or isstring(var)) + + +def ismoduleroutine(rout): + return 'modulename' in rout + + +def ismodule(rout): + return 'block' in rout and 'module' == rout['block'] + + +def isfunction(rout): + return 'block' in rout and 
'function' == rout['block'] + + +def isfunction_wrap(rout): + if isintent_c(rout): + return 0 + return wrapfuncs and isfunction(rout) and (not isexternal(rout)) + + +def issubroutine(rout): + return 'block' in rout and 'subroutine' == rout['block'] + + +def issubroutine_wrap(rout): + if isintent_c(rout): + return 0 + return issubroutine(rout) and hasassumedshape(rout) + + +def hasassumedshape(rout): + if rout.get('hasassumedshape'): + return True + for a in rout['args']: + for d in rout['vars'].get(a, {}).get('dimension', []): + if d == ':': + rout['hasassumedshape'] = True + return True + return False + + +def requiresf90wrapper(rout): + return ismoduleroutine(rout) or hasassumedshape(rout) + + +def isroutine(rout): + return isfunction(rout) or issubroutine(rout) + + +def islogicalfunction(rout): + if not isfunction(rout): + return 0 + if 'result' in rout: + a = rout['result'] + else: + a = rout['name'] + if a in rout['vars']: + return islogical(rout['vars'][a]) + return 0 + + +def islong_longfunction(rout): + if not isfunction(rout): + return 0 + if 'result' in rout: + a = rout['result'] + else: + a = rout['name'] + if a in rout['vars']: + return islong_long(rout['vars'][a]) + return 0 + + +def islong_doublefunction(rout): + if not isfunction(rout): + return 0 + if 'result' in rout: + a = rout['result'] + else: + a = rout['name'] + if a in rout['vars']: + return islong_double(rout['vars'][a]) + return 0 + + +def iscomplexfunction(rout): + if not isfunction(rout): + return 0 + if 'result' in rout: + a = rout['result'] + else: + a = rout['name'] + if a in rout['vars']: + return iscomplex(rout['vars'][a]) + return 0 + + +def iscomplexfunction_warn(rout): + if iscomplexfunction(rout): + outmess("""\ + ************************************************************** + Warning: code with a function returning complex value + may not work correctly with your Fortran compiler. 
+ When using GNU gcc/g77 compilers, codes should work + correctly for callbacks with: + f2py -c -DF2PY_CB_RETURNCOMPLEX + **************************************************************\n""") + return 1 + return 0 + + +def isstringfunction(rout): + if not isfunction(rout): + return 0 + if 'result' in rout: + a = rout['result'] + else: + a = rout['name'] + if a in rout['vars']: + return isstring(rout['vars'][a]) + return 0 + + +def hasexternals(rout): + return 'externals' in rout and rout['externals'] + + +def isthreadsafe(rout): + return 'f2pyenhancements' in rout and \ + 'threadsafe' in rout['f2pyenhancements'] + + +def hasvariables(rout): + return 'vars' in rout and rout['vars'] + + +def isoptional(var): + return ('attrspec' in var and 'optional' in var['attrspec'] and + 'required' not in var['attrspec']) and isintent_nothide(var) + + +def isexternal(var): + return 'attrspec' in var and 'external' in var['attrspec'] + + +def isrequired(var): + return not isoptional(var) and isintent_nothide(var) + + +def isintent_in(var): + if 'intent' not in var: + return 1 + if 'hide' in var['intent']: + return 0 + if 'inplace' in var['intent']: + return 0 + if 'in' in var['intent']: + return 1 + if 'out' in var['intent']: + return 0 + if 'inout' in var['intent']: + return 0 + if 'outin' in var['intent']: + return 0 + return 1 + + +def isintent_inout(var): + return ('intent' in var and ('inout' in var['intent'] or + 'outin' in var['intent']) and 'in' not in var['intent'] and + 'hide' not in var['intent'] and 'inplace' not in var['intent']) + + +def isintent_out(var): + return 'out' in var.get('intent', []) + + +def isintent_hide(var): + return ('intent' in var and ('hide' in var['intent'] or + ('out' in var['intent'] and 'in' not in var['intent'] and + (not l_or(isintent_inout, isintent_inplace)(var))))) + +def isintent_nothide(var): + return not isintent_hide(var) + + +def isintent_c(var): + return 'c' in var.get('intent', []) + + +def isintent_cache(var): + return 'cache' in var.get('intent', []) + + +def isintent_copy(var): + return 'copy' in var.get('intent', []) + + +def isintent_overwrite(var): + return 'overwrite' in var.get('intent', []) + + +def isintent_callback(var): + return 'callback' in var.get('intent', []) + + +def isintent_inplace(var): + return 'inplace' in var.get('intent', []) + + +def isintent_aux(var): + return 'aux' in var.get('intent', []) + + +def isintent_aligned4(var): + return 'aligned4' in var.get('intent', []) + + +def isintent_aligned8(var): + return 'aligned8' in var.get('intent', []) + + +def isintent_aligned16(var): + return 'aligned16' in var.get('intent', []) + +isintent_dict = {isintent_in: 'INTENT_IN', isintent_inout: 'INTENT_INOUT', + isintent_out: 'INTENT_OUT', isintent_hide: 'INTENT_HIDE', + isintent_cache: 'INTENT_CACHE', + isintent_c: 'INTENT_C', isoptional: 'OPTIONAL', + isintent_inplace: 'INTENT_INPLACE', + isintent_aligned4: 'INTENT_ALIGNED4', + isintent_aligned8: 'INTENT_ALIGNED8', + isintent_aligned16: 'INTENT_ALIGNED16', + } + + +def isprivate(var): + return 'attrspec' in var and 'private' in var['attrspec'] + + +def hasinitvalue(var): + return '=' in var + + +def hasinitvalueasstring(var): + if not hasinitvalue(var): + return 0 + return var['='][0] in ['"', "'"] + + +def hasnote(var): + return 'note' in var + + +def hasresultnote(rout): + if not isfunction(rout): + return 0 + if 'result' in rout: + a = rout['result'] + else: + a = rout['name'] + if a in rout['vars']: + return hasnote(rout['vars'][a]) + return 0 + + +def hascommon(rout): + return 'common' 
in rout + + +def containscommon(rout): + if hascommon(rout): + return 1 + if hasbody(rout): + for b in rout['body']: + if containscommon(b): + return 1 + return 0 + + +def containsmodule(block): + if ismodule(block): + return 1 + if not hasbody(block): + return 0 + for b in block['body']: + if containsmodule(b): + return 1 + return 0 + + +def hasbody(rout): + return 'body' in rout + + +def hascallstatement(rout): + return getcallstatement(rout) is not None + + +def istrue(var): + return 1 + + +def isfalse(var): + return 0 + + +class F2PYError(Exception): + pass + + +class throw_error: + + def __init__(self, mess): + self.mess = mess + + def __call__(self, var): + mess = '\n\n var = %s\n Message: %s\n' % (var, self.mess) + raise F2PYError(mess) + + +def l_and(*f): + l, l2 = 'lambda v', [] + for i in range(len(f)): + l = '%s,f%d=f[%d]' % (l, i, i) + l2.append('f%d(v)' % (i)) + return eval('%s:%s' % (l, ' and '.join(l2))) + + +def l_or(*f): + l, l2 = 'lambda v', [] + for i in range(len(f)): + l = '%s,f%d=f[%d]' % (l, i, i) + l2.append('f%d(v)' % (i)) + return eval('%s:%s' % (l, ' or '.join(l2))) + + +def l_not(f): + return eval('lambda v,f=f:not f(v)') + + +def isdummyroutine(rout): + try: + return rout['f2pyenhancements']['fortranname'] == '' + except KeyError: + return 0 + + +def getfortranname(rout): + try: + name = rout['f2pyenhancements']['fortranname'] + if name == '': + raise KeyError + if not name: + errmess('Failed to use fortranname from %s\n' % + (rout['f2pyenhancements'])) + raise KeyError + except KeyError: + name = rout['name'] + return name + + +def getmultilineblock(rout, blockname, comment=1, counter=0): + try: + r = rout['f2pyenhancements'].get(blockname) + except KeyError: + return + if not r: + return + if counter > 0 and isinstance(r, str): + return + if isinstance(r, list): + if counter >= len(r): + return + r = r[counter] + if r[:3] == "'''": + if comment: + r = '\t/* start ' + blockname + \ + ' multiline (' + repr(counter) + ') */\n' + r[3:] + else: + r = r[3:] + if r[-3:] == "'''": + if comment: + r = r[:-3] + '\n\t/* end multiline (' + repr(counter) + ')*/' + else: + r = r[:-3] + else: + errmess("%s multiline block should end with `'''`: %s\n" + % (blockname, repr(r))) + return r + + +def getcallstatement(rout): + return getmultilineblock(rout, 'callstatement') + + +def getcallprotoargument(rout, cb_map={}): + r = getmultilineblock(rout, 'callprotoargument', comment=0) + if r: + return r + if hascallstatement(rout): + outmess( + 'warning: callstatement is defined without callprotoargument\n') + return + from .capi_maps import getctype + arg_types, arg_types2 = [], [] + if l_and(isstringfunction, l_not(isfunction_wrap))(rout): + arg_types.extend(['char*', 'size_t']) + for n in rout['args']: + var = rout['vars'][n] + if isintent_callback(var): + continue + if n in cb_map: + ctype = cb_map[n] + '_typedef' + else: + ctype = getctype(var) + if l_and(isintent_c, l_or(isscalar, iscomplex))(var): + pass + elif isstring(var): + pass + else: + ctype = ctype + '*' + if isstring(var) or isarrayofstrings(var): + arg_types2.append('size_t') + arg_types.append(ctype) + + proto_args = ','.join(arg_types + arg_types2) + if not proto_args: + proto_args = 'void' + return proto_args + + +def getusercode(rout): + return getmultilineblock(rout, 'usercode') + + +def getusercode1(rout): + return getmultilineblock(rout, 'usercode', counter=1) + + +def getpymethoddef(rout): + return getmultilineblock(rout, 'pymethoddef') + + +def getargs(rout): + sortargs, args = [], [] + if 'args' in rout: + 
args = rout['args'] + if 'sortvars' in rout: + for a in rout['sortvars']: + if a in args: + sortargs.append(a) + for a in args: + if a not in sortargs: + sortargs.append(a) + else: + sortargs = rout['args'] + return args, sortargs + + +def getargs2(rout): + sortargs, args = [], rout.get('args', []) + auxvars = [a for a in rout['vars'].keys() if isintent_aux(rout['vars'][a]) + and a not in args] + args = auxvars + args + if 'sortvars' in rout: + for a in rout['sortvars']: + if a in args: + sortargs.append(a) + for a in args: + if a not in sortargs: + sortargs.append(a) + else: + sortargs = auxvars + rout['args'] + return args, sortargs + + +def getrestdoc(rout): + if 'f2pymultilines' not in rout: + return None + k = None + if rout['block'] == 'python module': + k = rout['block'], rout['name'] + return rout['f2pymultilines'].get(k, None) + + +def gentitle(name): + l = (80 - len(name) - 6) // 2 + return '/*%s %s %s*/' % (l * '*', name, l * '*') + + +def flatlist(l): + if isinstance(l, list): + return reduce(lambda x, y, f=flatlist: x + f(y), l, []) + return [l] + + +def stripcomma(s): + if s and s[-1] == ',': + return s[:-1] + return s + + +def replace(str, d, defaultsep=''): + if isinstance(d, list): + return [replace(str, _m, defaultsep) for _m in d] + if isinstance(str, list): + return [replace(_m, d, defaultsep) for _m in str] + for k in 2 * list(d.keys()): + if k == 'separatorsfor': + continue + if 'separatorsfor' in d and k in d['separatorsfor']: + sep = d['separatorsfor'][k] + else: + sep = defaultsep + if isinstance(d[k], list): + str = str.replace('#%s#' % (k), sep.join(flatlist(d[k]))) + else: + str = str.replace('#%s#' % (k), d[k]) + return str + + +def dictappend(rd, ar): + if isinstance(ar, list): + for a in ar: + rd = dictappend(rd, a) + return rd + for k in ar.keys(): + if k[0] == '_': + continue + if k in rd: + if isinstance(rd[k], str): + rd[k] = [rd[k]] + if isinstance(rd[k], list): + if isinstance(ar[k], list): + rd[k] = rd[k] + ar[k] + else: + rd[k].append(ar[k]) + elif isinstance(rd[k], dict): + if isinstance(ar[k], dict): + if k == 'separatorsfor': + for k1 in ar[k].keys(): + if k1 not in rd[k]: + rd[k][k1] = ar[k][k1] + else: + rd[k] = dictappend(rd[k], ar[k]) + else: + rd[k] = ar[k] + return rd + + +def applyrules(rules, d, var={}): + ret = {} + if isinstance(rules, list): + for r in rules: + rr = applyrules(r, d, var) + ret = dictappend(ret, rr) + if '_break' in rr: + break + return ret + if '_check' in rules and (not rules['_check'](var)): + return ret + if 'need' in rules: + res = applyrules({'needs': rules['need']}, d, var) + if 'needs' in res: + cfuncs.append_needs(res['needs']) + + for k in rules.keys(): + if k == 'separatorsfor': + ret[k] = rules[k] + continue + if isinstance(rules[k], str): + ret[k] = replace(rules[k], d) + elif isinstance(rules[k], list): + ret[k] = [] + for i in rules[k]: + ar = applyrules({k: i}, d, var) + if k in ar: + ret[k].append(ar[k]) + elif k[0] == '_': + continue + elif isinstance(rules[k], dict): + ret[k] = [] + for k1 in rules[k].keys(): + if isinstance(k1, types.FunctionType) and k1(var): + if isinstance(rules[k][k1], list): + for i in rules[k][k1]: + if isinstance(i, dict): + res = applyrules({'supertext': i}, d, var) + if 'supertext' in res: + i = res['supertext'] + else: + i = '' + ret[k].append(replace(i, d)) + else: + i = rules[k][k1] + if isinstance(i, dict): + res = applyrules({'supertext': i}, d) + if 'supertext' in res: + i = res['supertext'] + else: + i = '' + ret[k].append(replace(i, d)) + else: + errmess('applyrules: 
ignoring rule %s.\n' % repr(rules[k])) + if isinstance(ret[k], list): + if len(ret[k]) == 1: + ret[k] = ret[k][0] + if ret[k] == []: + del ret[k] + return ret diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/capi_maps.py b/.local/lib/python3.8/site-packages/numpy/f2py/capi_maps.py new file mode 100644 index 0000000..581f946 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/capi_maps.py @@ -0,0 +1,834 @@ +#!/usr/bin/env python3 +""" + +Copyright 1999,2000 Pearu Peterson all rights reserved, +Pearu Peterson +Permission to use, modify, and distribute this software is given under the +terms of the NumPy License. + +NO WARRANTY IS EXPRESSED OR IMPLIED. USE AT YOUR OWN RISK. +$Date: 2005/05/06 10:57:33 $ +Pearu Peterson + +""" +from . import __version__ +f2py_version = __version__.version + +import copy +import re +import os +from .crackfortran import markoutercomma +from . import cb_rules + +# The environment provided by auxfuncs.py is needed for some calls to eval. +# As the needed functions cannot be determined by static inspection of the +# code, it is safest to use import * pending a major refactoring of f2py. +from .auxfuncs import * + +__all__ = [ + 'getctype', 'getstrlength', 'getarrdims', 'getpydocsign', + 'getarrdocsign', 'getinit', 'sign2map', 'routsign2map', 'modsign2map', + 'cb_sign2map', 'cb_routsign2map', 'common_sign2map' +] + + +# Numarray and Numeric users should set this False +using_newcore = True + +depargs = [] +lcb_map = {} +lcb2_map = {} +# forced casting: mainly caused by the fact that Python or Numeric +# C/APIs do not support the corresponding C types. +c2py_map = {'double': 'float', + 'float': 'float', # forced casting + 'long_double': 'float', # forced casting + 'char': 'int', # forced casting + 'signed_char': 'int', # forced casting + 'unsigned_char': 'int', # forced casting + 'short': 'int', # forced casting + 'unsigned_short': 'int', # forced casting + 'int': 'int', # forced casting + 'long': 'int', + 'long_long': 'long', + 'unsigned': 'int', # forced casting + 'complex_float': 'complex', # forced casting + 'complex_double': 'complex', + 'complex_long_double': 'complex', # forced casting + 'string': 'string', + } +c2capi_map = {'double': 'NPY_DOUBLE', + 'float': 'NPY_FLOAT', + 'long_double': 'NPY_DOUBLE', # forced casting + 'char': 'NPY_STRING', + 'unsigned_char': 'NPY_UBYTE', + 'signed_char': 'NPY_BYTE', + 'short': 'NPY_SHORT', + 'unsigned_short': 'NPY_USHORT', + 'int': 'NPY_INT', + 'unsigned': 'NPY_UINT', + 'long': 'NPY_LONG', + 'long_long': 'NPY_LONG', # forced casting + 'complex_float': 'NPY_CFLOAT', + 'complex_double': 'NPY_CDOUBLE', + 'complex_long_double': 'NPY_CDOUBLE', # forced casting + 'string': 'NPY_STRING'} + +# These new maps aren't used anywhere yet, but should be by default +# unless building numeric or numarray extensions. 
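+# Illustrative sketch (hypothetical REPL session, not part of f2py itself):
+# these tables drive the '#ctype#'/'#atype#' substitutions in the rule
+# templates.
+#     >>> from numpy.f2py import capi_maps
+#     >>> capi_maps.c2capi_map['long_long']
+#     'NPY_LONGLONG'
+# The newcore override below wins because using_newcore is True; the legacy
+# table above would have mapped long_long to the lossy 'NPY_LONG' instead.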
+if using_newcore: + c2capi_map = {'double': 'NPY_DOUBLE', + 'float': 'NPY_FLOAT', + 'long_double': 'NPY_LONGDOUBLE', + 'char': 'NPY_BYTE', + 'unsigned_char': 'NPY_UBYTE', + 'signed_char': 'NPY_BYTE', + 'short': 'NPY_SHORT', + 'unsigned_short': 'NPY_USHORT', + 'int': 'NPY_INT', + 'unsigned': 'NPY_UINT', + 'long': 'NPY_LONG', + 'unsigned_long': 'NPY_ULONG', + 'long_long': 'NPY_LONGLONG', + 'unsigned_long_long': 'NPY_ULONGLONG', + 'complex_float': 'NPY_CFLOAT', + 'complex_double': 'NPY_CDOUBLE', + 'complex_long_double': 'NPY_CDOUBLE', + 'string':'NPY_STRING' + } + +c2pycode_map = {'double': 'd', + 'float': 'f', + 'long_double': 'd', # forced casting + 'char': '1', + 'signed_char': '1', + 'unsigned_char': 'b', + 'short': 's', + 'unsigned_short': 'w', + 'int': 'i', + 'unsigned': 'u', + 'long': 'l', + 'long_long': 'L', + 'complex_float': 'F', + 'complex_double': 'D', + 'complex_long_double': 'D', # forced casting + 'string': 'c' + } + +if using_newcore: + c2pycode_map = {'double': 'd', + 'float': 'f', + 'long_double': 'g', + 'char': 'b', + 'unsigned_char': 'B', + 'signed_char': 'b', + 'short': 'h', + 'unsigned_short': 'H', + 'int': 'i', + 'unsigned': 'I', + 'long': 'l', + 'unsigned_long': 'L', + 'long_long': 'q', + 'unsigned_long_long': 'Q', + 'complex_float': 'F', + 'complex_double': 'D', + 'complex_long_double': 'G', + 'string': 'S'} + +c2buildvalue_map = {'double': 'd', + 'float': 'f', + 'char': 'b', + 'signed_char': 'b', + 'short': 'h', + 'int': 'i', + 'long': 'l', + 'long_long': 'L', + 'complex_float': 'N', + 'complex_double': 'N', + 'complex_long_double': 'N', + 'string': 'y'} + +f2cmap_all = {'real': {'': 'float', '4': 'float', '8': 'double', + '12': 'long_double', '16': 'long_double'}, + 'integer': {'': 'int', '1': 'signed_char', '2': 'short', + '4': 'int', '8': 'long_long', + '-1': 'unsigned_char', '-2': 'unsigned_short', + '-4': 'unsigned', '-8': 'unsigned_long_long'}, + 'complex': {'': 'complex_float', '8': 'complex_float', + '16': 'complex_double', '24': 'complex_long_double', + '32': 'complex_long_double'}, + 'complexkind': {'': 'complex_float', '4': 'complex_float', + '8': 'complex_double', '12': 'complex_long_double', + '16': 'complex_long_double'}, + 'logical': {'': 'int', '1': 'char', '2': 'short', '4': 'int', + '8': 'long_long'}, + 'double complex': {'': 'complex_double'}, + 'double precision': {'': 'double'}, + 'byte': {'': 'char'}, + 'character': {'': 'string'} + } + +f2cmap_default = copy.deepcopy(f2cmap_all) + + +def load_f2cmap_file(f2cmap_file): + global f2cmap_all + + f2cmap_all = copy.deepcopy(f2cmap_default) + + if f2cmap_file is None: + # Default value + f2cmap_file = '.f2py_f2cmap' + if not os.path.isfile(f2cmap_file): + return + + # User defined additions to f2cmap_all. + # f2cmap_file must contain a dictionary of dictionaries, only. For + # example, {'real':{'low':'float'}} means that Fortran 'real(low)' is + # interpreted as C 'float'. This feature is useful for F90/95 users if + # they use PARAMETERS in type specifications. 
+ try: + outmess('Reading f2cmap from {!r} ...\n'.format(f2cmap_file)) + with open(f2cmap_file, 'r') as f: + d = eval(f.read(), {}, {}) + for k, d1 in d.items(): + for k1 in d1.keys(): + d1[k1.lower()] = d1[k1] + d[k.lower()] = d[k] + for k in d.keys(): + if k not in f2cmap_all: + f2cmap_all[k] = {} + for k1 in d[k].keys(): + if d[k][k1] in c2py_map: + if k1 in f2cmap_all[k]: + outmess( + "\tWarning: redefinition of {'%s':{'%s':'%s'->'%s'}}\n" % (k, k1, f2cmap_all[k][k1], d[k][k1])) + f2cmap_all[k][k1] = d[k][k1] + outmess('\tMapping "%s(kind=%s)" to "%s"\n' % + (k, k1, d[k][k1])) + else: + errmess("\tIgnoring map {'%s':{'%s':'%s'}}: '%s' must be in %s\n" % ( + k, k1, d[k][k1], d[k][k1], list(c2py_map.keys()))) + outmess('Successfully applied user defined f2cmap changes\n') + except Exception as msg: + errmess( + 'Failed to apply user defined f2cmap changes: %s. Skipping.\n' % (msg)) + +cformat_map = {'double': '%g', + 'float': '%g', + 'long_double': '%Lg', + 'char': '%d', + 'signed_char': '%d', + 'unsigned_char': '%hhu', + 'short': '%hd', + 'unsigned_short': '%hu', + 'int': '%d', + 'unsigned': '%u', + 'long': '%ld', + 'unsigned_long': '%lu', + 'long_long': '%ld', + 'complex_float': '(%g,%g)', + 'complex_double': '(%g,%g)', + 'complex_long_double': '(%Lg,%Lg)', + 'string': '%s', + } + +# Auxiliary functions + + +def getctype(var): + """ + Determines C type + """ + ctype = 'void' + if isfunction(var): + if 'result' in var: + a = var['result'] + else: + a = var['name'] + if a in var['vars']: + return getctype(var['vars'][a]) + else: + errmess('getctype: function %s has no return value?!\n' % a) + elif issubroutine(var): + return ctype + elif 'typespec' in var and var['typespec'].lower() in f2cmap_all: + typespec = var['typespec'].lower() + f2cmap = f2cmap_all[typespec] + ctype = f2cmap[''] # default type + if 'kindselector' in var: + if '*' in var['kindselector']: + try: + ctype = f2cmap[var['kindselector']['*']] + except KeyError: + errmess('getctype: "%s %s %s" not supported.\n' % + (var['typespec'], '*', var['kindselector']['*'])) + elif 'kind' in var['kindselector']: + if typespec + 'kind' in f2cmap_all: + f2cmap = f2cmap_all[typespec + 'kind'] + try: + ctype = f2cmap[var['kindselector']['kind']] + except KeyError: + if typespec in f2cmap_all: + f2cmap = f2cmap_all[typespec] + try: + ctype = f2cmap[str(var['kindselector']['kind'])] + except KeyError: + errmess('getctype: "%s(kind=%s)" is mapped to C "%s" (to override define dict(%s = dict(%s="")) in %s/.f2py_f2cmap file).\n' + % (typespec, var['kindselector']['kind'], ctype, + typespec, var['kindselector']['kind'], os.getcwd())) + else: + if not isexternal(var): + errmess('getctype: No C-type found in "%s", assuming void.\n' % var) + return ctype + + +def getstrlength(var): + if isstringfunction(var): + if 'result' in var: + a = var['result'] + else: + a = var['name'] + if a in var['vars']: + return getstrlength(var['vars'][a]) + else: + errmess('getstrlength: function %s has no return value?!\n' % a) + if not isstring(var): + errmess( + 'getstrlength: expected a signature of a string but got: %s\n' % (repr(var))) + len = '1' + if 'charselector' in var: + a = var['charselector'] + if '*' in a: + len = a['*'] + elif 'len' in a: + len = a['len'] + if re.match(r'\(\s*(\*|:)\s*\)', len) or re.match(r'(\*|:)', len): + if isintent_hide(var): + errmess('getstrlength:intent(hide): expected a string with defined length but got: %s\n' % ( + repr(var))) + len = '-1' + return len + + +def getarrdims(a, var, verbose=0): + ret = {} + if isstring(var) 
and not isarray(var): + ret['dims'] = getstrlength(var) + ret['size'] = ret['dims'] + ret['rank'] = '1' + elif isscalar(var): + ret['size'] = '1' + ret['rank'] = '0' + ret['dims'] = '' + elif isarray(var): + dim = copy.copy(var['dimension']) + ret['size'] = '*'.join(dim) + try: + ret['size'] = repr(eval(ret['size'])) + except Exception: + pass + ret['dims'] = ','.join(dim) + ret['rank'] = repr(len(dim)) + ret['rank*[-1]'] = repr(len(dim) * [-1])[1:-1] + for i in range(len(dim)): # solve dim for dependencies + v = [] + if dim[i] in depargs: + v = [dim[i]] + else: + for va in depargs: + if re.match(r'.*?\b%s\b.*' % va, dim[i]): + v.append(va) + for va in v: + if depargs.index(va) > depargs.index(a): + dim[i] = '*' + break + ret['setdims'], i = '', -1 + for d in dim: + i = i + 1 + if d not in ['*', ':', '(*)', '(:)']: + ret['setdims'] = '%s#varname#_Dims[%d]=%s,' % ( + ret['setdims'], i, d) + if ret['setdims']: + ret['setdims'] = ret['setdims'][:-1] + ret['cbsetdims'], i = '', -1 + for d in var['dimension']: + i = i + 1 + if d not in ['*', ':', '(*)', '(:)']: + ret['cbsetdims'] = '%s#varname#_Dims[%d]=%s,' % ( + ret['cbsetdims'], i, d) + elif isintent_in(var): + outmess('getarrdims:warning: assumed shape array, using 0 instead of %r\n' + % (d)) + ret['cbsetdims'] = '%s#varname#_Dims[%d]=%s,' % ( + ret['cbsetdims'], i, 0) + elif verbose: + errmess( + 'getarrdims: If in call-back function: array argument %s must have bounded dimensions: got %s\n' % (repr(a), repr(d))) + if ret['cbsetdims']: + ret['cbsetdims'] = ret['cbsetdims'][:-1] +# if not isintent_c(var): +# var['dimension'].reverse() + return ret + + +def getpydocsign(a, var): + global lcb_map + if isfunction(var): + if 'result' in var: + af = var['result'] + else: + af = var['name'] + if af in var['vars']: + return getpydocsign(af, var['vars'][af]) + else: + errmess('getctype: function %s has no return value?!\n' % af) + return '', '' + sig, sigout = a, a + opt = '' + if isintent_in(var): + opt = 'input' + elif isintent_inout(var): + opt = 'in/output' + out_a = a + if isintent_out(var): + for k in var['intent']: + if k[:4] == 'out=': + out_a = k[4:] + break + init = '' + ctype = getctype(var) + + if hasinitvalue(var): + init, showinit = getinit(a, var) + init = ', optional\\n Default: %s' % showinit + if isscalar(var): + if isintent_inout(var): + sig = '%s : %s rank-0 array(%s,\'%s\')%s' % (a, opt, c2py_map[ctype], + c2pycode_map[ctype], init) + else: + sig = '%s : %s %s%s' % (a, opt, c2py_map[ctype], init) + sigout = '%s : %s' % (out_a, c2py_map[ctype]) + elif isstring(var): + if isintent_inout(var): + sig = '%s : %s rank-0 array(string(len=%s),\'c\')%s' % ( + a, opt, getstrlength(var), init) + else: + sig = '%s : %s string(len=%s)%s' % ( + a, opt, getstrlength(var), init) + sigout = '%s : string(len=%s)' % (out_a, getstrlength(var)) + elif isarray(var): + dim = var['dimension'] + rank = repr(len(dim)) + sig = '%s : %s rank-%s array(\'%s\') with bounds (%s)%s' % (a, opt, rank, + c2pycode_map[ + ctype], + ','.join(dim), init) + if a == out_a: + sigout = '%s : rank-%s array(\'%s\') with bounds (%s)'\ + % (a, rank, c2pycode_map[ctype], ','.join(dim)) + else: + sigout = '%s : rank-%s array(\'%s\') with bounds (%s) and %s storage'\ + % (out_a, rank, c2pycode_map[ctype], ','.join(dim), a) + elif isexternal(var): + ua = '' + if a in lcb_map and lcb_map[a] in lcb2_map and 'argname' in lcb2_map[lcb_map[a]]: + ua = lcb2_map[lcb_map[a]]['argname'] + if not ua == a: + ua = ' => %s' % ua + else: + ua = '' + sig = '%s : call-back function%s' % (a, ua) 
+ sigout = sig + else: + errmess( + 'getpydocsign: Could not resolve docsignature for "%s".\n' % a) + return sig, sigout + + +def getarrdocsign(a, var): + ctype = getctype(var) + if isstring(var) and (not isarray(var)): + sig = '%s : rank-0 array(string(len=%s),\'c\')' % (a, + getstrlength(var)) + elif isscalar(var): + sig = '%s : rank-0 array(%s,\'%s\')' % (a, c2py_map[ctype], + c2pycode_map[ctype],) + elif isarray(var): + dim = var['dimension'] + rank = repr(len(dim)) + sig = '%s : rank-%s array(\'%s\') with bounds (%s)' % (a, rank, + c2pycode_map[ + ctype], + ','.join(dim)) + return sig + + +def getinit(a, var): + if isstring(var): + init, showinit = '""', "''" + else: + init, showinit = '', '' + if hasinitvalue(var): + init = var['='] + showinit = init + if iscomplex(var) or iscomplexarray(var): + ret = {} + + try: + v = var["="] + if ',' in v: + ret['init.r'], ret['init.i'] = markoutercomma( + v[1:-1]).split('@,@') + else: + v = eval(v, {}, {}) + ret['init.r'], ret['init.i'] = str(v.real), str(v.imag) + except Exception: + raise ValueError( + 'getinit: expected complex number `(r,i)\' but got `%s\' as initial value of %r.' % (init, a)) + if isarray(var): + init = '(capi_c.r=%s,capi_c.i=%s,capi_c)' % ( + ret['init.r'], ret['init.i']) + elif isstring(var): + if not init: + init, showinit = '""', "''" + if init[0] == "'": + init = '"%s"' % (init[1:-1].replace('"', '\\"')) + if init[0] == '"': + showinit = "'%s'" % (init[1:-1]) + return init, showinit + + +def sign2map(a, var): + """ + varname,ctype,atype + init,init.r,init.i,pytype + vardebuginfo,vardebugshowvalue,varshowvalue + varrfromat + intent + """ + out_a = a + if isintent_out(var): + for k in var['intent']: + if k[:4] == 'out=': + out_a = k[4:] + break + ret = {'varname': a, 'outvarname': out_a, 'ctype': getctype(var)} + intent_flags = [] + for f, s in isintent_dict.items(): + if f(var): + intent_flags.append('F2PY_%s' % s) + if intent_flags: + # TODO: Evaluate intent_flags here. 
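+        # For instance (a hypothetical var dict): {'intent': ['inout'],
+        # 'attrspec': ['optional']} passes only isintent_inout and
+        # isoptional, so the join below yields
+        # 'F2PY_INTENT_INOUT|F2PY_OPTIONAL' (order follows isintent_dict).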
+ ret['intent'] = '|'.join(intent_flags) + else: + ret['intent'] = 'F2PY_INTENT_IN' + if isarray(var): + ret['varrformat'] = 'N' + elif ret['ctype'] in c2buildvalue_map: + ret['varrformat'] = c2buildvalue_map[ret['ctype']] + else: + ret['varrformat'] = 'O' + ret['init'], ret['showinit'] = getinit(a, var) + if hasinitvalue(var) and iscomplex(var) and not isarray(var): + ret['init.r'], ret['init.i'] = markoutercomma( + ret['init'][1:-1]).split('@,@') + if isexternal(var): + ret['cbnamekey'] = a + if a in lcb_map: + ret['cbname'] = lcb_map[a] + ret['maxnofargs'] = lcb2_map[lcb_map[a]]['maxnofargs'] + ret['nofoptargs'] = lcb2_map[lcb_map[a]]['nofoptargs'] + ret['cbdocstr'] = lcb2_map[lcb_map[a]]['docstr'] + ret['cblatexdocstr'] = lcb2_map[lcb_map[a]]['latexdocstr'] + else: + ret['cbname'] = a + errmess('sign2map: Confused: external %s is not in lcb_map%s.\n' % ( + a, list(lcb_map.keys()))) + if isstring(var): + ret['length'] = getstrlength(var) + if isarray(var): + ret = dictappend(ret, getarrdims(a, var)) + dim = copy.copy(var['dimension']) + if ret['ctype'] in c2capi_map: + ret['atype'] = c2capi_map[ret['ctype']] + # Debug info + if debugcapi(var): + il = [isintent_in, 'input', isintent_out, 'output', + isintent_inout, 'inoutput', isrequired, 'required', + isoptional, 'optional', isintent_hide, 'hidden', + iscomplex, 'complex scalar', + l_and(isscalar, l_not(iscomplex)), 'scalar', + isstring, 'string', isarray, 'array', + iscomplexarray, 'complex array', isstringarray, 'string array', + iscomplexfunction, 'complex function', + l_and(isfunction, l_not(iscomplexfunction)), 'function', + isexternal, 'callback', + isintent_callback, 'callback', + isintent_aux, 'auxiliary', + ] + rl = [] + for i in range(0, len(il), 2): + if il[i](var): + rl.append(il[i + 1]) + if isstring(var): + rl.append('slen(%s)=%s' % (a, ret['length'])) + if isarray(var): + ddim = ','.join( + map(lambda x, y: '%s|%s' % (x, y), var['dimension'], dim)) + rl.append('dims(%s)' % ddim) + if isexternal(var): + ret['vardebuginfo'] = 'debug-capi:%s=>%s:%s' % ( + a, ret['cbname'], ','.join(rl)) + else: + ret['vardebuginfo'] = 'debug-capi:%s %s=%s:%s' % ( + ret['ctype'], a, ret['showinit'], ','.join(rl)) + if isscalar(var): + if ret['ctype'] in cformat_map: + ret['vardebugshowvalue'] = 'debug-capi:%s=%s' % ( + a, cformat_map[ret['ctype']]) + if isstring(var): + ret['vardebugshowvalue'] = 'debug-capi:slen(%s)=%%d %s=\\"%%s\\"' % ( + a, a) + if isexternal(var): + ret['vardebugshowvalue'] = 'debug-capi:%s=%%p' % (a) + if ret['ctype'] in cformat_map: + ret['varshowvalue'] = '#name#:%s=%s' % (a, cformat_map[ret['ctype']]) + ret['showvalueformat'] = '%s' % (cformat_map[ret['ctype']]) + if isstring(var): + ret['varshowvalue'] = '#name#:slen(%s)=%%d %s=\\"%%s\\"' % (a, a) + ret['pydocsign'], ret['pydocsignout'] = getpydocsign(a, var) + if hasnote(var): + ret['note'] = var['note'] + return ret + + +def routsign2map(rout): + """ + name,NAME,begintitle,endtitle + rname,ctype,rformat + routdebugshowvalue + """ + global lcb_map + name = rout['name'] + fname = getfortranname(rout) + ret = {'name': name, + 'texname': name.replace('_', '\\_'), + 'name_lower': name.lower(), + 'NAME': name.upper(), + 'begintitle': gentitle(name), + 'endtitle': gentitle('end of %s' % name), + 'fortranname': fname, + 'FORTRANNAME': fname.upper(), + 'callstatement': getcallstatement(rout) or '', + 'usercode': getusercode(rout) or '', + 'usercode1': getusercode1(rout) or '', + } + if '_' in fname: + ret['F_FUNC'] = 'F_FUNC_US' + else: + ret['F_FUNC'] = 'F_FUNC' + if '_' 
in name: + ret['F_WRAPPEDFUNC'] = 'F_WRAPPEDFUNC_US' + else: + ret['F_WRAPPEDFUNC'] = 'F_WRAPPEDFUNC' + lcb_map = {} + if 'use' in rout: + for u in rout['use'].keys(): + if u in cb_rules.cb_map: + for un in cb_rules.cb_map[u]: + ln = un[0] + if 'map' in rout['use'][u]: + for k in rout['use'][u]['map'].keys(): + if rout['use'][u]['map'][k] == un[0]: + ln = k + break + lcb_map[ln] = un[1] + elif 'externals' in rout and rout['externals']: + errmess('routsign2map: Confused: function %s has externals %s but no "use" statement.\n' % ( + ret['name'], repr(rout['externals']))) + ret['callprotoargument'] = getcallprotoargument(rout, lcb_map) or '' + if isfunction(rout): + if 'result' in rout: + a = rout['result'] + else: + a = rout['name'] + ret['rname'] = a + ret['pydocsign'], ret['pydocsignout'] = getpydocsign(a, rout) + ret['ctype'] = getctype(rout['vars'][a]) + if hasresultnote(rout): + ret['resultnote'] = rout['vars'][a]['note'] + rout['vars'][a]['note'] = ['See elsewhere.'] + if ret['ctype'] in c2buildvalue_map: + ret['rformat'] = c2buildvalue_map[ret['ctype']] + else: + ret['rformat'] = 'O' + errmess('routsign2map: no c2buildvalue key for type %s\n' % + (repr(ret['ctype']))) + if debugcapi(rout): + if ret['ctype'] in cformat_map: + ret['routdebugshowvalue'] = 'debug-capi:%s=%s' % ( + a, cformat_map[ret['ctype']]) + if isstringfunction(rout): + ret['routdebugshowvalue'] = 'debug-capi:slen(%s)=%%d %s=\\"%%s\\"' % ( + a, a) + if isstringfunction(rout): + ret['rlength'] = getstrlength(rout['vars'][a]) + if ret['rlength'] == '-1': + errmess('routsign2map: expected explicit specification of the length of the string returned by the fortran function %s; taking 10.\n' % ( + repr(rout['name']))) + ret['rlength'] = '10' + if hasnote(rout): + ret['note'] = rout['note'] + rout['note'] = ['See elsewhere.'] + return ret + + +def modsign2map(m): + """ + modulename + """ + if ismodule(m): + ret = {'f90modulename': m['name'], + 'F90MODULENAME': m['name'].upper(), + 'texf90modulename': m['name'].replace('_', '\\_')} + else: + ret = {'modulename': m['name'], + 'MODULENAME': m['name'].upper(), + 'texmodulename': m['name'].replace('_', '\\_')} + ret['restdoc'] = getrestdoc(m) or [] + if hasnote(m): + ret['note'] = m['note'] + ret['usercode'] = getusercode(m) or '' + ret['usercode1'] = getusercode1(m) or '' + if m['body']: + ret['interface_usercode'] = getusercode(m['body'][0]) or '' + else: + ret['interface_usercode'] = '' + ret['pymethoddef'] = getpymethoddef(m) or '' + if 'coutput' in m: + ret['coutput'] = m['coutput'] + if 'f2py_wrapper_output' in m: + ret['f2py_wrapper_output'] = m['f2py_wrapper_output'] + return ret + + +def cb_sign2map(a, var, index=None): + ret = {'varname': a} + ret['varname_i'] = ret['varname'] + ret['ctype'] = getctype(var) + if ret['ctype'] in c2capi_map: + ret['atype'] = c2capi_map[ret['ctype']] + if ret['ctype'] in cformat_map: + ret['showvalueformat'] = '%s' % (cformat_map[ret['ctype']]) + if isarray(var): + ret = dictappend(ret, getarrdims(a, var)) + ret['pydocsign'], ret['pydocsignout'] = getpydocsign(a, var) + if hasnote(var): + ret['note'] = var['note'] + var['note'] = ['See elsewhere.'] + return ret + + +def cb_routsign2map(rout, um): + """ + name,begintitle,endtitle,argname + ctype,rctype,maxnofargs,nofoptargs,returncptr + """ + ret = {'name': 'cb_%s_in_%s' % (rout['name'], um), + 'returncptr': ''} + if isintent_callback(rout): + if '_' in rout['name']: + F_FUNC = 'F_FUNC_US' + else: + F_FUNC = 'F_FUNC' + ret['callbackname'] = '%s(%s,%s)' \ + % (F_FUNC, + rout['name'].lower(), 
+ rout['name'].upper(), + ) + ret['static'] = 'extern' + else: + ret['callbackname'] = ret['name'] + ret['static'] = 'static' + ret['argname'] = rout['name'] + ret['begintitle'] = gentitle(ret['name']) + ret['endtitle'] = gentitle('end of %s' % ret['name']) + ret['ctype'] = getctype(rout) + ret['rctype'] = 'void' + if ret['ctype'] == 'string': + ret['rctype'] = 'void' + else: + ret['rctype'] = ret['ctype'] + if ret['rctype'] != 'void': + if iscomplexfunction(rout): + ret['returncptr'] = """ +#ifdef F2PY_CB_RETURNCOMPLEX +return_value= +#endif +""" + else: + ret['returncptr'] = 'return_value=' + if ret['ctype'] in cformat_map: + ret['showvalueformat'] = '%s' % (cformat_map[ret['ctype']]) + if isstringfunction(rout): + ret['strlength'] = getstrlength(rout) + if isfunction(rout): + if 'result' in rout: + a = rout['result'] + else: + a = rout['name'] + if hasnote(rout['vars'][a]): + ret['note'] = rout['vars'][a]['note'] + rout['vars'][a]['note'] = ['See elsewhere.'] + ret['rname'] = a + ret['pydocsign'], ret['pydocsignout'] = getpydocsign(a, rout) + if iscomplexfunction(rout): + ret['rctype'] = """ +#ifdef F2PY_CB_RETURNCOMPLEX +#ctype# +#else +void +#endif +""" + else: + if hasnote(rout): + ret['note'] = rout['note'] + rout['note'] = ['See elsewhere.'] + nofargs = 0 + nofoptargs = 0 + if 'args' in rout and 'vars' in rout: + for a in rout['args']: + var = rout['vars'][a] + if l_or(isintent_in, isintent_inout)(var): + nofargs = nofargs + 1 + if isoptional(var): + nofoptargs = nofoptargs + 1 + ret['maxnofargs'] = repr(nofargs) + ret['nofoptargs'] = repr(nofoptargs) + if hasnote(rout) and isfunction(rout) and 'result' in rout: + ret['routnote'] = rout['note'] + rout['note'] = ['See elsewhere.'] + return ret + + +def common_sign2map(a, var): # obsolute + ret = {'varname': a, 'ctype': getctype(var)} + if isstringarray(var): + ret['ctype'] = 'char' + if ret['ctype'] in c2capi_map: + ret['atype'] = c2capi_map[ret['ctype']] + if ret['ctype'] in cformat_map: + ret['showvalueformat'] = '%s' % (cformat_map[ret['ctype']]) + if isarray(var): + ret = dictappend(ret, getarrdims(a, var)) + elif isstring(var): + ret['size'] = getstrlength(var) + ret['rank'] = '1' + ret['pydocsign'], ret['pydocsignout'] = getpydocsign(a, var) + if hasnote(var): + ret['note'] = var['note'] + var['note'] = ['See elsewhere.'] + # for strings this returns 0-rank but actually is 1-rank + ret['arrdocstr'] = getarrdocsign(a, var) + return ret diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/cb_rules.py b/.local/lib/python3.8/site-packages/numpy/f2py/cb_rules.py new file mode 100644 index 0000000..c2d7faa --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/cb_rules.py @@ -0,0 +1,640 @@ +#!/usr/bin/env python3 +""" + +Build call-back mechanism for f2py2e. + +Copyright 2000 Pearu Peterson all rights reserved, +Pearu Peterson +Permission to use, modify, and distribute this software is given under the +terms of the NumPy License. + +NO WARRANTY IS EXPRESSED OR IMPLIED. USE AT YOUR OWN RISK. +$Date: 2005/07/20 11:27:58 $ +Pearu Peterson + +""" +from . import __version__ +from .auxfuncs import ( + applyrules, debugcapi, dictappend, errmess, getargs, hasnote, isarray, + iscomplex, iscomplexarray, iscomplexfunction, isfunction, isintent_c, + isintent_hide, isintent_in, isintent_inout, isintent_nothide, + isintent_out, isoptional, isrequired, isscalar, isstring, + isstringfunction, issubroutine, l_and, l_not, l_or, outmess, replace, + stripcomma, throw_error +) +from . 
import cfuncs + +f2py_version = __version__.version + + +################## Rules for callback function ############## + +cb_routine_rules = { + 'cbtypedefs': 'typedef #rctype#(*#name#_typedef)(#optargs_td##args_td##strarglens_td##noargs#);', + 'body': """ +#begintitle# +typedef struct { + PyObject *capi; + PyTupleObject *args_capi; + int nofargs; + jmp_buf jmpbuf; +} #name#_t; + +#if defined(F2PY_THREAD_LOCAL_DECL) && !defined(F2PY_USE_PYTHON_TLS) + +static F2PY_THREAD_LOCAL_DECL #name#_t *_active_#name# = NULL; + +static #name#_t *swap_active_#name#(#name#_t *ptr) { + #name#_t *prev = _active_#name#; + _active_#name# = ptr; + return prev; +} + +static #name#_t *get_active_#name#(void) { + return _active_#name#; +} + +#else + +static #name#_t *swap_active_#name#(#name#_t *ptr) { + char *key = "__f2py_cb_#name#"; + return (#name#_t *)F2PySwapThreadLocalCallbackPtr(key, ptr); +} + +static #name#_t *get_active_#name#(void) { + char *key = "__f2py_cb_#name#"; + return (#name#_t *)F2PyGetThreadLocalCallbackPtr(key); +} + +#endif + +/*typedef #rctype#(*#name#_typedef)(#optargs_td##args_td##strarglens_td##noargs#);*/ +#static# #rctype# #callbackname# (#optargs##args##strarglens##noargs#) { + #name#_t cb_local = { NULL, NULL, 0 }; + #name#_t *cb = NULL; + PyTupleObject *capi_arglist = NULL; + PyObject *capi_return = NULL; + PyObject *capi_tmp = NULL; + PyObject *capi_arglist_list = NULL; + int capi_j,capi_i = 0; + int capi_longjmp_ok = 1; +#decl# +#ifdef F2PY_REPORT_ATEXIT +f2py_cb_start_clock(); +#endif + cb = get_active_#name#(); + if (cb == NULL) { + capi_longjmp_ok = 0; + cb = &cb_local; + } + capi_arglist = cb->args_capi; + CFUNCSMESS(\"cb:Call-back function #name# (maxnofargs=#maxnofargs#(-#nofoptargs#))\\n\"); + CFUNCSMESSPY(\"cb:#name#_capi=\",cb->capi); + if (cb->capi==NULL) { + capi_longjmp_ok = 0; + cb->capi = PyObject_GetAttrString(#modulename#_module,\"#argname#\"); + CFUNCSMESSPY(\"cb:#name#_capi=\",cb->capi); + } + if (cb->capi==NULL) { + PyErr_SetString(#modulename#_error,\"cb: Callback #argname# not defined (as an argument or module #modulename# attribute).\\n\"); + goto capi_fail; + } + if (F2PyCapsule_Check(cb->capi)) { + #name#_typedef #name#_cptr; + #name#_cptr = F2PyCapsule_AsVoidPtr(cb->capi); + #returncptr#(*#name#_cptr)(#optargs_nm##args_nm##strarglens_nm#); + #return# + } + if (capi_arglist==NULL) { + capi_longjmp_ok = 0; + capi_tmp = PyObject_GetAttrString(#modulename#_module,\"#argname#_extra_args\"); + if (capi_tmp) { + capi_arglist = (PyTupleObject *)PySequence_Tuple(capi_tmp); + Py_DECREF(capi_tmp); + if (capi_arglist==NULL) { + PyErr_SetString(#modulename#_error,\"Failed to convert #modulename#.#argname#_extra_args to tuple.\\n\"); + goto capi_fail; + } + } else { + PyErr_Clear(); + capi_arglist = (PyTupleObject *)Py_BuildValue(\"()\"); + } + } + if (capi_arglist == NULL) { + PyErr_SetString(#modulename#_error,\"Callback #argname# argument list is not set.\\n\"); + goto capi_fail; + } +#setdims# +#ifdef PYPY_VERSION +#define CAPI_ARGLIST_SETITEM(idx, value) PyList_SetItem((PyObject *)capi_arglist_list, idx, value) + capi_arglist_list = PySequence_List(capi_arglist); + if (capi_arglist_list == NULL) goto capi_fail; +#else +#define CAPI_ARGLIST_SETITEM(idx, value) PyTuple_SetItem((PyObject *)capi_arglist, idx, value) +#endif +#pyobjfrom# +#undef CAPI_ARGLIST_SETITEM +#ifdef PYPY_VERSION + CFUNCSMESSPY(\"cb:capi_arglist=\",capi_arglist_list); +#else + CFUNCSMESSPY(\"cb:capi_arglist=\",capi_arglist); +#endif + CFUNCSMESS(\"cb:Call-back calling Python function 
#argname#.\\n\"); +#ifdef F2PY_REPORT_ATEXIT +f2py_cb_start_call_clock(); +#endif +#ifdef PYPY_VERSION + capi_return = PyObject_CallObject(cb->capi,(PyObject *)capi_arglist_list); + Py_DECREF(capi_arglist_list); + capi_arglist_list = NULL; +#else + capi_return = PyObject_CallObject(cb->capi,(PyObject *)capi_arglist); +#endif +#ifdef F2PY_REPORT_ATEXIT +f2py_cb_stop_call_clock(); +#endif + CFUNCSMESSPY(\"cb:capi_return=\",capi_return); + if (capi_return == NULL) { + fprintf(stderr,\"capi_return is NULL\\n\"); + goto capi_fail; + } + if (capi_return == Py_None) { + Py_DECREF(capi_return); + capi_return = Py_BuildValue(\"()\"); + } + else if (!PyTuple_Check(capi_return)) { + capi_return = Py_BuildValue(\"(N)\",capi_return); + } + capi_j = PyTuple_Size(capi_return); + capi_i = 0; +#frompyobj# + CFUNCSMESS(\"cb:#name#:successful\\n\"); + Py_DECREF(capi_return); +#ifdef F2PY_REPORT_ATEXIT +f2py_cb_stop_clock(); +#endif + goto capi_return_pt; +capi_fail: + fprintf(stderr,\"Call-back #name# failed.\\n\"); + Py_XDECREF(capi_return); + Py_XDECREF(capi_arglist_list); + if (capi_longjmp_ok) { + longjmp(cb->jmpbuf,-1); + } +capi_return_pt: + ; +#return# +} +#endtitle# +""", + 'need': ['setjmp.h', 'CFUNCSMESS', 'F2PY_THREAD_LOCAL_DECL'], + 'maxnofargs': '#maxnofargs#', + 'nofoptargs': '#nofoptargs#', + 'docstr': """\ + def #argname#(#docsignature#): return #docreturn#\\n\\ +#docstrsigns#""", + 'latexdocstr': """ +{{}\\verb@def #argname#(#latexdocsignature#): return #docreturn#@{}} +#routnote# + +#latexdocstrsigns#""", + 'docstrshort': 'def #argname#(#docsignature#): return #docreturn#' +} +cb_rout_rules = [ + { # Init + 'separatorsfor': {'decl': '\n', + 'args': ',', 'optargs': '', 'pyobjfrom': '\n', 'freemem': '\n', + 'args_td': ',', 'optargs_td': '', + 'args_nm': ',', 'optargs_nm': '', + 'frompyobj': '\n', 'setdims': '\n', + 'docstrsigns': '\\n"\n"', + 'latexdocstrsigns': '\n', + 'latexdocstrreq': '\n', 'latexdocstropt': '\n', + 'latexdocstrout': '\n', 'latexdocstrcbs': '\n', + }, + 'decl': '/*decl*/', 'pyobjfrom': '/*pyobjfrom*/', 'frompyobj': '/*frompyobj*/', + 'args': [], 'optargs': '', 'return': '', 'strarglens': '', 'freemem': '/*freemem*/', + 'args_td': [], 'optargs_td': '', 'strarglens_td': '', + 'args_nm': [], 'optargs_nm': '', 'strarglens_nm': '', + 'noargs': '', + 'setdims': '/*setdims*/', + 'docstrsigns': '', 'latexdocstrsigns': '', + 'docstrreq': ' Required arguments:', + 'docstropt': ' Optional arguments:', + 'docstrout': ' Return objects:', + 'docstrcbs': ' Call-back functions:', + 'docreturn': '', 'docsign': '', 'docsignopt': '', + 'latexdocstrreq': '\\noindent Required arguments:', + 'latexdocstropt': '\\noindent Optional arguments:', + 'latexdocstrout': '\\noindent Return objects:', + 'latexdocstrcbs': '\\noindent Call-back functions:', + 'routnote': {hasnote: '--- #note#', l_not(hasnote): ''}, + }, { # Function + 'decl': ' #ctype# return_value = 0;', + 'frompyobj': [ + {debugcapi: ' CFUNCSMESS("cb:Getting return_value->");'}, + '''\ + if (capi_j>capi_i) { + GETSCALARFROMPYTUPLE(capi_return,capi_i++,&return_value,#ctype#, + "#ctype#_from_pyobj failed in converting return_value of" + " call-back function #name# to C #ctype#\\n"); + } else { + fprintf(stderr,"Warning: call-back function #name# did not provide" + " return value (index=%d, type=#ctype#)\\n",capi_i); + }''', + {debugcapi: + ' fprintf(stderr,"#showvalueformat#.\\n",return_value);'} + ], + 'need': ['#ctype#_from_pyobj', {debugcapi: 'CFUNCSMESS'}, 'GETSCALARFROMPYTUPLE'], + 'return': ' return return_value;', + '_check': 
l_and(isfunction, l_not(isstringfunction), l_not(iscomplexfunction)) + }, + { # String function + 'pyobjfrom': {debugcapi: ' fprintf(stderr,"debug-capi:cb:#name#:%d:\\n",return_value_len);'}, + 'args': '#ctype# return_value,int return_value_len', + 'args_nm': 'return_value,&return_value_len', + 'args_td': '#ctype# ,int', + 'frompyobj': [ + {debugcapi: ' CFUNCSMESS("cb:Getting return_value->\\"");'}, + """\ + if (capi_j>capi_i) { + GETSTRFROMPYTUPLE(capi_return,capi_i++,return_value,return_value_len); + } else { + fprintf(stderr,"Warning: call-back function #name# did not provide" + " return value (index=%d, type=#ctype#)\\n",capi_i); + }""", + {debugcapi: + ' fprintf(stderr,"#showvalueformat#\\".\\n",return_value);'} + ], + 'need': ['#ctype#_from_pyobj', {debugcapi: 'CFUNCSMESS'}, + 'string.h', 'GETSTRFROMPYTUPLE'], + 'return': 'return;', + '_check': isstringfunction + }, + { # Complex function + 'optargs': """ +#ifndef F2PY_CB_RETURNCOMPLEX +#ctype# *return_value +#endif +""", + 'optargs_nm': """ +#ifndef F2PY_CB_RETURNCOMPLEX +return_value +#endif +""", + 'optargs_td': """ +#ifndef F2PY_CB_RETURNCOMPLEX +#ctype# * +#endif +""", + 'decl': """ +#ifdef F2PY_CB_RETURNCOMPLEX + #ctype# return_value = {0, 0}; +#endif +""", + 'frompyobj': [ + {debugcapi: ' CFUNCSMESS("cb:Getting return_value->");'}, + """\ + if (capi_j>capi_i) { +#ifdef F2PY_CB_RETURNCOMPLEX + GETSCALARFROMPYTUPLE(capi_return,capi_i++,&return_value,#ctype#, + \"#ctype#_from_pyobj failed in converting return_value of call-back\" + \" function #name# to C #ctype#\\n\"); +#else + GETSCALARFROMPYTUPLE(capi_return,capi_i++,return_value,#ctype#, + \"#ctype#_from_pyobj failed in converting return_value of call-back\" + \" function #name# to C #ctype#\\n\"); +#endif + } else { + fprintf(stderr, + \"Warning: call-back function #name# did not provide\" + \" return value (index=%d, type=#ctype#)\\n\",capi_i); + }""", + {debugcapi: """\ +#ifdef F2PY_CB_RETURNCOMPLEX + fprintf(stderr,\"#showvalueformat#.\\n\",(return_value).r,(return_value).i); +#else + fprintf(stderr,\"#showvalueformat#.\\n\",(*return_value).r,(*return_value).i); +#endif +"""} + ], + 'return': """ +#ifdef F2PY_CB_RETURNCOMPLEX + return return_value; +#else + return; +#endif +""", + 'need': ['#ctype#_from_pyobj', {debugcapi: 'CFUNCSMESS'}, + 'string.h', 'GETSCALARFROMPYTUPLE', '#ctype#'], + '_check': iscomplexfunction + }, + {'docstrout': ' #pydocsignout#', + 'latexdocstrout': ['\\item[]{{}\\verb@#pydocsignout#@{}}', + {hasnote: '--- #note#'}], + 'docreturn': '#rname#,', + '_check': isfunction}, + {'_check': issubroutine, 'return': 'return;'} +] + +cb_arg_rules = [ + { # Doc + 'docstropt': {l_and(isoptional, isintent_nothide): ' #pydocsign#'}, + 'docstrreq': {l_and(isrequired, isintent_nothide): ' #pydocsign#'}, + 'docstrout': {isintent_out: ' #pydocsignout#'}, + 'latexdocstropt': {l_and(isoptional, isintent_nothide): ['\\item[]{{}\\verb@#pydocsign#@{}}', + {hasnote: '--- #note#'}]}, + 'latexdocstrreq': {l_and(isrequired, isintent_nothide): ['\\item[]{{}\\verb@#pydocsign#@{}}', + {hasnote: '--- #note#'}]}, + 'latexdocstrout': {isintent_out: ['\\item[]{{}\\verb@#pydocsignout#@{}}', + {l_and(hasnote, isintent_hide): '--- #note#', + l_and(hasnote, isintent_nothide): '--- See above.'}]}, + 'docsign': {l_and(isrequired, isintent_nothide): '#varname#,'}, + 'docsignopt': {l_and(isoptional, isintent_nothide): '#varname#,'}, + 'depend': '' + }, + { + 'args': { + l_and(isscalar, isintent_c): '#ctype# #varname_i#', + l_and(isscalar, l_not(isintent_c)): '#ctype# *#varname_i#_cb_capi', 
+ isarray: '#ctype# *#varname_i#', + isstring: '#ctype# #varname_i#' + }, + 'args_nm': { + l_and(isscalar, isintent_c): '#varname_i#', + l_and(isscalar, l_not(isintent_c)): '#varname_i#_cb_capi', + isarray: '#varname_i#', + isstring: '#varname_i#' + }, + 'args_td': { + l_and(isscalar, isintent_c): '#ctype#', + l_and(isscalar, l_not(isintent_c)): '#ctype# *', + isarray: '#ctype# *', + isstring: '#ctype#' + }, + 'need': {l_or(isscalar, isarray, isstring): '#ctype#'}, + # untested with multiple args + 'strarglens': {isstring: ',int #varname_i#_cb_len'}, + 'strarglens_td': {isstring: ',int'}, # untested with multiple args + # untested with multiple args + 'strarglens_nm': {isstring: ',#varname_i#_cb_len'}, + }, + { # Scalars + 'decl': {l_not(isintent_c): ' #ctype# #varname_i#=(*#varname_i#_cb_capi);'}, + 'error': {l_and(isintent_c, isintent_out, + throw_error('intent(c,out) is forbidden for callback scalar arguments')): + ''}, + 'frompyobj': [{debugcapi: ' CFUNCSMESS("cb:Getting #varname#->");'}, + {isintent_out: + ' if (capi_j>capi_i)\n GETSCALARFROMPYTUPLE(capi_return,capi_i++,#varname_i#_cb_capi,#ctype#,"#ctype#_from_pyobj failed in converting argument #varname# of call-back function #name# to C #ctype#\\n");'}, + {l_and(debugcapi, l_and(l_not(iscomplex), isintent_c)): + ' fprintf(stderr,"#showvalueformat#.\\n",#varname_i#);'}, + {l_and(debugcapi, l_and(l_not(iscomplex), l_not( isintent_c))): + ' fprintf(stderr,"#showvalueformat#.\\n",*#varname_i#_cb_capi);'}, + {l_and(debugcapi, l_and(iscomplex, isintent_c)): + ' fprintf(stderr,"#showvalueformat#.\\n",(#varname_i#).r,(#varname_i#).i);'}, + {l_and(debugcapi, l_and(iscomplex, l_not( isintent_c))): + ' fprintf(stderr,"#showvalueformat#.\\n",(*#varname_i#_cb_capi).r,(*#varname_i#_cb_capi).i);'}, + ], + 'need': [{isintent_out: ['#ctype#_from_pyobj', 'GETSCALARFROMPYTUPLE']}, + {debugcapi: 'CFUNCSMESS'}], + '_check': isscalar + }, { + 'pyobjfrom': [{isintent_in: """\ + if (cb->nofargs>capi_i) + if (CAPI_ARGLIST_SETITEM(capi_i++,pyobj_from_#ctype#1(#varname_i#))) + goto capi_fail;"""}, + {isintent_inout: """\ + if (cb->nofargs>capi_i) + if (CAPI_ARGLIST_SETITEM(capi_i++,pyarr_from_p_#ctype#1(#varname_i#_cb_capi))) + goto capi_fail;"""}], + 'need': [{isintent_in: 'pyobj_from_#ctype#1'}, + {isintent_inout: 'pyarr_from_p_#ctype#1'}, + {iscomplex: '#ctype#'}], + '_check': l_and(isscalar, isintent_nothide), + '_optional': '' + }, { # String + 'frompyobj': [{debugcapi: ' CFUNCSMESS("cb:Getting #varname#->\\"");'}, + """ if (capi_j>capi_i) + GETSTRFROMPYTUPLE(capi_return,capi_i++,#varname_i#,#varname_i#_cb_len);""", + {debugcapi: + ' fprintf(stderr,"#showvalueformat#\\":%d:.\\n",#varname_i#,#varname_i#_cb_len);'}, + ], + 'need': ['#ctype#', 'GETSTRFROMPYTUPLE', + {debugcapi: 'CFUNCSMESS'}, 'string.h'], + '_check': l_and(isstring, isintent_out) + }, { + 'pyobjfrom': [{debugcapi: ' fprintf(stderr,"debug-capi:cb:#varname#=\\"#showvalueformat#\\":%d:\\n",#varname_i#,#varname_i#_cb_len);'}, + {isintent_in: """\ + if (cb->nofargs>capi_i) + if (CAPI_ARGLIST_SETITEM(capi_i++,pyobj_from_#ctype#1size(#varname_i#,#varname_i#_cb_len))) + goto capi_fail;"""}, + {isintent_inout: """\ + if (cb->nofargs>capi_i) { + int #varname_i#_cb_dims[] = {#varname_i#_cb_len}; + if (CAPI_ARGLIST_SETITEM(capi_i++,pyarr_from_p_#ctype#1(#varname_i#,#varname_i#_cb_dims))) + goto capi_fail; + }"""}], + 'need': [{isintent_in: 'pyobj_from_#ctype#1size'}, + {isintent_inout: 'pyarr_from_p_#ctype#1'}], + '_check': l_and(isstring, isintent_nothide), + '_optional': '' + }, + # Array ... 
+ { + 'decl': ' npy_intp #varname_i#_Dims[#rank#] = {#rank*[-1]#};', + 'setdims': ' #cbsetdims#;', + '_check': isarray, + '_depend': '' + }, + { + 'pyobjfrom': [{debugcapi: ' fprintf(stderr,"debug-capi:cb:#varname#\\n");'}, + {isintent_c: """\ + if (cb->nofargs>capi_i) { + int itemsize_ = #atype# == NPY_STRING ? 1 : 0; + /*XXX: Hmm, what will destroy this array??? */ + PyArrayObject *tmp_arr = (PyArrayObject *)PyArray_New(&PyArray_Type,#rank#,#varname_i#_Dims,#atype#,NULL,(char*)#varname_i#,itemsize_,NPY_ARRAY_CARRAY,NULL); +""", + l_not(isintent_c): """\ + if (cb->nofargs>capi_i) { + int itemsize_ = #atype# == NPY_STRING ? 1 : 0; + /*XXX: Hmm, what will destroy this array??? */ + PyArrayObject *tmp_arr = (PyArrayObject *)PyArray_New(&PyArray_Type,#rank#,#varname_i#_Dims,#atype#,NULL,(char*)#varname_i#,itemsize_,NPY_ARRAY_FARRAY,NULL); +""", + }, + """ + if (tmp_arr==NULL) + goto capi_fail; + if (CAPI_ARGLIST_SETITEM(capi_i++,(PyObject *)tmp_arr)) + goto capi_fail; +}"""], + '_check': l_and(isarray, isintent_nothide, l_or(isintent_in, isintent_inout)), + '_optional': '', + }, { + 'frompyobj': [{debugcapi: ' CFUNCSMESS("cb:Getting #varname#->");'}, + """ if (capi_j>capi_i) { + PyArrayObject *rv_cb_arr = NULL; + if ((capi_tmp = PyTuple_GetItem(capi_return,capi_i++))==NULL) goto capi_fail; + rv_cb_arr = array_from_pyobj(#atype#,#varname_i#_Dims,#rank#,F2PY_INTENT_IN""", + {isintent_c: '|F2PY_INTENT_C'}, + """,capi_tmp); + if (rv_cb_arr == NULL) { + fprintf(stderr,\"rv_cb_arr is NULL\\n\"); + goto capi_fail; + } + MEMCOPY(#varname_i#,PyArray_DATA(rv_cb_arr),PyArray_NBYTES(rv_cb_arr)); + if (capi_tmp != (PyObject *)rv_cb_arr) { + Py_DECREF(rv_cb_arr); + } + }""", + {debugcapi: ' fprintf(stderr,"<-.\\n");'}, + ], + 'need': ['MEMCOPY', {iscomplexarray: '#ctype#'}], + '_check': l_and(isarray, isintent_out) + }, { + 'docreturn': '#varname#,', + '_check': isintent_out + } +] + +################## Build call-back module ############# +cb_map = {} + + +def buildcallbacks(m): + cb_map[m['name']] = [] + for bi in m['body']: + if bi['block'] == 'interface': + for b in bi['body']: + if b: + buildcallback(b, m['name']) + else: + errmess('warning: empty body for %s\n' % (m['name'])) + + +def buildcallback(rout, um): + from . 
import capi_maps + + outmess(' Constructing call-back function "cb_%s_in_%s"\n' % + (rout['name'], um)) + args, depargs = getargs(rout) + capi_maps.depargs = depargs + var = rout['vars'] + vrd = capi_maps.cb_routsign2map(rout, um) + rd = dictappend({}, vrd) + cb_map[um].append([rout['name'], rd['name']]) + for r in cb_rout_rules: + if ('_check' in r and r['_check'](rout)) or ('_check' not in r): + ar = applyrules(r, vrd, rout) + rd = dictappend(rd, ar) + savevrd = {} + for i, a in enumerate(args): + vrd = capi_maps.cb_sign2map(a, var[a], index=i) + savevrd[a] = vrd + for r in cb_arg_rules: + if '_depend' in r: + continue + if '_optional' in r and isoptional(var[a]): + continue + if ('_check' in r and r['_check'](var[a])) or ('_check' not in r): + ar = applyrules(r, vrd, var[a]) + rd = dictappend(rd, ar) + if '_break' in r: + break + for a in args: + vrd = savevrd[a] + for r in cb_arg_rules: + if '_depend' in r: + continue + if ('_optional' not in r) or ('_optional' in r and isrequired(var[a])): + continue + if ('_check' in r and r['_check'](var[a])) or ('_check' not in r): + ar = applyrules(r, vrd, var[a]) + rd = dictappend(rd, ar) + if '_break' in r: + break + for a in depargs: + vrd = savevrd[a] + for r in cb_arg_rules: + if '_depend' not in r: + continue + if '_optional' in r: + continue + if ('_check' in r and r['_check'](var[a])) or ('_check' not in r): + ar = applyrules(r, vrd, var[a]) + rd = dictappend(rd, ar) + if '_break' in r: + break + if 'args' in rd and 'optargs' in rd: + if isinstance(rd['optargs'], list): + rd['optargs'] = rd['optargs'] + [""" +#ifndef F2PY_CB_RETURNCOMPLEX +, +#endif +"""] + rd['optargs_nm'] = rd['optargs_nm'] + [""" +#ifndef F2PY_CB_RETURNCOMPLEX +, +#endif +"""] + rd['optargs_td'] = rd['optargs_td'] + [""" +#ifndef F2PY_CB_RETURNCOMPLEX +, +#endif +"""] + if isinstance(rd['docreturn'], list): + rd['docreturn'] = stripcomma( + replace('#docreturn#', {'docreturn': rd['docreturn']})) + optargs = stripcomma(replace('#docsignopt#', + {'docsignopt': rd['docsignopt']} + )) + if optargs == '': + rd['docsignature'] = stripcomma( + replace('#docsign#', {'docsign': rd['docsign']})) + else: + rd['docsignature'] = replace('#docsign#[#docsignopt#]', + {'docsign': rd['docsign'], + 'docsignopt': optargs, + }) + rd['latexdocsignature'] = rd['docsignature'].replace('_', '\\_') + rd['latexdocsignature'] = rd['latexdocsignature'].replace(',', ', ') + rd['docstrsigns'] = [] + rd['latexdocstrsigns'] = [] + for k in ['docstrreq', 'docstropt', 'docstrout', 'docstrcbs']: + if k in rd and isinstance(rd[k], list): + rd['docstrsigns'] = rd['docstrsigns'] + rd[k] + k = 'latex' + k + if k in rd and isinstance(rd[k], list): + rd['latexdocstrsigns'] = rd['latexdocstrsigns'] + rd[k][0:1] +\ + ['\\begin{description}'] + rd[k][1:] +\ + ['\\end{description}'] + if 'args' not in rd: + rd['args'] = '' + rd['args_td'] = '' + rd['args_nm'] = '' + if not (rd.get('args') or rd.get('optargs') or rd.get('strarglens')): + rd['noargs'] = 'void' + + ar = applyrules(cb_routine_rules, rd) + cfuncs.callbacks[rd['name']] = ar['body'] + if isinstance(ar['need'], str): + ar['need'] = [ar['need']] + + if 'need' in rd: + for t in cfuncs.typedefs.keys(): + if t in rd['need']: + ar['need'].append(t) + + cfuncs.typedefs_generated[rd['name'] + '_typedef'] = ar['cbtypedefs'] + ar['need'].append(rd['name'] + '_typedef') + cfuncs.needs[rd['name']] = ar['need'] + + capi_maps.lcb2_map[rd['name']] = {'maxnofargs': ar['maxnofargs'], + 'nofoptargs': ar['nofoptargs'], + 'docstr': ar['docstr'], + 'latexdocstr': 
ar['latexdocstr'],
+                                      'argname': rd['argname']
+                                      }
+    outmess(' %s\n' % (ar['docstrshort']))
+    return
+################## Build call-back function #############
diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/cfuncs.py b/.local/lib/python3.8/site-packages/numpy/f2py/cfuncs.py
new file mode 100644
index 0000000..bdd27ad
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/f2py/cfuncs.py
@@ -0,0 +1,1469 @@
+#!/usr/bin/env python3
+"""
+
+C declarations, CPP macros, and C functions for f2py2e.
+Only required declarations/macros/functions will be used.
+
+Copyright 1999,2000 Pearu Peterson all rights reserved,
+Pearu Peterson <pearu@ioc.ee>
+Permission to use, modify, and distribute this software is given under the
+terms of the NumPy License.
+
+NO WARRANTY IS EXPRESSED OR IMPLIED. USE AT YOUR OWN RISK.
+$Date: 2005/05/06 11:42:34 $
+Pearu Peterson
+
+"""
+import sys
+import copy
+
+from . import __version__
+
+f2py_version = __version__.version
+errmess = sys.stderr.write
+
+##################### Definitions ##################
+
+outneeds = {'includes0': [], 'includes': [], 'typedefs': [], 'typedefs_generated': [],
+            'userincludes': [],
+            'cppmacros': [], 'cfuncs': [], 'callbacks': [], 'f90modhooks': [],
+            'commonhooks': []}
+needs = {}
+includes0 = {'includes0': '/*need_includes0*/'}
+includes = {'includes': '/*need_includes*/'}
+userincludes = {'userincludes': '/*need_userincludes*/'}
+typedefs = {'typedefs': '/*need_typedefs*/'}
+typedefs_generated = {'typedefs_generated': '/*need_typedefs_generated*/'}
+cppmacros = {'cppmacros': '/*need_cppmacros*/'}
+cfuncs = {'cfuncs': '/*need_cfuncs*/'}
+callbacks = {'callbacks': '/*need_callbacks*/'}
+f90modhooks = {'f90modhooks': '/*need_f90modhooks*/',
+               'initf90modhooksstatic': '/*initf90modhooksstatic*/',
+               'initf90modhooksdynamic': '/*initf90modhooksdynamic*/',
+               }
+commonhooks = {'commonhooks': '/*need_commonhooks*/',
+               'initcommonhooks': '/*need_initcommonhooks*/',
+               }
+
+############ Includes ###################
+
+includes0['math.h'] = '#include <math.h>'
+includes0['string.h'] = '#include <string.h>'
+includes0['setjmp.h'] = '#include <setjmp.h>'
+
+includes['Python.h'] = '#include <Python.h>'
+needs['arrayobject.h'] = ['Python.h']
+includes['arrayobject.h'] = '''#define PY_ARRAY_UNIQUE_SYMBOL PyArray_API
+#include "arrayobject.h"'''
+
+includes['arrayobject.h'] = '#include "fortranobject.h"'
+includes['stdarg.h'] = '#include <stdarg.h>'
+
+############# Type definitions ###############
+
+typedefs['unsigned_char'] = 'typedef unsigned char unsigned_char;'
+typedefs['unsigned_short'] = 'typedef unsigned short unsigned_short;'
+typedefs['unsigned_long'] = 'typedef unsigned long unsigned_long;'
+typedefs['signed_char'] = 'typedef signed char signed_char;'
+typedefs['long_long'] = """\
+#ifdef _WIN32
+typedef __int64 long_long;
+#else
+typedef long long long_long;
+typedef unsigned long long unsigned_long_long;
+#endif
+"""
+typedefs['unsigned_long_long'] = """\
+#ifdef _WIN32
+typedef unsigned __int64 unsigned_long_long;
+#else
+typedef unsigned long long unsigned_long_long;
+#endif
+"""
+typedefs['long_double'] = """\
+#ifndef _LONG_DOUBLE
+typedef long double long_double;
+#endif
+"""
+typedefs[
+    'complex_long_double'] = 'typedef struct {long double r,i;} complex_long_double;'
+typedefs['complex_float'] = 'typedef struct {float r,i;} complex_float;'
+typedefs['complex_double'] = 'typedef struct {double r,i;} complex_double;'
+typedefs['string'] = """typedef char * string;"""
+
+
+############### CPP macros ####################
+cppmacros['CFUNCSMESS'] = """\
+#ifdef DEBUGCFUNCS
+#define 
CFUNCSMESS(mess) fprintf(stderr,\"debug-capi:\"mess); +#define CFUNCSMESSPY(mess,obj) CFUNCSMESS(mess) \\ + PyObject_Print((PyObject *)obj,stderr,Py_PRINT_RAW);\\ + fprintf(stderr,\"\\n\"); +#else +#define CFUNCSMESS(mess) +#define CFUNCSMESSPY(mess,obj) +#endif +""" +cppmacros['F_FUNC'] = """\ +#if defined(PREPEND_FORTRAN) +#if defined(NO_APPEND_FORTRAN) +#if defined(UPPERCASE_FORTRAN) +#define F_FUNC(f,F) _##F +#else +#define F_FUNC(f,F) _##f +#endif +#else +#if defined(UPPERCASE_FORTRAN) +#define F_FUNC(f,F) _##F##_ +#else +#define F_FUNC(f,F) _##f##_ +#endif +#endif +#else +#if defined(NO_APPEND_FORTRAN) +#if defined(UPPERCASE_FORTRAN) +#define F_FUNC(f,F) F +#else +#define F_FUNC(f,F) f +#endif +#else +#if defined(UPPERCASE_FORTRAN) +#define F_FUNC(f,F) F##_ +#else +#define F_FUNC(f,F) f##_ +#endif +#endif +#endif +#if defined(UNDERSCORE_G77) +#define F_FUNC_US(f,F) F_FUNC(f##_,F##_) +#else +#define F_FUNC_US(f,F) F_FUNC(f,F) +#endif +""" +cppmacros['F_WRAPPEDFUNC'] = """\ +#if defined(PREPEND_FORTRAN) +#if defined(NO_APPEND_FORTRAN) +#if defined(UPPERCASE_FORTRAN) +#define F_WRAPPEDFUNC(f,F) _F2PYWRAP##F +#else +#define F_WRAPPEDFUNC(f,F) _f2pywrap##f +#endif +#else +#if defined(UPPERCASE_FORTRAN) +#define F_WRAPPEDFUNC(f,F) _F2PYWRAP##F##_ +#else +#define F_WRAPPEDFUNC(f,F) _f2pywrap##f##_ +#endif +#endif +#else +#if defined(NO_APPEND_FORTRAN) +#if defined(UPPERCASE_FORTRAN) +#define F_WRAPPEDFUNC(f,F) F2PYWRAP##F +#else +#define F_WRAPPEDFUNC(f,F) f2pywrap##f +#endif +#else +#if defined(UPPERCASE_FORTRAN) +#define F_WRAPPEDFUNC(f,F) F2PYWRAP##F##_ +#else +#define F_WRAPPEDFUNC(f,F) f2pywrap##f##_ +#endif +#endif +#endif +#if defined(UNDERSCORE_G77) +#define F_WRAPPEDFUNC_US(f,F) F_WRAPPEDFUNC(f##_,F##_) +#else +#define F_WRAPPEDFUNC_US(f,F) F_WRAPPEDFUNC(f,F) +#endif +""" +cppmacros['F_MODFUNC'] = """\ +#if defined(F90MOD2CCONV1) /*E.g. Compaq Fortran */ +#if defined(NO_APPEND_FORTRAN) +#define F_MODFUNCNAME(m,f) $ ## m ## $ ## f +#else +#define F_MODFUNCNAME(m,f) $ ## m ## $ ## f ## _ +#endif +#endif + +#if defined(F90MOD2CCONV2) /*E.g. IBM XL Fortran, not tested though */ +#if defined(NO_APPEND_FORTRAN) +#define F_MODFUNCNAME(m,f) __ ## m ## _MOD_ ## f +#else +#define F_MODFUNCNAME(m,f) __ ## m ## _MOD_ ## f ## _ +#endif +#endif + +#if defined(F90MOD2CCONV3) /*E.g. MIPSPro Compilers */ +#if defined(NO_APPEND_FORTRAN) +#define F_MODFUNCNAME(m,f) f ## .in. ## m +#else +#define F_MODFUNCNAME(m,f) f ## .in. ## m ## _ +#endif +#endif +/* +#if defined(UPPERCASE_FORTRAN) +#define F_MODFUNC(m,M,f,F) F_MODFUNCNAME(M,F) +#else +#define F_MODFUNC(m,M,f,F) F_MODFUNCNAME(m,f) +#endif +*/ + +#define F_MODFUNC(m,f) (*(f2pymodstruct##m##.##f)) +""" +cppmacros['SWAPUNSAFE'] = """\ +#define SWAP(a,b) (size_t)(a) = ((size_t)(a) ^ (size_t)(b));\\ + (size_t)(b) = ((size_t)(a) ^ (size_t)(b));\\ + (size_t)(a) = ((size_t)(a) ^ (size_t)(b)) +""" +cppmacros['SWAP'] = """\ +#define SWAP(a,b,t) {\\ + t *c;\\ + c = a;\\ + a = b;\\ + b = c;} +""" +# cppmacros['ISCONTIGUOUS']='#define ISCONTIGUOUS(m) (PyArray_FLAGS(m) & +# NPY_ARRAY_C_CONTIGUOUS)' +cppmacros['PRINTPYOBJERR'] = """\ +#define PRINTPYOBJERR(obj)\\ + fprintf(stderr,\"#modulename#.error is related to \");\\ + PyObject_Print((PyObject *)obj,stderr,Py_PRINT_RAW);\\ + fprintf(stderr,\"\\n\"); +""" +cppmacros['MINMAX'] = """\ +#ifndef max +#define max(a,b) ((a > b) ? (a) : (b)) +#endif +#ifndef min +#define min(a,b) ((a < b) ? (a) : (b)) +#endif +#ifndef MAX +#define MAX(a,b) ((a > b) ? (a) : (b)) +#endif +#ifndef MIN +#define MIN(a,b) ((a < b) ? 
(a) : (b)) +#endif +""" +needs['len..'] = ['f2py_size'] +cppmacros['len..'] = """\ +#define rank(var) var ## _Rank +#define shape(var,dim) var ## _Dims[dim] +#define old_rank(var) (PyArray_NDIM((PyArrayObject *)(capi_ ## var ## _tmp))) +#define old_shape(var,dim) PyArray_DIM(((PyArrayObject *)(capi_ ## var ## _tmp)),dim) +#define fshape(var,dim) shape(var,rank(var)-dim-1) +#define len(var) shape(var,0) +#define flen(var) fshape(var,0) +#define old_size(var) PyArray_SIZE((PyArrayObject *)(capi_ ## var ## _tmp)) +/* #define index(i) capi_i ## i */ +#define slen(var) capi_ ## var ## _len +#define size(var, ...) f2py_size((PyArrayObject *)(capi_ ## var ## _tmp), ## __VA_ARGS__, -1) +""" +needs['f2py_size'] = ['stdarg.h'] +cfuncs['f2py_size'] = """\ +static int f2py_size(PyArrayObject* var, ...) +{ + npy_int sz = 0; + npy_int dim; + npy_int rank; + va_list argp; + va_start(argp, var); + dim = va_arg(argp, npy_int); + if (dim==-1) + { + sz = PyArray_SIZE(var); + } + else + { + rank = PyArray_NDIM(var); + if (dim>=1 && dim<=rank) + sz = PyArray_DIM(var, dim-1); + else + fprintf(stderr, \"f2py_size: 2nd argument value=%d fails to satisfy 1<=value<=%d. Result will be 0.\\n\", dim, rank); + } + va_end(argp); + return sz; +} +""" + +cppmacros[ + 'pyobj_from_char1'] = '#define pyobj_from_char1(v) (PyLong_FromLong(v))' +cppmacros[ + 'pyobj_from_short1'] = '#define pyobj_from_short1(v) (PyLong_FromLong(v))' +needs['pyobj_from_int1'] = ['signed_char'] +cppmacros['pyobj_from_int1'] = '#define pyobj_from_int1(v) (PyLong_FromLong(v))' +cppmacros[ + 'pyobj_from_long1'] = '#define pyobj_from_long1(v) (PyLong_FromLong(v))' +needs['pyobj_from_long_long1'] = ['long_long'] +cppmacros['pyobj_from_long_long1'] = """\ +#ifdef HAVE_LONG_LONG +#define pyobj_from_long_long1(v) (PyLong_FromLongLong(v)) +#else +#warning HAVE_LONG_LONG is not available. Redefining pyobj_from_long_long. 
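+/* fallback: values wider than a C long are truncated by this definition */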
+#define pyobj_from_long_long1(v) (PyLong_FromLong(v)) +#endif +""" +needs['pyobj_from_long_double1'] = ['long_double'] +cppmacros[ + 'pyobj_from_long_double1'] = '#define pyobj_from_long_double1(v) (PyFloat_FromDouble(v))' +cppmacros[ + 'pyobj_from_double1'] = '#define pyobj_from_double1(v) (PyFloat_FromDouble(v))' +cppmacros[ + 'pyobj_from_float1'] = '#define pyobj_from_float1(v) (PyFloat_FromDouble(v))' +needs['pyobj_from_complex_long_double1'] = ['complex_long_double'] +cppmacros[ + 'pyobj_from_complex_long_double1'] = '#define pyobj_from_complex_long_double1(v) (PyComplex_FromDoubles(v.r,v.i))' +needs['pyobj_from_complex_double1'] = ['complex_double'] +cppmacros[ + 'pyobj_from_complex_double1'] = '#define pyobj_from_complex_double1(v) (PyComplex_FromDoubles(v.r,v.i))' +needs['pyobj_from_complex_float1'] = ['complex_float'] +cppmacros[ + 'pyobj_from_complex_float1'] = '#define pyobj_from_complex_float1(v) (PyComplex_FromDoubles(v.r,v.i))' +needs['pyobj_from_string1'] = ['string'] +cppmacros[ + 'pyobj_from_string1'] = '#define pyobj_from_string1(v) (PyUnicode_FromString((char *)v))' +needs['pyobj_from_string1size'] = ['string'] +cppmacros[ + 'pyobj_from_string1size'] = '#define pyobj_from_string1size(v,len) (PyUnicode_FromStringAndSize((char *)v, len))' +needs['TRYPYARRAYTEMPLATE'] = ['PRINTPYOBJERR'] +cppmacros['TRYPYARRAYTEMPLATE'] = """\ +/* New SciPy */ +#define TRYPYARRAYTEMPLATECHAR case NPY_STRING: *(char *)(PyArray_DATA(arr))=*v; break; +#define TRYPYARRAYTEMPLATELONG case NPY_LONG: *(long *)(PyArray_DATA(arr))=*v; break; +#define TRYPYARRAYTEMPLATEOBJECT case NPY_OBJECT: PyArray_SETITEM(arr,PyArray_DATA(arr),pyobj_from_ ## ctype ## 1(*v)); break; + +#define TRYPYARRAYTEMPLATE(ctype,typecode) \\ + PyArrayObject *arr = NULL;\\ + if (!obj) return -2;\\ + if (!PyArray_Check(obj)) return -1;\\ + if (!(arr=(PyArrayObject *)obj)) {fprintf(stderr,\"TRYPYARRAYTEMPLATE:\");PRINTPYOBJERR(obj);return 0;}\\ + if (PyArray_DESCR(arr)->type==typecode) {*(ctype *)(PyArray_DATA(arr))=*v; return 1;}\\ + switch (PyArray_TYPE(arr)) {\\ + case NPY_DOUBLE: *(npy_double *)(PyArray_DATA(arr))=*v; break;\\ + case NPY_INT: *(npy_int *)(PyArray_DATA(arr))=*v; break;\\ + case NPY_LONG: *(npy_long *)(PyArray_DATA(arr))=*v; break;\\ + case NPY_FLOAT: *(npy_float *)(PyArray_DATA(arr))=*v; break;\\ + case NPY_CDOUBLE: *(npy_double *)(PyArray_DATA(arr))=*v; break;\\ + case NPY_CFLOAT: *(npy_float *)(PyArray_DATA(arr))=*v; break;\\ + case NPY_BOOL: *(npy_bool *)(PyArray_DATA(arr))=(*v!=0); break;\\ + case NPY_UBYTE: *(npy_ubyte *)(PyArray_DATA(arr))=*v; break;\\ + case NPY_BYTE: *(npy_byte *)(PyArray_DATA(arr))=*v; break;\\ + case NPY_SHORT: *(npy_short *)(PyArray_DATA(arr))=*v; break;\\ + case NPY_USHORT: *(npy_ushort *)(PyArray_DATA(arr))=*v; break;\\ + case NPY_UINT: *(npy_uint *)(PyArray_DATA(arr))=*v; break;\\ + case NPY_ULONG: *(npy_ulong *)(PyArray_DATA(arr))=*v; break;\\ + case NPY_LONGLONG: *(npy_longlong *)(PyArray_DATA(arr))=*v; break;\\ + case NPY_ULONGLONG: *(npy_ulonglong *)(PyArray_DATA(arr))=*v; break;\\ + case NPY_LONGDOUBLE: *(npy_longdouble *)(PyArray_DATA(arr))=*v; break;\\ + case NPY_CLONGDOUBLE: *(npy_longdouble *)(PyArray_DATA(arr))=*v; break;\\ + case NPY_OBJECT: PyArray_SETITEM(arr, PyArray_DATA(arr), pyobj_from_ ## ctype ## 1(*v)); break;\\ + default: return -2;\\ + };\\ + return 1 +""" + +needs['TRYCOMPLEXPYARRAYTEMPLATE'] = ['PRINTPYOBJERR'] +cppmacros['TRYCOMPLEXPYARRAYTEMPLATE'] = """\ +#define TRYCOMPLEXPYARRAYTEMPLATEOBJECT case NPY_OBJECT: PyArray_SETITEM(arr, PyArray_DATA(arr), 
pyobj_from_complex_ ## ctype ## 1((*v))); break; +#define TRYCOMPLEXPYARRAYTEMPLATE(ctype,typecode)\\ + PyArrayObject *arr = NULL;\\ + if (!obj) return -2;\\ + if (!PyArray_Check(obj)) return -1;\\ + if (!(arr=(PyArrayObject *)obj)) {fprintf(stderr,\"TRYCOMPLEXPYARRAYTEMPLATE:\");PRINTPYOBJERR(obj);return 0;}\\ + if (PyArray_DESCR(arr)->type==typecode) {\\ + *(ctype *)(PyArray_DATA(arr))=(*v).r;\\ + *(ctype *)(PyArray_DATA(arr)+sizeof(ctype))=(*v).i;\\ + return 1;\\ + }\\ + switch (PyArray_TYPE(arr)) {\\ + case NPY_CDOUBLE: *(npy_double *)(PyArray_DATA(arr))=(*v).r;\\ + *(npy_double *)(PyArray_DATA(arr)+sizeof(npy_double))=(*v).i;\\ + break;\\ + case NPY_CFLOAT: *(npy_float *)(PyArray_DATA(arr))=(*v).r;\\ + *(npy_float *)(PyArray_DATA(arr)+sizeof(npy_float))=(*v).i;\\ + break;\\ + case NPY_DOUBLE: *(npy_double *)(PyArray_DATA(arr))=(*v).r; break;\\ + case NPY_LONG: *(npy_long *)(PyArray_DATA(arr))=(*v).r; break;\\ + case NPY_FLOAT: *(npy_float *)(PyArray_DATA(arr))=(*v).r; break;\\ + case NPY_INT: *(npy_int *)(PyArray_DATA(arr))=(*v).r; break;\\ + case NPY_SHORT: *(npy_short *)(PyArray_DATA(arr))=(*v).r; break;\\ + case NPY_UBYTE: *(npy_ubyte *)(PyArray_DATA(arr))=(*v).r; break;\\ + case NPY_BYTE: *(npy_byte *)(PyArray_DATA(arr))=(*v).r; break;\\ + case NPY_BOOL: *(npy_bool *)(PyArray_DATA(arr))=((*v).r!=0 && (*v).i!=0); break;\\ + case NPY_USHORT: *(npy_ushort *)(PyArray_DATA(arr))=(*v).r; break;\\ + case NPY_UINT: *(npy_uint *)(PyArray_DATA(arr))=(*v).r; break;\\ + case NPY_ULONG: *(npy_ulong *)(PyArray_DATA(arr))=(*v).r; break;\\ + case NPY_LONGLONG: *(npy_longlong *)(PyArray_DATA(arr))=(*v).r; break;\\ + case NPY_ULONGLONG: *(npy_ulonglong *)(PyArray_DATA(arr))=(*v).r; break;\\ + case NPY_LONGDOUBLE: *(npy_longdouble *)(PyArray_DATA(arr))=(*v).r; break;\\ + case NPY_CLONGDOUBLE: *(npy_longdouble *)(PyArray_DATA(arr))=(*v).r;\\ + *(npy_longdouble *)(PyArray_DATA(arr)+sizeof(npy_longdouble))=(*v).i;\\ + break;\\ + case NPY_OBJECT: PyArray_SETITEM(arr, PyArray_DATA(arr), pyobj_from_complex_ ## ctype ## 1((*v))); break;\\ + default: return -2;\\ + };\\ + return -1; +""" +# cppmacros['NUMFROMARROBJ']="""\ +# define NUMFROMARROBJ(typenum,ctype) \\ +# if (PyArray_Check(obj)) arr = (PyArrayObject *)obj;\\ +# else arr = (PyArrayObject *)PyArray_ContiguousFromObject(obj,typenum,0,0);\\ +# if (arr) {\\ +# if (PyArray_TYPE(arr)==NPY_OBJECT) {\\ +# if (!ctype ## _from_pyobj(v,(PyArray_DESCR(arr)->getitem)(PyArray_DATA(arr)),\"\"))\\ +# goto capi_fail;\\ +# } else {\\ +# (PyArray_DESCR(arr)->cast[typenum])(PyArray_DATA(arr),1,(char*)v,1,1);\\ +# }\\ +# if ((PyObject *)arr != obj) { Py_DECREF(arr); }\\ +# return 1;\\ +# } +# """ +# XXX: Note that CNUMFROMARROBJ is identical with NUMFROMARROBJ +# cppmacros['CNUMFROMARROBJ']="""\ +# define CNUMFROMARROBJ(typenum,ctype) \\ +# if (PyArray_Check(obj)) arr = (PyArrayObject *)obj;\\ +# else arr = (PyArrayObject *)PyArray_ContiguousFromObject(obj,typenum,0,0);\\ +# if (arr) {\\ +# if (PyArray_TYPE(arr)==NPY_OBJECT) {\\ +# if (!ctype ## _from_pyobj(v,(PyArray_DESCR(arr)->getitem)(PyArray_DATA(arr)),\"\"))\\ +# goto capi_fail;\\ +# } else {\\ +# (PyArray_DESCR(arr)->cast[typenum])((void *)(PyArray_DATA(arr)),1,(void *)(v),1,1);\\ +# }\\ +# if ((PyObject *)arr != obj) { Py_DECREF(arr); }\\ +# return 1;\\ +# } +# """ + + +needs['GETSTRFROMPYTUPLE'] = ['STRINGCOPYN', 'PRINTPYOBJERR'] +cppmacros['GETSTRFROMPYTUPLE'] = """\ +#define GETSTRFROMPYTUPLE(tuple,index,str,len) {\\ + PyObject *rv_cb_str = PyTuple_GetItem((tuple),(index));\\ + if (rv_cb_str == NULL)\\ + 
goto capi_fail;\\ + if (PyBytes_Check(rv_cb_str)) {\\ + str[len-1]='\\0';\\ + STRINGCOPYN((str),PyBytes_AS_STRING((PyBytesObject*)rv_cb_str),(len));\\ + } else {\\ + PRINTPYOBJERR(rv_cb_str);\\ + PyErr_SetString(#modulename#_error,\"string object expected\");\\ + goto capi_fail;\\ + }\\ + } +""" +cppmacros['GETSCALARFROMPYTUPLE'] = """\ +#define GETSCALARFROMPYTUPLE(tuple,index,var,ctype,mess) {\\ + if ((capi_tmp = PyTuple_GetItem((tuple),(index)))==NULL) goto capi_fail;\\ + if (!(ctype ## _from_pyobj((var),capi_tmp,mess)))\\ + goto capi_fail;\\ + } +""" + +cppmacros['FAILNULL'] = """\\ +#define FAILNULL(p) do { \\ + if ((p) == NULL) { \\ + PyErr_SetString(PyExc_MemoryError, "NULL pointer found"); \\ + goto capi_fail; \\ + } \\ +} while (0) +""" +needs['MEMCOPY'] = ['string.h', 'FAILNULL'] +cppmacros['MEMCOPY'] = """\ +#define MEMCOPY(to,from,n)\\ + do { FAILNULL(to); FAILNULL(from); (void)memcpy(to,from,n); } while (0) +""" +cppmacros['STRINGMALLOC'] = """\ +#define STRINGMALLOC(str,len)\\ + if ((str = (string)malloc(len+1)) == NULL) {\\ + PyErr_SetString(PyExc_MemoryError, \"out of memory\");\\ + goto capi_fail;\\ + } else {\\ + (str)[len] = '\\0';\\ + } +""" +cppmacros['STRINGFREE'] = """\ +#define STRINGFREE(str) do {if (!(str == NULL)) free(str);} while (0) +""" +needs['STRINGPADN'] = ['string.h'] +cppmacros['STRINGPADN'] = """\ +/* +STRINGPADN replaces null values with padding values from the right. + +`to` must have size of at least N bytes. + +If the `to[N-1]` has null value, then replace it and all the +preceding, nulls with the given padding. + +STRINGPADN(to, N, PADDING, NULLVALUE) is an inverse operation. +*/ +#define STRINGPADN(to, N, NULLVALUE, PADDING) \\ + do { \\ + int _m = (N); \\ + char *_to = (to); \\ + for (_m -= 1; _m >= 0 && _to[_m] == NULLVALUE; _m--) { \\ + _to[_m] = PADDING; \\ + } \\ + } while (0) +""" +needs['STRINGCOPYN'] = ['string.h', 'FAILNULL'] +cppmacros['STRINGCOPYN'] = """\ +/* +STRINGCOPYN copies N bytes. + +`to` and `from` buffers must have sizes of at least N bytes. 
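+
+Note that strncpy semantics apply here: if `from` is shorter than N,
+strncpy pads the remainder of `to` with null bytes.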
+*/ +#define STRINGCOPYN(to,from,N) \\ + do { \\ + int _m = (N); \\ + char *_to = (to); \\ + char *_from = (from); \\ + FAILNULL(_to); FAILNULL(_from); \\ + (void)strncpy(_to, _from, _m); \\ + } while (0) +""" +needs['STRINGCOPY'] = ['string.h', 'FAILNULL'] +cppmacros['STRINGCOPY'] = """\ +#define STRINGCOPY(to,from)\\ + do { FAILNULL(to); FAILNULL(from); (void)strcpy(to,from); } while (0) +""" +cppmacros['CHECKGENERIC'] = """\ +#define CHECKGENERIC(check,tcheck,name) \\ + if (!(check)) {\\ + PyErr_SetString(#modulename#_error,\"(\"tcheck\") failed for \"name);\\ + /*goto capi_fail;*/\\ + } else """ +cppmacros['CHECKARRAY'] = """\ +#define CHECKARRAY(check,tcheck,name) \\ + if (!(check)) {\\ + PyErr_SetString(#modulename#_error,\"(\"tcheck\") failed for \"name);\\ + /*goto capi_fail;*/\\ + } else """ +cppmacros['CHECKSTRING'] = """\ +#define CHECKSTRING(check,tcheck,name,show,var)\\ + if (!(check)) {\\ + char errstring[256];\\ + sprintf(errstring, \"%s: \"show, \"(\"tcheck\") failed for \"name, slen(var), var);\\ + PyErr_SetString(#modulename#_error, errstring);\\ + /*goto capi_fail;*/\\ + } else """ +cppmacros['CHECKSCALAR'] = """\ +#define CHECKSCALAR(check,tcheck,name,show,var)\\ + if (!(check)) {\\ + char errstring[256];\\ + sprintf(errstring, \"%s: \"show, \"(\"tcheck\") failed for \"name, var);\\ + PyErr_SetString(#modulename#_error,errstring);\\ + /*goto capi_fail;*/\\ + } else """ +# cppmacros['CHECKDIMS']="""\ +# define CHECKDIMS(dims,rank) \\ +# for (int i=0;i<(rank);i++)\\ +# if (dims[i]<0) {\\ +# fprintf(stderr,\"Unspecified array argument requires a complete dimension specification.\\n\");\\ +# goto capi_fail;\\ +# } +# """ +cppmacros[ + 'ARRSIZE'] = '#define ARRSIZE(dims,rank) (_PyArray_multiply_list(dims,rank))' +cppmacros['OLDPYNUM'] = """\ +#ifdef OLDPYNUM +#error You need to install NumPy version 0.13 or higher. 
See https://scipy.org/install.html
+#endif
+"""
+cppmacros["F2PY_THREAD_LOCAL_DECL"] = """\
+#ifndef F2PY_THREAD_LOCAL_DECL
+#if defined(_MSC_VER)
+#define F2PY_THREAD_LOCAL_DECL __declspec(thread)
+#elif defined(__MINGW32__) || defined(__MINGW64__)
+#define F2PY_THREAD_LOCAL_DECL __thread
+#elif defined(__STDC_VERSION__) \\
+      && (__STDC_VERSION__ >= 201112L) \\
+      && !defined(__STDC_NO_THREADS__) \\
+      && (!defined(__GLIBC__) || __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 12)) \\
+      && !defined(__OpenBSD__)
+/* __STDC_NO_THREADS__ was first defined in a maintenance release of glibc 2.12,
+   see https://lists.gnu.org/archive/html/commit-hurd/2012-07/msg00180.html,
+   so `!defined(__STDC_NO_THREADS__)` may give false positive for the existence
+   of `threads.h` when using an older release of glibc 2.12
+   See gh-19437 for details on OpenBSD */
+#include <threads.h>
+#define F2PY_THREAD_LOCAL_DECL thread_local
+#elif defined(__GNUC__) \\
+      && (__GNUC__ > 4 || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 4)))
+#define F2PY_THREAD_LOCAL_DECL __thread
+#endif
+#endif
+"""
+################# C functions ###############
+
+cfuncs['calcarrindex'] = """\
+static int calcarrindex(int *i,PyArrayObject *arr) {
+    int k,ii = i[0];
+    for (k=1; k < PyArray_NDIM(arr); k++)
+        ii += (ii*(PyArray_DIM(arr,k) - 1)+i[k]); /* assuming contiguous arr */
+    return ii;
+}"""
+cfuncs['calcarrindextr'] = """\
+static int calcarrindextr(int *i,PyArrayObject *arr) {
+    int k,ii = i[PyArray_NDIM(arr)-1];
+    for (k=1; k < PyArray_NDIM(arr); k++)
+        ii += (ii*(PyArray_DIM(arr,PyArray_NDIM(arr)-k-1) - 1)+i[PyArray_NDIM(arr)-k-1]); /* assuming contiguous arr */
+    return ii;
+}"""
+cfuncs['forcomb'] = """\
+static struct { int nd;npy_intp *d;int *i,*i_tr,tr; } forcombcache;
+static int initforcomb(npy_intp *dims,int nd,int tr) {
+    int k;
+    if (dims==NULL) return 0;
+    if (nd<0) return 0;
+    forcombcache.nd = nd;
+    forcombcache.d = dims;
+    forcombcache.tr = tr;
+    if ((forcombcache.i = (int *)malloc(sizeof(int)*nd))==NULL) return 0;
+    if ((forcombcache.i_tr = (int *)malloc(sizeof(int)*nd))==NULL) return 0;
+    for (k=1;k<nd;k++) {
+        forcombcache.i[k] = forcombcache.i_tr[nd-k-1] = 0;
+    }
+    forcombcache.i[0] = forcombcache.i_tr[nd-1] = -1;
+    return 1;
+}
+static int *nextforcomb(void) {
+    int j,*i,*i_tr,k;
+    int nd=forcombcache.nd;
+    if ((i=forcombcache.i) == NULL) return NULL;
+    if ((i_tr=forcombcache.i_tr) == NULL) return NULL;
+    if (forcombcache.d == NULL) return NULL;
+    i[0]++;
+    if (i[0]==forcombcache.d[0]) {
+        j=1;
+        while ((j<nd) && (i[j]==forcombcache.d[j]-1)) j++;
+        if (j==nd) {
+            free(i);
+            free(i_tr);
+            return NULL;
+        }
+        for (k=0;k<j;k++) i[k] = i_tr[nd-k-1] = 0;
+        i[j]++;
+        i_tr[nd-j-1]++;
+    } else
+        i_tr[nd-1]++;
+    if (forcombcache.tr) return i_tr;
+    return i;
+}"""
+needs['try_pyarr_from_string'] = ['STRINGCOPYN']
+cfuncs['try_pyarr_from_string'] = """\
+/*
+  try_pyarr_from_string copies str[:len(obj)] to the data of an array.
+
+  If obj is an `ndarray`, it is assumed to be contiguous.
+
+  If the specified len==-1, str must be null-terminated.
+*/
+static int try_pyarr_from_string(PyObject *obj,
+                                 const string str, const int len) {
+#ifdef DEBUGCFUNCS
+fprintf(stderr, \"try_pyarr_from_string(str='%s', len=%d, obj=%p)\\n\",
+        (char*)str, len, obj);
+#endif
+    if (PyArray_Check(obj)) {
+        PyArrayObject *arr = (PyArrayObject *)obj;
+        assert(ISCONTIGUOUS(arr));
+        string buf = PyArray_DATA(arr);
+        npy_intp n = len;
+        if (n == -1) {
+            /* Assuming null-terminated str. */
+            n = strlen(str);
+        }
+        if (n > PyArray_NBYTES(arr)) {
+            n = PyArray_NBYTES(arr);
+        }
+        STRINGCOPYN(buf, str, n);
+        return 1;
+    }
+capi_fail:
+    PRINTPYOBJERR(obj);
+    PyErr_SetString(#modulename#_error, \"try_pyarr_from_string failed\");
+    return 0;
+}
+"""
+needs['string_from_pyobj'] = ['string', 'STRINGMALLOC', 'STRINGCOPYN']
+cfuncs['string_from_pyobj'] = """\
+/*
+  Create a new string buffer `str` of at most length `len` from a
+  Python string-like object `obj`.
+
+  The string buffer has given size (len) or the size of inistr when len==-1.
+
+  The string buffer is padded with blanks: in Fortran, trailing blanks
+  are insignificant contrary to C nulls.
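+
+  For example: with len=5 and obj="ab", the buffer holds 'a', 'b' and
+  three NUL bytes on return; the caller replaces trailing NULs with
+  blanks (see STRINGPADN) unless the argument is intent(c).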
+ */ +static int +string_from_pyobj(string *str, int *len, const string inistr, PyObject *obj, + const char *errmess) +{ + PyObject *tmp = NULL; + string buf = NULL; + npy_intp n = -1; +#ifdef DEBUGCFUNCS +fprintf(stderr,\"string_from_pyobj(str='%s',len=%d,inistr='%s',obj=%p)\\n\", + (char*)str, *len, (char *)inistr, obj); +#endif + if (obj == Py_None) { + n = strlen(inistr); + buf = inistr; + } + else if (PyArray_Check(obj)) { + PyArrayObject *arr = (PyArrayObject *)obj; + if (!ISCONTIGUOUS(arr)) { + PyErr_SetString(PyExc_ValueError, + \"array object is non-contiguous.\"); + goto capi_fail; + } + n = PyArray_NBYTES(arr); + buf = PyArray_DATA(arr); + n = strnlen(buf, n); + } + else { + if (PyBytes_Check(obj)) { + tmp = obj; + Py_INCREF(tmp); + } + else if (PyUnicode_Check(obj)) { + tmp = PyUnicode_AsASCIIString(obj); + } + else { + PyObject *tmp2; + tmp2 = PyObject_Str(obj); + if (tmp2) { + tmp = PyUnicode_AsASCIIString(tmp2); + Py_DECREF(tmp2); + } + else { + tmp = NULL; + } + } + if (tmp == NULL) goto capi_fail; + n = PyBytes_GET_SIZE(tmp); + buf = PyBytes_AS_STRING(tmp); + } + if (*len == -1) { + /* TODO: change the type of `len` so that we can remove this */ + if (n > NPY_MAX_INT) { + PyErr_SetString(PyExc_OverflowError, + "object too large for a 32-bit int"); + goto capi_fail; + } + *len = n; + } + else if (*len < n) { + /* discard the last (len-n) bytes of input buf */ + n = *len; + } + if (n < 0 || *len < 0 || buf == NULL) { + goto capi_fail; + } + STRINGMALLOC(*str, *len); // *str is allocated with size (*len + 1) + if (n < *len) { + /* + Pad fixed-width string with nulls. The caller will replace + nulls with blanks when the corresponding argument is not + intent(c). + */ + memset(*str + n, '\\0', *len - n); + } + STRINGCOPYN(*str, buf, n); + Py_XDECREF(tmp); + return 1; +capi_fail: + Py_XDECREF(tmp); + { + PyObject* err = PyErr_Occurred(); + if (err == NULL) { + err = #modulename#_error; + } + PyErr_SetString(err, errmess); + } + return 0; +} +""" + +needs['char_from_pyobj'] = ['int_from_pyobj'] +cfuncs['char_from_pyobj'] = """\ +static int +char_from_pyobj(char* v, PyObject *obj, const char *errmess) { + int i = 0; + if (int_from_pyobj(&i, obj, errmess)) { + *v = (char)i; + return 1; + } + return 0; +} +""" + + +needs['signed_char_from_pyobj'] = ['int_from_pyobj', 'signed_char'] +cfuncs['signed_char_from_pyobj'] = """\ +static int +signed_char_from_pyobj(signed_char* v, PyObject *obj, const char *errmess) { + int i = 0; + if (int_from_pyobj(&i, obj, errmess)) { + *v = (signed_char)i; + return 1; + } + return 0; +} +""" + + +needs['short_from_pyobj'] = ['int_from_pyobj'] +cfuncs['short_from_pyobj'] = """\ +static int +short_from_pyobj(short* v, PyObject *obj, const char *errmess) { + int i = 0; + if (int_from_pyobj(&i, obj, errmess)) { + *v = (short)i; + return 1; + } + return 0; +} +""" + + +cfuncs['int_from_pyobj'] = """\ +static int +int_from_pyobj(int* v, PyObject *obj, const char *errmess) +{ + PyObject* tmp = NULL; + + if (PyLong_Check(obj)) { + *v = Npy__PyLong_AsInt(obj); + return !(*v == -1 && PyErr_Occurred()); + } + + tmp = PyNumber_Long(obj); + if (tmp) { + *v = Npy__PyLong_AsInt(tmp); + Py_DECREF(tmp); + return !(*v == -1 && PyErr_Occurred()); + } + + if (PyComplex_Check(obj)) { + PyErr_Clear(); + tmp = PyObject_GetAttrString(obj,\"real\"); + } + else if (PyBytes_Check(obj) || PyUnicode_Check(obj)) { + /*pass*/; + } + else if (PySequence_Check(obj)) { + PyErr_Clear(); + tmp = PySequence_GetItem(obj, 0); + } + + if (tmp) { + if (int_from_pyobj(v, tmp, errmess)) { + 
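+            /* tmp is obj.real or obj[0], extracted above; the recursive call converts it */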
Py_DECREF(tmp); + return 1; + } + Py_DECREF(tmp); + } + + { + PyObject* err = PyErr_Occurred(); + if (err == NULL) { + err = #modulename#_error; + } + PyErr_SetString(err, errmess); + } + return 0; +} +""" + + +cfuncs['long_from_pyobj'] = """\ +static int +long_from_pyobj(long* v, PyObject *obj, const char *errmess) { + PyObject* tmp = NULL; + + if (PyLong_Check(obj)) { + *v = PyLong_AsLong(obj); + return !(*v == -1 && PyErr_Occurred()); + } + + tmp = PyNumber_Long(obj); + if (tmp) { + *v = PyLong_AsLong(tmp); + Py_DECREF(tmp); + return !(*v == -1 && PyErr_Occurred()); + } + + if (PyComplex_Check(obj)) { + PyErr_Clear(); + tmp = PyObject_GetAttrString(obj,\"real\"); + } + else if (PyBytes_Check(obj) || PyUnicode_Check(obj)) { + /*pass*/; + } + else if (PySequence_Check(obj)) { + PyErr_Clear(); + tmp = PySequence_GetItem(obj, 0); + } + + if (tmp) { + if (long_from_pyobj(v, tmp, errmess)) { + Py_DECREF(tmp); + return 1; + } + Py_DECREF(tmp); + } + { + PyObject* err = PyErr_Occurred(); + if (err == NULL) { + err = #modulename#_error; + } + PyErr_SetString(err, errmess); + } + return 0; +} +""" + + +needs['long_long_from_pyobj'] = ['long_long'] +cfuncs['long_long_from_pyobj'] = """\ +static int +long_long_from_pyobj(long_long* v, PyObject *obj, const char *errmess) +{ + PyObject* tmp = NULL; + + if (PyLong_Check(obj)) { + *v = PyLong_AsLongLong(obj); + return !(*v == -1 && PyErr_Occurred()); + } + + tmp = PyNumber_Long(obj); + if (tmp) { + *v = PyLong_AsLongLong(tmp); + Py_DECREF(tmp); + return !(*v == -1 && PyErr_Occurred()); + } + + if (PyComplex_Check(obj)) { + PyErr_Clear(); + tmp = PyObject_GetAttrString(obj,\"real\"); + } + else if (PyBytes_Check(obj) || PyUnicode_Check(obj)) { + /*pass*/; + } + else if (PySequence_Check(obj)) { + PyErr_Clear(); + tmp = PySequence_GetItem(obj, 0); + } + + if (tmp) { + if (long_long_from_pyobj(v, tmp, errmess)) { + Py_DECREF(tmp); + return 1; + } + Py_DECREF(tmp); + } + { + PyObject* err = PyErr_Occurred(); + if (err == NULL) { + err = #modulename#_error; + } + PyErr_SetString(err,errmess); + } + return 0; +} +""" + + +needs['long_double_from_pyobj'] = ['double_from_pyobj', 'long_double'] +cfuncs['long_double_from_pyobj'] = """\ +static int +long_double_from_pyobj(long_double* v, PyObject *obj, const char *errmess) +{ + double d=0; + if (PyArray_CheckScalar(obj)){ + if PyArray_IsScalar(obj, LongDouble) { + PyArray_ScalarAsCtype(obj, v); + return 1; + } + else if (PyArray_Check(obj) && PyArray_TYPE(obj) == NPY_LONGDOUBLE) { + (*v) = *((npy_longdouble *)PyArray_DATA(obj)); + return 1; + } + } + if (double_from_pyobj(&d, obj, errmess)) { + *v = (long_double)d; + return 1; + } + return 0; +} +""" + + +cfuncs['double_from_pyobj'] = """\ +static int +double_from_pyobj(double* v, PyObject *obj, const char *errmess) +{ + PyObject* tmp = NULL; + if (PyFloat_Check(obj)) { + *v = PyFloat_AsDouble(obj); + return !(*v == -1.0 && PyErr_Occurred()); + } + + tmp = PyNumber_Float(obj); + if (tmp) { + *v = PyFloat_AsDouble(tmp); + Py_DECREF(tmp); + return !(*v == -1.0 && PyErr_Occurred()); + } + + if (PyComplex_Check(obj)) { + PyErr_Clear(); + tmp = PyObject_GetAttrString(obj,\"real\"); + } + else if (PyBytes_Check(obj) || PyUnicode_Check(obj)) { + /*pass*/; + } + else if (PySequence_Check(obj)) { + PyErr_Clear(); + tmp = PySequence_GetItem(obj, 0); + } + + if (tmp) { + if (double_from_pyobj(v,tmp,errmess)) {Py_DECREF(tmp); return 1;} + Py_DECREF(tmp); + } + { + PyObject* err = PyErr_Occurred(); + if (err==NULL) err = #modulename#_error; + PyErr_SetString(err,errmess); + } 
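+    /* all conversion paths above failed and an exception is now set */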
+ return 0; +} +""" + + +needs['float_from_pyobj'] = ['double_from_pyobj'] +cfuncs['float_from_pyobj'] = """\ +static int +float_from_pyobj(float* v, PyObject *obj, const char *errmess) +{ + double d=0.0; + if (double_from_pyobj(&d,obj,errmess)) { + *v = (float)d; + return 1; + } + return 0; +} +""" + + +needs['complex_long_double_from_pyobj'] = ['complex_long_double', 'long_double', + 'complex_double_from_pyobj'] +cfuncs['complex_long_double_from_pyobj'] = """\ +static int +complex_long_double_from_pyobj(complex_long_double* v, PyObject *obj, const char *errmess) +{ + complex_double cd = {0.0,0.0}; + if (PyArray_CheckScalar(obj)){ + if PyArray_IsScalar(obj, CLongDouble) { + PyArray_ScalarAsCtype(obj, v); + return 1; + } + else if (PyArray_Check(obj) && PyArray_TYPE(obj)==NPY_CLONGDOUBLE) { + (*v).r = ((npy_clongdouble *)PyArray_DATA(obj))->real; + (*v).i = ((npy_clongdouble *)PyArray_DATA(obj))->imag; + return 1; + } + } + if (complex_double_from_pyobj(&cd,obj,errmess)) { + (*v).r = (long_double)cd.r; + (*v).i = (long_double)cd.i; + return 1; + } + return 0; +} +""" + + +needs['complex_double_from_pyobj'] = ['complex_double'] +cfuncs['complex_double_from_pyobj'] = """\ +static int +complex_double_from_pyobj(complex_double* v, PyObject *obj, const char *errmess) { + Py_complex c; + if (PyComplex_Check(obj)) { + c = PyComplex_AsCComplex(obj); + (*v).r = c.real; + (*v).i = c.imag; + return 1; + } + if (PyArray_IsScalar(obj, ComplexFloating)) { + if (PyArray_IsScalar(obj, CFloat)) { + npy_cfloat new; + PyArray_ScalarAsCtype(obj, &new); + (*v).r = (double)new.real; + (*v).i = (double)new.imag; + } + else if (PyArray_IsScalar(obj, CLongDouble)) { + npy_clongdouble new; + PyArray_ScalarAsCtype(obj, &new); + (*v).r = (double)new.real; + (*v).i = (double)new.imag; + } + else { /* if (PyArray_IsScalar(obj, CDouble)) */ + PyArray_ScalarAsCtype(obj, v); + } + return 1; + } + if (PyArray_CheckScalar(obj)) { /* 0-dim array or still array scalar */ + PyObject *arr; + if (PyArray_Check(obj)) { + arr = PyArray_Cast((PyArrayObject *)obj, NPY_CDOUBLE); + } + else { + arr = PyArray_FromScalar(obj, PyArray_DescrFromType(NPY_CDOUBLE)); + } + if (arr == NULL) { + return 0; + } + (*v).r = ((npy_cdouble *)PyArray_DATA(arr))->real; + (*v).i = ((npy_cdouble *)PyArray_DATA(arr))->imag; + Py_DECREF(arr); + return 1; + } + /* Python does not provide PyNumber_Complex function :-( */ + (*v).i = 0.0; + if (PyFloat_Check(obj)) { + (*v).r = PyFloat_AsDouble(obj); + return !((*v).r == -1.0 && PyErr_Occurred()); + } + if (PyLong_Check(obj)) { + (*v).r = PyLong_AsDouble(obj); + return !((*v).r == -1.0 && PyErr_Occurred()); + } + if (PySequence_Check(obj) && !(PyBytes_Check(obj) || PyUnicode_Check(obj))) { + PyObject *tmp = PySequence_GetItem(obj,0); + if (tmp) { + if (complex_double_from_pyobj(v,tmp,errmess)) { + Py_DECREF(tmp); + return 1; + } + Py_DECREF(tmp); + } + } + { + PyObject* err = PyErr_Occurred(); + if (err==NULL) + err = PyExc_TypeError; + PyErr_SetString(err,errmess); + } + return 0; +} +""" + + +needs['complex_float_from_pyobj'] = [ + 'complex_float', 'complex_double_from_pyobj'] +cfuncs['complex_float_from_pyobj'] = """\ +static int +complex_float_from_pyobj(complex_float* v,PyObject *obj,const char *errmess) +{ + complex_double cd={0.0,0.0}; + if (complex_double_from_pyobj(&cd,obj,errmess)) { + (*v).r = (float)cd.r; + (*v).i = (float)cd.i; + return 1; + } + return 0; +} +""" + + +needs['try_pyarr_from_char'] = ['pyobj_from_char1', 'TRYPYARRAYTEMPLATE'] +cfuncs[ + 'try_pyarr_from_char'] = 'static int 
try_pyarr_from_char(PyObject* obj,char* v) {\n TRYPYARRAYTEMPLATE(char,\'c\');\n}\n' +needs['try_pyarr_from_signed_char'] = ['TRYPYARRAYTEMPLATE', 'unsigned_char'] +cfuncs[ + 'try_pyarr_from_unsigned_char'] = 'static int try_pyarr_from_unsigned_char(PyObject* obj,unsigned_char* v) {\n TRYPYARRAYTEMPLATE(unsigned_char,\'b\');\n}\n' +needs['try_pyarr_from_signed_char'] = ['TRYPYARRAYTEMPLATE', 'signed_char'] +cfuncs[ + 'try_pyarr_from_signed_char'] = 'static int try_pyarr_from_signed_char(PyObject* obj,signed_char* v) {\n TRYPYARRAYTEMPLATE(signed_char,\'1\');\n}\n' +needs['try_pyarr_from_short'] = ['pyobj_from_short1', 'TRYPYARRAYTEMPLATE'] +cfuncs[ + 'try_pyarr_from_short'] = 'static int try_pyarr_from_short(PyObject* obj,short* v) {\n TRYPYARRAYTEMPLATE(short,\'s\');\n}\n' +needs['try_pyarr_from_int'] = ['pyobj_from_int1', 'TRYPYARRAYTEMPLATE'] +cfuncs[ + 'try_pyarr_from_int'] = 'static int try_pyarr_from_int(PyObject* obj,int* v) {\n TRYPYARRAYTEMPLATE(int,\'i\');\n}\n' +needs['try_pyarr_from_long'] = ['pyobj_from_long1', 'TRYPYARRAYTEMPLATE'] +cfuncs[ + 'try_pyarr_from_long'] = 'static int try_pyarr_from_long(PyObject* obj,long* v) {\n TRYPYARRAYTEMPLATE(long,\'l\');\n}\n' +needs['try_pyarr_from_long_long'] = [ + 'pyobj_from_long_long1', 'TRYPYARRAYTEMPLATE', 'long_long'] +cfuncs[ + 'try_pyarr_from_long_long'] = 'static int try_pyarr_from_long_long(PyObject* obj,long_long* v) {\n TRYPYARRAYTEMPLATE(long_long,\'L\');\n}\n' +needs['try_pyarr_from_float'] = ['pyobj_from_float1', 'TRYPYARRAYTEMPLATE'] +cfuncs[ + 'try_pyarr_from_float'] = 'static int try_pyarr_from_float(PyObject* obj,float* v) {\n TRYPYARRAYTEMPLATE(float,\'f\');\n}\n' +needs['try_pyarr_from_double'] = ['pyobj_from_double1', 'TRYPYARRAYTEMPLATE'] +cfuncs[ + 'try_pyarr_from_double'] = 'static int try_pyarr_from_double(PyObject* obj,double* v) {\n TRYPYARRAYTEMPLATE(double,\'d\');\n}\n' +needs['try_pyarr_from_complex_float'] = [ + 'pyobj_from_complex_float1', 'TRYCOMPLEXPYARRAYTEMPLATE', 'complex_float'] +cfuncs[ + 'try_pyarr_from_complex_float'] = 'static int try_pyarr_from_complex_float(PyObject* obj,complex_float* v) {\n TRYCOMPLEXPYARRAYTEMPLATE(float,\'F\');\n}\n' +needs['try_pyarr_from_complex_double'] = [ + 'pyobj_from_complex_double1', 'TRYCOMPLEXPYARRAYTEMPLATE', 'complex_double'] +cfuncs[ + 'try_pyarr_from_complex_double'] = 'static int try_pyarr_from_complex_double(PyObject* obj,complex_double* v) {\n TRYCOMPLEXPYARRAYTEMPLATE(double,\'D\');\n}\n' + + +needs['create_cb_arglist'] = ['CFUNCSMESS', 'PRINTPYOBJERR', 'MINMAX'] +# create the list of arguments to be used when calling back to python +cfuncs['create_cb_arglist'] = """\ +static int +create_cb_arglist(PyObject* fun, PyTupleObject* xa , const int maxnofargs, + const int nofoptargs, int *nofargs, PyTupleObject **args, + const char *errmess) +{ + PyObject *tmp = NULL; + PyObject *tmp_fun = NULL; + Py_ssize_t tot, opt, ext, siz, i, di = 0; + CFUNCSMESS(\"create_cb_arglist\\n\"); + tot=opt=ext=siz=0; + /* Get the total number of arguments */ + if (PyFunction_Check(fun)) { + tmp_fun = fun; + Py_INCREF(tmp_fun); + } + else { + di = 1; + if (PyObject_HasAttrString(fun,\"im_func\")) { + tmp_fun = PyObject_GetAttrString(fun,\"im_func\"); + } + else if (PyObject_HasAttrString(fun,\"__call__\")) { + tmp = PyObject_GetAttrString(fun,\"__call__\"); + if (PyObject_HasAttrString(tmp,\"im_func\")) + tmp_fun = PyObject_GetAttrString(tmp,\"im_func\"); + else { + tmp_fun = fun; /* built-in function */ + Py_INCREF(tmp_fun); + tot = maxnofargs; + if (PyCFunction_Check(fun)) { + /* 
In case the function has a co_argcount (like on PyPy) */
+                    di = 0;
+                }
+                if (xa != NULL)
+                    tot += PyTuple_Size((PyObject *)xa);
+            }
+            Py_XDECREF(tmp);
+        }
+        else if (PyFortran_Check(fun) || PyFortran_Check1(fun)) {
+            tot = maxnofargs;
+            if (xa != NULL)
+                tot += PyTuple_Size((PyObject *)xa);
+            tmp_fun = fun;
+            Py_INCREF(tmp_fun);
+        }
+        else if (F2PyCapsule_Check(fun)) {
+            tot = maxnofargs;
+            if (xa != NULL)
+                ext = PyTuple_Size((PyObject *)xa);
+            if(ext>0) {
+                fprintf(stderr,\"extra arguments tuple cannot be used with CObject call-back\\n\");
+                goto capi_fail;
+            }
+            tmp_fun = fun;
+            Py_INCREF(tmp_fun);
+        }
+    }
+
+    if (tmp_fun == NULL) {
+        fprintf(stderr,
+                \"Call-back argument must be function|instance|instance.__call__|f2py-function \"
+                \"but got %s.\\n\",
+                ((fun == NULL) ? \"NULL\" : Py_TYPE(fun)->tp_name));
+        goto capi_fail;
+    }
+
+    if (PyObject_HasAttrString(tmp_fun,\"__code__\")) {
+        if (PyObject_HasAttrString(tmp = PyObject_GetAttrString(tmp_fun,\"__code__\"),\"co_argcount\")) {
+            PyObject *tmp_argcount = PyObject_GetAttrString(tmp,\"co_argcount\");
+            Py_DECREF(tmp);
+            if (tmp_argcount == NULL) {
+                goto capi_fail;
+            }
+            tot = PyLong_AsSsize_t(tmp_argcount) - di;
+            Py_DECREF(tmp_argcount);
+        }
+    }
+    /* Get the number of optional arguments */
+    if (PyObject_HasAttrString(tmp_fun,\"__defaults__\")) {
+        if (PyTuple_Check(tmp = PyObject_GetAttrString(tmp_fun,\"__defaults__\")))
+            opt = PyTuple_Size(tmp);
+        Py_XDECREF(tmp);
+    }
+    /* Get the number of extra arguments */
+    if (xa != NULL)
+        ext = PyTuple_Size((PyObject *)xa);
+    /* Calculate the size of call-backs argument list */
+    siz = MIN(maxnofargs+ext,tot);
+    *nofargs = MAX(0,siz-ext);
+
+#ifdef DEBUGCFUNCS
+    fprintf(stderr,
+            \"debug-capi:create_cb_arglist:maxnofargs(-nofoptargs),\"
+            \"tot,opt,ext,siz,nofargs = %d(-%d), %zd, %zd, %zd, %zd, %d\\n\",
+            maxnofargs, nofoptargs, tot, opt, ext, siz, *nofargs);
+#endif
+
+    if (siz < tot-opt) {
+        fprintf(stderr,
+                \"create_cb_arglist: Failed to build argument list \"
+                \"(siz) with enough arguments (tot-opt) required by \"
+                \"user-supplied function (siz,tot,opt=%zd, %zd, %zd).\\n\",
+                siz, tot, opt);
+        goto capi_fail;
+    }
+
+    /* Initialize argument list */
+    *args = (PyTupleObject *)PyTuple_New(siz);
+    for (i=0;i<*nofargs;i++) {
+        Py_INCREF(Py_None);
+        PyTuple_SET_ITEM((PyObject *)(*args),i,Py_None);
+    }
+    if (xa != NULL)
+        for (i=(*nofargs);i<siz;i++) {
+            tmp = PyTuple_GetItem((PyObject *)xa,i-(*nofargs));
+            Py_INCREF(tmp);
+            PyTuple_SET_ITEM(*args,i,tmp);
+        }
+    CFUNCSMESS(\"create_cb_arglist-end\\n\");
+    Py_DECREF(tmp_fun);
+    return 1;
+
+capi_fail:
+    if ((PyErr_Occurred()) == NULL)
+        PyErr_SetString(#modulename#_error, errmess);
+    Py_XDECREF(tmp_fun);
+    return 0;
+}
+"""
+
+
+def buildcfuncs():
+    from .capi_maps import c2capi_map
+    for k in c2capi_map.keys():
+        m = 'pyarr_from_p_%s1' % k
+        cppmacros[
+            m] = '#define %s(v) (PyArray_SimpleNewFromData(0,NULL,%s,(char *)v))' % (m, c2capi_map[k])
+    k = 'string'
+    m = 'pyarr_from_p_%s1' % k
+    cppmacros[
+        m] = '#define %s(v,dims) (PyArray_New(&PyArray_Type, 1, dims, NPY_STRING, NULL, v, 1, NPY_ARRAY_CARRAY, NULL))' % (m)
+
+
+############ Auxiliary functions for sorting needs ###################
+
+def append_needs(need, flag=1):
+    global outneeds, needs
+    if isinstance(need, list):
+        for n in need:
+            append_needs(n, flag)
+    elif isinstance(need, str):
+        if not need:
+            return
+        if need in includes0:
+            n = 'includes0'
+        elif need in includes:
+            n = 'includes'
+        elif need in typedefs:
+            n = 'typedefs'
+        elif need in typedefs_generated:
+            n = 'typedefs_generated'
+        elif need in cppmacros:
+            n = 'cppmacros'
+        elif need in cfuncs:
+            n = 'cfuncs'
+        elif need in callbacks:
+            n = 'callbacks'
+        elif need in f90modhooks:
+            n = 'f90modhooks'
+        elif need in commonhooks:
+            n = 'commonhooks'
+        else:
+            errmess('append_needs: unknown need %s\n' % (repr(need)))
+            return
+        if need in outneeds[n]:
+            return
+        if flag:
+            tmp = {}
+            if need in needs:
+                for nn in needs[need]:
+                    t = append_needs(nn, 0)
+                    if isinstance(t, dict):
+                        for nnn in t.keys():
+                            if nnn in tmp:
+                                tmp[nnn] = tmp[nnn] + t[nnn]
+                            else:
+                                tmp[nnn] = t[nnn]
+            for nn in tmp.keys():
+                for nnn in tmp[nn]:
+                    if nnn not in outneeds[nn]:
+                        outneeds[nn] = [nnn] + outneeds[nn]
+            outneeds[n].append(need)
+        else:
+            tmp = {}
+            if need in needs:
+                for nn in needs[need]:
+                    t = append_needs(nn, flag)
+                    if isinstance(t, dict):
+                        for nnn in t.keys():
+                            if nnn in tmp:
+                                tmp[nnn] = t[nnn] + tmp[nnn]
+                            else:
+                                tmp[nnn] = t[nnn]
+            if n not in tmp:
+                tmp[n] = []
+            tmp[n].append(need)
+            return tmp
+    else:
+        errmess('append_needs: expected list or string but got :%s\n' %
+                (repr(need)))
+
+
+def get_needs():
+    res = {}
+    for n in outneeds.keys():
+        out = []
+        saveout = copy.copy(outneeds[n])
+        while len(outneeds[n]) > 0:
+            if outneeds[n][0] not in needs:
+                out.append(outneeds[n][0])
+                del outneeds[n][0]
+            else:
+                flag = 0
+                for k in outneeds[n][1:]:
+                    if k in needs[outneeds[n][0]]:
+                        flag = 1
+                        break
+                if flag:
+                    outneeds[n] = outneeds[n][1:] + [outneeds[n][0]]
+                else:
+                    out.append(outneeds[n][0])
+                    del outneeds[n][0]
+            if saveout and (0 not in map(lambda x, y: x == y, saveout, outneeds[n])) \
+                    and outneeds[n] != []:
+                print(n, saveout)
+                errmess(
+                    'get_needs: no progress in sorting needs, probably circular dependence, skipping.\n')
+                out = out + saveout
+                break
+            saveout = copy.copy(outneeds[n])
+        if out == []:
+            out = [n]
+        res[n] = out
+    return res
diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/common_rules.py b/.local/lib/python3.8/site-packages/numpy/f2py/common_rules.py
new file mode 100644
index 0000000..937d8bc
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/f2py/common_rules.py
@@ -0,0 +1,145 @@
+#!/usr/bin/env python3
+"""
+
+Build common block mechanism for f2py2e.
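+Each COMMON block found in the Fortran sources is exposed to Python as
+a module-level attribute of the same name (a FortranObject).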
+ +Copyright 2000 Pearu Peterson all rights reserved, +Pearu Peterson +Permission to use, modify, and distribute this software is given under the +terms of the NumPy License + +NO WARRANTY IS EXPRESSED OR IMPLIED. USE AT YOUR OWN RISK. +$Date: 2005/05/06 10:57:33 $ +Pearu Peterson + +""" +from . import __version__ +f2py_version = __version__.version + +from .auxfuncs import ( + hasbody, hascommon, hasnote, isintent_hide, outmess +) +from . import capi_maps +from . import func2subr +from .crackfortran import rmbadname + + +def findcommonblocks(block, top=1): + ret = [] + if hascommon(block): + for key, value in block['common'].items(): + vars_ = {v: block['vars'][v] for v in value} + ret.append((key, value, vars_)) + elif hasbody(block): + for b in block['body']: + ret = ret + findcommonblocks(b, 0) + if top: + tret = [] + names = [] + for t in ret: + if t[0] not in names: + names.append(t[0]) + tret.append(t) + return tret + return ret + + +def buildhooks(m): + ret = {'commonhooks': [], 'initcommonhooks': [], + 'docs': ['"COMMON blocks:\\n"']} + fwrap = [''] + + def fadd(line, s=fwrap): + s[0] = '%s\n %s' % (s[0], line) + chooks = [''] + + def cadd(line, s=chooks): + s[0] = '%s\n%s' % (s[0], line) + ihooks = [''] + + def iadd(line, s=ihooks): + s[0] = '%s\n%s' % (s[0], line) + doc = [''] + + def dadd(line, s=doc): + s[0] = '%s\n%s' % (s[0], line) + for (name, vnames, vars) in findcommonblocks(m): + lower_name = name.lower() + hnames, inames = [], [] + for n in vnames: + if isintent_hide(vars[n]): + hnames.append(n) + else: + inames.append(n) + if hnames: + outmess('\t\tConstructing COMMON block support for "%s"...\n\t\t %s\n\t\t Hidden: %s\n' % ( + name, ','.join(inames), ','.join(hnames))) + else: + outmess('\t\tConstructing COMMON block support for "%s"...\n\t\t %s\n' % ( + name, ','.join(inames))) + fadd('subroutine f2pyinit%s(setupfunc)' % name) + fadd('external setupfunc') + for n in vnames: + fadd(func2subr.var2fixfortran(vars, n)) + if name == '_BLNK_': + fadd('common %s' % (','.join(vnames))) + else: + fadd('common /%s/ %s' % (name, ','.join(vnames))) + fadd('call setupfunc(%s)' % (','.join(inames))) + fadd('end\n') + cadd('static FortranDataDef f2py_%s_def[] = {' % (name)) + idims = [] + for n in inames: + ct = capi_maps.getctype(vars[n]) + at = capi_maps.c2capi_map[ct] + dm = capi_maps.getarrdims(n, vars[n]) + if dm['dims']: + idims.append('(%s)' % (dm['dims'])) + else: + idims.append('') + dms = dm['dims'].strip() + if not dms: + dms = '-1' + cadd('\t{\"%s\",%s,{{%s}},%s},' % (n, dm['rank'], dms, at)) + cadd('\t{NULL}\n};') + inames1 = rmbadname(inames) + inames1_tps = ','.join(['char *' + s for s in inames1]) + cadd('static void f2py_setup_%s(%s) {' % (name, inames1_tps)) + cadd('\tint i_f2py=0;') + for n in inames1: + cadd('\tf2py_%s_def[i_f2py++].data = %s;' % (name, n)) + cadd('}') + if '_' in lower_name: + F_FUNC = 'F_FUNC_US' + else: + F_FUNC = 'F_FUNC' + cadd('extern void %s(f2pyinit%s,F2PYINIT%s)(void(*)(%s));' + % (F_FUNC, lower_name, name.upper(), + ','.join(['char*'] * len(inames1)))) + cadd('static void f2py_init_%s(void) {' % name) + cadd('\t%s(f2pyinit%s,F2PYINIT%s)(f2py_setup_%s);' + % (F_FUNC, lower_name, name.upper(), name)) + cadd('}\n') + iadd('\ttmp = PyFortranObject_New(f2py_%s_def,f2py_init_%s);' % (name, name)) + iadd('\tF2PyDict_SetItemString(d, \"%s\", tmp);' % name) + iadd('\tPy_DECREF(tmp);') + tname = name.replace('_', '\\_') + dadd('\\subsection{Common block \\texttt{%s}}\n' % (tname)) + dadd('\\begin{description}') + for n in inames: + 
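+            # one \item per visible member, using its array doc signature;
+            # any 'note' attached to the variable is appended below it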
dadd('\\item[]{{}\\verb@%s@{}}' % + (capi_maps.getarrdocsign(n, vars[n]))) + if hasnote(vars[n]): + note = vars[n]['note'] + if isinstance(note, list): + note = '\n'.join(note) + dadd('--- %s' % (note)) + dadd('\\end{description}') + ret['docs'].append( + '"\t/%s/ %s\\n"' % (name, ','.join(map(lambda v, d: v + d, inames, idims)))) + ret['commonhooks'] = chooks + ret['initcommonhooks'] = ihooks + ret['latexdoc'] = doc[0] + if len(ret['docs']) <= 1: + ret['docs'] = '' + return ret, fwrap[0] diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/crackfortran.py b/.local/lib/python3.8/site-packages/numpy/f2py/crackfortran.py new file mode 100644 index 0000000..824d87e --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/crackfortran.py @@ -0,0 +1,3301 @@ +#!/usr/bin/env python3 +""" +crackfortran --- read fortran (77,90) code and extract declaration information. + +Copyright 1999-2004 Pearu Peterson all rights reserved, +Pearu Peterson +Permission to use, modify, and distribute this software is given under the +terms of the NumPy License. + +NO WARRANTY IS EXPRESSED OR IMPLIED. USE AT YOUR OWN RISK. +$Date: 2005/09/27 07:13:49 $ +Pearu Peterson + + +Usage of crackfortran: +====================== +Command line keys: -quiet,-verbose,-fix,-f77,-f90,-show,-h + -m ,--ignore-contains +Functions: crackfortran, crack2fortran +The following Fortran statements/constructions are supported +(or will be if needed): + block data,byte,call,character,common,complex,contains,data, + dimension,double complex,double precision,end,external,function, + implicit,integer,intent,interface,intrinsic, + logical,module,optional,parameter,private,public, + program,real,(sequence?),subroutine,type,use,virtual, + include,pythonmodule +Note: 'virtual' is mapped to 'dimension'. +Note: 'implicit integer (z) static (z)' is 'implicit static (z)' (this is minor bug). +Note: code after 'contains' will be ignored until its scope ends. +Note: 'common' statement is extended: dimensions are moved to variable definitions +Note: f2py directive: f2py is read as +Note: pythonmodule is introduced to represent Python module + +Usage: + `postlist=crackfortran(files)` + `postlist` contains declaration information read from the list of files `files`. 
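+  (For example, hypothetically: postlist = crackfortran(['foo.f']).)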
+ `crack2fortran(postlist)` returns a fortran code to be saved to pyf-file + + `postlist` has the following structure: + *** it is a list of dictionaries containing `blocks': + B = {'block','body','vars','parent_block'[,'name','prefix','args','result', + 'implicit','externals','interfaced','common','sortvars', + 'commonvars','note']} + B['block'] = 'interface' | 'function' | 'subroutine' | 'module' | + 'program' | 'block data' | 'type' | 'pythonmodule' | + 'abstract interface' + B['body'] --- list containing `subblocks' with the same structure as `blocks' + B['parent_block'] --- dictionary of a parent block: + C['body'][]['parent_block'] is C + B['vars'] --- dictionary of variable definitions + B['sortvars'] --- dictionary of variable definitions sorted by dependence (independent first) + B['name'] --- name of the block (not if B['block']=='interface') + B['prefix'] --- prefix string (only if B['block']=='function') + B['args'] --- list of argument names if B['block']== 'function' | 'subroutine' + B['result'] --- name of the return value (only if B['block']=='function') + B['implicit'] --- dictionary {'a':,'b':...} | None + B['externals'] --- list of variables being external + B['interfaced'] --- list of variables being external and defined + B['common'] --- dictionary of common blocks (list of objects) + B['commonvars'] --- list of variables used in common blocks (dimensions are moved to variable definitions) + B['from'] --- string showing the 'parents' of the current block + B['use'] --- dictionary of modules used in current block: + {:{['only':<0|1>],['map':{:,...}]}} + B['note'] --- list of LaTeX comments on the block + B['f2pyenhancements'] --- optional dictionary + {'threadsafe':'','fortranname':, + 'callstatement':|, + 'callprotoargument':, + 'usercode':|, + 'pymethoddef:' + } + B['entry'] --- dictionary {entryname:argslist,..} + B['varnames'] --- list of variable names given in the order of reading the + Fortran code, useful for derived types. + B['saved_interface'] --- a string of scanned routine signature, defines explicit interface + *** Variable definition is a dictionary + D = B['vars'][] = + {'typespec'[,'attrspec','kindselector','charselector','=','typename']} + D['typespec'] = 'byte' | 'character' | 'complex' | 'double complex' | + 'double precision' | 'integer' | 'logical' | 'real' | 'type' + D['attrspec'] --- list of attributes (e.g. 
'dimension()', + 'external','intent(in|out|inout|hide|c|callback|cache|aligned4|aligned8|aligned16)', + 'optional','required', etc) + K = D['kindselector'] = {['*','kind']} (only if D['typespec'] = + 'complex' | 'integer' | 'logical' | 'real' ) + C = D['charselector'] = {['*','len','kind']} + (only if D['typespec']=='character') + D['='] --- initialization expression string + D['typename'] --- name of the type if D['typespec']=='type' + D['dimension'] --- list of dimension bounds + D['intent'] --- list of intent specifications + D['depend'] --- list of variable names on which current variable depends on + D['check'] --- list of C-expressions; if C-expr returns zero, exception is raised + D['note'] --- list of LaTeX comments on the variable + *** Meaning of kind/char selectors (few examples): + D['typespec>']*K['*'] + D['typespec'](kind=K['kind']) + character*C['*'] + character(len=C['len'],kind=C['kind']) + (see also fortran type declaration statement formats below) + +Fortran 90 type declaration statement format (F77 is subset of F90) +==================================================================== +(Main source: IBM XL Fortran 5.1 Language Reference Manual) +type declaration = [[]::] + = byte | + character[] | + complex[] | + double complex | + double precision | + integer[] | + logical[] | + real[] | + type() + = * | + ([len=][,[kind=]]) | + (kind=[,len=]) + = * | + ([kind=]) + = comma separated list of attributes. + Only the following attributes are used in + building up the interface: + external + (parameter --- affects '=' key) + optional + intent + Other attributes are ignored. + = in | out | inout + = comma separated list of dimension bounds. + = [[*][()] | [()]*] + [// | =] [,] + +In addition, the following attributes are used: check,depend,note + +TODO: + * Apply 'parameter' attribute (e.g. 'integer parameter :: i=2' 'real x(i)' + -> 'real x(2)') + The above may be solved by creating appropriate preprocessor program, for example. + +""" +import sys +import string +import fileinput +import re +import os +import copy +import platform + +from . import __version__ + +# The environment provided by auxfuncs.py is needed for some calls to eval. +# As the needed functions cannot be determined by static inspection of the +# code, it is safest to use import * pending a major refactoring of f2py. +from .auxfuncs import * +from . import symbolic + +f2py_version = __version__.version + +# Global flags: +strictf77 = 1 # Ignore `!' comments unless line[0]=='!' +sourcecodeform = 'fix' # 'fix','free' +quiet = 0 # Be verbose if 0 (Obsolete: not used any more) +verbose = 1 # Be quiet if 0, extra verbose if > 1. 
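+# (these module-level flags are re-initialized by reset_global_f2py_vars() below)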
+tabchar = 4 * ' ' +pyffilename = '' +f77modulename = '' +skipemptyends = 0 # for old F77 programs without 'program' statement +ignorecontains = 1 +dolowercase = 1 +debug = [] + +# Global variables +beginpattern = '' +currentfilename = '' +expectbegin = 1 +f90modulevars = {} +filepositiontext = '' +gotnextfile = 1 +groupcache = None +groupcounter = 0 +grouplist = {groupcounter: []} +groupname = '' +include_paths = [] +neededmodule = -1 +onlyfuncs = [] +previous_context = None +skipblocksuntil = -1 +skipfuncs = [] +skipfunctions = [] +usermodules = [] + + +def reset_global_f2py_vars(): + global groupcounter, grouplist, neededmodule, expectbegin + global skipblocksuntil, usermodules, f90modulevars, gotnextfile + global filepositiontext, currentfilename, skipfunctions, skipfuncs + global onlyfuncs, include_paths, previous_context + global strictf77, sourcecodeform, quiet, verbose, tabchar, pyffilename + global f77modulename, skipemptyends, ignorecontains, dolowercase, debug + + # flags + strictf77 = 1 + sourcecodeform = 'fix' + quiet = 0 + verbose = 1 + tabchar = 4 * ' ' + pyffilename = '' + f77modulename = '' + skipemptyends = 0 + ignorecontains = 1 + dolowercase = 1 + debug = [] + # variables + groupcounter = 0 + grouplist = {groupcounter: []} + neededmodule = -1 + expectbegin = 1 + skipblocksuntil = -1 + usermodules = [] + f90modulevars = {} + gotnextfile = 1 + filepositiontext = '' + currentfilename = '' + skipfunctions = [] + skipfuncs = [] + onlyfuncs = [] + include_paths = [] + previous_context = None + + +def outmess(line, flag=1): + global filepositiontext + + if not verbose: + return + if not quiet: + if flag: + sys.stdout.write(filepositiontext) + sys.stdout.write(line) + +re._MAXCACHE = 50 +defaultimplicitrules = {} +for c in "abcdefghopqrstuvwxyz$_": + defaultimplicitrules[c] = {'typespec': 'real'} +for c in "ijklmn": + defaultimplicitrules[c] = {'typespec': 'integer'} +badnames = {} +invbadnames = {} +for n in ['int', 'double', 'float', 'char', 'short', 'long', 'void', 'case', 'while', + 'return', 'signed', 'unsigned', 'if', 'for', 'typedef', 'sizeof', 'union', + 'struct', 'static', 'register', 'new', 'break', 'do', 'goto', 'switch', + 'continue', 'else', 'inline', 'extern', 'delete', 'const', 'auto', + 'len', 'rank', 'shape', 'index', 'slen', 'size', '_i', + 'max', 'min', + 'flen', 'fshape', + 'string', 'complex_double', 'float_double', 'stdin', 'stderr', 'stdout', + 'type', 'default']: + badnames[n] = n + '_bn' + invbadnames[n + '_bn'] = n + + +def rmbadname1(name): + if name in badnames: + errmess('rmbadname1: Replacing "%s" with "%s".\n' % + (name, badnames[name])) + return badnames[name] + return name + + +def rmbadname(names): + return [rmbadname1(_m) for _m in names] + + +def undo_rmbadname1(name): + if name in invbadnames: + errmess('undo_rmbadname1: Replacing "%s" with "%s".\n' + % (name, invbadnames[name])) + return invbadnames[name] + return name + + +def undo_rmbadname(names): + return [undo_rmbadname1(_m) for _m in names] + + +def getextension(name): + i = name.rfind('.') + if i == -1: + return '' + if '\\' in name[i:]: + return '' + if '/' in name[i:]: + return '' + return name[i + 1:] + +is_f_file = re.compile(r'.*\.(for|ftn|f77|f)\Z', re.I).match +_has_f_header = re.compile(r'-\*-\s*fortran\s*-\*-', re.I).search +_has_f90_header = re.compile(r'-\*-\s*f90\s*-\*-', re.I).search +_has_fix_header = re.compile(r'-\*-\s*fix\s*-\*-', re.I).search +_free_f90_start = re.compile(r'[^c*]\s*[^\s\d\t]', re.I).match + + +def is_free_format(file): + """Check if file is in free 
format Fortran."""
+    # f90 allows both fixed and free format, assuming fixed unless
+    # signs of free format are detected.
+    result = 0
+    with open(file, 'r') as f:
+        line = f.readline()
+        n = 15  # the number of non-comment lines to scan for hints
+        if _has_f_header(line):
+            n = 0
+        elif _has_f90_header(line):
+            n = 0
+            result = 1
+        while n > 0 and line:
+            if line[0] != '!' and line.strip():
+                n -= 1
+                if (line[0] != '\t' and _free_f90_start(line[:5])) or line[-2:-1] == '&':
+                    result = 1
+                    break
+            line = f.readline()
+    return result
+
+
+# Read fortran (77,90) code
+def readfortrancode(ffile, dowithline=show, istop=1):
+    """
+    Read fortran codes from files and
+     1) Get rid of comments, line continuations, and empty lines; lower cases.
+     2) Call dowithline(line) on every line.
+     3) Recursively call itself when statement \"include '<filename>'\" is met.
+    """
+    global gotnextfile, filepositiontext, currentfilename, sourcecodeform, strictf77
+    global beginpattern, quiet, verbose, dolowercase, include_paths
+
+    if not istop:
+        saveglobals = gotnextfile, filepositiontext, currentfilename, sourcecodeform, strictf77,\
+            beginpattern, quiet, verbose, dolowercase
+    if ffile == []:
+        return
+    localdolowercase = dolowercase
+    # cont: set to True when the content of the last line read
+    # indicates statement continuation
+    cont = False
+    finalline = ''
+    ll = ''
+    includeline = re.compile(
+        r'\s*include\s*(\'|")(?P<name>[^\'"]*)(\'|")', re.I)
+    cont1 = re.compile(r'(?P<line>.*)&\s*\Z')
+    cont2 = re.compile(r'(\s*&|)(?P<line>.*)')
+    mline_mark = re.compile(r".*?'''")
+    if istop:
+        dowithline('', -1)
+    ll, l1 = '', ''
+    spacedigits = [' '] + [str(_m) for _m in range(10)]
+    filepositiontext = ''
+    fin = fileinput.FileInput(ffile)
+    while True:
+        l = fin.readline()
+        if not l:
+            break
+        if fin.isfirstline():
+            filepositiontext = ''
+            currentfilename = fin.filename()
+            gotnextfile = 1
+            l1 = l
+            strictf77 = 0
+            sourcecodeform = 'fix'
+            ext = os.path.splitext(currentfilename)[1]
+            if is_f_file(currentfilename) and \
+                    not (_has_f90_header(l) or _has_fix_header(l)):
+                strictf77 = 1
+            elif is_free_format(currentfilename) and not _has_fix_header(l):
+                sourcecodeform = 'free'
+            if strictf77:
+                beginpattern = beginpattern77
+            else:
+                beginpattern = beginpattern90
+            outmess('\tReading file %s (format:%s%s)\n'
+                    % (repr(currentfilename), sourcecodeform,
+                       strictf77 and ',strict' or ''))
+
+        l = l.expandtabs().replace('\xa0', ' ')
+        # Get rid of newline characters
+        while not l == '':
+            if l[-1] not in "\n\r\f":
+                break
+            l = l[:-1]
+        if not strictf77:
+            (l, rl) = split_by_unquoted(l, '!')
+            l += ' '
+            if rl[:5].lower() == '!f2py':  # f2py directive
+                l, _ = split_by_unquoted(l + 4 * ' ' + rl[5:], '!')
+        if l.strip() == '':  # Skip empty line
+            if sourcecodeform == 'free':
+                # In free form, a statement continues in the next line
+                # that is not a comment line [3.3.2.4^1], lines with
+                # blanks are comment lines [3.3.2.3^1]. Hence, the
+                # line continuation flag must retain its state.
+                pass
+            else:
+                # In fixed form, statement continuation is determined
+                # by a non-blank character at the 6-th position. Empty
+                # line indicates a start of a new statement
+                # [3.3.3.3^1]. Hence, the line continuation flag must
+                # be reset.
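+                # (in free form, by contrast, the flag was left unchanged above)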
+ cont = False + continue + if sourcecodeform == 'fix': + if l[0] in ['*', 'c', '!', 'C', '#']: + if l[1:5].lower() == 'f2py': # f2py directive + l = ' ' + l[5:] + else: # Skip comment line + cont = False + continue + elif strictf77: + if len(l) > 72: + l = l[:72] + if not (l[0] in spacedigits): + raise Exception('readfortrancode: Found non-(space,digit) char ' + 'in the first column.\n\tAre you sure that ' + 'this code is in fix form?\n\tline=%s' % repr(l)) + + if (not cont or strictf77) and (len(l) > 5 and not l[5] == ' '): + # Continuation of a previous line + ll = ll + l[6:] + finalline = '' + origfinalline = '' + else: + if not strictf77: + # F90 continuation + r = cont1.match(l) + if r: + l = r.group('line') # Continuation follows .. + if cont: + ll = ll + cont2.match(l).group('line') + finalline = '' + origfinalline = '' + else: + # clean up line beginning from possible digits. + l = ' ' + l[5:] + if localdolowercase: + finalline = ll.lower() + else: + finalline = ll + origfinalline = ll + ll = l + cont = (r is not None) + else: + # clean up line beginning from possible digits. + l = ' ' + l[5:] + if localdolowercase: + finalline = ll.lower() + else: + finalline = ll + origfinalline = ll + ll = l + + elif sourcecodeform == 'free': + if not cont and ext == '.pyf' and mline_mark.match(l): + l = l + '\n' + while True: + lc = fin.readline() + if not lc: + errmess( + 'Unexpected end of file when reading multiline\n') + break + l = l + lc + if mline_mark.match(lc): + break + l = l.rstrip() + r = cont1.match(l) + if r: + l = r.group('line') # Continuation follows .. + if cont: + ll = ll + cont2.match(l).group('line') + finalline = '' + origfinalline = '' + else: + if localdolowercase: + finalline = ll.lower() + else: + finalline = ll + origfinalline = ll + ll = l + cont = (r is not None) + else: + raise ValueError( + "Flag sourcecodeform must be either 'fix' or 'free': %s" % repr(sourcecodeform)) + filepositiontext = 'Line #%d in %s:"%s"\n\t' % ( + fin.filelineno() - 1, currentfilename, l1) + m = includeline.match(origfinalline) + if m: + fn = m.group('name') + if os.path.isfile(fn): + readfortrancode(fn, dowithline=dowithline, istop=0) + else: + include_dirs = [ + os.path.dirname(currentfilename)] + include_paths + foundfile = 0 + for inc_dir in include_dirs: + fn1 = os.path.join(inc_dir, fn) + if os.path.isfile(fn1): + foundfile = 1 + readfortrancode(fn1, dowithline=dowithline, istop=0) + break + if not foundfile: + outmess('readfortrancode: could not find include file %s in %s. Ignoring.\n' % ( + repr(fn), os.pathsep.join(include_dirs))) + else: + dowithline(finalline) + l1 = ll + if localdolowercase: + finalline = ll.lower() + else: + finalline = ll + origfinalline = ll + filepositiontext = 'Line #%d in %s:"%s"\n\t' % ( + fin.filelineno() - 1, currentfilename, l1) + m = includeline.match(origfinalline) + if m: + fn = m.group('name') + if os.path.isfile(fn): + readfortrancode(fn, dowithline=dowithline, istop=0) + else: + include_dirs = [os.path.dirname(currentfilename)] + include_paths + foundfile = 0 + for inc_dir in include_dirs: + fn1 = os.path.join(inc_dir, fn) + if os.path.isfile(fn1): + foundfile = 1 + readfortrancode(fn1, dowithline=dowithline, istop=0) + break + if not foundfile: + outmess('readfortrancode: could not find include file %s in %s. 
Ignoring.\n' % (
+                        repr(fn), os.pathsep.join(include_dirs)))
+        else:
+            dowithline(finalline)
+    filepositiontext = ''
+    fin.close()
+    if istop:
+        dowithline('', 1)
+    else:
+        gotnextfile, filepositiontext, currentfilename, sourcecodeform, strictf77,\
+            beginpattern, quiet, verbose, dolowercase = saveglobals
+
+# Crack line
+beforethisafter = r'\s*(?P<before>%s(?=\s*(\b(%s)\b)))' + \
+                  r'\s*(?P<this>(\b(%s)\b))' + \
+                  r'\s*(?P<after>%s)\s*\Z'
+##
+fortrantypes = r'character|logical|integer|real|complex|double\s*(precision\s*(complex|)|complex)|type(?=\s*\([\w\s,=(*)]*\))|byte'
+typespattern = re.compile(
+    beforethisafter % ('', fortrantypes, fortrantypes, '.*'), re.I), 'type'
+typespattern4implicit = re.compile(beforethisafter % (
+    '', fortrantypes + '|static|automatic|undefined', fortrantypes + '|static|automatic|undefined', '.*'), re.I)
+#
+functionpattern = re.compile(beforethisafter % (
+    r'([a-z]+[\w\s(=*+-/)]*?|)', 'function', 'function', '.*'), re.I), 'begin'
+subroutinepattern = re.compile(beforethisafter % (
+    r'[a-z\s]*?', 'subroutine', 'subroutine', '.*'), re.I), 'begin'
+# modulepattern=re.compile(beforethisafter%('[a-z\s]*?','module','module','.*'),re.I),'begin'
+#
+groupbegins77 = r'program|block\s*data'
+beginpattern77 = re.compile(
+    beforethisafter % ('', groupbegins77, groupbegins77, '.*'), re.I), 'begin'
+groupbegins90 = groupbegins77 + \
+    r'|module(?!\s*procedure)|python\s*module|(abstract|)\s*interface|' + \
+    r'type(?!\s*\()'
+beginpattern90 = re.compile(
+    beforethisafter % ('', groupbegins90, groupbegins90, '.*'), re.I), 'begin'
+groupends = (r'end|endprogram|endblockdata|endmodule|endpythonmodule|'
+             r'endinterface|endsubroutine|endfunction')
+endpattern = re.compile(
+    beforethisafter % ('', groupends, groupends, r'[\w\s]*'), re.I), 'end'
+endifs = r'(end\s*(if|do|where|select|while|forall|associate|block|critical|enum|team))|(module\s*procedure)'
+endifpattern = re.compile(
+    beforethisafter % (r'[\w]*?', endifs, endifs, r'[\w\s]*'), re.I), 'endif'
+#
+implicitpattern = re.compile(
+    beforethisafter % ('', 'implicit', 'implicit', '.*'), re.I), 'implicit'
+dimensionpattern = re.compile(beforethisafter % (
+    '', 'dimension|virtual', 'dimension|virtual', '.*'), re.I), 'dimension'
+externalpattern = re.compile(
+    beforethisafter % ('', 'external', 'external', '.*'), re.I), 'external'
+optionalpattern = re.compile(
+    beforethisafter % ('', 'optional', 'optional', '.*'), re.I), 'optional'
+requiredpattern = re.compile(
+    beforethisafter % ('', 'required', 'required', '.*'), re.I), 'required'
+publicpattern = re.compile(
+    beforethisafter % ('', 'public', 'public', '.*'), re.I), 'public'
+privatepattern = re.compile(
+    beforethisafter % ('', 'private', 'private', '.*'), re.I), 'private'
+intrinsicpattern = re.compile(
+    beforethisafter % ('', 'intrinsic', 'intrinsic', '.*'), re.I), 'intrinsic'
+intentpattern = re.compile(beforethisafter % (
+    '', 'intent|depend|note|check', 'intent|depend|note|check', r'\s*\(.*?\).*'), re.I), 'intent'
+parameterpattern = re.compile(
+    beforethisafter % ('', 'parameter', 'parameter', r'\s*\(.*'), re.I), 'parameter'
+datapattern = re.compile(
+    beforethisafter % ('', 'data', 'data', '.*'), re.I), 'data'
+callpattern = re.compile(
+    beforethisafter % ('', 'call', 'call', '.*'), re.I), 'call'
+entrypattern = re.compile(
+    beforethisafter % ('', 'entry', 'entry', '.*'), re.I), 'entry'
+callfunpattern = re.compile(
+    beforethisafter % ('', 'callfun', 'callfun', '.*'), re.I), 'callfun'
+commonpattern = re.compile(
+    beforethisafter % ('', 'common', 'common',
'.*'), re.I), 'common'
+usepattern = re.compile(
+    beforethisafter % ('', 'use', 'use', '.*'), re.I), 'use'
+containspattern = re.compile(
+    beforethisafter % ('', 'contains', 'contains', ''), re.I), 'contains'
+formatpattern = re.compile(
+    beforethisafter % ('', 'format', 'format', '.*'), re.I), 'format'
+# Non-fortran and f2py-specific statements
+f2pyenhancementspattern = re.compile(beforethisafter % ('', 'threadsafe|fortranname|callstatement|callprotoargument|usercode|pymethoddef',
+                                                        'threadsafe|fortranname|callstatement|callprotoargument|usercode|pymethoddef', '.*'), re.I | re.S), 'f2pyenhancements'
+multilinepattern = re.compile(
+    r"\s*(?P<before>''')(?P<this>.*?)(?P<after>''')\s*\Z", re.S), 'multiline'
+##
+
+def split_by_unquoted(line, characters):
+    """
+    Splits the line into (line[:i], line[i:]),
+    where i is the index of first occurrence of one of the characters
+    not within quotes, or len(line) if no such index exists
+    """
+    assert not (set('"\'') & set(characters)), "cannot split by unquoted quotes"
+    r = re.compile(
+        r"\A(?P<before>({single_quoted}|{double_quoted}|{not_quoted})*)"
+        r"(?P<after>{char}.*)\Z".format(
+            not_quoted="[^\"'{}]".format(re.escape(characters)),
+            char="[{}]".format(re.escape(characters)),
+            single_quoted=r"('([^'\\]|(\\.))*')",
+            double_quoted=r'("([^"\\]|(\\.))*")'))
+    m = r.match(line)
+    if m:
+        d = m.groupdict()
+        return (d["before"], d["after"])
+    return (line, "")
+
+def _simplifyargs(argsline):
+    a = []
+    for n in markoutercomma(argsline).split('@,@'):
+        for r in '(),':
+            n = n.replace(r, '_')
+        a.append(n)
+    return ','.join(a)
+
+crackline_re_1 = re.compile(r'\s*(?P<result>\b[a-z]+\w*\b)\s*=.*', re.I)
+
+
+def crackline(line, reset=0):
+    """
+    reset=-1 --- initialize
+    reset=0  --- crack the line
+    reset=1  --- final check if mismatch of blocks occurred
+
+    Cracked data is saved in grouplist[0].
+    """
+    global beginpattern, groupcounter, groupname, groupcache, grouplist
+    global filepositiontext, currentfilename, neededmodule, expectbegin
+    global skipblocksuntil, skipemptyends, previous_context, gotnextfile
+
+    _, has_semicolon = split_by_unquoted(line, ";")
+    if has_semicolon and not (f2pyenhancementspattern[0].match(line) or
+                              multilinepattern[0].match(line)):
+        # XXX: non-zero reset values need testing
+        assert reset == 0, repr(reset)
+        # split line on unquoted semicolons
+        line, semicolon_line = split_by_unquoted(line, ";")
+        while semicolon_line:
+            crackline(line, reset)
+            line, semicolon_line = split_by_unquoted(semicolon_line[1:], ";")
+        crackline(line, reset)
+        return
+    if reset < 0:
+        groupcounter = 0
+        groupname = {groupcounter: ''}
+        groupcache = {groupcounter: {}}
+        grouplist = {groupcounter: []}
+        groupcache[groupcounter]['body'] = []
+        groupcache[groupcounter]['vars'] = {}
+        groupcache[groupcounter]['block'] = ''
+        groupcache[groupcounter]['name'] = ''
+        neededmodule = -1
+        skipblocksuntil = -1
+        return
+    if reset > 0:
+        fl = 0
+        if f77modulename and neededmodule == groupcounter:
+            fl = 2
+        while groupcounter > fl:
+            outmess('crackline: groupcounter=%s groupname=%s\n' %
+                    (repr(groupcounter), repr(groupname)))
+            outmess(
+                'crackline: Mismatch of blocks encountered. 
Trying to fix it by assuming "end" statement.\n') + grouplist[groupcounter - 1].append(groupcache[groupcounter]) + grouplist[groupcounter - 1][-1]['body'] = grouplist[groupcounter] + del grouplist[groupcounter] + groupcounter = groupcounter - 1 + if f77modulename and neededmodule == groupcounter: + grouplist[groupcounter - 1].append(groupcache[groupcounter]) + grouplist[groupcounter - 1][-1]['body'] = grouplist[groupcounter] + del grouplist[groupcounter] + groupcounter = groupcounter - 1 # end interface + grouplist[groupcounter - 1].append(groupcache[groupcounter]) + grouplist[groupcounter - 1][-1]['body'] = grouplist[groupcounter] + del grouplist[groupcounter] + groupcounter = groupcounter - 1 # end module + neededmodule = -1 + return + if line == '': + return + flag = 0 + for pat in [dimensionpattern, externalpattern, intentpattern, optionalpattern, + requiredpattern, + parameterpattern, datapattern, publicpattern, privatepattern, + intrinsicpattern, + endifpattern, endpattern, + formatpattern, + beginpattern, functionpattern, subroutinepattern, + implicitpattern, typespattern, commonpattern, + callpattern, usepattern, containspattern, + entrypattern, + f2pyenhancementspattern, + multilinepattern + ]: + m = pat[0].match(line) + if m: + break + flag = flag + 1 + if not m: + re_1 = crackline_re_1 + if 0 <= skipblocksuntil <= groupcounter: + return + if 'externals' in groupcache[groupcounter]: + for name in groupcache[groupcounter]['externals']: + if name in invbadnames: + name = invbadnames[name] + if 'interfaced' in groupcache[groupcounter] and name in groupcache[groupcounter]['interfaced']: + continue + m1 = re.match( + r'(?P[^"]*)\b%s\b\s*@\(@(?P[^@]*)@\)@.*\Z' % name, markouterparen(line), re.I) + if m1: + m2 = re_1.match(m1.group('before')) + a = _simplifyargs(m1.group('args')) + if m2: + line = 'callfun %s(%s) result (%s)' % ( + name, a, m2.group('result')) + else: + line = 'callfun %s(%s)' % (name, a) + m = callfunpattern[0].match(line) + if not m: + outmess( + 'crackline: could not resolve function call for line=%s.\n' % repr(line)) + return + analyzeline(m, 'callfun', line) + return + if verbose > 1 or (verbose == 1 and currentfilename.lower().endswith('.pyf')): + previous_context = None + outmess('crackline:%d: No pattern for line\n' % (groupcounter)) + return + elif pat[1] == 'end': + if 0 <= skipblocksuntil < groupcounter: + groupcounter = groupcounter - 1 + if skipblocksuntil <= groupcounter: + return + if groupcounter <= 0: + raise Exception('crackline: groupcounter(=%s) is nonpositive. ' + 'Check the blocks.' 
+                            % (groupcounter))
+        m1 = beginpattern[0].match((line))
+        if (m1) and (not m1.group('this') == groupname[groupcounter]):
+            raise Exception('crackline: End group %s does not match with '
+                            'previous Begin group %s\n\t%s' %
+                            (repr(m1.group('this')), repr(groupname[groupcounter]),
+                             filepositiontext)
+                            )
+        if skipblocksuntil == groupcounter:
+            skipblocksuntil = -1
+        grouplist[groupcounter - 1].append(groupcache[groupcounter])
+        grouplist[groupcounter - 1][-1]['body'] = grouplist[groupcounter]
+        del grouplist[groupcounter]
+        groupcounter = groupcounter - 1
+        if not skipemptyends:
+            expectbegin = 1
+    elif pat[1] == 'begin':
+        if 0 <= skipblocksuntil <= groupcounter:
+            groupcounter = groupcounter + 1
+            return
+        gotnextfile = 0
+        analyzeline(m, pat[1], line)
+        expectbegin = 0
+    elif pat[1] == 'endif':
+        pass
+    elif pat[1] == 'contains':
+        if ignorecontains:
+            return
+        if 0 <= skipblocksuntil <= groupcounter:
+            return
+        skipblocksuntil = groupcounter
+    else:
+        if 0 <= skipblocksuntil <= groupcounter:
+            return
+        analyzeline(m, pat[1], line)
+
+
+def markouterparen(line):
+    l = ''
+    f = 0
+    for c in line:
+        if c == '(':
+            f = f + 1
+            if f == 1:
+                l = l + '@(@'
+                continue
+        elif c == ')':
+            f = f - 1
+            if f == 0:
+                l = l + '@)@'
+                continue
+        l = l + c
+    return l
+
+
+def markoutercomma(line, comma=','):
+    l = ''
+    f = 0
+    before, after = split_by_unquoted(line, comma + '()')
+    l += before
+    while after:
+        if (after[0] == comma) and (f == 0):
+            l += '@' + comma + '@'
+        else:
+            l += after[0]
+            if after[0] == '(':
+                f += 1
+            elif after[0] == ')':
+                f -= 1
+        before, after = split_by_unquoted(after[1:], comma + '()')
+        l += before
+    assert not f, repr((f, line, l))
+    return l
+
+def unmarkouterparen(line):
+    r = line.replace('@(@', '(').replace('@)@', ')')
+    return r
+
+
+def appenddecl(decl, decl2, force=1):
+    if not decl:
+        decl = {}
+    if not decl2:
+        return decl
+    if decl is decl2:
+        return decl
+    for k in list(decl2.keys()):
+        if k == 'typespec':
+            if force or k not in decl:
+                decl[k] = decl2[k]
+        elif k == 'attrspec':
+            for l in decl2[k]:
+                decl = setattrspec(decl, l, force)
+        elif k == 'kindselector':
+            decl = setkindselector(decl, decl2[k], force)
+        elif k == 'charselector':
+            decl = setcharselector(decl, decl2[k], force)
+        elif k in ['=', 'typename']:
+            if force or k not in decl:
+                decl[k] = decl2[k]
+        elif k == 'note':
+            pass
+        elif k in ['intent', 'check', 'dimension', 'optional',
+                   'required', 'depend']:
+            errmess('appenddecl: "%s" not implemented.\n' % k)
+        else:
+            raise Exception('appenddecl: Unknown variable definition key: ' +
+                            str(k))
+    return decl
+
+selectpattern = re.compile(
+    r'\s*(?P<this>(@\(@.*?@\)@|\*[\d*]+|\*\s*@\(@.*?@\)@|))(?P<after>.*)\Z', re.I)
+nameargspattern = re.compile(
+    r'\s*(?P<name>\b[\w$]+\b)\s*(@\(@\s*(?P<args>[\w\s,]*)\s*@\)@|)\s*((result(\s*@\(@\s*(?P<result>\b[\w$]+\b)\s*@\)@|))|(bind\s*@\(@\s*(?P<bind>.*)\s*@\)@))*\s*\Z', re.I)
+callnameargspattern = re.compile(
+    r'\s*(?P<name>\b[\w$]+\b)\s*@\(@\s*(?P<args>.*)\s*@\)@\s*\Z', re.I)
+real16pattern = re.compile(
+    r'([-+]?(?:\d+(?:\.\d*)?|\d*\.\d+))[dD]((?:[-+]?\d+)?)')
+real8pattern = re.compile(
+    r'([-+]?((?:\d+(?:\.\d*)?|\d*\.\d+))[eE]((?:[-+]?\d+)?)|(\d+\.\d*))')
+
+_intentcallbackpattern = re.compile(r'intent\s*\(.*?\bcallback\b', re.I)
+
+
+def _is_intent_callback(vdecl):
+    for a in vdecl.get('attrspec', []):
+        if _intentcallbackpattern.match(a):
+            return 1
+    return 0
+
+
+def _resolvenameargspattern(line):
+    line = markouterparen(line)
+    m1 = nameargspattern.match(line)
+    if m1:
+        return m1.group('name'), m1.group('args'), m1.group('result'), \
m1.group('bind') + m1 = callnameargspattern.match(line) + if m1: + return m1.group('name'), m1.group('args'), None, None + return None, [], None, None + + +def analyzeline(m, case, line): + global groupcounter, groupname, groupcache, grouplist, filepositiontext + global currentfilename, f77modulename, neededinterface, neededmodule + global expectbegin, gotnextfile, previous_context + + block = m.group('this') + if case != 'multiline': + previous_context = None + if expectbegin and case not in ['begin', 'call', 'callfun', 'type'] \ + and not skipemptyends and groupcounter < 1: + newname = os.path.basename(currentfilename).split('.')[0] + outmess( + 'analyzeline: no group yet. Creating program group with name "%s".\n' % newname) + gotnextfile = 0 + groupcounter = groupcounter + 1 + groupname[groupcounter] = 'program' + groupcache[groupcounter] = {} + grouplist[groupcounter] = [] + groupcache[groupcounter]['body'] = [] + groupcache[groupcounter]['vars'] = {} + groupcache[groupcounter]['block'] = 'program' + groupcache[groupcounter]['name'] = newname + groupcache[groupcounter]['from'] = 'fromsky' + expectbegin = 0 + if case in ['begin', 'call', 'callfun']: + # Crack line => block,name,args,result + block = block.lower() + if re.match(r'block\s*data', block, re.I): + block = 'block data' + elif re.match(r'python\s*module', block, re.I): + block = 'python module' + elif re.match(r'abstract\s*interface', block, re.I): + block = 'abstract interface' + name, args, result, bind = _resolvenameargspattern(m.group('after')) + if name is None: + if block == 'block data': + name = '_BLOCK_DATA_' + else: + name = '' + if block not in ['interface', 'block data', 'abstract interface']: + outmess('analyzeline: No name/args pattern found for line.\n') + + previous_context = (block, name, groupcounter) + if args: + args = rmbadname([x.strip() + for x in markoutercomma(args).split('@,@')]) + else: + args = [] + if '' in args: + while '' in args: + args.remove('') + outmess( + 'analyzeline: argument list is malformed (missing argument).\n') + + # end of crack line => block,name,args,result + needmodule = 0 + needinterface = 0 + + if case in ['call', 'callfun']: + needinterface = 1 + if 'args' not in groupcache[groupcounter]: + return + if name not in groupcache[groupcounter]['args']: + return + for it in grouplist[groupcounter]: + if it['name'] == name: + return + if name in groupcache[groupcounter]['interfaced']: + return + block = {'call': 'subroutine', 'callfun': 'function'}[case] + if f77modulename and neededmodule == -1 and groupcounter <= 1: + neededmodule = groupcounter + 2 + needmodule = 1 + if block not in ['interface', 'abstract interface']: + needinterface = 1 + # Create new block(s) + groupcounter = groupcounter + 1 + groupcache[groupcounter] = {} + grouplist[groupcounter] = [] + if needmodule: + if verbose > 1: + outmess('analyzeline: Creating module block %s\n' % + repr(f77modulename), 0) + groupname[groupcounter] = 'module' + groupcache[groupcounter]['block'] = 'python module' + groupcache[groupcounter]['name'] = f77modulename + groupcache[groupcounter]['from'] = '' + groupcache[groupcounter]['body'] = [] + groupcache[groupcounter]['externals'] = [] + groupcache[groupcounter]['interfaced'] = [] + groupcache[groupcounter]['vars'] = {} + groupcounter = groupcounter + 1 + groupcache[groupcounter] = {} + grouplist[groupcounter] = [] + if needinterface: + if verbose > 1: + outmess('analyzeline: Creating additional interface block (groupcounter=%s).\n' % ( + groupcounter), 0) + groupname[groupcounter] = 
'interface' + groupcache[groupcounter]['block'] = 'interface' + groupcache[groupcounter]['name'] = 'unknown_interface' + groupcache[groupcounter]['from'] = '%s:%s' % ( + groupcache[groupcounter - 1]['from'], groupcache[groupcounter - 1]['name']) + groupcache[groupcounter]['body'] = [] + groupcache[groupcounter]['externals'] = [] + groupcache[groupcounter]['interfaced'] = [] + groupcache[groupcounter]['vars'] = {} + groupcounter = groupcounter + 1 + groupcache[groupcounter] = {} + grouplist[groupcounter] = [] + groupname[groupcounter] = block + groupcache[groupcounter]['block'] = block + if not name: + name = 'unknown_' + block.replace(' ', '_') + groupcache[groupcounter]['prefix'] = m.group('before') + groupcache[groupcounter]['name'] = rmbadname1(name) + groupcache[groupcounter]['result'] = result + if groupcounter == 1: + groupcache[groupcounter]['from'] = currentfilename + else: + if f77modulename and groupcounter == 3: + groupcache[groupcounter]['from'] = '%s:%s' % ( + groupcache[groupcounter - 1]['from'], currentfilename) + else: + groupcache[groupcounter]['from'] = '%s:%s' % ( + groupcache[groupcounter - 1]['from'], groupcache[groupcounter - 1]['name']) + for k in list(groupcache[groupcounter].keys()): + if not groupcache[groupcounter][k]: + del groupcache[groupcounter][k] + + groupcache[groupcounter]['args'] = args + groupcache[groupcounter]['body'] = [] + groupcache[groupcounter]['externals'] = [] + groupcache[groupcounter]['interfaced'] = [] + groupcache[groupcounter]['vars'] = {} + groupcache[groupcounter]['entry'] = {} + # end of creation + if block == 'type': + groupcache[groupcounter]['varnames'] = [] + + if case in ['call', 'callfun']: # set parents variables + if name not in groupcache[groupcounter - 2]['externals']: + groupcache[groupcounter - 2]['externals'].append(name) + groupcache[groupcounter]['vars'] = copy.deepcopy( + groupcache[groupcounter - 2]['vars']) + try: + del groupcache[groupcounter]['vars'][name][ + groupcache[groupcounter]['vars'][name]['attrspec'].index('external')] + except Exception: + pass + if block in ['function', 'subroutine']: # set global attributes + try: + groupcache[groupcounter]['vars'][name] = appenddecl( + groupcache[groupcounter]['vars'][name], groupcache[groupcounter - 2]['vars']['']) + except Exception: + pass + if case == 'callfun': # return type + if result and result in groupcache[groupcounter]['vars']: + if not name == result: + groupcache[groupcounter]['vars'][name] = appenddecl( + groupcache[groupcounter]['vars'][name], groupcache[groupcounter]['vars'][result]) + # if groupcounter>1: # name is interfaced + try: + groupcache[groupcounter - 2]['interfaced'].append(name) + except Exception: + pass + if block == 'function': + t = typespattern[0].match(m.group('before') + ' ' + name) + if t: + typespec, selector, attr, edecl = cracktypespec0( + t.group('this'), t.group('after')) + updatevars(typespec, selector, attr, edecl) + + if case in ['call', 'callfun']: + grouplist[groupcounter - 1].append(groupcache[groupcounter]) + grouplist[groupcounter - 1][-1]['body'] = grouplist[groupcounter] + del grouplist[groupcounter] + groupcounter = groupcounter - 1 # end routine + grouplist[groupcounter - 1].append(groupcache[groupcounter]) + grouplist[groupcounter - 1][-1]['body'] = grouplist[groupcounter] + del grouplist[groupcounter] + groupcounter = groupcounter - 1 # end interface + + elif case == 'entry': + name, args, result, bind = _resolvenameargspattern(m.group('after')) + if name is not None: + if args: + args = rmbadname([x.strip() + for x 
in markoutercomma(args).split('@,@')]) + else: + args = [] + assert result is None, repr(result) + groupcache[groupcounter]['entry'][name] = args + previous_context = ('entry', name, groupcounter) + elif case == 'type': + typespec, selector, attr, edecl = cracktypespec0( + block, m.group('after')) + last_name = updatevars(typespec, selector, attr, edecl) + if last_name is not None: + previous_context = ('variable', last_name, groupcounter) + elif case in ['dimension', 'intent', 'optional', 'required', 'external', 'public', 'private', 'intrinsic']: + edecl = groupcache[groupcounter]['vars'] + ll = m.group('after').strip() + i = ll.find('::') + if i < 0 and case == 'intent': + i = markouterparen(ll).find('@)@') - 2 + ll = ll[:i + 1] + '::' + ll[i + 1:] + i = ll.find('::') + if ll[i:] == '::' and 'args' in groupcache[groupcounter]: + outmess('All arguments will have attribute %s%s\n' % + (m.group('this'), ll[:i])) + ll = ll + ','.join(groupcache[groupcounter]['args']) + if i < 0: + i = 0 + pl = '' + else: + pl = ll[:i].strip() + ll = ll[i + 2:] + ch = markoutercomma(pl).split('@,@') + if len(ch) > 1: + pl = ch[0] + outmess('analyzeline: cannot handle multiple attributes without type specification. Ignoring %r.\n' % ( + ','.join(ch[1:]))) + last_name = None + + for e in [x.strip() for x in markoutercomma(ll).split('@,@')]: + m1 = namepattern.match(e) + if not m1: + if case in ['public', 'private']: + k = '' + else: + print(m.groupdict()) + outmess('analyzeline: no name pattern found in %s statement for %s. Skipping.\n' % ( + case, repr(e))) + continue + else: + k = rmbadname1(m1.group('name')) + if k not in edecl: + edecl[k] = {} + if case == 'dimension': + ap = case + m1.group('after') + if case == 'intent': + ap = m.group('this') + pl + if _intentcallbackpattern.match(ap): + if k not in groupcache[groupcounter]['args']: + if groupcounter > 1: + if '__user__' not in groupcache[groupcounter - 2]['name']: + outmess( + 'analyzeline: missing __user__ module (could be nothing)\n') + # fixes ticket 1693 + if k != groupcache[groupcounter]['name']: + outmess('analyzeline: appending intent(callback) %s' + ' to %s arguments\n' % (k, groupcache[groupcounter]['name'])) + groupcache[groupcounter]['args'].append(k) + else: + errmess( + 'analyzeline: intent(callback) %s is ignored\n' % (k)) + else: + errmess('analyzeline: intent(callback) %s is already' + ' in argument list\n' % (k)) + if case in ['optional', 'required', 'public', 'external', 'private', 'intrinsic']: + ap = case + if 'attrspec' in edecl[k]: + edecl[k]['attrspec'].append(ap) + else: + edecl[k]['attrspec'] = [ap] + if case == 'external': + if groupcache[groupcounter]['block'] == 'program': + outmess('analyzeline: ignoring program arguments\n') + continue + if k not in groupcache[groupcounter]['args']: + continue + if 'externals' not in groupcache[groupcounter]: + groupcache[groupcounter]['externals'] = [] + groupcache[groupcounter]['externals'].append(k) + last_name = k + groupcache[groupcounter]['vars'] = edecl + if last_name is not None: + previous_context = ('variable', last_name, groupcounter) + elif case == 'parameter': + edecl = groupcache[groupcounter]['vars'] + ll = m.group('after').strip()[1:-1] + last_name = None + for e in markoutercomma(ll).split('@,@'): + try: + k, initexpr = [x.strip() for x in e.split('=')] + except Exception: + outmess( + 'analyzeline: could not extract name,expr in parameter statement "%s" of "%s"\n' % (e, ll)) + continue + params = get_parameters(edecl) + k = rmbadname1(k) + if k not in edecl: + edecl[k] = 
{} + if '=' in edecl[k] and (not edecl[k]['='] == initexpr): + outmess('analyzeline: Overwriting the value of parameter "%s" ("%s") with "%s".\n' % ( + k, edecl[k]['='], initexpr)) + t = determineexprtype(initexpr, params) + if t: + if t.get('typespec') == 'real': + tt = list(initexpr) + for m in real16pattern.finditer(initexpr): + tt[m.start():m.end()] = list( + initexpr[m.start():m.end()].lower().replace('d', 'e')) + initexpr = ''.join(tt) + elif t.get('typespec') == 'complex': + initexpr = initexpr[1:].lower().replace('d', 'e').\ + replace(',', '+1j*(') + try: + v = eval(initexpr, {}, params) + except (SyntaxError, NameError, TypeError) as msg: + errmess('analyzeline: Failed to evaluate %r. Ignoring: %s\n' + % (initexpr, msg)) + continue + edecl[k]['='] = repr(v) + if 'attrspec' in edecl[k]: + edecl[k]['attrspec'].append('parameter') + else: + edecl[k]['attrspec'] = ['parameter'] + last_name = k + groupcache[groupcounter]['vars'] = edecl + if last_name is not None: + previous_context = ('variable', last_name, groupcounter) + elif case == 'implicit': + if m.group('after').strip().lower() == 'none': + groupcache[groupcounter]['implicit'] = None + elif m.group('after'): + if 'implicit' in groupcache[groupcounter]: + impl = groupcache[groupcounter]['implicit'] + else: + impl = {} + if impl is None: + outmess( + 'analyzeline: Overwriting earlier "implicit none" statement.\n') + impl = {} + for e in markoutercomma(m.group('after')).split('@,@'): + decl = {} + m1 = re.match( + r'\s*(?P.*?)\s*(\(\s*(?P[a-z-, ]+)\s*\)\s*|)\Z', e, re.I) + if not m1: + outmess( + 'analyzeline: could not extract info of implicit statement part "%s"\n' % (e)) + continue + m2 = typespattern4implicit.match(m1.group('this')) + if not m2: + outmess( + 'analyzeline: could not extract types pattern of implicit statement part "%s"\n' % (e)) + continue + typespec, selector, attr, edecl = cracktypespec0( + m2.group('this'), m2.group('after')) + kindselect, charselect, typename = cracktypespec( + typespec, selector) + decl['typespec'] = typespec + decl['kindselector'] = kindselect + decl['charselector'] = charselect + decl['typename'] = typename + for k in list(decl.keys()): + if not decl[k]: + del decl[k] + for r in markoutercomma(m1.group('after')).split('@,@'): + if '-' in r: + try: + begc, endc = [x.strip() for x in r.split('-')] + except Exception: + outmess( + 'analyzeline: expected "-" instead of "%s" in range list of implicit statement\n' % r) + continue + else: + begc = endc = r.strip() + if not len(begc) == len(endc) == 1: + outmess( + 'analyzeline: expected "-" instead of "%s" in range list of implicit statement (2)\n' % r) + continue + for o in range(ord(begc), ord(endc) + 1): + impl[chr(o)] = decl + groupcache[groupcounter]['implicit'] = impl + elif case == 'data': + ll = [] + dl = '' + il = '' + f = 0 + fc = 1 + inp = 0 + for c in m.group('after'): + if not inp: + if c == "'": + fc = not fc + if c == '/' and fc: + f = f + 1 + continue + if c == '(': + inp = inp + 1 + elif c == ')': + inp = inp - 1 + if f == 0: + dl = dl + c + elif f == 1: + il = il + c + elif f == 2: + dl = dl.strip() + if dl.startswith(','): + dl = dl[1:].strip() + ll.append([dl, il]) + dl = c + il = '' + f = 0 + if f == 2: + dl = dl.strip() + if dl.startswith(','): + dl = dl[1:].strip() + ll.append([dl, il]) + vars = {} + if 'vars' in groupcache[groupcounter]: + vars = groupcache[groupcounter]['vars'] + last_name = None + for l in ll: + l = [x.strip() for x in l] + if l[0][0] == ',': + l[0] = l[0][1:] + if l[0][0] == '(': + outmess( + 
'analyzeline: implied-DO list "%s" is not supported. Skipping.\n' % l[0]) + continue + i = 0 + j = 0 + llen = len(l[1]) + for v in rmbadname([x.strip() for x in markoutercomma(l[0]).split('@,@')]): + if v[0] == '(': + outmess( + 'analyzeline: implied-DO list "%s" is not supported. Skipping.\n' % v) + # XXX: subsequent init expressions may get wrong values. + # Ignoring since data statements are irrelevant for + # wrapping. + continue + fc = 0 + while (i < llen) and (fc or not l[1][i] == ','): + if l[1][i] == "'": + fc = not fc + i = i + 1 + i = i + 1 + if v not in vars: + vars[v] = {} + if '=' in vars[v] and not vars[v]['='] == l[1][j:i - 1]: + outmess('analyzeline: changing init expression of "%s" ("%s") to "%s"\n' % ( + v, vars[v]['='], l[1][j:i - 1])) + vars[v]['='] = l[1][j:i - 1] + j = i + last_name = v + groupcache[groupcounter]['vars'] = vars + if last_name is not None: + previous_context = ('variable', last_name, groupcounter) + elif case == 'common': + line = m.group('after').strip() + if not line[0] == '/': + line = '//' + line + cl = [] + f = 0 + bn = '' + ol = '' + for c in line: + if c == '/': + f = f + 1 + continue + if f >= 3: + bn = bn.strip() + if not bn: + bn = '_BLNK_' + cl.append([bn, ol]) + f = f - 2 + bn = '' + ol = '' + if f % 2: + bn = bn + c + else: + ol = ol + c + bn = bn.strip() + if not bn: + bn = '_BLNK_' + cl.append([bn, ol]) + commonkey = {} + if 'common' in groupcache[groupcounter]: + commonkey = groupcache[groupcounter]['common'] + for c in cl: + if c[0] not in commonkey: + commonkey[c[0]] = [] + for i in [x.strip() for x in markoutercomma(c[1]).split('@,@')]: + if i: + commonkey[c[0]].append(i) + groupcache[groupcounter]['common'] = commonkey + previous_context = ('common', bn, groupcounter) + elif case == 'use': + m1 = re.match( + r'\A\s*(?P\b\w+\b)\s*((,(\s*\bonly\b\s*:|(?P))\s*(?P.*))|)\s*\Z', m.group('after'), re.I) + if m1: + mm = m1.groupdict() + if 'use' not in groupcache[groupcounter]: + groupcache[groupcounter]['use'] = {} + name = m1.group('name') + groupcache[groupcounter]['use'][name] = {} + isonly = 0 + if 'list' in mm and mm['list'] is not None: + if 'notonly' in mm and mm['notonly'] is None: + isonly = 1 + groupcache[groupcounter]['use'][name]['only'] = isonly + ll = [x.strip() for x in mm['list'].split(',')] + rl = {} + for l in ll: + if '=' in l: + m2 = re.match( + r'\A\s*(?P\b\w+\b)\s*=\s*>\s*(?P\b\w+\b)\s*\Z', l, re.I) + if m2: + rl[m2.group('local').strip()] = m2.group( + 'use').strip() + else: + outmess( + 'analyzeline: Not local=>use pattern found in %s\n' % repr(l)) + else: + rl[l] = l + groupcache[groupcounter]['use'][name]['map'] = rl + else: + pass + else: + print(m.groupdict()) + outmess('analyzeline: Could not crack the use statement.\n') + elif case in ['f2pyenhancements']: + if 'f2pyenhancements' not in groupcache[groupcounter]: + groupcache[groupcounter]['f2pyenhancements'] = {} + d = groupcache[groupcounter]['f2pyenhancements'] + if m.group('this') == 'usercode' and 'usercode' in d: + if isinstance(d['usercode'], str): + d['usercode'] = [d['usercode']] + d['usercode'].append(m.group('after')) + else: + d[m.group('this')] = m.group('after') + elif case == 'multiline': + if previous_context is None: + if verbose: + outmess('analyzeline: No context for multiline block.\n') + return + gc = groupcounter + appendmultiline(groupcache[gc], + previous_context[:2], + m.group('this')) + else: + if verbose > 1: + print(m.groupdict()) + outmess('analyzeline: No code implemented for line.\n') + + +def appendmultiline(group, context_name, 
ml):
+    if 'f2pymultilines' not in group:
+        group['f2pymultilines'] = {}
+    d = group['f2pymultilines']
+    if context_name not in d:
+        d[context_name] = []
+    d[context_name].append(ml)
+    return
+
+
+def cracktypespec0(typespec, ll):
+    selector = None
+    attr = None
+    if re.match(r'double\s*complex', typespec, re.I):
+        typespec = 'double complex'
+    elif re.match(r'double\s*precision', typespec, re.I):
+        typespec = 'double precision'
+    else:
+        typespec = typespec.strip().lower()
+    m1 = selectpattern.match(markouterparen(ll))
+    if not m1:
+        outmess(
+            'cracktypespec0: no kind/char_selector pattern found for line.\n')
+        return
+    d = m1.groupdict()
+    for k in list(d.keys()):
+        d[k] = unmarkouterparen(d[k])
+    if typespec in ['complex', 'integer', 'logical', 'real', 'character', 'type']:
+        selector = d['this']
+        ll = d['after']
+    i = ll.find('::')
+    if i >= 0:
+        attr = ll[:i].strip()
+        ll = ll[i + 2:]
+    return typespec, selector, attr, ll
+#####
+namepattern = re.compile(r'\s*(?P<name>\b\w+\b)\s*(?P<after>.*)\s*\Z', re.I)
+kindselector = re.compile(
+    r'\s*(\(\s*(kind\s*=)?\s*(?P<kind>.*)\s*\)|\*\s*(?P<kind2>.*?))\s*\Z', re.I)
+charselector = re.compile(
+    r'\s*(\((?P<lenkind>.*)\)|\*\s*(?P<charlen>.*))\s*\Z', re.I)
+lenkindpattern = re.compile(
+    r'\s*(kind\s*=\s*(?P<kind>.*?)\s*(@,@\s*len\s*=\s*(?P<len>.*)|)|(len\s*=\s*|)(?P<len2>.*?)\s*(@,@\s*(kind\s*=\s*|)(?P<kind2>.*)|))\s*\Z', re.I)
+lenarraypattern = re.compile(
+    r'\s*(@\(@\s*(?!/)\s*(?P<array>.*?)\s*@\)@\s*\*\s*(?P<len>.*?)|(\*\s*(?P<len2>.*?)|)\s*(@\(@\s*(?!/)\s*(?P<array2>.*?)\s*@\)@|))\s*(=\s*(?P<init>.*?)|(@\(@|)/\s*(?P<init2>.*?)\s*/(@\)@|)|)\s*\Z', re.I)
+
+
+def removespaces(expr):
+    expr = expr.strip()
+    if len(expr) <= 1:
+        return expr
+    expr2 = expr[0]
+    for i in range(1, len(expr) - 1):
+        if (expr[i] == ' ' and
+                ((expr[i + 1] in "()[]{}=+-/* ") or
+                 (expr[i - 1] in "()[]{}=+-/* "))):
+            continue
+        expr2 = expr2 + expr[i]
+    expr2 = expr2 + expr[-1]
+    return expr2
+
+
+def markinnerspaces(line):
+    """
+    Replaces all spaces in the input variable `line` that are surrounded
+    by quotation marks with the triplet "@_@".
+
+    For instance, for the input "a 'b c'" the function returns "a 'b@_@c'"
+
+    Parameters
+    ----------
+    line : str
+
+    Returns
+    -------
+    str
+
+    """
+    fragment = ''
+    inside = False
+    current_quote = None
+    escaped = ''
+    for c in line:
+        if escaped == '\\' and c in ['\\', '\'', '"']:
+            fragment += c
+            escaped = c
+            continue
+        if not inside and c in ['\'', '"']:
+            current_quote = c
+        if c == current_quote:
+            inside = not inside
+        elif c == ' ' and inside:
+            fragment += '@_@'
+            continue
+        fragment += c
+        escaped = c  # reset to non-backslash
+    return fragment
+
+
+def updatevars(typespec, selector, attrspec, entitydecl):
+    global groupcache, groupcounter
+
+    last_name = None
+    kindselect, charselect, typename = cracktypespec(typespec, selector)
+    if attrspec:
+        attrspec = [x.strip() for x in markoutercomma(attrspec).split('@,@')]
+        l = []
+        c = re.compile(r'(?P<start>[a-zA-Z]+)')
+        for a in attrspec:
+            if not a:
+                continue
+            m = c.match(a)
+            if m:
+                s = m.group('start').lower()
+                a = s + a[len(s):]
+            l.append(a)
+        attrspec = l
+    el = [x.strip() for x in markoutercomma(entitydecl).split('@,@')]
+    el1 = []
+    for e in el:
+        for e1 in [x.strip() for x in markoutercomma(removespaces(markinnerspaces(e)), comma=' ').split('@ @')]:
+            if e1:
+                el1.append(e1.replace('@_@', ' '))
+    for e in el1:
+        m = namepattern.match(e)
+        if not m:
+            outmess(
+                'updatevars: no name pattern found for entity=%s. 
Skipping.\n' % (repr(e))) + continue + ename = rmbadname1(m.group('name')) + edecl = {} + if ename in groupcache[groupcounter]['vars']: + edecl = groupcache[groupcounter]['vars'][ename].copy() + not_has_typespec = 'typespec' not in edecl + if not_has_typespec: + edecl['typespec'] = typespec + elif typespec and (not typespec == edecl['typespec']): + outmess('updatevars: attempt to change the type of "%s" ("%s") to "%s". Ignoring.\n' % ( + ename, edecl['typespec'], typespec)) + if 'kindselector' not in edecl: + edecl['kindselector'] = copy.copy(kindselect) + elif kindselect: + for k in list(kindselect.keys()): + if k in edecl['kindselector'] and (not kindselect[k] == edecl['kindselector'][k]): + outmess('updatevars: attempt to change the kindselector "%s" of "%s" ("%s") to "%s". Ignoring.\n' % ( + k, ename, edecl['kindselector'][k], kindselect[k])) + else: + edecl['kindselector'][k] = copy.copy(kindselect[k]) + if 'charselector' not in edecl and charselect: + if not_has_typespec: + edecl['charselector'] = charselect + else: + errmess('updatevars:%s: attempt to change empty charselector to %r. Ignoring.\n' + % (ename, charselect)) + elif charselect: + for k in list(charselect.keys()): + if k in edecl['charselector'] and (not charselect[k] == edecl['charselector'][k]): + outmess('updatevars: attempt to change the charselector "%s" of "%s" ("%s") to "%s". Ignoring.\n' % ( + k, ename, edecl['charselector'][k], charselect[k])) + else: + edecl['charselector'][k] = copy.copy(charselect[k]) + if 'typename' not in edecl: + edecl['typename'] = typename + elif typename and (not edecl['typename'] == typename): + outmess('updatevars: attempt to change the typename of "%s" ("%s") to "%s". Ignoring.\n' % ( + ename, edecl['typename'], typename)) + if 'attrspec' not in edecl: + edecl['attrspec'] = copy.copy(attrspec) + elif attrspec: + for a in attrspec: + if a not in edecl['attrspec']: + edecl['attrspec'].append(a) + else: + edecl['typespec'] = copy.copy(typespec) + edecl['kindselector'] = copy.copy(kindselect) + edecl['charselector'] = copy.copy(charselect) + edecl['typename'] = typename + edecl['attrspec'] = copy.copy(attrspec) + if 'external' in (edecl.get('attrspec') or []) and e in groupcache[groupcounter]['args']: + if 'externals' not in groupcache[groupcounter]: + groupcache[groupcounter]['externals'] = [] + groupcache[groupcounter]['externals'].append(e) + if m.group('after'): + m1 = lenarraypattern.match(markouterparen(m.group('after'))) + if m1: + d1 = m1.groupdict() + for lk in ['len', 'array', 'init']: + if d1[lk + '2'] is not None: + d1[lk] = d1[lk + '2'] + del d1[lk + '2'] + for k in list(d1.keys()): + if d1[k] is not None: + d1[k] = unmarkouterparen(d1[k]) + else: + del d1[k] + if 'len' in d1 and 'array' in d1: + if d1['len'] == '': + d1['len'] = d1['array'] + del d1['array'] + else: + d1['array'] = d1['array'] + ',' + d1['len'] + del d1['len'] + errmess('updatevars: "%s %s" is mapped to "%s %s(%s)"\n' % ( + typespec, e, typespec, ename, d1['array'])) + if 'array' in d1: + dm = 'dimension(%s)' % d1['array'] + if 'attrspec' not in edecl or (not edecl['attrspec']): + edecl['attrspec'] = [dm] + else: + edecl['attrspec'].append(dm) + for dm1 in edecl['attrspec']: + if dm1[:9] == 'dimension' and dm1 != dm: + del edecl['attrspec'][-1] + errmess('updatevars:%s: attempt to change %r to %r. 
Ignoring.\n' + % (ename, dm1, dm)) + break + + if 'len' in d1: + if typespec in ['complex', 'integer', 'logical', 'real']: + if ('kindselector' not in edecl) or (not edecl['kindselector']): + edecl['kindselector'] = {} + edecl['kindselector']['*'] = d1['len'] + elif typespec == 'character': + if ('charselector' not in edecl) or (not edecl['charselector']): + edecl['charselector'] = {} + if 'len' in edecl['charselector']: + del edecl['charselector']['len'] + edecl['charselector']['*'] = d1['len'] + if 'init' in d1: + if '=' in edecl and (not edecl['='] == d1['init']): + outmess('updatevars: attempt to change the init expression of "%s" ("%s") to "%s". Ignoring.\n' % ( + ename, edecl['='], d1['init'])) + else: + edecl['='] = d1['init'] + else: + outmess('updatevars: could not crack entity declaration "%s". Ignoring.\n' % ( + ename + m.group('after'))) + for k in list(edecl.keys()): + if not edecl[k]: + del edecl[k] + groupcache[groupcounter]['vars'][ename] = edecl + if 'varnames' in groupcache[groupcounter]: + groupcache[groupcounter]['varnames'].append(ename) + last_name = ename + return last_name + + +def cracktypespec(typespec, selector): + kindselect = None + charselect = None + typename = None + if selector: + if typespec in ['complex', 'integer', 'logical', 'real']: + kindselect = kindselector.match(selector) + if not kindselect: + outmess( + 'cracktypespec: no kindselector pattern found for %s\n' % (repr(selector))) + return + kindselect = kindselect.groupdict() + kindselect['*'] = kindselect['kind2'] + del kindselect['kind2'] + for k in list(kindselect.keys()): + if not kindselect[k]: + del kindselect[k] + for k, i in list(kindselect.items()): + kindselect[k] = rmbadname1(i) + elif typespec == 'character': + charselect = charselector.match(selector) + if not charselect: + outmess( + 'cracktypespec: no charselector pattern found for %s\n' % (repr(selector))) + return + charselect = charselect.groupdict() + charselect['*'] = charselect['charlen'] + del charselect['charlen'] + if charselect['lenkind']: + lenkind = lenkindpattern.match( + markoutercomma(charselect['lenkind'])) + lenkind = lenkind.groupdict() + for lk in ['len', 'kind']: + if lenkind[lk + '2']: + lenkind[lk] = lenkind[lk + '2'] + charselect[lk] = lenkind[lk] + del lenkind[lk + '2'] + del charselect['lenkind'] + for k in list(charselect.keys()): + if not charselect[k]: + del charselect[k] + for k, i in list(charselect.items()): + charselect[k] = rmbadname1(i) + elif typespec == 'type': + typename = re.match(r'\s*\(\s*(?P\w+)\s*\)', selector, re.I) + if typename: + typename = typename.group('name') + else: + outmess('cracktypespec: no typename found in %s\n' % + (repr(typespec + selector))) + else: + outmess('cracktypespec: no selector used for %s\n' % + (repr(selector))) + return kindselect, charselect, typename +###### + + +def setattrspec(decl, attr, force=0): + if not decl: + decl = {} + if not attr: + return decl + if 'attrspec' not in decl: + decl['attrspec'] = [attr] + return decl + if force: + decl['attrspec'].append(attr) + if attr in decl['attrspec']: + return decl + if attr == 'static' and 'automatic' not in decl['attrspec']: + decl['attrspec'].append(attr) + elif attr == 'automatic' and 'static' not in decl['attrspec']: + decl['attrspec'].append(attr) + elif attr == 'public': + if 'private' not in decl['attrspec']: + decl['attrspec'].append(attr) + elif attr == 'private': + if 'public' not in decl['attrspec']: + decl['attrspec'].append(attr) + else: + decl['attrspec'].append(attr) + return decl + + +def 
setkindselector(decl, sel, force=0): + if not decl: + decl = {} + if not sel: + return decl + if 'kindselector' not in decl: + decl['kindselector'] = sel + return decl + for k in list(sel.keys()): + if force or k not in decl['kindselector']: + decl['kindselector'][k] = sel[k] + return decl + + +def setcharselector(decl, sel, force=0): + if not decl: + decl = {} + if not sel: + return decl + if 'charselector' not in decl: + decl['charselector'] = sel + return decl + for k in list(sel.keys()): + if force or k not in decl['charselector']: + decl['charselector'][k] = sel[k] + return decl + + +def getblockname(block, unknown='unknown'): + if 'name' in block: + return block['name'] + return unknown + +# post processing + + +def setmesstext(block): + global filepositiontext + + try: + filepositiontext = 'In: %s:%s\n' % (block['from'], block['name']) + except Exception: + pass + + +def get_usedict(block): + usedict = {} + if 'parent_block' in block: + usedict = get_usedict(block['parent_block']) + if 'use' in block: + usedict.update(block['use']) + return usedict + + +def get_useparameters(block, param_map=None): + global f90modulevars + + if param_map is None: + param_map = {} + usedict = get_usedict(block) + if not usedict: + return param_map + for usename, mapping in list(usedict.items()): + usename = usename.lower() + if usename not in f90modulevars: + outmess('get_useparameters: no module %s info used by %s\n' % + (usename, block.get('name'))) + continue + mvars = f90modulevars[usename] + params = get_parameters(mvars) + if not params: + continue + # XXX: apply mapping + if mapping: + errmess('get_useparameters: mapping for %s not impl.\n' % (mapping)) + for k, v in list(params.items()): + if k in param_map: + outmess('get_useparameters: overriding parameter %s with' + ' value from module %s\n' % (repr(k), repr(usename))) + param_map[k] = v + + return param_map + + +def postcrack2(block, tab='', param_map=None): + global f90modulevars + + if not f90modulevars: + return block + if isinstance(block, list): + ret = [postcrack2(g, tab=tab + '\t', param_map=param_map) + for g in block] + return ret + setmesstext(block) + outmess('%sBlock: %s\n' % (tab, block['name']), 0) + + if param_map is None: + param_map = get_useparameters(block) + + if param_map is not None and 'vars' in block: + vars = block['vars'] + for n in list(vars.keys()): + var = vars[n] + if 'kindselector' in var: + kind = var['kindselector'] + if 'kind' in kind: + val = kind['kind'] + if val in param_map: + kind['kind'] = param_map[val] + new_body = [postcrack2(b, tab=tab + '\t', param_map=param_map) + for b in block['body']] + block['body'] = new_body + + return block + + +def postcrack(block, args=None, tab=''): + """ + TODO: + function return values + determine expression types if in argument list + """ + global usermodules, onlyfunctions + + if isinstance(block, list): + gret = [] + uret = [] + for g in block: + setmesstext(g) + g = postcrack(g, tab=tab + '\t') + # sort user routines to appear first + if 'name' in g and '__user__' in g['name']: + uret.append(g) + else: + gret.append(g) + return uret + gret + setmesstext(block) + if not isinstance(block, dict) and 'block' not in block: + raise Exception('postcrack: Expected block dictionary instead of ' + + str(block)) + if 'name' in block and not block['name'] == 'unknown_interface': + outmess('%sBlock: %s\n' % (tab, block['name']), 0) + block = analyzeargs(block) + block = analyzecommon(block) + block['vars'] = analyzevars(block) + block['sortvars'] = 
sortvarnames(block['vars']) + if 'args' in block and block['args']: + args = block['args'] + block['body'] = analyzebody(block, args, tab=tab) + + userisdefined = [] + if 'use' in block: + useblock = block['use'] + for k in list(useblock.keys()): + if '__user__' in k: + userisdefined.append(k) + else: + useblock = {} + name = '' + if 'name' in block: + name = block['name'] + # and not userisdefined: # Build a __user__ module + if 'externals' in block and block['externals']: + interfaced = [] + if 'interfaced' in block: + interfaced = block['interfaced'] + mvars = copy.copy(block['vars']) + if name: + mname = name + '__user__routines' + else: + mname = 'unknown__user__routines' + if mname in userisdefined: + i = 1 + while '%s_%i' % (mname, i) in userisdefined: + i = i + 1 + mname = '%s_%i' % (mname, i) + interface = {'block': 'interface', 'body': [], + 'vars': {}, 'name': name + '_user_interface'} + for e in block['externals']: + if e in interfaced: + edef = [] + j = -1 + for b in block['body']: + j = j + 1 + if b['block'] == 'interface': + i = -1 + for bb in b['body']: + i = i + 1 + if 'name' in bb and bb['name'] == e: + edef = copy.copy(bb) + del b['body'][i] + break + if edef: + if not b['body']: + del block['body'][j] + del interfaced[interfaced.index(e)] + break + interface['body'].append(edef) + else: + if e in mvars and not isexternal(mvars[e]): + interface['vars'][e] = mvars[e] + if interface['vars'] or interface['body']: + block['interfaced'] = interfaced + mblock = {'block': 'python module', 'body': [ + interface], 'vars': {}, 'name': mname, 'interfaced': block['externals']} + useblock[mname] = {} + usermodules.append(mblock) + if useblock: + block['use'] = useblock + return block + + +def sortvarnames(vars): + indep = [] + dep = [] + for v in list(vars.keys()): + if 'depend' in vars[v] and vars[v]['depend']: + dep.append(v) + else: + indep.append(v) + n = len(dep) + i = 0 + while dep: # XXX: How to catch dependence cycles correctly? 
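+        # Editorial note, a hedged sketch of the loop below (names are
+        # illustrative): the head of `dep` is rotated to the back while it
+        # still depends on a later entry; if a full pass (i > n) emits
+        # nothing, a cycle is assumed and reported. For
+        #     vars = {'b': {'depend': ['a']}, 'a': {'depend': ['n']}, 'n': {}}
+        # 'n' goes straight to `indep` and the loop then orders 'a' before 'b'.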
+ v = dep[0] + fl = 0 + for w in dep[1:]: + if w in vars[v]['depend']: + fl = 1 + break + if fl: + dep = dep[1:] + [v] + i = i + 1 + if i > n: + errmess('sortvarnames: failed to compute dependencies because' + ' of cyclic dependencies between ' + + ', '.join(dep) + '\n') + indep = indep + dep + break + else: + indep.append(v) + dep = dep[1:] + n = len(dep) + i = 0 + return indep + + +def analyzecommon(block): + if not hascommon(block): + return block + commonvars = [] + for k in list(block['common'].keys()): + comvars = [] + for e in block['common'][k]: + m = re.match( + r'\A\s*\b(?P.*?)\b\s*(\((?P.*?)\)|)\s*\Z', e, re.I) + if m: + dims = [] + if m.group('dims'): + dims = [x.strip() + for x in markoutercomma(m.group('dims')).split('@,@')] + n = rmbadname1(m.group('name').strip()) + if n in block['vars']: + if 'attrspec' in block['vars'][n]: + block['vars'][n]['attrspec'].append( + 'dimension(%s)' % (','.join(dims))) + else: + block['vars'][n]['attrspec'] = [ + 'dimension(%s)' % (','.join(dims))] + else: + if dims: + block['vars'][n] = { + 'attrspec': ['dimension(%s)' % (','.join(dims))]} + else: + block['vars'][n] = {} + if n not in commonvars: + commonvars.append(n) + else: + n = e + errmess( + 'analyzecommon: failed to extract "[()]" from "%s" in common /%s/.\n' % (e, k)) + comvars.append(n) + block['common'][k] = comvars + if 'commonvars' not in block: + block['commonvars'] = commonvars + else: + block['commonvars'] = block['commonvars'] + commonvars + return block + + +def analyzebody(block, args, tab=''): + global usermodules, skipfuncs, onlyfuncs, f90modulevars + + setmesstext(block) + body = [] + for b in block['body']: + b['parent_block'] = block + if b['block'] in ['function', 'subroutine']: + if args is not None and b['name'] not in args: + continue + else: + as_ = b['args'] + if b['name'] in skipfuncs: + continue + if onlyfuncs and b['name'] not in onlyfuncs: + continue + b['saved_interface'] = crack2fortrangen( + b, '\n' + ' ' * 6, as_interface=True) + + else: + as_ = args + b = postcrack(b, as_, tab=tab + '\t') + if b['block'] in ['interface', 'abstract interface'] and not b['body']: + if 'f2pyenhancements' not in b: + continue + if b['block'].replace(' ', '') == 'pythonmodule': + usermodules.append(b) + else: + if b['block'] == 'module': + f90modulevars[b['name']] = b['vars'] + body.append(b) + return body + + +def buildimplicitrules(block): + setmesstext(block) + implicitrules = defaultimplicitrules + attrrules = {} + if 'implicit' in block: + if block['implicit'] is None: + implicitrules = None + if verbose > 1: + outmess( + 'buildimplicitrules: no implicit rules for routine %s.\n' % repr(block['name'])) + else: + for k in list(block['implicit'].keys()): + if block['implicit'][k].get('typespec') not in ['static', 'automatic']: + implicitrules[k] = block['implicit'][k] + else: + attrrules[k] = block['implicit'][k]['typespec'] + return implicitrules, attrrules + + +def myeval(e, g=None, l=None): + """ Like `eval` but returns only integers and floats """ + r = eval(e, g, l) + if type(r) in [int, float]: + return r + raise ValueError('r=%r' % (r)) + +getlincoef_re_1 = re.compile(r'\A\b\w+\b\Z', re.I) + + +def getlincoef(e, xset): # e = a*x+b ; x in xset + """ + Obtain ``a`` and ``b`` when ``e == "a*x+b"``, where ``x`` is a symbol in + xset. 
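+
+    The coefficients are recovered numerically: ``e`` is evaluated with the
+    candidate symbol substituted at 0, 1, 0.5 and 1.5, so linearity is only
+    spot-checked at those points (see the last example below).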
+ + >>> getlincoef('2*x + 1', {'x'}) + (2, 1, 'x') + >>> getlincoef('3*x + x*2 + 2 + 1', {'x'}) + (5, 3, 'x') + >>> getlincoef('0', {'x'}) + (0, 0, None) + >>> getlincoef('0*x', {'x'}) + (0, 0, 'x') + >>> getlincoef('x*x', {'x'}) + (None, None, None) + + This can be tricked by sufficiently complex expressions + + >>> getlincoef('(x - 0.5)*(x - 1.5)*(x - 1)*x + 2*x + 3', {'x'}) + (2.0, 3.0, 'x') + """ + try: + c = int(myeval(e, {}, {})) + return 0, c, None + except Exception: + pass + if getlincoef_re_1.match(e): + return 1, 0, e + len_e = len(e) + for x in xset: + if len(x) > len_e: + continue + if re.search(r'\w\s*\([^)]*\b' + x + r'\b', e): + # skip function calls having x as an argument, e.g max(1, x) + continue + re_1 = re.compile(r'(?P.*?)\b' + x + r'\b(?P.*)', re.I) + m = re_1.match(e) + if m: + try: + m1 = re_1.match(e) + while m1: + ee = '%s(%s)%s' % ( + m1.group('before'), 0, m1.group('after')) + m1 = re_1.match(ee) + b = myeval(ee, {}, {}) + m1 = re_1.match(e) + while m1: + ee = '%s(%s)%s' % ( + m1.group('before'), 1, m1.group('after')) + m1 = re_1.match(ee) + a = myeval(ee, {}, {}) - b + m1 = re_1.match(e) + while m1: + ee = '%s(%s)%s' % ( + m1.group('before'), 0.5, m1.group('after')) + m1 = re_1.match(ee) + c = myeval(ee, {}, {}) + # computing another point to be sure that expression is linear + m1 = re_1.match(e) + while m1: + ee = '%s(%s)%s' % ( + m1.group('before'), 1.5, m1.group('after')) + m1 = re_1.match(ee) + c2 = myeval(ee, {}, {}) + if (a * 0.5 + b == c and a * 1.5 + b == c2): + return a, b, x + except Exception: + pass + break + return None, None, None + + +word_pattern = re.compile(r'\b[a-z][\w$]*\b', re.I) + + +def _get_depend_dict(name, vars, deps): + if name in vars: + words = vars[name].get('depend', []) + + if '=' in vars[name] and not isstring(vars[name]): + for word in word_pattern.findall(vars[name]['=']): + # The word_pattern may return values that are not + # only variables, they can be string content for instance + if word not in words and word in vars and word != name: + words.append(word) + for word in words[:]: + for w in deps.get(word, []) \ + or _get_depend_dict(word, vars, deps): + if w not in words: + words.append(w) + else: + outmess('_get_depend_dict: no dependence info for %s\n' % (repr(name))) + words = [] + deps[name] = words + return words + + +def _calc_depend_dict(vars): + names = list(vars.keys()) + depend_dict = {} + for n in names: + _get_depend_dict(n, vars, depend_dict) + return depend_dict + + +def get_sorted_names(vars): + """ + """ + depend_dict = _calc_depend_dict(vars) + names = [] + for name in list(depend_dict.keys()): + if not depend_dict[name]: + names.append(name) + del depend_dict[name] + while depend_dict: + for name, lst in list(depend_dict.items()): + new_lst = [n for n in lst if n in depend_dict] + if not new_lst: + names.append(name) + del depend_dict[name] + else: + depend_dict[name] = new_lst + return [name for name in names if name in vars] + + +def _kind_func(string): + # XXX: return something sensible. 
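+    # A hedged summary of the fallback below: a d-exponent literal such as
+    # "1.5d0" (real16pattern) maps to kind 8, an e-exponent or plain decimal
+    # literal (real8pattern) maps to kind 4, and anything else is returned
+    # as an unevaluated 'kind(...)' string for later processing.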
+ if string[0] in "'\"": + string = string[1:-1] + if real16pattern.match(string): + return 8 + elif real8pattern.match(string): + return 4 + return 'kind(' + string + ')' + + +def _selected_int_kind_func(r): + # XXX: This should be processor dependent + m = 10 ** r + if m <= 2 ** 8: + return 1 + if m <= 2 ** 16: + return 2 + if m <= 2 ** 32: + return 4 + if m <= 2 ** 63: + return 8 + if m <= 2 ** 128: + return 16 + return -1 + + +def _selected_real_kind_func(p, r=0, radix=0): + # XXX: This should be processor dependent + # This is only good for 0 <= p <= 20 + if p < 7: + return 4 + if p < 16: + return 8 + machine = platform.machine().lower() + if machine.startswith(('aarch64', 'power', 'ppc', 'riscv', 's390x', 'sparc')): + if p <= 20: + return 16 + else: + if p < 19: + return 10 + elif p <= 20: + return 16 + return -1 + + +def get_parameters(vars, global_params={}): + params = copy.copy(global_params) + g_params = copy.copy(global_params) + for name, func in [('kind', _kind_func), + ('selected_int_kind', _selected_int_kind_func), + ('selected_real_kind', _selected_real_kind_func), ]: + if name not in g_params: + g_params[name] = func + param_names = [] + for n in get_sorted_names(vars): + if 'attrspec' in vars[n] and 'parameter' in vars[n]['attrspec']: + param_names.append(n) + kind_re = re.compile(r'\bkind\s*\(\s*(?P.*)\s*\)', re.I) + selected_int_kind_re = re.compile( + r'\bselected_int_kind\s*\(\s*(?P.*)\s*\)', re.I) + selected_kind_re = re.compile( + r'\bselected_(int|real)_kind\s*\(\s*(?P.*)\s*\)', re.I) + for n in param_names: + if '=' in vars[n]: + v = vars[n]['='] + if islogical(vars[n]): + v = v.lower() + for repl in [ + ('.false.', 'False'), + ('.true.', 'True'), + # TODO: test .eq., .neq., etc replacements. + ]: + v = v.replace(*repl) + v = kind_re.sub(r'kind("\1")', v) + v = selected_int_kind_re.sub(r'selected_int_kind(\1)', v) + + # We need to act according to the data. + # The easy case is if the data has a kind-specifier, + # then we may easily remove those specifiers. + # However, it may be that the user uses other specifiers...(!) + is_replaced = False + if 'kindselector' in vars[n]: + if 'kind' in vars[n]['kindselector']: + orig_v_len = len(v) + v = v.replace('_' + vars[n]['kindselector']['kind'], '') + # Again, this will be true if even a single specifier + # has been replaced, see comment above. + is_replaced = len(v) < orig_v_len + + if not is_replaced: + if not selected_kind_re.match(v): + v_ = v.split('_') + # In case there are additive parameters + if len(v_) > 1: + v = ''.join(v_[:-1]).lower().replace(v_[-1].lower(), '') + + # Currently this will not work for complex numbers. 
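+                # For example, a hypothetical declaration
+                #     complex, parameter :: c = (1d0, 2d0)
+                # reaches the eval() below as the raw string '(1d0, 2d0)',
+                # fails to parse there, and is kept unevaluated in params.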
+ # There is missing code for extracting a complex number, + # which may be defined in either of these: + # a) (Re, Im) + # b) cmplx(Re, Im) + # c) dcmplx(Re, Im) + # d) cmplx(Re, Im, ) + + if isdouble(vars[n]): + tt = list(v) + for m in real16pattern.finditer(v): + tt[m.start():m.end()] = list( + v[m.start():m.end()].lower().replace('d', 'e')) + v = ''.join(tt) + + elif iscomplex(vars[n]): + outmess(f'get_parameters[TODO]: ' + f'implement evaluation of complex expression {v}\n') + + try: + params[n] = eval(v, g_params, params) + except Exception as msg: + params[n] = v + outmess('get_parameters: got "%s" on %s\n' % (msg, repr(v))) + if isstring(vars[n]) and isinstance(params[n], int): + params[n] = chr(params[n]) + nl = n.lower() + if nl != n: + params[nl] = params[n] + else: + print(vars[n]) + outmess( + 'get_parameters:parameter %s does not have value?!\n' % (repr(n))) + return params + + +def _eval_length(length, params): + if length in ['(:)', '(*)', '*']: + return '(*)' + return _eval_scalar(length, params) + +_is_kind_number = re.compile(r'\d+_').match + + +def _eval_scalar(value, params): + if _is_kind_number(value): + value = value.split('_')[0] + try: + value = eval(value, {}, params) + value = (repr if isinstance(value, str) else str)(value) + except (NameError, SyntaxError, TypeError): + return value + except Exception as msg: + errmess('"%s" in evaluating %r ' + '(available names: %s)\n' + % (msg, value, list(params.keys()))) + return value + + +def analyzevars(block): + global f90modulevars + + setmesstext(block) + implicitrules, attrrules = buildimplicitrules(block) + vars = copy.copy(block['vars']) + if block['block'] == 'function' and block['name'] not in vars: + vars[block['name']] = {} + if '' in block['vars']: + del vars[''] + if 'attrspec' in block['vars']['']: + gen = block['vars']['']['attrspec'] + for n in list(vars.keys()): + for k in ['public', 'private']: + if k in gen: + vars[n] = setattrspec(vars[n], k) + svars = [] + args = block['args'] + for a in args: + try: + vars[a] + svars.append(a) + except KeyError: + pass + for n in list(vars.keys()): + if n not in args: + svars.append(n) + + params = get_parameters(vars, get_useparameters(block)) + + dep_matches = {} + name_match = re.compile(r'[A-Za-z][\w$]*').match + for v in list(vars.keys()): + m = name_match(v) + if m: + n = v[m.start():m.end()] + try: + dep_matches[n] + except KeyError: + dep_matches[n] = re.compile(r'.*\b%s\b' % (v), re.I).match + for n in svars: + if n[0] in list(attrrules.keys()): + vars[n] = setattrspec(vars[n], attrrules[n[0]]) + if 'typespec' not in vars[n]: + if not('attrspec' in vars[n] and 'external' in vars[n]['attrspec']): + if implicitrules: + ln0 = n[0].lower() + for k in list(implicitrules[ln0].keys()): + if k == 'typespec' and implicitrules[ln0][k] == 'undefined': + continue + if k not in vars[n]: + vars[n][k] = implicitrules[ln0][k] + elif k == 'attrspec': + for l in implicitrules[ln0][k]: + vars[n] = setattrspec(vars[n], l) + elif n in block['args']: + outmess('analyzevars: typespec of variable %s is not defined in routine %s.\n' % ( + repr(n), block['name'])) + + if 'charselector' in vars[n]: + if 'len' in vars[n]['charselector']: + l = vars[n]['charselector']['len'] + try: + l = str(eval(l, {}, params)) + except Exception: + pass + vars[n]['charselector']['len'] = l + + if 'kindselector' in vars[n]: + if 'kind' in vars[n]['kindselector']: + l = vars[n]['kindselector']['kind'] + try: + l = str(eval(l, {}, params)) + except Exception: + pass + vars[n]['kindselector']['kind'] = 
l + + dimension_exprs = {} + if 'attrspec' in vars[n]: + attr = vars[n]['attrspec'] + attr.reverse() + vars[n]['attrspec'] = [] + dim, intent, depend, check, note = None, None, None, None, None + for a in attr: + if a[:9] == 'dimension': + dim = (a[9:].strip())[1:-1] + elif a[:6] == 'intent': + intent = (a[6:].strip())[1:-1] + elif a[:6] == 'depend': + depend = (a[6:].strip())[1:-1] + elif a[:5] == 'check': + check = (a[5:].strip())[1:-1] + elif a[:4] == 'note': + note = (a[4:].strip())[1:-1] + else: + vars[n] = setattrspec(vars[n], a) + if intent: + if 'intent' not in vars[n]: + vars[n]['intent'] = [] + for c in [x.strip() for x in markoutercomma(intent).split('@,@')]: + # Remove spaces so that 'in out' becomes 'inout' + tmp = c.replace(' ', '') + if tmp not in vars[n]['intent']: + vars[n]['intent'].append(tmp) + intent = None + if note: + note = note.replace('\\n\\n', '\n\n') + note = note.replace('\\n ', '\n') + if 'note' not in vars[n]: + vars[n]['note'] = [note] + else: + vars[n]['note'].append(note) + note = None + if depend is not None: + if 'depend' not in vars[n]: + vars[n]['depend'] = [] + for c in rmbadname([x.strip() for x in markoutercomma(depend).split('@,@')]): + if c not in vars[n]['depend']: + vars[n]['depend'].append(c) + depend = None + if check is not None: + if 'check' not in vars[n]: + vars[n]['check'] = [] + for c in [x.strip() for x in markoutercomma(check).split('@,@')]: + if c not in vars[n]['check']: + vars[n]['check'].append(c) + check = None + if dim and 'dimension' not in vars[n]: + vars[n]['dimension'] = [] + for d in rmbadname([x.strip() for x in markoutercomma(dim).split('@,@')]): + star = ':' if d == ':' else '*' + # Evaluate `d` with respect to params + if d in params: + d = str(params[d]) + for p in params: + re_1 = re.compile(r'(?P.*?)\b' + p + r'\b(?P.*)', re.I) + m = re_1.match(d) + while m: + d = m.group('before') + \ + str(params[p]) + m.group('after') + m = re_1.match(d) + + if d == star: + dl = [star] + else: + dl = markoutercomma(d, ':').split('@:@') + if len(dl) == 2 and '*' in dl: # e.g. dimension(5:*) + dl = ['*'] + d = '*' + if len(dl) == 1 and dl[0] != star: + dl = ['1', dl[0]] + if len(dl) == 2: + d1, d2 = map(symbolic.Expr.parse, dl) + dsize = d2 - d1 + 1 + d = dsize.tostring(language=symbolic.Language.C) + # find variables v that define d as a linear + # function, `d == a * v + b`, and store + # coefficients a and b for further analysis. + solver_and_deps = {} + for v in block['vars']: + s = symbolic.as_symbol(v) + if dsize.contains(s): + try: + a, b = dsize.linear_solve(s) + + def solve_v(s, a=a, b=b): + return (s - b) / a + + all_symbols = set(a.symbols()) + all_symbols.update(b.symbols()) + except RuntimeError as msg: + # d is not a linear function of v, + # however, if v can be determined + # from d using other means, + # implement the corresponding + # solve_v function here. + solve_v = None + all_symbols = set(dsize.symbols()) + v_deps = set( + s.data for s in all_symbols + if s.data in vars) + solver_and_deps[v] = solve_v, list(v_deps) + # Note that dsize may contain symbols that are + # not defined in block['vars']. Here we assume + # these correspond to Fortran/C intrinsic + # functions or that are defined by other + # means. We'll let the compiler validate the + # definiteness of such symbols. 
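+                        # Illustrative case (hypothetical declaration): for
+                        #     integer n
+                        #     real a(2*n+1)
+                        # dsize is the symbolic Expr 2*n+1; linear_solve on
+                        # the symbol n yields a=2, b=1, so solve_v inverts a
+                        # dimension value d back to n = (d - 1) / 2.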
+                        dimension_exprs[d] = solver_and_deps
+                    vars[n]['dimension'].append(d)
+
+        if 'dimension' in vars[n]:
+            if isstringarray(vars[n]):
+                if 'charselector' in vars[n]:
+                    d = vars[n]['charselector']
+                    if '*' in d:
+                        d = d['*']
+                        errmess('analyzevars: character array "character*%s %s(%s)" is considered as "character %s(%s)"; "intent(c)" is forced.\n'
+                                % (d, n,
+                                   ','.join(vars[n]['dimension']),
+                                   n, ','.join(vars[n]['dimension'] + [d])))
+                        vars[n]['dimension'].append(d)
+                        del vars[n]['charselector']
+                        if 'intent' not in vars[n]:
+                            vars[n]['intent'] = []
+                        if 'c' not in vars[n]['intent']:
+                            vars[n]['intent'].append('c')
+                    else:
+                        errmess(
+                            "analyzevars: charselector=%r unhandled.\n" % (d))
+
+        if 'check' not in vars[n] and 'args' in block and n in block['args']:
+            # n is an argument that has no checks defined. Here we
+            # generate some consistency checks for n, and when n is an
+            # array, generate checks for its dimensions and construct
+            # initialization expressions.
+            n_deps = vars[n].get('depend', [])
+            n_checks = []
+            n_is_input = l_or(isintent_in, isintent_inout,
+                              isintent_inplace)(vars[n])
+            if 'dimension' in vars[n]:  # n is array
+                for i, d in enumerate(vars[n]['dimension']):
+                    coeffs_and_deps = dimension_exprs.get(d)
+                    if coeffs_and_deps is None:
+                        # d is `:` or `*` or a constant expression
+                        pass
+                    elif n_is_input:
+                        # n is an input array argument and its shape
+                        # may define variables used in dimension
+                        # specifications.
+                        for v, (solver, deps) in coeffs_and_deps.items():
+                            if ((v in n_deps
+                                 or '=' in vars[v]
+                                 or 'depend' in vars[v])):
+                                # Skip a variable that
+                                # - n depends on
+                                # - has user-defined initialization expression
+                                # - has user-defined dependencies
+                                continue
+                            if solver is not None:
+                                # v can be solved from d, hence, we
+                                # make it an optional argument with
+                                # initialization expression:
+                                is_required = False
+                                init = solver(symbolic.as_symbol(
+                                    f'shape({n}, {i})'))
+                                init = init.tostring(
+                                    language=symbolic.Language.C)
+                                vars[v]['='] = init
+                                # n needs to be initialized before v. So,
+                                # making v dependent on n and on any
+                                # variables in solver or d.
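+                                # E.g. (hypothetical signature): for an
+                                # intent(in) array `a(m)`, the size
+                                # variable m would get the default
+                                # `shape(a, 0)` here and become an
+                                # optional rather than required argument.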
+ vars[v]['depend'] = [n] + deps + if 'check' not in vars[v]: + # add check only when no + # user-specified checks exist + vars[v]['check'] = [ + f'shape({n}, {i}) == {d}'] + else: + # d is a non-linear function on v, + # hence, v must be a required input + # argument that n will depend on + is_required = True + if 'intent' not in vars[v]: + vars[v]['intent'] = [] + if 'in' not in vars[v]['intent']: + vars[v]['intent'].append('in') + # v needs to be initialized before n + n_deps.append(v) + n_checks.append( + f'shape({n}, {i}) == {d}') + v_attr = vars[v].get('attrspec', []) + if not ('optional' in v_attr + or 'required' in v_attr): + v_attr.append( + 'required' if is_required else 'optional') + if v_attr: + vars[v]['attrspec'] = v_attr + if coeffs_and_deps is not None: + # extend v dependencies with ones specified in attrspec + for v, (solver, deps) in coeffs_and_deps.items(): + v_deps = vars[v].get('depend', []) + for aa in vars[v].get('attrspec', []): + if aa.startswith('depend'): + aa = ''.join(aa.split()) + v_deps.extend(aa[7:-1].split(',')) + if v_deps: + vars[v]['depend'] = list(set(v_deps)) + if n not in v_deps: + n_deps.append(v) + elif isstring(vars[n]): + if 'charselector' in vars[n]: + if '*' in vars[n]['charselector']: + length = _eval_length(vars[n]['charselector']['*'], + params) + vars[n]['charselector']['*'] = length + elif 'len' in vars[n]['charselector']: + length = _eval_length(vars[n]['charselector']['len'], + params) + del vars[n]['charselector']['len'] + vars[n]['charselector']['*'] = length + if n_checks: + vars[n]['check'] = n_checks + if n_deps: + vars[n]['depend'] = list(set(n_deps)) + + if '=' in vars[n]: + if 'attrspec' not in vars[n]: + vars[n]['attrspec'] = [] + if ('optional' not in vars[n]['attrspec']) and \ + ('required' not in vars[n]['attrspec']): + vars[n]['attrspec'].append('optional') + if 'depend' not in vars[n]: + vars[n]['depend'] = [] + for v, m in list(dep_matches.items()): + if m(vars[n]['=']): + vars[n]['depend'].append(v) + if not vars[n]['depend']: + del vars[n]['depend'] + if isscalar(vars[n]): + vars[n]['='] = _eval_scalar(vars[n]['='], params) + + for n in list(vars.keys()): + if n == block['name']: # n is block name + if 'note' in vars[n]: + block['note'] = vars[n]['note'] + if block['block'] == 'function': + if 'result' in block and block['result'] in vars: + vars[n] = appenddecl(vars[n], vars[block['result']]) + if 'prefix' in block: + pr = block['prefix'] + pr1 = pr.replace('pure', '') + ispure = (not pr == pr1) + pr = pr1.replace('recursive', '') + isrec = (not pr == pr1) + m = typespattern[0].match(pr) + if m: + typespec, selector, attr, edecl = cracktypespec0( + m.group('this'), m.group('after')) + kindselect, charselect, typename = cracktypespec( + typespec, selector) + vars[n]['typespec'] = typespec + if kindselect: + if 'kind' in kindselect: + try: + kindselect['kind'] = eval( + kindselect['kind'], {}, params) + except Exception: + pass + vars[n]['kindselector'] = kindselect + if charselect: + vars[n]['charselector'] = charselect + if typename: + vars[n]['typename'] = typename + if ispure: + vars[n] = setattrspec(vars[n], 'pure') + if isrec: + vars[n] = setattrspec(vars[n], 'recursive') + else: + outmess( + 'analyzevars: prefix (%s) were not used\n' % repr(block['prefix'])) + if not block['block'] in ['module', 'pythonmodule', 'python module', 'block data']: + if 'commonvars' in block: + neededvars = copy.copy(block['args'] + block['commonvars']) + else: + neededvars = copy.copy(block['args']) + for n in list(vars.keys()): + if 
l_or(isintent_callback, isintent_aux)(vars[n]):
+                neededvars.append(n)
+        if 'entry' in block:
+            neededvars.extend(list(block['entry'].keys()))
+            for k in list(block['entry'].keys()):
+                for n in block['entry'][k]:
+                    if n not in neededvars:
+                        neededvars.append(n)
+        if block['block'] == 'function':
+            if 'result' in block:
+                neededvars.append(block['result'])
+            else:
+                neededvars.append(block['name'])
+        if block['block'] in ['subroutine', 'function']:
+            name = block['name']
+            if name in vars and 'intent' in vars[name]:
+                block['intent'] = vars[name]['intent']
+        if block['block'] == 'type':
+            neededvars.extend(list(vars.keys()))
+        for n in list(vars.keys()):
+            if n not in neededvars:
+                del vars[n]
+    return vars
+
+analyzeargs_re_1 = re.compile(r'\A[a-z]+[\w$]*\Z', re.I)
+
+
+def expr2name(a, block, args=[]):
+    orig_a = a
+    a_is_expr = not analyzeargs_re_1.match(a)
+    if a_is_expr:  # `a` is an expression
+        implicitrules, attrrules = buildimplicitrules(block)
+        at = determineexprtype(a, block['vars'], implicitrules)
+        na = 'e_'
+        for c in a:
+            c = c.lower()
+            if c not in string.ascii_lowercase + string.digits:
+                c = '_'
+            na = na + c
+        if na[-1] == '_':
+            na = na + 'e'
+        else:
+            na = na + '_e'
+        a = na
+        while a in block['vars'] or a in block['args']:
+            a = a + 'r'
+    if a in args:
+        k = 1
+        while a + str(k) in args:
+            k = k + 1
+        a = a + str(k)
+    if a_is_expr:
+        block['vars'][a] = at
+    else:
+        if a not in block['vars']:
+            if orig_a in block['vars']:
+                block['vars'][a] = block['vars'][orig_a]
+            else:
+                block['vars'][a] = {}
+        if 'externals' in block and orig_a in block['externals'] + block['interfaced']:
+            block['vars'][a] = setattrspec(block['vars'][a], 'external')
+    return a
+
+
+def analyzeargs(block):
+    setmesstext(block)
+    implicitrules, _ = buildimplicitrules(block)
+    if 'args' not in block:
+        block['args'] = []
+    args = []
+    for a in block['args']:
+        a = expr2name(a, block, args)
+        args.append(a)
+    block['args'] = args
+    if 'entry' in block:
+        for k, args1 in list(block['entry'].items()):
+            for a in args1:
+                if a not in block['vars']:
+                    block['vars'][a] = {}
+
+    for b in block['body']:
+        if b['name'] in args:
+            if 'externals' not in block:
+                block['externals'] = []
+            if b['name'] not in block['externals']:
+                block['externals'].append(b['name'])
+    if 'result' in block and block['result'] not in block['vars']:
+        block['vars'][block['result']] = {}
+    return block
+
+determineexprtype_re_1 = re.compile(r'\A\(.+?,.+?\)\Z', re.I)
+determineexprtype_re_2 = re.compile(r'\A[+-]?\d+(_(?P<name>\w+)|)\Z', re.I)
+determineexprtype_re_3 = re.compile(
+    r'\A[+-]?[\d.]+[-\d+de.]*(_(?P<name>\w+)|)\Z', re.I)
+determineexprtype_re_4 = re.compile(r'\A\(.*\)\Z', re.I)
+determineexprtype_re_5 = re.compile(r'\A(?P<name>\w+)\s*\(.*?\)\s*\Z', re.I)
+
+
+def _ensure_exprdict(r):
+    if isinstance(r, int):
+        return {'typespec': 'integer'}
+    if isinstance(r, float):
+        return {'typespec': 'real'}
+    if isinstance(r, complex):
+        return {'typespec': 'complex'}
+    if isinstance(r, dict):
+        return r
+    raise AssertionError(repr(r))
+
+
+def determineexprtype(expr, vars, rules={}):
+    if expr in vars:
+        return _ensure_exprdict(vars[expr])
+    expr = expr.strip()
+    if determineexprtype_re_1.match(expr):
+        return {'typespec': 'complex'}
+    m = determineexprtype_re_2.match(expr)
+    if m:
+        if 'name' in m.groupdict() and m.group('name'):
+            outmess(
+                'determineexprtype: selected kind types not supported (%s)\n' % repr(expr))
+        return {'typespec': 'integer'}
+    m = determineexprtype_re_3.match(expr)
+    if m:
+        if 'name' in m.groupdict() and m.group('name'):
+            
outmess( + 'determineexprtype: selected kind types not supported (%s)\n' % repr(expr)) + return {'typespec': 'real'} + for op in ['+', '-', '*', '/']: + for e in [x.strip() for x in markoutercomma(expr, comma=op).split('@' + op + '@')]: + if e in vars: + return _ensure_exprdict(vars[e]) + t = {} + if determineexprtype_re_4.match(expr): # in parenthesis + t = determineexprtype(expr[1:-1], vars, rules) + else: + m = determineexprtype_re_5.match(expr) + if m: + rn = m.group('name') + t = determineexprtype(m.group('name'), vars, rules) + if t and 'attrspec' in t: + del t['attrspec'] + if not t: + if rn[0] in rules: + return _ensure_exprdict(rules[rn[0]]) + if expr[0] in '\'"': + return {'typespec': 'character', 'charselector': {'*': '*'}} + if not t: + outmess( + 'determineexprtype: could not determine expressions (%s) type.\n' % (repr(expr))) + return t + +###### + + +def crack2fortrangen(block, tab='\n', as_interface=False): + global skipfuncs, onlyfuncs + + setmesstext(block) + ret = '' + if isinstance(block, list): + for g in block: + if g and g['block'] in ['function', 'subroutine']: + if g['name'] in skipfuncs: + continue + if onlyfuncs and g['name'] not in onlyfuncs: + continue + ret = ret + crack2fortrangen(g, tab, as_interface=as_interface) + return ret + prefix = '' + name = '' + args = '' + blocktype = block['block'] + if blocktype == 'program': + return '' + argsl = [] + if 'name' in block: + name = block['name'] + if 'args' in block: + vars = block['vars'] + for a in block['args']: + a = expr2name(a, block, argsl) + if not isintent_callback(vars[a]): + argsl.append(a) + if block['block'] == 'function' or argsl: + args = '(%s)' % ','.join(argsl) + f2pyenhancements = '' + if 'f2pyenhancements' in block: + for k in list(block['f2pyenhancements'].keys()): + f2pyenhancements = '%s%s%s %s' % ( + f2pyenhancements, tab + tabchar, k, block['f2pyenhancements'][k]) + intent_lst = block.get('intent', [])[:] + if blocktype == 'function' and 'callback' in intent_lst: + intent_lst.remove('callback') + if intent_lst: + f2pyenhancements = '%s%sintent(%s) %s' %\ + (f2pyenhancements, tab + tabchar, + ','.join(intent_lst), name) + use = '' + if 'use' in block: + use = use2fortran(block['use'], tab + tabchar) + common = '' + if 'common' in block: + common = common2fortran(block['common'], tab + tabchar) + if name == 'unknown_interface': + name = '' + result = '' + if 'result' in block: + result = ' result (%s)' % block['result'] + if block['result'] not in argsl: + argsl.append(block['result']) + body = crack2fortrangen(block['body'], tab + tabchar, as_interface=as_interface) + vars = vars2fortran( + block, block['vars'], argsl, tab + tabchar, as_interface=as_interface) + mess = '' + if 'from' in block and not as_interface: + mess = '! 
in %s' % block['from'] + if 'entry' in block: + entry_stmts = '' + for k, i in list(block['entry'].items()): + entry_stmts = '%s%sentry %s(%s)' \ + % (entry_stmts, tab + tabchar, k, ','.join(i)) + body = body + entry_stmts + if blocktype == 'block data' and name == '_BLOCK_DATA_': + name = '' + ret = '%s%s%s %s%s%s %s%s%s%s%s%s%send %s %s' % ( + tab, prefix, blocktype, name, args, result, mess, f2pyenhancements, use, vars, common, body, tab, blocktype, name) + return ret + + +def common2fortran(common, tab=''): + ret = '' + for k in list(common.keys()): + if k == '_BLNK_': + ret = '%s%scommon %s' % (ret, tab, ','.join(common[k])) + else: + ret = '%s%scommon /%s/ %s' % (ret, tab, k, ','.join(common[k])) + return ret + + +def use2fortran(use, tab=''): + ret = '' + for m in list(use.keys()): + ret = '%s%suse %s,' % (ret, tab, m) + if use[m] == {}: + if ret and ret[-1] == ',': + ret = ret[:-1] + continue + if 'only' in use[m] and use[m]['only']: + ret = '%s only:' % (ret) + if 'map' in use[m] and use[m]['map']: + c = ' ' + for k in list(use[m]['map'].keys()): + if k == use[m]['map'][k]: + ret = '%s%s%s' % (ret, c, k) + c = ',' + else: + ret = '%s%s%s=>%s' % (ret, c, k, use[m]['map'][k]) + c = ',' + if ret and ret[-1] == ',': + ret = ret[:-1] + return ret + + +def true_intent_list(var): + lst = var['intent'] + ret = [] + for intent in lst: + try: + f = globals()['isintent_%s' % intent] + except KeyError: + pass + else: + if f(var): + ret.append(intent) + return ret + + +def vars2fortran(block, vars, args, tab='', as_interface=False): + """ + TODO: + public sub + ... + """ + setmesstext(block) + ret = '' + nout = [] + for a in args: + if a in block['vars']: + nout.append(a) + if 'commonvars' in block: + for a in block['commonvars']: + if a in vars: + if a not in nout: + nout.append(a) + else: + errmess( + 'vars2fortran: Confused?!: "%s" is not defined in vars.\n' % a) + if 'varnames' in block: + nout.extend(block['varnames']) + if not as_interface: + for a in list(vars.keys()): + if a not in nout: + nout.append(a) + for a in nout: + if 'depend' in vars[a]: + for d in vars[a]['depend']: + if d in vars and 'depend' in vars[d] and a in vars[d]['depend']: + errmess( + 'vars2fortran: Warning: cross-dependence between variables "%s" and "%s"\n' % (a, d)) + if 'externals' in block and a in block['externals']: + if isintent_callback(vars[a]): + ret = '%s%sintent(callback) %s' % (ret, tab, a) + ret = '%s%sexternal %s' % (ret, tab, a) + if isoptional(vars[a]): + ret = '%s%soptional %s' % (ret, tab, a) + if a in vars and 'typespec' not in vars[a]: + continue + cont = 1 + for b in block['body']: + if a == b['name'] and b['block'] == 'function': + cont = 0 + break + if cont: + continue + if a not in vars: + show(vars) + outmess('vars2fortran: No definition for argument "%s".\n' % a) + continue + if a == block['name']: + if block['block'] != 'function' or block.get('result'): + # 1) skip declaring a variable that name matches with + # subroutine name + # 2) skip declaring function when its type is + # declared via `result` construction + continue + if 'typespec' not in vars[a]: + if 'attrspec' in vars[a] and 'external' in vars[a]['attrspec']: + if a in args: + ret = '%s%sexternal %s' % (ret, tab, a) + continue + show(vars[a]) + outmess('vars2fortran: No typespec for argument "%s".\n' % a) + continue + vardef = vars[a]['typespec'] + if vardef == 'type' and 'typename' in vars[a]: + vardef = '%s(%s)' % (vardef, vars[a]['typename']) + selector = {} + if 'kindselector' in vars[a]: + selector = 
vars[a]['kindselector'] + elif 'charselector' in vars[a]: + selector = vars[a]['charselector'] + if '*' in selector: + if selector['*'] in ['*', ':']: + vardef = '%s*(%s)' % (vardef, selector['*']) + else: + vardef = '%s*%s' % (vardef, selector['*']) + else: + if 'len' in selector: + vardef = '%s(len=%s' % (vardef, selector['len']) + if 'kind' in selector: + vardef = '%s,kind=%s)' % (vardef, selector['kind']) + else: + vardef = '%s)' % (vardef) + elif 'kind' in selector: + vardef = '%s(kind=%s)' % (vardef, selector['kind']) + c = ' ' + if 'attrspec' in vars[a]: + attr = [l for l in vars[a]['attrspec'] + if l not in ['external']] + if attr: + vardef = '%s, %s' % (vardef, ','.join(attr)) + c = ',' + if 'dimension' in vars[a]: + vardef = '%s%sdimension(%s)' % ( + vardef, c, ','.join(vars[a]['dimension'])) + c = ',' + if 'intent' in vars[a]: + lst = true_intent_list(vars[a]) + if lst: + vardef = '%s%sintent(%s)' % (vardef, c, ','.join(lst)) + c = ',' + if 'check' in vars[a]: + vardef = '%s%scheck(%s)' % (vardef, c, ','.join(vars[a]['check'])) + c = ',' + if 'depend' in vars[a]: + vardef = '%s%sdepend(%s)' % ( + vardef, c, ','.join(vars[a]['depend'])) + c = ',' + if '=' in vars[a]: + v = vars[a]['='] + if vars[a]['typespec'] in ['complex', 'double complex']: + try: + v = eval(v) + v = '(%s,%s)' % (v.real, v.imag) + except Exception: + pass + vardef = '%s :: %s=%s' % (vardef, a, v) + else: + vardef = '%s :: %s' % (vardef, a) + ret = '%s%s%s' % (ret, tab, vardef) + return ret +###### + + +def crackfortran(files): + global usermodules + + outmess('Reading fortran codes...\n', 0) + readfortrancode(files, crackline) + outmess('Post-processing...\n', 0) + usermodules = [] + postlist = postcrack(grouplist[0]) + outmess('Post-processing (stage 2)...\n', 0) + postlist = postcrack2(postlist) + return usermodules + postlist + + +def crack2fortran(block): + global f2py_version + + pyf = crack2fortrangen(block) + '\n' + header = """! -*- f90 -*- +! Note: the context of this file is case sensitive. +""" + footer = """ +! This file was auto-generated with f2py (version:%s). +! See: +! 
https://web.archive.org/web/20140822061353/http://cens.ioc.ee/projects/f2py2e
+""" % (f2py_version)
+    return header + pyf + footer
+
+if __name__ == "__main__":
+    files = []
+    funcs = []
+    f = 1
+    f2 = 0
+    f3 = 0
+    showblocklist = 0
+    for l in sys.argv[1:]:
+        if l == '':
+            pass
+        elif l[0] == ':':
+            f = 0
+        elif l == '-quiet':
+            quiet = 1
+            verbose = 0
+        elif l == '-verbose':
+            verbose = 2
+            quiet = 0
+        elif l == '-fix':
+            if strictf77:
+                outmess(
+                    'Use option -f90 before -fix if Fortran 90 code is in fix form.\n', 0)
+            skipemptyends = 1
+            sourcecodeform = 'fix'
+        elif l == '-skipemptyends':
+            skipemptyends = 1
+        elif l == '--ignore-contains':
+            ignorecontains = 1
+        elif l == '-f77':
+            strictf77 = 1
+            sourcecodeform = 'fix'
+        elif l == '-f90':
+            strictf77 = 0
+            sourcecodeform = 'free'
+            skipemptyends = 1
+        elif l == '-h':
+            f2 = 1
+        elif l == '-show':
+            showblocklist = 1
+        elif l == '-m':
+            f3 = 1
+        elif l[0] == '-':
+            errmess('Unknown option %s\n' % repr(l))
+        elif f2:
+            f2 = 0
+            pyffilename = l
+        elif f3:
+            f3 = 0
+            f77modulename = l
+        elif f:
+            try:
+                open(l).close()
+                files.append(l)
+            except OSError as detail:
+                errmess(f'OSError: {detail!s}\n')
+        else:
+            funcs.append(l)
+    if not strictf77 and f77modulename and not skipemptyends:
+        outmess("""\
+  Warning: You have specified module name for non Fortran 77 code
+  that should not need one (except if you are scanning F90 code
+  for non module blocks but then you should use flag -skipemptyends
+  and also be sure that the files do not contain programs without program statement).
+""", 0)
+
+    postlist = crackfortran(files)
+    if pyffilename:
+        outmess('Writing fortran code to file %s\n' % repr(pyffilename), 0)
+        pyf = crack2fortran(postlist)
+        with open(pyffilename, 'w') as f:
+            f.write(pyf)
+    if showblocklist:
+        show(postlist)
diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/diagnose.py b/.local/lib/python3.8/site-packages/numpy/f2py/diagnose.py
new file mode 100644
index 0000000..21ee399
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/f2py/diagnose.py
@@ -0,0 +1,154 @@
+#!/usr/bin/env python3
+import os
+import sys
+import tempfile
+
+
+def run_command(cmd):
+    print('Running %r:' % (cmd))
+    os.system(cmd)
+    print('------')
+
+
+def run():
+    _path = os.getcwd()
+    os.chdir(tempfile.gettempdir())
+    print('------')
+    print('os.name=%r' % (os.name))
+    print('------')
+    print('sys.platform=%r' % (sys.platform))
+    print('------')
+    print('sys.version:')
+    print(sys.version)
+    print('------')
+    print('sys.prefix:')
+    print(sys.prefix)
+    print('------')
+    print('sys.path=%r' % (':'.join(sys.path)))
+    print('------')
+
+    try:
+        import numpy
+        has_newnumpy = 1
+    except ImportError:
+        print('Failed to import new numpy:', sys.exc_info()[1])
+        has_newnumpy = 0
+
+    try:
+        from numpy.f2py import f2py2e
+        has_f2py2e = 1
+    except ImportError:
+        print('Failed to import f2py2e:', sys.exc_info()[1])
+        has_f2py2e = 0
+
+    try:
+        import numpy.distutils
+        has_numpy_distutils = 2
+    except ImportError:
+        try:
+            import numpy_distutils
+            has_numpy_distutils = 1
+        except ImportError:
+            print('Failed to import numpy_distutils:', sys.exc_info()[1])
+            has_numpy_distutils = 0
+
+    if has_newnumpy:
+        try:
+            print('Found new numpy version %r in %s' %
+                  (numpy.__version__, numpy.__file__))
+        except Exception as msg:
+            print('error:', msg)
+            print('------')
+
+    if has_f2py2e:
+        try:
+            print('Found f2py2e version %r in %s' %
+                  (f2py2e.__version__.version, f2py2e.__file__))
+        except Exception as msg:
+            print('error:', msg)
+            print('------')
+
+    if 
has_numpy_distutils:
+        try:
+            if has_numpy_distutils == 2:
+                print('Found numpy.distutils version %r in %r' % (
+                    numpy.distutils.__version__,
+                    numpy.distutils.__file__))
+            else:
+                print('Found numpy_distutils version %r in %r' % (
+                    numpy_distutils.numpy_distutils_version.numpy_distutils_version,
+                    numpy_distutils.__file__))
+            print('------')
+        except Exception as msg:
+            print('error:', msg)
+            print('------')
+        try:
+            if has_numpy_distutils == 1:
+                print(
+                    'Importing numpy_distutils.command.build_flib ...', end=' ')
+                import numpy_distutils.command.build_flib as build_flib
+                print('ok')
+                print('------')
+                try:
+                    print(
+                        'Checking availability of supported Fortran compilers:')
+                    for compiler_class in build_flib.all_compilers:
+                        compiler_class(verbose=1).is_available()
+                    print('------')
+                except Exception as msg:
+                    print('error:', msg)
+                    print('------')
+        except Exception as msg:
+            print(
+                'error:', msg, '(ignore it, build_flib is obsolete for numpy.distutils 0.2.2 and up)')
+            print('------')
+        try:
+            if has_numpy_distutils == 2:
+                print('Importing numpy.distutils.fcompiler ...', end=' ')
+                import numpy.distutils.fcompiler as fcompiler
+            else:
+                print('Importing numpy_distutils.fcompiler ...', end=' ')
+                import numpy_distutils.fcompiler as fcompiler
+            print('ok')
+            print('------')
+            try:
+                print('Checking availability of supported Fortran compilers:')
+                fcompiler.show_fcompilers()
+                print('------')
+            except Exception as msg:
+                print('error:', msg)
+                print('------')
+        except Exception as msg:
+            print('error:', msg)
+            print('------')
+        try:
+            if has_numpy_distutils == 2:
+                print('Importing numpy.distutils.cpuinfo ...', end=' ')
+                from numpy.distutils.cpuinfo import cpuinfo
+                print('ok')
+                print('------')
+            else:
+                try:
+                    print(
+                        'Importing numpy_distutils.command.cpuinfo ...', end=' ')
+                    from numpy_distutils.command.cpuinfo import cpuinfo
+                    print('ok')
+                    print('------')
+                except Exception as msg:
+                    print('error:', msg, '(ignore it)')
+                    print('Importing numpy_distutils.cpuinfo ...', end=' ')
+                    from numpy_distutils.cpuinfo import cpuinfo
+                    print('ok')
+                    print('------')
+            cpu = cpuinfo()
+            print('CPU information:', end=' ')
+            for name in dir(cpuinfo):
+                if name[0] == '_' and name[1] != '_' and getattr(cpu, name[1:])():
+                    print(name[1:], end=' ')
+            print('------')
+        except Exception as msg:
+            print('error:', msg)
+            print('------')
+    os.chdir(_path)
+if __name__ == "__main__":
+    run()
diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/f2py2e.py b/.local/lib/python3.8/site-packages/numpy/f2py/f2py2e.py
new file mode 100644
index 0000000..4d79c30
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/f2py/f2py2e.py
@@ -0,0 +1,693 @@
+#!/usr/bin/env python3
+"""
+
+f2py2e - Fortran to Python C/API generator. 2nd Edition.
+ See __usage__ below.
+
+Copyright 1999--2011 Pearu Peterson all rights reserved,
+Pearu Peterson <pearu@cens.ioc.ee>
+Permission to use, modify, and distribute this software is given under the
+terms of the NumPy License.
+
+NO WARRANTY IS EXPRESSED OR IMPLIED. USE AT YOUR OWN RISK.
+$Date: 2005/05/06 08:31:19 $
+Pearu Peterson
+
+"""
+import sys
+import os
+import pprint
+import re
+
+from . import crackfortran
+from . import rules
+from . import cb_rules
+from . import auxfuncs
+from . import cfuncs
+from . import f90mod_rules
+from . import __version__
+from . 
import capi_maps
+
+f2py_version = __version__.version
+numpy_version = __version__.version
+errmess = sys.stderr.write
+# outmess=sys.stdout.write
+show = pprint.pprint
+outmess = auxfuncs.outmess
+
+__usage__ =\
+f"""Usage:
+
+1) To construct extension module sources:
+
+      f2py [<options>] <fortran files> [[[only:]||[skip:]] \\
+                                        <fortran functions> ] \\
+                                       [: <fortran files> ...]
+
+2) To compile fortran files and build extension modules:
+
+      f2py -c [<options>, <build_flib options>, <extra options>] <fortran files>
+
+3) To generate signature files:
+
+      f2py -h <filename.pyf> ...< same options as in (1) >
+
+Description: This program generates a Python C/API file (<modulename>module.c)
+             that contains wrappers for given fortran functions so that they
+             can be called from Python. With the -c option the corresponding
+             extension modules are built.
+
+Options:
+
+  --2d-numpy       Use numpy.f2py tool with NumPy support. [DEFAULT]
+  --2d-numeric     Use f2py2e tool with Numeric support.
+  --2d-numarray    Use f2py2e tool with Numarray support.
+  --g3-numpy       Use 3rd generation f2py from the separate f2py package.
+                   [NOT AVAILABLE YET]
+
+  -h <filename>    Write signatures of the fortran routines to file <filename>
+                   and exit. You can then edit <filename> and use it instead
+                   of <fortran files>. If <filename>==stdout then the
+                   signatures are printed to stdout.
+  <fortran functions>  Names of fortran routines for which Python C/API
+                   functions will be generated. Default is all that are found
+                   in <fortran files>.
+  <fortran files>  Paths to fortran/signature files that will be scanned for
+                   <fortran functions> in order to determine their signatures.
+  skip:            Ignore fortran functions that follow until `:'.
+  only:            Use only fortran functions that follow until `:'.
+  :                Get back to <fortran files> mode.
+
+  -m <modulename>  Name of the module; f2py generates a Python/C API
+                   file <modulename>module.c or extension module <modulename>.
+                   Default is 'untitled'.
+
+  --[no-]lower     Do [not] lower the cases in <fortran files>. By default,
+                   --lower is assumed with -h key, and --no-lower without -h key.
+
+  --build-dir <dirname>  All f2py generated files are created in <dirname>.
+                   Default is tempfile.mkdtemp().
+
+  --overwrite-signature  Overwrite existing signature file.
+
+  --[no-]latex-doc Create (or not) <modulename>module.tex.
+                   Default is --no-latex-doc.
+  --short-latex    Create 'incomplete' LaTeX document (without commands
+                   \\documentclass, \\tableofcontents, and \\begin{{document}},
+                   \\end{{document}}).
+
+  --[no-]rest-doc Create (or not) <modulename>module.rst.
+                   Default is --no-rest-doc.
+
+  --debug-capi     Create C/API code that reports the state of the wrappers
+                   during runtime. Useful for debugging.
+
+  --[no-]wrap-functions    Create Fortran subroutine wrappers to Fortran 77
+                   functions. --wrap-functions is default because it ensures
+                   maximum portability/compiler independence.
+
+  --include-paths <path1>:<path2>:...   Search include files from the given
+                   directories.
+
+  --help-link [..] List system resources found by system_info.py. See also
+                   --link-<resource> switch below. [..] is optional list
+                   of resources names. E.g. try 'f2py --help-link lapack_opt'.
+
+  --f2cmap <filename>  Load Fortran-to-Python KIND specification from the given
+                   file. Default: .f2py_f2cmap in current directory.
+
+  --quiet          Run quietly.
+  --verbose        Run with extra verbosity.
+  -v               Print f2py version ID and exit.
+
+
+numpy.distutils options (only effective with -c):
+
+  --fcompiler=         Specify Fortran compiler type by vendor
+  --compiler=          Specify C compiler type (as defined by distutils)
+
+  --help-fcompiler     List available Fortran compilers and exit
+  --f77exec=           Specify the path to F77 compiler
+  --f90exec=           Specify the path to F90 compiler
+  --f77flags=          Specify F77 compiler flags
+  --f90flags=          Specify F90 compiler flags
+  --opt=               Specify optimization flags
+  --arch=              Specify architecture specific optimization flags
+  --noopt              Compile without optimization
+  --noarch             Compile without arch-dependent optimization
+  --debug              Compile with debugging information
+
+Extra options (only effective with -c):
+
+  --link-<resource>    Link extension module with <resource> as defined
+                       by numpy.distutils/system_info.py. E.g. to link
+                       with optimized LAPACK libraries (vecLib on MacOSX,
+                       ATLAS elsewhere), use --link-lapack_opt.
+                       See also --help-link switch.
+
+  -L/path/to/lib/ -l<libname>
+  -D<define> -U<name>
+  -I/path/to/include/
+  <filename>.o <filename>.so <filename>.a
+
+  Using the following macros may be required with non-gcc Fortran
+  compilers:
+    -DPREPEND_FORTRAN -DNO_APPEND_FORTRAN -DUPPERCASE_FORTRAN
+    -DUNDERSCORE_G77
+
+  When using -DF2PY_REPORT_ATEXIT, a performance report of F2PY
+  interface is printed out at exit (platforms: Linux).
+
+  When using -DF2PY_REPORT_ON_ARRAY_COPY=<int>, a message is
+  sent to stderr whenever F2PY interface makes a copy of an
+  array. Integer <int> sets the threshold for array sizes when
+  a message should be shown.
+
+Version:     {f2py_version}
+numpy Version: {numpy_version}
+Requires:    Python 3.5 or higher.
+License:     NumPy license (see LICENSE.txt in the NumPy source code)
+Copyright 1999 - 2011 Pearu Peterson all rights reserved.
+https://web.archive.org/web/20140822061353/http://cens.ioc.ee/projects/f2py2e"""
+
+
+def scaninputline(inputline):
+    files, skipfuncs, onlyfuncs, debug = [], [], [], []
+    f, f2, f3, f5, f6, f7, f8, f9, f10 = 1, 0, 0, 0, 0, 0, 0, 0, 0
+    verbose = 1
+    dolc = -1
+    dolatexdoc = 0
+    dorestdoc = 0
+    wrapfuncs = 1
+    buildpath = '.'
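+    # The f* integers above form a small state machine: an option such as
+    # '-m' only sets a flag (e.g. f3 = 1) and the *next* token on the
+    # command line is then consumed as its value (e.g. the module name)
+    # by the corresponding `elif f3:` branch below.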
+ include_paths = [] + signsfile, modulename = None, None + options = {'buildpath': buildpath, + 'coutput': None, + 'f2py_wrapper_output': None} + for l in inputline: + if l == '': + pass + elif l == 'only:': + f = 0 + elif l == 'skip:': + f = -1 + elif l == ':': + f = 1 + elif l[:8] == '--debug-': + debug.append(l[8:]) + elif l == '--lower': + dolc = 1 + elif l == '--build-dir': + f6 = 1 + elif l == '--no-lower': + dolc = 0 + elif l == '--quiet': + verbose = 0 + elif l == '--verbose': + verbose += 1 + elif l == '--latex-doc': + dolatexdoc = 1 + elif l == '--no-latex-doc': + dolatexdoc = 0 + elif l == '--rest-doc': + dorestdoc = 1 + elif l == '--no-rest-doc': + dorestdoc = 0 + elif l == '--wrap-functions': + wrapfuncs = 1 + elif l == '--no-wrap-functions': + wrapfuncs = 0 + elif l == '--short-latex': + options['shortlatex'] = 1 + elif l == '--coutput': + f8 = 1 + elif l == '--f2py-wrapper-output': + f9 = 1 + elif l == '--f2cmap': + f10 = 1 + elif l == '--overwrite-signature': + options['h-overwrite'] = 1 + elif l == '-h': + f2 = 1 + elif l == '-m': + f3 = 1 + elif l[:2] == '-v': + print(f2py_version) + sys.exit() + elif l == '--show-compilers': + f5 = 1 + elif l[:8] == '-include': + cfuncs.outneeds['userincludes'].append(l[9:-1]) + cfuncs.userincludes[l[9:-1]] = '#include ' + l[8:] + elif l[:15] in '--include_paths': + outmess( + 'f2py option --include_paths is deprecated, use --include-paths instead.\n') + f7 = 1 + elif l[:15] in '--include-paths': + f7 = 1 + elif l[0] == '-': + errmess('Unknown option %s\n' % repr(l)) + sys.exit() + elif f2: + f2 = 0 + signsfile = l + elif f3: + f3 = 0 + modulename = l + elif f6: + f6 = 0 + buildpath = l + elif f7: + f7 = 0 + include_paths.extend(l.split(os.pathsep)) + elif f8: + f8 = 0 + options["coutput"] = l + elif f9: + f9 = 0 + options["f2py_wrapper_output"] = l + elif f10: + f10 = 0 + options["f2cmap_file"] = l + elif f == 1: + try: + with open(l): + pass + files.append(l) + except OSError as detail: + errmess(f'OSError: {detail!s}. Skipping file "{l!s}".\n') + elif f == -1: + skipfuncs.append(l) + elif f == 0: + onlyfuncs.append(l) + if not f5 and not files and not modulename: + print(__usage__) + sys.exit() + if not os.path.isdir(buildpath): + if not verbose: + outmess('Creating build directory %s\n' % (buildpath)) + os.mkdir(buildpath) + if signsfile: + signsfile = os.path.join(buildpath, signsfile) + if signsfile and os.path.isfile(signsfile) and 'h-overwrite' not in options: + errmess( + 'Signature file "%s" exists!!! 
Use --overwrite-signature to overwrite.\n' % (signsfile))
+        sys.exit()
+
+    options['debug'] = debug
+    options['verbose'] = verbose
+    if dolc == -1 and not signsfile:
+        options['do-lower'] = 0
+    else:
+        options['do-lower'] = dolc
+    if modulename:
+        options['module'] = modulename
+    if signsfile:
+        options['signsfile'] = signsfile
+    if onlyfuncs:
+        options['onlyfuncs'] = onlyfuncs
+    if skipfuncs:
+        options['skipfuncs'] = skipfuncs
+    options['dolatexdoc'] = dolatexdoc
+    options['dorestdoc'] = dorestdoc
+    options['wrapfuncs'] = wrapfuncs
+    options['buildpath'] = buildpath
+    options['include_paths'] = include_paths
+    options.setdefault('f2cmap_file', None)
+    return files, options
+
+
+def callcrackfortran(files, options):
+    rules.options = options
+    crackfortran.debug = options['debug']
+    crackfortran.verbose = options['verbose']
+    if 'module' in options:
+        crackfortran.f77modulename = options['module']
+    if 'skipfuncs' in options:
+        crackfortran.skipfuncs = options['skipfuncs']
+    if 'onlyfuncs' in options:
+        crackfortran.onlyfuncs = options['onlyfuncs']
+    crackfortran.include_paths[:] = options['include_paths']
+    crackfortran.dolowercase = options['do-lower']
+    postlist = crackfortran.crackfortran(files)
+    if 'signsfile' in options:
+        outmess('Saving signatures to file "%s"\n' % (options['signsfile']))
+        pyf = crackfortran.crack2fortran(postlist)
+        if options['signsfile'][-6:] == 'stdout':
+            sys.stdout.write(pyf)
+        else:
+            with open(options['signsfile'], 'w') as f:
+                f.write(pyf)
+    if options["coutput"] is None:
+        for mod in postlist:
+            mod["coutput"] = "%smodule.c" % mod["name"]
+    else:
+        for mod in postlist:
+            mod["coutput"] = options["coutput"]
+    if options["f2py_wrapper_output"] is None:
+        for mod in postlist:
+            mod["f2py_wrapper_output"] = "%s-f2pywrappers.f" % mod["name"]
+    else:
+        for mod in postlist:
+            mod["f2py_wrapper_output"] = options["f2py_wrapper_output"]
+    return postlist
+
+
+def buildmodules(lst):
+    cfuncs.buildcfuncs()
+    outmess('Building modules...\n')
+    modules, mnames, isusedby = [], [], {}
+    for item in lst:
+        if '__user__' in item['name']:
+            cb_rules.buildcallbacks(item)
+        else:
+            if 'use' in item:
+                for u in item['use'].keys():
+                    if u not in isusedby:
+                        isusedby[u] = []
+                    isusedby[u].append(item['name'])
+            modules.append(item)
+            mnames.append(item['name'])
+    ret = {}
+    for module, name in zip(modules, mnames):
+        if name in isusedby:
+            outmess('\tSkipping module "%s" which is used by %s.\n' % (
+                name, ','.join('"%s"' % s for s in isusedby[name])))
+        else:
+            um = []
+            if 'use' in module:
+                for u in module['use'].keys():
+                    if u in isusedby and u in mnames:
+                        um.append(modules[mnames.index(u)])
+                    else:
+                        outmess(
+                            f'\tModule "{name}" uses nonexisting "{u}" '
+                            'which will be ignored.\n')
+            ret[name] = {}
+            dict_append(ret[name], rules.buildmodule(module, um))
+    return ret
+
+
+def dict_append(d_out, d_in):
+    for (k, v) in d_in.items():
+        if k not in d_out:
+            d_out[k] = []
+        if isinstance(v, list):
+            d_out[k] = d_out[k] + v
+        else:
+            d_out[k].append(v)
+
+
+def run_main(comline_list):
+    """
+    Equivalent to running::
+
+        f2py <args>
+
+    where ``<args>=string.join(<list>,' ')``, but in Python. Unless
+    ``-h`` is used, this function returns a dictionary containing
+    information on generated modules and their dependencies on source
+    files. For example, the command ``f2py -m scalar scalar.f`` can be
+    executed from Python as follows
+
+    You cannot build extension modules with this function, that is,
+    using ``-c`` is not allowed. Use the ``compile`` command instead.
+
+    Examples
+    --------
+    .. literalinclude:: ../../source/f2py/code/results/run_main_session.dat
+        :language: python
+
+    """
+    crackfortran.reset_global_f2py_vars()
+    f2pydir = os.path.dirname(os.path.abspath(cfuncs.__file__))
+    fobjhsrc = os.path.join(f2pydir, 'src', 'fortranobject.h')
+    fobjcsrc = os.path.join(f2pydir, 'src', 'fortranobject.c')
+    files, options = scaninputline(comline_list)
+    auxfuncs.options = options
+    capi_maps.load_f2cmap_file(options['f2cmap_file'])
+    postlist = callcrackfortran(files, options)
+    isusedby = {}
+    for plist in postlist:
+        if 'use' in plist:
+            for u in plist['use'].keys():
+                if u not in isusedby:
+                    isusedby[u] = []
+                isusedby[u].append(plist['name'])
+    for plist in postlist:
+        if plist['block'] == 'python module' and '__user__' in plist['name']:
+            if plist['name'] in isusedby:
+                # if not quiet:
+                outmess(
+                    f'Skipping Makefile build for module "{plist["name"]}" '
+                    'which is used by {}\n'.format(
+                        ','.join(f'"{s}"' for s in isusedby[plist['name']])))
+    if 'signsfile' in options:
+        if options['verbose'] > 1:
+            outmess(
+                'Stopping. Edit the signature file and then run f2py on the signature file: ')
+            outmess('%s %s\n' %
+                    (os.path.basename(sys.argv[0]), options['signsfile']))
+        return
+    for plist in postlist:
+        if plist['block'] != 'python module':
+            if 'python module' not in options:
+                errmess(
+                    'Tip: If your original code is Fortran source then you must use -m option.\n')
+            raise TypeError('All blocks must be python module blocks but got %s' % (
+                repr(plist['block'])))
+    auxfuncs.debugoptions = options['debug']
+    f90mod_rules.options = options
+    auxfuncs.wrapfuncs = options['wrapfuncs']
+
+    ret = buildmodules(postlist)
+
+    for mn in ret.keys():
+        dict_append(ret[mn], {'csrc': fobjcsrc, 'h': fobjhsrc})
+    return ret
+
+
+def filter_files(prefix, suffix, files, remove_prefix=None):
+    """
+    Filter files by prefix and suffix.
+    """
+    filtered, rest = [], []
+    match = re.compile(prefix + r'.*' + suffix + r'\Z').match
+    if remove_prefix:
+        ind = len(prefix)
+    else:
+        ind = 0
+    for file in [x.strip() for x in files]:
+        if match(file):
+            filtered.append(file[ind:])
+        else:
+            rest.append(file)
+    return filtered, rest
+
+
+def get_prefix(module):
+    p = os.path.dirname(os.path.dirname(module.__file__))
+    return p
+
+
+def run_compile():
+    """
+    Do it all in one call!
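+
+    Roughly speaking, an invocation such as ``f2py -c -m mymod src.f90``
+    (hypothetical file names) lands here: the remaining command-line
+    tokens are sorted into compiler/linker flag groups below and the
+    actual build is delegated to numpy.distutils ``setup()``.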
+ """ + import tempfile + + i = sys.argv.index('-c') + del sys.argv[i] + + remove_build_dir = 0 + try: + i = sys.argv.index('--build-dir') + except ValueError: + i = None + if i is not None: + build_dir = sys.argv[i + 1] + del sys.argv[i + 1] + del sys.argv[i] + else: + remove_build_dir = 1 + build_dir = tempfile.mkdtemp() + + _reg1 = re.compile(r'--link-') + sysinfo_flags = [_m for _m in sys.argv[1:] if _reg1.match(_m)] + sys.argv = [_m for _m in sys.argv if _m not in sysinfo_flags] + if sysinfo_flags: + sysinfo_flags = [f[7:] for f in sysinfo_flags] + + _reg2 = re.compile( + r'--((no-|)(wrap-functions|lower)|debug-capi|quiet)|-include') + f2py_flags = [_m for _m in sys.argv[1:] if _reg2.match(_m)] + sys.argv = [_m for _m in sys.argv if _m not in f2py_flags] + f2py_flags2 = [] + fl = 0 + for a in sys.argv[1:]: + if a in ['only:', 'skip:']: + fl = 1 + elif a == ':': + fl = 0 + if fl or a == ':': + f2py_flags2.append(a) + if f2py_flags2 and f2py_flags2[-1] != ':': + f2py_flags2.append(':') + f2py_flags.extend(f2py_flags2) + + sys.argv = [_m for _m in sys.argv if _m not in f2py_flags2] + _reg3 = re.compile( + r'--((f(90)?compiler(-exec|)|compiler)=|help-compiler)') + flib_flags = [_m for _m in sys.argv[1:] if _reg3.match(_m)] + sys.argv = [_m for _m in sys.argv if _m not in flib_flags] + _reg4 = re.compile( + r'--((f(77|90)(flags|exec)|opt|arch)=|(debug|noopt|noarch|help-fcompiler))') + fc_flags = [_m for _m in sys.argv[1:] if _reg4.match(_m)] + sys.argv = [_m for _m in sys.argv if _m not in fc_flags] + + del_list = [] + for s in flib_flags: + v = '--fcompiler=' + if s[:len(v)] == v: + from numpy.distutils import fcompiler + fcompiler.load_all_fcompiler_classes() + allowed_keys = list(fcompiler.fcompiler_class.keys()) + nv = ov = s[len(v):].lower() + if ov not in allowed_keys: + vmap = {} # XXX + try: + nv = vmap[ov] + except KeyError: + if ov not in vmap.values(): + print('Unknown vendor: "%s"' % (s[len(v):])) + nv = ov + i = flib_flags.index(s) + flib_flags[i] = '--fcompiler=' + nv + continue + for s in del_list: + i = flib_flags.index(s) + del flib_flags[i] + assert len(flib_flags) <= 2, repr(flib_flags) + + _reg5 = re.compile(r'--(verbose)') + setup_flags = [_m for _m in sys.argv[1:] if _reg5.match(_m)] + sys.argv = [_m for _m in sys.argv if _m not in setup_flags] + + if '--quiet' in f2py_flags: + setup_flags.append('--quiet') + + modulename = 'untitled' + sources = sys.argv[1:] + + for optname in ['--include_paths', '--include-paths', '--f2cmap']: + if optname in sys.argv: + i = sys.argv.index(optname) + f2py_flags.extend(sys.argv[i:i + 2]) + del sys.argv[i + 1], sys.argv[i] + sources = sys.argv[1:] + + if '-m' in sys.argv: + i = sys.argv.index('-m') + modulename = sys.argv[i + 1] + del sys.argv[i + 1], sys.argv[i] + sources = sys.argv[1:] + else: + from numpy.distutils.command.build_src import get_f2py_modulename + pyf_files, sources = filter_files('', '[.]pyf([.]src|)', sources) + sources = pyf_files + sources + for f in pyf_files: + modulename = get_f2py_modulename(f) + if modulename: + break + + extra_objects, sources = filter_files('', '[.](o|a|so|dylib)', sources) + include_dirs, sources = filter_files('-I', '', sources, remove_prefix=1) + library_dirs, sources = filter_files('-L', '', sources, remove_prefix=1) + libraries, sources = filter_files('-l', '', sources, remove_prefix=1) + undef_macros, sources = filter_files('-U', '', sources, remove_prefix=1) + define_macros, sources = filter_files('-D', '', sources, remove_prefix=1) + for i in range(len(define_macros)): + name_value 
= define_macros[i].split('=', 1)
+        if len(name_value) == 1:
+            name_value.append(None)
+        if len(name_value) == 2:
+            define_macros[i] = tuple(name_value)
+        else:
+            print('Invalid use of -D:', name_value)
+
+    from numpy.distutils.system_info import get_info
+
+    num_info = {}
+    if num_info:
+        include_dirs.extend(num_info.get('include_dirs', []))
+
+    from numpy.distutils.core import setup, Extension
+    ext_args = {'name': modulename, 'sources': sources,
+                'include_dirs': include_dirs,
+                'library_dirs': library_dirs,
+                'libraries': libraries,
+                'define_macros': define_macros,
+                'undef_macros': undef_macros,
+                'extra_objects': extra_objects,
+                'f2py_options': f2py_flags,
+                }
+
+    if sysinfo_flags:
+        from numpy.distutils.misc_util import dict_append
+        for n in sysinfo_flags:
+            i = get_info(n)
+            if not i:
+                outmess('No %s resources found in system'
+                        ' (try `f2py --help-link`)\n' % (repr(n)))
+            dict_append(ext_args, **i)
+
+    ext = Extension(**ext_args)
+    sys.argv = [sys.argv[0]] + setup_flags
+    sys.argv.extend(['build',
+                     '--build-temp', build_dir,
+                     '--build-base', build_dir,
+                     '--build-platlib', '.',
+                     # disable CCompilerOpt
+                     '--disable-optimization'])
+    if fc_flags:
+        sys.argv.extend(['config_fc'] + fc_flags)
+    if flib_flags:
+        sys.argv.extend(['build_ext'] + flib_flags)
+
+    setup(ext_modules=[ext])
+
+    if remove_build_dir and os.path.exists(build_dir):
+        import shutil
+        outmess('Removing build directory %s\n' % (build_dir))
+        shutil.rmtree(build_dir)
+
+
+def main():
+    if '--help-link' in sys.argv[1:]:
+        sys.argv.remove('--help-link')
+        from numpy.distutils.system_info import show_all
+        show_all()
+        return
+
+    # Probably outdated options that were not working before 1.16
+    if '--g3-numpy' in sys.argv[1:]:
+        sys.stderr.write("G3 f2py support is not implemented, yet.\n")
+        sys.exit(1)
+    elif '--2e-numeric' in sys.argv[1:]:
+        sys.argv.remove('--2e-numeric')
+    elif '--2e-numarray' in sys.argv[1:]:
+        # Note that this errors because the -DNUMARRAY argument is
+        # not recognized. Just here for back compatibility and the
+        # error message.
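+        # Keeping '-DNUMARRAY' on sys.argv means the option parser
+        # later rejects it (see the generic `elif l[0] == '-':`
+        # "Unknown option" branch in scaninputline), which produces
+        # the advertised error for this obsolete mode.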
+ sys.argv.append("-DNUMARRAY") + sys.argv.remove('--2e-numarray') + elif '--2e-numpy' in sys.argv[1:]: + sys.argv.remove('--2e-numpy') + else: + pass + + if '-c' in sys.argv[1:]: + run_compile() + else: + run_main(sys.argv[1:]) diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/f2py_testing.py b/.local/lib/python3.8/site-packages/numpy/f2py/f2py_testing.py new file mode 100644 index 0000000..1f109e6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/f2py_testing.py @@ -0,0 +1,46 @@ +import sys +import re + +from numpy.testing import jiffies, memusage + + +def cmdline(): + m = re.compile(r'\A\d+\Z') + args = [] + repeat = 1 + for a in sys.argv[1:]: + if m.match(a): + repeat = eval(a) + else: + args.append(a) + f2py_opts = ' '.join(args) + return repeat, f2py_opts + + +def run(runtest, test_functions, repeat=1): + l = [(t, repr(t.__doc__.split('\n')[1].strip())) for t in test_functions] + start_memusage = memusage() + diff_memusage = None + start_jiffies = jiffies() + i = 0 + while i < repeat: + i += 1 + for t, fname in l: + runtest(t) + if start_memusage is None: + continue + if diff_memusage is None: + diff_memusage = memusage() - start_memusage + else: + diff_memusage2 = memusage() - start_memusage + if diff_memusage2 != diff_memusage: + print('memory usage change at step %i:' % i, + diff_memusage2 - diff_memusage, + fname) + diff_memusage = diff_memusage2 + current_memusage = memusage() + print('run', repeat * len(test_functions), 'tests', + 'in %.2f seconds' % ((jiffies() - start_jiffies) / 100.0)) + if start_memusage: + print('initial virtual memory size:', start_memusage, 'bytes') + print('current virtual memory size:', current_memusage, 'bytes') diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/f90mod_rules.py b/.local/lib/python3.8/site-packages/numpy/f2py/f90mod_rules.py new file mode 100644 index 0000000..3e1c967 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/f90mod_rules.py @@ -0,0 +1,270 @@ +#!/usr/bin/env python3 +""" + +Build F90 module support for f2py2e. + +Copyright 2000 Pearu Peterson all rights reserved, +Pearu Peterson +Permission to use, modify, and distribute this software is given under the +terms of the NumPy License. + +NO WARRANTY IS EXPRESSED OR IMPLIED. USE AT YOUR OWN RISK. +$Date: 2005/02/03 19:30:23 $ +Pearu Peterson + +""" +__version__ = "$Revision: 1.27 $"[10:-1] + +f2py_version = 'See `f2py -v`' + +import numpy as np + +from . import capi_maps +from . import func2subr +from .crackfortran import undo_rmbadname, undo_rmbadname1 + +# The environment provided by auxfuncs.py is needed for some calls to eval. +# As the needed functions cannot be determined by static inspection of the +# code, it is safest to use import * pending a major refactoring of f2py. +from .auxfuncs import * + +options = {} + + +def findf90modules(m): + if ismodule(m): + return [m] + if not hasbody(m): + return [] + ret = [] + for b in m['body']: + if ismodule(b): + ret.append(b) + else: + ret = ret + findf90modules(b) + return ret + +fgetdims1 = """\ + external f2pysetdata + logical ns + integer r,i + integer(%d) s(*) + ns = .FALSE. + if (allocated(d)) then + do i=1,r + if ((size(d,i).ne.s(i)).and.(s(i).ge.0)) then + ns = .TRUE. 
+ end if + end do + if (ns) then + deallocate(d) + end if + end if + if ((.not.allocated(d)).and.(s(1).ge.1)) then""" % np.intp().itemsize + +fgetdims2 = """\ + end if + if (allocated(d)) then + do i=1,r + s(i) = size(d,i) + end do + end if + flag = 1 + call f2pysetdata(d,allocated(d))""" + +fgetdims2_sa = """\ + end if + if (allocated(d)) then + do i=1,r + s(i) = size(d,i) + end do + !s(r) must be equal to len(d(1)) + end if + flag = 2 + call f2pysetdata(d,allocated(d))""" + + +def buildhooks(pymod): + from . import rules + ret = {'f90modhooks': [], 'initf90modhooks': [], 'body': [], + 'need': ['F_FUNC', 'arrayobject.h'], + 'separatorsfor': {'includes0': '\n', 'includes': '\n'}, + 'docs': ['"Fortran 90/95 modules:\\n"'], + 'latexdoc': []} + fhooks = [''] + + def fadd(line, s=fhooks): + s[0] = '%s\n %s' % (s[0], line) + doc = [''] + + def dadd(line, s=doc): + s[0] = '%s\n%s' % (s[0], line) + for m in findf90modules(pymod): + sargs, fargs, efargs, modobjs, notvars, onlyvars = [], [], [], [], [ + m['name']], [] + sargsp = [] + ifargs = [] + mfargs = [] + if hasbody(m): + for b in m['body']: + notvars.append(b['name']) + for n in m['vars'].keys(): + var = m['vars'][n] + if (n not in notvars) and (not l_or(isintent_hide, isprivate)(var)): + onlyvars.append(n) + mfargs.append(n) + outmess('\t\tConstructing F90 module support for "%s"...\n' % + (m['name'])) + if onlyvars: + outmess('\t\t Variables: %s\n' % (' '.join(onlyvars))) + chooks = [''] + + def cadd(line, s=chooks): + s[0] = '%s\n%s' % (s[0], line) + ihooks = [''] + + def iadd(line, s=ihooks): + s[0] = '%s\n%s' % (s[0], line) + + vrd = capi_maps.modsign2map(m) + cadd('static FortranDataDef f2py_%s_def[] = {' % (m['name'])) + dadd('\\subsection{Fortran 90/95 module \\texttt{%s}}\n' % (m['name'])) + if hasnote(m): + note = m['note'] + if isinstance(note, list): + note = '\n'.join(note) + dadd(note) + if onlyvars: + dadd('\\begin{description}') + for n in onlyvars: + var = m['vars'][n] + modobjs.append(n) + ct = capi_maps.getctype(var) + at = capi_maps.c2capi_map[ct] + dm = capi_maps.getarrdims(n, var) + dms = dm['dims'].replace('*', '-1').strip() + dms = dms.replace(':', '-1').strip() + if not dms: + dms = '-1' + use_fgetdims2 = fgetdims2 + if isstringarray(var): + if 'charselector' in var and 'len' in var['charselector']: + cadd('\t{"%s",%s,{{%s,%s}},%s},' + % (undo_rmbadname1(n), dm['rank'], dms, var['charselector']['len'], at)) + use_fgetdims2 = fgetdims2_sa + else: + cadd('\t{"%s",%s,{{%s}},%s},' % + (undo_rmbadname1(n), dm['rank'], dms, at)) + else: + cadd('\t{"%s",%s,{{%s}},%s},' % + (undo_rmbadname1(n), dm['rank'], dms, at)) + dadd('\\item[]{{}\\verb@%s@{}}' % + (capi_maps.getarrdocsign(n, var))) + if hasnote(var): + note = var['note'] + if isinstance(note, list): + note = '\n'.join(note) + dadd('--- %s' % (note)) + if isallocatable(var): + fargs.append('f2py_%s_getdims_%s' % (m['name'], n)) + efargs.append(fargs[-1]) + sargs.append( + 'void (*%s)(int*,int*,void(*)(char*,int*),int*)' % (n)) + sargsp.append('void (*)(int*,int*,void(*)(char*,int*),int*)') + iadd('\tf2py_%s_def[i_f2py++].func = %s;' % (m['name'], n)) + fadd('subroutine %s(r,s,f2pysetdata,flag)' % (fargs[-1])) + fadd('use %s, only: d => %s\n' % + (m['name'], undo_rmbadname1(n))) + fadd('integer flag\n') + fhooks[0] = fhooks[0] + fgetdims1 + dms = range(1, int(dm['rank']) + 1) + fadd(' allocate(d(%s))\n' % + (','.join(['s(%s)' % i for i in dms]))) + fhooks[0] = fhooks[0] + use_fgetdims2 + fadd('end subroutine %s' % (fargs[-1])) + else: + fargs.append(n) + 
sargs.append('char *%s' % (n)) + sargsp.append('char*') + iadd('\tf2py_%s_def[i_f2py++].data = %s;' % (m['name'], n)) + if onlyvars: + dadd('\\end{description}') + if hasbody(m): + for b in m['body']: + if not isroutine(b): + outmess("f90mod_rules.buildhooks:" + f" skipping {b['block']} {b['name']}\n") + continue + modobjs.append('%s()' % (b['name'])) + b['modulename'] = m['name'] + api, wrap = rules.buildapi(b) + if isfunction(b): + fhooks[0] = fhooks[0] + wrap + fargs.append('f2pywrap_%s_%s' % (m['name'], b['name'])) + ifargs.append(func2subr.createfuncwrapper(b, signature=1)) + else: + if wrap: + fhooks[0] = fhooks[0] + wrap + fargs.append('f2pywrap_%s_%s' % (m['name'], b['name'])) + ifargs.append( + func2subr.createsubrwrapper(b, signature=1)) + else: + fargs.append(b['name']) + mfargs.append(fargs[-1]) + api['externroutines'] = [] + ar = applyrules(api, vrd) + ar['docs'] = [] + ar['docshort'] = [] + ret = dictappend(ret, ar) + cadd('\t{"%s",-1,{{-1}},0,NULL,(void *)f2py_rout_#modulename#_%s_%s,doc_f2py_rout_#modulename#_%s_%s},' % + (b['name'], m['name'], b['name'], m['name'], b['name'])) + sargs.append('char *%s' % (b['name'])) + sargsp.append('char *') + iadd('\tf2py_%s_def[i_f2py++].data = %s;' % + (m['name'], b['name'])) + cadd('\t{NULL}\n};\n') + iadd('}') + ihooks[0] = 'static void f2py_setup_%s(%s) {\n\tint i_f2py=0;%s' % ( + m['name'], ','.join(sargs), ihooks[0]) + if '_' in m['name']: + F_FUNC = 'F_FUNC_US' + else: + F_FUNC = 'F_FUNC' + iadd('extern void %s(f2pyinit%s,F2PYINIT%s)(void (*)(%s));' + % (F_FUNC, m['name'], m['name'].upper(), ','.join(sargsp))) + iadd('static void f2py_init_%s(void) {' % (m['name'])) + iadd('\t%s(f2pyinit%s,F2PYINIT%s)(f2py_setup_%s);' + % (F_FUNC, m['name'], m['name'].upper(), m['name'])) + iadd('}\n') + ret['f90modhooks'] = ret['f90modhooks'] + chooks + ihooks + ret['initf90modhooks'] = ['\tPyDict_SetItemString(d, "%s", PyFortranObject_New(f2py_%s_def,f2py_init_%s));' % ( + m['name'], m['name'], m['name'])] + ret['initf90modhooks'] + fadd('') + fadd('subroutine f2pyinit%s(f2pysetupfunc)' % (m['name'])) + if mfargs: + for a in undo_rmbadname(mfargs): + fadd('use %s, only : %s' % (m['name'], a)) + if ifargs: + fadd(' '.join(['interface'] + ifargs)) + fadd('end interface') + fadd('external f2pysetupfunc') + if efargs: + for a in undo_rmbadname(efargs): + fadd('external %s' % (a)) + fadd('call f2pysetupfunc(%s)' % (','.join(undo_rmbadname(fargs)))) + fadd('end subroutine f2pyinit%s\n' % (m['name'])) + + dadd('\n'.join(ret['latexdoc']).replace( + r'\subsection{', r'\subsubsection{')) + + ret['latexdoc'] = [] + ret['docs'].append('"\t%s --- %s"' % (m['name'], + ','.join(undo_rmbadname(modobjs)))) + + ret['routine_defs'] = '' + ret['doc'] = [] + ret['docshort'] = [] + ret['latexdoc'] = doc[0] + if len(ret['docs']) <= 1: + ret['docs'] = '' + return ret, fhooks[0] diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/func2subr.py b/.local/lib/python3.8/site-packages/numpy/f2py/func2subr.py new file mode 100644 index 0000000..21d4c00 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/func2subr.py @@ -0,0 +1,300 @@ +#!/usr/bin/env python3 +""" + +Rules for building C/API module with f2py2e. + +Copyright 1999,2000 Pearu Peterson all rights reserved, +Pearu Peterson +Permission to use, modify, and distribute this software is given under the +terms of the NumPy License. + +NO WARRANTY IS EXPRESSED OR IMPLIED. USE AT YOUR OWN RISK. 
+$Date: 2004/11/26 11:13:06 $ +Pearu Peterson + +""" +__version__ = "$Revision: 1.16 $"[10:-1] + +f2py_version = 'See `f2py -v`' + +import copy + +from .auxfuncs import ( + getfortranname, isexternal, isfunction, isfunction_wrap, isintent_in, + isintent_out, islogicalfunction, ismoduleroutine, isscalar, + issubroutine, issubroutine_wrap, outmess, show +) + + +def var2fixfortran(vars, a, fa=None, f90mode=None): + if fa is None: + fa = a + if a not in vars: + show(vars) + outmess('var2fixfortran: No definition for argument "%s".\n' % a) + return '' + if 'typespec' not in vars[a]: + show(vars[a]) + outmess('var2fixfortran: No typespec for argument "%s".\n' % a) + return '' + vardef = vars[a]['typespec'] + if vardef == 'type' and 'typename' in vars[a]: + vardef = '%s(%s)' % (vardef, vars[a]['typename']) + selector = {} + lk = '' + if 'kindselector' in vars[a]: + selector = vars[a]['kindselector'] + lk = 'kind' + elif 'charselector' in vars[a]: + selector = vars[a]['charselector'] + lk = 'len' + if '*' in selector: + if f90mode: + if selector['*'] in ['*', ':', '(*)']: + vardef = '%s(len=*)' % (vardef) + else: + vardef = '%s(%s=%s)' % (vardef, lk, selector['*']) + else: + if selector['*'] in ['*', ':']: + vardef = '%s*(%s)' % (vardef, selector['*']) + else: + vardef = '%s*%s' % (vardef, selector['*']) + else: + if 'len' in selector: + vardef = '%s(len=%s' % (vardef, selector['len']) + if 'kind' in selector: + vardef = '%s,kind=%s)' % (vardef, selector['kind']) + else: + vardef = '%s)' % (vardef) + elif 'kind' in selector: + vardef = '%s(kind=%s)' % (vardef, selector['kind']) + + vardef = '%s %s' % (vardef, fa) + if 'dimension' in vars[a]: + vardef = '%s(%s)' % (vardef, ','.join(vars[a]['dimension'])) + return vardef + + +def createfuncwrapper(rout, signature=0): + assert isfunction(rout) + + extra_args = [] + vars = rout['vars'] + for a in rout['args']: + v = rout['vars'][a] + for i, d in enumerate(v.get('dimension', [])): + if d == ':': + dn = 'f2py_%s_d%s' % (a, i) + dv = dict(typespec='integer', intent=['hide']) + dv['='] = 'shape(%s, %s)' % (a, i) + extra_args.append(dn) + vars[dn] = dv + v['dimension'][i] = dn + rout['args'].extend(extra_args) + need_interface = bool(extra_args) + + ret = [''] + + def add(line, ret=ret): + ret[0] = '%s\n %s' % (ret[0], line) + name = rout['name'] + fortranname = getfortranname(rout) + f90mode = ismoduleroutine(rout) + newname = '%sf2pywrap' % (name) + + if newname not in vars: + vars[newname] = vars[name] + args = [newname] + rout['args'][1:] + else: + args = [newname] + rout['args'] + + l = var2fixfortran(vars, name, newname, f90mode) + if l[:13] == 'character*(*)': + if f90mode: + l = 'character(len=10)' + l[13:] + else: + l = 'character*10' + l[13:] + charselect = vars[name]['charselector'] + if charselect.get('*', '') == '(*)': + charselect['*'] = '10' + sargs = ', '.join(args) + if f90mode: + add('subroutine f2pywrap_%s_%s (%s)' % + (rout['modulename'], name, sargs)) + if not signature: + add('use %s, only : %s' % (rout['modulename'], fortranname)) + else: + add('subroutine f2pywrap%s (%s)' % (name, sargs)) + if not need_interface: + add('external %s' % (fortranname)) + l = l + ', ' + fortranname + if need_interface: + for line in rout['saved_interface'].split('\n'): + if line.lstrip().startswith('use ') and '__user__' not in line: + add(line) + + args = args[1:] + dumped_args = [] + for a in args: + if isexternal(vars[a]): + add('external %s' % (a)) + dumped_args.append(a) + for a in args: + if a in dumped_args: + continue + if isscalar(vars[a]): + 
add(var2fixfortran(vars, a, f90mode=f90mode)) + dumped_args.append(a) + for a in args: + if a in dumped_args: + continue + if isintent_in(vars[a]): + add(var2fixfortran(vars, a, f90mode=f90mode)) + dumped_args.append(a) + for a in args: + if a in dumped_args: + continue + add(var2fixfortran(vars, a, f90mode=f90mode)) + + add(l) + + if need_interface: + if f90mode: + # f90 module already defines needed interface + pass + else: + add('interface') + add(rout['saved_interface'].lstrip()) + add('end interface') + + sargs = ', '.join([a for a in args if a not in extra_args]) + + if not signature: + if islogicalfunction(rout): + add('%s = .not.(.not.%s(%s))' % (newname, fortranname, sargs)) + else: + add('%s = %s(%s)' % (newname, fortranname, sargs)) + if f90mode: + add('end subroutine f2pywrap_%s_%s' % (rout['modulename'], name)) + else: + add('end') + return ret[0] + + +def createsubrwrapper(rout, signature=0): + assert issubroutine(rout) + + extra_args = [] + vars = rout['vars'] + for a in rout['args']: + v = rout['vars'][a] + for i, d in enumerate(v.get('dimension', [])): + if d == ':': + dn = 'f2py_%s_d%s' % (a, i) + dv = dict(typespec='integer', intent=['hide']) + dv['='] = 'shape(%s, %s)' % (a, i) + extra_args.append(dn) + vars[dn] = dv + v['dimension'][i] = dn + rout['args'].extend(extra_args) + need_interface = bool(extra_args) + + ret = [''] + + def add(line, ret=ret): + ret[0] = '%s\n %s' % (ret[0], line) + name = rout['name'] + fortranname = getfortranname(rout) + f90mode = ismoduleroutine(rout) + + args = rout['args'] + + sargs = ', '.join(args) + if f90mode: + add('subroutine f2pywrap_%s_%s (%s)' % + (rout['modulename'], name, sargs)) + if not signature: + add('use %s, only : %s' % (rout['modulename'], fortranname)) + else: + add('subroutine f2pywrap%s (%s)' % (name, sargs)) + if not need_interface: + add('external %s' % (fortranname)) + + if need_interface: + for line in rout['saved_interface'].split('\n'): + if line.lstrip().startswith('use ') and '__user__' not in line: + add(line) + + dumped_args = [] + for a in args: + if isexternal(vars[a]): + add('external %s' % (a)) + dumped_args.append(a) + for a in args: + if a in dumped_args: + continue + if isscalar(vars[a]): + add(var2fixfortran(vars, a, f90mode=f90mode)) + dumped_args.append(a) + for a in args: + if a in dumped_args: + continue + add(var2fixfortran(vars, a, f90mode=f90mode)) + + if need_interface: + if f90mode: + # f90 module already defines needed interface + pass + else: + add('interface') + for line in rout['saved_interface'].split('\n'): + if line.lstrip().startswith('use ') and '__user__' in line: + continue + add(line) + add('end interface') + + sargs = ', '.join([a for a in args if a not in extra_args]) + + if not signature: + add('call %s(%s)' % (fortranname, sargs)) + if f90mode: + add('end subroutine f2pywrap_%s_%s' % (rout['modulename'], name)) + else: + add('end') + return ret[0] + + +def assubr(rout): + if isfunction_wrap(rout): + fortranname = getfortranname(rout) + name = rout['name'] + outmess('\t\tCreating wrapper for Fortran function "%s"("%s")...\n' % ( + name, fortranname)) + rout = copy.copy(rout) + fname = name + rname = fname + if 'result' in rout: + rname = rout['result'] + rout['vars'][fname] = rout['vars'][rname] + fvar = rout['vars'][fname] + if not isintent_out(fvar): + if 'intent' not in fvar: + fvar['intent'] = [] + fvar['intent'].append('out') + flag = 1 + for i in fvar['intent']: + if i.startswith('out='): + flag = 0 + break + if flag: + fvar['intent'].append('out=%s' % (rname)) + 
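+ # The result variable is prepended to the argument list below; e.g. a + # hypothetical 'real function f(x)' is wrapped, in effect, as + # 'subroutine f2pywrapf(ff2pywrap, x)' (see createfuncwrapper above).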
rout['args'][:] = [fname] + rout['args'] + return rout, createfuncwrapper(rout) + if issubroutine_wrap(rout): + fortranname = getfortranname(rout) + name = rout['name'] + outmess('\t\tCreating wrapper for Fortran subroutine "%s"("%s")...\n' % ( + name, fortranname)) + rout = copy.copy(rout) + return rout, createsubrwrapper(rout) + return rout, '' diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/rules.py b/.local/lib/python3.8/site-packages/numpy/f2py/rules.py new file mode 100644 index 0000000..78810a0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/rules.py @@ -0,0 +1,1486 @@ +#!/usr/bin/env python3 +""" + +Rules for building C/API module with f2py2e. + +Here is a skeleton of a new wrapper function (13Dec2001): + +wrapper_function(args) + declarations + get_python_arguments, say, `a' and `b' + + get_a_from_python + if (successful) { + + get_b_from_python + if (successful) { + + callfortran + if (successful) { + + put_a_to_python + if (successful) { + + put_b_to_python + if (successful) { + + buildvalue = ... + + } + + } + + } + + } + cleanup_b + + } + cleanup_a + + return buildvalue + +Copyright 1999,2000 Pearu Peterson all rights reserved, +Pearu Peterson +Permission to use, modify, and distribute this software is given under the +terms of the NumPy License. + +NO WARRANTY IS EXPRESSED OR IMPLIED. USE AT YOUR OWN RISK. +$Date: 2005/08/30 08:58:42 $ +Pearu Peterson + +""" +import os +import time +import copy + +# __version__.version is now the same as the NumPy version +from . import __version__ +f2py_version = __version__.version +numpy_version = __version__.version + +from .auxfuncs import ( + applyrules, debugcapi, dictappend, errmess, gentitle, getargs2, + hascallstatement, hasexternals, hasinitvalue, hasnote, hasresultnote, + isarray, isarrayofstrings, iscomplex, iscomplexarray, + iscomplexfunction, iscomplexfunction_warn, isdummyroutine, isexternal, + isfunction, isfunction_wrap, isint1array, isintent_aux, isintent_c, + isintent_callback, isintent_copy, isintent_hide, isintent_inout, + isintent_nothide, isintent_out, isintent_overwrite, islogical, + islong_complex, islong_double, islong_doublefunction, islong_long, + islong_longfunction, ismoduleroutine, isoptional, isrequired, isscalar, + issigned_long_longarray, isstring, isstringarray, isstringfunction, + issubroutine, issubroutine_wrap, isthreadsafe, isunsigned, + isunsigned_char, isunsigned_chararray, isunsigned_long_long, + isunsigned_long_longarray, isunsigned_short, isunsigned_shortarray, + l_and, l_not, l_or, outmess, replace, stripcomma, requiresf90wrapper +) + +from . import capi_maps +from . import cfuncs +from . import common_rules +from . import use_rules +from . import f90mod_rules +from . import func2subr + +options = {} +sepdict = {} +#for k in ['need_cfuncs']: sepdict[k]=',' +for k in ['decl', + 'frompyobj', + 'cleanupfrompyobj', + 'topyarr', 'method', + 'pyobjfrom', 'closepyobjfrom', + 'freemem', + 'userincludes', + 'includes0', 'includes', 'typedefs', 'typedefs_generated', + 'cppmacros', 'cfuncs', 'callbacks', + 'latexdoc', + 'restdoc', + 'routine_defs', 'externroutines', + 'initf2pywraphooks', + 'commonhooks', 'initcommonhooks', + 'f90modhooks', 'initf90modhooks']: + sepdict[k] = '\n' + +#################### Rules for C/API module ################# + +generationtime = int(os.environ.get('SOURCE_DATE_EPOCH', time.time())) +module_rules = { + 'modulebody': """\ +/* File: #modulename#module.c + * This file is auto-generated with f2py (version:#f2py_version#). 
+ * f2py is a Fortran to Python Interface Generator (FPIG), Second Edition, + * written by Pearu Peterson . + * Generation date: """ + time.asctime(time.gmtime(generationtime)) + """ + * Do not edit this file directly unless you know what you are doing!!! + */ + +#ifdef __cplusplus +extern \"C\" { +#endif + +#ifndef PY_SSIZE_T_CLEAN +#define PY_SSIZE_T_CLEAN +#endif /* PY_SSIZE_T_CLEAN */ + +""" + gentitle("See f2py2e/cfuncs.py: includes") + """ +#includes# +#includes0# + +""" + gentitle("See f2py2e/rules.py: mod_rules['modulebody']") + """ +static PyObject *#modulename#_error; +static PyObject *#modulename#_module; + +""" + gentitle("See f2py2e/cfuncs.py: typedefs") + """ +#typedefs# + +""" + gentitle("See f2py2e/cfuncs.py: typedefs_generated") + """ +#typedefs_generated# + +""" + gentitle("See f2py2e/cfuncs.py: cppmacros") + """ +#cppmacros# + +""" + gentitle("See f2py2e/cfuncs.py: cfuncs") + """ +#cfuncs# + +""" + gentitle("See f2py2e/cfuncs.py: userincludes") + """ +#userincludes# + +""" + gentitle("See f2py2e/capi_rules.py: usercode") + """ +#usercode# + +/* See f2py2e/rules.py */ +#externroutines# + +""" + gentitle("See f2py2e/capi_rules.py: usercode1") + """ +#usercode1# + +""" + gentitle("See f2py2e/cb_rules.py: buildcallback") + """ +#callbacks# + +""" + gentitle("See f2py2e/rules.py: buildapi") + """ +#body# + +""" + gentitle("See f2py2e/f90mod_rules.py: buildhooks") + """ +#f90modhooks# + +""" + gentitle("See f2py2e/rules.py: module_rules['modulebody']") + """ + +""" + gentitle("See f2py2e/common_rules.py: buildhooks") + """ +#commonhooks# + +""" + gentitle("See f2py2e/rules.py") + """ + +static FortranDataDef f2py_routine_defs[] = { +#routine_defs# + {NULL} +}; + +static PyMethodDef f2py_module_methods[] = { +#pymethoddef# + {NULL,NULL} +}; + +static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + "#modulename#", + NULL, + -1, + f2py_module_methods, + NULL, + NULL, + NULL, + NULL +}; + +PyMODINIT_FUNC PyInit_#modulename#(void) { + int i; + PyObject *m,*d, *s, *tmp; + m = #modulename#_module = PyModule_Create(&moduledef); + Py_SET_TYPE(&PyFortran_Type, &PyType_Type); + import_array(); + if (PyErr_Occurred()) + {PyErr_SetString(PyExc_ImportError, \"can't initialize module #modulename# (failed to import numpy)\"); return m;} + d = PyModule_GetDict(m); + s = PyUnicode_FromString(\"#f2py_version#\"); + PyDict_SetItemString(d, \"__version__\", s); + Py_DECREF(s); + s = PyUnicode_FromString( + \"This module '#modulename#' is auto-generated with f2py (version:#f2py_version#).\\nFunctions:\\n\"\n#docs#\".\"); + PyDict_SetItemString(d, \"__doc__\", s); + Py_DECREF(s); + s = PyUnicode_FromString(\"""" + numpy_version + """\"); + PyDict_SetItemString(d, \"__f2py_numpy_version__\", s); + Py_DECREF(s); + #modulename#_error = PyErr_NewException (\"#modulename#.error\", NULL, NULL); + /* + * Store the error object inside the dict, so that it could get deallocated. + * (in practice, this is a module, so it likely will not and cannot.) + */ + PyDict_SetItemString(d, \"_#modulename#_error\", #modulename#_error); + Py_DECREF(#modulename#_error); + for(i=0;f2py_routine_defs[i].name!=NULL;i++) { + tmp = PyFortranObject_NewAsAttr(&f2py_routine_defs[i]); + PyDict_SetItemString(d, f2py_routine_defs[i].name, tmp); + Py_DECREF(tmp); + } +#initf2pywraphooks# +#initf90modhooks# +#initcommonhooks# +#interface_usercode# + +#ifdef F2PY_REPORT_ATEXIT + if (! 
PyErr_Occurred()) + on_exit(f2py_report_on_exit,(void*)\"#modulename#\"); +#endif + return m; +} +#ifdef __cplusplus +} +#endif +""", + 'separatorsfor': {'latexdoc': '\n\n', + 'restdoc': '\n\n'}, + 'latexdoc': ['\\section{Module \\texttt{#texmodulename#}}\n', + '#modnote#\n', + '#latexdoc#'], + 'restdoc': ['Module #modulename#\n' + '=' * 80, + '\n#restdoc#'] +} + +defmod_rules = [ + {'body': '/*eof body*/', + 'method': '/*eof method*/', + 'externroutines': '/*eof externroutines*/', + 'routine_defs': '/*eof routine_defs*/', + 'initf90modhooks': '/*eof initf90modhooks*/', + 'initf2pywraphooks': '/*eof initf2pywraphooks*/', + 'initcommonhooks': '/*eof initcommonhooks*/', + 'latexdoc': '', + 'restdoc': '', + 'modnote': {hasnote: '#note#', l_not(hasnote): ''}, + } +] + +routine_rules = { + 'separatorsfor': sepdict, + 'body': """ +#begintitle# +static char doc_#apiname#[] = \"\\\n#docreturn##name#(#docsignatureshort#)\\n\\nWrapper for ``#name#``.\\\n\\n#docstrsigns#\"; +/* #declfortranroutine# */ +static PyObject *#apiname#(const PyObject *capi_self, + PyObject *capi_args, + PyObject *capi_keywds, + #functype# (*f2py_func)(#callprotoargument#)) { + PyObject * volatile capi_buildvalue = NULL; + volatile int f2py_success = 1; +#decl# + static char *capi_kwlist[] = {#kwlist##kwlistopt##kwlistxa#NULL}; +#usercode# +#routdebugenter# +#ifdef F2PY_REPORT_ATEXIT +f2py_start_clock(); +#endif + if (!PyArg_ParseTupleAndKeywords(capi_args,capi_keywds,\\ + \"#argformat#|#keyformat##xaformat#:#pyname#\",\\ + capi_kwlist#args_capi##keys_capi##keys_xa#))\n return NULL; +#frompyobj# +/*end of frompyobj*/ +#ifdef F2PY_REPORT_ATEXIT +f2py_start_call_clock(); +#endif +#callfortranroutine# +if (PyErr_Occurred()) + f2py_success = 0; +#ifdef F2PY_REPORT_ATEXIT +f2py_stop_call_clock(); +#endif +/*end of callfortranroutine*/ + if (f2py_success) { +#pyobjfrom# +/*end of pyobjfrom*/ + CFUNCSMESS(\"Building return value.\\n\"); + capi_buildvalue = Py_BuildValue(\"#returnformat#\"#return#); +/*closepyobjfrom*/ +#closepyobjfrom# + } /*if (f2py_success) after callfortranroutine*/ +/*cleanupfrompyobj*/ +#cleanupfrompyobj# + if (capi_buildvalue == NULL) { +#routdebugfailure# + } else { +#routdebugleave# + } + CFUNCSMESS(\"Freeing memory.\\n\"); +#freemem# +#ifdef F2PY_REPORT_ATEXIT +f2py_stop_clock(); +#endif + return capi_buildvalue; +} +#endtitle# +""", + 'routine_defs': '#routine_def#', + 'initf2pywraphooks': '#initf2pywraphook#', + 'externroutines': '#declfortranroutine#', + 'doc': '#docreturn##name#(#docsignature#)', + 'docshort': '#docreturn##name#(#docsignatureshort#)', + 'docs': '" #docreturn##name#(#docsignature#)\\n"\n', + 'need': ['arrayobject.h', 'CFUNCSMESS', 'MINMAX'], + 'cppmacros': {debugcapi: '#define DEBUGCFUNCS'}, + 'latexdoc': ['\\subsection{Wrapper function \\texttt{#texname#}}\n', + """ +\\noindent{{}\\verb@#docreturn##name#@{}}\\texttt{(#latexdocsignatureshort#)} +#routnote# + +#latexdocstrsigns# +"""], + 'restdoc': ['Wrapped function ``#name#``\n' + '-' * 80, + + ] +} + +################## Rules for C/API function ############## + +rout_rules = [ + { # Init + 'separatorsfor': {'callfortranroutine': '\n', 'routdebugenter': '\n', 'decl': '\n', + 'routdebugleave': '\n', 'routdebugfailure': '\n', + 'setjmpbuf': ' || ', + 'docstrreq': '\n', 'docstropt': '\n', 'docstrout': '\n', + 'docstrcbs': '\n', 'docstrsigns': '\\n"\n"', + 'latexdocstrsigns': '\n', + 'latexdocstrreq': '\n', 'latexdocstropt': '\n', + 'latexdocstrout': '\n', 'latexdocstrcbs': '\n', + }, + 'kwlist': '', 'kwlistopt': '', 'callfortran': '', 
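+ # Defaults continue below; later rules in rout_rules whose '_check' + # predicate matches the routine append to these entries via + # applyrules/dictappend in buildapi.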
'callfortranappend': '', + 'docsign': '', 'docsignopt': '', 'decl': '/*decl*/', + 'freemem': '/*freemem*/', + 'docsignshort': '', 'docsignoptshort': '', + 'docstrsigns': '', 'latexdocstrsigns': '', + 'docstrreq': '\\nParameters\\n----------', + 'docstropt': '\\nOther Parameters\\n----------------', + 'docstrout': '\\nReturns\\n-------', + 'docstrcbs': '\\nNotes\\n-----\\nCall-back functions::\\n', + 'latexdocstrreq': '\\noindent Required arguments:', + 'latexdocstropt': '\\noindent Optional arguments:', + 'latexdocstrout': '\\noindent Return objects:', + 'latexdocstrcbs': '\\noindent Call-back functions:', + 'args_capi': '', 'keys_capi': '', 'functype': '', + 'frompyobj': '/*frompyobj*/', + # this list will be reversed + 'cleanupfrompyobj': ['/*end of cleanupfrompyobj*/'], + 'pyobjfrom': '/*pyobjfrom*/', + # this list will be reversed + 'closepyobjfrom': ['/*end of closepyobjfrom*/'], + 'topyarr': '/*topyarr*/', 'routdebugleave': '/*routdebugleave*/', + 'routdebugenter': '/*routdebugenter*/', + 'routdebugfailure': '/*routdebugfailure*/', + 'callfortranroutine': '/*callfortranroutine*/', + 'argformat': '', 'keyformat': '', 'need_cfuncs': '', + 'docreturn': '', 'return': '', 'returnformat': '', 'rformat': '', + 'kwlistxa': '', 'keys_xa': '', 'xaformat': '', 'docsignxa': '', 'docsignxashort': '', + 'initf2pywraphook': '', + 'routnote': {hasnote: '--- #note#', l_not(hasnote): ''}, + }, { + 'apiname': 'f2py_rout_#modulename#_#name#', + 'pyname': '#modulename#.#name#', + 'decl': '', + '_check': l_not(ismoduleroutine) + }, { + 'apiname': 'f2py_rout_#modulename#_#f90modulename#_#name#', + 'pyname': '#modulename#.#f90modulename#.#name#', + 'decl': '', + '_check': ismoduleroutine + }, { # Subroutine + 'functype': 'void', + 'declfortranroutine': {l_and(l_not(l_or(ismoduleroutine, isintent_c)), l_not(isdummyroutine)): 'extern void #F_FUNC#(#fortranname#,#FORTRANNAME#)(#callprotoargument#);', + l_and(l_not(ismoduleroutine), isintent_c, l_not(isdummyroutine)): 'extern void #fortranname#(#callprotoargument#);', + ismoduleroutine: '', + isdummyroutine: '' + }, + 'routine_def': {l_not(l_or(ismoduleroutine, isintent_c, isdummyroutine)): ' {\"#name#\",-1,{{-1}},0,(char *)#F_FUNC#(#fortranname#,#FORTRANNAME#),(f2py_init_func)#apiname#,doc_#apiname#},', + l_and(l_not(ismoduleroutine), isintent_c, l_not(isdummyroutine)): ' {\"#name#\",-1,{{-1}},0,(char *)#fortranname#,(f2py_init_func)#apiname#,doc_#apiname#},', + l_and(l_not(ismoduleroutine), isdummyroutine): ' {\"#name#\",-1,{{-1}},0,NULL,(f2py_init_func)#apiname#,doc_#apiname#},', + }, + 'need': {l_and(l_not(l_or(ismoduleroutine, isintent_c)), l_not(isdummyroutine)): 'F_FUNC'}, + 'callfortranroutine': [ + {debugcapi: [ + """ fprintf(stderr,\"debug-capi:Fortran subroutine `#fortranname#(#callfortran#)\'\\n\");"""]}, + {hasexternals: """\ + if (#setjmpbuf#) { + f2py_success = 0; + } else {"""}, + {isthreadsafe: ' Py_BEGIN_ALLOW_THREADS'}, + {hascallstatement: ''' #callstatement#; + /*(*f2py_func)(#callfortran#);*/'''}, + {l_not(l_or(hascallstatement, isdummyroutine)) + : ' (*f2py_func)(#callfortran#);'}, + {isthreadsafe: ' Py_END_ALLOW_THREADS'}, + {hasexternals: """ }"""} + ], + '_check': l_and(issubroutine, l_not(issubroutine_wrap)), + }, { # Wrapped function + 'functype': 'void', + 'declfortranroutine': {l_not(l_or(ismoduleroutine, isdummyroutine)): 'extern void #F_WRAPPEDFUNC#(#name_lower#,#NAME#)(#callprotoargument#);', + isdummyroutine: '', + }, + + 'routine_def': {l_not(l_or(ismoduleroutine, isdummyroutine)): ' {\"#name#\",-1,{{-1}},0,(char 
*)#F_WRAPPEDFUNC#(#name_lower#,#NAME#),(f2py_init_func)#apiname#,doc_#apiname#},', + isdummyroutine: ' {\"#name#\",-1,{{-1}},0,NULL,(f2py_init_func)#apiname#,doc_#apiname#},', + }, + 'initf2pywraphook': {l_not(l_or(ismoduleroutine, isdummyroutine)): ''' + { + extern #ctype# #F_FUNC#(#name_lower#,#NAME#)(void); + PyObject* o = PyDict_GetItemString(d,"#name#"); + tmp = F2PyCapsule_FromVoidPtr((void*)#F_FUNC#(#name_lower#,#NAME#),NULL); + PyObject_SetAttrString(o,"_cpointer", tmp); + Py_DECREF(tmp); + s = PyUnicode_FromString("#name#"); + PyObject_SetAttrString(o,"__name__", s); + Py_DECREF(s); + } + '''}, + 'need': {l_not(l_or(ismoduleroutine, isdummyroutine)): ['F_WRAPPEDFUNC', 'F_FUNC']}, + 'callfortranroutine': [ + {debugcapi: [ + """ fprintf(stderr,\"debug-capi:Fortran subroutine `f2pywrap#name_lower#(#callfortran#)\'\\n\");"""]}, + {hasexternals: """\ + if (#setjmpbuf#) { + f2py_success = 0; + } else {"""}, + {isthreadsafe: ' Py_BEGIN_ALLOW_THREADS'}, + {l_not(l_or(hascallstatement, isdummyroutine)) + : ' (*f2py_func)(#callfortran#);'}, + {hascallstatement: + ' #callstatement#;\n /*(*f2py_func)(#callfortran#);*/'}, + {isthreadsafe: ' Py_END_ALLOW_THREADS'}, + {hasexternals: ' }'} + ], + '_check': isfunction_wrap, + }, { # Wrapped subroutine + 'functype': 'void', + 'declfortranroutine': {l_not(l_or(ismoduleroutine, isdummyroutine)): 'extern void #F_WRAPPEDFUNC#(#name_lower#,#NAME#)(#callprotoargument#);', + isdummyroutine: '', + }, + + 'routine_def': {l_not(l_or(ismoduleroutine, isdummyroutine)): ' {\"#name#\",-1,{{-1}},0,(char *)#F_WRAPPEDFUNC#(#name_lower#,#NAME#),(f2py_init_func)#apiname#,doc_#apiname#},', + isdummyroutine: ' {\"#name#\",-1,{{-1}},0,NULL,(f2py_init_func)#apiname#,doc_#apiname#},', + }, + 'initf2pywraphook': {l_not(l_or(ismoduleroutine, isdummyroutine)): ''' + { + extern void #F_FUNC#(#name_lower#,#NAME#)(void); + PyObject* o = PyDict_GetItemString(d,"#name#"); + tmp = F2PyCapsule_FromVoidPtr((void*)#F_FUNC#(#name_lower#,#NAME#),NULL); + PyObject_SetAttrString(o,"_cpointer", tmp); + Py_DECREF(tmp); + s = PyUnicode_FromString("#name#"); + PyObject_SetAttrString(o,"__name__", s); + Py_DECREF(s); + } + '''}, + 'need': {l_not(l_or(ismoduleroutine, isdummyroutine)): ['F_WRAPPEDFUNC', 'F_FUNC']}, + 'callfortranroutine': [ + {debugcapi: [ + """ fprintf(stderr,\"debug-capi:Fortran subroutine `f2pywrap#name_lower#(#callfortran#)\'\\n\");"""]}, + {hasexternals: """\ + if (#setjmpbuf#) { + f2py_success = 0; + } else {"""}, + {isthreadsafe: ' Py_BEGIN_ALLOW_THREADS'}, + {l_not(l_or(hascallstatement, isdummyroutine)) + : ' (*f2py_func)(#callfortran#);'}, + {hascallstatement: + ' #callstatement#;\n /*(*f2py_func)(#callfortran#);*/'}, + {isthreadsafe: ' Py_END_ALLOW_THREADS'}, + {hasexternals: ' }'} + ], + '_check': issubroutine_wrap, + }, { # Function + 'functype': '#ctype#', + 'docreturn': {l_not(isintent_hide): '#rname#,'}, + 'docstrout': '#pydocsignout#', + 'latexdocstrout': ['\\item[]{{}\\verb@#pydocsignout#@{}}', + {hasresultnote: '--- #resultnote#'}], + 'callfortranroutine': [{l_and(debugcapi, isstringfunction): """\ +#ifdef USESCOMPAQFORTRAN + fprintf(stderr,\"debug-capi:Fortran function #ctype# #fortranname#(#callcompaqfortran#)\\n\"); +#else + fprintf(stderr,\"debug-capi:Fortran function #ctype# #fortranname#(#callfortran#)\\n\"); +#endif +"""}, + {l_and(debugcapi, l_not(isstringfunction)): """\ + fprintf(stderr,\"debug-capi:Fortran function #ctype# #fortranname#(#callfortran#)\\n\"); +"""} + ], + '_check': l_and(isfunction, l_not(isfunction_wrap)) + }, { # Scalar function 
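+ # Applies to functions returning a non-string scalar that need no + # wrapper (see '_check' below): a '#name#_return_value' of the result + # type is declared, filled by the call and packed into the return + # tuple; complex results are converted with pyobj_from_#ctype#1 first.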
+ 'declfortranroutine': {l_and(l_not(l_or(ismoduleroutine, isintent_c)), l_not(isdummyroutine)): 'extern #ctype# #F_FUNC#(#fortranname#,#FORTRANNAME#)(#callprotoargument#);', + l_and(l_not(ismoduleroutine), isintent_c, l_not(isdummyroutine)): 'extern #ctype# #fortranname#(#callprotoargument#);', + isdummyroutine: '' + }, + 'routine_def': {l_and(l_not(l_or(ismoduleroutine, isintent_c)), l_not(isdummyroutine)): ' {\"#name#\",-1,{{-1}},0,(char *)#F_FUNC#(#fortranname#,#FORTRANNAME#),(f2py_init_func)#apiname#,doc_#apiname#},', + l_and(l_not(ismoduleroutine), isintent_c, l_not(isdummyroutine)): ' {\"#name#\",-1,{{-1}},0,(char *)#fortranname#,(f2py_init_func)#apiname#,doc_#apiname#},', + isdummyroutine: ' {\"#name#\",-1,{{-1}},0,NULL,(f2py_init_func)#apiname#,doc_#apiname#},', + }, + 'decl': [{iscomplexfunction_warn: ' #ctype# #name#_return_value={0,0};', + l_not(iscomplexfunction): ' #ctype# #name#_return_value=0;'}, + {iscomplexfunction: + ' PyObject *#name#_return_value_capi = Py_None;'} + ], + 'callfortranroutine': [ + {hasexternals: """\ + if (#setjmpbuf#) { + f2py_success = 0; + } else {"""}, + {isthreadsafe: ' Py_BEGIN_ALLOW_THREADS'}, + {hascallstatement: ''' #callstatement#; +/* #name#_return_value = (*f2py_func)(#callfortran#);*/ +'''}, + {l_not(l_or(hascallstatement, isdummyroutine)) + : ' #name#_return_value = (*f2py_func)(#callfortran#);'}, + {isthreadsafe: ' Py_END_ALLOW_THREADS'}, + {hasexternals: ' }'}, + {l_and(debugcapi, iscomplexfunction) + : ' fprintf(stderr,"#routdebugshowvalue#\\n",#name#_return_value.r,#name#_return_value.i);'}, + {l_and(debugcapi, l_not(iscomplexfunction)): ' fprintf(stderr,"#routdebugshowvalue#\\n",#name#_return_value);'}], + 'pyobjfrom': {iscomplexfunction: ' #name#_return_value_capi = pyobj_from_#ctype#1(#name#_return_value);'}, + 'need': [{l_not(isdummyroutine): 'F_FUNC'}, + {iscomplexfunction: 'pyobj_from_#ctype#1'}, + {islong_longfunction: 'long_long'}, + {islong_doublefunction: 'long_double'}], + 'returnformat': {l_not(isintent_hide): '#rformat#'}, + 'return': {iscomplexfunction: ',#name#_return_value_capi', + l_not(l_or(iscomplexfunction, isintent_hide)): ',#name#_return_value'}, + '_check': l_and(isfunction, l_not(isstringfunction), l_not(isfunction_wrap)) + }, { # String function # in use for --no-wrap + 'declfortranroutine': 'extern void #F_FUNC#(#fortranname#,#FORTRANNAME#)(#callprotoargument#);', + 'routine_def': {l_not(l_or(ismoduleroutine, isintent_c)): + ' {\"#name#\",-1,{{-1}},0,(char *)#F_FUNC#(#fortranname#,#FORTRANNAME#),(f2py_init_func)#apiname#,doc_#apiname#},', + l_and(l_not(ismoduleroutine), isintent_c): + ' {\"#name#\",-1,{{-1}},0,(char *)#fortranname#,(f2py_init_func)#apiname#,doc_#apiname#},' + }, + 'decl': [' #ctype# #name#_return_value = NULL;', + ' int #name#_return_value_len = 0;'], + 'callfortran':'#name#_return_value,#name#_return_value_len,', + 'callfortranroutine':[' #name#_return_value_len = #rlength#;', + ' if ((#name#_return_value = (string)malloc(' + + '#name#_return_value_len+1) == NULL) {', + ' PyErr_SetString(PyExc_MemoryError, \"out of memory\");', + ' f2py_success = 0;', + ' } else {', + " (#name#_return_value)[#name#_return_value_len] = '\\0';", + ' }', + ' if (f2py_success) {', + {hasexternals: """\ + if (#setjmpbuf#) { + f2py_success = 0; + } else {"""}, + {isthreadsafe: ' Py_BEGIN_ALLOW_THREADS'}, + """\ +#ifdef USESCOMPAQFORTRAN + (*f2py_func)(#callcompaqfortran#); +#else + (*f2py_func)(#callfortran#); +#endif +""", + {isthreadsafe: ' Py_END_ALLOW_THREADS'}, + {hasexternals: ' }'}, + {debugcapi: + ' 
fprintf(stderr,"#routdebugshowvalue#\\n",#name#_return_value_len,#name#_return_value);'}, + ' } /* if (f2py_success) after (string)malloc */', + ], + 'returnformat': '#rformat#', + 'return': ',#name#_return_value', + 'freemem': ' STRINGFREE(#name#_return_value);', + 'need': ['F_FUNC', '#ctype#', 'STRINGFREE'], + '_check':l_and(isstringfunction, l_not(isfunction_wrap)) # ???obsolete + }, + { # Debugging + 'routdebugenter': ' fprintf(stderr,"debug-capi:Python C/API function #modulename#.#name#(#docsignature#)\\n");', + 'routdebugleave': ' fprintf(stderr,"debug-capi:Python C/API function #modulename#.#name#: successful.\\n");', + 'routdebugfailure': ' fprintf(stderr,"debug-capi:Python C/API function #modulename#.#name#: failure.\\n");', + '_check': debugcapi + } +] + +################ Rules for arguments ################## + +typedef_need_dict = {islong_long: 'long_long', + islong_double: 'long_double', + islong_complex: 'complex_long_double', + isunsigned_char: 'unsigned_char', + isunsigned_short: 'unsigned_short', + isunsigned: 'unsigned', + isunsigned_long_long: 'unsigned_long_long', + isunsigned_chararray: 'unsigned_char', + isunsigned_shortarray: 'unsigned_short', + isunsigned_long_longarray: 'unsigned_long_long', + issigned_long_longarray: 'long_long', + } + +aux_rules = [ + { + 'separatorsfor': sepdict + }, + { # Common + 'frompyobj': [' /* Processing auxiliary variable #varname# */', + {debugcapi: ' fprintf(stderr,"#vardebuginfo#\\n");'}, ], + 'cleanupfrompyobj': ' /* End of cleaning variable #varname# */', + 'need': typedef_need_dict, + }, + # Scalars (not complex) + { # Common + 'decl': ' #ctype# #varname# = 0;', + 'need': {hasinitvalue: 'math.h'}, + 'frompyobj': {hasinitvalue: ' #varname# = #init#;'}, + '_check': l_and(isscalar, l_not(iscomplex)), + }, + { + 'return': ',#varname#', + 'docstrout': '#pydocsignout#', + 'docreturn': '#outvarname#,', + 'returnformat': '#varrformat#', + '_check': l_and(isscalar, l_not(iscomplex), isintent_out), + }, + # Complex scalars + { # Common + 'decl': ' #ctype# #varname#;', + 'frompyobj': {hasinitvalue: ' #varname#.r = #init.r#, #varname#.i = #init.i#;'}, + '_check': iscomplex + }, + # String + { # Common + 'decl': [' #ctype# #varname# = NULL;', + ' int slen(#varname#);', + ], + 'need':['len..'], + '_check':isstring + }, + # Array + { # Common + 'decl': [' #ctype# *#varname# = NULL;', + ' npy_intp #varname#_Dims[#rank#] = {#rank*[-1]#};', + ' const int #varname#_Rank = #rank#;', + ], + 'need':['len..', {hasinitvalue: 'forcomb'}, {hasinitvalue: 'CFUNCSMESS'}], + '_check': isarray + }, + # Scalararray + { # Common + '_check': l_and(isarray, l_not(iscomplexarray)) + }, { # Not hidden + '_check': l_and(isarray, l_not(iscomplexarray), isintent_nothide) + }, + # Integer*1 array + {'need': '#ctype#', + '_check': isint1array, + '_depend': '' + }, + # Integer*-1 array + {'need': '#ctype#', + '_check': isunsigned_chararray, + '_depend': '' + }, + # Integer*-2 array + {'need': '#ctype#', + '_check': isunsigned_shortarray, + '_depend': '' + }, + # Integer*-8 array + {'need': '#ctype#', + '_check': isunsigned_long_longarray, + '_depend': '' + }, + # Complexarray + {'need': '#ctype#', + '_check': iscomplexarray, + '_depend': '' + }, + # Stringarray + { + 'callfortranappend': {isarrayofstrings: 'flen(#varname#),'}, + 'need': 'string', + '_check': isstringarray + } +] + +arg_rules = [ + { + 'separatorsfor': sepdict + }, + { # Common + 'frompyobj': [' /* Processing variable #varname# */', + {debugcapi: ' fprintf(stderr,"#vardebuginfo#\\n");'}, ], + 
'cleanupfrompyobj': ' /* End of cleaning variable #varname# */', + '_depend': '', + 'need': typedef_need_dict, + }, + # Doc signatures + { + 'docstropt': {l_and(isoptional, isintent_nothide): '#pydocsign#'}, + 'docstrreq': {l_and(isrequired, isintent_nothide): '#pydocsign#'}, + 'docstrout': {isintent_out: '#pydocsignout#'}, + 'latexdocstropt': {l_and(isoptional, isintent_nothide): ['\\item[]{{}\\verb@#pydocsign#@{}}', + {hasnote: '--- #note#'}]}, + 'latexdocstrreq': {l_and(isrequired, isintent_nothide): ['\\item[]{{}\\verb@#pydocsign#@{}}', + {hasnote: '--- #note#'}]}, + 'latexdocstrout': {isintent_out: ['\\item[]{{}\\verb@#pydocsignout#@{}}', + {l_and(hasnote, isintent_hide): '--- #note#', + l_and(hasnote, isintent_nothide): '--- See above.'}]}, + 'depend': '' + }, + # Required/Optional arguments + { + 'kwlist': '"#varname#",', + 'docsign': '#varname#,', + '_check': l_and(isintent_nothide, l_not(isoptional)) + }, + { + 'kwlistopt': '"#varname#",', + 'docsignopt': '#varname#=#showinit#,', + 'docsignoptshort': '#varname#,', + '_check': l_and(isintent_nothide, isoptional) + }, + # Docstring/BuildValue + { + 'docreturn': '#outvarname#,', + 'returnformat': '#varrformat#', + '_check': isintent_out + }, + # Externals (call-back functions) + { # Common + 'docsignxa': {isintent_nothide: '#varname#_extra_args=(),'}, + 'docsignxashort': {isintent_nothide: '#varname#_extra_args,'}, + 'docstropt': {isintent_nothide: '#varname#_extra_args : input tuple, optional\\n Default: ()'}, + 'docstrcbs': '#cbdocstr#', + 'latexdocstrcbs': '\\item[] #cblatexdocstr#', + 'latexdocstropt': {isintent_nothide: '\\item[]{{}\\verb@#varname#_extra_args := () input tuple@{}} --- Extra arguments for call-back function {{}\\verb@#varname#@{}}.'}, + 'decl': [' #cbname#_t #varname#_cb = { Py_None, NULL, 0 };', + ' #cbname#_t *#varname#_cb_ptr = &#varname#_cb;', + ' PyTupleObject *#varname#_xa_capi = NULL;', + {l_not(isintent_callback): + ' #cbname#_typedef #varname#_cptr;'} + ], + 'kwlistxa': {isintent_nothide: '"#varname#_extra_args",'}, + 'argformat': {isrequired: 'O'}, + 'keyformat': {isoptional: 'O'}, + 'xaformat': {isintent_nothide: 'O!'}, + 'args_capi': {isrequired: ',&#varname#_cb.capi'}, + 'keys_capi': {isoptional: ',&#varname#_cb.capi'}, + 'keys_xa': ',&PyTuple_Type,&#varname#_xa_capi', + 'setjmpbuf': '(setjmp(#varname#_cb.jmpbuf))', + 'callfortran': {l_not(isintent_callback): '#varname#_cptr,'}, + 'need': ['#cbname#', 'setjmp.h'], + '_check':isexternal + }, + { + 'frompyobj': [{l_not(isintent_callback): """\ +if(F2PyCapsule_Check(#varname#_cb.capi)) { + #varname#_cptr = F2PyCapsule_AsVoidPtr(#varname#_cb.capi); +} else { + #varname#_cptr = #cbname#; +} +"""}, {isintent_callback: """\ +if (#varname#_cb.capi==Py_None) { + #varname#_cb.capi = PyObject_GetAttrString(#modulename#_module,\"#varname#\"); + if (#varname#_cb.capi) { + if (#varname#_xa_capi==NULL) { + if (PyObject_HasAttrString(#modulename#_module,\"#varname#_extra_args\")) { + PyObject* capi_tmp = PyObject_GetAttrString(#modulename#_module,\"#varname#_extra_args\"); + if (capi_tmp) { + #varname#_xa_capi = (PyTupleObject *)PySequence_Tuple(capi_tmp); + Py_DECREF(capi_tmp); + } + else { + #varname#_xa_capi = (PyTupleObject *)Py_BuildValue(\"()\"); + } + if (#varname#_xa_capi==NULL) { + PyErr_SetString(#modulename#_error,\"Failed to convert #modulename#.#varname#_extra_args to tuple.\\n\"); + return NULL; + } + } + } + } + if (#varname#_cb.capi==NULL) { + PyErr_SetString(#modulename#_error,\"Callback #varname# not defined (as an argument or module #modulename# 
attribute).\\n\"); + return NULL; + } +} +"""}, + """\ + if (create_cb_arglist(#varname#_cb.capi,#varname#_xa_capi,#maxnofargs#,#nofoptargs#,&#varname#_cb.nofargs,&#varname#_cb.args_capi,\"failed in processing argument list for call-back #varname#.\")) { +""", + {debugcapi: ["""\ + fprintf(stderr,\"debug-capi:Assuming %d arguments; at most #maxnofargs#(-#nofoptargs#) is expected.\\n\",#varname#_cb.nofargs); + CFUNCSMESSPY(\"for #varname#=\",#varname#_cb.capi);""", + {l_not(isintent_callback): """ fprintf(stderr,\"#vardebugshowvalue# (call-back in C).\\n\",#cbname#);"""}]}, + """\ + CFUNCSMESS(\"Saving callback variables for `#varname#`.\\n\"); + #varname#_cb_ptr = swap_active_#cbname#(#varname#_cb_ptr);""", + ], + 'cleanupfrompyobj': + """\ + CFUNCSMESS(\"Restoring callback variables for `#varname#`.\\n\"); + #varname#_cb_ptr = swap_active_#cbname#(#varname#_cb_ptr); + Py_DECREF(#varname#_cb.args_capi); + }""", + 'need': ['SWAP', 'create_cb_arglist'], + '_check':isexternal, + '_depend':'' + }, + # Scalars (not complex) + { # Common + 'decl': ' #ctype# #varname# = 0;', + 'pyobjfrom': {debugcapi: ' fprintf(stderr,"#vardebugshowvalue#\\n",#varname#);'}, + 'callfortran': {isintent_c: '#varname#,', l_not(isintent_c): '&#varname#,'}, + 'return': {isintent_out: ',#varname#'}, + '_check': l_and(isscalar, l_not(iscomplex)) + }, { + 'need': {hasinitvalue: 'math.h'}, + '_check': l_and(isscalar, l_not(iscomplex)), + }, { # Not hidden + 'decl': ' PyObject *#varname#_capi = Py_None;', + 'argformat': {isrequired: 'O'}, + 'keyformat': {isoptional: 'O'}, + 'args_capi': {isrequired: ',&#varname#_capi'}, + 'keys_capi': {isoptional: ',&#varname#_capi'}, + 'pyobjfrom': {isintent_inout: """\ + f2py_success = try_pyarr_from_#ctype#(#varname#_capi,&#varname#); + if (f2py_success) {"""}, + 'closepyobjfrom': {isintent_inout: " } /*if (f2py_success) of #varname# pyobjfrom*/"}, + 'need': {isintent_inout: 'try_pyarr_from_#ctype#'}, + '_check': l_and(isscalar, l_not(iscomplex), isintent_nothide) + }, { + 'frompyobj': [ + # hasinitvalue... + # if pyobj is None: + # varname = init + # else + # from_pyobj(varname) + # + # isoptional and noinitvalue... + # if pyobj is not None: + # from_pyobj(varname) + # else: + # varname is uninitialized + # + # ... 
+ # from_pyobj(varname) + # + {hasinitvalue: ' if (#varname#_capi == Py_None) #varname# = #init#; else', + '_depend': ''}, + {l_and(isoptional, l_not(hasinitvalue)): ' if (#varname#_capi != Py_None)', + '_depend': ''}, + {l_not(islogical): '''\ + f2py_success = #ctype#_from_pyobj(&#varname#,#varname#_capi,"#pyname#() #nth# (#varname#) can\'t be converted to #ctype#"); + if (f2py_success) {'''}, + {islogical: '''\ + #varname# = (#ctype#)PyObject_IsTrue(#varname#_capi); + f2py_success = 1; + if (f2py_success) {'''}, + ], + 'cleanupfrompyobj': ' } /*if (f2py_success) of #varname#*/', + 'need': {l_not(islogical): '#ctype#_from_pyobj'}, + '_check': l_and(isscalar, l_not(iscomplex), isintent_nothide), + '_depend': '' + }, { # Hidden + 'frompyobj': {hasinitvalue: ' #varname# = #init#;'}, + 'need': typedef_need_dict, + '_check': l_and(isscalar, l_not(iscomplex), isintent_hide), + '_depend': '' + }, { # Common + 'frompyobj': {debugcapi: ' fprintf(stderr,"#vardebugshowvalue#\\n",#varname#);'}, + '_check': l_and(isscalar, l_not(iscomplex)), + '_depend': '' + }, + # Complex scalars + { # Common + 'decl': ' #ctype# #varname#;', + 'callfortran': {isintent_c: '#varname#,', l_not(isintent_c): '&#varname#,'}, + 'pyobjfrom': {debugcapi: ' fprintf(stderr,"#vardebugshowvalue#\\n",#varname#.r,#varname#.i);'}, + 'return': {isintent_out: ',#varname#_capi'}, + '_check': iscomplex + }, { # Not hidden + 'decl': ' PyObject *#varname#_capi = Py_None;', + 'argformat': {isrequired: 'O'}, + 'keyformat': {isoptional: 'O'}, + 'args_capi': {isrequired: ',&#varname#_capi'}, + 'keys_capi': {isoptional: ',&#varname#_capi'}, + 'need': {isintent_inout: 'try_pyarr_from_#ctype#'}, + 'pyobjfrom': {isintent_inout: """\ + f2py_success = try_pyarr_from_#ctype#(#varname#_capi,&#varname#); + if (f2py_success) {"""}, + 'closepyobjfrom': {isintent_inout: " } /*if (f2py_success) of #varname# pyobjfrom*/"}, + '_check': l_and(iscomplex, isintent_nothide) + }, { + 'frompyobj': [{hasinitvalue: ' if (#varname#_capi==Py_None) {#varname#.r = #init.r#, #varname#.i = #init.i#;} else'}, + {l_and(isoptional, l_not(hasinitvalue)) + : ' if (#varname#_capi != Py_None)'}, + ' f2py_success = #ctype#_from_pyobj(&#varname#,#varname#_capi,"#pyname#() #nth# (#varname#) can\'t be converted to #ctype#");' + '\n if (f2py_success) {'], + 'cleanupfrompyobj': ' } /*if (f2py_success) of #varname# frompyobj*/', + 'need': ['#ctype#_from_pyobj'], + '_check': l_and(iscomplex, isintent_nothide), + '_depend': '' + }, { # Hidden + 'decl': {isintent_out: ' PyObject *#varname#_capi = Py_None;'}, + '_check': l_and(iscomplex, isintent_hide) + }, { + 'frompyobj': {hasinitvalue: ' #varname#.r = #init.r#, #varname#.i = #init.i#;'}, + '_check': l_and(iscomplex, isintent_hide), + '_depend': '' + }, { # Common + 'pyobjfrom': {isintent_out: ' #varname#_capi = pyobj_from_#ctype#1(#varname#);'}, + 'need': ['pyobj_from_#ctype#1'], + '_check': iscomplex + }, { + 'frompyobj': {debugcapi: ' fprintf(stderr,"#vardebugshowvalue#\\n",#varname#.r,#varname#.i);'}, + '_check': iscomplex, + '_depend': '' + }, + # String + { # Common + 'decl': [' #ctype# #varname# = NULL;', + ' int slen(#varname#);', + ' PyObject *#varname#_capi = Py_None;'], + 'callfortran':'#varname#,', + 'callfortranappend':'slen(#varname#),', + 'pyobjfrom':[ + {debugcapi: + ' fprintf(stderr,' + '"#vardebugshowvalue#\\n",slen(#varname#),#varname#);'}, + # The trailing null value for Fortran is blank. 
+ {l_and(isintent_out, l_not(isintent_c)): + " STRINGPADN(#varname#, slen(#varname#), ' ', '\\0');"}, + ], + 'return': {isintent_out: ',#varname#'}, + 'need': ['len..', + {l_and(isintent_out, l_not(isintent_c)): 'STRINGPADN'}], + '_check':isstring + }, { # Common + 'frompyobj': [ + """\ + slen(#varname#) = #length#; + f2py_success = #ctype#_from_pyobj(&#varname#,&slen(#varname#),#init#,""" +"""#varname#_capi,\"#ctype#_from_pyobj failed in converting #nth#""" +"""`#varname#\' of #pyname# to C #ctype#\"); + if (f2py_success) {""", + # The trailing null value for Fortran is blank. + {l_not(isintent_c): + " STRINGPADN(#varname#, slen(#varname#), '\\0', ' ');"}, + ], + 'cleanupfrompyobj': """\ + STRINGFREE(#varname#); + } /*if (f2py_success) of #varname#*/""", + 'need': ['#ctype#_from_pyobj', 'len..', 'STRINGFREE', + {l_not(isintent_c): 'STRINGPADN'}], + '_check':isstring, + '_depend':'' + }, { # Not hidden + 'argformat': {isrequired: 'O'}, + 'keyformat': {isoptional: 'O'}, + 'args_capi': {isrequired: ',&#varname#_capi'}, + 'keys_capi': {isoptional: ',&#varname#_capi'}, + 'pyobjfrom': [ + {l_and(isintent_inout, l_not(isintent_c)): + " STRINGPADN(#varname#, slen(#varname#), ' ', '\\0');"}, + {isintent_inout: '''\ + f2py_success = try_pyarr_from_#ctype#(#varname#_capi, #varname#, + slen(#varname#)); + if (f2py_success) {'''}], + 'closepyobjfrom': {isintent_inout: ' } /*if (f2py_success) of #varname# pyobjfrom*/'}, + 'need': {isintent_inout: 'try_pyarr_from_#ctype#', + l_and(isintent_inout, l_not(isintent_c)): 'STRINGPADN'}, + '_check': l_and(isstring, isintent_nothide) + }, { # Hidden + '_check': l_and(isstring, isintent_hide) + }, { + 'frompyobj': {debugcapi: ' fprintf(stderr,"#vardebugshowvalue#\\n",slen(#varname#),#varname#);'}, + '_check': isstring, + '_depend': '' + }, + # Array + { # Common + 'decl': [' #ctype# *#varname# = NULL;', + ' npy_intp #varname#_Dims[#rank#] = {#rank*[-1]#};', + ' const int #varname#_Rank = #rank#;', + ' PyArrayObject *capi_#varname#_tmp = NULL;', + ' int capi_#varname#_intent = 0;', + ], + 'callfortran':'#varname#,', + 'return':{isintent_out: ',capi_#varname#_tmp'}, + 'need': 'len..', + '_check': isarray + }, { # intent(overwrite) array + 'decl': ' int capi_overwrite_#varname# = 1;', + 'kwlistxa': '"overwrite_#varname#",', + 'xaformat': 'i', + 'keys_xa': ',&capi_overwrite_#varname#', + 'docsignxa': 'overwrite_#varname#=1,', + 'docsignxashort': 'overwrite_#varname#,', + 'docstropt': 'overwrite_#varname# : input int, optional\\n Default: 1', + '_check': l_and(isarray, isintent_overwrite), + }, { + 'frompyobj': ' capi_#varname#_intent |= (capi_overwrite_#varname#?0:F2PY_INTENT_COPY);', + '_check': l_and(isarray, isintent_overwrite), + '_depend': '', + }, + { # intent(copy) array + 'decl': ' int capi_overwrite_#varname# = 0;', + 'kwlistxa': '"overwrite_#varname#",', + 'xaformat': 'i', + 'keys_xa': ',&capi_overwrite_#varname#', + 'docsignxa': 'overwrite_#varname#=0,', + 'docsignxashort': 'overwrite_#varname#,', + 'docstropt': 'overwrite_#varname# : input int, optional\\n Default: 0', + '_check': l_and(isarray, isintent_copy), + }, { + 'frompyobj': ' capi_#varname#_intent |= (capi_overwrite_#varname#?0:F2PY_INTENT_COPY);', + '_check': l_and(isarray, isintent_copy), + '_depend': '', + }, { + 'need': [{hasinitvalue: 'forcomb'}, {hasinitvalue: 'CFUNCSMESS'}], + '_check': isarray, + '_depend': '' + }, { # Not hidden + 'decl': ' PyObject *#varname#_capi = Py_None;', + 'argformat': {isrequired: 'O'}, + 'keyformat': {isoptional: 'O'}, + 'args_capi': {isrequired: 
',&#varname#_capi'}, + 'keys_capi': {isoptional: ',&#varname#_capi'}, + '_check': l_and(isarray, isintent_nothide) + }, { + 'frompyobj': [' #setdims#;', + ' capi_#varname#_intent |= #intent#;', + {isintent_hide: + ' capi_#varname#_tmp = array_from_pyobj(#atype#,#varname#_Dims,#varname#_Rank,capi_#varname#_intent,Py_None);'}, + {isintent_nothide: + ' capi_#varname#_tmp = array_from_pyobj(#atype#,#varname#_Dims,#varname#_Rank,capi_#varname#_intent,#varname#_capi);'}, + """\ + if (capi_#varname#_tmp == NULL) { + PyObject *exc, *val, *tb; + PyErr_Fetch(&exc, &val, &tb); + PyErr_SetString(exc ? exc : #modulename#_error,\"failed in converting #nth# `#varname#\' of #pyname# to C/Fortran array\" ); + npy_PyErr_ChainExceptionsCause(exc, val, tb); + } else { + #varname# = (#ctype# *)(PyArray_DATA(capi_#varname#_tmp)); +""", + {hasinitvalue: [ + {isintent_nothide: + ' if (#varname#_capi == Py_None) {'}, + {isintent_hide: ' {'}, + {iscomplexarray: ' #ctype# capi_c;'}, + """\ + int *_i,capi_i=0; + CFUNCSMESS(\"#name#: Initializing #varname#=#init#\\n\"); + if (initforcomb(PyArray_DIMS(capi_#varname#_tmp),PyArray_NDIM(capi_#varname#_tmp),1)) { + while ((_i = nextforcomb())) + #varname#[capi_i++] = #init#; /* fortran way */ + } else { + PyObject *exc, *val, *tb; + PyErr_Fetch(&exc, &val, &tb); + PyErr_SetString(exc ? exc : #modulename#_error,\"Initialization of #nth# #varname# failed (initforcomb).\"); + npy_PyErr_ChainExceptionsCause(exc, val, tb); + f2py_success = 0; + } + } + if (f2py_success) {"""]}, + ], + 'cleanupfrompyobj': [ # note that this list will be reversed + ' } /*if (capi_#varname#_tmp == NULL) ... else of #varname#*/', + {l_not(l_or(isintent_out, isintent_hide)): """\ + if((PyObject *)capi_#varname#_tmp!=#varname#_capi) { + Py_XDECREF(capi_#varname#_tmp); }"""}, + {l_and(isintent_hide, l_not(isintent_out)) + : """ Py_XDECREF(capi_#varname#_tmp);"""}, + {hasinitvalue: ' } /*if (f2py_success) of #varname# init*/'}, + ], + '_check': isarray, + '_depend': '' + }, + # Scalararray + { # Common + '_check': l_and(isarray, l_not(iscomplexarray)) + }, { # Not hidden + '_check': l_and(isarray, l_not(iscomplexarray), isintent_nothide) + }, + # Integer*1 array + {'need': '#ctype#', + '_check': isint1array, + '_depend': '' + }, + # Integer*-1 array + {'need': '#ctype#', + '_check': isunsigned_chararray, + '_depend': '' + }, + # Integer*-2 array + {'need': '#ctype#', + '_check': isunsigned_shortarray, + '_depend': '' + }, + # Integer*-8 array + {'need': '#ctype#', + '_check': isunsigned_long_longarray, + '_depend': '' + }, + # Complexarray + {'need': '#ctype#', + '_check': iscomplexarray, + '_depend': '' + }, + # Stringarray + { + 'callfortranappend': {isarrayofstrings: 'flen(#varname#),'}, + 'need': 'string', + '_check': isstringarray + } +] + +################# Rules for checking ############### + +check_rules = [ + { + 'frompyobj': {debugcapi: ' fprintf(stderr,\"debug-capi:Checking `#check#\'\\n\");'}, + 'need': 'len..' 
+ }, { + 'frompyobj': ' CHECKSCALAR(#check#,\"#check#\",\"#nth# #varname#\",\"#varshowvalue#\",#varname#) {', + 'cleanupfrompyobj': ' } /*CHECKSCALAR(#check#)*/', + 'need': 'CHECKSCALAR', + '_check': l_and(isscalar, l_not(iscomplex)), + '_break': '' + }, { + 'frompyobj': ' CHECKSTRING(#check#,\"#check#\",\"#nth# #varname#\",\"#varshowvalue#\",#varname#) {', + 'cleanupfrompyobj': ' } /*CHECKSTRING(#check#)*/', + 'need': 'CHECKSTRING', + '_check': isstring, + '_break': '' + }, { + 'need': 'CHECKARRAY', + 'frompyobj': ' CHECKARRAY(#check#,\"#check#\",\"#nth# #varname#\") {', + 'cleanupfrompyobj': ' } /*CHECKARRAY(#check#)*/', + '_check': isarray, + '_break': '' + }, { + 'need': 'CHECKGENERIC', + 'frompyobj': ' CHECKGENERIC(#check#,\"#check#\",\"#nth# #varname#\") {', + 'cleanupfrompyobj': ' } /*CHECKGENERIC(#check#)*/', + } +] + +########## Applying the rules. No need to modify what follows ############# + +#################### Build C/API module ####################### + + +def buildmodule(m, um): + """ + Return a dict of the generated output files ('csrc', 'fsrc', 'ltx'). + """ + outmess(' Building module "%s"...\n' % (m['name'])) + ret = {} + mod_rules = defmod_rules[:] + vrd = capi_maps.modsign2map(m) + rd = dictappend({'f2py_version': f2py_version}, vrd) + funcwrappers = [] + funcwrappers2 = [] # F90 codes + for n in m['interfaced']: + nb = None + for bi in m['body']: + if bi['block'] not in ['interface', 'abstract interface']: + errmess('buildmodule: Expected interface block. Skipping.\n') + continue + for b in bi['body']: + if b['name'] == n: + nb = b + break + + if not nb: + errmess( + 'buildmodule: Could not find the body of interfaced routine "%s". Skipping.\n' % (n)) + continue + nb_list = [nb] + if 'entry' in nb: + for k, a in nb['entry'].items(): + nb1 = copy.deepcopy(nb) + del nb1['entry'] + nb1['name'] = k + nb1['args'] = a + nb_list.append(nb1) + for nb in nb_list: + # requiresf90wrapper must be called before buildapi as it + # rewrites assumed shape arrays as automatic arrays.
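+ # Wrappers that need Fortran 90 features are collected in + # funcwrappers2 and written to a separate .f90 file; all others + # become Fortran 77 wrappers in funcwrappers.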
+ isf90 = requiresf90wrapper(nb) + api, wrap = buildapi(nb) + if wrap: + if isf90: + funcwrappers2.append(wrap) + else: + funcwrappers.append(wrap) + ar = applyrules(api, vrd) + rd = dictappend(rd, ar) + + # Construct COMMON block support + cr, wrap = common_rules.buildhooks(m) + if wrap: + funcwrappers.append(wrap) + ar = applyrules(cr, vrd) + rd = dictappend(rd, ar) + + # Construct F90 module support + mr, wrap = f90mod_rules.buildhooks(m) + if wrap: + funcwrappers2.append(wrap) + ar = applyrules(mr, vrd) + rd = dictappend(rd, ar) + + for u in um: + ar = use_rules.buildusevars(u, m['use'][u['name']]) + rd = dictappend(rd, ar) + + needs = cfuncs.get_needs() + code = {} + for n in needs.keys(): + code[n] = [] + for k in needs[n]: + c = '' + if k in cfuncs.includes0: + c = cfuncs.includes0[k] + elif k in cfuncs.includes: + c = cfuncs.includes[k] + elif k in cfuncs.userincludes: + c = cfuncs.userincludes[k] + elif k in cfuncs.typedefs: + c = cfuncs.typedefs[k] + elif k in cfuncs.typedefs_generated: + c = cfuncs.typedefs_generated[k] + elif k in cfuncs.cppmacros: + c = cfuncs.cppmacros[k] + elif k in cfuncs.cfuncs: + c = cfuncs.cfuncs[k] + elif k in cfuncs.callbacks: + c = cfuncs.callbacks[k] + elif k in cfuncs.f90modhooks: + c = cfuncs.f90modhooks[k] + elif k in cfuncs.commonhooks: + c = cfuncs.commonhooks[k] + else: + errmess('buildmodule: unknown need %s.\n' % (repr(k))) + continue + code[n].append(c) + mod_rules.append(code) + for r in mod_rules: + if ('_check' in r and r['_check'](m)) or ('_check' not in r): + ar = applyrules(r, vrd, m) + rd = dictappend(rd, ar) + ar = applyrules(module_rules, rd) + + fn = os.path.join(options['buildpath'], vrd['coutput']) + ret['csrc'] = fn + with open(fn, 'w') as f: + f.write(ar['modulebody'].replace('\t', 2 * ' ')) + outmess(' Wrote C/API module "%s" to file "%s"\n' % (m['name'], fn)) + + if options['dorestdoc']: + fn = os.path.join( + options['buildpath'], vrd['modulename'] + 'module.rest') + with open(fn, 'w') as f: + f.write('.. 
-*- rest -*-\n') + f.write('\n'.join(ar['restdoc'])) + outmess(' ReST Documentation is saved to file "%s/%smodule.rest"\n' % + (options['buildpath'], vrd['modulename'])) + if options['dolatexdoc']: + fn = os.path.join( + options['buildpath'], vrd['modulename'] + 'module.tex') + ret['ltx'] = fn + with open(fn, 'w') as f: + f.write( + '%% This file is auto-generated with f2py (version:%s)\n' % (f2py_version)) + if 'shortlatex' not in options: + f.write( + '\\documentclass{article}\n\\usepackage{a4wide}\n\\begin{document}\n\\tableofcontents\n\n') + f.write('\n'.join(ar['latexdoc'])) + if 'shortlatex' not in options: + f.write('\\end{document}') + outmess(' Documentation is saved to file "%s/%smodule.tex"\n' % + (options['buildpath'], vrd['modulename'])) + if funcwrappers: + wn = os.path.join(options['buildpath'], vrd['f2py_wrapper_output']) + ret['fsrc'] = wn + with open(wn, 'w') as f: + f.write('C -*- fortran -*-\n') + f.write( + 'C This file is autogenerated with f2py (version:%s)\n' % (f2py_version)) + f.write( + 'C It contains Fortran 77 wrappers to fortran functions.\n') + lines = [] + for l in ('\n\n'.join(funcwrappers) + '\n').split('\n'): + if 0 <= l.find('!') < 66: + # don't split comment lines + lines.append(l + '\n') + elif l and l[0] == ' ': + while len(l) >= 66: + lines.append(l[:66] + '\n &') + l = l[66:] + lines.append(l + '\n') + else: + lines.append(l + '\n') + lines = ''.join(lines).replace('\n &\n', '\n') + f.write(lines) + outmess(' Fortran 77 wrappers are saved to "%s"\n' % (wn)) + if funcwrappers2: + wn = os.path.join( + options['buildpath'], '%s-f2pywrappers2.f90' % (vrd['modulename'])) + ret['fsrc'] = wn + with open(wn, 'w') as f: + f.write('! -*- f90 -*-\n') + f.write( + '! This file is autogenerated with f2py (version:%s)\n' % (f2py_version)) + f.write( + '! 
It contains Fortran 90 wrappers to fortran functions.\n') + lines = [] + for l in ('\n\n'.join(funcwrappers2) + '\n').split('\n'): + if 0 <= l.find('!') < 72: + # don't split comment lines + lines.append(l + '\n') + elif len(l) > 72 and l[0] == ' ': + lines.append(l[:72] + '&\n &') + l = l[72:] + while len(l) > 66: + lines.append(l[:66] + '&\n &') + l = l[66:] + lines.append(l + '\n') + else: + lines.append(l + '\n') + lines = ''.join(lines).replace('\n &\n', '\n') + f.write(lines) + outmess(' Fortran 90 wrappers are saved to "%s"\n' % (wn)) + return ret + +################## Build C/API function ############# + +stnd = {1: 'st', 2: 'nd', 3: 'rd', 4: 'th', 5: 'th', + 6: 'th', 7: 'th', 8: 'th', 9: 'th', 0: 'th'} + + +def buildapi(rout): + rout, wrap = func2subr.assubr(rout) + args, depargs = getargs2(rout) + capi_maps.depargs = depargs + var = rout['vars'] + + if ismoduleroutine(rout): + outmess(' Constructing wrapper function "%s.%s"...\n' % + (rout['modulename'], rout['name'])) + else: + outmess(' Constructing wrapper function "%s"...\n' % (rout['name'])) + # Routine + vrd = capi_maps.routsign2map(rout) + rd = dictappend({}, vrd) + for r in rout_rules: + if ('_check' in r and r['_check'](rout)) or ('_check' not in r): + ar = applyrules(r, vrd, rout) + rd = dictappend(rd, ar) + + # Args + nth, nthk = 0, 0 + savevrd = {} + for a in args: + vrd = capi_maps.sign2map(a, var[a]) + if isintent_aux(var[a]): + _rules = aux_rules + else: + _rules = arg_rules + if not isintent_hide(var[a]): + if not isoptional(var[a]): + nth = nth + 1 + vrd['nth'] = repr(nth) + stnd[nth % 10] + ' argument' + else: + nthk = nthk + 1 + vrd['nth'] = repr(nthk) + stnd[nthk % 10] + ' keyword' + else: + vrd['nth'] = 'hidden' + savevrd[a] = vrd + for r in _rules: + if '_depend' in r: + continue + if ('_check' in r and r['_check'](var[a])) or ('_check' not in r): + ar = applyrules(r, vrd, var[a]) + rd = dictappend(rd, ar) + if '_break' in r: + break + for a in depargs: + if isintent_aux(var[a]): + _rules = aux_rules + else: + _rules = arg_rules + vrd = savevrd[a] + for r in _rules: + if '_depend' not in r: + continue + if ('_check' in r and r['_check'](var[a])) or ('_check' not in r): + ar = applyrules(r, vrd, var[a]) + rd = dictappend(rd, ar) + if '_break' in r: + break + if 'check' in var[a]: + for c in var[a]['check']: + vrd['check'] = c + ar = applyrules(check_rules, vrd, var[a]) + rd = dictappend(rd, ar) + if isinstance(rd['cleanupfrompyobj'], list): + rd['cleanupfrompyobj'].reverse() + if isinstance(rd['closepyobjfrom'], list): + rd['closepyobjfrom'].reverse() + rd['docsignature'] = stripcomma(replace('#docsign##docsignopt##docsignxa#', + {'docsign': rd['docsign'], + 'docsignopt': rd['docsignopt'], + 'docsignxa': rd['docsignxa']})) + optargs = stripcomma(replace('#docsignopt##docsignxa#', + {'docsignxa': rd['docsignxashort'], + 'docsignopt': rd['docsignoptshort']} + )) + if optargs == '': + rd['docsignatureshort'] = stripcomma( + replace('#docsign#', {'docsign': rd['docsign']})) + else: + rd['docsignatureshort'] = replace('#docsign#[#docsignopt#]', + {'docsign': rd['docsign'], + 'docsignopt': optargs, + }) + rd['latexdocsignatureshort'] = rd['docsignatureshort'].replace('_', '\\_') + rd['latexdocsignatureshort'] = rd[ + 'latexdocsignatureshort'].replace(',', ', ') + cfs = stripcomma(replace('#callfortran##callfortranappend#', { + 'callfortran': rd['callfortran'], 'callfortranappend': rd['callfortranappend']})) + if len(rd['callfortranappend']) > 1: + rd['callcompaqfortran'] = stripcomma(replace('#callfortran# 
0,#callfortranappend#', { + 'callfortran': rd['callfortran'], 'callfortranappend': rd['callfortranappend']})) + else: + rd['callcompaqfortran'] = cfs + rd['callfortran'] = cfs + if isinstance(rd['docreturn'], list): + rd['docreturn'] = stripcomma( + replace('#docreturn#', {'docreturn': rd['docreturn']})) + ' = ' + rd['docstrsigns'] = [] + rd['latexdocstrsigns'] = [] + for k in ['docstrreq', 'docstropt', 'docstrout', 'docstrcbs']: + if k in rd and isinstance(rd[k], list): + rd['docstrsigns'] = rd['docstrsigns'] + rd[k] + k = 'latex' + k + if k in rd and isinstance(rd[k], list): + rd['latexdocstrsigns'] = rd['latexdocstrsigns'] + rd[k][0:1] +\ + ['\\begin{description}'] + rd[k][1:] +\ + ['\\end{description}'] + + ar = applyrules(routine_rules, rd) + if ismoduleroutine(rout): + outmess(' %s\n' % (ar['docshort'])) + else: + outmess(' %s\n' % (ar['docshort'])) + return ar, wrap + + +#################### EOF rules.py ####################### diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/setup.py b/.local/lib/python3.8/site-packages/numpy/f2py/setup.py new file mode 100644 index 0000000..499609f --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/setup.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python3 +""" +setup.py for installing F2PY + +Usage: + pip install . + +Copyright 2001-2005 Pearu Peterson all rights reserved, +Pearu Peterson +Permission to use, modify, and distribute this software is given under the +terms of the NumPy License. + +NO WARRANTY IS EXPRESSED OR IMPLIED. USE AT YOUR OWN RISK. +$Revision: 1.32 $ +$Date: 2005/01/30 17:22:14 $ +Pearu Peterson + +""" +from numpy.distutils.core import setup +from numpy.distutils.misc_util import Configuration + + +from __version__ import version + + +def configuration(parent_package='', top_path=None): + config = Configuration('f2py', parent_package, top_path) + config.add_subpackage('tests') + config.add_data_dir('tests/src') + config.add_data_files( + 'src/fortranobject.c', + 'src/fortranobject.h') + config.add_data_files('*.pyi') + return config + + +if __name__ == "__main__": + + config = configuration(top_path='') + config = config.todict() + + config['classifiers'] = [ + 'Development Status :: 5 - Production/Stable', + 'Intended Audience :: Developers', + 'Intended Audience :: Science/Research', + 'License :: OSI Approved :: NumPy License', + 'Natural Language :: English', + 'Operating System :: OS Independent', + 'Programming Language :: C', + 'Programming Language :: Fortran', + 'Programming Language :: Python', + 'Topic :: Scientific/Engineering', + 'Topic :: Software Development :: Code Generators', + ] + setup(version=version, + description="F2PY - Fortran to Python Interface Generator", + author="Pearu Peterson", + author_email="pearu@cens.ioc.ee", + maintainer="Pearu Peterson", + maintainer_email="pearu@cens.ioc.ee", + license="BSD", + platforms="Unix, Windows (mingw|cygwin), Mac OSX", + long_description="""\ +The Fortran to Python Interface Generator, or F2PY for short, is a +command line tool (f2py) for generating Python C/API modules for +wrapping Fortran 77/90/95 subroutines, accessing common blocks from +Python, and calling Python functions from Fortran (call-backs). 
+Interfacing subroutines/data from Fortran 90/95 modules is supported.""", + url="https://numpy.org/doc/stable/f2py/", + keywords=['Fortran', 'f2py'], + **config) diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/src/fortranobject.c b/.local/lib/python3.8/site-packages/numpy/f2py/src/fortranobject.c new file mode 100644 index 0000000..c963781 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/src/fortranobject.c @@ -0,0 +1,1197 @@ +#define FORTRANOBJECT_C +#include "fortranobject.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdarg.h> +#include <string.h> + +/* + This file implements: FortranObject, array_from_pyobj, copy_ND_array + + Author: Pearu Peterson + $Revision: 1.52 $ + $Date: 2005/07/11 07:44:20 $ +*/ + +int +F2PyDict_SetItemString(PyObject *dict, char *name, PyObject *obj) +{ + if (obj == NULL) { + fprintf(stderr, "Error loading %s\n", name); + if (PyErr_Occurred()) { + PyErr_Print(); + PyErr_Clear(); + } + return -1; + } + return PyDict_SetItemString(dict, name, obj); +} + +/* + * Python-only fallback for thread-local callback pointers + */ +void * +F2PySwapThreadLocalCallbackPtr(char *key, void *ptr) +{ + PyObject *local_dict, *value; + void *prev; + + local_dict = PyThreadState_GetDict(); + if (local_dict == NULL) { + Py_FatalError( + "F2PySwapThreadLocalCallbackPtr: PyThreadState_GetDict " + "failed"); + } + + value = PyDict_GetItemString(local_dict, key); + if (value != NULL) { + prev = PyLong_AsVoidPtr(value); + if (PyErr_Occurred()) { + Py_FatalError( + "F2PySwapThreadLocalCallbackPtr: PyLong_AsVoidPtr failed"); + } + } + else { + prev = NULL; + } + + value = PyLong_FromVoidPtr((void *)ptr); + if (value == NULL) { + Py_FatalError( + "F2PySwapThreadLocalCallbackPtr: PyLong_FromVoidPtr failed"); + } + + if (PyDict_SetItemString(local_dict, key, value) != 0) { + Py_FatalError( + "F2PySwapThreadLocalCallbackPtr: PyDict_SetItemString failed"); + } + + Py_DECREF(value); + + return prev; +} + +void * +F2PyGetThreadLocalCallbackPtr(char *key) +{ + PyObject *local_dict, *value; + void *prev; + + local_dict = PyThreadState_GetDict(); + if (local_dict == NULL) { + Py_FatalError( + "F2PyGetThreadLocalCallbackPtr: PyThreadState_GetDict failed"); + } + + value = PyDict_GetItemString(local_dict, key); + if (value != NULL) { + prev = PyLong_AsVoidPtr(value); + if (PyErr_Occurred()) { + Py_FatalError( + "F2PyGetThreadLocalCallbackPtr: PyLong_AsVoidPtr failed"); + } + } + else { + prev = NULL; + } + + return prev; +} + +/************************* FortranObject *******************************/ + +typedef PyObject *(*fortranfunc)(PyObject *, PyObject *, PyObject *, void *); + +PyObject * +PyFortranObject_New(FortranDataDef *defs, f2py_void_func init) +{ + int i; + PyFortranObject *fp = NULL; + PyObject *v = NULL; + if (init != NULL) { /* Initialize F90 module objects */ + (*(init))(); + } + fp = PyObject_New(PyFortranObject, &PyFortran_Type); + if (fp == NULL) { + return NULL; + } + if ((fp->dict = PyDict_New()) == NULL) { + Py_DECREF(fp); + return NULL; + } + fp->len = 0; + while (defs[fp->len].name != NULL) { + fp->len++; + } + if (fp->len == 0) { + goto fail; + } + fp->defs = defs; + for (i = 0; i < fp->len; i++) { + if (fp->defs[i].rank == -1) { /* Is Fortran routine */ + v = PyFortranObject_NewAsAttr(&(fp->defs[i])); + if (v == NULL) { + goto fail; + } + PyDict_SetItemString(fp->dict, fp->defs[i].name, v); + Py_XDECREF(v); + } + else if ((fp->defs[i].data) != + NULL) { /* Is Fortran variable or array (not allocatable) */ + if (fp->defs[i].type == NPY_STRING) { + npy_intp
n = fp->defs[i].rank - 1; + v = PyArray_New(&PyArray_Type, n, fp->defs[i].dims.d, + NPY_STRING, NULL, fp->defs[i].data, + fp->defs[i].dims.d[n], NPY_ARRAY_FARRAY, NULL); + } + else { + v = PyArray_New(&PyArray_Type, fp->defs[i].rank, + fp->defs[i].dims.d, fp->defs[i].type, NULL, + fp->defs[i].data, 0, NPY_ARRAY_FARRAY, NULL); + } + if (v == NULL) { + goto fail; + } + PyDict_SetItemString(fp->dict, fp->defs[i].name, v); + Py_XDECREF(v); + } + } + return (PyObject *)fp; +fail: + Py_XDECREF(fp); + return NULL; +} + +PyObject * +PyFortranObject_NewAsAttr(FortranDataDef *defs) +{ /* used for calling F90 module routines */ + PyFortranObject *fp = NULL; + fp = PyObject_New(PyFortranObject, &PyFortran_Type); + if (fp == NULL) + return NULL; + if ((fp->dict = PyDict_New()) == NULL) { + PyObject_Del(fp); + return NULL; + } + fp->len = 1; + fp->defs = defs; + return (PyObject *)fp; +} + +/* Fortran methods */ + +static void +fortran_dealloc(PyFortranObject *fp) +{ + Py_XDECREF(fp->dict); + PyObject_Del(fp); +} + +/* Returns number of bytes consumed from buf, or -1 on error. */ +static Py_ssize_t +format_def(char *buf, Py_ssize_t size, FortranDataDef def) +{ + char *p = buf; + int i; + npy_intp n; + + n = PyOS_snprintf(p, size, "array(%" NPY_INTP_FMT, def.dims.d[0]); + if (n < 0 || n >= size) { + return -1; + } + p += n; + size -= n; + + for (i = 1; i < def.rank; i++) { + n = PyOS_snprintf(p, size, ",%" NPY_INTP_FMT, def.dims.d[i]); + if (n < 0 || n >= size) { + return -1; + } + p += n; + size -= n; + } + + if (size <= 0) { + return -1; + } + + *p++ = ')'; + size--; + + if (def.data == NULL) { + static const char notalloc[] = ", not allocated"; + if ((size_t)size < sizeof(notalloc)) { + return -1; + } + memcpy(p, notalloc, sizeof(notalloc)); + p += sizeof(notalloc); + size -= sizeof(notalloc); + } + + return p - buf; +} + +static PyObject * +fortran_doc(FortranDataDef def) +{ + char *buf, *p; + PyObject *s = NULL; + Py_ssize_t n, origsize, size = 100; + + if (def.doc != NULL) { + size += strlen(def.doc); + } + origsize = size; + buf = p = (char *)PyMem_Malloc(size); + if (buf == NULL) { + return PyErr_NoMemory(); + } + + if (def.rank == -1) { + if (def.doc) { + n = strlen(def.doc); + if (n > size) { + goto fail; + } + memcpy(p, def.doc, n); + p += n; + size -= n; + } + else { + n = PyOS_snprintf(p, size, "%s - no docs available", def.name); + if (n < 0 || n >= size) { + goto fail; + } + p += n; + size -= n; + } + } + else { + PyArray_Descr *d = PyArray_DescrFromType(def.type); + n = PyOS_snprintf(p, size, "%s : '%c'-", def.name, d->type); + Py_DECREF(d); + if (n < 0 || n >= size) { + goto fail; + } + p += n; + size -= n; + + if (def.data == NULL) { + n = format_def(p, size, def); + if (n < 0) { + goto fail; + } + p += n; + size -= n; + } + else if (def.rank > 0) { + n = format_def(p, size, def); + if (n < 0) { + goto fail; + } + p += n; + size -= n; + } + else { + n = strlen("scalar"); + if (size < n) { + goto fail; + } + memcpy(p, "scalar", n); + p += n; + size -= n; + } + } + if (size <= 1) { + goto fail; + } + *p++ = '\n'; + size--; + + /* p now points one beyond the last character of the string in buf */ + s = PyUnicode_FromStringAndSize(buf, p - buf); + + PyMem_Free(buf); + return s; + +fail: + fprintf(stderr, + "fortranobject.c: fortran_doc: len(p)=%zd>%zd=size:" + " too long docstring required, increase size\n", + p - buf, origsize); + PyMem_Free(buf); + return NULL; +} + +static FortranDataDef *save_def; /* save pointer of an allocatable array */ +static void +set_data(char *d, npy_intp *f) +{ 
/* callback from Fortran */ + if (*f) /* In fortran f=allocated(d) */ + save_def->data = d; + else + save_def->data = NULL; + /* printf("set_data: d=%p,f=%d\n",d,*f); */ +} + +static PyObject * +fortran_getattr(PyFortranObject *fp, char *name) +{ + int i, j, k, flag; + if (fp->dict != NULL) { + PyObject *v = _PyDict_GetItemStringWithError(fp->dict, name); + if (v == NULL && PyErr_Occurred()) { + return NULL; + } + else if (v != NULL) { + Py_INCREF(v); + return v; + } + } + for (i = 0, j = 1; i < fp->len && (j = strcmp(name, fp->defs[i].name)); + i++) + ; + if (j == 0) + if (fp->defs[i].rank != -1) { /* F90 allocatable array */ + if (fp->defs[i].func == NULL) + return NULL; + for (k = 0; k < fp->defs[i].rank; ++k) fp->defs[i].dims.d[k] = -1; + save_def = &fp->defs[i]; + (*(fp->defs[i].func))(&fp->defs[i].rank, fp->defs[i].dims.d, + set_data, &flag); + if (flag == 2) + k = fp->defs[i].rank + 1; + else + k = fp->defs[i].rank; + if (fp->defs[i].data != NULL) { /* array is allocated */ + PyObject *v = PyArray_New( + &PyArray_Type, k, fp->defs[i].dims.d, fp->defs[i].type, + NULL, fp->defs[i].data, 0, NPY_ARRAY_FARRAY, NULL); + if (v == NULL) + return NULL; + /* Py_INCREF(v); */ + return v; + } + else { /* array is not allocated */ + Py_RETURN_NONE; + } + } + if (strcmp(name, "__dict__") == 0) { + Py_INCREF(fp->dict); + return fp->dict; + } + if (strcmp(name, "__doc__") == 0) { + PyObject *s = PyUnicode_FromString(""), *s2, *s3; + for (i = 0; i < fp->len; i++) { + s2 = fortran_doc(fp->defs[i]); + s3 = PyUnicode_Concat(s, s2); + Py_DECREF(s2); + Py_DECREF(s); + s = s3; + } + if (PyDict_SetItemString(fp->dict, name, s)) + return NULL; + return s; + } + if ((strcmp(name, "_cpointer") == 0) && (fp->len == 1)) { + PyObject *cobj = + F2PyCapsule_FromVoidPtr((void *)(fp->defs[0].data), NULL); + if (PyDict_SetItemString(fp->dict, name, cobj)) + return NULL; + return cobj; + } + PyObject *str, *ret; + str = PyUnicode_FromString(name); + ret = PyObject_GenericGetAttr((PyObject *)fp, str); + Py_DECREF(str); + return ret; +} + +static int +fortran_setattr(PyFortranObject *fp, char *name, PyObject *v) +{ + int i, j, flag; + PyArrayObject *arr = NULL; + for (i = 0, j = 1; i < fp->len && (j = strcmp(name, fp->defs[i].name)); + i++) + ; + if (j == 0) { + if (fp->defs[i].rank == -1) { + PyErr_SetString(PyExc_AttributeError, + "over-writing fortran routine"); + return -1; + } + if (fp->defs[i].func != NULL) { /* is allocatable array */ + npy_intp dims[F2PY_MAX_DIMS]; + int k; + save_def = &fp->defs[i]; + if (v != Py_None) { /* set new value (reallocate if needed -- + see f2py generated code for more + details ) */ + for (k = 0; k < fp->defs[i].rank; k++) dims[k] = -1; + if ((arr = array_from_pyobj(fp->defs[i].type, dims, + fp->defs[i].rank, F2PY_INTENT_IN, + v)) == NULL) + return -1; + (*(fp->defs[i].func))(&fp->defs[i].rank, PyArray_DIMS(arr), + set_data, &flag); + } + else { /* deallocate */ + for (k = 0; k < fp->defs[i].rank; k++) dims[k] = 0; + (*(fp->defs[i].func))(&fp->defs[i].rank, dims, set_data, + &flag); + for (k = 0; k < fp->defs[i].rank; k++) dims[k] = -1; + } + memcpy(fp->defs[i].dims.d, dims, + fp->defs[i].rank * sizeof(npy_intp)); + } + else { /* not allocatable array */ + if ((arr = array_from_pyobj(fp->defs[i].type, fp->defs[i].dims.d, + fp->defs[i].rank, F2PY_INTENT_IN, + v)) == NULL) + return -1; + } + if (fp->defs[i].data != + NULL) { /* copy Python object to Fortran array */ + npy_intp s = PyArray_MultiplyList(fp->defs[i].dims.d, + PyArray_NDIM(arr)); + if (s == -1) + s = 
PyArray_MultiplyList(PyArray_DIMS(arr), PyArray_NDIM(arr));
+            if (s < 0 || (memcpy(fp->defs[i].data, PyArray_DATA(arr),
+                                 s * PyArray_ITEMSIZE(arr))) == NULL) {
+                if ((PyObject *)arr != v) {
+                    Py_DECREF(arr);
+                }
+                return -1;
+            }
+            if ((PyObject *)arr != v) {
+                Py_DECREF(arr);
+            }
+        }
+        else
+            return (fp->defs[i].func == NULL ? -1 : 0);
+        return 0; /* successful */
+    }
+    if (fp->dict == NULL) {
+        fp->dict = PyDict_New();
+        if (fp->dict == NULL)
+            return -1;
+    }
+    if (v == NULL) {
+        int rv = PyDict_DelItemString(fp->dict, name);
+        if (rv < 0)
+            PyErr_SetString(PyExc_AttributeError,
+                            "delete non-existing fortran attribute");
+        return rv;
+    }
+    else
+        return PyDict_SetItemString(fp->dict, name, v);
+}
+
+static PyObject *
+fortran_call(PyFortranObject *fp, PyObject *arg, PyObject *kw)
+{
+    int i = 0;
+    /*  printf("fortran call
+        name=%s,func=%p,data=%p,%p\n",fp->defs[i].name,
+        fp->defs[i].func,fp->defs[i].data,&fp->defs[i].data); */
+    if (fp->defs[i].rank == -1) { /* is Fortran routine */
+        if (fp->defs[i].func == NULL) {
+            PyErr_Format(PyExc_RuntimeError, "no function to call");
+            return NULL;
+        }
+        else if (fp->defs[i].data == NULL)
+            /* dummy routine */
+            return (*((fortranfunc)(fp->defs[i].func)))((PyObject *)fp, arg,
+                                                        kw, NULL);
+        else
+            return (*((fortranfunc)(fp->defs[i].func)))(
+                    (PyObject *)fp, arg, kw, (void *)fp->defs[i].data);
+    }
+    PyErr_Format(PyExc_TypeError, "this fortran object is not callable");
+    return NULL;
+}
+
+static PyObject *
+fortran_repr(PyFortranObject *fp)
+{
+    PyObject *name = NULL, *repr = NULL;
+    name = PyObject_GetAttrString((PyObject *)fp, "__name__");
+    PyErr_Clear();
+    if (name != NULL && PyUnicode_Check(name)) {
+        repr = PyUnicode_FromFormat("<fortran %U>", name);
+    }
+    else {
+        repr = PyUnicode_FromString("<fortran object>");
+    }
+    Py_XDECREF(name);
+    return repr;
+}
+
+PyTypeObject PyFortran_Type = {
+    PyVarObject_HEAD_INIT(NULL, 0).tp_name = "fortran",
+    .tp_basicsize = sizeof(PyFortranObject),
+    .tp_dealloc = (destructor)fortran_dealloc,
+    .tp_getattr = (getattrfunc)fortran_getattr,
+    .tp_setattr = (setattrfunc)fortran_setattr,
+    .tp_repr = (reprfunc)fortran_repr,
+    .tp_call = (ternaryfunc)fortran_call,
+};
+
+/************************* f2py_report_atexit *******************************/
+
+#ifdef F2PY_REPORT_ATEXIT
+static int passed_time = 0;
+static int passed_counter = 0;
+static int passed_call_time = 0;
+static struct timeb start_time;
+static struct timeb stop_time;
+static struct timeb start_call_time;
+static struct timeb stop_call_time;
+static int cb_passed_time = 0;
+static int cb_passed_counter = 0;
+static int cb_passed_call_time = 0;
+static struct timeb cb_start_time;
+static struct timeb cb_stop_time;
+static struct timeb cb_start_call_time;
+static struct timeb cb_stop_call_time;
+
+extern void
+f2py_start_clock(void)
+{
+    ftime(&start_time);
+}
+extern void
+f2py_start_call_clock(void)
+{
+    f2py_stop_clock();
+    ftime(&start_call_time);
+}
+extern void
+f2py_stop_clock(void)
+{
+    ftime(&stop_time);
+    passed_time += 1000 * (stop_time.time - start_time.time);
+    passed_time += stop_time.millitm - start_time.millitm;
+}
+extern void
+f2py_stop_call_clock(void)
+{
+    ftime(&stop_call_time);
+    passed_call_time += 1000 * (stop_call_time.time - start_call_time.time);
+    passed_call_time += stop_call_time.millitm - start_call_time.millitm;
+    passed_counter += 1;
+    f2py_start_clock();
+}
+
+extern void
+f2py_cb_start_clock(void)
+{
+    ftime(&cb_start_time);
+}
+extern void
+f2py_cb_start_call_clock(void)
+{
+    f2py_cb_stop_clock();
+    ftime(&cb_start_call_time);
+}
+extern
void +f2py_cb_stop_clock(void) +{ + ftime(&cb_stop_time); + cb_passed_time += 1000 * (cb_stop_time.time - cb_start_time.time); + cb_passed_time += cb_stop_time.millitm - cb_start_time.millitm; +} +extern void +f2py_cb_stop_call_clock(void) +{ + ftime(&cb_stop_call_time); + cb_passed_call_time += + 1000 * (cb_stop_call_time.time - cb_start_call_time.time); + cb_passed_call_time += + cb_stop_call_time.millitm - cb_start_call_time.millitm; + cb_passed_counter += 1; + f2py_cb_start_clock(); +} + +static int f2py_report_on_exit_been_here = 0; +extern void +f2py_report_on_exit(int exit_flag, void *name) +{ + if (f2py_report_on_exit_been_here) { + fprintf(stderr, " %s\n", (char *)name); + return; + } + f2py_report_on_exit_been_here = 1; + fprintf(stderr, " /-----------------------\\\n"); + fprintf(stderr, " < F2PY performance report >\n"); + fprintf(stderr, " \\-----------------------/\n"); + fprintf(stderr, "Overall time spent in ...\n"); + fprintf(stderr, "(a) wrapped (Fortran/C) functions : %8d msec\n", + passed_call_time); + fprintf(stderr, "(b) f2py interface, %6d calls : %8d msec\n", + passed_counter, passed_time); + fprintf(stderr, "(c) call-back (Python) functions : %8d msec\n", + cb_passed_call_time); + fprintf(stderr, "(d) f2py call-back interface, %6d calls : %8d msec\n", + cb_passed_counter, cb_passed_time); + + fprintf(stderr, + "(e) wrapped (Fortran/C) functions (actual) : %8d msec\n\n", + passed_call_time - cb_passed_call_time - cb_passed_time); + fprintf(stderr, + "Use -DF2PY_REPORT_ATEXIT_DISABLE to disable this message.\n"); + fprintf(stderr, "Exit status: %d\n", exit_flag); + fprintf(stderr, "Modules : %s\n", (char *)name); +} +#endif + +/********************** report on array copy ****************************/ + +#ifdef F2PY_REPORT_ON_ARRAY_COPY +static void +f2py_report_on_array_copy(PyArrayObject *arr) +{ + const npy_intp arr_size = PyArray_Size((PyObject *)arr); + if (arr_size > F2PY_REPORT_ON_ARRAY_COPY) { + fprintf(stderr, + "copied an array: size=%ld, elsize=%" NPY_INTP_FMT "\n", + arr_size, (npy_intp)PyArray_ITEMSIZE(arr)); + } +} +static void +f2py_report_on_array_copy_fromany(void) +{ + fprintf(stderr, "created an array from object\n"); +} + +#define F2PY_REPORT_ON_ARRAY_COPY_FROMARR \ + f2py_report_on_array_copy((PyArrayObject *)arr) +#define F2PY_REPORT_ON_ARRAY_COPY_FROMANY f2py_report_on_array_copy_fromany() +#else +#define F2PY_REPORT_ON_ARRAY_COPY_FROMARR +#define F2PY_REPORT_ON_ARRAY_COPY_FROMANY +#endif + +/************************* array_from_obj *******************************/ + +/* + * File: array_from_pyobj.c + * + * Description: + * ------------ + * Provides array_from_pyobj function that returns a contiguous array + * object with the given dimensions and required storage order, either + * in row-major (C) or column-major (Fortran) order. The function + * array_from_pyobj is very flexible about its Python object argument + * that can be any number, list, tuple, or array. + * + * array_from_pyobj is used in f2py generated Python extension + * modules. 
+ * + * Author: Pearu Peterson + * Created: 13-16 January 2002 + * $Id: fortranobject.c,v 1.52 2005/07/11 07:44:20 pearu Exp $ + */ + +static int +check_and_fix_dimensions(const PyArrayObject *arr, const int rank, + npy_intp *dims); + +static int +find_first_negative_dimension(const int rank, const npy_intp *dims) +{ + for (int i = 0; i < rank; ++i) { + if (dims[i] < 0) { + return i; + } + } + return -1; +} + +#ifdef DEBUG_COPY_ND_ARRAY +void +dump_dims(int rank, npy_intp const *dims) +{ + int i; + printf("["); + for (i = 0; i < rank; ++i) { + printf("%3" NPY_INTP_FMT, dims[i]); + } + printf("]\n"); +} +void +dump_attrs(const PyArrayObject *obj) +{ + const PyArrayObject_fields *arr = (const PyArrayObject_fields *)obj; + int rank = PyArray_NDIM(arr); + npy_intp size = PyArray_Size((PyObject *)arr); + printf("\trank = %d, flags = %d, size = %" NPY_INTP_FMT "\n", rank, + arr->flags, size); + printf("\tstrides = "); + dump_dims(rank, arr->strides); + printf("\tdimensions = "); + dump_dims(rank, arr->dimensions); +} +#endif + +#define SWAPTYPE(a, b, t) \ + { \ + t c; \ + c = (a); \ + (a) = (b); \ + (b) = c; \ + } + +static int +swap_arrays(PyArrayObject *obj1, PyArrayObject *obj2) +{ + PyArrayObject_fields *arr1 = (PyArrayObject_fields *)obj1, + *arr2 = (PyArrayObject_fields *)obj2; + SWAPTYPE(arr1->data, arr2->data, char *); + SWAPTYPE(arr1->nd, arr2->nd, int); + SWAPTYPE(arr1->dimensions, arr2->dimensions, npy_intp *); + SWAPTYPE(arr1->strides, arr2->strides, npy_intp *); + SWAPTYPE(arr1->base, arr2->base, PyObject *); + SWAPTYPE(arr1->descr, arr2->descr, PyArray_Descr *); + SWAPTYPE(arr1->flags, arr2->flags, int); + /* SWAPTYPE(arr1->weakreflist,arr2->weakreflist,PyObject*); */ + return 0; +} + +#define ARRAY_ISCOMPATIBLE(arr, type_num) \ + ((PyArray_ISINTEGER(arr) && PyTypeNum_ISINTEGER(type_num)) || \ + (PyArray_ISFLOAT(arr) && PyTypeNum_ISFLOAT(type_num)) || \ + (PyArray_ISCOMPLEX(arr) && PyTypeNum_ISCOMPLEX(type_num)) || \ + (PyArray_ISBOOL(arr) && PyTypeNum_ISBOOL(type_num))) + +extern PyArrayObject * +array_from_pyobj(const int type_num, npy_intp *dims, const int rank, + const int intent, PyObject *obj) +{ + /* + * Note about reference counting + * ----------------------------- + * If the caller returns the array to Python, it must be done with + * Py_BuildValue("N",arr). + * Otherwise, if obj!=arr then the caller must call Py_DECREF(arr). + * + * Note on intent(cache,out,..) + * --------------------- + * Don't expect correct data when returning intent(cache) array. 
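+ *
+ * Editor's note -- a rough Python-level analogy of the common case
+ * (illustrative sketch only; it ignores the intent flag handling
+ * described above and implemented below):
+ *
+ *     import numpy as np
+ *     def array_from_pyobj_sketch(obj, dtype, order):
+ *         # returns obj itself when it is already a suitable array,
+ *         # otherwise a new contiguous copy in the requested order
+ *         return np.asarray(obj, dtype=dtype, order=order)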
+ * + */ + char mess[200]; + PyArrayObject *arr = NULL; + PyArray_Descr *descr; + char typechar; + int elsize; + + if ((intent & F2PY_INTENT_HIDE) || + ((intent & F2PY_INTENT_CACHE) && (obj == Py_None)) || + ((intent & F2PY_OPTIONAL) && (obj == Py_None))) { + /* intent(cache), optional, intent(hide) */ + int i = find_first_negative_dimension(rank, dims); + if (i >= 0) { + PyErr_Format(PyExc_ValueError, + "failed to create intent(cache|hide)|optional array" + " -- must have defined dimensions, but dims[%d] = %" + NPY_INTP_FMT, i, dims[i]); + return NULL; + } + arr = (PyArrayObject *)PyArray_New(&PyArray_Type, rank, dims, type_num, + NULL, NULL, 1, + !(intent & F2PY_INTENT_C), NULL); + if (arr == NULL) + return NULL; + if (!(intent & F2PY_INTENT_CACHE)) + PyArray_FILLWBYTE(arr, 0); + return arr; + } + + descr = PyArray_DescrFromType(type_num); + /* compatibility with NPY_CHAR */ + if (type_num == NPY_STRING) { + PyArray_DESCR_REPLACE(descr); + if (descr == NULL) { + return NULL; + } + descr->elsize = 1; + descr->type = NPY_CHARLTR; + } + elsize = descr->elsize; + typechar = descr->type; + Py_DECREF(descr); + if (PyArray_Check(obj)) { + arr = (PyArrayObject *)obj; + + if (intent & F2PY_INTENT_CACHE) { + /* intent(cache) */ + if (PyArray_ISONESEGMENT(arr) && PyArray_ITEMSIZE(arr) >= elsize) { + if (check_and_fix_dimensions(arr, rank, dims)) { + return NULL; + } + if (intent & F2PY_INTENT_OUT) + Py_INCREF(arr); + return arr; + } + strcpy(mess, "failed to initialize intent(cache) array"); + if (!PyArray_ISONESEGMENT(arr)) + strcat(mess, " -- input must be in one segment"); + if (PyArray_ITEMSIZE(arr) < elsize) + sprintf(mess + strlen(mess), + " -- expected at least elsize=%d but got " + "%" NPY_INTP_FMT, + elsize, (npy_intp)PyArray_ITEMSIZE(arr)); + PyErr_SetString(PyExc_ValueError, mess); + return NULL; + } + + /* here we have always intent(in) or intent(inout) or intent(inplace) + */ + + if (check_and_fix_dimensions(arr, rank, dims)) { + return NULL; + } + /* + printf("intent alignment=%d\n", F2PY_GET_ALIGNMENT(intent)); + printf("alignment check=%d\n", F2PY_CHECK_ALIGNMENT(arr, intent)); + int i; + for (i=1;i<=16;i++) + printf("i=%d isaligned=%d\n", i, ARRAY_ISALIGNED(arr, i)); + */ + if ((!(intent & F2PY_INTENT_COPY)) && + PyArray_ITEMSIZE(arr) == elsize && + ARRAY_ISCOMPATIBLE(arr, type_num) && + F2PY_CHECK_ALIGNMENT(arr, intent)) { + if ((intent & F2PY_INTENT_C) ? 
PyArray_ISCARRAY_RO(arr) + : PyArray_ISFARRAY_RO(arr)) { + if ((intent & F2PY_INTENT_OUT)) { + Py_INCREF(arr); + } + /* Returning input array */ + return arr; + } + } + if (intent & F2PY_INTENT_INOUT) { + strcpy(mess, "failed to initialize intent(inout) array"); + /* Must use PyArray_IS*ARRAY because intent(inout) requires + * writable input */ + if ((intent & F2PY_INTENT_C) && !PyArray_ISCARRAY(arr)) + strcat(mess, " -- input not contiguous"); + if (!(intent & F2PY_INTENT_C) && !PyArray_ISFARRAY(arr)) + strcat(mess, " -- input not fortran contiguous"); + if (PyArray_ITEMSIZE(arr) != elsize) + sprintf(mess + strlen(mess), + " -- expected elsize=%d but got %" NPY_INTP_FMT, + elsize, (npy_intp)PyArray_ITEMSIZE(arr)); + if (!(ARRAY_ISCOMPATIBLE(arr, type_num))) + sprintf(mess + strlen(mess), + " -- input '%c' not compatible to '%c'", + PyArray_DESCR(arr)->type, typechar); + if (!(F2PY_CHECK_ALIGNMENT(arr, intent))) + sprintf(mess + strlen(mess), " -- input not %d-aligned", + F2PY_GET_ALIGNMENT(intent)); + PyErr_SetString(PyExc_ValueError, mess); + return NULL; + } + + /* here we have always intent(in) or intent(inplace) */ + + { + PyArrayObject *retarr; + retarr = (PyArrayObject *)PyArray_New( + &PyArray_Type, PyArray_NDIM(arr), PyArray_DIMS(arr), + type_num, NULL, NULL, 1, !(intent & F2PY_INTENT_C), NULL); + if (retarr == NULL) + return NULL; + F2PY_REPORT_ON_ARRAY_COPY_FROMARR; + if (PyArray_CopyInto(retarr, arr)) { + Py_DECREF(retarr); + return NULL; + } + if (intent & F2PY_INTENT_INPLACE) { + if (swap_arrays(arr, retarr)) + return NULL; /* XXX: set exception */ + Py_XDECREF(retarr); + if (intent & F2PY_INTENT_OUT) + Py_INCREF(arr); + } + else { + arr = retarr; + } + } + return arr; + } + + if ((intent & F2PY_INTENT_INOUT) || (intent & F2PY_INTENT_INPLACE) || + (intent & F2PY_INTENT_CACHE)) { + PyErr_Format(PyExc_TypeError, + "failed to initialize intent(inout|inplace|cache) " + "array, input '%s' object is not an array", + Py_TYPE(obj)->tp_name); + return NULL; + } + + { + PyArray_Descr *descr = PyArray_DescrFromType(type_num); + /* compatibility with NPY_CHAR */ + if (type_num == NPY_STRING) { + PyArray_DESCR_REPLACE(descr); + if (descr == NULL) { + return NULL; + } + descr->elsize = 1; + descr->type = NPY_CHARLTR; + } + F2PY_REPORT_ON_ARRAY_COPY_FROMANY; + arr = (PyArrayObject *)PyArray_FromAny( + obj, descr, 0, 0, + ((intent & F2PY_INTENT_C) ? NPY_ARRAY_CARRAY + : NPY_ARRAY_FARRAY) | + NPY_ARRAY_FORCECAST, + NULL); + if (arr == NULL) + return NULL; + if (check_and_fix_dimensions(arr, rank, dims)) { + return NULL; + } + return arr; + } +} + +/*****************************************/ +/* Helper functions for array_from_pyobj */ +/*****************************************/ + +static int +check_and_fix_dimensions(const PyArrayObject *arr, const int rank, + npy_intp *dims) +{ + /* + * This function fills in blanks (that are -1's) in dims list using + * the dimensions from arr. It also checks that non-blank dims will + * match with the corresponding values in arr dimensions. + * + * Returns 0 if the function is successful. + * + * If an error condition is detected, an exception is set and 1 is + * returned. + */ + const npy_intp arr_size = + (PyArray_NDIM(arr)) ? 
PyArray_Size((PyObject *)arr) : 1; +#ifdef DEBUG_COPY_ND_ARRAY + dump_attrs(arr); + printf("check_and_fix_dimensions:init: dims="); + dump_dims(rank, dims); +#endif + if (rank > PyArray_NDIM(arr)) { /* [1,2] -> [[1],[2]]; 1 -> [[1]] */ + npy_intp new_size = 1; + int free_axe = -1; + int i; + npy_intp d; + /* Fill dims where -1 or 0; check dimensions; calc new_size; */ + for (i = 0; i < PyArray_NDIM(arr); ++i) { + d = PyArray_DIM(arr, i); + if (dims[i] >= 0) { + if (d > 1 && dims[i] != d) { + PyErr_Format( + PyExc_ValueError, + "%d-th dimension must be fixed to %" NPY_INTP_FMT + " but got %" NPY_INTP_FMT "\n", + i, dims[i], d); + return 1; + } + if (!dims[i]) + dims[i] = 1; + } + else { + dims[i] = d ? d : 1; + } + new_size *= dims[i]; + } + for (i = PyArray_NDIM(arr); i < rank; ++i) + if (dims[i] > 1) { + PyErr_Format(PyExc_ValueError, + "%d-th dimension must be %" NPY_INTP_FMT + " but got 0 (not defined).\n", + i, dims[i]); + return 1; + } + else if (free_axe < 0) + free_axe = i; + else + dims[i] = 1; + if (free_axe >= 0) { + dims[free_axe] = arr_size / new_size; + new_size *= dims[free_axe]; + } + if (new_size != arr_size) { + PyErr_Format(PyExc_ValueError, + "unexpected array size: new_size=%" NPY_INTP_FMT + ", got array with arr_size=%" NPY_INTP_FMT + " (maybe too many free indices)\n", + new_size, arr_size); + return 1; + } + } + else if (rank == PyArray_NDIM(arr)) { + npy_intp new_size = 1; + int i; + npy_intp d; + for (i = 0; i < rank; ++i) { + d = PyArray_DIM(arr, i); + if (dims[i] >= 0) { + if (d > 1 && d != dims[i]) { + PyErr_Format( + PyExc_ValueError, + "%d-th dimension must be fixed to %" NPY_INTP_FMT + " but got %" NPY_INTP_FMT "\n", + i, dims[i], d); + return 1; + } + if (!dims[i]) + dims[i] = 1; + } + else + dims[i] = d; + new_size *= dims[i]; + } + if (new_size != arr_size) { + PyErr_Format(PyExc_ValueError, + "unexpected array size: new_size=%" NPY_INTP_FMT + ", got array with arr_size=%" NPY_INTP_FMT "\n", + new_size, arr_size); + return 1; + } + } + else { /* [[1,2]] -> [[1],[2]] */ + int i, j; + npy_intp d; + int effrank; + npy_intp size; + for (i = 0, effrank = 0; i < PyArray_NDIM(arr); ++i) + if (PyArray_DIM(arr, i) > 1) + ++effrank; + if (dims[rank - 1] >= 0) + if (effrank > rank) { + PyErr_Format(PyExc_ValueError, + "too many axes: %d (effrank=%d), " + "expected rank=%d\n", + PyArray_NDIM(arr), effrank, rank); + return 1; + } + + for (i = 0, j = 0; i < rank; ++i) { + while (j < PyArray_NDIM(arr) && PyArray_DIM(arr, j) < 2) ++j; + if (j >= PyArray_NDIM(arr)) + d = 1; + else + d = PyArray_DIM(arr, j++); + if (dims[i] >= 0) { + if (d > 1 && d != dims[i]) { + PyErr_Format( + PyExc_ValueError, + "%d-th dimension must be fixed to %" NPY_INTP_FMT + " but got %" NPY_INTP_FMT " (real index=%d)\n", + i, dims[i], d, j - 1); + return 1; + } + if (!dims[i]) + dims[i] = 1; + } + else + dims[i] = d; + } + + for (i = rank; i < PyArray_NDIM(arr); + ++i) { /* [[1,2],[3,4]] -> [1,2,3,4] */ + while (j < PyArray_NDIM(arr) && PyArray_DIM(arr, j) < 2) ++j; + if (j >= PyArray_NDIM(arr)) + d = 1; + else + d = PyArray_DIM(arr, j++); + dims[rank - 1] *= d; + } + for (i = 0, size = 1; i < rank; ++i) size *= dims[i]; + if (size != arr_size) { + char msg[200]; + int len; + snprintf(msg, sizeof(msg), + "unexpected array size: size=%" NPY_INTP_FMT + ", arr_size=%" NPY_INTP_FMT + ", rank=%d, effrank=%d, arr.nd=%d, dims=[", + size, arr_size, rank, effrank, PyArray_NDIM(arr)); + for (i = 0; i < rank; ++i) { + len = strlen(msg); + snprintf(msg + len, sizeof(msg) - len, " %" NPY_INTP_FMT, + dims[i]); 
+        }
+        len = strlen(msg);
+        snprintf(msg + len, sizeof(msg) - len, " ], arr.dims=[");
+        for (i = 0; i < PyArray_NDIM(arr); ++i) {
+            len = strlen(msg);
+            snprintf(msg + len, sizeof(msg) - len, " %" NPY_INTP_FMT,
+                     PyArray_DIM(arr, i));
+        }
+        len = strlen(msg);
+        snprintf(msg + len, sizeof(msg) - len, " ]\n");
+        PyErr_SetString(PyExc_ValueError, msg);
+        return 1;
+        }
+    }
+#ifdef DEBUG_COPY_ND_ARRAY
+    printf("check_and_fix_dimensions:end: dims=");
+    dump_dims(rank, dims);
+#endif
+    return 0;
+}
+
+/* End of file: array_from_pyobj.c */
+
+/************************* copy_ND_array *******************************/
+
+extern int
+copy_ND_array(const PyArrayObject *arr, PyArrayObject *out)
+{
+    F2PY_REPORT_ON_ARRAY_COPY_FROMARR;
+    return PyArray_CopyInto(out, (PyArrayObject *)arr);
+}
+
+/*********************************************/
+/* Compatibility functions for Python >= 3.0 */
+/*********************************************/
+
+PyObject *
+F2PyCapsule_FromVoidPtr(void *ptr, void (*dtor)(PyObject *))
+{
+    PyObject *ret = PyCapsule_New(ptr, NULL, dtor);
+    if (ret == NULL) {
+        PyErr_Clear();
+    }
+    return ret;
+}
+
+void *
+F2PyCapsule_AsVoidPtr(PyObject *obj)
+{
+    void *ret = PyCapsule_GetPointer(obj, NULL);
+    if (ret == NULL) {
+        PyErr_Clear();
+    }
+    return ret;
+}
+
+int
+F2PyCapsule_Check(PyObject *ptr)
+{
+    return PyCapsule_CheckExact(ptr);
+}
+
+#ifdef __cplusplus
+}
+#endif
+/************************* EOF fortranobject.c *******************************/
diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/src/fortranobject.h b/.local/lib/python3.8/site-packages/numpy/f2py/src/fortranobject.h
new file mode 100644
index 0000000..a1e9fdb
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/f2py/src/fortranobject.h
@@ -0,0 +1,143 @@
+#ifndef Py_FORTRANOBJECT_H
+#define Py_FORTRANOBJECT_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <Python.h>
+
+#ifdef FORTRANOBJECT_C
+#define NO_IMPORT_ARRAY
+#endif
+#define PY_ARRAY_UNIQUE_SYMBOL _npy_f2py_ARRAY_API
+#include "numpy/arrayobject.h"
+#include "numpy/npy_3kcompat.h"
+
+#ifdef F2PY_REPORT_ATEXIT
+#include <sys/timeb.h>
+// clang-format off
+extern void f2py_start_clock(void);
+extern void f2py_stop_clock(void);
+extern void f2py_start_call_clock(void);
+extern void f2py_stop_call_clock(void);
+extern void f2py_cb_start_clock(void);
+extern void f2py_cb_stop_clock(void);
+extern void f2py_cb_start_call_clock(void);
+extern void f2py_cb_stop_call_clock(void);
+extern void f2py_report_on_exit(int, void *);
+// clang-format on
+#endif
+
+#ifdef DMALLOC
+#include "dmalloc.h"
+#endif
+
+/* Fortran object interface */
+
+/*
+123456789-123456789-123456789-123456789-123456789-123456789-123456789-12
+
+PyFortranObject represents various Fortran objects:
+Fortran (module) routines, COMMON blocks, module data.
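+
+Editor's note -- an illustrative Python-side view of such an object
+(hypothetical f2py-built module `mymod`; all names below are assumptions,
+not part of this header):
+
+    import numpy as np
+    import mymod                   # f2py-generated extension module
+    mymod.some_routine(x)          # rank == -1 entries are callable routines
+    mymod.some_array[:] = 0.0      # module data appears as NumPy array views
+    mymod.alloc_array = np.zeros((3, 3))  # assigning an allocatable allocates it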
+ +Author: Pearu Peterson +*/ + +#define F2PY_MAX_DIMS 40 + +typedef void (*f2py_set_data_func)(char *, npy_intp *); +typedef void (*f2py_void_func)(void); +typedef void (*f2py_init_func)(int *, npy_intp *, f2py_set_data_func, int *); + +/*typedef void* (*f2py_c_func)(void*,...);*/ + +typedef void *(*f2pycfunc)(void); + +typedef struct { + char *name; /* attribute (array||routine) name */ + int rank; /* array rank, 0 for scalar, max is F2PY_MAX_DIMS, + || rank=-1 for Fortran routine */ + struct { + npy_intp d[F2PY_MAX_DIMS]; + } dims; /* dimensions of the array, || not used */ + int type; /* PyArray_ || not used */ + char *data; /* pointer to array || Fortran routine */ + f2py_init_func func; /* initialization function for + allocatable arrays: + func(&rank,dims,set_ptr_func,name,len(name)) + || C/API wrapper for Fortran routine */ + char *doc; /* documentation string; only recommended + for routines. */ +} FortranDataDef; + +typedef struct { + PyObject_HEAD + int len; /* Number of attributes */ + FortranDataDef *defs; /* An array of FortranDataDef's */ + PyObject *dict; /* Fortran object attribute dictionary */ +} PyFortranObject; + +#define PyFortran_Check(op) (Py_TYPE(op) == &PyFortran_Type) +#define PyFortran_Check1(op) (0 == strcmp(Py_TYPE(op)->tp_name, "fortran")) + +extern PyTypeObject PyFortran_Type; +extern int +F2PyDict_SetItemString(PyObject *dict, char *name, PyObject *obj); +extern PyObject * +PyFortranObject_New(FortranDataDef *defs, f2py_void_func init); +extern PyObject * +PyFortranObject_NewAsAttr(FortranDataDef *defs); + +PyObject * +F2PyCapsule_FromVoidPtr(void *ptr, void (*dtor)(PyObject *)); +void * +F2PyCapsule_AsVoidPtr(PyObject *obj); +int +F2PyCapsule_Check(PyObject *ptr); + +extern void * +F2PySwapThreadLocalCallbackPtr(char *key, void *ptr); +extern void * +F2PyGetThreadLocalCallbackPtr(char *key); + +#define ISCONTIGUOUS(m) (PyArray_FLAGS(m) & NPY_ARRAY_C_CONTIGUOUS) +#define F2PY_INTENT_IN 1 +#define F2PY_INTENT_INOUT 2 +#define F2PY_INTENT_OUT 4 +#define F2PY_INTENT_HIDE 8 +#define F2PY_INTENT_CACHE 16 +#define F2PY_INTENT_COPY 32 +#define F2PY_INTENT_C 64 +#define F2PY_OPTIONAL 128 +#define F2PY_INTENT_INPLACE 256 +#define F2PY_INTENT_ALIGNED4 512 +#define F2PY_INTENT_ALIGNED8 1024 +#define F2PY_INTENT_ALIGNED16 2048 + +#define ARRAY_ISALIGNED(ARR, SIZE) ((size_t)(PyArray_DATA(ARR)) % (SIZE) == 0) +#define F2PY_ALIGN4(intent) (intent & F2PY_INTENT_ALIGNED4) +#define F2PY_ALIGN8(intent) (intent & F2PY_INTENT_ALIGNED8) +#define F2PY_ALIGN16(intent) (intent & F2PY_INTENT_ALIGNED16) + +#define F2PY_GET_ALIGNMENT(intent) \ + (F2PY_ALIGN4(intent) \ + ? 4 \ + : (F2PY_ALIGN8(intent) ? 8 : (F2PY_ALIGN16(intent) ? 16 : 1))) +#define F2PY_CHECK_ALIGNMENT(arr, intent) \ + ARRAY_ISALIGNED(arr, F2PY_GET_ALIGNMENT(intent)) + +extern PyArrayObject * +array_from_pyobj(const int type_num, npy_intp *dims, const int rank, + const int intent, PyObject *obj); +extern int +copy_ND_array(const PyArrayObject *in, PyArrayObject *out); + +#ifdef DEBUG_COPY_ND_ARRAY +extern void +dump_attrs(const PyArrayObject *arr); +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_FORTRANOBJECT_H */ diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/symbolic.py b/.local/lib/python3.8/site-packages/numpy/f2py/symbolic.py new file mode 100644 index 0000000..c2ab0f1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/symbolic.py @@ -0,0 +1,1510 @@ +"""Fortran/C symbolic expressions + +References: +- J3/21-007: Draft Fortran 202x. 
https://j3-fortran.org/doc/year/21/21-007.pdf
+"""
+
+# To analyze Fortran expressions, for instance to solve dimension
+# specifications, we implement a minimal symbolic engine for parsing
+# expressions into a tree of expression instances. As a first
+# step, we care only about arithmetic expressions involving
+# integers and operations like addition (+), subtraction (-),
+# multiplication (*), division (Fortran / is Python //, Fortran // is
+# concatenate), and exponentiation (**). In addition, .pyf files may
+# contain C expressions; support for these is implemented here as well.
+#
+# TODO: support logical constants (Op.BOOLEAN)
+# TODO: support logical operators (.AND., ...)
+# TODO: support defined operators (.MYOP., ...)
+#
+__all__ = ['Expr']
+
+
+import re
+import warnings
+from enum import Enum
+from math import gcd
+
+
+class Language(Enum):
+    """
+    Used as Expr.tostring language argument.
+    """
+    Python = 0
+    Fortran = 1
+    C = 2
+
+
+class Op(Enum):
+    """
+    Used as Expr op attribute.
+    """
+    INTEGER = 10
+    REAL = 12
+    COMPLEX = 15
+    STRING = 20
+    ARRAY = 30
+    SYMBOL = 40
+    TERNARY = 100
+    APPLY = 200
+    INDEXING = 210
+    CONCAT = 220
+    RELATIONAL = 300
+    TERMS = 1000
+    FACTORS = 2000
+    REF = 3000
+    DEREF = 3001
+
+
+class RelOp(Enum):
+    """
+    Used in Op.RELATIONAL expression to specify the function part.
+    """
+    EQ = 1
+    NE = 2
+    LT = 3
+    LE = 4
+    GT = 5
+    GE = 6
+
+    @classmethod
+    def fromstring(cls, s, language=Language.C):
+        if language is Language.Fortran:
+            return {'.eq.': RelOp.EQ, '.ne.': RelOp.NE,
+                    '.lt.': RelOp.LT, '.le.': RelOp.LE,
+                    '.gt.': RelOp.GT, '.ge.': RelOp.GE}[s.lower()]
+        return {'==': RelOp.EQ, '!=': RelOp.NE, '<': RelOp.LT,
+                '<=': RelOp.LE, '>': RelOp.GT, '>=': RelOp.GE}[s]
+
+    def tostring(self, language=Language.C):
+        if language is Language.Fortran:
+            return {RelOp.EQ: '.eq.', RelOp.NE: '.ne.',
+                    RelOp.LT: '.lt.', RelOp.LE: '.le.',
+                    RelOp.GT: '.gt.', RelOp.GE: '.ge.'}[self]
+        return {RelOp.EQ: '==', RelOp.NE: '!=',
+                RelOp.LT: '<', RelOp.LE: '<=',
+                RelOp.GT: '>', RelOp.GE: '>='}[self]
+
+
+class ArithOp(Enum):
+    """
+    Used in Op.APPLY expression to specify the function part.
+    """
+    POS = 1
+    NEG = 2
+    ADD = 3
+    SUB = 4
+    MUL = 5
+    DIV = 6
+    POW = 7
+
+
+class OpError(Exception):
+    pass
+
+
+class Precedence(Enum):
+    """
+    Used as Expr.tostring precedence argument.
+    """
+    ATOM = 0
+    POWER = 1
+    UNARY = 2
+    PRODUCT = 3
+    SUM = 4
+    LT = 6
+    EQ = 7
+    LAND = 11
+    LOR = 12
+    TERNARY = 13
+    ASSIGN = 14
+    TUPLE = 15
+    NONE = 100
+
+
+integer_types = (int,)
+number_types = (int, float)
+
+
+def _pairs_add(d, k, v):
+    # Internal utility method for updating terms and factors data.
+    c = d.get(k)
+    if c is None:
+        d[k] = v
+    else:
+        c = c + v
+        if c:
+            d[k] = c
+        else:
+            del d[k]
+
+
+class ExprWarning(UserWarning):
+    pass
+
+
+def ewarn(message):
+    warnings.warn(message, ExprWarning, stacklevel=2)
+
+
+class Expr:
+    """Represents a Fortran expression as an op-data pair.
+
+    Expr instances are hashable and sortable.
+    """
+
+    @staticmethod
+    def parse(s, language=Language.C):
+        """Parse a Fortran expression to an Expr.
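+
+        An illustrative example (editor's sketch; results described
+        loosely rather than as exact reprs):
+
+            Expr.parse('2 * x + 1')       # TERMS expression: 2*x + 1
+            Expr.parse('a .le. b',
+                       language=Language.Fortran)  # RELATIONAL expression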
+        """
+        return fromstring(s, language=language)
+
+    def __init__(self, op, data):
+        assert isinstance(op, Op)
+
+        # sanity checks
+        if op is Op.INTEGER:
+            # data is a 2-tuple of numeric object and a kind value
+            # (default is 4)
+            assert isinstance(data, tuple) and len(data) == 2
+            assert isinstance(data[0], int)
+            assert isinstance(data[1], (int, str)), data
+        elif op is Op.REAL:
+            # data is a 2-tuple of numeric object and a kind value
+            # (default is 4)
+            assert isinstance(data, tuple) and len(data) == 2
+            assert isinstance(data[0], float)
+            assert isinstance(data[1], (int, str)), data
+        elif op is Op.COMPLEX:
+            # data is a 2-tuple of constant expressions
+            assert isinstance(data, tuple) and len(data) == 2
+        elif op is Op.STRING:
+            # data is a 2-tuple of quoted string and a kind value
+            # (default is 1)
+            assert isinstance(data, tuple) and len(data) == 2
+            assert (isinstance(data[0], str)
+                    and data[0][::len(data[0])-1] in ('""', "''", '@@'))
+            assert isinstance(data[1], (int, str)), data
+        elif op is Op.SYMBOL:
+            # data is any hashable object
+            assert hash(data) is not None
+        elif op in (Op.ARRAY, Op.CONCAT):
+            # data is a tuple of expressions
+            assert isinstance(data, tuple)
+            assert all(isinstance(item, Expr) for item in data), data
+        elif op in (Op.TERMS, Op.FACTORS):
+            # data is a {<term|base>: <coefficient|exponent>} dict where
+            # dict values are nonzero Python integers
+            assert isinstance(data, dict)
+        elif op is Op.APPLY:
+            # data is (<function>, <operands>, <kwoperands>) where
+            # operands are Expr instances
+            assert isinstance(data, tuple) and len(data) == 3
+            # function is any hashable object
+            assert hash(data[0]) is not None
+            assert isinstance(data[1], tuple)
+            assert isinstance(data[2], dict)
+        elif op is Op.INDEXING:
+            # data is (<object>, <index>)
+            assert isinstance(data, tuple) and len(data) == 2
+            # function is any hashable object
+            assert hash(data[0]) is not None
+        elif op is Op.TERNARY:
+            # data is (<cond>, <expr1>, <expr2>)
+            assert isinstance(data, tuple) and len(data) == 3
+        elif op in (Op.REF, Op.DEREF):
+            # data is Expr instance
+            assert isinstance(data, Expr)
+        elif op is Op.RELATIONAL:
+            # data is (<relop>, <left>, <right>)
+            assert isinstance(data, tuple) and len(data) == 3
+        else:
+            raise NotImplementedError(
+                f'unknown op or missing sanity check: {op}')
+
+        self.op = op
+        self.data = data
+
+    def __eq__(self, other):
+        return (isinstance(other, Expr)
+                and self.op is other.op
+                and self.data == other.data)
+
+    def __hash__(self):
+        if self.op in (Op.TERMS, Op.FACTORS):
+            data = tuple(sorted(self.data.items()))
+        elif self.op is Op.APPLY:
+            data = self.data[:2] + tuple(sorted(self.data[2].items()))
+        else:
+            data = self.data
+        return hash((self.op, data))
+
+    def __lt__(self, other):
+        if isinstance(other, Expr):
+            if self.op is not other.op:
+                return self.op.value < other.op.value
+            if self.op in (Op.TERMS, Op.FACTORS):
+                return (tuple(sorted(self.data.items()))
+                        < tuple(sorted(other.data.items())))
+            if self.op is Op.APPLY:
+                if self.data[:2] != other.data[:2]:
+                    return self.data[:2] < other.data[:2]
+                return tuple(sorted(self.data[2].items())) < tuple(
+                    sorted(other.data[2].items()))
+            return self.data < other.data
+        return NotImplemented
+
+    def __le__(self, other): return self == other or self < other
+
+    def __gt__(self, other): return not (self <= other)
+
+    def __ge__(self, other): return not (self < other)
+
+    def __repr__(self):
+        return f'{type(self).__name__}({self.op}, {self.data!r})'
+
+    def __str__(self):
+        return self.tostring()
+
+    def tostring(self, parent_precedence=Precedence.NONE,
+                 language=Language.Fortran):
+        """Return a string representation of Expr.
+ """ + if self.op in (Op.INTEGER, Op.REAL): + precedence = (Precedence.SUM if self.data[0] < 0 + else Precedence.ATOM) + r = str(self.data[0]) + (f'_{self.data[1]}' + if self.data[1] != 4 else '') + elif self.op is Op.COMPLEX: + r = ', '.join(item.tostring(Precedence.TUPLE, language=language) + for item in self.data) + r = '(' + r + ')' + precedence = Precedence.ATOM + elif self.op is Op.SYMBOL: + precedence = Precedence.ATOM + r = str(self.data) + elif self.op is Op.STRING: + r = self.data[0] + if self.data[1] != 1: + r = self.data[1] + '_' + r + precedence = Precedence.ATOM + elif self.op is Op.ARRAY: + r = ', '.join(item.tostring(Precedence.TUPLE, language=language) + for item in self.data) + r = '[' + r + ']' + precedence = Precedence.ATOM + elif self.op is Op.TERMS: + terms = [] + for term, coeff in sorted(self.data.items()): + if coeff < 0: + op = ' - ' + coeff = -coeff + else: + op = ' + ' + if coeff == 1: + term = term.tostring(Precedence.SUM, language=language) + else: + if term == as_number(1): + term = str(coeff) + else: + term = f'{coeff} * ' + term.tostring( + Precedence.PRODUCT, language=language) + if terms: + terms.append(op) + elif op == ' - ': + terms.append('-') + terms.append(term) + r = ''.join(terms) or '0' + precedence = Precedence.SUM if terms else Precedence.ATOM + elif self.op is Op.FACTORS: + factors = [] + tail = [] + for base, exp in sorted(self.data.items()): + op = ' * ' + if exp == 1: + factor = base.tostring(Precedence.PRODUCT, + language=language) + elif language is Language.C: + if exp in range(2, 10): + factor = base.tostring(Precedence.PRODUCT, + language=language) + factor = ' * '.join([factor] * exp) + elif exp in range(-10, 0): + factor = base.tostring(Precedence.PRODUCT, + language=language) + tail += [factor] * -exp + continue + else: + factor = base.tostring(Precedence.TUPLE, + language=language) + factor = f'pow({factor}, {exp})' + else: + factor = base.tostring(Precedence.POWER, + language=language) + f' ** {exp}' + if factors: + factors.append(op) + factors.append(factor) + if tail: + if not factors: + factors += ['1'] + factors += ['/', '(', ' * '.join(tail), ')'] + r = ''.join(factors) or '1' + precedence = Precedence.PRODUCT if factors else Precedence.ATOM + elif self.op is Op.APPLY: + name, args, kwargs = self.data + if name is ArithOp.DIV and language is Language.C: + numer, denom = [arg.tostring(Precedence.PRODUCT, + language=language) + for arg in args] + r = f'{numer} / {denom}' + precedence = Precedence.PRODUCT + else: + args = [arg.tostring(Precedence.TUPLE, language=language) + for arg in args] + args += [k + '=' + v.tostring(Precedence.NONE) + for k, v in kwargs.items()] + r = f'{name}({", ".join(args)})' + precedence = Precedence.ATOM + elif self.op is Op.INDEXING: + name = self.data[0] + args = [arg.tostring(Precedence.TUPLE, language=language) + for arg in self.data[1:]] + r = f'{name}[{", ".join(args)}]' + precedence = Precedence.ATOM + elif self.op is Op.CONCAT: + args = [arg.tostring(Precedence.PRODUCT, language=language) + for arg in self.data] + r = " // ".join(args) + precedence = Precedence.PRODUCT + elif self.op is Op.TERNARY: + cond, expr1, expr2 = [a.tostring(Precedence.TUPLE, + language=language) + for a in self.data] + if language is Language.C: + r = f'({cond}?{expr1}:{expr2})' + elif language is Language.Python: + r = f'({expr1} if {cond} else {expr2})' + elif language is Language.Fortran: + r = f'merge({expr1}, {expr2}, {cond})' + else: + raise NotImplementedError( + f'tostring for {self.op} and {language}') + 
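+            # Editor's note -- illustrative renderings of a ternary with
+            # cond = 'n > 0', expr1 = 'a', expr2 = 'b' (assumed inputs):
+            #   C:       (n > 0?a:b)
+            #   Python:  (a if n > 0 else b)
+            #   Fortran: merge(a, b, n > 0)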
precedence = Precedence.ATOM + elif self.op is Op.REF: + r = '&' + self.data.tostring(Precedence.UNARY, language=language) + precedence = Precedence.UNARY + elif self.op is Op.DEREF: + r = '*' + self.data.tostring(Precedence.UNARY, language=language) + precedence = Precedence.UNARY + elif self.op is Op.RELATIONAL: + rop, left, right = self.data + precedence = (Precedence.EQ if rop in (RelOp.EQ, RelOp.NE) + else Precedence.LT) + left = left.tostring(precedence, language=language) + right = right.tostring(precedence, language=language) + rop = rop.tostring(language=language) + r = f'{left} {rop} {right}' + else: + raise NotImplementedError(f'tostring for op {self.op}') + if parent_precedence.value < precedence.value: + # If parent precedence is higher than operand precedence, + # operand will be enclosed in parenthesis. + return '(' + r + ')' + return r + + def __pos__(self): + return self + + def __neg__(self): + return self * -1 + + def __add__(self, other): + other = as_expr(other) + if isinstance(other, Expr): + if self.op is other.op: + if self.op in (Op.INTEGER, Op.REAL): + return as_number( + self.data[0] + other.data[0], + max(self.data[1], other.data[1])) + if self.op is Op.COMPLEX: + r1, i1 = self.data + r2, i2 = other.data + return as_complex(r1 + r2, i1 + i2) + if self.op is Op.TERMS: + r = Expr(self.op, dict(self.data)) + for k, v in other.data.items(): + _pairs_add(r.data, k, v) + return normalize(r) + if self.op is Op.COMPLEX and other.op in (Op.INTEGER, Op.REAL): + return self + as_complex(other) + elif self.op in (Op.INTEGER, Op.REAL) and other.op is Op.COMPLEX: + return as_complex(self) + other + elif self.op is Op.REAL and other.op is Op.INTEGER: + return self + as_real(other, kind=self.data[1]) + elif self.op is Op.INTEGER and other.op is Op.REAL: + return as_real(self, kind=other.data[1]) + other + return as_terms(self) + as_terms(other) + return NotImplemented + + def __radd__(self, other): + if isinstance(other, number_types): + return as_number(other) + self + return NotImplemented + + def __sub__(self, other): + return self + (-other) + + def __rsub__(self, other): + if isinstance(other, number_types): + return as_number(other) - self + return NotImplemented + + def __mul__(self, other): + other = as_expr(other) + if isinstance(other, Expr): + if self.op is other.op: + if self.op in (Op.INTEGER, Op.REAL): + return as_number(self.data[0] * other.data[0], + max(self.data[1], other.data[1])) + elif self.op is Op.COMPLEX: + r1, i1 = self.data + r2, i2 = other.data + return as_complex(r1 * r2 - i1 * i2, r1 * i2 + r2 * i1) + + if self.op is Op.FACTORS: + r = Expr(self.op, dict(self.data)) + for k, v in other.data.items(): + _pairs_add(r.data, k, v) + return normalize(r) + elif self.op is Op.TERMS: + r = Expr(self.op, {}) + for t1, c1 in self.data.items(): + for t2, c2 in other.data.items(): + _pairs_add(r.data, t1 * t2, c1 * c2) + return normalize(r) + + if self.op is Op.COMPLEX and other.op in (Op.INTEGER, Op.REAL): + return self * as_complex(other) + elif other.op is Op.COMPLEX and self.op in (Op.INTEGER, Op.REAL): + return as_complex(self) * other + elif self.op is Op.REAL and other.op is Op.INTEGER: + return self * as_real(other, kind=self.data[1]) + elif self.op is Op.INTEGER and other.op is Op.REAL: + return as_real(self, kind=other.data[1]) * other + + if self.op is Op.TERMS: + return self * as_terms(other) + elif other.op is Op.TERMS: + return as_terms(self) * other + + return as_factors(self) * as_factors(other) + return NotImplemented + + def __rmul__(self, 
other):
+        if isinstance(other, number_types):
+            return as_number(other) * self
+        return NotImplemented
+
+    def __pow__(self, other):
+        other = as_expr(other)
+        if isinstance(other, Expr):
+            if other.op is Op.INTEGER:
+                exponent = other.data[0]
+                # TODO: other kind not used
+                if exponent == 0:
+                    return as_number(1)
+                if exponent == 1:
+                    return self
+                if exponent > 0:
+                    if self.op is Op.FACTORS:
+                        r = Expr(self.op, {})
+                        for k, v in self.data.items():
+                            r.data[k] = v * exponent
+                        return normalize(r)
+                    return self * (self ** (exponent - 1))
+                elif exponent != -1:
+                    return (self ** (-exponent)) ** -1
+                return Expr(Op.FACTORS, {self: exponent})
+            return as_apply(ArithOp.POW, self, other)
+        return NotImplemented
+
+    def __truediv__(self, other):
+        other = as_expr(other)
+        if isinstance(other, Expr):
+            # Fortran / is different from Python /:
+            # - `/` is a truncate operation for integer operands
+            return normalize(as_apply(ArithOp.DIV, self, other))
+        return NotImplemented
+
+    def __rtruediv__(self, other):
+        other = as_expr(other)
+        if isinstance(other, Expr):
+            return other / self
+        return NotImplemented
+
+    def __floordiv__(self, other):
+        other = as_expr(other)
+        if isinstance(other, Expr):
+            # Fortran // is different from Python //:
+            # - `//` is a concatenate operation for string operands
+            return normalize(Expr(Op.CONCAT, (self, other)))
+        return NotImplemented
+
+    def __rfloordiv__(self, other):
+        other = as_expr(other)
+        if isinstance(other, Expr):
+            return other // self
+        return NotImplemented
+
+    def __call__(self, *args, **kwargs):
+        # In Fortran, parentheses () are used both for function calls
+        # and for indexing operations.
+        #
+        # TODO: implement a method for deciding when __call__ should
+        # return an INDEXING expression.
+        return as_apply(self, *map(as_expr, args),
+                        **dict((k, as_expr(v)) for k, v in kwargs.items()))
+
+    def __getitem__(self, index):
+        # Provided to support C indexing operations that .pyf files
+        # may contain.
+        index = as_expr(index)
+        if not isinstance(index, tuple):
+            index = index,
+        if len(index) > 1:
+            ewarn(f'C-index should be a single expression but got `{index}`')
+        return Expr(Op.INDEXING, (self,) + index)
+
+    def substitute(self, symbols_map):
+        """Recursively substitute symbols with values in symbols map.
+
+        Symbols map is a dictionary of symbol-expression pairs.
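+
+        Illustrative (editor's sketch): with n = as_symbol('n'),
+
+            (n + 1).substitute({n: as_number(10)})   # -> INTEGER 11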
+ """ + if self.op is Op.SYMBOL: + value = symbols_map.get(self) + if value is None: + return self + m = re.match(r'\A(@__f2py_PARENTHESIS_(\w+)_\d+@)\Z', self.data) + if m: + # complement to fromstring method + items, paren = m.groups() + if paren in ['ROUNDDIV', 'SQUARE']: + return as_array(value) + assert paren == 'ROUND', (paren, value) + return value + if self.op in (Op.INTEGER, Op.REAL, Op.STRING): + return self + if self.op in (Op.ARRAY, Op.COMPLEX): + return Expr(self.op, tuple(item.substitute(symbols_map) + for item in self.data)) + if self.op is Op.CONCAT: + return normalize(Expr(self.op, tuple(item.substitute(symbols_map) + for item in self.data))) + if self.op is Op.TERMS: + r = None + for term, coeff in self.data.items(): + if r is None: + r = term.substitute(symbols_map) * coeff + else: + r += term.substitute(symbols_map) * coeff + if r is None: + ewarn('substitute: empty TERMS expression interpreted as' + ' int-literal 0') + return as_number(0) + return r + if self.op is Op.FACTORS: + r = None + for base, exponent in self.data.items(): + if r is None: + r = base.substitute(symbols_map) ** exponent + else: + r *= base.substitute(symbols_map) ** exponent + if r is None: + ewarn('substitute: empty FACTORS expression interpreted' + ' as int-literal 1') + return as_number(1) + return r + if self.op is Op.APPLY: + target, args, kwargs = self.data + if isinstance(target, Expr): + target = target.substitute(symbols_map) + args = tuple(a.substitute(symbols_map) for a in args) + kwargs = dict((k, v.substitute(symbols_map)) + for k, v in kwargs.items()) + return normalize(Expr(self.op, (target, args, kwargs))) + if self.op is Op.INDEXING: + func = self.data[0] + if isinstance(func, Expr): + func = func.substitute(symbols_map) + args = tuple(a.substitute(symbols_map) for a in self.data[1:]) + return normalize(Expr(self.op, (func,) + args)) + if self.op is Op.TERNARY: + operands = tuple(a.substitute(symbols_map) for a in self.data) + return normalize(Expr(self.op, operands)) + if self.op in (Op.REF, Op.DEREF): + return normalize(Expr(self.op, self.data.substitute(symbols_map))) + if self.op is Op.RELATIONAL: + rop, left, right = self.data + left = left.substitute(symbols_map) + right = right.substitute(symbols_map) + return normalize(Expr(self.op, (rop, left, right))) + raise NotImplementedError(f'substitute method for {self.op}: {self!r}') + + def traverse(self, visit, *args, **kwargs): + """Traverse expression tree with visit function. + + The visit function is applied to an expression with given args + and kwargs. + + Traverse call returns an expression returned by visit when not + None, otherwise return a new normalized expression with + traverse-visit sub-expressions. 
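+
+        Illustrative (editor's sketch) -- collecting all symbols, much
+        like the symbols() method below does:
+
+            found = set()
+            def visit(expr):
+                if expr.op is Op.SYMBOL:
+                    found.add(expr)   # returning None keeps traversing
+            expr.traverse(visit)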
+ """ + result = visit(self, *args, **kwargs) + if result is not None: + return result + + if self.op in (Op.INTEGER, Op.REAL, Op.STRING, Op.SYMBOL): + return self + elif self.op in (Op.COMPLEX, Op.ARRAY, Op.CONCAT, Op.TERNARY): + return normalize(Expr(self.op, tuple( + item.traverse(visit, *args, **kwargs) + for item in self.data))) + elif self.op in (Op.TERMS, Op.FACTORS): + data = {} + for k, v in self.data.items(): + k = k.traverse(visit, *args, **kwargs) + v = (v.traverse(visit, *args, **kwargs) + if isinstance(v, Expr) else v) + if k in data: + v = data[k] + v + data[k] = v + return normalize(Expr(self.op, data)) + elif self.op is Op.APPLY: + obj = self.data[0] + func = (obj.traverse(visit, *args, **kwargs) + if isinstance(obj, Expr) else obj) + operands = tuple(operand.traverse(visit, *args, **kwargs) + for operand in self.data[1]) + kwoperands = dict((k, v.traverse(visit, *args, **kwargs)) + for k, v in self.data[2].items()) + return normalize(Expr(self.op, (func, operands, kwoperands))) + elif self.op is Op.INDEXING: + obj = self.data[0] + obj = (obj.traverse(visit, *args, **kwargs) + if isinstance(obj, Expr) else obj) + indices = tuple(index.traverse(visit, *args, **kwargs) + for index in self.data[1:]) + return normalize(Expr(self.op, (obj,) + indices)) + elif self.op in (Op.REF, Op.DEREF): + return normalize(Expr(self.op, + self.data.traverse(visit, *args, **kwargs))) + elif self.op is Op.RELATIONAL: + rop, left, right = self.data + left = left.traverse(visit, *args, **kwargs) + right = right.traverse(visit, *args, **kwargs) + return normalize(Expr(self.op, (rop, left, right))) + raise NotImplementedError(f'traverse method for {self.op}') + + def contains(self, other): + """Check if self contains other. + """ + found = [] + + def visit(expr, found=found): + if found: + return expr + elif expr == other: + found.append(1) + return expr + + self.traverse(visit) + + return len(found) != 0 + + def symbols(self): + """Return a set of symbols contained in self. + """ + found = set() + + def visit(expr, found=found): + if expr.op is Op.SYMBOL: + found.add(expr) + + self.traverse(visit) + + return found + + def polynomial_atoms(self): + """Return a set of expressions used as atoms in polynomial self. + """ + found = set() + + def visit(expr, found=found): + if expr.op is Op.FACTORS: + for b in expr.data: + b.traverse(visit) + return expr + if expr.op in (Op.TERMS, Op.COMPLEX): + return + if expr.op is Op.APPLY and isinstance(expr.data[0], ArithOp): + if expr.data[0] is ArithOp.POW: + expr.data[1][0].traverse(visit) + return expr + return + if expr.op in (Op.INTEGER, Op.REAL): + return expr + + found.add(expr) + + if expr.op in (Op.INDEXING, Op.APPLY): + return expr + + self.traverse(visit) + + return found + + def linear_solve(self, symbol): + """Return a, b such that a * symbol + b == self. + + If self is not linear with respect to symbol, raise RuntimeError. + """ + b = self.substitute({symbol: as_number(0)}) + ax = self - b + a = ax.substitute({symbol: as_number(1)}) + + zero, _ = as_numer_denom(a * symbol - ax) + + if zero != as_number(0): + raise RuntimeError(f'not a {symbol}-linear equation:' + f' {a} * {symbol} + {b} == {self}') + return a, b + + +def normalize(obj): + """Normalize Expr and apply basic evaluation methods. 
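+
+    Illustrative (editor's sketch): with x = as_symbol('x'),
+
+        normalize(x + x)   # -> TERMS expression {x: 2}, i.e. 2*x
+        normalize(x - x)   # -> INTEGER 0 (zero coefficients drop out)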
+    """
+    if not isinstance(obj, Expr):
+        return obj
+
+    if obj.op is Op.TERMS:
+        d = {}
+        for t, c in obj.data.items():
+            if c == 0:
+                continue
+            if t.op is Op.COMPLEX and c != 1:
+                t = t * c
+                c = 1
+            if t.op is Op.TERMS:
+                for t1, c1 in t.data.items():
+                    _pairs_add(d, t1, c1 * c)
+            else:
+                _pairs_add(d, t, c)
+        if len(d) == 0:
+            # TODO: determine correct kind
+            return as_number(0)
+        elif len(d) == 1:
+            (t, c), = d.items()
+            if c == 1:
+                return t
+        return Expr(Op.TERMS, d)
+
+    if obj.op is Op.FACTORS:
+        coeff = 1
+        d = {}
+        for b, e in obj.data.items():
+            if e == 0:
+                continue
+            if b.op is Op.TERMS and isinstance(e, integer_types) and e > 1:
+                # expand integer powers of sums
+                b = b * (b ** (e - 1))
+                e = 1
+
+            if b.op in (Op.INTEGER, Op.REAL):
+                if e == 1:
+                    coeff *= b.data[0]
+                elif e > 0:
+                    coeff *= b.data[0] ** e
+                else:
+                    _pairs_add(d, b, e)
+            elif b.op is Op.FACTORS:
+                if e > 0 and isinstance(e, integer_types):
+                    for b1, e1 in b.data.items():
+                        _pairs_add(d, b1, e1 * e)
+                else:
+                    _pairs_add(d, b, e)
+            else:
+                _pairs_add(d, b, e)
+        if len(d) == 0 or coeff == 0:
+            # TODO: determine correct kind
+            assert isinstance(coeff, number_types)
+            return as_number(coeff)
+        elif len(d) == 1:
+            (b, e), = d.items()
+            if e == 1:
+                t = b
+            else:
+                t = Expr(Op.FACTORS, d)
+            if coeff == 1:
+                return t
+            return Expr(Op.TERMS, {t: coeff})
+        elif coeff == 1:
+            return Expr(Op.FACTORS, d)
+        else:
+            return Expr(Op.TERMS, {Expr(Op.FACTORS, d): coeff})
+
+    if obj.op is Op.APPLY and obj.data[0] is ArithOp.DIV:
+        dividend, divisor = obj.data[1]
+        t1, c1 = as_term_coeff(dividend)
+        t2, c2 = as_term_coeff(divisor)
+        if isinstance(c1, integer_types) and isinstance(c2, integer_types):
+            g = gcd(c1, c2)
+            c1, c2 = c1//g, c2//g
+        else:
+            c1, c2 = c1/c2, 1
+
+        if t1.op is Op.APPLY and t1.data[0] is ArithOp.DIV:
+            numer = t1.data[1][0] * c1
+            denom = t1.data[1][1] * t2 * c2
+            return as_apply(ArithOp.DIV, numer, denom)
+
+        if t2.op is Op.APPLY and t2.data[0] is ArithOp.DIV:
+            numer = t2.data[1][1] * t1 * c1
+            denom = t2.data[1][0] * c2
+            return as_apply(ArithOp.DIV, numer, denom)
+
+        d = dict(as_factors(t1).data)
+        for b, e in as_factors(t2).data.items():
+            _pairs_add(d, b, -e)
+        numer, denom = {}, {}
+        for b, e in d.items():
+            if e > 0:
+                numer[b] = e
+            else:
+                denom[b] = -e
+        numer = normalize(Expr(Op.FACTORS, numer)) * c1
+        denom = normalize(Expr(Op.FACTORS, denom)) * c2
+
+        if denom.op in (Op.INTEGER, Op.REAL) and denom.data[0] == 1:
+            # TODO: denom kind not used
+            return numer
+        return as_apply(ArithOp.DIV, numer, denom)
+
+    if obj.op is Op.CONCAT:
+        lst = [obj.data[0]]
+        for s in obj.data[1:]:
+            last = lst[-1]
+            if (
+                    last.op is Op.STRING
+                    and s.op is Op.STRING
+                    and last.data[0][0] in '"\''
+                    and s.data[0][0] == last.data[0][-1]
+            ):
+                new_last = as_string(last.data[0][:-1] + s.data[0][1:],
+                                     max(last.data[1], s.data[1]))
+                lst[-1] = new_last
+            else:
+                lst.append(s)
+        if len(lst) == 1:
+            return lst[0]
+        return Expr(Op.CONCAT, tuple(lst))
+
+    if obj.op is Op.TERNARY:
+        cond, expr1, expr2 = map(normalize, obj.data)
+        if cond.op is Op.INTEGER:
+            return expr1 if cond.data[0] else expr2
+        return Expr(Op.TERNARY, (cond, expr1, expr2))
+
+    return obj
+
+
+def as_expr(obj):
+    """Convert non-Expr objects to Expr objects.
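+
+    Illustrative (editor's sketch):
+
+        as_expr(3)         # -> Expr(Op.INTEGER, (3, 4))
+        as_expr(1 + 2j)    # -> COMPLEX expression
+        as_expr('ab')      # -> STRING expression "'ab'" (repr adds quotes)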
+ """ + if isinstance(obj, complex): + return as_complex(obj.real, obj.imag) + if isinstance(obj, number_types): + return as_number(obj) + if isinstance(obj, str): + # STRING expression holds string with boundary quotes, hence + # applying repr: + return as_string(repr(obj)) + if isinstance(obj, tuple): + return tuple(map(as_expr, obj)) + return obj + + +def as_symbol(obj): + """Return object as SYMBOL expression (variable or unparsed expression). + """ + return Expr(Op.SYMBOL, obj) + + +def as_number(obj, kind=4): + """Return object as INTEGER or REAL constant. + """ + if isinstance(obj, int): + return Expr(Op.INTEGER, (obj, kind)) + if isinstance(obj, float): + return Expr(Op.REAL, (obj, kind)) + if isinstance(obj, Expr): + if obj.op in (Op.INTEGER, Op.REAL): + return obj + raise OpError(f'cannot convert {obj} to INTEGER or REAL constant') + + +def as_integer(obj, kind=4): + """Return object as INTEGER constant. + """ + if isinstance(obj, int): + return Expr(Op.INTEGER, (obj, kind)) + if isinstance(obj, Expr): + if obj.op is Op.INTEGER: + return obj + raise OpError(f'cannot convert {obj} to INTEGER constant') + + +def as_real(obj, kind=4): + """Return object as REAL constant. + """ + if isinstance(obj, int): + return Expr(Op.REAL, (float(obj), kind)) + if isinstance(obj, float): + return Expr(Op.REAL, (obj, kind)) + if isinstance(obj, Expr): + if obj.op is Op.REAL: + return obj + elif obj.op is Op.INTEGER: + return Expr(Op.REAL, (float(obj.data[0]), kind)) + raise OpError(f'cannot convert {obj} to REAL constant') + + +def as_string(obj, kind=1): + """Return object as STRING expression (string literal constant). + """ + return Expr(Op.STRING, (obj, kind)) + + +def as_array(obj): + """Return object as ARRAY expression (array constant). + """ + if isinstance(obj, Expr): + obj = obj, + return Expr(Op.ARRAY, obj) + + +def as_complex(real, imag=0): + """Return object as COMPLEX expression (complex literal constant). + """ + return Expr(Op.COMPLEX, (as_expr(real), as_expr(imag))) + + +def as_apply(func, *args, **kwargs): + """Return object as APPLY expression (function call, constructor, etc.) + """ + return Expr(Op.APPLY, + (func, tuple(map(as_expr, args)), + dict((k, as_expr(v)) for k, v in kwargs.items()))) + + +def as_ternary(cond, expr1, expr2): + """Return object as TERNARY expression (cond?expr1:expr2). + """ + return Expr(Op.TERNARY, (cond, expr1, expr2)) + + +def as_ref(expr): + """Return object as referencing expression. + """ + return Expr(Op.REF, expr) + + +def as_deref(expr): + """Return object as dereferencing expression. + """ + return Expr(Op.DEREF, expr) + + +def as_eq(left, right): + return Expr(Op.RELATIONAL, (RelOp.EQ, left, right)) + + +def as_ne(left, right): + return Expr(Op.RELATIONAL, (RelOp.NE, left, right)) + + +def as_lt(left, right): + return Expr(Op.RELATIONAL, (RelOp.LT, left, right)) + + +def as_le(left, right): + return Expr(Op.RELATIONAL, (RelOp.LE, left, right)) + + +def as_gt(left, right): + return Expr(Op.RELATIONAL, (RelOp.GT, left, right)) + + +def as_ge(left, right): + return Expr(Op.RELATIONAL, (RelOp.GE, left, right)) + + +def as_terms(obj): + """Return expression as TERMS expression. 
+ """ + if isinstance(obj, Expr): + obj = normalize(obj) + if obj.op is Op.TERMS: + return obj + if obj.op is Op.INTEGER: + return Expr(Op.TERMS, {as_integer(1, obj.data[1]): obj.data[0]}) + if obj.op is Op.REAL: + return Expr(Op.TERMS, {as_real(1, obj.data[1]): obj.data[0]}) + return Expr(Op.TERMS, {obj: 1}) + raise OpError(f'cannot convert {type(obj)} to terms Expr') + + +def as_factors(obj): + """Return expression as FACTORS expression. + """ + if isinstance(obj, Expr): + obj = normalize(obj) + if obj.op is Op.FACTORS: + return obj + if obj.op is Op.TERMS: + if len(obj.data) == 1: + (term, coeff), = obj.data.items() + if coeff == 1: + return Expr(Op.FACTORS, {term: 1}) + return Expr(Op.FACTORS, {term: 1, Expr.number(coeff): 1}) + if ((obj.op is Op.APPLY + and obj.data[0] is ArithOp.DIV + and not obj.data[2])): + return Expr(Op.FACTORS, {obj.data[1][0]: 1, obj.data[1][1]: -1}) + return Expr(Op.FACTORS, {obj: 1}) + raise OpError(f'cannot convert {type(obj)} to terms Expr') + + +def as_term_coeff(obj): + """Return expression as term-coefficient pair. + """ + if isinstance(obj, Expr): + obj = normalize(obj) + if obj.op is Op.INTEGER: + return as_integer(1, obj.data[1]), obj.data[0] + if obj.op is Op.REAL: + return as_real(1, obj.data[1]), obj.data[0] + if obj.op is Op.TERMS: + if len(obj.data) == 1: + (term, coeff), = obj.data.items() + return term, coeff + # TODO: find common divisor of coefficients + if obj.op is Op.APPLY and obj.data[0] is ArithOp.DIV: + t, c = as_term_coeff(obj.data[1][0]) + return as_apply(ArithOp.DIV, t, obj.data[1][1]), c + return obj, 1 + raise OpError(f'cannot convert {type(obj)} to term and coeff') + + +def as_numer_denom(obj): + """Return expression as numer-denom pair. + """ + if isinstance(obj, Expr): + obj = normalize(obj) + if obj.op in (Op.INTEGER, Op.REAL, Op.COMPLEX, Op.SYMBOL, + Op.INDEXING, Op.TERNARY): + return obj, as_number(1) + elif obj.op is Op.APPLY: + if obj.data[0] is ArithOp.DIV and not obj.data[2]: + numers, denoms = map(as_numer_denom, obj.data[1]) + return numers[0] * denoms[1], numers[1] * denoms[0] + return obj, as_number(1) + elif obj.op is Op.TERMS: + numers, denoms = [], [] + for term, coeff in obj.data.items(): + n, d = as_numer_denom(term) + n = n * coeff + numers.append(n) + denoms.append(d) + numer, denom = as_number(0), as_number(1) + for i in range(len(numers)): + n = numers[i] + for j in range(len(numers)): + if i != j: + n *= denoms[j] + numer += n + denom *= denoms[i] + if denom.op in (Op.INTEGER, Op.REAL) and denom.data[0] < 0: + numer, denom = -numer, -denom + return numer, denom + elif obj.op is Op.FACTORS: + numer, denom = as_number(1), as_number(1) + for b, e in obj.data.items(): + bnumer, bdenom = as_numer_denom(b) + if e > 0: + numer *= bnumer ** e + denom *= bdenom ** e + elif e < 0: + numer *= bdenom ** (-e) + denom *= bnumer ** (-e) + return numer, denom + raise OpError(f'cannot convert {type(obj)} to numer and denom') + + +def _counter(): + # Used internally to generate unique dummy symbols + counter = 0 + while True: + counter += 1 + yield counter + + +COUNTER = _counter() + + +def eliminate_quotes(s): + """Replace quoted substrings of input string. + + Return a new string and a mapping of replacements. 
+ """ + d = {} + + def repl(m): + kind, value = m.groups()[:2] + if kind: + # remove trailing underscore + kind = kind[:-1] + p = {"'": "SINGLE", '"': "DOUBLE"}[value[0]] + k = f'{kind}@__f2py_QUOTES_{p}_{COUNTER.__next__()}@' + d[k] = value + return k + + new_s = re.sub(r'({kind}_|)({single_quoted}|{double_quoted})'.format( + kind=r'\w[\w\d_]*', + single_quoted=r"('([^'\\]|(\\.))*')", + double_quoted=r'("([^"\\]|(\\.))*")'), + repl, s) + + assert '"' not in new_s + assert "'" not in new_s + + return new_s, d + + +def insert_quotes(s, d): + """Inverse of eliminate_quotes. + """ + for k, v in d.items(): + kind = k[:k.find('@')] + if kind: + kind += '_' + s = s.replace(k, kind + v) + return s + + +def replace_parenthesis(s): + """Replace substrings of input that are enclosed in parenthesis. + + Return a new string and a mapping of replacements. + """ + # Find a parenthesis pair that appears first. + + # Fortran deliminator are `(`, `)`, `[`, `]`, `(/', '/)`, `/`. + # We don't handle `/` deliminator because it is not a part of an + # expression. + left, right = None, None + mn_i = len(s) + for left_, right_ in (('(/', '/)'), + '()', + '{}', # to support C literal structs + '[]'): + i = s.find(left_) + if i == -1: + continue + if i < mn_i: + mn_i = i + left, right = left_, right_ + + if left is None: + return s, {} + + i = mn_i + j = s.find(right, i) + + while s.count(left, i + 1, j) != s.count(right, i + 1, j): + j = s.find(right, j + 1) + if j == -1: + raise ValueError(f'Mismatch of {left+right} parenthesis in {s!r}') + + p = {'(': 'ROUND', '[': 'SQUARE', '{': 'CURLY', '(/': 'ROUNDDIV'}[left] + + k = f'@__f2py_PARENTHESIS_{p}_{COUNTER.__next__()}@' + v = s[i+len(left):j] + r, d = replace_parenthesis(s[j+len(right):]) + d[k] = v + return s[:i] + k + r, d + + +def _get_parenthesis_kind(s): + assert s.startswith('@__f2py_PARENTHESIS_'), s + return s.split('_')[4] + + +def unreplace_parenthesis(s, d): + """Inverse of replace_parenthesis. + """ + for k, v in d.items(): + p = _get_parenthesis_kind(k) + left = dict(ROUND='(', SQUARE='[', CURLY='{', ROUNDDIV='(/')[p] + right = dict(ROUND=')', SQUARE=']', CURLY='}', ROUNDDIV='/)')[p] + s = s.replace(k, left + v + right) + return s + + +def fromstring(s, language=Language.C): + """Create an expression from a string. + + This is a "lazy" parser, that is, only arithmetic operations are + resolved, non-arithmetic operations are treated as symbols. + """ + r = _FromStringWorker(language=language).parse(s) + if isinstance(r, Expr): + return r + raise ValueError(f'failed to parse `{s}` to Expr instance: got `{r}`') + + +class _Pair: + # Internal class to represent a pair of expressions + + def __init__(self, left, right): + self.left = left + self.right = right + + def substitute(self, symbols_map): + left, right = self.left, self.right + if isinstance(left, Expr): + left = left.substitute(symbols_map) + if isinstance(right, Expr): + right = right.substitute(symbols_map) + return _Pair(left, right) + + def __repr__(self): + return f'{type(self).__name__}({self.left}, {self.right})' + + +class _FromStringWorker: + + def __init__(self, language=Language.C): + self.original = None + self.quotes_map = None + self.language = language + + def finalize_string(self, s): + return insert_quotes(s, self.quotes_map) + + def parse(self, inp): + self.original = inp + unquoted, self.quotes_map = eliminate_quotes(inp) + return self.process(unquoted) + + def process(self, s, context='expr'): + """Parse string within the given context. 
+
+        More generally, the context may define the result in case of an
+        ambiguous expression. For instance, consider the expressions
+        `f(x, y)` and `(x, y) + (a, b)`, where `f` is a function and the
+        pair `(x, y)` denotes a complex number. With context "args" or
+        "expr", the subexpression `(x, y)` will be parsed into an
+        argument list or into a complex number, respectively.
+        """
+        if isinstance(s, (list, tuple)):
+            return type(s)(self.process(s_, context) for s_ in s)
+
+        assert isinstance(s, str), (type(s), s)
+
+        # replace subexpressions in parenthesis with f2py @-names
+        r, raw_symbols_map = replace_parenthesis(s)
+        r = r.strip()
+
+        def restore(r):
+            # restores subexpressions marked with f2py @-names
+            if isinstance(r, (list, tuple)):
+                return type(r)(map(restore, r))
+            return unreplace_parenthesis(r, raw_symbols_map)
+
+        # comma-separated tuple
+        if ',' in r:
+            operands = restore(r.split(','))
+            if context == 'args':
+                return tuple(self.process(operands))
+            if context == 'expr':
+                if len(operands) == 2:
+                    # complex number literal
+                    return as_complex(*self.process(operands))
+            raise NotImplementedError(
+                f'parsing comma-separated list (context={context}): {r}')
+
+        # ternary operation
+        m = re.match(r'\A([^?]+)[?]([^:]+)[:](.+)\Z', r)
+        if m:
+            assert context == 'expr', context
+            oper, expr1, expr2 = restore(m.groups())
+            oper = self.process(oper)
+            expr1 = self.process(expr1)
+            expr2 = self.process(expr2)
+            return as_ternary(oper, expr1, expr2)
+
+        # relational expression
+        if self.language is Language.Fortran:
+            m = re.match(
+                r'\A(.+)\s*[.](eq|ne|lt|le|gt|ge)[.]\s*(.+)\Z', r, re.I)
+        else:
+            m = re.match(
+                r'\A(.+)\s*([=][=]|[!][=]|[<][=]|[<]|[>][=]|[>])\s*(.+)\Z', r)
+        if m:
+            left, rop, right = m.groups()
+            if self.language is Language.Fortran:
+                rop = '.' + rop + '.'
+            left, right = self.process(restore((left, right)))
+            rop = RelOp.fromstring(rop, language=self.language)
+            return Expr(Op.RELATIONAL, (rop, left, right))
+
+        # keyword argument
+        m = re.match(r'\A(\w[\w\d_]*)\s*[=](.*)\Z', r)
+        if m:
+            keyname, value = m.groups()
+            value = restore(value)
+            return _Pair(keyname, self.process(value))
+
+        # addition/subtraction operations
+        operands = re.split(r'((?<!\d[edED])[+-])', r)
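+        # The negative lookbehind above (reconstructed from context; the
+        # incoming text was truncated mid-pattern) keeps the sign of a
+        # floating-point exponent, as in `1.0e-5`, from being taken for
+        # a binary +/- operator, so only genuine addition/subtraction
+        # sites are split.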
+        if len(operands) > 1:
+            result = self.process(restore(operands[0] or '0'))
+            for op, operand in zip(operands[1::2], operands[2::2]):
+                operand = self.process(restore(operand))
+                op = op.strip()
+                if op == '+':
+                    result += operand
+                else:
+                    assert op == '-'
+                    result -= operand
+            return result
+
+        # string concatenate operation
+        if self.language is Language.Fortran and '//' in r:
+            operands = restore(r.split('//'))
+            return Expr(Op.CONCAT,
+                        tuple(self.process(operands)))
+
+        # multiplication/division operations
+        operands = re.split(r'(?<=[@\w\d_])\s*([*]|/)',
+                            (r if self.language is Language.C
+                             else r.replace('**', '@__f2py_DOUBLE_STAR@')))
+        if len(operands) > 1:
+            operands = restore(operands)
+            if self.language is not Language.C:
+                operands = [operand.replace('@__f2py_DOUBLE_STAR@', '**')
+                            for operand in operands]
+            # Expression is an arithmetic product
+            result = self.process(operands[0])
+            for op, operand in zip(operands[1::2], operands[2::2]):
+                operand = self.process(operand)
+                op = op.strip()
+                if op == '*':
+                    result *= operand
+                else:
+                    assert op == '/'
+                    result /= operand
+            return result
+
+        # referencing/dereferencing
+        if r.startswith('*') or r.startswith('&'):
+            op = {'*': Op.DEREF, '&': Op.REF}[r[0]]
+            operand = self.process(restore(r[1:]))
+            return Expr(op, operand)
+
+        # exponentiation operations
+        if self.language is not Language.C and '**' in r:
+            operands = list(reversed(restore(r.split('**'))))
+            result = self.process(operands[0])
+            for operand in operands[1:]:
+                operand = self.process(operand)
+                result = operand ** result
+            return result
+
+        # int-literal-constant
+        m = re.match(r'\A({digit_string})({kind}|)\Z'.format(
+            digit_string=r'\d+',
+            kind=r'_(\d+|\w[\w\d_]*)'), r)
+        if m:
+            value, _, kind = m.groups()
+            if kind and kind.isdigit():
+                kind = int(kind)
+            return as_integer(int(value), kind or 4)
+
+        # real-literal-constant
+        m = re.match(r'\A({significant}({exponent}|)|\d+{exponent})({kind}|)\Z'
+                     .format(
+                         significant=r'[.]\d+|\d+[.]\d*',
+                         exponent=r'[edED][+-]?\d+',
+                         kind=r'_(\d+|\w[\w\d_]*)'), r)
+        if m:
+            value, _, _, kind = m.groups()
+            if kind and kind.isdigit():
+                kind = int(kind)
+            value = value.lower()
+            if 'd' in value:
+                return as_real(float(value.replace('d', 'e')), kind or 8)
+            return as_real(float(value), kind or 4)
+
+        # string-literal-constant with kind parameter specification
+        if r in self.quotes_map:
+            kind = r[:r.find('@')]
+            return as_string(self.quotes_map[r], kind or 1)
+
+        # array constructor or literal complex constant or
+        # parenthesized expression
+        if r in raw_symbols_map:
+            paren = _get_parenthesis_kind(r)
+            items = self.process(restore(raw_symbols_map[r]),
+                                 'expr' if paren == 'ROUND' else 'args')
+            if paren == 'ROUND':
+                if isinstance(items, Expr):
+                    return items
+            if paren in ['ROUNDDIV', 'SQUARE']:
+                # Expression is an array constructor
+                if isinstance(items, Expr):
+                    items = (items,)
+                return as_array(items)
+
+        # function call/indexing
+        m = re.match(r'\A(.+)\s*(@__f2py_PARENTHESIS_(ROUND|SQUARE)_\d+@)\Z',
+                     r)
+        if m:
+            target, args, paren = m.groups()
+            target = self.process(restore(target))
+            args = self.process(restore(args)[1:-1], 'args')
+            if not isinstance(args, tuple):
+                args = args,
+            if paren == 'ROUND':
+                kwargs = dict((a.left, a.right) for a in args
+                              if isinstance(a, _Pair))
+                args = tuple(a for a in args if not isinstance(a, _Pair))
+                # Warning: this could also be a Fortran indexing operation...
+                return as_apply(target, *args, **kwargs)
+            else:
+                # Expression is a C/Python indexing operation
+                # (e.g.
used in .pyf files) + assert paren == 'SQUARE' + return target[args] + + # Fortran standard conforming identifier + m = re.match(r'\A\w[\w\d_]*\Z', r) + if m: + return as_symbol(r) + + # fall-back to symbol + r = self.finalize_string(restore(r)) + ewarn( + f'fromstring: treating {r!r} as symbol (original={self.original})') + return as_symbol(r) diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/__init__.py b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..bbb67ba Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_abstract_interface.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_abstract_interface.cpython-38.pyc new file mode 100644 index 0000000..35bf5c8 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_abstract_interface.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_array_from_pyobj.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_array_from_pyobj.cpython-38.pyc new file mode 100644 index 0000000..325d0b0 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_array_from_pyobj.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_assumed_shape.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_assumed_shape.cpython-38.pyc new file mode 100644 index 0000000..cf71b44 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_assumed_shape.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_block_docstring.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_block_docstring.cpython-38.pyc new file mode 100644 index 0000000..6cc0ce1 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_block_docstring.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_callback.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_callback.cpython-38.pyc new file mode 100644 index 0000000..81863f1 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_callback.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_common.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_common.cpython-38.pyc new file mode 100644 index 0000000..d8b8308 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_common.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_compile_function.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_compile_function.cpython-38.pyc new file mode 100644 index 0000000..bd2d759 Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_compile_function.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_crackfortran.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_crackfortran.cpython-38.pyc new file mode 100644 index 0000000..0b5881a Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_crackfortran.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_kind.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_kind.cpython-38.pyc new file mode 100644 index 0000000..28c5c2f Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_kind.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_mixed.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_mixed.cpython-38.pyc new file mode 100644 index 0000000..4643c52 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_mixed.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_module_doc.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_module_doc.cpython-38.pyc new file mode 100644 index 0000000..9199e90 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_module_doc.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_parameter.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_parameter.cpython-38.pyc new file mode 100644 index 0000000..dcb1959 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_parameter.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_quoted_character.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_quoted_character.cpython-38.pyc new file mode 100644 index 0000000..864e20f Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_quoted_character.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_regression.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_regression.cpython-38.pyc new file mode 100644 index 0000000..d31dbcf Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_regression.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_return_character.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_return_character.cpython-38.pyc new file mode 100644 index 0000000..7c2b9e1 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_return_character.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_return_complex.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_return_complex.cpython-38.pyc new file mode 100644 index 0000000..a4ee2b9 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_return_complex.cpython-38.pyc differ diff 
--git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_return_integer.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_return_integer.cpython-38.pyc new file mode 100644 index 0000000..e403e6c Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_return_integer.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_return_logical.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_return_logical.cpython-38.pyc new file mode 100644 index 0000000..64dbe16 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_return_logical.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_return_real.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_return_real.cpython-38.pyc new file mode 100644 index 0000000..0848a0d Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_return_real.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_semicolon_split.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_semicolon_split.cpython-38.pyc new file mode 100644 index 0000000..901fbef Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_semicolon_split.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_size.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_size.cpython-38.pyc new file mode 100644 index 0000000..15b6715 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_size.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_string.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_string.cpython-38.pyc new file mode 100644 index 0000000..7d9bd98 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_string.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_symbolic.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_symbolic.cpython-38.pyc new file mode 100644 index 0000000..b335ba1 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/test_symbolic.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/util.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/util.cpython-38.pyc new file mode 100644 index 0000000..1fbebd7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/f2py/tests/__pycache__/util.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/array_from_pyobj/wrapmodule.c b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/array_from_pyobj/wrapmodule.c new file mode 100644 index 0000000..ea47e05 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/array_from_pyobj/wrapmodule.c @@ -0,0 +1,230 @@ +/* + * This file was auto-generated with f2py (version:2_1330) and hand edited by + * Pearu for testing purposes. Do not edit this file unless you know what you + * are doing!!! 
+ */ + +#ifdef __cplusplus +extern "C" { +#endif + +/*********************** See f2py2e/cfuncs.py: includes ***********************/ + +#define PY_SSIZE_T_CLEAN +#include +#include "fortranobject.h" +#include + +static PyObject *wrap_error; +static PyObject *wrap_module; + +/************************************ call ************************************/ +static char doc_f2py_rout_wrap_call[] = "\ +Function signature:\n\ + arr = call(type_num,dims,intent,obj)\n\ +Required arguments:\n" +" type_num : input int\n" +" dims : input int-sequence\n" +" intent : input int\n" +" obj : input python object\n" +"Return objects:\n" +" arr : array"; +static PyObject *f2py_rout_wrap_call(PyObject *capi_self, + PyObject *capi_args) { + PyObject * volatile capi_buildvalue = NULL; + int type_num = 0; + npy_intp *dims = NULL; + PyObject *dims_capi = Py_None; + int rank = 0; + int intent = 0; + PyArrayObject *capi_arr_tmp = NULL; + PyObject *arr_capi = Py_None; + int i; + + if (!PyArg_ParseTuple(capi_args,"iOiO|:wrap.call",\ + &type_num,&dims_capi,&intent,&arr_capi)) + return NULL; + rank = PySequence_Length(dims_capi); + dims = malloc(rank*sizeof(npy_intp)); + for (i=0;ikind, + PyArray_DESCR(arr)->type, + PyArray_TYPE(arr), + PyArray_ITEMSIZE(arr), + PyArray_DESCR(arr)->alignment, + PyArray_FLAGS(arr), + PyArray_ITEMSIZE(arr)); +} + +static PyMethodDef f2py_module_methods[] = { + + {"call",f2py_rout_wrap_call,METH_VARARGS,doc_f2py_rout_wrap_call}, + {"array_attrs",f2py_rout_wrap_attrs,METH_VARARGS,doc_f2py_rout_wrap_attrs}, + {NULL,NULL} +}; + +static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + "test_array_from_pyobj_ext", + NULL, + -1, + f2py_module_methods, + NULL, + NULL, + NULL, + NULL +}; + +PyMODINIT_FUNC PyInit_test_array_from_pyobj_ext(void) { + PyObject *m,*d, *s; + m = wrap_module = PyModule_Create(&moduledef); + Py_SET_TYPE(&PyFortran_Type, &PyType_Type); + import_array(); + if (PyErr_Occurred()) + Py_FatalError("can't initialize module wrap (failed to import numpy)"); + d = PyModule_GetDict(m); + s = PyUnicode_FromString("This module 'wrap' is auto-generated with f2py (version:2_1330).\nFunctions:\n" + " arr = call(type_num,dims,intent,obj)\n" + "."); + PyDict_SetItemString(d, "__doc__", s); + wrap_error = PyErr_NewException ("wrap.error", NULL, NULL); + Py_DECREF(s); + +#define ADDCONST(NAME, CONST) \ + s = PyLong_FromLong(CONST); \ + PyDict_SetItemString(d, NAME, s); \ + Py_DECREF(s) + + ADDCONST("F2PY_INTENT_IN", F2PY_INTENT_IN); + ADDCONST("F2PY_INTENT_INOUT", F2PY_INTENT_INOUT); + ADDCONST("F2PY_INTENT_OUT", F2PY_INTENT_OUT); + ADDCONST("F2PY_INTENT_HIDE", F2PY_INTENT_HIDE); + ADDCONST("F2PY_INTENT_CACHE", F2PY_INTENT_CACHE); + ADDCONST("F2PY_INTENT_COPY", F2PY_INTENT_COPY); + ADDCONST("F2PY_INTENT_C", F2PY_INTENT_C); + ADDCONST("F2PY_OPTIONAL", F2PY_OPTIONAL); + ADDCONST("F2PY_INTENT_INPLACE", F2PY_INTENT_INPLACE); + ADDCONST("NPY_BOOL", NPY_BOOL); + ADDCONST("NPY_BYTE", NPY_BYTE); + ADDCONST("NPY_UBYTE", NPY_UBYTE); + ADDCONST("NPY_SHORT", NPY_SHORT); + ADDCONST("NPY_USHORT", NPY_USHORT); + ADDCONST("NPY_INT", NPY_INT); + ADDCONST("NPY_UINT", NPY_UINT); + ADDCONST("NPY_INTP", NPY_INTP); + ADDCONST("NPY_UINTP", NPY_UINTP); + ADDCONST("NPY_LONG", NPY_LONG); + ADDCONST("NPY_ULONG", NPY_ULONG); + ADDCONST("NPY_LONGLONG", NPY_LONGLONG); + ADDCONST("NPY_ULONGLONG", NPY_ULONGLONG); + ADDCONST("NPY_FLOAT", NPY_FLOAT); + ADDCONST("NPY_DOUBLE", NPY_DOUBLE); + ADDCONST("NPY_LONGDOUBLE", NPY_LONGDOUBLE); + ADDCONST("NPY_CFLOAT", NPY_CFLOAT); + ADDCONST("NPY_CDOUBLE", NPY_CDOUBLE); + 
ADDCONST("NPY_CLONGDOUBLE", NPY_CLONGDOUBLE); + ADDCONST("NPY_OBJECT", NPY_OBJECT); + ADDCONST("NPY_STRING", NPY_STRING); + ADDCONST("NPY_UNICODE", NPY_UNICODE); + ADDCONST("NPY_VOID", NPY_VOID); + ADDCONST("NPY_NTYPES", NPY_NTYPES); + ADDCONST("NPY_NOTYPE", NPY_NOTYPE); + ADDCONST("NPY_USERDEF", NPY_USERDEF); + + ADDCONST("CONTIGUOUS", NPY_ARRAY_C_CONTIGUOUS); + ADDCONST("FORTRAN", NPY_ARRAY_F_CONTIGUOUS); + ADDCONST("OWNDATA", NPY_ARRAY_OWNDATA); + ADDCONST("FORCECAST", NPY_ARRAY_FORCECAST); + ADDCONST("ENSURECOPY", NPY_ARRAY_ENSURECOPY); + ADDCONST("ENSUREARRAY", NPY_ARRAY_ENSUREARRAY); + ADDCONST("ALIGNED", NPY_ARRAY_ALIGNED); + ADDCONST("WRITEABLE", NPY_ARRAY_WRITEABLE); + ADDCONST("UPDATEIFCOPY", NPY_ARRAY_UPDATEIFCOPY); + ADDCONST("WRITEBACKIFCOPY", NPY_ARRAY_WRITEBACKIFCOPY); + + ADDCONST("BEHAVED", NPY_ARRAY_BEHAVED); + ADDCONST("BEHAVED_NS", NPY_ARRAY_BEHAVED_NS); + ADDCONST("CARRAY", NPY_ARRAY_CARRAY); + ADDCONST("FARRAY", NPY_ARRAY_FARRAY); + ADDCONST("CARRAY_RO", NPY_ARRAY_CARRAY_RO); + ADDCONST("FARRAY_RO", NPY_ARRAY_FARRAY_RO); + ADDCONST("DEFAULT", NPY_ARRAY_DEFAULT); + ADDCONST("UPDATE_ALL", NPY_ARRAY_UPDATE_ALL); + +#undef ADDCONST( + + if (PyErr_Occurred()) + Py_FatalError("can't initialize module wrap"); + +#ifdef F2PY_REPORT_ATEXIT + on_exit(f2py_report_on_exit,(void*)"array_from_pyobj.wrap.call"); +#endif + + return m; +} +#ifdef __cplusplus +} +#endif diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/assumed_shape/.f2py_f2cmap b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/assumed_shape/.f2py_f2cmap new file mode 100644 index 0000000..2665f89 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/assumed_shape/.f2py_f2cmap @@ -0,0 +1 @@ +dict(real=dict(rk="double")) diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/assumed_shape/foo_free.f90 b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/assumed_shape/foo_free.f90 new file mode 100644 index 0000000..b301710 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/assumed_shape/foo_free.f90 @@ -0,0 +1,34 @@ + +subroutine sum(x, res) + implicit none + real, intent(in) :: x(:) + real, intent(out) :: res + + integer :: i + + !print *, "sum: size(x) = ", size(x) + + res = 0.0 + + do i = 1, size(x) + res = res + x(i) + enddo + +end subroutine sum + +function fsum(x) result (res) + implicit none + real, intent(in) :: x(:) + real :: res + + integer :: i + + !print *, "fsum: size(x) = ", size(x) + + res = 0.0 + + do i = 1, size(x) + res = res + x(i) + enddo + +end function fsum diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/assumed_shape/foo_mod.f90 b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/assumed_shape/foo_mod.f90 new file mode 100644 index 0000000..cbe6317 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/assumed_shape/foo_mod.f90 @@ -0,0 +1,41 @@ + +module mod + +contains + +subroutine sum(x, res) + implicit none + real, intent(in) :: x(:) + real, intent(out) :: res + + integer :: i + + !print *, "sum: size(x) = ", size(x) + + res = 0.0 + + do i = 1, size(x) + res = res + x(i) + enddo + +end subroutine sum + +function fsum(x) result (res) + implicit none + real, intent(in) :: x(:) + real :: res + + integer :: i + + !print *, "fsum: size(x) = ", size(x) + + res = 0.0 + + do i = 1, size(x) + res = res + x(i) + enddo + +end function fsum + + +end module mod diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/assumed_shape/foo_use.f90 
b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/assumed_shape/foo_use.f90 new file mode 100644 index 0000000..337465a --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/assumed_shape/foo_use.f90 @@ -0,0 +1,19 @@ +subroutine sum_with_use(x, res) + use precision + + implicit none + + real(kind=rk), intent(in) :: x(:) + real(kind=rk), intent(out) :: res + + integer :: i + + !print *, "size(x) = ", size(x) + + res = 0.0 + + do i = 1, size(x) + res = res + x(i) + enddo + + end subroutine diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/assumed_shape/precision.f90 b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/assumed_shape/precision.f90 new file mode 100644 index 0000000..ed6c70c --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/assumed_shape/precision.f90 @@ -0,0 +1,4 @@ +module precision + integer, parameter :: rk = selected_real_kind(8) + integer, parameter :: ik = selected_real_kind(4) +end module diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/common/block.f b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/common/block.f new file mode 100644 index 0000000..7ea7968 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/common/block.f @@ -0,0 +1,11 @@ + SUBROUTINE INITCB + DOUBLE PRECISION LONG + CHARACTER STRING + INTEGER OK + + COMMON /BLOCK/ LONG, STRING, OK + LONG = 1.0 + STRING = '2' + OK = 3 + RETURN + END diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/kind/foo.f90 b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/kind/foo.f90 new file mode 100644 index 0000000..d3d15cf --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/kind/foo.f90 @@ -0,0 +1,20 @@ + + +subroutine selectedrealkind(p, r, res) + implicit none + + integer, intent(in) :: p, r + !f2py integer :: r=0 + integer, intent(out) :: res + res = selected_real_kind(p, r) + +end subroutine + +subroutine selectedintkind(p, res) + implicit none + + integer, intent(in) :: p + integer, intent(out) :: res + res = selected_int_kind(p) + +end subroutine diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/mixed/foo.f b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/mixed/foo.f new file mode 100644 index 0000000..c347425 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/mixed/foo.f @@ -0,0 +1,5 @@ + subroutine bar11(a) +cf2py intent(out) a + integer a + a = 11 + end diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/mixed/foo_fixed.f90 b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/mixed/foo_fixed.f90 new file mode 100644 index 0000000..7543a6a --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/mixed/foo_fixed.f90 @@ -0,0 +1,8 @@ + module foo_fixed + contains + subroutine bar12(a) +!f2py intent(out) a + integer a + a = 12 + end subroutine bar12 + end module foo_fixed diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/mixed/foo_free.f90 b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/mixed/foo_free.f90 new file mode 100644 index 0000000..c1b641f --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/mixed/foo_free.f90 @@ -0,0 +1,8 @@ +module foo_free +contains + subroutine bar13(a) + !f2py intent(out) a + integer a + a = 13 + end subroutine bar13 +end module foo_free diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/module_data/mod.mod 
b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/module_data/mod.mod new file mode 100644 index 0000000..8670a97 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/module_data/mod.mod differ diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/module_data/module_data_docstring.f90 b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/module_data/module_data_docstring.f90 new file mode 100644 index 0000000..4505e0c --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/module_data/module_data_docstring.f90 @@ -0,0 +1,12 @@ +module mod + integer :: i + integer :: x(4) + real, dimension(2,3) :: a + real, allocatable, dimension(:,:) :: b +contains + subroutine foo + integer :: k + k = 1 + a(1,2) = a(1,2)+3 + end subroutine foo +end module mod diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/parameter/constant_both.f90 b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/parameter/constant_both.f90 new file mode 100644 index 0000000..ac90ced --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/parameter/constant_both.f90 @@ -0,0 +1,57 @@ +! Check that parameters are correct intercepted. +! Constants with comma separations are commonly +! used, for instance Pi = 3._dp +subroutine foo(x) + implicit none + integer, parameter :: sp = selected_real_kind(6) + integer, parameter :: dp = selected_real_kind(15) + integer, parameter :: ii = selected_int_kind(9) + integer, parameter :: il = selected_int_kind(18) + real(dp), intent(inout) :: x + dimension x(3) + real(sp), parameter :: three_s = 3._sp + real(dp), parameter :: three_d = 3._dp + integer(ii), parameter :: three_i = 3_ii + integer(il), parameter :: three_l = 3_il + x(1) = x(1) + x(2) * three_s * three_i + x(3) * three_d * three_l + x(2) = x(2) * three_s + x(3) = x(3) * three_l + return +end subroutine + + +subroutine foo_no(x) + implicit none + integer, parameter :: sp = selected_real_kind(6) + integer, parameter :: dp = selected_real_kind(15) + integer, parameter :: ii = selected_int_kind(9) + integer, parameter :: il = selected_int_kind(18) + real(dp), intent(inout) :: x + dimension x(3) + real(sp), parameter :: three_s = 3. + real(dp), parameter :: three_d = 3. + integer(ii), parameter :: three_i = 3 + integer(il), parameter :: three_l = 3 + x(1) = x(1) + x(2) * three_s * three_i + x(3) * three_d * three_l + x(2) = x(2) * three_s + x(3) = x(3) * three_l + return +end subroutine + +subroutine foo_sum(x) + implicit none + integer, parameter :: sp = selected_real_kind(6) + integer, parameter :: dp = selected_real_kind(15) + integer, parameter :: ii = selected_int_kind(9) + integer, parameter :: il = selected_int_kind(18) + real(dp), intent(inout) :: x + dimension x(3) + real(sp), parameter :: three_s = 2._sp + 1._sp + real(dp), parameter :: three_d = 1._dp + 2._dp + integer(ii), parameter :: three_i = 2_ii + 1_ii + integer(il), parameter :: three_l = 1_il + 2_il + x(1) = x(1) + x(2) * three_s * three_i + x(3) * three_d * three_l + x(2) = x(2) * three_s + x(3) = x(3) * three_l + return +end subroutine diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/parameter/constant_compound.f90 b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/parameter/constant_compound.f90 new file mode 100644 index 0000000..e51f5e9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/parameter/constant_compound.f90 @@ -0,0 +1,15 @@ +! Check that parameters are correct intercepted. +! 
Constants with comma separations are commonly +! used, for instance Pi = 3._dp +subroutine foo_compound_int(x) + implicit none + integer, parameter :: ii = selected_int_kind(9) + integer(ii), intent(inout) :: x + dimension x(3) + integer(ii), parameter :: three = 3_ii + integer(ii), parameter :: two = 2_ii + integer(ii), parameter :: six = three * 1_ii * two + + x(1) = x(1) + x(2) + x(3) * six + return +end subroutine diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/parameter/constant_integer.f90 b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/parameter/constant_integer.f90 new file mode 100644 index 0000000..aaa83d2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/parameter/constant_integer.f90 @@ -0,0 +1,22 @@ +! Check that parameters are correct intercepted. +! Constants with comma separations are commonly +! used, for instance Pi = 3._dp +subroutine foo_int(x) + implicit none + integer, parameter :: ii = selected_int_kind(9) + integer(ii), intent(inout) :: x + dimension x(3) + integer(ii), parameter :: three = 3_ii + x(1) = x(1) + x(2) + x(3) * three + return +end subroutine + +subroutine foo_long(x) + implicit none + integer, parameter :: ii = selected_int_kind(18) + integer(ii), intent(inout) :: x + dimension x(3) + integer(ii), parameter :: three = 3_ii + x(1) = x(1) + x(2) + x(3) * three + return +end subroutine diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/parameter/constant_non_compound.f90 b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/parameter/constant_non_compound.f90 new file mode 100644 index 0000000..62c9a5b --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/parameter/constant_non_compound.f90 @@ -0,0 +1,23 @@ +! Check that parameters are correct intercepted. +! Specifically that types of constants without +! compound kind specs are correctly inferred +! adapted Gibbs iteration code from pymc +! for this test case +subroutine foo_non_compound_int(x) + implicit none + integer, parameter :: ii = selected_int_kind(9) + + integer(ii) maxiterates + parameter (maxiterates=2) + + integer(ii) maxseries + parameter (maxseries=2) + + integer(ii) wasize + parameter (wasize=maxiterates*maxseries) + integer(ii), intent(inout) :: x + dimension x(wasize) + + x(1) = x(1) + x(2) + x(3) + x(4) * wasize + return +end subroutine diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/parameter/constant_real.f90 b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/parameter/constant_real.f90 new file mode 100644 index 0000000..02ac9dd --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/parameter/constant_real.f90 @@ -0,0 +1,23 @@ +! Check that parameters are correct intercepted. +! Constants with comma separations are commonly +! 
used, for instance Pi = 3._dp +subroutine foo_single(x) + implicit none + integer, parameter :: rp = selected_real_kind(6) + real(rp), intent(inout) :: x + dimension x(3) + real(rp), parameter :: three = 3._rp + x(1) = x(1) + x(2) + x(3) * three + return +end subroutine + +subroutine foo_double(x) + implicit none + integer, parameter :: rp = selected_real_kind(15) + real(rp), intent(inout) :: x + dimension x(3) + real(rp), parameter :: three = 3._rp + x(1) = x(1) + x(2) + x(3) * three + return +end subroutine + diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/regression/inout.f90 b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/regression/inout.f90 new file mode 100644 index 0000000..80cdad9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/regression/inout.f90 @@ -0,0 +1,9 @@ +! Check that intent(in out) translates as intent(inout). +! The separation seems to be a common usage. + subroutine foo(x) + implicit none + real(4), intent(in out) :: x + dimension x(3) + x(1) = x(1) + x(2) + x(3) + return + end diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/size/foo.f90 b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/size/foo.f90 new file mode 100644 index 0000000..5b66f8c --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/size/foo.f90 @@ -0,0 +1,44 @@ + +subroutine foo(a, n, m, b) + implicit none + + real, intent(in) :: a(n, m) + integer, intent(in) :: n, m + real, intent(out) :: b(size(a, 1)) + + integer :: i + + do i = 1, size(b) + b(i) = sum(a(i,:)) + enddo +end subroutine + +subroutine trans(x,y) + implicit none + real, intent(in), dimension(:,:) :: x + real, intent(out), dimension( size(x,2), size(x,1) ) :: y + integer :: N, M, i, j + N = size(x,1) + M = size(x,2) + DO i=1,N + do j=1,M + y(j,i) = x(i,j) + END DO + END DO +end subroutine trans + +subroutine flatten(x,y) + implicit none + real, intent(in), dimension(:,:) :: x + real, intent(out), dimension( size(x) ) :: y + integer :: N, M, i, j, k + N = size(x,1) + M = size(x,2) + k = 1 + DO i=1,N + do j=1,M + y(k) = x(i,j) + k = k + 1 + END DO + END DO +end subroutine flatten diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/string/char.f90 b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/string/char.f90 new file mode 100644 index 0000000..bb7985c --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/src/string/char.f90 @@ -0,0 +1,29 @@ +MODULE char_test + +CONTAINS + +SUBROUTINE change_strings(strings, n_strs, out_strings) + IMPLICIT NONE + + ! Inputs + INTEGER, INTENT(IN) :: n_strs + CHARACTER, INTENT(IN), DIMENSION(2,n_strs) :: strings + CHARACTER, INTENT(OUT), DIMENSION(2,n_strs) :: out_strings + +!f2py INTEGER, INTENT(IN) :: n_strs +!f2py CHARACTER, INTENT(IN), DIMENSION(2,n_strs) :: strings +!f2py CHARACTER, INTENT(OUT), DIMENSION(2,n_strs) :: strings + + ! Misc. + INTEGER*4 :: j + + + DO j=1, n_strs + out_strings(1,j) = strings(1,j) + out_strings(2,j) = 'A' + END DO + +END SUBROUTINE change_strings + +END MODULE char_test + diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_abstract_interface.py b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_abstract_interface.py new file mode 100644 index 0000000..936c1f7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_abstract_interface.py @@ -0,0 +1,66 @@ +import textwrap +from . 
import util +from numpy.f2py import crackfortran + + +class TestAbstractInterface(util.F2PyTest): + suffix = '.f90' + + skip = ['add1', 'add2'] + + code = textwrap.dedent(""" + module ops_module + + abstract interface + subroutine op(x, y, z) + integer, intent(in) :: x, y + integer, intent(out) :: z + end subroutine + end interface + + contains + + subroutine foo(x, y, r1, r2) + integer, intent(in) :: x, y + integer, intent(out) :: r1, r2 + procedure (op) add1, add2 + procedure (op), pointer::p + p=>add1 + call p(x, y, r1) + p=>add2 + call p(x, y, r2) + end subroutine + end module + + subroutine add1(x, y, z) + integer, intent(in) :: x, y + integer, intent(out) :: z + z = x + y + end subroutine + + subroutine add2(x, y, z) + integer, intent(in) :: x, y + integer, intent(out) :: z + z = x + 2 * y + end subroutine + """) + + def test_abstract_interface(self): + assert self.module.ops_module.foo(3, 5) == (8, 13) + + def test_parse_abstract_interface(self, tmp_path): + # Test gh18403 + f_path = tmp_path / "gh18403_mod.f90" + with f_path.open('w') as ff: + ff.write(textwrap.dedent("""\ + module test + abstract interface + subroutine foo() + end subroutine + end interface + end module test + """)) + mod = crackfortran.crackfortran([str(f_path)]) + assert len(mod) == 1 + assert len(mod[0]['body']) == 1 + assert mod[0]['body'][0]['block'] == 'abstract interface' diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_array_from_pyobj.py b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_array_from_pyobj.py new file mode 100644 index 0000000..649fd1c --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_array_from_pyobj.py @@ -0,0 +1,596 @@ +import os +import sys +import copy +import platform +import pytest + +import numpy as np + +from numpy.testing import assert_, assert_equal +from numpy.core.multiarray import typeinfo +from . 
import util + +wrap = None + + +def setup_module(): + """ + Build the required testing extension module + + """ + global wrap + + # Check compiler availability first + if not util.has_c_compiler(): + pytest.skip("No C compiler available") + + if wrap is None: + config_code = """ + config.add_extension('test_array_from_pyobj_ext', + sources=['wrapmodule.c', 'fortranobject.c'], + define_macros=[]) + """ + d = os.path.dirname(__file__) + src = [os.path.join(d, 'src', 'array_from_pyobj', 'wrapmodule.c'), + os.path.join(d, '..', 'src', 'fortranobject.c'), + os.path.join(d, '..', 'src', 'fortranobject.h')] + wrap = util.build_module_distutils(src, config_code, + 'test_array_from_pyobj_ext') + + +def flags_info(arr): + flags = wrap.array_attrs(arr)[6] + return flags2names(flags) + + +def flags2names(flags): + info = [] + for flagname in ['CONTIGUOUS', 'FORTRAN', 'OWNDATA', 'ENSURECOPY', + 'ENSUREARRAY', 'ALIGNED', 'NOTSWAPPED', 'WRITEABLE', + 'WRITEBACKIFCOPY', 'UPDATEIFCOPY', 'BEHAVED', 'BEHAVED_RO', + 'CARRAY', 'FARRAY' + ]: + if abs(flags) & getattr(wrap, flagname, 0): + info.append(flagname) + return info + + +class Intent: + + def __init__(self, intent_list=[]): + self.intent_list = intent_list[:] + flags = 0 + for i in intent_list: + if i == 'optional': + flags |= wrap.F2PY_OPTIONAL + else: + flags |= getattr(wrap, 'F2PY_INTENT_' + i.upper()) + self.flags = flags + + def __getattr__(self, name): + name = name.lower() + if name == 'in_': + name = 'in' + return self.__class__(self.intent_list + [name]) + + def __str__(self): + return 'intent(%s)' % (','.join(self.intent_list)) + + def __repr__(self): + return 'Intent(%r)' % (self.intent_list) + + def is_intent(self, *names): + for name in names: + if name not in self.intent_list: + return False + return True + + def is_intent_exact(self, *names): + return len(self.intent_list) == len(names) and self.is_intent(*names) + +intent = Intent() + +_type_names = ['BOOL', 'BYTE', 'UBYTE', 'SHORT', 'USHORT', 'INT', 'UINT', + 'LONG', 'ULONG', 'LONGLONG', 'ULONGLONG', + 'FLOAT', 'DOUBLE', 'CFLOAT'] + +_cast_dict = {'BOOL': ['BOOL']} +_cast_dict['BYTE'] = _cast_dict['BOOL'] + ['BYTE'] +_cast_dict['UBYTE'] = _cast_dict['BOOL'] + ['UBYTE'] +_cast_dict['BYTE'] = ['BYTE'] +_cast_dict['UBYTE'] = ['UBYTE'] +_cast_dict['SHORT'] = _cast_dict['BYTE'] + ['UBYTE', 'SHORT'] +_cast_dict['USHORT'] = _cast_dict['UBYTE'] + ['BYTE', 'USHORT'] +_cast_dict['INT'] = _cast_dict['SHORT'] + ['USHORT', 'INT'] +_cast_dict['UINT'] = _cast_dict['USHORT'] + ['SHORT', 'UINT'] + +_cast_dict['LONG'] = _cast_dict['INT'] + ['LONG'] +_cast_dict['ULONG'] = _cast_dict['UINT'] + ['ULONG'] + +_cast_dict['LONGLONG'] = _cast_dict['LONG'] + ['LONGLONG'] +_cast_dict['ULONGLONG'] = _cast_dict['ULONG'] + ['ULONGLONG'] + +_cast_dict['FLOAT'] = _cast_dict['SHORT'] + ['USHORT', 'FLOAT'] +_cast_dict['DOUBLE'] = _cast_dict['INT'] + ['UINT', 'FLOAT', 'DOUBLE'] + +_cast_dict['CFLOAT'] = _cast_dict['FLOAT'] + ['CFLOAT'] + +# 32 bit system malloc typically does not provide the alignment required by +# 16 byte long double types this means the inout intent cannot be satisfied +# and several tests fail as the alignment flag can be randomly true or fals +# when numpy gains an aligned allocator the tests could be enabled again +# +# Furthermore, on macOS ARM64, LONGDOUBLE is an alias for DOUBLE. 
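+#
+# A reading of the guard below (illustrative): the extended
+# LONGDOUBLE/CDOUBLE/CLONGDOUBLE type names are exercised only on 64-bit
+# platforms (or where clongdouble needs at most 8-byte alignment), and
+# never on win32 or Apple Silicon, in line with the caveats above.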
+if ((np.intp().dtype.itemsize != 4 or np.clongdouble().dtype.alignment <= 8) and + sys.platform != 'win32' and + (platform.system(), platform.processor()) != ('Darwin', 'arm')): + _type_names.extend(['LONGDOUBLE', 'CDOUBLE', 'CLONGDOUBLE']) + _cast_dict['LONGDOUBLE'] = _cast_dict['LONG'] + \ + ['ULONG', 'FLOAT', 'DOUBLE', 'LONGDOUBLE'] + _cast_dict['CLONGDOUBLE'] = _cast_dict['LONGDOUBLE'] + \ + ['CFLOAT', 'CDOUBLE', 'CLONGDOUBLE'] + _cast_dict['CDOUBLE'] = _cast_dict['DOUBLE'] + ['CFLOAT', 'CDOUBLE'] + + +class Type: + _type_cache = {} + + def __new__(cls, name): + if isinstance(name, np.dtype): + dtype0 = name + name = None + for n, i in typeinfo.items(): + if not isinstance(i, type) and dtype0.type is i.type: + name = n + break + obj = cls._type_cache.get(name.upper(), None) + if obj is not None: + return obj + obj = object.__new__(cls) + obj._init(name) + cls._type_cache[name.upper()] = obj + return obj + + def _init(self, name): + self.NAME = name.upper() + info = typeinfo[self.NAME] + self.type_num = getattr(wrap, 'NPY_' + self.NAME) + assert_equal(self.type_num, info.num) + self.dtype = np.dtype(info.type) + self.type = info.type + self.elsize = info.bits / 8 + self.dtypechar = info.char + + def cast_types(self): + return [self.__class__(_m) for _m in _cast_dict[self.NAME]] + + def all_types(self): + return [self.__class__(_m) for _m in _type_names] + + def smaller_types(self): + bits = typeinfo[self.NAME].alignment + types = [] + for name in _type_names: + if typeinfo[name].alignment < bits: + types.append(Type(name)) + return types + + def equal_types(self): + bits = typeinfo[self.NAME].alignment + types = [] + for name in _type_names: + if name == self.NAME: + continue + if typeinfo[name].alignment == bits: + types.append(Type(name)) + return types + + def larger_types(self): + bits = typeinfo[self.NAME].alignment + types = [] + for name in _type_names: + if typeinfo[name].alignment > bits: + types.append(Type(name)) + return types + + +class Array: + + def __init__(self, typ, dims, intent, obj): + self.type = typ + self.dims = dims + self.intent = intent + self.obj_copy = copy.deepcopy(obj) + self.obj = obj + + # arr.dtypechar may be different from typ.dtypechar + self.arr = wrap.call(typ.type_num, dims, intent.flags, obj) + + assert_(isinstance(self.arr, np.ndarray), repr(type(self.arr))) + + self.arr_attr = wrap.array_attrs(self.arr) + + if len(dims) > 1: + if self.intent.is_intent('c'): + assert_(intent.flags & wrap.F2PY_INTENT_C) + assert_(not self.arr.flags['FORTRAN'], + repr((self.arr.flags, getattr(obj, 'flags', None)))) + assert_(self.arr.flags['CONTIGUOUS']) + assert_(not self.arr_attr[6] & wrap.FORTRAN) + else: + assert_(not intent.flags & wrap.F2PY_INTENT_C) + assert_(self.arr.flags['FORTRAN']) + assert_(not self.arr.flags['CONTIGUOUS']) + assert_(self.arr_attr[6] & wrap.FORTRAN) + + if obj is None: + self.pyarr = None + self.pyarr_attr = None + return + + if intent.is_intent('cache'): + assert_(isinstance(obj, np.ndarray), repr(type(obj))) + self.pyarr = np.array(obj).reshape(*dims).copy() + else: + self.pyarr = np.array( + np.array(obj, dtype=typ.dtypechar).reshape(*dims), + order=self.intent.is_intent('c') and 'C' or 'F') + assert_(self.pyarr.dtype == typ, + repr((self.pyarr.dtype, typ))) + self.pyarr.setflags(write=self.arr.flags['WRITEABLE']) + assert_(self.pyarr.flags['OWNDATA'], (obj, intent)) + self.pyarr_attr = wrap.array_attrs(self.pyarr) + + if len(dims) > 1: + if self.intent.is_intent('c'): + assert_(not self.pyarr.flags['FORTRAN']) + 
assert_(self.pyarr.flags['CONTIGUOUS']) + assert_(not self.pyarr_attr[6] & wrap.FORTRAN) + else: + assert_(self.pyarr.flags['FORTRAN']) + assert_(not self.pyarr.flags['CONTIGUOUS']) + assert_(self.pyarr_attr[6] & wrap.FORTRAN) + + assert_(self.arr_attr[1] == self.pyarr_attr[1]) # nd + assert_(self.arr_attr[2] == self.pyarr_attr[2]) # dimensions + if self.arr_attr[1] <= 1: + assert_(self.arr_attr[3] == self.pyarr_attr[3], + repr((self.arr_attr[3], self.pyarr_attr[3], + self.arr.tobytes(), self.pyarr.tobytes()))) # strides + assert_(self.arr_attr[5][-2:] == self.pyarr_attr[5][-2:], + repr((self.arr_attr[5], self.pyarr_attr[5]))) # descr + assert_(self.arr_attr[6] == self.pyarr_attr[6], + repr((self.arr_attr[6], self.pyarr_attr[6], + flags2names(0 * self.arr_attr[6] - self.pyarr_attr[6]), + flags2names(self.arr_attr[6]), intent))) # flags + + if intent.is_intent('cache'): + assert_(self.arr_attr[5][3] >= self.type.elsize, + repr((self.arr_attr[5][3], self.type.elsize))) + else: + assert_(self.arr_attr[5][3] == self.type.elsize, + repr((self.arr_attr[5][3], self.type.elsize))) + assert_(self.arr_equal(self.pyarr, self.arr)) + + if isinstance(self.obj, np.ndarray): + if typ.elsize == Type(obj.dtype).elsize: + if not intent.is_intent('copy') and self.arr_attr[1] <= 1: + assert_(self.has_shared_memory()) + + def arr_equal(self, arr1, arr2): + if arr1.shape != arr2.shape: + return False + return (arr1 == arr2).all() + + def __str__(self): + return str(self.arr) + + def has_shared_memory(self): + """Check that created array shares data with input array. + """ + if self.obj is self.arr: + return True + if not isinstance(self.obj, np.ndarray): + return False + obj_attr = wrap.array_attrs(self.obj) + return obj_attr[0] == self.arr_attr[0] + + +class TestIntent: + + def test_in_out(self): + assert_equal(str(intent.in_.out), 'intent(in,out)') + assert_(intent.in_.c.is_intent('c')) + assert_(not intent.in_.c.is_intent_exact('c')) + assert_(intent.in_.c.is_intent_exact('c', 'in')) + assert_(intent.in_.c.is_intent_exact('in', 'c')) + assert_(not intent.in_.is_intent('c')) + + +class TestSharedMemory: + num2seq = [1, 2] + num23seq = [[1, 2, 3], [4, 5, 6]] + + @pytest.fixture(autouse=True, scope='class', params=_type_names) + def setup_type(self, request): + request.cls.type = Type(request.param) + request.cls.array = lambda self, dims, intent, obj: \ + Array(Type(request.param), dims, intent, obj) + + def test_in_from_2seq(self): + a = self.array([2], intent.in_, self.num2seq) + assert_(not a.has_shared_memory()) + + def test_in_from_2casttype(self): + for t in self.type.cast_types(): + obj = np.array(self.num2seq, dtype=t.dtype) + a = self.array([len(self.num2seq)], intent.in_, obj) + if t.elsize == self.type.elsize: + assert_( + a.has_shared_memory(), repr((self.type.dtype, t.dtype))) + else: + assert_(not a.has_shared_memory(), repr(t.dtype)) + + @pytest.mark.parametrize('write', ['w', 'ro']) + @pytest.mark.parametrize('order', ['C', 'F']) + @pytest.mark.parametrize('inp', ['2seq', '23seq']) + def test_in_nocopy(self, write, order, inp): + """Test if intent(in) array can be passed without copies + """ + seq = getattr(self, 'num' + inp) + obj = np.array(seq, dtype=self.type.dtype, order=order) + obj.setflags(write=(write == 'w')) + a = self.array(obj.shape, ((order=='C' and intent.in_.c) or intent.in_), obj) + assert a.has_shared_memory() + + def test_inout_2seq(self): + obj = np.array(self.num2seq, dtype=self.type.dtype) + a = self.array([len(self.num2seq)], intent.inout, obj) + 
assert_(a.has_shared_memory()) + + try: + a = self.array([2], intent.in_.inout, self.num2seq) + except TypeError as msg: + if not str(msg).startswith('failed to initialize intent' + '(inout|inplace|cache) array'): + raise + else: + raise SystemError('intent(inout) should have failed on sequence') + + def test_f_inout_23seq(self): + obj = np.array(self.num23seq, dtype=self.type.dtype, order='F') + shape = (len(self.num23seq), len(self.num23seq[0])) + a = self.array(shape, intent.in_.inout, obj) + assert_(a.has_shared_memory()) + + obj = np.array(self.num23seq, dtype=self.type.dtype, order='C') + shape = (len(self.num23seq), len(self.num23seq[0])) + try: + a = self.array(shape, intent.in_.inout, obj) + except ValueError as msg: + if not str(msg).startswith('failed to initialize intent' + '(inout) array'): + raise + else: + raise SystemError( + 'intent(inout) should have failed on improper array') + + def test_c_inout_23seq(self): + obj = np.array(self.num23seq, dtype=self.type.dtype) + shape = (len(self.num23seq), len(self.num23seq[0])) + a = self.array(shape, intent.in_.c.inout, obj) + assert_(a.has_shared_memory()) + + def test_in_copy_from_2casttype(self): + for t in self.type.cast_types(): + obj = np.array(self.num2seq, dtype=t.dtype) + a = self.array([len(self.num2seq)], intent.in_.copy, obj) + assert_(not a.has_shared_memory(), repr(t.dtype)) + + def test_c_in_from_23seq(self): + a = self.array([len(self.num23seq), len(self.num23seq[0])], + intent.in_, self.num23seq) + assert_(not a.has_shared_memory()) + + def test_in_from_23casttype(self): + for t in self.type.cast_types(): + obj = np.array(self.num23seq, dtype=t.dtype) + a = self.array([len(self.num23seq), len(self.num23seq[0])], + intent.in_, obj) + assert_(not a.has_shared_memory(), repr(t.dtype)) + + def test_f_in_from_23casttype(self): + for t in self.type.cast_types(): + obj = np.array(self.num23seq, dtype=t.dtype, order='F') + a = self.array([len(self.num23seq), len(self.num23seq[0])], + intent.in_, obj) + if t.elsize == self.type.elsize: + assert_(a.has_shared_memory(), repr(t.dtype)) + else: + assert_(not a.has_shared_memory(), repr(t.dtype)) + + def test_c_in_from_23casttype(self): + for t in self.type.cast_types(): + obj = np.array(self.num23seq, dtype=t.dtype) + a = self.array([len(self.num23seq), len(self.num23seq[0])], + intent.in_.c, obj) + if t.elsize == self.type.elsize: + assert_(a.has_shared_memory(), repr(t.dtype)) + else: + assert_(not a.has_shared_memory(), repr(t.dtype)) + + def test_f_copy_in_from_23casttype(self): + for t in self.type.cast_types(): + obj = np.array(self.num23seq, dtype=t.dtype, order='F') + a = self.array([len(self.num23seq), len(self.num23seq[0])], + intent.in_.copy, obj) + assert_(not a.has_shared_memory(), repr(t.dtype)) + + def test_c_copy_in_from_23casttype(self): + for t in self.type.cast_types(): + obj = np.array(self.num23seq, dtype=t.dtype) + a = self.array([len(self.num23seq), len(self.num23seq[0])], + intent.in_.c.copy, obj) + assert_(not a.has_shared_memory(), repr(t.dtype)) + + def test_in_cache_from_2casttype(self): + for t in self.type.all_types(): + if t.elsize != self.type.elsize: + continue + obj = np.array(self.num2seq, dtype=t.dtype) + shape = (len(self.num2seq),) + a = self.array(shape, intent.in_.c.cache, obj) + assert_(a.has_shared_memory(), repr(t.dtype)) + + a = self.array(shape, intent.in_.cache, obj) + assert_(a.has_shared_memory(), repr(t.dtype)) + + obj = np.array(self.num2seq, dtype=t.dtype, order='F') + a = self.array(shape, intent.in_.c.cache, obj) + 
assert_(a.has_shared_memory(), repr(t.dtype))
+
+            a = self.array(shape, intent.in_.cache, obj)
+            assert_(a.has_shared_memory(), repr(t.dtype))
+
+            try:
+                a = self.array(shape, intent.in_.cache, obj[::-1])
+            except ValueError as msg:
+                if not str(msg).startswith('failed to initialize'
+                                           ' intent(cache) array'):
+                    raise
+            else:
+                raise SystemError(
+                    'intent(cache) should have failed on multisegmented array')
+
+    def test_in_cache_from_2casttype_failure(self):
+        for t in self.type.all_types():
+            if t.elsize >= self.type.elsize:
+                continue
+            obj = np.array(self.num2seq, dtype=t.dtype)
+            shape = (len(self.num2seq),)
+            try:
+                self.array(shape, intent.in_.cache, obj)  # Should fail: the buffer is too small
+            except ValueError as msg:
+                if not str(msg).startswith('failed to initialize'
+                                           ' intent(cache) array'):
+                    raise
+            else:
+                raise SystemError(
+                    'intent(cache) should have failed on smaller array')
+
+    def test_cache_hidden(self):
+        shape = (2,)
+        a = self.array(shape, intent.cache.hide, None)
+        assert_(a.arr.shape == shape)
+
+        shape = (2, 3)
+        a = self.array(shape, intent.cache.hide, None)
+        assert_(a.arr.shape == shape)
+
+        shape = (-1, 3)
+        try:
+            a = self.array(shape, intent.cache.hide, None)
+        except ValueError as msg:
+            if not str(msg).startswith('failed to create intent'
+                                       '(cache|hide)|optional array'):
+                raise
+        else:
+            raise SystemError(
+                'intent(cache) should have failed on undefined dimensions')
+
+    def test_hidden(self):
+        shape = (2,)
+        a = self.array(shape, intent.hide, None)
+        assert_(a.arr.shape == shape)
+        assert_(a.arr_equal(a.arr, np.zeros(shape, dtype=self.type.dtype)))
+
+        shape = (2, 3)
+        a = self.array(shape, intent.hide, None)
+        assert_(a.arr.shape == shape)
+        assert_(a.arr_equal(a.arr, np.zeros(shape, dtype=self.type.dtype)))
+        assert_(a.arr.flags['FORTRAN'] and not a.arr.flags['CONTIGUOUS'])
+
+        shape = (2, 3)
+        a = self.array(shape, intent.c.hide, None)
+        assert_(a.arr.shape == shape)
+        assert_(a.arr_equal(a.arr, np.zeros(shape, dtype=self.type.dtype)))
+        assert_(not a.arr.flags['FORTRAN'] and a.arr.flags['CONTIGUOUS'])
+
+        shape = (-1, 3)
+        try:
+            a = self.array(shape, intent.hide, None)
+        except ValueError as msg:
+            if not str(msg).startswith('failed to create intent'
+                                       '(cache|hide)|optional array'):
+                raise
+        else:
+            raise SystemError('intent(hide) should have failed'
+                              ' on undefined dimensions')
+
+    def test_optional_none(self):
+        shape = (2,)
+        a = self.array(shape, intent.optional, None)
+        assert_(a.arr.shape == shape)
+        assert_(a.arr_equal(a.arr, np.zeros(shape, dtype=self.type.dtype)))
+
+        shape = (2, 3)
+        a = self.array(shape, intent.optional, None)
+        assert_(a.arr.shape == shape)
+        assert_(a.arr_equal(a.arr, np.zeros(shape, dtype=self.type.dtype)))
+        assert_(a.arr.flags['FORTRAN'] and not a.arr.flags['CONTIGUOUS'])
+
+        shape = (2, 3)
+        a = self.array(shape, intent.c.optional, None)
+        assert_(a.arr.shape == shape)
+        assert_(a.arr_equal(a.arr, np.zeros(shape, dtype=self.type.dtype)))
+        assert_(not a.arr.flags['FORTRAN'] and a.arr.flags['CONTIGUOUS'])
+
+    def test_optional_from_2seq(self):
+        obj = self.num2seq
+        shape = (len(obj),)
+        a = self.array(shape, intent.optional, obj)
+        assert_(a.arr.shape == shape)
+        assert_(not a.has_shared_memory())
+
+    def test_optional_from_23seq(self):
+        obj = self.num23seq
+        shape = (len(obj), len(obj[0]))
+        a = self.array(shape, intent.optional, obj)
+        assert_(a.arr.shape == shape)
+        assert_(not a.has_shared_memory())
+
+        a = self.array(shape, intent.optional.c, obj)
+        assert_(a.arr.shape == shape)
+        assert_(not a.has_shared_memory())
+
+    def 
test_inplace(self): + obj = np.array(self.num23seq, dtype=self.type.dtype) + assert_(not obj.flags['FORTRAN'] and obj.flags['CONTIGUOUS']) + shape = obj.shape + a = self.array(shape, intent.inplace, obj) + assert_(obj[1][2] == a.arr[1][2], repr((obj, a.arr))) + a.arr[1][2] = 54 + assert_(obj[1][2] == a.arr[1][2] == + np.array(54, dtype=self.type.dtype), repr((obj, a.arr))) + assert_(a.arr is obj) + assert_(obj.flags['FORTRAN']) # obj attributes are changed inplace! + assert_(not obj.flags['CONTIGUOUS']) + + def test_inplace_from_casttype(self): + for t in self.type.cast_types(): + if t is self.type: + continue + obj = np.array(self.num23seq, dtype=t.dtype) + assert_(obj.dtype.type == t.type) + assert_(obj.dtype.type is not self.type.type) + assert_(not obj.flags['FORTRAN'] and obj.flags['CONTIGUOUS']) + shape = obj.shape + a = self.array(shape, intent.inplace, obj) + assert_(obj[1][2] == a.arr[1][2], repr((obj, a.arr))) + a.arr[1][2] = 54 + assert_(obj[1][2] == a.arr[1][2] == + np.array(54, dtype=self.type.dtype), repr((obj, a.arr))) + assert_(a.arr is obj) + assert_(obj.flags['FORTRAN']) # obj attributes changed inplace! + assert_(not obj.flags['CONTIGUOUS']) + assert_(obj.dtype.type is self.type.type) # obj changed inplace! diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_assumed_shape.py b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_assumed_shape.py new file mode 100644 index 0000000..79e3ad1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_assumed_shape.py @@ -0,0 +1,53 @@ +import os +import pytest +import tempfile + +from numpy.testing import assert_ +from . import util + + +def _path(*a): + return os.path.join(*((os.path.dirname(__file__),) + a)) + + +class TestAssumedShapeSumExample(util.F2PyTest): + sources = [_path('src', 'assumed_shape', 'foo_free.f90'), + _path('src', 'assumed_shape', 'foo_use.f90'), + _path('src', 'assumed_shape', 'precision.f90'), + _path('src', 'assumed_shape', 'foo_mod.f90'), + _path('src', 'assumed_shape', '.f2py_f2cmap'), + ] + + @pytest.mark.slow + def test_all(self): + r = self.module.fsum([1, 2]) + assert_(r == 3, repr(r)) + r = self.module.sum([1, 2]) + assert_(r == 3, repr(r)) + r = self.module.sum_with_use([1, 2]) + assert_(r == 3, repr(r)) + + r = self.module.mod.sum([1, 2]) + assert_(r == 3, repr(r)) + r = self.module.mod.fsum([1, 2]) + assert_(r == 3, repr(r)) + + +class TestF2cmapOption(TestAssumedShapeSumExample): + def setup(self): + # Use a custom file name for .f2py_f2cmap + self.sources = list(self.sources) + f2cmap_src = self.sources.pop(-1) + + self.f2cmap_file = tempfile.NamedTemporaryFile(delete=False) + with open(f2cmap_src, 'rb') as f: + self.f2cmap_file.write(f.read()) + self.f2cmap_file.close() + + self.sources.append(self.f2cmap_file.name) + self.options = ["--f2cmap", self.f2cmap_file.name] + + super().setup() + + def teardown(self): + os.unlink(self.f2cmap_file.name) diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_block_docstring.py b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_block_docstring.py new file mode 100644 index 0000000..7d72516 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_block_docstring.py @@ -0,0 +1,23 @@ +import sys +import pytest +from . 
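import util
+
+# Editor's note (illustrative sketch, not part of the upstream file): the
+# test below checks the docstring that f2py generates for a COMMON block.
+# Assuming a working Fortran toolchain, such a module is typically built and
+# inspected along these lines (the module name 'blockdemo' and the variable
+# fortran_source are hypothetical):
+#
+#   import numpy.f2py
+#   numpy.f2py.compile(fortran_source, modulename='blockdemo')
+#   import blockdemo
+#   blockdemo.block.__doc__   # -> "bar : 'i'-array(2,3)"
+#
+from . 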
import util + +from numpy.testing import assert_equal, IS_PYPY + +class TestBlockDocString(util.F2PyTest): + code = """ + SUBROUTINE FOO() + INTEGER BAR(2, 3) + + COMMON /BLOCK/ BAR + RETURN + END + """ + + @pytest.mark.skipif(sys.platform=='win32', + reason='Fails with MinGW64 Gfortran (Issue #9673)') + @pytest.mark.xfail(IS_PYPY, + reason="PyPy cannot modify tp_doc after PyType_Ready") + def test_block_docstring(self): + expected = "bar : 'i'-array(2,3)\n" + assert_equal(self.module.block.__doc__, expected) diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_callback.py b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_callback.py new file mode 100644 index 0000000..5d2aab9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_callback.py @@ -0,0 +1,325 @@ +import math +import textwrap +import sys +import pytest +import threading +import traceback +import time + +import numpy as np +from numpy.testing import assert_, assert_equal, IS_PYPY +from . import util + + +class TestF77Callback(util.F2PyTest): + code = """ + subroutine t(fun,a) + integer a +cf2py intent(out) a + external fun + call fun(a) + end + + subroutine func(a) +cf2py intent(in,out) a + integer a + a = a + 11 + end + + subroutine func0(a) +cf2py intent(out) a + integer a + a = 11 + end + + subroutine t2(a) +cf2py intent(callback) fun + integer a +cf2py intent(out) a + external fun + call fun(a) + end + + subroutine string_callback(callback, a) + external callback + double precision callback + double precision a + character*1 r +cf2py intent(out) a + r = 'r' + a = callback(r) + end + + subroutine string_callback_array(callback, cu, lencu, a) + external callback + integer callback + integer lencu + character*8 cu(lencu) + integer a +cf2py intent(out) a + + a = callback(cu, lencu) + end + + subroutine hidden_callback(a, r) + external global_f +cf2py intent(callback, hide) global_f + integer a, r, global_f +cf2py intent(out) r + r = global_f(a) + end + + subroutine hidden_callback2(a, r) + external global_f + integer a, r, global_f +cf2py intent(out) r + r = global_f(a) + end + """ + + @pytest.mark.parametrize('name', 't,t2'.split(',')) + def test_all(self, name): + self.check_function(name) + + @pytest.mark.xfail(IS_PYPY, + reason="PyPy cannot modify tp_doc after PyType_Ready") + def test_docstring(self): + expected = textwrap.dedent("""\ + a = t(fun,[fun_extra_args]) + + Wrapper for ``t``. 
+ + Parameters + ---------- + fun : call-back function + + Other Parameters + ---------------- + fun_extra_args : input tuple, optional + Default: () + + Returns + ------- + a : int + + Notes + ----- + Call-back functions:: + + def fun(): return a + Return objects: + a : int + """) + assert_equal(self.module.t.__doc__, expected) + + def check_function(self, name): + t = getattr(self.module, name) + r = t(lambda: 4) + assert_(r == 4, repr(r)) + r = t(lambda a: 5, fun_extra_args=(6,)) + assert_(r == 5, repr(r)) + r = t(lambda a: a, fun_extra_args=(6,)) + assert_(r == 6, repr(r)) + r = t(lambda a: 5 + a, fun_extra_args=(7,)) + assert_(r == 12, repr(r)) + r = t(lambda a: math.degrees(a), fun_extra_args=(math.pi,)) + assert_(r == 180, repr(r)) + r = t(math.degrees, fun_extra_args=(math.pi,)) + assert_(r == 180, repr(r)) + + r = t(self.module.func, fun_extra_args=(6,)) + assert_(r == 17, repr(r)) + r = t(self.module.func0) + assert_(r == 11, repr(r)) + r = t(self.module.func0._cpointer) + assert_(r == 11, repr(r)) + + class A: + + def __call__(self): + return 7 + + def mth(self): + return 9 + a = A() + r = t(a) + assert_(r == 7, repr(r)) + r = t(a.mth) + assert_(r == 9, repr(r)) + + @pytest.mark.skipif(sys.platform=='win32', + reason='Fails with MinGW64 Gfortran (Issue #9673)') + def test_string_callback(self): + + def callback(code): + if code == 'r': + return 0 + else: + return 1 + + f = getattr(self.module, 'string_callback') + r = f(callback) + assert_(r == 0, repr(r)) + + @pytest.mark.skipif(sys.platform=='win32', + reason='Fails with MinGW64 Gfortran (Issue #9673)') + def test_string_callback_array(self): + # See gh-10027 + cu = np.zeros((1, 8), 'S1') + + def callback(cu, lencu): + if cu.shape != (lencu, 8): + return 1 + if cu.dtype != 'S1': + return 2 + if not np.all(cu == b''): + return 3 + return 0 + + f = getattr(self.module, 'string_callback_array') + res = f(callback, cu, len(cu)) + assert_(res == 0, repr(res)) + + def test_threadsafety(self): + # Segfaults if the callback handling is not threadsafe + + errors = [] + + def cb(): + # Sleep here to make it more likely for another thread + # to call their callback at the same time. 
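+            # (editor's note: the short sleep below widens the race window so
+            # that several threads are likely to sit inside the Fortran ->
+            # Python callback trampoline at the same moment; if the callback
+            # bookkeeping were not thread-safe, this pattern would segfault
+            # rather than fail cleanly.)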
+ time.sleep(1e-3) + + # Check reentrancy + r = self.module.t(lambda: 123) + assert_(r == 123) + + return 42 + + def runner(name): + try: + for j in range(50): + r = self.module.t(cb) + assert_(r == 42) + self.check_function(name) + except Exception: + errors.append(traceback.format_exc()) + + threads = [threading.Thread(target=runner, args=(arg,)) + for arg in ("t", "t2") for n in range(20)] + + for t in threads: + t.start() + + for t in threads: + t.join() + + errors = "\n\n".join(errors) + if errors: + raise AssertionError(errors) + + def test_hidden_callback(self): + try: + self.module.hidden_callback(2) + except Exception as msg: + assert_(str(msg).startswith('Callback global_f not defined')) + + try: + self.module.hidden_callback2(2) + except Exception as msg: + assert_(str(msg).startswith('cb: Callback global_f not defined')) + + self.module.global_f = lambda x: x + 1 + r = self.module.hidden_callback(2) + assert_(r == 3) + + self.module.global_f = lambda x: x + 2 + r = self.module.hidden_callback(2) + assert_(r == 4) + + del self.module.global_f + try: + self.module.hidden_callback(2) + except Exception as msg: + assert_(str(msg).startswith('Callback global_f not defined')) + + self.module.global_f = lambda x=0: x + 3 + r = self.module.hidden_callback(2) + assert_(r == 5) + + # reproducer of gh18341 + r = self.module.hidden_callback2(2) + assert_(r == 3) + + +class TestF77CallbackPythonTLS(TestF77Callback): + """ + Callback tests using Python thread-local storage instead of + compiler-provided + """ + options = ["-DF2PY_USE_PYTHON_TLS"] + + +class TestF90Callback(util.F2PyTest): + + suffix = '.f90' + + code = textwrap.dedent( + """ + function gh17797(f, y) result(r) + external f + integer(8) :: r, f + integer(8), dimension(:) :: y + r = f(0) + r = r + sum(y) + end function gh17797 + """) + + def test_gh17797(self): + + def incr(x): + return x + 123 + + y = np.array([1, 2, 3], dtype=np.int64) + r = self.module.gh17797(incr, y) + assert r == 123 + 1 + 2 + 3 + + +class TestGH18335(util.F2PyTest): + """The reproduction of the reported issue requires specific input that + extensions may break the issue conditions, so the reproducer is + implemented as a separate test class. Do not extend this test with + other tests! + """ + + suffix = '.f90' + + code = textwrap.dedent( + """ + ! When gh18335_workaround is defined as an extension, + ! the issue cannot be reproduced. + !subroutine gh18335_workaround(f, y) + ! implicit none + ! external f + ! integer(kind=1) :: y(1) + ! call f(y) + !end subroutine gh18335_workaround + + function gh18335(f) result (r) + implicit none + external f + integer(kind=1) :: y(1), r + y(1) = 123 + call f(y) + r = y(1) + end function gh18335 + """) + + def test_gh18335(self): + + def foo(x): + x[0] += 1 + + y = np.array([1, 2, 3], dtype=np.int8) + r = self.module.gh18335(foo) + assert r == 123 + 1 diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_common.py b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_common.py new file mode 100644 index 0000000..e4bf355 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_common.py @@ -0,0 +1,25 @@ +import os +import sys +import pytest + +import numpy as np +from . 
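import util
+
+# Editor's note (illustrative, not part of the upstream file): f2py maps a
+# Fortran COMMON block to a module-level attribute whose members are NumPy
+# scalars/arrays.  For the module built from src/common/block.f below, usage
+# looks roughly like:
+#
+#   module.initcb()          # the Fortran side fills the COMMON block
+#   module.block.long_bn     # -> array(1.0)               (float64)
+#   module.block.string_bn   # -> array(b'2', dtype='|S1')
+#   module.block.ok          # -> array(3, dtype=int32)
+#
+from . 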
import util
+
+from numpy.testing import assert_array_equal
+
+def _path(*a):
+    return os.path.join(*((os.path.dirname(__file__),) + a))
+
+class TestCommonBlock(util.F2PyTest):
+    sources = [_path('src', 'common', 'block.f')]
+
+    @pytest.mark.skipif(sys.platform=='win32',
+                        reason='Fails with MinGW64 Gfortran (Issue #9673)')
+    def test_common_block(self):
+        self.module.initcb()
+        assert_array_equal(self.module.block.long_bn,
+                           np.array(1.0, dtype=np.float64))
+        assert_array_equal(self.module.block.string_bn,
+                           np.array('2', dtype='|S1'))
+        assert_array_equal(self.module.block.ok,
+                           np.array(3, dtype=np.int32))
diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_compile_function.py b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_compile_function.py
new file mode 100644
index 0000000..f76fd64
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_compile_function.py
@@ -0,0 +1,125 @@
+"""See https://github.com/numpy/numpy/pull/11937.
+
+"""
+import sys
+import os
+import uuid
+from importlib import import_module
+import pytest
+
+import numpy.f2py
+
+from numpy.testing import assert_equal
+from . import util
+
+
+def setup_module():
+    if not util.has_c_compiler():
+        pytest.skip("Needs C compiler")
+    if not util.has_f77_compiler():
+        pytest.skip('Needs FORTRAN 77 compiler')
+
+
+# extra_args can be a list (since gh-11937) or string.
+# also test absence of extra_args
+@pytest.mark.parametrize(
+    "extra_args", [['--noopt', '--debug'], '--noopt --debug', '']
+    )
+@pytest.mark.leaks_references(reason="Imported module seems never deleted.")
+def test_f2py_init_compile(extra_args):
+    # flush through the f2py __init__ compile() function code path as a
+    # crude test for input handling following migration from
+    # exec_command() to subprocess.check_output() in gh-11937
+
+    # the Fortran 77 syntax requires 6 spaces before any commands, but
+    # more space may be added.
+    fsource = """
+        integer function foo()
+        foo = 10 + 5
+        return
+        end
+    """
+    # use various helper functions in util.py to enable robust build /
+    # compile and reimport cycle in test suite
+    moddir = util.get_module_dir()
+    modname = util.get_temp_module_name()
+
+    cwd = os.getcwd()
+    target = os.path.join(moddir, str(uuid.uuid4()) + '.f')
+    # try running compile() with and without a source_fn provided so
+    # that the code path where a temporary file for writing Fortran
+    # source is created is also explored
+    for source_fn in [target, None]:
+        # mimic the path changing behavior used by build_module() in
+        # util.py, but don't actually use build_module() because it has
+        # its own invocation of subprocess that circumvents the
+        # f2py.compile code block under test
+        try:
+            os.chdir(moddir)
+            ret_val = numpy.f2py.compile(
+                fsource,
+                modulename=modname,
+                extra_args=extra_args,
+                source_fn=source_fn
+            )
+        finally:
+            os.chdir(cwd)
+
+        # check for compile success return value
+        assert_equal(ret_val, 0)
+
+        # we are not currently able to import the Python-Fortran
+        # interface module on Windows / Appveyor, even though we do get
+        # successful compilation on that platform with Python 3.x
+        if sys.platform != 'win32':
+            # check for sensible result of Fortran function; that means
+            # we can import the module name in Python and retrieve the
+            # result of the sum operation
+            return_check = import_module(modname)
+            calc_result = return_check.foo()
+            assert_equal(calc_result, 15)
+            # Removal from sys.modules is not strictly necessary; even with
+            # removal, the module (dict) stays alive. 
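+            # (editor's note: dropping the entry from sys.modules matters
+            # here because the surrounding loop compiles the same modname
+            # twice; a stale cache entry would make the next import_module()
+            # call return the previously imported extension instead of the
+            # one just built.)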
+ del sys.modules[modname] + + +def test_f2py_init_compile_failure(): + # verify an appropriate integer status value returned by + # f2py.compile() when invalid Fortran is provided + ret_val = numpy.f2py.compile(b"invalid") + assert_equal(ret_val, 1) + + +def test_f2py_init_compile_bad_cmd(): + # verify that usage of invalid command in f2py.compile() returns + # status value of 127 for historic consistency with exec_command() + # error handling + + # patch the sys Python exe path temporarily to induce an OSError + # downstream NOTE: how bad of an idea is this patching? + try: + temp = sys.executable + sys.executable = 'does not exist' + + # the OSError should take precedence over invalid Fortran + ret_val = numpy.f2py.compile(b"invalid") + assert_equal(ret_val, 127) + finally: + sys.executable = temp + + +@pytest.mark.parametrize('fsource', + ['program test_f2py\nend program test_f2py', + b'program test_f2py\nend program test_f2py',]) +def test_compile_from_strings(tmpdir, fsource): + # Make sure we can compile str and bytes gh-12796 + cwd = os.getcwd() + try: + os.chdir(str(tmpdir)) + ret_val = numpy.f2py.compile( + fsource, + modulename='test_compile_from_strings', + extension='.f90') + assert_equal(ret_val, 0) + finally: + os.chdir(cwd) diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_crackfortran.py b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_crackfortran.py new file mode 100644 index 0000000..a78b5da --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_crackfortran.py @@ -0,0 +1,288 @@ +import pytest +import numpy as np +from numpy.testing import assert_array_equal, assert_equal +from numpy.f2py.crackfortran import markinnerspaces +from . import util +from numpy.f2py import crackfortran +import textwrap + + +class TestNoSpace(util.F2PyTest): + # issue gh-15035: add handling for endsubroutine, endfunction with no space + # between "end" and the block name + code = """ + subroutine subb(k) + real(8), intent(inout) :: k(:) + k=k+1 + endsubroutine + + subroutine subc(w,k) + real(8), intent(in) :: w(:) + real(8), intent(out) :: k(size(w)) + k=w+1 + endsubroutine + + function t0(value) + character value + character t0 + t0 = value + endfunction + """ + + def test_module(self): + k = np.array([1, 2, 3], dtype=np.float64) + w = np.array([1, 2, 3], dtype=np.float64) + self.module.subb(k) + assert_array_equal(k, w + 1) + self.module.subc([w, k]) + assert_array_equal(k, w + 1) + assert self.module.t0(23) == b'2' + + +class TestPublicPrivate(): + + def test_defaultPrivate(self, tmp_path): + f_path = tmp_path / "mod.f90" + with f_path.open('w') as ff: + ff.write(textwrap.dedent("""\ + module foo + private + integer :: a + public :: setA + integer :: b + contains + subroutine setA(v) + integer, intent(in) :: v + a = v + end subroutine setA + end module foo + """)) + mod = crackfortran.crackfortran([str(f_path)]) + assert len(mod) == 1 + mod = mod[0] + assert 'private' in mod['vars']['a']['attrspec'] + assert 'public' not in mod['vars']['a']['attrspec'] + assert 'private' in mod['vars']['b']['attrspec'] + assert 'public' not in mod['vars']['b']['attrspec'] + assert 'private' not in mod['vars']['seta']['attrspec'] + assert 'public' in mod['vars']['seta']['attrspec'] + + def test_defaultPublic(self, tmp_path): + f_path = tmp_path / "mod.f90" + with f_path.open('w') as ff: + ff.write(textwrap.dedent("""\ + module foo + public + integer, private :: a + public :: setA + contains + subroutine setA(v) + integer, intent(in) :: v + a = v + end 
subroutine setA
+                end module foo
+            """))
+        mod = crackfortran.crackfortran([str(f_path)])
+        assert len(mod) == 1
+        mod = mod[0]
+        assert 'private' in mod['vars']['a']['attrspec']
+        assert 'public' not in mod['vars']['a']['attrspec']
+        assert 'private' not in mod['vars']['seta']['attrspec']
+        assert 'public' in mod['vars']['seta']['attrspec']
+
+
+class TestExternal(util.F2PyTest):
+    # issue gh-17859: add external attribute support
+    code = """
+        integer(8) function external_as_statement(fcn)
+        implicit none
+        external fcn
+        integer(8) :: fcn
+        external_as_statement = fcn(0)
+        end
+
+        integer(8) function external_as_attribute(fcn)
+        implicit none
+        integer(8), external :: fcn
+        external_as_attribute = fcn(0)
+        end
+    """
+
+    def test_external_as_statement(self):
+        def incr(x):
+            return x + 123
+        r = self.module.external_as_statement(incr)
+        assert r == 123
+
+    def test_external_as_attribute(self):
+        def incr(x):
+            return x + 123
+        r = self.module.external_as_attribute(incr)
+        assert r == 123
+
+
+class TestCrackFortran(util.F2PyTest):
+
+    suffix = '.f90'
+
+    code = textwrap.dedent("""
+      subroutine gh2848( &
+        ! first 2 parameters
+        par1, par2,&
+        ! last 2 parameters
+        par3, par4)
+
+        integer, intent(in) :: par1, par2
+        integer, intent(out) :: par3, par4
+
+        par3 = par1
+        par4 = par2
+
+      end subroutine gh2848
+    """)
+
+    def test_gh2848(self):
+        r = self.module.gh2848(1, 2)
+        assert r == (1, 2)
+
+
+class TestMarkinnerspaces():
+    # issue #14118: markinnerspaces does not handle multiple quotations
+
+    def test_do_not_touch_normal_spaces(self):
+        test_list = ["a ", " a", "a b c", "'abcdefghij'"]
+        for i in test_list:
+            assert_equal(markinnerspaces(i), i)
+
+    def test_one_relevant_space(self):
+        assert_equal(markinnerspaces("a 'b c' \\\' \\\'"), "a 'b@_@c' \\' \\'")
+        assert_equal(markinnerspaces(r'a "b c" \" \"'), r'a "b@_@c" \" \"')
+
+    def test_ignore_inner_quotes(self):
+        assert_equal(markinnerspaces('a \'b c" " d\' e'),
+                     "a 'b@_@c\"@_@\"@_@d' e")
+        assert_equal(markinnerspaces('a "b c\' \' d" e'),
+                     "a \"b@_@c'@_@'@_@d\" e")
+
+    def test_multiple_relevant_spaces(self):
+        assert_equal(markinnerspaces("a 'b c' 'd e'"), "a 'b@_@c' 'd@_@e'")
+        assert_equal(markinnerspaces(r'a "b c" "d e"'), r'a "b@_@c" "d@_@e"')
+
+
+class TestDimSpec(util.F2PyTest):
+    """This test suite tests various expressions that are used as dimension
+    specifications.
+
+    There exist two use cases where analyzing dimension specifications
+    is important.
+
+    In the first case, the size of output arrays must be defined based
+    on the inputs to a Fortran function. Because Fortran supports
+    arbitrary bases for indexing, for instance, `arr(lower:upper)`,
+    f2py has to evaluate an expression `upper - lower + 1` where
+    `lower` and `upper` are arbitrary expressions of input parameters.
+    The evaluation is performed in C, so f2py has to translate Fortran
+    expressions to valid C expressions (an alternative approach is
+    that a developer specifies the corresponding C expressions in a
+    .pyf file).
+
+    In the second case, the user provides an input array with a given
+    size, but some hidden parameters used in the dimension
+    specifications need to be determined based on the input array
+    size. This is a harder problem because f2py has to solve the
+    inverse problem: find a parameter `p` such that
+    `upper(p) - lower(p) + 1` equals the size of the input array. In
+    the case when this equation cannot be solved (e.g. 
because the input array size is wrong), raise an + error before calling the Fortran function (that otherwise would + likely crash Python process when the size of input arrays is + wrong). f2py currently supports this case only when the equation + is linear with respect to unknown parameter. + + """ + + suffix = '.f90' + + code_template = textwrap.dedent(""" + function get_arr_size_{count}(a, n) result (length) + integer, intent(in) :: n + integer, dimension({dimspec}), intent(out) :: a + integer length + length = size(a) + end function + + subroutine get_inv_arr_size_{count}(a, n) + integer :: n + ! the value of n is computed in f2py wrapper + !f2py intent(out) n + integer, dimension({dimspec}), intent(in) :: a + if (a({first}).gt.0) then + print*, "a=", a + endif + end subroutine + """) + + linear_dimspecs = [ + "n", "2*n", "2:n", "n/2", "5 - n/2", "3*n:20", "n*(n+1):n*(n+5)", + "2*n, n" + ] + nonlinear_dimspecs = ["2*n:3*n*n+2*n"] + all_dimspecs = linear_dimspecs + nonlinear_dimspecs + + code = '' + for count, dimspec in enumerate(all_dimspecs): + lst = [(d.split(":")[0] if ":" in d else "1") for d in dimspec.split(',')] + code += code_template.format( + count=count, + dimspec=dimspec, + first=", ".join(lst), + ) + + @pytest.mark.parametrize('dimspec', all_dimspecs) + def test_array_size(self, dimspec): + + count = self.all_dimspecs.index(dimspec) + get_arr_size = getattr(self.module, f'get_arr_size_{count}') + + for n in [1, 2, 3, 4, 5]: + sz, a = get_arr_size(n) + assert a.size == sz + + @pytest.mark.parametrize('dimspec', all_dimspecs) + def test_inv_array_size(self, dimspec): + + count = self.all_dimspecs.index(dimspec) + get_arr_size = getattr(self.module, f'get_arr_size_{count}') + get_inv_arr_size = getattr(self.module, f'get_inv_arr_size_{count}') + + for n in [1, 2, 3, 4, 5]: + sz, a = get_arr_size(n) + if dimspec in self.nonlinear_dimspecs: + # one must specify n as input, the call we'll ensure + # that a and n are compatible: + n1 = get_inv_arr_size(a, n) + else: + # in case of linear dependence, n can be determined + # from the shape of a: + n1 = get_inv_arr_size(a) + # n1 may be different from n (for instance, when `a` size + # is a function of some `n` fraction) but it must produce + # the same sized array + sz1, _ = get_arr_size(n1) + assert sz == sz1, (n, n1, sz, sz1) + + +class TestModuleDeclaration(): + def test_dependencies(self, tmp_path): + f_path = tmp_path / "mod.f90" + with f_path.open('w') as ff: + ff.write(textwrap.dedent("""\ + module foo + type bar + character(len = 4) :: text + end type bar + type(bar), parameter :: abar = bar('abar') + end module foo + """)) + mod = crackfortran.crackfortran([str(f_path)]) + assert len(mod) == 1 + assert mod[0]['vars']['abar']['='] == "bar('abar')" diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_kind.py b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_kind.py new file mode 100644 index 0000000..a7e2b28 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_kind.py @@ -0,0 +1,32 @@ +import os +import pytest + +from numpy.testing import assert_ +from numpy.f2py.crackfortran import ( + _selected_int_kind_func as selected_int_kind, + _selected_real_kind_func as selected_real_kind + ) +from . 
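import util
+
+# Editor's note (a minimal sketch, not part of the upstream file): Fortran's
+# SELECTED_INT_KIND(r) returns the smallest integer kind able to represent
+# all integers with up to r decimal digits, or -1 if no such kind exists.
+# Assuming only the usual 1/2/4/8-byte kinds, the reference implementation
+# imported above behaves roughly like:
+#
+#   def selected_int_kind_sketch(r):
+#       for kind, bits in [(1, 8), (2, 16), (4, 32), (8, 64)]:
+#           if 10 ** r <= 2 ** (bits - 1):
+#               return kind
+#       return -1
+#
+from . 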
import util + + +def _path(*a): + return os.path.join(*((os.path.dirname(__file__),) + a)) + + +class TestKind(util.F2PyTest): + sources = [_path('src', 'kind', 'foo.f90')] + + @pytest.mark.slow + def test_all(self): + selectedrealkind = self.module.selectedrealkind + selectedintkind = self.module.selectedintkind + + for i in range(40): + assert_(selectedintkind(i) in [selected_int_kind(i), -1], + 'selectedintkind(%s): expected %r but got %r' % + (i, selected_int_kind(i), selectedintkind(i))) + + for i in range(20): + assert_(selectedrealkind(i) in [selected_real_kind(i), -1], + 'selectedrealkind(%s): expected %r but got %r' % + (i, selected_real_kind(i), selectedrealkind(i))) diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_mixed.py b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_mixed.py new file mode 100644 index 0000000..04266ca --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_mixed.py @@ -0,0 +1,35 @@ +import os +import textwrap +import pytest + +from numpy.testing import assert_, assert_equal, IS_PYPY +from . import util + + +def _path(*a): + return os.path.join(*((os.path.dirname(__file__),) + a)) + + +class TestMixed(util.F2PyTest): + sources = [_path('src', 'mixed', 'foo.f'), + _path('src', 'mixed', 'foo_fixed.f90'), + _path('src', 'mixed', 'foo_free.f90')] + + def test_all(self): + assert_(self.module.bar11() == 11) + assert_(self.module.foo_fixed.bar12() == 12) + assert_(self.module.foo_free.bar13() == 13) + + @pytest.mark.xfail(IS_PYPY, + reason="PyPy cannot modify tp_doc after PyType_Ready") + def test_docstring(self): + expected = textwrap.dedent("""\ + a = bar11() + + Wrapper for ``bar11``. + + Returns + ------- + a : int + """) + assert_equal(self.module.bar11.__doc__, expected) diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_module_doc.py b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_module_doc.py new file mode 100644 index 0000000..4b9555c --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_module_doc.py @@ -0,0 +1,30 @@ +import os +import sys +import pytest +import textwrap + +from . import util +from numpy.testing import assert_equal, IS_PYPY + + +def _path(*a): + return os.path.join(*((os.path.dirname(__file__),) + a)) + + +class TestModuleDocString(util.F2PyTest): + sources = [_path('src', 'module_data', 'module_data_docstring.f90')] + + @pytest.mark.skipif(sys.platform=='win32', + reason='Fails with MinGW64 Gfortran (Issue #9673)') + @pytest.mark.xfail(IS_PYPY, + reason="PyPy cannot modify tp_doc after PyType_Ready") + def test_module_docstring(self): + assert_equal(self.module.mod.__doc__, + textwrap.dedent('''\ + i : 'i'-scalar + x : 'i'-array(4) + a : 'f'-array(2,3) + b : 'f'-array(-1,-1), not allocated\x00 + foo()\n + Wrapper for ``foo``.\n\n''') + ) diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_parameter.py b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_parameter.py new file mode 100644 index 0000000..b618271 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_parameter.py @@ -0,0 +1,116 @@ +import os +import pytest + +import numpy as np +from numpy.testing import assert_raises, assert_equal + +from . 
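import util
+
+# Editor's note (illustrative, not part of the upstream file): the tests
+# below exploit the fact that a strided slice is a non-contiguous view, so
+# f2py's intent(inout)-style wrappers must reject it rather than silently
+# copy it:
+#
+#   x = np.arange(6, dtype=np.float32)[::2]
+#   x.flags['C_CONTIGUOUS']   # False -> the wrapped call raises ValueError
+#
+from . 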
import util + + +def _path(*a): + return os.path.join(*((os.path.dirname(__file__),) + a)) + + +class TestParameters(util.F2PyTest): + # Check that intent(in out) translates as intent(inout) + sources = [_path('src', 'parameter', 'constant_real.f90'), + _path('src', 'parameter', 'constant_integer.f90'), + _path('src', 'parameter', 'constant_both.f90'), + _path('src', 'parameter', 'constant_compound.f90'), + _path('src', 'parameter', 'constant_non_compound.f90'), + ] + + @pytest.mark.slow + def test_constant_real_single(self): + # non-contiguous should raise error + x = np.arange(6, dtype=np.float32)[::2] + assert_raises(ValueError, self.module.foo_single, x) + + # check values with contiguous array + x = np.arange(3, dtype=np.float32) + self.module.foo_single(x) + assert_equal(x, [0 + 1 + 2*3, 1, 2]) + + @pytest.mark.slow + def test_constant_real_double(self): + # non-contiguous should raise error + x = np.arange(6, dtype=np.float64)[::2] + assert_raises(ValueError, self.module.foo_double, x) + + # check values with contiguous array + x = np.arange(3, dtype=np.float64) + self.module.foo_double(x) + assert_equal(x, [0 + 1 + 2*3, 1, 2]) + + @pytest.mark.slow + def test_constant_compound_int(self): + # non-contiguous should raise error + x = np.arange(6, dtype=np.int32)[::2] + assert_raises(ValueError, self.module.foo_compound_int, x) + + # check values with contiguous array + x = np.arange(3, dtype=np.int32) + self.module.foo_compound_int(x) + assert_equal(x, [0 + 1 + 2*6, 1, 2]) + + @pytest.mark.slow + def test_constant_non_compound_int(self): + # check values + x = np.arange(4, dtype=np.int32) + self.module.foo_non_compound_int(x) + assert_equal(x, [0 + 1 + 2 + 3*4, 1, 2, 3]) + + @pytest.mark.slow + def test_constant_integer_int(self): + # non-contiguous should raise error + x = np.arange(6, dtype=np.int32)[::2] + assert_raises(ValueError, self.module.foo_int, x) + + # check values with contiguous array + x = np.arange(3, dtype=np.int32) + self.module.foo_int(x) + assert_equal(x, [0 + 1 + 2*3, 1, 2]) + + @pytest.mark.slow + def test_constant_integer_long(self): + # non-contiguous should raise error + x = np.arange(6, dtype=np.int64)[::2] + assert_raises(ValueError, self.module.foo_long, x) + + # check values with contiguous array + x = np.arange(3, dtype=np.int64) + self.module.foo_long(x) + assert_equal(x, [0 + 1 + 2*3, 1, 2]) + + @pytest.mark.slow + def test_constant_both(self): + # non-contiguous should raise error + x = np.arange(6, dtype=np.float64)[::2] + assert_raises(ValueError, self.module.foo, x) + + # check values with contiguous array + x = np.arange(3, dtype=np.float64) + self.module.foo(x) + assert_equal(x, [0 + 1*3*3 + 2*3*3, 1*3, 2*3]) + + @pytest.mark.slow + def test_constant_no(self): + # non-contiguous should raise error + x = np.arange(6, dtype=np.float64)[::2] + assert_raises(ValueError, self.module.foo_no, x) + + # check values with contiguous array + x = np.arange(3, dtype=np.float64) + self.module.foo_no(x) + assert_equal(x, [0 + 1*3*3 + 2*3*3, 1*3, 2*3]) + + @pytest.mark.slow + def test_constant_sum(self): + # non-contiguous should raise error + x = np.arange(6, dtype=np.float64)[::2] + assert_raises(ValueError, self.module.foo_sum, x) + + # check values with contiguous array + x = np.arange(3, dtype=np.float64) + self.module.foo_sum(x) + assert_equal(x, [0 + 1*3*3 + 2*3*3, 1*3, 2*3]) diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_quoted_character.py b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_quoted_character.py new file mode 
100644
index 0000000..20c7766
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_quoted_character.py
@@ -0,0 +1,32 @@
+"""See https://github.com/numpy/numpy/pull/10676.
+
+"""
+import sys
+import pytest
+
+from numpy.testing import assert_equal
+from . import util
+
+
+class TestQuotedCharacter(util.F2PyTest):
+    code = """
+      SUBROUTINE FOO(OUT1, OUT2, OUT3, OUT4, OUT5, OUT6)
+      CHARACTER SINGLE, DOUBLE, SEMICOL, EXCLA, OPENPAR, CLOSEPAR
+      PARAMETER (SINGLE="'", DOUBLE='"', SEMICOL=';', EXCLA="!",
+     1           OPENPAR="(", CLOSEPAR=")")
+      CHARACTER OUT1, OUT2, OUT3, OUT4, OUT5, OUT6
+Cf2py intent(out) OUT1, OUT2, OUT3, OUT4, OUT5, OUT6
+      OUT1 = SINGLE
+      OUT2 = DOUBLE
+      OUT3 = SEMICOL
+      OUT4 = EXCLA
+      OUT5 = OPENPAR
+      OUT6 = CLOSEPAR
+      RETURN
+      END
+    """
+
+    @pytest.mark.skipif(sys.platform=='win32',
+                        reason='Fails with MinGW64 Gfortran (Issue #9673)')
+    def test_quoted_character(self):
+        assert_equal(self.module.foo(), (b"'", b'"', b';', b'!', b'(', b')'))
diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_regression.py b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_regression.py
new file mode 100644
index 0000000..b91499e
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_regression.py
@@ -0,0 +1,55 @@
+import os
+import pytest
+
+import numpy as np
+from numpy.testing import assert_, assert_raises, assert_equal, assert_string_equal
+
+from . import util
+
+
+def _path(*a):
+    return os.path.join(*((os.path.dirname(__file__),) + a))
+
+
+class TestIntentInOut(util.F2PyTest):
+    # Check that intent(in out) translates as intent(inout)
+    sources = [_path('src', 'regression', 'inout.f90')]
+
+    @pytest.mark.slow
+    def test_inout(self):
+        # non-contiguous should raise error
+        x = np.arange(6, dtype=np.float32)[::2]
+        assert_raises(ValueError, self.module.foo, x)
+
+        # check values with contiguous array
+        x = np.arange(3, dtype=np.float32)
+        self.module.foo(x)
+        assert_equal(x, [3, 1, 2])
+
+
+class TestNumpyVersionAttribute(util.F2PyTest):
+    # Check that the attribute __f2py_numpy_version__ is present
+    # in the compiled module and that it has the value np.__version__.
+    sources = [_path('src', 'regression', 'inout.f90')]
+
+    @pytest.mark.slow
+    def test_numpy_version_attribute(self):
+
+        # Check that self.module has an attribute named "__f2py_numpy_version__"
+        assert_(hasattr(self.module, "__f2py_numpy_version__"),
+                msg="Fortran module does not have __f2py_numpy_version__")
+
+        # Check that the attribute __f2py_numpy_version__ is a string
+        assert_(isinstance(self.module.__f2py_numpy_version__, str),
+                msg="__f2py_numpy_version__ is not a string")
+
+        # Check that __f2py_numpy_version__ has the value numpy.__version__
+        assert_string_equal(np.__version__, self.module.__f2py_numpy_version__)
+
+
+def test_include_path():
+    incdir = np.f2py.get_include()
+    fnames_in_dir = os.listdir(incdir)
+    for fname in ('fortranobject.c', 'fortranobject.h'):
+        assert fname in fnames_in_dir
+
diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_return_character.py b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_return_character.py
new file mode 100644
index 0000000..2c999ed
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_return_character.py
@@ -0,0 +1,145 @@
+import pytest
+
+from numpy import array
+from numpy.testing import assert_
+from . 
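import util
+
+# Editor's note (illustrative, not part of the upstream file): the declared
+# length of the CHARACTER result determines what the Python caller gets back
+# from the wrappers exercised below, roughly:
+#
+#   t0('ab')                # character      -> b'a'          (single byte)
+#   t5('123456789abcdef')   # character*5    -> b'12345'      (truncated)
+#   ts('123456789abcdef')   # character*10   -> b'123456789a'
+#
+from . 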
import util +import platform +IS_S390X = platform.machine() == 's390x' + + +class TestReturnCharacter(util.F2PyTest): + + def check_function(self, t, tname): + if tname in ['t0', 't1', 's0', 's1']: + assert_(t(23) == b'2') + r = t('ab') + assert_(r == b'a', repr(r)) + r = t(array('ab')) + assert_(r == b'a', repr(r)) + r = t(array(77, 'u1')) + assert_(r == b'M', repr(r)) + #assert_(_raises(ValueError, t, array([77,87]))) + #assert_(_raises(ValueError, t, array(77))) + elif tname in ['ts', 'ss']: + assert_(t(23) == b'23', repr(t(23))) + assert_(t('123456789abcdef') == b'123456789a') + elif tname in ['t5', 's5']: + assert_(t(23) == b'23', repr(t(23))) + assert_(t('ab') == b'ab', repr(t('ab'))) + assert_(t('123456789abcdef') == b'12345') + else: + raise NotImplementedError + + +class TestF77ReturnCharacter(TestReturnCharacter): + code = """ + function t0(value) + character value + character t0 + t0 = value + end + function t1(value) + character*1 value + character*1 t1 + t1 = value + end + function t5(value) + character*5 value + character*5 t5 + t5 = value + end + function ts(value) + character*(*) value + character*(*) ts + ts = value + end + + subroutine s0(t0,value) + character value + character t0 +cf2py intent(out) t0 + t0 = value + end + subroutine s1(t1,value) + character*1 value + character*1 t1 +cf2py intent(out) t1 + t1 = value + end + subroutine s5(t5,value) + character*5 value + character*5 t5 +cf2py intent(out) t5 + t5 = value + end + subroutine ss(ts,value) + character*(*) value + character*10 ts +cf2py intent(out) ts + ts = value + end + """ + + @pytest.mark.xfail(IS_S390X, reason="callback returns ' '") + @pytest.mark.parametrize('name', 't0,t1,t5,s0,s1,s5,ss'.split(',')) + def test_all(self, name): + self.check_function(getattr(self.module, name), name) + + +class TestF90ReturnCharacter(TestReturnCharacter): + suffix = ".f90" + code = """ +module f90_return_char + contains + function t0(value) + character :: value + character :: t0 + t0 = value + end function t0 + function t1(value) + character(len=1) :: value + character(len=1) :: t1 + t1 = value + end function t1 + function t5(value) + character(len=5) :: value + character(len=5) :: t5 + t5 = value + end function t5 + function ts(value) + character(len=*) :: value + character(len=10) :: ts + ts = value + end function ts + + subroutine s0(t0,value) + character :: value + character :: t0 +!f2py intent(out) t0 + t0 = value + end subroutine s0 + subroutine s1(t1,value) + character(len=1) :: value + character(len=1) :: t1 +!f2py intent(out) t1 + t1 = value + end subroutine s1 + subroutine s5(t5,value) + character(len=5) :: value + character(len=5) :: t5 +!f2py intent(out) t5 + t5 = value + end subroutine s5 + subroutine ss(ts,value) + character(len=*) :: value + character(len=10) :: ts +!f2py intent(out) ts + ts = value + end subroutine ss +end module f90_return_char + """ + + @pytest.mark.xfail(IS_S390X, reason="callback returns ' '") + @pytest.mark.parametrize('name', 't0,t1,t5,ts,s0,s1,s5,ss'.split(',')) + def test_all(self, name): + self.check_function(getattr(self.module.f90_return_char, name), name) diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_return_complex.py b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_return_complex.py new file mode 100644 index 0000000..3d2e2b9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_return_complex.py @@ -0,0 +1,163 @@ +import pytest + +from numpy import array +from numpy.testing import assert_, assert_raises +from . 
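import util
+
+# Editor's note (not part of the upstream file): t0/t8 and s0/s8 below are
+# single precision (COMPLEX and COMPLEX*8, about 7 significant decimal
+# digits), which is why check_function() allows err = 1e-5 for them; the
+# COMPLEX*16 / DOUBLE COMPLEX variants are compared with err = 0.0.
+from . 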
import util + + +class TestReturnComplex(util.F2PyTest): + + def check_function(self, t, tname): + if tname in ['t0', 't8', 's0', 's8']: + err = 1e-5 + else: + err = 0.0 + assert_(abs(t(234j) - 234.0j) <= err) + assert_(abs(t(234.6) - 234.6) <= err) + assert_(abs(t(234) - 234.0) <= err) + assert_(abs(t(234.6 + 3j) - (234.6 + 3j)) <= err) + #assert_( abs(t('234')-234.)<=err) + #assert_( abs(t('234.6')-234.6)<=err) + assert_(abs(t(-234) + 234.) <= err) + assert_(abs(t([234]) - 234.) <= err) + assert_(abs(t((234,)) - 234.) <= err) + assert_(abs(t(array(234)) - 234.) <= err) + assert_(abs(t(array(23 + 4j, 'F')) - (23 + 4j)) <= err) + assert_(abs(t(array([234])) - 234.) <= err) + assert_(abs(t(array([[234]])) - 234.) <= err) + assert_(abs(t(array([234], 'b')) + 22.) <= err) + assert_(abs(t(array([234], 'h')) - 234.) <= err) + assert_(abs(t(array([234], 'i')) - 234.) <= err) + assert_(abs(t(array([234], 'l')) - 234.) <= err) + assert_(abs(t(array([234], 'q')) - 234.) <= err) + assert_(abs(t(array([234], 'f')) - 234.) <= err) + assert_(abs(t(array([234], 'd')) - 234.) <= err) + assert_(abs(t(array([234 + 3j], 'F')) - (234 + 3j)) <= err) + assert_(abs(t(array([234], 'D')) - 234.) <= err) + + #assert_raises(TypeError, t, array([234], 'a1')) + assert_raises(TypeError, t, 'abc') + + assert_raises(IndexError, t, []) + assert_raises(IndexError, t, ()) + + assert_raises(TypeError, t, t) + assert_raises(TypeError, t, {}) + + try: + r = t(10 ** 400) + assert_(repr(r) in ['(inf+0j)', '(Infinity+0j)'], repr(r)) + except OverflowError: + pass + + +class TestF77ReturnComplex(TestReturnComplex): + code = """ + function t0(value) + complex value + complex t0 + t0 = value + end + function t8(value) + complex*8 value + complex*8 t8 + t8 = value + end + function t16(value) + complex*16 value + complex*16 t16 + t16 = value + end + function td(value) + double complex value + double complex td + td = value + end + + subroutine s0(t0,value) + complex value + complex t0 +cf2py intent(out) t0 + t0 = value + end + subroutine s8(t8,value) + complex*8 value + complex*8 t8 +cf2py intent(out) t8 + t8 = value + end + subroutine s16(t16,value) + complex*16 value + complex*16 t16 +cf2py intent(out) t16 + t16 = value + end + subroutine sd(td,value) + double complex value + double complex td +cf2py intent(out) td + td = value + end + """ + + @pytest.mark.parametrize('name', 't0,t8,t16,td,s0,s8,s16,sd'.split(',')) + def test_all(self, name): + self.check_function(getattr(self.module, name), name) + + +class TestF90ReturnComplex(TestReturnComplex): + suffix = ".f90" + code = """ +module f90_return_complex + contains + function t0(value) + complex :: value + complex :: t0 + t0 = value + end function t0 + function t8(value) + complex(kind=4) :: value + complex(kind=4) :: t8 + t8 = value + end function t8 + function t16(value) + complex(kind=8) :: value + complex(kind=8) :: t16 + t16 = value + end function t16 + function td(value) + double complex :: value + double complex :: td + td = value + end function td + + subroutine s0(t0,value) + complex :: value + complex :: t0 +!f2py intent(out) t0 + t0 = value + end subroutine s0 + subroutine s8(t8,value) + complex(kind=4) :: value + complex(kind=4) :: t8 +!f2py intent(out) t8 + t8 = value + end subroutine s8 + subroutine s16(t16,value) + complex(kind=8) :: value + complex(kind=8) :: t16 +!f2py intent(out) t16 + t16 = value + end subroutine s16 + subroutine sd(td,value) + double complex :: value + double complex :: td +!f2py intent(out) td + td = value + end subroutine sd +end module 
f90_return_complex + """ + + @pytest.mark.parametrize('name', 't0,t8,t16,td,s0,s8,s16,sd'.split(',')) + def test_all(self, name): + self.check_function(getattr(self.module.f90_return_complex, name), name) diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_return_integer.py b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_return_integer.py new file mode 100644 index 0000000..0a8121d --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_return_integer.py @@ -0,0 +1,175 @@ +import pytest + +from numpy import array +from numpy.testing import assert_, assert_raises +from . import util + + +class TestReturnInteger(util.F2PyTest): + + def check_function(self, t, tname): + assert_(t(123) == 123, repr(t(123))) + assert_(t(123.6) == 123) + assert_(t('123') == 123) + assert_(t(-123) == -123) + assert_(t([123]) == 123) + assert_(t((123,)) == 123) + assert_(t(array(123)) == 123) + assert_(t(array([123])) == 123) + assert_(t(array([[123]])) == 123) + assert_(t(array([123], 'b')) == 123) + assert_(t(array([123], 'h')) == 123) + assert_(t(array([123], 'i')) == 123) + assert_(t(array([123], 'l')) == 123) + assert_(t(array([123], 'B')) == 123) + assert_(t(array([123], 'f')) == 123) + assert_(t(array([123], 'd')) == 123) + + #assert_raises(ValueError, t, array([123],'S3')) + assert_raises(ValueError, t, 'abc') + + assert_raises(IndexError, t, []) + assert_raises(IndexError, t, ()) + + assert_raises(Exception, t, t) + assert_raises(Exception, t, {}) + + if tname in ['t8', 's8']: + assert_raises(OverflowError, t, 100000000000000000000000) + assert_raises(OverflowError, t, 10000000011111111111111.23) + + +class TestF77ReturnInteger(TestReturnInteger): + code = """ + function t0(value) + integer value + integer t0 + t0 = value + end + function t1(value) + integer*1 value + integer*1 t1 + t1 = value + end + function t2(value) + integer*2 value + integer*2 t2 + t2 = value + end + function t4(value) + integer*4 value + integer*4 t4 + t4 = value + end + function t8(value) + integer*8 value + integer*8 t8 + t8 = value + end + + subroutine s0(t0,value) + integer value + integer t0 +cf2py intent(out) t0 + t0 = value + end + subroutine s1(t1,value) + integer*1 value + integer*1 t1 +cf2py intent(out) t1 + t1 = value + end + subroutine s2(t2,value) + integer*2 value + integer*2 t2 +cf2py intent(out) t2 + t2 = value + end + subroutine s4(t4,value) + integer*4 value + integer*4 t4 +cf2py intent(out) t4 + t4 = value + end + subroutine s8(t8,value) + integer*8 value + integer*8 t8 +cf2py intent(out) t8 + t8 = value + end + """ + + @pytest.mark.parametrize('name', + 't0,t1,t2,t4,t8,s0,s1,s2,s4,s8'.split(',')) + def test_all(self, name): + self.check_function(getattr(self.module, name), name) + + +class TestF90ReturnInteger(TestReturnInteger): + suffix = ".f90" + code = """ +module f90_return_integer + contains + function t0(value) + integer :: value + integer :: t0 + t0 = value + end function t0 + function t1(value) + integer(kind=1) :: value + integer(kind=1) :: t1 + t1 = value + end function t1 + function t2(value) + integer(kind=2) :: value + integer(kind=2) :: t2 + t2 = value + end function t2 + function t4(value) + integer(kind=4) :: value + integer(kind=4) :: t4 + t4 = value + end function t4 + function t8(value) + integer(kind=8) :: value + integer(kind=8) :: t8 + t8 = value + end function t8 + + subroutine s0(t0,value) + integer :: value + integer :: t0 +!f2py intent(out) t0 + t0 = value + end subroutine s0 + subroutine s1(t1,value) + integer(kind=1) :: value + 
integer(kind=1) :: t1 +!f2py intent(out) t1 + t1 = value + end subroutine s1 + subroutine s2(t2,value) + integer(kind=2) :: value + integer(kind=2) :: t2 +!f2py intent(out) t2 + t2 = value + end subroutine s2 + subroutine s4(t4,value) + integer(kind=4) :: value + integer(kind=4) :: t4 +!f2py intent(out) t4 + t4 = value + end subroutine s4 + subroutine s8(t8,value) + integer(kind=8) :: value + integer(kind=8) :: t8 +!f2py intent(out) t8 + t8 = value + end subroutine s8 +end module f90_return_integer + """ + + @pytest.mark.parametrize('name', + 't0,t1,t2,t4,t8,s0,s1,s2,s4,s8'.split(',')) + def test_all(self, name): + self.check_function(getattr(self.module.f90_return_integer, name), name) diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_return_logical.py b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_return_logical.py new file mode 100644 index 0000000..9db939c --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_return_logical.py @@ -0,0 +1,185 @@ +import pytest + +from numpy import array +from numpy.testing import assert_, assert_raises +from . import util + + +class TestReturnLogical(util.F2PyTest): + + def check_function(self, t): + assert_(t(True) == 1, repr(t(True))) + assert_(t(False) == 0, repr(t(False))) + assert_(t(0) == 0) + assert_(t(None) == 0) + assert_(t(0.0) == 0) + assert_(t(0j) == 0) + assert_(t(1j) == 1) + assert_(t(234) == 1) + assert_(t(234.6) == 1) + assert_(t(234.6 + 3j) == 1) + assert_(t('234') == 1) + assert_(t('aaa') == 1) + assert_(t('') == 0) + assert_(t([]) == 0) + assert_(t(()) == 0) + assert_(t({}) == 0) + assert_(t(t) == 1) + assert_(t(-234) == 1) + assert_(t(10 ** 100) == 1) + assert_(t([234]) == 1) + assert_(t((234,)) == 1) + assert_(t(array(234)) == 1) + assert_(t(array([234])) == 1) + assert_(t(array([[234]])) == 1) + assert_(t(array([234], 'b')) == 1) + assert_(t(array([234], 'h')) == 1) + assert_(t(array([234], 'i')) == 1) + assert_(t(array([234], 'l')) == 1) + assert_(t(array([234], 'f')) == 1) + assert_(t(array([234], 'd')) == 1) + assert_(t(array([234 + 3j], 'F')) == 1) + assert_(t(array([234], 'D')) == 1) + assert_(t(array(0)) == 0) + assert_(t(array([0])) == 0) + assert_(t(array([[0]])) == 0) + assert_(t(array([0j])) == 0) + assert_(t(array([1])) == 1) + assert_raises(ValueError, t, array([0, 0])) + + +class TestF77ReturnLogical(TestReturnLogical): + code = """ + function t0(value) + logical value + logical t0 + t0 = value + end + function t1(value) + logical*1 value + logical*1 t1 + t1 = value + end + function t2(value) + logical*2 value + logical*2 t2 + t2 = value + end + function t4(value) + logical*4 value + logical*4 t4 + t4 = value + end +c function t8(value) +c logical*8 value +c logical*8 t8 +c t8 = value +c end + + subroutine s0(t0,value) + logical value + logical t0 +cf2py intent(out) t0 + t0 = value + end + subroutine s1(t1,value) + logical*1 value + logical*1 t1 +cf2py intent(out) t1 + t1 = value + end + subroutine s2(t2,value) + logical*2 value + logical*2 t2 +cf2py intent(out) t2 + t2 = value + end + subroutine s4(t4,value) + logical*4 value + logical*4 t4 +cf2py intent(out) t4 + t4 = value + end +c subroutine s8(t8,value) +c logical*8 value +c logical*8 t8 +cf2py intent(out) t8 +c t8 = value +c end + """ + + @pytest.mark.slow + @pytest.mark.parametrize('name', 't0,t1,t2,t4,s0,s1,s2,s4'.split(',')) + def test_all(self, name): + self.check_function(getattr(self.module, name)) + + +class TestF90ReturnLogical(TestReturnLogical): + suffix = ".f90" + code = """ +module 
f90_return_logical + contains + function t0(value) + logical :: value + logical :: t0 + t0 = value + end function t0 + function t1(value) + logical(kind=1) :: value + logical(kind=1) :: t1 + t1 = value + end function t1 + function t2(value) + logical(kind=2) :: value + logical(kind=2) :: t2 + t2 = value + end function t2 + function t4(value) + logical(kind=4) :: value + logical(kind=4) :: t4 + t4 = value + end function t4 + function t8(value) + logical(kind=8) :: value + logical(kind=8) :: t8 + t8 = value + end function t8 + + subroutine s0(t0,value) + logical :: value + logical :: t0 +!f2py intent(out) t0 + t0 = value + end subroutine s0 + subroutine s1(t1,value) + logical(kind=1) :: value + logical(kind=1) :: t1 +!f2py intent(out) t1 + t1 = value + end subroutine s1 + subroutine s2(t2,value) + logical(kind=2) :: value + logical(kind=2) :: t2 +!f2py intent(out) t2 + t2 = value + end subroutine s2 + subroutine s4(t4,value) + logical(kind=4) :: value + logical(kind=4) :: t4 +!f2py intent(out) t4 + t4 = value + end subroutine s4 + subroutine s8(t8,value) + logical(kind=8) :: value + logical(kind=8) :: t8 +!f2py intent(out) t8 + t8 = value + end subroutine s8 +end module f90_return_logical + """ + + @pytest.mark.slow + @pytest.mark.parametrize('name', + 't0,t1,t2,t4,t8,s0,s1,s2,s4,s8'.split(',')) + def test_all(self, name): + self.check_function(getattr(self.module.f90_return_logical, name)) diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_return_real.py b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_return_real.py new file mode 100644 index 0000000..8e5022a --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_return_real.py @@ -0,0 +1,203 @@ +import platform +import pytest + +from numpy import array +from numpy.testing import assert_, assert_raises +from . import util + + +class TestReturnReal(util.F2PyTest): + + def check_function(self, t, tname): + if tname in ['t0', 't4', 's0', 's4']: + err = 1e-5 + else: + err = 0.0 + assert_(abs(t(234) - 234.0) <= err) + assert_(abs(t(234.6) - 234.6) <= err) + assert_(abs(t('234') - 234) <= err) + assert_(abs(t('234.6') - 234.6) <= err) + assert_(abs(t(-234) + 234) <= err) + assert_(abs(t([234]) - 234) <= err) + assert_(abs(t((234,)) - 234.) <= err) + assert_(abs(t(array(234)) - 234.) <= err) + assert_(abs(t(array([234])) - 234.) <= err) + assert_(abs(t(array([[234]])) - 234.) <= err) + assert_(abs(t(array([234], 'b')) + 22) <= err) + assert_(abs(t(array([234], 'h')) - 234.) <= err) + assert_(abs(t(array([234], 'i')) - 234.) <= err) + assert_(abs(t(array([234], 'l')) - 234.) <= err) + assert_(abs(t(array([234], 'B')) - 234.) <= err) + assert_(abs(t(array([234], 'f')) - 234.) <= err) + assert_(abs(t(array([234], 'd')) - 234.) 
<= err) + if tname in ['t0', 't4', 's0', 's4']: + assert_(t(1e200) == t(1e300)) # inf + + #assert_raises(ValueError, t, array([234], 'S1')) + assert_raises(ValueError, t, 'abc') + + assert_raises(IndexError, t, []) + assert_raises(IndexError, t, ()) + + assert_raises(Exception, t, t) + assert_raises(Exception, t, {}) + + try: + r = t(10 ** 400) + assert_(repr(r) in ['inf', 'Infinity'], repr(r)) + except OverflowError: + pass + + + +@pytest.mark.skipif( + platform.system() == 'Darwin', + reason="Prone to error when run with numpy/f2py/tests on mac os, " + "but not when run in isolation") +class TestCReturnReal(TestReturnReal): + suffix = ".pyf" + module_name = "c_ext_return_real" + code = """ +python module c_ext_return_real +usercode \'\'\' +float t4(float value) { return value; } +void s4(float *t4, float value) { *t4 = value; } +double t8(double value) { return value; } +void s8(double *t8, double value) { *t8 = value; } +\'\'\' +interface + function t4(value) + real*4 intent(c) :: t4,value + end + function t8(value) + real*8 intent(c) :: t8,value + end + subroutine s4(t4,value) + intent(c) s4 + real*4 intent(out) :: t4 + real*4 intent(c) :: value + end + subroutine s8(t8,value) + intent(c) s8 + real*8 intent(out) :: t8 + real*8 intent(c) :: value + end +end interface +end python module c_ext_return_real + """ + + @pytest.mark.parametrize('name', 't4,t8,s4,s8'.split(',')) + def test_all(self, name): + self.check_function(getattr(self.module, name), name) + + +class TestF77ReturnReal(TestReturnReal): + code = """ + function t0(value) + real value + real t0 + t0 = value + end + function t4(value) + real*4 value + real*4 t4 + t4 = value + end + function t8(value) + real*8 value + real*8 t8 + t8 = value + end + function td(value) + double precision value + double precision td + td = value + end + + subroutine s0(t0,value) + real value + real t0 +cf2py intent(out) t0 + t0 = value + end + subroutine s4(t4,value) + real*4 value + real*4 t4 +cf2py intent(out) t4 + t4 = value + end + subroutine s8(t8,value) + real*8 value + real*8 t8 +cf2py intent(out) t8 + t8 = value + end + subroutine sd(td,value) + double precision value + double precision td +cf2py intent(out) td + td = value + end + """ + + @pytest.mark.parametrize('name', 't0,t4,t8,td,s0,s4,s8,sd'.split(',')) + def test_all(self, name): + self.check_function(getattr(self.module, name), name) + + +class TestF90ReturnReal(TestReturnReal): + suffix = ".f90" + code = """ +module f90_return_real + contains + function t0(value) + real :: value + real :: t0 + t0 = value + end function t0 + function t4(value) + real(kind=4) :: value + real(kind=4) :: t4 + t4 = value + end function t4 + function t8(value) + real(kind=8) :: value + real(kind=8) :: t8 + t8 = value + end function t8 + function td(value) + double precision :: value + double precision :: td + td = value + end function td + + subroutine s0(t0,value) + real :: value + real :: t0 +!f2py intent(out) t0 + t0 = value + end subroutine s0 + subroutine s4(t4,value) + real(kind=4) :: value + real(kind=4) :: t4 +!f2py intent(out) t4 + t4 = value + end subroutine s4 + subroutine s8(t8,value) + real(kind=8) :: value + real(kind=8) :: t8 +!f2py intent(out) t8 + t8 = value + end subroutine s8 + subroutine sd(td,value) + double precision :: value + double precision :: td +!f2py intent(out) td + td = value + end subroutine sd +end module f90_return_real + """ + + @pytest.mark.parametrize('name', 't0,t4,t8,td,s0,s4,s8,sd'.split(',')) + def test_all(self, name): + 
self.check_function(getattr(self.module.f90_return_real, name), name) diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_semicolon_split.py b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_semicolon_split.py new file mode 100644 index 0000000..d8b4bf2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_semicolon_split.py @@ -0,0 +1,63 @@ +import platform +import pytest + +from . import util +from numpy.testing import assert_equal + +@pytest.mark.skipif( + platform.system() == 'Darwin', + reason="Prone to error when run with numpy/f2py/tests on mac os, " + "but not when run in isolation") +class TestMultiline(util.F2PyTest): + suffix = ".pyf" + module_name = "multiline" + code = """ +python module {module} + usercode ''' +void foo(int* x) {{ + char dummy = ';'; + *x = 42; +}} +''' + interface + subroutine foo(x) + intent(c) foo + integer intent(out) :: x + end subroutine foo + end interface +end python module {module} + """.format(module=module_name) + + def test_multiline(self): + assert_equal(self.module.foo(), 42) + + +@pytest.mark.skipif( + platform.system() == 'Darwin', + reason="Prone to error when run with numpy/f2py/tests on mac os, " + "but not when run in isolation") +class TestCallstatement(util.F2PyTest): + suffix = ".pyf" + module_name = "callstatement" + code = """ +python module {module} + usercode ''' +void foo(int* x) {{ +}} +''' + interface + subroutine foo(x) + intent(c) foo + integer intent(out) :: x + callprotoargument int* + callstatement {{ & + ; & + x = 42; & + }} + end subroutine foo + end interface +end python module {module} + """.format(module=module_name) + + def test_callstatement(self): + assert_equal(self.module.foo(), 42) diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_size.py b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_size.py new file mode 100644 index 0000000..b609fa7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_size.py @@ -0,0 +1,49 @@ +import os +import pytest + +from numpy.testing import assert_equal +from . 
import util + + +def _path(*a): + return os.path.join(*((os.path.dirname(__file__),) + a)) + + +class TestSizeSumExample(util.F2PyTest): + sources = [_path('src', 'size', 'foo.f90')] + + @pytest.mark.slow + def test_all(self): + r = self.module.foo([[]]) + assert_equal(r, [0], repr(r)) + + r = self.module.foo([[1, 2]]) + assert_equal(r, [3], repr(r)) + + r = self.module.foo([[1, 2], [3, 4]]) + assert_equal(r, [3, 7], repr(r)) + + r = self.module.foo([[1, 2], [3, 4], [5, 6]]) + assert_equal(r, [3, 7, 11], repr(r)) + + @pytest.mark.slow + def test_transpose(self): + r = self.module.trans([[]]) + assert_equal(r.T, [[]], repr(r)) + + r = self.module.trans([[1, 2]]) + assert_equal(r, [[1], [2]], repr(r)) + + r = self.module.trans([[1, 2, 3], [4, 5, 6]]) + assert_equal(r, [[1, 4], [2, 5], [3, 6]], repr(r)) + + @pytest.mark.slow + def test_flatten(self): + r = self.module.flatten([[]]) + assert_equal(r, [], repr(r)) + + r = self.module.flatten([[1, 2]]) + assert_equal(r, [1, 2], repr(r)) + + r = self.module.flatten([[1, 2, 3], [4, 5, 6]]) + assert_equal(r, [1, 2, 3, 4, 5, 6], repr(r)) diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_string.py b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_string.py new file mode 100644 index 0000000..7b27f87 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_string.py @@ -0,0 +1,166 @@ +import os +import pytest +import textwrap +from numpy.testing import assert_array_equal +import numpy as np +from . import util + + +def _path(*a): + return os.path.join(*((os.path.dirname(__file__),) + a)) + + +class TestString(util.F2PyTest): + sources = [_path('src', 'string', 'char.f90')] + + @pytest.mark.slow + def test_char(self): + strings = np.array(['ab', 'cd', 'ef'], dtype='c').T + inp, out = self.module.char_test.change_strings(strings, + strings.shape[1]) + assert_array_equal(inp, strings) + expected = strings.copy() + expected[1, :] = 'AAA' + assert_array_equal(out, expected) + + +class TestDocStringArguments(util.F2PyTest): + suffix = '.f' + + code = """ +C FILE: STRING.F + SUBROUTINE FOO(A,B,C,D) + CHARACTER*5 A, B + CHARACTER*(*) C,D +Cf2py intent(in) a,c +Cf2py intent(inout) b,d + PRINT*, "A=",A + PRINT*, "B=",B + PRINT*, "C=",C + PRINT*, "D=",D + PRINT*, "CHANGE A,B,C,D" + A(1:1) = 'A' + B(1:1) = 'B' + C(1:1) = 'C' + D(1:1) = 'D' + PRINT*, "A=",A + PRINT*, "B=",B + PRINT*, "C=",C + PRINT*, "D=",D + END +C END OF FILE STRING.F + """ + + def test_example(self): + a = np.array(b'123\0\0') + b = np.array(b'123\0\0') + c = np.array(b'123') + d = np.array(b'123') + + self.module.foo(a, b, c, d) + + assert a.tobytes() == b'123\0\0' + assert b.tobytes() == b'B23\0\0', (b.tobytes(),) + assert c.tobytes() == b'123' + assert d.tobytes() == b'D23' + + +class TestFixedString(util.F2PyTest): + suffix = '.f90' + + code = textwrap.dedent(""" + function sint(s) result(i) + implicit none + character(len=*) :: s + integer :: j, i + i = 0 + do j=len(s), 1, -1 + if (.not.((i.eq.0).and.(s(j:j).eq.' '))) then + i = i + ichar(s(j:j)) * 10 ** (j - 1) + endif + end do + return + end function sint + + function test_in_bytes4(a) result (i) + implicit none + integer :: sint + character(len=4) :: a + integer :: i + i = sint(a) + a(1:1) = 'A' + return + end function test_in_bytes4 + + function test_inout_bytes4(a) result (i) + implicit none + integer :: sint + character(len=4), intent(inout) :: a + integer :: i + if (a(1:1).ne.' 
') then + a(1:1) = 'E' + endif + i = sint(a) + return + end function test_inout_bytes4 + """) + + @staticmethod + def _sint(s, start=0, end=None): + """Return the content of a string buffer as integer value. + + For example: + _sint('1234') -> 4321 + _sint('123A') -> 17321 + """ + if isinstance(s, np.ndarray): + s = s.tobytes() + elif isinstance(s, str): + s = s.encode() + assert isinstance(s, bytes) + if end is None: + end = len(s) + i = 0 + for j in range(start, min(end, len(s))): + i += s[j] * 10 ** j + return i + + def _get_input(self, intent='in'): + if intent in ['in']: + yield '' + yield '1' + yield '1234' + yield '12345' + yield b'' + yield b'\0' + yield b'1' + yield b'\01' + yield b'1\0' + yield b'1234' + yield b'12345' + yield np.ndarray((), np.bytes_, buffer=b'') # array(b'', dtype='|S0') + yield np.array(b'') # array(b'', dtype='|S1') + yield np.array(b'\0') + yield np.array(b'1') + yield np.array(b'1\0') + yield np.array(b'\01') + yield np.array(b'1234') + yield np.array(b'123\0') + yield np.array(b'12345') + + def test_intent_in(self): + for s in self._get_input(): + r = self.module.test_in_bytes4(s) + # also checks that s is not changed inplace + expected = self._sint(s, end=4) + assert r == expected, (s) + + def test_intent_inout(self): + for s in self._get_input(intent='inout'): + rest = self._sint(s, start=4) + r = self.module.test_inout_bytes4(s) + expected = self._sint(s, end=4) + assert r == expected + + # check that the rest of input string is preserved + assert rest == self._sint(s, start=4) diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_symbolic.py b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_symbolic.py new file mode 100644 index 0000000..b37ae33 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/test_symbolic.py @@ -0,0 +1,462 @@ +from numpy.testing import assert_raises +from numpy.f2py.symbolic import ( + Expr, Op, ArithOp, Language, + as_symbol, as_number, as_string, as_array, as_complex, + as_terms, as_factors, eliminate_quotes, insert_quotes, + fromstring, as_expr, as_apply, + as_numer_denom, as_ternary, as_ref, as_deref, + normalize, as_eq, as_ne, as_lt, as_gt, as_le, as_ge + ) +from . 
import util + + +class TestSymbolic(util.F2PyTest): + + def test_eliminate_quotes(self): + def worker(s): + r, d = eliminate_quotes(s) + s1 = insert_quotes(r, d) + assert s1 == s + + for kind in ['', 'mykind_']: + worker(kind + '"1234" // "ABCD"') + worker(kind + '"1234" // ' + kind + '"ABCD"') + worker(kind + '"1234" // \'ABCD\'') + worker(kind + '"1234" // ' + kind + '\'ABCD\'') + worker(kind + '"1\\"2\'AB\'34"') + worker('a = ' + kind + "'1\\'2\"AB\"34'") + + def test_sanity(self): + x = as_symbol('x') + y = as_symbol('y') + z = as_symbol('z') + + assert x.op == Op.SYMBOL + assert repr(x) == "Expr(Op.SYMBOL, 'x')" + assert x == x + assert x != y + assert hash(x) is not None + + n = as_number(123) + m = as_number(456) + assert n.op == Op.INTEGER + assert repr(n) == "Expr(Op.INTEGER, (123, 4))" + assert n == n + assert n != m + assert hash(n) is not None + + fn = as_number(12.3) + fm = as_number(45.6) + assert fn.op == Op.REAL + assert repr(fn) == "Expr(Op.REAL, (12.3, 4))" + assert fn == fn + assert fn != fm + assert hash(fn) is not None + + c = as_complex(1, 2) + c2 = as_complex(3, 4) + assert c.op == Op.COMPLEX + assert repr(c) == ("Expr(Op.COMPLEX, (Expr(Op.INTEGER, (1, 4))," + " Expr(Op.INTEGER, (2, 4))))") + assert c == c + assert c != c2 + assert hash(c) is not None + + s = as_string("'123'") + s2 = as_string('"ABC"') + assert s.op == Op.STRING + assert repr(s) == "Expr(Op.STRING, (\"'123'\", 1))", repr(s) + assert s == s + assert s != s2 + + a = as_array((n, m)) + b = as_array((n,)) + assert a.op == Op.ARRAY + assert repr(a) == ("Expr(Op.ARRAY, (Expr(Op.INTEGER, (123, 4))," + " Expr(Op.INTEGER, (456, 4))))") + assert a == a + assert a != b + + t = as_terms(x) + u = as_terms(y) + assert t.op == Op.TERMS + assert repr(t) == "Expr(Op.TERMS, {Expr(Op.SYMBOL, 'x'): 1})" + assert t == t + assert t != u + assert hash(t) is not None + + v = as_factors(x) + w = as_factors(y) + assert v.op == Op.FACTORS + assert repr(v) == "Expr(Op.FACTORS, {Expr(Op.SYMBOL, 'x'): 1})" + assert v == v + assert w != v + assert hash(v) is not None + + t = as_ternary(x, y, z) + u = as_ternary(x, z, y) + assert t.op == Op.TERNARY + assert t == t + assert t != u + assert hash(t) is not None + + e = as_eq(x, y) + f = as_lt(x, y) + assert e.op == Op.RELATIONAL + assert e == e + assert e != f + assert hash(e) is not None + + def test_tostring_fortran(self): + x = as_symbol('x') + y = as_symbol('y') + z = as_symbol('z') + n = as_number(123) + m = as_number(456) + a = as_array((n, m)) + c = as_complex(n, m) + + assert str(x) == 'x' + assert str(n) == '123' + assert str(a) == '[123, 456]' + assert str(c) == '(123, 456)' + + assert str(Expr(Op.TERMS, {x: 1})) == 'x' + assert str(Expr(Op.TERMS, {x: 2})) == '2 * x' + assert str(Expr(Op.TERMS, {x: -1})) == '-x' + assert str(Expr(Op.TERMS, {x: -2})) == '-2 * x' + assert str(Expr(Op.TERMS, {x: 1, y: 1})) == 'x + y' + assert str(Expr(Op.TERMS, {x: -1, y: -1})) == '-x - y' + assert str(Expr(Op.TERMS, {x: 2, y: 3})) == '2 * x + 3 * y' + assert str(Expr(Op.TERMS, {x: -2, y: 3})) == '-2 * x + 3 * y' + assert str(Expr(Op.TERMS, {x: 2, y: -3})) == '2 * x - 3 * y' + + assert str(Expr(Op.FACTORS, {x: 1})) == 'x' + assert str(Expr(Op.FACTORS, {x: 2})) == 'x ** 2' + assert str(Expr(Op.FACTORS, {x: -1})) == 'x ** -1' + assert str(Expr(Op.FACTORS, {x: -2})) == 'x ** -2' + assert str(Expr(Op.FACTORS, {x: 1, y: 1})) == 'x * y' + assert str(Expr(Op.FACTORS, {x: 2, y: 3})) == 'x ** 2 * y ** 3' + + v = Expr(Op.FACTORS, {x: 2, Expr(Op.TERMS, {x: 1, y: 1}): 3}) + assert str(v) == 'x ** 2 * (x + 
y) ** 3', str(v) + v = Expr(Op.FACTORS, {x: 2, Expr(Op.FACTORS, {x: 1, y: 1}): 3}) + assert str(v) == 'x ** 2 * (x * y) ** 3', str(v) + + assert str(Expr(Op.APPLY, ('f', (), {}))) == 'f()' + assert str(Expr(Op.APPLY, ('f', (x,), {}))) == 'f(x)' + assert str(Expr(Op.APPLY, ('f', (x, y), {}))) == 'f(x, y)' + assert str(Expr(Op.INDEXING, ('f', x))) == 'f[x]' + + assert str(as_ternary(x, y, z)) == 'merge(y, z, x)' + assert str(as_eq(x, y)) == 'x .eq. y' + assert str(as_ne(x, y)) == 'x .ne. y' + assert str(as_lt(x, y)) == 'x .lt. y' + assert str(as_le(x, y)) == 'x .le. y' + assert str(as_gt(x, y)) == 'x .gt. y' + assert str(as_ge(x, y)) == 'x .ge. y' + + def test_tostring_c(self): + language = Language.C + x = as_symbol('x') + y = as_symbol('y') + z = as_symbol('z') + n = as_number(123) + + assert Expr(Op.FACTORS, {x: 2}).tostring(language=language) == 'x * x' + assert Expr(Op.FACTORS, {x + y: 2}).tostring( + language=language) == '(x + y) * (x + y)' + assert Expr(Op.FACTORS, {x: 12}).tostring( + language=language) == 'pow(x, 12)' + + assert as_apply(ArithOp.DIV, x, y).tostring( + language=language) == 'x / y' + assert as_apply(ArithOp.DIV, x, x + y).tostring( + language=language) == 'x / (x + y)' + assert as_apply(ArithOp.DIV, x - y, x + y).tostring( + language=language) == '(x - y) / (x + y)' + assert (x + (x - y) / (x + y) + n).tostring( + language=language) == '123 + x + (x - y) / (x + y)' + + assert as_ternary(x, y, z).tostring(language=language) == "(x?y:z)" + assert as_eq(x, y).tostring(language=language) == "x == y" + assert as_ne(x, y).tostring(language=language) == "x != y" + assert as_lt(x, y).tostring(language=language) == "x < y" + assert as_le(x, y).tostring(language=language) == "x <= y" + assert as_gt(x, y).tostring(language=language) == "x > y" + assert as_ge(x, y).tostring(language=language) == "x >= y" + + def test_operations(self): + x = as_symbol('x') + y = as_symbol('y') + z = as_symbol('z') + + assert x + x == Expr(Op.TERMS, {x: 2}) + assert x - x == Expr(Op.INTEGER, (0, 4)) + assert x + y == Expr(Op.TERMS, {x: 1, y: 1}) + assert x - y == Expr(Op.TERMS, {x: 1, y: -1}) + assert x * x == Expr(Op.FACTORS, {x: 2}) + assert x * y == Expr(Op.FACTORS, {x: 1, y: 1}) + + assert +x == x + assert -x == Expr(Op.TERMS, {x: -1}), repr(-x) + assert 2 * x == Expr(Op.TERMS, {x: 2}) + assert 2 + x == Expr(Op.TERMS, {x: 1, as_number(1): 2}) + assert 2 * x + 3 * y == Expr(Op.TERMS, {x: 2, y: 3}) + assert (x + y) * 2 == Expr(Op.TERMS, {x: 2, y: 2}) + + assert x ** 2 == Expr(Op.FACTORS, {x: 2}) + assert (x + y) ** 2 == Expr(Op.TERMS, + {Expr(Op.FACTORS, {x: 2}): 1, + Expr(Op.FACTORS, {y: 2}): 1, + Expr(Op.FACTORS, {x: 1, y: 1}): 2}) + assert (x + y) * x == x ** 2 + x * y + assert (x + y) ** 2 == x ** 2 + 2 * x * y + y ** 2 + assert (x + y) ** 2 + (x - y) ** 2 == 2 * x ** 2 + 2 * y ** 2 + assert (x + y) * z == x * z + y * z + assert z * (x + y) == x * z + y * z + + assert (x / 2) == as_apply(ArithOp.DIV, x, as_number(2)) + assert (2 * x / 2) == x + assert (3 * x / 2) == as_apply(ArithOp.DIV, 3*x, as_number(2)) + assert (4 * x / 2) == 2 * x + assert (5 * x / 2) == as_apply(ArithOp.DIV, 5*x, as_number(2)) + assert (6 * x / 2) == 3 * x + assert ((3*5) * x / 6) == as_apply(ArithOp.DIV, 5*x, as_number(2)) + assert (30*x**2*y**4 / (24*x**3*y**3)) == as_apply(ArithOp.DIV, + 5*y, 4*x) + assert ((15 * x / 6) / 5) == as_apply( + ArithOp.DIV, x, as_number(2)), ((15 * x / 6) / 5) + assert (x / (5 / x)) == as_apply(ArithOp.DIV, x**2, as_number(5)) + + assert (x / 2.0) == Expr(Op.TERMS, {x: 0.5}) + + s = 
as_string('"ABC"') + t = as_string('"123"') + + assert s // t == Expr(Op.STRING, ('"ABC123"', 1)) + assert s // x == Expr(Op.CONCAT, (s, x)) + assert x // s == Expr(Op.CONCAT, (x, s)) + + c = as_complex(1., 2.) + assert -c == as_complex(-1., -2.) + assert c + c == as_expr((1+2j)*2) + assert c * c == as_expr((1+2j)**2) + + def test_substitute(self): + x = as_symbol('x') + y = as_symbol('y') + z = as_symbol('z') + a = as_array((x, y)) + + assert x.substitute({x: y}) == y + assert (x + y).substitute({x: z}) == y + z + assert (x * y).substitute({x: z}) == y * z + assert (x ** 4).substitute({x: z}) == z ** 4 + assert (x / y).substitute({x: z}) == z / y + assert x.substitute({x: y + z}) == y + z + assert a.substitute({x: y + z}) == as_array((y + z, y)) + + assert as_ternary(x, y, z).substitute( + {x: y + z}) == as_ternary(y + z, y, z) + assert as_eq(x, y).substitute( + {x: y + z}) == as_eq(y + z, y) + + def test_fromstring(self): + + x = as_symbol('x') + y = as_symbol('y') + z = as_symbol('z') + f = as_symbol('f') + s = as_string('"ABC"') + t = as_string('"123"') + a = as_array((x, y)) + + assert fromstring('x') == x + assert fromstring('+ x') == x + assert fromstring('- x') == -x + assert fromstring('x + y') == x + y + assert fromstring('x + 1') == x + 1 + assert fromstring('x * y') == x * y + assert fromstring('x * 2') == x * 2 + assert fromstring('x / y') == x / y + assert fromstring('x ** 2', + language=Language.Python) == x ** 2 + assert fromstring('x ** 2 ** 3', + language=Language.Python) == x ** 2 ** 3 + assert fromstring('(x + y) * z') == (x + y) * z + + assert fromstring('f(x)') == f(x) + assert fromstring('f(x,y)') == f(x, y) + assert fromstring('f[x]') == f[x] + assert fromstring('f[x][y]') == f[x][y] + + assert fromstring('"ABC"') == s + assert normalize(fromstring('"ABC" // "123" ', + language=Language.Fortran)) == s // t + assert fromstring('f("ABC")') == f(s) + assert fromstring('MYSTRKIND_"ABC"') == as_string('"ABC"', 'MYSTRKIND') + + assert fromstring('(/x, y/)') == a, fromstring('(/x, y/)') + assert fromstring('f((/x, y/))') == f(a) + assert fromstring('(/(x+y)*z/)') == as_array(((x+y)*z,)) + + assert fromstring('123') == as_number(123) + assert fromstring('123_2') == as_number(123, 2) + assert fromstring('123_myintkind') == as_number(123, 'myintkind') + + assert fromstring('123.0') == as_number(123.0, 4) + assert fromstring('123.0_4') == as_number(123.0, 4) + assert fromstring('123.0_8') == as_number(123.0, 8) + assert fromstring('123.0e0') == as_number(123.0, 4) + assert fromstring('123.0d0') == as_number(123.0, 8) + assert fromstring('123d0') == as_number(123.0, 8) + assert fromstring('123e-0') == as_number(123.0, 4) + assert fromstring('123d+0') == as_number(123.0, 8) + assert fromstring('123.0_myrealkind') == as_number(123.0, 'myrealkind') + assert fromstring('3E4') == as_number(30000.0, 4) + + assert fromstring('(1, 2)') == as_complex(1, 2) + assert fromstring('(1e2, PI)') == as_complex( + as_number(100.0), as_symbol('PI')) + + assert fromstring('[1, 2]') == as_array((as_number(1), as_number(2))) + + assert fromstring('POINT(x, y=1)') == as_apply( + as_symbol('POINT'), x, y=as_number(1)) + assert (fromstring('PERSON(name="John", age=50, shape=(/34, 23/))') + == as_apply(as_symbol('PERSON'), + name=as_string('"John"'), + age=as_number(50), + shape=as_array((as_number(34), as_number(23))))) + + assert fromstring('x?y:z') == as_ternary(x, y, z) + + assert fromstring('*x') == as_deref(x) + assert fromstring('**x') == as_deref(as_deref(x)) + assert fromstring('&x') == 
as_ref(x) + assert fromstring('(*x) * (*y)') == as_deref(x) * as_deref(y) + assert fromstring('(*x) * *y') == as_deref(x) * as_deref(y) + assert fromstring('*x * *y') == as_deref(x) * as_deref(y) + assert fromstring('*x**y') == as_deref(x) * as_deref(y) + + assert fromstring('x == y') == as_eq(x, y) + assert fromstring('x != y') == as_ne(x, y) + assert fromstring('x < y') == as_lt(x, y) + assert fromstring('x > y') == as_gt(x, y) + assert fromstring('x <= y') == as_le(x, y) + assert fromstring('x >= y') == as_ge(x, y) + + assert fromstring('x .eq. y', language=Language.Fortran) == as_eq(x, y) + assert fromstring('x .ne. y', language=Language.Fortran) == as_ne(x, y) + assert fromstring('x .lt. y', language=Language.Fortran) == as_lt(x, y) + assert fromstring('x .gt. y', language=Language.Fortran) == as_gt(x, y) + assert fromstring('x .le. y', language=Language.Fortran) == as_le(x, y) + assert fromstring('x .ge. y', language=Language.Fortran) == as_ge(x, y) + + def test_traverse(self): + x = as_symbol('x') + y = as_symbol('y') + z = as_symbol('z') + f = as_symbol('f') + + # Use traverse to substitute a symbol + def replace_visit(s, r=z): + if s == x: + return r + + assert x.traverse(replace_visit) == z + assert y.traverse(replace_visit) == y + assert z.traverse(replace_visit) == z + assert (f(y)).traverse(replace_visit) == f(y) + assert (f(x)).traverse(replace_visit) == f(z) + assert (f[y]).traverse(replace_visit) == f[y] + assert (f[z]).traverse(replace_visit) == f[z] + assert (x + y + z).traverse(replace_visit) == (2 * z + y) + assert (x + f(y, x - z)).traverse( + replace_visit) == (z + f(y, as_number(0))) + assert as_eq(x, y).traverse(replace_visit) == as_eq(z, y) + + # Use traverse to collect symbols, method 1 + function_symbols = set() + symbols = set() + + def collect_symbols(s): + if s.op is Op.APPLY: + oper = s.data[0] + function_symbols.add(oper) + if oper in symbols: + symbols.remove(oper) + elif s.op is Op.SYMBOL and s not in function_symbols: + symbols.add(s) + + (x + f(y, x - z)).traverse(collect_symbols) + assert function_symbols == {f} + assert symbols == {x, y, z} + + # Use traverse to collect symbols, method 2 + def collect_symbols2(expr, symbols): + if expr.op is Op.SYMBOL: + symbols.add(expr) + + symbols = set() + (x + f(y, x - z)).traverse(collect_symbols2, symbols) + assert symbols == {x, y, z, f} + + # Use traverse to partially collect symbols + def collect_symbols3(expr, symbols): + if expr.op is Op.APPLY: + # skip traversing function calls + return expr + if expr.op is Op.SYMBOL: + symbols.add(expr) + + symbols = set() + (x + f(y, x - z)).traverse(collect_symbols3, symbols) + assert symbols == {x} + + def test_linear_solve(self): + x = as_symbol('x') + y = as_symbol('y') + z = as_symbol('z') + + assert x.linear_solve(x) == (as_number(1), as_number(0)) + assert (x+1).linear_solve(x) == (as_number(1), as_number(1)) + assert (2*x).linear_solve(x) == (as_number(2), as_number(0)) + assert (2*x+3).linear_solve(x) == (as_number(2), as_number(3)) + assert as_number(3).linear_solve(x) == (as_number(0), as_number(3)) + assert y.linear_solve(x) == (as_number(0), y) + assert (y*z).linear_solve(x) == (as_number(0), y * z) + + assert (x+y).linear_solve(x) == (as_number(1), y) + assert (z*x+y).linear_solve(x) == (z, y) + assert ((z+y)*x+y).linear_solve(x) == (z + y, y) + assert (z*y*x+y).linear_solve(x) == (z * y, y) + + assert_raises(RuntimeError, lambda: (x*x).linear_solve(x)) + + def test_as_numer_denom(self): + x = as_symbol('x') + y = as_symbol('y') + n = as_number(123) + + 
assert as_numer_denom(x) == (x, as_number(1)) + assert as_numer_denom(x / n) == (x, n) + assert as_numer_denom(n / x) == (n, x) + assert as_numer_denom(x / y) == (x, y) + assert as_numer_denom(x * y) == (x * y, as_number(1)) + assert as_numer_denom(n + x / y) == (x + n * y, y) + assert as_numer_denom(n + x / (y - x / n)) == (y * n ** 2, y * n - x) + + def test_polynomial_atoms(self): + x = as_symbol('x') + y = as_symbol('y') + n = as_number(123) + + assert x.polynomial_atoms() == {x} + assert n.polynomial_atoms() == set() + assert (y[x]).polynomial_atoms() == {y[x]} + assert (y(x)).polynomial_atoms() == {y(x)} + assert (y(x) + x).polynomial_atoms() == {y(x), x} + assert (y(x) * x[y]).polynomial_atoms() == {y(x), x[y]} + assert (y(x) ** x).polynomial_atoms() == {y(x)} diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/tests/util.py b/.local/lib/python3.8/site-packages/numpy/f2py/tests/util.py new file mode 100644 index 0000000..1a6805e --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/tests/util.py @@ -0,0 +1,356 @@ +""" +Utility functions for + +- building and importing modules on test time, using a temporary location +- detecting if compilers are present + +""" +import os +import sys +import subprocess +import tempfile +import shutil +import atexit +import textwrap +import re +import pytest + +from numpy.compat import asbytes, asstr +from numpy.testing import temppath +from importlib import import_module + +# +# Maintaining a temporary module directory +# + +_module_dir = None +_module_num = 5403 + + +def _cleanup(): + global _module_dir + if _module_dir is not None: + try: + sys.path.remove(_module_dir) + except ValueError: + pass + try: + shutil.rmtree(_module_dir) + except OSError: + pass + _module_dir = None + + +def get_module_dir(): + global _module_dir + if _module_dir is None: + _module_dir = tempfile.mkdtemp() + atexit.register(_cleanup) + if _module_dir not in sys.path: + sys.path.insert(0, _module_dir) + return _module_dir + + +def get_temp_module_name(): + # Assume single-threaded, and the module dir usable only by this thread + global _module_num + d = get_module_dir() + name = "_test_ext_module_%d" % _module_num + _module_num += 1 + if name in sys.modules: + # this should not be possible, but check anyway + raise RuntimeError("Temporary module name already in use.") + return name + + +def _memoize(func): + memo = {} + + def wrapper(*a, **kw): + key = repr((a, kw)) + if key not in memo: + try: + memo[key] = func(*a, **kw) + except Exception as e: + memo[key] = e + raise + ret = memo[key] + if isinstance(ret, Exception): + raise ret + return ret + wrapper.__name__ = func.__name__ + return wrapper + +# +# Building modules +# + + +@_memoize +def build_module(source_files, options=[], skip=[], only=[], module_name=None): + """ + Compile and import a f2py module, built from the given files. 
+ + """ + + code = ("import sys; sys.path = %s; import numpy.f2py as f2py2e; " + "f2py2e.main()" % repr(sys.path)) + + d = get_module_dir() + + # Copy files + dst_sources = [] + f2py_sources = [] + for fn in source_files: + if not os.path.isfile(fn): + raise RuntimeError("%s is not a file" % fn) + dst = os.path.join(d, os.path.basename(fn)) + shutil.copyfile(fn, dst) + dst_sources.append(dst) + + base, ext = os.path.splitext(dst) + if ext in ('.f90', '.f', '.c', '.pyf'): + f2py_sources.append(dst) + + # Prepare options + if module_name is None: + module_name = get_temp_module_name() + f2py_opts = ['-c', '-m', module_name] + options + f2py_sources + if skip: + f2py_opts += ['skip:'] + skip + if only: + f2py_opts += ['only:'] + only + + # Build + cwd = os.getcwd() + try: + os.chdir(d) + cmd = [sys.executable, '-c', code] + f2py_opts + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + out, err = p.communicate() + if p.returncode != 0: + raise RuntimeError("Running f2py failed: %s\n%s" + % (cmd[4:], asstr(out))) + finally: + os.chdir(cwd) + + # Partial cleanup + for fn in dst_sources: + os.unlink(fn) + + # Import + return import_module(module_name) + + +@_memoize +def build_code(source_code, options=[], skip=[], only=[], suffix=None, + module_name=None): + """ + Compile and import Fortran code using f2py. + + """ + if suffix is None: + suffix = '.f' + with temppath(suffix=suffix) as path: + with open(path, 'w') as f: + f.write(source_code) + return build_module([path], options=options, skip=skip, only=only, + module_name=module_name) + +# +# Check if compilers are available at all... +# + +_compiler_status = None + + +def _get_compiler_status(): + global _compiler_status + if _compiler_status is not None: + return _compiler_status + + _compiler_status = (False, False, False) + + # XXX: this is really ugly. But I don't know how to invoke Distutils + # in a safer way... + code = textwrap.dedent("""\ + import os + import sys + sys.path = %(syspath)s + + def configuration(parent_name='',top_path=None): + global config + from numpy.distutils.misc_util import Configuration + config = Configuration('', parent_name, top_path) + return config + + from numpy.distutils.core import setup + setup(configuration=configuration) + + config_cmd = config.get_config_cmd() + have_c = config_cmd.try_compile('void foo() {}') + print('COMPILERS:%%d,%%d,%%d' %% (have_c, + config.have_f77c(), + config.have_f90c())) + sys.exit(99) + """) + code = code % dict(syspath=repr(sys.path)) + + tmpdir = tempfile.mkdtemp() + try: + script = os.path.join(tmpdir, 'setup.py') + + with open(script, 'w') as f: + f.write(code) + + cmd = [sys.executable, 'setup.py', 'config'] + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + cwd=tmpdir) + out, err = p.communicate() + finally: + shutil.rmtree(tmpdir) + + m = re.search(br'COMPILERS:(\d+),(\d+),(\d+)', out) + if m: + _compiler_status = (bool(int(m.group(1))), bool(int(m.group(2))), + bool(int(m.group(3)))) + # Finished + return _compiler_status + + +def has_c_compiler(): + return _get_compiler_status()[0] + + +def has_f77_compiler(): + return _get_compiler_status()[1] + + +def has_f90_compiler(): + return _get_compiler_status()[2] + +# +# Building with distutils +# + + +@_memoize +def build_module_distutils(source_files, config_code, module_name, **kw): + """ + Build a module via distutils and import it. 
+ + """ + d = get_module_dir() + + # Copy files + dst_sources = [] + for fn in source_files: + if not os.path.isfile(fn): + raise RuntimeError("%s is not a file" % fn) + dst = os.path.join(d, os.path.basename(fn)) + shutil.copyfile(fn, dst) + dst_sources.append(dst) + + # Build script + config_code = textwrap.dedent(config_code).replace("\n", "\n ") + + code = textwrap.dedent("""\ + import os + import sys + sys.path = %(syspath)s + + def configuration(parent_name='',top_path=None): + from numpy.distutils.misc_util import Configuration + config = Configuration('', parent_name, top_path) + %(config_code)s + return config + + if __name__ == "__main__": + from numpy.distutils.core import setup + setup(configuration=configuration) + """) % dict(config_code=config_code, syspath=repr(sys.path)) + + script = os.path.join(d, get_temp_module_name() + '.py') + dst_sources.append(script) + with open(script, 'wb') as f: + f.write(asbytes(code)) + + # Build + cwd = os.getcwd() + try: + os.chdir(d) + cmd = [sys.executable, script, 'build_ext', '-i'] + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + out, err = p.communicate() + if p.returncode != 0: + raise RuntimeError("Running distutils build failed: %s\n%s" + % (cmd[4:], asstr(out))) + finally: + os.chdir(cwd) + + # Partial cleanup + for fn in dst_sources: + os.unlink(fn) + + # Import + __import__(module_name) + return sys.modules[module_name] + +# +# Unittest convenience +# + + +class F2PyTest: + code = None + sources = None + options = [] + skip = [] + only = [] + suffix = '.f' + module = None + module_name = None + + def setup(self): + if sys.platform == 'win32': + pytest.skip('Fails with MinGW64 Gfortran (Issue #9673)') + + if self.module is not None: + return + + # Check compiler availability first + if not has_c_compiler(): + pytest.skip("No C compiler available") + + codes = [] + if self.sources: + codes.extend(self.sources) + if self.code is not None: + codes.append(self.suffix) + + needs_f77 = False + needs_f90 = False + for fn in codes: + if fn.endswith('.f'): + needs_f77 = True + elif fn.endswith('.f90'): + needs_f90 = True + if needs_f77 and not has_f77_compiler(): + pytest.skip("No Fortran 77 compiler available") + if needs_f90 and not has_f90_compiler(): + pytest.skip("No Fortran 90 compiler available") + + # Build the module + if self.code is not None: + self.module = build_code(self.code, options=self.options, + skip=self.skip, only=self.only, + suffix=self.suffix, + module_name=self.module_name) + + if self.sources is not None: + self.module = build_module(self.sources, options=self.options, + skip=self.skip, only=self.only, + module_name=self.module_name) diff --git a/.local/lib/python3.8/site-packages/numpy/f2py/use_rules.py b/.local/lib/python3.8/site-packages/numpy/f2py/use_rules.py new file mode 100644 index 0000000..f1b71e8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/f2py/use_rules.py @@ -0,0 +1,113 @@ +#!/usr/bin/env python3 +""" + +Build 'use others module data' mechanism for f2py2e. + +Unfinished. + +Copyright 2000 Pearu Peterson all rights reserved, +Pearu Peterson +Permission to use, modify, and distribute this software is given under the +terms of the NumPy License. + +NO WARRANTY IS EXPRESSED OR IMPLIED. USE AT YOUR OWN RISK. 
+$Date: 2000/09/10 12:35:43 $ +Pearu Peterson + +""" +__version__ = "$Revision: 1.3 $"[10:-1] + +f2py_version = 'See `f2py -v`' + + +from .auxfuncs import ( + applyrules, dictappend, gentitle, hasnote, outmess +) + + +usemodule_rules = { + 'body': """ +#begintitle# +static char doc_#apiname#[] = \"\\\nVariable wrapper signature:\\n\\ +\t #name# = get_#name#()\\n\\ +Arguments:\\n\\ +#docstr#\"; +extern F_MODFUNC(#usemodulename#,#USEMODULENAME#,#realname#,#REALNAME#); +static PyObject *#apiname#(PyObject *capi_self, PyObject *capi_args) { +/*#decl#*/ +\tif (!PyArg_ParseTuple(capi_args, \"\")) goto capi_fail; +printf(\"c: %d\\n\",F_MODFUNC(#usemodulename#,#USEMODULENAME#,#realname#,#REALNAME#)); +\treturn Py_BuildValue(\"\"); +capi_fail: +\treturn NULL; +} +""", + 'method': '\t{\"get_#name#\",#apiname#,METH_VARARGS|METH_KEYWORDS,doc_#apiname#},', + 'need': ['F_MODFUNC'] +} + +################ + + +def buildusevars(m, r): + ret = {} + outmess( + '\t\tBuilding use variable hooks for module "%s" (feature only for F90/F95)...\n' % (m['name'])) + varsmap = {} + revmap = {} + if 'map' in r: + for k in r['map'].keys(): + if r['map'][k] in revmap: + outmess('\t\t\tVariable "%s<=%s" is already mapped by "%s". Skipping.\n' % ( + r['map'][k], k, revmap[r['map'][k]])) + else: + revmap[r['map'][k]] = k + if 'only' in r and r['only']: + for v in r['map'].keys(): + if r['map'][v] in m['vars']: + + if revmap[r['map'][v]] == v: + varsmap[v] = r['map'][v] + else: + outmess('\t\t\tIgnoring map "%s=>%s". See above.\n' % + (v, r['map'][v])) + else: + outmess( + '\t\t\tNo definition for variable "%s=>%s". Skipping.\n' % (v, r['map'][v])) + else: + for v in m['vars'].keys(): + if v in revmap: + varsmap[v] = revmap[v] + else: + varsmap[v] = v + for v in varsmap.keys(): + ret = dictappend(ret, buildusevar(v, varsmap[v], m['vars'], m['name'])) + return ret + + +def buildusevar(name, realname, vars, usemodulename): + outmess('\t\t\tConstructing wrapper function for variable "%s=>%s"...\n' % ( + name, realname)) + ret = {} + vrd = {'name': name, + 'realname': realname, + 'REALNAME': realname.upper(), + 'usemodulename': usemodulename, + 'USEMODULENAME': usemodulename.upper(), + 'texname': name.replace('_', '\\_'), + 'begintitle': gentitle('%s=>%s' % (name, realname)), + 'endtitle': gentitle('end of %s=>%s' % (name, realname)), + 'apiname': '#modulename#_use_%s_from_%s' % (realname, usemodulename) + } + nummap = {0: 'Ro', 1: 'Ri', 2: 'Rii', 3: 'Riii', 4: 'Riv', + 5: 'Rv', 6: 'Rvi', 7: 'Rvii', 8: 'Rviii', 9: 'Rix'} + vrd['texnamename'] = name + for i in nummap.keys(): + vrd['texnamename'] = vrd['texnamename'].replace(repr(i), nummap[i]) + if hasnote(vars[realname]): + vrd['note'] = vars[realname]['note'] + rd = dictappend({}, vrd) + + print(name, realname, vars[realname]) + ret = applyrules(usemodule_rules, rd) + return ret diff --git a/.local/lib/python3.8/site-packages/numpy/fft/__init__.py b/.local/lib/python3.8/site-packages/numpy/fft/__init__.py new file mode 100644 index 0000000..fd5e475 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/fft/__init__.py @@ -0,0 +1,212 @@ +""" +Discrete Fourier Transform (:mod:`numpy.fft`) +============================================= + +.. currentmodule:: numpy.fft + +The SciPy module `scipy.fft` is a more comprehensive superset +of ``numpy.fft``, which includes only a basic set of routines. + +Standard FFTs +------------- + +.. autosummary:: + :toctree: generated/ + + fft Discrete Fourier transform. + ifft Inverse discrete Fourier transform. 
+ fft2 Discrete Fourier transform in two dimensions. + ifft2 Inverse discrete Fourier transform in two dimensions. + fftn Discrete Fourier transform in N-dimensions. + ifftn Inverse discrete Fourier transform in N dimensions. + +Real FFTs +--------- + +.. autosummary:: + :toctree: generated/ + + rfft Real discrete Fourier transform. + irfft Inverse real discrete Fourier transform. + rfft2 Real discrete Fourier transform in two dimensions. + irfft2 Inverse real discrete Fourier transform in two dimensions. + rfftn Real discrete Fourier transform in N dimensions. + irfftn Inverse real discrete Fourier transform in N dimensions. + +Hermitian FFTs +-------------- + +.. autosummary:: + :toctree: generated/ + + hfft Hermitian discrete Fourier transform. + ihfft Inverse Hermitian discrete Fourier transform. + +Helper routines +--------------- + +.. autosummary:: + :toctree: generated/ + + fftfreq Discrete Fourier Transform sample frequencies. + rfftfreq DFT sample frequencies (for usage with rfft, irfft). + fftshift Shift zero-frequency component to center of spectrum. + ifftshift Inverse of fftshift. + + +Background information +---------------------- + +Fourier analysis is fundamentally a method for expressing a function as a +sum of periodic components, and for recovering the function from those +components. When both the function and its Fourier transform are +replaced with discretized counterparts, it is called the discrete Fourier +transform (DFT). The DFT has become a mainstay of numerical computing in +part because of a very fast algorithm for computing it, called the Fast +Fourier Transform (FFT), which was known to Gauss (1805) and was brought +to light in its current form by Cooley and Tukey [CT]_. Press et al. [NR]_ +provide an accessible introduction to Fourier analysis and its +applications. + +Because the discrete Fourier transform separates its input into +components that contribute at discrete frequencies, it has a great number +of applications in digital signal processing, e.g., for filtering, and in +this context the discretized input to the transform is customarily +referred to as a *signal*, which exists in the *time domain*. The output +is called a *spectrum* or *transform* and exists in the *frequency +domain*. + +Implementation details +---------------------- + +There are many ways to define the DFT, varying in the sign of the +exponent, normalization, etc. In this implementation, the DFT is defined +as + +.. math:: + A_k = \\sum_{m=0}^{n-1} a_m \\exp\\left\\{-2\\pi i{mk \\over n}\\right\\} + \\qquad k = 0,\\ldots,n-1. + +The DFT is in general defined for complex inputs and outputs, and a +single-frequency component at linear frequency :math:`f` is +represented by a complex exponential +:math:`a_m = \\exp\\{2\\pi i\\,f m\\Delta t\\}`, where :math:`\\Delta t` +is the sampling interval. + +The values in the result follow so-called "standard" order: If ``A = +fft(a, n)``, then ``A[0]`` contains the zero-frequency term (the sum of +the signal), which is always purely real for real inputs. Then ``A[1:n/2]`` +contains the positive-frequency terms, and ``A[n/2+1:]`` contains the +negative-frequency terms, in order of decreasingly negative frequency. +For an even number of input points, ``A[n/2]`` represents both positive and +negative Nyquist frequency, and is also purely real for real input. For +an odd number of input points, ``A[(n-1)/2]`` contains the largest positive +frequency, while ``A[(n+1)/2]`` contains the largest negative frequency. 
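The "standard" order described above is easy to verify numerically. A minimal doctest-style sketch (the input values here are arbitrary illustrations, not taken from the NumPy docs; only NumPy itself is assumed):

>>> import numpy as np
>>> a = np.array([1.0, 2.0, 0.0, -1.0])   # real input, n = 4 (even)
>>> A = np.fft.fft(a)
>>> np.isclose(A[0], a.sum())             # zero-frequency term is the signal's sum
True
>>> np.isclose(A[0].imag, 0.0)            # purely real for real input
True
>>> np.isclose(A[2].imag, 0.0)            # so is the Nyquist term A[n/2]
True
>>> np.isclose(A[1], np.conj(A[3]))       # positive/negative frequencies pair up
True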
+The routine ``np.fft.fftfreq(n)`` returns an array giving the frequencies +of corresponding elements in the output. The routine +``np.fft.fftshift(A)`` shifts transforms and their frequencies to put the +zero-frequency components in the middle, and ``np.fft.ifftshift(A)`` undoes +that shift. + +When the input `a` is a time-domain signal and ``A = fft(a)``, ``np.abs(A)`` +is its amplitude spectrum and ``np.abs(A)**2`` is its power spectrum. +The phase spectrum is obtained by ``np.angle(A)``. + +The inverse DFT is defined as + +.. math:: + a_m = \\frac{1}{n}\\sum_{k=0}^{n-1}A_k\\exp\\left\\{2\\pi i{mk\\over n}\\right\\} + \\qquad m = 0,\\ldots,n-1. + +It differs from the forward transform by the sign of the exponential +argument and the default normalization by :math:`1/n`. + +Type Promotion +-------------- + +`numpy.fft` promotes ``float32`` and ``complex64`` arrays to ``float64`` and +``complex128`` arrays respectively. For an FFT implementation that does not +promote input arrays, see `scipy.fftpack`. + +Normalization +------------- + +The argument ``norm`` indicates which direction of the pair of direct/inverse +transforms is scaled and with what normalization factor. +The default normalization (``"backward"``) has the direct (forward) transforms +unscaled and the inverse (backward) transforms scaled by :math:`1/n`. It is +possible to obtain unitary transforms by setting the keyword argument ``norm`` +to ``"ortho"`` so that both direct and inverse transforms are scaled by +:math:`1/\\sqrt{n}`. Finally, setting the keyword argument ``norm`` to +``"forward"`` has the direct transforms scaled by :math:`1/n` and the inverse +transforms unscaled (i.e. exactly opposite to the default ``"backward"``). +`None` is an alias of the default option ``"backward"`` for backward +compatibility. + +Real and Hermitian transforms +----------------------------- + +When the input is purely real, its transform is Hermitian, i.e., the +component at frequency :math:`f_k` is the complex conjugate of the +component at frequency :math:`-f_k`, which means that for real +inputs there is no information in the negative frequency components that +is not already available from the positive frequency components. +The family of `rfft` functions is +designed to operate on real inputs, and exploits this symmetry by +computing only the positive frequency components, up to and including the +Nyquist frequency. Thus, ``n`` input points produce ``n/2+1`` complex +output points. The inverses of this family assume the same symmetry of +their input, and for an output of ``n`` points use ``n/2+1`` input points. + +Correspondingly, when the spectrum is purely real, the signal is +Hermitian. The `hfft` family of functions exploits this symmetry by +using ``n/2+1`` complex points in the input (time) domain for ``n`` real +points in the frequency domain. + +In higher dimensions, FFTs are used, e.g., for image analysis and +filtering. The computational efficiency of the FFT means that it can +also be a faster way to compute large convolutions, using the property +that a convolution in the time domain is equivalent to a point-by-point +multiplication in the frequency domain. + +Higher dimensions +----------------- + +In two dimensions, the DFT is defined as + +.. 
math:: + A_{kl} = \\sum_{m=0}^{M-1} \\sum_{n=0}^{N-1} + a_{mn}\\exp\\left\\{-2\\pi i \\left({mk\\over M}+{nl\\over N}\\right)\\right\\} + \\qquad k = 0, \\ldots, M-1;\\quad l = 0, \\ldots, N-1, + +which extends in the obvious way to higher dimensions, and the inverses +in higher dimensions also extend in the same way. + +References +---------- + +.. [CT] Cooley, James W., and John W. Tukey, 1965, "An algorithm for the + machine calculation of complex Fourier series," *Math. Comput.* + 19: 297-301. + +.. [NR] Press, W., Teukolsky, S., Vetterline, W.T., and Flannery, B.P., + 2007, *Numerical Recipes: The Art of Scientific Computing*, ch. + 12-13. Cambridge Univ. Press, Cambridge, UK. + +Examples +-------- + +For examples, see the various functions. + +""" + +from . import _pocketfft, helper +from ._pocketfft import * +from .helper import * + +__all__ = _pocketfft.__all__.copy() +__all__ += helper.__all__ + +from numpy._pytesttester import PytestTester +test = PytestTester(__name__) +del PytestTester diff --git a/.local/lib/python3.8/site-packages/numpy/fft/__init__.pyi b/.local/lib/python3.8/site-packages/numpy/fft/__init__.pyi new file mode 100644 index 0000000..510e576 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/fft/__init__.pyi @@ -0,0 +1,31 @@ +from typing import Any, List + +from numpy._pytesttester import PytestTester + +from numpy.fft._pocketfft import ( + fft as fft, + ifft as ifft, + rfft as rfft, + irfft as irfft, + hfft as hfft, + ihfft as ihfft, + rfftn as rfftn, + irfftn as irfftn, + rfft2 as rfft2, + irfft2 as irfft2, + fft2 as fft2, + ifft2 as ifft2, + fftn as fftn, + ifftn as ifftn, +) + +from numpy.fft.helper import ( + fftshift as fftshift, + ifftshift as ifftshift, + fftfreq as fftfreq, + rfftfreq as rfftfreq, +) + +__all__: List[str] +__path__: List[str] +test: PytestTester diff --git a/.local/lib/python3.8/site-packages/numpy/fft/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/fft/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..5ffeb23 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/fft/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/fft/__pycache__/_pocketfft.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/fft/__pycache__/_pocketfft.cpython-38.pyc new file mode 100644 index 0000000..9d4d0ca Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/fft/__pycache__/_pocketfft.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/fft/__pycache__/helper.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/fft/__pycache__/helper.cpython-38.pyc new file mode 100644 index 0000000..dae9ff5 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/fft/__pycache__/helper.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/fft/__pycache__/setup.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/fft/__pycache__/setup.cpython-38.pyc new file mode 100644 index 0000000..f6d560d Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/fft/__pycache__/setup.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/fft/_pocketfft.py b/.local/lib/python3.8/site-packages/numpy/fft/_pocketfft.py new file mode 100644 index 0000000..ad69f7c --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/fft/_pocketfft.py @@ -0,0 +1,1424 @@ +""" +Discrete Fourier Transforms + +Routines in this module: + +fft(a, n=None, axis=-1, 
norm="backward") +ifft(a, n=None, axis=-1, norm="backward") +rfft(a, n=None, axis=-1, norm="backward") +irfft(a, n=None, axis=-1, norm="backward") +hfft(a, n=None, axis=-1, norm="backward") +ihfft(a, n=None, axis=-1, norm="backward") +fftn(a, s=None, axes=None, norm="backward") +ifftn(a, s=None, axes=None, norm="backward") +rfftn(a, s=None, axes=None, norm="backward") +irfftn(a, s=None, axes=None, norm="backward") +fft2(a, s=None, axes=(-2,-1), norm="backward") +ifft2(a, s=None, axes=(-2, -1), norm="backward") +rfft2(a, s=None, axes=(-2,-1), norm="backward") +irfft2(a, s=None, axes=(-2, -1), norm="backward") + +i = inverse transform +r = transform of purely real data +h = Hermite transform +n = n-dimensional transform +2 = 2-dimensional transform +(Note: 2D routines are just nD routines with different default +behavior.) + +""" +__all__ = ['fft', 'ifft', 'rfft', 'irfft', 'hfft', 'ihfft', 'rfftn', + 'irfftn', 'rfft2', 'irfft2', 'fft2', 'ifft2', 'fftn', 'ifftn'] + +import functools + +from numpy.core import asarray, zeros, swapaxes, conjugate, take, sqrt +from . import _pocketfft_internal as pfi +from numpy.core.multiarray import normalize_axis_index +from numpy.core import overrides + + +array_function_dispatch = functools.partial( + overrides.array_function_dispatch, module='numpy.fft') + + +# `inv_norm` is a float by which the result of the transform needs to be +# divided. This replaces the original, more intuitive 'fct` parameter to avoid +# divisions by zero (or alternatively additional checks) in the case of +# zero-length axes during its computation. +def _raw_fft(a, n, axis, is_real, is_forward, inv_norm): + axis = normalize_axis_index(axis, a.ndim) + if n is None: + n = a.shape[axis] + + fct = 1/inv_norm + + if a.shape[axis] != n: + s = list(a.shape) + index = [slice(None)]*len(s) + if s[axis] > n: + index[axis] = slice(0, n) + a = a[tuple(index)] + else: + index[axis] = slice(0, s[axis]) + s[axis] = n + z = zeros(s, a.dtype.char) + z[tuple(index)] = a + a = z + + if axis == a.ndim-1: + r = pfi.execute(a, is_real, is_forward, fct) + else: + a = swapaxes(a, axis, -1) + r = pfi.execute(a, is_real, is_forward, fct) + r = swapaxes(r, axis, -1) + return r + + +def _get_forward_norm(n, norm): + if n < 1: + raise ValueError(f"Invalid number of FFT data points ({n}) specified.") + + if norm is None or norm == "backward": + return 1 + elif norm == "ortho": + return sqrt(n) + elif norm == "forward": + return n + raise ValueError(f'Invalid norm value {norm}; should be "backward",' + '"ortho" or "forward".') + + +def _get_backward_norm(n, norm): + if n < 1: + raise ValueError(f"Invalid number of FFT data points ({n}) specified.") + + if norm is None or norm == "backward": + return n + elif norm == "ortho": + return sqrt(n) + elif norm == "forward": + return 1 + raise ValueError(f'Invalid norm value {norm}; should be "backward", ' + '"ortho" or "forward".') + + +_SWAP_DIRECTION_MAP = {"backward": "forward", None: "forward", + "ortho": "ortho", "forward": "backward"} + + +def _swap_direction(norm): + try: + return _SWAP_DIRECTION_MAP[norm] + except KeyError: + raise ValueError(f'Invalid norm value {norm}; should be "backward", ' + '"ortho" or "forward".') from None + + +def _fft_dispatcher(a, n=None, axis=None, norm=None): + return (a,) + + +@array_function_dispatch(_fft_dispatcher) +def fft(a, n=None, axis=-1, norm=None): + """ + Compute the one-dimensional discrete Fourier Transform. 
+ + This function computes the one-dimensional *n*-point discrete Fourier + Transform (DFT) with the efficient Fast Fourier Transform (FFT) + algorithm [CT]. + + Parameters + ---------- + a : array_like + Input array, can be complex. + n : int, optional + Length of the transformed axis of the output. + If `n` is smaller than the length of the input, the input is cropped. + If it is larger, the input is padded with zeros. If `n` is not given, + the length of the input along the axis specified by `axis` is used. + axis : int, optional + Axis over which to compute the FFT. If not given, the last axis is + used. + norm : {"backward", "ortho", "forward"}, optional + .. versionadded:: 1.10.0 + + Normalization mode (see `numpy.fft`). Default is "backward". + Indicates which direction of the forward/backward pair of transforms + is scaled and with what normalization factor. + + .. versionadded:: 1.20.0 + + The "backward", "forward" values were added. + + Returns + ------- + out : complex ndarray + The truncated or zero-padded input, transformed along the axis + indicated by `axis`, or the last one if `axis` is not specified. + + Raises + ------ + IndexError + If `axis` is not a valid axis of `a`. + + See Also + -------- + numpy.fft : for definition of the DFT and conventions used. + ifft : The inverse of `fft`. + fft2 : The two-dimensional FFT. + fftn : The *n*-dimensional FFT. + rfftn : The *n*-dimensional FFT of real input. + fftfreq : Frequency bins for given FFT parameters. + + Notes + ----- + FFT (Fast Fourier Transform) refers to a way the discrete Fourier + Transform (DFT) can be calculated efficiently, by using symmetries in the + calculated terms. The symmetry is highest when `n` is a power of 2, and + the transform is therefore most efficient for these sizes. + + The DFT is defined, with the conventions used in this implementation, in + the documentation for the `numpy.fft` module. + + References + ---------- + .. [CT] Cooley, James W., and John W. Tukey, 1965, "An algorithm for the + machine calculation of complex Fourier series," *Math. Comput.* + 19: 297-301. + + Examples + -------- + >>> np.fft.fft(np.exp(2j * np.pi * np.arange(8) / 8)) + array([-2.33486982e-16+1.14423775e-17j, 8.00000000e+00-1.25557246e-15j, + 2.33486982e-16+2.33486982e-16j, 0.00000000e+00+1.22464680e-16j, + -1.14423775e-17+2.33486982e-16j, 0.00000000e+00+5.20784380e-16j, + 1.14423775e-17+1.14423775e-17j, 0.00000000e+00+1.22464680e-16j]) + + In this example, real input has an FFT which is Hermitian, i.e., symmetric + in the real part and anti-symmetric in the imaginary part, as described in + the `numpy.fft` documentation: + + >>> import matplotlib.pyplot as plt + >>> t = np.arange(256) + >>> sp = np.fft.fft(np.sin(t)) + >>> freq = np.fft.fftfreq(t.shape[-1]) + >>> plt.plot(freq, sp.real, freq, sp.imag) + [<matplotlib.lines.Line2D object at 0x...>, <matplotlib.lines.Line2D object at 0x...>] + >>> plt.show() + + """ + a = asarray(a) + if n is None: + n = a.shape[axis] + inv_norm = _get_forward_norm(n, norm) + output = _raw_fft(a, n, axis, False, True, inv_norm) + return output + + +@array_function_dispatch(_fft_dispatcher) +def ifft(a, n=None, axis=-1, norm=None): + """ + Compute the one-dimensional inverse discrete Fourier Transform. + + This function computes the inverse of the one-dimensional *n*-point + discrete Fourier transform computed by `fft`. In other words, + ``ifft(fft(a)) == a`` to within numerical accuracy. + For a general description of the algorithm and definitions, + see `numpy.fft`. 
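A quick sketch of the round-trip identity just stated (the array values are chosen arbitrarily for illustration; only NumPy is assumed):

>>> import numpy as np
>>> a = np.array([0.5, 1.0, -2.0, 3.0])
>>> np.allclose(np.fft.ifft(np.fft.fft(a)), a)
True
>>> # the same holds with norm="ortho", where both directions share the
>>> # 1/sqrt(n) factor
>>> np.allclose(np.fft.ifft(np.fft.fft(a, norm="ortho"), norm="ortho"), a)
True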
+ + The input should be ordered in the same way as is returned by `fft`, + i.e., + + * ``a[0]`` should contain the zero frequency term, + * ``a[1:n//2]`` should contain the positive-frequency terms, + * ``a[n//2 + 1:]`` should contain the negative-frequency terms, in + increasing order starting from the most negative frequency. + + For an even number of input points, ``A[n//2]`` represents the sum of + the values at the positive and negative Nyquist frequencies, as the two + are aliased together. See `numpy.fft` for details. + + Parameters + ---------- + a : array_like + Input array, can be complex. + n : int, optional + Length of the transformed axis of the output. + If `n` is smaller than the length of the input, the input is cropped. + If it is larger, the input is padded with zeros. If `n` is not given, + the length of the input along the axis specified by `axis` is used. + See notes about padding issues. + axis : int, optional + Axis over which to compute the inverse DFT. If not given, the last + axis is used. + norm : {"backward", "ortho", "forward"}, optional + .. versionadded:: 1.10.0 + + Normalization mode (see `numpy.fft`). Default is "backward". + Indicates which direction of the forward/backward pair of transforms + is scaled and with what normalization factor. + + .. versionadded:: 1.20.0 + + The "backward", "forward" values were added. + + Returns + ------- + out : complex ndarray + The truncated or zero-padded input, transformed along the axis + indicated by `axis`, or the last one if `axis` is not specified. + + Raises + ------ + IndexError + If `axis` is not a valid axis of `a`. + + See Also + -------- + numpy.fft : An introduction, with definitions and general explanations. + fft : The one-dimensional (forward) FFT, of which `ifft` is the inverse + ifft2 : The two-dimensional inverse FFT. + ifftn : The n-dimensional inverse FFT. + + Notes + ----- + If the input parameter `n` is larger than the size of the input, the input + is padded by appending zeros at the end. Even though this is the common + approach, it might lead to surprising results. If a different padding is + desired, it must be performed before calling `ifft`. + + Examples + -------- + >>> np.fft.ifft([0, 4, 0, 0]) + array([ 1.+0.j, 0.+1.j, -1.+0.j, 0.-1.j]) # may vary + + Create and plot a band-limited signal with random phases: + + >>> import matplotlib.pyplot as plt + >>> t = np.arange(400) + >>> n = np.zeros((400,), dtype=complex) + >>> n[40:60] = np.exp(1j*np.random.uniform(0, 2*np.pi, (20,))) + >>> s = np.fft.ifft(n) + >>> plt.plot(t, s.real, label='real') + [<matplotlib.lines.Line2D object at 0x...>] + >>> plt.plot(t, s.imag, '--', label='imaginary') + [<matplotlib.lines.Line2D object at 0x...>] + >>> plt.legend() + <matplotlib.legend.Legend object at 0x...> + >>> plt.show() + + """ + a = asarray(a) + if n is None: + n = a.shape[axis] + inv_norm = _get_backward_norm(n, norm) + output = _raw_fft(a, n, axis, False, False, inv_norm) + return output + + +@array_function_dispatch(_fft_dispatcher) +def rfft(a, n=None, axis=-1, norm=None): + """ + Compute the one-dimensional discrete Fourier Transform for real input. + + This function computes the one-dimensional *n*-point discrete Fourier + Transform (DFT) of a real-valued array by means of an efficient algorithm + called the Fast Fourier Transform (FFT). + + Parameters + ---------- + a : array_like + Input array + n : int, optional + Number of points along transformation axis in the input to use. + If `n` is smaller than the length of the input, the input is cropped. + If it is larger, the input is padded with zeros. 
If `n` is not given, + the length of the input along the axis specified by `axis` is used. + axis : int, optional + Axis over which to compute the FFT. If not given, the last axis is + used. + norm : {"backward", "ortho", "forward"}, optional + .. versionadded:: 1.10.0 + + Normalization mode (see `numpy.fft`). Default is "backward". + Indicates which direction of the forward/backward pair of transforms + is scaled and with what normalization factor. + + .. versionadded:: 1.20.0 + + The "backward", "forward" values were added. + + Returns + ------- + out : complex ndarray + The truncated or zero-padded input, transformed along the axis + indicated by `axis`, or the last one if `axis` is not specified. + If `n` is even, the length of the transformed axis is ``(n/2)+1``. + If `n` is odd, the length is ``(n+1)/2``. + + Raises + ------ + IndexError + If `axis` is not a valid axis of `a`. + + See Also + -------- + numpy.fft : For definition of the DFT and conventions used. + irfft : The inverse of `rfft`. + fft : The one-dimensional FFT of general (complex) input. + fftn : The *n*-dimensional FFT. + rfftn : The *n*-dimensional FFT of real input. + + Notes + ----- + When the DFT is computed for purely real input, the output is + Hermitian-symmetric, i.e. the negative frequency terms are just the complex + conjugates of the corresponding positive-frequency terms, and the + negative-frequency terms are therefore redundant. This function does not + compute the negative frequency terms, and the length of the transformed + axis of the output is therefore ``n//2 + 1``. + + When ``A = rfft(a)`` and fs is the sampling frequency, ``A[0]`` contains + the zero-frequency term 0*fs, which is real due to Hermitian symmetry. + + If `n` is even, ``A[-1]`` contains the term representing both positive + and negative Nyquist frequency (+fs/2 and -fs/2), and must also be purely + real. If `n` is odd, there is no term at fs/2; ``A[-1]`` contains + the largest positive frequency (fs/2*(n-1)/n), and is complex in the + general case. + + If the input `a` contains an imaginary part, it is silently discarded. + + Examples + -------- + >>> np.fft.fft([0, 1, 0, 0]) + array([ 1.+0.j, 0.-1.j, -1.+0.j, 0.+1.j]) # may vary + >>> np.fft.rfft([0, 1, 0, 0]) + array([ 1.+0.j, 0.-1.j, -1.+0.j]) # may vary + + Notice how the final element of the `fft` output is the complex conjugate + of the second element, for real input. For `rfft`, this symmetry is + exploited to compute only the non-negative frequency terms. + + """ + a = asarray(a) + if n is None: + n = a.shape[axis] + inv_norm = _get_forward_norm(n, norm) + output = _raw_fft(a, n, axis, True, True, inv_norm) + return output + + +@array_function_dispatch(_fft_dispatcher) +def irfft(a, n=None, axis=-1, norm=None): + """ + Computes the inverse of `rfft`. + + This function computes the inverse of the one-dimensional *n*-point + discrete Fourier Transform of real input computed by `rfft`. + In other words, ``irfft(rfft(a), len(a)) == a`` to within numerical + accuracy. (See Notes below for why ``len(a)`` is necessary here.) + + The input is expected to be in the form returned by `rfft`, i.e. the + real zero-frequency term followed by the complex positive frequency terms + in order of increasing frequency. Since the discrete Fourier Transform of + real input is Hermitian-symmetric, the negative frequency terms are taken + to be the complex conjugates of the corresponding positive frequency terms. + + Parameters + ---------- + a : array_like + The input array. 
+ n : int, optional + Length of the transformed axis of the output. + For `n` output points, ``n//2+1`` input points are necessary. If the + input is longer than this, it is cropped. If it is shorter than this, + it is padded with zeros. If `n` is not given, it is taken to be + ``2*(m-1)`` where ``m`` is the length of the input along the axis + specified by `axis`. + axis : int, optional + Axis over which to compute the inverse FFT. If not given, the last + axis is used. + norm : {"backward", "ortho", "forward"}, optional + .. versionadded:: 1.10.0 + + Normalization mode (see `numpy.fft`). Default is "backward". + Indicates which direction of the forward/backward pair of transforms + is scaled and with what normalization factor. + + .. versionadded:: 1.20.0 + + The "backward", "forward" values were added. + + Returns + ------- + out : ndarray + The truncated or zero-padded input, transformed along the axis + indicated by `axis`, or the last one if `axis` is not specified. + The length of the transformed axis is `n`, or, if `n` is not given, + ``2*(m-1)`` where ``m`` is the length of the transformed axis of the + input. To get an odd number of output points, `n` must be specified. + + Raises + ------ + IndexError + If `axis` is not a valid axis of `a`. + + See Also + -------- + numpy.fft : For definition of the DFT and conventions used. + rfft : The one-dimensional FFT of real input, of which `irfft` is inverse. + fft : The one-dimensional FFT. + irfft2 : The inverse of the two-dimensional FFT of real input. + irfftn : The inverse of the *n*-dimensional FFT of real input. + + Notes + ----- + Returns the real valued `n`-point inverse discrete Fourier transform + of `a`, where `a` contains the non-negative frequency terms of a + Hermitian-symmetric sequence. `n` is the length of the result, not the + input. + + If you specify an `n` such that `a` must be zero-padded or truncated, the + extra/removed values will be added/removed at high frequencies. One can + thus resample a series to `m` points via Fourier interpolation by: + ``a_resamp = irfft(rfft(a), m)``. + + The correct interpretation of the hermitian input depends on the length of + the original data, as given by `n`. This is because each input shape could + correspond to either an odd or even length signal. By default, `irfft` + assumes an even output length which puts the last entry at the Nyquist + frequency; aliasing with its symmetric counterpart. By Hermitian symmetry, + the value is thus treated as purely real. To avoid losing information, the + correct length of the real input **must** be given. + + Examples + -------- + >>> np.fft.ifft([1, -1j, -1, 1j]) + array([0.+0.j, 1.+0.j, 0.+0.j, 0.+0.j]) # may vary + >>> np.fft.irfft([1, -1j, -1]) + array([0., 1., 0., 0.]) + + Notice how the last term in the input to the ordinary `ifft` is the + complex conjugate of the second term, and the output has zero imaginary + part everywhere. When calling `irfft`, the negative frequencies are not + specified, and the output array is purely real. + + """ + a = asarray(a) + if n is None: + n = (a.shape[axis] - 1) * 2 + inv_norm = _get_backward_norm(n, norm) + output = _raw_fft(a, n, axis, True, False, inv_norm) + return output + + +@array_function_dispatch(_fft_dispatcher) +def hfft(a, n=None, axis=-1, norm=None): + """ + Compute the FFT of a signal that has Hermitian symmetry, i.e., a real + spectrum. + + Parameters + ---------- + a : array_like + The input array. + n : int, optional + Length of the transformed axis of the output. 
For `n` output + points, ``n//2 + 1`` input points are necessary. If the input is + longer than this, it is cropped. If it is shorter than this, it is + padded with zeros. If `n` is not given, it is taken to be ``2*(m-1)`` + where ``m`` is the length of the input along the axis specified by + `axis`. + axis : int, optional + Axis over which to compute the FFT. If not given, the last + axis is used. + norm : {"backward", "ortho", "forward"}, optional + .. versionadded:: 1.10.0 + + Normalization mode (see `numpy.fft`). Default is "backward". + Indicates which direction of the forward/backward pair of transforms + is scaled and with what normalization factor. + + .. versionadded:: 1.20.0 + + The "backward", "forward" values were added. + + Returns + ------- + out : ndarray + The truncated or zero-padded input, transformed along the axis + indicated by `axis`, or the last one if `axis` is not specified. + The length of the transformed axis is `n`, or, if `n` is not given, + ``2*m - 2`` where ``m`` is the length of the transformed axis of + the input. To get an odd number of output points, `n` must be + specified, for instance as ``2*m - 1`` in the typical case, + + Raises + ------ + IndexError + If `axis` is not a valid axis of `a`. + + See also + -------- + rfft : Compute the one-dimensional FFT for real input. + ihfft : The inverse of `hfft`. + + Notes + ----- + `hfft`/`ihfft` are a pair analogous to `rfft`/`irfft`, but for the + opposite case: here the signal has Hermitian symmetry in the time + domain and is real in the frequency domain. So here it's `hfft` for + which you must supply the length of the result if it is to be odd. + + * even: ``ihfft(hfft(a, 2*len(a) - 2)) == a``, within roundoff error, + * odd: ``ihfft(hfft(a, 2*len(a) - 1)) == a``, within roundoff error. + + The correct interpretation of the hermitian input depends on the length of + the original data, as given by `n`. This is because each input shape could + correspond to either an odd or even length signal. By default, `hfft` + assumes an even output length which puts the last entry at the Nyquist + frequency; aliasing with its symmetric counterpart. By Hermitian symmetry, + the value is thus treated as purely real. To avoid losing information, the + shape of the full signal **must** be given. + + Examples + -------- + >>> signal = np.array([1, 2, 3, 4, 3, 2]) + >>> np.fft.fft(signal) + array([15.+0.j, -4.+0.j, 0.+0.j, -1.-0.j, 0.+0.j, -4.+0.j]) # may vary + >>> np.fft.hfft(signal[:4]) # Input first half of signal + array([15., -4., 0., -1., 0., -4.]) + >>> np.fft.hfft(signal, 6) # Input entire signal and truncate + array([15., -4., 0., -1., 0., -4.]) + + + >>> signal = np.array([[1, 1.j], [-1.j, 2]]) + >>> np.conj(signal.T) - signal # check Hermitian symmetry + array([[ 0.-0.j, -0.+0.j], # may vary + [ 0.+0.j, 0.-0.j]]) + >>> freq_spectrum = np.fft.hfft(signal) + >>> freq_spectrum + array([[ 1., 1.], + [ 2., -2.]]) + + """ + a = asarray(a) + if n is None: + n = (a.shape[axis] - 1) * 2 + new_norm = _swap_direction(norm) + output = irfft(conjugate(a), n, axis, norm=new_norm) + return output + + +@array_function_dispatch(_fft_dispatcher) +def ihfft(a, n=None, axis=-1, norm=None): + """ + Compute the inverse FFT of a signal that has Hermitian symmetry. + + Parameters + ---------- + a : array_like + Input array. + n : int, optional + Length of the inverse FFT, the number of points along + transformation axis in the input to use. If `n` is smaller than + the length of the input, the input is cropped. 
If it is larger, + the input is padded with zeros. If `n` is not given, the length of + the input along the axis specified by `axis` is used. + axis : int, optional + Axis over which to compute the inverse FFT. If not given, the last + axis is used. + norm : {"backward", "ortho", "forward"}, optional + .. versionadded:: 1.10.0 + + Normalization mode (see `numpy.fft`). Default is "backward". + Indicates which direction of the forward/backward pair of transforms + is scaled and with what normalization factor. + + .. versionadded:: 1.20.0 + + The "backward", "forward" values were added. + + Returns + ------- + out : complex ndarray + The truncated or zero-padded input, transformed along the axis + indicated by `axis`, or the last one if `axis` is not specified. + The length of the transformed axis is ``n//2 + 1``. + + See also + -------- + hfft, irfft + + Notes + ----- + `hfft`/`ihfft` are a pair analogous to `rfft`/`irfft`, but for the + opposite case: here the signal has Hermitian symmetry in the time + domain and is real in the frequency domain. So here it's `hfft` for + which you must supply the length of the result if it is to be odd: + + * even: ``ihfft(hfft(a, 2*len(a) - 2)) == a``, within roundoff error, + * odd: ``ihfft(hfft(a, 2*len(a) - 1)) == a``, within roundoff error. + + Examples + -------- + >>> spectrum = np.array([ 15, -4, 0, -1, 0, -4]) + >>> np.fft.ifft(spectrum) + array([1.+0.j, 2.+0.j, 3.+0.j, 4.+0.j, 3.+0.j, 2.+0.j]) # may vary + >>> np.fft.ihfft(spectrum) + array([ 1.-0.j, 2.-0.j, 3.-0.j, 4.-0.j]) # may vary + + """ + a = asarray(a) + if n is None: + n = a.shape[axis] + new_norm = _swap_direction(norm) + output = conjugate(rfft(a, n, axis, norm=new_norm)) + return output + + +def _cook_nd_args(a, s=None, axes=None, invreal=0): + if s is None: + shapeless = 1 + if axes is None: + s = list(a.shape) + else: + s = take(a.shape, axes) + else: + shapeless = 0 + s = list(s) + if axes is None: + axes = list(range(-len(s), 0)) + if len(s) != len(axes): + raise ValueError("Shape and axes have different lengths.") + if invreal and shapeless: + s[-1] = (a.shape[axes[-1]] - 1) * 2 + return s, axes + + +def _raw_fftnd(a, s=None, axes=None, function=fft, norm=None): + a = asarray(a) + s, axes = _cook_nd_args(a, s, axes) + itl = list(range(len(axes))) + itl.reverse() + for ii in itl: + a = function(a, n=s[ii], axis=axes[ii], norm=norm) + return a + + +def _fftn_dispatcher(a, s=None, axes=None, norm=None): + return (a,) + + +@array_function_dispatch(_fftn_dispatcher) +def fftn(a, s=None, axes=None, norm=None): + """ + Compute the N-dimensional discrete Fourier Transform. + + This function computes the *N*-dimensional discrete Fourier Transform over + any number of axes in an *M*-dimensional array by means of the Fast Fourier + Transform (FFT). + + Parameters + ---------- + a : array_like + Input array, can be complex. + s : sequence of ints, optional + Shape (length of each transformed axis) of the output + (``s[0]`` refers to axis 0, ``s[1]`` to axis 1, etc.). + This corresponds to ``n`` for ``fft(x, n)``. + Along any axis, if the given shape is smaller than that of the input, + the input is cropped. If it is larger, the input is padded with zeros. + if `s` is not given, the shape of the input along the axes specified + by `axes` is used. + axes : sequence of ints, optional + Axes over which to compute the FFT. If not given, the last ``len(s)`` + axes are used, or all axes if `s` is also not specified. 
+ Repeated indices in `axes` mean that the transform over that axis is + performed multiple times. + norm : {"backward", "ortho", "forward"}, optional + .. versionadded:: 1.10.0 + + Normalization mode (see `numpy.fft`). Default is "backward". + Indicates which direction of the forward/backward pair of transforms + is scaled and with what normalization factor. + + .. versionadded:: 1.20.0 + + The "backward", "forward" values were added. + + Returns + ------- + out : complex ndarray + The truncated or zero-padded input, transformed along the axes + indicated by `axes`, or by a combination of `s` and `a`, + as explained in the parameters section above. + + Raises + ------ + ValueError + If `s` and `axes` have different length. + IndexError + If an element of `axes` is larger than the number of axes of `a`. + + See Also + -------- + numpy.fft : Overall view of discrete Fourier transforms, with definitions + and conventions used. + ifftn : The inverse of `fftn`, the inverse *n*-dimensional FFT. + fft : The one-dimensional FFT, with definitions and conventions used. + rfftn : The *n*-dimensional FFT of real input. + fft2 : The two-dimensional FFT. + fftshift : Shifts zero-frequency terms to centre of array + + Notes + ----- + The output, analogously to `fft`, contains the term for zero frequency in + the low-order corner of all axes, the positive frequency terms in the + first half of all axes, the term for the Nyquist frequency in the middle + of all axes and the negative frequency terms in the second half of all + axes, in order of decreasingly negative frequency. + + See `numpy.fft` for details, definitions and conventions used. + + Examples + -------- + >>> a = np.mgrid[:3, :3, :3][0] + >>> np.fft.fftn(a, axes=(1, 2)) + array([[[ 0.+0.j, 0.+0.j, 0.+0.j], # may vary + [ 0.+0.j, 0.+0.j, 0.+0.j], + [ 0.+0.j, 0.+0.j, 0.+0.j]], + [[ 9.+0.j, 0.+0.j, 0.+0.j], + [ 0.+0.j, 0.+0.j, 0.+0.j], + [ 0.+0.j, 0.+0.j, 0.+0.j]], + [[18.+0.j, 0.+0.j, 0.+0.j], + [ 0.+0.j, 0.+0.j, 0.+0.j], + [ 0.+0.j, 0.+0.j, 0.+0.j]]]) + >>> np.fft.fftn(a, (2, 2), axes=(0, 1)) + array([[[ 2.+0.j, 2.+0.j, 2.+0.j], # may vary + [ 0.+0.j, 0.+0.j, 0.+0.j]], + [[-2.+0.j, -2.+0.j, -2.+0.j], + [ 0.+0.j, 0.+0.j, 0.+0.j]]]) + + >>> import matplotlib.pyplot as plt + >>> [X, Y] = np.meshgrid(2 * np.pi * np.arange(200) / 12, + ... 2 * np.pi * np.arange(200) / 34) + >>> S = np.sin(X) + np.cos(Y) + np.random.uniform(0, 1, X.shape) + >>> FS = np.fft.fftn(S) + >>> plt.imshow(np.log(np.abs(np.fft.fftshift(FS))**2)) + <matplotlib.image.AxesImage object at 0x...> + >>> plt.show() + + """ + return _raw_fftnd(a, s, axes, fft, norm) + + +@array_function_dispatch(_fftn_dispatcher) +def ifftn(a, s=None, axes=None, norm=None): + """ + Compute the N-dimensional inverse discrete Fourier Transform. + + This function computes the inverse of the N-dimensional discrete + Fourier Transform over any number of axes in an M-dimensional array by + means of the Fast Fourier Transform (FFT). In other words, + ``ifftn(fftn(a)) == a`` to within numerical accuracy. + For a description of the definitions and conventions used, see `numpy.fft`. + + The input, analogously to `ifft`, should be ordered in the same way as is + returned by `fftn`, i.e. it should have the term for zero frequency + in all axes in the low-order corner, the positive frequency terms in the + first half of all axes, the term for the Nyquist frequency in the middle + of all axes and the negative frequency terms in the second half of all + axes, in order of decreasingly negative frequency.
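+ + As a quick check of the round-trip identity noted above: + + >>> a = np.arange(24.).reshape(2, 3, 4) + >>> np.allclose(np.fft.ifftn(np.fft.fftn(a)), a) + True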
+ + Parameters + ---------- + a : array_like + Input array, can be complex. + s : sequence of ints, optional + Shape (length of each transformed axis) of the output + (``s[0]`` refers to axis 0, ``s[1]`` to axis 1, etc.). + This corresponds to ``n`` for ``ifft(x, n)``. + Along any axis, if the given shape is smaller than that of the input, + the input is cropped. If it is larger, the input is padded with zeros. + If `s` is not given, the shape of the input along the axes specified + by `axes` is used. See notes for issue on `ifft` zero padding. + axes : sequence of ints, optional + Axes over which to compute the IFFT. If not given, the last ``len(s)`` + axes are used, or all axes if `s` is also not specified. + Repeated indices in `axes` mean that the inverse transform over that + axis is performed multiple times. + norm : {"backward", "ortho", "forward"}, optional + .. versionadded:: 1.10.0 + + Normalization mode (see `numpy.fft`). Default is "backward". + Indicates which direction of the forward/backward pair of transforms + is scaled and with what normalization factor. + + .. versionadded:: 1.20.0 + + The "backward", "forward" values were added. + + Returns + ------- + out : complex ndarray + The truncated or zero-padded input, transformed along the axes + indicated by `axes`, or by a combination of `s` or `a`, + as explained in the parameters section above. + + Raises + ------ + ValueError + If `s` and `axes` have different length. + IndexError + If an element of `axes` is larger than the number of axes of `a`. + + See Also + -------- + numpy.fft : Overall view of discrete Fourier transforms, with definitions + and conventions used. + fftn : The forward *n*-dimensional FFT, of which `ifftn` is the inverse. + ifft : The one-dimensional inverse FFT. + ifft2 : The two-dimensional inverse FFT. + ifftshift : Undoes `fftshift`, shifts zero-frequency terms to beginning + of array. + + Notes + ----- + See `numpy.fft` for definitions and conventions used. + + Zero-padding, analogously with `ifft`, is performed by appending zeros to + the input along the specified dimension. Although this is the common + approach, it might lead to surprising results. If another form of zero + padding is desired, it must be performed before `ifftn` is called. + + Examples + -------- + >>> a = np.eye(4) + >>> np.fft.ifftn(np.fft.fftn(a, axes=(0,)), axes=(1,)) + array([[1.+0.j, 0.+0.j, 0.+0.j, 0.+0.j], # may vary + [0.+0.j, 1.+0.j, 0.+0.j, 0.+0.j], + [0.+0.j, 0.+0.j, 1.+0.j, 0.+0.j], + [0.+0.j, 0.+0.j, 0.+0.j, 1.+0.j]]) + + + Create and plot an image with band-limited frequency content: + + >>> import matplotlib.pyplot as plt + >>> n = np.zeros((200,200), dtype=complex) + >>> n[60:80, 20:40] = np.exp(1j*np.random.uniform(0, 2*np.pi, (20, 20))) + >>> im = np.fft.ifftn(n).real + >>> plt.imshow(im) + <matplotlib.image.AxesImage object at 0x...> + >>> plt.show() + + """ + return _raw_fftnd(a, s, axes, ifft, norm) + + +@array_function_dispatch(_fftn_dispatcher) +def fft2(a, s=None, axes=(-2, -1), norm=None): + """ + Compute the 2-dimensional discrete Fourier Transform. + + This function computes the *n*-dimensional discrete Fourier Transform + over any axes in an *M*-dimensional array by means of the + Fast Fourier Transform (FFT). By default, the transform is computed over + the last two axes of the input array, i.e., a 2-dimensional FFT. + + Parameters + ---------- + a : array_like + Input array, can be complex + s : sequence of ints, optional + Shape (length of each transformed axis) of the output + (``s[0]`` refers to axis 0, ``s[1]`` to axis 1, etc.).
+ This corresponds to ``n`` for ``fft(x, n)``. + Along each axis, if the given shape is smaller than that of the input, + the input is cropped. If it is larger, the input is padded with zeros. + if `s` is not given, the shape of the input along the axes specified + by `axes` is used. + axes : sequence of ints, optional + Axes over which to compute the FFT. If not given, the last two + axes are used. A repeated index in `axes` means the transform over + that axis is performed multiple times. A one-element sequence means + that a one-dimensional FFT is performed. + norm : {"backward", "ortho", "forward"}, optional + .. versionadded:: 1.10.0 + + Normalization mode (see `numpy.fft`). Default is "backward". + Indicates which direction of the forward/backward pair of transforms + is scaled and with what normalization factor. + + .. versionadded:: 1.20.0 + + The "backward", "forward" values were added. + + Returns + ------- + out : complex ndarray + The truncated or zero-padded input, transformed along the axes + indicated by `axes`, or the last two axes if `axes` is not given. + + Raises + ------ + ValueError + If `s` and `axes` have different length, or `axes` not given and + ``len(s) != 2``. + IndexError + If an element of `axes` is larger than than the number of axes of `a`. + + See Also + -------- + numpy.fft : Overall view of discrete Fourier transforms, with definitions + and conventions used. + ifft2 : The inverse two-dimensional FFT. + fft : The one-dimensional FFT. + fftn : The *n*-dimensional FFT. + fftshift : Shifts zero-frequency terms to the center of the array. + For two-dimensional input, swaps first and third quadrants, and second + and fourth quadrants. + + Notes + ----- + `fft2` is just `fftn` with a different default for `axes`. + + The output, analogously to `fft`, contains the term for zero frequency in + the low-order corner of the transformed axes, the positive frequency terms + in the first half of these axes, the term for the Nyquist frequency in the + middle of the axes and the negative frequency terms in the second half of + the axes, in order of decreasingly negative frequency. + + See `fftn` for details and a plotting example, and `numpy.fft` for + definitions and conventions used. + + + Examples + -------- + >>> a = np.mgrid[:5, :5][0] + >>> np.fft.fft2(a) + array([[ 50. +0.j , 0. +0.j , 0. +0.j , # may vary + 0. +0.j , 0. +0.j ], + [-12.5+17.20477401j, 0. +0.j , 0. +0.j , + 0. +0.j , 0. +0.j ], + [-12.5 +4.0614962j , 0. +0.j , 0. +0.j , + 0. +0.j , 0. +0.j ], + [-12.5 -4.0614962j , 0. +0.j , 0. +0.j , + 0. +0.j , 0. +0.j ], + [-12.5-17.20477401j, 0. +0.j , 0. +0.j , + 0. +0.j , 0. +0.j ]]) + + """ + return _raw_fftnd(a, s, axes, fft, norm) + + +@array_function_dispatch(_fftn_dispatcher) +def ifft2(a, s=None, axes=(-2, -1), norm=None): + """ + Compute the 2-dimensional inverse discrete Fourier Transform. + + This function computes the inverse of the 2-dimensional discrete Fourier + Transform over any number of axes in an M-dimensional array by means of + the Fast Fourier Transform (FFT). In other words, ``ifft2(fft2(a)) == a`` + to within numerical accuracy. By default, the inverse transform is + computed over the last two axes of the input array. + + The input, analogously to `ifft`, should be ordered in the same way as is + returned by `fft2`, i.e. 
it should have the term for zero frequency + in the low-order corner of the two axes, the positive frequency terms in + the first half of these axes, the term for the Nyquist frequency in the + middle of the axes and the negative frequency terms in the second half of + both axes, in order of decreasingly negative frequency. + + Parameters + ---------- + a : array_like + Input array, can be complex. + s : sequence of ints, optional + Shape (length of each axis) of the output (``s[0]`` refers to axis 0, + ``s[1]`` to axis 1, etc.). This corresponds to `n` for ``ifft(x, n)``. + Along each axis, if the given shape is smaller than that of the input, + the input is cropped. If it is larger, the input is padded with zeros. + if `s` is not given, the shape of the input along the axes specified + by `axes` is used. See notes for issue on `ifft` zero padding. + axes : sequence of ints, optional + Axes over which to compute the FFT. If not given, the last two + axes are used. A repeated index in `axes` means the transform over + that axis is performed multiple times. A one-element sequence means + that a one-dimensional FFT is performed. + norm : {"backward", "ortho", "forward"}, optional + .. versionadded:: 1.10.0 + + Normalization mode (see `numpy.fft`). Default is "backward". + Indicates which direction of the forward/backward pair of transforms + is scaled and with what normalization factor. + + .. versionadded:: 1.20.0 + + The "backward", "forward" values were added. + + Returns + ------- + out : complex ndarray + The truncated or zero-padded input, transformed along the axes + indicated by `axes`, or the last two axes if `axes` is not given. + + Raises + ------ + ValueError + If `s` and `axes` have different length, or `axes` not given and + ``len(s) != 2``. + IndexError + If an element of `axes` is larger than than the number of axes of `a`. + + See Also + -------- + numpy.fft : Overall view of discrete Fourier transforms, with definitions + and conventions used. + fft2 : The forward 2-dimensional FFT, of which `ifft2` is the inverse. + ifftn : The inverse of the *n*-dimensional FFT. + fft : The one-dimensional FFT. + ifft : The one-dimensional inverse FFT. + + Notes + ----- + `ifft2` is just `ifftn` with a different default for `axes`. + + See `ifftn` for details and a plotting example, and `numpy.fft` for + definition and conventions used. + + Zero-padding, analogously with `ifft`, is performed by appending zeros to + the input along the specified dimension. Although this is the common + approach, it might lead to surprising results. If another form of zero + padding is desired, it must be performed before `ifft2` is called. + + Examples + -------- + >>> a = 4 * np.eye(4) + >>> np.fft.ifft2(a) + array([[1.+0.j, 0.+0.j, 0.+0.j, 0.+0.j], # may vary + [0.+0.j, 0.+0.j, 0.+0.j, 1.+0.j], + [0.+0.j, 0.+0.j, 1.+0.j, 0.+0.j], + [0.+0.j, 1.+0.j, 0.+0.j, 0.+0.j]]) + + """ + return _raw_fftnd(a, s, axes, ifft, norm) + + +@array_function_dispatch(_fftn_dispatcher) +def rfftn(a, s=None, axes=None, norm=None): + """ + Compute the N-dimensional discrete Fourier Transform for real input. + + This function computes the N-dimensional discrete Fourier Transform over + any number of axes in an M-dimensional real array by means of the Fast + Fourier Transform (FFT). By default, all axes are transformed, with the + real transform performed over the last axis, while the remaining + transforms are complex. + + Parameters + ---------- + a : array_like + Input array, taken to be real. 
+ s : sequence of ints, optional + Shape (length along each transformed axis) to use from the input. + (``s[0]`` refers to axis 0, ``s[1]`` to axis 1, etc.). + The final element of `s` corresponds to `n` for ``rfft(x, n)``, while + for the remaining axes, it corresponds to `n` for ``fft(x, n)``. + Along any axis, if the given shape is smaller than that of the input, + the input is cropped. If it is larger, the input is padded with zeros. + if `s` is not given, the shape of the input along the axes specified + by `axes` is used. + axes : sequence of ints, optional + Axes over which to compute the FFT. If not given, the last ``len(s)`` + axes are used, or all axes if `s` is also not specified. + norm : {"backward", "ortho", "forward"}, optional + .. versionadded:: 1.10.0 + + Normalization mode (see `numpy.fft`). Default is "backward". + Indicates which direction of the forward/backward pair of transforms + is scaled and with what normalization factor. + + .. versionadded:: 1.20.0 + + The "backward", "forward" values were added. + + Returns + ------- + out : complex ndarray + The truncated or zero-padded input, transformed along the axes + indicated by `axes`, or by a combination of `s` and `a`, + as explained in the parameters section above. + The length of the last axis transformed will be ``s[-1]//2+1``, + while the remaining transformed axes will have lengths according to + `s`, or unchanged from the input. + + Raises + ------ + ValueError + If `s` and `axes` have different length. + IndexError + If an element of `axes` is larger than than the number of axes of `a`. + + See Also + -------- + irfftn : The inverse of `rfftn`, i.e. the inverse of the n-dimensional FFT + of real input. + fft : The one-dimensional FFT, with definitions and conventions used. + rfft : The one-dimensional FFT of real input. + fftn : The n-dimensional FFT. + rfft2 : The two-dimensional FFT of real input. + + Notes + ----- + The transform for real input is performed over the last transformation + axis, as by `rfft`, then the transform over the remaining axes is + performed as by `fftn`. The order of the output is as for `rfft` for the + final transformation axis, and as for `fftn` for the remaining + transformation axes. + + See `fft` for details, definitions and conventions used. + + Examples + -------- + >>> a = np.ones((2, 2, 2)) + >>> np.fft.rfftn(a) + array([[[8.+0.j, 0.+0.j], # may vary + [0.+0.j, 0.+0.j]], + [[0.+0.j, 0.+0.j], + [0.+0.j, 0.+0.j]]]) + + >>> np.fft.rfftn(a, axes=(2, 0)) + array([[[4.+0.j, 0.+0.j], # may vary + [4.+0.j, 0.+0.j]], + [[0.+0.j, 0.+0.j], + [0.+0.j, 0.+0.j]]]) + + """ + a = asarray(a) + s, axes = _cook_nd_args(a, s, axes) + a = rfft(a, s[-1], axes[-1], norm) + for ii in range(len(axes)-1): + a = fft(a, s[ii], axes[ii], norm) + return a + + +@array_function_dispatch(_fftn_dispatcher) +def rfft2(a, s=None, axes=(-2, -1), norm=None): + """ + Compute the 2-dimensional FFT of a real array. + + Parameters + ---------- + a : array + Input array, taken to be real. + s : sequence of ints, optional + Shape of the FFT. + axes : sequence of ints, optional + Axes over which to compute the FFT. + norm : {"backward", "ortho", "forward"}, optional + .. versionadded:: 1.10.0 + + Normalization mode (see `numpy.fft`). Default is "backward". + Indicates which direction of the forward/backward pair of transforms + is scaled and with what normalization factor. + + .. versionadded:: 1.20.0 + + The "backward", "forward" values were added. 
+ + Returns + ------- + out : ndarray + The result of the real 2-D FFT. + + See Also + -------- + rfftn : Compute the N-dimensional discrete Fourier Transform for real + input. + + Notes + ----- + This is really just `rfftn` with different default behavior. + For more details see `rfftn`. + + Examples + -------- + >>> a = np.mgrid[:5, :5][0] + >>> np.fft.rfft2(a) + array([[ 50. +0.j , 0. +0.j , 0. +0.j ], + [-12.5+17.20477401j, 0. +0.j , 0. +0.j ], + [-12.5 +4.0614962j , 0. +0.j , 0. +0.j ], + [-12.5 -4.0614962j , 0. +0.j , 0. +0.j ], + [-12.5-17.20477401j, 0. +0.j , 0. +0.j ]]) + """ + return rfftn(a, s, axes, norm) + + +@array_function_dispatch(_fftn_dispatcher) +def irfftn(a, s=None, axes=None, norm=None): + """ + Computes the inverse of `rfftn`. + + This function computes the inverse of the N-dimensional discrete + Fourier Transform for real input over any number of axes in an + M-dimensional array by means of the Fast Fourier Transform (FFT). In + other words, ``irfftn(rfftn(a), a.shape) == a`` to within numerical + accuracy. (The ``a.shape`` is necessary like ``len(a)`` is for `irfft`, + and for the same reason.) + + The input should be ordered in the same way as is returned by `rfftn`, + i.e. as for `irfft` for the final transformation axis, and as for `ifftn` + along all the other axes. + + Parameters + ---------- + a : array_like + Input array. + s : sequence of ints, optional + Shape (length of each transformed axis) of the output + (``s[0]`` refers to axis 0, ``s[1]`` to axis 1, etc.). `s` is also the + number of input points used along this axis, except for the last axis, + where ``s[-1]//2+1`` points of the input are used. + Along any axis, if the shape indicated by `s` is smaller than that of + the input, the input is cropped. If it is larger, the input is padded + with zeros. If `s` is not given, the shape of the input along the axes + specified by axes is used. Except for the last axis which is taken to + be ``2*(m-1)`` where ``m`` is the length of the input along that axis. + axes : sequence of ints, optional + Axes over which to compute the inverse FFT. If not given, the last + `len(s)` axes are used, or all axes if `s` is also not specified. + Repeated indices in `axes` means that the inverse transform over that + axis is performed multiple times. + norm : {"backward", "ortho", "forward"}, optional + .. versionadded:: 1.10.0 + + Normalization mode (see `numpy.fft`). Default is "backward". + Indicates which direction of the forward/backward pair of transforms + is scaled and with what normalization factor. + + .. versionadded:: 1.20.0 + + The "backward", "forward" values were added. + + Returns + ------- + out : ndarray + The truncated or zero-padded input, transformed along the axes + indicated by `axes`, or by a combination of `s` or `a`, + as explained in the parameters section above. + The length of each transformed axis is as given by the corresponding + element of `s`, or the length of the input in every axis except for the + last one if `s` is not given. In the final transformed axis the length + of the output when `s` is not given is ``2*(m-1)`` where ``m`` is the + length of the final transformed axis of the input. To get an odd + number of output points in the final axis, `s` must be specified. + + Raises + ------ + ValueError + If `s` and `axes` have different length. + IndexError + If an element of `axes` is larger than than the number of axes of `a`. 
+ + See Also + -------- + rfftn : The forward n-dimensional FFT of real input, + of which `ifftn` is the inverse. + fft : The one-dimensional FFT, with definitions and conventions used. + irfft : The inverse of the one-dimensional FFT of real input. + irfft2 : The inverse of the two-dimensional FFT of real input. + + Notes + ----- + See `fft` for definitions and conventions used. + + See `rfft` for definitions and conventions used for real input. + + The correct interpretation of the hermitian input depends on the shape of + the original data, as given by `s`. This is because each input shape could + correspond to either an odd or even length signal. By default, `irfftn` + assumes an even output length which puts the last entry at the Nyquist + frequency; aliasing with its symmetric counterpart. When performing the + final complex to real transform, the last value is thus treated as purely + real. To avoid losing information, the correct shape of the real input + **must** be given. + + Examples + -------- + >>> a = np.zeros((3, 2, 2)) + >>> a[0, 0, 0] = 3 * 2 * 2 + >>> np.fft.irfftn(a) + array([[[1., 1.], + [1., 1.]], + [[1., 1.], + [1., 1.]], + [[1., 1.], + [1., 1.]]]) + + """ + a = asarray(a) + s, axes = _cook_nd_args(a, s, axes, invreal=1) + for ii in range(len(axes)-1): + a = ifft(a, s[ii], axes[ii], norm) + a = irfft(a, s[-1], axes[-1], norm) + return a + + +@array_function_dispatch(_fftn_dispatcher) +def irfft2(a, s=None, axes=(-2, -1), norm=None): + """ + Computes the inverse of `rfft2`. + + Parameters + ---------- + a : array_like + The input array + s : sequence of ints, optional + Shape of the real output to the inverse FFT. + axes : sequence of ints, optional + The axes over which to compute the inverse fft. + Default is the last two axes. + norm : {"backward", "ortho", "forward"}, optional + .. versionadded:: 1.10.0 + + Normalization mode (see `numpy.fft`). Default is "backward". + Indicates which direction of the forward/backward pair of transforms + is scaled and with what normalization factor. + + .. versionadded:: 1.20.0 + + The "backward", "forward" values were added. + + Returns + ------- + out : ndarray + The result of the inverse real 2-D FFT. + + See Also + -------- + rfft2 : The forward two-dimensional FFT of real input, + of which `irfft2` is the inverse. + rfft : The one-dimensional FFT for real input. + irfft : The inverse of the one-dimensional FFT of real input. + irfftn : Compute the inverse of the N-dimensional FFT of real input. + + Notes + ----- + This is really `irfftn` with different defaults. + For more details see `irfftn`. + + Examples + -------- + >>> a = np.mgrid[:5, :5][0] + >>> A = np.fft.rfft2(a) + >>> np.fft.irfft2(A, s=a.shape) + array([[0., 0., 0., 0., 0.], + [1., 1., 1., 1., 1.], + [2., 2., 2., 2., 2.], + [3., 3., 3., 3., 3.], + [4., 4., 4., 4., 4.]]) + """ + return irfftn(a, s, axes, norm) diff --git a/.local/lib/python3.8/site-packages/numpy/fft/_pocketfft.pyi b/.local/lib/python3.8/site-packages/numpy/fft/_pocketfft.pyi new file mode 100644 index 0000000..86cf6a6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/fft/_pocketfft.pyi @@ -0,0 +1,111 @@ +from typing import ( + Literal as L, + List, + Sequence, +) + +from numpy import complex128, float64 +from numpy.typing import ArrayLike, NDArray, _ArrayLikeNumber_co + +_NormKind = L[None, "backward", "ortho", "forward"] + +__all__: List[str] + +def fft( + a: ArrayLike, + n: None | int = ..., + axis: int = ..., + norm: _NormKind = ..., +) -> NDArray[complex128]: ... 
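+ +# Note: these stubs fix the return dtype at complex128 (or float64 for the +# complex-to-real transforms) because numpy.fft converts every input +# precision to 64 bit internally, as exercised by test_dtypes in +# tests/test_pocketfft.py.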
+ +def ifft( + a: ArrayLike, + n: None | int = ..., + axis: int = ..., + norm: _NormKind = ..., +) -> NDArray[complex128]: ... + +def rfft( + a: ArrayLike, + n: None | int = ..., + axis: int = ..., + norm: _NormKind = ..., +) -> NDArray[complex128]: ... + +def irfft( + a: ArrayLike, + n: None | int = ..., + axis: int = ..., + norm: _NormKind = ..., +) -> NDArray[float64]: ... + +# Input array must be compatible with `np.conjugate` +def hfft( + a: _ArrayLikeNumber_co, + n: None | int = ..., + axis: int = ..., + norm: _NormKind = ..., +) -> NDArray[float64]: ... + +def ihfft( + a: ArrayLike, + n: None | int = ..., + axis: int = ..., + norm: _NormKind = ..., +) -> NDArray[complex128]: ... + +def fftn( + a: ArrayLike, + s: None | Sequence[int] = ..., + axes: None | Sequence[int] = ..., + norm: _NormKind = ..., +) -> NDArray[complex128]: ... + +def ifftn( + a: ArrayLike, + s: None | Sequence[int] = ..., + axes: None | Sequence[int] = ..., + norm: _NormKind = ..., +) -> NDArray[complex128]: ... + +def rfftn( + a: ArrayLike, + s: None | Sequence[int] = ..., + axes: None | Sequence[int] = ..., + norm: _NormKind = ..., +) -> NDArray[complex128]: ... + +def irfftn( + a: ArrayLike, + s: None | Sequence[int] = ..., + axes: None | Sequence[int] = ..., + norm: _NormKind = ..., +) -> NDArray[float64]: ... + +def fft2( + a: ArrayLike, + s: None | Sequence[int] = ..., + axes: None | Sequence[int] = ..., + norm: _NormKind = ..., +) -> NDArray[complex128]: ... + +def ifft2( + a: ArrayLike, + s: None | Sequence[int] = ..., + axes: None | Sequence[int] = ..., + norm: _NormKind = ..., +) -> NDArray[complex128]: ... + +def rfft2( + a: ArrayLike, + s: None | Sequence[int] = ..., + axes: None | Sequence[int] = ..., + norm: _NormKind = ..., +) -> NDArray[complex128]: ... + +def irfft2( + a: ArrayLike, + s: None | Sequence[int] = ..., + axes: None | Sequence[int] = ..., + norm: _NormKind = ..., +) -> NDArray[float64]: ... diff --git a/.local/lib/python3.8/site-packages/numpy/fft/_pocketfft_internal.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/numpy/fft/_pocketfft_internal.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..92796a3 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/fft/_pocketfft_internal.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/numpy/fft/helper.py b/.local/lib/python3.8/site-packages/numpy/fft/helper.py new file mode 100644 index 0000000..927ee1a --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/fft/helper.py @@ -0,0 +1,221 @@ +""" +Discrete Fourier Transforms - helper.py + +""" +from numpy.core import integer, empty, arange, asarray, roll +from numpy.core.overrides import array_function_dispatch, set_module + +# Created by Pearu Peterson, September 2002 + +__all__ = ['fftshift', 'ifftshift', 'fftfreq', 'rfftfreq'] + +integer_types = (int, integer) + + +def _fftshift_dispatcher(x, axes=None): + return (x,) + + +@array_function_dispatch(_fftshift_dispatcher, module='numpy.fft') +def fftshift(x, axes=None): + """ + Shift the zero-frequency component to the center of the spectrum. + + This function swaps half-spaces for all axes listed (defaults to all). + Note that ``y[0]`` is the Nyquist component only if ``len(x)`` is even. + + Parameters + ---------- + x : array_like + Input array. + axes : int or shape tuple, optional + Axes over which to shift. Default is None, which shifts all axes. + + Returns + ------- + y : ndarray + The shifted array. 
+ + See Also + -------- + ifftshift : The inverse of `fftshift`. + + Examples + -------- + >>> freqs = np.fft.fftfreq(10, 0.1) + >>> freqs + array([ 0., 1., 2., ..., -3., -2., -1.]) + >>> np.fft.fftshift(freqs) + array([-5., -4., -3., -2., -1., 0., 1., 2., 3., 4.]) + + Shift the zero-frequency component only along the second axis: + + >>> freqs = np.fft.fftfreq(9, d=1./9).reshape(3, 3) + >>> freqs + array([[ 0., 1., 2.], + [ 3., 4., -4.], + [-3., -2., -1.]]) + >>> np.fft.fftshift(freqs, axes=(1,)) + array([[ 2., 0., 1.], + [-4., 3., 4.], + [-1., -3., -2.]]) + + """ + x = asarray(x) + if axes is None: + axes = tuple(range(x.ndim)) + shift = [dim // 2 for dim in x.shape] + elif isinstance(axes, integer_types): + shift = x.shape[axes] // 2 + else: + shift = [x.shape[ax] // 2 for ax in axes] + + return roll(x, shift, axes) + + +@array_function_dispatch(_fftshift_dispatcher, module='numpy.fft') +def ifftshift(x, axes=None): + """ + The inverse of `fftshift`. Although identical for even-length `x`, the + functions differ by one sample for odd-length `x`. + + Parameters + ---------- + x : array_like + Input array. + axes : int or shape tuple, optional + Axes over which to calculate. Defaults to None, which shifts all axes. + + Returns + ------- + y : ndarray + The shifted array. + + See Also + -------- + fftshift : Shift zero-frequency component to the center of the spectrum. + + Examples + -------- + >>> freqs = np.fft.fftfreq(9, d=1./9).reshape(3, 3) + >>> freqs + array([[ 0., 1., 2.], + [ 3., 4., -4.], + [-3., -2., -1.]]) + >>> np.fft.ifftshift(np.fft.fftshift(freqs)) + array([[ 0., 1., 2.], + [ 3., 4., -4.], + [-3., -2., -1.]]) + + """ + x = asarray(x) + if axes is None: + axes = tuple(range(x.ndim)) + shift = [-(dim // 2) for dim in x.shape] + elif isinstance(axes, integer_types): + shift = -(x.shape[axes] // 2) + else: + shift = [-(x.shape[ax] // 2) for ax in axes] + + return roll(x, shift, axes) + + +@set_module('numpy.fft') +def fftfreq(n, d=1.0): + """ + Return the Discrete Fourier Transform sample frequencies. + + The returned float array `f` contains the frequency bin centers in cycles + per unit of the sample spacing (with zero at the start). For instance, if + the sample spacing is in seconds, then the frequency unit is cycles/second. + + Given a window length `n` and a sample spacing `d`:: + + f = [0, 1, ..., n/2-1, -n/2, ..., -1] / (d*n) if n is even + f = [0, 1, ..., (n-1)/2, -(n-1)/2, ..., -1] / (d*n) if n is odd + + Parameters + ---------- + n : int + Window length. + d : scalar, optional + Sample spacing (inverse of the sampling rate). Defaults to 1. + + Returns + ------- + f : ndarray + Array of length `n` containing the sample frequencies. + + Examples + -------- + >>> signal = np.array([-2, 8, 6, 4, 1, 0, 3, 5], dtype=float) + >>> fourier = np.fft.fft(signal) + >>> n = signal.size + >>> timestep = 0.1 + >>> freq = np.fft.fftfreq(n, d=timestep) + >>> freq + array([ 0. , 1.25, 2.5 , ..., -3.75, -2.5 , -1.25]) + + """ + if not isinstance(n, integer_types): + raise ValueError("n should be an integer") + val = 1.0 / (n * d) + results = empty(n, int) + N = (n-1)//2 + 1 + p1 = arange(0, N, dtype=int) + results[:N] = p1 + p2 = arange(-(n//2), 0, dtype=int) + results[N:] = p2 + return results * val + + +@set_module('numpy.fft') +def rfftfreq(n, d=1.0): + """ + Return the Discrete Fourier Transform sample frequencies + (for usage with rfft, irfft). + + The returned float array `f` contains the frequency bin centers in cycles + per unit of the sample spacing (with zero at the start). 
For instance, if + the sample spacing is in seconds, then the frequency unit is cycles/second. + + Given a window length `n` and a sample spacing `d`:: + + f = [0, 1, ..., n/2-1, n/2] / (d*n) if n is even + f = [0, 1, ..., (n-1)/2-1, (n-1)/2] / (d*n) if n is odd + + Unlike `fftfreq` (but like `scipy.fftpack.rfftfreq`) + the Nyquist frequency component is considered to be positive. + + Parameters + ---------- + n : int + Window length. + d : scalar, optional + Sample spacing (inverse of the sampling rate). Defaults to 1. + + Returns + ------- + f : ndarray + Array of length ``n//2 + 1`` containing the sample frequencies. + + Examples + -------- + >>> signal = np.array([-2, 8, 6, 4, 1, 0, 3, 5, -3, 4], dtype=float) + >>> fourier = np.fft.rfft(signal) + >>> n = signal.size + >>> sample_rate = 100 + >>> freq = np.fft.fftfreq(n, d=1./sample_rate) + >>> freq + array([ 0., 10., 20., ..., -30., -20., -10.]) + >>> freq = np.fft.rfftfreq(n, d=1./sample_rate) + >>> freq + array([ 0., 10., 20., 30., 40., 50.]) + + """ + if not isinstance(n, integer_types): + raise ValueError("n should be an integer") + val = 1.0/(n*d) + N = n//2 + 1 + results = arange(0, N, dtype=int) + return results * val diff --git a/.local/lib/python3.8/site-packages/numpy/fft/helper.pyi b/.local/lib/python3.8/site-packages/numpy/fft/helper.pyi new file mode 100644 index 0000000..d75826f --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/fft/helper.pyi @@ -0,0 +1,50 @@ +from typing import List, Any, TypeVar, overload + +from numpy import generic, dtype, integer, floating, complexfloating +from numpy.typing import ( + NDArray, + ArrayLike, + _ShapeLike, + _SupportsArray, + _FiniteNestedSequence, + _ArrayLikeFloat_co, + _ArrayLikeComplex_co, +) + +_SCT = TypeVar("_SCT", bound=generic) + +_ArrayLike = _FiniteNestedSequence[_SupportsArray[dtype[_SCT]]] + +__all__: List[str] + +@overload +def fftshift(x: _ArrayLike[_SCT], axes: None | _ShapeLike = ...) -> NDArray[_SCT]: ... +@overload +def fftshift(x: ArrayLike, axes: None | _ShapeLike = ...) -> NDArray[Any]: ... + +@overload +def ifftshift(x: _ArrayLike[_SCT], axes: None | _ShapeLike = ...) -> NDArray[_SCT]: ... +@overload +def ifftshift(x: ArrayLike, axes: None | _ShapeLike = ...) -> NDArray[Any]: ... + +@overload +def fftfreq( + n: int | integer[Any], + d: _ArrayLikeFloat_co, +) -> NDArray[floating[Any]]: ... +@overload +def fftfreq( + n: int | integer[Any], + d: _ArrayLikeComplex_co, +) -> NDArray[complexfloating[Any, Any]]: ... + +@overload +def rfftfreq( + n: int | integer[Any], + d: _ArrayLikeFloat_co, +) -> NDArray[floating[Any]]: ... +@overload +def rfftfreq( + n: int | integer[Any], + d: _ArrayLikeComplex_co, +) -> NDArray[complexfloating[Any, Any]]: ... 
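+ +# The float/complex overloads of `d` above mirror the runtime arithmetic in +# helper.py: fftfreq and rfftfreq scale an integer bin array by ``1.0/(n*d)``, +# so a real-valued spacing yields a floating result and a complex spacing a +# complex one.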
diff --git a/.local/lib/python3.8/site-packages/numpy/fft/setup.py b/.local/lib/python3.8/site-packages/numpy/fft/setup.py new file mode 100644 index 0000000..477948a --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/fft/setup.py @@ -0,0 +1,22 @@ +import sys + +def configuration(parent_package='',top_path=None): + from numpy.distutils.misc_util import Configuration + config = Configuration('fft', parent_package, top_path) + + config.add_subpackage('tests') + + # AIX needs to be told to use large file support - at all times + defs = [('_LARGE_FILES', None)] if sys.platform[:3] == "aix" else [] + # Configure pocketfft_internal + config.add_extension('_pocketfft_internal', + sources=['_pocketfft.c'], + define_macros=defs, + ) + + config.add_data_files('*.pyi') + return config + +if __name__ == '__main__': + from numpy.distutils.core import setup + setup(configuration=configuration) diff --git a/.local/lib/python3.8/site-packages/numpy/fft/tests/__init__.py b/.local/lib/python3.8/site-packages/numpy/fft/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/numpy/fft/tests/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/fft/tests/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..487f97b Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/fft/tests/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/fft/tests/__pycache__/test_helper.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/fft/tests/__pycache__/test_helper.cpython-38.pyc new file mode 100644 index 0000000..37adf4c Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/fft/tests/__pycache__/test_helper.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/fft/tests/__pycache__/test_pocketfft.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/fft/tests/__pycache__/test_pocketfft.cpython-38.pyc new file mode 100644 index 0000000..bb40874 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/fft/tests/__pycache__/test_pocketfft.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/fft/tests/test_helper.py b/.local/lib/python3.8/site-packages/numpy/fft/tests/test_helper.py new file mode 100644 index 0000000..3fb700b --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/fft/tests/test_helper.py @@ -0,0 +1,167 @@ +"""Test functions for fftpack.helper module + +Copied from fftpack.helper by Pearu Peterson, October 2005 + +""" +import numpy as np +from numpy.testing import assert_array_almost_equal +from numpy import fft, pi + + +class TestFFTShift: + + def test_definition(self): + x = [0, 1, 2, 3, 4, -4, -3, -2, -1] + y = [-4, -3, -2, -1, 0, 1, 2, 3, 4] + assert_array_almost_equal(fft.fftshift(x), y) + assert_array_almost_equal(fft.ifftshift(y), x) + x = [0, 1, 2, 3, 4, -5, -4, -3, -2, -1] + y = [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4] + assert_array_almost_equal(fft.fftshift(x), y) + assert_array_almost_equal(fft.ifftshift(y), x) + + def test_inverse(self): + for n in [1, 4, 9, 100, 211]: + x = np.random.random((n,)) + assert_array_almost_equal(fft.ifftshift(fft.fftshift(x)), x) + + def test_axes_keyword(self): + freqs = [[0, 1, 2], [3, 4, -4], [-3, -2, -1]] + shifted = [[-1, -3, -2], [2, 0, 1], [-4, 3, 4]] + assert_array_almost_equal(fft.fftshift(freqs, axes=(0, 1)), shifted) + assert_array_almost_equal(fft.fftshift(freqs, axes=0), + fft.fftshift(freqs, axes=(0,))) + 
assert_array_almost_equal(fft.ifftshift(shifted, axes=(0, 1)), freqs) + assert_array_almost_equal(fft.ifftshift(shifted, axes=0), + fft.ifftshift(shifted, axes=(0,))) + + assert_array_almost_equal(fft.fftshift(freqs), shifted) + assert_array_almost_equal(fft.ifftshift(shifted), freqs) + + def test_uneven_dims(self): + """ Test 2D input, which has uneven dimension sizes """ + freqs = [ + [0, 1], + [2, 3], + [4, 5] + ] + + # shift in dimension 0 + shift_dim0 = [ + [4, 5], + [0, 1], + [2, 3] + ] + assert_array_almost_equal(fft.fftshift(freqs, axes=0), shift_dim0) + assert_array_almost_equal(fft.ifftshift(shift_dim0, axes=0), freqs) + assert_array_almost_equal(fft.fftshift(freqs, axes=(0,)), shift_dim0) + assert_array_almost_equal(fft.ifftshift(shift_dim0, axes=[0]), freqs) + + # shift in dimension 1 + shift_dim1 = [ + [1, 0], + [3, 2], + [5, 4] + ] + assert_array_almost_equal(fft.fftshift(freqs, axes=1), shift_dim1) + assert_array_almost_equal(fft.ifftshift(shift_dim1, axes=1), freqs) + + # shift in both dimensions + shift_dim_both = [ + [5, 4], + [1, 0], + [3, 2] + ] + assert_array_almost_equal(fft.fftshift(freqs, axes=(0, 1)), shift_dim_both) + assert_array_almost_equal(fft.ifftshift(shift_dim_both, axes=(0, 1)), freqs) + assert_array_almost_equal(fft.fftshift(freqs, axes=[0, 1]), shift_dim_both) + assert_array_almost_equal(fft.ifftshift(shift_dim_both, axes=[0, 1]), freqs) + + # axes=None (default) shift in all dimensions + assert_array_almost_equal(fft.fftshift(freqs, axes=None), shift_dim_both) + assert_array_almost_equal(fft.ifftshift(shift_dim_both, axes=None), freqs) + assert_array_almost_equal(fft.fftshift(freqs), shift_dim_both) + assert_array_almost_equal(fft.ifftshift(shift_dim_both), freqs) + + def test_equal_to_original(self): + """ Test that the new (>=v1.15) implementation (see #10073) is equal to the original (<=v1.14) """ + from numpy.core import asarray, concatenate, arange, take + + def original_fftshift(x, axes=None): + """ How fftshift was implemented in v1.14""" + tmp = asarray(x) + ndim = tmp.ndim + if axes is None: + axes = list(range(ndim)) + elif isinstance(axes, int): + axes = (axes,) + y = tmp + for k in axes: + n = tmp.shape[k] + p2 = (n + 1) // 2 + mylist = concatenate((arange(p2, n), arange(p2))) + y = take(y, mylist, k) + return y + + def original_ifftshift(x, axes=None): + """ How ifftshift was implemented in v1.14 """ + tmp = asarray(x) + ndim = tmp.ndim + if axes is None: + axes = list(range(ndim)) + elif isinstance(axes, int): + axes = (axes,) + y = tmp + for k in axes: + n = tmp.shape[k] + p2 = n - (n + 1) // 2 + mylist = concatenate((arange(p2, n), arange(p2))) + y = take(y, mylist, k) + return y + + # create possible 2d array combinations and try all possible keywords + # compare output to original functions + for i in range(16): + for j in range(16): + for axes_keyword in [0, 1, None, (0,), (0, 1)]: + inp = np.random.rand(i, j) + + assert_array_almost_equal(fft.fftshift(inp, axes_keyword), + original_fftshift(inp, axes_keyword)) + + assert_array_almost_equal(fft.ifftshift(inp, axes_keyword), + original_ifftshift(inp, axes_keyword)) + + +class TestFFTFreq: + + def test_definition(self): + x = [0, 1, 2, 3, 4, -4, -3, -2, -1] + assert_array_almost_equal(9*fft.fftfreq(9), x) + assert_array_almost_equal(9*pi*fft.fftfreq(9, pi), x) + x = [0, 1, 2, 3, 4, -5, -4, -3, -2, -1] + assert_array_almost_equal(10*fft.fftfreq(10), x) + assert_array_almost_equal(10*pi*fft.fftfreq(10, pi), x) + + +class TestRFFTFreq: + + def test_definition(self): + x = [0, 1, 2, 3, 4] + 
assert_array_almost_equal(9*fft.rfftfreq(9), x) + assert_array_almost_equal(9*pi*fft.rfftfreq(9, pi), x) + x = [0, 1, 2, 3, 4, 5] + assert_array_almost_equal(10*fft.rfftfreq(10), x) + assert_array_almost_equal(10*pi*fft.rfftfreq(10, pi), x) + + +class TestIRFFTN: + + def test_not_last_axis_success(self): + ar, ai = np.random.random((2, 16, 8, 32)) + a = ar + 1j*ai + + axes = (-2,) + + # Should not raise error + fft.irfftn(a, axes=axes) diff --git a/.local/lib/python3.8/site-packages/numpy/fft/tests/test_pocketfft.py b/.local/lib/python3.8/site-packages/numpy/fft/tests/test_pocketfft.py new file mode 100644 index 0000000..3926442 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/fft/tests/test_pocketfft.py @@ -0,0 +1,307 @@ +import numpy as np +import pytest +from numpy.random import random +from numpy.testing import ( + assert_array_equal, assert_raises, assert_allclose + ) +import threading +import queue + + +def fft1(x): + L = len(x) + phase = -2j * np.pi * (np.arange(L) / L) + phase = np.arange(L).reshape(-1, 1) * phase + return np.sum(x*np.exp(phase), axis=1) + + +class TestFFTShift: + + def test_fft_n(self): + assert_raises(ValueError, np.fft.fft, [1, 2, 3], 0) + + +class TestFFT1D: + + def test_identity(self): + maxlen = 512 + x = random(maxlen) + 1j*random(maxlen) + xr = random(maxlen) + for i in range(1, maxlen): + assert_allclose(np.fft.ifft(np.fft.fft(x[0:i])), x[0:i], + atol=1e-12) + assert_allclose(np.fft.irfft(np.fft.rfft(xr[0:i]), i), + xr[0:i], atol=1e-12) + + def test_fft(self): + x = random(30) + 1j*random(30) + assert_allclose(fft1(x), np.fft.fft(x), atol=1e-6) + assert_allclose(fft1(x), np.fft.fft(x, norm="backward"), atol=1e-6) + assert_allclose(fft1(x) / np.sqrt(30), + np.fft.fft(x, norm="ortho"), atol=1e-6) + assert_allclose(fft1(x) / 30., + np.fft.fft(x, norm="forward"), atol=1e-6) + + @pytest.mark.parametrize('norm', (None, 'backward', 'ortho', 'forward')) + def test_ifft(self, norm): + x = random(30) + 1j*random(30) + assert_allclose( + x, np.fft.ifft(np.fft.fft(x, norm=norm), norm=norm), + atol=1e-6) + # Ensure we get the correct error message + with pytest.raises(ValueError, + match='Invalid number of FFT data points'): + np.fft.ifft([], norm=norm) + + def test_fft2(self): + x = random((30, 20)) + 1j*random((30, 20)) + assert_allclose(np.fft.fft(np.fft.fft(x, axis=1), axis=0), + np.fft.fft2(x), atol=1e-6) + assert_allclose(np.fft.fft2(x), + np.fft.fft2(x, norm="backward"), atol=1e-6) + assert_allclose(np.fft.fft2(x) / np.sqrt(30 * 20), + np.fft.fft2(x, norm="ortho"), atol=1e-6) + assert_allclose(np.fft.fft2(x) / (30. * 20.), + np.fft.fft2(x, norm="forward"), atol=1e-6) + + def test_ifft2(self): + x = random((30, 20)) + 1j*random((30, 20)) + assert_allclose(np.fft.ifft(np.fft.ifft(x, axis=1), axis=0), + np.fft.ifft2(x), atol=1e-6) + assert_allclose(np.fft.ifft2(x), + np.fft.ifft2(x, norm="backward"), atol=1e-6) + assert_allclose(np.fft.ifft2(x) * np.sqrt(30 * 20), + np.fft.ifft2(x, norm="ortho"), atol=1e-6) + assert_allclose(np.fft.ifft2(x) * (30. * 20.), + np.fft.ifft2(x, norm="forward"), atol=1e-6) + + def test_fftn(self): + x = random((30, 20, 10)) + 1j*random((30, 20, 10)) + assert_allclose( + np.fft.fft(np.fft.fft(np.fft.fft(x, axis=2), axis=1), axis=0), + np.fft.fftn(x), atol=1e-6) + assert_allclose(np.fft.fftn(x), + np.fft.fftn(x, norm="backward"), atol=1e-6) + assert_allclose(np.fft.fftn(x) / np.sqrt(30 * 20 * 10), + np.fft.fftn(x, norm="ortho"), atol=1e-6) + assert_allclose(np.fft.fftn(x) / (30. * 20. 
* 10.), + np.fft.fftn(x, norm="forward"), atol=1e-6) + + def test_ifftn(self): + x = random((30, 20, 10)) + 1j*random((30, 20, 10)) + assert_allclose( + np.fft.ifft(np.fft.ifft(np.fft.ifft(x, axis=2), axis=1), axis=0), + np.fft.ifftn(x), atol=1e-6) + assert_allclose(np.fft.ifftn(x), + np.fft.ifftn(x, norm="backward"), atol=1e-6) + assert_allclose(np.fft.ifftn(x) * np.sqrt(30 * 20 * 10), + np.fft.ifftn(x, norm="ortho"), atol=1e-6) + assert_allclose(np.fft.ifftn(x) * (30. * 20. * 10.), + np.fft.ifftn(x, norm="forward"), atol=1e-6) + + def test_rfft(self): + x = random(30) + for n in [x.size, 2*x.size]: + for norm in [None, 'backward', 'ortho', 'forward']: + assert_allclose( + np.fft.fft(x, n=n, norm=norm)[:(n//2 + 1)], + np.fft.rfft(x, n=n, norm=norm), atol=1e-6) + assert_allclose( + np.fft.rfft(x, n=n), + np.fft.rfft(x, n=n, norm="backward"), atol=1e-6) + assert_allclose( + np.fft.rfft(x, n=n) / np.sqrt(n), + np.fft.rfft(x, n=n, norm="ortho"), atol=1e-6) + assert_allclose( + np.fft.rfft(x, n=n) / n, + np.fft.rfft(x, n=n, norm="forward"), atol=1e-6) + + def test_irfft(self): + x = random(30) + assert_allclose(x, np.fft.irfft(np.fft.rfft(x)), atol=1e-6) + assert_allclose(x, np.fft.irfft(np.fft.rfft(x, norm="backward"), + norm="backward"), atol=1e-6) + assert_allclose(x, np.fft.irfft(np.fft.rfft(x, norm="ortho"), + norm="ortho"), atol=1e-6) + assert_allclose(x, np.fft.irfft(np.fft.rfft(x, norm="forward"), + norm="forward"), atol=1e-6) + + def test_rfft2(self): + x = random((30, 20)) + assert_allclose(np.fft.fft2(x)[:, :11], np.fft.rfft2(x), atol=1e-6) + assert_allclose(np.fft.rfft2(x), + np.fft.rfft2(x, norm="backward"), atol=1e-6) + assert_allclose(np.fft.rfft2(x) / np.sqrt(30 * 20), + np.fft.rfft2(x, norm="ortho"), atol=1e-6) + assert_allclose(np.fft.rfft2(x) / (30. * 20.), + np.fft.rfft2(x, norm="forward"), atol=1e-6) + + def test_irfft2(self): + x = random((30, 20)) + assert_allclose(x, np.fft.irfft2(np.fft.rfft2(x)), atol=1e-6) + assert_allclose(x, np.fft.irfft2(np.fft.rfft2(x, norm="backward"), + norm="backward"), atol=1e-6) + assert_allclose(x, np.fft.irfft2(np.fft.rfft2(x, norm="ortho"), + norm="ortho"), atol=1e-6) + assert_allclose(x, np.fft.irfft2(np.fft.rfft2(x, norm="forward"), + norm="forward"), atol=1e-6) + + def test_rfftn(self): + x = random((30, 20, 10)) + assert_allclose(np.fft.fftn(x)[:, :, :6], np.fft.rfftn(x), atol=1e-6) + assert_allclose(np.fft.rfftn(x), + np.fft.rfftn(x, norm="backward"), atol=1e-6) + assert_allclose(np.fft.rfftn(x) / np.sqrt(30 * 20 * 10), + np.fft.rfftn(x, norm="ortho"), atol=1e-6) + assert_allclose(np.fft.rfftn(x) / (30. * 20. 
* 10.), + np.fft.rfftn(x, norm="forward"), atol=1e-6) + + def test_irfftn(self): + x = random((30, 20, 10)) + assert_allclose(x, np.fft.irfftn(np.fft.rfftn(x)), atol=1e-6) + assert_allclose(x, np.fft.irfftn(np.fft.rfftn(x, norm="backward"), + norm="backward"), atol=1e-6) + assert_allclose(x, np.fft.irfftn(np.fft.rfftn(x, norm="ortho"), + norm="ortho"), atol=1e-6) + assert_allclose(x, np.fft.irfftn(np.fft.rfftn(x, norm="forward"), + norm="forward"), atol=1e-6) + + def test_hfft(self): + x = random(14) + 1j*random(14) + x_herm = np.concatenate((random(1), x, random(1))) + x = np.concatenate((x_herm, x[::-1].conj())) + assert_allclose(np.fft.fft(x), np.fft.hfft(x_herm), atol=1e-6) + assert_allclose(np.fft.hfft(x_herm), + np.fft.hfft(x_herm, norm="backward"), atol=1e-6) + assert_allclose(np.fft.hfft(x_herm) / np.sqrt(30), + np.fft.hfft(x_herm, norm="ortho"), atol=1e-6) + assert_allclose(np.fft.hfft(x_herm) / 30., + np.fft.hfft(x_herm, norm="forward"), atol=1e-6) + + def test_ihfft(self): + x = random(14) + 1j*random(14) + x_herm = np.concatenate((random(1), x, random(1))) + x = np.concatenate((x_herm, x[::-1].conj())) + assert_allclose(x_herm, np.fft.ihfft(np.fft.hfft(x_herm)), atol=1e-6) + assert_allclose(x_herm, np.fft.ihfft(np.fft.hfft(x_herm, + norm="backward"), norm="backward"), atol=1e-6) + assert_allclose(x_herm, np.fft.ihfft(np.fft.hfft(x_herm, + norm="ortho"), norm="ortho"), atol=1e-6) + assert_allclose(x_herm, np.fft.ihfft(np.fft.hfft(x_herm, + norm="forward"), norm="forward"), atol=1e-6) + + @pytest.mark.parametrize("op", [np.fft.fftn, np.fft.ifftn, + np.fft.rfftn, np.fft.irfftn]) + def test_axes(self, op): + x = random((30, 20, 10)) + axes = [(0, 1, 2), (0, 2, 1), (1, 0, 2), (1, 2, 0), (2, 0, 1), (2, 1, 0)] + for a in axes: + op_tr = op(np.transpose(x, a)) + tr_op = np.transpose(op(x, axes=a), a) + assert_allclose(op_tr, tr_op, atol=1e-6) + + def test_all_1d_norm_preserving(self): + # verify that round-trip transforms are norm-preserving + x = random(30) + x_norm = np.linalg.norm(x) + n = x.size * 2 + func_pairs = [(np.fft.fft, np.fft.ifft), + (np.fft.rfft, np.fft.irfft), + # hfft: order so the first function takes x.size samples + # (necessary for comparison to x_norm above) + (np.fft.ihfft, np.fft.hfft), + ] + for forw, back in func_pairs: + for n in [x.size, 2*x.size]: + for norm in [None, 'backward', 'ortho', 'forward']: + tmp = forw(x, n=n, norm=norm) + tmp = back(tmp, n=n, norm=norm) + assert_allclose(x_norm, + np.linalg.norm(tmp), atol=1e-6) + + @pytest.mark.parametrize("dtype", [np.half, np.single, np.double, + np.longdouble]) + def test_dtypes(self, dtype): + # make sure that all input precisions are accepted and internally + # converted to 64bit + x = random(30).astype(dtype) + assert_allclose(np.fft.ifft(np.fft.fft(x)), x, atol=1e-6) + assert_allclose(np.fft.irfft(np.fft.rfft(x)), x, atol=1e-6) + + +@pytest.mark.parametrize( + "dtype", + [np.float32, np.float64, np.complex64, np.complex128]) +@pytest.mark.parametrize("order", ["F", 'non-contiguous']) +@pytest.mark.parametrize( + "fft", + [np.fft.fft, np.fft.fft2, np.fft.fftn, + np.fft.ifft, np.fft.ifft2, np.fft.ifftn]) +def test_fft_with_order(dtype, order, fft): + # Check that FFT/IFFT produces identical results for C, Fortran and + # non contiguous arrays + rng = np.random.RandomState(42) + X = rng.rand(8, 7, 13).astype(dtype, copy=False) + # See discussion in pull/14178 + _tol = 8.0 * np.sqrt(np.log2(X.size)) * np.finfo(X.dtype).eps + if order == 'F': + Y = np.asfortranarray(X) + else: + # Make a non contiguous array 
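+        # (X[::-1] is a reversed view with negative strides, hence neither
+        # C- nor F-contiguous; X is then rebuilt as a contiguous copy of the
+        # same reversed data, so both arrays hold identical values.)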
+ Y = X[::-1] + X = np.ascontiguousarray(X[::-1]) + + if fft.__name__.endswith('fft'): + for axis in range(3): + X_res = fft(X, axis=axis) + Y_res = fft(Y, axis=axis) + assert_allclose(X_res, Y_res, atol=_tol, rtol=_tol) + elif fft.__name__.endswith(('fft2', 'fftn')): + axes = [(0, 1), (1, 2), (0, 2)] + if fft.__name__.endswith('fftn'): + axes.extend([(0,), (1,), (2,), None]) + for ax in axes: + X_res = fft(X, axes=ax) + Y_res = fft(Y, axes=ax) + assert_allclose(X_res, Y_res, atol=_tol, rtol=_tol) + else: + raise ValueError() + + +class TestFFTThreadSafe: + threads = 16 + input_shape = (800, 200) + + def _test_mtsame(self, func, *args): + def worker(args, q): + q.put(func(*args)) + + q = queue.Queue() + expected = func(*args) + + # Spin off a bunch of threads to call the same function simultaneously + t = [threading.Thread(target=worker, args=(args, q)) + for i in range(self.threads)] + [x.start() for x in t] + + [x.join() for x in t] + # Make sure all threads returned the correct value + for i in range(self.threads): + assert_array_equal(q.get(timeout=5), expected, + 'Function returned wrong value in multithreaded context') + + def test_fft(self): + a = np.ones(self.input_shape) * (1 + 0j) + self._test_mtsame(np.fft.fft, a) + + def test_ifft(self): + a = np.ones(self.input_shape) * (1 + 0j) + self._test_mtsame(np.fft.ifft, a) + + def test_rfft(self): + a = np.ones(self.input_shape) + self._test_mtsame(np.fft.rfft, a) + + def test_irfft(self): + a = np.ones(self.input_shape) * (1 + 0j) + self._test_mtsame(np.fft.irfft, a) diff --git a/.local/lib/python3.8/site-packages/numpy/lib/__init__.py b/.local/lib/python3.8/site-packages/numpy/lib/__init__.py new file mode 100644 index 0000000..ad88ba3 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/__init__.py @@ -0,0 +1,61 @@ +""" +**Note:** almost all functions in the ``numpy.lib`` namespace +are also present in the main ``numpy`` namespace. Please use the +functions as ``np.<funcname>`` where possible. + +``numpy.lib`` is mostly a space for implementing functions that don't +belong in core or in another NumPy submodule with a clear purpose +(e.g. ``random``, ``fft``, ``linalg``, ``ma``). + +It mostly contains basic functions that are used by several submodules and are +useful to have in the main name-space. + +""" +import math + +from numpy.version import version as __version__ + +# Public submodules +# Note: recfunctions and (maybe) format are public too, but not imported +from . import mixins +from . 
import scimath as emath + +# Private submodules +from .type_check import * +from .index_tricks import * +from .function_base import * +from .nanfunctions import * +from .shape_base import * +from .stride_tricks import * +from .twodim_base import * +from .ufunclike import * +from .histograms import * + +from .polynomial import * +from .utils import * +from .arraysetops import * +from .npyio import * +from .arrayterator import Arrayterator +from .arraypad import * +from ._version import * +from numpy.core._multiarray_umath import tracemalloc_domain + +__all__ = ['emath', 'math', 'tracemalloc_domain', 'Arrayterator'] +__all__ += type_check.__all__ +__all__ += index_tricks.__all__ +__all__ += function_base.__all__ +__all__ += shape_base.__all__ +__all__ += stride_tricks.__all__ +__all__ += twodim_base.__all__ +__all__ += ufunclike.__all__ +__all__ += arraypad.__all__ +__all__ += polynomial.__all__ +__all__ += utils.__all__ +__all__ += arraysetops.__all__ +__all__ += npyio.__all__ +__all__ += nanfunctions.__all__ +__all__ += histograms.__all__ + +from numpy._pytesttester import PytestTester +test = PytestTester(__name__) +del PytestTester diff --git a/.local/lib/python3.8/site-packages/numpy/lib/__init__.pyi b/.local/lib/python3.8/site-packages/numpy/lib/__init__.pyi new file mode 100644 index 0000000..ae23b2e --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/__init__.pyi @@ -0,0 +1,245 @@ +import math as math +from typing import Any, List + +from numpy._pytesttester import PytestTester + +from numpy import ( + ndenumerate as ndenumerate, + ndindex as ndindex, +) + +from numpy.version import version + +from numpy.lib import ( + format as format, + mixins as mixins, + scimath as scimath, + stride_tricks as stride_tricks, +) + +from numpy.lib._version import ( + NumpyVersion as NumpyVersion, +) + +from numpy.lib.arraypad import ( + pad as pad, +) + +from numpy.lib.arraysetops import ( + ediff1d as ediff1d, + intersect1d as intersect1d, + setxor1d as setxor1d, + union1d as union1d, + setdiff1d as setdiff1d, + unique as unique, + in1d as in1d, + isin as isin, +) + +from numpy.lib.arrayterator import ( + Arrayterator as Arrayterator, +) + +from numpy.lib.function_base import ( + select as select, + piecewise as piecewise, + trim_zeros as trim_zeros, + copy as copy, + iterable as iterable, + percentile as percentile, + diff as diff, + gradient as gradient, + angle as angle, + unwrap as unwrap, + sort_complex as sort_complex, + disp as disp, + flip as flip, + rot90 as rot90, + extract as extract, + place as place, + vectorize as vectorize, + asarray_chkfinite as asarray_chkfinite, + average as average, + bincount as bincount, + digitize as digitize, + cov as cov, + corrcoef as corrcoef, + msort as msort, + median as median, + sinc as sinc, + hamming as hamming, + hanning as hanning, + bartlett as bartlett, + blackman as blackman, + kaiser as kaiser, + trapz as trapz, + i0 as i0, + add_newdoc as add_newdoc, + add_docstring as add_docstring, + meshgrid as meshgrid, + delete as delete, + insert as insert, + append as append, + interp as interp, + add_newdoc_ufunc as add_newdoc_ufunc, + quantile as quantile, +) + +from numpy.lib.histograms import ( + histogram_bin_edges as histogram_bin_edges, + histogram as histogram, + histogramdd as histogramdd, +) + +from numpy.lib.index_tricks import ( + ravel_multi_index as ravel_multi_index, + unravel_index as unravel_index, + mgrid as mgrid, + ogrid as ogrid, + r_ as r_, + c_ as c_, + s_ as s_, + index_exp as index_exp, + ix_ as ix_, + 
fill_diagonal as fill_diagonal, + diag_indices as diag_indices, + diag_indices_from as diag_indices_from, +) + +from numpy.lib.nanfunctions import ( + nansum as nansum, + nanmax as nanmax, + nanmin as nanmin, + nanargmax as nanargmax, + nanargmin as nanargmin, + nanmean as nanmean, + nanmedian as nanmedian, + nanpercentile as nanpercentile, + nanvar as nanvar, + nanstd as nanstd, + nanprod as nanprod, + nancumsum as nancumsum, + nancumprod as nancumprod, + nanquantile as nanquantile, +) + +from numpy.lib.npyio import ( + savetxt as savetxt, + loadtxt as loadtxt, + genfromtxt as genfromtxt, + recfromtxt as recfromtxt, + recfromcsv as recfromcsv, + load as load, + save as save, + savez as savez, + savez_compressed as savez_compressed, + packbits as packbits, + unpackbits as unpackbits, + fromregex as fromregex, + DataSource as DataSource, +) + +from numpy.lib.polynomial import ( + poly as poly, + roots as roots, + polyint as polyint, + polyder as polyder, + polyadd as polyadd, + polysub as polysub, + polymul as polymul, + polydiv as polydiv, + polyval as polyval, + polyfit as polyfit, + RankWarning as RankWarning, + poly1d as poly1d, +) + +from numpy.lib.shape_base import ( + column_stack as column_stack, + row_stack as row_stack, + dstack as dstack, + array_split as array_split, + split as split, + hsplit as hsplit, + vsplit as vsplit, + dsplit as dsplit, + apply_over_axes as apply_over_axes, + expand_dims as expand_dims, + apply_along_axis as apply_along_axis, + kron as kron, + tile as tile, + get_array_wrap as get_array_wrap, + take_along_axis as take_along_axis, + put_along_axis as put_along_axis, +) + +from numpy.lib.stride_tricks import ( + broadcast_to as broadcast_to, + broadcast_arrays as broadcast_arrays, + broadcast_shapes as broadcast_shapes, +) + +from numpy.lib.twodim_base import ( + diag as diag, + diagflat as diagflat, + eye as eye, + fliplr as fliplr, + flipud as flipud, + tri as tri, + triu as triu, + tril as tril, + vander as vander, + histogram2d as histogram2d, + mask_indices as mask_indices, + tril_indices as tril_indices, + tril_indices_from as tril_indices_from, + triu_indices as triu_indices, + triu_indices_from as triu_indices_from, +) + +from numpy.lib.type_check import ( + mintypecode as mintypecode, + asfarray as asfarray, + real as real, + imag as imag, + iscomplex as iscomplex, + isreal as isreal, + iscomplexobj as iscomplexobj, + isrealobj as isrealobj, + nan_to_num as nan_to_num, + real_if_close as real_if_close, + typename as typename, + common_type as common_type, +) + +from numpy.lib.ufunclike import ( + fix as fix, + isposinf as isposinf, + isneginf as isneginf, +) + +from numpy.lib.utils import ( + issubclass_ as issubclass_, + issubsctype as issubsctype, + issubdtype as issubdtype, + deprecate as deprecate, + deprecate_with_doc as deprecate_with_doc, + get_include as get_include, + info as info, + source as source, + who as who, + lookfor as lookfor, + byte_bounds as byte_bounds, + safe_eval as safe_eval, +) + +from numpy.core.multiarray import ( + tracemalloc_domain as tracemalloc_domain, +) + +__all__: List[str] +__path__: List[str] +test: PytestTester + +__version__ = version +emath = scimath diff --git a/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..1b6fbb0 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/__init__.cpython-38.pyc differ diff --git 
a/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/_datasource.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/_datasource.cpython-38.pyc new file mode 100644 index 0000000..9141d68 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/_datasource.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/_iotools.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/_iotools.cpython-38.pyc new file mode 100644 index 0000000..46cd261 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/_iotools.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/_version.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/_version.cpython-38.pyc new file mode 100644 index 0000000..d19aad0 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/_version.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/arraypad.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/arraypad.cpython-38.pyc new file mode 100644 index 0000000..994eacd Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/arraypad.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/arraysetops.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/arraysetops.cpython-38.pyc new file mode 100644 index 0000000..77f4b10 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/arraysetops.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/arrayterator.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/arrayterator.cpython-38.pyc new file mode 100644 index 0000000..6ea1786 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/arrayterator.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/format.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/format.cpython-38.pyc new file mode 100644 index 0000000..b7e4f32 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/format.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/function_base.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/function_base.cpython-38.pyc new file mode 100644 index 0000000..b43546f Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/function_base.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/histograms.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/histograms.cpython-38.pyc new file mode 100644 index 0000000..353f1fd Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/histograms.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/index_tricks.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/index_tricks.cpython-38.pyc new file mode 100644 index 0000000..95acef2 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/index_tricks.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/mixins.cpython-38.pyc 
b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/mixins.cpython-38.pyc new file mode 100644 index 0000000..36c3c4c Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/mixins.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/nanfunctions.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/nanfunctions.cpython-38.pyc new file mode 100644 index 0000000..0522ea0 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/nanfunctions.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/npyio.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/npyio.cpython-38.pyc new file mode 100644 index 0000000..2f9e29e Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/npyio.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/polynomial.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/polynomial.cpython-38.pyc new file mode 100644 index 0000000..b001e7c Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/polynomial.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/recfunctions.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/recfunctions.cpython-38.pyc new file mode 100644 index 0000000..da74269 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/recfunctions.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/scimath.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/scimath.cpython-38.pyc new file mode 100644 index 0000000..3a5ff56 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/scimath.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/setup.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/setup.cpython-38.pyc new file mode 100644 index 0000000..fa715dd Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/setup.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/shape_base.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/shape_base.cpython-38.pyc new file mode 100644 index 0000000..799d6cd Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/shape_base.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/stride_tricks.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/stride_tricks.cpython-38.pyc new file mode 100644 index 0000000..6d6fd22 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/stride_tricks.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/twodim_base.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/twodim_base.cpython-38.pyc new file mode 100644 index 0000000..ede357d Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/twodim_base.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/type_check.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/type_check.cpython-38.pyc new file mode 100644 index 0000000..cddcd3a Binary 
files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/type_check.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/ufunclike.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/ufunclike.cpython-38.pyc new file mode 100644 index 0000000..3117a3e Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/ufunclike.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/user_array.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/user_array.cpython-38.pyc new file mode 100644 index 0000000..2f17986 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/user_array.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/utils.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/utils.cpython-38.pyc new file mode 100644 index 0000000..79e33fa Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/__pycache__/utils.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/_datasource.py b/.local/lib/python3.8/site-packages/numpy/lib/_datasource.py new file mode 100644 index 0000000..8201d37 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/_datasource.py @@ -0,0 +1,703 @@ +"""A file interface for handling local and remote data files. + +The goal of datasource is to abstract some of the file system operations +when dealing with data files so the researcher doesn't have to know all the +low-level details. Through datasource, a researcher can obtain and use a +file with one function call, regardless of location of the file. + +DataSource is meant to augment standard python libraries, not replace them. +It should work seamlessly with standard file IO operations and the os +module. + +DataSource files can originate locally or remotely: + +- local files : '/home/guido/src/local/data.txt' +- URLs (http, ftp, ...) : 'http://www.scipy.org/not/real/data.txt' + +DataSource files can also be compressed or uncompressed. Currently only +gzip, bz2 and xz are supported. + +Example:: + + >>> # Create a DataSource, use os.curdir (default) for local storage. + >>> from numpy import DataSource + >>> ds = DataSource() + >>> + >>> # Open a remote file. + >>> # DataSource downloads the file, stores it locally in: + >>> # './www.google.com/index.html' + >>> # opens the file and returns a file object. + >>> fp = ds.open('http://www.google.com/') # doctest: +SKIP + >>> + >>> # Use the file as you normally would + >>> fp.read() # doctest: +SKIP + >>> fp.close() # doctest: +SKIP + +""" +import os +import io + +from numpy.core.overrides import set_module + + +_open = open + + +def _check_mode(mode, encoding, newline): + """Check mode and that encoding and newline are compatible. + + Parameters + ---------- + mode : str + File open mode. + encoding : str + File encoding. + newline : str + Newline for text files. + + """ + if "t" in mode: + if "b" in mode: + raise ValueError("Invalid mode: %r" % (mode,)) + else: + if encoding is not None: + raise ValueError("Argument 'encoding' not supported in binary mode") + if newline is not None: + raise ValueError("Argument 'newline' not supported in binary mode") + + +# Using a class instead of a module-level dictionary +# to reduce the initial 'import numpy' overhead by +# deferring the import of lzma, bz2 and gzip until needed + +# TODO: .zip support, .tar support? 
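+# A stripped-down sketch of the extension -> opener dispatch that the class
+# below implements (a minimal illustration, stdlib only, without the lazy
+# loading that _FileOpeners adds):
+#
+#     import bz2, gzip, io, os
+#
+#     def open_any(path, mode='rt'):
+#         openers = {'.gz': gzip.open, '.bz2': bz2.open}
+#         ext = os.path.splitext(path)[1]
+#         return openers.get(ext, io.open)(path, mode)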
+class _FileOpeners: + """ + Container for different methods to open (un-)compressed files. + + `_FileOpeners` contains a dictionary that holds one method for each + supported file format. Attribute lookup is implemented in such a way + that an instance of `_FileOpeners` itself can be indexed with the keys + of that dictionary. Currently uncompressed files as well as files + compressed with ``gzip``, ``bz2`` or ``xz`` compression are supported. + + Notes + ----- + `_file_openers`, an instance of `_FileOpeners`, is made available for + use in the `_datasource` module. + + Examples + -------- + >>> import gzip + >>> np.lib._datasource._file_openers.keys() + [None, '.bz2', '.gz', '.xz', '.lzma'] + >>> np.lib._datasource._file_openers['.gz'] is gzip.open + True + + """ + + def __init__(self): + self._loaded = False + self._file_openers = {None: io.open} + + def _load(self): + if self._loaded: + return + + try: + import bz2 + self._file_openers[".bz2"] = bz2.open + except ImportError: + pass + + try: + import gzip + self._file_openers[".gz"] = gzip.open + except ImportError: + pass + + try: + import lzma + self._file_openers[".xz"] = lzma.open + self._file_openers[".lzma"] = lzma.open + except (ImportError, AttributeError): + # There are incompatible backports of lzma that do not have the + # lzma.open attribute, so catch that as well as ImportError. + pass + + self._loaded = True + + def keys(self): + """ + Return the keys of currently supported file openers. + + Parameters + ---------- + None + + Returns + ------- + keys : list + The keys are None for uncompressed files and the file extension + strings (i.e. ``'.gz'``, ``'.xz'``) for supported compression + methods. + + """ + self._load() + return list(self._file_openers.keys()) + + def __getitem__(self, key): + self._load() + return self._file_openers[key] + +_file_openers = _FileOpeners() + +def open(path, mode='r', destpath=os.curdir, encoding=None, newline=None): + """ + Open `path` with `mode` and return the file object. + + If ``path`` is an URL, it will be downloaded, stored in the + `DataSource` `destpath` directory and opened from there. + + Parameters + ---------- + path : str + Local file path or URL to open. + mode : str, optional + Mode to open `path`. Mode 'r' for reading, 'w' for writing, 'a' to + append. Available modes depend on the type of object specified by + path. Default is 'r'. + destpath : str, optional + Path to the directory where the source file gets downloaded to for + use. If `destpath` is None, a temporary directory will be created. + The default path is the current directory. + encoding : {None, str}, optional + Open text file with given encoding. The default encoding will be + what `io.open` uses. + newline : {None, str}, optional + Newline to use when reading text file. + + Returns + ------- + out : file object + The opened file. + + Notes + ----- + This is a convenience function that instantiates a `DataSource` and + returns the file object from ``DataSource.open(path)``. + + """ + + ds = DataSource(destpath) + return ds.open(path, mode, encoding=encoding, newline=newline) + + +@set_module('numpy') +class DataSource: + """ + DataSource(destpath='.') + + A generic data source file (file, http, ftp, ...). + + DataSources can be local files or remote files/URLs. The files may + also be compressed or uncompressed. DataSource hides some of the + low-level details of downloading the file, allowing you to simply pass + in a valid file path (or URL) and obtain a file object. 
+ + Parameters + ---------- + destpath : str or None, optional + Path to the directory where the source file gets downloaded to for + use. If `destpath` is None, a temporary directory will be created. + The default path is the current directory. + + Notes + ----- + URLs require a scheme string (``http://``) to be used, without it they + will fail:: + + >>> repos = np.DataSource() + >>> repos.exists('www.google.com/index.html') + False + >>> repos.exists('http://www.google.com/index.html') + True + + Temporary directories are deleted when the DataSource is deleted. + + Examples + -------- + :: + + >>> ds = np.DataSource('/home/guido') + >>> urlname = 'http://www.google.com/' + >>> gfile = ds.open('http://www.google.com/') + >>> ds.abspath(urlname) + '/home/guido/www.google.com/index.html' + + >>> ds = np.DataSource(None) # use with temporary file + >>> ds.open('/home/guido/foobar.txt') + + >>> ds.abspath('/home/guido/foobar.txt') + '/tmp/.../home/guido/foobar.txt' + + """ + + def __init__(self, destpath=os.curdir): + """Create a DataSource with a local path at destpath.""" + if destpath: + self._destpath = os.path.abspath(destpath) + self._istmpdest = False + else: + import tempfile # deferring import to improve startup time + self._destpath = tempfile.mkdtemp() + self._istmpdest = True + + def __del__(self): + # Remove temp directories + if hasattr(self, '_istmpdest') and self._istmpdest: + import shutil + + shutil.rmtree(self._destpath) + + def _iszip(self, filename): + """Test if the filename is a zip file by looking at the file extension. + + """ + fname, ext = os.path.splitext(filename) + return ext in _file_openers.keys() + + def _iswritemode(self, mode): + """Test if the given mode will open a file for writing.""" + + # Currently only used to test the bz2 files. + _writemodes = ("w", "+") + for c in mode: + if c in _writemodes: + return True + return False + + def _splitzipext(self, filename): + """Split zip extension from filename and return filename. + + *Returns*: + base, zip_ext : {tuple} + + """ + + if self._iszip(filename): + return os.path.splitext(filename) + else: + return filename, None + + def _possible_names(self, filename): + """Return a tuple containing compressed filename variations.""" + names = [filename] + if not self._iszip(filename): + for zipext in _file_openers.keys(): + if zipext: + names.append(filename+zipext) + return names + + def _isurl(self, path): + """Test if path is a net location. Tests the scheme and netloc.""" + + # We do this here to reduce the 'import numpy' initial import time. + from urllib.parse import urlparse + + # BUG : URLs require a scheme string ('http://') to be used. + # www.google.com will fail. + # Should we prepend the scheme for those that don't have it and + # test that also? Similar to the way we append .gz and test for + # for compressed versions of files. + + scheme, netloc, upath, uparams, uquery, ufrag = urlparse(path) + return bool(scheme and netloc) + + def _cache(self, path): + """Cache the file specified by path. + + Creates a copy of the file in the datasource cache. + + """ + # We import these here because importing them is slow and + # a significant fraction of numpy's total import time. + import shutil + from urllib.request import urlopen + + upath = self.abspath(path) + + # ensure directory exists + if not os.path.exists(os.path.dirname(upath)): + os.makedirs(os.path.dirname(upath)) + + # TODO: Doesn't handle compressed files! 
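+        # Flow: map the URL (or local path) to its cache location under
+        # self._destpath, create any missing parent directories, then either
+        # download the remote file or copy the local one into that location.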
+ if self._isurl(path): + with urlopen(path) as openedurl: + with _open(upath, 'wb') as f: + shutil.copyfileobj(openedurl, f) + else: + shutil.copyfile(path, upath) + return upath + + def _findfile(self, path): + """Searches for ``path`` and returns full path if found. + + If path is an URL, _findfile will cache a local copy and return the + path to the cached file. If path is a local file, _findfile will + return a path to that local file. + + The search will include possible compressed versions of the file + and return the first occurrence found. + + """ + + # Build list of possible local file paths + if not self._isurl(path): + # Valid local paths + filelist = self._possible_names(path) + # Paths in self._destpath + filelist += self._possible_names(self.abspath(path)) + else: + # Cached URLs in self._destpath + filelist = self._possible_names(self.abspath(path)) + # Remote URLs + filelist = filelist + self._possible_names(path) + + for name in filelist: + if self.exists(name): + if self._isurl(name): + name = self._cache(name) + return name + return None + + def abspath(self, path): + """ + Return absolute path of file in the DataSource directory. + + If `path` is an URL, then `abspath` will return either the location + the file exists locally or the location it would exist when opened + using the `open` method. + + Parameters + ---------- + path : str + Can be a local file or a remote URL. + + Returns + ------- + out : str + Complete path, including the `DataSource` destination directory. + + Notes + ----- + The functionality is based on `os.path.abspath`. + + """ + # We do this here to reduce the 'import numpy' initial import time. + from urllib.parse import urlparse + + # TODO: This should be more robust. Handles case where path includes + # the destpath, but not other sub-paths. Failing case: + # path = /home/guido/datafile.txt + # destpath = /home/alex/ + # upath = self.abspath(path) + # upath == '/home/alex/home/guido/datafile.txt' + + # handle case where path includes self._destpath + splitpath = path.split(self._destpath, 2) + if len(splitpath) > 1: + path = splitpath[1] + scheme, netloc, upath, uparams, uquery, ufrag = urlparse(path) + netloc = self._sanitize_relative_path(netloc) + upath = self._sanitize_relative_path(upath) + return os.path.join(self._destpath, netloc, upath) + + def _sanitize_relative_path(self, path): + """Return a sanitised relative path for which + os.path.abspath(os.path.join(base, path)).startswith(base) + """ + last = None + path = os.path.normpath(path) + while path != last: + last = path + # Note: os.path.join treats '/' as os.sep on Windows + path = path.lstrip(os.sep).lstrip('/') + path = path.lstrip(os.pardir).lstrip('..') + drive, path = os.path.splitdrive(path) # for Windows + return path + + def exists(self, path): + """ + Test if path exists. + + Test if `path` exists as (and in this order): + + - a local file. + - a remote URL that has been downloaded and stored locally in the + `DataSource` directory. + - a remote URL that has not been downloaded, but is valid and + accessible. + + Parameters + ---------- + path : str + Can be a local file or a remote URL. + + Returns + ------- + out : bool + True if `path` exists. + + Notes + ----- + When `path` is an URL, `exists` will return True if it's either + stored locally in the `DataSource` directory, or is a valid remote + URL. `DataSource` does not discriminate between the two, the file + is accessible if it exists in either location. 
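+
+        Examples
+        --------
+        A minimal sketch of typical use (requires network access, hence
+        skipped under doctest)::
+
+            >>> ds = np.DataSource(None)
+            >>> ds.exists('http://www.google.com/index.html')  # doctest: +SKIP
+            True
+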
+ + """ + + # First test for local path + if os.path.exists(path): + return True + + # We import this here because importing urllib is slow and + # a significant fraction of numpy's total import time. + from urllib.request import urlopen + from urllib.error import URLError + + # Test cached url + upath = self.abspath(path) + if os.path.exists(upath): + return True + + # Test remote url + if self._isurl(path): + try: + netfile = urlopen(path) + netfile.close() + del(netfile) + return True + except URLError: + return False + return False + + def open(self, path, mode='r', encoding=None, newline=None): + """ + Open and return file-like object. + + If `path` is an URL, it will be downloaded, stored in the + `DataSource` directory and opened from there. + + Parameters + ---------- + path : str + Local file path or URL to open. + mode : {'r', 'w', 'a'}, optional + Mode to open `path`. Mode 'r' for reading, 'w' for writing, + 'a' to append. Available modes depend on the type of object + specified by `path`. Default is 'r'. + encoding : {None, str}, optional + Open text file with given encoding. The default encoding will be + what `io.open` uses. + newline : {None, str}, optional + Newline to use when reading text file. + + Returns + ------- + out : file object + File object. + + """ + + # TODO: There is no support for opening a file for writing which + # doesn't exist yet (creating a file). Should there be? + + # TODO: Add a ``subdir`` parameter for specifying the subdirectory + # used to store URLs in self._destpath. + + if self._isurl(path) and self._iswritemode(mode): + raise ValueError("URLs are not writeable") + + # NOTE: _findfile will fail on a new file opened for writing. + found = self._findfile(path) + if found: + _fname, ext = self._splitzipext(found) + if ext == 'bz2': + mode.replace("+", "") + return _file_openers[ext](found, mode=mode, + encoding=encoding, newline=newline) + else: + raise FileNotFoundError(f"{path} not found.") + + +class Repository (DataSource): + """ + Repository(baseurl, destpath='.') + + A data repository where multiple DataSource's share a base + URL/directory. + + `Repository` extends `DataSource` by prepending a base URL (or + directory) to all the files it handles. Use `Repository` when you will + be working with multiple files from one base URL. Initialize + `Repository` with the base URL, then refer to each file by its filename + only. + + Parameters + ---------- + baseurl : str + Path to the local directory or remote location that contains the + data files. + destpath : str or None, optional + Path to the directory where the source file gets downloaded to for + use. If `destpath` is None, a temporary directory will be created. + The default path is the current directory. + + Examples + -------- + To analyze all files in the repository, do something like this + (note: this is not self-contained code):: + + >>> repos = np.lib._datasource.Repository('/home/user/data/dir/') + >>> for filename in filelist: + ... fp = repos.open(filename) + ... fp.analyze() + ... fp.close() + + Similarly you could use a URL for a repository:: + + >>> repos = np.lib._datasource.Repository('http://www.xyz.edu/data') + + """ + + def __init__(self, baseurl, destpath=os.curdir): + """Create a Repository with a shared url or directory of baseurl.""" + DataSource.__init__(self, destpath=destpath) + self._baseurl = baseurl + + def __del__(self): + DataSource.__del__(self) + + def _fullpath(self, path): + """Return complete path for path. 
Prepends baseurl if necessary.""" + splitpath = path.split(self._baseurl, 2) + if len(splitpath) == 1: + result = os.path.join(self._baseurl, path) + else: + result = path # path contains baseurl already + return result + + def _findfile(self, path): + """Extend DataSource method to prepend baseurl to ``path``.""" + return DataSource._findfile(self, self._fullpath(path)) + + def abspath(self, path): + """ + Return absolute path of file in the Repository directory. + + If `path` is an URL, then `abspath` will return either the location + the file exists locally or the location it would exist when opened + using the `open` method. + + Parameters + ---------- + path : str + Can be a local file or a remote URL. This may, but does not + have to, include the `baseurl` with which the `Repository` was + initialized. + + Returns + ------- + out : str + Complete path, including the `DataSource` destination directory. + + """ + return DataSource.abspath(self, self._fullpath(path)) + + def exists(self, path): + """ + Test if path exists prepending Repository base URL to path. + + Test if `path` exists as (and in this order): + + - a local file. + - a remote URL that has been downloaded and stored locally in the + `DataSource` directory. + - a remote URL that has not been downloaded, but is valid and + accessible. + + Parameters + ---------- + path : str + Can be a local file or a remote URL. This may, but does not + have to, include the `baseurl` with which the `Repository` was + initialized. + + Returns + ------- + out : bool + True if `path` exists. + + Notes + ----- + When `path` is an URL, `exists` will return True if it's either + stored locally in the `DataSource` directory, or is a valid remote + URL. `DataSource` does not discriminate between the two, the file + is accessible if it exists in either location. + + """ + return DataSource.exists(self, self._fullpath(path)) + + def open(self, path, mode='r', encoding=None, newline=None): + """ + Open and return file-like object prepending Repository base URL. + + If `path` is an URL, it will be downloaded, stored in the + DataSource directory and opened from there. + + Parameters + ---------- + path : str + Local file path or URL to open. This may, but does not have to, + include the `baseurl` with which the `Repository` was + initialized. + mode : {'r', 'w', 'a'}, optional + Mode to open `path`. Mode 'r' for reading, 'w' for writing, + 'a' to append. Available modes depend on the type of object + specified by `path`. Default is 'r'. + encoding : {None, str}, optional + Open text file with given encoding. The default encoding will be + what `io.open` uses. + newline : {None, str}, optional + Newline to use when reading text file. + + Returns + ------- + out : file object + File object. + + """ + return DataSource.open(self, self._fullpath(path), mode, + encoding=encoding, newline=newline) + + def listdir(self): + """ + List files in the source Repository. + + Returns + ------- + files : list of str + List of file names (not containing a directory part). + + Notes + ----- + Does not currently work for remote repositories. 
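+
+        Examples
+        --------
+        Illustrative only (assumes the baseurl directory exists)::
+
+            >>> repos = np.lib._datasource.Repository('/home/user/data/dir/')
+            >>> repos.listdir()  # doctest: +SKIP
+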
+ + """ + if self._isurl(self._baseurl): + raise NotImplementedError( + "Directory listing of URLs, not supported yet.") + else: + return os.listdir(self._baseurl) diff --git a/.local/lib/python3.8/site-packages/numpy/lib/_iotools.py b/.local/lib/python3.8/site-packages/numpy/lib/_iotools.py new file mode 100644 index 0000000..4a5ac12 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/_iotools.py @@ -0,0 +1,897 @@ +"""A collection of functions designed to help I/O with ascii files. + +""" +__docformat__ = "restructuredtext en" + +import numpy as np +import numpy.core.numeric as nx +from numpy.compat import asbytes, asunicode + + +def _decode_line(line, encoding=None): + """Decode bytes from binary input streams. + + Defaults to decoding from 'latin1'. That differs from the behavior of + np.compat.asunicode that decodes from 'ascii'. + + Parameters + ---------- + line : str or bytes + Line to be decoded. + encoding : str + Encoding used to decode `line`. + + Returns + ------- + decoded_line : str + + """ + if type(line) is bytes: + if encoding is None: + encoding = "latin1" + line = line.decode(encoding) + + return line + + +def _is_string_like(obj): + """ + Check whether obj behaves like a string. + """ + try: + obj + '' + except (TypeError, ValueError): + return False + return True + + +def _is_bytes_like(obj): + """ + Check whether obj behaves like a bytes object. + """ + try: + obj + b'' + except (TypeError, ValueError): + return False + return True + + +def has_nested_fields(ndtype): + """ + Returns whether one or several fields of a dtype are nested. + + Parameters + ---------- + ndtype : dtype + Data-type of a structured array. + + Raises + ------ + AttributeError + If `ndtype` does not have a `names` attribute. + + Examples + -------- + >>> dt = np.dtype([('name', 'S4'), ('x', float), ('y', float)]) + >>> np.lib._iotools.has_nested_fields(dt) + False + + """ + for name in ndtype.names or (): + if ndtype[name].names is not None: + return True + return False + + +def flatten_dtype(ndtype, flatten_base=False): + """ + Unpack a structured data-type by collapsing nested fields and/or fields + with a shape. + + Note that the field names are lost. + + Parameters + ---------- + ndtype : dtype + The datatype to collapse + flatten_base : bool, optional + If True, transform a field with a shape into several fields. Default is + False. + + Examples + -------- + >>> dt = np.dtype([('name', 'S4'), ('x', float), ('y', float), + ... ('block', int, (2, 3))]) + >>> np.lib._iotools.flatten_dtype(dt) + [dtype('S4'), dtype('float64'), dtype('float64'), dtype('int64')] + >>> np.lib._iotools.flatten_dtype(dt, flatten_base=True) + [dtype('S4'), + dtype('float64'), + dtype('float64'), + dtype('int64'), + dtype('int64'), + dtype('int64'), + dtype('int64'), + dtype('int64'), + dtype('int64')] + + """ + names = ndtype.names + if names is None: + if flatten_base: + return [ndtype.base] * int(np.prod(ndtype.shape)) + return [ndtype.base] + else: + types = [] + for field in names: + info = ndtype.fields[field] + flat_dt = flatten_dtype(info[0], flatten_base) + types.extend(flat_dt) + return types + + +class LineSplitter: + """ + Object to split a string at a given delimiter or at given places. + + Parameters + ---------- + delimiter : str, int, or sequence of ints, optional + If a string, character used to delimit consecutive fields. + If an integer or a sequence of integers, width(s) of each field. + comments : str, optional + Character used to mark the beginning of a comment. Default is '#'. 
+ autostrip : bool, optional + Whether to strip each individual field. Default is True. + + """ + + def autostrip(self, method): + """ + Wrapper to strip each member of the output of `method`. + + Parameters + ---------- + method : function + Function that takes a single argument and returns a sequence of + strings. + + Returns + ------- + wrapped : function + The result of wrapping `method`. `wrapped` takes a single input + argument and returns a list of strings that are stripped of + white-space. + + """ + return lambda input: [_.strip() for _ in method(input)] + + def __init__(self, delimiter=None, comments='#', autostrip=True, + encoding=None): + delimiter = _decode_line(delimiter) + comments = _decode_line(comments) + + self.comments = comments + + # Delimiter is a character + if (delimiter is None) or isinstance(delimiter, str): + delimiter = delimiter or None + _handyman = self._delimited_splitter + # Delimiter is a list of field widths + elif hasattr(delimiter, '__iter__'): + _handyman = self._variablewidth_splitter + idx = np.cumsum([0] + list(delimiter)) + delimiter = [slice(i, j) for (i, j) in zip(idx[:-1], idx[1:])] + # Delimiter is a single integer + elif int(delimiter): + (_handyman, delimiter) = ( + self._fixedwidth_splitter, int(delimiter)) + else: + (_handyman, delimiter) = (self._delimited_splitter, None) + self.delimiter = delimiter + if autostrip: + self._handyman = self.autostrip(_handyman) + else: + self._handyman = _handyman + self.encoding = encoding + + def _delimited_splitter(self, line): + """Chop off comments, strip, and split at delimiter. """ + if self.comments is not None: + line = line.split(self.comments)[0] + line = line.strip(" \r\n") + if not line: + return [] + return line.split(self.delimiter) + + def _fixedwidth_splitter(self, line): + if self.comments is not None: + line = line.split(self.comments)[0] + line = line.strip("\r\n") + if not line: + return [] + fixed = self.delimiter + slices = [slice(i, i + fixed) for i in range(0, len(line), fixed)] + return [line[s] for s in slices] + + def _variablewidth_splitter(self, line): + if self.comments is not None: + line = line.split(self.comments)[0] + if not line: + return [] + slices = self.delimiter + return [line[s] for s in slices] + + def __call__(self, line): + return self._handyman(_decode_line(line, self.encoding)) + + +class NameValidator: + """ + Object to validate a list of strings to use as field names. + + The strings are stripped of any non alphanumeric character, and spaces + are replaced by '_'. During instantiation, the user can define a list + of names to exclude, as well as a list of invalid characters. Names in + the exclusion list are appended a '_' character. + + Once an instance has been created, it can be called with a list of + names, and a list of valid names will be created. The `__call__` + method accepts an optional keyword "default" that sets the default name + in case of ambiguity. By default this is 'f', so that names will + default to `f0`, `f1`, etc. + + Parameters + ---------- + excludelist : sequence, optional + A list of names to exclude. This list is appended to the default + list ['return', 'file', 'print']. Excluded names are appended an + underscore: for example, `file` becomes `file_` if supplied. + deletechars : str, optional + A string combining invalid characters that must be deleted from the + names. + case_sensitive : {True, False, 'upper', 'lower'}, optional + * If True, field names are case-sensitive. 
+ * If False or 'upper', field names are converted to upper case. + * If 'lower', field names are converted to lower case. + + The default value is True. + replace_space : '_', optional + Character(s) used in replacement of white spaces. + + Notes + ----- + Calling an instance of `NameValidator` is the same as calling its + method `validate`. + + Examples + -------- + >>> validator = np.lib._iotools.NameValidator() + >>> validator(['file', 'field2', 'with space', 'CaSe']) + ('file_', 'field2', 'with_space', 'CaSe') + + >>> validator = np.lib._iotools.NameValidator(excludelist=['excl'], + ... deletechars='q', + ... case_sensitive=False) + >>> validator(['excl', 'field2', 'no_q', 'with space', 'CaSe']) + ('EXCL', 'FIELD2', 'NO_Q', 'WITH_SPACE', 'CASE') + + """ + + defaultexcludelist = ['return', 'file', 'print'] + defaultdeletechars = set(r"""~!@#$%^&*()-=+~\|]}[{';: /?.>,<""") + + def __init__(self, excludelist=None, deletechars=None, + case_sensitive=None, replace_space='_'): + # Process the exclusion list .. + if excludelist is None: + excludelist = [] + excludelist.extend(self.defaultexcludelist) + self.excludelist = excludelist + # Process the list of characters to delete + if deletechars is None: + delete = self.defaultdeletechars + else: + delete = set(deletechars) + delete.add('"') + self.deletechars = delete + # Process the case option ..... + if (case_sensitive is None) or (case_sensitive is True): + self.case_converter = lambda x: x + elif (case_sensitive is False) or case_sensitive.startswith('u'): + self.case_converter = lambda x: x.upper() + elif case_sensitive.startswith('l'): + self.case_converter = lambda x: x.lower() + else: + msg = 'unrecognized case_sensitive value %s.' % case_sensitive + raise ValueError(msg) + + self.replace_space = replace_space + + def validate(self, names, defaultfmt="f%i", nbfields=None): + """ + Validate a list of strings as field names for a structured array. + + Parameters + ---------- + names : sequence of str + Strings to be validated. + defaultfmt : str, optional + Default format string, used if validating a given string + reduces its length to zero. + nbfields : integer, optional + Final number of validated names, used to expand or shrink the + initial list of names. + + Returns + ------- + validatednames : list of str + The list of validated field names. + + Notes + ----- + A `NameValidator` instance can be called directly, which is the + same as calling `validate`. For examples, see `NameValidator`. + + """ + # Initial checks .............. + if (names is None): + if (nbfields is None): + return None + names = [] + if isinstance(names, str): + names = [names, ] + if nbfields is not None: + nbnames = len(names) + if (nbnames < nbfields): + names = list(names) + [''] * (nbfields - nbnames) + elif (nbnames > nbfields): + names = names[:nbfields] + # Set some shortcuts ........... + deletechars = self.deletechars + excludelist = self.excludelist + case_converter = self.case_converter + replace_space = self.replace_space + # Initializes some variables ... 
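+        # The loop below cleans each name (case conversion, whitespace
+        # replacement, deletechars filtering), falls back to defaultfmt for
+        # names that come out empty, appends '_' to excluded names, and
+        # disambiguates duplicates as 'name_1', 'name_2', ... via `seen`.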
+ validatednames = [] + seen = dict() + nbempty = 0 + + for item in names: + item = case_converter(item).strip() + if replace_space: + item = item.replace(' ', replace_space) + item = ''.join([c for c in item if c not in deletechars]) + if item == '': + item = defaultfmt % nbempty + while item in names: + nbempty += 1 + item = defaultfmt % nbempty + nbempty += 1 + elif item in excludelist: + item += '_' + cnt = seen.get(item, 0) + if cnt > 0: + validatednames.append(item + '_%d' % cnt) + else: + validatednames.append(item) + seen[item] = cnt + 1 + return tuple(validatednames) + + def __call__(self, names, defaultfmt="f%i", nbfields=None): + return self.validate(names, defaultfmt=defaultfmt, nbfields=nbfields) + + +def str2bool(value): + """ + Tries to transform a string supposed to represent a boolean to a boolean. + + Parameters + ---------- + value : str + The string that is transformed to a boolean. + + Returns + ------- + boolval : bool + The boolean representation of `value`. + + Raises + ------ + ValueError + If the string is not 'True' or 'False' (case independent) + + Examples + -------- + >>> np.lib._iotools.str2bool('TRUE') + True + >>> np.lib._iotools.str2bool('false') + False + + """ + value = value.upper() + if value == 'TRUE': + return True + elif value == 'FALSE': + return False + else: + raise ValueError("Invalid boolean") + + +class ConverterError(Exception): + """ + Exception raised when an error occurs in a converter for string values. + + """ + pass + + +class ConverterLockError(ConverterError): + """ + Exception raised when an attempt is made to upgrade a locked converter. + + """ + pass + + +class ConversionWarning(UserWarning): + """ + Warning issued when a string converter has a problem. + + Notes + ----- + In `genfromtxt` a `ConversionWarning` is issued if raising exceptions + is explicitly suppressed with the "invalid_raise" keyword. + + """ + pass + + +class StringConverter: + """ + Factory class for function transforming a string into another object + (int, float). + + After initialization, an instance can be called to transform a string + into another object. If the string is recognized as representing a + missing value, a default value is returned. + + Attributes + ---------- + func : function + Function used for the conversion. + default : any + Default value to return when the input corresponds to a missing + value. + type : type + Type of the output. + _status : int + Integer representing the order of the conversion. + _mapper : sequence of tuples + Sequence of tuples (dtype, function, default value) to evaluate in + order. + _locked : bool + Holds `locked` parameter. + + Parameters + ---------- + dtype_or_func : {None, dtype, function}, optional + If a `dtype`, specifies the input data type, used to define a basic + function and a default value for missing data. For example, when + `dtype` is float, the `func` attribute is set to `float` and the + default value to `np.nan`. If a function, this function is used to + convert a string to another object. In this case, it is recommended + to give an associated default value as input. + default : any, optional + Value to return by default, that is, when the string to be + converted is flagged as missing. If not given, `StringConverter` + tries to supply a reasonable default value. + missing_values : {None, sequence of str}, optional + ``None`` or sequence of strings indicating a missing value. If ``None`` + then missing values are indicated by empty entries. The default is + ``None``. 
+    locked : bool, optional
+        Whether the StringConverter should be locked to prevent automatic
+        upgrade or not. Default is False.
+
+    """
+    _mapper = [(nx.bool_, str2bool, False),
+               (nx.int_, int, -1),]
+
+    # On 32-bit systems, we need to make sure that we explicitly include
+    # nx.int64 since nx.int_ is nx.int32.
+    if nx.dtype(nx.int_).itemsize < nx.dtype(nx.int64).itemsize:
+        _mapper.append((nx.int64, int, -1))
+
+    _mapper.extend([(nx.float64, float, nx.nan),
+                    (nx.complex128, complex, nx.nan + 0j),
+                    (nx.longdouble, nx.longdouble, nx.nan),
+                    # If a non-default dtype is passed, fall back to generic
+                    # ones (should only be used for the converter)
+                    (nx.integer, int, -1),
+                    (nx.floating, float, nx.nan),
+                    (nx.complexfloating, complex, nx.nan + 0j),
+                    # Last, try with the string types (must be last, because
+                    # `_mapper[-1]` is used as default in some cases)
+                    (nx.unicode_, asunicode, '???'),
+                    (nx.string_, asbytes, '???'),
+                    ])
+
+    @classmethod
+    def _getdtype(cls, val):
+        """Returns the dtype of the input variable."""
+        return np.array(val).dtype
+
+    @classmethod
+    def _getsubdtype(cls, val):
+        """Returns the type of the dtype of the input variable."""
+        return np.array(val).dtype.type
+
+    @classmethod
+    def _dtypeortype(cls, dtype):
+        """Returns dtype for datetime64 and type of dtype otherwise."""
+
+        # This is a bit annoying. We want to return the "general" type in most
+        # cases (ie. "string" rather than "S10"), but we want to return the
+        # specific type for datetime64 (ie. "datetime64[us]" rather than
+        # "datetime64").
+        if dtype.type == np.datetime64:
+            return dtype
+        return dtype.type
+
+    @classmethod
+    def upgrade_mapper(cls, func, default=None):
+        """
+        Upgrade the mapper of a StringConverter by adding a new function and
+        its corresponding default.
+
+        The input function (or sequence of functions) and its associated
+        default value (if any) is inserted in the penultimate position of the
+        mapper. The corresponding type is estimated from the dtype of the
+        default value.
+
+        Parameters
+        ----------
+        func : var
+            Function, or sequence of functions
+
+        Examples
+        --------
+        >>> import dateutil.parser
+        >>> import datetime
+        >>> dateparser = dateutil.parser.parse
+        >>> defaultdate = datetime.date(2000, 1, 1)
+        >>> StringConverter.upgrade_mapper(dateparser, default=defaultdate)
+        """
+        # Func is a single function
+        if hasattr(func, '__call__'):
+            cls._mapper.insert(-1, (cls._getsubdtype(default), func, default))
+            return
+        elif hasattr(func, '__iter__'):
+            if isinstance(func[0], (tuple, list)):
+                for _ in func:
+                    cls._mapper.insert(-1, _)
+                return
+            if default is None:
+                default = [None] * len(func)
+            else:
+                default = list(default)
+                # Pad the defaults with None so zip covers every function
+                default.extend([None] * (len(func) - len(default)))
+            for fct, dft in zip(func, default):
+                cls._mapper.insert(-1, (cls._getsubdtype(dft), fct, dft))
+
+    @classmethod
+    def _find_map_entry(cls, dtype):
+        # if a converter for the specific dtype is available use that
+        for i, (deftype, func, default_def) in enumerate(cls._mapper):
+            if dtype.type == deftype:
+                return i, (deftype, func, default_def)
+
+        # otherwise find an inexact match
+        for i, (deftype, func, default_def) in enumerate(cls._mapper):
+            if np.issubdtype(dtype.type, deftype):
+                return i, (deftype, func, default_def)
+
+        raise LookupError
+
+    def __init__(self, dtype_or_func=None, default=None, missing_values=None,
+                 locked=False):
+        # Defines a lock for upgrade
+        self._locked = bool(locked)
+        # No input dtype: minimal initialization
+        if dtype_or_func is None:
+            self.func = str2bool
+            self._status = 0
+            self.default = default or False
+            dtype = np.dtype('bool')
+        else:
+            # Is the input a np.dtype ?
+            try:
+                self.func = None
+                dtype = np.dtype(dtype_or_func)
+            except TypeError:
+                # dtype_or_func must be a function, then
+                if not hasattr(dtype_or_func, '__call__'):
+                    errmsg = ("The input argument `dtype` is neither a"
+                              " function nor a dtype (got '%s' instead)")
+                    raise TypeError(errmsg % type(dtype_or_func))
+                # Set the function
+                self.func = dtype_or_func
+                # If we don't have a default, try to guess it or set it to
+                # None
+                if default is None:
+                    try:
+                        default = self.func('0')
+                    except ValueError:
+                        default = None
+                dtype = self._getdtype(default)
+
+        # find the best match in our mapper
+        try:
+            self._status, (_, func, default_def) = self._find_map_entry(dtype)
+        except LookupError:
+            # no match
+            self.default = default
+            _, func, _ = self._mapper[-1]
+            self._status = 0
+        else:
+            # use the found default only if we did not already have one
+            if default is None:
+                self.default = default_def
+            else:
+                self.default = default
+
+        # If the input was a dtype, set the function to the last we saw
+        if self.func is None:
+            self.func = func
+
+        # If the status is 1 (int), change the function to
+        # something more robust.
+        if self.func == self._mapper[1][1]:
+            if issubclass(dtype.type, np.uint64):
+                self.func = np.uint64
+            elif issubclass(dtype.type, np.int64):
+                self.func = np.int64
+            else:
+                self.func = lambda x: int(float(x))
+        # Store the list of strings corresponding to missing values.
+ if missing_values is None: + self.missing_values = {''} + else: + if isinstance(missing_values, str): + missing_values = missing_values.split(",") + self.missing_values = set(list(missing_values) + ['']) + + self._callingfunction = self._strict_call + self.type = self._dtypeortype(dtype) + self._checked = False + self._initial_default = default + + def _loose_call(self, value): + try: + return self.func(value) + except ValueError: + return self.default + + def _strict_call(self, value): + try: + + # We check if we can convert the value using the current function + new_value = self.func(value) + + # In addition to having to check whether func can convert the + # value, we also have to make sure that we don't get overflow + # errors for integers. + if self.func is int: + try: + np.array(value, dtype=self.type) + except OverflowError: + raise ValueError + + # We're still here so we can now return the new value + return new_value + + except ValueError: + if value.strip() in self.missing_values: + if not self._status: + self._checked = False + return self.default + raise ValueError("Cannot convert string '%s'" % value) + + def __call__(self, value): + return self._callingfunction(value) + + def _do_upgrade(self): + # Raise an exception if we locked the converter... + if self._locked: + errmsg = "Converter is locked and cannot be upgraded" + raise ConverterLockError(errmsg) + _statusmax = len(self._mapper) + # Complains if we try to upgrade by the maximum + _status = self._status + if _status == _statusmax: + errmsg = "Could not find a valid conversion function" + raise ConverterError(errmsg) + elif _status < _statusmax - 1: + _status += 1 + self.type, self.func, default = self._mapper[_status] + self._status = _status + if self._initial_default is not None: + self.default = self._initial_default + else: + self.default = default + + def upgrade(self, value): + """ + Find the best converter for a given string, and return the result. + + The supplied string `value` is converted by testing different + converters in order. First the `func` method of the + `StringConverter` instance is tried, if this fails other available + converters are tried. The order in which these other converters + are tried is determined by the `_status` attribute of the instance. + + Parameters + ---------- + value : str + The string to convert. + + Returns + ------- + out : any + The result of converting `value` with the appropriate converter. + + """ + self._checked = True + try: + return self._strict_call(value) + except ValueError: + self._do_upgrade() + return self.upgrade(value) + + def iterupgrade(self, value): + self._checked = True + if not hasattr(value, '__iter__'): + value = (value,) + _strict_call = self._strict_call + try: + for _m in value: + _strict_call(_m) + except ValueError: + self._do_upgrade() + self.iterupgrade(value) + + def update(self, func, default=None, testing_value=None, + missing_values='', locked=False): + """ + Set StringConverter attributes directly. + + Parameters + ---------- + func : function + Conversion function. + default : any, optional + Value to return by default, that is, when the string to be + converted is flagged as missing. If not given, + `StringConverter` tries to supply a reasonable default value. + testing_value : str, optional + A string representing a standard input value of the converter. + This string is used to help defining a reasonable default + value. + missing_values : {sequence of str, None}, optional + Sequence of strings indicating a missing value. 
If ``None``, then
+            the existing `missing_values` are cleared. The default is `''`.
+        locked : bool, optional
+            Whether the StringConverter should be locked to prevent
+            automatic upgrade or not. Default is False.
+
+        Notes
+        -----
+        `update` takes the same parameters as the constructor of
+        `StringConverter`, except that `func` does not accept a `dtype`
+        whereas `dtype_or_func` in the constructor does.
+
+        """
+        self.func = func
+        self._locked = locked
+
+        # Don't reset the default to None if we can avoid it
+        if default is not None:
+            self.default = default
+            self.type = self._dtypeortype(self._getdtype(default))
+        else:
+            try:
+                tester = func(testing_value or '1')
+            except (TypeError, ValueError):
+                tester = None
+            self.type = self._dtypeortype(self._getdtype(tester))
+
+        # Add the missing values to the existing set or clear it.
+        if missing_values is None:
+            # Clear all missing values even though the ctor initializes it to
+            # set(['']) when the argument is None.
+            self.missing_values = set()
+        else:
+            if not np.iterable(missing_values):
+                missing_values = [missing_values]
+            if not all(isinstance(v, str) for v in missing_values):
+                raise TypeError("missing_values must be strings or unicode")
+            self.missing_values.update(missing_values)
+
+
+def easy_dtype(ndtype, names=None, defaultfmt="f%i", **validationargs):
+    """
+    Convenience function to create a `np.dtype` object.
+
+    The function processes the input `dtype` and matches it with the given
+    names.
+
+    Parameters
+    ----------
+    ndtype : var
+        Definition of the dtype. Can be any string or dictionary recognized
+        by the `np.dtype` function, or a sequence of types.
+    names : str or sequence, optional
+        Sequence of strings to use as field names for a structured dtype.
+        For convenience, `names` can be a string of a comma-separated list
+        of names.
+    defaultfmt : str, optional
+        Format string used to define missing names, such as ``"f%i"``
+        (default) or ``"fields_%02i"``.
+    validationargs : optional
+        A series of optional arguments used to initialize a
+        `NameValidator`.
+
+    Examples
+    --------
+    >>> np.lib._iotools.easy_dtype(float)
+    dtype('float64')
+    >>> np.lib._iotools.easy_dtype("i4, f8")
+    dtype([('f0', '<i4'), ('f1', '<f8')])
+    >>> np.lib._iotools.easy_dtype("i4, f8", defaultfmt="field_%03i")
+    dtype([('field_000', '<i4'), ('field_001', '<f8')])
+
+    >>> np.lib._iotools.easy_dtype((int, float, float), names="a,b,c")
+    dtype([('a', '<i8'), ('b', '<f8'), ('c', '<f8')])
+
+    >>> np.lib._iotools.easy_dtype(float, names="a,b,c")
+    dtype([('a', '<f8'), ('b', '<f8'), ('c', '<f8')])
+
+    """
+    try:
+        ndtype = np.dtype(ndtype)
+    except TypeError:
+        validate = NameValidator(**validationargs)
+        nbfields = len(ndtype)
+        if names is None:
+            names = [''] * len(ndtype)
+        elif isinstance(names, str):
+            names = names.split(",")
+        names = validate(names, nbfields=nbfields, defaultfmt=defaultfmt)
+        ndtype = np.dtype(dict(formats=ndtype, names=names))
+    else:
+        # Explicit names
+        if names is not None:
+            validate = NameValidator(**validationargs)
+            if isinstance(names, str):
+                names = names.split(",")
+            # Simple dtype: repeat to match the nb of names
+            if ndtype.names is None:
+                formats = tuple([ndtype.type] * len(names))
+                names = validate(names, defaultfmt=defaultfmt)
+                ndtype = np.dtype(list(zip(names, formats)))
+            # Structured dtype: just validate the names as needed
+            else:
+                ndtype.names = validate(names, nbfields=len(ndtype.names),
+                                        defaultfmt=defaultfmt)
+        # No implicit names
+        elif ndtype.names is not None:
+            validate = NameValidator(**validationargs)
+            # Default initial names : should we change the format ?
+            numbered_names = tuple("f%i" % i for i in range(len(ndtype.names)))
+            if ((ndtype.names == numbered_names) and (defaultfmt != "f%i")):
+                ndtype.names = validate([''] * len(ndtype.names),
+                                        defaultfmt=defaultfmt)
+            # Explicit initial names : just validate
+            else:
+                ndtype.names = validate(ndtype.names, defaultfmt=defaultfmt)
+    return ndtype
diff --git a/.local/lib/python3.8/site-packages/numpy/lib/_version.py b/.local/lib/python3.8/site-packages/numpy/lib/_version.py
new file mode 100644
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/lib/_version.py
+"""Utility to compare (NumPy) version strings.
+
+The NumpyVersion class allows properly comparing numpy version strings.
+The LooseVersion and StrictVersion classes that distutils provides don't
+work; they don't recognize anything like alpha/beta/rc/dev versions.
+
+"""
+import re
+
+from numpy.core.overrides import set_module
+
+
+__all__ = ['NumpyVersion']
+
+
+@set_module('numpy.lib')
+class NumpyVersion():
+    """Parse and compare numpy version strings.
+
+    NumPy has the following versioning scheme (numbers given are examples; they
+    can be > 9 in principle):
+
+    - Released version: '1.8.0', '1.8.1', etc.
+    - Alpha: '1.8.0a1', '1.8.0a2', etc.
+    - Beta: '1.8.0b1', '1.8.0b2', etc.
+    - Release candidates: '1.8.0rc1', '1.8.0rc2', etc.
+    - Development versions: '1.8.0.dev-f1234afa' (git commit hash appended)
+    - Development versions after a1: '1.8.0a1.dev-f1234afa',
+                                     '1.8.0b2.dev-f1234afa',
+                                     '1.8.1rc1.dev-f1234afa', etc.
+    - Development versions (no git hash available): '1.8.0.dev-Unknown'
+
+    Comparing needs to be done against a valid version string or other
+    `NumpyVersion` instance. Note that all development versions of the same
+    (pre-)release compare equal.
+
+    .. versionadded:: 1.9.0
+
+    Parameters
+    ----------
+    vstring : str
+        NumPy version string (``np.__version__``).
+
+    Examples
+    --------
+    >>> from numpy.lib import NumpyVersion
+    >>> if NumpyVersion(np.__version__) < '1.7.0':
+    ...     print('skip')
+    >>> # skip
+
+    >>> NumpyVersion('1.7')  # raises ValueError, add ".0"
+    Traceback (most recent call last):
+        ...
+ ValueError: Not a valid numpy version string + + """ + + def __init__(self, vstring): + self.vstring = vstring + ver_main = re.match(r'\d+\.\d+\.\d+', vstring) + if not ver_main: + raise ValueError("Not a valid numpy version string") + + self.version = ver_main.group() + self.major, self.minor, self.bugfix = [int(x) for x in + self.version.split('.')] + if len(vstring) == ver_main.end(): + self.pre_release = 'final' + else: + alpha = re.match(r'a\d', vstring[ver_main.end():]) + beta = re.match(r'b\d', vstring[ver_main.end():]) + rc = re.match(r'rc\d', vstring[ver_main.end():]) + pre_rel = [m for m in [alpha, beta, rc] if m is not None] + if pre_rel: + self.pre_release = pre_rel[0].group() + else: + self.pre_release = '' + + self.is_devversion = bool(re.search(r'.dev', vstring)) + + def _compare_version(self, other): + """Compare major.minor.bugfix""" + if self.major == other.major: + if self.minor == other.minor: + if self.bugfix == other.bugfix: + vercmp = 0 + elif self.bugfix > other.bugfix: + vercmp = 1 + else: + vercmp = -1 + elif self.minor > other.minor: + vercmp = 1 + else: + vercmp = -1 + elif self.major > other.major: + vercmp = 1 + else: + vercmp = -1 + + return vercmp + + def _compare_pre_release(self, other): + """Compare alpha/beta/rc/final.""" + if self.pre_release == other.pre_release: + vercmp = 0 + elif self.pre_release == 'final': + vercmp = 1 + elif other.pre_release == 'final': + vercmp = -1 + elif self.pre_release > other.pre_release: + vercmp = 1 + else: + vercmp = -1 + + return vercmp + + def _compare(self, other): + if not isinstance(other, (str, NumpyVersion)): + raise ValueError("Invalid object to compare with NumpyVersion.") + + if isinstance(other, str): + other = NumpyVersion(other) + + vercmp = self._compare_version(other) + if vercmp == 0: + # Same x.y.z version, check for alpha/beta/rc + vercmp = self._compare_pre_release(other) + if vercmp == 0: + # Same version and same pre-release, check if dev version + if self.is_devversion is other.is_devversion: + vercmp = 0 + elif self.is_devversion: + vercmp = -1 + else: + vercmp = 1 + + return vercmp + + def __lt__(self, other): + return self._compare(other) < 0 + + def __le__(self, other): + return self._compare(other) <= 0 + + def __eq__(self, other): + return self._compare(other) == 0 + + def __ne__(self, other): + return self._compare(other) != 0 + + def __gt__(self, other): + return self._compare(other) > 0 + + def __ge__(self, other): + return self._compare(other) >= 0 + + def __repr__(self): + return "NumpyVersion(%s)" % self.vstring diff --git a/.local/lib/python3.8/site-packages/numpy/lib/_version.pyi b/.local/lib/python3.8/site-packages/numpy/lib/_version.pyi new file mode 100644 index 0000000..3581d63 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/_version.pyi @@ -0,0 +1,19 @@ +from typing import Union, List + +__all__: List[str] + +class NumpyVersion: + vstring: str + version: str + major: int + minor: int + bugfix: int + pre_release: str + is_devversion: bool + def __init__(self, vstring: str) -> None: ... + def __lt__(self, other: Union[str, NumpyVersion]) -> bool: ... + def __le__(self, other: Union[str, NumpyVersion]) -> bool: ... + def __eq__(self, other: Union[str, NumpyVersion]) -> bool: ... # type: ignore[override] + def __ne__(self, other: Union[str, NumpyVersion]) -> bool: ... # type: ignore[override] + def __gt__(self, other: Union[str, NumpyVersion]) -> bool: ... + def __ge__(self, other: Union[str, NumpyVersion]) -> bool: ... 
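A quick, hedged sketch of how the `NumpyVersion` class above is typically used: gating optional behaviour on the installed NumPy release. The `HAS_PAD_EMPTY` flag name and the `'1.17.0'` threshold are illustrative assumptions chosen for this sketch, not anything the vendored files define.

    # Usage sketch for numpy.lib.NumpyVersion (see _version.py above).
    # HAS_PAD_EMPTY and the '1.17.0' threshold are assumed for illustration.
    import numpy as np
    from numpy.lib import NumpyVersion

    # Plain string comparison would sort '1.10.0' before '1.9.0';
    # NumpyVersion parses major/minor/bugfix plus a/b/rc/dev suffixes,
    # so the comparison below is a true version comparison.
    HAS_PAD_EMPTY = NumpyVersion(np.__version__) >= '1.17.0'

    # np.pad gained mode='empty' in NumPy 1.17 (see arraypad.py below).
    mode = 'empty' if HAS_PAD_EMPTY else 'constant'
    padded = np.pad(np.arange(3), 2, mode=mode)
    print(padded.shape)  # (7,)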
diff --git a/.local/lib/python3.8/site-packages/numpy/lib/arraypad.py b/.local/lib/python3.8/site-packages/numpy/lib/arraypad.py
new file mode 100644
index 0000000..8830b81
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/lib/arraypad.py
@@ -0,0 +1,876 @@
+"""
+The arraypad module contains a group of functions to pad values onto the edges
+of an n-dimensional array.
+
+"""
+import numpy as np
+from numpy.core.overrides import array_function_dispatch
+from numpy.lib.index_tricks import ndindex
+
+
+__all__ = ['pad']
+
+
+###############################################################################
+# Private utility functions.
+
+
+def _round_if_needed(arr, dtype):
+    """
+    Rounds arr inplace if destination dtype is integer.
+
+    Parameters
+    ----------
+    arr : ndarray
+        Input array.
+    dtype : dtype
+        The dtype of the destination array.
+    """
+    if np.issubdtype(dtype, np.integer):
+        arr.round(out=arr)
+
+
+def _slice_at_axis(sl, axis):
+    """
+    Construct tuple of slices to slice an array in the given dimension.
+
+    Parameters
+    ----------
+    sl : slice
+        The slice for the given dimension.
+    axis : int
+        The axis to which `sl` is applied. All other dimensions are left
+        "unsliced".
+
+    Returns
+    -------
+    sl : tuple of slices
+        A tuple with slices matching `shape` in length.
+
+    Examples
+    --------
+    >>> _slice_at_axis(slice(None, 3, -1), 1)
+    (slice(None, None, None), slice(None, 3, -1), (...,))
+    """
+    return (slice(None),) * axis + (sl,) + (...,)
+
+
+def _view_roi(array, original_area_slice, axis):
+    """
+    Get a view of the current region of interest during iterative padding.
+
+    When padding multiple dimensions iteratively corner values are
+    unnecessarily overwritten multiple times. This function reduces the
+    working area for the first dimensions so that corners are excluded.
+
+    Parameters
+    ----------
+    array : ndarray
+        The array with the region of interest.
+    original_area_slice : tuple of slices
+        Denotes the area with original values of the unpadded array.
+    axis : int
+        The currently padded dimension assuming that `axis` is padded before
+        `axis` + 1.
+
+    Returns
+    -------
+    roi : ndarray
+        The region of interest of the original `array`.
+    """
+    axis += 1
+    sl = (slice(None),) * axis + original_area_slice[axis:]
+    return array[sl]
+
+
+def _pad_simple(array, pad_width, fill_value=None):
+    """
+    Pad array on all sides with either a single value or undefined values.
+
+    Parameters
+    ----------
+    array : ndarray
+        Array to grow.
+    pad_width : sequence of tuple[int, int]
+        Pad width on both sides for each dimension in `arr`.
+    fill_value : scalar, optional
+        If provided the padded area is filled with this value, otherwise
+        the pad area is left undefined.
+
+    Returns
+    -------
+    padded : ndarray
+        The padded array with the same dtype as `array`. Its order will
+        default to C-style if `array` is not F-contiguous.
+    original_area_slice : tuple
+        A tuple of slices pointing to the area of the original array.
+ """ + # Allocate grown array + new_shape = tuple( + left + size + right + for size, (left, right) in zip(array.shape, pad_width) + ) + order = 'F' if array.flags.fnc else 'C' # Fortran and not also C-order + padded = np.empty(new_shape, dtype=array.dtype, order=order) + + if fill_value is not None: + padded.fill(fill_value) + + # Copy old array into correct space + original_area_slice = tuple( + slice(left, left + size) + for size, (left, right) in zip(array.shape, pad_width) + ) + padded[original_area_slice] = array + + return padded, original_area_slice + + +def _set_pad_area(padded, axis, width_pair, value_pair): + """ + Set empty-padded area in given dimension. + + Parameters + ---------- + padded : ndarray + Array with the pad area which is modified inplace. + axis : int + Dimension with the pad area to set. + width_pair : (int, int) + Pair of widths that mark the pad area on both sides in the given + dimension. + value_pair : tuple of scalars or ndarrays + Values inserted into the pad area on each side. It must match or be + broadcastable to the shape of `arr`. + """ + left_slice = _slice_at_axis(slice(None, width_pair[0]), axis) + padded[left_slice] = value_pair[0] + + right_slice = _slice_at_axis( + slice(padded.shape[axis] - width_pair[1], None), axis) + padded[right_slice] = value_pair[1] + + +def _get_edges(padded, axis, width_pair): + """ + Retrieve edge values from empty-padded array in given dimension. + + Parameters + ---------- + padded : ndarray + Empty-padded array. + axis : int + Dimension in which the edges are considered. + width_pair : (int, int) + Pair of widths that mark the pad area on both sides in the given + dimension. + + Returns + ------- + left_edge, right_edge : ndarray + Edge values of the valid area in `padded` in the given dimension. Its + shape will always match `padded` except for the dimension given by + `axis` which will have a length of 1. + """ + left_index = width_pair[0] + left_slice = _slice_at_axis(slice(left_index, left_index + 1), axis) + left_edge = padded[left_slice] + + right_index = padded.shape[axis] - width_pair[1] + right_slice = _slice_at_axis(slice(right_index - 1, right_index), axis) + right_edge = padded[right_slice] + + return left_edge, right_edge + + +def _get_linear_ramps(padded, axis, width_pair, end_value_pair): + """ + Construct linear ramps for empty-padded array in given dimension. + + Parameters + ---------- + padded : ndarray + Empty-padded array. + axis : int + Dimension in which the ramps are constructed. + width_pair : (int, int) + Pair of widths that mark the pad area on both sides in the given + dimension. + end_value_pair : (scalar, scalar) + End values for the linear ramps which form the edge of the fully padded + array. These values are included in the linear ramps. + + Returns + ------- + left_ramp, right_ramp : ndarray + Linear ramps to set on both sides of `padded`. + """ + edge_pair = _get_edges(padded, axis, width_pair) + + left_ramp, right_ramp = ( + np.linspace( + start=end_value, + stop=edge.squeeze(axis), # Dimension is replaced by linspace + num=width, + endpoint=False, + dtype=padded.dtype, + axis=axis + ) + for end_value, edge, width in zip( + end_value_pair, edge_pair, width_pair + ) + ) + + # Reverse linear space in appropriate dimension + right_ramp = right_ramp[_slice_at_axis(slice(None, None, -1), axis)] + + return left_ramp, right_ramp + + +def _get_stats(padded, axis, width_pair, length_pair, stat_func): + """ + Calculate statistic for the empty-padded array in given dimension. 
+ + Parameters + ---------- + padded : ndarray + Empty-padded array. + axis : int + Dimension in which the statistic is calculated. + width_pair : (int, int) + Pair of widths that mark the pad area on both sides in the given + dimension. + length_pair : 2-element sequence of None or int + Gives the number of values in valid area from each side that is + taken into account when calculating the statistic. If None the entire + valid area in `padded` is considered. + stat_func : function + Function to compute statistic. The expected signature is + ``stat_func(x: ndarray, axis: int, keepdims: bool) -> ndarray``. + + Returns + ------- + left_stat, right_stat : ndarray + Calculated statistic for both sides of `padded`. + """ + # Calculate indices of the edges of the area with original values + left_index = width_pair[0] + right_index = padded.shape[axis] - width_pair[1] + # as well as its length + max_length = right_index - left_index + + # Limit stat_lengths to max_length + left_length, right_length = length_pair + if left_length is None or max_length < left_length: + left_length = max_length + if right_length is None or max_length < right_length: + right_length = max_length + + if (left_length == 0 or right_length == 0) \ + and stat_func in {np.amax, np.amin}: + # amax and amin can't operate on an empty array, + # raise a more descriptive warning here instead of the default one + raise ValueError("stat_length of 0 yields no value for padding") + + # Calculate statistic for the left side + left_slice = _slice_at_axis( + slice(left_index, left_index + left_length), axis) + left_chunk = padded[left_slice] + left_stat = stat_func(left_chunk, axis=axis, keepdims=True) + _round_if_needed(left_stat, padded.dtype) + + if left_length == right_length == max_length: + # return early as right_stat must be identical to left_stat + return left_stat, left_stat + + # Calculate statistic for the right side + right_slice = _slice_at_axis( + slice(right_index - right_length, right_index), axis) + right_chunk = padded[right_slice] + right_stat = stat_func(right_chunk, axis=axis, keepdims=True) + _round_if_needed(right_stat, padded.dtype) + + return left_stat, right_stat + + +def _set_reflect_both(padded, axis, width_pair, method, include_edge=False): + """ + Pad `axis` of `arr` with reflection. + + Parameters + ---------- + padded : ndarray + Input array of arbitrary shape. + axis : int + Axis along which to pad `arr`. + width_pair : (int, int) + Pair of widths that mark the pad area on both sides in the given + dimension. + method : str + Controls method of reflection; options are 'even' or 'odd'. + include_edge : bool + If true, edge value is included in reflection, otherwise the edge + value forms the symmetric axis to the reflection. + + Returns + ------- + pad_amt : tuple of ints, length 2 + New index positions of padding to do along the `axis`. If these are + both 0, padding is done in this dimension. 
+ """ + left_pad, right_pad = width_pair + old_length = padded.shape[axis] - right_pad - left_pad + + if include_edge: + # Edge is included, we need to offset the pad amount by 1 + edge_offset = 1 + else: + edge_offset = 0 # Edge is not included, no need to offset pad amount + old_length -= 1 # but must be omitted from the chunk + + if left_pad > 0: + # Pad with reflected values on left side: + # First limit chunk size which can't be larger than pad area + chunk_length = min(old_length, left_pad) + # Slice right to left, stop on or next to edge, start relative to stop + stop = left_pad - edge_offset + start = stop + chunk_length + left_slice = _slice_at_axis(slice(start, stop, -1), axis) + left_chunk = padded[left_slice] + + if method == "odd": + # Negate chunk and align with edge + edge_slice = _slice_at_axis(slice(left_pad, left_pad + 1), axis) + left_chunk = 2 * padded[edge_slice] - left_chunk + + # Insert chunk into padded area + start = left_pad - chunk_length + stop = left_pad + pad_area = _slice_at_axis(slice(start, stop), axis) + padded[pad_area] = left_chunk + # Adjust pointer to left edge for next iteration + left_pad -= chunk_length + + if right_pad > 0: + # Pad with reflected values on right side: + # First limit chunk size which can't be larger than pad area + chunk_length = min(old_length, right_pad) + # Slice right to left, start on or next to edge, stop relative to start + start = -right_pad + edge_offset - 2 + stop = start - chunk_length + right_slice = _slice_at_axis(slice(start, stop, -1), axis) + right_chunk = padded[right_slice] + + if method == "odd": + # Negate chunk and align with edge + edge_slice = _slice_at_axis( + slice(-right_pad - 1, -right_pad), axis) + right_chunk = 2 * padded[edge_slice] - right_chunk + + # Insert chunk into padded area + start = padded.shape[axis] - right_pad + stop = start + chunk_length + pad_area = _slice_at_axis(slice(start, stop), axis) + padded[pad_area] = right_chunk + # Adjust pointer to right edge for next iteration + right_pad -= chunk_length + + return left_pad, right_pad + + +def _set_wrap_both(padded, axis, width_pair): + """ + Pad `axis` of `arr` with wrapped values. + + Parameters + ---------- + padded : ndarray + Input array of arbitrary shape. + axis : int + Axis along which to pad `arr`. + width_pair : (int, int) + Pair of widths that mark the pad area on both sides in the given + dimension. + + Returns + ------- + pad_amt : tuple of ints, length 2 + New index positions of padding to do along the `axis`. If these are + both 0, padding is done in this dimension. + """ + left_pad, right_pad = width_pair + period = padded.shape[axis] - right_pad - left_pad + + # If the current dimension of `arr` doesn't contain enough valid values + # (not part of the undefined pad area) we need to pad multiple times. + # Each time the pad area shrinks on both sides which is communicated with + # these variables. + new_left_pad = 0 + new_right_pad = 0 + + if left_pad > 0: + # Pad with wrapped values on left side + # First slice chunk from right side of the non-pad area. 
+ # Use min(period, left_pad) to ensure that chunk is not larger than + # pad area + right_slice = _slice_at_axis( + slice(-right_pad - min(period, left_pad), + -right_pad if right_pad != 0 else None), + axis + ) + right_chunk = padded[right_slice] + + if left_pad > period: + # Chunk is smaller than pad area + pad_area = _slice_at_axis(slice(left_pad - period, left_pad), axis) + new_left_pad = left_pad - period + else: + # Chunk matches pad area + pad_area = _slice_at_axis(slice(None, left_pad), axis) + padded[pad_area] = right_chunk + + if right_pad > 0: + # Pad with wrapped values on right side + # First slice chunk from left side of the non-pad area. + # Use min(period, right_pad) to ensure that chunk is not larger than + # pad area + left_slice = _slice_at_axis( + slice(left_pad, left_pad + min(period, right_pad),), axis) + left_chunk = padded[left_slice] + + if right_pad > period: + # Chunk is smaller than pad area + pad_area = _slice_at_axis( + slice(-right_pad, -right_pad + period), axis) + new_right_pad = right_pad - period + else: + # Chunk matches pad area + pad_area = _slice_at_axis(slice(-right_pad, None), axis) + padded[pad_area] = left_chunk + + return new_left_pad, new_right_pad + + +def _as_pairs(x, ndim, as_index=False): + """ + Broadcast `x` to an array with the shape (`ndim`, 2). + + A helper function for `pad` that prepares and validates arguments like + `pad_width` for iteration in pairs. + + Parameters + ---------- + x : {None, scalar, array-like} + The object to broadcast to the shape (`ndim`, 2). + ndim : int + Number of pairs the broadcasted `x` will have. + as_index : bool, optional + If `x` is not None, try to round each element of `x` to an integer + (dtype `np.intp`) and ensure every element is positive. + + Returns + ------- + pairs : nested iterables, shape (`ndim`, 2) + The broadcasted version of `x`. + + Raises + ------ + ValueError + If `as_index` is True and `x` contains negative elements. + Or if `x` is not broadcastable to the shape (`ndim`, 2). + """ + if x is None: + # Pass through None as a special case, otherwise np.round(x) fails + # with an AttributeError + return ((None, None),) * ndim + + x = np.array(x) + if as_index: + x = np.round(x).astype(np.intp, copy=False) + + if x.ndim < 3: + # Optimization: Possibly use faster paths for cases where `x` has + # only 1 or 2 elements. `np.broadcast_to` could handle these as well + # but is currently slower + + if x.size == 1: + # x was supplied as a single value + x = x.ravel() # Ensure x[0] works for x.ndim == 0, 1, 2 + if as_index and x < 0: + raise ValueError("index can't contain negative values") + return ((x[0], x[0]),) * ndim + + if x.size == 2 and x.shape != (2, 1): + # x was supplied with a single value for each side + # but except case when each dimension has a single value + # which should be broadcasted to a pair, + # e.g. 
[[1], [2]] -> [[1, 1], [2, 2]] not [[1, 2], [1, 2]]
+            x = x.ravel()  # Ensure x[0], x[1] works
+            if as_index and (x[0] < 0 or x[1] < 0):
+                raise ValueError("index can't contain negative values")
+            return ((x[0], x[1]),) * ndim
+
+    if as_index and x.min() < 0:
+        raise ValueError("index can't contain negative values")
+
+    # Converting the array with `tolist` seems to improve performance
+    # when iterating and indexing the result (see usage in `pad`)
+    return np.broadcast_to(x, (ndim, 2)).tolist()
+
+
+def _pad_dispatcher(array, pad_width, mode=None, **kwargs):
+    return (array,)
+
+
+###############################################################################
+# Public functions
+
+
+@array_function_dispatch(_pad_dispatcher, module='numpy')
+def pad(array, pad_width, mode='constant', **kwargs):
+    """
+    Pad an array.
+
+    Parameters
+    ----------
+    array : array_like of rank N
+        The array to pad.
+    pad_width : {sequence, array_like, int}
+        Number of values padded to the edges of each axis.
+        ((before_1, after_1), ... (before_N, after_N)) unique pad widths
+        for each axis.
+        ((before, after),) yields same before and after pad for each axis.
+        (pad,) or int is a shortcut for before = after = pad width for all
+        axes.
+    mode : str or function, optional
+        One of the following string values or a user supplied function.
+
+        'constant' (default)
+            Pads with a constant value.
+        'edge'
+            Pads with the edge values of array.
+        'linear_ramp'
+            Pads with the linear ramp between end_value and the
+            array edge value.
+        'maximum'
+            Pads with the maximum value of all or part of the
+            vector along each axis.
+        'mean'
+            Pads with the mean value of all or part of the
+            vector along each axis.
+        'median'
+            Pads with the median value of all or part of the
+            vector along each axis.
+        'minimum'
+            Pads with the minimum value of all or part of the
+            vector along each axis.
+        'reflect'
+            Pads with the reflection of the vector mirrored on
+            the first and last values of the vector along each
+            axis.
+        'symmetric'
+            Pads with the reflection of the vector mirrored
+            along the edge of the array.
+        'wrap'
+            Pads with the wrap of the vector along the axis.
+            The first values are used to pad the end and the
+            end values are used to pad the beginning.
+        'empty'
+            Pads with undefined values.
+
+            .. versionadded:: 1.17
+
+        <function>
+            Padding function, see Notes.
+    stat_length : sequence or int, optional
+        Used in 'maximum', 'mean', 'median', and 'minimum'. Number of
+        values at edge of each axis used to calculate the statistic value.
+
+        ((before_1, after_1), ... (before_N, after_N)) unique statistic
+        lengths for each axis.
+
+        ((before, after),) yields same before and after statistic lengths
+        for each axis.
+
+        (stat_length,) or int is a shortcut for before = after = statistic
+        length for all axes.
+
+        Default is ``None``, to use the entire axis.
+    constant_values : sequence or scalar, optional
+        Used in 'constant'. The values to set the padded values for each
+        axis.
+
+        ``((before_1, after_1), ... (before_N, after_N))`` unique pad constants
+        for each axis.
+
+        ``((before, after),)`` yields same before and after constants for each
+        axis.
+
+        ``(constant,)`` or ``constant`` is a shortcut for ``before = after = constant`` for
+        all axes.
+
+        Default is 0.
+    end_values : sequence or scalar, optional
+        Used in 'linear_ramp'. The values used for the ending value of the
+        linear_ramp and that will form the edge of the padded array.
+
+        ``((before_1, after_1), ... (before_N, after_N))`` unique end values
+        for each axis.
+ + ``((before, after),)`` yields same before and after end values for each + axis. + + ``(constant,)`` or ``constant`` is a shortcut for ``before = after = constant`` for + all axes. + + Default is 0. + reflect_type : {'even', 'odd'}, optional + Used in 'reflect', and 'symmetric'. The 'even' style is the + default with an unaltered reflection around the edge value. For + the 'odd' style, the extended part of the array is created by + subtracting the reflected values from two times the edge value. + + Returns + ------- + pad : ndarray + Padded array of rank equal to `array` with shape increased + according to `pad_width`. + + Notes + ----- + .. versionadded:: 1.7.0 + + For an array with rank greater than 1, some of the padding of later + axes is calculated from padding of previous axes. This is easiest to + think about with a rank 2 array where the corners of the padded array + are calculated by using padded values from the first axis. + + The padding function, if used, should modify a rank 1 array in-place. It + has the following signature:: + + padding_func(vector, iaxis_pad_width, iaxis, kwargs) + + where + + vector : ndarray + A rank 1 array already padded with zeros. Padded values are + vector[:iaxis_pad_width[0]] and vector[-iaxis_pad_width[1]:]. + iaxis_pad_width : tuple + A 2-tuple of ints, iaxis_pad_width[0] represents the number of + values padded at the beginning of vector where + iaxis_pad_width[1] represents the number of values padded at + the end of vector. + iaxis : int + The axis currently being calculated. + kwargs : dict + Any keyword arguments the function requires. + + Examples + -------- + >>> a = [1, 2, 3, 4, 5] + >>> np.pad(a, (2, 3), 'constant', constant_values=(4, 6)) + array([4, 4, 1, ..., 6, 6, 6]) + + >>> np.pad(a, (2, 3), 'edge') + array([1, 1, 1, ..., 5, 5, 5]) + + >>> np.pad(a, (2, 3), 'linear_ramp', end_values=(5, -4)) + array([ 5, 3, 1, 2, 3, 4, 5, 2, -1, -4]) + + >>> np.pad(a, (2,), 'maximum') + array([5, 5, 1, 2, 3, 4, 5, 5, 5]) + + >>> np.pad(a, (2,), 'mean') + array([3, 3, 1, 2, 3, 4, 5, 3, 3]) + + >>> np.pad(a, (2,), 'median') + array([3, 3, 1, 2, 3, 4, 5, 3, 3]) + + >>> a = [[1, 2], [3, 4]] + >>> np.pad(a, ((3, 2), (2, 3)), 'minimum') + array([[1, 1, 1, 2, 1, 1, 1], + [1, 1, 1, 2, 1, 1, 1], + [1, 1, 1, 2, 1, 1, 1], + [1, 1, 1, 2, 1, 1, 1], + [3, 3, 3, 4, 3, 3, 3], + [1, 1, 1, 2, 1, 1, 1], + [1, 1, 1, 2, 1, 1, 1]]) + + >>> a = [1, 2, 3, 4, 5] + >>> np.pad(a, (2, 3), 'reflect') + array([3, 2, 1, 2, 3, 4, 5, 4, 3, 2]) + + >>> np.pad(a, (2, 3), 'reflect', reflect_type='odd') + array([-1, 0, 1, 2, 3, 4, 5, 6, 7, 8]) + + >>> np.pad(a, (2, 3), 'symmetric') + array([2, 1, 1, 2, 3, 4, 5, 5, 4, 3]) + + >>> np.pad(a, (2, 3), 'symmetric', reflect_type='odd') + array([0, 1, 1, 2, 3, 4, 5, 5, 6, 7]) + + >>> np.pad(a, (2, 3), 'wrap') + array([4, 5, 1, 2, 3, 4, 5, 1, 2, 3]) + + >>> def pad_with(vector, pad_width, iaxis, kwargs): + ... pad_value = kwargs.get('padder', 10) + ... vector[:pad_width[0]] = pad_value + ... 
vector[-pad_width[1]:] = pad_value + >>> a = np.arange(6) + >>> a = a.reshape((2, 3)) + >>> np.pad(a, 2, pad_with) + array([[10, 10, 10, 10, 10, 10, 10], + [10, 10, 10, 10, 10, 10, 10], + [10, 10, 0, 1, 2, 10, 10], + [10, 10, 3, 4, 5, 10, 10], + [10, 10, 10, 10, 10, 10, 10], + [10, 10, 10, 10, 10, 10, 10]]) + >>> np.pad(a, 2, pad_with, padder=100) + array([[100, 100, 100, 100, 100, 100, 100], + [100, 100, 100, 100, 100, 100, 100], + [100, 100, 0, 1, 2, 100, 100], + [100, 100, 3, 4, 5, 100, 100], + [100, 100, 100, 100, 100, 100, 100], + [100, 100, 100, 100, 100, 100, 100]]) + """ + array = np.asarray(array) + pad_width = np.asarray(pad_width) + + if not pad_width.dtype.kind == 'i': + raise TypeError('`pad_width` must be of integral type.') + + # Broadcast to shape (array.ndim, 2) + pad_width = _as_pairs(pad_width, array.ndim, as_index=True) + + if callable(mode): + # Old behavior: Use user-supplied function with np.apply_along_axis + function = mode + # Create a new zero padded array + padded, _ = _pad_simple(array, pad_width, fill_value=0) + # And apply along each axis + + for axis in range(padded.ndim): + # Iterate using ndindex as in apply_along_axis, but assuming that + # function operates inplace on the padded array. + + # view with the iteration axis at the end + view = np.moveaxis(padded, axis, -1) + + # compute indices for the iteration axes, and append a trailing + # ellipsis to prevent 0d arrays decaying to scalars (gh-8642) + inds = ndindex(view.shape[:-1]) + inds = (ind + (Ellipsis,) for ind in inds) + for ind in inds: + function(view[ind], pad_width[axis], axis, kwargs) + + return padded + + # Make sure that no unsupported keywords were passed for the current mode + allowed_kwargs = { + 'empty': [], 'edge': [], 'wrap': [], + 'constant': ['constant_values'], + 'linear_ramp': ['end_values'], + 'maximum': ['stat_length'], + 'mean': ['stat_length'], + 'median': ['stat_length'], + 'minimum': ['stat_length'], + 'reflect': ['reflect_type'], + 'symmetric': ['reflect_type'], + } + try: + unsupported_kwargs = set(kwargs) - set(allowed_kwargs[mode]) + except KeyError: + raise ValueError("mode '{}' is not supported".format(mode)) from None + if unsupported_kwargs: + raise ValueError("unsupported keyword arguments for mode '{}': {}" + .format(mode, unsupported_kwargs)) + + stat_functions = {"maximum": np.amax, "minimum": np.amin, + "mean": np.mean, "median": np.median} + + # Create array with final shape and original values + # (padded area is undefined) + padded, original_area_slice = _pad_simple(array, pad_width) + # And prepare iteration over all dimensions + # (zipping may be more readable than using enumerate) + axes = range(padded.ndim) + + if mode == "constant": + values = kwargs.get("constant_values", 0) + values = _as_pairs(values, padded.ndim) + for axis, width_pair, value_pair in zip(axes, pad_width, values): + roi = _view_roi(padded, original_area_slice, axis) + _set_pad_area(roi, axis, width_pair, value_pair) + + elif mode == "empty": + pass # Do nothing as _pad_simple already returned the correct result + + elif array.size == 0: + # Only modes "constant" and "empty" can extend empty axes, all other + # modes depend on `array` not being empty + # -> ensure every empty axis is only "padded with 0" + for axis, width_pair in zip(axes, pad_width): + if array.shape[axis] == 0 and any(width_pair): + raise ValueError( + "can't extend empty axis {} using modes other than " + "'constant' or 'empty'".format(axis) + ) + # passed, don't need to do anything more as _pad_simple already + # 
returned the correct result + + elif mode == "edge": + for axis, width_pair in zip(axes, pad_width): + roi = _view_roi(padded, original_area_slice, axis) + edge_pair = _get_edges(roi, axis, width_pair) + _set_pad_area(roi, axis, width_pair, edge_pair) + + elif mode == "linear_ramp": + end_values = kwargs.get("end_values", 0) + end_values = _as_pairs(end_values, padded.ndim) + for axis, width_pair, value_pair in zip(axes, pad_width, end_values): + roi = _view_roi(padded, original_area_slice, axis) + ramp_pair = _get_linear_ramps(roi, axis, width_pair, value_pair) + _set_pad_area(roi, axis, width_pair, ramp_pair) + + elif mode in stat_functions: + func = stat_functions[mode] + length = kwargs.get("stat_length", None) + length = _as_pairs(length, padded.ndim, as_index=True) + for axis, width_pair, length_pair in zip(axes, pad_width, length): + roi = _view_roi(padded, original_area_slice, axis) + stat_pair = _get_stats(roi, axis, width_pair, length_pair, func) + _set_pad_area(roi, axis, width_pair, stat_pair) + + elif mode in {"reflect", "symmetric"}: + method = kwargs.get("reflect_type", "even") + include_edge = True if mode == "symmetric" else False + for axis, (left_index, right_index) in zip(axes, pad_width): + if array.shape[axis] == 1 and (left_index > 0 or right_index > 0): + # Extending singleton dimension for 'reflect' is legacy + # behavior; it really should raise an error. + edge_pair = _get_edges(padded, axis, (left_index, right_index)) + _set_pad_area( + padded, axis, (left_index, right_index), edge_pair) + continue + + roi = _view_roi(padded, original_area_slice, axis) + while left_index > 0 or right_index > 0: + # Iteratively pad until dimension is filled with reflected + # values. This is necessary if the pad area is larger than + # the length of the original values in the current dimension. + left_index, right_index = _set_reflect_both( + roi, axis, (left_index, right_index), + method, include_edge + ) + + elif mode == "wrap": + for axis, (left_index, right_index) in zip(axes, pad_width): + roi = _view_roi(padded, original_area_slice, axis) + while left_index > 0 or right_index > 0: + # Iteratively pad until dimension is filled with wrapped + # values. This is necessary if the pad area is larger than + # the length of the original values in the current dimension. + left_index, right_index = _set_wrap_both( + roi, axis, (left_index, right_index)) + + return padded diff --git a/.local/lib/python3.8/site-packages/numpy/lib/arraypad.pyi b/.local/lib/python3.8/site-packages/numpy/lib/arraypad.pyi new file mode 100644 index 0000000..d7c5f48 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/arraypad.pyi @@ -0,0 +1,91 @@ +from typing import ( + Literal as L, + Any, + Dict, + List, + overload, + Tuple, + TypeVar, + Protocol, +) + +from numpy import ndarray, dtype, generic + +from numpy.typing import ( + ArrayLike, + NDArray, + _ArrayLikeInt, + _FiniteNestedSequence, + _SupportsArray, +) + +_SCT = TypeVar("_SCT", bound=generic) + +class _ModeFunc(Protocol): + def __call__( + self, + vector: NDArray[Any], + iaxis_pad_width: Tuple[int, int], + iaxis: int, + kwargs: Dict[str, Any], + /, + ) -> None: ... + +_ModeKind = L[ + "constant", + "edge", + "linear_ramp", + "maximum", + "mean", + "median", + "minimum", + "reflect", + "symmetric", + "wrap", + "empty", +] + +_ArrayLike = _FiniteNestedSequence[_SupportsArray[dtype[_SCT]]] + +__all__: List[str] + +# TODO: In practice each keyword argument is exclusive to one or more +# specific modes. 
Consider adding more overloads to express this in the future. + +# Expand `**kwargs` into explicit keyword-only arguments +@overload +def pad( + array: _ArrayLike[_SCT], + pad_width: _ArrayLikeInt, + mode: _ModeKind = ..., + *, + stat_length: None | _ArrayLikeInt = ..., + constant_values: ArrayLike = ..., + end_values: ArrayLike = ..., + reflect_type: L["odd", "even"] = ..., +) -> NDArray[_SCT]: ... +@overload +def pad( + array: ArrayLike, + pad_width: _ArrayLikeInt, + mode: _ModeKind = ..., + *, + stat_length: None | _ArrayLikeInt = ..., + constant_values: ArrayLike = ..., + end_values: ArrayLike = ..., + reflect_type: L["odd", "even"] = ..., +) -> NDArray[Any]: ... +@overload +def pad( + array: _ArrayLike[_SCT], + pad_width: _ArrayLikeInt, + mode: _ModeFunc, + **kwargs: Any, +) -> NDArray[_SCT]: ... +@overload +def pad( + array: ArrayLike, + pad_width: _ArrayLikeInt, + mode: _ModeFunc, + **kwargs: Any, +) -> NDArray[Any]: ... diff --git a/.local/lib/python3.8/site-packages/numpy/lib/arraysetops.py b/.local/lib/python3.8/site-packages/numpy/lib/arraysetops.py new file mode 100644 index 0000000..bd56b69 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/arraysetops.py @@ -0,0 +1,826 @@ +""" +Set operations for arrays based on sorting. + +Notes +----- + +For floating point arrays, inaccurate results may appear due to usual round-off +and floating point comparison issues. + +Speed could be gained in some operations by an implementation of +`numpy.sort`, that can provide directly the permutation vectors, thus avoiding +calls to `numpy.argsort`. + +Original author: Robert Cimrman + +""" +import functools + +import numpy as np +from numpy.core import overrides + + +array_function_dispatch = functools.partial( + overrides.array_function_dispatch, module='numpy') + + +__all__ = [ + 'ediff1d', 'intersect1d', 'setxor1d', 'union1d', 'setdiff1d', 'unique', + 'in1d', 'isin' + ] + + +def _ediff1d_dispatcher(ary, to_end=None, to_begin=None): + return (ary, to_end, to_begin) + + +@array_function_dispatch(_ediff1d_dispatcher) +def ediff1d(ary, to_end=None, to_begin=None): + """ + The differences between consecutive elements of an array. + + Parameters + ---------- + ary : array_like + If necessary, will be flattened before the differences are taken. + to_end : array_like, optional + Number(s) to append at the end of the returned differences. + to_begin : array_like, optional + Number(s) to prepend at the beginning of the returned differences. + + Returns + ------- + ediff1d : ndarray + The differences. Loosely, this is ``ary.flat[1:] - ary.flat[:-1]``. + + See Also + -------- + diff, gradient + + Notes + ----- + When applied to masked arrays, this function drops the mask information + if the `to_begin` and/or `to_end` parameters are used. + + Examples + -------- + >>> x = np.array([1, 2, 4, 7, 0]) + >>> np.ediff1d(x) + array([ 1, 2, 3, -7]) + + >>> np.ediff1d(x, to_begin=-99, to_end=np.array([88, 99])) + array([-99, 1, 2, ..., -7, 88, 99]) + + The returned array is always 1D. 
+ + >>> y = [[1, 2, 4], [1, 6, 24]] + >>> np.ediff1d(y) + array([ 1, 2, -3, 5, 18]) + + """ + # force a 1d array + ary = np.asanyarray(ary).ravel() + + # enforce that the dtype of `ary` is used for the output + dtype_req = ary.dtype + + # fast track default case + if to_begin is None and to_end is None: + return ary[1:] - ary[:-1] + + if to_begin is None: + l_begin = 0 + else: + to_begin = np.asanyarray(to_begin) + if not np.can_cast(to_begin, dtype_req, casting="same_kind"): + raise TypeError("dtype of `to_begin` must be compatible " + "with input `ary` under the `same_kind` rule.") + + to_begin = to_begin.ravel() + l_begin = len(to_begin) + + if to_end is None: + l_end = 0 + else: + to_end = np.asanyarray(to_end) + if not np.can_cast(to_end, dtype_req, casting="same_kind"): + raise TypeError("dtype of `to_end` must be compatible " + "with input `ary` under the `same_kind` rule.") + + to_end = to_end.ravel() + l_end = len(to_end) + + # do the calculation in place and copy to_begin and to_end + l_diff = max(len(ary) - 1, 0) + result = np.empty(l_diff + l_begin + l_end, dtype=ary.dtype) + result = ary.__array_wrap__(result) + if l_begin > 0: + result[:l_begin] = to_begin + if l_end > 0: + result[l_begin + l_diff:] = to_end + np.subtract(ary[1:], ary[:-1], result[l_begin:l_begin + l_diff]) + return result + + +def _unpack_tuple(x): + """ Unpacks one-element tuples for use as return values """ + if len(x) == 1: + return x[0] + else: + return x + + +def _unique_dispatcher(ar, return_index=None, return_inverse=None, + return_counts=None, axis=None): + return (ar,) + + +@array_function_dispatch(_unique_dispatcher) +def unique(ar, return_index=False, return_inverse=False, + return_counts=False, axis=None): + """ + Find the unique elements of an array. + + Returns the sorted unique elements of an array. There are three optional + outputs in addition to the unique elements: + + * the indices of the input array that give the unique values + * the indices of the unique array that reconstruct the input array + * the number of times each unique value comes up in the input array + + Parameters + ---------- + ar : array_like + Input array. Unless `axis` is specified, this will be flattened if it + is not already 1-D. + return_index : bool, optional + If True, also return the indices of `ar` (along the specified axis, + if provided, or in the flattened array) that result in the unique array. + return_inverse : bool, optional + If True, also return the indices of the unique array (for the specified + axis, if provided) that can be used to reconstruct `ar`. + return_counts : bool, optional + If True, also return the number of times each unique item appears + in `ar`. + + .. versionadded:: 1.9.0 + + axis : int or None, optional + The axis to operate on. If None, `ar` will be flattened. If an integer, + the subarrays indexed by the given axis will be flattened and treated + as the elements of a 1-D array with the dimension of the given axis, + see the notes for more details. Object arrays or structured arrays + that contain objects are not supported if the `axis` kwarg is used. The + default is None. + + .. versionadded:: 1.13.0 + + Returns + ------- + unique : ndarray + The sorted unique values. + unique_indices : ndarray, optional + The indices of the first occurrences of the unique values in the + original array. Only provided if `return_index` is True. + unique_inverse : ndarray, optional + The indices to reconstruct the original array from the + unique array. Only provided if `return_inverse` is True. 
+    unique_counts : ndarray, optional
+        The number of times each of the unique values comes up in the
+        original array. Only provided if `return_counts` is True.
+
+        .. versionadded:: 1.9.0
+
+    See Also
+    --------
+    numpy.lib.arraysetops : Module with a number of other functions for
+                            performing set operations on arrays.
+    repeat : Repeat elements of an array.
+
+    Notes
+    -----
+    When an axis is specified the subarrays indexed by the axis are sorted.
+    This is done by making the specified axis the first dimension of the array
+    (move the axis to the first dimension to keep the order of the other axes)
+    and then flattening the subarrays in C order. The flattened subarrays are
+    then viewed as a structured type with each element given a label, with the
+    effect that we end up with a 1-D array of structured types that can be
+    treated in the same way as any other 1-D array. The result is that the
+    flattened subarrays are sorted in lexicographic order starting with the
+    first element.
+
+    .. versionchanged:: 1.21
+        If nan values are in the input array, a single nan is put
+        to the end of the sorted unique values.
+
+        Also for complex arrays all NaN values are considered equivalent
+        (no matter whether the NaN is in the real or imaginary part).
+        As the representative for the returned array the smallest one in the
+        lexicographical order is chosen; see np.sort for how the
+        lexicographical order is defined for complex arrays.
+
+    Examples
+    --------
+    >>> np.unique([1, 1, 2, 2, 3, 3])
+    array([1, 2, 3])
+    >>> a = np.array([[1, 1], [2, 3]])
+    >>> np.unique(a)
+    array([1, 2, 3])
+
+    Return the unique rows of a 2D array:
+
+    >>> a = np.array([[1, 0, 0], [1, 0, 0], [2, 3, 4]])
+    >>> np.unique(a, axis=0)
+    array([[1, 0, 0], [2, 3, 4]])
+
+    Return the indices of the original array that give the unique values:
+
+    >>> a = np.array(['a', 'b', 'b', 'c', 'a'])
+    >>> u, indices = np.unique(a, return_index=True)
+    >>> u
+    array(['a', 'b', 'c'], dtype='<U1')
+    >>> indices
+    array([0, 1, 3])
+    >>> a[indices]
+    array(['a', 'b', 'c'], dtype='<U1')
+
+    Reconstruct the input array from the unique values and inverse:
+
+    >>> a = np.array([1, 2, 6, 4, 2, 3, 2])
+    >>> u, indices = np.unique(a, return_inverse=True)
+    >>> u
+    array([1, 2, 3, 4, 6])
+    >>> indices
+    array([0, 1, 4, 3, 1, 2, 1])
+    >>> u[indices]
+    array([1, 2, 6, 4, 2, 3, 2])
+
+    Reconstruct the input values from the unique values and counts:
+
+    >>> a = np.array([1, 2, 6, 4, 2, 3, 2])
+    >>> values, counts = np.unique(a, return_counts=True)
+    >>> values
+    array([1, 2, 3, 4, 6])
+    >>> counts
+    array([1, 3, 1, 1, 1])
+    >>> np.repeat(values, counts)
+    array([1, 2, 2, 2, 3, 4, 6])    # original order not preserved
+
+    """
+    ar = np.asanyarray(ar)
+    if axis is None:
+        ret = _unique1d(ar, return_index, return_inverse, return_counts)
+        return _unpack_tuple(ret)
+
+    # axis was specified and not None
+    try:
+        ar = np.moveaxis(ar, axis, 0)
+    except np.AxisError:
+        # this removes the "axis1" or "axis2" prefix from the error message
+        raise np.AxisError(axis, ar.ndim) from None
+
+    # Must reshape to a contiguous 2D array for this to work...
+    orig_shape, orig_dtype = ar.shape, ar.dtype
+    ar = ar.reshape(orig_shape[0], np.prod(orig_shape[1:], dtype=np.intp))
+    ar = np.ascontiguousarray(ar)
+    dtype = [('f{i}'.format(i=i), ar.dtype) for i in range(ar.shape[1])]
+
+    # At this point, `ar` has shape `(n, m)`, and `dtype` is a structured
+    # data type with `m` fields where each field has the data type of `ar`.
+    # In the following, we create the array `consolidated`, which has
+    # shape `(n,)` with data type `dtype`.
+ try: + if ar.shape[1] > 0: + consolidated = ar.view(dtype) + else: + # If ar.shape[1] == 0, then dtype will be `np.dtype([])`, which is + # a data type with itemsize 0, and the call `ar.view(dtype)` will + # fail. Instead, we'll use `np.empty` to explicitly create the + # array with shape `(len(ar),)`. Because `dtype` in this case has + # itemsize 0, the total size of the result is still 0 bytes. + consolidated = np.empty(len(ar), dtype=dtype) + except TypeError as e: + # There's no good way to do this for object arrays, etc... + msg = 'The axis argument to unique is not supported for dtype {dt}' + raise TypeError(msg.format(dt=ar.dtype)) from e + + def reshape_uniq(uniq): + n = len(uniq) + uniq = uniq.view(orig_dtype) + uniq = uniq.reshape(n, *orig_shape[1:]) + uniq = np.moveaxis(uniq, 0, axis) + return uniq + + output = _unique1d(consolidated, return_index, + return_inverse, return_counts) + output = (reshape_uniq(output[0]),) + output[1:] + return _unpack_tuple(output) + + +def _unique1d(ar, return_index=False, return_inverse=False, + return_counts=False): + """ + Find the unique elements of an array, ignoring shape. + """ + ar = np.asanyarray(ar).flatten() + + optional_indices = return_index or return_inverse + + if optional_indices: + perm = ar.argsort(kind='mergesort' if return_index else 'quicksort') + aux = ar[perm] + else: + ar.sort() + aux = ar + mask = np.empty(aux.shape, dtype=np.bool_) + mask[:1] = True + if aux.shape[0] > 0 and aux.dtype.kind in "cfmM" and np.isnan(aux[-1]): + if aux.dtype.kind == "c": # for complex all NaNs are considered equivalent + aux_firstnan = np.searchsorted(np.isnan(aux), True, side='left') + else: + aux_firstnan = np.searchsorted(aux, aux[-1], side='left') + if aux_firstnan > 0: + mask[1:aux_firstnan] = ( + aux[1:aux_firstnan] != aux[:aux_firstnan - 1]) + mask[aux_firstnan] = True + mask[aux_firstnan + 1:] = False + else: + mask[1:] = aux[1:] != aux[:-1] + + ret = (aux[mask],) + if return_index: + ret += (perm[mask],) + if return_inverse: + imask = np.cumsum(mask) - 1 + inv_idx = np.empty(mask.shape, dtype=np.intp) + inv_idx[perm] = imask + ret += (inv_idx,) + if return_counts: + idx = np.concatenate(np.nonzero(mask) + ([mask.size],)) + ret += (np.diff(idx),) + return ret + + +def _intersect1d_dispatcher( + ar1, ar2, assume_unique=None, return_indices=None): + return (ar1, ar2) + + +@array_function_dispatch(_intersect1d_dispatcher) +def intersect1d(ar1, ar2, assume_unique=False, return_indices=False): + """ + Find the intersection of two arrays. + + Return the sorted, unique values that are in both of the input arrays. + + Parameters + ---------- + ar1, ar2 : array_like + Input arrays. Will be flattened if not already 1D. + assume_unique : bool + If True, the input arrays are both assumed to be unique, which + can speed up the calculation. If True but ``ar1`` or ``ar2`` are not + unique, incorrect results and out-of-bounds indices could result. + Default is False. + return_indices : bool + If True, the indices which correspond to the intersection of the two + arrays are returned. The first instance of a value is used if there are + multiple. Default is False. + + .. versionadded:: 1.15.0 + + Returns + ------- + intersect1d : ndarray + Sorted 1D array of common and unique elements. + comm1 : ndarray + The indices of the first occurrences of the common values in `ar1`. + Only provided if `return_indices` is True. + comm2 : ndarray + The indices of the first occurrences of the common values in `ar2`. + Only provided if `return_indices` is True. 
+ + + See Also + -------- + numpy.lib.arraysetops : Module with a number of other functions for + performing set operations on arrays. + + Examples + -------- + >>> np.intersect1d([1, 3, 4, 3], [3, 1, 2, 1]) + array([1, 3]) + + To intersect more than two arrays, use functools.reduce: + + >>> from functools import reduce + >>> reduce(np.intersect1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2])) + array([3]) + + To return the indices of the values common to the input arrays + along with the intersected values: + + >>> x = np.array([1, 1, 2, 3, 4]) + >>> y = np.array([2, 1, 4, 6]) + >>> xy, x_ind, y_ind = np.intersect1d(x, y, return_indices=True) + >>> x_ind, y_ind + (array([0, 2, 4]), array([1, 0, 2])) + >>> xy, x[x_ind], y[y_ind] + (array([1, 2, 4]), array([1, 2, 4]), array([1, 2, 4])) + + """ + ar1 = np.asanyarray(ar1) + ar2 = np.asanyarray(ar2) + + if not assume_unique: + if return_indices: + ar1, ind1 = unique(ar1, return_index=True) + ar2, ind2 = unique(ar2, return_index=True) + else: + ar1 = unique(ar1) + ar2 = unique(ar2) + else: + ar1 = ar1.ravel() + ar2 = ar2.ravel() + + aux = np.concatenate((ar1, ar2)) + if return_indices: + aux_sort_indices = np.argsort(aux, kind='mergesort') + aux = aux[aux_sort_indices] + else: + aux.sort() + + mask = aux[1:] == aux[:-1] + int1d = aux[:-1][mask] + + if return_indices: + ar1_indices = aux_sort_indices[:-1][mask] + ar2_indices = aux_sort_indices[1:][mask] - ar1.size + if not assume_unique: + ar1_indices = ind1[ar1_indices] + ar2_indices = ind2[ar2_indices] + + return int1d, ar1_indices, ar2_indices + else: + return int1d + + +def _setxor1d_dispatcher(ar1, ar2, assume_unique=None): + return (ar1, ar2) + + +@array_function_dispatch(_setxor1d_dispatcher) +def setxor1d(ar1, ar2, assume_unique=False): + """ + Find the set exclusive-or of two arrays. + + Return the sorted, unique values that are in only one (not both) of the + input arrays. + + Parameters + ---------- + ar1, ar2 : array_like + Input arrays. + assume_unique : bool + If True, the input arrays are both assumed to be unique, which + can speed up the calculation. Default is False. + + Returns + ------- + setxor1d : ndarray + Sorted 1D array of unique values that are in only one of the input + arrays. + + Examples + -------- + >>> a = np.array([1, 2, 3, 2, 4]) + >>> b = np.array([2, 3, 5, 7, 5]) + >>> np.setxor1d(a,b) + array([1, 4, 5, 7]) + + """ + if not assume_unique: + ar1 = unique(ar1) + ar2 = unique(ar2) + + aux = np.concatenate((ar1, ar2)) + if aux.size == 0: + return aux + + aux.sort() + flag = np.concatenate(([True], aux[1:] != aux[:-1], [True])) + return aux[flag[1:] & flag[:-1]] + + +def _in1d_dispatcher(ar1, ar2, assume_unique=None, invert=None): + return (ar1, ar2) + + +@array_function_dispatch(_in1d_dispatcher) +def in1d(ar1, ar2, assume_unique=False, invert=False): + """ + Test whether each element of a 1-D array is also present in a second array. + + Returns a boolean array the same length as `ar1` that is True + where an element of `ar1` is in `ar2` and False otherwise. + + We recommend using :func:`isin` instead of `in1d` for new code. + + Parameters + ---------- + ar1 : (M,) array_like + Input array. + ar2 : array_like + The values against which to test each value of `ar1`. + assume_unique : bool, optional + If True, the input arrays are both assumed to be unique, which + can speed up the calculation. Default is False. + invert : bool, optional + If True, the values in the returned array are inverted (that is, + False where an element of `ar1` is in `ar2` and True otherwise). 
+ Default is False. ``np.in1d(a, b, invert=True)`` is equivalent + to (but is faster than) ``np.invert(in1d(a, b))``. + + .. versionadded:: 1.8.0 + + Returns + ------- + in1d : (M,) ndarray, bool + The values `ar1[in1d]` are in `ar2`. + + See Also + -------- + isin : Version of this function that preserves the + shape of ar1. + numpy.lib.arraysetops : Module with a number of other functions for + performing set operations on arrays. + + Notes + ----- + `in1d` can be considered as an element-wise function version of the + python keyword `in`, for 1-D sequences. ``in1d(a, b)`` is roughly + equivalent to ``np.array([item in b for item in a])``. + However, this idea fails if `ar2` is a set, or similar (non-sequence) + container: As ``ar2`` is converted to an array, in those cases + ``asarray(ar2)`` is an object array rather than the expected array of + contained values. + + .. versionadded:: 1.4.0 + + Examples + -------- + >>> test = np.array([0, 1, 2, 5, 0]) + >>> states = [0, 2] + >>> mask = np.in1d(test, states) + >>> mask + array([ True, False, True, False, True]) + >>> test[mask] + array([0, 2, 0]) + >>> mask = np.in1d(test, states, invert=True) + >>> mask + array([False, True, False, True, False]) + >>> test[mask] + array([1, 5]) + """ + # Ravel both arrays, behavior for the first array could be different + ar1 = np.asarray(ar1).ravel() + ar2 = np.asarray(ar2).ravel() + + # Ensure that iteration through object arrays yields size-1 arrays + if ar2.dtype == object: + ar2 = ar2.reshape(-1, 1) + + # Check if one of the arrays may contain arbitrary objects + contains_object = ar1.dtype.hasobject or ar2.dtype.hasobject + + # This code is run when + # a) the first condition is true, making the code significantly faster + # b) the second condition is true (i.e. `ar1` or `ar2` may contain + # arbitrary objects), since then sorting is not guaranteed to work + if len(ar2) < 10 * len(ar1) ** 0.145 or contains_object: + if invert: + mask = np.ones(len(ar1), dtype=bool) + for a in ar2: + mask &= (ar1 != a) + else: + mask = np.zeros(len(ar1), dtype=bool) + for a in ar2: + mask |= (ar1 == a) + return mask + + # Otherwise use sorting + if not assume_unique: + ar1, rev_idx = np.unique(ar1, return_inverse=True) + ar2 = np.unique(ar2) + + ar = np.concatenate((ar1, ar2)) + # We need this to be a stable sort, so always use 'mergesort' + # here. The values from the first array should always come before + # the values from the second array. + order = ar.argsort(kind='mergesort') + sar = ar[order] + if invert: + bool_ar = (sar[1:] != sar[:-1]) + else: + bool_ar = (sar[1:] == sar[:-1]) + flag = np.concatenate((bool_ar, [invert])) + ret = np.empty(ar.shape, dtype=bool) + ret[order] = flag + + if assume_unique: + return ret[:len(ar1)] + else: + return ret[rev_idx] + + +def _isin_dispatcher(element, test_elements, assume_unique=None, invert=None): + return (element, test_elements) + + +@array_function_dispatch(_isin_dispatcher) +def isin(element, test_elements, assume_unique=False, invert=False): + """ + Calculates `element in test_elements`, broadcasting over `element` only. + Returns a boolean array of the same shape as `element` that is True + where an element of `element` is in `test_elements` and False otherwise. + + Parameters + ---------- + element : array_like + Input array. + test_elements : array_like + The values against which to test each value of `element`. + This argument is flattened if it is an array or array_like. + See notes for behavior with non-array-like parameters. 
+ assume_unique : bool, optional + If True, the input arrays are both assumed to be unique, which + can speed up the calculation. Default is False. + invert : bool, optional + If True, the values in the returned array are inverted, as if + calculating `element not in test_elements`. Default is False. + ``np.isin(a, b, invert=True)`` is equivalent to (but faster + than) ``np.invert(np.isin(a, b))``. + + Returns + ------- + isin : ndarray, bool + Has the same shape as `element`. The values `element[isin]` + are in `test_elements`. + + See Also + -------- + in1d : Flattened version of this function. + numpy.lib.arraysetops : Module with a number of other functions for + performing set operations on arrays. + + Notes + ----- + + `isin` is an element-wise function version of the python keyword `in`. + ``isin(a, b)`` is roughly equivalent to + ``np.array([item in b for item in a])`` if `a` and `b` are 1-D sequences. + + `element` and `test_elements` are converted to arrays if they are not + already. If `test_elements` is a set (or other non-sequence collection) + it will be converted to an object array with one element, rather than an + array of the values contained in `test_elements`. This is a consequence + of the `array` constructor's way of handling non-sequence collections. + Converting the set to a list usually gives the desired behavior. + + .. versionadded:: 1.13.0 + + Examples + -------- + >>> element = 2*np.arange(4).reshape((2, 2)) + >>> element + array([[0, 2], + [4, 6]]) + >>> test_elements = [1, 2, 4, 8] + >>> mask = np.isin(element, test_elements) + >>> mask + array([[False, True], + [ True, False]]) + >>> element[mask] + array([2, 4]) + + The indices of the matched values can be obtained with `nonzero`: + + >>> np.nonzero(mask) + (array([0, 1]), array([1, 0])) + + The test can also be inverted: + + >>> mask = np.isin(element, test_elements, invert=True) + >>> mask + array([[ True, False], + [False, True]]) + >>> element[mask] + array([0, 6]) + + Because of how `array` handles sets, the following does not + work as expected: + + >>> test_set = {1, 2, 4, 8} + >>> np.isin(element, test_set) + array([[False, False], + [False, False]]) + + Casting the set to a list gives the expected result: + + >>> np.isin(element, list(test_set)) + array([[False, True], + [ True, False]]) + """ + element = np.asarray(element) + return in1d(element, test_elements, assume_unique=assume_unique, + invert=invert).reshape(element.shape) + + +def _union1d_dispatcher(ar1, ar2): + return (ar1, ar2) + + +@array_function_dispatch(_union1d_dispatcher) +def union1d(ar1, ar2): + """ + Find the union of two arrays. + + Return the unique, sorted array of values that are in either of the two + input arrays. + + Parameters + ---------- + ar1, ar2 : array_like + Input arrays. They are flattened if they are not already 1D. + + Returns + ------- + union1d : ndarray + Unique, sorted union of the input arrays. + + See Also + -------- + numpy.lib.arraysetops : Module with a number of other functions for + performing set operations on arrays. 
+ + Examples + -------- + >>> np.union1d([-1, 0, 1], [-2, 0, 2]) + array([-2, -1, 0, 1, 2]) + + To find the union of more than two arrays, use functools.reduce: + + >>> from functools import reduce + >>> reduce(np.union1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2])) + array([1, 2, 3, 4, 6]) + """ + return unique(np.concatenate((ar1, ar2), axis=None)) + + +def _setdiff1d_dispatcher(ar1, ar2, assume_unique=None): + return (ar1, ar2) + + +@array_function_dispatch(_setdiff1d_dispatcher) +def setdiff1d(ar1, ar2, assume_unique=False): + """ + Find the set difference of two arrays. + + Return the unique values in `ar1` that are not in `ar2`. + + Parameters + ---------- + ar1 : array_like + Input array. + ar2 : array_like + Input comparison array. + assume_unique : bool + If True, the input arrays are both assumed to be unique, which + can speed up the calculation. Default is False. + + Returns + ------- + setdiff1d : ndarray + 1D array of values in `ar1` that are not in `ar2`. The result + is sorted when `assume_unique=False`, but otherwise only sorted + if the input is sorted. + + See Also + -------- + numpy.lib.arraysetops : Module with a number of other functions for + performing set operations on arrays. + + Examples + -------- + >>> a = np.array([1, 2, 3, 2, 4, 1]) + >>> b = np.array([3, 4, 5, 6]) + >>> np.setdiff1d(a, b) + array([1, 2]) + + """ + if assume_unique: + ar1 = np.asarray(ar1).ravel() + else: + ar1 = unique(ar1) + ar2 = unique(ar2) + return ar1[in1d(ar1, ar2, assume_unique=True, invert=True)] diff --git a/.local/lib/python3.8/site-packages/numpy/lib/arraysetops.pyi b/.local/lib/python3.8/site-packages/numpy/lib/arraysetops.pyi new file mode 100644 index 0000000..6f13ec7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/arraysetops.pyi @@ -0,0 +1,335 @@ +from typing import ( + Literal as L, + Any, + List, + Union, + TypeVar, + Tuple, + overload, + SupportsIndex, +) + +from numpy import ( + dtype, + generic, + number, + bool_, + ushort, + ubyte, + uintc, + uint, + ulonglong, + short, + int8, + byte, + intc, + int_, + intp, + longlong, + half, + single, + double, + longdouble, + csingle, + cdouble, + clongdouble, + timedelta64, + datetime64, + object_, + str_, + bytes_, + void, +) + +from numpy.typing import ( + ArrayLike, + NDArray, + _FiniteNestedSequence, + _SupportsArray, + _ArrayLikeBool_co, + _ArrayLikeDT64_co, + _ArrayLikeTD64_co, + _ArrayLikeObject_co, + _ArrayLikeNumber_co, +) + +_SCT = TypeVar("_SCT", bound=generic) +_NumberType = TypeVar("_NumberType", bound=number[Any]) + +# Explicitly set all allowed values to prevent accidental castings to +# abstract dtypes (their common super-type). +# +# Only relevant if two or more arguments are parametrized, (e.g. `setdiff1d`) +# which could result in, for example, `int64` and `float64`producing a +# `number[_64Bit]` array +_SCTNoCast = TypeVar( + "_SCTNoCast", + bool_, + ushort, + ubyte, + uintc, + uint, + ulonglong, + short, + byte, + intc, + int_, + longlong, + half, + single, + double, + longdouble, + csingle, + cdouble, + clongdouble, + timedelta64, + datetime64, + object_, + str_, + bytes_, + void, +) + +_ArrayLike = _FiniteNestedSequence[_SupportsArray[dtype[_SCT]]] + +__all__: List[str] + +@overload +def ediff1d( + ary: _ArrayLikeBool_co, + to_end: None | ArrayLike = ..., + to_begin: None | ArrayLike = ..., +) -> NDArray[int8]: ... +@overload +def ediff1d( + ary: _ArrayLike[_NumberType], + to_end: None | ArrayLike = ..., + to_begin: None | ArrayLike = ..., +) -> NDArray[_NumberType]: ... 
+@overload +def ediff1d( + ary: _ArrayLikeNumber_co, + to_end: None | ArrayLike = ..., + to_begin: None | ArrayLike = ..., +) -> NDArray[Any]: ... +@overload +def ediff1d( + ary: _ArrayLikeDT64_co | _ArrayLikeTD64_co, + to_end: None | ArrayLike = ..., + to_begin: None | ArrayLike = ..., +) -> NDArray[timedelta64]: ... +@overload +def ediff1d( + ary: _ArrayLikeObject_co, + to_end: None | ArrayLike = ..., + to_begin: None | ArrayLike = ..., +) -> NDArray[object_]: ... + +@overload +def unique( + ar: _ArrayLike[_SCT], + return_index: L[False] = ..., + return_inverse: L[False] = ..., + return_counts: L[False] = ..., + axis: None | SupportsIndex = ..., +) -> NDArray[_SCT]: ... +@overload +def unique( + ar: ArrayLike, + return_index: L[False] = ..., + return_inverse: L[False] = ..., + return_counts: L[False] = ..., + axis: None | SupportsIndex = ..., +) -> NDArray[Any]: ... +@overload +def unique( + ar: _ArrayLike[_SCT], + return_index: L[True] = ..., + return_inverse: L[False] = ..., + return_counts: L[False] = ..., + axis: None | SupportsIndex = ..., +) -> Tuple[NDArray[_SCT], NDArray[intp]]: ... +@overload +def unique( + ar: ArrayLike, + return_index: L[True] = ..., + return_inverse: L[False] = ..., + return_counts: L[False] = ..., + axis: None | SupportsIndex = ..., +) -> Tuple[NDArray[Any], NDArray[intp]]: ... +@overload +def unique( + ar: _ArrayLike[_SCT], + return_index: L[False] = ..., + return_inverse: L[True] = ..., + return_counts: L[False] = ..., + axis: None | SupportsIndex = ..., +) -> Tuple[NDArray[_SCT], NDArray[intp]]: ... +@overload +def unique( + ar: ArrayLike, + return_index: L[False] = ..., + return_inverse: L[True] = ..., + return_counts: L[False] = ..., + axis: None | SupportsIndex = ..., +) -> Tuple[NDArray[Any], NDArray[intp]]: ... +@overload +def unique( + ar: _ArrayLike[_SCT], + return_index: L[False] = ..., + return_inverse: L[False] = ..., + return_counts: L[True] = ..., + axis: None | SupportsIndex = ..., +) -> Tuple[NDArray[_SCT], NDArray[intp]]: ... +@overload +def unique( + ar: ArrayLike, + return_index: L[False] = ..., + return_inverse: L[False] = ..., + return_counts: L[True] = ..., + axis: None | SupportsIndex = ..., +) -> Tuple[NDArray[Any], NDArray[intp]]: ... +@overload +def unique( + ar: _ArrayLike[_SCT], + return_index: L[True] = ..., + return_inverse: L[True] = ..., + return_counts: L[False] = ..., + axis: None | SupportsIndex = ..., +) -> Tuple[NDArray[_SCT], NDArray[intp], NDArray[intp]]: ... +@overload +def unique( + ar: ArrayLike, + return_index: L[True] = ..., + return_inverse: L[True] = ..., + return_counts: L[False] = ..., + axis: None | SupportsIndex = ..., +) -> Tuple[NDArray[Any], NDArray[intp], NDArray[intp]]: ... +@overload +def unique( + ar: _ArrayLike[_SCT], + return_index: L[True] = ..., + return_inverse: L[False] = ..., + return_counts: L[True] = ..., + axis: None | SupportsIndex = ..., +) -> Tuple[NDArray[_SCT], NDArray[intp], NDArray[intp]]: ... +@overload +def unique( + ar: ArrayLike, + return_index: L[True] = ..., + return_inverse: L[False] = ..., + return_counts: L[True] = ..., + axis: None | SupportsIndex = ..., +) -> Tuple[NDArray[Any], NDArray[intp], NDArray[intp]]: ... +@overload +def unique( + ar: _ArrayLike[_SCT], + return_index: L[False] = ..., + return_inverse: L[True] = ..., + return_counts: L[True] = ..., + axis: None | SupportsIndex = ..., +) -> Tuple[NDArray[_SCT], NDArray[intp], NDArray[intp]]: ... 
+@overload +def unique( + ar: ArrayLike, + return_index: L[False] = ..., + return_inverse: L[True] = ..., + return_counts: L[True] = ..., + axis: None | SupportsIndex = ..., +) -> Tuple[NDArray[Any], NDArray[intp], NDArray[intp]]: ... +@overload +def unique( + ar: _ArrayLike[_SCT], + return_index: L[True] = ..., + return_inverse: L[True] = ..., + return_counts: L[True] = ..., + axis: None | SupportsIndex = ..., +) -> Tuple[NDArray[_SCT], NDArray[intp], NDArray[intp], NDArray[intp]]: ... +@overload +def unique( + ar: ArrayLike, + return_index: L[True] = ..., + return_inverse: L[True] = ..., + return_counts: L[True] = ..., + axis: None | SupportsIndex = ..., +) -> Tuple[NDArray[Any], NDArray[intp], NDArray[intp], NDArray[intp]]: ... + +@overload +def intersect1d( + ar1: _ArrayLike[_SCTNoCast], + ar2: _ArrayLike[_SCTNoCast], + assume_unique: bool = ..., + return_indices: L[False] = ..., +) -> NDArray[_SCTNoCast]: ... +@overload +def intersect1d( + ar1: ArrayLike, + ar2: ArrayLike, + assume_unique: bool = ..., + return_indices: L[False] = ..., +) -> NDArray[Any]: ... +@overload +def intersect1d( + ar1: _ArrayLike[_SCTNoCast], + ar2: _ArrayLike[_SCTNoCast], + assume_unique: bool = ..., + return_indices: L[True] = ..., +) -> Tuple[NDArray[_SCTNoCast], NDArray[intp], NDArray[intp]]: ... +@overload +def intersect1d( + ar1: ArrayLike, + ar2: ArrayLike, + assume_unique: bool = ..., + return_indices: L[True] = ..., +) -> Tuple[NDArray[Any], NDArray[intp], NDArray[intp]]: ... + +@overload +def setxor1d( + ar1: _ArrayLike[_SCTNoCast], + ar2: _ArrayLike[_SCTNoCast], + assume_unique: bool = ..., +) -> NDArray[_SCTNoCast]: ... +@overload +def setxor1d( + ar1: ArrayLike, + ar2: ArrayLike, + assume_unique: bool = ..., +) -> NDArray[Any]: ... + +def in1d( + ar1: ArrayLike, + ar2: ArrayLike, + assume_unique: bool = ..., + invert: bool = ..., +) -> NDArray[bool_]: ... + +def isin( + element: ArrayLike, + test_elements: ArrayLike, + assume_unique: bool = ..., + invert: bool = ..., +) -> NDArray[bool_]: ... + +@overload +def union1d( + ar1: _ArrayLike[_SCTNoCast], + ar2: _ArrayLike[_SCTNoCast], +) -> NDArray[_SCTNoCast]: ... +@overload +def union1d( + ar1: ArrayLike, + ar2: ArrayLike, +) -> NDArray[Any]: ... + +@overload +def setdiff1d( + ar1: _ArrayLike[_SCTNoCast], + ar2: _ArrayLike[_SCTNoCast], + assume_unique: bool = ..., +) -> NDArray[_SCTNoCast]: ... +@overload +def setdiff1d( + ar1: ArrayLike, + ar2: ArrayLike, + assume_unique: bool = ..., +) -> NDArray[Any]: ... diff --git a/.local/lib/python3.8/site-packages/numpy/lib/arrayterator.py b/.local/lib/python3.8/site-packages/numpy/lib/arrayterator.py new file mode 100644 index 0000000..b9ea21f --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/arrayterator.py @@ -0,0 +1,219 @@ +""" +A buffered iterator for big arrays. + +This module solves the problem of iterating over a big file-based array +without having to read it into memory. The `Arrayterator` class wraps +an array object, and when iterated it will return sub-arrays with at most +a user-specified number of elements. + +""" +from operator import mul +from functools import reduce + +__all__ = ['Arrayterator'] + + +class Arrayterator: + """ + Buffered iterator for big arrays. + + `Arrayterator` creates a buffered iterator for reading big arrays in small + contiguous blocks. The class is useful for objects stored in the + file system. It allows iteration over the object *without* reading + everything in memory; instead, small blocks are read and iterated over. 
+
+    `Arrayterator` can be used with any object that supports multidimensional
+    slices. This includes NumPy arrays, but also variables from
+    Scientific.IO.NetCDF or pynetcdf for example.
+
+    Parameters
+    ----------
+    var : array_like
+        The object to iterate over.
+    buf_size : int, optional
+        The buffer size. If `buf_size` is supplied, the maximum amount of
+        data that will be read into memory is `buf_size` elements.
+        Default is None, which will read as many elements as possible
+        into memory.
+
+    Attributes
+    ----------
+    var
+    buf_size
+    start
+    stop
+    step
+    shape
+    flat
+
+    See Also
+    --------
+    ndenumerate : Multidimensional array iterator.
+    flatiter : Flat array iterator.
+    memmap : Create a memory-map to an array stored in a binary file on disk.
+
+    Notes
+    -----
+    The algorithm works by first finding a "running dimension", along which
+    the blocks will be extracted. Given an array of dimensions
+    ``(d1, d2, ..., dn)``, e.g. if `buf_size` is smaller than ``d1``, the
+    first dimension will be used. If, on the other hand,
+    ``d1 < buf_size < d1*d2`` the second dimension will be used, and so on.
+    Blocks are extracted along this dimension, and when the last block is
+    returned the process continues from the next dimension, until all
+    elements have been read.
+
+    Examples
+    --------
+    >>> a = np.arange(3 * 4 * 5 * 6).reshape(3, 4, 5, 6)
+    >>> a_itor = np.lib.Arrayterator(a, 2)
+    >>> a_itor.shape
+    (3, 4, 5, 6)
+
+    Now we can iterate over ``a_itor``, and it will return arrays of size
+    two. Since `buf_size` was smaller than any dimension, the first
+    dimension will be iterated over first:
+
+    >>> for subarr in a_itor:
+    ...     if not subarr.all():
+    ...         print(subarr, subarr.shape) # doctest: +SKIP
+    >>> # [[[[0 1]]]] (1, 1, 1, 2)
+
+    """
+
+    def __init__(self, var, buf_size=None):
+        self.var = var
+        self.buf_size = buf_size
+
+        self.start = [0 for dim in var.shape]
+        self.stop = [dim for dim in var.shape]
+        self.step = [1 for dim in var.shape]
+
+    def __getattr__(self, attr):
+        return getattr(self.var, attr)
+
+    def __getitem__(self, index):
+        """
+        Return a new arrayterator.
+
+        """
+        # Fix index, handling ellipsis and incomplete slices.
+        if not isinstance(index, tuple):
+            index = (index,)
+        fixed = []
+        length, dims = len(index), self.ndim
+        for slice_ in index:
+            if slice_ is Ellipsis:
+                fixed.extend([slice(None)] * (dims-length+1))
+                length = len(fixed)
+            elif isinstance(slice_, int):
+                fixed.append(slice(slice_, slice_+1, 1))
+            else:
+                fixed.append(slice_)
+        index = tuple(fixed)
+        if len(index) < dims:
+            index += (slice(None),) * (dims-len(index))
+
+        # Return a new arrayterator object.
+        out = self.__class__(self.var, self.buf_size)
+        for i, (start, stop, step, slice_) in enumerate(
+                zip(self.start, self.stop, self.step, index)):
+            out.start[i] = start + (slice_.start or 0)
+            out.step[i] = step * (slice_.step or 1)
+            out.stop[i] = start + (slice_.stop or stop-start)
+            out.stop[i] = min(stop, out.stop[i])
+        return out
+
+    def __array__(self):
+        """
+        Return corresponding data.
+
+        """
+        slice_ = tuple(slice(*t) for t in zip(
+            self.start, self.stop, self.step))
+        return self.var[slice_]
+
+    @property
+    def flat(self):
+        """
+        A 1-D flat iterator for Arrayterator objects.
+
+        This iterator returns elements of the array to be iterated over in
+        `Arrayterator` one by one. It is similar to `flatiter`.
+
+        See Also
+        --------
+        Arrayterator
+        flatiter
+
+        Examples
+        --------
+        >>> a = np.arange(3 * 4 * 5 * 6).reshape(3, 4, 5, 6)
+        >>> a_itor = np.lib.Arrayterator(a, 2)
+
+        >>> for subarr in a_itor.flat:
+        ...     if not subarr:
+        ...         print(subarr, type(subarr))
+        ...
+        0 <class 'numpy.int64'>
+
+        """
+        for block in self:
+            yield from block.flat
+
+    @property
+    def shape(self):
+        """
+        The shape of the array to be iterated over.
+
+        For an example, see `Arrayterator`.
+
+        """
+        return tuple(((stop-start-1)//step+1) for start, stop, step in
+                     zip(self.start, self.stop, self.step))
+
+    def __iter__(self):
+        # Skip arrays with degenerate dimensions
+        if [dim for dim in self.shape if dim <= 0]:
+            return
+
+        start = self.start[:]
+        stop = self.stop[:]
+        step = self.step[:]
+        ndims = self.var.ndim
+
+        while True:
+            count = self.buf_size or reduce(mul, self.shape)
+
+            # iterate over each dimension, looking for the
+            # running dimension (ie, the dimension along which
+            # the blocks will be built from)
+            rundim = 0
+            for i in range(ndims-1, -1, -1):
+                # if count is zero we ran out of elements to read
+                # along higher dimensions, so we read only a single position
+                if count == 0:
+                    stop[i] = start[i]+1
+                elif count <= self.shape[i]:
+                    # limit along this dimension
+                    stop[i] = start[i] + count*step[i]
+                    rundim = i
+                else:
+                    # read everything along this dimension
+                    stop[i] = self.stop[i]
+                stop[i] = min(self.stop[i], stop[i])
+                count = count//self.shape[i]
+
+            # yield a block
+            slice_ = tuple(slice(*t) for t in zip(start, stop, step))
+            yield self.var[slice_]
+
+            # Update start position, taking care of overflow to
+            # other dimensions
+            start[rundim] = stop[rundim]    # start where we stopped
+            for i in range(ndims-1, 0, -1):
+                if start[i] >= self.stop[i]:
+                    start[i] = self.start[i]
+                    start[i-1] += self.step[i-1]
+            if start[0] >= self.stop[0]:
+                return
diff --git a/.local/lib/python3.8/site-packages/numpy/lib/arrayterator.pyi b/.local/lib/python3.8/site-packages/numpy/lib/arrayterator.pyi
new file mode 100644
index 0000000..82c6692
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/lib/arrayterator.pyi
@@ -0,0 +1,52 @@
+from typing import (
+    List,
+    Any,
+    TypeVar,
+    Generator,
+    List,
+    Union,
+    Tuple,
+    overload,
+)
+
+from numpy import ndarray, dtype, generic
+from numpy.typing import DTypeLike
+
+# TODO: Set a shape bound once we've got proper shape support
+_Shape = TypeVar("_Shape", bound=Any)
+_DType = TypeVar("_DType", bound=dtype[Any])
+_ScalarType = TypeVar("_ScalarType", bound=generic)
+
+_Index = Union[
+    Union[ellipsis, int, slice],
+    Tuple[Union[ellipsis, int, slice], ...],
+]
+
+__all__: List[str]
+
+# NOTE: In reality `Arrayterator` does not actually inherit from `ndarray`,
+# but its ``__getattr__` method does wrap around the former and thus has
+# access to all its methods
+
+class Arrayterator(ndarray[_Shape, _DType]):
+    var: ndarray[_Shape, _DType]  # type: ignore[assignment]
+    buf_size: None | int
+    start: List[int]
+    stop: List[int]
+    step: List[int]
+
+    @property  # type: ignore[misc]
+    def shape(self) -> Tuple[int, ...]: ...
+    @property
+    def flat(  # type: ignore[override]
+        self: ndarray[Any, dtype[_ScalarType]]
+    ) -> Generator[_ScalarType, None, None]: ...
+    def __init__(
+        self, var: ndarray[_Shape, _DType], buf_size: None | int = ...
+    ) -> None: ...
+    @overload
+    def __array__(self, dtype: None = ...) -> ndarray[Any, _DType]: ...
+    @overload
+    def __array__(self, dtype: DTypeLike) -> ndarray[Any, dtype[Any]]: ...
+    def __getitem__(self, index: _Index) -> Arrayterator[Any, _DType]: ...
+ def __iter__(self) -> Generator[ndarray[Any, _DType], None, None]: ... diff --git a/.local/lib/python3.8/site-packages/numpy/lib/format.py b/.local/lib/python3.8/site-packages/numpy/lib/format.py new file mode 100644 index 0000000..3967b43 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/format.py @@ -0,0 +1,917 @@ +""" +Binary serialization + +NPY format +========== + +A simple format for saving numpy arrays to disk with the full +information about them. + +The ``.npy`` format is the standard binary file format in NumPy for +persisting a *single* arbitrary NumPy array on disk. The format stores all +of the shape and dtype information necessary to reconstruct the array +correctly even on another machine with a different architecture. +The format is designed to be as simple as possible while achieving +its limited goals. + +The ``.npz`` format is the standard format for persisting *multiple* NumPy +arrays on disk. A ``.npz`` file is a zip file containing multiple ``.npy`` +files, one for each array. + +Capabilities +------------ + +- Can represent all NumPy arrays including nested record arrays and + object arrays. + +- Represents the data in its native binary form. + +- Supports Fortran-contiguous arrays directly. + +- Stores all of the necessary information to reconstruct the array + including shape and dtype on a machine of a different + architecture. Both little-endian and big-endian arrays are + supported, and a file with little-endian numbers will yield + a little-endian array on any machine reading the file. The + types are described in terms of their actual sizes. For example, + if a machine with a 64-bit C "long int" writes out an array with + "long ints", a reading machine with 32-bit C "long ints" will yield + an array with 64-bit integers. + +- Is straightforward to reverse engineer. Datasets often live longer than + the programs that created them. A competent developer should be + able to create a solution in their preferred programming language to + read most ``.npy`` files that they have been given without much + documentation. + +- Allows memory-mapping of the data. See `open_memmap`. + +- Can be read from a filelike stream object instead of an actual file. + +- Stores object arrays, i.e. arrays containing elements that are arbitrary + Python objects. Files with object arrays are not to be mmapable, but + can be read and written to disk. + +Limitations +----------- + +- Arbitrary subclasses of numpy.ndarray are not completely preserved. + Subclasses will be accepted for writing, but only the array data will + be written out. A regular numpy.ndarray object will be created + upon reading the file. + +.. warning:: + + Due to limitations in the interpretation of structured dtypes, dtypes + with fields with empty names will have the names replaced by 'f0', 'f1', + etc. Such arrays will not round-trip through the format entirely + accurately. The data is intact; only the field names will differ. We are + working on a fix for this. This fix will not require a change in the + file format. The arrays with such structures can still be saved and + restored, and the correct dtype may be restored by using the + ``loadedarray.view(correct_dtype)`` method. + +File extensions +--------------- + +We recommend using the ``.npy`` and ``.npz`` extensions for files saved +in this format. This is by no means a requirement; applications may wish +to use these file formats but use an extension specific to the +application. 
In the absence of an obvious alternative, however,
+we suggest using ``.npy`` and ``.npz``.
+
+Version numbering
+-----------------
+
+The version numbering of these formats is independent of NumPy version
+numbering. If the format is upgraded, the code in `numpy.io` will still
+be able to read and write Version 1.0 files.
+
+Format Version 1.0
+------------------
+
+The first 6 bytes are a magic string: exactly ``\\x93NUMPY``.
+
+The next 1 byte is an unsigned byte: the major version number of the file
+format, e.g. ``\\x01``.
+
+The next 1 byte is an unsigned byte: the minor version number of the file
+format, e.g. ``\\x00``. Note: the version of the file format is not tied
+to the version of the numpy package.
+
+The next 2 bytes form a little-endian unsigned short int: the length of
+the header data HEADER_LEN.
+
+The next HEADER_LEN bytes form the header data describing the array's
+format. It is an ASCII string which contains a Python literal expression
+of a dictionary. It is terminated by a newline (``\\n``) and padded with
+spaces (``\\x20``) to make the total of
+``len(magic string) + 2 + len(length) + HEADER_LEN`` be evenly divisible
+by 64 for alignment purposes.
+
+The dictionary contains three keys:
+
+    "descr" : dtype.descr
+      An object that can be passed as an argument to the `numpy.dtype`
+      constructor to create the array's dtype.
+    "fortran_order" : bool
+      Whether the array data is Fortran-contiguous or not. Since
+      Fortran-contiguous arrays are a common form of non-C-contiguity,
+      we allow them to be written directly to disk for efficiency.
+    "shape" : tuple of int
+      The shape of the array.
+
+For repeatability and readability, the dictionary keys are sorted in
+alphabetic order. This is for convenience only. A writer SHOULD implement
+this if possible. A reader MUST NOT depend on this.
+
+Following the header comes the array data. If the dtype contains Python
+objects (i.e. ``dtype.hasobject is True``), then the data is a Python
+pickle of the array. Otherwise the data is the contiguous (either C-
+or Fortran-, depending on ``fortran_order``) bytes of the array.
+Consumers can figure out the number of bytes by multiplying the number
+of elements given by the shape (noting that ``shape=()`` means there is
+1 element) by ``dtype.itemsize``.
+
+Format Version 2.0
+------------------
+
+The version 1.0 format only allowed the array header to have a total size of
+65535 bytes. This can be exceeded by structured arrays with a large number of
+columns. The version 2.0 format extends the header size to 4 GiB.
+`numpy.save` will automatically save in 2.0 format if the data requires it,
+else it will always use the more compatible 1.0 format.
+
+The description of the fourth element of the header therefore has become:
+"The next 4 bytes form a little-endian unsigned int: the length of the header
+data HEADER_LEN."
+
+Format Version 3.0
+------------------
+
+This version replaces the ASCII string (which in practice was latin1) with
+a utf8-encoded string, so supports structured types with any unicode field
+names.
+
+Notes
+-----
+The ``.npy`` format, including motivation for creating it and a comparison of
+alternatives, is described in the :doc:`"npy-format" NEP
+<neps:nep-0001-npy-format>`, however details have evolved with time and this
+document is more current.
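+
+As a concrete illustration of the Version 1.0 layout (a sketch: the exact
+byte values below are what the current writer happens to produce for this
+array on a little-endian machine, not a requirement of the format), a small
+C-order float64 array is padded so that the data starts at byte 128, giving
+HEADER_LEN 118 (``\\x76\\x00`` little-endian):
+
+    >>> import io
+    >>> import numpy as np
+    >>> buf = io.BytesIO()
+    >>> np.save(buf, np.arange(6.).reshape(2, 3))
+    >>> buf.getvalue()[:10]
+    b'\\x93NUMPY\\x01\\x00v\\x00'
+    >>> buf.getvalue()[10:69]
+    b"{'descr': '<f8', 'fortran_order': False, 'shape': (2, 3), }"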
+
+"""
+import numpy
+import warnings
+from numpy.lib.utils import safe_eval
+from numpy.compat import (
+    isfileobj, os_fspath, pickle
+    )
+
+
+__all__ = []
+
+
+EXPECTED_KEYS = {'descr', 'fortran_order', 'shape'}
+MAGIC_PREFIX = b'\x93NUMPY'
+MAGIC_LEN = len(MAGIC_PREFIX) + 2
+ARRAY_ALIGN = 64  # plausible values are powers of 2 between 16 and 4096
+BUFFER_SIZE = 2**18  # size of buffer for reading npz files in bytes
+
+# difference between version 1.0 and 2.0 is a 4 byte (I) header length
+# instead of 2 bytes (H) allowing storage of large structured arrays
+_header_size_info = {
+    (1, 0): ('<H', 'latin1'),
+    (2, 0): ('<I', 'latin1'),
+    (3, 0): ('<I', 'utf8'),
+}
+
+
+def _check_version(version):
+    if version not in [(1, 0), (2, 0), (3, 0), None]:
+        msg = "we only support format version (1,0), (2,0), and (3,0), not %s"
+        raise ValueError(msg % (version,))
+
+
+def magic(major, minor):
+    """ Return the magic string for the given file format version.
+
+    Parameters
+    ----------
+    major : int in [0, 255]
+    minor : int in [0, 255]
+
+    Returns
+    -------
+    magic : str
+
+    Raises
+    ------
+    ValueError if the version cannot be formatted.
+    """
+    if major < 0 or major > 255:
+        raise ValueError("major version must be 0 <= major < 256")
+    if minor < 0 or minor > 255:
+        raise ValueError("minor version must be 0 <= minor < 256")
+    return MAGIC_PREFIX + bytes([major, minor])
+
+def read_magic(fp):
+    """ Read the magic string to get the version of the file format.
+
+    Parameters
+    ----------
+    fp : filelike object
+
+    Returns
+    -------
+    major : int
+    minor : int
+    """
+    magic_str = _read_bytes(fp, MAGIC_LEN, "magic string")
+    if magic_str[:-2] != MAGIC_PREFIX:
+        msg = "the magic string is not correct; expected %r, got %r"
+        raise ValueError(msg % (MAGIC_PREFIX, magic_str[:-2]))
+    major, minor = magic_str[-2:]
+    return major, minor
+
+def _has_metadata(dt):
+    if dt.metadata is not None:
+        return True
+    elif dt.names is not None:
+        return any(_has_metadata(dt[k]) for k in dt.names)
+    elif dt.subdtype is not None:
+        return _has_metadata(dt.base)
+    else:
+        return False
+
+def dtype_to_descr(dtype):
+    """
+    Get a serializable descriptor from the dtype.
+
+    The .descr attribute of a dtype object cannot be round-tripped through
+    the dtype() constructor. Simple types, like dtype('float32'), have
+    a descr which looks like a record array with one field with '' as
+    a name. The dtype() constructor interprets this as a request to give
+    a default name. Instead, we construct a descriptor that can be passed to
+    dtype().
+
+    Parameters
+    ----------
+    dtype : dtype
+        The dtype of the array that will be written to disk.
+
+    Returns
+    -------
+    descr : object
+        An object that can be passed to `numpy.dtype()` in order to
+        replicate the input dtype.
+
+    """
+    if _has_metadata(dtype):
+        warnings.warn("metadata on a dtype may be saved or ignored, but will "
+                      "raise if saved when read. Use another form of storage.",
+                      UserWarning, stacklevel=2)
+    if dtype.names is not None:
+        # This is a record array. The .descr is fine. XXX: parts of the
+        # record array with an empty name, like padding bytes, still get
+        # fiddled with. This needs to be fixed in the C implementation of
+        # dtype().
+        return dtype.descr
+    else:
+        return dtype.str
+
+def descr_to_dtype(descr):
+    """
+    Returns a dtype based on the given description.
+
+    This is essentially the reverse of `dtype_to_descr()`. It will remove
+    the valueless padding fields created by, e.g., simple fields like
+    dtype('float32'), and then convert the description to its corresponding
+    dtype.
+
+    Parameters
+    ----------
+    descr : object
+        The object retrieved by dtype.descr. Can be passed to
+        `numpy.dtype()` in order to replicate the input dtype.
+
+    Returns
+    -------
+    dtype : dtype
+        The dtype constructed by the description.
+
+    """
+    if isinstance(descr, str):
+        # No padding removal needed
+        return numpy.dtype(descr)
+    elif isinstance(descr, tuple):
+        # subtype, will always have a shape descr[1]
+        dt = descr_to_dtype(descr[0])
+        return numpy.dtype((dt, descr[1]))
+
+    titles = []
+    names = []
+    formats = []
+    offsets = []
+    offset = 0
+    for field in descr:
+        if len(field) == 2:
+            name, descr_str = field
+            dt = descr_to_dtype(descr_str)
+        else:
+            name, descr_str, shape = field
+            dt = numpy.dtype((descr_to_dtype(descr_str), shape))
+
+        # Ignore padding bytes, which will be void bytes with '' as name
+        # (once support for blank names is removed, only "if name == ''" is needed)
+        is_pad = (name == '' and dt.type is numpy.void and dt.names is None)
+        if not is_pad:
+            title, name = name if isinstance(name, tuple) else (None, name)
+            titles.append(title)
+            names.append(name)
+            formats.append(dt)
+            offsets.append(offset)
+        offset += dt.itemsize
+
+    return numpy.dtype({'names': names, 'formats': formats, 'titles': titles,
+                        'offsets': offsets, 'itemsize': offset})
+
+def header_data_from_array_1_0(array):
+    """ Get the dictionary of header metadata from a numpy.ndarray.
+
+    Parameters
+    ----------
+    array : numpy.ndarray
+
+    Returns
+    -------
+    d : dict
+        This has the appropriate entries for writing its string representation
+        to the header of the file.
+    """
+    d = {'shape': array.shape}
+    if array.flags.c_contiguous:
+        d['fortran_order'] = False
+    elif array.flags.f_contiguous:
+        d['fortran_order'] = True
+    else:
+        # Totally non-contiguous data. We will have to make it C-contiguous
+        # before writing. Note that we need to test for C_CONTIGUOUS first
+        # because a 1-D array is both C_CONTIGUOUS and F_CONTIGUOUS.
+        d['fortran_order'] = False
+
+    d['descr'] = dtype_to_descr(array.dtype)
+    return d
+
+
+def _wrap_header(header, version):
+    """
+    Takes a stringified header, and attaches the prefix and padding to it
+    """
+    import struct
+    assert version is not None
+    fmt, encoding = _header_size_info[version]
+    if not isinstance(header, bytes):  # always true on python 3
+        header = header.encode(encoding)
+    hlen = len(header) + 1
+    padlen = ARRAY_ALIGN - ((MAGIC_LEN + struct.calcsize(fmt) + hlen) % ARRAY_ALIGN)
+    try:
+        header_prefix = magic(*version) + struct.pack(fmt, hlen + padlen)
+    except struct.error:
+        msg = "Header length {} too big for version={}".format(hlen, version)
+        raise ValueError(msg) from None
+
+    # Pad the header with spaces and a final newline such that the magic
+    # string, the header-length short and the header are aligned on a
+    # ARRAY_ALIGN byte boundary. This supports memory mapping of dtypes
+    # aligned up to ARRAY_ALIGN on systems like Linux where mmap()
+    # offset must be page-aligned (i.e. the beginning of the file).
+    return header_prefix + header + b' '*padlen + b'\n'
+
+
+def _wrap_header_guess_version(header):
+    """
+    Like `_wrap_header`, but chooses an appropriate version given the contents
+    """
+    try:
+        return _wrap_header(header, (1, 0))
+    except ValueError:
+        pass
+
+    try:
+        ret = _wrap_header(header, (2, 0))
+    except UnicodeEncodeError:
+        pass
+    else:
+        warnings.warn("Stored array in format 2.0. It can only be "
+                      "read by NumPy >= 1.9", UserWarning, stacklevel=2)
+        return ret
+
+    header = _wrap_header(header, (3, 0))
+    warnings.warn("Stored array in format 3.0. It can only be "
+                  "read by NumPy >= 1.17", UserWarning, stacklevel=2)
+    return header
+
+
+def _write_array_header(fp, d, version=None):
+    """ Write the header for an array.
+
+    Parameters
+    ----------
+    fp : filelike object
+    d : dict
+        This has the appropriate entries for writing its string representation
+        to the header of the file.
+    version : tuple or None
+        None means use the oldest version that works; an explicit version
+        will raise a ValueError if the format does not allow saving this
+        data. Default: None
+    """
+    header = ["{"]
+    for key, value in sorted(d.items()):
+        # Need to use repr here, since we eval these when reading
+        header.append("'%s': %s, " % (key, repr(value)))
+    header.append("}")
+    header = "".join(header)
+    if version is None:
+        header = _wrap_header_guess_version(header)
+    else:
+        header = _wrap_header(header, version)
+    fp.write(header)
+
+def write_array_header_1_0(fp, d):
+    """ Write the header for an array using the 1.0 format.
+
+    Parameters
+    ----------
+    fp : filelike object
+    d : dict
+        This has the appropriate entries for writing its string
+        representation to the header of the file.
+    """
+    _write_array_header(fp, d, (1, 0))
+
+
+def write_array_header_2_0(fp, d):
+    """ Write the header for an array using the 2.0 format.
+        The 2.0 format allows storing very large structured arrays.
+
+    .. versionadded:: 1.9.0
+
+    Parameters
+    ----------
+    fp : filelike object
+    d : dict
+        This has the appropriate entries for writing its string
+        representation to the header of the file.
+    """
+    _write_array_header(fp, d, (2, 0))
+
+def read_array_header_1_0(fp):
+    """
+    Read an array header from a filelike object using the 1.0 file format
+    version.
+
+    This will leave the file object located just after the header.
+
+    Parameters
+    ----------
+    fp : filelike object
+        A file object or something with a `.read()` method like a file.
+
+    Returns
+    -------
+    shape : tuple of int
+        The shape of the array.
+    fortran_order : bool
+        The array data will be written out directly if it is either
+        C-contiguous or Fortran-contiguous. Otherwise, it will be made
+        contiguous before writing it out.
+    dtype : dtype
+        The dtype of the file's data.
+
+    Raises
+    ------
+    ValueError
+        If the data is invalid.
+
+    """
+    return _read_array_header(fp, version=(1, 0))
+
+def read_array_header_2_0(fp):
+    """
+    Read an array header from a filelike object using the 2.0 file format
+    version.
+
+    This will leave the file object located just after the header.
+
+    .. versionadded:: 1.9.0
+
+    Parameters
+    ----------
+    fp : filelike object
+        A file object or something with a `.read()` method like a file.
+
+    Returns
+    -------
+    shape : tuple of int
+        The shape of the array.
+    fortran_order : bool
+        The array data will be written out directly if it is either
+        C-contiguous or Fortran-contiguous. Otherwise, it will be made
+        contiguous before writing it out.
+    dtype : dtype
+        The dtype of the file's data.
+
+    Raises
+    ------
+    ValueError
+        If the data is invalid.
+
+    """
+    return _read_array_header(fp, version=(2, 0))
+
+
+def _filter_header(s):
+    """Clean up 'L' in npz header ints.
+
+    Cleans up the 'L' in strings representing integers. Needed to allow npz
+    headers produced in Python 2 to be read in Python 3.
+
+    Parameters
+    ----------
+    s : string
+        Npy file header.
+
+    Returns
+    -------
+    header : str
+        Cleaned up header.
+ + """ + import tokenize + from io import StringIO + + tokens = [] + last_token_was_number = False + for token in tokenize.generate_tokens(StringIO(s).readline): + token_type = token[0] + token_string = token[1] + if (last_token_was_number and + token_type == tokenize.NAME and + token_string == "L"): + continue + else: + tokens.append(token) + last_token_was_number = (token_type == tokenize.NUMBER) + return tokenize.untokenize(tokens) + + +def _read_array_header(fp, version): + """ + see read_array_header_1_0 + """ + # Read an unsigned, little-endian short int which has the length of the + # header. + import struct + hinfo = _header_size_info.get(version) + if hinfo is None: + raise ValueError("Invalid version {!r}".format(version)) + hlength_type, encoding = hinfo + + hlength_str = _read_bytes(fp, struct.calcsize(hlength_type), "array header length") + header_length = struct.unpack(hlength_type, hlength_str)[0] + header = _read_bytes(fp, header_length, "array header") + header = header.decode(encoding) + + # The header is a pretty-printed string representation of a literal + # Python dictionary with trailing newlines padded to a ARRAY_ALIGN byte + # boundary. The keys are strings. + # "shape" : tuple of int + # "fortran_order" : bool + # "descr" : dtype.descr + # Versions (2, 0) and (1, 0) could have been created by a Python 2 + # implementation before header filtering was implemented. + if version <= (2, 0): + header = _filter_header(header) + try: + d = safe_eval(header) + except SyntaxError as e: + msg = "Cannot parse header: {!r}" + raise ValueError(msg.format(header)) from e + if not isinstance(d, dict): + msg = "Header is not a dictionary: {!r}" + raise ValueError(msg.format(d)) + + if EXPECTED_KEYS != d.keys(): + keys = sorted(d.keys()) + msg = "Header does not contain the correct keys: {!r}" + raise ValueError(msg.format(keys)) + + # Sanity-check the values. + if (not isinstance(d['shape'], tuple) or + not all(isinstance(x, int) for x in d['shape'])): + msg = "shape is not valid: {!r}" + raise ValueError(msg.format(d['shape'])) + if not isinstance(d['fortran_order'], bool): + msg = "fortran_order is not a valid bool: {!r}" + raise ValueError(msg.format(d['fortran_order'])) + try: + dtype = descr_to_dtype(d['descr']) + except TypeError as e: + msg = "descr is not a valid dtype descriptor: {!r}" + raise ValueError(msg.format(d['descr'])) from e + + return d['shape'], d['fortran_order'], dtype + +def write_array(fp, array, version=None, allow_pickle=True, pickle_kwargs=None): + """ + Write an array to an NPY file, including a header. + + If the array is neither C-contiguous nor Fortran-contiguous AND the + file_like object is not a real file object, this function will have to + copy data in memory. + + Parameters + ---------- + fp : file_like object + An open, writable file object, or similar object with a + ``.write()`` method. + array : ndarray + The array to write to disk. + version : (int, int) or None, optional + The version number of the format. None means use the oldest + supported version that is able to store the data. Default: None + allow_pickle : bool, optional + Whether to allow writing pickled data. Default: True + pickle_kwargs : dict, optional + Additional keyword arguments to pass to pickle.dump, excluding + 'protocol'. These are only useful when pickling objects in object + arrays on Python 3 to Python 2 compatible format. + + Raises + ------ + ValueError + If the array cannot be persisted. 
This includes the case of + allow_pickle=False and array being an object array. + Various other errors + If the array contains Python objects as part of its dtype, the + process of pickling them may raise various errors if the objects + are not picklable. + + """ + _check_version(version) + _write_array_header(fp, header_data_from_array_1_0(array), version) + + if array.itemsize == 0: + buffersize = 0 + else: + # Set buffer size to 16 MiB to hide the Python loop overhead. + buffersize = max(16 * 1024 ** 2 // array.itemsize, 1) + + if array.dtype.hasobject: + # We contain Python objects so we cannot write out the data + # directly. Instead, we will pickle it out + if not allow_pickle: + raise ValueError("Object arrays cannot be saved when " + "allow_pickle=False") + if pickle_kwargs is None: + pickle_kwargs = {} + pickle.dump(array, fp, protocol=3, **pickle_kwargs) + elif array.flags.f_contiguous and not array.flags.c_contiguous: + if isfileobj(fp): + array.T.tofile(fp) + else: + for chunk in numpy.nditer( + array, flags=['external_loop', 'buffered', 'zerosize_ok'], + buffersize=buffersize, order='F'): + fp.write(chunk.tobytes('C')) + else: + if isfileobj(fp): + array.tofile(fp) + else: + for chunk in numpy.nditer( + array, flags=['external_loop', 'buffered', 'zerosize_ok'], + buffersize=buffersize, order='C'): + fp.write(chunk.tobytes('C')) + + +def read_array(fp, allow_pickle=False, pickle_kwargs=None): + """ + Read an array from an NPY file. + + Parameters + ---------- + fp : file_like object + If this is not a real file object, then this may take extra memory + and time. + allow_pickle : bool, optional + Whether to allow writing pickled data. Default: False + + .. versionchanged:: 1.16.3 + Made default False in response to CVE-2019-6446. + + pickle_kwargs : dict + Additional keyword arguments to pass to pickle.load. These are only + useful when loading object arrays saved on Python 2 when using + Python 3. + + Returns + ------- + array : ndarray + The array from the data on disk. + + Raises + ------ + ValueError + If the data is invalid, or allow_pickle=False and the file contains + an object array. + + """ + version = read_magic(fp) + _check_version(version) + shape, fortran_order, dtype = _read_array_header(fp, version) + if len(shape) == 0: + count = 1 + else: + count = numpy.multiply.reduce(shape, dtype=numpy.int64) + + # Now read the actual data. + if dtype.hasobject: + # The array contained Python objects. We need to unpickle the data. + if not allow_pickle: + raise ValueError("Object arrays cannot be loaded when " + "allow_pickle=False") + if pickle_kwargs is None: + pickle_kwargs = {} + try: + array = pickle.load(fp, **pickle_kwargs) + except UnicodeError as err: + # Friendlier error message + raise UnicodeError("Unpickling a python object failed: %r\n" + "You may need to pass the encoding= option " + "to numpy.load" % (err,)) from err + else: + if isfileobj(fp): + # We can use the fast fromfile() function. + array = numpy.fromfile(fp, dtype=dtype, count=count) + else: + # This is not a real file. We have to read it the + # memory-intensive way. + # crc32 module fails on reads greater than 2 ** 32 bytes, + # breaking large reads from gzip streams. Chunk reads to + # BUFFER_SIZE bytes to avoid issue and reduce memory overhead + # of the read. In non-chunked case count < max_read_count, so + # only one read is performed. 
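+            #
+            # A worked example of the chunking (illustrative numbers): with
+            # the default BUFFER_SIZE of 2**18 bytes and a float64 array
+            # (itemsize 8), max_read_count is 32768 elements, so a count of
+            # 100000 elements is read in four chunks.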
+ + # Use np.ndarray instead of np.empty since the latter does + # not correctly instantiate zero-width string dtypes; see + # https://github.com/numpy/numpy/pull/6430 + array = numpy.ndarray(count, dtype=dtype) + + if dtype.itemsize > 0: + # If dtype.itemsize == 0 then there's nothing more to read + max_read_count = BUFFER_SIZE // min(BUFFER_SIZE, dtype.itemsize) + + for i in range(0, count, max_read_count): + read_count = min(max_read_count, count - i) + read_size = int(read_count * dtype.itemsize) + data = _read_bytes(fp, read_size, "array data") + array[i:i+read_count] = numpy.frombuffer(data, dtype=dtype, + count=read_count) + + if fortran_order: + array.shape = shape[::-1] + array = array.transpose() + else: + array.shape = shape + + return array + + +def open_memmap(filename, mode='r+', dtype=None, shape=None, + fortran_order=False, version=None): + """ + Open a .npy file as a memory-mapped array. + + This may be used to read an existing file or create a new one. + + Parameters + ---------- + filename : str or path-like + The name of the file on disk. This may *not* be a file-like + object. + mode : str, optional + The mode in which to open the file; the default is 'r+'. In + addition to the standard file modes, 'c' is also accepted to mean + "copy on write." See `memmap` for the available mode strings. + dtype : data-type, optional + The data type of the array if we are creating a new file in "write" + mode, if not, `dtype` is ignored. The default value is None, which + results in a data-type of `float64`. + shape : tuple of int + The shape of the array if we are creating a new file in "write" + mode, in which case this parameter is required. Otherwise, this + parameter is ignored and is thus optional. + fortran_order : bool, optional + Whether the array should be Fortran-contiguous (True) or + C-contiguous (False, the default) if we are creating a new file in + "write" mode. + version : tuple of int (major, minor) or None + If the mode is a "write" mode, then this is the version of the file + format used to create the file. None means use the oldest + supported version that is able to store the data. Default: None + + Returns + ------- + marray : memmap + The memory-mapped array. + + Raises + ------ + ValueError + If the data or the mode is invalid. + OSError + If the file is not found or cannot be opened correctly. + + See Also + -------- + numpy.memmap + + """ + if isfileobj(filename): + raise ValueError("Filename must be a string or a path-like object." + " Memmap cannot use existing file handles.") + + if 'w' in mode: + # We are creating the file, not reading it. + # Check if we ought to create the file. + _check_version(version) + # Ensure that the given dtype is an authentic dtype object rather + # than just something that can be interpreted as a dtype object. + dtype = numpy.dtype(dtype) + if dtype.hasobject: + msg = "Array can't be memory-mapped: Python objects in dtype." + raise ValueError(msg) + d = dict( + descr=dtype_to_descr(dtype), + fortran_order=fortran_order, + shape=shape, + ) + # If we got here, then it should be safe to create the file. + with open(os_fspath(filename), mode+'b') as fp: + _write_array_header(fp, d, version) + offset = fp.tell() + else: + # Read the header of the file first. + with open(os_fspath(filename), 'rb') as fp: + version = read_magic(fp) + _check_version(version) + + shape, fortran_order, dtype = _read_array_header(fp, version) + if dtype.hasobject: + msg = "Array can't be memory-mapped: Python objects in dtype." 
+                raise ValueError(msg)
+            offset = fp.tell()
+
+    if fortran_order:
+        order = 'F'
+    else:
+        order = 'C'
+
+    # We need to change a write-only mode to a read-write mode since we've
+    # already written data to the file.
+    if mode == 'w+':
+        mode = 'r+'
+
+    marray = numpy.memmap(filename, dtype=dtype, shape=shape, order=order,
+                          mode=mode, offset=offset)
+
+    return marray
+
+
+def _read_bytes(fp, size, error_template="ran out of data"):
+    """
+    Read from file-like object until size bytes are read.
+    Raises ValueError if EOF is encountered before size bytes are read.
+    Non-blocking objects only supported if they derive from io objects.
+
+    Required as e.g. ZipExtFile in python 2.6 can return less data than
+    requested.
+    """
+    data = bytes()
+    while True:
+        # io files (default in python3) return None or raise on
+        # would-block, python2 file will truncate, probably nothing can be
+        # done about that. note that regular files can't be non-blocking
+        try:
+            r = fp.read(size - len(data))
+            data += r
+            if len(r) == 0 or len(data) == size:
+                break
+        except BlockingIOError:
+            pass
+    if len(data) != size:
+        msg = "EOF: reading %s, expected %d bytes got %d"
+        raise ValueError(msg % (error_template, size, len(data)))
+    else:
+        return data
diff --git a/.local/lib/python3.8/site-packages/numpy/lib/format.pyi b/.local/lib/python3.8/site-packages/numpy/lib/format.pyi
new file mode 100644
index 0000000..092245d
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/lib/format.pyi
@@ -0,0 +1,22 @@
+from typing import Any, List, Set, Literal, Final
+
+__all__: List[str]
+
+EXPECTED_KEYS: Final[Set[str]]
+MAGIC_PREFIX: Final[bytes]
+MAGIC_LEN: Literal[8]
+ARRAY_ALIGN: Literal[64]
+BUFFER_SIZE: Literal[262144]  # 2**18
+
+def magic(major, minor): ...
+def read_magic(fp): ...
+def dtype_to_descr(dtype): ...
+def descr_to_dtype(descr): ...
+def header_data_from_array_1_0(array): ...
+def write_array_header_1_0(fp, d): ...
+def write_array_header_2_0(fp, d): ...
+def read_array_header_1_0(fp): ...
+def read_array_header_2_0(fp): ...
+def write_array(fp, array, version=..., allow_pickle=..., pickle_kwargs=...): ...
+def read_array(fp, allow_pickle=..., pickle_kwargs=...): ...
+def open_memmap(filename, mode=..., dtype=..., shape=..., fortran_order=..., version=...): ...
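A quick usage sketch of the `numpy.lib.format` API above (the buffer and
array contents are arbitrary illustrations, not taken from these files):

    import io
    import numpy as np
    from numpy.lib import format as npy_format

    # Serialize an array to an in-memory buffer with the module's own
    # writer, then rewind and parse it back with the reader.
    buf = io.BytesIO()
    npy_format.write_array(buf, np.arange(6).reshape(2, 3))
    buf.seek(0)
    restored = npy_format.read_array(buf)
    assert restored.shape == (2, 3)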
diff --git a/.local/lib/python3.8/site-packages/numpy/lib/function_base.py b/.local/lib/python3.8/site-packages/numpy/lib/function_base.py
new file mode 100644
index 0000000..a215f63
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/lib/function_base.py
@@ -0,0 +1,5507 @@
+import collections.abc
+import functools
+import re
+import sys
+import warnings
+
+import numpy as np
+import numpy.core.numeric as _nx
+from numpy.core import transpose
+from numpy.core.numeric import (
+    ones, zeros_like, arange, concatenate, array, asarray, asanyarray, empty,
+    ndarray, take, dot, where, intp, integer, isscalar, absolute
+    )
+from numpy.core.umath import (
+    pi, add, arctan2, frompyfunc, cos, less_equal, sqrt, sin,
+    mod, exp, not_equal, subtract
+    )
+from numpy.core.fromnumeric import (
+    ravel, nonzero, partition, mean, any, sum
+    )
+from numpy.core.numerictypes import typecodes
+from numpy.core.overrides import set_module
+from numpy.core import overrides
+from numpy.core.function_base import add_newdoc
+from numpy.lib.twodim_base import diag
+from numpy.core.multiarray import (
+    _insert, add_docstring, bincount, normalize_axis_index, _monotonicity,
+    interp as compiled_interp, interp_complex as compiled_interp_complex
+    )
+from numpy.core.umath import _add_newdoc_ufunc as add_newdoc_ufunc
+
+import builtins
+
+# needed in this module for compatibility
+from numpy.lib.histograms import histogram, histogramdd  # noqa: F401
+
+
+array_function_dispatch = functools.partial(
+    overrides.array_function_dispatch, module='numpy')
+
+
+__all__ = [
+    'select', 'piecewise', 'trim_zeros', 'copy', 'iterable', 'percentile',
+    'diff', 'gradient', 'angle', 'unwrap', 'sort_complex', 'disp', 'flip',
+    'rot90', 'extract', 'place', 'vectorize', 'asarray_chkfinite', 'average',
+    'bincount', 'digitize', 'cov', 'corrcoef',
+    'msort', 'median', 'sinc', 'hamming', 'hanning', 'bartlett',
+    'blackman', 'kaiser', 'trapz', 'i0', 'add_newdoc', 'add_docstring',
+    'meshgrid', 'delete', 'insert', 'append', 'interp', 'add_newdoc_ufunc',
+    'quantile'
+    ]
+
+# _QuantileMethods is a dictionary listing all the supported methods to
+# compute quantile/percentile.
+#
+# Below, virtual_index refers to the index of the element where the
+# percentile would be found in the sorted sample.
+# When the sample contains exactly the percentile wanted, the virtual_index
+# is an integer equal to the index of this element.
+# When the percentile wanted is in between two elements, the virtual_index
+# is made of an integer part (a.k.a. 'i' or 'left') and a fractional part
+# (a.k.a. 'g' or 'gamma').
+#
+# Each method in _QuantileMethods has two properties:
+# get_virtual_index : Callable
+#     The function used to compute the virtual_index.
+# fix_gamma : Callable
+#     A function used for discrete methods to force the index to a
+#     specific value.
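+#
+# As an illustrative note (an editorial addition, not an upstream comment):
+# with n = 5 samples and quantile q = 0.5, the default 'linear' method
+# below gives virtual_index = (n - 1) * q = 2.0, i.e. i = 2 and gamma = 0.0,
+# so the median is exactly the sample at index 2. With q = 0.6 the
+# virtual_index is (5 - 1) * 0.6 = 2.4, so the result interpolates between
+# the samples at indices 2 and 3 with gamma = 0.4.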
+_QuantileMethods = dict( + # --- HYNDMAN and FAN METHODS + # Discrete methods + inverted_cdf=dict( + get_virtual_index=lambda n, quantiles: _inverted_cdf(n, quantiles), + fix_gamma=lambda gamma, _: gamma, # should never be called + ), + averaged_inverted_cdf=dict( + get_virtual_index=lambda n, quantiles: (n * quantiles) - 1, + fix_gamma=lambda gamma, _: _get_gamma_mask( + shape=gamma.shape, + default_value=1., + conditioned_value=0.5, + where=gamma == 0), + ), + closest_observation=dict( + get_virtual_index=lambda n, quantiles: _closest_observation(n, + quantiles), + fix_gamma=lambda gamma, _: gamma, # should never be called + ), + # Continuous methods + interpolated_inverted_cdf=dict( + get_virtual_index=lambda n, quantiles: + _compute_virtual_index(n, quantiles, 0, 1), + fix_gamma=lambda gamma, _: gamma, + ), + hazen=dict( + get_virtual_index=lambda n, quantiles: + _compute_virtual_index(n, quantiles, 0.5, 0.5), + fix_gamma=lambda gamma, _: gamma, + ), + weibull=dict( + get_virtual_index=lambda n, quantiles: + _compute_virtual_index(n, quantiles, 0, 0), + fix_gamma=lambda gamma, _: gamma, + ), + # Default method. + # To avoid some rounding issues, `(n-1) * quantiles` is preferred to + # `_compute_virtual_index(n, quantiles, 1, 1)`. + # They are mathematically equivalent. + linear=dict( + get_virtual_index=lambda n, quantiles: (n - 1) * quantiles, + fix_gamma=lambda gamma, _: gamma, + ), + median_unbiased=dict( + get_virtual_index=lambda n, quantiles: + _compute_virtual_index(n, quantiles, 1 / 3.0, 1 / 3.0), + fix_gamma=lambda gamma, _: gamma, + ), + normal_unbiased=dict( + get_virtual_index=lambda n, quantiles: + _compute_virtual_index(n, quantiles, 3 / 8.0, 3 / 8.0), + fix_gamma=lambda gamma, _: gamma, + ), + # --- OTHER METHODS + lower=dict( + get_virtual_index=lambda n, quantiles: np.floor( + (n - 1) * quantiles).astype(np.intp), + fix_gamma=lambda gamma, _: gamma, + # should never be called, index dtype is int + ), + higher=dict( + get_virtual_index=lambda n, quantiles: np.ceil( + (n - 1) * quantiles).astype(np.intp), + fix_gamma=lambda gamma, _: gamma, + # should never be called, index dtype is int + ), + midpoint=dict( + get_virtual_index=lambda n, quantiles: 0.5 * ( + np.floor((n - 1) * quantiles) + + np.ceil((n - 1) * quantiles)), + fix_gamma=lambda gamma, index: _get_gamma_mask( + shape=gamma.shape, + default_value=0.5, + conditioned_value=0., + where=index % 1 == 0), + ), + nearest=dict( + get_virtual_index=lambda n, quantiles: np.around( + (n - 1) * quantiles).astype(np.intp), + fix_gamma=lambda gamma, _: gamma, + # should never be called, index dtype is int + )) + + +def _rot90_dispatcher(m, k=None, axes=None): + return (m,) + + +@array_function_dispatch(_rot90_dispatcher) +def rot90(m, k=1, axes=(0, 1)): + """ + Rotate an array by 90 degrees in the plane specified by axes. + + Rotation direction is from the first towards the second axis. + + Parameters + ---------- + m : array_like + Array of two or more dimensions. + k : integer + Number of times the array is rotated by 90 degrees. + axes: (2,) array_like + The array is rotated in the plane defined by the axes. + Axes must be different. + + .. versionadded:: 1.12.0 + + Returns + ------- + y : ndarray + A rotated view of `m`. + + See Also + -------- + flip : Reverse the order of elements in an array along the given axis. + fliplr : Flip an array horizontally. + flipud : Flip an array vertically. 
+ + Notes + ----- + ``rot90(m, k=1, axes=(1,0))`` is the reverse of + ``rot90(m, k=1, axes=(0,1))`` + + ``rot90(m, k=1, axes=(1,0))`` is equivalent to + ``rot90(m, k=-1, axes=(0,1))`` + + Examples + -------- + >>> m = np.array([[1,2],[3,4]], int) + >>> m + array([[1, 2], + [3, 4]]) + >>> np.rot90(m) + array([[2, 4], + [1, 3]]) + >>> np.rot90(m, 2) + array([[4, 3], + [2, 1]]) + >>> m = np.arange(8).reshape((2,2,2)) + >>> np.rot90(m, 1, (1,2)) + array([[[1, 3], + [0, 2]], + [[5, 7], + [4, 6]]]) + + """ + axes = tuple(axes) + if len(axes) != 2: + raise ValueError("len(axes) must be 2.") + + m = asanyarray(m) + + if axes[0] == axes[1] or absolute(axes[0] - axes[1]) == m.ndim: + raise ValueError("Axes must be different.") + + if (axes[0] >= m.ndim or axes[0] < -m.ndim + or axes[1] >= m.ndim or axes[1] < -m.ndim): + raise ValueError("Axes={} out of range for array of ndim={}." + .format(axes, m.ndim)) + + k %= 4 + + if k == 0: + return m[:] + if k == 2: + return flip(flip(m, axes[0]), axes[1]) + + axes_list = arange(0, m.ndim) + (axes_list[axes[0]], axes_list[axes[1]]) = (axes_list[axes[1]], + axes_list[axes[0]]) + + if k == 1: + return transpose(flip(m, axes[1]), axes_list) + else: + # k == 3 + return flip(transpose(m, axes_list), axes[1]) + + +def _flip_dispatcher(m, axis=None): + return (m,) + + +@array_function_dispatch(_flip_dispatcher) +def flip(m, axis=None): + """ + Reverse the order of elements in an array along the given axis. + + The shape of the array is preserved, but the elements are reordered. + + .. versionadded:: 1.12.0 + + Parameters + ---------- + m : array_like + Input array. + axis : None or int or tuple of ints, optional + Axis or axes along which to flip over. The default, + axis=None, will flip over all of the axes of the input array. + If axis is negative it counts from the last to the first axis. + + If axis is a tuple of ints, flipping is performed on all of the axes + specified in the tuple. + + .. versionchanged:: 1.15.0 + None and tuples of axes are supported + + Returns + ------- + out : array_like + A view of `m` with the entries of axis reversed. Since a view is + returned, this operation is done in constant time. + + See Also + -------- + flipud : Flip an array vertically (axis=0). + fliplr : Flip an array horizontally (axis=1). + + Notes + ----- + flip(m, 0) is equivalent to flipud(m). + + flip(m, 1) is equivalent to fliplr(m). + + flip(m, n) corresponds to ``m[...,::-1,...]`` with ``::-1`` at position n. + + flip(m) corresponds to ``m[::-1,::-1,...,::-1]`` with ``::-1`` at all + positions. + + flip(m, (0, 1)) corresponds to ``m[::-1,::-1,...]`` with ``::-1`` at + position 0 and position 1. 
+ + Examples + -------- + >>> A = np.arange(8).reshape((2,2,2)) + >>> A + array([[[0, 1], + [2, 3]], + [[4, 5], + [6, 7]]]) + >>> np.flip(A, 0) + array([[[4, 5], + [6, 7]], + [[0, 1], + [2, 3]]]) + >>> np.flip(A, 1) + array([[[2, 3], + [0, 1]], + [[6, 7], + [4, 5]]]) + >>> np.flip(A) + array([[[7, 6], + [5, 4]], + [[3, 2], + [1, 0]]]) + >>> np.flip(A, (0, 2)) + array([[[5, 4], + [7, 6]], + [[1, 0], + [3, 2]]]) + >>> A = np.random.randn(3,4,5) + >>> np.all(np.flip(A,2) == A[:,:,::-1,...]) + True + """ + if not hasattr(m, 'ndim'): + m = asarray(m) + if axis is None: + indexer = (np.s_[::-1],) * m.ndim + else: + axis = _nx.normalize_axis_tuple(axis, m.ndim) + indexer = [np.s_[:]] * m.ndim + for ax in axis: + indexer[ax] = np.s_[::-1] + indexer = tuple(indexer) + return m[indexer] + + +@set_module('numpy') +def iterable(y): + """ + Check whether or not an object can be iterated over. + + Parameters + ---------- + y : object + Input object. + + Returns + ------- + b : bool + Return ``True`` if the object has an iterator method or is a + sequence and ``False`` otherwise. + + + Examples + -------- + >>> np.iterable([1, 2, 3]) + True + >>> np.iterable(2) + False + + Notes + ----- + In most cases, the results of ``np.iterable(obj)`` are consistent with + ``isinstance(obj, collections.abc.Iterable)``. One notable exception is + the treatment of 0-dimensional arrays:: + + >>> from collections.abc import Iterable + >>> a = np.array(1.0) # 0-dimensional numpy array + >>> isinstance(a, Iterable) + True + >>> np.iterable(a) + False + + """ + try: + iter(y) + except TypeError: + return False + return True + + +def _average_dispatcher(a, axis=None, weights=None, returned=None): + return (a, weights) + + +@array_function_dispatch(_average_dispatcher) +def average(a, axis=None, weights=None, returned=False): + """ + Compute the weighted average along the specified axis. + + Parameters + ---------- + a : array_like + Array containing data to be averaged. If `a` is not an array, a + conversion is attempted. + axis : None or int or tuple of ints, optional + Axis or axes along which to average `a`. The default, + axis=None, will average over all of the elements of the input array. + If axis is negative it counts from the last to the first axis. + + .. versionadded:: 1.7.0 + + If axis is a tuple of ints, averaging is performed on all of the axes + specified in the tuple instead of a single axis or all the axes as + before. + weights : array_like, optional + An array of weights associated with the values in `a`. Each value in + `a` contributes to the average according to its associated weight. + The weights array can either be 1-D (in which case its length must be + the size of `a` along the given axis) or of the same shape as `a`. + If `weights=None`, then all data in `a` are assumed to have a + weight equal to one. The 1-D calculation is:: + + avg = sum(a * weights) / sum(weights) + + The only constraint on `weights` is that `sum(weights)` must not be 0. + returned : bool, optional + Default is `False`. If `True`, the tuple (`average`, `sum_of_weights`) + is returned, otherwise only the average is returned. + If `weights=None`, `sum_of_weights` is equivalent to the number of + elements over which the average is taken. + + Returns + ------- + retval, [sum_of_weights] : array_type or double + Return the average along the specified axis. When `returned` is `True`, + return a tuple with the average as the first element and the sum + of the weights as the second element. 
`sum_of_weights` is of the same type as `retval`.
+        The result dtype follows a general pattern. If `weights` is None,
+        the result dtype will be that of `a`, or ``float64`` if `a` is
+        integral. Otherwise, if `weights` is not None and `a` is
+        non-integral, the result type will be the type of lowest precision
+        capable of representing values of both `a` and `weights`. If `a`
+        happens to be integral, the previous rules still apply but the
+        result dtype will be at least ``float64``.
+
+    Raises
+    ------
+    ZeroDivisionError
+        When all weights along axis are zero. See `numpy.ma.average` for a
+        version robust to this type of error.
+    TypeError
+        When the length of 1D `weights` is not the same as the shape of `a`
+        along axis.
+
+    See Also
+    --------
+    mean
+
+    ma.average : average for masked arrays -- useful if your data contains
+                 "missing" values
+    numpy.result_type : Returns the type that results from applying the
+                        numpy type promotion rules to the arguments.
+
+    Examples
+    --------
+    >>> data = np.arange(1, 5)
+    >>> data
+    array([1, 2, 3, 4])
+    >>> np.average(data)
+    2.5
+    >>> np.average(np.arange(1, 11), weights=np.arange(10, 0, -1))
+    4.0
+
+    >>> data = np.arange(6).reshape((3,2))
+    >>> data
+    array([[0, 1],
+           [2, 3],
+           [4, 5]])
+    >>> np.average(data, axis=1, weights=[1./4, 3./4])
+    array([0.75, 2.75, 4.75])
+    >>> np.average(data, weights=[1./4, 3./4])
+    Traceback (most recent call last):
+        ...
+    TypeError: Axis must be specified when shapes of a and weights differ.
+
+    >>> a = np.ones(5, dtype=np.float128)
+    >>> w = np.ones(5, dtype=np.complex64)
+    >>> avg = np.average(a, weights=w)
+    >>> print(avg.dtype)
+    complex256
+    """
+    a = np.asanyarray(a)
+
+    if weights is None:
+        avg = a.mean(axis)
+        scl = avg.dtype.type(a.size/avg.size)
+    else:
+        wgt = np.asanyarray(weights)
+
+        if issubclass(a.dtype.type, (np.integer, np.bool_)):
+            result_dtype = np.result_type(a.dtype, wgt.dtype, 'f8')
+        else:
+            result_dtype = np.result_type(a.dtype, wgt.dtype)
+
+        # Sanity checks
+        if a.shape != wgt.shape:
+            if axis is None:
+                raise TypeError(
+                    "Axis must be specified when shapes of a and weights "
+                    "differ.")
+            if wgt.ndim != 1:
+                raise TypeError(
+                    "1D weights expected when shapes of a and weights differ.")
+            if wgt.shape[0] != a.shape[axis]:
+                raise ValueError(
+                    "Length of weights not compatible with specified axis.")
+
+            # setup wgt to broadcast along axis
+            wgt = np.broadcast_to(wgt, (a.ndim-1)*(1,) + wgt.shape)
+            wgt = wgt.swapaxes(-1, axis)
+
+        scl = wgt.sum(axis=axis, dtype=result_dtype)
+        if np.any(scl == 0.0):
+            raise ZeroDivisionError(
+                "Weights sum to zero, can't be normalized")
+
+        avg = np.multiply(a, wgt, dtype=result_dtype).sum(axis)/scl
+
+    if returned:
+        if scl.shape != avg.shape:
+            scl = np.broadcast_to(scl, avg.shape).copy()
+        return avg, scl
+    else:
+        return avg
+
+
+@set_module('numpy')
+def asarray_chkfinite(a, dtype=None, order=None):
+    """Convert the input to an array, checking for NaNs or Infs.
+
+    Parameters
+    ----------
+    a : array_like
+        Input data, in any form that can be converted to an array. This
+        includes lists, lists of tuples, tuples, tuples of tuples, tuples
+        of lists and ndarrays. Success requires no NaNs or Infs.
+    dtype : data-type, optional
+        By default, the data-type is inferred from the input data.
+    order : {'C', 'F', 'A', 'K'}, optional
+        Memory layout. 'A' and 'K' depend on the order of input array a.
+        'C' row-major (C-style),
+        'F' column-major (Fortran-style) memory representation.
+        'A' (any) means 'F' if `a` is Fortran contiguous, 'C' otherwise
+        'K' (keep) preserve input order
+        Defaults to 'C'.
+
+    Returns
+    -------
+    out : ndarray
+        Array interpretation of `a`. No copy is performed if the input
+        is already an ndarray. If `a` is a subclass of ndarray, a base
+        class ndarray is returned.
+
+    Raises
+    ------
+    ValueError
+        Raises ValueError if `a` contains NaN (Not a Number) or Inf (Infinity).
+
+    See Also
+    --------
+    asarray : Create an array.
+    asanyarray : Similar function which passes through subclasses.
+    ascontiguousarray : Convert input to a contiguous array.
+    asfarray : Convert input to a floating point ndarray.
+    asfortranarray : Convert input to an ndarray with column-major
+                     memory order.
+    fromiter : Create an array from an iterator.
+    fromfunction : Construct an array by executing a function on grid
+                   positions.
+
+    Examples
+    --------
+    Convert a list into an array. If all elements are finite,
+    ``asarray_chkfinite`` is identical to ``asarray``.
+
+    >>> a = [1, 2]
+    >>> np.asarray_chkfinite(a, dtype=float)
+    array([1., 2.])
+
+    Raises ValueError if array_like contains NaNs or Infs.
+
+    >>> a = [1, 2, np.inf]
+    >>> try:
+    ...     np.asarray_chkfinite(a)
+    ... except ValueError:
+    ...     print('ValueError')
+    ...
+    ValueError
+
+    """
+    a = asarray(a, dtype=dtype, order=order)
+    if a.dtype.char in typecodes['AllFloat'] and not np.isfinite(a).all():
+        raise ValueError(
+            "array must not contain infs or NaNs")
+    return a
+
+
+def _piecewise_dispatcher(x, condlist, funclist, *args, **kw):
+    yield x
+    # support the undocumented behavior of allowing scalars
+    if np.iterable(condlist):
+        yield from condlist
+
+
+@array_function_dispatch(_piecewise_dispatcher)
+def piecewise(x, condlist, funclist, *args, **kw):
+    """
+    Evaluate a piecewise-defined function.
+
+    Given a set of conditions and corresponding functions, evaluate each
+    function on the input data wherever its condition is true.
+
+    Parameters
+    ----------
+    x : ndarray or scalar
+        The input domain.
+    condlist : list of bool arrays or bool scalars
+        Each boolean array corresponds to a function in `funclist`. Wherever
+        `condlist[i]` is True, `funclist[i](x)` is used as the output value.
+
+        Each boolean array in `condlist` selects a piece of `x`,
+        and should therefore be of the same shape as `x`.
+
+        The length of `condlist` must correspond to that of `funclist`.
+        If one extra function is given, i.e. if
+        ``len(funclist) == len(condlist) + 1``, then that extra function
+        is the default value, used wherever all conditions are false.
+    funclist : list of callables, f(x,*args,**kw), or scalars
+        Each function is evaluated over `x` wherever its corresponding
+        condition is True. It should take a 1d array as input and give a 1d
+        array or a scalar value as output. If, instead of a callable,
+        a scalar is provided then a constant function (``lambda x: scalar``)
+        is assumed.
+    args : tuple, optional
+        Any further arguments given to `piecewise` are passed to the
+        functions upon execution, i.e., if called ``piecewise(..., ..., 1,
+        'a')``, then each function is called as ``f(x, 1, 'a')``.
+    kw : dict, optional
+        Keyword arguments used in calling `piecewise` are passed to the
+        functions upon execution, i.e., if called
+        ``piecewise(..., ..., alpha=1)``, then each function is called as
+        ``f(x, alpha=1)``.
+ + Returns + ------- + out : ndarray + The output is the same shape and type as x and is found by + calling the functions in `funclist` on the appropriate portions of `x`, + as defined by the boolean arrays in `condlist`. Portions not covered + by any condition have a default value of 0. + + + See Also + -------- + choose, select, where + + Notes + ----- + This is similar to choose or select, except that functions are + evaluated on elements of `x` that satisfy the corresponding condition from + `condlist`. + + The result is:: + + |-- + |funclist[0](x[condlist[0]]) + out = |funclist[1](x[condlist[1]]) + |... + |funclist[n2](x[condlist[n2]]) + |-- + + Examples + -------- + Define the sigma function, which is -1 for ``x < 0`` and +1 for ``x >= 0``. + + >>> x = np.linspace(-2.5, 2.5, 6) + >>> np.piecewise(x, [x < 0, x >= 0], [-1, 1]) + array([-1., -1., -1., 1., 1., 1.]) + + Define the absolute value, which is ``-x`` for ``x <0`` and ``x`` for + ``x >= 0``. + + >>> np.piecewise(x, [x < 0, x >= 0], [lambda x: -x, lambda x: x]) + array([2.5, 1.5, 0.5, 0.5, 1.5, 2.5]) + + Apply the same function to a scalar value. + + >>> y = -2 + >>> np.piecewise(y, [y < 0, y >= 0], [lambda x: -x, lambda x: x]) + array(2) + + """ + x = asanyarray(x) + n2 = len(funclist) + + # undocumented: single condition is promoted to a list of one condition + if isscalar(condlist) or ( + not isinstance(condlist[0], (list, ndarray)) and x.ndim != 0): + condlist = [condlist] + + condlist = asarray(condlist, dtype=bool) + n = len(condlist) + + if n == n2 - 1: # compute the "otherwise" condition. + condelse = ~np.any(condlist, axis=0, keepdims=True) + condlist = np.concatenate([condlist, condelse], axis=0) + n += 1 + elif n != n2: + raise ValueError( + "with {} condition(s), either {} or {} functions are expected" + .format(n, n, n+1) + ) + + y = zeros_like(x) + for cond, func in zip(condlist, funclist): + if not isinstance(func, collections.abc.Callable): + y[cond] = func + else: + vals = x[cond] + if vals.size > 0: + y[cond] = func(vals, *args, **kw) + + return y + + +def _select_dispatcher(condlist, choicelist, default=None): + yield from condlist + yield from choicelist + + +@array_function_dispatch(_select_dispatcher) +def select(condlist, choicelist, default=0): + """ + Return an array drawn from elements in choicelist, depending on conditions. + + Parameters + ---------- + condlist : list of bool ndarrays + The list of conditions which determine from which array in `choicelist` + the output elements are taken. When multiple conditions are satisfied, + the first one encountered in `condlist` is used. + choicelist : list of ndarrays + The list of arrays from which the output elements are taken. It has + to be of the same length as `condlist`. + default : scalar, optional + The element inserted in `output` when all conditions evaluate to False. + + Returns + ------- + output : ndarray + The output at position m is the m-th element of the array in + `choicelist` where the m-th element of the corresponding array in + `condlist` is True. + + See Also + -------- + where : Return elements from one of two arrays depending on condition. 
+ take, choose, compress, diag, diagonal + + Examples + -------- + >>> x = np.arange(6) + >>> condlist = [x<3, x>3] + >>> choicelist = [x, x**2] + >>> np.select(condlist, choicelist, 42) + array([ 0, 1, 2, 42, 16, 25]) + + >>> condlist = [x<=4, x>3] + >>> choicelist = [x, x**2] + >>> np.select(condlist, choicelist, 55) + array([ 0, 1, 2, 3, 4, 25]) + + """ + # Check the size of condlist and choicelist are the same, or abort. + if len(condlist) != len(choicelist): + raise ValueError( + 'list of cases must be same length as list of conditions') + + # Now that the dtype is known, handle the deprecated select([], []) case + if len(condlist) == 0: + raise ValueError("select with an empty condition list is not possible") + + choicelist = [np.asarray(choice) for choice in choicelist] + + try: + intermediate_dtype = np.result_type(*choicelist) + except TypeError as e: + msg = f'Choicelist elements do not have a common dtype: {e}' + raise TypeError(msg) from None + default_array = np.asarray(default) + choicelist.append(default_array) + + # need to get the result type before broadcasting for correct scalar + # behaviour + try: + dtype = np.result_type(intermediate_dtype, default_array) + except TypeError as e: + msg = f'Choicelists and default value do not have a common dtype: {e}' + raise TypeError(msg) from None + + # Convert conditions to arrays and broadcast conditions and choices + # as the shape is needed for the result. Doing it separately optimizes + # for example when all choices are scalars. + condlist = np.broadcast_arrays(*condlist) + choicelist = np.broadcast_arrays(*choicelist) + + # If cond array is not an ndarray in boolean format or scalar bool, abort. + for i, cond in enumerate(condlist): + if cond.dtype.type is not np.bool_: + raise TypeError( + 'invalid entry {} in condlist: should be boolean ndarray'.format(i)) + + if choicelist[0].ndim == 0: + # This may be common, so avoid the call. + result_shape = condlist[0].shape + else: + result_shape = np.broadcast_arrays(condlist[0], choicelist[0])[0].shape + + result = np.full(result_shape, choicelist[-1], dtype) + + # Use np.copyto to burn each choicelist array onto result, using the + # corresponding condlist as a boolean mask. This is done in reverse + # order since the first choice should take precedence. + choicelist = choicelist[-2::-1] + condlist = condlist[::-1] + for choice, cond in zip(choicelist, condlist): + np.copyto(result, choice, where=cond) + + return result + + +def _copy_dispatcher(a, order=None, subok=None): + return (a,) + + +@array_function_dispatch(_copy_dispatcher) +def copy(a, order='K', subok=False): + """ + Return an array copy of the given object. + + Parameters + ---------- + a : array_like + Input data. + order : {'C', 'F', 'A', 'K'}, optional + Controls the memory layout of the copy. 'C' means C-order, + 'F' means F-order, 'A' means 'F' if `a` is Fortran contiguous, + 'C' otherwise. 'K' means match the layout of `a` as closely + as possible. (Note that this function and :meth:`ndarray.copy` are very + similar, but have different default values for their order= + arguments.) + subok : bool, optional + If True, then sub-classes will be passed-through, otherwise the + returned array will be forced to be a base-class array (defaults to False). + + .. versionadded:: 1.19.0 + + Returns + ------- + arr : ndarray + Array interpretation of `a`. 
+ + See Also + -------- + ndarray.copy : Preferred method for creating an array copy + + Notes + ----- + This is equivalent to: + + >>> np.array(a, copy=True) #doctest: +SKIP + + Examples + -------- + Create an array x, with a reference y and a copy z: + + >>> x = np.array([1, 2, 3]) + >>> y = x + >>> z = np.copy(x) + + Note that, when we modify x, y changes, but not z: + + >>> x[0] = 10 + >>> x[0] == y[0] + True + >>> x[0] == z[0] + False + + Note that, np.copy clears previously set WRITEABLE=False flag. + + >>> a = np.array([1, 2, 3]) + >>> a.flags["WRITEABLE"] = False + >>> b = np.copy(a) + >>> b.flags["WRITEABLE"] + True + >>> b[0] = 3 + >>> b + array([3, 2, 3]) + + Note that np.copy is a shallow copy and will not copy object + elements within arrays. This is mainly important for arrays + containing Python objects. The new array will contain the + same object which may lead to surprises if that object can + be modified (is mutable): + + >>> a = np.array([1, 'm', [2, 3, 4]], dtype=object) + >>> b = np.copy(a) + >>> b[2][0] = 10 + >>> a + array([1, 'm', list([10, 3, 4])], dtype=object) + + To ensure all elements within an ``object`` array are copied, + use `copy.deepcopy`: + + >>> import copy + >>> a = np.array([1, 'm', [2, 3, 4]], dtype=object) + >>> c = copy.deepcopy(a) + >>> c[2][0] = 10 + >>> c + array([1, 'm', list([10, 3, 4])], dtype=object) + >>> a + array([1, 'm', list([2, 3, 4])], dtype=object) + + """ + return array(a, order=order, subok=subok, copy=True) + +# Basic operations + + +def _gradient_dispatcher(f, *varargs, axis=None, edge_order=None): + yield f + yield from varargs + + +@array_function_dispatch(_gradient_dispatcher) +def gradient(f, *varargs, axis=None, edge_order=1): + """ + Return the gradient of an N-dimensional array. + + The gradient is computed using second order accurate central differences + in the interior points and either first or second order accurate one-sides + (forward or backwards) differences at the boundaries. + The returned gradient hence has the same shape as the input array. + + Parameters + ---------- + f : array_like + An N-dimensional array containing samples of a scalar function. + varargs : list of scalar or array, optional + Spacing between f values. Default unitary spacing for all dimensions. + Spacing can be specified using: + + 1. single scalar to specify a sample distance for all dimensions. + 2. N scalars to specify a constant sample distance for each dimension. + i.e. `dx`, `dy`, `dz`, ... + 3. N arrays to specify the coordinates of the values along each + dimension of F. The length of the array must match the size of + the corresponding dimension + 4. Any combination of N scalars/arrays with the meaning of 2. and 3. + + If `axis` is given, the number of varargs must equal the number of axes. + Default: 1. + + edge_order : {1, 2}, optional + Gradient is calculated using N-th order accurate differences + at the boundaries. Default: 1. + + .. versionadded:: 1.9.1 + + axis : None or int or tuple of ints, optional + Gradient is calculated only along the given axis or axes + The default (axis = None) is to calculate the gradient for all the axes + of the input array. axis may be negative, in which case it counts from + the last to the first axis. + + .. versionadded:: 1.11.0 + + Returns + ------- + gradient : ndarray or list of ndarray + A list of ndarrays (or a single ndarray if there is only one dimension) + corresponding to the derivatives of f with respect to each dimension. + Each derivative has the same shape as f. 
+ + Examples + -------- + >>> f = np.array([1, 2, 4, 7, 11, 16], dtype=float) + >>> np.gradient(f) + array([1. , 1.5, 2.5, 3.5, 4.5, 5. ]) + >>> np.gradient(f, 2) + array([0.5 , 0.75, 1.25, 1.75, 2.25, 2.5 ]) + + Spacing can be also specified with an array that represents the coordinates + of the values F along the dimensions. + For instance a uniform spacing: + + >>> x = np.arange(f.size) + >>> np.gradient(f, x) + array([1. , 1.5, 2.5, 3.5, 4.5, 5. ]) + + Or a non uniform one: + + >>> x = np.array([0., 1., 1.5, 3.5, 4., 6.], dtype=float) + >>> np.gradient(f, x) + array([1. , 3. , 3.5, 6.7, 6.9, 2.5]) + + For two dimensional arrays, the return will be two arrays ordered by + axis. In this example the first array stands for the gradient in + rows and the second one in columns direction: + + >>> np.gradient(np.array([[1, 2, 6], [3, 4, 5]], dtype=float)) + [array([[ 2., 2., -1.], + [ 2., 2., -1.]]), array([[1. , 2.5, 4. ], + [1. , 1. , 1. ]])] + + In this example the spacing is also specified: + uniform for axis=0 and non uniform for axis=1 + + >>> dx = 2. + >>> y = [1., 1.5, 3.5] + >>> np.gradient(np.array([[1, 2, 6], [3, 4, 5]], dtype=float), dx, y) + [array([[ 1. , 1. , -0.5], + [ 1. , 1. , -0.5]]), array([[2. , 2. , 2. ], + [2. , 1.7, 0.5]])] + + It is possible to specify how boundaries are treated using `edge_order` + + >>> x = np.array([0, 1, 2, 3, 4]) + >>> f = x**2 + >>> np.gradient(f, edge_order=1) + array([1., 2., 4., 6., 7.]) + >>> np.gradient(f, edge_order=2) + array([0., 2., 4., 6., 8.]) + + The `axis` keyword can be used to specify a subset of axes of which the + gradient is calculated + + >>> np.gradient(np.array([[1, 2, 6], [3, 4, 5]], dtype=float), axis=0) + array([[ 2., 2., -1.], + [ 2., 2., -1.]]) + + Notes + ----- + Assuming that :math:`f\\in C^{3}` (i.e., :math:`f` has at least 3 continuous + derivatives) and let :math:`h_{*}` be a non-homogeneous stepsize, we + minimize the "consistency error" :math:`\\eta_{i}` between the true gradient + and its estimate from a linear combination of the neighboring grid-points: + + .. math:: + + \\eta_{i} = f_{i}^{\\left(1\\right)} - + \\left[ \\alpha f\\left(x_{i}\\right) + + \\beta f\\left(x_{i} + h_{d}\\right) + + \\gamma f\\left(x_{i}-h_{s}\\right) + \\right] + + By substituting :math:`f(x_{i} + h_{d})` and :math:`f(x_{i} - h_{s})` + with their Taylor series expansion, this translates into solving + the following the linear system: + + .. math:: + + \\left\\{ + \\begin{array}{r} + \\alpha+\\beta+\\gamma=0 \\\\ + \\beta h_{d}-\\gamma h_{s}=1 \\\\ + \\beta h_{d}^{2}+\\gamma h_{s}^{2}=0 + \\end{array} + \\right. + + The resulting approximation of :math:`f_{i}^{(1)}` is the following: + + .. math:: + + \\hat f_{i}^{(1)} = + \\frac{ + h_{s}^{2}f\\left(x_{i} + h_{d}\\right) + + \\left(h_{d}^{2} - h_{s}^{2}\\right)f\\left(x_{i}\\right) + - h_{d}^{2}f\\left(x_{i}-h_{s}\\right)} + { h_{s}h_{d}\\left(h_{d} + h_{s}\\right)} + + \\mathcal{O}\\left(\\frac{h_{d}h_{s}^{2} + + h_{s}h_{d}^{2}}{h_{d} + + h_{s}}\\right) + + It is worth noting that if :math:`h_{s}=h_{d}` + (i.e., data are evenly spaced) + we find the standard second order approximation: + + .. math:: + + \\hat f_{i}^{(1)}= + \\frac{f\\left(x_{i+1}\\right) - f\\left(x_{i-1}\\right)}{2h} + + \\mathcal{O}\\left(h^{2}\\right) + + With a similar procedure the forward/backward approximations used for + boundaries can be derived. + + References + ---------- + .. [1] Quarteroni A., Sacco R., Saleri F. (2007) Numerical Mathematics + (Texts in Applied Mathematics). New York: Springer. + .. 
[2] Durran D. R. (1999) Numerical Methods for Wave Equations + in Geophysical Fluid Dynamics. New York: Springer. + .. [3] Fornberg B. (1988) Generation of Finite Difference Formulas on + Arbitrarily Spaced Grids, + Mathematics of Computation 51, no. 184 : 699-706. + `PDF `_. + """ + f = np.asanyarray(f) + N = f.ndim # number of dimensions + + if axis is None: + axes = tuple(range(N)) + else: + axes = _nx.normalize_axis_tuple(axis, N) + + len_axes = len(axes) + n = len(varargs) + if n == 0: + # no spacing argument - use 1 in all axes + dx = [1.0] * len_axes + elif n == 1 and np.ndim(varargs[0]) == 0: + # single scalar for all axes + dx = varargs * len_axes + elif n == len_axes: + # scalar or 1d array for each axis + dx = list(varargs) + for i, distances in enumerate(dx): + distances = np.asanyarray(distances) + if distances.ndim == 0: + continue + elif distances.ndim != 1: + raise ValueError("distances must be either scalars or 1d") + if len(distances) != f.shape[axes[i]]: + raise ValueError("when 1d, distances must match " + "the length of the corresponding dimension") + if np.issubdtype(distances.dtype, np.integer): + # Convert numpy integer types to float64 to avoid modular + # arithmetic in np.diff(distances). + distances = distances.astype(np.float64) + diffx = np.diff(distances) + # if distances are constant reduce to the scalar case + # since it brings a consistent speedup + if (diffx == diffx[0]).all(): + diffx = diffx[0] + dx[i] = diffx + else: + raise TypeError("invalid number of arguments") + + if edge_order > 2: + raise ValueError("'edge_order' greater than 2 not supported") + + # use central differences on interior and one-sided differences on the + # endpoints. This preserves second order-accuracy over the full domain. + + outvals = [] + + # create slice objects --- initially all are [:, :, ..., :] + slice1 = [slice(None)]*N + slice2 = [slice(None)]*N + slice3 = [slice(None)]*N + slice4 = [slice(None)]*N + + otype = f.dtype + if otype.type is np.datetime64: + # the timedelta dtype with the same unit information + otype = np.dtype(otype.name.replace('datetime', 'timedelta')) + # view as timedelta to allow addition + f = f.view(otype) + elif otype.type is np.timedelta64: + pass + elif np.issubdtype(otype, np.inexact): + pass + else: + # All other types convert to floating point. + # First check if f is a numpy integer type; if so, convert f to float64 + # to avoid modular arithmetic when computing the changes in f. + if np.issubdtype(otype, np.integer): + f = f.astype(np.float64) + otype = np.float64 + + for axis, ax_dx in zip(axes, dx): + if f.shape[axis] < edge_order + 1: + raise ValueError( + "Shape of array too small to calculate a numerical gradient, " + "at least (edge_order + 1) elements are required.") + # result allocation + out = np.empty_like(f, dtype=otype) + + # spacing for the current axis + uniform_spacing = np.ndim(ax_dx) == 0 + + # Numerical differentiation: 2nd order interior + slice1[axis] = slice(1, -1) + slice2[axis] = slice(None, -2) + slice3[axis] = slice(1, -1) + slice4[axis] = slice(2, None) + + if uniform_spacing: + out[tuple(slice1)] = (f[tuple(slice4)] - f[tuple(slice2)]) / (2. 
* ax_dx) + else: + dx1 = ax_dx[0:-1] + dx2 = ax_dx[1:] + a = -(dx2)/(dx1 * (dx1 + dx2)) + b = (dx2 - dx1) / (dx1 * dx2) + c = dx1 / (dx2 * (dx1 + dx2)) + # fix the shape for broadcasting + shape = np.ones(N, dtype=int) + shape[axis] = -1 + a.shape = b.shape = c.shape = shape + # 1D equivalent -- out[1:-1] = a * f[:-2] + b * f[1:-1] + c * f[2:] + out[tuple(slice1)] = a * f[tuple(slice2)] + b * f[tuple(slice3)] + c * f[tuple(slice4)] + + # Numerical differentiation: 1st order edges + if edge_order == 1: + slice1[axis] = 0 + slice2[axis] = 1 + slice3[axis] = 0 + dx_0 = ax_dx if uniform_spacing else ax_dx[0] + # 1D equivalent -- out[0] = (f[1] - f[0]) / (x[1] - x[0]) + out[tuple(slice1)] = (f[tuple(slice2)] - f[tuple(slice3)]) / dx_0 + + slice1[axis] = -1 + slice2[axis] = -1 + slice3[axis] = -2 + dx_n = ax_dx if uniform_spacing else ax_dx[-1] + # 1D equivalent -- out[-1] = (f[-1] - f[-2]) / (x[-1] - x[-2]) + out[tuple(slice1)] = (f[tuple(slice2)] - f[tuple(slice3)]) / dx_n + + # Numerical differentiation: 2nd order edges + else: + slice1[axis] = 0 + slice2[axis] = 0 + slice3[axis] = 1 + slice4[axis] = 2 + if uniform_spacing: + a = -1.5 / ax_dx + b = 2. / ax_dx + c = -0.5 / ax_dx + else: + dx1 = ax_dx[0] + dx2 = ax_dx[1] + a = -(2. * dx1 + dx2)/(dx1 * (dx1 + dx2)) + b = (dx1 + dx2) / (dx1 * dx2) + c = - dx1 / (dx2 * (dx1 + dx2)) + # 1D equivalent -- out[0] = a * f[0] + b * f[1] + c * f[2] + out[tuple(slice1)] = a * f[tuple(slice2)] + b * f[tuple(slice3)] + c * f[tuple(slice4)] + + slice1[axis] = -1 + slice2[axis] = -3 + slice3[axis] = -2 + slice4[axis] = -1 + if uniform_spacing: + a = 0.5 / ax_dx + b = -2. / ax_dx + c = 1.5 / ax_dx + else: + dx1 = ax_dx[-2] + dx2 = ax_dx[-1] + a = (dx2) / (dx1 * (dx1 + dx2)) + b = - (dx2 + dx1) / (dx1 * dx2) + c = (2. * dx2 + dx1) / (dx2 * (dx1 + dx2)) + # 1D equivalent -- out[-1] = a * f[-3] + b * f[-2] + c * f[-1] + out[tuple(slice1)] = a * f[tuple(slice2)] + b * f[tuple(slice3)] + c * f[tuple(slice4)] + + outvals.append(out) + + # reset the slice object in this dimension to ":" + slice1[axis] = slice(None) + slice2[axis] = slice(None) + slice3[axis] = slice(None) + slice4[axis] = slice(None) + + if len_axes == 1: + return outvals[0] + else: + return outvals + + +def _diff_dispatcher(a, n=None, axis=None, prepend=None, append=None): + return (a, prepend, append) + + +@array_function_dispatch(_diff_dispatcher) +def diff(a, n=1, axis=-1, prepend=np._NoValue, append=np._NoValue): + """ + Calculate the n-th discrete difference along the given axis. + + The first difference is given by ``out[i] = a[i+1] - a[i]`` along + the given axis, higher differences are calculated by using `diff` + recursively. + + Parameters + ---------- + a : array_like + Input array + n : int, optional + The number of times values are differenced. If zero, the input + is returned as-is. + axis : int, optional + The axis along which the difference is taken, default is the + last axis. + prepend, append : array_like, optional + Values to prepend or append to `a` along axis prior to + performing the difference. Scalar values are expanded to + arrays with length 1 in the direction of axis and the shape + of the input array in along all other axes. Otherwise the + dimension and shape must match `a` except along axis. + + .. versionadded:: 1.16.0 + + Returns + ------- + diff : ndarray + The n-th differences. The shape of the output is the same as `a` + except along `axis` where the dimension is smaller by `n`. 
The + type of the output is the same as the type of the difference + between any two elements of `a`. This is the same as the type of + `a` in most cases. A notable exception is `datetime64`, which + results in a `timedelta64` output array. + + See Also + -------- + gradient, ediff1d, cumsum + + Notes + ----- + Type is preserved for boolean arrays, so the result will contain + `False` when consecutive elements are the same and `True` when they + differ. + + For unsigned integer arrays, the results will also be unsigned. This + should not be surprising, as the result is consistent with + calculating the difference directly: + + >>> u8_arr = np.array([1, 0], dtype=np.uint8) + >>> np.diff(u8_arr) + array([255], dtype=uint8) + >>> u8_arr[1,...] - u8_arr[0,...] + 255 + + If this is not desirable, then the array should be cast to a larger + integer type first: + + >>> i16_arr = u8_arr.astype(np.int16) + >>> np.diff(i16_arr) + array([-1], dtype=int16) + + Examples + -------- + >>> x = np.array([1, 2, 4, 7, 0]) + >>> np.diff(x) + array([ 1, 2, 3, -7]) + >>> np.diff(x, n=2) + array([ 1, 1, -10]) + + >>> x = np.array([[1, 3, 6, 10], [0, 5, 6, 8]]) + >>> np.diff(x) + array([[2, 3, 4], + [5, 1, 2]]) + >>> np.diff(x, axis=0) + array([[-1, 2, 0, -2]]) + + >>> x = np.arange('1066-10-13', '1066-10-16', dtype=np.datetime64) + >>> np.diff(x) + array([1, 1], dtype='timedelta64[D]') + + """ + if n == 0: + return a + if n < 0: + raise ValueError( + "order must be non-negative but got " + repr(n)) + + a = asanyarray(a) + nd = a.ndim + if nd == 0: + raise ValueError("diff requires input that is at least one dimensional") + axis = normalize_axis_index(axis, nd) + + combined = [] + if prepend is not np._NoValue: + prepend = np.asanyarray(prepend) + if prepend.ndim == 0: + shape = list(a.shape) + shape[axis] = 1 + prepend = np.broadcast_to(prepend, tuple(shape)) + combined.append(prepend) + + combined.append(a) + + if append is not np._NoValue: + append = np.asanyarray(append) + if append.ndim == 0: + shape = list(a.shape) + shape[axis] = 1 + append = np.broadcast_to(append, tuple(shape)) + combined.append(append) + + if len(combined) > 1: + a = np.concatenate(combined, axis) + + slice1 = [slice(None)] * nd + slice2 = [slice(None)] * nd + slice1[axis] = slice(1, None) + slice2[axis] = slice(None, -1) + slice1 = tuple(slice1) + slice2 = tuple(slice2) + + op = not_equal if a.dtype == np.bool_ else subtract + for _ in range(n): + a = op(a[slice1], a[slice2]) + + return a + + +def _interp_dispatcher(x, xp, fp, left=None, right=None, period=None): + return (x, xp, fp) + + +@array_function_dispatch(_interp_dispatcher) +def interp(x, xp, fp, left=None, right=None, period=None): + """ + One-dimensional linear interpolation for monotonically increasing sample points. + + Returns the one-dimensional piecewise linear interpolant to a function + with given discrete data points (`xp`, `fp`), evaluated at `x`. + + Parameters + ---------- + x : array_like + The x-coordinates at which to evaluate the interpolated values. + + xp : 1-D sequence of floats + The x-coordinates of the data points, must be increasing if argument + `period` is not specified. Otherwise, `xp` is internally sorted after + normalizing the periodic boundaries with ``xp = xp % period``. + + fp : 1-D sequence of float or complex + The y-coordinates of the data points, same length as `xp`. + + left : optional float or complex corresponding to fp + Value to return for `x < xp[0]`, default is `fp[0]`. 
+
+    right : optional float or complex corresponding to fp
+        Value to return for `x > xp[-1]`, default is `fp[-1]`.
+
+    period : None or float, optional
+        A period for the x-coordinates. This parameter allows the proper
+        interpolation of angular x-coordinates. Parameters `left` and
+        `right` are ignored if `period` is specified.
+
+        .. versionadded:: 1.10.0
+
+    Returns
+    -------
+    y : float or complex (corresponding to fp) or ndarray
+        The interpolated values, same shape as `x`.
+
+    Raises
+    ------
+    ValueError
+        If `xp` and `fp` have different length
+        If `xp` or `fp` are not 1-D sequences
+        If `period == 0`
+
+    See Also
+    --------
+    scipy.interpolate
+
+    Warnings
+    --------
+    The x-coordinate sequence is expected to be increasing, but this is not
+    explicitly enforced. However, if the sequence `xp` is non-increasing,
+    interpolation results are meaningless.
+
+    Note that, since NaN is unsortable, `xp` also cannot contain NaNs.
+
+    A simple check for `xp` being strictly increasing is::
+
+        np.all(np.diff(xp) > 0)
+
+    Examples
+    --------
+    >>> xp = [1, 2, 3]
+    >>> fp = [3, 2, 0]
+    >>> np.interp(2.5, xp, fp)
+    1.0
+    >>> np.interp([0, 1, 1.5, 2.72, 3.14], xp, fp)
+    array([3.  , 3.  , 2.5 , 0.56, 0.  ])
+    >>> UNDEF = -99.0
+    >>> np.interp(3.14, xp, fp, right=UNDEF)
+    -99.0
+
+    Plot an interpolant to the sine function:
+
+    >>> x = np.linspace(0, 2*np.pi, 10)
+    >>> y = np.sin(x)
+    >>> xvals = np.linspace(0, 2*np.pi, 50)
+    >>> yinterp = np.interp(xvals, x, y)
+    >>> import matplotlib.pyplot as plt
+    >>> plt.plot(x, y, 'o')
+    [<matplotlib.lines.Line2D object at 0x...>]
+    >>> plt.plot(xvals, yinterp, '-x')
+    [<matplotlib.lines.Line2D object at 0x...>]
+    >>> plt.show()
+
+    Interpolation with periodic x-coordinates:
+
+    >>> x = [-180, -170, -185, 185, -10, -5, 0, 365]
+    >>> xp = [190, -190, 350, -350]
+    >>> fp = [5, 10, 3, 4]
+    >>> np.interp(x, xp, fp, period=360)
+    array([7.5 , 5.  , 8.75, 6.25, 3.  , 3.25, 3.5 , 3.75])
+
+    Complex interpolation:
+
+    >>> x = [1.5, 4.0]
+    >>> xp = [2,3,5]
+    >>> fp = [1.0j, 0, 2+3j]
+    >>> np.interp(x, xp, fp)
+    array([0.+1.j , 1.+1.5j])
+
+    """
+
+    fp = np.asarray(fp)
+
+    if np.iscomplexobj(fp):
+        interp_func = compiled_interp_complex
+        input_dtype = np.complex128
+    else:
+        interp_func = compiled_interp
+        input_dtype = np.float64
+
+    if period is not None:
+        if period == 0:
+            raise ValueError("period must be a non-zero value")
+        period = abs(period)
+        left = None
+        right = None
+
+        x = np.asarray(x, dtype=np.float64)
+        xp = np.asarray(xp, dtype=np.float64)
+        fp = np.asarray(fp, dtype=input_dtype)
+
+        if xp.ndim != 1 or fp.ndim != 1:
+            raise ValueError("Data points must be 1-D sequences")
+        if xp.shape[0] != fp.shape[0]:
+            raise ValueError("fp and xp are not of the same length")
+        # normalizing periodic boundaries
+        x = x % period
+        xp = xp % period
+        asort_xp = np.argsort(xp)
+        xp = xp[asort_xp]
+        fp = fp[asort_xp]
+        xp = np.concatenate((xp[-1:]-period, xp, xp[0:1]+period))
+        fp = np.concatenate((fp[-1:], fp, fp[0:1]))
+
+    return interp_func(x, xp, fp, left, right)
+
+
+def _angle_dispatcher(z, deg=None):
+    return (z,)
+
+
+@array_function_dispatch(_angle_dispatcher)
+def angle(z, deg=False):
+    """
+    Return the angle of the complex argument.
+
+    Parameters
+    ----------
+    z : array_like
+        A complex number or sequence of complex numbers.
+    deg : bool, optional
+        Return angle in degrees if True, radians if False (default).
+
+    Returns
+    -------
+    angle : ndarray or scalar
+        The counterclockwise angle from the positive real axis on the complex
+        plane in the range ``(-pi, pi]``, with dtype as numpy.float64.
+
+        ..
versionchanged:: 1.16.0 + This function works on subclasses of ndarray like `ma.array`. + + See Also + -------- + arctan2 + absolute + + Notes + ----- + Although the angle of the complex number 0 is undefined, ``numpy.angle(0)`` + returns the value 0. + + Examples + -------- + >>> np.angle([1.0, 1.0j, 1+1j]) # in radians + array([ 0. , 1.57079633, 0.78539816]) # may vary + >>> np.angle(1+1j, deg=True) # in degrees + 45.0 + + """ + z = asanyarray(z) + if issubclass(z.dtype.type, _nx.complexfloating): + zimag = z.imag + zreal = z.real + else: + zimag = 0 + zreal = z + + a = arctan2(zimag, zreal) + if deg: + a *= 180/pi + return a + + +def _unwrap_dispatcher(p, discont=None, axis=None, *, period=None): + return (p,) + + +@array_function_dispatch(_unwrap_dispatcher) +def unwrap(p, discont=None, axis=-1, *, period=2*pi): + r""" + Unwrap by taking the complement of large deltas with respect to the period. + + This unwraps a signal `p` by changing elements which have an absolute + difference from their predecessor of more than ``max(discont, period/2)`` + to their `period`-complementary values. + + For the default case where `period` is :math:`2\pi` and `discont` is + :math:`\pi`, this unwraps a radian phase `p` such that adjacent differences + are never greater than :math:`\pi` by adding :math:`2k\pi` for some + integer :math:`k`. + + Parameters + ---------- + p : array_like + Input array. + discont : float, optional + Maximum discontinuity between values, default is ``period/2``. + Values below ``period/2`` are treated as if they were ``period/2``. + To have an effect different from the default, `discont` should be + larger than ``period/2``. + axis : int, optional + Axis along which unwrap will operate, default is the last axis. + period: float, optional + Size of the range over which the input wraps. By default, it is + ``2 pi``. + + .. versionadded:: 1.21.0 + + Returns + ------- + out : ndarray + Output array. + + See Also + -------- + rad2deg, deg2rad + + Notes + ----- + If the discontinuity in `p` is smaller than ``period/2``, + but larger than `discont`, no unwrapping is done because taking + the complement would only make the discontinuity larger. + + Examples + -------- + >>> phase = np.linspace(0, np.pi, num=5) + >>> phase[3:] += np.pi + >>> phase + array([ 0. , 0.78539816, 1.57079633, 5.49778714, 6.28318531]) # may vary + >>> np.unwrap(phase) + array([ 0. , 0.78539816, 1.57079633, -0.78539816, 0. 
]) # may vary + >>> np.unwrap([0, 1, 2, -1, 0], period=4) + array([0, 1, 2, 3, 4]) + >>> np.unwrap([ 1, 2, 3, 4, 5, 6, 1, 2, 3], period=6) + array([1, 2, 3, 4, 5, 6, 7, 8, 9]) + >>> np.unwrap([2, 3, 4, 5, 2, 3, 4, 5], period=4) + array([2, 3, 4, 5, 6, 7, 8, 9]) + >>> phase_deg = np.mod(np.linspace(0 ,720, 19), 360) - 180 + >>> np.unwrap(phase_deg, period=360) + array([-180., -140., -100., -60., -20., 20., 60., 100., 140., + 180., 220., 260., 300., 340., 380., 420., 460., 500., + 540.]) + """ + p = asarray(p) + nd = p.ndim + dd = diff(p, axis=axis) + if discont is None: + discont = period/2 + slice1 = [slice(None, None)]*nd # full slices + slice1[axis] = slice(1, None) + slice1 = tuple(slice1) + dtype = np.result_type(dd, period) + if _nx.issubdtype(dtype, _nx.integer): + interval_high, rem = divmod(period, 2) + boundary_ambiguous = rem == 0 + else: + interval_high = period / 2 + boundary_ambiguous = True + interval_low = -interval_high + ddmod = mod(dd - interval_low, period) + interval_low + if boundary_ambiguous: + # for `mask = (abs(dd) == period/2)`, the above line made + # `ddmod[mask] == -period/2`. correct these such that + # `ddmod[mask] == sign(dd[mask])*period/2`. + _nx.copyto(ddmod, interval_high, + where=(ddmod == interval_low) & (dd > 0)) + ph_correct = ddmod - dd + _nx.copyto(ph_correct, 0, where=abs(dd) < discont) + up = array(p, copy=True, dtype=dtype) + up[slice1] = p[slice1] + ph_correct.cumsum(axis) + return up + + +def _sort_complex(a): + return (a,) + + +@array_function_dispatch(_sort_complex) +def sort_complex(a): + """ + Sort a complex array using the real part first, then the imaginary part. + + Parameters + ---------- + a : array_like + Input array + + Returns + ------- + out : complex ndarray + Always returns a sorted complex array. + + Examples + -------- + >>> np.sort_complex([5, 3, 6, 2, 1]) + array([1.+0.j, 2.+0.j, 3.+0.j, 5.+0.j, 6.+0.j]) + + >>> np.sort_complex([1 + 2j, 2 - 1j, 3 - 2j, 3 - 3j, 3 + 5j]) + array([1.+2.j, 2.-1.j, 3.-3.j, 3.-2.j, 3.+5.j]) + + """ + b = array(a, copy=True) + b.sort() + if not issubclass(b.dtype.type, _nx.complexfloating): + if b.dtype.char in 'bhBH': + return b.astype('F') + elif b.dtype.char == 'g': + return b.astype('G') + else: + return b.astype('D') + else: + return b + + +def _trim_zeros(filt, trim=None): + return (filt,) + + +@array_function_dispatch(_trim_zeros) +def trim_zeros(filt, trim='fb'): + """ + Trim the leading and/or trailing zeros from a 1-D array or sequence. + + Parameters + ---------- + filt : 1-D array or sequence + Input array. + trim : str, optional + A string with 'f' representing trim from front and 'b' to trim from + back. Default is 'fb', trim zeros from both front and back of the + array. + + Returns + ------- + trimmed : 1-D array or sequence + The result of trimming the input. The input data type is preserved. + + Examples + -------- + >>> a = np.array((0, 0, 0, 1, 2, 3, 0, 2, 1, 0)) + >>> np.trim_zeros(a) + array([1, 2, 3, 0, 2, 1]) + + >>> np.trim_zeros(a, 'b') + array([0, 0, 0, ..., 0, 2, 1]) + + The input data type is preserved, list/tuple in means list/tuple out. 
+ + >>> np.trim_zeros([0, 1, 2, 0]) + [1, 2] + + """ + + first = 0 + trim = trim.upper() + if 'F' in trim: + for i in filt: + if i != 0.: + break + else: + first = first + 1 + last = len(filt) + if 'B' in trim: + for i in filt[::-1]: + if i != 0.: + break + else: + last = last - 1 + return filt[first:last] + + +def _extract_dispatcher(condition, arr): + return (condition, arr) + + +@array_function_dispatch(_extract_dispatcher) +def extract(condition, arr): + """ + Return the elements of an array that satisfy some condition. + + This is equivalent to ``np.compress(ravel(condition), ravel(arr))``. If + `condition` is boolean ``np.extract`` is equivalent to ``arr[condition]``. + + Note that `place` does the exact opposite of `extract`. + + Parameters + ---------- + condition : array_like + An array whose nonzero or True entries indicate the elements of `arr` + to extract. + arr : array_like + Input array of the same size as `condition`. + + Returns + ------- + extract : ndarray + Rank 1 array of values from `arr` where `condition` is True. + + See Also + -------- + take, put, copyto, compress, place + + Examples + -------- + >>> arr = np.arange(12).reshape((3, 4)) + >>> arr + array([[ 0, 1, 2, 3], + [ 4, 5, 6, 7], + [ 8, 9, 10, 11]]) + >>> condition = np.mod(arr, 3)==0 + >>> condition + array([[ True, False, False, True], + [False, False, True, False], + [False, True, False, False]]) + >>> np.extract(condition, arr) + array([0, 3, 6, 9]) + + + If `condition` is boolean: + + >>> arr[condition] + array([0, 3, 6, 9]) + + """ + return _nx.take(ravel(arr), nonzero(ravel(condition))[0]) + + +def _place_dispatcher(arr, mask, vals): + return (arr, mask, vals) + + +@array_function_dispatch(_place_dispatcher) +def place(arr, mask, vals): + """ + Change elements of an array based on conditional and input values. + + Similar to ``np.copyto(arr, vals, where=mask)``, the difference is that + `place` uses the first N elements of `vals`, where N is the number of + True values in `mask`, while `copyto` uses the elements where `mask` + is True. + + Note that `extract` does the exact opposite of `place`. + + Parameters + ---------- + arr : ndarray + Array to put data into. + mask : array_like + Boolean mask array. Must have the same size as `a`. + vals : 1-D sequence + Values to put into `a`. Only the first N elements are used, where + N is the number of True values in `mask`. If `vals` is smaller + than N, it will be repeated, and if elements of `a` are to be masked, + this sequence must be non-empty. + + See Also + -------- + copyto, put, take, extract + + Examples + -------- + >>> arr = np.arange(6).reshape(2, 3) + >>> np.place(arr, arr>2, [44, 55]) + >>> arr + array([[ 0, 1, 2], + [44, 55, 44]]) + + """ + if not isinstance(arr, np.ndarray): + raise TypeError("argument 1 must be numpy.ndarray, " + "not {name}".format(name=type(arr).__name__)) + + return _insert(arr, mask, vals) + + +def disp(mesg, device=None, linefeed=True): + """ + Display a message on a device. + + Parameters + ---------- + mesg : str + Message to display. + device : object + Device to write message. If None, defaults to ``sys.stdout`` which is + very similar to ``print``. `device` needs to have ``write()`` and + ``flush()`` methods. + linefeed : bool, optional + Option whether to print a line feed or not. Defaults to True. + + Raises + ------ + AttributeError + If `device` does not have a ``write()`` or ``flush()`` method. 
+ + Examples + -------- + Besides ``sys.stdout``, a file-like object can also be used as it has + both required methods: + + >>> from io import StringIO + >>> buf = StringIO() + >>> np.disp(u'"Display" in a file', device=buf) + >>> buf.getvalue() + '"Display" in a file\\n' + + """ + if device is None: + device = sys.stdout + if linefeed: + device.write('%s\n' % mesg) + else: + device.write('%s' % mesg) + device.flush() + return + + +# See https://docs.scipy.org/doc/numpy/reference/c-api.generalized-ufuncs.html +_DIMENSION_NAME = r'\w+' +_CORE_DIMENSION_LIST = '(?:{0:}(?:,{0:})*)?'.format(_DIMENSION_NAME) +_ARGUMENT = r'\({}\)'.format(_CORE_DIMENSION_LIST) +_ARGUMENT_LIST = '{0:}(?:,{0:})*'.format(_ARGUMENT) +_SIGNATURE = '^{0:}->{0:}$'.format(_ARGUMENT_LIST) + + +def _parse_gufunc_signature(signature): + """ + Parse string signatures for a generalized universal function. + + Arguments + --------- + signature : string + Generalized universal function signature, e.g., ``(m,n),(n,p)->(m,p)`` + for ``np.matmul``. + + Returns + ------- + Tuple of input and output core dimensions parsed from the signature, each + of the form List[Tuple[str, ...]]. + """ + signature = re.sub(r'\s+', '', signature) + + if not re.match(_SIGNATURE, signature): + raise ValueError( + 'not a valid gufunc signature: {}'.format(signature)) + return tuple([tuple(re.findall(_DIMENSION_NAME, arg)) + for arg in re.findall(_ARGUMENT, arg_list)] + for arg_list in signature.split('->')) + + +def _update_dim_sizes(dim_sizes, arg, core_dims): + """ + Incrementally check and update core dimension sizes for a single argument. + + Arguments + --------- + dim_sizes : Dict[str, int] + Sizes of existing core dimensions. Will be updated in-place. + arg : ndarray + Argument to examine. + core_dims : Tuple[str, ...] + Core dimensions for this argument. + """ + if not core_dims: + return + + num_core_dims = len(core_dims) + if arg.ndim < num_core_dims: + raise ValueError( + '%d-dimensional argument does not have enough ' + 'dimensions for all core dimensions %r' + % (arg.ndim, core_dims)) + + core_shape = arg.shape[-num_core_dims:] + for dim, size in zip(core_dims, core_shape): + if dim in dim_sizes: + if size != dim_sizes[dim]: + raise ValueError( + 'inconsistent size for core dimension %r: %r vs %r' + % (dim, size, dim_sizes[dim])) + else: + dim_sizes[dim] = size + + +def _parse_input_dimensions(args, input_core_dims): + """ + Parse broadcast and core dimensions for vectorize with a signature. + + Arguments + --------- + args : Tuple[ndarray, ...] + Tuple of input arguments to examine. + input_core_dims : List[Tuple[str, ...]] + List of core dimensions corresponding to each input. + + Returns + ------- + broadcast_shape : Tuple[int, ...] + Common shape to broadcast all non-core dimensions to. + dim_sizes : Dict[str, int] + Common sizes for named core dimensions. 
+ """ + broadcast_args = [] + dim_sizes = {} + for arg, core_dims in zip(args, input_core_dims): + _update_dim_sizes(dim_sizes, arg, core_dims) + ndim = arg.ndim - len(core_dims) + dummy_array = np.lib.stride_tricks.as_strided(0, arg.shape[:ndim]) + broadcast_args.append(dummy_array) + broadcast_shape = np.lib.stride_tricks._broadcast_shape(*broadcast_args) + return broadcast_shape, dim_sizes + + +def _calculate_shapes(broadcast_shape, dim_sizes, list_of_core_dims): + """Helper for calculating broadcast shapes with core dimensions.""" + return [broadcast_shape + tuple(dim_sizes[dim] for dim in core_dims) + for core_dims in list_of_core_dims] + + +def _create_arrays(broadcast_shape, dim_sizes, list_of_core_dims, dtypes, + results=None): + """Helper for creating output arrays in vectorize.""" + shapes = _calculate_shapes(broadcast_shape, dim_sizes, list_of_core_dims) + if dtypes is None: + dtypes = [None] * len(shapes) + if results is None: + arrays = tuple(np.empty(shape=shape, dtype=dtype) + for shape, dtype in zip(shapes, dtypes)) + else: + arrays = tuple(np.empty_like(result, shape=shape, dtype=dtype) + for result, shape, dtype + in zip(results, shapes, dtypes)) + return arrays + + +@set_module('numpy') +class vectorize: + """ + vectorize(pyfunc, otypes=None, doc=None, excluded=None, cache=False, + signature=None) + + Generalized function class. + + Define a vectorized function which takes a nested sequence of objects or + numpy arrays as inputs and returns a single numpy array or a tuple of numpy + arrays. The vectorized function evaluates `pyfunc` over successive tuples + of the input arrays like the python map function, except it uses the + broadcasting rules of numpy. + + The data type of the output of `vectorized` is determined by calling + the function with the first element of the input. This can be avoided + by specifying the `otypes` argument. + + Parameters + ---------- + pyfunc : callable + A python function or method. + otypes : str or list of dtypes, optional + The output data type. It must be specified as either a string of + typecode characters or a list of data type specifiers. There should + be one data type specifier for each output. + doc : str, optional + The docstring for the function. If None, the docstring will be the + ``pyfunc.__doc__``. + excluded : set, optional + Set of strings or integers representing the positional or keyword + arguments for which the function will not be vectorized. These will be + passed directly to `pyfunc` unmodified. + + .. versionadded:: 1.7.0 + + cache : bool, optional + If `True`, then cache the first function call that determines the number + of outputs if `otypes` is not provided. + + .. versionadded:: 1.7.0 + + signature : string, optional + Generalized universal function signature, e.g., ``(m,n),(n)->(m)`` for + vectorized matrix-vector multiplication. If provided, ``pyfunc`` will + be called with (and expected to return) arrays with shapes given by the + size of corresponding core dimensions. By default, ``pyfunc`` is + assumed to take scalars as input and output. + + .. versionadded:: 1.12.0 + + Returns + ------- + vectorized : callable + Vectorized function. + + See Also + -------- + frompyfunc : Takes an arbitrary Python function and returns a ufunc + + Notes + ----- + The `vectorize` function is provided primarily for convenience, not for + performance. The implementation is essentially a for loop. 
+
+    If `otypes` is not specified, then a call to the function with the
+    first argument will be used to determine the number of outputs.  The
+    results of this call will be cached if `cache` is `True` to prevent
+    calling the function twice.  However, to implement the cache, the
+    original function must be wrapped which will slow down subsequent
+    calls, so only do this if your function is expensive.
+
+    The new keyword argument interface and `excluded` argument support
+    further degrade performance.
+
+    References
+    ----------
+    .. [1] :doc:`/reference/c-api/generalized-ufuncs`
+
+    Examples
+    --------
+    >>> def myfunc(a, b):
+    ...     "Return a-b if a>b, otherwise return a+b"
+    ...     if a > b:
+    ...         return a - b
+    ...     else:
+    ...         return a + b
+
+    >>> vfunc = np.vectorize(myfunc)
+    >>> vfunc([1, 2, 3, 4], 2)
+    array([3, 4, 1, 2])
+
+    The docstring is taken from the input function to `vectorize` unless it
+    is specified:
+
+    >>> vfunc.__doc__
+    'Return a-b if a>b, otherwise return a+b'
+    >>> vfunc = np.vectorize(myfunc, doc='Vectorized `myfunc`')
+    >>> vfunc.__doc__
+    'Vectorized `myfunc`'
+
+    The output type is determined by evaluating the first element of the
+    input, unless it is specified:
+
+    >>> out = vfunc([1, 2, 3, 4], 2)
+    >>> type(out[0])
+    <class 'numpy.int64'>
+    >>> vfunc = np.vectorize(myfunc, otypes=[float])
+    >>> out = vfunc([1, 2, 3, 4], 2)
+    >>> type(out[0])
+    <class 'numpy.float64'>
+
+    The `excluded` argument can be used to prevent vectorizing over certain
+    arguments.  This can be useful for array-like arguments of a fixed length
+    such as the coefficients for a polynomial as in `polyval`:
+
+    >>> def mypolyval(p, x):
+    ...     _p = list(p)
+    ...     res = _p.pop(0)
+    ...     while _p:
+    ...         res = res*x + _p.pop(0)
+    ...     return res
+    >>> vpolyval = np.vectorize(mypolyval, excluded=['p'])
+    >>> vpolyval(p=[1, 2, 3], x=[0, 1])
+    array([3, 6])
+
+    Positional arguments may also be excluded by specifying their position:
+
+    >>> vpolyval.excluded.add(0)
+    >>> vpolyval([1, 2, 3], x=[0, 1])
+    array([3, 6])
+
+    The `signature` argument allows for vectorizing functions that act on
+    non-scalar arrays of fixed length. For example, you can use it for a
+    vectorized calculation of Pearson correlation coefficient and its p-value:
+
+    >>> import scipy.stats
+    >>> pearsonr = np.vectorize(scipy.stats.pearsonr,
+    ...
signature='(n),(n)->(),()') + >>> pearsonr([[0, 1, 2, 3]], [[1, 2, 3, 4], [4, 3, 2, 1]]) + (array([ 1., -1.]), array([ 0., 0.])) + + Or for a vectorized convolution: + + >>> convolve = np.vectorize(np.convolve, signature='(n),(m)->(k)') + >>> convolve(np.eye(4), [1, 2, 1]) + array([[1., 2., 1., 0., 0., 0.], + [0., 1., 2., 1., 0., 0.], + [0., 0., 1., 2., 1., 0.], + [0., 0., 0., 1., 2., 1.]]) + + """ + def __init__(self, pyfunc, otypes=None, doc=None, excluded=None, + cache=False, signature=None): + self.pyfunc = pyfunc + self.cache = cache + self.signature = signature + self._ufunc = {} # Caching to improve default performance + + if doc is None: + self.__doc__ = pyfunc.__doc__ + else: + self.__doc__ = doc + + if isinstance(otypes, str): + for char in otypes: + if char not in typecodes['All']: + raise ValueError("Invalid otype specified: %s" % (char,)) + elif iterable(otypes): + otypes = ''.join([_nx.dtype(x).char for x in otypes]) + elif otypes is not None: + raise ValueError("Invalid otype specification") + self.otypes = otypes + + # Excluded variable support + if excluded is None: + excluded = set() + self.excluded = set(excluded) + + if signature is not None: + self._in_and_out_core_dims = _parse_gufunc_signature(signature) + else: + self._in_and_out_core_dims = None + + def __call__(self, *args, **kwargs): + """ + Return arrays with the results of `pyfunc` broadcast (vectorized) over + `args` and `kwargs` not in `excluded`. + """ + excluded = self.excluded + if not kwargs and not excluded: + func = self.pyfunc + vargs = args + else: + # The wrapper accepts only positional arguments: we use `names` and + # `inds` to mutate `the_args` and `kwargs` to pass to the original + # function. + nargs = len(args) + + names = [_n for _n in kwargs if _n not in excluded] + inds = [_i for _i in range(nargs) if _i not in excluded] + the_args = list(args) + + def func(*vargs): + for _n, _i in enumerate(inds): + the_args[_i] = vargs[_n] + kwargs.update(zip(names, vargs[len(inds):])) + return self.pyfunc(*the_args, **kwargs) + + vargs = [args[_i] for _i in inds] + vargs.extend([kwargs[_n] for _n in names]) + + return self._vectorize_call(func=func, args=vargs) + + def _get_ufunc_and_otypes(self, func, args): + """Return (ufunc, otypes).""" + # frompyfunc will fail if args is empty + if not args: + raise ValueError('args can not be empty') + + if self.otypes is not None: + otypes = self.otypes + + # self._ufunc is a dictionary whose keys are the number of + # arguments (i.e. len(args)) and whose values are ufuncs created + # by frompyfunc. len(args) can be different for different calls if + # self.pyfunc has parameters with default values. We only use the + # cache when func is self.pyfunc, which occurs when the call uses + # only positional arguments and no arguments are excluded. + + nin = len(args) + nout = len(self.otypes) + if func is not self.pyfunc or nin not in self._ufunc: + ufunc = frompyfunc(func, nin, nout) + else: + ufunc = None # We'll get it from self._ufunc + if func is self.pyfunc: + ufunc = self._ufunc.setdefault(nin, ufunc) + else: + # Get number of outputs and output types by calling the function on + # the first entries of args. We also cache the result to prevent + # the subsequent call when the ufunc is evaluated. 
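+            # (For example, with `otypes` unset, vectorizing a scalar
+            # function and calling it on np.arange(3) evaluates element 0
+            # once just to learn the output dtype; unless `cache` is set,
+            # the ufunc then evaluates it again along with the rest.)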
+ # Assumes that ufunc first evaluates the 0th elements in the input + # arrays (the input values are not checked to ensure this) + args = [asarray(arg) for arg in args] + if builtins.any(arg.size == 0 for arg in args): + raise ValueError('cannot call `vectorize` on size 0 inputs ' + 'unless `otypes` is set') + + inputs = [arg.flat[0] for arg in args] + outputs = func(*inputs) + + # Performance note: profiling indicates that -- for simple + # functions at least -- this wrapping can almost double the + # execution time. + # Hence we make it optional. + if self.cache: + _cache = [outputs] + + def _func(*vargs): + if _cache: + return _cache.pop() + else: + return func(*vargs) + else: + _func = func + + if isinstance(outputs, tuple): + nout = len(outputs) + else: + nout = 1 + outputs = (outputs,) + + otypes = ''.join([asarray(outputs[_k]).dtype.char + for _k in range(nout)]) + + # Performance note: profiling indicates that creating the ufunc is + # not a significant cost compared with wrapping so it seems not + # worth trying to cache this. + ufunc = frompyfunc(_func, len(args), nout) + + return ufunc, otypes + + def _vectorize_call(self, func, args): + """Vectorized call to `func` over positional `args`.""" + if self.signature is not None: + res = self._vectorize_call_with_signature(func, args) + elif not args: + res = func() + else: + ufunc, otypes = self._get_ufunc_and_otypes(func=func, args=args) + + # Convert args to object arrays first + inputs = [asanyarray(a, dtype=object) for a in args] + + outputs = ufunc(*inputs) + + if ufunc.nout == 1: + res = asanyarray(outputs, dtype=otypes[0]) + else: + res = tuple([asanyarray(x, dtype=t) + for x, t in zip(outputs, otypes)]) + return res + + def _vectorize_call_with_signature(self, func, args): + """Vectorized call over positional arguments with a signature.""" + input_core_dims, output_core_dims = self._in_and_out_core_dims + + if len(args) != len(input_core_dims): + raise TypeError('wrong number of positional arguments: ' + 'expected %r, got %r' + % (len(input_core_dims), len(args))) + args = tuple(asanyarray(arg) for arg in args) + + broadcast_shape, dim_sizes = _parse_input_dimensions( + args, input_core_dims) + input_shapes = _calculate_shapes(broadcast_shape, dim_sizes, + input_core_dims) + args = [np.broadcast_to(arg, shape, subok=True) + for arg, shape in zip(args, input_shapes)] + + outputs = None + otypes = self.otypes + nout = len(output_core_dims) + + for index in np.ndindex(*broadcast_shape): + results = func(*(arg[index] for arg in args)) + + n_results = len(results) if isinstance(results, tuple) else 1 + + if nout != n_results: + raise ValueError( + 'wrong number of outputs from pyfunc: expected %r, got %r' + % (nout, n_results)) + + if nout == 1: + results = (results,) + + if outputs is None: + for result, core_dims in zip(results, output_core_dims): + _update_dim_sizes(dim_sizes, result, core_dims) + + outputs = _create_arrays(broadcast_shape, dim_sizes, + output_core_dims, otypes, results) + + for output, result in zip(outputs, results): + output[index] = result + + if outputs is None: + # did not call the function even once + if otypes is None: + raise ValueError('cannot call `vectorize` on size 0 inputs ' + 'unless `otypes` is set') + if builtins.any(dim not in dim_sizes + for dims in output_core_dims + for dim in dims): + raise ValueError('cannot call `vectorize` with a signature ' + 'including new output dimensions on size 0 ' + 'inputs') + outputs = _create_arrays(broadcast_shape, dim_sizes, + output_core_dims, otypes) + + 
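+        # A single output is unwrapped from its 1-tuple for convenience.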
return outputs[0] if nout == 1 else outputs + + +def _cov_dispatcher(m, y=None, rowvar=None, bias=None, ddof=None, + fweights=None, aweights=None, *, dtype=None): + return (m, y, fweights, aweights) + + +@array_function_dispatch(_cov_dispatcher) +def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, + aweights=None, *, dtype=None): + """ + Estimate a covariance matrix, given data and weights. + + Covariance indicates the level to which two variables vary together. + If we examine N-dimensional samples, :math:`X = [x_1, x_2, ... x_N]^T`, + then the covariance matrix element :math:`C_{ij}` is the covariance of + :math:`x_i` and :math:`x_j`. The element :math:`C_{ii}` is the variance + of :math:`x_i`. + + See the notes for an outline of the algorithm. + + Parameters + ---------- + m : array_like + A 1-D or 2-D array containing multiple variables and observations. + Each row of `m` represents a variable, and each column a single + observation of all those variables. Also see `rowvar` below. + y : array_like, optional + An additional set of variables and observations. `y` has the same form + as that of `m`. + rowvar : bool, optional + If `rowvar` is True (default), then each row represents a + variable, with observations in the columns. Otherwise, the relationship + is transposed: each column represents a variable, while the rows + contain observations. + bias : bool, optional + Default normalization (False) is by ``(N - 1)``, where ``N`` is the + number of observations given (unbiased estimate). If `bias` is True, + then normalization is by ``N``. These values can be overridden by using + the keyword ``ddof`` in numpy versions >= 1.5. + ddof : int, optional + If not ``None`` the default value implied by `bias` is overridden. + Note that ``ddof=1`` will return the unbiased estimate, even if both + `fweights` and `aweights` are specified, and ``ddof=0`` will return + the simple average. See the notes for the details. The default value + is ``None``. + + .. versionadded:: 1.5 + fweights : array_like, int, optional + 1-D array of integer frequency weights; the number of times each + observation vector should be repeated. + + .. versionadded:: 1.10 + aweights : array_like, optional + 1-D array of observation vector weights. These relative weights are + typically large for observations considered "important" and smaller for + observations considered less "important". If ``ddof=0`` the array of + weights can be used to assign probabilities to observation vectors. + + .. versionadded:: 1.10 + dtype : data-type, optional + Data-type of the result. By default, the return data-type will have + at least `numpy.float64` precision. + + .. versionadded:: 1.20 + + Returns + ------- + out : ndarray + The covariance matrix of the variables. + + See Also + -------- + corrcoef : Normalized covariance matrix + + Notes + ----- + Assume that the observations are in the columns of the observation + array `m` and let ``f = fweights`` and ``a = aweights`` for brevity. The + steps to compute the weighted covariance are as follows:: + + >>> m = np.arange(10, dtype=np.float64) + >>> f = np.arange(10) * 2 + >>> a = np.arange(10) ** 2. + >>> ddof = 1 + >>> w = f * a + >>> v1 = np.sum(w) + >>> v2 = np.sum(w * a) + >>> m -= np.sum(m * w, axis=None, keepdims=True) / v1 + >>> cov = np.dot(m * w, m.T) * v1 / (v1**2 - ddof * v2) + + Note that when ``a == 1``, the normalization factor + ``v1 / (v1**2 - ddof * v2)`` goes over to ``1 / (np.sum(f) - ddof)`` + as it should. 
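+
+    As a quick check of the unweighted case (an added sketch, not part of
+    the algorithm outline above), the normalization reduces to the familiar
+    ``1/(N - ddof)`` estimator:
+
+    >>> v = np.arange(10, dtype=np.float64)
+    >>> np.allclose(np.cov(v), ((v - v.mean())**2).sum() / (len(v) - 1))
+    True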
+ + Examples + -------- + Consider two variables, :math:`x_0` and :math:`x_1`, which + correlate perfectly, but in opposite directions: + + >>> x = np.array([[0, 2], [1, 1], [2, 0]]).T + >>> x + array([[0, 1, 2], + [2, 1, 0]]) + + Note how :math:`x_0` increases while :math:`x_1` decreases. The covariance + matrix shows this clearly: + + >>> np.cov(x) + array([[ 1., -1.], + [-1., 1.]]) + + Note that element :math:`C_{0,1}`, which shows the correlation between + :math:`x_0` and :math:`x_1`, is negative. + + Further, note how `x` and `y` are combined: + + >>> x = [-2.1, -1, 4.3] + >>> y = [3, 1.1, 0.12] + >>> X = np.stack((x, y), axis=0) + >>> np.cov(X) + array([[11.71 , -4.286 ], # may vary + [-4.286 , 2.144133]]) + >>> np.cov(x, y) + array([[11.71 , -4.286 ], # may vary + [-4.286 , 2.144133]]) + >>> np.cov(x) + array(11.71) + + """ + # Check inputs + if ddof is not None and ddof != int(ddof): + raise ValueError( + "ddof must be integer") + + # Handles complex arrays too + m = np.asarray(m) + if m.ndim > 2: + raise ValueError("m has more than 2 dimensions") + + if y is not None: + y = np.asarray(y) + if y.ndim > 2: + raise ValueError("y has more than 2 dimensions") + + if dtype is None: + if y is None: + dtype = np.result_type(m, np.float64) + else: + dtype = np.result_type(m, y, np.float64) + + X = array(m, ndmin=2, dtype=dtype) + if not rowvar and X.shape[0] != 1: + X = X.T + if X.shape[0] == 0: + return np.array([]).reshape(0, 0) + if y is not None: + y = array(y, copy=False, ndmin=2, dtype=dtype) + if not rowvar and y.shape[0] != 1: + y = y.T + X = np.concatenate((X, y), axis=0) + + if ddof is None: + if bias == 0: + ddof = 1 + else: + ddof = 0 + + # Get the product of frequencies and weights + w = None + if fweights is not None: + fweights = np.asarray(fweights, dtype=float) + if not np.all(fweights == np.around(fweights)): + raise TypeError( + "fweights must be integer") + if fweights.ndim > 1: + raise RuntimeError( + "cannot handle multidimensional fweights") + if fweights.shape[0] != X.shape[1]: + raise RuntimeError( + "incompatible numbers of samples and fweights") + if any(fweights < 0): + raise ValueError( + "fweights cannot be negative") + w = fweights + if aweights is not None: + aweights = np.asarray(aweights, dtype=float) + if aweights.ndim > 1: + raise RuntimeError( + "cannot handle multidimensional aweights") + if aweights.shape[0] != X.shape[1]: + raise RuntimeError( + "incompatible numbers of samples and aweights") + if any(aweights < 0): + raise ValueError( + "aweights cannot be negative") + if w is None: + w = aweights + else: + w *= aweights + + avg, w_sum = average(X, axis=1, weights=w, returned=True) + w_sum = w_sum[0] + + # Determine the normalization + if w is None: + fact = X.shape[1] - ddof + elif ddof == 0: + fact = w_sum + elif aweights is None: + fact = w_sum - ddof + else: + fact = w_sum - ddof*sum(w*aweights)/w_sum + + if fact <= 0: + warnings.warn("Degrees of freedom <= 0 for slice", + RuntimeWarning, stacklevel=3) + fact = 0.0 + + X -= avg[:, None] + if w is None: + X_T = X.T + else: + X_T = (X*w).T + c = dot(X, X_T.conj()) + c *= np.true_divide(1, fact) + return c.squeeze() + + +def _corrcoef_dispatcher(x, y=None, rowvar=None, bias=None, ddof=None, *, + dtype=None): + return (x, y) + + +@array_function_dispatch(_corrcoef_dispatcher) +def corrcoef(x, y=None, rowvar=True, bias=np._NoValue, ddof=np._NoValue, *, + dtype=None): + """ + Return Pearson product-moment correlation coefficients. + + Please refer to the documentation for `cov` for more detail. 
The + relationship between the correlation coefficient matrix, `R`, and the + covariance matrix, `C`, is + + .. math:: R_{ij} = \\frac{ C_{ij} } { \\sqrt{ C_{ii} * C_{jj} } } + + The values of `R` are between -1 and 1, inclusive. + + Parameters + ---------- + x : array_like + A 1-D or 2-D array containing multiple variables and observations. + Each row of `x` represents a variable, and each column a single + observation of all those variables. Also see `rowvar` below. + y : array_like, optional + An additional set of variables and observations. `y` has the same + shape as `x`. + rowvar : bool, optional + If `rowvar` is True (default), then each row represents a + variable, with observations in the columns. Otherwise, the relationship + is transposed: each column represents a variable, while the rows + contain observations. + bias : _NoValue, optional + Has no effect, do not use. + + .. deprecated:: 1.10.0 + ddof : _NoValue, optional + Has no effect, do not use. + + .. deprecated:: 1.10.0 + dtype : data-type, optional + Data-type of the result. By default, the return data-type will have + at least `numpy.float64` precision. + + .. versionadded:: 1.20 + + Returns + ------- + R : ndarray + The correlation coefficient matrix of the variables. + + See Also + -------- + cov : Covariance matrix + + Notes + ----- + Due to floating point rounding the resulting array may not be Hermitian, + the diagonal elements may not be 1, and the elements may not satisfy the + inequality abs(a) <= 1. The real and imaginary parts are clipped to the + interval [-1, 1] in an attempt to improve on that situation but is not + much help in the complex case. + + This function accepts but discards arguments `bias` and `ddof`. This is + for backwards compatibility with previous versions of this function. These + arguments had no effect on the return values of the function and can be + safely ignored in this and previous versions of numpy. + + Examples + -------- + In this example we generate two random arrays, ``xarr`` and ``yarr``, and + compute the row-wise and column-wise Pearson correlation coefficients, + ``R``. Since ``rowvar`` is true by default, we first find the row-wise + Pearson correlation coefficients between the variables of ``xarr``. + + >>> import numpy as np + >>> rng = np.random.default_rng(seed=42) + >>> xarr = rng.random((3, 3)) + >>> xarr + array([[0.77395605, 0.43887844, 0.85859792], + [0.69736803, 0.09417735, 0.97562235], + [0.7611397 , 0.78606431, 0.12811363]]) + >>> R1 = np.corrcoef(xarr) + >>> R1 + array([[ 1. , 0.99256089, -0.68080986], + [ 0.99256089, 1. , -0.76492172], + [-0.68080986, -0.76492172, 1. ]]) + + If we add another set of variables and observations ``yarr``, we can + compute the row-wise Pearson correlation coefficients between the + variables in ``xarr`` and ``yarr``. + + >>> yarr = rng.random((3, 3)) + >>> yarr + array([[0.45038594, 0.37079802, 0.92676499], + [0.64386512, 0.82276161, 0.4434142 ], + [0.22723872, 0.55458479, 0.06381726]]) + >>> R2 = np.corrcoef(xarr, yarr) + >>> R2 + array([[ 1. , 0.99256089, -0.68080986, 0.75008178, -0.934284 , + -0.99004057], + [ 0.99256089, 1. , -0.76492172, 0.82502011, -0.97074098, + -0.99981569], + [-0.68080986, -0.76492172, 1. , -0.99507202, 0.89721355, + 0.77714685], + [ 0.75008178, 0.82502011, -0.99507202, 1. , -0.93657855, + -0.83571711], + [-0.934284 , -0.97074098, 0.89721355, -0.93657855, 1. , + 0.97517215], + [-0.99004057, -0.99981569, 0.77714685, -0.83571711, 0.97517215, + 1. 
]]) + + Finally if we use the option ``rowvar=False``, the columns are now + being treated as the variables and we will find the column-wise Pearson + correlation coefficients between variables in ``xarr`` and ``yarr``. + + >>> R3 = np.corrcoef(xarr, yarr, rowvar=False) + >>> R3 + array([[ 1. , 0.77598074, -0.47458546, -0.75078643, -0.9665554 , + 0.22423734], + [ 0.77598074, 1. , -0.92346708, -0.99923895, -0.58826587, + -0.44069024], + [-0.47458546, -0.92346708, 1. , 0.93773029, 0.23297648, + 0.75137473], + [-0.75078643, -0.99923895, 0.93773029, 1. , 0.55627469, + 0.47536961], + [-0.9665554 , -0.58826587, 0.23297648, 0.55627469, 1. , + -0.46666491], + [ 0.22423734, -0.44069024, 0.75137473, 0.47536961, -0.46666491, + 1. ]]) + + """ + if bias is not np._NoValue or ddof is not np._NoValue: + # 2015-03-15, 1.10 + warnings.warn('bias and ddof have no effect and are deprecated', + DeprecationWarning, stacklevel=3) + c = cov(x, y, rowvar, dtype=dtype) + try: + d = diag(c) + except ValueError: + # scalar covariance + # nan if incorrect value (nan, inf, 0), 1 otherwise + return c / c + stddev = sqrt(d.real) + c /= stddev[:, None] + c /= stddev[None, :] + + # Clip real and imaginary parts to [-1, 1]. This does not guarantee + # abs(a[i,j]) <= 1 for complex arrays, but is the best we can do without + # excessive work. + np.clip(c.real, -1, 1, out=c.real) + if np.iscomplexobj(c): + np.clip(c.imag, -1, 1, out=c.imag) + + return c + + +@set_module('numpy') +def blackman(M): + """ + Return the Blackman window. + + The Blackman window is a taper formed by using the first three + terms of a summation of cosines. It was designed to have close to the + minimal leakage possible. It is close to optimal, only slightly worse + than a Kaiser window. + + Parameters + ---------- + M : int + Number of points in the output window. If zero or less, an empty + array is returned. + + Returns + ------- + out : ndarray + The window, with the maximum value normalized to one (the value one + appears only if the number of samples is odd). + + See Also + -------- + bartlett, hamming, hanning, kaiser + + Notes + ----- + The Blackman window is defined as + + .. math:: w(n) = 0.42 - 0.5 \\cos(2\\pi n/M) + 0.08 \\cos(4\\pi n/M) + + Most references to the Blackman window come from the signal processing + literature, where it is used as one of many windowing functions for + smoothing values. It is also known as an apodization (which means + "removing the foot", i.e. smoothing discontinuities at the beginning + and end of the sampled signal) or tapering function. It is known as a + "near optimal" tapering function, almost as good (by some measures) + as the kaiser window. + + References + ---------- + Blackman, R.B. and Tukey, J.W., (1958) The measurement of power spectra, + Dover Publications, New York. + + Oppenheim, A.V., and R.W. Schafer. Discrete-Time Signal Processing. + Upper Saddle River, NJ: Prentice-Hall, 1999, pp. 468-471. 
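+
+    The implementation evaluates the cosine sum on the symmetric grid
+    ``n = arange(1-M, M, 2)``; a direct sketch of that computation matches
+    ``np.blackman``:
+
+    >>> M = 11
+    >>> n = np.arange(1 - M, M, 2)
+    >>> w = 0.42 + 0.5*np.cos(np.pi*n/(M-1)) + 0.08*np.cos(2.0*np.pi*n/(M-1))
+    >>> np.allclose(w, np.blackman(M))
+    True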
+
+    Examples
+    --------
+    >>> import matplotlib.pyplot as plt
+    >>> np.blackman(12)
+    array([-1.38777878e-17,   3.26064346e-02,   1.59903635e-01, # may vary
+            4.14397981e-01,   7.36045180e-01,   9.67046769e-01,
+            9.67046769e-01,   7.36045180e-01,   4.14397981e-01,
+            1.59903635e-01,   3.26064346e-02,  -1.38777878e-17])
+
+    Plot the window and the frequency response:
+
+    >>> from numpy.fft import fft, fftshift
+    >>> window = np.blackman(51)
+    >>> plt.plot(window)
+    [<matplotlib.lines.Line2D object at 0x...>]
+    >>> plt.title("Blackman window")
+    Text(0.5, 1.0, 'Blackman window')
+    >>> plt.ylabel("Amplitude")
+    Text(0, 0.5, 'Amplitude')
+    >>> plt.xlabel("Sample")
+    Text(0.5, 0, 'Sample')
+    >>> plt.show()
+
+    >>> plt.figure()
+    <Figure size 640x480 with 0 Axes>
+ >>> A = fft(window, 2048) / 25.5 + >>> mag = np.abs(fftshift(A)) + >>> freq = np.linspace(-0.5, 0.5, len(A)) + >>> with np.errstate(divide='ignore', invalid='ignore'): + ... response = 20 * np.log10(mag) + ... + >>> response = np.clip(response, -100, 100) + >>> plt.plot(freq, response) + [] + >>> plt.title("Frequency response of Blackman window") + Text(0.5, 1.0, 'Frequency response of Blackman window') + >>> plt.ylabel("Magnitude [dB]") + Text(0, 0.5, 'Magnitude [dB]') + >>> plt.xlabel("Normalized frequency [cycles per sample]") + Text(0.5, 0, 'Normalized frequency [cycles per sample]') + >>> _ = plt.axis('tight') + >>> plt.show() + + """ + if M < 1: + return array([], dtype=np.result_type(M, 0.0)) + if M == 1: + return ones(1, dtype=np.result_type(M, 0.0)) + n = arange(1-M, M, 2) + return 0.42 + 0.5*cos(pi*n/(M-1)) + 0.08*cos(2.0*pi*n/(M-1)) + + +@set_module('numpy') +def bartlett(M): + """ + Return the Bartlett window. + + The Bartlett window is very similar to a triangular window, except + that the end points are at zero. It is often used in signal + processing for tapering a signal, without generating too much + ripple in the frequency domain. + + Parameters + ---------- + M : int + Number of points in the output window. If zero or less, an + empty array is returned. + + Returns + ------- + out : array + The triangular window, with the maximum value normalized to one + (the value one appears only if the number of samples is odd), with + the first and last samples equal to zero. + + See Also + -------- + blackman, hamming, hanning, kaiser + + Notes + ----- + The Bartlett window is defined as + + .. math:: w(n) = \\frac{2}{M-1} \\left( + \\frac{M-1}{2} - \\left|n - \\frac{M-1}{2}\\right| + \\right) + + Most references to the Bartlett window come from the signal + processing literature, where it is used as one of many windowing + functions for smoothing values. Note that convolution with this + window produces linear interpolation. It is also known as an + apodization (which means"removing the foot", i.e. smoothing + discontinuities at the beginning and end of the sampled signal) or + tapering function. The fourier transform of the Bartlett is the product + of two sinc functions. + Note the excellent discussion in Kanasewich. + + References + ---------- + .. [1] M.S. Bartlett, "Periodogram Analysis and Continuous Spectra", + Biometrika 37, 1-16, 1950. + .. [2] E.R. Kanasewich, "Time Sequence Analysis in Geophysics", + The University of Alberta Press, 1975, pp. 109-110. + .. [3] A.V. Oppenheim and R.W. Schafer, "Discrete-Time Signal + Processing", Prentice-Hall, 1999, pp. 468-471. + .. [4] Wikipedia, "Window function", + https://en.wikipedia.org/wiki/Window_function + .. [5] W.H. Press, B.P. Flannery, S.A. Teukolsky, and W.T. Vetterling, + "Numerical Recipes", Cambridge University Press, 1986, page 429. + + Examples + -------- + >>> import matplotlib.pyplot as plt + >>> np.bartlett(12) + array([ 0. , 0.18181818, 0.36363636, 0.54545455, 0.72727273, # may vary + 0.90909091, 0.90909091, 0.72727273, 0.54545455, 0.36363636, + 0.18181818, 0. ]) + + Plot the window and its frequency response (requires SciPy and matplotlib): + + >>> from numpy.fft import fft, fftshift + >>> window = np.bartlett(51) + >>> plt.plot(window) + [] + >>> plt.title("Bartlett window") + Text(0.5, 1.0, 'Bartlett window') + >>> plt.ylabel("Amplitude") + Text(0, 0.5, 'Amplitude') + >>> plt.xlabel("Sample") + Text(0.5, 0, 'Sample') + >>> plt.show() + + >>> plt.figure() +
+    <Figure size 640x480 with 0 Axes>
+ >>> A = fft(window, 2048) / 25.5 + >>> mag = np.abs(fftshift(A)) + >>> freq = np.linspace(-0.5, 0.5, len(A)) + >>> with np.errstate(divide='ignore', invalid='ignore'): + ... response = 20 * np.log10(mag) + ... + >>> response = np.clip(response, -100, 100) + >>> plt.plot(freq, response) + [] + >>> plt.title("Frequency response of Bartlett window") + Text(0.5, 1.0, 'Frequency response of Bartlett window') + >>> plt.ylabel("Magnitude [dB]") + Text(0, 0.5, 'Magnitude [dB]') + >>> plt.xlabel("Normalized frequency [cycles per sample]") + Text(0.5, 0, 'Normalized frequency [cycles per sample]') + >>> _ = plt.axis('tight') + >>> plt.show() + + """ + if M < 1: + return array([], dtype=np.result_type(M, 0.0)) + if M == 1: + return ones(1, dtype=np.result_type(M, 0.0)) + n = arange(1-M, M, 2) + return where(less_equal(n, 0), 1 + n/(M-1), 1 - n/(M-1)) + + +@set_module('numpy') +def hanning(M): + """ + Return the Hanning window. + + The Hanning window is a taper formed by using a weighted cosine. + + Parameters + ---------- + M : int + Number of points in the output window. If zero or less, an + empty array is returned. + + Returns + ------- + out : ndarray, shape(M,) + The window, with the maximum value normalized to one (the value + one appears only if `M` is odd). + + See Also + -------- + bartlett, blackman, hamming, kaiser + + Notes + ----- + The Hanning window is defined as + + .. math:: w(n) = 0.5 - 0.5cos\\left(\\frac{2\\pi{n}}{M-1}\\right) + \\qquad 0 \\leq n \\leq M-1 + + The Hanning was named for Julius von Hann, an Austrian meteorologist. + It is also known as the Cosine Bell. Some authors prefer that it be + called a Hann window, to help avoid confusion with the very similar + Hamming window. + + Most references to the Hanning window come from the signal processing + literature, where it is used as one of many windowing functions for + smoothing values. It is also known as an apodization (which means + "removing the foot", i.e. smoothing discontinuities at the beginning + and end of the sampled signal) or tapering function. + + References + ---------- + .. [1] Blackman, R.B. and Tukey, J.W., (1958) The measurement of power + spectra, Dover Publications, New York. + .. [2] E.R. Kanasewich, "Time Sequence Analysis in Geophysics", + The University of Alberta Press, 1975, pp. 106-108. + .. [3] Wikipedia, "Window function", + https://en.wikipedia.org/wiki/Window_function + .. [4] W.H. Press, B.P. Flannery, S.A. Teukolsky, and W.T. Vetterling, + "Numerical Recipes", Cambridge University Press, 1986, page 425. + + Examples + -------- + >>> np.hanning(12) + array([0. , 0.07937323, 0.29229249, 0.57115742, 0.82743037, + 0.97974649, 0.97974649, 0.82743037, 0.57115742, 0.29229249, + 0.07937323, 0. ]) + + Plot the window and its frequency response: + + >>> import matplotlib.pyplot as plt + >>> from numpy.fft import fft, fftshift + >>> window = np.hanning(51) + >>> plt.plot(window) + [] + >>> plt.title("Hann window") + Text(0.5, 1.0, 'Hann window') + >>> plt.ylabel("Amplitude") + Text(0, 0.5, 'Amplitude') + >>> plt.xlabel("Sample") + Text(0.5, 0, 'Sample') + >>> plt.show() + + >>> plt.figure() +
+    <Figure size 640x480 with 0 Axes>
+ >>> A = fft(window, 2048) / 25.5 + >>> mag = np.abs(fftshift(A)) + >>> freq = np.linspace(-0.5, 0.5, len(A)) + >>> with np.errstate(divide='ignore', invalid='ignore'): + ... response = 20 * np.log10(mag) + ... + >>> response = np.clip(response, -100, 100) + >>> plt.plot(freq, response) + [] + >>> plt.title("Frequency response of the Hann window") + Text(0.5, 1.0, 'Frequency response of the Hann window') + >>> plt.ylabel("Magnitude [dB]") + Text(0, 0.5, 'Magnitude [dB]') + >>> plt.xlabel("Normalized frequency [cycles per sample]") + Text(0.5, 0, 'Normalized frequency [cycles per sample]') + >>> plt.axis('tight') + ... + >>> plt.show() + + """ + if M < 1: + return array([], dtype=np.result_type(M, 0.0)) + if M == 1: + return ones(1, dtype=np.result_type(M, 0.0)) + n = arange(1-M, M, 2) + return 0.5 + 0.5*cos(pi*n/(M-1)) + + +@set_module('numpy') +def hamming(M): + """ + Return the Hamming window. + + The Hamming window is a taper formed by using a weighted cosine. + + Parameters + ---------- + M : int + Number of points in the output window. If zero or less, an + empty array is returned. + + Returns + ------- + out : ndarray + The window, with the maximum value normalized to one (the value + one appears only if the number of samples is odd). + + See Also + -------- + bartlett, blackman, hanning, kaiser + + Notes + ----- + The Hamming window is defined as + + .. math:: w(n) = 0.54 - 0.46cos\\left(\\frac{2\\pi{n}}{M-1}\\right) + \\qquad 0 \\leq n \\leq M-1 + + The Hamming was named for R. W. Hamming, an associate of J. W. Tukey + and is described in Blackman and Tukey. It was recommended for + smoothing the truncated autocovariance function in the time domain. + Most references to the Hamming window come from the signal processing + literature, where it is used as one of many windowing functions for + smoothing values. It is also known as an apodization (which means + "removing the foot", i.e. smoothing discontinuities at the beginning + and end of the sampled signal) or tapering function. + + References + ---------- + .. [1] Blackman, R.B. and Tukey, J.W., (1958) The measurement of power + spectra, Dover Publications, New York. + .. [2] E.R. Kanasewich, "Time Sequence Analysis in Geophysics", The + University of Alberta Press, 1975, pp. 109-110. + .. [3] Wikipedia, "Window function", + https://en.wikipedia.org/wiki/Window_function + .. [4] W.H. Press, B.P. Flannery, S.A. Teukolsky, and W.T. Vetterling, + "Numerical Recipes", Cambridge University Press, 1986, page 425. + + Examples + -------- + >>> np.hamming(12) + array([ 0.08 , 0.15302337, 0.34890909, 0.60546483, 0.84123594, # may vary + 0.98136677, 0.98136677, 0.84123594, 0.60546483, 0.34890909, + 0.15302337, 0.08 ]) + + Plot the window and the frequency response: + + >>> import matplotlib.pyplot as plt + >>> from numpy.fft import fft, fftshift + >>> window = np.hamming(51) + >>> plt.plot(window) + [] + >>> plt.title("Hamming window") + Text(0.5, 1.0, 'Hamming window') + >>> plt.ylabel("Amplitude") + Text(0, 0.5, 'Amplitude') + >>> plt.xlabel("Sample") + Text(0.5, 0, 'Sample') + >>> plt.show() + + >>> plt.figure() +
+    <Figure size 640x480 with 0 Axes>
+ >>> A = fft(window, 2048) / 25.5 + >>> mag = np.abs(fftshift(A)) + >>> freq = np.linspace(-0.5, 0.5, len(A)) + >>> response = 20 * np.log10(mag) + >>> response = np.clip(response, -100, 100) + >>> plt.plot(freq, response) + [] + >>> plt.title("Frequency response of Hamming window") + Text(0.5, 1.0, 'Frequency response of Hamming window') + >>> plt.ylabel("Magnitude [dB]") + Text(0, 0.5, 'Magnitude [dB]') + >>> plt.xlabel("Normalized frequency [cycles per sample]") + Text(0.5, 0, 'Normalized frequency [cycles per sample]') + >>> plt.axis('tight') + ... + >>> plt.show() + + """ + if M < 1: + return array([], dtype=np.result_type(M, 0.0)) + if M == 1: + return ones(1, dtype=np.result_type(M, 0.0)) + n = arange(1-M, M, 2) + return 0.54 + 0.46*cos(pi*n/(M-1)) + + +## Code from cephes for i0 + +_i0A = [ + -4.41534164647933937950E-18, + 3.33079451882223809783E-17, + -2.43127984654795469359E-16, + 1.71539128555513303061E-15, + -1.16853328779934516808E-14, + 7.67618549860493561688E-14, + -4.85644678311192946090E-13, + 2.95505266312963983461E-12, + -1.72682629144155570723E-11, + 9.67580903537323691224E-11, + -5.18979560163526290666E-10, + 2.65982372468238665035E-9, + -1.30002500998624804212E-8, + 6.04699502254191894932E-8, + -2.67079385394061173391E-7, + 1.11738753912010371815E-6, + -4.41673835845875056359E-6, + 1.64484480707288970893E-5, + -5.75419501008210370398E-5, + 1.88502885095841655729E-4, + -5.76375574538582365885E-4, + 1.63947561694133579842E-3, + -4.32430999505057594430E-3, + 1.05464603945949983183E-2, + -2.37374148058994688156E-2, + 4.93052842396707084878E-2, + -9.49010970480476444210E-2, + 1.71620901522208775349E-1, + -3.04682672343198398683E-1, + 6.76795274409476084995E-1 + ] + +_i0B = [ + -7.23318048787475395456E-18, + -4.83050448594418207126E-18, + 4.46562142029675999901E-17, + 3.46122286769746109310E-17, + -2.82762398051658348494E-16, + -3.42548561967721913462E-16, + 1.77256013305652638360E-15, + 3.81168066935262242075E-15, + -9.55484669882830764870E-15, + -4.15056934728722208663E-14, + 1.54008621752140982691E-14, + 3.85277838274214270114E-13, + 7.18012445138366623367E-13, + -1.79417853150680611778E-12, + -1.32158118404477131188E-11, + -3.14991652796324136454E-11, + 1.18891471078464383424E-11, + 4.94060238822496958910E-10, + 3.39623202570838634515E-9, + 2.26666899049817806459E-8, + 2.04891858946906374183E-7, + 2.89137052083475648297E-6, + 6.88975834691682398426E-5, + 3.36911647825569408990E-3, + 8.04490411014108831608E-1 + ] + + +def _chbevl(x, vals): + b0 = vals[0] + b1 = 0.0 + + for i in range(1, len(vals)): + b2 = b1 + b1 = b0 + b0 = x*b1 - b2 + vals[i] + + return 0.5*(b0 - b2) + + +def _i0_1(x): + return exp(x) * _chbevl(x/2.0-2, _i0A) + + +def _i0_2(x): + return exp(x) * _chbevl(32.0/x - 2.0, _i0B) / sqrt(x) + + +def _i0_dispatcher(x): + return (x,) + + +@array_function_dispatch(_i0_dispatcher) +def i0(x): + """ + Modified Bessel function of the first kind, order 0. + + Usually denoted :math:`I_0`. + + Parameters + ---------- + x : array_like of float + Argument of the Bessel function. + + Returns + ------- + out : ndarray, shape = x.shape, dtype = float + The modified Bessel function evaluated at each of the elements of `x`. + + See Also + -------- + scipy.special.i0, scipy.special.iv, scipy.special.ive + + Notes + ----- + The scipy implementation is recommended over this function: it is a + proper ufunc written in C, and more than an order of magnitude faster. 
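+
+    For instance (a sketch; requires SciPy), the two implementations agree
+    to floating-point precision:
+
+    >>> import scipy.special  # doctest: +SKIP
+    >>> np.allclose(np.i0([0., 1., 2.]), scipy.special.i0([0., 1., 2.]))  # doctest: +SKIP
+    True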
+ + We use the algorithm published by Clenshaw [1]_ and referenced by + Abramowitz and Stegun [2]_, for which the function domain is + partitioned into the two intervals [0,8] and (8,inf), and Chebyshev + polynomial expansions are employed in each interval. Relative error on + the domain [0,30] using IEEE arithmetic is documented [3]_ as having a + peak of 5.8e-16 with an rms of 1.4e-16 (n = 30000). + + References + ---------- + .. [1] C. W. Clenshaw, "Chebyshev series for mathematical functions", in + *National Physical Laboratory Mathematical Tables*, vol. 5, London: + Her Majesty's Stationery Office, 1962. + .. [2] M. Abramowitz and I. A. Stegun, *Handbook of Mathematical + Functions*, 10th printing, New York: Dover, 1964, pp. 379. + https://personal.math.ubc.ca/~cbm/aands/page_379.htm + .. [3] https://metacpan.org/pod/distribution/Math-Cephes/lib/Math/Cephes.pod#i0:-Modified-Bessel-function-of-order-zero + + Examples + -------- + >>> np.i0(0.) + array(1.0) + >>> np.i0([0, 1, 2, 3]) + array([1. , 1.26606588, 2.2795853 , 4.88079259]) + + """ + x = np.asanyarray(x) + if x.dtype.kind == 'c': + raise TypeError("i0 not supported for complex values") + if x.dtype.kind != 'f': + x = x.astype(float) + x = np.abs(x) + return piecewise(x, [x <= 8.0], [_i0_1, _i0_2]) + +## End of cephes code for i0 + + +@set_module('numpy') +def kaiser(M, beta): + """ + Return the Kaiser window. + + The Kaiser window is a taper formed by using a Bessel function. + + Parameters + ---------- + M : int + Number of points in the output window. If zero or less, an + empty array is returned. + beta : float + Shape parameter for window. + + Returns + ------- + out : array + The window, with the maximum value normalized to one (the value + one appears only if the number of samples is odd). + + See Also + -------- + bartlett, blackman, hamming, hanning + + Notes + ----- + The Kaiser window is defined as + + .. math:: w(n) = I_0\\left( \\beta \\sqrt{1-\\frac{4n^2}{(M-1)^2}} + \\right)/I_0(\\beta) + + with + + .. math:: \\quad -\\frac{M-1}{2} \\leq n \\leq \\frac{M-1}{2}, + + where :math:`I_0` is the modified zeroth-order Bessel function. + + The Kaiser was named for Jim Kaiser, who discovered a simple + approximation to the DPSS window based on Bessel functions. The Kaiser + window is a very good approximation to the Digital Prolate Spheroidal + Sequence, or Slepian window, which is the transform which maximizes the + energy in the main lobe of the window relative to total energy. + + The Kaiser can approximate many other windows by varying the beta + parameter. + + ==== ======================= + beta Window shape + ==== ======================= + 0 Rectangular + 5 Similar to a Hamming + 6 Similar to a Hanning + 8.6 Similar to a Blackman + ==== ======================= + + A beta value of 14 is probably a good starting point. Note that as beta + gets large, the window narrows, and so the number of samples needs to be + large enough to sample the increasingly narrow spike, otherwise NaNs will + get returned. + + Most references to the Kaiser window come from the signal processing + literature, where it is used as one of many windowing functions for + smoothing values. It is also known as an apodization (which means + "removing the foot", i.e. smoothing discontinuities at the beginning + and end of the sampled signal) or tapering function. + + References + ---------- + .. [1] J. F. Kaiser, "Digital Filters" - Ch 7 in "Systems analysis by + digital computer", Editors: F.F. Kuo and J.F. Kaiser, p 218-285. 
+ John Wiley and Sons, New York, (1966). + .. [2] E.R. Kanasewich, "Time Sequence Analysis in Geophysics", The + University of Alberta Press, 1975, pp. 177-178. + .. [3] Wikipedia, "Window function", + https://en.wikipedia.org/wiki/Window_function + + Examples + -------- + >>> import matplotlib.pyplot as plt + >>> np.kaiser(12, 14) + array([7.72686684e-06, 3.46009194e-03, 4.65200189e-02, # may vary + 2.29737120e-01, 5.99885316e-01, 9.45674898e-01, + 9.45674898e-01, 5.99885316e-01, 2.29737120e-01, + 4.65200189e-02, 3.46009194e-03, 7.72686684e-06]) + + + Plot the window and the frequency response: + + >>> from numpy.fft import fft, fftshift + >>> window = np.kaiser(51, 14) + >>> plt.plot(window) + [] + >>> plt.title("Kaiser window") + Text(0.5, 1.0, 'Kaiser window') + >>> plt.ylabel("Amplitude") + Text(0, 0.5, 'Amplitude') + >>> plt.xlabel("Sample") + Text(0.5, 0, 'Sample') + >>> plt.show() + + >>> plt.figure() +
+    <Figure size 640x480 with 0 Axes>
+ >>> A = fft(window, 2048) / 25.5 + >>> mag = np.abs(fftshift(A)) + >>> freq = np.linspace(-0.5, 0.5, len(A)) + >>> response = 20 * np.log10(mag) + >>> response = np.clip(response, -100, 100) + >>> plt.plot(freq, response) + [] + >>> plt.title("Frequency response of Kaiser window") + Text(0.5, 1.0, 'Frequency response of Kaiser window') + >>> plt.ylabel("Magnitude [dB]") + Text(0, 0.5, 'Magnitude [dB]') + >>> plt.xlabel("Normalized frequency [cycles per sample]") + Text(0.5, 0, 'Normalized frequency [cycles per sample]') + >>> plt.axis('tight') + (-0.5, 0.5, -100.0, ...) # may vary + >>> plt.show() + + """ + if M == 1: + return np.ones(1, dtype=np.result_type(M, 0.0)) + n = arange(0, M) + alpha = (M-1)/2.0 + return i0(beta * sqrt(1-((n-alpha)/alpha)**2.0))/i0(float(beta)) + + +def _sinc_dispatcher(x): + return (x,) + + +@array_function_dispatch(_sinc_dispatcher) +def sinc(x): + r""" + Return the normalized sinc function. + + The sinc function is :math:`\sin(\pi x)/(\pi x)`. + + .. note:: + + Note the normalization factor of ``pi`` used in the definition. + This is the most commonly used definition in signal processing. + Use ``sinc(x / np.pi)`` to obtain the unnormalized sinc function + :math:`\sin(x)/(x)` that is more common in mathematics. + + Parameters + ---------- + x : ndarray + Array (possibly multi-dimensional) of values for which to to + calculate ``sinc(x)``. + + Returns + ------- + out : ndarray + ``sinc(x)``, which has the same shape as the input. + + Notes + ----- + ``sinc(0)`` is the limit value 1. + + The name sinc is short for "sine cardinal" or "sinus cardinalis". + + The sinc function is used in various signal processing applications, + including in anti-aliasing, in the construction of a Lanczos resampling + filter, and in interpolation. + + For bandlimited interpolation of discrete-time signals, the ideal + interpolation kernel is proportional to the sinc function. + + References + ---------- + .. [1] Weisstein, Eric W. "Sinc Function." From MathWorld--A Wolfram Web + Resource. http://mathworld.wolfram.com/SincFunction.html + .. [2] Wikipedia, "Sinc function", + https://en.wikipedia.org/wiki/Sinc_function + + Examples + -------- + >>> import matplotlib.pyplot as plt + >>> x = np.linspace(-4, 4, 41) + >>> np.sinc(x) + array([-3.89804309e-17, -4.92362781e-02, -8.40918587e-02, # may vary + -8.90384387e-02, -5.84680802e-02, 3.89804309e-17, + 6.68206631e-02, 1.16434881e-01, 1.26137788e-01, + 8.50444803e-02, -3.89804309e-17, -1.03943254e-01, + -1.89206682e-01, -2.16236208e-01, -1.55914881e-01, + 3.89804309e-17, 2.33872321e-01, 5.04551152e-01, + 7.56826729e-01, 9.35489284e-01, 1.00000000e+00, + 9.35489284e-01, 7.56826729e-01, 5.04551152e-01, + 2.33872321e-01, 3.89804309e-17, -1.55914881e-01, + -2.16236208e-01, -1.89206682e-01, -1.03943254e-01, + -3.89804309e-17, 8.50444803e-02, 1.26137788e-01, + 1.16434881e-01, 6.68206631e-02, 3.89804309e-17, + -5.84680802e-02, -8.90384387e-02, -8.40918587e-02, + -4.92362781e-02, -3.89804309e-17]) + + >>> plt.plot(x, np.sinc(x)) + [] + >>> plt.title("Sinc Function") + Text(0.5, 1.0, 'Sinc Function') + >>> plt.ylabel("Amplitude") + Text(0, 0.5, 'Amplitude') + >>> plt.xlabel("X") + Text(0.5, 0, 'X') + >>> plt.show() + + """ + x = np.asanyarray(x) + y = pi * where(x == 0, 1.0e-20, x) + return sin(y)/y + + +def _msort_dispatcher(a): + return (a,) + + +@array_function_dispatch(_msort_dispatcher) +def msort(a): + """ + Return a copy of an array sorted along the first axis. + + Parameters + ---------- + a : array_like + Array to be sorted. 
+ + Returns + ------- + sorted_array : ndarray + Array of the same type and shape as `a`. + + See Also + -------- + sort + + Notes + ----- + ``np.msort(a)`` is equivalent to ``np.sort(a, axis=0)``. + + """ + b = array(a, subok=True, copy=True) + b.sort(0) + return b + + +def _ureduce(a, func, **kwargs): + """ + Internal Function. + Call `func` with `a` as first argument swapping the axes to use extended + axis on functions that don't support it natively. + + Returns result and a.shape with axis dims set to 1. + + Parameters + ---------- + a : array_like + Input array or object that can be converted to an array. + func : callable + Reduction function capable of receiving a single axis argument. + It is called with `a` as first argument followed by `kwargs`. + kwargs : keyword arguments + additional keyword arguments to pass to `func`. + + Returns + ------- + result : tuple + Result of func(a, **kwargs) and a.shape with axis dims set to 1 + which can be used to reshape the result to the same shape a ufunc with + keepdims=True would produce. + + """ + a = np.asanyarray(a) + axis = kwargs.get('axis', None) + if axis is not None: + keepdim = list(a.shape) + nd = a.ndim + axis = _nx.normalize_axis_tuple(axis, nd) + + for ax in axis: + keepdim[ax] = 1 + + if len(axis) == 1: + kwargs['axis'] = axis[0] + else: + keep = set(range(nd)) - set(axis) + nkeep = len(keep) + # swap axis that should not be reduced to front + for i, s in enumerate(sorted(keep)): + a = a.swapaxes(i, s) + # merge reduced axis + a = a.reshape(a.shape[:nkeep] + (-1,)) + kwargs['axis'] = -1 + keepdim = tuple(keepdim) + else: + keepdim = (1,) * a.ndim + + r = func(a, **kwargs) + return r, keepdim + + +def _median_dispatcher( + a, axis=None, out=None, overwrite_input=None, keepdims=None): + return (a, out) + + +@array_function_dispatch(_median_dispatcher) +def median(a, axis=None, out=None, overwrite_input=False, keepdims=False): + """ + Compute the median along the specified axis. + + Returns the median of the array elements. + + Parameters + ---------- + a : array_like + Input array or object that can be converted to an array. + axis : {int, sequence of int, None}, optional + Axis or axes along which the medians are computed. The default + is to compute the median along a flattened version of the array. + A sequence of axes is supported since version 1.9.0. + out : ndarray, optional + Alternative output array in which to place the result. It must + have the same shape and buffer length as the expected output, + but the type (of the output) will be cast if necessary. + overwrite_input : bool, optional + If True, then allow use of memory of input array `a` for + calculations. The input array will be modified by the call to + `median`. This will save memory when you do not need to preserve + the contents of the input array. Treat the input as undefined, + but it will probably be fully or partially sorted. Default is + False. If `overwrite_input` is ``True`` and `a` is not already an + `ndarray`, an error will be raised. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the original `arr`. + + .. versionadded:: 1.9.0 + + Returns + ------- + median : ndarray + A new array holding the result. If the input contains integers + or floats smaller than ``float64``, then the output data-type is + ``np.float64``. Otherwise, the data-type of the output is the + same as that of the input. 
If `out` is specified, that array is + returned instead. + + See Also + -------- + mean, percentile + + Notes + ----- + Given a vector ``V`` of length ``N``, the median of ``V`` is the + middle value of a sorted copy of ``V``, ``V_sorted`` - i + e., ``V_sorted[(N-1)/2]``, when ``N`` is odd, and the average of the + two middle values of ``V_sorted`` when ``N`` is even. + + Examples + -------- + >>> a = np.array([[10, 7, 4], [3, 2, 1]]) + >>> a + array([[10, 7, 4], + [ 3, 2, 1]]) + >>> np.median(a) + 3.5 + >>> np.median(a, axis=0) + array([6.5, 4.5, 2.5]) + >>> np.median(a, axis=1) + array([7., 2.]) + >>> m = np.median(a, axis=0) + >>> out = np.zeros_like(m) + >>> np.median(a, axis=0, out=m) + array([6.5, 4.5, 2.5]) + >>> m + array([6.5, 4.5, 2.5]) + >>> b = a.copy() + >>> np.median(b, axis=1, overwrite_input=True) + array([7., 2.]) + >>> assert not np.all(a==b) + >>> b = a.copy() + >>> np.median(b, axis=None, overwrite_input=True) + 3.5 + >>> assert not np.all(a==b) + + """ + r, k = _ureduce(a, func=_median, axis=axis, out=out, + overwrite_input=overwrite_input) + if keepdims: + return r.reshape(k) + else: + return r + + +def _median(a, axis=None, out=None, overwrite_input=False): + # can't be reasonably be implemented in terms of percentile as we have to + # call mean to not break astropy + a = np.asanyarray(a) + + # Set the partition indexes + if axis is None: + sz = a.size + else: + sz = a.shape[axis] + if sz % 2 == 0: + szh = sz // 2 + kth = [szh - 1, szh] + else: + kth = [(sz - 1) // 2] + # Check if the array contains any nan's + if np.issubdtype(a.dtype, np.inexact): + kth.append(-1) + + if overwrite_input: + if axis is None: + part = a.ravel() + part.partition(kth) + else: + a.partition(kth, axis=axis) + part = a + else: + part = partition(a, kth, axis=axis) + + if part.shape == (): + # make 0-D arrays work + return part.item() + if axis is None: + axis = 0 + + indexer = [slice(None)] * part.ndim + index = part.shape[axis] // 2 + if part.shape[axis] % 2 == 1: + # index with slice to allow mean (below) to work + indexer[axis] = slice(index, index+1) + else: + indexer[axis] = slice(index-1, index+1) + indexer = tuple(indexer) + + # Use mean in both odd and even case to coerce data type, + # using out array if needed. + rout = mean(part[indexer], axis=axis, out=out) + # Check if the array contains any nan's + if np.issubdtype(a.dtype, np.inexact) and sz > 0: + # If nans are possible, warn and replace by nans like mean would. + rout = np.lib.utils._median_nancheck(part, rout, axis) + + return rout + + +def _percentile_dispatcher(a, q, axis=None, out=None, overwrite_input=None, + method=None, keepdims=None, *, interpolation=None): + return (a, q, out) + + +@array_function_dispatch(_percentile_dispatcher) +def percentile(a, + q, + axis=None, + out=None, + overwrite_input=False, + method="linear", + keepdims=False, + *, + interpolation=None): + """ + Compute the q-th percentile of the data along the specified axis. + + Returns the q-th percentile(s) of the array elements. + + Parameters + ---------- + a : array_like + Input array or object that can be converted to an array. + q : array_like of float + Percentile or sequence of percentiles to compute, which must be between + 0 and 100 inclusive. + axis : {int, tuple of int, None}, optional + Axis or axes along which the percentiles are computed. The + default is to compute the percentile(s) along a flattened + version of the array. + + .. 
versionchanged:: 1.9.0
+            A tuple of axes is supported
+    out : ndarray, optional
+        Alternative output array in which to place the result. It must
+        have the same shape and buffer length as the expected output,
+        but the type (of the output) will be cast if necessary.
+    overwrite_input : bool, optional
+        If True, then allow the input array `a` to be modified by intermediate
+        calculations, to save memory. In this case, the contents of the input
+        `a` after this function completes are undefined.
+    method : str, optional
+        This parameter specifies the method to use for estimating the
+        percentile. There are many different methods, some unique to NumPy.
+        See the notes for explanation. The options sorted by their R type
+        as summarized in the H&F paper [1]_ are:
+
+        1. 'inverted_cdf'
+        2. 'averaged_inverted_cdf'
+        3. 'closest_observation'
+        4. 'interpolated_inverted_cdf'
+        5. 'hazen'
+        6. 'weibull'
+        7. 'linear'  (default)
+        8. 'median_unbiased'
+        9. 'normal_unbiased'
+
+        The first three methods are discontinuous. NumPy further defines the
+        following discontinuous variations of the default 'linear' (7.) option:
+
+        * 'lower'
+        * 'higher'
+        * 'midpoint'
+        * 'nearest'
+
+        .. versionchanged:: 1.22.0
+            This argument was previously called "interpolation" and only
+            offered the "linear" default and last four options.
+
+    keepdims : bool, optional
+        If this is set to True, the axes which are reduced are left in
+        the result as dimensions with size one. With this option, the
+        result will broadcast correctly against the original array `a`.
+
+        .. versionadded:: 1.9.0
+
+    interpolation : str, optional
+        Deprecated name for the method keyword argument.
+
+        .. deprecated:: 1.22.0
+
+    Returns
+    -------
+    percentile : scalar or ndarray
+        If `q` is a single percentile and `axis=None`, then the result
+        is a scalar. If multiple percentiles are given, first axis of
+        the result corresponds to the percentiles. The other axes are
+        the axes that remain after the reduction of `a`. If the input
+        contains integers or floats smaller than ``float64``, the output
+        data-type is ``float64``. Otherwise, the output data-type is the
+        same as that of the input. If `out` is specified, that array is
+        returned instead.
+
+    See Also
+    --------
+    mean
+    median : equivalent to ``percentile(..., 50)``
+    nanpercentile
+    quantile : equivalent to percentile, except q in the range [0, 1].
+
+    Notes
+    -----
+    Given a vector ``V`` of length ``N``, the q-th percentile of ``V`` is
+    the value ``q/100`` of the way from the minimum to the maximum in a
+    sorted copy of ``V``. The values and distances of the two nearest
+    neighbors as well as the `method` parameter will determine the
+    percentile if the normalized ranking does not match the location of
+    ``q`` exactly. This function is the same as the median if ``q=50``, the
+    same as the minimum if ``q=0`` and the same as the maximum if
+    ``q=100``.
+
+    The optional `method` parameter specifies the method to use when the
+    desired percentile lies between two data points ``i < j``. Below, ``q``
+    is the quantile value, ``n`` is the sample size, and ``alpha`` and
+    ``beta`` are correction constants that modify ``i`` and ``j``. The
+    following formula gives the interpolated position ``i + g`` of the
+    quantile in the sorted sample, with ``i`` being the floor and ``g``
+    the fractional part of the result.
+
+    .. math::
+
+    The different methods then work as follows:
+
+    inverted_cdf:
+        method 1 of H&F [1]_.
+        This method gives discontinuous results:
+
+        * if g > 0 ; then take j
+        * if g = 0 ; then take i
+
+    averaged_inverted_cdf:
+        method 2 of H&F [1]_.
+        This method gives discontinuous results:
+
+        * if g > 0 ; then take j
+        * if g = 0 ; then average between bounds
+
+    closest_observation:
+        method 3 of H&F [1]_.
+        This method gives discontinuous results:
+
+        * if g > 0 ; then take j
+        * if g = 0 and index is odd ; then take j
+        * if g = 0 and index is even ; then take i
+
+    interpolated_inverted_cdf:
+        method 4 of H&F [1]_.
+        This method gives continuous results using:
+
+        * alpha = 0
+        * beta = 1
+
+    hazen:
+        method 5 of H&F [1]_.
+        This method gives continuous results using:
+
+        * alpha = 1/2
+        * beta = 1/2
+
+    weibull:
+        method 6 of H&F [1]_.
+        This method gives continuous results using:
+
+        * alpha = 0
+        * beta = 0
+
+    linear:
+        method 7 of H&F [1]_.
+        This method gives continuous results using:
+
+        * alpha = 1
+        * beta = 1
+
+    median_unbiased:
+        method 8 of H&F [1]_.
+        This method is probably the best method if the sample
+        distribution function is unknown (see reference).
+        This method gives continuous results using:
+
+        * alpha = 1/3
+        * beta = 1/3
+
+    normal_unbiased:
+        method 9 of H&F [1]_.
+        This method is probably the best method if the sample
+        distribution function is known to be normal.
+        This method gives continuous results using:
+
+        * alpha = 3/8
+        * beta = 3/8
+
+    lower:
+        NumPy method kept for backwards compatibility.
+        Takes ``i`` as the interpolation point.
+
+    higher:
+        NumPy method kept for backwards compatibility.
+        Takes ``j`` as the interpolation point.
+
+    nearest:
+        NumPy method kept for backwards compatibility.
+        Takes ``i`` or ``j``, whichever is nearest.
+
+    midpoint:
+        NumPy method kept for backwards compatibility.
+        Uses ``(i + j) / 2``.
+
+    Examples
+    --------
+    >>> a = np.array([[10, 7, 4], [3, 2, 1]])
+    >>> a
+    array([[10,  7,  4],
+           [ 3,  2,  1]])
+    >>> np.percentile(a, 50)
+    3.5
+    >>> np.percentile(a, 50, axis=0)
+    array([6.5, 4.5, 2.5])
+    >>> np.percentile(a, 50, axis=1)
+    array([7., 2.])
+    >>> np.percentile(a, 50, axis=1, keepdims=True)
+    array([[7.],
+           [2.]])
+
+    >>> m = np.percentile(a, 50, axis=0)
+    >>> out = np.zeros_like(m)
+    >>> np.percentile(a, 50, axis=0, out=out)
+    array([6.5, 4.5, 2.5])
+    >>> m
+    array([6.5, 4.5, 2.5])
+
+    >>> b = a.copy()
+    >>> np.percentile(b, 50, axis=1, overwrite_input=True)
+    array([7., 2.])
+    >>> assert not np.all(a == b)
+
+    The different methods can be visualized graphically:
+
+    .. plot::
+
+        import matplotlib.pyplot as plt
+
+        a = np.arange(4)
+        p = np.linspace(0, 100, 6001)
+        ax = plt.gca()
+        lines = [
+            ('linear', '-', 'C0'),
+            ('inverted_cdf', ':', 'C1'),
+            # Almost the same as `inverted_cdf`:
+            ('averaged_inverted_cdf', '-.', 'C1'),
+            ('closest_observation', ':', 'C2'),
+            ('interpolated_inverted_cdf', '--', 'C1'),
+            ('hazen', '--', 'C3'),
+            ('weibull', '-.', 'C4'),
+            ('median_unbiased', '--', 'C5'),
+            ('normal_unbiased', '-.', 'C6'),
+        ]
+        for method, style, color in lines:
+            ax.plot(
+                p, np.percentile(a, p, method=method),
+                label=method, linestyle=style, color=color)
+        ax.set(
+            title='Percentiles for different methods and data: ' + str(a),
+            xlabel='Percentile',
+            ylabel='Estimated percentile value',
+            yticks=a)
+        ax.legend()
+        plt.show()
+
+    References
+    ----------
+    .. [1] R. J. Hyndman and Y. Fan,
+       "Sample quantiles in statistical packages,"
+       The American Statistician, 50(4), pp. 361-365, 1996
+
+    """
+    if interpolation is not None:
+        method = _check_interpolation_as_method(
+            method, interpolation, "percentile")
+    q = np.true_divide(q, 100)
+    q = asanyarray(q)  # undo any decay that the ufunc performed (see gh-13105)
+    if not _quantile_is_valid(q):
+        raise ValueError("Percentiles must be in the range [0, 100]")
+    return _quantile_unchecked(
+        a, q, axis, out, overwrite_input, method, keepdims)
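+
+
+# A short editorial illustration (not part of NumPy) contrasting the
+# continuous default with the discontinuous compatibility methods listed
+# in the docstring above; assumes NumPy >= 1.22 for the `method` keyword.
+def _percentile_method_demo():
+    # hypothetical helper, for illustration only
+    a = np.array([1, 2, 3, 4])
+    assert np.percentile(a, 25, method="linear") == 1.75    # interpolates i, j
+    assert np.percentile(a, 25, method="lower") == 1        # takes i
+    assert np.percentile(a, 25, method="higher") == 2       # takes j
+    assert np.percentile(a, 25, method="midpoint") == 1.5   # (i + j) / 2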
+def _quantile_dispatcher(a, q, axis=None, out=None, overwrite_input=None,
+                         method=None, keepdims=None, *, interpolation=None):
+    return (a, q, out)
+
+
+@array_function_dispatch(_quantile_dispatcher)
+def quantile(a,
+             q,
+             axis=None,
+             out=None,
+             overwrite_input=False,
+             method="linear",
+             keepdims=False,
+             *,
+             interpolation=None):
+    """
+    Compute the q-th quantile of the data along the specified axis.
+
+    .. versionadded:: 1.15.0
+
+    Parameters
+    ----------
+    a : array_like
+        Input array or object that can be converted to an array.
+    q : array_like of float
+        Quantile or sequence of quantiles to compute, which must be between
+        0 and 1 inclusive.
+    axis : {int, tuple of int, None}, optional
+        Axis or axes along which the quantiles are computed. The default is
+        to compute the quantile(s) along a flattened version of the array.
+    out : ndarray, optional
+        Alternative output array in which to place the result. It must have
+        the same shape and buffer length as the expected output, but the
+        type (of the output) will be cast if necessary.
+    overwrite_input : bool, optional
+        If True, then allow the input array `a` to be modified by
+        intermediate calculations, to save memory. In this case, the
+        contents of the input `a` after this function completes is
+        undefined.
+    method : str, optional
+        This parameter specifies the method to use for estimating the
+        quantile. There are many different methods, some unique to NumPy.
+        See the notes for explanation. The options sorted by their R type
+        as summarized in the H&F paper [1]_ are:
+
+        1. 'inverted_cdf'
+        2. 'averaged_inverted_cdf'
+        3. 'closest_observation'
+        4. 'interpolated_inverted_cdf'
+        5. 'hazen'
+        6. 'weibull'
+        7. 'linear' (default)
+        8. 'median_unbiased'
+        9. 'normal_unbiased'
+
+        The first three methods are discontinuous. NumPy further defines the
+        following discontinuous variations of the default 'linear' (7.)
+        option:
+
+        * 'lower'
+        * 'higher'
+        * 'midpoint'
+        * 'nearest'
+
+        .. versionchanged:: 1.22.0
+            This argument was previously called "interpolation" and only
+            offered the "linear" default and last four options.
+
+    keepdims : bool, optional
+        If this is set to True, the axes which are reduced are left in
+        the result as dimensions with size one. With this option, the
+        result will broadcast correctly against the original array `a`.
+
+    interpolation : str, optional
+        Deprecated name for the method keyword argument.
+
+        .. deprecated:: 1.22.0
+
+    Returns
+    -------
+    quantile : scalar or ndarray
+        If `q` is a single quantile and `axis=None`, then the result
+        is a scalar. If multiple quantiles are given, first axis of
+        the result corresponds to the quantiles. The other axes are
+        the axes that remain after the reduction of `a`. If the input
+        contains integers or floats smaller than ``float64``, the output
+        data-type is ``float64``. Otherwise, the output data-type is the
+        same as that of the input. If `out` is specified, that array is
+        returned instead.
+
+    See Also
+    --------
+    mean
+    percentile : equivalent to quantile, but with q in the range [0, 100].
+    median : equivalent to ``quantile(..., 0.5)``
+    nanquantile
+
+    Notes
+    -----
+    Given a vector ``V`` of length ``N``, the q-th quantile of ``V`` is the
+    value ``q`` of the way from the minimum to the maximum in a sorted copy
+    of ``V``. The values and distances of the two nearest neighbors as well
+    as the `method` parameter will determine the quantile if the normalized
+    ranking does not match the location of ``q`` exactly. This function is
+    the same as the median if ``q=0.5``, the same as the minimum if
+    ``q=0.0`` and the same as the maximum if ``q=1.0``.
+
+    The optional `method` parameter specifies the method to use when the
+    desired quantile lies between two data points ``i < j``. In that case,
+    ``g`` is the fractional part of the index surrounded by ``i`` and ``j``,
+    and alpha and beta are correction constants modifying ``i`` and ``j``:
+
+    .. math::
+        i + g = q * ( n - alpha - beta + 1 ) + alpha
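+
+    For instance, the 'weibull' method sets ``alpha = beta = 0``, so the
+    formula becomes ``i + g = q * (n + 1)`` (a worked check, for
+    illustration):
+
+    >>> a = np.array([1.0, 2.0, 3.0, 4.0])    # n = 4, already sorted
+    >>> 0.25 * (a.size + 1) - 1               # 0-based i + g
+    0.25
+    >>> float(a[0] + (a[1] - a[0]) * 0.25)
+    1.25
+    >>> float(np.quantile(a, 0.25, method='weibull'))
+    1.25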
+
+    The different methods then work as follows:
+
+    inverted_cdf:
+        method 1 of H&F [1]_.
+        This method gives discontinuous results:
+
+        * if g > 0 ; then take j
+        * if g = 0 ; then take i
+
+    averaged_inverted_cdf:
+        method 2 of H&F [1]_.
+        This method gives discontinuous results:
+
+        * if g > 0 ; then take j
+        * if g = 0 ; then average between bounds
+
+    closest_observation:
+        method 3 of H&F [1]_.
+        This method gives discontinuous results:
+
+        * if g > 0 ; then take j
+        * if g = 0 and index is odd ; then take j
+        * if g = 0 and index is even ; then take i
+
+    interpolated_inverted_cdf:
+        method 4 of H&F [1]_.
+        This method gives continuous results using:
+
+        * alpha = 0
+        * beta = 1
+
+    hazen:
+        method 5 of H&F [1]_.
+        This method gives continuous results using:
+
+        * alpha = 1/2
+        * beta = 1/2
+
+    weibull:
+        method 6 of H&F [1]_.
+        This method gives continuous results using:
+
+        * alpha = 0
+        * beta = 0
+
+    linear:
+        method 7 of H&F [1]_.
+        This method gives continuous results using:
+
+        * alpha = 1
+        * beta = 1
+
+    median_unbiased:
+        method 8 of H&F [1]_.
+        This method is probably the best method if the sample
+        distribution function is unknown (see reference).
+        This method gives continuous results using:
+
+        * alpha = 1/3
+        * beta = 1/3
+
+    normal_unbiased:
+        method 9 of H&F [1]_.
+        This method is probably the best method if the sample
+        distribution function is known to be normal.
+        This method gives continuous results using:
+
+        * alpha = 3/8
+        * beta = 3/8
+
+    lower:
+        NumPy method kept for backwards compatibility.
+        Takes ``i`` as the interpolation point.
+
+    higher:
+        NumPy method kept for backwards compatibility.
+        Takes ``j`` as the interpolation point.
+
+    nearest:
+        NumPy method kept for backwards compatibility.
+        Takes ``i`` or ``j``, whichever is nearest.
+
+    midpoint:
+        NumPy method kept for backwards compatibility.
+        Uses ``(i + j) / 2``.
+
+    Examples
+    --------
+    >>> a = np.array([[10, 7, 4], [3, 2, 1]])
+    >>> a
+    array([[10,  7,  4],
+           [ 3,  2,  1]])
+    >>> np.quantile(a, 0.5)
+    3.5
+    >>> np.quantile(a, 0.5, axis=0)
+    array([6.5, 4.5, 2.5])
+    >>> np.quantile(a, 0.5, axis=1)
+    array([7., 2.])
+    >>> np.quantile(a, 0.5, axis=1, keepdims=True)
+    array([[7.],
+           [2.]])
+    >>> m = np.quantile(a, 0.5, axis=0)
+    >>> out = np.zeros_like(m)
+    >>> np.quantile(a, 0.5, axis=0, out=out)
+    array([6.5, 4.5, 2.5])
+    >>> m
+    array([6.5, 4.5, 2.5])
+    >>> b = a.copy()
+    >>> np.quantile(b, 0.5, axis=1, overwrite_input=True)
+    array([7., 2.])
+    >>> assert not np.all(a == b)
+
+    See also `numpy.percentile` for a visualization of most methods.
+
+    References
+    ----------
+    .. [1] R. J. Hyndman and Y. Fan,
+       "Sample quantiles in statistical packages,"
+       The American Statistician, 50(4), pp. 361-365, 1996
+
+    """
+    if interpolation is not None:
+        method = _check_interpolation_as_method(
+            method, interpolation, "quantile")
+
+    q = np.asanyarray(q)
+    if not _quantile_is_valid(q):
+        raise ValueError("Quantiles must be in the range [0, 1]")
+    return _quantile_unchecked(
+        a, q, axis, out, overwrite_input, method, keepdims)
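+
+
+# Editorial sketch (not part of NumPy): quantile and percentile agree up to
+# the scaling of q, as the docstrings above state.
+def _quantile_percentile_demo():
+    # hypothetical helper, for illustration only
+    rng = np.random.default_rng(0)
+    a = rng.standard_normal(50)
+    q = np.array([0.25, 0.5, 0.75])
+    assert np.allclose(np.quantile(a, q), np.percentile(a, q * 100))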
+def _quantile_unchecked(a,
+                        q,
+                        axis=None,
+                        out=None,
+                        overwrite_input=False,
+                        method="linear",
+                        keepdims=False):
+    """Assumes that q is in [0, 1], and is an ndarray"""
+    r, k = _ureduce(a,
+                    func=_quantile_ureduce_func,
+                    q=q,
+                    axis=axis,
+                    out=out,
+                    overwrite_input=overwrite_input,
+                    method=method)
+    if keepdims:
+        return r.reshape(q.shape + k)
+    else:
+        return r
+
+
+def _quantile_is_valid(q):
+    # avoid expensive reductions, relevant for arrays with < O(1000) elements
+    if q.ndim == 1 and q.size < 10:
+        for i in range(q.size):
+            if not (0.0 <= q[i] <= 1.0):
+                return False
+    else:
+        if not (np.all(0 <= q) and np.all(q <= 1)):
+            return False
+    return True
+
+
+def _check_interpolation_as_method(method, interpolation, fname):
+    # Deprecated NumPy 1.22, 2021-11-08
+    warnings.warn(
+        f"the `interpolation=` argument to {fname} was renamed to "
+        "`method=`, which has additional options.\n"
+        "Users of the modes 'nearest', 'lower', 'higher', or "
+        "'midpoint' are encouraged to review the method they used. "
+        "(Deprecated NumPy 1.22)",
+        DeprecationWarning, stacklevel=4)
+    if method != "linear":
+        # sanity check, we assume this basically never happens
+        raise TypeError(
+            "You shall not pass both `method` and `interpolation`!\n"
+            "(`interpolation` is Deprecated in favor of `method`)")
+    return interpolation
+
+
+def _compute_virtual_index(n, quantiles, alpha: float, beta: float):
+    """
+    Compute the floating point indexes of an array for the linear
+    interpolation of quantiles.
+
+    n : array_like
+        The sample sizes.
+    quantiles : array_like
+        The quantile values.
+    alpha : float
+        A constant used to correct the index computed.
+    beta : float
+        A constant used to correct the index computed.
+
+    alpha and beta values depend on the chosen method
+    (see quantile documentation)
+
+    Reference:
+    Hyndman&Fan paper "Sample Quantiles in Statistical Packages",
+    DOI: 10.1080/00031305.1996.10473566
+    """
+    return n * quantiles + (
+            alpha + quantiles * (1 - alpha - beta)
+    ) - 1
+
+
+def _get_gamma(virtual_indexes, previous_indexes, method):
+    """
+    Compute gamma (a.k.a 'm' or 'weight') for the linear interpolation
+    of quantiles.
+
+    virtual_indexes : array_like
+        The indexes where the percentile is supposed to be found in the
+        sorted sample.
+    previous_indexes : array_like
+        The floor values of virtual_indexes.
+    method : dict
+        The interpolation method chosen, which may have a specific rule
+        modifying gamma.
+
+    gamma is usually the fractional part of virtual_indexes but can be
+    modified by the interpolation method.
+    """
+    gamma = np.asanyarray(virtual_indexes - previous_indexes)
+    gamma = method["fix_gamma"](gamma, virtual_indexes)
+    return np.asanyarray(gamma)
+
+
+def _lerp(a, b, t, out=None):
+    """
+    Compute the linear interpolation weighted by gamma on each point of
+    two arrays with the same shape.
+
+    a : array_like
+        Left bound.
+    b : array_like
+        Right bound.
+    t : array_like
+        The interpolation weight.
+    out : array_like
+        Output array.
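+
+    Notes
+    -----
+    For ``t >= 0.5`` the result is recomputed below as ``b - (b - a)*(1 - t)``
+    (the ``where=`` pass), which keeps the interpolation exact at the
+    endpoint ``t == 1`` in floating point.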
+ """ + diff_b_a = subtract(b, a) + # asanyarray is a stop-gap until gh-13105 + lerp_interpolation = asanyarray(add(a, diff_b_a * t, out=out)) + subtract(b, diff_b_a * (1 - t), out=lerp_interpolation, where=t >= 0.5) + if lerp_interpolation.ndim == 0 and out is None: + lerp_interpolation = lerp_interpolation[()] # unpack 0d arrays + return lerp_interpolation + + +def _get_gamma_mask(shape, default_value, conditioned_value, where): + out = np.full(shape, default_value) + np.copyto(out, conditioned_value, where=where, casting="unsafe") + return out + + +def _discret_interpolation_to_boundaries(index, gamma_condition_fun): + previous = np.floor(index) + next = previous + 1 + gamma = index - previous + res = _get_gamma_mask(shape=index.shape, + default_value=next, + conditioned_value=previous, + where=gamma_condition_fun(gamma, index) + ).astype(np.intp) + # Some methods can lead to out-of-bound integers, clip them: + res[res < 0] = 0 + return res + + +def _closest_observation(n, quantiles): + gamma_fun = lambda gamma, index: (gamma == 0) & (np.floor(index) % 2 == 0) + return _discret_interpolation_to_boundaries((n * quantiles) - 1 - 0.5, + gamma_fun) + + +def _inverted_cdf(n, quantiles): + gamma_fun = lambda gamma, _: (gamma == 0) + return _discret_interpolation_to_boundaries((n * quantiles) - 1, + gamma_fun) + + +def _quantile_ureduce_func( + a: np.array, + q: np.array, + axis: int = None, + out=None, + overwrite_input: bool = False, + method="linear", +) -> np.array: + if q.ndim > 2: + # The code below works fine for nd, but it might not have useful + # semantics. For now, keep the supported dimensions the same as it was + # before. + raise ValueError("q must be a scalar or 1d") + if overwrite_input: + if axis is None: + axis = 0 + arr = a.ravel() + else: + arr = a + else: + if axis is None: + axis = 0 + arr = a.flatten() + else: + arr = a.copy() + result = _quantile(arr, + quantiles=q, + axis=axis, + method=method, + out=out) + return result + + +def _get_indexes(arr, virtual_indexes, valid_values_count): + """ + Get the valid indexes of arr neighbouring virtual_indexes. + Note + This is a companion function to linear interpolation of + Quantiles + + Returns + ------- + (previous_indexes, next_indexes): Tuple + A Tuple of virtual_indexes neighbouring indexes + """ + previous_indexes = np.asanyarray(np.floor(virtual_indexes)) + next_indexes = np.asanyarray(previous_indexes + 1) + indexes_above_bounds = virtual_indexes >= valid_values_count - 1 + # When indexes is above max index, take the max value of the array + if indexes_above_bounds.any(): + previous_indexes[indexes_above_bounds] = -1 + next_indexes[indexes_above_bounds] = -1 + # When indexes is below min index, take the min value of the array + indexes_below_bounds = virtual_indexes < 0 + if indexes_below_bounds.any(): + previous_indexes[indexes_below_bounds] = 0 + next_indexes[indexes_below_bounds] = 0 + if np.issubdtype(arr.dtype, np.inexact): + # After the sort, slices having NaNs will have for last element a NaN + virtual_indexes_nans = np.isnan(virtual_indexes) + if virtual_indexes_nans.any(): + previous_indexes[virtual_indexes_nans] = -1 + next_indexes[virtual_indexes_nans] = -1 + previous_indexes = previous_indexes.astype(np.intp) + next_indexes = next_indexes.astype(np.intp) + return previous_indexes, next_indexes + + +def _quantile( + arr: np.array, + quantiles: np.array, + axis: int = -1, + method="linear", + out=None, +): + """ + Private function that doesn't support extended axis or keepdims. 
+ These methods are extended to this function using _ureduce + See nanpercentile for parameter usage + It computes the quantiles of the array for the given axis. + A linear interpolation is performed based on the `interpolation`. + + By default, the method is "linear" where alpha == beta == 1 which + performs the 7th method of Hyndman&Fan. + With "median_unbiased" we get alpha == beta == 1/3 + thus the 8th method of Hyndman&Fan. + """ + # --- Setup + arr = np.asanyarray(arr) + values_count = arr.shape[axis] + # The dimensions of `q` are prepended to the output shape, so we need the + # axis being sampled from `arr` to be last. + DATA_AXIS = 0 + if axis != DATA_AXIS: # But moveaxis is slow, so only call it if axis!=0. + arr = np.moveaxis(arr, axis, destination=DATA_AXIS) + # --- Computation of indexes + # Index where to find the value in the sorted array. + # Virtual because it is a floating point value, not an valid index. + # The nearest neighbours are used for interpolation + try: + method = _QuantileMethods[method] + except KeyError: + raise ValueError( + f"{method!r} is not a valid method. Use one of: " + f"{_QuantileMethods.keys()}") from None + virtual_indexes = method["get_virtual_index"](values_count, quantiles) + virtual_indexes = np.asanyarray(virtual_indexes) + if np.issubdtype(virtual_indexes.dtype, np.integer): + # No interpolation needed, take the points along axis + if np.issubdtype(arr.dtype, np.inexact): + # may contain nan, which would sort to the end + arr.partition(concatenate((virtual_indexes.ravel(), [-1])), axis=0) + slices_having_nans = np.isnan(arr[-1]) + else: + # cannot contain nan + arr.partition(virtual_indexes.ravel(), axis=0) + slices_having_nans = np.array(False, dtype=bool) + result = take(arr, virtual_indexes, axis=0, out=out) + else: + previous_indexes, next_indexes = _get_indexes(arr, + virtual_indexes, + values_count) + # --- Sorting + arr.partition( + np.unique(np.concatenate(([0, -1], + previous_indexes.ravel(), + next_indexes.ravel(), + ))), + axis=DATA_AXIS) + if np.issubdtype(arr.dtype, np.inexact): + slices_having_nans = np.isnan( + take(arr, indices=-1, axis=DATA_AXIS) + ) + else: + slices_having_nans = None + # --- Get values from indexes + previous = np.take(arr, previous_indexes, axis=DATA_AXIS) + next = np.take(arr, next_indexes, axis=DATA_AXIS) + # --- Linear interpolation + gamma = _get_gamma(virtual_indexes, previous_indexes, method) + result_shape = virtual_indexes.shape + (1,) * (arr.ndim - 1) + gamma = gamma.reshape(result_shape) + result = _lerp(previous, + next, + gamma, + out=out) + if np.any(slices_having_nans): + if result.ndim == 0 and out is None: + # can't write to a scalar + result = arr.dtype.type(np.nan) + else: + result[..., slices_having_nans] = np.nan + return result + + +def _trapz_dispatcher(y, x=None, dx=None, axis=None): + return (y, x) + + +@array_function_dispatch(_trapz_dispatcher) +def trapz(y, x=None, dx=1.0, axis=-1): + r""" + Integrate along the given axis using the composite trapezoidal rule. + + If `x` is provided, the integration happens in sequence along its + elements - they are not sorted. + + Integrate `y` (`x`) along each 1d slice on the given axis, compute + :math:`\int y(x) dx`. + When `x` is specified, this integrates along the parametric curve, + computing :math:`\int_t y(t) dt = + \int_t y(t) \left.\frac{dx}{dt}\right|_{x=x(t)} dt`. + + Parameters + ---------- + y : array_like + Input array to integrate. + x : array_like, optional + The sample points corresponding to the `y` values. 
If `x` is None, + the sample points are assumed to be evenly spaced `dx` apart. The + default is None. + dx : scalar, optional + The spacing between sample points when `x` is None. The default is 1. + axis : int, optional + The axis along which to integrate. + + Returns + ------- + trapz : float or ndarray + Definite integral of 'y' = n-dimensional array as approximated along + a single axis by the trapezoidal rule. If 'y' is a 1-dimensional array, + then the result is a float. If 'n' is greater than 1, then the result + is an 'n-1' dimensional array. + + See Also + -------- + sum, cumsum + + Notes + ----- + Image [2]_ illustrates trapezoidal rule -- y-axis locations of points + will be taken from `y` array, by default x-axis distances between + points will be 1.0, alternatively they can be provided with `x` array + or with `dx` scalar. Return value will be equal to combined area under + the red lines. + + + References + ---------- + .. [1] Wikipedia page: https://en.wikipedia.org/wiki/Trapezoidal_rule + + .. [2] Illustration image: + https://en.wikipedia.org/wiki/File:Composite_trapezoidal_rule_illustration.png + + Examples + -------- + >>> np.trapz([1,2,3]) + 4.0 + >>> np.trapz([1,2,3], x=[4,6,8]) + 8.0 + >>> np.trapz([1,2,3], dx=2) + 8.0 + + Using a decreasing `x` corresponds to integrating in reverse: + + >>> np.trapz([1,2,3], x=[8,6,4]) + -8.0 + + More generally `x` is used to integrate along a parametric curve. + This finds the area of a circle, noting we repeat the sample which closes + the curve: + + >>> theta = np.linspace(0, 2 * np.pi, num=1000, endpoint=True) + >>> np.trapz(np.cos(theta), x=np.sin(theta)) + 3.141571941375841 + + >>> a = np.arange(6).reshape(2, 3) + >>> a + array([[0, 1, 2], + [3, 4, 5]]) + >>> np.trapz(a, axis=0) + array([1.5, 2.5, 3.5]) + >>> np.trapz(a, axis=1) + array([2., 8.]) + """ + y = asanyarray(y) + if x is None: + d = dx + else: + x = asanyarray(x) + if x.ndim == 1: + d = diff(x) + # reshape to correct shape + shape = [1]*y.ndim + shape[axis] = d.shape[0] + d = d.reshape(shape) + else: + d = diff(x, axis=axis) + nd = y.ndim + slice1 = [slice(None)]*nd + slice2 = [slice(None)]*nd + slice1[axis] = slice(1, None) + slice2[axis] = slice(None, -1) + try: + ret = (d * (y[tuple(slice1)] + y[tuple(slice2)]) / 2.0).sum(axis) + except ValueError: + # Operations didn't work, cast to ndarray + d = np.asarray(d) + y = np.asarray(y) + ret = add.reduce(d * (y[tuple(slice1)]+y[tuple(slice2)])/2.0, axis) + return ret + + +def _meshgrid_dispatcher(*xi, copy=None, sparse=None, indexing=None): + return xi + + +# Based on scitools meshgrid +@array_function_dispatch(_meshgrid_dispatcher) +def meshgrid(*xi, copy=True, sparse=False, indexing='xy'): + """ + Return coordinate matrices from coordinate vectors. + + Make N-D coordinate arrays for vectorized evaluations of + N-D scalar/vector fields over N-D grids, given + one-dimensional coordinate arrays x1, x2,..., xn. + + .. versionchanged:: 1.9 + 1-D and 0-D cases are allowed. + + Parameters + ---------- + x1, x2,..., xn : array_like + 1-D arrays representing the coordinates of a grid. + indexing : {'xy', 'ij'}, optional + Cartesian ('xy', default) or matrix ('ij') indexing of output. + See Notes for more details. + + .. versionadded:: 1.7.0 + sparse : bool, optional + If True the shape of the returned coordinate array for dimension *i* + is reduced from ``(N1, ..., Ni, ... Nn)`` to + ``(1, ..., 1, Ni, 1, ..., 1)``. These sparse coordinate grids are + intended to be use with :ref:`basics.broadcasting`. 
When all + coordinates are used in an expression, broadcasting still leads to a + fully-dimensonal result array. + + Default is False. + + .. versionadded:: 1.7.0 + copy : bool, optional + If False, a view into the original arrays are returned in order to + conserve memory. Default is True. Please note that + ``sparse=False, copy=False`` will likely return non-contiguous + arrays. Furthermore, more than one element of a broadcast array + may refer to a single memory location. If you need to write to the + arrays, make copies first. + + .. versionadded:: 1.7.0 + + Returns + ------- + X1, X2,..., XN : ndarray + For vectors `x1`, `x2`,..., 'xn' with lengths ``Ni=len(xi)`` , + return ``(N1, N2, N3,...Nn)`` shaped arrays if indexing='ij' + or ``(N2, N1, N3,...Nn)`` shaped arrays if indexing='xy' + with the elements of `xi` repeated to fill the matrix along + the first dimension for `x1`, the second for `x2` and so on. + + Notes + ----- + This function supports both indexing conventions through the indexing + keyword argument. Giving the string 'ij' returns a meshgrid with + matrix indexing, while 'xy' returns a meshgrid with Cartesian indexing. + In the 2-D case with inputs of length M and N, the outputs are of shape + (N, M) for 'xy' indexing and (M, N) for 'ij' indexing. In the 3-D case + with inputs of length M, N and P, outputs are of shape (N, M, P) for + 'xy' indexing and (M, N, P) for 'ij' indexing. The difference is + illustrated by the following code snippet:: + + xv, yv = np.meshgrid(x, y, indexing='ij') + for i in range(nx): + for j in range(ny): + # treat xv[i,j], yv[i,j] + + xv, yv = np.meshgrid(x, y, indexing='xy') + for i in range(nx): + for j in range(ny): + # treat xv[j,i], yv[j,i] + + In the 1-D and 0-D case, the indexing and sparse keywords have no effect. + + See Also + -------- + mgrid : Construct a multi-dimensional "meshgrid" using indexing notation. + ogrid : Construct an open multi-dimensional "meshgrid" using indexing + notation. + + Examples + -------- + >>> nx, ny = (3, 2) + >>> x = np.linspace(0, 1, nx) + >>> y = np.linspace(0, 1, ny) + >>> xv, yv = np.meshgrid(x, y) + >>> xv + array([[0. , 0.5, 1. ], + [0. , 0.5, 1. ]]) + >>> yv + array([[0., 0., 0.], + [1., 1., 1.]]) + >>> xv, yv = np.meshgrid(x, y, sparse=True) # make sparse output arrays + >>> xv + array([[0. , 0.5, 1. ]]) + >>> yv + array([[0.], + [1.]]) + + `meshgrid` is very useful to evaluate functions on a grid. If the + function depends on all coordinates, you can use the parameter + ``sparse=True`` to save memory and computation time. 
+
+    >>> x = np.linspace(-5, 5, 101)
+    >>> y = np.linspace(-5, 5, 101)
+    >>> # full coordinate arrays
+    >>> xx, yy = np.meshgrid(x, y)
+    >>> zz = np.sqrt(xx**2 + yy**2)
+    >>> xx.shape, yy.shape, zz.shape
+    ((101, 101), (101, 101), (101, 101))
+    >>> # sparse coordinate arrays
+    >>> xs, ys = np.meshgrid(x, y, sparse=True)
+    >>> zs = np.sqrt(xs**2 + ys**2)
+    >>> xs.shape, ys.shape, zs.shape
+    ((1, 101), (101, 1), (101, 101))
+    >>> np.array_equal(zz, zs)
+    True
+
+    >>> import matplotlib.pyplot as plt
+    >>> h = plt.contourf(x, y, zs)
+    >>> plt.axis('scaled')
+    >>> plt.colorbar()
+    >>> plt.show()
+    """
+    ndim = len(xi)
+
+    if indexing not in ['xy', 'ij']:
+        raise ValueError(
+            "Valid values for `indexing` are 'xy' and 'ij'.")
+
+    s0 = (1,) * ndim
+    output = [np.asanyarray(x).reshape(s0[:i] + (-1,) + s0[i + 1:])
+              for i, x in enumerate(xi)]
+
+    if indexing == 'xy' and ndim > 1:
+        # switch first and second axis
+        output[0].shape = (1, -1) + s0[2:]
+        output[1].shape = (-1, 1) + s0[2:]
+
+    if not sparse:
+        # Return the full N-D matrix (not only the 1-D vector)
+        output = np.broadcast_arrays(*output, subok=True)
+
+    if copy:
+        output = [x.copy() for x in output]
+
+    return output
+
+
+def _delete_dispatcher(arr, obj, axis=None):
+    return (arr, obj)
+
+
+@array_function_dispatch(_delete_dispatcher)
+def delete(arr, obj, axis=None):
+    """
+    Return a new array with sub-arrays along an axis deleted. For a one
+    dimensional array, this returns those entries not returned by
+    `arr[obj]`.
+
+    Parameters
+    ----------
+    arr : array_like
+        Input array.
+    obj : slice, int or array of ints
+        Indicate indices of sub-arrays to remove along the specified axis.
+
+        .. versionchanged:: 1.19.0
+            Boolean indices are now treated as a mask of elements to remove,
+            rather than being cast to the integers 0 and 1.
+
+    axis : int, optional
+        The axis along which to delete the subarray defined by `obj`.
+        If `axis` is None, `obj` is applied to the flattened array.
+
+    Returns
+    -------
+    out : ndarray
+        A copy of `arr` with the elements specified by `obj` removed. Note
+        that `delete` does not occur in-place. If `axis` is None, `out` is
+        a flattened array.
+
+    See Also
+    --------
+    insert : Insert elements into an array.
+    append : Append elements at the end of an array.
+
+    Notes
+    -----
+    Often it is preferable to use a boolean mask. For example:
+
+    >>> arr = np.arange(12) + 1
+    >>> mask = np.ones(len(arr), dtype=bool)
+    >>> mask[[0,2,4]] = False
+    >>> result = arr[mask,...]
+
+    This is equivalent to ``np.delete(arr, [0,2,4], axis=0)``, but allows
+    further use of `mask`.
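+
+    Since NumPy 1.19 (see the ``versionchanged`` note above), a boolean
+    mask of the axis length can also be passed directly as `obj`; for
+    illustration:
+
+    >>> arr = np.arange(5)
+    >>> np.delete(arr, arr > 2)
+    array([0, 1, 2])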
+ + Examples + -------- + >>> arr = np.array([[1,2,3,4], [5,6,7,8], [9,10,11,12]]) + >>> arr + array([[ 1, 2, 3, 4], + [ 5, 6, 7, 8], + [ 9, 10, 11, 12]]) + >>> np.delete(arr, 1, 0) + array([[ 1, 2, 3, 4], + [ 9, 10, 11, 12]]) + + >>> np.delete(arr, np.s_[::2], 1) + array([[ 2, 4], + [ 6, 8], + [10, 12]]) + >>> np.delete(arr, [1,3,5], None) + array([ 1, 3, 5, 7, 8, 9, 10, 11, 12]) + + """ + wrap = None + if type(arr) is not ndarray: + try: + wrap = arr.__array_wrap__ + except AttributeError: + pass + + arr = asarray(arr) + ndim = arr.ndim + arrorder = 'F' if arr.flags.fnc else 'C' + if axis is None: + if ndim != 1: + arr = arr.ravel() + # needed for np.matrix, which is still not 1d after being ravelled + ndim = arr.ndim + axis = ndim - 1 + else: + axis = normalize_axis_index(axis, ndim) + + slobj = [slice(None)]*ndim + N = arr.shape[axis] + newshape = list(arr.shape) + + if isinstance(obj, slice): + start, stop, step = obj.indices(N) + xr = range(start, stop, step) + numtodel = len(xr) + + if numtodel <= 0: + if wrap: + return wrap(arr.copy(order=arrorder)) + else: + return arr.copy(order=arrorder) + + # Invert if step is negative: + if step < 0: + step = -step + start = xr[-1] + stop = xr[0] + 1 + + newshape[axis] -= numtodel + new = empty(newshape, arr.dtype, arrorder) + # copy initial chunk + if start == 0: + pass + else: + slobj[axis] = slice(None, start) + new[tuple(slobj)] = arr[tuple(slobj)] + # copy end chunk + if stop == N: + pass + else: + slobj[axis] = slice(stop-numtodel, None) + slobj2 = [slice(None)]*ndim + slobj2[axis] = slice(stop, None) + new[tuple(slobj)] = arr[tuple(slobj2)] + # copy middle pieces + if step == 1: + pass + else: # use array indexing. + keep = ones(stop-start, dtype=bool) + keep[:stop-start:step] = False + slobj[axis] = slice(start, stop-numtodel) + slobj2 = [slice(None)]*ndim + slobj2[axis] = slice(start, stop) + arr = arr[tuple(slobj2)] + slobj2[axis] = keep + new[tuple(slobj)] = arr[tuple(slobj2)] + if wrap: + return wrap(new) + else: + return new + + if isinstance(obj, (int, integer)) and not isinstance(obj, bool): + # optimization for a single value + if (obj < -N or obj >= N): + raise IndexError( + "index %i is out of bounds for axis %i with " + "size %i" % (obj, axis, N)) + if (obj < 0): + obj += N + newshape[axis] -= 1 + new = empty(newshape, arr.dtype, arrorder) + slobj[axis] = slice(None, obj) + new[tuple(slobj)] = arr[tuple(slobj)] + slobj[axis] = slice(obj, None) + slobj2 = [slice(None)]*ndim + slobj2[axis] = slice(obj+1, None) + new[tuple(slobj)] = arr[tuple(slobj2)] + else: + _obj = obj + obj = np.asarray(obj) + if obj.size == 0 and not isinstance(_obj, np.ndarray): + obj = obj.astype(intp) + + if obj.dtype == bool: + if obj.shape != (N,): + raise ValueError('boolean array argument obj to delete ' + 'must be one dimensional and match the axis ' + 'length of {}'.format(N)) + + # optimization, the other branch is slower + keep = ~obj + else: + keep = ones(N, dtype=bool) + keep[obj,] = False + + slobj[axis] = keep + new = arr[tuple(slobj)] + + if wrap: + return wrap(new) + else: + return new + + +def _insert_dispatcher(arr, obj, values, axis=None): + return (arr, obj, values) + + +@array_function_dispatch(_insert_dispatcher) +def insert(arr, obj, values, axis=None): + """ + Insert values along the given axis before the given indices. + + Parameters + ---------- + arr : array_like + Input array. + obj : int, slice or sequence of ints + Object that defines the index or indices before which `values` is + inserted. + + .. 
versionadded:: 1.8.0 + + Support for multiple insertions when `obj` is a single scalar or a + sequence with one element (similar to calling insert multiple + times). + values : array_like + Values to insert into `arr`. If the type of `values` is different + from that of `arr`, `values` is converted to the type of `arr`. + `values` should be shaped so that ``arr[...,obj,...] = values`` + is legal. + axis : int, optional + Axis along which to insert `values`. If `axis` is None then `arr` + is flattened first. + + Returns + ------- + out : ndarray + A copy of `arr` with `values` inserted. Note that `insert` + does not occur in-place: a new array is returned. If + `axis` is None, `out` is a flattened array. + + See Also + -------- + append : Append elements at the end of an array. + concatenate : Join a sequence of arrays along an existing axis. + delete : Delete elements from an array. + + Notes + ----- + Note that for higher dimensional inserts `obj=0` behaves very different + from `obj=[0]` just like `arr[:,0,:] = values` is different from + `arr[:,[0],:] = values`. + + Examples + -------- + >>> a = np.array([[1, 1], [2, 2], [3, 3]]) + >>> a + array([[1, 1], + [2, 2], + [3, 3]]) + >>> np.insert(a, 1, 5) + array([1, 5, 1, ..., 2, 3, 3]) + >>> np.insert(a, 1, 5, axis=1) + array([[1, 5, 1], + [2, 5, 2], + [3, 5, 3]]) + + Difference between sequence and scalars: + + >>> np.insert(a, [1], [[1],[2],[3]], axis=1) + array([[1, 1, 1], + [2, 2, 2], + [3, 3, 3]]) + >>> np.array_equal(np.insert(a, 1, [1, 2, 3], axis=1), + ... np.insert(a, [1], [[1],[2],[3]], axis=1)) + True + + >>> b = a.flatten() + >>> b + array([1, 1, 2, 2, 3, 3]) + >>> np.insert(b, [2, 2], [5, 6]) + array([1, 1, 5, ..., 2, 3, 3]) + + >>> np.insert(b, slice(2, 4), [5, 6]) + array([1, 1, 5, ..., 2, 3, 3]) + + >>> np.insert(b, [2, 2], [7.13, False]) # type casting + array([1, 1, 7, ..., 2, 3, 3]) + + >>> x = np.arange(8).reshape(2, 4) + >>> idx = (1, 3) + >>> np.insert(x, idx, 999, axis=1) + array([[ 0, 999, 1, 2, 999, 3], + [ 4, 999, 5, 6, 999, 7]]) + + """ + wrap = None + if type(arr) is not ndarray: + try: + wrap = arr.__array_wrap__ + except AttributeError: + pass + + arr = asarray(arr) + ndim = arr.ndim + arrorder = 'F' if arr.flags.fnc else 'C' + if axis is None: + if ndim != 1: + arr = arr.ravel() + # needed for np.matrix, which is still not 1d after being ravelled + ndim = arr.ndim + axis = ndim - 1 + else: + axis = normalize_axis_index(axis, ndim) + slobj = [slice(None)]*ndim + N = arr.shape[axis] + newshape = list(arr.shape) + + if isinstance(obj, slice): + # turn it into a range object + indices = arange(*obj.indices(N), dtype=intp) + else: + # need to copy obj, because indices will be changed in-place + indices = np.array(obj) + if indices.dtype == bool: + # See also delete + # 2012-10-11, NumPy 1.8 + warnings.warn( + "in the future insert will treat boolean arrays and " + "array-likes as a boolean index instead of casting it to " + "integer", FutureWarning, stacklevel=3) + indices = indices.astype(intp) + # Code after warning period: + #if obj.ndim != 1: + # raise ValueError('boolean array argument obj to insert ' + # 'must be one dimensional') + #indices = np.flatnonzero(obj) + elif indices.ndim > 1: + raise ValueError( + "index array argument obj to insert must be one dimensional " + "or scalar") + if indices.size == 1: + index = indices.item() + if index < -N or index > N: + raise IndexError(f"index {obj} is out of bounds for axis {axis} " + f"with size {N}") + if (index < 0): + index += N + + # There are some object 
array corner cases here, but we cannot avoid + # that: + values = array(values, copy=False, ndmin=arr.ndim, dtype=arr.dtype) + if indices.ndim == 0: + # broadcasting is very different here, since a[:,0,:] = ... behaves + # very different from a[:,[0],:] = ...! This changes values so that + # it works likes the second case. (here a[:,0:1,:]) + values = np.moveaxis(values, 0, axis) + numnew = values.shape[axis] + newshape[axis] += numnew + new = empty(newshape, arr.dtype, arrorder) + slobj[axis] = slice(None, index) + new[tuple(slobj)] = arr[tuple(slobj)] + slobj[axis] = slice(index, index+numnew) + new[tuple(slobj)] = values + slobj[axis] = slice(index+numnew, None) + slobj2 = [slice(None)] * ndim + slobj2[axis] = slice(index, None) + new[tuple(slobj)] = arr[tuple(slobj2)] + if wrap: + return wrap(new) + return new + elif indices.size == 0 and not isinstance(obj, np.ndarray): + # Can safely cast the empty list to intp + indices = indices.astype(intp) + + indices[indices < 0] += N + + numnew = len(indices) + order = indices.argsort(kind='mergesort') # stable sort + indices[order] += np.arange(numnew) + + newshape[axis] += numnew + old_mask = ones(newshape[axis], dtype=bool) + old_mask[indices] = False + + new = empty(newshape, arr.dtype, arrorder) + slobj2 = [slice(None)]*ndim + slobj[axis] = indices + slobj2[axis] = old_mask + new[tuple(slobj)] = values + new[tuple(slobj2)] = arr + + if wrap: + return wrap(new) + return new + + +def _append_dispatcher(arr, values, axis=None): + return (arr, values) + + +@array_function_dispatch(_append_dispatcher) +def append(arr, values, axis=None): + """ + Append values to the end of an array. + + Parameters + ---------- + arr : array_like + Values are appended to a copy of this array. + values : array_like + These values are appended to a copy of `arr`. It must be of the + correct shape (the same shape as `arr`, excluding `axis`). If + `axis` is not specified, `values` can be any shape and will be + flattened before use. + axis : int, optional + The axis along which `values` are appended. If `axis` is not + given, both `arr` and `values` are flattened before use. + + Returns + ------- + append : ndarray + A copy of `arr` with `values` appended to `axis`. Note that + `append` does not occur in-place: a new array is allocated and + filled. If `axis` is None, `out` is a flattened array. + + See Also + -------- + insert : Insert elements into an array. + delete : Delete elements from an array. + + Examples + -------- + >>> np.append([1, 2, 3], [[4, 5, 6], [7, 8, 9]]) + array([1, 2, 3, ..., 7, 8, 9]) + + When `axis` is specified, `values` must have the correct shape. + + >>> np.append([[1, 2, 3], [4, 5, 6]], [[7, 8, 9]], axis=0) + array([[1, 2, 3], + [4, 5, 6], + [7, 8, 9]]) + >>> np.append([[1, 2, 3], [4, 5, 6]], [7, 8, 9], axis=0) + Traceback (most recent call last): + ... + ValueError: all the input arrays must have same number of dimensions, but + the array at index 0 has 2 dimension(s) and the array at index 1 has 1 + dimension(s) + + """ + arr = asanyarray(arr) + if axis is None: + if arr.ndim != 1: + arr = arr.ravel() + values = ravel(values) + axis = arr.ndim-1 + return concatenate((arr, values), axis=axis) + + +def _digitize_dispatcher(x, bins, right=None): + return (x, bins) + + +@array_function_dispatch(_digitize_dispatcher) +def digitize(x, bins, right=False): + """ + Return the indices of the bins to which each value in input array belongs. 
+
+    =========  =============  ============================
+    `right`    order of bins  returned index `i` satisfies
+    =========  =============  ============================
+    ``False``  increasing     ``bins[i-1] <= x < bins[i]``
+    ``True``   increasing     ``bins[i-1] < x <= bins[i]``
+    ``False``  decreasing     ``bins[i-1] > x >= bins[i]``
+    ``True``   decreasing     ``bins[i-1] >= x > bins[i]``
+    =========  =============  ============================
+
+    If values in `x` are beyond the bounds of `bins`, 0 or ``len(bins)`` is
+    returned as appropriate.
+
+    Parameters
+    ----------
+    x : array_like
+        Input array to be binned. Prior to NumPy 1.10.0, this array had to
+        be 1-dimensional, but can now have any shape.
+    bins : array_like
+        Array of bins. It has to be 1-dimensional and monotonic.
+    right : bool, optional
+        Indicates whether the intervals include the right or the left bin
+        edge. Default behavior is (right==False) indicating that the
+        interval does not include the right edge. The left bin end is open
+        in this case, i.e., bins[i-1] <= x < bins[i] is the default behavior
+        for monotonically increasing bins.
+
+    Returns
+    -------
+    indices : ndarray of ints
+        Output array of indices, of same shape as `x`.
+
+    Raises
+    ------
+    ValueError
+        If `bins` is not monotonic.
+    TypeError
+        If the type of the input is complex.
+
+    See Also
+    --------
+    bincount, histogram, unique, searchsorted
+
+    Notes
+    -----
+    If values in `x` are such that they fall outside the bin range,
+    attempting to index `bins` with the indices that `digitize` returns
+    will result in an IndexError.
+
+    .. versionadded:: 1.10.0
+
+    `np.digitize` is implemented in terms of `np.searchsorted`. This means
+    that a binary search is used to bin the values, which scales much better
+    for a larger number of bins than the previous linear search. It also
+    removes the requirement for the input array to be 1-dimensional.
+
+    For monotonically _increasing_ `bins`, the following are equivalent::
+
+        np.digitize(x, bins, right=True)
+        np.searchsorted(bins, x, side='left')
+
+    Note that as the order of the arguments is reversed, the side must be
+    too. The `searchsorted` call is marginally faster, as it does not do
+    any monotonicity checks. Perhaps more importantly, it supports all
+    dtypes.
+
+    Examples
+    --------
+    >>> x = np.array([0.2, 6.4, 3.0, 1.6])
+    >>> bins = np.array([0.0, 1.0, 2.5, 4.0, 10.0])
+    >>> inds = np.digitize(x, bins)
+    >>> inds
+    array([1, 4, 3, 2])
+    >>> for n in range(x.size):
+    ...     print(bins[inds[n]-1], "<=", x[n], "<", bins[inds[n]])
+    ...
+ 0.0 <= 0.2 < 1.0 + 4.0 <= 6.4 < 10.0 + 2.5 <= 3.0 < 4.0 + 1.0 <= 1.6 < 2.5 + + >>> x = np.array([1.2, 10.0, 12.4, 15.5, 20.]) + >>> bins = np.array([0, 5, 10, 15, 20]) + >>> np.digitize(x,bins,right=True) + array([1, 2, 3, 4, 4]) + >>> np.digitize(x,bins,right=False) + array([1, 3, 3, 4, 5]) + """ + x = _nx.asarray(x) + bins = _nx.asarray(bins) + + # here for compatibility, searchsorted below is happy to take this + if np.issubdtype(x.dtype, _nx.complexfloating): + raise TypeError("x may not be complex") + + mono = _monotonicity(bins) + if mono == 0: + raise ValueError("bins must be monotonically increasing or decreasing") + + # this is backwards because the arguments below are swapped + side = 'left' if right else 'right' + if mono == -1: + # reverse the bins, and invert the results + return len(bins) - _nx.searchsorted(bins[::-1], x, side=side) + else: + return _nx.searchsorted(bins, x, side=side) diff --git a/.local/lib/python3.8/site-packages/numpy/lib/function_base.pyi b/.local/lib/python3.8/site-packages/numpy/lib/function_base.pyi new file mode 100644 index 0000000..7e227f9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/function_base.pyi @@ -0,0 +1,704 @@ +import sys +from typing import ( + Literal as L, + List, + Type, + Sequence, + Tuple, + Union, + Any, + TypeVar, + Iterator, + overload, + Callable, + Protocol, + SupportsIndex, + Iterable, + SupportsInt, +) + +if sys.version_info >= (3, 10): + from typing import TypeGuard +else: + from typing_extensions import TypeGuard + +from numpy import ( + vectorize as vectorize, + ufunc, + dtype, + generic, + floating, + complexfloating, + intp, + float64, + complex128, + timedelta64, + datetime64, + object_, + _OrderKACF, +) + +from numpy.typing import ( + NDArray, + ArrayLike, + DTypeLike, + _ShapeLike, + _ScalarLike_co, + _SupportsDType, + _FiniteNestedSequence, + _SupportsArray, + _ArrayLikeInt_co, + _ArrayLikeFloat_co, + _ArrayLikeComplex_co, + _ArrayLikeTD64_co, + _ArrayLikeDT64_co, + _ArrayLikeObject_co, + _FloatLike_co, + _ComplexLike_co, +) + +from numpy.core.function_base import ( + add_newdoc as add_newdoc, +) + +from numpy.core.multiarray import ( + add_docstring as add_docstring, + bincount as bincount, +) + +from numpy.core.umath import _add_newdoc_ufunc + +_T = TypeVar("_T") +_T_co = TypeVar("_T_co", covariant=True) +_SCT = TypeVar("_SCT", bound=generic) +_ArrayType = TypeVar("_ArrayType", bound=NDArray[Any]) + +_2Tuple = Tuple[_T, _T] +_ArrayLike = _FiniteNestedSequence[_SupportsArray[dtype[_SCT]]] +_DTypeLike = Union[ + dtype[_SCT], + Type[_SCT], + _SupportsDType[dtype[_SCT]], +] + +class _TrimZerosSequence(Protocol[_T_co]): + def __len__(self) -> int: ... + def __getitem__(self, key: slice, /) -> _T_co: ... + def __iter__(self) -> Iterator[Any]: ... + +class _SupportsWriteFlush(Protocol): + def write(self, s: str, /) -> object: ... + def flush(self) -> object: ... + +__all__: List[str] + +# NOTE: This is in reality a re-export of `np.core.umath._add_newdoc_ufunc` +def add_newdoc_ufunc(ufunc: ufunc, new_docstring: str, /) -> None: ... + +@overload +def rot90( + m: _ArrayLike[_SCT], + k: int = ..., + axes: Tuple[int, int] = ..., +) -> NDArray[_SCT]: ... +@overload +def rot90( + m: ArrayLike, + k: int = ..., + axes: Tuple[int, int] = ..., +) -> NDArray[Any]: ... + +@overload +def flip(m: _SCT, axis: None = ...) -> _SCT: ... +@overload +def flip(m: _ScalarLike_co, axis: None = ...) -> Any: ... +@overload +def flip(m: _ArrayLike[_SCT], axis: None | _ShapeLike = ...) -> NDArray[_SCT]: ... 
+@overload +def flip(m: ArrayLike, axis: None | _ShapeLike = ...) -> NDArray[Any]: ... + +def iterable(y: object) -> TypeGuard[Iterable[Any]]: ... + +@overload +def average( + a: _ArrayLikeFloat_co, + axis: None = ..., + weights: None | _ArrayLikeFloat_co= ..., + returned: L[False] = ..., +) -> floating[Any]: ... +@overload +def average( + a: _ArrayLikeComplex_co, + axis: None = ..., + weights: None | _ArrayLikeComplex_co = ..., + returned: L[False] = ..., +) -> complexfloating[Any, Any]: ... +@overload +def average( + a: _ArrayLikeObject_co, + axis: None = ..., + weights: None | Any = ..., + returned: L[False] = ..., +) -> Any: ... +@overload +def average( + a: _ArrayLikeFloat_co, + axis: None = ..., + weights: None | _ArrayLikeFloat_co= ..., + returned: L[True] = ..., +) -> _2Tuple[floating[Any]]: ... +@overload +def average( + a: _ArrayLikeComplex_co, + axis: None = ..., + weights: None | _ArrayLikeComplex_co = ..., + returned: L[True] = ..., +) -> _2Tuple[complexfloating[Any, Any]]: ... +@overload +def average( + a: _ArrayLikeObject_co, + axis: None = ..., + weights: None | Any = ..., + returned: L[True] = ..., +) -> _2Tuple[Any]: ... +@overload +def average( + a: _ArrayLikeComplex_co | _ArrayLikeObject_co, + axis: None | _ShapeLike = ..., + weights: None | Any = ..., + returned: L[False] = ..., +) -> Any: ... +@overload +def average( + a: _ArrayLikeComplex_co | _ArrayLikeObject_co, + axis: None | _ShapeLike = ..., + weights: None | Any = ..., + returned: L[True] = ..., +) -> _2Tuple[Any]: ... + +@overload +def asarray_chkfinite( + a: _ArrayLike[_SCT], + dtype: None = ..., + order: _OrderKACF = ..., +) -> NDArray[_SCT]: ... +@overload +def asarray_chkfinite( + a: object, + dtype: None = ..., + order: _OrderKACF = ..., +) -> NDArray[Any]: ... +@overload +def asarray_chkfinite( + a: Any, + dtype: _DTypeLike[_SCT], + order: _OrderKACF = ..., +) -> NDArray[_SCT]: ... +@overload +def asarray_chkfinite( + a: Any, + dtype: DTypeLike, + order: _OrderKACF = ..., +) -> NDArray[Any]: ... + +@overload +def piecewise( + x: _ArrayLike[_SCT], + condlist: ArrayLike, + funclist: Sequence[Any | Callable[..., Any]], + *args: Any, + **kw: Any, +) -> NDArray[_SCT]: ... +@overload +def piecewise( + x: ArrayLike, + condlist: ArrayLike, + funclist: Sequence[Any | Callable[..., Any]], + *args: Any, + **kw: Any, +) -> NDArray[Any]: ... + +def select( + condlist: Sequence[ArrayLike], + choicelist: Sequence[ArrayLike], + default: ArrayLike = ..., +) -> NDArray[Any]: ... + +@overload +def copy( + a: _ArrayType, + order: _OrderKACF, + subok: L[True], +) -> _ArrayType: ... +@overload +def copy( + a: _ArrayType, + order: _OrderKACF = ..., + *, + subok: L[True], +) -> _ArrayType: ... +@overload +def copy( + a: _ArrayLike[_SCT], + order: _OrderKACF = ..., + subok: L[False] = ..., +) -> NDArray[_SCT]: ... +@overload +def copy( + a: ArrayLike, + order: _OrderKACF = ..., + subok: L[False] = ..., +) -> NDArray[Any]: ... + +def gradient( + f: ArrayLike, + *varargs: ArrayLike, + axis: None | _ShapeLike = ..., + edge_order: L[1, 2] = ..., +) -> Any: ... + +@overload +def diff( + a: _T, + n: L[0], + axis: SupportsIndex = ..., + prepend: ArrayLike = ..., + append: ArrayLike = ..., +) -> _T: ... +@overload +def diff( + a: ArrayLike, + n: int = ..., + axis: SupportsIndex = ..., + prepend: ArrayLike = ..., + append: ArrayLike = ..., +) -> NDArray[Any]: ... 
+ +@overload +def interp( + x: _ArrayLikeFloat_co, + xp: _ArrayLikeFloat_co, + fp: _ArrayLikeFloat_co, + left: None | _FloatLike_co = ..., + right: None | _FloatLike_co = ..., + period: None | _FloatLike_co = ..., +) -> NDArray[float64]: ... +@overload +def interp( + x: _ArrayLikeFloat_co, + xp: _ArrayLikeFloat_co, + fp: _ArrayLikeComplex_co, + left: None | _ComplexLike_co = ..., + right: None | _ComplexLike_co = ..., + period: None | _FloatLike_co = ..., +) -> NDArray[complex128]: ... + +@overload +def angle(z: _ArrayLikeFloat_co, deg: bool = ...) -> floating[Any]: ... +@overload +def angle(z: _ArrayLikeComplex_co, deg: bool = ...) -> complexfloating[Any, Any]: ... +@overload +def angle(z: _ArrayLikeObject_co, deg: bool = ...) -> Any: ... + +@overload +def unwrap( + p: _ArrayLikeFloat_co, + discont: None | float = ..., + axis: int = ..., + *, + period: float = ..., +) -> NDArray[floating[Any]]: ... +@overload +def unwrap( + p: _ArrayLikeObject_co, + discont: None | float = ..., + axis: int = ..., + *, + period: float = ..., +) -> NDArray[object_]: ... + +def sort_complex(a: ArrayLike) -> NDArray[complexfloating[Any, Any]]: ... + +def trim_zeros( + filt: _TrimZerosSequence[_T], + trim: L["f", "b", "fb", "bf"] = ..., +) -> _T: ... + +@overload +def extract(condition: ArrayLike, arr: _ArrayLike[_SCT]) -> NDArray[_SCT]: ... +@overload +def extract(condition: ArrayLike, arr: ArrayLike) -> NDArray[Any]: ... + +def place(arr: NDArray[Any], mask: ArrayLike, vals: Any) -> None: ... + +def disp( + mesg: object, + device: None | _SupportsWriteFlush = ..., + linefeed: bool = ..., +) -> None: ... + +@overload +def cov( + m: _ArrayLikeFloat_co, + y: None | _ArrayLikeFloat_co = ..., + rowvar: bool = ..., + bias: bool = ..., + ddof: None | SupportsIndex | SupportsInt = ..., + fweights: None | ArrayLike = ..., + aweights: None | ArrayLike = ..., + *, + dtype: None = ..., +) -> NDArray[floating[Any]]: ... +@overload +def cov( + m: _ArrayLikeComplex_co, + y: None | _ArrayLikeComplex_co = ..., + rowvar: bool = ..., + bias: bool = ..., + ddof: None | SupportsIndex | SupportsInt = ..., + fweights: None | ArrayLike = ..., + aweights: None | ArrayLike = ..., + *, + dtype: None = ..., +) -> NDArray[complexfloating[Any, Any]]: ... +@overload +def cov( + m: _ArrayLikeComplex_co, + y: None | _ArrayLikeComplex_co = ..., + rowvar: bool = ..., + bias: bool = ..., + ddof: None | SupportsIndex | SupportsInt = ..., + fweights: None | ArrayLike = ..., + aweights: None | ArrayLike = ..., + *, + dtype: _DTypeLike[_SCT], +) -> NDArray[_SCT]: ... +@overload +def cov( + m: _ArrayLikeComplex_co, + y: None | _ArrayLikeComplex_co = ..., + rowvar: bool = ..., + bias: bool = ..., + ddof: None | SupportsIndex | SupportsInt = ..., + fweights: None | ArrayLike = ..., + aweights: None | ArrayLike = ..., + *, + dtype: DTypeLike, +) -> NDArray[Any]: ... + +# NOTE `bias` and `ddof` have been deprecated +@overload +def corrcoef( + m: _ArrayLikeFloat_co, + y: None | _ArrayLikeFloat_co = ..., + rowvar: bool = ..., + *, + dtype: None = ..., +) -> NDArray[floating[Any]]: ... +@overload +def corrcoef( + m: _ArrayLikeComplex_co, + y: None | _ArrayLikeComplex_co = ..., + rowvar: bool = ..., + *, + dtype: None = ..., +) -> NDArray[complexfloating[Any, Any]]: ... +@overload +def corrcoef( + m: _ArrayLikeComplex_co, + y: None | _ArrayLikeComplex_co = ..., + rowvar: bool = ..., + *, + dtype: _DTypeLike[_SCT], +) -> NDArray[_SCT]: ... 
+@overload +def corrcoef( + m: _ArrayLikeComplex_co, + y: None | _ArrayLikeComplex_co = ..., + rowvar: bool = ..., + *, + dtype: DTypeLike, +) -> NDArray[Any]: ... + +def blackman(M: _FloatLike_co) -> NDArray[floating[Any]]: ... + +def bartlett(M: _FloatLike_co) -> NDArray[floating[Any]]: ... + +def hanning(M: _FloatLike_co) -> NDArray[floating[Any]]: ... + +def hamming(M: _FloatLike_co) -> NDArray[floating[Any]]: ... + +def i0(x: _ArrayLikeFloat_co) -> NDArray[floating[Any]]: ... + +def kaiser( + M: _FloatLike_co, + beta: _FloatLike_co, +) -> NDArray[floating[Any]]: ... + +@overload +def sinc(x: _FloatLike_co) -> floating[Any]: ... +@overload +def sinc(x: _ComplexLike_co) -> complexfloating[Any, Any]: ... +@overload +def sinc(x: _ArrayLikeFloat_co) -> NDArray[floating[Any]]: ... +@overload +def sinc(x: _ArrayLikeComplex_co) -> NDArray[complexfloating[Any, Any]]: ... + +@overload +def msort(a: _ArrayType) -> _ArrayType: ... +@overload +def msort(a: _ArrayLike[_SCT]) -> NDArray[_SCT]: ... +@overload +def msort(a: ArrayLike) -> NDArray[Any]: ... + +@overload +def median( + a: _ArrayLikeFloat_co, + axis: None = ..., + out: None = ..., + overwrite_input: bool = ..., + keepdims: L[False] = ..., +) -> floating[Any]: ... +@overload +def median( + a: _ArrayLikeComplex_co, + axis: None = ..., + out: None = ..., + overwrite_input: bool = ..., + keepdims: L[False] = ..., +) -> complexfloating[Any, Any]: ... +@overload +def median( + a: _ArrayLikeTD64_co, + axis: None = ..., + out: None = ..., + overwrite_input: bool = ..., + keepdims: L[False] = ..., +) -> timedelta64: ... +@overload +def median( + a: _ArrayLikeObject_co, + axis: None = ..., + out: None = ..., + overwrite_input: bool = ..., + keepdims: L[False] = ..., +) -> Any: ... +@overload +def median( + a: _ArrayLikeFloat_co | _ArrayLikeComplex_co | _ArrayLikeTD64_co | _ArrayLikeObject_co, + axis: None | _ShapeLike = ..., + out: None = ..., + overwrite_input: bool = ..., + keepdims: bool = ..., +) -> Any: ... +@overload +def median( + a: _ArrayLikeFloat_co | _ArrayLikeComplex_co | _ArrayLikeTD64_co | _ArrayLikeObject_co, + axis: None | _ShapeLike = ..., + out: _ArrayType = ..., + overwrite_input: bool = ..., + keepdims: bool = ..., +) -> _ArrayType: ... + +_MethodKind = L[ + "inverted_cdf", + "averaged_inverted_cdf", + "closest_observation", + "interpolated_inverted_cdf", + "hazen", + "weibull", + "linear", + "median_unbiased", + "normal_unbiased", + "lower", + "higher", + "midpoint", + "nearest", +] + +@overload +def percentile( + a: _ArrayLikeFloat_co, + q: _FloatLike_co, + axis: None = ..., + out: None = ..., + overwrite_input: bool = ..., + method: _MethodKind = ..., + keepdims: L[False] = ..., +) -> floating[Any]: ... +@overload +def percentile( + a: _ArrayLikeComplex_co, + q: _FloatLike_co, + axis: None = ..., + out: None = ..., + overwrite_input: bool = ..., + method: _MethodKind = ..., + keepdims: L[False] = ..., +) -> complexfloating[Any, Any]: ... +@overload +def percentile( + a: _ArrayLikeTD64_co, + q: _FloatLike_co, + axis: None = ..., + out: None = ..., + overwrite_input: bool = ..., + method: _MethodKind = ..., + keepdims: L[False] = ..., +) -> timedelta64: ... +@overload +def percentile( + a: _ArrayLikeDT64_co, + q: _FloatLike_co, + axis: None = ..., + out: None = ..., + overwrite_input: bool = ..., + method: _MethodKind = ..., + keepdims: L[False] = ..., +) -> datetime64: ... 
+@overload +def percentile( + a: _ArrayLikeObject_co, + q: _FloatLike_co, + axis: None = ..., + out: None = ..., + overwrite_input: bool = ..., + method: _MethodKind = ..., + keepdims: L[False] = ..., +) -> Any: ... +@overload +def percentile( + a: _ArrayLikeFloat_co, + q: _ArrayLikeFloat_co, + axis: None = ..., + out: None = ..., + overwrite_input: bool = ..., + method: _MethodKind = ..., + keepdims: L[False] = ..., +) -> NDArray[floating[Any]]: ... +@overload +def percentile( + a: _ArrayLikeComplex_co, + q: _ArrayLikeFloat_co, + axis: None = ..., + out: None = ..., + overwrite_input: bool = ..., + method: _MethodKind = ..., + keepdims: L[False] = ..., +) -> NDArray[complexfloating[Any, Any]]: ... +@overload +def percentile( + a: _ArrayLikeTD64_co, + q: _ArrayLikeFloat_co, + axis: None = ..., + out: None = ..., + overwrite_input: bool = ..., + method: _MethodKind = ..., + keepdims: L[False] = ..., +) -> NDArray[timedelta64]: ... +@overload +def percentile( + a: _ArrayLikeDT64_co, + q: _ArrayLikeFloat_co, + axis: None = ..., + out: None = ..., + overwrite_input: bool = ..., + method: _MethodKind = ..., + keepdims: L[False] = ..., +) -> NDArray[datetime64]: ... +@overload +def percentile( + a: _ArrayLikeObject_co, + q: _ArrayLikeFloat_co, + axis: None = ..., + out: None = ..., + overwrite_input: bool = ..., + method: _MethodKind = ..., + keepdims: L[False] = ..., +) -> NDArray[object_]: ... +@overload +def percentile( + a: _ArrayLikeComplex_co | _ArrayLikeTD64_co | _ArrayLikeTD64_co | _ArrayLikeObject_co, + q: _ArrayLikeFloat_co, + axis: None | _ShapeLike = ..., + out: None = ..., + overwrite_input: bool = ..., + method: _MethodKind = ..., + keepdims: bool = ..., +) -> Any: ... +@overload +def percentile( + a: _ArrayLikeComplex_co | _ArrayLikeTD64_co | _ArrayLikeTD64_co | _ArrayLikeObject_co, + q: _ArrayLikeFloat_co, + axis: None | _ShapeLike = ..., + out: _ArrayType = ..., + overwrite_input: bool = ..., + method: _MethodKind = ..., + keepdims: bool = ..., +) -> _ArrayType: ... + +# NOTE: Not an alias, but they do have identical signatures +# (that we can reuse) +quantile = percentile + +# TODO: Returns a scalar for <= 1D array-likes; returns an ndarray otherwise +def trapz( + y: _ArrayLikeComplex_co | _ArrayLikeTD64_co | _ArrayLikeObject_co, + x: None | _ArrayLikeComplex_co | _ArrayLikeTD64_co | _ArrayLikeObject_co = ..., + dx: float = ..., + axis: SupportsIndex = ..., +) -> Any: ... + +def meshgrid( + *xi: ArrayLike, + copy: bool = ..., + sparse: bool = ..., + indexing: L["xy", "ij"] = ..., +) -> List[NDArray[Any]]: ... + +@overload +def delete( + arr: _ArrayLike[_SCT], + obj: slice | _ArrayLikeInt_co, + axis: None | SupportsIndex = ..., +) -> NDArray[_SCT]: ... +@overload +def delete( + arr: ArrayLike, + obj: slice | _ArrayLikeInt_co, + axis: None | SupportsIndex = ..., +) -> NDArray[Any]: ... + +@overload +def insert( + arr: _ArrayLike[_SCT], + obj: slice | _ArrayLikeInt_co, + values: ArrayLike, + axis: None | SupportsIndex = ..., +) -> NDArray[_SCT]: ... +@overload +def insert( + arr: ArrayLike, + obj: slice | _ArrayLikeInt_co, + values: ArrayLike, + axis: None | SupportsIndex = ..., +) -> NDArray[Any]: ... + +def append( + arr: ArrayLike, + values: ArrayLike, + axis: None | SupportsIndex = ..., +) -> NDArray[Any]: ... + +@overload +def digitize( + x: _FloatLike_co, + bins: _ArrayLikeFloat_co, + right: bool = ..., +) -> intp: ... +@overload +def digitize( + x: _ArrayLikeFloat_co, + bins: _ArrayLikeFloat_co, + right: bool = ..., +) -> NDArray[intp]: ... 
diff --git a/.local/lib/python3.8/site-packages/numpy/lib/histograms.py b/.local/lib/python3.8/site-packages/numpy/lib/histograms.py new file mode 100644 index 0000000..b6909bc --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/histograms.py @@ -0,0 +1,1129 @@ +""" +Histogram-related functions +""" +import contextlib +import functools +import operator +import warnings + +import numpy as np +from numpy.core import overrides + +__all__ = ['histogram', 'histogramdd', 'histogram_bin_edges'] + +array_function_dispatch = functools.partial( + overrides.array_function_dispatch, module='numpy') + +# range is a keyword argument to many functions, so save the builtin so they can +# use it. +_range = range + + +def _ptp(x): + """Peak-to-peak value of x. + + This implementation avoids the problem of signed integer arrays having a + peak-to-peak value that cannot be represented with the array's data type. + This function returns an unsigned value for signed integer arrays. + """ + return _unsigned_subtract(x.max(), x.min()) + + +def _hist_bin_sqrt(x, range): + """ + Square root histogram bin estimator. + + Bin width is inversely proportional to the data size. Used by many + programs for its simplicity. + + Parameters + ---------- + x : array_like + Input data that is to be histogrammed, trimmed to range. May not + be empty. + + Returns + ------- + h : An estimate of the optimal bin width for the given data. + """ + del range # unused + return _ptp(x) / np.sqrt(x.size) + + +def _hist_bin_sturges(x, range): + """ + Sturges histogram bin estimator. + + A very simplistic estimator based on the assumption of normality of + the data. This estimator has poor performance for non-normal data, + which becomes especially obvious for large data sets. The estimate + depends only on size of the data. + + Parameters + ---------- + x : array_like + Input data that is to be histogrammed, trimmed to range. May not + be empty. + + Returns + ------- + h : An estimate of the optimal bin width for the given data. + """ + del range # unused + return _ptp(x) / (np.log2(x.size) + 1.0) + + +def _hist_bin_rice(x, range): + """ + Rice histogram bin estimator. + + Another simple estimator with no normality assumption. It has better + performance for large data than Sturges, but tends to overestimate + the number of bins. The number of bins is proportional to the cube + root of data size (asymptotically optimal). The estimate depends + only on size of the data. + + Parameters + ---------- + x : array_like + Input data that is to be histogrammed, trimmed to range. May not + be empty. + + Returns + ------- + h : An estimate of the optimal bin width for the given data. + """ + del range # unused + return _ptp(x) / (2.0 * x.size ** (1.0 / 3)) + + +def _hist_bin_scott(x, range): + """ + Scott histogram bin estimator. + + The binwidth is proportional to the standard deviation of the data + and inversely proportional to the cube root of data size + (asymptotically optimal). + + Parameters + ---------- + x : array_like + Input data that is to be histogrammed, trimmed to range. May not + be empty. + + Returns + ------- + h : An estimate of the optimal bin width for the given data. + """ + del range # unused + return (24.0 * np.pi**0.5 / x.size)**(1.0 / 3.0) * np.std(x) + + +def _hist_bin_stone(x, range): + """ + Histogram bin estimator based on minimizing the estimated integrated squared error (ISE). + + The number of bins is chosen by minimizing the estimated ISE against the unknown true distribution. 
+ The ISE is estimated using cross-validation and can be regarded as a generalization of Scott's rule. + https://en.wikipedia.org/wiki/Histogram#Scott.27s_normal_reference_rule + + This paper by Stone appears to be the origination of this rule. + http://digitalassets.lib.berkeley.edu/sdtr/ucb/text/34.pdf + + Parameters + ---------- + x : array_like + Input data that is to be histogrammed, trimmed to range. May not + be empty. + range : (float, float) + The lower and upper range of the bins. + + Returns + ------- + h : An estimate of the optimal bin width for the given data. + """ + + n = x.size + ptp_x = _ptp(x) + if n <= 1 or ptp_x == 0: + return 0 + + def jhat(nbins): + hh = ptp_x / nbins + p_k = np.histogram(x, bins=nbins, range=range)[0] / n + return (2 - (n + 1) * p_k.dot(p_k)) / hh + + nbins_upper_bound = max(100, int(np.sqrt(n))) + nbins = min(_range(1, nbins_upper_bound + 1), key=jhat) + if nbins == nbins_upper_bound: + warnings.warn("The number of bins estimated may be suboptimal.", + RuntimeWarning, stacklevel=3) + return ptp_x / nbins + + +def _hist_bin_doane(x, range): + """ + Doane's histogram bin estimator. + + Improved version of Sturges' formula which works better for + non-normal data. See + stats.stackexchange.com/questions/55134/doanes-formula-for-histogram-binning + + Parameters + ---------- + x : array_like + Input data that is to be histogrammed, trimmed to range. May not + be empty. + + Returns + ------- + h : An estimate of the optimal bin width for the given data. + """ + del range # unused + if x.size > 2: + sg1 = np.sqrt(6.0 * (x.size - 2) / ((x.size + 1.0) * (x.size + 3))) + sigma = np.std(x) + if sigma > 0.0: + # These three operations add up to + # g1 = np.mean(((x - np.mean(x)) / sigma)**3) + # but use only one temp array instead of three + temp = x - np.mean(x) + np.true_divide(temp, sigma, temp) + np.power(temp, 3, temp) + g1 = np.mean(temp) + return _ptp(x) / (1.0 + np.log2(x.size) + + np.log2(1.0 + np.absolute(g1) / sg1)) + return 0.0 + + +def _hist_bin_fd(x, range): + """ + The Freedman-Diaconis histogram bin estimator. + + The Freedman-Diaconis rule uses interquartile range (IQR) to + estimate binwidth. It is considered a variation of the Scott rule + with more robustness as the IQR is less affected by outliers than + the standard deviation. However, the IQR depends on fewer points + than the standard deviation, so it is less accurate, especially for + long tailed distributions. + + If the IQR is 0, this function returns 0 for the bin width. + Binwidth is inversely proportional to the cube root of data size + (asymptotically optimal). + + Parameters + ---------- + x : array_like + Input data that is to be histogrammed, trimmed to range. May not + be empty. + + Returns + ------- + h : An estimate of the optimal bin width for the given data. + """ + del range # unused + iqr = np.subtract(*np.percentile(x, [75, 25])) + return 2.0 * iqr * x.size ** (-1.0 / 3.0) + + +def _hist_bin_auto(x, range): + """ + Histogram bin estimator that uses the minimum width of the + Freedman-Diaconis and Sturges estimators if the FD bin width is non-zero. + If the bin width from the FD estimator is 0, the Sturges estimator is used. + + The FD estimator is usually the most robust method, but its width + estimate tends to be too large for small `x` and bad for data with limited + variance. The Sturges estimator is quite good for small (<1000) datasets + and is the default in the R language. This method gives good off-the-shelf + behaviour. + + .. 
versionchanged:: 1.15.0 + If there is limited variance the IQR can be 0, which results in the + FD bin width being 0 too. This is not a valid bin width, so + ``np.histogram_bin_edges`` chooses 1 bin instead, which may not be optimal. + If the IQR is 0, it's unlikely any variance-based estimators will be of + use, so we revert to the Sturges estimator, which only uses the size of the + dataset in its calculation. + + Parameters + ---------- + x : array_like + Input data that is to be histogrammed, trimmed to range. May not + be empty. + + Returns + ------- + h : An estimate of the optimal bin width for the given data. + + See Also + -------- + _hist_bin_fd, _hist_bin_sturges + """ + fd_bw = _hist_bin_fd(x, range) + sturges_bw = _hist_bin_sturges(x, range) + del range # unused + if fd_bw: + return min(fd_bw, sturges_bw) + else: + # limited variance, so we return a len dependent bw estimator + return sturges_bw + +# Private dict initialized at module load time +_hist_bin_selectors = {'stone': _hist_bin_stone, + 'auto': _hist_bin_auto, + 'doane': _hist_bin_doane, + 'fd': _hist_bin_fd, + 'rice': _hist_bin_rice, + 'scott': _hist_bin_scott, + 'sqrt': _hist_bin_sqrt, + 'sturges': _hist_bin_sturges} + + +def _ravel_and_check_weights(a, weights): + """ Check a and weights have matching shapes, and ravel both """ + a = np.asarray(a) + + # Ensure that the array is a "subtractable" dtype + if a.dtype == np.bool_: + warnings.warn("Converting input from {} to {} for compatibility." + .format(a.dtype, np.uint8), + RuntimeWarning, stacklevel=3) + a = a.astype(np.uint8) + + if weights is not None: + weights = np.asarray(weights) + if weights.shape != a.shape: + raise ValueError( + 'weights should have the same shape as a.') + weights = weights.ravel() + a = a.ravel() + return a, weights + + +def _get_outer_edges(a, range): + """ + Determine the outer bin edges to use, from either the data or the range + argument + """ + if range is not None: + first_edge, last_edge = range + if first_edge > last_edge: + raise ValueError( + 'max must be larger than min in range parameter.') + if not (np.isfinite(first_edge) and np.isfinite(last_edge)): + raise ValueError( + "supplied range of [{}, {}] is not finite".format(first_edge, last_edge)) + elif a.size == 0: + # handle empty arrays. Can't determine range, so use 0-1. + first_edge, last_edge = 0, 1 + else: + first_edge, last_edge = a.min(), a.max() + if not (np.isfinite(first_edge) and np.isfinite(last_edge)): + raise ValueError( + "autodetected range of [{}, {}] is not finite".format(first_edge, last_edge)) + + # expand empty range to avoid divide by zero + if first_edge == last_edge: + first_edge = first_edge - 0.5 + last_edge = last_edge + 0.5 + + return first_edge, last_edge + + +def _unsigned_subtract(a, b): + """ + Subtract two values where a >= b, and produce an unsigned result + + This is needed when finding the difference between the upper and lower + bound of an int16 histogram + """ + # coerce to a single type + signed_to_unsigned = { + np.byte: np.ubyte, + np.short: np.ushort, + np.intc: np.uintc, + np.int_: np.uint, + np.longlong: np.ulonglong + } + dt = np.result_type(a, b) + try: + dt = signed_to_unsigned[dt.type] + except KeyError: + return np.subtract(a, b, dtype=dt) + else: + # we know the inputs are integers, and we are deliberately casting + # signed to unsigned + return np.subtract(a, b, casting='unsafe', dtype=dt) + + +def _get_bin_edges(a, bins, range, weights): + """ + Computes the bins used internally by `histogram`. 
+ + Parameters + ---------- + a : ndarray + Ravelled data array + bins, range + Forwarded arguments from `histogram`. + weights : ndarray, optional + Ravelled weights array, or None + + Returns + ------- + bin_edges : ndarray + Array of bin edges + uniform_bins : (Number, Number, int) + The lower bound, upper bound, and number of bins, used in the optimized + implementation of `histogram` that works on uniform bins. + """ + # parse the overloaded bins argument + n_equal_bins = None + bin_edges = None + + if isinstance(bins, str): + bin_name = bins + # if `bins` is a string for an automatic method, + # this will replace it with the number of bins calculated + if bin_name not in _hist_bin_selectors: + raise ValueError( + "{!r} is not a valid estimator for `bins`".format(bin_name)) + if weights is not None: + raise TypeError("Automated estimation of the number of " + "bins is not supported for weighted data") + + first_edge, last_edge = _get_outer_edges(a, range) + + # truncate the range if needed + if range is not None: + keep = (a >= first_edge) + keep &= (a <= last_edge) + if not np.logical_and.reduce(keep): + a = a[keep] + + if a.size == 0: + n_equal_bins = 1 + else: + # Do not call selectors on empty arrays + width = _hist_bin_selectors[bin_name](a, (first_edge, last_edge)) + if width: + n_equal_bins = int(np.ceil(_unsigned_subtract(last_edge, first_edge) / width)) + else: + # Width can be zero for some estimators, e.g. FD when + # the IQR of the data is zero. + n_equal_bins = 1 + + elif np.ndim(bins) == 0: + try: + n_equal_bins = operator.index(bins) + except TypeError as e: + raise TypeError( + '`bins` must be an integer, a string, or an array') from e + if n_equal_bins < 1: + raise ValueError('`bins` must be positive, when an integer') + + first_edge, last_edge = _get_outer_edges(a, range) + + elif np.ndim(bins) == 1: + bin_edges = np.asarray(bins) + if np.any(bin_edges[:-1] > bin_edges[1:]): + raise ValueError( + '`bins` must increase monotonically, when an array') + + else: + raise ValueError('`bins` must be 1d, when an array') + + if n_equal_bins is not None: + # gh-10322 means that type resolution rules are dependent on array + # shapes. To avoid this causing problems, we pick a type now and stick + # with it throughout. + bin_type = np.result_type(first_edge, last_edge, a) + if np.issubdtype(bin_type, np.integer): + bin_type = np.result_type(bin_type, float) + + # bin edges must be computed + bin_edges = np.linspace( + first_edge, last_edge, n_equal_bins + 1, + endpoint=True, dtype=bin_type) + return bin_edges, (first_edge, last_edge, n_equal_bins) + else: + return bin_edges, None + + +def _search_sorted_inclusive(a, v): + """ + Like `searchsorted`, but where the last item in `v` is placed on the right. + + In the context of a histogram, this makes the last bin edge inclusive + """ + return np.concatenate(( + a.searchsorted(v[:-1], 'left'), + a.searchsorted(v[-1:], 'right') + )) + + +def _histogram_bin_edges_dispatcher(a, bins=None, range=None, weights=None): + return (a, bins, weights) + + +@array_function_dispatch(_histogram_bin_edges_dispatcher) +def histogram_bin_edges(a, bins=10, range=None, weights=None): + r""" + Function to calculate only the edges of the bins used by the `histogram` + function. + + Parameters + ---------- + a : array_like + Input data. The histogram is computed over the flattened array. + bins : int or sequence of scalars or str, optional + If `bins` is an int, it defines the number of equal-width + bins in the given range (10, by default).
If `bins` is a + sequence, it defines the bin edges, including the rightmost + edge, allowing for non-uniform bin widths. + + If `bins` is a string from the list below, `histogram_bin_edges` will use + the method chosen to calculate the optimal bin width and + consequently the number of bins (see `Notes` for more detail on + the estimators) from the data that falls within the requested + range. While the bin width will be optimal for the actual data + in the range, the number of bins will be computed to fill the + entire range, including the empty portions. For visualisation, + using the 'auto' option is suggested. Weighted data is not + supported for automated bin size selection. + + 'auto' + Maximum of the 'sturges' and 'fd' estimators. Provides good + all around performance. + + 'fd' (Freedman Diaconis Estimator) + Robust (resilient to outliers) estimator that takes into + account data variability and data size. + + 'doane' + An improved version of Sturges' estimator that works better + with non-normal datasets. + + 'scott' + Less robust estimator that takes into account data + variability and data size. + + 'stone' + Estimator based on leave-one-out cross-validation estimate of + the integrated squared error. Can be regarded as a generalization + of Scott's rule. + + 'rice' + Estimator does not take variability into account, only data + size. Commonly overestimates number of bins required. + + 'sturges' + R's default method, only accounts for data size. Only + optimal for gaussian data and underestimates number of bins + for large non-gaussian datasets. + + 'sqrt' + Square root (of data size) estimator, used by Excel and + other programs for its speed and simplicity. + + range : (float, float), optional + The lower and upper range of the bins. If not provided, range + is simply ``(a.min(), a.max())``. Values outside the range are + ignored. The first element of the range must be less than or + equal to the second. `range` affects the automatic bin + computation as well. While bin width is computed to be optimal + based on the actual data within `range`, the bin count will fill + the entire range including portions containing no data. + + weights : array_like, optional + An array of weights, of the same shape as `a`. Each value in + `a` only contributes its associated weight towards the bin count + (instead of 1). This is currently not used by any of the bin estimators, + but may be in the future. + + Returns + ------- + bin_edges : array of dtype float + The edges to pass into `histogram` + + See Also + -------- + histogram + + Notes + ----- + The methods to estimate the optimal number of bins are well founded + in literature, and are inspired by the choices R provides for + histogram visualisation. Note that having the number of bins + proportional to :math:`n^{1/3}` is asymptotically optimal, which is + why it appears in most estimators. These are simply plug-in methods + that give good starting points for number of bins. In the equations + below, :math:`h` is the binwidth and :math:`n_h` is the number of + bins. All estimators that compute bin counts are recast to bin width + using the `ptp` of the data. The final bin count is obtained from + ``np.round(np.ceil(range / h))``. The final bin width is often less + than what is returned by the estimators below. + + 'auto' (maximum of the 'sturges' and 'fd' estimators) + A compromise to get a good value. For small datasets the Sturges + value will usually be chosen, while larger datasets will usually + default to FD.
Avoids the overly conservative behaviour of FD + and Sturges for small and large datasets respectively. + Switchover point is usually :math:`a.size \approx 1000`. + + 'fd' (Freedman Diaconis Estimator) + .. math:: h = 2 \frac{IQR}{n^{1/3}} + + The binwidth is proportional to the interquartile range (IQR) + and inversely proportional to cube root of a.size. Can be too + conservative for small datasets, but is quite good for large + datasets. The IQR is very robust to outliers. + + 'scott' + .. math:: h = \sigma \sqrt[3]{\frac{24 * \sqrt{\pi}}{n}} + + The binwidth is proportional to the standard deviation of the + data and inversely proportional to cube root of ``x.size``. Can + be too conservative for small datasets, but is quite good for + large datasets. The standard deviation is not very robust to + outliers. Values are very similar to the Freedman-Diaconis + estimator in the absence of outliers. + + 'rice' + .. math:: n_h = 2n^{1/3} + + The number of bins is only proportional to cube root of + ``a.size``. It tends to overestimate the number of bins and it + does not take into account data variability. + + 'sturges' + .. math:: n_h = \log _{2}n+1 + + The number of bins is the base 2 log of ``a.size``. This + estimator assumes normality of data and is too conservative for + larger, non-normal datasets. This is the default method in R's + ``hist`` method. + + 'doane' + .. math:: n_h = 1 + \log_{2}(n) + + \log_{2}(1 + \frac{|g_1|}{\sigma_{g_1}}) + + g_1 = mean[(\frac{x - \mu}{\sigma})^3] + + \sigma_{g_1} = \sqrt{\frac{6(n - 2)}{(n + 1)(n + 3)}} + + An improved version of Sturges' formula that produces better + estimates for non-normal datasets. This estimator attempts to + account for the skew of the data. + + 'sqrt' + .. math:: n_h = \sqrt n + + The simplest and fastest estimator. Only takes into account the + data size. + + Examples + -------- + >>> arr = np.array([0, 0, 0, 1, 2, 3, 3, 4, 5]) + >>> np.histogram_bin_edges(arr, bins='auto', range=(0, 1)) + array([0. , 0.25, 0.5 , 0.75, 1. ]) + >>> np.histogram_bin_edges(arr, bins=2) + array([0. , 2.5, 5. ]) + + For consistency with histogram, an array of pre-computed bins is + passed through unmodified: + + >>> np.histogram_bin_edges(arr, [1, 2]) + array([1, 2]) + + This function allows one set of bins to be computed, and reused across + multiple histograms: + + >>> shared_bins = np.histogram_bin_edges(arr, bins='auto') + >>> shared_bins + array([0., 1., 2., 3., 4., 5.]) + + >>> group_id = np.array([0, 1, 1, 0, 1, 1, 0, 1, 1]) + >>> hist_0, _ = np.histogram(arr[group_id == 0], bins=shared_bins) + >>> hist_1, _ = np.histogram(arr[group_id == 1], bins=shared_bins) + + >>> hist_0; hist_1 + array([1, 1, 0, 1, 0]) + array([2, 0, 1, 1, 2]) + + Which gives more easily comparable results than using separate bins for + each histogram: + + >>> hist_0, bins_0 = np.histogram(arr[group_id == 0], bins='auto') + >>> hist_1, bins_1 = np.histogram(arr[group_id == 1], bins='auto') + >>> hist_0; hist_1 + array([1, 1, 1]) + array([2, 1, 1, 2]) + >>> bins_0; bins_1 + array([0., 1., 2., 3.]) + array([0. , 1.25, 2.5 , 3.75, 5. 
]) + + """ + a, weights = _ravel_and_check_weights(a, weights) + bin_edges, _ = _get_bin_edges(a, bins, range, weights) + return bin_edges + + +def _histogram_dispatcher( + a, bins=None, range=None, normed=None, weights=None, density=None): + return (a, bins, weights) + + +@array_function_dispatch(_histogram_dispatcher) +def histogram(a, bins=10, range=None, normed=None, weights=None, + density=None): + r""" + Compute the histogram of a dataset. + + Parameters + ---------- + a : array_like + Input data. The histogram is computed over the flattened array. + bins : int or sequence of scalars or str, optional + If `bins` is an int, it defines the number of equal-width + bins in the given range (10, by default). If `bins` is a + sequence, it defines a monotonically increasing array of bin edges, + including the rightmost edge, allowing for non-uniform bin widths. + + .. versionadded:: 1.11.0 + + If `bins` is a string, it defines the method used to calculate the + optimal bin width, as defined by `histogram_bin_edges`. + + range : (float, float), optional + The lower and upper range of the bins. If not provided, range + is simply ``(a.min(), a.max())``. Values outside the range are + ignored. The first element of the range must be less than or + equal to the second. `range` affects the automatic bin + computation as well. While bin width is computed to be optimal + based on the actual data within `range`, the bin count will fill + the entire range including portions containing no data. + normed : bool, optional + + .. deprecated:: 1.6.0 + + This is equivalent to the `density` argument, but produces incorrect + results for unequal bin widths. It should not be used. + + .. versionchanged:: 1.15.0 + DeprecationWarnings are actually emitted. + + weights : array_like, optional + An array of weights, of the same shape as `a`. Each value in + `a` only contributes its associated weight towards the bin count + (instead of 1). If `density` is True, the weights are + normalized, so that the integral of the density over the range + remains 1. + density : bool, optional + If ``False``, the result will contain the number of samples in + each bin. If ``True``, the result is the value of the + probability *density* function at the bin, normalized such that + the *integral* over the range is 1. Note that the sum of the + histogram values will not be equal to 1 unless bins of unity + width are chosen; it is not a probability *mass* function. + + Overrides the ``normed`` keyword if given. + + Returns + ------- + hist : array + The values of the histogram. See `density` and `weights` for a + description of the possible semantics. + bin_edges : array of dtype float + Return the bin edges ``(length(hist)+1)``. + + + See Also + -------- + histogramdd, bincount, searchsorted, digitize, histogram_bin_edges + + Notes + ----- + All but the last (righthand-most) bin is half-open. In other words, + if `bins` is:: + + [1, 2, 3, 4] + + then the first bin is ``[1, 2)`` (including 1, but excluding 2) and + the second ``[2, 3)``. The last bin, however, is ``[3, 4]``, which + *includes* 4. 
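The half-open convention described in the Notes is a common source of confusion with `histogram`, so a concrete check (illustrative only, not part of the original docstring) may help: a value equal to an interior edge lands in the bin to its right, while a value equal to the final edge is folded into the last bin:

>>> np.histogram([1, 2, 3, 4], bins=[1, 2, 3, 4])
(array([1, 1, 2]), array([1, 2, 3, 4]))

Here 2 is counted in ``[2, 3)`` rather than ``[1, 2)``, and 4 joins 3 in the closed last bin ``[3, 4]``.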
+ + + Examples + -------- + >>> np.histogram([1, 2, 1], bins=[0, 1, 2, 3]) + (array([0, 2, 1]), array([0, 1, 2, 3])) + >>> np.histogram(np.arange(4), bins=np.arange(5), density=True) + (array([0.25, 0.25, 0.25, 0.25]), array([0, 1, 2, 3, 4])) + >>> np.histogram([[1, 2, 1], [1, 0, 1]], bins=[0,1,2,3]) + (array([1, 4, 1]), array([0, 1, 2, 3])) + + >>> a = np.arange(5) + >>> hist, bin_edges = np.histogram(a, density=True) + >>> hist + array([0.5, 0. , 0.5, 0. , 0. , 0.5, 0. , 0.5, 0. , 0.5]) + >>> hist.sum() + 2.4999999999999996 + >>> np.sum(hist * np.diff(bin_edges)) + 1.0 + + .. versionadded:: 1.11.0 + + Automated Bin Selection Methods example, using 2 peak random data + with 2000 points: + + >>> import matplotlib.pyplot as plt + >>> rng = np.random.RandomState(10) # deterministic random data + >>> a = np.hstack((rng.normal(size=1000), + ... rng.normal(loc=5, scale=2, size=1000))) + >>> _ = plt.hist(a, bins='auto') # arguments are passed to np.histogram + >>> plt.title("Histogram with 'auto' bins") + Text(0.5, 1.0, "Histogram with 'auto' bins") + >>> plt.show() + + """ + a, weights = _ravel_and_check_weights(a, weights) + + bin_edges, uniform_bins = _get_bin_edges(a, bins, range, weights) + + # Histogram is an integer or a float array depending on the weights. + if weights is None: + ntype = np.dtype(np.intp) + else: + ntype = weights.dtype + + # We set a block size, as this allows us to iterate over chunks when + # computing histograms, to minimize memory usage. + BLOCK = 65536 + + # The fast path uses bincount, but that only works for certain types + # of weight + simple_weights = ( + weights is None or + np.can_cast(weights.dtype, np.double) or + np.can_cast(weights.dtype, complex) + ) + + if uniform_bins is not None and simple_weights: + # Fast algorithm for equal bins + # We now convert values of a to bin indices, under the assumption of + # equal bin widths (which is valid here). + first_edge, last_edge, n_equal_bins = uniform_bins + + # Initialize empty histogram + n = np.zeros(n_equal_bins, ntype) + + # Pre-compute histogram scaling factor + norm = n_equal_bins / _unsigned_subtract(last_edge, first_edge) + + # We iterate over blocks here for two reasons: the first is that for + # large arrays, it is actually faster (for example for a 10^8 array it + # is 2x as fast) and it results in a memory footprint 3x lower in the + # limit of large arrays. + for i in _range(0, len(a), BLOCK): + tmp_a = a[i:i+BLOCK] + if weights is None: + tmp_w = None + else: + tmp_w = weights[i:i + BLOCK] + + # Only include values in the right range + keep = (tmp_a >= first_edge) + keep &= (tmp_a <= last_edge) + if not np.logical_and.reduce(keep): + tmp_a = tmp_a[keep] + if tmp_w is not None: + tmp_w = tmp_w[keep] + + # This cast ensures no type promotions occur below, which gh-10322 + # make unpredictable. Getting it wrong leads to precision errors + # like gh-8123. + tmp_a = tmp_a.astype(bin_edges.dtype, copy=False) + + # Compute the bin indices, and for values that lie exactly on + # last_edge we need to subtract one + f_indices = _unsigned_subtract(tmp_a, first_edge) * norm + indices = f_indices.astype(np.intp) + indices[indices == n_equal_bins] -= 1 + + # The index computation is not guaranteed to give exactly + # consistent results within ~1 ULP of the bin edges. + decrement = tmp_a < bin_edges[indices] + indices[decrement] -= 1 + # The last bin includes the right edge. The other bins do not. 
+ increment = ((tmp_a >= bin_edges[indices + 1]) + & (indices != n_equal_bins - 1)) + indices[increment] += 1 + + # We now compute the histogram using bincount + if ntype.kind == 'c': + n.real += np.bincount(indices, weights=tmp_w.real, + minlength=n_equal_bins) + n.imag += np.bincount(indices, weights=tmp_w.imag, + minlength=n_equal_bins) + else: + n += np.bincount(indices, weights=tmp_w, + minlength=n_equal_bins).astype(ntype) + else: + # Compute via cumulative histogram + cum_n = np.zeros(bin_edges.shape, ntype) + if weights is None: + for i in _range(0, len(a), BLOCK): + sa = np.sort(a[i:i+BLOCK]) + cum_n += _search_sorted_inclusive(sa, bin_edges) + else: + zero = np.zeros(1, dtype=ntype) + for i in _range(0, len(a), BLOCK): + tmp_a = a[i:i+BLOCK] + tmp_w = weights[i:i+BLOCK] + sorting_index = np.argsort(tmp_a) + sa = tmp_a[sorting_index] + sw = tmp_w[sorting_index] + cw = np.concatenate((zero, sw.cumsum())) + bin_index = _search_sorted_inclusive(sa, bin_edges) + cum_n += cw[bin_index] + + n = np.diff(cum_n) + + # density overrides the normed keyword + if density is not None: + if normed is not None: + # 2018-06-13, numpy 1.15.0 (this was not noisily deprecated in 1.6) + warnings.warn( + "The normed argument is ignored when density is provided. " + "In future passing both will result in an error.", + DeprecationWarning, stacklevel=3) + normed = None + + if density: + db = np.array(np.diff(bin_edges), float) + return n/db/n.sum(), bin_edges + elif normed: + # 2018-06-13, numpy 1.15.0 (this was not noisily deprecated in 1.6) + warnings.warn( + "Passing `normed=True` on non-uniform bins has always been " + "broken, and computes neither the probability density " + "function nor the probability mass function. " + "The result is only correct if the bins are uniform, when " + "density=True will produce the same result anyway. " + "The argument will be removed in a future version of " + "numpy.", + np.VisibleDeprecationWarning, stacklevel=3) + + # this normalization is incorrect, but + db = np.array(np.diff(bin_edges), float) + return n/(n*db).sum(), bin_edges + else: + if normed is not None: + # 2018-06-13, numpy 1.15.0 (this was not noisily deprecated in 1.6) + warnings.warn( + "Passing normed=False is deprecated, and has no effect. " + "Consider passing the density argument instead.", + DeprecationWarning, stacklevel=3) + return n, bin_edges + + +def _histogramdd_dispatcher(sample, bins=None, range=None, normed=None, + weights=None, density=None): + if hasattr(sample, 'shape'): # same condition as used in histogramdd + yield sample + else: + yield from sample + with contextlib.suppress(TypeError): + yield from bins + yield weights + + +@array_function_dispatch(_histogramdd_dispatcher) +def histogramdd(sample, bins=10, range=None, normed=None, weights=None, + density=None): + """ + Compute the multidimensional histogram of some data. + + Parameters + ---------- + sample : (N, D) array, or (D, N) array_like + The data to be histogrammed. + + Note the unusual interpretation of sample when an array_like: + + * When an array, each row is a coordinate in a D-dimensional space - + such as ``histogramdd(np.array([p1, p2, p3]))``. + * When an array_like, each element is the list of values for single + coordinate - such as ``histogramdd((X, Y, Z))``. + + The first form should be preferred. + + bins : sequence or int, optional + The bin specification: + + * A sequence of arrays describing the monotonically increasing bin + edges along each dimension. 
+ * The number of bins for each dimension (nx, ny, ... =bins) + * The number of bins for all dimensions (nx=ny=...=bins). + + range : sequence, optional + A sequence of length D, each an optional (lower, upper) tuple giving + the outer bin edges to be used if the edges are not given explicitly in + `bins`. + An entry of None in the sequence results in the minimum and maximum + values being used for the corresponding dimension. + The default, None, is equivalent to passing a tuple of D None values. + density : bool, optional + If False, the default, returns the number of samples in each bin. + If True, returns the probability *density* function at the bin, + ``bin_count / sample_count / bin_volume``. + normed : bool, optional + An alias for the density argument that behaves identically. To avoid + confusion with the broken normed argument to `histogram`, `density` + should be preferred. + weights : (N,) array_like, optional + An array of values `w_i` weighing each sample `(x_i, y_i, z_i, ...)`. + Weights are normalized to 1 if normed is True. If normed is False, + the values of the returned histogram are equal to the sum of the + weights belonging to the samples falling into each bin. + + Returns + ------- + H : ndarray + The multidimensional histogram of sample x. See normed and weights + for the different possible semantics. + edges : list + A list of D arrays describing the bin edges for each dimension. + + See Also + -------- + histogram: 1-D histogram + histogram2d: 2-D histogram + + Examples + -------- + >>> r = np.random.randn(100,3) + >>> H, edges = np.histogramdd(r, bins = (5, 8, 4)) + >>> H.shape, edges[0].size, edges[1].size, edges[2].size + ((5, 8, 4), 6, 9, 5) + + """ + + try: + # Sample is an ND-array. + N, D = sample.shape + except (AttributeError, ValueError): + # Sample is a sequence of 1D arrays. + sample = np.atleast_2d(sample).T + N, D = sample.shape + + nbin = np.empty(D, int) + edges = D*[None] + dedges = D*[None] + if weights is not None: + weights = np.asarray(weights) + + try: + M = len(bins) + if M != D: + raise ValueError( + 'The dimension of bins must be equal to the dimension of the ' + ' sample x.') + except TypeError: + # bins is an integer + bins = D*[bins] + + # normalize the range argument + if range is None: + range = (None,) * D + elif len(range) != D: + raise ValueError('range argument must have one entry per dimension') + + # Create edge arrays + for i in _range(D): + if np.ndim(bins[i]) == 0: + if bins[i] < 1: + raise ValueError( + '`bins[{}]` must be positive, when an integer'.format(i)) + smin, smax = _get_outer_edges(sample[:,i], range[i]) + try: + n = operator.index(bins[i]) + + except TypeError as e: + raise TypeError( + "`bins[{}]` must be an integer, when a scalar".format(i) + ) from e + + edges[i] = np.linspace(smin, smax, n + 1) + elif np.ndim(bins[i]) == 1: + edges[i] = np.asarray(bins[i]) + if np.any(edges[i][:-1] > edges[i][1:]): + raise ValueError( + '`bins[{}]` must be monotonically increasing, when an array' + .format(i)) + else: + raise ValueError( + '`bins[{}]` must be a scalar or 1d array'.format(i)) + + nbin[i] = len(edges[i]) + 1 # includes an outlier on each end + dedges[i] = np.diff(edges[i]) + + # Compute the bin number each sample falls into. + Ncount = tuple( + # avoid np.digitize to work around gh-11022 + np.searchsorted(edges[i], sample[:, i], side='right') + for i in _range(D) + ) + + # Using digitize, values that fall on an edge are put in the right bin. 
+ # For the rightmost bin, we want values equal to the right edge to be + # counted in the last bin, and not as an outlier. + for i in _range(D): + # Find which points are on the rightmost edge. + on_edge = (sample[:, i] == edges[i][-1]) + # Shift these points one bin to the left. + Ncount[i][on_edge] -= 1 + + # Compute the sample indices in the flattened histogram matrix. + # This raises an error if the array is too large. + xy = np.ravel_multi_index(Ncount, nbin) + + # Compute the number of repetitions in xy and assign it to the + # flattened histmat. + hist = np.bincount(xy, weights, minlength=nbin.prod()) + + # Shape into a proper matrix + hist = hist.reshape(nbin) + + # This preserves the (bad) behavior observed in gh-7845, for now. + hist = hist.astype(float, casting='safe') + + # Remove outliers (indices 0 and -1 for each dimension). + core = D*(slice(1, -1),) + hist = hist[core] + + # handle the aliasing normed argument + if normed is None: + if density is None: + density = False + elif density is None: + # an explicit normed argument was passed, alias it to the new name + density = normed + else: + raise TypeError("Cannot specify both 'normed' and 'density'") + + if density: + # calculate the probability density function + s = hist.sum() + for i in _range(D): + shape = np.ones(D, int) + shape[i] = nbin[i] - 2 + hist = hist / dedges[i].reshape(shape) + hist /= s + + if (hist.shape != nbin - 2).any(): + raise RuntimeError( + "Internal Shape Error") + return hist, edges diff --git a/.local/lib/python3.8/site-packages/numpy/lib/histograms.pyi b/.local/lib/python3.8/site-packages/numpy/lib/histograms.pyi new file mode 100644 index 0000000..2ceb607 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/histograms.pyi @@ -0,0 +1,51 @@ +from typing import ( + Literal as L, + List, + Tuple, + Any, + SupportsIndex, + Sequence, +) + +from numpy.typing import ( + NDArray, + ArrayLike, +) + +_BinKind = L[ + "stone", + "auto", + "doane", + "fd", + "rice", + "scott", + "sqrt", + "sturges", +] + +__all__: List[str] + +def histogram_bin_edges( + a: ArrayLike, + bins: _BinKind | SupportsIndex | ArrayLike = ..., + range: None | Tuple[float, float] = ..., + weights: None | ArrayLike = ..., +) -> NDArray[Any]: ... + +def histogram( + a: ArrayLike, + bins: _BinKind | SupportsIndex | ArrayLike = ..., + range: None | Tuple[float, float] = ..., + normed: None = ..., + weights: None | ArrayLike = ..., + density: bool = ..., +) -> Tuple[NDArray[Any], NDArray[Any]]: ... + +def histogramdd( + sample: ArrayLike, + bins: SupportsIndex | ArrayLike = ..., + range: Sequence[Tuple[float, float]] = ..., + normed: None | bool = ..., + weights: None | ArrayLike = ..., + density: None | bool = ..., +) -> Tuple[NDArray[Any], List[NDArray[Any]]]: ... 
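One detail the `histogramdd` stub above expresses only loosely (`sample: ArrayLike`) is that the sample may be either an (N, D) array of points or a length-D sequence of 1-D coordinate arrays, as its docstring explains. A small sketch with made-up points, showing the two spellings agree:

>>> import numpy as np
>>> x = np.array([0.1, 0.4, 0.9])
>>> y = np.array([0.5, 0.2, 0.7])
>>> H1, edges1 = np.histogramdd(np.column_stack([x, y]), bins=(2, 2))  # (N, D) array form
>>> H2, edges2 = np.histogramdd((x, y), bins=(2, 2))                   # sequence-of-coordinates form
>>> np.array_equal(H1, H2)
True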
diff --git a/.local/lib/python3.8/site-packages/numpy/lib/index_tricks.py b/.local/lib/python3.8/site-packages/numpy/lib/index_tricks.py new file mode 100644 index 0000000..2a4402c --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/index_tricks.py @@ -0,0 +1,1014 @@ +import functools +import sys +import math +import warnings + +import numpy.core.numeric as _nx +from numpy.core.numeric import ( + asarray, ScalarType, array, alltrue, cumprod, arange, ndim +) +from numpy.core.numerictypes import find_common_type, issubdtype + +import numpy.matrixlib as matrixlib +from .function_base import diff +from numpy.core.multiarray import ravel_multi_index, unravel_index +from numpy.core.overrides import set_module +from numpy.core import overrides, linspace +from numpy.lib.stride_tricks import as_strided + + +array_function_dispatch = functools.partial( + overrides.array_function_dispatch, module='numpy') + + +__all__ = [ + 'ravel_multi_index', 'unravel_index', 'mgrid', 'ogrid', 'r_', 'c_', + 's_', 'index_exp', 'ix_', 'ndenumerate', 'ndindex', 'fill_diagonal', + 'diag_indices', 'diag_indices_from' +] + + +def _ix__dispatcher(*args): + return args + + +@array_function_dispatch(_ix__dispatcher) +def ix_(*args): + """ + Construct an open mesh from multiple sequences. + + This function takes N 1-D sequences and returns N outputs with N + dimensions each, such that the shape is 1 in all but one dimension + and the dimension with the non-unit shape value cycles through all + N dimensions. + + Using `ix_` one can quickly construct index arrays that will index + the cross product. ``a[np.ix_([1,3],[2,5])]`` returns the array + ``[[a[1,2] a[1,5]], [a[3,2] a[3,5]]]``. + + Parameters + ---------- + args : 1-D sequences + Each sequence should be of integer or boolean type. + Boolean sequences will be interpreted as boolean masks for the + corresponding dimension (equivalent to passing in + ``np.nonzero(boolean_sequence)``). + + Returns + ------- + out : tuple of ndarrays + N arrays with N dimensions each, with N the number of input + sequences. Together these arrays form an open mesh. + + See Also + -------- + ogrid, mgrid, meshgrid + + Examples + -------- + >>> a = np.arange(10).reshape(2, 5) + >>> a + array([[0, 1, 2, 3, 4], + [5, 6, 7, 8, 9]]) + >>> ixgrid = np.ix_([0, 1], [2, 4]) + >>> ixgrid + (array([[0], + [1]]), array([[2, 4]])) + >>> ixgrid[0].shape, ixgrid[1].shape + ((2, 1), (1, 2)) + >>> a[ixgrid] + array([[2, 4], + [7, 9]]) + + >>> ixgrid = np.ix_([True, True], [2, 4]) + >>> a[ixgrid] + array([[2, 4], + [7, 9]]) + >>> ixgrid = np.ix_([True, True], [False, False, True, False, True]) + >>> a[ixgrid] + array([[2, 4], + [7, 9]]) + + """ + out = [] + nd = len(args) + for k, new in enumerate(args): + if not isinstance(new, _nx.ndarray): + new = asarray(new) + if new.size == 0: + # Explicitly type empty arrays to avoid float default + new = new.astype(_nx.intp) + if new.ndim != 1: + raise ValueError("Cross index must be 1 dimensional") + if issubdtype(new.dtype, _nx.bool_): + new, = new.nonzero() + new = new.reshape((1,)*k + (new.size,) + (1,)*(nd-k-1)) + out.append(new) + return tuple(out) + + +class nd_grid: + """ + Construct a multi-dimensional "meshgrid". + + ``grid = nd_grid()`` creates an instance which will return a mesh-grid + when indexed. The dimension and number of the output arrays are equal + to the number of indexing dimensions. If the step length is not a + complex number, then the stop is not inclusive. + + However, if the step length is a **complex number** (e.g. 
5j), then the + integer part of its magnitude is interpreted as specifying the + number of points to create between the start and stop values, where + the stop value **is inclusive**. + + If instantiated with an argument of ``sparse=True``, the mesh-grid is + open (or not fleshed out) so that only one-dimension of each returned + argument is greater than 1. + + Parameters + ---------- + sparse : bool, optional + Whether the grid is sparse or not. Default is False. + + Notes + ----- + Two instances of `nd_grid` are made available in the NumPy namespace, + `mgrid` and `ogrid`, approximately defined as:: + + mgrid = nd_grid(sparse=False) + ogrid = nd_grid(sparse=True) + + Users should use these pre-defined instances instead of using `nd_grid` + directly. + """ + + def __init__(self, sparse=False): + self.sparse = sparse + + def __getitem__(self, key): + try: + size = [] + typ = int + for kk in key: + step = kk.step + start = kk.start + if start is None: + start = 0 + if step is None: + step = 1 + if isinstance(step, (_nx.complexfloating, complex)): + size.append(int(abs(step))) + typ = float + else: + size.append( + int(math.ceil((kk.stop - start) / (step * 1.0)))) + if (isinstance(step, (_nx.floating, float)) or + isinstance(start, (_nx.floating, float)) or + isinstance(kk.stop, (_nx.floating, float))): + typ = float + if self.sparse: + nn = [_nx.arange(_x, dtype=_t) + for _x, _t in zip(size, (typ,)*len(size))] + else: + nn = _nx.indices(size, typ) + for k, kk in enumerate(key): + step = kk.step + start = kk.start + if start is None: + start = 0 + if step is None: + step = 1 + if isinstance(step, (_nx.complexfloating, complex)): + step = int(abs(step)) + if step != 1: + step = (kk.stop - start) / float(step - 1) + nn[k] = (nn[k]*step+start) + if self.sparse: + slobj = [_nx.newaxis]*len(size) + for k in range(len(size)): + slobj[k] = slice(None, None) + nn[k] = nn[k][tuple(slobj)] + slobj[k] = _nx.newaxis + return nn + except (IndexError, TypeError): + step = key.step + stop = key.stop + start = key.start + if start is None: + start = 0 + if isinstance(step, (_nx.complexfloating, complex)): + step = abs(step) + length = int(step) + if step != 1: + step = (key.stop-start)/float(step-1) + return _nx.arange(0, length, 1, float)*step + start + else: + return _nx.arange(start, stop, step) + + +class MGridClass(nd_grid): + """ + `nd_grid` instance which returns a dense multi-dimensional "meshgrid". + + An instance of `numpy.lib.index_tricks.nd_grid` which returns an dense + (or fleshed out) mesh-grid when indexed, so that each returned argument + has the same shape. The dimensions and number of the output arrays are + equal to the number of indexing dimensions. If the step length is not a + complex number, then the stop is not inclusive. + + However, if the step length is a **complex number** (e.g. 5j), then + the integer part of its magnitude is interpreted as specifying the + number of points to create between the start and stop values, where + the stop value **is inclusive**. 
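In practice the choice between the dense and sparse variants comes down to broadcasting: `ogrid` (the `sparse=True` instance defined further below) returns arrays with singleton dimensions that broadcast against one another, so expressions over the full grid cost a fraction of the memory of `mgrid` while producing identical results. A minimal sketch:

>>> xx, yy = np.mgrid[0:3, 0:3]       # dense: two (3, 3) arrays
>>> xs, ys = np.ogrid[0:3, 0:3]       # open: shapes (3, 1) and (1, 3)
>>> np.array_equal(xx + yy, xs + ys)  # broadcasting fleshes out the rest
True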
+ + Returns + ------- + mesh-grid `ndarrays` all of the same dimensions + + See Also + -------- + numpy.lib.index_tricks.nd_grid : class of `ogrid` and `mgrid` objects + ogrid : like mgrid but returns open (not fleshed out) mesh grids + r_ : array concatenator + + Examples + -------- + >>> np.mgrid[0:5,0:5] + array([[[0, 0, 0, 0, 0], + [1, 1, 1, 1, 1], + [2, 2, 2, 2, 2], + [3, 3, 3, 3, 3], + [4, 4, 4, 4, 4]], + [[0, 1, 2, 3, 4], + [0, 1, 2, 3, 4], + [0, 1, 2, 3, 4], + [0, 1, 2, 3, 4], + [0, 1, 2, 3, 4]]]) + >>> np.mgrid[-1:1:5j] + array([-1. , -0.5, 0. , 0.5, 1. ]) + + """ + + def __init__(self): + super().__init__(sparse=False) + + +mgrid = MGridClass() + + +class OGridClass(nd_grid): + """ + `nd_grid` instance which returns an open multi-dimensional "meshgrid". + + An instance of `numpy.lib.index_tricks.nd_grid` which returns an open + (i.e. not fleshed out) mesh-grid when indexed, so that only one dimension + of each returned array is greater than 1. The dimension and number of the + output arrays are equal to the number of indexing dimensions. If the step + length is not a complex number, then the stop is not inclusive. + + However, if the step length is a **complex number** (e.g. 5j), then + the integer part of its magnitude is interpreted as specifying the + number of points to create between the start and stop values, where + the stop value **is inclusive**. + + Returns + ------- + mesh-grid + `ndarrays` with only one dimension not equal to 1 + + See Also + -------- + np.lib.index_tricks.nd_grid : class of `ogrid` and `mgrid` objects + mgrid : like `ogrid` but returns dense (or fleshed out) mesh grids + r_ : array concatenator + + Examples + -------- + >>> from numpy import ogrid + >>> ogrid[-1:1:5j] + array([-1. , -0.5, 0. , 0.5, 1. ]) + >>> ogrid[0:5,0:5] + [array([[0], + [1], + [2], + [3], + [4]]), array([[0, 1, 2, 3, 4]])] + + """ + + def __init__(self): + super().__init__(sparse=True) + + +ogrid = OGridClass() + + +class AxisConcatenator: + """ + Translates slice objects to concatenation along an axis. + + For detailed documentation on usage, see `r_`. 
+ """ + # allow ma.mr_ to override this + concatenate = staticmethod(_nx.concatenate) + makemat = staticmethod(matrixlib.matrix) + + def __init__(self, axis=0, matrix=False, ndmin=1, trans1d=-1): + self.axis = axis + self.matrix = matrix + self.trans1d = trans1d + self.ndmin = ndmin + + def __getitem__(self, key): + # handle matrix builder syntax + if isinstance(key, str): + frame = sys._getframe().f_back + mymat = matrixlib.bmat(key, frame.f_globals, frame.f_locals) + return mymat + + if not isinstance(key, tuple): + key = (key,) + + # copy attributes, since they can be overridden in the first argument + trans1d = self.trans1d + ndmin = self.ndmin + matrix = self.matrix + axis = self.axis + + objs = [] + scalars = [] + arraytypes = [] + scalartypes = [] + + for k, item in enumerate(key): + scalar = False + if isinstance(item, slice): + step = item.step + start = item.start + stop = item.stop + if start is None: + start = 0 + if step is None: + step = 1 + if isinstance(step, (_nx.complexfloating, complex)): + size = int(abs(step)) + newobj = linspace(start, stop, num=size) + else: + newobj = _nx.arange(start, stop, step) + if ndmin > 1: + newobj = array(newobj, copy=False, ndmin=ndmin) + if trans1d != -1: + newobj = newobj.swapaxes(-1, trans1d) + elif isinstance(item, str): + if k != 0: + raise ValueError("special directives must be the " + "first entry.") + if item in ('r', 'c'): + matrix = True + col = (item == 'c') + continue + if ',' in item: + vec = item.split(',') + try: + axis, ndmin = [int(x) for x in vec[:2]] + if len(vec) == 3: + trans1d = int(vec[2]) + continue + except Exception as e: + raise ValueError( + "unknown special directive {!r}".format(item) + ) from e + try: + axis = int(item) + continue + except (ValueError, TypeError) as e: + raise ValueError("unknown special directive") from e + elif type(item) in ScalarType: + newobj = array(item, ndmin=ndmin) + scalars.append(len(objs)) + scalar = True + scalartypes.append(newobj.dtype) + else: + item_ndim = ndim(item) + newobj = array(item, copy=False, subok=True, ndmin=ndmin) + if trans1d != -1 and item_ndim < ndmin: + k2 = ndmin - item_ndim + k1 = trans1d + if k1 < 0: + k1 += k2 + 1 + defaxes = list(range(ndmin)) + axes = defaxes[:k1] + defaxes[k2:] + defaxes[k1:k2] + newobj = newobj.transpose(axes) + objs.append(newobj) + if not scalar and isinstance(newobj, _nx.ndarray): + arraytypes.append(newobj.dtype) + + # Ensure that scalars won't up-cast unless warranted + final_dtype = find_common_type(arraytypes, scalartypes) + if final_dtype is not None: + for k in scalars: + objs[k] = objs[k].astype(final_dtype) + + res = self.concatenate(tuple(objs), axis=axis) + + if matrix: + oldndim = res.ndim + res = self.makemat(res) + if oldndim == 1 and col: + res = res.T + return res + + def __len__(self): + return 0 + +# separate classes are used here instead of just making r_ = concatentor(0), +# etc. because otherwise we couldn't get the doc string to come out right +# in help(r_) + + +class RClass(AxisConcatenator): + """ + Translates slice objects to concatenation along the first axis. + + This is a simple way to build up arrays quickly. There are two use cases. + + 1. If the index expression contains comma separated arrays, then stack + them along their first axis. + 2. If the index expression contains slice notation or scalars then create + a 1-D array with a range indicated by the slice notation. 
+ + If slice notation is used, the syntax ``start:stop:step`` is equivalent + to ``np.arange(start, stop, step)`` inside of the brackets. However, if + ``step`` is an imaginary number (i.e. 100j) then its integer portion is + interpreted as a number-of-points desired and the start and stop are + inclusive. In other words ``start:stop:stepj`` is interpreted as + ``np.linspace(start, stop, step, endpoint=1)`` inside of the brackets. + After expansion of slice notation, all comma separated sequences are + concatenated together. + + Optional character strings placed as the first element of the index + expression can be used to change the output. The strings 'r' or 'c' result + in matrix output. If the result is 1-D and 'r' is specified a 1 x N (row) + matrix is produced. If the result is 1-D and 'c' is specified, then a N x 1 + (column) matrix is produced. If the result is 2-D then both provide the + same matrix result. + + A string integer specifies which axis to stack multiple comma separated + arrays along. A string of two comma-separated integers allows indication + of the minimum number of dimensions to force each entry into as the + second integer (the axis to concatenate along is still the first integer). + + A string with three comma-separated integers allows specification of the + axis to concatenate along, the minimum number of dimensions to force the + entries to, and which axis should contain the start of the arrays which + are less than the specified number of dimensions. In other words the third + integer allows you to specify where the 1's should be placed in the shape + of the arrays that have their shapes upgraded. By default, they are placed + in the front of the shape tuple. The third argument allows you to specify + where the start of the array should be instead. Thus, a third argument of + '0' would place the 1's at the end of the array shape. Negative integers + specify where in the new shape tuple the last dimension of upgraded arrays + should be placed, so the default is '-1'. + + Parameters + ---------- + Not a function, so takes no parameters + + + Returns + ------- + A concatenated ndarray or matrix. + + See Also + -------- + concatenate : Join a sequence of arrays along an existing axis. + c_ : Translates slice objects to concatenation along the second axis. + + Examples + -------- + >>> np.r_[np.array([1,2,3]), 0, 0, np.array([4,5,6])] + array([1, 2, 3, ..., 4, 5, 6]) + >>> np.r_[-1:1:6j, [0]*3, 5, 6] + array([-1. , -0.6, -0.2, 0.2, 0.6, 1. , 0. , 0. , 0. , 5. , 6. ]) + + String integers specify the axis to concatenate along or the minimum + number of dimensions to force entries into. + + >>> a = np.array([[0, 1, 2], [3, 4, 5]]) + >>> np.r_['-1', a, a] # concatenate along last axis + array([[0, 1, 2, 0, 1, 2], + [3, 4, 5, 3, 4, 5]]) + >>> np.r_['0,2', [1,2,3], [4,5,6]] # concatenate along first axis, dim>=2 + array([[1, 2, 3], + [4, 5, 6]]) + + >>> np.r_['0,2,0', [1,2,3], [4,5,6]] + array([[1], + [2], + [3], + [4], + [5], + [6]]) + >>> np.r_['1,2,0', [1,2,3], [4,5,6]] + array([[1, 4], + [2, 5], + [3, 6]]) + + Using 'r' or 'c' as a first string argument creates a matrix. + + >>> np.r_['r',[1,2,3], [4,5,6]] + matrix([[1, 2, 3, 4, 5, 6]]) + + """ + + def __init__(self): + AxisConcatenator.__init__(self, 0) + + +r_ = RClass() + + +class CClass(AxisConcatenator): + """ + Translates slice objects to concatenation along the second axis. + + This is short-hand for ``np.r_['-1,2,0', index expression]``, which is + useful because of its common occurrence. 
In particular, arrays will be + stacked along their last axis after being upgraded to at least 2-D with + 1's post-pended to the shape (column vectors made out of 1-D arrays). + + See Also + -------- + column_stack : Stack 1-D arrays as columns into a 2-D array. + r_ : For more detailed documentation. + + Examples + -------- + >>> np.c_[np.array([1,2,3]), np.array([4,5,6])] + array([[1, 4], + [2, 5], + [3, 6]]) + >>> np.c_[np.array([[1,2,3]]), 0, 0, np.array([[4,5,6]])] + array([[1, 2, 3, ..., 4, 5, 6]]) + + """ + + def __init__(self): + AxisConcatenator.__init__(self, -1, ndmin=2, trans1d=0) + + +c_ = CClass() + + +@set_module('numpy') +class ndenumerate: + """ + Multidimensional index iterator. + + Return an iterator yielding pairs of array coordinates and values. + + Parameters + ---------- + arr : ndarray + Input array. + + See Also + -------- + ndindex, flatiter + + Examples + -------- + >>> a = np.array([[1, 2], [3, 4]]) + >>> for index, x in np.ndenumerate(a): + ... print(index, x) + (0, 0) 1 + (0, 1) 2 + (1, 0) 3 + (1, 1) 4 + + """ + + def __init__(self, arr): + self.iter = asarray(arr).flat + + def __next__(self): + """ + Standard iterator method, returns the index tuple and array value. + + Returns + ------- + coords : tuple of ints + The indices of the current iteration. + val : scalar + The array element of the current iteration. + + """ + return self.iter.coords, next(self.iter) + + def __iter__(self): + return self + + +@set_module('numpy') +class ndindex: + """ + An N-dimensional iterator object to index arrays. + + Given the shape of an array, an `ndindex` instance iterates over + the N-dimensional index of the array. At each iteration a tuple + of indices is returned, the last dimension is iterated over first. + + Parameters + ---------- + shape : ints, or a single tuple of ints + The size of each dimension of the array can be passed as + individual parameters or as the elements of a tuple. + + See Also + -------- + ndenumerate, flatiter + + Examples + -------- + Dimensions as individual arguments + + >>> for index in np.ndindex(3, 2, 1): + ... print(index) + (0, 0, 0) + (0, 1, 0) + (1, 0, 0) + (1, 1, 0) + (2, 0, 0) + (2, 1, 0) + + Same dimensions - but in a tuple ``(3, 2, 1)`` + + >>> for index in np.ndindex((3, 2, 1)): + ... print(index) + (0, 0, 0) + (0, 1, 0) + (1, 0, 0) + (1, 1, 0) + (2, 0, 0) + (2, 1, 0) + + """ + + def __init__(self, *shape): + if len(shape) == 1 and isinstance(shape[0], tuple): + shape = shape[0] + x = as_strided(_nx.zeros(1), shape=shape, + strides=_nx.zeros_like(shape)) + self._it = _nx.nditer(x, flags=['multi_index', 'zerosize_ok'], + order='C') + + def __iter__(self): + return self + + def ndincr(self): + """ + Increment the multi-dimensional index by one. + + This method is for backward compatibility only: do not use. + + .. deprecated:: 1.20.0 + This method has been advised against since numpy 1.8.0, but only + started emitting DeprecationWarning as of this version. + """ + # NumPy 1.20.0, 2020-09-08 + warnings.warn( + "`ndindex.ndincr()` is deprecated, use `next(ndindex)` instead", + DeprecationWarning, stacklevel=2) + next(self) + + def __next__(self): + """ + Standard iterator method, updates the index and returns the index + tuple. + + Returns + ------- + val : tuple of ints + Returns a tuple containing the indices of the current + iteration. + + """ + next(self._it) + return self._it.multi_index + + +# You can do all this with slice() plus a few special objects, +# but there's a lot to remember. 
This version is simpler because +# it uses the standard array indexing syntax. +# +# Written by Konrad Hinsen +# last revision: 1999-7-23 +# +# Cosmetic changes by T. Oliphant 2001 +# +# + +class IndexExpression: + """ + A nicer way to build up index tuples for arrays. + + .. note:: + Use one of the two predefined instances `index_exp` or `s_` + rather than directly using `IndexExpression`. + + For any index combination, including slicing and axis insertion, + ``a[indices]`` is the same as ``a[np.index_exp[indices]]`` for any + array `a`. However, ``np.index_exp[indices]`` can be used anywhere + in Python code and returns a tuple of slice objects that can be + used in the construction of complex index expressions. + + Parameters + ---------- + maketuple : bool + If True, always returns a tuple. + + See Also + -------- + index_exp : Predefined instance that always returns a tuple: + `index_exp = IndexExpression(maketuple=True)`. + s_ : Predefined instance without tuple conversion: + `s_ = IndexExpression(maketuple=False)`. + + Notes + ----- + You can do all this with `slice()` plus a few special objects, + but there's a lot to remember and this version is simpler because + it uses the standard array indexing syntax. + + Examples + -------- + >>> np.s_[2::2] + slice(2, None, 2) + >>> np.index_exp[2::2] + (slice(2, None, 2),) + + >>> np.array([0, 1, 2, 3, 4])[np.s_[2::2]] + array([2, 4]) + + """ + + def __init__(self, maketuple): + self.maketuple = maketuple + + def __getitem__(self, item): + if self.maketuple and not isinstance(item, tuple): + return (item,) + else: + return item + + +index_exp = IndexExpression(maketuple=True) +s_ = IndexExpression(maketuple=False) + +# End contribution from Konrad. + + +# The following functions complement those in twodim_base, but are +# applicable to N-dimensions. + + +def _fill_diagonal_dispatcher(a, val, wrap=None): + return (a,) + + +@array_function_dispatch(_fill_diagonal_dispatcher) +def fill_diagonal(a, val, wrap=False): + """Fill the main diagonal of the given array of any dimensionality. + + For an array `a` with ``a.ndim >= 2``, the diagonal is the list of + locations with indices ``a[i, ..., i]`` all identical. This function + modifies the input array in-place, it does not return a value. + + Parameters + ---------- + a : array, at least 2-D. + Array whose diagonal is to be filled, it gets modified in-place. + + val : scalar or array_like + Value(s) to write on the diagonal. If `val` is scalar, the value is + written along the diagonal. If array-like, the flattened `val` is + written along the diagonal, repeating if necessary to fill all + diagonal entries. + + wrap : bool + For tall matrices in NumPy version up to 1.6.2, the + diagonal "wrapped" after N columns. You can have this behavior + with this option. This affects only tall matrices. + + See also + -------- + diag_indices, diag_indices_from + + Notes + ----- + .. versionadded:: 1.4.0 + + This functionality can be obtained via `diag_indices`, but internally + this version uses a much faster implementation that never constructs the + indices and uses simple slicing. 
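The "simple slicing" mentioned here is visible in the implementation that follows: for a 2-D C-ordered array, consecutive elements of the main diagonal sit exactly ``a.shape[1] + 1`` positions apart in the flat view, so a single strided assignment fills the whole diagonal (with the slice end capped for tall matrices unless ``wrap=True``). The underlying idea, sketched independently of the function itself:

>>> a = np.zeros((3, 4), int)
>>> a.flat[::a.shape[1] + 1] = 9   # flat indices 0, 5, 10 -> (0, 0), (1, 1), (2, 2)
>>> a
array([[9, 0, 0, 0],
       [0, 9, 0, 0],
       [0, 0, 9, 0]])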
+ + Examples + -------- + >>> a = np.zeros((3, 3), int) + >>> np.fill_diagonal(a, 5) + >>> a + array([[5, 0, 0], + [0, 5, 0], + [0, 0, 5]]) + + The same function can operate on a 4-D array: + + >>> a = np.zeros((3, 3, 3, 3), int) + >>> np.fill_diagonal(a, 4) + + We only show a few blocks for clarity: + + >>> a[0, 0] + array([[4, 0, 0], + [0, 0, 0], + [0, 0, 0]]) + >>> a[1, 1] + array([[0, 0, 0], + [0, 4, 0], + [0, 0, 0]]) + >>> a[2, 2] + array([[0, 0, 0], + [0, 0, 0], + [0, 0, 4]]) + + The wrap option affects only tall matrices: + + >>> # tall matrices no wrap + >>> a = np.zeros((5, 3), int) + >>> np.fill_diagonal(a, 4) + >>> a + array([[4, 0, 0], + [0, 4, 0], + [0, 0, 4], + [0, 0, 0], + [0, 0, 0]]) + + >>> # tall matrices wrap + >>> a = np.zeros((5, 3), int) + >>> np.fill_diagonal(a, 4, wrap=True) + >>> a + array([[4, 0, 0], + [0, 4, 0], + [0, 0, 4], + [0, 0, 0], + [4, 0, 0]]) + + >>> # wide matrices + >>> a = np.zeros((3, 5), int) + >>> np.fill_diagonal(a, 4, wrap=True) + >>> a + array([[4, 0, 0, 0, 0], + [0, 4, 0, 0, 0], + [0, 0, 4, 0, 0]]) + + The anti-diagonal can be filled by reversing the order of elements + using either `numpy.flipud` or `numpy.fliplr`. + + >>> a = np.zeros((3, 3), int) + >>> np.fill_diagonal(np.fliplr(a), [1,2,3]) # Horizontal flip + >>> a + array([[0, 0, 1], + [0, 2, 0], + [3, 0, 0]]) + >>> np.fill_diagonal(np.flipud(a), [1,2,3]) # Vertical flip + >>> a + array([[0, 0, 3], + [0, 2, 0], + [1, 0, 0]]) + + Note that the order in which the diagonal is filled varies depending + on the flip function. + """ + if a.ndim < 2: + raise ValueError("array must be at least 2-d") + end = None + if a.ndim == 2: + # Explicit, fast formula for the common case. For 2-d arrays, we + # accept rectangular ones. + step = a.shape[1] + 1 + # This is needed so that tall matrices don't have the diagonal wrap. + if not wrap: + end = a.shape[1] * a.shape[1] + else: + # For more than d=2, the strided formula is only valid for arrays with + # all dimensions equal, so we check first. + if not alltrue(diff(a.shape) == 0): + raise ValueError("All dimensions of input must be of equal length") + step = 1 + (cumprod(a.shape[:-1])).sum() + + # Write the value out into the diagonal. + a.flat[:end:step] = val + + +@set_module('numpy') +def diag_indices(n, ndim=2): + """ + Return the indices to access the main diagonal of an array. + + This returns a tuple of indices that can be used to access the main + diagonal of an array `a` with ``a.ndim >= 2`` dimensions and shape + (n, n, ..., n). For ``a.ndim = 2`` this is the usual diagonal, for + ``a.ndim > 2`` this is the set of indices to access ``a[i, i, ..., i]`` + for ``i = [0..n-1]``. + + Parameters + ---------- + n : int + The size, along each dimension, of the arrays for which the returned + indices can be used. + + ndim : int, optional + The number of dimensions. + + See Also + -------- + diag_indices_from + + Notes + ----- + ..
versionadded:: 1.4.0 + + Examples + -------- + Create a set of indices to access the diagonal of a (4, 4) array: + + >>> di = np.diag_indices(4) + >>> di + (array([0, 1, 2, 3]), array([0, 1, 2, 3])) + >>> a = np.arange(16).reshape(4, 4) + >>> a + array([[ 0, 1, 2, 3], + [ 4, 5, 6, 7], + [ 8, 9, 10, 11], + [12, 13, 14, 15]]) + >>> a[di] = 100 + >>> a + array([[100, 1, 2, 3], + [ 4, 100, 6, 7], + [ 8, 9, 100, 11], + [ 12, 13, 14, 100]]) + + Now, we create indices to manipulate a 3-D array: + + >>> d3 = np.diag_indices(2, 3) + >>> d3 + (array([0, 1]), array([0, 1]), array([0, 1])) + + And use it to set the diagonal of an array of zeros to 1: + + >>> a = np.zeros((2, 2, 2), dtype=int) + >>> a[d3] = 1 + >>> a + array([[[1, 0], + [0, 0]], + [[0, 0], + [0, 1]]]) + + """ + idx = arange(n) + return (idx,) * ndim + + +def _diag_indices_from(arr): + return (arr,) + + +@array_function_dispatch(_diag_indices_from) +def diag_indices_from(arr): + """ + Return the indices to access the main diagonal of an n-dimensional array. + + See `diag_indices` for full details. + + Parameters + ---------- + arr : array, at least 2-D + + See Also + -------- + diag_indices + + Notes + ----- + .. versionadded:: 1.4.0 + + """ + + if not arr.ndim >= 2: + raise ValueError("input array must be at least 2-d") + # For more than d=2, the strided formula is only valid for arrays with + # all dimensions equal, so we check first. + if not alltrue(diff(arr.shape) == 0): + raise ValueError("All dimensions of input must be of equal length") + + return diag_indices(arr.shape[0], arr.ndim) diff --git a/.local/lib/python3.8/site-packages/numpy/lib/index_tricks.pyi b/.local/lib/python3.8/site-packages/numpy/lib/index_tricks.pyi new file mode 100644 index 0000000..d16faf8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/index_tricks.pyi @@ -0,0 +1,165 @@ +from typing import ( + Any, + Tuple, + TypeVar, + Generic, + overload, + List, + Union, + Sequence, + Literal, + SupportsIndex, +) + +from numpy import ( + # Circumvent a naming conflict with `AxisConcatenator.matrix` + matrix as _Matrix, + ndenumerate as ndenumerate, + ndindex as ndindex, + ndarray, + dtype, + integer, + str_, + bytes_, + bool_, + int_, + float_, + complex_, + intp, + _OrderCF, + _ModeKind, +) +from numpy.typing import ( + # Arrays + ArrayLike, + _NestedSequence, + _FiniteNestedSequence, + NDArray, + _ArrayLikeInt, + + # DTypes + DTypeLike, + _SupportsDType, + + # Shapes + _ShapeLike, +) + +from numpy.core.multiarray import ( + unravel_index as unravel_index, + ravel_multi_index as ravel_multi_index, +) + +_T = TypeVar("_T") +_DType = TypeVar("_DType", bound=dtype[Any]) +_BoolType = TypeVar("_BoolType", Literal[True], Literal[False]) +_TupType = TypeVar("_TupType", bound=Tuple[Any, ...]) +_ArrayType = TypeVar("_ArrayType", bound=ndarray[Any, Any]) + +__all__: List[str] + +@overload +def ix_(*args: _FiniteNestedSequence[_SupportsDType[_DType]]) -> Tuple[ndarray[Any, _DType], ...]: ... +@overload +def ix_(*args: str | _NestedSequence[str]) -> Tuple[NDArray[str_], ...]: ... +@overload +def ix_(*args: bytes | _NestedSequence[bytes]) -> Tuple[NDArray[bytes_], ...]: ... +@overload +def ix_(*args: bool | _NestedSequence[bool]) -> Tuple[NDArray[bool_], ...]: ... +@overload +def ix_(*args: int | _NestedSequence[int]) -> Tuple[NDArray[int_], ...]: ... +@overload +def ix_(*args: float | _NestedSequence[float]) -> Tuple[NDArray[float_], ...]: ... +@overload +def ix_(*args: complex | _NestedSequence[complex]) -> Tuple[NDArray[complex_], ...]: ... 
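+
+# A usage sketch for the `ix_` overloads above (illustrative comment, not a
+# stub): the returned index arrays form an open mesh and preserve the input
+# element type, which is what the dtype-specific overloads encode.
+#
+#     >>> import numpy as np
+#     >>> rows, cols = np.ix_([0, 2], [1, 3])
+#     >>> rows.shape, cols.shape
+#     ((2, 1), (1, 2))
+#     >>> np.arange(16).reshape(4, 4)[rows, cols]
+#     array([[ 1,  3],
+#            [ 9, 11]])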
+ +class nd_grid(Generic[_BoolType]): + sparse: _BoolType + def __init__(self, sparse: _BoolType = ...) -> None: ... + @overload + def __getitem__( + self: nd_grid[Literal[False]], + key: Union[slice, Sequence[slice]], + ) -> NDArray[Any]: ... + @overload + def __getitem__( + self: nd_grid[Literal[True]], + key: Union[slice, Sequence[slice]], + ) -> List[NDArray[Any]]: ... + +class MGridClass(nd_grid[Literal[False]]): + def __init__(self) -> None: ... + +mgrid: MGridClass + +class OGridClass(nd_grid[Literal[True]]): + def __init__(self) -> None: ... + +ogrid: OGridClass + +class AxisConcatenator: + axis: int + matrix: bool + ndmin: int + trans1d: int + def __init__( + self, + axis: int = ..., + matrix: bool = ..., + ndmin: int = ..., + trans1d: int = ..., + ) -> None: ... + @staticmethod + @overload + def concatenate( # type: ignore[misc] + *a: ArrayLike, axis: SupportsIndex = ..., out: None = ... + ) -> NDArray[Any]: ... + @staticmethod + @overload + def concatenate( + *a: ArrayLike, axis: SupportsIndex = ..., out: _ArrayType = ... + ) -> _ArrayType: ... + @staticmethod + def makemat( + data: ArrayLike, dtype: DTypeLike = ..., copy: bool = ... + ) -> _Matrix: ... + + # TODO: Sort out this `__getitem__` method + def __getitem__(self, key: Any) -> Any: ... + +class RClass(AxisConcatenator): + axis: Literal[0] + matrix: Literal[False] + ndmin: Literal[1] + trans1d: Literal[-1] + def __init__(self) -> None: ... + +r_: RClass + +class CClass(AxisConcatenator): + axis: Literal[-1] + matrix: Literal[False] + ndmin: Literal[2] + trans1d: Literal[0] + def __init__(self) -> None: ... + +c_: CClass + +class IndexExpression(Generic[_BoolType]): + maketuple: _BoolType + def __init__(self, maketuple: _BoolType) -> None: ... + @overload + def __getitem__(self, item: _TupType) -> _TupType: ... # type: ignore[misc] + @overload + def __getitem__(self: IndexExpression[Literal[True]], item: _T) -> Tuple[_T]: ... + @overload + def __getitem__(self: IndexExpression[Literal[False]], item: _T) -> _T: ... + +index_exp: IndexExpression[Literal[True]] +s_: IndexExpression[Literal[False]] + +def fill_diagonal(a: ndarray[Any, Any], val: Any, wrap: bool = ...) -> None: ... +def diag_indices(n: int, ndim: int = ...) -> Tuple[NDArray[int_], ...]: ... +def diag_indices_from(arr: ArrayLike) -> Tuple[NDArray[int_], ...]: ... 
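+
+# A brief sketch of how the instances typed above behave at runtime
+# (illustrative comment, not a stub): `r_` concatenates along the first
+# axis, while `index_exp` and `s_` differ only in tuple wrapping, matching
+# the Literal[True]/Literal[False] overloads of `IndexExpression`.
+#
+#     >>> import numpy as np
+#     >>> np.r_[np.array([1, 2]), 0, np.array([3, 4])]
+#     array([1, 2, 0, 3, 4])
+#     >>> np.index_exp[2::2]
+#     (slice(2, None, 2),)
+#     >>> np.s_[2::2]
+#     slice(2, None, 2)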
+ +# NOTE: see `numpy/__init__.pyi` for `ndenumerate` and `ndindex` diff --git a/.local/lib/python3.8/site-packages/numpy/lib/mixins.py b/.local/lib/python3.8/site-packages/numpy/lib/mixins.py new file mode 100644 index 0000000..c81239f --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/mixins.py @@ -0,0 +1,176 @@ +"""Mixin classes for custom array types that don't inherit from ndarray.""" +from numpy.core import umath as um + + +__all__ = ['NDArrayOperatorsMixin'] + + +def _disables_array_ufunc(obj): + """True when __array_ufunc__ is set to None.""" + try: + return obj.__array_ufunc__ is None + except AttributeError: + return False + + +def _binary_method(ufunc, name): + """Implement a forward binary method with a ufunc, e.g., __add__.""" + def func(self, other): + if _disables_array_ufunc(other): + return NotImplemented + return ufunc(self, other) + func.__name__ = '__{}__'.format(name) + return func + + +def _reflected_binary_method(ufunc, name): + """Implement a reflected binary method with a ufunc, e.g., __radd__.""" + def func(self, other): + if _disables_array_ufunc(other): + return NotImplemented + return ufunc(other, self) + func.__name__ = '__r{}__'.format(name) + return func + + +def _inplace_binary_method(ufunc, name): + """Implement an in-place binary method with a ufunc, e.g., __iadd__.""" + def func(self, other): + return ufunc(self, other, out=(self,)) + func.__name__ = '__i{}__'.format(name) + return func + + +def _numeric_methods(ufunc, name): + """Implement forward, reflected and inplace binary methods with a ufunc.""" + return (_binary_method(ufunc, name), + _reflected_binary_method(ufunc, name), + _inplace_binary_method(ufunc, name)) + + +def _unary_method(ufunc, name): + """Implement a unary special method with a ufunc.""" + def func(self): + return ufunc(self) + func.__name__ = '__{}__'.format(name) + return func + + +class NDArrayOperatorsMixin: + """Mixin defining all operator special methods using __array_ufunc__. + + This class implements the special methods for almost all of Python's + builtin operators defined in the `operator` module, including comparisons + (``==``, ``>``, etc.) and arithmetic (``+``, ``*``, ``-``, etc.), by + deferring to the ``__array_ufunc__`` method, which subclasses must + implement. + + It is useful for writing classes that do not inherit from `numpy.ndarray`, + but that should support arithmetic and numpy universal functions like + arrays as described in `A Mechanism for Overriding Ufuncs + <https://numpy.org/neps/nep-0013-ufunc-overrides.html>`_. + + As a trivial example, consider this implementation of an ``ArrayLike`` + class that simply wraps a NumPy array and ensures that the result of any + arithmetic operation is also an ``ArrayLike`` object:: + + class ArrayLike(np.lib.mixins.NDArrayOperatorsMixin): + def __init__(self, value): + self.value = np.asarray(value) + + # One might also consider adding the built-in list type to this + # list, to support operations like np.add(array_like, list) + _HANDLED_TYPES = (np.ndarray, numbers.Number) + + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + out = kwargs.get('out', ()) + for x in inputs + out: + # Only support operations with instances of _HANDLED_TYPES. + # Use ArrayLike instead of type(self) for isinstance to + # allow subclasses that don't override __array_ufunc__ to + # handle ArrayLike objects. + if not isinstance(x, self._HANDLED_TYPES + (ArrayLike,)): + return NotImplemented + + # Defer to the implementation of the ufunc on unwrapped values.
+ inputs = tuple(x.value if isinstance(x, ArrayLike) else x + for x in inputs) + if out: + kwargs['out'] = tuple( + x.value if isinstance(x, ArrayLike) else x + for x in out) + result = getattr(ufunc, method)(*inputs, **kwargs) + + if type(result) is tuple: + # multiple return values + return tuple(type(self)(x) for x in result) + elif method == 'at': + # no return value + return None + else: + # one return value + return type(self)(result) + + def __repr__(self): + return '%s(%r)' % (type(self).__name__, self.value) + + In interactions between ``ArrayLike`` objects and numbers or numpy arrays, + the result is always another ``ArrayLike``: + + >>> x = ArrayLike([1, 2, 3]) + >>> x - 1 + ArrayLike(array([0, 1, 2])) + >>> 1 - x + ArrayLike(array([ 0, -1, -2])) + >>> np.arange(3) - x + ArrayLike(array([-1, -1, -1])) + >>> x - np.arange(3) + ArrayLike(array([1, 1, 1])) + + Note that unlike ``numpy.ndarray``, ``ArrayLike`` does not allow operations + with arbitrary, unrecognized types. This ensures that interactions with + ArrayLike preserve a well-defined casting hierarchy. + + .. versionadded:: 1.13 + """ + # Like np.ndarray, this mixin class implements "Option 1" from the ufunc + # overrides NEP. + + # comparisons don't have reflected and in-place versions + __lt__ = _binary_method(um.less, 'lt') + __le__ = _binary_method(um.less_equal, 'le') + __eq__ = _binary_method(um.equal, 'eq') + __ne__ = _binary_method(um.not_equal, 'ne') + __gt__ = _binary_method(um.greater, 'gt') + __ge__ = _binary_method(um.greater_equal, 'ge') + + # numeric methods + __add__, __radd__, __iadd__ = _numeric_methods(um.add, 'add') + __sub__, __rsub__, __isub__ = _numeric_methods(um.subtract, 'sub') + __mul__, __rmul__, __imul__ = _numeric_methods(um.multiply, 'mul') + __matmul__, __rmatmul__, __imatmul__ = _numeric_methods( + um.matmul, 'matmul') + # Python 3 does not use __div__, __rdiv__, or __idiv__ + __truediv__, __rtruediv__, __itruediv__ = _numeric_methods( + um.true_divide, 'truediv') + __floordiv__, __rfloordiv__, __ifloordiv__ = _numeric_methods( + um.floor_divide, 'floordiv') + __mod__, __rmod__, __imod__ = _numeric_methods(um.remainder, 'mod') + __divmod__ = _binary_method(um.divmod, 'divmod') + __rdivmod__ = _reflected_binary_method(um.divmod, 'divmod') + # __idivmod__ does not exist + # TODO: handle the optional third argument for __pow__? 
+ __pow__, __rpow__, __ipow__ = _numeric_methods(um.power, 'pow') + __lshift__, __rlshift__, __ilshift__ = _numeric_methods( + um.left_shift, 'lshift') + __rshift__, __rrshift__, __irshift__ = _numeric_methods( + um.right_shift, 'rshift') + __and__, __rand__, __iand__ = _numeric_methods(um.bitwise_and, 'and') + __xor__, __rxor__, __ixor__ = _numeric_methods(um.bitwise_xor, 'xor') + __or__, __ror__, __ior__ = _numeric_methods(um.bitwise_or, 'or') + + # unary methods + __neg__ = _unary_method(um.negative, 'neg') + __pos__ = _unary_method(um.positive, 'pos') + __abs__ = _unary_method(um.absolute, 'abs') + __invert__ = _unary_method(um.invert, 'invert') diff --git a/.local/lib/python3.8/site-packages/numpy/lib/mixins.pyi b/.local/lib/python3.8/site-packages/numpy/lib/mixins.pyi new file mode 100644 index 0000000..f137bb5 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/mixins.pyi @@ -0,0 +1,62 @@ +from typing import List +from abc import ABCMeta, abstractmethod + +__all__: List[str] + +# NOTE: `NDArrayOperatorsMixin` is not formally an abstract baseclass, +# even though it's reliant on subclasses implementing `__array_ufunc__` + +class NDArrayOperatorsMixin(metaclass=ABCMeta): + @abstractmethod + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): ... + def __lt__(self, other): ... + def __le__(self, other): ... + def __eq__(self, other): ... + def __ne__(self, other): ... + def __gt__(self, other): ... + def __ge__(self, other): ... + def __add__(self, other): ... + def __radd__(self, other): ... + def __iadd__(self, other): ... + def __sub__(self, other): ... + def __rsub__(self, other): ... + def __isub__(self, other): ... + def __mul__(self, other): ... + def __rmul__(self, other): ... + def __imul__(self, other): ... + def __matmul__(self, other): ... + def __rmatmul__(self, other): ... + def __imatmul__(self, other): ... + def __truediv__(self, other): ... + def __rtruediv__(self, other): ... + def __itruediv__(self, other): ... + def __floordiv__(self, other): ... + def __rfloordiv__(self, other): ... + def __ifloordiv__(self, other): ... + def __mod__(self, other): ... + def __rmod__(self, other): ... + def __imod__(self, other): ... + def __divmod__(self, other): ... + def __rdivmod__(self, other): ... + def __pow__(self, other): ... + def __rpow__(self, other): ... + def __ipow__(self, other): ... + def __lshift__(self, other): ... + def __rlshift__(self, other): ... + def __ilshift__(self, other): ... + def __rshift__(self, other): ... + def __rrshift__(self, other): ... + def __irshift__(self, other): ... + def __and__(self, other): ... + def __rand__(self, other): ... + def __iand__(self, other): ... + def __xor__(self, other): ... + def __rxor__(self, other): ... + def __ixor__(self, other): ... + def __or__(self, other): ... + def __ror__(self, other): ... + def __ior__(self, other): ... + def __neg__(self): ... + def __pos__(self): ... + def __abs__(self): ... + def __invert__(self): ... diff --git a/.local/lib/python3.8/site-packages/numpy/lib/nanfunctions.py b/.local/lib/python3.8/site-packages/numpy/lib/nanfunctions.py new file mode 100644 index 0000000..d7ea1ca --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/nanfunctions.py @@ -0,0 +1,1887 @@ +""" +Functions that ignore NaN. 
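+
+For example (an illustrative doctest), where the plain reduction propagates
+NaN, the NaN-aware variant skips it:
+
+>>> import numpy as np
+>>> np.mean([1.0, np.nan, 3.0])
+nan
+>>> np.nanmean([1.0, np.nan, 3.0])
+2.0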
+ +Functions +--------- + +- `nanmin` -- minimum non-NaN value +- `nanmax` -- maximum non-NaN value +- `nanargmin` -- index of minimum non-NaN value +- `nanargmax` -- index of maximum non-NaN value +- `nansum` -- sum of non-NaN values +- `nanprod` -- product of non-NaN values +- `nancumsum` -- cumulative sum of non-NaN values +- `nancumprod` -- cumulative product of non-NaN values +- `nanmean` -- mean of non-NaN values +- `nanvar` -- variance of non-NaN values +- `nanstd` -- standard deviation of non-NaN values +- `nanmedian` -- median of non-NaN values +- `nanquantile` -- qth quantile of non-NaN values +- `nanpercentile` -- qth percentile of non-NaN values + +""" +import functools +import warnings +import numpy as np +from numpy.lib import function_base +from numpy.core import overrides + + +array_function_dispatch = functools.partial( + overrides.array_function_dispatch, module='numpy') + + +__all__ = [ + 'nansum', 'nanmax', 'nanmin', 'nanargmax', 'nanargmin', 'nanmean', + 'nanmedian', 'nanpercentile', 'nanvar', 'nanstd', 'nanprod', + 'nancumsum', 'nancumprod', 'nanquantile' + ] + + +def _nan_mask(a, out=None): + """ + Parameters + ---------- + a : array-like + Input array with at least 1 dimension. + out : ndarray, optional + Alternate output array in which to place the result. The default + is ``None``; if provided, it must have the same shape as the + expected output and will prevent the allocation of a new array. + + Returns + ------- + y : bool ndarray or True + A bool array where ``np.nan`` positions are marked with ``False`` + and other positions are marked with ``True``. If the type of ``a`` + is such that it can't possibly contain ``np.nan``, returns ``True``. + """ + # we assume that a is an array for this private function + + if a.dtype.kind not in 'fc': + return True + + y = np.isnan(a, out=out) + y = np.invert(y, out=y) + return y + +def _replace_nan(a, val): + """ + If `a` is of inexact type, make a copy of `a`, replace NaNs with + the `val` value, and return the copy together with a boolean mask + marking the locations where NaNs were present. If `a` is not of + inexact type, do nothing and return `a` together with a mask of None. + + Note that scalars will end up as array scalars, which is important + for using the result as the value of the out argument in some + operations. + + Parameters + ---------- + a : array-like + Input array. + val : float + NaN values are set to val before doing the operation. + + Returns + ------- + y : ndarray + If `a` is of inexact type, return a copy of `a` with the NaNs + replaced by the fill value, otherwise return `a`. + mask: {bool, None} + If `a` is of inexact type, return a boolean mask marking locations of + NaNs, otherwise return None. + + """ + a = np.asanyarray(a) + + if a.dtype == np.object_: + # object arrays do not support `isnan` (gh-9009), so make a guess + mask = np.not_equal(a, a, dtype=bool) + elif issubclass(a.dtype.type, np.inexact): + mask = np.isnan(a) + else: + mask = None + + if mask is not None: + a = np.array(a, subok=True, copy=True) + np.copyto(a, val, where=mask) + + return a, mask + + +def _copyto(a, val, mask): + """ + Replace values in `a` with NaN where `mask` is True. This differs from + copyto in that it will deal with the case where `a` is a numpy scalar. + + Parameters + ---------- + a : ndarray or numpy scalar + Array or numpy scalar some of whose values are to be replaced + by val. + val : numpy scalar + Value used as a replacement. + mask : ndarray, scalar + Boolean array.
Where True the corresponding element of `a` is + replaced by `val`. Broadcasts. + + Returns + ------- + res : ndarray, scalar + Array with elements replaced or scalar `val`. + + """ + if isinstance(a, np.ndarray): + np.copyto(a, val, where=mask, casting='unsafe') + else: + a = a.dtype.type(val) + return a + + +def _remove_nan_1d(arr1d, overwrite_input=False): + """ + Equivalent to arr1d[~np.isnan(arr1d)], but in a different order + + Presumably faster as it incurs fewer copies + + Parameters + ---------- + arr1d : ndarray + Array to remove nans from + overwrite_input : bool + True if `arr1d` can be modified in place + + Returns + ------- + res : ndarray + Array with nan elements removed + overwrite_input : bool + True if `res` can be modified in place, given the constraint on the + input + """ + if arr1d.dtype == object: + # object arrays do not support `isnan` (gh-9009), so make a guess + c = np.not_equal(arr1d, arr1d, dtype=bool) + else: + c = np.isnan(arr1d) + + s = np.nonzero(c)[0] + if s.size == arr1d.size: + warnings.warn("All-NaN slice encountered", RuntimeWarning, + stacklevel=5) + return arr1d[:0], True + elif s.size == 0: + return arr1d, overwrite_input + else: + if not overwrite_input: + arr1d = arr1d.copy() + # select non-nans at end of array + enonan = arr1d[-s.size:][~c[-s.size:]] + # fill nans in beginning of array with non-nans of end + arr1d[s[:enonan.size]] = enonan + + return arr1d[:-s.size], True + + +def _divide_by_count(a, b, out=None): + """ + Compute a/b ignoring invalid results. If `a` is an array the division + is done in place. If `a` is a scalar, then its type is preserved in the + output. If out is None, then `a` is used instead so that the + division is in place. Note that this is only called with `a` an inexact + type. + + Parameters + ---------- + a : {ndarray, numpy scalar} + Numerator. Expected to be of inexact type but not checked. + b : {ndarray, numpy scalar} + Denominator. + out : ndarray, optional + Alternate output array in which to place the result. The default + is ``None``; if provided, it must have the same shape as the + expected output, but the type will be cast if necessary. + + Returns + ------- + ret : {ndarray, numpy scalar} + The return value is a/b. If `a` was an ndarray the division is done + in place. If `a` is a numpy scalar, the division preserves its type. + + """ + with np.errstate(invalid='ignore', divide='ignore'): + if isinstance(a, np.ndarray): + if out is None: + return np.divide(a, b, out=a, casting='unsafe') + else: + return np.divide(a, b, out=out, casting='unsafe') + else: + if out is None: + # Precaution against reduced object arrays + try: + return a.dtype.type(a / b) + except AttributeError: + return a / b + else: + # This is questionable, but currently a numpy scalar can + # be output to a zero dimensional array. + return np.divide(a, b, out=out, casting='unsafe') + + +def _nanmin_dispatcher(a, axis=None, out=None, keepdims=None, + initial=None, where=None): + return (a, out) + + +@array_function_dispatch(_nanmin_dispatcher) +def nanmin(a, axis=None, out=None, keepdims=np._NoValue, initial=np._NoValue, + where=np._NoValue): + """ + Return the minimum of an array or minimum along an axis, ignoring any NaNs. + When all-NaN slices are encountered a ``RuntimeWarning`` is raised and + NaN is returned for that slice. + + Parameters + ---------- + a : array_like + Array containing numbers whose minimum is desired. If `a` is not an + array, a conversion is attempted.
+ axis : {int, tuple of int, None}, optional + Axis or axes along which the minimum is computed. The default is to compute + the minimum of the flattened array. + out : ndarray, optional + Alternate output array in which to place the result. The default + is ``None``; if provided, it must have the same shape as the + expected output, but the type will be cast if necessary. See + :ref:`ufuncs-output-type` for more details. + + .. versionadded:: 1.8.0 + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the original `a`. + + If the value is anything but the default, then + `keepdims` will be passed through to the `min` method + of sub-classes of `ndarray`. If the sub-class's method + does not implement `keepdims`, any exceptions will be raised. + + .. versionadded:: 1.8.0 + initial : scalar, optional + The maximum value of an output element. Must be present to allow + computation on an empty slice. See `~numpy.ufunc.reduce` for details. + + .. versionadded:: 1.22.0 + where : array_like of bool, optional + Elements to compare for the minimum. See `~numpy.ufunc.reduce` + for details. + + .. versionadded:: 1.22.0 + + Returns + ------- + nanmin : ndarray + An array with the same shape as `a`, with the specified axis + removed. If `a` is a 0-d array, or if axis is None, an ndarray + scalar is returned. The same dtype as `a` is returned. + + See Also + -------- + nanmax : + The maximum value of an array along a given axis, ignoring any NaNs. + amin : + The minimum value of an array along a given axis, propagating any NaNs. + fmin : + Element-wise minimum of two arrays, ignoring any NaNs. + minimum : + Element-wise minimum of two arrays, propagating any NaNs. + isnan : + Shows which elements are Not a Number (NaN). + isfinite : + Shows which elements are neither NaN nor infinity. + + amax, fmax, maximum + + Notes + ----- + NumPy uses the IEEE Standard for Binary Floating-Point for Arithmetic + (IEEE 754). This means that Not a Number is not equivalent to infinity. + Positive infinity is treated as a very large number and negative + infinity is treated as a very small (i.e. negative) number. + + If the input has an integer type the function is equivalent to np.min.
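+
+For instance (illustrative), integer input cannot hold NaN, so the two
+functions agree:
+
+>>> np.nanmin(np.array([3, 1, 2])) == np.min(np.array([3, 1, 2]))
+True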
+ + Examples + -------- + >>> a = np.array([[1, 2], [3, np.nan]]) + >>> np.nanmin(a) + 1.0 + >>> np.nanmin(a, axis=0) + array([1., 2.]) + >>> np.nanmin(a, axis=1) + array([1., 3.]) + + When positive infinity and negative infinity are present: + + >>> np.nanmin([1, 2, np.nan, np.inf]) + 1.0 + >>> np.nanmin([1, 2, np.nan, np.NINF]) + -inf + + """ + kwargs = {} + if keepdims is not np._NoValue: + kwargs['keepdims'] = keepdims + if initial is not np._NoValue: + kwargs['initial'] = initial + if where is not np._NoValue: + kwargs['where'] = where + + if type(a) is np.ndarray and a.dtype != np.object_: + # Fast, but not safe for subclasses of ndarray, or object arrays, + # which do not implement isnan (gh-9009), or fmin correctly (gh-8975) + res = np.fmin.reduce(a, axis=axis, out=out, **kwargs) + if np.isnan(res).any(): + warnings.warn("All-NaN slice encountered", RuntimeWarning, + stacklevel=3) + else: + # Slow, but safe for subclasses of ndarray + a, mask = _replace_nan(a, +np.inf) + res = np.amin(a, axis=axis, out=out, **kwargs) + if mask is None: + return res + + # Check for all-NaN axis + kwargs.pop("initial", None) + mask = np.all(mask, axis=axis, **kwargs) + if np.any(mask): + res = _copyto(res, np.nan, mask) + warnings.warn("All-NaN axis encountered", RuntimeWarning, + stacklevel=3) + return res + + +def _nanmax_dispatcher(a, axis=None, out=None, keepdims=None, + initial=None, where=None): + return (a, out) + + +@array_function_dispatch(_nanmax_dispatcher) +def nanmax(a, axis=None, out=None, keepdims=np._NoValue, initial=np._NoValue, + where=np._NoValue): + """ + Return the maximum of an array or maximum along an axis, ignoring any + NaNs. When all-NaN slices are encountered a ``RuntimeWarning`` is + raised and NaN is returned for that slice. + + Parameters + ---------- + a : array_like + Array containing numbers whose maximum is desired. If `a` is not an + array, a conversion is attempted. + axis : {int, tuple of int, None}, optional + Axis or axes along which the maximum is computed. The default is to compute + the maximum of the flattened array. + out : ndarray, optional + Alternate output array in which to place the result. The default + is ``None``; if provided, it must have the same shape as the + expected output, but the type will be cast if necessary. See + :ref:`ufuncs-output-type` for more details. + + .. versionadded:: 1.8.0 + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the original `a`. + + If the value is anything but the default, then + `keepdims` will be passed through to the `max` method + of sub-classes of `ndarray`. If the sub-class's method + does not implement `keepdims`, any exceptions will be raised. + + .. versionadded:: 1.8.0 + initial : scalar, optional + The minimum value of an output element. Must be present to allow + computation on an empty slice. See `~numpy.ufunc.reduce` for details. + + .. versionadded:: 1.22.0 + where : array_like of bool, optional + Elements to compare for the maximum. See `~numpy.ufunc.reduce` + for details. + + .. versionadded:: 1.22.0 + + Returns + ------- + nanmax : ndarray + An array with the same shape as `a`, with the specified axis removed. + If `a` is a 0-d array, or if axis is None, an ndarray scalar is + returned. The same dtype as `a` is returned. + + See Also + -------- + nanmin : + The minimum value of an array along a given axis, ignoring any NaNs.
+ amax : + The maximum value of an array along a given axis, propagating any NaNs. + fmax : + Element-wise maximum of two arrays, ignoring any NaNs. + maximum : + Element-wise maximum of two arrays, propagating any NaNs. + isnan : + Shows which elements are Not a Number (NaN). + isfinite : + Shows which elements are neither NaN nor infinity. + + amin, fmin, minimum + + Notes + ----- + NumPy uses the IEEE Standard for Binary Floating-Point for Arithmetic + (IEEE 754). This means that Not a Number is not equivalent to infinity. + Positive infinity is treated as a very large number and negative + infinity is treated as a very small (i.e. negative) number. + + If the input has an integer type the function is equivalent to np.max. + + Examples + -------- + >>> a = np.array([[1, 2], [3, np.nan]]) + >>> np.nanmax(a) + 3.0 + >>> np.nanmax(a, axis=0) + array([3., 2.]) + >>> np.nanmax(a, axis=1) + array([2., 3.]) + + When positive infinity and negative infinity are present: + + >>> np.nanmax([1, 2, np.nan, np.NINF]) + 2.0 + >>> np.nanmax([1, 2, np.nan, np.inf]) + inf + + """ + kwargs = {} + if keepdims is not np._NoValue: + kwargs['keepdims'] = keepdims + if initial is not np._NoValue: + kwargs['initial'] = initial + if where is not np._NoValue: + kwargs['where'] = where + + if type(a) is np.ndarray and a.dtype != np.object_: + # Fast, but not safe for subclasses of ndarray, or object arrays, + # which do not implement isnan (gh-9009), or fmax correctly (gh-8975) + res = np.fmax.reduce(a, axis=axis, out=out, **kwargs) + if np.isnan(res).any(): + warnings.warn("All-NaN slice encountered", RuntimeWarning, + stacklevel=3) + else: + # Slow, but safe for subclasses of ndarray + a, mask = _replace_nan(a, -np.inf) + res = np.amax(a, axis=axis, out=out, **kwargs) + if mask is None: + return res + + # Check for all-NaN axis + kwargs.pop("initial", None) + mask = np.all(mask, axis=axis, **kwargs) + if np.any(mask): + res = _copyto(res, np.nan, mask) + warnings.warn("All-NaN axis encountered", RuntimeWarning, + stacklevel=3) + return res + + +def _nanargmin_dispatcher(a, axis=None, out=None, *, keepdims=None): + return (a,) + + +@array_function_dispatch(_nanargmin_dispatcher) +def nanargmin(a, axis=None, out=None, *, keepdims=np._NoValue): + """ + Return the indices of the minimum values in the specified axis ignoring + NaNs. For all-NaN slices ``ValueError`` is raised. Warning: the results + cannot be trusted if a slice contains only NaNs and Infs. + + Parameters + ---------- + a : array_like + Input data. + axis : int, optional + Axis along which to operate. By default flattened input is used. + out : array, optional + If provided, the result will be inserted into this array. It should + be of the appropriate shape and dtype. + + .. versionadded:: 1.22.0 + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the array. + + .. versionadded:: 1.22.0 + + Returns + ------- + index_array : ndarray + An array of indices or a single index value.
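+
+For instance (illustrative), NaN entries are skipped when locating the
+minimum, so the returned index points at a non-NaN element:
+
+>>> np.nanargmin([np.nan, 2.0, 1.0])
+2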
+ + See Also + -------- + argmin, nanargmax + + Examples + -------- + >>> a = np.array([[np.nan, 4], [2, 3]]) + >>> np.argmin(a) + 0 + >>> np.nanargmin(a) + 2 + >>> np.nanargmin(a, axis=0) + array([1, 1]) + >>> np.nanargmin(a, axis=1) + array([1, 0]) + + """ + a, mask = _replace_nan(a, np.inf) + if mask is not None: + mask = np.all(mask, axis=axis) + if np.any(mask): + raise ValueError("All-NaN slice encountered") + res = np.argmin(a, axis=axis, out=out, keepdims=keepdims) + return res + + +def _nanargmax_dispatcher(a, axis=None, out=None, *, keepdims=None): + return (a,) + + +@array_function_dispatch(_nanargmax_dispatcher) +def nanargmax(a, axis=None, out=None, *, keepdims=np._NoValue): + """ + Return the indices of the maximum values in the specified axis ignoring + NaNs. For all-NaN slices ``ValueError`` is raised. Warning: the + results cannot be trusted if a slice contains only NaNs and -Infs. + + Parameters + ---------- + a : array_like + Input data. + axis : int, optional + Axis along which to operate. By default flattened input is used. + out : array, optional + If provided, the result will be inserted into this array. It should + be of the appropriate shape and dtype. + + .. versionadded:: 1.22.0 + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the array. + + .. versionadded:: 1.22.0 + + Returns + ------- + index_array : ndarray + An array of indices or a single index value. + + See Also + -------- + argmax, nanargmin + + Examples + -------- + >>> a = np.array([[np.nan, 4], [2, 3]]) + >>> np.argmax(a) + 0 + >>> np.nanargmax(a) + 1 + >>> np.nanargmax(a, axis=0) + array([1, 0]) + >>> np.nanargmax(a, axis=1) + array([1, 1]) + + """ + a, mask = _replace_nan(a, -np.inf) + if mask is not None: + mask = np.all(mask, axis=axis) + if np.any(mask): + raise ValueError("All-NaN slice encountered") + res = np.argmax(a, axis=axis, out=out, keepdims=keepdims) + return res + + +def _nansum_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None, + initial=None, where=None): + return (a, out) + + +@array_function_dispatch(_nansum_dispatcher) +def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue, + initial=np._NoValue, where=np._NoValue): + """ + Return the sum of array elements over a given axis treating Not a + Numbers (NaNs) as zero. + + In NumPy versions <= 1.9.0 NaN is returned for slices that are all-NaN or + empty. In later versions zero is returned. + + Parameters + ---------- + a : array_like + Array containing numbers whose sum is desired. If `a` is not an + array, a conversion is attempted. + axis : {int, tuple of int, None}, optional + Axis or axes along which the sum is computed. The default is to compute the + sum of the flattened array. + dtype : data-type, optional + The type of the returned array and of the accumulator in which the + elements are summed. By default, the dtype of `a` is used. An + exception is when `a` has an integer type with less precision than + the platform (u)intp. In that case, the default will be either + (u)int32 or (u)int64 depending on whether the platform is 32 or 64 + bits. For inexact inputs, dtype must be inexact. + + .. versionadded:: 1.8.0 + out : ndarray, optional + Alternate output array in which to place the result. The default + is ``None``. If provided, it must have the same shape as the + expected output, but the type will be cast if necessary.
See + :ref:`ufuncs-output-type` for more details. The casting of NaN to integer + can yield unexpected results. + + .. versionadded:: 1.8.0 + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the original `a`. + + If the value is anything but the default, then + `keepdims` will be passed through to the `mean` or `sum` methods + of sub-classes of `ndarray`. If the sub-class's method + does not implement `keepdims`, any exceptions will be raised. + + .. versionadded:: 1.8.0 + initial : scalar, optional + Starting value for the sum. See `~numpy.ufunc.reduce` for details. + + .. versionadded:: 1.22.0 + where : array_like of bool, optional + Elements to include in the sum. See `~numpy.ufunc.reduce` for details. + + .. versionadded:: 1.22.0 + + Returns + ------- + nansum : ndarray + A new array holding the result is returned unless `out` is + specified, in which case it is returned. The result has the same + size as `a`, and the same shape as `a` if `axis` is not None + or `a` is a 1-d array. + + See Also + -------- + numpy.sum : Sum across array propagating NaNs. + isnan : Show which elements are NaN. + isfinite : Show which elements are not NaN or +/-inf. + + Notes + ----- + If both positive and negative infinity are present, the sum will be Not + A Number (NaN). + + Examples + -------- + >>> np.nansum(1) + 1 + >>> np.nansum([1]) + 1 + >>> np.nansum([1, np.nan]) + 1.0 + >>> a = np.array([[1, 1], [1, np.nan]]) + >>> np.nansum(a) + 3.0 + >>> np.nansum(a, axis=0) + array([2., 1.]) + >>> np.nansum([1, np.nan, np.inf]) + inf + >>> np.nansum([1, np.nan, np.NINF]) + -inf + >>> from numpy.testing import suppress_warnings + >>> with suppress_warnings() as sup: + ... sup.filter(RuntimeWarning) + ... np.nansum([1, np.nan, np.inf, -np.inf]) # both +/- infinity present + nan + + """ + a, mask = _replace_nan(a, 0) + return np.sum(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims, + initial=initial, where=where) + + +def _nanprod_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None, + initial=None, where=None): + return (a, out) + + +@array_function_dispatch(_nanprod_dispatcher) +def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue, + initial=np._NoValue, where=np._NoValue): + """ + Return the product of array elements over a given axis treating Not a + Numbers (NaNs) as ones. + + One is returned for slices that are all-NaN or empty. + + .. versionadded:: 1.10.0 + + Parameters + ---------- + a : array_like + Array containing numbers whose product is desired. If `a` is not an + array, a conversion is attempted. + axis : {int, tuple of int, None}, optional + Axis or axes along which the product is computed. The default is to compute + the product of the flattened array. + dtype : data-type, optional + The type of the returned array and of the accumulator in which the + elements are multiplied. By default, the dtype of `a` is used. An + exception is when `a` has an integer type with less precision than + the platform (u)intp. In that case, the default will be either + (u)int32 or (u)int64 depending on whether the platform is 32 or 64 + bits. For inexact inputs, dtype must be inexact. + out : ndarray, optional + Alternate output array in which to place the result. The default + is ``None``. If provided, it must have the same shape as the + expected output, but the type will be cast if necessary. See + :ref:`ufuncs-output-type` for more details.
The casting of NaN to integer + can yield unexpected results. + keepdims : bool, optional + If True, the axes which are reduced are left in the result as + dimensions with size one. With this option, the result will + broadcast correctly against the original `a`. + initial : scalar, optional + The starting value for this product. See `~numpy.ufunc.reduce` + for details. + + .. versionadded:: 1.22.0 + where : array_like of bool, optional + Elements to include in the product. See `~numpy.ufunc.reduce` + for details. + + .. versionadded:: 1.22.0 + + Returns + ------- + nanprod : ndarray + A new array holding the result is returned unless `out` is + specified, in which case it is returned. + + See Also + -------- + numpy.prod : Product across array propagating NaNs. + isnan : Show which elements are NaN. + + Examples + -------- + >>> np.nanprod(1) + 1 + >>> np.nanprod([1]) + 1 + >>> np.nanprod([1, np.nan]) + 1.0 + >>> a = np.array([[1, 2], [3, np.nan]]) + >>> np.nanprod(a) + 6.0 + >>> np.nanprod(a, axis=0) + array([3., 2.]) + + """ + a, mask = _replace_nan(a, 1) + return np.prod(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims, + initial=initial, where=where) + + +def _nancumsum_dispatcher(a, axis=None, dtype=None, out=None): + return (a, out) + + +@array_function_dispatch(_nancumsum_dispatcher) +def nancumsum(a, axis=None, dtype=None, out=None): + """ + Return the cumulative sum of array elements over a given axis treating Not a + Numbers (NaNs) as zero. The cumulative sum does not change when NaNs are + encountered and leading NaNs are replaced by zeros. + + Zeros are returned for slices that are all-NaN or empty. + + .. versionadded:: 1.12.0 + + Parameters + ---------- + a : array_like + Input array. + axis : int, optional + Axis along which the cumulative sum is computed. The default + (None) is to compute the cumsum over the flattened array. + dtype : dtype, optional + Type of the returned array and of the accumulator in which the + elements are summed. If `dtype` is not specified, it defaults + to the dtype of `a`, unless `a` has an integer dtype with a + precision less than that of the default platform integer. In + that case, the default platform integer is used. + out : ndarray, optional + Alternative output array in which to place the result. It must + have the same shape and buffer length as the expected output + but the type will be cast if necessary. See :ref:`ufuncs-output-type` for + more details. + + Returns + ------- + nancumsum : ndarray + A new array holding the result is returned unless `out` is + specified, in which case it is returned. The result has the same + size as `a`, and the same shape as `a` if `axis` is not None + or `a` is a 1-d array. + + See Also + -------- + numpy.cumsum : Cumulative sum across array propagating NaNs. + isnan : Show which elements are NaN.
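+
+Note (an illustrative doctest): a leading NaN contributes zero to the
+running sum instead of propagating:
+
+>>> np.nancumsum([np.nan, 1, 2])
+array([0., 1., 3.])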
+ + Examples + -------- + >>> np.nancumsum(1) + array([1]) + >>> np.nancumsum([1]) + array([1]) + >>> np.nancumsum([1, np.nan]) + array([1., 1.]) + >>> a = np.array([[1, 2], [3, np.nan]]) + >>> np.nancumsum(a) + array([1., 3., 6., 6.]) + >>> np.nancumsum(a, axis=0) + array([[1., 2.], + [4., 2.]]) + >>> np.nancumsum(a, axis=1) + array([[1., 3.], + [3., 3.]]) + + """ + a, mask = _replace_nan(a, 0) + return np.cumsum(a, axis=axis, dtype=dtype, out=out) + + +def _nancumprod_dispatcher(a, axis=None, dtype=None, out=None): + return (a, out) + + +@array_function_dispatch(_nancumprod_dispatcher) +def nancumprod(a, axis=None, dtype=None, out=None): + """ + Return the cumulative product of array elements over a given axis treating Not a + Numbers (NaNs) as one. The cumulative product does not change when NaNs are + encountered and leading NaNs are replaced by ones. + + Ones are returned for slices that are all-NaN or empty. + + .. versionadded:: 1.12.0 + + Parameters + ---------- + a : array_like + Input array. + axis : int, optional + Axis along which the cumulative product is computed. By default + the input is flattened. + dtype : dtype, optional + Type of the returned array, as well as of the accumulator in which + the elements are multiplied. If *dtype* is not specified, it + defaults to the dtype of `a`, unless `a` has an integer dtype with + a precision less than that of the default platform integer. In + that case, the default platform integer is used instead. + out : ndarray, optional + Alternative output array in which to place the result. It must + have the same shape and buffer length as the expected output + but the type of the resulting values will be cast if necessary. + + Returns + ------- + nancumprod : ndarray + A new array holding the result is returned unless `out` is + specified, in which case it is returned. + + See Also + -------- + numpy.cumprod : Cumulative product across array propagating NaNs. + isnan : Show which elements are NaN. + + Examples + -------- + >>> np.nancumprod(1) + array([1]) + >>> np.nancumprod([1]) + array([1]) + >>> np.nancumprod([1, np.nan]) + array([1., 1.]) + >>> a = np.array([[1, 2], [3, np.nan]]) + >>> np.nancumprod(a) + array([1., 2., 6., 6.]) + >>> np.nancumprod(a, axis=0) + array([[1., 2.], + [3., 2.]]) + >>> np.nancumprod(a, axis=1) + array([[1., 2.], + [3., 3.]]) + + """ + a, mask = _replace_nan(a, 1) + return np.cumprod(a, axis=axis, dtype=dtype, out=out) + + +def _nanmean_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None, + *, where=None): + return (a, out) + + +@array_function_dispatch(_nanmean_dispatcher) +def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue, + *, where=np._NoValue): + """ + Compute the arithmetic mean along the specified axis, ignoring NaNs. + + Returns the average of the array elements. The average is taken over + the flattened array by default, otherwise over the specified axis. + `float64` intermediate and return values are used for integer inputs. + + For all-NaN slices, NaN is returned and a `RuntimeWarning` is raised. + + .. versionadded:: 1.8.0 + + Parameters + ---------- + a : array_like + Array containing numbers whose mean is desired. If `a` is not an + array, a conversion is attempted. + axis : {int, tuple of int, None}, optional + Axis or axes along which the means are computed. The default is to compute + the mean of the flattened array. + dtype : data-type, optional + Type to use in computing the mean. 
For integer inputs, the default + is `float64`; for inexact inputs, it is the same as the input + dtype. + out : ndarray, optional + Alternate output array in which to place the result. The default + is ``None``; if provided, it must have the same shape as the + expected output, but the type will be cast if necessary. See + :ref:`ufuncs-output-type` for more details. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the original `a`. + + If the value is anything but the default, then + `keepdims` will be passed through to the `mean` or `sum` methods + of sub-classes of `ndarray`. If the sub-class's method + does not implement `keepdims`, any exceptions will be raised. + where : array_like of bool, optional + Elements to include in the mean. See `~numpy.ufunc.reduce` for details. + + .. versionadded:: 1.22.0 + + Returns + ------- + m : ndarray, see dtype parameter above + If `out=None`, returns a new array containing the mean values, + otherwise a reference to the output array is returned. NaN is + returned for slices that contain only NaNs. + + See Also + -------- + average : Weighted average + mean : Arithmetic mean taken while not ignoring NaNs + var, nanvar + + Notes + ----- + The arithmetic mean is the sum of the non-NaN elements along the axis + divided by the number of non-NaN elements. + + Note that for floating-point input, the mean is computed using the same + precision the input has. Depending on the input data, this can cause + the results to be inaccurate, especially for `float32`. Specifying a + higher-precision accumulator using the `dtype` keyword can alleviate + this issue. + + Examples + -------- + >>> a = np.array([[1, np.nan], [3, 4]]) + >>> np.nanmean(a) + 2.6666666666666665 + >>> np.nanmean(a, axis=0) + array([2., 4.]) + >>> np.nanmean(a, axis=1) + array([1., 3.5]) # may vary + + """ + arr, mask = _replace_nan(a, 0) + if mask is None: + return np.mean(arr, axis=axis, dtype=dtype, out=out, keepdims=keepdims, + where=where) + + if dtype is not None: + dtype = np.dtype(dtype) + if dtype is not None and not issubclass(dtype.type, np.inexact): + raise TypeError("If a is inexact, then dtype must be inexact") + if out is not None and not issubclass(out.dtype.type, np.inexact): + raise TypeError("If a is inexact, then out must be inexact") + + cnt = np.sum(~mask, axis=axis, dtype=np.intp, keepdims=keepdims, + where=where) + tot = np.sum(arr, axis=axis, dtype=dtype, out=out, keepdims=keepdims, + where=where) + avg = _divide_by_count(tot, cnt, out=out) + + isbad = (cnt == 0) + if isbad.any(): + warnings.warn("Mean of empty slice", RuntimeWarning, stacklevel=3) + # NaN is the only possible bad value, so no further + # action is needed to handle bad results. + return avg + + +def _nanmedian1d(arr1d, overwrite_input=False): + """ + Private function for rank 1 arrays. Compute the median ignoring NaNs. + See nanmedian for parameter usage. + """ + arr1d_parsed, overwrite_input = _remove_nan_1d( + arr1d, overwrite_input=overwrite_input, + ) + + if arr1d_parsed.size == 0: + # Ensure that a nan-esque scalar of the appropriate type (and unit) + # is returned for `timedelta64` and `complexfloating` + return arr1d[-1] + + return np.median(arr1d_parsed, overwrite_input=overwrite_input) + + +def _nanmedian(a, axis=None, out=None, overwrite_input=False): + """ + Private function that doesn't support extended axis or keepdims.
+ These methods are extended to this function using _ureduce. + See nanmedian for parameter usage. + + """ + if axis is None or a.ndim == 1: + part = a.ravel() + if out is None: + return _nanmedian1d(part, overwrite_input) + else: + out[...] = _nanmedian1d(part, overwrite_input) + return out + else: + # for small medians use sort + indexing which is still faster than + # apply_along_axis + # benchmarked with shuffled (50, 50, x) containing a few NaN + if a.shape[axis] < 600: + return _nanmedian_small(a, axis, out, overwrite_input) + result = np.apply_along_axis(_nanmedian1d, axis, a, overwrite_input) + if out is not None: + out[...] = result + return result + + +def _nanmedian_small(a, axis=None, out=None, overwrite_input=False): + """ + sort + indexing median, faster for small medians along multiple + dimensions due to the high overhead of apply_along_axis + + see nanmedian for parameter usage + """ + a = np.ma.masked_array(a, np.isnan(a)) + m = np.ma.median(a, axis=axis, overwrite_input=overwrite_input) + for i in range(np.count_nonzero(m.mask.ravel())): + warnings.warn("All-NaN slice encountered", RuntimeWarning, + stacklevel=4) + + fill_value = np.timedelta64("NaT") if m.dtype.kind == "m" else np.nan + if out is not None: + out[...] = m.filled(fill_value) + return out + return m.filled(fill_value) + + +def _nanmedian_dispatcher( + a, axis=None, out=None, overwrite_input=None, keepdims=None): + return (a, out) + + +@array_function_dispatch(_nanmedian_dispatcher) +def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=np._NoValue): + """ + Compute the median along the specified axis, while ignoring NaNs. + + Returns the median of the array elements. + + .. versionadded:: 1.9.0 + + Parameters + ---------- + a : array_like + Input array or object that can be converted to an array. + axis : {int, sequence of int, None}, optional + Axis or axes along which the medians are computed. The default + is to compute the median along a flattened version of the array. + A sequence of axes is supported since version 1.9.0. + out : ndarray, optional + Alternative output array in which to place the result. It must + have the same shape and buffer length as the expected output, + but the type (of the output) will be cast if necessary. + overwrite_input : bool, optional + If True, then allow use of memory of input array `a` for + calculations. The input array will be modified by the call to + `median`. This will save memory when you do not need to preserve + the contents of the input array. Treat the input as undefined, + but it will probably be fully or partially sorted. Default is + False. If `overwrite_input` is ``True`` and `a` is not already an + `ndarray`, an error will be raised. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the original `a`. + + If this is anything but the default value it will be passed + through (in the special case of an empty array) to the + `mean` function of the underlying array. If the array is + a sub-class and `mean` does not have the kwarg `keepdims` this + will raise a RuntimeError. + + Returns + ------- + median : ndarray + A new array holding the result. If the input contains integers + or floats smaller than ``float64``, then the output data-type is + ``np.float64``. Otherwise, the data-type of the output is the + same as that of the input. If `out` is specified, that array is + returned instead.
+ + See Also + -------- + mean, median, percentile + + Notes + ----- + Given a vector ``V`` of length ``N``, the median of ``V`` is the + middle value of a sorted copy of ``V``, ``V_sorted`` - i.e., + ``V_sorted[(N-1)/2]``, when ``N`` is odd and the average of the two + middle values of ``V_sorted`` when ``N`` is even. + + Examples + -------- + >>> a = np.array([[10.0, 7, 4], [3, 2, 1]]) + >>> a[0, 1] = np.nan + >>> a + array([[10., nan, 4.], + [ 3., 2., 1.]]) + >>> np.median(a) + nan + >>> np.nanmedian(a) + 3.0 + >>> np.nanmedian(a, axis=0) + array([6.5, 2. , 2.5]) + >>> np.median(a, axis=1) + array([nan, 2.]) + >>> b = a.copy() + >>> np.nanmedian(b, axis=1, overwrite_input=True) + array([7., 2.]) + >>> assert not np.all(a==b) + >>> b = a.copy() + >>> np.nanmedian(b, axis=None, overwrite_input=True) + 3.0 + >>> assert not np.all(a==b) + + """ + a = np.asanyarray(a) + # apply_along_axis in _nanmedian doesn't handle empty arrays well, + # so deal with them upfront + if a.size == 0: + return np.nanmean(a, axis, out=out, keepdims=keepdims) + + r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out, + overwrite_input=overwrite_input) + if keepdims and keepdims is not np._NoValue: + return r.reshape(k) + else: + return r + + +def _nanpercentile_dispatcher( + a, q, axis=None, out=None, overwrite_input=None, + method=None, keepdims=None, *, interpolation=None): + return (a, q, out) + + +@array_function_dispatch(_nanpercentile_dispatcher) +def nanpercentile( + a, + q, + axis=None, + out=None, + overwrite_input=False, + method="linear", + keepdims=np._NoValue, + *, + interpolation=None, +): + """ + Compute the qth percentile of the data along the specified axis, + while ignoring nan values. + + Returns the qth percentile(s) of the array elements. + + .. versionadded:: 1.9.0 + + Parameters + ---------- + a : array_like + Input array or object that can be converted to an array, containing + nan values to be ignored. + q : array_like of float + Percentile or sequence of percentiles to compute, which must be + between 0 and 100 inclusive. + axis : {int, tuple of int, None}, optional + Axis or axes along which the percentiles are computed. The default + is to compute the percentile(s) along a flattened version of the + array. + out : ndarray, optional + Alternative output array in which to place the result. It must have + the same shape and buffer length as the expected output, but the + type (of the output) will be cast if necessary. + overwrite_input : bool, optional + If True, then allow the input array `a` to be modified by + intermediate calculations, to save memory. In this case, the + contents of the input `a` after this function completes is + undefined. + method : str, optional + This parameter specifies the method to use for estimating the + percentile. There are many different methods, some unique to NumPy. + See the notes for explanation. The options sorted by their R type + as summarized in the H&F paper [1]_ are: + + 1. 'inverted_cdf' + 2. 'averaged_inverted_cdf' + 3. 'closest_observation' + 4. 'interpolated_inverted_cdf' + 5. 'hazen' + 6. 'weibull' + 7. 'linear' (default) + 8. 'median_unbiased' + 9. 'normal_unbiased' + + The first three methods are discontinuous. NumPy further defines the + following discontinuous variations of the default 'linear' (7.) option: + + * 'lower' + * 'higher' + * 'midpoint' + * 'nearest' + + .. versionchanged:: 1.22.0 + This argument was previously called "interpolation" and only + offered the "linear" default and last four options.
+ + keepdims : bool, optional + If this is set to True, the axes which are reduced are left in + the result as dimensions with size one. With this option, the + result will broadcast correctly against the original array `a`. + + If this is anything but the default value it will be passed + through (in the special case of an empty array) to the + `mean` function of the underlying array. If the array is + a sub-class and `mean` does not have the kwarg `keepdims` this + will raise a RuntimeError. + + interpolation : str, optional + Deprecated name for the method keyword argument. + + .. deprecated:: 1.22.0 + + Returns + ------- + percentile : scalar or ndarray + If `q` is a single percentile and `axis=None`, then the result + is a scalar. If multiple percentiles are given, first axis of + the result corresponds to the percentiles. The other axes are + the axes that remain after the reduction of `a`. If the input + contains integers or floats smaller than ``float64``, the output + data-type is ``float64``. Otherwise, the output data-type is the + same as that of the input. If `out` is specified, that array is + returned instead. + + See Also + -------- + nanmean + nanmedian : equivalent to ``nanpercentile(..., 50)`` + percentile, median, mean + nanquantile : equivalent to nanpercentile, except q in range [0, 1]. + + Notes + ----- + For more information please see `numpy.percentile` + + Examples + -------- + >>> a = np.array([[10., 7., 4.], [3., 2., 1.]]) + >>> a[0][1] = np.nan + >>> a + array([[10., nan, 4.], + [ 3., 2., 1.]]) + >>> np.percentile(a, 50) + nan + >>> np.nanpercentile(a, 50) + 3.0 + >>> np.nanpercentile(a, 50, axis=0) + array([6.5, 2. , 2.5]) + >>> np.nanpercentile(a, 50, axis=1, keepdims=True) + array([[7.], + [2.]]) + >>> m = np.nanpercentile(a, 50, axis=0) + >>> out = np.zeros_like(m) + >>> np.nanpercentile(a, 50, axis=0, out=out) + array([6.5, 2. , 2.5]) + >>> m + array([6.5, 2. , 2.5]) + + >>> b = a.copy() + >>> np.nanpercentile(b, 50, axis=1, overwrite_input=True) + array([7., 2.]) + >>> assert not np.all(a==b) + + References + ---------- + .. [1] R. J. Hyndman and Y. Fan, + "Sample quantiles in statistical packages," + The American Statistician, 50(4), pp. 361-365, 1996 + + """ + if interpolation is not None: + method = function_base._check_interpolation_as_method( + method, interpolation, "nanpercentile") + + a = np.asanyarray(a) + q = np.true_divide(q, 100.0) + # undo any decay that the ufunc performed (see gh-13105) + q = np.asanyarray(q) + if not function_base._quantile_is_valid(q): + raise ValueError("Percentiles must be in the range [0, 100]") + return _nanquantile_unchecked( + a, q, axis, out, overwrite_input, method, keepdims) + + +def _nanquantile_dispatcher(a, q, axis=None, out=None, overwrite_input=None, + method=None, keepdims=None, *, interpolation=None): + return (a, q, out) + + +@array_function_dispatch(_nanquantile_dispatcher) +def nanquantile( + a, + q, + axis=None, + out=None, + overwrite_input=False, + method="linear", + keepdims=np._NoValue, + *, + interpolation=None, +): + """ + Compute the qth quantile of the data along the specified axis, + while ignoring nan values. + Returns the qth quantile(s) of the array elements. + + .. versionadded:: 1.15.0 + + Parameters + ---------- + a : array_like + Input array or object that can be converted to an array, containing + nan values to be ignored + q : array_like of float + Quantile or sequence of quantiles to compute, which must be between + 0 and 1 inclusive. 
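+        For example, ``q=0.5`` computes the NaN-ignoring median.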
+ axis : {int, tuple of int, None}, optional + Axis or axes along which the quantiles are computed. The + default is to compute the quantile(s) along a flattened + version of the array. + out : ndarray, optional + Alternative output array in which to place the result. It must + have the same shape and buffer length as the expected output, + but the type (of the output) will be cast if necessary. + overwrite_input : bool, optional + If True, then allow the input array `a` to be modified by intermediate + calculations, to save memory. In this case, the contents of the input + `a` after this function completes is undefined. + method : str, optional + This parameter specifies the method to use for estimating the + quantile. There are many different methods, some unique to NumPy. + See the notes for explanation. The options sorted by their R type + as summarized in the H&F paper [1]_ are: + + 1. 'inverted_cdf' + 2. 'averaged_inverted_cdf' + 3. 'closest_observation' + 4. 'interpolated_inverted_cdf' + 5. 'hazen' + 6. 'weibull' + 7. 'linear' (default) + 8. 'median_unbiased' + 9. 'normal_unbiased' + + The first three methods are discontiuous. NumPy further defines the + following discontinuous variations of the default 'linear' (7.) option: + + * 'lower' + * 'higher', + * 'midpoint' + * 'nearest' + + .. versionchanged:: 1.22.0 + This argument was previously called "interpolation" and only + offered the "linear" default and last four options. + + keepdims : bool, optional + If this is set to True, the axes which are reduced are left in + the result as dimensions with size one. With this option, the + result will broadcast correctly against the original array `a`. + + If this is anything but the default value it will be passed + through (in the special case of an empty array) to the + `mean` function of the underlying array. If the array is + a sub-class and `mean` does not have the kwarg `keepdims` this + will raise a RuntimeError. + + interpolation : str, optional + Deprecated name for the method keyword argument. + + .. deprecated:: 1.22.0 + + Returns + ------- + quantile : scalar or ndarray + If `q` is a single percentile and `axis=None`, then the result + is a scalar. If multiple quantiles are given, first axis of + the result corresponds to the quantiles. The other axes are + the axes that remain after the reduction of `a`. If the input + contains integers or floats smaller than ``float64``, the output + data-type is ``float64``. Otherwise, the output data-type is the + same as that of the input. If `out` is specified, that array is + returned instead. + + See Also + -------- + quantile + nanmean, nanmedian + nanmedian : equivalent to ``nanquantile(..., 0.5)`` + nanpercentile : same as nanquantile, but with q in the range [0, 100]. + + Notes + ----- + For more information please see `numpy.quantile` + + Examples + -------- + >>> a = np.array([[10., 7., 4.], [3., 2., 1.]]) + >>> a[0][1] = np.nan + >>> a + array([[10., nan, 4.], + [ 3., 2., 1.]]) + >>> np.quantile(a, 0.5) + nan + >>> np.nanquantile(a, 0.5) + 3.0 + >>> np.nanquantile(a, 0.5, axis=0) + array([6.5, 2. , 2.5]) + >>> np.nanquantile(a, 0.5, axis=1, keepdims=True) + array([[7.], + [2.]]) + >>> m = np.nanquantile(a, 0.5, axis=0) + >>> out = np.zeros_like(m) + >>> np.nanquantile(a, 0.5, axis=0, out=out) + array([6.5, 2. , 2.5]) + >>> m + array([6.5, 2. , 2.5]) + >>> b = a.copy() + >>> np.nanquantile(b, 0.5, axis=1, overwrite_input=True) + array([7., 2.]) + >>> assert not np.all(a==b) + + References + ---------- + .. [1] R. J. 
Hyndman and Y. Fan, + "Sample quantiles in statistical packages," + The American Statistician, 50(4), pp. 361-365, 1996 + + """ + if interpolation is not None: + method = function_base._check_interpolation_as_method( + method, interpolation, "nanquantile") + + a = np.asanyarray(a) + q = np.asanyarray(q) + if not function_base._quantile_is_valid(q): + raise ValueError("Quantiles must be in the range [0, 1]") + return _nanquantile_unchecked( + a, q, axis, out, overwrite_input, method, keepdims) + + +def _nanquantile_unchecked( + a, + q, + axis=None, + out=None, + overwrite_input=False, + method="linear", + keepdims=np._NoValue, +): + """Assumes that q is in [0, 1], and is an ndarray""" + # apply_along_axis in _nanpercentile doesn't handle empty arrays well, + # so deal them upfront + if a.size == 0: + return np.nanmean(a, axis, out=out, keepdims=keepdims) + r, k = function_base._ureduce(a, + func=_nanquantile_ureduce_func, + q=q, + axis=axis, + out=out, + overwrite_input=overwrite_input, + method=method) + if keepdims and keepdims is not np._NoValue: + return r.reshape(q.shape + k) + else: + return r + + +def _nanquantile_ureduce_func(a, q, axis=None, out=None, overwrite_input=False, + method="linear"): + """ + Private function that doesn't support extended axis or keepdims. + These methods are extended to this function using _ureduce + See nanpercentile for parameter usage + """ + if axis is None or a.ndim == 1: + part = a.ravel() + result = _nanquantile_1d(part, q, overwrite_input, method) + else: + result = np.apply_along_axis(_nanquantile_1d, axis, a, q, + overwrite_input, method) + # apply_along_axis fills in collapsed axis with results. + # Move that axis to the beginning to match percentile's + # convention. + if q.ndim != 0: + result = np.moveaxis(result, axis, 0) + + if out is not None: + out[...] = result + return result + + +def _nanquantile_1d(arr1d, q, overwrite_input=False, method="linear"): + """ + Private function for rank 1 arrays. Compute quantile ignoring NaNs. + See nanpercentile for parameter usage + """ + arr1d, overwrite_input = _remove_nan_1d(arr1d, + overwrite_input=overwrite_input) + if arr1d.size == 0: + # convert to scalar + return np.full(q.shape, np.nan, dtype=arr1d.dtype)[()] + + return function_base._quantile_unchecked( + arr1d, q, overwrite_input=overwrite_input, method=method) + + +def _nanvar_dispatcher(a, axis=None, dtype=None, out=None, ddof=None, + keepdims=None, *, where=None): + return (a, out) + + +@array_function_dispatch(_nanvar_dispatcher) +def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue, + *, where=np._NoValue): + """ + Compute the variance along the specified axis, while ignoring NaNs. + + Returns the variance of the array elements, a measure of the spread of + a distribution. The variance is computed for the flattened array by + default, otherwise over the specified axis. + + For all-NaN slices or slices with zero degrees of freedom, NaN is + returned and a `RuntimeWarning` is raised. + + .. versionadded:: 1.8.0 + + Parameters + ---------- + a : array_like + Array containing numbers whose variance is desired. If `a` is not an + array, a conversion is attempted. + axis : {int, tuple of int, None}, optional + Axis or axes along which the variance is computed. The default is to compute + the variance of the flattened array. + dtype : data-type, optional + Type to use in computing the variance. For arrays of integer type + the default is `float64`; for arrays of float types it is the same as + the array type. 
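+        Passing ``dtype=np.float64`` for a float32 input selects a
+        higher-precision accumulator (see Notes below).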
+ out : ndarray, optional + Alternate output array in which to place the result. It must have + the same shape as the expected output, but the type is cast if + necessary. + ddof : int, optional + "Delta Degrees of Freedom": the divisor used in the calculation is + ``N - ddof``, where ``N`` represents the number of non-NaN + elements. By default `ddof` is zero. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the original `a`. + where : array_like of bool, optional + Elements to include in the variance. See `~numpy.ufunc.reduce` for + details. + + .. versionadded:: 1.22.0 + + Returns + ------- + variance : ndarray, see dtype parameter above + If `out` is None, return a new array containing the variance, + otherwise return a reference to the output array. If ddof is >= the + number of non-NaN elements in a slice or the slice contains only + NaNs, then the result for that slice is NaN. + + See Also + -------- + std : Standard deviation + mean : Average + var : Variance while not ignoring NaNs + nanstd, nanmean + :ref:`ufuncs-output-type` + + Notes + ----- + The variance is the average of the squared deviations from the mean, + i.e., ``var = mean(abs(x - x.mean())**2)``. + + The mean is normally calculated as ``x.sum() / N``, where ``N = len(x)``. + If, however, `ddof` is specified, the divisor ``N - ddof`` is used + instead. In standard statistical practice, ``ddof=1`` provides an + unbiased estimator of the variance of a hypothetical infinite + population. ``ddof=0`` provides a maximum likelihood estimate of the + variance for normally distributed variables. + + Note that for complex numbers, the absolute value is taken before + squaring, so that the result is always real and nonnegative. + + For floating-point input, the variance is computed using the same + precision the input has. Depending on the input data, this can cause + the results to be inaccurate, especially for `float32` (see example + below). Specifying a higher-accuracy accumulator using the ``dtype`` + keyword can alleviate this issue. + + For this function to work on sub-classes of ndarray, they must define + `sum` with the kwarg `keepdims` + + Examples + -------- + >>> a = np.array([[1, np.nan], [3, 4]]) + >>> np.nanvar(a) + 1.5555555555555554 + >>> np.nanvar(a, axis=0) + array([1., 0.]) + >>> np.nanvar(a, axis=1) + array([0., 0.25]) # may vary + + """ + arr, mask = _replace_nan(a, 0) + if mask is None: + return np.var(arr, axis=axis, dtype=dtype, out=out, ddof=ddof, + keepdims=keepdims, where=where) + + if dtype is not None: + dtype = np.dtype(dtype) + if dtype is not None and not issubclass(dtype.type, np.inexact): + raise TypeError("If a is inexact, then dtype must be inexact") + if out is not None and not issubclass(out.dtype.type, np.inexact): + raise TypeError("If a is inexact, then out must be inexact") + + # Compute mean + if type(arr) is np.matrix: + _keepdims = np._NoValue + else: + _keepdims = True + # we need to special case matrix for reverse compatibility + # in order for this to work, these sums need to be called with + # keepdims=True, however matrix now raises an error in this case, but + # the reason that it drops the keepdims kwarg is to force keepdims=True + # so this used to work by serendipity. 
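+    # `cnt` counts the non-NaN entries along the reduction axis; the NaNs in
+    # `arr` were replaced with 0 above, so the sum is unaffected by them and
+    # dividing by `cnt` yields the NaN-ignoring mean.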
+ cnt = np.sum(~mask, axis=axis, dtype=np.intp, keepdims=_keepdims, + where=where) + avg = np.sum(arr, axis=axis, dtype=dtype, keepdims=_keepdims, where=where) + avg = _divide_by_count(avg, cnt) + + # Compute squared deviation from mean. + np.subtract(arr, avg, out=arr, casting='unsafe', where=where) + arr = _copyto(arr, 0, mask) + if issubclass(arr.dtype.type, np.complexfloating): + sqr = np.multiply(arr, arr.conj(), out=arr, where=where).real + else: + sqr = np.multiply(arr, arr, out=arr, where=where) + + # Compute variance. + var = np.sum(sqr, axis=axis, dtype=dtype, out=out, keepdims=keepdims, + where=where) + + # Precaution against reduced object arrays + try: + var_ndim = var.ndim + except AttributeError: + var_ndim = np.ndim(var) + if var_ndim < cnt.ndim: + # Subclasses of ndarray may ignore keepdims, so check here. + cnt = cnt.squeeze(axis) + dof = cnt - ddof + var = _divide_by_count(var, dof) + + isbad = (dof <= 0) + if np.any(isbad): + warnings.warn("Degrees of freedom <= 0 for slice.", RuntimeWarning, + stacklevel=3) + # NaN, inf, or negative numbers are all possible bad + # values, so explicitly replace them with NaN. + var = _copyto(var, np.nan, isbad) + return var + + +def _nanstd_dispatcher(a, axis=None, dtype=None, out=None, ddof=None, + keepdims=None, *, where=None): + return (a, out) + + +@array_function_dispatch(_nanstd_dispatcher) +def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue, + *, where=np._NoValue): + """ + Compute the standard deviation along the specified axis, while + ignoring NaNs. + + Returns the standard deviation, a measure of the spread of a + distribution, of the non-NaN array elements. The standard deviation is + computed for the flattened array by default, otherwise over the + specified axis. + + For all-NaN slices or slices with zero degrees of freedom, NaN is + returned and a `RuntimeWarning` is raised. + + .. versionadded:: 1.8.0 + + Parameters + ---------- + a : array_like + Calculate the standard deviation of the non-NaN values. + axis : {int, tuple of int, None}, optional + Axis or axes along which the standard deviation is computed. The default is + to compute the standard deviation of the flattened array. + dtype : dtype, optional + Type to use in computing the standard deviation. For arrays of + integer type the default is float64, for arrays of float types it + is the same as the array type. + out : ndarray, optional + Alternative output array in which to place the result. It must have + the same shape as the expected output but the type (of the + calculated values) will be cast if necessary. + ddof : int, optional + Means Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of non-NaN + elements. By default `ddof` is zero. + + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the original `a`. + + If this value is anything but the default it is passed through + as-is to the relevant functions of the sub-classes. If these + functions do not have a `keepdims` kwarg, a RuntimeError will + be raised. + where : array_like of bool, optional + Elements to include in the standard deviation. + See `~numpy.ufunc.reduce` for details. + + .. versionadded:: 1.22.0 + + Returns + ------- + standard_deviation : ndarray, see dtype parameter above. 
+ If `out` is None, return a new array containing the standard + deviation, otherwise return a reference to the output array. If + ddof is >= the number of non-NaN elements in a slice or the slice + contains only NaNs, then the result for that slice is NaN. + + See Also + -------- + var, mean, std + nanvar, nanmean + :ref:`ufuncs-output-type` + + Notes + ----- + The standard deviation is the square root of the average of the squared + deviations from the mean: ``std = sqrt(mean(abs(x - x.mean())**2))``. + + The average squared deviation is normally calculated as + ``x.sum() / N``, where ``N = len(x)``. If, however, `ddof` is + specified, the divisor ``N - ddof`` is used instead. In standard + statistical practice, ``ddof=1`` provides an unbiased estimator of the + variance of the infinite population. ``ddof=0`` provides a maximum + likelihood estimate of the variance for normally distributed variables. + The standard deviation computed in this function is the square root of + the estimated variance, so even with ``ddof=1``, it will not be an + unbiased estimate of the standard deviation per se. + + Note that, for complex numbers, `std` takes the absolute value before + squaring, so that the result is always real and nonnegative. + + For floating-point input, the *std* is computed using the same + precision the input has. Depending on the input data, this can cause + the results to be inaccurate, especially for float32 (see example + below). Specifying a higher-accuracy accumulator using the `dtype` + keyword can alleviate this issue. + + Examples + -------- + >>> a = np.array([[1, np.nan], [3, 4]]) + >>> np.nanstd(a) + 1.247219128924647 + >>> np.nanstd(a, axis=0) + array([1., 0.]) + >>> np.nanstd(a, axis=1) + array([0., 0.5]) # may vary + + """ + var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof, + keepdims=keepdims, where=where) + if isinstance(var, np.ndarray): + std = np.sqrt(var, out=var) + elif hasattr(var, 'dtype'): + std = var.dtype.type(np.sqrt(var)) + else: + std = np.sqrt(var) + return std diff --git a/.local/lib/python3.8/site-packages/numpy/lib/nanfunctions.pyi b/.local/lib/python3.8/site-packages/numpy/lib/nanfunctions.pyi new file mode 100644 index 0000000..54b4a7e --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/nanfunctions.pyi @@ -0,0 +1,40 @@ +from typing import List + +from numpy.core.fromnumeric import ( + amin, + amax, + argmin, + argmax, + sum, + prod, + cumsum, + cumprod, + mean, + var, + std +) + +from numpy.lib.function_base import ( + median, + percentile, + quantile, +) + +__all__: List[str] + +# NOTE: In reaility these functions are not aliases but distinct functions +# with identical signatures. +nanmin = amin +nanmax = amax +nanargmin = argmin +nanargmax = argmax +nansum = sum +nanprod = prod +nancumsum = cumsum +nancumprod = cumprod +nanmean = mean +nanvar = var +nanstd = std +nanmedian = median +nanpercentile = percentile +nanquantile = quantile diff --git a/.local/lib/python3.8/site-packages/numpy/lib/npyio.py b/.local/lib/python3.8/site-packages/numpy/lib/npyio.py new file mode 100644 index 0000000..6c34e95 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/npyio.py @@ -0,0 +1,2380 @@ +import os +import re +import functools +import itertools +import warnings +import weakref +import contextlib +from operator import itemgetter, index as opindex, methodcaller +from collections.abc import Mapping + +import numpy as np +from . 
import format +from ._datasource import DataSource +from numpy.core import overrides +from numpy.core.multiarray import packbits, unpackbits +from numpy.core.overrides import set_array_function_like_doc, set_module +from ._iotools import ( + LineSplitter, NameValidator, StringConverter, ConverterError, + ConverterLockError, ConversionWarning, _is_string_like, + has_nested_fields, flatten_dtype, easy_dtype, _decode_line + ) + +from numpy.compat import ( + asbytes, asstr, asunicode, os_fspath, os_PathLike, + pickle + ) + + +__all__ = [ + 'savetxt', 'loadtxt', 'genfromtxt', + 'recfromtxt', 'recfromcsv', 'load', 'save', 'savez', + 'savez_compressed', 'packbits', 'unpackbits', 'fromregex', 'DataSource' + ] + + +array_function_dispatch = functools.partial( + overrides.array_function_dispatch, module='numpy') + + +class BagObj: + """ + BagObj(obj) + + Convert attribute look-ups to getitems on the object passed in. + + Parameters + ---------- + obj : class instance + Object on which attribute look-up is performed. + + Examples + -------- + >>> from numpy.lib.npyio import BagObj as BO + >>> class BagDemo: + ... def __getitem__(self, key): # An instance of BagObj(BagDemo) + ... # will call this method when any + ... # attribute look-up is required + ... result = "Doesn't matter what you want, " + ... return result + "you're gonna get this" + ... + >>> demo_obj = BagDemo() + >>> bagobj = BO(demo_obj) + >>> bagobj.hello_there + "Doesn't matter what you want, you're gonna get this" + >>> bagobj.I_can_be_anything + "Doesn't matter what you want, you're gonna get this" + + """ + + def __init__(self, obj): + # Use weakref to make NpzFile objects collectable by refcount + self._obj = weakref.proxy(obj) + + def __getattribute__(self, key): + try: + return object.__getattribute__(self, '_obj')[key] + except KeyError: + raise AttributeError(key) from None + + def __dir__(self): + """ + Enables dir(bagobj) to list the files in an NpzFile. + + This also enables tab-completion in an interpreter or IPython. + """ + return list(object.__getattribute__(self, '_obj').keys()) + + +def zipfile_factory(file, *args, **kwargs): + """ + Create a ZipFile. + + Allows for Zip64, and the `file` argument can accept file, str, or + pathlib.Path objects. `args` and `kwargs` are passed to the zipfile.ZipFile + constructor. + """ + if not hasattr(file, 'read'): + file = os_fspath(file) + import zipfile + kwargs['allowZip64'] = True + return zipfile.ZipFile(file, *args, **kwargs) + + +class NpzFile(Mapping): + """ + NpzFile(fid) + + A dictionary-like object with lazy-loading of files in the zipped + archive provided on construction. + + `NpzFile` is used to load files in the NumPy ``.npz`` data archive + format. It assumes that files in the archive have a ``.npy`` extension, + other files are ignored. + + The arrays and file strings are lazily loaded on either + getitem access using ``obj['key']`` or attribute lookup using + ``obj.f.key``. A list of all files (without ``.npy`` extensions) can + be obtained with ``obj.files`` and the ZipFile object itself using + ``obj.zip``. + + Attributes + ---------- + files : list of str + List of all files in the archive with a ``.npy`` extension. + zip : ZipFile instance + The ZipFile object initialized with the zipped archive. + f : BagObj instance + An object on which attribute can be performed as an alternative + to getitem access on the `NpzFile` instance itself. + allow_pickle : bool, optional + Allow loading pickled data. Default: False + + .. 
versionchanged:: 1.16.3 + Made default False in response to CVE-2019-6446. + + pickle_kwargs : dict, optional + Additional keyword arguments to pass on to pickle.load. + These are only useful when loading object arrays saved on + Python 2 when using Python 3. + + Parameters + ---------- + fid : file or str + The zipped archive to open. This is either a file-like object + or a string containing the path to the archive. + own_fid : bool, optional + Whether NpzFile should close the file handle. + Requires that `fid` is a file-like object. + + Examples + -------- + >>> from tempfile import TemporaryFile + >>> outfile = TemporaryFile() + >>> x = np.arange(10) + >>> y = np.sin(x) + >>> np.savez(outfile, x=x, y=y) + >>> _ = outfile.seek(0) + + >>> npz = np.load(outfile) + >>> isinstance(npz, np.lib.io.NpzFile) + True + >>> sorted(npz.files) + ['x', 'y'] + >>> npz['x'] # getitem access + array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) + >>> npz.f.x # attribute lookup + array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) + + """ + # Make __exit__ safe if zipfile_factory raises an exception + zip = None + fid = None + + def __init__(self, fid, own_fid=False, allow_pickle=False, + pickle_kwargs=None): + # Import is postponed to here since zipfile depends on gzip, an + # optional component of the so-called standard library. + _zip = zipfile_factory(fid) + self._files = _zip.namelist() + self.files = [] + self.allow_pickle = allow_pickle + self.pickle_kwargs = pickle_kwargs + for x in self._files: + if x.endswith('.npy'): + self.files.append(x[:-4]) + else: + self.files.append(x) + self.zip = _zip + self.f = BagObj(self) + if own_fid: + self.fid = fid + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.close() + + def close(self): + """ + Close the file. + + """ + if self.zip is not None: + self.zip.close() + self.zip = None + if self.fid is not None: + self.fid.close() + self.fid = None + self.f = None # break reference cycle + + def __del__(self): + self.close() + + # Implement the Mapping ABC + def __iter__(self): + return iter(self.files) + + def __len__(self): + return len(self.files) + + def __getitem__(self, key): + # FIXME: This seems like it will copy strings around + # more than is strictly necessary. The zipfile + # will read the string and then + # the format.read_array will copy the string + # to another place in memory. + # It would be better if the zipfile could read + # (or at least uncompress) the data + # directly into the array memory. + member = False + if key in self._files: + member = True + elif key in self.files: + member = True + key += '.npy' + if member: + bytes = self.zip.open(key) + magic = bytes.read(len(format.MAGIC_PREFIX)) + bytes.close() + if magic == format.MAGIC_PREFIX: + bytes = self.zip.open(key) + return format.read_array(bytes, + allow_pickle=self.allow_pickle, + pickle_kwargs=self.pickle_kwargs) + else: + return self.zip.read(key) + else: + raise KeyError("%s is not a file in the archive" % key) + + # deprecate the python 2 dict apis that we supported by accident in + # python 3. We forgot to implement itervalues() at all in earlier + # versions of numpy, so no need to deprecated it here. + + def iteritems(self): + # Numpy 1.15, 2018-02-20 + warnings.warn( + "NpzFile.iteritems is deprecated in python 3, to match the " + "removal of dict.itertems. 
Use .items() instead.", + DeprecationWarning, stacklevel=2) + return self.items() + + def iterkeys(self): + # Numpy 1.15, 2018-02-20 + warnings.warn( + "NpzFile.iterkeys is deprecated in python 3, to match the " + "removal of dict.iterkeys. Use .keys() instead.", + DeprecationWarning, stacklevel=2) + return self.keys() + + +@set_module('numpy') +def load(file, mmap_mode=None, allow_pickle=False, fix_imports=True, + encoding='ASCII'): + """ + Load arrays or pickled objects from ``.npy``, ``.npz`` or pickled files. + + .. warning:: Loading files that contain object arrays uses the ``pickle`` + module, which is not secure against erroneous or maliciously + constructed data. Consider passing ``allow_pickle=False`` to + load data that is known not to contain object arrays for the + safer handling of untrusted sources. + + Parameters + ---------- + file : file-like object, string, or pathlib.Path + The file to read. File-like objects must support the + ``seek()`` and ``read()`` methods. Pickled files require that the + file-like object support the ``readline()`` method as well. + mmap_mode : {None, 'r+', 'r', 'w+', 'c'}, optional + If not None, then memory-map the file, using the given mode (see + `numpy.memmap` for a detailed description of the modes). A + memory-mapped array is kept on disk. However, it can be accessed + and sliced like any ndarray. Memory mapping is especially useful + for accessing small fragments of large files without reading the + entire file into memory. + allow_pickle : bool, optional + Allow loading pickled object arrays stored in npy files. Reasons for + disallowing pickles include security, as loading pickled data can + execute arbitrary code. If pickles are disallowed, loading object + arrays will fail. Default: False + + .. versionchanged:: 1.16.3 + Made default False in response to CVE-2019-6446. + + fix_imports : bool, optional + Only useful when loading Python 2 generated pickled files on Python 3, + which includes npy/npz files containing object arrays. If `fix_imports` + is True, pickle will try to map the old Python 2 names to the new names + used in Python 3. + encoding : str, optional + What encoding to use when reading Python 2 strings. Only useful when + loading Python 2 generated pickled files in Python 3, which includes + npy/npz files containing object arrays. Values other than 'latin1', + 'ASCII', and 'bytes' are not allowed, as they can corrupt numerical + data. Default: 'ASCII' + + Returns + ------- + result : array, tuple, dict, etc. + Data stored in the file. For ``.npz`` files, the returned instance + of NpzFile class must be closed to avoid leaking file descriptors. + + Raises + ------ + OSError + If the input file does not exist or cannot be read. + UnpicklingError + If ``allow_pickle=True``, but the file cannot be loaded as a pickle. + ValueError + The file contains an object array, but ``allow_pickle=False`` given. + + See Also + -------- + save, savez, savez_compressed, loadtxt + memmap : Create a memory-map to an array stored in a file on disk. + lib.format.open_memmap : Create or load a memory-mapped ``.npy`` file. + + Notes + ----- + - If the file contains pickle data, then whatever object is stored + in the pickle is returned. + - If the file is a ``.npy`` file, then a single array is returned. + - If the file is a ``.npz`` file, then a dictionary-like object is + returned, containing ``{filename: array}`` key-value pairs, one for + each file in the archive. 
+ - If the file is a ``.npz`` file, the returned value supports the + context manager protocol in a similar fashion to the open function:: + + with load('foo.npz') as data: + a = data['a'] + + The underlying file descriptor is closed when exiting the 'with' + block. + + Examples + -------- + Store data to disk, and load it again: + + >>> np.save('/tmp/123', np.array([[1, 2, 3], [4, 5, 6]])) + >>> np.load('/tmp/123.npy') + array([[1, 2, 3], + [4, 5, 6]]) + + Store compressed data to disk, and load it again: + + >>> a=np.array([[1, 2, 3], [4, 5, 6]]) + >>> b=np.array([1, 2]) + >>> np.savez('/tmp/123.npz', a=a, b=b) + >>> data = np.load('/tmp/123.npz') + >>> data['a'] + array([[1, 2, 3], + [4, 5, 6]]) + >>> data['b'] + array([1, 2]) + >>> data.close() + + Mem-map the stored array, and then access the second row + directly from disk: + + >>> X = np.load('/tmp/123.npy', mmap_mode='r') + >>> X[1, :] + memmap([4, 5, 6]) + + """ + if encoding not in ('ASCII', 'latin1', 'bytes'): + # The 'encoding' value for pickle also affects what encoding + # the serialized binary data of NumPy arrays is loaded + # in. Pickle does not pass on the encoding information to + # NumPy. The unpickling code in numpy.core.multiarray is + # written to assume that unicode data appearing where binary + # should be is in 'latin1'. 'bytes' is also safe, as is 'ASCII'. + # + # Other encoding values can corrupt binary data, and we + # purposefully disallow them. For the same reason, the errors= + # argument is not exposed, as values other than 'strict' + # result can similarly silently corrupt numerical data. + raise ValueError("encoding must be 'ASCII', 'latin1', or 'bytes'") + + pickle_kwargs = dict(encoding=encoding, fix_imports=fix_imports) + + with contextlib.ExitStack() as stack: + if hasattr(file, 'read'): + fid = file + own_fid = False + else: + fid = stack.enter_context(open(os_fspath(file), "rb")) + own_fid = True + + # Code to distinguish from NumPy binary files and pickles. + _ZIP_PREFIX = b'PK\x03\x04' + _ZIP_SUFFIX = b'PK\x05\x06' # empty zip files start with this + N = len(format.MAGIC_PREFIX) + magic = fid.read(N) + # If the file size is less than N, we need to make sure not + # to seek past the beginning of the file + fid.seek(-min(N, len(magic)), 1) # back-up + if magic.startswith(_ZIP_PREFIX) or magic.startswith(_ZIP_SUFFIX): + # zip-file (assume .npz) + # Potentially transfer file ownership to NpzFile + stack.pop_all() + ret = NpzFile(fid, own_fid=own_fid, allow_pickle=allow_pickle, + pickle_kwargs=pickle_kwargs) + return ret + elif magic == format.MAGIC_PREFIX: + # .npy file + if mmap_mode: + return format.open_memmap(file, mode=mmap_mode) + else: + return format.read_array(fid, allow_pickle=allow_pickle, + pickle_kwargs=pickle_kwargs) + else: + # Try a pickle + if not allow_pickle: + raise ValueError("Cannot load file containing pickled data " + "when allow_pickle=False") + try: + return pickle.load(fid, **pickle_kwargs) + except Exception as e: + raise pickle.UnpicklingError( + f"Failed to interpret file {file!r} as a pickle") from e + + +def _save_dispatcher(file, arr, allow_pickle=None, fix_imports=None): + return (arr,) + + +@array_function_dispatch(_save_dispatcher) +def save(file, arr, allow_pickle=True, fix_imports=True): + """ + Save an array to a binary file in NumPy ``.npy`` format. + + Parameters + ---------- + file : file, str, or pathlib.Path + File or filename to which the data is saved. If file is a file-object, + then the filename is unchanged. 
If file is a string or Path, a ``.npy`` + extension will be appended to the filename if it does not already + have one. + arr : array_like + Array data to be saved. + allow_pickle : bool, optional + Allow saving object arrays using Python pickles. Reasons for disallowing + pickles include security (loading pickled data can execute arbitrary + code) and portability (pickled objects may not be loadable on different + Python installations, for example if the stored objects require libraries + that are not available, and not all pickled data is compatible between + Python 2 and Python 3). + Default: True + fix_imports : bool, optional + Only useful in forcing objects in object arrays on Python 3 to be + pickled in a Python 2 compatible way. If `fix_imports` is True, pickle + will try to map the new Python 3 names to the old module names used in + Python 2, so that the pickle data stream is readable with Python 2. + + See Also + -------- + savez : Save several arrays into a ``.npz`` archive + savetxt, load + + Notes + ----- + For a description of the ``.npy`` format, see :py:mod:`numpy.lib.format`. + + Any data saved to the file is appended to the end of the file. + + Examples + -------- + >>> from tempfile import TemporaryFile + >>> outfile = TemporaryFile() + + >>> x = np.arange(10) + >>> np.save(outfile, x) + + >>> _ = outfile.seek(0) # Only needed here to simulate closing & reopening file + >>> np.load(outfile) + array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) + + + >>> with open('test.npy', 'wb') as f: + ... np.save(f, np.array([1, 2])) + ... np.save(f, np.array([1, 3])) + >>> with open('test.npy', 'rb') as f: + ... a = np.load(f) + ... b = np.load(f) + >>> print(a, b) + # [1 2] [1 3] + """ + if hasattr(file, 'write'): + file_ctx = contextlib.nullcontext(file) + else: + file = os_fspath(file) + if not file.endswith('.npy'): + file = file + '.npy' + file_ctx = open(file, "wb") + + with file_ctx as fid: + arr = np.asanyarray(arr) + format.write_array(fid, arr, allow_pickle=allow_pickle, + pickle_kwargs=dict(fix_imports=fix_imports)) + + +def _savez_dispatcher(file, *args, **kwds): + yield from args + yield from kwds.values() + + +@array_function_dispatch(_savez_dispatcher) +def savez(file, *args, **kwds): + """Save several arrays into a single file in uncompressed ``.npz`` format. + + Provide arrays as keyword arguments to store them under the + corresponding name in the output file: ``savez(fn, x=x, y=y)``. + + If arrays are specified as positional arguments, i.e., ``savez(fn, + x, y)``, their names will be `arr_0`, `arr_1`, etc. + + Parameters + ---------- + file : str or file + Either the filename (string) or an open file (file-like object) + where the data will be saved. If file is a string or a Path, the + ``.npz`` extension will be appended to the filename if it is not + already there. + args : Arguments, optional + Arrays to save to the file. Please use keyword arguments (see + `kwds` below) to assign names to arrays. Arrays specified as + args will be named "arr_0", "arr_1", and so on. + kwds : Keyword arguments, optional + Arrays to save to the file. Each array will be saved to the + output file with its corresponding keyword name. + + Returns + ------- + None + + See Also + -------- + save : Save a single array to a binary file in NumPy format. + savetxt : Save an array to a file as plain text. + savez_compressed : Save several arrays into a compressed ``.npz`` archive + + Notes + ----- + The ``.npz`` file format is a zipped archive of files named after the + variables they contain. 
The archive is not compressed and each file + in the archive contains one variable in ``.npy`` format. For a + description of the ``.npy`` format, see :py:mod:`numpy.lib.format`. + + When opening the saved ``.npz`` file with `load` a `NpzFile` object is + returned. This is a dictionary-like object which can be queried for + its list of arrays (with the ``.files`` attribute), and for the arrays + themselves. + + Keys passed in `kwds` are used as filenames inside the ZIP archive. + Therefore, keys should be valid filenames; e.g., avoid keys that begin with + ``/`` or contain ``.``. + + When naming variables with keyword arguments, it is not possible to name a + variable ``file``, as this would cause the ``file`` argument to be defined + twice in the call to ``savez``. + + Examples + -------- + >>> from tempfile import TemporaryFile + >>> outfile = TemporaryFile() + >>> x = np.arange(10) + >>> y = np.sin(x) + + Using `savez` with \\*args, the arrays are saved with default names. + + >>> np.savez(outfile, x, y) + >>> _ = outfile.seek(0) # Only needed here to simulate closing & reopening file + >>> npzfile = np.load(outfile) + >>> npzfile.files + ['arr_0', 'arr_1'] + >>> npzfile['arr_0'] + array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) + + Using `savez` with \\**kwds, the arrays are saved with the keyword names. + + >>> outfile = TemporaryFile() + >>> np.savez(outfile, x=x, y=y) + >>> _ = outfile.seek(0) + >>> npzfile = np.load(outfile) + >>> sorted(npzfile.files) + ['x', 'y'] + >>> npzfile['x'] + array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) + + """ + _savez(file, args, kwds, False) + + +def _savez_compressed_dispatcher(file, *args, **kwds): + yield from args + yield from kwds.values() + + +@array_function_dispatch(_savez_compressed_dispatcher) +def savez_compressed(file, *args, **kwds): + """ + Save several arrays into a single file in compressed ``.npz`` format. + + Provide arrays as keyword arguments to store them under the + corresponding name in the output file: ``savez(fn, x=x, y=y)``. + + If arrays are specified as positional arguments, i.e., ``savez(fn, + x, y)``, their names will be `arr_0`, `arr_1`, etc. + + Parameters + ---------- + file : str or file + Either the filename (string) or an open file (file-like object) + where the data will be saved. If file is a string or a Path, the + ``.npz`` extension will be appended to the filename if it is not + already there. + args : Arguments, optional + Arrays to save to the file. Please use keyword arguments (see + `kwds` below) to assign names to arrays. Arrays specified as + args will be named "arr_0", "arr_1", and so on. + kwds : Keyword arguments, optional + Arrays to save to the file. Each array will be saved to the + output file with its corresponding keyword name. + + Returns + ------- + None + + See Also + -------- + numpy.save : Save a single array to a binary file in NumPy format. + numpy.savetxt : Save an array to a file as plain text. + numpy.savez : Save several arrays into an uncompressed ``.npz`` file format + numpy.load : Load the files created by savez_compressed. + + Notes + ----- + The ``.npz`` file format is a zipped archive of files named after the + variables they contain. The archive is compressed with + ``zipfile.ZIP_DEFLATED`` and each file in the archive contains one variable + in ``.npy`` format. For a description of the ``.npy`` format, see + :py:mod:`numpy.lib.format`. + + + When opening the saved ``.npz`` file with `load` a `NpzFile` object is + returned. 
This is a dictionary-like object which can be queried for + its list of arrays (with the ``.files`` attribute), and for the arrays + themselves. + + Examples + -------- + >>> test_array = np.random.rand(3, 2) + >>> test_vector = np.random.rand(4) + >>> np.savez_compressed('/tmp/123', a=test_array, b=test_vector) + >>> loaded = np.load('/tmp/123.npz') + >>> print(np.array_equal(test_array, loaded['a'])) + True + >>> print(np.array_equal(test_vector, loaded['b'])) + True + + """ + _savez(file, args, kwds, True) + + +def _savez(file, args, kwds, compress, allow_pickle=True, pickle_kwargs=None): + # Import is postponed to here since zipfile depends on gzip, an optional + # component of the so-called standard library. + import zipfile + + if not hasattr(file, 'write'): + file = os_fspath(file) + if not file.endswith('.npz'): + file = file + '.npz' + + namedict = kwds + for i, val in enumerate(args): + key = 'arr_%d' % i + if key in namedict.keys(): + raise ValueError( + "Cannot use un-named variables and keyword %s" % key) + namedict[key] = val + + if compress: + compression = zipfile.ZIP_DEFLATED + else: + compression = zipfile.ZIP_STORED + + zipf = zipfile_factory(file, mode="w", compression=compression) + + for key, val in namedict.items(): + fname = key + '.npy' + val = np.asanyarray(val) + # always force zip64, gh-10776 + with zipf.open(fname, 'w', force_zip64=True) as fid: + format.write_array(fid, val, + allow_pickle=allow_pickle, + pickle_kwargs=pickle_kwargs) + + zipf.close() + + +def _floatconv(x): + try: + return float(x) # The fastest path. + except ValueError: + if '0x' in x: # Don't accidentally convert "a" ("0xa") to 10. + try: + return float.fromhex(x) + except ValueError: + pass + raise # Raise the original exception, which makes more sense. + + +_CONVERTERS = [ # These converters only ever get strs (not bytes) as input. + (np.bool_, lambda x: bool(int(x))), + (np.uint64, np.uint64), + (np.int64, np.int64), + (np.integer, lambda x: int(float(x))), + (np.longdouble, np.longdouble), + (np.floating, _floatconv), + (complex, lambda x: complex(x.replace('+-', '-'))), + (np.bytes_, methodcaller('encode', 'latin-1')), + (np.unicode_, str), +] + + +def _getconv(dtype): + """ + Find the correct dtype converter. Adapted from matplotlib. + + Even when a lambda is returned, it is defined at the toplevel, to allow + testing for equality and enabling optimization for single-type data. + """ + for base, conv in _CONVERTERS: + if issubclass(dtype.type, base): + return conv + return str + + +# _loadtxt_flatten_dtype_internal and _loadtxt_pack_items are loadtxt helpers +# lifted to the toplevel because recursive inner functions cause either +# GC-dependent reference loops (because they are closures over loadtxt's +# internal variables) or large overheads if using a manual trampoline to hide +# the recursive calls. + + +# not to be confused with the flatten_dtype we import... +def _loadtxt_flatten_dtype_internal(dt): + """Unpack a structured data-type, and produce a packer function.""" + if dt.names is None: + # If the dtype is flattened, return. + # If the dtype has a shape, the dtype occurs + # in the list more than once. 
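+        # e.g. an illustrative subarray dtype such as np.dtype(('f8', (2, 3)))
+        # flattens to six float64 entries, and the returned packer rebuilds
+        # the 2x3 nesting from the flat converted values.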
+ shape = dt.shape + if len(shape) == 0: + return ([dt.base], None) + else: + packing = [(shape[-1], list)] + if len(shape) > 1: + for dim in dt.shape[-2::-1]: + packing = [(dim*packing[0][0], packing*dim)] + return ([dt.base] * int(np.prod(dt.shape)), + functools.partial(_loadtxt_pack_items, packing)) + else: + types = [] + packing = [] + for field in dt.names: + tp, bytes = dt.fields[field] + flat_dt, flat_packer = _loadtxt_flatten_dtype_internal(tp) + types.extend(flat_dt) + flat_packing = flat_packer.args[0] if flat_packer else None + # Avoid extra nesting for subarrays + if tp.ndim > 0: + packing.extend(flat_packing) + else: + packing.append((len(flat_dt), flat_packing)) + return (types, functools.partial(_loadtxt_pack_items, packing)) + + +def _loadtxt_pack_items(packing, items): + """Pack items into nested lists based on re-packing info.""" + if packing is None: + return items[0] + elif packing is tuple: + return tuple(items) + elif packing is list: + return list(items) + else: + start = 0 + ret = [] + for length, subpacking in packing: + ret.append( + _loadtxt_pack_items(subpacking, items[start:start+length])) + start += length + return tuple(ret) + + +# amount of lines loadtxt reads in one chunk, can be overridden for testing +_loadtxt_chunksize = 50000 + + +def _loadtxt_dispatcher(fname, dtype=None, comments=None, delimiter=None, + converters=None, skiprows=None, usecols=None, unpack=None, + ndmin=None, encoding=None, max_rows=None, *, like=None): + return (like,) + + +@set_array_function_like_doc +@set_module('numpy') +def loadtxt(fname, dtype=float, comments='#', delimiter=None, + converters=None, skiprows=0, usecols=None, unpack=False, + ndmin=0, encoding='bytes', max_rows=None, *, like=None): + r""" + Load data from a text file. + + Each row in the text file must have the same number of values. + + Parameters + ---------- + fname : file, str, pathlib.Path, list of str, generator + File, filename, list, or generator to read. If the filename + extension is ``.gz`` or ``.bz2``, the file is first decompressed. Note + that generators must return bytes or strings. The strings + in a list or produced by a generator are treated as lines. + dtype : data-type, optional + Data-type of the resulting array; default: float. If this is a + structured data-type, the resulting array will be 1-dimensional, and + each row will be interpreted as an element of the array. In this + case, the number of columns used must match the number of fields in + the data-type. + comments : str or sequence of str, optional + The characters or list of characters used to indicate the start of a + comment. None implies no comments. For backwards compatibility, byte + strings will be decoded as 'latin1'. The default is '#'. + delimiter : str, optional + The string used to separate values. For backwards compatibility, byte + strings will be decoded as 'latin1'. The default is whitespace. + converters : dict, optional + A dictionary mapping column number to a function that will parse the + column string into the desired value. E.g., if column 0 is a date + string: ``converters = {0: datestr2num}``. Converters can also be + used to provide a default value for missing data (but see also + `genfromtxt`): ``converters = {3: lambda s: float(s.strip() or 0)}``. + Default: None. + skiprows : int, optional + Skip the first `skiprows` lines, including comments; default: 0. + usecols : int or sequence, optional + Which columns to read, with 0 being the first. 
For example,
+        ``usecols = (1,4,5)`` will extract the 2nd, 5th and 6th columns.
+        The default, None, results in all columns being read.
+
+        .. versionchanged:: 1.11.0
+            When a single column has to be read it is possible to use
+            an integer instead of a tuple. E.g. ``usecols = 3`` reads the
+            fourth column the same way as ``usecols = (3,)`` would.
+    unpack : bool, optional
+        If True, the returned array is transposed, so that arguments may be
+        unpacked using ``x, y, z = loadtxt(...)``. When used with a
+        structured data-type, arrays are returned for each field.
+        Default is False.
+    ndmin : int, optional
+        The returned array will have at least `ndmin` dimensions.
+        Otherwise mono-dimensional axes will be squeezed.
+        Legal values: 0 (default), 1 or 2.
+
+        .. versionadded:: 1.6.0
+    encoding : str, optional
+        Encoding used to decode the input file. Does not apply to input
+        streams. The special value 'bytes' enables backward compatibility
+        workarounds that ensure you receive byte arrays as results if
+        possible and passes 'latin1' encoded strings to converters.
+        Override this value to receive unicode arrays and pass strings
+        as input to converters. If set to None the system default is used.
+        The default value is 'bytes'.
+
+        .. versionadded:: 1.14.0
+    max_rows : int, optional
+        Read `max_rows` lines of content after `skiprows` lines. The default
+        is to read all the lines.
+
+        .. versionadded:: 1.16.0
+    ${ARRAY_FUNCTION_LIKE}
+
+        .. versionadded:: 1.20.0
+
+    Returns
+    -------
+    out : ndarray
+        Data read from the text file.
+
+    See Also
+    --------
+    load, fromstring, fromregex
+    genfromtxt : Load data with missing values handled as specified.
+    scipy.io.loadmat : reads MATLAB data files
+
+    Notes
+    -----
+    This function aims to be a fast reader for simply formatted files. The
+    `genfromtxt` function provides more sophisticated handling of, e.g.,
+    lines with missing values.
+
+    .. versionadded:: 1.10.0
+
+    The strings produced by the Python float.hex method can be used as
+    input for floats.
+
+    Examples
+    --------
+    >>> from io import StringIO   # StringIO behaves like a file object
+    >>> c = StringIO("0 1\n2 3")
+    >>> np.loadtxt(c)
+    array([[0., 1.],
+           [2., 3.]])
+
+    >>> d = StringIO("M 21 72\nF 35 58")
+    >>> np.loadtxt(d, dtype={'names': ('gender', 'age', 'weight'),
+    ...                      'formats': ('S1', 'i4', 'f4')})
+    array([(b'M', 21, 72.), (b'F', 35, 58.)],
+          dtype=[('gender', 'S1'), ('age', '<i4'), ('weight', '<f4')])
+
+    >>> c = StringIO("1,0,2\n3,0,4")
+    >>> x, y = np.loadtxt(c, delimiter=',', usecols=(0, 2), unpack=True)
+    >>> x
+    array([1., 3.])
+    >>> y
+    array([2., 4.])
+
+    This example shows how `converters` can be used to convert a field
+    with a trailing minus sign into a negative number.
+
+    >>> s = StringIO('10.01 31.25-\n19.22 64.31\n17.57- 63.94')
+    >>> def conv(fld):
+    ...     return -float(fld[:-1]) if fld.endswith(b'-') else float(fld)
+    ...
+    >>> np.loadtxt(s, converters={0: conv, 1: conv})
+    array([[ 10.01, -31.25],
+           [ 19.22,  64.31],
+           [-17.57,  63.94]])
+    """
+
+    if like is not None:
+        return _loadtxt_with_like(
+            fname, dtype=dtype, comments=comments, delimiter=delimiter,
+            converters=converters, skiprows=skiprows, usecols=usecols,
+            unpack=unpack, ndmin=ndmin, encoding=encoding,
+            max_rows=max_rows, like=like
+        )
+
+    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+    # Nested functions used by loadtxt.
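+    # (split_line, below, chops off comment text, strips the line, and
+    # tokenizes it on the delimiter; empty lines yield an empty list.)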
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + def split_line(line: str): + """Chop off comments, strip, and split at delimiter.""" + for comment in comments: # Much faster than using a single regex. + line = line.split(comment, 1)[0] + line = line.strip('\r\n') + return line.split(delimiter) if line else [] + + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Main body of loadtxt. + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + # Check correctness of the values of `ndmin` + if ndmin not in [0, 1, 2]: + raise ValueError('Illegal value of ndmin keyword: %s' % ndmin) + + # Type conversions for Py3 convenience + if comments is not None: + if isinstance(comments, (str, bytes)): + comments = [comments] + comments = [_decode_line(x) for x in comments] + else: + comments = [] + + if delimiter is not None: + delimiter = _decode_line(delimiter) + + user_converters = converters + + byte_converters = False + if encoding == 'bytes': + encoding = None + byte_converters = True + + if usecols is not None: + # Copy usecols, allowing it to be a single int or a sequence of ints. + try: + usecols = list(usecols) + except TypeError: + usecols = [usecols] + for i, col_idx in enumerate(usecols): + try: + usecols[i] = opindex(col_idx) # Cast to builtin int now. + except TypeError as e: + e.args = ( + "usecols must be an int or a sequence of ints but " + "it contains at least one element of type %s" % + type(col_idx), + ) + raise + if len(usecols) > 1: + usecols_getter = itemgetter(*usecols) + else: + # Get an iterable back, even if using a single column. + usecols_getter = lambda obj, c=usecols[0]: [obj[c]] + else: + usecols_getter = None + + # Make sure we're dealing with a proper dtype + dtype = np.dtype(dtype) + defconv = _getconv(dtype) + + dtype_types, packer = _loadtxt_flatten_dtype_internal(dtype) + + fh_closing_ctx = contextlib.nullcontext() + try: + if isinstance(fname, os_PathLike): + fname = os_fspath(fname) + if _is_string_like(fname): + fh = np.lib._datasource.open(fname, 'rt', encoding=encoding) + fencoding = getattr(fh, 'encoding', 'latin1') + line_iter = iter(fh) + fh_closing_ctx = contextlib.closing(fh) + else: + line_iter = iter(fname) + fencoding = getattr(fname, 'encoding', 'latin1') + try: + first_line = next(line_iter) + except StopIteration: + pass # Nothing matters if line_iter is empty. + else: + # Put first_line back. + line_iter = itertools.chain([first_line], line_iter) + if isinstance(first_line, bytes): + # Using latin1 matches _decode_line's behavior. + decoder = methodcaller( + "decode", + encoding if encoding is not None else "latin1") + line_iter = map(decoder, line_iter) + except TypeError as e: + raise ValueError( + f"fname must be a string, filehandle, list of strings,\n" + f"or generator. Got {type(fname)} instead." + ) from e + + with fh_closing_ctx: + + # input may be a python2 io stream + if encoding is not None: + fencoding = encoding + # we must assume local encoding + # TODO emit portability warning? + elif fencoding is None: + import locale + fencoding = locale.getpreferredencoding() + + # Skip the first `skiprows` lines + for i in range(skiprows): + next(line_iter) + + # Read until we find a line with some values, and use it to determine + # the need for decoding and estimate the number of columns. + for first_line in line_iter: + ncols = len(usecols or split_line(first_line)) + if ncols: + # Put first_line back. 
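+                # itertools.chain re-prepends the already-consumed line so
+                # the main parsing loop below still sees it.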
+ line_iter = itertools.chain([first_line], line_iter) + break + else: # End of lines reached + ncols = len(usecols or []) + warnings.warn('loadtxt: Empty input file: "%s"' % fname, + stacklevel=2) + + line_iter = itertools.islice(line_iter, max_rows) + lineno_words_iter = filter( + itemgetter(1), # item[1] is words; filter skips empty lines. + enumerate(map(split_line, line_iter), 1 + skiprows)) + + # Now that we know ncols, create the default converters list, and + # set packing, if necessary. + if len(dtype_types) > 1: + # We're dealing with a structured array, each field of + # the dtype matches a column + converters = [_getconv(dt) for dt in dtype_types] + else: + # All fields have the same dtype; use specialized packers which are + # much faster than those using _loadtxt_pack_items. + converters = [defconv for i in range(ncols)] + if ncols == 1: + packer = itemgetter(0) + else: + def packer(row): return row + + # By preference, use the converters specified by the user + for i, conv in (user_converters or {}).items(): + if usecols: + try: + i = usecols.index(i) + except ValueError: + # Unused converter specified + continue + if byte_converters: + # converters may use decode to workaround numpy's old + # behaviour, so encode the string again (converters are only + # called with strings) before passing to the user converter. + def tobytes_first(conv, x): + return conv(x.encode("latin1")) + converters[i] = functools.partial(tobytes_first, conv) + else: + converters[i] = conv + + fencode = methodcaller("encode", fencoding) + converters = [conv if conv is not bytes else fencode + for conv in converters] + if len(set(converters)) == 1: + # Optimize single-type data. Note that this is only reached if + # `_getconv` returns equal callables (i.e. not local lambdas) on + # equal dtypes. + def convert_row(vals, _conv=converters[0]): + return [*map(_conv, vals)] + else: + def convert_row(vals): + return [conv(val) for conv, val in zip(converters, vals)] + + # read data in chunks and fill it into an array via resize + # over-allocating and shrinking the array later may be faster but is + # probably not relevant compared to the cost of actually reading and + # converting the data + X = None + while True: + chunk = [] + for lineno, words in itertools.islice( + lineno_words_iter, _loadtxt_chunksize): + if usecols_getter is not None: + words = usecols_getter(words) + elif len(words) != ncols: + raise ValueError( + f"Wrong number of columns at line {lineno}") + # Convert each value according to its column, then pack it + # according to the dtype's nesting, and store it. + chunk.append(packer(convert_row(words))) + if not chunk: # The islice is empty, i.e. we're done. + break + + if X is None: + X = np.array(chunk, dtype) + else: + nshape = list(X.shape) + pos = nshape[0] + nshape[0] += len(chunk) + X.resize(nshape, refcheck=False) + X[pos:, ...] = chunk + + if X is None: + X = np.array([], dtype) + + # Multicolumn data are returned with shape (1, N, M), i.e. + # (1, 1, M) for a single row - remove the singleton dimension there + if X.ndim == 3 and X.shape[:2] == (1, 1): + X.shape = (1, -1) + + # Verify that the array has at least dimensions `ndmin`. 
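+        # `ndmin` was validated to be 0, 1 or 2 at the top of loadtxt, so
+        # only the squeeze / atleast_1d / atleast_2d cases below can occur.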
+ # Tweak the size and shape of the arrays - remove extraneous dimensions + if X.ndim > ndmin: + X = np.squeeze(X) + # and ensure we have the minimum number of dimensions asked for + # - has to be in this order for the odd case ndmin=1, X.squeeze().ndim=0 + if X.ndim < ndmin: + if ndmin == 1: + X = np.atleast_1d(X) + elif ndmin == 2: + X = np.atleast_2d(X).T + + if unpack: + if len(dtype_types) > 1: + # For structured arrays, return an array for each field. + return [X[field] for field in dtype.names] + else: + return X.T + else: + return X + + +_loadtxt_with_like = array_function_dispatch( + _loadtxt_dispatcher +)(loadtxt) + + +def _savetxt_dispatcher(fname, X, fmt=None, delimiter=None, newline=None, + header=None, footer=None, comments=None, + encoding=None): + return (X,) + + +@array_function_dispatch(_savetxt_dispatcher) +def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', + footer='', comments='# ', encoding=None): + """ + Save an array to a text file. + + Parameters + ---------- + fname : filename or file handle + If the filename ends in ``.gz``, the file is automatically saved in + compressed gzip format. `loadtxt` understands gzipped files + transparently. + X : 1D or 2D array_like + Data to be saved to a text file. + fmt : str or sequence of strs, optional + A single format (%10.5f), a sequence of formats, or a + multi-format string, e.g. 'Iteration %d -- %10.5f', in which + case `delimiter` is ignored. For complex `X`, the legal options + for `fmt` are: + + * a single specifier, `fmt='%.4e'`, resulting in numbers formatted + like `' (%s+%sj)' % (fmt, fmt)` + * a full string specifying every real and imaginary part, e.g. + `' %.4e %+.4ej %.4e %+.4ej %.4e %+.4ej'` for 3 columns + * a list of specifiers, one per column - in this case, the real + and imaginary part must have separate specifiers, + e.g. `['%.3e + %.3ej', '(%.15e%+.15ej)']` for 2 columns + delimiter : str, optional + String or character separating columns. + newline : str, optional + String or character separating lines. + + .. versionadded:: 1.5.0 + header : str, optional + String that will be written at the beginning of the file. + + .. versionadded:: 1.7.0 + footer : str, optional + String that will be written at the end of the file. + + .. versionadded:: 1.7.0 + comments : str, optional + String that will be prepended to the ``header`` and ``footer`` strings, + to mark them as comments. Default: '# ', as expected by e.g. + ``numpy.loadtxt``. + + .. versionadded:: 1.7.0 + encoding : {None, str}, optional + Encoding used to encode the outputfile. Does not apply to output + streams. If the encoding is something other than 'bytes' or 'latin1' + you will not be able to load the file in NumPy versions < 1.14. Default + is 'latin1'. + + .. versionadded:: 1.14.0 + + + See Also + -------- + save : Save an array to a binary file in NumPy ``.npy`` format + savez : Save several arrays into an uncompressed ``.npz`` archive + savez_compressed : Save several arrays into a compressed ``.npz`` archive + + Notes + ----- + Further explanation of the `fmt` parameter + (``%[flag]width[.precision]specifier``): + + flags: + ``-`` : left justify + + ``+`` : Forces to precede result with + or -. + + ``0`` : Left pad the number with zeros instead of space (see width). + + width: + Minimum number of characters to be printed. The value is not truncated + if it has more characters. + + precision: + - For integer specifiers (eg. ``d,i,o,x``), the minimum number of + digits. 
+ - For ``e, E`` and ``f`` specifiers, the number of digits to print + after the decimal point. + - For ``g`` and ``G``, the maximum number of significant digits. + - For ``s``, the maximum number of characters. + + specifiers: + ``c`` : character + + ``d`` or ``i`` : signed decimal integer + + ``e`` or ``E`` : scientific notation with ``e`` or ``E``. + + ``f`` : decimal floating point + + ``g,G`` : use the shorter of ``e,E`` or ``f`` + + ``o`` : signed octal + + ``s`` : string of characters + + ``u`` : unsigned decimal integer + + ``x,X`` : unsigned hexadecimal integer + + This explanation of ``fmt`` is not complete, for an exhaustive + specification see [1]_. + + References + ---------- + .. [1] `Format Specification Mini-Language + <https://docs.python.org/library/string.html#format-specification-mini-language>`_, + Python Documentation. + + Examples + -------- + >>> x = y = z = np.arange(0.0,5.0,1.0) + >>> np.savetxt('test.out', x, delimiter=',') # X is an array + >>> np.savetxt('test.out', (x,y,z)) # x,y,z equal sized 1D arrays + >>> np.savetxt('test.out', x, fmt='%1.4e') # use exponential notation + + """ + + # Py3 conversions first + if isinstance(fmt, bytes): + fmt = asstr(fmt) + delimiter = asstr(delimiter) + + class WriteWrap: + """Convert to bytes on bytestream inputs. + + """ + def __init__(self, fh, encoding): + self.fh = fh + self.encoding = encoding + self.do_write = self.first_write + + def close(self): + self.fh.close() + + def write(self, v): + self.do_write(v) + + def write_bytes(self, v): + if isinstance(v, bytes): + self.fh.write(v) + else: + self.fh.write(v.encode(self.encoding)) + + def write_normal(self, v): + self.fh.write(asunicode(v)) + + def first_write(self, v): + try: + self.write_normal(v) + self.write = self.write_normal + except TypeError: + # input is probably a bytestream + self.write_bytes(v) + self.write = self.write_bytes + + own_fh = False + if isinstance(fname, os_PathLike): + fname = os_fspath(fname) + if _is_string_like(fname): + # datasource doesn't support creating a new file ... + open(fname, 'wt').close() + fh = np.lib._datasource.open(fname, 'wt', encoding=encoding) + own_fh = True + elif hasattr(fname, 'write'): + # wrap to handle byte output streams + fh = WriteWrap(fname, encoding or 'latin1') + else: + raise ValueError('fname must be a string or file handle') + + try: + X = np.asarray(X) + + # Handle 1-dimensional arrays + if X.ndim == 0 or X.ndim > 2: + raise ValueError( + "Expected 1D or 2D array, got %dD array instead" % X.ndim) + elif X.ndim == 1: + # Common case -- 1d array of numbers + if X.dtype.names is None: + X = np.atleast_2d(X).T + ncol = 1 + + # Complex dtype -- each field indicates a separate column + else: + ncol = len(X.dtype.names) + else: + ncol = X.shape[1] + + iscomplex_X = np.iscomplexobj(X) + # `fmt` can be a string with multiple insertion points or a + # list of formats. E.g. '%10.5f\t%10d' or ('%10.5f', '%10d') + if type(fmt) in (list, tuple): + if len(fmt) != ncol: + raise AttributeError('fmt has wrong shape.
%s' % str(fmt)) + format = asstr(delimiter).join(map(asstr, fmt)) + elif isinstance(fmt, str): + n_fmt_chars = fmt.count('%') + error = ValueError('fmt has wrong number of %% formats: %s' % fmt) + if n_fmt_chars == 1: + if iscomplex_X: + fmt = [' (%s+%sj)' % (fmt, fmt), ] * ncol + else: + fmt = [fmt, ] * ncol + format = delimiter.join(fmt) + elif iscomplex_X and n_fmt_chars != (2 * ncol): + raise error + elif ((not iscomplex_X) and n_fmt_chars != ncol): + raise error + else: + format = fmt + else: + raise ValueError('invalid fmt: %r' % (fmt,)) + + if len(header) > 0: + header = header.replace('\n', '\n' + comments) + fh.write(comments + header + newline) + if iscomplex_X: + for row in X: + row2 = [] + for number in row: + row2.append(number.real) + row2.append(number.imag) + s = format % tuple(row2) + newline + fh.write(s.replace('+-', '-')) + else: + for row in X: + try: + v = format % tuple(row) + newline + except TypeError as e: + raise TypeError("Mismatch between array dtype ('%s') and " + "format specifier ('%s')" + % (str(X.dtype), format)) from e + fh.write(v) + + if len(footer) > 0: + footer = footer.replace('\n', '\n' + comments) + fh.write(comments + footer + newline) + finally: + if own_fh: + fh.close() + + +@set_module('numpy') +def fromregex(file, regexp, dtype, encoding=None): + r""" + Construct an array from a text file, using regular expression parsing. + + The returned array is always a structured array, and is constructed from + all matches of the regular expression in the file. Groups in the regular + expression are converted to fields of the structured array. + + Parameters + ---------- + file : path or file + Filename or file object to read. + + .. versionchanged:: 1.22.0 + Now accepts `os.PathLike` implementations. + regexp : str or regexp + Regular expression used to parse the file. + Groups in the regular expression correspond to fields in the dtype. + dtype : dtype or list of dtypes + Dtype for the structured array; must be a structured datatype. + encoding : str, optional + Encoding used to decode the inputfile. Does not apply to input streams. + + .. versionadded:: 1.14.0 + + Returns + ------- + output : ndarray + The output array, containing the part of the content of `file` that + was matched by `regexp`. `output` is always a structured array. + + Raises + ------ + TypeError + When `dtype` is not a valid dtype for a structured array. + + See Also + -------- + fromstring, loadtxt + + Notes + ----- + Dtypes for structured arrays can be specified in several forms, but all + forms specify at least the data type and field name. For details see + `basics.rec`. + + Examples + -------- + >>> from io import StringIO + >>> text = StringIO("1312 foo\n1534 bar\n444 qux") + + >>> regexp = r"(\d+)\s+(...)" # match [digits, whitespace, anything] + >>> output = np.fromregex(text, regexp, + ... 
[('num', np.int64), ('key', 'S3')]) + >>> output + array([(1312, b'foo'), (1534, b'bar'), ( 444, b'qux')], + dtype=[('num', '<i8'), ('key', 'S3')]) + >>> output['num'] + array([1312, 1534, 444]) + + """ + own_fh = False + if not hasattr(file, "read"): + file = os.fspath(file) + file = np.lib._datasource.open(file, 'rt', encoding=encoding) + own_fh = True + + try: + if not isinstance(dtype, np.dtype): + dtype = np.dtype(dtype) + if dtype.names is None: + raise TypeError('dtype must be a structured datatype.') + + content = file.read() + if isinstance(content, bytes) and isinstance(regexp, str): + regexp = asbytes(regexp) + elif isinstance(content, str) and isinstance(regexp, bytes): + regexp = asstr(regexp) + + if not hasattr(regexp, 'match'): + regexp = re.compile(regexp) + seq = regexp.findall(content) + if seq and not isinstance(seq[0], tuple): + # Only one group is in the regexp. + # Create the new array as a single data-type and then + # re-interpret as a single-field structured array. + newdtype = np.dtype(dtype[dtype.names[0]]) + output = np.array(seq, dtype=newdtype) + output.dtype = dtype + else: + output = np.array(seq, dtype=dtype) + + return output + finally: + if own_fh: + file.close() + + +#####-------------------------------------------------------------------------- +#---- --- ASCII functions --- +#####-------------------------------------------------------------------------- + + +def _genfromtxt_dispatcher(fname, dtype=None, comments=None, delimiter=None, + skip_header=None, skip_footer=None, converters=None, + missing_values=None, filling_values=None, usecols=None, + names=None, excludelist=None, deletechars=None, + replace_space=None, autostrip=None, case_sensitive=None, + defaultfmt=None, unpack=None, usemask=None, loose=None, + invalid_raise=None, max_rows=None, encoding=None, *, + like=None): + return (like,) + + +@set_array_function_like_doc +@set_module('numpy') +def genfromtxt(fname, dtype=float, comments='#', delimiter=None, + skip_header=0, skip_footer=0, converters=None, + missing_values=None, filling_values=None, usecols=None, + names=None, excludelist=None, + deletechars=''.join(sorted(NameValidator.defaultdeletechars)), + replace_space='_', autostrip=False, case_sensitive=True, + defaultfmt="f%i", unpack=None, usemask=False, loose=True, + invalid_raise=True, max_rows=None, encoding='bytes', *, + like=None): + """ + Load data from a text file, with missing values handled as specified. + + Each line past the first `skip_header` lines is split at the `delimiter` + character, and characters following the `comments` character are discarded. + + Parameters + ---------- + fname : file, str, pathlib.Path, list of str, generator + File, filename, list, or generator to read. If the filename + extension is ``.gz`` or ``.bz2``, the file is first decompressed. Note + that generators must return bytes or strings. The strings + in a list or produced by a generator are treated as lines. + dtype : dtype, optional + Data type of the resulting array. + If None, the dtypes will be determined by the contents of each + column, individually. + comments : str, optional + The character used to indicate the start of a comment. + All the characters occurring on a line after a comment are discarded. + delimiter : str, int, or sequence, optional + The string used to separate values. By default, any consecutive + whitespaces act as delimiter. An integer or sequence of integers + can also be provided as width(s) of each field. + skiprows : int, optional + `skiprows` was removed in numpy 1.10.
Please use `skip_header` instead. + skip_header : int, optional + The number of lines to skip at the beginning of the file. + skip_footer : int, optional + The number of lines to skip at the end of the file. + converters : variable, optional + The set of functions that convert the data of a column to a value. + The converters can also be used to provide a default value + for missing data: ``converters = {3: lambda s: float(s or 0)}``. + missing : variable, optional + `missing` was removed in numpy 1.10. Please use `missing_values` + instead. + missing_values : variable, optional + The set of strings corresponding to missing data. + filling_values : variable, optional + The set of values to be used as default when the data are missing. + usecols : sequence, optional + Which columns to read, with 0 being the first. For example, + ``usecols = (1, 4, 5)`` will extract the 2nd, 5th and 6th columns. + names : {None, True, str, sequence}, optional + If `names` is True, the field names are read from the first line after + the first `skip_header` lines. This line can optionally be preceded + by a comment delimiter. If `names` is a sequence or a single-string of + comma-separated names, the names will be used to define the field names + in a structured dtype. If `names` is None, the names of the dtype + fields will be used, if any. + excludelist : sequence, optional + A list of names to exclude. This list is appended to the default list + ['return','file','print']. Excluded names are appended with an + underscore: for example, `file` would become `file_`. + deletechars : str, optional + A string combining invalid characters that must be deleted from the + names. + defaultfmt : str, optional + A format used to define default field names, such as "f%i" or "f_%02i". + autostrip : bool, optional + Whether to automatically strip white spaces from the variables. + replace_space : char, optional + Character(s) used in replacement of white spaces in the variable + names. By default, use a '_'. + case_sensitive : {True, False, 'upper', 'lower'}, optional + If True, field names are case sensitive. + If False or 'upper', field names are converted to upper case. + If 'lower', field names are converted to lower case. + unpack : bool, optional + If True, the returned array is transposed, so that arguments may be + unpacked using ``x, y, z = genfromtxt(...)``. When used with a + structured data-type, arrays are returned for each field. + Default is False. + usemask : bool, optional + If True, return a masked array. + If False, return a regular array. + loose : bool, optional + If True, do not raise errors for invalid values. + invalid_raise : bool, optional + If True, an exception is raised if an inconsistency is detected in the + number of columns. + If False, a warning is emitted and the offending lines are skipped. + max_rows : int, optional + The maximum number of rows to read. Must not be used with skip_footer + at the same time. If given, the value must be at least 1. Default is + to read the entire file. + + .. versionadded:: 1.10.0 + encoding : str, optional + Encoding used to decode the inputfile. Does not apply when `fname` is + a file object. The special value 'bytes' enables backward compatibility + workarounds that ensure that you receive byte arrays when possible + and passes latin1 encoded strings to converters. Override this value to + receive unicode arrays and pass strings as input to converters. If set + to None the system default is used. The default value is 'bytes'. + + .. 
versionadded:: 1.14.0 + ${ARRAY_FUNCTION_LIKE} + + .. versionadded:: 1.20.0 + + Returns + ------- + out : ndarray + Data read from the text file. If `usemask` is True, this is a + masked array. + + See Also + -------- + numpy.loadtxt : equivalent function when no data is missing. + + Notes + ----- + * When spaces are used as delimiters, or when no delimiter has been given + as input, there should not be any missing data between two fields. + * When the variables are named (either by a flexible dtype or with `names`), + there must not be any header in the file (else a ValueError + exception is raised). + * Individual values are not stripped of spaces by default. + When using a custom converter, make sure the function does remove spaces. + + References + ---------- + .. [1] NumPy User Guide, section `I/O with NumPy + <https://docs.scipy.org/doc/numpy/user/basics.io.genfromtxt.html>`_. + + Examples + -------- + >>> from io import StringIO + >>> import numpy as np + + Comma delimited file with mixed dtype + + >>> s = StringIO(u"1,1.3,abcde") + >>> data = np.genfromtxt(s, dtype=[('myint','i8'),('myfloat','f8'), + ... ('mystring','S5')], delimiter=",") + >>> data + array((1, 1.3, b'abcde'), + dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', 'S5')]) + + Using dtype = None + + >>> _ = s.seek(0) # needed for StringIO example only + >>> data = np.genfromtxt(s, dtype=None, + ... names = ['myint','myfloat','mystring'], delimiter=",") + >>> data + array((1, 1.3, b'abcde'), + dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', 'S5')]) + + Specifying dtype and names + + >>> _ = s.seek(0) + >>> data = np.genfromtxt(s, dtype="i8,f8,S5", + ... names=['myint','myfloat','mystring'], delimiter=",") + >>> data + array((1, 1.3, b'abcde'), + dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', 'S5')]) + + An example with fixed-width columns + + >>> s = StringIO(u"11.3abcde") + >>> data = np.genfromtxt(s, dtype=None, names=['intvar','fltvar','strvar'], + ... delimiter=[1,3,5]) + >>> data + array((1, 1.3, b'abcde'), + dtype=[('intvar', '<i8'), ('fltvar', '<f8'), ('strvar', 'S5')]) + + An example to show comments + + >>> f = StringIO(''' + ... text,# of chars + ... hello world,11 + ...
numpy,5''') + >>> np.genfromtxt(f, dtype='S12,S12', delimiter=',') + array([(b'text', b''), (b'hello world', b'11'), (b'numpy', b'5')], + dtype=[('f0', 'S12'), ('f1', 'S12')]) + + """ + + if like is not None: + return _genfromtxt_with_like( + fname, dtype=dtype, comments=comments, delimiter=delimiter, + skip_header=skip_header, skip_footer=skip_footer, + converters=converters, missing_values=missing_values, + filling_values=filling_values, usecols=usecols, names=names, + excludelist=excludelist, deletechars=deletechars, + replace_space=replace_space, autostrip=autostrip, + case_sensitive=case_sensitive, defaultfmt=defaultfmt, + unpack=unpack, usemask=usemask, loose=loose, + invalid_raise=invalid_raise, max_rows=max_rows, encoding=encoding, + like=like + ) + + if max_rows is not None: + if skip_footer: + raise ValueError( + "The keywords 'skip_footer' and 'max_rows' can not be " + "specified at the same time.") + if max_rows < 1: + raise ValueError("'max_rows' must be at least 1.") + + if usemask: + from numpy.ma import MaskedArray, make_mask_descr + # Check the input dictionary of converters + user_converters = converters or {} + if not isinstance(user_converters, dict): + raise TypeError( + "The input argument 'converter' should be a valid dictionary " + "(got '%s' instead)" % type(user_converters)) + + if encoding == 'bytes': + encoding = None + byte_converters = True + else: + byte_converters = False + + # Initialize the filehandle, the LineSplitter and the NameValidator + try: + if isinstance(fname, os_PathLike): + fname = os_fspath(fname) + if isinstance(fname, str): + fid = np.lib._datasource.open(fname, 'rt', encoding=encoding) + fid_ctx = contextlib.closing(fid) + else: + fid = fname + fid_ctx = contextlib.nullcontext(fid) + fhd = iter(fid) + except TypeError as e: + raise TypeError( + f"fname must be a string, filehandle, list of strings,\n" + f"or generator. Got {type(fname)} instead." + ) from e + + with fid_ctx: + split_line = LineSplitter(delimiter=delimiter, comments=comments, + autostrip=autostrip, encoding=encoding) + validate_names = NameValidator(excludelist=excludelist, + deletechars=deletechars, + case_sensitive=case_sensitive, + replace_space=replace_space) + + # Skip the first `skip_header` rows + try: + for i in range(skip_header): + next(fhd) + + # Keep on until we find the first valid values + first_values = None + + while not first_values: + first_line = _decode_line(next(fhd), encoding) + if (names is True) and (comments is not None): + if comments in first_line: + first_line = ( + ''.join(first_line.split(comments)[1:])) + first_values = split_line(first_line) + except StopIteration: + # return an empty array if the datafile is empty + first_line = '' + first_values = [] + warnings.warn('genfromtxt: Empty input file: "%s"' % fname, stacklevel=2) + + # Should we take the first values as names ? 
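# Illustrative sketch (made-up data, not part of the library code): the
# header handling above is what lets `names=True` read field names from a
# commented first line:
# >>> from io import StringIO
# >>> import numpy as np
# >>> np.genfromtxt(StringIO("# a b c\n1 2 3\n4 5 6"), names=True).dtype.names
# ('a', 'b', 'c')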
+ if names is True: + fval = first_values[0].strip() + if comments is not None: + if fval in comments: + del first_values[0] + + # Check the columns to use: make sure `usecols` is a list + if usecols is not None: + try: + usecols = [_.strip() for _ in usecols.split(",")] + except AttributeError: + try: + usecols = list(usecols) + except TypeError: + usecols = [usecols, ] + nbcols = len(usecols or first_values) + + # Check the names and overwrite the dtype.names if needed + if names is True: + names = validate_names([str(_.strip()) for _ in first_values]) + first_line = '' + elif _is_string_like(names): + names = validate_names([_.strip() for _ in names.split(',')]) + elif names: + names = validate_names(names) + # Get the dtype + if dtype is not None: + dtype = easy_dtype(dtype, defaultfmt=defaultfmt, names=names, + excludelist=excludelist, + deletechars=deletechars, + case_sensitive=case_sensitive, + replace_space=replace_space) + # Make sure the names is a list (for 2.5) + if names is not None: + names = list(names) + + if usecols: + for (i, current) in enumerate(usecols): + # if usecols is a list of names, convert to a list of indices + if _is_string_like(current): + usecols[i] = names.index(current) + elif current < 0: + usecols[i] = current + len(first_values) + # If the dtype is not None, make sure we update it + if (dtype is not None) and (len(dtype) > nbcols): + descr = dtype.descr + dtype = np.dtype([descr[_] for _ in usecols]) + names = list(dtype.names) + # If `names` is not None, update the names + elif (names is not None) and (len(names) > nbcols): + names = [names[_] for _ in usecols] + elif (names is not None) and (dtype is not None): + names = list(dtype.names) + + # Process the missing values ............................... + # Rename missing_values for convenience + user_missing_values = missing_values or () + if isinstance(user_missing_values, bytes): + user_missing_values = user_missing_values.decode('latin1') + + # Define the list of missing_values (one column: one list) + missing_values = [list(['']) for _ in range(nbcols)] + + # We have a dictionary: process it field by field + if isinstance(user_missing_values, dict): + # Loop on the items + for (key, val) in user_missing_values.items(): + # Is the key a string ? + if _is_string_like(key): + try: + # Transform it into an integer + key = names.index(key) + except ValueError: + # We couldn't find it: the name must have been dropped + continue + # Redefine the key as needed if it's a column number + if usecols: + try: + key = usecols.index(key) + except ValueError: + pass + # Transform the value as a list of string + if isinstance(val, (list, tuple)): + val = [str(_) for _ in val] + else: + val = [str(val), ] + # Add the value(s) to the current list of missing + if key is None: + # None acts as default + for miss in missing_values: + miss.extend(val) + else: + missing_values[key].extend(val) + # We have a sequence : each item matches a column + elif isinstance(user_missing_values, (list, tuple)): + for (value, entry) in zip(user_missing_values, missing_values): + value = str(value) + if value not in entry: + entry.append(value) + # We have a string : apply it to all entries + elif isinstance(user_missing_values, str): + user_value = user_missing_values.split(",") + for entry in missing_values: + entry.extend(user_value) + # We have something else: apply it to all entries + else: + for entry in missing_values: + entry.extend([str(user_missing_values)]) + + # Process the filling_values ............................... 
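# Illustrative sketch (made-up data, not part of the library code): the
# per-column missing/filling dictionaries resolved above, seen from the
# caller's side:
# >>> from io import StringIO
# >>> import numpy as np
# >>> np.genfromtxt(StringIO("1,N/A\n,3"), delimiter=",",
# ...               missing_values={1: "N/A"}, filling_values={0: 0, 1: -1})
# array([[ 1., -1.],
#        [ 0.,  3.]])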
+ # Rename the input for convenience + user_filling_values = filling_values + if user_filling_values is None: + user_filling_values = [] + # Define the default + filling_values = [None] * nbcols + # We have a dictionary : update each entry individually + if isinstance(user_filling_values, dict): + for (key, val) in user_filling_values.items(): + if _is_string_like(key): + try: + # Transform it into an integer + key = names.index(key) + except ValueError: + # We couldn't find it: the name must have been dropped, + continue + # Redefine the key if it's a column number and usecols is defined + if usecols: + try: + key = usecols.index(key) + except ValueError: + pass + # Add the value to the list + filling_values[key] = val + # We have a sequence : update on a one-to-one basis + elif isinstance(user_filling_values, (list, tuple)): + n = len(user_filling_values) + if (n <= nbcols): + filling_values[:n] = user_filling_values + else: + filling_values = user_filling_values[:nbcols] + # We have something else : use it for all entries + else: + filling_values = [user_filling_values] * nbcols + + # Initialize the converters ................................ + if dtype is None: + # Note: we can't use a [...]*nbcols, as we would have 3 times the same + # ... converter, instead of 3 different converters. + converters = [StringConverter(None, missing_values=miss, default=fill) + for (miss, fill) in zip(missing_values, filling_values)] + else: + dtype_flat = flatten_dtype(dtype, flatten_base=True) + # Initialize the converters + if len(dtype_flat) > 1: + # Flexible type : get a converter from each dtype + zipit = zip(dtype_flat, missing_values, filling_values) + converters = [StringConverter(dt, locked=True, + missing_values=miss, default=fill) + for (dt, miss, fill) in zipit] + else: + # Set to a default converter (but w/ different missing values) + zipit = zip(missing_values, filling_values) + converters = [StringConverter(dtype, locked=True, + missing_values=miss, default=fill) + for (miss, fill) in zipit] + # Update the converters to use the user-defined ones + uc_update = [] + for (j, conv) in user_converters.items(): + # If the converter is specified by column names, use the index instead + if _is_string_like(j): + try: + j = names.index(j) + i = j + except ValueError: + continue + elif usecols: + try: + i = usecols.index(j) + except ValueError: + # Unused converter specified + continue + else: + i = j + # Find the value to test - first_line is not filtered by usecols: + if len(first_line): + testing_value = first_values[j] + else: + testing_value = None + if conv is bytes: + user_conv = asbytes + elif byte_converters: + # converters may use decode to workaround numpy's old behaviour, + # so encode the string again before passing to the user converter + def tobytes_first(x, conv): + if type(x) is bytes: + return conv(x) + return conv(x.encode("latin1")) + user_conv = functools.partial(tobytes_first, conv=conv) + else: + user_conv = conv + converters[i].update(user_conv, locked=True, + testing_value=testing_value, + default=filling_values[i], + missing_values=missing_values[i],) + uc_update.append((i, user_conv)) + # Make sure we have the corrected keys in user_converters... + user_converters.update(uc_update) + + # Fixme: possible error as following variable never used. + # miss_chars = [_.missing_values for _ in converters] + + # Initialize the output lists ... + # ... rows + rows = [] + append_to_rows = rows.append + # ... masks + if usemask: + masks = [] + append_to_masks = masks.append + # ... 
invalid + invalid = [] + append_to_invalid = invalid.append + + # Parse each line + for (i, line) in enumerate(itertools.chain([first_line, ], fhd)): + values = split_line(line) + nbvalues = len(values) + # Skip an empty line + if nbvalues == 0: + continue + if usecols: + # Select only the columns we need + try: + values = [values[_] for _ in usecols] + except IndexError: + append_to_invalid((i + skip_header + 1, nbvalues)) + continue + elif nbvalues != nbcols: + append_to_invalid((i + skip_header + 1, nbvalues)) + continue + # Store the values + append_to_rows(tuple(values)) + if usemask: + append_to_masks(tuple([v.strip() in m + for (v, m) in zip(values, + missing_values)])) + if len(rows) == max_rows: + break + + # Upgrade the converters (if needed) + if dtype is None: + for (i, converter) in enumerate(converters): + current_column = [itemgetter(i)(_m) for _m in rows] + try: + converter.iterupgrade(current_column) + except ConverterLockError: + errmsg = "Converter #%i is locked and cannot be upgraded: " % i + current_column = map(itemgetter(i), rows) + for (j, value) in enumerate(current_column): + try: + converter.upgrade(value) + except (ConverterError, ValueError): + errmsg += "(occurred line #%i for value '%s')" + errmsg %= (j + 1 + skip_header, value) + raise ConverterError(errmsg) + + # Check that we don't have invalid values + nbinvalid = len(invalid) + if nbinvalid > 0: + nbrows = len(rows) + nbinvalid - skip_footer + # Construct the error message + template = " Line #%%i (got %%i columns instead of %i)" % nbcols + if skip_footer > 0: + nbinvalid_skipped = len([_ for _ in invalid + if _[0] > nbrows + skip_header]) + invalid = invalid[:nbinvalid - nbinvalid_skipped] + skip_footer -= nbinvalid_skipped +# +# nbrows -= skip_footer +# errmsg = [template % (i, nb) +# for (i, nb) in invalid if i < nbrows] +# else: + errmsg = [template % (i, nb) + for (i, nb) in invalid] + if len(errmsg): + errmsg.insert(0, "Some errors were detected !") + errmsg = "\n".join(errmsg) + # Raise an exception ? + if invalid_raise: + raise ValueError(errmsg) + # Issue a warning ? + else: + warnings.warn(errmsg, ConversionWarning, stacklevel=2) + + # Strip the last skip_footer data + if skip_footer > 0: + rows = rows[:-skip_footer] + if usemask: + masks = masks[:-skip_footer] + + # Convert each value according to the converter: + # We want to modify the list in place to avoid creating a new one... + if loose: + rows = list( + zip(*[[conv._loose_call(_r) for _r in map(itemgetter(i), rows)] + for (i, conv) in enumerate(converters)])) + else: + rows = list( + zip(*[[conv._strict_call(_r) for _r in map(itemgetter(i), rows)] + for (i, conv) in enumerate(converters)])) + + # Reset the dtype + data = rows + if dtype is None: + # Get the dtypes from the types of the converters + column_types = [conv.type for conv in converters] + # Find the columns with strings... + strcolidx = [i for (i, v) in enumerate(column_types) + if v == np.unicode_] + + if byte_converters and strcolidx: + # convert strings back to bytes for backward compatibility + warnings.warn( + "Reading unicode strings without specifying the encoding " + "argument is deprecated. 
Set the encoding, use None for the " + "system default.", + np.VisibleDeprecationWarning, stacklevel=2) + def encode_unicode_cols(row_tup): + row = list(row_tup) + for i in strcolidx: + row[i] = row[i].encode('latin1') + return tuple(row) + + try: + data = [encode_unicode_cols(r) for r in data] + except UnicodeEncodeError: + pass + else: + for i in strcolidx: + column_types[i] = np.bytes_ + + # Update string types to be the right length + sized_column_types = column_types[:] + for i, col_type in enumerate(column_types): + if np.issubdtype(col_type, np.character): + n_chars = max(len(row[i]) for row in data) + sized_column_types[i] = (col_type, n_chars) + + if names is None: + # If the dtype is uniform (before sizing strings) + base = { + c_type + for c, c_type in zip(converters, column_types) + if c._checked} + if len(base) == 1: + uniform_type, = base + (ddtype, mdtype) = (uniform_type, bool) + else: + ddtype = [(defaultfmt % i, dt) + for (i, dt) in enumerate(sized_column_types)] + if usemask: + mdtype = [(defaultfmt % i, bool) + for (i, dt) in enumerate(sized_column_types)] + else: + ddtype = list(zip(names, sized_column_types)) + mdtype = list(zip(names, [bool] * len(sized_column_types))) + output = np.array(data, dtype=ddtype) + if usemask: + outputmask = np.array(masks, dtype=mdtype) + else: + # Overwrite the initial dtype names if needed + if names and dtype.names is not None: + dtype.names = names + # Case 1. We have a structured type + if len(dtype_flat) > 1: + # Nested dtype, eg [('a', int), ('b', [('b0', int), ('b1', 'f4')])] + # First, create the array using a flattened dtype: + # [('a', int), ('b1', int), ('b2', float)] + # Then, view the array using the specified dtype. + if 'O' in (_.char for _ in dtype_flat): + if has_nested_fields(dtype): + raise NotImplementedError( + "Nested fields involving objects are not supported...") + else: + output = np.array(data, dtype=dtype) + else: + rows = np.array(data, dtype=[('', _) for _ in dtype_flat]) + output = rows.view(dtype) + # Now, process the rowmasks the same way + if usemask: + rowmasks = np.array( + masks, dtype=np.dtype([('', bool) for t in dtype_flat])) + # Construct the new dtype + mdtype = make_mask_descr(dtype) + outputmask = rowmasks.view(mdtype) + # Case #2. We have a basic dtype + else: + # We used some user-defined converters + if user_converters: + ishomogeneous = True + descr = [] + for i, ttype in enumerate([conv.type for conv in converters]): + # Keep the dtype of the current converter + if i in user_converters: + ishomogeneous &= (ttype == dtype.type) + if np.issubdtype(ttype, np.character): + ttype = (ttype, max(len(row[i]) for row in data)) + descr.append(('', ttype)) + else: + descr.append(('', dtype)) + # So we changed the dtype ? + if not ishomogeneous: + # We have more than one field + if len(descr) > 1: + dtype = np.dtype(descr) + # We have only one field: drop the name if not needed. 
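# Illustrative sketch (made-up data, not part of the library code): the
# dtype-deduction path above (``dtype=None``); the deduced integer width is
# platform dependent:
# >>> from io import StringIO
# >>> import numpy as np
# >>> np.genfromtxt(StringIO("1 2.5 abc"), dtype=None, encoding=None).dtype
# dtype([('f0', '<i8'), ('f1', '<f8'), ('f2', '<U3')])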
+ else: + dtype = np.dtype(ttype) + # + output = np.array(data, dtype) + if usemask: + if dtype.names is not None: + mdtype = [(_, bool) for _ in dtype.names] + else: + mdtype = bool + outputmask = np.array(masks, dtype=mdtype) + # Try to take care of the missing data we missed + names = output.dtype.names + if usemask and names: + for (name, conv) in zip(names, converters): + missing_values = [conv(_) for _ in conv.missing_values + if _ != ''] + for mval in missing_values: + outputmask[name] |= (output[name] == mval) + # Construct the final array + if usemask: + output = output.view(MaskedArray) + output._mask = outputmask + output = np.squeeze(output) + if unpack: + if names is None: + return output.T + elif len(names) == 1: + # squeeze single-name dtypes too + return output[names[0]] + else: + # For structured arrays with multiple fields, + # return an array for each field. + return [output[field] for field in names] + return output + + +_genfromtxt_with_like = array_function_dispatch( + _genfromtxt_dispatcher +)(genfromtxt) + + +def recfromtxt(fname, **kwargs): + """ + Load ASCII data from a file and return it in a record array. + + If ``usemask=False`` a standard `recarray` is returned, + if ``usemask=True`` a MaskedRecords array is returned. + + Parameters + ---------- + fname, kwargs : For a description of input parameters, see `genfromtxt`. + + See Also + -------- + numpy.genfromtxt : generic function + + Notes + ----- + By default, `dtype` is None, which means that the data-type of the output + array will be determined from the data. + + """ + kwargs.setdefault("dtype", None) + usemask = kwargs.get('usemask', False) + output = genfromtxt(fname, **kwargs) + if usemask: + from numpy.ma.mrecords import MaskedRecords + output = output.view(MaskedRecords) + else: + output = output.view(np.recarray) + return output + + +def recfromcsv(fname, **kwargs): + """ + Load ASCII data stored in a comma-separated file. + + The returned array is a record array (if ``usemask=False``, see + `recarray`) or a masked record array (if ``usemask=True``, + see `ma.mrecords.MaskedRecords`). + + Parameters + ---------- + fname, kwargs : For a description of input parameters, see `genfromtxt`. + + See Also + -------- + numpy.genfromtxt : generic function to load ASCII data. + + Notes + ----- + By default, `dtype` is None, which means that the data-type of the output + array will be determined from the data. + + """ + # Set default kwargs for genfromtxt as relevant to csv import. 
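# Illustrative sketch (made-up data, not part of the library code):
# `recfromtxt` defined just above is a thin wrapper over `genfromtxt` whose
# record-array view allows attribute access to the columns:
# >>> from io import StringIO
# >>> import numpy as np
# >>> np.recfromtxt(StringIO("alpha 10\nbeta 20"), encoding=None).f1
# array([10, 20])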
+ kwargs.setdefault("case_sensitive", "lower") + kwargs.setdefault("names", True) + kwargs.setdefault("delimiter", ",") + kwargs.setdefault("dtype", None) + output = genfromtxt(fname, **kwargs) + + usemask = kwargs.get("usemask", False) + if usemask: + from numpy.ma.mrecords import MaskedRecords + output = output.view(MaskedRecords) + else: + output = output.view(np.recarray) + return output diff --git a/.local/lib/python3.8/site-packages/numpy/lib/npyio.pyi b/.local/lib/python3.8/site-packages/numpy/lib/npyio.pyi new file mode 100644 index 0000000..75d06e9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/npyio.pyi @@ -0,0 +1,266 @@ +import os +import sys +import zipfile +import types +from typing import ( + Literal as L, + Any, + Mapping, + TypeVar, + Generic, + List, + Type, + Iterator, + Union, + IO, + overload, + Sequence, + Callable, + Pattern, + Protocol, + Iterable, +) + +from numpy import ( + DataSource as DataSource, + ndarray, + recarray, + dtype, + generic, + float64, + void, + record, +) + +from numpy.ma.mrecords import MaskedRecords +from numpy.typing import ArrayLike, DTypeLike, NDArray, _SupportsDType + +from numpy.core.multiarray import ( + packbits as packbits, + unpackbits as unpackbits, +) + +_T = TypeVar("_T") +_T_contra = TypeVar("_T_contra", contravariant=True) +_T_co = TypeVar("_T_co", covariant=True) +_SCT = TypeVar("_SCT", bound=generic) +_CharType_co = TypeVar("_CharType_co", str, bytes, covariant=True) +_CharType_contra = TypeVar("_CharType_contra", str, bytes, contravariant=True) + +_DTypeLike = Union[ + Type[_SCT], + dtype[_SCT], + _SupportsDType[dtype[_SCT]], +] + +class _SupportsGetItem(Protocol[_T_contra, _T_co]): + def __getitem__(self, key: _T_contra, /) -> _T_co: ... + +class _SupportsRead(Protocol[_CharType_co]): + def read(self) -> _CharType_co: ... + +class _SupportsReadSeek(Protocol[_CharType_co]): + def read(self, n: int, /) -> _CharType_co: ... + def seek(self, offset: int, whence: int, /) -> object: ... + +class _SupportsWrite(Protocol[_CharType_contra]): + def write(self, s: _CharType_contra, /) -> object: ... + +__all__: List[str] + +class BagObj(Generic[_T_co]): + def __init__(self, obj: _SupportsGetItem[str, _T_co]) -> None: ... + def __getattribute__(self, key: str) -> _T_co: ... + def __dir__(self) -> List[str]: ... + +class NpzFile(Mapping[str, NDArray[Any]]): + zip: zipfile.ZipFile + fid: None | IO[str] + files: List[str] + allow_pickle: bool + pickle_kwargs: None | Mapping[str, Any] + # Represent `f` as a mutable property so we can access the type of `self` + @property + def f(self: _T) -> BagObj[_T]: ... + @f.setter + def f(self: _T, value: BagObj[_T]) -> None: ... + def __init__( + self, + fid: IO[str], + own_fid: bool = ..., + allow_pickle: bool = ..., + pickle_kwargs: None | Mapping[str, Any] = ..., + ) -> None: ... + def __enter__(self: _T) -> _T: ... + def __exit__( + self, + exc_type: None | Type[BaseException], + exc_value: None | BaseException, + traceback: None | types.TracebackType, + /, + ) -> None: ... + def close(self) -> None: ... + def __del__(self) -> None: ... + def __iter__(self) -> Iterator[str]: ... + def __len__(self) -> int: ... + def __getitem__(self, key: str) -> NDArray[Any]: ... 
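# Illustrative sketch (the file name 'pair.npz' is arbitrary and not part of
# the stub): what the NpzFile/BagObj declarations above describe at runtime:
# >>> import numpy as np
# >>> np.savez("pair.npz", x=np.arange(3), y=np.ones(2))
# >>> with np.load("pair.npz") as npz:  # an NpzFile, hence a Mapping
# ...     sorted(npz.files), int(npz["x"].sum()), npz.f.y.shape
# (['x', 'y'], 3, (2,))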
+ +# NOTE: Returns a `NpzFile` if file is a zip file; +# returns an `ndarray`/`memmap` otherwise +def load( + file: str | bytes | os.PathLike[Any] | _SupportsReadSeek[bytes], + mmap_mode: L[None, "r+", "r", "w+", "c"] = ..., + allow_pickle: bool = ..., + fix_imports: bool = ..., + encoding: L["ASCII", "latin1", "bytes"] = ..., +) -> Any: ... + +def save( + file: str | os.PathLike[str] | _SupportsWrite[bytes], + arr: ArrayLike, + allow_pickle: bool = ..., + fix_imports: bool = ..., +) -> None: ... + +def savez( + file: str | os.PathLike[str] | _SupportsWrite[bytes], + *args: ArrayLike, + **kwds: ArrayLike, +) -> None: ... + +def savez_compressed( + file: str | os.PathLike[str] | _SupportsWrite[bytes], + *args: ArrayLike, + **kwds: ArrayLike, +) -> None: ... + +# File-like objects only have to implement `__iter__` and, +# optionally, `encoding` +@overload +def loadtxt( + fname: str | os.PathLike[str] | Iterable[str] | Iterable[bytes], + dtype: None = ..., + comments: None | str | Sequence[str] = ..., + delimiter: None | str = ..., + converters: None | Mapping[int | str, Callable[[str], Any]] = ..., + skiprows: int = ..., + usecols: int | Sequence[int] = ..., + unpack: bool = ..., + ndmin: L[0, 1, 2] = ..., + encoding: None | str = ..., + max_rows: None | int = ..., + *, + like: None | ArrayLike = ... +) -> NDArray[float64]: ... +@overload +def loadtxt( + fname: str | os.PathLike[str] | Iterable[str] | Iterable[bytes], + dtype: _DTypeLike[_SCT], + comments: None | str | Sequence[str] = ..., + delimiter: None | str = ..., + converters: None | Mapping[int | str, Callable[[str], Any]] = ..., + skiprows: int = ..., + usecols: int | Sequence[int] = ..., + unpack: bool = ..., + ndmin: L[0, 1, 2] = ..., + encoding: None | str = ..., + max_rows: None | int = ..., + *, + like: None | ArrayLike = ... +) -> NDArray[_SCT]: ... +@overload +def loadtxt( + fname: str | os.PathLike[str] | Iterable[str] | Iterable[bytes], + dtype: DTypeLike, + comments: None | str | Sequence[str] = ..., + delimiter: None | str = ..., + converters: None | Mapping[int | str, Callable[[str], Any]] = ..., + skiprows: int = ..., + usecols: int | Sequence[int] = ..., + unpack: bool = ..., + ndmin: L[0, 1, 2] = ..., + encoding: None | str = ..., + max_rows: None | int = ..., + *, + like: None | ArrayLike = ... +) -> NDArray[Any]: ... + +def savetxt( + fname: str | os.PathLike[str] | _SupportsWrite[str] | _SupportsWrite[bytes], + X: ArrayLike, + fmt: str | Sequence[str] = ..., + delimiter: str = ..., + newline: str = ..., + header: str = ..., + footer: str = ..., + comments: str = ..., + encoding: None | str = ..., +) -> None: ... + +@overload +def fromregex( + file: str | os.PathLike[str] | _SupportsRead[str] | _SupportsRead[bytes], + regexp: str | bytes | Pattern[Any], + dtype: _DTypeLike[_SCT], + encoding: None | str = ... +) -> NDArray[_SCT]: ... +@overload +def fromregex( + file: str | os.PathLike[str] | _SupportsRead[str] | _SupportsRead[bytes], + regexp: str | bytes | Pattern[Any], + dtype: DTypeLike, + encoding: None | str = ... +) -> NDArray[Any]: ... + +# TODO: Sort out arguments +@overload +def genfromtxt( + fname: str | os.PathLike[str] | Iterable[str] | Iterable[bytes], + dtype: None = ..., + *args: Any, + **kwargs: Any, +) -> NDArray[float64]: ... +@overload +def genfromtxt( + fname: str | os.PathLike[str] | Iterable[str] | Iterable[bytes], + dtype: _DTypeLike[_SCT], + *args: Any, + **kwargs: Any, +) -> NDArray[_SCT]: ... 
+@overload +def genfromtxt( + fname: str | os.PathLike[str] | Iterable[str] | Iterable[bytes], + dtype: DTypeLike, + *args: Any, + **kwargs: Any, +) -> NDArray[Any]: ... + +@overload +def recfromtxt( + fname: str | os.PathLike[str] | Iterable[str] | Iterable[bytes], + *, + usemask: L[False] = ..., + **kwargs: Any, +) -> recarray[Any, dtype[record]]: ... +@overload +def recfromtxt( + fname: str | os.PathLike[str] | Iterable[str] | Iterable[bytes], + *, + usemask: L[True], + **kwargs: Any, +) -> MaskedRecords[Any, dtype[void]]: ... + +@overload +def recfromcsv( + fname: str | os.PathLike[str] | Iterable[str] | Iterable[bytes], + *, + usemask: L[False] = ..., + **kwargs: Any, +) -> recarray[Any, dtype[record]]: ... +@overload +def recfromcsv( + fname: str | os.PathLike[str] | Iterable[str] | Iterable[bytes], + *, + usemask: L[True], + **kwargs: Any, +) -> MaskedRecords[Any, dtype[void]]: ... diff --git a/.local/lib/python3.8/site-packages/numpy/lib/polynomial.py b/.local/lib/python3.8/site-packages/numpy/lib/polynomial.py new file mode 100644 index 0000000..f824c4c --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/polynomial.py @@ -0,0 +1,1452 @@ +""" +Functions to operate on polynomials. + +""" +__all__ = ['poly', 'roots', 'polyint', 'polyder', 'polyadd', + 'polysub', 'polymul', 'polydiv', 'polyval', 'poly1d', + 'polyfit', 'RankWarning'] + +import functools +import re +import warnings +import numpy.core.numeric as NX + +from numpy.core import (isscalar, abs, finfo, atleast_1d, hstack, dot, array, + ones) +from numpy.core import overrides +from numpy.core.overrides import set_module +from numpy.lib.twodim_base import diag, vander +from numpy.lib.function_base import trim_zeros +from numpy.lib.type_check import iscomplex, real, imag, mintypecode +from numpy.linalg import eigvals, lstsq, inv + + +array_function_dispatch = functools.partial( + overrides.array_function_dispatch, module='numpy') + + +@set_module('numpy') +class RankWarning(UserWarning): + """ + Issued by `polyfit` when the Vandermonde matrix is rank deficient. + + For more information, a way to suppress the warning, and an example of + `RankWarning` being issued, see `polyfit`. + + """ + pass + + +def _poly_dispatcher(seq_of_zeros): + return seq_of_zeros + + +@array_function_dispatch(_poly_dispatcher) +def poly(seq_of_zeros): + """ + Find the coefficients of a polynomial with the given sequence of roots. + + .. note:: + This forms part of the old polynomial API. Since version 1.4, the + new polynomial API defined in `numpy.polynomial` is preferred. + A summary of the differences can be found in the + :doc:`transition guide </reference/routines.polynomials>`. + + Returns the coefficients of the polynomial whose leading coefficient + is one for the given sequence of zeros (multiple roots must be included + in the sequence as many times as their multiplicity; see Examples). + A square matrix (or array, which will be treated as a matrix) can also + be given, in which case the coefficients of the characteristic polynomial + of the matrix are returned. + + Parameters + ---------- + seq_of_zeros : array_like, shape (N,) or (N, N) + A sequence of polynomial roots, or a square array or matrix object. + + Returns + ------- + c : ndarray + 1D array of polynomial coefficients from highest to lowest degree: + + ``c[0] * x**(N) + c[1] * x**(N-1) + ... + c[N-1] * x + c[N]`` + where c[0] always equals 1. + + Raises + ------ + ValueError + If input is the wrong shape (the input must be a 1-D or square + 2-D array).
+ + See Also + -------- + polyval : Compute polynomial values. + roots : Return the roots of a polynomial. + polyfit : Least squares polynomial fit. + poly1d : A one-dimensional polynomial class. + + Notes + ----- + Specifying the roots of a polynomial still leaves one degree of + freedom, typically represented by an undetermined leading + coefficient. [1]_ In the case of this function, that coefficient - + the first one in the returned array - is always taken as one. (If + for some reason you have one other point, the only automatic way + presently to leverage that information is to use ``polyfit``.) + + The characteristic polynomial, :math:`p_a(t)`, of an `n`-by-`n` + matrix **A** is given by + + :math:`p_a(t) = \\mathrm{det}(t\\, \\mathbf{I} - \\mathbf{A})`, + + where **I** is the `n`-by-`n` identity matrix. [2]_ + + References + ---------- + .. [1] M. Sullivan and M. Sullivan, III, "Algebra and Trigonometry, + Enhanced With Graphing Utilities," Prentice-Hall, pg. 318, 1996. + + .. [2] G. Strang, "Linear Algebra and Its Applications, 2nd Edition," + Academic Press, pg. 182, 1980. + + Examples + -------- + Given a sequence of a polynomial's zeros: + + >>> np.poly((0, 0, 0)) # Multiple root example + array([1., 0., 0., 0.]) + + The line above represents z**3 + 0*z**2 + 0*z + 0. + + >>> np.poly((-1./2, 0, 1./2)) + array([ 1. , 0. , -0.25, 0. ]) + + The line above represents z**3 - z/4 + + >>> np.poly((np.random.random(1)[0], 0, np.random.random(1)[0])) + array([ 1. , -0.77086955, 0.08618131, 0. ]) # random + + Given a square array object: + + >>> P = np.array([[0, 1./3], [-1./2, 0]]) + >>> np.poly(P) + array([1. , 0. , 0.16666667]) + + Note how in all cases the leading coefficient is always 1. + + """ + seq_of_zeros = atleast_1d(seq_of_zeros) + sh = seq_of_zeros.shape + + if len(sh) == 2 and sh[0] == sh[1] and sh[0] != 0: + seq_of_zeros = eigvals(seq_of_zeros) + elif len(sh) == 1: + dt = seq_of_zeros.dtype + # Let object arrays slip through, e.g. for arbitrary precision + if dt != object: + seq_of_zeros = seq_of_zeros.astype(mintypecode(dt.char)) + else: + raise ValueError("input must be 1d or non-empty square 2d array.") + + if len(seq_of_zeros) == 0: + return 1.0 + dt = seq_of_zeros.dtype + a = ones((1,), dtype=dt) + for zero in seq_of_zeros: + a = NX.convolve(a, array([1, -zero], dtype=dt), mode='full') + + if issubclass(a.dtype.type, NX.complexfloating): + # if complex roots are all complex conjugates, the roots are real. + roots = NX.asarray(seq_of_zeros, complex) + if NX.all(NX.sort(roots) == NX.sort(roots.conjugate())): + a = a.real.copy() + + return a + + +def _roots_dispatcher(p): + return p + + +@array_function_dispatch(_roots_dispatcher) +def roots(p): + """ + Return the roots of a polynomial with coefficients given in p. + + .. note:: + This forms part of the old polynomial API. Since version 1.4, the + new polynomial API defined in `numpy.polynomial` is preferred. + A summary of the differences can be found in the + :doc:`transition guide </reference/routines.polynomials>`. + + The values in the rank-1 array `p` are coefficients of a polynomial. + If the length of `p` is n+1 then the polynomial is described by:: + + p[0] * x**n + p[1] * x**(n-1) + ... + p[n-1]*x + p[n] + + Parameters + ---------- + p : array_like + Rank-1 array of polynomial coefficients. + + Returns + ------- + out : ndarray + An array containing the roots of the polynomial. + + Raises + ------ + ValueError + When `p` cannot be converted to a rank-1 array.
+ + See also + -------- + poly : Find the coefficients of a polynomial with a given sequence + of roots. + polyval : Compute polynomial values. + polyfit : Least squares polynomial fit. + poly1d : A one-dimensional polynomial class. + + Notes + ----- + The algorithm relies on computing the eigenvalues of the + companion matrix [1]_. + + References + ---------- + .. [1] R. A. Horn & C. R. Johnson, *Matrix Analysis*. Cambridge, UK: + Cambridge University Press, 1999, pp. 146-7. + + Examples + -------- + >>> coeff = [3.2, 2, 1] + >>> np.roots(coeff) + array([-0.3125+0.46351241j, -0.3125-0.46351241j]) + + """ + # If input is scalar, this makes it an array + p = atleast_1d(p) + if p.ndim != 1: + raise ValueError("Input must be a rank-1 array.") + + # find non-zero array entries + non_zero = NX.nonzero(NX.ravel(p))[0] + + # Return an empty array if polynomial is all zeros + if len(non_zero) == 0: + return NX.array([]) + + # find the number of trailing zeros -- this is the number of roots at 0. + trailing_zeros = len(p) - non_zero[-1] - 1 + + # strip leading and trailing zeros + p = p[int(non_zero[0]):int(non_zero[-1])+1] + + # casting: if incoming array isn't floating point, make it floating point. + if not issubclass(p.dtype.type, (NX.floating, NX.complexfloating)): + p = p.astype(float) + + N = len(p) + if N > 1: + # build companion matrix and find its eigenvalues (the roots) + A = diag(NX.ones((N-2,), p.dtype), -1) + A[0,:] = -p[1:] / p[0] + roots = eigvals(A) + else: + roots = NX.array([]) + + # tack any zeros onto the back of the array + roots = hstack((roots, NX.zeros(trailing_zeros, roots.dtype))) + return roots + + +def _polyint_dispatcher(p, m=None, k=None): + return (p,) + + +@array_function_dispatch(_polyint_dispatcher) +def polyint(p, m=1, k=None): + """ + Return an antiderivative (indefinite integral) of a polynomial. + + .. note:: + This forms part of the old polynomial API. Since version 1.4, the + new polynomial API defined in `numpy.polynomial` is preferred. + A summary of the differences can be found in the + :doc:`transition guide </reference/routines.polynomials>`. + + The returned order `m` antiderivative `P` of polynomial `p` satisfies + :math:`\\frac{d^m}{dx^m}P(x) = p(x)` and is defined up to `m - 1` + integration constants `k`. The constants determine the low-order + polynomial part + + .. math:: \\frac{k_{m-1}}{0!} x^0 + \\ldots + \\frac{k_0}{(m-1)!}x^{m-1} + + of `P` so that :math:`P^{(j)}(0) = k_{m-j-1}`. + + Parameters + ---------- + p : array_like or poly1d + Polynomial to integrate. + A sequence is interpreted as polynomial coefficients, see `poly1d`. + m : int, optional + Order of the antiderivative. (Default: 1) + k : list of `m` scalars or scalar, optional + Integration constants. They are given in the order of integration: + those corresponding to highest-order terms come first. + + If ``None`` (default), all constants are assumed to be zero. + If `m = 1`, a single scalar can be given instead of a list. + + See Also + -------- + polyder : derivative of a polynomial + poly1d.integ : equivalent method + + Examples + -------- + The defining property of the antiderivative: + + >>> p = np.poly1d([1,1,1]) + >>> P = np.polyint(p) + >>> P + poly1d([ 0.33333333, 0.5 , 1. , 0. ]) # may vary + >>> np.polyder(P) == p + True + + The integration constants default to zero, but can be specified: + + >>> P = np.polyint(p, 3) + >>> P(0) + 0.0 + >>> np.polyder(P)(0) + 0.0 + >>> np.polyder(P, 2)(0) + 0.0 + >>> P = np.polyint(p, 3, k=[6,5,3]) + >>> P + poly1d([ 0.01666667, 0.04166667, 0.16666667, 3. , 5. , 3.
]) # may vary + + Note that 3 = 6 / 2!, and that the constants are given in the order of + integrations. Constant of the highest-order polynomial term comes first: + + >>> np.polyder(P, 2)(0) + 6.0 + >>> np.polyder(P, 1)(0) + 5.0 + >>> P(0) + 3.0 + + """ + m = int(m) + if m < 0: + raise ValueError("Order of integral must be positive (see polyder)") + if k is None: + k = NX.zeros(m, float) + k = atleast_1d(k) + if len(k) == 1 and m > 1: + k = k[0]*NX.ones(m, float) + if len(k) < m: + raise ValueError( + "k must be a scalar or a rank-1 array of length 1 or >m.") + + truepoly = isinstance(p, poly1d) + p = NX.asarray(p) + if m == 0: + if truepoly: + return poly1d(p) + return p + else: + # Note: this must work also with object and integer arrays + y = NX.concatenate((p.__truediv__(NX.arange(len(p), 0, -1)), [k[0]])) + val = polyint(y, m - 1, k=k[1:]) + if truepoly: + return poly1d(val) + return val + + +def _polyder_dispatcher(p, m=None): + return (p,) + + +@array_function_dispatch(_polyder_dispatcher) +def polyder(p, m=1): + """ + Return the derivative of the specified order of a polynomial. + + .. note:: + This forms part of the old polynomial API. Since version 1.4, the + new polynomial API defined in `numpy.polynomial` is preferred. + A summary of the differences can be found in the + :doc:`transition guide </reference/routines.polynomials>`. + + Parameters + ---------- + p : poly1d or sequence + Polynomial to differentiate. + A sequence is interpreted as polynomial coefficients, see `poly1d`. + m : int, optional + Order of differentiation (default: 1) + + Returns + ------- + der : poly1d + A new polynomial representing the derivative. + + See Also + -------- + polyint : Anti-derivative of a polynomial. + poly1d : Class for one-dimensional polynomials. + + Examples + -------- + The derivative of the polynomial :math:`x^3 + x^2 + x^1 + 1` is: + + >>> p = np.poly1d([1,1,1,1]) + >>> p2 = np.polyder(p) + >>> p2 + poly1d([3, 2, 1]) + + which evaluates to: + + >>> p2(2.) + 17.0 + + We can verify this, approximating the derivative with + ``(f(x + h) - f(x))/h``: + + >>> (p(2. + 0.001) - p(2.)) / 0.001 + 17.007000999997857 + + The fourth-order derivative of a 3rd-order polynomial is zero: + + >>> np.polyder(p, 2) + poly1d([6, 2]) + >>> np.polyder(p, 3) + poly1d([6]) + >>> np.polyder(p, 4) + poly1d([0]) + + """ + m = int(m) + if m < 0: + raise ValueError("Order of derivative must be positive (see polyint)") + + truepoly = isinstance(p, poly1d) + p = NX.asarray(p) + n = len(p) - 1 + y = p[:-1] * NX.arange(n, 0, -1) + if m == 0: + val = p + else: + val = polyder(y, m - 1) + if truepoly: + val = poly1d(val) + return val + + +def _polyfit_dispatcher(x, y, deg, rcond=None, full=None, w=None, cov=None): + return (x, y, w) + + +@array_function_dispatch(_polyfit_dispatcher) +def polyfit(x, y, deg, rcond=None, full=False, w=None, cov=False): + """ + Least squares polynomial fit. + + .. note:: + This forms part of the old polynomial API. Since version 1.4, the + new polynomial API defined in `numpy.polynomial` is preferred. + A summary of the differences can be found in the + :doc:`transition guide </reference/routines.polynomials>`. + + Fit a polynomial ``p(x) = p[0] * x**deg + ... + p[deg]`` of degree `deg` + to points `(x, y)`. Returns a vector of coefficients `p` that minimises + the squared error in the order `deg`, `deg-1`, ... `0`. + + The `Polynomial.fit <numpy.polynomial.polynomial.Polynomial.fit>` class + method is recommended for new code as it is more stable numerically. See + the documentation of the method for more information.
+ + Parameters + ---------- + x : array_like, shape (M,) + x-coordinates of the M sample points ``(x[i], y[i])``. + y : array_like, shape (M,) or (M, K) + y-coordinates of the sample points. Several data sets of sample + points sharing the same x-coordinates can be fitted at once by + passing in a 2D-array that contains one dataset per column. + deg : int + Degree of the fitting polynomial + rcond : float, optional + Relative condition number of the fit. Singular values smaller than + this relative to the largest singular value will be ignored. The + default value is len(x)*eps, where eps is the relative precision of + the float type, about 2e-16 in most cases. + full : bool, optional + Switch determining nature of return value. When it is False (the + default) just the coefficients are returned, when True diagnostic + information from the singular value decomposition is also returned. + w : array_like, shape (M,), optional + Weights. If not None, the weight ``w[i]`` applies to the unsquared + residual ``y[i] - y_hat[i]`` at ``x[i]``. Ideally the weights are + chosen so that the errors of the products ``w[i]*y[i]`` all have the + same variance. When using inverse-variance weighting, use + ``w[i] = 1/sigma(y[i])``. The default value is None. + cov : bool or str, optional + If given and not `False`, return not just the estimate but also its + covariance matrix. By default, the covariance are scaled by + chi2/dof, where dof = M - (deg + 1), i.e., the weights are presumed + to be unreliable except in a relative sense and everything is scaled + such that the reduced chi2 is unity. This scaling is omitted if + ``cov='unscaled'``, as is relevant for the case that the weights are + w = 1/sigma, with sigma known to be a reliable estimate of the + uncertainty. + + Returns + ------- + p : ndarray, shape (deg + 1,) or (deg + 1, K) + Polynomial coefficients, highest power first. If `y` was 2-D, the + coefficients for `k`-th data set are in ``p[:,k]``. + + residuals, rank, singular_values, rcond + These values are only returned if ``full == True`` + + - residuals -- sum of squared residuals of the least squares fit + - rank -- the effective rank of the scaled Vandermonde + coefficient matrix + - singular_values -- singular values of the scaled Vandermonde + coefficient matrix + - rcond -- value of `rcond`. + + For more details, see `numpy.linalg.lstsq`. + + V : ndarray, shape (M,M) or (M,M,K) + Present only if ``full == False`` and ``cov == True``. The covariance + matrix of the polynomial coefficient estimates. The diagonal of + this matrix are the variance estimates for each coefficient. If y + is a 2-D array, then the covariance matrix for the `k`-th data set + are in ``V[:,:,k]`` + + + Warns + ----- + RankWarning + The rank of the coefficient matrix in the least-squares fit is + deficient. The warning is only raised if ``full == False``. + + The warnings can be turned off by + + >>> import warnings + >>> warnings.simplefilter('ignore', np.RankWarning) + + See Also + -------- + polyval : Compute polynomial values. + linalg.lstsq : Computes a least-squares fit. + scipy.interpolate.UnivariateSpline : Computes spline fits. + + Notes + ----- + The solution minimizes the squared error + + .. math:: + E = \\sum_{j=0}^k |p(x_j) - y_j|^2 + + in the equations:: + + x[0]**n * p[0] + ... + x[0] * p[n-1] + p[n] = y[0] + x[1]**n * p[0] + ... + x[1] * p[n-1] + p[n] = y[1] + ... + x[k]**n * p[0] + ... + x[k] * p[n-1] + p[n] = y[k] + + The coefficient matrix of the coefficients `p` is a Vandermonde matrix. 
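+
+    As an illustration, the same least-squares system can be built and
+    solved directly with `numpy.vander` and `numpy.linalg.lstsq` (a sketch
+    only; the implementation below additionally scales the columns to
+    improve conditioning):
+
+    >>> x = np.array([0.0, 1.0, 2.0, 3.0])
+    >>> y = np.array([1.0, 3.0, 7.0, 13.0])
+    >>> A = np.vander(x, 3)  # columns are x**2, x**1, x**0
+    >>> coef = np.linalg.lstsq(A, y, rcond=None)[0]
+    >>> np.allclose(coef, np.polyfit(x, y, 2))
+    True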
+ + `polyfit` issues a `RankWarning` when the least-squares fit is badly + conditioned. This implies that the best fit is not well-defined due + to numerical error. The results may be improved by lowering the polynomial + degree or by replacing `x` by `x` - `x`.mean(). The `rcond` parameter + can also be set to a value smaller than its default, but the resulting + fit may be spurious: including contributions from the small singular + values can add numerical noise to the result. + + Note that fitting polynomial coefficients is inherently badly conditioned + when the degree of the polynomial is large or the interval of sample points + is badly centered. The quality of the fit should always be checked in these + cases. When polynomial fits are not satisfactory, splines may be a good + alternative. + + References + ---------- + .. [1] Wikipedia, "Curve fitting", + https://en.wikipedia.org/wiki/Curve_fitting + .. [2] Wikipedia, "Polynomial interpolation", + https://en.wikipedia.org/wiki/Polynomial_interpolation + + Examples + -------- + >>> import warnings + >>> x = np.array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0]) + >>> y = np.array([0.0, 0.8, 0.9, 0.1, -0.8, -1.0]) + >>> z = np.polyfit(x, y, 3) + >>> z + array([ 0.08703704, -0.81349206, 1.69312169, -0.03968254]) # may vary + + It is convenient to use `poly1d` objects for dealing with polynomials: + + >>> p = np.poly1d(z) + >>> p(0.5) + 0.6143849206349179 # may vary + >>> p(3.5) + -0.34732142857143039 # may vary + >>> p(10) + 22.579365079365115 # may vary + + High-order polynomials may oscillate wildly: + + >>> with warnings.catch_warnings(): + ... warnings.simplefilter('ignore', np.RankWarning) + ... p30 = np.poly1d(np.polyfit(x, y, 30)) + ... + >>> p30(4) + -0.80000000000000204 # may vary + >>> p30(5) + -0.99999999999999445 # may vary + >>> p30(4.5) + -0.10547061179440398 # may vary + + Illustration: + + >>> import matplotlib.pyplot as plt + >>> xp = np.linspace(-2, 6, 100) + >>> _ = plt.plot(x, y, '.', xp, p(xp), '-', xp, p30(xp), '--') + >>> plt.ylim(-2,2) + (-2, 2) + >>> plt.show() + + """ + order = int(deg) + 1 + x = NX.asarray(x) + 0.0 + y = NX.asarray(y) + 0.0 + + # check arguments. 
+ if deg < 0: + raise ValueError("expected deg >= 0") + if x.ndim != 1: + raise TypeError("expected 1D vector for x") + if x.size == 0: + raise TypeError("expected non-empty vector for x") + if y.ndim < 1 or y.ndim > 2: + raise TypeError("expected 1D or 2D array for y") + if x.shape[0] != y.shape[0]: + raise TypeError("expected x and y to have same length") + + # set rcond + if rcond is None: + rcond = len(x)*finfo(x.dtype).eps + + # set up least squares equation for powers of x + lhs = vander(x, order) + rhs = y + + # apply weighting + if w is not None: + w = NX.asarray(w) + 0.0 + if w.ndim != 1: + raise TypeError("expected a 1-d array for weights") + if w.shape[0] != y.shape[0]: + raise TypeError("expected w and y to have the same length") + lhs *= w[:, NX.newaxis] + if rhs.ndim == 2: + rhs *= w[:, NX.newaxis] + else: + rhs *= w + + # scale lhs to improve condition number and solve + scale = NX.sqrt((lhs*lhs).sum(axis=0)) + lhs /= scale + c, resids, rank, s = lstsq(lhs, rhs, rcond) + c = (c.T/scale).T # broadcast scale coefficients + + # warn on rank reduction, which indicates an ill conditioned matrix + if rank != order and not full: + msg = "Polyfit may be poorly conditioned" + warnings.warn(msg, RankWarning, stacklevel=4) + + if full: + return c, resids, rank, s, rcond + elif cov: + Vbase = inv(dot(lhs.T, lhs)) + Vbase /= NX.outer(scale, scale) + if cov == "unscaled": + fac = 1 + else: + if len(x) <= order: + raise ValueError("the number of data points must exceed order " + "to scale the covariance matrix") + # note, this used to be: fac = resids / (len(x) - order - 2.0) + # it was deciced that the "- 2" (originally justified by "Bayesian + # uncertainty analysis") is not was the user expects + # (see gh-11196 and gh-11197) + fac = resids / (len(x) - order) + if y.ndim == 1: + return c, Vbase * fac + else: + return c, Vbase[:,:, NX.newaxis] * fac + else: + return c + + +def _polyval_dispatcher(p, x): + return (p, x) + + +@array_function_dispatch(_polyval_dispatcher) +def polyval(p, x): + """ + Evaluate a polynomial at specific values. + + .. note:: + This forms part of the old polynomial API. Since version 1.4, the + new polynomial API defined in `numpy.polynomial` is preferred. + A summary of the differences can be found in the + :doc:`transition guide `. + + If `p` is of length N, this function returns the value: + + ``p[0]*x**(N-1) + p[1]*x**(N-2) + ... + p[N-2]*x + p[N-1]`` + + If `x` is a sequence, then ``p(x)`` is returned for each element of ``x``. + If `x` is another polynomial then the composite polynomial ``p(x(t))`` + is returned. + + Parameters + ---------- + p : array_like or poly1d object + 1D array of polynomial coefficients (including coefficients equal + to zero) from highest degree to the constant term, or an + instance of poly1d. + x : array_like or poly1d object + A number, an array of numbers, or an instance of poly1d, at + which to evaluate `p`. + + Returns + ------- + values : ndarray or poly1d + If `x` is a poly1d instance, the result is the composition of the two + polynomials, i.e., `x` is "substituted" in `p` and the simplified + result is returned. In addition, the type of `x` - array_like or + poly1d - governs the type of the output: `x` array_like => `values` + array_like, `x` a poly1d object => `values` is also. + + See Also + -------- + poly1d: A polynomial class. + + Notes + ----- + Horner's scheme [1]_ is used to evaluate the polynomial. Even so, + for polynomials of high degree the values may be inaccurate due to + rounding errors. Use carefully. 
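+
+    A minimal sketch of the Horner recurrence, the same multiply-add loop
+    used in the implementation below (evaluating ``3*x**2 + 0*x + 1`` at
+    ``x = 5``):
+
+    >>> x, y = 5, 0
+    >>> for c in [3, 0, 1]:
+    ...     y = y * x + c
+    ...
+    >>> y
+    76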
+ + If `x` is a subtype of `ndarray` the return value will be of the same type. + + References + ---------- + .. [1] I. N. Bronshtein, K. A. Semendyayev, and K. A. Hirsch (Eng. + trans. Ed.), *Handbook of Mathematics*, New York, Van Nostrand + Reinhold Co., 1985, pg. 720. + + Examples + -------- + >>> np.polyval([3,0,1], 5) # 3 * 5**2 + 0 * 5**1 + 1 + 76 + >>> np.polyval([3,0,1], np.poly1d(5)) + poly1d([76]) + >>> np.polyval(np.poly1d([3,0,1]), 5) + 76 + >>> np.polyval(np.poly1d([3,0,1]), np.poly1d(5)) + poly1d([76]) + + """ + p = NX.asarray(p) + if isinstance(x, poly1d): + y = 0 + else: + x = NX.asanyarray(x) + y = NX.zeros_like(x) + for pv in p: + y = y * x + pv + return y + + +def _binary_op_dispatcher(a1, a2): + return (a1, a2) + + +@array_function_dispatch(_binary_op_dispatcher) +def polyadd(a1, a2): + """ + Find the sum of two polynomials. + + .. note:: + This forms part of the old polynomial API. Since version 1.4, the + new polynomial API defined in `numpy.polynomial` is preferred. + A summary of the differences can be found in the + :doc:`transition guide `. + + Returns the polynomial resulting from the sum of two input polynomials. + Each input must be either a poly1d object or a 1D sequence of polynomial + coefficients, from highest to lowest degree. + + Parameters + ---------- + a1, a2 : array_like or poly1d object + Input polynomials. + + Returns + ------- + out : ndarray or poly1d object + The sum of the inputs. If either input is a poly1d object, then the + output is also a poly1d object. Otherwise, it is a 1D array of + polynomial coefficients from highest to lowest degree. + + See Also + -------- + poly1d : A one-dimensional polynomial class. + poly, polyadd, polyder, polydiv, polyfit, polyint, polysub, polyval + + Examples + -------- + >>> np.polyadd([1, 2], [9, 5, 4]) + array([9, 6, 6]) + + Using poly1d objects: + + >>> p1 = np.poly1d([1, 2]) + >>> p2 = np.poly1d([9, 5, 4]) + >>> print(p1) + 1 x + 2 + >>> print(p2) + 2 + 9 x + 5 x + 4 + >>> print(np.polyadd(p1, p2)) + 2 + 9 x + 6 x + 6 + + """ + truepoly = (isinstance(a1, poly1d) or isinstance(a2, poly1d)) + a1 = atleast_1d(a1) + a2 = atleast_1d(a2) + diff = len(a2) - len(a1) + if diff == 0: + val = a1 + a2 + elif diff > 0: + zr = NX.zeros(diff, a1.dtype) + val = NX.concatenate((zr, a1)) + a2 + else: + zr = NX.zeros(abs(diff), a2.dtype) + val = a1 + NX.concatenate((zr, a2)) + if truepoly: + val = poly1d(val) + return val + + +@array_function_dispatch(_binary_op_dispatcher) +def polysub(a1, a2): + """ + Difference (subtraction) of two polynomials. + + .. note:: + This forms part of the old polynomial API. Since version 1.4, the + new polynomial API defined in `numpy.polynomial` is preferred. + A summary of the differences can be found in the + :doc:`transition guide `. + + Given two polynomials `a1` and `a2`, returns ``a1 - a2``. + `a1` and `a2` can be either array_like sequences of the polynomials' + coefficients (including coefficients equal to zero), or `poly1d` objects. + + Parameters + ---------- + a1, a2 : array_like or poly1d + Minuend and subtrahend polynomials, respectively. + + Returns + ------- + out : ndarray or poly1d + Array or `poly1d` object of the difference polynomial's coefficients. + + See Also + -------- + polyval, polydiv, polymul, polyadd + + Examples + -------- + .. 
math:: (2 x^2 + 10 x - 2) - (3 x^2 + 10 x -4) = (-x^2 + 2) + + >>> np.polysub([2, 10, -2], [3, 10, -4]) + array([-1, 0, 2]) + + """ + truepoly = (isinstance(a1, poly1d) or isinstance(a2, poly1d)) + a1 = atleast_1d(a1) + a2 = atleast_1d(a2) + diff = len(a2) - len(a1) + if diff == 0: + val = a1 - a2 + elif diff > 0: + zr = NX.zeros(diff, a1.dtype) + val = NX.concatenate((zr, a1)) - a2 + else: + zr = NX.zeros(abs(diff), a2.dtype) + val = a1 - NX.concatenate((zr, a2)) + if truepoly: + val = poly1d(val) + return val + + +@array_function_dispatch(_binary_op_dispatcher) +def polymul(a1, a2): + """ + Find the product of two polynomials. + + .. note:: + This forms part of the old polynomial API. Since version 1.4, the + new polynomial API defined in `numpy.polynomial` is preferred. + A summary of the differences can be found in the + :doc:`transition guide `. + + Finds the polynomial resulting from the multiplication of the two input + polynomials. Each input must be either a poly1d object or a 1D sequence + of polynomial coefficients, from highest to lowest degree. + + Parameters + ---------- + a1, a2 : array_like or poly1d object + Input polynomials. + + Returns + ------- + out : ndarray or poly1d object + The polynomial resulting from the multiplication of the inputs. If + either inputs is a poly1d object, then the output is also a poly1d + object. Otherwise, it is a 1D array of polynomial coefficients from + highest to lowest degree. + + See Also + -------- + poly1d : A one-dimensional polynomial class. + poly, polyadd, polyder, polydiv, polyfit, polyint, polysub, polyval + convolve : Array convolution. Same output as polymul, but has parameter + for overlap mode. + + Examples + -------- + >>> np.polymul([1, 2, 3], [9, 5, 1]) + array([ 9, 23, 38, 17, 3]) + + Using poly1d objects: + + >>> p1 = np.poly1d([1, 2, 3]) + >>> p2 = np.poly1d([9, 5, 1]) + >>> print(p1) + 2 + 1 x + 2 x + 3 + >>> print(p2) + 2 + 9 x + 5 x + 1 + >>> print(np.polymul(p1, p2)) + 4 3 2 + 9 x + 23 x + 38 x + 17 x + 3 + + """ + truepoly = (isinstance(a1, poly1d) or isinstance(a2, poly1d)) + a1, a2 = poly1d(a1), poly1d(a2) + val = NX.convolve(a1, a2) + if truepoly: + val = poly1d(val) + return val + + +def _polydiv_dispatcher(u, v): + return (u, v) + + +@array_function_dispatch(_polydiv_dispatcher) +def polydiv(u, v): + """ + Returns the quotient and remainder of polynomial division. + + .. note:: + This forms part of the old polynomial API. Since version 1.4, the + new polynomial API defined in `numpy.polynomial` is preferred. + A summary of the differences can be found in the + :doc:`transition guide `. + + The input arrays are the coefficients (including any coefficients + equal to zero) of the "numerator" (dividend) and "denominator" + (divisor) polynomials, respectively. + + Parameters + ---------- + u : array_like or poly1d + Dividend polynomial's coefficients. + + v : array_like or poly1d + Divisor polynomial's coefficients. + + Returns + ------- + q : ndarray + Coefficients, including those equal to zero, of the quotient. + r : ndarray + Coefficients, including those equal to zero, of the remainder. + + See Also + -------- + poly, polyadd, polyder, polydiv, polyfit, polyint, polymul, polysub + polyval + + Notes + ----- + Both `u` and `v` must be 0-d or 1-d (ndim = 0 or 1), but `u.ndim` need + not equal `v.ndim`. In other words, all four possible combinations - + ``u.ndim = v.ndim = 0``, ``u.ndim = v.ndim = 1``, + ``u.ndim = 1, v.ndim = 0``, and ``u.ndim = 0, v.ndim = 1`` - work. + + Examples + -------- + .. 
math:: \\frac{3x^2 + 5x + 2}{2x + 1} = 1.5x + 1.75, remainder 0.25 + + >>> x = np.array([3.0, 5.0, 2.0]) + >>> y = np.array([2.0, 1.0]) + >>> np.polydiv(x, y) + (array([1.5 , 1.75]), array([0.25])) + + """ + truepoly = (isinstance(u, poly1d) or isinstance(v, poly1d)) + u = atleast_1d(u) + 0.0 + v = atleast_1d(v) + 0.0 + # w has the common type + w = u[0] + v[0] + m = len(u) - 1 + n = len(v) - 1 + scale = 1. / v[0] + q = NX.zeros((max(m - n + 1, 1),), w.dtype) + r = u.astype(w.dtype) + for k in range(0, m-n+1): + d = scale * r[k] + q[k] = d + r[k:k+n+1] -= d*v + while NX.allclose(r[0], 0, rtol=1e-14) and (r.shape[-1] > 1): + r = r[1:] + if truepoly: + return poly1d(q), poly1d(r) + return q, r + +_poly_mat = re.compile(r"\*\*([0-9]*)") +def _raise_power(astr, wrap=70): + n = 0 + line1 = '' + line2 = '' + output = ' ' + while True: + mat = _poly_mat.search(astr, n) + if mat is None: + break + span = mat.span() + power = mat.groups()[0] + partstr = astr[n:span[0]] + n = span[1] + toadd2 = partstr + ' '*(len(power)-1) + toadd1 = ' '*(len(partstr)-1) + power + if ((len(line2) + len(toadd2) > wrap) or + (len(line1) + len(toadd1) > wrap)): + output += line1 + "\n" + line2 + "\n " + line1 = toadd1 + line2 = toadd2 + else: + line2 += partstr + ' '*(len(power)-1) + line1 += ' '*(len(partstr)-1) + power + output += line1 + "\n" + line2 + return output + astr[n:] + + +@set_module('numpy') +class poly1d: + """ + A one-dimensional polynomial class. + + .. note:: + This forms part of the old polynomial API. Since version 1.4, the + new polynomial API defined in `numpy.polynomial` is preferred. + A summary of the differences can be found in the + :doc:`transition guide `. + + A convenience class, used to encapsulate "natural" operations on + polynomials so that said operations may take on their customary + form in code (see Examples). + + Parameters + ---------- + c_or_r : array_like + The polynomial's coefficients, in decreasing powers, or if + the value of the second parameter is True, the polynomial's + roots (values where the polynomial evaluates to 0). For example, + ``poly1d([1, 2, 3])`` returns an object that represents + :math:`x^2 + 2x + 3`, whereas ``poly1d([1, 2, 3], True)`` returns + one that represents :math:`(x-1)(x-2)(x-3) = x^3 - 6x^2 + 11x -6`. + r : bool, optional + If True, `c_or_r` specifies the polynomial's roots; the default + is False. + variable : str, optional + Changes the variable used when printing `p` from `x` to `variable` + (see Examples). 
+ + Examples + -------- + Construct the polynomial :math:`x^2 + 2x + 3`: + + >>> p = np.poly1d([1, 2, 3]) + >>> print(np.poly1d(p)) + 2 + 1 x + 2 x + 3 + + Evaluate the polynomial at :math:`x = 0.5`: + + >>> p(0.5) + 4.25 + + Find the roots: + + >>> p.r + array([-1.+1.41421356j, -1.-1.41421356j]) + >>> p(p.r) + array([ -4.44089210e-16+0.j, -4.44089210e-16+0.j]) # may vary + + These numbers in the previous line represent (0, 0) to machine precision + + Show the coefficients: + + >>> p.c + array([1, 2, 3]) + + Display the order (the leading zero-coefficients are removed): + + >>> p.order + 2 + + Show the coefficient of the k-th power in the polynomial + (which is equivalent to ``p.c[-(i+1)]``): + + >>> p[1] + 2 + + Polynomials can be added, subtracted, multiplied, and divided + (returns quotient and remainder): + + >>> p * p + poly1d([ 1, 4, 10, 12, 9]) + + >>> (p**3 + 4) / p + (poly1d([ 1., 4., 10., 12., 9.]), poly1d([4.])) + + ``asarray(p)`` gives the coefficient array, so polynomials can be + used in all functions that accept arrays: + + >>> p**2 # square of polynomial + poly1d([ 1, 4, 10, 12, 9]) + + >>> np.square(p) # square of individual coefficients + array([1, 4, 9]) + + The variable used in the string representation of `p` can be modified, + using the `variable` parameter: + + >>> p = np.poly1d([1,2,3], variable='z') + >>> print(p) + 2 + 1 z + 2 z + 3 + + Construct a polynomial from its roots: + + >>> np.poly1d([1, 2], True) + poly1d([ 1., -3., 2.]) + + This is the same polynomial as obtained by: + + >>> np.poly1d([1, -1]) * np.poly1d([1, -2]) + poly1d([ 1, -3, 2]) + + """ + __hash__ = None + + @property + def coeffs(self): + """ The polynomial coefficients """ + return self._coeffs + + @coeffs.setter + def coeffs(self, value): + # allowing this makes p.coeffs *= 2 legal + if value is not self._coeffs: + raise AttributeError("Cannot set attribute") + + @property + def variable(self): + """ The name of the polynomial variable """ + return self._variable + + # calculated attributes + @property + def order(self): + """ The order or degree of the polynomial """ + return len(self._coeffs) - 1 + + @property + def roots(self): + """ The roots of the polynomial, where self(x) == 0 """ + return roots(self._coeffs) + + # our internal _coeffs property need to be backed by __dict__['coeffs'] for + # scipy to work correctly. 
+ @property + def _coeffs(self): + return self.__dict__['coeffs'] + @_coeffs.setter + def _coeffs(self, coeffs): + self.__dict__['coeffs'] = coeffs + + # alias attributes + r = roots + c = coef = coefficients = coeffs + o = order + + def __init__(self, c_or_r, r=False, variable=None): + if isinstance(c_or_r, poly1d): + self._variable = c_or_r._variable + self._coeffs = c_or_r._coeffs + + if set(c_or_r.__dict__) - set(self.__dict__): + msg = ("In the future extra properties will not be copied " + "across when constructing one poly1d from another") + warnings.warn(msg, FutureWarning, stacklevel=2) + self.__dict__.update(c_or_r.__dict__) + + if variable is not None: + self._variable = variable + return + if r: + c_or_r = poly(c_or_r) + c_or_r = atleast_1d(c_or_r) + if c_or_r.ndim > 1: + raise ValueError("Polynomial must be 1d only.") + c_or_r = trim_zeros(c_or_r, trim='f') + if len(c_or_r) == 0: + c_or_r = NX.array([0], dtype=c_or_r.dtype) + self._coeffs = c_or_r + if variable is None: + variable = 'x' + self._variable = variable + + def __array__(self, t=None): + if t: + return NX.asarray(self.coeffs, t) + else: + return NX.asarray(self.coeffs) + + def __repr__(self): + vals = repr(self.coeffs) + vals = vals[6:-1] + return "poly1d(%s)" % vals + + def __len__(self): + return self.order + + def __str__(self): + thestr = "0" + var = self.variable + + # Remove leading zeros + coeffs = self.coeffs[NX.logical_or.accumulate(self.coeffs != 0)] + N = len(coeffs)-1 + + def fmt_float(q): + s = '%.4g' % q + if s.endswith('.0000'): + s = s[:-5] + return s + + for k, coeff in enumerate(coeffs): + if not iscomplex(coeff): + coefstr = fmt_float(real(coeff)) + elif real(coeff) == 0: + coefstr = '%sj' % fmt_float(imag(coeff)) + else: + coefstr = '(%s + %sj)' % (fmt_float(real(coeff)), + fmt_float(imag(coeff))) + + power = (N-k) + if power == 0: + if coefstr != '0': + newstr = '%s' % (coefstr,) + else: + if k == 0: + newstr = '0' + else: + newstr = '' + elif power == 1: + if coefstr == '0': + newstr = '' + elif coefstr == 'b': + newstr = var + else: + newstr = '%s %s' % (coefstr, var) + else: + if coefstr == '0': + newstr = '' + elif coefstr == 'b': + newstr = '%s**%d' % (var, power,) + else: + newstr = '%s %s**%d' % (coefstr, var, power) + + if k > 0: + if newstr != '': + if newstr.startswith('-'): + thestr = "%s - %s" % (thestr, newstr[1:]) + else: + thestr = "%s + %s" % (thestr, newstr) + else: + thestr = newstr + return _raise_power(thestr) + + def __call__(self, val): + return polyval(self.coeffs, val) + + def __neg__(self): + return poly1d(-self.coeffs) + + def __pos__(self): + return self + + def __mul__(self, other): + if isscalar(other): + return poly1d(self.coeffs * other) + else: + other = poly1d(other) + return poly1d(polymul(self.coeffs, other.coeffs)) + + def __rmul__(self, other): + if isscalar(other): + return poly1d(other * self.coeffs) + else: + other = poly1d(other) + return poly1d(polymul(self.coeffs, other.coeffs)) + + def __add__(self, other): + other = poly1d(other) + return poly1d(polyadd(self.coeffs, other.coeffs)) + + def __radd__(self, other): + other = poly1d(other) + return poly1d(polyadd(self.coeffs, other.coeffs)) + + def __pow__(self, val): + if not isscalar(val) or int(val) != val or val < 0: + raise ValueError("Power to non-negative integers only.") + res = [1] + for _ in range(val): + res = polymul(self.coeffs, res) + return poly1d(res) + + def __sub__(self, other): + other = poly1d(other) + return poly1d(polysub(self.coeffs, other.coeffs)) + + def __rsub__(self, other): + 
other = poly1d(other) + return poly1d(polysub(other.coeffs, self.coeffs)) + + def __div__(self, other): + if isscalar(other): + return poly1d(self.coeffs/other) + else: + other = poly1d(other) + return polydiv(self, other) + + __truediv__ = __div__ + + def __rdiv__(self, other): + if isscalar(other): + return poly1d(other/self.coeffs) + else: + other = poly1d(other) + return polydiv(other, self) + + __rtruediv__ = __rdiv__ + + def __eq__(self, other): + if not isinstance(other, poly1d): + return NotImplemented + if self.coeffs.shape != other.coeffs.shape: + return False + return (self.coeffs == other.coeffs).all() + + def __ne__(self, other): + if not isinstance(other, poly1d): + return NotImplemented + return not self.__eq__(other) + + + def __getitem__(self, val): + ind = self.order - val + if val > self.order: + return self.coeffs.dtype.type(0) + if val < 0: + return self.coeffs.dtype.type(0) + return self.coeffs[ind] + + def __setitem__(self, key, val): + ind = self.order - key + if key < 0: + raise ValueError("Does not support negative powers.") + if key > self.order: + zr = NX.zeros(key-self.order, self.coeffs.dtype) + self._coeffs = NX.concatenate((zr, self.coeffs)) + ind = 0 + self._coeffs[ind] = val + return + + def __iter__(self): + return iter(self.coeffs) + + def integ(self, m=1, k=0): + """ + Return an antiderivative (indefinite integral) of this polynomial. + + Refer to `polyint` for full documentation. + + See Also + -------- + polyint : equivalent function + + """ + return poly1d(polyint(self.coeffs, m=m, k=k)) + + def deriv(self, m=1): + """ + Return a derivative of this polynomial. + + Refer to `polyder` for full documentation. + + See Also + -------- + polyder : equivalent function + + """ + return poly1d(polyder(self.coeffs, m=m)) + +# Stuff to do on module import + +warnings.simplefilter('always', RankWarning) diff --git a/.local/lib/python3.8/site-packages/numpy/lib/polynomial.pyi b/.local/lib/python3.8/site-packages/numpy/lib/polynomial.pyi new file mode 100644 index 0000000..00065f5 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/polynomial.pyi @@ -0,0 +1,305 @@ +from typing import ( + Literal as L, + List, + overload, + Any, + SupportsInt, + SupportsIndex, + TypeVar, + Tuple, + NoReturn, +) + +from numpy import ( + RankWarning as RankWarning, + poly1d as poly1d, + unsignedinteger, + signedinteger, + floating, + complexfloating, + bool_, + int32, + int64, + float64, + complex128, + object_, +) + +from numpy.typing import ( + NDArray, + ArrayLike, + _ArrayLikeBool_co, + _ArrayLikeUInt_co, + _ArrayLikeInt_co, + _ArrayLikeFloat_co, + _ArrayLikeComplex_co, + _ArrayLikeObject_co, +) + +_T = TypeVar("_T") + +_2Tup = Tuple[_T, _T] +_5Tup = Tuple[ + _T, + NDArray[float64], + NDArray[int32], + NDArray[float64], + NDArray[float64], +] + +__all__: List[str] + +def poly(seq_of_zeros: ArrayLike) -> NDArray[floating[Any]]: ... + +# Returns either a float or complex array depending on the input values. +# See `np.linalg.eigvals`. +def roots(p: ArrayLike) -> NDArray[complexfloating[Any, Any]] | NDArray[floating[Any]]: ... + +@overload +def polyint( + p: poly1d, + m: SupportsInt | SupportsIndex = ..., + k: None | _ArrayLikeComplex_co | _ArrayLikeObject_co = ..., +) -> poly1d: ... +@overload +def polyint( + p: _ArrayLikeFloat_co, + m: SupportsInt | SupportsIndex = ..., + k: None | _ArrayLikeFloat_co = ..., +) -> NDArray[floating[Any]]: ... 
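+# Illustrative note (not part of the stubs): a type checker selects among
+# the `polyint` overloads by argument type; under mypy one would expect
+# roughly the following (hypothetical `reveal_type` calls):
+#
+#     reveal_type(polyint(poly1d([1.0, 2.0])))  # -> poly1d
+#     reveal_type(polyint([1.0, 2.0]))          # -> ndarray[floating[Any]]
+#     reveal_type(polyint([1.0 + 2.0j]))        # -> ndarray[complexfloating[Any, Any]]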
+@overload +def polyint( + p: _ArrayLikeComplex_co, + m: SupportsInt | SupportsIndex = ..., + k: None | _ArrayLikeComplex_co = ..., +) -> NDArray[complexfloating[Any, Any]]: ... +@overload +def polyint( + p: _ArrayLikeObject_co, + m: SupportsInt | SupportsIndex = ..., + k: None | _ArrayLikeObject_co = ..., +) -> NDArray[object_]: ... + +@overload +def polyder( + p: poly1d, + m: SupportsInt | SupportsIndex = ..., +) -> poly1d: ... +@overload +def polyder( + p: _ArrayLikeFloat_co, + m: SupportsInt | SupportsIndex = ..., +) -> NDArray[floating[Any]]: ... +@overload +def polyder( + p: _ArrayLikeComplex_co, + m: SupportsInt | SupportsIndex = ..., +) -> NDArray[complexfloating[Any, Any]]: ... +@overload +def polyder( + p: _ArrayLikeObject_co, + m: SupportsInt | SupportsIndex = ..., +) -> NDArray[object_]: ... + +@overload +def polyfit( + x: _ArrayLikeFloat_co, + y: _ArrayLikeFloat_co, + deg: SupportsIndex | SupportsInt, + rcond: None | float = ..., + full: L[False] = ..., + w: None | _ArrayLikeFloat_co = ..., + cov: L[False] = ..., +) -> NDArray[float64]: ... +@overload +def polyfit( + x: _ArrayLikeComplex_co, + y: _ArrayLikeComplex_co, + deg: SupportsIndex | SupportsInt, + rcond: None | float = ..., + full: L[False] = ..., + w: None | _ArrayLikeFloat_co = ..., + cov: L[False] = ..., +) -> NDArray[complex128]: ... +@overload +def polyfit( + x: _ArrayLikeFloat_co, + y: _ArrayLikeFloat_co, + deg: SupportsIndex | SupportsInt, + rcond: None | float = ..., + full: L[False] = ..., + w: None | _ArrayLikeFloat_co = ..., + cov: L[True, "unscaled"] = ..., +) -> _2Tup[NDArray[float64]]: ... +@overload +def polyfit( + x: _ArrayLikeComplex_co, + y: _ArrayLikeComplex_co, + deg: SupportsIndex | SupportsInt, + rcond: None | float = ..., + full: L[False] = ..., + w: None | _ArrayLikeFloat_co = ..., + cov: L[True, "unscaled"] = ..., +) -> _2Tup[NDArray[complex128]]: ... +@overload +def polyfit( + x: _ArrayLikeFloat_co, + y: _ArrayLikeFloat_co, + deg: SupportsIndex | SupportsInt, + rcond: None | float = ..., + full: L[True] = ..., + w: None | _ArrayLikeFloat_co = ..., + cov: bool | L["unscaled"] = ..., +) -> _5Tup[NDArray[float64]]: ... +@overload +def polyfit( + x: _ArrayLikeComplex_co, + y: _ArrayLikeComplex_co, + deg: SupportsIndex | SupportsInt, + rcond: None | float = ..., + full: L[True] = ..., + w: None | _ArrayLikeFloat_co = ..., + cov: bool | L["unscaled"] = ..., +) -> _5Tup[NDArray[complex128]]: ... + +@overload +def polyval( + p: _ArrayLikeBool_co, + x: _ArrayLikeBool_co, +) -> NDArray[int64]: ... +@overload +def polyval( + p: _ArrayLikeUInt_co, + x: _ArrayLikeUInt_co, +) -> NDArray[unsignedinteger[Any]]: ... +@overload +def polyval( + p: _ArrayLikeInt_co, + x: _ArrayLikeInt_co, +) -> NDArray[signedinteger[Any]]: ... +@overload +def polyval( + p: _ArrayLikeFloat_co, + x: _ArrayLikeFloat_co, +) -> NDArray[floating[Any]]: ... +@overload +def polyval( + p: _ArrayLikeComplex_co, + x: _ArrayLikeComplex_co, +) -> NDArray[complexfloating[Any, Any]]: ... +@overload +def polyval( + p: _ArrayLikeObject_co, + x: _ArrayLikeObject_co, +) -> NDArray[object_]: ... + +@overload +def polyadd( + a1: poly1d, + a2: _ArrayLikeComplex_co | _ArrayLikeObject_co, +) -> poly1d: ... +@overload +def polyadd( + a1: _ArrayLikeComplex_co | _ArrayLikeObject_co, + a2: poly1d, +) -> poly1d: ... +@overload +def polyadd( + a1: _ArrayLikeBool_co, + a2: _ArrayLikeBool_co, +) -> NDArray[bool_]: ... +@overload +def polyadd( + a1: _ArrayLikeUInt_co, + a2: _ArrayLikeUInt_co, +) -> NDArray[unsignedinteger[Any]]: ... 
+@overload +def polyadd( + a1: _ArrayLikeInt_co, + a2: _ArrayLikeInt_co, +) -> NDArray[signedinteger[Any]]: ... +@overload +def polyadd( + a1: _ArrayLikeFloat_co, + a2: _ArrayLikeFloat_co, +) -> NDArray[floating[Any]]: ... +@overload +def polyadd( + a1: _ArrayLikeComplex_co, + a2: _ArrayLikeComplex_co, +) -> NDArray[complexfloating[Any, Any]]: ... +@overload +def polyadd( + a1: _ArrayLikeObject_co, + a2: _ArrayLikeObject_co, +) -> NDArray[object_]: ... + +@overload +def polysub( + a1: poly1d, + a2: _ArrayLikeComplex_co | _ArrayLikeObject_co, +) -> poly1d: ... +@overload +def polysub( + a1: _ArrayLikeComplex_co | _ArrayLikeObject_co, + a2: poly1d, +) -> poly1d: ... +@overload +def polysub( + a1: _ArrayLikeBool_co, + a2: _ArrayLikeBool_co, +) -> NoReturn: ... +@overload +def polysub( + a1: _ArrayLikeUInt_co, + a2: _ArrayLikeUInt_co, +) -> NDArray[unsignedinteger[Any]]: ... +@overload +def polysub( + a1: _ArrayLikeInt_co, + a2: _ArrayLikeInt_co, +) -> NDArray[signedinteger[Any]]: ... +@overload +def polysub( + a1: _ArrayLikeFloat_co, + a2: _ArrayLikeFloat_co, +) -> NDArray[floating[Any]]: ... +@overload +def polysub( + a1: _ArrayLikeComplex_co, + a2: _ArrayLikeComplex_co, +) -> NDArray[complexfloating[Any, Any]]: ... +@overload +def polysub( + a1: _ArrayLikeObject_co, + a2: _ArrayLikeObject_co, +) -> NDArray[object_]: ... + +# NOTE: Not an alias, but they do have the same signature (that we can reuse) +polymul = polyadd + +@overload +def polydiv( + u: poly1d, + v: _ArrayLikeComplex_co | _ArrayLikeObject_co, +) -> _2Tup[poly1d]: ... +@overload +def polydiv( + u: _ArrayLikeComplex_co | _ArrayLikeObject_co, + v: poly1d, +) -> _2Tup[poly1d]: ... +@overload +def polydiv( + u: _ArrayLikeFloat_co, + v: _ArrayLikeFloat_co, +) -> _2Tup[NDArray[floating[Any]]]: ... +@overload +def polydiv( + u: _ArrayLikeComplex_co, + v: _ArrayLikeComplex_co, +) -> _2Tup[NDArray[complexfloating[Any, Any]]]: ... +@overload +def polydiv( + u: _ArrayLikeObject_co, + v: _ArrayLikeObject_co, +) -> _2Tup[NDArray[Any]]: ... diff --git a/.local/lib/python3.8/site-packages/numpy/lib/recfunctions.py b/.local/lib/python3.8/site-packages/numpy/lib/recfunctions.py new file mode 100644 index 0000000..a491f61 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/recfunctions.py @@ -0,0 +1,1595 @@ +""" +Collection of utilities to manipulate structured arrays. + +Most of these functions were initially implemented by John Hunter for +matplotlib. They have been rewritten and extended for convenience. 
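+
+A quick taste (illustrative only; see the individual functions below for
+the full semantics):
+
+>>> import numpy as np
+>>> from numpy.lib import recfunctions as rfn
+>>> a = np.array([(1, 2.0)], dtype=[('x', 'i8'), ('y', 'f8')])
+>>> rfn.get_names(a.dtype)
+('x', 'y')
+>>> rfn.structured_to_unstructured(a)
+array([[1., 2.]])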
+ +""" +import itertools +import numpy as np +import numpy.ma as ma +from numpy import ndarray, recarray +from numpy.ma import MaskedArray +from numpy.ma.mrecords import MaskedRecords +from numpy.core.overrides import array_function_dispatch +from numpy.lib._iotools import _is_string_like +from numpy.testing import suppress_warnings + +_check_fill_value = np.ma.core._check_fill_value + + +__all__ = [ + 'append_fields', 'apply_along_fields', 'assign_fields_by_name', + 'drop_fields', 'find_duplicates', 'flatten_descr', + 'get_fieldstructure', 'get_names', 'get_names_flat', + 'join_by', 'merge_arrays', 'rec_append_fields', + 'rec_drop_fields', 'rec_join', 'recursive_fill_fields', + 'rename_fields', 'repack_fields', 'require_fields', + 'stack_arrays', 'structured_to_unstructured', 'unstructured_to_structured', + ] + + +def _recursive_fill_fields_dispatcher(input, output): + return (input, output) + + +@array_function_dispatch(_recursive_fill_fields_dispatcher) +def recursive_fill_fields(input, output): + """ + Fills fields from output with fields from input, + with support for nested structures. + + Parameters + ---------- + input : ndarray + Input array. + output : ndarray + Output array. + + Notes + ----- + * `output` should be at least the same size as `input` + + Examples + -------- + >>> from numpy.lib import recfunctions as rfn + >>> a = np.array([(1, 10.), (2, 20.)], dtype=[('A', np.int64), ('B', np.float64)]) + >>> b = np.zeros((3,), dtype=a.dtype) + >>> rfn.recursive_fill_fields(a, b) + array([(1, 10.), (2, 20.), (0, 0.)], dtype=[('A', '>> dt = np.dtype([(('a', 'A'), np.int64), ('b', np.double, 3)]) + >>> dt.descr + [(('a', 'A'), '>> _get_fieldspec(dt) + [(('a', 'A'), dtype('int64')), ('b', dtype(('>> from numpy.lib import recfunctions as rfn + >>> rfn.get_names(np.empty((1,), dtype=int)) + Traceback (most recent call last): + ... + AttributeError: 'numpy.ndarray' object has no attribute 'names' + + >>> rfn.get_names(np.empty((1,), dtype=[('A',int), ('B', float)])) + Traceback (most recent call last): + ... + AttributeError: 'numpy.ndarray' object has no attribute 'names' + >>> adtype = np.dtype([('a', int), ('b', [('ba', int), ('bb', int)])]) + >>> rfn.get_names(adtype) + ('a', ('b', ('ba', 'bb'))) + """ + listnames = [] + names = adtype.names + for name in names: + current = adtype[name] + if current.names is not None: + listnames.append((name, tuple(get_names(current)))) + else: + listnames.append(name) + return tuple(listnames) + + +def get_names_flat(adtype): + """ + Returns the field names of the input datatype as a tuple. Nested structure + are flattened beforehand. + + Parameters + ---------- + adtype : dtype + Input datatype + + Examples + -------- + >>> from numpy.lib import recfunctions as rfn + >>> rfn.get_names_flat(np.empty((1,), dtype=int)) is None + Traceback (most recent call last): + ... + AttributeError: 'numpy.ndarray' object has no attribute 'names' + >>> rfn.get_names_flat(np.empty((1,), dtype=[('A',int), ('B', float)])) + Traceback (most recent call last): + ... + AttributeError: 'numpy.ndarray' object has no attribute 'names' + >>> adtype = np.dtype([('a', int), ('b', [('ba', int), ('bb', int)])]) + >>> rfn.get_names_flat(adtype) + ('a', 'b', 'ba', 'bb') + """ + listnames = [] + names = adtype.names + for name in names: + listnames.append(name) + current = adtype[name] + if current.names is not None: + listnames.extend(get_names_flat(current)) + return tuple(listnames) + + +def flatten_descr(ndtype): + """ + Flatten a structured data-type description. 
+ + Examples + -------- + >>> from numpy.lib import recfunctions as rfn + >>> ndtype = np.dtype([('a', '>> rfn.flatten_descr(ndtype) + (('a', dtype('int32')), ('ba', dtype('float64')), ('bb', dtype('int32'))) + + """ + names = ndtype.names + if names is None: + return (('', ndtype),) + else: + descr = [] + for field in names: + (typ, _) = ndtype.fields[field] + if typ.names is not None: + descr.extend(flatten_descr(typ)) + else: + descr.append((field, typ)) + return tuple(descr) + + +def _zip_dtype(seqarrays, flatten=False): + newdtype = [] + if flatten: + for a in seqarrays: + newdtype.extend(flatten_descr(a.dtype)) + else: + for a in seqarrays: + current = a.dtype + if current.names is not None and len(current.names) == 1: + # special case - dtypes of 1 field are flattened + newdtype.extend(_get_fieldspec(current)) + else: + newdtype.append(('', current)) + return np.dtype(newdtype) + + +def _zip_descr(seqarrays, flatten=False): + """ + Combine the dtype description of a series of arrays. + + Parameters + ---------- + seqarrays : sequence of arrays + Sequence of arrays + flatten : {boolean}, optional + Whether to collapse nested descriptions. + """ + return _zip_dtype(seqarrays, flatten=flatten).descr + + +def get_fieldstructure(adtype, lastname=None, parents=None,): + """ + Returns a dictionary with fields indexing lists of their parent fields. + + This function is used to simplify access to fields nested in other fields. + + Parameters + ---------- + adtype : np.dtype + Input datatype + lastname : optional + Last processed field name (used internally during recursion). + parents : dictionary + Dictionary of parent fields (used interbally during recursion). + + Examples + -------- + >>> from numpy.lib import recfunctions as rfn + >>> ndtype = np.dtype([('A', int), + ... ('B', [('BA', int), + ... ('BB', [('BBA', int), ('BBB', int)])])]) + >>> rfn.get_fieldstructure(ndtype) + ... # XXX: possible regression, order of BBA and BBB is swapped + {'A': [], 'B': [], 'BA': ['B'], 'BB': ['B'], 'BBA': ['B', 'BB'], 'BBB': ['B', 'BB']} + + """ + if parents is None: + parents = {} + names = adtype.names + for name in names: + current = adtype[name] + if current.names is not None: + if lastname: + parents[name] = [lastname, ] + else: + parents[name] = [] + parents.update(get_fieldstructure(current, name, parents)) + else: + lastparent = [_ for _ in (parents.get(lastname, []) or [])] + if lastparent: + lastparent.append(lastname) + elif lastname: + lastparent = [lastname, ] + parents[name] = lastparent or [] + return parents + + +def _izip_fields_flat(iterable): + """ + Returns an iterator of concatenated fields from a sequence of arrays, + collapsing any nested structure. + + """ + for element in iterable: + if isinstance(element, np.void): + yield from _izip_fields_flat(tuple(element)) + else: + yield element + + +def _izip_fields(iterable): + """ + Returns an iterator of concatenated fields from a sequence of arrays. + + """ + for element in iterable: + if (hasattr(element, '__iter__') and + not isinstance(element, str)): + yield from _izip_fields(element) + elif isinstance(element, np.void) and len(tuple(element)) == 1: + # this statement is the same from the previous expression + yield from _izip_fields(element) + else: + yield element + + +def _izip_records(seqarrays, fill_value=None, flatten=True): + """ + Returns an iterator of concatenated items from a sequence of arrays. + + Parameters + ---------- + seqarrays : sequence of arrays + Sequence of arrays. 
+ fill_value : {None, integer} + Value used to pad shorter iterables. + flatten : {True, False}, + Whether to + """ + + # Should we flatten the items, or just use a nested approach + if flatten: + zipfunc = _izip_fields_flat + else: + zipfunc = _izip_fields + + for tup in itertools.zip_longest(*seqarrays, fillvalue=fill_value): + yield tuple(zipfunc(tup)) + + +def _fix_output(output, usemask=True, asrecarray=False): + """ + Private function: return a recarray, a ndarray, a MaskedArray + or a MaskedRecords depending on the input parameters + """ + if not isinstance(output, MaskedArray): + usemask = False + if usemask: + if asrecarray: + output = output.view(MaskedRecords) + else: + output = ma.filled(output) + if asrecarray: + output = output.view(recarray) + return output + + +def _fix_defaults(output, defaults=None): + """ + Update the fill_value and masked data of `output` + from the default given in a dictionary defaults. + """ + names = output.dtype.names + (data, mask, fill_value) = (output.data, output.mask, output.fill_value) + for (k, v) in (defaults or {}).items(): + if k in names: + fill_value[k] = v + data[k][mask[k]] = v + return output + + +def _merge_arrays_dispatcher(seqarrays, fill_value=None, flatten=None, + usemask=None, asrecarray=None): + return seqarrays + + +@array_function_dispatch(_merge_arrays_dispatcher) +def merge_arrays(seqarrays, fill_value=-1, flatten=False, + usemask=False, asrecarray=False): + """ + Merge arrays field by field. + + Parameters + ---------- + seqarrays : sequence of ndarrays + Sequence of arrays + fill_value : {float}, optional + Filling value used to pad missing data on the shorter arrays. + flatten : {False, True}, optional + Whether to collapse nested fields. + usemask : {False, True}, optional + Whether to return a masked array or not. + asrecarray : {False, True}, optional + Whether to return a recarray (MaskedRecords) or not. + + Examples + -------- + >>> from numpy.lib import recfunctions as rfn + >>> rfn.merge_arrays((np.array([1, 2]), np.array([10., 20., 30.]))) + array([( 1, 10.), ( 2, 20.), (-1, 30.)], + dtype=[('f0', '>> rfn.merge_arrays((np.array([1, 2], dtype=np.int64), + ... np.array([10., 20., 30.])), usemask=False) + array([(1, 10.0), (2, 20.0), (-1, 30.0)], + dtype=[('f0', '>> rfn.merge_arrays((np.array([1, 2]).view([('a', np.int64)]), + ... np.array([10., 20., 30.])), + ... usemask=False, asrecarray=True) + rec.array([( 1, 10.), ( 2, 20.), (-1, 30.)], + dtype=[('a', '>> from numpy.lib import recfunctions as rfn + >>> a = np.array([(1, (2, 3.0)), (4, (5, 6.0))], + ... dtype=[('a', np.int64), ('b', [('ba', np.double), ('bb', np.int64)])]) + >>> rfn.drop_fields(a, 'a') + array([((2., 3),), ((5., 6),)], + dtype=[('b', [('ba', '>> rfn.drop_fields(a, 'ba') + array([(1, (3,)), (4, (6,))], dtype=[('a', '>> rfn.drop_fields(a, ['ba', 'bb']) + array([(1,), (4,)], dtype=[('a', '>> from numpy.lib import recfunctions as rfn + >>> a = np.array([(1, (2, [3.0, 30.])), (4, (5, [6.0, 60.]))], + ... 
dtype=[('a', int),('b', [('ba', float), ('bb', (float, 2))])]) + >>> rfn.rename_fields(a, {'a':'A', 'bb':'BB'}) + array([(1, (2., [ 3., 30.])), (4, (5., [ 6., 60.]))], + dtype=[('A', ' 1: + data = merge_arrays(data, flatten=True, usemask=usemask, + fill_value=fill_value) + else: + data = data.pop() + # + output = ma.masked_all( + max(len(base), len(data)), + dtype=_get_fieldspec(base.dtype) + _get_fieldspec(data.dtype)) + output = recursive_fill_fields(base, output) + output = recursive_fill_fields(data, output) + # + return _fix_output(output, usemask=usemask, asrecarray=asrecarray) + + +def _rec_append_fields_dispatcher(base, names, data, dtypes=None): + yield base + yield from data + + +@array_function_dispatch(_rec_append_fields_dispatcher) +def rec_append_fields(base, names, data, dtypes=None): + """ + Add new fields to an existing array. + + The names of the fields are given with the `names` arguments, + the corresponding values with the `data` arguments. + If a single field is appended, `names`, `data` and `dtypes` do not have + to be lists but just values. + + Parameters + ---------- + base : array + Input array to extend. + names : string, sequence + String or sequence of strings corresponding to the names + of the new fields. + data : array or sequence of arrays + Array or sequence of arrays storing the fields to add to the base. + dtypes : sequence of datatypes, optional + Datatype or sequence of datatypes. + If None, the datatypes are estimated from the `data`. + + See Also + -------- + append_fields + + Returns + ------- + appended_array : np.recarray + """ + return append_fields(base, names, data=data, dtypes=dtypes, + asrecarray=True, usemask=False) + + +def _repack_fields_dispatcher(a, align=None, recurse=None): + return (a,) + + +@array_function_dispatch(_repack_fields_dispatcher) +def repack_fields(a, align=False, recurse=False): + """ + Re-pack the fields of a structured array or dtype in memory. + + The memory layout of structured datatypes allows fields at arbitrary + byte offsets. This means the fields can be separated by padding bytes, + their offsets can be non-monotonically increasing, and they can overlap. + + This method removes any overlaps and reorders the fields in memory so they + have increasing byte offsets, and adds or removes padding bytes depending + on the `align` option, which behaves like the `align` option to `np.dtype`. + + If `align=False`, this method produces a "packed" memory layout in which + each field starts at the byte the previous field ended, and any padding + bytes are removed. + + If `align=True`, this methods produces an "aligned" memory layout in which + each field's offset is a multiple of its alignment, and the total itemsize + is a multiple of the largest alignment, by adding padding bytes as needed. + + Parameters + ---------- + a : ndarray or dtype + array or dtype for which to repack the fields. + align : boolean + If true, use an "aligned" memory layout, otherwise use a "packed" layout. + recurse : boolean + If True, also repack nested structures. + + Returns + ------- + repacked : ndarray or dtype + Copy of `a` with fields repacked, or `a` itself if no repacking was + needed. + + Examples + -------- + + >>> from numpy.lib import recfunctions as rfn + >>> def print_offsets(d): + ... print("offsets:", [d.fields[name][1] for name in d.names]) + ... print("itemsize:", d.itemsize) + ... 
+ >>> dt = np.dtype('u1, >> dt + dtype({'names': ['f0', 'f1', 'f2'], 'formats': ['u1', '>> print_offsets(dt) + offsets: [0, 8, 16] + itemsize: 24 + >>> packed_dt = rfn.repack_fields(dt) + >>> packed_dt + dtype([('f0', 'u1'), ('f1', '>> print_offsets(packed_dt) + offsets: [0, 1, 9] + itemsize: 17 + + """ + if not isinstance(a, np.dtype): + dt = repack_fields(a.dtype, align=align, recurse=recurse) + return a.astype(dt, copy=False) + + if a.names is None: + return a + + fieldinfo = [] + for name in a.names: + tup = a.fields[name] + if recurse: + fmt = repack_fields(tup[0], align=align, recurse=True) + else: + fmt = tup[0] + + if len(tup) == 3: + name = (tup[2], name) + + fieldinfo.append((name, fmt)) + + dt = np.dtype(fieldinfo, align=align) + return np.dtype((a.type, dt)) + +def _get_fields_and_offsets(dt, offset=0): + """ + Returns a flat list of (dtype, count, offset) tuples of all the + scalar fields in the dtype "dt", including nested fields, in left + to right order. + """ + + # counts up elements in subarrays, including nested subarrays, and returns + # base dtype and count + def count_elem(dt): + count = 1 + while dt.shape != (): + for size in dt.shape: + count *= size + dt = dt.base + return dt, count + + fields = [] + for name in dt.names: + field = dt.fields[name] + f_dt, f_offset = field[0], field[1] + f_dt, n = count_elem(f_dt) + + if f_dt.names is None: + fields.append((np.dtype((f_dt, (n,))), n, f_offset + offset)) + else: + subfields = _get_fields_and_offsets(f_dt, f_offset + offset) + size = f_dt.itemsize + + for i in range(n): + if i == 0: + # optimization: avoid list comprehension if no subarray + fields.extend(subfields) + else: + fields.extend([(d, c, o + i*size) for d, c, o in subfields]) + return fields + + +def _structured_to_unstructured_dispatcher(arr, dtype=None, copy=None, + casting=None): + return (arr,) + +@array_function_dispatch(_structured_to_unstructured_dispatcher) +def structured_to_unstructured(arr, dtype=None, copy=False, casting='unsafe'): + """ + Converts an n-D structured array into an (n+1)-D unstructured array. + + The new array will have a new last dimension equal in size to the + number of field-elements of the input array. If not supplied, the output + datatype is determined from the numpy type promotion rules applied to all + the field datatypes. + + Nested fields, as well as each element of any subarray fields, all count + as a single field-elements. + + Parameters + ---------- + arr : ndarray + Structured array or dtype to convert. Cannot contain object datatype. + dtype : dtype, optional + The dtype of the output unstructured array. + copy : bool, optional + See copy argument to `ndarray.astype`. If true, always return a copy. + If false, and `dtype` requirements are satisfied, a view is returned. + casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional + See casting argument of `ndarray.astype`. Controls what kind of data + casting may occur. + + Returns + ------- + unstructured : ndarray + Unstructured array with one more dimension. + + Examples + -------- + + >>> from numpy.lib import recfunctions as rfn + >>> a = np.zeros(4, dtype=[('a', 'i4'), ('b', 'f4,u2'), ('c', 'f4', 2)]) + >>> a + array([(0, (0., 0), [0., 0.]), (0, (0., 0), [0., 0.]), + (0, (0., 0), [0., 0.]), (0, (0., 0), [0., 0.])], + dtype=[('a', '>> rfn.structured_to_unstructured(a) + array([[0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.]]) + + >>> b = np.array([(1, 2, 5), (4, 5, 7), (7, 8 ,11), (10, 11, 12)], + ... 
dtype=[('x', 'i4'), ('y', 'f4'), ('z', 'f8')]) + >>> np.mean(rfn.structured_to_unstructured(b[['x', 'z']]), axis=-1) + array([ 3. , 5.5, 9. , 11. ]) + + """ + if arr.dtype.names is None: + raise ValueError('arr must be a structured array') + + fields = _get_fields_and_offsets(arr.dtype) + n_fields = len(fields) + if n_fields == 0 and dtype is None: + raise ValueError("arr has no fields. Unable to guess dtype") + elif n_fields == 0: + # too many bugs elsewhere for this to work now + raise NotImplementedError("arr with no fields is not supported") + + dts, counts, offsets = zip(*fields) + names = ['f{}'.format(n) for n in range(n_fields)] + + if dtype is None: + out_dtype = np.result_type(*[dt.base for dt in dts]) + else: + out_dtype = dtype + + # Use a series of views and casts to convert to an unstructured array: + + # first view using flattened fields (doesn't work for object arrays) + # Note: dts may include a shape for subarrays + flattened_fields = np.dtype({'names': names, + 'formats': dts, + 'offsets': offsets, + 'itemsize': arr.dtype.itemsize}) + with suppress_warnings() as sup: # until 1.16 (gh-12447) + sup.filter(FutureWarning, "Numpy has detected") + arr = arr.view(flattened_fields) + + # next cast to a packed format with all fields converted to new dtype + packed_fields = np.dtype({'names': names, + 'formats': [(out_dtype, dt.shape) for dt in dts]}) + arr = arr.astype(packed_fields, copy=copy, casting=casting) + + # finally is it safe to view the packed fields as the unstructured type + return arr.view((out_dtype, (sum(counts),))) + + +def _unstructured_to_structured_dispatcher(arr, dtype=None, names=None, + align=None, copy=None, casting=None): + return (arr,) + +@array_function_dispatch(_unstructured_to_structured_dispatcher) +def unstructured_to_structured(arr, dtype=None, names=None, align=False, + copy=False, casting='unsafe'): + """ + Converts an n-D unstructured array into an (n-1)-D structured array. + + The last dimension of the input array is converted into a structure, with + number of field-elements equal to the size of the last dimension of the + input array. By default all output fields have the input array's dtype, but + an output structured dtype with an equal number of fields-elements can be + supplied instead. + + Nested fields, as well as each element of any subarray fields, all count + towards the number of field-elements. + + Parameters + ---------- + arr : ndarray + Unstructured array or dtype to convert. + dtype : dtype, optional + The structured dtype of the output array + names : list of strings, optional + If dtype is not supplied, this specifies the field names for the output + dtype, in order. The field dtypes will be the same as the input array. + align : boolean, optional + Whether to create an aligned memory layout. + copy : bool, optional + See copy argument to `ndarray.astype`. If true, always return a copy. + If false, and `dtype` requirements are satisfied, a view is returned. + casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional + See casting argument of `ndarray.astype`. Controls what kind of data + casting may occur. + + Returns + ------- + structured : ndarray + Structured array with fewer dimensions. 
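+
+    Notes
+    -----
+    Up to dtype, this is the inverse of `structured_to_unstructured`; a
+    minimal round-trip sketch (homogeneous float data, hypothetical field
+    names):
+
+    >>> from numpy.lib import recfunctions as rfn
+    >>> u = np.arange(6.0).reshape(3, 2)
+    >>> s = rfn.unstructured_to_structured(u, names=['a', 'b'])
+    >>> np.array_equal(rfn.structured_to_unstructured(s), u)
+    True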
+ + Examples + -------- + + >>> from numpy.lib import recfunctions as rfn + >>> dt = np.dtype([('a', 'i4'), ('b', 'f4,u2'), ('c', 'f4', 2)]) + >>> a = np.arange(20).reshape((4,5)) + >>> a + array([[ 0, 1, 2, 3, 4], + [ 5, 6, 7, 8, 9], + [10, 11, 12, 13, 14], + [15, 16, 17, 18, 19]]) + >>> rfn.unstructured_to_structured(a, dt) + array([( 0, ( 1., 2), [ 3., 4.]), ( 5, ( 6., 7), [ 8., 9.]), + (10, (11., 12), [13., 14.]), (15, (16., 17), [18., 19.])], + dtype=[('a', '>> from numpy.lib import recfunctions as rfn + >>> b = np.array([(1, 2, 5), (4, 5, 7), (7, 8 ,11), (10, 11, 12)], + ... dtype=[('x', 'i4'), ('y', 'f4'), ('z', 'f8')]) + >>> rfn.apply_along_fields(np.mean, b) + array([ 2.66666667, 5.33333333, 8.66666667, 11. ]) + >>> rfn.apply_along_fields(np.mean, b[['x', 'z']]) + array([ 3. , 5.5, 9. , 11. ]) + + """ + if arr.dtype.names is None: + raise ValueError('arr must be a structured array') + + uarr = structured_to_unstructured(arr) + return func(uarr, axis=-1) + # works and avoids axis requirement, but very, very slow: + #return np.apply_along_axis(func, -1, uarr) + +def _assign_fields_by_name_dispatcher(dst, src, zero_unassigned=None): + return dst, src + +@array_function_dispatch(_assign_fields_by_name_dispatcher) +def assign_fields_by_name(dst, src, zero_unassigned=True): + """ + Assigns values from one structured array to another by field name. + + Normally in numpy >= 1.14, assignment of one structured array to another + copies fields "by position", meaning that the first field from the src is + copied to the first field of the dst, and so on, regardless of field name. + + This function instead copies "by field name", such that fields in the dst + are assigned from the identically named field in the src. This applies + recursively for nested structures. This is how structure assignment worked + in numpy >= 1.6 to <= 1.13. + + Parameters + ---------- + dst : ndarray + src : ndarray + The source and destination arrays during assignment. + zero_unassigned : bool, optional + If True, fields in the dst for which there was no matching + field in the src are filled with the value 0 (zero). This + was the behavior of numpy <= 1.13. If False, those fields + are not modified. + """ + + if dst.dtype.names is None: + dst[...] = src + return + + for name in dst.dtype.names: + if name not in src.dtype.names: + if zero_unassigned: + dst[name] = 0 + else: + assign_fields_by_name(dst[name], src[name], + zero_unassigned) + +def _require_fields_dispatcher(array, required_dtype): + return (array,) + +@array_function_dispatch(_require_fields_dispatcher) +def require_fields(array, required_dtype): + """ + Casts a structured array to a new dtype using assignment by field-name. + + This function assigns from the old to the new array by name, so the + value of a field in the output array is the value of the field with the + same name in the source array. This has the effect of creating a new + ndarray containing only the fields "required" by the required_dtype. + + If a field name in the required_dtype does not exist in the + input array, that field is created and set to 0 in the output array. 
+ + Parameters + ---------- + a : ndarray + array to cast + required_dtype : dtype + datatype for output array + + Returns + ------- + out : ndarray + array with the new dtype, with field values copied from the fields in + the input array with the same name + + Examples + -------- + + >>> from numpy.lib import recfunctions as rfn + >>> a = np.ones(4, dtype=[('a', 'i4'), ('b', 'f8'), ('c', 'u1')]) + >>> rfn.require_fields(a, [('b', 'f4'), ('c', 'u1')]) + array([(1., 1), (1., 1), (1., 1), (1., 1)], + dtype=[('b', '>> rfn.require_fields(a, [('b', 'f4'), ('newf', 'u1')]) + array([(1., 0), (1., 0), (1., 0), (1., 0)], + dtype=[('b', '>> from numpy.lib import recfunctions as rfn + >>> x = np.array([1, 2,]) + >>> rfn.stack_arrays(x) is x + True + >>> z = np.array([('A', 1), ('B', 2)], dtype=[('A', '|S3'), ('B', float)]) + >>> zz = np.array([('a', 10., 100.), ('b', 20., 200.), ('c', 30., 300.)], + ... dtype=[('A', '|S3'), ('B', np.double), ('C', np.double)]) + >>> test = rfn.stack_arrays((z,zz)) + >>> test + masked_array(data=[(b'A', 1.0, --), (b'B', 2.0, --), (b'a', 10.0, 100.0), + (b'b', 20.0, 200.0), (b'c', 30.0, 300.0)], + mask=[(False, False, True), (False, False, True), + (False, False, False), (False, False, False), + (False, False, False)], + fill_value=(b'N/A', 1.e+20, 1.e+20), + dtype=[('A', 'S3'), ('B', ' '%s'" % + (cdtype, fdtype)) + # Only one field: use concatenate + if len(newdescr) == 1: + output = ma.concatenate(seqarrays) + else: + # + output = ma.masked_all((np.sum(nrecords),), newdescr) + offset = np.cumsum(np.r_[0, nrecords]) + seen = [] + for (a, n, i, j) in zip(seqarrays, fldnames, offset[:-1], offset[1:]): + names = a.dtype.names + if names is None: + output['f%i' % len(seen)][i:j] = a + else: + for name in n: + output[name][i:j] = a[name] + if name not in seen: + seen.append(name) + # + return _fix_output(_fix_defaults(output, defaults), + usemask=usemask, asrecarray=asrecarray) + + +def _find_duplicates_dispatcher( + a, key=None, ignoremask=None, return_index=None): + return (a,) + + +@array_function_dispatch(_find_duplicates_dispatcher) +def find_duplicates(a, key=None, ignoremask=True, return_index=False): + """ + Find the duplicates in a structured array along a given key + + Parameters + ---------- + a : array-like + Input array + key : {string, None}, optional + Name of the fields along which to check the duplicates. + If None, the search is performed by records + ignoremask : {True, False}, optional + Whether masked data should be discarded or considered as duplicates. + return_index : {False, True}, optional + Whether to return the indices of the duplicated values. + + Examples + -------- + >>> from numpy.lib import recfunctions as rfn + >>> ndtype = [('a', int)] + >>> a = np.ma.array([1, 1, 1, 2, 2, 3, 3], + ... 
mask=[0, 0, 1, 0, 0, 0, 1]).view(ndtype) + >>> rfn.find_duplicates(a, ignoremask=True, return_index=True) + (masked_array(data=[(1,), (1,), (2,), (2,)], + mask=[(False,), (False,), (False,), (False,)], + fill_value=(999999,), + dtype=[('a', '= nb1)] - nb1 + (r1cmn, r2cmn) = (len(idx_1), len(idx_2)) + if jointype == 'inner': + (r1spc, r2spc) = (0, 0) + elif jointype == 'outer': + idx_out = idx_sort[~flag_in] + idx_1 = np.concatenate((idx_1, idx_out[(idx_out < nb1)])) + idx_2 = np.concatenate((idx_2, idx_out[(idx_out >= nb1)] - nb1)) + (r1spc, r2spc) = (len(idx_1) - r1cmn, len(idx_2) - r2cmn) + elif jointype == 'leftouter': + idx_out = idx_sort[~flag_in] + idx_1 = np.concatenate((idx_1, idx_out[(idx_out < nb1)])) + (r1spc, r2spc) = (len(idx_1) - r1cmn, 0) + # Select the entries from each input + (s1, s2) = (r1[idx_1], r2[idx_2]) + # + # Build the new description of the output array ....... + # Start with the key fields + ndtype = _get_fieldspec(r1k.dtype) + + # Add the fields from r1 + for fname, fdtype in _get_fieldspec(r1.dtype): + if fname not in key: + ndtype.append((fname, fdtype)) + + # Add the fields from r2 + for fname, fdtype in _get_fieldspec(r2.dtype): + # Have we seen the current name already ? + # we need to rebuild this list every time + names = list(name for name, dtype in ndtype) + try: + nameidx = names.index(fname) + except ValueError: + #... we haven't: just add the description to the current list + ndtype.append((fname, fdtype)) + else: + # collision + _, cdtype = ndtype[nameidx] + if fname in key: + # The current field is part of the key: take the largest dtype + ndtype[nameidx] = (fname, max(fdtype, cdtype)) + else: + # The current field is not part of the key: add the suffixes, + # and place the new field adjacent to the old one + ndtype[nameidx:nameidx + 1] = [ + (fname + r1postfix, cdtype), + (fname + r2postfix, fdtype) + ] + # Rebuild a dtype from the new fields + ndtype = np.dtype(ndtype) + # Find the largest nb of common fields : + # r1cmn and r2cmn should be equal, but... + cmn = max(r1cmn, r2cmn) + # Construct an empty array + output = ma.masked_all((cmn + r1spc + r2spc,), dtype=ndtype) + names = output.dtype.names + for f in r1names: + selected = s1[f] + if f not in names or (f in r2names and not r2postfix and f not in key): + f += r1postfix + current = output[f] + current[:r1cmn] = selected[:r1cmn] + if jointype in ('outer', 'leftouter'): + current[cmn:cmn + r1spc] = selected[r1cmn:] + for f in r2names: + selected = s2[f] + if f not in names or (f in r1names and not r1postfix and f not in key): + f += r2postfix + current = output[f] + current[:r2cmn] = selected[:r2cmn] + if (jointype == 'outer') and r2spc: + current[-r2spc:] = selected[r2cmn:] + # Sort and finalize the output + output.sort(order=key) + kwargs = dict(usemask=usemask, asrecarray=asrecarray) + return _fix_output(_fix_defaults(output, defaults), **kwargs) + + +def _rec_join_dispatcher( + key, r1, r2, jointype=None, r1postfix=None, r2postfix=None, + defaults=None): + return (r1, r2) + + +@array_function_dispatch(_rec_join_dispatcher) +def rec_join(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2', + defaults=None): + """ + Join arrays `r1` and `r2` on keys. + Alternative to join_by, that always returns a np.recarray. 
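+
+ A minimal sketch of the underlying `join_by` (input arrays assumed; keys
+ must be free of duplicates and are sorted internally):
+
+ >>> from numpy.lib import recfunctions as rfn
+ >>> r1 = np.array([(1, 10.), (2, 20.)], dtype=[('k', 'i4'), ('x', 'f8')])
+ >>> r2 = np.array([(2, 200.), (3, 300.)], dtype=[('k', 'i4'), ('y', 'f8')])
+ >>> j = rfn.join_by('k', r1, r2, jointype='inner', usemask=False)
+ >>> j.dtype.names, j.tolist()
+ (('k', 'x', 'y'), [(2, 20.0, 200.0)])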
+ + See Also + -------- + join_by : equivalent function + """ + kwargs = dict(jointype=jointype, r1postfix=r1postfix, r2postfix=r2postfix, + defaults=defaults, usemask=False, asrecarray=True) + return join_by(key, r1, r2, **kwargs) diff --git a/.local/lib/python3.8/site-packages/numpy/lib/scimath.py b/.local/lib/python3.8/site-packages/numpy/lib/scimath.py new file mode 100644 index 0000000..308f132 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/scimath.py @@ -0,0 +1,616 @@ +""" +Wrapper functions to more user-friendly calling of certain math functions +whose output data-type is different than the input data-type in certain +domains of the input. + +For example, for functions like `log` with branch cuts, the versions in this +module provide the mathematically valid answers in the complex plane:: + + >>> import math + >>> np.emath.log(-math.exp(1)) == (1+1j*math.pi) + True + +Similarly, `sqrt`, other base logarithms, `power` and trig functions are +correctly handled. See their respective docstrings for specific examples. + +Functions +--------- + +.. autosummary:: + :toctree: generated/ + + sqrt + log + log2 + logn + log10 + power + arccos + arcsin + arctanh + +""" +import numpy.core.numeric as nx +import numpy.core.numerictypes as nt +from numpy.core.numeric import asarray, any +from numpy.core.overrides import array_function_dispatch +from numpy.lib.type_check import isreal + + +__all__ = [ + 'sqrt', 'log', 'log2', 'logn', 'log10', 'power', 'arccos', 'arcsin', + 'arctanh' + ] + + +_ln2 = nx.log(2.0) + + +def _tocomplex(arr): + """Convert its input `arr` to a complex array. + + The input is returned as a complex array of the smallest type that will fit + the original data: types like single, byte, short, etc. become csingle, + while others become cdouble. + + A copy of the input is always made. + + Parameters + ---------- + arr : array + + Returns + ------- + array + An array with the same input data as the input but in complex form. + + Examples + -------- + + First, consider an input of type short: + + >>> a = np.array([1,2,3],np.short) + + >>> ac = np.lib.scimath._tocomplex(a); ac + array([1.+0.j, 2.+0.j, 3.+0.j], dtype=complex64) + + >>> ac.dtype + dtype('complex64') + + If the input is of type double, the output is correspondingly of the + complex double type as well: + + >>> b = np.array([1,2,3],np.double) + + >>> bc = np.lib.scimath._tocomplex(b); bc + array([1.+0.j, 2.+0.j, 3.+0.j]) + + >>> bc.dtype + dtype('complex128') + + Note that even if the input was complex to begin with, a copy is still + made, since the astype() method always copies: + + >>> c = np.array([1,2,3],np.csingle) + + >>> cc = np.lib.scimath._tocomplex(c); cc + array([1.+0.j, 2.+0.j, 3.+0.j], dtype=complex64) + + >>> c *= 2; c + array([2.+0.j, 4.+0.j, 6.+0.j], dtype=complex64) + + >>> cc + array([1.+0.j, 2.+0.j, 3.+0.j], dtype=complex64) + """ + if issubclass(arr.dtype.type, (nt.single, nt.byte, nt.short, nt.ubyte, + nt.ushort, nt.csingle)): + return arr.astype(nt.csingle) + else: + return arr.astype(nt.cdouble) + + +def _fix_real_lt_zero(x): + """Convert `x` to complex if it has real, negative components. + + Otherwise, output is just the array version of the input (via asarray). 
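+
+ This helper is what gives the public wrappers their domain-switching
+ behaviour; compare (indicative, `np.sqrt` also emits a RuntimeWarning):
+
+ >>> np.sqrt(-1) # plain ufunc stays real and returns nan
+ >>> np.emath.sqrt(-1) # wrapped version promotes to complex: 1j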
+ + Parameters + ---------- + x : array_like + + Returns + ------- + array + + Examples + -------- + >>> np.lib.scimath._fix_real_lt_zero([1,2]) + array([1, 2]) + + >>> np.lib.scimath._fix_real_lt_zero([-1,2]) + array([-1.+0.j, 2.+0.j]) + + """ + x = asarray(x) + if any(isreal(x) & (x < 0)): + x = _tocomplex(x) + return x + + +def _fix_int_lt_zero(x): + """Convert `x` to double if it has real, negative components. + + Otherwise, output is just the array version of the input (via asarray). + + Parameters + ---------- + x : array_like + + Returns + ------- + array + + Examples + -------- + >>> np.lib.scimath._fix_int_lt_zero([1,2]) + array([1, 2]) + + >>> np.lib.scimath._fix_int_lt_zero([-1,2]) + array([-1., 2.]) + """ + x = asarray(x) + if any(isreal(x) & (x < 0)): + x = x * 1.0 + return x + + +def _fix_real_abs_gt_1(x): + """Convert `x` to complex if it has real components x_i with abs(x_i)>1. + + Otherwise, output is just the array version of the input (via asarray). + + Parameters + ---------- + x : array_like + + Returns + ------- + array + + Examples + -------- + >>> np.lib.scimath._fix_real_abs_gt_1([0,1]) + array([0, 1]) + + >>> np.lib.scimath._fix_real_abs_gt_1([0,2]) + array([0.+0.j, 2.+0.j]) + """ + x = asarray(x) + if any(isreal(x) & (abs(x) > 1)): + x = _tocomplex(x) + return x + + +def _unary_dispatcher(x): + return (x,) + + +@array_function_dispatch(_unary_dispatcher) +def sqrt(x): + """ + Compute the square root of x. + + For negative input elements, a complex value is returned + (unlike `numpy.sqrt` which returns NaN). + + Parameters + ---------- + x : array_like + The input value(s). + + Returns + ------- + out : ndarray or scalar + The square root of `x`. If `x` was a scalar, so is `out`, + otherwise an array is returned. + + See Also + -------- + numpy.sqrt + + Examples + -------- + For real, non-negative inputs this works just like `numpy.sqrt`: + + >>> np.emath.sqrt(1) + 1.0 + >>> np.emath.sqrt([1, 4]) + array([1., 2.]) + + But it automatically handles negative inputs: + + >>> np.emath.sqrt(-1) + 1j + >>> np.emath.sqrt([-1,4]) + array([0.+1.j, 2.+0.j]) + + """ + x = _fix_real_lt_zero(x) + return nx.sqrt(x) + + +@array_function_dispatch(_unary_dispatcher) +def log(x): + """ + Compute the natural logarithm of `x`. + + Return the "principal value" (for a description of this, see `numpy.log`) + of :math:`log_e(x)`. For real `x > 0`, this is a real number (``log(0)`` + returns ``-inf`` and ``log(np.inf)`` returns ``inf``). Otherwise, the + complex principle value is returned. + + Parameters + ---------- + x : array_like + The value(s) whose log is (are) required. + + Returns + ------- + out : ndarray or scalar + The log of the `x` value(s). If `x` was a scalar, so is `out`, + otherwise an array is returned. + + See Also + -------- + numpy.log + + Notes + ----- + For a log() that returns ``NAN`` when real `x < 0`, use `numpy.log` + (note, however, that otherwise `numpy.log` and this `log` are identical, + i.e., both return ``-inf`` for `x = 0`, ``inf`` for `x = inf`, and, + notably, the complex principle value if ``x.imag != 0``). + + Examples + -------- + >>> np.emath.log(np.exp(1)) + 1.0 + + Negative arguments are handled "correctly" (recall that + ``exp(log(x)) == x`` does *not* hold for real ``x < 0``): + + >>> np.emath.log(-np.exp(1)) == (1 + np.pi * 1j) + True + + """ + x = _fix_real_lt_zero(x) + return nx.log(x) + + +@array_function_dispatch(_unary_dispatcher) +def log10(x): + """ + Compute the logarithm base 10 of `x`. 
+ + Return the "principal value" (for a description of this, see + `numpy.log10`) of :math:`log_{10}(x)`. For real `x > 0`, this + is a real number (``log10(0)`` returns ``-inf`` and ``log10(np.inf)`` + returns ``inf``). Otherwise, the complex principle value is returned. + + Parameters + ---------- + x : array_like or scalar + The value(s) whose log base 10 is (are) required. + + Returns + ------- + out : ndarray or scalar + The log base 10 of the `x` value(s). If `x` was a scalar, so is `out`, + otherwise an array object is returned. + + See Also + -------- + numpy.log10 + + Notes + ----- + For a log10() that returns ``NAN`` when real `x < 0`, use `numpy.log10` + (note, however, that otherwise `numpy.log10` and this `log10` are + identical, i.e., both return ``-inf`` for `x = 0`, ``inf`` for `x = inf`, + and, notably, the complex principle value if ``x.imag != 0``). + + Examples + -------- + + (We set the printing precision so the example can be auto-tested) + + >>> np.set_printoptions(precision=4) + + >>> np.emath.log10(10**1) + 1.0 + + >>> np.emath.log10([-10**1, -10**2, 10**2]) + array([1.+1.3644j, 2.+1.3644j, 2.+0.j ]) + + """ + x = _fix_real_lt_zero(x) + return nx.log10(x) + + +def _logn_dispatcher(n, x): + return (n, x,) + + +@array_function_dispatch(_logn_dispatcher) +def logn(n, x): + """ + Take log base n of x. + + If `x` contains negative inputs, the answer is computed and returned in the + complex domain. + + Parameters + ---------- + n : array_like + The integer base(s) in which the log is taken. + x : array_like + The value(s) whose log base `n` is (are) required. + + Returns + ------- + out : ndarray or scalar + The log base `n` of the `x` value(s). If `x` was a scalar, so is + `out`, otherwise an array is returned. + + Examples + -------- + >>> np.set_printoptions(precision=4) + + >>> np.emath.logn(2, [4, 8]) + array([2., 3.]) + >>> np.emath.logn(2, [-4, -8, 8]) + array([2.+4.5324j, 3.+4.5324j, 3.+0.j ]) + + """ + x = _fix_real_lt_zero(x) + n = _fix_real_lt_zero(n) + return nx.log(x)/nx.log(n) + + +@array_function_dispatch(_unary_dispatcher) +def log2(x): + """ + Compute the logarithm base 2 of `x`. + + Return the "principal value" (for a description of this, see + `numpy.log2`) of :math:`log_2(x)`. For real `x > 0`, this is + a real number (``log2(0)`` returns ``-inf`` and ``log2(np.inf)`` returns + ``inf``). Otherwise, the complex principle value is returned. + + Parameters + ---------- + x : array_like + The value(s) whose log base 2 is (are) required. + + Returns + ------- + out : ndarray or scalar + The log base 2 of the `x` value(s). If `x` was a scalar, so is `out`, + otherwise an array is returned. + + See Also + -------- + numpy.log2 + + Notes + ----- + For a log2() that returns ``NAN`` when real `x < 0`, use `numpy.log2` + (note, however, that otherwise `numpy.log2` and this `log2` are + identical, i.e., both return ``-inf`` for `x = 0`, ``inf`` for `x = inf`, + and, notably, the complex principle value if ``x.imag != 0``). + + Examples + -------- + We set the printing precision so the example can be auto-tested: + + >>> np.set_printoptions(precision=4) + + >>> np.emath.log2(8) + 3.0 + >>> np.emath.log2([-4, -8, 8]) + array([2.+4.5324j, 3.+4.5324j, 3.+0.j ]) + + """ + x = _fix_real_lt_zero(x) + return nx.log2(x) + + +def _power_dispatcher(x, p): + return (x, p) + + +@array_function_dispatch(_power_dispatcher) +def power(x, p): + """ + Return x to the power p, (x**p). + + If `x` contains negative values, the output is converted to the + complex domain. 
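+
+ A one-line sketch of that promotion (indicative output):
+
+ >>> np.emath.power(-2, 3) # (-8+0j) rather than the real -8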
+ + Parameters + ---------- + x : array_like + The input value(s). + p : array_like of ints + The power(s) to which `x` is raised. If `x` contains multiple values, + `p` has to either be a scalar, or contain the same number of values + as `x`. In the latter case, the result is + ``x[0]**p[0], x[1]**p[1], ...``. + + Returns + ------- + out : ndarray or scalar + The result of ``x**p``. If `x` and `p` are scalars, so is `out`, + otherwise an array is returned. + + See Also + -------- + numpy.power + + Examples + -------- + >>> np.set_printoptions(precision=4) + + >>> np.emath.power([2, 4], 2) + array([ 4, 16]) + >>> np.emath.power([2, 4], -2) + array([0.25 , 0.0625]) + >>> np.emath.power([-2, 4], 2) + array([ 4.-0.j, 16.+0.j]) + + """ + x = _fix_real_lt_zero(x) + p = _fix_int_lt_zero(p) + return nx.power(x, p) + + +@array_function_dispatch(_unary_dispatcher) +def arccos(x): + """ + Compute the inverse cosine of x. + + Return the "principal value" (for a description of this, see + `numpy.arccos`) of the inverse cosine of `x`. For real `x` such that + `abs(x) <= 1`, this is a real number in the closed interval + :math:`[0, \\pi]`. Otherwise, the complex principle value is returned. + + Parameters + ---------- + x : array_like or scalar + The value(s) whose arccos is (are) required. + + Returns + ------- + out : ndarray or scalar + The inverse cosine(s) of the `x` value(s). If `x` was a scalar, so + is `out`, otherwise an array object is returned. + + See Also + -------- + numpy.arccos + + Notes + ----- + For an arccos() that returns ``NAN`` when real `x` is not in the + interval ``[-1,1]``, use `numpy.arccos`. + + Examples + -------- + >>> np.set_printoptions(precision=4) + + >>> np.emath.arccos(1) # a scalar is returned + 0.0 + + >>> np.emath.arccos([1,2]) + array([0.-0.j , 0.-1.317j]) + + """ + x = _fix_real_abs_gt_1(x) + return nx.arccos(x) + + +@array_function_dispatch(_unary_dispatcher) +def arcsin(x): + """ + Compute the inverse sine of x. + + Return the "principal value" (for a description of this, see + `numpy.arcsin`) of the inverse sine of `x`. For real `x` such that + `abs(x) <= 1`, this is a real number in the closed interval + :math:`[-\\pi/2, \\pi/2]`. Otherwise, the complex principle value is + returned. + + Parameters + ---------- + x : array_like or scalar + The value(s) whose arcsin is (are) required. + + Returns + ------- + out : ndarray or scalar + The inverse sine(s) of the `x` value(s). If `x` was a scalar, so + is `out`, otherwise an array object is returned. + + See Also + -------- + numpy.arcsin + + Notes + ----- + For an arcsin() that returns ``NAN`` when real `x` is not in the + interval ``[-1,1]``, use `numpy.arcsin`. + + Examples + -------- + >>> np.set_printoptions(precision=4) + + >>> np.emath.arcsin(0) + 0.0 + + >>> np.emath.arcsin([0,1]) + array([0. , 1.5708]) + + """ + x = _fix_real_abs_gt_1(x) + return nx.arcsin(x) + + +@array_function_dispatch(_unary_dispatcher) +def arctanh(x): + """ + Compute the inverse hyperbolic tangent of `x`. + + Return the "principal value" (for a description of this, see + `numpy.arctanh`) of ``arctanh(x)``. For real `x` such that + ``abs(x) < 1``, this is a real number. If `abs(x) > 1`, or if `x` is + complex, the result is complex. Finally, `x = 1` returns``inf`` and + ``x=-1`` returns ``-inf``. + + Parameters + ---------- + x : array_like + The value(s) whose arctanh is (are) required. + + Returns + ------- + out : ndarray or scalar + The inverse hyperbolic tangent(s) of the `x` value(s). 
If `x` was + a scalar so is `out`, otherwise an array is returned. + + + See Also + -------- + numpy.arctanh + + Notes + ----- + For an arctanh() that returns ``NAN`` when real `x` is not in the + interval ``(-1,1)``, use `numpy.arctanh` (this latter, however, does + return +/-inf for ``x = +/-1``). + + Examples + -------- + >>> np.set_printoptions(precision=4) + + >>> from numpy.testing import suppress_warnings + >>> with suppress_warnings() as sup: + ... sup.filter(RuntimeWarning) + ... np.emath.arctanh(np.eye(2)) + array([[inf, 0.], + [ 0., inf]]) + >>> np.emath.arctanh([1j]) + array([0.+0.7854j]) + + """ + x = _fix_real_abs_gt_1(x) + return nx.arctanh(x) diff --git a/.local/lib/python3.8/site-packages/numpy/lib/scimath.pyi b/.local/lib/python3.8/site-packages/numpy/lib/scimath.pyi new file mode 100644 index 0000000..d0d4af4 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/scimath.pyi @@ -0,0 +1,13 @@ +from typing import List + +__all__: List[str] + +def sqrt(x): ... +def log(x): ... +def log10(x): ... +def logn(n, x): ... +def log2(x): ... +def power(x, p): ... +def arccos(x): ... +def arcsin(x): ... +def arctanh(x): ... diff --git a/.local/lib/python3.8/site-packages/numpy/lib/setup.py b/.local/lib/python3.8/site-packages/numpy/lib/setup.py new file mode 100644 index 0000000..7520b72 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/setup.py @@ -0,0 +1,12 @@ +def configuration(parent_package='',top_path=None): + from numpy.distutils.misc_util import Configuration + + config = Configuration('lib', parent_package, top_path) + config.add_subpackage('tests') + config.add_data_dir('tests/data') + config.add_data_files('*.pyi') + return config + +if __name__ == '__main__': + from numpy.distutils.core import setup + setup(configuration=configuration) diff --git a/.local/lib/python3.8/site-packages/numpy/lib/shape_base.py b/.local/lib/python3.8/site-packages/numpy/lib/shape_base.py new file mode 100644 index 0000000..a3fbee3 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/shape_base.py @@ -0,0 +1,1260 @@ +import functools + +import numpy.core.numeric as _nx +from numpy.core.numeric import ( + asarray, zeros, outer, concatenate, array, asanyarray + ) +from numpy.core.fromnumeric import reshape, transpose +from numpy.core.multiarray import normalize_axis_index +from numpy.core import overrides +from numpy.core import vstack, atleast_3d +from numpy.core.numeric import normalize_axis_tuple +from numpy.core.shape_base import _arrays_for_stack_dispatcher +from numpy.lib.index_tricks import ndindex +from numpy.matrixlib.defmatrix import matrix # this raises all the right alarm bells + + +__all__ = [ + 'column_stack', 'row_stack', 'dstack', 'array_split', 'split', + 'hsplit', 'vsplit', 'dsplit', 'apply_over_axes', 'expand_dims', + 'apply_along_axis', 'kron', 'tile', 'get_array_wrap', 'take_along_axis', + 'put_along_axis' + ] + + +array_function_dispatch = functools.partial( + overrides.array_function_dispatch, module='numpy') + + +def _make_along_axis_idx(arr_shape, indices, axis): + # compute dimensions to iterate over + if not _nx.issubdtype(indices.dtype, _nx.integer): + raise IndexError('`indices` must be an integer array') + if len(arr_shape) != indices.ndim: + raise ValueError( + "`indices` and `arr` must have the same number of dimensions") + shape_ones = (1,) * indices.ndim + dest_dims = list(range(axis)) + [None] + list(range(axis+1, indices.ndim)) + + # build a fancy index, consisting of orthogonal aranges, with the + # requested index 
inserted at the right location + fancy_index = [] + for dim, n in zip(dest_dims, arr_shape): + if dim is None: + fancy_index.append(indices) + else: + ind_shape = shape_ones[:dim] + (-1,) + shape_ones[dim+1:] + fancy_index.append(_nx.arange(n).reshape(ind_shape)) + + return tuple(fancy_index) + + +def _take_along_axis_dispatcher(arr, indices, axis): + return (arr, indices) + + +@array_function_dispatch(_take_along_axis_dispatcher) +def take_along_axis(arr, indices, axis): + """ + Take values from the input array by matching 1d index and data slices. + + This iterates over matching 1d slices oriented along the specified axis in + the index and data arrays, and uses the former to look up values in the + latter. These slices can be different lengths. + + Functions returning an index along an axis, like `argsort` and + `argpartition`, produce suitable indices for this function. + + .. versionadded:: 1.15.0 + + Parameters + ---------- + arr : ndarray (Ni..., M, Nk...) + Source array + indices : ndarray (Ni..., J, Nk...) + Indices to take along each 1d slice of `arr`. This must match the + dimension of arr, but dimensions Ni and Nj only need to broadcast + against `arr`. + axis : int + The axis to take 1d slices along. If axis is None, the input array is + treated as if it had first been flattened to 1d, for consistency with + `sort` and `argsort`. + + Returns + ------- + out: ndarray (Ni..., J, Nk...) + The indexed result. + + Notes + ----- + This is equivalent to (but faster than) the following use of `ndindex` and + `s_`, which sets each of ``ii`` and ``kk`` to a tuple of indices:: + + Ni, M, Nk = a.shape[:axis], a.shape[axis], a.shape[axis+1:] + J = indices.shape[axis] # Need not equal M + out = np.empty(Ni + (J,) + Nk) + + for ii in ndindex(Ni): + for kk in ndindex(Nk): + a_1d = a [ii + s_[:,] + kk] + indices_1d = indices[ii + s_[:,] + kk] + out_1d = out [ii + s_[:,] + kk] + for j in range(J): + out_1d[j] = a_1d[indices_1d[j]] + + Equivalently, eliminating the inner loop, the last two lines would be:: + + out_1d[:] = a_1d[indices_1d] + + See Also + -------- + take : Take along an axis, using the same indices for every 1d slice + put_along_axis : + Put values into the destination array by matching 1d index and data slices + + Examples + -------- + + For this sample array + + >>> a = np.array([[10, 30, 20], [60, 40, 50]]) + + We can sort either by using sort directly, or argsort and this function + + >>> np.sort(a, axis=1) + array([[10, 20, 30], + [40, 50, 60]]) + >>> ai = np.argsort(a, axis=1); ai + array([[0, 2, 1], + [1, 2, 0]]) + >>> np.take_along_axis(a, ai, axis=1) + array([[10, 20, 30], + [40, 50, 60]]) + + The same works for max and min, if you expand the dimensions: + + >>> np.expand_dims(np.max(a, axis=1), axis=1) + array([[30], + [60]]) + >>> ai = np.expand_dims(np.argmax(a, axis=1), axis=1) + >>> ai + array([[1], + [0]]) + >>> np.take_along_axis(a, ai, axis=1) + array([[30], + [60]]) + + If we want to get the max and min at the same time, we can stack the + indices first + + >>> ai_min = np.expand_dims(np.argmin(a, axis=1), axis=1) + >>> ai_max = np.expand_dims(np.argmax(a, axis=1), axis=1) + >>> ai = np.concatenate([ai_min, ai_max], axis=1) + >>> ai + array([[0, 1], + [1, 0]]) + >>> np.take_along_axis(a, ai, axis=1) + array([[10, 30], + [40, 60]]) + """ + # normalize inputs + if axis is None: + arr = arr.flat + arr_shape = (len(arr),) # flatiter has no .shape + axis = 0 + else: + axis = normalize_axis_index(axis, arr.ndim) + arr_shape = arr.shape + + # use the fancy index + 
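+ # _make_along_axis_idx returns a tuple of orthogonal aranges with
+ # `indices` substituted at position `axis`, so this single fancy-indexing
+ # expression gathers one element per index from every 1d slice.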
return arr[_make_along_axis_idx(arr_shape, indices, axis)] + + +def _put_along_axis_dispatcher(arr, indices, values, axis): + return (arr, indices, values) + + +@array_function_dispatch(_put_along_axis_dispatcher) +def put_along_axis(arr, indices, values, axis): + """ + Put values into the destination array by matching 1d index and data slices. + + This iterates over matching 1d slices oriented along the specified axis in + the index and data arrays, and uses the former to place values into the + latter. These slices can be different lengths. + + Functions returning an index along an axis, like `argsort` and + `argpartition`, produce suitable indices for this function. + + .. versionadded:: 1.15.0 + + Parameters + ---------- + arr : ndarray (Ni..., M, Nk...) + Destination array. + indices : ndarray (Ni..., J, Nk...) + Indices to change along each 1d slice of `arr`. This must match the + dimension of arr, but dimensions in Ni and Nj may be 1 to broadcast + against `arr`. + values : array_like (Ni..., J, Nk...) + values to insert at those indices. Its shape and dimension are + broadcast to match that of `indices`. + axis : int + The axis to take 1d slices along. If axis is None, the destination + array is treated as if a flattened 1d view had been created of it. + + Notes + ----- + This is equivalent to (but faster than) the following use of `ndindex` and + `s_`, which sets each of ``ii`` and ``kk`` to a tuple of indices:: + + Ni, M, Nk = a.shape[:axis], a.shape[axis], a.shape[axis+1:] + J = indices.shape[axis] # Need not equal M + + for ii in ndindex(Ni): + for kk in ndindex(Nk): + a_1d = a [ii + s_[:,] + kk] + indices_1d = indices[ii + s_[:,] + kk] + values_1d = values [ii + s_[:,] + kk] + for j in range(J): + a_1d[indices_1d[j]] = values_1d[j] + + Equivalently, eliminating the inner loop, the last two lines would be:: + + a_1d[indices_1d] = values_1d + + See Also + -------- + take_along_axis : + Take values from the input array by matching 1d index and data slices + + Examples + -------- + + For this sample array + + >>> a = np.array([[10, 30, 20], [60, 40, 50]]) + + We can replace the maximum values with: + + >>> ai = np.expand_dims(np.argmax(a, axis=1), axis=1) + >>> ai + array([[1], + [0]]) + >>> np.put_along_axis(a, ai, 99, axis=1) + >>> a + array([[10, 99, 20], + [99, 40, 50]]) + + """ + # normalize inputs + if axis is None: + arr = arr.flat + axis = 0 + arr_shape = (len(arr),) # flatiter has no .shape + else: + axis = normalize_axis_index(axis, arr.ndim) + arr_shape = arr.shape + + # use the fancy index + arr[_make_along_axis_idx(arr_shape, indices, axis)] = values + + +def _apply_along_axis_dispatcher(func1d, axis, arr, *args, **kwargs): + return (arr,) + + +@array_function_dispatch(_apply_along_axis_dispatcher) +def apply_along_axis(func1d, axis, arr, *args, **kwargs): + """ + Apply a function to 1-D slices along the given axis. + + Execute `func1d(a, *args, **kwargs)` where `func1d` operates on 1-D arrays + and `a` is a 1-D slice of `arr` along `axis`. 
+ + This is equivalent to (but faster than) the following use of `ndindex` and + `s_`, which sets each of ``ii``, ``jj``, and ``kk`` to a tuple of indices:: + + Ni, Nk = a.shape[:axis], a.shape[axis+1:] + for ii in ndindex(Ni): + for kk in ndindex(Nk): + f = func1d(arr[ii + s_[:,] + kk]) + Nj = f.shape + for jj in ndindex(Nj): + out[ii + jj + kk] = f[jj] + + Equivalently, eliminating the inner loop, this can be expressed as:: + + Ni, Nk = a.shape[:axis], a.shape[axis+1:] + for ii in ndindex(Ni): + for kk in ndindex(Nk): + out[ii + s_[...,] + kk] = func1d(arr[ii + s_[:,] + kk]) + + Parameters + ---------- + func1d : function (M,) -> (Nj...) + This function should accept 1-D arrays. It is applied to 1-D + slices of `arr` along the specified axis. + axis : integer + Axis along which `arr` is sliced. + arr : ndarray (Ni..., M, Nk...) + Input array. + args : any + Additional arguments to `func1d`. + kwargs : any + Additional named arguments to `func1d`. + + .. versionadded:: 1.9.0 + + + Returns + ------- + out : ndarray (Ni..., Nj..., Nk...) + The output array. The shape of `out` is identical to the shape of + `arr`, except along the `axis` dimension. This axis is removed, and + replaced with new dimensions equal to the shape of the return value + of `func1d`. So if `func1d` returns a scalar `out` will have one + fewer dimensions than `arr`. + + See Also + -------- + apply_over_axes : Apply a function repeatedly over multiple axes. + + Examples + -------- + >>> def my_func(a): + ... \"\"\"Average first and last element of a 1-D array\"\"\" + ... return (a[0] + a[-1]) * 0.5 + >>> b = np.array([[1,2,3], [4,5,6], [7,8,9]]) + >>> np.apply_along_axis(my_func, 0, b) + array([4., 5., 6.]) + >>> np.apply_along_axis(my_func, 1, b) + array([2., 5., 8.]) + + For a function that returns a 1D array, the number of dimensions in + `outarr` is the same as `arr`. + + >>> b = np.array([[8,1,7], [4,3,9], [5,2,6]]) + >>> np.apply_along_axis(sorted, 1, b) + array([[1, 7, 8], + [3, 4, 9], + [2, 5, 6]]) + + For a function that returns a higher dimensional array, those dimensions + are inserted in place of the `axis` dimension. + + >>> b = np.array([[1,2,3], [4,5,6], [7,8,9]]) + >>> np.apply_along_axis(np.diag, -1, b) + array([[[1, 0, 0], + [0, 2, 0], + [0, 0, 3]], + [[4, 0, 0], + [0, 5, 0], + [0, 0, 6]], + [[7, 0, 0], + [0, 8, 0], + [0, 0, 9]]]) + """ + # handle negative axes + arr = asanyarray(arr) + nd = arr.ndim + axis = normalize_axis_index(axis, nd) + + # arr, with the iteration axis at the end + in_dims = list(range(nd)) + inarr_view = transpose(arr, in_dims[:axis] + in_dims[axis+1:] + [axis]) + + # compute indices for the iteration axes, and append a trailing ellipsis to + # prevent 0d arrays decaying to scalars, which fixes gh-8642 + inds = ndindex(inarr_view.shape[:-1]) + inds = (ind + (Ellipsis,) for ind in inds) + + # invoke the function on the first item + try: + ind0 = next(inds) + except StopIteration as e: + raise ValueError( + 'Cannot apply_along_axis when any iteration dimensions are 0' + ) from None + res = asanyarray(func1d(inarr_view[ind0], *args, **kwargs)) + + # build a buffer for storing evaluations of func1d. + # remove the requested axis, and add the new ones on the end. + # laid out so that each write is contiguous. 
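+ # The axes produced by func1d sit at the *end* of buff for now; the
+ # buff_permute computed below rotates them back into the position of the
+ # removed axis.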
+ # for a tuple index inds, buff[inds] = func1d(inarr_view[inds]) + buff = zeros(inarr_view.shape[:-1] + res.shape, res.dtype) + + # permutation of axes such that out = buff.transpose(buff_permute) + buff_dims = list(range(buff.ndim)) + buff_permute = ( + buff_dims[0 : axis] + + buff_dims[buff.ndim-res.ndim : buff.ndim] + + buff_dims[axis : buff.ndim-res.ndim] + ) + + # matrices have a nasty __array_prepare__ and __array_wrap__ + if not isinstance(res, matrix): + buff = res.__array_prepare__(buff) + + # save the first result, then compute and save all remaining results + buff[ind0] = res + for ind in inds: + buff[ind] = asanyarray(func1d(inarr_view[ind], *args, **kwargs)) + + if not isinstance(res, matrix): + # wrap the array, to preserve subclasses + buff = res.__array_wrap__(buff) + + # finally, rotate the inserted axes back to where they belong + return transpose(buff, buff_permute) + + else: + # matrices have to be transposed first, because they collapse dimensions! + out_arr = transpose(buff, buff_permute) + return res.__array_wrap__(out_arr) + + +def _apply_over_axes_dispatcher(func, a, axes): + return (a,) + + +@array_function_dispatch(_apply_over_axes_dispatcher) +def apply_over_axes(func, a, axes): + """ + Apply a function repeatedly over multiple axes. + + `func` is called as `res = func(a, axis)`, where `axis` is the first + element of `axes`. The result `res` of the function call must have + either the same dimensions as `a` or one less dimension. If `res` + has one less dimension than `a`, a dimension is inserted before + `axis`. The call to `func` is then repeated for each axis in `axes`, + with `res` as the first argument. + + Parameters + ---------- + func : function + This function must take two arguments, `func(a, axis)`. + a : array_like + Input array. + axes : array_like + Axes over which `func` is applied; the elements must be integers. + + Returns + ------- + apply_over_axis : ndarray + The output array. The number of dimensions is the same as `a`, + but the shape can be different. This depends on whether `func` + changes the shape of its output with respect to its input. + + See Also + -------- + apply_along_axis : + Apply a function to 1-D slices of an array along the given axis. + + Notes + ----- + This function is equivalent to tuple axis arguments to reorderable ufuncs + with keepdims=True. Tuple axis arguments to ufuncs have been available since + version 1.7.0. + + Examples + -------- + >>> a = np.arange(24).reshape(2,3,4) + >>> a + array([[[ 0, 1, 2, 3], + [ 4, 5, 6, 7], + [ 8, 9, 10, 11]], + [[12, 13, 14, 15], + [16, 17, 18, 19], + [20, 21, 22, 23]]]) + + Sum over axes 0 and 2. The result has same number of dimensions + as the original array: + + >>> np.apply_over_axes(np.sum, a, [0,2]) + array([[[ 60], + [ 92], + [124]]]) + + Tuple axis arguments to ufuncs are equivalent: + + >>> np.sum(a, axis=(0,2), keepdims=True) + array([[[ 60], + [ 92], + [124]]]) + + """ + val = asarray(a) + N = a.ndim + if array(axes).ndim == 0: + axes = (axes,) + for axis in axes: + if axis < 0: + axis = N + axis + args = (val, axis) + res = func(*args) + if res.ndim == val.ndim: + val = res + else: + res = expand_dims(res, axis) + if res.ndim == val.ndim: + val = res + else: + raise ValueError("function is not returning " + "an array of the correct shape") + return val + + +def _expand_dims_dispatcher(a, axis): + return (a,) + + +@array_function_dispatch(_expand_dims_dispatcher) +def expand_dims(a, axis): + """ + Expand the shape of an array. 
+ + Insert a new axis that will appear at the `axis` position in the expanded + array shape. + + Parameters + ---------- + a : array_like + Input array. + axis : int or tuple of ints + Position in the expanded axes where the new axis (or axes) is placed. + + .. deprecated:: 1.13.0 + Passing an axis where ``axis > a.ndim`` will be treated as + ``axis == a.ndim``, and passing ``axis < -a.ndim - 1`` will + be treated as ``axis == 0``. This behavior is deprecated. + + .. versionchanged:: 1.18.0 + A tuple of axes is now supported. Out of range axes as + described above are now forbidden and raise an `AxisError`. + + Returns + ------- + result : ndarray + View of `a` with the number of dimensions increased. + + See Also + -------- + squeeze : The inverse operation, removing singleton dimensions + reshape : Insert, remove, and combine dimensions, and resize existing ones + doc.indexing, atleast_1d, atleast_2d, atleast_3d + + Examples + -------- + >>> x = np.array([1, 2]) + >>> x.shape + (2,) + + The following is equivalent to ``x[np.newaxis, :]`` or ``x[np.newaxis]``: + + >>> y = np.expand_dims(x, axis=0) + >>> y + array([[1, 2]]) + >>> y.shape + (1, 2) + + The following is equivalent to ``x[:, np.newaxis]``: + + >>> y = np.expand_dims(x, axis=1) + >>> y + array([[1], + [2]]) + >>> y.shape + (2, 1) + + ``axis`` may also be a tuple: + + >>> y = np.expand_dims(x, axis=(0, 1)) + >>> y + array([[[1, 2]]]) + + >>> y = np.expand_dims(x, axis=(2, 0)) + >>> y + array([[[1], + [2]]]) + + Note that some examples may use ``None`` instead of ``np.newaxis``. These + are the same objects: + + >>> np.newaxis is None + True + + """ + if isinstance(a, matrix): + a = asarray(a) + else: + a = asanyarray(a) + + if type(axis) not in (tuple, list): + axis = (axis,) + + out_ndim = len(axis) + a.ndim + axis = normalize_axis_tuple(axis, out_ndim) + + shape_it = iter(a.shape) + shape = [1 if ax in axis else next(shape_it) for ax in range(out_ndim)] + + return a.reshape(shape) + + +row_stack = vstack + + +def _column_stack_dispatcher(tup): + return _arrays_for_stack_dispatcher(tup) + + +@array_function_dispatch(_column_stack_dispatcher) +def column_stack(tup): + """ + Stack 1-D arrays as columns into a 2-D array. + + Take a sequence of 1-D arrays and stack them as columns + to make a single 2-D array. 2-D arrays are stacked as-is, + just like with `hstack`. 1-D arrays are turned into 2-D columns + first. + + Parameters + ---------- + tup : sequence of 1-D or 2-D arrays. + Arrays to stack. All of them must have the same first dimension. + + Returns + ------- + stacked : 2-D array + The array formed by stacking the given arrays. + + See Also + -------- + stack, hstack, vstack, concatenate + + Examples + -------- + >>> a = np.array((1,2,3)) + >>> b = np.array((2,3,4)) + >>> np.column_stack((a,b)) + array([[1, 2], + [2, 3], + [3, 4]]) + + """ + if not overrides.ARRAY_FUNCTION_ENABLED: + # raise warning if necessary + _arrays_for_stack_dispatcher(tup, stacklevel=2) + + arrays = [] + for v in tup: + arr = asanyarray(v) + if arr.ndim < 2: + arr = array(arr, copy=False, subok=True, ndmin=2).T + arrays.append(arr) + return _nx.concatenate(arrays, 1) + + +def _dstack_dispatcher(tup): + return _arrays_for_stack_dispatcher(tup) + + +@array_function_dispatch(_dstack_dispatcher) +def dstack(tup): + """ + Stack arrays in sequence depth wise (along third axis). 
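+
+ For instance, stacking three 2x2 channel planes yields a 2x2x3 volume
+ (a small assumed sketch):
+
+ >>> np.dstack([np.zeros((2, 2))] * 3).shape
+ (2, 2, 3)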
+ + This is equivalent to concatenation along the third axis after 2-D arrays + of shape `(M,N)` have been reshaped to `(M,N,1)` and 1-D arrays of shape + `(N,)` have been reshaped to `(1,N,1)`. Rebuilds arrays divided by + `dsplit`. + + This function makes most sense for arrays with up to 3 dimensions. For + instance, for pixel-data with a height (first axis), width (second axis), + and r/g/b channels (third axis). The functions `concatenate`, `stack` and + `block` provide more general stacking and concatenation operations. + + Parameters + ---------- + tup : sequence of arrays + The arrays must have the same shape along all but the third axis. + 1-D or 2-D arrays must have the same shape. + + Returns + ------- + stacked : ndarray + The array formed by stacking the given arrays, will be at least 3-D. + + See Also + -------- + concatenate : Join a sequence of arrays along an existing axis. + stack : Join a sequence of arrays along a new axis. + block : Assemble an nd-array from nested lists of blocks. + vstack : Stack arrays in sequence vertically (row wise). + hstack : Stack arrays in sequence horizontally (column wise). + column_stack : Stack 1-D arrays as columns into a 2-D array. + dsplit : Split array along third axis. + + Examples + -------- + >>> a = np.array((1,2,3)) + >>> b = np.array((2,3,4)) + >>> np.dstack((a,b)) + array([[[1, 2], + [2, 3], + [3, 4]]]) + + >>> a = np.array([[1],[2],[3]]) + >>> b = np.array([[2],[3],[4]]) + >>> np.dstack((a,b)) + array([[[1, 2]], + [[2, 3]], + [[3, 4]]]) + + """ + if not overrides.ARRAY_FUNCTION_ENABLED: + # raise warning if necessary + _arrays_for_stack_dispatcher(tup, stacklevel=2) + + arrs = atleast_3d(*tup) + if not isinstance(arrs, list): + arrs = [arrs] + return _nx.concatenate(arrs, 2) + + +def _replace_zero_by_x_arrays(sub_arys): + for i in range(len(sub_arys)): + if _nx.ndim(sub_arys[i]) == 0: + sub_arys[i] = _nx.empty(0, dtype=sub_arys[i].dtype) + elif _nx.sometrue(_nx.equal(_nx.shape(sub_arys[i]), 0)): + sub_arys[i] = _nx.empty(0, dtype=sub_arys[i].dtype) + return sub_arys + + +def _array_split_dispatcher(ary, indices_or_sections, axis=None): + return (ary, indices_or_sections) + + +@array_function_dispatch(_array_split_dispatcher) +def array_split(ary, indices_or_sections, axis=0): + """ + Split an array into multiple sub-arrays. + + Please refer to the ``split`` documentation. The only difference + between these functions is that ``array_split`` allows + `indices_or_sections` to be an integer that does *not* equally + divide the axis. For an array of length l that should be split + into n sections, it returns l % n sub-arrays of size l//n + 1 + and the rest of size l//n. + + See Also + -------- + split : Split array into multiple sub-arrays of equal size. + + Examples + -------- + >>> x = np.arange(8.0) + >>> np.array_split(x, 3) + [array([0., 1., 2.]), array([3., 4., 5.]), array([6., 7.])] + + >>> x = np.arange(9) + >>> np.array_split(x, 4) + [array([0, 1, 2]), array([3, 4]), array([5, 6]), array([7, 8])] + + """ + try: + Ntotal = ary.shape[axis] + except AttributeError: + Ntotal = len(ary) + try: + # handle array case. + Nsections = len(indices_or_sections) + 1 + div_points = [0] + list(indices_or_sections) + [Ntotal] + except TypeError: + # indices_or_sections is a scalar, not an array. 
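+ # Split as evenly as possible: the first (Ntotal % Nsections) pieces get
+ # one extra element, e.g. 9 elements in 4 sections -> sizes 3, 2, 2, 2.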
+ Nsections = int(indices_or_sections) + if Nsections <= 0: + raise ValueError('number sections must be larger than 0.') from None + Neach_section, extras = divmod(Ntotal, Nsections) + section_sizes = ([0] + + extras * [Neach_section+1] + + (Nsections-extras) * [Neach_section]) + div_points = _nx.array(section_sizes, dtype=_nx.intp).cumsum() + + sub_arys = [] + sary = _nx.swapaxes(ary, axis, 0) + for i in range(Nsections): + st = div_points[i] + end = div_points[i + 1] + sub_arys.append(_nx.swapaxes(sary[st:end], axis, 0)) + + return sub_arys + + +def _split_dispatcher(ary, indices_or_sections, axis=None): + return (ary, indices_or_sections) + + +@array_function_dispatch(_split_dispatcher) +def split(ary, indices_or_sections, axis=0): + """ + Split an array into multiple sub-arrays as views into `ary`. + + Parameters + ---------- + ary : ndarray + Array to be divided into sub-arrays. + indices_or_sections : int or 1-D array + If `indices_or_sections` is an integer, N, the array will be divided + into N equal arrays along `axis`. If such a split is not possible, + an error is raised. + + If `indices_or_sections` is a 1-D array of sorted integers, the entries + indicate where along `axis` the array is split. For example, + ``[2, 3]`` would, for ``axis=0``, result in + + - ary[:2] + - ary[2:3] + - ary[3:] + + If an index exceeds the dimension of the array along `axis`, + an empty sub-array is returned correspondingly. + axis : int, optional + The axis along which to split, default is 0. + + Returns + ------- + sub-arrays : list of ndarrays + A list of sub-arrays as views into `ary`. + + Raises + ------ + ValueError + If `indices_or_sections` is given as an integer, but + a split does not result in equal division. + + See Also + -------- + array_split : Split an array into multiple sub-arrays of equal or + near-equal size. Does not raise an exception if + an equal division cannot be made. + hsplit : Split array into multiple sub-arrays horizontally (column-wise). + vsplit : Split array into multiple sub-arrays vertically (row wise). + dsplit : Split array into multiple sub-arrays along the 3rd axis (depth). + concatenate : Join a sequence of arrays along an existing axis. + stack : Join a sequence of arrays along a new axis. + hstack : Stack arrays in sequence horizontally (column wise). + vstack : Stack arrays in sequence vertically (row wise). + dstack : Stack arrays in sequence depth wise (along third dimension). + + Examples + -------- + >>> x = np.arange(9.0) + >>> np.split(x, 3) + [array([0., 1., 2.]), array([3., 4., 5.]), array([6., 7., 8.])] + + >>> x = np.arange(8.0) + >>> np.split(x, [3, 5, 6, 10]) + [array([0., 1., 2.]), + array([3., 4.]), + array([5.]), + array([6., 7.]), + array([], dtype=float64)] + + """ + try: + len(indices_or_sections) + except TypeError: + sections = indices_or_sections + N = ary.shape[axis] + if N % sections: + raise ValueError( + 'array split does not result in an equal division') from None + return array_split(ary, indices_or_sections, axis) + + +def _hvdsplit_dispatcher(ary, indices_or_sections): + return (ary, indices_or_sections) + + +@array_function_dispatch(_hvdsplit_dispatcher) +def hsplit(ary, indices_or_sections): + """ + Split an array into multiple sub-arrays horizontally (column-wise). + + Please refer to the `split` documentation. `hsplit` is equivalent + to `split` with ``axis=1``, the array is always split along the second + axis regardless of the array dimension. 
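+
+ Like `split`, the pieces are views, so writing through them changes the
+ original (a quick assumed check):
+
+ >>> x = np.arange(4.0).reshape(2, 2)
+ >>> np.hsplit(x, 2)[0][0, 0] = 99.
+ >>> x[0, 0]
+ 99.0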
+ + See Also + -------- + split : Split an array into multiple sub-arrays of equal size. + + Examples + -------- + >>> x = np.arange(16.0).reshape(4, 4) + >>> x + array([[ 0., 1., 2., 3.], + [ 4., 5., 6., 7.], + [ 8., 9., 10., 11.], + [12., 13., 14., 15.]]) + >>> np.hsplit(x, 2) + [array([[ 0., 1.], + [ 4., 5.], + [ 8., 9.], + [12., 13.]]), + array([[ 2., 3.], + [ 6., 7.], + [10., 11.], + [14., 15.]])] + >>> np.hsplit(x, np.array([3, 6])) + [array([[ 0., 1., 2.], + [ 4., 5., 6.], + [ 8., 9., 10.], + [12., 13., 14.]]), + array([[ 3.], + [ 7.], + [11.], + [15.]]), + array([], shape=(4, 0), dtype=float64)] + + With a higher dimensional array the split is still along the second axis. + + >>> x = np.arange(8.0).reshape(2, 2, 2) + >>> x + array([[[0., 1.], + [2., 3.]], + [[4., 5.], + [6., 7.]]]) + >>> np.hsplit(x, 2) + [array([[[0., 1.]], + [[4., 5.]]]), + array([[[2., 3.]], + [[6., 7.]]])] + + """ + if _nx.ndim(ary) == 0: + raise ValueError('hsplit only works on arrays of 1 or more dimensions') + if ary.ndim > 1: + return split(ary, indices_or_sections, 1) + else: + return split(ary, indices_or_sections, 0) + + +@array_function_dispatch(_hvdsplit_dispatcher) +def vsplit(ary, indices_or_sections): + """ + Split an array into multiple sub-arrays vertically (row-wise). + + Please refer to the ``split`` documentation. ``vsplit`` is equivalent + to ``split`` with `axis=0` (default), the array is always split along the + first axis regardless of the array dimension. + + See Also + -------- + split : Split an array into multiple sub-arrays of equal size. + + Examples + -------- + >>> x = np.arange(16.0).reshape(4, 4) + >>> x + array([[ 0., 1., 2., 3.], + [ 4., 5., 6., 7.], + [ 8., 9., 10., 11.], + [12., 13., 14., 15.]]) + >>> np.vsplit(x, 2) + [array([[0., 1., 2., 3.], + [4., 5., 6., 7.]]), array([[ 8., 9., 10., 11.], + [12., 13., 14., 15.]])] + >>> np.vsplit(x, np.array([3, 6])) + [array([[ 0., 1., 2., 3.], + [ 4., 5., 6., 7.], + [ 8., 9., 10., 11.]]), array([[12., 13., 14., 15.]]), array([], shape=(0, 4), dtype=float64)] + + With a higher dimensional array the split is still along the first axis. + + >>> x = np.arange(8.0).reshape(2, 2, 2) + >>> x + array([[[0., 1.], + [2., 3.]], + [[4., 5.], + [6., 7.]]]) + >>> np.vsplit(x, 2) + [array([[[0., 1.], + [2., 3.]]]), array([[[4., 5.], + [6., 7.]]])] + + """ + if _nx.ndim(ary) < 2: + raise ValueError('vsplit only works on arrays of 2 or more dimensions') + return split(ary, indices_or_sections, 0) + + +@array_function_dispatch(_hvdsplit_dispatcher) +def dsplit(ary, indices_or_sections): + """ + Split array into multiple sub-arrays along the 3rd axis (depth). + + Please refer to the `split` documentation. `dsplit` is equivalent + to `split` with ``axis=2``, the array is always split along the third + axis provided the array dimension is greater than or equal to 3. + + See Also + -------- + split : Split an array into multiple sub-arrays of equal size. 
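+
+ The axis correspondence across the family is `vsplit` -> axis 0,
+ `hsplit` -> axis 1, `dsplit` -> axis 2; a shape-only sketch:
+
+ >>> [p.shape for p in np.dsplit(np.zeros((2, 2, 4)), 2)]
+ [(2, 2, 2), (2, 2, 2)]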
+ + Examples + -------- + >>> x = np.arange(16.0).reshape(2, 2, 4) + >>> x + array([[[ 0., 1., 2., 3.], + [ 4., 5., 6., 7.]], + [[ 8., 9., 10., 11.], + [12., 13., 14., 15.]]]) + >>> np.dsplit(x, 2) + [array([[[ 0., 1.], + [ 4., 5.]], + [[ 8., 9.], + [12., 13.]]]), array([[[ 2., 3.], + [ 6., 7.]], + [[10., 11.], + [14., 15.]]])] + >>> np.dsplit(x, np.array([3, 6])) + [array([[[ 0., 1., 2.], + [ 4., 5., 6.]], + [[ 8., 9., 10.], + [12., 13., 14.]]]), + array([[[ 3.], + [ 7.]], + [[11.], + [15.]]]), + array([], shape=(2, 2, 0), dtype=float64)] + """ + if _nx.ndim(ary) < 3: + raise ValueError('dsplit only works on arrays of 3 or more dimensions') + return split(ary, indices_or_sections, 2) + +def get_array_prepare(*args): + """Find the wrapper for the array with the highest priority. + + In case of ties, leftmost wins. If no wrapper is found, return None + """ + wrappers = sorted((getattr(x, '__array_priority__', 0), -i, + x.__array_prepare__) for i, x in enumerate(args) + if hasattr(x, '__array_prepare__')) + if wrappers: + return wrappers[-1][-1] + return None + +def get_array_wrap(*args): + """Find the wrapper for the array with the highest priority. + + In case of ties, leftmost wins. If no wrapper is found, return None + """ + wrappers = sorted((getattr(x, '__array_priority__', 0), -i, + x.__array_wrap__) for i, x in enumerate(args) + if hasattr(x, '__array_wrap__')) + if wrappers: + return wrappers[-1][-1] + return None + + +def _kron_dispatcher(a, b): + return (a, b) + + +@array_function_dispatch(_kron_dispatcher) +def kron(a, b): + """ + Kronecker product of two arrays. + + Computes the Kronecker product, a composite array made of blocks of the + second array scaled by the first. + + Parameters + ---------- + a, b : array_like + + Returns + ------- + out : ndarray + + See Also + -------- + outer : The outer product + + Notes + ----- + The function assumes that the number of dimensions of `a` and `b` + are the same, if necessary prepending the smallest with ones. + If ``a.shape = (r0,r1,..,rN)`` and ``b.shape = (s0,s1,...,sN)``, + the Kronecker product has shape ``(r0*s0, r1*s1, ..., rN*SN)``. + The elements are products of elements from `a` and `b`, organized + explicitly by:: + + kron(a,b)[k0,k1,...,kN] = a[i0,i1,...,iN] * b[j0,j1,...,jN] + + where:: + + kt = it * st + jt, t = 0,...,N + + In the common 2-D case (N=1), the block structure can be visualized:: + + [[ a[0,0]*b, a[0,1]*b, ... , a[0,-1]*b ], + [ ... ... ], + [ a[-1,0]*b, a[-1,1]*b, ... 
, a[-1,-1]*b ]] + + + Examples + -------- + >>> np.kron([1,10,100], [5,6,7]) + array([ 5, 6, 7, ..., 500, 600, 700]) + >>> np.kron([5,6,7], [1,10,100]) + array([ 5, 50, 500, ..., 7, 70, 700]) + + >>> np.kron(np.eye(2), np.ones((2,2))) + array([[1., 1., 0., 0.], + [1., 1., 0., 0.], + [0., 0., 1., 1.], + [0., 0., 1., 1.]]) + + >>> a = np.arange(100).reshape((2,5,2,5)) + >>> b = np.arange(24).reshape((2,3,4)) + >>> c = np.kron(a,b) + >>> c.shape + (2, 10, 6, 20) + >>> I = (1,3,0,2) + >>> J = (0,2,1) + >>> J1 = (0,) + J # extend to ndim=4 + >>> S1 = (1,) + b.shape + >>> K = tuple(np.array(I) * np.array(S1) + np.array(J1)) + >>> c[K] == a[I]*b[J] + True + + """ + b = asanyarray(b) + a = array(a, copy=False, subok=True, ndmin=b.ndim) + ndb, nda = b.ndim, a.ndim + if (nda == 0 or ndb == 0): + return _nx.multiply(a, b) + as_ = a.shape + bs = b.shape + if not a.flags.contiguous: + a = reshape(a, as_) + if not b.flags.contiguous: + b = reshape(b, bs) + nd = ndb + if (ndb != nda): + if (ndb > nda): + as_ = (1,)*(ndb-nda) + as_ + else: + bs = (1,)*(nda-ndb) + bs + nd = nda + result = outer(a, b).reshape(as_+bs) + axis = nd-1 + for _ in range(nd): + result = concatenate(result, axis=axis) + wrapper = get_array_prepare(a, b) + if wrapper is not None: + result = wrapper(result) + wrapper = get_array_wrap(a, b) + if wrapper is not None: + result = wrapper(result) + return result + + +def _tile_dispatcher(A, reps): + return (A, reps) + + +@array_function_dispatch(_tile_dispatcher) +def tile(A, reps): + """ + Construct an array by repeating A the number of times given by reps. + + If `reps` has length ``d``, the result will have dimension of + ``max(d, A.ndim)``. + + If ``A.ndim < d``, `A` is promoted to be d-dimensional by prepending new + axes. So a shape (3,) array is promoted to (1, 3) for 2-D replication, + or shape (1, 1, 3) for 3-D replication. If this is not the desired + behavior, promote `A` to d-dimensions manually before calling this + function. + + If ``A.ndim > d``, `reps` is promoted to `A`.ndim by pre-pending 1's to it. + Thus for an `A` of shape (2, 3, 4, 5), a `reps` of (2, 2) is treated as + (1, 1, 2, 2). + + Note : Although tile may be used for broadcasting, it is strongly + recommended to use numpy's broadcasting operations and functions. + + Parameters + ---------- + A : array_like + The input array. + reps : array_like + The number of repetitions of `A` along each axis. + + Returns + ------- + c : ndarray + The tiled output array. + + See Also + -------- + repeat : Repeat elements of an array. + broadcast_to : Broadcast an array to a new shape + + Examples + -------- + >>> a = np.array([0, 1, 2]) + >>> np.tile(a, 2) + array([0, 1, 2, 0, 1, 2]) + >>> np.tile(a, (2, 2)) + array([[0, 1, 2, 0, 1, 2], + [0, 1, 2, 0, 1, 2]]) + >>> np.tile(a, (2, 1, 2)) + array([[[0, 1, 2, 0, 1, 2]], + [[0, 1, 2, 0, 1, 2]]]) + + >>> b = np.array([[1, 2], [3, 4]]) + >>> np.tile(b, 2) + array([[1, 2, 1, 2], + [3, 4, 3, 4]]) + >>> np.tile(b, (2, 1)) + array([[1, 2], + [3, 4], + [1, 2], + [3, 4]]) + + >>> c = np.array([1,2,3,4]) + >>> np.tile(c,(4,1)) + array([[1, 2, 3, 4], + [1, 2, 3, 4], + [1, 2, 3, 4], + [1, 2, 3, 4]]) + """ + try: + tup = tuple(reps) + except TypeError: + tup = (reps,) + d = len(tup) + if all(x == 1 for x in tup) and isinstance(A, _nx.ndarray): + # Fixes the problem that the function does not make a copy if A is a + # numpy array and the repetitions are 1 in all dimensions + return _nx.array(A, copy=True, subok=True, ndmin=d) + else: + # Note that no copy of zero-sized arrays is made. 
However since they + # have no data there is no risk of an inadvertent overwrite. + c = _nx.array(A, copy=False, subok=True, ndmin=d) + if (d < c.ndim): + tup = (1,)*(c.ndim-d) + tup + shape_out = tuple(s*t for s, t in zip(c.shape, tup)) + n = c.size + if n > 0: + for dim_in, nrep in zip(c.shape, tup): + if nrep != 1: + c = c.reshape(-1, n).repeat(nrep, 0) + n //= dim_in + return c.reshape(shape_out) diff --git a/.local/lib/python3.8/site-packages/numpy/lib/shape_base.pyi b/.local/lib/python3.8/site-packages/numpy/lib/shape_base.pyi new file mode 100644 index 0000000..17016c9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/shape_base.pyi @@ -0,0 +1,216 @@ +from typing import List, TypeVar, Callable, Sequence, Any, overload, Tuple, SupportsIndex, Protocol + +from numpy import ( + generic, + integer, + dtype, + ufunc, + bool_, + unsignedinteger, + signedinteger, + floating, + complexfloating, + object_, +) + +from numpy.typing import ( + ArrayLike, + NDArray, + _ShapeLike, + _FiniteNestedSequence, + _SupportsArray, + _ArrayLikeBool_co, + _ArrayLikeUInt_co, + _ArrayLikeInt_co, + _ArrayLikeFloat_co, + _ArrayLikeComplex_co, + _ArrayLikeObject_co, +) + +from numpy.core.shape_base import vstack + +_SCT = TypeVar("_SCT", bound=generic) + +_ArrayLike = _FiniteNestedSequence[_SupportsArray[dtype[_SCT]]] + +# The signatures of `__array_wrap__` and `__array_prepare__` are the same; +# give them unique names for the sake of clarity +class _ArrayWrap(Protocol): + def __call__( + self, + array: NDArray[Any], + context: None | Tuple[ufunc, Tuple[Any, ...], int] = ..., + /, + ) -> Any: ... + +class _ArrayPrepare(Protocol): + def __call__( + self, + array: NDArray[Any], + context: None | Tuple[ufunc, Tuple[Any, ...], int] = ..., + /, + ) -> Any: ... + +class _SupportsArrayWrap(Protocol): + @property + def __array_wrap__(self) -> _ArrayWrap: ... + +class _SupportsArrayPrepare(Protocol): + @property + def __array_prepare__(self) -> _ArrayPrepare: ... + +__all__: List[str] + +row_stack = vstack + +def take_along_axis( + arr: _SCT | NDArray[_SCT], + indices: NDArray[integer[Any]], + axis: None | int, +) -> NDArray[_SCT]: ... + +def put_along_axis( + arr: NDArray[_SCT], + indices: NDArray[integer[Any]], + values: ArrayLike, + axis: None | int, +) -> None: ... + +@overload +def apply_along_axis( + func1d: Callable[..., _ArrayLike[_SCT]], + axis: SupportsIndex, + arr: ArrayLike, + *args: Any, + **kwargs: Any, +) -> NDArray[_SCT]: ... +@overload +def apply_along_axis( + func1d: Callable[..., ArrayLike], + axis: SupportsIndex, + arr: ArrayLike, + *args: Any, + **kwargs: Any, +) -> NDArray[Any]: ... + +def apply_over_axes( + func: Callable[[NDArray[Any], int], NDArray[_SCT]], + a: ArrayLike, + axes: int | Sequence[int], +) -> NDArray[_SCT]: ... + +@overload +def expand_dims( + a: _ArrayLike[_SCT], + axis: _ShapeLike, +) -> NDArray[_SCT]: ... +@overload +def expand_dims( + a: ArrayLike, + axis: _ShapeLike, +) -> NDArray[Any]: ... + +@overload +def column_stack(tup: Sequence[_ArrayLike[_SCT]]) -> NDArray[_SCT]: ... +@overload +def column_stack(tup: Sequence[ArrayLike]) -> NDArray[Any]: ... + +@overload +def dstack(tup: Sequence[_ArrayLike[_SCT]]) -> NDArray[_SCT]: ... +@overload +def dstack(tup: Sequence[ArrayLike]) -> NDArray[Any]: ... + +@overload +def array_split( + ary: _ArrayLike[_SCT], + indices_or_sections: _ShapeLike, + axis: SupportsIndex = ..., +) -> List[NDArray[_SCT]]: ... 
+@overload +def array_split( + ary: ArrayLike, + indices_or_sections: _ShapeLike, + axis: SupportsIndex = ..., +) -> List[NDArray[Any]]: ... + +@overload +def split( + ary: _ArrayLike[_SCT], + indices_or_sections: _ShapeLike, + axis: SupportsIndex = ..., +) -> List[NDArray[_SCT]]: ... +@overload +def split( + ary: ArrayLike, + indices_or_sections: _ShapeLike, + axis: SupportsIndex = ..., +) -> List[NDArray[Any]]: ... + +@overload +def hsplit( + ary: _ArrayLike[_SCT], + indices_or_sections: _ShapeLike, +) -> List[NDArray[_SCT]]: ... +@overload +def hsplit( + ary: ArrayLike, + indices_or_sections: _ShapeLike, +) -> List[NDArray[Any]]: ... + +@overload +def vsplit( + ary: _ArrayLike[_SCT], + indices_or_sections: _ShapeLike, +) -> List[NDArray[_SCT]]: ... +@overload +def vsplit( + ary: ArrayLike, + indices_or_sections: _ShapeLike, +) -> List[NDArray[Any]]: ... + +@overload +def dsplit( + ary: _ArrayLike[_SCT], + indices_or_sections: _ShapeLike, +) -> List[NDArray[_SCT]]: ... +@overload +def dsplit( + ary: ArrayLike, + indices_or_sections: _ShapeLike, +) -> List[NDArray[Any]]: ... + +@overload +def get_array_prepare(*args: _SupportsArrayPrepare) -> _ArrayPrepare: ... +@overload +def get_array_prepare(*args: object) -> None | _ArrayPrepare: ... + +@overload +def get_array_wrap(*args: _SupportsArrayWrap) -> _ArrayWrap: ... +@overload +def get_array_wrap(*args: object) -> None | _ArrayWrap: ... + +@overload +def kron(a: _ArrayLikeBool_co, b: _ArrayLikeBool_co) -> NDArray[bool_]: ... # type: ignore[misc] +@overload +def kron(a: _ArrayLikeUInt_co, b: _ArrayLikeUInt_co) -> NDArray[unsignedinteger[Any]]: ... # type: ignore[misc] +@overload +def kron(a: _ArrayLikeInt_co, b: _ArrayLikeInt_co) -> NDArray[signedinteger[Any]]: ... # type: ignore[misc] +@overload +def kron(a: _ArrayLikeFloat_co, b: _ArrayLikeFloat_co) -> NDArray[floating[Any]]: ... # type: ignore[misc] +@overload +def kron(a: _ArrayLikeComplex_co, b: _ArrayLikeComplex_co) -> NDArray[complexfloating[Any, Any]]: ... +@overload +def kron(a: _ArrayLikeObject_co, b: Any) -> NDArray[object_]: ... +@overload +def kron(a: Any, b: _ArrayLikeObject_co) -> NDArray[object_]: ... + +@overload +def tile( + A: _ArrayLike[_SCT], + reps: int | Sequence[int], +) -> NDArray[_SCT]: ... +@overload +def tile( + A: ArrayLike, + reps: int | Sequence[int], +) -> NDArray[Any]: ... diff --git a/.local/lib/python3.8/site-packages/numpy/lib/stride_tricks.py b/.local/lib/python3.8/site-packages/numpy/lib/stride_tricks.py new file mode 100644 index 0000000..5093993 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/stride_tricks.py @@ -0,0 +1,546 @@ +""" +Utilities that manipulate strides to achieve desirable effects. + +An explanation of strides can be found in the "ndarray.rst" file in the +NumPy reference guide. + +""" +import numpy as np +from numpy.core.numeric import normalize_axis_tuple +from numpy.core.overrides import array_function_dispatch, set_module + +__all__ = ['broadcast_to', 'broadcast_arrays', 'broadcast_shapes'] + + +class DummyArray: + """Dummy object that just exists to hang __array_interface__ dictionaries + and possibly keep alive a reference to a base array. + """ + + def __init__(self, interface, base=None): + self.__array_interface__ = interface + self.base = base + + +def _maybe_view_as_subclass(original_array, new_array): + if type(original_array) is not type(new_array): + # if input was an ndarray subclass and subclasses were OK, + # then view the result as that subclass. 
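+        # (ndarray.view(type=...) re-wraps the same buffer as the subclass;
+        # no data is copied, which is why __array_finalize__ is invoked by
+        # hand just below.)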
+        new_array = new_array.view(type=type(original_array))
+    # Since we have done something akin to a view from original_array, we
+    # should let the subclass finalize (if it has it implemented, i.e., is
+    # not None).
+    if new_array.__array_finalize__:
+        new_array.__array_finalize__(original_array)
+    return new_array
+
+
+def as_strided(x, shape=None, strides=None, subok=False, writeable=True):
+    """
+    Create a view into the array with the given shape and strides.
+
+    .. warning:: This function has to be used with extreme care, see notes.
+
+    Parameters
+    ----------
+    x : ndarray
+        Array to create a view into.
+    shape : sequence of int, optional
+        The shape of the new array. Defaults to ``x.shape``.
+    strides : sequence of int, optional
+        The strides of the new array. Defaults to ``x.strides``.
+    subok : bool, optional
+        .. versionadded:: 1.10
+
+        If True, subclasses are preserved.
+    writeable : bool, optional
+        .. versionadded:: 1.12
+
+        If set to False, the returned array will always be readonly.
+        Otherwise it will be writable if the original array was. It
+        is advisable to set this to False if possible (see Notes).
+
+    Returns
+    -------
+    view : ndarray
+
+    See also
+    --------
+    broadcast_to : broadcast an array to a given shape.
+    reshape : reshape an array.
+    lib.stride_tricks.sliding_window_view :
+        user-friendly and safe function for the creation of sliding window
+        views.
+
+    Notes
+    -----
+    ``as_strided`` creates a view into the array given the exact strides
+    and shape. This means it manipulates the internal data structure of
+    ndarray and, if done incorrectly, the array elements can point to
+    invalid memory and can corrupt results or crash your program.
+    It is advisable to always use the original ``x.strides`` when
+    calculating new strides to avoid reliance on a contiguous memory
+    layout.
+
+    Furthermore, arrays created with this function often contain
+    self-overlapping memory, so that two or more elements refer to the
+    same memory location.
+    Vectorized write operations on such arrays will typically be
+    unpredictable. They may even give different results for small, large,
+    or transposed arrays.
+    Since writing to these arrays has to be tested and done with great
+    care, you may want to use ``writeable=False`` to avoid accidental write
+    operations.
+
+    For these reasons it is advisable to avoid ``as_strided`` when
+    possible.
+    """
+    # first convert input to array, possibly keeping subclass
+    x = np.array(x, copy=False, subok=subok)
+    interface = dict(x.__array_interface__)
+    if shape is not None:
+        interface['shape'] = tuple(shape)
+    if strides is not None:
+        interface['strides'] = tuple(strides)
+
+    array = np.asarray(DummyArray(interface, base=x))
+    # The route via `__array_interface__` does not preserve structured
+    # dtypes. Since dtype should remain unchanged, we set it explicitly.
+    array.dtype = x.dtype
+
+    view = _maybe_view_as_subclass(x, array)
+
+    if view.flags.writeable and not writeable:
+        view.flags.writeable = False
+
+    return view
+
+
+def _sliding_window_view_dispatcher(x, window_shape, axis=None, *,
+                                    subok=None, writeable=None):
+    return (x,)
+
+
+@array_function_dispatch(_sliding_window_view_dispatcher)
+def sliding_window_view(x, window_shape, axis=None, *,
+                        subok=False, writeable=False):
+    """
+    Create a sliding window view into the array with the given window shape.
+
+    Also known as rolling or moving window, the window slides across all
+    dimensions of the array and extracts subsets of the array at all window
+    positions.
+
+    .. versionadded:: 1.20.0
+
+    Parameters
+    ----------
+    x : array_like
+        Array to create the sliding window view from.
+    window_shape : int or tuple of int
+        Size of window over each axis that takes part in the sliding window.
+        If `axis` is not present, must have same length as the number of input
+        array dimensions. Single integers `i` are treated as if they were the
+        tuple `(i,)`.
+    axis : int or tuple of int, optional
+        Axis or axes along which the sliding window is applied.
+        By default, the sliding window is applied to all axes and
+        `window_shape[i]` will refer to axis `i` of `x`.
+        If `axis` is given as a `tuple of int`, `window_shape[i]` will refer to
+        the axis `axis[i]` of `x`.
+        Single integers `i` are treated as if they were the tuple `(i,)`.
+    subok : bool, optional
+        If True, sub-classes will be passed-through, otherwise the returned
+        array will be forced to be a base-class array (default).
+    writeable : bool, optional
+        When true, allow writing to the returned view. The default is false,
+        as this should be used with caution: the returned view contains the
+        same memory location multiple times, so writing to one location will
+        cause others to change.
+
+    Returns
+    -------
+    view : ndarray
+        Sliding window view of the array. The sliding window dimensions are
+        inserted at the end, and the original dimensions are trimmed as
+        required by the size of the sliding window.
+        That is, ``view.shape = x_shape_trimmed + window_shape``, where
+        ``x_shape_trimmed`` is ``x.shape`` with every entry reduced by one less
+        than the corresponding window size.
+
+    See Also
+    --------
+    lib.stride_tricks.as_strided: A lower-level and less safe routine for
+        creating arbitrary views from custom shape and strides.
+    broadcast_to: broadcast an array to a given shape.
+
+    Notes
+    -----
+    For many applications using a sliding window view can be convenient, but
+    potentially very slow. Often specialized solutions exist, for example:
+
+    - `scipy.signal.fftconvolve`
+
+    - filtering functions in `scipy.ndimage`
+
+    - moving window functions provided by
+      `bottleneck <https://github.com/pydata/bottleneck>`_.
+
+    As a rough estimate, a sliding window approach with an input size of `N`
+    and a window size of `W` will scale as `O(N*W)` where frequently a special
+    algorithm can achieve `O(N)`. That means that the sliding window variant
+    for a window size of 100 can be 100 times slower than a more specialized
+    version.
+
+    Nevertheless, for small window sizes, when no custom algorithm exists, or
+    as a prototyping and development tool, this function can be a good
+    solution.
+
+    Examples
+    --------
+    >>> x = np.arange(6)
+    >>> x.shape
+    (6,)
+    >>> v = sliding_window_view(x, 3)
+    >>> v.shape
+    (4, 3)
+    >>> v
+    array([[0, 1, 2],
+           [1, 2, 3],
+           [2, 3, 4],
+           [3, 4, 5]])
+
+    This also works in more dimensions, e.g.
+
+    >>> i, j = np.ogrid[:3, :4]
+    >>> x = 10*i + j
+    >>> x.shape
+    (3, 4)
+    >>> x
+    array([[ 0,  1,  2,  3],
+           [10, 11, 12, 13],
+           [20, 21, 22, 23]])
+    >>> shape = (2,2)
+    >>> v = sliding_window_view(x, shape)
+    >>> v.shape
+    (2, 3, 2, 2)
+    >>> v
+    array([[[[ 0,  1],
+             [10, 11]],
+            [[ 1,  2],
+             [11, 12]],
+            [[ 2,  3],
+             [12, 13]]],
+           [[[10, 11],
+             [20, 21]],
+            [[11, 12],
+             [21, 22]],
+            [[12, 13],
+             [22, 23]]]])
+
+    The axis can be specified explicitly:
+
+    >>> v = sliding_window_view(x, 3, 0)
+    >>> v.shape
+    (1, 4, 3)
+    >>> v
+    array([[[ 0, 10, 20],
+            [ 1, 11, 21],
+            [ 2, 12, 22],
+            [ 3, 13, 23]]])
+
+    The same axis can be used several times. In that case, every use reduces
+    the corresponding original dimension:
+
+    >>> v = sliding_window_view(x, (2, 3), (1, 1))
+    >>> v.shape
+    (3, 1, 2, 3)
+    >>> v
+    array([[[[ 0,  1,  2],
+             [ 1,  2,  3]]],
+           [[[10, 11, 12],
+             [11, 12, 13]]],
+           [[[20, 21, 22],
+             [21, 22, 23]]]])
+
+    Combining with stepped slicing (`::step`), this can be used to take sliding
+    views which skip elements:
+
+    >>> x = np.arange(7)
+    >>> sliding_window_view(x, 5)[:, ::2]
+    array([[0, 2, 4],
+           [1, 3, 5],
+           [2, 4, 6]])
+
+    or views which move by multiple elements
+
+    >>> x = np.arange(7)
+    >>> sliding_window_view(x, 3)[::2, :]
+    array([[0, 1, 2],
+           [2, 3, 4],
+           [4, 5, 6]])
+
+    A common application of `sliding_window_view` is the calculation of running
+    statistics. The simplest example is the
+    `moving average <https://en.wikipedia.org/wiki/Moving_average>`_:
+
+    >>> x = np.arange(6)
+    >>> x.shape
+    (6,)
+    >>> v = sliding_window_view(x, 3)
+    >>> v.shape
+    (4, 3)
+    >>> v
+    array([[0, 1, 2],
+           [1, 2, 3],
+           [2, 3, 4],
+           [3, 4, 5]])
+    >>> moving_average = v.mean(axis=-1)
+    >>> moving_average
+    array([1., 2., 3., 4.])
+
+    Note that a sliding window approach is often **not** optimal (see Notes).
+    """
+    window_shape = (tuple(window_shape)
+                    if np.iterable(window_shape)
+                    else (window_shape,))
+    # first convert input to array, possibly keeping subclass
+    x = np.array(x, copy=False, subok=subok)
+
+    window_shape_array = np.array(window_shape)
+    if np.any(window_shape_array < 0):
+        raise ValueError('`window_shape` cannot contain negative values')
+
+    if axis is None:
+        axis = tuple(range(x.ndim))
+        if len(window_shape) != len(axis):
+            raise ValueError(f'Since axis is `None`, must provide '
+                             f'window_shape for all dimensions of `x`; '
+                             f'got {len(window_shape)} window_shape elements '
+                             f'and `x.ndim` is {x.ndim}.')
+    else:
+        axis = normalize_axis_tuple(axis, x.ndim, allow_duplicate=True)
+        if len(window_shape) != len(axis):
+            raise ValueError(f'Must provide matching length window_shape and '
+                             f'axis; got {len(window_shape)} window_shape '
+                             f'elements and {len(axis)} axes elements.')
+
+    out_strides = x.strides + tuple(x.strides[ax] for ax in axis)
+
+    # note: same axis can be windowed repeatedly
+    x_shape_trimmed = list(x.shape)
+    for ax, dim in zip(axis, window_shape):
+        if x_shape_trimmed[ax] < dim:
+            raise ValueError(
+                'window shape cannot be larger than input array shape')
+        x_shape_trimmed[ax] -= dim - 1
+    out_shape = tuple(x_shape_trimmed) + window_shape
+    return as_strided(x, strides=out_strides, shape=out_shape,
+                      subok=subok, writeable=writeable)
+
+
+def _broadcast_to(array, shape, subok, readonly):
+    shape = tuple(shape) if np.iterable(shape) else (shape,)
+    array = np.array(array, copy=False, subok=subok)
+    if not shape and array.shape:
+        raise ValueError('cannot broadcast a non-scalar to a scalar array')
+    if any(size < 0 for size in shape):
+        raise ValueError('all elements of broadcast shape must be non-'
+                         'negative')
+    extras = []
+    it = np.nditer(
+        (array,), flags=['multi_index', 'refs_ok', 'zerosize_ok'] + extras,
+        op_flags=['readonly'], itershape=shape, order='C')
+    with it:
+        # never really has writebackifcopy semantics
+        broadcast = it.itviews[0]
+    result = _maybe_view_as_subclass(array, broadcast)
+    # In a future version this will go away
+    if not readonly and array.flags._writeable_no_warn:
+        result.flags.writeable = True
+        result.flags._warn_on_write = True
+    return result
+
+
+def _broadcast_to_dispatcher(array, shape, subok=None):
+    return (array,)
+
+
+@array_function_dispatch(_broadcast_to_dispatcher, module='numpy')
+def broadcast_to(array, shape, subok=False):
+    """Broadcast an array to a new shape.
+
+    Parameters
+    ----------
+    array : array_like
+        The array to broadcast.
+    shape : tuple or int
+        The shape of the desired array. A single integer ``i`` is interpreted
+        as ``(i,)``.
+    subok : bool, optional
+        If True, then sub-classes will be passed-through, otherwise
+        the returned array will be forced to be a base-class array (default).
+
+    Returns
+    -------
+    broadcast : array
+        A readonly view on the original array with the given shape. It is
+        typically not contiguous. Furthermore, more than one element of a
+        broadcasted array may refer to a single memory location.
+
+    Raises
+    ------
+    ValueError
+        If the array is not compatible with the new shape according to NumPy's
+        broadcasting rules.
+
+    See Also
+    --------
+    broadcast
+    broadcast_arrays
+    broadcast_shapes
+
+    Notes
+    -----
+    .. versionadded:: 1.10.0
+
+    Examples
+    --------
+    >>> x = np.array([1, 2, 3])
+    >>> np.broadcast_to(x, (3, 3))
+    array([[1, 2, 3],
+           [1, 2, 3],
+           [1, 2, 3]])
+    """
+    return _broadcast_to(array, shape, subok=subok, readonly=True)
+
+
+def _broadcast_shape(*args):
+    """Returns the shape of the arrays that would result from broadcasting the
+    supplied arrays against each other.
+    """
+    # use the old-iterator because np.nditer does not handle size 0 arrays
+    # consistently
+    b = np.broadcast(*args[:32])
+    # unfortunately, it cannot handle 32 or more arguments directly
+    for pos in range(32, len(args), 31):
+        # ironically, np.broadcast does not properly handle np.broadcast
+        # objects (it treats them as scalars)
+        # use broadcasting to avoid allocating the full array
+        b = broadcast_to(0, b.shape)
+        b = np.broadcast(b, *args[pos:(pos + 31)])
+    return b.shape
+
+
+@set_module('numpy')
+def broadcast_shapes(*args):
+    """
+    Broadcast the input shapes into a single shape.
+
+    :ref:`Learn more about broadcasting here <basics.broadcasting>`.
+
+    .. versionadded:: 1.20.0
+
+    Parameters
+    ----------
+    `*args` : tuples of ints, or ints
+        The shapes to be broadcast against each other.
+
+    Returns
+    -------
+    tuple
+        Broadcasted shape.
+
+    Raises
+    ------
+    ValueError
+        If the shapes are not compatible and cannot be broadcast according
+        to NumPy's broadcasting rules.
+
+    See Also
+    --------
+    broadcast
+    broadcast_arrays
+    broadcast_to
+
+    Examples
+    --------
+    >>> np.broadcast_shapes((1, 2), (3, 1), (3, 2))
+    (3, 2)
+
+    >>> np.broadcast_shapes((6, 7), (5, 6, 1), (7,), (5, 1, 7))
+    (5, 6, 7)
+    """
+    arrays = [np.empty(x, dtype=[]) for x in args]
+    return _broadcast_shape(*arrays)
+
+
+def _broadcast_arrays_dispatcher(*args, subok=None):
+    return args
+
+
+@array_function_dispatch(_broadcast_arrays_dispatcher, module='numpy')
+def broadcast_arrays(*args, subok=False):
+    """
+    Broadcast any number of arrays against each other.
+
+    Parameters
+    ----------
+    `*args` : array_likes
+        The arrays to broadcast.
+
+    subok : bool, optional
+        If True, then sub-classes will be passed-through, otherwise
+        the returned arrays will be forced to be a base-class array (default).
+
+    Returns
+    -------
+    broadcasted : list of arrays
+        These arrays are views on the original arrays. They are typically
+        not contiguous. Furthermore, more than one element of a
+        broadcasted array may refer to a single memory location. If you need
+        to write to the arrays, make copies first. While you can set the
+        ``writeable`` flag True, writing to a single output value may end up
+        changing more than one location in the output array.
+
+    .. deprecated:: 1.17
+        The output is currently marked so that if written to, a deprecation
+        warning will be emitted. A future version will set the
+        ``writeable`` flag False so writing to it will raise an error.
+
+    See Also
+    --------
+    broadcast
+    broadcast_to
+    broadcast_shapes
+
+    Examples
+    --------
+    >>> x = np.array([[1,2,3]])
+    >>> y = np.array([[4],[5]])
+    >>> np.broadcast_arrays(x, y)
+    [array([[1, 2, 3],
+           [1, 2, 3]]), array([[4, 4, 4],
+           [5, 5, 5]])]
+
+    Here is a useful idiom for getting contiguous copies instead of
+    non-contiguous views.
+
+    >>> [np.array(a) for a in np.broadcast_arrays(x, y)]
+    [array([[1, 2, 3],
+           [1, 2, 3]]), array([[4, 4, 4],
+           [5, 5, 5]])]
+
+    """
+    # nditer is not used here to avoid the limit of 32 arrays.
+    # Otherwise, something like the following one-liner would suffice:
+    # return np.nditer(args, flags=['multi_index', 'zerosize_ok'],
+    #                  order='C').itviews
+
+    args = [np.array(_m, copy=False, subok=subok) for _m in args]
+
+    shape = _broadcast_shape(*args)
+
+    if all(array.shape == shape for array in args):
+        # Common case where nothing needs to be broadcasted.
+        return args
+
+    return [_broadcast_to(array, shape, subok=subok, readonly=False)
+            for array in args]
diff --git a/.local/lib/python3.8/site-packages/numpy/lib/stride_tricks.pyi b/.local/lib/python3.8/site-packages/numpy/lib/stride_tricks.pyi
new file mode 100644
index 0000000..aad4041
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/lib/stride_tricks.pyi
@@ -0,0 +1,81 @@
+from typing import Any, List, Dict, Iterable, TypeVar, overload, SupportsIndex
+
+from numpy import dtype, generic
+from numpy.typing import (
+    NDArray,
+    ArrayLike,
+    _ShapeLike,
+    _Shape,
+    _FiniteNestedSequence,
+    _SupportsArray,
+)
+
+_SCT = TypeVar("_SCT", bound=generic)
+_ArrayLike = _FiniteNestedSequence[_SupportsArray[dtype[_SCT]]]
+
+__all__: List[str]
+
+class DummyArray:
+    __array_interface__: Dict[str, Any]
+    base: None | NDArray[Any]
+    def __init__(
+        self,
+        interface: Dict[str, Any],
+        base: None | NDArray[Any] = ...,
+    ) -> None: ...
+
+@overload
+def as_strided(
+    x: _ArrayLike[_SCT],
+    shape: None | Iterable[int] = ...,
+    strides: None | Iterable[int] = ...,
+    subok: bool = ...,
+    writeable: bool = ...,
+) -> NDArray[_SCT]: ...
+@overload
+def as_strided(
+    x: ArrayLike,
+    shape: None | Iterable[int] = ...,
+    strides: None | Iterable[int] = ...,
+    subok: bool = ...,
+    writeable: bool = ...,
+) -> NDArray[Any]: ...
+
+@overload
+def sliding_window_view(
+    x: _ArrayLike[_SCT],
+    window_shape: int | Iterable[int],
+    axis: None | SupportsIndex = ...,
+    *,
+    subok: bool = ...,
+    writeable: bool = ...,
+) -> NDArray[_SCT]: ...
+@overload
+def sliding_window_view(
+    x: ArrayLike,
+    window_shape: int | Iterable[int],
+    axis: None | SupportsIndex = ...,
+    *,
+    subok: bool = ...,
+    writeable: bool = ...,
+) -> NDArray[Any]: ...
+
+@overload
+def broadcast_to(
+    array: _ArrayLike[_SCT],
+    shape: int | Iterable[int],
+    subok: bool = ...,
+) -> NDArray[_SCT]: ...
+@overload
+def broadcast_to(
+    array: ArrayLike,
+    shape: int | Iterable[int],
+    subok: bool = ...,
+) -> NDArray[Any]: ...
+
+def broadcast_shapes(*args: _ShapeLike) -> _Shape: ...
+
+def broadcast_arrays(
+    *args: ArrayLike,
+    subok: bool = ...,
+) -> List[NDArray[Any]]: ...
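The stride_tricks module and its stub above expose a small public API:
sliding_window_view plus the broadcast_* helpers. A minimal usage sketch of
that API (the variable names below are illustrative, not taken from the diff):

    import numpy as np
    from numpy.lib.stride_tricks import sliding_window_view

    x = np.arange(10)

    # Rolling mean over width-3 windows; the view is read-only by default
    # (writeable=False), so reducing over the overlapping memory is safe.
    windows = sliding_window_view(x, 3)          # shape (8, 3)
    rolling_mean = windows.mean(axis=-1)         # shape (8,)

    # The broadcasting helpers compose: compute the joint shape first, then
    # materialize views with exactly that shape.
    shape = np.broadcast_shapes((8, 1), (1, 3))  # -> (8, 3)
    a, b = np.broadcast_arrays(np.arange(8)[:, None], np.arange(3))
    assert a.shape == b.shape == shape

As the docstrings above stress, writing through such views can touch several
logical elements at once, which is why sliding_window_view and broadcast_to
return read-only views by default.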
diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/__init__.py b/.local/lib/python3.8/site-packages/numpy/lib/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..e4cfd2d Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test__datasource.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test__datasource.cpython-38.pyc new file mode 100644 index 0000000..5866f8a Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test__datasource.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test__iotools.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test__iotools.cpython-38.pyc new file mode 100644 index 0000000..9236fe6 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test__iotools.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test__version.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test__version.cpython-38.pyc new file mode 100644 index 0000000..a5d2afa Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test__version.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_arraypad.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_arraypad.cpython-38.pyc new file mode 100644 index 0000000..d6d3e94 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_arraypad.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_arraysetops.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_arraysetops.cpython-38.pyc new file mode 100644 index 0000000..154eec7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_arraysetops.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_arrayterator.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_arrayterator.cpython-38.pyc new file mode 100644 index 0000000..3a68165 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_arrayterator.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_financial_expired.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_financial_expired.cpython-38.pyc new file mode 100644 index 0000000..b8ae926 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_financial_expired.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_format.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_format.cpython-38.pyc new file mode 100644 index 0000000..2363235 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_format.cpython-38.pyc 
differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_function_base.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_function_base.cpython-38.pyc new file mode 100644 index 0000000..e496a79 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_function_base.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_histograms.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_histograms.cpython-38.pyc new file mode 100644 index 0000000..ab9367b Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_histograms.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_index_tricks.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_index_tricks.cpython-38.pyc new file mode 100644 index 0000000..d9b0373 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_index_tricks.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_io.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_io.cpython-38.pyc new file mode 100644 index 0000000..49ffc29 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_io.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_mixins.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_mixins.cpython-38.pyc new file mode 100644 index 0000000..3cf2065 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_mixins.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_nanfunctions.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_nanfunctions.cpython-38.pyc new file mode 100644 index 0000000..18a03a3 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_nanfunctions.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_packbits.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_packbits.cpython-38.pyc new file mode 100644 index 0000000..273e3f4 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_packbits.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_polynomial.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_polynomial.cpython-38.pyc new file mode 100644 index 0000000..96f1129 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_polynomial.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_recfunctions.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_recfunctions.cpython-38.pyc new file mode 100644 index 0000000..be72146 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_recfunctions.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_regression.cpython-38.pyc 
b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_regression.cpython-38.pyc new file mode 100644 index 0000000..f7a13c0 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_regression.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_shape_base.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_shape_base.cpython-38.pyc new file mode 100644 index 0000000..abf4e7c Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_shape_base.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_stride_tricks.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_stride_tricks.cpython-38.pyc new file mode 100644 index 0000000..72b873b Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_stride_tricks.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_twodim_base.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_twodim_base.cpython-38.pyc new file mode 100644 index 0000000..9f3de83 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_twodim_base.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_type_check.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_type_check.cpython-38.pyc new file mode 100644 index 0000000..fa24e1c Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_type_check.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_ufunclike.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_ufunclike.cpython-38.pyc new file mode 100644 index 0000000..3c9b0c7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_ufunclike.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_utils.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_utils.cpython-38.pyc new file mode 100644 index 0000000..3e376ee Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/tests/__pycache__/test_utils.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/data/py2-objarr.npy b/.local/lib/python3.8/site-packages/numpy/lib/tests/data/py2-objarr.npy new file mode 100644 index 0000000..12936c9 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/tests/data/py2-objarr.npy differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/data/py2-objarr.npz b/.local/lib/python3.8/site-packages/numpy/lib/tests/data/py2-objarr.npz new file mode 100644 index 0000000..68a3b53 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/tests/data/py2-objarr.npz differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/data/py3-objarr.npy b/.local/lib/python3.8/site-packages/numpy/lib/tests/data/py3-objarr.npy new file mode 100644 index 0000000..6776074 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/tests/data/py3-objarr.npy differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/data/py3-objarr.npz 
b/.local/lib/python3.8/site-packages/numpy/lib/tests/data/py3-objarr.npz new file mode 100644 index 0000000..05eac0b Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/tests/data/py3-objarr.npz differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/data/python3.npy b/.local/lib/python3.8/site-packages/numpy/lib/tests/data/python3.npy new file mode 100644 index 0000000..7c6997d Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/tests/data/python3.npy differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/data/win64python2.npy b/.local/lib/python3.8/site-packages/numpy/lib/tests/data/win64python2.npy new file mode 100644 index 0000000..d9bc36a Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/lib/tests/data/win64python2.npy differ diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/test__datasource.py b/.local/lib/python3.8/site-packages/numpy/lib/tests/test__datasource.py new file mode 100644 index 0000000..2738d41 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/tests/test__datasource.py @@ -0,0 +1,350 @@ +import os +import pytest +from tempfile import mkdtemp, mkstemp, NamedTemporaryFile +from shutil import rmtree + +import numpy.lib._datasource as datasource +from numpy.testing import assert_, assert_equal, assert_raises + +import urllib.request as urllib_request +from urllib.parse import urlparse +from urllib.error import URLError + + +def urlopen_stub(url, data=None): + '''Stub to replace urlopen for testing.''' + if url == valid_httpurl(): + tmpfile = NamedTemporaryFile(prefix='urltmp_') + return tmpfile + else: + raise URLError('Name or service not known') + +# setup and teardown +old_urlopen = None + + +def setup_module(): + global old_urlopen + + old_urlopen = urllib_request.urlopen + urllib_request.urlopen = urlopen_stub + + +def teardown_module(): + urllib_request.urlopen = old_urlopen + +# A valid website for more robust testing +http_path = 'http://www.google.com/' +http_file = 'index.html' + +http_fakepath = 'http://fake.abc.web/site/' +http_fakefile = 'fake.txt' + +malicious_files = ['/etc/shadow', '../../shadow', + '..\\system.dat', 'c:\\windows\\system.dat'] + +magic_line = b'three is the magic number' + + +# Utility functions used by many tests +def valid_textfile(filedir): + # Generate and return a valid temporary file. + fd, path = mkstemp(suffix='.txt', prefix='dstmp_', dir=filedir, text=True) + os.close(fd) + return path + + +def invalid_textfile(filedir): + # Generate and return an invalid filename. + fd, path = mkstemp(suffix='.txt', prefix='dstmp_', dir=filedir) + os.close(fd) + os.remove(path) + return path + + +def valid_httpurl(): + return http_path+http_file + + +def invalid_httpurl(): + return http_fakepath+http_fakefile + + +def valid_baseurl(): + return http_path + + +def invalid_baseurl(): + return http_fakepath + + +def valid_httpfile(): + return http_file + + +def invalid_httpfile(): + return http_fakefile + + +class TestDataSourceOpen: + def setup(self): + self.tmpdir = mkdtemp() + self.ds = datasource.DataSource(self.tmpdir) + + def teardown(self): + rmtree(self.tmpdir) + del self.ds + + def test_ValidHTTP(self): + fh = self.ds.open(valid_httpurl()) + assert_(fh) + fh.close() + + def test_InvalidHTTP(self): + url = invalid_httpurl() + assert_raises(OSError, self.ds.open, url) + try: + self.ds.open(url) + except OSError as e: + # Regression test for bug fixed in r4342. 
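+            # (the OSError raised for an unreachable URL carries no errno,
+            # hence the check below)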
+ assert_(e.errno is None) + + def test_InvalidHTTPCacheURLError(self): + assert_raises(URLError, self.ds._cache, invalid_httpurl()) + + def test_ValidFile(self): + local_file = valid_textfile(self.tmpdir) + fh = self.ds.open(local_file) + assert_(fh) + fh.close() + + def test_InvalidFile(self): + invalid_file = invalid_textfile(self.tmpdir) + assert_raises(OSError, self.ds.open, invalid_file) + + def test_ValidGzipFile(self): + try: + import gzip + except ImportError: + # We don't have the gzip capabilities to test. + pytest.skip() + # Test datasource's internal file_opener for Gzip files. + filepath = os.path.join(self.tmpdir, 'foobar.txt.gz') + fp = gzip.open(filepath, 'w') + fp.write(magic_line) + fp.close() + fp = self.ds.open(filepath) + result = fp.readline() + fp.close() + assert_equal(magic_line, result) + + def test_ValidBz2File(self): + try: + import bz2 + except ImportError: + # We don't have the bz2 capabilities to test. + pytest.skip() + # Test datasource's internal file_opener for BZip2 files. + filepath = os.path.join(self.tmpdir, 'foobar.txt.bz2') + fp = bz2.BZ2File(filepath, 'w') + fp.write(magic_line) + fp.close() + fp = self.ds.open(filepath) + result = fp.readline() + fp.close() + assert_equal(magic_line, result) + + +class TestDataSourceExists: + def setup(self): + self.tmpdir = mkdtemp() + self.ds = datasource.DataSource(self.tmpdir) + + def teardown(self): + rmtree(self.tmpdir) + del self.ds + + def test_ValidHTTP(self): + assert_(self.ds.exists(valid_httpurl())) + + def test_InvalidHTTP(self): + assert_equal(self.ds.exists(invalid_httpurl()), False) + + def test_ValidFile(self): + # Test valid file in destpath + tmpfile = valid_textfile(self.tmpdir) + assert_(self.ds.exists(tmpfile)) + # Test valid local file not in destpath + localdir = mkdtemp() + tmpfile = valid_textfile(localdir) + assert_(self.ds.exists(tmpfile)) + rmtree(localdir) + + def test_InvalidFile(self): + tmpfile = invalid_textfile(self.tmpdir) + assert_equal(self.ds.exists(tmpfile), False) + + +class TestDataSourceAbspath: + def setup(self): + self.tmpdir = os.path.abspath(mkdtemp()) + self.ds = datasource.DataSource(self.tmpdir) + + def teardown(self): + rmtree(self.tmpdir) + del self.ds + + def test_ValidHTTP(self): + scheme, netloc, upath, pms, qry, frg = urlparse(valid_httpurl()) + local_path = os.path.join(self.tmpdir, netloc, + upath.strip(os.sep).strip('/')) + assert_equal(local_path, self.ds.abspath(valid_httpurl())) + + def test_ValidFile(self): + tmpfile = valid_textfile(self.tmpdir) + tmpfilename = os.path.split(tmpfile)[-1] + # Test with filename only + assert_equal(tmpfile, self.ds.abspath(tmpfilename)) + # Test filename with complete path + assert_equal(tmpfile, self.ds.abspath(tmpfile)) + + def test_InvalidHTTP(self): + scheme, netloc, upath, pms, qry, frg = urlparse(invalid_httpurl()) + invalidhttp = os.path.join(self.tmpdir, netloc, + upath.strip(os.sep).strip('/')) + assert_(invalidhttp != self.ds.abspath(valid_httpurl())) + + def test_InvalidFile(self): + invalidfile = valid_textfile(self.tmpdir) + tmpfile = valid_textfile(self.tmpdir) + tmpfilename = os.path.split(tmpfile)[-1] + # Test with filename only + assert_(invalidfile != self.ds.abspath(tmpfilename)) + # Test filename with complete path + assert_(invalidfile != self.ds.abspath(tmpfile)) + + def test_sandboxing(self): + tmpfile = valid_textfile(self.tmpdir) + tmpfilename = os.path.split(tmpfile)[-1] + + tmp_path = lambda x: os.path.abspath(self.ds.abspath(x)) + + assert_(tmp_path(valid_httpurl()).startswith(self.tmpdir)) + 
assert_(tmp_path(invalid_httpurl()).startswith(self.tmpdir)) + assert_(tmp_path(tmpfile).startswith(self.tmpdir)) + assert_(tmp_path(tmpfilename).startswith(self.tmpdir)) + for fn in malicious_files: + assert_(tmp_path(http_path+fn).startswith(self.tmpdir)) + assert_(tmp_path(fn).startswith(self.tmpdir)) + + def test_windows_os_sep(self): + orig_os_sep = os.sep + try: + os.sep = '\\' + self.test_ValidHTTP() + self.test_ValidFile() + self.test_InvalidHTTP() + self.test_InvalidFile() + self.test_sandboxing() + finally: + os.sep = orig_os_sep + + +class TestRepositoryAbspath: + def setup(self): + self.tmpdir = os.path.abspath(mkdtemp()) + self.repos = datasource.Repository(valid_baseurl(), self.tmpdir) + + def teardown(self): + rmtree(self.tmpdir) + del self.repos + + def test_ValidHTTP(self): + scheme, netloc, upath, pms, qry, frg = urlparse(valid_httpurl()) + local_path = os.path.join(self.repos._destpath, netloc, + upath.strip(os.sep).strip('/')) + filepath = self.repos.abspath(valid_httpfile()) + assert_equal(local_path, filepath) + + def test_sandboxing(self): + tmp_path = lambda x: os.path.abspath(self.repos.abspath(x)) + assert_(tmp_path(valid_httpfile()).startswith(self.tmpdir)) + for fn in malicious_files: + assert_(tmp_path(http_path+fn).startswith(self.tmpdir)) + assert_(tmp_path(fn).startswith(self.tmpdir)) + + def test_windows_os_sep(self): + orig_os_sep = os.sep + try: + os.sep = '\\' + self.test_ValidHTTP() + self.test_sandboxing() + finally: + os.sep = orig_os_sep + + +class TestRepositoryExists: + def setup(self): + self.tmpdir = mkdtemp() + self.repos = datasource.Repository(valid_baseurl(), self.tmpdir) + + def teardown(self): + rmtree(self.tmpdir) + del self.repos + + def test_ValidFile(self): + # Create local temp file + tmpfile = valid_textfile(self.tmpdir) + assert_(self.repos.exists(tmpfile)) + + def test_InvalidFile(self): + tmpfile = invalid_textfile(self.tmpdir) + assert_equal(self.repos.exists(tmpfile), False) + + def test_RemoveHTTPFile(self): + assert_(self.repos.exists(valid_httpurl())) + + def test_CachedHTTPFile(self): + localfile = valid_httpurl() + # Create a locally cached temp file with an URL based + # directory structure. This is similar to what Repository.open + # would do. 
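+        # The cache mirrors the URL layout: files live under
+        # <destpath>/<netloc>/, so that directory is created first.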
+ scheme, netloc, upath, pms, qry, frg = urlparse(localfile) + local_path = os.path.join(self.repos._destpath, netloc) + os.mkdir(local_path, 0o0700) + tmpfile = valid_textfile(local_path) + assert_(self.repos.exists(tmpfile)) + + +class TestOpenFunc: + def setup(self): + self.tmpdir = mkdtemp() + + def teardown(self): + rmtree(self.tmpdir) + + def test_DataSourceOpen(self): + local_file = valid_textfile(self.tmpdir) + # Test case where destpath is passed in + fp = datasource.open(local_file, destpath=self.tmpdir) + assert_(fp) + fp.close() + # Test case where default destpath is used + fp = datasource.open(local_file) + assert_(fp) + fp.close() + +def test_del_attr_handling(): + # DataSource __del__ can be called + # even if __init__ fails when the + # Exception object is caught by the + # caller as happens in refguide_check + # is_deprecated() function + + ds = datasource.DataSource() + # simulate failed __init__ by removing key attribute + # produced within __init__ and expected by __del__ + del ds._istmpdest + # should not raise an AttributeError if __del__ + # gracefully handles failed __init__: + ds.__del__() diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/test__iotools.py b/.local/lib/python3.8/site-packages/numpy/lib/tests/test__iotools.py new file mode 100644 index 0000000..a5b7870 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/tests/test__iotools.py @@ -0,0 +1,353 @@ +import time +from datetime import date + +import numpy as np +from numpy.testing import ( + assert_, assert_equal, assert_allclose, assert_raises, + ) +from numpy.lib._iotools import ( + LineSplitter, NameValidator, StringConverter, + has_nested_fields, easy_dtype, flatten_dtype + ) + + +class TestLineSplitter: + "Tests the LineSplitter class." 
+
+    def test_no_delimiter(self):
+        "Test LineSplitter w/o delimiter"
+        strg = " 1 2 3 4  5 # test"
+        test = LineSplitter()(strg)
+        assert_equal(test, ['1', '2', '3', '4', '5'])
+        test = LineSplitter('')(strg)
+        assert_equal(test, ['1', '2', '3', '4', '5'])
+
+    def test_space_delimiter(self):
+        "Test space delimiter"
+        strg = " 1 2 3 4  5 # test"
+        test = LineSplitter(' ')(strg)
+        assert_equal(test, ['1', '2', '3', '4', '', '5'])
+        test = LineSplitter('  ')(strg)
+        assert_equal(test, ['1 2 3 4', '5'])
+
+    def test_tab_delimiter(self):
+        "Test tab delimiter"
+        strg = " 1\t 2\t 3\t 4\t 5  6"
+        test = LineSplitter('\t')(strg)
+        assert_equal(test, ['1', '2', '3', '4', '5  6'])
+        strg = " 1  2\t 3  4\t 5  6"
+        test = LineSplitter('\t')(strg)
+        assert_equal(test, ['1  2', '3  4', '5  6'])
+
+    def test_other_delimiter(self):
+        "Test LineSplitter on delimiter"
+        strg = "1,2,3,4,,5"
+        test = LineSplitter(',')(strg)
+        assert_equal(test, ['1', '2', '3', '4', '', '5'])
+        #
+        strg = " 1,2,3,4,,5 # test"
+        test = LineSplitter(',')(strg)
+        assert_equal(test, ['1', '2', '3', '4', '', '5'])
+
+        # gh-11028 bytes comment/delimiters should get encoded
+        strg = b" 1,2,3,4,,5 % test"
+        test = LineSplitter(delimiter=b',', comments=b'%')(strg)
+        assert_equal(test, ['1', '2', '3', '4', '', '5'])
+
+    def test_constant_fixed_width(self):
+        "Test LineSplitter w/ fixed-width fields"
+        strg = "  1  2  3  4     5   # test"
+        test = LineSplitter(3)(strg)
+        assert_equal(test, ['1', '2', '3', '4', '', '5', ''])
+        #
+        strg = "  1     3  4  5  6# test"
+        test = LineSplitter(20)(strg)
+        assert_equal(test, ['1     3  4  5  6'])
+        #
+        strg = "  1     3  4  5  6# test"
+        test = LineSplitter(30)(strg)
+        assert_equal(test, ['1     3  4  5  6'])
+
+    def test_variable_fixed_width(self):
+        strg = "  1     3  4  5  6# test"
+        test = LineSplitter((3, 6, 6, 3))(strg)
+        assert_equal(test, ['1', '3', '4  5', '6'])
+        #
+        strg = "  1     3  4  5  6# test"
+        test = LineSplitter((6, 6, 9))(strg)
+        assert_equal(test, ['1', '3  4', '5  6'])
+
+# -----------------------------------------------------------------------------
+
+
+class TestNameValidator:
+
+    def test_case_sensitivity(self):
+        "Test case sensitivity"
+        names = ['A', 'a', 'b', 'c']
+        test = NameValidator().validate(names)
+        assert_equal(test, ['A', 'a', 'b', 'c'])
+        test = NameValidator(case_sensitive=False).validate(names)
+        assert_equal(test, ['A', 'A_1', 'B', 'C'])
+        test = NameValidator(case_sensitive='upper').validate(names)
+        assert_equal(test, ['A', 'A_1', 'B', 'C'])
+        test = NameValidator(case_sensitive='lower').validate(names)
+        assert_equal(test, ['a', 'a_1', 'b', 'c'])
+
+        # check exceptions
+        assert_raises(ValueError, NameValidator, case_sensitive='foobar')
+
+    def test_excludelist(self):
+        "Test excludelist"
+        names = ['dates', 'data', 'Other Data', 'mask']
+        validator = NameValidator(excludelist=['dates', 'data', 'mask'])
+        test = validator.validate(names)
+        assert_equal(test, ['dates_', 'data_', 'Other_Data', 'mask_'])
+
+    def test_missing_names(self):
+        "Test validate missing names"
+        namelist = ('a', 'b', 'c')
+        validator = NameValidator()
+        assert_equal(validator(namelist), ['a', 'b', 'c'])
+        namelist = ('', 'b', 'c')
+        assert_equal(validator(namelist), ['f0', 'b', 'c'])
+        namelist = ('a', 'b', '')
+        assert_equal(validator(namelist), ['a', 'b', 'f0'])
+        namelist = ('', 'f0', '')
+        assert_equal(validator(namelist), ['f1', 'f0', 'f2'])
+
+    def test_validate_nb_names(self):
+        "Test validate nb names"
+        namelist = ('a', 'b', 'c')
+        validator = NameValidator()
+        assert_equal(validator(namelist, nbfields=1), ('a',))
assert_equal(validator(namelist, nbfields=5, defaultfmt="g%i"), + ['a', 'b', 'c', 'g0', 'g1']) + + def test_validate_wo_names(self): + "Test validate no names" + namelist = None + validator = NameValidator() + assert_(validator(namelist) is None) + assert_equal(validator(namelist, nbfields=3), ['f0', 'f1', 'f2']) + +# ----------------------------------------------------------------------------- + + +def _bytes_to_date(s): + return date(*time.strptime(s, "%Y-%m-%d")[:3]) + + +class TestStringConverter: + "Test StringConverter" + + def test_creation(self): + "Test creation of a StringConverter" + converter = StringConverter(int, -99999) + assert_equal(converter._status, 1) + assert_equal(converter.default, -99999) + + def test_upgrade(self): + "Tests the upgrade method." + + converter = StringConverter() + assert_equal(converter._status, 0) + + # test int + assert_equal(converter.upgrade('0'), 0) + assert_equal(converter._status, 1) + + # On systems where long defaults to 32-bit, the statuses will be + # offset by one, so we check for this here. + import numpy.core.numeric as nx + status_offset = int(nx.dtype(nx.int_).itemsize < nx.dtype(nx.int64).itemsize) + + # test int > 2**32 + assert_equal(converter.upgrade('17179869184'), 17179869184) + assert_equal(converter._status, 1 + status_offset) + + # test float + assert_allclose(converter.upgrade('0.'), 0.0) + assert_equal(converter._status, 2 + status_offset) + + # test complex + assert_equal(converter.upgrade('0j'), complex('0j')) + assert_equal(converter._status, 3 + status_offset) + + # test str + # note that the longdouble type has been skipped, so the + # _status increases by 2. Everything should succeed with + # unicode conversion (8). + for s in ['a', b'a']: + res = converter.upgrade(s) + assert_(type(res) is str) + assert_equal(res, 'a') + assert_equal(converter._status, 8 + status_offset) + + def test_missing(self): + "Tests the use of missing values." 
+ converter = StringConverter(missing_values=('missing', + 'missed')) + converter.upgrade('0') + assert_equal(converter('0'), 0) + assert_equal(converter(''), converter.default) + assert_equal(converter('missing'), converter.default) + assert_equal(converter('missed'), converter.default) + try: + converter('miss') + except ValueError: + pass + + def test_upgrademapper(self): + "Tests updatemapper" + dateparser = _bytes_to_date + _original_mapper = StringConverter._mapper[:] + try: + StringConverter.upgrade_mapper(dateparser, date(2000, 1, 1)) + convert = StringConverter(dateparser, date(2000, 1, 1)) + test = convert('2001-01-01') + assert_equal(test, date(2001, 1, 1)) + test = convert('2009-01-01') + assert_equal(test, date(2009, 1, 1)) + test = convert('') + assert_equal(test, date(2000, 1, 1)) + finally: + StringConverter._mapper = _original_mapper + + def test_string_to_object(self): + "Make sure that string-to-object functions are properly recognized" + old_mapper = StringConverter._mapper[:] # copy of list + conv = StringConverter(_bytes_to_date) + assert_equal(conv._mapper, old_mapper) + assert_(hasattr(conv, 'default')) + + def test_keep_default(self): + "Make sure we don't lose an explicit default" + converter = StringConverter(None, missing_values='', + default=-999) + converter.upgrade('3.14159265') + assert_equal(converter.default, -999) + assert_equal(converter.type, np.dtype(float)) + # + converter = StringConverter( + None, missing_values='', default=0) + converter.upgrade('3.14159265') + assert_equal(converter.default, 0) + assert_equal(converter.type, np.dtype(float)) + + def test_keep_default_zero(self): + "Check that we don't lose a default of 0" + converter = StringConverter(int, default=0, + missing_values="N/A") + assert_equal(converter.default, 0) + + def test_keep_missing_values(self): + "Check that we're not losing missing values" + converter = StringConverter(int, default=0, + missing_values="N/A") + assert_equal( + converter.missing_values, {'', 'N/A'}) + + def test_int64_dtype(self): + "Check that int64 integer types can be specified" + converter = StringConverter(np.int64, default=0) + val = "-9223372036854775807" + assert_(converter(val) == -9223372036854775807) + val = "9223372036854775807" + assert_(converter(val) == 9223372036854775807) + + def test_uint64_dtype(self): + "Check that uint64 integer types can be specified" + converter = StringConverter(np.uint64, default=0) + val = "9223372043271415339" + assert_(converter(val) == 9223372043271415339) + + +class TestMiscFunctions: + + def test_has_nested_dtype(self): + "Test has_nested_dtype" + ndtype = np.dtype(float) + assert_equal(has_nested_fields(ndtype), False) + ndtype = np.dtype([('A', '|S3'), ('B', float)]) + assert_equal(has_nested_fields(ndtype), False) + ndtype = np.dtype([('A', int), ('B', [('BA', float), ('BB', '|S1')])]) + assert_equal(has_nested_fields(ndtype), True) + + def test_easy_dtype(self): + "Test ndtype on dtypes" + # Simple case + ndtype = float + assert_equal(easy_dtype(ndtype), np.dtype(float)) + # As string w/o names + ndtype = "i4, f8" + assert_equal(easy_dtype(ndtype), + np.dtype([('f0', "i4"), ('f1', "f8")])) + # As string w/o names but different default format + assert_equal(easy_dtype(ndtype, defaultfmt="field_%03i"), + np.dtype([('field_000', "i4"), ('field_001', "f8")])) + # As string w/ names + ndtype = "i4, f8" + assert_equal(easy_dtype(ndtype, names="a, b"), + np.dtype([('a', "i4"), ('b', "f8")])) + # As string w/ names (too many) + ndtype = "i4, f8" + 
assert_equal(easy_dtype(ndtype, names="a, b, c"), + np.dtype([('a', "i4"), ('b', "f8")])) + # As string w/ names (not enough) + ndtype = "i4, f8" + assert_equal(easy_dtype(ndtype, names=", b"), + np.dtype([('f0', "i4"), ('b', "f8")])) + # ... (with different default format) + assert_equal(easy_dtype(ndtype, names="a", defaultfmt="f%02i"), + np.dtype([('a', "i4"), ('f00', "f8")])) + # As list of tuples w/o names + ndtype = [('A', int), ('B', float)] + assert_equal(easy_dtype(ndtype), np.dtype([('A', int), ('B', float)])) + # As list of tuples w/ names + assert_equal(easy_dtype(ndtype, names="a,b"), + np.dtype([('a', int), ('b', float)])) + # As list of tuples w/ not enough names + assert_equal(easy_dtype(ndtype, names="a"), + np.dtype([('a', int), ('f0', float)])) + # As list of tuples w/ too many names + assert_equal(easy_dtype(ndtype, names="a,b,c"), + np.dtype([('a', int), ('b', float)])) + # As list of types w/o names + ndtype = (int, float, float) + assert_equal(easy_dtype(ndtype), + np.dtype([('f0', int), ('f1', float), ('f2', float)])) + # As list of types w names + ndtype = (int, float, float) + assert_equal(easy_dtype(ndtype, names="a, b, c"), + np.dtype([('a', int), ('b', float), ('c', float)])) + # As simple dtype w/ names + ndtype = np.dtype(float) + assert_equal(easy_dtype(ndtype, names="a, b, c"), + np.dtype([(_, float) for _ in ('a', 'b', 'c')])) + # As simple dtype w/o names (but multiple fields) + ndtype = np.dtype(float) + assert_equal( + easy_dtype(ndtype, names=['', '', ''], defaultfmt="f%02i"), + np.dtype([(_, float) for _ in ('f00', 'f01', 'f02')])) + + def test_flatten_dtype(self): + "Testing flatten_dtype" + # Standard dtype + dt = np.dtype([("a", "f8"), ("b", "f8")]) + dt_flat = flatten_dtype(dt) + assert_equal(dt_flat, [float, float]) + # Recursive dtype + dt = np.dtype([("a", [("aa", '|S1'), ("ab", '|S2')]), ("b", int)]) + dt_flat = flatten_dtype(dt) + assert_equal(dt_flat, [np.dtype('|S1'), np.dtype('|S2'), int]) + # dtype with shaped fields + dt = np.dtype([("a", (float, 2)), ("b", (int, 3))]) + dt_flat = flatten_dtype(dt) + assert_equal(dt_flat, [float, int]) + dt_flat = flatten_dtype(dt, True) + assert_equal(dt_flat, [float] * 2 + [int] * 3) + # dtype w/ titles + dt = np.dtype([(("a", "A"), "f8"), (("b", "B"), "f8")]) + dt_flat = flatten_dtype(dt) + assert_equal(dt_flat, [float, float]) diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/test__version.py b/.local/lib/python3.8/site-packages/numpy/lib/tests/test__version.py new file mode 100644 index 0000000..e6d41ad --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/tests/test__version.py @@ -0,0 +1,64 @@ +"""Tests for the NumpyVersion class. + +""" +from numpy.testing import assert_, assert_raises +from numpy.lib import NumpyVersion + + +def test_main_versions(): + assert_(NumpyVersion('1.8.0') == '1.8.0') + for ver in ['1.9.0', '2.0.0', '1.8.1', '10.0.1']: + assert_(NumpyVersion('1.8.0') < ver) + + for ver in ['1.7.0', '1.7.1', '0.9.9']: + assert_(NumpyVersion('1.8.0') > ver) + + +def test_version_1_point_10(): + # regression test for gh-2998. 
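+    # (plain string comparison would sort '1.10.0' before '1.9.0';
+    # NumpyVersion compares the version components numerically)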
+ assert_(NumpyVersion('1.9.0') < '1.10.0') + assert_(NumpyVersion('1.11.0') < '1.11.1') + assert_(NumpyVersion('1.11.0') == '1.11.0') + assert_(NumpyVersion('1.99.11') < '1.99.12') + + +def test_alpha_beta_rc(): + assert_(NumpyVersion('1.8.0rc1') == '1.8.0rc1') + for ver in ['1.8.0', '1.8.0rc2']: + assert_(NumpyVersion('1.8.0rc1') < ver) + + for ver in ['1.8.0a2', '1.8.0b3', '1.7.2rc4']: + assert_(NumpyVersion('1.8.0rc1') > ver) + + assert_(NumpyVersion('1.8.0b1') > '1.8.0a2') + + +def test_dev_version(): + assert_(NumpyVersion('1.9.0.dev-Unknown') < '1.9.0') + for ver in ['1.9.0', '1.9.0a1', '1.9.0b2', '1.9.0b2.dev-ffffffff']: + assert_(NumpyVersion('1.9.0.dev-f16acvda') < ver) + + assert_(NumpyVersion('1.9.0.dev-f16acvda') == '1.9.0.dev-11111111') + + +def test_dev_a_b_rc_mixed(): + assert_(NumpyVersion('1.9.0a2.dev-f16acvda') == '1.9.0a2.dev-11111111') + assert_(NumpyVersion('1.9.0a2.dev-6acvda54') < '1.9.0a2') + + +def test_dev0_version(): + assert_(NumpyVersion('1.9.0.dev0+Unknown') < '1.9.0') + for ver in ['1.9.0', '1.9.0a1', '1.9.0b2', '1.9.0b2.dev0+ffffffff']: + assert_(NumpyVersion('1.9.0.dev0+f16acvda') < ver) + + assert_(NumpyVersion('1.9.0.dev0+f16acvda') == '1.9.0.dev0+11111111') + + +def test_dev0_a_b_rc_mixed(): + assert_(NumpyVersion('1.9.0a2.dev0+f16acvda') == '1.9.0a2.dev0+11111111') + assert_(NumpyVersion('1.9.0a2.dev0+6acvda54') < '1.9.0a2') + + +def test_raises(): + for ver in ['1.9', '1,9.0', '1.7.x']: + assert_raises(ValueError, NumpyVersion, ver) diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/test_arraypad.py b/.local/lib/python3.8/site-packages/numpy/lib/tests/test_arraypad.py new file mode 100644 index 0000000..75db592 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/tests/test_arraypad.py @@ -0,0 +1,1364 @@ +"""Tests for the array padding functions. 
+ +""" +import pytest + +import numpy as np +from numpy.testing import assert_array_equal, assert_allclose, assert_equal +from numpy.lib.arraypad import _as_pairs + + +_numeric_dtypes = ( + np.sctypes["uint"] + + np.sctypes["int"] + + np.sctypes["float"] + + np.sctypes["complex"] +) +_all_modes = { + 'constant': {'constant_values': 0}, + 'edge': {}, + 'linear_ramp': {'end_values': 0}, + 'maximum': {'stat_length': None}, + 'mean': {'stat_length': None}, + 'median': {'stat_length': None}, + 'minimum': {'stat_length': None}, + 'reflect': {'reflect_type': 'even'}, + 'symmetric': {'reflect_type': 'even'}, + 'wrap': {}, + 'empty': {} +} + + +class TestAsPairs: + def test_single_value(self): + """Test casting for a single value.""" + expected = np.array([[3, 3]] * 10) + for x in (3, [3], [[3]]): + result = _as_pairs(x, 10) + assert_equal(result, expected) + # Test with dtype=object + obj = object() + assert_equal( + _as_pairs(obj, 10), + np.array([[obj, obj]] * 10) + ) + + def test_two_values(self): + """Test proper casting for two different values.""" + # Broadcasting in the first dimension with numbers + expected = np.array([[3, 4]] * 10) + for x in ([3, 4], [[3, 4]]): + result = _as_pairs(x, 10) + assert_equal(result, expected) + # and with dtype=object + obj = object() + assert_equal( + _as_pairs(["a", obj], 10), + np.array([["a", obj]] * 10) + ) + + # Broadcasting in the second / last dimension with numbers + assert_equal( + _as_pairs([[3], [4]], 2), + np.array([[3, 3], [4, 4]]) + ) + # and with dtype=object + assert_equal( + _as_pairs([["a"], [obj]], 2), + np.array([["a", "a"], [obj, obj]]) + ) + + def test_with_none(self): + expected = ((None, None), (None, None), (None, None)) + assert_equal( + _as_pairs(None, 3, as_index=False), + expected + ) + assert_equal( + _as_pairs(None, 3, as_index=True), + expected + ) + + def test_pass_through(self): + """Test if `x` already matching desired output are passed through.""" + expected = np.arange(12).reshape((6, 2)) + assert_equal( + _as_pairs(expected, 6), + expected + ) + + def test_as_index(self): + """Test results if `as_index=True`.""" + assert_equal( + _as_pairs([2.6, 3.3], 10, as_index=True), + np.array([[3, 3]] * 10, dtype=np.intp) + ) + assert_equal( + _as_pairs([2.6, 4.49], 10, as_index=True), + np.array([[3, 4]] * 10, dtype=np.intp) + ) + for x in (-3, [-3], [[-3]], [-3, 4], [3, -4], [[-3, 4]], [[4, -3]], + [[1, 2]] * 9 + [[1, -2]]): + with pytest.raises(ValueError, match="negative values"): + _as_pairs(x, 10, as_index=True) + + def test_exceptions(self): + """Ensure faulty usage is discovered.""" + with pytest.raises(ValueError, match="more dimensions than allowed"): + _as_pairs([[[3]]], 10) + with pytest.raises(ValueError, match="could not be broadcast"): + _as_pairs([[1, 2], [3, 4]], 3) + with pytest.raises(ValueError, match="could not be broadcast"): + _as_pairs(np.ones((2, 3)), 3) + + +class TestConditionalShortcuts: + @pytest.mark.parametrize("mode", _all_modes.keys()) + def test_zero_padding_shortcuts(self, mode): + test = np.arange(120).reshape(4, 5, 6) + pad_amt = [(0, 0) for _ in test.shape] + assert_array_equal(test, np.pad(test, pad_amt, mode=mode)) + + @pytest.mark.parametrize("mode", ['maximum', 'mean', 'median', 'minimum',]) + def test_shallow_statistic_range(self, mode): + test = np.arange(120).reshape(4, 5, 6) + pad_amt = [(1, 1) for _ in test.shape] + assert_array_equal(np.pad(test, pad_amt, mode='edge'), + np.pad(test, pad_amt, mode=mode, stat_length=1)) + + @pytest.mark.parametrize("mode", ['maximum', 'mean', 
'median', 'minimum',]) + def test_clip_statistic_range(self, mode): + test = np.arange(30).reshape(5, 6) + pad_amt = [(3, 3) for _ in test.shape] + assert_array_equal(np.pad(test, pad_amt, mode=mode), + np.pad(test, pad_amt, mode=mode, stat_length=30)) + + +class TestStatistic: + def test_check_mean_stat_length(self): + a = np.arange(100).astype('f') + a = np.pad(a, ((25, 20), ), 'mean', stat_length=((2, 3), )) + b = np.array( + [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, + 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, + 0.5, 0.5, 0.5, 0.5, 0.5, + + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., + 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., + 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., + 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., + 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., + 50., 51., 52., 53., 54., 55., 56., 57., 58., 59., + 60., 61., 62., 63., 64., 65., 66., 67., 68., 69., + 70., 71., 72., 73., 74., 75., 76., 77., 78., 79., + 80., 81., 82., 83., 84., 85., 86., 87., 88., 89., + 90., 91., 92., 93., 94., 95., 96., 97., 98., 99., + + 98., 98., 98., 98., 98., 98., 98., 98., 98., 98., + 98., 98., 98., 98., 98., 98., 98., 98., 98., 98. + ]) + assert_array_equal(a, b) + + def test_check_maximum_1(self): + a = np.arange(100) + a = np.pad(a, (25, 20), 'maximum') + b = np.array( + [99, 99, 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, + + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, + + 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, 99, 99] + ) + assert_array_equal(a, b) + + def test_check_maximum_2(self): + a = np.arange(100) + 1 + a = np.pad(a, (25, 20), 'maximum') + b = np.array( + [100, 100, 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, + + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, + 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, + 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, + 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, + 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, + 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, + + 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, 100, 100] + ) + assert_array_equal(a, b) + + def test_check_maximum_stat_length(self): + a = np.arange(100) + 1 + a = np.pad(a, (25, 20), 'maximum', stat_length=10) + b = np.array( + [10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, + + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, + 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, + 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, + 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, + 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, + 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, + + 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, 100, 100] + ) + assert_array_equal(a, b) + 
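+    # A hand-worked sketch of the `stat_length` behavior exercised above,
+    # assuming np.pad's documented semantics: with stat_length=2 only the
+    # two outermost samples on each side feed the statistic, so
+    #     np.pad(np.array([1, 2, 3, 4]), 2, 'maximum', stat_length=2)
+    # pads with max(1, 2) == 2 on the left and max(3, 4) == 4 on the right,
+    # giving array([2, 2, 1, 2, 3, 4, 4, 4]).
+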
+ def test_check_minimum_1(self): + a = np.arange(100) + a = np.pad(a, (25, 20), 'minimum') + b = np.array( + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, + + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ) + assert_array_equal(a, b) + + def test_check_minimum_2(self): + a = np.arange(100) + 2 + a = np.pad(a, (25, 20), 'minimum') + b = np.array( + [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, + + 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, + 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, + 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, + 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, + 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, + 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, + 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, + + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2] + ) + assert_array_equal(a, b) + + def test_check_minimum_stat_length(self): + a = np.arange(100) + 1 + a = np.pad(a, (25, 20), 'minimum', stat_length=10) + b = np.array( + [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, + + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, + 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, + 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, + 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, + 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, + 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, + + 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, + 91, 91, 91, 91, 91, 91, 91, 91, 91, 91] + ) + assert_array_equal(a, b) + + def test_check_median(self): + a = np.arange(100).astype('f') + a = np.pad(a, (25, 20), 'median') + b = np.array( + [49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, + 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, + 49.5, 49.5, 49.5, 49.5, 49.5, + + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., + 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., + 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., + 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., + 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., + 50., 51., 52., 53., 54., 55., 56., 57., 58., 59., + 60., 61., 62., 63., 64., 65., 66., 67., 68., 69., + 70., 71., 72., 73., 74., 75., 76., 77., 78., 79., + 80., 81., 82., 83., 84., 85., 86., 87., 88., 89., + 90., 91., 92., 93., 94., 95., 96., 97., 98., 99., + + 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, + 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5] + ) + assert_array_equal(a, b) + + def test_check_median_01(self): + a = np.array([[3, 1, 4], [4, 5, 9], [9, 8, 2]]) + a = np.pad(a, 1, 'median') + b = np.array( + [[4, 4, 5, 4, 4], + + [3, 3, 1, 4, 3], + [5, 4, 5, 9, 5], + [8, 9, 8, 2, 8], + + [4, 4, 5, 4, 4]] + ) + assert_array_equal(a, b) + + def test_check_median_02(self): + a = np.array([[3, 1, 4], [4, 5, 9], [9, 8, 2]]) + a = np.pad(a.T, 1, 'median').T + b = np.array( + [[5, 4, 5, 4, 5], + + [3, 3, 1, 4, 3], + [5, 4, 
5, 9, 5], + [8, 9, 8, 2, 8], + + [5, 4, 5, 4, 5]] + ) + assert_array_equal(a, b) + + def test_check_median_stat_length(self): + a = np.arange(100).astype('f') + a[1] = 2. + a[97] = 96. + a = np.pad(a, (25, 20), 'median', stat_length=(3, 5)) + b = np.array( + [ 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., + 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., + 2., 2., 2., 2., 2., + + 0., 2., 2., 3., 4., 5., 6., 7., 8., 9., + 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., + 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., + 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., + 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., + 50., 51., 52., 53., 54., 55., 56., 57., 58., 59., + 60., 61., 62., 63., 64., 65., 66., 67., 68., 69., + 70., 71., 72., 73., 74., 75., 76., 77., 78., 79., + 80., 81., 82., 83., 84., 85., 86., 87., 88., 89., + 90., 91., 92., 93., 94., 95., 96., 96., 98., 99., + + 96., 96., 96., 96., 96., 96., 96., 96., 96., 96., + 96., 96., 96., 96., 96., 96., 96., 96., 96., 96.] + ) + assert_array_equal(a, b) + + def test_check_mean_shape_one(self): + a = [[4, 5, 6]] + a = np.pad(a, (5, 7), 'mean', stat_length=2) + b = np.array( + [[4, 4, 4, 4, 4, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6], + [4, 4, 4, 4, 4, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6], + [4, 4, 4, 4, 4, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6], + [4, 4, 4, 4, 4, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6], + [4, 4, 4, 4, 4, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6], + + [4, 4, 4, 4, 4, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6], + + [4, 4, 4, 4, 4, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6], + [4, 4, 4, 4, 4, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6], + [4, 4, 4, 4, 4, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6], + [4, 4, 4, 4, 4, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6], + [4, 4, 4, 4, 4, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6], + [4, 4, 4, 4, 4, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6], + [4, 4, 4, 4, 4, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6]] + ) + assert_array_equal(a, b) + + def test_check_mean_2(self): + a = np.arange(100).astype('f') + a = np.pad(a, (25, 20), 'mean') + b = np.array( + [49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, + 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, + 49.5, 49.5, 49.5, 49.5, 49.5, + + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., + 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., + 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., + 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., + 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., + 50., 51., 52., 53., 54., 55., 56., 57., 58., 59., + 60., 61., 62., 63., 64., 65., 66., 67., 68., 69., + 70., 71., 72., 73., 74., 75., 76., 77., 78., 79., + 80., 81., 82., 83., 84., 85., 86., 87., 88., 89., + 90., 91., 92., 93., 94., 95., 96., 97., 98., 99., + + 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, + 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5] + ) + assert_array_equal(a, b) + + @pytest.mark.parametrize("mode", [ + "mean", + "median", + "minimum", + "maximum" + ]) + def test_same_prepend_append(self, mode): + """ Test that appended and prepended values are equal """ + # This test is constructed to trigger floating point rounding errors in + # a way that caused gh-11216 for mode=='mean' + a = np.array([-1, 2, -1]) + np.array([0, 1e-12, 0], dtype=np.float64) + a = np.pad(a, (1, 1), mode) + assert_equal(a[0], a[-1]) + + @pytest.mark.parametrize("mode", ["mean", "median", "minimum", "maximum"]) + @pytest.mark.parametrize( + "stat_length", [-2, (-2,), (3, -1), ((5, 2), (-2, 3)), ((-4,), (2,))] + ) + def test_check_negative_stat_length(self, mode, stat_length): + arr = np.arange(30).reshape((6, 5)) + match = "index can't contain negative values" + with 
pytest.raises(ValueError, match=match): + np.pad(arr, 2, mode, stat_length=stat_length) + + def test_simple_stat_length(self): + a = np.arange(30) + a = np.reshape(a, (6, 5)) + a = np.pad(a, ((2, 3), (3, 2)), mode='mean', stat_length=(3,)) + b = np.array( + [[6, 6, 6, 5, 6, 7, 8, 9, 8, 8], + [6, 6, 6, 5, 6, 7, 8, 9, 8, 8], + + [1, 1, 1, 0, 1, 2, 3, 4, 3, 3], + [6, 6, 6, 5, 6, 7, 8, 9, 8, 8], + [11, 11, 11, 10, 11, 12, 13, 14, 13, 13], + [16, 16, 16, 15, 16, 17, 18, 19, 18, 18], + [21, 21, 21, 20, 21, 22, 23, 24, 23, 23], + [26, 26, 26, 25, 26, 27, 28, 29, 28, 28], + + [21, 21, 21, 20, 21, 22, 23, 24, 23, 23], + [21, 21, 21, 20, 21, 22, 23, 24, 23, 23], + [21, 21, 21, 20, 21, 22, 23, 24, 23, 23]] + ) + assert_array_equal(a, b) + + @pytest.mark.filterwarnings("ignore:Mean of empty slice:RuntimeWarning") + @pytest.mark.filterwarnings( + "ignore:invalid value encountered in (true_divide|double_scalars):" + "RuntimeWarning" + ) + @pytest.mark.parametrize("mode", ["mean", "median"]) + def test_zero_stat_length_valid(self, mode): + arr = np.pad([1., 2.], (1, 2), mode, stat_length=0) + expected = np.array([np.nan, 1., 2., np.nan, np.nan]) + assert_equal(arr, expected) + + @pytest.mark.parametrize("mode", ["minimum", "maximum"]) + def test_zero_stat_length_invalid(self, mode): + match = "stat_length of 0 yields no value for padding" + with pytest.raises(ValueError, match=match): + np.pad([1., 2.], 0, mode, stat_length=0) + with pytest.raises(ValueError, match=match): + np.pad([1., 2.], 0, mode, stat_length=(1, 0)) + with pytest.raises(ValueError, match=match): + np.pad([1., 2.], 1, mode, stat_length=0) + with pytest.raises(ValueError, match=match): + np.pad([1., 2.], 1, mode, stat_length=(1, 0)) + + +class TestConstant: + def test_check_constant(self): + a = np.arange(100) + a = np.pad(a, (25, 20), 'constant', constant_values=(10, 20)) + b = np.array( + [10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, + + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, + + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20] + ) + assert_array_equal(a, b) + + def test_check_constant_zeros(self): + a = np.arange(100) + a = np.pad(a, (25, 20), 'constant') + b = np.array( + [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, + + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ) + assert_array_equal(a, b) + + def test_check_constant_float(self): + # If input array is int, but constant_values are float, the dtype of + # the array to be padded is kept + arr = np.arange(30).reshape(5, 6) + test = np.pad(arr, (1, 2), mode='constant', + constant_values=1.1) + expected = np.array( + [[ 1, 1, 1, 1, 1, 1, 1, 1, 1], + + [ 1, 0, 1, 2, 3, 4, 5, 
1, 1], + [ 1, 6, 7, 8, 9, 10, 11, 1, 1], + [ 1, 12, 13, 14, 15, 16, 17, 1, 1], + [ 1, 18, 19, 20, 21, 22, 23, 1, 1], + [ 1, 24, 25, 26, 27, 28, 29, 1, 1], + + [ 1, 1, 1, 1, 1, 1, 1, 1, 1], + [ 1, 1, 1, 1, 1, 1, 1, 1, 1]] + ) + assert_allclose(test, expected) + + def test_check_constant_float2(self): + # If input array is float, and constant_values are float, the dtype of + # the array to be padded is kept - here retaining the float constants + arr = np.arange(30).reshape(5, 6) + arr_float = arr.astype(np.float64) + test = np.pad(arr_float, ((1, 2), (1, 2)), mode='constant', + constant_values=1.1) + expected = np.array( + [[ 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1], + + [ 1.1, 0. , 1. , 2. , 3. , 4. , 5. , 1.1, 1.1], + [ 1.1, 6. , 7. , 8. , 9. , 10. , 11. , 1.1, 1.1], + [ 1.1, 12. , 13. , 14. , 15. , 16. , 17. , 1.1, 1.1], + [ 1.1, 18. , 19. , 20. , 21. , 22. , 23. , 1.1, 1.1], + [ 1.1, 24. , 25. , 26. , 27. , 28. , 29. , 1.1, 1.1], + + [ 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1], + [ 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1]] + ) + assert_allclose(test, expected) + + def test_check_constant_float3(self): + a = np.arange(100, dtype=float) + a = np.pad(a, (25, 20), 'constant', constant_values=(-1.1, -1.2)) + b = np.array( + [-1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, + -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, + -1.1, -1.1, -1.1, -1.1, -1.1, + + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, + + -1.2, -1.2, -1.2, -1.2, -1.2, -1.2, -1.2, -1.2, -1.2, -1.2, + -1.2, -1.2, -1.2, -1.2, -1.2, -1.2, -1.2, -1.2, -1.2, -1.2] + ) + assert_allclose(a, b) + + def test_check_constant_odd_pad_amount(self): + arr = np.arange(30).reshape(5, 6) + test = np.pad(arr, ((1,), (2,)), mode='constant', + constant_values=3) + expected = np.array( + [[ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3], + + [ 3, 3, 0, 1, 2, 3, 4, 5, 3, 3], + [ 3, 3, 6, 7, 8, 9, 10, 11, 3, 3], + [ 3, 3, 12, 13, 14, 15, 16, 17, 3, 3], + [ 3, 3, 18, 19, 20, 21, 22, 23, 3, 3], + [ 3, 3, 24, 25, 26, 27, 28, 29, 3, 3], + + [ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]] + ) + assert_allclose(test, expected) + + def test_check_constant_pad_2d(self): + arr = np.arange(4).reshape(2, 2) + test = np.lib.pad(arr, ((1, 2), (1, 3)), mode='constant', + constant_values=((1, 2), (3, 4))) + expected = np.array( + [[3, 1, 1, 4, 4, 4], + [3, 0, 1, 4, 4, 4], + [3, 2, 3, 4, 4, 4], + [3, 2, 2, 4, 4, 4], + [3, 2, 2, 4, 4, 4]] + ) + assert_allclose(test, expected) + + def test_check_large_integers(self): + uint64_max = 2 ** 64 - 1 + arr = np.full(5, uint64_max, dtype=np.uint64) + test = np.pad(arr, 1, mode="constant", constant_values=arr.min()) + expected = np.full(7, uint64_max, dtype=np.uint64) + assert_array_equal(test, expected) + + int64_max = 2 ** 63 - 1 + arr = np.full(5, int64_max, dtype=np.int64) + test = np.pad(arr, 1, mode="constant", constant_values=arr.min()) + expected = np.full(7, int64_max, dtype=np.int64) + assert_array_equal(test, expected) + + def test_check_object_array(self): + arr = np.empty(1, dtype=object) + obj_a = object() + arr[0] = obj_a + obj_b = object() + obj_c = object() + arr = np.pad(arr, pad_width=1, mode='constant', + constant_values=(obj_b, obj_c)) + + 
expected = np.empty((3,), dtype=object) + expected[0] = obj_b + expected[1] = obj_a + expected[2] = obj_c + + assert_array_equal(arr, expected) + + def test_pad_empty_dimension(self): + arr = np.zeros((3, 0, 2)) + result = np.pad(arr, [(0,), (2,), (1,)], mode="constant") + assert result.shape == (3, 4, 4) + + +class TestLinearRamp: + def test_check_simple(self): + a = np.arange(100).astype('f') + a = np.pad(a, (25, 20), 'linear_ramp', end_values=(4, 5)) + b = np.array( + [4.00, 3.84, 3.68, 3.52, 3.36, 3.20, 3.04, 2.88, 2.72, 2.56, + 2.40, 2.24, 2.08, 1.92, 1.76, 1.60, 1.44, 1.28, 1.12, 0.96, + 0.80, 0.64, 0.48, 0.32, 0.16, + + 0.00, 1.00, 2.00, 3.00, 4.00, 5.00, 6.00, 7.00, 8.00, 9.00, + 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, + 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, + 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, + 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0, 49.0, + 50.0, 51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, + 60.0, 61.0, 62.0, 63.0, 64.0, 65.0, 66.0, 67.0, 68.0, 69.0, + 70.0, 71.0, 72.0, 73.0, 74.0, 75.0, 76.0, 77.0, 78.0, 79.0, + 80.0, 81.0, 82.0, 83.0, 84.0, 85.0, 86.0, 87.0, 88.0, 89.0, + 90.0, 91.0, 92.0, 93.0, 94.0, 95.0, 96.0, 97.0, 98.0, 99.0, + + 94.3, 89.6, 84.9, 80.2, 75.5, 70.8, 66.1, 61.4, 56.7, 52.0, + 47.3, 42.6, 37.9, 33.2, 28.5, 23.8, 19.1, 14.4, 9.7, 5.] + ) + assert_allclose(a, b, rtol=1e-5, atol=1e-5) + + def test_check_2d(self): + arr = np.arange(20).reshape(4, 5).astype(np.float64) + test = np.pad(arr, (2, 2), mode='linear_ramp', end_values=(0, 0)) + expected = np.array( + [[0., 0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0.5, 1., 1.5, 2., 1., 0.], + [0., 0., 0., 1., 2., 3., 4., 2., 0.], + [0., 2.5, 5., 6., 7., 8., 9., 4.5, 0.], + [0., 5., 10., 11., 12., 13., 14., 7., 0.], + [0., 7.5, 15., 16., 17., 18., 19., 9.5, 0.], + [0., 3.75, 7.5, 8., 8.5, 9., 9.5, 4.75, 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 0.]]) + assert_allclose(test, expected) + + @pytest.mark.xfail(exceptions=(AssertionError,)) + def test_object_array(self): + from fractions import Fraction + arr = np.array([Fraction(1, 2), Fraction(-1, 2)]) + actual = np.pad(arr, (2, 3), mode='linear_ramp', end_values=0) + + # deliberately chosen to have a non-power-of-2 denominator such that + # rounding to floats causes a failure. + expected = np.array([ + Fraction( 0, 12), + Fraction( 3, 12), + Fraction( 6, 12), + Fraction(-6, 12), + Fraction(-4, 12), + Fraction(-2, 12), + Fraction(-0, 12), + ]) + assert_equal(actual, expected) + + def test_end_values(self): + """Ensure that end values are exact.""" + a = np.pad(np.ones(10).reshape(2, 5), (223, 123), mode="linear_ramp") + assert_equal(a[:, 0], 0.) + assert_equal(a[:, -1], 0.) + assert_equal(a[0, :], 0.) + assert_equal(a[-1, :], 0.) + + @pytest.mark.parametrize("dtype", _numeric_dtypes) + def test_negative_difference(self, dtype): + """ + Check correct behavior of unsigned dtypes if there is a negative + difference between the edge to pad and `end_values`. Check both cases + to be independent of implementation. Test behavior for all other dtypes + in case dtype casting interferes with complex dtypes. See gh-14191. 
+ """ + x = np.array([3], dtype=dtype) + result = np.pad(x, 3, mode="linear_ramp", end_values=0) + expected = np.array([0, 1, 2, 3, 2, 1, 0], dtype=dtype) + assert_equal(result, expected) + + x = np.array([0], dtype=dtype) + result = np.pad(x, 3, mode="linear_ramp", end_values=3) + expected = np.array([3, 2, 1, 0, 1, 2, 3], dtype=dtype) + assert_equal(result, expected) + + +class TestReflect: + def test_check_simple(self): + a = np.arange(100) + a = np.pad(a, (25, 20), 'reflect') + b = np.array( + [25, 24, 23, 22, 21, 20, 19, 18, 17, 16, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, + 5, 4, 3, 2, 1, + + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, + + 98, 97, 96, 95, 94, 93, 92, 91, 90, 89, + 88, 87, 86, 85, 84, 83, 82, 81, 80, 79] + ) + assert_array_equal(a, b) + + def test_check_odd_method(self): + a = np.arange(100) + a = np.pad(a, (25, 20), 'reflect', reflect_type='odd') + b = np.array( + [-25, -24, -23, -22, -21, -20, -19, -18, -17, -16, + -15, -14, -13, -12, -11, -10, -9, -8, -7, -6, + -5, -4, -3, -2, -1, + + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, + + 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, + 110, 111, 112, 113, 114, 115, 116, 117, 118, 119] + ) + assert_array_equal(a, b) + + def test_check_large_pad(self): + a = [[4, 5, 6], [6, 7, 8]] + a = np.pad(a, (5, 7), 'reflect') + b = np.array( + [[7, 6, 7, 8, 7, 6, 7, 8, 7, 6, 7, 8, 7, 6, 7], + [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], + [7, 6, 7, 8, 7, 6, 7, 8, 7, 6, 7, 8, 7, 6, 7], + [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], + [7, 6, 7, 8, 7, 6, 7, 8, 7, 6, 7, 8, 7, 6, 7], + + [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], + [7, 6, 7, 8, 7, 6, 7, 8, 7, 6, 7, 8, 7, 6, 7], + + [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], + [7, 6, 7, 8, 7, 6, 7, 8, 7, 6, 7, 8, 7, 6, 7], + [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], + [7, 6, 7, 8, 7, 6, 7, 8, 7, 6, 7, 8, 7, 6, 7], + [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], + [7, 6, 7, 8, 7, 6, 7, 8, 7, 6, 7, 8, 7, 6, 7], + [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5]] + ) + assert_array_equal(a, b) + + def test_check_shape(self): + a = [[4, 5, 6]] + a = np.pad(a, (5, 7), 'reflect') + b = np.array( + [[5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], + [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], + [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], + [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], + [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], + + [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], + + [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], + [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], + [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], + [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], + [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], + [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], + [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5]] + ) + assert_array_equal(a, b) + + def 
test_check_01(self): + a = np.pad([1, 2, 3], 2, 'reflect') + b = np.array([3, 2, 1, 2, 3, 2, 1]) + assert_array_equal(a, b) + + def test_check_02(self): + a = np.pad([1, 2, 3], 3, 'reflect') + b = np.array([2, 3, 2, 1, 2, 3, 2, 1, 2]) + assert_array_equal(a, b) + + def test_check_03(self): + a = np.pad([1, 2, 3], 4, 'reflect') + b = np.array([1, 2, 3, 2, 1, 2, 3, 2, 1, 2, 3]) + assert_array_equal(a, b) + + +class TestEmptyArray: + """Check how padding behaves on arrays with an empty dimension.""" + + @pytest.mark.parametrize( + # Keep parametrization ordered, otherwise pytest-xdist might believe + # that different tests were collected during parallelization + "mode", sorted(_all_modes.keys() - {"constant", "empty"}) + ) + def test_pad_empty_dimension(self, mode): + match = ("can't extend empty axis 0 using modes other than 'constant' " + "or 'empty'") + with pytest.raises(ValueError, match=match): + np.pad([], 4, mode=mode) + with pytest.raises(ValueError, match=match): + np.pad(np.ndarray(0), 4, mode=mode) + with pytest.raises(ValueError, match=match): + np.pad(np.zeros((0, 3)), ((1,), (0,)), mode=mode) + + @pytest.mark.parametrize("mode", _all_modes.keys()) + def test_pad_non_empty_dimension(self, mode): + result = np.pad(np.ones((2, 0, 2)), ((3,), (0,), (1,)), mode=mode) + assert result.shape == (8, 0, 4) + + +class TestSymmetric: + def test_check_simple(self): + a = np.arange(100) + a = np.pad(a, (25, 20), 'symmetric') + b = np.array( + [24, 23, 22, 21, 20, 19, 18, 17, 16, 15, + 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, + 4, 3, 2, 1, 0, + + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, + + 99, 98, 97, 96, 95, 94, 93, 92, 91, 90, + 89, 88, 87, 86, 85, 84, 83, 82, 81, 80] + ) + assert_array_equal(a, b) + + def test_check_odd_method(self): + a = np.arange(100) + a = np.pad(a, (25, 20), 'symmetric', reflect_type='odd') + b = np.array( + [-24, -23, -22, -21, -20, -19, -18, -17, -16, -15, + -14, -13, -12, -11, -10, -9, -8, -7, -6, -5, + -4, -3, -2, -1, 0, + + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, + + 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, + 109, 110, 111, 112, 113, 114, 115, 116, 117, 118] + ) + assert_array_equal(a, b) + + def test_check_large_pad(self): + a = [[4, 5, 6], [6, 7, 8]] + a = np.pad(a, (5, 7), 'symmetric') + b = np.array( + [[5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [7, 8, 8, 7, 6, 6, 7, 8, 8, 7, 6, 6, 7, 8, 8], + [7, 8, 8, 7, 6, 6, 7, 8, 8, 7, 6, 6, 7, 8, 8], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [7, 8, 8, 7, 6, 6, 7, 8, 8, 7, 6, 6, 7, 8, 8], + + [7, 8, 8, 7, 6, 6, 7, 8, 8, 7, 6, 6, 7, 8, 8], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [7, 8, 8, 7, 6, 6, 7, 8, 8, 7, 6, 6, 7, 8, 8], + 
[7, 8, 8, 7, 6, 6, 7, 8, 8, 7, 6, 6, 7, 8, 8], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6]] + ) + + assert_array_equal(a, b) + + def test_check_large_pad_odd(self): + a = [[4, 5, 6], [6, 7, 8]] + a = np.pad(a, (5, 7), 'symmetric', reflect_type='odd') + b = np.array( + [[-3, -2, -2, -1, 0, 0, 1, 2, 2, 3, 4, 4, 5, 6, 6], + [-3, -2, -2, -1, 0, 0, 1, 2, 2, 3, 4, 4, 5, 6, 6], + [-1, 0, 0, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 8, 8], + [-1, 0, 0, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 8, 8], + [ 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 8, 8, 9, 10, 10], + + [ 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 8, 8, 9, 10, 10], + [ 3, 4, 4, 5, 6, 6, 7, 8, 8, 9, 10, 10, 11, 12, 12], + + [ 3, 4, 4, 5, 6, 6, 7, 8, 8, 9, 10, 10, 11, 12, 12], + [ 5, 6, 6, 7, 8, 8, 9, 10, 10, 11, 12, 12, 13, 14, 14], + [ 5, 6, 6, 7, 8, 8, 9, 10, 10, 11, 12, 12, 13, 14, 14], + [ 7, 8, 8, 9, 10, 10, 11, 12, 12, 13, 14, 14, 15, 16, 16], + [ 7, 8, 8, 9, 10, 10, 11, 12, 12, 13, 14, 14, 15, 16, 16], + [ 9, 10, 10, 11, 12, 12, 13, 14, 14, 15, 16, 16, 17, 18, 18], + [ 9, 10, 10, 11, 12, 12, 13, 14, 14, 15, 16, 16, 17, 18, 18]] + ) + assert_array_equal(a, b) + + def test_check_shape(self): + a = [[4, 5, 6]] + a = np.pad(a, (5, 7), 'symmetric') + b = np.array( + [[5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6]] + ) + assert_array_equal(a, b) + + def test_check_01(self): + a = np.pad([1, 2, 3], 2, 'symmetric') + b = np.array([2, 1, 1, 2, 3, 3, 2]) + assert_array_equal(a, b) + + def test_check_02(self): + a = np.pad([1, 2, 3], 3, 'symmetric') + b = np.array([3, 2, 1, 1, 2, 3, 3, 2, 1]) + assert_array_equal(a, b) + + def test_check_03(self): + a = np.pad([1, 2, 3], 6, 'symmetric') + b = np.array([1, 2, 3, 3, 2, 1, 1, 2, 3, 3, 2, 1, 1, 2, 3]) + assert_array_equal(a, b) + + +class TestWrap: + def test_check_simple(self): + a = np.arange(100) + a = np.pad(a, (25, 20), 'wrap') + b = np.array( + [75, 76, 77, 78, 79, 80, 81, 82, 83, 84, + 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, + 95, 96, 97, 98, 99, + + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, + + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] + ) + assert_array_equal(a, b) + + def test_check_large_pad(self): + a = np.arange(12) + a = np.reshape(a, (3, 4)) + a = np.pad(a, (10, 12), 'wrap') + b = np.array( + [[10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, + 11, 8, 9, 10, 11, 8, 9, 10, 11], + [2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, + 3, 0, 1, 2, 3, 0, 1, 2, 3], + [6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, + 7, 4, 5, 6, 7, 4, 5, 6, 7], + [10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 
8, 9, 10, + 11, 8, 9, 10, 11, 8, 9, 10, 11], + [2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, + 3, 0, 1, 2, 3, 0, 1, 2, 3], + [6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, + 7, 4, 5, 6, 7, 4, 5, 6, 7], + [10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, + 11, 8, 9, 10, 11, 8, 9, 10, 11], + [2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, + 3, 0, 1, 2, 3, 0, 1, 2, 3], + [6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, + 7, 4, 5, 6, 7, 4, 5, 6, 7], + [10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, + 11, 8, 9, 10, 11, 8, 9, 10, 11], + + [2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, + 3, 0, 1, 2, 3, 0, 1, 2, 3], + [6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, + 7, 4, 5, 6, 7, 4, 5, 6, 7], + [10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, + 11, 8, 9, 10, 11, 8, 9, 10, 11], + + [2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, + 3, 0, 1, 2, 3, 0, 1, 2, 3], + [6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, + 7, 4, 5, 6, 7, 4, 5, 6, 7], + [10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, + 11, 8, 9, 10, 11, 8, 9, 10, 11], + [2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, + 3, 0, 1, 2, 3, 0, 1, 2, 3], + [6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, + 7, 4, 5, 6, 7, 4, 5, 6, 7], + [10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, + 11, 8, 9, 10, 11, 8, 9, 10, 11], + [2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, + 3, 0, 1, 2, 3, 0, 1, 2, 3], + [6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, + 7, 4, 5, 6, 7, 4, 5, 6, 7], + [10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, + 11, 8, 9, 10, 11, 8, 9, 10, 11], + [2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, + 3, 0, 1, 2, 3, 0, 1, 2, 3], + [6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, + 7, 4, 5, 6, 7, 4, 5, 6, 7], + [10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, + 11, 8, 9, 10, 11, 8, 9, 10, 11]] + ) + assert_array_equal(a, b) + + def test_check_01(self): + a = np.pad([1, 2, 3], 3, 'wrap') + b = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3]) + assert_array_equal(a, b) + + def test_check_02(self): + a = np.pad([1, 2, 3], 4, 'wrap') + b = np.array([3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1]) + assert_array_equal(a, b) + + def test_pad_with_zero(self): + a = np.ones((3, 5)) + b = np.pad(a, (0, 5), mode="wrap") + assert_array_equal(a, b[:-5, :-5]) + + def test_repeated_wrapping(self): + """ + Check wrapping on each side individually if the wrapped area is longer + than the original array. + """ + a = np.arange(5) + b = np.pad(a, (12, 0), mode="wrap") + assert_array_equal(np.r_[a, a, a, a][3:], b) + + a = np.arange(5) + b = np.pad(a, (0, 12), mode="wrap") + assert_array_equal(np.r_[a, a, a, a][:-3], b) + + +class TestEdge: + def test_check_simple(self): + a = np.arange(12) + a = np.reshape(a, (4, 3)) + a = np.pad(a, ((2, 3), (3, 2)), 'edge') + b = np.array( + [[0, 0, 0, 0, 1, 2, 2, 2], + [0, 0, 0, 0, 1, 2, 2, 2], + + [0, 0, 0, 0, 1, 2, 2, 2], + [3, 3, 3, 3, 4, 5, 5, 5], + [6, 6, 6, 6, 7, 8, 8, 8], + [9, 9, 9, 9, 10, 11, 11, 11], + + [9, 9, 9, 9, 10, 11, 11, 11], + [9, 9, 9, 9, 10, 11, 11, 11], + [9, 9, 9, 9, 10, 11, 11, 11]] + ) + assert_array_equal(a, b) + + def test_check_width_shape_1_2(self): + # Check a pad_width of the form ((1, 2),). + # Regression test for issue gh-7808. 
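+        # A single (before, after) pair should broadcast to every axis, so
+        # for the 2-D and 3-D inputs below ((1, 2),) must give the same
+        # result as spelling out (1, 2) for each axis.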
+ a = np.array([1, 2, 3]) + padded = np.pad(a, ((1, 2),), 'edge') + expected = np.array([1, 1, 2, 3, 3, 3]) + assert_array_equal(padded, expected) + + a = np.array([[1, 2, 3], [4, 5, 6]]) + padded = np.pad(a, ((1, 2),), 'edge') + expected = np.pad(a, ((1, 2), (1, 2)), 'edge') + assert_array_equal(padded, expected) + + a = np.arange(24).reshape(2, 3, 4) + padded = np.pad(a, ((1, 2),), 'edge') + expected = np.pad(a, ((1, 2), (1, 2), (1, 2)), 'edge') + assert_array_equal(padded, expected) + + +class TestEmpty: + def test_simple(self): + arr = np.arange(24).reshape(4, 6) + result = np.pad(arr, [(2, 3), (3, 1)], mode="empty") + assert result.shape == (9, 10) + assert_equal(arr, result[2:-3, 3:-1]) + + def test_pad_empty_dimension(self): + arr = np.zeros((3, 0, 2)) + result = np.pad(arr, [(0,), (2,), (1,)], mode="empty") + assert result.shape == (3, 4, 4) + + +def test_legacy_vector_functionality(): + def _padwithtens(vector, pad_width, iaxis, kwargs): + vector[:pad_width[0]] = 10 + vector[-pad_width[1]:] = 10 + + a = np.arange(6).reshape(2, 3) + a = np.pad(a, 2, _padwithtens) + b = np.array( + [[10, 10, 10, 10, 10, 10, 10], + [10, 10, 10, 10, 10, 10, 10], + + [10, 10, 0, 1, 2, 10, 10], + [10, 10, 3, 4, 5, 10, 10], + + [10, 10, 10, 10, 10, 10, 10], + [10, 10, 10, 10, 10, 10, 10]] + ) + assert_array_equal(a, b) + + +def test_unicode_mode(): + a = np.pad([1], 2, mode=u'constant') + b = np.array([0, 0, 1, 0, 0]) + assert_array_equal(a, b) + + +@pytest.mark.parametrize("mode", ["edge", "symmetric", "reflect", "wrap"]) +def test_object_input(mode): + # Regression test for issue gh-11395. + a = np.full((4, 3), fill_value=None) + pad_amt = ((2, 3), (3, 2)) + b = np.full((9, 8), fill_value=None) + assert_array_equal(np.pad(a, pad_amt, mode=mode), b) + + +class TestPadWidth: + @pytest.mark.parametrize("pad_width", [ + (4, 5, 6, 7), + ((1,), (2,), (3,)), + ((1, 2), (3, 4), (5, 6)), + ((3, 4, 5), (0, 1, 2)), + ]) + @pytest.mark.parametrize("mode", _all_modes.keys()) + def test_misshaped_pad_width(self, pad_width, mode): + arr = np.arange(30).reshape((6, 5)) + match = "operands could not be broadcast together" + with pytest.raises(ValueError, match=match): + np.pad(arr, pad_width, mode) + + @pytest.mark.parametrize("mode", _all_modes.keys()) + def test_misshaped_pad_width_2(self, mode): + arr = np.arange(30).reshape((6, 5)) + match = ("input operand has more dimensions than allowed by the axis " + "remapping") + with pytest.raises(ValueError, match=match): + np.pad(arr, (((3,), (4,), (5,)), ((0,), (1,), (2,))), mode) + + @pytest.mark.parametrize( + "pad_width", [-2, (-2,), (3, -1), ((5, 2), (-2, 3)), ((-4,), (2,))]) + @pytest.mark.parametrize("mode", _all_modes.keys()) + def test_negative_pad_width(self, pad_width, mode): + arr = np.arange(30).reshape((6, 5)) + match = "index can't contain negative values" + with pytest.raises(ValueError, match=match): + np.pad(arr, pad_width, mode) + + @pytest.mark.parametrize("pad_width, dtype", [ + ("3", None), + ("word", None), + (None, None), + (object(), None), + (3.4, None), + (((2, 3, 4), (3, 2)), object), + (complex(1, -1), None), + (((-2.1, 3), (3, 2)), None), + ]) + @pytest.mark.parametrize("mode", _all_modes.keys()) + def test_bad_type(self, pad_width, dtype, mode): + arr = np.arange(30).reshape((6, 5)) + match = "`pad_width` must be of integral type." 
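+        # The same TypeError message is expected whether `pad_width` is
+        # passed as a plain Python object or wrapped in np.array below.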
+ if dtype is not None: + # avoid DeprecationWarning when not specifying dtype + with pytest.raises(TypeError, match=match): + np.pad(arr, np.array(pad_width, dtype=dtype), mode) + else: + with pytest.raises(TypeError, match=match): + np.pad(arr, pad_width, mode) + with pytest.raises(TypeError, match=match): + np.pad(arr, np.array(pad_width), mode) + + def test_pad_width_as_ndarray(self): + a = np.arange(12) + a = np.reshape(a, (4, 3)) + a = np.pad(a, np.array(((2, 3), (3, 2))), 'edge') + b = np.array( + [[0, 0, 0, 0, 1, 2, 2, 2], + [0, 0, 0, 0, 1, 2, 2, 2], + + [0, 0, 0, 0, 1, 2, 2, 2], + [3, 3, 3, 3, 4, 5, 5, 5], + [6, 6, 6, 6, 7, 8, 8, 8], + [9, 9, 9, 9, 10, 11, 11, 11], + + [9, 9, 9, 9, 10, 11, 11, 11], + [9, 9, 9, 9, 10, 11, 11, 11], + [9, 9, 9, 9, 10, 11, 11, 11]] + ) + assert_array_equal(a, b) + + @pytest.mark.parametrize("pad_width", [0, (0, 0), ((0, 0), (0, 0))]) + @pytest.mark.parametrize("mode", _all_modes.keys()) + def test_zero_pad_width(self, pad_width, mode): + arr = np.arange(30).reshape(6, 5) + assert_array_equal(arr, np.pad(arr, pad_width, mode=mode)) + + +@pytest.mark.parametrize("mode", _all_modes.keys()) +def test_kwargs(mode): + """Test behavior of pad's kwargs for the given mode.""" + allowed = _all_modes[mode] + not_allowed = {} + for kwargs in _all_modes.values(): + if kwargs != allowed: + not_allowed.update(kwargs) + # Test if allowed keyword arguments pass + np.pad([1, 2, 3], 1, mode, **allowed) + # Test if prohibited keyword arguments of other modes raise an error + for key, value in not_allowed.items(): + match = "unsupported keyword arguments for mode '{}'".format(mode) + with pytest.raises(ValueError, match=match): + np.pad([1, 2, 3], 1, mode, **{key: value}) + + +def test_constant_zero_default(): + arr = np.array([1, 1]) + assert_array_equal(np.pad(arr, 2), [0, 0, 1, 1, 0, 0]) + + +@pytest.mark.parametrize("mode", [1, "const", object(), None, True, False]) +def test_unsupported_mode(mode): + match= "mode '{}' is not supported".format(mode) + with pytest.raises(ValueError, match=match): + np.pad([1, 2, 3], 4, mode=mode) + + +@pytest.mark.parametrize("mode", _all_modes.keys()) +def test_non_contiguous_array(mode): + arr = np.arange(24).reshape(4, 6)[::2, ::2] + result = np.pad(arr, (2, 3), mode) + assert result.shape == (7, 8) + assert_equal(result[2:-3, 2:-3], arr) + + +@pytest.mark.parametrize("mode", _all_modes.keys()) +def test_memory_layout_persistence(mode): + """Test if C and F order is preserved for all pad modes.""" + x = np.ones((5, 10), order='C') + assert np.pad(x, 5, mode).flags["C_CONTIGUOUS"] + x = np.ones((5, 10), order='F') + assert np.pad(x, 5, mode).flags["F_CONTIGUOUS"] + + +@pytest.mark.parametrize("dtype", _numeric_dtypes) +@pytest.mark.parametrize("mode", _all_modes.keys()) +def test_dtype_persistence(dtype, mode): + arr = np.zeros((3, 2, 1), dtype=dtype) + result = np.pad(arr, 1, mode=mode) + assert result.dtype == dtype diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/test_arraysetops.py b/.local/lib/python3.8/site-packages/numpy/lib/tests/test_arraysetops.py new file mode 100644 index 0000000..13385cd --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/tests/test_arraysetops.py @@ -0,0 +1,767 @@ +"""Test functions for 1D array set operations. 
+ +""" +import numpy as np + +from numpy.testing import (assert_array_equal, assert_equal, + assert_raises, assert_raises_regex) +from numpy.lib.arraysetops import ( + ediff1d, intersect1d, setxor1d, union1d, setdiff1d, unique, in1d, isin + ) +import pytest + + +class TestSetOps: + + def test_intersect1d(self): + # unique inputs + a = np.array([5, 7, 1, 2]) + b = np.array([2, 4, 3, 1, 5]) + + ec = np.array([1, 2, 5]) + c = intersect1d(a, b, assume_unique=True) + assert_array_equal(c, ec) + + # non-unique inputs + a = np.array([5, 5, 7, 1, 2]) + b = np.array([2, 1, 4, 3, 3, 1, 5]) + + ed = np.array([1, 2, 5]) + c = intersect1d(a, b) + assert_array_equal(c, ed) + assert_array_equal([], intersect1d([], [])) + + def test_intersect1d_array_like(self): + # See gh-11772 + class Test: + def __array__(self): + return np.arange(3) + + a = Test() + res = intersect1d(a, a) + assert_array_equal(res, a) + res = intersect1d([1, 2, 3], [1, 2, 3]) + assert_array_equal(res, [1, 2, 3]) + + def test_intersect1d_indices(self): + # unique inputs + a = np.array([1, 2, 3, 4]) + b = np.array([2, 1, 4, 6]) + c, i1, i2 = intersect1d(a, b, assume_unique=True, return_indices=True) + ee = np.array([1, 2, 4]) + assert_array_equal(c, ee) + assert_array_equal(a[i1], ee) + assert_array_equal(b[i2], ee) + + # non-unique inputs + a = np.array([1, 2, 2, 3, 4, 3, 2]) + b = np.array([1, 8, 4, 2, 2, 3, 2, 3]) + c, i1, i2 = intersect1d(a, b, return_indices=True) + ef = np.array([1, 2, 3, 4]) + assert_array_equal(c, ef) + assert_array_equal(a[i1], ef) + assert_array_equal(b[i2], ef) + + # non1d, unique inputs + a = np.array([[2, 4, 5, 6], [7, 8, 1, 15]]) + b = np.array([[3, 2, 7, 6], [10, 12, 8, 9]]) + c, i1, i2 = intersect1d(a, b, assume_unique=True, return_indices=True) + ui1 = np.unravel_index(i1, a.shape) + ui2 = np.unravel_index(i2, b.shape) + ea = np.array([2, 6, 7, 8]) + assert_array_equal(ea, a[ui1]) + assert_array_equal(ea, b[ui2]) + + # non1d, not assumed to be uniqueinputs + a = np.array([[2, 4, 5, 6, 6], [4, 7, 8, 7, 2]]) + b = np.array([[3, 2, 7, 7], [10, 12, 8, 7]]) + c, i1, i2 = intersect1d(a, b, return_indices=True) + ui1 = np.unravel_index(i1, a.shape) + ui2 = np.unravel_index(i2, b.shape) + ea = np.array([2, 7, 8]) + assert_array_equal(ea, a[ui1]) + assert_array_equal(ea, b[ui2]) + + def test_setxor1d(self): + a = np.array([5, 7, 1, 2]) + b = np.array([2, 4, 3, 1, 5]) + + ec = np.array([3, 4, 7]) + c = setxor1d(a, b) + assert_array_equal(c, ec) + + a = np.array([1, 2, 3]) + b = np.array([6, 5, 4]) + + ec = np.array([1, 2, 3, 4, 5, 6]) + c = setxor1d(a, b) + assert_array_equal(c, ec) + + a = np.array([1, 8, 2, 3]) + b = np.array([6, 5, 4, 8]) + + ec = np.array([1, 2, 3, 4, 5, 6]) + c = setxor1d(a, b) + assert_array_equal(c, ec) + + assert_array_equal([], setxor1d([], [])) + + def test_ediff1d(self): + zero_elem = np.array([]) + one_elem = np.array([1]) + two_elem = np.array([1, 2]) + + assert_array_equal([], ediff1d(zero_elem)) + assert_array_equal([0], ediff1d(zero_elem, to_begin=0)) + assert_array_equal([0], ediff1d(zero_elem, to_end=0)) + assert_array_equal([-1, 0], ediff1d(zero_elem, to_begin=-1, to_end=0)) + assert_array_equal([], ediff1d(one_elem)) + assert_array_equal([1], ediff1d(two_elem)) + assert_array_equal([7, 1, 9], ediff1d(two_elem, to_begin=7, to_end=9)) + assert_array_equal([5, 6, 1, 7, 8], + ediff1d(two_elem, to_begin=[5, 6], to_end=[7, 8])) + assert_array_equal([1, 9], ediff1d(two_elem, to_end=9)) + assert_array_equal([1, 7, 8], ediff1d(two_elem, to_end=[7, 8])) + assert_array_equal([7, 1], 
ediff1d(two_elem, to_begin=7)) + assert_array_equal([5, 6, 1], ediff1d(two_elem, to_begin=[5, 6])) + + @pytest.mark.parametrize("ary, prepend, append, expected", [ + # should fail because trying to cast + # np.nan standard floating point value + # into an integer array: + (np.array([1, 2, 3], dtype=np.int64), + None, + np.nan, + 'to_end'), + # should fail because attempting + # to downcast to int type: + (np.array([1, 2, 3], dtype=np.int64), + np.array([5, 7, 2], dtype=np.float32), + None, + 'to_begin'), + # should fail because attempting to cast + # two special floating point values + # to integers (on both sides of ary), + # `to_begin` is in the error message as the impl checks this first: + (np.array([1., 3., 9.], dtype=np.int8), + np.nan, + np.nan, + 'to_begin'), + ]) + def test_ediff1d_forbidden_type_casts(self, ary, prepend, append, expected): + # verify resolution of gh-11490 + + # specifically, raise an appropriate + # Exception when attempting to append or + # prepend with an incompatible type + msg = 'dtype of `{}` must be compatible'.format(expected) + with assert_raises_regex(TypeError, msg): + ediff1d(ary=ary, + to_end=append, + to_begin=prepend) + + @pytest.mark.parametrize( + "ary,prepend,append,expected", + [ + (np.array([1, 2, 3], dtype=np.int16), + 2**16, # will be cast to int16 under same kind rule. + 2**16 + 4, + np.array([0, 1, 1, 4], dtype=np.int16)), + (np.array([1, 2, 3], dtype=np.float32), + np.array([5], dtype=np.float64), + None, + np.array([5, 1, 1], dtype=np.float32)), + (np.array([1, 2, 3], dtype=np.int32), + 0, + 0, + np.array([0, 1, 1, 0], dtype=np.int32)), + (np.array([1, 2, 3], dtype=np.int64), + 3, + -9, + np.array([3, 1, 1, -9], dtype=np.int64)), + ] + ) + def test_ediff1d_scalar_handling(self, + ary, + prepend, + append, + expected): + # maintain backwards-compatibility + # of scalar prepend / append behavior + # in ediff1d following fix for gh-11490 + actual = np.ediff1d(ary=ary, + to_end=append, + to_begin=prepend) + assert_equal(actual, expected) + assert actual.dtype == expected.dtype + + def test_isin(self): + # the tests for in1d cover most of isin's behavior + # if in1d is removed, would need to change those tests to test + # isin instead. + def _isin_slow(a, b): + b = np.asarray(b).flatten().tolist() + return a in b + isin_slow = np.vectorize(_isin_slow, otypes=[bool], excluded={1}) + + def assert_isin_equal(a, b): + x = isin(a, b) + y = isin_slow(a, b) + assert_array_equal(x, y) + + # multidimensional arrays in both arguments + a = np.arange(24).reshape([2, 3, 4]) + b = np.array([[10, 20, 30], [0, 1, 3], [11, 22, 33]]) + assert_isin_equal(a, b) + + # array-likes as both arguments + c = [(9, 8), (7, 6)] + d = (9, 7) + assert_isin_equal(c, d) + + # zero-d array: + f = np.array(3) + assert_isin_equal(f, b) + assert_isin_equal(a, f) + assert_isin_equal(f, f) + + # scalar: + assert_isin_equal(5, b) + assert_isin_equal(a, 6) + assert_isin_equal(5, 6) + + # empty array-like: + x = [] + assert_isin_equal(x, b) + assert_isin_equal(a, x) + assert_isin_equal(x, x) + + def test_in1d(self): + # we use two different sizes for the b array here to test the + # two different paths in in1d(). 
+        for mult in (1, 10):
+            # One check without np.array to make sure lists are handled
+            # correctly
+            a = [5, 7, 1, 2]
+            b = [2, 4, 3, 1, 5] * mult
+            ec = np.array([True, False, True, True])
+            c = in1d(a, b, assume_unique=True)
+            assert_array_equal(c, ec)
+
+            a[0] = 8
+            ec = np.array([False, False, True, True])
+            c = in1d(a, b, assume_unique=True)
+            assert_array_equal(c, ec)
+
+            a[0], a[3] = 4, 8
+            ec = np.array([True, False, True, False])
+            c = in1d(a, b, assume_unique=True)
+            assert_array_equal(c, ec)
+
+            a = np.array([5, 4, 5, 3, 4, 4, 3, 4, 3, 5, 2, 1, 5, 5])
+            b = [2, 3, 4] * mult
+            ec = [False, True, False, True, True, True, True, True, True,
+                  False, True, False, False, False]
+            c = in1d(a, b)
+            assert_array_equal(c, ec)
+
+            b = b + [5, 5, 4] * mult
+            ec = [True, True, True, True, True, True, True, True, True, True,
+                  True, False, True, True]
+            c = in1d(a, b)
+            assert_array_equal(c, ec)
+
+            a = np.array([5, 7, 1, 2])
+            b = np.array([2, 4, 3, 1, 5] * mult)
+            ec = np.array([True, False, True, True])
+            c = in1d(a, b)
+            assert_array_equal(c, ec)
+
+            a = np.array([5, 7, 1, 1, 2])
+            b = np.array([2, 4, 3, 3, 1, 5] * mult)
+            ec = np.array([True, False, True, True, True])
+            c = in1d(a, b)
+            assert_array_equal(c, ec)
+
+            a = np.array([5, 5])
+            b = np.array([2, 2] * mult)
+            ec = np.array([False, False])
+            c = in1d(a, b)
+            assert_array_equal(c, ec)
+
+            a = np.array([5])
+            b = np.array([2])
+            ec = np.array([False])
+            c = in1d(a, b)
+            assert_array_equal(c, ec)
+
+        assert_array_equal(in1d([], []), [])
+
+    def test_in1d_char_array(self):
+        a = np.array(['a', 'b', 'c', 'd', 'e', 'c', 'e', 'b'])
+        b = np.array(['a', 'c'])
+
+        ec = np.array([True, False, True, False, False, True, False, False])
+        c = in1d(a, b)
+
+        assert_array_equal(c, ec)
+
+    def test_in1d_invert(self):
+        "Test in1d's invert parameter"
+        # We use two different sizes for the b array here to test the
+        # two different paths in in1d().
+        for mult in (1, 10):
+            a = np.array([5, 4, 5, 3, 4, 4, 3, 4, 3, 5, 2, 1, 5, 5])
+            b = [2, 3, 4] * mult
+            assert_array_equal(np.invert(in1d(a, b)), in1d(a, b, invert=True))
+
+    def test_in1d_ravel(self):
+        # Test that in1d ravels its input arrays. This is not documented
+        # behavior, however; the test is here to ensure consistency.
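+        # i.e. in1d should treat a.reshape(2, 3) the same as a.ravel() and
+        # always return a flat boolean array with a.size entries.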
+ a = np.arange(6).reshape(2, 3) + b = np.arange(3, 9).reshape(3, 2) + long_b = np.arange(3, 63).reshape(30, 2) + ec = np.array([False, False, False, True, True, True]) + + assert_array_equal(in1d(a, b, assume_unique=True), ec) + assert_array_equal(in1d(a, b, assume_unique=False), ec) + assert_array_equal(in1d(a, long_b, assume_unique=True), ec) + assert_array_equal(in1d(a, long_b, assume_unique=False), ec) + + def test_in1d_first_array_is_object(self): + ar1 = [None] + ar2 = np.array([1]*10) + expected = np.array([False]) + result = np.in1d(ar1, ar2) + assert_array_equal(result, expected) + + def test_in1d_second_array_is_object(self): + ar1 = 1 + ar2 = np.array([None]*10) + expected = np.array([False]) + result = np.in1d(ar1, ar2) + assert_array_equal(result, expected) + + def test_in1d_both_arrays_are_object(self): + ar1 = [None] + ar2 = np.array([None]*10) + expected = np.array([True]) + result = np.in1d(ar1, ar2) + assert_array_equal(result, expected) + + def test_in1d_both_arrays_have_structured_dtype(self): + # Test arrays of a structured data type containing an integer field + # and a field of dtype `object` allowing for arbitrary Python objects + dt = np.dtype([('field1', int), ('field2', object)]) + ar1 = np.array([(1, None)], dtype=dt) + ar2 = np.array([(1, None)]*10, dtype=dt) + expected = np.array([True]) + result = np.in1d(ar1, ar2) + assert_array_equal(result, expected) + + def test_in1d_with_arrays_containing_tuples(self): + ar1 = np.array([(1,), 2], dtype=object) + ar2 = np.array([(1,), 2], dtype=object) + expected = np.array([True, True]) + result = np.in1d(ar1, ar2) + assert_array_equal(result, expected) + result = np.in1d(ar1, ar2, invert=True) + assert_array_equal(result, np.invert(expected)) + + # An integer is added at the end of the array to make sure + # that the array builder will create the array with tuples + # and after it's created the integer is removed. + # There's a bug in the array constructor that doesn't handle + # tuples properly and adding the integer fixes that. 
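+        # (Without the trailing integer, np.array may try to interpret the
+        # sequence of tuples as a multidimensional array instead of a 1-D
+        # object array of tuples; slicing the integer off afterwards leaves
+        # the intended array.)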
+ ar1 = np.array([(1,), (2, 1), 1], dtype=object) + ar1 = ar1[:-1] + ar2 = np.array([(1,), (2, 1), 1], dtype=object) + ar2 = ar2[:-1] + expected = np.array([True, True]) + result = np.in1d(ar1, ar2) + assert_array_equal(result, expected) + result = np.in1d(ar1, ar2, invert=True) + assert_array_equal(result, np.invert(expected)) + + ar1 = np.array([(1,), (2, 3), 1], dtype=object) + ar1 = ar1[:-1] + ar2 = np.array([(1,), 2], dtype=object) + expected = np.array([True, False]) + result = np.in1d(ar1, ar2) + assert_array_equal(result, expected) + result = np.in1d(ar1, ar2, invert=True) + assert_array_equal(result, np.invert(expected)) + + def test_union1d(self): + a = np.array([5, 4, 7, 1, 2]) + b = np.array([2, 4, 3, 3, 2, 1, 5]) + + ec = np.array([1, 2, 3, 4, 5, 7]) + c = union1d(a, b) + assert_array_equal(c, ec) + + # Tests gh-10340, arguments to union1d should be + # flattened if they are not already 1D + x = np.array([[0, 1, 2], [3, 4, 5]]) + y = np.array([0, 1, 2, 3, 4]) + ez = np.array([0, 1, 2, 3, 4, 5]) + z = union1d(x, y) + assert_array_equal(z, ez) + + assert_array_equal([], union1d([], [])) + + def test_setdiff1d(self): + a = np.array([6, 5, 4, 7, 1, 2, 7, 4]) + b = np.array([2, 4, 3, 3, 2, 1, 5]) + + ec = np.array([6, 7]) + c = setdiff1d(a, b) + assert_array_equal(c, ec) + + a = np.arange(21) + b = np.arange(19) + ec = np.array([19, 20]) + c = setdiff1d(a, b) + assert_array_equal(c, ec) + + assert_array_equal([], setdiff1d([], [])) + a = np.array((), np.uint32) + assert_equal(setdiff1d(a, []).dtype, np.uint32) + + def test_setdiff1d_unique(self): + a = np.array([3, 2, 1]) + b = np.array([7, 5, 2]) + expected = np.array([3, 1]) + actual = setdiff1d(a, b, assume_unique=True) + assert_equal(actual, expected) + + def test_setdiff1d_char_array(self): + a = np.array(['a', 'b', 'c']) + b = np.array(['a', 'b', 's']) + assert_array_equal(setdiff1d(a, b), np.array(['c'])) + + def test_manyways(self): + a = np.array([5, 7, 1, 2, 8]) + b = np.array([9, 8, 2, 4, 3, 1, 5]) + + c1 = setxor1d(a, b) + aux1 = intersect1d(a, b) + aux2 = union1d(a, b) + c2 = setdiff1d(aux2, aux1) + assert_array_equal(c1, c2) + + +class TestUnique: + + def test_unique_1d(self): + + def check_all(a, b, i1, i2, c, dt): + base_msg = 'check {0} failed for type {1}' + + msg = base_msg.format('values', dt) + v = unique(a) + assert_array_equal(v, b, msg) + + msg = base_msg.format('return_index', dt) + v, j = unique(a, True, False, False) + assert_array_equal(v, b, msg) + assert_array_equal(j, i1, msg) + + msg = base_msg.format('return_inverse', dt) + v, j = unique(a, False, True, False) + assert_array_equal(v, b, msg) + assert_array_equal(j, i2, msg) + + msg = base_msg.format('return_counts', dt) + v, j = unique(a, False, False, True) + assert_array_equal(v, b, msg) + assert_array_equal(j, c, msg) + + msg = base_msg.format('return_index and return_inverse', dt) + v, j1, j2 = unique(a, True, True, False) + assert_array_equal(v, b, msg) + assert_array_equal(j1, i1, msg) + assert_array_equal(j2, i2, msg) + + msg = base_msg.format('return_index and return_counts', dt) + v, j1, j2 = unique(a, True, False, True) + assert_array_equal(v, b, msg) + assert_array_equal(j1, i1, msg) + assert_array_equal(j2, c, msg) + + msg = base_msg.format('return_inverse and return_counts', dt) + v, j1, j2 = unique(a, False, True, True) + assert_array_equal(v, b, msg) + assert_array_equal(j1, i2, msg) + assert_array_equal(j2, c, msg) + + msg = base_msg.format(('return_index, return_inverse ' + 'and return_counts'), dt) + v, j1, j2, j3 = unique(a, True, 
True, True) + assert_array_equal(v, b, msg) + assert_array_equal(j1, i1, msg) + assert_array_equal(j2, i2, msg) + assert_array_equal(j3, c, msg) + + a = [5, 7, 1, 2, 1, 5, 7]*10 + b = [1, 2, 5, 7] + i1 = [2, 3, 0, 1] + i2 = [2, 3, 0, 1, 0, 2, 3]*10 + c = np.multiply([2, 1, 2, 2], 10) + + # test for numeric arrays + types = [] + types.extend(np.typecodes['AllInteger']) + types.extend(np.typecodes['AllFloat']) + types.append('datetime64[D]') + types.append('timedelta64[D]') + for dt in types: + aa = np.array(a, dt) + bb = np.array(b, dt) + check_all(aa, bb, i1, i2, c, dt) + + # test for object arrays + dt = 'O' + aa = np.empty(len(a), dt) + aa[:] = a + bb = np.empty(len(b), dt) + bb[:] = b + check_all(aa, bb, i1, i2, c, dt) + + # test for structured arrays + dt = [('', 'i'), ('', 'i')] + aa = np.array(list(zip(a, a)), dt) + bb = np.array(list(zip(b, b)), dt) + check_all(aa, bb, i1, i2, c, dt) + + # test for ticket #2799 + aa = [1. + 0.j, 1 - 1.j, 1] + assert_array_equal(np.unique(aa), [1. - 1.j, 1. + 0.j]) + + # test for ticket #4785 + a = [(1, 2), (1, 2), (2, 3)] + unq = [1, 2, 3] + inv = [0, 1, 0, 1, 1, 2] + a1 = unique(a) + assert_array_equal(a1, unq) + a2, a2_inv = unique(a, return_inverse=True) + assert_array_equal(a2, unq) + assert_array_equal(a2_inv, inv) + + # test for chararrays with return_inverse (gh-5099) + a = np.chararray(5) + a[...] = '' + a2, a2_inv = np.unique(a, return_inverse=True) + assert_array_equal(a2_inv, np.zeros(5)) + + # test for ticket #9137 + a = [] + a1_idx = np.unique(a, return_index=True)[1] + a2_inv = np.unique(a, return_inverse=True)[1] + a3_idx, a3_inv = np.unique(a, return_index=True, + return_inverse=True)[1:] + assert_equal(a1_idx.dtype, np.intp) + assert_equal(a2_inv.dtype, np.intp) + assert_equal(a3_idx.dtype, np.intp) + assert_equal(a3_inv.dtype, np.intp) + + # test for ticket 2111 - float + a = [2.0, np.nan, 1.0, np.nan] + ua = [1.0, 2.0, np.nan] + ua_idx = [2, 0, 1] + ua_inv = [1, 2, 0, 2] + ua_cnt = [1, 1, 2] + assert_equal(np.unique(a), ua) + assert_equal(np.unique(a, return_index=True), (ua, ua_idx)) + assert_equal(np.unique(a, return_inverse=True), (ua, ua_inv)) + assert_equal(np.unique(a, return_counts=True), (ua, ua_cnt)) + + # test for ticket 2111 - complex + a = [2.0-1j, np.nan, 1.0+1j, complex(0.0, np.nan), complex(1.0, np.nan)] + ua = [1.0+1j, 2.0-1j, complex(0.0, np.nan)] + ua_idx = [2, 0, 3] + ua_inv = [1, 2, 0, 2, 2] + ua_cnt = [1, 1, 3] + assert_equal(np.unique(a), ua) + assert_equal(np.unique(a, return_index=True), (ua, ua_idx)) + assert_equal(np.unique(a, return_inverse=True), (ua, ua_inv)) + assert_equal(np.unique(a, return_counts=True), (ua, ua_cnt)) + + # test for ticket 2111 - datetime64 + nat = np.datetime64('nat') + a = [np.datetime64('2020-12-26'), nat, np.datetime64('2020-12-24'), nat] + ua = [np.datetime64('2020-12-24'), np.datetime64('2020-12-26'), nat] + ua_idx = [2, 0, 1] + ua_inv = [1, 2, 0, 2] + ua_cnt = [1, 1, 2] + assert_equal(np.unique(a), ua) + assert_equal(np.unique(a, return_index=True), (ua, ua_idx)) + assert_equal(np.unique(a, return_inverse=True), (ua, ua_inv)) + assert_equal(np.unique(a, return_counts=True), (ua, ua_cnt)) + + # test for ticket 2111 - timedelta + nat = np.timedelta64('nat') + a = [np.timedelta64(1, 'D'), nat, np.timedelta64(1, 'h'), nat] + ua = [np.timedelta64(1, 'h'), np.timedelta64(1, 'D'), nat] + ua_idx = [2, 0, 1] + ua_inv = [1, 2, 0, 2] + ua_cnt = [1, 1, 2] + assert_equal(np.unique(a), ua) + assert_equal(np.unique(a, return_index=True), (ua, ua_idx)) + assert_equal(np.unique(a, 
return_inverse=True), (ua, ua_inv)) + assert_equal(np.unique(a, return_counts=True), (ua, ua_cnt)) + + # test for gh-19300 + all_nans = [np.nan] * 4 + ua = [np.nan] + ua_idx = [0] + ua_inv = [0, 0, 0, 0] + ua_cnt = [4] + assert_equal(np.unique(all_nans), ua) + assert_equal(np.unique(all_nans, return_index=True), (ua, ua_idx)) + assert_equal(np.unique(all_nans, return_inverse=True), (ua, ua_inv)) + assert_equal(np.unique(all_nans, return_counts=True), (ua, ua_cnt)) + + def test_unique_axis_errors(self): + assert_raises(TypeError, self._run_axis_tests, object) + assert_raises(TypeError, self._run_axis_tests, + [('a', int), ('b', object)]) + + assert_raises(np.AxisError, unique, np.arange(10), axis=2) + assert_raises(np.AxisError, unique, np.arange(10), axis=-2) + + def test_unique_axis_list(self): + msg = "Unique failed on list of lists" + inp = [[0, 1, 0], [0, 1, 0]] + inp_arr = np.asarray(inp) + assert_array_equal(unique(inp, axis=0), unique(inp_arr, axis=0), msg) + assert_array_equal(unique(inp, axis=1), unique(inp_arr, axis=1), msg) + + def test_unique_axis(self): + types = [] + types.extend(np.typecodes['AllInteger']) + types.extend(np.typecodes['AllFloat']) + types.append('datetime64[D]') + types.append('timedelta64[D]') + types.append([('a', int), ('b', int)]) + types.append([('a', int), ('b', float)]) + + for dtype in types: + self._run_axis_tests(dtype) + + msg = 'Non-bitwise-equal booleans test failed' + data = np.arange(10, dtype=np.uint8).reshape(-1, 2).view(bool) + result = np.array([[False, True], [True, True]], dtype=bool) + assert_array_equal(unique(data, axis=0), result, msg) + + msg = 'Negative zero equality test failed' + data = np.array([[-0.0, 0.0], [0.0, -0.0], [-0.0, 0.0], [0.0, -0.0]]) + result = np.array([[-0.0, 0.0]]) + assert_array_equal(unique(data, axis=0), result, msg) + + @pytest.mark.parametrize("axis", [0, -1]) + def test_unique_1d_with_axis(self, axis): + x = np.array([4, 3, 2, 3, 2, 1, 2, 2]) + uniq = unique(x, axis=axis) + assert_array_equal(uniq, [1, 2, 3, 4]) + + def test_unique_axis_zeros(self): + # issue 15559 + single_zero = np.empty(shape=(2, 0), dtype=np.int8) + uniq, idx, inv, cnt = unique(single_zero, axis=0, return_index=True, + return_inverse=True, return_counts=True) + + # there's 1 element of shape (0,) along axis 0 + assert_equal(uniq.dtype, single_zero.dtype) + assert_array_equal(uniq, np.empty(shape=(1, 0))) + assert_array_equal(idx, np.array([0])) + assert_array_equal(inv, np.array([0, 0])) + assert_array_equal(cnt, np.array([2])) + + # there's 0 elements of shape (2,) along axis 1 + uniq, idx, inv, cnt = unique(single_zero, axis=1, return_index=True, + return_inverse=True, return_counts=True) + + assert_equal(uniq.dtype, single_zero.dtype) + assert_array_equal(uniq, np.empty(shape=(2, 0))) + assert_array_equal(idx, np.array([])) + assert_array_equal(inv, np.array([])) + assert_array_equal(cnt, np.array([])) + + # test a "complicated" shape + shape = (0, 2, 0, 3, 0, 4, 0) + multiple_zeros = np.empty(shape=shape) + for axis in range(len(shape)): + expected_shape = list(shape) + if shape[axis] == 0: + expected_shape[axis] = 0 + else: + expected_shape[axis] = 1 + + assert_array_equal(unique(multiple_zeros, axis=axis), + np.empty(shape=expected_shape)) + + def test_unique_masked(self): + # issue 8664 + x = np.array([64, 0, 1, 2, 3, 63, 63, 0, 0, 0, 1, 2, 0, 63, 0], + dtype='uint8') + y = np.ma.masked_equal(x, 0) + + v = np.unique(y) + v2, i, c = np.unique(y, return_index=True, return_counts=True) + + msg = 'Unique returned different results 
when asked for index' + assert_array_equal(v.data, v2.data, msg) + assert_array_equal(v.mask, v2.mask, msg) + + def test_unique_sort_order_with_axis(self): + # These tests fail if sorting along axis is done by treating subarrays + # as unsigned byte strings. See gh-10495. + fmt = "sort order incorrect for integer type '%s'" + for dt in 'bhilq': + a = np.array([[-1], [0]], dt) + b = np.unique(a, axis=0) + assert_array_equal(a, b, fmt % dt) + + def _run_axis_tests(self, dtype): + data = np.array([[0, 1, 0, 0], + [1, 0, 0, 0], + [0, 1, 0, 0], + [1, 0, 0, 0]]).astype(dtype) + + msg = 'Unique with 1d array and axis=0 failed' + result = np.array([0, 1]) + assert_array_equal(unique(data), result.astype(dtype), msg) + + msg = 'Unique with 2d array and axis=0 failed' + result = np.array([[0, 1, 0, 0], [1, 0, 0, 0]]) + assert_array_equal(unique(data, axis=0), result.astype(dtype), msg) + + msg = 'Unique with 2d array and axis=1 failed' + result = np.array([[0, 0, 1], [0, 1, 0], [0, 0, 1], [0, 1, 0]]) + assert_array_equal(unique(data, axis=1), result.astype(dtype), msg) + + msg = 'Unique with 3d array and axis=2 failed' + data3d = np.array([[[1, 1], + [1, 0]], + [[0, 1], + [0, 0]]]).astype(dtype) + result = np.take(data3d, [1, 0], axis=2) + assert_array_equal(unique(data3d, axis=2), result, msg) + + uniq, idx, inv, cnt = unique(data, axis=0, return_index=True, + return_inverse=True, return_counts=True) + msg = "Unique's return_index=True failed with axis=0" + assert_array_equal(data[idx], uniq, msg) + msg = "Unique's return_inverse=True failed with axis=0" + assert_array_equal(uniq[inv], data) + msg = "Unique's return_counts=True failed with axis=0" + assert_array_equal(cnt, np.array([2, 2]), msg) + + uniq, idx, inv, cnt = unique(data, axis=1, return_index=True, + return_inverse=True, return_counts=True) + msg = "Unique's return_index=True failed with axis=1" + assert_array_equal(data[:, idx], uniq) + msg = "Unique's return_inverse=True failed with axis=1" + assert_array_equal(uniq[:, inv], data) + msg = "Unique's return_counts=True failed with axis=1" + assert_array_equal(cnt, np.array([2, 1, 1]), msg) diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/test_arrayterator.py b/.local/lib/python3.8/site-packages/numpy/lib/tests/test_arrayterator.py new file mode 100644 index 0000000..c00ed13 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/tests/test_arrayterator.py @@ -0,0 +1,46 @@ +from operator import mul +from functools import reduce + +import numpy as np +from numpy.random import randint +from numpy.lib import Arrayterator +from numpy.testing import assert_ + + +def test(): + np.random.seed(np.arange(10)) + + # Create a random array + ndims = randint(5)+1 + shape = tuple(randint(10)+1 for dim in range(ndims)) + els = reduce(mul, shape) + a = np.arange(els) + a.shape = shape + + buf_size = randint(2*els) + b = Arrayterator(a, buf_size) + + # Check that each block has at most ``buf_size`` elements + for block in b: + assert_(len(block.flat) <= (buf_size or els)) + + # Check that all elements are iterated correctly + assert_(list(b.flat) == list(a.flat)) + + # Slice arrayterator + start = [randint(dim) for dim in shape] + stop = [randint(dim)+1 for dim in shape] + step = [randint(dim)+1 for dim in shape] + slice_ = tuple(slice(*t) for t in zip(start, stop, step)) + c = b[slice_] + d = a[slice_] + + # Check that each block has at most ``buf_size`` elements + for block in c: + assert_(len(block.flat) <= (buf_size or els)) + + # Check that the arrayterator is sliced 
correctly + assert_(np.all(c.__array__() == d)) + + # Check that all elements are iterated correctly + assert_(list(c.flat) == list(d.flat)) diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/test_financial_expired.py b/.local/lib/python3.8/site-packages/numpy/lib/tests/test_financial_expired.py new file mode 100644 index 0000000..70b0cd7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/tests/test_financial_expired.py @@ -0,0 +1,13 @@ +import sys +import pytest +import numpy as np + + +@pytest.mark.skipif(sys.version_info[:2] < (3, 7), + reason="requires python 3.7 or higher") +def test_financial_expired(): + match = 'NEP 32' + with pytest.warns(DeprecationWarning, match=match): + func = np.fv + with pytest.raises(RuntimeError, match=match): + func(1, 2, 3) diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/test_format.py b/.local/lib/python3.8/site-packages/numpy/lib/tests/test_format.py new file mode 100644 index 0000000..78e67a8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/tests/test_format.py @@ -0,0 +1,960 @@ +# doctest +r''' Test the .npy file format. + +Set up: + + >>> import sys + >>> from io import BytesIO + >>> from numpy.lib import format + >>> + >>> scalars = [ + ... np.uint8, + ... np.int8, + ... np.uint16, + ... np.int16, + ... np.uint32, + ... np.int32, + ... np.uint64, + ... np.int64, + ... np.float32, + ... np.float64, + ... np.complex64, + ... np.complex128, + ... object, + ... ] + >>> + >>> basic_arrays = [] + >>> + >>> for scalar in scalars: + ... for endian in '<>': + ... dtype = np.dtype(scalar).newbyteorder(endian) + ... basic = np.arange(15).astype(dtype) + ... basic_arrays.extend([ + ... np.array([], dtype=dtype), + ... np.array(10, dtype=dtype), + ... basic, + ... basic.reshape((3,5)), + ... basic.reshape((3,5)).T, + ... basic.reshape((3,5))[::-1,::2], + ... ]) + ... + >>> + >>> Pdescr = [ + ... ('x', 'i4', (2,)), + ... ('y', 'f8', (2, 2)), + ... ('z', 'u1')] + >>> + >>> + >>> PbufferT = [ + ... ([3,2], [[6.,4.],[6.,4.]], 8), + ... ([4,3], [[7.,5.],[7.,5.]], 9), + ... ] + >>> + >>> + >>> Ndescr = [ + ... ('x', 'i4', (2,)), + ... ('Info', [ + ... ('value', 'c16'), + ... ('y2', 'f8'), + ... ('Info2', [ + ... ('name', 'S2'), + ... ('value', 'c16', (2,)), + ... ('y3', 'f8', (2,)), + ... ('z3', 'u4', (2,))]), + ... ('name', 'S2'), + ... ('z2', 'b1')]), + ... ('color', 'S2'), + ... ('info', [ + ... ('Name', 'U8'), + ... ('Value', 'c16')]), + ... ('y', 'f8', (2, 2)), + ... ('z', 'u1')] + >>> + >>> + >>> NbufferT = [ + ... ([3,2], (6j, 6., ('nn', [6j,4j], [6.,4.], [1,2]), 'NN', True), 'cc', ('NN', 6j), [[6.,4.],[6.,4.]], 8), + ... ([4,3], (7j, 7., ('oo', [7j,5j], [7.,5.], [2,1]), 'OO', False), 'dd', ('OO', 7j), [[7.,5.],[7.,5.]], 9), + ... ] + >>> + >>> + >>> record_arrays = [ + ... np.array(PbufferT, dtype=np.dtype(Pdescr).newbyteorder('<')), + ... np.array(NbufferT, dtype=np.dtype(Ndescr).newbyteorder('<')), + ... np.array(PbufferT, dtype=np.dtype(Pdescr).newbyteorder('>')), + ... np.array(NbufferT, dtype=np.dtype(Ndescr).newbyteorder('>')), + ... ] + +Test the magic string writing. + + >>> format.magic(1, 0) + '\x93NUMPY\x01\x00' + >>> format.magic(0, 0) + '\x93NUMPY\x00\x00' + >>> format.magic(255, 255) + '\x93NUMPY\xff\xff' + >>> format.magic(2, 5) + '\x93NUMPY\x02\x05' + +Test the magic string reading. 
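+ read_magic should return exactly the version pair that magic encoded,
+ read back here through a BytesIO stream: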
+ + >>> format.read_magic(BytesIO(format.magic(1, 0))) + (1, 0) + >>> format.read_magic(BytesIO(format.magic(0, 0))) + (0, 0) + >>> format.read_magic(BytesIO(format.magic(255, 255))) + (255, 255) + >>> format.read_magic(BytesIO(format.magic(2, 5))) + (2, 5) + +Test the header writing. + + >>> for arr in basic_arrays + record_arrays: + ... f = BytesIO() + ... format.write_array_header_1_0(f, arr) # XXX: arr is not a dict, items gets called on it + ... print(repr(f.getvalue())) + ... + "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '|u1', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '|u1', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '|i1', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '|i1', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': 'u2', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '>u2', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '>u2', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '>u2', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '>u2', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '>u2', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': 'i2', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '>i2', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '>i2', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '>i2', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '>i2', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '>i2', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': 'u4', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '>u4', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '>u4', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '>u4', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '>u4', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '>u4', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': 'i4', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '>i4', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '>i4', 'fortran_order': False, 
'shape': (15,)} \n" + "F\x00{'descr': '>i4', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '>i4', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '>i4', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': 'u8', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '>u8', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '>u8', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '>u8', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '>u8', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '>u8', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': 'i8', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '>i8', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '>i8', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '>i8', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '>i8', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '>i8', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': 'f4', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '>f4', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '>f4', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '>f4', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '>f4', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '>f4', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': 'f8', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '>f8', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '>f8', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '>f8', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '>f8', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '>f8', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': 'c8', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '>c8', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '>c8', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '>c8', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '>c8', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '>c8', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': 'c16', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '>c16', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '>c16', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '>c16', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '>c16', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '>c16', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': 'O', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': 'O', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': 'O', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': 'O', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': 'O', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': 'O', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': 'O', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': 'O', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': 'O', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': 'O', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': 'O', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': 'O', 'fortran_order': False, 'shape': (3, 3)} \n" + "v\x00{'descr': [('x', 'i4', (2,)), ('y', '>f8', 
(2, 2)), ('z', '|u1')],\n 'fortran_order': False,\n 'shape': (2,)} \n" + "\x16\x02{'descr': [('x', '>i4', (2,)),\n ('Info',\n [('value', '>c16'),\n ('y2', '>f8'),\n ('Info2',\n [('name', '|S2'),\n ('value', '>c16', (2,)),\n ('y3', '>f8', (2,)),\n ('z3', '>u4', (2,))]),\n ('name', '|S2'),\n ('z2', '|b1')]),\n ('color', '|S2'),\n ('info', [('Name', '>U8'), ('Value', '>c16')]),\n ('y', '>f8', (2, 2)),\n ('z', '|u1')],\n 'fortran_order': False,\n 'shape': (2,)} \n" +''' +import sys +import os +import warnings +import pytest +from io import BytesIO + +import numpy as np +from numpy.testing import ( + assert_, assert_array_equal, assert_raises, assert_raises_regex, + assert_warns, + ) +from numpy.lib import format + + +# Generate some basic arrays to test with. +scalars = [ + np.uint8, + np.int8, + np.uint16, + np.int16, + np.uint32, + np.int32, + np.uint64, + np.int64, + np.float32, + np.float64, + np.complex64, + np.complex128, + object, +] +basic_arrays = [] +for scalar in scalars: + for endian in '<>': + dtype = np.dtype(scalar).newbyteorder(endian) + basic = np.arange(1500).astype(dtype) + basic_arrays.extend([ + # Empty + np.array([], dtype=dtype), + # Rank-0 + np.array(10, dtype=dtype), + # 1-D + basic, + # 2-D C-contiguous + basic.reshape((30, 50)), + # 2-D F-contiguous + basic.reshape((30, 50)).T, + # 2-D non-contiguous + basic.reshape((30, 50))[::-1, ::2], + ]) + +# More complicated record arrays. +# This is the structure of the table used for plain objects: +# +# +-+-+-+ +# |x|y|z| +# +-+-+-+ + +# Structure of a plain array description: +Pdescr = [ + ('x', 'i4', (2,)), + ('y', 'f8', (2, 2)), + ('z', 'u1')] + +# A plain list of tuples with values for testing: +PbufferT = [ + # x y z + ([3, 2], [[6., 4.], [6., 4.]], 8), + ([4, 3], [[7., 5.], [7., 5.]], 9), + ] + + +# This is the structure of the table used for nested objects (DON'T PANIC!): +# +# +-+---------------------------------+-----+----------+-+-+ +# |x|Info |color|info |y|z| +# | +-----+--+----------------+----+--+ +----+-----+ | | +# | |value|y2|Info2 |name|z2| |Name|Value| | | +# | | | +----+-----+--+--+ | | | | | | | +# | | | |name|value|y3|z3| | | | | | | | +# +-+-----+--+----+-----+--+--+----+--+-----+----+-----+-+-+ +# + +# The corresponding nested array description: +Ndescr = [ + ('x', 'i4', (2,)), + ('Info', [ + ('value', 'c16'), + ('y2', 'f8'), + ('Info2', [ + ('name', 'S2'), + ('value', 'c16', (2,)), + ('y3', 'f8', (2,)), + ('z3', 'u4', (2,))]), + ('name', 'S2'), + ('z2', 'b1')]), + ('color', 'S2'), + ('info', [ + ('Name', 'U8'), + ('Value', 'c16')]), + ('y', 'f8', (2, 2)), + ('z', 'u1')] + +NbufferT = [ + # x Info color info y z + # value y2 Info2 name z2 Name Value + # name value y3 z3 + ([3, 2], (6j, 6., ('nn', [6j, 4j], [6., 4.], [1, 2]), 'NN', True), + 'cc', ('NN', 6j), [[6., 4.], [6., 4.]], 8), + ([4, 3], (7j, 7., ('oo', [7j, 5j], [7., 5.], [2, 1]), 'OO', False), + 'dd', ('OO', 7j), [[7., 5.], [7., 5.]], 9), + ] + +record_arrays = [ + np.array(PbufferT, dtype=np.dtype(Pdescr).newbyteorder('<')), + np.array(NbufferT, dtype=np.dtype(Ndescr).newbyteorder('<')), + np.array(PbufferT, dtype=np.dtype(Pdescr).newbyteorder('>')), + np.array(NbufferT, dtype=np.dtype(Ndescr).newbyteorder('>')), + np.zeros(1, dtype=[('c', (' 1, a) + assert_array_equal(b, [3, 2, 2, 3, 3]) + + def test_place(self): + # Make sure that non-np.ndarray objects + # raise an error instead of doing nothing + assert_raises(TypeError, place, [1, 2, 3], [True, False], [0, 1]) + + a = np.array([1, 4, 3, 2, 5, 8, 7]) + place(a, [0, 1, 0, 1, 0, 1, 0], [2, 
4, 6]) + assert_array_equal(a, [1, 2, 3, 4, 5, 6, 7]) + + place(a, np.zeros(7), []) + assert_array_equal(a, np.arange(1, 8)) + + place(a, [1, 0, 1, 0, 1, 0, 1], [8, 9]) + assert_array_equal(a, [8, 2, 9, 4, 8, 6, 9]) + assert_raises_regex(ValueError, "Cannot insert from an empty array", + lambda: place(a, [0, 0, 0, 0, 0, 1, 0], [])) + + # See Issue #6974 + a = np.array(['12', '34']) + place(a, [0, 1], '9') + assert_array_equal(a, ['12', '9']) + + def test_both(self): + a = rand(10) + mask = a > 0.5 + ac = a.copy() + c = extract(mask, a) + place(a, mask, 0) + place(a, mask, c) + assert_array_equal(a, ac) + + +# _foo1 and _foo2 are used in some tests in TestVectorize. + +def _foo1(x, y=1.0): + return y*math.floor(x) + + +def _foo2(x, y=1.0, z=0.0): + return y*math.floor(x) + z + + +class TestVectorize: + + def test_simple(self): + def addsubtract(a, b): + if a > b: + return a - b + else: + return a + b + + f = vectorize(addsubtract) + r = f([0, 3, 6, 9], [1, 3, 5, 7]) + assert_array_equal(r, [1, 6, 1, 2]) + + def test_scalar(self): + def addsubtract(a, b): + if a > b: + return a - b + else: + return a + b + + f = vectorize(addsubtract) + r = f([0, 3, 6, 9], 5) + assert_array_equal(r, [5, 8, 1, 4]) + + def test_large(self): + x = np.linspace(-3, 2, 10000) + f = vectorize(lambda x: x) + y = f(x) + assert_array_equal(y, x) + + def test_ufunc(self): + f = vectorize(math.cos) + args = np.array([0, 0.5 * np.pi, np.pi, 1.5 * np.pi, 2 * np.pi]) + r1 = f(args) + r2 = np.cos(args) + assert_array_almost_equal(r1, r2) + + def test_keywords(self): + + def foo(a, b=1): + return a + b + + f = vectorize(foo) + args = np.array([1, 2, 3]) + r1 = f(args) + r2 = np.array([2, 3, 4]) + assert_array_equal(r1, r2) + r1 = f(args, 2) + r2 = np.array([3, 4, 5]) + assert_array_equal(r1, r2) + + def test_keywords_with_otypes_order1(self): + # gh-1620: The second call of f would crash with + # `ValueError: invalid number of arguments`. + f = vectorize(_foo1, otypes=[float]) + # We're testing the caching of ufuncs by vectorize, so the order + # of these function calls is an important part of the test. + r1 = f(np.arange(3.0), 1.0) + r2 = f(np.arange(3.0)) + assert_array_equal(r1, r2) + + def test_keywords_with_otypes_order2(self): + # gh-1620: The second call of f would crash with + # `ValueError: non-broadcastable output operand with shape () + # doesn't match the broadcast shape (3,)`. + f = vectorize(_foo1, otypes=[float]) + # We're testing the caching of ufuncs by vectorize, so the order + # of these function calls is an important part of the test. + r1 = f(np.arange(3.0)) + r2 = f(np.arange(3.0), 1.0) + assert_array_equal(r1, r2) + + def test_keywords_with_otypes_order3(self): + # gh-1620: The third call of f would crash with + # `ValueError: invalid number of arguments`. + f = vectorize(_foo1, otypes=[float]) + # We're testing the caching of ufuncs by vectorize, so the order + # of these function calls is an important part of the test. + r1 = f(np.arange(3.0)) + r2 = f(np.arange(3.0), y=1.0) + r3 = f(np.arange(3.0)) + assert_array_equal(r1, r2) + assert_array_equal(r1, r3) + + def test_keywords_with_otypes_several_kwd_args1(self): + # gh-1620 Make sure different uses of keyword arguments + # don't break the vectorized function. + f = vectorize(_foo2, otypes=[float]) + # We're testing the caching of ufuncs by vectorize, so the order + # of these function calls is an important part of the test. 
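+ # The three calls below hit the cached ufunc with different argument
+ # layouts (x with z=100, x with y=-1, and x alone); each result is
+ # checked against a direct call to the plain Python function.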
+ r1 = f(10.4, z=100) + r2 = f(10.4, y=-1) + r3 = f(10.4) + assert_equal(r1, _foo2(10.4, z=100)) + assert_equal(r2, _foo2(10.4, y=-1)) + assert_equal(r3, _foo2(10.4)) + + def test_keywords_with_otypes_several_kwd_args2(self): + # gh-1620 Make sure different uses of keyword arguments + # don't break the vectorized function. + f = vectorize(_foo2, otypes=[float]) + # We're testing the caching of ufuncs by vectorize, so the order + # of these function calls is an important part of the test. + r1 = f(z=100, x=10.4, y=-1) + r2 = f(1, 2, 3) + assert_equal(r1, _foo2(z=100, x=10.4, y=-1)) + assert_equal(r2, _foo2(1, 2, 3)) + + def test_keywords_no_func_code(self): + # This needs to test a function that has keywords but + # no func_code attribute, since otherwise vectorize will + # inspect the func_code. + import random + try: + vectorize(random.randrange) # Should succeed + except Exception: + raise AssertionError() + + def test_keywords2_ticket_2100(self): + # Test kwarg support: enhancement ticket 2100 + + def foo(a, b=1): + return a + b + + f = vectorize(foo) + args = np.array([1, 2, 3]) + r1 = f(a=args) + r2 = np.array([2, 3, 4]) + assert_array_equal(r1, r2) + r1 = f(b=1, a=args) + assert_array_equal(r1, r2) + r1 = f(args, b=2) + r2 = np.array([3, 4, 5]) + assert_array_equal(r1, r2) + + def test_keywords3_ticket_2100(self): + # Test excluded with mixed positional and kwargs: ticket 2100 + def mypolyval(x, p): + _p = list(p) + res = _p.pop(0) + while _p: + res = res * x + _p.pop(0) + return res + + vpolyval = np.vectorize(mypolyval, excluded=['p', 1]) + ans = [3, 6] + assert_array_equal(ans, vpolyval(x=[0, 1], p=[1, 2, 3])) + assert_array_equal(ans, vpolyval([0, 1], p=[1, 2, 3])) + assert_array_equal(ans, vpolyval([0, 1], [1, 2, 3])) + + def test_keywords4_ticket_2100(self): + # Test vectorizing function with no positional args. + @vectorize + def f(**kw): + res = 1.0 + for _k in kw: + res *= kw[_k] + return res + + assert_array_equal(f(a=[1, 2], b=[3, 4]), [3, 8]) + + def test_keywords5_ticket_2100(self): + # Test vectorizing function with no kwargs args. + @vectorize + def f(*v): + return np.prod(v) + + assert_array_equal(f([1, 2], [3, 4]), [3, 8]) + + def test_coverage1_ticket_2100(self): + def foo(): + return 1 + + f = vectorize(foo) + assert_array_equal(f(), 1) + + def test_assigning_docstring(self): + def foo(x): + """Original documentation""" + return x + + f = vectorize(foo) + assert_equal(f.__doc__, foo.__doc__) + + doc = "Provided documentation" + f = vectorize(foo, doc=doc) + assert_equal(f.__doc__, doc) + + def test_UnboundMethod_ticket_1156(self): + # Regression test for issue 1156 + class Foo: + b = 2 + + def bar(self, a): + return a ** self.b + + assert_array_equal(vectorize(Foo().bar)(np.arange(9)), + np.arange(9) ** 2) + assert_array_equal(vectorize(Foo.bar)(Foo(), np.arange(9)), + np.arange(9) ** 2) + + def test_execution_order_ticket_1487(self): + # Regression test for dependence on execution order: issue 1487 + f1 = vectorize(lambda x: x) + res1a = f1(np.arange(3)) + res1b = f1(np.arange(0.1, 3)) + f2 = vectorize(lambda x: x) + res2b = f2(np.arange(0.1, 3)) + res2a = f2(np.arange(3)) + assert_equal(res1a, res2a) + assert_equal(res1b, res2b) + + def test_string_ticket_1892(self): + # Test vectorization over strings: issue 1892. + f = np.vectorize(lambda x: x) + s = '0123456789' * 10 + assert_equal(s, f(s)) + + def test_cache(self): + # Ensure that vectorized func called exactly once per argument. 
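+ # With cache=True the value computed while inferring the output type
+ # is reused for the first element, so the wrapped function should run
+ # exactly len(x) times rather than len(x) + 1.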
+ _calls = [0] + + @vectorize + def f(x): + _calls[0] += 1 + return x ** 2 + + f.cache = True + x = np.arange(5) + assert_array_equal(f(x), x * x) + assert_equal(_calls[0], len(x)) + + def test_otypes(self): + f = np.vectorize(lambda x: x) + f.otypes = 'i' + x = np.arange(5) + assert_array_equal(f(x), x) + + def test_parse_gufunc_signature(self): + assert_equal(nfb._parse_gufunc_signature('(x)->()'), ([('x',)], [()])) + assert_equal(nfb._parse_gufunc_signature('(x,y)->()'), + ([('x', 'y')], [()])) + assert_equal(nfb._parse_gufunc_signature('(x),(y)->()'), + ([('x',), ('y',)], [()])) + assert_equal(nfb._parse_gufunc_signature('(x)->(y)'), + ([('x',)], [('y',)])) + assert_equal(nfb._parse_gufunc_signature('(x)->(y),()'), + ([('x',)], [('y',), ()])) + assert_equal(nfb._parse_gufunc_signature('(),(a,b,c),(d)->(d,e)'), + ([(), ('a', 'b', 'c'), ('d',)], [('d', 'e')])) + + # Tests to check if whitespaces are ignored + assert_equal(nfb._parse_gufunc_signature('(x )->()'), ([('x',)], [()])) + assert_equal(nfb._parse_gufunc_signature('( x , y )->( )'), + ([('x', 'y')], [()])) + assert_equal(nfb._parse_gufunc_signature('(x),( y) ->()'), + ([('x',), ('y',)], [()])) + assert_equal(nfb._parse_gufunc_signature('( x)-> (y ) '), + ([('x',)], [('y',)])) + assert_equal(nfb._parse_gufunc_signature(' (x)->( y),( )'), + ([('x',)], [('y',), ()])) + assert_equal(nfb._parse_gufunc_signature( + '( ), ( a, b,c ) ,( d) -> (d , e)'), + ([(), ('a', 'b', 'c'), ('d',)], [('d', 'e')])) + + with assert_raises(ValueError): + nfb._parse_gufunc_signature('(x)(y)->()') + with assert_raises(ValueError): + nfb._parse_gufunc_signature('(x),(y)->') + with assert_raises(ValueError): + nfb._parse_gufunc_signature('((x))->(x)') + + def test_signature_simple(self): + def addsubtract(a, b): + if a > b: + return a - b + else: + return a + b + + f = vectorize(addsubtract, signature='(),()->()') + r = f([0, 3, 6, 9], [1, 3, 5, 7]) + assert_array_equal(r, [1, 6, 1, 2]) + + def test_signature_mean_last(self): + def mean(a): + return a.mean() + + f = vectorize(mean, signature='(n)->()') + r = f([[1, 3], [2, 4]]) + assert_array_equal(r, [2, 3]) + + def test_signature_center(self): + def center(a): + return a - a.mean() + + f = vectorize(center, signature='(n)->(n)') + r = f([[1, 3], [2, 4]]) + assert_array_equal(r, [[-1, 1], [-1, 1]]) + + def test_signature_two_outputs(self): + f = vectorize(lambda x: (x, x), signature='()->(),()') + r = f([1, 2, 3]) + assert_(isinstance(r, tuple) and len(r) == 2) + assert_array_equal(r[0], [1, 2, 3]) + assert_array_equal(r[1], [1, 2, 3]) + + def test_signature_outer(self): + f = vectorize(np.outer, signature='(a),(b)->(a,b)') + r = f([1, 2], [1, 2, 3]) + assert_array_equal(r, [[1, 2, 3], [2, 4, 6]]) + + r = f([[[1, 2]]], [1, 2, 3]) + assert_array_equal(r, [[[[1, 2, 3], [2, 4, 6]]]]) + + r = f([[1, 0], [2, 0]], [1, 2, 3]) + assert_array_equal(r, [[[1, 2, 3], [0, 0, 0]], + [[2, 4, 6], [0, 0, 0]]]) + + r = f([1, 2], [[1, 2, 3], [0, 0, 0]]) + assert_array_equal(r, [[[1, 2, 3], [2, 4, 6]], + [[0, 0, 0], [0, 0, 0]]]) + + def test_signature_computed_size(self): + f = vectorize(lambda x: x[:-1], signature='(n)->(m)') + r = f([1, 2, 3]) + assert_array_equal(r, [1, 2]) + + r = f([[1, 2, 3], [2, 3, 4]]) + assert_array_equal(r, [[1, 2], [2, 3]]) + + def test_signature_excluded(self): + + def foo(a, b=1): + return a + b + + f = vectorize(foo, signature='()->()', excluded={'b'}) + assert_array_equal(f([1, 2, 3]), [2, 3, 4]) + assert_array_equal(f([1, 2, 3], b=0), [1, 2, 3]) + + def test_signature_otypes(self): + f = 
vectorize(lambda x: x, signature='(n)->(n)', otypes=['float64']) + r = f([1, 2, 3]) + assert_equal(r.dtype, np.dtype('float64')) + assert_array_equal(r, [1, 2, 3]) + + def test_signature_invalid_inputs(self): + f = vectorize(operator.add, signature='(n),(n)->(n)') + with assert_raises_regex(TypeError, 'wrong number of positional'): + f([1, 2]) + with assert_raises_regex( + ValueError, 'does not have enough dimensions'): + f(1, 2) + with assert_raises_regex( + ValueError, 'inconsistent size for core dimension'): + f([1, 2], [1, 2, 3]) + + f = vectorize(operator.add, signature='()->()') + with assert_raises_regex(TypeError, 'wrong number of positional'): + f(1, 2) + + def test_signature_invalid_outputs(self): + + f = vectorize(lambda x: x[:-1], signature='(n)->(n)') + with assert_raises_regex( + ValueError, 'inconsistent size for core dimension'): + f([1, 2, 3]) + + f = vectorize(lambda x: x, signature='()->(),()') + with assert_raises_regex(ValueError, 'wrong number of outputs'): + f(1) + + f = vectorize(lambda x: (x, x), signature='()->()') + with assert_raises_regex(ValueError, 'wrong number of outputs'): + f([1, 2]) + + def test_size_zero_output(self): + # see issue 5868 + f = np.vectorize(lambda x: x) + x = np.zeros([0, 5], dtype=int) + with assert_raises_regex(ValueError, 'otypes'): + f(x) + + f.otypes = 'i' + assert_array_equal(f(x), x) + + f = np.vectorize(lambda x: x, signature='()->()') + with assert_raises_regex(ValueError, 'otypes'): + f(x) + + f = np.vectorize(lambda x: x, signature='()->()', otypes='i') + assert_array_equal(f(x), x) + + f = np.vectorize(lambda x: x, signature='(n)->(n)', otypes='i') + assert_array_equal(f(x), x) + + f = np.vectorize(lambda x: x, signature='(n)->(n)') + assert_array_equal(f(x.T), x.T) + + f = np.vectorize(lambda x: [x], signature='()->(n)', otypes='i') + with assert_raises_regex(ValueError, 'new output dimensions'): + f(x) + + def test_subclasses(self): + class subclass(np.ndarray): + pass + + m = np.array([[1., 0., 0.], + [0., 0., 1.], + [0., 1., 0.]]).view(subclass) + v = np.array([[1., 2., 3.], [4., 5., 6.], [7., 8., 9.]]).view(subclass) + # generalized (gufunc) + matvec = np.vectorize(np.matmul, signature='(m,m),(m)->(m)') + r = matvec(m, v) + assert_equal(type(r), subclass) + assert_equal(r, [[1., 3., 2.], [4., 6., 5.], [7., 9., 8.]]) + + # element-wise (ufunc) + mult = np.vectorize(lambda x, y: x*y) + r = mult(m, v) + assert_equal(type(r), subclass) + assert_equal(r, m * v) + + +class TestLeaks: + class A: + iters = 20 + + def bound(self, *args): + return 0 + + @staticmethod + def unbound(*args): + return 0 + + @pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") + @pytest.mark.parametrize('name, incr', [ + ('bound', A.iters), + ('unbound', 0), + ]) + def test_frompyfunc_leaks(self, name, incr): + # exposed in gh-11867 as np.vectorized, but the problem stems from + # frompyfunc. + # class.attribute = np.frompyfunc() creates a + # reference cycle if is a bound class method. 
It requires a + # gc collection cycle to break the cycle (on CPython 3) + import gc + A_func = getattr(self.A, name) + gc.disable() + try: + refcount = sys.getrefcount(A_func) + for i in range(self.A.iters): + a = self.A() + a.f = np.frompyfunc(getattr(a, name), 1, 1) + out = a.f(np.arange(10)) + a = None + # A.func is part of a reference cycle if incr is non-zero + assert_equal(sys.getrefcount(A_func), refcount + incr) + for i in range(5): + gc.collect() + assert_equal(sys.getrefcount(A_func), refcount) + finally: + gc.enable() + +class TestDigitize: + + def test_forward(self): + x = np.arange(-6, 5) + bins = np.arange(-5, 5) + assert_array_equal(digitize(x, bins), np.arange(11)) + + def test_reverse(self): + x = np.arange(5, -6, -1) + bins = np.arange(5, -5, -1) + assert_array_equal(digitize(x, bins), np.arange(11)) + + def test_random(self): + x = rand(10) + bin = np.linspace(x.min(), x.max(), 10) + assert_(np.all(digitize(x, bin) != 0)) + + def test_right_basic(self): + x = [1, 5, 4, 10, 8, 11, 0] + bins = [1, 5, 10] + default_answer = [1, 2, 1, 3, 2, 3, 0] + assert_array_equal(digitize(x, bins), default_answer) + right_answer = [0, 1, 1, 2, 2, 3, 0] + assert_array_equal(digitize(x, bins, True), right_answer) + + def test_right_open(self): + x = np.arange(-6, 5) + bins = np.arange(-6, 4) + assert_array_equal(digitize(x, bins, True), np.arange(11)) + + def test_right_open_reverse(self): + x = np.arange(5, -6, -1) + bins = np.arange(4, -6, -1) + assert_array_equal(digitize(x, bins, True), np.arange(11)) + + def test_right_open_random(self): + x = rand(10) + bins = np.linspace(x.min(), x.max(), 10) + assert_(np.all(digitize(x, bins, True) != 10)) + + def test_monotonic(self): + x = [-1, 0, 1, 2] + bins = [0, 0, 1] + assert_array_equal(digitize(x, bins, False), [0, 2, 3, 3]) + assert_array_equal(digitize(x, bins, True), [0, 0, 2, 3]) + bins = [1, 1, 0] + assert_array_equal(digitize(x, bins, False), [3, 2, 0, 0]) + assert_array_equal(digitize(x, bins, True), [3, 3, 2, 0]) + bins = [1, 1, 1, 1] + assert_array_equal(digitize(x, bins, False), [0, 0, 4, 4]) + assert_array_equal(digitize(x, bins, True), [0, 0, 0, 4]) + bins = [0, 0, 1, 0] + assert_raises(ValueError, digitize, x, bins) + bins = [1, 1, 0, 1] + assert_raises(ValueError, digitize, x, bins) + + def test_casting_error(self): + x = [1, 2, 3 + 1.j] + bins = [1, 2, 3] + assert_raises(TypeError, digitize, x, bins) + x, bins = bins, x + assert_raises(TypeError, digitize, x, bins) + + def test_return_type(self): + # Functions returning indices should always return base ndarrays + class A(np.ndarray): + pass + a = np.arange(5).view(A) + b = np.arange(1, 3).view(A) + assert_(not isinstance(digitize(b, a, False), A)) + assert_(not isinstance(digitize(b, a, True), A)) + + def test_large_integers_increasing(self): + # gh-11022 + x = 2**54 # loses precision in a float + assert_equal(np.digitize(x, [x - 1, x + 1]), 1) + + @pytest.mark.xfail( + reason="gh-11022: np.core.multiarray._monoticity loses precision") + def test_large_integers_decreasing(self): + # gh-11022 + x = 2**54 # loses precision in a float + assert_equal(np.digitize(x, [x + 1, x - 1]), 1) + + +class TestUnwrap: + + def test_simple(self): + # check that unwrap removes jumps greater that 2*pi + assert_array_equal(unwrap([1, 1 + 2 * np.pi]), [1, 1]) + # check that unwrap maintains continuity + assert_(np.all(diff(unwrap(rand(10) * 100)) < np.pi)) + + def test_period(self): + # check that unwrap removes jumps greater that 255 + assert_array_equal(unwrap([1, 1 + 256], period=255), [1, 
2]) + # check that unwrap maintains continuity + assert_(np.all(diff(unwrap(rand(10) * 1000, period=255)) < 255)) + # check simple case + simple_seq = np.array([0, 75, 150, 225, 300]) + wrap_seq = np.mod(simple_seq, 255) + assert_array_equal(unwrap(wrap_seq, period=255), simple_seq) + # check custom discont value + uneven_seq = np.array([0, 75, 150, 225, 300, 430]) + wrap_uneven = np.mod(uneven_seq, 250) + no_discont = unwrap(wrap_uneven, period=250) + assert_array_equal(no_discont, [0, 75, 150, 225, 300, 180]) + sm_discont = unwrap(wrap_uneven, period=250, discont=140) + assert_array_equal(sm_discont, [0, 75, 150, 225, 300, 430]) + assert sm_discont.dtype == wrap_uneven.dtype + + +@pytest.mark.parametrize( + "dtype", "O" + np.typecodes["AllInteger"] + np.typecodes["Float"] +) +@pytest.mark.parametrize("M", [0, 1, 10]) +class TestFilterwindows: + + def test_hanning(self, dtype: str, M: int) -> None: + scalar = np.array(M, dtype=dtype)[()] + + w = hanning(scalar) + if dtype == "O": + ref_dtype = np.float64 + else: + ref_dtype = np.result_type(scalar.dtype, np.float64) + assert w.dtype == ref_dtype + + # check symmetry + assert_equal(w, flipud(w)) + + # check known value + if scalar < 1: + assert_array_equal(w, np.array([])) + elif scalar == 1: + assert_array_equal(w, np.ones(1)) + else: + assert_almost_equal(np.sum(w, axis=0), 4.500, 4) + + def test_hamming(self, dtype: str, M: int) -> None: + scalar = np.array(M, dtype=dtype)[()] + + w = hamming(scalar) + if dtype == "O": + ref_dtype = np.float64 + else: + ref_dtype = np.result_type(scalar.dtype, np.float64) + assert w.dtype == ref_dtype + + # check symmetry + assert_equal(w, flipud(w)) + + # check known value + if scalar < 1: + assert_array_equal(w, np.array([])) + elif scalar == 1: + assert_array_equal(w, np.ones(1)) + else: + assert_almost_equal(np.sum(w, axis=0), 4.9400, 4) + + def test_bartlett(self, dtype: str, M: int) -> None: + scalar = np.array(M, dtype=dtype)[()] + + w = bartlett(scalar) + if dtype == "O": + ref_dtype = np.float64 + else: + ref_dtype = np.result_type(scalar.dtype, np.float64) + assert w.dtype == ref_dtype + + # check symmetry + assert_equal(w, flipud(w)) + + # check known value + if scalar < 1: + assert_array_equal(w, np.array([])) + elif scalar == 1: + assert_array_equal(w, np.ones(1)) + else: + assert_almost_equal(np.sum(w, axis=0), 4.4444, 4) + + def test_blackman(self, dtype: str, M: int) -> None: + scalar = np.array(M, dtype=dtype)[()] + + w = blackman(scalar) + if dtype == "O": + ref_dtype = np.float64 + else: + ref_dtype = np.result_type(scalar.dtype, np.float64) + assert w.dtype == ref_dtype + + # check symmetry + assert_equal(w, flipud(w)) + + # check known value + if scalar < 1: + assert_array_equal(w, np.array([])) + elif scalar == 1: + assert_array_equal(w, np.ones(1)) + else: + assert_almost_equal(np.sum(w, axis=0), 3.7800, 4) + + def test_kaiser(self, dtype: str, M: int) -> None: + scalar = np.array(M, dtype=dtype)[()] + + w = kaiser(scalar, 0) + if dtype == "O": + ref_dtype = np.float64 + else: + ref_dtype = np.result_type(scalar.dtype, np.float64) + assert w.dtype == ref_dtype + + # check symmetry + assert_equal(w, flipud(w)) + + # check known value + if scalar < 1: + assert_array_equal(w, np.array([])) + elif scalar == 1: + assert_array_equal(w, np.ones(1)) + else: + assert_almost_equal(np.sum(w, axis=0), 10, 15) + + +class TestTrapz: + + def test_simple(self): + x = np.arange(-10, 10, .1) + r = trapz(np.exp(-.5 * x ** 2) / np.sqrt(2 * np.pi), dx=0.1) + # check integral of normal equals 1 + 
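# to about 7 decimal places; with a 0.1-spaced grid over [-10, 10) both
+ # the trapezoid-rule error and the truncated tails are negligible
+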
assert_almost_equal(r, 1, 7) + + def test_ndim(self): + x = np.linspace(0, 1, 3) + y = np.linspace(0, 2, 8) + z = np.linspace(0, 3, 13) + + wx = np.ones_like(x) * (x[1] - x[0]) + wx[0] /= 2 + wx[-1] /= 2 + wy = np.ones_like(y) * (y[1] - y[0]) + wy[0] /= 2 + wy[-1] /= 2 + wz = np.ones_like(z) * (z[1] - z[0]) + wz[0] /= 2 + wz[-1] /= 2 + + q = x[:, None, None] + y[None,:, None] + z[None, None,:] + + qx = (q * wx[:, None, None]).sum(axis=0) + qy = (q * wy[None, :, None]).sum(axis=1) + qz = (q * wz[None, None, :]).sum(axis=2) + + # n-d `x` + r = trapz(q, x=x[:, None, None], axis=0) + assert_almost_equal(r, qx) + r = trapz(q, x=y[None,:, None], axis=1) + assert_almost_equal(r, qy) + r = trapz(q, x=z[None, None,:], axis=2) + assert_almost_equal(r, qz) + + # 1-d `x` + r = trapz(q, x=x, axis=0) + assert_almost_equal(r, qx) + r = trapz(q, x=y, axis=1) + assert_almost_equal(r, qy) + r = trapz(q, x=z, axis=2) + assert_almost_equal(r, qz) + + def test_masked(self): + # Testing that masked arrays behave as if the function is 0 where + # masked + x = np.arange(5) + y = x * x + mask = x == 2 + ym = np.ma.array(y, mask=mask) + r = 13.0 # sum(0.5 * (0 + 1) * 1.0 + 0.5 * (9 + 16)) + assert_almost_equal(trapz(ym, x), r) + + xm = np.ma.array(x, mask=mask) + assert_almost_equal(trapz(ym, xm), r) + + xm = np.ma.array(x, mask=mask) + assert_almost_equal(trapz(y, xm), r) + + +class TestSinc: + + def test_simple(self): + assert_(sinc(0) == 1) + w = sinc(np.linspace(-1, 1, 100)) + # check symmetry + assert_array_almost_equal(w, flipud(w), 7) + + def test_array_like(self): + x = [0, 0.5] + y1 = sinc(np.array(x)) + y2 = sinc(list(x)) + y3 = sinc(tuple(x)) + assert_array_equal(y1, y2) + assert_array_equal(y1, y3) + + +class TestUnique: + + def test_simple(self): + x = np.array([4, 3, 2, 1, 1, 2, 3, 4, 0]) + assert_(np.all(unique(x) == [0, 1, 2, 3, 4])) + assert_(unique(np.array([1, 1, 1, 1, 1])) == np.array([1])) + x = ['widget', 'ham', 'foo', 'bar', 'foo', 'ham'] + assert_(np.all(unique(x) == ['bar', 'foo', 'ham', 'widget'])) + x = np.array([5 + 6j, 1 + 1j, 1 + 10j, 10, 5 + 6j]) + assert_(np.all(unique(x) == [1 + 1j, 1 + 10j, 5 + 6j, 10])) + + +class TestCheckFinite: + + def test_simple(self): + a = [1, 2, 3] + b = [1, 2, np.inf] + c = [1, 2, np.nan] + np.lib.asarray_chkfinite(a) + assert_raises(ValueError, np.lib.asarray_chkfinite, b) + assert_raises(ValueError, np.lib.asarray_chkfinite, c) + + def test_dtype_order(self): + # Regression test for missing dtype and order arguments + a = [1, 2, 3] + a = np.lib.asarray_chkfinite(a, order='F', dtype=np.float64) + assert_(a.dtype == np.float64) + + +class TestCorrCoef: + A = np.array( + [[0.15391142, 0.18045767, 0.14197213], + [0.70461506, 0.96474128, 0.27906989], + [0.9297531, 0.32296769, 0.19267156]]) + B = np.array( + [[0.10377691, 0.5417086, 0.49807457], + [0.82872117, 0.77801674, 0.39226705], + [0.9314666, 0.66800209, 0.03538394]]) + res1 = np.array( + [[1., 0.9379533, -0.04931983], + [0.9379533, 1., 0.30007991], + [-0.04931983, 0.30007991, 1.]]) + res2 = np.array( + [[1., 0.9379533, -0.04931983, 0.30151751, 0.66318558, 0.51532523], + [0.9379533, 1., 0.30007991, -0.04781421, 0.88157256, 0.78052386], + [-0.04931983, 0.30007991, 1., -0.96717111, 0.71483595, 0.83053601], + [0.30151751, -0.04781421, -0.96717111, 1., -0.51366032, -0.66173113], + [0.66318558, 0.88157256, 0.71483595, -0.51366032, 1., 0.98317823], + [0.51532523, 0.78052386, 0.83053601, -0.66173113, 0.98317823, 1.]]) + + def test_non_array(self): + assert_almost_equal(np.corrcoef([0, 1, 0], [1, 0, 1]), + 
[[1., -1.], [-1., 1.]]) + + def test_simple(self): + tgt1 = corrcoef(self.A) + assert_almost_equal(tgt1, self.res1) + assert_(np.all(np.abs(tgt1) <= 1.0)) + + tgt2 = corrcoef(self.A, self.B) + assert_almost_equal(tgt2, self.res2) + assert_(np.all(np.abs(tgt2) <= 1.0)) + + def test_ddof(self): + # ddof raises DeprecationWarning + with suppress_warnings() as sup: + warnings.simplefilter("always") + assert_warns(DeprecationWarning, corrcoef, self.A, ddof=-1) + sup.filter(DeprecationWarning) + # ddof has no or negligible effect on the function + assert_almost_equal(corrcoef(self.A, ddof=-1), self.res1) + assert_almost_equal(corrcoef(self.A, self.B, ddof=-1), self.res2) + assert_almost_equal(corrcoef(self.A, ddof=3), self.res1) + assert_almost_equal(corrcoef(self.A, self.B, ddof=3), self.res2) + + def test_bias(self): + # bias raises DeprecationWarning + with suppress_warnings() as sup: + warnings.simplefilter("always") + assert_warns(DeprecationWarning, corrcoef, self.A, self.B, 1, 0) + assert_warns(DeprecationWarning, corrcoef, self.A, bias=0) + sup.filter(DeprecationWarning) + # bias has no or negligible effect on the function + assert_almost_equal(corrcoef(self.A, bias=1), self.res1) + + def test_complex(self): + x = np.array([[1, 2, 3], [1j, 2j, 3j]]) + res = corrcoef(x) + tgt = np.array([[1., -1.j], [1.j, 1.]]) + assert_allclose(res, tgt) + assert_(np.all(np.abs(res) <= 1.0)) + + def test_xy(self): + x = np.array([[1, 2, 3]]) + y = np.array([[1j, 2j, 3j]]) + assert_allclose(np.corrcoef(x, y), np.array([[1., -1.j], [1.j, 1.]])) + + def test_empty(self): + with warnings.catch_warnings(record=True): + warnings.simplefilter('always', RuntimeWarning) + assert_array_equal(corrcoef(np.array([])), np.nan) + assert_array_equal(corrcoef(np.array([]).reshape(0, 2)), + np.array([]).reshape(0, 0)) + assert_array_equal(corrcoef(np.array([]).reshape(2, 0)), + np.array([[np.nan, np.nan], [np.nan, np.nan]])) + + def test_extreme(self): + x = [[1e-100, 1e100], [1e100, 1e-100]] + with np.errstate(all='raise'): + c = corrcoef(x) + assert_array_almost_equal(c, np.array([[1., -1.], [-1., 1.]])) + assert_(np.all(np.abs(c) <= 1.0)) + + @pytest.mark.parametrize("test_type", [np.half, np.single, np.double, np.longdouble]) + def test_corrcoef_dtype(self, test_type): + cast_A = self.A.astype(test_type) + res = corrcoef(cast_A, dtype=test_type) + assert test_type == res.dtype + + +class TestCov: + x1 = np.array([[0, 2], [1, 1], [2, 0]]).T + res1 = np.array([[1., -1.], [-1., 1.]]) + x2 = np.array([0.0, 1.0, 2.0], ndmin=2) + frequencies = np.array([1, 4, 1]) + x2_repeats = np.array([[0.0], [1.0], [1.0], [1.0], [1.0], [2.0]]).T + res2 = np.array([[0.4, -0.4], [-0.4, 0.4]]) + unit_frequencies = np.ones(3, dtype=np.int_) + weights = np.array([1.0, 4.0, 1.0]) + res3 = np.array([[2. / 3., -2. / 3.], [-2. / 3., 2. 
/ 3.]]) + unit_weights = np.ones(3) + x3 = np.array([0.3942, 0.5969, 0.7730, 0.9918, 0.7964]) + + def test_basic(self): + assert_allclose(cov(self.x1), self.res1) + + def test_complex(self): + x = np.array([[1, 2, 3], [1j, 2j, 3j]]) + res = np.array([[1., -1.j], [1.j, 1.]]) + assert_allclose(cov(x), res) + assert_allclose(cov(x, aweights=np.ones(3)), res) + + def test_xy(self): + x = np.array([[1, 2, 3]]) + y = np.array([[1j, 2j, 3j]]) + assert_allclose(cov(x, y), np.array([[1., -1.j], [1.j, 1.]])) + + def test_empty(self): + with warnings.catch_warnings(record=True): + warnings.simplefilter('always', RuntimeWarning) + assert_array_equal(cov(np.array([])), np.nan) + assert_array_equal(cov(np.array([]).reshape(0, 2)), + np.array([]).reshape(0, 0)) + assert_array_equal(cov(np.array([]).reshape(2, 0)), + np.array([[np.nan, np.nan], [np.nan, np.nan]])) + + def test_wrong_ddof(self): + with warnings.catch_warnings(record=True): + warnings.simplefilter('always', RuntimeWarning) + assert_array_equal(cov(self.x1, ddof=5), + np.array([[np.inf, -np.inf], + [-np.inf, np.inf]])) + + def test_1D_rowvar(self): + assert_allclose(cov(self.x3), cov(self.x3, rowvar=False)) + y = np.array([0.0780, 0.3107, 0.2111, 0.0334, 0.8501]) + assert_allclose(cov(self.x3, y), cov(self.x3, y, rowvar=False)) + + def test_1D_variance(self): + assert_allclose(cov(self.x3, ddof=1), np.var(self.x3, ddof=1)) + + def test_fweights(self): + assert_allclose(cov(self.x2, fweights=self.frequencies), + cov(self.x2_repeats)) + assert_allclose(cov(self.x1, fweights=self.frequencies), + self.res2) + assert_allclose(cov(self.x1, fweights=self.unit_frequencies), + self.res1) + nonint = self.frequencies + 0.5 + assert_raises(TypeError, cov, self.x1, fweights=nonint) + f = np.ones((2, 3), dtype=np.int_) + assert_raises(RuntimeError, cov, self.x1, fweights=f) + f = np.ones(2, dtype=np.int_) + assert_raises(RuntimeError, cov, self.x1, fweights=f) + f = -1 * np.ones(3, dtype=np.int_) + assert_raises(ValueError, cov, self.x1, fweights=f) + + def test_aweights(self): + assert_allclose(cov(self.x1, aweights=self.weights), self.res3) + assert_allclose(cov(self.x1, aweights=3.0 * self.weights), + cov(self.x1, aweights=self.weights)) + assert_allclose(cov(self.x1, aweights=self.unit_weights), self.res1) + w = np.ones((2, 3)) + assert_raises(RuntimeError, cov, self.x1, aweights=w) + w = np.ones(2) + assert_raises(RuntimeError, cov, self.x1, aweights=w) + w = -1.0 * np.ones(3) + assert_raises(ValueError, cov, self.x1, aweights=w) + + def test_unit_fweights_and_aweights(self): + assert_allclose(cov(self.x2, fweights=self.frequencies, + aweights=self.unit_weights), + cov(self.x2_repeats)) + assert_allclose(cov(self.x1, fweights=self.frequencies, + aweights=self.unit_weights), + self.res2) + assert_allclose(cov(self.x1, fweights=self.unit_frequencies, + aweights=self.unit_weights), + self.res1) + assert_allclose(cov(self.x1, fweights=self.unit_frequencies, + aweights=self.weights), + self.res3) + assert_allclose(cov(self.x1, fweights=self.unit_frequencies, + aweights=3.0 * self.weights), + cov(self.x1, aweights=self.weights)) + assert_allclose(cov(self.x1, fweights=self.unit_frequencies, + aweights=self.unit_weights), + self.res1) + + @pytest.mark.parametrize("test_type", [np.half, np.single, np.double, np.longdouble]) + def test_cov_dtype(self, test_type): + cast_x1 = self.x1.astype(test_type) + res = cov(cast_x1, dtype=test_type) + assert test_type == res.dtype + + +class Test_I0: + + def test_simple(self): + assert_almost_equal( + i0(0.5), + 
np.array(1.0634833707413234)) + + # need at least one test above 8, as the implementation is piecewise + A = np.array([0.49842636, 0.6969809, 0.22011976, 0.0155549, 10.0]) + expected = np.array([1.06307822, 1.12518299, 1.01214991, 1.00006049, 2815.71662847]) + assert_almost_equal(i0(A), expected) + assert_almost_equal(i0(-A), expected) + + B = np.array([[0.827002, 0.99959078], + [0.89694769, 0.39298162], + [0.37954418, 0.05206293], + [0.36465447, 0.72446427], + [0.48164949, 0.50324519]]) + assert_almost_equal( + i0(B), + np.array([[1.17843223, 1.26583466], + [1.21147086, 1.03898290], + [1.03633899, 1.00067775], + [1.03352052, 1.13557954], + [1.05884290, 1.06432317]])) + # Regression test for gh-11205 + i0_0 = np.i0([0.]) + assert_equal(i0_0.shape, (1,)) + assert_array_equal(np.i0([0.]), np.array([1.])) + + def test_non_array(self): + a = np.arange(4) + + class array_like: + __array_interface__ = a.__array_interface__ + + def __array_wrap__(self, arr): + return self + + # E.g. pandas series survive ufunc calls through array-wrap: + assert isinstance(np.abs(array_like()), array_like) + exp = np.i0(a) + res = np.i0(array_like()) + + assert_array_equal(exp, res) + + def test_complex(self): + a = np.array([0, 1 + 2j]) + with pytest.raises(TypeError, match="i0 not supported for complex values"): + res = i0(a) + +class TestKaiser: + + def test_simple(self): + assert_(np.isfinite(kaiser(1, 1.0))) + assert_almost_equal(kaiser(0, 1.0), + np.array([])) + assert_almost_equal(kaiser(2, 1.0), + np.array([0.78984831, 0.78984831])) + assert_almost_equal(kaiser(5, 1.0), + np.array([0.78984831, 0.94503323, 1., + 0.94503323, 0.78984831])) + assert_almost_equal(kaiser(5, 1.56789), + np.array([0.58285404, 0.88409679, 1., + 0.88409679, 0.58285404])) + + def test_int_beta(self): + kaiser(3, 4) + + +class TestMsort: + + def test_simple(self): + A = np.array([[0.44567325, 0.79115165, 0.54900530], + [0.36844147, 0.37325583, 0.96098397], + [0.64864341, 0.52929049, 0.39172155]]) + assert_almost_equal( + msort(A), + np.array([[0.36844147, 0.37325583, 0.39172155], + [0.44567325, 0.52929049, 0.54900530], + [0.64864341, 0.79115165, 0.96098397]])) + + +class TestMeshgrid: + + def test_simple(self): + [X, Y] = meshgrid([1, 2, 3], [4, 5, 6, 7]) + assert_array_equal(X, np.array([[1, 2, 3], + [1, 2, 3], + [1, 2, 3], + [1, 2, 3]])) + assert_array_equal(Y, np.array([[4, 4, 4], + [5, 5, 5], + [6, 6, 6], + [7, 7, 7]])) + + def test_single_input(self): + [X] = meshgrid([1, 2, 3, 4]) + assert_array_equal(X, np.array([1, 2, 3, 4])) + + def test_no_input(self): + args = [] + assert_array_equal([], meshgrid(*args)) + assert_array_equal([], meshgrid(*args, copy=False)) + + def test_indexing(self): + x = [1, 2, 3] + y = [4, 5, 6, 7] + [X, Y] = meshgrid(x, y, indexing='ij') + assert_array_equal(X, np.array([[1, 1, 1, 1], + [2, 2, 2, 2], + [3, 3, 3, 3]])) + assert_array_equal(Y, np.array([[4, 5, 6, 7], + [4, 5, 6, 7], + [4, 5, 6, 7]])) + + # Test expected shapes: + z = [8, 9] + assert_(meshgrid(x, y)[0].shape == (4, 3)) + assert_(meshgrid(x, y, indexing='ij')[0].shape == (3, 4)) + assert_(meshgrid(x, y, z)[0].shape == (4, 3, 2)) + assert_(meshgrid(x, y, z, indexing='ij')[0].shape == (3, 4, 2)) + + assert_raises(ValueError, meshgrid, x, y, indexing='notvalid') + + def test_sparse(self): + [X, Y] = meshgrid([1, 2, 3], [4, 5, 6, 7], sparse=True) + assert_array_equal(X, np.array([[1, 2, 3]])) + assert_array_equal(Y, np.array([[4], [5], [6], [7]])) + + def test_invalid_arguments(self): + # Test that meshgrid complains about invalid arguments + 
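# A misspelled keyword (indices instead of indexing) must raise
+ # TypeError instead of being silently accepted.
+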
# Regression test for issue #4755: + # https://github.com/numpy/numpy/issues/4755 + assert_raises(TypeError, meshgrid, + [1, 2, 3], [4, 5, 6, 7], indices='ij') + + def test_return_type(self): + # Test for appropriate dtype in returned arrays. + # Regression test for issue #5297 + # https://github.com/numpy/numpy/issues/5297 + x = np.arange(0, 10, dtype=np.float32) + y = np.arange(10, 20, dtype=np.float64) + + X, Y = np.meshgrid(x,y) + + assert_(X.dtype == x.dtype) + assert_(Y.dtype == y.dtype) + + # copy + X, Y = np.meshgrid(x,y, copy=True) + + assert_(X.dtype == x.dtype) + assert_(Y.dtype == y.dtype) + + # sparse + X, Y = np.meshgrid(x,y, sparse=True) + + assert_(X.dtype == x.dtype) + assert_(Y.dtype == y.dtype) + + def test_writeback(self): + # Issue 8561 + X = np.array([1.1, 2.2]) + Y = np.array([3.3, 4.4]) + x, y = np.meshgrid(X, Y, sparse=False, copy=True) + + x[0, :] = 0 + assert_equal(x[0, :], 0) + assert_equal(x[1, :], X) + + def test_nd_shape(self): + a, b, c, d, e = np.meshgrid(*([0] * i for i in range(1, 6))) + expected_shape = (2, 1, 3, 4, 5) + assert_equal(a.shape, expected_shape) + assert_equal(b.shape, expected_shape) + assert_equal(c.shape, expected_shape) + assert_equal(d.shape, expected_shape) + assert_equal(e.shape, expected_shape) + + def test_nd_values(self): + a, b, c = np.meshgrid([0], [1, 2], [3, 4, 5]) + assert_equal(a, [[[0, 0, 0]], [[0, 0, 0]]]) + assert_equal(b, [[[1, 1, 1]], [[2, 2, 2]]]) + assert_equal(c, [[[3, 4, 5]], [[3, 4, 5]]]) + + def test_nd_indexing(self): + a, b, c = np.meshgrid([0], [1, 2], [3, 4, 5], indexing='ij') + assert_equal(a, [[[0, 0, 0], [0, 0, 0]]]) + assert_equal(b, [[[1, 1, 1], [2, 2, 2]]]) + assert_equal(c, [[[3, 4, 5], [3, 4, 5]]]) + + +class TestPiecewise: + + def test_simple(self): + # Condition is single bool list + x = piecewise([0, 0], [True, False], [1]) + assert_array_equal(x, [1, 0]) + + # List of conditions: single bool list + x = piecewise([0, 0], [[True, False]], [1]) + assert_array_equal(x, [1, 0]) + + # Conditions is single bool array + x = piecewise([0, 0], np.array([True, False]), [1]) + assert_array_equal(x, [1, 0]) + + # Condition is single int array + x = piecewise([0, 0], np.array([1, 0]), [1]) + assert_array_equal(x, [1, 0]) + + # List of conditions: int array + x = piecewise([0, 0], [np.array([1, 0])], [1]) + assert_array_equal(x, [1, 0]) + + x = piecewise([0, 0], [[False, True]], [lambda x:-1]) + assert_array_equal(x, [0, -1]) + + assert_raises_regex(ValueError, '1 or 2 functions are expected', + piecewise, [0, 0], [[False, True]], []) + assert_raises_regex(ValueError, '1 or 2 functions are expected', + piecewise, [0, 0], [[False, True]], [1, 2, 3]) + + def test_two_conditions(self): + x = piecewise([1, 2], [[True, False], [False, True]], [3, 4]) + assert_array_equal(x, [3, 4]) + + def test_scalar_domains_three_conditions(self): + x = piecewise(3, [True, False, False], [4, 2, 0]) + assert_equal(x, 4) + + def test_default(self): + # No value specified for x[1], should be 0 + x = piecewise([1, 2], [True, False], [2]) + assert_array_equal(x, [2, 0]) + + # Should set x[1] to 3 + x = piecewise([1, 2], [True, False], [2, 3]) + assert_array_equal(x, [2, 3]) + + def test_0d(self): + x = np.array(3) + y = piecewise(x, x > 3, [4, 0]) + assert_(y.ndim == 0) + assert_(y == 0) + + x = 5 + y = piecewise(x, [True, False], [1, 0]) + assert_(y.ndim == 0) + assert_(y == 1) + + # With 3 ranges (It was failing, before) + y = piecewise(x, [False, False, True], [1, 2, 3]) + assert_array_equal(y, 3) + + def test_0d_comparison(self): + x 
= 3 + y = piecewise(x, [x <= 3, x > 3], [4, 0]) # Should succeed. + assert_equal(y, 4) + + # With 3 ranges (It was failing, before) + x = 4 + y = piecewise(x, [x <= 3, (x > 3) * (x <= 5), x > 5], [1, 2, 3]) + assert_array_equal(y, 2) + + assert_raises_regex(ValueError, '2 or 3 functions are expected', + piecewise, x, [x <= 3, x > 3], [1]) + assert_raises_regex(ValueError, '2 or 3 functions are expected', + piecewise, x, [x <= 3, x > 3], [1, 1, 1, 1]) + + def test_0d_0d_condition(self): + x = np.array(3) + c = np.array(x > 3) + y = piecewise(x, [c], [1, 2]) + assert_equal(y, 2) + + def test_multidimensional_extrafunc(self): + x = np.array([[-2.5, -1.5, -0.5], + [0.5, 1.5, 2.5]]) + y = piecewise(x, [x < 0, x >= 2], [-1, 1, 3]) + assert_array_equal(y, np.array([[-1., -1., -1.], + [3., 3., 1.]])) + + def test_subclasses(self): + class subclass(np.ndarray): + pass + x = np.arange(5.).view(subclass) + r = piecewise(x, [x<2., x>=4], [-1., 1., 0.]) + assert_equal(type(r), subclass) + assert_equal(r, [-1., -1., 0., 0., 1.]) + + +class TestBincount: + + def test_simple(self): + y = np.bincount(np.arange(4)) + assert_array_equal(y, np.ones(4)) + + def test_simple2(self): + y = np.bincount(np.array([1, 5, 2, 4, 1])) + assert_array_equal(y, np.array([0, 2, 1, 0, 1, 1])) + + def test_simple_weight(self): + x = np.arange(4) + w = np.array([0.2, 0.3, 0.5, 0.1]) + y = np.bincount(x, w) + assert_array_equal(y, w) + + def test_simple_weight2(self): + x = np.array([1, 2, 4, 5, 2]) + w = np.array([0.2, 0.3, 0.5, 0.1, 0.2]) + y = np.bincount(x, w) + assert_array_equal(y, np.array([0, 0.2, 0.5, 0, 0.5, 0.1])) + + def test_with_minlength(self): + x = np.array([0, 1, 0, 1, 1]) + y = np.bincount(x, minlength=3) + assert_array_equal(y, np.array([2, 3, 0])) + x = [] + y = np.bincount(x, minlength=0) + assert_array_equal(y, np.array([])) + + def test_with_minlength_smaller_than_maxvalue(self): + x = np.array([0, 1, 1, 2, 2, 3, 3]) + y = np.bincount(x, minlength=2) + assert_array_equal(y, np.array([1, 2, 2, 2])) + y = np.bincount(x, minlength=0) + assert_array_equal(y, np.array([1, 2, 2, 2])) + + def test_with_minlength_and_weights(self): + x = np.array([1, 2, 4, 5, 2]) + w = np.array([0.2, 0.3, 0.5, 0.1, 0.2]) + y = np.bincount(x, w, 8) + assert_array_equal(y, np.array([0, 0.2, 0.5, 0, 0.5, 0.1, 0, 0])) + + def test_empty(self): + x = np.array([], dtype=int) + y = np.bincount(x) + assert_array_equal(x, y) + + def test_empty_with_minlength(self): + x = np.array([], dtype=int) + y = np.bincount(x, minlength=5) + assert_array_equal(y, np.zeros(5, dtype=int)) + + def test_with_incorrect_minlength(self): + x = np.array([], dtype=int) + assert_raises_regex(TypeError, + "'str' object cannot be interpreted", + lambda: np.bincount(x, minlength="foobar")) + assert_raises_regex(ValueError, + "must not be negative", + lambda: np.bincount(x, minlength=-1)) + + x = np.arange(5) + assert_raises_regex(TypeError, + "'str' object cannot be interpreted", + lambda: np.bincount(x, minlength="foobar")) + assert_raises_regex(ValueError, + "must not be negative", + lambda: np.bincount(x, minlength=-1)) + + @pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") + def test_dtype_reference_leaks(self): + # gh-6805 + intp_refcount = sys.getrefcount(np.dtype(np.intp)) + double_refcount = sys.getrefcount(np.dtype(np.double)) + + for j in range(10): + np.bincount([1, 2, 3]) + assert_equal(sys.getrefcount(np.dtype(np.intp)), intp_refcount) + assert_equal(sys.getrefcount(np.dtype(np.double)), double_refcount) + + for j in range(10): + 
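+ # (editor's note) the weights argument makes this call exercise the np.double dtype path as well, whose refcount is re-checked below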
np.bincount([1, 2, 3], [4, 5, 6]) + assert_equal(sys.getrefcount(np.dtype(np.intp)), intp_refcount) + assert_equal(sys.getrefcount(np.dtype(np.double)), double_refcount) + + @pytest.mark.parametrize("vals", [[[2, 2]], 2]) + def test_error_not_1d(self, vals): + # Test that values has to be 1-D (both as array and nested list) + vals_arr = np.asarray(vals) + with assert_raises(ValueError): + np.bincount(vals_arr) + with assert_raises(ValueError): + np.bincount(vals) + + +class TestInterp: + + def test_exceptions(self): + assert_raises(ValueError, interp, 0, [], []) + assert_raises(ValueError, interp, 0, [0], [1, 2]) + assert_raises(ValueError, interp, 0, [0, 1], [1, 2], period=0) + assert_raises(ValueError, interp, 0, [], [], period=360) + assert_raises(ValueError, interp, 0, [0], [1, 2], period=360) + + def test_basic(self): + x = np.linspace(0, 1, 5) + y = np.linspace(0, 1, 5) + x0 = np.linspace(0, 1, 50) + assert_almost_equal(np.interp(x0, x, y), x0) + + def test_right_left_behavior(self): + # Needs range of sizes to test different code paths. + # size ==1 is special cased, 1 < size < 5 is linear search, and + # size >= 5 goes through local search and possibly binary search. + for size in range(1, 10): + xp = np.arange(size, dtype=np.double) + yp = np.ones(size, dtype=np.double) + incpts = np.array([-1, 0, size - 1, size], dtype=np.double) + decpts = incpts[::-1] + + incres = interp(incpts, xp, yp) + decres = interp(decpts, xp, yp) + inctgt = np.array([1, 1, 1, 1], dtype=float) + dectgt = inctgt[::-1] + assert_equal(incres, inctgt) + assert_equal(decres, dectgt) + + incres = interp(incpts, xp, yp, left=0) + decres = interp(decpts, xp, yp, left=0) + inctgt = np.array([0, 1, 1, 1], dtype=float) + dectgt = inctgt[::-1] + assert_equal(incres, inctgt) + assert_equal(decres, dectgt) + + incres = interp(incpts, xp, yp, right=2) + decres = interp(decpts, xp, yp, right=2) + inctgt = np.array([1, 1, 1, 2], dtype=float) + dectgt = inctgt[::-1] + assert_equal(incres, inctgt) + assert_equal(decres, dectgt) + + incres = interp(incpts, xp, yp, left=0, right=2) + decres = interp(decpts, xp, yp, left=0, right=2) + inctgt = np.array([0, 1, 1, 2], dtype=float) + dectgt = inctgt[::-1] + assert_equal(incres, inctgt) + assert_equal(decres, dectgt) + + def test_scalar_interpolation_point(self): + x = np.linspace(0, 1, 5) + y = np.linspace(0, 1, 5) + x0 = 0 + assert_almost_equal(np.interp(x0, x, y), x0) + x0 = .3 + assert_almost_equal(np.interp(x0, x, y), x0) + x0 = np.float32(.3) + assert_almost_equal(np.interp(x0, x, y), x0) + x0 = np.float64(.3) + assert_almost_equal(np.interp(x0, x, y), x0) + x0 = np.nan + assert_almost_equal(np.interp(x0, x, y), x0) + + def test_non_finite_behavior_exact_x(self): + x = [1, 2, 2.5, 3, 4] + xp = [1, 2, 3, 4] + fp = [1, 2, np.inf, 4] + assert_almost_equal(np.interp(x, xp, fp), [1, 2, np.inf, np.inf, 4]) + fp = [1, 2, np.nan, 4] + assert_almost_equal(np.interp(x, xp, fp), [1, 2, np.nan, np.nan, 4]) + + @pytest.fixture(params=[ + lambda x: np.float_(x), + lambda x: _make_complex(x, 0), + lambda x: _make_complex(0, x), + lambda x: _make_complex(x, np.multiply(x, -2)) + ], ids=[ + 'real', + 'complex-real', + 'complex-imag', + 'complex-both' + ]) + def sc(self, request): + """ scale function used by the below tests """ + return request.param + + def test_non_finite_any_nan(self, sc): + """ test that nans are propagated """ + assert_equal(np.interp(0.5, [np.nan, 1], sc([ 0, 10])), sc(np.nan)) + assert_equal(np.interp(0.5, [ 0, np.nan], sc([ 0, 10])), sc(np.nan)) + 
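+ # NaN in the fp values (wrapped by sc) must propagate to the result just like NaN in xp above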
assert_equal(np.interp(0.5, [ 0, 1], sc([np.nan, 10])), sc(np.nan)) + assert_equal(np.interp(0.5, [ 0, 1], sc([ 0, np.nan])), sc(np.nan)) + + def test_non_finite_inf(self, sc): + """ Test that interp between opposite infs gives nan """ + assert_equal(np.interp(0.5, [-np.inf, +np.inf], sc([ 0, 10])), sc(np.nan)) + assert_equal(np.interp(0.5, [ 0, 1], sc([-np.inf, +np.inf])), sc(np.nan)) + assert_equal(np.interp(0.5, [ 0, 1], sc([+np.inf, -np.inf])), sc(np.nan)) + + # unless the y values are equal + assert_equal(np.interp(0.5, [-np.inf, +np.inf], sc([ 10, 10])), sc(10)) + + def test_non_finite_half_inf_xf(self, sc): + """ Test that interp where both axes have a bound at inf gives nan """ + assert_equal(np.interp(0.5, [-np.inf, 1], sc([-np.inf, 10])), sc(np.nan)) + assert_equal(np.interp(0.5, [-np.inf, 1], sc([+np.inf, 10])), sc(np.nan)) + assert_equal(np.interp(0.5, [-np.inf, 1], sc([ 0, -np.inf])), sc(np.nan)) + assert_equal(np.interp(0.5, [-np.inf, 1], sc([ 0, +np.inf])), sc(np.nan)) + assert_equal(np.interp(0.5, [ 0, +np.inf], sc([-np.inf, 10])), sc(np.nan)) + assert_equal(np.interp(0.5, [ 0, +np.inf], sc([+np.inf, 10])), sc(np.nan)) + assert_equal(np.interp(0.5, [ 0, +np.inf], sc([ 0, -np.inf])), sc(np.nan)) + assert_equal(np.interp(0.5, [ 0, +np.inf], sc([ 0, +np.inf])), sc(np.nan)) + + def test_non_finite_half_inf_x(self, sc): + """ Test interp where the x axis has a bound at inf """ + assert_equal(np.interp(0.5, [-np.inf, -np.inf], sc([0, 10])), sc(10)) + assert_equal(np.interp(0.5, [-np.inf, 1 ], sc([0, 10])), sc(10)) + assert_equal(np.interp(0.5, [ 0, +np.inf], sc([0, 10])), sc(0)) + assert_equal(np.interp(0.5, [+np.inf, +np.inf], sc([0, 10])), sc(0)) + + def test_non_finite_half_inf_f(self, sc): + """ Test interp where the f axis has a bound at inf """ + assert_equal(np.interp(0.5, [0, 1], sc([ 0, -np.inf])), sc(-np.inf)) + assert_equal(np.interp(0.5, [0, 1], sc([ 0, +np.inf])), sc(+np.inf)) + assert_equal(np.interp(0.5, [0, 1], sc([-np.inf, 10])), sc(-np.inf)) + assert_equal(np.interp(0.5, [0, 1], sc([+np.inf, 10])), sc(+np.inf)) + assert_equal(np.interp(0.5, [0, 1], sc([-np.inf, -np.inf])), sc(-np.inf)) + assert_equal(np.interp(0.5, [0, 1], sc([+np.inf, +np.inf])), sc(+np.inf)) + + def test_complex_interp(self): + # test complex interpolation + x = np.linspace(0, 1, 5) + y = np.linspace(0, 1, 5) + (1 + np.linspace(0, 1, 5))*1.0j + x0 = 0.3 + y0 = x0 + (1+x0)*1.0j + assert_almost_equal(np.interp(x0, x, y), y0) + # test complex left and right + x0 = -1 + left = 2 + 3.0j + assert_almost_equal(np.interp(x0, x, y, left=left), left) + x0 = 2.0 + right = 2 + 3.0j + assert_almost_equal(np.interp(x0, x, y, right=right), right) + # test complex non finite + x = [1, 2, 2.5, 3, 4] + xp = [1, 2, 3, 4] + fp = [1, 2+1j, np.inf, 4] + y = [1, 2+1j, np.inf+0.5j, np.inf, 4] + assert_almost_equal(np.interp(x, xp, fp), y) + # test complex periodic + x = [-180, -170, -185, 185, -10, -5, 0, 365] + xp = [190, -190, 350, -350] + fp = [5+1.0j, 10+2j, 3+3j, 4+4j] + y = [7.5+1.5j, 5.+1.0j, 8.75+1.75j, 6.25+1.25j, 3.+3j, 3.25+3.25j, + 3.5+3.5j, 3.75+3.75j] + assert_almost_equal(np.interp(x, xp, fp, period=360), y) + + def test_zero_dimensional_interpolation_point(self): + x = np.linspace(0, 1, 5) + y = np.linspace(0, 1, 5) + x0 = np.array(.3) + assert_almost_equal(np.interp(x0, x, y), x0) + + xp = np.array([0, 2, 4]) + fp = np.array([1, -1, 1]) + + actual = np.interp(np.array(1), xp, fp) + assert_equal(actual, 0) + assert_(isinstance(actual, np.float64)) + + actual = np.interp(np.array(4.5), xp, fp, 
period=4) + assert_equal(actual, 0.5) + assert_(isinstance(actual, np.float64)) + + def test_if_len_x_is_small(self): + xp = np.arange(0, 10, 0.0001) + fp = np.sin(xp) + assert_almost_equal(np.interp(np.pi, xp, fp), 0.0) + + def test_period(self): + x = [-180, -170, -185, 185, -10, -5, 0, 365] + xp = [190, -190, 350, -350] + fp = [5, 10, 3, 4] + y = [7.5, 5., 8.75, 6.25, 3., 3.25, 3.5, 3.75] + assert_almost_equal(np.interp(x, xp, fp, period=360), y) + x = np.array(x, order='F').reshape(2, -1) + y = np.array(y, order='C').reshape(2, -1) + assert_almost_equal(np.interp(x, xp, fp, period=360), y) + + +class TestPercentile: + + def test_basic(self): + x = np.arange(8) * 0.5 + assert_equal(np.percentile(x, 0), 0.) + assert_equal(np.percentile(x, 100), 3.5) + assert_equal(np.percentile(x, 50), 1.75) + x[1] = np.nan + assert_equal(np.percentile(x, 0), np.nan) + assert_equal(np.percentile(x, 0, method='nearest'), np.nan) + + def test_fraction(self): + x = [Fraction(i, 2) for i in range(8)] + + p = np.percentile(x, Fraction(0)) + assert_equal(p, Fraction(0)) + assert_equal(type(p), Fraction) + + p = np.percentile(x, Fraction(100)) + assert_equal(p, Fraction(7, 2)) + assert_equal(type(p), Fraction) + + p = np.percentile(x, Fraction(50)) + assert_equal(p, Fraction(7, 4)) + assert_equal(type(p), Fraction) + + p = np.percentile(x, [Fraction(50)]) + assert_equal(p, np.array([Fraction(7, 4)])) + assert_equal(type(p), np.ndarray) + + def test_api(self): + d = np.ones(5) + np.percentile(d, 5, None, None, False) + np.percentile(d, 5, None, None, False, 'linear') + o = np.ones((1,)) + np.percentile(d, 5, None, o, False, 'linear') + + def test_2D(self): + x = np.array([[1, 1, 1], + [1, 1, 1], + [4, 4, 3], + [1, 1, 1], + [1, 1, 1]]) + assert_array_equal(np.percentile(x, 50, axis=0), [1, 1, 1]) + + @pytest.mark.parametrize("dtype", np.typecodes["AllFloat"]) + def test_linear_nan_1D(self, dtype): + # METHOD 1 of H&F + arr = np.asarray([15.0, np.NAN, 35.0, 40.0, 50.0], dtype=dtype) + res = np.percentile( + arr, + 40.0, + method="linear") + np.testing.assert_equal(res, np.NAN) + np.testing.assert_equal(res.dtype, arr.dtype) + + H_F_TYPE_CODES = [(int_type, np.float64) + for int_type in np.typecodes["AllInteger"] + ] + [(np.float16, np.float64), + (np.float32, np.float64), + (np.float64, np.float64), + (np.longdouble, np.longdouble), + (np.complex64, np.complex128), + (np.complex128, np.complex128), + (np.clongdouble, np.clongdouble), + (np.dtype("O"), np.float64)] + + @pytest.mark.parametrize(["input_dtype", "expected_dtype"], H_F_TYPE_CODES) + @pytest.mark.parametrize(["method", "expected"], + [("inverted_cdf", 20), + ("averaged_inverted_cdf", 27.5), + ("closest_observation", 20), + ("interpolated_inverted_cdf", 20), + ("hazen", 27.5), + ("weibull", 26), + ("linear", 29), + ("median_unbiased", 27), + ("normal_unbiased", 27.125), + ]) + def test_linear_interpolation(self, + method, + expected, + input_dtype, + expected_dtype): + arr = np.asarray([15.0, 20.0, 35.0, 40.0, 50.0], dtype=input_dtype) + actual = np.percentile(arr, 40.0, method=method) + + np.testing.assert_almost_equal(actual, expected, 14) + + if method in ["inverted_cdf", "closest_observation"]: + if input_dtype == "O": + np.testing.assert_equal(np.asarray(actual).dtype, np.float64) + else: + np.testing.assert_equal(np.asarray(actual).dtype, + np.dtype(input_dtype)) + else: + np.testing.assert_equal(np.asarray(actual).dtype, + np.dtype(expected_dtype)) + + TYPE_CODES = np.typecodes["AllInteger"] + np.typecodes["AllFloat"] + "O" + + 
@pytest.mark.parametrize("dtype", TYPE_CODES) + def test_lower_higher(self, dtype): + assert_equal(np.percentile(np.arange(10, dtype=dtype), 50, + method='lower'), 4) + assert_equal(np.percentile(np.arange(10, dtype=dtype), 50, + method='higher'), 5) + + @pytest.mark.parametrize("dtype", TYPE_CODES) + def test_midpoint(self, dtype): + assert_equal(np.percentile(np.arange(10, dtype=dtype), 51, + method='midpoint'), 4.5) + assert_equal(np.percentile(np.arange(9, dtype=dtype) + 1, 50, + method='midpoint'), 5) + assert_equal(np.percentile(np.arange(11, dtype=dtype), 51, + method='midpoint'), 5.5) + assert_equal(np.percentile(np.arange(11, dtype=dtype), 50, + method='midpoint'), 5) + + @pytest.mark.parametrize("dtype", TYPE_CODES) + def test_nearest(self, dtype): + assert_equal(np.percentile(np.arange(10, dtype=dtype), 51, + method='nearest'), 5) + assert_equal(np.percentile(np.arange(10, dtype=dtype), 49, + method='nearest'), 4) + + def test_linear_interpolation_extrapolation(self): + arr = np.random.rand(5) + + actual = np.percentile(arr, 100) + np.testing.assert_equal(actual, arr.max()) + + actual = np.percentile(arr, 0) + np.testing.assert_equal(actual, arr.min()) + + def test_sequence(self): + x = np.arange(8) * 0.5 + assert_equal(np.percentile(x, [0, 100, 50]), [0, 3.5, 1.75]) + + def test_axis(self): + x = np.arange(12).reshape(3, 4) + + assert_equal(np.percentile(x, (25, 50, 100)), [2.75, 5.5, 11.0]) + + r0 = [[2, 3, 4, 5], [4, 5, 6, 7], [8, 9, 10, 11]] + assert_equal(np.percentile(x, (25, 50, 100), axis=0), r0) + + r1 = [[0.75, 1.5, 3], [4.75, 5.5, 7], [8.75, 9.5, 11]] + assert_equal(np.percentile(x, (25, 50, 100), axis=1), np.array(r1).T) + + # ensure qth axis is always first as with np.array(old_percentile(..)) + x = np.arange(3 * 4 * 5 * 6).reshape(3, 4, 5, 6) + assert_equal(np.percentile(x, (25, 50)).shape, (2,)) + assert_equal(np.percentile(x, (25, 50, 75)).shape, (3,)) + assert_equal(np.percentile(x, (25, 50), axis=0).shape, (2, 4, 5, 6)) + assert_equal(np.percentile(x, (25, 50), axis=1).shape, (2, 3, 5, 6)) + assert_equal(np.percentile(x, (25, 50), axis=2).shape, (2, 3, 4, 6)) + assert_equal(np.percentile(x, (25, 50), axis=3).shape, (2, 3, 4, 5)) + assert_equal( + np.percentile(x, (25, 50, 75), axis=1).shape, (3, 3, 5, 6)) + assert_equal(np.percentile(x, (25, 50), + method="higher").shape, (2,)) + assert_equal(np.percentile(x, (25, 50, 75), + method="higher").shape, (3,)) + assert_equal(np.percentile(x, (25, 50), axis=0, + method="higher").shape, (2, 4, 5, 6)) + assert_equal(np.percentile(x, (25, 50), axis=1, + method="higher").shape, (2, 3, 5, 6)) + assert_equal(np.percentile(x, (25, 50), axis=2, + method="higher").shape, (2, 3, 4, 6)) + assert_equal(np.percentile(x, (25, 50), axis=3, + method="higher").shape, (2, 3, 4, 5)) + assert_equal(np.percentile(x, (25, 50, 75), axis=1, + method="higher").shape, (3, 3, 5, 6)) + + def test_scalar_q(self): + # test for no empty dimensions for compatibility with old percentile + x = np.arange(12).reshape(3, 4) + assert_equal(np.percentile(x, 50), 5.5) + assert_(np.isscalar(np.percentile(x, 50))) + r0 = np.array([4., 5., 6., 7.]) + assert_equal(np.percentile(x, 50, axis=0), r0) + assert_equal(np.percentile(x, 50, axis=0).shape, r0.shape) + r1 = np.array([1.5, 5.5, 9.5]) + assert_almost_equal(np.percentile(x, 50, axis=1), r1) + assert_equal(np.percentile(x, 50, axis=1).shape, r1.shape) + + out = np.empty(1) + assert_equal(np.percentile(x, 50, out=out), 5.5) + assert_equal(out, 5.5) + out = np.empty(4) + assert_equal(np.percentile(x, 50, 
axis=0, out=out), r0) + assert_equal(out, r0) + out = np.empty(3) + assert_equal(np.percentile(x, 50, axis=1, out=out), r1) + assert_equal(out, r1) + + # test for no empty dimensions for compatibility with old percentile + x = np.arange(12).reshape(3, 4) + assert_equal(np.percentile(x, 50, method='lower'), 5.) + assert_(np.isscalar(np.percentile(x, 50))) + r0 = np.array([4., 5., 6., 7.]) + c0 = np.percentile(x, 50, method='lower', axis=0) + assert_equal(c0, r0) + assert_equal(c0.shape, r0.shape) + r1 = np.array([1., 5., 9.]) + c1 = np.percentile(x, 50, method='lower', axis=1) + assert_almost_equal(c1, r1) + assert_equal(c1.shape, r1.shape) + + out = np.empty((), dtype=x.dtype) + c = np.percentile(x, 50, method='lower', out=out) + assert_equal(c, 5) + assert_equal(out, 5) + out = np.empty(4, dtype=x.dtype) + c = np.percentile(x, 50, method='lower', axis=0, out=out) + assert_equal(c, r0) + assert_equal(out, r0) + out = np.empty(3, dtype=x.dtype) + c = np.percentile(x, 50, method='lower', axis=1, out=out) + assert_equal(c, r1) + assert_equal(out, r1) + + def test_exception(self): + assert_raises(ValueError, np.percentile, [1, 2], 56, + method='foobar') + assert_raises(ValueError, np.percentile, [1], 101) + assert_raises(ValueError, np.percentile, [1], -1) + assert_raises(ValueError, np.percentile, [1], list(range(50)) + [101]) + assert_raises(ValueError, np.percentile, [1], list(range(50)) + [-0.1]) + + def test_percentile_list(self): + assert_equal(np.percentile([1, 2, 3], 0), 1) + + def test_percentile_out(self): + x = np.array([1, 2, 3]) + y = np.zeros((3,)) + p = (1, 2, 3) + np.percentile(x, p, out=y) + assert_equal(np.percentile(x, p), y) + + x = np.array([[1, 2, 3], + [4, 5, 6]]) + + y = np.zeros((3, 3)) + np.percentile(x, p, axis=0, out=y) + assert_equal(np.percentile(x, p, axis=0), y) + + y = np.zeros((3, 2)) + np.percentile(x, p, axis=1, out=y) + assert_equal(np.percentile(x, p, axis=1), y) + + x = np.arange(12).reshape(3, 4) + # q.dim > 1, float + r0 = np.array([[2., 3., 4., 5.], [4., 5., 6., 7.]]) + out = np.empty((2, 4)) + assert_equal(np.percentile(x, (25, 50), axis=0, out=out), r0) + assert_equal(out, r0) + r1 = np.array([[0.75, 4.75, 8.75], [1.5, 5.5, 9.5]]) + out = np.empty((2, 3)) + assert_equal(np.percentile(x, (25, 50), axis=1, out=out), r1) + assert_equal(out, r1) + + # q.dim > 1, int + r0 = np.array([[0, 1, 2, 3], [4, 5, 6, 7]]) + out = np.empty((2, 4), dtype=x.dtype) + c = np.percentile(x, (25, 50), method='lower', axis=0, out=out) + assert_equal(c, r0) + assert_equal(out, r0) + r1 = np.array([[0, 4, 8], [1, 5, 9]]) + out = np.empty((2, 3), dtype=x.dtype) + c = np.percentile(x, (25, 50), method='lower', axis=1, out=out) + assert_equal(c, r1) + assert_equal(out, r1) + + def test_percentile_empty_dim(self): + # empty dims are preserved + d = np.arange(11 * 2).reshape(11, 1, 2, 1) + assert_array_equal(np.percentile(d, 50, axis=0).shape, (1, 2, 1)) + assert_array_equal(np.percentile(d, 50, axis=1).shape, (11, 2, 1)) + assert_array_equal(np.percentile(d, 50, axis=2).shape, (11, 1, 1)) + assert_array_equal(np.percentile(d, 50, axis=3).shape, (11, 1, 2)) + assert_array_equal(np.percentile(d, 50, axis=-1).shape, (11, 1, 2)) + assert_array_equal(np.percentile(d, 50, axis=-2).shape, (11, 1, 1)) + assert_array_equal(np.percentile(d, 50, axis=-3).shape, (11, 2, 1)) + assert_array_equal(np.percentile(d, 50, axis=-4).shape, (1, 2, 1)) + + assert_array_equal(np.percentile(d, 50, axis=2, + method='midpoint').shape, + (11, 1, 1)) + assert_array_equal(np.percentile(d, 50, axis=-2, + 
method='midpoint').shape, + (11, 1, 1)) + + assert_array_equal(np.array(np.percentile(d, [10, 50], axis=0)).shape, + (2, 1, 2, 1)) + assert_array_equal(np.array(np.percentile(d, [10, 50], axis=1)).shape, + (2, 11, 2, 1)) + assert_array_equal(np.array(np.percentile(d, [10, 50], axis=2)).shape, + (2, 11, 1, 1)) + assert_array_equal(np.array(np.percentile(d, [10, 50], axis=3)).shape, + (2, 11, 1, 2)) + + def test_percentile_no_overwrite(self): + a = np.array([2, 3, 4, 1]) + np.percentile(a, [50], overwrite_input=False) + assert_equal(a, np.array([2, 3, 4, 1])) + + a = np.array([2, 3, 4, 1]) + np.percentile(a, [50]) + assert_equal(a, np.array([2, 3, 4, 1])) + + def test_no_p_overwrite(self): + p = np.linspace(0., 100., num=5) + np.percentile(np.arange(100.), p, method="midpoint") + assert_array_equal(p, np.linspace(0., 100., num=5)) + p = np.linspace(0., 100., num=5).tolist() + np.percentile(np.arange(100.), p, method="midpoint") + assert_array_equal(p, np.linspace(0., 100., num=5).tolist()) + + def test_percentile_overwrite(self): + a = np.array([2, 3, 4, 1]) + b = np.percentile(a, [50], overwrite_input=True) + assert_equal(b, np.array([2.5])) + + b = np.percentile([2, 3, 4, 1], [50], overwrite_input=True) + assert_equal(b, np.array([2.5])) + + def test_extended_axis(self): + o = np.random.normal(size=(71, 23)) + x = np.dstack([o] * 10) + assert_equal(np.percentile(x, 30, axis=(0, 1)), np.percentile(o, 30)) + x = np.moveaxis(x, -1, 0) + assert_equal(np.percentile(x, 30, axis=(-2, -1)), np.percentile(o, 30)) + x = x.swapaxes(0, 1).copy() + assert_equal(np.percentile(x, 30, axis=(0, -1)), np.percentile(o, 30)) + x = x.swapaxes(0, 1).copy() + + assert_equal(np.percentile(x, [25, 60], axis=(0, 1, 2)), + np.percentile(x, [25, 60], axis=None)) + assert_equal(np.percentile(x, [25, 60], axis=(0,)), + np.percentile(x, [25, 60], axis=0)) + + d = np.arange(3 * 5 * 7 * 11).reshape((3, 5, 7, 11)) + np.random.shuffle(d.ravel()) + assert_equal(np.percentile(d, 25, axis=(0, 1, 2))[0], + np.percentile(d[:,:,:, 0].flatten(), 25)) + assert_equal(np.percentile(d, [10, 90], axis=(0, 1, 3))[:, 1], + np.percentile(d[:,:, 1,:].flatten(), [10, 90])) + assert_equal(np.percentile(d, 25, axis=(3, 1, -4))[2], + np.percentile(d[:,:, 2,:].flatten(), 25)) + assert_equal(np.percentile(d, 25, axis=(3, 1, 2))[2], + np.percentile(d[2,:,:,:].flatten(), 25)) + assert_equal(np.percentile(d, 25, axis=(3, 2))[2, 1], + np.percentile(d[2, 1,:,:].flatten(), 25)) + assert_equal(np.percentile(d, 25, axis=(1, -2))[2, 1], + np.percentile(d[2,:,:, 1].flatten(), 25)) + assert_equal(np.percentile(d, 25, axis=(1, 3))[2, 2], + np.percentile(d[2,:, 2,:].flatten(), 25)) + + def test_extended_axis_invalid(self): + d = np.ones((3, 5, 7, 11)) + assert_raises(np.AxisError, np.percentile, d, axis=-5, q=25) + assert_raises(np.AxisError, np.percentile, d, axis=(0, -5), q=25) + assert_raises(np.AxisError, np.percentile, d, axis=4, q=25) + assert_raises(np.AxisError, np.percentile, d, axis=(0, 4), q=25) + # each of these refers to the same axis twice + assert_raises(ValueError, np.percentile, d, axis=(1, 1), q=25) + assert_raises(ValueError, np.percentile, d, axis=(-1, -1), q=25) + assert_raises(ValueError, np.percentile, d, axis=(3, -1), q=25) + + def test_keepdims(self): + d = np.ones((3, 5, 7, 11)) + assert_equal(np.percentile(d, 7, axis=None, keepdims=True).shape, + (1, 1, 1, 1)) + assert_equal(np.percentile(d, 7, axis=(0, 1), keepdims=True).shape, + (1, 1, 7, 11)) + assert_equal(np.percentile(d, 7, axis=(0, 3), keepdims=True).shape, + (1, 5, 7, 1)) + 
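+ # each reduced axis is kept as a length-1 dimension, so every result below still broadcasts against d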
assert_equal(np.percentile(d, 7, axis=(1,), keepdims=True).shape, + (3, 1, 7, 11)) + assert_equal(np.percentile(d, 7, (0, 1, 2, 3), keepdims=True).shape, + (1, 1, 1, 1)) + assert_equal(np.percentile(d, 7, axis=(0, 1, 3), keepdims=True).shape, + (1, 1, 7, 1)) + + assert_equal(np.percentile(d, [1, 7], axis=(0, 1, 3), + keepdims=True).shape, (2, 1, 1, 7, 1)) + assert_equal(np.percentile(d, [1, 7], axis=(0, 3), + keepdims=True).shape, (2, 1, 5, 7, 1)) + + def test_out(self): + o = np.zeros((4,)) + d = np.ones((3, 4)) + assert_equal(np.percentile(d, 0, 0, out=o), o) + assert_equal(np.percentile(d, 0, 0, method='nearest', out=o), o) + o = np.zeros((3,)) + assert_equal(np.percentile(d, 1, 1, out=o), o) + assert_equal(np.percentile(d, 1, 1, method='nearest', out=o), o) + + o = np.zeros(()) + assert_equal(np.percentile(d, 2, out=o), o) + assert_equal(np.percentile(d, 2, method='nearest', out=o), o) + + def test_out_nan(self): + with warnings.catch_warnings(record=True): + warnings.filterwarnings('always', '', RuntimeWarning) + o = np.zeros((4,)) + d = np.ones((3, 4)) + d[2, 1] = np.nan + assert_equal(np.percentile(d, 0, 0, out=o), o) + assert_equal( + np.percentile(d, 0, 0, method='nearest', out=o), o) + o = np.zeros((3,)) + assert_equal(np.percentile(d, 1, 1, out=o), o) + assert_equal( + np.percentile(d, 1, 1, method='nearest', out=o), o) + o = np.zeros(()) + assert_equal(np.percentile(d, 1, out=o), o) + assert_equal( + np.percentile(d, 1, method='nearest', out=o), o) + + def test_nan_behavior(self): + a = np.arange(24, dtype=float) + a[2] = np.nan + assert_equal(np.percentile(a, 0.3), np.nan) + assert_equal(np.percentile(a, 0.3, axis=0), np.nan) + assert_equal(np.percentile(a, [0.3, 0.6], axis=0), + np.array([np.nan] * 2)) + + a = np.arange(24, dtype=float).reshape(2, 3, 4) + a[1, 2, 3] = np.nan + a[1, 1, 2] = np.nan + + # no axis + assert_equal(np.percentile(a, 0.3), np.nan) + assert_equal(np.percentile(a, 0.3).ndim, 0) + + # axis0 zerod + b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), 0.3, 0) + b[2, 3] = np.nan + b[1, 2] = np.nan + assert_equal(np.percentile(a, 0.3, 0), b) + + # axis0 not zerod + b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), + [0.3, 0.6], 0) + b[:, 2, 3] = np.nan + b[:, 1, 2] = np.nan + assert_equal(np.percentile(a, [0.3, 0.6], 0), b) + + # axis1 zerod + b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), 0.3, 1) + b[1, 3] = np.nan + b[1, 2] = np.nan + assert_equal(np.percentile(a, 0.3, 1), b) + # axis1 not zerod + b = np.percentile( + np.arange(24, dtype=float).reshape(2, 3, 4), [0.3, 0.6], 1) + b[:, 1, 3] = np.nan + b[:, 1, 2] = np.nan + assert_equal(np.percentile(a, [0.3, 0.6], 1), b) + + # axis02 zerod + b = np.percentile( + np.arange(24, dtype=float).reshape(2, 3, 4), 0.3, (0, 2)) + b[1] = np.nan + b[2] = np.nan + assert_equal(np.percentile(a, 0.3, (0, 2)), b) + # axis02 not zerod + b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), + [0.3, 0.6], (0, 2)) + b[:, 1] = np.nan + b[:, 2] = np.nan + assert_equal(np.percentile(a, [0.3, 0.6], (0, 2)), b) + # axis02 not zerod with method='nearest' + b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), + [0.3, 0.6], (0, 2), method='nearest') + b[:, 1] = np.nan + b[:, 2] = np.nan + assert_equal(np.percentile( + a, [0.3, 0.6], (0, 2), method='nearest'), b) + + def test_nan_q(self): + # GH18830 + with pytest.raises(ValueError, match="Percentiles must be in"): + np.percentile([1, 2, 3, 4.0], np.nan) + with pytest.raises(ValueError, match="Percentiles must be in"): + 
np.percentile([1, 2, 3, 4.0], [np.nan]) + q = np.linspace(1.0, 99.0, 16) + q[0] = np.nan + with pytest.raises(ValueError, match="Percentiles must be in"): + np.percentile([1, 2, 3, 4.0], q) + + +class TestQuantile: + # most of this is already tested by TestPercentile + + def test_max_ulp(self): + x = [0.0, 0.2, 0.4] + a = np.quantile(x, 0.45) + # The default linear method would result in 0 + 0.2 * (0.45/0.5) = 0.18. + # 0.18 is not exactly representable and the formula leads to a 1 ULP + # different result. Ensure the result is exact to within 1 ULP; see gh-20331. + np.testing.assert_array_max_ulp(a, 0.18, maxulp=1) + + def test_basic(self): + x = np.arange(8) * 0.5 + assert_equal(np.quantile(x, 0), 0.) + assert_equal(np.quantile(x, 1), 3.5) + assert_equal(np.quantile(x, 0.5), 1.75) + + @pytest.mark.xfail(reason="See gh-19154") + def test_correct_quantile_value(self): + a = np.array([True]) + tf_quant = np.quantile(True, False) + assert_equal(tf_quant, a[0]) + assert_equal(type(tf_quant), a.dtype) + a = np.array([False, True, True]) + quant_res = np.quantile(a, a) + assert_array_equal(quant_res, a) + assert_equal(quant_res.dtype, a.dtype) + + def test_fraction(self): + # fractional input, integral quantile + x = [Fraction(i, 2) for i in range(8)] + q = np.quantile(x, 0) + assert_equal(q, 0) + assert_equal(type(q), Fraction) + + q = np.quantile(x, 1) + assert_equal(q, Fraction(7, 2)) + assert_equal(type(q), Fraction) + + q = np.quantile(x, Fraction(1, 2)) + assert_equal(q, Fraction(7, 4)) + assert_equal(type(q), Fraction) + + q = np.quantile(x, [Fraction(1, 2)]) + assert_equal(q, np.array([Fraction(7, 4)])) + assert_equal(type(q), np.ndarray) + + q = np.quantile(x, [[Fraction(1, 2)]]) + assert_equal(q, np.array([[Fraction(7, 4)]])) + assert_equal(type(q), np.ndarray) + + # repeat with integral input but fractional quantile + x = np.arange(8) + assert_equal(np.quantile(x, Fraction(1, 2)), Fraction(7, 2)) + + def test_no_p_overwrite(self): + # this is worth retesting, because quantile does not make a copy + p0 = np.array([0, 0.75, 0.25, 0.5, 1.0]) + p = p0.copy() + np.quantile(np.arange(100.), p, method="midpoint") + assert_array_equal(p, p0) + + p0 = p0.tolist() + p = p.tolist() + np.quantile(np.arange(100.), p, method="midpoint") + assert_array_equal(p, p0) + + @pytest.mark.parametrize("dtype", np.typecodes["AllInteger"]) + def test_quantile_preserve_int_type(self, dtype): + res = np.quantile(np.array([1, 2], dtype=dtype), [0.5], + method="nearest") + assert res.dtype == dtype + + @pytest.mark.parametrize("method", + ['inverted_cdf', 'averaged_inverted_cdf', 'closest_observation', + 'interpolated_inverted_cdf', 'hazen', 'weibull', 'linear', + 'median_unbiased', 'normal_unbiased', + 'nearest', 'lower', 'higher', 'midpoint']) + def test_quantile_monotonic(self, method): + # GH 14685 + # Test that the return value of quantile is monotonic if p0 is ordered. + # Also check that the boundary values are not mishandled.
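+ # np.sort serves as the oracle below: a monotone sequence is unchanged by sorting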
+ p0 = np.linspace(0, 1, 101) + quantile = np.quantile(np.array([0, 1, 1, 2, 2, 3, 3, 4, 5, 5, 1, 1, 9, 9, 9, + 8, 8, 7]) * 0.1, p0, method=method) + assert_equal(np.sort(quantile), quantile) + + # Also test one where the number of data points is clearly divisible: + quantile = np.quantile([0., 1., 2., 3.], p0, method=method) + assert_equal(np.sort(quantile), quantile) + + @hypothesis.given( + arr=arrays(dtype=np.float64, + shape=st.integers(min_value=3, max_value=1000), + elements=st.floats(allow_infinity=False, allow_nan=False, + min_value=-1e300, max_value=1e300))) + def test_quantile_monotonic_hypo(self, arr): + p0 = np.arange(0, 1, 0.01) + quantile = np.quantile(arr, p0) + assert_equal(np.sort(quantile), quantile) + + def test_quantile_scalar_nan(self): + a = np.array([[10., 7., 4.], [3., 2., 1.]]) + a[0][1] = np.nan + actual = np.quantile(a, 0.5) + assert np.isscalar(actual) + assert_equal(np.quantile(a, 0.5), np.nan) + +class TestLerp: + @hypothesis.given(t0=st.floats(allow_nan=False, allow_infinity=False, + min_value=0, max_value=1), + t1=st.floats(allow_nan=False, allow_infinity=False, + min_value=0, max_value=1), + a = st.floats(allow_nan=False, allow_infinity=False, + min_value=-1e300, max_value=1e300), + b = st.floats(allow_nan=False, allow_infinity=False, + min_value=-1e300, max_value=1e300)) + def test_linear_interpolation_formula_monotonic(self, t0, t1, a, b): + l0 = nfb._lerp(a, b, t0) + l1 = nfb._lerp(a, b, t1) + if t0 == t1 or a == b: + assert l0 == l1 # uninteresting + elif (t0 < t1) == (a < b): + assert l0 <= l1 + else: + assert l0 >= l1 + + @hypothesis.given(t=st.floats(allow_nan=False, allow_infinity=False, + min_value=0, max_value=1), + a=st.floats(allow_nan=False, allow_infinity=False, + min_value=-1e300, max_value=1e300), + b=st.floats(allow_nan=False, allow_infinity=False, + min_value=-1e300, max_value=1e300)) + def test_linear_interpolation_formula_bounded(self, t, a, b): + if a <= b: + assert a <= nfb._lerp(a, b, t) <= b + else: + assert b <= nfb._lerp(a, b, t) <= a + + @hypothesis.given(t=st.floats(allow_nan=False, allow_infinity=False, + min_value=0, max_value=1), + a=st.floats(allow_nan=False, allow_infinity=False, + min_value=-1e300, max_value=1e300), + b=st.floats(allow_nan=False, allow_infinity=False, + min_value=-1e300, max_value=1e300)) + def test_linear_interpolation_formula_symmetric(self, t, a, b): + # double subtraction is needed to remove the extra precision of t < 0.5 + left = nfb._lerp(a, b, 1 - (1 - t)) + right = nfb._lerp(b, a, 1 - t) + assert left == right + + def test_linear_interpolation_formula_0d_inputs(self): + a = np.array(2) + b = np.array(5) + t = np.array(0.2) + assert nfb._lerp(a, b, t) == 2.6 + + +class TestMedian: + + def test_basic(self): + a0 = np.array(1) + a1 = np.arange(2) + a2 = np.arange(6).reshape(2, 3) + assert_equal(np.median(a0), 1) + assert_allclose(np.median(a1), 0.5) + assert_allclose(np.median(a2), 2.5) + assert_allclose(np.median(a2, axis=0), [1.5, 2.5, 3.5]) + assert_equal(np.median(a2, axis=1), [1, 4]) + assert_allclose(np.median(a2, axis=None), 2.5) + + a = np.array([0.0444502, 0.0463301, 0.141249, 0.0606775]) + assert_almost_equal((a[1] + a[3]) / 2., np.median(a)) + a = np.array([0.0463301, 0.0444502, 0.141249]) + assert_equal(a[0], np.median(a)) + a = np.array([0.0444502, 0.141249, 0.0463301]) + assert_equal(a[-1], np.median(a)) + # check array scalar result + assert_equal(np.median(a).ndim, 0) + a[1] = np.nan + assert_equal(np.median(a).ndim, 0) + + def test_axis_keyword(self): + a3 = np.array([[2, 3], + [0, 
1], + [6, 7], + [4, 5]]) + for a in [a3, np.random.randint(0, 100, size=(2, 3, 4))]: + orig = a.copy() + np.median(a, axis=None) + for ax in range(a.ndim): + np.median(a, axis=ax) + assert_array_equal(a, orig) + + assert_allclose(np.median(a3, axis=0), [3, 4]) + assert_allclose(np.median(a3.T, axis=1), [3, 4]) + assert_allclose(np.median(a3), 3.5) + assert_allclose(np.median(a3, axis=None), 3.5) + assert_allclose(np.median(a3.T), 3.5) + + def test_overwrite_keyword(self): + a3 = np.array([[2, 3], + [0, 1], + [6, 7], + [4, 5]]) + a0 = np.array(1) + a1 = np.arange(2) + a2 = np.arange(6).reshape(2, 3) + assert_allclose(np.median(a0.copy(), overwrite_input=True), 1) + assert_allclose(np.median(a1.copy(), overwrite_input=True), 0.5) + assert_allclose(np.median(a2.copy(), overwrite_input=True), 2.5) + assert_allclose(np.median(a2.copy(), overwrite_input=True, axis=0), + [1.5, 2.5, 3.5]) + assert_allclose( + np.median(a2.copy(), overwrite_input=True, axis=1), [1, 4]) + assert_allclose( + np.median(a2.copy(), overwrite_input=True, axis=None), 2.5) + assert_allclose( + np.median(a3.copy(), overwrite_input=True, axis=0), [3, 4]) + assert_allclose(np.median(a3.T.copy(), overwrite_input=True, axis=1), + [3, 4]) + + a4 = np.arange(3 * 4 * 5, dtype=np.float32).reshape((3, 4, 5)) + np.random.shuffle(a4.ravel()) + assert_allclose(np.median(a4, axis=None), + np.median(a4.copy(), axis=None, overwrite_input=True)) + assert_allclose(np.median(a4, axis=0), + np.median(a4.copy(), axis=0, overwrite_input=True)) + assert_allclose(np.median(a4, axis=1), + np.median(a4.copy(), axis=1, overwrite_input=True)) + assert_allclose(np.median(a4, axis=2), + np.median(a4.copy(), axis=2, overwrite_input=True)) + + def test_array_like(self): + x = [1, 2, 3] + assert_almost_equal(np.median(x), 2) + x2 = [x] + assert_almost_equal(np.median(x2), 2) + assert_allclose(np.median(x2, axis=0), x) + + def test_subclass(self): + # gh-3846 + class MySubClass(np.ndarray): + + def __new__(cls, input_array, info=None): + obj = np.asarray(input_array).view(cls) + obj.info = info + return obj + + def mean(self, axis=None, dtype=None, out=None): + return -7 + + a = MySubClass([1, 2, 3]) + assert_equal(np.median(a), -7) + + @pytest.mark.parametrize('arr', + ([1., 2., 3.], [1., np.nan, 3.], np.nan, 0.)) + def test_subclass2(self, arr): + """Check that we return subclasses, even if a NaN scalar.""" + class MySubclass(np.ndarray): + pass + + m = np.median(np.array(arr).view(MySubclass)) + assert isinstance(m, MySubclass) + + def test_out(self): + o = np.zeros((4,)) + d = np.ones((3, 4)) + assert_equal(np.median(d, 0, out=o), o) + o = np.zeros((3,)) + assert_equal(np.median(d, 1, out=o), o) + o = np.zeros(()) + assert_equal(np.median(d, out=o), o) + + def test_out_nan(self): + with warnings.catch_warnings(record=True): + warnings.filterwarnings('always', '', RuntimeWarning) + o = np.zeros((4,)) + d = np.ones((3, 4)) + d[2, 1] = np.nan + assert_equal(np.median(d, 0, out=o), o) + o = np.zeros((3,)) + assert_equal(np.median(d, 1, out=o), o) + o = np.zeros(()) + assert_equal(np.median(d, out=o), o) + + def test_nan_behavior(self): + a = np.arange(24, dtype=float) + a[2] = np.nan + assert_equal(np.median(a), np.nan) + assert_equal(np.median(a, axis=0), np.nan) + + a = np.arange(24, dtype=float).reshape(2, 3, 4) + a[1, 2, 3] = np.nan + a[1, 1, 2] = np.nan + + # no axis + assert_equal(np.median(a), np.nan) + assert_equal(np.median(a).ndim, 0) + + # axis0 + b = np.median(np.arange(24, dtype=float).reshape(2, 3, 4), 0) + b[2, 3] = np.nan + b[1, 2] = np.nan 
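+ # b is the NaN-free axis-0 median with NaNs patched in where a's NaNs land, i.e. the expected result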
+ assert_equal(np.median(a, 0), b) + + # axis1 + b = np.median(np.arange(24, dtype=float).reshape(2, 3, 4), 1) + b[1, 3] = np.nan + b[1, 2] = np.nan + assert_equal(np.median(a, 1), b) + + # axis02 + b = np.median(np.arange(24, dtype=float).reshape(2, 3, 4), (0, 2)) + b[1] = np.nan + b[2] = np.nan + assert_equal(np.median(a, (0, 2)), b) + + def test_empty(self): + # mean(empty array) emits two warnings: empty slice and divide by 0 + a = np.array([], dtype=float) + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.median(a), np.nan) + assert_(w[0].category is RuntimeWarning) + assert_equal(len(w), 2) + + # multiple dimensions + a = np.array([], dtype=float, ndmin=3) + # no axis + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.median(a), np.nan) + assert_(w[0].category is RuntimeWarning) + + # axis 0 and 1 + b = np.array([], dtype=float, ndmin=2) + assert_equal(np.median(a, axis=0), b) + assert_equal(np.median(a, axis=1), b) + + # axis 2 + b = np.array(np.nan, dtype=float, ndmin=2) + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.median(a, axis=2), b) + assert_(w[0].category is RuntimeWarning) + + def test_object(self): + o = np.arange(7.) + assert_(type(np.median(o.astype(object))) is float) + o[2] = np.nan + assert_(type(np.median(o.astype(object))) is float) + + def test_extended_axis(self): + o = np.random.normal(size=(71, 23)) + x = np.dstack([o] * 10) + assert_equal(np.median(x, axis=(0, 1)), np.median(o)) + x = np.moveaxis(x, -1, 0) + assert_equal(np.median(x, axis=(-2, -1)), np.median(o)) + x = x.swapaxes(0, 1).copy() + assert_equal(np.median(x, axis=(0, -1)), np.median(o)) + + assert_equal(np.median(x, axis=(0, 1, 2)), np.median(x, axis=None)) + assert_equal(np.median(x, axis=(0, )), np.median(x, axis=0)) + assert_equal(np.median(x, axis=(-1, )), np.median(x, axis=-1)) + + d = np.arange(3 * 5 * 7 * 11).reshape((3, 5, 7, 11)) + np.random.shuffle(d.ravel()) + assert_equal(np.median(d, axis=(0, 1, 2))[0], + np.median(d[:,:,:, 0].flatten())) + assert_equal(np.median(d, axis=(0, 1, 3))[1], + np.median(d[:,:, 1,:].flatten())) + assert_equal(np.median(d, axis=(3, 1, -4))[2], + np.median(d[:,:, 2,:].flatten())) + assert_equal(np.median(d, axis=(3, 1, 2))[2], + np.median(d[2,:,:,:].flatten())) + assert_equal(np.median(d, axis=(3, 2))[2, 1], + np.median(d[2, 1,:,:].flatten())) + assert_equal(np.median(d, axis=(1, -2))[2, 1], + np.median(d[2,:,:, 1].flatten())) + assert_equal(np.median(d, axis=(1, 3))[2, 2], + np.median(d[2,:, 2,:].flatten())) + + def test_extended_axis_invalid(self): + d = np.ones((3, 5, 7, 11)) + assert_raises(np.AxisError, np.median, d, axis=-5) + assert_raises(np.AxisError, np.median, d, axis=(0, -5)) + assert_raises(np.AxisError, np.median, d, axis=4) + assert_raises(np.AxisError, np.median, d, axis=(0, 4)) + assert_raises(ValueError, np.median, d, axis=(1, 1)) + + def test_keepdims(self): + d = np.ones((3, 5, 7, 11)) + assert_equal(np.median(d, axis=None, keepdims=True).shape, + (1, 1, 1, 1)) + assert_equal(np.median(d, axis=(0, 1), keepdims=True).shape, + (1, 1, 7, 11)) + assert_equal(np.median(d, axis=(0, 3), keepdims=True).shape, + (1, 5, 7, 1)) + assert_equal(np.median(d, axis=(1,), keepdims=True).shape, + (3, 1, 7, 11)) + assert_equal(np.median(d, axis=(0, 1, 2, 3), keepdims=True).shape, + (1, 1, 1, 1)) + assert_equal(np.median(d, axis=(0, 1, 3),
keepdims=True).shape, + (1, 1, 7, 1)) + + +class TestAdd_newdoc_ufunc: + + def test_ufunc_arg(self): + assert_raises(TypeError, add_newdoc_ufunc, 2, "blah") + assert_raises(ValueError, add_newdoc_ufunc, np.add, "blah") + + def test_string_arg(self): + assert_raises(TypeError, add_newdoc_ufunc, np.add, 3) + + +class TestAdd_newdoc: + + @pytest.mark.skipif(sys.flags.optimize == 2, reason="Python running -OO") + @pytest.mark.xfail(IS_PYPY, reason="PyPy does not modify tp_doc") + def test_add_doc(self): + # test that np.add_newdoc did attach a docstring successfully: + tgt = "Current flat index into the array." + assert_equal(np.core.flatiter.index.__doc__[:len(tgt)], tgt) + assert_(len(np.core.ufunc.identity.__doc__) > 300) + assert_(len(np.lib.index_tricks.mgrid.__doc__) > 300) + + @pytest.mark.skipif(sys.flags.optimize == 2, reason="Python running -OO") + def test_errors_are_ignored(self): + prev_doc = np.core.flatiter.index.__doc__ + # nothing changed, but the error was ignored; this should probably + # give a warning (or even an error) in the future. + np.add_newdoc("numpy.core", "flatiter", ("index", "bad docstring")) + assert prev_doc == np.core.flatiter.index.__doc__ + + +class TestAddDocstring: + # Test should possibly be moved, but it also fits to be close to + # the newdoc tests... + @pytest.mark.skipif(sys.flags.optimize == 2, reason="Python running -OO") + @pytest.mark.skipif(IS_PYPY, reason="PyPy does not modify tp_doc") + def test_add_same_docstring(self): + # test for attributes (which are C-level defined) + np.add_docstring(np.ndarray.flat, np.ndarray.flat.__doc__) + # And typical functions: + def func(): + """docstring""" + return + + np.add_docstring(func, func.__doc__) + + @pytest.mark.skipif(sys.flags.optimize == 2, reason="Python running -OO") + def test_different_docstring_fails(self): + # test for attributes (which are C-level defined) + with assert_raises(RuntimeError): + np.add_docstring(np.ndarray.flat, "different docstring") + # And typical functions: + def func(): + """docstring""" + return + + with assert_raises(RuntimeError): + np.add_docstring(func, "different docstring") + + +class TestSortComplex: + + @pytest.mark.parametrize("type_in, type_out", [ + ('l', 'D'), + ('h', 'F'), + ('H', 'F'), + ('b', 'F'), + ('B', 'F'), + ('g', 'G'), + ]) + def test_sort_real(self, type_in, type_out): + # sort_complex() type casting for real input types + a = np.array([5, 3, 6, 2, 1], dtype=type_in) + actual = np.sort_complex(a) + expected = np.sort(a).astype(type_out) + assert_equal(actual, expected) + assert_equal(actual.dtype, expected.dtype) + + def test_sort_complex(self): + # sort_complex() handling of complex input + a = np.array([2 + 3j, 1 - 2j, 1 - 3j, 2 + 1j], dtype='D') + expected = np.array([1 - 3j, 1 - 2j, 2 + 1j, 2 + 3j], dtype='D') + actual = np.sort_complex(a) + assert_equal(actual, expected) + assert_equal(actual.dtype, expected.dtype) diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/test_histograms.py b/.local/lib/python3.8/site-packages/numpy/lib/tests/test_histograms.py new file mode 100644 index 0000000..fc16b73 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/tests/test_histograms.py @@ -0,0 +1,838 @@ +import numpy as np + +from numpy.lib.histograms import histogram, histogramdd, histogram_bin_edges +from numpy.testing import ( + assert_, assert_equal, assert_array_equal, assert_almost_equal, + assert_array_almost_equal, assert_raises, assert_allclose, + assert_array_max_ulp, assert_raises_regex, suppress_warnings, + ) +import pytest +
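+# Editor's sketch (hedged; not part of the upstream numpy test suite): a
+# minimal illustration of the counts/density relationship exercised by
+# several tests below. With density=True, np.histogram returns values
+# equal to counts / (n_samples * bin_width), so the result integrates
+# to one over the binned range. The helper name is hypothetical.
+def _density_integrates_to_one(samples, bins=10):
+    # histogram returns (values, edges); summing value * bin_width
+    # approximates the integral of the density, which should be 1.0
+    density, edges = np.histogram(samples, bins=bins, density=True)
+    return np.isclose(np.sum(density * np.diff(edges)), 1.0)
+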
+ +class TestHistogram: + + def setup(self): + pass + + def teardown(self): + pass + + def test_simple(self): + n = 100 + v = np.random.rand(n) + (a, b) = histogram(v) + # check if the sum of the bins equals the number of samples + assert_equal(np.sum(a, axis=0), n) + # check that the bin counts are evenly spaced when the data is from + # a linear function + (a, b) = histogram(np.linspace(0, 10, 100)) + assert_array_equal(a, 10) + + def test_one_bin(self): + # Ticket 632 + hist, edges = histogram([1, 2, 3, 4], [1, 2]) + assert_array_equal(hist, [2, ]) + assert_array_equal(edges, [1, 2]) + assert_raises(ValueError, histogram, [1, 2], bins=0) + h, e = histogram([1, 2], bins=1) + assert_equal(h, np.array([2])) + assert_allclose(e, np.array([1., 2.])) + + def test_normed(self): + sup = suppress_warnings() + with sup: + rec = sup.record(np.VisibleDeprecationWarning, '.*normed.*') + # Check that the integral of the density equals 1. + n = 100 + v = np.random.rand(n) + a, b = histogram(v, normed=True) + area = np.sum(a * np.diff(b)) + assert_almost_equal(area, 1) + assert_equal(len(rec), 1) + + sup = suppress_warnings() + with sup: + rec = sup.record(np.VisibleDeprecationWarning, '.*normed.*') + # Check with non-constant bin widths (buggy but backwards + # compatible) + v = np.arange(10) + bins = [0, 1, 5, 9, 10] + a, b = histogram(v, bins, normed=True) + area = np.sum(a * np.diff(b)) + assert_almost_equal(area, 1) + assert_equal(len(rec), 1) + + def test_density(self): + # Check that the integral of the density equals 1. + n = 100 + v = np.random.rand(n) + a, b = histogram(v, density=True) + area = np.sum(a * np.diff(b)) + assert_almost_equal(area, 1) + + # Check with non-constant bin widths + v = np.arange(10) + bins = [0, 1, 3, 6, 10] + a, b = histogram(v, bins, density=True) + assert_array_equal(a, .1) + assert_equal(np.sum(a * np.diff(b)), 1) + + # Test that passing False works too + a, b = histogram(v, bins, density=False) + assert_array_equal(a, [1, 2, 3, 4]) + + # Variable bin widths are especially useful to deal with + # infinities. + v = np.arange(10) + bins = [0, 1, 3, 6, np.inf] + a, b = histogram(v, bins, density=True) + assert_array_equal(a, [.1, .1, .1, 0.]) + + # Taken from a bug report from N. Becker on the numpy-discussion + # mailing list Aug. 6, 2010. 
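+ # the half-open bin [1.5, inf) has infinite width, so its normalized density is 0 even though it contains three of the four samples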
+ counts, dmy = np.histogram( + [1, 2, 3, 4], [0.5, 1.5, np.inf], density=True) + assert_equal(counts, [.25, 0]) + + def test_outliers(self): + # Check that outliers are not tallied + a = np.arange(10) + .5 + + # Lower outliers + h, b = histogram(a, range=[0, 9]) + assert_equal(h.sum(), 9) + + # Upper outliers + h, b = histogram(a, range=[1, 10]) + assert_equal(h.sum(), 9) + + # Normalization + h, b = histogram(a, range=[1, 9], density=True) + assert_almost_equal((h * np.diff(b)).sum(), 1, decimal=15) + + # Weights + w = np.arange(10) + .5 + h, b = histogram(a, range=[1, 9], weights=w, density=True) + assert_equal((h * np.diff(b)).sum(), 1) + + h, b = histogram(a, bins=8, range=[1, 9], weights=w) + assert_equal(h, w[1:-1]) + + def test_arr_weights_mismatch(self): + a = np.arange(10) + .5 + w = np.arange(11) + .5 + with assert_raises_regex(ValueError, "same shape as"): + h, b = histogram(a, range=[1, 9], weights=w, density=True) + + + def test_type(self): + # Check the type of the returned histogram + a = np.arange(10) + .5 + h, b = histogram(a) + assert_(np.issubdtype(h.dtype, np.integer)) + + h, b = histogram(a, density=True) + assert_(np.issubdtype(h.dtype, np.floating)) + + h, b = histogram(a, weights=np.ones(10, int)) + assert_(np.issubdtype(h.dtype, np.integer)) + + h, b = histogram(a, weights=np.ones(10, float)) + assert_(np.issubdtype(h.dtype, np.floating)) + + def test_f32_rounding(self): + # gh-4799, check that the rounding of the edges works with float32 + x = np.array([276.318359, -69.593948, 21.329449], dtype=np.float32) + y = np.array([5005.689453, 4481.327637, 6010.369629], dtype=np.float32) + counts_hist, xedges, yedges = np.histogram2d(x, y, bins=100) + assert_equal(counts_hist.sum(), 3.) + + def test_bool_conversion(self): + # gh-12107 + # Reference integer histogram + a = np.array([1, 1, 0], dtype=np.uint8) + int_hist, int_edges = np.histogram(a) + + # Should raise a warning on booleans + # Ensure that the histograms are equivalent; suppress the warnings + # to get the actual outputs + with suppress_warnings() as sup: + rec = sup.record(RuntimeWarning, 'Converting input from .*') + hist, edges = np.histogram([True, True, False]) + # A warning should be issued + assert_equal(len(rec), 1) + assert_array_equal(hist, int_hist) + assert_array_equal(edges, int_edges) + + def test_weights(self): + v = np.random.rand(100) + w = np.ones(100) * 5 + a, b = histogram(v) + na, nb = histogram(v, density=True) + wa, wb = histogram(v, weights=w) + nwa, nwb = histogram(v, weights=w, density=True) + assert_array_almost_equal(a * 5, wa) + assert_array_almost_equal(na, nwa) + + # Check weights are properly applied. + v = np.linspace(0, 10, 10) + w = np.concatenate((np.zeros(5), np.ones(5))) + wa, wb = histogram(v, bins=np.arange(11), weights=w) + assert_array_almost_equal(wa, w) + + # Check with integer weights + wa, wb = histogram([1, 2, 2, 4], bins=4, weights=[4, 3, 2, 1]) + assert_array_equal(wa, [4, 5, 0, 1]) + wa, wb = histogram( + [1, 2, 2, 4], bins=4, weights=[4, 3, 2, 1], density=True) + assert_array_almost_equal(wa, np.array([4, 5, 0, 1]) / 10. / 3. * 4) + + # Check weights with non-uniform bin widths + a, b = histogram( + np.arange(9), [0, 1, 3, 6, 10], + weights=[2, 1, 1, 1, 1, 1, 1, 1, 1], density=True) + assert_almost_equal(a, [.2, .1, .1, .075]) + + def test_exotic_weights(self): + + # Test the use of weights that are not integer or floats, but e.g. + # complex numbers or object types.
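+ # in both cases the expected per-bin result is simply the sum of the weights whose values fall in that bin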
+ + # Complex weights + values = np.array([1.3, 2.5, 2.3]) + weights = np.array([1, -1, 2]) + 1j * np.array([2, 1, 2]) + + # Check with custom bins + wa, wb = histogram(values, bins=[0, 2, 3], weights=weights) + assert_array_almost_equal(wa, np.array([1, 1]) + 1j * np.array([2, 3])) + + # Check with even bins + wa, wb = histogram(values, bins=2, range=[1, 3], weights=weights) + assert_array_almost_equal(wa, np.array([1, 1]) + 1j * np.array([2, 3])) + + # Decimal weights + from decimal import Decimal + values = np.array([1.3, 2.5, 2.3]) + weights = np.array([Decimal(1), Decimal(2), Decimal(3)]) + + # Check with custom bins + wa, wb = histogram(values, bins=[0, 2, 3], weights=weights) + assert_array_almost_equal(wa, [Decimal(1), Decimal(5)]) + + # Check with even bins + wa, wb = histogram(values, bins=2, range=[1, 3], weights=weights) + assert_array_almost_equal(wa, [Decimal(1), Decimal(5)]) + + def test_no_side_effects(self): + # This is a regression test that ensures that values passed to + # ``histogram`` are unchanged. + values = np.array([1.3, 2.5, 2.3]) + np.histogram(values, range=[-10, 10], bins=100) + assert_array_almost_equal(values, [1.3, 2.5, 2.3]) + + def test_empty(self): + a, b = histogram([], bins=([0, 1])) + assert_array_equal(a, np.array([0])) + assert_array_equal(b, np.array([0, 1])) + + def test_error_binnum_type(self): + # Test that the right error is raised if the bins argument is a float + vals = np.linspace(0.0, 1.0, num=100) + histogram(vals, 5) + assert_raises(TypeError, histogram, vals, 2.4) + + def test_finite_range(self): + # Normal ranges should be fine + vals = np.linspace(0.0, 1.0, num=100) + histogram(vals, range=[0.25, 0.75]) + assert_raises(ValueError, histogram, vals, range=[np.nan, 0.75]) + assert_raises(ValueError, histogram, vals, range=[0.25, np.inf]) + + def test_invalid_range(self): + # start of range must be < end of range + vals = np.linspace(0.0, 1.0, num=100) + with assert_raises_regex(ValueError, "max must be larger than"): + np.histogram(vals, range=[0.1, 0.01]) + + def test_bin_edge_cases(self): + # Ensure that floating-point computations correctly place edge cases.
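+ # each sample must satisfy left_edge <= x < right_edge for its bin; the loop below checks this for every non-empty bin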
+ arr = np.array([337, 404, 739, 806, 1007, 1811, 2012]) + hist, edges = np.histogram(arr, bins=8296, range=(2, 2280)) + mask = hist > 0 + left_edges = edges[:-1][mask] + right_edges = edges[1:][mask] + for x, left, right in zip(arr, left_edges, right_edges): + assert_(x >= left) + assert_(x < right) + + def test_last_bin_inclusive_range(self): + arr = np.array([0., 0., 0., 1., 2., 3., 3., 4., 5.]) + hist, edges = np.histogram(arr, bins=30, range=(-0.5, 5)) + assert_equal(hist[-1], 1) + + def test_bin_array_dims(self): + # gracefully handle bins object > 1 dimension + vals = np.linspace(0.0, 1.0, num=100) + bins = np.array([[0, 0.5], [0.6, 1.0]]) + with assert_raises_regex(ValueError, "must be 1d"): + np.histogram(vals, bins=bins) + + def test_unsigned_monotonicity_check(self): + # Ensures ValueError is raised if bins not increasing monotonically + # when bins contain unsigned values (see #9222) + arr = np.array([2]) + bins = np.array([1, 3, 1], dtype='uint64') + with assert_raises(ValueError): + hist, edges = np.histogram(arr, bins=bins) + + def test_object_array_of_0d(self): + # gh-7864 + assert_raises(ValueError, + histogram, [np.array(0.4) for i in range(10)] + [-np.inf]) + assert_raises(ValueError, + histogram, [np.array(0.4) for i in range(10)] + [np.inf]) + + # these should not crash + np.histogram([np.array(0.5) for i in range(10)] + [.500000000000001]) + np.histogram([np.array(0.5) for i in range(10)] + [.5]) + + def test_some_nan_values(self): + # gh-7503 + one_nan = np.array([0, 1, np.nan]) + all_nan = np.array([np.nan, np.nan]) + + # the internal comparisons with NaN give warnings + sup = suppress_warnings() + sup.filter(RuntimeWarning) + with sup: + # can't infer range with nan + assert_raises(ValueError, histogram, one_nan, bins='auto') + assert_raises(ValueError, histogram, all_nan, bins='auto') + + # explicit range solves the problem + h, b = histogram(one_nan, bins='auto', range=(0, 1)) + assert_equal(h.sum(), 2) # nan is not counted + h, b = histogram(all_nan, bins='auto', range=(0, 1)) + assert_equal(h.sum(), 0) # nan is not counted + + # as does an explicit set of bins + h, b = histogram(one_nan, bins=[0, 1]) + assert_equal(h.sum(), 2) # nan is not counted + h, b = histogram(all_nan, bins=[0, 1]) + assert_equal(h.sum(), 0) # nan is not counted + + def test_datetime(self): + begin = np.datetime64('2000-01-01', 'D') + offsets = np.array([0, 0, 1, 1, 2, 3, 5, 10, 20]) + bins = np.array([0, 2, 7, 20]) + dates = begin + offsets + date_bins = begin + bins + + td = np.dtype('timedelta64[D]') + + # Results should be the same for integer offsets or datetime values. 
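+ # (the integer offsets are cast to timedelta64[D] below so that all three histogram calls share the same bin semantics)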
+ # For now, only explicit bins are supported, since linspace does not + # work on datetimes or timedeltas + d_count, d_edge = histogram(dates, bins=date_bins) + t_count, t_edge = histogram(offsets.astype(td), bins=bins.astype(td)) + i_count, i_edge = histogram(offsets, bins=bins) + + assert_equal(d_count, i_count) + assert_equal(t_count, i_count) + + assert_equal((d_edge - begin).astype(int), i_edge) + assert_equal(t_edge.astype(int), i_edge) + + assert_equal(d_edge.dtype, dates.dtype) + assert_equal(t_edge.dtype, td) + + def do_signed_overflow_bounds(self, dtype): + exponent = 8 * np.dtype(dtype).itemsize - 1 + arr = np.array([-2**exponent + 4, 2**exponent - 4], dtype=dtype) + hist, e = histogram(arr, bins=2) + assert_equal(e, [-2**exponent + 4, 0, 2**exponent - 4]) + assert_equal(hist, [1, 1]) + + def test_signed_overflow_bounds(self): + self.do_signed_overflow_bounds(np.byte) + self.do_signed_overflow_bounds(np.short) + self.do_signed_overflow_bounds(np.intc) + self.do_signed_overflow_bounds(np.int_) + self.do_signed_overflow_bounds(np.longlong) + + def do_precision_lower_bound(self, float_small, float_large): + eps = np.finfo(float_large).eps + + arr = np.array([1.0], float_small) + range = np.array([1.0 + eps, 2.0], float_large) + + # test is looking for behavior when the bounds change between dtypes + if range.astype(float_small)[0] != 1: + return + + # previously crashed + count, x_loc = np.histogram(arr, bins=1, range=range) + assert_equal(count, [1]) + + # gh-10322 means that the type comes from arr - this may change + assert_equal(x_loc.dtype, float_small) + + def do_precision_upper_bound(self, float_small, float_large): + eps = np.finfo(float_large).eps + + arr = np.array([1.0], float_small) + range = np.array([0.0, 1.0 - eps], float_large) + + # test is looking for behavior when the bounds change between dtypes + if range.astype(float_small)[-1] != 1: + return + + # previously crashed + count, x_loc = np.histogram(arr, bins=1, range=range) + assert_equal(count, [1]) + + # gh-10322 means that the type comes from arr - this may change + assert_equal(x_loc.dtype, float_small) + + def do_precision(self, float_small, float_large): + self.do_precision_lower_bound(float_small, float_large) + self.do_precision_upper_bound(float_small, float_large) + + def test_precision(self): + # not looping results in a useful stack trace upon failure + self.do_precision(np.half, np.single) + self.do_precision(np.half, np.double) + self.do_precision(np.half, np.longdouble) + self.do_precision(np.single, np.double) + self.do_precision(np.single, np.longdouble) + self.do_precision(np.double, np.longdouble) + + def test_histogram_bin_edges(self): + hist, e = histogram([1, 2, 3, 4], [1, 2]) + edges = histogram_bin_edges([1, 2, 3, 4], [1, 2]) + assert_array_equal(edges, e) + + arr = np.array([0., 0., 0., 1., 2., 3., 3., 4., 5.]) + hist, e = histogram(arr, bins=30, range=(-0.5, 5)) + edges = histogram_bin_edges(arr, bins=30, range=(-0.5, 5)) + assert_array_equal(edges, e) + + hist, e = histogram(arr, bins='auto', range=(0, 1)) + edges = histogram_bin_edges(arr, bins='auto', range=(0, 1)) + assert_array_equal(edges, e) + + +class TestHistogramOptimBinNums: + """ + Provide test coverage when using provided estimators for optimal number of + bins + """ + + def test_empty(self): + estimator_list = ['fd', 'scott', 'rice', 'sturges', + 'doane', 'sqrt', 'auto', 'stone'] + # check it can deal with empty data + for estimator in estimator_list: + a, b = histogram([], bins=estimator) + assert_array_equal(a, 
np.array([0]))
+            assert_array_equal(b, np.array([0, 1]))
+
+    def test_simple(self):
+        """
+        Straightforward testing with a mixture of linspace data (for
+        consistency). All test values have been precomputed and the values
+        shouldn't change.
+        """
+        # Some basic sanity checking, with some fixed data.
+        # Checking for the correct number of bins.
+        basic_test = {50: {'fd': 4, 'scott': 4, 'rice': 8, 'sturges': 7,
+                           'doane': 8, 'sqrt': 8, 'auto': 7, 'stone': 2},
+                      500: {'fd': 8, 'scott': 8, 'rice': 16, 'sturges': 10,
+                            'doane': 12, 'sqrt': 23, 'auto': 10, 'stone': 9},
+                      5000: {'fd': 17, 'scott': 17, 'rice': 35, 'sturges': 14,
+                             'doane': 17, 'sqrt': 71, 'auto': 17, 'stone': 20}}
+
+        for testlen, expectedResults in basic_test.items():
+            # Create some sort of non-uniform data to test with
+            # (2 peak uniform mixture)
+            x1 = np.linspace(-10, -1, testlen // 5 * 2)
+            x2 = np.linspace(1, 10, testlen // 5 * 3)
+            x = np.concatenate((x1, x2))
+            for estimator, numbins in expectedResults.items():
+                a, b = np.histogram(x, estimator)
+                assert_equal(len(a), numbins, err_msg="For the {0} estimator "
+                             "with datasize of {1}".format(estimator, testlen))
+
+    def test_small(self):
+        """
+        Smaller datasets have the potential to cause issues with the data
+        adaptive methods, especially the FD method. All bin numbers have been
+        precalculated.
+        """
+        small_dat = {1: {'fd': 1, 'scott': 1, 'rice': 1, 'sturges': 1,
+                         'doane': 1, 'sqrt': 1, 'stone': 1},
+                     2: {'fd': 2, 'scott': 1, 'rice': 3, 'sturges': 2,
+                         'doane': 1, 'sqrt': 2, 'stone': 1},
+                     3: {'fd': 2, 'scott': 2, 'rice': 3, 'sturges': 3,
+                         'doane': 3, 'sqrt': 2, 'stone': 1}}
+
+        for testlen, expectedResults in small_dat.items():
+            testdat = np.arange(testlen)
+            for estimator, expbins in expectedResults.items():
+                a, b = np.histogram(testdat, estimator)
+                assert_equal(len(a), expbins, err_msg="For the {0} estimator "
+                             "with datasize of {1}".format(estimator, testlen))
+
+    def test_incorrect_methods(self):
+        """
+        Check a ValueError is raised when an unknown string is passed in.
+        """
+        check_list = ['mad', 'freeman', 'histograms', 'IQR']
+        for estimator in check_list:
+            assert_raises(ValueError, histogram, [1, 2, 3], estimator)
+
+    def test_novariance(self):
+        """
+        Check that methods handle data with no variance.
+        Primarily for Scott and FD, as the SD and IQR are both 0 in this case.
+        """
+        novar_dataset = np.ones(100)
+        novar_resultdict = {'fd': 1, 'scott': 1, 'rice': 1, 'sturges': 1,
+                            'doane': 1, 'sqrt': 1, 'auto': 1, 'stone': 1}
+
+        for estimator, numbins in novar_resultdict.items():
+            a, b = np.histogram(novar_dataset, estimator)
+            assert_equal(len(a), numbins, err_msg="{0} estimator, "
+                         "No Variance test".format(estimator))
+
+    def test_limited_variance(self):
+        """
+        Check that when the IQR is 0 but the variance is nonzero, the 'auto'
+        estimator returns the sturges value and not the fd value.
+        """
+        lim_var_data = np.ones(1000)
+        lim_var_data[:3] = 0
+        lim_var_data[-4:] = 100
+
+        edges_auto = histogram_bin_edges(lim_var_data, 'auto')
+        assert_equal(edges_auto, np.linspace(0, 100, 12))
+
+        edges_fd = histogram_bin_edges(lim_var_data, 'fd')
+        assert_equal(edges_fd, np.array([0, 100]))
+
+        edges_sturges = histogram_bin_edges(lim_var_data, 'sturges')
+        assert_equal(edges_sturges, np.linspace(0, 100, 12))
+
+    def test_outlier(self):
+        """
+        Check the FD, Scott and Doane estimators with outliers.
+
+        The FD estimator estimates a smaller binwidth since it's less
+        affected by outliers. Since the range is so (artificially) large,
+        this means more bins, most of which will be empty, but the data of
+        interest usually is unaffected. The Scott estimator is more affected
+        and returns fewer bins, despite most of the variance being in one
+        area of the data. The Doane estimator lies somewhere between the
+        other two.
+        """
+        xcenter = np.linspace(-10, 10, 50)
+        outlier_dataset = np.hstack((np.linspace(-110, -100, 5), xcenter))
+
+        outlier_resultdict = {'fd': 21, 'scott': 5, 'doane': 11, 'stone': 6}
+
+        for estimator, numbins in outlier_resultdict.items():
+            a, b = np.histogram(outlier_dataset, estimator)
+            assert_equal(len(a), numbins)
+
+    def test_scott_vs_stone(self):
+        """
+        Verify that Scott's rule and Stone's rule converge for normally
+        distributed data.
+        """
+
+        def nbins_ratio(seed, size):
+            rng = np.random.RandomState(seed)
+            x = rng.normal(loc=0, scale=2, size=size)
+            a, b = len(np.histogram(x, 'stone')[0]), len(np.histogram(x, 'scott')[0])
+            return a / (a + b)
+
+        ll = [[nbins_ratio(seed, size)
+               for size in np.geomspace(start=10, stop=100, num=4).round().astype(int)]
+              for seed in range(10)]
+
+        # The average difference between the two methods decreases as the
+        # dataset size increases.
+        avg = abs(np.mean(ll, axis=0) - 0.5)
+        assert_almost_equal(avg, [0.15, 0.09, 0.08, 0.03], decimal=2)
+
+    def test_simple_range(self):
+        """
+        Straightforward testing with a mixture of linspace data (for
+        consistency). Adding in a 3rd mixture that will then be
+        completely ignored. All test values have been precomputed and
+        they shouldn't change.
+        """
+        # Some basic sanity checking, with some fixed data.
+        # Checking for the correct number of bins.
+        basic_test = {
+            50: {'fd': 8, 'scott': 8, 'rice': 15,
+                 'sturges': 14, 'auto': 14, 'stone': 8},
+            500: {'fd': 15, 'scott': 16, 'rice': 32,
+                  'sturges': 20, 'auto': 20, 'stone': 80},
+            5000: {'fd': 33, 'scott': 33, 'rice': 69,
+                   'sturges': 27, 'auto': 33, 'stone': 80}
+        }
+
+        for testlen, expectedResults in basic_test.items():
+            # Create some sort of non-uniform data to test with
+            # (3 peak uniform mixture)
+            x1 = np.linspace(-10, -1, testlen // 5 * 2)
+            x2 = np.linspace(1, 10, testlen // 5 * 3)
+            x3 = np.linspace(-100, -50, testlen)
+            x = np.hstack((x1, x2, x3))
+            for estimator, numbins in expectedResults.items():
+                a, b = np.histogram(x, estimator, range=(-20, 20))
+                msg = "For the {0} estimator".format(estimator)
+                msg += " with datasize of {0}".format(testlen)
+                assert_equal(len(a), numbins, err_msg=msg)
+
+    @pytest.mark.parametrize("bins", ['auto', 'fd', 'doane', 'scott',
+                                      'stone', 'rice', 'sturges'])
+    def test_signed_integer_data(self, bins):
+        # Regression test for gh-14379.
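+        # Descriptive note: the check below asserts that each estimator
+        # produces identical histograms and edges for int8 data and an int32
+        # copy of the same values, i.e. no small-signed-integer effects.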
+ a = np.array([-2, 0, 127], dtype=np.int8) + hist, edges = np.histogram(a, bins=bins) + hist32, edges32 = np.histogram(a.astype(np.int32), bins=bins) + assert_array_equal(hist, hist32) + assert_array_equal(edges, edges32) + + def test_simple_weighted(self): + """ + Check that weighted data raises a TypeError + """ + estimator_list = ['fd', 'scott', 'rice', 'sturges', 'auto'] + for estimator in estimator_list: + assert_raises(TypeError, histogram, [1, 2, 3], + estimator, weights=[1, 2, 3]) + + +class TestHistogramdd: + + def test_simple(self): + x = np.array([[-.5, .5, 1.5], [-.5, 1.5, 2.5], [-.5, 2.5, .5], + [.5, .5, 1.5], [.5, 1.5, 2.5], [.5, 2.5, 2.5]]) + H, edges = histogramdd(x, (2, 3, 3), + range=[[-1, 1], [0, 3], [0, 3]]) + answer = np.array([[[0, 1, 0], [0, 0, 1], [1, 0, 0]], + [[0, 1, 0], [0, 0, 1], [0, 0, 1]]]) + assert_array_equal(H, answer) + + # Check normalization + ed = [[-2, 0, 2], [0, 1, 2, 3], [0, 1, 2, 3]] + H, edges = histogramdd(x, bins=ed, density=True) + assert_(np.all(H == answer / 12.)) + + # Check that H has the correct shape. + H, edges = histogramdd(x, (2, 3, 4), + range=[[-1, 1], [0, 3], [0, 4]], + density=True) + answer = np.array([[[0, 1, 0, 0], [0, 0, 1, 0], [1, 0, 0, 0]], + [[0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 1, 0]]]) + assert_array_almost_equal(H, answer / 6., 4) + # Check that a sequence of arrays is accepted and H has the correct + # shape. + z = [np.squeeze(y) for y in np.split(x, 3, axis=1)] + H, edges = histogramdd( + z, bins=(4, 3, 2), range=[[-2, 2], [0, 3], [0, 2]]) + answer = np.array([[[0, 0], [0, 0], [0, 0]], + [[0, 1], [0, 0], [1, 0]], + [[0, 1], [0, 0], [0, 0]], + [[0, 0], [0, 0], [0, 0]]]) + assert_array_equal(H, answer) + + Z = np.zeros((5, 5, 5)) + Z[list(range(5)), list(range(5)), list(range(5))] = 1. + H, edges = histogramdd([np.arange(5), np.arange(5), np.arange(5)], 5) + assert_array_equal(H, Z) + + def test_shape_3d(self): + # All possible permutations for bins of different lengths in 3D. + bins = ((5, 4, 6), (6, 4, 5), (5, 6, 4), (4, 6, 5), (6, 5, 4), + (4, 5, 6)) + r = np.random.rand(10, 3) + for b in bins: + H, edges = histogramdd(r, b) + assert_(H.shape == b) + + def test_shape_4d(self): + # All possible permutations for bins of different lengths in 4D. 
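+        # Descriptive note: the 24 tuples below are exactly the
+        # permutations of (4, 5, 6, 7), i.e. itertools.permutations((4, 5, 6, 7)).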
+ bins = ((7, 4, 5, 6), (4, 5, 7, 6), (5, 6, 4, 7), (7, 6, 5, 4), + (5, 7, 6, 4), (4, 6, 7, 5), (6, 5, 7, 4), (7, 5, 4, 6), + (7, 4, 6, 5), (6, 4, 7, 5), (6, 7, 5, 4), (4, 6, 5, 7), + (4, 7, 5, 6), (5, 4, 6, 7), (5, 7, 4, 6), (6, 7, 4, 5), + (6, 5, 4, 7), (4, 7, 6, 5), (4, 5, 6, 7), (7, 6, 4, 5), + (5, 4, 7, 6), (5, 6, 7, 4), (6, 4, 5, 7), (7, 5, 6, 4)) + + r = np.random.rand(10, 4) + for b in bins: + H, edges = histogramdd(r, b) + assert_(H.shape == b) + + def test_weights(self): + v = np.random.rand(100, 2) + hist, edges = histogramdd(v) + n_hist, edges = histogramdd(v, density=True) + w_hist, edges = histogramdd(v, weights=np.ones(100)) + assert_array_equal(w_hist, hist) + w_hist, edges = histogramdd(v, weights=np.ones(100) * 2, density=True) + assert_array_equal(w_hist, n_hist) + w_hist, edges = histogramdd(v, weights=np.ones(100, int) * 2) + assert_array_equal(w_hist, 2 * hist) + + def test_identical_samples(self): + x = np.zeros((10, 2), int) + hist, edges = histogramdd(x, bins=2) + assert_array_equal(edges[0], np.array([-0.5, 0., 0.5])) + + def test_empty(self): + a, b = histogramdd([[], []], bins=([0, 1], [0, 1])) + assert_array_max_ulp(a, np.array([[0.]])) + a, b = np.histogramdd([[], [], []], bins=2) + assert_array_max_ulp(a, np.zeros((2, 2, 2))) + + def test_bins_errors(self): + # There are two ways to specify bins. Check for the right errors + # when mixing those. + x = np.arange(8).reshape(2, 4) + assert_raises(ValueError, np.histogramdd, x, bins=[-1, 2, 4, 5]) + assert_raises(ValueError, np.histogramdd, x, bins=[1, 0.99, 1, 1]) + assert_raises( + ValueError, np.histogramdd, x, bins=[1, 1, 1, [1, 2, 3, -3]]) + assert_(np.histogramdd(x, bins=[1, 1, 1, [1, 2, 3, 4]])) + + def test_inf_edges(self): + # Test using +/-inf bin edges works. See #1788. + with np.errstate(invalid='ignore'): + x = np.arange(6).reshape(3, 2) + expected = np.array([[1, 0], [0, 1], [0, 1]]) + h, e = np.histogramdd(x, bins=[3, [-np.inf, 2, 10]]) + assert_allclose(h, expected) + h, e = np.histogramdd(x, bins=[3, np.array([-1, 2, np.inf])]) + assert_allclose(h, expected) + h, e = np.histogramdd(x, bins=[3, [-np.inf, 3, np.inf]]) + assert_allclose(h, expected) + + def test_rightmost_binedge(self): + # Test event very close to rightmost binedge. See Github issue #4266 + x = [0.9999999995] + bins = [[0., 0.5, 1.0]] + hist, _ = histogramdd(x, bins=bins) + assert_(hist[0] == 0.0) + assert_(hist[1] == 1.) + x = [1.0] + bins = [[0., 0.5, 1.0]] + hist, _ = histogramdd(x, bins=bins) + assert_(hist[0] == 0.0) + assert_(hist[1] == 1.) 
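+        # Unlike 1.0 above, which lands in the (inclusive) final bin, values
+        # strictly beyond the rightmost edge fall outside every bin: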
+ x = [1.0000000001] + bins = [[0., 0.5, 1.0]] + hist, _ = histogramdd(x, bins=bins) + assert_(hist[0] == 0.0) + assert_(hist[1] == 0.0) + x = [1.0001] + bins = [[0., 0.5, 1.0]] + hist, _ = histogramdd(x, bins=bins) + assert_(hist[0] == 0.0) + assert_(hist[1] == 0.0) + + def test_finite_range(self): + vals = np.random.random((100, 3)) + histogramdd(vals, range=[[0.0, 1.0], [0.25, 0.75], [0.25, 0.5]]) + assert_raises(ValueError, histogramdd, vals, + range=[[0.0, 1.0], [0.25, 0.75], [0.25, np.inf]]) + assert_raises(ValueError, histogramdd, vals, + range=[[0.0, 1.0], [np.nan, 0.75], [0.25, 0.5]]) + + def test_equal_edges(self): + """ Test that adjacent entries in an edge array can be equal """ + x = np.array([0, 1, 2]) + y = np.array([0, 1, 2]) + x_edges = np.array([0, 2, 2]) + y_edges = 1 + hist, edges = histogramdd((x, y), bins=(x_edges, y_edges)) + + hist_expected = np.array([ + [2.], + [1.], # x == 2 falls in the final bin + ]) + assert_equal(hist, hist_expected) + + def test_edge_dtype(self): + """ Test that if an edge array is input, its type is preserved """ + x = np.array([0, 10, 20]) + y = x / 10 + x_edges = np.array([0, 5, 15, 20]) + y_edges = x_edges / 10 + hist, edges = histogramdd((x, y), bins=(x_edges, y_edges)) + + assert_equal(edges[0].dtype, x_edges.dtype) + assert_equal(edges[1].dtype, y_edges.dtype) + + def test_large_integers(self): + big = 2**60 # Too large to represent with a full precision float + + x = np.array([0], np.int64) + x_edges = np.array([-1, +1], np.int64) + y = big + x + y_edges = big + x_edges + + hist, edges = histogramdd((x, y), bins=(x_edges, y_edges)) + + assert_equal(hist[0, 0], 1) + + def test_density_non_uniform_2d(self): + # Defines the following grid: + # + # 0 2 8 + # 0+-+-----+ + # + | + + # + | + + # 6+-+-----+ + # 8+-+-----+ + x_edges = np.array([0, 2, 8]) + y_edges = np.array([0, 6, 8]) + relative_areas = np.array([ + [3, 9], + [1, 3]]) + + # ensure the number of points in each region is proportional to its area + x = np.array([1] + [1]*3 + [7]*3 + [7]*9) + y = np.array([7] + [1]*3 + [7]*3 + [1]*9) + + # sanity check that the above worked as intended + hist, edges = histogramdd((y, x), bins=(y_edges, x_edges)) + assert_equal(hist, relative_areas) + + # resulting histogram should be uniform, since counts and areas are proportional + hist, edges = histogramdd((y, x), bins=(y_edges, x_edges), density=True) + assert_equal(hist, 1 / (8*8)) + + def test_density_non_uniform_1d(self): + # compare to histogram to show the results are the same + v = np.arange(10) + bins = np.array([0, 1, 3, 6, 10]) + hist, edges = histogram(v, bins, density=True) + hist_dd, edges_dd = histogramdd((v,), (bins,), density=True) + assert_equal(hist, hist_dd) + assert_equal(edges, edges_dd[0]) + + def test_density_via_normed(self): + # normed should simply alias to density argument + v = np.arange(10) + bins = np.array([0, 1, 3, 6, 10]) + hist, edges = histogram(v, bins, density=True) + hist_dd, edges_dd = histogramdd((v,), (bins,), normed=True) + assert_equal(hist, hist_dd) + assert_equal(edges, edges_dd[0]) + + def test_density_normed_redundancy(self): + v = np.arange(10) + bins = np.array([0, 1, 3, 6, 10]) + with assert_raises_regex(TypeError, "Cannot specify both"): + hist_dd, edges_dd = histogramdd((v,), (bins,), + density=True, + normed=True) diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/test_index_tricks.py b/.local/lib/python3.8/site-packages/numpy/lib/tests/test_index_tricks.py new file mode 100644 index 0000000..26a34be --- /dev/null +++ 
b/.local/lib/python3.8/site-packages/numpy/lib/tests/test_index_tricks.py @@ -0,0 +1,517 @@ +import pytest + +import numpy as np +from numpy.testing import ( + assert_, assert_equal, assert_array_equal, assert_almost_equal, + assert_array_almost_equal, assert_raises, assert_raises_regex, + ) +from numpy.lib.index_tricks import ( + mgrid, ogrid, ndenumerate, fill_diagonal, diag_indices, diag_indices_from, + index_exp, ndindex, r_, s_, ix_ + ) + + +class TestRavelUnravelIndex: + def test_basic(self): + assert_equal(np.unravel_index(2, (2, 2)), (1, 0)) + + # test that new shape argument works properly + assert_equal(np.unravel_index(indices=2, + shape=(2, 2)), + (1, 0)) + + # test that an invalid second keyword argument + # is properly handled, including the old name `dims`. + with assert_raises(TypeError): + np.unravel_index(indices=2, hape=(2, 2)) + + with assert_raises(TypeError): + np.unravel_index(2, hape=(2, 2)) + + with assert_raises(TypeError): + np.unravel_index(254, ims=(17, 94)) + + with assert_raises(TypeError): + np.unravel_index(254, dims=(17, 94)) + + assert_equal(np.ravel_multi_index((1, 0), (2, 2)), 2) + assert_equal(np.unravel_index(254, (17, 94)), (2, 66)) + assert_equal(np.ravel_multi_index((2, 66), (17, 94)), 254) + assert_raises(ValueError, np.unravel_index, -1, (2, 2)) + assert_raises(TypeError, np.unravel_index, 0.5, (2, 2)) + assert_raises(ValueError, np.unravel_index, 4, (2, 2)) + assert_raises(ValueError, np.ravel_multi_index, (-3, 1), (2, 2)) + assert_raises(ValueError, np.ravel_multi_index, (2, 1), (2, 2)) + assert_raises(ValueError, np.ravel_multi_index, (0, -3), (2, 2)) + assert_raises(ValueError, np.ravel_multi_index, (0, 2), (2, 2)) + assert_raises(TypeError, np.ravel_multi_index, (0.1, 0.), (2, 2)) + + assert_equal(np.unravel_index((2*3 + 1)*6 + 4, (4, 3, 6)), [2, 1, 4]) + assert_equal( + np.ravel_multi_index([2, 1, 4], (4, 3, 6)), (2*3 + 1)*6 + 4) + + arr = np.array([[3, 6, 6], [4, 5, 1]]) + assert_equal(np.ravel_multi_index(arr, (7, 6)), [22, 41, 37]) + assert_equal( + np.ravel_multi_index(arr, (7, 6), order='F'), [31, 41, 13]) + assert_equal( + np.ravel_multi_index(arr, (4, 6), mode='clip'), [22, 23, 19]) + assert_equal(np.ravel_multi_index(arr, (4, 4), mode=('clip', 'wrap')), + [12, 13, 13]) + assert_equal(np.ravel_multi_index((3, 1, 4, 1), (6, 7, 8, 9)), 1621) + + assert_equal(np.unravel_index(np.array([22, 41, 37]), (7, 6)), + [[3, 6, 6], [4, 5, 1]]) + assert_equal( + np.unravel_index(np.array([31, 41, 13]), (7, 6), order='F'), + [[3, 6, 6], [4, 5, 1]]) + assert_equal(np.unravel_index(1621, (6, 7, 8, 9)), [3, 1, 4, 1]) + + def test_empty_indices(self): + msg1 = 'indices must be integral: the provided empty sequence was' + msg2 = 'only int indices permitted' + assert_raises_regex(TypeError, msg1, np.unravel_index, [], (10, 3, 5)) + assert_raises_regex(TypeError, msg1, np.unravel_index, (), (10, 3, 5)) + assert_raises_regex(TypeError, msg2, np.unravel_index, np.array([]), + (10, 3, 5)) + assert_equal(np.unravel_index(np.array([],dtype=int), (10, 3, 5)), + [[], [], []]) + assert_raises_regex(TypeError, msg1, np.ravel_multi_index, ([], []), + (10, 3)) + assert_raises_regex(TypeError, msg1, np.ravel_multi_index, ([], ['abc']), + (10, 3)) + assert_raises_regex(TypeError, msg2, np.ravel_multi_index, + (np.array([]), np.array([])), (5, 3)) + assert_equal(np.ravel_multi_index( + (np.array([], dtype=int), np.array([], dtype=int)), (5, 3)), []) + assert_equal(np.ravel_multi_index(np.array([[], []], dtype=int), + (5, 3)), []) + + def test_big_indices(self): + # 
ravel_multi_index for big indices (issue #7546) + if np.intp == np.int64: + arr = ([1, 29], [3, 5], [3, 117], [19, 2], + [2379, 1284], [2, 2], [0, 1]) + assert_equal( + np.ravel_multi_index(arr, (41, 7, 120, 36, 2706, 8, 6)), + [5627771580, 117259570957]) + + # test unravel_index for big indices (issue #9538) + assert_raises(ValueError, np.unravel_index, 1, (2**32-1, 2**31+1)) + + # test overflow checking for too big array (issue #7546) + dummy_arr = ([0],[0]) + half_max = np.iinfo(np.intp).max // 2 + assert_equal( + np.ravel_multi_index(dummy_arr, (half_max, 2)), [0]) + assert_raises(ValueError, + np.ravel_multi_index, dummy_arr, (half_max+1, 2)) + assert_equal( + np.ravel_multi_index(dummy_arr, (half_max, 2), order='F'), [0]) + assert_raises(ValueError, + np.ravel_multi_index, dummy_arr, (half_max+1, 2), order='F') + + def test_dtypes(self): + # Test with different data types + for dtype in [np.int16, np.uint16, np.int32, + np.uint32, np.int64, np.uint64]: + coords = np.array( + [[1, 0, 1, 2, 3, 4], [1, 6, 1, 3, 2, 0]], dtype=dtype) + shape = (5, 8) + uncoords = 8*coords[0]+coords[1] + assert_equal(np.ravel_multi_index(coords, shape), uncoords) + assert_equal(coords, np.unravel_index(uncoords, shape)) + uncoords = coords[0]+5*coords[1] + assert_equal( + np.ravel_multi_index(coords, shape, order='F'), uncoords) + assert_equal(coords, np.unravel_index(uncoords, shape, order='F')) + + coords = np.array( + [[1, 0, 1, 2, 3, 4], [1, 6, 1, 3, 2, 0], [1, 3, 1, 0, 9, 5]], + dtype=dtype) + shape = (5, 8, 10) + uncoords = 10*(8*coords[0]+coords[1])+coords[2] + assert_equal(np.ravel_multi_index(coords, shape), uncoords) + assert_equal(coords, np.unravel_index(uncoords, shape)) + uncoords = coords[0]+5*(coords[1]+8*coords[2]) + assert_equal( + np.ravel_multi_index(coords, shape, order='F'), uncoords) + assert_equal(coords, np.unravel_index(uncoords, shape, order='F')) + + def test_clipmodes(self): + # Test clipmodes + assert_equal( + np.ravel_multi_index([5, 1, -1, 2], (4, 3, 7, 12), mode='wrap'), + np.ravel_multi_index([1, 1, 6, 2], (4, 3, 7, 12))) + assert_equal(np.ravel_multi_index([5, 1, -1, 2], (4, 3, 7, 12), + mode=( + 'wrap', 'raise', 'clip', 'raise')), + np.ravel_multi_index([1, 1, 0, 2], (4, 3, 7, 12))) + assert_raises( + ValueError, np.ravel_multi_index, [5, 1, -1, 2], (4, 3, 7, 12)) + + def test_writeability(self): + # See gh-7269 + x, y = np.unravel_index([1, 2, 3], (4, 5)) + assert_(x.flags.writeable) + assert_(y.flags.writeable) + + def test_0d(self): + # gh-580 + x = np.unravel_index(0, ()) + assert_equal(x, ()) + + assert_raises_regex(ValueError, "0d array", np.unravel_index, [0], ()) + assert_raises_regex( + ValueError, "out of bounds", np.unravel_index, [1], ()) + + @pytest.mark.parametrize("mode", ["clip", "wrap", "raise"]) + def test_empty_array_ravel(self, mode): + res = np.ravel_multi_index( + np.zeros((3, 0), dtype=np.intp), (2, 1, 0), mode=mode) + assert(res.shape == (0,)) + + with assert_raises(ValueError): + np.ravel_multi_index( + np.zeros((3, 1), dtype=np.intp), (2, 1, 0), mode=mode) + + def test_empty_array_unravel(self): + res = np.unravel_index(np.zeros(0, dtype=np.intp), (2, 1, 0)) + # res is a tuple of three empty arrays + assert(len(res) == 3) + assert(all(a.shape == (0,) for a in res)) + + with assert_raises(ValueError): + np.unravel_index([1], (2, 1, 0)) + +class TestGrid: + def test_basic(self): + a = mgrid[-1:1:10j] + b = mgrid[-1:1:0.1] + assert_(a.shape == (10,)) + assert_(b.shape == (20,)) + assert_(a[0] == -1) + assert_almost_equal(a[-1], 1) + assert_(b[0] == 
-1) + assert_almost_equal(b[1]-b[0], 0.1, 11) + assert_almost_equal(b[-1], b[0]+19*0.1, 11) + assert_almost_equal(a[1]-a[0], 2.0/9.0, 11) + + def test_linspace_equivalence(self): + y, st = np.linspace(2, 10, retstep=True) + assert_almost_equal(st, 8/49.0) + assert_array_almost_equal(y, mgrid[2:10:50j], 13) + + def test_nd(self): + c = mgrid[-1:1:10j, -2:2:10j] + d = mgrid[-1:1:0.1, -2:2:0.2] + assert_(c.shape == (2, 10, 10)) + assert_(d.shape == (2, 20, 20)) + assert_array_equal(c[0][0, :], -np.ones(10, 'd')) + assert_array_equal(c[1][:, 0], -2*np.ones(10, 'd')) + assert_array_almost_equal(c[0][-1, :], np.ones(10, 'd'), 11) + assert_array_almost_equal(c[1][:, -1], 2*np.ones(10, 'd'), 11) + assert_array_almost_equal(d[0, 1, :] - d[0, 0, :], + 0.1*np.ones(20, 'd'), 11) + assert_array_almost_equal(d[1, :, 1] - d[1, :, 0], + 0.2*np.ones(20, 'd'), 11) + + def test_sparse(self): + grid_full = mgrid[-1:1:10j, -2:2:10j] + grid_sparse = ogrid[-1:1:10j, -2:2:10j] + + # sparse grids can be made dense by broadcasting + grid_broadcast = np.broadcast_arrays(*grid_sparse) + for f, b in zip(grid_full, grid_broadcast): + assert_equal(f, b) + + @pytest.mark.parametrize("start, stop, step, expected", [ + (None, 10, 10j, (200, 10)), + (-10, 20, None, (1800, 30)), + ]) + def test_mgrid_size_none_handling(self, start, stop, step, expected): + # regression test None value handling for + # start and step values used by mgrid; + # internally, this aims to cover previously + # unexplored code paths in nd_grid() + grid = mgrid[start:stop:step, start:stop:step] + # need a smaller grid to explore one of the + # untested code paths + grid_small = mgrid[start:stop:step] + assert_equal(grid.size, expected[0]) + assert_equal(grid_small.size, expected[1]) + + def test_accepts_npfloating(self): + # regression test for #16466 + grid64 = mgrid[0.1:0.33:0.1, ] + grid32 = mgrid[np.float32(0.1):np.float32(0.33):np.float32(0.1), ] + assert_(grid32.dtype == np.float64) + assert_array_almost_equal(grid64, grid32) + + # different code path for single slice + grid64 = mgrid[0.1:0.33:0.1] + grid32 = mgrid[np.float32(0.1):np.float32(0.33):np.float32(0.1)] + assert_(grid32.dtype == np.float64) + assert_array_almost_equal(grid64, grid32) + + def test_accepts_npcomplexfloating(self): + # Related to #16466 + assert_array_almost_equal( + mgrid[0.1:0.3:3j, ], mgrid[0.1:0.3:np.complex64(3j), ] + ) + + # different code path for single slice + assert_array_almost_equal( + mgrid[0.1:0.3:3j], mgrid[0.1:0.3:np.complex64(3j)] + ) + +class TestConcatenator: + def test_1d(self): + assert_array_equal(r_[1, 2, 3, 4, 5, 6], np.array([1, 2, 3, 4, 5, 6])) + b = np.ones(5) + c = r_[b, 0, 0, b] + assert_array_equal(c, [1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1]) + + def test_mixed_type(self): + g = r_[10.1, 1:10] + assert_(g.dtype == 'f8') + + def test_more_mixed_type(self): + g = r_[-10.1, np.array([1]), np.array([2, 3, 4]), 10.0] + assert_(g.dtype == 'f8') + + def test_complex_step(self): + # Regression test for #12262 + g = r_[0:36:100j] + assert_(g.shape == (100,)) + + # Related to #16466 + g = r_[0:36:np.complex64(100j)] + assert_(g.shape == (100,)) + + def test_2d(self): + b = np.random.rand(5, 5) + c = np.random.rand(5, 5) + d = r_['1', b, c] # append columns + assert_(d.shape == (5, 10)) + assert_array_equal(d[:, :5], b) + assert_array_equal(d[:, 5:], c) + d = r_[b, c] + assert_(d.shape == (10, 5)) + assert_array_equal(d[:5, :], b) + assert_array_equal(d[5:, :], c) + + def test_0d(self): + assert_equal(r_[0, np.array(1), 2], [0, 1, 2]) + 
assert_equal(r_[[0, 1, 2], np.array(3)], [0, 1, 2, 3]) + assert_equal(r_[np.array(0), [1, 2, 3]], [0, 1, 2, 3]) + + +class TestNdenumerate: + def test_basic(self): + a = np.array([[1, 2], [3, 4]]) + assert_equal(list(ndenumerate(a)), + [((0, 0), 1), ((0, 1), 2), ((1, 0), 3), ((1, 1), 4)]) + + +class TestIndexExpression: + def test_regression_1(self): + # ticket #1196 + a = np.arange(2) + assert_equal(a[:-1], a[s_[:-1]]) + assert_equal(a[:-1], a[index_exp[:-1]]) + + def test_simple_1(self): + a = np.random.rand(4, 5, 6) + + assert_equal(a[:, :3, [1, 2]], a[index_exp[:, :3, [1, 2]]]) + assert_equal(a[:, :3, [1, 2]], a[s_[:, :3, [1, 2]]]) + + +class TestIx_: + def test_regression_1(self): + # Test empty untyped inputs create outputs of indexing type, gh-5804 + a, = np.ix_(range(0)) + assert_equal(a.dtype, np.intp) + + a, = np.ix_([]) + assert_equal(a.dtype, np.intp) + + # but if the type is specified, don't change it + a, = np.ix_(np.array([], dtype=np.float32)) + assert_equal(a.dtype, np.float32) + + def test_shape_and_dtype(self): + sizes = (4, 5, 3, 2) + # Test both lists and arrays + for func in (range, np.arange): + arrays = np.ix_(*[func(sz) for sz in sizes]) + for k, (a, sz) in enumerate(zip(arrays, sizes)): + assert_equal(a.shape[k], sz) + assert_(all(sh == 1 for j, sh in enumerate(a.shape) if j != k)) + assert_(np.issubdtype(a.dtype, np.integer)) + + def test_bool(self): + bool_a = [True, False, True, True] + int_a, = np.nonzero(bool_a) + assert_equal(np.ix_(bool_a)[0], int_a) + + def test_1d_only(self): + idx2d = [[1, 2, 3], [4, 5, 6]] + assert_raises(ValueError, np.ix_, idx2d) + + def test_repeated_input(self): + length_of_vector = 5 + x = np.arange(length_of_vector) + out = ix_(x, x) + assert_equal(out[0].shape, (length_of_vector, 1)) + assert_equal(out[1].shape, (1, length_of_vector)) + # check that input shape is not modified + assert_equal(x.shape, (length_of_vector,)) + + +def test_c_(): + a = np.c_[np.array([[1, 2, 3]]), 0, 0, np.array([[4, 5, 6]])] + assert_equal(a, [[1, 2, 3, 0, 0, 4, 5, 6]]) + + +class TestFillDiagonal: + def test_basic(self): + a = np.zeros((3, 3), int) + fill_diagonal(a, 5) + assert_array_equal( + a, np.array([[5, 0, 0], + [0, 5, 0], + [0, 0, 5]]) + ) + + def test_tall_matrix(self): + a = np.zeros((10, 3), int) + fill_diagonal(a, 5) + assert_array_equal( + a, np.array([[5, 0, 0], + [0, 5, 0], + [0, 0, 5], + [0, 0, 0], + [0, 0, 0], + [0, 0, 0], + [0, 0, 0], + [0, 0, 0], + [0, 0, 0], + [0, 0, 0]]) + ) + + def test_tall_matrix_wrap(self): + a = np.zeros((10, 3), int) + fill_diagonal(a, 5, True) + assert_array_equal( + a, np.array([[5, 0, 0], + [0, 5, 0], + [0, 0, 5], + [0, 0, 0], + [5, 0, 0], + [0, 5, 0], + [0, 0, 5], + [0, 0, 0], + [5, 0, 0], + [0, 5, 0]]) + ) + + def test_wide_matrix(self): + a = np.zeros((3, 10), int) + fill_diagonal(a, 5) + assert_array_equal( + a, np.array([[5, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 5, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 5, 0, 0, 0, 0, 0, 0, 0]]) + ) + + def test_operate_4d_array(self): + a = np.zeros((3, 3, 3, 3), int) + fill_diagonal(a, 4) + i = np.array([0, 1, 2]) + assert_equal(np.where(a != 0), (i, i, i, i)) + + def test_low_dim_handling(self): + # raise error with low dimensionality + a = np.zeros(3, int) + with assert_raises_regex(ValueError, "at least 2-d"): + fill_diagonal(a, 5) + + def test_hetero_shape_handling(self): + # raise error with high dimensionality and + # shape mismatch + a = np.zeros((3,3,7,3), int) + with assert_raises_regex(ValueError, "equal length"): + fill_diagonal(a, 2) + + +def 
test_diag_indices(): + di = diag_indices(4) + a = np.array([[1, 2, 3, 4], + [5, 6, 7, 8], + [9, 10, 11, 12], + [13, 14, 15, 16]]) + a[di] = 100 + assert_array_equal( + a, np.array([[100, 2, 3, 4], + [5, 100, 7, 8], + [9, 10, 100, 12], + [13, 14, 15, 100]]) + ) + + # Now, we create indices to manipulate a 3-d array: + d3 = diag_indices(2, 3) + + # And use it to set the diagonal of a zeros array to 1: + a = np.zeros((2, 2, 2), int) + a[d3] = 1 + assert_array_equal( + a, np.array([[[1, 0], + [0, 0]], + [[0, 0], + [0, 1]]]) + ) + + +class TestDiagIndicesFrom: + + def test_diag_indices_from(self): + x = np.random.random((4, 4)) + r, c = diag_indices_from(x) + assert_array_equal(r, np.arange(4)) + assert_array_equal(c, np.arange(4)) + + def test_error_small_input(self): + x = np.ones(7) + with assert_raises_regex(ValueError, "at least 2-d"): + diag_indices_from(x) + + def test_error_shape_mismatch(self): + x = np.zeros((3, 3, 2, 3), int) + with assert_raises_regex(ValueError, "equal length"): + diag_indices_from(x) + + +def test_ndindex(): + x = list(ndindex(1, 2, 3)) + expected = [ix for ix, e in ndenumerate(np.zeros((1, 2, 3)))] + assert_array_equal(x, expected) + + x = list(ndindex((1, 2, 3))) + assert_array_equal(x, expected) + + # Test use of scalars and tuples + x = list(ndindex((3,))) + assert_array_equal(x, list(ndindex(3))) + + # Make sure size argument is optional + x = list(ndindex()) + assert_equal(x, [()]) + + x = list(ndindex(())) + assert_equal(x, [()]) + + # Make sure 0-sized ndindex works correctly + x = list(ndindex(*[0])) + assert_equal(x, []) diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/test_io.py b/.local/lib/python3.8/site-packages/numpy/lib/tests/test_io.py new file mode 100644 index 0000000..5201b8e --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/tests/test_io.py @@ -0,0 +1,2670 @@ +import sys +import gc +import gzip +import os +import threading +import time +import warnings +import io +import re +import pytest +from pathlib import Path +from tempfile import NamedTemporaryFile +from io import BytesIO, StringIO +from datetime import datetime +import locale +from multiprocessing import Process, Value +from ctypes import c_bool + +import numpy as np +import numpy.ma as ma +from numpy.lib._iotools import ConverterError, ConversionWarning +from numpy.compat import asbytes +from numpy.ma.testutils import assert_equal +from numpy.testing import ( + assert_warns, assert_, assert_raises_regex, assert_raises, + assert_allclose, assert_array_equal, temppath, tempdir, IS_PYPY, + HAS_REFCOUNT, suppress_warnings, assert_no_gc_cycles, assert_no_warnings, + break_cycles + ) +from numpy.testing._private.utils import requires_memory + + +class TextIO(BytesIO): + """Helper IO class. + + Writes encode strings to bytes if needed, reads return bytes. + This makes it easier to emulate files opened in binary mode + without needing to explicitly convert strings to bytes in + setting up the test data. + + """ + def __init__(self, s=""): + BytesIO.__init__(self, asbytes(s)) + + def write(self, s): + BytesIO.write(self, asbytes(s)) + + def writelines(self, lines): + BytesIO.writelines(self, [asbytes(s) for s in lines]) + + +IS_64BIT = sys.maxsize > 2**32 +try: + import bz2 + HAS_BZ2 = True +except ImportError: + HAS_BZ2 = False +try: + import lzma + HAS_LZMA = True +except ImportError: + HAS_LZMA = False + + +def strptime(s, fmt=None): + """ + This function is available in the datetime module only from Python >= + 2.5. 
+ + """ + if type(s) == bytes: + s = s.decode("latin1") + return datetime(*time.strptime(s, fmt)[:3]) + + +class RoundtripTest: + def roundtrip(self, save_func, *args, **kwargs): + """ + save_func : callable + Function used to save arrays to file. + file_on_disk : bool + If true, store the file on disk, instead of in a + string buffer. + save_kwds : dict + Parameters passed to `save_func`. + load_kwds : dict + Parameters passed to `numpy.load`. + args : tuple of arrays + Arrays stored to file. + + """ + save_kwds = kwargs.get('save_kwds', {}) + load_kwds = kwargs.get('load_kwds', {"allow_pickle": True}) + file_on_disk = kwargs.get('file_on_disk', False) + + if file_on_disk: + target_file = NamedTemporaryFile(delete=False) + load_file = target_file.name + else: + target_file = BytesIO() + load_file = target_file + + try: + arr = args + + save_func(target_file, *arr, **save_kwds) + target_file.flush() + target_file.seek(0) + + if sys.platform == 'win32' and not isinstance(target_file, BytesIO): + target_file.close() + + arr_reloaded = np.load(load_file, **load_kwds) + + self.arr = arr + self.arr_reloaded = arr_reloaded + finally: + if not isinstance(target_file, BytesIO): + target_file.close() + # holds an open file descriptor so it can't be deleted on win + if 'arr_reloaded' in locals(): + if not isinstance(arr_reloaded, np.lib.npyio.NpzFile): + os.remove(target_file.name) + + def check_roundtrips(self, a): + self.roundtrip(a) + self.roundtrip(a, file_on_disk=True) + self.roundtrip(np.asfortranarray(a)) + self.roundtrip(np.asfortranarray(a), file_on_disk=True) + if a.shape[0] > 1: + # neither C nor Fortran contiguous for 2D arrays or more + self.roundtrip(np.asfortranarray(a)[1:]) + self.roundtrip(np.asfortranarray(a)[1:], file_on_disk=True) + + def test_array(self): + a = np.array([], float) + self.check_roundtrips(a) + + a = np.array([[1, 2], [3, 4]], float) + self.check_roundtrips(a) + + a = np.array([[1, 2], [3, 4]], int) + self.check_roundtrips(a) + + a = np.array([[1 + 5j, 2 + 6j], [3 + 7j, 4 + 8j]], dtype=np.csingle) + self.check_roundtrips(a) + + a = np.array([[1 + 5j, 2 + 6j], [3 + 7j, 4 + 8j]], dtype=np.cdouble) + self.check_roundtrips(a) + + def test_array_object(self): + a = np.array([], object) + self.check_roundtrips(a) + + a = np.array([[1, 2], [3, 4]], object) + self.check_roundtrips(a) + + def test_1D(self): + a = np.array([1, 2, 3, 4], int) + self.roundtrip(a) + + @pytest.mark.skipif(sys.platform == 'win32', reason="Fails on Win32") + def test_mmap(self): + a = np.array([[1, 2.5], [4, 7.3]]) + self.roundtrip(a, file_on_disk=True, load_kwds={'mmap_mode': 'r'}) + + a = np.asfortranarray([[1, 2.5], [4, 7.3]]) + self.roundtrip(a, file_on_disk=True, load_kwds={'mmap_mode': 'r'}) + + def test_record(self): + a = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')]) + self.check_roundtrips(a) + + @pytest.mark.slow + def test_format_2_0(self): + dt = [(("%d" % i) * 100, float) for i in range(500)] + a = np.ones(1000, dtype=dt) + with warnings.catch_warnings(record=True): + warnings.filterwarnings('always', '', UserWarning) + self.check_roundtrips(a) + + +class TestSaveLoad(RoundtripTest): + def roundtrip(self, *args, **kwargs): + RoundtripTest.roundtrip(self, np.save, *args, **kwargs) + assert_equal(self.arr[0], self.arr_reloaded) + assert_equal(self.arr[0].dtype, self.arr_reloaded.dtype) + assert_equal(self.arr[0].flags.fnc, self.arr_reloaded.flags.fnc) + + +class TestSavezLoad(RoundtripTest): + def roundtrip(self, *args, **kwargs): + RoundtripTest.roundtrip(self, 
np.savez, *args, **kwargs)
+        try:
+            for n, arr in enumerate(self.arr):
+                reloaded = self.arr_reloaded['arr_%d' % n]
+                assert_equal(arr, reloaded)
+                assert_equal(arr.dtype, reloaded.dtype)
+                assert_equal(arr.flags.fnc, reloaded.flags.fnc)
+        finally:
+            # delete tempfile, must be done here on windows
+            if self.arr_reloaded.fid:
+                self.arr_reloaded.fid.close()
+                os.remove(self.arr_reloaded.fid.name)
+
+    @pytest.mark.skipif(not IS_64BIT, reason="Needs 64bit platform")
+    @pytest.mark.slow
+    def test_big_arrays(self):
+        L = (1 << 31) + 100000
+        a = np.empty(L, dtype=np.uint8)
+        with temppath(prefix="numpy_test_big_arrays_", suffix=".npz") as tmp:
+            np.savez(tmp, a=a)
+            del a
+            npfile = np.load(tmp)
+            a = npfile['a']  # Should succeed
+            npfile.close()
+            del a  # Avoid pyflakes unused variable warning.
+
+    def test_multiple_arrays(self):
+        a = np.array([[1, 2], [3, 4]], float)
+        b = np.array([[1 + 2j, 2 + 7j], [3 - 6j, 4 + 12j]], complex)
+        self.roundtrip(a, b)
+
+    def test_named_arrays(self):
+        a = np.array([[1, 2], [3, 4]], float)
+        b = np.array([[1 + 2j, 2 + 7j], [3 - 6j, 4 + 12j]], complex)
+        c = BytesIO()
+        np.savez(c, file_a=a, file_b=b)
+        c.seek(0)
+        l = np.load(c)
+        assert_equal(a, l['file_a'])
+        assert_equal(b, l['file_b'])
+
+    def test_BagObj(self):
+        a = np.array([[1, 2], [3, 4]], float)
+        b = np.array([[1 + 2j, 2 + 7j], [3 - 6j, 4 + 12j]], complex)
+        c = BytesIO()
+        np.savez(c, file_a=a, file_b=b)
+        c.seek(0)
+        l = np.load(c)
+        assert_equal(sorted(dir(l.f)), ['file_a', 'file_b'])
+        assert_equal(a, l.f.file_a)
+        assert_equal(b, l.f.file_b)
+
+    def test_savez_filename_clashes(self):
+        # Test that issue #852 is fixed
+        # and that savez functions in a multithreaded environment
+
+        def writer(error_list):
+            with temppath(suffix='.npz') as tmp:
+                arr = np.random.randn(500, 500)
+                try:
+                    np.savez(tmp, arr=arr)
+                except OSError as err:
+                    error_list.append(err)
+
+        errors = []
+        threads = [threading.Thread(target=writer, args=(errors,))
+                   for j in range(3)]
+        for t in threads:
+            t.start()
+        for t in threads:
+            t.join()
+
+        if errors:
+            raise AssertionError(errors)
+
+    def test_not_closing_opened_fid(self):
+        # Test that issue #2178 is fixed:
+        # verify that we can seek on the 'loaded' file
+        with temppath(suffix='.npz') as tmp:
+            with open(tmp, 'wb') as fp:
+                np.savez(fp, data='LOVELY LOAD')
+            with open(tmp, 'rb', 10000) as fp:
+                fp.seek(0)
+                assert_(not fp.closed)
+                np.load(fp)['data']
+                # fp must not get closed by .load
+                assert_(not fp.closed)
+                fp.seek(0)
+                assert_(not fp.closed)
+
+    @pytest.mark.slow_pypy
+    def test_closing_fid(self):
+        # Test that issue #1517 (too many opened files) remains closed.
+        # It might be a "weak" test since it failed to get triggered on
+        # e.g. Debian sid of 2012 Jul 05, but was reported to
+        # trigger the failure on Ubuntu 10.04:
+        # http://projects.scipy.org/numpy/ticket/1517#comment:2
+        with temppath(suffix='.npz') as tmp:
+            np.savez(tmp, data='LOVELY LOAD')
+            # We need to check if the garbage collector can properly close
+            # numpy npz files returned by np.load when their reference count
+            # goes to zero. Python 3 running in debug mode raises a
+            # ResourceWarning when file closing is left to the garbage
+            # collector, so we catch the warnings.
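+            # (Note: 1024 loads in the loop below would be enough to exhaust
+            # the default per-process file-descriptor limit on many systems
+            # if handles were leaked.)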
+            with suppress_warnings() as sup:
+                sup.filter(ResourceWarning)  # TODO: specify exact message
+                for i in range(1, 1025):
+                    try:
+                        np.load(tmp)["data"]
+                    except Exception as e:
+                        msg = "Failed to load data from a file: %s" % e
+                        raise AssertionError(msg)
+                    finally:
+                        if IS_PYPY:
+                            gc.collect()
+
+    def test_closing_zipfile_after_load(self):
+        # Check that zipfile owns file and can close it. This needs to
+        # pass a file name to load for the test. On windows, failure will
+        # cause a second error to be raised when the attempt to remove
+        # the open file is made.
+        prefix = 'numpy_test_closing_zipfile_after_load_'
+        with temppath(suffix='.npz', prefix=prefix) as tmp:
+            np.savez(tmp, lab='place holder')
+            data = np.load(tmp)
+            fp = data.zip.fp
+            data.close()
+            assert_(fp.closed)
+
+
+class TestSaveTxt:
+    def test_array(self):
+        a = np.array([[1, 2], [3, 4]], float)
+        fmt = "%.18e"
+        c = BytesIO()
+        np.savetxt(c, a, fmt=fmt)
+        c.seek(0)
+        assert_equal(c.readlines(),
+                     [asbytes((fmt + ' ' + fmt + '\n') % (1, 2)),
+                      asbytes((fmt + ' ' + fmt + '\n') % (3, 4))])
+
+        a = np.array([[1, 2], [3, 4]], int)
+        c = BytesIO()
+        np.savetxt(c, a, fmt='%d')
+        c.seek(0)
+        assert_equal(c.readlines(), [b'1 2\n', b'3 4\n'])
+
+    def test_1D(self):
+        a = np.array([1, 2, 3, 4], int)
+        c = BytesIO()
+        np.savetxt(c, a, fmt='%d')
+        c.seek(0)
+        lines = c.readlines()
+        assert_equal(lines, [b'1\n', b'2\n', b'3\n', b'4\n'])
+
+    def test_0D_3D(self):
+        c = BytesIO()
+        assert_raises(ValueError, np.savetxt, c, np.array(1))
+        assert_raises(ValueError, np.savetxt, c, np.array([[[1], [2]]]))
+
+    def test_structured(self):
+        a = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')])
+        c = BytesIO()
+        np.savetxt(c, a, fmt='%d')
+        c.seek(0)
+        assert_equal(c.readlines(), [b'1 2\n', b'3 4\n'])
+
+    def test_structured_padded(self):
+        # gh-13297
+        a = np.array([(1, 2, 3), (4, 5, 6)], dtype=[
+            ('foo', 'i4'), ('bar', 'i4'), ('baz', 'i4')
+        ])
+        c = BytesIO()
+        np.savetxt(c, a[['foo', 'baz']], fmt='%d')
+        c.seek(0)
+        assert_equal(c.readlines(), [b'1 3\n', b'4 6\n'])
+
+    def test_multifield_view(self):
+        a = np.ones(1, dtype=[('x', 'i4'), ('y', 'i4'), ('z', 'f4')])
+        v = a[['x', 'z']]
+        with temppath(suffix='.npy') as path:
+            path = Path(path)
+            np.save(path, v)
+            data = np.load(path)
+            assert_array_equal(data, v)
+
+    def test_delimiter(self):
+        a = np.array([[1., 2.], [3., 4.]])
+        c = BytesIO()
+        np.savetxt(c, a, delimiter=',', fmt='%d')
+        c.seek(0)
+        assert_equal(c.readlines(), [b'1,2\n', b'3,4\n'])
+
+    def test_format(self):
+        a = np.array([(1, 2), (3, 4)])
+        c = BytesIO()
+        # Sequence of formats
+        np.savetxt(c, a, fmt=['%02d', '%3.1f'])
+        c.seek(0)
+        assert_equal(c.readlines(), [b'01 2.0\n', b'03 4.0\n'])
+
+        # A single multiformat string
+        c = BytesIO()
+        np.savetxt(c, a, fmt='%02d : %3.1f')
+        c.seek(0)
+        lines = c.readlines()
+        assert_equal(lines, [b'01 : 2.0\n', b'03 : 4.0\n'])
+
+        # Specify delimiter, should be overridden
+        c = BytesIO()
+        np.savetxt(c, a, fmt='%02d : %3.1f', delimiter=',')
+        c.seek(0)
+        lines = c.readlines()
+        assert_equal(lines, [b'01 : 2.0\n', b'03 : 4.0\n'])
+
+        # Bad fmt, should raise a ValueError
+        c = BytesIO()
+        assert_raises(ValueError, np.savetxt, c, a, fmt=99)
+
+    def test_header_footer(self):
+        # Test the functionality of the header and footer keyword argument.
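+        # np.savetxt writes `header` before the data and `footer` after it,
+        # each line prefixed with the `comments` string (default '# '), as
+        # the assertions below verify.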
+ + c = BytesIO() + a = np.array([(1, 2), (3, 4)], dtype=int) + test_header_footer = 'Test header / footer' + # Test the header keyword argument + np.savetxt(c, a, fmt='%1d', header=test_header_footer) + c.seek(0) + assert_equal(c.read(), + asbytes('# ' + test_header_footer + '\n1 2\n3 4\n')) + # Test the footer keyword argument + c = BytesIO() + np.savetxt(c, a, fmt='%1d', footer=test_header_footer) + c.seek(0) + assert_equal(c.read(), + asbytes('1 2\n3 4\n# ' + test_header_footer + '\n')) + # Test the commentstr keyword argument used on the header + c = BytesIO() + commentstr = '% ' + np.savetxt(c, a, fmt='%1d', + header=test_header_footer, comments=commentstr) + c.seek(0) + assert_equal(c.read(), + asbytes(commentstr + test_header_footer + '\n' + '1 2\n3 4\n')) + # Test the commentstr keyword argument used on the footer + c = BytesIO() + commentstr = '% ' + np.savetxt(c, a, fmt='%1d', + footer=test_header_footer, comments=commentstr) + c.seek(0) + assert_equal(c.read(), + asbytes('1 2\n3 4\n' + commentstr + test_header_footer + '\n')) + + def test_file_roundtrip(self): + with temppath() as name: + a = np.array([(1, 2), (3, 4)]) + np.savetxt(name, a) + b = np.loadtxt(name) + assert_array_equal(a, b) + + def test_complex_arrays(self): + ncols = 2 + nrows = 2 + a = np.zeros((ncols, nrows), dtype=np.complex128) + re = np.pi + im = np.e + a[:] = re + 1.0j * im + + # One format only + c = BytesIO() + np.savetxt(c, a, fmt=' %+.3e') + c.seek(0) + lines = c.readlines() + assert_equal( + lines, + [b' ( +3.142e+00+ +2.718e+00j) ( +3.142e+00+ +2.718e+00j)\n', + b' ( +3.142e+00+ +2.718e+00j) ( +3.142e+00+ +2.718e+00j)\n']) + + # One format for each real and imaginary part + c = BytesIO() + np.savetxt(c, a, fmt=' %+.3e' * 2 * ncols) + c.seek(0) + lines = c.readlines() + assert_equal( + lines, + [b' +3.142e+00 +2.718e+00 +3.142e+00 +2.718e+00\n', + b' +3.142e+00 +2.718e+00 +3.142e+00 +2.718e+00\n']) + + # One format for each complex number + c = BytesIO() + np.savetxt(c, a, fmt=['(%.3e%+.3ej)'] * ncols) + c.seek(0) + lines = c.readlines() + assert_equal( + lines, + [b'(3.142e+00+2.718e+00j) (3.142e+00+2.718e+00j)\n', + b'(3.142e+00+2.718e+00j) (3.142e+00+2.718e+00j)\n']) + + def test_complex_negative_exponent(self): + # Previous to 1.15, some formats generated x+-yj, gh 7895 + ncols = 2 + nrows = 2 + a = np.zeros((ncols, nrows), dtype=np.complex128) + re = np.pi + im = np.e + a[:] = re - 1.0j * im + c = BytesIO() + np.savetxt(c, a, fmt='%.3e') + c.seek(0) + lines = c.readlines() + assert_equal( + lines, + [b' (3.142e+00-2.718e+00j) (3.142e+00-2.718e+00j)\n', + b' (3.142e+00-2.718e+00j) (3.142e+00-2.718e+00j)\n']) + + + def test_custom_writer(self): + + class CustomWriter(list): + def write(self, text): + self.extend(text.split(b'\n')) + + w = CustomWriter() + a = np.array([(1, 2), (3, 4)]) + np.savetxt(w, a) + b = np.loadtxt(w) + assert_array_equal(a, b) + + def test_unicode(self): + utf8 = b'\xcf\x96'.decode('UTF-8') + a = np.array([utf8], dtype=np.unicode_) + with tempdir() as tmpdir: + # set encoding as on windows it may not be unicode even on py3 + np.savetxt(os.path.join(tmpdir, 'test.csv'), a, fmt=['%s'], + encoding='UTF-8') + + def test_unicode_roundtrip(self): + utf8 = b'\xcf\x96'.decode('UTF-8') + a = np.array([utf8], dtype=np.unicode_) + # our gz wrapper support encoding + suffixes = ['', '.gz'] + if HAS_BZ2: + suffixes.append('.bz2') + if HAS_LZMA: + suffixes.extend(['.xz', '.lzma']) + with tempdir() as tmpdir: + for suffix in suffixes: + np.savetxt(os.path.join(tmpdir, 'test.csv' + 
suffix), a, + fmt=['%s'], encoding='UTF-16-LE') + b = np.loadtxt(os.path.join(tmpdir, 'test.csv' + suffix), + encoding='UTF-16-LE', dtype=np.unicode_) + assert_array_equal(a, b) + + def test_unicode_bytestream(self): + utf8 = b'\xcf\x96'.decode('UTF-8') + a = np.array([utf8], dtype=np.unicode_) + s = BytesIO() + np.savetxt(s, a, fmt=['%s'], encoding='UTF-8') + s.seek(0) + assert_equal(s.read().decode('UTF-8'), utf8 + '\n') + + def test_unicode_stringstream(self): + utf8 = b'\xcf\x96'.decode('UTF-8') + a = np.array([utf8], dtype=np.unicode_) + s = StringIO() + np.savetxt(s, a, fmt=['%s'], encoding='UTF-8') + s.seek(0) + assert_equal(s.read(), utf8 + '\n') + + @pytest.mark.parametrize("fmt", [u"%f", b"%f"]) + @pytest.mark.parametrize("iotype", [StringIO, BytesIO]) + def test_unicode_and_bytes_fmt(self, fmt, iotype): + # string type of fmt should not matter, see also gh-4053 + a = np.array([1.]) + s = iotype() + np.savetxt(s, a, fmt=fmt) + s.seek(0) + if iotype is StringIO: + assert_equal(s.read(), u"%f\n" % 1.) + else: + assert_equal(s.read(), b"%f\n" % 1.) + + @pytest.mark.skipif(sys.platform=='win32', reason="files>4GB may not work") + @pytest.mark.slow + @requires_memory(free_bytes=7e9) + def test_large_zip(self): + def check_large_zip(memoryerror_raised): + memoryerror_raised.value = False + try: + # The test takes at least 6GB of memory, writes a file larger + # than 4GB. This tests the ``allowZip64`` kwarg to ``zipfile`` + test_data = np.asarray([np.random.rand( + np.random.randint(50,100),4) + for i in range(800000)], dtype=object) + with tempdir() as tmpdir: + np.savez(os.path.join(tmpdir, 'test.npz'), + test_data=test_data) + except MemoryError: + memoryerror_raised.value = True + raise + # run in a subprocess to ensure memory is released on PyPy, see gh-15775 + # Use an object in shared memory to re-raise the MemoryError exception + # in our process if needed, see gh-16889 + memoryerror_raised = Value(c_bool) + p = Process(target=check_large_zip, args=(memoryerror_raised,)) + p.start() + p.join() + if memoryerror_raised.value: + raise MemoryError("Child process raised a MemoryError exception") + # -9 indicates a SIGKILL, probably an OOM. 
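+        # (Process.exitcode is the negative signal number when the child is
+        # terminated by a signal, so -9 corresponds to SIGKILL.)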
+ if p.exitcode == -9: + pytest.xfail("subprocess got a SIGKILL, apparently free memory was not sufficient") + assert p.exitcode == 0 + +class LoadTxtBase: + def check_compressed(self, fopen, suffixes): + # Test that we can load data from a compressed file + wanted = np.arange(6).reshape((2, 3)) + linesep = ('\n', '\r\n', '\r') + for sep in linesep: + data = '0 1 2' + sep + '3 4 5' + for suffix in suffixes: + with temppath(suffix=suffix) as name: + with fopen(name, mode='wt', encoding='UTF-32-LE') as f: + f.write(data) + res = self.loadfunc(name, encoding='UTF-32-LE') + assert_array_equal(res, wanted) + with fopen(name, "rt", encoding='UTF-32-LE') as f: + res = self.loadfunc(f) + assert_array_equal(res, wanted) + + def test_compressed_gzip(self): + self.check_compressed(gzip.open, ('.gz',)) + + @pytest.mark.skipif(not HAS_BZ2, reason="Needs bz2") + def test_compressed_bz2(self): + self.check_compressed(bz2.open, ('.bz2',)) + + @pytest.mark.skipif(not HAS_LZMA, reason="Needs lzma") + def test_compressed_lzma(self): + self.check_compressed(lzma.open, ('.xz', '.lzma')) + + def test_encoding(self): + with temppath() as path: + with open(path, "wb") as f: + f.write('0.\n1.\n2.'.encode("UTF-16")) + x = self.loadfunc(path, encoding="UTF-16") + assert_array_equal(x, [0., 1., 2.]) + + def test_stringload(self): + # umlaute + nonascii = b'\xc3\xb6\xc3\xbc\xc3\xb6'.decode("UTF-8") + with temppath() as path: + with open(path, "wb") as f: + f.write(nonascii.encode("UTF-16")) + x = self.loadfunc(path, encoding="UTF-16", dtype=np.unicode_) + assert_array_equal(x, nonascii) + + def test_binary_decode(self): + utf16 = b'\xff\xfeh\x04 \x00i\x04 \x00j\x04' + v = self.loadfunc(BytesIO(utf16), dtype=np.unicode_, encoding='UTF-16') + assert_array_equal(v, np.array(utf16.decode('UTF-16').split())) + + def test_converters_decode(self): + # test converters that decode strings + c = TextIO() + c.write(b'\xcf\x96') + c.seek(0) + x = self.loadfunc(c, dtype=np.unicode_, + converters={0: lambda x: x.decode('UTF-8')}) + a = np.array([b'\xcf\x96'.decode('UTF-8')]) + assert_array_equal(x, a) + + def test_converters_nodecode(self): + # test native string converters enabled by setting an encoding + utf8 = b'\xcf\x96'.decode('UTF-8') + with temppath() as path: + with io.open(path, 'wt', encoding='UTF-8') as f: + f.write(utf8) + x = self.loadfunc(path, dtype=np.unicode_, + converters={0: lambda x: x + 't'}, + encoding='UTF-8') + a = np.array([utf8 + 't']) + assert_array_equal(x, a) + + +class TestLoadTxt(LoadTxtBase): + loadfunc = staticmethod(np.loadtxt) + + def setup(self): + # lower chunksize for testing + self.orig_chunk = np.lib.npyio._loadtxt_chunksize + np.lib.npyio._loadtxt_chunksize = 1 + def teardown(self): + np.lib.npyio._loadtxt_chunksize = self.orig_chunk + + def test_record(self): + c = TextIO() + c.write('1 2\n3 4') + c.seek(0) + x = np.loadtxt(c, dtype=[('x', np.int32), ('y', np.int32)]) + a = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')]) + assert_array_equal(x, a) + + d = TextIO() + d.write('M 64.0 75.0\nF 25.0 60.0') + d.seek(0) + mydescriptor = {'names': ('gender', 'age', 'weight'), + 'formats': ('S1', 'i4', 'f4')} + b = np.array([('M', 64.0, 75.0), + ('F', 25.0, 60.0)], dtype=mydescriptor) + y = np.loadtxt(d, dtype=mydescriptor) + assert_array_equal(y, b) + + def test_array(self): + c = TextIO() + c.write('1 2\n3 4') + + c.seek(0) + x = np.loadtxt(c, dtype=int) + a = np.array([[1, 2], [3, 4]], int) + assert_array_equal(x, a) + + c.seek(0) + x = np.loadtxt(c, dtype=float) + a = np.array([[1, 
2], [3, 4]], float) + assert_array_equal(x, a) + + def test_1D(self): + c = TextIO() + c.write('1\n2\n3\n4\n') + c.seek(0) + x = np.loadtxt(c, dtype=int) + a = np.array([1, 2, 3, 4], int) + assert_array_equal(x, a) + + c = TextIO() + c.write('1,2,3,4\n') + c.seek(0) + x = np.loadtxt(c, dtype=int, delimiter=',') + a = np.array([1, 2, 3, 4], int) + assert_array_equal(x, a) + + def test_missing(self): + c = TextIO() + c.write('1,2,3,,5\n') + c.seek(0) + x = np.loadtxt(c, dtype=int, delimiter=',', + converters={3: lambda s: int(s or - 999)}) + a = np.array([1, 2, 3, -999, 5], int) + assert_array_equal(x, a) + + def test_converters_with_usecols(self): + c = TextIO() + c.write('1,2,3,,5\n6,7,8,9,10\n') + c.seek(0) + x = np.loadtxt(c, dtype=int, delimiter=',', + converters={3: lambda s: int(s or - 999)}, + usecols=(1, 3,)) + a = np.array([[2, -999], [7, 9]], int) + assert_array_equal(x, a) + + def test_comments_unicode(self): + c = TextIO() + c.write('# comment\n1,2,3,5\n') + c.seek(0) + x = np.loadtxt(c, dtype=int, delimiter=',', + comments=u'#') + a = np.array([1, 2, 3, 5], int) + assert_array_equal(x, a) + + def test_comments_byte(self): + c = TextIO() + c.write('# comment\n1,2,3,5\n') + c.seek(0) + x = np.loadtxt(c, dtype=int, delimiter=',', + comments=b'#') + a = np.array([1, 2, 3, 5], int) + assert_array_equal(x, a) + + def test_comments_multiple(self): + c = TextIO() + c.write('# comment\n1,2,3\n@ comment2\n4,5,6 // comment3') + c.seek(0) + x = np.loadtxt(c, dtype=int, delimiter=',', + comments=['#', '@', '//']) + a = np.array([[1, 2, 3], [4, 5, 6]], int) + assert_array_equal(x, a) + + def test_comments_multi_chars(self): + c = TextIO() + c.write('/* comment\n1,2,3,5\n') + c.seek(0) + x = np.loadtxt(c, dtype=int, delimiter=',', + comments='/*') + a = np.array([1, 2, 3, 5], int) + assert_array_equal(x, a) + + # Check that '/*' is not transformed to ['/', '*'] + c = TextIO() + c.write('*/ comment\n1,2,3,5\n') + c.seek(0) + assert_raises(ValueError, np.loadtxt, c, dtype=int, delimiter=',', + comments='/*') + + def test_skiprows(self): + c = TextIO() + c.write('comment\n1,2,3,5\n') + c.seek(0) + x = np.loadtxt(c, dtype=int, delimiter=',', + skiprows=1) + a = np.array([1, 2, 3, 5], int) + assert_array_equal(x, a) + + c = TextIO() + c.write('# comment\n1,2,3,5\n') + c.seek(0) + x = np.loadtxt(c, dtype=int, delimiter=',', + skiprows=1) + a = np.array([1, 2, 3, 5], int) + assert_array_equal(x, a) + + def test_usecols(self): + a = np.array([[1, 2], [3, 4]], float) + c = BytesIO() + np.savetxt(c, a) + c.seek(0) + x = np.loadtxt(c, dtype=float, usecols=(1,)) + assert_array_equal(x, a[:, 1]) + + a = np.array([[1, 2, 3], [3, 4, 5]], float) + c = BytesIO() + np.savetxt(c, a) + c.seek(0) + x = np.loadtxt(c, dtype=float, usecols=(1, 2)) + assert_array_equal(x, a[:, 1:]) + + # Testing with arrays instead of tuples. 
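+        # (usecols accepts any integer-like index; the cases below also
+        # cover plain ints, each fixed-width NumPy integer dtype, and a
+        # custom class implementing __index__.)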
+ c.seek(0) + x = np.loadtxt(c, dtype=float, usecols=np.array([1, 2])) + assert_array_equal(x, a[:, 1:]) + + # Testing with an integer instead of a sequence + for int_type in [int, np.int8, np.int16, + np.int32, np.int64, np.uint8, np.uint16, + np.uint32, np.uint64]: + to_read = int_type(1) + c.seek(0) + x = np.loadtxt(c, dtype=float, usecols=to_read) + assert_array_equal(x, a[:, 1]) + + # Testing with some crazy custom integer type + class CrazyInt: + def __index__(self): + return 1 + + crazy_int = CrazyInt() + c.seek(0) + x = np.loadtxt(c, dtype=float, usecols=crazy_int) + assert_array_equal(x, a[:, 1]) + + c.seek(0) + x = np.loadtxt(c, dtype=float, usecols=(crazy_int,)) + assert_array_equal(x, a[:, 1]) + + # Checking with dtypes defined converters. + data = '''JOE 70.1 25.3 + BOB 60.5 27.9 + ''' + c = TextIO(data) + names = ['stid', 'temp'] + dtypes = ['S4', 'f8'] + arr = np.loadtxt(c, usecols=(0, 2), dtype=list(zip(names, dtypes))) + assert_equal(arr['stid'], [b"JOE", b"BOB"]) + assert_equal(arr['temp'], [25.3, 27.9]) + + # Testing non-ints in usecols + c.seek(0) + bogus_idx = 1.5 + assert_raises_regex( + TypeError, + '^usecols must be.*%s' % type(bogus_idx), + np.loadtxt, c, usecols=bogus_idx + ) + + assert_raises_regex( + TypeError, + '^usecols must be.*%s' % type(bogus_idx), + np.loadtxt, c, usecols=[0, bogus_idx, 0] + ) + + def test_fancy_dtype(self): + c = TextIO() + c.write('1,2,3.0\n4,5,6.0\n') + c.seek(0) + dt = np.dtype([('x', int), ('y', [('t', int), ('s', float)])]) + x = np.loadtxt(c, dtype=dt, delimiter=',') + a = np.array([(1, (2, 3.0)), (4, (5, 6.0))], dt) + assert_array_equal(x, a) + + def test_shaped_dtype(self): + c = TextIO("aaaa 1.0 8.0 1 2 3 4 5 6") + dt = np.dtype([('name', 'S4'), ('x', float), ('y', float), + ('block', int, (2, 3))]) + x = np.loadtxt(c, dtype=dt) + a = np.array([('aaaa', 1.0, 8.0, [[1, 2, 3], [4, 5, 6]])], + dtype=dt) + assert_array_equal(x, a) + + def test_3d_shaped_dtype(self): + c = TextIO("aaaa 1.0 8.0 1 2 3 4 5 6 7 8 9 10 11 12") + dt = np.dtype([('name', 'S4'), ('x', float), ('y', float), + ('block', int, (2, 2, 3))]) + x = np.loadtxt(c, dtype=dt) + a = np.array([('aaaa', 1.0, 8.0, + [[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])], + dtype=dt) + assert_array_equal(x, a) + + def test_str_dtype(self): + # see gh-8033 + c = ["str1", "str2"] + + for dt in (str, np.bytes_): + a = np.array(["str1", "str2"], dtype=dt) + x = np.loadtxt(c, dtype=dt) + assert_array_equal(x, a) + + def test_empty_file(self): + with suppress_warnings() as sup: + sup.filter(message="loadtxt: Empty input file:") + c = TextIO() + x = np.loadtxt(c) + assert_equal(x.shape, (0,)) + x = np.loadtxt(c, dtype=np.int64) + assert_equal(x.shape, (0,)) + assert_(x.dtype == np.int64) + + def test_unused_converter(self): + c = TextIO() + c.writelines(['1 21\n', '3 42\n']) + c.seek(0) + data = np.loadtxt(c, usecols=(1,), + converters={0: lambda s: int(s, 16)}) + assert_array_equal(data, [21, 42]) + + c.seek(0) + data = np.loadtxt(c, usecols=(1,), + converters={1: lambda s: int(s, 16)}) + assert_array_equal(data, [33, 66]) + + def test_dtype_with_object(self): + # Test using an explicit dtype with an object + data = """ 1; 2001-01-01 + 2; 2002-01-31 """ + ndtype = [('idx', int), ('code', object)] + func = lambda s: strptime(s.strip(), "%Y-%m-%d") + converters = {1: func} + test = np.loadtxt(TextIO(data), delimiter=";", dtype=ndtype, + converters=converters) + control = np.array( + [(1, datetime(2001, 1, 1)), (2, datetime(2002, 1, 31))], + dtype=ndtype) + assert_equal(test, 
control)
+
+ def test_uint64_type(self):
+ tgt = (9223372043271415339, 9223372043271415853)
+ c = TextIO()
+ c.write("%s %s" % tgt)
+ c.seek(0)
+ res = np.loadtxt(c, dtype=np.uint64)
+ assert_equal(res, tgt)
+
+ def test_int64_type(self):
+ tgt = (-9223372036854775807, 9223372036854775807)
+ c = TextIO()
+ c.write("%s %s" % tgt)
+ c.seek(0)
+ res = np.loadtxt(c, dtype=np.int64)
+ assert_equal(res, tgt)
+
+ def test_from_float_hex(self):
+ # IEEE doubles and floats only, otherwise the float32
+ # conversion may fail.
+ tgt = np.logspace(-10, 10, 5).astype(np.float32)
+ tgt = np.hstack((tgt, -tgt)).astype(float)
+ inp = '\n'.join(map(float.hex, tgt))
+ c = TextIO()
+ c.write(inp)
+ for dt in [float, np.float32]:
+ c.seek(0)
+ res = np.loadtxt(c, dtype=dt)
+ assert_equal(res, tgt, err_msg="%s" % dt)
+
+ def test_default_float_converter_no_default_hex_conversion(self):
+ """
+ Ensure that fromhex is only used for values with the correct prefix and
+ is not called by default. Regression test related to gh-19598.
+ """
+ c = TextIO("a b c")
+ with pytest.raises(
+ ValueError, match="could not convert string to float"
+ ):
+ np.loadtxt(c)
+
+ def test_default_float_converter_exception(self):
+ """
+ Ensure that the exception message raised during failed floating point
+ conversion is correct. Regression test related to gh-19598.
+ """
+ c = TextIO("qrs tuv") # Invalid values for default float converter
+ with pytest.raises(
+ ValueError, match="could not convert string to float"
+ ):
+ np.loadtxt(c)
+
+ def test_from_complex(self):
+ tgt = (complex(1, 1), complex(1, -1))
+ c = TextIO()
+ c.write("%s %s" % tgt)
+ c.seek(0)
+ res = np.loadtxt(c, dtype=complex)
+ assert_equal(res, tgt)
+
+ def test_complex_misformatted(self):
+ # test for backward compatibility
+ # some complex formats used to generate x+-yj
+ a = np.zeros((2, 2), dtype=np.complex128)
+ re = np.pi
+ im = np.e
+ a[:] = re - 1.0j * im
+ c = BytesIO()
+ np.savetxt(c, a, fmt='%.16e')
+ c.seek(0)
+ txt = c.read()
+ c.seek(0)
+ # misformat the sign on the imaginary part, gh 7895
+ txt_bad = txt.replace(b'e+00-', b'e00+-')
+ assert_(txt_bad != txt)
+ c.write(txt_bad)
+ c.seek(0)
+ res = np.loadtxt(c, dtype=complex)
+ assert_equal(res, a)
+
+ def test_universal_newline(self):
+ with temppath() as name:
+ with open(name, 'w') as f:
+ f.write('1 21\r3 42\r')
+ data = np.loadtxt(name)
+ assert_array_equal(data, [[1, 21], [3, 42]])
+
+ def test_empty_field_after_tab(self):
+ c = TextIO()
+ c.write('1 \t2 \t3\tstart \n4\t5\t6\t  \n7\t8\t9.5\t')
+ c.seek(0)
+ dt = {'names': ('x', 'y', 'z', 'comment'),
+ 'formats': ('<i4', '<i4', '<f4', '|S8')}
+ x = np.loadtxt(c, dtype=dt, delimiter='\t')
+ a = np.array([b'start ', b'  ', b''])
+ assert_array_equal(x['comment'], a)
+
+ def test_max_rows_larger(self):
+ # test max_rows > num rows
+ c = TextIO()
+ c.write('comment\n1,2,3,5\n4,5,7,8\n2,1,4,5')
+ c.seek(0)
+ x = np.loadtxt(c, dtype=int, delimiter=',',
+ skiprows=1, max_rows=6)
+ a = np.array([[1, 2, 3, 5], [4, 5, 7, 8], [2, 1, 4, 5]], int)
+ assert_array_equal(x, a)
+
+
+class Testfromregex:
+ def test_record(self):
+ c = TextIO()
+ c.write('1.312 foo\n1.534 bar\n4.444 qux')
+ c.seek(0)
+
+ dt = [('num', np.float64), ('val', 'S3')]
+ x = np.fromregex(c, r"([0-9.]+)\s+(...)", dt)
+ a = np.array([(1.312, 'foo'), (1.534, 'bar'), (4.444, 'qux')],
+ dtype=dt)
+ assert_array_equal(x, a)
+
+ def test_record_2(self):
+ c = TextIO()
+ c.write('1312 foo\n1534 bar\n4444 qux')
+ c.seek(0)
+
+ dt = [('num', np.int32), ('val', 'S3')]
+ x = np.fromregex(c, r"(\d+)\s+(...)", dt)
+ a = np.array([(1312, 'foo'), (1534, 'bar'), (4444, 'qux')],
+ dtype=dt)
+ assert_array_equal(x, a)
+
+ def test_record_3(self):
+ c = TextIO()
+ c.write('1312 foo\n1534 bar\n4444 qux')
+
c.seek(0) + + dt = [('num', np.float64)] + x = np.fromregex(c, r"(\d+)\s+...", dt) + a = np.array([(1312,), (1534,), (4444,)], dtype=dt) + assert_array_equal(x, a) + + @pytest.mark.parametrize("path_type", [str, Path]) + def test_record_unicode(self, path_type): + utf8 = b'\xcf\x96' + with temppath() as str_path: + path = path_type(str_path) + with open(path, 'wb') as f: + f.write(b'1.312 foo' + utf8 + b' \n1.534 bar\n4.444 qux') + + dt = [('num', np.float64), ('val', 'U4')] + x = np.fromregex(path, r"(?u)([0-9.]+)\s+(\w+)", dt, encoding='UTF-8') + a = np.array([(1.312, 'foo' + utf8.decode('UTF-8')), (1.534, 'bar'), + (4.444, 'qux')], dtype=dt) + assert_array_equal(x, a) + + regexp = re.compile(r"([0-9.]+)\s+(\w+)", re.UNICODE) + x = np.fromregex(path, regexp, dt, encoding='UTF-8') + assert_array_equal(x, a) + + def test_compiled_bytes(self): + regexp = re.compile(b'(\\d)') + c = BytesIO(b'123') + dt = [('num', np.float64)] + a = np.array([1, 2, 3], dtype=dt) + x = np.fromregex(c, regexp, dt) + assert_array_equal(x, a) + + def test_bad_dtype_not_structured(self): + regexp = re.compile(b'(\\d)') + c = BytesIO(b'123') + with pytest.raises(TypeError, match='structured datatype'): + np.fromregex(c, regexp, dtype=np.float64) + + +#####-------------------------------------------------------------------------- + + +class TestFromTxt(LoadTxtBase): + loadfunc = staticmethod(np.genfromtxt) + + def test_record(self): + # Test w/ explicit dtype + data = TextIO('1 2\n3 4') + test = np.genfromtxt(data, dtype=[('x', np.int32), ('y', np.int32)]) + control = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')]) + assert_equal(test, control) + # + data = TextIO('M 64.0 75.0\nF 25.0 60.0') + descriptor = {'names': ('gender', 'age', 'weight'), + 'formats': ('S1', 'i4', 'f4')} + control = np.array([('M', 64.0, 75.0), ('F', 25.0, 60.0)], + dtype=descriptor) + test = np.genfromtxt(data, dtype=descriptor) + assert_equal(test, control) + + def test_array(self): + # Test outputting a standard ndarray + data = TextIO('1 2\n3 4') + control = np.array([[1, 2], [3, 4]], dtype=int) + test = np.genfromtxt(data, dtype=int) + assert_array_equal(test, control) + # + data.seek(0) + control = np.array([[1, 2], [3, 4]], dtype=float) + test = np.loadtxt(data, dtype=float) + assert_array_equal(test, control) + + def test_1D(self): + # Test squeezing to 1D + control = np.array([1, 2, 3, 4], int) + # + data = TextIO('1\n2\n3\n4\n') + test = np.genfromtxt(data, dtype=int) + assert_array_equal(test, control) + # + data = TextIO('1,2,3,4\n') + test = np.genfromtxt(data, dtype=int, delimiter=',') + assert_array_equal(test, control) + + def test_comments(self): + # Test the stripping of comments + control = np.array([1, 2, 3, 5], int) + # Comment on its own line + data = TextIO('# comment\n1,2,3,5\n') + test = np.genfromtxt(data, dtype=int, delimiter=',', comments='#') + assert_equal(test, control) + # Comment at the end of a line + data = TextIO('1,2,3,5# comment\n') + test = np.genfromtxt(data, dtype=int, delimiter=',', comments='#') + assert_equal(test, control) + + def test_skiprows(self): + # Test row skipping + control = np.array([1, 2, 3, 5], int) + kwargs = dict(dtype=int, delimiter=',') + # + data = TextIO('comment\n1,2,3,5\n') + test = np.genfromtxt(data, skip_header=1, **kwargs) + assert_equal(test, control) + # + data = TextIO('# comment\n1,2,3,5\n') + test = np.loadtxt(data, skiprows=1, **kwargs) + assert_equal(test, control) + + def test_skip_footer(self): + data = ["# %i" % i for i in range(1, 6)] + 
data.append("A, B, C") + data.extend(["%i,%3.1f,%03s" % (i, i, i) for i in range(51)]) + data[-1] = "99,99" + kwargs = dict(delimiter=",", names=True, skip_header=5, skip_footer=10) + test = np.genfromtxt(TextIO("\n".join(data)), **kwargs) + ctrl = np.array([("%f" % i, "%f" % i, "%f" % i) for i in range(41)], + dtype=[(_, float) for _ in "ABC"]) + assert_equal(test, ctrl) + + def test_skip_footer_with_invalid(self): + with suppress_warnings() as sup: + sup.filter(ConversionWarning) + basestr = '1 1\n2 2\n3 3\n4 4\n5 \n6 \n7 \n' + # Footer too small to get rid of all invalid values + assert_raises(ValueError, np.genfromtxt, + TextIO(basestr), skip_footer=1) + # except ValueError: + # pass + a = np.genfromtxt( + TextIO(basestr), skip_footer=1, invalid_raise=False) + assert_equal(a, np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]])) + # + a = np.genfromtxt(TextIO(basestr), skip_footer=3) + assert_equal(a, np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]])) + # + basestr = '1 1\n2 \n3 3\n4 4\n5 \n6 6\n7 7\n' + a = np.genfromtxt( + TextIO(basestr), skip_footer=1, invalid_raise=False) + assert_equal(a, np.array([[1., 1.], [3., 3.], [4., 4.], [6., 6.]])) + a = np.genfromtxt( + TextIO(basestr), skip_footer=3, invalid_raise=False) + assert_equal(a, np.array([[1., 1.], [3., 3.], [4., 4.]])) + + def test_header(self): + # Test retrieving a header + data = TextIO('gender age weight\nM 64.0 75.0\nF 25.0 60.0') + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) + test = np.genfromtxt(data, dtype=None, names=True) + assert_(w[0].category is np.VisibleDeprecationWarning) + control = {'gender': np.array([b'M', b'F']), + 'age': np.array([64.0, 25.0]), + 'weight': np.array([75.0, 60.0])} + assert_equal(test['gender'], control['gender']) + assert_equal(test['age'], control['age']) + assert_equal(test['weight'], control['weight']) + + def test_auto_dtype(self): + # Test the automatic definition of the output dtype + data = TextIO('A 64 75.0 3+4j True\nBCD 25 60.0 5+6j False') + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) + test = np.genfromtxt(data, dtype=None) + assert_(w[0].category is np.VisibleDeprecationWarning) + control = [np.array([b'A', b'BCD']), + np.array([64, 25]), + np.array([75.0, 60.0]), + np.array([3 + 4j, 5 + 6j]), + np.array([True, False]), ] + assert_equal(test.dtype.names, ['f0', 'f1', 'f2', 'f3', 'f4']) + for (i, ctrl) in enumerate(control): + assert_equal(test['f%i' % i], ctrl) + + def test_auto_dtype_uniform(self): + # Tests whether the output dtype can be uniformized + data = TextIO('1 2 3 4\n5 6 7 8\n') + test = np.genfromtxt(data, dtype=None) + control = np.array([[1, 2, 3, 4], [5, 6, 7, 8]]) + assert_equal(test, control) + + def test_fancy_dtype(self): + # Check that a nested dtype isn't MIA + data = TextIO('1,2,3.0\n4,5,6.0\n') + fancydtype = np.dtype([('x', int), ('y', [('t', int), ('s', float)])]) + test = np.genfromtxt(data, dtype=fancydtype, delimiter=',') + control = np.array([(1, (2, 3.0)), (4, (5, 6.0))], dtype=fancydtype) + assert_equal(test, control) + + def test_names_overwrite(self): + # Test overwriting the names of the dtype + descriptor = {'names': ('g', 'a', 'w'), + 'formats': ('S1', 'i4', 'f4')} + data = TextIO(b'M 64.0 75.0\nF 25.0 60.0') + names = ('gender', 'age', 'weight') + test = np.genfromtxt(data, dtype=descriptor, names=names) + descriptor['names'] = names + control = np.array([('M', 64.0, 75.0), + ('F', 25.0, 
60.0)], dtype=descriptor) + assert_equal(test, control) + + def test_commented_header(self): + # Check that names can be retrieved even if the line is commented out. + data = TextIO(""" +#gender age weight +M 21 72.100000 +F 35 58.330000 +M 33 21.99 + """) + # The # is part of the first name and should be deleted automatically. + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) + test = np.genfromtxt(data, names=True, dtype=None) + assert_(w[0].category is np.VisibleDeprecationWarning) + ctrl = np.array([('M', 21, 72.1), ('F', 35, 58.33), ('M', 33, 21.99)], + dtype=[('gender', '|S1'), ('age', int), ('weight', float)]) + assert_equal(test, ctrl) + # Ditto, but we should get rid of the first element + data = TextIO(b""" +# gender age weight +M 21 72.100000 +F 35 58.330000 +M 33 21.99 + """) + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) + test = np.genfromtxt(data, names=True, dtype=None) + assert_(w[0].category is np.VisibleDeprecationWarning) + assert_equal(test, ctrl) + + def test_names_and_comments_none(self): + # Tests case when names is true but comments is None (gh-10780) + data = TextIO('col1 col2\n 1 2\n 3 4') + test = np.genfromtxt(data, dtype=(int, int), comments=None, names=True) + control = np.array([(1, 2), (3, 4)], dtype=[('col1', int), ('col2', int)]) + assert_equal(test, control) + + def test_file_is_closed_on_error(self): + # gh-13200 + with tempdir() as tmpdir: + fpath = os.path.join(tmpdir, "test.csv") + with open(fpath, "wb") as f: + f.write(u'\N{GREEK PI SYMBOL}'.encode('utf8')) + + # ResourceWarnings are emitted from a destructor, so won't be + # detected by regular propagation to errors. + with assert_no_warnings(): + with pytest.raises(UnicodeDecodeError): + np.genfromtxt(fpath, encoding="ascii") + + def test_autonames_and_usecols(self): + # Tests names and usecols + data = TextIO('A B C D\n aaaa 121 45 9.1') + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) + test = np.genfromtxt(data, usecols=('A', 'C', 'D'), + names=True, dtype=None) + assert_(w[0].category is np.VisibleDeprecationWarning) + control = np.array(('aaaa', 45, 9.1), + dtype=[('A', '|S4'), ('C', int), ('D', float)]) + assert_equal(test, control) + + def test_converters_with_usecols(self): + # Test the combination user-defined converters and usecol + data = TextIO('1,2,3,,5\n6,7,8,9,10\n') + test = np.genfromtxt(data, dtype=int, delimiter=',', + converters={3: lambda s: int(s or - 999)}, + usecols=(1, 3,)) + control = np.array([[2, -999], [7, 9]], int) + assert_equal(test, control) + + def test_converters_with_usecols_and_names(self): + # Tests names and usecols + data = TextIO('A B C D\n aaaa 121 45 9.1') + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) + test = np.genfromtxt(data, usecols=('A', 'C', 'D'), names=True, + dtype=None, + converters={'C': lambda s: 2 * int(s)}) + assert_(w[0].category is np.VisibleDeprecationWarning) + control = np.array(('aaaa', 90, 9.1), + dtype=[('A', '|S4'), ('C', int), ('D', float)]) + assert_equal(test, control) + + def test_converters_cornercases(self): + # Test the conversion to datetime. 
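+ # strptime here is evidently the test module's own parsing helper rather
+ # than datetime.strptime; judging from the control value it keeps only
+ # the date fields, so the 12:00:00 time in the input is dropped.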
+ converter = { + 'date': lambda s: strptime(s, '%Y-%m-%d %H:%M:%SZ')} + data = TextIO('2009-02-03 12:00:00Z, 72214.0') + test = np.genfromtxt(data, delimiter=',', dtype=None, + names=['date', 'stid'], converters=converter) + control = np.array((datetime(2009, 2, 3), 72214.), + dtype=[('date', np.object_), ('stid', float)]) + assert_equal(test, control) + + def test_converters_cornercases2(self): + # Test the conversion to datetime64. + converter = { + 'date': lambda s: np.datetime64(strptime(s, '%Y-%m-%d %H:%M:%SZ'))} + data = TextIO('2009-02-03 12:00:00Z, 72214.0') + test = np.genfromtxt(data, delimiter=',', dtype=None, + names=['date', 'stid'], converters=converter) + control = np.array((datetime(2009, 2, 3), 72214.), + dtype=[('date', 'datetime64[us]'), ('stid', float)]) + assert_equal(test, control) + + def test_unused_converter(self): + # Test whether unused converters are forgotten + data = TextIO("1 21\n 3 42\n") + test = np.genfromtxt(data, usecols=(1,), + converters={0: lambda s: int(s, 16)}) + assert_equal(test, [21, 42]) + # + data.seek(0) + test = np.genfromtxt(data, usecols=(1,), + converters={1: lambda s: int(s, 16)}) + assert_equal(test, [33, 66]) + + def test_invalid_converter(self): + strip_rand = lambda x: float((b'r' in x.lower() and x.split()[-1]) or + (b'r' not in x.lower() and x.strip() or 0.0)) + strip_per = lambda x: float((b'%' in x.lower() and x.split()[0]) or + (b'%' not in x.lower() and x.strip() or 0.0)) + s = TextIO("D01N01,10/1/2003 ,1 %,R 75,400,600\r\n" + "L24U05,12/5/2003, 2 %,1,300, 150.5\r\n" + "D02N03,10/10/2004,R 1,,7,145.55") + kwargs = dict( + converters={2: strip_per, 3: strip_rand}, delimiter=",", + dtype=None) + assert_raises(ConverterError, np.genfromtxt, s, **kwargs) + + def test_tricky_converter_bug1666(self): + # Test some corner cases + s = TextIO('q1,2\nq3,4') + cnv = lambda s: float(s[1:]) + test = np.genfromtxt(s, delimiter=',', converters={0: cnv}) + control = np.array([[1., 2.], [3., 4.]]) + assert_equal(test, control) + + def test_dtype_with_converters(self): + dstr = "2009; 23; 46" + test = np.genfromtxt(TextIO(dstr,), + delimiter=";", dtype=float, converters={0: bytes}) + control = np.array([('2009', 23., 46)], + dtype=[('f0', '|S4'), ('f1', float), ('f2', float)]) + assert_equal(test, control) + test = np.genfromtxt(TextIO(dstr,), + delimiter=";", dtype=float, converters={0: float}) + control = np.array([2009., 23., 46],) + assert_equal(test, control) + + def test_dtype_with_converters_and_usecols(self): + dstr = "1,5,-1,1:1\n2,8,-1,1:n\n3,3,-2,m:n\n" + dmap = {'1:1':0, '1:n':1, 'm:1':2, 'm:n':3} + dtyp = [('e1','i4'),('e2','i4'),('e3','i2'),('n', 'i1')] + conv = {0: int, 1: int, 2: int, 3: lambda r: dmap[r.decode()]} + test = np.recfromcsv(TextIO(dstr,), dtype=dtyp, delimiter=',', + names=None, converters=conv) + control = np.rec.array([(1,5,-1,0), (2,8,-1,1), (3,3,-2,3)], dtype=dtyp) + assert_equal(test, control) + dtyp = [('e1','i4'),('e2','i4'),('n', 'i1')] + test = np.recfromcsv(TextIO(dstr,), dtype=dtyp, delimiter=',', + usecols=(0,1,3), names=None, converters=conv) + control = np.rec.array([(1,5,0), (2,8,1), (3,3,3)], dtype=dtyp) + assert_equal(test, control) + + def test_dtype_with_object(self): + # Test using an explicit dtype with an object + data = """ 1; 2001-01-01 + 2; 2002-01-31 """ + ndtype = [('idx', int), ('code', object)] + func = lambda s: strptime(s.strip(), "%Y-%m-%d") + converters = {1: func} + test = np.genfromtxt(TextIO(data), delimiter=";", dtype=ndtype, + converters=converters) + control = np.array( + [(1, 
datetime(2001, 1, 1)), (2, datetime(2002, 1, 31))], + dtype=ndtype) + assert_equal(test, control) + + ndtype = [('nest', [('idx', int), ('code', object)])] + with assert_raises_regex(NotImplementedError, + 'Nested fields.* not supported.*'): + test = np.genfromtxt(TextIO(data), delimiter=";", + dtype=ndtype, converters=converters) + + # nested but empty fields also aren't supported + ndtype = [('idx', int), ('code', object), ('nest', [])] + with assert_raises_regex(NotImplementedError, + 'Nested fields.* not supported.*'): + test = np.genfromtxt(TextIO(data), delimiter=";", + dtype=ndtype, converters=converters) + + def test_dtype_with_object_no_converter(self): + # Object without a converter uses bytes: + parsed = np.genfromtxt(TextIO("1"), dtype=object) + assert parsed[()] == b"1" + parsed = np.genfromtxt(TextIO("string"), dtype=object) + assert parsed[()] == b"string" + + def test_userconverters_with_explicit_dtype(self): + # Test user_converters w/ explicit (standard) dtype + data = TextIO('skip,skip,2001-01-01,1.0,skip') + test = np.genfromtxt(data, delimiter=",", names=None, dtype=float, + usecols=(2, 3), converters={2: bytes}) + control = np.array([('2001-01-01', 1.)], + dtype=[('', '|S10'), ('', float)]) + assert_equal(test, control) + + def test_utf8_userconverters_with_explicit_dtype(self): + utf8 = b'\xcf\x96' + with temppath() as path: + with open(path, 'wb') as f: + f.write(b'skip,skip,2001-01-01' + utf8 + b',1.0,skip') + test = np.genfromtxt(path, delimiter=",", names=None, dtype=float, + usecols=(2, 3), converters={2: np.compat.unicode}, + encoding='UTF-8') + control = np.array([('2001-01-01' + utf8.decode('UTF-8'), 1.)], + dtype=[('', '|U11'), ('', float)]) + assert_equal(test, control) + + def test_spacedelimiter(self): + # Test space delimiter + data = TextIO("1 2 3 4 5\n6 7 8 9 10") + test = np.genfromtxt(data) + control = np.array([[1., 2., 3., 4., 5.], + [6., 7., 8., 9., 10.]]) + assert_equal(test, control) + + def test_integer_delimiter(self): + # Test using an integer for delimiter + data = " 1 2 3\n 4 5 67\n890123 4" + test = np.genfromtxt(TextIO(data), delimiter=3) + control = np.array([[1, 2, 3], [4, 5, 67], [890, 123, 4]]) + assert_equal(test, control) + + def test_missing(self): + data = TextIO('1,2,3,,5\n') + test = np.genfromtxt(data, dtype=int, delimiter=',', + converters={3: lambda s: int(s or - 999)}) + control = np.array([1, 2, 3, -999, 5], int) + assert_equal(test, control) + + def test_missing_with_tabs(self): + # Test w/ a delimiter tab + txt = "1\t2\t3\n\t2\t\n1\t\t3" + test = np.genfromtxt(TextIO(txt), delimiter="\t", + usemask=True,) + ctrl_d = np.array([(1, 2, 3), (np.nan, 2, np.nan), (1, np.nan, 3)],) + ctrl_m = np.array([(0, 0, 0), (1, 0, 1), (0, 1, 0)], dtype=bool) + assert_equal(test.data, ctrl_d) + assert_equal(test.mask, ctrl_m) + + def test_usecols(self): + # Test the selection of columns + # Select 1 column + control = np.array([[1, 2], [3, 4]], float) + data = TextIO() + np.savetxt(data, control) + data.seek(0) + test = np.genfromtxt(data, dtype=float, usecols=(1,)) + assert_equal(test, control[:, 1]) + # + control = np.array([[1, 2, 3], [3, 4, 5]], float) + data = TextIO() + np.savetxt(data, control) + data.seek(0) + test = np.genfromtxt(data, dtype=float, usecols=(1, 2)) + assert_equal(test, control[:, 1:]) + # Testing with arrays instead of tuples. 
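+ # (same ndarray-based usecols check as in the loadtxt tests above)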
+ data.seek(0) + test = np.genfromtxt(data, dtype=float, usecols=np.array([1, 2])) + assert_equal(test, control[:, 1:]) + + def test_usecols_as_css(self): + # Test giving usecols with a comma-separated string + data = "1 2 3\n4 5 6" + test = np.genfromtxt(TextIO(data), + names="a, b, c", usecols="a, c") + ctrl = np.array([(1, 3), (4, 6)], dtype=[(_, float) for _ in "ac"]) + assert_equal(test, ctrl) + + def test_usecols_with_structured_dtype(self): + # Test usecols with an explicit structured dtype + data = TextIO("JOE 70.1 25.3\nBOB 60.5 27.9") + names = ['stid', 'temp'] + dtypes = ['S4', 'f8'] + test = np.genfromtxt( + data, usecols=(0, 2), dtype=list(zip(names, dtypes))) + assert_equal(test['stid'], [b"JOE", b"BOB"]) + assert_equal(test['temp'], [25.3, 27.9]) + + def test_usecols_with_integer(self): + # Test usecols with an integer + test = np.genfromtxt(TextIO(b"1 2 3\n4 5 6"), usecols=0) + assert_equal(test, np.array([1., 4.])) + + def test_usecols_with_named_columns(self): + # Test usecols with named columns + ctrl = np.array([(1, 3), (4, 6)], dtype=[('a', float), ('c', float)]) + data = "1 2 3\n4 5 6" + kwargs = dict(names="a, b, c") + test = np.genfromtxt(TextIO(data), usecols=(0, -1), **kwargs) + assert_equal(test, ctrl) + test = np.genfromtxt(TextIO(data), + usecols=('a', 'c'), **kwargs) + assert_equal(test, ctrl) + + def test_empty_file(self): + # Test that an empty file raises the proper warning. + with suppress_warnings() as sup: + sup.filter(message="genfromtxt: Empty input file:") + data = TextIO() + test = np.genfromtxt(data) + assert_equal(test, np.array([])) + + # when skip_header > 0 + test = np.genfromtxt(data, skip_header=1) + assert_equal(test, np.array([])) + + def test_fancy_dtype_alt(self): + # Check that a nested dtype isn't MIA + data = TextIO('1,2,3.0\n4,5,6.0\n') + fancydtype = np.dtype([('x', int), ('y', [('t', int), ('s', float)])]) + test = np.genfromtxt(data, dtype=fancydtype, delimiter=',', usemask=True) + control = ma.array([(1, (2, 3.0)), (4, (5, 6.0))], dtype=fancydtype) + assert_equal(test, control) + + def test_shaped_dtype(self): + c = TextIO("aaaa 1.0 8.0 1 2 3 4 5 6") + dt = np.dtype([('name', 'S4'), ('x', float), ('y', float), + ('block', int, (2, 3))]) + x = np.genfromtxt(c, dtype=dt) + a = np.array([('aaaa', 1.0, 8.0, [[1, 2, 3], [4, 5, 6]])], + dtype=dt) + assert_array_equal(x, a) + + def test_withmissing(self): + data = TextIO('A,B\n0,1\n2,N/A') + kwargs = dict(delimiter=",", missing_values="N/A", names=True) + test = np.genfromtxt(data, dtype=None, usemask=True, **kwargs) + control = ma.array([(0, 1), (2, -1)], + mask=[(False, False), (False, True)], + dtype=[('A', int), ('B', int)]) + assert_equal(test, control) + assert_equal(test.mask, control.mask) + # + data.seek(0) + test = np.genfromtxt(data, usemask=True, **kwargs) + control = ma.array([(0, 1), (2, -1)], + mask=[(False, False), (False, True)], + dtype=[('A', float), ('B', float)]) + assert_equal(test, control) + assert_equal(test.mask, control.mask) + + def test_user_missing_values(self): + data = "A, B, C\n0, 0., 0j\n1, N/A, 1j\n-9, 2.2, N/A\n3, -99, 3j" + basekwargs = dict(dtype=None, delimiter=",", names=True,) + mdtype = [('A', int), ('B', float), ('C', complex)] + # + test = np.genfromtxt(TextIO(data), missing_values="N/A", + **basekwargs) + control = ma.array([(0, 0.0, 0j), (1, -999, 1j), + (-9, 2.2, -999j), (3, -99, 3j)], + mask=[(0, 0, 0), (0, 1, 0), (0, 0, 1), (0, 0, 0)], + dtype=mdtype) + assert_equal(test, control) + # + basekwargs['dtype'] = mdtype + test = 
np.genfromtxt(TextIO(data), + missing_values={0: -9, 1: -99, 2: -999j}, usemask=True, **basekwargs) + control = ma.array([(0, 0.0, 0j), (1, -999, 1j), + (-9, 2.2, -999j), (3, -99, 3j)], + mask=[(0, 0, 0), (0, 1, 0), (1, 0, 1), (0, 1, 0)], + dtype=mdtype) + assert_equal(test, control) + # + test = np.genfromtxt(TextIO(data), + missing_values={0: -9, 'B': -99, 'C': -999j}, + usemask=True, + **basekwargs) + control = ma.array([(0, 0.0, 0j), (1, -999, 1j), + (-9, 2.2, -999j), (3, -99, 3j)], + mask=[(0, 0, 0), (0, 1, 0), (1, 0, 1), (0, 1, 0)], + dtype=mdtype) + assert_equal(test, control) + + def test_user_filling_values(self): + # Test with missing and filling values + ctrl = np.array([(0, 3), (4, -999)], dtype=[('a', int), ('b', int)]) + data = "N/A, 2, 3\n4, ,???" + kwargs = dict(delimiter=",", + dtype=int, + names="a,b,c", + missing_values={0: "N/A", 'b': " ", 2: "???"}, + filling_values={0: 0, 'b': 0, 2: -999}) + test = np.genfromtxt(TextIO(data), **kwargs) + ctrl = np.array([(0, 2, 3), (4, 0, -999)], + dtype=[(_, int) for _ in "abc"]) + assert_equal(test, ctrl) + # + test = np.genfromtxt(TextIO(data), usecols=(0, -1), **kwargs) + ctrl = np.array([(0, 3), (4, -999)], dtype=[(_, int) for _ in "ac"]) + assert_equal(test, ctrl) + + data2 = "1,2,*,4\n5,*,7,8\n" + test = np.genfromtxt(TextIO(data2), delimiter=',', dtype=int, + missing_values="*", filling_values=0) + ctrl = np.array([[1, 2, 0, 4], [5, 0, 7, 8]]) + assert_equal(test, ctrl) + test = np.genfromtxt(TextIO(data2), delimiter=',', dtype=int, + missing_values="*", filling_values=-1) + ctrl = np.array([[1, 2, -1, 4], [5, -1, 7, 8]]) + assert_equal(test, ctrl) + + def test_withmissing_float(self): + data = TextIO('A,B\n0,1.5\n2,-999.00') + test = np.genfromtxt(data, dtype=None, delimiter=',', + missing_values='-999.0', names=True, usemask=True) + control = ma.array([(0, 1.5), (2, -1.)], + mask=[(False, False), (False, True)], + dtype=[('A', int), ('B', float)]) + assert_equal(test, control) + assert_equal(test.mask, control.mask) + + def test_with_masked_column_uniform(self): + # Test masked column + data = TextIO('1 2 3\n4 5 6\n') + test = np.genfromtxt(data, dtype=None, + missing_values='2,5', usemask=True) + control = ma.array([[1, 2, 3], [4, 5, 6]], mask=[[0, 1, 0], [0, 1, 0]]) + assert_equal(test, control) + + def test_with_masked_column_various(self): + # Test masked column + data = TextIO('True 2 3\nFalse 5 6\n') + test = np.genfromtxt(data, dtype=None, + missing_values='2,5', usemask=True) + control = ma.array([(1, 2, 3), (0, 5, 6)], + mask=[(0, 1, 0), (0, 1, 0)], + dtype=[('f0', bool), ('f1', bool), ('f2', int)]) + assert_equal(test, control) + + def test_invalid_raise(self): + # Test invalid raise + data = ["1, 1, 1, 1, 1"] * 50 + for i in range(5): + data[10 * i] = "2, 2, 2, 2 2" + data.insert(0, "a, b, c, d, e") + mdata = TextIO("\n".join(data)) + + kwargs = dict(delimiter=",", dtype=None, names=True) + def f(): + return np.genfromtxt(mdata, invalid_raise=False, **kwargs) + mtest = assert_warns(ConversionWarning, f) + assert_equal(len(mtest), 45) + assert_equal(mtest, np.ones(45, dtype=[(_, int) for _ in 'abcde'])) + # + mdata.seek(0) + assert_raises(ValueError, np.genfromtxt, mdata, + delimiter=",", names=True) + + def test_invalid_raise_with_usecols(self): + # Test invalid_raise with usecols + data = ["1, 1, 1, 1, 1"] * 50 + for i in range(5): + data[10 * i] = "2, 2, 2, 2 2" + data.insert(0, "a, b, c, d, e") + mdata = TextIO("\n".join(data)) + + kwargs = dict(delimiter=",", dtype=None, names=True, + invalid_raise=False) + 
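+ # Five of the fifty data rows are malformed; with invalid_raise=False the
+ # call below should merely warn and return the 45 parsable rows.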
def f(): + return np.genfromtxt(mdata, usecols=(0, 4), **kwargs) + mtest = assert_warns(ConversionWarning, f) + assert_equal(len(mtest), 45) + assert_equal(mtest, np.ones(45, dtype=[(_, int) for _ in 'ae'])) + # + mdata.seek(0) + mtest = np.genfromtxt(mdata, usecols=(0, 1), **kwargs) + assert_equal(len(mtest), 50) + control = np.ones(50, dtype=[(_, int) for _ in 'ab']) + control[[10 * _ for _ in range(5)]] = (2, 2) + assert_equal(mtest, control) + + def test_inconsistent_dtype(self): + # Test inconsistent dtype + data = ["1, 1, 1, 1, -1.1"] * 50 + mdata = TextIO("\n".join(data)) + + converters = {4: lambda x: "(%s)" % x.decode()} + kwargs = dict(delimiter=",", converters=converters, + dtype=[(_, int) for _ in 'abcde'],) + assert_raises(ValueError, np.genfromtxt, mdata, **kwargs) + + def test_default_field_format(self): + # Test default format + data = "0, 1, 2.3\n4, 5, 6.7" + mtest = np.genfromtxt(TextIO(data), + delimiter=",", dtype=None, defaultfmt="f%02i") + ctrl = np.array([(0, 1, 2.3), (4, 5, 6.7)], + dtype=[("f00", int), ("f01", int), ("f02", float)]) + assert_equal(mtest, ctrl) + + def test_single_dtype_wo_names(self): + # Test single dtype w/o names + data = "0, 1, 2.3\n4, 5, 6.7" + mtest = np.genfromtxt(TextIO(data), + delimiter=",", dtype=float, defaultfmt="f%02i") + ctrl = np.array([[0., 1., 2.3], [4., 5., 6.7]], dtype=float) + assert_equal(mtest, ctrl) + + def test_single_dtype_w_explicit_names(self): + # Test single dtype w explicit names + data = "0, 1, 2.3\n4, 5, 6.7" + mtest = np.genfromtxt(TextIO(data), + delimiter=",", dtype=float, names="a, b, c") + ctrl = np.array([(0., 1., 2.3), (4., 5., 6.7)], + dtype=[(_, float) for _ in "abc"]) + assert_equal(mtest, ctrl) + + def test_single_dtype_w_implicit_names(self): + # Test single dtype w implicit names + data = "a, b, c\n0, 1, 2.3\n4, 5, 6.7" + mtest = np.genfromtxt(TextIO(data), + delimiter=",", dtype=float, names=True) + ctrl = np.array([(0., 1., 2.3), (4., 5., 6.7)], + dtype=[(_, float) for _ in "abc"]) + assert_equal(mtest, ctrl) + + def test_easy_structured_dtype(self): + # Test easy structured dtype + data = "0, 1, 2.3\n4, 5, 6.7" + mtest = np.genfromtxt(TextIO(data), delimiter=",", + dtype=(int, float, float), defaultfmt="f_%02i") + ctrl = np.array([(0, 1., 2.3), (4, 5., 6.7)], + dtype=[("f_00", int), ("f_01", float), ("f_02", float)]) + assert_equal(mtest, ctrl) + + def test_autostrip(self): + # Test autostrip + data = "01/01/2003 , 1.3, abcde" + kwargs = dict(delimiter=",", dtype=None) + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) + mtest = np.genfromtxt(TextIO(data), **kwargs) + assert_(w[0].category is np.VisibleDeprecationWarning) + ctrl = np.array([('01/01/2003 ', 1.3, ' abcde')], + dtype=[('f0', '|S12'), ('f1', float), ('f2', '|S8')]) + assert_equal(mtest, ctrl) + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) + mtest = np.genfromtxt(TextIO(data), autostrip=True, **kwargs) + assert_(w[0].category is np.VisibleDeprecationWarning) + ctrl = np.array([('01/01/2003', 1.3, 'abcde')], + dtype=[('f0', '|S10'), ('f1', float), ('f2', '|S5')]) + assert_equal(mtest, ctrl) + + def test_replace_space(self): + # Test the 'replace_space' option + txt = "A.A, B (B), C:C\n1, 2, 3.14" + # Test default: replace ' ' by '_' and delete non-alphanum chars + test = np.genfromtxt(TextIO(txt), + delimiter=",", names=True, dtype=None) + ctrl_dtype = [("AA", int), ("B_B", int), ("CC", float)] + 
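+ # "A.A" -> "AA", "B (B)" -> "B_B", "C:C" -> "CC": spaces are replaced by
+ # '_' and characters such as '.', '(', ')' and ':' are deleted by default.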
ctrl = np.array((1, 2, 3.14), dtype=ctrl_dtype) + assert_equal(test, ctrl) + # Test: no replace, no delete + test = np.genfromtxt(TextIO(txt), + delimiter=",", names=True, dtype=None, + replace_space='', deletechars='') + ctrl_dtype = [("A.A", int), ("B (B)", int), ("C:C", float)] + ctrl = np.array((1, 2, 3.14), dtype=ctrl_dtype) + assert_equal(test, ctrl) + # Test: no delete (spaces are replaced by _) + test = np.genfromtxt(TextIO(txt), + delimiter=",", names=True, dtype=None, + deletechars='') + ctrl_dtype = [("A.A", int), ("B_(B)", int), ("C:C", float)] + ctrl = np.array((1, 2, 3.14), dtype=ctrl_dtype) + assert_equal(test, ctrl) + + def test_replace_space_known_dtype(self): + # Test the 'replace_space' (and related) options when dtype != None + txt = "A.A, B (B), C:C\n1, 2, 3" + # Test default: replace ' ' by '_' and delete non-alphanum chars + test = np.genfromtxt(TextIO(txt), + delimiter=",", names=True, dtype=int) + ctrl_dtype = [("AA", int), ("B_B", int), ("CC", int)] + ctrl = np.array((1, 2, 3), dtype=ctrl_dtype) + assert_equal(test, ctrl) + # Test: no replace, no delete + test = np.genfromtxt(TextIO(txt), + delimiter=",", names=True, dtype=int, + replace_space='', deletechars='') + ctrl_dtype = [("A.A", int), ("B (B)", int), ("C:C", int)] + ctrl = np.array((1, 2, 3), dtype=ctrl_dtype) + assert_equal(test, ctrl) + # Test: no delete (spaces are replaced by _) + test = np.genfromtxt(TextIO(txt), + delimiter=",", names=True, dtype=int, + deletechars='') + ctrl_dtype = [("A.A", int), ("B_(B)", int), ("C:C", int)] + ctrl = np.array((1, 2, 3), dtype=ctrl_dtype) + assert_equal(test, ctrl) + + def test_incomplete_names(self): + # Test w/ incomplete names + data = "A,,C\n0,1,2\n3,4,5" + kwargs = dict(delimiter=",", names=True) + # w/ dtype=None + ctrl = np.array([(0, 1, 2), (3, 4, 5)], + dtype=[(_, int) for _ in ('A', 'f0', 'C')]) + test = np.genfromtxt(TextIO(data), dtype=None, **kwargs) + assert_equal(test, ctrl) + # w/ default dtype + ctrl = np.array([(0, 1, 2), (3, 4, 5)], + dtype=[(_, float) for _ in ('A', 'f0', 'C')]) + test = np.genfromtxt(TextIO(data), **kwargs) + + def test_names_auto_completion(self): + # Make sure that names are properly completed + data = "1 2 3\n 4 5 6" + test = np.genfromtxt(TextIO(data), + dtype=(int, float, int), names="a") + ctrl = np.array([(1, 2, 3), (4, 5, 6)], + dtype=[('a', int), ('f0', float), ('f1', int)]) + assert_equal(test, ctrl) + + def test_names_with_usecols_bug1636(self): + # Make sure we pick up the right names w/ usecols + data = "A,B,C,D,E\n0,1,2,3,4\n0,1,2,3,4\n0,1,2,3,4" + ctrl_names = ("A", "C", "E") + test = np.genfromtxt(TextIO(data), + dtype=(int, int, int), delimiter=",", + usecols=(0, 2, 4), names=True) + assert_equal(test.dtype.names, ctrl_names) + # + test = np.genfromtxt(TextIO(data), + dtype=(int, int, int), delimiter=",", + usecols=("A", "C", "E"), names=True) + assert_equal(test.dtype.names, ctrl_names) + # + test = np.genfromtxt(TextIO(data), + dtype=int, delimiter=",", + usecols=("A", "C", "E"), names=True) + assert_equal(test.dtype.names, ctrl_names) + + def test_fixed_width_names(self): + # Test fix-width w/ names + data = " A B C\n 0 1 2.3\n 45 67 9." 
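+ # A tuple of ints as delimiter switches genfromtxt to fixed-width fields;
+ # here the three columns are 5, 5 and 4 characters wide. A minimal sketch
+ # (not from the original suite):
+ #     np.genfromtxt(TextIO("  1.0  2.0 3.0"), delimiter=(5, 5, 4))
+ #     -> array([1., 2., 3.])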
+ kwargs = dict(delimiter=(5, 5, 4), names=True, dtype=None) + ctrl = np.array([(0, 1, 2.3), (45, 67, 9.)], + dtype=[('A', int), ('B', int), ('C', float)]) + test = np.genfromtxt(TextIO(data), **kwargs) + assert_equal(test, ctrl) + # + kwargs = dict(delimiter=5, names=True, dtype=None) + ctrl = np.array([(0, 1, 2.3), (45, 67, 9.)], + dtype=[('A', int), ('B', int), ('C', float)]) + test = np.genfromtxt(TextIO(data), **kwargs) + assert_equal(test, ctrl) + + def test_filling_values(self): + # Test missing values + data = b"1, 2, 3\n1, , 5\n0, 6, \n" + kwargs = dict(delimiter=",", dtype=None, filling_values=-999) + ctrl = np.array([[1, 2, 3], [1, -999, 5], [0, 6, -999]], dtype=int) + test = np.genfromtxt(TextIO(data), **kwargs) + assert_equal(test, ctrl) + + def test_comments_is_none(self): + # Github issue 329 (None was previously being converted to 'None'). + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) + test = np.genfromtxt(TextIO("test1,testNonetherestofthedata"), + dtype=None, comments=None, delimiter=',') + assert_(w[0].category is np.VisibleDeprecationWarning) + assert_equal(test[1], b'testNonetherestofthedata') + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) + test = np.genfromtxt(TextIO("test1, testNonetherestofthedata"), + dtype=None, comments=None, delimiter=',') + assert_(w[0].category is np.VisibleDeprecationWarning) + assert_equal(test[1], b' testNonetherestofthedata') + + def test_latin1(self): + latin1 = b'\xf6\xfc\xf6' + norm = b"norm1,norm2,norm3\n" + enc = b"test1,testNonethe" + latin1 + b",test3\n" + s = norm + enc + norm + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) + test = np.genfromtxt(TextIO(s), + dtype=None, comments=None, delimiter=',') + assert_(w[0].category is np.VisibleDeprecationWarning) + assert_equal(test[1, 0], b"test1") + assert_equal(test[1, 1], b"testNonethe" + latin1) + assert_equal(test[1, 2], b"test3") + test = np.genfromtxt(TextIO(s), + dtype=None, comments=None, delimiter=',', + encoding='latin1') + assert_equal(test[1, 0], u"test1") + assert_equal(test[1, 1], u"testNonethe" + latin1.decode('latin1')) + assert_equal(test[1, 2], u"test3") + + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) + test = np.genfromtxt(TextIO(b"0,testNonethe" + latin1), + dtype=None, comments=None, delimiter=',') + assert_(w[0].category is np.VisibleDeprecationWarning) + assert_equal(test['f0'], 0) + assert_equal(test['f1'], b"testNonethe" + latin1) + + def test_binary_decode_autodtype(self): + utf16 = b'\xff\xfeh\x04 \x00i\x04 \x00j\x04' + v = self.loadfunc(BytesIO(utf16), dtype=None, encoding='UTF-16') + assert_array_equal(v, np.array(utf16.decode('UTF-16').split())) + + def test_utf8_byte_encoding(self): + utf8 = b"\xcf\x96" + norm = b"norm1,norm2,norm3\n" + enc = b"test1,testNonethe" + utf8 + b",test3\n" + s = norm + enc + norm + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) + test = np.genfromtxt(TextIO(s), + dtype=None, comments=None, delimiter=',') + assert_(w[0].category is np.VisibleDeprecationWarning) + ctl = np.array([ + [b'norm1', b'norm2', b'norm3'], + [b'test1', b'testNonethe' + utf8, b'test3'], + [b'norm1', b'norm2', b'norm3']]) + assert_array_equal(test, ctl) + + def test_utf8_file(self): + utf8 = 
b"\xcf\x96" + with temppath() as path: + with open(path, "wb") as f: + f.write((b"test1,testNonethe" + utf8 + b",test3\n") * 2) + test = np.genfromtxt(path, dtype=None, comments=None, + delimiter=',', encoding="UTF-8") + ctl = np.array([ + ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"], + ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"]], + dtype=np.unicode_) + assert_array_equal(test, ctl) + + # test a mixed dtype + with open(path, "wb") as f: + f.write(b"0,testNonethe" + utf8) + test = np.genfromtxt(path, dtype=None, comments=None, + delimiter=',', encoding="UTF-8") + assert_equal(test['f0'], 0) + assert_equal(test['f1'], "testNonethe" + utf8.decode("UTF-8")) + + def test_utf8_file_nodtype_unicode(self): + # bytes encoding with non-latin1 -> unicode upcast + utf8 = u'\u03d6' + latin1 = u'\xf6\xfc\xf6' + + # skip test if cannot encode utf8 test string with preferred + # encoding. The preferred encoding is assumed to be the default + # encoding of io.open. Will need to change this for PyTest, maybe + # using pytest.mark.xfail(raises=***). + try: + encoding = locale.getpreferredencoding() + utf8.encode(encoding) + except (UnicodeError, ImportError): + pytest.skip('Skipping test_utf8_file_nodtype_unicode, ' + 'unable to encode utf8 in preferred encoding') + + with temppath() as path: + with io.open(path, "wt") as f: + f.write(u"norm1,norm2,norm3\n") + f.write(u"norm1," + latin1 + u",norm3\n") + f.write(u"test1,testNonethe" + utf8 + u",test3\n") + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', + np.VisibleDeprecationWarning) + test = np.genfromtxt(path, dtype=None, comments=None, + delimiter=',') + # Check for warning when encoding not specified. + assert_(w[0].category is np.VisibleDeprecationWarning) + ctl = np.array([ + ["norm1", "norm2", "norm3"], + ["norm1", latin1, "norm3"], + ["test1", "testNonethe" + utf8, "test3"]], + dtype=np.unicode_) + assert_array_equal(test, ctl) + + def test_recfromtxt(self): + # + data = TextIO('A,B\n0,1\n2,3') + kwargs = dict(delimiter=",", missing_values="N/A", names=True) + test = np.recfromtxt(data, **kwargs) + control = np.array([(0, 1), (2, 3)], + dtype=[('A', int), ('B', int)]) + assert_(isinstance(test, np.recarray)) + assert_equal(test, control) + # + data = TextIO('A,B\n0,1\n2,N/A') + test = np.recfromtxt(data, dtype=None, usemask=True, **kwargs) + control = ma.array([(0, 1), (2, -1)], + mask=[(False, False), (False, True)], + dtype=[('A', int), ('B', int)]) + assert_equal(test, control) + assert_equal(test.mask, control.mask) + assert_equal(test.A, [0, 2]) + + def test_recfromcsv(self): + # + data = TextIO('A,B\n0,1\n2,3') + kwargs = dict(missing_values="N/A", names=True, case_sensitive=True) + test = np.recfromcsv(data, dtype=None, **kwargs) + control = np.array([(0, 1), (2, 3)], + dtype=[('A', int), ('B', int)]) + assert_(isinstance(test, np.recarray)) + assert_equal(test, control) + # + data = TextIO('A,B\n0,1\n2,N/A') + test = np.recfromcsv(data, dtype=None, usemask=True, **kwargs) + control = ma.array([(0, 1), (2, -1)], + mask=[(False, False), (False, True)], + dtype=[('A', int), ('B', int)]) + assert_equal(test, control) + assert_equal(test.mask, control.mask) + assert_equal(test.A, [0, 2]) + # + data = TextIO('A,B\n0,1\n2,3') + test = np.recfromcsv(data, missing_values='N/A',) + control = np.array([(0, 1), (2, 3)], + dtype=[('a', int), ('b', int)]) + assert_(isinstance(test, np.recarray)) + assert_equal(test, control) + # + data = TextIO('A,B\n0,1\n2,3') + dtype = [('a', int), ('b', 
float)]
+ test = np.recfromcsv(data, missing_values='N/A', dtype=dtype)
+ control = np.array([(0, 1), (2, 3)],
+ dtype=dtype)
+ assert_(isinstance(test, np.recarray))
+ assert_equal(test, control)
+
+ #gh-10394
+ data = TextIO('color\n"red"\n"blue"')
+ test = np.recfromcsv(data, converters={0: lambda x: x.strip(b'\"')})
+ control = np.array([('red',), ('blue',)], dtype=[('color', (bytes, 4))])
+ assert_equal(test.dtype, control.dtype)
+ assert_equal(test, control)
+
+ def test_max_rows(self):
+ # Test the `max_rows` keyword argument.
+ data = '1 2\n3 4\n5 6\n7 8\n9 10\n'
+ txt = TextIO(data)
+ a1 = np.genfromtxt(txt, max_rows=3)
+ a2 = np.genfromtxt(txt)
+ assert_equal(a1, [[1, 2], [3, 4], [5, 6]])
+ assert_equal(a2, [[7, 8], [9, 10]])
+
+ # max_rows must be at least 1.
+ assert_raises(ValueError, np.genfromtxt, TextIO(data), max_rows=0)
+
+ # An input with several invalid rows.
+ data = '1 1\n2 2\n0 \n3 3\n4 4\n5 \n6 \n7 \n'
+
+ test = np.genfromtxt(TextIO(data), max_rows=2)
+ control = np.array([[1., 1.], [2., 2.]])
+ assert_equal(test, control)
+
+ # Test keywords conflict
+ assert_raises(ValueError, np.genfromtxt, TextIO(data), skip_footer=1,
+ max_rows=4)
+
+ # Test with invalid value
+ assert_raises(ValueError, np.genfromtxt, TextIO(data), max_rows=4)
+
+ # Test with invalid not raise
+ with suppress_warnings() as sup:
+ sup.filter(ConversionWarning)
+
+ test = np.genfromtxt(TextIO(data), max_rows=4, invalid_raise=False)
+ control = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]])
+ assert_equal(test, control)
+
+ test = np.genfromtxt(TextIO(data), max_rows=5, invalid_raise=False)
+ control = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]])
+ assert_equal(test, control)
+
+ # Structured array with field names.
+ data = 'a b\n#c d\n1 1\n2 2\n#0 \n3 3\n4 4\n5 5\n'
+
+ # Test with header, names and comments
+ txt = TextIO(data)
+ test = np.genfromtxt(txt, skip_header=1, max_rows=3, names=True)
+ control = np.array([(1.0, 1.0), (2.0, 2.0), (3.0, 3.0)],
+ dtype=[('c', '<f8'), ('d', '<f8')])
+ assert_equal(test, control)
+ # To continue reading the same "file", don't use skip_header or
+ # names, and use the previously determined dtype.
+ test = np.genfromtxt(txt, max_rows=None, dtype=control.dtype)
+ control = np.array([(4.0, 4.0), (5.0, 5.0)],
+ dtype=[('c', '<f8'), ('d', '<f8')])
+ assert_equal(test, control)
+
+ def test_auto_dtype_largeint(self):
+ # Regression test for numpy/numpy#5635 whereby large integers could
+ # cause OverflowErrors.
+
+ # Test the automatic definition of the output dtype
+ #
+ # 2**66 = 73786976294838206464 => should convert to float
+ # 2**34 = 17179869184 => should convert to int64
+ # 2**10 = 1024 => should convert to int (int32 on 32-bit systems,
+ # int64 on 64-bit systems)
+
+ data = TextIO('73786976294838206464 17179869184 1024')
+
+ test = np.genfromtxt(data, dtype=None)
+
+ assert_equal(test.dtype.names, ['f0', 'f1', 'f2'])
+
+ assert_(test.dtype['f0'] == float)
+ assert_(test.dtype['f1'] == np.int64)
+ assert_(test.dtype['f2'] == np.int_)
+
+ assert_allclose(test['f0'], 73786976294838206464.)
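+ # f0 is too large for int64, so it decays to float and was compared
+ # approximately above; f1 and f2 fit in integers and compare exactly below.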
+ assert_equal(test['f1'], 17179869184) + assert_equal(test['f2'], 1024) + + def test_unpack_structured(self): + # Regression test for gh-4341 + # Unpacking should work on structured arrays + txt = TextIO("M 21 72\nF 35 58") + dt = {'names': ('a', 'b', 'c'), 'formats': ('S1', 'i4', 'f4')} + a, b, c = np.genfromtxt(txt, dtype=dt, unpack=True) + assert_equal(a.dtype, np.dtype('S1')) + assert_equal(b.dtype, np.dtype('i4')) + assert_equal(c.dtype, np.dtype('f4')) + assert_array_equal(a, np.array([b'M', b'F'])) + assert_array_equal(b, np.array([21, 35])) + assert_array_equal(c, np.array([72., 58.])) + + def test_unpack_auto_dtype(self): + # Regression test for gh-4341 + # Unpacking should work when dtype=None + txt = TextIO("M 21 72.\nF 35 58.") + expected = (np.array(["M", "F"]), np.array([21, 35]), np.array([72., 58.])) + test = np.genfromtxt(txt, dtype=None, unpack=True, encoding="utf-8") + for arr, result in zip(expected, test): + assert_array_equal(arr, result) + assert_equal(arr.dtype, result.dtype) + + def test_unpack_single_name(self): + # Regression test for gh-4341 + # Unpacking should work when structured dtype has only one field + txt = TextIO("21\n35") + dt = {'names': ('a',), 'formats': ('i4',)} + expected = np.array([21, 35], dtype=np.int32) + test = np.genfromtxt(txt, dtype=dt, unpack=True) + assert_array_equal(expected, test) + assert_equal(expected.dtype, test.dtype) + + def test_squeeze_scalar(self): + # Regression test for gh-4341 + # Unpacking a scalar should give zero-dim output, + # even if dtype is structured + txt = TextIO("1") + dt = {'names': ('a',), 'formats': ('i4',)} + expected = np.array((1,), dtype=np.int32) + test = np.genfromtxt(txt, dtype=dt, unpack=True) + assert_array_equal(expected, test) + assert_equal((), test.shape) + assert_equal(expected.dtype, test.dtype) + + +class TestPathUsage: + # Test that pathlib.Path can be used + def test_loadtxt(self): + with temppath(suffix='.txt') as path: + path = Path(path) + a = np.array([[1.1, 2], [3, 4]]) + np.savetxt(path, a) + x = np.loadtxt(path) + assert_array_equal(x, a) + + def test_save_load(self): + # Test that pathlib.Path instances can be used with save. + with temppath(suffix='.npy') as path: + path = Path(path) + a = np.array([[1, 2], [3, 4]], int) + np.save(path, a) + data = np.load(path) + assert_array_equal(data, a) + + def test_save_load_memmap(self): + # Test that pathlib.Path instances can be loaded mem-mapped. + with temppath(suffix='.npy') as path: + path = Path(path) + a = np.array([[1, 2], [3, 4]], int) + np.save(path, a) + data = np.load(path, mmap_mode='r') + assert_array_equal(data, a) + # close the mem-mapped file + del data + if IS_PYPY: + break_cycles() + break_cycles() + + def test_save_load_memmap_readwrite(self): + # Test that pathlib.Path instances can be written mem-mapped. + with temppath(suffix='.npy') as path: + path = Path(path) + a = np.array([[1, 2], [3, 4]], int) + np.save(path, a) + b = np.load(path, mmap_mode='r+') + a[0][0] = 5 + b[0][0] = 5 + del b # closes the file + if IS_PYPY: + break_cycles() + break_cycles() + data = np.load(path) + assert_array_equal(data, a) + + def test_savez_load(self): + # Test that pathlib.Path instances can be used with savez. + with temppath(suffix='.npz') as path: + path = Path(path) + np.savez(path, lab='place holder') + with np.load(path) as data: + assert_array_equal(data['lab'], 'place holder') + + def test_savez_compressed_load(self): + # Test that pathlib.Path instances can be used with savez. 
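+ # (savez_compressed here; unlike the savez test above, the returned
+ # NpzFile is closed explicitly rather than used as a context manager)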
+ with temppath(suffix='.npz') as path: + path = Path(path) + np.savez_compressed(path, lab='place holder') + data = np.load(path) + assert_array_equal(data['lab'], 'place holder') + data.close() + + def test_genfromtxt(self): + with temppath(suffix='.txt') as path: + path = Path(path) + a = np.array([(1, 2), (3, 4)]) + np.savetxt(path, a) + data = np.genfromtxt(path) + assert_array_equal(a, data) + + def test_recfromtxt(self): + with temppath(suffix='.txt') as path: + path = Path(path) + with path.open('w') as f: + f.write(u'A,B\n0,1\n2,3') + + kwargs = dict(delimiter=",", missing_values="N/A", names=True) + test = np.recfromtxt(path, **kwargs) + control = np.array([(0, 1), (2, 3)], + dtype=[('A', int), ('B', int)]) + assert_(isinstance(test, np.recarray)) + assert_equal(test, control) + + def test_recfromcsv(self): + with temppath(suffix='.txt') as path: + path = Path(path) + with path.open('w') as f: + f.write(u'A,B\n0,1\n2,3') + + kwargs = dict(missing_values="N/A", names=True, case_sensitive=True) + test = np.recfromcsv(path, dtype=None, **kwargs) + control = np.array([(0, 1), (2, 3)], + dtype=[('A', int), ('B', int)]) + assert_(isinstance(test, np.recarray)) + assert_equal(test, control) + + +def test_gzip_load(): + a = np.random.random((5, 5)) + + s = BytesIO() + f = gzip.GzipFile(fileobj=s, mode="w") + + np.save(f, a) + f.close() + s.seek(0) + + f = gzip.GzipFile(fileobj=s, mode="r") + assert_array_equal(np.load(f), a) + + +# These next two classes encode the minimal API needed to save()/load() arrays. +# The `test_ducktyping` ensures they work correctly +class JustWriter: + def __init__(self, base): + self.base = base + + def write(self, s): + return self.base.write(s) + + def flush(self): + return self.base.flush() + +class JustReader: + def __init__(self, base): + self.base = base + + def read(self, n): + return self.base.read(n) + + def seek(self, off, whence=0): + return self.base.seek(off, whence) + + +def test_ducktyping(): + a = np.random.random((5, 5)) + + s = BytesIO() + f = JustWriter(s) + + np.save(f, a) + f.flush() + s.seek(0) + + f = JustReader(s) + assert_array_equal(np.load(f), a) + + + +def test_gzip_loadtxt(): + # Thanks to another windows brokenness, we can't use + # NamedTemporaryFile: a file created from this function cannot be + # reopened by another open call. 
So we first put the gzipped string + # of the test reference array, write it to a securely opened file, + # which is then read from by the loadtxt function + s = BytesIO() + g = gzip.GzipFile(fileobj=s, mode='w') + g.write(b'1 2 3\n') + g.close() + + s.seek(0) + with temppath(suffix='.gz') as name: + with open(name, 'wb') as f: + f.write(s.read()) + res = np.loadtxt(name) + s.close() + + assert_array_equal(res, [1, 2, 3]) + + +def test_gzip_loadtxt_from_string(): + s = BytesIO() + f = gzip.GzipFile(fileobj=s, mode="w") + f.write(b'1 2 3\n') + f.close() + s.seek(0) + + f = gzip.GzipFile(fileobj=s, mode="r") + assert_array_equal(np.loadtxt(f), [1, 2, 3]) + + +def test_npzfile_dict(): + s = BytesIO() + x = np.zeros((3, 3)) + y = np.zeros((3, 3)) + + np.savez(s, x=x, y=y) + s.seek(0) + + z = np.load(s) + + assert_('x' in z) + assert_('y' in z) + assert_('x' in z.keys()) + assert_('y' in z.keys()) + + for f, a in z.items(): + assert_(f in ['x', 'y']) + assert_equal(a.shape, (3, 3)) + + assert_(len(z.items()) == 2) + + for f in z: + assert_(f in ['x', 'y']) + + assert_('x' in z.keys()) + + +@pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") +def test_load_refcount(): + # Check that objects returned by np.load are directly freed based on + # their refcount, rather than needing the gc to collect them. + + f = BytesIO() + np.savez(f, [1, 2, 3]) + f.seek(0) + + with assert_no_gc_cycles(): + np.load(f) + + f.seek(0) + dt = [("a", 'u1', 2), ("b", 'u1', 2)] + with assert_no_gc_cycles(): + x = np.loadtxt(TextIO("0 1 2 3"), dtype=dt) + assert_equal(x, np.array([((0, 1), (2, 3))], dtype=dt)) diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/test_mixins.py b/.local/lib/python3.8/site-packages/numpy/lib/tests/test_mixins.py new file mode 100644 index 0000000..6320587 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/tests/test_mixins.py @@ -0,0 +1,216 @@ +import numbers +import operator + +import numpy as np +from numpy.testing import assert_, assert_equal, assert_raises + + +# NOTE: This class should be kept as an exact copy of the example from the +# docstring for NDArrayOperatorsMixin. + +class ArrayLike(np.lib.mixins.NDArrayOperatorsMixin): + def __init__(self, value): + self.value = np.asarray(value) + + # One might also consider adding the built-in list type to this + # list, to support operations like np.add(array_like, list) + _HANDLED_TYPES = (np.ndarray, numbers.Number) + + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + out = kwargs.get('out', ()) + for x in inputs + out: + # Only support operations with instances of _HANDLED_TYPES. + # Use ArrayLike instead of type(self) for isinstance to + # allow subclasses that don't override __array_ufunc__ to + # handle ArrayLike objects. + if not isinstance(x, self._HANDLED_TYPES + (ArrayLike,)): + return NotImplemented + + # Defer to the implementation of the ufunc on unwrapped values. 
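+ # (both the positional inputs and any 'out' arrays are unwrapped to
+ # their underlying .value, so the real ufunc never sees the wrapper)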
+ inputs = tuple(x.value if isinstance(x, ArrayLike) else x + for x in inputs) + if out: + kwargs['out'] = tuple( + x.value if isinstance(x, ArrayLike) else x + for x in out) + result = getattr(ufunc, method)(*inputs, **kwargs) + + if type(result) is tuple: + # multiple return values + return tuple(type(self)(x) for x in result) + elif method == 'at': + # no return value + return None + else: + # one return value + return type(self)(result) + + def __repr__(self): + return '%s(%r)' % (type(self).__name__, self.value) + + +def wrap_array_like(result): + if type(result) is tuple: + return tuple(ArrayLike(r) for r in result) + else: + return ArrayLike(result) + + +def _assert_equal_type_and_value(result, expected, err_msg=None): + assert_equal(type(result), type(expected), err_msg=err_msg) + if isinstance(result, tuple): + assert_equal(len(result), len(expected), err_msg=err_msg) + for result_item, expected_item in zip(result, expected): + _assert_equal_type_and_value(result_item, expected_item, err_msg) + else: + assert_equal(result.value, expected.value, err_msg=err_msg) + assert_equal(getattr(result.value, 'dtype', None), + getattr(expected.value, 'dtype', None), err_msg=err_msg) + + +_ALL_BINARY_OPERATORS = [ + operator.lt, + operator.le, + operator.eq, + operator.ne, + operator.gt, + operator.ge, + operator.add, + operator.sub, + operator.mul, + operator.truediv, + operator.floordiv, + operator.mod, + divmod, + pow, + operator.lshift, + operator.rshift, + operator.and_, + operator.xor, + operator.or_, +] + + +class TestNDArrayOperatorsMixin: + + def test_array_like_add(self): + + def check(result): + _assert_equal_type_and_value(result, ArrayLike(0)) + + check(ArrayLike(0) + 0) + check(0 + ArrayLike(0)) + + check(ArrayLike(0) + np.array(0)) + check(np.array(0) + ArrayLike(0)) + + check(ArrayLike(np.array(0)) + 0) + check(0 + ArrayLike(np.array(0))) + + check(ArrayLike(np.array(0)) + np.array(0)) + check(np.array(0) + ArrayLike(np.array(0))) + + def test_inplace(self): + array_like = ArrayLike(np.array([0])) + array_like += 1 + _assert_equal_type_and_value(array_like, ArrayLike(np.array([1]))) + + array = np.array([0]) + array += ArrayLike(1) + _assert_equal_type_and_value(array, ArrayLike(np.array([1]))) + + def test_opt_out(self): + + class OptOut: + """Object that opts out of __array_ufunc__.""" + __array_ufunc__ = None + + def __add__(self, other): + return self + + def __radd__(self, other): + return self + + array_like = ArrayLike(1) + opt_out = OptOut() + + # supported operations + assert_(array_like + opt_out is opt_out) + assert_(opt_out + array_like is opt_out) + + # not supported + with assert_raises(TypeError): + # don't use the Python default, array_like = array_like + opt_out + array_like += opt_out + with assert_raises(TypeError): + array_like - opt_out + with assert_raises(TypeError): + opt_out - array_like + + def test_subclass(self): + + class SubArrayLike(ArrayLike): + """Should take precedence over ArrayLike.""" + + x = ArrayLike(0) + y = SubArrayLike(1) + _assert_equal_type_and_value(x + y, y) + _assert_equal_type_and_value(y + x, y) + + def test_object(self): + x = ArrayLike(0) + obj = object() + with assert_raises(TypeError): + x + obj + with assert_raises(TypeError): + obj + x + with assert_raises(TypeError): + x += obj + + def test_unary_methods(self): + array = np.array([-1, 0, 1, 2]) + array_like = ArrayLike(array) + for op in [operator.neg, + operator.pos, + abs, + operator.invert]: + _assert_equal_type_and_value(op(array_like), ArrayLike(op(array))) + + def 
test_forward_binary_methods(self): + array = np.array([-1, 0, 1, 2]) + array_like = ArrayLike(array) + for op in _ALL_BINARY_OPERATORS: + expected = wrap_array_like(op(array, 1)) + actual = op(array_like, 1) + err_msg = 'failed for operator {}'.format(op) + _assert_equal_type_and_value(expected, actual, err_msg=err_msg) + + def test_reflected_binary_methods(self): + for op in _ALL_BINARY_OPERATORS: + expected = wrap_array_like(op(2, 1)) + actual = op(2, ArrayLike(1)) + err_msg = 'failed for operator {}'.format(op) + _assert_equal_type_and_value(expected, actual, err_msg=err_msg) + + def test_matmul(self): + array = np.array([1, 2], dtype=np.float64) + array_like = ArrayLike(array) + expected = ArrayLike(np.float64(5)) + _assert_equal_type_and_value(expected, np.matmul(array_like, array)) + _assert_equal_type_and_value( + expected, operator.matmul(array_like, array)) + _assert_equal_type_and_value( + expected, operator.matmul(array, array_like)) + + def test_ufunc_at(self): + array = ArrayLike(np.array([1, 2, 3, 4])) + assert_(np.negative.at(array, np.array([0, 1])) is None) + _assert_equal_type_and_value(array, ArrayLike([-1, -2, 3, 4])) + + def test_ufunc_two_outputs(self): + mantissa, exponent = np.frexp(2 ** -3) + expected = (ArrayLike(mantissa), ArrayLike(exponent)) + _assert_equal_type_and_value( + np.frexp(ArrayLike(2 ** -3)), expected) + _assert_equal_type_and_value( + np.frexp(ArrayLike(np.array(2 ** -3))), expected) diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/test_nanfunctions.py b/.local/lib/python3.8/site-packages/numpy/lib/tests/test_nanfunctions.py new file mode 100644 index 0000000..733a077 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/tests/test_nanfunctions.py @@ -0,0 +1,1186 @@ +import warnings +import pytest +import inspect + +import numpy as np +from numpy.lib.nanfunctions import _nan_mask, _replace_nan +from numpy.testing import ( + assert_, assert_equal, assert_almost_equal, assert_raises, + assert_array_equal, suppress_warnings + ) + + +# Test data +_ndat = np.array([[0.6244, np.nan, 0.2692, 0.0116, np.nan, 0.1170], + [0.5351, -0.9403, np.nan, 0.2100, 0.4759, 0.2833], + [np.nan, np.nan, np.nan, 0.1042, np.nan, -0.5954], + [0.1610, np.nan, np.nan, 0.1859, 0.3146, np.nan]]) + + +# Rows of _ndat with nans removed +_rdat = [np.array([0.6244, 0.2692, 0.0116, 0.1170]), + np.array([0.5351, -0.9403, 0.2100, 0.4759, 0.2833]), + np.array([0.1042, -0.5954]), + np.array([0.1610, 0.1859, 0.3146])] + +# Rows of _ndat with nans converted to ones +_ndat_ones = np.array([[0.6244, 1.0, 0.2692, 0.0116, 1.0, 0.1170], + [0.5351, -0.9403, 1.0, 0.2100, 0.4759, 0.2833], + [1.0, 1.0, 1.0, 0.1042, 1.0, -0.5954], + [0.1610, 1.0, 1.0, 0.1859, 0.3146, 1.0]]) + +# Rows of _ndat with nans converted to zeros +_ndat_zeros = np.array([[0.6244, 0.0, 0.2692, 0.0116, 0.0, 0.1170], + [0.5351, -0.9403, 0.0, 0.2100, 0.4759, 0.2833], + [0.0, 0.0, 0.0, 0.1042, 0.0, -0.5954], + [0.1610, 0.0, 0.0, 0.1859, 0.3146, 0.0]]) + + +class TestSignatureMatch: + NANFUNCS = { + np.nanmin: np.amin, + np.nanmax: np.amax, + np.nanargmin: np.argmin, + np.nanargmax: np.argmax, + np.nansum: np.sum, + np.nanprod: np.prod, + np.nancumsum: np.cumsum, + np.nancumprod: np.cumprod, + np.nanmean: np.mean, + np.nanmedian: np.median, + np.nanpercentile: np.percentile, + np.nanquantile: np.quantile, + np.nanvar: np.var, + np.nanstd: np.std, + } + IDS = [k.__name__ for k in NANFUNCS] + + @staticmethod + def get_signature(func, default="..."): + """Construct a signature and replace all default 
parameter-values.""" + prm_list = [] + signature = inspect.signature(func) + for prm in signature.parameters.values(): + if prm.default is inspect.Parameter.empty: + prm_list.append(prm) + else: + prm_list.append(prm.replace(default=default)) + return inspect.Signature(prm_list) + + @pytest.mark.parametrize("nan_func,func", NANFUNCS.items(), ids=IDS) + def test_signature_match(self, nan_func, func): + # Ignore the default parameter-values as they can sometimes differ + # between the two functions (*e.g.* one has `False` while the other + # has `np._NoValue`) + signature = self.get_signature(func) + nan_signature = self.get_signature(nan_func) + np.testing.assert_equal(signature, nan_signature) + + def test_exhaustiveness(self): + """Validate that all nan functions are actually tested.""" + np.testing.assert_equal( + set(self.IDS), set(np.lib.nanfunctions.__all__) + ) + + +class TestNanFunctions_MinMax: + + nanfuncs = [np.nanmin, np.nanmax] + stdfuncs = [np.min, np.max] + + def test_mutation(self): + # Check that passed array is not modified. + ndat = _ndat.copy() + for f in self.nanfuncs: + f(ndat) + assert_equal(ndat, _ndat) + + def test_keepdims(self): + mat = np.eye(3) + for nf, rf in zip(self.nanfuncs, self.stdfuncs): + for axis in [None, 0, 1]: + tgt = rf(mat, axis=axis, keepdims=True) + res = nf(mat, axis=axis, keepdims=True) + assert_(res.ndim == tgt.ndim) + + def test_out(self): + mat = np.eye(3) + for nf, rf in zip(self.nanfuncs, self.stdfuncs): + resout = np.zeros(3) + tgt = rf(mat, axis=1) + res = nf(mat, axis=1, out=resout) + assert_almost_equal(res, resout) + assert_almost_equal(res, tgt) + + def test_dtype_from_input(self): + codes = 'efdgFDG' + for nf, rf in zip(self.nanfuncs, self.stdfuncs): + for c in codes: + mat = np.eye(3, dtype=c) + tgt = rf(mat, axis=1).dtype.type + res = nf(mat, axis=1).dtype.type + assert_(res is tgt) + # scalar case + tgt = rf(mat, axis=None).dtype.type + res = nf(mat, axis=None).dtype.type + assert_(res is tgt) + + def test_result_values(self): + for nf, rf in zip(self.nanfuncs, self.stdfuncs): + tgt = [rf(d) for d in _rdat] + res = nf(_ndat, axis=1) + assert_almost_equal(res, tgt) + + @pytest.mark.parametrize("axis", [None, 0, 1]) + @pytest.mark.parametrize("dtype", np.typecodes["AllFloat"]) + @pytest.mark.parametrize("array", [ + np.array(np.nan), + np.full((3, 3), np.nan), + ], ids=["0d", "2d"]) + def test_allnans(self, axis, dtype, array): + if axis is not None and array.ndim == 0: + pytest.skip(f"`axis != None` not supported for 0d arrays") + + array = array.astype(dtype) + match = "All-NaN slice encountered" + for func in self.nanfuncs: + with pytest.warns(RuntimeWarning, match=match): + out = func(array, axis=axis) + assert np.isnan(out).all() + assert out.dtype == array.dtype + + def test_masked(self): + mat = np.ma.fix_invalid(_ndat) + msk = mat._mask.copy() + for f in [np.nanmin]: + res = f(mat, axis=1) + tgt = f(_ndat, axis=1) + assert_equal(res, tgt) + assert_equal(mat._mask, msk) + assert_(not np.isinf(mat).any()) + + def test_scalar(self): + for f in self.nanfuncs: + assert_(f(0.) == 0.) 
+
+    def test_subclass(self):
+        class MyNDArray(np.ndarray):
+            pass
+
+        # Check that it works and that type and
+        # shape are preserved
+        mine = np.eye(3).view(MyNDArray)
+        for f in self.nanfuncs:
+            res = f(mine, axis=0)
+            assert_(isinstance(res, MyNDArray))
+            assert_(res.shape == (3,))
+            res = f(mine, axis=1)
+            assert_(isinstance(res, MyNDArray))
+            assert_(res.shape == (3,))
+            res = f(mine)
+            assert_(res.shape == ())
+
+        # check that rows of nan are dealt with for subclasses (#4628)
+        mine[1] = np.nan
+        for f in self.nanfuncs:
+            with warnings.catch_warnings(record=True) as w:
+                warnings.simplefilter('always')
+                res = f(mine, axis=0)
+                assert_(isinstance(res, MyNDArray))
+                assert_(not np.any(np.isnan(res)))
+                assert_(len(w) == 0)
+
+            with warnings.catch_warnings(record=True) as w:
+                warnings.simplefilter('always')
+                res = f(mine, axis=1)
+                assert_(isinstance(res, MyNDArray))
+                assert_(np.isnan(res[1]) and not np.isnan(res[0])
+                        and not np.isnan(res[2]))
+                assert_(len(w) == 1, 'no warning raised')
+                assert_(issubclass(w[0].category, RuntimeWarning))
+
+            with warnings.catch_warnings(record=True) as w:
+                warnings.simplefilter('always')
+                res = f(mine)
+                assert_(res.shape == ())
+                # note: ``res != np.nan`` would be vacuously true, so test
+                # explicitly that the scalar result is not NaN
+                assert_(not np.isnan(res))
+                assert_(len(w) == 0)
+
+    def test_object_array(self):
+        arr = np.array([[1.0, 2.0], [np.nan, 4.0], [np.nan, np.nan]], dtype=object)
+        assert_equal(np.nanmin(arr), 1.0)
+        assert_equal(np.nanmin(arr, axis=0), [1.0, 2.0])
+
+        with warnings.catch_warnings(record=True) as w:
+            warnings.simplefilter('always')
+            # assert_equal does not work on object arrays of nan
+            assert_equal(list(np.nanmin(arr, axis=1)), [1.0, 4.0, np.nan])
+            assert_(len(w) == 1, 'no warning raised')
+            assert_(issubclass(w[0].category, RuntimeWarning))
+
+    @pytest.mark.parametrize("dtype", np.typecodes["AllFloat"])
+    def test_initial(self, dtype):
+        class MyNDArray(np.ndarray):
+            pass
+
+        ar = np.arange(9).astype(dtype)
+        ar[:5] = np.nan
+
+        for f in self.nanfuncs:
+            initial = 100 if f is np.nanmax else 0
+
+            ret1 = f(ar, initial=initial)
+            assert ret1.dtype == dtype
+            assert ret1 == initial
+
+            ret2 = f(ar.view(MyNDArray), initial=initial)
+            assert ret2.dtype == dtype
+            assert ret2 == initial
+
+    @pytest.mark.parametrize("dtype", np.typecodes["AllFloat"])
+    def test_where(self, dtype):
+        class MyNDArray(np.ndarray):
+            pass
+
+        ar = np.arange(9).reshape(3, 3).astype(dtype)
+        ar[0, :] = np.nan
+        where = np.ones_like(ar, dtype=np.bool_)
+        where[:, 0] = False
+
+        for f in self.nanfuncs:
+            reference = 4 if f is np.nanmin else 8
+
+            ret1 = f(ar, where=where, initial=5)
+            assert ret1.dtype == dtype
+            assert ret1 == reference
+
+            ret2 = f(ar.view(MyNDArray), where=where, initial=5)
+            assert ret2.dtype == dtype
+            assert ret2 == reference
+
+
+class TestNanFunctions_ArgminArgmax:
+
+    nanfuncs = [np.nanargmin, np.nanargmax]
+
+    def test_mutation(self):
+        # Check that passed array is not modified.
+ ndat = _ndat.copy() + for f in self.nanfuncs: + f(ndat) + assert_equal(ndat, _ndat) + + def test_result_values(self): + for f, fcmp in zip(self.nanfuncs, [np.greater, np.less]): + for row in _ndat: + with suppress_warnings() as sup: + sup.filter(RuntimeWarning, "invalid value encountered in") + ind = f(row) + val = row[ind] + # comparing with NaN is tricky as the result + # is always false except for NaN != NaN + assert_(not np.isnan(val)) + assert_(not fcmp(val, row).any()) + assert_(not np.equal(val, row[:ind]).any()) + + @pytest.mark.parametrize("axis", [None, 0, 1]) + @pytest.mark.parametrize("dtype", np.typecodes["AllFloat"]) + @pytest.mark.parametrize("array", [ + np.array(np.nan), + np.full((3, 3), np.nan), + ], ids=["0d", "2d"]) + def test_allnans(self, axis, dtype, array): + if axis is not None and array.ndim == 0: + pytest.skip(f"`axis != None` not supported for 0d arrays") + + array = array.astype(dtype) + for func in self.nanfuncs: + with pytest.raises(ValueError, match="All-NaN slice encountered"): + func(array, axis=axis) + + def test_empty(self): + mat = np.zeros((0, 3)) + for f in self.nanfuncs: + for axis in [0, None]: + assert_raises(ValueError, f, mat, axis=axis) + for axis in [1]: + res = f(mat, axis=axis) + assert_equal(res, np.zeros(0)) + + def test_scalar(self): + for f in self.nanfuncs: + assert_(f(0.) == 0.) + + def test_subclass(self): + class MyNDArray(np.ndarray): + pass + + # Check that it works and that type and + # shape are preserved + mine = np.eye(3).view(MyNDArray) + for f in self.nanfuncs: + res = f(mine, axis=0) + assert_(isinstance(res, MyNDArray)) + assert_(res.shape == (3,)) + res = f(mine, axis=1) + assert_(isinstance(res, MyNDArray)) + assert_(res.shape == (3,)) + res = f(mine) + assert_(res.shape == ()) + + @pytest.mark.parametrize("dtype", np.typecodes["AllFloat"]) + def test_keepdims(self, dtype): + ar = np.arange(9).astype(dtype) + ar[:5] = np.nan + + for f in self.nanfuncs: + reference = 5 if f is np.nanargmin else 8 + ret = f(ar, keepdims=True) + assert ret.ndim == ar.ndim + assert ret == reference + + @pytest.mark.parametrize("dtype", np.typecodes["AllFloat"]) + def test_out(self, dtype): + ar = np.arange(9).astype(dtype) + ar[:5] = np.nan + + for f in self.nanfuncs: + out = np.zeros((), dtype=np.intp) + reference = 5 if f is np.nanargmin else 8 + ret = f(ar, out=out) + assert ret is out + assert ret == reference + + + +_TEST_ARRAYS = { + "0d": np.array(5), + "1d": np.array([127, 39, 93, 87, 46]) +} +for _v in _TEST_ARRAYS.values(): + _v.setflags(write=False) + + +@pytest.mark.parametrize( + "dtype", + np.typecodes["AllInteger"] + np.typecodes["AllFloat"] + "O", +) +@pytest.mark.parametrize("mat", _TEST_ARRAYS.values(), ids=_TEST_ARRAYS.keys()) +class TestNanFunctions_NumberTypes: + nanfuncs = { + np.nanmin: np.min, + np.nanmax: np.max, + np.nanargmin: np.argmin, + np.nanargmax: np.argmax, + np.nansum: np.sum, + np.nanprod: np.prod, + np.nancumsum: np.cumsum, + np.nancumprod: np.cumprod, + np.nanmean: np.mean, + np.nanmedian: np.median, + np.nanvar: np.var, + np.nanstd: np.std, + } + nanfunc_ids = [i.__name__ for i in nanfuncs] + + @pytest.mark.parametrize("nanfunc,func", nanfuncs.items(), ids=nanfunc_ids) + @np.errstate(over="ignore") + def test_nanfunc(self, mat, dtype, nanfunc, func): + mat = mat.astype(dtype) + tgt = func(mat) + out = nanfunc(mat) + + assert_almost_equal(out, tgt) + if dtype == "O": + assert type(out) is type(tgt) + else: + assert out.dtype == tgt.dtype + + @pytest.mark.parametrize( + "nanfunc,func", + 
[(np.nanquantile, np.quantile), (np.nanpercentile, np.percentile)], + ids=["nanquantile", "nanpercentile"], + ) + def test_nanfunc_q(self, mat, dtype, nanfunc, func): + mat = mat.astype(dtype) + tgt = func(mat, q=1) + out = nanfunc(mat, q=1) + + assert_almost_equal(out, tgt) + if dtype == "O": + assert type(out) is type(tgt) + else: + assert out.dtype == tgt.dtype + + @pytest.mark.parametrize( + "nanfunc,func", + [(np.nanvar, np.var), (np.nanstd, np.std)], + ids=["nanvar", "nanstd"], + ) + def test_nanfunc_ddof(self, mat, dtype, nanfunc, func): + mat = mat.astype(dtype) + tgt = func(mat, ddof=0.5) + out = nanfunc(mat, ddof=0.5) + + assert_almost_equal(out, tgt) + if dtype == "O": + assert type(out) is type(tgt) + else: + assert out.dtype == tgt.dtype + + +class SharedNanFunctionsTestsMixin: + def test_mutation(self): + # Check that passed array is not modified. + ndat = _ndat.copy() + for f in self.nanfuncs: + f(ndat) + assert_equal(ndat, _ndat) + + def test_keepdims(self): + mat = np.eye(3) + for nf, rf in zip(self.nanfuncs, self.stdfuncs): + for axis in [None, 0, 1]: + tgt = rf(mat, axis=axis, keepdims=True) + res = nf(mat, axis=axis, keepdims=True) + assert_(res.ndim == tgt.ndim) + + def test_out(self): + mat = np.eye(3) + for nf, rf in zip(self.nanfuncs, self.stdfuncs): + resout = np.zeros(3) + tgt = rf(mat, axis=1) + res = nf(mat, axis=1, out=resout) + assert_almost_equal(res, resout) + assert_almost_equal(res, tgt) + + def test_dtype_from_dtype(self): + mat = np.eye(3) + codes = 'efdgFDG' + for nf, rf in zip(self.nanfuncs, self.stdfuncs): + for c in codes: + with suppress_warnings() as sup: + if nf in {np.nanstd, np.nanvar} and c in 'FDG': + # Giving the warning is a small bug, see gh-8000 + sup.filter(np.ComplexWarning) + tgt = rf(mat, dtype=np.dtype(c), axis=1).dtype.type + res = nf(mat, dtype=np.dtype(c), axis=1).dtype.type + assert_(res is tgt) + # scalar case + tgt = rf(mat, dtype=np.dtype(c), axis=None).dtype.type + res = nf(mat, dtype=np.dtype(c), axis=None).dtype.type + assert_(res is tgt) + + def test_dtype_from_char(self): + mat = np.eye(3) + codes = 'efdgFDG' + for nf, rf in zip(self.nanfuncs, self.stdfuncs): + for c in codes: + with suppress_warnings() as sup: + if nf in {np.nanstd, np.nanvar} and c in 'FDG': + # Giving the warning is a small bug, see gh-8000 + sup.filter(np.ComplexWarning) + tgt = rf(mat, dtype=c, axis=1).dtype.type + res = nf(mat, dtype=c, axis=1).dtype.type + assert_(res is tgt) + # scalar case + tgt = rf(mat, dtype=c, axis=None).dtype.type + res = nf(mat, dtype=c, axis=None).dtype.type + assert_(res is tgt) + + def test_dtype_from_input(self): + codes = 'efdgFDG' + for nf, rf in zip(self.nanfuncs, self.stdfuncs): + for c in codes: + mat = np.eye(3, dtype=c) + tgt = rf(mat, axis=1).dtype.type + res = nf(mat, axis=1).dtype.type + assert_(res is tgt, "res %s, tgt %s" % (res, tgt)) + # scalar case + tgt = rf(mat, axis=None).dtype.type + res = nf(mat, axis=None).dtype.type + assert_(res is tgt) + + def test_result_values(self): + for nf, rf in zip(self.nanfuncs, self.stdfuncs): + tgt = [rf(d) for d in _rdat] + res = nf(_ndat, axis=1) + assert_almost_equal(res, tgt) + + def test_scalar(self): + for f in self.nanfuncs: + assert_(f(0.) == 0.) 
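+
+    # The functions sharing these tests follow one pattern: substitute the
+    # reduction's identity for NaN and defer to the regular reduction. A
+    # hedged sketch (an illustration, not the verbatim library code, which
+    # routes this through the `_replace_nan` helper tested at the end of
+    # this file):
+    #
+    #     def naive_nansum(a, axis=None, dtype=None, out=None, keepdims=False):
+    #         a = np.asanyarray(a)
+    #         clean = np.where(np.isnan(a), 0, a)
+    #         return np.sum(clean, axis=axis, dtype=dtype, out=out,
+    #                       keepdims=keepdims)
+    #
+    # which is why axis/dtype/out/keepdims are expected to behave exactly as
+    # in the corresponding std functions.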
+ + def test_subclass(self): + class MyNDArray(np.ndarray): + pass + + # Check that it works and that type and + # shape are preserved + array = np.eye(3) + mine = array.view(MyNDArray) + for f in self.nanfuncs: + expected_shape = f(array, axis=0).shape + res = f(mine, axis=0) + assert_(isinstance(res, MyNDArray)) + assert_(res.shape == expected_shape) + expected_shape = f(array, axis=1).shape + res = f(mine, axis=1) + assert_(isinstance(res, MyNDArray)) + assert_(res.shape == expected_shape) + expected_shape = f(array).shape + res = f(mine) + assert_(isinstance(res, MyNDArray)) + assert_(res.shape == expected_shape) + + +class TestNanFunctions_SumProd(SharedNanFunctionsTestsMixin): + + nanfuncs = [np.nansum, np.nanprod] + stdfuncs = [np.sum, np.prod] + + @pytest.mark.parametrize("axis", [None, 0, 1]) + @pytest.mark.parametrize("dtype", np.typecodes["AllFloat"]) + @pytest.mark.parametrize("array", [ + np.array(np.nan), + np.full((3, 3), np.nan), + ], ids=["0d", "2d"]) + def test_allnans(self, axis, dtype, array): + if axis is not None and array.ndim == 0: + pytest.skip(f"`axis != None` not supported for 0d arrays") + + array = array.astype(dtype) + for func, identity in zip(self.nanfuncs, [0, 1]): + out = func(array, axis=axis) + assert np.all(out == identity) + assert out.dtype == array.dtype + + def test_empty(self): + for f, tgt_value in zip([np.nansum, np.nanprod], [0, 1]): + mat = np.zeros((0, 3)) + tgt = [tgt_value]*3 + res = f(mat, axis=0) + assert_equal(res, tgt) + tgt = [] + res = f(mat, axis=1) + assert_equal(res, tgt) + tgt = tgt_value + res = f(mat, axis=None) + assert_equal(res, tgt) + + @pytest.mark.parametrize("dtype", np.typecodes["AllFloat"]) + def test_initial(self, dtype): + ar = np.arange(9).astype(dtype) + ar[:5] = np.nan + + for f in self.nanfuncs: + reference = 28 if f is np.nansum else 3360 + ret = f(ar, initial=2) + assert ret.dtype == dtype + assert ret == reference + + @pytest.mark.parametrize("dtype", np.typecodes["AllFloat"]) + def test_where(self, dtype): + ar = np.arange(9).reshape(3, 3).astype(dtype) + ar[0, :] = np.nan + where = np.ones_like(ar, dtype=np.bool_) + where[:, 0] = False + + for f in self.nanfuncs: + reference = 26 if f is np.nansum else 2240 + ret = f(ar, where=where, initial=2) + assert ret.dtype == dtype + assert ret == reference + + +class TestNanFunctions_CumSumProd(SharedNanFunctionsTestsMixin): + + nanfuncs = [np.nancumsum, np.nancumprod] + stdfuncs = [np.cumsum, np.cumprod] + + @pytest.mark.parametrize("axis", [None, 0, 1]) + @pytest.mark.parametrize("dtype", np.typecodes["AllFloat"]) + @pytest.mark.parametrize("array", [ + np.array(np.nan), + np.full((3, 3), np.nan) + ], ids=["0d", "2d"]) + def test_allnans(self, axis, dtype, array): + if axis is not None and array.ndim == 0: + pytest.skip(f"`axis != None` not supported for 0d arrays") + + array = array.astype(dtype) + for func, identity in zip(self.nanfuncs, [0, 1]): + out = func(array) + assert np.all(out == identity) + assert out.dtype == array.dtype + + def test_empty(self): + for f, tgt_value in zip(self.nanfuncs, [0, 1]): + mat = np.zeros((0, 3)) + tgt = tgt_value*np.ones((0, 3)) + res = f(mat, axis=0) + assert_equal(res, tgt) + tgt = mat + res = f(mat, axis=1) + assert_equal(res, tgt) + tgt = np.zeros((0)) + res = f(mat, axis=None) + assert_equal(res, tgt) + + def test_keepdims(self): + for f, g in zip(self.nanfuncs, self.stdfuncs): + mat = np.eye(3) + for axis in [None, 0, 1]: + tgt = f(mat, axis=axis, out=None) + res = g(mat, axis=axis, out=None) + assert_(res.ndim == tgt.ndim) 
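+
+        # Unlike the reductions above, the cumulative functions never
+        # collapse a dimension: for any concrete axis the result keeps the
+        # input's shape, and axis=None flattens first, so the (3, 5, 7, 11)
+        # input below yields 3 * 5 * 7 * 11 == 1155 elements.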
+ + for f in self.nanfuncs: + d = np.ones((3, 5, 7, 11)) + # Randomly set some elements to NaN: + rs = np.random.RandomState(0) + d[rs.rand(*d.shape) < 0.5] = np.nan + res = f(d, axis=None) + assert_equal(res.shape, (1155,)) + for axis in np.arange(4): + res = f(d, axis=axis) + assert_equal(res.shape, (3, 5, 7, 11)) + + def test_result_values(self): + for axis in (-2, -1, 0, 1, None): + tgt = np.cumprod(_ndat_ones, axis=axis) + res = np.nancumprod(_ndat, axis=axis) + assert_almost_equal(res, tgt) + tgt = np.cumsum(_ndat_zeros,axis=axis) + res = np.nancumsum(_ndat, axis=axis) + assert_almost_equal(res, tgt) + + def test_out(self): + mat = np.eye(3) + for nf, rf in zip(self.nanfuncs, self.stdfuncs): + resout = np.eye(3) + for axis in (-2, -1, 0, 1): + tgt = rf(mat, axis=axis) + res = nf(mat, axis=axis, out=resout) + assert_almost_equal(res, resout) + assert_almost_equal(res, tgt) + + +class TestNanFunctions_MeanVarStd(SharedNanFunctionsTestsMixin): + + nanfuncs = [np.nanmean, np.nanvar, np.nanstd] + stdfuncs = [np.mean, np.var, np.std] + + def test_dtype_error(self): + for f in self.nanfuncs: + for dtype in [np.bool_, np.int_, np.object_]: + assert_raises(TypeError, f, _ndat, axis=1, dtype=dtype) + + def test_out_dtype_error(self): + for f in self.nanfuncs: + for dtype in [np.bool_, np.int_, np.object_]: + out = np.empty(_ndat.shape[0], dtype=dtype) + assert_raises(TypeError, f, _ndat, axis=1, out=out) + + def test_ddof(self): + nanfuncs = [np.nanvar, np.nanstd] + stdfuncs = [np.var, np.std] + for nf, rf in zip(nanfuncs, stdfuncs): + for ddof in [0, 1]: + tgt = [rf(d, ddof=ddof) for d in _rdat] + res = nf(_ndat, axis=1, ddof=ddof) + assert_almost_equal(res, tgt) + + def test_ddof_too_big(self): + nanfuncs = [np.nanvar, np.nanstd] + stdfuncs = [np.var, np.std] + dsize = [len(d) for d in _rdat] + for nf, rf in zip(nanfuncs, stdfuncs): + for ddof in range(5): + with suppress_warnings() as sup: + sup.record(RuntimeWarning) + sup.filter(np.ComplexWarning) + tgt = [ddof >= d for d in dsize] + res = nf(_ndat, axis=1, ddof=ddof) + assert_equal(np.isnan(res), tgt) + if any(tgt): + assert_(len(sup.log) == 1) + else: + assert_(len(sup.log) == 0) + + @pytest.mark.parametrize("axis", [None, 0, 1]) + @pytest.mark.parametrize("dtype", np.typecodes["AllFloat"]) + @pytest.mark.parametrize("array", [ + np.array(np.nan), + np.full((3, 3), np.nan), + ], ids=["0d", "2d"]) + def test_allnans(self, axis, dtype, array): + if axis is not None and array.ndim == 0: + pytest.skip(f"`axis != None` not supported for 0d arrays") + + array = array.astype(dtype) + match = "(Degrees of freedom <= 0 for slice.)|(Mean of empty slice)" + for func in self.nanfuncs: + with pytest.warns(RuntimeWarning, match=match): + out = func(array, axis=axis) + assert np.isnan(out).all() + + # `nanvar` and `nanstd` convert complex inputs to their + # corresponding floating dtype + if func is np.nanmean: + assert out.dtype == array.dtype + else: + assert out.dtype == np.abs(array).dtype + + def test_empty(self): + mat = np.zeros((0, 3)) + for f in self.nanfuncs: + for axis in [0, None]: + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + assert_(np.isnan(f(mat, axis=axis)).all()) + assert_(len(w) == 1) + assert_(issubclass(w[0].category, RuntimeWarning)) + for axis in [1]: + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + assert_equal(f(mat, axis=axis), np.zeros([])) + assert_(len(w) == 0) + + @pytest.mark.parametrize("dtype", np.typecodes["AllFloat"]) + def test_where(self, 
dtype):
+        ar = np.arange(9).reshape(3, 3).astype(dtype)
+        ar[0, :] = np.nan
+        where = np.ones_like(ar, dtype=np.bool_)
+        where[:, 0] = False
+
+        for f, f_std in zip(self.nanfuncs, self.stdfuncs):
+            reference = f_std(ar[where][2:])
+            dtype_reference = dtype if f is np.nanmean else ar.real.dtype
+
+            ret = f(ar, where=where)
+            assert ret.dtype == dtype_reference
+            np.testing.assert_allclose(ret, reference)
+
+
+_TIME_UNITS = (
+    "Y", "M", "W", "D", "h", "m", "s", "ms", "us", "ns", "ps", "fs", "as"
+)
+
+# All `inexact` + `timedelta64` type codes
+_TYPE_CODES = list(np.typecodes["AllFloat"])
+_TYPE_CODES += [f"m8[{unit}]" for unit in _TIME_UNITS]
+
+
+class TestNanFunctions_Median:
+
+    def test_mutation(self):
+        # Check that passed array is not modified.
+        ndat = _ndat.copy()
+        np.nanmedian(ndat)
+        assert_equal(ndat, _ndat)
+
+    def test_keepdims(self):
+        mat = np.eye(3)
+        for axis in [None, 0, 1]:
+            tgt = np.median(mat, axis=axis, out=None, overwrite_input=False)
+            res = np.nanmedian(mat, axis=axis, out=None, overwrite_input=False)
+            assert_(res.ndim == tgt.ndim)
+
+        d = np.ones((3, 5, 7, 11))
+        # Randomly set some elements to NaN:
+        w = np.random.random((4, 200)) * np.array(d.shape)[:, None]
+        w = w.astype(np.intp)
+        d[tuple(w)] = np.nan
+        with suppress_warnings() as sup:
+            sup.filter(RuntimeWarning)
+            res = np.nanmedian(d, axis=None, keepdims=True)
+            assert_equal(res.shape, (1, 1, 1, 1))
+            res = np.nanmedian(d, axis=(0, 1), keepdims=True)
+            assert_equal(res.shape, (1, 1, 7, 11))
+            res = np.nanmedian(d, axis=(0, 3), keepdims=True)
+            assert_equal(res.shape, (1, 5, 7, 1))
+            res = np.nanmedian(d, axis=(1,), keepdims=True)
+            assert_equal(res.shape, (3, 1, 7, 11))
+            res = np.nanmedian(d, axis=(0, 1, 2, 3), keepdims=True)
+            assert_equal(res.shape, (1, 1, 1, 1))
+            res = np.nanmedian(d, axis=(0, 1, 3), keepdims=True)
+            assert_equal(res.shape, (1, 1, 7, 1))
+
+    def test_out(self):
+        mat = np.random.rand(3, 3)
+        nan_mat = np.insert(mat, [0, 2], np.nan, axis=1)
+        resout = np.zeros(3)
+        tgt = np.median(mat, axis=1)
+        res = np.nanmedian(nan_mat, axis=1, out=resout)
+        assert_almost_equal(res, resout)
+        assert_almost_equal(res, tgt)
+        # 0-d output:
+        resout = np.zeros(())
+        tgt = np.median(mat, axis=None)
+        res = np.nanmedian(nan_mat, axis=None, out=resout)
+        assert_almost_equal(res, resout)
+        assert_almost_equal(res, tgt)
+        res = np.nanmedian(nan_mat, axis=(0, 1), out=resout)
+        assert_almost_equal(res, resout)
+        assert_almost_equal(res, tgt)
+
+    def test_small_large(self):
+        # test the small and large code paths, current cutoff 400 elements
+        for s in [5, 20, 51, 200, 1000]:
+            d = np.random.randn(4, s)
+            # Randomly set some elements to NaN:
+            w = np.random.randint(0, d.size, size=d.size // 5)
+            d.ravel()[w] = np.nan
+            d[:, 0] = 1.
# ensure at least one good value + # use normal median without nans to compare + tgt = [] + for x in d: + nonan = np.compress(~np.isnan(x), x) + tgt.append(np.median(nonan, overwrite_input=True)) + + assert_array_equal(np.nanmedian(d, axis=-1), tgt) + + def test_result_values(self): + tgt = [np.median(d) for d in _rdat] + res = np.nanmedian(_ndat, axis=1) + assert_almost_equal(res, tgt) + + @pytest.mark.parametrize("axis", [None, 0, 1]) + @pytest.mark.parametrize("dtype", _TYPE_CODES) + def test_allnans(self, dtype, axis): + mat = np.full((3, 3), np.nan).astype(dtype) + with suppress_warnings() as sup: + sup.record(RuntimeWarning) + + output = np.nanmedian(mat, axis=axis) + assert output.dtype == mat.dtype + assert np.isnan(output).all() + + if axis is None: + assert_(len(sup.log) == 1) + else: + assert_(len(sup.log) == 3) + + # Check scalar + scalar = np.array(np.nan).astype(dtype)[()] + output_scalar = np.nanmedian(scalar) + assert output_scalar.dtype == scalar.dtype + assert np.isnan(output_scalar) + + if axis is None: + assert_(len(sup.log) == 2) + else: + assert_(len(sup.log) == 4) + + def test_empty(self): + mat = np.zeros((0, 3)) + for axis in [0, None]: + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + assert_(np.isnan(np.nanmedian(mat, axis=axis)).all()) + assert_(len(w) == 1) + assert_(issubclass(w[0].category, RuntimeWarning)) + for axis in [1]: + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + assert_equal(np.nanmedian(mat, axis=axis), np.zeros([])) + assert_(len(w) == 0) + + def test_scalar(self): + assert_(np.nanmedian(0.) == 0.) + + def test_extended_axis_invalid(self): + d = np.ones((3, 5, 7, 11)) + assert_raises(np.AxisError, np.nanmedian, d, axis=-5) + assert_raises(np.AxisError, np.nanmedian, d, axis=(0, -5)) + assert_raises(np.AxisError, np.nanmedian, d, axis=4) + assert_raises(np.AxisError, np.nanmedian, d, axis=(0, 4)) + assert_raises(ValueError, np.nanmedian, d, axis=(1, 1)) + + def test_float_special(self): + with suppress_warnings() as sup: + sup.filter(RuntimeWarning) + for inf in [np.inf, -np.inf]: + a = np.array([[inf, np.nan], [np.nan, np.nan]]) + assert_equal(np.nanmedian(a, axis=0), [inf, np.nan]) + assert_equal(np.nanmedian(a, axis=1), [inf, np.nan]) + assert_equal(np.nanmedian(a), inf) + + # minimum fill value check + a = np.array([[np.nan, np.nan, inf], + [np.nan, np.nan, inf]]) + assert_equal(np.nanmedian(a), inf) + assert_equal(np.nanmedian(a, axis=0), [np.nan, np.nan, inf]) + assert_equal(np.nanmedian(a, axis=1), inf) + + # no mask path + a = np.array([[inf, inf], [inf, inf]]) + assert_equal(np.nanmedian(a, axis=1), inf) + + a = np.array([[inf, 7, -inf, -9], + [-10, np.nan, np.nan, 5], + [4, np.nan, np.nan, inf]], + dtype=np.float32) + if inf > 0: + assert_equal(np.nanmedian(a, axis=0), [4., 7., -inf, 5.]) + assert_equal(np.nanmedian(a), 4.5) + else: + assert_equal(np.nanmedian(a, axis=0), [-10., 7., -inf, -9.]) + assert_equal(np.nanmedian(a), -2.5) + assert_equal(np.nanmedian(a, axis=-1), [-1., -2.5, inf]) + + for i in range(0, 10): + for j in range(1, 10): + a = np.array([([np.nan] * i) + ([inf] * j)] * 2) + assert_equal(np.nanmedian(a), inf) + assert_equal(np.nanmedian(a, axis=1), inf) + assert_equal(np.nanmedian(a, axis=0), + ([np.nan] * i) + [inf] * j) + + a = np.array([([np.nan] * i) + ([-inf] * j)] * 2) + assert_equal(np.nanmedian(a), -inf) + assert_equal(np.nanmedian(a, axis=1), -inf) + assert_equal(np.nanmedian(a, axis=0), + ([np.nan] * i) + [-inf] * j) + + +class 
TestNanFunctions_Percentile: + + def test_mutation(self): + # Check that passed array is not modified. + ndat = _ndat.copy() + np.nanpercentile(ndat, 30) + assert_equal(ndat, _ndat) + + def test_keepdims(self): + mat = np.eye(3) + for axis in [None, 0, 1]: + tgt = np.percentile(mat, 70, axis=axis, out=None, + overwrite_input=False) + res = np.nanpercentile(mat, 70, axis=axis, out=None, + overwrite_input=False) + assert_(res.ndim == tgt.ndim) + + d = np.ones((3, 5, 7, 11)) + # Randomly set some elements to NaN: + w = np.random.random((4, 200)) * np.array(d.shape)[:, None] + w = w.astype(np.intp) + d[tuple(w)] = np.nan + with suppress_warnings() as sup: + sup.filter(RuntimeWarning) + res = np.nanpercentile(d, 90, axis=None, keepdims=True) + assert_equal(res.shape, (1, 1, 1, 1)) + res = np.nanpercentile(d, 90, axis=(0, 1), keepdims=True) + assert_equal(res.shape, (1, 1, 7, 11)) + res = np.nanpercentile(d, 90, axis=(0, 3), keepdims=True) + assert_equal(res.shape, (1, 5, 7, 1)) + res = np.nanpercentile(d, 90, axis=(1,), keepdims=True) + assert_equal(res.shape, (3, 1, 7, 11)) + res = np.nanpercentile(d, 90, axis=(0, 1, 2, 3), keepdims=True) + assert_equal(res.shape, (1, 1, 1, 1)) + res = np.nanpercentile(d, 90, axis=(0, 1, 3), keepdims=True) + assert_equal(res.shape, (1, 1, 7, 1)) + + def test_out(self): + mat = np.random.rand(3, 3) + nan_mat = np.insert(mat, [0, 2], np.nan, axis=1) + resout = np.zeros(3) + tgt = np.percentile(mat, 42, axis=1) + res = np.nanpercentile(nan_mat, 42, axis=1, out=resout) + assert_almost_equal(res, resout) + assert_almost_equal(res, tgt) + # 0-d output: + resout = np.zeros(()) + tgt = np.percentile(mat, 42, axis=None) + res = np.nanpercentile(nan_mat, 42, axis=None, out=resout) + assert_almost_equal(res, resout) + assert_almost_equal(res, tgt) + res = np.nanpercentile(nan_mat, 42, axis=(0, 1), out=resout) + assert_almost_equal(res, resout) + assert_almost_equal(res, tgt) + + def test_result_values(self): + tgt = [np.percentile(d, 28) for d in _rdat] + res = np.nanpercentile(_ndat, 28, axis=1) + assert_almost_equal(res, tgt) + # Transpose the array to fit the output convention of numpy.percentile + tgt = np.transpose([np.percentile(d, (28, 98)) for d in _rdat]) + res = np.nanpercentile(_ndat, (28, 98), axis=1) + assert_almost_equal(res, tgt) + + @pytest.mark.parametrize("axis", [None, 0, 1]) + @pytest.mark.parametrize("dtype", np.typecodes["AllFloat"]) + @pytest.mark.parametrize("array", [ + np.array(np.nan), + np.full((3, 3), np.nan), + ], ids=["0d", "2d"]) + def test_allnans(self, axis, dtype, array): + if axis is not None and array.ndim == 0: + pytest.skip(f"`axis != None` not supported for 0d arrays") + + array = array.astype(dtype) + with pytest.warns(RuntimeWarning, match="All-NaN slice encountered"): + out = np.nanpercentile(array, 60, axis=axis) + assert np.isnan(out).all() + assert out.dtype == array.dtype + + def test_empty(self): + mat = np.zeros((0, 3)) + for axis in [0, None]: + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + assert_(np.isnan(np.nanpercentile(mat, 40, axis=axis)).all()) + assert_(len(w) == 1) + assert_(issubclass(w[0].category, RuntimeWarning)) + for axis in [1]: + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + assert_equal(np.nanpercentile(mat, 40, axis=axis), np.zeros([])) + assert_(len(w) == 0) + + def test_scalar(self): + assert_equal(np.nanpercentile(0., 100), 0.) 
+ a = np.arange(6) + r = np.nanpercentile(a, 50, axis=0) + assert_equal(r, 2.5) + assert_(np.isscalar(r)) + + def test_extended_axis_invalid(self): + d = np.ones((3, 5, 7, 11)) + assert_raises(np.AxisError, np.nanpercentile, d, q=5, axis=-5) + assert_raises(np.AxisError, np.nanpercentile, d, q=5, axis=(0, -5)) + assert_raises(np.AxisError, np.nanpercentile, d, q=5, axis=4) + assert_raises(np.AxisError, np.nanpercentile, d, q=5, axis=(0, 4)) + assert_raises(ValueError, np.nanpercentile, d, q=5, axis=(1, 1)) + + def test_multiple_percentiles(self): + perc = [50, 100] + mat = np.ones((4, 3)) + nan_mat = np.nan * mat + # For checking consistency in higher dimensional case + large_mat = np.ones((3, 4, 5)) + large_mat[:, 0:2:4, :] = 0 + large_mat[:, :, 3:] *= 2 + for axis in [None, 0, 1]: + for keepdim in [False, True]: + with suppress_warnings() as sup: + sup.filter(RuntimeWarning, "All-NaN slice encountered") + val = np.percentile(mat, perc, axis=axis, keepdims=keepdim) + nan_val = np.nanpercentile(nan_mat, perc, axis=axis, + keepdims=keepdim) + assert_equal(nan_val.shape, val.shape) + + val = np.percentile(large_mat, perc, axis=axis, + keepdims=keepdim) + nan_val = np.nanpercentile(large_mat, perc, axis=axis, + keepdims=keepdim) + assert_equal(nan_val, val) + + megamat = np.ones((3, 4, 5, 6)) + assert_equal(np.nanpercentile(megamat, perc, axis=(1, 2)).shape, (2, 3, 6)) + + +class TestNanFunctions_Quantile: + # most of this is already tested by TestPercentile + + def test_regression(self): + ar = np.arange(24).reshape(2, 3, 4).astype(float) + ar[0][1] = np.nan + + assert_equal(np.nanquantile(ar, q=0.5), np.nanpercentile(ar, q=50)) + assert_equal(np.nanquantile(ar, q=0.5, axis=0), + np.nanpercentile(ar, q=50, axis=0)) + assert_equal(np.nanquantile(ar, q=0.5, axis=1), + np.nanpercentile(ar, q=50, axis=1)) + assert_equal(np.nanquantile(ar, q=[0.5], axis=1), + np.nanpercentile(ar, q=[50], axis=1)) + assert_equal(np.nanquantile(ar, q=[0.25, 0.5, 0.75], axis=1), + np.nanpercentile(ar, q=[25, 50, 75], axis=1)) + + def test_basic(self): + x = np.arange(8) * 0.5 + assert_equal(np.nanquantile(x, 0), 0.) 
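+        # With the default linear interpolation, quantile q sits at virtual
+        # index q * (n - 1); here x = [0, 0.5, ..., 3.5] with n == 8, so
+        # q=0.5 gives index 3.5, i.e. x[3] + 0.5 * (x[4] - x[3]) == 1.75,
+        # matching the assertions below.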
+ assert_equal(np.nanquantile(x, 1), 3.5) + assert_equal(np.nanquantile(x, 0.5), 1.75) + + def test_no_p_overwrite(self): + # this is worth retesting, because quantile does not make a copy + p0 = np.array([0, 0.75, 0.25, 0.5, 1.0]) + p = p0.copy() + np.nanquantile(np.arange(100.), p, method="midpoint") + assert_array_equal(p, p0) + + p0 = p0.tolist() + p = p.tolist() + np.nanquantile(np.arange(100.), p, method="midpoint") + assert_array_equal(p, p0) + + @pytest.mark.parametrize("axis", [None, 0, 1]) + @pytest.mark.parametrize("dtype", np.typecodes["AllFloat"]) + @pytest.mark.parametrize("array", [ + np.array(np.nan), + np.full((3, 3), np.nan), + ], ids=["0d", "2d"]) + def test_allnans(self, axis, dtype, array): + if axis is not None and array.ndim == 0: + pytest.skip(f"`axis != None` not supported for 0d arrays") + + array = array.astype(dtype) + with pytest.warns(RuntimeWarning, match="All-NaN slice encountered"): + out = np.nanquantile(array, 1, axis=axis) + assert np.isnan(out).all() + assert out.dtype == array.dtype + +@pytest.mark.parametrize("arr, expected", [ + # array of floats with some nans + (np.array([np.nan, 5.0, np.nan, np.inf]), + np.array([False, True, False, True])), + # int64 array that can't possibly have nans + (np.array([1, 5, 7, 9], dtype=np.int64), + True), + # bool array that can't possibly have nans + (np.array([False, True, False, True]), + True), + # 2-D complex array with nans + (np.array([[np.nan, 5.0], + [np.nan, np.inf]], dtype=np.complex64), + np.array([[False, True], + [False, True]])), + ]) +def test__nan_mask(arr, expected): + for out in [None, np.empty(arr.shape, dtype=np.bool_)]: + actual = _nan_mask(arr, out=out) + assert_equal(actual, expected) + # the above won't distinguish between True proper + # and an array of True values; we want True proper + # for types that can't possibly contain NaN + if type(expected) is not np.ndarray: + assert actual is True + + +def test__replace_nan(): + """ Test that _replace_nan returns the original array if there are no + NaNs, not a copy. + """ + for dtype in [np.bool_, np.int32, np.int64]: + arr = np.array([0, 1], dtype=dtype) + result, mask = _replace_nan(arr, 0) + assert mask is None + # do not make a copy if there are no nans + assert result is arr + + for dtype in [np.float32, np.float64]: + arr = np.array([0, 1], dtype=dtype) + result, mask = _replace_nan(arr, 2) + assert (mask == False).all() + # mask is not None, so we make a copy + assert result is not arr + assert_equal(result, arr) + + arr_nan = np.array([0, 1, np.nan], dtype=dtype) + result_nan, mask_nan = _replace_nan(arr_nan, 2) + assert_equal(mask_nan, np.array([False, False, True])) + assert result_nan is not arr_nan + assert_equal(result_nan, np.array([0, 1, 2])) + assert np.isnan(arr_nan[-1]) diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/test_packbits.py b/.local/lib/python3.8/site-packages/numpy/lib/tests/test_packbits.py new file mode 100644 index 0000000..5b07f41 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/tests/test_packbits.py @@ -0,0 +1,376 @@ +import numpy as np +from numpy.testing import assert_array_equal, assert_equal, assert_raises +import pytest +from itertools import chain + +def test_packbits(): + # Copied from the docstring. 
+ a = [[[1, 0, 1], [0, 1, 0]], + [[1, 1, 0], [0, 0, 1]]] + for dt in '?bBhHiIlLqQ': + arr = np.array(a, dtype=dt) + b = np.packbits(arr, axis=-1) + assert_equal(b.dtype, np.uint8) + assert_array_equal(b, np.array([[[160], [64]], [[192], [32]]])) + + assert_raises(TypeError, np.packbits, np.array(a, dtype=float)) + + +def test_packbits_empty(): + shapes = [ + (0,), (10, 20, 0), (10, 0, 20), (0, 10, 20), (20, 0, 0), (0, 20, 0), + (0, 0, 20), (0, 0, 0), + ] + for dt in '?bBhHiIlLqQ': + for shape in shapes: + a = np.empty(shape, dtype=dt) + b = np.packbits(a) + assert_equal(b.dtype, np.uint8) + assert_equal(b.shape, (0,)) + + +def test_packbits_empty_with_axis(): + # Original shapes and lists of packed shapes for different axes. + shapes = [ + ((0,), [(0,)]), + ((10, 20, 0), [(2, 20, 0), (10, 3, 0), (10, 20, 0)]), + ((10, 0, 20), [(2, 0, 20), (10, 0, 20), (10, 0, 3)]), + ((0, 10, 20), [(0, 10, 20), (0, 2, 20), (0, 10, 3)]), + ((20, 0, 0), [(3, 0, 0), (20, 0, 0), (20, 0, 0)]), + ((0, 20, 0), [(0, 20, 0), (0, 3, 0), (0, 20, 0)]), + ((0, 0, 20), [(0, 0, 20), (0, 0, 20), (0, 0, 3)]), + ((0, 0, 0), [(0, 0, 0), (0, 0, 0), (0, 0, 0)]), + ] + for dt in '?bBhHiIlLqQ': + for in_shape, out_shapes in shapes: + for ax, out_shape in enumerate(out_shapes): + a = np.empty(in_shape, dtype=dt) + b = np.packbits(a, axis=ax) + assert_equal(b.dtype, np.uint8) + assert_equal(b.shape, out_shape) + +@pytest.mark.parametrize('bitorder', ('little', 'big')) +def test_packbits_large(bitorder): + # test data large enough for 16 byte vectorization + a = np.array([1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, + 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, + 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, + 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, + 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, + 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, + 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, + 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, + 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, + 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, + 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, + 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, + 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, + 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0]) + a = a.repeat(3) + for dtype in '?bBhHiIlLqQ': + arr = np.array(a, dtype=dtype) + b = np.packbits(arr, axis=None, bitorder=bitorder) + assert_equal(b.dtype, np.uint8) + r = [252, 127, 192, 3, 254, 7, 252, 0, 7, 31, 240, 0, 28, 1, 255, 252, + 113, 248, 3, 255, 192, 28, 15, 192, 28, 126, 0, 224, 127, 255, + 227, 142, 7, 31, 142, 63, 28, 126, 56, 227, 240, 0, 227, 128, 63, + 224, 14, 56, 252, 112, 56, 255, 241, 248, 3, 240, 56, 224, 112, + 63, 255, 255, 199, 224, 14, 0, 31, 143, 192, 3, 255, 199, 0, 1, + 255, 224, 1, 255, 252, 126, 63, 0, 1, 192, 252, 14, 63, 0, 15, + 199, 252, 113, 255, 3, 128, 56, 252, 14, 7, 0, 113, 255, 255, 142, 56, 227, + 129, 248, 227, 129, 199, 31, 128] + if bitorder == 'big': + assert_array_equal(b, r) + # equal for size being multiple of 8 + assert_array_equal(np.unpackbits(b, bitorder=bitorder)[:-4], a) + + # check last byte of different remainders (16 byte vectorization) + b = [np.packbits(arr[:-i], axis=None)[-1] for i in range(1, 16)] + assert_array_equal(b, [128, 128, 128, 31, 30, 28, 24, 16, 0, 0, 0, 199, + 198, 196, 192]) 
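+
+    # (worked example) with bitorder='big', each group of eight flags packs
+    # into one MSB-first byte, and a trailing partial byte is zero-padded,
+    # e.g. [1, 1, 0, 0, 0, 0, 0, 1] -> 0b11000001 == 193:
+    assert_array_equal(np.packbits([1, 1, 0, 0, 0, 0, 0, 1], bitorder='big'),
+                       [0b11000001])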
+ + + arr = arr.reshape(36, 25) + b = np.packbits(arr, axis=0) + assert_equal(b.dtype, np.uint8) + assert_array_equal(b, [[190, 186, 178, 178, 150, 215, 87, 83, 83, 195, + 199, 206, 204, 204, 140, 140, 136, 136, 8, 40, 105, + 107, 75, 74, 88], + [72, 216, 248, 241, 227, 195, 202, 90, 90, 83, + 83, 119, 127, 109, 73, 64, 208, 244, 189, 45, + 41, 104, 122, 90, 18], + [113, 120, 248, 216, 152, 24, 60, 52, 182, 150, + 150, 150, 146, 210, 210, 246, 255, 255, 223, + 151, 21, 17, 17, 131, 163], + [214, 210, 210, 64, 68, 5, 5, 1, 72, 88, 92, + 92, 78, 110, 39, 181, 149, 220, 222, 218, 218, + 202, 234, 170, 168], + [0, 128, 128, 192, 80, 112, 48, 160, 160, 224, + 240, 208, 144, 128, 160, 224, 240, 208, 144, + 144, 176, 240, 224, 192, 128]]) + + b = np.packbits(arr, axis=1) + assert_equal(b.dtype, np.uint8) + assert_array_equal(b, [[252, 127, 192, 0], + [ 7, 252, 15, 128], + [240, 0, 28, 0], + [255, 128, 0, 128], + [192, 31, 255, 128], + [142, 63, 0, 0], + [255, 240, 7, 0], + [ 7, 224, 14, 0], + [126, 0, 224, 0], + [255, 255, 199, 0], + [ 56, 28, 126, 0], + [113, 248, 227, 128], + [227, 142, 63, 0], + [ 0, 28, 112, 0], + [ 15, 248, 3, 128], + [ 28, 126, 56, 0], + [ 56, 255, 241, 128], + [240, 7, 224, 0], + [227, 129, 192, 128], + [255, 255, 254, 0], + [126, 0, 224, 0], + [ 3, 241, 248, 0], + [ 0, 255, 241, 128], + [128, 0, 255, 128], + [224, 1, 255, 128], + [248, 252, 126, 0], + [ 0, 7, 3, 128], + [224, 113, 248, 0], + [ 0, 252, 127, 128], + [142, 63, 224, 0], + [224, 14, 63, 0], + [ 7, 3, 128, 0], + [113, 255, 255, 128], + [ 28, 113, 199, 0], + [ 7, 227, 142, 0], + [ 14, 56, 252, 0]]) + + arr = arr.T.copy() + b = np.packbits(arr, axis=0) + assert_equal(b.dtype, np.uint8) + assert_array_equal(b, [[252, 7, 240, 255, 192, 142, 255, 7, 126, 255, + 56, 113, 227, 0, 15, 28, 56, 240, 227, 255, + 126, 3, 0, 128, 224, 248, 0, 224, 0, 142, 224, + 7, 113, 28, 7, 14], + [127, 252, 0, 128, 31, 63, 240, 224, 0, 255, + 28, 248, 142, 28, 248, 126, 255, 7, 129, 255, + 0, 241, 255, 0, 1, 252, 7, 113, 252, 63, 14, + 3, 255, 113, 227, 56], + [192, 15, 28, 0, 255, 0, 7, 14, 224, 199, 126, + 227, 63, 112, 3, 56, 241, 224, 192, 254, 224, + 248, 241, 255, 255, 126, 3, 248, 127, 224, 63, + 128, 255, 199, 142, 252], + [0, 128, 0, 128, 128, 0, 0, 0, 0, 0, 0, 128, 0, + 0, 128, 0, 128, 0, 128, 0, 0, 0, 128, 128, + 128, 0, 128, 0, 128, 0, 0, 0, 128, 0, 0, 0]]) + + b = np.packbits(arr, axis=1) + assert_equal(b.dtype, np.uint8) + assert_array_equal(b, [[190, 72, 113, 214, 0], + [186, 216, 120, 210, 128], + [178, 248, 248, 210, 128], + [178, 241, 216, 64, 192], + [150, 227, 152, 68, 80], + [215, 195, 24, 5, 112], + [ 87, 202, 60, 5, 48], + [ 83, 90, 52, 1, 160], + [ 83, 90, 182, 72, 160], + [195, 83, 150, 88, 224], + [199, 83, 150, 92, 240], + [206, 119, 150, 92, 208], + [204, 127, 146, 78, 144], + [204, 109, 210, 110, 128], + [140, 73, 210, 39, 160], + [140, 64, 246, 181, 224], + [136, 208, 255, 149, 240], + [136, 244, 255, 220, 208], + [ 8, 189, 223, 222, 144], + [ 40, 45, 151, 218, 144], + [105, 41, 21, 218, 176], + [107, 104, 17, 202, 240], + [ 75, 122, 17, 234, 224], + [ 74, 90, 131, 170, 192], + [ 88, 18, 163, 168, 128]]) + + + # result is the same if input is multiplied with a nonzero value + for dtype in 'bBhHiIlLqQ': + arr = np.array(a, dtype=dtype) + rnd = np.random.randint(low=np.iinfo(dtype).min, + high=np.iinfo(dtype).max, size=arr.size, + dtype=dtype) + rnd[rnd == 0] = 1 + arr *= rnd.astype(dtype) + b = np.packbits(arr, axis=-1) + assert_array_equal(np.unpackbits(b)[:-4], a) + + assert_raises(TypeError, np.packbits, 
np.array(a, dtype=float))
+
+
+def test_packbits_very_large():
+    # test with larger arrays, gh-8637
+    # the code path is covered earlier, but a larger array makes a crash on
+    # the bug more likely
+    for s in range(950, 1050):
+        for dt in '?bBhHiIlLqQ':
+            x = np.ones((200, s), dtype=bool)
+            np.packbits(x, axis=1)
+
+
+def test_unpackbits():
+    # Copied from the docstring.
+    a = np.array([[2], [7], [23]], dtype=np.uint8)
+    b = np.unpackbits(a, axis=1)
+    assert_equal(b.dtype, np.uint8)
+    assert_array_equal(b, np.array([[0, 0, 0, 0, 0, 0, 1, 0],
+                                    [0, 0, 0, 0, 0, 1, 1, 1],
+                                    [0, 0, 0, 1, 0, 1, 1, 1]]))
+
+
+def test_pack_unpack_order():
+    a = np.array([[2], [7], [23]], dtype=np.uint8)
+    b = np.unpackbits(a, axis=1)
+    assert_equal(b.dtype, np.uint8)
+    b_little = np.unpackbits(a, axis=1, bitorder='little')
+    b_big = np.unpackbits(a, axis=1, bitorder='big')
+    assert_array_equal(b, b_big)
+    assert_array_equal(a, np.packbits(b_little, axis=1, bitorder='little'))
+    assert_array_equal(b[:, ::-1], b_little)
+    assert_array_equal(a, np.packbits(b_big, axis=1, bitorder='big'))
+    assert_raises(ValueError, np.unpackbits, a, bitorder='r')
+    assert_raises(TypeError, np.unpackbits, a, bitorder=10)
+
+
+def test_unpackbits_empty():
+    a = np.empty((0,), dtype=np.uint8)
+    b = np.unpackbits(a)
+    assert_equal(b.dtype, np.uint8)
+    assert_array_equal(b, np.empty((0,)))
+
+
+def test_unpackbits_empty_with_axis():
+    # Lists of packed shapes for different axes and unpacked shapes.
+    shapes = [
+        ([(0,)], (0,)),
+        ([(2, 24, 0), (16, 3, 0), (16, 24, 0)], (16, 24, 0)),
+        ([(2, 0, 24), (16, 0, 24), (16, 0, 3)], (16, 0, 24)),
+        ([(0, 16, 24), (0, 2, 24), (0, 16, 3)], (0, 16, 24)),
+        ([(3, 0, 0), (24, 0, 0), (24, 0, 0)], (24, 0, 0)),
+        ([(0, 24, 0), (0, 3, 0), (0, 24, 0)], (0, 24, 0)),
+        ([(0, 0, 24), (0, 0, 24), (0, 0, 3)], (0, 0, 24)),
+        ([(0, 0, 0), (0, 0, 0), (0, 0, 0)], (0, 0, 0)),
+    ]
+    for in_shapes, out_shape in shapes:
+        for ax, in_shape in enumerate(in_shapes):
+            a = np.empty(in_shape, dtype=np.uint8)
+            b = np.unpackbits(a, axis=ax)
+            assert_equal(b.dtype, np.uint8)
+            assert_equal(b.shape, out_shape)
+
+
+def test_unpackbits_large():
+    # test all possible numbers via comparison to already tested packbits
+    d = np.arange(277, dtype=np.uint8)
+    assert_array_equal(np.packbits(np.unpackbits(d)), d)
+    assert_array_equal(np.packbits(np.unpackbits(d[::2])), d[::2])
+    d = np.tile(d, (3, 1))
+    assert_array_equal(np.packbits(np.unpackbits(d, axis=1), axis=1), d)
+    d = d.T.copy()
+    assert_array_equal(np.packbits(np.unpackbits(d, axis=0), axis=0), d)
+
+
+class TestCount:
+    x = np.array([
+        [1, 0, 1, 0, 0, 1, 0],
+        [0, 1, 1, 1, 0, 0, 0],
+        [0, 0, 1, 0, 0, 1, 1],
+        [1, 1, 0, 0, 0, 1, 1],
+        [1, 0, 1, 0, 1, 0, 1],
+        [0, 0, 1, 1, 1, 0, 0],
+        [0, 1, 0, 1, 0, 1, 0],
+    ], dtype=np.uint8)
+    padded1 = np.zeros(57, dtype=np.uint8)
+    padded1[:49] = x.ravel()
+    padded1b = np.zeros(57, dtype=np.uint8)
+    padded1b[:49] = x[::-1].copy().ravel()
+    padded2 = np.zeros((9, 9), dtype=np.uint8)
+    padded2[:7, :7] = x
+
+    @pytest.mark.parametrize('bitorder', ('little', 'big'))
+    @pytest.mark.parametrize('count', chain(range(58), range(-1, -57, -1)))
+    def test_roundtrip(self, bitorder, count):
+        if count < 0:
+            # one extra zero of padding
+            cutoff = count - 1
+        else:
+            cutoff = count
+        # test complete invertibility of packbits and unpackbits with count
+        packed = np.packbits(self.x, bitorder=bitorder)
+        unpacked = np.unpackbits(packed, count=count, bitorder=bitorder)
+        assert_equal(unpacked.dtype, np.uint8)
+        assert_array_equal(unpacked, self.padded1[:cutoff])
+
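+    # count in brief: the 49 flattened bits of x pack into 7 bytes, so a
+    # plain unpack yields 56 values; a non-negative count keeps exactly that
+    # many leading bits, while a negative count trims |count| bits off the
+    # zero-padded tail. padded1 carries one extra zero (length 57), hence
+    # the ``cutoff = count - 1`` adjustment above.
+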
@pytest.mark.parametrize('kwargs', [ + {}, {'count': None}, + ]) + def test_count(self, kwargs): + packed = np.packbits(self.x) + unpacked = np.unpackbits(packed, **kwargs) + assert_equal(unpacked.dtype, np.uint8) + assert_array_equal(unpacked, self.padded1[:-1]) + + @pytest.mark.parametrize('bitorder', ('little', 'big')) + # delta==-1 when count<0 because one extra zero of padding + @pytest.mark.parametrize('count', chain(range(8), range(-1, -9, -1))) + def test_roundtrip_axis(self, bitorder, count): + if count < 0: + # one extra zero of padding + cutoff = count - 1 + else: + cutoff = count + packed0 = np.packbits(self.x, axis=0, bitorder=bitorder) + unpacked0 = np.unpackbits(packed0, axis=0, count=count, + bitorder=bitorder) + assert_equal(unpacked0.dtype, np.uint8) + assert_array_equal(unpacked0, self.padded2[:cutoff, :self.x.shape[1]]) + + packed1 = np.packbits(self.x, axis=1, bitorder=bitorder) + unpacked1 = np.unpackbits(packed1, axis=1, count=count, + bitorder=bitorder) + assert_equal(unpacked1.dtype, np.uint8) + assert_array_equal(unpacked1, self.padded2[:self.x.shape[0], :cutoff]) + + @pytest.mark.parametrize('kwargs', [ + {}, {'count': None}, + {'bitorder' : 'little'}, + {'bitorder': 'little', 'count': None}, + {'bitorder' : 'big'}, + {'bitorder': 'big', 'count': None}, + ]) + def test_axis_count(self, kwargs): + packed0 = np.packbits(self.x, axis=0) + unpacked0 = np.unpackbits(packed0, axis=0, **kwargs) + assert_equal(unpacked0.dtype, np.uint8) + if kwargs.get('bitorder', 'big') == 'big': + assert_array_equal(unpacked0, self.padded2[:-1, :self.x.shape[1]]) + else: + assert_array_equal(unpacked0[::-1, :], self.padded2[:-1, :self.x.shape[1]]) + + packed1 = np.packbits(self.x, axis=1) + unpacked1 = np.unpackbits(packed1, axis=1, **kwargs) + assert_equal(unpacked1.dtype, np.uint8) + if kwargs.get('bitorder', 'big') == 'big': + assert_array_equal(unpacked1, self.padded2[:self.x.shape[0], :-1]) + else: + assert_array_equal(unpacked1[:, ::-1], self.padded2[:self.x.shape[0], :-1]) + + def test_bad_count(self): + packed0 = np.packbits(self.x, axis=0) + assert_raises(ValueError, np.unpackbits, packed0, axis=0, count=-9) + packed1 = np.packbits(self.x, axis=1) + assert_raises(ValueError, np.unpackbits, packed1, axis=1, count=-9) + packed = np.packbits(self.x) + assert_raises(ValueError, np.unpackbits, packed, count=-57) diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/test_polynomial.py b/.local/lib/python3.8/site-packages/numpy/lib/tests/test_polynomial.py new file mode 100644 index 0000000..3734344 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/tests/test_polynomial.py @@ -0,0 +1,303 @@ +import numpy as np +from numpy.testing import ( + assert_, assert_equal, assert_array_equal, assert_almost_equal, + assert_array_almost_equal, assert_raises, assert_allclose + ) + +import pytest + +# `poly1d` has some support for `bool_` and `timedelta64`, +# but it is limited and they are therefore excluded here +TYPE_CODES = np.typecodes["AllInteger"] + np.typecodes["AllFloat"] + "O" + + +class TestPolynomial: + def test_poly1d_str_and_repr(self): + p = np.poly1d([1., 2, 3]) + assert_equal(repr(p), 'poly1d([1., 2., 3.])') + assert_equal(str(p), + ' 2\n' + '1 x + 2 x + 3') + + q = np.poly1d([3., 2, 1]) + assert_equal(repr(q), 'poly1d([3., 2., 1.])') + assert_equal(str(q), + ' 2\n' + '3 x + 2 x + 1') + + r = np.poly1d([1.89999 + 2j, -3j, -5.12345678, 2 + 1j]) + assert_equal(str(r), + ' 3 2\n' + '(1.9 + 2j) x - 3j x - 5.123 x + (2 + 1j)') + + 
assert_equal(str(np.poly1d([-3, -2, -1])), + ' 2\n' + '-3 x - 2 x - 1') + + def test_poly1d_resolution(self): + p = np.poly1d([1., 2, 3]) + q = np.poly1d([3., 2, 1]) + assert_equal(p(0), 3.0) + assert_equal(p(5), 38.0) + assert_equal(q(0), 1.0) + assert_equal(q(5), 86.0) + + def test_poly1d_math(self): + # here we use some simple coeffs to make calculations easier + p = np.poly1d([1., 2, 4]) + q = np.poly1d([4., 2, 1]) + assert_equal(p/q, (np.poly1d([0.25]), np.poly1d([1.5, 3.75]))) + assert_equal(p.integ(), np.poly1d([1/3, 1., 4., 0.])) + assert_equal(p.integ(1), np.poly1d([1/3, 1., 4., 0.])) + + p = np.poly1d([1., 2, 3]) + q = np.poly1d([3., 2, 1]) + assert_equal(p * q, np.poly1d([3., 8., 14., 8., 3.])) + assert_equal(p + q, np.poly1d([4., 4., 4.])) + assert_equal(p - q, np.poly1d([-2., 0., 2.])) + assert_equal(p ** 4, np.poly1d([1., 8., 36., 104., 214., 312., 324., 216., 81.])) + assert_equal(p(q), np.poly1d([9., 12., 16., 8., 6.])) + assert_equal(q(p), np.poly1d([3., 12., 32., 40., 34.])) + assert_equal(p.deriv(), np.poly1d([2., 2.])) + assert_equal(p.deriv(2), np.poly1d([2.])) + assert_equal(np.polydiv(np.poly1d([1, 0, -1]), np.poly1d([1, 1])), + (np.poly1d([1., -1.]), np.poly1d([0.]))) + + @pytest.mark.parametrize("type_code", TYPE_CODES) + def test_poly1d_misc(self, type_code: str) -> None: + dtype = np.dtype(type_code) + ar = np.array([1, 2, 3], dtype=dtype) + p = np.poly1d(ar) + + # `__eq__` + assert_equal(np.asarray(p), ar) + assert_equal(np.asarray(p).dtype, dtype) + assert_equal(len(p), 2) + + # `__getitem__` + comparison_dct = {-1: 0, 0: 3, 1: 2, 2: 1, 3: 0} + for index, ref in comparison_dct.items(): + scalar = p[index] + assert_equal(scalar, ref) + if dtype == np.object_: + assert isinstance(scalar, int) + else: + assert_equal(scalar.dtype, dtype) + + def test_poly1d_variable_arg(self): + q = np.poly1d([1., 2, 3], variable='y') + assert_equal(str(q), + ' 2\n' + '1 y + 2 y + 3') + q = np.poly1d([1., 2, 3], variable='lambda') + assert_equal(str(q), + ' 2\n' + '1 lambda + 2 lambda + 3') + + def test_poly(self): + assert_array_almost_equal(np.poly([3, -np.sqrt(2), np.sqrt(2)]), + [1, -3, -2, 6]) + + # From matlab docs + A = [[1, 2, 3], [4, 5, 6], [7, 8, 0]] + assert_array_almost_equal(np.poly(A), [1, -6, -72, -27]) + + # Should produce real output for perfect conjugates + assert_(np.isrealobj(np.poly([+1.082j, +2.613j, -2.613j, -1.082j]))) + assert_(np.isrealobj(np.poly([0+1j, -0+-1j, 1+2j, + 1-2j, 1.+3.5j, 1-3.5j]))) + assert_(np.isrealobj(np.poly([1j, -1j, 1+2j, 1-2j, 1+3j, 1-3.j]))) + assert_(np.isrealobj(np.poly([1j, -1j, 1+2j, 1-2j]))) + assert_(np.isrealobj(np.poly([1j, -1j, 2j, -2j]))) + assert_(np.isrealobj(np.poly([1j, -1j]))) + assert_(np.isrealobj(np.poly([1, -1]))) + + assert_(np.iscomplexobj(np.poly([1j, -1.0000001j]))) + + np.random.seed(42) + a = np.random.randn(100) + 1j*np.random.randn(100) + assert_(np.isrealobj(np.poly(np.concatenate((a, np.conjugate(a)))))) + + def test_roots(self): + assert_array_equal(np.roots([1, 0, 0]), [0, 0]) + + def test_str_leading_zeros(self): + p = np.poly1d([4, 3, 2, 1]) + p[3] = 0 + assert_equal(str(p), + " 2\n" + "3 x + 2 x + 1") + + p = np.poly1d([1, 2]) + p[0] = 0 + p[1] = 0 + assert_equal(str(p), " \n0") + + def test_polyfit(self): + c = np.array([3., 2., 1.]) + x = np.linspace(0, 2, 7) + y = np.polyval(c, x) + err = [1, -1, 1, -1, 1, -1, 1] + weights = np.arange(8, 1, -1)**2/7.0 + + # Check exception when too few points for variance estimate. 
Note that
+        # the estimate requires the number of data points to exceed
+        # degree + 1
+        assert_raises(ValueError, np.polyfit,
+                      [1], [1], deg=0, cov=True)
+
+        # check 1D case
+        m, cov = np.polyfit(x, y+err, 2, cov=True)
+        est = [3.8571, 0.2857, 1.619]
+        assert_almost_equal(est, m, decimal=4)
+        val0 = [[ 1.4694, -2.9388,  0.8163],
+                [-2.9388,  6.3673, -2.1224],
+                [ 0.8163, -2.1224,  1.161 ]]
+        assert_almost_equal(val0, cov, decimal=4)
+
+        m2, cov2 = np.polyfit(x, y+err, 2, w=weights, cov=True)
+        assert_almost_equal([4.8927, -1.0177, 1.7768], m2, decimal=4)
+        val = [[ 4.3964, -5.0052,  0.4878],
+               [-5.0052,  6.8067, -0.9089],
+               [ 0.4878, -0.9089,  0.3337]]
+        assert_almost_equal(val, cov2, decimal=4)
+
+        m3, cov3 = np.polyfit(x, y+err, 2, w=weights, cov="unscaled")
+        assert_almost_equal([4.8927, -1.0177, 1.7768], m3, decimal=4)
+        val = [[ 0.1473, -0.1677,  0.0163],
+               [-0.1677,  0.228 , -0.0304],
+               [ 0.0163, -0.0304,  0.0112]]
+        assert_almost_equal(val, cov3, decimal=4)
+
+        # check 2D (n,1) case
+        y = y[:, np.newaxis]
+        c = c[:, np.newaxis]
+        assert_almost_equal(c, np.polyfit(x, y, 2))
+        # check 2D (n,2) case
+        yy = np.concatenate((y, y), axis=1)
+        cc = np.concatenate((c, c), axis=1)
+        assert_almost_equal(cc, np.polyfit(x, yy, 2))
+
+        m, cov = np.polyfit(x, yy + np.array(err)[:, np.newaxis], 2, cov=True)
+        assert_almost_equal(est, m[:, 0], decimal=4)
+        assert_almost_equal(est, m[:, 1], decimal=4)
+        assert_almost_equal(val0, cov[:, :, 0], decimal=4)
+        assert_almost_equal(val0, cov[:, :, 1], decimal=4)
+
+        # check order 1 (deg=0) case, where the analytic results are simple
+        np.random.seed(123)
+        y = np.random.normal(size=(4, 10000))
+        mean, cov = np.polyfit(np.zeros(y.shape[0]), y, deg=0, cov=True)
+        # Should get sigma_mean = sigma/sqrt(N) = 1./sqrt(4) = 0.5.
+        assert_allclose(mean.std(), 0.5, atol=0.01)
+        assert_allclose(np.sqrt(cov.mean()), 0.5, atol=0.01)
+        # Without scaling, since reduced chi2 is 1, the result should be the same.
+        mean, cov = np.polyfit(np.zeros(y.shape[0]), y, w=np.ones(y.shape[0]),
+                               deg=0, cov="unscaled")
+        assert_allclose(mean.std(), 0.5, atol=0.01)
+        assert_almost_equal(np.sqrt(cov.mean()), 0.5)
+        # If we estimate our errors wrong, no change with scaling:
+        w = np.full(y.shape[0], 1./0.5)
+        mean, cov = np.polyfit(np.zeros(y.shape[0]), y, w=w, deg=0, cov=True)
+        assert_allclose(mean.std(), 0.5, atol=0.01)
+        assert_allclose(np.sqrt(cov.mean()), 0.5, atol=0.01)
+        # But if we do not scale, our estimate for the error in the mean will
+        # differ.
+ mean, cov = np.polyfit(np.zeros(y.shape[0]), y, w=w, deg=0, cov="unscaled") + assert_allclose(mean.std(), 0.5, atol=0.01) + assert_almost_equal(np.sqrt(cov.mean()), 0.25) + + def test_objects(self): + from decimal import Decimal + p = np.poly1d([Decimal('4.0'), Decimal('3.0'), Decimal('2.0')]) + p2 = p * Decimal('1.333333333333333') + assert_(p2[1] == Decimal("3.9999999999999990")) + p2 = p.deriv() + assert_(p2[1] == Decimal('8.0')) + p2 = p.integ() + assert_(p2[3] == Decimal("1.333333333333333333333333333")) + assert_(p2[2] == Decimal('1.5')) + assert_(np.issubdtype(p2.coeffs.dtype, np.object_)) + p = np.poly([Decimal(1), Decimal(2)]) + assert_equal(np.poly([Decimal(1), Decimal(2)]), + [1, Decimal(-3), Decimal(2)]) + + def test_complex(self): + p = np.poly1d([3j, 2j, 1j]) + p2 = p.integ() + assert_((p2.coeffs == [1j, 1j, 1j, 0]).all()) + p2 = p.deriv() + assert_((p2.coeffs == [6j, 2j]).all()) + + def test_integ_coeffs(self): + p = np.poly1d([3, 2, 1]) + p2 = p.integ(3, k=[9, 7, 6]) + assert_( + (p2.coeffs == [1/4./5., 1/3./4., 1/2./3., 9/1./2., 7, 6]).all()) + + def test_zero_dims(self): + try: + np.poly(np.zeros((0, 0))) + except ValueError: + pass + + def test_poly_int_overflow(self): + """ + Regression test for gh-5096. + """ + v = np.arange(1, 21) + assert_almost_equal(np.poly(v), np.poly(np.diag(v))) + + def test_zero_poly_dtype(self): + """ + Regression test for gh-16354. + """ + z = np.array([0, 0, 0]) + p = np.poly1d(z.astype(np.int64)) + assert_equal(p.coeffs.dtype, np.int64) + + p = np.poly1d(z.astype(np.float32)) + assert_equal(p.coeffs.dtype, np.float32) + + p = np.poly1d(z.astype(np.complex64)) + assert_equal(p.coeffs.dtype, np.complex64) + + def test_poly_eq(self): + p = np.poly1d([1, 2, 3]) + p2 = np.poly1d([1, 2, 4]) + assert_equal(p == None, False) + assert_equal(p != None, True) + assert_equal(p == p, True) + assert_equal(p == p2, False) + assert_equal(p != p2, True) + + def test_polydiv(self): + b = np.poly1d([2, 6, 6, 1]) + a = np.poly1d([-1j, (1+2j), -(2+1j), 1]) + q, r = np.polydiv(b, a) + assert_equal(q.coeffs.dtype, np.complex128) + assert_equal(r.coeffs.dtype, np.complex128) + assert_equal(q*a + r, b) + + c = [1, 2, 3] + d = np.poly1d([1, 2, 3]) + s, t = np.polydiv(c, d) + assert isinstance(s, np.poly1d) + assert isinstance(t, np.poly1d) + u, v = np.polydiv(d, c) + assert isinstance(u, np.poly1d) + assert isinstance(v, np.poly1d) + + def test_poly_coeffs_mutable(self): + """ Coefficients should be modifiable """ + p = np.poly1d([1, 2, 3]) + + p.coeffs += 1 + assert_equal(p.coeffs, [2, 3, 4]) + + p.coeffs[2] += 10 + assert_equal(p.coeffs, [2, 3, 14]) + + # this never used to be allowed - let's not add features to deprecated + # APIs + assert_raises(AttributeError, setattr, p, 'coeffs', np.array(1)) diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/test_recfunctions.py b/.local/lib/python3.8/site-packages/numpy/lib/tests/test_recfunctions.py new file mode 100644 index 0000000..2f3c14d --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/tests/test_recfunctions.py @@ -0,0 +1,979 @@ +import pytest + +import numpy as np +import numpy.ma as ma +from numpy.ma.mrecords import MaskedRecords +from numpy.ma.testutils import assert_equal +from numpy.testing import assert_, assert_raises +from numpy.lib.recfunctions import ( + drop_fields, rename_fields, get_fieldstructure, recursive_fill_fields, + find_duplicates, merge_arrays, append_fields, stack_arrays, join_by, + repack_fields, unstructured_to_structured, structured_to_unstructured, + 
apply_along_fields, require_fields, assign_fields_by_name) +get_fieldspec = np.lib.recfunctions._get_fieldspec +get_names = np.lib.recfunctions.get_names +get_names_flat = np.lib.recfunctions.get_names_flat +zip_descr = np.lib.recfunctions._zip_descr +zip_dtype = np.lib.recfunctions._zip_dtype + + +class TestRecFunctions: + # Misc tests + + def setup(self): + x = np.array([1, 2, ]) + y = np.array([10, 20, 30]) + z = np.array([('A', 1.), ('B', 2.)], + dtype=[('A', '|S3'), ('B', float)]) + w = np.array([(1, (2, 3.0)), (4, (5, 6.0))], + dtype=[('a', int), ('b', [('ba', float), ('bb', int)])]) + self.data = (w, x, y, z) + + def test_zip_descr(self): + # Test zip_descr + (w, x, y, z) = self.data + + # Std array + test = zip_descr((x, x), flatten=True) + assert_equal(test, + np.dtype([('', int), ('', int)])) + test = zip_descr((x, x), flatten=False) + assert_equal(test, + np.dtype([('', int), ('', int)])) + + # Std & flexible-dtype + test = zip_descr((x, z), flatten=True) + assert_equal(test, + np.dtype([('', int), ('A', '|S3'), ('B', float)])) + test = zip_descr((x, z), flatten=False) + assert_equal(test, + np.dtype([('', int), + ('', [('A', '|S3'), ('B', float)])])) + + # Standard & nested dtype + test = zip_descr((x, w), flatten=True) + assert_equal(test, + np.dtype([('', int), + ('a', int), + ('ba', float), ('bb', int)])) + test = zip_descr((x, w), flatten=False) + assert_equal(test, + np.dtype([('', int), + ('', [('a', int), + ('b', [('ba', float), ('bb', int)])])])) + + def test_drop_fields(self): + # Test drop_fields + a = np.array([(1, (2, 3.0)), (4, (5, 6.0))], + dtype=[('a', int), ('b', [('ba', float), ('bb', int)])]) + + # A basic field + test = drop_fields(a, 'a') + control = np.array([((2, 3.0),), ((5, 6.0),)], + dtype=[('b', [('ba', float), ('bb', int)])]) + assert_equal(test, control) + + # Another basic field (but nesting two fields) + test = drop_fields(a, 'b') + control = np.array([(1,), (4,)], dtype=[('a', int)]) + assert_equal(test, control) + + # A nested sub-field + test = drop_fields(a, ['ba', ]) + control = np.array([(1, (3.0,)), (4, (6.0,))], + dtype=[('a', int), ('b', [('bb', int)])]) + assert_equal(test, control) + + # All the nested sub-field from a field: zap that field + test = drop_fields(a, ['ba', 'bb']) + control = np.array([(1,), (4,)], dtype=[('a', int)]) + assert_equal(test, control) + + # dropping all fields results in an array with no fields + test = drop_fields(a, ['a', 'b']) + control = np.array([(), ()], dtype=[]) + assert_equal(test, control) + + def test_rename_fields(self): + # Test rename fields + a = np.array([(1, (2, [3.0, 30.])), (4, (5, [6.0, 60.]))], + dtype=[('a', int), + ('b', [('ba', float), ('bb', (float, 2))])]) + test = rename_fields(a, {'a': 'A', 'bb': 'BB'}) + newdtype = [('A', int), ('b', [('ba', float), ('BB', (float, 2))])] + control = a.view(newdtype) + assert_equal(test.dtype, newdtype) + assert_equal(test, control) + + def test_get_names(self): + # Test get_names + ndtype = np.dtype([('A', '|S3'), ('B', float)]) + test = get_names(ndtype) + assert_equal(test, ('A', 'B')) + + ndtype = np.dtype([('a', int), ('b', [('ba', float), ('bb', int)])]) + test = get_names(ndtype) + assert_equal(test, ('a', ('b', ('ba', 'bb')))) + + ndtype = np.dtype([('a', int), ('b', [])]) + test = get_names(ndtype) + assert_equal(test, ('a', ('b', ()))) + + ndtype = np.dtype([]) + test = get_names(ndtype) + assert_equal(test, ()) + + def test_get_names_flat(self): + # Test get_names_flat + ndtype = np.dtype([('A', '|S3'), ('B', float)]) + test = 
get_names_flat(ndtype) + assert_equal(test, ('A', 'B')) + + ndtype = np.dtype([('a', int), ('b', [('ba', float), ('bb', int)])]) + test = get_names_flat(ndtype) + assert_equal(test, ('a', 'b', 'ba', 'bb')) + + ndtype = np.dtype([('a', int), ('b', [])]) + test = get_names_flat(ndtype) + assert_equal(test, ('a', 'b')) + + ndtype = np.dtype([]) + test = get_names_flat(ndtype) + assert_equal(test, ()) + + def test_get_fieldstructure(self): + # Test get_fieldstructure + + # No nested fields + ndtype = np.dtype([('A', '|S3'), ('B', float)]) + test = get_fieldstructure(ndtype) + assert_equal(test, {'A': [], 'B': []}) + + # One 1-nested field + ndtype = np.dtype([('A', int), ('B', [('BA', float), ('BB', '|S1')])]) + test = get_fieldstructure(ndtype) + assert_equal(test, {'A': [], 'B': [], 'BA': ['B', ], 'BB': ['B']}) + + # One 2-nested fields + ndtype = np.dtype([('A', int), + ('B', [('BA', int), + ('BB', [('BBA', int), ('BBB', int)])])]) + test = get_fieldstructure(ndtype) + control = {'A': [], 'B': [], 'BA': ['B'], 'BB': ['B'], + 'BBA': ['B', 'BB'], 'BBB': ['B', 'BB']} + assert_equal(test, control) + + # 0 fields + ndtype = np.dtype([]) + test = get_fieldstructure(ndtype) + assert_equal(test, {}) + + def test_find_duplicates(self): + # Test find_duplicates + a = ma.array([(2, (2., 'B')), (1, (2., 'B')), (2, (2., 'B')), + (1, (1., 'B')), (2, (2., 'B')), (2, (2., 'C'))], + mask=[(0, (0, 0)), (0, (0, 0)), (0, (0, 0)), + (0, (0, 0)), (1, (0, 0)), (0, (1, 0))], + dtype=[('A', int), ('B', [('BA', float), ('BB', '|S1')])]) + + test = find_duplicates(a, ignoremask=False, return_index=True) + control = [0, 2] + assert_equal(sorted(test[-1]), control) + assert_equal(test[0], a[test[-1]]) + + test = find_duplicates(a, key='A', return_index=True) + control = [0, 1, 2, 3, 5] + assert_equal(sorted(test[-1]), control) + assert_equal(test[0], a[test[-1]]) + + test = find_duplicates(a, key='B', return_index=True) + control = [0, 1, 2, 4] + assert_equal(sorted(test[-1]), control) + assert_equal(test[0], a[test[-1]]) + + test = find_duplicates(a, key='BA', return_index=True) + control = [0, 1, 2, 4] + assert_equal(sorted(test[-1]), control) + assert_equal(test[0], a[test[-1]]) + + test = find_duplicates(a, key='BB', return_index=True) + control = [0, 1, 2, 3, 4] + assert_equal(sorted(test[-1]), control) + assert_equal(test[0], a[test[-1]]) + + def test_find_duplicates_ignoremask(self): + # Test the ignoremask option of find_duplicates + ndtype = [('a', int)] + a = ma.array([1, 1, 1, 2, 2, 3, 3], + mask=[0, 0, 1, 0, 0, 0, 1]).view(ndtype) + test = find_duplicates(a, ignoremask=True, return_index=True) + control = [0, 1, 3, 4] + assert_equal(sorted(test[-1]), control) + assert_equal(test[0], a[test[-1]]) + + test = find_duplicates(a, ignoremask=False, return_index=True) + control = [0, 1, 2, 3, 4, 6] + assert_equal(sorted(test[-1]), control) + assert_equal(test[0], a[test[-1]]) + + def test_repack_fields(self): + dt = np.dtype('u1,f4,i8', align=True) + a = np.zeros(2, dtype=dt) + + assert_equal(repack_fields(dt), np.dtype('u1,f4,i8')) + assert_equal(repack_fields(a).itemsize, 13) + assert_equal(repack_fields(repack_fields(dt), align=True), dt) + + # make sure type is preserved + dt = np.dtype((np.record, dt)) + assert_(repack_fields(dt).type is np.record) + + def test_structured_to_unstructured(self): + a = np.zeros(4, dtype=[('a', 'i4'), ('b', 'f4,u2'), ('c', 'f4', 2)]) + out = structured_to_unstructured(a) + assert_equal(out, np.zeros((4,5), dtype='f8')) + + b = np.array([(1, 2, 5), (4, 5, 7), (7, 8 ,11), (10, 11, 
12)], + dtype=[('x', 'i4'), ('y', 'f4'), ('z', 'f8')]) + out = np.mean(structured_to_unstructured(b[['x', 'z']]), axis=-1) + assert_equal(out, np.array([ 3. , 5.5, 9. , 11. ])) + out = np.mean(structured_to_unstructured(b[['x']]), axis=-1) + assert_equal(out, np.array([ 1. , 4. , 7. , 10. ])) + + c = np.arange(20).reshape((4,5)) + out = unstructured_to_structured(c, a.dtype) + want = np.array([( 0, ( 1., 2), [ 3., 4.]), + ( 5, ( 6., 7), [ 8., 9.]), + (10, (11., 12), [13., 14.]), + (15, (16., 17), [18., 19.])], + dtype=[('a', 'i4'), + ('b', [('f0', 'f4'), ('f1', 'u2')]), + ('c', 'f4', (2,))]) + assert_equal(out, want) + + d = np.array([(1, 2, 5), (4, 5, 7), (7, 8 ,11), (10, 11, 12)], + dtype=[('x', 'i4'), ('y', 'f4'), ('z', 'f8')]) + assert_equal(apply_along_fields(np.mean, d), + np.array([ 8.0/3, 16.0/3, 26.0/3, 11. ])) + assert_equal(apply_along_fields(np.mean, d[['x', 'z']]), + np.array([ 3. , 5.5, 9. , 11. ])) + + # check that for uniform field dtypes we get a view, not a copy: + d = np.array([(1, 2, 5), (4, 5, 7), (7, 8 ,11), (10, 11, 12)], + dtype=[('x', 'i4'), ('y', 'i4'), ('z', 'i4')]) + dd = structured_to_unstructured(d) + ddd = unstructured_to_structured(dd, d.dtype) + assert_(dd.base is d) + assert_(ddd.base is d) + + # including uniform fields with subarrays unpacked + d = np.array([(1, [2, 3], [[ 4, 5], [ 6, 7]]), + (8, [9, 10], [[11, 12], [13, 14]])], + dtype=[('x0', 'i4'), ('x1', ('i4', 2)), + ('x2', ('i4', (2, 2)))]) + dd = structured_to_unstructured(d) + ddd = unstructured_to_structured(dd, d.dtype) + assert_(dd.base is d) + assert_(ddd.base is d) + + # test that nested fields with identical names don't break anything + point = np.dtype([('x', int), ('y', int)]) + triangle = np.dtype([('a', point), ('b', point), ('c', point)]) + arr = np.zeros(10, triangle) + res = structured_to_unstructured(arr, dtype=int) + assert_equal(res, np.zeros((10, 6), dtype=int)) + + + # test nested combinations of subarrays and structured arrays, gh-13333 + def subarray(dt, shape): + return np.dtype((dt, shape)) + + def structured(*dts): + return np.dtype([('x{}'.format(i), dt) for i, dt in enumerate(dts)]) + + def inspect(dt, dtype=None): + arr = np.zeros((), dt) + ret = structured_to_unstructured(arr, dtype=dtype) + backarr = unstructured_to_structured(ret, dt) + return ret.shape, ret.dtype, backarr.dtype + + dt = structured(subarray(structured(np.int32, np.int32), 3)) + assert_equal(inspect(dt), ((6,), np.int32, dt)) + + dt = structured(subarray(subarray(np.int32, 2), 2)) + assert_equal(inspect(dt), ((4,), np.int32, dt)) + + dt = structured(np.int32) + assert_equal(inspect(dt), ((1,), np.int32, dt)) + + dt = structured(np.int32, subarray(subarray(np.int32, 2), 2)) + assert_equal(inspect(dt), ((5,), np.int32, dt)) + + dt = structured() + assert_raises(ValueError, structured_to_unstructured, np.zeros(3, dt)) + + # these currently don't work, but we may make it work in the future + assert_raises(NotImplementedError, structured_to_unstructured, + np.zeros(3, dt), dtype=np.int32) + assert_raises(NotImplementedError, unstructured_to_structured, + np.zeros((3,0), dtype=np.int32)) + + def test_field_assignment_by_name(self): + a = np.ones(2, dtype=[('a', 'i4'), ('b', 'f8'), ('c', 'u1')]) + newdt = [('b', 'f4'), ('c', 'u1')] + + assert_equal(require_fields(a, newdt), np.ones(2, newdt)) + + b = np.array([(1,2), (3,4)], dtype=newdt) + assign_fields_by_name(a, b, zero_unassigned=False) + assert_equal(a, np.array([(1,1,2),(1,3,4)], dtype=a.dtype)) + assign_fields_by_name(a, b) + assert_equal(a, 
np.array([(0,1,2),(0,3,4)], dtype=a.dtype)) + + # test nested fields + a = np.ones(2, dtype=[('a', [('b', 'f8'), ('c', 'u1')])]) + newdt = [('a', [('c', 'u1')])] + assert_equal(require_fields(a, newdt), np.ones(2, newdt)) + b = np.array([((2,),), ((3,),)], dtype=newdt) + assign_fields_by_name(a, b, zero_unassigned=False) + assert_equal(a, np.array([((1,2),), ((1,3),)], dtype=a.dtype)) + assign_fields_by_name(a, b) + assert_equal(a, np.array([((0,2),), ((0,3),)], dtype=a.dtype)) + + # test unstructured code path for 0d arrays + a, b = np.array(3), np.array(0) + assign_fields_by_name(b, a) + assert_equal(b[()], 3) + + +class TestRecursiveFillFields: + # Test recursive_fill_fields. + def test_simple_flexible(self): + # Test recursive_fill_fields on flexible-array + a = np.array([(1, 10.), (2, 20.)], dtype=[('A', int), ('B', float)]) + b = np.zeros((3,), dtype=a.dtype) + test = recursive_fill_fields(a, b) + control = np.array([(1, 10.), (2, 20.), (0, 0.)], + dtype=[('A', int), ('B', float)]) + assert_equal(test, control) + + def test_masked_flexible(self): + # Test recursive_fill_fields on masked flexible-array + a = ma.array([(1, 10.), (2, 20.)], mask=[(0, 1), (1, 0)], + dtype=[('A', int), ('B', float)]) + b = ma.zeros((3,), dtype=a.dtype) + test = recursive_fill_fields(a, b) + control = ma.array([(1, 10.), (2, 20.), (0, 0.)], + mask=[(0, 1), (1, 0), (0, 0)], + dtype=[('A', int), ('B', float)]) + assert_equal(test, control) + + +class TestMergeArrays: + # Test merge_arrays + + def setup(self): + x = np.array([1, 2, ]) + y = np.array([10, 20, 30]) + z = np.array( + [('A', 1.), ('B', 2.)], dtype=[('A', '|S3'), ('B', float)]) + w = np.array( + [(1, (2, 3.0, ())), (4, (5, 6.0, ()))], + dtype=[('a', int), ('b', [('ba', float), ('bb', int), ('bc', [])])]) + self.data = (w, x, y, z) + + def test_solo(self): + # Test merge_arrays on a single array. 
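# Before moving on to merge_arrays, an illustrative round trip through
# structured_to_unstructured / unstructured_to_structured, the behaviour the
# tests above rely on: with uniform field dtypes the conversion is a
# zero-copy view. Standalone sketch; the array below is an assumption.
import numpy as np
from numpy.lib.recfunctions import (
    structured_to_unstructured, unstructured_to_structured)

pts = np.array([(1, 2, 5), (4, 5, 7)],
               dtype=[('x', 'i4'), ('y', 'i4'), ('z', 'i4')])
flat = structured_to_unstructured(pts)  # shape (2, 3); a view for uniform 'i4'
flat[0, 0] = 99                         # writes through to pts['x'][0]
assert pts['x'][0] == 99
back = unstructured_to_structured(flat, pts.dtype)
assert back.base is pts                 # still backed by the original buffer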
+ (_, x, _, z) = self.data + + test = merge_arrays(x) + control = np.array([(1,), (2,)], dtype=[('f0', int)]) + assert_equal(test, control) + test = merge_arrays((x,)) + assert_equal(test, control) + + test = merge_arrays(z, flatten=False) + assert_equal(test, z) + test = merge_arrays(z, flatten=True) + assert_equal(test, z) + + def test_solo_w_flatten(self): + # Test merge_arrays on a single array w & w/o flattening + w = self.data[0] + test = merge_arrays(w, flatten=False) + assert_equal(test, w) + + test = merge_arrays(w, flatten=True) + control = np.array([(1, 2, 3.0), (4, 5, 6.0)], + dtype=[('a', int), ('ba', float), ('bb', int)]) + assert_equal(test, control) + + def test_standard(self): + # Test standard & standard + # Test merge arrays + (_, x, y, _) = self.data + test = merge_arrays((x, y), usemask=False) + control = np.array([(1, 10), (2, 20), (-1, 30)], + dtype=[('f0', int), ('f1', int)]) + assert_equal(test, control) + + test = merge_arrays((x, y), usemask=True) + control = ma.array([(1, 10), (2, 20), (-1, 30)], + mask=[(0, 0), (0, 0), (1, 0)], + dtype=[('f0', int), ('f1', int)]) + assert_equal(test, control) + assert_equal(test.mask, control.mask) + + def test_flatten(self): + # Test standard & flexible + (_, x, _, z) = self.data + test = merge_arrays((x, z), flatten=True) + control = np.array([(1, 'A', 1.), (2, 'B', 2.)], + dtype=[('f0', int), ('A', '|S3'), ('B', float)]) + assert_equal(test, control) + + test = merge_arrays((x, z), flatten=False) + control = np.array([(1, ('A', 1.)), (2, ('B', 2.))], + dtype=[('f0', int), + ('f1', [('A', '|S3'), ('B', float)])]) + assert_equal(test, control) + + def test_flatten_wflexible(self): + # Test flatten standard & nested + (w, x, _, _) = self.data + test = merge_arrays((x, w), flatten=True) + control = np.array([(1, 1, 2, 3.0), (2, 4, 5, 6.0)], + dtype=[('f0', int), + ('a', int), ('ba', float), ('bb', int)]) + assert_equal(test, control) + + test = merge_arrays((x, w), flatten=False) + controldtype = [('f0', int), + ('f1', [('a', int), + ('b', [('ba', float), ('bb', int), ('bc', [])])])] + control = np.array([(1., (1, (2, 3.0, ()))), (2, (4, (5, 6.0, ())))], + dtype=controldtype) + assert_equal(test, control) + + def test_wmasked_arrays(self): + # Test merge_arrays masked arrays + (_, x, _, _) = self.data + mx = ma.array([1, 2, 3], mask=[1, 0, 0]) + test = merge_arrays((x, mx), usemask=True) + control = ma.array([(1, 1), (2, 2), (-1, 3)], + mask=[(0, 1), (0, 0), (1, 0)], + dtype=[('f0', int), ('f1', int)]) + assert_equal(test, control) + test = merge_arrays((x, mx), usemask=True, asrecarray=True) + assert_equal(test, control) + assert_(isinstance(test, MaskedRecords)) + + def test_w_singlefield(self): + # Test single field + test = merge_arrays((np.array([1, 2]).view([('a', int)]), + np.array([10., 20., 30.])),) + control = ma.array([(1, 10.), (2, 20.), (-1, 30.)], + mask=[(0, 0), (0, 0), (1, 0)], + dtype=[('a', int), ('f1', float)]) + assert_equal(test, control) + + def test_w_shorter_flex(self): + # Test merge_arrays w/ a shorter flexndarray. 
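# A small sketch of the padding behaviour merge_arrays is tested for here:
# when inputs differ in length, the shorter one is padded with fill values
# and the padding is masked. Illustrative only.
import numpy as np
from numpy.lib.recfunctions import merge_arrays

short = np.array([1, 2])
tall = np.array([10., 20., 30.])
merged = merge_arrays((short, tall), usemask=True)
print(merged)       # [(1, 10.0) (2, 20.0) (--, 30.0)]
print(merged.mask)  # [(False, False) (False, False) ( True, False)]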
+ z = self.data[-1] + + # Fixme, this test looks incomplete and broken + #test = merge_arrays((z, np.array([10, 20, 30]).view([('C', int)]))) + #control = np.array([('A', 1., 10), ('B', 2., 20), ('-1', -1, 20)], + # dtype=[('A', '|S3'), ('B', float), ('C', int)]) + #assert_equal(test, control) + + # Hack to avoid pyflakes warnings about unused variables + merge_arrays((z, np.array([10, 20, 30]).view([('C', int)]))) + np.array([('A', 1., 10), ('B', 2., 20), ('-1', -1, 20)], + dtype=[('A', '|S3'), ('B', float), ('C', int)]) + + def test_singlerecord(self): + (_, x, y, z) = self.data + test = merge_arrays((x[0], y[0], z[0]), usemask=False) + control = np.array([(1, 10, ('A', 1))], + dtype=[('f0', int), + ('f1', int), + ('f2', [('A', '|S3'), ('B', float)])]) + assert_equal(test, control) + + +class TestAppendFields: + # Test append_fields + + def setup(self): + x = np.array([1, 2, ]) + y = np.array([10, 20, 30]) + z = np.array( + [('A', 1.), ('B', 2.)], dtype=[('A', '|S3'), ('B', float)]) + w = np.array([(1, (2, 3.0)), (4, (5, 6.0))], + dtype=[('a', int), ('b', [('ba', float), ('bb', int)])]) + self.data = (w, x, y, z) + + def test_append_single(self): + # Test simple case + (_, x, _, _) = self.data + test = append_fields(x, 'A', data=[10, 20, 30]) + control = ma.array([(1, 10), (2, 20), (-1, 30)], + mask=[(0, 0), (0, 0), (1, 0)], + dtype=[('f0', int), ('A', int)],) + assert_equal(test, control) + + def test_append_double(self): + # Test simple case + (_, x, _, _) = self.data + test = append_fields(x, ('A', 'B'), data=[[10, 20, 30], [100, 200]]) + control = ma.array([(1, 10, 100), (2, 20, 200), (-1, 30, -1)], + mask=[(0, 0, 0), (0, 0, 0), (1, 0, 1)], + dtype=[('f0', int), ('A', int), ('B', int)],) + assert_equal(test, control) + + def test_append_on_flex(self): + # Test append_fields on flexible type arrays + z = self.data[-1] + test = append_fields(z, 'C', data=[10, 20, 30]) + control = ma.array([('A', 1., 10), ('B', 2., 20), (-1, -1., 30)], + mask=[(0, 0, 0), (0, 0, 0), (1, 1, 0)], + dtype=[('A', '|S3'), ('B', float), ('C', int)],) + assert_equal(test, control) + + def test_append_on_nested(self): + # Test append_fields on nested fields + w = self.data[0] + test = append_fields(w, 'C', data=[10, 20, 30]) + control = ma.array([(1, (2, 3.0), 10), + (4, (5, 6.0), 20), + (-1, (-1, -1.), 30)], + mask=[( + 0, (0, 0), 0), (0, (0, 0), 0), (1, (1, 1), 0)], + dtype=[('a', int), + ('b', [('ba', float), ('bb', int)]), + ('C', int)],) + assert_equal(test, control) + + +class TestStackArrays: + # Test stack_arrays + def setup(self): + x = np.array([1, 2, ]) + y = np.array([10, 20, 30]) + z = np.array( + [('A', 1.), ('B', 2.)], dtype=[('A', '|S3'), ('B', float)]) + w = np.array([(1, (2, 3.0)), (4, (5, 6.0))], + dtype=[('a', int), ('b', [('ba', float), ('bb', int)])]) + self.data = (w, x, y, z) + + def test_solo(self): + # Test stack_arrays on single arrays + (_, x, _, _) = self.data + test = stack_arrays((x,)) + assert_equal(test, x) + assert_(test is x) + + test = stack_arrays(x) + assert_equal(test, x) + assert_(test is x) + + def test_unnamed_fields(self): + # Tests combinations of arrays w/o named fields + (_, x, y, _) = self.data + + test = stack_arrays((x, x), usemask=False) + control = np.array([1, 2, 1, 2]) + assert_equal(test, control) + + test = stack_arrays((x, y), usemask=False) + control = np.array([1, 2, 10, 20, 30]) + assert_equal(test, control) + + test = stack_arrays((y, x), usemask=False) + control = np.array([10, 20, 30, 1, 2]) + assert_equal(test, control) + + def 
test_unnamed_and_named_fields(self): + # Test combination of arrays w/ & w/o named fields + (_, x, _, z) = self.data + + test = stack_arrays((x, z)) + control = ma.array([(1, -1, -1), (2, -1, -1), + (-1, 'A', 1), (-1, 'B', 2)], + mask=[(0, 1, 1), (0, 1, 1), + (1, 0, 0), (1, 0, 0)], + dtype=[('f0', int), ('A', '|S3'), ('B', float)]) + assert_equal(test, control) + assert_equal(test.mask, control.mask) + + test = stack_arrays((z, x)) + control = ma.array([('A', 1, -1), ('B', 2, -1), + (-1, -1, 1), (-1, -1, 2), ], + mask=[(0, 0, 1), (0, 0, 1), + (1, 1, 0), (1, 1, 0)], + dtype=[('A', '|S3'), ('B', float), ('f2', int)]) + assert_equal(test, control) + assert_equal(test.mask, control.mask) + + test = stack_arrays((z, z, x)) + control = ma.array([('A', 1, -1), ('B', 2, -1), + ('A', 1, -1), ('B', 2, -1), + (-1, -1, 1), (-1, -1, 2), ], + mask=[(0, 0, 1), (0, 0, 1), + (0, 0, 1), (0, 0, 1), + (1, 1, 0), (1, 1, 0)], + dtype=[('A', '|S3'), ('B', float), ('f2', int)]) + assert_equal(test, control) + + def test_matching_named_fields(self): + # Test combination of arrays w/ matching field names + (_, x, _, z) = self.data + zz = np.array([('a', 10., 100.), ('b', 20., 200.), ('c', 30., 300.)], + dtype=[('A', '|S3'), ('B', float), ('C', float)]) + test = stack_arrays((z, zz)) + control = ma.array([('A', 1, -1), ('B', 2, -1), + ( + 'a', 10., 100.), ('b', 20., 200.), ('c', 30., 300.)], + dtype=[('A', '|S3'), ('B', float), ('C', float)], + mask=[(0, 0, 1), (0, 0, 1), + (0, 0, 0), (0, 0, 0), (0, 0, 0)]) + assert_equal(test, control) + assert_equal(test.mask, control.mask) + + test = stack_arrays((z, zz, x)) + ndtype = [('A', '|S3'), ('B', float), ('C', float), ('f3', int)] + control = ma.array([('A', 1, -1, -1), ('B', 2, -1, -1), + ('a', 10., 100., -1), ('b', 20., 200., -1), + ('c', 30., 300., -1), + (-1, -1, -1, 1), (-1, -1, -1, 2)], + dtype=ndtype, + mask=[(0, 0, 1, 1), (0, 0, 1, 1), + (0, 0, 0, 1), (0, 0, 0, 1), (0, 0, 0, 1), + (1, 1, 1, 0), (1, 1, 1, 0)]) + assert_equal(test, control) + assert_equal(test.mask, control.mask) + + def test_defaults(self): + # Test defaults: no exception raised if keys of defaults are not fields. 
+ (_, _, _, z) = self.data + zz = np.array([('a', 10., 100.), ('b', 20., 200.), ('c', 30., 300.)], + dtype=[('A', '|S3'), ('B', float), ('C', float)]) + defaults = {'A': '???', 'B': -999., 'C': -9999., 'D': -99999.} + test = stack_arrays((z, zz), defaults=defaults) + control = ma.array([('A', 1, -9999.), ('B', 2, -9999.), + ( + 'a', 10., 100.), ('b', 20., 200.), ('c', 30., 300.)], + dtype=[('A', '|S3'), ('B', float), ('C', float)], + mask=[(0, 0, 1), (0, 0, 1), + (0, 0, 0), (0, 0, 0), (0, 0, 0)]) + assert_equal(test, control) + assert_equal(test.data, control.data) + assert_equal(test.mask, control.mask) + + def test_autoconversion(self): + # Tests autoconversion + adtype = [('A', int), ('B', bool), ('C', float)] + a = ma.array([(1, 2, 3)], mask=[(0, 1, 0)], dtype=adtype) + bdtype = [('A', int), ('B', float), ('C', float)] + b = ma.array([(4, 5, 6)], dtype=bdtype) + control = ma.array([(1, 2, 3), (4, 5, 6)], mask=[(0, 1, 0), (0, 0, 0)], + dtype=bdtype) + test = stack_arrays((a, b), autoconvert=True) + assert_equal(test, control) + assert_equal(test.mask, control.mask) + with assert_raises(TypeError): + stack_arrays((a, b), autoconvert=False) + + def test_checktitles(self): + # Test using titles in the field names + adtype = [(('a', 'A'), int), (('b', 'B'), bool), (('c', 'C'), float)] + a = ma.array([(1, 2, 3)], mask=[(0, 1, 0)], dtype=adtype) + bdtype = [(('a', 'A'), int), (('b', 'B'), bool), (('c', 'C'), float)] + b = ma.array([(4, 5, 6)], dtype=bdtype) + test = stack_arrays((a, b)) + control = ma.array([(1, 2, 3), (4, 5, 6)], mask=[(0, 1, 0), (0, 0, 0)], + dtype=bdtype) + assert_equal(test, control) + assert_equal(test.mask, control.mask) + + def test_subdtype(self): + z = np.array([ + ('A', 1), ('B', 2) + ], dtype=[('A', '|S3'), ('B', float, (1,))]) + zz = np.array([ + ('a', [10.], 100.), ('b', [20.], 200.), ('c', [30.], 300.) 
+ ], dtype=[('A', '|S3'), ('B', float, (1,)), ('C', float)]) + + res = stack_arrays((z, zz)) + expected = ma.array( + data=[ + (b'A', [1.0], 0), + (b'B', [2.0], 0), + (b'a', [10.0], 100.0), + (b'b', [20.0], 200.0), + (b'c', [30.0], 300.0)], + mask=[ + (False, [False], True), + (False, [False], True), + (False, [False], False), + (False, [False], False), + (False, [False], False) + ], + dtype=zz.dtype + ) + assert_equal(res.dtype, expected.dtype) + assert_equal(res, expected) + assert_equal(res.mask, expected.mask) + + +class TestJoinBy: + def setup(self): + self.a = np.array(list(zip(np.arange(10), np.arange(50, 60), + np.arange(100, 110))), + dtype=[('a', int), ('b', int), ('c', int)]) + self.b = np.array(list(zip(np.arange(5, 15), np.arange(65, 75), + np.arange(100, 110))), + dtype=[('a', int), ('b', int), ('d', int)]) + + def test_inner_join(self): + # Basic test of join_by + a, b = self.a, self.b + + test = join_by('a', a, b, jointype='inner') + control = np.array([(5, 55, 65, 105, 100), (6, 56, 66, 106, 101), + (7, 57, 67, 107, 102), (8, 58, 68, 108, 103), + (9, 59, 69, 109, 104)], + dtype=[('a', int), ('b1', int), ('b2', int), + ('c', int), ('d', int)]) + assert_equal(test, control) + + def test_join(self): + a, b = self.a, self.b + + # Fixme, this test is broken + #test = join_by(('a', 'b'), a, b) + #control = np.array([(5, 55, 105, 100), (6, 56, 106, 101), + # (7, 57, 107, 102), (8, 58, 108, 103), + # (9, 59, 109, 104)], + # dtype=[('a', int), ('b', int), + # ('c', int), ('d', int)]) + #assert_equal(test, control) + + # Hack to avoid pyflakes unused variable warnings + join_by(('a', 'b'), a, b) + np.array([(5, 55, 105, 100), (6, 56, 106, 101), + (7, 57, 107, 102), (8, 58, 108, 103), + (9, 59, 109, 104)], + dtype=[('a', int), ('b', int), + ('c', int), ('d', int)]) + + def test_join_subdtype(self): + # tests the bug in https://stackoverflow.com/q/44769632/102441 + foo = np.array([(1,)], + dtype=[('key', int)]) + bar = np.array([(1, np.array([1,2,3]))], + dtype=[('key', int), ('value', 'uint16', 3)]) + res = join_by('key', foo, bar) + assert_equal(res, bar.view(ma.MaskedArray)) + + def test_outer_join(self): + a, b = self.a, self.b + + test = join_by(('a', 'b'), a, b, 'outer') + control = ma.array([(0, 50, 100, -1), (1, 51, 101, -1), + (2, 52, 102, -1), (3, 53, 103, -1), + (4, 54, 104, -1), (5, 55, 105, -1), + (5, 65, -1, 100), (6, 56, 106, -1), + (6, 66, -1, 101), (7, 57, 107, -1), + (7, 67, -1, 102), (8, 58, 108, -1), + (8, 68, -1, 103), (9, 59, 109, -1), + (9, 69, -1, 104), (10, 70, -1, 105), + (11, 71, -1, 106), (12, 72, -1, 107), + (13, 73, -1, 108), (14, 74, -1, 109)], + mask=[(0, 0, 0, 1), (0, 0, 0, 1), + (0, 0, 0, 1), (0, 0, 0, 1), + (0, 0, 0, 1), (0, 0, 0, 1), + (0, 0, 1, 0), (0, 0, 0, 1), + (0, 0, 1, 0), (0, 0, 0, 1), + (0, 0, 1, 0), (0, 0, 0, 1), + (0, 0, 1, 0), (0, 0, 0, 1), + (0, 0, 1, 0), (0, 0, 1, 0), + (0, 0, 1, 0), (0, 0, 1, 0), + (0, 0, 1, 0), (0, 0, 1, 0)], + dtype=[('a', int), ('b', int), + ('c', int), ('d', int)]) + assert_equal(test, control) + + def test_leftouter_join(self): + a, b = self.a, self.b + + test = join_by(('a', 'b'), a, b, 'leftouter') + control = ma.array([(0, 50, 100, -1), (1, 51, 101, -1), + (2, 52, 102, -1), (3, 53, 103, -1), + (4, 54, 104, -1), (5, 55, 105, -1), + (6, 56, 106, -1), (7, 57, 107, -1), + (8, 58, 108, -1), (9, 59, 109, -1)], + mask=[(0, 0, 0, 1), (0, 0, 0, 1), + (0, 0, 0, 1), (0, 0, 0, 1), + (0, 0, 0, 1), (0, 0, 0, 1), + (0, 0, 0, 1), (0, 0, 0, 1), + (0, 0, 0, 1), (0, 0, 0, 1)], + dtype=[('a', int), ('b', int), ('c', int), ('d', 
int)]) + assert_equal(test, control) + + def test_different_field_order(self): + # gh-8940 + a = np.zeros(3, dtype=[('a', 'i4'), ('b', 'f4'), ('c', 'u1')]) + b = np.ones(3, dtype=[('c', 'u1'), ('b', 'f4'), ('a', 'i4')]) + # this should not give a FutureWarning: + j = join_by(['c', 'b'], a, b, jointype='inner', usemask=False) + assert_equal(j.dtype.names, ['b', 'c', 'a1', 'a2']) + + def test_duplicate_keys(self): + a = np.zeros(3, dtype=[('a', 'i4'), ('b', 'f4'), ('c', 'u1')]) + b = np.ones(3, dtype=[('c', 'u1'), ('b', 'f4'), ('a', 'i4')]) + assert_raises(ValueError, join_by, ['a', 'b', 'b'], a, b) + + @pytest.mark.xfail(reason="See comment at gh-9343") + def test_same_name_different_dtypes_key(self): + a_dtype = np.dtype([('key', 'S5'), ('value', '<f4')]) [...] +IS_64BIT = sys.maxsize > 2**32 + + +def _add_keepdims(func): + """ hack in keepdims behavior into a function taking an axis """ + @functools.wraps(func) + def wrapped(a, axis, **kwargs): + res = func(a, axis=axis, **kwargs) + if axis is None: + axis = 0 # res is now a scalar, so we can insert this anywhere + return np.expand_dims(res, axis=axis) + return wrapped + + +class TestTakeAlongAxis: + def test_argequivalent(self): + """ Test it translates from arg<func> to <func> """ + from numpy.random import rand + a = rand(3, 4, 5) + + funcs = [ + (np.sort, np.argsort, dict()), + (_add_keepdims(np.min), _add_keepdims(np.argmin), dict()), + (_add_keepdims(np.max), _add_keepdims(np.argmax), dict()), + (np.partition, np.argpartition, dict(kth=2)), + ] + + for func, argfunc, kwargs in funcs: + for axis in list(range(a.ndim)) + [None]: + a_func = func(a, axis=axis, **kwargs) + ai_func = argfunc(a, axis=axis, **kwargs) + assert_equal(a_func, take_along_axis(a, ai_func, axis=axis)) + + def test_invalid(self): + """ Test it errors when indices have too few dimensions """ + a = np.ones((10, 10)) + ai = np.ones((10, 2), dtype=np.intp) + + # sanity check + take_along_axis(a, ai, axis=1) + + # not enough indices + assert_raises(ValueError, take_along_axis, a, np.array(1), axis=1) + # bool arrays not allowed + assert_raises(IndexError, take_along_axis, a, ai.astype(bool), axis=1) + # float arrays not allowed + assert_raises(IndexError, take_along_axis, a, ai.astype(float), axis=1) + # invalid axis + assert_raises(np.AxisError, take_along_axis, a, ai, axis=10) + + def test_empty(self): + """ Test everything is ok with empty results, even with inserted dims """ + a = np.ones((3, 4, 5)) + ai = np.ones((3, 0, 5), dtype=np.intp) + + actual = take_along_axis(a, ai, axis=1) + assert_equal(actual.shape, ai.shape) + + def test_broadcast(self): + """ Test that non-indexing dimensions are broadcast in both directions """ + a = np.ones((3, 4, 1)) + ai = np.ones((1, 2, 5), dtype=np.intp) + actual = take_along_axis(a, ai, axis=1) + assert_equal(actual.shape, (3, 2, 5)) + + +class TestPutAlongAxis: + def test_replace_max(self): + a_base = np.array([[10, 30, 20], [60, 40, 50]]) + + for axis in list(range(a_base.ndim)) + [None]: + # we mutate this in the loop + a = a_base.copy() + + # replace the max with a small value + i_max = _add_keepdims(np.argmax)(a, axis=axis) + put_along_axis(a, i_max, -99, axis=axis) + + # find the new minimum, which should be where the max was + i_min = _add_keepdims(np.argmin)(a, axis=axis) + + assert_equal(i_min, i_max) + + def test_broadcast(self): + """ Test that non-indexing dimensions are broadcast in both directions """ + a = np.ones((3, 4, 1)) + ai = np.arange(10, dtype=np.intp).reshape((1, 2, 5)) % 4 + put_along_axis(a, ai, 20, axis=1) + assert_equal(take_along_axis(a, ai, axis=1), 20) + + +class 
TestApplyAlongAxis: + def test_simple(self): + a = np.ones((20, 10), 'd') + assert_array_equal( + apply_along_axis(len, 0, a), len(a)*np.ones(a.shape[1])) + + def test_simple101(self): + a = np.ones((10, 101), 'd') + assert_array_equal( + apply_along_axis(len, 0, a), len(a)*np.ones(a.shape[1])) + + def test_3d(self): + a = np.arange(27).reshape((3, 3, 3)) + assert_array_equal(apply_along_axis(np.sum, 0, a), + [[27, 30, 33], [36, 39, 42], [45, 48, 51]]) + + def test_preserve_subclass(self): + def double(row): + return row * 2 + + class MyNDArray(np.ndarray): + pass + + m = np.array([[0, 1], [2, 3]]).view(MyNDArray) + expected = np.array([[0, 2], [4, 6]]).view(MyNDArray) + + result = apply_along_axis(double, 0, m) + assert_(isinstance(result, MyNDArray)) + assert_array_equal(result, expected) + + result = apply_along_axis(double, 1, m) + assert_(isinstance(result, MyNDArray)) + assert_array_equal(result, expected) + + def test_subclass(self): + class MinimalSubclass(np.ndarray): + data = 1 + + def minimal_function(array): + return array.data + + a = np.zeros((6, 3)).view(MinimalSubclass) + + assert_array_equal( + apply_along_axis(minimal_function, 0, a), np.array([1, 1, 1]) + ) + + def test_scalar_array(self, cls=np.ndarray): + a = np.ones((6, 3)).view(cls) + res = apply_along_axis(np.sum, 0, a) + assert_(isinstance(res, cls)) + assert_array_equal(res, np.array([6, 6, 6]).view(cls)) + + def test_0d_array(self, cls=np.ndarray): + def sum_to_0d(x): + """ Sum x, returning a 0d array of the same class """ + assert_equal(x.ndim, 1) + return np.squeeze(np.sum(x, keepdims=True)) + a = np.ones((6, 3)).view(cls) + res = apply_along_axis(sum_to_0d, 0, a) + assert_(isinstance(res, cls)) + assert_array_equal(res, np.array([6, 6, 6]).view(cls)) + + res = apply_along_axis(sum_to_0d, 1, a) + assert_(isinstance(res, cls)) + assert_array_equal(res, np.array([3, 3, 3, 3, 3, 3]).view(cls)) + + def test_axis_insertion(self, cls=np.ndarray): + def f1to2(x): + """produces an asymmetric non-square matrix from x""" + assert_equal(x.ndim, 1) + return (x[::-1] * x[1:,None]).view(cls) + + a2d = np.arange(6*3).reshape((6, 3)) + + # 2d insertion along first axis + actual = apply_along_axis(f1to2, 0, a2d) + expected = np.stack([ + f1to2(a2d[:,i]) for i in range(a2d.shape[1]) + ], axis=-1).view(cls) + assert_equal(type(actual), type(expected)) + assert_equal(actual, expected) + + # 2d insertion along last axis + actual = apply_along_axis(f1to2, 1, a2d) + expected = np.stack([ + f1to2(a2d[i,:]) for i in range(a2d.shape[0]) + ], axis=0).view(cls) + assert_equal(type(actual), type(expected)) + assert_equal(actual, expected) + + # 3d insertion along middle axis + a3d = np.arange(6*5*3).reshape((6, 5, 3)) + + actual = apply_along_axis(f1to2, 1, a3d) + expected = np.stack([ + np.stack([ + f1to2(a3d[i,:,j]) for i in range(a3d.shape[0]) + ], axis=0) + for j in range(a3d.shape[2]) + ], axis=-1).view(cls) + assert_equal(type(actual), type(expected)) + assert_equal(actual, expected) + + def test_subclass_preservation(self): + class MinimalSubclass(np.ndarray): + pass + self.test_scalar_array(MinimalSubclass) + self.test_0d_array(MinimalSubclass) + self.test_axis_insertion(MinimalSubclass) + + def test_axis_insertion_ma(self): + def f1to2(x): + """produces an asymmetric non-square matrix from x""" + assert_equal(x.ndim, 1) + res = x[::-1] * x[1:,None] + return np.ma.masked_where(res%5==0, res) + a = np.arange(6*3).reshape((6, 3)) + res = apply_along_axis(f1to2, 0, a) + assert_(isinstance(res, np.ma.masked_array)) + 
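# For reference, the contract apply_along_axis implements, as these tests
# probe it: call the 1-D function on every slice taken along `axis`, and
# splice whatever it returns (scalars or arrays) back in at that axis.
# A standalone two-line illustration:
import numpy as np

m = np.arange(6).reshape(2, 3)                         # [[0 1 2] [3 4 5]]
print(np.apply_along_axis(np.sum, 0, m))               # [3 5 7]
print(np.apply_along_axis(lambda r: r[:2] * 2, 1, m))  # [[0 2] [6 8]]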
assert_equal(res.ndim, 3) + assert_array_equal(res[:,:,0].mask, f1to2(a[:,0]).mask) + assert_array_equal(res[:,:,1].mask, f1to2(a[:,1]).mask) + assert_array_equal(res[:,:,2].mask, f1to2(a[:,2]).mask) + + def test_tuple_func1d(self): + def sample_1d(x): + return x[1], x[0] + res = np.apply_along_axis(sample_1d, 1, np.array([[1, 2], [3, 4]])) + assert_array_equal(res, np.array([[2, 1], [4, 3]])) + + def test_empty(self): + # can't apply_along_axis when there's no chance to call the function + def never_call(x): + assert_(False) # should never be reached + + a = np.empty((0, 0)) + assert_raises(ValueError, np.apply_along_axis, never_call, 0, a) + assert_raises(ValueError, np.apply_along_axis, never_call, 1, a) + + # but it's sometimes ok with some non-zero dimensions + def empty_to_1(x): + assert_(len(x) == 0) + return 1 + + a = np.empty((10, 0)) + actual = np.apply_along_axis(empty_to_1, 1, a) + assert_equal(actual, np.ones(10)) + assert_raises(ValueError, np.apply_along_axis, empty_to_1, 0, a) + + def test_with_iterable_object(self): + # from issue 5248 + d = np.array([ + [{1, 11}, {2, 22}, {3, 33}], + [{4, 44}, {5, 55}, {6, 66}] + ]) + actual = np.apply_along_axis(lambda a: set.union(*a), 0, d) + expected = np.array([{1, 11, 4, 44}, {2, 22, 5, 55}, {3, 33, 6, 66}]) + + assert_equal(actual, expected) + + # issue 8642 - assert_equal doesn't detect this! + for i in np.ndindex(actual.shape): + assert_equal(type(actual[i]), type(expected[i])) + + +class TestApplyOverAxes: + def test_simple(self): + a = np.arange(24).reshape(2, 3, 4) + aoa_a = apply_over_axes(np.sum, a, [0, 2]) + assert_array_equal(aoa_a, np.array([[[60], [92], [124]]])) + + +class TestExpandDims: + def test_functionality(self): + s = (2, 3, 4, 5) + a = np.empty(s) + for axis in range(-5, 4): + b = expand_dims(a, axis) + assert_(b.shape[axis] == 1) + assert_(np.squeeze(b).shape == s) + + def test_axis_tuple(self): + a = np.empty((3, 3, 3)) + assert np.expand_dims(a, axis=(0, 1, 2)).shape == (1, 1, 1, 3, 3, 3) + assert np.expand_dims(a, axis=(0, -1, -2)).shape == (1, 3, 3, 3, 1, 1) + assert np.expand_dims(a, axis=(0, 3, 5)).shape == (1, 3, 3, 1, 3, 1) + assert np.expand_dims(a, axis=(0, -3, -5)).shape == (1, 1, 3, 1, 3, 3) + + def test_axis_out_of_range(self): + s = (2, 3, 4, 5) + a = np.empty(s) + assert_raises(np.AxisError, expand_dims, a, -6) + assert_raises(np.AxisError, expand_dims, a, 5) + + a = np.empty((3, 3, 3)) + assert_raises(np.AxisError, expand_dims, a, (0, -6)) + assert_raises(np.AxisError, expand_dims, a, (0, 5)) + + def test_repeated_axis(self): + a = np.empty((3, 3, 3)) + assert_raises(ValueError, expand_dims, a, axis=(1, 1)) + + def test_subclasses(self): + a = np.arange(10).reshape((2, 5)) + a = np.ma.array(a, mask=a%3 == 0) + + expanded = np.expand_dims(a, axis=1) + assert_(isinstance(expanded, np.ma.MaskedArray)) + assert_equal(expanded.shape, (2, 1, 5)) + assert_equal(expanded.mask.shape, (2, 1, 5)) + + +class TestArraySplit: + def test_integer_0_split(self): + a = np.arange(10) + assert_raises(ValueError, array_split, a, 0) + + def test_integer_split(self): + a = np.arange(10) + res = array_split(a, 1) + desired = [np.arange(10)] + compare_results(res, desired) + + res = array_split(a, 2) + desired = [np.arange(5), np.arange(5, 10)] + compare_results(res, desired) + + res = array_split(a, 3) + desired = [np.arange(4), np.arange(4, 7), np.arange(7, 10)] + compare_results(res, desired) + + res = array_split(a, 4) + desired = [np.arange(3), np.arange(3, 6), np.arange(6, 8), + np.arange(8, 10)] + 
compare_results(res, desired) + + res = array_split(a, 5) + desired = [np.arange(2), np.arange(2, 4), np.arange(4, 6), + np.arange(6, 8), np.arange(8, 10)] + compare_results(res, desired) + + res = array_split(a, 6) + desired = [np.arange(2), np.arange(2, 4), np.arange(4, 6), + np.arange(6, 8), np.arange(8, 9), np.arange(9, 10)] + compare_results(res, desired) + + res = array_split(a, 7) + desired = [np.arange(2), np.arange(2, 4), np.arange(4, 6), + np.arange(6, 7), np.arange(7, 8), np.arange(8, 9), + np.arange(9, 10)] + compare_results(res, desired) + + res = array_split(a, 8) + desired = [np.arange(2), np.arange(2, 4), np.arange(4, 5), + np.arange(5, 6), np.arange(6, 7), np.arange(7, 8), + np.arange(8, 9), np.arange(9, 10)] + compare_results(res, desired) + + res = array_split(a, 9) + desired = [np.arange(2), np.arange(2, 3), np.arange(3, 4), + np.arange(4, 5), np.arange(5, 6), np.arange(6, 7), + np.arange(7, 8), np.arange(8, 9), np.arange(9, 10)] + compare_results(res, desired) + + res = array_split(a, 10) + desired = [np.arange(1), np.arange(1, 2), np.arange(2, 3), + np.arange(3, 4), np.arange(4, 5), np.arange(5, 6), + np.arange(6, 7), np.arange(7, 8), np.arange(8, 9), + np.arange(9, 10)] + compare_results(res, desired) + + res = array_split(a, 11) + desired = [np.arange(1), np.arange(1, 2), np.arange(2, 3), + np.arange(3, 4), np.arange(4, 5), np.arange(5, 6), + np.arange(6, 7), np.arange(7, 8), np.arange(8, 9), + np.arange(9, 10), np.array([])] + compare_results(res, desired) + + def test_integer_split_2D_rows(self): + a = np.array([np.arange(10), np.arange(10)]) + res = array_split(a, 3, axis=0) + tgt = [np.array([np.arange(10)]), np.array([np.arange(10)]), + np.zeros((0, 10))] + compare_results(res, tgt) + assert_(a.dtype.type is res[-1].dtype.type) + + # Same thing for manual splits: + res = array_split(a, [0, 1], axis=0) + tgt = [np.zeros((0, 10)), np.array([np.arange(10)]), + np.array([np.arange(10)])] + compare_results(res, tgt) + assert_(a.dtype.type is res[-1].dtype.type) + + def test_integer_split_2D_cols(self): + a = np.array([np.arange(10), np.arange(10)]) + res = array_split(a, 3, axis=-1) + desired = [np.array([np.arange(4), np.arange(4)]), + np.array([np.arange(4, 7), np.arange(4, 7)]), + np.array([np.arange(7, 10), np.arange(7, 10)])] + compare_results(res, desired) + + def test_integer_split_2D_default(self): + """ This will fail if we change default axis + """ + a = np.array([np.arange(10), np.arange(10)]) + res = array_split(a, 3) + tgt = [np.array([np.arange(10)]), np.array([np.arange(10)]), + np.zeros((0, 10))] + compare_results(res, tgt) + assert_(a.dtype.type is res[-1].dtype.type) + # perhaps should check higher dimensions + + @pytest.mark.skipif(not IS_64BIT, reason="Needs 64bit platform") + def test_integer_split_2D_rows_greater_max_int32(self): + a = np.broadcast_to([0], (1 << 32, 2)) + res = array_split(a, 4) + chunk = np.broadcast_to([0], (1 << 30, 2)) + tgt = [chunk] * 4 + for i in range(len(tgt)): + assert_equal(res[i].shape, tgt[i].shape) + + def test_index_split_simple(self): + a = np.arange(10) + indices = [1, 5, 7] + res = array_split(a, indices, axis=-1) + desired = [np.arange(0, 1), np.arange(1, 5), np.arange(5, 7), + np.arange(7, 10)] + compare_results(res, desired) + + def test_index_split_low_bound(self): + a = np.arange(10) + indices = [0, 5, 7] + res = array_split(a, indices, axis=-1) + desired = [np.array([]), np.arange(0, 5), np.arange(5, 7), + np.arange(7, 10)] + compare_results(res, desired) + + def test_index_split_high_bound(self): + a = 
np.arange(10) + indices = [0, 5, 7, 10, 12] + res = array_split(a, indices, axis=-1) + desired = [np.array([]), np.arange(0, 5), np.arange(5, 7), + np.arange(7, 10), np.array([]), np.array([])] + compare_results(res, desired) + + +class TestSplit: + # The split function is essentially the same as array_split, + # except that it tests if splitting will result in an + # equal split. Only test for this case. + + def test_equal_split(self): + a = np.arange(10) + res = split(a, 2) + desired = [np.arange(5), np.arange(5, 10)] + compare_results(res, desired) + + def test_unequal_split(self): + a = np.arange(10) + assert_raises(ValueError, split, a, 3) + + +class TestColumnStack: + def test_non_iterable(self): + assert_raises(TypeError, column_stack, 1) + + def test_1D_arrays(self): + # example from docstring + a = np.array((1, 2, 3)) + b = np.array((2, 3, 4)) + expected = np.array([[1, 2], + [2, 3], + [3, 4]]) + actual = np.column_stack((a, b)) + assert_equal(actual, expected) + + def test_2D_arrays(self): + # same as hstack 2D docstring example + a = np.array([[1], [2], [3]]) + b = np.array([[2], [3], [4]]) + expected = np.array([[1, 2], + [2, 3], + [3, 4]]) + actual = np.column_stack((a, b)) + assert_equal(actual, expected) + + def test_generator(self): + with assert_warns(FutureWarning): + column_stack((np.arange(3) for _ in range(2))) + + +class TestDstack: + def test_non_iterable(self): + assert_raises(TypeError, dstack, 1) + + def test_0D_array(self): + a = np.array(1) + b = np.array(2) + res = dstack([a, b]) + desired = np.array([[[1, 2]]]) + assert_array_equal(res, desired) + + def test_1D_array(self): + a = np.array([1]) + b = np.array([2]) + res = dstack([a, b]) + desired = np.array([[[1, 2]]]) + assert_array_equal(res, desired) + + def test_2D_array(self): + a = np.array([[1], [2]]) + b = np.array([[1], [2]]) + res = dstack([a, b]) + desired = np.array([[[1, 1]], [[2, 2, ]]]) + assert_array_equal(res, desired) + + def test_2D_array2(self): + a = np.array([1, 2]) + b = np.array([1, 2]) + res = dstack([a, b]) + desired = np.array([[[1, 1], [2, 2]]]) + assert_array_equal(res, desired) + + def test_generator(self): + with assert_warns(FutureWarning): + dstack((np.arange(3) for _ in range(2))) + + +# array_split has a more comprehensive test of splitting. +# only do simple tests on hsplit, vsplit, and dsplit +class TestHsplit: + """Only testing for integer splits. + + """ + def test_non_iterable(self): + assert_raises(ValueError, hsplit, 1, 1) + + def test_0D_array(self): + a = np.array(1) + try: + hsplit(a, 2) + assert_(0) + except ValueError: + pass + + def test_1D_array(self): + a = np.array([1, 2, 3, 4]) + res = hsplit(a, 2) + desired = [np.array([1, 2]), np.array([3, 4])] + compare_results(res, desired) + + def test_2D_array(self): + a = np.array([[1, 2, 3, 4], + [1, 2, 3, 4]]) + res = hsplit(a, 2) + desired = [np.array([[1, 2], [1, 2]]), np.array([[3, 4], [3, 4]])] + compare_results(res, desired) + + +class TestVsplit: + """Only testing for integer splits. 
+ + """ + def test_non_iterable(self): + assert_raises(ValueError, vsplit, 1, 1) + + def test_0D_array(self): + a = np.array(1) + assert_raises(ValueError, vsplit, a, 2) + + def test_1D_array(self): + a = np.array([1, 2, 3, 4]) + try: + vsplit(a, 2) + assert_(0) + except ValueError: + pass + + def test_2D_array(self): + a = np.array([[1, 2, 3, 4], + [1, 2, 3, 4]]) + res = vsplit(a, 2) + desired = [np.array([[1, 2, 3, 4]]), np.array([[1, 2, 3, 4]])] + compare_results(res, desired) + + +class TestDsplit: + # Only testing for integer splits. + def test_non_iterable(self): + assert_raises(ValueError, dsplit, 1, 1) + + def test_0D_array(self): + a = np.array(1) + assert_raises(ValueError, dsplit, a, 2) + + def test_1D_array(self): + a = np.array([1, 2, 3, 4]) + assert_raises(ValueError, dsplit, a, 2) + + def test_2D_array(self): + a = np.array([[1, 2, 3, 4], + [1, 2, 3, 4]]) + try: + dsplit(a, 2) + assert_(0) + except ValueError: + pass + + def test_3D_array(self): + a = np.array([[[1, 2, 3, 4], + [1, 2, 3, 4]], + [[1, 2, 3, 4], + [1, 2, 3, 4]]]) + res = dsplit(a, 2) + desired = [np.array([[[1, 2], [1, 2]], [[1, 2], [1, 2]]]), + np.array([[[3, 4], [3, 4]], [[3, 4], [3, 4]]])] + compare_results(res, desired) + + +class TestSqueeze: + def test_basic(self): + from numpy.random import rand + + a = rand(20, 10, 10, 1, 1) + b = rand(20, 1, 10, 1, 20) + c = rand(1, 1, 20, 10) + assert_array_equal(np.squeeze(a), np.reshape(a, (20, 10, 10))) + assert_array_equal(np.squeeze(b), np.reshape(b, (20, 10, 20))) + assert_array_equal(np.squeeze(c), np.reshape(c, (20, 10))) + + # Squeezing to 0-dim should still give an ndarray + a = [[[1.5]]] + res = np.squeeze(a) + assert_equal(res, 1.5) + assert_equal(res.ndim, 0) + assert_equal(type(res), np.ndarray) + + +class TestKron: + def test_return_type(self): + class myarray(np.ndarray): + __array_priority__ = 0.0 + + a = np.ones([2, 2]) + ma = myarray(a.shape, a.dtype, a.data) + assert_equal(type(kron(a, a)), np.ndarray) + assert_equal(type(kron(ma, ma)), myarray) + assert_equal(type(kron(a, ma)), np.ndarray) + assert_equal(type(kron(ma, a)), myarray) + + +class TestTile: + def test_basic(self): + a = np.array([0, 1, 2]) + b = [[1, 2], [3, 4]] + assert_equal(tile(a, 2), [0, 1, 2, 0, 1, 2]) + assert_equal(tile(a, (2, 2)), [[0, 1, 2, 0, 1, 2], [0, 1, 2, 0, 1, 2]]) + assert_equal(tile(a, (1, 2)), [[0, 1, 2, 0, 1, 2]]) + assert_equal(tile(b, 2), [[1, 2, 1, 2], [3, 4, 3, 4]]) + assert_equal(tile(b, (2, 1)), [[1, 2], [3, 4], [1, 2], [3, 4]]) + assert_equal(tile(b, (2, 2)), [[1, 2, 1, 2], [3, 4, 3, 4], + [1, 2, 1, 2], [3, 4, 3, 4]]) + + def test_tile_one_repetition_on_array_gh4679(self): + a = np.arange(5) + b = tile(a, 1) + b += 2 + assert_equal(a, np.arange(5)) + + def test_empty(self): + a = np.array([[[]]]) + b = np.array([[], []]) + c = tile(b, 2).shape + d = tile(a, (3, 2, 5)).shape + assert_equal(c, (2, 0)) + assert_equal(d, (3, 2, 0)) + + def test_kroncompare(self): + from numpy.random import randint + + reps = [(2,), (1, 2), (2, 1), (2, 2), (2, 3, 2), (3, 2)] + shape = [(3,), (2, 3), (3, 4, 3), (3, 2, 3), (4, 3, 2, 4), (2, 2)] + for s in shape: + b = randint(0, 10, size=s) + for r in reps: + a = np.ones(r, b.dtype) + large = tile(b, r) + klarge = kron(a, b) + assert_equal(large, klarge) + + +class TestMayShareMemory: + def test_basic(self): + d = np.ones((50, 60)) + d2 = np.ones((30, 60, 6)) + assert_(np.may_share_memory(d, d)) + assert_(np.may_share_memory(d, d[::-1])) + assert_(np.may_share_memory(d, d[::2])) + assert_(np.may_share_memory(d, d[1:, ::-1])) + + 
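# Worth noting alongside this test: np.may_share_memory is a cheap
# bounds-overlap check and can report false positives, whereas
# np.shares_memory gives an exact (potentially expensive) answer:
import numpy as np

x = np.arange(10)
print(np.may_share_memory(x[::2], x[1::2]))  # True  -- extents overlap
print(np.shares_memory(x[::2], x[1::2]))     # False -- no common element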
assert_(not np.may_share_memory(d[::-1], d2)) + assert_(not np.may_share_memory(d[::2], d2)) + assert_(not np.may_share_memory(d[1:, ::-1], d2)) + assert_(np.may_share_memory(d2[1:, ::-1], d2)) + + +# Utility +def compare_results(res, desired): + """Compare lists of arrays.""" + if len(res) != len(desired): + raise ValueError("Iterables have different lengths") + # See also PEP 618 for Python 3.10 + for x, y in zip(res, desired): + assert_array_equal(x, y) diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/test_stride_tricks.py b/.local/lib/python3.8/site-packages/numpy/lib/tests/test_stride_tricks.py new file mode 100644 index 0000000..efec5d2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/tests/test_stride_tricks.py @@ -0,0 +1,645 @@ +import numpy as np +from numpy.core._rational_tests import rational +from numpy.testing import ( + assert_equal, assert_array_equal, assert_raises, assert_, + assert_raises_regex, assert_warns, + ) +from numpy.lib.stride_tricks import ( + as_strided, broadcast_arrays, _broadcast_shape, broadcast_to, + broadcast_shapes, sliding_window_view, + ) +import pytest + + +def assert_shapes_correct(input_shapes, expected_shape): + # Broadcast a list of arrays with the given input shapes and check the + # common output shape. + + inarrays = [np.zeros(s) for s in input_shapes] + outarrays = broadcast_arrays(*inarrays) + outshapes = [a.shape for a in outarrays] + expected = [expected_shape] * len(inarrays) + assert_equal(outshapes, expected) + + +def assert_incompatible_shapes_raise(input_shapes): + # Broadcast a list of arrays with the given (incompatible) input shapes + # and check that they raise a ValueError. + + inarrays = [np.zeros(s) for s in input_shapes] + assert_raises(ValueError, broadcast_arrays, *inarrays) + + +def assert_same_as_ufunc(shape0, shape1, transposed=False, flipped=False): + # Broadcast two shapes against each other and check that the data layout + # is the same as if a ufunc did the broadcasting. + + x0 = np.zeros(shape0, dtype=int) + # Note that multiply.reduce's identity element is 1.0, so when shape1==(), + # this gives the desired n==1. + n = int(np.multiply.reduce(shape1)) + x1 = np.arange(n).reshape(shape1) + if transposed: + x0 = x0.T + x1 = x1.T + if flipped: + x0 = x0[::-1] + x1 = x1[::-1] + # Use the add ufunc to do the broadcasting. Since we're adding 0s to x1, the + # result should be exactly the same as the broadcasted view of x1. + y = x0 + x1 + b0, b1 = broadcast_arrays(x0, x1) + assert_array_equal(y, b1) + + +def test_same(): + x = np.arange(10) + y = np.arange(10) + bx, by = broadcast_arrays(x, y) + assert_array_equal(x, bx) + assert_array_equal(y, by) + +def test_broadcast_kwargs(): + # ensure that a TypeError is appropriately raised when + # np.broadcast_arrays() is called with any keyword + # argument other than 'subok' + x = np.arange(10) + y = np.arange(10) + + with assert_raises_regex(TypeError, 'got an unexpected keyword'): + broadcast_arrays(x, y, dtype='float64') + + +def test_one_off(): + x = np.array([[1, 2, 3]]) + y = np.array([[1], [2], [3]]) + bx, by = broadcast_arrays(x, y) + bx0 = np.array([[1, 2, 3], [1, 2, 3], [1, 2, 3]]) + by0 = bx0.T + assert_array_equal(bx0, bx) + assert_array_equal(by0, by) + + +def test_same_input_shapes(): + # Check that the final shape is just the input shape. + + data = [ + (), + (1,), + (3,), + (0, 1), + (0, 3), + (1, 0), + (3, 0), + (1, 3), + (3, 1), + (3, 3), + ] + for shape in data: + input_shapes = [shape] + # Single input. 
+ assert_shapes_correct(input_shapes, shape) + # Double input. + input_shapes2 = [shape, shape] + assert_shapes_correct(input_shapes2, shape) + # Triple input. + input_shapes3 = [shape, shape, shape] + assert_shapes_correct(input_shapes3, shape) + + +def test_two_compatible_by_ones_input_shapes(): + # Check that two different input shapes of the same length, but some have + # ones, broadcast to the correct shape. + + data = [ + [[(1,), (3,)], (3,)], + [[(1, 3), (3, 3)], (3, 3)], + [[(3, 1), (3, 3)], (3, 3)], + [[(1, 3), (3, 1)], (3, 3)], + [[(1, 1), (3, 3)], (3, 3)], + [[(1, 1), (1, 3)], (1, 3)], + [[(1, 1), (3, 1)], (3, 1)], + [[(1, 0), (0, 0)], (0, 0)], + [[(0, 1), (0, 0)], (0, 0)], + [[(1, 0), (0, 1)], (0, 0)], + [[(1, 1), (0, 0)], (0, 0)], + [[(1, 1), (1, 0)], (1, 0)], + [[(1, 1), (0, 1)], (0, 1)], + ] + for input_shapes, expected_shape in data: + assert_shapes_correct(input_shapes, expected_shape) + # Reverse the input shapes since broadcasting should be symmetric. + assert_shapes_correct(input_shapes[::-1], expected_shape) + + +def test_two_compatible_by_prepending_ones_input_shapes(): + # Check that two different input shapes (of different lengths) broadcast + # to the correct shape. + + data = [ + [[(), (3,)], (3,)], + [[(3,), (3, 3)], (3, 3)], + [[(3,), (3, 1)], (3, 3)], + [[(1,), (3, 3)], (3, 3)], + [[(), (3, 3)], (3, 3)], + [[(1, 1), (3,)], (1, 3)], + [[(1,), (3, 1)], (3, 1)], + [[(1,), (1, 3)], (1, 3)], + [[(), (1, 3)], (1, 3)], + [[(), (3, 1)], (3, 1)], + [[(), (0,)], (0,)], + [[(0,), (0, 0)], (0, 0)], + [[(0,), (0, 1)], (0, 0)], + [[(1,), (0, 0)], (0, 0)], + [[(), (0, 0)], (0, 0)], + [[(1, 1), (0,)], (1, 0)], + [[(1,), (0, 1)], (0, 1)], + [[(1,), (1, 0)], (1, 0)], + [[(), (1, 0)], (1, 0)], + [[(), (0, 1)], (0, 1)], + ] + for input_shapes, expected_shape in data: + assert_shapes_correct(input_shapes, expected_shape) + # Reverse the input shapes since broadcasting should be symmetric. + assert_shapes_correct(input_shapes[::-1], expected_shape) + + +def test_incompatible_shapes_raise_valueerror(): + # Check that a ValueError is raised for incompatible shapes. + + data = [ + [(3,), (4,)], + [(2, 3), (2,)], + [(3,), (3,), (4,)], + [(1, 3, 4), (2, 3, 3)], + ] + for input_shapes in data: + assert_incompatible_shapes_raise(input_shapes) + # Reverse the input shapes since broadcasting should be symmetric. + assert_incompatible_shapes_raise(input_shapes[::-1]) + + +def test_same_as_ufunc(): + # Check that the data layout is the same as if a ufunc did the operation. 
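# The broadcasting rule all of these shape tables encode: align shapes from
# the right; each dimension pair must be equal or contain a 1, and missing
# leading dimensions act as 1. A quick illustration (np.broadcast_shapes
# assumes NumPy >= 1.20, the version this test file imports it from):
import numpy as np

print(np.broadcast_shapes((5, 6, 1), (6, 7), (7,)))  # (5, 6, 7)
print((np.zeros((3, 1)) + np.zeros((1, 4))).shape)   # (3, 4)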
+ + data = [ + [[(1,), (3,)], (3,)], + [[(1, 3), (3, 3)], (3, 3)], + [[(3, 1), (3, 3)], (3, 3)], + [[(1, 3), (3, 1)], (3, 3)], + [[(1, 1), (3, 3)], (3, 3)], + [[(1, 1), (1, 3)], (1, 3)], + [[(1, 1), (3, 1)], (3, 1)], + [[(1, 0), (0, 0)], (0, 0)], + [[(0, 1), (0, 0)], (0, 0)], + [[(1, 0), (0, 1)], (0, 0)], + [[(1, 1), (0, 0)], (0, 0)], + [[(1, 1), (1, 0)], (1, 0)], + [[(1, 1), (0, 1)], (0, 1)], + [[(), (3,)], (3,)], + [[(3,), (3, 3)], (3, 3)], + [[(3,), (3, 1)], (3, 3)], + [[(1,), (3, 3)], (3, 3)], + [[(), (3, 3)], (3, 3)], + [[(1, 1), (3,)], (1, 3)], + [[(1,), (3, 1)], (3, 1)], + [[(1,), (1, 3)], (1, 3)], + [[(), (1, 3)], (1, 3)], + [[(), (3, 1)], (3, 1)], + [[(), (0,)], (0,)], + [[(0,), (0, 0)], (0, 0)], + [[(0,), (0, 1)], (0, 0)], + [[(1,), (0, 0)], (0, 0)], + [[(), (0, 0)], (0, 0)], + [[(1, 1), (0,)], (1, 0)], + [[(1,), (0, 1)], (0, 1)], + [[(1,), (1, 0)], (1, 0)], + [[(), (1, 0)], (1, 0)], + [[(), (0, 1)], (0, 1)], + ] + for input_shapes, expected_shape in data: + assert_same_as_ufunc(input_shapes[0], input_shapes[1], + "Shapes: %s %s" % (input_shapes[0], input_shapes[1])) + # Reverse the input shapes since broadcasting should be symmetric. + assert_same_as_ufunc(input_shapes[1], input_shapes[0]) + # Try them transposed, too. + assert_same_as_ufunc(input_shapes[0], input_shapes[1], True) + # ... and flipped for non-rank-0 inputs in order to test negative + # strides. + if () not in input_shapes: + assert_same_as_ufunc(input_shapes[0], input_shapes[1], False, True) + assert_same_as_ufunc(input_shapes[0], input_shapes[1], True, True) + + +def test_broadcast_to_succeeds(): + data = [ + [np.array(0), (0,), np.array(0)], + [np.array(0), (1,), np.zeros(1)], + [np.array(0), (3,), np.zeros(3)], + [np.ones(1), (1,), np.ones(1)], + [np.ones(1), (2,), np.ones(2)], + [np.ones(1), (1, 2, 3), np.ones((1, 2, 3))], + [np.arange(3), (3,), np.arange(3)], + [np.arange(3), (1, 3), np.arange(3).reshape(1, -1)], + [np.arange(3), (2, 3), np.array([[0, 1, 2], [0, 1, 2]])], + # test if shape is not a tuple + [np.ones(0), 0, np.ones(0)], + [np.ones(1), 1, np.ones(1)], + [np.ones(1), 2, np.ones(2)], + # these cases with size 0 are strange, but they reproduce the behavior + # of broadcasting with ufuncs (see test_same_as_ufunc above) + [np.ones(1), (0,), np.ones(0)], + [np.ones((1, 2)), (0, 2), np.ones((0, 2))], + [np.ones((2, 1)), (2, 0), np.ones((2, 0))], + ] + for input_array, shape, expected in data: + actual = broadcast_to(input_array, shape) + assert_array_equal(expected, actual) + + +def test_broadcast_to_raises(): + data = [ + [(0,), ()], + [(1,), ()], + [(3,), ()], + [(3,), (1,)], + [(3,), (2,)], + [(3,), (4,)], + [(1, 2), (2, 1)], + [(1, 1), (1,)], + [(1,), -1], + [(1,), (-1,)], + [(1, 2), (-1, 2)], + ] + for orig_shape, target_shape in data: + arr = np.zeros(orig_shape) + assert_raises(ValueError, lambda: broadcast_to(arr, target_shape)) + + +def test_broadcast_shape(): + # tests internal _broadcast_shape + # _broadcast_shape is already exercised indirectly by broadcast_arrays + # _broadcast_shape is also exercised by the public broadcast_shapes function + assert_equal(_broadcast_shape(), ()) + assert_equal(_broadcast_shape([1, 2]), (2,)) + assert_equal(_broadcast_shape(np.ones((1, 1))), (1, 1)) + assert_equal(_broadcast_shape(np.ones((1, 1)), np.ones((3, 4))), (3, 4)) + assert_equal(_broadcast_shape(*([np.ones((1, 2))] * 32)), (1, 2)) + assert_equal(_broadcast_shape(*([np.ones((1, 2))] * 100)), (1, 2)) + + # regression tests for gh-5862 + assert_equal(_broadcast_shape(*([np.ones(2)] * 32 + [1])), 
(2,)) + bad_args = [np.ones(2)] * 32 + [np.ones(3)] * 32 + assert_raises(ValueError, lambda: _broadcast_shape(*bad_args)) + + +def test_broadcast_shapes_succeeds(): + # tests public broadcast_shapes + data = [ + [[], ()], + [[()], ()], + [[(7,)], (7,)], + [[(1, 2), (2,)], (1, 2)], + [[(1, 1)], (1, 1)], + [[(1, 1), (3, 4)], (3, 4)], + [[(6, 7), (5, 6, 1), (7,), (5, 1, 7)], (5, 6, 7)], + [[(5, 6, 1)], (5, 6, 1)], + [[(1, 3), (3, 1)], (3, 3)], + [[(1, 0), (0, 0)], (0, 0)], + [[(0, 1), (0, 0)], (0, 0)], + [[(1, 0), (0, 1)], (0, 0)], + [[(1, 1), (0, 0)], (0, 0)], + [[(1, 1), (1, 0)], (1, 0)], + [[(1, 1), (0, 1)], (0, 1)], + [[(), (0,)], (0,)], + [[(0,), (0, 0)], (0, 0)], + [[(0,), (0, 1)], (0, 0)], + [[(1,), (0, 0)], (0, 0)], + [[(), (0, 0)], (0, 0)], + [[(1, 1), (0,)], (1, 0)], + [[(1,), (0, 1)], (0, 1)], + [[(1,), (1, 0)], (1, 0)], + [[(), (1, 0)], (1, 0)], + [[(), (0, 1)], (0, 1)], + [[(1,), (3,)], (3,)], + [[2, (3, 2)], (3, 2)], + ] + for input_shapes, target_shape in data: + assert_equal(broadcast_shapes(*input_shapes), target_shape) + + assert_equal(broadcast_shapes(*([(1, 2)] * 32)), (1, 2)) + assert_equal(broadcast_shapes(*([(1, 2)] * 100)), (1, 2)) + + # regression tests for gh-5862 + assert_equal(broadcast_shapes(*([(2,)] * 32)), (2,)) + + +def test_broadcast_shapes_raises(): + # tests public broadcast_shapes + data = [ + [(3,), (4,)], + [(2, 3), (2,)], + [(3,), (3,), (4,)], + [(1, 3, 4), (2, 3, 3)], + [(1, 2), (3,1), (3,2), (10, 5)], + [2, (2, 3)], + ] + for input_shapes in data: + assert_raises(ValueError, lambda: broadcast_shapes(*input_shapes)) + + bad_args = [(2,)] * 32 + [(3,)] * 32 + assert_raises(ValueError, lambda: broadcast_shapes(*bad_args)) + + +def test_as_strided(): + a = np.array([None]) + a_view = as_strided(a) + expected = np.array([None]) + assert_array_equal(a_view, np.array([None])) + + a = np.array([1, 2, 3, 4]) + a_view = as_strided(a, shape=(2,), strides=(2 * a.itemsize,)) + expected = np.array([1, 3]) + assert_array_equal(a_view, expected) + + a = np.array([1, 2, 3, 4]) + a_view = as_strided(a, shape=(3, 4), strides=(0, 1 * a.itemsize)) + expected = np.array([[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]]) + assert_array_equal(a_view, expected) + + # Regression test for gh-5081 + dt = np.dtype([('num', 'i4'), ('obj', 'O')]) + a = np.empty((4,), dtype=dt) + a['num'] = np.arange(1, 5) + a_view = as_strided(a, shape=(3, 4), strides=(0, a.itemsize)) + expected_num = [[1, 2, 3, 4]] * 3 + expected_obj = [[None]*4]*3 + assert_equal(a_view.dtype, dt) + assert_array_equal(expected_num, a_view['num']) + assert_array_equal(expected_obj, a_view['obj']) + + # Make sure that void types without fields are kept unchanged + a = np.empty((4,), dtype='V4') + a_view = as_strided(a, shape=(3, 4), strides=(0, a.itemsize)) + assert_equal(a.dtype, a_view.dtype) + + # Make sure that the only type that could fail is properly handled + dt = np.dtype({'names': [''], 'formats': ['V4']}) + a = np.empty((4,), dtype=dt) + a_view = as_strided(a, shape=(3, 4), strides=(0, a.itemsize)) + assert_equal(a.dtype, a_view.dtype) + + # Custom dtypes should not be lost (gh-9161) + r = [rational(i) for i in range(4)] + a = np.array(r, dtype=rational) + a_view = as_strided(a, shape=(3, 4), strides=(0, a.itemsize)) + assert_equal(a.dtype, a_view.dtype) + assert_array_equal([r] * 3, a_view) + + +class TestSlidingWindowView: + def test_1d(self): + arr = np.arange(5) + arr_view = sliding_window_view(arr, 2) + expected = np.array([[0, 1], + [1, 2], + [2, 3], + [3, 4]]) + assert_array_equal(arr_view, expected) + + 
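+    # A hedged aside (added sketch, not an upstream assertion): the windows
+    # produced above form a strided view rather than a copy, so they share
+    # memory with the input array:
+    #
+    #   >>> arr = np.arange(5)
+    #   >>> np.shares_memory(arr, sliding_window_view(arr, 2))
+    #   True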
def test_2d(self): + i, j = np.ogrid[:3, :4] + arr = 10*i + j + shape = (2, 2) + arr_view = sliding_window_view(arr, shape) + expected = np.array([[[[0, 1], [10, 11]], + [[1, 2], [11, 12]], + [[2, 3], [12, 13]]], + [[[10, 11], [20, 21]], + [[11, 12], [21, 22]], + [[12, 13], [22, 23]]]]) + assert_array_equal(arr_view, expected) + + def test_2d_with_axis(self): + i, j = np.ogrid[:3, :4] + arr = 10*i + j + arr_view = sliding_window_view(arr, 3, 0) + expected = np.array([[[0, 10, 20], + [1, 11, 21], + [2, 12, 22], + [3, 13, 23]]]) + assert_array_equal(arr_view, expected) + + def test_2d_repeated_axis(self): + i, j = np.ogrid[:3, :4] + arr = 10*i + j + arr_view = sliding_window_view(arr, (2, 3), (1, 1)) + expected = np.array([[[[0, 1, 2], + [1, 2, 3]]], + [[[10, 11, 12], + [11, 12, 13]]], + [[[20, 21, 22], + [21, 22, 23]]]]) + assert_array_equal(arr_view, expected) + + def test_2d_without_axis(self): + i, j = np.ogrid[:4, :4] + arr = 10*i + j + shape = (2, 3) + arr_view = sliding_window_view(arr, shape) + expected = np.array([[[[0, 1, 2], [10, 11, 12]], + [[1, 2, 3], [11, 12, 13]]], + [[[10, 11, 12], [20, 21, 22]], + [[11, 12, 13], [21, 22, 23]]], + [[[20, 21, 22], [30, 31, 32]], + [[21, 22, 23], [31, 32, 33]]]]) + assert_array_equal(arr_view, expected) + + def test_errors(self): + i, j = np.ogrid[:4, :4] + arr = 10*i + j + with pytest.raises(ValueError, match='cannot contain negative values'): + sliding_window_view(arr, (-1, 3)) + with pytest.raises( + ValueError, + match='must provide window_shape for all dimensions of `x`'): + sliding_window_view(arr, (1,)) + with pytest.raises( + ValueError, + match='Must provide matching length window_shape and axis'): + sliding_window_view(arr, (1, 3, 4), axis=(0, 1)) + with pytest.raises( + ValueError, + match='window shape cannot be larger than input array'): + sliding_window_view(arr, (5, 5)) + + def test_writeable(self): + arr = np.arange(5) + view = sliding_window_view(arr, 2, writeable=False) + assert_(not view.flags.writeable) + with pytest.raises( + ValueError, + match='assignment destination is read-only'): + view[0, 0] = 3 + view = sliding_window_view(arr, 2, writeable=True) + assert_(view.flags.writeable) + view[0, 1] = 3 + assert_array_equal(arr, np.array([0, 3, 2, 3, 4])) + + def test_subok(self): + class MyArray(np.ndarray): + pass + + arr = np.arange(5).view(MyArray) + assert_(not isinstance(sliding_window_view(arr, 2, + subok=False), + MyArray)) + assert_(isinstance(sliding_window_view(arr, 2, subok=True), MyArray)) + # Default behavior + assert_(not isinstance(sliding_window_view(arr, 2), MyArray)) + + +def as_strided_writeable(): + arr = np.ones(10) + view = as_strided(arr, writeable=False) + assert_(not view.flags.writeable) + + # Check that writeable also is fine: + view = as_strided(arr, writeable=True) + assert_(view.flags.writeable) + view[...] 
= 3 + assert_array_equal(arr, np.full_like(arr, 3)) + + # Test that things do not break down for readonly: + arr.flags.writeable = False + view = as_strided(arr, writeable=False) + view = as_strided(arr, writeable=True) + assert_(not view.flags.writeable) + + +class VerySimpleSubClass(np.ndarray): + def __new__(cls, *args, **kwargs): + return np.array(*args, subok=True, **kwargs).view(cls) + + +class SimpleSubClass(VerySimpleSubClass): + def __new__(cls, *args, **kwargs): + self = np.array(*args, subok=True, **kwargs).view(cls) + self.info = 'simple' + return self + + def __array_finalize__(self, obj): + self.info = getattr(obj, 'info', '') + ' finalized' + + +def test_subclasses(): + # test that subclass is preserved only if subok=True + a = VerySimpleSubClass([1, 2, 3, 4]) + assert_(type(a) is VerySimpleSubClass) + a_view = as_strided(a, shape=(2,), strides=(2 * a.itemsize,)) + assert_(type(a_view) is np.ndarray) + a_view = as_strided(a, shape=(2,), strides=(2 * a.itemsize,), subok=True) + assert_(type(a_view) is VerySimpleSubClass) + # test that if a subclass has __array_finalize__, it is used + a = SimpleSubClass([1, 2, 3, 4]) + a_view = as_strided(a, shape=(2,), strides=(2 * a.itemsize,), subok=True) + assert_(type(a_view) is SimpleSubClass) + assert_(a_view.info == 'simple finalized') + + # similar tests for broadcast_arrays + b = np.arange(len(a)).reshape(-1, 1) + a_view, b_view = broadcast_arrays(a, b) + assert_(type(a_view) is np.ndarray) + assert_(type(b_view) is np.ndarray) + assert_(a_view.shape == b_view.shape) + a_view, b_view = broadcast_arrays(a, b, subok=True) + assert_(type(a_view) is SimpleSubClass) + assert_(a_view.info == 'simple finalized') + assert_(type(b_view) is np.ndarray) + assert_(a_view.shape == b_view.shape) + + # and for broadcast_to + shape = (2, 4) + a_view = broadcast_to(a, shape) + assert_(type(a_view) is np.ndarray) + assert_(a_view.shape == shape) + a_view = broadcast_to(a, shape, subok=True) + assert_(type(a_view) is SimpleSubClass) + assert_(a_view.info == 'simple finalized') + assert_(a_view.shape == shape) + + +def test_writeable(): + # broadcast_to should return a readonly array + original = np.array([1, 2, 3]) + result = broadcast_to(original, (2, 3)) + assert_equal(result.flags.writeable, False) + assert_raises(ValueError, result.__setitem__, slice(None), 0) + + # but the result of broadcast_arrays needs to be writeable, to + # preserve backwards compatibility + for is_broadcast, results in [(False, broadcast_arrays(original,)), + (True, broadcast_arrays(0, original))]: + for result in results: + # This will change to False in a future version + if is_broadcast: + with assert_warns(FutureWarning): + assert_equal(result.flags.writeable, True) + with assert_warns(DeprecationWarning): + result[:] = 0 + # Warning not emitted, writing to the array resets it + assert_equal(result.flags.writeable, True) + else: + # No warning: + assert_equal(result.flags.writeable, True) + + for results in [broadcast_arrays(original), + broadcast_arrays(0, original)]: + for result in results: + # resets the warn_on_write DeprecationWarning + result.flags.writeable = True + # check: no warning emitted + assert_equal(result.flags.writeable, True) + result[:] = 0 + + # keep readonly input readonly + original.flags.writeable = False + _, result = broadcast_arrays(0, original) + assert_equal(result.flags.writeable, False) + + # regression test for GH6491 + shape = (2,) + strides = [0] + tricky_array = as_strided(np.array(0), shape, strides) + other = np.zeros((1,)) + first, 
second = broadcast_arrays(tricky_array, other) + assert_(first.shape == second.shape) + + +def test_writeable_memoryview(): + # The result of broadcast_arrays exports as a non-writeable memoryview + # because otherwise there is no good way to opt in to the new behaviour + # (i.e. you would need to set writeable to False explicitly). + # See gh-13929. + original = np.array([1, 2, 3]) + + for is_broadcast, results in [(False, broadcast_arrays(original,)), + (True, broadcast_arrays(0, original))]: + for result in results: + # This will change to False in a future version + if is_broadcast: + # memoryview(result, writable=True) will give warning but cannot + # be tested using the python API. + assert memoryview(result).readonly + else: + assert not memoryview(result).readonly + + +def test_reference_types(): + input_array = np.array('a', dtype=object) + expected = np.array(['a'] * 3, dtype=object) + actual = broadcast_to(input_array, (3,)) + assert_array_equal(expected, actual) + + actual, _ = broadcast_arrays(input_array, np.ones(3)) + assert_array_equal(expected, actual) diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/test_twodim_base.py b/.local/lib/python3.8/site-packages/numpy/lib/tests/test_twodim_base.py new file mode 100644 index 0000000..c1c5a16 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/tests/test_twodim_base.py @@ -0,0 +1,542 @@ +"""Test functions for matrix module + +""" +from numpy.testing import ( + assert_equal, assert_array_equal, assert_array_max_ulp, + assert_array_almost_equal, assert_raises, assert_ + ) + +from numpy import ( + arange, add, fliplr, flipud, zeros, ones, eye, array, diag, histogram2d, + tri, mask_indices, triu_indices, triu_indices_from, tril_indices, + tril_indices_from, vander, + ) + +import numpy as np + + +from numpy.core.tests.test_overrides import requires_array_function + + +import pytest + + +def get_mat(n): + data = arange(n) + data = add.outer(data, data) + return data + + +class TestEye: + def test_basic(self): + assert_equal(eye(4), + array([[1, 0, 0, 0], + [0, 1, 0, 0], + [0, 0, 1, 0], + [0, 0, 0, 1]])) + + assert_equal(eye(4, dtype='f'), + array([[1, 0, 0, 0], + [0, 1, 0, 0], + [0, 0, 1, 0], + [0, 0, 0, 1]], 'f')) + + assert_equal(eye(3) == 1, + eye(3, dtype=bool)) + + def test_diag(self): + assert_equal(eye(4, k=1), + array([[0, 1, 0, 0], + [0, 0, 1, 0], + [0, 0, 0, 1], + [0, 0, 0, 0]])) + + assert_equal(eye(4, k=-1), + array([[0, 0, 0, 0], + [1, 0, 0, 0], + [0, 1, 0, 0], + [0, 0, 1, 0]])) + + def test_2d(self): + assert_equal(eye(4, 3), + array([[1, 0, 0], + [0, 1, 0], + [0, 0, 1], + [0, 0, 0]])) + + assert_equal(eye(3, 4), + array([[1, 0, 0, 0], + [0, 1, 0, 0], + [0, 0, 1, 0]])) + + def test_diag2d(self): + assert_equal(eye(3, 4, k=2), + array([[0, 0, 1, 0], + [0, 0, 0, 1], + [0, 0, 0, 0]])) + + assert_equal(eye(4, 3, k=-2), + array([[0, 0, 0], + [0, 0, 0], + [1, 0, 0], + [0, 1, 0]])) + + def test_eye_bounds(self): + assert_equal(eye(2, 2, 1), [[0, 1], [0, 0]]) + assert_equal(eye(2, 2, -1), [[0, 0], [1, 0]]) + assert_equal(eye(2, 2, 2), [[0, 0], [0, 0]]) + assert_equal(eye(2, 2, -2), [[0, 0], [0, 0]]) + assert_equal(eye(3, 2, 2), [[0, 0], [0, 0], [0, 0]]) + assert_equal(eye(3, 2, 1), [[0, 1], [0, 0], [0, 0]]) + assert_equal(eye(3, 2, -1), [[0, 0], [1, 0], [0, 1]]) + assert_equal(eye(3, 2, -2), [[0, 0], [0, 0], [1, 0]]) + assert_equal(eye(3, 2, -3), [[0, 0], [0, 0], [0, 0]]) + + def test_strings(self): + assert_equal(eye(2, 2, dtype='S3'), + [[b'1', b''], [b'', b'1']]) + + def test_bool(self): + 
assert_equal(eye(2, 2, dtype=bool), [[True, False], [False, True]]) + + def test_order(self): + mat_c = eye(4, 3, k=-1) + mat_f = eye(4, 3, k=-1, order='F') + assert_equal(mat_c, mat_f) + assert mat_c.flags.c_contiguous + assert not mat_c.flags.f_contiguous + assert not mat_f.flags.c_contiguous + assert mat_f.flags.f_contiguous + + +class TestDiag: + def test_vector(self): + vals = (100 * arange(5)).astype('l') + b = zeros((5, 5)) + for k in range(5): + b[k, k] = vals[k] + assert_equal(diag(vals), b) + b = zeros((7, 7)) + c = b.copy() + for k in range(5): + b[k, k + 2] = vals[k] + c[k + 2, k] = vals[k] + assert_equal(diag(vals, k=2), b) + assert_equal(diag(vals, k=-2), c) + + def test_matrix(self, vals=None): + if vals is None: + vals = (100 * get_mat(5) + 1).astype('l') + b = zeros((5,)) + for k in range(5): + b[k] = vals[k, k] + assert_equal(diag(vals), b) + b = b * 0 + for k in range(3): + b[k] = vals[k, k + 2] + assert_equal(diag(vals, 2), b[:3]) + for k in range(3): + b[k] = vals[k + 2, k] + assert_equal(diag(vals, -2), b[:3]) + + def test_fortran_order(self): + vals = array((100 * get_mat(5) + 1), order='F', dtype='l') + self.test_matrix(vals) + + def test_diag_bounds(self): + A = [[1, 2], [3, 4], [5, 6]] + assert_equal(diag(A, k=2), []) + assert_equal(diag(A, k=1), [2]) + assert_equal(diag(A, k=0), [1, 4]) + assert_equal(diag(A, k=-1), [3, 6]) + assert_equal(diag(A, k=-2), [5]) + assert_equal(diag(A, k=-3), []) + + def test_failure(self): + assert_raises(ValueError, diag, [[[1]]]) + + +class TestFliplr: + def test_basic(self): + assert_raises(ValueError, fliplr, ones(4)) + a = get_mat(4) + b = a[:, ::-1] + assert_equal(fliplr(a), b) + a = [[0, 1, 2], + [3, 4, 5]] + b = [[2, 1, 0], + [5, 4, 3]] + assert_equal(fliplr(a), b) + + +class TestFlipud: + def test_basic(self): + a = get_mat(4) + b = a[::-1, :] + assert_equal(flipud(a), b) + a = [[0, 1, 2], + [3, 4, 5]] + b = [[3, 4, 5], + [0, 1, 2]] + assert_equal(flipud(a), b) + + +class TestHistogram2d: + def test_simple(self): + x = array( + [0.41702200, 0.72032449, 1.1437481e-4, 0.302332573, 0.146755891]) + y = array( + [0.09233859, 0.18626021, 0.34556073, 0.39676747, 0.53881673]) + xedges = np.linspace(0, 1, 10) + yedges = np.linspace(0, 1, 10) + H = histogram2d(x, y, (xedges, yedges))[0] + answer = array( + [[0, 0, 0, 1, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 1, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0], + [1, 0, 1, 0, 0, 0, 0, 0, 0], + [0, 1, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0]]) + assert_array_equal(H.T, answer) + H = histogram2d(x, y, xedges)[0] + assert_array_equal(H.T, answer) + H, xedges, yedges = histogram2d(list(range(10)), list(range(10))) + assert_array_equal(H, eye(10, 10)) + assert_array_equal(xedges, np.linspace(0, 9, 11)) + assert_array_equal(yedges, np.linspace(0, 9, 11)) + + def test_asym(self): + x = array([1, 1, 2, 3, 4, 4, 4, 5]) + y = array([1, 3, 2, 0, 1, 2, 3, 4]) + H, xed, yed = histogram2d( + x, y, (6, 5), range=[[0, 6], [0, 5]], density=True) + answer = array( + [[0., 0, 0, 0, 0], + [0, 1, 0, 1, 0], + [0, 0, 1, 0, 0], + [1, 0, 0, 0, 0], + [0, 1, 1, 1, 0], + [0, 0, 0, 0, 1]]) + assert_array_almost_equal(H, answer/8., 3) + assert_array_equal(xed, np.linspace(0, 6, 7)) + assert_array_equal(yed, np.linspace(0, 5, 6)) + + def test_density(self): + x = array([1, 2, 3, 1, 2, 3, 1, 2, 3]) + y = array([1, 1, 1, 2, 2, 2, 3, 3, 3]) + H, xed, yed = histogram2d( + x, y, [[1, 2, 3, 5], [1, 2, 3, 5]], density=True) + answer = 
array([[1, 1, .5], + [1, 1, .5], + [.5, .5, .25]])/9. + assert_array_almost_equal(H, answer, 3) + + def test_all_outliers(self): + r = np.random.rand(100) + 1. + 1e6 # histogramdd rounds by decimal=6 + H, xed, yed = histogram2d(r, r, (4, 5), range=([0, 1], [0, 1])) + assert_array_equal(H, 0) + + def test_empty(self): + a, edge1, edge2 = histogram2d([], [], bins=([0, 1], [0, 1])) + assert_array_max_ulp(a, array([[0.]])) + + a, edge1, edge2 = histogram2d([], [], bins=4) + assert_array_max_ulp(a, np.zeros((4, 4))) + + def test_binparameter_combination(self): + x = array( + [0, 0.09207008, 0.64575234, 0.12875982, 0.47390599, + 0.59944483, 1]) + y = array( + [0, 0.14344267, 0.48988575, 0.30558665, 0.44700682, + 0.15886423, 1]) + edges = (0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1) + H, xe, ye = histogram2d(x, y, (edges, 4)) + answer = array( + [[2., 0., 0., 0.], + [0., 1., 0., 0.], + [0., 0., 0., 0.], + [0., 0., 0., 0.], + [0., 1., 0., 0.], + [1., 0., 0., 0.], + [0., 1., 0., 0.], + [0., 0., 0., 0.], + [0., 0., 0., 0.], + [0., 0., 0., 1.]]) + assert_array_equal(H, answer) + assert_array_equal(ye, array([0., 0.25, 0.5, 0.75, 1])) + H, xe, ye = histogram2d(x, y, (4, edges)) + answer = array( + [[1., 1., 0., 1., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.], + [0., 1., 0., 0., 1., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]]) + assert_array_equal(H, answer) + assert_array_equal(xe, array([0., 0.25, 0.5, 0.75, 1])) + + @requires_array_function + def test_dispatch(self): + class ShouldDispatch: + def __array_function__(self, function, types, args, kwargs): + return types, args, kwargs + + xy = [1, 2] + s_d = ShouldDispatch() + r = histogram2d(s_d, xy) + # Cannot use assert_equal since that dispatches... + assert_(r == ((ShouldDispatch,), (s_d, xy), {})) + r = histogram2d(xy, s_d) + assert_(r == ((ShouldDispatch,), (xy, s_d), {})) + r = histogram2d(xy, xy, bins=s_d) + assert_(r, ((ShouldDispatch,), (xy, xy), dict(bins=s_d))) + r = histogram2d(xy, xy, bins=[s_d, 5]) + assert_(r, ((ShouldDispatch,), (xy, xy), dict(bins=[s_d, 5]))) + assert_raises(Exception, histogram2d, xy, xy, bins=[s_d]) + r = histogram2d(xy, xy, weights=s_d) + assert_(r, ((ShouldDispatch,), (xy, xy), dict(weights=s_d))) + + @pytest.mark.parametrize(("x_len", "y_len"), [(10, 11), (20, 19)]) + def test_bad_length(self, x_len, y_len): + x, y = np.ones(x_len), np.ones(y_len) + with pytest.raises(ValueError, + match='x and y must have the same length.'): + histogram2d(x, y) + + +class TestTri: + def test_dtype(self): + out = array([[1, 0, 0], + [1, 1, 0], + [1, 1, 1]]) + assert_array_equal(tri(3), out) + assert_array_equal(tri(3, dtype=bool), out.astype(bool)) + + +def test_tril_triu_ndim2(): + for dtype in np.typecodes['AllFloat'] + np.typecodes['AllInteger']: + a = np.ones((2, 2), dtype=dtype) + b = np.tril(a) + c = np.triu(a) + assert_array_equal(b, [[1, 0], [1, 1]]) + assert_array_equal(c, b.T) + # should return the same dtype as the original array + assert_equal(b.dtype, a.dtype) + assert_equal(c.dtype, a.dtype) + + +def test_tril_triu_ndim3(): + for dtype in np.typecodes['AllFloat'] + np.typecodes['AllInteger']: + a = np.array([ + [[1, 1], [1, 1]], + [[1, 1], [1, 0]], + [[1, 1], [0, 0]], + ], dtype=dtype) + a_tril_desired = np.array([ + [[1, 0], [1, 1]], + [[1, 0], [1, 0]], + [[1, 0], [0, 0]], + ], dtype=dtype) + a_triu_desired = np.array([ + [[1, 1], [0, 1]], + [[1, 1], [0, 0]], + [[1, 1], [0, 0]], + ], dtype=dtype) + a_triu_observed = np.triu(a) + a_tril_observed = np.tril(a) + 
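+        # (Added clarifying note: for ndim > 2, tril/triu mask only the
+        # trailing two axes, so each 2x2 block of `a` is zeroed
+        # independently, which is what the desired arrays above encode.)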
assert_array_equal(a_triu_observed, a_triu_desired) + assert_array_equal(a_tril_observed, a_tril_desired) + assert_equal(a_triu_observed.dtype, a.dtype) + assert_equal(a_tril_observed.dtype, a.dtype) + + +def test_tril_triu_with_inf(): + # Issue 4859 + arr = np.array([[1, 1, np.inf], + [1, 1, 1], + [np.inf, 1, 1]]) + out_tril = np.array([[1, 0, 0], + [1, 1, 0], + [np.inf, 1, 1]]) + out_triu = out_tril.T + assert_array_equal(np.triu(arr), out_triu) + assert_array_equal(np.tril(arr), out_tril) + + +def test_tril_triu_dtype(): + # Issue 4916 + # tril and triu should return the same dtype as input + for c in np.typecodes['All']: + if c == 'V': + continue + arr = np.zeros((3, 3), dtype=c) + assert_equal(np.triu(arr).dtype, arr.dtype) + assert_equal(np.tril(arr).dtype, arr.dtype) + + # check special cases + arr = np.array([['2001-01-01T12:00', '2002-02-03T13:56'], + ['2004-01-01T12:00', '2003-01-03T13:45']], + dtype='datetime64') + assert_equal(np.triu(arr).dtype, arr.dtype) + assert_equal(np.tril(arr).dtype, arr.dtype) + + arr = np.zeros((3,3), dtype='f4,f4') + assert_equal(np.triu(arr).dtype, arr.dtype) + assert_equal(np.tril(arr).dtype, arr.dtype) + + +def test_mask_indices(): + # simple test without offset + iu = mask_indices(3, np.triu) + a = np.arange(9).reshape(3, 3) + assert_array_equal(a[iu], array([0, 1, 2, 4, 5, 8])) + # Now with an offset + iu1 = mask_indices(3, np.triu, 1) + assert_array_equal(a[iu1], array([1, 2, 5])) + + +def test_tril_indices(): + # indices without and with offset + il1 = tril_indices(4) + il2 = tril_indices(4, k=2) + il3 = tril_indices(4, m=5) + il4 = tril_indices(4, k=2, m=5) + + a = np.array([[1, 2, 3, 4], + [5, 6, 7, 8], + [9, 10, 11, 12], + [13, 14, 15, 16]]) + b = np.arange(1, 21).reshape(4, 5) + + # indexing: + assert_array_equal(a[il1], + array([1, 5, 6, 9, 10, 11, 13, 14, 15, 16])) + assert_array_equal(b[il3], + array([1, 6, 7, 11, 12, 13, 16, 17, 18, 19])) + + # And for assigning values: + a[il1] = -1 + assert_array_equal(a, + array([[-1, 2, 3, 4], + [-1, -1, 7, 8], + [-1, -1, -1, 12], + [-1, -1, -1, -1]])) + b[il3] = -1 + assert_array_equal(b, + array([[-1, 2, 3, 4, 5], + [-1, -1, 8, 9, 10], + [-1, -1, -1, 14, 15], + [-1, -1, -1, -1, 20]])) + # These cover almost the whole array (two diagonals right of the main one): + a[il2] = -10 + assert_array_equal(a, + array([[-10, -10, -10, 4], + [-10, -10, -10, -10], + [-10, -10, -10, -10], + [-10, -10, -10, -10]])) + b[il4] = -10 + assert_array_equal(b, + array([[-10, -10, -10, 4, 5], + [-10, -10, -10, -10, 10], + [-10, -10, -10, -10, -10], + [-10, -10, -10, -10, -10]])) + + +class TestTriuIndices: + def test_triu_indices(self): + iu1 = triu_indices(4) + iu2 = triu_indices(4, k=2) + iu3 = triu_indices(4, m=5) + iu4 = triu_indices(4, k=2, m=5) + + a = np.array([[1, 2, 3, 4], + [5, 6, 7, 8], + [9, 10, 11, 12], + [13, 14, 15, 16]]) + b = np.arange(1, 21).reshape(4, 5) + + # Both for indexing: + assert_array_equal(a[iu1], + array([1, 2, 3, 4, 6, 7, 8, 11, 12, 16])) + assert_array_equal(b[iu3], + array([1, 2, 3, 4, 5, 7, 8, 9, + 10, 13, 14, 15, 19, 20])) + + # And for assigning values: + a[iu1] = -1 + assert_array_equal(a, + array([[-1, -1, -1, -1], + [5, -1, -1, -1], + [9, 10, -1, -1], + [13, 14, 15, -1]])) + b[iu3] = -1 + assert_array_equal(b, + array([[-1, -1, -1, -1, -1], + [6, -1, -1, -1, -1], + [11, 12, -1, -1, -1], + [16, 17, 18, -1, -1]])) + + # These cover almost the whole array (two diagonals right of the + # main one): + a[iu2] = -10 + assert_array_equal(a, + array([[-1, -1, -10, -10], + [5, -1, -1, -10], 
+ [9, 10, -1, -1], + [13, 14, 15, -1]])) + b[iu4] = -10 + assert_array_equal(b, + array([[-1, -1, -10, -10, -10], + [6, -1, -1, -10, -10], + [11, 12, -1, -1, -10], + [16, 17, 18, -1, -1]])) + + +class TestTrilIndicesFrom: + def test_exceptions(self): + assert_raises(ValueError, tril_indices_from, np.ones((2,))) + assert_raises(ValueError, tril_indices_from, np.ones((2, 2, 2))) + # assert_raises(ValueError, tril_indices_from, np.ones((2, 3))) + + +class TestTriuIndicesFrom: + def test_exceptions(self): + assert_raises(ValueError, triu_indices_from, np.ones((2,))) + assert_raises(ValueError, triu_indices_from, np.ones((2, 2, 2))) + # assert_raises(ValueError, triu_indices_from, np.ones((2, 3))) + + +class TestVander: + def test_basic(self): + c = np.array([0, 1, -2, 3]) + v = vander(c) + powers = np.array([[0, 0, 0, 0, 1], + [1, 1, 1, 1, 1], + [16, -8, 4, -2, 1], + [81, 27, 9, 3, 1]]) + # Check default value of N: + assert_array_equal(v, powers[:, 1:]) + # Check a range of N values, including 0 and 5 (greater than default) + m = powers.shape[1] + for n in range(6): + v = vander(c, N=n) + assert_array_equal(v, powers[:, m-n:m]) + + def test_dtypes(self): + c = array([11, -12, 13], dtype=np.int8) + v = vander(c) + expected = np.array([[121, 11, 1], + [144, -12, 1], + [169, 13, 1]]) + assert_array_equal(v, expected) + + c = array([1.0+1j, 1.0-1j]) + v = vander(c, N=3) + expected = np.array([[2j, 1+1j, 1], + [-2j, 1-1j, 1]]) + # The data is floating point, but the values are small integers, + # so assert_array_equal *should* be safe here (rather than, say, + # assert_array_almost_equal). + assert_array_equal(v, expected) diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/test_type_check.py b/.local/lib/python3.8/site-packages/numpy/lib/tests/test_type_check.py new file mode 100644 index 0000000..3f4ca63 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/tests/test_type_check.py @@ -0,0 +1,478 @@ +import numpy as np +from numpy.testing import ( + assert_, assert_equal, assert_array_equal, assert_raises + ) +from numpy.lib.type_check import ( + common_type, mintypecode, isreal, iscomplex, isposinf, isneginf, + nan_to_num, isrealobj, iscomplexobj, asfarray, real_if_close + ) + + +def assert_all(x): + assert_(np.all(x), x) + + +class TestCommonType: + def test_basic(self): + ai32 = np.array([[1, 2], [3, 4]], dtype=np.int32) + af16 = np.array([[1, 2], [3, 4]], dtype=np.float16) + af32 = np.array([[1, 2], [3, 4]], dtype=np.float32) + af64 = np.array([[1, 2], [3, 4]], dtype=np.float64) + acs = np.array([[1+5j, 2+6j], [3+7j, 4+8j]], dtype=np.csingle) + acd = np.array([[1+5j, 2+6j], [3+7j, 4+8j]], dtype=np.cdouble) + assert_(common_type(ai32) == np.float64) + assert_(common_type(af16) == np.float16) + assert_(common_type(af32) == np.float32) + assert_(common_type(af64) == np.float64) + assert_(common_type(acs) == np.csingle) + assert_(common_type(acd) == np.cdouble) + + +class TestMintypecode: + + def test_default_1(self): + for itype in '1bcsuwil': + assert_equal(mintypecode(itype), 'd') + assert_equal(mintypecode('f'), 'f') + assert_equal(mintypecode('d'), 'd') + assert_equal(mintypecode('F'), 'F') + assert_equal(mintypecode('D'), 'D') + + def test_default_2(self): + for itype in '1bcsuwil': + assert_equal(mintypecode(itype+'f'), 'f') + assert_equal(mintypecode(itype+'d'), 'd') + assert_equal(mintypecode(itype+'F'), 'F') + assert_equal(mintypecode(itype+'D'), 'D') + assert_equal(mintypecode('ff'), 'f') + assert_equal(mintypecode('fd'), 'd') + assert_equal(mintypecode('fF'), 
'F') + assert_equal(mintypecode('fD'), 'D') + assert_equal(mintypecode('df'), 'd') + assert_equal(mintypecode('dd'), 'd') + #assert_equal(mintypecode('dF',savespace=1),'F') + assert_equal(mintypecode('dF'), 'D') + assert_equal(mintypecode('dD'), 'D') + assert_equal(mintypecode('Ff'), 'F') + #assert_equal(mintypecode('Fd',savespace=1),'F') + assert_equal(mintypecode('Fd'), 'D') + assert_equal(mintypecode('FF'), 'F') + assert_equal(mintypecode('FD'), 'D') + assert_equal(mintypecode('Df'), 'D') + assert_equal(mintypecode('Dd'), 'D') + assert_equal(mintypecode('DF'), 'D') + assert_equal(mintypecode('DD'), 'D') + + def test_default_3(self): + assert_equal(mintypecode('fdF'), 'D') + #assert_equal(mintypecode('fdF',savespace=1),'F') + assert_equal(mintypecode('fdD'), 'D') + assert_equal(mintypecode('fFD'), 'D') + assert_equal(mintypecode('dFD'), 'D') + + assert_equal(mintypecode('ifd'), 'd') + assert_equal(mintypecode('ifF'), 'F') + assert_equal(mintypecode('ifD'), 'D') + assert_equal(mintypecode('idF'), 'D') + #assert_equal(mintypecode('idF',savespace=1),'F') + assert_equal(mintypecode('idD'), 'D') + + +class TestIsscalar: + + def test_basic(self): + assert_(np.isscalar(3)) + assert_(not np.isscalar([3])) + assert_(not np.isscalar((3,))) + assert_(np.isscalar(3j)) + assert_(np.isscalar(4.0)) + + +class TestReal: + + def test_real(self): + y = np.random.rand(10,) + assert_array_equal(y, np.real(y)) + + y = np.array(1) + out = np.real(y) + assert_array_equal(y, out) + assert_(isinstance(out, np.ndarray)) + + y = 1 + out = np.real(y) + assert_equal(y, out) + assert_(not isinstance(out, np.ndarray)) + + def test_cmplx(self): + y = np.random.rand(10,)+1j*np.random.rand(10,) + assert_array_equal(y.real, np.real(y)) + + y = np.array(1 + 1j) + out = np.real(y) + assert_array_equal(y.real, out) + assert_(isinstance(out, np.ndarray)) + + y = 1 + 1j + out = np.real(y) + assert_equal(1.0, out) + assert_(not isinstance(out, np.ndarray)) + + +class TestImag: + + def test_real(self): + y = np.random.rand(10,) + assert_array_equal(0, np.imag(y)) + + y = np.array(1) + out = np.imag(y) + assert_array_equal(0, out) + assert_(isinstance(out, np.ndarray)) + + y = 1 + out = np.imag(y) + assert_equal(0, out) + assert_(not isinstance(out, np.ndarray)) + + def test_cmplx(self): + y = np.random.rand(10,)+1j*np.random.rand(10,) + assert_array_equal(y.imag, np.imag(y)) + + y = np.array(1 + 1j) + out = np.imag(y) + assert_array_equal(y.imag, out) + assert_(isinstance(out, np.ndarray)) + + y = 1 + 1j + out = np.imag(y) + assert_equal(1.0, out) + assert_(not isinstance(out, np.ndarray)) + + +class TestIscomplex: + + def test_fail(self): + z = np.array([-1, 0, 1]) + res = iscomplex(z) + assert_(not np.sometrue(res, axis=0)) + + def test_pass(self): + z = np.array([-1j, 1, 0]) + res = iscomplex(z) + assert_array_equal(res, [1, 0, 0]) + + +class TestIsreal: + + def test_pass(self): + z = np.array([-1, 0, 1j]) + res = isreal(z) + assert_array_equal(res, [1, 1, 0]) + + def test_fail(self): + z = np.array([-1j, 1, 0]) + res = isreal(z) + assert_array_equal(res, [0, 1, 1]) + + +class TestIscomplexobj: + + def test_basic(self): + z = np.array([-1, 0, 1]) + assert_(not iscomplexobj(z)) + z = np.array([-1j, 0, -1]) + assert_(iscomplexobj(z)) + + def test_scalar(self): + assert_(not iscomplexobj(1.0)) + assert_(iscomplexobj(1+0j)) + + def test_list(self): + assert_(iscomplexobj([3, 1+0j, True])) + assert_(not iscomplexobj([3, 1, True])) + + def test_duck(self): + class DummyComplexArray: + @property + def dtype(self): + return 
np.dtype(complex)
+        dummy = DummyComplexArray()
+        assert_(iscomplexobj(dummy))
+
+    def test_pandas_duck(self):
+        # This tests a custom np.dtype duck-typed class, such as used by pandas
+        # (pandas.core.dtypes)
+        class PdComplex(np.complex128):
+            pass
+        class PdDtype:
+            name = 'category'
+            names = None
+            type = PdComplex
+            kind = 'c'
+            str = '<c16'
+            base = np.dtype('complex128')
+        class DummyPd:
+            @property
+            def dtype(self):
+                return PdDtype
+        dummy = DummyPd()
+        assert_(iscomplexobj(dummy))
+
+
+class TestNanToNum:
+
+    def test_generic(self):
+        with np.errstate(divide='ignore', invalid='ignore'):
+            vals = nan_to_num(np.array((-1., 0, 1))/0.)
+        assert_all(vals[0] < -1e10) and assert_all(np.isfinite(vals[0]))
+        assert_(vals[1] == 0)
+        assert_all(vals[2] > 1e10) and assert_all(np.isfinite(vals[2]))
+        assert_equal(type(vals), np.ndarray)
+
+        # perform the same tests but with nan, posinf and neginf keywords
+        with np.errstate(divide='ignore', invalid='ignore'):
+            vals = nan_to_num(np.array((-1., 0, 1))/0.,
+                              nan=10, posinf=20, neginf=30)
+        assert_equal(vals, [30, 10, 20])
+        assert_all(np.isfinite(vals[[0, 2]]))
+        assert_equal(type(vals), np.ndarray)
+
+        # perform the same test but in-place
+        with np.errstate(divide='ignore', invalid='ignore'):
+            vals = np.array((-1., 0, 1))/0.
+        result = nan_to_num(vals, copy=False)
+
+        assert_(result is vals)
+        assert_all(vals[0] < -1e10) and assert_all(np.isfinite(vals[0]))
+        assert_(vals[1] == 0)
+        assert_all(vals[2] > 1e10) and assert_all(np.isfinite(vals[2]))
+        assert_equal(type(vals), np.ndarray)
+
+        # perform the same test but in-place
+        with np.errstate(divide='ignore', invalid='ignore'):
+            vals = np.array((-1., 0, 1))/0.
+        result = nan_to_num(vals, copy=False, nan=10, posinf=20, neginf=30)
+
+        assert_(result is vals)
+        assert_equal(vals, [30, 10, 20])
+        assert_all(np.isfinite(vals[[0, 2]]))
+        assert_equal(type(vals), np.ndarray)
+
+    def test_array(self):
+        vals = nan_to_num([1])
+        assert_array_equal(vals, np.array([1], int))
+        assert_equal(type(vals), np.ndarray)
+        vals = nan_to_num([1], nan=10, posinf=20, neginf=30)
+        assert_array_equal(vals, np.array([1], int))
+        assert_equal(type(vals), np.ndarray)
+
+    def test_integer(self):
+        vals = nan_to_num(1)
+        assert_all(vals == 1)
+        assert_equal(type(vals), np.int_)
+        vals = nan_to_num(1, nan=10, posinf=20, neginf=30)
+        assert_all(vals == 1)
+        assert_equal(type(vals), np.int_)
+
+    def test_float(self):
+        vals = nan_to_num(1.0)
+        assert_all(vals == 1.0)
+        assert_equal(type(vals), np.float_)
+        vals = nan_to_num(1.1, nan=10, posinf=20, neginf=30)
+        assert_all(vals == 1.1)
+        assert_equal(type(vals), np.float_)
+
+    def test_complex_good(self):
+        vals = nan_to_num(1+1j)
+        assert_all(vals == 1+1j)
+        assert_equal(type(vals), np.complex_)
+        vals = nan_to_num(1+1j, nan=10, posinf=20, neginf=30)
+        assert_all(vals == 1+1j)
+        assert_equal(type(vals), np.complex_)
+
+    def test_complex_bad(self):
+        with np.errstate(divide='ignore', invalid='ignore'):
+            v = 1 + 1j
+            v += np.array(0+1.j)/0.
+        vals = nan_to_num(v)
+        # !! This is actually (unexpectedly) zero
+        assert_all(np.isfinite(vals))
+        assert_equal(type(vals), np.complex_)
+
+    def test_complex_bad2(self):
+        with np.errstate(divide='ignore', invalid='ignore'):
+            v = 1 + 1j
+            v += np.array(-1+1.j)/0.
+        vals = nan_to_num(v)
+        assert_all(np.isfinite(vals))
+        assert_equal(type(vals), np.complex_)
+        # Fixme
+        #assert_all(vals.imag > 1e10) and assert_all(np.isfinite(vals))
+        # !! This is actually (unexpectedly) positive
+        # !! inf. Comment out for now, and see if it
+        # !! changes
+        #assert_all(vals.real < -1e10) and assert_all(np.isfinite(vals))
+
+    def test_do_not_rewrite_previous_keyword(self):
+        # This is done to test that when, for instance, nan=np.inf then these
+        # values are not rewritten by posinf keyword to the posinf value.
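+        # (Added worked sketch mirroring the asserts below, not an extra
+        # upstream check: for input [-inf, nan, inf],
+        # nan_to_num(..., nan=np.inf, posinf=999) maps nan -> inf and keeps
+        # that substituted inf rather than rewriting it to 999; only the
+        # original +inf becomes 999, and -inf becomes the most negative
+        # finite float.)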
+ with np.errstate(divide='ignore', invalid='ignore'): + vals = nan_to_num(np.array((-1., 0, 1))/0., nan=np.inf, posinf=999) + assert_all(np.isfinite(vals[[0, 2]])) + assert_all(vals[0] < -1e10) + assert_equal(vals[[1, 2]], [np.inf, 999]) + assert_equal(type(vals), np.ndarray) + + +class TestRealIfClose: + + def test_basic(self): + a = np.random.rand(10) + b = real_if_close(a+1e-15j) + assert_all(isrealobj(b)) + assert_array_equal(a, b) + b = real_if_close(a+1e-7j) + assert_all(iscomplexobj(b)) + b = real_if_close(a+1e-7j, tol=1e-6) + assert_all(isrealobj(b)) + + +class TestArrayConversion: + + def test_asfarray(self): + a = asfarray(np.array([1, 2, 3])) + assert_equal(a.__class__, np.ndarray) + assert_(np.issubdtype(a.dtype, np.floating)) + + # previously this would infer dtypes from arrays, unlike every single + # other numpy function + assert_raises(TypeError, + asfarray, np.array([1, 2, 3]), dtype=np.array(1.0)) diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/test_ufunclike.py b/.local/lib/python3.8/site-packages/numpy/lib/tests/test_ufunclike.py new file mode 100644 index 0000000..c280b69 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/tests/test_ufunclike.py @@ -0,0 +1,104 @@ +import numpy as np +import numpy.core as nx +import numpy.lib.ufunclike as ufl +from numpy.testing import ( + assert_, assert_equal, assert_array_equal, assert_warns, assert_raises +) + + +class TestUfunclike: + + def test_isposinf(self): + a = nx.array([nx.inf, -nx.inf, nx.nan, 0.0, 3.0, -3.0]) + out = nx.zeros(a.shape, bool) + tgt = nx.array([True, False, False, False, False, False]) + + res = ufl.isposinf(a) + assert_equal(res, tgt) + res = ufl.isposinf(a, out) + assert_equal(res, tgt) + assert_equal(out, tgt) + + a = a.astype(np.complex_) + with assert_raises(TypeError): + ufl.isposinf(a) + + def test_isneginf(self): + a = nx.array([nx.inf, -nx.inf, nx.nan, 0.0, 3.0, -3.0]) + out = nx.zeros(a.shape, bool) + tgt = nx.array([False, True, False, False, False, False]) + + res = ufl.isneginf(a) + assert_equal(res, tgt) + res = ufl.isneginf(a, out) + assert_equal(res, tgt) + assert_equal(out, tgt) + + a = a.astype(np.complex_) + with assert_raises(TypeError): + ufl.isneginf(a) + + def test_fix(self): + a = nx.array([[1.0, 1.1, 1.5, 1.8], [-1.0, -1.1, -1.5, -1.8]]) + out = nx.zeros(a.shape, float) + tgt = nx.array([[1., 1., 1., 1.], [-1., -1., -1., -1.]]) + + res = ufl.fix(a) + assert_equal(res, tgt) + res = ufl.fix(a, out) + assert_equal(res, tgt) + assert_equal(out, tgt) + assert_equal(ufl.fix(3.14), 3) + + def test_fix_with_subclass(self): + class MyArray(nx.ndarray): + def __new__(cls, data, metadata=None): + res = nx.array(data, copy=True).view(cls) + res.metadata = metadata + return res + + def __array_wrap__(self, obj, context=None): + if isinstance(obj, MyArray): + obj.metadata = self.metadata + return obj + + def __array_finalize__(self, obj): + self.metadata = getattr(obj, 'metadata', None) + return self + + a = nx.array([1.1, -1.1]) + m = MyArray(a, metadata='foo') + f = ufl.fix(m) + assert_array_equal(f, nx.array([1, -1])) + assert_(isinstance(f, MyArray)) + assert_equal(f.metadata, 'foo') + + # check 0d arrays don't decay to scalars + m0d = m[0,...] 
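+        # (Added clarifying note: `m[0, ...]` rather than `m[0]` yields a 0-d
+        # ndarray subclass instead of collapsing to a scalar, which is the
+        # decay the comment above guards against.)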
+ m0d.metadata = 'bar' + f0d = ufl.fix(m0d) + assert_(isinstance(f0d, MyArray)) + assert_equal(f0d.metadata, 'bar') + + def test_deprecated(self): + # NumPy 1.13.0, 2017-04-26 + assert_warns(DeprecationWarning, ufl.fix, [1, 2], y=nx.empty(2)) + assert_warns(DeprecationWarning, ufl.isposinf, [1, 2], y=nx.empty(2)) + assert_warns(DeprecationWarning, ufl.isneginf, [1, 2], y=nx.empty(2)) + + def test_scalar(self): + x = np.inf + actual = np.isposinf(x) + expected = np.True_ + assert_equal(actual, expected) + assert_equal(type(actual), type(expected)) + + x = -3.4 + actual = np.fix(x) + expected = np.float64(-3.0) + assert_equal(actual, expected) + assert_equal(type(actual), type(expected)) + + out = np.array(0.0) + actual = np.fix(x, out=out) + assert_(actual is out) diff --git a/.local/lib/python3.8/site-packages/numpy/lib/tests/test_utils.py b/.local/lib/python3.8/site-packages/numpy/lib/tests/test_utils.py new file mode 100644 index 0000000..72c9183 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/tests/test_utils.py @@ -0,0 +1,174 @@ +import inspect +import sys +import pytest + +from numpy.core import arange +from numpy.testing import assert_, assert_equal, assert_raises_regex +from numpy.lib import deprecate, deprecate_with_doc +import numpy.lib.utils as utils + +from io import StringIO + + +@pytest.mark.skipif(sys.flags.optimize == 2, reason="Python running -OO") +@pytest.mark.skipif( + sys.version_info == (3, 10, 0, "candidate", 1), + reason="Broken as of bpo-44524", +) +def test_lookfor(): + out = StringIO() + utils.lookfor('eigenvalue', module='numpy', output=out, + import_modules=False) + out = out.getvalue() + assert_('numpy.linalg.eig' in out) + + +@deprecate +def old_func(self, x): + return x + + +@deprecate(message="Rather use new_func2") +def old_func2(self, x): + return x + + +def old_func3(self, x): + return x +new_func3 = deprecate(old_func3, old_name="old_func3", new_name="new_func3") + + +def old_func4(self, x): + """Summary. + + Further info. + """ + return x +new_func4 = deprecate(old_func4) + + +def old_func5(self, x): + """Summary. + + Bizarre indentation. + """ + return x +new_func5 = deprecate(old_func5, message="This function is\ndeprecated.") + + +def old_func6(self, x): + """ + Also in PEP-257. 
+ """ + return x +new_func6 = deprecate(old_func6) + + +@deprecate_with_doc(msg="Rather use new_func7") +def old_func7(self,x): + return x + + +def test_deprecate_decorator(): + assert_('deprecated' in old_func.__doc__) + + +def test_deprecate_decorator_message(): + assert_('Rather use new_func2' in old_func2.__doc__) + + +def test_deprecate_fn(): + assert_('old_func3' in new_func3.__doc__) + assert_('new_func3' in new_func3.__doc__) + + +def test_deprecate_with_doc_decorator_message(): + assert_('Rather use new_func7' in old_func7.__doc__) + + +@pytest.mark.skipif(sys.flags.optimize == 2, reason="-OO discards docstrings") +@pytest.mark.parametrize('old_func, new_func', [ + (old_func4, new_func4), + (old_func5, new_func5), + (old_func6, new_func6), +]) +def test_deprecate_help_indentation(old_func, new_func): + _compare_docs(old_func, new_func) + # Ensure we don't mess up the indentation + for knd, func in (('old', old_func), ('new', new_func)): + for li, line in enumerate(func.__doc__.split('\n')): + if li == 0: + assert line.startswith(' ') or not line.startswith(' '), knd + elif line: + assert line.startswith(' '), knd + + +def _compare_docs(old_func, new_func): + old_doc = inspect.getdoc(old_func) + new_doc = inspect.getdoc(new_func) + index = new_doc.index('\n\n') + 2 + assert_equal(new_doc[index:], old_doc) + + +@pytest.mark.skipif(sys.flags.optimize == 2, reason="-OO discards docstrings") +def test_deprecate_preserve_whitespace(): + assert_('\n Bizarre' in new_func5.__doc__) + + +def test_safe_eval_nameconstant(): + # Test if safe_eval supports Python 3.4 _ast.NameConstant + utils.safe_eval('None') + + +class TestByteBounds: + + def test_byte_bounds(self): + # pointer difference matches size * itemsize + # due to contiguity + a = arange(12).reshape(3, 4) + low, high = utils.byte_bounds(a) + assert_equal(high - low, a.size * a.itemsize) + + def test_unusual_order_positive_stride(self): + a = arange(12).reshape(3, 4) + b = a.T + low, high = utils.byte_bounds(b) + assert_equal(high - low, b.size * b.itemsize) + + def test_unusual_order_negative_stride(self): + a = arange(12).reshape(3, 4) + b = a.T[::-1] + low, high = utils.byte_bounds(b) + assert_equal(high - low, b.size * b.itemsize) + + def test_strided(self): + a = arange(12) + b = a[::2] + low, high = utils.byte_bounds(b) + # the largest pointer address is lost (even numbers only in the + # stride), and compensate addresses for striding by 2 + assert_equal(high - low, b.size * 2 * b.itemsize - b.itemsize) + + +def test_assert_raises_regex_context_manager(): + with assert_raises_regex(ValueError, 'no deprecation warning'): + raise ValueError('no deprecation warning') + + +def test_info_method_heading(): + # info(class) should only print "Methods:" heading if methods exist + + class NoPublicMethods: + pass + + class WithPublicMethods: + def first_method(): + pass + + def _has_method_heading(cls): + out = StringIO() + utils.info(cls, output=out) + return 'Methods:' in out.getvalue() + + assert _has_method_heading(WithPublicMethods) + assert not _has_method_heading(NoPublicMethods) diff --git a/.local/lib/python3.8/site-packages/numpy/lib/twodim_base.py b/.local/lib/python3.8/site-packages/numpy/lib/twodim_base.py new file mode 100644 index 0000000..3e5ad31 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/twodim_base.py @@ -0,0 +1,1127 @@ +""" Basic functions for manipulating 2d arrays + +""" +import functools + +from numpy.core.numeric import ( + asanyarray, arange, zeros, greater_equal, multiply, ones, + asarray, 
where, int8, int16, int32, int64, intp, empty, promote_types, + diagonal, nonzero, indices + ) +from numpy.core.overrides import set_array_function_like_doc, set_module +from numpy.core import overrides +from numpy.core import iinfo +from numpy.lib.stride_tricks import broadcast_to + + +__all__ = [ + 'diag', 'diagflat', 'eye', 'fliplr', 'flipud', 'tri', 'triu', + 'tril', 'vander', 'histogram2d', 'mask_indices', 'tril_indices', + 'tril_indices_from', 'triu_indices', 'triu_indices_from', ] + + +array_function_dispatch = functools.partial( + overrides.array_function_dispatch, module='numpy') + + +i1 = iinfo(int8) +i2 = iinfo(int16) +i4 = iinfo(int32) + + +def _min_int(low, high): + """ get small int that fits the range """ + if high <= i1.max and low >= i1.min: + return int8 + if high <= i2.max and low >= i2.min: + return int16 + if high <= i4.max and low >= i4.min: + return int32 + return int64 + + +def _flip_dispatcher(m): + return (m,) + + +@array_function_dispatch(_flip_dispatcher) +def fliplr(m): + """ + Reverse the order of elements along axis 1 (left/right). + + For a 2-D array, this flips the entries in each row in the left/right + direction. Columns are preserved, but appear in a different order than + before. + + Parameters + ---------- + m : array_like + Input array, must be at least 2-D. + + Returns + ------- + f : ndarray + A view of `m` with the columns reversed. Since a view + is returned, this operation is :math:`\\mathcal O(1)`. + + See Also + -------- + flipud : Flip array in the up/down direction. + flip : Flip array in one or more dimensions. + rot90 : Rotate array counterclockwise. + + Notes + ----- + Equivalent to ``m[:,::-1]`` or ``np.flip(m, axis=1)``. + Requires the array to be at least 2-D. + + Examples + -------- + >>> A = np.diag([1.,2.,3.]) + >>> A + array([[1., 0., 0.], + [0., 2., 0.], + [0., 0., 3.]]) + >>> np.fliplr(A) + array([[0., 0., 1.], + [0., 2., 0.], + [3., 0., 0.]]) + + >>> A = np.random.randn(2,3,5) + >>> np.all(np.fliplr(A) == A[:,::-1,...]) + True + + """ + m = asanyarray(m) + if m.ndim < 2: + raise ValueError("Input must be >= 2-d.") + return m[:, ::-1] + + +@array_function_dispatch(_flip_dispatcher) +def flipud(m): + """ + Reverse the order of elements along axis 0 (up/down). + + For a 2-D array, this flips the entries in each column in the up/down + direction. Rows are preserved, but appear in a different order than before. + + Parameters + ---------- + m : array_like + Input array. + + Returns + ------- + out : array_like + A view of `m` with the rows reversed. Since a view is + returned, this operation is :math:`\\mathcal O(1)`. + + See Also + -------- + fliplr : Flip array in the left/right direction. + flip : Flip array in one or more dimensions. + rot90 : Rotate array counterclockwise. + + Notes + ----- + Equivalent to ``m[::-1, ...]`` or ``np.flip(m, axis=0)``. + Requires the array to be at least 1-D. + + Examples + -------- + >>> A = np.diag([1.0, 2, 3]) + >>> A + array([[1., 0., 0.], + [0., 2., 0.], + [0., 0., 3.]]) + >>> np.flipud(A) + array([[0., 0., 3.], + [0., 2., 0.], + [1., 0., 0.]]) + + >>> A = np.random.randn(2,3,5) + >>> np.all(np.flipud(A) == A[::-1,...]) + True + + >>> np.flipud([1,2]) + array([2, 1]) + + """ + m = asanyarray(m) + if m.ndim < 1: + raise ValueError("Input must be >= 1-d.") + return m[::-1, ...] 
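+# A hedged illustration (added note, not part of the upstream source): both
+# flips above are O(1) because they only negate a stride, so each returns a
+# view that shares memory with its argument:
+#
+#   >>> a = np.arange(6).reshape(2, 3)
+#   >>> np.shares_memory(a, np.fliplr(a)) and np.shares_memory(a, np.flipud(a))
+#   True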
+ + +def _eye_dispatcher(N, M=None, k=None, dtype=None, order=None, *, like=None): + return (like,) + + +@set_array_function_like_doc +@set_module('numpy') +def eye(N, M=None, k=0, dtype=float, order='C', *, like=None): + """ + Return a 2-D array with ones on the diagonal and zeros elsewhere. + + Parameters + ---------- + N : int + Number of rows in the output. + M : int, optional + Number of columns in the output. If None, defaults to `N`. + k : int, optional + Index of the diagonal: 0 (the default) refers to the main diagonal, + a positive value refers to an upper diagonal, and a negative value + to a lower diagonal. + dtype : data-type, optional + Data-type of the returned array. + order : {'C', 'F'}, optional + Whether the output should be stored in row-major (C-style) or + column-major (Fortran-style) order in memory. + + .. versionadded:: 1.14.0 + ${ARRAY_FUNCTION_LIKE} + + .. versionadded:: 1.20.0 + + Returns + ------- + I : ndarray of shape (N,M) + An array where all elements are equal to zero, except for the `k`-th + diagonal, whose values are equal to one. + + See Also + -------- + identity : (almost) equivalent function + diag : diagonal 2-D array from a 1-D array specified by the user. + + Examples + -------- + >>> np.eye(2, dtype=int) + array([[1, 0], + [0, 1]]) + >>> np.eye(3, k=1) + array([[0., 1., 0.], + [0., 0., 1.], + [0., 0., 0.]]) + + """ + if like is not None: + return _eye_with_like(N, M=M, k=k, dtype=dtype, order=order, like=like) + if M is None: + M = N + m = zeros((N, M), dtype=dtype, order=order) + if k >= M: + return m + if k >= 0: + i = k + else: + i = (-k) * M + m[:M-k].flat[i::M+1] = 1 + return m + + +_eye_with_like = array_function_dispatch( + _eye_dispatcher +)(eye) + + +def _diag_dispatcher(v, k=None): + return (v,) + + +@array_function_dispatch(_diag_dispatcher) +def diag(v, k=0): + """ + Extract a diagonal or construct a diagonal array. + + See the more detailed documentation for ``numpy.diagonal`` if you use this + function to extract a diagonal and wish to write to the resulting array; + whether it returns a copy or a view depends on what version of numpy you + are using. + + Parameters + ---------- + v : array_like + If `v` is a 2-D array, return a copy of its `k`-th diagonal. + If `v` is a 1-D array, return a 2-D array with `v` on the `k`-th + diagonal. + k : int, optional + Diagonal in question. The default is 0. Use `k>0` for diagonals + above the main diagonal, and `k<0` for diagonals below the main + diagonal. + + Returns + ------- + out : ndarray + The extracted diagonal or constructed diagonal array. + + See Also + -------- + diagonal : Return specified diagonals. + diagflat : Create a 2-D array with the flattened input as a diagonal. + trace : Sum along diagonals. + triu : Upper triangle of an array. + tril : Lower triangle of an array. 
+ + Examples + -------- + >>> x = np.arange(9).reshape((3,3)) + >>> x + array([[0, 1, 2], + [3, 4, 5], + [6, 7, 8]]) + + >>> np.diag(x) + array([0, 4, 8]) + >>> np.diag(x, k=1) + array([1, 5]) + >>> np.diag(x, k=-1) + array([3, 7]) + + >>> np.diag(np.diag(x)) + array([[0, 0, 0], + [0, 4, 0], + [0, 0, 8]]) + + """ + v = asanyarray(v) + s = v.shape + if len(s) == 1: + n = s[0]+abs(k) + res = zeros((n, n), v.dtype) + if k >= 0: + i = k + else: + i = (-k) * n + res[:n-k].flat[i::n+1] = v + return res + elif len(s) == 2: + return diagonal(v, k) + else: + raise ValueError("Input must be 1- or 2-d.") + + +@array_function_dispatch(_diag_dispatcher) +def diagflat(v, k=0): + """ + Create a two-dimensional array with the flattened input as a diagonal. + + Parameters + ---------- + v : array_like + Input data, which is flattened and set as the `k`-th + diagonal of the output. + k : int, optional + Diagonal to set; 0, the default, corresponds to the "main" diagonal, + a positive (negative) `k` giving the number of the diagonal above + (below) the main. + + Returns + ------- + out : ndarray + The 2-D output array. + + See Also + -------- + diag : MATLAB work-alike for 1-D and 2-D arrays. + diagonal : Return specified diagonals. + trace : Sum along diagonals. + + Examples + -------- + >>> np.diagflat([[1,2], [3,4]]) + array([[1, 0, 0, 0], + [0, 2, 0, 0], + [0, 0, 3, 0], + [0, 0, 0, 4]]) + + >>> np.diagflat([1,2], 1) + array([[0, 1, 0], + [0, 0, 2], + [0, 0, 0]]) + + """ + try: + wrap = v.__array_wrap__ + except AttributeError: + wrap = None + v = asarray(v).ravel() + s = len(v) + n = s + abs(k) + res = zeros((n, n), v.dtype) + if (k >= 0): + i = arange(0, n-k, dtype=intp) + fi = i+k+i*n + else: + i = arange(0, n+k, dtype=intp) + fi = i+(i-k)*n + res.flat[fi] = v + if not wrap: + return res + return wrap(res) + + +def _tri_dispatcher(N, M=None, k=None, dtype=None, *, like=None): + return (like,) + + +@set_array_function_like_doc +@set_module('numpy') +def tri(N, M=None, k=0, dtype=float, *, like=None): + """ + An array with ones at and below the given diagonal and zeros elsewhere. + + Parameters + ---------- + N : int + Number of rows in the array. + M : int, optional + Number of columns in the array. + By default, `M` is taken equal to `N`. + k : int, optional + The sub-diagonal at and below which the array is filled. + `k` = 0 is the main diagonal, while `k` < 0 is below it, + and `k` > 0 is above. The default is 0. + dtype : dtype, optional + Data type of the returned array. The default is float. + ${ARRAY_FUNCTION_LIKE} + + .. versionadded:: 1.20.0 + + Returns + ------- + tri : ndarray of shape (N, M) + Array with its lower triangle filled with ones and zero elsewhere; + in other words ``T[i,j] == 1`` for ``j <= i + k``, 0 otherwise. 
+ + Examples + -------- + >>> np.tri(3, 5, 2, dtype=int) + array([[1, 1, 1, 0, 0], + [1, 1, 1, 1, 0], + [1, 1, 1, 1, 1]]) + + >>> np.tri(3, 5, -1) + array([[0., 0., 0., 0., 0.], + [1., 0., 0., 0., 0.], + [1., 1., 0., 0., 0.]]) + + """ + if like is not None: + return _tri_with_like(N, M=M, k=k, dtype=dtype, like=like) + + if M is None: + M = N + + m = greater_equal.outer(arange(N, dtype=_min_int(0, N)), + arange(-k, M-k, dtype=_min_int(-k, M - k))) + + # Avoid making a copy if the requested type is already bool + m = m.astype(dtype, copy=False) + + return m + + +_tri_with_like = array_function_dispatch( + _tri_dispatcher +)(tri) + + +def _trilu_dispatcher(m, k=None): + return (m,) + + +@array_function_dispatch(_trilu_dispatcher) +def tril(m, k=0): + """ + Lower triangle of an array. + + Return a copy of an array with elements above the `k`-th diagonal zeroed. + For arrays with ``ndim`` exceeding 2, `tril` will apply to the final two + axes. + + Parameters + ---------- + m : array_like, shape (..., M, N) + Input array. + k : int, optional + Diagonal above which to zero elements. `k = 0` (the default) is the + main diagonal, `k < 0` is below it and `k > 0` is above. + + Returns + ------- + tril : ndarray, shape (..., M, N) + Lower triangle of `m`, of same shape and data-type as `m`. + + See Also + -------- + triu : same thing, only for the upper triangle + + Examples + -------- + >>> np.tril([[1,2,3],[4,5,6],[7,8,9],[10,11,12]], -1) + array([[ 0, 0, 0], + [ 4, 0, 0], + [ 7, 8, 0], + [10, 11, 12]]) + + >>> np.tril(np.arange(3*4*5).reshape(3, 4, 5)) + array([[[ 0, 0, 0, 0, 0], + [ 5, 6, 0, 0, 0], + [10, 11, 12, 0, 0], + [15, 16, 17, 18, 0]], + [[20, 0, 0, 0, 0], + [25, 26, 0, 0, 0], + [30, 31, 32, 0, 0], + [35, 36, 37, 38, 0]], + [[40, 0, 0, 0, 0], + [45, 46, 0, 0, 0], + [50, 51, 52, 0, 0], + [55, 56, 57, 58, 0]]]) + + """ + m = asanyarray(m) + mask = tri(*m.shape[-2:], k=k, dtype=bool) + + return where(mask, m, zeros(1, m.dtype)) + + +@array_function_dispatch(_trilu_dispatcher) +def triu(m, k=0): + """ + Upper triangle of an array. + + Return a copy of an array with the elements below the `k`-th diagonal + zeroed. For arrays with ``ndim`` exceeding 2, `triu` will apply to the final + two axes. + + Please refer to the documentation for `tril` for further details. + + See Also + -------- + tril : lower triangle of an array + + Examples + -------- + >>> np.triu([[1,2,3],[4,5,6],[7,8,9],[10,11,12]], -1) + array([[ 1, 2, 3], + [ 4, 5, 6], + [ 0, 8, 9], + [ 0, 0, 12]]) + + >>> np.triu(np.arange(3*4*5).reshape(3, 4, 5)) + array([[[ 0, 1, 2, 3, 4], + [ 0, 6, 7, 8, 9], + [ 0, 0, 12, 13, 14], + [ 0, 0, 0, 18, 19]], + [[20, 21, 22, 23, 24], + [ 0, 26, 27, 28, 29], + [ 0, 0, 32, 33, 34], + [ 0, 0, 0, 38, 39]], + [[40, 41, 42, 43, 44], + [ 0, 46, 47, 48, 49], + [ 0, 0, 52, 53, 54], + [ 0, 0, 0, 58, 59]]]) + + """ + m = asanyarray(m) + mask = tri(*m.shape[-2:], k=k-1, dtype=bool) + + return where(mask, zeros(1, m.dtype), m) + + +def _vander_dispatcher(x, N=None, increasing=None): + return (x,) + + +# Originally borrowed from John Hunter and matplotlib +@array_function_dispatch(_vander_dispatcher) +def vander(x, N=None, increasing=False): + """ + Generate a Vandermonde matrix. + + The columns of the output matrix are powers of the input vector. The + order of the powers is determined by the `increasing` boolean argument. + Specifically, when `increasing` is False, the `i`-th output column is + the input vector raised element-wise to the power of ``N - i - 1``. 
Such + a matrix with a geometric progression in each row is named for Alexandre- + Theophile Vandermonde. + + Parameters + ---------- + x : array_like + 1-D input array. + N : int, optional + Number of columns in the output. If `N` is not specified, a square + array is returned (``N = len(x)``). + increasing : bool, optional + Order of the powers of the columns. If True, the powers increase + from left to right, if False (the default) they are reversed. + + .. versionadded:: 1.9.0 + + Returns + ------- + out : ndarray + Vandermonde matrix. If `increasing` is False, the first column is + ``x^(N-1)``, the second ``x^(N-2)`` and so forth. If `increasing` is + True, the columns are ``x^0, x^1, ..., x^(N-1)``. + + See Also + -------- + polynomial.polynomial.polyvander + + Examples + -------- + >>> x = np.array([1, 2, 3, 5]) + >>> N = 3 + >>> np.vander(x, N) + array([[ 1, 1, 1], + [ 4, 2, 1], + [ 9, 3, 1], + [25, 5, 1]]) + + >>> np.column_stack([x**(N-1-i) for i in range(N)]) + array([[ 1, 1, 1], + [ 4, 2, 1], + [ 9, 3, 1], + [25, 5, 1]]) + + >>> x = np.array([1, 2, 3, 5]) + >>> np.vander(x) + array([[ 1, 1, 1, 1], + [ 8, 4, 2, 1], + [ 27, 9, 3, 1], + [125, 25, 5, 1]]) + >>> np.vander(x, increasing=True) + array([[ 1, 1, 1, 1], + [ 1, 2, 4, 8], + [ 1, 3, 9, 27], + [ 1, 5, 25, 125]]) + + The determinant of a square Vandermonde matrix is the product + of the differences between the values of the input vector: + + >>> np.linalg.det(np.vander(x)) + 48.000000000000043 # may vary + >>> (5-3)*(5-2)*(5-1)*(3-2)*(3-1)*(2-1) + 48 + + """ + x = asarray(x) + if x.ndim != 1: + raise ValueError("x must be a one-dimensional array or sequence.") + if N is None: + N = len(x) + + v = empty((len(x), N), dtype=promote_types(x.dtype, int)) + tmp = v[:, ::-1] if not increasing else v + + if N > 0: + tmp[:, 0] = 1 + if N > 1: + tmp[:, 1:] = x[:, None] + multiply.accumulate(tmp[:, 1:], out=tmp[:, 1:], axis=1) + + return v + + +def _histogram2d_dispatcher(x, y, bins=None, range=None, normed=None, + weights=None, density=None): + yield x + yield y + + # This terrible logic is adapted from the checks in histogram2d + try: + N = len(bins) + except TypeError: + N = 1 + if N == 2: + yield from bins # bins=[x, y] + else: + yield bins + + yield weights + + +@array_function_dispatch(_histogram2d_dispatcher) +def histogram2d(x, y, bins=10, range=None, normed=None, weights=None, + density=None): + """ + Compute the bi-dimensional histogram of two data samples. + + Parameters + ---------- + x : array_like, shape (N,) + An array containing the x coordinates of the points to be + histogrammed. + y : array_like, shape (N,) + An array containing the y coordinates of the points to be + histogrammed. + bins : int or array_like or [int, int] or [array, array], optional + The bin specification: + + * If int, the number of bins for the two dimensions (nx=ny=bins). + * If array_like, the bin edges for the two dimensions + (x_edges=y_edges=bins). + * If [int, int], the number of bins in each dimension + (nx, ny = bins). + * If [array, array], the bin edges in each dimension + (x_edges, y_edges = bins). + * A combination [int, array] or [array, int], where int + is the number of bins and array is the bin edges. + + range : array_like, shape(2,2), optional + The leftmost and rightmost edges of the bins along each dimension + (if not specified explicitly in the `bins` parameters): + ``[[xmin, xmax], [ymin, ymax]]``. All values outside of this range + will be considered outliers and not tallied in the histogram. 
+ density : bool, optional + If False, the default, returns the number of samples in each bin. + If True, returns the probability *density* function at the bin, + ``bin_count / sample_count / bin_area``. + normed : bool, optional + An alias for the density argument that behaves identically. To avoid + confusion with the broken normed argument to `histogram`, `density` + should be preferred. + weights : array_like, shape(N,), optional + An array of values ``w_i`` weighing each sample ``(x_i, y_i)``. + Weights are normalized to 1 if `normed` is True. If `normed` is + False, the values of the returned histogram are equal to the sum of + the weights belonging to the samples falling into each bin. + + Returns + ------- + H : ndarray, shape(nx, ny) + The bi-dimensional histogram of samples `x` and `y`. Values in `x` + are histogrammed along the first dimension and values in `y` are + histogrammed along the second dimension. + xedges : ndarray, shape(nx+1,) + The bin edges along the first dimension. + yedges : ndarray, shape(ny+1,) + The bin edges along the second dimension. + + See Also + -------- + histogram : 1D histogram + histogramdd : Multidimensional histogram + + Notes + ----- + When `normed` is True, then the returned histogram is the sample + density, defined such that the sum over bins of the product + ``bin_value * bin_area`` is 1. + + Please note that the histogram does not follow the Cartesian convention + where `x` values are on the abscissa and `y` values on the ordinate + axis. Rather, `x` is histogrammed along the first dimension of the + array (vertical), and `y` along the second dimension of the array + (horizontal). This ensures compatibility with `histogramdd`. + + Examples + -------- + >>> from matplotlib.image import NonUniformImage + >>> import matplotlib.pyplot as plt + + Construct a 2-D histogram with variable bin width. First define the bin + edges: + + >>> xedges = [0, 1, 3, 5] + >>> yedges = [0, 2, 3, 4, 6] + + Next we create a histogram H with random bin content: + + >>> x = np.random.normal(2, 1, 100) + >>> y = np.random.normal(1, 1, 100) + >>> H, xedges, yedges = np.histogram2d(x, y, bins=(xedges, yedges)) + >>> # Histogram does not follow Cartesian convention (see Notes), + >>> # therefore transpose H for visualization purposes. + >>> H = H.T + + :func:`imshow ` can only display square bins: + + >>> fig = plt.figure(figsize=(7, 3)) + >>> ax = fig.add_subplot(131, title='imshow: square bins') + >>> plt.imshow(H, interpolation='nearest', origin='lower', + ... extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]]) + + + :func:`pcolormesh ` can display actual edges: + + >>> ax = fig.add_subplot(132, title='pcolormesh: actual edges', + ... aspect='equal') + >>> X, Y = np.meshgrid(xedges, yedges) + >>> ax.pcolormesh(X, Y, H) + + + :class:`NonUniformImage ` can be used to + display actual bin edges with interpolation: + + >>> ax = fig.add_subplot(133, title='NonUniformImage: interpolated', + ... aspect='equal', xlim=xedges[[0, -1]], ylim=yedges[[0, -1]]) + >>> im = NonUniformImage(ax, interpolation='bilinear') + >>> xcenters = (xedges[:-1] + xedges[1:]) / 2 + >>> ycenters = (yedges[:-1] + yedges[1:]) / 2 + >>> im.set_data(xcenters, ycenters, H) + >>> ax.images.append(im) + >>> plt.show() + + It is also possible to construct a 2-D histogram without specifying bin + edges: + + >>> # Generate non-symmetric test data + >>> n = 10000 + >>> x = np.linspace(1, 100, n) + >>> y = 2*np.log(x) + np.random.rand(n) - 0.5 + >>> # Compute 2d histogram. 
Note the order of x/y and xedges/yedges + >>> H, yedges, xedges = np.histogram2d(y, x, bins=20) + + Now we can plot the histogram using + :func:`pcolormesh `, and a + :func:`hexbin ` for comparison. + + >>> # Plot histogram using pcolormesh + >>> fig, (ax1, ax2) = plt.subplots(ncols=2, sharey=True) + >>> ax1.pcolormesh(xedges, yedges, H, cmap='rainbow') + >>> ax1.plot(x, 2*np.log(x), 'k-') + >>> ax1.set_xlim(x.min(), x.max()) + >>> ax1.set_ylim(y.min(), y.max()) + >>> ax1.set_xlabel('x') + >>> ax1.set_ylabel('y') + >>> ax1.set_title('histogram2d') + >>> ax1.grid() + + >>> # Create hexbin plot for comparison + >>> ax2.hexbin(x, y, gridsize=20, cmap='rainbow') + >>> ax2.plot(x, 2*np.log(x), 'k-') + >>> ax2.set_title('hexbin') + >>> ax2.set_xlim(x.min(), x.max()) + >>> ax2.set_xlabel('x') + >>> ax2.grid() + + >>> plt.show() + """ + from numpy import histogramdd + + if len(x) != len(y): + raise ValueError('x and y must have the same length.') + + try: + N = len(bins) + except TypeError: + N = 1 + + if N != 1 and N != 2: + xedges = yedges = asarray(bins) + bins = [xedges, yedges] + hist, edges = histogramdd([x, y], bins, range, normed, weights, density) + return hist, edges[0], edges[1] + + +@set_module('numpy') +def mask_indices(n, mask_func, k=0): + """ + Return the indices to access (n, n) arrays, given a masking function. + + Assume `mask_func` is a function that, for a square array a of size + ``(n, n)`` with a possible offset argument `k`, when called as + ``mask_func(a, k)`` returns a new array with zeros in certain locations + (functions like `triu` or `tril` do precisely this). Then this function + returns the indices where the non-zero values would be located. + + Parameters + ---------- + n : int + The returned indices will be valid to access arrays of shape (n, n). + mask_func : callable + A function whose call signature is similar to that of `triu`, `tril`. + That is, ``mask_func(x, k)`` returns a boolean array, shaped like `x`. + `k` is an optional argument to the function. + k : scalar + An optional argument which is passed through to `mask_func`. Functions + like `triu`, `tril` take a second argument that is interpreted as an + offset. + + Returns + ------- + indices : tuple of arrays. + The `n` arrays of indices corresponding to the locations where + ``mask_func(np.ones((n, n)), k)`` is True. + + See Also + -------- + triu, tril, triu_indices, tril_indices + + Notes + ----- + .. versionadded:: 1.4.0 + + Examples + -------- + These are the indices that would allow you to access the upper triangular + part of any 3x3 array: + + >>> iu = np.mask_indices(3, np.triu) + + For example, if `a` is a 3x3 array: + + >>> a = np.arange(9).reshape(3, 3) + >>> a + array([[0, 1, 2], + [3, 4, 5], + [6, 7, 8]]) + >>> a[iu] + array([0, 1, 2, 4, 5, 8]) + + An offset can be passed also to the masking function. This gets us the + indices starting on the first diagonal right of the main one: + + >>> iu1 = np.mask_indices(3, np.triu, 1) + + with which we now extract only three elements: + + >>> a[iu1] + array([1, 2, 5]) + + """ + m = ones((n, n), int) + a = mask_func(m, k) + return nonzero(a != 0) + + +@set_module('numpy') +def tril_indices(n, k=0, m=None): + """ + Return the indices for the lower-triangle of an (n, m) array. + + Parameters + ---------- + n : int + The row dimension of the arrays for which the returned + indices will be valid. + k : int, optional + Diagonal offset (see `tril` for details). + m : int, optional + .. 
versionadded:: 1.9.0 + + The column dimension of the arrays for which the returned + arrays will be valid. + By default `m` is taken equal to `n`. + + + Returns + ------- + inds : tuple of arrays + The indices for the triangle. The returned tuple contains two arrays, + each with the indices along one dimension of the array. + + See also + -------- + triu_indices : similar function, for upper-triangular. + mask_indices : generic function accepting an arbitrary mask function. + tril, triu + + Notes + ----- + .. versionadded:: 1.4.0 + + Examples + -------- + Compute two different sets of indices to access 4x4 arrays, one for the + lower triangular part starting at the main diagonal, and one starting two + diagonals further right: + + >>> il1 = np.tril_indices(4) + >>> il2 = np.tril_indices(4, 2) + + Here is how they can be used with a sample array: + + >>> a = np.arange(16).reshape(4, 4) + >>> a + array([[ 0, 1, 2, 3], + [ 4, 5, 6, 7], + [ 8, 9, 10, 11], + [12, 13, 14, 15]]) + + Both for indexing: + + >>> a[il1] + array([ 0, 4, 5, ..., 13, 14, 15]) + + And for assigning values: + + >>> a[il1] = -1 + >>> a + array([[-1, 1, 2, 3], + [-1, -1, 6, 7], + [-1, -1, -1, 11], + [-1, -1, -1, -1]]) + + These cover almost the whole array (two diagonals right of the main one): + + >>> a[il2] = -10 + >>> a + array([[-10, -10, -10, 3], + [-10, -10, -10, -10], + [-10, -10, -10, -10], + [-10, -10, -10, -10]]) + + """ + tri_ = tri(n, m, k=k, dtype=bool) + + return tuple(broadcast_to(inds, tri_.shape)[tri_] + for inds in indices(tri_.shape, sparse=True)) + + +def _trilu_indices_form_dispatcher(arr, k=None): + return (arr,) + + +@array_function_dispatch(_trilu_indices_form_dispatcher) +def tril_indices_from(arr, k=0): + """ + Return the indices for the lower-triangle of arr. + + See `tril_indices` for full details. + + Parameters + ---------- + arr : array_like + The indices will be valid for square arrays whose dimensions are + the same as arr. + k : int, optional + Diagonal offset (see `tril` for details). + + See Also + -------- + tril_indices, tril + + Notes + ----- + .. versionadded:: 1.4.0 + + """ + if arr.ndim != 2: + raise ValueError("input array must be 2-d") + return tril_indices(arr.shape[-2], k=k, m=arr.shape[-1]) + + +@set_module('numpy') +def triu_indices(n, k=0, m=None): + """ + Return the indices for the upper-triangle of an (n, m) array. + + Parameters + ---------- + n : int + The size of the arrays for which the returned indices will + be valid. + k : int, optional + Diagonal offset (see `triu` for details). + m : int, optional + .. versionadded:: 1.9.0 + + The column dimension of the arrays for which the returned + arrays will be valid. + By default `m` is taken equal to `n`. + + + Returns + ------- + inds : tuple, shape(2) of ndarrays, shape(`n`) + The indices for the triangle. The returned tuple contains two arrays, + each with the indices along one dimension of the array. Can be used + to slice a ndarray of shape(`n`, `n`). + + See also + -------- + tril_indices : similar function, for lower-triangular. + mask_indices : generic function accepting an arbitrary mask function. + triu, tril + + Notes + ----- + .. 
versionadded:: 1.4.0 + + Examples + -------- + Compute two different sets of indices to access 4x4 arrays, one for the + upper triangular part starting at the main diagonal, and one starting two + diagonals further right: + + >>> iu1 = np.triu_indices(4) + >>> iu2 = np.triu_indices(4, 2) + + Here is how they can be used with a sample array: + + >>> a = np.arange(16).reshape(4, 4) + >>> a + array([[ 0, 1, 2, 3], + [ 4, 5, 6, 7], + [ 8, 9, 10, 11], + [12, 13, 14, 15]]) + + Both for indexing: + + >>> a[iu1] + array([ 0, 1, 2, ..., 10, 11, 15]) + + And for assigning values: + + >>> a[iu1] = -1 + >>> a + array([[-1, -1, -1, -1], + [ 4, -1, -1, -1], + [ 8, 9, -1, -1], + [12, 13, 14, -1]]) + + These cover only a small part of the whole array (two diagonals right + of the main one): + + >>> a[iu2] = -10 + >>> a + array([[ -1, -1, -10, -10], + [ 4, -1, -1, -10], + [ 8, 9, -1, -1], + [ 12, 13, 14, -1]]) + + """ + tri_ = ~tri(n, m, k=k - 1, dtype=bool) + + return tuple(broadcast_to(inds, tri_.shape)[tri_] + for inds in indices(tri_.shape, sparse=True)) + + +@array_function_dispatch(_trilu_indices_form_dispatcher) +def triu_indices_from(arr, k=0): + """ + Return the indices for the upper-triangle of arr. + + See `triu_indices` for full details. + + Parameters + ---------- + arr : ndarray, shape(N, N) + The indices will be valid for square arrays. + k : int, optional + Diagonal offset (see `triu` for details). + + Returns + ------- + triu_indices_from : tuple, shape(2) of ndarray, shape(N) + Indices for the upper-triangle of `arr`. + + See Also + -------- + triu_indices, triu + + Notes + ----- + .. versionadded:: 1.4.0 + + """ + if arr.ndim != 2: + raise ValueError("input array must be 2-d") + return triu_indices(arr.shape[-2], k=k, m=arr.shape[-1]) diff --git a/.local/lib/python3.8/site-packages/numpy/lib/twodim_base.pyi b/.local/lib/python3.8/site-packages/numpy/lib/twodim_base.pyi new file mode 100644 index 0000000..cba503c --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/twodim_base.pyi @@ -0,0 +1,255 @@ +from typing import ( + Any, + Callable, + List, + Sequence, + overload, + Tuple, + Type, + TypeVar, + Union, +) + +from numpy import ( + ndarray, + dtype, + generic, + number, + bool_, + timedelta64, + datetime64, + int_, + intp, + float64, + signedinteger, + floating, + complexfloating, + object_, + _OrderCF, +) + +from numpy.typing import ( + DTypeLike, + _SupportsDType, + ArrayLike, + NDArray, + _FiniteNestedSequence, + _SupportsArray, + _ArrayLikeInt_co, + _ArrayLikeFloat_co, + _ArrayLikeComplex_co, + _ArrayLikeObject_co, +) + +_T = TypeVar("_T") +_SCT = TypeVar("_SCT", bound=generic) + +# The returned arrays dtype must be compatible with `np.equal` +_MaskFunc = Callable[ + [NDArray[int_], _T], + NDArray[Union[number[Any], bool_, timedelta64, datetime64, object_]], +] + +_DTypeLike = Union[ + Type[_SCT], + dtype[_SCT], + _SupportsDType[dtype[_SCT]], +] +_ArrayLike = _FiniteNestedSequence[_SupportsArray[dtype[_SCT]]] + +__all__: List[str] + +@overload +def fliplr(m: _ArrayLike[_SCT]) -> NDArray[_SCT]: ... +@overload +def fliplr(m: ArrayLike) -> NDArray[Any]: ... + +@overload +def flipud(m: _ArrayLike[_SCT]) -> NDArray[_SCT]: ... +@overload +def flipud(m: ArrayLike) -> NDArray[Any]: ... + +@overload +def eye( + N: int, + M: None | int = ..., + k: int = ..., + dtype: None = ..., + order: _OrderCF = ..., + *, + like: None | ArrayLike = ..., +) -> NDArray[float64]: ... 
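+# Illustrative note (not part of the upstream stubs): a type checker should
+# resolve the `eye` overloads roughly as
+#     reveal_type(np.eye(2))                  # -> NDArray[float64]
+#     reveal_type(np.eye(2, dtype=np.int64))  # -> NDArray[int64]
+#     reveal_type(np.eye(2, dtype="i8"))      # -> NDArray[Any]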
+@overload +def eye( + N: int, + M: None | int = ..., + k: int = ..., + dtype: _DTypeLike[_SCT] = ..., + order: _OrderCF = ..., + *, + like: None | ArrayLike = ..., +) -> NDArray[_SCT]: ... +@overload +def eye( + N: int, + M: None | int = ..., + k: int = ..., + dtype: DTypeLike = ..., + order: _OrderCF = ..., + *, + like: None | ArrayLike = ..., +) -> NDArray[Any]: ... + +@overload +def diag(v: _ArrayLike[_SCT], k: int = ...) -> NDArray[_SCT]: ... +@overload +def diag(v: ArrayLike, k: int = ...) -> NDArray[Any]: ... + +@overload +def diagflat(v: _ArrayLike[_SCT], k: int = ...) -> NDArray[_SCT]: ... +@overload +def diagflat(v: ArrayLike, k: int = ...) -> NDArray[Any]: ... + +@overload +def tri( + N: int, + M: None | int = ..., + k: int = ..., + dtype: None = ..., + *, + like: None | ArrayLike = ... +) -> NDArray[float64]: ... +@overload +def tri( + N: int, + M: None | int = ..., + k: int = ..., + dtype: _DTypeLike[_SCT] = ..., + *, + like: None | ArrayLike = ... +) -> NDArray[_SCT]: ... +@overload +def tri( + N: int, + M: None | int = ..., + k: int = ..., + dtype: DTypeLike = ..., + *, + like: None | ArrayLike = ... +) -> NDArray[Any]: ... + +@overload +def tril(v: _ArrayLike[_SCT], k: int = ...) -> NDArray[_SCT]: ... +@overload +def tril(v: ArrayLike, k: int = ...) -> NDArray[Any]: ... + +@overload +def triu(v: _ArrayLike[_SCT], k: int = ...) -> NDArray[_SCT]: ... +@overload +def triu(v: ArrayLike, k: int = ...) -> NDArray[Any]: ... + +@overload +def vander( # type: ignore[misc] + x: _ArrayLikeInt_co, + N: None | int = ..., + increasing: bool = ..., +) -> NDArray[signedinteger[Any]]: ... +@overload +def vander( # type: ignore[misc] + x: _ArrayLikeFloat_co, + N: None | int = ..., + increasing: bool = ..., +) -> NDArray[floating[Any]]: ... +@overload +def vander( + x: _ArrayLikeComplex_co, + N: None | int = ..., + increasing: bool = ..., +) -> NDArray[complexfloating[Any, Any]]: ... +@overload +def vander( + x: _ArrayLikeObject_co, + N: None | int = ..., + increasing: bool = ..., +) -> NDArray[object_]: ... + +@overload +def histogram2d( # type: ignore[misc] + x: _ArrayLikeFloat_co, + y: _ArrayLikeFloat_co, + bins: int | Sequence[int] = ..., + range: None | _ArrayLikeFloat_co = ..., + normed: None | bool = ..., + weights: None | _ArrayLikeFloat_co = ..., + density: None | bool = ..., +) -> Tuple[ + NDArray[float64], + NDArray[floating[Any]], + NDArray[floating[Any]], +]: ... +@overload +def histogram2d( + x: _ArrayLikeComplex_co, + y: _ArrayLikeComplex_co, + bins: int | Sequence[int] = ..., + range: None | _ArrayLikeFloat_co = ..., + normed: None | bool = ..., + weights: None | _ArrayLikeFloat_co = ..., + density: None | bool = ..., +) -> Tuple[ + NDArray[float64], + NDArray[complexfloating[Any, Any]], + NDArray[complexfloating[Any, Any]], +]: ... +@overload # TODO: Sort out `bins` +def histogram2d( + x: _ArrayLikeComplex_co, + y: _ArrayLikeComplex_co, + bins: Sequence[_ArrayLikeInt_co], + range: None | _ArrayLikeFloat_co = ..., + normed: None | bool = ..., + weights: None | _ArrayLikeFloat_co = ..., + density: None | bool = ..., +) -> Tuple[ + NDArray[float64], + NDArray[Any], + NDArray[Any], +]: ... + +# NOTE: we're assuming/demanding here the `mask_func` returns +# an ndarray of shape `(n, n)`; otherwise there is the possibility +# of the output tuple having more or less than 2 elements +@overload +def mask_indices( + n: int, + mask_func: _MaskFunc[int], + k: int = ..., +) -> Tuple[NDArray[intp], NDArray[intp]]: ... 
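+# The overload above covers the default integer `k`; the overload below ties
+# the type of `k` to the second parameter of `mask_func`, so a (hypothetical)
+# mask function taking `k: float` would accept `k=0.5` here as well.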
+@overload +def mask_indices( + n: int, + mask_func: _MaskFunc[_T], + k: _T, +) -> Tuple[NDArray[intp], NDArray[intp]]: ... + +def tril_indices( + n: int, + k: int = ..., + m: None | int = ..., +) -> Tuple[NDArray[int_], NDArray[int_]]: ... + +def tril_indices_from( + arr: NDArray[Any], + k: int = ..., +) -> Tuple[NDArray[int_], NDArray[int_]]: ... + +def triu_indices( + n: int, + k: int = ..., + m: None | int = ..., +) -> Tuple[NDArray[int_], NDArray[int_]]: ... + +def triu_indices_from( + arr: NDArray[Any], + k: int = ..., +) -> Tuple[NDArray[int_], NDArray[int_]]: ... diff --git a/.local/lib/python3.8/site-packages/numpy/lib/type_check.py b/.local/lib/python3.8/site-packages/numpy/lib/type_check.py new file mode 100644 index 0000000..56afd83 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/type_check.py @@ -0,0 +1,769 @@ +"""Automatically adapted for numpy Sep 19, 2005 by convertcode.py + +""" +import functools +import warnings + +__all__ = ['iscomplexobj', 'isrealobj', 'imag', 'iscomplex', + 'isreal', 'nan_to_num', 'real', 'real_if_close', + 'typename', 'asfarray', 'mintypecode', 'asscalar', + 'common_type'] + +import numpy.core.numeric as _nx +from numpy.core.numeric import asarray, asanyarray, isnan, zeros +from numpy.core.overrides import set_module +from numpy.core import overrides +from .ufunclike import isneginf, isposinf + + +array_function_dispatch = functools.partial( + overrides.array_function_dispatch, module='numpy') + + +_typecodes_by_elsize = 'GDFgdfQqLlIiHhBb?' + + +@set_module('numpy') +def mintypecode(typechars, typeset='GDFgdf', default='d'): + """ + Return the character for the minimum-size type to which given types can + be safely cast. + + The returned type character must represent the smallest size dtype such + that an array of the returned type can handle the data from an array of + all types in `typechars` (or if `typechars` is an array, then its + dtype.char). + + Parameters + ---------- + typechars : list of str or array_like + If a list of strings, each string should represent a dtype. + If array_like, the character representation of the array dtype is used. + typeset : str or list of str, optional + The set of characters that the returned character is chosen from. + The default set is 'GDFgdf'. + default : str, optional + The default character, this is returned if none of the characters in + `typechars` matches a character in `typeset`. + + Returns + ------- + typechar : str + The character representing the minimum-size type that was found. + + See Also + -------- + dtype, sctype2char, maximum_sctype + + Examples + -------- + >>> np.mintypecode(['d', 'f', 'S']) + 'd' + >>> x = np.array([1.1, 2-3.j]) + >>> np.mintypecode(x) + 'D' + + >>> np.mintypecode('abceh', default='G') + 'G' + + """ + typecodes = ((isinstance(t, str) and t) or asarray(t).dtype.char + for t in typechars) + intersection = set(t for t in typecodes if t in typeset) + if not intersection: + return default + if 'F' in intersection and 'd' in intersection: + return 'D' + return min(intersection, key=_typecodes_by_elsize.index) + + +def _asfarray_dispatcher(a, dtype=None): + return (a,) + + +@array_function_dispatch(_asfarray_dispatcher) +def asfarray(a, dtype=_nx.float_): + """ + Return an array converted to a float type. + + Parameters + ---------- + a : array_like + The input array. + dtype : str or dtype object, optional + Float type code to coerce input array `a`. If `dtype` is one of the + 'int' dtypes, it is replaced with float64. 
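+        (Thus ``np.asfarray([2, 3], dtype='int8')`` still returns a float64
+        array; see the Examples below.)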
+ + Returns + ------- + out : ndarray + The input `a` as a float ndarray. + + Examples + -------- + >>> np.asfarray([2, 3]) + array([2., 3.]) + >>> np.asfarray([2, 3], dtype='float') + array([2., 3.]) + >>> np.asfarray([2, 3], dtype='int8') + array([2., 3.]) + + """ + if not _nx.issubdtype(dtype, _nx.inexact): + dtype = _nx.float_ + return asarray(a, dtype=dtype) + + +def _real_dispatcher(val): + return (val,) + + +@array_function_dispatch(_real_dispatcher) +def real(val): + """ + Return the real part of the complex argument. + + Parameters + ---------- + val : array_like + Input array. + + Returns + ------- + out : ndarray or scalar + The real component of the complex argument. If `val` is real, the type + of `val` is used for the output. If `val` has complex elements, the + returned type is float. + + See Also + -------- + real_if_close, imag, angle + + Examples + -------- + >>> a = np.array([1+2j, 3+4j, 5+6j]) + >>> a.real + array([1., 3., 5.]) + >>> a.real = 9 + >>> a + array([9.+2.j, 9.+4.j, 9.+6.j]) + >>> a.real = np.array([9, 8, 7]) + >>> a + array([9.+2.j, 8.+4.j, 7.+6.j]) + >>> np.real(1 + 1j) + 1.0 + + """ + try: + return val.real + except AttributeError: + return asanyarray(val).real + + +def _imag_dispatcher(val): + return (val,) + + +@array_function_dispatch(_imag_dispatcher) +def imag(val): + """ + Return the imaginary part of the complex argument. + + Parameters + ---------- + val : array_like + Input array. + + Returns + ------- + out : ndarray or scalar + The imaginary component of the complex argument. If `val` is real, + the type of `val` is used for the output. If `val` has complex + elements, the returned type is float. + + See Also + -------- + real, angle, real_if_close + + Examples + -------- + >>> a = np.array([1+2j, 3+4j, 5+6j]) + >>> a.imag + array([2., 4., 6.]) + >>> a.imag = np.array([8, 10, 12]) + >>> a + array([1. +8.j, 3.+10.j, 5.+12.j]) + >>> np.imag(1 + 1j) + 1.0 + + """ + try: + return val.imag + except AttributeError: + return asanyarray(val).imag + + +def _is_type_dispatcher(x): + return (x,) + + +@array_function_dispatch(_is_type_dispatcher) +def iscomplex(x): + """ + Returns a bool array, where True if input element is complex. + + What is tested is whether the input has a non-zero imaginary part, not if + the input type is complex. + + Parameters + ---------- + x : array_like + Input array. + + Returns + ------- + out : ndarray of bools + Output array. + + See Also + -------- + isreal + iscomplexobj : Return True if x is a complex type or an array of complex + numbers. + + Examples + -------- + >>> np.iscomplex([1+1j, 1+0j, 4.5, 3, 2, 2j]) + array([ True, False, False, False, False, True]) + + """ + ax = asanyarray(x) + if issubclass(ax.dtype.type, _nx.complexfloating): + return ax.imag != 0 + res = zeros(ax.shape, bool) + return res[()] # convert to scalar if needed + + +@array_function_dispatch(_is_type_dispatcher) +def isreal(x): + """ + Returns a bool array, where True if input element is real. + + If element has complex type with zero complex part, the return value + for that element is True. + + Parameters + ---------- + x : array_like + Input array. + + Returns + ------- + out : ndarray, bool + Boolean array of same shape as `x`. + + Notes + ----- + `isreal` may behave unexpectedly for string or object arrays (see examples) + + See Also + -------- + iscomplex + isrealobj : Return True if x is not a complex type. 
+ + Examples + -------- + >>> a = np.array([1+1j, 1+0j, 4.5, 3, 2, 2j], dtype=complex) + >>> np.isreal(a) + array([False, True, True, True, True, False]) + + The function does not work on string arrays. + + >>> a = np.array([2j, "a"], dtype="U") + >>> np.isreal(a) # Warns about non-elementwise comparison + False + + Returns True for all elements in input array of ``dtype=object`` even if + any of the elements is complex. + + >>> a = np.array([1, "2", 3+4j], dtype=object) + >>> np.isreal(a) + array([ True, True, True]) + + isreal should not be used with object arrays + + >>> a = np.array([1+2j, 2+1j], dtype=object) + >>> np.isreal(a) + array([ True, True]) + + """ + return imag(x) == 0 + + +@array_function_dispatch(_is_type_dispatcher) +def iscomplexobj(x): + """ + Check for a complex type or an array of complex numbers. + + The type of the input is checked, not the value. Even if the input + has an imaginary part equal to zero, `iscomplexobj` evaluates to True. + + Parameters + ---------- + x : any + The input can be of any type and shape. + + Returns + ------- + iscomplexobj : bool + The return value, True if `x` is of a complex type or has at least + one complex element. + + See Also + -------- + isrealobj, iscomplex + + Examples + -------- + >>> np.iscomplexobj(1) + False + >>> np.iscomplexobj(1+0j) + True + >>> np.iscomplexobj([3, 1+0j, True]) + True + + """ + try: + dtype = x.dtype + type_ = dtype.type + except AttributeError: + type_ = asarray(x).dtype.type + return issubclass(type_, _nx.complexfloating) + + +@array_function_dispatch(_is_type_dispatcher) +def isrealobj(x): + """ + Return True if x is a not complex type or an array of complex numbers. + + The type of the input is checked, not the value. So even if the input + has an imaginary part equal to zero, `isrealobj` evaluates to False + if the data type is complex. + + Parameters + ---------- + x : any + The input can be of any type and shape. + + Returns + ------- + y : bool + The return value, False if `x` is of a complex type. + + See Also + -------- + iscomplexobj, isreal + + Notes + ----- + The function is only meant for arrays with numerical values but it + accepts all other objects. Since it assumes array input, the return + value of other objects may be True. + + >>> np.isrealobj('A string') + True + >>> np.isrealobj(False) + True + >>> np.isrealobj(None) + True + + Examples + -------- + >>> np.isrealobj(1) + True + >>> np.isrealobj(1+0j) + False + >>> np.isrealobj([3, 1+0j, True]) + False + + """ + return not iscomplexobj(x) + +#----------------------------------------------------------------------------- + +def _getmaxmin(t): + from numpy.core import getlimits + f = getlimits.finfo(t) + return f.max, f.min + + +def _nan_to_num_dispatcher(x, copy=None, nan=None, posinf=None, neginf=None): + return (x,) + + +@array_function_dispatch(_nan_to_num_dispatcher) +def nan_to_num(x, copy=True, nan=0.0, posinf=None, neginf=None): + """ + Replace NaN with zero and infinity with large finite numbers (default + behaviour) or with the numbers defined by the user using the `nan`, + `posinf` and/or `neginf` keywords. + + If `x` is inexact, NaN is replaced by zero or by the user defined value in + `nan` keyword, infinity is replaced by the largest finite floating point + values representable by ``x.dtype`` or by the user defined value in + `posinf` keyword and -infinity is replaced by the most negative finite + floating point values representable by ``x.dtype`` or by the user defined + value in `neginf` keyword. 
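+    (Concretely, for ``float64`` input the defaults map ``nan`` to ``0.0``,
+    positive infinity to ``1.7976931348623157e+308`` and negative infinity
+    to ``-1.7976931348623157e+308``, as the Examples below show.)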
+
+    For complex dtypes, the above is applied to each of the real and
+    imaginary components of `x` separately.
+
+    If `x` is not inexact, then no replacements are made.
+
+    Parameters
+    ----------
+    x : scalar or array_like
+        Input data.
+    copy : bool, optional
+        Whether to create a copy of `x` (True) or to replace values
+        in-place (False). The in-place operation only occurs if
+        casting to an array does not require a copy.
+        Default is True.
+
+        .. versionadded:: 1.13
+    nan : int, float, optional
+        Value to be used to fill NaN values. If no value is passed
+        then NaN values will be replaced with 0.0.
+
+        .. versionadded:: 1.17
+    posinf : int, float, optional
+        Value to be used to fill positive infinity values. If no value is
+        passed then positive infinity values will be replaced with a very
+        large number.
+
+        .. versionadded:: 1.17
+    neginf : int, float, optional
+        Value to be used to fill negative infinity values. If no value is
+        passed then negative infinity values will be replaced with a very
+        small (or negative) number.
+
+        .. versionadded:: 1.17
+
+
+
+    Returns
+    -------
+    out : ndarray
+        `x`, with the non-finite values replaced. If `copy` is False, this may
+        be `x` itself.
+
+    See Also
+    --------
+    isinf : Shows which elements are positive or negative infinity.
+    isneginf : Shows which elements are negative infinity.
+    isposinf : Shows which elements are positive infinity.
+    isnan : Shows which elements are Not a Number (NaN).
+    isfinite : Shows which elements are finite (not NaN, not infinity)
+
+    Notes
+    -----
+    NumPy uses the IEEE Standard for Binary Floating-Point for Arithmetic
+    (IEEE 754). This means that Not a Number is not equivalent to infinity.
+
+    Examples
+    --------
+    >>> np.nan_to_num(np.inf)
+    1.7976931348623157e+308
+    >>> np.nan_to_num(-np.inf)
+    -1.7976931348623157e+308
+    >>> np.nan_to_num(np.nan)
+    0.0
+    >>> x = np.array([np.inf, -np.inf, np.nan, -128, 128])
+    >>> np.nan_to_num(x)
+    array([ 1.79769313e+308, -1.79769313e+308,  0.00000000e+000, # may vary
+           -1.28000000e+002,  1.28000000e+002])
+    >>> np.nan_to_num(x, nan=-9999, posinf=33333333, neginf=33333333)
+    array([ 3.3333333e+07,  3.3333333e+07, -9.9990000e+03,
+           -1.2800000e+02,  1.2800000e+02])
+    >>> y = np.array([complex(np.inf, np.nan), np.nan, complex(np.nan, np.inf)])
+    >>> np.nan_to_num(y)
+    array([ 1.79769313e+308 +0.00000000e+000j, # may vary
+            0.00000000e+000 +0.00000000e+000j,
+            0.00000000e+000 +1.79769313e+308j])
+    >>> np.nan_to_num(y, nan=111111, posinf=222222)
+    array([222222.+111111.j, 111111.
+0.j, 111111.+222222.j]) + """ + x = _nx.array(x, subok=True, copy=copy) + xtype = x.dtype.type + + isscalar = (x.ndim == 0) + + if not issubclass(xtype, _nx.inexact): + return x[()] if isscalar else x + + iscomplex = issubclass(xtype, _nx.complexfloating) + + dest = (x.real, x.imag) if iscomplex else (x,) + maxf, minf = _getmaxmin(x.real.dtype) + if posinf is not None: + maxf = posinf + if neginf is not None: + minf = neginf + for d in dest: + idx_nan = isnan(d) + idx_posinf = isposinf(d) + idx_neginf = isneginf(d) + _nx.copyto(d, nan, where=idx_nan) + _nx.copyto(d, maxf, where=idx_posinf) + _nx.copyto(d, minf, where=idx_neginf) + return x[()] if isscalar else x + +#----------------------------------------------------------------------------- + +def _real_if_close_dispatcher(a, tol=None): + return (a,) + + +@array_function_dispatch(_real_if_close_dispatcher) +def real_if_close(a, tol=100): + """ + If input is complex with all imaginary parts close to zero, return + real parts. + + "Close to zero" is defined as `tol` * (machine epsilon of the type for + `a`). + + Parameters + ---------- + a : array_like + Input array. + tol : float + Tolerance in machine epsilons for the complex part of the elements + in the array. + + Returns + ------- + out : ndarray + If `a` is real, the type of `a` is used for the output. If `a` + has complex elements, the returned type is float. + + See Also + -------- + real, imag, angle + + Notes + ----- + Machine epsilon varies from machine to machine and between data types + but Python floats on most platforms have a machine epsilon equal to + 2.2204460492503131e-16. You can use 'np.finfo(float).eps' to print + out the machine epsilon for floats. + + Examples + -------- + >>> np.finfo(float).eps + 2.2204460492503131e-16 # may vary + + >>> np.real_if_close([2.1 + 4e-14j, 5.2 + 3e-15j], tol=1000) + array([2.1, 5.2]) + >>> np.real_if_close([2.1 + 4e-13j, 5.2 + 3e-15j], tol=1000) + array([2.1+4.e-13j, 5.2 + 3e-15j]) + + """ + a = asanyarray(a) + if not issubclass(a.dtype.type, _nx.complexfloating): + return a + if tol > 1: + from numpy.core import getlimits + f = getlimits.finfo(a.dtype.type) + tol = f.eps * tol + if _nx.all(_nx.absolute(a.imag) < tol): + a = a.real + return a + + +def _asscalar_dispatcher(a): + # 2018-10-10, 1.16 + warnings.warn('np.asscalar(a) is deprecated since NumPy v1.16, use ' + 'a.item() instead', DeprecationWarning, stacklevel=3) + return (a,) + + +@array_function_dispatch(_asscalar_dispatcher) +def asscalar(a): + """ + Convert an array of size 1 to its scalar equivalent. + + .. deprecated:: 1.16 + + Deprecated, use `numpy.ndarray.item()` instead. + + Parameters + ---------- + a : ndarray + Input array of size 1. + + Returns + ------- + out : scalar + Scalar representation of `a`. The output data type is the same type + returned by the input's `item` method. 
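+
+    See Also
+    --------
+    ndarray.item : The recommended replacement, e.g. ``np.array([24]).item()``.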
+ + Examples + -------- + >>> np.asscalar(np.array([24])) + 24 + """ + return a.item() + +#----------------------------------------------------------------------------- + +_namefromtype = {'S1': 'character', + '?': 'bool', + 'b': 'signed char', + 'B': 'unsigned char', + 'h': 'short', + 'H': 'unsigned short', + 'i': 'integer', + 'I': 'unsigned integer', + 'l': 'long integer', + 'L': 'unsigned long integer', + 'q': 'long long integer', + 'Q': 'unsigned long long integer', + 'f': 'single precision', + 'd': 'double precision', + 'g': 'long precision', + 'F': 'complex single precision', + 'D': 'complex double precision', + 'G': 'complex long double precision', + 'S': 'string', + 'U': 'unicode', + 'V': 'void', + 'O': 'object' + } + +@set_module('numpy') +def typename(char): + """ + Return a description for the given data type code. + + Parameters + ---------- + char : str + Data type code. + + Returns + ------- + out : str + Description of the input data type code. + + See Also + -------- + dtype, typecodes + + Examples + -------- + >>> typechars = ['S1', '?', 'B', 'D', 'G', 'F', 'I', 'H', 'L', 'O', 'Q', + ... 'S', 'U', 'V', 'b', 'd', 'g', 'f', 'i', 'h', 'l', 'q'] + >>> for typechar in typechars: + ... print(typechar, ' : ', np.typename(typechar)) + ... + S1 : character + ? : bool + B : unsigned char + D : complex double precision + G : complex long double precision + F : complex single precision + I : unsigned integer + H : unsigned short + L : unsigned long integer + O : object + Q : unsigned long long integer + S : string + U : unicode + V : void + b : signed char + d : double precision + g : long precision + f : single precision + i : integer + h : short + l : long integer + q : long long integer + + """ + return _namefromtype[char] + +#----------------------------------------------------------------------------- + +#determine the "minimum common type" for a group of arrays. +array_type = [[_nx.half, _nx.single, _nx.double, _nx.longdouble], + [None, _nx.csingle, _nx.cdouble, _nx.clongdouble]] +array_precision = {_nx.half: 0, + _nx.single: 1, + _nx.double: 2, + _nx.longdouble: 3, + _nx.csingle: 1, + _nx.cdouble: 2, + _nx.clongdouble: 3} + + +def _common_type_dispatcher(*arrays): + return arrays + + +@array_function_dispatch(_common_type_dispatcher) +def common_type(*arrays): + """ + Return a scalar type which is common to the input arrays. + + The return type will always be an inexact (i.e. floating point) scalar + type, even if all the arrays are integer arrays. If one of the inputs is + an integer array, the minimum precision type that is returned is a + 64-bit floating point dtype. + + All input arrays except int64 and uint64 can be safely cast to the + returned dtype without loss of information. + + Parameters + ---------- + array1, array2, ... : ndarrays + Input arrays. + + Returns + ------- + out : data type code + Data type code. 
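+        (In practice a scalar type such as ``numpy.float64`` or
+        ``numpy.complex128``.)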
+ + See Also + -------- + dtype, mintypecode + + Examples + -------- + >>> np.common_type(np.arange(2, dtype=np.float32)) + + >>> np.common_type(np.arange(2, dtype=np.float32), np.arange(2)) + + >>> np.common_type(np.arange(4), np.array([45, 6.j]), np.array([45.0])) + + + """ + is_complex = False + precision = 0 + for a in arrays: + t = a.dtype.type + if iscomplexobj(a): + is_complex = True + if issubclass(t, _nx.integer): + p = 2 # array_precision[_nx.double] + else: + p = array_precision.get(t, None) + if p is None: + raise TypeError("can't get common type for non-numeric array") + precision = max(precision, p) + if is_complex: + return array_type[1][precision] + else: + return array_type[0][precision] diff --git a/.local/lib/python3.8/site-packages/numpy/lib/type_check.pyi b/.local/lib/python3.8/site-packages/numpy/lib/type_check.pyi new file mode 100644 index 0000000..0a55dbf --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/type_check.pyi @@ -0,0 +1,231 @@ +from typing import ( + Literal as L, + Any, + Container, + Iterable, + List, + overload, + Type, + TypeVar, + Protocol, +) + +from numpy import ( + dtype, + generic, + bool_, + floating, + float64, + complexfloating, + integer, +) + +from numpy.typing import ( + ArrayLike, + DTypeLike, + NBitBase, + NDArray, + _64Bit, + _SupportsDType, + _ScalarLike_co, + _FiniteNestedSequence, + _SupportsArray, + _DTypeLikeComplex, +) + +_T = TypeVar("_T") +_T_co = TypeVar("_T_co", covariant=True) +_SCT = TypeVar("_SCT", bound=generic) +_NBit1 = TypeVar("_NBit1", bound=NBitBase) +_NBit2 = TypeVar("_NBit2", bound=NBitBase) + +_ArrayLike = _FiniteNestedSequence[_SupportsArray[dtype[_SCT]]] + +class _SupportsReal(Protocol[_T_co]): + @property + def real(self) -> _T_co: ... + +class _SupportsImag(Protocol[_T_co]): + @property + def imag(self) -> _T_co: ... + +__all__: List[str] + +def mintypecode( + typechars: Iterable[str | ArrayLike], + typeset: Container[str] = ..., + default: str = ..., +) -> str: ... + +# `asfarray` ignores dtypes if they're not inexact + +@overload +def asfarray( + a: object, + dtype: None | Type[float] = ..., +) -> NDArray[float64]: ... +@overload +def asfarray( # type: ignore[misc] + a: Any, + dtype: _DTypeLikeComplex, +) -> NDArray[complexfloating[Any, Any]]: ... +@overload +def asfarray( + a: Any, + dtype: DTypeLike, +) -> NDArray[floating[Any]]: ... + +@overload +def real(val: _SupportsReal[_T]) -> _T: ... +@overload +def real(val: ArrayLike) -> NDArray[Any]: ... + +@overload +def imag(val: _SupportsImag[_T]) -> _T: ... +@overload +def imag(val: ArrayLike) -> NDArray[Any]: ... + +@overload +def iscomplex(x: _ScalarLike_co) -> bool_: ... # type: ignore[misc] +@overload +def iscomplex(x: ArrayLike) -> NDArray[bool_]: ... + +@overload +def isreal(x: _ScalarLike_co) -> bool_: ... # type: ignore[misc] +@overload +def isreal(x: ArrayLike) -> NDArray[bool_]: ... + +def iscomplexobj(x: _SupportsDType[dtype[Any]] | ArrayLike) -> bool: ... + +def isrealobj(x: _SupportsDType[dtype[Any]] | ArrayLike) -> bool: ... + +@overload +def nan_to_num( # type: ignore[misc] + x: _SCT, + copy: bool = ..., + nan: float = ..., + posinf: None | float = ..., + neginf: None | float = ..., +) -> _SCT: ... +@overload +def nan_to_num( + x: _ScalarLike_co, + copy: bool = ..., + nan: float = ..., + posinf: None | float = ..., + neginf: None | float = ..., +) -> Any: ... +@overload +def nan_to_num( + x: _ArrayLike[_SCT], + copy: bool = ..., + nan: float = ..., + posinf: None | float = ..., + neginf: None | float = ..., +) -> NDArray[_SCT]: ... 
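+# Overload order matters here: the `_ArrayLike[_SCT]` case above preserves
+# the input's scalar type (e.g. an NDArray[float32] stays NDArray[float32]),
+# while the catch-all below handles anything else that is merely ArrayLike.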
+@overload +def nan_to_num( + x: ArrayLike, + copy: bool = ..., + nan: float = ..., + posinf: None | float = ..., + neginf: None | float = ..., +) -> NDArray[Any]: ... + +# If one passes a complex array to `real_if_close`, then one is reasonably +# expected to verify the output dtype (so we can return an unsafe union here) + +@overload +def real_if_close( # type: ignore[misc] + a: _ArrayLike[complexfloating[_NBit1, _NBit1]], + tol: float = ..., +) -> NDArray[floating[_NBit1]] | NDArray[complexfloating[_NBit1, _NBit1]]: ... +@overload +def real_if_close( + a: _ArrayLike[_SCT], + tol: float = ..., +) -> NDArray[_SCT]: ... +@overload +def real_if_close( + a: ArrayLike, + tol: float = ..., +) -> NDArray[Any]: ... + +# NOTE: deprecated +# def asscalar(a): ... + +@overload +def typename(char: L['S1']) -> L['character']: ... +@overload +def typename(char: L['?']) -> L['bool']: ... +@overload +def typename(char: L['b']) -> L['signed char']: ... +@overload +def typename(char: L['B']) -> L['unsigned char']: ... +@overload +def typename(char: L['h']) -> L['short']: ... +@overload +def typename(char: L['H']) -> L['unsigned short']: ... +@overload +def typename(char: L['i']) -> L['integer']: ... +@overload +def typename(char: L['I']) -> L['unsigned integer']: ... +@overload +def typename(char: L['l']) -> L['long integer']: ... +@overload +def typename(char: L['L']) -> L['unsigned long integer']: ... +@overload +def typename(char: L['q']) -> L['long long integer']: ... +@overload +def typename(char: L['Q']) -> L['unsigned long long integer']: ... +@overload +def typename(char: L['f']) -> L['single precision']: ... +@overload +def typename(char: L['d']) -> L['double precision']: ... +@overload +def typename(char: L['g']) -> L['long precision']: ... +@overload +def typename(char: L['F']) -> L['complex single precision']: ... +@overload +def typename(char: L['D']) -> L['complex double precision']: ... +@overload +def typename(char: L['G']) -> L['complex long double precision']: ... +@overload +def typename(char: L['S']) -> L['string']: ... +@overload +def typename(char: L['U']) -> L['unicode']: ... +@overload +def typename(char: L['V']) -> L['void']: ... +@overload +def typename(char: L['O']) -> L['object']: ... + +@overload +def common_type( # type: ignore[misc] + *arrays: _SupportsDType[dtype[ + integer[Any] + ]] +) -> Type[floating[_64Bit]]: ... +@overload +def common_type( # type: ignore[misc] + *arrays: _SupportsDType[dtype[ + floating[_NBit1] + ]] +) -> Type[floating[_NBit1]]: ... +@overload +def common_type( # type: ignore[misc] + *arrays: _SupportsDType[dtype[ + integer[Any] | floating[_NBit1] + ]] +) -> Type[floating[_NBit1 | _64Bit]]: ... +@overload +def common_type( # type: ignore[misc] + *arrays: _SupportsDType[dtype[ + floating[_NBit1] | complexfloating[_NBit2, _NBit2] + ]] +) -> Type[complexfloating[_NBit1 | _NBit2, _NBit1 | _NBit2]]: ... +@overload +def common_type( + *arrays: _SupportsDType[dtype[ + integer[Any] | floating[_NBit1] | complexfloating[_NBit2, _NBit2] + ]] +) -> Type[complexfloating[_64Bit | _NBit1 | _NBit2, _64Bit | _NBit1 | _NBit2]]: ... diff --git a/.local/lib/python3.8/site-packages/numpy/lib/ufunclike.py b/.local/lib/python3.8/site-packages/numpy/lib/ufunclike.py new file mode 100644 index 0000000..a93c477 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/ufunclike.py @@ -0,0 +1,268 @@ +""" +Module of functions that are like ufuncs in acting on arrays and optionally +storing results in an output array. 
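+
+The functions defined here are `fix`, `isposinf` and `isneginf`. A typical
+pattern (a quick sketch) is computing into a pre-allocated output array:
+
+    >>> import numpy as np
+    >>> out = np.empty(3, dtype=bool)
+    >>> np.isposinf([-np.inf, 0., np.inf], out)
+    array([False, False,  True])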
+ +""" +__all__ = ['fix', 'isneginf', 'isposinf'] + +import numpy.core.numeric as nx +from numpy.core.overrides import ( + array_function_dispatch, ARRAY_FUNCTION_ENABLED, +) +import warnings +import functools + + +def _deprecate_out_named_y(f): + """ + Allow the out argument to be passed as the name `y` (deprecated) + + In future, this decorator should be removed. + """ + @functools.wraps(f) + def func(x, out=None, **kwargs): + if 'y' in kwargs: + if 'out' in kwargs: + raise TypeError( + "{} got multiple values for argument 'out'/'y'" + .format(f.__name__) + ) + out = kwargs.pop('y') + # NumPy 1.13.0, 2017-04-26 + warnings.warn( + "The name of the out argument to {} has changed from `y` to " + "`out`, to match other ufuncs.".format(f.__name__), + DeprecationWarning, stacklevel=3) + return f(x, out=out, **kwargs) + + return func + + +def _fix_out_named_y(f): + """ + Allow the out argument to be passed as the name `y` (deprecated) + + This decorator should only be used if _deprecate_out_named_y is used on + a corresponding dispatcher function. + """ + @functools.wraps(f) + def func(x, out=None, **kwargs): + if 'y' in kwargs: + # we already did error checking in _deprecate_out_named_y + out = kwargs.pop('y') + return f(x, out=out, **kwargs) + + return func + + +def _fix_and_maybe_deprecate_out_named_y(f): + """ + Use the appropriate decorator, depending upon if dispatching is being used. + """ + if ARRAY_FUNCTION_ENABLED: + return _fix_out_named_y(f) + else: + return _deprecate_out_named_y(f) + + +@_deprecate_out_named_y +def _dispatcher(x, out=None): + return (x, out) + + +@array_function_dispatch(_dispatcher, verify=False, module='numpy') +@_fix_and_maybe_deprecate_out_named_y +def fix(x, out=None): + """ + Round to nearest integer towards zero. + + Round an array of floats element-wise to nearest integer towards zero. + The rounded values are returned as floats. + + Parameters + ---------- + x : array_like + An array of floats to be rounded + out : ndarray, optional + A location into which the result is stored. If provided, it must have + a shape that the input broadcasts to. If not provided or None, a + freshly-allocated array is returned. + + Returns + ------- + out : ndarray of floats + A float array with the same dimensions as the input. + If second argument is not supplied then a float array is returned + with the rounded values. + + If a second argument is supplied the result is stored there. + The return value `out` is then a reference to that array. + + See Also + -------- + rint, trunc, floor, ceil + around : Round to given number of decimals + + Examples + -------- + >>> np.fix(3.14) + 3.0 + >>> np.fix(3) + 3.0 + >>> np.fix([2.1, 2.9, -2.1, -2.9]) + array([ 2., 2., -2., -2.]) + + """ + # promote back to an array if flattened + res = nx.asanyarray(nx.ceil(x, out=out)) + res = nx.floor(x, out=res, where=nx.greater_equal(x, 0)) + + # when no out argument is passed and no subclasses are involved, flatten + # scalars + if out is None and type(res) is nx.ndarray: + res = res[()] + return res + + +@array_function_dispatch(_dispatcher, verify=False, module='numpy') +@_fix_and_maybe_deprecate_out_named_y +def isposinf(x, out=None): + """ + Test element-wise for positive infinity, return result as bool array. + + Parameters + ---------- + x : array_like + The input array. + out : array_like, optional + A location into which the result is stored. If provided, it must have a + shape that the input broadcasts to. If not provided or None, a + freshly-allocated boolean array is returned. 
+ + Returns + ------- + out : ndarray + A boolean array with the same dimensions as the input. + If second argument is not supplied then a boolean array is returned + with values True where the corresponding element of the input is + positive infinity and values False where the element of the input is + not positive infinity. + + If a second argument is supplied the result is stored there. If the + type of that array is a numeric type the result is represented as zeros + and ones, if the type is boolean then as False and True. + The return value `out` is then a reference to that array. + + See Also + -------- + isinf, isneginf, isfinite, isnan + + Notes + ----- + NumPy uses the IEEE Standard for Binary Floating-Point for Arithmetic + (IEEE 754). + + Errors result if the second argument is also supplied when x is a scalar + input, if first and second arguments have different shapes, or if the + first argument has complex values + + Examples + -------- + >>> np.isposinf(np.PINF) + True + >>> np.isposinf(np.inf) + True + >>> np.isposinf(np.NINF) + False + >>> np.isposinf([-np.inf, 0., np.inf]) + array([False, False, True]) + + >>> x = np.array([-np.inf, 0., np.inf]) + >>> y = np.array([2, 2, 2]) + >>> np.isposinf(x, y) + array([0, 0, 1]) + >>> y + array([0, 0, 1]) + + """ + is_inf = nx.isinf(x) + try: + signbit = ~nx.signbit(x) + except TypeError as e: + dtype = nx.asanyarray(x).dtype + raise TypeError(f'This operation is not supported for {dtype} values ' + 'because it would be ambiguous.') from e + else: + return nx.logical_and(is_inf, signbit, out) + + +@array_function_dispatch(_dispatcher, verify=False, module='numpy') +@_fix_and_maybe_deprecate_out_named_y +def isneginf(x, out=None): + """ + Test element-wise for negative infinity, return result as bool array. + + Parameters + ---------- + x : array_like + The input array. + out : array_like, optional + A location into which the result is stored. If provided, it must have a + shape that the input broadcasts to. If not provided or None, a + freshly-allocated boolean array is returned. + + Returns + ------- + out : ndarray + A boolean array with the same dimensions as the input. + If second argument is not supplied then a numpy boolean array is + returned with values True where the corresponding element of the + input is negative infinity and values False where the element of + the input is not negative infinity. + + If a second argument is supplied the result is stored there. If the + type of that array is a numeric type the result is represented as + zeros and ones, if the type is boolean then as False and True. The + return value `out` is then a reference to that array. + + See Also + -------- + isinf, isposinf, isnan, isfinite + + Notes + ----- + NumPy uses the IEEE Standard for Binary Floating-Point for Arithmetic + (IEEE 754). + + Errors result if the second argument is also supplied when x is a scalar + input, if first and second arguments have different shapes, or if the + first argument has complex values. 
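+
+    As the implementation below shows, this is equivalent to
+    ``np.logical_and(np.isinf(x), np.signbit(x))``.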
+ + Examples + -------- + >>> np.isneginf(np.NINF) + True + >>> np.isneginf(np.inf) + False + >>> np.isneginf(np.PINF) + False + >>> np.isneginf([-np.inf, 0., np.inf]) + array([ True, False, False]) + + >>> x = np.array([-np.inf, 0., np.inf]) + >>> y = np.array([2, 2, 2]) + >>> np.isneginf(x, y) + array([1, 0, 0]) + >>> y + array([1, 0, 0]) + + """ + is_inf = nx.isinf(x) + try: + signbit = nx.signbit(x) + except TypeError as e: + dtype = nx.asanyarray(x).dtype + raise TypeError(f'This operation is not supported for {dtype} values ' + 'because it would be ambiguous.') from e + else: + return nx.logical_and(is_inf, signbit, out) diff --git a/.local/lib/python3.8/site-packages/numpy/lib/ufunclike.pyi b/.local/lib/python3.8/site-packages/numpy/lib/ufunclike.pyi new file mode 100644 index 0000000..03f08eb --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/ufunclike.pyi @@ -0,0 +1,66 @@ +from typing import Any, overload, TypeVar, List, Union + +from numpy import floating, bool_, object_, ndarray +from numpy.typing import ( + NDArray, + _FloatLike_co, + _ArrayLikeFloat_co, + _ArrayLikeObject_co, +) + +_ArrayType = TypeVar("_ArrayType", bound=ndarray[Any, Any]) + +__all__: List[str] + +@overload +def fix( # type: ignore[misc] + x: _FloatLike_co, + out: None = ..., +) -> floating[Any]: ... +@overload +def fix( + x: _ArrayLikeFloat_co, + out: None = ..., +) -> NDArray[floating[Any]]: ... +@overload +def fix( + x: _ArrayLikeObject_co, + out: None = ..., +) -> NDArray[object_]: ... +@overload +def fix( + x: Union[_ArrayLikeFloat_co, _ArrayLikeObject_co], + out: _ArrayType, +) -> _ArrayType: ... + +@overload +def isposinf( # type: ignore[misc] + x: _FloatLike_co, + out: None = ..., +) -> bool_: ... +@overload +def isposinf( + x: _ArrayLikeFloat_co, + out: None = ..., +) -> NDArray[bool_]: ... +@overload +def isposinf( + x: _ArrayLikeFloat_co, + out: _ArrayType, +) -> _ArrayType: ... + +@overload +def isneginf( # type: ignore[misc] + x: _FloatLike_co, + out: None = ..., +) -> bool_: ... +@overload +def isneginf( + x: _ArrayLikeFloat_co, + out: None = ..., +) -> NDArray[bool_]: ... +@overload +def isneginf( + x: _ArrayLikeFloat_co, + out: _ArrayType, +) -> _ArrayType: ... diff --git a/.local/lib/python3.8/site-packages/numpy/lib/user_array.py b/.local/lib/python3.8/site-packages/numpy/lib/user_array.py new file mode 100644 index 0000000..0e96b47 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/lib/user_array.py @@ -0,0 +1,286 @@ +""" +Standard container-class for easy multiple-inheritance. + +Try to inherit from the ndarray instead of using this class as this is not +complete. + +""" +from numpy.core import ( + array, asarray, absolute, add, subtract, multiply, divide, + remainder, power, left_shift, right_shift, bitwise_and, bitwise_or, + bitwise_xor, invert, less, less_equal, not_equal, equal, greater, + greater_equal, shape, reshape, arange, sin, sqrt, transpose +) + + +class container: + """ + container(data, dtype=None, copy=True) + + Standard container-class for easy multiple-inheritance. 
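+
+    Parameters
+    ----------
+    data : array_like
+        The data to wrap.
+    dtype : data-type, optional
+        The desired data-type for the underlying array.
+    copy : bool, optional
+        Whether to copy `data` (default True).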
+ + Methods + ------- + copy + tostring + byteswap + astype + + """ + def __init__(self, data, dtype=None, copy=True): + self.array = array(data, dtype, copy=copy) + + def __repr__(self): + if self.ndim > 0: + return self.__class__.__name__ + repr(self.array)[len("array"):] + else: + return self.__class__.__name__ + "(" + repr(self.array) + ")" + + def __array__(self, t=None): + if t: + return self.array.astype(t) + return self.array + + # Array as sequence + def __len__(self): + return len(self.array) + + def __getitem__(self, index): + return self._rc(self.array[index]) + + def __setitem__(self, index, value): + self.array[index] = asarray(value, self.dtype) + + def __abs__(self): + return self._rc(absolute(self.array)) + + def __neg__(self): + return self._rc(-self.array) + + def __add__(self, other): + return self._rc(self.array + asarray(other)) + + __radd__ = __add__ + + def __iadd__(self, other): + add(self.array, other, self.array) + return self + + def __sub__(self, other): + return self._rc(self.array - asarray(other)) + + def __rsub__(self, other): + return self._rc(asarray(other) - self.array) + + def __isub__(self, other): + subtract(self.array, other, self.array) + return self + + def __mul__(self, other): + return self._rc(multiply(self.array, asarray(other))) + + __rmul__ = __mul__ + + def __imul__(self, other): + multiply(self.array, other, self.array) + return self + + def __div__(self, other): + return self._rc(divide(self.array, asarray(other))) + + def __rdiv__(self, other): + return self._rc(divide(asarray(other), self.array)) + + def __idiv__(self, other): + divide(self.array, other, self.array) + return self + + def __mod__(self, other): + return self._rc(remainder(self.array, other)) + + def __rmod__(self, other): + return self._rc(remainder(other, self.array)) + + def __imod__(self, other): + remainder(self.array, other, self.array) + return self + + def __divmod__(self, other): + return (self._rc(divide(self.array, other)), + self._rc(remainder(self.array, other))) + + def __rdivmod__(self, other): + return (self._rc(divide(other, self.array)), + self._rc(remainder(other, self.array))) + + def __pow__(self, other): + return self._rc(power(self.array, asarray(other))) + + def __rpow__(self, other): + return self._rc(power(asarray(other), self.array)) + + def __ipow__(self, other): + power(self.array, other, self.array) + return self + + def __lshift__(self, other): + return self._rc(left_shift(self.array, other)) + + def __rshift__(self, other): + return self._rc(right_shift(self.array, other)) + + def __rlshift__(self, other): + return self._rc(left_shift(other, self.array)) + + def __rrshift__(self, other): + return self._rc(right_shift(other, self.array)) + + def __ilshift__(self, other): + left_shift(self.array, other, self.array) + return self + + def __irshift__(self, other): + right_shift(self.array, other, self.array) + return self + + def __and__(self, other): + return self._rc(bitwise_and(self.array, other)) + + def __rand__(self, other): + return self._rc(bitwise_and(other, self.array)) + + def __iand__(self, other): + bitwise_and(self.array, other, self.array) + return self + + def __xor__(self, other): + return self._rc(bitwise_xor(self.array, other)) + + def __rxor__(self, other): + return self._rc(bitwise_xor(other, self.array)) + + def __ixor__(self, other): + bitwise_xor(self.array, other, self.array) + return self + + def __or__(self, other): + return self._rc(bitwise_or(self.array, other)) + + def __ror__(self, other): + return 
self._rc(bitwise_or(other, self.array)) + + def __ior__(self, other): + bitwise_or(self.array, other, self.array) + return self + + def __pos__(self): + return self._rc(self.array) + + def __invert__(self): + return self._rc(invert(self.array)) + + def _scalarfunc(self, func): + if self.ndim == 0: + return func(self[0]) + else: + raise TypeError( + "only rank-0 arrays can be converted to Python scalars.") + + def __complex__(self): + return self._scalarfunc(complex) + + def __float__(self): + return self._scalarfunc(float) + + def __int__(self): + return self._scalarfunc(int) + + def __hex__(self): + return self._scalarfunc(hex) + + def __oct__(self): + return self._scalarfunc(oct) + + def __lt__(self, other): + return self._rc(less(self.array, other)) + + def __le__(self, other): + return self._rc(less_equal(self.array, other)) + + def __eq__(self, other): + return self._rc(equal(self.array, other)) + + def __ne__(self, other): + return self._rc(not_equal(self.array, other)) + + def __gt__(self, other): + return self._rc(greater(self.array, other)) + + def __ge__(self, other): + return self._rc(greater_equal(self.array, other)) + + def copy(self): + "" + return self._rc(self.array.copy()) + + def tostring(self): + "" + return self.array.tostring() + + def tobytes(self): + "" + return self.array.tobytes() + + def byteswap(self): + "" + return self._rc(self.array.byteswap()) + + def astype(self, typecode): + "" + return self._rc(self.array.astype(typecode)) + + def _rc(self, a): + if len(shape(a)) == 0: + return a + else: + return self.__class__(a) + + def __array_wrap__(self, *args): + return self.__class__(args[0]) + + def __setattr__(self, attr, value): + if attr == 'array': + object.__setattr__(self, attr, value) + return + try: + self.array.__setattr__(attr, value) + except AttributeError: + object.__setattr__(self, attr, value) + + # Only called after other approaches fail. + def __getattr__(self, attr): + if (attr == 'array'): + return object.__getattribute__(self, attr) + return self.array.__getattribute__(attr) + +############################################################# +# Test of class container +############################################################# +if __name__ == '__main__': + temp = reshape(arange(10000), (100, 100)) + + ua = container(temp) + # new object created begin test + print(dir(ua)) + print(shape(ua), ua.shape) # I have changed Numeric.py + + ua_small = ua[:3, :5] + print(ua_small) + # this did not change ua[0,0], which is not normal behavior + ua_small[0, 0] = 10 + print(ua_small[0, 0], ua[0, 0]) + print(sin(ua_small) / 3. * 6. 
+    sqrt(ua_small ** 2))
+    print(less(ua_small, 103), type(less(ua_small, 103)))
+    print(type(ua_small * reshape(arange(15), shape(ua_small))))
+    print(reshape(ua_small, (5, 3)))
+    print(transpose(ua_small))
diff --git a/.local/lib/python3.8/site-packages/numpy/lib/utils.py b/.local/lib/python3.8/site-packages/numpy/lib/utils.py
new file mode 100644
index 0000000..1df2ab0
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/lib/utils.py
@@ -0,0 +1,1067 @@
+import os
+import sys
+import textwrap
+import types
+import re
+import warnings
+
+from numpy.core.numerictypes import issubclass_, issubsctype, issubdtype
+from numpy.core.overrides import set_module
+from numpy.core import ndarray, ufunc, asarray
+import numpy as np
+
+__all__ = [
+    'issubclass_', 'issubsctype', 'issubdtype', 'deprecate',
+    'deprecate_with_doc', 'get_include', 'info', 'source', 'who',
+    'lookfor', 'byte_bounds', 'safe_eval'
+    ]
+
+def get_include():
+    """
+    Return the directory that contains the NumPy \\*.h header files.
+
+    Extension modules that need to compile against NumPy should use this
+    function to locate the appropriate include directory.
+
+    Notes
+    -----
+    When using ``distutils``, for example in ``setup.py``::
+
+        import numpy as np
+        ...
+        Extension('extension_name', ...
+                  include_dirs=[np.get_include()])
+        ...
+
+    """
+    import numpy
+    if numpy.show_config is None:
+        # running from numpy source directory
+        d = os.path.join(os.path.dirname(numpy.__file__), 'core', 'include')
+    else:
+        # using installed numpy core headers
+        import numpy.core as core
+        d = os.path.join(os.path.dirname(core.__file__), 'include')
+    return d
+
+
+def _set_function_name(func, name):
+    func.__name__ = name
+    return func
+
+
+class _Deprecate:
+    """
+    Decorator class to deprecate old functions.
+
+    Refer to `deprecate` for details.
+
+    See Also
+    --------
+    deprecate
+
+    """
+
+    def __init__(self, old_name=None, new_name=None, message=None):
+        self.old_name = old_name
+        self.new_name = new_name
+        self.message = message
+
+    def __call__(self, func, *args, **kwargs):
+        """
+        Decorator call.  Refer to ``deprecate``.
+
+        """
+        old_name = self.old_name
+        new_name = self.new_name
+        message = self.message
+
+        if old_name is None:
+            old_name = func.__name__
+        if new_name is None:
+            depdoc = "`%s` is deprecated!" % old_name
+        else:
+            depdoc = "`%s` is deprecated, use `%s` instead!" % \
+                     (old_name, new_name)
+
+        if message is not None:
+            depdoc += "\n" + message
+
+        def newfunc(*args, **kwds):
+            warnings.warn(depdoc, DeprecationWarning, stacklevel=2)
+            return func(*args, **kwds)
+
+        newfunc = _set_function_name(newfunc, old_name)
+        doc = func.__doc__
+        if doc is None:
+            doc = depdoc
+        else:
+            lines = doc.expandtabs().split('\n')
+            indent = _get_indent(lines[1:])
+            if lines[0].lstrip():
+                # Indent the original first line to let inspect.cleandoc()
+                # dedent the docstring despite the deprecation notice.
+                doc = indent * ' ' + doc
+            else:
+                # Remove the same leading blank lines as cleandoc() would.
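+                # `skip` accumulates the length of the first line and of each
+                # following whitespace-only line (any line no longer than the
+                # common indent), plus one per line for the stripped newline,
+                # so that `doc[skip:]` drops exactly those leading blanks.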
+ skip = len(lines[0]) + 1 + for line in lines[1:]: + if len(line) > indent: + break + skip += len(line) + 1 + doc = doc[skip:] + depdoc = textwrap.indent(depdoc, ' ' * indent) + doc = '\n\n'.join([depdoc, doc]) + newfunc.__doc__ = doc + try: + d = func.__dict__ + except AttributeError: + pass + else: + newfunc.__dict__.update(d) + return newfunc + + +def _get_indent(lines): + """ + Determines the leading whitespace that could be removed from all the lines. + """ + indent = sys.maxsize + for line in lines: + content = len(line.lstrip()) + if content: + indent = min(indent, len(line) - content) + if indent == sys.maxsize: + indent = 0 + return indent + + +def deprecate(*args, **kwargs): + """ + Issues a DeprecationWarning, adds warning to `old_name`'s + docstring, rebinds ``old_name.__name__`` and returns the new + function object. + + This function may also be used as a decorator. + + Parameters + ---------- + func : function + The function to be deprecated. + old_name : str, optional + The name of the function to be deprecated. Default is None, in + which case the name of `func` is used. + new_name : str, optional + The new name for the function. Default is None, in which case the + deprecation message is that `old_name` is deprecated. If given, the + deprecation message is that `old_name` is deprecated and `new_name` + should be used instead. + message : str, optional + Additional explanation of the deprecation. Displayed in the + docstring after the warning. + + Returns + ------- + old_func : function + The deprecated function. + + Examples + -------- + Note that ``olduint`` returns a value after printing Deprecation + Warning: + + >>> olduint = np.deprecate(np.uint) + DeprecationWarning: `uint64` is deprecated! # may vary + >>> olduint(6) + 6 + + """ + # Deprecate may be run as a function or as a decorator + # If run as a function, we initialise the decorator class + # and execute its __call__ method. + + if args: + fn = args[0] + args = args[1:] + + return _Deprecate(*args, **kwargs)(fn) + else: + return _Deprecate(*args, **kwargs) + + +def deprecate_with_doc(msg): + """ + Deprecates a function and includes the deprecation in its docstring. + + This function is used as a decorator. It returns an object that can be + used to issue a DeprecationWarning, by passing the to-be decorated + function as argument, this adds warning to the to-be decorated function's + docstring and returns the new function object. + + See Also + -------- + deprecate : Decorate a function such that it issues a `DeprecationWarning` + + Parameters + ---------- + msg : str + Additional explanation of the deprecation. Displayed in the + docstring after the warning. + + Returns + ------- + obj : object + + """ + return _Deprecate(message=msg) + + +#-------------------------------------------- +# Determine if two arrays can share memory +#-------------------------------------------- + +def byte_bounds(a): + """ + Returns pointers to the end-points of an array. + + Parameters + ---------- + a : ndarray + Input array. It must conform to the Python-side of the array + interface. + + Returns + ------- + (low, high) : tuple of 2 integers + The first integer is the first byte of the array, the second + integer is just past the last byte of the array. If `a` is not + contiguous it will not use every byte between the (`low`, `high`) + values. 
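+
+    Notes
+    -----
+    As a sketch of the computation used below: for a contiguous array the
+    bounds are simply ``ptr`` and ``ptr + a.size * a.itemsize``.  For a
+    strided array, each axis contributes ``(shape[k]-1) * strides[k]`` to
+    the low bound when the stride is negative and to the high bound when
+    it is positive, with one final itemsize added to the high bound::
+
+        low  = ptr + sum(min((s - 1)*st, 0) for s, st in zip(a.shape, a.strides))
+        high = ptr + sum(max((s - 1)*st, 0)
+                         for s, st in zip(a.shape, a.strides)) + a.itemsize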
+ + Examples + -------- + >>> I = np.eye(2, dtype='f'); I.dtype + dtype('float32') + >>> low, high = np.byte_bounds(I) + >>> high - low == I.size*I.itemsize + True + >>> I = np.eye(2); I.dtype + dtype('float64') + >>> low, high = np.byte_bounds(I) + >>> high - low == I.size*I.itemsize + True + + """ + ai = a.__array_interface__ + a_data = ai['data'][0] + astrides = ai['strides'] + ashape = ai['shape'] + bytes_a = asarray(a).dtype.itemsize + + a_low = a_high = a_data + if astrides is None: + # contiguous case + a_high += a.size * bytes_a + else: + for shape, stride in zip(ashape, astrides): + if stride < 0: + a_low += (shape-1)*stride + else: + a_high += (shape-1)*stride + a_high += bytes_a + return a_low, a_high + + +#----------------------------------------------------------------------------- +# Function for output and information on the variables used. +#----------------------------------------------------------------------------- + + +def who(vardict=None): + """ + Print the NumPy arrays in the given dictionary. + + If there is no dictionary passed in or `vardict` is None then returns + NumPy arrays in the globals() dictionary (all NumPy arrays in the + namespace). + + Parameters + ---------- + vardict : dict, optional + A dictionary possibly containing ndarrays. Default is globals(). + + Returns + ------- + out : None + Returns 'None'. + + Notes + ----- + Prints out the name, shape, bytes and type of all of the ndarrays + present in `vardict`. + + Examples + -------- + >>> a = np.arange(10) + >>> b = np.ones(20) + >>> np.who() + Name Shape Bytes Type + =========================================================== + a 10 80 int64 + b 20 160 float64 + Upper bound on total bytes = 240 + + >>> d = {'x': np.arange(2.0), 'y': np.arange(3.0), 'txt': 'Some str', + ... 'idx':5} + >>> np.who(d) + Name Shape Bytes Type + =========================================================== + x 2 16 float64 + y 3 24 float64 + Upper bound on total bytes = 40 + + """ + if vardict is None: + frame = sys._getframe().f_back + vardict = frame.f_globals + sta = [] + cache = {} + for name in vardict.keys(): + if isinstance(vardict[name], ndarray): + var = vardict[name] + idv = id(var) + if idv in cache.keys(): + namestr = name + " (%s)" % cache[idv] + original = 0 + else: + cache[idv] = name + namestr = name + original = 1 + shapestr = " x ".join(map(str, var.shape)) + bytestr = str(var.nbytes) + sta.append([namestr, shapestr, bytestr, var.dtype.name, + original]) + + maxname = 0 + maxshape = 0 + maxbyte = 0 + totalbytes = 0 + for val in sta: + if maxname < len(val[0]): + maxname = len(val[0]) + if maxshape < len(val[1]): + maxshape = len(val[1]) + if maxbyte < len(val[2]): + maxbyte = len(val[2]) + if val[4]: + totalbytes += int(val[2]) + + if len(sta) > 0: + sp1 = max(10, maxname) + sp2 = max(10, maxshape) + sp3 = max(10, maxbyte) + prval = "Name %s Shape %s Bytes %s Type" % (sp1*' ', sp2*' ', sp3*' ') + print(prval + "\n" + "="*(len(prval)+5) + "\n") + + for val in sta: + print("%s %s %s %s %s %s %s" % (val[0], ' '*(sp1-len(val[0])+4), + val[1], ' '*(sp2-len(val[1])+5), + val[2], ' '*(sp3-len(val[2])+5), + val[3])) + print("\nUpper bound on total bytes = %d" % totalbytes) + return + +#----------------------------------------------------------------------------- + + +# NOTE: pydoc defines a help function which works similarly to this +# except it uses a pager to take over the screen. + +# combine name and arguments and split to multiple lines of width +# characters. 
End lines on a comma and begin argument list indented with +# the rest of the arguments. +def _split_line(name, arguments, width): + firstwidth = len(name) + k = firstwidth + newstr = name + sepstr = ", " + arglist = arguments.split(sepstr) + for argument in arglist: + if k == firstwidth: + addstr = "" + else: + addstr = sepstr + k = k + len(argument) + len(addstr) + if k > width: + k = firstwidth + 1 + len(argument) + newstr = newstr + ",\n" + " "*(firstwidth+2) + argument + else: + newstr = newstr + addstr + argument + return newstr + +_namedict = None +_dictlist = None + +# Traverse all module directories underneath globals +# to see if something is defined +def _makenamedict(module='numpy'): + module = __import__(module, globals(), locals(), []) + thedict = {module.__name__:module.__dict__} + dictlist = [module.__name__] + totraverse = [module.__dict__] + while True: + if len(totraverse) == 0: + break + thisdict = totraverse.pop(0) + for x in thisdict.keys(): + if isinstance(thisdict[x], types.ModuleType): + modname = thisdict[x].__name__ + if modname not in dictlist: + moddict = thisdict[x].__dict__ + dictlist.append(modname) + totraverse.append(moddict) + thedict[modname] = moddict + return thedict, dictlist + + +def _info(obj, output=sys.stdout): + """Provide information about ndarray obj. + + Parameters + ---------- + obj : ndarray + Must be ndarray, not checked. + output + Where printed output goes. + + Notes + ----- + Copied over from the numarray module prior to its removal. + Adapted somewhat as only numpy is an option now. + + Called by info. + + """ + extra = "" + tic = "" + bp = lambda x: x + cls = getattr(obj, '__class__', type(obj)) + nm = getattr(cls, '__name__', cls) + strides = obj.strides + endian = obj.dtype.byteorder + + print("class: ", nm, file=output) + print("shape: ", obj.shape, file=output) + print("strides: ", strides, file=output) + print("itemsize: ", obj.itemsize, file=output) + print("aligned: ", bp(obj.flags.aligned), file=output) + print("contiguous: ", bp(obj.flags.contiguous), file=output) + print("fortran: ", obj.flags.fortran, file=output) + print( + "data pointer: %s%s" % (hex(obj.ctypes._as_parameter_.value), extra), + file=output + ) + print("byteorder: ", end=' ', file=output) + if endian in ['|', '=']: + print("%s%s%s" % (tic, sys.byteorder, tic), file=output) + byteswap = False + elif endian == '>': + print("%sbig%s" % (tic, tic), file=output) + byteswap = sys.byteorder != "big" + else: + print("%slittle%s" % (tic, tic), file=output) + byteswap = sys.byteorder != "little" + print("byteswap: ", bp(byteswap), file=output) + print("type: %s" % obj.dtype, file=output) + + +@set_module('numpy') +def info(object=None, maxwidth=76, output=sys.stdout, toplevel='numpy'): + """ + Get help information for a function, class, or module. + + Parameters + ---------- + object : object or str, optional + Input object or name to get information about. If `object` is a + numpy object, its docstring is given. If it is a string, available + modules are searched for matching objects. If None, information + about `info` itself is returned. + maxwidth : int, optional + Printing width. + output : file like object, optional + File like object that the output is written to, default is + ``stdout``. The object has to be opened in 'w' or 'a' mode. + toplevel : str, optional + Start search at this level. 
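+        For example, the default ``toplevel='numpy'`` starts the lookup in
+        the ``numpy`` namespace and the sub-modules reachable from it.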
+ + See Also + -------- + source, lookfor + + Notes + ----- + When used interactively with an object, ``np.info(obj)`` is equivalent + to ``help(obj)`` on the Python prompt or ``obj?`` on the IPython + prompt. + + Examples + -------- + >>> np.info(np.polyval) # doctest: +SKIP + polyval(p, x) + Evaluate the polynomial p at x. + ... + + When using a string for `object` it is possible to get multiple results. + + >>> np.info('fft') # doctest: +SKIP + *** Found in numpy *** + Core FFT routines + ... + *** Found in numpy.fft *** + fft(a, n=None, axis=-1) + ... + *** Repeat reference found in numpy.fft.fftpack *** + *** Total of 3 references found. *** + + """ + global _namedict, _dictlist + # Local import to speed up numpy's import time. + import pydoc + import inspect + + if (hasattr(object, '_ppimport_importer') or + hasattr(object, '_ppimport_module')): + object = object._ppimport_module + elif hasattr(object, '_ppimport_attr'): + object = object._ppimport_attr + + if object is None: + info(info) + elif isinstance(object, ndarray): + _info(object, output=output) + elif isinstance(object, str): + if _namedict is None: + _namedict, _dictlist = _makenamedict(toplevel) + numfound = 0 + objlist = [] + for namestr in _dictlist: + try: + obj = _namedict[namestr][object] + if id(obj) in objlist: + print("\n " + "*** Repeat reference found in %s *** " % namestr, + file=output + ) + else: + objlist.append(id(obj)) + print(" *** Found in %s ***" % namestr, file=output) + info(obj) + print("-"*maxwidth, file=output) + numfound += 1 + except KeyError: + pass + if numfound == 0: + print("Help for %s not found." % object, file=output) + else: + print("\n " + "*** Total of %d references found. ***" % numfound, + file=output + ) + + elif inspect.isfunction(object) or inspect.ismethod(object): + name = object.__name__ + try: + arguments = str(inspect.signature(object)) + except Exception: + arguments = "()" + + if len(name+arguments) > maxwidth: + argstr = _split_line(name, arguments, maxwidth) + else: + argstr = name + arguments + + print(" " + argstr + "\n", file=output) + print(inspect.getdoc(object), file=output) + + elif inspect.isclass(object): + name = object.__name__ + try: + arguments = str(inspect.signature(object)) + except Exception: + arguments = "()" + + if len(name+arguments) > maxwidth: + argstr = _split_line(name, arguments, maxwidth) + else: + argstr = name + arguments + + print(" " + argstr + "\n", file=output) + doc1 = inspect.getdoc(object) + if doc1 is None: + if hasattr(object, '__init__'): + print(inspect.getdoc(object.__init__), file=output) + else: + print(inspect.getdoc(object), file=output) + + methods = pydoc.allmethods(object) + + public_methods = [meth for meth in methods if meth[0] != '_'] + if public_methods: + print("\n\nMethods:\n", file=output) + for meth in public_methods: + thisobj = getattr(object, meth, None) + if thisobj is not None: + methstr, other = pydoc.splitdoc( + inspect.getdoc(thisobj) or "None" + ) + print(" %s -- %s" % (meth, methstr), file=output) + + elif hasattr(object, '__doc__'): + print(inspect.getdoc(object), file=output) + + +@set_module('numpy') +def source(object, output=sys.stdout): + """ + Print or write to a file the source code for a NumPy object. + + The source code is only returned for objects written in Python. Many + functions and classes are defined in C and will therefore not return + useful information. + + Parameters + ---------- + object : numpy object + Input object. This can be any object (function, class, module, + ...). 
+ output : file object, optional + If `output` not supplied then source code is printed to screen + (sys.stdout). File object must be created with either write 'w' or + append 'a' modes. + + See Also + -------- + lookfor, info + + Examples + -------- + >>> np.source(np.interp) #doctest: +SKIP + In file: /usr/lib/python2.6/dist-packages/numpy/lib/function_base.py + def interp(x, xp, fp, left=None, right=None): + \"\"\".... (full docstring printed)\"\"\" + if isinstance(x, (float, int, number)): + return compiled_interp([x], xp, fp, left, right).item() + else: + return compiled_interp(x, xp, fp, left, right) + + The source code is only returned for objects written in Python. + + >>> np.source(np.array) #doctest: +SKIP + Not available for this object. + + """ + # Local import to speed up numpy's import time. + import inspect + try: + print("In file: %s\n" % inspect.getsourcefile(object), file=output) + print(inspect.getsource(object), file=output) + except Exception: + print("Not available for this object.", file=output) + + +# Cache for lookfor: {id(module): {name: (docstring, kind, index), ...}...} +# where kind: "func", "class", "module", "object" +# and index: index in breadth-first namespace traversal +_lookfor_caches = {} + +# regexp whose match indicates that the string may contain a function +# signature +_function_signature_re = re.compile(r"[a-z0-9_]+\(.*[,=].*\)", re.I) + + +@set_module('numpy') +def lookfor(what, module=None, import_modules=True, regenerate=False, + output=None): + """ + Do a keyword search on docstrings. + + A list of objects that matched the search is displayed, + sorted by relevance. All given keywords need to be found in the + docstring for it to be returned as a result, but the order does + not matter. + + Parameters + ---------- + what : str + String containing words to look for. + module : str or list, optional + Name of module(s) whose docstrings to go through. + import_modules : bool, optional + Whether to import sub-modules in packages. Default is True. + regenerate : bool, optional + Whether to re-generate the docstring cache. Default is False. + output : file-like, optional + File-like object to write the output to. If omitted, use a pager. + + See Also + -------- + source, info + + Notes + ----- + Relevance is determined only roughly, by checking if the keywords occur + in the function name, at the start of a docstring, etc. + + Examples + -------- + >>> np.lookfor('binary representation') # doctest: +SKIP + Search results for 'binary representation' + ------------------------------------------ + numpy.binary_repr + Return the binary representation of the input number as a string. + numpy.core.setup_common.long_double_representation + Given a binary dump as given by GNU od -b, look for long double + numpy.base_repr + Return a string representation of a number in the given base system. + ... + + """ + import pydoc + + # Cache + cache = _lookfor_generate_cache(module, import_modules, regenerate) + + # Search + # XXX: maybe using a real stemming search engine would be better? 
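+    # Matching strategy: a docstring is a hit only if it contains every
+    # whitespace-separated keyword (case-insensitively); hits are then
+    # ordered by the heuristic relevance() score defined further down.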
+ found = [] + whats = str(what).lower().split() + if not whats: + return + + for name, (docstring, kind, index) in cache.items(): + if kind in ('module', 'object'): + # don't show modules or objects + continue + doc = docstring.lower() + if all(w in doc for w in whats): + found.append(name) + + # Relevance sort + # XXX: this is full Harrison-Stetson heuristics now, + # XXX: it probably could be improved + + kind_relevance = {'func': 1000, 'class': 1000, + 'module': -1000, 'object': -1000} + + def relevance(name, docstr, kind, index): + r = 0 + # do the keywords occur within the start of the docstring? + first_doc = "\n".join(docstr.lower().strip().split("\n")[:3]) + r += sum([200 for w in whats if w in first_doc]) + # do the keywords occur in the function name? + r += sum([30 for w in whats if w in name]) + # is the full name long? + r += -len(name) * 5 + # is the object of bad type? + r += kind_relevance.get(kind, -1000) + # is the object deep in namespace hierarchy? + r += -name.count('.') * 10 + r += max(-index / 100, -100) + return r + + def relevance_value(a): + return relevance(a, *cache[a]) + found.sort(key=relevance_value) + + # Pretty-print + s = "Search results for '%s'" % (' '.join(whats)) + help_text = [s, "-"*len(s)] + for name in found[::-1]: + doc, kind, ix = cache[name] + + doclines = [line.strip() for line in doc.strip().split("\n") + if line.strip()] + + # find a suitable short description + try: + first_doc = doclines[0].strip() + if _function_signature_re.search(first_doc): + first_doc = doclines[1].strip() + except IndexError: + first_doc = "" + help_text.append("%s\n %s" % (name, first_doc)) + + if not found: + help_text.append("Nothing found.") + + # Output + if output is not None: + output.write("\n".join(help_text)) + elif len(help_text) > 10: + pager = pydoc.getpager() + pager("\n".join(help_text)) + else: + print("\n".join(help_text)) + +def _lookfor_generate_cache(module, import_modules, regenerate): + """ + Generate docstring cache for given module. + + Parameters + ---------- + module : str, None, module + Module for which to generate docstring cache + import_modules : bool + Whether to import sub-modules in packages. + regenerate : bool + Re-generate the docstring cache + + Returns + ------- + cache : dict {obj_full_name: (docstring, kind, index), ...} + Docstring cache for the module, either cached one (regenerate=False) + or newly generated. + + """ + # Local import to speed up numpy's import time. 
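+    # Results are memoized in _lookfor_caches, keyed by id(module); the
+    # breadth-first walk below records (docstring, kind, index) for every
+    # reachable object, `index` being the traversal order that lookfor()
+    # uses as a relevance tie-breaker.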
+ import inspect + + from io import StringIO + + if module is None: + module = "numpy" + + if isinstance(module, str): + try: + __import__(module) + except ImportError: + return {} + module = sys.modules[module] + elif isinstance(module, list) or isinstance(module, tuple): + cache = {} + for mod in module: + cache.update(_lookfor_generate_cache(mod, import_modules, + regenerate)) + return cache + + if id(module) in _lookfor_caches and not regenerate: + return _lookfor_caches[id(module)] + + # walk items and collect docstrings + cache = {} + _lookfor_caches[id(module)] = cache + seen = {} + index = 0 + stack = [(module.__name__, module)] + while stack: + name, item = stack.pop(0) + if id(item) in seen: + continue + seen[id(item)] = True + + index += 1 + kind = "object" + + if inspect.ismodule(item): + kind = "module" + try: + _all = item.__all__ + except AttributeError: + _all = None + + # import sub-packages + if import_modules and hasattr(item, '__path__'): + for pth in item.__path__: + for mod_path in os.listdir(pth): + this_py = os.path.join(pth, mod_path) + init_py = os.path.join(pth, mod_path, '__init__.py') + if (os.path.isfile(this_py) and + mod_path.endswith('.py')): + to_import = mod_path[:-3] + elif os.path.isfile(init_py): + to_import = mod_path + else: + continue + if to_import == '__init__': + continue + + try: + old_stdout = sys.stdout + old_stderr = sys.stderr + try: + sys.stdout = StringIO() + sys.stderr = StringIO() + __import__("%s.%s" % (name, to_import)) + finally: + sys.stdout = old_stdout + sys.stderr = old_stderr + # Catch SystemExit, too + except (Exception, SystemExit): + continue + + for n, v in _getmembers(item): + try: + item_name = getattr(v, '__name__', "%s.%s" % (name, n)) + mod_name = getattr(v, '__module__', None) + except NameError: + # ref. SWIG's global cvars + # NameError: Unknown C global variable + item_name = "%s.%s" % (name, n) + mod_name = None + if '.' not in item_name and mod_name: + item_name = "%s.%s" % (mod_name, item_name) + + if not item_name.startswith(name + '.'): + # don't crawl "foreign" objects + if isinstance(v, ufunc): + # ... unless they are ufuncs + pass + else: + continue + elif not (inspect.ismodule(v) or _all is None or n in _all): + continue + stack.append(("%s.%s" % (name, n), v)) + elif inspect.isclass(item): + kind = "class" + for n, v in _getmembers(item): + stack.append(("%s.%s" % (name, n), v)) + elif hasattr(item, "__call__"): + kind = "func" + + try: + doc = inspect.getdoc(item) + except NameError: + # ref SWIG's NameError: Unknown C global variable + doc = None + if doc is not None: + cache[name] = (doc, kind, index) + + return cache + +def _getmembers(item): + import inspect + try: + members = inspect.getmembers(item) + except Exception: + members = [(x, getattr(item, x)) for x in dir(item) + if hasattr(item, x)] + return members + + +def safe_eval(source): + """ + Protected string evaluation. + + Evaluate a string containing a Python literal expression without + allowing the execution of arbitrary non-literal code. + + Parameters + ---------- + source : str + The string to evaluate. + + Returns + ------- + obj : object + The result of evaluating `source`. + + Raises + ------ + SyntaxError + If the code has invalid Python syntax, or if it contains + non-literal code. + + Examples + -------- + >>> np.safe_eval('1') + 1 + >>> np.safe_eval('[1, 2, 3]') + [1, 2, 3] + >>> np.safe_eval('{"foo": ("bar", 10.0)}') + {'foo': ('bar', 10.0)} + + >>> np.safe_eval('import os') + Traceback (most recent call last): + ... 
+    SyntaxError: invalid syntax
+
+    >>> np.safe_eval('open("/home/user/.ssh/id_dsa").read()')
+    Traceback (most recent call last):
+      ...
+    ValueError: malformed node or string: <_ast.Call object at 0x...>
+
+    """
+    # Local import to speed up numpy's import time.
+    import ast
+    return ast.literal_eval(source)
+
+
+def _median_nancheck(data, result, axis):
+    """
+    Utility function to check the median result from `data` for NaN values
+    at the end and return NaN in that case. The input result can also be a
+    MaskedArray.
+
+    Parameters
+    ----------
+    data : array
+        Sorted input data to the median function.
+    result : Array or MaskedArray
+        Result of the median function.
+    axis : int
+        Axis along which the median was computed.
+
+    Returns
+    -------
+    result : scalar or ndarray
+        Median or NaN in axes which contained NaN in the input.  If the input
+        was an array, NaN will be inserted in-place.  If a scalar, either the
+        input itself or a scalar NaN.
+    """
+    if data.size == 0:
+        return result
+    n = np.isnan(data.take(-1, axis=axis))
+    # masked NaN values are ok
+    if np.ma.isMaskedArray(n):
+        n = n.filled(False)
+    if np.count_nonzero(n.ravel()) > 0:
+        # Without given output, it is possible that the current result is a
+        # numpy scalar, which is not writeable.  If so, just return nan.
+        if isinstance(result, np.generic):
+            return data.dtype.type(np.nan)
+
+        result[n] = np.nan
+    return result
+
+def _opt_info():
+    """
+    Return a string containing the CPU features supported by the current
+    build.
+
+    The string format can be explained as follows:
+    - dispatched features that are supported by the running machine
+      end with `*`.
+    - dispatched features that are "not" supported by the running machine
+      end with `?`.
+    - the remaining features represent the baseline.
+    """
+    from numpy.core._multiarray_umath import (
+        __cpu_features__, __cpu_baseline__, __cpu_dispatch__
+    )
+
+    if len(__cpu_baseline__) == 0 and len(__cpu_dispatch__) == 0:
+        return ''
+
+    enabled_features = ' '.join(__cpu_baseline__)
+    for feature in __cpu_dispatch__:
+        if __cpu_features__[feature]:
+            enabled_features += f" {feature}*"
+        else:
+            enabled_features += f" {feature}?"
+
+    return enabled_features
+#-----------------------------------------------------------------------------
diff --git a/.local/lib/python3.8/site-packages/numpy/lib/utils.pyi b/.local/lib/python3.8/site-packages/numpy/lib/utils.pyi
new file mode 100644
index 0000000..f0a8797
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/lib/utils.pyi
@@ -0,0 +1,95 @@
+from ast import AST
+from typing import (
+    Any,
+    Callable,
+    List,
+    Mapping,
+    Optional,
+    overload,
+    Sequence,
+    Tuple,
+    TypeVar,
+    Union,
+    Protocol,
+)
+
+from numpy import ndarray, generic
+
+from numpy.core.numerictypes import (
+    issubclass_ as issubclass_,
+    issubdtype as issubdtype,
+    issubsctype as issubsctype,
+)
+
+_T_contra = TypeVar("_T_contra", contravariant=True)
+_FuncType = TypeVar("_FuncType", bound=Callable[..., Any])
+
+# A file-like object opened in `w` mode
+class _SupportsWrite(Protocol[_T_contra]):
+    def write(self, s: _T_contra, /) -> Any: ...
+
+__all__: List[str]
+
+class _Deprecate:
+    old_name: Optional[str]
+    new_name: Optional[str]
+    message: Optional[str]
+    def __init__(
+        self,
+        old_name: Optional[str] = ...,
+        new_name: Optional[str] = ...,
+        message: Optional[str] = ...,
+    ) -> None: ...
+    # NOTE: `__call__` can in principle take arbitrary `*args` and `**kwargs`,
+    # even though they aren't used for anything
+    def __call__(self, func: _FuncType) -> _FuncType: ...
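+
+# Illustrative sketch (not part of the upstream stubs): the two `deprecate`
+# overloads below cover both usage forms --
+#
+#     new_func = deprecate(old_func)            # bare call    -> _FuncType
+#
+#     @deprecate(old_name="x", new_name="y")    # keyword form -> _Deprecate,
+#     def func(): ...                           # whose __call__ then yields
+#                                               # _FuncType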
+
+def get_include() -> str: ...
+
+@overload
+def deprecate(
+    *,
+    old_name: Optional[str] = ...,
+    new_name: Optional[str] = ...,
+    message: Optional[str] = ...,
+) -> _Deprecate: ...
+@overload
+def deprecate(
+    func: _FuncType,
+    /,
+    old_name: Optional[str] = ...,
+    new_name: Optional[str] = ...,
+    message: Optional[str] = ...,
+) -> _FuncType: ...
+
+def deprecate_with_doc(msg: Optional[str]) -> _Deprecate: ...
+
+# NOTE: In practice `byte_bounds` can (potentially) take any object
+# implementing the `__array_interface__` protocol. The caveat is
+# that certain keys, marked as optional in the spec, must be present for
+# `byte_bounds`. This concerns `"strides"` and `"data"`.
+def byte_bounds(a: Union[generic, ndarray[Any, Any]]) -> Tuple[int, int]: ...
+
+def who(vardict: Optional[Mapping[str, ndarray[Any, Any]]] = ...) -> None: ...
+
+def info(
+    object: object = ...,
+    maxwidth: int = ...,
+    output: Optional[_SupportsWrite[str]] = ...,
+    toplevel: str = ...,
+) -> None: ...
+
+def source(
+    object: object,
+    output: Optional[_SupportsWrite[str]] = ...,
+) -> None: ...
+
+def lookfor(
+    what: str,
+    module: Union[None, str, Sequence[str]] = ...,
+    import_modules: bool = ...,
+    regenerate: bool = ...,
+    output: Optional[_SupportsWrite[str]] = ...,
+) -> None: ...
+
+def safe_eval(source: Union[str, AST]) -> Any: ...
diff --git a/.local/lib/python3.8/site-packages/numpy/linalg/__init__.py b/.local/lib/python3.8/site-packages/numpy/linalg/__init__.py
new file mode 100644
index 0000000..93943de
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/linalg/__init__.py
@@ -0,0 +1,80 @@
+"""
+``numpy.linalg``
+================
+
+The NumPy linear algebra functions rely on BLAS and LAPACK to provide efficient
+low level implementations of standard linear algebra algorithms. Those
+libraries may be provided by NumPy itself using C versions of a subset of their
+reference implementations but, when possible, highly optimized libraries that
+take advantage of specialized processor functionality are preferred. Examples
+of such libraries are OpenBLAS, MKL (TM), and ATLAS. Because those libraries
+are multithreaded and processor dependent, environment variables and external
+packages such as threadpoolctl may be needed to control the number of threads
+or specify the processor architecture.
+
+- OpenBLAS: https://www.openblas.net/
+- threadpoolctl: https://github.com/joblib/threadpoolctl
+
+Please note that the most-used linear algebra functions in NumPy are present in
+the main ``numpy`` namespace rather than in ``numpy.linalg``. These are:
+``dot``, ``vdot``, ``inner``, ``outer``, ``matmul``, ``tensordot``, ``einsum``,
+``einsum_path`` and ``kron``.
+
+Functions present in numpy.linalg are listed below.
+
+
+Matrix and vector products
+--------------------------
+
+   multi_dot
+   matrix_power
+
+Decompositions
+--------------
+
+   cholesky
+   qr
+   svd
+
+Matrix eigenvalues
+------------------
+
+   eig
+   eigh
+   eigvals
+   eigvalsh
+
+Norms and other numbers
+-----------------------
+
+   norm
+   cond
+   det
+   matrix_rank
+   slogdet
+
+Solving equations and inverting matrices
+----------------------------------------
+
+   solve
+   tensorsolve
+   lstsq
+   inv
+   pinv
+   tensorinv
+
+Exceptions
+----------
+
+   LinAlgError
+
+"""
+# To get sub-modules
+from . import linalg
+from .linalg import *
+
+__all__ = linalg.__all__.copy()
+
+from numpy._pytesttester import PytestTester
+test = PytestTester(__name__)
+del PytestTester
diff --git a/.local/lib/python3.8/site-packages/numpy/linalg/__init__.pyi b/.local/lib/python3.8/site-packages/numpy/linalg/__init__.pyi
new file mode 100644
index 0000000..d457f15
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/linalg/__init__.pyi
@@ -0,0 +1,32 @@
+from typing import Any, List
+
+from numpy.linalg.linalg import (
+    matrix_power as matrix_power,
+    solve as solve,
+    tensorsolve as tensorsolve,
+    tensorinv as tensorinv,
+    inv as inv,
+    cholesky as cholesky,
+    eigvals as eigvals,
+    eigvalsh as eigvalsh,
+    pinv as pinv,
+    slogdet as slogdet,
+    det as det,
+    svd as svd,
+    eig as eig,
+    eigh as eigh,
+    lstsq as lstsq,
+    norm as norm,
+    qr as qr,
+    cond as cond,
+    matrix_rank as matrix_rank,
+    multi_dot as multi_dot,
+)
+
+from numpy._pytesttester import PytestTester
+
+__all__: List[str]
+__path__: List[str]
+test: PytestTester
+
+class LinAlgError(Exception): ...
diff --git a/.local/lib/python3.8/site-packages/numpy/linalg/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/linalg/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000..cd1d448
Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/linalg/__pycache__/__init__.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/numpy/linalg/__pycache__/linalg.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/linalg/__pycache__/linalg.cpython-38.pyc
new file mode 100644
index 0000000..707111b
Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/linalg/__pycache__/linalg.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/numpy/linalg/__pycache__/setup.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/linalg/__pycache__/setup.cpython-38.pyc
new file mode 100644
index 0000000..362ef66
Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/linalg/__pycache__/setup.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/numpy/linalg/_umath_linalg.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/numpy/linalg/_umath_linalg.cpython-38-x86_64-linux-gnu.so
new file mode 100755
index 0000000..c09f47d
Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/linalg/_umath_linalg.cpython-38-x86_64-linux-gnu.so differ
diff --git a/.local/lib/python3.8/site-packages/numpy/linalg/lapack_lite.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/numpy/linalg/lapack_lite.cpython-38-x86_64-linux-gnu.so
new file mode 100755
index 0000000..183789b
Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/linalg/lapack_lite.cpython-38-x86_64-linux-gnu.so differ
diff --git a/.local/lib/python3.8/site-packages/numpy/linalg/linalg.py b/.local/lib/python3.8/site-packages/numpy/linalg/linalg.py
new file mode 100644
index 0000000..0c27e06
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/linalg/linalg.py
@@ -0,0 +1,2800 @@
+"""Lite version of scipy.linalg.
+
+Notes
+-----
+This module is a lite version of the linalg.py module in SciPy which
+contains a high-level Python interface to the LAPACK library.  The lite
+version only accesses the following LAPACK functions: dgesv, zgesv,
+dgeev, zgeev, dgesdd, zgesdd, dgelsd, zgelsd, dsyevd, zheevd, dgetrf,
+zgetrf, dpotrf, zpotrf, dgeqrf, zgeqrf, zungqr, dorgqr.
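+
+Most of these routines are thin wrappers around gufuncs in
+``_umath_linalg``, so they broadcast over stacks of matrices: the last
+two axes of the input are treated as the matrix dimensions.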
+""" + +__all__ = ['matrix_power', 'solve', 'tensorsolve', 'tensorinv', 'inv', + 'cholesky', 'eigvals', 'eigvalsh', 'pinv', 'slogdet', 'det', + 'svd', 'eig', 'eigh', 'lstsq', 'norm', 'qr', 'cond', 'matrix_rank', + 'LinAlgError', 'multi_dot'] + +import functools +import operator +import warnings + +from numpy.core import ( + array, asarray, zeros, empty, empty_like, intc, single, double, + csingle, cdouble, inexact, complexfloating, newaxis, all, Inf, dot, + add, multiply, sqrt, fastCopyAndTranspose, sum, isfinite, + finfo, errstate, geterrobj, moveaxis, amin, amax, product, abs, + atleast_2d, intp, asanyarray, object_, matmul, + swapaxes, divide, count_nonzero, isnan, sign, argsort, sort +) +from numpy.core.multiarray import normalize_axis_index +from numpy.core.overrides import set_module +from numpy.core import overrides +from numpy.lib.twodim_base import triu, eye +from numpy.linalg import _umath_linalg + + +array_function_dispatch = functools.partial( + overrides.array_function_dispatch, module='numpy.linalg') + + +fortran_int = intc + + +@set_module('numpy.linalg') +class LinAlgError(Exception): + """ + Generic Python-exception-derived object raised by linalg functions. + + General purpose exception class, derived from Python's exception.Exception + class, programmatically raised in linalg functions when a Linear + Algebra-related condition would prevent further correct execution of the + function. + + Parameters + ---------- + None + + Examples + -------- + >>> from numpy import linalg as LA + >>> LA.inv(np.zeros((2,2))) + Traceback (most recent call last): + File "", line 1, in + File "...linalg.py", line 350, + in inv return wrap(solve(a, identity(a.shape[0], dtype=a.dtype))) + File "...linalg.py", line 249, + in solve + raise LinAlgError('Singular matrix') + numpy.linalg.LinAlgError: Singular matrix + + """ + + +def _determine_error_states(): + errobj = geterrobj() + bufsize = errobj[0] + + with errstate(invalid='call', over='ignore', + divide='ignore', under='ignore'): + invalid_call_errmask = geterrobj()[1] + + return [bufsize, invalid_call_errmask, None] + +# Dealing with errors in _umath_linalg +_linalg_error_extobj = _determine_error_states() +del _determine_error_states + +def _raise_linalgerror_singular(err, flag): + raise LinAlgError("Singular matrix") + +def _raise_linalgerror_nonposdef(err, flag): + raise LinAlgError("Matrix is not positive definite") + +def _raise_linalgerror_eigenvalues_nonconvergence(err, flag): + raise LinAlgError("Eigenvalues did not converge") + +def _raise_linalgerror_svd_nonconvergence(err, flag): + raise LinAlgError("SVD did not converge") + +def _raise_linalgerror_lstsq(err, flag): + raise LinAlgError("SVD did not converge in Linear Least Squares") + +def _raise_linalgerror_qr(err, flag): + raise LinAlgError("Incorrect argument found while performing " + "QR factorization") + +def get_linalg_error_extobj(callback): + extobj = list(_linalg_error_extobj) # make a copy + extobj[2] = callback + return extobj + +def _makearray(a): + new = asarray(a) + wrap = getattr(a, "__array_prepare__", new.__array_wrap__) + return new, wrap + +def isComplexType(t): + return issubclass(t, complexfloating) + +_real_types_map = {single : single, + double : double, + csingle : single, + cdouble : double} + +_complex_types_map = {single : csingle, + double : cdouble, + csingle : csingle, + cdouble : cdouble} + +def _realType(t, default=double): + return _real_types_map.get(t, default) + +def _complexType(t, default=cdouble): + return _complex_types_map.get(t, 
default) + +def _commonType(*arrays): + # in lite version, use higher precision (always double or cdouble) + result_type = single + is_complex = False + for a in arrays: + if issubclass(a.dtype.type, inexact): + if isComplexType(a.dtype.type): + is_complex = True + rt = _realType(a.dtype.type, default=None) + if rt is None: + # unsupported inexact scalar + raise TypeError("array type %s is unsupported in linalg" % + (a.dtype.name,)) + else: + rt = double + if rt is double: + result_type = double + if is_complex: + t = cdouble + result_type = _complex_types_map[result_type] + else: + t = double + return t, result_type + + +# _fastCopyAndTranpose assumes the input is 2D (as all the calls in here are). + +_fastCT = fastCopyAndTranspose + +def _to_native_byte_order(*arrays): + ret = [] + for arr in arrays: + if arr.dtype.byteorder not in ('=', '|'): + ret.append(asarray(arr, dtype=arr.dtype.newbyteorder('='))) + else: + ret.append(arr) + if len(ret) == 1: + return ret[0] + else: + return ret + +def _fastCopyAndTranspose(type, *arrays): + cast_arrays = () + for a in arrays: + if a.dtype.type is not type: + a = a.astype(type) + cast_arrays = cast_arrays + (_fastCT(a),) + if len(cast_arrays) == 1: + return cast_arrays[0] + else: + return cast_arrays + +def _assert_2d(*arrays): + for a in arrays: + if a.ndim != 2: + raise LinAlgError('%d-dimensional array given. Array must be ' + 'two-dimensional' % a.ndim) + +def _assert_stacked_2d(*arrays): + for a in arrays: + if a.ndim < 2: + raise LinAlgError('%d-dimensional array given. Array must be ' + 'at least two-dimensional' % a.ndim) + +def _assert_stacked_square(*arrays): + for a in arrays: + m, n = a.shape[-2:] + if m != n: + raise LinAlgError('Last 2 dimensions of the array must be square') + +def _assert_finite(*arrays): + for a in arrays: + if not isfinite(a).all(): + raise LinAlgError("Array must not contain infs or NaNs") + +def _is_empty_2d(arr): + # check size first for efficiency + return arr.size == 0 and product(arr.shape[-2:]) == 0 + + +def transpose(a): + """ + Transpose each matrix in a stack of matrices. + + Unlike np.transpose, this only swaps the last two axes, rather than all of + them + + Parameters + ---------- + a : (...,M,N) array_like + + Returns + ------- + aT : (...,N,M) ndarray + """ + return swapaxes(a, -1, -2) + +# Linear equations + +def _tensorsolve_dispatcher(a, b, axes=None): + return (a, b) + + +@array_function_dispatch(_tensorsolve_dispatcher) +def tensorsolve(a, b, axes=None): + """ + Solve the tensor equation ``a x = b`` for x. + + It is assumed that all indices of `x` are summed over in the product, + together with the rightmost indices of `a`, as is done in, for example, + ``tensordot(a, x, axes=b.ndim)``. + + Parameters + ---------- + a : array_like + Coefficient tensor, of shape ``b.shape + Q``. `Q`, a tuple, equals + the shape of that sub-tensor of `a` consisting of the appropriate + number of its rightmost indices, and must be such that + ``prod(Q) == prod(b.shape)`` (in which sense `a` is said to be + 'square'). + b : array_like + Right-hand tensor, which can be of any shape. + axes : tuple of ints, optional + Axes in `a` to reorder to the right, before inversion. + If None (default), no reordering is done. + + Returns + ------- + x : ndarray, shape Q + + Raises + ------ + LinAlgError + If `a` is singular or not 'square' (in the above sense). 
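+        Here 'square' means ``prod(a.shape[b.ndim:]) == prod(b.shape)``,
+        so that, after any `axes` reordering, the equation can be recast
+        as the ordinary square system
+        ``a.reshape(prod(b.shape), -1) @ x.ravel() == b.ravel()``.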
+ + See Also + -------- + numpy.tensordot, tensorinv, numpy.einsum + + Examples + -------- + >>> a = np.eye(2*3*4) + >>> a.shape = (2*3, 4, 2, 3, 4) + >>> b = np.random.randn(2*3, 4) + >>> x = np.linalg.tensorsolve(a, b) + >>> x.shape + (2, 3, 4) + >>> np.allclose(np.tensordot(a, x, axes=3), b) + True + + """ + a, wrap = _makearray(a) + b = asarray(b) + an = a.ndim + + if axes is not None: + allaxes = list(range(0, an)) + for k in axes: + allaxes.remove(k) + allaxes.insert(an, k) + a = a.transpose(allaxes) + + oldshape = a.shape[-(an-b.ndim):] + prod = 1 + for k in oldshape: + prod *= k + + a = a.reshape(-1, prod) + b = b.ravel() + res = wrap(solve(a, b)) + res.shape = oldshape + return res + + +def _solve_dispatcher(a, b): + return (a, b) + + +@array_function_dispatch(_solve_dispatcher) +def solve(a, b): + """ + Solve a linear matrix equation, or system of linear scalar equations. + + Computes the "exact" solution, `x`, of the well-determined, i.e., full + rank, linear matrix equation `ax = b`. + + Parameters + ---------- + a : (..., M, M) array_like + Coefficient matrix. + b : {(..., M,), (..., M, K)}, array_like + Ordinate or "dependent variable" values. + + Returns + ------- + x : {(..., M,), (..., M, K)} ndarray + Solution to the system a x = b. Returned shape is identical to `b`. + + Raises + ------ + LinAlgError + If `a` is singular or not square. + + See Also + -------- + scipy.linalg.solve : Similar function in SciPy. + + Notes + ----- + + .. versionadded:: 1.8.0 + + Broadcasting rules apply, see the `numpy.linalg` documentation for + details. + + The solutions are computed using LAPACK routine ``_gesv``. + + `a` must be square and of full-rank, i.e., all rows (or, equivalently, + columns) must be linearly independent; if either is not true, use + `lstsq` for the least-squares best "solution" of the + system/equation. + + References + ---------- + .. [1] G. Strang, *Linear Algebra and Its Applications*, 2nd Ed., Orlando, + FL, Academic Press, Inc., 1980, pg. 22. + + Examples + -------- + Solve the system of equations ``x0 + 2 * x1 = 1`` and ``3 * x0 + 5 * x1 = 2``: + + >>> a = np.array([[1, 2], [3, 5]]) + >>> b = np.array([1, 2]) + >>> x = np.linalg.solve(a, b) + >>> x + array([-1., 1.]) + + Check that the solution is correct: + + >>> np.allclose(np.dot(a, x), b) + True + + """ + a, _ = _makearray(a) + _assert_stacked_2d(a) + _assert_stacked_square(a) + b, wrap = _makearray(b) + t, result_t = _commonType(a, b) + + # We use the b = (..., M,) logic, only if the number of extra dimensions + # match exactly + if b.ndim == a.ndim - 1: + gufunc = _umath_linalg.solve1 + else: + gufunc = _umath_linalg.solve + + signature = 'DD->D' if isComplexType(t) else 'dd->d' + extobj = get_linalg_error_extobj(_raise_linalgerror_singular) + r = gufunc(a, b, signature=signature, extobj=extobj) + + return wrap(r.astype(result_t, copy=False)) + + +def _tensorinv_dispatcher(a, ind=None): + return (a,) + + +@array_function_dispatch(_tensorinv_dispatcher) +def tensorinv(a, ind=2): + """ + Compute the 'inverse' of an N-dimensional array. + + The result is an inverse for `a` relative to the tensordot operation + ``tensordot(a, b, ind)``, i. e., up to floating-point accuracy, + ``tensordot(tensorinv(a), a, ind)`` is the "identity" tensor for the + tensordot operation. + + Parameters + ---------- + a : array_like + Tensor to 'invert'. Its shape must be 'square', i. e., + ``prod(a.shape[:ind]) == prod(a.shape[ind:])``. + ind : int, optional + Number of first indices that are involved in the inverse sum. 
+        Must be a positive integer, default is 2.
+
+    Returns
+    -------
+    b : ndarray
+        `a`'s tensordot inverse, shape ``a.shape[ind:] + a.shape[:ind]``.
+
+    Raises
+    ------
+    LinAlgError
+        If `a` is singular or not 'square' (in the above sense).
+
+    See Also
+    --------
+    numpy.tensordot, tensorsolve
+
+    Examples
+    --------
+    >>> a = np.eye(4*6)
+    >>> a.shape = (4, 6, 8, 3)
+    >>> ainv = np.linalg.tensorinv(a, ind=2)
+    >>> ainv.shape
+    (8, 3, 4, 6)
+    >>> b = np.random.randn(4, 6)
+    >>> np.allclose(np.tensordot(ainv, b), np.linalg.tensorsolve(a, b))
+    True
+
+    >>> a = np.eye(4*6)
+    >>> a.shape = (24, 8, 3)
+    >>> ainv = np.linalg.tensorinv(a, ind=1)
+    >>> ainv.shape
+    (8, 3, 24)
+    >>> b = np.random.randn(24)
+    >>> np.allclose(np.tensordot(ainv, b, 1), np.linalg.tensorsolve(a, b))
+    True
+
+    """
+    a = asarray(a)
+    oldshape = a.shape
+    prod = 1
+    if ind > 0:
+        invshape = oldshape[ind:] + oldshape[:ind]
+        for k in oldshape[ind:]:
+            prod *= k
+    else:
+        raise ValueError("Invalid ind argument.")
+    a = a.reshape(prod, -1)
+    ia = inv(a)
+    return ia.reshape(*invshape)
+
+
+# Matrix inversion
+
+def _unary_dispatcher(a):
+    return (a,)
+
+
+@array_function_dispatch(_unary_dispatcher)
+def inv(a):
+    """
+    Compute the (multiplicative) inverse of a matrix.
+
+    Given a square matrix `a`, return the matrix `ainv` satisfying
+    ``dot(a, ainv) = dot(ainv, a) = eye(a.shape[0])``.
+
+    Parameters
+    ----------
+    a : (..., M, M) array_like
+        Matrix to be inverted.
+
+    Returns
+    -------
+    ainv : (..., M, M) ndarray or matrix
+        (Multiplicative) inverse of the matrix `a`.
+
+    Raises
+    ------
+    LinAlgError
+        If `a` is not square or inversion fails.
+
+    See Also
+    --------
+    scipy.linalg.inv : Similar function in SciPy.
+
+    Notes
+    -----
+
+    .. versionadded:: 1.8.0
+
+    Broadcasting rules apply, see the `numpy.linalg` documentation for
+    details.
+
+    Examples
+    --------
+    >>> from numpy.linalg import inv
+    >>> a = np.array([[1., 2.], [3., 4.]])
+    >>> ainv = inv(a)
+    >>> np.allclose(np.dot(a, ainv), np.eye(2))
+    True
+    >>> np.allclose(np.dot(ainv, a), np.eye(2))
+    True
+
+    If a is a matrix object, then the return value is a matrix as well:
+
+    >>> ainv = inv(np.matrix(a))
+    >>> ainv
+    matrix([[-2. ,  1. ],
+            [ 1.5, -0.5]])
+
+    Inverses of several matrices can be computed at once:
+
+    >>> a = np.array([[[1., 2.], [3., 4.]], [[1, 3], [3, 5]]])
+    >>> inv(a)
+    array([[[-2.  ,  1.  ],
+            [ 1.5 , -0.5 ]],
+           [[-1.25,  0.75],
+            [ 0.75, -0.25]]])
+
+    """
+    a, wrap = _makearray(a)
+    _assert_stacked_2d(a)
+    _assert_stacked_square(a)
+    t, result_t = _commonType(a)
+
+    signature = 'D->D' if isComplexType(t) else 'd->d'
+    extobj = get_linalg_error_extobj(_raise_linalgerror_singular)
+    ainv = _umath_linalg.inv(a, signature=signature, extobj=extobj)
+    return wrap(ainv.astype(result_t, copy=False))
+
+
+def _matrix_power_dispatcher(a, n):
+    return (a,)
+
+
+@array_function_dispatch(_matrix_power_dispatcher)
+def matrix_power(a, n):
+    """
+    Raise a square matrix to the (integer) power `n`.
+
+    For positive integers `n`, the power is computed by repeated matrix
+    squarings and matrix multiplications. If ``n == 0``, the identity matrix
+    of the same shape as `a` is returned. If ``n < 0``, the inverse
+    is computed and then raised to the power ``abs(n)``.
+
+    .. note:: Stacks of object matrices are not currently supported.
+
+    Parameters
+    ----------
+    a : (..., M, M) array_like
+        Matrix to be "powered".
+    n : int
+        The exponent can be any integer, positive,
+        negative, or zero.
+
+    Returns
+    -------
+    a**n : (..., M, M) ndarray or matrix object
+        The return value is the same shape and type as `a`;
+        if the exponent is positive or zero then the type of the
+        elements is the same as those of `a`. If the exponent is
+        negative the elements are floating-point.
+
+    Raises
+    ------
+    LinAlgError
+        For matrices that are not square or that (for negative powers) cannot
+        be inverted numerically.
+
+    Examples
+    --------
+    >>> from numpy.linalg import matrix_power
+    >>> i = np.array([[0, 1], [-1, 0]])  # matrix equiv. of the imaginary unit
+    >>> matrix_power(i, 3)  # should = -i
+    array([[ 0, -1],
+           [ 1,  0]])
+    >>> matrix_power(i, 0)
+    array([[1, 0],
+           [0, 1]])
+    >>> matrix_power(i, -3)  # should = 1/(-i) = i, but w/ f.p. elements
+    array([[ 0.,  1.],
+           [-1.,  0.]])
+
+    Somewhat more sophisticated example
+
+    >>> q = np.zeros((4, 4))
+    >>> q[0:2, 0:2] = -i
+    >>> q[2:4, 2:4] = i
+    >>> q  # one of the three quaternion units not equal to 1
+    array([[ 0., -1.,  0.,  0.],
+           [ 1.,  0.,  0.,  0.],
+           [ 0.,  0.,  0.,  1.],
+           [ 0.,  0., -1.,  0.]])
+    >>> matrix_power(q, 2)  # = -np.eye(4)
+    array([[-1.,  0.,  0.,  0.],
+           [ 0., -1.,  0.,  0.],
+           [ 0.,  0., -1.,  0.],
+           [ 0.,  0.,  0., -1.]])
+
+    """
+    a = asanyarray(a)
+    _assert_stacked_2d(a)
+    _assert_stacked_square(a)
+
+    try:
+        n = operator.index(n)
+    except TypeError as e:
+        raise TypeError("exponent must be an integer") from e
+
+    # Fall back on dot for object arrays. Object arrays are not supported by
+    # the current implementation of matmul using einsum
+    if a.dtype != object:
+        fmatmul = matmul
+    elif a.ndim == 2:
+        fmatmul = dot
+    else:
+        raise NotImplementedError(
+            "matrix_power not supported for stacks of object arrays")
+
+    if n == 0:
+        a = empty_like(a)
+        a[...] = eye(a.shape[-2], dtype=a.dtype)
+        return a
+
+    elif n < 0:
+        a = inv(a)
+        n = abs(n)
+
+    # short-cuts.
+    if n == 1:
+        return a
+
+    elif n == 2:
+        return fmatmul(a, a)
+
+    elif n == 3:
+        return fmatmul(fmatmul(a, a), a)
+
+    # Use binary decomposition to reduce the number of matrix multiplications.
+    # Here, we iterate over the bits of n, from LSB to MSB, raise `a` to
+    # increasing powers of 2, and multiply into the result as needed.
+    z = result = None
+    while n > 0:
+        z = a if z is None else fmatmul(z, z)
+        n, bit = divmod(n, 2)
+        if bit:
+            result = z if result is None else fmatmul(result, z)
+
+    return result
+
+
+# Cholesky decomposition
+
+
+@array_function_dispatch(_unary_dispatcher)
+def cholesky(a):
+    """
+    Cholesky decomposition.
+
+    Return the Cholesky decomposition, `L * L.H`, of the square matrix `a`,
+    where `L` is lower-triangular and .H is the conjugate transpose operator
+    (which is the ordinary transpose if `a` is real-valued). `a` must be
+    Hermitian (symmetric if real-valued) and positive-definite. No
+    checking is performed to verify whether `a` is Hermitian or not.
+    In addition, only the lower-triangular and diagonal elements of `a`
+    are used. Only `L` is actually returned.
+
+    Parameters
+    ----------
+    a : (..., M, M) array_like
+        Hermitian (symmetric if all elements are real), positive-definite
+        input matrix.
+
+    Returns
+    -------
+    L : (..., M, M) array_like
+        Lower-triangular Cholesky factor of `a`.  Returns a
+        matrix object if `a` is a matrix object.
+
+    Raises
+    ------
+    LinAlgError
+       If the decomposition fails, for example, if `a` is not
+       positive-definite.
+
+    See Also
+    --------
+    scipy.linalg.cholesky : Similar function in SciPy.
+    scipy.linalg.cholesky_banded : Cholesky decompose a banded Hermitian
+                                   positive-definite matrix.
+    scipy.linalg.cho_factor : Cholesky decomposition of a matrix, to use in
+                              `scipy.linalg.cho_solve`.
+
+    Notes
+    -----
+
+    .. versionadded:: 1.8.0
+
+    Broadcasting rules apply, see the `numpy.linalg` documentation for
+    details.
+
+    The Cholesky decomposition is often used as a fast way of solving
+
+    .. math:: A \\mathbf{x} = \\mathbf{b}
+
+    (when `A` is both Hermitian/symmetric and positive-definite).
+
+    First, we solve for :math:`\\mathbf{y}` in
+
+    .. math:: L \\mathbf{y} = \\mathbf{b},
+
+    and then for :math:`\\mathbf{x}` in
+
+    .. math:: L.H \\mathbf{x} = \\mathbf{y}.
+
+    Examples
+    --------
+    >>> A = np.array([[1,-2j],[2j,5]])
+    >>> A
+    array([[ 1.+0.j, -0.-2.j],
+           [ 0.+2.j,  5.+0.j]])
+    >>> L = np.linalg.cholesky(A)
+    >>> L
+    array([[1.+0.j, 0.+0.j],
+           [0.+2.j, 1.+0.j]])
+    >>> np.dot(L, L.T.conj()) # verify that L * L.H = A
+    array([[1.+0.j, 0.-2.j],
+           [0.+2.j, 5.+0.j]])
+    >>> A = [[1,-2j],[2j,5]] # what happens if A is only array_like?
+    >>> np.linalg.cholesky(A) # an ndarray object is returned
+    array([[1.+0.j, 0.+0.j],
+           [0.+2.j, 1.+0.j]])
+    >>> # But a matrix object is returned if A is a matrix object
+    >>> np.linalg.cholesky(np.matrix(A))
+    matrix([[ 1.+0.j,  0.+0.j],
+            [ 0.+2.j,  1.+0.j]])
+
+    """
+    extobj = get_linalg_error_extobj(_raise_linalgerror_nonposdef)
+    gufunc = _umath_linalg.cholesky_lo
+    a, wrap = _makearray(a)
+    _assert_stacked_2d(a)
+    _assert_stacked_square(a)
+    t, result_t = _commonType(a)
+    signature = 'D->D' if isComplexType(t) else 'd->d'
+    r = gufunc(a, signature=signature, extobj=extobj)
+    return wrap(r.astype(result_t, copy=False))
+
+
+# QR decomposition
+
+def _qr_dispatcher(a, mode=None):
+    return (a,)
+
+
+@array_function_dispatch(_qr_dispatcher)
+def qr(a, mode='reduced'):
+    """
+    Compute the qr factorization of a matrix.
+
+    Factor the matrix `a` as *qr*, where `q` is orthonormal and `r` is
+    upper-triangular.
+
+    Parameters
+    ----------
+    a : array_like, shape (..., M, N)
+        An array-like object with the dimensionality of at least 2.
+    mode : {'reduced', 'complete', 'r', 'raw'}, optional
+        If K = min(M, N), then
+
+        * 'reduced'  : returns q, r with dimensions
+                       (..., M, K), (..., K, N) (default)
+        * 'complete' : returns q, r with dimensions (..., M, M), (..., M, N)
+        * 'r'        : returns r only with dimensions (..., K, N)
+        * 'raw'      : returns h, tau with dimensions (..., N, M), (..., K,)
+
+        The options 'reduced', 'complete', and 'raw' are new in numpy 1.8,
+        see the notes for more information. The default is 'reduced', and to
+        maintain backward compatibility with earlier versions of numpy both
+        it and the old default 'full' can be omitted. Note that the array h
+        returned in 'raw' mode is transposed for calling Fortran. The
+        'economic' mode is deprecated.  The modes 'full' and 'economic' may
+        be passed using only the first letter for backwards compatibility,
+        but all others must be spelled out. See the Notes for more
+        explanation.
+
+    Returns
+    -------
+    q : ndarray of float or complex, optional
+        A matrix with orthonormal columns. When mode = 'complete' the
+        result is an orthogonal/unitary matrix depending on whether or not
+        a is real/complex. The determinant may be either +/- 1 in that
+        case. In case the number of dimensions in the input array is
+        greater than 2 then a stack of the matrices with above properties
+        is returned.
+    r : ndarray of float or complex, optional
+        The upper-triangular matrix or a stack of upper-triangular
+        matrices if the number of dimensions in the input array is greater
+        than 2.
+ (h, tau) : ndarrays of np.double or np.cdouble, optional + The array h contains the Householder reflectors that generate q + along with r. The tau array contains scaling factors for the + reflectors. In the deprecated 'economic' mode only h is returned. + + Raises + ------ + LinAlgError + If factoring fails. + + See Also + -------- + scipy.linalg.qr : Similar function in SciPy. + scipy.linalg.rq : Compute RQ decomposition of a matrix. + + Notes + ----- + This is an interface to the LAPACK routines ``dgeqrf``, ``zgeqrf``, + ``dorgqr``, and ``zungqr``. + + For more information on the qr factorization, see for example: + https://en.wikipedia.org/wiki/QR_factorization + + Subclasses of `ndarray` are preserved except for the 'raw' mode. So if + `a` is of type `matrix`, all the return values will be matrices too. + + New 'reduced', 'complete', and 'raw' options for mode were added in + NumPy 1.8.0 and the old option 'full' was made an alias of 'reduced'. In + addition the options 'full' and 'economic' were deprecated. Because + 'full' was the previous default and 'reduced' is the new default, + backward compatibility can be maintained by letting `mode` default. + The 'raw' option was added so that LAPACK routines that can multiply + arrays by q using the Householder reflectors can be used. Note that in + this case the returned arrays are of type np.double or np.cdouble and + the h array is transposed to be FORTRAN compatible. No routines using + the 'raw' return are currently exposed by numpy, but some are available + in lapack_lite and just await the necessary work. + + Examples + -------- + >>> a = np.random.randn(9, 6) + >>> q, r = np.linalg.qr(a) + >>> np.allclose(a, np.dot(q, r)) # a does equal qr + True + >>> r2 = np.linalg.qr(a, mode='r') + >>> np.allclose(r, r2) # mode='r' returns the same r as mode='full' + True + >>> a = np.random.normal(size=(3, 2, 2)) # Stack of 2 x 2 matrices as input + >>> q, r = np.linalg.qr(a) + >>> q.shape + (3, 2, 2) + >>> r.shape + (3, 2, 2) + >>> np.allclose(a, np.matmul(q, r)) + True + + Example illustrating a common use of `qr`: solving of least squares + problems + + What are the least-squares-best `m` and `y0` in ``y = y0 + mx`` for + the following data: {(0,1), (1,0), (1,2), (2,1)}. (Graph the points + and you'll see that it should be y0 = 0, m = 1.) The answer is provided + by solving the over-determined matrix equation ``Ax = b``, where:: + + A = array([[0, 1], [1, 1], [1, 1], [2, 1]]) + x = array([[y0], [m]]) + b = array([[1], [0], [2], [1]]) + + If A = qr such that q is orthonormal (which is always possible via + Gram-Schmidt), then ``x = inv(r) * (q.T) * b``. (In numpy practice, + however, we simply use `lstsq`.) + + >>> A = np.array([[0, 1], [1, 1], [1, 1], [2, 1]]) + >>> A + array([[0, 1], + [1, 1], + [1, 1], + [2, 1]]) + >>> b = np.array([1, 0, 2, 1]) + >>> q, r = np.linalg.qr(A) + >>> p = np.dot(q.T, b) + >>> np.dot(np.linalg.inv(r), p) + array([ 1.1e-16, 1.0e+00]) + + """ + if mode not in ('reduced', 'complete', 'r', 'raw'): + if mode in ('f', 'full'): + # 2013-04-01, 1.8 + msg = "".join(( + "The 'full' option is deprecated in favor of 'reduced'.\n", + "For backward compatibility let mode default.")) + warnings.warn(msg, DeprecationWarning, stacklevel=3) + mode = 'reduced' + elif mode in ('e', 'economic'): + # 2013-04-01, 1.8 + msg = "The 'economic' option is deprecated." 
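# Illustrative sketch (not part of the numpy source): the shapes produced by
# the qr modes documented above, for a tall 5 x 3 input, so K = min(M, N) = 3:
import numpy as np

a = np.random.randn(5, 3)
q, r = np.linalg.qr(a, mode='reduced')    # q: (M, K), r: (K, N)
assert q.shape == (5, 3) and r.shape == (3, 3)
q, r = np.linalg.qr(a, mode='complete')   # q: (M, M), r: (M, N)
assert q.shape == (5, 5) and r.shape == (5, 3)
r_only = np.linalg.qr(a, mode='r')        # r alone: (K, N)
assert r_only.shape == (3, 3)
h, tau = np.linalg.qr(a, mode='raw')      # h: (N, M), transposed for Fortran
assert h.shape == (3, 5) and tau.shape == (3,)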
+ warnings.warn(msg, DeprecationWarning, stacklevel=3) + mode = 'economic' + else: + raise ValueError(f"Unrecognized mode '{mode}'") + + a, wrap = _makearray(a) + _assert_stacked_2d(a) + m, n = a.shape[-2:] + t, result_t = _commonType(a) + a = a.astype(t, copy=True) + a = _to_native_byte_order(a) + mn = min(m, n) + + if m <= n: + gufunc = _umath_linalg.qr_r_raw_m + else: + gufunc = _umath_linalg.qr_r_raw_n + + signature = 'D->D' if isComplexType(t) else 'd->d' + extobj = get_linalg_error_extobj(_raise_linalgerror_qr) + tau = gufunc(a, signature=signature, extobj=extobj) + + # handle modes that don't return q + if mode == 'r': + r = triu(a[..., :mn, :]) + r = r.astype(result_t, copy=False) + return wrap(r) + + if mode == 'raw': + q = transpose(a) + q = q.astype(result_t, copy=False) + tau = tau.astype(result_t, copy=False) + return wrap(q), tau + + if mode == 'economic': + a = a.astype(result_t, copy=False) + return wrap(a) + + # mc is the number of columns in the resulting q + # matrix. If the mode is complete then it is + # same as number of rows, and if the mode is reduced, + # then it is the minimum of number of rows and columns. + if mode == 'complete' and m > n: + mc = m + gufunc = _umath_linalg.qr_complete + else: + mc = mn + gufunc = _umath_linalg.qr_reduced + + signature = 'DD->D' if isComplexType(t) else 'dd->d' + extobj = get_linalg_error_extobj(_raise_linalgerror_qr) + q = gufunc(a, tau, signature=signature, extobj=extobj) + r = triu(a[..., :mc, :]) + + q = q.astype(result_t, copy=False) + r = r.astype(result_t, copy=False) + + return wrap(q), wrap(r) + +# Eigenvalues + + +@array_function_dispatch(_unary_dispatcher) +def eigvals(a): + """ + Compute the eigenvalues of a general matrix. + + Main difference between `eigvals` and `eig`: the eigenvectors aren't + returned. + + Parameters + ---------- + a : (..., M, M) array_like + A complex- or real-valued matrix whose eigenvalues will be computed. + + Returns + ------- + w : (..., M,) ndarray + The eigenvalues, each repeated according to its multiplicity. + They are not necessarily ordered, nor are they necessarily + real for real matrices. + + Raises + ------ + LinAlgError + If the eigenvalue computation does not converge. + + See Also + -------- + eig : eigenvalues and right eigenvectors of general arrays + eigvalsh : eigenvalues of real symmetric or complex Hermitian + (conjugate symmetric) arrays. + eigh : eigenvalues and eigenvectors of real symmetric or complex + Hermitian (conjugate symmetric) arrays. + scipy.linalg.eigvals : Similar function in SciPy. + + Notes + ----- + + .. versionadded:: 1.8.0 + + Broadcasting rules apply, see the `numpy.linalg` documentation for + details. + + This is implemented using the ``_geev`` LAPACK routines which compute + the eigenvalues and eigenvectors of general square arrays. + + Examples + -------- + Illustration, using the fact that the eigenvalues of a diagonal matrix + are its diagonal elements, that multiplying a matrix on the left + by an orthogonal matrix, `Q`, and on the right by `Q.T` (the transpose + of `Q`), preserves the eigenvalues of the "middle" matrix. 
In other words, + if `Q` is orthogonal, then ``Q * A * Q.T`` has the same eigenvalues as + ``A``: + + >>> from numpy import linalg as LA + >>> x = np.random.random() + >>> Q = np.array([[np.cos(x), -np.sin(x)], [np.sin(x), np.cos(x)]]) + >>> LA.norm(Q[0, :]), LA.norm(Q[1, :]), np.dot(Q[0, :],Q[1, :]) + (1.0, 1.0, 0.0) + + Now multiply a diagonal matrix by ``Q`` on one side and by ``Q.T`` on the other: + + >>> D = np.diag((-1,1)) + >>> LA.eigvals(D) + array([-1., 1.]) + >>> A = np.dot(Q, D) + >>> A = np.dot(A, Q.T) + >>> LA.eigvals(A) + array([ 1., -1.]) # random + + """ + a, wrap = _makearray(a) + _assert_stacked_2d(a) + _assert_stacked_square(a) + _assert_finite(a) + t, result_t = _commonType(a) + + extobj = get_linalg_error_extobj( + _raise_linalgerror_eigenvalues_nonconvergence) + signature = 'D->D' if isComplexType(t) else 'd->D' + w = _umath_linalg.eigvals(a, signature=signature, extobj=extobj) + + if not isComplexType(t): + if all(w.imag == 0): + w = w.real + result_t = _realType(result_t) + else: + result_t = _complexType(result_t) + + return w.astype(result_t, copy=False) + + +def _eigvalsh_dispatcher(a, UPLO=None): + return (a,) + + +@array_function_dispatch(_eigvalsh_dispatcher) +def eigvalsh(a, UPLO='L'): + """ + Compute the eigenvalues of a complex Hermitian or real symmetric matrix. + + Main difference from eigh: the eigenvectors are not computed. + + Parameters + ---------- + a : (..., M, M) array_like + A complex- or real-valued matrix whose eigenvalues are to be + computed. + UPLO : {'L', 'U'}, optional + Specifies whether the calculation is done with the lower triangular + part of `a` ('L', default) or the upper triangular part ('U'). + Irrespective of this value only the real parts of the diagonal will + be considered in the computation to preserve the notion of a Hermitian + matrix. It therefore follows that the imaginary part of the diagonal + will always be treated as zero. + + Returns + ------- + w : (..., M,) ndarray + The eigenvalues in ascending order, each repeated according to + its multiplicity. + + Raises + ------ + LinAlgError + If the eigenvalue computation does not converge. + + See Also + -------- + eigh : eigenvalues and eigenvectors of real symmetric or complex Hermitian + (conjugate symmetric) arrays. + eigvals : eigenvalues of general real or complex arrays. + eig : eigenvalues and right eigenvectors of general real or complex + arrays. + scipy.linalg.eigvalsh : Similar function in SciPy. + + Notes + ----- + + .. versionadded:: 1.8.0 + + Broadcasting rules apply, see the `numpy.linalg` documentation for + details. + + The eigenvalues are computed using LAPACK routines ``_syevd``, ``_heevd``. 
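# Illustrative sketch (not part of the numpy source): as in the eigvals
# implementation above, a real input yields a real result only when every
# eigenvalue has zero imaginary part; otherwise the complex values are kept:
import numpy as np

sym = np.array([[2.0, 1.0], [1.0, 2.0]])     # symmetric: real spectrum
rot = np.array([[0.0, -1.0], [1.0, 0.0]])    # rotation: eigenvalues +/- 1j
assert np.linalg.eigvals(sym).dtype == np.float64
assert np.linalg.eigvals(rot).dtype == np.complex128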
+ + Examples + -------- + >>> from numpy import linalg as LA + >>> a = np.array([[1, -2j], [2j, 5]]) + >>> LA.eigvalsh(a) + array([ 0.17157288, 5.82842712]) # may vary + + >>> # demonstrate the treatment of the imaginary part of the diagonal + >>> a = np.array([[5+2j, 9-2j], [0+2j, 2-1j]]) + >>> a + array([[5.+2.j, 9.-2.j], + [0.+2.j, 2.-1.j]]) + >>> # with UPLO='L' this is numerically equivalent to using LA.eigvals() + >>> # with: + >>> b = np.array([[5.+0.j, 0.-2.j], [0.+2.j, 2.-0.j]]) + >>> b + array([[5.+0.j, 0.-2.j], + [0.+2.j, 2.+0.j]]) + >>> wa = LA.eigvalsh(a) + >>> wb = LA.eigvals(b) + >>> wa; wb + array([1., 6.]) + array([6.+0.j, 1.+0.j]) + + """ + UPLO = UPLO.upper() + if UPLO not in ('L', 'U'): + raise ValueError("UPLO argument must be 'L' or 'U'") + + extobj = get_linalg_error_extobj( + _raise_linalgerror_eigenvalues_nonconvergence) + if UPLO == 'L': + gufunc = _umath_linalg.eigvalsh_lo + else: + gufunc = _umath_linalg.eigvalsh_up + + a, wrap = _makearray(a) + _assert_stacked_2d(a) + _assert_stacked_square(a) + t, result_t = _commonType(a) + signature = 'D->d' if isComplexType(t) else 'd->d' + w = gufunc(a, signature=signature, extobj=extobj) + return w.astype(_realType(result_t), copy=False) + +def _convertarray(a): + t, result_t = _commonType(a) + a = _fastCT(a.astype(t)) + return a, t, result_t + + +# Eigenvectors + + +@array_function_dispatch(_unary_dispatcher) +def eig(a): + """ + Compute the eigenvalues and right eigenvectors of a square array. + + Parameters + ---------- + a : (..., M, M) array + Matrices for which the eigenvalues and right eigenvectors will + be computed + + Returns + ------- + w : (..., M) array + The eigenvalues, each repeated according to its multiplicity. + The eigenvalues are not necessarily ordered. The resulting + array will be of complex type, unless the imaginary part is + zero in which case it will be cast to a real type. When `a` + is real the resulting eigenvalues will be real (0 imaginary + part) or occur in conjugate pairs + + v : (..., M, M) array + The normalized (unit "length") eigenvectors, such that the + column ``v[:,i]`` is the eigenvector corresponding to the + eigenvalue ``w[i]``. + + Raises + ------ + LinAlgError + If the eigenvalue computation does not converge. + + See Also + -------- + eigvals : eigenvalues of a non-symmetric array. + eigh : eigenvalues and eigenvectors of a real symmetric or complex + Hermitian (conjugate symmetric) array. + eigvalsh : eigenvalues of a real symmetric or complex Hermitian + (conjugate symmetric) array. + scipy.linalg.eig : Similar function in SciPy that also solves the + generalized eigenvalue problem. + scipy.linalg.schur : Best choice for unitary and other non-Hermitian + normal matrices. + + Notes + ----- + + .. versionadded:: 1.8.0 + + Broadcasting rules apply, see the `numpy.linalg` documentation for + details. + + This is implemented using the ``_geev`` LAPACK routines which compute + the eigenvalues and eigenvectors of general square arrays. + + The number `w` is an eigenvalue of `a` if there exists a vector + `v` such that ``a @ v = w * v``. Thus, the arrays `a`, `w`, and + `v` satisfy the equations ``a @ v[:,i] = w[i] * v[:,i]`` + for :math:`i \\in \\{0,...,M-1\\}`. + + The array `v` of eigenvectors may not be of maximum rank, that is, some + of the columns may be linearly dependent, although round-off error may + obscure that fact. 
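# Illustrative sketch (not part of the numpy source): the defining relation
# a @ v[:, i] == w[i] * v[:, i] stated in the eig notes above, checked for all
# columns at once via broadcasting:
import numpy as np

a = np.array([[1.0, 2.0], [3.0, 4.0]])
w, v = np.linalg.eig(a)
assert np.allclose(a @ v, v * w)   # w scales each column of v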
If the eigenvalues are all different, then theoretically + the eigenvectors are linearly independent and `a` can be diagonalized by + a similarity transformation using `v`, i.e, ``inv(v) @ a @ v`` is diagonal. + + For non-Hermitian normal matrices the SciPy function `scipy.linalg.schur` + is preferred because the matrix `v` is guaranteed to be unitary, which is + not the case when using `eig`. The Schur factorization produces an + upper triangular matrix rather than a diagonal matrix, but for normal + matrices only the diagonal of the upper triangular matrix is needed, the + rest is roundoff error. + + Finally, it is emphasized that `v` consists of the *right* (as in + right-hand side) eigenvectors of `a`. A vector `y` satisfying + ``y.T @ a = z * y.T`` for some number `z` is called a *left* + eigenvector of `a`, and, in general, the left and right eigenvectors + of a matrix are not necessarily the (perhaps conjugate) transposes + of each other. + + References + ---------- + G. Strang, *Linear Algebra and Its Applications*, 2nd Ed., Orlando, FL, + Academic Press, Inc., 1980, Various pp. + + Examples + -------- + >>> from numpy import linalg as LA + + (Almost) trivial example with real e-values and e-vectors. + + >>> w, v = LA.eig(np.diag((1, 2, 3))) + >>> w; v + array([1., 2., 3.]) + array([[1., 0., 0.], + [0., 1., 0.], + [0., 0., 1.]]) + + Real matrix possessing complex e-values and e-vectors; note that the + e-values are complex conjugates of each other. + + >>> w, v = LA.eig(np.array([[1, -1], [1, 1]])) + >>> w; v + array([1.+1.j, 1.-1.j]) + array([[0.70710678+0.j , 0.70710678-0.j ], + [0. -0.70710678j, 0. +0.70710678j]]) + + Complex-valued matrix with real e-values (but complex-valued e-vectors); + note that ``a.conj().T == a``, i.e., `a` is Hermitian. + + >>> a = np.array([[1, 1j], [-1j, 1]]) + >>> w, v = LA.eig(a) + >>> w; v + array([2.+0.j, 0.+0.j]) + array([[ 0. +0.70710678j, 0.70710678+0.j ], # may vary + [ 0.70710678+0.j , -0. +0.70710678j]]) + + Be careful about round-off error! + + >>> a = np.array([[1 + 1e-9, 0], [0, 1 - 1e-9]]) + >>> # Theor. e-values are 1 +/- 1e-9 + >>> w, v = LA.eig(a) + >>> w; v + array([1., 1.]) + array([[1., 0.], + [0., 1.]]) + + """ + a, wrap = _makearray(a) + _assert_stacked_2d(a) + _assert_stacked_square(a) + _assert_finite(a) + t, result_t = _commonType(a) + + extobj = get_linalg_error_extobj( + _raise_linalgerror_eigenvalues_nonconvergence) + signature = 'D->DD' if isComplexType(t) else 'd->DD' + w, vt = _umath_linalg.eig(a, signature=signature, extobj=extobj) + + if not isComplexType(t) and all(w.imag == 0.0): + w = w.real + vt = vt.real + result_t = _realType(result_t) + else: + result_t = _complexType(result_t) + + vt = vt.astype(result_t, copy=False) + return w.astype(result_t, copy=False), wrap(vt) + + +@array_function_dispatch(_eigvalsh_dispatcher) +def eigh(a, UPLO='L'): + """ + Return the eigenvalues and eigenvectors of a complex Hermitian + (conjugate symmetric) or a real symmetric matrix. + + Returns two objects, a 1-D array containing the eigenvalues of `a`, and + a 2-D square array or matrix (depending on the input type) of the + corresponding eigenvectors (in columns). + + Parameters + ---------- + a : (..., M, M) array + Hermitian or real symmetric matrices whose eigenvalues and + eigenvectors are to be computed. + UPLO : {'L', 'U'}, optional + Specifies whether the calculation is done with the lower triangular + part of `a` ('L', default) or the upper triangular part ('U'). 
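# Illustrative sketch (not part of the numpy source): the eig notes above
# distinguish right from left eigenvectors. One common way to obtain left
# eigenvectors of `a` is as the right eigenvectors of `a.T` (the eigenvalues
# coincide up to ordering):
import numpy as np

a = np.array([[1.0, 2.0], [0.0, 3.0]])
w, v = np.linalg.eig(a)        # right: a @ v[:, i] = w[i] * v[:, i]
wl, u = np.linalg.eig(a.T)     # left:  u[:, i] @ a = wl[i] * u[:, i]
assert np.allclose(np.sort(w), np.sort(wl))
i = np.argmax(wl)              # verify one left pair explicitly
assert np.allclose(u[:, i] @ a, wl[i] * u[:, i])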
+ Irrespective of this value only the real parts of the diagonal will + be considered in the computation to preserve the notion of a Hermitian + matrix. It therefore follows that the imaginary part of the diagonal + will always be treated as zero. + + Returns + ------- + w : (..., M) ndarray + The eigenvalues in ascending order, each repeated according to + its multiplicity. + v : {(..., M, M) ndarray, (..., M, M) matrix} + The column ``v[:, i]`` is the normalized eigenvector corresponding + to the eigenvalue ``w[i]``. Will return a matrix object if `a` is + a matrix object. + + Raises + ------ + LinAlgError + If the eigenvalue computation does not converge. + + See Also + -------- + eigvalsh : eigenvalues of real symmetric or complex Hermitian + (conjugate symmetric) arrays. + eig : eigenvalues and right eigenvectors for non-symmetric arrays. + eigvals : eigenvalues of non-symmetric arrays. + scipy.linalg.eigh : Similar function in SciPy (but also solves the + generalized eigenvalue problem). + + Notes + ----- + + .. versionadded:: 1.8.0 + + Broadcasting rules apply, see the `numpy.linalg` documentation for + details. + + The eigenvalues/eigenvectors are computed using LAPACK routines ``_syevd``, + ``_heevd``. + + The eigenvalues of real symmetric or complex Hermitian matrices are + always real. [1]_ The array `v` of (column) eigenvectors is unitary + and `a`, `w`, and `v` satisfy the equations + ``dot(a, v[:, i]) = w[i] * v[:, i]``. + + References + ---------- + .. [1] G. Strang, *Linear Algebra and Its Applications*, 2nd Ed., Orlando, + FL, Academic Press, Inc., 1980, pg. 222. + + Examples + -------- + >>> from numpy import linalg as LA + >>> a = np.array([[1, -2j], [2j, 5]]) + >>> a + array([[ 1.+0.j, -0.-2.j], + [ 0.+2.j, 5.+0.j]]) + >>> w, v = LA.eigh(a) + >>> w; v + array([0.17157288, 5.82842712]) + array([[-0.92387953+0.j , -0.38268343+0.j ], # may vary + [ 0. +0.38268343j, 0. -0.92387953j]]) + + >>> np.dot(a, v[:, 0]) - w[0] * v[:, 0] # verify 1st e-val/vec pair + array([5.55111512e-17+0.0000000e+00j, 0.00000000e+00+1.2490009e-16j]) + >>> np.dot(a, v[:, 1]) - w[1] * v[:, 1] # verify 2nd e-val/vec pair + array([0.+0.j, 0.+0.j]) + + >>> A = np.matrix(a) # what happens if input is a matrix object + >>> A + matrix([[ 1.+0.j, -0.-2.j], + [ 0.+2.j, 5.+0.j]]) + >>> w, v = LA.eigh(A) + >>> w; v + array([0.17157288, 5.82842712]) + matrix([[-0.92387953+0.j , -0.38268343+0.j ], # may vary + [ 0. +0.38268343j, 0. -0.92387953j]]) + + >>> # demonstrate the treatment of the imaginary part of the diagonal + >>> a = np.array([[5+2j, 9-2j], [0+2j, 2-1j]]) + >>> a + array([[5.+2.j, 9.-2.j], + [0.+2.j, 2.-1.j]]) + >>> # with UPLO='L' this is numerically equivalent to using LA.eig() with: + >>> b = np.array([[5.+0.j, 0.-2.j], [0.+2.j, 2.-0.j]]) + >>> b + array([[5.+0.j, 0.-2.j], + [0.+2.j, 2.+0.j]]) + >>> wa, va = LA.eigh(a) + >>> wb, vb = LA.eig(b) + >>> wa; wb + array([1., 6.]) + array([6.+0.j, 1.+0.j]) + >>> va; vb + array([[-0.4472136 +0.j , -0.89442719+0.j ], # may vary + [ 0. +0.89442719j, 0. -0.4472136j ]]) + array([[ 0.89442719+0.j , -0. +0.4472136j], + [-0. 
+0.4472136j, 0.89442719+0.j ]]) + """ + UPLO = UPLO.upper() + if UPLO not in ('L', 'U'): + raise ValueError("UPLO argument must be 'L' or 'U'") + + a, wrap = _makearray(a) + _assert_stacked_2d(a) + _assert_stacked_square(a) + t, result_t = _commonType(a) + + extobj = get_linalg_error_extobj( + _raise_linalgerror_eigenvalues_nonconvergence) + if UPLO == 'L': + gufunc = _umath_linalg.eigh_lo + else: + gufunc = _umath_linalg.eigh_up + + signature = 'D->dD' if isComplexType(t) else 'd->dd' + w, vt = gufunc(a, signature=signature, extobj=extobj) + w = w.astype(_realType(result_t), copy=False) + vt = vt.astype(result_t, copy=False) + return w, wrap(vt) + + +# Singular value decomposition + +def _svd_dispatcher(a, full_matrices=None, compute_uv=None, hermitian=None): + return (a,) + + +@array_function_dispatch(_svd_dispatcher) +def svd(a, full_matrices=True, compute_uv=True, hermitian=False): + """ + Singular Value Decomposition. + + When `a` is a 2D array, it is factorized as ``u @ np.diag(s) @ vh + = (u * s) @ vh``, where `u` and `vh` are 2D unitary arrays and `s` is a 1D + array of `a`'s singular values. When `a` is higher-dimensional, SVD is + applied in stacked mode as explained below. + + Parameters + ---------- + a : (..., M, N) array_like + A real or complex array with ``a.ndim >= 2``. + full_matrices : bool, optional + If True (default), `u` and `vh` have the shapes ``(..., M, M)`` and + ``(..., N, N)``, respectively. Otherwise, the shapes are + ``(..., M, K)`` and ``(..., K, N)``, respectively, where + ``K = min(M, N)``. + compute_uv : bool, optional + Whether or not to compute `u` and `vh` in addition to `s`. True + by default. + hermitian : bool, optional + If True, `a` is assumed to be Hermitian (symmetric if real-valued), + enabling a more efficient method for finding singular values. + Defaults to False. + + .. versionadded:: 1.17.0 + + Returns + ------- + u : { (..., M, M), (..., M, K) } array + Unitary array(s). The first ``a.ndim - 2`` dimensions have the same + size as those of the input `a`. The size of the last two dimensions + depends on the value of `full_matrices`. Only returned when + `compute_uv` is True. + s : (..., K) array + Vector(s) with the singular values, within each vector sorted in + descending order. The first ``a.ndim - 2`` dimensions have the same + size as those of the input `a`. + vh : { (..., N, N), (..., K, N) } array + Unitary array(s). The first ``a.ndim - 2`` dimensions have the same + size as those of the input `a`. The size of the last two dimensions + depends on the value of `full_matrices`. Only returned when + `compute_uv` is True. + + Raises + ------ + LinAlgError + If SVD computation does not converge. + + See Also + -------- + scipy.linalg.svd : Similar function in SciPy. + scipy.linalg.svdvals : Compute singular values of a matrix. + + Notes + ----- + + .. versionchanged:: 1.8.0 + Broadcasting rules apply, see the `numpy.linalg` documentation for + details. + + The decomposition is performed using LAPACK routine ``_gesdd``. + + SVD is usually described for the factorization of a 2D matrix :math:`A`. + The higher-dimensional case will be discussed below. In the 2D case, SVD is + written as :math:`A = U S V^H`, where :math:`A = a`, :math:`U= u`, + :math:`S= \\mathtt{np.diag}(s)` and :math:`V^H = vh`. The 1D array `s` + contains the singular values of `a` and `u` and `vh` are unitary. The rows + of `vh` are the eigenvectors of :math:`A^H A` and the columns of `u` are + the eigenvectors of :math:`A A^H`. 
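# Illustrative sketch (not part of the numpy source): the relation stated in
# the svd notes above between the SVD factors and the eigenvectors of A^H A
# and A A^H, checked numerically for a real input:
import numpy as np

a = np.random.randn(5, 3)
u, s, vh = np.linalg.svd(a, full_matrices=False)
# columns of v (rows of vh) are eigenvectors of a.T @ a, eigenvalues s**2
assert np.allclose(a.T @ a @ vh.T, vh.T * s**2)
# columns of u are eigenvectors of a @ a.T with the same eigenvalues
assert np.allclose(a @ a.T @ u, u * s**2)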
In both cases the corresponding + (possibly non-zero) eigenvalues are given by ``s**2``. + + If `a` has more than two dimensions, then broadcasting rules apply, as + explained in :ref:`routines.linalg-broadcasting`. This means that SVD is + working in "stacked" mode: it iterates over all indices of the first + ``a.ndim - 2`` dimensions and for each combination SVD is applied to the + last two indices. The matrix `a` can be reconstructed from the + decomposition with either ``(u * s[..., None, :]) @ vh`` or + ``u @ (s[..., None] * vh)``. (The ``@`` operator can be replaced by the + function ``np.matmul`` for python versions below 3.5.) + + If `a` is a ``matrix`` object (as opposed to an ``ndarray``), then so are + all the return values. + + Examples + -------- + >>> a = np.random.randn(9, 6) + 1j*np.random.randn(9, 6) + >>> b = np.random.randn(2, 7, 8, 3) + 1j*np.random.randn(2, 7, 8, 3) + + Reconstruction based on full SVD, 2D case: + + >>> u, s, vh = np.linalg.svd(a, full_matrices=True) + >>> u.shape, s.shape, vh.shape + ((9, 9), (6,), (6, 6)) + >>> np.allclose(a, np.dot(u[:, :6] * s, vh)) + True + >>> smat = np.zeros((9, 6), dtype=complex) + >>> smat[:6, :6] = np.diag(s) + >>> np.allclose(a, np.dot(u, np.dot(smat, vh))) + True + + Reconstruction based on reduced SVD, 2D case: + + >>> u, s, vh = np.linalg.svd(a, full_matrices=False) + >>> u.shape, s.shape, vh.shape + ((9, 6), (6,), (6, 6)) + >>> np.allclose(a, np.dot(u * s, vh)) + True + >>> smat = np.diag(s) + >>> np.allclose(a, np.dot(u, np.dot(smat, vh))) + True + + Reconstruction based on full SVD, 4D case: + + >>> u, s, vh = np.linalg.svd(b, full_matrices=True) + >>> u.shape, s.shape, vh.shape + ((2, 7, 8, 8), (2, 7, 3), (2, 7, 3, 3)) + >>> np.allclose(b, np.matmul(u[..., :3] * s[..., None, :], vh)) + True + >>> np.allclose(b, np.matmul(u[..., :3], s[..., None] * vh)) + True + + Reconstruction based on reduced SVD, 4D case: + + >>> u, s, vh = np.linalg.svd(b, full_matrices=False) + >>> u.shape, s.shape, vh.shape + ((2, 7, 8, 3), (2, 7, 3), (2, 7, 3, 3)) + >>> np.allclose(b, np.matmul(u * s[..., None, :], vh)) + True + >>> np.allclose(b, np.matmul(u, s[..., None] * vh)) + True + + """ + import numpy as _nx + a, wrap = _makearray(a) + + if hermitian: + # note: lapack svd returns eigenvalues with s ** 2 sorted descending, + # but eig returns s sorted ascending, so we re-order the eigenvalues + # and related arrays to have the correct order + if compute_uv: + s, u = eigh(a) + sgn = sign(s) + s = abs(s) + sidx = argsort(s)[..., ::-1] + sgn = _nx.take_along_axis(sgn, sidx, axis=-1) + s = _nx.take_along_axis(s, sidx, axis=-1) + u = _nx.take_along_axis(u, sidx[..., None, :], axis=-1) + # singular values are unsigned, move the sign into v + vt = transpose(u * sgn[..., None, :]).conjugate() + return wrap(u), s, wrap(vt) + else: + s = eigvalsh(a) + s = s[..., ::-1] + s = abs(s) + return sort(s)[..., ::-1] + + _assert_stacked_2d(a) + t, result_t = _commonType(a) + + extobj = get_linalg_error_extobj(_raise_linalgerror_svd_nonconvergence) + + m, n = a.shape[-2:] + if compute_uv: + if full_matrices: + if m < n: + gufunc = _umath_linalg.svd_m_f + else: + gufunc = _umath_linalg.svd_n_f + else: + if m < n: + gufunc = _umath_linalg.svd_m_s + else: + gufunc = _umath_linalg.svd_n_s + + signature = 'D->DdD' if isComplexType(t) else 'd->ddd' + u, s, vh = gufunc(a, signature=signature, extobj=extobj) + u = u.astype(result_t, copy=False) + s = s.astype(_realType(result_t), copy=False) + vh = vh.astype(result_t, copy=False) + return wrap(u), s, wrap(vh) + 
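# Illustrative sketch (not part of the numpy source): the hermitian=True
# branch above reuses eigh/eigvalsh and reorders the eigenvalues into the
# descending, unsigned convention of singular values. On a Hermitian input
# both code paths agree:
import numpy as np

a = np.array([[2.0, 1.0], [1.0, -3.0]])            # symmetric, indefinite
s_fast = np.linalg.svd(a, compute_uv=False, hermitian=True)
s_ref = np.linalg.svd(a, compute_uv=False)
assert np.allclose(s_fast, s_ref)    # |eigenvalues|, sorted descending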
else: + if m < n: + gufunc = _umath_linalg.svd_m + else: + gufunc = _umath_linalg.svd_n + + signature = 'D->d' if isComplexType(t) else 'd->d' + s = gufunc(a, signature=signature, extobj=extobj) + s = s.astype(_realType(result_t), copy=False) + return s + + +def _cond_dispatcher(x, p=None): + return (x,) + + +@array_function_dispatch(_cond_dispatcher) +def cond(x, p=None): + """ + Compute the condition number of a matrix. + + This function is capable of returning the condition number using + one of seven different norms, depending on the value of `p` (see + Parameters below). + + Parameters + ---------- + x : (..., M, N) array_like + The matrix whose condition number is sought. + p : {None, 1, -1, 2, -2, inf, -inf, 'fro'}, optional + Order of the norm used in the condition number computation: + + ===== ============================ + p norm for matrices + ===== ============================ + None 2-norm, computed directly using the ``SVD`` + 'fro' Frobenius norm + inf max(sum(abs(x), axis=1)) + -inf min(sum(abs(x), axis=1)) + 1 max(sum(abs(x), axis=0)) + -1 min(sum(abs(x), axis=0)) + 2 2-norm (largest sing. value) + -2 smallest singular value + ===== ============================ + + inf means the `numpy.inf` object, and the Frobenius norm is + the root-of-sum-of-squares norm. + + Returns + ------- + c : {float, inf} + The condition number of the matrix. May be infinite. + + See Also + -------- + numpy.linalg.norm + + Notes + ----- + The condition number of `x` is defined as the norm of `x` times the + norm of the inverse of `x` [1]_; the norm can be the usual L2-norm + (root-of-sum-of-squares) or one of a number of other matrix norms. + + References + ---------- + .. [1] G. Strang, *Linear Algebra and Its Applications*, Orlando, FL, + Academic Press, Inc., 1980, pg. 285. + + Examples + -------- + >>> from numpy import linalg as LA + >>> a = np.array([[1, 0, -1], [0, 1, 0], [1, 0, 1]]) + >>> a + array([[ 1, 0, -1], + [ 0, 1, 0], + [ 1, 0, 1]]) + >>> LA.cond(a) + 1.4142135623730951 + >>> LA.cond(a, 'fro') + 3.1622776601683795 + >>> LA.cond(a, np.inf) + 2.0 + >>> LA.cond(a, -np.inf) + 1.0 + >>> LA.cond(a, 1) + 2.0 + >>> LA.cond(a, -1) + 1.0 + >>> LA.cond(a, 2) + 1.4142135623730951 + >>> LA.cond(a, -2) + 0.70710678118654746 # may vary + >>> min(LA.svd(a, compute_uv=False))*min(LA.svd(LA.inv(a), compute_uv=False)) + 0.70710678118654746 # may vary + + """ + x = asarray(x) # in case we have a matrix + if _is_empty_2d(x): + raise LinAlgError("cond is not defined on empty arrays") + if p is None or p == 2 or p == -2: + s = svd(x, compute_uv=False) + with errstate(all='ignore'): + if p == -2: + r = s[..., -1] / s[..., 0] + else: + r = s[..., 0] / s[..., -1] + else: + # Call inv(x) ignoring errors. The result array will + # contain nans in the entries where inversion failed. 
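# Illustrative sketch (not part of the numpy source): for p in (None, 2, -2)
# the cond code above takes ratios of extreme singular values; for other
# orders it falls back to norm(x) * norm(inv(x)). Both definitions by hand:
import numpy as np

a = np.array([[1.0, 0.0, -1.0], [0.0, 1.0, 0.0], [1.0, 0.0, 1.0]])
s = np.linalg.svd(a, compute_uv=False)
assert np.isclose(np.linalg.cond(a), s[0] / s[-1])
assert np.isclose(np.linalg.cond(a, 1),
                  np.linalg.norm(a, 1) * np.linalg.norm(np.linalg.inv(a), 1))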
+ _assert_stacked_2d(x) + _assert_stacked_square(x) + t, result_t = _commonType(x) + signature = 'D->D' if isComplexType(t) else 'd->d' + with errstate(all='ignore'): + invx = _umath_linalg.inv(x, signature=signature) + r = norm(x, p, axis=(-2, -1)) * norm(invx, p, axis=(-2, -1)) + r = r.astype(result_t, copy=False) + + # Convert nans to infs unless the original array had nan entries + r = asarray(r) + nan_mask = isnan(r) + if nan_mask.any(): + nan_mask &= ~isnan(x).any(axis=(-2, -1)) + if r.ndim > 0: + r[nan_mask] = Inf + elif nan_mask: + r[()] = Inf + + # Convention is to return scalars instead of 0d arrays + if r.ndim == 0: + r = r[()] + + return r + + +def _matrix_rank_dispatcher(A, tol=None, hermitian=None): + return (A,) + + +@array_function_dispatch(_matrix_rank_dispatcher) +def matrix_rank(A, tol=None, hermitian=False): + """ + Return matrix rank of array using SVD method + + Rank of the array is the number of singular values of the array that are + greater than `tol`. + + .. versionchanged:: 1.14 + Can now operate on stacks of matrices + + Parameters + ---------- + A : {(M,), (..., M, N)} array_like + Input vector or stack of matrices. + tol : (...) array_like, float, optional + Threshold below which SVD values are considered zero. If `tol` is + None, and ``S`` is an array with singular values for `M`, and + ``eps`` is the epsilon value for datatype of ``S``, then `tol` is + set to ``S.max() * max(M, N) * eps``. + + .. versionchanged:: 1.14 + Broadcasted against the stack of matrices + hermitian : bool, optional + If True, `A` is assumed to be Hermitian (symmetric if real-valued), + enabling a more efficient method for finding singular values. + Defaults to False. + + .. versionadded:: 1.14 + + Returns + ------- + rank : (...) array_like + Rank of A. + + Notes + ----- + The default threshold to detect rank deficiency is a test on the magnitude + of the singular values of `A`. By default, we identify singular values less + than ``S.max() * max(M, N) * eps`` as indicating rank deficiency (with + the symbols defined above). This is the algorithm MATLAB uses [1]. It also + appears in *Numerical recipes* in the discussion of SVD solutions for linear + least squares [2]. + + This default threshold is designed to detect rank deficiency accounting for + the numerical errors of the SVD computation. Imagine that there is a column + in `A` that is an exact (in floating point) linear combination of other + columns in `A`. Computing the SVD on `A` will not produce a singular value + exactly equal to 0 in general: any difference of the smallest SVD value from + 0 will be caused by numerical imprecision in the calculation of the SVD. + Our threshold for small SVD values takes this numerical imprecision into + account, and the default threshold will detect such numerical rank + deficiency. The threshold may declare a matrix `A` rank deficient even if + the linear combination of some columns of `A` is not exactly equal to + another column of `A` but only numerically very close to another column of + `A`. + + We chose our default threshold because it is in wide use. Other thresholds + are possible. For example, elsewhere in the 2007 edition of *Numerical + recipes* there is an alternative threshold of ``S.max() * + np.finfo(A.dtype).eps / 2. * np.sqrt(m + n + 1.)``. The authors describe + this threshold as being based on "expected roundoff error" (p 71). + + The thresholds above deal with floating point roundoff error in the + calculation of the SVD. 
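# Illustrative sketch (not part of the numpy source): the default threshold
# described in the matrix_rank notes above, reproduced by hand and compared
# against matrix_rank itself:
import numpy as np

A = np.eye(4)
A[-1, -1] = 1e-16                      # nearly rank-deficient
S = np.linalg.svd(A, compute_uv=False)
tol = S.max() * max(A.shape) * np.finfo(S.dtype).eps
assert np.count_nonzero(S > tol) == np.linalg.matrix_rank(A) == 3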
However, you may have more information about the + sources of error in `A` that would make you consider other tolerance values + to detect *effective* rank deficiency. The most useful measure of the + tolerance depends on the operations you intend to use on your matrix. For + example, if your data come from uncertain measurements with uncertainties + greater than floating point epsilon, choosing a tolerance near that + uncertainty may be preferable. The tolerance may be absolute if the + uncertainties are absolute rather than relative. + + References + ---------- + .. [1] MATLAB reference documentation, "Rank" + https://www.mathworks.com/help/techdoc/ref/rank.html + .. [2] W. H. Press, S. A. Teukolsky, W. T. Vetterling and B. P. Flannery, + "Numerical Recipes (3rd edition)", Cambridge University Press, 2007, + page 795. + + Examples + -------- + >>> from numpy.linalg import matrix_rank + >>> matrix_rank(np.eye(4)) # Full rank matrix + 4 + >>> I=np.eye(4); I[-1,-1] = 0. # rank deficient matrix + >>> matrix_rank(I) + 3 + >>> matrix_rank(np.ones((4,))) # 1 dimension - rank 1 unless all 0 + 1 + >>> matrix_rank(np.zeros((4,))) + 0 + """ + A = asarray(A) + if A.ndim < 2: + return int(not all(A==0)) + S = svd(A, compute_uv=False, hermitian=hermitian) + if tol is None: + tol = S.max(axis=-1, keepdims=True) * max(A.shape[-2:]) * finfo(S.dtype).eps + else: + tol = asarray(tol)[..., newaxis] + return count_nonzero(S > tol, axis=-1) + + +# Generalized inverse + +def _pinv_dispatcher(a, rcond=None, hermitian=None): + return (a,) + + +@array_function_dispatch(_pinv_dispatcher) +def pinv(a, rcond=1e-15, hermitian=False): + """ + Compute the (Moore-Penrose) pseudo-inverse of a matrix. + + Calculate the generalized inverse of a matrix using its + singular-value decomposition (SVD) and including all + *large* singular values. + + .. versionchanged:: 1.14 + Can now operate on stacks of matrices + + Parameters + ---------- + a : (..., M, N) array_like + Matrix or stack of matrices to be pseudo-inverted. + rcond : (...) array_like of float + Cutoff for small singular values. + Singular values less than or equal to + ``rcond * largest_singular_value`` are set to zero. + Broadcasts against the stack of matrices. + hermitian : bool, optional + If True, `a` is assumed to be Hermitian (symmetric if real-valued), + enabling a more efficient method for finding singular values. + Defaults to False. + + .. versionadded:: 1.17.0 + + Returns + ------- + B : (..., N, M) ndarray + The pseudo-inverse of `a`. If `a` is a `matrix` instance, then so + is `B`. + + Raises + ------ + LinAlgError + If the SVD computation does not converge. + + See Also + -------- + scipy.linalg.pinv : Similar function in SciPy. + scipy.linalg.pinv2 : Similar function in SciPy (SVD-based). + scipy.linalg.pinvh : Compute the (Moore-Penrose) pseudo-inverse of a + Hermitian matrix. + + Notes + ----- + The pseudo-inverse of a matrix A, denoted :math:`A^+`, is + defined as: "the matrix that 'solves' [the least-squares problem] + :math:`Ax = b`," i.e., if :math:`\\bar{x}` is said solution, then + :math:`A^+` is that matrix such that :math:`\\bar{x} = A^+b`. 
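# Illustrative sketch (not part of the numpy source): the least-squares
# characterization of the pseudo-inverse described above, checked against
# lstsq on an overdetermined system:
import numpy as np

A = np.array([[0.0, 1.0], [1.0, 1.0], [1.0, 1.0], [2.0, 1.0]])
b = np.array([1.0, 0.0, 2.0, 1.0])
x_pinv = np.linalg.pinv(A) @ b
x_lstsq = np.linalg.lstsq(A, b, rcond=None)[0]
assert np.allclose(x_pinv, x_lstsq)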
+ + It can be shown that if :math:`Q_1 \\Sigma Q_2^T = A` is the singular + value decomposition of A, then + :math:`A^+ = Q_2 \\Sigma^+ Q_1^T`, where :math:`Q_{1,2}` are + orthogonal matrices, :math:`\\Sigma` is a diagonal matrix consisting + of A's so-called singular values, (followed, typically, by + zeros), and then :math:`\\Sigma^+` is simply the diagonal matrix + consisting of the reciprocals of A's singular values + (again, followed by zeros). [1]_ + + References + ---------- + .. [1] G. Strang, *Linear Algebra and Its Applications*, 2nd Ed., Orlando, + FL, Academic Press, Inc., 1980, pp. 139-142. + + Examples + -------- + The following example checks that ``a * a+ * a == a`` and + ``a+ * a * a+ == a+``: + + >>> a = np.random.randn(9, 6) + >>> B = np.linalg.pinv(a) + >>> np.allclose(a, np.dot(a, np.dot(B, a))) + True + >>> np.allclose(B, np.dot(B, np.dot(a, B))) + True + + """ + a, wrap = _makearray(a) + rcond = asarray(rcond) + if _is_empty_2d(a): + m, n = a.shape[-2:] + res = empty(a.shape[:-2] + (n, m), dtype=a.dtype) + return wrap(res) + a = a.conjugate() + u, s, vt = svd(a, full_matrices=False, hermitian=hermitian) + + # discard small singular values + cutoff = rcond[..., newaxis] * amax(s, axis=-1, keepdims=True) + large = s > cutoff + s = divide(1, s, where=large, out=s) + s[~large] = 0 + + res = matmul(transpose(vt), multiply(s[..., newaxis], transpose(u))) + return wrap(res) + + +# Determinant + + +@array_function_dispatch(_unary_dispatcher) +def slogdet(a): + """ + Compute the sign and (natural) logarithm of the determinant of an array. + + If an array has a very small or very large determinant, then a call to + `det` may overflow or underflow. This routine is more robust against such + issues, because it computes the logarithm of the determinant rather than + the determinant itself. + + Parameters + ---------- + a : (..., M, M) array_like + Input array, has to be a square 2-D array. + + Returns + ------- + sign : (...) array_like + A number representing the sign of the determinant. For a real matrix, + this is 1, 0, or -1. For a complex matrix, this is a complex number + with absolute value 1 (i.e., it is on the unit circle), or else 0. + logdet : (...) array_like + The natural log of the absolute value of the determinant. + + If the determinant is zero, then `sign` will be 0 and `logdet` will be + -Inf. In all cases, the determinant is equal to ``sign * np.exp(logdet)``. + + See Also + -------- + det + + Notes + ----- + + .. versionadded:: 1.8.0 + + Broadcasting rules apply, see the `numpy.linalg` documentation for + details. + + .. versionadded:: 1.6.0 + + The determinant is computed via LU factorization using the LAPACK + routine ``z/dgetrf``. 
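# Illustrative sketch (not part of the numpy source): the SVD recipe used by
# the pinv implementation above (reciprocals of the singular values beyond
# the rcond cutoff, zeros elsewhere), reproduced by hand for a real input:
import numpy as np

a = np.random.randn(6, 4)
u, s, vt = np.linalg.svd(a, full_matrices=False)
cutoff = 1e-15 * s.max()                  # pinv's default rcond is 1e-15
s_inv = np.where(s > cutoff, 1.0 / s, 0.0)
manual = vt.T @ np.diag(s_inv) @ u.T
assert np.allclose(manual, np.linalg.pinv(a))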
+ + + Examples + -------- + The determinant of a 2-D array ``[[a, b], [c, d]]`` is ``ad - bc``: + + >>> a = np.array([[1, 2], [3, 4]]) + >>> (sign, logdet) = np.linalg.slogdet(a) + >>> (sign, logdet) + (-1, 0.69314718055994529) # may vary + >>> sign * np.exp(logdet) + -2.0 + + Computing log-determinants for a stack of matrices: + + >>> a = np.array([ [[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]] ]) + >>> a.shape + (3, 2, 2) + >>> sign, logdet = np.linalg.slogdet(a) + >>> (sign, logdet) + (array([-1., -1., -1.]), array([ 0.69314718, 1.09861229, 2.07944154])) + >>> sign * np.exp(logdet) + array([-2., -3., -8.]) + + This routine succeeds where ordinary `det` does not: + + >>> np.linalg.det(np.eye(500) * 0.1) + 0.0 + >>> np.linalg.slogdet(np.eye(500) * 0.1) + (1, -1151.2925464970228) + + """ + a = asarray(a) + _assert_stacked_2d(a) + _assert_stacked_square(a) + t, result_t = _commonType(a) + real_t = _realType(result_t) + signature = 'D->Dd' if isComplexType(t) else 'd->dd' + sign, logdet = _umath_linalg.slogdet(a, signature=signature) + sign = sign.astype(result_t, copy=False) + logdet = logdet.astype(real_t, copy=False) + return sign, logdet + + +@array_function_dispatch(_unary_dispatcher) +def det(a): + """ + Compute the determinant of an array. + + Parameters + ---------- + a : (..., M, M) array_like + Input array to compute determinants for. + + Returns + ------- + det : (...) array_like + Determinant of `a`. + + See Also + -------- + slogdet : Another way to represent the determinant, more suitable + for large matrices where underflow/overflow may occur. + scipy.linalg.det : Similar function in SciPy. + + Notes + ----- + + .. versionadded:: 1.8.0 + + Broadcasting rules apply, see the `numpy.linalg` documentation for + details. + + The determinant is computed via LU factorization using the LAPACK + routine ``z/dgetrf``. + + Examples + -------- + The determinant of a 2-D array [[a, b], [c, d]] is ad - bc: + + >>> a = np.array([[1, 2], [3, 4]]) + >>> np.linalg.det(a) + -2.0 # may vary + + Computing determinants for a stack of matrices: + + >>> a = np.array([ [[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]] ]) + >>> a.shape + (3, 2, 2) + >>> np.linalg.det(a) + array([-2., -3., -8.]) + + """ + a = asarray(a) + _assert_stacked_2d(a) + _assert_stacked_square(a) + t, result_t = _commonType(a) + signature = 'D->D' if isComplexType(t) else 'd->d' + r = _umath_linalg.det(a, signature=signature) + r = r.astype(result_t, copy=False) + return r + + +# Linear Least Squares + +def _lstsq_dispatcher(a, b, rcond=None): + return (a, b) + + +@array_function_dispatch(_lstsq_dispatcher) +def lstsq(a, b, rcond="warn"): + r""" + Return the least-squares solution to a linear matrix equation. + + Computes the vector `x` that approximately solves the equation + ``a @ x = b``. The equation may be under-, well-, or over-determined + (i.e., the number of linearly independent rows of `a` can be less than, + equal to, or greater than its number of linearly independent columns). + If `a` is square and of full rank, then `x` (but for round-off error) + is the "exact" solution of the equation. Else, `x` minimizes the + Euclidean 2-norm :math:`||b - ax||`. If there are multiple minimizing + solutions, the one with the smallest 2-norm :math:`||x||` is returned. + + Parameters + ---------- + a : (M, N) array_like + "Coefficient" matrix. + b : {(M,), (M, K)} array_like + Ordinate or "dependent variable" values. 
If `b` is two-dimensional, + the least-squares solution is calculated for each of the `K` columns + of `b`. + rcond : float, optional + Cut-off ratio for small singular values of `a`. + For the purposes of rank determination, singular values are treated + as zero if they are smaller than `rcond` times the largest singular + value of `a`. + + .. versionchanged:: 1.14.0 + If not set, a FutureWarning is given. The previous default + of ``-1`` will use the machine precision as `rcond` parameter, + the new default will use the machine precision times `max(M, N)`. + To silence the warning and use the new default, use ``rcond=None``, + to keep using the old behavior, use ``rcond=-1``. + + Returns + ------- + x : {(N,), (N, K)} ndarray + Least-squares solution. If `b` is two-dimensional, + the solutions are in the `K` columns of `x`. + residuals : {(1,), (K,), (0,)} ndarray + Sums of squared residuals: Squared Euclidean 2-norm for each column in + ``b - a @ x``. + If the rank of `a` is < N or M <= N, this is an empty array. + If `b` is 1-dimensional, this is a (1,) shape array. + Otherwise the shape is (K,). + rank : int + Rank of matrix `a`. + s : (min(M, N),) ndarray + Singular values of `a`. + + Raises + ------ + LinAlgError + If computation does not converge. + + See Also + -------- + scipy.linalg.lstsq : Similar function in SciPy. + + Notes + ----- + If `b` is a matrix, then all array results are returned as matrices. + + Examples + -------- + Fit a line, ``y = mx + c``, through some noisy data-points: + + >>> x = np.array([0, 1, 2, 3]) + >>> y = np.array([-1, 0.2, 0.9, 2.1]) + + By examining the coefficients, we see that the line should have a + gradient of roughly 1 and cut the y-axis at, more or less, -1. + + We can rewrite the line equation as ``y = Ap``, where ``A = [[x 1]]`` + and ``p = [[m], [c]]``. 
+    Now use `lstsq` to solve for `p`:
+
+    >>> A = np.vstack([x, np.ones(len(x))]).T
+    >>> A
+    array([[ 0.,  1.],
+           [ 1.,  1.],
+           [ 2.,  1.],
+           [ 3.,  1.]])
+
+    >>> m, c = np.linalg.lstsq(A, y, rcond=None)[0]
+    >>> m, c
+    (1.0, -0.95) # may vary
+
+    Plot the data along with the fitted line:
+
+    >>> import matplotlib.pyplot as plt
+    >>> _ = plt.plot(x, y, 'o', label='Original data', markersize=10)
+    >>> _ = plt.plot(x, m*x + c, 'r', label='Fitted line')
+    >>> _ = plt.legend()
+    >>> plt.show()
+
+    """
+    a, _ = _makearray(a)
+    b, wrap = _makearray(b)
+    is_1d = b.ndim == 1
+    if is_1d:
+        b = b[:, newaxis]
+    _assert_2d(a, b)
+    m, n = a.shape[-2:]
+    m2, n_rhs = b.shape[-2:]
+    if m != m2:
+        raise LinAlgError('Incompatible dimensions')
+
+    t, result_t = _commonType(a, b)
+    result_real_t = _realType(result_t)
+
+    # Determine default rcond value
+    if rcond == "warn":
+        # 2017-08-19, 1.14.0
+        warnings.warn("`rcond` parameter will change to the default of "
+                      "machine precision times ``max(M, N)`` where M and N "
+                      "are the input matrix dimensions.\n"
+                      "To use the future default and silence this warning "
+                      "we advise to pass `rcond=None`, to keep using the old, "
+                      "explicitly pass `rcond=-1`.",
+                      FutureWarning, stacklevel=3)
+        rcond = -1
+    if rcond is None:
+        rcond = finfo(t).eps * max(n, m)
+
+    if m <= n:
+        gufunc = _umath_linalg.lstsq_m
+    else:
+        gufunc = _umath_linalg.lstsq_n
+
+    signature = 'DDd->Ddid' if isComplexType(t) else 'ddd->ddid'
+    extobj = get_linalg_error_extobj(_raise_linalgerror_lstsq)
+    if n_rhs == 0:
+        # lapack can't handle n_rhs = 0 - so allocate the array one larger in that axis
+        b = zeros(b.shape[:-2] + (m, n_rhs + 1), dtype=b.dtype)
+    x, resids, rank, s = gufunc(a, b, rcond, signature=signature, extobj=extobj)
+    if m == 0:
+        x[...] = 0
+    if n_rhs == 0:
+        # remove the item we added
+        x = x[..., :n_rhs]
+        resids = resids[..., :n_rhs]
+
+    # remove the axis we added
+    if is_1d:
+        x = x.squeeze(axis=-1)
+        # we probably should squeeze resids too, but we can't
+        # without breaking compatibility.
+
+    # as documented
+    if rank != n or m <= n:
+        resids = array([], result_real_t)
+
+    # coerce output arrays
+    s = s.astype(result_real_t, copy=False)
+    resids = resids.astype(result_real_t, copy=False)
+    x = x.astype(result_t, copy=True)  # Copying lets the memory in r_parts be freed
+    return wrap(x), wrap(resids), rank, s
+
+
+def _multi_svd_norm(x, row_axis, col_axis, op):
+    """Compute a function of the singular values of the 2-D matrices in `x`.
+
+    This is a private utility function used by `numpy.linalg.norm()`.
+
+    Parameters
+    ----------
+    x : ndarray
+    row_axis, col_axis : int
+        The axes of `x` that hold the 2-D matrices.
+    op : callable
+        This should be either numpy.amin or `numpy.amax` or `numpy.sum`.
+
+    Returns
+    -------
+    result : float or ndarray
+        If `x` is 2-D, the return values is a float.
+        Otherwise, it is an array with ``x.ndim - 2`` dimensions.
+        The return values are either the minimum or maximum or sum of the
+        singular values of the matrices, depending on whether `op`
+        is `numpy.amin` or `numpy.amax` or `numpy.sum`.
+
+    """
+    y = moveaxis(x, (row_axis, col_axis), (-2, -1))
+    result = op(svd(y, compute_uv=False), axis=-1)
+    return result
+
+
+def _norm_dispatcher(x, ord=None, axis=None, keepdims=None):
+    return (x,)
+
+
+@array_function_dispatch(_norm_dispatcher)
+def norm(x, ord=None, axis=None, keepdims=False):
+    """
+    Matrix or vector norm.
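# Illustrative sketch (not part of the numpy source): the `residuals` return
# value of lstsq documented above is the squared Euclidean 2-norm of
# b - A @ x (and an empty array unless the system is overdetermined at full
# rank):
import numpy as np

A = np.vstack([np.arange(4.0), np.ones(4)]).T
b = np.array([-1.0, 0.2, 0.9, 2.1])
x, residuals, rank, s = np.linalg.lstsq(A, b, rcond=None)
assert np.isclose(residuals[0], np.sum((b - A @ x) ** 2))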
+ + This function is able to return one of eight different matrix norms, + or one of an infinite number of vector norms (described below), depending + on the value of the ``ord`` parameter. + + Parameters + ---------- + x : array_like + Input array. If `axis` is None, `x` must be 1-D or 2-D, unless `ord` + is None. If both `axis` and `ord` are None, the 2-norm of + ``x.ravel`` will be returned. + ord : {non-zero int, inf, -inf, 'fro', 'nuc'}, optional + Order of the norm (see table under ``Notes``). inf means numpy's + `inf` object. The default is None. + axis : {None, int, 2-tuple of ints}, optional. + If `axis` is an integer, it specifies the axis of `x` along which to + compute the vector norms. If `axis` is a 2-tuple, it specifies the + axes that hold 2-D matrices, and the matrix norms of these matrices + are computed. If `axis` is None then either a vector norm (when `x` + is 1-D) or a matrix norm (when `x` is 2-D) is returned. The default + is None. + + .. versionadded:: 1.8.0 + + keepdims : bool, optional + If this is set to True, the axes which are normed over are left in the + result as dimensions with size one. With this option the result will + broadcast correctly against the original `x`. + + .. versionadded:: 1.10.0 + + Returns + ------- + n : float or ndarray + Norm of the matrix or vector(s). + + See Also + -------- + scipy.linalg.norm : Similar function in SciPy. + + Notes + ----- + For values of ``ord < 1``, the result is, strictly speaking, not a + mathematical 'norm', but it may still be useful for various numerical + purposes. + + The following norms can be calculated: + + ===== ============================ ========================== + ord norm for matrices norm for vectors + ===== ============================ ========================== + None Frobenius norm 2-norm + 'fro' Frobenius norm -- + 'nuc' nuclear norm -- + inf max(sum(abs(x), axis=1)) max(abs(x)) + -inf min(sum(abs(x), axis=1)) min(abs(x)) + 0 -- sum(x != 0) + 1 max(sum(abs(x), axis=0)) as below + -1 min(sum(abs(x), axis=0)) as below + 2 2-norm (largest sing. value) as below + -2 smallest singular value as below + other -- sum(abs(x)**ord)**(1./ord) + ===== ============================ ========================== + + The Frobenius norm is given by [1]_: + + :math:`||A||_F = [\\sum_{i,j} abs(a_{i,j})^2]^{1/2}` + + The nuclear norm is the sum of the singular values. + + Both the Frobenius and nuclear norm orders are only defined for + matrices and raise a ValueError when ``x.ndim != 2``. + + References + ---------- + .. [1] G. H. Golub and C. F. Van Loan, *Matrix Computations*, + Baltimore, MD, Johns Hopkins University Press, 1985, pg. 
15 + + Examples + -------- + >>> from numpy import linalg as LA + >>> a = np.arange(9) - 4 + >>> a + array([-4, -3, -2, ..., 2, 3, 4]) + >>> b = a.reshape((3, 3)) + >>> b + array([[-4, -3, -2], + [-1, 0, 1], + [ 2, 3, 4]]) + + >>> LA.norm(a) + 7.745966692414834 + >>> LA.norm(b) + 7.745966692414834 + >>> LA.norm(b, 'fro') + 7.745966692414834 + >>> LA.norm(a, np.inf) + 4.0 + >>> LA.norm(b, np.inf) + 9.0 + >>> LA.norm(a, -np.inf) + 0.0 + >>> LA.norm(b, -np.inf) + 2.0 + + >>> LA.norm(a, 1) + 20.0 + >>> LA.norm(b, 1) + 7.0 + >>> LA.norm(a, -1) + -4.6566128774142013e-010 + >>> LA.norm(b, -1) + 6.0 + >>> LA.norm(a, 2) + 7.745966692414834 + >>> LA.norm(b, 2) + 7.3484692283495345 + + >>> LA.norm(a, -2) + 0.0 + >>> LA.norm(b, -2) + 1.8570331885190563e-016 # may vary + >>> LA.norm(a, 3) + 5.8480354764257312 # may vary + >>> LA.norm(a, -3) + 0.0 + + Using the `axis` argument to compute vector norms: + + >>> c = np.array([[ 1, 2, 3], + ... [-1, 1, 4]]) + >>> LA.norm(c, axis=0) + array([ 1.41421356, 2.23606798, 5. ]) + >>> LA.norm(c, axis=1) + array([ 3.74165739, 4.24264069]) + >>> LA.norm(c, ord=1, axis=1) + array([ 6., 6.]) + + Using the `axis` argument to compute matrix norms: + + >>> m = np.arange(8).reshape(2,2,2) + >>> LA.norm(m, axis=(1,2)) + array([ 3.74165739, 11.22497216]) + >>> LA.norm(m[0, :, :]), LA.norm(m[1, :, :]) + (3.7416573867739413, 11.224972160321824) + + """ + x = asarray(x) + + if not issubclass(x.dtype.type, (inexact, object_)): + x = x.astype(float) + + # Immediately handle some default, simple, fast, and common cases. + if axis is None: + ndim = x.ndim + if ((ord is None) or + (ord in ('f', 'fro') and ndim == 2) or + (ord == 2 and ndim == 1)): + + x = x.ravel(order='K') + if isComplexType(x.dtype.type): + sqnorm = dot(x.real, x.real) + dot(x.imag, x.imag) + else: + sqnorm = dot(x, x) + ret = sqrt(sqnorm) + if keepdims: + ret = ret.reshape(ndim*[1]) + return ret + + # Normalize the `axis` argument to a tuple. 
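# Illustrative sketch (not part of the numpy source): a few rows of the ord
# table in the norm docstring above, reproduced with explicit reductions:
import numpy as np

b = (np.arange(9) - 4).reshape(3, 3).astype(float)
assert np.isclose(np.linalg.norm(b, 1), np.abs(b).sum(axis=0).max())        # 7.0
assert np.isclose(np.linalg.norm(b, np.inf), np.abs(b).sum(axis=1).max())   # 9.0
s = np.linalg.svd(b, compute_uv=False)
assert np.isclose(np.linalg.norm(b, 2), s[0])         # largest singular value
assert np.isclose(np.linalg.norm(b, 'nuc'), s.sum())  # sum of singular values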
+ nd = x.ndim + if axis is None: + axis = tuple(range(nd)) + elif not isinstance(axis, tuple): + try: + axis = int(axis) + except Exception as e: + raise TypeError("'axis' must be None, an integer or a tuple of integers") from e + axis = (axis,) + + if len(axis) == 1: + if ord == Inf: + return abs(x).max(axis=axis, keepdims=keepdims) + elif ord == -Inf: + return abs(x).min(axis=axis, keepdims=keepdims) + elif ord == 0: + # Zero norm + return (x != 0).astype(x.real.dtype).sum(axis=axis, keepdims=keepdims) + elif ord == 1: + # special case for speedup + return add.reduce(abs(x), axis=axis, keepdims=keepdims) + elif ord is None or ord == 2: + # special case for speedup + s = (x.conj() * x).real + return sqrt(add.reduce(s, axis=axis, keepdims=keepdims)) + # None of the str-type keywords for ord ('fro', 'nuc') + # are valid for vectors + elif isinstance(ord, str): + raise ValueError(f"Invalid norm order '{ord}' for vectors") + else: + absx = abs(x) + absx **= ord + ret = add.reduce(absx, axis=axis, keepdims=keepdims) + ret **= (1 / ord) + return ret + elif len(axis) == 2: + row_axis, col_axis = axis + row_axis = normalize_axis_index(row_axis, nd) + col_axis = normalize_axis_index(col_axis, nd) + if row_axis == col_axis: + raise ValueError('Duplicate axes given.') + if ord == 2: + ret = _multi_svd_norm(x, row_axis, col_axis, amax) + elif ord == -2: + ret = _multi_svd_norm(x, row_axis, col_axis, amin) + elif ord == 1: + if col_axis > row_axis: + col_axis -= 1 + ret = add.reduce(abs(x), axis=row_axis).max(axis=col_axis) + elif ord == Inf: + if row_axis > col_axis: + row_axis -= 1 + ret = add.reduce(abs(x), axis=col_axis).max(axis=row_axis) + elif ord == -1: + if col_axis > row_axis: + col_axis -= 1 + ret = add.reduce(abs(x), axis=row_axis).min(axis=col_axis) + elif ord == -Inf: + if row_axis > col_axis: + row_axis -= 1 + ret = add.reduce(abs(x), axis=col_axis).min(axis=row_axis) + elif ord in [None, 'fro', 'f']: + ret = sqrt(add.reduce((x.conj() * x).real, axis=axis)) + elif ord == 'nuc': + ret = _multi_svd_norm(x, row_axis, col_axis, sum) + else: + raise ValueError("Invalid norm order for matrices.") + if keepdims: + ret_shape = list(x.shape) + ret_shape[axis[0]] = 1 + ret_shape[axis[1]] = 1 + ret = ret.reshape(ret_shape) + return ret + else: + raise ValueError("Improper number of dimensions to norm.") + + +# multi_dot + +def _multidot_dispatcher(arrays, *, out=None): + yield from arrays + yield out + + +@array_function_dispatch(_multidot_dispatcher) +def multi_dot(arrays, *, out=None): + """ + Compute the dot product of two or more arrays in a single function call, + while automatically selecting the fastest evaluation order. + + `multi_dot` chains `numpy.dot` and uses optimal parenthesization + of the matrices [1]_ [2]_. Depending on the shapes of the matrices, + this can speed up the multiplication a lot. + + If the first argument is 1-D it is treated as a row vector. + If the last argument is 1-D it is treated as a column vector. + The other arguments must be 2-D. + + Think of `multi_dot` as:: + + def multi_dot(arrays): return functools.reduce(np.dot, arrays) + + + Parameters + ---------- + arrays : sequence of array_like + If the first argument is 1-D it is treated as row vector. + If the last argument is 1-D it is treated as column vector. + The other arguments must be 2-D. + out : ndarray, optional + Output argument. This must have the exact kind that would be returned + if it was not used. 
+        In particular, it must have the right type, must be
+        C-contiguous, and its dtype must be the dtype that would be returned
+        for `dot(a, b)`. This is a performance feature. Therefore, if these
+        conditions are not met, an exception is raised, instead of attempting
+        to be flexible.
+
+        .. versionadded:: 1.19.0
+
+    Returns
+    -------
+    output : ndarray
+        Returns the dot product of the supplied arrays.
+
+    See Also
+    --------
+    numpy.dot : dot multiplication with two arguments.
+
+    References
+    ----------
+
+    .. [1] Cormen, "Introduction to Algorithms", Chapter 15.2, p. 370-378
+    .. [2] https://en.wikipedia.org/wiki/Matrix_chain_multiplication
+
+    Examples
+    --------
+    `multi_dot` allows you to write::
+
+    >>> from numpy.linalg import multi_dot
+    >>> # Prepare some data
+    >>> A = np.random.random((10000, 100))
+    >>> B = np.random.random((100, 1000))
+    >>> C = np.random.random((1000, 5))
+    >>> D = np.random.random((5, 333))
+    >>> # the actual dot multiplication
+    >>> _ = multi_dot([A, B, C, D])
+
+    instead of::
+
+    >>> _ = np.dot(np.dot(np.dot(A, B), C), D)
+    >>> # or
+    >>> _ = A.dot(B).dot(C).dot(D)
+
+    Notes
+    -----
+    The cost for a matrix multiplication can be calculated with the
+    following function::
+
+        def cost(A, B):
+            return A.shape[0] * A.shape[1] * B.shape[1]
+
+    Assume we have three matrices
+    :math:`A_{10x100}, B_{100x5}, C_{5x50}`.
+
+    The costs for the two different parenthesizations are as follows::
+
+        cost((AB)C) = 10*100*5 + 10*5*50   = 5000 + 2500   = 7500
+        cost(A(BC)) = 10*100*50 + 100*5*50 = 50000 + 25000 = 75000
+
+    """
+    n = len(arrays)
+    # optimization only makes sense for len(arrays) > 2
+    if n < 2:
+        raise ValueError("Expecting at least two arrays.")
+    elif n == 2:
+        return dot(arrays[0], arrays[1], out=out)
+
+    arrays = [asanyarray(a) for a in arrays]
+
+    # save original ndim to reshape the result array into the proper form later
+    ndim_first, ndim_last = arrays[0].ndim, arrays[-1].ndim
+    # Explicitly convert vectors to 2D arrays to keep the logic of the internal
+    # _multi_dot_* functions as simple as possible.
+    if arrays[0].ndim == 1:
+        arrays[0] = atleast_2d(arrays[0])
+    if arrays[-1].ndim == 1:
+        arrays[-1] = atleast_2d(arrays[-1]).T
+    _assert_2d(*arrays)
+
+    # _multi_dot_three is much faster than _multi_dot_matrix_chain_order
+    if n == 3:
+        result = _multi_dot_three(arrays[0], arrays[1], arrays[2], out=out)
+    else:
+        order = _multi_dot_matrix_chain_order(arrays)
+        result = _multi_dot(arrays, order, 0, n - 1, out=out)
+
+    # return proper shape
+    if ndim_first == 1 and ndim_last == 1:
+        return result[0, 0]  # scalar
+    elif ndim_first == 1 or ndim_last == 1:
+        return result.ravel()  # 1-D
+    else:
+        return result
+
+
+def _multi_dot_three(A, B, C, out=None):
+    """
+    Find the best order for three arrays and do the multiplication.
+
+    For three arguments `_multi_dot_three` is approximately 15 times faster
+    than `_multi_dot_matrix_chain_order`
+
+    """
+    a0, a1b0 = A.shape
+    b1c0, c1 = C.shape
+    # cost1 = cost((AB)C) = a0*a1b0*b1c0 + a0*b1c0*c1
+    cost1 = a0 * b1c0 * (a1b0 + c1)
+    # cost2 = cost(A(BC)) = a1b0*b1c0*c1 + a0*a1b0*c1
+    cost2 = a1b0 * c1 * (a0 + b1c0)
+
+    if cost1 < cost2:
+        return dot(dot(A, B), C, out=out)
+    else:
+        return dot(A, dot(B, C), out=out)
+
+
+def _multi_dot_matrix_chain_order(arrays, return_costs=False):
+    """
+    Return a np.array that encodes the optimal order of multiplications.
+
+    The optimal order array is then used by `_multi_dot()` to do the
+    multiplication.
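# Illustrative sketch (not part of the numpy source): the cost comparison from
# the multi_dot notes above, evaluated with the cost() helper they define,
# here written over plain shape tuples:
def cost(shape_a, shape_b):
    # scalar multiplications for an (m, k) @ (k, n) product
    return shape_a[0] * shape_a[1] * shape_b[1]

A, B, C = (10, 100), (100, 5), (5, 50)
cost_AB_C = cost(A, B) + cost((A[0], B[1]), C)   # (AB)C: 5000 + 2500
cost_A_BC = cost(B, C) + cost(A, (B[0], C[1]))   # A(BC): 25000 + 50000
assert (cost_AB_C, cost_A_BC) == (7500, 75000)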
+ + Also return the cost matrix if `return_costs` is `True` + + The implementation CLOSELY follows Cormen, "Introduction to Algorithms", + Chapter 15.2, p. 370-378. Note that Cormen uses 1-based indices. + + cost[i, j] = min([ + cost[prefix] + cost[suffix] + cost_mult(prefix, suffix) + for k in range(i, j)]) + + """ + n = len(arrays) + # p stores the dimensions of the matrices + # Example for p: A_{10x100}, B_{100x5}, C_{5x50} --> p = [10, 100, 5, 50] + p = [a.shape[0] for a in arrays] + [arrays[-1].shape[1]] + # m is a matrix of costs of the subproblems + # m[i,j]: min number of scalar multiplications needed to compute A_{i..j} + m = zeros((n, n), dtype=double) + # s is the actual ordering + # s[i, j] is the value of k at which we split the product A_i..A_j + s = empty((n, n), dtype=intp) + + for l in range(1, n): + for i in range(n - l): + j = i + l + m[i, j] = Inf + for k in range(i, j): + q = m[i, k] + m[k+1, j] + p[i]*p[k+1]*p[j+1] + if q < m[i, j]: + m[i, j] = q + s[i, j] = k # Note that Cormen uses 1-based index + + return (s, m) if return_costs else s + + +def _multi_dot(arrays, order, i, j, out=None): + """Actually do the multiplication with the given order.""" + if i == j: + # the initial call with non-None out should never get here + assert out is None + + return arrays[i] + else: + return dot(_multi_dot(arrays, order, i, order[i, j]), + _multi_dot(arrays, order, order[i, j] + 1, j), + out=out) diff --git a/.local/lib/python3.8/site-packages/numpy/linalg/linalg.pyi b/.local/lib/python3.8/site-packages/numpy/linalg/linalg.pyi new file mode 100644 index 0000000..a60b953 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/linalg/linalg.pyi @@ -0,0 +1,284 @@ +from typing import ( + Literal as L, + List, + Iterable, + overload, + TypeVar, + Any, + SupportsIndex, + SupportsInt, + Tuple, +) + +from numpy import ( + generic, + floating, + complexfloating, + int32, + float64, + complex128, +) + +from numpy.linalg import LinAlgError as LinAlgError + +from numpy.typing import ( + NDArray, + ArrayLike, + _ArrayLikeInt_co, + _ArrayLikeFloat_co, + _ArrayLikeComplex_co, + _ArrayLikeTD64_co, + _ArrayLikeObject_co, +) + +_T = TypeVar("_T") +_ArrayType = TypeVar("_ArrayType", bound=NDArray[Any]) + +_2Tuple = Tuple[_T, _T] +_ModeKind = L["reduced", "complete", "r", "raw"] + +__all__: List[str] + +@overload +def tensorsolve( + a: _ArrayLikeInt_co, + b: _ArrayLikeInt_co, + axes: None | Iterable[int] =..., +) -> NDArray[float64]: ... +@overload +def tensorsolve( + a: _ArrayLikeFloat_co, + b: _ArrayLikeFloat_co, + axes: None | Iterable[int] =..., +) -> NDArray[floating[Any]]: ... +@overload +def tensorsolve( + a: _ArrayLikeComplex_co, + b: _ArrayLikeComplex_co, + axes: None | Iterable[int] =..., +) -> NDArray[complexfloating[Any, Any]]: ... + +@overload +def solve( + a: _ArrayLikeInt_co, + b: _ArrayLikeInt_co, +) -> NDArray[float64]: ... +@overload +def solve( + a: _ArrayLikeFloat_co, + b: _ArrayLikeFloat_co, +) -> NDArray[floating[Any]]: ... +@overload +def solve( + a: _ArrayLikeComplex_co, + b: _ArrayLikeComplex_co, +) -> NDArray[complexfloating[Any, Any]]: ... + +@overload +def tensorinv( + a: _ArrayLikeInt_co, + ind: int = ..., +) -> NDArray[float64]: ... +@overload +def tensorinv( + a: _ArrayLikeFloat_co, + ind: int = ..., +) -> NDArray[floating[Any]]: ... +@overload +def tensorinv( + a: _ArrayLikeComplex_co, + ind: int = ..., +) -> NDArray[complexfloating[Any, Any]]: ... + +@overload +def inv(a: _ArrayLikeInt_co) -> NDArray[float64]: ... 
+@overload +def inv(a: _ArrayLikeFloat_co) -> NDArray[floating[Any]]: ... +@overload +def inv(a: _ArrayLikeComplex_co) -> NDArray[complexfloating[Any, Any]]: ... + +# TODO: The supported input and output dtypes are dependent on the value of `n`. +# For example: `n < 0` always casts integer types to float64 +def matrix_power( + a: _ArrayLikeComplex_co | _ArrayLikeObject_co, + n: SupportsIndex, +) -> NDArray[Any]: ... + +@overload +def cholesky(a: _ArrayLikeInt_co) -> NDArray[float64]: ... +@overload +def cholesky(a: _ArrayLikeFloat_co) -> NDArray[floating[Any]]: ... +@overload +def cholesky(a: _ArrayLikeComplex_co) -> NDArray[complexfloating[Any, Any]]: ... + +@overload +def qr(a: _ArrayLikeInt_co, mode: _ModeKind = ...) -> _2Tuple[NDArray[float64]]: ... +@overload +def qr(a: _ArrayLikeFloat_co, mode: _ModeKind = ...) -> _2Tuple[NDArray[floating[Any]]]: ... +@overload +def qr(a: _ArrayLikeComplex_co, mode: _ModeKind = ...) -> _2Tuple[NDArray[complexfloating[Any, Any]]]: ... + +@overload +def eigvals(a: _ArrayLikeInt_co) -> NDArray[float64] | NDArray[complex128]: ... +@overload +def eigvals(a: _ArrayLikeFloat_co) -> NDArray[floating[Any]] | NDArray[complexfloating[Any, Any]]: ... +@overload +def eigvals(a: _ArrayLikeComplex_co) -> NDArray[complexfloating[Any, Any]]: ... + +@overload +def eigvalsh(a: _ArrayLikeInt_co, UPLO: L["L", "U", "l", "u"] = ...) -> NDArray[float64]: ... +@overload +def eigvalsh(a: _ArrayLikeComplex_co, UPLO: L["L", "U", "l", "u"] = ...) -> NDArray[floating[Any]]: ... + +@overload +def eig(a: _ArrayLikeInt_co) -> _2Tuple[NDArray[float64]] | _2Tuple[NDArray[complex128]]: ... +@overload +def eig(a: _ArrayLikeFloat_co) -> _2Tuple[NDArray[floating[Any]]] | _2Tuple[NDArray[complexfloating[Any, Any]]]: ... +@overload +def eig(a: _ArrayLikeComplex_co) -> _2Tuple[NDArray[complexfloating[Any, Any]]]: ... + +@overload +def eigh( + a: _ArrayLikeInt_co, + UPLO: L["L", "U", "l", "u"] = ..., +) -> Tuple[NDArray[float64], NDArray[float64]]: ... +@overload +def eigh( + a: _ArrayLikeFloat_co, + UPLO: L["L", "U", "l", "u"] = ..., +) -> Tuple[NDArray[floating[Any]], NDArray[floating[Any]]]: ... +@overload +def eigh( + a: _ArrayLikeComplex_co, + UPLO: L["L", "U", "l", "u"] = ..., +) -> Tuple[NDArray[floating[Any]], NDArray[complexfloating[Any, Any]]]: ... + +@overload +def svd( + a: _ArrayLikeInt_co, + full_matrices: bool = ..., + compute_uv: L[True] = ..., + hermitian: bool = ..., +) -> Tuple[ + NDArray[float64], + NDArray[float64], + NDArray[float64], +]: ... +@overload +def svd( + a: _ArrayLikeFloat_co, + full_matrices: bool = ..., + compute_uv: L[True] = ..., + hermitian: bool = ..., +) -> Tuple[ + NDArray[floating[Any]], + NDArray[floating[Any]], + NDArray[floating[Any]], +]: ... +@overload +def svd( + a: _ArrayLikeComplex_co, + full_matrices: bool = ..., + compute_uv: L[True] = ..., + hermitian: bool = ..., +) -> Tuple[ + NDArray[complexfloating[Any, Any]], + NDArray[floating[Any]], + NDArray[complexfloating[Any, Any]], +]: ... +@overload +def svd( + a: _ArrayLikeInt_co, + full_matrices: bool = ..., + compute_uv: L[False] = ..., + hermitian: bool = ..., +) -> NDArray[float64]: ... +@overload +def svd( + a: _ArrayLikeComplex_co, + full_matrices: bool = ..., + compute_uv: L[False] = ..., + hermitian: bool = ..., +) -> NDArray[floating[Any]]: ... + +# TODO: Returns a scalar for 2D arrays and +# a `(x.ndim - 2)`-dimensional array otherwise +def cond(x: _ArrayLikeComplex_co, p: None | float | L["fro", "nuc"] = ...) -> Any: ...
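[Editor's aside, not part of the vendored stub file: `cond` is annotated as returning `Any` because the result is a scalar for a single matrix but an `(x.ndim - 2)`-dimensional array for a stack of matrices. A minimal sketch against the public API, with arbitrary example inputs:

>>> import numpy as np
>>> np.linalg.cond(np.eye(2))  # one matrix -> scalar
1.0
>>> np.linalg.cond(np.ones((3, 2, 2)) + np.eye(2)).shape  # stack of 3 -> one value per matrix
(3,)
]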
+ +# TODO: Returns `int` for <2D arrays and `intp` otherwise +def matrix_rank( + A: _ArrayLikeComplex_co, + tol: None | _ArrayLikeFloat_co = ..., + hermitian: bool = ..., +) -> Any: ... + +@overload +def pinv( + a: _ArrayLikeInt_co, + rcond: _ArrayLikeFloat_co = ..., + hermitian: bool = ..., +) -> NDArray[float64]: ... +@overload +def pinv( + a: _ArrayLikeFloat_co, + rcond: _ArrayLikeFloat_co = ..., + hermitian: bool = ..., +) -> NDArray[floating[Any]]: ... +@overload +def pinv( + a: _ArrayLikeComplex_co, + rcond: _ArrayLikeFloat_co = ..., + hermitian: bool = ..., +) -> NDArray[complexfloating[Any, Any]]: ... + +# TODO: Returns a 2-tuple of scalars for 2D arrays and +# a 2-tuple of `(a.ndim - 2)`-dimensional arrays otherwise +def slogdet(a: _ArrayLikeComplex_co) -> _2Tuple[Any]: ... + +# TODO: Returns a scalar for 2D arrays and +# a `(a.ndim - 2)`-dimensional array otherwise +def det(a: _ArrayLikeComplex_co) -> Any: ... + +@overload +def lstsq(a: _ArrayLikeInt_co, b: _ArrayLikeInt_co, rcond: None | float = ...) -> Tuple[ + NDArray[float64], + NDArray[float64], + int32, + NDArray[float64], +]: ... +@overload +def lstsq(a: _ArrayLikeFloat_co, b: _ArrayLikeFloat_co, rcond: None | float = ...) -> Tuple[ + NDArray[floating[Any]], + NDArray[floating[Any]], + int32, + NDArray[floating[Any]], +]: ... +@overload +def lstsq(a: _ArrayLikeComplex_co, b: _ArrayLikeComplex_co, rcond: None | float = ...) -> Tuple[ + NDArray[complexfloating[Any, Any]], + NDArray[floating[Any]], + int32, + NDArray[floating[Any]], +]: ... + +@overload +def norm( + x: ArrayLike, + ord: None | float | L["fro", "nuc"] = ..., + axis: None = ..., + keepdims: bool = ..., +) -> floating[Any]: ... +@overload +def norm( + x: ArrayLike, + ord: None | float | L["fro", "nuc"] = ..., + axis: SupportsInt | SupportsIndex | Tuple[int, ...] = ..., + keepdims: bool = ..., +) -> Any: ... + +# TODO: Returns a scalar or array +def multi_dot( + arrays: Iterable[_ArrayLikeComplex_co | _ArrayLikeObject_co | _ArrayLikeTD64_co], + *, + out: None | NDArray[Any] = ..., +) -> Any: ...
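[Editor's aside, not part of the vendored files: the standalone sketch below exercises the two functions whose implementations appear above; the shapes and seed are arbitrary illustrations. It shows the per-axis behaviour of `norm` and reproduces the cost argument from the `multi_dot` docstring, where cost((AB)C) = 7500 is far below cost(A(BC)) = 75000 for these shapes, so the chain-order DP picks (AB)C.

import numpy as np
from numpy.linalg import multi_dot, norm

# An integer `axis` computes vector norms along that axis, so axis=0
# yields one 2-norm per column; with the default axis=None a 2-D input
# falls back to the Frobenius matrix norm.
M = np.array([[3.0, 0.0, 4.0],
              [4.0, 0.0, 3.0]])
print(norm(M, axis=0))   # [5. 0. 5.]
print(norm(M))           # sqrt(50), the Frobenius norm

# multi_dot evaluates the chain in the cheapest association order; the
# result agrees with any explicit parenthesization up to rounding.
rng = np.random.default_rng(0)
A, B, C = (rng.random(s) for s in [(10, 100), (100, 5), (5, 50)])
assert np.allclose(multi_dot([A, B, C]), (A @ B) @ C)
]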
diff --git a/.local/lib/python3.8/site-packages/numpy/linalg/setup.py b/.local/lib/python3.8/site-packages/numpy/linalg/setup.py new file mode 100644 index 0000000..94536bb --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/linalg/setup.py @@ -0,0 +1,82 @@ +import os +import sys + +def configuration(parent_package='', top_path=None): + from numpy.distutils.misc_util import Configuration + from numpy.distutils.system_info import get_info, system_info + config = Configuration('linalg', parent_package, top_path) + + config.add_subpackage('tests') + + # Configure lapack_lite + + src_dir = 'lapack_lite' + lapack_lite_src = [ + os.path.join(src_dir, 'python_xerbla.c'), + os.path.join(src_dir, 'f2c_z_lapack.c'), + os.path.join(src_dir, 'f2c_c_lapack.c'), + os.path.join(src_dir, 'f2c_d_lapack.c'), + os.path.join(src_dir, 'f2c_s_lapack.c'), + os.path.join(src_dir, 'f2c_lapack.c'), + os.path.join(src_dir, 'f2c_blas.c'), + os.path.join(src_dir, 'f2c_config.c'), + os.path.join(src_dir, 'f2c.c'), + ] + all_sources = config.paths(lapack_lite_src) + + if os.environ.get('NPY_USE_BLAS_ILP64', "0") != "0": + lapack_info = get_info('lapack_ilp64_opt', 2) + else: + lapack_info = get_info('lapack_opt', 0) # and {} + + use_lapack_lite = not lapack_info + + if use_lapack_lite: + # This makes numpy.distutils write the fact that lapack_lite + # is being used to numpy.__config__ + class numpy_linalg_lapack_lite(system_info): + def calc_info(self): + info = {'language': 'c'} + if sys.maxsize > 2**32: + # Build lapack-lite in 64-bit integer mode. + # The suffix is arbitrary (lapack_lite symbols follow it), + # but use the "64_" convention here. + info['define_macros'] = [ + ('HAVE_BLAS_ILP64', None), + ('BLAS_SYMBOL_SUFFIX', '64_') + ] + self.set_info(**info) + + lapack_info = numpy_linalg_lapack_lite().get_info(2) + + def get_lapack_lite_sources(ext, build_dir): + if use_lapack_lite: + print("### Warning: Using unoptimized lapack ###") + return all_sources + else: + if sys.platform == 'win32': + print("### Warning: python_xerbla.c is disabled ###") + return [] + return [all_sources[0]] + + config.add_extension( + 'lapack_lite', + sources=['lapack_litemodule.c', get_lapack_lite_sources], + depends=['lapack_lite/f2c.h'], + extra_info=lapack_info, + ) + + # umath_linalg module + config.add_extension( + '_umath_linalg', + sources=['umath_linalg.c.src', get_lapack_lite_sources], + depends=['lapack_lite/f2c.h'], + extra_info=lapack_info, + libraries=['npymath'], + ) + config.add_data_files('*.pyi') + return config + +if __name__ == '__main__': + from numpy.distutils.core import setup + setup(configuration=configuration) diff --git a/.local/lib/python3.8/site-packages/numpy/linalg/tests/__init__.py b/.local/lib/python3.8/site-packages/numpy/linalg/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/numpy/linalg/tests/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/linalg/tests/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..500f041 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/linalg/tests/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/linalg/tests/__pycache__/test_deprecations.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/linalg/tests/__pycache__/test_deprecations.cpython-38.pyc new file mode 100644 index 0000000..087d21b Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/numpy/linalg/tests/__pycache__/test_deprecations.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/linalg/tests/__pycache__/test_linalg.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/linalg/tests/__pycache__/test_linalg.cpython-38.pyc new file mode 100644 index 0000000..036e2af Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/linalg/tests/__pycache__/test_linalg.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/linalg/tests/__pycache__/test_regression.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/linalg/tests/__pycache__/test_regression.cpython-38.pyc new file mode 100644 index 0000000..3c0f3a8 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/linalg/tests/__pycache__/test_regression.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/linalg/tests/test_deprecations.py b/.local/lib/python3.8/site-packages/numpy/linalg/tests/test_deprecations.py new file mode 100644 index 0000000..cd4c108 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/linalg/tests/test_deprecations.py @@ -0,0 +1,20 @@ +"""Test deprecation and future warnings. + +""" +import numpy as np +from numpy.testing import assert_warns + + +def test_qr_mode_full_future_warning(): + """Check mode='full' FutureWarning. + + In numpy 1.8 the mode options 'full' and 'economic' in linalg.qr were + deprecated. The release date will probably be sometime in the summer + of 2013. + + """ + a = np.eye(2) + assert_warns(DeprecationWarning, np.linalg.qr, a, mode='full') + assert_warns(DeprecationWarning, np.linalg.qr, a, mode='f') + assert_warns(DeprecationWarning, np.linalg.qr, a, mode='economic') + assert_warns(DeprecationWarning, np.linalg.qr, a, mode='e') diff --git a/.local/lib/python3.8/site-packages/numpy/linalg/tests/test_linalg.py b/.local/lib/python3.8/site-packages/numpy/linalg/tests/test_linalg.py new file mode 100644 index 0000000..c1ba84a --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/linalg/tests/test_linalg.py @@ -0,0 +1,2152 @@ +""" Test functions for linalg module + +""" +import os +import sys +import itertools +import traceback +import textwrap +import subprocess +import pytest + +import numpy as np +from numpy import array, single, double, csingle, cdouble, dot, identity, matmul +from numpy.core import swapaxes +from numpy import multiply, atleast_2d, inf, asarray +from numpy import linalg +from numpy.linalg import matrix_power, norm, matrix_rank, multi_dot, LinAlgError +from numpy.linalg.linalg import _multi_dot_matrix_chain_order +from numpy.testing import ( + assert_, assert_equal, assert_raises, assert_array_equal, + assert_almost_equal, assert_allclose, suppress_warnings, + assert_raises_regex, HAS_LAPACK64, + ) + + +def consistent_subclass(out, in_): + # For ndarray subclass input, our output should have the same subclass + # (non-ndarray input gets converted to ndarray). 
+ return type(out) is (type(in_) if isinstance(in_, np.ndarray) + else np.ndarray) + + +old_assert_almost_equal = assert_almost_equal + + +def assert_almost_equal(a, b, single_decimal=6, double_decimal=12, **kw): + if asarray(a).dtype.type in (single, csingle): + decimal = single_decimal + else: + decimal = double_decimal + old_assert_almost_equal(a, b, decimal=decimal, **kw) + + +def get_real_dtype(dtype): + return {single: single, double: double, + csingle: single, cdouble: double}[dtype] + + +def get_complex_dtype(dtype): + return {single: csingle, double: cdouble, + csingle: csingle, cdouble: cdouble}[dtype] + + +def get_rtol(dtype): + # Choose a safe rtol + if dtype in (single, csingle): + return 1e-5 + else: + return 1e-11 + + +# used to categorize tests +all_tags = { + 'square', 'nonsquare', 'hermitian', # mutually exclusive + 'generalized', 'size-0', 'strided' # optional additions +} + + +class LinalgCase: + def __init__(self, name, a, b, tags=set()): + """ + A bundle of arguments to be passed to a test case, with an identifying + name, the operands a and b, and a set of tags to filter the tests + """ + assert_(isinstance(name, str)) + self.name = name + self.a = a + self.b = b + self.tags = frozenset(tags) # prevent shared tags + + def check(self, do): + """ + Run the function `do` on this test case, expanding arguments + """ + do(self.a, self.b, tags=self.tags) + + def __repr__(self): + return f'<LinalgCase: {self.name}>' + + +def apply_tag(tag, cases): + """ + Add the given tag (a string) to each of the cases (a list of LinalgCase + objects) + """ + assert tag in all_tags, "Invalid tag" + for case in cases: + case.tags = case.tags | {tag} + return cases + + +# +# Base test cases +# + +np.random.seed(1234) + +CASES = [] + +# square test cases +CASES += apply_tag('square', [ + LinalgCase("single", + array([[1., 2.], [3., 4.]], dtype=single), + array([2., 1.], dtype=single)), + LinalgCase("double", + array([[1., 2.], [3., 4.]], dtype=double), + array([2., 1.], dtype=double)), + LinalgCase("double_2", + array([[1., 2.], [3., 4.]], dtype=double), + array([[2., 1., 4.], [3., 4., 6.]], dtype=double)), + LinalgCase("csingle", + array([[1. + 2j, 2 + 3j], [3 + 4j, 4 + 5j]], dtype=csingle), + array([2. + 1j, 1. + 2j], dtype=csingle)), + LinalgCase("cdouble", + array([[1. + 2j, 2 + 3j], [3 + 4j, 4 + 5j]], dtype=cdouble), + array([2. + 1j, 1. + 2j], dtype=cdouble)), + LinalgCase("cdouble_2", + array([[1. + 2j, 2 + 3j], [3 + 4j, 4 + 5j]], dtype=cdouble), + array([[2. + 1j, 1. + 2j, 1 + 3j], [1 - 2j, 1 - 3j, 1 - 6j]], dtype=cdouble)), + LinalgCase("0x0", + np.empty((0, 0), dtype=double), + np.empty((0,), dtype=double), + tags={'size-0'}), + LinalgCase("8x8", + np.random.rand(8, 8), + np.random.rand(8)), + LinalgCase("1x1", + np.random.rand(1, 1), + np.random.rand(1)), + LinalgCase("nonarray", + [[1, 2], [3, 4]], + [2, 1]), +]) + +# non-square test-cases +CASES += apply_tag('nonsquare', [ + LinalgCase("single_nsq_1", + array([[1., 2., 3.], [3., 4., 6.]], dtype=single), + array([2., 1.], dtype=single)), + LinalgCase("single_nsq_2", + array([[1., 2.], [3., 4.], [5., 6.]], dtype=single), + array([2., 1., 3.], dtype=single)), + LinalgCase("double_nsq_1", + array([[1., 2., 3.], [3., 4., 6.]], dtype=double), + array([2., 1.], dtype=double)), + LinalgCase("double_nsq_2", + array([[1., 2.], [3., 4.], [5., 6.]], dtype=double), + array([2., 1., 3.], dtype=double)), + LinalgCase("csingle_nsq_1", + array( + [[1. + 1j, 2. + 2j, 3. - 3j], [3. - 5j, 4. + 9j, 6. + 2j]], dtype=csingle), + array([2. + 1j, 1.
+ 2j], dtype=csingle)), + LinalgCase("csingle_nsq_2", + array( + [[1. + 1j, 2. + 2j], [3. - 3j, 4. - 9j], [5. - 4j, 6. + 8j]], dtype=csingle), + array([2. + 1j, 1. + 2j, 3. - 3j], dtype=csingle)), + LinalgCase("cdouble_nsq_1", + array( + [[1. + 1j, 2. + 2j, 3. - 3j], [3. - 5j, 4. + 9j, 6. + 2j]], dtype=cdouble), + array([2. + 1j, 1. + 2j], dtype=cdouble)), + LinalgCase("cdouble_nsq_2", + array( + [[1. + 1j, 2. + 2j], [3. - 3j, 4. - 9j], [5. - 4j, 6. + 8j]], dtype=cdouble), + array([2. + 1j, 1. + 2j, 3. - 3j], dtype=cdouble)), + LinalgCase("cdouble_nsq_1_2", + array( + [[1. + 1j, 2. + 2j, 3. - 3j], [3. - 5j, 4. + 9j, 6. + 2j]], dtype=cdouble), + array([[2. + 1j, 1. + 2j], [1 - 1j, 2 - 2j]], dtype=cdouble)), + LinalgCase("cdouble_nsq_2_2", + array( + [[1. + 1j, 2. + 2j], [3. - 3j, 4. - 9j], [5. - 4j, 6. + 8j]], dtype=cdouble), + array([[2. + 1j, 1. + 2j], [1 - 1j, 2 - 2j], [1 - 1j, 2 - 2j]], dtype=cdouble)), + LinalgCase("8x11", + np.random.rand(8, 11), + np.random.rand(8)), + LinalgCase("1x5", + np.random.rand(1, 5), + np.random.rand(1)), + LinalgCase("5x1", + np.random.rand(5, 1), + np.random.rand(5)), + LinalgCase("0x4", + np.random.rand(0, 4), + np.random.rand(0), + tags={'size-0'}), + LinalgCase("4x0", + np.random.rand(4, 0), + np.random.rand(4), + tags={'size-0'}), +]) + +# hermitian test-cases +CASES += apply_tag('hermitian', [ + LinalgCase("hsingle", + array([[1., 2.], [2., 1.]], dtype=single), + None), + LinalgCase("hdouble", + array([[1., 2.], [2., 1.]], dtype=double), + None), + LinalgCase("hcsingle", + array([[1., 2 + 3j], [2 - 3j, 1]], dtype=csingle), + None), + LinalgCase("hcdouble", + array([[1., 2 + 3j], [2 - 3j, 1]], dtype=cdouble), + None), + LinalgCase("hempty", + np.empty((0, 0), dtype=double), + None, + tags={'size-0'}), + LinalgCase("hnonarray", + [[1, 2], [2, 1]], + None), + LinalgCase("matrix_b_only", + array([[1., 2.], [2., 1.]]), + None), + LinalgCase("hmatrix_1x1", + np.random.rand(1, 1), + None), +]) + + +# +# Gufunc test cases +# +def _make_generalized_cases(): + new_cases = [] + + for case in CASES: + if not isinstance(case.a, np.ndarray): + continue + + a = np.array([case.a, 2 * case.a, 3 * case.a]) + if case.b is None: + b = None + else: + b = np.array([case.b, 7 * case.b, 6 * case.b]) + new_case = LinalgCase(case.name + "_tile3", a, b, + tags=case.tags | {'generalized'}) + new_cases.append(new_case) + + a = np.array([case.a] * 2 * 3).reshape((3, 2) + case.a.shape) + if case.b is None: + b = None + else: + b = np.array([case.b] * 2 * 3).reshape((3, 2) + case.b.shape) + new_case = LinalgCase(case.name + "_tile213", a, b, + tags=case.tags | {'generalized'}) + new_cases.append(new_case) + + return new_cases + + +CASES += _make_generalized_cases() + + +# +# Generate stride combination variations of the above +# +def _stride_comb_iter(x): + """ + Generate cartesian product of strides for all axes + """ + + if not isinstance(x, np.ndarray): + yield x, "nop" + return + + stride_set = [(1,)] * x.ndim + stride_set[-1] = (1, 3, -4) + if x.ndim > 1: + stride_set[-2] = (1, 3, -4) + if x.ndim > 2: + stride_set[-3] = (1, -4) + + for repeats in itertools.product(*tuple(stride_set)): + new_shape = [abs(a * b) for a, b in zip(x.shape, repeats)] + slices = tuple([slice(None, None, repeat) for repeat in repeats]) + + # new array with different strides, but same data + xi = np.empty(new_shape, dtype=x.dtype) + xi.view(np.uint32).fill(0xdeadbeef) + xi = xi[slices] + xi[...] 
= x + xi = xi.view(x.__class__) + assert_(np.all(xi == x)) + yield xi, "stride_" + "_".join(["%+d" % j for j in repeats]) + + # generate also zero strides if possible + if x.ndim >= 1 and x.shape[-1] == 1: + s = list(x.strides) + s[-1] = 0 + xi = np.lib.stride_tricks.as_strided(x, strides=s) + yield xi, "stride_xxx_0" + if x.ndim >= 2 and x.shape[-2] == 1: + s = list(x.strides) + s[-2] = 0 + xi = np.lib.stride_tricks.as_strided(x, strides=s) + yield xi, "stride_xxx_0_x" + if x.ndim >= 2 and x.shape[:-2] == (1, 1): + s = list(x.strides) + s[-1] = 0 + s[-2] = 0 + xi = np.lib.stride_tricks.as_strided(x, strides=s) + yield xi, "stride_xxx_0_0" + + +def _make_strided_cases(): + new_cases = [] + for case in CASES: + for a, a_label in _stride_comb_iter(case.a): + for b, b_label in _stride_comb_iter(case.b): + new_case = LinalgCase(case.name + "_" + a_label + "_" + b_label, a, b, + tags=case.tags | {'strided'}) + new_cases.append(new_case) + return new_cases + + +CASES += _make_strided_cases() + + +# +# Test different routines against the above cases +# +class LinalgTestCase: + TEST_CASES = CASES + + def check_cases(self, require=set(), exclude=set()): + """ + Run func on each of the cases with all of the tags in require, and none + of the tags in exclude + """ + for case in self.TEST_CASES: + # filter by require and exclude + if case.tags & require != require: + continue + if case.tags & exclude: + continue + + try: + case.check(self.do) + except Exception as e: + msg = f'In test case: {case!r}\n\n' + msg += traceback.format_exc() + raise AssertionError(msg) from e + + +class LinalgSquareTestCase(LinalgTestCase): + + def test_sq_cases(self): + self.check_cases(require={'square'}, + exclude={'generalized', 'size-0'}) + + def test_empty_sq_cases(self): + self.check_cases(require={'square', 'size-0'}, + exclude={'generalized'}) + + +class LinalgNonsquareTestCase(LinalgTestCase): + + def test_nonsq_cases(self): + self.check_cases(require={'nonsquare'}, + exclude={'generalized', 'size-0'}) + + def test_empty_nonsq_cases(self): + self.check_cases(require={'nonsquare', 'size-0'}, + exclude={'generalized'}) + + +class HermitianTestCase(LinalgTestCase): + + def test_herm_cases(self): + self.check_cases(require={'hermitian'}, + exclude={'generalized', 'size-0'}) + + def test_empty_herm_cases(self): + self.check_cases(require={'hermitian', 'size-0'}, + exclude={'generalized'}) + + +class LinalgGeneralizedSquareTestCase(LinalgTestCase): + + @pytest.mark.slow + def test_generalized_sq_cases(self): + self.check_cases(require={'generalized', 'square'}, + exclude={'size-0'}) + + @pytest.mark.slow + def test_generalized_empty_sq_cases(self): + self.check_cases(require={'generalized', 'square', 'size-0'}) + + +class LinalgGeneralizedNonsquareTestCase(LinalgTestCase): + + @pytest.mark.slow + def test_generalized_nonsq_cases(self): + self.check_cases(require={'generalized', 'nonsquare'}, + exclude={'size-0'}) + + @pytest.mark.slow + def test_generalized_empty_nonsq_cases(self): + self.check_cases(require={'generalized', 'nonsquare', 'size-0'}) + + +class HermitianGeneralizedTestCase(LinalgTestCase): + + @pytest.mark.slow + def test_generalized_herm_cases(self): + self.check_cases(require={'generalized', 'hermitian'}, + exclude={'size-0'}) + + @pytest.mark.slow + def test_generalized_empty_herm_cases(self): + self.check_cases(require={'generalized', 'hermitian', 'size-0'}, + exclude={'none'}) + + +def dot_generalized(a, b): + a = asarray(a) + if a.ndim >= 3: + if a.ndim == b.ndim: + # matrix x matrix + new_shape = 
a.shape[:-1] + b.shape[-1:] + elif a.ndim == b.ndim + 1: + # matrix x vector + new_shape = a.shape[:-1] + else: + raise ValueError("Not implemented...") + r = np.empty(new_shape, dtype=np.common_type(a, b)) + for c in itertools.product(*map(range, a.shape[:-2])): + r[c] = dot(a[c], b[c]) + return r + else: + return dot(a, b) + + +def identity_like_generalized(a): + a = asarray(a) + if a.ndim >= 3: + r = np.empty(a.shape, dtype=a.dtype) + r[...] = identity(a.shape[-2]) + return r + else: + return identity(a.shape[0]) + + +class SolveCases(LinalgSquareTestCase, LinalgGeneralizedSquareTestCase): + # kept apart from TestSolve for use for testing with matrices. + def do(self, a, b, tags): + x = linalg.solve(a, b) + assert_almost_equal(b, dot_generalized(a, x)) + assert_(consistent_subclass(x, b)) + + +class TestSolve(SolveCases): + @pytest.mark.parametrize('dtype', [single, double, csingle, cdouble]) + def test_types(self, dtype): + x = np.array([[1, 0.5], [0.5, 1]], dtype=dtype) + assert_equal(linalg.solve(x, x).dtype, dtype) + + def test_0_size(self): + class ArraySubclass(np.ndarray): + pass + # Test system of 0x0 matrices + a = np.arange(8).reshape(2, 2, 2) + b = np.arange(6).reshape(1, 2, 3).view(ArraySubclass) + + expected = linalg.solve(a, b)[:, 0:0, :] + result = linalg.solve(a[:, 0:0, 0:0], b[:, 0:0, :]) + assert_array_equal(result, expected) + assert_(isinstance(result, ArraySubclass)) + + # Test errors for non-square and only b's dimension being 0 + assert_raises(linalg.LinAlgError, linalg.solve, a[:, 0:0, 0:1], b) + assert_raises(ValueError, linalg.solve, a, b[:, 0:0, :]) + + # Test broadcasting error + b = np.arange(6).reshape(1, 3, 2) # broadcasting error + assert_raises(ValueError, linalg.solve, a, b) + assert_raises(ValueError, linalg.solve, a[0:0], b[0:0]) + + # Test zero "single equations" with 0x0 matrices. + b = np.arange(2).reshape(1, 2).view(ArraySubclass) + expected = linalg.solve(a, b)[:, 0:0] + result = linalg.solve(a[:, 0:0, 0:0], b[:, 0:0]) + assert_array_equal(result, expected) + assert_(isinstance(result, ArraySubclass)) + + b = np.arange(3).reshape(1, 3) + assert_raises(ValueError, linalg.solve, a, b) + assert_raises(ValueError, linalg.solve, a[0:0], b[0:0]) + assert_raises(ValueError, linalg.solve, a[:, 0:0, 0:0], b) + + def test_0_size_k(self): + # test zero multiple equation (K=0) case. + class ArraySubclass(np.ndarray): + pass + a = np.arange(4).reshape(1, 2, 2) + b = np.arange(6).reshape(3, 2, 1).view(ArraySubclass) + + expected = linalg.solve(a, b)[:, :, 0:0] + result = linalg.solve(a, b[:, :, 0:0]) + assert_array_equal(result, expected) + assert_(isinstance(result, ArraySubclass)) + + # test both zero. 
+ expected = linalg.solve(a, b)[:, 0:0, 0:0] + result = linalg.solve(a[:, 0:0, 0:0], b[:, 0:0, 0:0]) + assert_array_equal(result, expected) + assert_(isinstance(result, ArraySubclass)) + + +class InvCases(LinalgSquareTestCase, LinalgGeneralizedSquareTestCase): + + def do(self, a, b, tags): + a_inv = linalg.inv(a) + assert_almost_equal(dot_generalized(a, a_inv), + identity_like_generalized(a)) + assert_(consistent_subclass(a_inv, a)) + + +class TestInv(InvCases): + @pytest.mark.parametrize('dtype', [single, double, csingle, cdouble]) + def test_types(self, dtype): + x = np.array([[1, 0.5], [0.5, 1]], dtype=dtype) + assert_equal(linalg.inv(x).dtype, dtype) + + def test_0_size(self): + # Check that all kinds of 0-sized arrays work + class ArraySubclass(np.ndarray): + pass + a = np.zeros((0, 1, 1), dtype=np.int_).view(ArraySubclass) + res = linalg.inv(a) + assert_(res.dtype.type is np.float64) + assert_equal(a.shape, res.shape) + assert_(isinstance(res, ArraySubclass)) + + a = np.zeros((0, 0), dtype=np.complex64).view(ArraySubclass) + res = linalg.inv(a) + assert_(res.dtype.type is np.complex64) + assert_equal(a.shape, res.shape) + assert_(isinstance(res, ArraySubclass)) + + +class EigvalsCases(LinalgSquareTestCase, LinalgGeneralizedSquareTestCase): + + def do(self, a, b, tags): + ev = linalg.eigvals(a) + evalues, evectors = linalg.eig(a) + assert_almost_equal(ev, evalues) + + +class TestEigvals(EigvalsCases): + @pytest.mark.parametrize('dtype', [single, double, csingle, cdouble]) + def test_types(self, dtype): + x = np.array([[1, 0.5], [0.5, 1]], dtype=dtype) + assert_equal(linalg.eigvals(x).dtype, dtype) + x = np.array([[1, 0.5], [-1, 1]], dtype=dtype) + assert_equal(linalg.eigvals(x).dtype, get_complex_dtype(dtype)) + + def test_0_size(self): + # Check that all kinds of 0-sized arrays work + class ArraySubclass(np.ndarray): + pass + a = np.zeros((0, 1, 1), dtype=np.int_).view(ArraySubclass) + res = linalg.eigvals(a) + assert_(res.dtype.type is np.float64) + assert_equal((0, 1), res.shape) + # This is just for documentation, it might make sense to change: + assert_(isinstance(res, np.ndarray)) + + a = np.zeros((0, 0), dtype=np.complex64).view(ArraySubclass) + res = linalg.eigvals(a) + assert_(res.dtype.type is np.complex64) + assert_equal((0,), res.shape) + # This is just for documentation, it might make sense to change: + assert_(isinstance(res, np.ndarray)) + + +class EigCases(LinalgSquareTestCase, LinalgGeneralizedSquareTestCase): + + def do(self, a, b, tags): + evalues, evectors = linalg.eig(a) + assert_allclose(dot_generalized(a, evectors), + np.asarray(evectors) * np.asarray(evalues)[..., None, :], + rtol=get_rtol(evalues.dtype)) + assert_(consistent_subclass(evectors, a)) + + +class TestEig(EigCases): + @pytest.mark.parametrize('dtype', [single, double, csingle, cdouble]) + def test_types(self, dtype): + x = np.array([[1, 0.5], [0.5, 1]], dtype=dtype) + w, v = np.linalg.eig(x) + assert_equal(w.dtype, dtype) + assert_equal(v.dtype, dtype) + + x = np.array([[1, 0.5], [-1, 1]], dtype=dtype) + w, v = np.linalg.eig(x) + assert_equal(w.dtype, get_complex_dtype(dtype)) + assert_equal(v.dtype, get_complex_dtype(dtype)) + + def test_0_size(self): + # Check that all kinds of 0-sized arrays work + class ArraySubclass(np.ndarray): + pass + a = np.zeros((0, 1, 1), dtype=np.int_).view(ArraySubclass) + res, res_v = linalg.eig(a) + assert_(res_v.dtype.type is np.float64) + assert_(res.dtype.type is np.float64) + assert_equal(a.shape, res_v.shape) + assert_equal((0, 1), res.shape) + # This is just for 
documentation, it might make sense to change: + assert_(isinstance(a, np.ndarray)) + + a = np.zeros((0, 0), dtype=np.complex64).view(ArraySubclass) + res, res_v = linalg.eig(a) + assert_(res_v.dtype.type is np.complex64) + assert_(res.dtype.type is np.complex64) + assert_equal(a.shape, res_v.shape) + assert_equal((0,), res.shape) + # This is just for documentation, it might make sense to change: + assert_(isinstance(a, np.ndarray)) + + +class SVDBaseTests: + hermitian = False + + @pytest.mark.parametrize('dtype', [single, double, csingle, cdouble]) + def test_types(self, dtype): + x = np.array([[1, 0.5], [0.5, 1]], dtype=dtype) + u, s, vh = linalg.svd(x) + assert_equal(u.dtype, dtype) + assert_equal(s.dtype, get_real_dtype(dtype)) + assert_equal(vh.dtype, dtype) + s = linalg.svd(x, compute_uv=False, hermitian=self.hermitian) + assert_equal(s.dtype, get_real_dtype(dtype)) + + +class SVDCases(LinalgSquareTestCase, LinalgGeneralizedSquareTestCase): + + def do(self, a, b, tags): + u, s, vt = linalg.svd(a, False) + assert_allclose(a, dot_generalized(np.asarray(u) * np.asarray(s)[..., None, :], + np.asarray(vt)), + rtol=get_rtol(u.dtype)) + assert_(consistent_subclass(u, a)) + assert_(consistent_subclass(vt, a)) + + +class TestSVD(SVDCases, SVDBaseTests): + def test_empty_identity(self): + """ Empty input should put an identity matrix in u or vh """ + x = np.empty((4, 0)) + u, s, vh = linalg.svd(x, compute_uv=True, hermitian=self.hermitian) + assert_equal(u.shape, (4, 4)) + assert_equal(vh.shape, (0, 0)) + assert_equal(u, np.eye(4)) + + x = np.empty((0, 4)) + u, s, vh = linalg.svd(x, compute_uv=True, hermitian=self.hermitian) + assert_equal(u.shape, (0, 0)) + assert_equal(vh.shape, (4, 4)) + assert_equal(vh, np.eye(4)) + + +class SVDHermitianCases(HermitianTestCase, HermitianGeneralizedTestCase): + + def do(self, a, b, tags): + u, s, vt = linalg.svd(a, False, hermitian=True) + assert_allclose(a, dot_generalized(np.asarray(u) * np.asarray(s)[..., None, :], + np.asarray(vt)), + rtol=get_rtol(u.dtype)) + def hermitian(mat): + axes = list(range(mat.ndim)) + axes[-1], axes[-2] = axes[-2], axes[-1] + return np.conj(np.transpose(mat, axes=axes)) + + assert_almost_equal(np.matmul(u, hermitian(u)), np.broadcast_to(np.eye(u.shape[-1]), u.shape)) + assert_almost_equal(np.matmul(vt, hermitian(vt)), np.broadcast_to(np.eye(vt.shape[-1]), vt.shape)) + assert_equal(np.sort(s)[..., ::-1], s) + assert_(consistent_subclass(u, a)) + assert_(consistent_subclass(vt, a)) + + +class TestSVDHermitian(SVDHermitianCases, SVDBaseTests): + hermitian = True + + +class CondCases(LinalgSquareTestCase, LinalgGeneralizedSquareTestCase): + # cond(x, p) for p in (None, 2, -2) + + def do(self, a, b, tags): + c = asarray(a) # a might be a matrix + if 'size-0' in tags: + assert_raises(LinAlgError, linalg.cond, c) + return + + # +-2 norms + s = linalg.svd(c, compute_uv=False) + assert_almost_equal( + linalg.cond(a), s[..., 0] / s[..., -1], + single_decimal=5, double_decimal=11) + assert_almost_equal( + linalg.cond(a, 2), s[..., 0] / s[..., -1], + single_decimal=5, double_decimal=11) + assert_almost_equal( + linalg.cond(a, -2), s[..., -1] / s[..., 0], + single_decimal=5, double_decimal=11) + + # Other norms + cinv = np.linalg.inv(c) + assert_almost_equal( + linalg.cond(a, 1), + abs(c).sum(-2).max(-1) * abs(cinv).sum(-2).max(-1), + single_decimal=5, double_decimal=11) + assert_almost_equal( + linalg.cond(a, -1), + abs(c).sum(-2).min(-1) * abs(cinv).sum(-2).min(-1), + single_decimal=5, double_decimal=11) + assert_almost_equal( + 
linalg.cond(a, np.inf), + abs(c).sum(-1).max(-1) * abs(cinv).sum(-1).max(-1), + single_decimal=5, double_decimal=11) + assert_almost_equal( + linalg.cond(a, -np.inf), + abs(c).sum(-1).min(-1) * abs(cinv).sum(-1).min(-1), + single_decimal=5, double_decimal=11) + assert_almost_equal( + linalg.cond(a, 'fro'), + np.sqrt((abs(c)**2).sum(-1).sum(-1) + * (abs(cinv)**2).sum(-1).sum(-1)), + single_decimal=5, double_decimal=11) + + +class TestCond(CondCases): + def test_basic_nonsvd(self): + # Smoketest the non-svd norms + A = array([[1., 0, 1], [0, -2., 0], [0, 0, 3.]]) + assert_almost_equal(linalg.cond(A, inf), 4) + assert_almost_equal(linalg.cond(A, -inf), 2/3) + assert_almost_equal(linalg.cond(A, 1), 4) + assert_almost_equal(linalg.cond(A, -1), 0.5) + assert_almost_equal(linalg.cond(A, 'fro'), np.sqrt(265 / 12)) + + def test_singular(self): + # Singular matrices have infinite condition number for + # positive norms, and negative norms shouldn't raise + # exceptions + As = [np.zeros((2, 2)), np.ones((2, 2))] + p_pos = [None, 1, 2, 'fro'] + p_neg = [-1, -2] + for A, p in itertools.product(As, p_pos): + # Inversion may not hit exact infinity, so just check the + # number is large + assert_(linalg.cond(A, p) > 1e15) + for A, p in itertools.product(As, p_neg): + linalg.cond(A, p) + + @pytest.mark.xfail(True, run=False, + reason="Platform/LAPACK-dependent failure, " + "see gh-18914") + def test_nan(self): + # nans should be passed through, not converted to infs + ps = [None, 1, -1, 2, -2, 'fro'] + p_pos = [None, 1, 2, 'fro'] + + A = np.ones((2, 2)) + A[0,1] = np.nan + for p in ps: + c = linalg.cond(A, p) + assert_(isinstance(c, np.float_)) + assert_(np.isnan(c)) + + A = np.ones((3, 2, 2)) + A[1,0,1] = np.nan + for p in ps: + c = linalg.cond(A, p) + assert_(np.isnan(c[1])) + if p in p_pos: + assert_(c[0] > 1e15) + assert_(c[2] > 1e15) + else: + assert_(not np.isnan(c[0])) + assert_(not np.isnan(c[2])) + + def test_stacked_singular(self): + # Check behavior when only some of the stacked matrices are + # singular + np.random.seed(1234) + A = np.random.rand(2, 2, 2, 2) + A[0,0] = 0 + A[1,1] = 0 + + for p in (None, 1, 2, 'fro', -1, -2): + c = linalg.cond(A, p) + assert_equal(c[0,0], np.inf) + assert_equal(c[1,1], np.inf) + assert_(np.isfinite(c[0,1])) + assert_(np.isfinite(c[1,0])) + + +class PinvCases(LinalgSquareTestCase, + LinalgNonsquareTestCase, + LinalgGeneralizedSquareTestCase, + LinalgGeneralizedNonsquareTestCase): + + def do(self, a, b, tags): + a_ginv = linalg.pinv(a) + # `a @ a_ginv == I` does not hold if a is singular + dot = dot_generalized + assert_almost_equal(dot(dot(a, a_ginv), a), a, single_decimal=5, double_decimal=11) + assert_(consistent_subclass(a_ginv, a)) + + +class TestPinv(PinvCases): + pass + + +class PinvHermitianCases(HermitianTestCase, HermitianGeneralizedTestCase): + + def do(self, a, b, tags): + a_ginv = linalg.pinv(a, hermitian=True) + # `a @ a_ginv == I` does not hold if a is singular + dot = dot_generalized + assert_almost_equal(dot(dot(a, a_ginv), a), a, single_decimal=5, double_decimal=11) + assert_(consistent_subclass(a_ginv, a)) + + +class TestPinvHermitian(PinvHermitianCases): + pass + + +class DetCases(LinalgSquareTestCase, LinalgGeneralizedSquareTestCase): + + def do(self, a, b, tags): + d = linalg.det(a) + (s, ld) = linalg.slogdet(a) + if asarray(a).dtype.type in (single, double): + ad = asarray(a).astype(double) + else: + ad = asarray(a).astype(cdouble) + ev = linalg.eigvals(ad) + assert_almost_equal(d, multiply.reduce(ev, axis=-1)) + assert_almost_equal(s * 
np.exp(ld), multiply.reduce(ev, axis=-1)) + + s = np.atleast_1d(s) + ld = np.atleast_1d(ld) + m = (s != 0) + assert_almost_equal(np.abs(s[m]), 1) + assert_equal(ld[~m], -inf) + + +class TestDet(DetCases): + def test_zero(self): + assert_equal(linalg.det([[0.0]]), 0.0) + assert_equal(type(linalg.det([[0.0]])), double) + assert_equal(linalg.det([[0.0j]]), 0.0) + assert_equal(type(linalg.det([[0.0j]])), cdouble) + + assert_equal(linalg.slogdet([[0.0]]), (0.0, -inf)) + assert_equal(type(linalg.slogdet([[0.0]])[0]), double) + assert_equal(type(linalg.slogdet([[0.0]])[1]), double) + assert_equal(linalg.slogdet([[0.0j]]), (0.0j, -inf)) + assert_equal(type(linalg.slogdet([[0.0j]])[0]), cdouble) + assert_equal(type(linalg.slogdet([[0.0j]])[1]), double) + + @pytest.mark.parametrize('dtype', [single, double, csingle, cdouble]) + def test_types(self, dtype): + x = np.array([[1, 0.5], [0.5, 1]], dtype=dtype) + assert_equal(np.linalg.det(x).dtype, dtype) + ph, s = np.linalg.slogdet(x) + assert_equal(s.dtype, get_real_dtype(dtype)) + assert_equal(ph.dtype, dtype) + + def test_0_size(self): + a = np.zeros((0, 0), dtype=np.complex64) + res = linalg.det(a) + assert_equal(res, 1.) + assert_(res.dtype.type is np.complex64) + res = linalg.slogdet(a) + assert_equal(res, (1, 0)) + assert_(res[0].dtype.type is np.complex64) + assert_(res[1].dtype.type is np.float32) + + a = np.zeros((0, 0), dtype=np.float64) + res = linalg.det(a) + assert_equal(res, 1.) + assert_(res.dtype.type is np.float64) + res = linalg.slogdet(a) + assert_equal(res, (1, 0)) + assert_(res[0].dtype.type is np.float64) + assert_(res[1].dtype.type is np.float64) + + +class LstsqCases(LinalgSquareTestCase, LinalgNonsquareTestCase): + + def do(self, a, b, tags): + arr = np.asarray(a) + m, n = arr.shape + u, s, vt = linalg.svd(a, False) + x, residuals, rank, sv = linalg.lstsq(a, b, rcond=-1) + if m == 0: + assert_((x == 0).all()) + if m <= n: + assert_almost_equal(b, dot(a, x)) + assert_equal(rank, m) + else: + assert_equal(rank, n) + assert_almost_equal(sv, sv.__array_wrap__(s)) + if rank == n and m > n: + expect_resids = ( + np.asarray(abs(np.dot(a, x) - b)) ** 2).sum(axis=0) + expect_resids = np.asarray(expect_resids) + if np.asarray(b).ndim == 1: + expect_resids.shape = (1,) + assert_equal(residuals.shape, expect_resids.shape) + else: + expect_resids = np.array([]).view(type(x)) + assert_almost_equal(residuals, expect_resids) + assert_(np.issubdtype(residuals.dtype, np.floating)) + assert_(consistent_subclass(x, b)) + assert_(consistent_subclass(residuals, b)) + + +class TestLstsq(LstsqCases): + def test_future_rcond(self): + a = np.array([[0., 1., 0., 1., 2., 0.], + [0., 2., 0., 0., 1., 0.], + [1., 0., 1., 0., 0., 4.], + [0., 0., 0., 2., 3., 0.]]).T + + b = np.array([1, 0, 0, 0, 0, 0]) + with suppress_warnings() as sup: + w = sup.record(FutureWarning, "`rcond` parameter will change") + x, residuals, rank, s = linalg.lstsq(a, b) + assert_(rank == 4) + x, residuals, rank, s = linalg.lstsq(a, b, rcond=-1) + assert_(rank == 4) + x, residuals, rank, s = linalg.lstsq(a, b, rcond=None) + assert_(rank == 3) + # Warning should be raised exactly once (first command) + assert_(len(w) == 1) + + @pytest.mark.parametrize(["m", "n", "n_rhs"], [ + (4, 2, 2), + (0, 4, 1), + (0, 4, 2), + (4, 0, 1), + (4, 0, 2), + (4, 2, 0), + (0, 0, 0) + ]) + def test_empty_a_b(self, m, n, n_rhs): + a = np.arange(m * n).reshape(m, n) + b = np.ones((m, n_rhs)) + x, residuals, rank, s = linalg.lstsq(a, b, rcond=None) + if m == 0: + assert_((x == 0).all()) + assert_equal(x.shape, 
(n, n_rhs)) + assert_equal(residuals.shape, ((n_rhs,) if m > n else (0,))) + if m > n and n_rhs > 0: + # residuals are exactly the squared norms of b's columns + r = b - np.dot(a, x) + assert_almost_equal(residuals, (r * r).sum(axis=-2)) + assert_equal(rank, min(m, n)) + assert_equal(s.shape, (min(m, n),)) + + def test_incompatible_dims(self): + # use modified version of docstring example + x = np.array([0, 1, 2, 3]) + y = np.array([-1, 0.2, 0.9, 2.1, 3.3]) + A = np.vstack([x, np.ones(len(x))]).T + with assert_raises_regex(LinAlgError, "Incompatible dimensions"): + linalg.lstsq(A, y, rcond=None) + + +@pytest.mark.parametrize('dt', [np.dtype(c) for c in '?bBhHiIqQefdgFDGO']) +class TestMatrixPower: + + rshft_0 = np.eye(4) + rshft_1 = rshft_0[[3, 0, 1, 2]] + rshft_2 = rshft_0[[2, 3, 0, 1]] + rshft_3 = rshft_0[[1, 2, 3, 0]] + rshft_all = [rshft_0, rshft_1, rshft_2, rshft_3] + noninv = array([[1, 0], [0, 0]]) + stacked = np.block([[[rshft_0]]]*2) + #FIXME the 'e' dtype might work in future + dtnoinv = [object, np.dtype('e'), np.dtype('g'), np.dtype('G')] + + def test_large_power(self, dt): + rshft = self.rshft_1.astype(dt) + assert_equal( + matrix_power(rshft, 2**100 + 2**10 + 2**5 + 0), self.rshft_0) + assert_equal( + matrix_power(rshft, 2**100 + 2**10 + 2**5 + 1), self.rshft_1) + assert_equal( + matrix_power(rshft, 2**100 + 2**10 + 2**5 + 2), self.rshft_2) + assert_equal( + matrix_power(rshft, 2**100 + 2**10 + 2**5 + 3), self.rshft_3) + + def test_power_is_zero(self, dt): + def tz(M): + mz = matrix_power(M, 0) + assert_equal(mz, identity_like_generalized(M)) + assert_equal(mz.dtype, M.dtype) + + for mat in self.rshft_all: + tz(mat.astype(dt)) + if dt != object: + tz(self.stacked.astype(dt)) + + def test_power_is_one(self, dt): + def tz(mat): + mz = matrix_power(mat, 1) + assert_equal(mz, mat) + assert_equal(mz.dtype, mat.dtype) + + for mat in self.rshft_all: + tz(mat.astype(dt)) + if dt != object: + tz(self.stacked.astype(dt)) + + def test_power_is_two(self, dt): + def tz(mat): + mz = matrix_power(mat, 2) + mmul = matmul if mat.dtype != object else dot + assert_equal(mz, mmul(mat, mat)) + assert_equal(mz.dtype, mat.dtype) + + for mat in self.rshft_all: + tz(mat.astype(dt)) + if dt != object: + tz(self.stacked.astype(dt)) + + def test_power_is_minus_one(self, dt): + def tz(mat): + invmat = matrix_power(mat, -1) + mmul = matmul if mat.dtype != object else dot + assert_almost_equal( + mmul(invmat, mat), identity_like_generalized(mat)) + + for mat in self.rshft_all: + if dt not in self.dtnoinv: + tz(mat.astype(dt)) + + def test_exceptions_bad_power(self, dt): + mat = self.rshft_0.astype(dt) + assert_raises(TypeError, matrix_power, mat, 1.5) + assert_raises(TypeError, matrix_power, mat, [1]) + + def test_exceptions_non_square(self, dt): + assert_raises(LinAlgError, matrix_power, np.array([1], dt), 1) + assert_raises(LinAlgError, matrix_power, np.array([[1], [2]], dt), 1) + assert_raises(LinAlgError, matrix_power, np.ones((4, 3, 2), dt), 1) + + def test_exceptions_not_invertible(self, dt): + if dt in self.dtnoinv: + return + mat = self.noninv.astype(dt) + assert_raises(LinAlgError, matrix_power, mat, -1) + + +class TestEigvalshCases(HermitianTestCase, HermitianGeneralizedTestCase): + + def do(self, a, b, tags): + # note that eigenvalue arrays returned by eig must be sorted since + # their order isn't guaranteed. 
+ ev = linalg.eigvalsh(a, 'L') + evalues, evectors = linalg.eig(a) + evalues.sort(axis=-1) + assert_allclose(ev, evalues, rtol=get_rtol(ev.dtype)) + + ev2 = linalg.eigvalsh(a, 'U') + assert_allclose(ev2, evalues, rtol=get_rtol(ev.dtype)) + + +class TestEigvalsh: + @pytest.mark.parametrize('dtype', [single, double, csingle, cdouble]) + def test_types(self, dtype): + x = np.array([[1, 0.5], [0.5, 1]], dtype=dtype) + w = np.linalg.eigvalsh(x) + assert_equal(w.dtype, get_real_dtype(dtype)) + + def test_invalid(self): + x = np.array([[1, 0.5], [0.5, 1]], dtype=np.float32) + assert_raises(ValueError, np.linalg.eigvalsh, x, UPLO="lrong") + assert_raises(ValueError, np.linalg.eigvalsh, x, "lower") + assert_raises(ValueError, np.linalg.eigvalsh, x, "upper") + + def test_UPLO(self): + Klo = np.array([[0, 0], [1, 0]], dtype=np.double) + Kup = np.array([[0, 1], [0, 0]], dtype=np.double) + tgt = np.array([-1, 1], dtype=np.double) + rtol = get_rtol(np.double) + + # Check default is 'L' + w = np.linalg.eigvalsh(Klo) + assert_allclose(w, tgt, rtol=rtol) + # Check 'L' + w = np.linalg.eigvalsh(Klo, UPLO='L') + assert_allclose(w, tgt, rtol=rtol) + # Check 'l' + w = np.linalg.eigvalsh(Klo, UPLO='l') + assert_allclose(w, tgt, rtol=rtol) + # Check 'U' + w = np.linalg.eigvalsh(Kup, UPLO='U') + assert_allclose(w, tgt, rtol=rtol) + # Check 'u' + w = np.linalg.eigvalsh(Kup, UPLO='u') + assert_allclose(w, tgt, rtol=rtol) + + def test_0_size(self): + # Check that all kinds of 0-sized arrays work + class ArraySubclass(np.ndarray): + pass + a = np.zeros((0, 1, 1), dtype=np.int_).view(ArraySubclass) + res = linalg.eigvalsh(a) + assert_(res.dtype.type is np.float64) + assert_equal((0, 1), res.shape) + # This is just for documentation, it might make sense to change: + assert_(isinstance(res, np.ndarray)) + + a = np.zeros((0, 0), dtype=np.complex64).view(ArraySubclass) + res = linalg.eigvalsh(a) + assert_(res.dtype.type is np.float32) + assert_equal((0,), res.shape) + # This is just for documentation, it might make sense to change: + assert_(isinstance(res, np.ndarray)) + + +class TestEighCases(HermitianTestCase, HermitianGeneralizedTestCase): + + def do(self, a, b, tags): + # note that eigenvalue arrays returned by eig must be sorted since + # their order isn't guaranteed. 
+ ev, evc = linalg.eigh(a) + evalues, evectors = linalg.eig(a) + evalues.sort(axis=-1) + assert_almost_equal(ev, evalues) + + assert_allclose(dot_generalized(a, evc), + np.asarray(ev)[..., None, :] * np.asarray(evc), + rtol=get_rtol(ev.dtype)) + + ev2, evc2 = linalg.eigh(a, 'U') + assert_almost_equal(ev2, evalues) + + assert_allclose(dot_generalized(a, evc2), + np.asarray(ev2)[..., None, :] * np.asarray(evc2), + rtol=get_rtol(ev.dtype), err_msg=repr(a)) + + +class TestEigh: + @pytest.mark.parametrize('dtype', [single, double, csingle, cdouble]) + def test_types(self, dtype): + x = np.array([[1, 0.5], [0.5, 1]], dtype=dtype) + w, v = np.linalg.eigh(x) + assert_equal(w.dtype, get_real_dtype(dtype)) + assert_equal(v.dtype, dtype) + + def test_invalid(self): + x = np.array([[1, 0.5], [0.5, 1]], dtype=np.float32) + assert_raises(ValueError, np.linalg.eigh, x, UPLO="lrong") + assert_raises(ValueError, np.linalg.eigh, x, "lower") + assert_raises(ValueError, np.linalg.eigh, x, "upper") + + def test_UPLO(self): + Klo = np.array([[0, 0], [1, 0]], dtype=np.double) + Kup = np.array([[0, 1], [0, 0]], dtype=np.double) + tgt = np.array([-1, 1], dtype=np.double) + rtol = get_rtol(np.double) + + # Check default is 'L' + w, v = np.linalg.eigh(Klo) + assert_allclose(w, tgt, rtol=rtol) + # Check 'L' + w, v = np.linalg.eigh(Klo, UPLO='L') + assert_allclose(w, tgt, rtol=rtol) + # Check 'l' + w, v = np.linalg.eigh(Klo, UPLO='l') + assert_allclose(w, tgt, rtol=rtol) + # Check 'U' + w, v = np.linalg.eigh(Kup, UPLO='U') + assert_allclose(w, tgt, rtol=rtol) + # Check 'u' + w, v = np.linalg.eigh(Kup, UPLO='u') + assert_allclose(w, tgt, rtol=rtol) + + def test_0_size(self): + # Check that all kinds of 0-sized arrays work + class ArraySubclass(np.ndarray): + pass + a = np.zeros((0, 1, 1), dtype=np.int_).view(ArraySubclass) + res, res_v = linalg.eigh(a) + assert_(res_v.dtype.type is np.float64) + assert_(res.dtype.type is np.float64) + assert_equal(a.shape, res_v.shape) + assert_equal((0, 1), res.shape) + # This is just for documentation, it might make sense to change: + assert_(isinstance(a, np.ndarray)) + + a = np.zeros((0, 0), dtype=np.complex64).view(ArraySubclass) + res, res_v = linalg.eigh(a) + assert_(res_v.dtype.type is np.complex64) + assert_(res.dtype.type is np.float32) + assert_equal(a.shape, res_v.shape) + assert_equal((0,), res.shape) + # This is just for documentation, it might make sense to change: + assert_(isinstance(a, np.ndarray)) + + +class _TestNormBase: + dt = None + dec = None + + +class _TestNormGeneral(_TestNormBase): + + def test_empty(self): + assert_equal(norm([]), 0.0) + assert_equal(norm(array([], dtype=self.dt)), 0.0) + assert_equal(norm(atleast_2d(array([], dtype=self.dt))), 0.0) + + def test_vector_return_type(self): + a = np.array([1, 0, 1]) + + exact_types = np.typecodes['AllInteger'] + inexact_types = np.typecodes['AllFloat'] + + all_types = exact_types + inexact_types + + for each_inexact_types in all_types: + at = a.astype(each_inexact_types) + + an = norm(at, -np.inf) + assert_(issubclass(an.dtype.type, np.floating)) + assert_almost_equal(an, 0.0) + + with suppress_warnings() as sup: + sup.filter(RuntimeWarning, "divide by zero encountered") + an = norm(at, -1) + assert_(issubclass(an.dtype.type, np.floating)) + assert_almost_equal(an, 0.0) + + an = norm(at, 0) + assert_(issubclass(an.dtype.type, np.floating)) + assert_almost_equal(an, 2) + + an = norm(at, 1) + assert_(issubclass(an.dtype.type, np.floating)) + assert_almost_equal(an, 2.0) + + an = norm(at, 2) + 
assert_(issubclass(an.dtype.type, np.floating)) + assert_almost_equal(an, an.dtype.type(2.0)**an.dtype.type(1.0/2.0)) + + an = norm(at, 4) + assert_(issubclass(an.dtype.type, np.floating)) + assert_almost_equal(an, an.dtype.type(2.0)**an.dtype.type(1.0/4.0)) + + an = norm(at, np.inf) + assert_(issubclass(an.dtype.type, np.floating)) + assert_almost_equal(an, 1.0) + + def test_vector(self): + a = [1, 2, 3, 4] + b = [-1, -2, -3, -4] + c = [-1, 2, -3, 4] + + def _test(v): + np.testing.assert_almost_equal(norm(v), 30 ** 0.5, + decimal=self.dec) + np.testing.assert_almost_equal(norm(v, inf), 4.0, + decimal=self.dec) + np.testing.assert_almost_equal(norm(v, -inf), 1.0, + decimal=self.dec) + np.testing.assert_almost_equal(norm(v, 1), 10.0, + decimal=self.dec) + np.testing.assert_almost_equal(norm(v, -1), 12.0 / 25, + decimal=self.dec) + np.testing.assert_almost_equal(norm(v, 2), 30 ** 0.5, + decimal=self.dec) + np.testing.assert_almost_equal(norm(v, -2), ((205. / 144) ** -0.5), + decimal=self.dec) + np.testing.assert_almost_equal(norm(v, 0), 4, + decimal=self.dec) + + for v in (a, b, c,): + _test(v) + + for v in (array(a, dtype=self.dt), array(b, dtype=self.dt), + array(c, dtype=self.dt)): + _test(v) + + def test_axis(self): + # Vector norms. + # Compare the use of `axis` with computing the norm of each row + # or column separately. + A = array([[1, 2, 3], [4, 5, 6]], dtype=self.dt) + for order in [None, -1, 0, 1, 2, 3, np.Inf, -np.Inf]: + expected0 = [norm(A[:, k], ord=order) for k in range(A.shape[1])] + assert_almost_equal(norm(A, ord=order, axis=0), expected0) + expected1 = [norm(A[k, :], ord=order) for k in range(A.shape[0])] + assert_almost_equal(norm(A, ord=order, axis=1), expected1) + + # Matrix norms. + B = np.arange(1, 25, dtype=self.dt).reshape(2, 3, 4) + nd = B.ndim + for order in [None, -2, 2, -1, 1, np.Inf, -np.Inf, 'fro']: + for axis in itertools.combinations(range(-nd, nd), 2): + row_axis, col_axis = axis + if row_axis < 0: + row_axis += nd + if col_axis < 0: + col_axis += nd + if row_axis == col_axis: + assert_raises(ValueError, norm, B, ord=order, axis=axis) + else: + n = norm(B, ord=order, axis=axis) + + # The logic using k_index only works for nd = 3. + # This has to be changed if nd is increased. + k_index = nd - (row_axis + col_axis) + if row_axis < col_axis: + expected = [norm(B[:].take(k, axis=k_index), ord=order) + for k in range(B.shape[k_index])] + else: + expected = [norm(B[:].take(k, axis=k_index).T, ord=order) + for k in range(B.shape[k_index])] + assert_almost_equal(n, expected) + + def test_keepdims(self): + A = np.arange(1, 25, dtype=self.dt).reshape(2, 3, 4) + + allclose_err = 'order {0}, axis = {1}' + shape_err = 'Shape mismatch found {0}, expected {1}, order={2}, axis={3}' + + # check the order=None, axis=None case + expected = norm(A, ord=None, axis=None) + found = norm(A, ord=None, axis=None, keepdims=True) + assert_allclose(np.squeeze(found), expected, + err_msg=allclose_err.format(None, None)) + expected_shape = (1, 1, 1) + assert_(found.shape == expected_shape, + shape_err.format(found.shape, expected_shape, None, None)) + + # Vector norms. 
+ for order in [None, -1, 0, 1, 2, 3, np.Inf, -np.Inf]: + for k in range(A.ndim): + expected = norm(A, ord=order, axis=k) + found = norm(A, ord=order, axis=k, keepdims=True) + assert_allclose(np.squeeze(found), expected, + err_msg=allclose_err.format(order, k)) + expected_shape = list(A.shape) + expected_shape[k] = 1 + expected_shape = tuple(expected_shape) + assert_(found.shape == expected_shape, + shape_err.format(found.shape, expected_shape, order, k)) + + # Matrix norms. + for order in [None, -2, 2, -1, 1, np.Inf, -np.Inf, 'fro', 'nuc']: + for k in itertools.permutations(range(A.ndim), 2): + expected = norm(A, ord=order, axis=k) + found = norm(A, ord=order, axis=k, keepdims=True) + assert_allclose(np.squeeze(found), expected, + err_msg=allclose_err.format(order, k)) + expected_shape = list(A.shape) + expected_shape[k[0]] = 1 + expected_shape[k[1]] = 1 + expected_shape = tuple(expected_shape) + assert_(found.shape == expected_shape, + shape_err.format(found.shape, expected_shape, order, k)) + + +class _TestNorm2D(_TestNormBase): + # Define the part for 2d arrays separately, so we can subclass this + # and run the tests using np.matrix in matrixlib.tests.test_matrix_linalg. + array = np.array + + def test_matrix_empty(self): + assert_equal(norm(self.array([[]], dtype=self.dt)), 0.0) + + def test_matrix_return_type(self): + a = self.array([[1, 0, 1], [0, 1, 1]]) + + exact_types = np.typecodes['AllInteger'] + + # float32, complex64, float64, complex128 types are the only types + # allowed by `linalg`, which performs the matrix operations used + # within `norm`. + inexact_types = 'fdFD' + + all_types = exact_types + inexact_types + + for each_inexact_types in all_types: + at = a.astype(each_inexact_types) + + an = norm(at, -np.inf) + assert_(issubclass(an.dtype.type, np.floating)) + assert_almost_equal(an, 2.0) + + with suppress_warnings() as sup: + sup.filter(RuntimeWarning, "divide by zero encountered") + an = norm(at, -1) + assert_(issubclass(an.dtype.type, np.floating)) + assert_almost_equal(an, 1.0) + + an = norm(at, 1) + assert_(issubclass(an.dtype.type, np.floating)) + assert_almost_equal(an, 2.0) + + an = norm(at, 2) + assert_(issubclass(an.dtype.type, np.floating)) + assert_almost_equal(an, 3.0**(1.0/2.0)) + + an = norm(at, -2) + assert_(issubclass(an.dtype.type, np.floating)) + assert_almost_equal(an, 1.0) + + an = norm(at, np.inf) + assert_(issubclass(an.dtype.type, np.floating)) + assert_almost_equal(an, 2.0) + + an = norm(at, 'fro') + assert_(issubclass(an.dtype.type, np.floating)) + assert_almost_equal(an, 2.0) + + an = norm(at, 'nuc') + assert_(issubclass(an.dtype.type, np.floating)) + # Lower bar needed to support low precision floats. + # They end up being off by 1 in the 7th place. 
+ np.testing.assert_almost_equal(an, 2.7320508075688772, decimal=6) + + def test_matrix_2x2(self): + A = self.array([[1, 3], [5, 7]], dtype=self.dt) + assert_almost_equal(norm(A), 84 ** 0.5) + assert_almost_equal(norm(A, 'fro'), 84 ** 0.5) + assert_almost_equal(norm(A, 'nuc'), 10.0) + assert_almost_equal(norm(A, inf), 12.0) + assert_almost_equal(norm(A, -inf), 4.0) + assert_almost_equal(norm(A, 1), 10.0) + assert_almost_equal(norm(A, -1), 6.0) + assert_almost_equal(norm(A, 2), 9.1231056256176615) + assert_almost_equal(norm(A, -2), 0.87689437438234041) + + assert_raises(ValueError, norm, A, 'nofro') + assert_raises(ValueError, norm, A, -3) + assert_raises(ValueError, norm, A, 0) + + def test_matrix_3x3(self): + # This test has been added because the 2x2 example + # happened to have equal nuclear norm and induced 1-norm. + # The 1/10 scaling factor accommodates the absolute tolerance + # used in assert_almost_equal. + A = (1 / 10) * \ + self.array([[1, 2, 3], [6, 0, 5], [3, 2, 1]], dtype=self.dt) + assert_almost_equal(norm(A), (1 / 10) * 89 ** 0.5) + assert_almost_equal(norm(A, 'fro'), (1 / 10) * 89 ** 0.5) + assert_almost_equal(norm(A, 'nuc'), 1.3366836911774836) + assert_almost_equal(norm(A, inf), 1.1) + assert_almost_equal(norm(A, -inf), 0.6) + assert_almost_equal(norm(A, 1), 1.0) + assert_almost_equal(norm(A, -1), 0.4) + assert_almost_equal(norm(A, 2), 0.88722940323461277) + assert_almost_equal(norm(A, -2), 0.19456584790481812) + + def test_bad_args(self): + # Check that bad arguments raise the appropriate exceptions. + + A = self.array([[1, 2, 3], [4, 5, 6]], dtype=self.dt) + B = np.arange(1, 25, dtype=self.dt).reshape(2, 3, 4) + + # Using `axis=` or passing in a 1-D array implies vector + # norms are being computed, so also using `ord='fro'` + # or `ord='nuc'` or any other string raises a ValueError. + assert_raises(ValueError, norm, A, 'fro', 0) + assert_raises(ValueError, norm, A, 'nuc', 0) + assert_raises(ValueError, norm, [3, 4], 'fro', None) + assert_raises(ValueError, norm, [3, 4], 'nuc', None) + assert_raises(ValueError, norm, [3, 4], 'test', None) + + # Similarly, norm should raise an exception when ord is any finite + # number other than 1, 2, -1 or -2 when computing matrix norms. + for order in [0, 3]: + assert_raises(ValueError, norm, A, order, None) + assert_raises(ValueError, norm, A, order, (0, 1)) + assert_raises(ValueError, norm, B, order, (1, 2)) + + # Invalid axis + assert_raises(np.AxisError, norm, B, None, 3) + assert_raises(np.AxisError, norm, B, None, (2, 3)) + assert_raises(ValueError, norm, B, None, (0, 1, 2)) + + +class _TestNorm(_TestNorm2D, _TestNormGeneral): + pass + + +class TestNorm_NonSystematic: + + def test_longdouble_norm(self): + # Non-regression test: p-norm of longdouble would previously raise + # UnboundLocalError. + x = np.arange(10, dtype=np.longdouble) + old_assert_almost_equal(norm(x, ord=3), 12.65, decimal=2) + + def test_intmin(self): + # Non-regression test: p-norm of signed integer would previously do + # float cast and abs in the wrong order. 
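+ # NOTE (editorial, not upstream): abs(-2**31) is not representable in + # int32, so taking abs() before casting to float would overflow; for this + # one-element vector every p-norm equals |x| = 2**31.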
+ x = np.array([-2 ** 31], dtype=np.int32) + old_assert_almost_equal(norm(x, ord=3), 2 ** 31, decimal=5) + + def test_complex_high_ord(self): + # gh-4156 + d = np.empty((2,), dtype=np.clongdouble) + d[0] = 6 + 7j + d[1] = -6 + 7j + res = 11.615898132184 + old_assert_almost_equal(np.linalg.norm(d, ord=3), res, decimal=10) + d = d.astype(np.complex128) + old_assert_almost_equal(np.linalg.norm(d, ord=3), res, decimal=9) + d = d.astype(np.complex64) + old_assert_almost_equal(np.linalg.norm(d, ord=3), res, decimal=5) + + +# Separate definitions so we can use them for matrix tests. +class _TestNormDoubleBase(_TestNormBase): + dt = np.double + dec = 12 + + +class _TestNormSingleBase(_TestNormBase): + dt = np.float32 + dec = 6 + + +class _TestNormInt64Base(_TestNormBase): + dt = np.int64 + dec = 12 + + +class TestNormDouble(_TestNorm, _TestNormDoubleBase): + pass + + +class TestNormSingle(_TestNorm, _TestNormSingleBase): + pass + + +class TestNormInt64(_TestNorm, _TestNormInt64Base): + pass + + +class TestMatrixRank: + + def test_matrix_rank(self): + # Full rank matrix + assert_equal(4, matrix_rank(np.eye(4))) + # rank deficient matrix + I = np.eye(4) + I[-1, -1] = 0. + assert_equal(matrix_rank(I), 3) + # All zeros - zero rank + assert_equal(matrix_rank(np.zeros((4, 4))), 0) + # 1 dimension - rank 1 unless all 0 + assert_equal(matrix_rank([1, 0, 0, 0]), 1) + assert_equal(matrix_rank(np.zeros((4,))), 0) + # accepts array-like + assert_equal(matrix_rank([1]), 1) + # greater than 2 dimensions treated as stacked matrices + ms = np.array([I, np.eye(4), np.zeros((4,4))]) + assert_equal(matrix_rank(ms), np.array([3, 4, 0])) + # works on scalar + assert_equal(matrix_rank(1), 1) + + def test_symmetric_rank(self): + assert_equal(4, matrix_rank(np.eye(4), hermitian=True)) + assert_equal(1, matrix_rank(np.ones((4, 4)), hermitian=True)) + assert_equal(0, matrix_rank(np.zeros((4, 4)), hermitian=True)) + # rank deficient matrix + I = np.eye(4) + I[-1, -1] = 0. + assert_equal(3, matrix_rank(I, hermitian=True)) + # manually supplied tolerance + I[-1, -1] = 1e-8 + assert_equal(4, matrix_rank(I, hermitian=True, tol=0.99e-8)) + assert_equal(3, matrix_rank(I, hermitian=True, tol=1.01e-8)) + + +def test_reduced_rank(): + # Test matrices with reduced rank + rng = np.random.RandomState(20120714) + for i in range(100): + # Make a rank deficient matrix + X = rng.normal(size=(40, 10)) + X[:, 0] = X[:, 1] + X[:, 2] + # Assert that matrix_rank detected deficiency + assert_equal(matrix_rank(X), 9) + X[:, 3] = X[:, 4] + X[:, 5] + assert_equal(matrix_rank(X), 8) + + +class TestQR: + # Define the array class here, so we can run these tests on matrices elsewhere. + array = np.array + + def check_qr(self, a): + # This test expects the argument `a` to be an ndarray or + # a subclass of an ndarray of inexact type. 
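+ # NOTE (editorial, not upstream): the input's class and dtype are recorded + # first; every mode checked below must preserve both.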
+ a_type = type(a) + a_dtype = a.dtype + m, n = a.shape + k = min(m, n) + + # mode == 'complete' + q, r = linalg.qr(a, mode='complete') + assert_(q.dtype == a_dtype) + assert_(r.dtype == a_dtype) + assert_(isinstance(q, a_type)) + assert_(isinstance(r, a_type)) + assert_(q.shape == (m, m)) + assert_(r.shape == (m, n)) + assert_almost_equal(dot(q, r), a) + assert_almost_equal(dot(q.T.conj(), q), np.eye(m)) + assert_almost_equal(np.triu(r), r) + + # mode == 'reduced' + q1, r1 = linalg.qr(a, mode='reduced') + assert_(q1.dtype == a_dtype) + assert_(r1.dtype == a_dtype) + assert_(isinstance(q1, a_type)) + assert_(isinstance(r1, a_type)) + assert_(q1.shape == (m, k)) + assert_(r1.shape == (k, n)) + assert_almost_equal(dot(q1, r1), a) + assert_almost_equal(dot(q1.T.conj(), q1), np.eye(k)) + assert_almost_equal(np.triu(r1), r1) + + # mode == 'r' + r2 = linalg.qr(a, mode='r') + assert_(r2.dtype == a_dtype) + assert_(isinstance(r2, a_type)) + assert_almost_equal(r2, r1) + + + @pytest.mark.parametrize(["m", "n"], [ + (3, 0), + (0, 3), + (0, 0) + ]) + def test_qr_empty(self, m, n): + k = min(m, n) + a = np.empty((m, n)) + + self.check_qr(a) + + h, tau = np.linalg.qr(a, mode='raw') + assert_equal(h.dtype, np.double) + assert_equal(tau.dtype, np.double) + assert_equal(h.shape, (n, m)) + assert_equal(tau.shape, (k,)) + + def test_mode_raw(self): + # The factorization is not unique and varies between libraries, + # so it is not possible to check against known values. Functional + # testing is a possibility, but awaits the exposure of more + # of the functions in lapack_lite. Consequently, this test is + # very limited in scope. Note that the results are in FORTRAN + # order, hence the h arrays are transposed. + a = self.array([[1, 2], [3, 4], [5, 6]], dtype=np.double) + + # Test double + h, tau = linalg.qr(a, mode='raw') + assert_(h.dtype == np.double) + assert_(tau.dtype == np.double) + assert_(h.shape == (2, 3)) + assert_(tau.shape == (2,)) + + h, tau = linalg.qr(a.T, mode='raw') + assert_(h.dtype == np.double) + assert_(tau.dtype == np.double) + assert_(h.shape == (3, 2)) + assert_(tau.shape == (2,)) + + def test_mode_all_but_economic(self): + a = self.array([[1, 2], [3, 4]]) + b = self.array([[1, 2], [3, 4], [5, 6]]) + for dt in "fd": + m1 = a.astype(dt) + m2 = b.astype(dt) + self.check_qr(m1) + self.check_qr(m2) + self.check_qr(m2.T) + + for dt in "fd": + m1 = 1 + 1j * a.astype(dt) + m2 = 1 + 1j * b.astype(dt) + self.check_qr(m1) + self.check_qr(m2) + self.check_qr(m2.T) + + def check_qr_stacked(self, a): + # This test expects the argument `a` to be an ndarray or + # a subclass of an ndarray of inexact type. 
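+ # NOTE (editorial, not upstream): same checks as check_qr, applied to the + # trailing two axes; q^H q is compared against an identity matrix broadcast + # over the leading stack dimensions.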
+ a_type = type(a) + a_dtype = a.dtype + m, n = a.shape[-2:] + k = min(m, n) + + # mode == 'complete' + q, r = linalg.qr(a, mode='complete') + assert_(q.dtype == a_dtype) + assert_(r.dtype == a_dtype) + assert_(isinstance(q, a_type)) + assert_(isinstance(r, a_type)) + assert_(q.shape[-2:] == (m, m)) + assert_(r.shape[-2:] == (m, n)) + assert_almost_equal(matmul(q, r), a) + I_mat = np.identity(q.shape[-1]) + stack_I_mat = np.broadcast_to(I_mat, + q.shape[:-2] + (q.shape[-1],)*2) + assert_almost_equal(matmul(swapaxes(q, -1, -2).conj(), q), stack_I_mat) + assert_almost_equal(np.triu(r[..., :, :]), r) + + # mode == 'reduced' + q1, r1 = linalg.qr(a, mode='reduced') + assert_(q1.dtype == a_dtype) + assert_(r1.dtype == a_dtype) + assert_(isinstance(q1, a_type)) + assert_(isinstance(r1, a_type)) + assert_(q1.shape[-2:] == (m, k)) + assert_(r1.shape[-2:] == (k, n)) + assert_almost_equal(matmul(q1, r1), a) + I_mat = np.identity(q1.shape[-1]) + stack_I_mat = np.broadcast_to(I_mat, + q1.shape[:-2] + (q1.shape[-1],)*2) + assert_almost_equal(matmul(swapaxes(q1, -1, -2).conj(), q1), + stack_I_mat) + assert_almost_equal(np.triu(r1[..., :, :]), r1) + + # mode == 'r' + r2 = linalg.qr(a, mode='r') + assert_(r2.dtype == a_dtype) + assert_(isinstance(r2, a_type)) + assert_almost_equal(r2, r1) + + @pytest.mark.parametrize("size", [ + (3, 4), (4, 3), (4, 4), + (3, 0), (0, 3)]) + @pytest.mark.parametrize("outer_size", [ + (2, 2), (2,), (2, 3, 4)]) + @pytest.mark.parametrize("dt", [ + np.single, np.double, + np.csingle, np.cdouble]) + def test_stacked_inputs(self, outer_size, size, dt): + + A = np.random.normal(size=outer_size + size).astype(dt) + B = np.random.normal(size=outer_size + size).astype(dt) + self.check_qr_stacked(A) + self.check_qr_stacked(A + 1.j*B) + + +class TestCholesky: + # TODO: are there no other tests for cholesky? 
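+ # NOTE (editorial, not upstream): the test below builds a = m^H m, which is + # Hermitian positive semi-definite by construction, and verifies that the + # Cholesky factor c satisfies c @ c^H == a within a dtype-dependent + # tolerance.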
+ + def test_basic_property(self): + # Check A = L L^H + shapes = [(1, 1), (2, 2), (3, 3), (50, 50), (3, 10, 10)] + dtypes = (np.float32, np.float64, np.complex64, np.complex128) + + for shape, dtype in itertools.product(shapes, dtypes): + np.random.seed(1) + a = np.random.randn(*shape) + if np.issubdtype(dtype, np.complexfloating): + a = a + 1j*np.random.randn(*shape) + + t = list(range(len(shape))) + t[-2:] = -1, -2 + + a = np.matmul(a.transpose(t).conj(), a) + a = np.asarray(a, dtype=dtype) + + c = np.linalg.cholesky(a) + + b = np.matmul(c, c.transpose(t).conj()) + assert_allclose(b, a, + err_msg=f'{shape} {dtype}\n{a}\n{c}', + atol=500 * a.shape[0] * np.finfo(dtype).eps) + + def test_0_size(self): + class ArraySubclass(np.ndarray): + pass + a = np.zeros((0, 1, 1), dtype=np.int_).view(ArraySubclass) + res = linalg.cholesky(a) + assert_equal(a.shape, res.shape) + assert_(res.dtype.type is np.float64) + # for documentation purpose: + assert_(isinstance(res, np.ndarray)) + + a = np.zeros((1, 0, 0), dtype=np.complex64).view(ArraySubclass) + res = linalg.cholesky(a) + assert_equal(a.shape, res.shape) + assert_(res.dtype.type is np.complex64) + assert_(isinstance(res, np.ndarray)) + + +def test_byteorder_check(): + # Byte order check should pass for native order + if sys.byteorder == 'little': + native = '<' + else: + native = '>' + + for dtt in (np.float32, np.float64): + arr = np.eye(4, dtype=dtt) + n_arr = arr.newbyteorder(native) + sw_arr = arr.newbyteorder('S').byteswap() + assert_equal(arr.dtype.byteorder, '=') + for routine in (linalg.inv, linalg.det, linalg.pinv): + # Normal call + res = routine(arr) + # Native but not '=' + assert_array_equal(res, routine(n_arr)) + # Swapped + assert_array_equal(res, routine(sw_arr)) + + +def test_generalized_raise_multiloop(): + # It should raise an error even if the error doesn't occur in the + # last iteration of the ufunc inner loop + + invertible = np.array([[1, 2], [3, 4]]) + non_invertible = np.array([[1, 1], [1, 1]]) + + x = np.zeros([4, 4, 2, 2])[1::2] + x[...] = invertible + x[0, 0] = non_invertible + + assert_raises(np.linalg.LinAlgError, np.linalg.inv, x) + + +def test_xerbla_override(): + # Check that our xerbla has been successfully linked in. If it is not, + # the default xerbla routine is called, which prints a message to stdout + # and may, or may not, abort the process depending on the LAPACK package. + + XERBLA_OK = 255 + + try: + pid = os.fork() + except (OSError, AttributeError): + # fork failed, or not running on POSIX + pytest.skip("Not POSIX or fork failed.") + + if pid == 0: + # child; close i/o file handles + os.close(1) + os.close(0) + # Avoid producing core files. + import resource + resource.setrlimit(resource.RLIMIT_CORE, (0, 0)) + # These calls may abort. + try: + np.linalg.lapack_lite.xerbla() + except ValueError: + pass + except Exception: + os._exit(os.EX_CONFIG) + + try: + a = np.array([[1.]]) + np.linalg.lapack_lite.dorgqr( + 1, 1, 1, a, + 0, # <- invalid value + a, a, 0, 0) + except ValueError as e: + if "DORGQR parameter number 5" in str(e): + # success, reuse error code to mark success as + # FORTRAN STOP returns as success. + os._exit(XERBLA_OK) + + # Did not abort, but our xerbla was not linked in. 
+ os._exit(os.EX_CONFIG) + else: + # parent + pid, status = os.wait() + if os.WEXITSTATUS(status) != XERBLA_OK: + pytest.skip('Numpy xerbla not linked in.') + + +@pytest.mark.slow +def test_sdot_bug_8577(): + # Regression test that loading certain other libraries does not + # lead to wrong results in float32 linear algebra. + # + # There's a bug gh-8577 on OSX that can trigger this, and perhaps + # there are also other situations in which it occurs. + # + # Do the check in a separate process. + + bad_libs = ['PyQt5.QtWidgets', 'IPython'] + + template = textwrap.dedent(""" + import sys + {before} + try: + import {bad_lib} + except ImportError: + sys.exit(0) + {after} + x = np.ones(2, dtype=np.float32) + sys.exit(0 if np.allclose(x.dot(x), 2.0) else 1) + """) + + for bad_lib in bad_libs: + code = template.format(before="import numpy as np", after="", + bad_lib=bad_lib) + subprocess.check_call([sys.executable, "-c", code]) + + # Swapped import order + code = template.format(after="import numpy as np", before="", + bad_lib=bad_lib) + subprocess.check_call([sys.executable, "-c", code]) + + +class TestMultiDot: + + def test_basic_function_with_three_arguments(self): + # multi_dot with three arguments uses a fast hand-coded algorithm to + # determine the optimal order. Therefore test it separately. + A = np.random.random((6, 2)) + B = np.random.random((2, 6)) + C = np.random.random((6, 2)) + + assert_almost_equal(multi_dot([A, B, C]), A.dot(B).dot(C)) + assert_almost_equal(multi_dot([A, B, C]), np.dot(A, np.dot(B, C))) + + def test_basic_function_with_two_arguments(self): + # separate code path with two arguments + A = np.random.random((6, 2)) + B = np.random.random((2, 6)) + + assert_almost_equal(multi_dot([A, B]), A.dot(B)) + assert_almost_equal(multi_dot([A, B]), np.dot(A, B)) + + def test_basic_function_with_dynamic_programming_optimization(self): + # multi_dot with four or more arguments uses the dynamic programming + # optimization and therefore deserves a separate test. + A = np.random.random((6, 2)) + B = np.random.random((2, 6)) + C = np.random.random((6, 2)) + D = np.random.random((2, 1)) + assert_almost_equal(multi_dot([A, B, C, D]), A.dot(B).dot(C).dot(D)) + + def test_vector_as_first_argument(self): + # The first argument can be 1-D + A1d = np.random.random(2) # 1-D + B = np.random.random((2, 6)) + C = np.random.random((6, 2)) + D = np.random.random((2, 2)) + + # the result should be 1-D + assert_equal(multi_dot([A1d, B, C, D]).shape, (2,)) + + def test_vector_as_last_argument(self): + # The last argument can be 1-D + A = np.random.random((6, 2)) + B = np.random.random((2, 6)) + C = np.random.random((6, 2)) + D1d = np.random.random(2) # 1-D + + # the result should be 1-D + assert_equal(multi_dot([A, B, C, D1d]).shape, (6,)) + + def test_vector_as_first_and_last_argument(self): + # The first and last arguments can be 1-D + A1d = np.random.random(2) # 1-D + B = np.random.random((2, 6)) + C = np.random.random((6, 2)) + D1d = np.random.random(2) # 1-D + + # the result should be a scalar + assert_equal(multi_dot([A1d, B, C, D1d]).shape, ()) + + def test_three_arguments_and_out(self): + # multi_dot with three arguments uses a fast hand-coded algorithm to + # determine the optimal order. Therefore test it separately. 
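+ # NOTE (editorial, not upstream): besides the product value, the out= + # contract is checked: multi_dot must fill the provided array and return + # that same object.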
+ A = np.random.random((6, 2)) + B = np.random.random((2, 6)) + C = np.random.random((6, 2)) + + out = np.zeros((6, 2)) + ret = multi_dot([A, B, C], out=out) + assert out is ret + assert_almost_equal(out, A.dot(B).dot(C)) + assert_almost_equal(out, np.dot(A, np.dot(B, C))) + + def test_two_arguments_and_out(self): + # separate code path with two arguments + A = np.random.random((6, 2)) + B = np.random.random((2, 6)) + out = np.zeros((6, 6)) + ret = multi_dot([A, B], out=out) + assert out is ret + assert_almost_equal(out, A.dot(B)) + assert_almost_equal(out, np.dot(A, B)) + + def test_dynamic_programming_optimization_and_out(self): + # multi_dot with four or more arguments uses the dynamic programming + # optimization and therefore deserve a separate test + A = np.random.random((6, 2)) + B = np.random.random((2, 6)) + C = np.random.random((6, 2)) + D = np.random.random((2, 1)) + out = np.zeros((6, 1)) + ret = multi_dot([A, B, C, D], out=out) + assert out is ret + assert_almost_equal(out, A.dot(B).dot(C).dot(D)) + + def test_dynamic_programming_logic(self): + # Test for the dynamic programming part + # This test is directly taken from Cormen page 376. + arrays = [np.random.random((30, 35)), + np.random.random((35, 15)), + np.random.random((15, 5)), + np.random.random((5, 10)), + np.random.random((10, 20)), + np.random.random((20, 25))] + m_expected = np.array([[0., 15750., 7875., 9375., 11875., 15125.], + [0., 0., 2625., 4375., 7125., 10500.], + [0., 0., 0., 750., 2500., 5375.], + [0., 0., 0., 0., 1000., 3500.], + [0., 0., 0., 0., 0., 5000.], + [0., 0., 0., 0., 0., 0.]]) + s_expected = np.array([[0, 1, 1, 3, 3, 3], + [0, 0, 2, 3, 3, 3], + [0, 0, 0, 3, 3, 3], + [0, 0, 0, 0, 4, 5], + [0, 0, 0, 0, 0, 5], + [0, 0, 0, 0, 0, 0]], dtype=int) + s_expected -= 1 # Cormen uses 1-based index, python does not. + + s, m = _multi_dot_matrix_chain_order(arrays, return_costs=True) + + # Only the upper triangular part (without the diagonal) is interesting. 
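+ # NOTE (editorial, not upstream): in the matrix-chain DP, m[i, j] is the + # minimal scalar-multiplication cost of the product A_i..A_j and s[i, j] + # the optimal split point; cells below the diagonal are never written, + # hence only the upper triangles are compared.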
+ assert_almost_equal(np.triu(s[:-1, 1:]), + np.triu(s_expected[:-1, 1:])) + assert_almost_equal(np.triu(m), np.triu(m_expected)) + + def test_too_few_input_arrays(self): + assert_raises(ValueError, multi_dot, []) + assert_raises(ValueError, multi_dot, [np.random.random((3, 3))]) + + +class TestTensorinv: + + @pytest.mark.parametrize("arr, ind", [ + (np.ones((4, 6, 8, 2)), 2), + (np.ones((3, 3, 2)), 1), + ]) + def test_non_square_handling(self, arr, ind): + with assert_raises(LinAlgError): + linalg.tensorinv(arr, ind=ind) + + @pytest.mark.parametrize("shape, ind", [ + # examples from docstring + ((4, 6, 8, 3), 2), + ((24, 8, 3), 1), + ]) + def test_tensorinv_shape(self, shape, ind): + a = np.eye(24) + a.shape = shape + ainv = linalg.tensorinv(a=a, ind=ind) + expected = a.shape[ind:] + a.shape[:ind] + actual = ainv.shape + assert_equal(actual, expected) + + @pytest.mark.parametrize("ind", [ + 0, -2, + ]) + def test_tensorinv_ind_limit(self, ind): + a = np.eye(24) + a.shape = (4, 6, 8, 3) + with assert_raises(ValueError): + linalg.tensorinv(a=a, ind=ind) + + def test_tensorinv_result(self): + # mimic a docstring example + a = np.eye(24) + a.shape = (24, 8, 3) + ainv = linalg.tensorinv(a, ind=1) + b = np.ones(24) + assert_allclose(np.tensordot(ainv, b, 1), np.linalg.tensorsolve(a, b)) + + +def test_unsupported_commontype(): + # linalg gracefully handles unsupported type + arr = np.array([[1, -2], [2, 5]], dtype='float16') + with assert_raises_regex(TypeError, "unsupported in linalg"): + linalg.cholesky(arr) + + +#@pytest.mark.slow +#@pytest.mark.xfail(not HAS_LAPACK64, run=False, +# reason="Numpy not compiled with 64-bit BLAS/LAPACK") +#@requires_memory(free_bytes=16e9) +@pytest.mark.skip(reason="Bad memory reports lead to OOM in ci testing") +def test_blas64_dot(): + n = 2**32 + a = np.zeros([1, n], dtype=np.float32) + b = np.ones([1, 1], dtype=np.float32) + a[0,-1] = 1 + c = np.dot(b, a) + assert_equal(c[0,-1], 1) + + +@pytest.mark.xfail(not HAS_LAPACK64, + reason="Numpy not compiled with 64-bit BLAS/LAPACK") +def test_blas64_geqrf_lwork_smoketest(): + # Smoke test LAPACK geqrf lwork call with 64-bit integers + dtype = np.float64 + lapack_routine = np.linalg.lapack_lite.dgeqrf + + m = 2**32 + 1 + n = 2**32 + 1 + lda = m + + # Dummy arrays, not referenced by the lapack routine, so don't + # need to be of the right size + a = np.zeros([1, 1], dtype=dtype) + work = np.zeros([1], dtype=dtype) + tau = np.zeros([1], dtype=dtype) + + # Size query + results = lapack_routine(m, n, a, lda, tau, work, -1, 0) + assert_equal(results['info'], 0) + assert_equal(results['m'], m) + assert_equal(results['n'], n) + + # Should result in an integer of a reasonable size + lwork = int(work.item()) + assert_(2**32 < lwork < 2**42) diff --git a/.local/lib/python3.8/site-packages/numpy/linalg/tests/test_regression.py b/.local/lib/python3.8/site-packages/numpy/linalg/tests/test_regression.py new file mode 100644 index 0000000..7ed932b --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/linalg/tests/test_regression.py @@ -0,0 +1,148 @@ +""" Test functions for linalg module +""" +import warnings + +import numpy as np +from numpy import linalg, arange, float64, array, dot, transpose +from numpy.testing import ( + assert_, assert_raises, assert_equal, assert_array_equal, + assert_array_almost_equal, assert_array_less +) + + +class TestRegression: + + def test_eig_build(self): + # Ticket #652 + rva = array([1.03221168e+02 + 0.j, + -1.91843603e+01 + 0.j, + -6.04004526e-01 + 15.84422474j, + -6.04004526e-01 - 
15.84422474j, + -1.13692929e+01 + 0.j, + -6.57612485e-01 + 10.41755503j, + -6.57612485e-01 - 10.41755503j, + 1.82126812e+01 + 0.j, + 1.06011014e+01 + 0.j, + 7.80732773e+00 + 0.j, + -7.65390898e-01 + 0.j, + 1.51971555e-15 + 0.j, + -1.51308713e-15 + 0.j]) + a = arange(13 * 13, dtype=float64) + a.shape = (13, 13) + a = a % 17 + va, ve = linalg.eig(a) + va.sort() + rva.sort() + assert_array_almost_equal(va, rva) + + def test_eigh_build(self): + # Ticket 662. + rvals = [68.60568999, 89.57756725, 106.67185574] + + cov = array([[77.70273908, 3.51489954, 15.64602427], + [3.51489954, 88.97013878, -1.07431931], + [15.64602427, -1.07431931, 98.18223512]]) + + vals, vecs = linalg.eigh(cov) + assert_array_almost_equal(vals, rvals) + + def test_svd_build(self): + # Ticket 627. + a = array([[0., 1.], [1., 1.], [2., 1.], [3., 1.]]) + m, n = a.shape + u, s, vh = linalg.svd(a) + + b = dot(transpose(u[:, n:]), a) + + assert_array_almost_equal(b, np.zeros((2, 2))) + + def test_norm_vector_badarg(self): + # Regression for #786: Frobenius norm for vectors raises + # ValueError. + assert_raises(ValueError, linalg.norm, array([1., 2., 3.]), 'fro') + + def test_lapack_endian(self): + # For bug #1482 + a = array([[5.7998084, -2.1825367], + [-2.1825367, 9.85910595]], dtype='>f8') + b = array(a, dtype='<f8') + + ap = linalg.cholesky(a) + bp = linalg.cholesky(b) + assert_array_equal(ap, bp) + + def test_large_svd_32bit(self): + # See gh-4442, 64bit would require very large/slow matrices. + x = np.eye(1000, 66) + np.linalg.svd(x) + + def test_svd_no_uv(self): + # gh-4733 + for shape in (3, 4), (4, 4), (4, 3): + for t in float, complex: + a = np.ones(shape, dtype=t) + w = linalg.svd(a, compute_uv=False) + c = np.count_nonzero(np.absolute(w) > 0.5) + assert_equal(c, 1) + assert_equal(np.linalg.matrix_rank(a), 1) + assert_array_less(1, np.linalg.norm(a, ord=2)) + + def test_norm_object_array(self): + # gh-7575 + testvector = np.array([np.array([0, 1]), 0, 0], dtype=object) + + norm = linalg.norm(testvector) + assert_array_equal(norm, [0, 1]) + assert_(norm.dtype == np.dtype('float64')) + + norm = linalg.norm(testvector, ord=1) + assert_array_equal(norm, [0, 1]) + assert_(norm.dtype != np.dtype('float64')) + + norm = linalg.norm(testvector, ord=2) + assert_array_equal(norm, [0, 1]) + assert_(norm.dtype == np.dtype('float64')) + + assert_raises(ValueError, linalg.norm, testvector, ord='fro') + assert_raises(ValueError, linalg.norm, testvector, ord='nuc') + assert_raises(ValueError, linalg.norm, testvector, ord=np.inf) + assert_raises(ValueError, linalg.norm, testvector, ord=-np.inf) + with warnings.catch_warnings(): + warnings.simplefilter("error", DeprecationWarning) + assert_raises((AttributeError, DeprecationWarning), + linalg.norm, testvector, ord=0) + assert_raises(ValueError, linalg.norm, testvector, ord=-1) + assert_raises(ValueError, linalg.norm, testvector, ord=-2) + + testmatrix = np.array([[np.array([0, 1]), 0, 0], + [0, 0, 0]], dtype=object) + + norm = linalg.norm(testmatrix) + assert_array_equal(norm, [0, 1]) + assert_(norm.dtype == np.dtype('float64')) + + norm = linalg.norm(testmatrix, ord='fro') + assert_array_equal(norm, [0, 1]) + assert_(norm.dtype == np.dtype('float64')) + + assert_raises(TypeError, linalg.norm, testmatrix, ord='nuc') + assert_raises(ValueError, linalg.norm, testmatrix, ord=np.inf) + assert_raises(ValueError, linalg.norm, testmatrix, ord=-np.inf) + assert_raises(ValueError, linalg.norm, testmatrix, ord=0) + assert_raises(ValueError, linalg.norm, testmatrix, ord=1) + assert_raises(ValueError, linalg.norm, testmatrix, ord=-1) + assert_raises(TypeError, linalg.norm, testmatrix, ord=2) + assert_raises(TypeError, linalg.norm, testmatrix, ord=-2) + assert_raises(ValueError, linalg.norm, testmatrix, ord=3) + + def test_lstsq_complex_larger_rhs(self): + # gh-9891 + size = 20 + n_rhs = 70 + G = np.random.randn(size, size) + 1j * np.random.randn(size, size) + u = np.random.randn(size, n_rhs) + 1j * np.random.randn(size, n_rhs) + b = 
G.dot(u) + # This should work without segmentation fault. + u_lstsq, res, rank, sv = linalg.lstsq(G, b, rcond=None) + # check results just in case + assert_array_almost_equal(u_lstsq, u) diff --git a/.local/lib/python3.8/site-packages/numpy/ma/__init__.py b/.local/lib/python3.8/site-packages/numpy/ma/__init__.py new file mode 100644 index 0000000..870cc4e --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/ma/__init__.py @@ -0,0 +1,54 @@ +""" +============= +Masked Arrays +============= + +Arrays sometimes contain invalid or missing data. When doing operations +on such arrays, we wish to suppress invalid values, which is the purpose masked +arrays fulfill (an example of typical use is given below). + +For example, examine the following array: + +>>> x = np.array([2, 1, 3, np.nan, 5, 2, 3, np.nan]) + +When we try to calculate the mean of the data, the result is undetermined: + +>>> np.mean(x) +nan + +The mean is calculated using roughly ``np.sum(x)/len(x)``, but since +any number added to ``NaN`` [1]_ produces ``NaN``, this doesn't work. Enter +masked arrays: + +>>> m = np.ma.masked_array(x, np.isnan(x)) +>>> m +masked_array(data = [2.0 1.0 3.0 -- 5.0 2.0 3.0 --], + mask = [False False False True False False False True], + fill_value=1e+20) + +Here, we construct a masked array that suppress all ``NaN`` values. We +may now proceed to calculate the mean of the other values: + +>>> np.mean(m) +2.6666666666666665 + +.. [1] Not-a-Number, a floating point value that is the result of an + invalid operation. + +.. moduleauthor:: Pierre Gerard-Marchant +.. moduleauthor:: Jarrod Millman + +""" +from . import core +from .core import * + +from . import extras +from .extras import * + +__all__ = ['core', 'extras'] +__all__ += core.__all__ +__all__ += extras.__all__ + +from numpy._pytesttester import PytestTester +test = PytestTester(__name__) +del PytestTester diff --git a/.local/lib/python3.8/site-packages/numpy/ma/__init__.pyi b/.local/lib/python3.8/site-packages/numpy/ma/__init__.pyi new file mode 100644 index 0000000..26d44b5 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/ma/__init__.pyi @@ -0,0 +1,236 @@ +from typing import Any, List + +from numpy._pytesttester import PytestTester + +from numpy.ma import extras as extras + +from numpy.ma.core import ( + MAError as MAError, + MaskError as MaskError, + MaskType as MaskType, + MaskedArray as MaskedArray, + abs as abs, + absolute as absolute, + add as add, + all as all, + allclose as allclose, + allequal as allequal, + alltrue as alltrue, + amax as amax, + amin as amin, + angle as angle, + anom as anom, + anomalies as anomalies, + any as any, + append as append, + arange as arange, + arccos as arccos, + arccosh as arccosh, + arcsin as arcsin, + arcsinh as arcsinh, + arctan as arctan, + arctan2 as arctan2, + arctanh as arctanh, + argmax as argmax, + argmin as argmin, + argsort as argsort, + around as around, + array as array, + asanyarray as asanyarray, + asarray as asarray, + bitwise_and as bitwise_and, + bitwise_or as bitwise_or, + bitwise_xor as bitwise_xor, + bool_ as bool_, + ceil as ceil, + choose as choose, + clip as clip, + common_fill_value as common_fill_value, + compress as compress, + compressed as compressed, + concatenate as concatenate, + conjugate as conjugate, + convolve as convolve, + copy as copy, + correlate as correlate, + cos as cos, + cosh as cosh, + count as count, + cumprod as cumprod, + cumsum as cumsum, + default_fill_value as default_fill_value, + diag as diag, + diagonal as diagonal, + diff as diff, + 
divide as divide, + empty as empty, + empty_like as empty_like, + equal as equal, + exp as exp, + expand_dims as expand_dims, + fabs as fabs, + filled as filled, + fix_invalid as fix_invalid, + flatten_mask as flatten_mask, + flatten_structured_array as flatten_structured_array, + floor as floor, + floor_divide as floor_divide, + fmod as fmod, + frombuffer as frombuffer, + fromflex as fromflex, + fromfunction as fromfunction, + getdata as getdata, + getmask as getmask, + getmaskarray as getmaskarray, + greater as greater, + greater_equal as greater_equal, + harden_mask as harden_mask, + hypot as hypot, + identity as identity, + ids as ids, + indices as indices, + inner as inner, + innerproduct as innerproduct, + isMA as isMA, + isMaskedArray as isMaskedArray, + is_mask as is_mask, + is_masked as is_masked, + isarray as isarray, + left_shift as left_shift, + less as less, + less_equal as less_equal, + log as log, + log10 as log10, + log2 as log2, + logical_and as logical_and, + logical_not as logical_not, + logical_or as logical_or, + logical_xor as logical_xor, + make_mask as make_mask, + make_mask_descr as make_mask_descr, + make_mask_none as make_mask_none, + mask_or as mask_or, + masked as masked, + masked_array as masked_array, + masked_equal as masked_equal, + masked_greater as masked_greater, + masked_greater_equal as masked_greater_equal, + masked_inside as masked_inside, + masked_invalid as masked_invalid, + masked_less as masked_less, + masked_less_equal as masked_less_equal, + masked_not_equal as masked_not_equal, + masked_object as masked_object, + masked_outside as masked_outside, + masked_print_option as masked_print_option, + masked_singleton as masked_singleton, + masked_values as masked_values, + masked_where as masked_where, + max as max, + maximum as maximum, + maximum_fill_value as maximum_fill_value, + mean as mean, + min as min, + minimum as minimum, + minimum_fill_value as minimum_fill_value, + mod as mod, + multiply as multiply, + mvoid as mvoid, + ndim as ndim, + negative as negative, + nomask as nomask, + nonzero as nonzero, + not_equal as not_equal, + ones as ones, + outer as outer, + outerproduct as outerproduct, + power as power, + prod as prod, + product as product, + ptp as ptp, + put as put, + putmask as putmask, + ravel as ravel, + remainder as remainder, + repeat as repeat, + reshape as reshape, + resize as resize, + right_shift as right_shift, + round as round, + round_ as round_, + set_fill_value as set_fill_value, + shape as shape, + sin as sin, + sinh as sinh, + size as size, + soften_mask as soften_mask, + sometrue as sometrue, + sort as sort, + sqrt as sqrt, + squeeze as squeeze, + std as std, + subtract as subtract, + sum as sum, + swapaxes as swapaxes, + take as take, + tan as tan, + tanh as tanh, + trace as trace, + transpose as transpose, + true_divide as true_divide, + var as var, + where as where, + zeros as zeros, +) + +from numpy.ma.extras import ( + apply_along_axis as apply_along_axis, + apply_over_axes as apply_over_axes, + atleast_1d as atleast_1d, + atleast_2d as atleast_2d, + atleast_3d as atleast_3d, + average as average, + clump_masked as clump_masked, + clump_unmasked as clump_unmasked, + column_stack as column_stack, + compress_cols as compress_cols, + compress_nd as compress_nd, + compress_rowcols as compress_rowcols, + compress_rows as compress_rows, + count_masked as count_masked, + corrcoef as corrcoef, + cov as cov, + diagflat as diagflat, + dot as dot, + dstack as dstack, + ediff1d as ediff1d, + flatnotmasked_contiguous as 
flatnotmasked_contiguous, + flatnotmasked_edges as flatnotmasked_edges, + hsplit as hsplit, + hstack as hstack, + isin as isin, + in1d as in1d, + intersect1d as intersect1d, + mask_cols as mask_cols, + mask_rowcols as mask_rowcols, + mask_rows as mask_rows, + masked_all as masked_all, + masked_all_like as masked_all_like, + median as median, + mr_ as mr_, + notmasked_contiguous as notmasked_contiguous, + notmasked_edges as notmasked_edges, + polyfit as polyfit, + row_stack as row_stack, + setdiff1d as setdiff1d, + setxor1d as setxor1d, + stack as stack, + unique as unique, + union1d as union1d, + vander as vander, + vstack as vstack, +) + +__all__: List[str] +__path__: List[str] +test: PytestTester diff --git a/.local/lib/python3.8/site-packages/numpy/ma/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/ma/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..7499e8c Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/ma/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/ma/__pycache__/bench.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/ma/__pycache__/bench.cpython-38.pyc new file mode 100644 index 0000000..d2f1246 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/ma/__pycache__/bench.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/ma/__pycache__/core.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/ma/__pycache__/core.cpython-38.pyc new file mode 100644 index 0000000..d3a4fe2 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/ma/__pycache__/core.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/ma/__pycache__/extras.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/ma/__pycache__/extras.cpython-38.pyc new file mode 100644 index 0000000..ce7a4ed Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/ma/__pycache__/extras.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/ma/__pycache__/mrecords.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/ma/__pycache__/mrecords.cpython-38.pyc new file mode 100644 index 0000000..af8b0c8 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/ma/__pycache__/mrecords.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/ma/__pycache__/setup.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/ma/__pycache__/setup.cpython-38.pyc new file mode 100644 index 0000000..787ab2e Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/ma/__pycache__/setup.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/ma/__pycache__/testutils.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/ma/__pycache__/testutils.cpython-38.pyc new file mode 100644 index 0000000..0c97bd1 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/ma/__pycache__/testutils.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/ma/__pycache__/timer_comparison.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/ma/__pycache__/timer_comparison.cpython-38.pyc new file mode 100644 index 0000000..3e55ecc Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/ma/__pycache__/timer_comparison.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/ma/bench.py b/.local/lib/python3.8/site-packages/numpy/ma/bench.py new file mode 100644 index 0000000..5686568 --- 
/dev/null +++ b/.local/lib/python3.8/site-packages/numpy/ma/bench.py @@ -0,0 +1,130 @@ +#!/usr/bin/env python3 + +import timeit +import numpy + + +############################################################################### +# Global variables # +############################################################################### + + +# Small arrays +xs = numpy.random.uniform(-1, 1, 6).reshape(2, 3) +ys = numpy.random.uniform(-1, 1, 6).reshape(2, 3) +zs = xs + 1j * ys +m1 = [[True, False, False], [False, False, True]] +m2 = [[True, False, True], [False, False, True]] +nmxs = numpy.ma.array(xs, mask=m1) +nmys = numpy.ma.array(ys, mask=m2) +nmzs = numpy.ma.array(zs, mask=m1) + +# Big arrays +xl = numpy.random.uniform(-1, 1, 100*100).reshape(100, 100) +yl = numpy.random.uniform(-1, 1, 100*100).reshape(100, 100) +zl = xl + 1j * yl +maskx = xl > 0.8 +masky = yl < -0.8 +nmxl = numpy.ma.array(xl, mask=maskx) +nmyl = numpy.ma.array(yl, mask=masky) +nmzl = numpy.ma.array(zl, mask=maskx) + + +############################################################################### +# Functions # +############################################################################### + + +def timer(s, v='', nloop=500, nrep=3): + units = ["s", "ms", "µs", "ns"] + scaling = [1, 1e3, 1e6, 1e9] + print("%s : %-50s : " % (v, s), end=' ') + varnames = ["%ss,nm%ss,%sl,nm%sl" % tuple(x*4) for x in 'xyz'] + setup = 'from __main__ import numpy, ma, %s' % ','.join(varnames) + Timer = timeit.Timer(stmt=s, setup=setup) + best = min(Timer.repeat(nrep, nloop)) / nloop + if best > 0.0: + order = min(-int(numpy.floor(numpy.log10(best)) // 3), 3) + else: + order = 3 + print("%d loops, best of %d: %.*g %s per loop" % (nloop, nrep, + 3, + best * scaling[order], + units[order])) + + +def compare_functions_1v(func, nloop=500, + xs=xs, nmxs=nmxs, xl=xl, nmxl=nmxl): + funcname = func.__name__ + print("-"*50) + print(f'{funcname} on small arrays') + module, data = "numpy.ma", "nmxs" + timer("%(module)s.%(funcname)s(%(data)s)" % locals(), v="%11s" % module, nloop=nloop) + + print("%s on large arrays" % funcname) + module, data = "numpy.ma", "nmxl" + timer("%(module)s.%(funcname)s(%(data)s)" % locals(), v="%11s" % module, nloop=nloop) + return + +def compare_methods(methodname, args, vars='x', nloop=500, test=True, + xs=xs, nmxs=nmxs, xl=xl, nmxl=nmxl): + print("-"*50) + print(f'{methodname} on small arrays') + data, ver = f'nm{vars}s', 'numpy.ma' + timer("%(data)s.%(methodname)s(%(args)s)" % locals(), v=ver, nloop=nloop) + + print("%s on large arrays" % methodname) + data, ver = "nm%sl" % vars, 'numpy.ma' + timer("%(data)s.%(methodname)s(%(args)s)" % locals(), v=ver, nloop=nloop) + return + +def compare_functions_2v(func, nloop=500, test=True, + xs=xs, nmxs=nmxs, + ys=ys, nmys=nmys, + xl=xl, nmxl=nmxl, + yl=yl, nmyl=nmyl): + funcname = func.__name__ + print("-"*50) + print(f'{funcname} on small arrays') + module, data = "numpy.ma", "nmxs,nmys" + timer("%(module)s.%(funcname)s(%(data)s)" % locals(), v="%11s" % module, nloop=nloop) + + print(f'{funcname} on large arrays') + module, data = "numpy.ma", "nmxl,nmyl" + timer("%(module)s.%(funcname)s(%(data)s)" % locals(), v="%11s" % module, nloop=nloop) + return + + +if __name__ == '__main__': + compare_functions_1v(numpy.sin) + compare_functions_1v(numpy.log) + compare_functions_1v(numpy.sqrt) + + compare_functions_2v(numpy.multiply) + compare_functions_2v(numpy.divide) + compare_functions_2v(numpy.power) + + compare_methods('ravel', '', nloop=1000) + compare_methods('conjugate', '', 'z', nloop=1000) 
+ compare_methods('transpose', '', nloop=1000) + compare_methods('compressed', '', nloop=1000) + compare_methods('__getitem__', '0', nloop=1000) + compare_methods('__getitem__', '(0,0)', nloop=1000) + compare_methods('__getitem__', '[0,-1]', nloop=1000) + compare_methods('__setitem__', '0, 17', nloop=1000, test=False) + compare_methods('__setitem__', '(0,0), 17', nloop=1000, test=False) + + print("-"*50) + print("__setitem__ on small arrays") + timer('nmxs.__setitem__((-1,0),numpy.ma.masked)', 'numpy.ma ', nloop=10000) + + print("-"*50) + print("__setitem__ on large arrays") + timer('nmxl.__setitem__((-1,0),numpy.ma.masked)', 'numpy.ma ', nloop=10000) + + print("-"*50) + print("where on small arrays") + timer('numpy.ma.where(nmxs>2,nmxs,nmys)', 'numpy.ma ', nloop=1000) + print("-"*50) + print("where on large arrays") + timer('numpy.ma.where(nmxl>2,nmxl,nmyl)', 'numpy.ma ', nloop=100) diff --git a/.local/lib/python3.8/site-packages/numpy/ma/core.py b/.local/lib/python3.8/site-packages/numpy/ma/core.py new file mode 100644 index 0000000..491c2c6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/ma/core.py @@ -0,0 +1,8283 @@ +""" +numpy.ma : a package to handle missing or invalid values. + +This package was initially written for numarray by Paul F. Dubois +at Lawrence Livermore National Laboratory. +In 2006, the package was completely rewritten by Pierre Gerard-Marchant +(University of Georgia) to make the MaskedArray class a subclass of ndarray, +and to improve support of structured arrays. + + +Copyright 1999, 2000, 2001 Regents of the University of California. +Released for unlimited redistribution. + +* Adapted for numpy_core 2005 by Travis Oliphant and (mainly) Paul Dubois. +* Subclassing of the base `ndarray` 2006 by Pierre Gerard-Marchant + (pgmdevlist_AT_gmail_DOT_com) +* Improvements suggested by Reggie Dugard (reggie_AT_merfinllc_DOT_com) + +.. 
moduleauthor:: Pierre Gerard-Marchant + +""" +# pylint: disable-msg=E1002 +import builtins +import inspect +import operator +import warnings +import textwrap +import re +from functools import reduce + +import numpy as np +import numpy.core.umath as umath +import numpy.core.numerictypes as ntypes +from numpy import ndarray, amax, amin, iscomplexobj, bool_, _NoValue +from numpy import array as narray +from numpy.lib.function_base import angle +from numpy.compat import ( + getargspec, formatargspec, long, unicode, bytes + ) +from numpy import expand_dims +from numpy.core.numeric import normalize_axis_tuple + + +__all__ = [ + 'MAError', 'MaskError', 'MaskType', 'MaskedArray', 'abs', 'absolute', + 'add', 'all', 'allclose', 'allequal', 'alltrue', 'amax', 'amin', + 'angle', 'anom', 'anomalies', 'any', 'append', 'arange', 'arccos', + 'arccosh', 'arcsin', 'arcsinh', 'arctan', 'arctan2', 'arctanh', + 'argmax', 'argmin', 'argsort', 'around', 'array', 'asanyarray', + 'asarray', 'bitwise_and', 'bitwise_or', 'bitwise_xor', 'bool_', 'ceil', + 'choose', 'clip', 'common_fill_value', 'compress', 'compressed', + 'concatenate', 'conjugate', 'convolve', 'copy', 'correlate', 'cos', 'cosh', + 'count', 'cumprod', 'cumsum', 'default_fill_value', 'diag', 'diagonal', + 'diff', 'divide', 'empty', 'empty_like', 'equal', 'exp', + 'expand_dims', 'fabs', 'filled', 'fix_invalid', 'flatten_mask', + 'flatten_structured_array', 'floor', 'floor_divide', 'fmod', + 'frombuffer', 'fromflex', 'fromfunction', 'getdata', 'getmask', + 'getmaskarray', 'greater', 'greater_equal', 'harden_mask', 'hypot', + 'identity', 'ids', 'indices', 'inner', 'innerproduct', 'isMA', + 'isMaskedArray', 'is_mask', 'is_masked', 'isarray', 'left_shift', + 'less', 'less_equal', 'log', 'log10', 'log2', + 'logical_and', 'logical_not', 'logical_or', 'logical_xor', 'make_mask', + 'make_mask_descr', 'make_mask_none', 'mask_or', 'masked', + 'masked_array', 'masked_equal', 'masked_greater', + 'masked_greater_equal', 'masked_inside', 'masked_invalid', + 'masked_less', 'masked_less_equal', 'masked_not_equal', + 'masked_object', 'masked_outside', 'masked_print_option', + 'masked_singleton', 'masked_values', 'masked_where', 'max', 'maximum', + 'maximum_fill_value', 'mean', 'min', 'minimum', 'minimum_fill_value', + 'mod', 'multiply', 'mvoid', 'ndim', 'negative', 'nomask', 'nonzero', + 'not_equal', 'ones', 'ones_like', 'outer', 'outerproduct', 'power', 'prod', + 'product', 'ptp', 'put', 'putmask', 'ravel', 'remainder', + 'repeat', 'reshape', 'resize', 'right_shift', 'round', 'round_', + 'set_fill_value', 'shape', 'sin', 'sinh', 'size', 'soften_mask', + 'sometrue', 'sort', 'sqrt', 'squeeze', 'std', 'subtract', 'sum', + 'swapaxes', 'take', 'tan', 'tanh', 'trace', 'transpose', 'true_divide', + 'var', 'where', 'zeros', 'zeros_like', + ] + +MaskType = np.bool_ +nomask = MaskType(0) + +class MaskedArrayFutureWarning(FutureWarning): + pass + +def _deprecate_argsort_axis(arr): + """ + Adjust the axis passed to argsort, warning if necessary + + Parameters + ---------- + arr + The array which argsort was called on + + np.ma.argsort has a long-term bug where the default of the axis argument + is wrong (gh-8701), which now must be kept for backwards compatibility. + Thankfully, this only makes a difference when arrays are 2- or more- + dimensional, so we only need a warning then. + """ + if arr.ndim <= 1: + # no warning needed - but switch to -1 anyway, to avoid surprising + # subclasses, which are more likely to implement scalar axes. 
+ return -1 + else: + # 2017-04-11, Numpy 1.13.0, gh-8701: warn on axis default + warnings.warn( + "In the future the default for argsort will be axis=-1, not the " + "current None, to match its documentation and np.argsort. " + "Explicitly pass -1 or None to silence this warning.", + MaskedArrayFutureWarning, stacklevel=3) + return None + + +def doc_note(initialdoc, note): + """ + Adds a Notes section to an existing docstring. + + """ + if initialdoc is None: + return + if note is None: + return initialdoc + + notesplit = re.split(r'\n\s*?Notes\n\s*?-----', inspect.cleandoc(initialdoc)) + notedoc = "\n\nNotes\n-----\n%s\n" % inspect.cleandoc(note) + + return ''.join(notesplit[:1] + [notedoc] + notesplit[1:]) + + +def get_object_signature(obj): + """ + Get the signature from obj + + """ + try: + sig = formatargspec(*getargspec(obj)) + except TypeError: + sig = '' + return sig + + +############################################################################### +# Exceptions # +############################################################################### + + +class MAError(Exception): + """ + Class for masked array related errors. + + """ + pass + + +class MaskError(MAError): + """ + Class for mask related errors. + + """ + pass + + +############################################################################### +# Filling options # +############################################################################### + + +# b: boolean - c: complex - f: floats - i: integer - O: object - S: string +default_filler = {'b': True, + 'c': 1.e20 + 0.0j, + 'f': 1.e20, + 'i': 999999, + 'O': '?', + 'S': b'N/A', + 'u': 999999, + 'V': b'???', + 'U': u'N/A' + } + +# Add datetime64 and timedelta64 types +for v in ["Y", "M", "W", "D", "h", "m", "s", "ms", "us", "ns", "ps", + "fs", "as"]: + default_filler["M8[" + v + "]"] = np.datetime64("NaT", v) + default_filler["m8[" + v + "]"] = np.timedelta64("NaT", v) + +float_types_list = [np.half, np.single, np.double, np.longdouble, + np.csingle, np.cdouble, np.clongdouble] +max_filler = ntypes._minvals +max_filler.update([(k, -np.inf) for k in float_types_list[:4]]) +max_filler.update([(k, complex(-np.inf, -np.inf)) for k in float_types_list[-3:]]) + +min_filler = ntypes._maxvals +min_filler.update([(k, +np.inf) for k in float_types_list[:4]]) +min_filler.update([(k, complex(+np.inf, +np.inf)) for k in float_types_list[-3:]]) + +del float_types_list + +def _recursive_fill_value(dtype, f): + """ + Recursively produce a fill value for `dtype`, calling f on scalar dtypes + """ + if dtype.names is not None: + vals = tuple(_recursive_fill_value(dtype[name], f) for name in dtype.names) + return np.array(vals, dtype=dtype)[()] # decay to void scalar from 0d + elif dtype.subdtype: + subtype, shape = dtype.subdtype + subval = _recursive_fill_value(subtype, f) + return np.full(shape, subval) + else: + return f(dtype) + + +def _get_dtype_of(obj): + """ Convert the argument for *_fill_value into a dtype """ + if isinstance(obj, np.dtype): + return obj + elif hasattr(obj, 'dtype'): + return obj.dtype + else: + return np.asanyarray(obj).dtype + + +def default_fill_value(obj): + """ + Return the default fill value for the argument object. + + The default filling value depends on the datatype of the input + array or the type of the input scalar: + + ======== ======== + datatype default + ======== ======== + bool True + int 999999 + float 1.e20 + complex 1.e20+0j + object '?' 
+ string 'N/A' + ======== ======== + + For structured types, a structured scalar is returned, with each field the + default fill value for its type. + + For subarray types, the fill value is an array of the same size containing + the default scalar fill value. + + Parameters + ---------- + obj : ndarray, dtype or scalar + The array data-type or scalar for which the default fill value + is returned. + + Returns + ------- + fill_value : scalar + The default fill value. + + Examples + -------- + >>> np.ma.default_fill_value(1) + 999999 + >>> np.ma.default_fill_value(np.array([1.1, 2., np.pi])) + 1e+20 + >>> np.ma.default_fill_value(np.dtype(complex)) + (1e+20+0j) + + """ + def _scalar_fill_value(dtype): + if dtype.kind in 'Mm': + return default_filler.get(dtype.str[1:], '?') + else: + return default_filler.get(dtype.kind, '?') + + dtype = _get_dtype_of(obj) + return _recursive_fill_value(dtype, _scalar_fill_value) + + +def _extremum_fill_value(obj, extremum, extremum_name): + + def _scalar_fill_value(dtype): + try: + return extremum[dtype] + except KeyError as e: + raise TypeError( + f"Unsuitable type {dtype} for calculating {extremum_name}." + ) from None + + dtype = _get_dtype_of(obj) + return _recursive_fill_value(dtype, _scalar_fill_value) + + +def minimum_fill_value(obj): + """ + Return the maximum value that can be represented by the dtype of an object. + + This function is useful for calculating a fill value suitable for + taking the minimum of an array with a given dtype. + + Parameters + ---------- + obj : ndarray, dtype or scalar + An object that can be queried for its numeric type. + + Returns + ------- + val : scalar + The maximum representable value. + + Raises + ------ + TypeError + If `obj` isn't a suitable numeric type. + + See Also + -------- + maximum_fill_value : The inverse function. + set_fill_value : Set the filling value of a masked array. + MaskedArray.fill_value : Return current fill value. + + Examples + -------- + >>> import numpy.ma as ma + >>> a = np.int8() + >>> ma.minimum_fill_value(a) + 127 + >>> a = np.int32() + >>> ma.minimum_fill_value(a) + 2147483647 + + An array of numeric data can also be passed. + + >>> a = np.array([1, 2, 3], dtype=np.int8) + >>> ma.minimum_fill_value(a) + 127 + >>> a = np.array([1, 2, 3], dtype=np.float32) + >>> ma.minimum_fill_value(a) + inf + + """ + return _extremum_fill_value(obj, min_filler, "minimum") + + +def maximum_fill_value(obj): + """ + Return the minimum value that can be represented by the dtype of an object. + + This function is useful for calculating a fill value suitable for + taking the maximum of an array with a given dtype. + + Parameters + ---------- + obj : ndarray, dtype or scalar + An object that can be queried for its numeric type. + + Returns + ------- + val : scalar + The minimum representable value. + + Raises + ------ + TypeError + If `obj` isn't a suitable numeric type. + + See Also + -------- + minimum_fill_value : The inverse function. + set_fill_value : Set the filling value of a masked array. + MaskedArray.fill_value : Return current fill value. + + Examples + -------- + >>> import numpy.ma as ma + >>> a = np.int8() + >>> ma.maximum_fill_value(a) + -128 + >>> a = np.int32() + >>> ma.maximum_fill_value(a) + -2147483648 + + An array of numeric data can also be passed. 
+ + >>> a = np.array([1, 2, 3], dtype=np.int8) + >>> ma.maximum_fill_value(a) + -128 + >>> a = np.array([1, 2, 3], dtype=np.float32) + >>> ma.maximum_fill_value(a) + -inf + + """ + return _extremum_fill_value(obj, max_filler, "maximum") + + +def _recursive_set_fill_value(fillvalue, dt): + """ + Create a fill value for a structured dtype. + + Parameters + ---------- + fillvalue : scalar or array_like + Scalar or array representing the fill value. If it is of shorter + length than the number of fields in dt, it will be resized. + dt : dtype + The structured dtype for which to create the fill value. + + Returns + ------- + val: tuple + A tuple of values corresponding to the structured fill value. + + """ + fillvalue = np.resize(fillvalue, len(dt.names)) + output_value = [] + for (fval, name) in zip(fillvalue, dt.names): + cdtype = dt[name] + if cdtype.subdtype: + cdtype = cdtype.subdtype[0] + + if cdtype.names is not None: + output_value.append(tuple(_recursive_set_fill_value(fval, cdtype))) + else: + output_value.append(np.array(fval, dtype=cdtype).item()) + return tuple(output_value) + + +def _check_fill_value(fill_value, ndtype): + """ + Private function validating the given `fill_value` for the given dtype. + + If fill_value is None, it is set to the default corresponding to the dtype. + + If fill_value is not None, its value is forced to the given dtype. + + The result is always a 0d array. + + """ + ndtype = np.dtype(ndtype) + if fill_value is None: + fill_value = default_fill_value(ndtype) + elif ndtype.names is not None: + if isinstance(fill_value, (ndarray, np.void)): + try: + fill_value = np.array(fill_value, copy=False, dtype=ndtype) + except ValueError as e: + err_msg = "Unable to transform %s to dtype %s" + raise ValueError(err_msg % (fill_value, ndtype)) from e + else: + fill_value = np.asarray(fill_value, dtype=object) + fill_value = np.array(_recursive_set_fill_value(fill_value, ndtype), + dtype=ndtype) + else: + if isinstance(fill_value, str) and (ndtype.char not in 'OSVU'): + # Note this check doesn't work if fill_value is not a scalar + err_msg = "Cannot set fill value of string with array of dtype %s" + raise TypeError(err_msg % ndtype) + else: + # In case we want to convert 1e20 to int. + # Also in case of converting string arrays. + try: + fill_value = np.array(fill_value, copy=False, dtype=ndtype) + except (OverflowError, ValueError) as e: + # Raise TypeError instead of OverflowError or ValueError. + # OverflowError is seldom used, and the real problem here is + # that the passed fill_value is not compatible with the ndtype. + err_msg = "Cannot convert fill_value %s to dtype %s" + raise TypeError(err_msg % (fill_value, ndtype)) from e + return np.array(fill_value) + + +def set_fill_value(a, fill_value): + """ + Set the filling value of a, if a is a masked array. + + This function changes the fill value of the masked array `a` in place. + If `a` is not a masked array, the function returns silently, without + doing anything. + + Parameters + ---------- + a : array_like + Input array. + fill_value : dtype + Filling value. A consistency test is performed to make sure + the value is compatible with the dtype of `a`. + + Returns + ------- + None + Nothing returned by this function. + + See Also + -------- + maximum_fill_value : Return the default fill value for a dtype. + MaskedArray.fill_value : Return current fill value. + MaskedArray.set_fill_value : Equivalent method. 
+ + Examples + -------- + >>> import numpy.ma as ma + >>> a = np.arange(5) + >>> a + array([0, 1, 2, 3, 4]) + >>> a = ma.masked_where(a < 3, a) + >>> a + masked_array(data=[--, --, --, 3, 4], + mask=[ True, True, True, False, False], + fill_value=999999) + >>> ma.set_fill_value(a, -999) + >>> a + masked_array(data=[--, --, --, 3, 4], + mask=[ True, True, True, False, False], + fill_value=-999) + + Nothing happens if `a` is not a masked array. + + >>> a = list(range(5)) + >>> a + [0, 1, 2, 3, 4] + >>> ma.set_fill_value(a, 100) + >>> a + [0, 1, 2, 3, 4] + >>> a = np.arange(5) + >>> a + array([0, 1, 2, 3, 4]) + >>> ma.set_fill_value(a, 100) + >>> a + array([0, 1, 2, 3, 4]) + + """ + if isinstance(a, MaskedArray): + a.set_fill_value(fill_value) + return + + +def get_fill_value(a): + """ + Return the filling value of a, if any. Otherwise, returns the + default filling value for that type. + + """ + if isinstance(a, MaskedArray): + result = a.fill_value + else: + result = default_fill_value(a) + return result + + +def common_fill_value(a, b): + """ + Return the common filling value of two masked arrays, if any. + + If ``a.fill_value == b.fill_value``, return the fill value, + otherwise return None. + + Parameters + ---------- + a, b : MaskedArray + The masked arrays for which to compare fill values. + + Returns + ------- + fill_value : scalar or None + The common fill value, or None. + + Examples + -------- + >>> x = np.ma.array([0, 1.], fill_value=3) + >>> y = np.ma.array([0, 1.], fill_value=3) + >>> np.ma.common_fill_value(x, y) + 3.0 + + """ + t1 = get_fill_value(a) + t2 = get_fill_value(b) + if t1 == t2: + return t1 + return None + + +def filled(a, fill_value=None): + """ + Return input as an array with masked data replaced by a fill value. + + If `a` is not a `MaskedArray`, `a` itself is returned. + If `a` is a `MaskedArray` and `fill_value` is None, `fill_value` is set to + ``a.fill_value``. + + Parameters + ---------- + a : MaskedArray or array_like + An input object. + fill_value : array_like, optional. + Can be scalar or non-scalar. If non-scalar, the + resulting filled array should be broadcastable + over input array. Default is None. + + Returns + ------- + a : ndarray + The filled array. + + See Also + -------- + compressed + + Examples + -------- + >>> x = np.ma.array(np.arange(9).reshape(3, 3), mask=[[1, 0, 0], + ... [1, 0, 0], + ... [0, 0, 0]]) + >>> x.filled() + array([[999999, 1, 2], + [999999, 4, 5], + [ 6, 7, 8]]) + >>> x.filled(fill_value=333) + array([[333, 1, 2], + [333, 4, 5], + [ 6, 7, 8]]) + >>> x.filled(fill_value=np.arange(3)) + array([[0, 1, 2], + [0, 4, 5], + [6, 7, 8]]) + + """ + if hasattr(a, 'filled'): + return a.filled(fill_value) + + elif isinstance(a, ndarray): + # Should we check for contiguity ? and a.flags['CONTIGUOUS']: + return a + elif isinstance(a, dict): + return np.array(a, 'O') + else: + return np.array(a) + + +def get_masked_subclass(*arrays): + """ + Return the youngest subclass of MaskedArray from a list of (masked) arrays. + + In case of siblings, the first listed takes over. 
+ + """ + if len(arrays) == 1: + arr = arrays[0] + if isinstance(arr, MaskedArray): + rcls = type(arr) + else: + rcls = MaskedArray + else: + arrcls = [type(a) for a in arrays] + rcls = arrcls[0] + if not issubclass(rcls, MaskedArray): + rcls = MaskedArray + for cls in arrcls[1:]: + if issubclass(cls, rcls): + rcls = cls + # Don't return MaskedConstant as result: revert to MaskedArray + if rcls.__name__ == 'MaskedConstant': + return MaskedArray + return rcls + + +def getdata(a, subok=True): + """ + Return the data of a masked array as an ndarray. + + Return the data of `a` (if any) as an ndarray if `a` is a ``MaskedArray``, + else return `a` as a ndarray or subclass (depending on `subok`) if not. + + Parameters + ---------- + a : array_like + Input ``MaskedArray``, alternatively a ndarray or a subclass thereof. + subok : bool + Whether to force the output to be a `pure` ndarray (False) or to + return a subclass of ndarray if appropriate (True, default). + + See Also + -------- + getmask : Return the mask of a masked array, or nomask. + getmaskarray : Return the mask of a masked array, or full array of False. + + Examples + -------- + >>> import numpy.ma as ma + >>> a = ma.masked_equal([[1,2],[3,4]], 2) + >>> a + masked_array( + data=[[1, --], + [3, 4]], + mask=[[False, True], + [False, False]], + fill_value=2) + >>> ma.getdata(a) + array([[1, 2], + [3, 4]]) + + Equivalently use the ``MaskedArray`` `data` attribute. + + >>> a.data + array([[1, 2], + [3, 4]]) + + """ + try: + data = a._data + except AttributeError: + data = np.array(a, copy=False, subok=subok) + if not subok: + return data.view(ndarray) + return data + + +get_data = getdata + + +def fix_invalid(a, mask=nomask, copy=True, fill_value=None): + """ + Return input with invalid data masked and replaced by a fill value. + + Invalid data means values of `nan`, `inf`, etc. + + Parameters + ---------- + a : array_like + Input array, a (subclass of) ndarray. + mask : sequence, optional + Mask. Must be convertible to an array of booleans with the same + shape as `data`. True indicates a masked (i.e. invalid) data. + copy : bool, optional + Whether to use a copy of `a` (True) or to fix `a` in place (False). + Default is True. + fill_value : scalar, optional + Value used for fixing invalid data. Default is None, in which case + the ``a.fill_value`` is used. + + Returns + ------- + b : MaskedArray + The input array with invalid entries fixed. + + Notes + ----- + A copy is performed by default. 
+
+    Examples
+    --------
+    >>> x = np.ma.array([1., -1, np.nan, np.inf], mask=[1] + [0]*3)
+    >>> x
+    masked_array(data=[--, -1.0, nan, inf],
+                 mask=[ True, False, False, False],
+           fill_value=1e+20)
+    >>> np.ma.fix_invalid(x)
+    masked_array(data=[--, -1.0, --, --],
+                 mask=[ True, False,  True,  True],
+           fill_value=1e+20)
+
+    >>> fixed = np.ma.fix_invalid(x)
+    >>> fixed.data
+    array([ 1.e+00, -1.e+00,  1.e+20,  1.e+20])
+    >>> x.data
+    array([ 1., -1., nan, inf])
+
+    """
+    a = masked_array(a, copy=copy, mask=mask, subok=True)
+    invalid = np.logical_not(np.isfinite(a._data))
+    if not invalid.any():
+        return a
+    a._mask |= invalid
+    if fill_value is None:
+        fill_value = a.fill_value
+    a._data[invalid] = fill_value
+    return a
+
+def is_string_or_list_of_strings(val):
+    return (isinstance(val, str) or
+            (isinstance(val, list) and val and
+             builtins.all(isinstance(s, str) for s in val)))
+
+###############################################################################
+#                                  Ufuncs                                     #
+###############################################################################
+
+
+ufunc_domain = {}
+ufunc_fills = {}
+
+
+class _DomainCheckInterval:
+    """
+    Define a valid interval, so that :
+
+    ``domain_check_interval(a,b)(x) == True`` where
+    ``x < a`` or ``x > b``.
+
+    """
+
+    def __init__(self, a, b):
+        "domain_check_interval(a,b)(x) = true where x < a or x > b"
+        if a > b:
+            (a, b) = (b, a)
+        self.a = a
+        self.b = b
+
+    def __call__(self, x):
+        "Execute the call behavior."
+        # nans at masked positions cause RuntimeWarnings, even though
+        # they are masked. To avoid this we suppress warnings.
+        with np.errstate(invalid='ignore'):
+            return umath.logical_or(umath.greater(x, self.b),
+                                    umath.less(x, self.a))
+
+
+class _DomainTan:
+    """
+    Define a valid interval for the `tan` function, so that:
+
+    ``domain_tan(eps) = True`` where ``abs(cos(x)) < eps``
+
+    """
+
+    def __init__(self, eps):
+        "domain_tan(eps) = true where abs(cos(x)) < eps"
+        self.eps = eps
+
+    def __call__(self, x):
+        "Executes the call behavior."
+        with np.errstate(invalid='ignore'):
+            return umath.less(umath.absolute(umath.cos(x)), self.eps)
+
+
+class _DomainSafeDivide:
+    """
+    Define a domain for safe division.
+
+    """
+
+    def __init__(self, tolerance=None):
+        self.tolerance = tolerance
+
+    def __call__(self, a, b):
+        # Delay the selection of the tolerance to here in order to reduce numpy
+        # import times. The calculation of these parameters is a substantial
+        # component of numpy's import time.
+        if self.tolerance is None:
+            self.tolerance = np.finfo(float).tiny
+        # don't call ma ufuncs from __array_wrap__ which would fail for scalars
+        a, b = np.asarray(a), np.asarray(b)
+        with np.errstate(invalid='ignore'):
+            return umath.absolute(a) * self.tolerance >= umath.absolute(b)
+
+
+class _DomainGreater:
+    """
+    DomainGreater(v)(x) is True where x <= v.
+
+    """
+
+    def __init__(self, critical_value):
+        "DomainGreater(v)(x) = true where x <= v"
+        self.critical_value = critical_value
+
+    def __call__(self, x):
+        "Executes the call behavior."
+        with np.errstate(invalid='ignore'):
+            return umath.less_equal(x, self.critical_value)
+
+
+class _DomainGreaterEqual:
+    """
+    DomainGreaterEqual(v)(x) is True where x < v.
+
+    """
+
+    def __init__(self, critical_value):
+        "DomainGreaterEqual(v)(x) = true where x < v"
+        self.critical_value = critical_value
+
+    def __call__(self, x):
+        "Executes the call behavior."
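+        # Illustrative (assumed) behavior, mirroring _DomainGreater above:
+        # _DomainGreaterEqual(1.0)(np.array([0.5, 1.0, 2.0])) returns
+        # array([ True, False, False]); True flags values *outside* the
+        # domain of the wrapped ufunc (here, e.g., arccosh).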
+        with np.errstate(invalid='ignore'):
+            return umath.less(x, self.critical_value)
+
+
+class _MaskedUFunc:
+    def __init__(self, ufunc):
+        self.f = ufunc
+        self.__doc__ = ufunc.__doc__
+        self.__name__ = ufunc.__name__
+
+    def __str__(self):
+        return f"Masked version of {self.f}"
+
+
+class _MaskedUnaryOperation(_MaskedUFunc):
+    """
+    Defines masked version of unary operations, where invalid values are
+    pre-masked.
+
+    Parameters
+    ----------
+    mufunc : callable
+        The function for which to define a masked version. Made available
+        as ``_MaskedUnaryOperation.f``.
+    fill : scalar, optional
+        Filling value, default is 0.
+    domain : class instance
+        Domain for the function. Should be one of the ``_Domain*``
+        classes. Default is None.
+
+    """
+
+    def __init__(self, mufunc, fill=0, domain=None):
+        super().__init__(mufunc)
+        self.fill = fill
+        self.domain = domain
+        ufunc_domain[mufunc] = domain
+        ufunc_fills[mufunc] = fill
+
+    def __call__(self, a, *args, **kwargs):
+        """
+        Execute the call behavior.
+
+        """
+        d = getdata(a)
+        # Deal with domain
+        if self.domain is not None:
+            # Case 1.1. : Domained function
+            # nans at masked positions cause RuntimeWarnings, even though
+            # they are masked. To avoid this we suppress warnings.
+            with np.errstate(divide='ignore', invalid='ignore'):
+                result = self.f(d, *args, **kwargs)
+            # Make a mask
+            m = ~umath.isfinite(result)
+            m |= self.domain(d)
+            m |= getmask(a)
+        else:
+            # Case 1.2. : Function without a domain
+            # Get the result and the mask
+            with np.errstate(divide='ignore', invalid='ignore'):
+                result = self.f(d, *args, **kwargs)
+            m = getmask(a)
+
+        if not result.ndim:
+            # Case 2.1. : The result is a scalar
+            if m:
+                return masked
+            return result
+
+        if m is not nomask:
+            # Case 2.2. The result is an array
+            # We need to fill the invalid data back w/ the input Now,
+            # that's plain silly: in C, we would just skip the element and
+            # keep the original, but we do have to do it that way in Python
+
+            # In case result has a lower dtype than the inputs (as in
+            # equal)
+            try:
+                np.copyto(result, d, where=m)
+            except TypeError:
+                pass
+        # Transform to a (subclass of) MaskedArray
+        masked_result = result.view(get_masked_subclass(a))
+        masked_result._mask = m
+        masked_result._update_from(a)
+        return masked_result
+
+
+class _MaskedBinaryOperation(_MaskedUFunc):
+    """
+    Define masked version of binary operations, where invalid
+    values are pre-masked.
+
+    Parameters
+    ----------
+    mbfunc : function
+        The function for which to define a masked version. Made available
+        as ``_MaskedBinaryOperation.f``.
+    domain : class instance
+        Default domain for the function. Should be one of the ``_Domain*``
+        classes. Default is None.
+    fillx : scalar, optional
+        Filling value for the first argument, default is 0.
+    filly : scalar, optional
+        Filling value for the second argument, default is 0.
+
+    """
+
+    def __init__(self, mbfunc, fillx=0, filly=0):
+        """
+        abfunc(fillx, filly) must be defined.
+
+        abfunc(x, filly) = x for all x to enable reduce.
+
+        """
+        super().__init__(mbfunc)
+        self.fillx = fillx
+        self.filly = filly
+        ufunc_domain[mbfunc] = None
+        ufunc_fills[mbfunc] = (fillx, filly)
+
+    def __call__(self, a, b, *args, **kwargs):
+        """
+        Execute the call behavior.
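+
+        A result element is masked where either operand is masked: the
+        input masks are combined with logical OR. Illustrative, using the
+        masked ``add`` wrapper defined later in this module:
+
+        >>> np.ma.add(np.ma.array([1, 2], mask=[0, 1]), [10, 20])
+        masked_array(data=[11, --],
+                     mask=[False,  True],
+               fill_value=999999)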
+ + """ + # Get the data, as ndarray + (da, db) = (getdata(a), getdata(b)) + # Get the result + with np.errstate(): + np.seterr(divide='ignore', invalid='ignore') + result = self.f(da, db, *args, **kwargs) + # Get the mask for the result + (ma, mb) = (getmask(a), getmask(b)) + if ma is nomask: + if mb is nomask: + m = nomask + else: + m = umath.logical_or(getmaskarray(a), mb) + elif mb is nomask: + m = umath.logical_or(ma, getmaskarray(b)) + else: + m = umath.logical_or(ma, mb) + + # Case 1. : scalar + if not result.ndim: + if m: + return masked + return result + + # Case 2. : array + # Revert result to da where masked + if m is not nomask and m.any(): + # any errors, just abort; impossible to guarantee masked values + try: + np.copyto(result, da, casting='unsafe', where=m) + except Exception: + pass + + # Transforms to a (subclass of) MaskedArray + masked_result = result.view(get_masked_subclass(a, b)) + masked_result._mask = m + if isinstance(a, MaskedArray): + masked_result._update_from(a) + elif isinstance(b, MaskedArray): + masked_result._update_from(b) + return masked_result + + def reduce(self, target, axis=0, dtype=None): + """ + Reduce `target` along the given `axis`. + + """ + tclass = get_masked_subclass(target) + m = getmask(target) + t = filled(target, self.filly) + if t.shape == (): + t = t.reshape(1) + if m is not nomask: + m = make_mask(m, copy=True) + m.shape = (1,) + + if m is nomask: + tr = self.f.reduce(t, axis) + mr = nomask + else: + tr = self.f.reduce(t, axis, dtype=dtype) + mr = umath.logical_and.reduce(m, axis) + + if not tr.shape: + if mr: + return masked + else: + return tr + masked_tr = tr.view(tclass) + masked_tr._mask = mr + return masked_tr + + def outer(self, a, b): + """ + Return the function applied to the outer product of a and b. + + """ + (da, db) = (getdata(a), getdata(b)) + d = self.f.outer(da, db) + ma = getmask(a) + mb = getmask(b) + if ma is nomask and mb is nomask: + m = nomask + else: + ma = getmaskarray(a) + mb = getmaskarray(b) + m = umath.logical_or.outer(ma, mb) + if (not m.ndim) and m: + return masked + if m is not nomask: + np.copyto(d, da, where=m) + if not d.shape: + return d + masked_d = d.view(get_masked_subclass(a, b)) + masked_d._mask = m + return masked_d + + def accumulate(self, target, axis=0): + """Accumulate `target` along `axis` after filling with y fill + value. + + """ + tclass = get_masked_subclass(target) + t = filled(target, self.filly) + result = self.f.accumulate(t, axis) + masked_result = result.view(tclass) + return masked_result + + + +class _DomainedBinaryOperation(_MaskedUFunc): + """ + Define binary operations that have a domain, like divide. + + They have no reduce, outer or accumulate. + + Parameters + ---------- + mbfunc : function + The function for which to define a masked version. Made available + as ``_DomainedBinaryOperation.f``. + domain : class instance + Default domain for the function. Should be one of the ``_Domain*`` + classes. + fillx : scalar, optional + Filling value for the first argument, default is 0. + filly : scalar, optional + Filling value for the second argument, default is 0. + + """ + + def __init__(self, dbfunc, domain, fillx=0, filly=0): + """abfunc(fillx, filly) must be defined. + abfunc(x, filly) = x for all x to enable reduce. + """ + super().__init__(dbfunc) + self.domain = domain + self.fillx = fillx + self.filly = filly + ufunc_domain[dbfunc] = domain + ufunc_fills[dbfunc] = (fillx, filly) + + def __call__(self, a, b, *args, **kwargs): + "Execute the call behavior." 
+ # Get the data + (da, db) = (getdata(a), getdata(b)) + # Get the result + with np.errstate(divide='ignore', invalid='ignore'): + result = self.f(da, db, *args, **kwargs) + # Get the mask as a combination of the source masks and invalid + m = ~umath.isfinite(result) + m |= getmask(a) + m |= getmask(b) + # Apply the domain + domain = ufunc_domain.get(self.f, None) + if domain is not None: + m |= domain(da, db) + # Take care of the scalar case first + if not m.ndim: + if m: + return masked + else: + return result + # When the mask is True, put back da if possible + # any errors, just abort; impossible to guarantee masked values + try: + np.copyto(result, 0, casting='unsafe', where=m) + # avoid using "*" since this may be overlaid + masked_da = umath.multiply(m, da) + # only add back if it can be cast safely + if np.can_cast(masked_da.dtype, result.dtype, casting='safe'): + result += masked_da + except Exception: + pass + + # Transforms to a (subclass of) MaskedArray + masked_result = result.view(get_masked_subclass(a, b)) + masked_result._mask = m + if isinstance(a, MaskedArray): + masked_result._update_from(a) + elif isinstance(b, MaskedArray): + masked_result._update_from(b) + return masked_result + + +# Unary ufuncs +exp = _MaskedUnaryOperation(umath.exp) +conjugate = _MaskedUnaryOperation(umath.conjugate) +sin = _MaskedUnaryOperation(umath.sin) +cos = _MaskedUnaryOperation(umath.cos) +arctan = _MaskedUnaryOperation(umath.arctan) +arcsinh = _MaskedUnaryOperation(umath.arcsinh) +sinh = _MaskedUnaryOperation(umath.sinh) +cosh = _MaskedUnaryOperation(umath.cosh) +tanh = _MaskedUnaryOperation(umath.tanh) +abs = absolute = _MaskedUnaryOperation(umath.absolute) +angle = _MaskedUnaryOperation(angle) # from numpy.lib.function_base +fabs = _MaskedUnaryOperation(umath.fabs) +negative = _MaskedUnaryOperation(umath.negative) +floor = _MaskedUnaryOperation(umath.floor) +ceil = _MaskedUnaryOperation(umath.ceil) +around = _MaskedUnaryOperation(np.round_) +logical_not = _MaskedUnaryOperation(umath.logical_not) + +# Domained unary ufuncs +sqrt = _MaskedUnaryOperation(umath.sqrt, 0.0, + _DomainGreaterEqual(0.0)) +log = _MaskedUnaryOperation(umath.log, 1.0, + _DomainGreater(0.0)) +log2 = _MaskedUnaryOperation(umath.log2, 1.0, + _DomainGreater(0.0)) +log10 = _MaskedUnaryOperation(umath.log10, 1.0, + _DomainGreater(0.0)) +tan = _MaskedUnaryOperation(umath.tan, 0.0, + _DomainTan(1e-35)) +arcsin = _MaskedUnaryOperation(umath.arcsin, 0.0, + _DomainCheckInterval(-1.0, 1.0)) +arccos = _MaskedUnaryOperation(umath.arccos, 0.0, + _DomainCheckInterval(-1.0, 1.0)) +arccosh = _MaskedUnaryOperation(umath.arccosh, 1.0, + _DomainGreaterEqual(1.0)) +arctanh = _MaskedUnaryOperation(umath.arctanh, 0.0, + _DomainCheckInterval(-1.0 + 1e-15, 1.0 - 1e-15)) + +# Binary ufuncs +add = _MaskedBinaryOperation(umath.add) +subtract = _MaskedBinaryOperation(umath.subtract) +multiply = _MaskedBinaryOperation(umath.multiply, 1, 1) +arctan2 = _MaskedBinaryOperation(umath.arctan2, 0.0, 1.0) +equal = _MaskedBinaryOperation(umath.equal) +equal.reduce = None +not_equal = _MaskedBinaryOperation(umath.not_equal) +not_equal.reduce = None +less_equal = _MaskedBinaryOperation(umath.less_equal) +less_equal.reduce = None +greater_equal = _MaskedBinaryOperation(umath.greater_equal) +greater_equal.reduce = None +less = _MaskedBinaryOperation(umath.less) +less.reduce = None +greater = _MaskedBinaryOperation(umath.greater) +greater.reduce = None +logical_and = _MaskedBinaryOperation(umath.logical_and) +alltrue = _MaskedBinaryOperation(umath.logical_and, 
1, 1).reduce
+logical_or = _MaskedBinaryOperation(umath.logical_or)
+sometrue = logical_or.reduce
+logical_xor = _MaskedBinaryOperation(umath.logical_xor)
+bitwise_and = _MaskedBinaryOperation(umath.bitwise_and)
+bitwise_or = _MaskedBinaryOperation(umath.bitwise_or)
+bitwise_xor = _MaskedBinaryOperation(umath.bitwise_xor)
+hypot = _MaskedBinaryOperation(umath.hypot)
+
+# Domained binary ufuncs
+divide = _DomainedBinaryOperation(umath.divide, _DomainSafeDivide(), 0, 1)
+true_divide = _DomainedBinaryOperation(umath.true_divide,
+                                       _DomainSafeDivide(), 0, 1)
+floor_divide = _DomainedBinaryOperation(umath.floor_divide,
+                                        _DomainSafeDivide(), 0, 1)
+remainder = _DomainedBinaryOperation(umath.remainder,
+                                     _DomainSafeDivide(), 0, 1)
+fmod = _DomainedBinaryOperation(umath.fmod, _DomainSafeDivide(), 0, 1)
+mod = _DomainedBinaryOperation(umath.mod, _DomainSafeDivide(), 0, 1)
+
+
+###############################################################################
+#                        Mask creation functions                              #
+###############################################################################
+
+
+def _replace_dtype_fields_recursive(dtype, primitive_dtype):
+    "Private function allowing recursion in _replace_dtype_fields."
+    _recurse = _replace_dtype_fields_recursive
+
+    # Do we have some name fields ?
+    if dtype.names is not None:
+        descr = []
+        for name in dtype.names:
+            field = dtype.fields[name]
+            if len(field) == 3:
+                # Prepend the title to the name
+                name = (field[-1], name)
+            descr.append((name, _recurse(field[0], primitive_dtype)))
+        new_dtype = np.dtype(descr)
+
+    # Is this some kind of composite a la (float,2)
+    elif dtype.subdtype:
+        descr = list(dtype.subdtype)
+        descr[0] = _recurse(dtype.subdtype[0], primitive_dtype)
+        new_dtype = np.dtype(tuple(descr))
+
+    # this is a primitive type, so do a direct replacement
+    else:
+        new_dtype = primitive_dtype
+
+    # preserve identity of dtypes
+    if new_dtype == dtype:
+        new_dtype = dtype
+
+    return new_dtype
+
+
+def _replace_dtype_fields(dtype, primitive_dtype):
+    """
+    Construct a dtype description list from a given dtype.
+
+    Returns a new dtype object, with all fields and subtypes in the given type
+    recursively replaced with `primitive_dtype`.
+
+    Arguments are coerced to dtypes first.
+    """
+    dtype = np.dtype(dtype)
+    primitive_dtype = np.dtype(primitive_dtype)
+    return _replace_dtype_fields_recursive(dtype, primitive_dtype)
+
+
+def make_mask_descr(ndtype):
+    """
+    Construct a dtype description list from a given dtype.
+
+    Returns a new dtype object, with the type of all fields in `ndtype`
+    converted to a boolean type. Field names are not altered.
+
+    Parameters
+    ----------
+    ndtype : dtype
+        The dtype to convert.
+
+    Returns
+    -------
+    result : dtype
+        A dtype that looks like `ndtype`, the type of all fields is boolean.
+
+    Examples
+    --------
+    >>> import numpy.ma as ma
+    >>> dtype = np.dtype({'names':['foo', 'bar'],
+    ...                   'formats':[np.float32, np.int64]})
+    >>> dtype
+    dtype([('foo', '<f4'), ('bar', '<i8')])
+    >>> ma.make_mask_descr(dtype)
+    dtype([('foo', '|b1'), ('bar', '|b1')])
+    >>> ma.make_mask_descr(np.float32)
+    dtype('bool')
+
+    """
+    return _replace_dtype_fields(ndtype, MaskType)
+
+
+def getmask(a):
+    """
+    Return the mask of a masked array, or nomask.
+
+    Return the mask of `a` as an ndarray if `a` is a `MaskedArray` and the
+    mask is not `nomask`, else return `nomask`. To guarantee a full array
+    of booleans of the same shape as a, use `getmaskarray`.
+
+    Parameters
+    ----------
+    a : array_like
+        Input `MaskedArray` for which the mask is required.
+ + See Also + -------- + getdata : Return the data of a masked array as an ndarray. + getmaskarray : Return the mask of a masked array, or full array of False. + + Examples + -------- + >>> import numpy.ma as ma + >>> a = ma.masked_equal([[1,2],[3,4]], 2) + >>> a + masked_array( + data=[[1, --], + [3, 4]], + mask=[[False, True], + [False, False]], + fill_value=2) + >>> ma.getmask(a) + array([[False, True], + [False, False]]) + + Equivalently use the `MaskedArray` `mask` attribute. + + >>> a.mask + array([[False, True], + [False, False]]) + + Result when mask == `nomask` + + >>> b = ma.masked_array([[1,2],[3,4]]) + >>> b + masked_array( + data=[[1, 2], + [3, 4]], + mask=False, + fill_value=999999) + >>> ma.nomask + False + >>> ma.getmask(b) == ma.nomask + True + >>> b.mask == ma.nomask + True + + """ + return getattr(a, '_mask', nomask) + + +get_mask = getmask + + +def getmaskarray(arr): + """ + Return the mask of a masked array, or full boolean array of False. + + Return the mask of `arr` as an ndarray if `arr` is a `MaskedArray` and + the mask is not `nomask`, else return a full boolean array of False of + the same shape as `arr`. + + Parameters + ---------- + arr : array_like + Input `MaskedArray` for which the mask is required. + + See Also + -------- + getmask : Return the mask of a masked array, or nomask. + getdata : Return the data of a masked array as an ndarray. + + Examples + -------- + >>> import numpy.ma as ma + >>> a = ma.masked_equal([[1,2],[3,4]], 2) + >>> a + masked_array( + data=[[1, --], + [3, 4]], + mask=[[False, True], + [False, False]], + fill_value=2) + >>> ma.getmaskarray(a) + array([[False, True], + [False, False]]) + + Result when mask == ``nomask`` + + >>> b = ma.masked_array([[1,2],[3,4]]) + >>> b + masked_array( + data=[[1, 2], + [3, 4]], + mask=False, + fill_value=999999) + >>> ma.getmaskarray(b) + array([[False, False], + [False, False]]) + + """ + mask = getmask(arr) + if mask is nomask: + mask = make_mask_none(np.shape(arr), getattr(arr, 'dtype', None)) + return mask + + +def is_mask(m): + """ + Return True if m is a valid, standard mask. + + This function does not check the contents of the input, only that the + type is MaskType. In particular, this function returns False if the + mask has a flexible dtype. + + Parameters + ---------- + m : array_like + Array to test. + + Returns + ------- + result : bool + True if `m.dtype.type` is MaskType, False otherwise. + + See Also + -------- + ma.isMaskedArray : Test whether input is an instance of MaskedArray. + + Examples + -------- + >>> import numpy.ma as ma + >>> m = ma.masked_equal([0, 1, 0, 2, 3], 0) + >>> m + masked_array(data=[--, 1, --, 2, 3], + mask=[ True, False, True, False, False], + fill_value=0) + >>> ma.is_mask(m) + False + >>> ma.is_mask(m.mask) + True + + Input must be an ndarray (or have similar attributes) + for it to be considered a valid mask. + + >>> m = [False, True, False] + >>> ma.is_mask(m) + False + >>> m = np.array([False, True, False]) + >>> m + array([False, True, False]) + >>> ma.is_mask(m) + True + + Arrays with complex dtypes don't return True. + + >>> dtype = np.dtype({'names':['monty', 'pithon'], + ... 'formats':[bool, bool]}) + >>> dtype + dtype([('monty', '|b1'), ('pithon', '|b1')]) + >>> m = np.array([(True, False), (False, True), (True, False)], + ... 
dtype=dtype)
+    >>> m
+    array([( True, False), (False,  True), ( True, False)],
+          dtype=[('monty', '?'), ('pithon', '?')])
+    >>> ma.is_mask(m)
+    False
+
+    """
+    try:
+        return m.dtype.type is MaskType
+    except AttributeError:
+        return False
+
+
+def _shrink_mask(m):
+    """
+    Shrink a mask to nomask if possible
+    """
+    if m.dtype.names is None and not m.any():
+        return nomask
+    else:
+        return m
+
+
+def make_mask(m, copy=False, shrink=True, dtype=MaskType):
+    """
+    Create a boolean mask from an array.
+
+    Return `m` as a boolean mask, creating a copy if necessary or requested.
+    The function can accept any sequence that is convertible to integers,
+    or ``nomask``. The contents need not be 0s and 1s: values of 0 are
+    interpreted as False, everything else as True.
+
+    Parameters
+    ----------
+    m : array_like
+        Potential mask.
+    copy : bool, optional
+        Whether to return a copy of `m` (True) or `m` itself (False).
+    shrink : bool, optional
+        Whether to shrink `m` to ``nomask`` if all its values are False.
+    dtype : dtype, optional
+        Data-type of the output mask. By default, the output mask has a
+        dtype of MaskType (bool). If the dtype is flexible, each field has
+        a boolean dtype. This is ignored when `m` is ``nomask``, in which
+        case ``nomask`` is always returned.
+
+    Returns
+    -------
+    result : ndarray
+        A boolean mask derived from `m`.
+
+    Examples
+    --------
+    >>> import numpy.ma as ma
+    >>> m = [True, False, True, True]
+    >>> ma.make_mask(m)
+    array([ True, False,  True,  True])
+    >>> m = [1, 0, 1, 1]
+    >>> ma.make_mask(m)
+    array([ True, False,  True,  True])
+    >>> m = [1, 0, 2, -3]
+    >>> ma.make_mask(m)
+    array([ True, False,  True,  True])
+
+    Effect of the `shrink` parameter.
+
+    >>> m = np.zeros(4)
+    >>> m
+    array([0., 0., 0., 0.])
+    >>> ma.make_mask(m)
+    False
+    >>> ma.make_mask(m, shrink=False)
+    array([False, False, False, False])
+
+    Using a flexible `dtype`.
+
+    >>> m = [1, 0, 1, 1]
+    >>> n = [0, 1, 0, 0]
+    >>> arr = []
+    >>> for man, mouse in zip(m, n):
+    ...     arr.append((man, mouse))
+    >>> arr
+    [(1, 0), (0, 1), (1, 0), (1, 0)]
+    >>> dtype = np.dtype({'names':['man', 'mouse'],
+    ...                   'formats':[np.int64, np.int64]})
+    >>> arr = np.array(arr, dtype=dtype)
+    >>> arr
+    array([(1, 0), (0, 1), (1, 0), (1, 0)],
+          dtype=[('man', '<i8'), ('mouse', '<i8')])
+    >>> ma.make_mask(arr, dtype=dtype)
+    array([(True, False), (False, True), (True, False), (True, False)],
+          dtype=[('man', '|b1'), ('mouse', '|b1')])
+
+    """
+    if m is nomask:
+        return nomask
+
+    # Make sure the input dtype is valid.
+    dtype = make_mask_descr(dtype)
+
+    # legacy boolean special case: "existence of fields implies true"
+    if isinstance(m, ndarray) and m.dtype.fields and dtype == np.bool_:
+        return np.ones(m.shape, dtype=dtype)
+
+    # Fill the mask in case there are missing data; turn it into an ndarray.
+    result = np.array(filled(m, True), copy=copy, dtype=dtype, subok=True)
+    # Bas les masques !
+    if shrink:
+        result = _shrink_mask(result)
+    return result
+
+
+def make_mask_none(newshape, dtype=None):
+    """
+    Return a boolean mask of the given shape, filled with False.
+
+    This function returns a boolean ndarray with all entries False, that can
+    be used in common mask manipulations. If a complex dtype is specified, the
+    type of each field is converted to a boolean type.
+
+    Parameters
+    ----------
+    newshape : tuple
+        A tuple indicating the shape of the mask.
+    dtype : {None, dtype}, optional
+        If None, use a MaskType instance. Otherwise, use a new datatype with
+        the same fields as `dtype`, converted to boolean types.
+
+    Returns
+    -------
+    result : ndarray
+        An ndarray of appropriate shape and dtype, filled with False.
+
+    See Also
+    --------
+    make_mask : Create a boolean mask from an array.
+    make_mask_descr : Construct a dtype description list from a given dtype.
+
+    Examples
+    --------
+    >>> import numpy.ma as ma
+    >>> ma.make_mask_none((3,))
+    array([False, False, False])
+
+    Defining a more complex dtype.
+
+    >>> dtype = np.dtype({'names':['foo', 'bar'],
+    ...                   'formats':[np.float32, np.int64]})
+    >>> dtype
+    dtype([('foo', '<f4'), ('bar', '<i8')])
+    >>> ma.make_mask_none((3,), dtype=dtype)
+    array([(False, False), (False, False), (False, False)],
+          dtype=[('foo', '|b1'), ('bar', '|b1')])
+
+    """
+    if dtype is None:
+        result = np.zeros(newshape, dtype=MaskType)
+    else:
+        result = np.zeros(newshape, dtype=make_mask_descr(dtype))
+    return result
+
+
+def _recursive_mask_or(m1, m2, newmask):
+    names = m1.dtype.names
+    for name in names:
+        current1 = m1[name]
+        if current1.dtype.names is not None:
+            _recursive_mask_or(current1, m2[name], newmask[name])
+        else:
+            umath.logical_or(current1, m2[name], newmask[name])
+
+
+def mask_or(m1, m2, copy=False, shrink=True):
+    """
+    Combine two masks with the ``logical_or`` operator.
+
+    The result may be a view on `m1` or `m2` if the other is `nomask`
+    (i.e. False).
+
+    Parameters
+    ----------
+    m1, m2 : array_like
+        Input masks.
+    copy : bool, optional
+        If copy is False and one of the inputs is `nomask`, return a view
+        of the other input mask. Defaults to False.
+    shrink : bool, optional
+        Whether to shrink the output to `nomask` if all its values are
+        False. Defaults to True.
+
+    Returns
+    -------
+    mask : output mask
+        The result masks values that are masked in either `m1` or `m2`.
+
+    Raises
+    ------
+    ValueError
+        If `m1` and `m2` have different flexible dtypes.
+
+    Examples
+    --------
+    >>> m1 = np.ma.make_mask([0, 1, 1, 0])
+    >>> m2 = np.ma.make_mask([1, 0, 0, 0])
+    >>> np.ma.mask_or(m1, m2)
+    array([ True,  True,  True, False])
+
+    """
+
+    if (m1 is nomask) or (m1 is False):
+        dtype = getattr(m2, 'dtype', MaskType)
+        return make_mask(m2, copy=copy, shrink=shrink, dtype=dtype)
+    if (m2 is nomask) or (m2 is False):
+        dtype = getattr(m1, 'dtype', MaskType)
+        return make_mask(m1, copy=copy, shrink=shrink, dtype=dtype)
+    if m1 is m2 and is_mask(m1):
+        return m1
+    (dtype1, dtype2) = (getattr(m1, 'dtype', None), getattr(m2, 'dtype', None))
+    if dtype1 != dtype2:
+        raise ValueError("Incompatible dtypes '%s'<>'%s'" % (dtype1, dtype2))
+    if dtype1.names is not None:
+        # Allocate an output mask array with the properly broadcast shape.
+        newmask = np.empty(np.broadcast(m1, m2).shape, dtype1)
+        _recursive_mask_or(m1, m2, newmask)
+        return newmask
+    return make_mask(umath.logical_or(m1, m2), copy=copy, shrink=shrink)
+
+
+def flatten_mask(mask):
+    """
+    Returns a completely flattened version of the mask, where nested fields
+    are collapsed.
+
+    Parameters
+    ----------
+    mask : array_like
+        Input array, which will be interpreted as booleans.
+
+    Returns
+    -------
+    flattened_mask : ndarray of bools
+        The flattened input.
+ + Examples + -------- + >>> mask = np.array([0, 0, 1]) + >>> np.ma.flatten_mask(mask) + array([False, False, True]) + + >>> mask = np.array([(0, 0), (0, 1)], dtype=[('a', bool), ('b', bool)]) + >>> np.ma.flatten_mask(mask) + array([False, False, False, True]) + + >>> mdtype = [('a', bool), ('b', [('ba', bool), ('bb', bool)])] + >>> mask = np.array([(0, (0, 0)), (0, (0, 1))], dtype=mdtype) + >>> np.ma.flatten_mask(mask) + array([False, False, False, False, False, True]) + + """ + + def _flatmask(mask): + "Flatten the mask and returns a (maybe nested) sequence of booleans." + mnames = mask.dtype.names + if mnames is not None: + return [flatten_mask(mask[name]) for name in mnames] + else: + return mask + + def _flatsequence(sequence): + "Generates a flattened version of the sequence." + try: + for element in sequence: + if hasattr(element, '__iter__'): + yield from _flatsequence(element) + else: + yield element + except TypeError: + yield sequence + + mask = np.asarray(mask) + flattened = _flatsequence(_flatmask(mask)) + return np.array([_ for _ in flattened], dtype=bool) + + +def _check_mask_axis(mask, axis, keepdims=np._NoValue): + "Check whether there are masked values along the given axis" + kwargs = {} if keepdims is np._NoValue else {'keepdims': keepdims} + if mask is not nomask: + return mask.all(axis=axis, **kwargs) + return nomask + + +############################################################################### +# Masking functions # +############################################################################### + +def masked_where(condition, a, copy=True): + """ + Mask an array where a condition is met. + + Return `a` as an array masked where `condition` is True. + Any masked values of `a` or `condition` are also masked in the output. + + Parameters + ---------- + condition : array_like + Masking condition. When `condition` tests floating point values for + equality, consider using ``masked_values`` instead. + a : array_like + Array to mask. + copy : bool + If True (default) make a copy of `a` in the result. If False modify + `a` in place and return a view. + + Returns + ------- + result : MaskedArray + The result of masking `a` where `condition` is True. + + See Also + -------- + masked_values : Mask using floating point equality. + masked_equal : Mask where equal to a given value. + masked_not_equal : Mask where `not` equal to a given value. + masked_less_equal : Mask where less than or equal to a given value. + masked_greater_equal : Mask where greater than or equal to a given value. + masked_less : Mask where less than a given value. + masked_greater : Mask where greater than a given value. + masked_inside : Mask inside a given interval. + masked_outside : Mask outside a given interval. + masked_invalid : Mask invalid values (NaNs or infs). + + Examples + -------- + >>> import numpy.ma as ma + >>> a = np.arange(4) + >>> a + array([0, 1, 2, 3]) + >>> ma.masked_where(a <= 2, a) + masked_array(data=[--, --, --, 3], + mask=[ True, True, True, False], + fill_value=999999) + + Mask array `b` conditional on `a`. 
+
+    >>> b = ['a', 'b', 'c', 'd']
+    >>> ma.masked_where(a == 2, b)
+    masked_array(data=['a', 'b', --, 'd'],
+                 mask=[False, False,  True, False],
+           fill_value='N/A',
+                dtype='<U1')
+
+    Effect of the `copy` argument.
+
+    >>> c = ma.masked_where(a <= 2, a)
+    >>> c
+    masked_array(data=[--, --, --, 3],
+                 mask=[ True,  True,  True, False],
+           fill_value=999999)
+    >>> c[0] = 99
+    >>> c
+    masked_array(data=[99, --, --, 3],
+                 mask=[False,  True,  True, False],
+           fill_value=999999)
+    >>> a
+    array([0, 1, 2, 3])
+    >>> c = ma.masked_where(a <= 2, a, copy=False)
+    >>> c[0] = 99
+    >>> c
+    masked_array(data=[99, --, --, 3],
+                 mask=[False,  True,  True, False],
+           fill_value=999999)
+    >>> a
+    array([99,  1,  2,  3])
+
+    When `condition` or `a` contain masked values.
+
+    >>> a = np.arange(4)
+    >>> a = ma.masked_where(a == 2, a)
+    >>> a
+    masked_array(data=[0, 1, --, 3],
+                 mask=[False, False,  True, False],
+           fill_value=999999)
+    >>> b = np.arange(4)
+    >>> b = ma.masked_where(b == 0, b)
+    >>> b
+    masked_array(data=[--, 1, 2, 3],
+                 mask=[ True, False, False, False],
+           fill_value=999999)
+    >>> ma.masked_where(a == 3, b)
+    masked_array(data=[--, 1, --, --],
+                 mask=[ True, False,  True,  True],
+           fill_value=999999)
+
+    """
+    # Make sure that condition is a valid standard-type mask.
+    cond = make_mask(condition, shrink=False)
+    a = np.array(a, copy=copy, subok=True)
+
+    (cshape, ashape) = (cond.shape, a.shape)
+    if cshape and cshape != ashape:
+        raise IndexError("Inconsistent shape between the condition and the input"
+                         " (got %s and %s)" % (cshape, ashape))
+    if hasattr(a, '_mask'):
+        cond = mask_or(cond, a._mask)
+        cls = type(a)
+    else:
+        cls = MaskedArray
+    result = a.view(cls)
+    # Assign to *.mask so that structured masks are handled correctly.
+    result.mask = _shrink_mask(cond)
+    # There is no view of a boolean so when 'a' is a MaskedArray with nomask
+    # the update to the result's mask has no effect.
+    if not copy and hasattr(a, '_mask') and getmask(a) is nomask:
+        a._mask = result._mask.view()
+    return result
+
+
+def masked_greater(x, value, copy=True):
+    """
+    Mask an array where greater than a given value.
+
+    This function is a shortcut to ``masked_where``, with
+    `condition` = (x > value).
+
+    See Also
+    --------
+    masked_where : Mask where a condition is met.
+
+    Examples
+    --------
+    >>> import numpy.ma as ma
+    >>> a = np.arange(4)
+    >>> a
+    array([0, 1, 2, 3])
+    >>> ma.masked_greater(a, 2)
+    masked_array(data=[0, 1, 2, --],
+                 mask=[False, False, False,  True],
+           fill_value=999999)
+
+    """
+    return masked_where(greater(x, value), x, copy=copy)
+
+
+def masked_greater_equal(x, value, copy=True):
+    """
+    Mask an array where greater than or equal to a given value.
+
+    This function is a shortcut to ``masked_where``, with
+    `condition` = (x >= value).
+
+    See Also
+    --------
+    masked_where : Mask where a condition is met.
+
+    Examples
+    --------
+    >>> import numpy.ma as ma
+    >>> a = np.arange(4)
+    >>> a
+    array([0, 1, 2, 3])
+    >>> ma.masked_greater_equal(a, 2)
+    masked_array(data=[0, 1, --, --],
+                 mask=[False, False,  True,  True],
+           fill_value=999999)
+
+    """
+    return masked_where(greater_equal(x, value), x, copy=copy)
+
+
+def masked_less(x, value, copy=True):
+    """
+    Mask an array where less than a given value.
+
+    This function is a shortcut to ``masked_where``, with
+    `condition` = (x < value).
+
+    See Also
+    --------
+    masked_where : Mask where a condition is met.
+ + Examples + -------- + >>> import numpy.ma as ma + >>> a = np.arange(4) + >>> a + array([0, 1, 2, 3]) + >>> ma.masked_less(a, 2) + masked_array(data=[--, --, 2, 3], + mask=[ True, True, False, False], + fill_value=999999) + + """ + return masked_where(less(x, value), x, copy=copy) + + +def masked_less_equal(x, value, copy=True): + """ + Mask an array where less than or equal to a given value. + + This function is a shortcut to ``masked_where``, with + `condition` = (x <= value). + + See Also + -------- + masked_where : Mask where a condition is met. + + Examples + -------- + >>> import numpy.ma as ma + >>> a = np.arange(4) + >>> a + array([0, 1, 2, 3]) + >>> ma.masked_less_equal(a, 2) + masked_array(data=[--, --, --, 3], + mask=[ True, True, True, False], + fill_value=999999) + + """ + return masked_where(less_equal(x, value), x, copy=copy) + + +def masked_not_equal(x, value, copy=True): + """ + Mask an array where `not` equal to a given value. + + This function is a shortcut to ``masked_where``, with + `condition` = (x != value). + + See Also + -------- + masked_where : Mask where a condition is met. + + Examples + -------- + >>> import numpy.ma as ma + >>> a = np.arange(4) + >>> a + array([0, 1, 2, 3]) + >>> ma.masked_not_equal(a, 2) + masked_array(data=[--, --, 2, --], + mask=[ True, True, False, True], + fill_value=999999) + + """ + return masked_where(not_equal(x, value), x, copy=copy) + + +def masked_equal(x, value, copy=True): + """ + Mask an array where equal to a given value. + + This function is a shortcut to ``masked_where``, with + `condition` = (x == value). For floating point arrays, + consider using ``masked_values(x, value)``. + + See Also + -------- + masked_where : Mask where a condition is met. + masked_values : Mask using floating point equality. + + Examples + -------- + >>> import numpy.ma as ma + >>> a = np.arange(4) + >>> a + array([0, 1, 2, 3]) + >>> ma.masked_equal(a, 2) + masked_array(data=[0, 1, --, 3], + mask=[False, False, True, False], + fill_value=2) + + """ + output = masked_where(equal(x, value), x, copy=copy) + output.fill_value = value + return output + + +def masked_inside(x, v1, v2, copy=True): + """ + Mask an array inside a given interval. + + Shortcut to ``masked_where``, where `condition` is True for `x` inside + the interval [v1,v2] (v1 <= x <= v2). The boundaries `v1` and `v2` + can be given in either order. + + See Also + -------- + masked_where : Mask where a condition is met. + + Notes + ----- + The array `x` is prefilled with its filling value. + + Examples + -------- + >>> import numpy.ma as ma + >>> x = [0.31, 1.2, 0.01, 0.2, -0.4, -1.1] + >>> ma.masked_inside(x, -0.3, 0.3) + masked_array(data=[0.31, 1.2, --, --, -0.4, -1.1], + mask=[False, False, True, True, False, False], + fill_value=1e+20) + + The order of `v1` and `v2` doesn't matter. + + >>> ma.masked_inside(x, 0.3, -0.3) + masked_array(data=[0.31, 1.2, --, --, -0.4, -1.1], + mask=[False, False, True, True, False, False], + fill_value=1e+20) + + """ + if v2 < v1: + (v1, v2) = (v2, v1) + xf = filled(x) + condition = (xf >= v1) & (xf <= v2) + return masked_where(condition, x, copy=copy) + + +def masked_outside(x, v1, v2, copy=True): + """ + Mask an array outside a given interval. + + Shortcut to ``masked_where``, where `condition` is True for `x` outside + the interval [v1,v2] (x < v1)|(x > v2). + The boundaries `v1` and `v2` can be given in either order. + + See Also + -------- + masked_where : Mask where a condition is met. 
+
+    Notes
+    -----
+    The array `x` is prefilled with its filling value.
+
+    Examples
+    --------
+    >>> import numpy.ma as ma
+    >>> x = [0.31, 1.2, 0.01, 0.2, -0.4, -1.1]
+    >>> ma.masked_outside(x, -0.3, 0.3)
+    masked_array(data=[--, --, 0.01, 0.2, --, --],
+                 mask=[ True,  True, False, False,  True,  True],
+           fill_value=1e+20)
+
+    The order of `v1` and `v2` doesn't matter.
+
+    >>> ma.masked_outside(x, 0.3, -0.3)
+    masked_array(data=[--, --, 0.01, 0.2, --, --],
+                 mask=[ True,  True, False, False,  True,  True],
+           fill_value=1e+20)
+
+    """
+    if v2 < v1:
+        (v1, v2) = (v2, v1)
+    xf = filled(x)
+    condition = (xf < v1) | (xf > v2)
+    return masked_where(condition, x, copy=copy)
+
+
+def masked_object(x, value, copy=True, shrink=True):
+    """
+    Mask the array `x` where the data are exactly equal to value.
+
+    This function is similar to `masked_values`, but only suitable
+    for object arrays: for floating point, use `masked_values` instead.
+
+    Parameters
+    ----------
+    x : array_like
+        Array to mask
+    value : object
+        Comparison value
+    copy : {True, False}, optional
+        Whether to return a copy of `x`.
+    shrink : {True, False}, optional
+        Whether to collapse a mask full of False to nomask
+
+    Returns
+    -------
+    result : MaskedArray
+        The result of masking `x` where equal to `value`.
+
+    See Also
+    --------
+    masked_where : Mask where a condition is met.
+    masked_equal : Mask where equal to a given value (integers).
+    masked_values : Mask using floating point equality.
+
+    Examples
+    --------
+    >>> import numpy.ma as ma
+    >>> food = np.array(['green_eggs', 'ham'], dtype=object)
+    >>> # don't eat spoiled food
+    >>> eat = ma.masked_object(food, 'green_eggs')
+    >>> eat
+    masked_array(data=[--, 'ham'],
+                 mask=[ True, False],
+           fill_value='green_eggs',
+                dtype=object)
+    >>> # plain ol' ham is boring
+    >>> fresh_food = np.array(['cheese', 'ham', 'pineapple'], dtype=object)
+    >>> eat = ma.masked_object(fresh_food, 'green_eggs')
+    >>> eat
+    masked_array(data=['cheese', 'ham', 'pineapple'],
+                 mask=False,
+           fill_value='green_eggs',
+                dtype=object)
+
+    Note that `mask` is set to ``nomask`` if possible.
+
+    >>> eat
+    masked_array(data=['cheese', 'ham', 'pineapple'],
+                 mask=False,
+           fill_value='green_eggs',
+                dtype=object)
+
+    """
+    if isMaskedArray(x):
+        condition = umath.equal(x._data, value)
+        mask = x._mask
+    else:
+        condition = umath.equal(np.asarray(x), value)
+        mask = nomask
+    mask = mask_or(mask, make_mask(condition, shrink=shrink))
+    return masked_array(x, mask=mask, copy=copy, fill_value=value)
+
+
+def masked_values(x, value, rtol=1e-5, atol=1e-8, copy=True, shrink=True):
+    """
+    Mask using floating point equality.
+
+    Return a MaskedArray, masked where the data in array `x` are approximately
+    equal to `value`, determined using `isclose`. The default tolerances for
+    `masked_values` are the same as those for `isclose`.
+
+    For integer types, exact equality is used, in the same way as
+    `masked_equal`.
+
+    The fill_value is set to `value` and the mask is set to ``nomask`` if
+    possible.
+
+    Parameters
+    ----------
+    x : array_like
+        Array to mask.
+    value : float
+        Masking value.
+    rtol, atol : float, optional
+        Tolerance parameters passed on to `isclose`
+    copy : bool, optional
+        Whether to return a copy of `x`.
+    shrink : bool, optional
+        Whether to collapse a mask full of False to ``nomask``.
+
+    Returns
+    -------
+    result : MaskedArray
+        The result of masking `x` where approximately equal to `value`.
+
+    See Also
+    --------
+    masked_where : Mask where a condition is met.
+ masked_equal : Mask where equal to a given value (integers). + + Examples + -------- + >>> import numpy.ma as ma + >>> x = np.array([1, 1.1, 2, 1.1, 3]) + >>> ma.masked_values(x, 1.1) + masked_array(data=[1.0, --, 2.0, --, 3.0], + mask=[False, True, False, True, False], + fill_value=1.1) + + Note that `mask` is set to ``nomask`` if possible. + + >>> ma.masked_values(x, 1.5) + masked_array(data=[1. , 1.1, 2. , 1.1, 3. ], + mask=False, + fill_value=1.5) + + For integers, the fill value will be different in general to the + result of ``masked_equal``. + + >>> x = np.arange(5) + >>> x + array([0, 1, 2, 3, 4]) + >>> ma.masked_values(x, 2) + masked_array(data=[0, 1, --, 3, 4], + mask=[False, False, True, False, False], + fill_value=2) + >>> ma.masked_equal(x, 2) + masked_array(data=[0, 1, --, 3, 4], + mask=[False, False, True, False, False], + fill_value=2) + + """ + xnew = filled(x, value) + if np.issubdtype(xnew.dtype, np.floating): + mask = np.isclose(xnew, value, atol=atol, rtol=rtol) + else: + mask = umath.equal(xnew, value) + ret = masked_array(xnew, mask=mask, copy=copy, fill_value=value) + if shrink: + ret.shrink_mask() + return ret + + +def masked_invalid(a, copy=True): + """ + Mask an array where invalid values occur (NaNs or infs). + + This function is a shortcut to ``masked_where``, with + `condition` = ~(np.isfinite(a)). Any pre-existing mask is conserved. + Only applies to arrays with a dtype where NaNs or infs make sense + (i.e. floating point types), but accepts any array_like object. + + See Also + -------- + masked_where : Mask where a condition is met. + + Examples + -------- + >>> import numpy.ma as ma + >>> a = np.arange(5, dtype=float) + >>> a[2] = np.NaN + >>> a[3] = np.PINF + >>> a + array([ 0., 1., nan, inf, 4.]) + >>> ma.masked_invalid(a) + masked_array(data=[0.0, 1.0, --, --, 4.0], + mask=[False, False, True, True, False], + fill_value=1e+20) + + """ + a = np.array(a, copy=copy, subok=True) + mask = getattr(a, '_mask', None) + if mask is not None: + condition = ~(np.isfinite(getdata(a))) + if mask is not nomask: + condition |= mask + cls = type(a) + else: + condition = ~(np.isfinite(a)) + cls = MaskedArray + result = a.view(cls) + result._mask = condition + return result + + +############################################################################### +# Printing options # +############################################################################### + + +class _MaskedPrintOption: + """ + Handle the string used to represent missing data in a masked array. + + """ + + def __init__(self, display): + """ + Create the masked_print_option object. + + """ + self._display = display + self._enabled = True + + def display(self): + """ + Display the string to print for masked values. + + """ + return self._display + + def set_display(self, s): + """ + Set the string to print for masked values. + + """ + self._display = s + + def enabled(self): + """ + Is the use of the display value enabled? + + """ + return self._enabled + + def enable(self, shrink=1): + """ + Set the enabling shrink to `shrink`. + + """ + self._enabled = shrink + + def __str__(self): + return str(self._display) + + __repr__ = __str__ + +# if you single index into a masked location you get this object. +masked_print_option = _MaskedPrintOption('--') + + +def _recursive_printoption(result, mask, printopt): + """ + Puts printoptions in result where mask is True. 
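+
+    Illustrative (assumes direct access to this private helper):
+
+    >>> res = np.array([1, 2, 3], dtype=object)
+    >>> _recursive_printoption(res, np.array([0, 1, 0], bool),
+    ...                        masked_print_option)
+    >>> res
+    array([1, --, 3], dtype=object)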
+ + Private function allowing for recursion + + """ + names = result.dtype.names + if names is not None: + for name in names: + curdata = result[name] + curmask = mask[name] + _recursive_printoption(curdata, curmask, printopt) + else: + np.copyto(result, printopt, where=mask) + return + +# For better or worse, these end in a newline +_legacy_print_templates = dict( + long_std=textwrap.dedent("""\ + masked_%(name)s(data = + %(data)s, + %(nlen)s mask = + %(mask)s, + %(nlen)s fill_value = %(fill)s) + """), + long_flx=textwrap.dedent("""\ + masked_%(name)s(data = + %(data)s, + %(nlen)s mask = + %(mask)s, + %(nlen)s fill_value = %(fill)s, + %(nlen)s dtype = %(dtype)s) + """), + short_std=textwrap.dedent("""\ + masked_%(name)s(data = %(data)s, + %(nlen)s mask = %(mask)s, + %(nlen)s fill_value = %(fill)s) + """), + short_flx=textwrap.dedent("""\ + masked_%(name)s(data = %(data)s, + %(nlen)s mask = %(mask)s, + %(nlen)s fill_value = %(fill)s, + %(nlen)s dtype = %(dtype)s) + """) +) + +############################################################################### +# MaskedArray class # +############################################################################### + + +def _recursive_filled(a, mask, fill_value): + """ + Recursively fill `a` with `fill_value`. + + """ + names = a.dtype.names + for name in names: + current = a[name] + if current.dtype.names is not None: + _recursive_filled(current, mask[name], fill_value[name]) + else: + np.copyto(current, fill_value[name], where=mask[name]) + + +def flatten_structured_array(a): + """ + Flatten a structured array. + + The data type of the output is chosen such that it can represent all of the + (nested) fields. + + Parameters + ---------- + a : structured array + + Returns + ------- + output : masked array or ndarray + A flattened masked array if the input is a masked array, otherwise a + standard ndarray. + + Examples + -------- + >>> ndtype = [('a', int), ('b', float)] + >>> a = np.array([(1, 1), (2, 2)], dtype=ndtype) + >>> np.ma.flatten_structured_array(a) + array([[1., 1.], + [2., 2.]]) + + """ + + def flatten_sequence(iterable): + """ + Flattens a compound of nested iterables. + + """ + for elm in iter(iterable): + if hasattr(elm, '__iter__'): + yield from flatten_sequence(elm) + else: + yield elm + + a = np.asanyarray(a) + inishape = a.shape + a = a.ravel() + if isinstance(a, MaskedArray): + out = np.array([tuple(flatten_sequence(d.item())) for d in a._data]) + out = out.view(MaskedArray) + out._mask = np.array([tuple(flatten_sequence(d.item())) + for d in getmaskarray(a)]) + else: + out = np.array([tuple(flatten_sequence(d.item())) for d in a]) + if len(inishape) > 1: + newshape = list(out.shape) + newshape[0] = inishape + out.shape = tuple(flatten_sequence(newshape)) + return out + + +def _arraymethod(funcname, onmask=True): + """ + Return a class method wrapper around a basic array method. + + Creates a class method which returns a masked array, where the new + ``_data`` array is the output of the corresponding basic method called + on the original ``_data``. + + If `onmask` is True, the new mask is the output of the method called + on the initial mask. Otherwise, the new mask is just a reference + to the initial mask. + + Parameters + ---------- + funcname : str + Name of the function to apply on data. + onmask : bool + Whether the mask must be processed also (True) or left + alone (False). Default is True. Make available as `_onmask` + attribute. 
+
+    Returns
+    -------
+    method : instancemethod
+        Class method wrapper of the specified basic array method.
+
+    """
+    def wrapped_method(self, *args, **params):
+        result = getattr(self._data, funcname)(*args, **params)
+        result = result.view(type(self))
+        result._update_from(self)
+        mask = self._mask
+        if not onmask:
+            result.__setmask__(mask)
+        elif mask is not nomask:
+            # __setmask__ makes a copy, which we don't want
+            result._mask = getattr(mask, funcname)(*args, **params)
+        return result
+    methdoc = getattr(ndarray, funcname, None) or getattr(np, funcname, None)
+    if methdoc is not None:
+        wrapped_method.__doc__ = methdoc.__doc__
+    wrapped_method.__name__ = funcname
+    return wrapped_method
+
+
+class MaskedIterator:
+    """
+    Flat iterator object to iterate over masked arrays.
+
+    A `MaskedIterator` iterator is returned by ``x.flat`` for any masked array
+    `x`. It allows iterating over the array as if it were a 1-D array,
+    either in a for-loop or by calling its `next` method.
+
+    Iteration is done in C-contiguous style, with the last index varying the
+    fastest. The iterator can also be indexed using basic slicing or
+    advanced indexing.
+
+    See Also
+    --------
+    MaskedArray.flat : Return a flat iterator over an array.
+    MaskedArray.flatten : Returns a flattened copy of an array.
+
+    Notes
+    -----
+    `MaskedIterator` is not exported by the `ma` module. Instead of
+    instantiating a `MaskedIterator` directly, use `MaskedArray.flat`.
+
+    Examples
+    --------
+    >>> x = np.ma.array(np.arange(6).reshape(2, 3))
+    >>> fl = x.flat
+    >>> type(fl)
+    <class 'numpy.ma.core.MaskedIterator'>
+    >>> for item in fl:
+    ...     print(item)
+    ...
+    0
+    1
+    2
+    3
+    4
+    5
+
+    Extracting more than a single element by indexing the `MaskedIterator`
+    returns a masked array:
+
+    >>> fl[2:4]
+    masked_array(data=[2, 3],
+                 mask=False,
+           fill_value=999999)
+
+    """
+
+    def __init__(self, ma):
+        self.ma = ma
+        self.dataiter = ma._data.flat
+
+        if ma._mask is nomask:
+            self.maskiter = None
+        else:
+            self.maskiter = ma._mask.flat
+
+    def __iter__(self):
+        return self
+
+    def __getitem__(self, indx):
+        result = self.dataiter.__getitem__(indx).view(type(self.ma))
+        if self.maskiter is not None:
+            _mask = self.maskiter.__getitem__(indx)
+            if isinstance(_mask, ndarray):
+                # set shape to match that of data; this is needed for matrices
+                _mask.shape = result.shape
+                result._mask = _mask
+            elif isinstance(_mask, np.void):
+                return mvoid(result, mask=_mask, hardmask=self.ma._hardmask)
+            elif _mask:  # Just a scalar, masked
+                return masked
+        return result
+
+    # This won't work if ravel makes a copy
+    def __setitem__(self, index, value):
+        self.dataiter[index] = getdata(value)
+        if self.maskiter is not None:
+            self.maskiter[index] = getmaskarray(value)
+
+    def __next__(self):
+        """
+        Return the next value, or raise StopIteration.
+
+        Examples
+        --------
+        >>> x = np.ma.array([3, 2], mask=[0, 1])
+        >>> fl = x.flat
+        >>> next(fl)
+        3
+        >>> next(fl)
+        masked
+        >>> next(fl)
+        Traceback (most recent call last):
+        ...
+        StopIteration
+
+        """
+        d = next(self.dataiter)
+        if self.maskiter is not None:
+            m = next(self.maskiter)
+            if isinstance(m, np.void):
+                return mvoid(d, mask=m, hardmask=self.ma._hardmask)
+            elif m:  # Just a scalar, masked
+                return masked
+        return d
+
+
+class MaskedArray(ndarray):
+    """
+    An array class with possibly masked values.
+
+    Masked values of True exclude the corresponding element from any
+    computation.
+ + Construction:: + + x = MaskedArray(data, mask=nomask, dtype=None, copy=False, subok=True, + ndmin=0, fill_value=None, keep_mask=True, hard_mask=None, + shrink=True, order=None) + + Parameters + ---------- + data : array_like + Input data. + mask : sequence, optional + Mask. Must be convertible to an array of booleans with the same + shape as `data`. True indicates a masked (i.e. invalid) data. + dtype : dtype, optional + Data type of the output. + If `dtype` is None, the type of the data argument (``data.dtype``) + is used. If `dtype` is not None and different from ``data.dtype``, + a copy is performed. + copy : bool, optional + Whether to copy the input data (True), or to use a reference instead. + Default is False. + subok : bool, optional + Whether to return a subclass of `MaskedArray` if possible (True) or a + plain `MaskedArray`. Default is True. + ndmin : int, optional + Minimum number of dimensions. Default is 0. + fill_value : scalar, optional + Value used to fill in the masked values when necessary. + If None, a default based on the data-type is used. + keep_mask : bool, optional + Whether to combine `mask` with the mask of the input data, if any + (True), or to use only `mask` for the output (False). Default is True. + hard_mask : bool, optional + Whether to use a hard mask or not. With a hard mask, masked values + cannot be unmasked. Default is False. + shrink : bool, optional + Whether to force compression of an empty mask. Default is True. + order : {'C', 'F', 'A'}, optional + Specify the order of the array. If order is 'C', then the array + will be in C-contiguous order (last-index varies the fastest). + If order is 'F', then the returned array will be in + Fortran-contiguous order (first-index varies the fastest). + If order is 'A' (default), then the returned array may be + in any order (either C-, Fortran-contiguous, or even discontiguous), + unless a copy is required, in which case it will be C-contiguous. + + Examples + -------- + + The ``mask`` can be initialized with an array of boolean values + with the same shape as ``data``. + + >>> data = np.arange(6).reshape((2, 3)) + >>> np.ma.MaskedArray(data, mask=[[False, True, False], + ... [False, False, True]]) + masked_array( + data=[[0, --, 2], + [3, 4, --]], + mask=[[False, True, False], + [False, False, True]], + fill_value=999999) + + Alternatively, the ``mask`` can be initialized to homogeneous boolean + array with the same shape as ``data`` by passing in a scalar + boolean value: + + >>> np.ma.MaskedArray(data, mask=False) + masked_array( + data=[[0, 1, 2], + [3, 4, 5]], + mask=[[False, False, False], + [False, False, False]], + fill_value=999999) + + >>> np.ma.MaskedArray(data, mask=True) + masked_array( + data=[[--, --, --], + [--, --, --]], + mask=[[ True, True, True], + [ True, True, True]], + fill_value=999999, + dtype=int64) + + .. note:: + The recommended practice for initializing ``mask`` with a scalar + boolean value is to use ``True``/``False`` rather than + ``np.True_``/``np.False_``. The reason is :attr:`nomask` + is represented internally as ``np.False_``. + + >>> np.False_ is np.ma.nomask + True + + """ + + __array_priority__ = 15 + _defaultmask = nomask + _defaulthardmask = False + _baseclass = ndarray + + # Maximum number of elements per axis used when printing an array. The + # 1d case is handled separately because we need more values in this case. 
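+    # A plausible reading (assumption; the repr machinery that consumes
+    # these attributes lives further down): axes longer than these bounds
+    # are corner-extracted before the costly conversion to object dtype
+    # that renders masked entries as '--'.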
+ _print_width = 100 + _print_width_1d = 1500 + + def __new__(cls, data=None, mask=nomask, dtype=None, copy=False, + subok=True, ndmin=0, fill_value=None, keep_mask=True, + hard_mask=None, shrink=True, order=None): + """ + Create a new masked array from scratch. + + Notes + ----- + A masked array can also be created by taking a .view(MaskedArray). + + """ + # Process data. + _data = np.array(data, dtype=dtype, copy=copy, + order=order, subok=True, ndmin=ndmin) + _baseclass = getattr(data, '_baseclass', type(_data)) + # Check that we're not erasing the mask. + if isinstance(data, MaskedArray) and (data.shape != _data.shape): + copy = True + + # Here, we copy the _view_, so that we can attach new properties to it + # we must never do .view(MaskedConstant), as that would create a new + # instance of np.ma.masked, which make identity comparison fail + if isinstance(data, cls) and subok and not isinstance(data, MaskedConstant): + _data = ndarray.view(_data, type(data)) + else: + _data = ndarray.view(_data, cls) + + # Handle the case where data is not a subclass of ndarray, but + # still has the _mask attribute like MaskedArrays + if hasattr(data, '_mask') and not isinstance(data, ndarray): + _data._mask = data._mask + # FIXME: should we set `_data._sharedmask = True`? + # Process mask. + # Type of the mask + mdtype = make_mask_descr(_data.dtype) + + if mask is nomask: + # Case 1. : no mask in input. + # Erase the current mask ? + if not keep_mask: + # With a reduced version + if shrink: + _data._mask = nomask + # With full version + else: + _data._mask = np.zeros(_data.shape, dtype=mdtype) + # Check whether we missed something + elif isinstance(data, (tuple, list)): + try: + # If data is a sequence of masked array + mask = np.array( + [getmaskarray(np.asanyarray(m, dtype=_data.dtype)) + for m in data], dtype=mdtype) + except ValueError: + # If data is nested + mask = nomask + # Force shrinking of the mask if needed (and possible) + if (mdtype == MaskType) and mask.any(): + _data._mask = mask + _data._sharedmask = False + else: + _data._sharedmask = not copy + if copy: + _data._mask = _data._mask.copy() + # Reset the shape of the original mask + if getmask(data) is not nomask: + data._mask.shape = data.shape + else: + # Case 2. : With a mask in input. + # If mask is boolean, create an array of True or False + if mask is True and mdtype == MaskType: + mask = np.ones(_data.shape, dtype=mdtype) + elif mask is False and mdtype == MaskType: + mask = np.zeros(_data.shape, dtype=mdtype) + else: + # Read the mask with the current mdtype + try: + mask = np.array(mask, copy=copy, dtype=mdtype) + # Or assume it's a sequence of bool/int + except TypeError: + mask = np.array([tuple([m] * len(mdtype)) for m in mask], + dtype=mdtype) + # Make sure the mask and the data have the same shape + if mask.shape != _data.shape: + (nd, nm) = (_data.size, mask.size) + if nm == 1: + mask = np.resize(mask, _data.shape) + elif nm == nd: + mask = np.reshape(mask, _data.shape) + else: + msg = "Mask and data not compatible: data size is %i, " + \ + "mask size is %i." 
+ raise MaskError(msg % (nd, nm)) + copy = True + # Set the mask to the new value + if _data._mask is nomask: + _data._mask = mask + _data._sharedmask = not copy + else: + if not keep_mask: + _data._mask = mask + _data._sharedmask = not copy + else: + if _data.dtype.names is not None: + def _recursive_or(a, b): + "do a|=b on each field of a, recursively" + for name in a.dtype.names: + (af, bf) = (a[name], b[name]) + if af.dtype.names is not None: + _recursive_or(af, bf) + else: + af |= bf + + _recursive_or(_data._mask, mask) + else: + _data._mask = np.logical_or(mask, _data._mask) + _data._sharedmask = False + # Update fill_value. + if fill_value is None: + fill_value = getattr(data, '_fill_value', None) + # But don't run the check unless we have something to check. + if fill_value is not None: + _data._fill_value = _check_fill_value(fill_value, _data.dtype) + # Process extra options .. + if hard_mask is None: + _data._hardmask = getattr(data, '_hardmask', False) + else: + _data._hardmask = hard_mask + _data._baseclass = _baseclass + return _data + + + def _update_from(self, obj): + """ + Copies some attributes of obj to self. + + """ + if isinstance(obj, ndarray): + _baseclass = type(obj) + else: + _baseclass = ndarray + # We need to copy the _basedict to avoid backward propagation + _optinfo = {} + _optinfo.update(getattr(obj, '_optinfo', {})) + _optinfo.update(getattr(obj, '_basedict', {})) + if not isinstance(obj, MaskedArray): + _optinfo.update(getattr(obj, '__dict__', {})) + _dict = dict(_fill_value=getattr(obj, '_fill_value', None), + _hardmask=getattr(obj, '_hardmask', False), + _sharedmask=getattr(obj, '_sharedmask', False), + _isfield=getattr(obj, '_isfield', False), + _baseclass=getattr(obj, '_baseclass', _baseclass), + _optinfo=_optinfo, + _basedict=_optinfo) + self.__dict__.update(_dict) + self.__dict__.update(_optinfo) + return + + def __array_finalize__(self, obj): + """ + Finalizes the masked array. + + """ + # Get main attributes. + self._update_from(obj) + + # We have to decide how to initialize self.mask, based on + # obj.mask. This is very difficult. There might be some + # correspondence between the elements in the array we are being + # created from (= obj) and us. Or there might not. This method can + # be called in all kinds of places for all kinds of reasons -- could + # be empty_like, could be slicing, could be a ufunc, could be a view. + # The numpy subclassing interface simply doesn't give us any way + # to know, which means that at best this method will be based on + # guesswork and heuristics. To make things worse, there isn't even any + # clear consensus about what the desired behavior is. For instance, + # most users think that np.empty_like(marr) -- which goes via this + # method -- should return a masked array with an empty mask (see + # gh-3404 and linked discussions), but others disagree, and they have + # existing code which depends on empty_like returning an array that + # matches the input mask. + # + # Historically our algorithm was: if the template object mask had the + # same *number of elements* as us, then we used *it's mask object + # itself* as our mask, so that writes to us would also write to the + # original array. This is horribly broken in multiple ways. + # + # Now what we do instead is, if the template object mask has the same + # number of elements as us, and we do not have the same base pointer + # as the template object (b/c views like arr[...] 
should keep the same + # mask), then we make a copy of the template object mask and use + # that. This is also horribly broken but somewhat less so. Maybe. + if isinstance(obj, ndarray): + # XX: This looks like a bug -- shouldn't it check self.dtype + # instead? + if obj.dtype.names is not None: + _mask = getmaskarray(obj) + else: + _mask = getmask(obj) + + # If self and obj point to exactly the same data, then probably + # self is a simple view of obj (e.g., self = obj[...]), so they + # should share the same mask. (This isn't 100% reliable, e.g. self + # could be the first row of obj, or have strange strides, but as a + # heuristic it's not bad.) In all other cases, we make a copy of + # the mask, so that future modifications to 'self' do not end up + # side-effecting 'obj' as well. + if (_mask is not nomask and obj.__array_interface__["data"][0] + != self.__array_interface__["data"][0]): + # We should make a copy. But we could get here via astype, + # in which case the mask might need a new dtype as well + # (e.g., changing to or from a structured dtype), and the + # order could have changed. So, change the mask type if + # needed and use astype instead of copy. + if self.dtype == obj.dtype: + _mask_dtype = _mask.dtype + else: + _mask_dtype = make_mask_descr(self.dtype) + + if self.flags.c_contiguous: + order = "C" + elif self.flags.f_contiguous: + order = "F" + else: + order = "K" + + _mask = _mask.astype(_mask_dtype, order) + else: + # Take a view so shape changes, etc., do not propagate back. + _mask = _mask.view() + else: + _mask = nomask + + self._mask = _mask + # Finalize the mask + if self._mask is not nomask: + try: + self._mask.shape = self.shape + except ValueError: + self._mask = nomask + except (TypeError, AttributeError): + # When _mask.shape is not writable (because it's a void) + pass + + # Finalize the fill_value + if self._fill_value is not None: + self._fill_value = _check_fill_value(self._fill_value, self.dtype) + elif self.dtype.names is not None: + # Finalize the default fill_value for structured arrays + self._fill_value = _check_fill_value(None, self.dtype) + + def __array_wrap__(self, obj, context=None): + """ + Special hook for ufuncs. + + Wraps the numpy array and sets the mask according to context. 
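+
+        For example, applying a domain-restricted ufunc directly to a
+        masked array masks the entries where its domain is violated (an
+        illustrative sketch; the exact repr depends on the print mode):
+
+        >>> x = np.ma.array([1., 2., 3.])
+        >>> np.divide(x, [1., 0., 2.])
+        masked_array(data=[1.0, --, 1.5],
+                     mask=[False,  True, False],
+               fill_value=1e+20)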
+ + """ + if obj is self: # for in-place operations + result = obj + else: + result = obj.view(type(self)) + result._update_from(self) + + if context is not None: + result._mask = result._mask.copy() + func, args, out_i = context + # args sometimes contains outputs (gh-10459), which we don't want + input_args = args[:func.nin] + m = reduce(mask_or, [getmaskarray(arg) for arg in input_args]) + # Get the domain mask + domain = ufunc_domain.get(func, None) + if domain is not None: + # Take the domain, and make sure it's a ndarray + with np.errstate(divide='ignore', invalid='ignore'): + d = filled(domain(*input_args), True) + + if d.any(): + # Fill the result where the domain is wrong + try: + # Binary domain: take the last value + fill_value = ufunc_fills[func][-1] + except TypeError: + # Unary domain: just use this one + fill_value = ufunc_fills[func] + except KeyError: + # Domain not recognized, use fill_value instead + fill_value = self.fill_value + + np.copyto(result, fill_value, where=d) + + # Update the mask + if m is nomask: + m = d + else: + # Don't modify inplace, we risk back-propagation + m = (m | d) + + # Make sure the mask has the proper size + if result is not self and result.shape == () and m: + return masked + else: + result._mask = m + result._sharedmask = False + + return result + + def view(self, dtype=None, type=None, fill_value=None): + """ + Return a view of the MaskedArray data. + + Parameters + ---------- + dtype : data-type or ndarray sub-class, optional + Data-type descriptor of the returned view, e.g., float32 or int16. + The default, None, results in the view having the same data-type + as `a`. As with ``ndarray.view``, dtype can also be specified as + an ndarray sub-class, which then specifies the type of the + returned object (this is equivalent to setting the ``type`` + parameter). + type : Python type, optional + Type of the returned view, either ndarray or a subclass. The + default None results in type preservation. + fill_value : scalar, optional + The value to use for invalid entries (None by default). + If None, then this argument is inferred from the passed `dtype`, or + in its absence the original array, as discussed in the notes below. + + See Also + -------- + numpy.ndarray.view : Equivalent method on ndarray object. + + Notes + ----- + + ``a.view()`` is used two different ways: + + ``a.view(some_dtype)`` or ``a.view(dtype=some_dtype)`` constructs a view + of the array's memory with a different data-type. This can cause a + reinterpretation of the bytes of memory. + + ``a.view(ndarray_subclass)`` or ``a.view(type=ndarray_subclass)`` just + returns an instance of `ndarray_subclass` that looks at the same array + (same shape, dtype, etc.) This does not cause a reinterpretation of the + memory. + + If `fill_value` is not specified, but `dtype` is specified (and is not + an ndarray sub-class), the `fill_value` of the MaskedArray will be + reset. If neither `fill_value` nor `dtype` are specified (or if + `dtype` is an ndarray sub-class), then the fill value is preserved. + Finally, if `fill_value` is specified, but `dtype` is not, the fill + value is set to the specified value. + + For ``a.view(some_dtype)``, if ``some_dtype`` has a different number of + bytes per entry than the previous dtype (for example, converting a + regular array to a structured array), then the behavior of the view + cannot be predicted just from the superficial appearance of ``a`` (shown + by ``print(a)``). It also depends on exactly how ``a`` is stored in + memory. 
Therefore if ``a`` is C-ordered versus fortran-ordered, versus + defined as a slice or transpose, etc., the view may give different + results. + """ + + if dtype is None: + if type is None: + output = ndarray.view(self) + else: + output = ndarray.view(self, type) + elif type is None: + try: + if issubclass(dtype, ndarray): + output = ndarray.view(self, dtype) + dtype = None + else: + output = ndarray.view(self, dtype) + except TypeError: + output = ndarray.view(self, dtype) + else: + output = ndarray.view(self, dtype, type) + + # also make the mask be a view (so attr changes to the view's + # mask do no affect original object's mask) + # (especially important to avoid affecting np.masked singleton) + if getmask(output) is not nomask: + output._mask = output._mask.view() + + # Make sure to reset the _fill_value if needed + if getattr(output, '_fill_value', None) is not None: + if fill_value is None: + if dtype is None: + pass # leave _fill_value as is + else: + output._fill_value = None + else: + output.fill_value = fill_value + return output + + def __getitem__(self, indx): + """ + x.__getitem__(y) <==> x[y] + + Return the item described by i, as a masked array. + + """ + # We could directly use ndarray.__getitem__ on self. + # But then we would have to modify __array_finalize__ to prevent the + # mask of being reshaped if it hasn't been set up properly yet + # So it's easier to stick to the current version + dout = self.data[indx] + _mask = self._mask + + def _is_scalar(m): + return not isinstance(m, np.ndarray) + + def _scalar_heuristic(arr, elem): + """ + Return whether `elem` is a scalar result of indexing `arr`, or None + if undecidable without promoting nomask to a full mask + """ + # obviously a scalar + if not isinstance(elem, np.ndarray): + return True + + # object array scalar indexing can return anything + elif arr.dtype.type is np.object_: + if arr.dtype is not elem.dtype: + # elem is an array, but dtypes do not match, so must be + # an element + return True + + # well-behaved subclass that only returns 0d arrays when + # expected - this is not a scalar + elif type(arr).__getitem__ == ndarray.__getitem__: + return False + + return None + + if _mask is not nomask: + # _mask cannot be a subclass, so it tells us whether we should + # expect a scalar. It also cannot be of dtype object. + mout = _mask[indx] + scalar_expected = _is_scalar(mout) + + else: + # attempt to apply the heuristic to avoid constructing a full mask + mout = nomask + scalar_expected = _scalar_heuristic(self.data, dout) + if scalar_expected is None: + # heuristics have failed + # construct a full array, so we can be certain. This is costly. + # we could also fall back on ndarray.__getitem__(self.data, indx) + scalar_expected = _is_scalar(getmaskarray(self)[indx]) + + # Did we extract a single item? + if scalar_expected: + # A record + if isinstance(dout, np.void): + # We should always re-cast to mvoid, otherwise users can + # change masks on rows that already have masked values, but not + # on rows that have no masked values, which is inconsistent. + return mvoid(dout, mask=mout, hardmask=self._hardmask) + + # special case introduced in gh-5962 + elif (self.dtype.type is np.object_ and + isinstance(dout, np.ndarray) and + dout is not masked): + # If masked, turn into a MaskedArray, with everything masked. 
+ if mout: + return MaskedArray(dout, mask=True) + else: + return dout + + # Just a scalar + else: + if mout: + return masked + else: + return dout + else: + # Force dout to MA + dout = dout.view(type(self)) + # Inherit attributes from self + dout._update_from(self) + # Check the fill_value + if is_string_or_list_of_strings(indx): + if self._fill_value is not None: + dout._fill_value = self._fill_value[indx] + + # Something like gh-15895 has happened if this check fails. + # _fill_value should always be an ndarray. + if not isinstance(dout._fill_value, np.ndarray): + raise RuntimeError('Internal NumPy error.') + # If we're indexing a multidimensional field in a + # structured array (such as dtype("(2,)i2,(2,)i1")), + # dimensionality goes up (M[field].ndim == M.ndim + + # M.dtype[field].ndim). That's fine for + # M[field] but problematic for M[field].fill_value + # which should have shape () to avoid breaking several + # methods. There is no great way out, so set to + # first element. See issue #6723. + if dout._fill_value.ndim > 0: + if not (dout._fill_value == + dout._fill_value.flat[0]).all(): + warnings.warn( + "Upon accessing multidimensional field " + f"{indx!s}, need to keep dimensionality " + "of fill_value at 0. Discarding " + "heterogeneous fill_value and setting " + f"all to {dout._fill_value[0]!s}.", + stacklevel=2) + # Need to use `.flat[0:1].squeeze(...)` instead of just + # `.flat[0]` to ensure the result is a 0d array and not + # a scalar. + dout._fill_value = dout._fill_value.flat[0:1].squeeze(axis=0) + dout._isfield = True + # Update the mask if needed + if mout is not nomask: + # set shape to match that of data; this is needed for matrices + dout._mask = reshape(mout, dout.shape) + dout._sharedmask = True + # Note: Don't try to check for m.any(), that'll take too long + return dout + + def __setitem__(self, indx, value): + """ + x.__setitem__(i, y) <==> x[i]=y + + Set item described by index. If value is masked, masks those + locations. + + """ + if self is masked: + raise MaskError('Cannot alter the masked element.') + _data = self._data + _mask = self._mask + if isinstance(indx, str): + _data[indx] = value + if _mask is nomask: + self._mask = _mask = make_mask_none(self.shape, self.dtype) + _mask[indx] = getmask(value) + return + + _dtype = _data.dtype + + if value is masked: + # The mask wasn't set: create a full version. + if _mask is nomask: + _mask = self._mask = make_mask_none(self.shape, _dtype) + # Now, set the mask to its value. + if _dtype.names is not None: + _mask[indx] = tuple([True] * len(_dtype.names)) + else: + _mask[indx] = True + return + + # Get the _data part of the new value + dval = getattr(value, '_data', value) + # Get the _mask part of the new value + mval = getmask(value) + if _dtype.names is not None and mval is nomask: + mval = tuple([False] * len(_dtype.names)) + if _mask is nomask: + # Set the data, then the mask + _data[indx] = dval + if mval is not nomask: + _mask = self._mask = make_mask_none(self.shape, _dtype) + _mask[indx] = mval + elif not self._hardmask: + # Set the data, then the mask + if (isinstance(indx, masked_array) and + not isinstance(value, masked_array)): + _data[indx.data] = dval + else: + _data[indx] = dval + _mask[indx] = mval + elif hasattr(indx, 'dtype') and (indx.dtype == MaskType): + indx = indx * umath.logical_not(_mask) + _data[indx] = dval + else: + if _dtype.names is not None: + err_msg = "Flexible 'hard' masks are not yet supported." 
+ raise NotImplementedError(err_msg) + mindx = mask_or(_mask[indx], mval, copy=True) + dindx = self._data[indx] + if dindx.size > 1: + np.copyto(dindx, dval, where=~mindx) + elif mindx is nomask: + dindx = dval + _data[indx] = dindx + _mask[indx] = mindx + return + + # Define so that we can overwrite the setter. + @property + def dtype(self): + return super().dtype + + @dtype.setter + def dtype(self, dtype): + super(MaskedArray, type(self)).dtype.__set__(self, dtype) + if self._mask is not nomask: + self._mask = self._mask.view(make_mask_descr(dtype), ndarray) + # Try to reset the shape of the mask (if we don't have a void). + # This raises a ValueError if the dtype change won't work. + try: + self._mask.shape = self.shape + except (AttributeError, TypeError): + pass + + @property + def shape(self): + return super().shape + + @shape.setter + def shape(self, shape): + super(MaskedArray, type(self)).shape.__set__(self, shape) + # Cannot use self._mask, since it may not (yet) exist when a + # masked matrix sets the shape. + if getmask(self) is not nomask: + self._mask.shape = self.shape + + def __setmask__(self, mask, copy=False): + """ + Set the mask. + + """ + idtype = self.dtype + current_mask = self._mask + if mask is masked: + mask = True + + if current_mask is nomask: + # Make sure the mask is set + # Just don't do anything if there's nothing to do. + if mask is nomask: + return + current_mask = self._mask = make_mask_none(self.shape, idtype) + + if idtype.names is None: + # No named fields. + # Hardmask: don't unmask the data + if self._hardmask: + current_mask |= mask + # Softmask: set everything to False + # If it's obviously a compatible scalar, use a quick update + # method. + elif isinstance(mask, (int, float, np.bool_, np.number)): + current_mask[...] = mask + # Otherwise fall back to the slower, general purpose way. + else: + current_mask.flat = mask + else: + # Named fields w/ + mdtype = current_mask.dtype + mask = np.array(mask, copy=False) + # Mask is a singleton + if not mask.ndim: + # It's a boolean : make a record + if mask.dtype.kind == 'b': + mask = np.array(tuple([mask.item()] * len(mdtype)), + dtype=mdtype) + # It's a record: make sure the dtype is correct + else: + mask = mask.astype(mdtype) + # Mask is a sequence + else: + # Make sure the new mask is a ndarray with the proper dtype + try: + mask = np.array(mask, copy=copy, dtype=mdtype) + # Or assume it's a sequence of bool/int + except TypeError: + mask = np.array([tuple([m] * len(mdtype)) for m in mask], + dtype=mdtype) + # Hardmask: don't unmask the data + if self._hardmask: + for n in idtype.names: + current_mask[n] |= mask[n] + # Softmask: set everything to False + # If it's obviously a compatible scalar, use a quick update + # method. + elif isinstance(mask, (int, float, np.bool_, np.number)): + current_mask[...] = mask + # Otherwise fall back to the slower, general purpose way. + else: + current_mask.flat = mask + # Reshape if needed + if current_mask.shape: + current_mask.shape = self.shape + return + + _set_mask = __setmask__ + + @property + def mask(self): + """ Current mask. """ + + # We could try to force a reshape, but that wouldn't work in some + # cases. + # Return a view so that the dtype and shape cannot be changed in place + # This still preserves nomask by identity + return self._mask.view() + + @mask.setter + def mask(self, value): + self.__setmask__(value) + + @property + def recordmask(self): + """ + Get or set the mask of the array if it has no named fields. 
For + structured arrays, returns a ndarray of booleans where entries are + ``True`` if **all** the fields are masked, ``False`` otherwise: + + >>> x = np.ma.array([(1, 1), (2, 2), (3, 3), (4, 4), (5, 5)], + ... mask=[(0, 0), (1, 0), (1, 1), (0, 1), (0, 0)], + ... dtype=[('a', int), ('b', int)]) + >>> x.recordmask + array([False, False, True, False, False]) + """ + + _mask = self._mask.view(ndarray) + if _mask.dtype.names is None: + return _mask + return np.all(flatten_structured_array(_mask), axis=-1) + + @recordmask.setter + def recordmask(self, mask): + raise NotImplementedError("Coming soon: setting the mask per records!") + + def harden_mask(self): + """ + Force the mask to hard. + + Whether the mask of a masked array is hard or soft is determined by + its `~ma.MaskedArray.hardmask` property. `harden_mask` sets + `~ma.MaskedArray.hardmask` to ``True``. + + See Also + -------- + ma.MaskedArray.hardmask + + """ + self._hardmask = True + return self + + def soften_mask(self): + """ + Force the mask to soft. + + Whether the mask of a masked array is hard or soft is determined by + its `~ma.MaskedArray.hardmask` property. `soften_mask` sets + `~ma.MaskedArray.hardmask` to ``False``. + + See Also + -------- + ma.MaskedArray.hardmask + + """ + self._hardmask = False + return self + + @property + def hardmask(self): + """ Hardness of the mask """ + return self._hardmask + + def unshare_mask(self): + """ + Copy the mask and set the sharedmask flag to False. + + Whether the mask is shared between masked arrays can be seen from + the `sharedmask` property. `unshare_mask` ensures the mask is not shared. + A copy of the mask is only made if it was shared. + + See Also + -------- + sharedmask + + """ + if self._sharedmask: + self._mask = self._mask.copy() + self._sharedmask = False + return self + + @property + def sharedmask(self): + """ Share status of the mask (read-only). """ + return self._sharedmask + + def shrink_mask(self): + """ + Reduce a mask to nomask when possible. + + Parameters + ---------- + None + + Returns + ------- + None + + Examples + -------- + >>> x = np.ma.array([[1,2 ], [3, 4]], mask=[0]*4) + >>> x.mask + array([[False, False], + [False, False]]) + >>> x.shrink_mask() + masked_array( + data=[[1, 2], + [3, 4]], + mask=False, + fill_value=999999) + >>> x.mask + False + + """ + self._mask = _shrink_mask(self._mask) + return self + + @property + def baseclass(self): + """ Class of the underlying data (read-only). """ + return self._baseclass + + def _get_data(self): + """ + Returns the underlying data, as a view of the masked array. + + If the underlying data is a subclass of :class:`numpy.ndarray`, it is + returned as such. + + >>> x = np.ma.array(np.matrix([[1, 2], [3, 4]]), mask=[[0, 1], [1, 0]]) + >>> x.data + matrix([[1, 2], + [3, 4]]) + + The type of the data can be accessed through the :attr:`baseclass` + attribute. + """ + return ndarray.view(self, self._baseclass) + + _data = property(fget=_get_data) + data = property(fget=_get_data) + + @property + def flat(self): + """ Return a flat iterator, or set a flattened version of self to value. """ + return MaskedIterator(self) + + @flat.setter + def flat(self, value): + y = self.ravel() + y[:] = value + + @property + def fill_value(self): + """ + The filling value of the masked array is a scalar. When setting, None + will set to a default based on the data type. + + Examples + -------- + >>> for dt in [np.int32, np.int64, np.float64, np.complex128]: + ... np.ma.array([0, 1], dtype=dt).get_fill_value() + ... 
+        999999
+        999999
+        1e+20
+        (1e+20+0j)
+
+        >>> x = np.ma.array([0, 1.], fill_value=-np.inf)
+        >>> x.fill_value
+        -inf
+        >>> x.fill_value = np.pi
+        >>> x.fill_value
+        3.1415926535897931 # may vary
+
+        Reset to default:
+
+        >>> x.fill_value = None
+        >>> x.fill_value
+        1e+20
+
+        """
+        if self._fill_value is None:
+            self._fill_value = _check_fill_value(None, self.dtype)
+
+        # Temporary workaround to account for the fact that str and bytes
+        # scalars cannot be indexed with (), whereas all other numpy
+        # scalars can. See issues #7259 and #7267.
+        # The if-block can be removed after #7267 has been fixed.
+        if isinstance(self._fill_value, ndarray):
+            return self._fill_value[()]
+        return self._fill_value
+
+    @fill_value.setter
+    def fill_value(self, value=None):
+        target = _check_fill_value(value, self.dtype)
+        if not target.ndim == 0:
+            # 2019-11-12, 1.18.0
+            warnings.warn(
+                "Non-scalar arrays for the fill value are deprecated. Use "
+                "arrays with scalar values instead. The filled function "
+                "still supports any array as `fill_value`.",
+                DeprecationWarning, stacklevel=2)
+
+        _fill_value = self._fill_value
+        if _fill_value is None:
+            # Create the attribute if it was undefined
+            self._fill_value = target
+        else:
+            # Don't overwrite the attribute, just fill it (for propagation)
+            _fill_value[()] = target
+
+    # kept for compatibility
+    get_fill_value = fill_value.fget
+    set_fill_value = fill_value.fset
+
+    def filled(self, fill_value=None):
+        """
+        Return a copy of self, with masked values filled with a given value.
+        **However**, if there are no masked values to fill, self will be
+        returned instead as an ndarray.
+
+        Parameters
+        ----------
+        fill_value : array_like, optional
+            The value to use for invalid entries. Can be scalar or non-scalar.
+            If non-scalar, the resulting ndarray must be broadcastable over
+            input array. Default is None, in which case, the `fill_value`
+            attribute of the array is used instead.
+
+        Returns
+        -------
+        filled_array : ndarray
+            A copy of ``self`` with invalid entries replaced by *fill_value*
+            (be it the function argument or the attribute of ``self``), or
+            ``self`` itself as an ndarray if there are no invalid entries to
+            be replaced.
+
+        Notes
+        -----
+        The result is **not** a MaskedArray!
+
+        Examples
+        --------
+        >>> x = np.ma.array([1,2,3,4,5], mask=[0,0,1,0,1], fill_value=-999)
+        >>> x.filled()
+        array([   1,    2, -999,    4, -999])
+        >>> x.filled(fill_value=1000)
+        array([   1,    2, 1000,    4, 1000])
+        >>> type(x.filled())
+        <class 'numpy.ndarray'>
+
+        Subclassing is preserved. This means that if, e.g., the data part of
+        the masked array is a recarray, `filled` returns a recarray:
+
+        >>> x = np.array([(-1, 2), (-3, 4)], dtype='i8,i8').view(np.recarray)
+        >>> m = np.ma.array(x, mask=[(True, False), (False, True)])
+        >>> m.filled()
+        rec.array([(999999,      2), (    -3, 999999)],
+                  dtype=[('f0', '<i8'), ('f1', '<i8')])
+        """
+        m = self._mask
+        if m is nomask:
+            return self._data
+
+        if fill_value is None:
+            fill_value = self.fill_value
+        else:
+            fill_value = _check_fill_value(fill_value, self.dtype)
+
+        if self is masked_singleton:
+            return np.asanyarray(fill_value)
+
+        if m.dtype.names is not None:
+            result = self._data.copy('K')
+            _recursive_filled(result, self._mask, fill_value)
+        elif self is masked:
+            result = np.asanyarray(fill_value)
+        elif m is nomask:
+            result = self._data
+        else:
+            result = self._data.copy('K')
+            try:
+                np.copyto(result, fill_value, where=m)
+            except (TypeError, AttributeError):
+                fill_value = narray(fill_value, dtype=object)
+                d = result.astype(object)
+                result = np.choose(m, (d, fill_value))
+            except IndexError:
+                # ok, if scalar
+                if self._data.shape:
+                    raise
+                elif m:
+                    result = np.array(fill_value, dtype=self.dtype)
+                else:
+                    result = self._data
+        return result
+
+    def compressed(self):
+        """
+        Return all the non-masked data as a 1-D array.
+
+        Returns
+        -------
+        data : ndarray
+            A new `ndarray` holding the non-masked data is returned.
+
+        Notes
+        -----
+        The result is **not** a MaskedArray!
+
+        Examples
+        --------
+        >>> x = np.ma.array(np.arange(5), mask=[0]*2 + [1]*3)
+        >>> x.compressed()
+        array([0, 1])
+        >>> type(x.compressed())
+        <class 'numpy.ndarray'>
+
+        """
+        data = ndarray.ravel(self._data)
+        if self._mask is not nomask:
+            data = data.compress(np.logical_not(ndarray.ravel(self._mask)))
+        return data
+
+    def compress(self, condition, axis=None, out=None):
+        """
+        Return `a` where condition is ``True``.
+
+        If condition is a `~ma.MaskedArray`, missing values are considered
+        as ``False``.
+
+        Parameters
+        ----------
+        condition : var
+            Boolean 1-d array selecting which entries to return. If len(condition)
+            is less than the size of a along the axis, then output is truncated
+            to length of condition array.
+ axis : {None, int}, optional + Axis along which the operation must be performed. + out : {None, ndarray}, optional + Alternative output array in which to place the result. It must have + the same shape as the expected output but the type will be cast if + necessary. + + Returns + ------- + result : MaskedArray + A :class:`~ma.MaskedArray` object. + + Notes + ----- + Please note the difference with :meth:`compressed` ! + The output of :meth:`compress` has a mask, the output of + :meth:`compressed` does not. + + Examples + -------- + >>> x = np.ma.array([[1,2,3],[4,5,6],[7,8,9]], mask=[0] + [1,0]*4) + >>> x + masked_array( + data=[[1, --, 3], + [--, 5, --], + [7, --, 9]], + mask=[[False, True, False], + [ True, False, True], + [False, True, False]], + fill_value=999999) + >>> x.compress([1, 0, 1]) + masked_array(data=[1, 3], + mask=[False, False], + fill_value=999999) + + >>> x.compress([1, 0, 1], axis=1) + masked_array( + data=[[1, 3], + [--, --], + [7, 9]], + mask=[[False, False], + [ True, True], + [False, False]], + fill_value=999999) + + """ + # Get the basic components + (_data, _mask) = (self._data, self._mask) + + # Force the condition to a regular ndarray and forget the missing + # values. + condition = np.asarray(condition) + + _new = _data.compress(condition, axis=axis, out=out).view(type(self)) + _new._update_from(self) + if _mask is not nomask: + _new._mask = _mask.compress(condition, axis=axis) + return _new + + def _insert_masked_print(self): + """ + Replace masked values with masked_print_option, casting all innermost + dtypes to object. + """ + if masked_print_option.enabled(): + mask = self._mask + if mask is nomask: + res = self._data + else: + # convert to object array to make filled work + data = self._data + # For big arrays, to avoid a costly conversion to the + # object dtype, extract the corners before the conversion. + print_width = (self._print_width if self.ndim > 1 + else self._print_width_1d) + for axis in range(self.ndim): + if data.shape[axis] > print_width: + ind = print_width // 2 + arr = np.split(data, (ind, -ind), axis=axis) + data = np.concatenate((arr[0], arr[2]), axis=axis) + arr = np.split(mask, (ind, -ind), axis=axis) + mask = np.concatenate((arr[0], arr[2]), axis=axis) + + rdtype = _replace_dtype_fields(self.dtype, "O") + res = data.astype(rdtype) + _recursive_printoption(res, mask, masked_print_option) + else: + res = self.filled(self.fill_value) + return res + + def __str__(self): + return str(self._insert_masked_print()) + + def __repr__(self): + """ + Literal string representation. 
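+
+        For example (an illustrative sketch; output shown for the default,
+        non-legacy print mode):
+
+        >>> np.ma.array([1, 2, 3], mask=[0, 1, 0])
+        masked_array(data=[1, --, 3],
+                     mask=[False,  True, False],
+               fill_value=999999)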
+ + """ + if self._baseclass is np.ndarray: + name = 'array' + else: + name = self._baseclass.__name__ + + + # 2016-11-19: Demoted to legacy format + if np.core.arrayprint._get_legacy_print_mode() <= 113: + is_long = self.ndim > 1 + parameters = dict( + name=name, + nlen=" " * len(name), + data=str(self), + mask=str(self._mask), + fill=str(self.fill_value), + dtype=str(self.dtype) + ) + is_structured = bool(self.dtype.names) + key = '{}_{}'.format( + 'long' if is_long else 'short', + 'flx' if is_structured else 'std' + ) + return _legacy_print_templates[key] % parameters + + prefix = f"masked_{name}(" + + dtype_needed = ( + not np.core.arrayprint.dtype_is_implied(self.dtype) or + np.all(self.mask) or + self.size == 0 + ) + + # determine which keyword args need to be shown + keys = ['data', 'mask', 'fill_value'] + if dtype_needed: + keys.append('dtype') + + # array has only one row (non-column) + is_one_row = builtins.all(dim == 1 for dim in self.shape[:-1]) + + # choose what to indent each keyword with + min_indent = 2 + if is_one_row: + # first key on the same line as the type, remaining keys + # aligned by equals + indents = {} + indents[keys[0]] = prefix + for k in keys[1:]: + n = builtins.max(min_indent, len(prefix + keys[0]) - len(k)) + indents[k] = ' ' * n + prefix = '' # absorbed into the first indent + else: + # each key on its own line, indented by two spaces + indents = {k: ' ' * min_indent for k in keys} + prefix = prefix + '\n' # first key on the next line + + # format the field values + reprs = {} + reprs['data'] = np.array2string( + self._insert_masked_print(), + separator=", ", + prefix=indents['data'] + 'data=', + suffix=',') + reprs['mask'] = np.array2string( + self._mask, + separator=", ", + prefix=indents['mask'] + 'mask=', + suffix=',') + reprs['fill_value'] = repr(self.fill_value) + if dtype_needed: + reprs['dtype'] = np.core.arrayprint.dtype_short_repr(self.dtype) + + # join keys with values and indentations + result = ',\n'.join( + '{}{}={}'.format(indents[k], k, reprs[k]) + for k in keys + ) + return prefix + result + ')' + + def _delegate_binop(self, other): + # This emulates the logic in + # private/binop_override.h:forward_binop_should_defer + if isinstance(other, type(self)): + return False + array_ufunc = getattr(other, "__array_ufunc__", False) + if array_ufunc is False: + other_priority = getattr(other, "__array_priority__", -1000000) + return self.__array_priority__ < other_priority + else: + # If array_ufunc is not None, it will be called inside the ufunc; + # None explicitly tells us to not call the ufunc, i.e., defer. + return array_ufunc is None + + def _comparison(self, other, compare): + """Compare self with other using operator.eq or operator.ne. + + When either of the elements is masked, the result is masked as well, + but the underlying boolean data are still set, with self and other + considered equal if both are masked, and unequal otherwise. + + For structured arrays, all fields are combined, with masked values + ignored. The result is masked if all fields were masked, with self + and other considered equal only if both were fully masked. + """ + omask = getmask(other) + smask = self.mask + mask = mask_or(smask, omask, copy=True) + + odata = getdata(other) + if mask.dtype.names is not None: + # For possibly masked structured arrays we need to be careful, + # since the standard structured array comparison will use all + # fields, masked or not. 
To avoid masked fields influencing the + # outcome, we set all masked fields in self to other, so they'll + # count as equal. To prepare, we ensure we have the right shape. + broadcast_shape = np.broadcast(self, odata).shape + sbroadcast = np.broadcast_to(self, broadcast_shape, subok=True) + sbroadcast._mask = mask + sdata = sbroadcast.filled(odata) + # Now take care of the mask; the merged mask should have an item + # masked if all fields were masked (in one and/or other). + mask = (mask == np.ones((), mask.dtype)) + + else: + # For regular arrays, just use the data as they come. + sdata = self.data + + check = compare(sdata, odata) + + if isinstance(check, (np.bool_, bool)): + return masked if mask else check + + if mask is not nomask: + # Adjust elements that were masked, which should be treated + # as equal if masked in both, unequal if masked in one. + # Note that this works automatically for structured arrays too. + check = np.where(mask, compare(smask, omask), check) + if mask.shape != check.shape: + # Guarantee consistency of the shape, making a copy since the + # the mask may need to get written to later. + mask = np.broadcast_to(mask, check.shape).copy() + + check = check.view(type(self)) + check._update_from(self) + check._mask = mask + + # Cast fill value to bool_ if needed. If it cannot be cast, the + # default boolean fill value is used. + if check._fill_value is not None: + try: + fill = _check_fill_value(check._fill_value, np.bool_) + except (TypeError, ValueError): + fill = _check_fill_value(None, np.bool_) + check._fill_value = fill + + return check + + def __eq__(self, other): + """Check whether other equals self elementwise. + + When either of the elements is masked, the result is masked as well, + but the underlying boolean data are still set, with self and other + considered equal if both are masked, and unequal otherwise. + + For structured arrays, all fields are combined, with masked values + ignored. The result is masked if all fields were masked, with self + and other considered equal only if both were fully masked. + """ + return self._comparison(other, operator.eq) + + def __ne__(self, other): + """Check whether other does not equal self elementwise. + + When either of the elements is masked, the result is masked as well, + but the underlying boolean data are still set, with self and other + considered equal if both are masked, and unequal otherwise. + + For structured arrays, all fields are combined, with masked values + ignored. The result is masked if all fields were masked, with self + and other considered equal only if both were fully masked. + """ + return self._comparison(other, operator.ne) + + def __add__(self, other): + """ + Add self to other, and return a new masked array. + + """ + if self._delegate_binop(other): + return NotImplemented + return add(self, other) + + def __radd__(self, other): + """ + Add other to self, and return a new masked array. + + """ + # In analogy with __rsub__ and __rdiv__, use original order: + # we get here from `other + self`. + return add(other, self) + + def __sub__(self, other): + """ + Subtract other from self, and return a new masked array. + + """ + if self._delegate_binop(other): + return NotImplemented + return subtract(self, other) + + def __rsub__(self, other): + """ + Subtract self from other, and return a new masked array. + + """ + return subtract(other, self) + + def __mul__(self, other): + "Multiply self by other, and return a new masked array." 
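+        # Illustrative sketch (the class below is hypothetical, not part of
+        # upstream): _delegate_binop defers to `other` when it is not a
+        # MaskedArray, defines no __array_ufunc__, and declares a higher
+        # __array_priority__ than ours (15), e.g.
+        #
+        #     class Quantity:
+        #         __array_priority__ = 20
+        #         def __rmul__(self, other):
+        #             return "Quantity wins"
+        #
+        # Then ``np.ma.array([1, 2]) * Quantity()`` returns NotImplemented
+        # here, and Python falls back to Quantity.__rmul__.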
+ if self._delegate_binop(other): + return NotImplemented + return multiply(self, other) + + def __rmul__(self, other): + """ + Multiply other by self, and return a new masked array. + + """ + # In analogy with __rsub__ and __rdiv__, use original order: + # we get here from `other * self`. + return multiply(other, self) + + def __div__(self, other): + """ + Divide other into self, and return a new masked array. + + """ + if self._delegate_binop(other): + return NotImplemented + return divide(self, other) + + def __truediv__(self, other): + """ + Divide other into self, and return a new masked array. + + """ + if self._delegate_binop(other): + return NotImplemented + return true_divide(self, other) + + def __rtruediv__(self, other): + """ + Divide self into other, and return a new masked array. + + """ + return true_divide(other, self) + + def __floordiv__(self, other): + """ + Divide other into self, and return a new masked array. + + """ + if self._delegate_binop(other): + return NotImplemented + return floor_divide(self, other) + + def __rfloordiv__(self, other): + """ + Divide self into other, and return a new masked array. + + """ + return floor_divide(other, self) + + def __pow__(self, other): + """ + Raise self to the power other, masking the potential NaNs/Infs + + """ + if self._delegate_binop(other): + return NotImplemented + return power(self, other) + + def __rpow__(self, other): + """ + Raise other to the power self, masking the potential NaNs/Infs + + """ + return power(other, self) + + def __iadd__(self, other): + """ + Add other to self in-place. + + """ + m = getmask(other) + if self._mask is nomask: + if m is not nomask and m.any(): + self._mask = make_mask_none(self.shape, self.dtype) + self._mask += m + else: + if m is not nomask: + self._mask += m + self._data.__iadd__(np.where(self._mask, self.dtype.type(0), + getdata(other))) + return self + + def __isub__(self, other): + """ + Subtract other from self in-place. + + """ + m = getmask(other) + if self._mask is nomask: + if m is not nomask and m.any(): + self._mask = make_mask_none(self.shape, self.dtype) + self._mask += m + elif m is not nomask: + self._mask += m + self._data.__isub__(np.where(self._mask, self.dtype.type(0), + getdata(other))) + return self + + def __imul__(self, other): + """ + Multiply self by other in-place. + + """ + m = getmask(other) + if self._mask is nomask: + if m is not nomask and m.any(): + self._mask = make_mask_none(self.shape, self.dtype) + self._mask += m + elif m is not nomask: + self._mask += m + self._data.__imul__(np.where(self._mask, self.dtype.type(1), + getdata(other))) + return self + + def __idiv__(self, other): + """ + Divide self by other in-place. + + """ + other_data = getdata(other) + dom_mask = _DomainSafeDivide().__call__(self._data, other_data) + other_mask = getmask(other) + new_mask = mask_or(other_mask, dom_mask) + # The following 3 lines control the domain filling + if dom_mask.any(): + (_, fval) = ufunc_fills[np.divide] + other_data = np.where(dom_mask, fval, other_data) + self._mask |= new_mask + self._data.__idiv__(np.where(self._mask, self.dtype.type(1), + other_data)) + return self + + def __ifloordiv__(self, other): + """ + Floor divide self by other in-place. 
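+
+        Entries whose divisor falls outside the safe domain (e.g. zero)
+        become masked in place (an illustrative sketch):
+
+        >>> a = np.ma.array([2, 4, 9])
+        >>> a //= [1, 0, 2]
+        >>> a
+        masked_array(data=[2, --, 4],
+                     mask=[False,  True, False],
+               fill_value=999999)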
+ + """ + other_data = getdata(other) + dom_mask = _DomainSafeDivide().__call__(self._data, other_data) + other_mask = getmask(other) + new_mask = mask_or(other_mask, dom_mask) + # The following 3 lines control the domain filling + if dom_mask.any(): + (_, fval) = ufunc_fills[np.floor_divide] + other_data = np.where(dom_mask, fval, other_data) + self._mask |= new_mask + self._data.__ifloordiv__(np.where(self._mask, self.dtype.type(1), + other_data)) + return self + + def __itruediv__(self, other): + """ + True divide self by other in-place. + + """ + other_data = getdata(other) + dom_mask = _DomainSafeDivide().__call__(self._data, other_data) + other_mask = getmask(other) + new_mask = mask_or(other_mask, dom_mask) + # The following 3 lines control the domain filling + if dom_mask.any(): + (_, fval) = ufunc_fills[np.true_divide] + other_data = np.where(dom_mask, fval, other_data) + self._mask |= new_mask + self._data.__itruediv__(np.where(self._mask, self.dtype.type(1), + other_data)) + return self + + def __ipow__(self, other): + """ + Raise self to the power other, in place. + + """ + other_data = getdata(other) + other_mask = getmask(other) + with np.errstate(divide='ignore', invalid='ignore'): + self._data.__ipow__(np.where(self._mask, self.dtype.type(1), + other_data)) + invalid = np.logical_not(np.isfinite(self._data)) + if invalid.any(): + if self._mask is not nomask: + self._mask |= invalid + else: + self._mask = invalid + np.copyto(self._data, self.fill_value, where=invalid) + new_mask = mask_or(other_mask, invalid) + self._mask = mask_or(self._mask, new_mask) + return self + + def __float__(self): + """ + Convert to float. + + """ + if self.size > 1: + raise TypeError("Only length-1 arrays can be converted " + "to Python scalars") + elif self._mask: + warnings.warn("Warning: converting a masked element to nan.", stacklevel=2) + return np.nan + return float(self.item()) + + def __int__(self): + """ + Convert to int. + + """ + if self.size > 1: + raise TypeError("Only length-1 arrays can be converted " + "to Python scalars") + elif self._mask: + raise MaskError('Cannot convert masked element to a Python int.') + return int(self.item()) + + @property + def imag(self): + """ + The imaginary part of the masked array. + + This property is a view on the imaginary part of this `MaskedArray`. + + See Also + -------- + real + + Examples + -------- + >>> x = np.ma.array([1+1.j, -2j, 3.45+1.6j], mask=[False, True, False]) + >>> x.imag + masked_array(data=[1.0, --, 1.6], + mask=[False, True, False], + fill_value=1e+20) + + """ + result = self._data.imag.view(type(self)) + result.__setmask__(self._mask) + return result + + # kept for compatibility + get_imag = imag.fget + + @property + def real(self): + """ + The real part of the masked array. + + This property is a view on the real part of this `MaskedArray`. + + See Also + -------- + imag + + Examples + -------- + >>> x = np.ma.array([1+1.j, -2j, 3.45+1.6j], mask=[False, True, False]) + >>> x.real + masked_array(data=[1.0, --, 3.45], + mask=[False, True, False], + fill_value=1e+20) + + """ + result = self._data.real.view(type(self)) + result.__setmask__(self._mask) + return result + + # kept for compatibility + get_real = real.fget + + def count(self, axis=None, keepdims=np._NoValue): + """ + Count the non-masked elements of the array along the given axis. + + Parameters + ---------- + axis : None or int or tuple of ints, optional + Axis or axes along which the count is performed. 
+ The default, None, performs the count over all + the dimensions of the input array. `axis` may be negative, in + which case it counts from the last to the first axis. + + .. versionadded:: 1.10.0 + + If this is a tuple of ints, the count is performed on multiple + axes, instead of a single axis or all the axes as before. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the array. + + Returns + ------- + result : ndarray or scalar + An array with the same shape as the input array, with the specified + axis removed. If the array is a 0-d array, or if `axis` is None, a + scalar is returned. + + See Also + -------- + ma.count_masked : Count masked elements in array or along a given axis. + + Examples + -------- + >>> import numpy.ma as ma + >>> a = ma.arange(6).reshape((2, 3)) + >>> a[1, :] = ma.masked + >>> a + masked_array( + data=[[0, 1, 2], + [--, --, --]], + mask=[[False, False, False], + [ True, True, True]], + fill_value=999999) + >>> a.count() + 3 + + When the `axis` keyword is specified an array of appropriate size is + returned. + + >>> a.count(axis=0) + array([1, 1, 1]) + >>> a.count(axis=1) + array([3, 0]) + + """ + kwargs = {} if keepdims is np._NoValue else {'keepdims': keepdims} + + m = self._mask + # special case for matrices (we assume no other subclasses modify + # their dimensions) + if isinstance(self.data, np.matrix): + if m is nomask: + m = np.zeros(self.shape, dtype=np.bool_) + m = m.view(type(self.data)) + + if m is nomask: + # compare to _count_reduce_items in _methods.py + + if self.shape == (): + if axis not in (None, 0): + raise np.AxisError(axis=axis, ndim=self.ndim) + return 1 + elif axis is None: + if kwargs.get('keepdims', False): + return np.array(self.size, dtype=np.intp, ndmin=self.ndim) + return self.size + + axes = normalize_axis_tuple(axis, self.ndim) + items = 1 + for ax in axes: + items *= self.shape[ax] + + if kwargs.get('keepdims', False): + out_dims = list(self.shape) + for a in axes: + out_dims[a] = 1 + else: + out_dims = [d for n, d in enumerate(self.shape) + if n not in axes] + # make sure to return a 0-d array if axis is supplied + return np.full(out_dims, items, dtype=np.intp) + + # take care of the masked singleton + if self is masked: + return 0 + + return (~m).sum(axis=axis, dtype=np.intp, **kwargs) + + def ravel(self, order='C'): + """ + Returns a 1D version of self, as a view. + + Parameters + ---------- + order : {'C', 'F', 'A', 'K'}, optional + The elements of `a` are read using this index order. 'C' means to + index the elements in C-like order, with the last axis index + changing fastest, back to the first axis index changing slowest. + 'F' means to index the elements in Fortran-like index order, with + the first index changing fastest, and the last index changing + slowest. Note that the 'C' and 'F' options take no account of the + memory layout of the underlying array, and only refer to the order + of axis indexing. 'A' means to read the elements in Fortran-like + index order if `m` is Fortran *contiguous* in memory, C-like order + otherwise. 'K' means to read the elements in the order they occur + in memory, except for reversing the data when strides are negative. + By default, 'C' index order is used. + + Returns + ------- + MaskedArray + Output view is of shape ``(self.size,)`` (or + ``(np.ma.product(self.shape),)``). 
+ + Examples + -------- + >>> x = np.ma.array([[1,2,3],[4,5,6],[7,8,9]], mask=[0] + [1,0]*4) + >>> x + masked_array( + data=[[1, --, 3], + [--, 5, --], + [7, --, 9]], + mask=[[False, True, False], + [ True, False, True], + [False, True, False]], + fill_value=999999) + >>> x.ravel() + masked_array(data=[1, --, 3, --, 5, --, 7, --, 9], + mask=[False, True, False, True, False, True, False, True, + False], + fill_value=999999) + + """ + r = ndarray.ravel(self._data, order=order).view(type(self)) + r._update_from(self) + if self._mask is not nomask: + r._mask = ndarray.ravel(self._mask, order=order).reshape(r.shape) + else: + r._mask = nomask + return r + + + def reshape(self, *s, **kwargs): + """ + Give a new shape to the array without changing its data. + + Returns a masked array containing the same data, but with a new shape. + The result is a view on the original array; if this is not possible, a + ValueError is raised. + + Parameters + ---------- + shape : int or tuple of ints + The new shape should be compatible with the original shape. If an + integer is supplied, then the result will be a 1-D array of that + length. + order : {'C', 'F'}, optional + Determines whether the array data should be viewed as in C + (row-major) or FORTRAN (column-major) order. + + Returns + ------- + reshaped_array : array + A new view on the array. + + See Also + -------- + reshape : Equivalent function in the masked array module. + numpy.ndarray.reshape : Equivalent method on ndarray object. + numpy.reshape : Equivalent function in the NumPy module. + + Notes + ----- + The reshaping operation cannot guarantee that a copy will not be made, + to modify the shape in place, use ``a.shape = s`` + + Examples + -------- + >>> x = np.ma.array([[1,2],[3,4]], mask=[1,0,0,1]) + >>> x + masked_array( + data=[[--, 2], + [3, --]], + mask=[[ True, False], + [False, True]], + fill_value=999999) + >>> x = x.reshape((4,1)) + >>> x + masked_array( + data=[[--], + [2], + [3], + [--]], + mask=[[ True], + [False], + [False], + [ True]], + fill_value=999999) + + """ + kwargs.update(order=kwargs.get('order', 'C')) + result = self._data.reshape(*s, **kwargs).view(type(self)) + result._update_from(self) + mask = self._mask + if mask is not nomask: + result._mask = mask.reshape(*s, **kwargs) + return result + + def resize(self, newshape, refcheck=True, order=False): + """ + .. warning:: + + This method does nothing, except raise a ValueError exception. A + masked array does not own its data and therefore cannot safely be + resized in place. Use the `numpy.ma.resize` function instead. + + This method is difficult to implement safely and may be deprecated in + future releases of NumPy. + + """ + # Note : the 'order' keyword looks broken, let's just drop it + errmsg = "A masked array does not own its data "\ + "and therefore cannot be resized.\n" \ + "Use the numpy.ma.resize function instead." + raise ValueError(errmsg) + + def put(self, indices, values, mode='raise'): + """ + Set storage-indexed locations to corresponding values. + + Sets self._data.flat[n] = values[n] for each n in indices. + If `values` is shorter than `indices` then it will repeat. + If `values` has some masked values, the initial mask is updated + in consequence, else the corresponding values are unmasked. + + Parameters + ---------- + indices : 1-D array_like + Target indices, interpreted as integers. + values : array_like + Values to place in self._data copy at target indices. 
+ mode : {'raise', 'wrap', 'clip'}, optional + Specifies how out-of-bounds indices will behave. + 'raise' : raise an error. + 'wrap' : wrap around. + 'clip' : clip to the range. + + Notes + ----- + `values` can be a scalar or length 1 array. + + Examples + -------- + >>> x = np.ma.array([[1,2,3],[4,5,6],[7,8,9]], mask=[0] + [1,0]*4) + >>> x + masked_array( + data=[[1, --, 3], + [--, 5, --], + [7, --, 9]], + mask=[[False, True, False], + [ True, False, True], + [False, True, False]], + fill_value=999999) + >>> x.put([0,4,8],[10,20,30]) + >>> x + masked_array( + data=[[10, --, 3], + [--, 20, --], + [7, --, 30]], + mask=[[False, True, False], + [ True, False, True], + [False, True, False]], + fill_value=999999) + + >>> x.put(4,999) + >>> x + masked_array( + data=[[10, --, 3], + [--, 999, --], + [7, --, 30]], + mask=[[False, True, False], + [ True, False, True], + [False, True, False]], + fill_value=999999) + + """ + # Hard mask: Get rid of the values/indices that fall on masked data + if self._hardmask and self._mask is not nomask: + mask = self._mask[indices] + indices = narray(indices, copy=False) + values = narray(values, copy=False, subok=True) + values.resize(indices.shape) + indices = indices[~mask] + values = values[~mask] + + self._data.put(indices, values, mode=mode) + + # short circuit if neither self nor values are masked + if self._mask is nomask and getmask(values) is nomask: + return + + m = getmaskarray(self) + + if getmask(values) is nomask: + m.put(indices, False, mode=mode) + else: + m.put(indices, values._mask, mode=mode) + m = make_mask(m, copy=False, shrink=True) + self._mask = m + return + + def ids(self): + """ + Return the addresses of the data and mask areas. + + Parameters + ---------- + None + + Examples + -------- + >>> x = np.ma.array([1, 2, 3], mask=[0, 1, 1]) + >>> x.ids() + (166670640, 166659832) # may vary + + If the array has no mask, the address of `nomask` is returned. This address + is typically not close to the data in memory: + + >>> x = np.ma.array([1, 2, 3]) + >>> x.ids() + (166691080, 3083169284) # may vary + + """ + if self._mask is nomask: + return (self.ctypes.data, id(nomask)) + return (self.ctypes.data, self._mask.ctypes.data) + + def iscontiguous(self): + """ + Return a boolean indicating whether the data is contiguous. + + Parameters + ---------- + None + + Examples + -------- + >>> x = np.ma.array([1, 2, 3]) + >>> x.iscontiguous() + True + + `iscontiguous` returns one of the flags of the masked array: + + >>> x.flags + C_CONTIGUOUS : True + F_CONTIGUOUS : True + OWNDATA : False + WRITEABLE : True + ALIGNED : True + WRITEBACKIFCOPY : False + UPDATEIFCOPY : False + + """ + return self.flags['CONTIGUOUS'] + + def all(self, axis=None, out=None, keepdims=np._NoValue): + """ + Returns True if all elements evaluate to True. + + The output array is masked where all the values along the given axis + are masked: if the output would have been a scalar and that all the + values are masked, then the output is `masked`. + + Refer to `numpy.all` for full documentation. 
+ + See Also + -------- + numpy.ndarray.all : corresponding function for ndarrays + numpy.all : equivalent function + + Examples + -------- + >>> np.ma.array([1,2,3]).all() + True + >>> a = np.ma.array([1,2,3], mask=True) + >>> (a.all() is np.ma.masked) + True + + """ + kwargs = {} if keepdims is np._NoValue else {'keepdims': keepdims} + + mask = _check_mask_axis(self._mask, axis, **kwargs) + if out is None: + d = self.filled(True).all(axis=axis, **kwargs).view(type(self)) + if d.ndim: + d.__setmask__(mask) + elif mask: + return masked + return d + self.filled(True).all(axis=axis, out=out, **kwargs) + if isinstance(out, MaskedArray): + if out.ndim or mask: + out.__setmask__(mask) + return out + + def any(self, axis=None, out=None, keepdims=np._NoValue): + """ + Returns True if any of the elements of `a` evaluate to True. + + Masked values are considered as False during computation. + + Refer to `numpy.any` for full documentation. + + See Also + -------- + numpy.ndarray.any : corresponding function for ndarrays + numpy.any : equivalent function + + """ + kwargs = {} if keepdims is np._NoValue else {'keepdims': keepdims} + + mask = _check_mask_axis(self._mask, axis, **kwargs) + if out is None: + d = self.filled(False).any(axis=axis, **kwargs).view(type(self)) + if d.ndim: + d.__setmask__(mask) + elif mask: + d = masked + return d + self.filled(False).any(axis=axis, out=out, **kwargs) + if isinstance(out, MaskedArray): + if out.ndim or mask: + out.__setmask__(mask) + return out + + def nonzero(self): + """ + Return the indices of unmasked elements that are not zero. + + Returns a tuple of arrays, one for each dimension, containing the + indices of the non-zero elements in that dimension. The corresponding + non-zero values can be obtained with:: + + a[a.nonzero()] + + To group the indices by element, rather than dimension, use + instead:: + + np.transpose(a.nonzero()) + + The result of this is always a 2d array, with a row for each non-zero + element. + + Parameters + ---------- + None + + Returns + ------- + tuple_of_arrays : tuple + Indices of elements that are non-zero. + + See Also + -------- + numpy.nonzero : + Function operating on ndarrays. + flatnonzero : + Return indices that are non-zero in the flattened version of the input + array. + numpy.ndarray.nonzero : + Equivalent ndarray method. + count_nonzero : + Counts the number of non-zero elements in the input array. + + Examples + -------- + >>> import numpy.ma as ma + >>> x = ma.array(np.eye(3)) + >>> x + masked_array( + data=[[1., 0., 0.], + [0., 1., 0.], + [0., 0., 1.]], + mask=False, + fill_value=1e+20) + >>> x.nonzero() + (array([0, 1, 2]), array([0, 1, 2])) + + Masked elements are ignored. + + >>> x[1, 1] = ma.masked + >>> x + masked_array( + data=[[1.0, 0.0, 0.0], + [0.0, --, 0.0], + [0.0, 0.0, 1.0]], + mask=[[False, False, False], + [False, True, False], + [False, False, False]], + fill_value=1e+20) + >>> x.nonzero() + (array([0, 2]), array([0, 2])) + + Indices can also be grouped by element. + + >>> np.transpose(x.nonzero()) + array([[0, 0], + [2, 2]]) + + A common use for ``nonzero`` is to find the indices of an array, where + a condition is True. Given an array `a`, the condition `a` > 3 is a + boolean array and since False is interpreted as 0, ma.nonzero(a > 3) + yields the indices of the `a` where the condition is true. 
+ + >>> a = ma.array([[1,2,3],[4,5,6],[7,8,9]]) + >>> a > 3 + masked_array( + data=[[False, False, False], + [ True, True, True], + [ True, True, True]], + mask=False, + fill_value=True) + >>> ma.nonzero(a > 3) + (array([1, 1, 1, 2, 2, 2]), array([0, 1, 2, 0, 1, 2])) + + The ``nonzero`` method of the condition array can also be called. + + >>> (a > 3).nonzero() + (array([1, 1, 1, 2, 2, 2]), array([0, 1, 2, 0, 1, 2])) + + """ + return narray(self.filled(0), copy=False).nonzero() + + def trace(self, offset=0, axis1=0, axis2=1, dtype=None, out=None): + """ + (this docstring should be overwritten) + """ + #!!!: implement out + test! + m = self._mask + if m is nomask: + result = super().trace(offset=offset, axis1=axis1, axis2=axis2, + out=out) + return result.astype(dtype) + else: + D = self.diagonal(offset=offset, axis1=axis1, axis2=axis2) + return D.astype(dtype).filled(0).sum(axis=-1, out=out) + trace.__doc__ = ndarray.trace.__doc__ + + def dot(self, b, out=None, strict=False): + """ + a.dot(b, out=None) + + Masked dot product of two arrays. Note that `out` and `strict` are + located in different positions than in `ma.dot`. In order to + maintain compatibility with the functional version, it is + recommended that the optional arguments be treated as keyword only. + At some point that may be mandatory. + + .. versionadded:: 1.10.0 + + Parameters + ---------- + b : masked_array_like + Inputs array. + out : masked_array, optional + Output argument. This must have the exact kind that would be + returned if it was not used. In particular, it must have the + right type, must be C-contiguous, and its dtype must be the + dtype that would be returned for `ma.dot(a,b)`. This is a + performance feature. Therefore, if these conditions are not + met, an exception is raised, instead of attempting to be + flexible. + strict : bool, optional + Whether masked data are propagated (True) or set to 0 (False) + for the computation. Default is False. Propagating the mask + means that if a masked value appears in a row or column, the + whole row or column is considered masked. + + .. versionadded:: 1.10.2 + + See Also + -------- + numpy.ma.dot : equivalent function + + """ + return dot(self, b, out=out, strict=strict) + + def sum(self, axis=None, dtype=None, out=None, keepdims=np._NoValue): + """ + Return the sum of the array elements over the given axis. + + Masked elements are set to 0 internally. + + Refer to `numpy.sum` for full documentation. 
+
+        See Also
+        --------
+        numpy.ndarray.sum : corresponding function for ndarrays
+        numpy.sum : equivalent function
+
+        Examples
+        --------
+        >>> x = np.ma.array([[1,2,3],[4,5,6],[7,8,9]], mask=[0] + [1,0]*4)
+        >>> x
+        masked_array(
+          data=[[1, --, 3],
+                [--, 5, --],
+                [7, --, 9]],
+          mask=[[False,  True, False],
+                [ True, False,  True],
+                [False,  True, False]],
+          fill_value=999999)
+        >>> x.sum()
+        25
+        >>> x.sum(axis=1)
+        masked_array(data=[4, 5, 16],
+                     mask=[False, False, False],
+               fill_value=999999)
+        >>> x.sum(axis=0)
+        masked_array(data=[8, 5, 12],
+                     mask=[False, False, False],
+               fill_value=999999)
+        >>> print(type(x.sum(axis=0, dtype=np.int64)[0]))
+        <class 'numpy.int64'>
+
+        """
+        kwargs = {} if keepdims is np._NoValue else {'keepdims': keepdims}
+
+        _mask = self._mask
+        newmask = _check_mask_axis(_mask, axis, **kwargs)
+        # No explicit output
+        if out is None:
+            result = self.filled(0).sum(axis, dtype=dtype, **kwargs)
+            rndim = getattr(result, 'ndim', 0)
+            if rndim:
+                result = result.view(type(self))
+                result.__setmask__(newmask)
+            elif newmask:
+                result = masked
+            return result
+        # Explicit output
+        result = self.filled(0).sum(axis, dtype=dtype, out=out, **kwargs)
+        if isinstance(out, MaskedArray):
+            outmask = getmask(out)
+            if outmask is nomask:
+                outmask = out._mask = make_mask_none(out.shape)
+            outmask.flat = newmask
+        return out
+
+    def cumsum(self, axis=None, dtype=None, out=None):
+        """
+        Return the cumulative sum of the array elements over the given axis.
+
+        Masked values are set to 0 internally during the computation.
+        However, their position is saved, and the result will be masked at
+        the same locations.
+
+        Refer to `numpy.cumsum` for full documentation.
+
+        Notes
+        -----
+        The mask is lost if `out` is not a valid :class:`ma.MaskedArray` !
+
+        Arithmetic is modular when using integer types, and no error is
+        raised on overflow.
+
+        See Also
+        --------
+        numpy.ndarray.cumsum : corresponding function for ndarrays
+        numpy.cumsum : equivalent function
+
+        Examples
+        --------
+        >>> marr = np.ma.array(np.arange(10), mask=[0,0,0,1,1,1,0,0,0,0])
+        >>> marr.cumsum()
+        masked_array(data=[0, 1, 3, --, --, --, 9, 16, 24, 33],
+                     mask=[False, False, False,  True,  True,  True, False, False,
+                           False, False],
+               fill_value=999999)
+
+        """
+        result = self.filled(0).cumsum(axis=axis, dtype=dtype, out=out)
+        if out is not None:
+            if isinstance(out, MaskedArray):
+                out.__setmask__(self.mask)
+            return out
+        result = result.view(type(self))
+        result.__setmask__(self._mask)
+        return result
+
+    def prod(self, axis=None, dtype=None, out=None, keepdims=np._NoValue):
+        """
+        Return the product of the array elements over the given axis.
+
+        Masked elements are set to 1 internally for computation.
+
+        Refer to `numpy.prod` for full documentation.
+
+        Notes
+        -----
+        Arithmetic is modular when using integer types, and no error is raised
+        on overflow.
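+
+        For example, masked entries are replaced by 1 and therefore do not
+        affect the product (an illustrative sketch, not part of the upstream
+        docstring; exact reprs may vary across NumPy versions):
+
+        >>> a = np.ma.array([2, 3, 4], mask=[0, 0, 1])
+        >>> a.prod()
+        6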
+ + See Also + -------- + numpy.ndarray.prod : corresponding function for ndarrays + numpy.prod : equivalent function + """ + kwargs = {} if keepdims is np._NoValue else {'keepdims': keepdims} + + _mask = self._mask + newmask = _check_mask_axis(_mask, axis, **kwargs) + # No explicit output + if out is None: + result = self.filled(1).prod(axis, dtype=dtype, **kwargs) + rndim = getattr(result, 'ndim', 0) + if rndim: + result = result.view(type(self)) + result.__setmask__(newmask) + elif newmask: + result = masked + return result + # Explicit output + result = self.filled(1).prod(axis, dtype=dtype, out=out, **kwargs) + if isinstance(out, MaskedArray): + outmask = getmask(out) + if outmask is nomask: + outmask = out._mask = make_mask_none(out.shape) + outmask.flat = newmask + return out + product = prod + + def cumprod(self, axis=None, dtype=None, out=None): + """ + Return the cumulative product of the array elements over the given axis. + + Masked values are set to 1 internally during the computation. + However, their position is saved, and the result will be masked at + the same locations. + + Refer to `numpy.cumprod` for full documentation. + + Notes + ----- + The mask is lost if `out` is not a valid MaskedArray ! + + Arithmetic is modular when using integer types, and no error is + raised on overflow. + + See Also + -------- + numpy.ndarray.cumprod : corresponding function for ndarrays + numpy.cumprod : equivalent function + """ + result = self.filled(1).cumprod(axis=axis, dtype=dtype, out=out) + if out is not None: + if isinstance(out, MaskedArray): + out.__setmask__(self._mask) + return out + result = result.view(type(self)) + result.__setmask__(self._mask) + return result + + def mean(self, axis=None, dtype=None, out=None, keepdims=np._NoValue): + """ + Returns the average of the array elements along given axis. + + Masked entries are ignored, and result elements which are not + finite will be masked. + + Refer to `numpy.mean` for full documentation. + + See Also + -------- + numpy.ndarray.mean : corresponding function for ndarrays + numpy.mean : Equivalent function + numpy.ma.average : Weighted average. + + Examples + -------- + >>> a = np.ma.array([1,2,3], mask=[False, False, True]) + >>> a + masked_array(data=[1, 2, --], + mask=[False, False, True], + fill_value=999999) + >>> a.mean() + 1.5 + + """ + kwargs = {} if keepdims is np._NoValue else {'keepdims': keepdims} + + if self._mask is nomask: + result = super().mean(axis=axis, dtype=dtype, **kwargs)[()] + else: + dsum = self.sum(axis=axis, dtype=dtype, **kwargs) + cnt = self.count(axis=axis, **kwargs) + if cnt.shape == () and (cnt == 0): + result = masked + else: + result = dsum * 1. / cnt + if out is not None: + out.flat = result + if isinstance(out, MaskedArray): + outmask = getmask(out) + if outmask is nomask: + outmask = out._mask = make_mask_none(out.shape) + outmask.flat = getmask(result) + return out + return result + + def anom(self, axis=None, dtype=None): + """ + Compute the anomalies (deviations from the arithmetic mean) + along the given axis. + + Returns an array of anomalies, with the same shape as the input and + where the arithmetic mean is computed along the given axis. + + Parameters + ---------- + axis : int, optional + Axis over which the anomalies are taken. + The default is to use the mean of the flattened array as reference. + dtype : dtype, optional + Type to use in computing the variance. For arrays of integer type + the default is float32; for arrays of float types it is the same as + the array type. 
+ + See Also + -------- + mean : Compute the mean of the array. + + Examples + -------- + >>> a = np.ma.array([1,2,3]) + >>> a.anom() + masked_array(data=[-1., 0., 1.], + mask=False, + fill_value=1e+20) + + """ + m = self.mean(axis, dtype) + if not axis: + return self - m + else: + return self - expand_dims(m, axis) + + def var(self, axis=None, dtype=None, out=None, ddof=0, + keepdims=np._NoValue): + """ + Returns the variance of the array elements along given axis. + + Masked entries are ignored, and result elements which are not + finite will be masked. + + Refer to `numpy.var` for full documentation. + + See Also + -------- + numpy.ndarray.var : corresponding function for ndarrays + numpy.var : Equivalent function + """ + kwargs = {} if keepdims is np._NoValue else {'keepdims': keepdims} + + # Easy case: nomask, business as usual + if self._mask is nomask: + ret = super().var(axis=axis, dtype=dtype, out=out, ddof=ddof, + **kwargs)[()] + if out is not None: + if isinstance(out, MaskedArray): + out.__setmask__(nomask) + return out + return ret + + # Some data are masked, yay! + cnt = self.count(axis=axis, **kwargs) - ddof + danom = self - self.mean(axis, dtype, keepdims=True) + if iscomplexobj(self): + danom = umath.absolute(danom) ** 2 + else: + danom *= danom + dvar = divide(danom.sum(axis, **kwargs), cnt).view(type(self)) + # Apply the mask if it's not a scalar + if dvar.ndim: + dvar._mask = mask_or(self._mask.all(axis, **kwargs), (cnt <= 0)) + dvar._update_from(self) + elif getmask(dvar): + # Make sure that masked is returned when the scalar is masked. + dvar = masked + if out is not None: + if isinstance(out, MaskedArray): + out.flat = 0 + out.__setmask__(True) + elif out.dtype.kind in 'biu': + errmsg = "Masked data information would be lost in one or "\ + "more location." + raise MaskError(errmsg) + else: + out.flat = np.nan + return out + # In case with have an explicit output + if out is not None: + # Set the data + out.flat = dvar + # Set the mask if needed + if isinstance(out, MaskedArray): + out.__setmask__(dvar.mask) + return out + return dvar + var.__doc__ = np.var.__doc__ + + def std(self, axis=None, dtype=None, out=None, ddof=0, + keepdims=np._NoValue): + """ + Returns the standard deviation of the array elements along given axis. + + Masked entries are ignored. + + Refer to `numpy.std` for full documentation. + + See Also + -------- + numpy.ndarray.std : corresponding function for ndarrays + numpy.std : Equivalent function + """ + kwargs = {} if keepdims is np._NoValue else {'keepdims': keepdims} + + dvar = self.var(axis, dtype, out, ddof, **kwargs) + if dvar is not masked: + if out is not None: + np.power(out, 0.5, out=out, casting='unsafe') + return out + dvar = sqrt(dvar) + return dvar + + def round(self, decimals=0, out=None): + """ + Return each element rounded to the given number of decimals. + + Refer to `numpy.around` for full documentation. 
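+
+        For instance, the data are rounded while the mask is left untouched
+        (an illustrative sketch, not part of the upstream docstring; exact
+        reprs may vary across NumPy versions):
+
+        >>> x = np.ma.array([1.16, 2.54, 3.73], mask=[0, 0, 1])
+        >>> x.round(1)
+        masked_array(data=[1.2, 2.5, --],
+                     mask=[False, False,  True],
+               fill_value=1e+20)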
+ + See Also + -------- + numpy.ndarray.round : corresponding function for ndarrays + numpy.around : equivalent function + """ + result = self._data.round(decimals=decimals, out=out).view(type(self)) + if result.ndim > 0: + result._mask = self._mask + result._update_from(self) + elif self._mask: + # Return masked when the scalar is masked + result = masked + # No explicit output: we're done + if out is None: + return result + if isinstance(out, MaskedArray): + out.__setmask__(self._mask) + return out + + def argsort(self, axis=np._NoValue, kind=None, order=None, + endwith=True, fill_value=None): + """ + Return an ndarray of indices that sort the array along the + specified axis. Masked values are filled beforehand to + `fill_value`. + + Parameters + ---------- + axis : int, optional + Axis along which to sort. If None, the default, the flattened array + is used. + + .. versionchanged:: 1.13.0 + Previously, the default was documented to be -1, but that was + in error. At some future date, the default will change to -1, as + originally intended. + Until then, the axis should be given explicitly when + ``arr.ndim > 1``, to avoid a FutureWarning. + kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, optional + The sorting algorithm used. + order : list, optional + When `a` is an array with fields defined, this argument specifies + which fields to compare first, second, etc. Not all fields need be + specified. + endwith : {True, False}, optional + Whether missing values (if any) should be treated as the largest values + (True) or the smallest values (False) + When the array contains unmasked values at the same extremes of the + datatype, the ordering of these values and the masked values is + undefined. + fill_value : scalar or None, optional + Value used internally for the masked values. + If ``fill_value`` is not None, it supersedes ``endwith``. + + Returns + ------- + index_array : ndarray, int + Array of indices that sort `a` along the specified axis. + In other words, ``a[index_array]`` yields a sorted `a`. + + See Also + -------- + ma.MaskedArray.sort : Describes sorting algorithms used. + lexsort : Indirect stable sort with multiple keys. + numpy.ndarray.sort : Inplace sort. + + Notes + ----- + See `sort` for notes on the different sorting algorithms. + + Examples + -------- + >>> a = np.ma.array([3,2,1], mask=[False, False, True]) + >>> a + masked_array(data=[3, 2, --], + mask=[False, False, True], + fill_value=999999) + >>> a.argsort() + array([1, 0, 2]) + + """ + + # 2017-04-11, Numpy 1.13.0, gh-8701: warn on axis default + if axis is np._NoValue: + axis = _deprecate_argsort_axis(self) + + if fill_value is None: + if endwith: + # nan > inf + if np.issubdtype(self.dtype, np.floating): + fill_value = np.nan + else: + fill_value = minimum_fill_value(self) + else: + fill_value = maximum_fill_value(self) + + filled = self.filled(fill_value) + return filled.argsort(axis=axis, kind=kind, order=order) + + def argmin(self, axis=None, fill_value=None, out=None, *, + keepdims=np._NoValue): + """ + Return array of indices to the minimum values along the given axis. + + Parameters + ---------- + axis : {None, integer} + If None, the index is into the flattened array, otherwise along + the specified axis + fill_value : scalar or None, optional + Value used to fill in the masked values. If None, the output of + minimum_fill_value(self._data) is used instead. + out : {None, array}, optional + Array into which the result can be placed. 
Its type is preserved + and it must be of the right shape to hold the output. + + Returns + ------- + ndarray or scalar + If multi-dimension input, returns a new ndarray of indices to the + minimum values along the given axis. Otherwise, returns a scalar + of index to the minimum values along the given axis. + + Examples + -------- + >>> x = np.ma.array(np.arange(4), mask=[1,1,0,0]) + >>> x.shape = (2,2) + >>> x + masked_array( + data=[[--, --], + [2, 3]], + mask=[[ True, True], + [False, False]], + fill_value=999999) + >>> x.argmin(axis=0, fill_value=-1) + array([0, 0]) + >>> x.argmin(axis=0, fill_value=9) + array([1, 1]) + + """ + if fill_value is None: + fill_value = minimum_fill_value(self) + d = self.filled(fill_value).view(ndarray) + keepdims = False if keepdims is np._NoValue else bool(keepdims) + return d.argmin(axis, out=out, keepdims=keepdims) + + def argmax(self, axis=None, fill_value=None, out=None, *, + keepdims=np._NoValue): + """ + Returns array of indices of the maximum values along the given axis. + Masked values are treated as if they had the value fill_value. + + Parameters + ---------- + axis : {None, integer} + If None, the index is into the flattened array, otherwise along + the specified axis + fill_value : scalar or None, optional + Value used to fill in the masked values. If None, the output of + maximum_fill_value(self._data) is used instead. + out : {None, array}, optional + Array into which the result can be placed. Its type is preserved + and it must be of the right shape to hold the output. + + Returns + ------- + index_array : {integer_array} + + Examples + -------- + >>> a = np.arange(6).reshape(2,3) + >>> a.argmax() + 5 + >>> a.argmax(0) + array([1, 1, 1]) + >>> a.argmax(1) + array([2, 2]) + + """ + if fill_value is None: + fill_value = maximum_fill_value(self._data) + d = self.filled(fill_value).view(ndarray) + keepdims = False if keepdims is np._NoValue else bool(keepdims) + return d.argmax(axis, out=out, keepdims=keepdims) + + def sort(self, axis=-1, kind=None, order=None, + endwith=True, fill_value=None): + """ + Sort the array, in-place + + Parameters + ---------- + a : array_like + Array to be sorted. + axis : int, optional + Axis along which to sort. If None, the array is flattened before + sorting. The default is -1, which sorts along the last axis. + kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, optional + The sorting algorithm used. + order : list, optional + When `a` is a structured array, this argument specifies which fields + to compare first, second, and so on. This list does not need to + include all of the fields. + endwith : {True, False}, optional + Whether missing values (if any) should be treated as the largest values + (True) or the smallest values (False) + When the array contains unmasked values sorting at the same extremes of the + datatype, the ordering of these values and the masked values is + undefined. + fill_value : scalar or None, optional + Value used internally for the masked values. + If ``fill_value`` is not None, it supersedes ``endwith``. + + Returns + ------- + sorted_array : ndarray + Array of the same type and shape as `a`. + + See Also + -------- + numpy.ndarray.sort : Method to sort an array in-place. + argsort : Indirect sort. + lexsort : Indirect stable sort on multiple keys. + searchsorted : Find elements in a sorted array. + + Notes + ----- + See ``sort`` for notes on the different sorting algorithms. 
+ + Examples + -------- + >>> a = np.ma.array([1, 2, 5, 4, 3],mask=[0, 1, 0, 1, 0]) + >>> # Default + >>> a.sort() + >>> a + masked_array(data=[1, 3, 5, --, --], + mask=[False, False, False, True, True], + fill_value=999999) + + >>> a = np.ma.array([1, 2, 5, 4, 3],mask=[0, 1, 0, 1, 0]) + >>> # Put missing values in the front + >>> a.sort(endwith=False) + >>> a + masked_array(data=[--, --, 1, 3, 5], + mask=[ True, True, False, False, False], + fill_value=999999) + + >>> a = np.ma.array([1, 2, 5, 4, 3],mask=[0, 1, 0, 1, 0]) + >>> # fill_value takes over endwith + >>> a.sort(endwith=False, fill_value=3) + >>> a + masked_array(data=[1, --, --, 3, 5], + mask=[False, True, True, False, False], + fill_value=999999) + + """ + if self._mask is nomask: + ndarray.sort(self, axis=axis, kind=kind, order=order) + return + + if self is masked: + return + + sidx = self.argsort(axis=axis, kind=kind, order=order, + fill_value=fill_value, endwith=endwith) + + self[...] = np.take_along_axis(self, sidx, axis=axis) + + def min(self, axis=None, out=None, fill_value=None, keepdims=np._NoValue): + """ + Return the minimum along a given axis. + + Parameters + ---------- + axis : {None, int}, optional + Axis along which to operate. By default, ``axis`` is None and the + flattened input is used. + out : array_like, optional + Alternative output array in which to place the result. Must be of + the same shape and buffer length as the expected output. + fill_value : scalar or None, optional + Value used to fill in the masked values. + If None, use the output of `minimum_fill_value`. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the array. + + Returns + ------- + amin : array_like + New array holding the result. + If ``out`` was specified, ``out`` is returned. + + See Also + -------- + ma.minimum_fill_value + Returns the minimum filling value for a given datatype. + + """ + kwargs = {} if keepdims is np._NoValue else {'keepdims': keepdims} + + _mask = self._mask + newmask = _check_mask_axis(_mask, axis, **kwargs) + if fill_value is None: + fill_value = minimum_fill_value(self) + # No explicit output + if out is None: + result = self.filled(fill_value).min( + axis=axis, out=out, **kwargs).view(type(self)) + if result.ndim: + # Set the mask + result.__setmask__(newmask) + # Get rid of Infs + if newmask.ndim: + np.copyto(result, result.fill_value, where=newmask) + elif newmask: + result = masked + return result + # Explicit output + result = self.filled(fill_value).min(axis=axis, out=out, **kwargs) + if isinstance(out, MaskedArray): + outmask = getmask(out) + if outmask is nomask: + outmask = out._mask = make_mask_none(out.shape) + outmask.flat = newmask + else: + if out.dtype.kind in 'biu': + errmsg = "Masked data information would be lost in one or more"\ + " location." + raise MaskError(errmsg) + np.copyto(out, np.nan, where=newmask) + return out + + # unique to masked arrays + def mini(self, axis=None): + """ + Return the array minimum along the specified axis. + + .. deprecated:: 1.13.0 + This function is identical to both: + + * ``self.min(keepdims=True, axis=axis).squeeze(axis=axis)`` + * ``np.ma.minimum.reduce(self, axis=axis)`` + + Typically though, ``self.min(axis=axis)`` is sufficient. + + Parameters + ---------- + axis : int, optional + The axis along which to find the minima. Default is None, in which case + the minimum value in the whole array is returned. 
+ + Returns + ------- + min : scalar or MaskedArray + If `axis` is None, the result is a scalar. Otherwise, if `axis` is + given and the array is at least 2-D, the result is a masked array with + dimension one smaller than the array on which `mini` is called. + + Examples + -------- + >>> x = np.ma.array(np.arange(6), mask=[0 ,1, 0, 0, 0 ,1]).reshape(3, 2) + >>> x + masked_array( + data=[[0, --], + [2, 3], + [4, --]], + mask=[[False, True], + [False, False], + [False, True]], + fill_value=999999) + >>> x.mini() + masked_array(data=0, + mask=False, + fill_value=999999) + >>> x.mini(axis=0) + masked_array(data=[0, 3], + mask=[False, False], + fill_value=999999) + >>> x.mini(axis=1) + masked_array(data=[0, 2, 4], + mask=[False, False, False], + fill_value=999999) + + There is a small difference between `mini` and `min`: + + >>> x[:,1].mini(axis=0) + masked_array(data=3, + mask=False, + fill_value=999999) + >>> x[:,1].min(axis=0) + 3 + """ + + # 2016-04-13, 1.13.0, gh-8764 + warnings.warn( + "`mini` is deprecated; use the `min` method or " + "`np.ma.minimum.reduce instead.", + DeprecationWarning, stacklevel=2) + return minimum.reduce(self, axis) + + def max(self, axis=None, out=None, fill_value=None, keepdims=np._NoValue): + """ + Return the maximum along a given axis. + + Parameters + ---------- + axis : {None, int}, optional + Axis along which to operate. By default, ``axis`` is None and the + flattened input is used. + out : array_like, optional + Alternative output array in which to place the result. Must + be of the same shape and buffer length as the expected output. + fill_value : scalar or None, optional + Value used to fill in the masked values. + If None, use the output of maximum_fill_value(). + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the array. + + Returns + ------- + amax : array_like + New array holding the result. + If ``out`` was specified, ``out`` is returned. + + See Also + -------- + ma.maximum_fill_value + Returns the maximum filling value for a given datatype. + + """ + kwargs = {} if keepdims is np._NoValue else {'keepdims': keepdims} + + _mask = self._mask + newmask = _check_mask_axis(_mask, axis, **kwargs) + if fill_value is None: + fill_value = maximum_fill_value(self) + # No explicit output + if out is None: + result = self.filled(fill_value).max( + axis=axis, out=out, **kwargs).view(type(self)) + if result.ndim: + # Set the mask + result.__setmask__(newmask) + # Get rid of Infs + if newmask.ndim: + np.copyto(result, result.fill_value, where=newmask) + elif newmask: + result = masked + return result + # Explicit output + result = self.filled(fill_value).max(axis=axis, out=out, **kwargs) + if isinstance(out, MaskedArray): + outmask = getmask(out) + if outmask is nomask: + outmask = out._mask = make_mask_none(out.shape) + outmask.flat = newmask + else: + + if out.dtype.kind in 'biu': + errmsg = "Masked data information would be lost in one or more"\ + " location." + raise MaskError(errmsg) + np.copyto(out, np.nan, where=newmask) + return out + + def ptp(self, axis=None, out=None, fill_value=None, keepdims=False): + """ + Return (maximum - minimum) along the given dimension + (i.e. peak-to-peak value). + + .. warning:: + `ptp` preserves the data type of the array. This means the + return value for an input of signed integers with n bits + (e.g. `np.int8`, `np.int16`, etc) is also a signed integer + with n bits. 
In that case, peak-to-peak values greater than + ``2**(n-1)-1`` will be returned as negative values. An example + with a work-around is shown below. + + Parameters + ---------- + axis : {None, int}, optional + Axis along which to find the peaks. If None (default) the + flattened array is used. + out : {None, array_like}, optional + Alternative output array in which to place the result. It must + have the same shape and buffer length as the expected output + but the type will be cast if necessary. + fill_value : scalar or None, optional + Value used to fill in the masked values. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the array. + + Returns + ------- + ptp : ndarray. + A new array holding the result, unless ``out`` was + specified, in which case a reference to ``out`` is returned. + + Examples + -------- + >>> x = np.ma.MaskedArray([[4, 9, 2, 10], + ... [6, 9, 7, 12]]) + + >>> x.ptp(axis=1) + masked_array(data=[8, 6], + mask=False, + fill_value=999999) + + >>> x.ptp(axis=0) + masked_array(data=[2, 0, 5, 2], + mask=False, + fill_value=999999) + + >>> x.ptp() + 10 + + This example shows that a negative value can be returned when + the input is an array of signed integers. + + >>> y = np.ma.MaskedArray([[1, 127], + ... [0, 127], + ... [-1, 127], + ... [-2, 127]], dtype=np.int8) + >>> y.ptp(axis=1) + masked_array(data=[ 126, 127, -128, -127], + mask=False, + fill_value=999999, + dtype=int8) + + A work-around is to use the `view()` method to view the result as + unsigned integers with the same bit width: + + >>> y.ptp(axis=1).view(np.uint8) + masked_array(data=[126, 127, 128, 129], + mask=False, + fill_value=999999, + dtype=uint8) + """ + if out is None: + result = self.max(axis=axis, fill_value=fill_value, + keepdims=keepdims) + result -= self.min(axis=axis, fill_value=fill_value, + keepdims=keepdims) + return result + out.flat = self.max(axis=axis, out=out, fill_value=fill_value, + keepdims=keepdims) + min_value = self.min(axis=axis, fill_value=fill_value, + keepdims=keepdims) + np.subtract(out, min_value, out=out, casting='unsafe') + return out + + def partition(self, *args, **kwargs): + warnings.warn("Warning: 'partition' will ignore the 'mask' " + f"of the {self.__class__.__name__}.", + stacklevel=2) + return super().partition(*args, **kwargs) + + def argpartition(self, *args, **kwargs): + warnings.warn("Warning: 'argpartition' will ignore the 'mask' " + f"of the {self.__class__.__name__}.", + stacklevel=2) + return super().argpartition(*args, **kwargs) + + def take(self, indices, axis=None, out=None, mode='raise'): + """ + """ + (_data, _mask) = (self._data, self._mask) + cls = type(self) + # Make sure the indices are not masked + maskindices = getmask(indices) + if maskindices is not nomask: + indices = indices.filled(0) + # Get the data, promoting scalars to 0d arrays with [...] 
so that + # .view works correctly + if out is None: + out = _data.take(indices, axis=axis, mode=mode)[...].view(cls) + else: + np.take(_data, indices, axis=axis, mode=mode, out=out) + # Get the mask + if isinstance(out, MaskedArray): + if _mask is nomask: + outmask = maskindices + else: + outmask = _mask.take(indices, axis=axis, mode=mode) + outmask |= maskindices + out.__setmask__(outmask) + # demote 0d arrays back to scalars, for consistency with ndarray.take + return out[()] + + # Array methods + copy = _arraymethod('copy') + diagonal = _arraymethod('diagonal') + flatten = _arraymethod('flatten') + repeat = _arraymethod('repeat') + squeeze = _arraymethod('squeeze') + swapaxes = _arraymethod('swapaxes') + T = property(fget=lambda self: self.transpose()) + transpose = _arraymethod('transpose') + + def tolist(self, fill_value=None): + """ + Return the data portion of the masked array as a hierarchical Python list. + + Data items are converted to the nearest compatible Python type. + Masked values are converted to `fill_value`. If `fill_value` is None, + the corresponding entries in the output list will be ``None``. + + Parameters + ---------- + fill_value : scalar, optional + The value to use for invalid entries. Default is None. + + Returns + ------- + result : list + The Python list representation of the masked array. + + Examples + -------- + >>> x = np.ma.array([[1,2,3], [4,5,6], [7,8,9]], mask=[0] + [1,0]*4) + >>> x.tolist() + [[1, None, 3], [None, 5, None], [7, None, 9]] + >>> x.tolist(-999) + [[1, -999, 3], [-999, 5, -999], [7, -999, 9]] + + """ + _mask = self._mask + # No mask ? Just return .data.tolist ? + if _mask is nomask: + return self._data.tolist() + # Explicit fill_value: fill the array and get the list + if fill_value is not None: + return self.filled(fill_value).tolist() + # Structured array. + names = self.dtype.names + if names: + result = self._data.astype([(_, object) for _ in names]) + for n in names: + result[n][_mask[n]] = None + return result.tolist() + # Standard arrays. + if _mask is nomask: + return [None] + # Set temps to save time when dealing w/ marrays. + inishape = self.shape + result = np.array(self._data.ravel(), dtype=object) + result[_mask.ravel()] = None + result.shape = inishape + return result.tolist() + + def tostring(self, fill_value=None, order='C'): + r""" + A compatibility alias for `tobytes`, with exactly the same behavior. + + Despite its name, it returns `bytes` not `str`\ s. + + .. deprecated:: 1.19.0 + """ + # 2020-03-30, Numpy 1.19.0 + warnings.warn( + "tostring() is deprecated. Use tobytes() instead.", + DeprecationWarning, stacklevel=2) + + return self.tobytes(fill_value, order=order) + + def tobytes(self, fill_value=None, order='C'): + """ + Return the array data as a string containing the raw bytes in the array. + + The array is filled with a fill value before the string conversion. + + .. versionadded:: 1.9.0 + + Parameters + ---------- + fill_value : scalar, optional + Value used to fill in the masked values. Default is None, in which + case `MaskedArray.fill_value` is used. + order : {'C','F','A'}, optional + Order of the data item in the copy. Default is 'C'. + + - 'C' -- C order (row major). + - 'F' -- Fortran order (column major). + - 'A' -- Any, current order of array. + - None -- Same as 'A'. + + See Also + -------- + numpy.ndarray.tobytes + tolist, tofile + + Notes + ----- + As for `ndarray.tobytes`, information about the shape, dtype, etc., + but also about `fill_value`, will be lost. 
+
+        Examples
+        --------
+        >>> x = np.ma.array(np.array([[1, 2], [3, 4]]), mask=[[0, 1], [1, 0]])
+        >>> x.tobytes()
+        b'\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x00?B\\x0f\\x00\\x00\\x00\\x00\\x00?B\\x0f\\x00\\x00\\x00\\x00\\x00\\x04\\x00\\x00\\x00\\x00\\x00\\x00\\x00'
+
+        """
+        return self.filled(fill_value).tobytes(order=order)
+
+    def tofile(self, fid, sep="", format="%s"):
+        """
+        Save a masked array to a file in binary format.
+
+        .. warning::
+           This function is not implemented yet.
+
+        Raises
+        ------
+        NotImplementedError
+            When `tofile` is called.
+
+        """
+        raise NotImplementedError("MaskedArray.tofile() not implemented yet.")
+
+    def toflex(self):
+        """
+        Transforms a masked array into a flexible-type array.
+
+        The flexible type array that is returned will have two fields:
+
+        * the ``_data`` field stores the ``_data`` part of the array.
+        * the ``_mask`` field stores the ``_mask`` part of the array.
+
+        Parameters
+        ----------
+        None
+
+        Returns
+        -------
+        record : ndarray
+            A new flexible-type `ndarray` with two fields: the first element
+            containing a value, the second element containing the corresponding
+            mask boolean. The returned record shape matches self.shape.
+
+        Notes
+        -----
+        A side-effect of transforming a masked array into a flexible `ndarray` is
+        that meta information (``fill_value``, ...) will be lost.
+
+        Examples
+        --------
+        >>> x = np.ma.array([[1,2,3],[4,5,6],[7,8,9]], mask=[0] + [1,0]*4)
+        >>> x
+        masked_array(
+          data=[[1, --, 3],
+                [--, 5, --],
+                [7, --, 9]],
+          mask=[[False,  True, False],
+                [ True, False,  True],
+                [False,  True, False]],
+          fill_value=999999)
+        >>> x.toflex()
+        array([[(1, False), (2,  True), (3, False)],
+               [(4,  True), (5, False), (6,  True)],
+               [(7, False), (8,  True), (9, False)]],
+              dtype=[('_data', '<i8'), ('_mask', '?')])
+
+            # Can happen when indx is a multi-dimensional field:
+            # A = ma.masked_array(data=[([0,1],)], mask=[([True,
+            #        False],)], dtype=[("A", ">i2", (2,))])
+            # x = A[0]; y = x["A"]; then y.mask["A"].size==2
+            # and we can not say masked/unmasked.
+            # The result is no longer mvoid!
+            # See also issue #6724.
+            return masked_array(
+                data=self._data[indx], mask=m[indx],
+                fill_value=self._fill_value[indx],
+                hard_mask=self._hardmask)
+        if m is not nomask and m[indx]:
+            return masked
+        return self._data[indx]
+
+    def __setitem__(self, indx, value):
+        self._data[indx] = value
+        if self._hardmask:
+            self._mask[indx] |= getattr(value, "_mask", False)
+        else:
+            self._mask[indx] = getattr(value, "_mask", False)
+
+    def __str__(self):
+        m = self._mask
+        if m is nomask:
+            return str(self._data)
+
+        rdtype = _replace_dtype_fields(self._data.dtype, "O")
+        data_arr = super()._data
+        res = data_arr.astype(rdtype)
+        _recursive_printoption(res, self._mask, masked_print_option)
+        return str(res)
+
+    __repr__ = __str__
+
+    def __iter__(self):
+        "Defines an iterator for mvoid"
+        (_data, _mask) = (self._data, self._mask)
+        if _mask is nomask:
+            yield from _data
+        else:
+            for (d, m) in zip(_data, _mask):
+                if m:
+                    yield masked
+                else:
+                    yield d
+
+    def __len__(self):
+        return self._data.__len__()
+
+    def filled(self, fill_value=None):
+        """
+        Return a copy with masked fields filled with a given value.
+
+        Parameters
+        ----------
+        fill_value : array_like, optional
+            The value to use for invalid entries. Can be scalar or
+            non-scalar. If latter is the case, the filled array should
+            be broadcastable over input array. Default is None, in
+            which case the `fill_value` attribute is used instead.
+ + Returns + ------- + filled_void + A `np.void` object + + See Also + -------- + MaskedArray.filled + + """ + return asarray(self).filled(fill_value)[()] + + def tolist(self): + """ + Transforms the mvoid object into a tuple. + + Masked fields are replaced by None. + + Returns + ------- + returned_tuple + Tuple of fields + """ + _mask = self._mask + if _mask is nomask: + return self._data.tolist() + result = [] + for (d, m) in zip(self._data, self._mask): + if m: + result.append(None) + else: + # .item() makes sure we return a standard Python object + result.append(d.item()) + return tuple(result) + + +############################################################################## +# Shortcuts # +############################################################################## + + +def isMaskedArray(x): + """ + Test whether input is an instance of MaskedArray. + + This function returns True if `x` is an instance of MaskedArray + and returns False otherwise. Any object is accepted as input. + + Parameters + ---------- + x : object + Object to test. + + Returns + ------- + result : bool + True if `x` is a MaskedArray. + + See Also + -------- + isMA : Alias to isMaskedArray. + isarray : Alias to isMaskedArray. + + Examples + -------- + >>> import numpy.ma as ma + >>> a = np.eye(3, 3) + >>> a + array([[ 1., 0., 0.], + [ 0., 1., 0.], + [ 0., 0., 1.]]) + >>> m = ma.masked_values(a, 0) + >>> m + masked_array( + data=[[1.0, --, --], + [--, 1.0, --], + [--, --, 1.0]], + mask=[[False, True, True], + [ True, False, True], + [ True, True, False]], + fill_value=0.0) + >>> ma.isMaskedArray(a) + False + >>> ma.isMaskedArray(m) + True + >>> ma.isMaskedArray([0, 1, 2]) + False + + """ + return isinstance(x, MaskedArray) + + +isarray = isMaskedArray +isMA = isMaskedArray # backward compatibility + + +class MaskedConstant(MaskedArray): + # the lone np.ma.masked instance + __singleton = None + + @classmethod + def __has_singleton(cls): + # second case ensures `cls.__singleton` is not just a view on the + # superclass singleton + return cls.__singleton is not None and type(cls.__singleton) is cls + + def __new__(cls): + if not cls.__has_singleton(): + # We define the masked singleton as a float for higher precedence. + # Note that it can be tricky sometimes w/ type comparison + data = np.array(0.) + mask = np.array(True) + + # prevent any modifications + data.flags.writeable = False + mask.flags.writeable = False + + # don't fall back on MaskedArray.__new__(MaskedConstant), since + # that might confuse it - this way, the construction is entirely + # within our control + cls.__singleton = MaskedArray(data, mask=mask).view(cls) + + return cls.__singleton + + def __array_finalize__(self, obj): + if not self.__has_singleton(): + # this handles the `.view` in __new__, which we want to copy across + # properties normally + return super().__array_finalize__(obj) + elif self is self.__singleton: + # not clear how this can happen, play it safe + pass + else: + # everywhere else, we want to downcast to MaskedArray, to prevent a + # duplicate maskedconstant. 
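+            # Reassigning __class__ (just below) demotes any would-be second
+            # MaskedConstant instance to a plain MaskedArray; this is what
+            # keeps np.ma.masked a true singleton.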
+ self.__class__ = MaskedArray + MaskedArray.__array_finalize__(self, obj) + + def __array_prepare__(self, obj, context=None): + return self.view(MaskedArray).__array_prepare__(obj, context) + + def __array_wrap__(self, obj, context=None): + return self.view(MaskedArray).__array_wrap__(obj, context) + + def __str__(self): + return str(masked_print_option._display) + + def __repr__(self): + if self is MaskedConstant.__singleton: + return 'masked' + else: + # it's a subclass, or something is wrong, make it obvious + return object.__repr__(self) + + def __format__(self, format_spec): + # Replace ndarray.__format__ with the default, which supports no format characters. + # Supporting format characters is unwise here, because we do not know what type + # the user was expecting - better to not guess. + try: + return object.__format__(self, format_spec) + except TypeError: + # 2020-03-23, NumPy 1.19.0 + warnings.warn( + "Format strings passed to MaskedConstant are ignored, but in future may " + "error or produce different behavior", + FutureWarning, stacklevel=2 + ) + return object.__format__(self, "") + + def __reduce__(self): + """Override of MaskedArray's __reduce__. + """ + return (self.__class__, ()) + + # inplace operations have no effect. We have to override them to avoid + # trying to modify the readonly data and mask arrays + def __iop__(self, other): + return self + __iadd__ = \ + __isub__ = \ + __imul__ = \ + __ifloordiv__ = \ + __itruediv__ = \ + __ipow__ = \ + __iop__ + del __iop__ # don't leave this around + + def copy(self, *args, **kwargs): + """ Copy is a no-op on the maskedconstant, as it is a scalar """ + # maskedconstant is a scalar, so copy doesn't need to copy. There's + # precedent for this with `np.bool_` scalars. + return self + + def __copy__(self): + return self + + def __deepcopy__(self, memo): + return self + + def __setattr__(self, attr, value): + if not self.__has_singleton(): + # allow the singleton to be initialized + return super().__setattr__(attr, value) + elif self is self.__singleton: + raise AttributeError( + f"attributes of {self!r} are not writeable") + else: + # duplicate instance - we can end up here from __array_finalize__, + # where we set the __class__ attribute + return super().__setattr__(attr, value) + + +masked = masked_singleton = MaskedConstant() +masked_array = MaskedArray + + +def array(data, dtype=None, copy=False, order=None, + mask=nomask, fill_value=None, keep_mask=True, + hard_mask=False, shrink=True, subok=True, ndmin=0): + """ + Shortcut to MaskedArray. + + The options are in a different order for convenience and backwards + compatibility. + + """ + return MaskedArray(data, mask=mask, dtype=dtype, copy=copy, + subok=subok, keep_mask=keep_mask, + hard_mask=hard_mask, fill_value=fill_value, + ndmin=ndmin, shrink=shrink, order=order) +array.__doc__ = masked_array.__doc__ + + +def is_masked(x): + """ + Determine whether input has masked values. + + Accepts any object as input, but always returns False unless the + input is a MaskedArray containing masked values. + + Parameters + ---------- + x : array_like + Array to check for masked values. + + Returns + ------- + result : bool + True if `x` is a MaskedArray with masked values, False otherwise. 
+ + Examples + -------- + >>> import numpy.ma as ma + >>> x = ma.masked_equal([0, 1, 0, 2, 3], 0) + >>> x + masked_array(data=[--, 1, --, 2, 3], + mask=[ True, False, True, False, False], + fill_value=0) + >>> ma.is_masked(x) + True + >>> x = ma.masked_equal([0, 1, 0, 2, 3], 42) + >>> x + masked_array(data=[0, 1, 0, 2, 3], + mask=False, + fill_value=42) + >>> ma.is_masked(x) + False + + Always returns False if `x` isn't a MaskedArray. + + >>> x = [False, True, False] + >>> ma.is_masked(x) + False + >>> x = 'a string' + >>> ma.is_masked(x) + False + + """ + m = getmask(x) + if m is nomask: + return False + elif m.any(): + return True + return False + + +############################################################################## +# Extrema functions # +############################################################################## + + +class _extrema_operation(_MaskedUFunc): + """ + Generic class for maximum/minimum functions. + + .. note:: + This is the base class for `_maximum_operation` and + `_minimum_operation`. + + """ + def __init__(self, ufunc, compare, fill_value): + super().__init__(ufunc) + self.compare = compare + self.fill_value_func = fill_value + + def __call__(self, a, b=None): + "Executes the call behavior." + if b is None: + # 2016-04-13, 1.13.0 + warnings.warn( + f"Single-argument form of np.ma.{self.__name__} is deprecated. Use " + f"np.ma.{self.__name__}.reduce instead.", + DeprecationWarning, stacklevel=2) + return self.reduce(a) + return where(self.compare(a, b), a, b) + + def reduce(self, target, axis=np._NoValue): + "Reduce target along the given axis." + target = narray(target, copy=False, subok=True) + m = getmask(target) + + if axis is np._NoValue and target.ndim > 1: + # 2017-05-06, Numpy 1.13.0: warn on axis default + warnings.warn( + f"In the future the default for ma.{self.__name__}.reduce will be axis=0, " + f"not the current None, to match np.{self.__name__}.reduce. " + "Explicitly pass 0 or None to silence this warning.", + MaskedArrayFutureWarning, stacklevel=2) + axis = None + + if axis is not np._NoValue: + kwargs = dict(axis=axis) + else: + kwargs = dict() + + if m is nomask: + t = self.f.reduce(target, **kwargs) + else: + target = target.filled( + self.fill_value_func(target)).view(type(target)) + t = self.f.reduce(target, **kwargs) + m = umath.logical_and.reduce(m, **kwargs) + if hasattr(t, '_mask'): + t._mask = m + elif m: + t = masked + return t + + def outer(self, a, b): + "Return the function applied to the outer product of a and b." 
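+        # An element of the outer result is masked wherever either input
+        # contributes a masked value: the result mask computed below is the
+        # outer logical OR of the two input masks.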
+ ma = getmask(a) + mb = getmask(b) + if ma is nomask and mb is nomask: + m = nomask + else: + ma = getmaskarray(a) + mb = getmaskarray(b) + m = logical_or.outer(ma, mb) + result = self.f.outer(filled(a), filled(b)) + if not isinstance(result, MaskedArray): + result = result.view(MaskedArray) + result._mask = m + return result + +def min(obj, axis=None, out=None, fill_value=None, keepdims=np._NoValue): + kwargs = {} if keepdims is np._NoValue else {'keepdims': keepdims} + + try: + return obj.min(axis=axis, fill_value=fill_value, out=out, **kwargs) + except (AttributeError, TypeError): + # If obj doesn't have a min method, or if the method doesn't accept a + # fill_value argument + return asanyarray(obj).min(axis=axis, fill_value=fill_value, + out=out, **kwargs) +min.__doc__ = MaskedArray.min.__doc__ + +def max(obj, axis=None, out=None, fill_value=None, keepdims=np._NoValue): + kwargs = {} if keepdims is np._NoValue else {'keepdims': keepdims} + + try: + return obj.max(axis=axis, fill_value=fill_value, out=out, **kwargs) + except (AttributeError, TypeError): + # If obj doesn't have a max method, or if the method doesn't accept a + # fill_value argument + return asanyarray(obj).max(axis=axis, fill_value=fill_value, + out=out, **kwargs) +max.__doc__ = MaskedArray.max.__doc__ + + +def ptp(obj, axis=None, out=None, fill_value=None, keepdims=np._NoValue): + kwargs = {} if keepdims is np._NoValue else {'keepdims': keepdims} + try: + return obj.ptp(axis, out=out, fill_value=fill_value, **kwargs) + except (AttributeError, TypeError): + # If obj doesn't have a ptp method or if the method doesn't accept + # a fill_value argument + return asanyarray(obj).ptp(axis=axis, fill_value=fill_value, + out=out, **kwargs) +ptp.__doc__ = MaskedArray.ptp.__doc__ + + +############################################################################## +# Definition of functions from the corresponding methods # +############################################################################## + + +class _frommethod: + """ + Define functions from existing MaskedArray methods. + + Parameters + ---------- + methodname : str + Name of the method to transform. + + """ + + def __init__(self, methodname, reversed=False): + self.__name__ = methodname + self.__doc__ = self.getdoc() + self.reversed = reversed + + def getdoc(self): + "Return the doc of the function (from the doc of the method)." 
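+        # Prefer the MaskedArray method's docstring, falling back to the
+        # top-level numpy function of the same name; the signature line is
+        # prepended so the generated wrapper documents itself.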
+ meth = getattr(MaskedArray, self.__name__, None) or\ + getattr(np, self.__name__, None) + signature = self.__name__ + get_object_signature(meth) + if meth is not None: + doc = """ %s\n%s""" % ( + signature, getattr(meth, '__doc__', None)) + return doc + + def __call__(self, a, *args, **params): + if self.reversed: + args = list(args) + a, args[0] = args[0], a + + marr = asanyarray(a) + method_name = self.__name__ + method = getattr(type(marr), method_name, None) + if method is None: + # use the corresponding np function + method = getattr(np, method_name) + + return method(marr, *args, **params) + + +all = _frommethod('all') +anomalies = anom = _frommethod('anom') +any = _frommethod('any') +compress = _frommethod('compress', reversed=True) +cumprod = _frommethod('cumprod') +cumsum = _frommethod('cumsum') +copy = _frommethod('copy') +diagonal = _frommethod('diagonal') +harden_mask = _frommethod('harden_mask') +ids = _frommethod('ids') +maximum = _extrema_operation(umath.maximum, greater, maximum_fill_value) +mean = _frommethod('mean') +minimum = _extrema_operation(umath.minimum, less, minimum_fill_value) +nonzero = _frommethod('nonzero') +prod = _frommethod('prod') +product = _frommethod('prod') +ravel = _frommethod('ravel') +repeat = _frommethod('repeat') +shrink_mask = _frommethod('shrink_mask') +soften_mask = _frommethod('soften_mask') +std = _frommethod('std') +sum = _frommethod('sum') +swapaxes = _frommethod('swapaxes') +#take = _frommethod('take') +trace = _frommethod('trace') +var = _frommethod('var') + +count = _frommethod('count') + +def take(a, indices, axis=None, out=None, mode='raise'): + """ + """ + a = masked_array(a) + return a.take(indices, axis=axis, out=out, mode=mode) + + +def power(a, b, third=None): + """ + Returns element-wise base array raised to power from second array. + + This is the masked array version of `numpy.power`. For details see + `numpy.power`. + + See Also + -------- + numpy.power + + Notes + ----- + The *out* argument to `numpy.power` is not supported, `third` has to be + None. + + """ + if third is not None: + raise MaskError("3-argument power not supported.") + # Get the masks + ma = getmask(a) + mb = getmask(b) + m = mask_or(ma, mb) + # Get the rawdata + fa = getdata(a) + fb = getdata(b) + # Get the type of the result (so that we preserve subclasses) + if isinstance(a, MaskedArray): + basetype = type(a) + else: + basetype = MaskedArray + # Get the result and view it as a (subclass of) MaskedArray + with np.errstate(divide='ignore', invalid='ignore'): + result = np.where(m, fa, umath.power(fa, fb)).view(basetype) + result._update_from(a) + # Find where we're in trouble w/ NaNs and Infs + invalid = np.logical_not(np.isfinite(result.view(ndarray))) + # Add the initial mask + if m is not nomask: + if not result.ndim: + return masked + result._mask = np.logical_or(m, invalid) + # Fix the invalid parts + if invalid.any(): + if not result.ndim: + return masked + elif result._mask is nomask: + result._mask = invalid + result._data[invalid] = result.fill_value + return result + +argmin = _frommethod('argmin') +argmax = _frommethod('argmax') + +def argsort(a, axis=np._NoValue, kind=None, order=None, endwith=True, fill_value=None): + "Function version of the eponymous method." 
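+    # The mask-aware keywords (endwith, fill_value) are only forwarded when
+    # the input really is a MaskedArray; plain ndarrays take the ordinary
+    # ndarray.argsort path below.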
+ a = np.asanyarray(a) + + # 2017-04-11, Numpy 1.13.0, gh-8701: warn on axis default + if axis is np._NoValue: + axis = _deprecate_argsort_axis(a) + + if isinstance(a, MaskedArray): + return a.argsort(axis=axis, kind=kind, order=order, + endwith=endwith, fill_value=fill_value) + else: + return a.argsort(axis=axis, kind=kind, order=order) +argsort.__doc__ = MaskedArray.argsort.__doc__ + +def sort(a, axis=-1, kind=None, order=None, endwith=True, fill_value=None): + """ + Return a sorted copy of the masked array. + + Equivalent to creating a copy of the array + and applying the MaskedArray ``sort()`` method. + + Refer to ``MaskedArray.sort`` for the full documentation + + See Also + -------- + MaskedArray.sort : equivalent method + """ + a = np.array(a, copy=True, subok=True) + if axis is None: + a = a.flatten() + axis = 0 + + if isinstance(a, MaskedArray): + a.sort(axis=axis, kind=kind, order=order, + endwith=endwith, fill_value=fill_value) + else: + a.sort(axis=axis, kind=kind, order=order) + return a + + +def compressed(x): + """ + Return all the non-masked data as a 1-D array. + + This function is equivalent to calling the "compressed" method of a + `ma.MaskedArray`, see `ma.MaskedArray.compressed` for details. + + See Also + -------- + ma.MaskedArray.compressed : Equivalent method. + + """ + return asanyarray(x).compressed() + + +def concatenate(arrays, axis=0): + """ + Concatenate a sequence of arrays along the given axis. + + Parameters + ---------- + arrays : sequence of array_like + The arrays must have the same shape, except in the dimension + corresponding to `axis` (the first, by default). + axis : int, optional + The axis along which the arrays will be joined. Default is 0. + + Returns + ------- + result : MaskedArray + The concatenated array with any masked entries preserved. + + See Also + -------- + numpy.concatenate : Equivalent function in the top-level NumPy module. + + Examples + -------- + >>> import numpy.ma as ma + >>> a = ma.arange(3) + >>> a[1] = ma.masked + >>> b = ma.arange(2, 5) + >>> a + masked_array(data=[0, --, 2], + mask=[False, True, False], + fill_value=999999) + >>> b + masked_array(data=[2, 3, 4], + mask=False, + fill_value=999999) + >>> ma.concatenate([a, b]) + masked_array(data=[0, --, 2, 2, 3, 4], + mask=[False, True, False, False, False, False], + fill_value=999999) + + """ + d = np.concatenate([getdata(a) for a in arrays], axis) + rcls = get_masked_subclass(*arrays) + data = d.view(rcls) + # Check whether one of the arrays has a non-empty mask. + for x in arrays: + if getmask(x) is not nomask: + break + else: + return data + # OK, so we have to concatenate the masks + dm = np.concatenate([getmaskarray(a) for a in arrays], axis) + dm = dm.reshape(d.shape) + + # If we decide to keep a '_shrinkmask' option, we want to check that + # all of them are True, and then check for dm.any() + data._mask = _shrink_mask(dm) + return data + + +def diag(v, k=0): + """ + Extract a diagonal or construct a diagonal array. + + This function is the equivalent of `numpy.diag` that takes masked + values into account, see `numpy.diag` for details. + + See Also + -------- + numpy.diag : Equivalent function for ndarrays. + + """ + output = np.diag(v, k).view(MaskedArray) + if getmask(v) is not nomask: + output._mask = np.diag(v._mask, k) + return output + + +def left_shift(a, n): + """ + Shift the bits of an integer to the left. + + This is the masked array version of `numpy.left_shift`, for details + see that function. 
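+
+    For example, masked entries stay masked through the shift (an
+    illustrative sketch, not part of the upstream docstring; exact reprs
+    may vary across NumPy versions):
+
+    >>> a = np.ma.array([1, 2, 4], mask=[0, 1, 0])
+    >>> np.ma.left_shift(a, 1)
+    masked_array(data=[2, --, 8],
+                 mask=[False,  True, False],
+           fill_value=999999)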
+ + See Also + -------- + numpy.left_shift + + """ + m = getmask(a) + if m is nomask: + d = umath.left_shift(filled(a), n) + return masked_array(d) + else: + d = umath.left_shift(filled(a, 0), n) + return masked_array(d, mask=m) + + +def right_shift(a, n): + """ + Shift the bits of an integer to the right. + + This is the masked array version of `numpy.right_shift`, for details + see that function. + + See Also + -------- + numpy.right_shift + + """ + m = getmask(a) + if m is nomask: + d = umath.right_shift(filled(a), n) + return masked_array(d) + else: + d = umath.right_shift(filled(a, 0), n) + return masked_array(d, mask=m) + + +def put(a, indices, values, mode='raise'): + """ + Set storage-indexed locations to corresponding values. + + This function is equivalent to `MaskedArray.put`, see that method + for details. + + See Also + -------- + MaskedArray.put + + """ + # We can't use 'frommethod', the order of arguments is different + try: + return a.put(indices, values, mode=mode) + except AttributeError: + return narray(a, copy=False).put(indices, values, mode=mode) + + +def putmask(a, mask, values): # , mode='raise'): + """ + Changes elements of an array based on conditional and input values. + + This is the masked array version of `numpy.putmask`, for details see + `numpy.putmask`. + + See Also + -------- + numpy.putmask + + Notes + ----- + Using a masked array as `values` will **not** transform a `ndarray` into + a `MaskedArray`. + + """ + # We can't use 'frommethod', the order of arguments is different + if not isinstance(a, MaskedArray): + a = a.view(MaskedArray) + (valdata, valmask) = (getdata(values), getmask(values)) + if getmask(a) is nomask: + if valmask is not nomask: + a._sharedmask = True + a._mask = make_mask_none(a.shape, a.dtype) + np.copyto(a._mask, valmask, where=mask) + elif a._hardmask: + if valmask is not nomask: + m = a._mask.copy() + np.copyto(m, valmask, where=mask) + a.mask |= m + else: + if valmask is nomask: + valmask = getmaskarray(values) + np.copyto(a._mask, valmask, where=mask) + np.copyto(a._data, valdata, where=mask) + return + + +def transpose(a, axes=None): + """ + Permute the dimensions of an array. + + This function is exactly equivalent to `numpy.transpose`. + + See Also + -------- + numpy.transpose : Equivalent function in top-level NumPy module. + + Examples + -------- + >>> import numpy.ma as ma + >>> x = ma.arange(4).reshape((2,2)) + >>> x[1, 1] = ma.masked + >>> x + masked_array( + data=[[0, 1], + [2, --]], + mask=[[False, False], + [False, True]], + fill_value=999999) + + >>> ma.transpose(x) + masked_array( + data=[[0, 2], + [1, --]], + mask=[[False, False], + [False, True]], + fill_value=999999) + """ + # We can't use 'frommethod', as 'transpose' doesn't take keywords + try: + return a.transpose(axes) + except AttributeError: + return narray(a, copy=False).transpose(axes).view(MaskedArray) + + +def reshape(a, new_shape, order='C'): + """ + Returns an array containing the same data with a new shape. + + Refer to `MaskedArray.reshape` for full documentation. + + See Also + -------- + MaskedArray.reshape : equivalent function + + """ + # We can't use 'frommethod', it whine about some parameters. Dmmit. + try: + return a.reshape(new_shape, order=order) + except AttributeError: + _tmp = narray(a, copy=False).reshape(new_shape, order=order) + return _tmp.view(MaskedArray) + + +def resize(x, new_shape): + """ + Return a new masked array with the specified size and shape. + + This is the masked equivalent of the `numpy.resize` function. 
The new + array is filled with repeated copies of `x` (in the order that the + data are stored in memory). If `x` is masked, the new array will be + masked, and the new mask will be a repetition of the old one. + + See Also + -------- + numpy.resize : Equivalent function in the top level NumPy module. + + Examples + -------- + >>> import numpy.ma as ma + >>> a = ma.array([[1, 2] ,[3, 4]]) + >>> a[0, 1] = ma.masked + >>> a + masked_array( + data=[[1, --], + [3, 4]], + mask=[[False, True], + [False, False]], + fill_value=999999) + >>> np.resize(a, (3, 3)) + masked_array( + data=[[1, 2, 3], + [4, 1, 2], + [3, 4, 1]], + mask=False, + fill_value=999999) + >>> ma.resize(a, (3, 3)) + masked_array( + data=[[1, --, 3], + [4, 1, --], + [3, 4, 1]], + mask=[[False, True, False], + [False, False, True], + [False, False, False]], + fill_value=999999) + + A MaskedArray is always returned, regardless of the input type. + + >>> a = np.array([[1, 2] ,[3, 4]]) + >>> ma.resize(a, (3, 3)) + masked_array( + data=[[1, 2, 3], + [4, 1, 2], + [3, 4, 1]], + mask=False, + fill_value=999999) + + """ + # We can't use _frommethods here, as N.resize is notoriously whiny. + m = getmask(x) + if m is not nomask: + m = np.resize(m, new_shape) + result = np.resize(x, new_shape).view(get_masked_subclass(x)) + if result.ndim: + result._mask = m + return result + + +def ndim(obj): + """ + maskedarray version of the numpy function. + + """ + return np.ndim(getdata(obj)) + +ndim.__doc__ = np.ndim.__doc__ + + +def shape(obj): + "maskedarray version of the numpy function." + return np.shape(getdata(obj)) +shape.__doc__ = np.shape.__doc__ + + +def size(obj, axis=None): + "maskedarray version of the numpy function." + return np.size(getdata(obj), axis) +size.__doc__ = np.size.__doc__ + + +############################################################################## +# Extra functions # +############################################################################## + + +def where(condition, x=_NoValue, y=_NoValue): + """ + Return a masked array with elements from `x` or `y`, depending on condition. + + .. note:: + When only `condition` is provided, this function is identical to + `nonzero`. The rest of this documentation covers only the case where + all three arguments are provided. + + Parameters + ---------- + condition : array_like, bool + Where True, yield `x`, otherwise yield `y`. + x, y : array_like, optional + Values from which to choose. `x`, `y` and `condition` need to be + broadcastable to some shape. + + Returns + ------- + out : MaskedArray + An masked array with `masked` elements where the condition is masked, + elements from `x` where `condition` is True, and elements from `y` + elsewhere. + + See Also + -------- + numpy.where : Equivalent function in the top-level NumPy module. + nonzero : The function that is called when x and y are omitted + + Examples + -------- + >>> x = np.ma.array(np.arange(9.).reshape(3, 3), mask=[[0, 1, 0], + ... [1, 0, 1], + ... 
+    ...                                                    [0, 1, 0]])
+    >>> x
+    masked_array(
+      data=[[0.0, --, 2.0],
+            [--, 4.0, --],
+            [6.0, --, 8.0]],
+      mask=[[False,  True, False],
+            [ True, False,  True],
+            [False,  True, False]],
+      fill_value=1e+20)
+    >>> np.ma.where(x > 5, x, -3.1416)
+    masked_array(
+      data=[[-3.1416, --, -3.1416],
+            [--, -3.1416, --],
+            [6.0, --, 8.0]],
+      mask=[[False,  True, False],
+            [ True, False,  True],
+            [False,  True, False]],
+      fill_value=1e+20)
+
+    """
+
+    # handle the single-argument case
+    missing = (x is _NoValue, y is _NoValue).count(True)
+    if missing == 1:
+        raise ValueError("Must provide both 'x' and 'y' or neither.")
+    if missing == 2:
+        return nonzero(condition)
+
+    # we only care if the condition is True; False or masked picks `y`
+    cf = filled(condition, False)
+    xd = getdata(x)
+    yd = getdata(y)
+
+    # we need the full arrays here for correct final dimensions
+    cm = getmaskarray(condition)
+    xm = getmaskarray(x)
+    ym = getmaskarray(y)
+
+    # deal with the fact that masked.dtype == float64, but we don't actually
+    # want to treat it as that.
+    if x is masked and y is not masked:
+        xd = np.zeros((), dtype=yd.dtype)
+        xm = np.ones((), dtype=ym.dtype)
+    elif y is masked and x is not masked:
+        yd = np.zeros((), dtype=xd.dtype)
+        ym = np.ones((), dtype=xm.dtype)
+
+    data = np.where(cf, xd, yd)
+    mask = np.where(cf, xm, ym)
+    mask = np.where(cm, np.ones((), dtype=mask.dtype), mask)
+
+    # collapse the mask, for backwards compatibility
+    mask = _shrink_mask(mask)
+
+    return masked_array(data, mask=mask)
+
+
+def choose(indices, choices, out=None, mode='raise'):
+    """
+    Use an index array to construct a new array from a list of choices.
+
+    Given an array of integers and a list of n choice arrays, this function
+    will create a new array that merges each of the choice arrays. Where a
+    value in `indices` is ``i``, the new array will have the value that
+    ``choices[i]`` contains in the same place.
+
+    Parameters
+    ----------
+    indices : ndarray of ints
+        This array must contain integers in ``[0, n-1]``, where n is the
+        number of choices.
+    choices : sequence of arrays
+        Choice arrays. The index array and all of the choices should be
+        broadcastable to the same shape.
+    out : array, optional
+        If provided, the result will be inserted into this array. It should
+        be of the appropriate shape and `dtype`.
+    mode : {'raise', 'wrap', 'clip'}, optional
+        Specifies how out-of-bounds indices will behave.
+
+        * 'raise' : raise an error
+        * 'wrap' : wrap around
+        * 'clip' : clip to the range
+
+    Returns
+    -------
+    merged_array : array
+
+    See Also
+    --------
+    numpy.choose : Equivalent function in the top-level NumPy module.
+
+    Examples
+    --------
+    >>> choice = np.array([[1,1,1], [2,2,2], [3,3,3]])
+    >>> a = np.array([2, 1, 0])
+    >>> np.ma.choose(a, choice)
+    masked_array(data=[3, 2, 1],
+                 mask=False,
+           fill_value=999999)
+
+    """
+    def fmask(x):
+        "Returns the filled array, or True if masked."
+        if x is masked:
+            return True
+        return filled(x)
+
+    def nmask(x):
+        "Returns the mask, True if ``masked``, False if ``nomask``."
+        if x is masked:
+            return True
+        return getmask(x)
+    # Get the indices.
+    c = filled(indices, 0)
+    # Get the masks.
+    masks = [nmask(x) for x in choices]
+    data = [fmask(x) for x in choices]
+    # Construct the mask.
+    outputmask = np.choose(c, masks, mode=mode)
+    outputmask = make_mask(mask_or(outputmask, getmask(indices)),
+                           copy=False, shrink=True)
+    # Get the choices.
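+    # Combine the filled choice arrays with np.choose and view the result
+    # as a MaskedArray; the output mask built above is attached below via
+    # __setmask__.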
+    d = np.choose(c, data, mode=mode, out=out).view(MaskedArray)
+    if out is not None:
+        if isinstance(out, MaskedArray):
+            out.__setmask__(outputmask)
+        return out
+    d.__setmask__(outputmask)
+    return d
+
+
+def round_(a, decimals=0, out=None):
+    """
+    Return a copy of a, rounded to 'decimals' places.
+
+    When 'decimals' is negative, it specifies the number of positions
+    to the left of the decimal point. The real and imaginary parts of
+    complex numbers are rounded separately. Nothing is done if the
+    array is not of float type and 'decimals' is greater than or equal
+    to 0.
+
+    Parameters
+    ----------
+    a : array_like
+        Input array.
+    decimals : int
+        Number of decimals to round to. May be negative.
+    out : array_like
+        Existing array to use for output.
+        If not given, a new copy of `a` is returned.
+
+    Notes
+    -----
+    If out is given and does not have a mask attribute, the mask of a
+    is lost!
+
+    """
+    if out is None:
+        return np.round_(a, decimals, out)
+    else:
+        np.round_(getdata(a), decimals, out)
+        if hasattr(out, '_mask'):
+            out._mask = getmask(a)
+        return out
+round = round_
+
+
+# Needed by dot, so move here from extras.py. It will still be exported
+# from extras.py for compatibility.
+def mask_rowcols(a, axis=None):
+    """
+    Mask rows and/or columns of a 2D array that contain masked values.
+
+    Mask whole rows and/or columns of a 2D array that contain
+    masked values. The masking behavior is selected using the
+    `axis` parameter.
+
+      - If `axis` is None, rows *and* columns are masked.
+      - If `axis` is 0, only rows are masked.
+      - If `axis` is 1 or -1, only columns are masked.
+
+    Parameters
+    ----------
+    a : array_like, MaskedArray
+        The array to mask. If not a MaskedArray instance (or if no array
+        elements are masked), the result is a MaskedArray with `mask` set
+        to `nomask` (False). Must be a 2D array.
+    axis : int, optional
+        Axis along which to perform the operation. If None, applies to a
+        flattened version of the array.
+
+    Returns
+    -------
+    a : MaskedArray
+        A modified version of the input array, masked depending on the value
+        of the `axis` parameter.
+
+    Raises
+    ------
+    NotImplementedError
+        If input array `a` is not 2D.
+
+    See Also
+    --------
+    mask_rows : Mask rows of a 2D array that contain masked values.
+    mask_cols : Mask cols of a 2D array that contain masked values.
+    masked_where : Mask where a condition is met.
+
+    Notes
+    -----
+    The input array's mask is modified by this function.
+
+    Examples
+    --------
+    >>> import numpy.ma as ma
+    >>> a = np.zeros((3, 3), dtype=int)
+    >>> a[1, 1] = 1
+    >>> a
+    array([[0, 0, 0],
+           [0, 1, 0],
+           [0, 0, 0]])
+    >>> a = ma.masked_equal(a, 1)
+    >>> a
+    masked_array(
+      data=[[0, 0, 0],
+            [0, --, 0],
+            [0, 0, 0]],
+      mask=[[False, False, False],
+            [False,  True, False],
+            [False, False, False]],
+      fill_value=1)
+    >>> ma.mask_rowcols(a)
+    masked_array(
+      data=[[0, --, 0],
+            [--, --, --],
+            [0, --, 0]],
+      mask=[[False,  True, False],
+            [ True,  True,  True],
+            [False,  True, False]],
+      fill_value=1)
+
+    """
+    a = array(a, subok=False)
+    if a.ndim != 2:
+        raise NotImplementedError("mask_rowcols works for 2D arrays only.")
+    m = getmask(a)
+    # Nothing is masked: return a
+    if m is nomask or not m.any():
+        return a
+    maskedval = m.nonzero()
+    a._mask = a._mask.copy()
+    if not axis:
+        a[np.unique(maskedval[0])] = masked
+    if axis in [None, 1, -1]:
+        a[:, np.unique(maskedval[1])] = masked
+    return a
+
+
+# Include masked dot here to avoid import problems in getting it from
+# extras.py. Note that it is not included in __all__, but rather exported
+# from extras in order to avoid backward compatibility problems.
+def dot(a, b, strict=False, out=None):
+    """
+    Return the dot product of two arrays.
+
+    This function is the equivalent of `numpy.dot` that takes masked values
+    into account. Note that `strict` and `out` are in different positions
+    than in the method version. In order to maintain compatibility with the
+    corresponding method, it is recommended that the optional arguments be
+    treated as keyword only. At some point that may be mandatory.
+
+    .. note::
+      Works only with 2-D arrays at the moment.
+
+    Parameters
+    ----------
+    a, b : masked_array_like
+        Input arrays.
+    strict : bool, optional
+        Whether masked data are propagated (True) or set to 0 (False) for
+        the computation. Default is False. Propagating the mask means that
+        if a masked value appears in a row or column, the whole row or
+        column is considered masked.
+    out : masked_array, optional
+        Output argument. This must have the exact kind that would be returned
+        if it was not used. In particular, it must have the right type, must be
+        C-contiguous, and its dtype must be the dtype that would be returned
+        for `dot(a,b)`. This is a performance feature. Therefore, if these
+        conditions are not met, an exception is raised, instead of attempting
+        to be flexible.
+
+        .. versionadded:: 1.10.2
+
+    See Also
+    --------
+    numpy.dot : Equivalent function for ndarrays.
+
+    Examples
+    --------
+    >>> a = np.ma.array([[1, 2, 3], [4, 5, 6]], mask=[[1, 0, 0], [0, 0, 0]])
+    >>> b = np.ma.array([[1, 2], [3, 4], [5, 6]], mask=[[1, 0], [0, 0], [0, 0]])
+    >>> np.ma.dot(a, b)
+    masked_array(
+      data=[[21, 26],
+            [45, 64]],
+      mask=[[False, False],
+            [False, False]],
+      fill_value=999999)
+    >>> np.ma.dot(a, b, strict=True)
+    masked_array(
+      data=[[--, --],
+            [--, 64]],
+      mask=[[ True,  True],
+            [ True, False]],
+      fill_value=999999)
+
+    """
+    # !!!: Works only with 2D arrays. There should be a way to get it to run
+    # with higher dimension
+    if strict and (a.ndim == 2) and (b.ndim == 2):
+        a = mask_rowcols(a, 0)
+        b = mask_rowcols(b, 1)
+    am = ~getmaskarray(a)
+    bm = ~getmaskarray(b)
+
+    if out is None:
+        d = np.dot(filled(a, 0), filled(b, 0))
+        m = ~np.dot(am, bm)
+        if d.ndim == 0:
+            d = np.asarray(d)
+        r = d.view(get_masked_subclass(a, b))
+        r.__setmask__(m)
+        return r
+    else:
+        d = np.dot(filled(a, 0), filled(b, 0), out._data)
+        if out.mask.shape != d.shape:
+            out._mask = np.empty(d.shape, MaskType)
+        np.dot(am, bm, out._mask)
+        np.logical_not(out._mask, out._mask)
+        return out
+
+
+def inner(a, b):
+    """
+    Returns the inner product of a and b for arrays of floating point types.
+
+    Like the generic NumPy equivalent, the product sum is over the last
+    dimension of a and b. The first argument is not conjugated.
+
+    """
+    fa = filled(a, 0)
+    fb = filled(b, 0)
+    if fa.ndim == 0:
+        fa.shape = (1,)
+    if fb.ndim == 0:
+        fb.shape = (1,)
+    return np.inner(fa, fb).view(MaskedArray)
+inner.__doc__ = doc_note(np.inner.__doc__,
+                         "Masked values are replaced by 0.")
+innerproduct = inner
+
+
+def outer(a, b):
+    "maskedarray version of the numpy function."
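+    # Fill masked entries with 0 and flatten both inputs; np.outer itself
+    # ravels its arguments, so this mirrors the unmasked behavior.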
+    fa = filled(a, 0).ravel()
+    fb = filled(b, 0).ravel()
+    d = np.outer(fa, fb)
+    ma = getmask(a)
+    mb = getmask(b)
+    if ma is nomask and mb is nomask:
+        return masked_array(d)
+    ma = getmaskarray(a)
+    mb = getmaskarray(b)
+    m = make_mask(1 - np.outer(1 - ma, 1 - mb), copy=False)
+    return masked_array(d, mask=m)
+outer.__doc__ = doc_note(np.outer.__doc__,
+                         "Masked values are replaced by 0.")
+outerproduct = outer
+
+
+def _convolve_or_correlate(f, a, v, mode, propagate_mask):
+    """
+    Helper function for ma.correlate and ma.convolve
+    """
+    if propagate_mask:
+        # results which are contributed to by either item in any pair being invalid
+        mask = (
+            f(getmaskarray(a), np.ones(np.shape(v), dtype=bool), mode=mode)
+            | f(np.ones(np.shape(a), dtype=bool), getmaskarray(v), mode=mode)
+        )
+        data = f(getdata(a), getdata(v), mode=mode)
+    else:
+        # results which are not contributed to by any pair of valid elements
+        # (mode must be passed here too, so the mask shape matches the data)
+        mask = ~f(~getmaskarray(a), ~getmaskarray(v), mode=mode)
+        data = f(filled(a, 0), filled(v, 0), mode=mode)
+
+    return masked_array(data, mask=mask)
+
+
+def correlate(a, v, mode='valid', propagate_mask=True):
+    """
+    Cross-correlation of two 1-dimensional sequences.
+
+    Parameters
+    ----------
+    a, v : array_like
+        Input sequences.
+    mode : {'valid', 'same', 'full'}, optional
+        Refer to the `np.convolve` docstring. Note that the default
+        is 'valid', unlike `convolve`, which uses 'full'.
+    propagate_mask : bool
+        If True, then a result element is masked if any masked element
+        contributes towards it.
+        If False, then a result element is only masked if no non-masked
+        element contributes towards it.
+
+    Returns
+    -------
+    out : MaskedArray
+        Discrete cross-correlation of `a` and `v`.
+
+    See Also
+    --------
+    numpy.correlate : Equivalent function in the top-level NumPy module.
+    """
+    return _convolve_or_correlate(np.correlate, a, v, mode, propagate_mask)
+
+
+def convolve(a, v, mode='full', propagate_mask=True):
+    """
+    Returns the discrete, linear convolution of two one-dimensional sequences.
+
+    Parameters
+    ----------
+    a, v : array_like
+        Input sequences.
+    mode : {'valid', 'same', 'full'}, optional
+        Refer to the `np.convolve` docstring.
+    propagate_mask : bool
+        If True, then if any masked element is included in the sum for a result
+        element, then the result is masked.
+        If False, then the result element is only masked if no non-masked cells
+        contribute towards it.
+
+    Returns
+    -------
+    out : MaskedArray
+        Discrete, linear convolution of `a` and `v`.
+
+    See Also
+    --------
+    numpy.convolve : Equivalent function in the top-level NumPy module.
+    """
+    return _convolve_or_correlate(np.convolve, a, v, mode, propagate_mask)
+
+
+def allequal(a, b, fill_value=True):
+    """
+    Return True if all entries of a and b are equal, using
+    fill_value as a truth value where either or both are masked.
+
+    Parameters
+    ----------
+    a, b : array_like
+        Input arrays to compare.
+    fill_value : bool, optional
+        Whether masked values in a or b are considered equal (True) or not
+        (False).
+
+    Returns
+    -------
+    y : bool
+        Returns True if the two arrays are equal, False otherwise.
+        If either array contains NaN, then False is returned.
+ + See Also + -------- + all, any + numpy.ma.allclose + + Examples + -------- + >>> a = np.ma.array([1e10, 1e-7, 42.0], mask=[0, 0, 1]) + >>> a + masked_array(data=[10000000000.0, 1e-07, --], + mask=[False, False, True], + fill_value=1e+20) + + >>> b = np.array([1e10, 1e-7, -42.0]) + >>> b + array([ 1.00000000e+10, 1.00000000e-07, -4.20000000e+01]) + >>> np.ma.allequal(a, b, fill_value=False) + False + >>> np.ma.allequal(a, b) + True + + """ + m = mask_or(getmask(a), getmask(b)) + if m is nomask: + x = getdata(a) + y = getdata(b) + d = umath.equal(x, y) + return d.all() + elif fill_value: + x = getdata(a) + y = getdata(b) + d = umath.equal(x, y) + dm = array(d, mask=m, copy=False) + return dm.filled(True).all(None) + else: + return False + + +def allclose(a, b, masked_equal=True, rtol=1e-5, atol=1e-8): + """ + Returns True if two arrays are element-wise equal within a tolerance. + + This function is equivalent to `allclose` except that masked values + are treated as equal (default) or unequal, depending on the `masked_equal` + argument. + + Parameters + ---------- + a, b : array_like + Input arrays to compare. + masked_equal : bool, optional + Whether masked values in `a` and `b` are considered equal (True) or not + (False). They are considered equal by default. + rtol : float, optional + Relative tolerance. The relative difference is equal to ``rtol * b``. + Default is 1e-5. + atol : float, optional + Absolute tolerance. The absolute difference is equal to `atol`. + Default is 1e-8. + + Returns + ------- + y : bool + Returns True if the two arrays are equal within the given + tolerance, False otherwise. If either array contains NaN, then + False is returned. + + See Also + -------- + all, any + numpy.allclose : the non-masked `allclose`. + + Notes + ----- + If the following equation is element-wise True, then `allclose` returns + True:: + + absolute(`a` - `b`) <= (`atol` + `rtol` * absolute(`b`)) + + Return True if all elements of `a` and `b` are equal subject to + given tolerances. + + Examples + -------- + >>> a = np.ma.array([1e10, 1e-7, 42.0], mask=[0, 0, 1]) + >>> a + masked_array(data=[10000000000.0, 1e-07, --], + mask=[False, False, True], + fill_value=1e+20) + >>> b = np.ma.array([1e10, 1e-8, -42.0], mask=[0, 0, 1]) + >>> np.ma.allclose(a, b) + False + + >>> a = np.ma.array([1e10, 1e-8, 42.0], mask=[0, 0, 1]) + >>> b = np.ma.array([1.00001e10, 1e-9, -42.0], mask=[0, 0, 1]) + >>> np.ma.allclose(a, b) + True + >>> np.ma.allclose(a, b, masked_equal=False) + False + + Masked values are not compared directly. + + >>> a = np.ma.array([1e10, 1e-8, 42.0], mask=[0, 0, 1]) + >>> b = np.ma.array([1.00001e10, 1e-9, 42.0], mask=[0, 0, 1]) + >>> np.ma.allclose(a, b) + True + >>> np.ma.allclose(a, b, masked_equal=False) + False + + """ + x = masked_array(a, copy=False) + y = masked_array(b, copy=False) + + # make sure y is an inexact type to avoid abs(MIN_INT); will cause + # casting of x later. + # NOTE: We explicitly allow timedelta, which used to work. This could + # possibly be deprecated. See also gh-18286. + # timedelta works if `atol` is an integer or also a timedelta. + # Although, the default tolerances are unlikely to be useful + if y.dtype.kind != "m": + dtype = np.result_type(y, 1.) + if y.dtype != dtype: + y = masked_array(y, dtype=dtype, copy=False) + + m = mask_or(getmask(x), getmask(y)) + xinf = np.isinf(masked_array(x, copy=False, mask=m)).filled(False) + # If we have some infs, they should fall at the same place. 
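+    # (xinf was computed from x with the combined mask applied and filled
+    # with False, so masked entries never count as infinite here)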
+    if not np.all(xinf == filled(np.isinf(y), False)):
+        return False
+    # No infs at all
+    if not np.any(xinf):
+        d = filled(less_equal(absolute(x - y), atol + rtol * absolute(y)),
+                   masked_equal)
+        return np.all(d)
+
+    if not np.all(filled(x[xinf] == y[xinf], masked_equal)):
+        return False
+    x = x[~xinf]
+    y = y[~xinf]
+
+    d = filled(less_equal(absolute(x - y), atol + rtol * absolute(y)),
+               masked_equal)
+
+    return np.all(d)
+
+
+def asarray(a, dtype=None, order=None):
+    """
+    Convert the input to a masked array of the given data-type.
+
+    No copy is performed if the input is already an `ndarray`. If `a` is
+    a subclass of `MaskedArray`, a base class `MaskedArray` is returned.
+
+    Parameters
+    ----------
+    a : array_like
+        Input data, in any form that can be converted to a masked array. This
+        includes lists, lists of tuples, tuples, tuples of tuples, tuples
+        of lists, ndarrays and masked arrays.
+    dtype : dtype, optional
+        By default, the data-type is inferred from the input data.
+    order : {'C', 'F'}, optional
+        Whether to use row-major ('C') or column-major ('FORTRAN') memory
+        representation. Default is 'C'.
+
+    Returns
+    -------
+    out : MaskedArray
+        Masked array interpretation of `a`.
+
+    See Also
+    --------
+    asanyarray : Similar to `asarray`, but conserves subclasses.
+
+    Examples
+    --------
+    >>> x = np.arange(10.).reshape(2, 5)
+    >>> x
+    array([[0., 1., 2., 3., 4.],
+           [5., 6., 7., 8., 9.]])
+    >>> np.ma.asarray(x)
+    masked_array(
+      data=[[0., 1., 2., 3., 4.],
+            [5., 6., 7., 8., 9.]],
+      mask=False,
+      fill_value=1e+20)
+    >>> type(np.ma.asarray(x))
+    <class 'numpy.ma.core.MaskedArray'>
+
+    """
+    order = order or 'C'
+    return masked_array(a, dtype=dtype, copy=False, keep_mask=True,
+                        subok=False, order=order)
+
+
+def asanyarray(a, dtype=None):
+    """
+    Convert the input to a masked array, conserving subclasses.
+
+    If `a` is a subclass of `MaskedArray`, its class is conserved.
+    No copy is performed if the input is already an `ndarray`.
+
+    Parameters
+    ----------
+    a : array_like
+        Input data, in any form that can be converted to an array.
+    dtype : dtype, optional
+        By default, the data-type is inferred from the input data.
+
+    Returns
+    -------
+    out : MaskedArray
+        MaskedArray interpretation of `a`.
+
+    See Also
+    --------
+    asarray : Similar to `asanyarray`, but does not conserve subclass.
+
+    Examples
+    --------
+    >>> x = np.arange(10.).reshape(2, 5)
+    >>> x
+    array([[0., 1., 2., 3., 4.],
+           [5., 6., 7., 8., 9.]])
+    >>> np.ma.asanyarray(x)
+    masked_array(
+      data=[[0., 1., 2., 3., 4.],
+            [5., 6., 7., 8., 9.]],
+      mask=False,
+      fill_value=1e+20)
+    >>> type(np.ma.asanyarray(x))
+    <class 'numpy.ma.core.MaskedArray'>
+
+    """
+    # workaround for #8666, to preserve identity. Ideally the bottom line
+    # would handle this for us.
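+    # Short-circuit: return an existing MaskedArray with a matching dtype
+    # unchanged, so that ``asanyarray(a) is a`` holds (see gh-8666).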
+    if isinstance(a, MaskedArray) and (dtype is None or dtype == a.dtype):
+        return a
+    return masked_array(a, dtype=dtype, copy=False, keep_mask=True, subok=True)
+
+
+##############################################################################
+#                               Pickling                                     #
+##############################################################################
+
+def _pickle_warn(method):
+    # NumPy 1.15.0, 2017-12-10
+    warnings.warn(
+        f"np.ma.{method} is deprecated, use pickle.{method} instead",
+        DeprecationWarning, stacklevel=3)
+
+
+def fromfile(file, dtype=float, count=-1, sep=''):
+    raise NotImplementedError(
+        "fromfile() not yet implemented for a MaskedArray.")
+
+
+def fromflex(fxarray):
+    """
+    Build a masked array from a suitable flexible-type array.
+
+    The input array has to have a data-type with ``_data`` and ``_mask``
+    fields. This type of array is output by `MaskedArray.toflex`.
+
+    Parameters
+    ----------
+    fxarray : ndarray
+        The structured input array, containing ``_data`` and ``_mask``
+        fields. If present, other fields are discarded.
+
+    Returns
+    -------
+    result : MaskedArray
+        The constructed masked array.
+
+    See Also
+    --------
+    MaskedArray.toflex : Build a flexible-type array from a masked array.
+
+    Examples
+    --------
+    >>> x = np.ma.array(np.arange(9).reshape(3, 3), mask=[0] + [1, 0] * 4)
+    >>> rec = x.toflex()
+    >>> rec
+    array([[(0, False), (1,  True), (2, False)],
+           [(3,  True), (4, False), (5,  True)],
+           [(6, False), (7,  True), (8, False)]],
+          dtype=[('_data', '<i8'), ('_mask', '?')])
+    >>> x2 = np.ma.fromflex(rec)
+    >>> x2
+    masked_array(
+      data=[[0, --, 2],
+            [--, 4, --],
+            [6, --, 8]],
+      mask=[[False,  True, False],
+            [ True, False,  True],
+            [False,  True, False]],
+      fill_value=999999)
+
+    Extra fields can be present in the structured array but are discarded:
+
+    >>> dt = [('_data', '<i4'), ('_mask', '|b1'), ('field3', '<f4')]
+    >>> rec2 = np.zeros((2, 2), dtype=dt)
+    >>> rec2
+    array([[(0, False, 0.), (0, False, 0.)],
+           [(0, False, 0.), (0, False, 0.)]],
+          dtype=[('_data', '<i4'), ('_mask', '?'), ('field3', '<f4')])
+    >>> y = np.ma.fromflex(rec2)
+    >>> y
+    masked_array(
+      data=[[0, 0],
+            [0, 0]],
+      mask=[[False, False],
+            [False, False]],
+      fill_value=999999,
+      dtype=int32)
+
+    """
+    return masked_array(fxarray['_data'], mask=fxarray['_mask'])
+
+
+class _convert2ma:
+
+    """
+    Convert functions from numpy to numpy.ma.
+
+    Parameters
+    ----------
+    funcname : str
+        The name of the function to transform.
+
+    """
+    __doc__ = None
+
+    def __init__(self, funcname, np_ret, np_ma_ret, params=None):
+        self._func = getattr(np, funcname)
+        self.__doc__ = self.getdoc(np_ret, np_ma_ret)
+        self._extras = params or {}
+
+    def getdoc(self, np_ret, np_ma_ret):
+        "Return the doc of the function (from the doc of the method)."
+        doc = getattr(self._func, '__doc__', None)
+        sig = get_object_signature(self._func)
+        if doc:
+            doc = self._replace_return_type(doc, np_ret, np_ma_ret)
+            # Add the signature of the function at the beginning of the doc
+            if sig:
+                sig = "%s%s\n" % (self._func.__name__, sig)
+            doc = sig + doc
+        return doc
+
+    def _replace_return_type(self, doc, np_ret, np_ma_ret):
+        """
+        Replace documentation of ``np`` function's return type.
+
+        Replaces it with the proper type for the ``np.ma`` function.
+
+        Parameters
+        ----------
+        doc : str
+            The documentation of the ``np`` method.
+        np_ret : str
+            The return type string of the ``np`` method that we want to
+            replace. (e.g. "out : ndarray")
+        np_ma_ret : str
+            The return type string of the ``np.ma`` method.
+            (e.g. "out : MaskedArray")
+        """
+        if np_ret not in doc:
+            raise RuntimeError(
+                f"Failed to replace `{np_ret}` with `{np_ma_ret}`. "
" + f"The documentation string for return type, {np_ret}, is not " + f"found in the docstring for `np.{self._func.__name__}`. " + f"Fix the docstring for `np.{self._func.__name__}` or " + "update the expected string for return type." + ) + + return doc.replace(np_ret, np_ma_ret) + + def __call__(self, *args, **params): + # Find the common parameters to the call and the definition + _extras = self._extras + common_params = set(params).intersection(_extras) + # Drop the common parameters from the call + for p in common_params: + _extras[p] = params.pop(p) + # Get the result + result = self._func.__call__(*args, **params).view(MaskedArray) + if "fill_value" in common_params: + result.fill_value = _extras.get("fill_value", None) + if "hardmask" in common_params: + result._hardmask = bool(_extras.get("hard_mask", False)) + return result + + +arange = _convert2ma( + 'arange', + params=dict(fill_value=None, hardmask=False), + np_ret='arange : ndarray', + np_ma_ret='arange : MaskedArray', +) +clip = _convert2ma( + 'clip', + params=dict(fill_value=None, hardmask=False), + np_ret='clipped_array : ndarray', + np_ma_ret='clipped_array : MaskedArray', +) +diff = _convert2ma( + 'diff', + params=dict(fill_value=None, hardmask=False), + np_ret='diff : ndarray', + np_ma_ret='diff : MaskedArray', +) +empty = _convert2ma( + 'empty', + params=dict(fill_value=None, hardmask=False), + np_ret='out : ndarray', + np_ma_ret='out : MaskedArray', +) +empty_like = _convert2ma( + 'empty_like', + np_ret='out : ndarray', + np_ma_ret='out : MaskedArray', +) +frombuffer = _convert2ma( + 'frombuffer', + np_ret='out : ndarray', + np_ma_ret='out: MaskedArray', +) +fromfunction = _convert2ma( + 'fromfunction', + np_ret='fromfunction : any', + np_ma_ret='fromfunction: MaskedArray', +) +identity = _convert2ma( + 'identity', + params=dict(fill_value=None, hardmask=False), + np_ret='out : ndarray', + np_ma_ret='out : MaskedArray', +) +indices = _convert2ma( + 'indices', + params=dict(fill_value=None, hardmask=False), + np_ret='grid : one ndarray or tuple of ndarrays', + np_ma_ret='grid : one MaskedArray or tuple of MaskedArrays', +) +ones = _convert2ma( + 'ones', + params=dict(fill_value=None, hardmask=False), + np_ret='out : ndarray', + np_ma_ret='out : MaskedArray', +) +ones_like = _convert2ma( + 'ones_like', + np_ret='out : ndarray', + np_ma_ret='out : MaskedArray', +) +squeeze = _convert2ma( + 'squeeze', + params=dict(fill_value=None, hardmask=False), + np_ret='squeezed : ndarray', + np_ma_ret='squeezed : MaskedArray', +) +zeros = _convert2ma( + 'zeros', + params=dict(fill_value=None, hardmask=False), + np_ret='out : ndarray', + np_ma_ret='out : MaskedArray', +) +zeros_like = _convert2ma( + 'zeros_like', + np_ret='out : ndarray', + np_ma_ret='out : MaskedArray', +) + + +def append(a, b, axis=None): + """Append values to the end of an array. + + .. versionadded:: 1.9.0 + + Parameters + ---------- + a : array_like + Values are appended to a copy of this array. + b : array_like + These values are appended to a copy of `a`. It must be of the + correct shape (the same shape as `a`, excluding `axis`). If `axis` + is not specified, `b` can be any shape and will be flattened + before use. + axis : int, optional + The axis along which `v` are appended. If `axis` is not given, + both `a` and `b` are flattened before use. + + Returns + ------- + append : MaskedArray + A copy of `a` with `b` appended to `axis`. Note that `append` + does not occur in-place: a new array is allocated and filled. 
If + `axis` is None, the result is a flattened array. + + See Also + -------- + numpy.append : Equivalent function in the top-level NumPy module. + + Examples + -------- + >>> import numpy.ma as ma + >>> a = ma.masked_values([1, 2, 3], 2) + >>> b = ma.masked_values([[4, 5, 6], [7, 8, 9]], 7) + >>> ma.append(a, b) + masked_array(data=[1, --, 3, 4, 5, 6, --, 8, 9], + mask=[False, True, False, False, False, False, True, False, + False], + fill_value=999999) + """ + return concatenate([a, b], axis) diff --git a/.local/lib/python3.8/site-packages/numpy/ma/core.pyi b/.local/lib/python3.8/site-packages/numpy/ma/core.pyi new file mode 100644 index 0000000..bc1f45a --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/ma/core.pyi @@ -0,0 +1,468 @@ +from typing import Any, List, TypeVar, Callable +from numpy import ndarray, dtype, float64 + +from numpy import ( + amax as amax, + amin as amin, + bool_ as bool_, + expand_dims as expand_dims, + diff as diff, + clip as clip, + indices as indices, + ones_like as ones_like, + squeeze as squeeze, + zeros_like as zeros_like, +) + +from numpy.lib.function_base import ( + angle as angle, +) + +# TODO: Set the `bound` to something more suitable once we +# have proper shape support +_ShapeType = TypeVar("_ShapeType", bound=Any) +_DType_co = TypeVar("_DType_co", bound=dtype[Any], covariant=True) + +__all__: List[str] + +MaskType = bool_ +nomask: bool_ + +class MaskedArrayFutureWarning(FutureWarning): ... +class MAError(Exception): ... +class MaskError(MAError): ... + +def default_fill_value(obj): ... +def minimum_fill_value(obj): ... +def maximum_fill_value(obj): ... +def set_fill_value(a, fill_value): ... +def common_fill_value(a, b): ... +def filled(a, fill_value=...): ... +def getdata(a, subok=...): ... +get_data = getdata + +def fix_invalid(a, mask=..., copy=..., fill_value=...): ... + +class _MaskedUFunc: + f: Any + __doc__: Any + __name__: Any + def __init__(self, ufunc): ... + +class _MaskedUnaryOperation(_MaskedUFunc): + fill: Any + domain: Any + def __init__(self, mufunc, fill=..., domain=...): ... + def __call__(self, a, *args, **kwargs): ... + +class _MaskedBinaryOperation(_MaskedUFunc): + fillx: Any + filly: Any + def __init__(self, mbfunc, fillx=..., filly=...): ... + def __call__(self, a, b, *args, **kwargs): ... + def reduce(self, target, axis=..., dtype=...): ... + def outer(self, a, b): ... + def accumulate(self, target, axis=...): ... + +class _DomainedBinaryOperation(_MaskedUFunc): + domain: Any + fillx: Any + filly: Any + def __init__(self, dbfunc, domain, fillx=..., filly=...): ... + def __call__(self, a, b, *args, **kwargs): ... 
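+
+# Masked-aware counterparts of the corresponding NumPy ufuncs, created in
+# np.ma.core as instances of the operation classes stubbed above.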
+ +exp: _MaskedUnaryOperation +conjugate: _MaskedUnaryOperation +sin: _MaskedUnaryOperation +cos: _MaskedUnaryOperation +arctan: _MaskedUnaryOperation +arcsinh: _MaskedUnaryOperation +sinh: _MaskedUnaryOperation +cosh: _MaskedUnaryOperation +tanh: _MaskedUnaryOperation +abs: _MaskedUnaryOperation +absolute: _MaskedUnaryOperation +fabs: _MaskedUnaryOperation +negative: _MaskedUnaryOperation +floor: _MaskedUnaryOperation +ceil: _MaskedUnaryOperation +around: _MaskedUnaryOperation +logical_not: _MaskedUnaryOperation +sqrt: _MaskedUnaryOperation +log: _MaskedUnaryOperation +log2: _MaskedUnaryOperation +log10: _MaskedUnaryOperation +tan: _MaskedUnaryOperation +arcsin: _MaskedUnaryOperation +arccos: _MaskedUnaryOperation +arccosh: _MaskedUnaryOperation +arctanh: _MaskedUnaryOperation + +add: _MaskedBinaryOperation +subtract: _MaskedBinaryOperation +multiply: _MaskedBinaryOperation +arctan2: _MaskedBinaryOperation +equal: _MaskedBinaryOperation +not_equal: _MaskedBinaryOperation +less_equal: _MaskedBinaryOperation +greater_equal: _MaskedBinaryOperation +less: _MaskedBinaryOperation +greater: _MaskedBinaryOperation +logical_and: _MaskedBinaryOperation +alltrue: _MaskedBinaryOperation +logical_or: _MaskedBinaryOperation +sometrue: Callable[..., Any] +logical_xor: _MaskedBinaryOperation +bitwise_and: _MaskedBinaryOperation +bitwise_or: _MaskedBinaryOperation +bitwise_xor: _MaskedBinaryOperation +hypot: _MaskedBinaryOperation +divide: _MaskedBinaryOperation +true_divide: _MaskedBinaryOperation +floor_divide: _MaskedBinaryOperation +remainder: _MaskedBinaryOperation +fmod: _MaskedBinaryOperation +mod: _MaskedBinaryOperation + +def make_mask_descr(ndtype): ... +def getmask(a): ... +get_mask = getmask + +def getmaskarray(arr): ... +def is_mask(m): ... +def make_mask(m, copy=..., shrink=..., dtype=...): ... +def make_mask_none(newshape, dtype=...): ... +def mask_or(m1, m2, copy=..., shrink=...): ... +def flatten_mask(mask): ... +def masked_where(condition, a, copy=...): ... +def masked_greater(x, value, copy=...): ... +def masked_greater_equal(x, value, copy=...): ... +def masked_less(x, value, copy=...): ... +def masked_less_equal(x, value, copy=...): ... +def masked_not_equal(x, value, copy=...): ... +def masked_equal(x, value, copy=...): ... +def masked_inside(x, v1, v2, copy=...): ... +def masked_outside(x, v1, v2, copy=...): ... +def masked_object(x, value, copy=..., shrink=...): ... +def masked_values(x, value, rtol=..., atol=..., copy=..., shrink=...): ... +def masked_invalid(a, copy=...): ... + +class _MaskedPrintOption: + def __init__(self, display): ... + def display(self): ... + def set_display(self, s): ... + def enabled(self): ... + def enable(self, shrink=...): ... + +masked_print_option: _MaskedPrintOption + +def flatten_structured_array(a): ... + +class MaskedIterator: + ma: Any + dataiter: Any + maskiter: Any + def __init__(self, ma): ... + def __iter__(self): ... + def __getitem__(self, indx): ... + def __setitem__(self, index, value): ... + def __next__(self): ... + +class MaskedArray(ndarray[_ShapeType, _DType_co]): + __array_priority__: Any + def __new__(cls, data=..., mask=..., dtype=..., copy=..., subok=..., ndmin=..., fill_value=..., keep_mask=..., hard_mask=..., shrink=..., order=...): ... + def __array_finalize__(self, obj): ... + def __array_wrap__(self, obj, context=...): ... + def view(self, dtype=..., type=..., fill_value=...): ... + def __getitem__(self, indx): ... + def __setitem__(self, indx, value): ... + @property + def dtype(self): ... 
+ @dtype.setter + def dtype(self, dtype): ... + @property + def shape(self): ... + @shape.setter + def shape(self, shape): ... + def __setmask__(self, mask, copy=...): ... + @property + def mask(self): ... + @mask.setter + def mask(self, value): ... + @property + def recordmask(self): ... + @recordmask.setter + def recordmask(self, mask): ... + def harden_mask(self): ... + def soften_mask(self): ... + @property + def hardmask(self): ... + def unshare_mask(self): ... + @property + def sharedmask(self): ... + def shrink_mask(self): ... + @property + def baseclass(self): ... + data: Any + @property + def flat(self): ... + @flat.setter + def flat(self, value): ... + @property + def fill_value(self): ... + @fill_value.setter + def fill_value(self, value=...): ... + get_fill_value: Any + set_fill_value: Any + def filled(self, fill_value=...): ... + def compressed(self): ... + def compress(self, condition, axis=..., out=...): ... + def __eq__(self, other): ... + def __ne__(self, other): ... + def __add__(self, other): ... + def __radd__(self, other): ... + def __sub__(self, other): ... + def __rsub__(self, other): ... + def __mul__(self, other): ... + def __rmul__(self, other): ... + def __div__(self, other): ... + def __truediv__(self, other): ... + def __rtruediv__(self, other): ... + def __floordiv__(self, other): ... + def __rfloordiv__(self, other): ... + def __pow__(self, other): ... + def __rpow__(self, other): ... + def __iadd__(self, other): ... + def __isub__(self, other): ... + def __imul__(self, other): ... + def __idiv__(self, other): ... + def __ifloordiv__(self, other): ... + def __itruediv__(self, other): ... + def __ipow__(self, other): ... + def __float__(self): ... + def __int__(self): ... + @property # type: ignore[misc] + def imag(self): ... + get_imag: Any + @property # type: ignore[misc] + def real(self): ... + get_real: Any + def count(self, axis=..., keepdims=...): ... + def ravel(self, order=...): ... + def reshape(self, *s, **kwargs): ... + def resize(self, newshape, refcheck=..., order=...): ... + def put(self, indices, values, mode=...): ... + def ids(self): ... + def iscontiguous(self): ... + def all(self, axis=..., out=..., keepdims=...): ... + def any(self, axis=..., out=..., keepdims=...): ... + def nonzero(self): ... + def trace(self, offset=..., axis1=..., axis2=..., dtype=..., out=...): ... + def dot(self, b, out=..., strict=...): ... + def sum(self, axis=..., dtype=..., out=..., keepdims=...): ... + def cumsum(self, axis=..., dtype=..., out=...): ... + def prod(self, axis=..., dtype=..., out=..., keepdims=...): ... + product: Any + def cumprod(self, axis=..., dtype=..., out=...): ... + def mean(self, axis=..., dtype=..., out=..., keepdims=...): ... + def anom(self, axis=..., dtype=...): ... + def var(self, axis=..., dtype=..., out=..., ddof=..., keepdims=...): ... + def std(self, axis=..., dtype=..., out=..., ddof=..., keepdims=...): ... + def round(self, decimals=..., out=...): ... + def argsort(self, axis=..., kind=..., order=..., endwith=..., fill_value=...): ... + def argmin(self, axis=..., fill_value=..., out=..., *, keepdims=...): ... + def argmax(self, axis=..., fill_value=..., out=..., *, keepdims=...): ... + def sort(self, axis=..., kind=..., order=..., endwith=..., fill_value=...): ... + def min(self, axis=..., out=..., fill_value=..., keepdims=...): ... + # NOTE: deprecated + # def mini(self, axis=...): ... + # def tostring(self, fill_value=..., order=...): ... + def max(self, axis=..., out=..., fill_value=..., keepdims=...): ... 
+ def ptp(self, axis=..., out=..., fill_value=..., keepdims=...): ... + def partition(self, *args, **kwargs): ... + def argpartition(self, *args, **kwargs): ... + def take(self, indices, axis=..., out=..., mode=...): ... + copy: Any + diagonal: Any + flatten: Any + repeat: Any + squeeze: Any + swapaxes: Any + T: Any + transpose: Any + def tolist(self, fill_value=...): ... + def tobytes(self, fill_value=..., order=...): ... + def tofile(self, fid, sep=..., format=...): ... + def toflex(self): ... + torecords: Any + def __reduce__(self): ... + def __deepcopy__(self, memo=...): ... + +class mvoid(MaskedArray[_ShapeType, _DType_co]): + def __new__( + self, + data, + mask=..., + dtype=..., + fill_value=..., + hardmask=..., + copy=..., + subok=..., + ): ... + def __getitem__(self, indx): ... + def __setitem__(self, indx, value): ... + def __iter__(self): ... + def __len__(self): ... + def filled(self, fill_value=...): ... + def tolist(self): ... + +def isMaskedArray(x): ... +isarray = isMaskedArray +isMA = isMaskedArray + +# 0D float64 array +class MaskedConstant(MaskedArray[Any, dtype[float64]]): + def __new__(cls): ... + __class__: Any + def __array_finalize__(self, obj): ... + def __array_prepare__(self, obj, context=...): ... + def __array_wrap__(self, obj, context=...): ... + def __format__(self, format_spec): ... + def __reduce__(self): ... + def __iop__(self, other): ... + __iadd__: Any + __isub__: Any + __imul__: Any + __ifloordiv__: Any + __itruediv__: Any + __ipow__: Any + def copy(self, *args, **kwargs): ... + def __copy__(self): ... + def __deepcopy__(self, memo): ... + def __setattr__(self, attr, value): ... + +masked: MaskedConstant +masked_singleton: MaskedConstant +masked_array = MaskedArray + +def array( + data, + dtype=..., + copy=..., + order=..., + mask=..., + fill_value=..., + keep_mask=..., + hard_mask=..., + shrink=..., + subok=..., + ndmin=..., +): ... +def is_masked(x): ... + +class _extrema_operation(_MaskedUFunc): + compare: Any + fill_value_func: Any + def __init__(self, ufunc, compare, fill_value): ... + # NOTE: in practice `b` has a default value, but users should + # explicitly provide a value here as the default is deprecated + def __call__(self, a, b): ... + def reduce(self, target, axis=...): ... + def outer(self, a, b): ... + +def min(obj, axis=..., out=..., fill_value=..., keepdims=...): ... +def max(obj, axis=..., out=..., fill_value=..., keepdims=...): ... +def ptp(obj, axis=..., out=..., fill_value=..., keepdims=...): ... + +class _frommethod: + __name__: Any + __doc__: Any + reversed: Any + def __init__(self, methodname, reversed=...): ... + def getdoc(self): ... + def __call__(self, a, *args, **params): ... + +all: _frommethod +anomalies: _frommethod +anom: _frommethod +any: _frommethod +compress: _frommethod +cumprod: _frommethod +cumsum: _frommethod +copy: _frommethod +diagonal: _frommethod +harden_mask: _frommethod +ids: _frommethod +mean: _frommethod +nonzero: _frommethod +prod: _frommethod +product: _frommethod +ravel: _frommethod +repeat: _frommethod +soften_mask: _frommethod +std: _frommethod +sum: _frommethod +swapaxes: _frommethod +trace: _frommethod +var: _frommethod +count: _frommethod +argmin: _frommethod +argmax: _frommethod + +minimum: _extrema_operation +maximum: _extrema_operation + +def take(a, indices, axis=..., out=..., mode=...): ... +def power(a, b, third=...): ... +def argsort(a, axis=..., kind=..., order=..., endwith=..., fill_value=...): ... +def sort(a, axis=..., kind=..., order=..., endwith=..., fill_value=...): ... 
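+
+# Stubs for the module-level convenience functions that np.ma.core defines
+# alongside the MaskedArray methods of the same name.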
+def compressed(x): ... +def concatenate(arrays, axis=...): ... +def diag(v, k=...): ... +def left_shift(a, n): ... +def right_shift(a, n): ... +def put(a, indices, values, mode=...): ... +def putmask(a, mask, values): ... +def transpose(a, axes=...): ... +def reshape(a, new_shape, order=...): ... +def resize(x, new_shape): ... +def ndim(obj): ... +def shape(obj): ... +def size(obj, axis=...): ... +def where(condition, x=..., y=...): ... +def choose(indices, choices, out=..., mode=...): ... +def round_(a, decimals=..., out=...): ... +round = round_ + +def inner(a, b): ... +innerproduct = inner + +def outer(a, b): ... +outerproduct = outer + +def correlate(a, v, mode=..., propagate_mask=...): ... +def convolve(a, v, mode=..., propagate_mask=...): ... +def allequal(a, b, fill_value=...): ... +def allclose(a, b, masked_equal=..., rtol=..., atol=...): ... +def asarray(a, dtype=..., order=...): ... +def asanyarray(a, dtype=...): ... +def fromflex(fxarray): ... + +class _convert2ma: + __doc__: Any + def __init__(self, funcname, params=...): ... + def getdoc(self): ... + def __call__(self, *args, **params): ... + +arange: _convert2ma +empty: _convert2ma +empty_like: _convert2ma +frombuffer: _convert2ma +fromfunction: _convert2ma +identity: _convert2ma +ones: _convert2ma +zeros: _convert2ma + +def append(a, b, axis=...): ... +def dot(a, b, strict=..., out=...): ... +def mask_rowcols(a, axis=...): ... diff --git a/.local/lib/python3.8/site-packages/numpy/ma/extras.py b/.local/lib/python3.8/site-packages/numpy/ma/extras.py new file mode 100644 index 0000000..38bf1f0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/ma/extras.py @@ -0,0 +1,1921 @@ +""" +Masked arrays add-ons. + +A collection of utilities for `numpy.ma`. + +:author: Pierre Gerard-Marchant +:contact: pierregm_at_uga_dot_edu +:version: $Id: extras.py 3473 2007-10-29 15:18:13Z jarrod.millman $ + +""" +__all__ = [ + 'apply_along_axis', 'apply_over_axes', 'atleast_1d', 'atleast_2d', + 'atleast_3d', 'average', 'clump_masked', 'clump_unmasked', + 'column_stack', 'compress_cols', 'compress_nd', 'compress_rowcols', + 'compress_rows', 'count_masked', 'corrcoef', 'cov', 'diagflat', 'dot', + 'dstack', 'ediff1d', 'flatnotmasked_contiguous', 'flatnotmasked_edges', + 'hsplit', 'hstack', 'isin', 'in1d', 'intersect1d', 'mask_cols', 'mask_rowcols', + 'mask_rows', 'masked_all', 'masked_all_like', 'median', 'mr_', + 'notmasked_contiguous', 'notmasked_edges', 'polyfit', 'row_stack', + 'setdiff1d', 'setxor1d', 'stack', 'unique', 'union1d', 'vander', 'vstack', + ] + +import itertools +import warnings + +from . import core as ma +from .core import ( + MaskedArray, MAError, add, array, asarray, concatenate, filled, count, + getmask, getmaskarray, make_mask_descr, masked, masked_array, mask_or, + nomask, ones, sort, zeros, getdata, get_masked_subclass, dot, + mask_rowcols + ) + +import numpy as np +from numpy import ndarray, array as nxarray +from numpy.core.multiarray import normalize_axis_index +from numpy.core.numeric import normalize_axis_tuple +from numpy.lib.function_base import _ureduce +from numpy.lib.index_tricks import AxisConcatenator + + +def issequence(seq): + """ + Is seq a sequence (ndarray, list or tuple)? + + """ + return isinstance(seq, (ndarray, tuple, list)) + + +def count_masked(arr, axis=None): + """ + Count the number of masked elements along the given axis. + + Parameters + ---------- + arr : array_like + An array with (possibly) masked elements. + axis : int, optional + Axis along which to count. 
If None (default), a flattened + version of the array is used. + + Returns + ------- + count : int, ndarray + The total number of masked elements (axis=None) or the number + of masked elements along each slice of the given axis. + + See Also + -------- + MaskedArray.count : Count non-masked elements. + + Examples + -------- + >>> import numpy.ma as ma + >>> a = np.arange(9).reshape((3,3)) + >>> a = ma.array(a) + >>> a[1, 0] = ma.masked + >>> a[1, 2] = ma.masked + >>> a[2, 1] = ma.masked + >>> a + masked_array( + data=[[0, 1, 2], + [--, 4, --], + [6, --, 8]], + mask=[[False, False, False], + [ True, False, True], + [False, True, False]], + fill_value=999999) + >>> ma.count_masked(a) + 3 + + When the `axis` keyword is used an array is returned. + + >>> ma.count_masked(a, axis=0) + array([1, 1, 1]) + >>> ma.count_masked(a, axis=1) + array([0, 2, 1]) + + """ + m = getmaskarray(arr) + return m.sum(axis) + + +def masked_all(shape, dtype=float): + """ + Empty masked array with all elements masked. + + Return an empty masked array of the given shape and dtype, where all the + data are masked. + + Parameters + ---------- + shape : tuple + Shape of the required MaskedArray. + dtype : dtype, optional + Data type of the output. + + Returns + ------- + a : MaskedArray + A masked array with all data masked. + + See Also + -------- + masked_all_like : Empty masked array modelled on an existing array. + + Examples + -------- + >>> import numpy.ma as ma + >>> ma.masked_all((3, 3)) + masked_array( + data=[[--, --, --], + [--, --, --], + [--, --, --]], + mask=[[ True, True, True], + [ True, True, True], + [ True, True, True]], + fill_value=1e+20, + dtype=float64) + + The `dtype` parameter defines the underlying data type. + + >>> a = ma.masked_all((3, 3)) + >>> a.dtype + dtype('float64') + >>> a = ma.masked_all((3, 3), dtype=np.int32) + >>> a.dtype + dtype('int32') + + """ + a = masked_array(np.empty(shape, dtype), + mask=np.ones(shape, make_mask_descr(dtype))) + return a + + +def masked_all_like(arr): + """ + Empty masked array with the properties of an existing array. + + Return an empty masked array of the same shape and dtype as + the array `arr`, where all the data are masked. + + Parameters + ---------- + arr : ndarray + An array describing the shape and dtype of the required MaskedArray. + + Returns + ------- + a : MaskedArray + A masked array with all data masked. + + Raises + ------ + AttributeError + If `arr` doesn't have a shape attribute (i.e. not an ndarray) + + See Also + -------- + masked_all : Empty masked array with all elements masked. + + Examples + -------- + >>> import numpy.ma as ma + >>> arr = np.zeros((2, 3), dtype=np.float32) + >>> arr + array([[0., 0., 0.], + [0., 0., 0.]], dtype=float32) + >>> ma.masked_all_like(arr) + masked_array( + data=[[--, --, --], + [--, --, --]], + mask=[[ True, True, True], + [ True, True, True]], + fill_value=1e+20, + dtype=float32) + + The dtype of the masked array matches the dtype of `arr`. + + >>> arr.dtype + dtype('float32') + >>> ma.masked_all_like(arr).dtype + dtype('float32') + + """ + a = np.empty_like(arr).view(MaskedArray) + a._mask = np.ones(a.shape, dtype=make_mask_descr(a.dtype)) + return a + + +#####-------------------------------------------------------------------------- +#---- --- Standard functions --- +#####-------------------------------------------------------------------------- +class _fromnxfunction: + """ + Defines a wrapper to adapt NumPy functions to masked arrays. 
+
+    An instance of `_fromnxfunction` can be called with the same parameters
+    as the wrapped NumPy function. The docstring of the new function is
+    adapted from the wrapped function as well, see `getdoc`.
+
+    This class should not be used directly. Instead, one of its extensions
+    that provides support for a specific type of input should be used.
+
+    Parameters
+    ----------
+    funcname : str
+        The name of the function to be adapted. The function should be
+        in the NumPy namespace (i.e. ``np.funcname``).
+
+    """
+
+    def __init__(self, funcname):
+        self.__name__ = funcname
+        self.__doc__ = self.getdoc()
+
+    def getdoc(self):
+        """
+        Retrieve the docstring and signature from the function.
+
+        The ``__doc__`` attribute of the function is used as the docstring for
+        the new masked array version of the function. A note on application
+        of the function to the mask is appended.
+
+        Parameters
+        ----------
+        None
+
+        """
+        npfunc = getattr(np, self.__name__, None)
+        doc = getattr(npfunc, '__doc__', None)
+        if doc:
+            sig = self.__name__ + ma.get_object_signature(npfunc)
+            doc = ma.doc_note(doc, "The function is applied to both the _data "
+                                   "and the _mask, if any.")
+            return '\n\n'.join((sig, doc))
+        return
+
+    def __call__(self, *args, **params):
+        pass
+
+
+class _fromnxfunction_single(_fromnxfunction):
+    """
+    A version of `_fromnxfunction` that is called with a single array
+    argument followed by auxiliary args that are passed verbatim for
+    both the data and mask calls.
+    """
+    def __call__(self, x, *args, **params):
+        func = getattr(np, self.__name__)
+        if isinstance(x, ndarray):
+            _d = func(x.__array__(), *args, **params)
+            _m = func(getmaskarray(x), *args, **params)
+            return masked_array(_d, mask=_m)
+        else:
+            _d = func(np.asarray(x), *args, **params)
+            _m = func(getmaskarray(x), *args, **params)
+            return masked_array(_d, mask=_m)
+
+
+class _fromnxfunction_seq(_fromnxfunction):
+    """
+    A version of `_fromnxfunction` that is called with a single sequence
+    of arrays followed by auxiliary args that are passed verbatim for
+    both the data and mask calls.
+    """
+    def __call__(self, x, *args, **params):
+        func = getattr(np, self.__name__)
+        _d = func(tuple([np.asarray(a) for a in x]), *args, **params)
+        _m = func(tuple([getmaskarray(a) for a in x]), *args, **params)
+        return masked_array(_d, mask=_m)
+
+
+class _fromnxfunction_args(_fromnxfunction):
+    """
+    A version of `_fromnxfunction` that is called with multiple array
+    arguments. The first non-array-like input marks the beginning of the
+    arguments that are passed verbatim for both the data and mask calls.
+    Array arguments are processed independently and the results are
+    returned in a list. If only one array is found, the return value is
+    just the processed array instead of a list.
+    """
+    def __call__(self, *args, **params):
+        func = getattr(np, self.__name__)
+        arrays = []
+        args = list(args)
+        while len(args) > 0 and issequence(args[0]):
+            arrays.append(args.pop(0))
+        res = []
+        for x in arrays:
+            _d = func(np.asarray(x), *args, **params)
+            _m = func(getmaskarray(x), *args, **params)
+            res.append(masked_array(_d, mask=_m))
+        if len(arrays) == 1:
+            return res[0]
+        return res
+
+
+class _fromnxfunction_allargs(_fromnxfunction):
+    """
+    A version of `_fromnxfunction` that is called with multiple array
+    arguments. Similar to `_fromnxfunction_args` except that all args
+    are converted to arrays even if they are not so already. This makes
+    it possible to process scalars as 1-D arrays.
Only keyword arguments
+    are passed through verbatim for the data and mask calls. Array
+    arguments are processed independently and the results are returned
+    in a list. If only one arg is present, the return value is just the
+    processed array instead of a list.
+    """
+    def __call__(self, *args, **params):
+        func = getattr(np, self.__name__)
+        res = []
+        for x in args:
+            _d = func(np.asarray(x), **params)
+            _m = func(getmaskarray(x), **params)
+            res.append(masked_array(_d, mask=_m))
+        if len(args) == 1:
+            return res[0]
+        return res
+
+
+atleast_1d = _fromnxfunction_allargs('atleast_1d')
+atleast_2d = _fromnxfunction_allargs('atleast_2d')
+atleast_3d = _fromnxfunction_allargs('atleast_3d')
+
+vstack = row_stack = _fromnxfunction_seq('vstack')
+hstack = _fromnxfunction_seq('hstack')
+column_stack = _fromnxfunction_seq('column_stack')
+dstack = _fromnxfunction_seq('dstack')
+stack = _fromnxfunction_seq('stack')
+
+hsplit = _fromnxfunction_single('hsplit')
+
+diagflat = _fromnxfunction_single('diagflat')
+
+
+#####--------------------------------------------------------------------------
+#----
+#####--------------------------------------------------------------------------
+def flatten_inplace(seq):
+    """Flatten a sequence in place."""
+    k = 0
+    while (k != len(seq)):
+        while hasattr(seq[k], '__iter__'):
+            seq[k:(k + 1)] = seq[k]
+        k += 1
+    return seq
+
+
+def apply_along_axis(func1d, axis, arr, *args, **kwargs):
+    """
+    (This docstring should be overwritten)
+    """
+    arr = array(arr, copy=False, subok=True)
+    nd = arr.ndim
+    axis = normalize_axis_index(axis, nd)
+    ind = [0] * (nd - 1)
+    i = np.zeros(nd, 'O')
+    indlist = list(range(nd))
+    indlist.remove(axis)
+    i[axis] = slice(None, None)
+    outshape = np.asarray(arr.shape).take(indlist)
+    i.put(indlist, ind)
+    res = func1d(arr[tuple(i.tolist())], *args, **kwargs)
+    # if res is a number, then we have a smaller output array
+    asscalar = np.isscalar(res)
+    if not asscalar:
+        try:
+            len(res)
+        except TypeError:
+            asscalar = True
+    # Note: we shouldn't set the dtype of the output from the first result
+    # so we force the type to object, and build a list of dtypes.
+    # We'll just take the largest, to avoid some downcasting
+    dtypes = []
+    if asscalar:
+        dtypes.append(np.asarray(res).dtype)
+        outarr = zeros(outshape, object)
+        outarr[tuple(ind)] = res
+        Ntot = np.product(outshape)
+        k = 1
+        while k < Ntot:
+            # increment the index
+            ind[-1] += 1
+            n = -1
+            while (ind[n] >= outshape[n]) and (n > (1 - nd)):
+                ind[n - 1] += 1
+                ind[n] = 0
+                n -= 1
+            i.put(indlist, ind)
+            res = func1d(arr[tuple(i.tolist())], *args, **kwargs)
+            outarr[tuple(ind)] = res
+            dtypes.append(asarray(res).dtype)
+            k += 1
+    else:
+        res = array(res, copy=False, subok=True)
+        j = i.copy()
+        j[axis] = ([slice(None, None)] * res.ndim)
+        j.put(indlist, ind)
+        Ntot = np.product(outshape)
+        holdshape = outshape
+        outshape = list(arr.shape)
+        outshape[axis] = res.shape
+        dtypes.append(asarray(res).dtype)
+        outshape = flatten_inplace(outshape)
+        outarr = zeros(outshape, object)
+        outarr[tuple(flatten_inplace(j.tolist()))] = res
+        k = 1
+        while k < Ntot:
+            # increment the index
+            ind[-1] += 1
+            n = -1
+            while (ind[n] >= holdshape[n]) and (n > (1 - nd)):
+                ind[n - 1] += 1
+                ind[n] = 0
+                n -= 1
+            i.put(indlist, ind)
+            j.put(indlist, ind)
+            res = func1d(arr[tuple(i.tolist())], *args, **kwargs)
+            outarr[tuple(flatten_inplace(j.tolist()))] = res
+            dtypes.append(asarray(res).dtype)
+            k += 1
+    max_dtypes = np.dtype(np.asarray(dtypes).max())
+    if not hasattr(arr, '_mask'):
+        result = np.asarray(outarr, dtype=max_dtypes)
+    else:
+        result = asarray(outarr, dtype=max_dtypes)
+        result.fill_value = ma.default_fill_value(result)
+    return result
+apply_along_axis.__doc__ = np.apply_along_axis.__doc__
+
+
+def apply_over_axes(func, a, axes):
+    """
+    (This docstring will be overwritten)
+    """
+    val = asarray(a)
+    N = a.ndim
+    if array(axes).ndim == 0:
+        axes = (axes,)
+    for axis in axes:
+        if axis < 0:
+            axis = N + axis
+        args = (val, axis)
+        res = func(*args)
+        if res.ndim == val.ndim:
+            val = res
+        else:
+            res = ma.expand_dims(res, axis)
+            if res.ndim == val.ndim:
+                val = res
+            else:
+                raise ValueError("function is not returning "
+                                 "an array of the correct shape")
+    return val
+
+if apply_over_axes.__doc__ is not None:
+    apply_over_axes.__doc__ = np.apply_over_axes.__doc__[
+        :np.apply_over_axes.__doc__.find('Notes')].rstrip() + \
+    """
+
+    Examples
+    --------
+    >>> a = np.ma.arange(24).reshape(2,3,4)
+    >>> a[:,0,1] = np.ma.masked
+    >>> a[:,1,:] = np.ma.masked
+    >>> a
+    masked_array(
+      data=[[[0, --, 2, 3],
+             [--, --, --, --],
+             [8, 9, 10, 11]],
+            [[12, --, 14, 15],
+             [--, --, --, --],
+             [20, 21, 22, 23]]],
+      mask=[[[False,  True, False, False],
+             [ True,  True,  True,  True],
+             [False, False, False, False]],
+            [[False,  True, False, False],
+             [ True,  True,  True,  True],
+             [False, False, False, False]]],
+      fill_value=999999)
+    >>> np.ma.apply_over_axes(np.ma.sum, a, [0,2])
+    masked_array(
+      data=[[[46],
+             [--],
+             [124]]],
+      mask=[[[False],
+             [ True],
+             [False]]],
+      fill_value=999999)
+
+    Tuple axis arguments to ufuncs are equivalent:
+
+    >>> np.ma.sum(a, axis=(0,2)).reshape((1,-1,1))
+    masked_array(
+      data=[[[46],
+             [--],
+             [124]]],
+      mask=[[[False],
+             [ True],
+             [False]]],
+      fill_value=999999)
+    """
+
+
+def average(a, axis=None, weights=None, returned=False):
+    """
+    Return the weighted average of an array over the given axis.
+
+    Parameters
+    ----------
+    a : array_like
+        Data to be averaged.
+        Masked entries are not taken into account in the computation.
+    axis : int, optional
+        Axis along which to average `a`. If None, averaging is done over
+        the flattened array.
+ weights : array_like, optional + The importance that each element has in the computation of the average. + The weights array can either be 1-D (in which case its length must be + the size of `a` along the given axis) or of the same shape as `a`. + If ``weights=None``, then all data in `a` are assumed to have a + weight equal to one. The 1-D calculation is:: + + avg = sum(a * weights) / sum(weights) + + The only constraint on `weights` is that `sum(weights)` must not be 0. + returned : bool, optional + Flag indicating whether a tuple ``(result, sum of weights)`` + should be returned as output (True), or just the result (False). + Default is False. + + Returns + ------- + average, [sum_of_weights] : (tuple of) scalar or MaskedArray + The average along the specified axis. When returned is `True`, + return a tuple with the average as the first element and the sum + of the weights as the second element. The return type is `np.float64` + if `a` is of integer type and floats smaller than `float64`, or the + input data-type, otherwise. If returned, `sum_of_weights` is always + `float64`. + + Examples + -------- + >>> a = np.ma.array([1., 2., 3., 4.], mask=[False, False, True, True]) + >>> np.ma.average(a, weights=[3, 1, 0, 0]) + 1.25 + + >>> x = np.ma.arange(6.).reshape(3, 2) + >>> x + masked_array( + data=[[0., 1.], + [2., 3.], + [4., 5.]], + mask=False, + fill_value=1e+20) + >>> avg, sumweights = np.ma.average(x, axis=0, weights=[1, 2, 3], + ... returned=True) + >>> avg + masked_array(data=[2.6666666666666665, 3.6666666666666665], + mask=[False, False], + fill_value=1e+20) + + """ + a = asarray(a) + m = getmask(a) + + # inspired by 'average' in numpy/lib/function_base.py + + if weights is None: + avg = a.mean(axis) + scl = avg.dtype.type(a.count(axis)) + else: + wgt = np.asanyarray(weights) + + if issubclass(a.dtype.type, (np.integer, np.bool_)): + result_dtype = np.result_type(a.dtype, wgt.dtype, 'f8') + else: + result_dtype = np.result_type(a.dtype, wgt.dtype) + + # Sanity checks + if a.shape != wgt.shape: + if axis is None: + raise TypeError( + "Axis must be specified when shapes of a and weights " + "differ.") + if wgt.ndim != 1: + raise TypeError( + "1D weights expected when shapes of a and weights differ.") + if wgt.shape[0] != a.shape[axis]: + raise ValueError( + "Length of weights not compatible with specified axis.") + + # setup wgt to broadcast along axis + wgt = np.broadcast_to(wgt, (a.ndim-1)*(1,) + wgt.shape, subok=True) + wgt = wgt.swapaxes(-1, axis) + + if m is not nomask: + wgt = wgt*(~a.mask) + + scl = wgt.sum(axis=axis, dtype=result_dtype) + avg = np.multiply(a, wgt, dtype=result_dtype).sum(axis)/scl + + if returned: + if scl.shape != avg.shape: + scl = np.broadcast_to(scl, avg.shape).copy() + return avg, scl + else: + return avg + + +def median(a, axis=None, out=None, overwrite_input=False, keepdims=False): + """ + Compute the median along the specified axis. + + Returns the median of the array elements. + + Parameters + ---------- + a : array_like + Input array or object that can be converted to an array. + axis : int, optional + Axis along which the medians are computed. The default (None) is + to compute the median along a flattened version of the array. + out : ndarray, optional + Alternative output array in which to place the result. It must + have the same shape and buffer length as the expected output + but the type will be cast if necessary. + overwrite_input : bool, optional + If True, then allow use of memory of input array (a) for + calculations. 
The input array will be modified by the call to + median. This will save memory when you do not need to preserve + the contents of the input array. Treat the input as undefined, + but it will probably be fully or partially sorted. Default is + False. Note that, if `overwrite_input` is True, and the input + is not already an `ndarray`, an error will be raised. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the input array. + + .. versionadded:: 1.10.0 + + Returns + ------- + median : ndarray + A new array holding the result is returned unless out is + specified, in which case a reference to out is returned. + Return data-type is `float64` for integers and floats smaller than + `float64`, or the input data-type, otherwise. + + See Also + -------- + mean + + Notes + ----- + Given a vector ``V`` with ``N`` non masked values, the median of ``V`` + is the middle value of a sorted copy of ``V`` (``Vs``) - i.e. + ``Vs[(N-1)/2]``, when ``N`` is odd, or ``{Vs[N/2 - 1] + Vs[N/2]}/2`` + when ``N`` is even. + + Examples + -------- + >>> x = np.ma.array(np.arange(8), mask=[0]*4 + [1]*4) + >>> np.ma.median(x) + 1.5 + + >>> x = np.ma.array(np.arange(10).reshape(2, 5), mask=[0]*6 + [1]*4) + >>> np.ma.median(x) + 2.5 + >>> np.ma.median(x, axis=-1, overwrite_input=True) + masked_array(data=[2.0, 5.0], + mask=[False, False], + fill_value=1e+20) + + """ + if not hasattr(a, 'mask'): + m = np.median(getdata(a, subok=True), axis=axis, + out=out, overwrite_input=overwrite_input, + keepdims=keepdims) + if isinstance(m, np.ndarray) and 1 <= m.ndim: + return masked_array(m, copy=False) + else: + return m + + r, k = _ureduce(a, func=_median, axis=axis, out=out, + overwrite_input=overwrite_input) + if keepdims: + return r.reshape(k) + else: + return r + +def _median(a, axis=None, out=None, overwrite_input=False): + # when an unmasked NaN is present return it, so we need to sort the NaN + # values behind the mask + if np.issubdtype(a.dtype, np.inexact): + fill_value = np.inf + else: + fill_value = None + if overwrite_input: + if axis is None: + asorted = a.ravel() + asorted.sort(fill_value=fill_value) + else: + a.sort(axis=axis, fill_value=fill_value) + asorted = a + else: + asorted = sort(a, axis=axis, fill_value=fill_value) + + if axis is None: + axis = 0 + else: + axis = normalize_axis_index(axis, asorted.ndim) + + if asorted.shape[axis] == 0: + # for empty axis integer indices fail so use slicing to get same result + # as median (which is mean of empty slice = nan) + indexer = [slice(None)] * asorted.ndim + indexer[axis] = slice(0, 0) + indexer = tuple(indexer) + return np.ma.mean(asorted[indexer], axis=axis, out=out) + + if asorted.ndim == 1: + idx, odd = divmod(count(asorted), 2) + mid = asorted[idx + odd - 1:idx + 1] + if np.issubdtype(asorted.dtype, np.inexact) and asorted.size > 0: + # avoid inf / x = masked + s = mid.sum(out=out) + if not odd: + s = np.true_divide(s, 2., casting='safe', out=out) + s = np.lib.utils._median_nancheck(asorted, s, axis) + else: + s = mid.mean(out=out) + + # if result is masked either the input contained enough + # minimum_fill_value so that it would be the median or all values + # masked + if np.ma.is_masked(s) and not np.all(asorted.mask): + return np.ma.minimum_fill_value(asorted) + return s + + counts = count(asorted, axis=axis, keepdims=True) + h = counts // 2 + + # duplicate high if odd number of elements so mean does nothing + 
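# (per 1-D slice, `l` is the lower middle index and `h` the upper one;
+    # stacking them as `lh` and gathering with take_along_axis pulls both
+    # middle candidates out of each sorted slice in a single pass)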
+    odd = counts % 2 == 1
+    l = np.where(odd, h, h-1)
+
+    lh = np.concatenate([l,h], axis=axis)
+
+    # get low and high median
+    low_high = np.take_along_axis(asorted, lh, axis=axis)
+
+    def replace_masked(s):
+        # Replace masked entries with minimum_fill_value unless all values
+        # are masked. This is required as the sort order of values equal or
+        # larger than the fill value is undefined, and a valid value could
+        # otherwise end up elsewhere, e.g. [4, --, inf].
+        if np.ma.is_masked(s):
+            rep = (~np.all(asorted.mask, axis=axis, keepdims=True)) & s.mask
+            s.data[rep] = np.ma.minimum_fill_value(asorted)
+            s.mask[rep] = False
+
+    replace_masked(low_high)
+
+    if np.issubdtype(asorted.dtype, np.inexact):
+        # avoid inf / x = masked
+        s = np.ma.sum(low_high, axis=axis, out=out)
+        np.true_divide(s.data, 2., casting='unsafe', out=s.data)
+
+        s = np.lib.utils._median_nancheck(asorted, s, axis)
+    else:
+        s = np.ma.mean(low_high, axis=axis, out=out)
+
+    return s
+
+
+def compress_nd(x, axis=None):
+    """Suppress slices from multiple dimensions which contain masked values.
+
+    Parameters
+    ----------
+    x : array_like, MaskedArray
+        The array to operate on. If not a MaskedArray instance (or if no array
+        elements are masked), `x` is interpreted as a MaskedArray with `mask`
+        set to `nomask`.
+    axis : tuple of ints or int, optional
+        Which dimensions to suppress slices from can be configured with this
+        parameter.
+        - If axis is a tuple of ints, those are the axes to suppress slices from.
+        - If axis is an int, then that is the only axis to suppress slices from.
+        - If axis is None, all axes are selected.
+
+    Returns
+    -------
+    compress_array : ndarray
+        The compressed array.
+    """
+    x = asarray(x)
+    m = getmask(x)
+    # Set axis to tuple of ints
+    if axis is None:
+        axis = tuple(range(x.ndim))
+    else:
+        axis = normalize_axis_tuple(axis, x.ndim)
+
+    # Nothing is masked: return x
+    if m is nomask or not m.any():
+        return x._data
+    # All is masked: return empty
+    if m.all():
+        return nxarray([])
+    # Filter elements through boolean indexing
+    data = x._data
+    for ax in axis:
+        axes = tuple(list(range(ax)) + list(range(ax + 1, x.ndim)))
+        data = data[(slice(None),)*ax + (~m.any(axis=axes),)]
+    return data
+
+def compress_rowcols(x, axis=None):
+    """
+    Suppress the rows and/or columns of a 2-D array that contain
+    masked values.
+
+    The suppression behavior is selected with the `axis` parameter.
+
+    - If axis is None, both rows and columns are suppressed.
+    - If axis is 0, only rows are suppressed.
+    - If axis is 1 or -1, only columns are suppressed.
+
+    Parameters
+    ----------
+    x : array_like, MaskedArray
+        The array to operate on. If not a MaskedArray instance (or if no array
+        elements are masked), `x` is interpreted as a MaskedArray with
+        `mask` set to `nomask`. Must be a 2D array.
+    axis : int, optional
+        Axis along which to perform the operation. Default is None.
+
+    Returns
+    -------
+    compressed_array : ndarray
+        The compressed array.
+
+    Examples
+    --------
+    >>> x = np.ma.array(np.arange(9).reshape(3, 3), mask=[[1, 0, 0],
+    ...                                                   [1, 0, 0],
+    ...
[0, 0, 0]]) + >>> x + masked_array( + data=[[--, 1, 2], + [--, 4, 5], + [6, 7, 8]], + mask=[[ True, False, False], + [ True, False, False], + [False, False, False]], + fill_value=999999) + + >>> np.ma.compress_rowcols(x) + array([[7, 8]]) + >>> np.ma.compress_rowcols(x, 0) + array([[6, 7, 8]]) + >>> np.ma.compress_rowcols(x, 1) + array([[1, 2], + [4, 5], + [7, 8]]) + + """ + if asarray(x).ndim != 2: + raise NotImplementedError("compress_rowcols works for 2D arrays only.") + return compress_nd(x, axis=axis) + + +def compress_rows(a): + """ + Suppress whole rows of a 2-D array that contain masked values. + + This is equivalent to ``np.ma.compress_rowcols(a, 0)``, see + `compress_rowcols` for details. + + See Also + -------- + compress_rowcols + + """ + a = asarray(a) + if a.ndim != 2: + raise NotImplementedError("compress_rows works for 2D arrays only.") + return compress_rowcols(a, 0) + +def compress_cols(a): + """ + Suppress whole columns of a 2-D array that contain masked values. + + This is equivalent to ``np.ma.compress_rowcols(a, 1)``, see + `compress_rowcols` for details. + + See Also + -------- + compress_rowcols + + """ + a = asarray(a) + if a.ndim != 2: + raise NotImplementedError("compress_cols works for 2D arrays only.") + return compress_rowcols(a, 1) + +def mask_rows(a, axis=np._NoValue): + """ + Mask rows of a 2D array that contain masked values. + + This function is a shortcut to ``mask_rowcols`` with `axis` equal to 0. + + See Also + -------- + mask_rowcols : Mask rows and/or columns of a 2D array. + masked_where : Mask where a condition is met. + + Examples + -------- + >>> import numpy.ma as ma + >>> a = np.zeros((3, 3), dtype=int) + >>> a[1, 1] = 1 + >>> a + array([[0, 0, 0], + [0, 1, 0], + [0, 0, 0]]) + >>> a = ma.masked_equal(a, 1) + >>> a + masked_array( + data=[[0, 0, 0], + [0, --, 0], + [0, 0, 0]], + mask=[[False, False, False], + [False, True, False], + [False, False, False]], + fill_value=1) + + >>> ma.mask_rows(a) + masked_array( + data=[[0, 0, 0], + [--, --, --], + [0, 0, 0]], + mask=[[False, False, False], + [ True, True, True], + [False, False, False]], + fill_value=1) + + """ + if axis is not np._NoValue: + # remove the axis argument when this deprecation expires + # NumPy 1.18.0, 2019-11-28 + warnings.warn( + "The axis argument has always been ignored, in future passing it " + "will raise TypeError", DeprecationWarning, stacklevel=2) + return mask_rowcols(a, 0) + +def mask_cols(a, axis=np._NoValue): + """ + Mask columns of a 2D array that contain masked values. + + This function is a shortcut to ``mask_rowcols`` with `axis` equal to 1. + + See Also + -------- + mask_rowcols : Mask rows and/or columns of a 2D array. + masked_where : Mask where a condition is met. 
+
+    Examples
+    --------
+    >>> import numpy.ma as ma
+    >>> a = np.zeros((3, 3), dtype=int)
+    >>> a[1, 1] = 1
+    >>> a
+    array([[0, 0, 0],
+           [0, 1, 0],
+           [0, 0, 0]])
+    >>> a = ma.masked_equal(a, 1)
+    >>> a
+    masked_array(
+      data=[[0, 0, 0],
+            [0, --, 0],
+            [0, 0, 0]],
+      mask=[[False, False, False],
+            [False, True, False],
+            [False, False, False]],
+      fill_value=1)
+    >>> ma.mask_cols(a)
+    masked_array(
+      data=[[0, --, 0],
+            [0, --, 0],
+            [0, --, 0]],
+      mask=[[False, True, False],
+            [False, True, False],
+            [False, True, False]],
+      fill_value=1)
+
+    """
+    if axis is not np._NoValue:
+        # remove the axis argument when this deprecation expires
+        # NumPy 1.18.0, 2019-11-28
+        warnings.warn(
+            "The axis argument has always been ignored, in future passing it "
+            "will raise TypeError", DeprecationWarning, stacklevel=2)
+    return mask_rowcols(a, 1)
+
+
+#####--------------------------------------------------------------------------
+#---- --- arraysetops ---
+#####--------------------------------------------------------------------------
+
+def ediff1d(arr, to_end=None, to_begin=None):
+    """
+    Compute the differences between consecutive elements of an array.
+
+    This function is the equivalent of `numpy.ediff1d` that takes masked
+    values into account, see `numpy.ediff1d` for details.
+
+    See Also
+    --------
+    numpy.ediff1d : Equivalent function for ndarrays.
+
+    """
+    arr = ma.asanyarray(arr).flat
+    ed = arr[1:] - arr[:-1]
+    arrays = [ed]
+    #
+    if to_begin is not None:
+        arrays.insert(0, to_begin)
+    if to_end is not None:
+        arrays.append(to_end)
+    #
+    if len(arrays) != 1:
+        # We'll save ourselves a copy of a potentially large array in the common
+        # case where neither `to_begin` nor `to_end` was given.
+        ed = hstack(arrays)
+    #
+    return ed
+
+
+def unique(ar1, return_index=False, return_inverse=False):
+    """
+    Finds the unique elements of an array.
+
+    Masked values are considered the same element (masked). The output array
+    is always a masked array. See `numpy.unique` for more details.
+
+    See Also
+    --------
+    numpy.unique : Equivalent function for ndarrays.
+
+    """
+    output = np.unique(ar1,
+                       return_index=return_index,
+                       return_inverse=return_inverse)
+    if isinstance(output, tuple):
+        output = list(output)
+        output[0] = output[0].view(MaskedArray)
+        output = tuple(output)
+    else:
+        output = output.view(MaskedArray)
+    return output
+
+
+def intersect1d(ar1, ar2, assume_unique=False):
+    """
+    Returns the unique elements common to both arrays.
+
+    Masked values are considered equal to one another.
+    The output is always a masked array.
+
+    See `numpy.intersect1d` for more details.
+
+    See Also
+    --------
+    numpy.intersect1d : Equivalent function for ndarrays.
+
+    Examples
+    --------
+    >>> x = np.ma.array([1, 3, 3, 3], mask=[0, 0, 0, 1])
+    >>> y = np.ma.array([3, 1, 1, 1], mask=[0, 0, 0, 1])
+    >>> np.ma.intersect1d(x, y)
+    masked_array(data=[1, 3, --],
+                 mask=[False, False, True],
+           fill_value=999999)
+
+    """
+    if assume_unique:
+        aux = ma.concatenate((ar1, ar2))
+    else:
+        # Might be faster than unique( intersect1d( ar1, ar2 ) )?
+        aux = ma.concatenate((unique(ar1), unique(ar2)))
+    aux.sort()
+    return aux[:-1][aux[1:] == aux[:-1]]
+
+
+def setxor1d(ar1, ar2, assume_unique=False):
+    """
+    Set exclusive-or of 1-D arrays with unique elements.
+
+    The output is always a masked array. See `numpy.setxor1d` for more details.
+
+    See Also
+    --------
+    numpy.setxor1d : Equivalent function for ndarrays.
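+
+    Examples
+    --------
+    >>> ar1 = np.ma.array([1, 2, 3, 2, 4])
+    >>> ar2 = np.ma.array([2, 3, 5, 7, 5])
+    >>> np.ma.setxor1d(ar1, ar2)
+    masked_array(data=[1, 4, 5, 7],
+                 mask=False,
+           fill_value=999999)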
+ + """ + if not assume_unique: + ar1 = unique(ar1) + ar2 = unique(ar2) + + aux = ma.concatenate((ar1, ar2)) + if aux.size == 0: + return aux + aux.sort() + auxf = aux.filled() +# flag = ediff1d( aux, to_end = 1, to_begin = 1 ) == 0 + flag = ma.concatenate(([True], (auxf[1:] != auxf[:-1]), [True])) +# flag2 = ediff1d( flag ) == 0 + flag2 = (flag[1:] == flag[:-1]) + return aux[flag2] + + +def in1d(ar1, ar2, assume_unique=False, invert=False): + """ + Test whether each element of an array is also present in a second + array. + + The output is always a masked array. See `numpy.in1d` for more details. + + We recommend using :func:`isin` instead of `in1d` for new code. + + See Also + -------- + isin : Version of this function that preserves the shape of ar1. + numpy.in1d : Equivalent function for ndarrays. + + Notes + ----- + .. versionadded:: 1.4.0 + + """ + if not assume_unique: + ar1, rev_idx = unique(ar1, return_inverse=True) + ar2 = unique(ar2) + + ar = ma.concatenate((ar1, ar2)) + # We need this to be a stable sort, so always use 'mergesort' + # here. The values from the first array should always come before + # the values from the second array. + order = ar.argsort(kind='mergesort') + sar = ar[order] + if invert: + bool_ar = (sar[1:] != sar[:-1]) + else: + bool_ar = (sar[1:] == sar[:-1]) + flag = ma.concatenate((bool_ar, [invert])) + indx = order.argsort(kind='mergesort')[:len(ar1)] + + if assume_unique: + return flag[indx] + else: + return flag[indx][rev_idx] + + +def isin(element, test_elements, assume_unique=False, invert=False): + """ + Calculates `element in test_elements`, broadcasting over + `element` only. + + The output is always a masked array of the same shape as `element`. + See `numpy.isin` for more details. + + See Also + -------- + in1d : Flattened version of this function. + numpy.isin : Equivalent function for ndarrays. + + Notes + ----- + .. versionadded:: 1.13.0 + + """ + element = ma.asarray(element) + return in1d(element, test_elements, assume_unique=assume_unique, + invert=invert).reshape(element.shape) + + +def union1d(ar1, ar2): + """ + Union of two arrays. + + The output is always a masked array. See `numpy.union1d` for more details. + + See Also + -------- + numpy.union1d : Equivalent function for ndarrays. + + """ + return unique(ma.concatenate((ar1, ar2), axis=None)) + + +def setdiff1d(ar1, ar2, assume_unique=False): + """ + Set difference of 1D arrays with unique elements. + + The output is always a masked array. See `numpy.setdiff1d` for more + details. + + See Also + -------- + numpy.setdiff1d : Equivalent function for ndarrays. + + Examples + -------- + >>> x = np.ma.array([1, 2, 3, 4], mask=[0, 1, 0, 1]) + >>> np.ma.setdiff1d(x, [1, 2]) + masked_array(data=[3, --], + mask=[False, True], + fill_value=999999) + + """ + if assume_unique: + ar1 = ma.asarray(ar1).ravel() + else: + ar1 = unique(ar1) + ar2 = unique(ar2) + return ar1[in1d(ar1, ar2, assume_unique=True, invert=True)] + + +############################################################################### +# Covariance # +############################################################################### + + +def _covhelper(x, y=None, rowvar=True, allow_masked=True): + """ + Private function for the computation of covariance and correlation + coefficients. 
+ + """ + x = ma.array(x, ndmin=2, copy=True, dtype=float) + xmask = ma.getmaskarray(x) + # Quick exit if we can't process masked data + if not allow_masked and xmask.any(): + raise ValueError("Cannot process masked data.") + # + if x.shape[0] == 1: + rowvar = True + # Make sure that rowvar is either 0 or 1 + rowvar = int(bool(rowvar)) + axis = 1 - rowvar + if rowvar: + tup = (slice(None), None) + else: + tup = (None, slice(None)) + # + if y is None: + xnotmask = np.logical_not(xmask).astype(int) + else: + y = array(y, copy=False, ndmin=2, dtype=float) + ymask = ma.getmaskarray(y) + if not allow_masked and ymask.any(): + raise ValueError("Cannot process masked data.") + if xmask.any() or ymask.any(): + if y.shape == x.shape: + # Define some common mask + common_mask = np.logical_or(xmask, ymask) + if common_mask is not nomask: + xmask = x._mask = y._mask = ymask = common_mask + x._sharedmask = False + y._sharedmask = False + x = ma.concatenate((x, y), axis) + xnotmask = np.logical_not(np.concatenate((xmask, ymask), axis)).astype(int) + x -= x.mean(axis=rowvar)[tup] + return (x, xnotmask, rowvar) + + +def cov(x, y=None, rowvar=True, bias=False, allow_masked=True, ddof=None): + """ + Estimate the covariance matrix. + + Except for the handling of missing data this function does the same as + `numpy.cov`. For more details and examples, see `numpy.cov`. + + By default, masked values are recognized as such. If `x` and `y` have the + same shape, a common mask is allocated: if ``x[i,j]`` is masked, then + ``y[i,j]`` will also be masked. + Setting `allow_masked` to False will raise an exception if values are + missing in either of the input arrays. + + Parameters + ---------- + x : array_like + A 1-D or 2-D array containing multiple variables and observations. + Each row of `x` represents a variable, and each column a single + observation of all those variables. Also see `rowvar` below. + y : array_like, optional + An additional set of variables and observations. `y` has the same + shape as `x`. + rowvar : bool, optional + If `rowvar` is True (default), then each row represents a + variable, with observations in the columns. Otherwise, the relationship + is transposed: each column represents a variable, while the rows + contain observations. + bias : bool, optional + Default normalization (False) is by ``(N-1)``, where ``N`` is the + number of observations given (unbiased estimate). If `bias` is True, + then normalization is by ``N``. This keyword can be overridden by + the keyword ``ddof`` in numpy versions >= 1.5. + allow_masked : bool, optional + If True, masked values are propagated pair-wise: if a value is masked + in `x`, the corresponding value is masked in `y`. + If False, raises a `ValueError` exception when some values are missing. + ddof : {None, int}, optional + If not ``None`` normalization is by ``(N - ddof)``, where ``N`` is + the number of observations; this overrides the value implied by + ``bias``. The default value is ``None``. + + .. versionadded:: 1.5 + + Raises + ------ + ValueError + Raised if some values are missing and `allow_masked` is False. + + See Also + -------- + numpy.cov + + """ + # Check inputs + if ddof is not None and ddof != int(ddof): + raise ValueError("ddof must be an integer") + # Set up ddof + if ddof is None: + if bias: + ddof = 0 + else: + ddof = 1 + + (x, xnotmask, rowvar) = _covhelper(x, y, rowvar, allow_masked) + if not rowvar: + fact = np.dot(xnotmask.T, xnotmask) * 1. 
- ddof + result = (dot(x.T, x.conj(), strict=False) / fact).squeeze() + else: + fact = np.dot(xnotmask, xnotmask.T) * 1. - ddof + result = (dot(x, x.T.conj(), strict=False) / fact).squeeze() + return result + + +def corrcoef(x, y=None, rowvar=True, bias=np._NoValue, allow_masked=True, + ddof=np._NoValue): + """ + Return Pearson product-moment correlation coefficients. + + Except for the handling of missing data this function does the same as + `numpy.corrcoef`. For more details and examples, see `numpy.corrcoef`. + + Parameters + ---------- + x : array_like + A 1-D or 2-D array containing multiple variables and observations. + Each row of `x` represents a variable, and each column a single + observation of all those variables. Also see `rowvar` below. + y : array_like, optional + An additional set of variables and observations. `y` has the same + shape as `x`. + rowvar : bool, optional + If `rowvar` is True (default), then each row represents a + variable, with observations in the columns. Otherwise, the relationship + is transposed: each column represents a variable, while the rows + contain observations. + bias : _NoValue, optional + Has no effect, do not use. + + .. deprecated:: 1.10.0 + allow_masked : bool, optional + If True, masked values are propagated pair-wise: if a value is masked + in `x`, the corresponding value is masked in `y`. + If False, raises an exception. Because `bias` is deprecated, this + argument needs to be treated as keyword only to avoid a warning. + ddof : _NoValue, optional + Has no effect, do not use. + + .. deprecated:: 1.10.0 + + See Also + -------- + numpy.corrcoef : Equivalent function in top-level NumPy module. + cov : Estimate the covariance matrix. + + Notes + ----- + This function accepts but discards arguments `bias` and `ddof`. This is + for backwards compatibility with previous versions of this function. These + arguments had no effect on the return values of the function and can be + safely ignored in this and previous versions of numpy. + """ + msg = 'bias and ddof have no effect and are deprecated' + if bias is not np._NoValue or ddof is not np._NoValue: + # 2015-03-15, 1.10 + warnings.warn(msg, DeprecationWarning, stacklevel=2) + # Get the data + (x, xnotmask, rowvar) = _covhelper(x, y, rowvar, allow_masked) + # Compute the covariance matrix + if not rowvar: + fact = np.dot(xnotmask.T, xnotmask) * 1. + c = (dot(x.T, x.conj(), strict=False) / fact).squeeze() + else: + fact = np.dot(xnotmask, xnotmask.T) * 1. + c = (dot(x, x.T.conj(), strict=False) / fact).squeeze() + # Check whether we have a scalar + try: + diag = ma.diagonal(c) + except ValueError: + return 1 + # + if xnotmask.all(): + _denom = ma.sqrt(ma.multiply.outer(diag, diag)) + else: + _denom = diagflat(diag) + _denom._sharedmask = False # We know return is always a copy + n = x.shape[1 - rowvar] + if rowvar: + for i in range(n - 1): + for j in range(i + 1, n): + _x = mask_cols(vstack((x[i], x[j]))).var(axis=1) + _denom[i, j] = _denom[j, i] = ma.sqrt(ma.multiply.reduce(_x)) + else: + for i in range(n - 1): + for j in range(i + 1, n): + _x = mask_cols( + vstack((x[:, i], x[:, j]))).var(axis=1) + _denom[i, j] = _denom[j, i] = ma.sqrt(ma.multiply.reduce(_x)) + return c / _denom + +#####-------------------------------------------------------------------------- +#---- --- Concatenation helpers --- +#####-------------------------------------------------------------------------- + +class MAxisConcatenator(AxisConcatenator): + """ + Translate slice objects to concatenation along an axis. 
+ + For documentation on usage, see `mr_class`. + + See Also + -------- + mr_class + + """ + concatenate = staticmethod(concatenate) + + @classmethod + def makemat(cls, arr): + # There used to be a view as np.matrix here, but we may eventually + # deprecate that class. In preparation, we use the unmasked version + # to construct the matrix (with copy=False for backwards compatibility + # with the .view) + data = super().makemat(arr.data, copy=False) + return array(data, mask=arr.mask) + + def __getitem__(self, key): + # matrix builder syntax, like 'a, b; c, d' + if isinstance(key, str): + raise MAError("Unavailable for masked array.") + + return super().__getitem__(key) + + +class mr_class(MAxisConcatenator): + """ + Translate slice objects to concatenation along the first axis. + + This is the masked array version of `lib.index_tricks.RClass`. + + See Also + -------- + lib.index_tricks.RClass + + Examples + -------- + >>> np.ma.mr_[np.ma.array([1,2,3]), 0, 0, np.ma.array([4,5,6])] + masked_array(data=[1, 2, 3, ..., 4, 5, 6], + mask=False, + fill_value=999999) + + """ + def __init__(self): + MAxisConcatenator.__init__(self, 0) + +mr_ = mr_class() + +#####-------------------------------------------------------------------------- +#---- Find unmasked data --- +#####-------------------------------------------------------------------------- + +def flatnotmasked_edges(a): + """ + Find the indices of the first and last unmasked values. + + Expects a 1-D `MaskedArray`, returns None if all values are masked. + + Parameters + ---------- + a : array_like + Input 1-D `MaskedArray` + + Returns + ------- + edges : ndarray or None + The indices of first and last non-masked value in the array. + Returns None if all values are masked. + + See Also + -------- + flatnotmasked_contiguous, notmasked_contiguous, notmasked_edges + clump_masked, clump_unmasked + + Notes + ----- + Only accepts 1-D arrays. + + Examples + -------- + >>> a = np.ma.arange(10) + >>> np.ma.flatnotmasked_edges(a) + array([0, 9]) + + >>> mask = (a < 3) | (a > 8) | (a == 5) + >>> a[mask] = np.ma.masked + >>> np.array(a[~a.mask]) + array([3, 4, 6, 7, 8]) + + >>> np.ma.flatnotmasked_edges(a) + array([3, 8]) + + >>> a[:] = np.ma.masked + >>> print(np.ma.flatnotmasked_edges(a)) + None + + """ + m = getmask(a) + if m is nomask or not np.any(m): + return np.array([0, a.size - 1]) + unmasked = np.flatnonzero(~m) + if len(unmasked) > 0: + return unmasked[[0, -1]] + else: + return None + + +def notmasked_edges(a, axis=None): + """ + Find the indices of the first and last unmasked values along an axis. + + If all values are masked, return None. Otherwise, return a list + of two tuples, corresponding to the indices of the first and last + unmasked values respectively. + + Parameters + ---------- + a : array_like + The input array. + axis : int, optional + Axis along which to perform the operation. + If None (default), applies to a flattened version of the array. + + Returns + ------- + edges : ndarray or list + An array of start and end indexes if there are any masked data in + the array. If there are no masked data in the array, `edges` is a + list of the first and last index. 
+ + See Also + -------- + flatnotmasked_contiguous, flatnotmasked_edges, notmasked_contiguous + clump_masked, clump_unmasked + + Examples + -------- + >>> a = np.arange(9).reshape((3, 3)) + >>> m = np.zeros_like(a) + >>> m[1:, 1:] = 1 + + >>> am = np.ma.array(a, mask=m) + >>> np.array(am[~am.mask]) + array([0, 1, 2, 3, 6]) + + >>> np.ma.notmasked_edges(am) + array([0, 6]) + + """ + a = asarray(a) + if axis is None or a.ndim == 1: + return flatnotmasked_edges(a) + m = getmaskarray(a) + idx = array(np.indices(a.shape), mask=np.asarray([m] * a.ndim)) + return [tuple([idx[i].min(axis).compressed() for i in range(a.ndim)]), + tuple([idx[i].max(axis).compressed() for i in range(a.ndim)]), ] + + +def flatnotmasked_contiguous(a): + """ + Find contiguous unmasked data in a masked array along the given axis. + + Parameters + ---------- + a : narray + The input array. + + Returns + ------- + slice_list : list + A sorted sequence of `slice` objects (start index, end index). + + .. versionchanged:: 1.15.0 + Now returns an empty list instead of None for a fully masked array + + See Also + -------- + flatnotmasked_edges, notmasked_contiguous, notmasked_edges + clump_masked, clump_unmasked + + Notes + ----- + Only accepts 2-D arrays at most. + + Examples + -------- + >>> a = np.ma.arange(10) + >>> np.ma.flatnotmasked_contiguous(a) + [slice(0, 10, None)] + + >>> mask = (a < 3) | (a > 8) | (a == 5) + >>> a[mask] = np.ma.masked + >>> np.array(a[~a.mask]) + array([3, 4, 6, 7, 8]) + + >>> np.ma.flatnotmasked_contiguous(a) + [slice(3, 5, None), slice(6, 9, None)] + >>> a[:] = np.ma.masked + >>> np.ma.flatnotmasked_contiguous(a) + [] + + """ + m = getmask(a) + if m is nomask: + return [slice(0, a.size)] + i = 0 + result = [] + for (k, g) in itertools.groupby(m.ravel()): + n = len(list(g)) + if not k: + result.append(slice(i, i + n)) + i += n + return result + +def notmasked_contiguous(a, axis=None): + """ + Find contiguous unmasked data in a masked array along the given axis. + + Parameters + ---------- + a : array_like + The input array. + axis : int, optional + Axis along which to perform the operation. + If None (default), applies to a flattened version of the array, and this + is the same as `flatnotmasked_contiguous`. + + Returns + ------- + endpoints : list + A list of slices (start and end indexes) of unmasked indexes + in the array. + + If the input is 2d and axis is specified, the result is a list of lists. + + See Also + -------- + flatnotmasked_edges, flatnotmasked_contiguous, notmasked_edges + clump_masked, clump_unmasked + + Notes + ----- + Only accepts 2-D arrays at most. 
+
+    Examples
+    --------
+    >>> a = np.arange(12).reshape((3, 4))
+    >>> mask = np.zeros_like(a)
+    >>> mask[1:, :-1] = 1; mask[0, 1] = 1; mask[-1, 0] = 0
+    >>> ma = np.ma.array(a, mask=mask)
+    >>> ma
+    masked_array(
+      data=[[0, --, 2, 3],
+            [--, --, --, 7],
+            [8, --, --, 11]],
+      mask=[[False, True, False, False],
+            [ True, True, True, False],
+            [False, True, True, False]],
+      fill_value=999999)
+    >>> np.array(ma[~ma.mask])
+    array([ 0, 2, 3, 7, 8, 11])
+
+    >>> np.ma.notmasked_contiguous(ma)
+    [slice(0, 1, None), slice(2, 4, None), slice(7, 9, None), slice(11, 12, None)]
+
+    >>> np.ma.notmasked_contiguous(ma, axis=0)
+    [[slice(0, 1, None), slice(2, 3, None)], [], [slice(0, 1, None)], [slice(0, 3, None)]]
+
+    >>> np.ma.notmasked_contiguous(ma, axis=1)
+    [[slice(0, 1, None), slice(2, 4, None)], [slice(3, 4, None)], [slice(0, 1, None), slice(3, 4, None)]]
+
+    """
+    a = asarray(a)
+    nd = a.ndim
+    if nd > 2:
+        raise NotImplementedError("Currently limited to at most 2D arrays.")
+    if axis is None or nd == 1:
+        return flatnotmasked_contiguous(a)
+    #
+    result = []
+    #
+    other = (axis + 1) % 2
+    idx = [0, 0]
+    idx[axis] = slice(None, None)
+    #
+    for i in range(a.shape[other]):
+        idx[other] = i
+        result.append(flatnotmasked_contiguous(a[tuple(idx)]))
+    return result
+
+
+def _ezclump(mask):
+    """
+    Finds the clumps (groups of data with the same values) for a 1D bool array.
+
+    Returns a series of slices.
+    """
+    if mask.ndim > 1:
+        mask = mask.ravel()
+    idx = (mask[1:] ^ mask[:-1]).nonzero()
+    idx = idx[0] + 1
+
+    if mask[0]:
+        if len(idx) == 0:
+            return [slice(0, mask.size)]
+
+        r = [slice(0, idx[0])]
+        r.extend((slice(left, right)
+                  for left, right in zip(idx[1:-1:2], idx[2::2])))
+    else:
+        if len(idx) == 0:
+            return []
+
+        r = [slice(left, right) for left, right in zip(idx[:-1:2], idx[1::2])]
+
+    if mask[-1]:
+        r.append(slice(idx[-1], mask.size))
+    return r
+
+
+def clump_unmasked(a):
+    """
+    Return list of slices corresponding to the unmasked clumps of a 1-D array.
+    (A "clump" is defined as a contiguous region of the array).
+
+    Parameters
+    ----------
+    a : ndarray
+        A one-dimensional masked array.
+
+    Returns
+    -------
+    slices : list of slice
+        The list of slices, one for each contiguous region of unmasked
+        elements in `a`.
+
+    Notes
+    -----
+    .. versionadded:: 1.4.0
+
+    See Also
+    --------
+    flatnotmasked_edges, flatnotmasked_contiguous, notmasked_edges
+    notmasked_contiguous, clump_masked
+
+    Examples
+    --------
+    >>> a = np.ma.masked_array(np.arange(10))
+    >>> a[[0, 1, 2, 6, 8, 9]] = np.ma.masked
+    >>> np.ma.clump_unmasked(a)
+    [slice(3, 6, None), slice(7, 8, None)]
+
+    """
+    mask = getattr(a, '_mask', nomask)
+    if mask is nomask:
+        return [slice(0, a.size)]
+    return _ezclump(~mask)
+
+
+def clump_masked(a):
+    """
+    Returns a list of slices corresponding to the masked clumps of a 1-D array.
+    (A "clump" is defined as a contiguous region of the array).
+
+    Parameters
+    ----------
+    a : ndarray
+        A one-dimensional masked array.
+
+    Returns
+    -------
+    slices : list of slice
+        The list of slices, one for each contiguous region of masked elements
+        in `a`.
+
+    Notes
+    -----
+    .. versionadded:: 1.4.0
+
+    See Also
+    --------
+    flatnotmasked_edges, flatnotmasked_contiguous, notmasked_edges
+    notmasked_contiguous, clump_unmasked
+
+    Examples
+    --------
+    >>> a = np.ma.masked_array(np.arange(10))
+    >>> a[[0, 1, 2, 6, 8, 9]] = np.ma.masked
+    >>> np.ma.clump_masked(a)
+    [slice(0, 3, None), slice(6, 7, None), slice(8, 10, None)]
+
+    """
+    mask = ma.getmask(a)
+    if mask is nomask:
+        return []
+    return _ezclump(mask)
+
+
+###############################################################################
+#                             Polynomial fit                                  #
+###############################################################################
+
+
+def vander(x, n=None):
+    """
+    Masked values in the input array result in rows of zeros.
+
+    """
+    _vander = np.vander(x, n)
+    m = getmask(x)
+    if m is not nomask:
+        _vander[m] = 0
+    return _vander
+
+vander.__doc__ = ma.doc_note(np.vander.__doc__, vander.__doc__)
+
+
+def polyfit(x, y, deg, rcond=None, full=False, w=None, cov=False):
+    """
+    Any masked values in x are propagated in y, and vice-versa.
+
+    """
+    x = asarray(x)
+    y = asarray(y)
+
+    m = getmask(x)
+    if y.ndim == 1:
+        m = mask_or(m, getmask(y))
+    elif y.ndim == 2:
+        my = getmask(mask_rows(y))
+        if my is not nomask:
+            m = mask_or(m, my[:, 0])
+    else:
+        raise TypeError("Expected a 1D or 2D array for y!")
+
+    if w is not None:
+        w = asarray(w)
+        if w.ndim != 1:
+            raise TypeError("expected a 1-d array for weights")
+        if w.shape[0] != y.shape[0]:
+            raise TypeError("expected w and y to have the same length")
+        m = mask_or(m, getmask(w))
+
+    if m is not nomask:
+        not_m = ~m
+        if w is not None:
+            w = w[not_m]
+        return np.polyfit(x[not_m], y[not_m], deg, rcond, full, w, cov)
+    else:
+        return np.polyfit(x, y, deg, rcond, full, w, cov)
+
+polyfit.__doc__ = ma.doc_note(np.polyfit.__doc__, polyfit.__doc__)
diff --git a/.local/lib/python3.8/site-packages/numpy/ma/extras.pyi b/.local/lib/python3.8/site-packages/numpy/ma/extras.pyi
new file mode 100644
index 0000000..e58e43b
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/ma/extras.pyi
@@ -0,0 +1,84 @@
+from typing import Any, List
+from numpy.lib.index_tricks import AxisConcatenator
+
+from numpy.ma.core import (
+    dot as dot,
+    mask_rowcols as mask_rowcols,
+)
+
+__all__: List[str]
+
+def count_masked(arr, axis=...): ...
+def masked_all(shape, dtype = ...): ...
+def masked_all_like(arr): ...
+
+class _fromnxfunction:
+    __name__: Any
+    __doc__: Any
+    def __init__(self, funcname): ...
+    def getdoc(self): ...
+    def __call__(self, *args, **params): ...
+
+class _fromnxfunction_single(_fromnxfunction):
+    def __call__(self, x, *args, **params): ...
+
+class _fromnxfunction_seq(_fromnxfunction):
+    def __call__(self, x, *args, **params): ...
+
+class _fromnxfunction_allargs(_fromnxfunction):
+    def __call__(self, *args, **params): ...
+
+atleast_1d: _fromnxfunction_allargs
+atleast_2d: _fromnxfunction_allargs
+atleast_3d: _fromnxfunction_allargs
+
+vstack: _fromnxfunction_seq
+row_stack: _fromnxfunction_seq
+hstack: _fromnxfunction_seq
+column_stack: _fromnxfunction_seq
+dstack: _fromnxfunction_seq
+stack: _fromnxfunction_seq
+
+hsplit: _fromnxfunction_single
+diagflat: _fromnxfunction_single
+
+def apply_along_axis(func1d, axis, arr, *args, **kwargs): ...
+def apply_over_axes(func, a, axes): ...
+def average(a, axis=..., weights=..., returned=...): ...
+def median(a, axis=..., out=..., overwrite_input=..., keepdims=...): ...
+def compress_nd(x, axis=...): ...
+def compress_rowcols(x, axis=...): ...
+def compress_rows(a): ...
+def compress_cols(a): ...
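+
+# NOTE: the `axis` argument of `mask_rows` and `mask_cols` below has always
+# been ignored; passing it is deprecated (NumPy 1.18) and will eventually
+# raise a TypeError (see the corresponding warnings in extras.py).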
+def mask_rows(a, axis = ...): ...
+def mask_cols(a, axis = ...): ...
+def ediff1d(arr, to_end=..., to_begin=...): ...
+def unique(ar1, return_index=..., return_inverse=...): ...
+def intersect1d(ar1, ar2, assume_unique=...): ...
+def setxor1d(ar1, ar2, assume_unique=...): ...
+def in1d(ar1, ar2, assume_unique=..., invert=...): ...
+def isin(element, test_elements, assume_unique=..., invert=...): ...
+def union1d(ar1, ar2): ...
+def setdiff1d(ar1, ar2, assume_unique=...): ...
+def cov(x, y=..., rowvar=..., bias=..., allow_masked=..., ddof=...): ...
+def corrcoef(x, y=..., rowvar=..., bias = ..., allow_masked=..., ddof = ...): ...
+
+class MAxisConcatenator(AxisConcatenator):
+    concatenate: Any
+    @classmethod
+    def makemat(cls, arr): ...
+    def __getitem__(self, key): ...
+
+class mr_class(MAxisConcatenator):
+    def __init__(self): ...
+
+mr_: mr_class
+
+def flatnotmasked_edges(a): ...
+def notmasked_edges(a, axis=...): ...
+def flatnotmasked_contiguous(a): ...
+def notmasked_contiguous(a, axis=...): ...
+def clump_unmasked(a): ...
+def clump_masked(a): ...
+def vander(x, n=...): ...
+def polyfit(x, y, deg, rcond=..., full=..., w=..., cov=...): ...
diff --git a/.local/lib/python3.8/site-packages/numpy/ma/mrecords.py b/.local/lib/python3.8/site-packages/numpy/ma/mrecords.py
new file mode 100644
index 0000000..1e8103b
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/ma/mrecords.py
@@ -0,0 +1,783 @@
+""":mod:`numpy.ma.mrecords`
+
+Defines the equivalent of :class:`numpy.recarray` for masked arrays,
+where fields can be accessed as attributes.
+Note that :class:`numpy.ma.MaskedArray` already supports structured datatypes
+and the masking of individual fields.
+
+.. moduleauthor:: Pierre Gerard-Marchant
+
+"""
+# We should make sure that no field is called '_mask','mask','_fieldmask',
+# or whatever restricted keywords. An idea would be not to bother in the
+# first place, and then rename the invalid fields with a trailing
+# underscore. Maybe we could just overload the parser function?
+
+from numpy.ma import (
+    MAError, MaskedArray, masked, nomask, masked_array, getdata,
+    getmaskarray, filled
+)
+import numpy.ma as ma
+import warnings
+
+import numpy as np
+from numpy import (
+    bool_, dtype, ndarray, recarray, array as narray
+)
+from numpy.core.records import (
+    fromarrays as recfromarrays, fromrecords as recfromrecords
+)
+
+_byteorderconv = np.core.records._byteorderconv
+
+
+_check_fill_value = ma.core._check_fill_value
+
+
+__all__ = [
+    'MaskedRecords', 'mrecarray', 'fromarrays', 'fromrecords',
+    'fromtextfile', 'addfield',
+]
+
+reserved_fields = ['_data', '_mask', '_fieldmask', 'dtype']
+
+
+def _checknames(descr, names=None):
+    """
+    Checks that the field names in ``descr`` are not reserved keywords.
+
+    If this is the case, a default 'f%i' is substituted. If the argument
+    `names` is not None, updates the field names to valid names.
+ + """ + ndescr = len(descr) + default_names = ['f%i' % i for i in range(ndescr)] + if names is None: + new_names = default_names + else: + if isinstance(names, (tuple, list)): + new_names = names + elif isinstance(names, str): + new_names = names.split(',') + else: + raise NameError(f'illegal input names {names!r}') + nnames = len(new_names) + if nnames < ndescr: + new_names += default_names[nnames:] + ndescr = [] + for (n, d, t) in zip(new_names, default_names, descr.descr): + if n in reserved_fields: + if t[0] in reserved_fields: + ndescr.append((d, t[1])) + else: + ndescr.append(t) + else: + ndescr.append((n, t[1])) + return np.dtype(ndescr) + + +def _get_fieldmask(self): + mdescr = [(n, '|b1') for n in self.dtype.names] + fdmask = np.empty(self.shape, dtype=mdescr) + fdmask.flat = tuple([False] * len(mdescr)) + return fdmask + + +class MaskedRecords(MaskedArray): + """ + + Attributes + ---------- + _data : recarray + Underlying data, as a record array. + _mask : boolean array + Mask of the records. A record is masked when all its fields are + masked. + _fieldmask : boolean recarray + Record array of booleans, setting the mask of each individual field + of each record. + _fill_value : record + Filling values for each field. + + """ + + def __new__(cls, shape, dtype=None, buf=None, offset=0, strides=None, + formats=None, names=None, titles=None, + byteorder=None, aligned=False, + mask=nomask, hard_mask=False, fill_value=None, keep_mask=True, + copy=False, + **options): + + self = recarray.__new__(cls, shape, dtype=dtype, buf=buf, offset=offset, + strides=strides, formats=formats, names=names, + titles=titles, byteorder=byteorder, + aligned=aligned,) + + mdtype = ma.make_mask_descr(self.dtype) + if mask is nomask or not np.size(mask): + if not keep_mask: + self._mask = tuple([False] * len(mdtype)) + else: + mask = np.array(mask, copy=copy) + if mask.shape != self.shape: + (nd, nm) = (self.size, mask.size) + if nm == 1: + mask = np.resize(mask, self.shape) + elif nm == nd: + mask = np.reshape(mask, self.shape) + else: + msg = "Mask and data not compatible: data size is %i, " + \ + "mask size is %i." + raise MAError(msg % (nd, nm)) + if not keep_mask: + self.__setmask__(mask) + self._sharedmask = True + else: + if mask.dtype == mdtype: + _mask = mask + else: + _mask = np.array([tuple([m] * len(mdtype)) for m in mask], + dtype=mdtype) + self._mask = _mask + return self + + def __array_finalize__(self, obj): + # Make sure we have a _fieldmask by default + _mask = getattr(obj, '_mask', None) + if _mask is None: + objmask = getattr(obj, '_mask', nomask) + _dtype = ndarray.__getattribute__(self, 'dtype') + if objmask is nomask: + _mask = ma.make_mask_none(self.shape, dtype=_dtype) + else: + mdescr = ma.make_mask_descr(_dtype) + _mask = narray([tuple([m] * len(mdescr)) for m in objmask], + dtype=mdescr).view(recarray) + # Update some of the attributes + _dict = self.__dict__ + _dict.update(_mask=_mask) + self._update_from(obj) + if _dict['_baseclass'] == ndarray: + _dict['_baseclass'] = recarray + return + + @property + def _data(self): + """ + Returns the data as a recarray. + + """ + return ndarray.view(self, recarray) + + @property + def _fieldmask(self): + """ + Alias to mask. 
+ + """ + return self._mask + + def __len__(self): + """ + Returns the length + + """ + # We have more than one record + if self.ndim: + return len(self._data) + # We have only one record: return the nb of fields + return len(self.dtype) + + def __getattribute__(self, attr): + try: + return object.__getattribute__(self, attr) + except AttributeError: + # attr must be a fieldname + pass + fielddict = ndarray.__getattribute__(self, 'dtype').fields + try: + res = fielddict[attr][:2] + except (TypeError, KeyError) as e: + raise AttributeError( + f'record array has no attribute {attr}') from e + # So far, so good + _localdict = ndarray.__getattribute__(self, '__dict__') + _data = ndarray.view(self, _localdict['_baseclass']) + obj = _data.getfield(*res) + if obj.dtype.names is not None: + raise NotImplementedError("MaskedRecords is currently limited to" + "simple records.") + # Get some special attributes + # Reset the object's mask + hasmasked = False + _mask = _localdict.get('_mask', None) + if _mask is not None: + try: + _mask = _mask[attr] + except IndexError: + # Couldn't find a mask: use the default (nomask) + pass + tp_len = len(_mask.dtype) + hasmasked = _mask.view((bool, ((tp_len,) if tp_len else ()))).any() + if (obj.shape or hasmasked): + obj = obj.view(MaskedArray) + obj._baseclass = ndarray + obj._isfield = True + obj._mask = _mask + # Reset the field values + _fill_value = _localdict.get('_fill_value', None) + if _fill_value is not None: + try: + obj._fill_value = _fill_value[attr] + except ValueError: + obj._fill_value = None + else: + obj = obj.item() + return obj + + def __setattr__(self, attr, val): + """ + Sets the attribute attr to the value val. + + """ + # Should we call __setmask__ first ? + if attr in ['mask', 'fieldmask']: + self.__setmask__(val) + return + # Create a shortcut (so that we don't have to call getattr all the time) + _localdict = object.__getattribute__(self, '__dict__') + # Check whether we're creating a new field + newattr = attr not in _localdict + try: + # Is attr a generic attribute ? + ret = object.__setattr__(self, attr, val) + except Exception: + # Not a generic attribute: exit if it's not a valid field + fielddict = ndarray.__getattribute__(self, 'dtype').fields or {} + optinfo = ndarray.__getattribute__(self, '_optinfo') or {} + if not (attr in fielddict or attr in optinfo): + raise + else: + # Get the list of names + fielddict = ndarray.__getattribute__(self, 'dtype').fields or {} + # Check the attribute + if attr not in fielddict: + return ret + if newattr: + # We just added this one or this setattr worked on an + # internal attribute. + try: + object.__delattr__(self, attr) + except Exception: + return ret + # Let's try to set the field + try: + res = fielddict[attr][:2] + except (TypeError, KeyError) as e: + raise AttributeError( + f'record array has no attribute {attr}') from e + + if val is masked: + _fill_value = _localdict['_fill_value'] + if _fill_value is not None: + dval = _localdict['_fill_value'][attr] + else: + dval = val + mval = True + else: + dval = filled(val) + mval = getmaskarray(val) + obj = ndarray.__getattribute__(self, '_data').setfield(dval, *res) + _localdict['_mask'].__setitem__(attr, mval) + return obj + + def __getitem__(self, indx): + """ + Returns all the fields sharing the same fieldname base. + + The fieldname base is either `_data` or `_mask`. 
+ + """ + _localdict = self.__dict__ + _mask = ndarray.__getattribute__(self, '_mask') + _data = ndarray.view(self, _localdict['_baseclass']) + # We want a field + if isinstance(indx, str): + # Make sure _sharedmask is True to propagate back to _fieldmask + # Don't use _set_mask, there are some copies being made that + # break propagation Don't force the mask to nomask, that wreaks + # easy masking + obj = _data[indx].view(MaskedArray) + obj._mask = _mask[indx] + obj._sharedmask = True + fval = _localdict['_fill_value'] + if fval is not None: + obj._fill_value = fval[indx] + # Force to masked if the mask is True + if not obj.ndim and obj._mask: + return masked + return obj + # We want some elements. + # First, the data. + obj = np.array(_data[indx], copy=False).view(mrecarray) + obj._mask = np.array(_mask[indx], copy=False).view(recarray) + return obj + + def __setitem__(self, indx, value): + """ + Sets the given record to value. + + """ + MaskedArray.__setitem__(self, indx, value) + if isinstance(indx, str): + self._mask[indx] = ma.getmaskarray(value) + + def __str__(self): + """ + Calculates the string representation. + + """ + if self.size > 1: + mstr = [f"({','.join([str(i) for i in s])})" + for s in zip(*[getattr(self, f) for f in self.dtype.names])] + return f"[{', '.join(mstr)}]" + else: + mstr = [f"{','.join([str(i) for i in s])}" + for s in zip([getattr(self, f) for f in self.dtype.names])] + return f"({', '.join(mstr)})" + + def __repr__(self): + """ + Calculates the repr representation. + + """ + _names = self.dtype.names + fmt = "%%%is : %%s" % (max([len(n) for n in _names]) + 4,) + reprstr = [fmt % (f, getattr(self, f)) for f in self.dtype.names] + reprstr.insert(0, 'masked_records(') + reprstr.extend([fmt % (' fill_value', self.fill_value), + ' )']) + return str("\n".join(reprstr)) + + def view(self, dtype=None, type=None): + """ + Returns a view of the mrecarray. + + """ + # OK, basic copy-paste from MaskedArray.view. + if dtype is None: + if type is None: + output = ndarray.view(self) + else: + output = ndarray.view(self, type) + # Here again. + elif type is None: + try: + if issubclass(dtype, ndarray): + output = ndarray.view(self, dtype) + else: + output = ndarray.view(self, dtype) + # OK, there's the change + except TypeError: + dtype = np.dtype(dtype) + # we need to revert to MaskedArray, but keeping the possibility + # of subclasses (eg, TimeSeriesRecords), so we'll force a type + # set to the first parent + if dtype.fields is None: + basetype = self.__class__.__bases__[0] + output = self.__array__().view(dtype, basetype) + output._update_from(self) + else: + output = ndarray.view(self, dtype) + output._fill_value = None + else: + output = ndarray.view(self, dtype, type) + # Update the mask, just like in MaskedArray.view + if (getattr(output, '_mask', nomask) is not nomask): + mdtype = ma.make_mask_descr(output.dtype) + output._mask = self._mask.view(mdtype, ndarray) + output._mask.shape = output.shape + return output + + def harden_mask(self): + """ + Forces the mask to hard. + + """ + self._hardmask = True + + def soften_mask(self): + """ + Forces the mask to soft + + """ + self._hardmask = False + + def copy(self): + """ + Returns a copy of the masked record. + + """ + copied = self._data.copy().view(type(self)) + copied._mask = self._mask.copy() + return copied + + def tolist(self, fill_value=None): + """ + Return the data portion of the array as a list. + + Data items are converted to the nearest compatible Python type. + Masked values are converted to fill_value. 
If fill_value is None, + the corresponding entries in the output list will be ``None``. + + """ + if fill_value is not None: + return self.filled(fill_value).tolist() + result = narray(self.filled().tolist(), dtype=object) + mask = narray(self._mask.tolist()) + result[mask] = None + return result.tolist() + + def __getstate__(self): + """Return the internal state of the masked array. + + This is for pickling. + + """ + state = (1, + self.shape, + self.dtype, + self.flags.fnc, + self._data.tobytes(), + self._mask.tobytes(), + self._fill_value, + ) + return state + + def __setstate__(self, state): + """ + Restore the internal state of the masked array. + + This is for pickling. ``state`` is typically the output of the + ``__getstate__`` output, and is a 5-tuple: + + - class name + - a tuple giving the shape of the data + - a typecode for the data + - a binary string for the data + - a binary string for the mask. + + """ + (ver, shp, typ, isf, raw, msk, flv) = state + ndarray.__setstate__(self, (shp, typ, isf, raw)) + mdtype = dtype([(k, bool_) for (k, _) in self.dtype.descr]) + self.__dict__['_mask'].__setstate__((shp, mdtype, isf, msk)) + self.fill_value = flv + + def __reduce__(self): + """ + Return a 3-tuple for pickling a MaskedArray. + + """ + return (_mrreconstruct, + (self.__class__, self._baseclass, (0,), 'b',), + self.__getstate__()) + + +def _mrreconstruct(subtype, baseclass, baseshape, basetype,): + """ + Build a new MaskedArray from the information stored in a pickle. + + """ + _data = ndarray.__new__(baseclass, baseshape, basetype).view(subtype) + _mask = ndarray.__new__(ndarray, baseshape, 'b1') + return subtype.__new__(subtype, _data, mask=_mask, dtype=basetype,) + +mrecarray = MaskedRecords + + +############################################################################### +# Constructors # +############################################################################### + + +def fromarrays(arraylist, dtype=None, shape=None, formats=None, + names=None, titles=None, aligned=False, byteorder=None, + fill_value=None): + """ + Creates a mrecarray from a (flat) list of masked arrays. + + Parameters + ---------- + arraylist : sequence + A list of (masked) arrays. Each element of the sequence is first converted + to a masked array if needed. If a 2D array is passed as argument, it is + processed line by line + dtype : {None, dtype}, optional + Data type descriptor. + shape : {None, integer}, optional + Number of records. If None, shape is defined from the shape of the + first array in the list. + formats : {None, sequence}, optional + Sequence of formats for each individual field. If None, the formats will + be autodetected by inspecting the fields and selecting the highest dtype + possible. + names : {None, sequence}, optional + Sequence of the names of each field. + fill_value : {None, sequence}, optional + Sequence of data to be used as filling values. + + Notes + ----- + Lists of tuples should be preferred over lists of lists for faster processing. 
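+
+    Examples
+    --------
+    >>> x = np.ma.array([1, 2, 3], mask=[0, 1, 0])
+    >>> y = np.ma.array([3.25, 4.5, 5.75])
+    >>> rec = fromarrays([x, y], names='ival,fval')
+    >>> rec.ival
+    masked_array(data=[1, --, 3],
+                 mask=[False, True, False],
+           fill_value=999999)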
+ + """ + datalist = [getdata(x) for x in arraylist] + masklist = [np.atleast_1d(getmaskarray(x)) for x in arraylist] + _array = recfromarrays(datalist, + dtype=dtype, shape=shape, formats=formats, + names=names, titles=titles, aligned=aligned, + byteorder=byteorder).view(mrecarray) + _array._mask.flat = list(zip(*masklist)) + if fill_value is not None: + _array.fill_value = fill_value + return _array + + +def fromrecords(reclist, dtype=None, shape=None, formats=None, names=None, + titles=None, aligned=False, byteorder=None, + fill_value=None, mask=nomask): + """ + Creates a MaskedRecords from a list of records. + + Parameters + ---------- + reclist : sequence + A list of records. Each element of the sequence is first converted + to a masked array if needed. If a 2D array is passed as argument, it is + processed line by line + dtype : {None, dtype}, optional + Data type descriptor. + shape : {None,int}, optional + Number of records. If None, ``shape`` is defined from the shape of the + first array in the list. + formats : {None, sequence}, optional + Sequence of formats for each individual field. If None, the formats will + be autodetected by inspecting the fields and selecting the highest dtype + possible. + names : {None, sequence}, optional + Sequence of the names of each field. + fill_value : {None, sequence}, optional + Sequence of data to be used as filling values. + mask : {nomask, sequence}, optional. + External mask to apply on the data. + + Notes + ----- + Lists of tuples should be preferred over lists of lists for faster processing. + + """ + # Grab the initial _fieldmask, if needed: + _mask = getattr(reclist, '_mask', None) + # Get the list of records. + if isinstance(reclist, ndarray): + # Make sure we don't have some hidden mask + if isinstance(reclist, MaskedArray): + reclist = reclist.filled().view(ndarray) + # Grab the initial dtype, just in case + if dtype is None: + dtype = reclist.dtype + reclist = reclist.tolist() + mrec = recfromrecords(reclist, dtype=dtype, shape=shape, formats=formats, + names=names, titles=titles, + aligned=aligned, byteorder=byteorder).view(mrecarray) + # Set the fill_value if needed + if fill_value is not None: + mrec.fill_value = fill_value + # Now, let's deal w/ the mask + if mask is not nomask: + mask = np.array(mask, copy=False) + maskrecordlength = len(mask.dtype) + if maskrecordlength: + mrec._mask.flat = mask + elif mask.ndim == 2: + mrec._mask.flat = [tuple(m) for m in mask] + else: + mrec.__setmask__(mask) + if _mask is not None: + mrec._mask[:] = _mask + return mrec + + +def _guessvartypes(arr): + """ + Tries to guess the dtypes of the str_ ndarray `arr`. + + Guesses by testing element-wise conversion. Returns a list of dtypes. + The array is first converted to ndarray. If the array is 2D, the test + is performed on the first line. An exception is raised if the file is + 3D or more. + + """ + vartypes = [] + arr = np.asarray(arr) + if arr.ndim == 2: + arr = arr[0] + elif arr.ndim > 2: + raise ValueError("The array should be 2D at most!") + # Start the conversion loop. + for f in arr: + try: + int(f) + except (ValueError, TypeError): + try: + float(f) + except (ValueError, TypeError): + try: + complex(f) + except (ValueError, TypeError): + vartypes.append(arr.dtype) + else: + vartypes.append(np.dtype(complex)) + else: + vartypes.append(np.dtype(float)) + else: + vartypes.append(np.dtype(int)) + return vartypes + + +def openfile(fname): + """ + Opens the file handle of file `fname`. 
+ + """ + # A file handle + if hasattr(fname, 'readline'): + return fname + # Try to open the file and guess its type + try: + f = open(fname) + except FileNotFoundError as e: + raise FileNotFoundError(f"No such file: '{fname}'") from e + if f.readline()[:2] != "\\x": + f.seek(0, 0) + return f + f.close() + raise NotImplementedError("Wow, binary file") + + +def fromtextfile(fname, delimiter=None, commentchar='#', missingchar='', + varnames=None, vartypes=None, + *, delimitor=np._NoValue): # backwards compatibility + """ + Creates a mrecarray from data stored in the file `filename`. + + Parameters + ---------- + fname : {file name/handle} + Handle of an opened file. + delimiter : {None, string}, optional + Alphanumeric character used to separate columns in the file. + If None, any (group of) white spacestring(s) will be used. + commentchar : {'#', string}, optional + Alphanumeric character used to mark the start of a comment. + missingchar : {'', string}, optional + String indicating missing data, and used to create the masks. + varnames : {None, sequence}, optional + Sequence of the variable names. If None, a list will be created from + the first non empty line of the file. + vartypes : {None, sequence}, optional + Sequence of the variables dtypes. If None, it will be estimated from + the first non-commented line. + + + Ultra simple: the varnames are in the header, one line""" + if delimitor is not np._NoValue: + if delimiter is not None: + raise TypeError("fromtextfile() got multiple values for argument " + "'delimiter'") + # NumPy 1.22.0, 2021-09-23 + warnings.warn("The 'delimitor' keyword argument of " + "numpy.ma.mrecords.fromtextfile() is deprecated " + "since NumPy 1.22.0, use 'delimiter' instead.", + DeprecationWarning, stacklevel=2) + delimiter = delimitor + + # Try to open the file. + ftext = openfile(fname) + + # Get the first non-empty line as the varnames + while True: + line = ftext.readline() + firstline = line[:line.find(commentchar)].strip() + _varnames = firstline.split(delimiter) + if len(_varnames) > 1: + break + if varnames is None: + varnames = _varnames + + # Get the data. + _variables = masked_array([line.strip().split(delimiter) for line in ftext + if line[0] != commentchar and len(line) > 1]) + (_, nfields) = _variables.shape + ftext.close() + + # Try to guess the dtype. + if vartypes is None: + vartypes = _guessvartypes(_variables[0]) + else: + vartypes = [np.dtype(v) for v in vartypes] + if len(vartypes) != nfields: + msg = "Attempting to %i dtypes for %i fields!" + msg += " Reverting to default." + warnings.warn(msg % (len(vartypes), nfields), stacklevel=2) + vartypes = _guessvartypes(_variables[0]) + + # Construct the descriptor. + mdescr = [(n, f) for (n, f) in zip(varnames, vartypes)] + mfillv = [ma.default_fill_value(f) for f in vartypes] + + # Get the data and the mask. + # We just need a list of masked_arrays. It's easier to create it like that: + _mask = (_variables.T == missingchar) + _datalist = [masked_array(a, mask=m, dtype=t, fill_value=f) + for (a, m, t, f) in zip(_variables.T, _mask, vartypes, mfillv)] + + return fromarrays(_datalist, dtype=mdescr) + + +def addfield(mrecord, newfield, newfieldname=None): + """Adds a new field to the masked record array + + Uses `newfield` as data and `newfieldname` as name. If `newfieldname` + is None, the new field name is set to 'fi', where `i` is the number of + existing fields. 
+ + """ + _data = mrecord._data + _mask = mrecord._mask + if newfieldname is None or newfieldname in reserved_fields: + newfieldname = 'f%i' % len(_data.dtype) + newfield = ma.array(newfield) + # Get the new data. + # Create a new empty recarray + newdtype = np.dtype(_data.dtype.descr + [(newfieldname, newfield.dtype)]) + newdata = recarray(_data.shape, newdtype) + # Add the existing field + [newdata.setfield(_data.getfield(*f), *f) + for f in _data.dtype.fields.values()] + # Add the new field + newdata.setfield(newfield._data, *newdata.dtype.fields[newfieldname]) + newdata = newdata.view(MaskedRecords) + # Get the new mask + # Create a new empty recarray + newmdtype = np.dtype([(n, bool_) for n in newdtype.names]) + newmask = recarray(_data.shape, newmdtype) + # Add the old masks + [newmask.setfield(_mask.getfield(*f), *f) + for f in _mask.dtype.fields.values()] + # Add the mask of the new field + newmask.setfield(getmaskarray(newfield), + *newmask.dtype.fields[newfieldname]) + newdata._mask = newmask + return newdata diff --git a/.local/lib/python3.8/site-packages/numpy/ma/mrecords.pyi b/.local/lib/python3.8/site-packages/numpy/ma/mrecords.pyi new file mode 100644 index 0000000..7bd8678 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/ma/mrecords.pyi @@ -0,0 +1,90 @@ +from typing import List, Any, TypeVar + +from numpy import dtype +from numpy.ma import MaskedArray + +__all__: List[str] + +# TODO: Set the `bound` to something more suitable once we +# have proper shape support +_ShapeType = TypeVar("_ShapeType", bound=Any) +_DType_co = TypeVar("_DType_co", bound=dtype[Any], covariant=True) + +class MaskedRecords(MaskedArray[_ShapeType, _DType_co]): + def __new__( + cls, + shape, + dtype=..., + buf=..., + offset=..., + strides=..., + formats=..., + names=..., + titles=..., + byteorder=..., + aligned=..., + mask=..., + hard_mask=..., + fill_value=..., + keep_mask=..., + copy=..., + **options, + ): ... + _mask: Any + _fill_value: Any + @property + def _data(self): ... + @property + def _fieldmask(self): ... + def __array_finalize__(self, obj): ... + def __len__(self): ... + def __getattribute__(self, attr): ... + def __setattr__(self, attr, val): ... + def __getitem__(self, indx): ... + def __setitem__(self, indx, value): ... + def view(self, dtype=..., type=...): ... + def harden_mask(self): ... + def soften_mask(self): ... + def copy(self): ... + def tolist(self, fill_value=...): ... + def __reduce__(self): ... + +mrecarray = MaskedRecords + +def fromarrays( + arraylist, + dtype=..., + shape=..., + formats=..., + names=..., + titles=..., + aligned=..., + byteorder=..., + fill_value=..., +): ... + +def fromrecords( + reclist, + dtype=..., + shape=..., + formats=..., + names=..., + titles=..., + aligned=..., + byteorder=..., + fill_value=..., + mask=..., +): ... + +def fromtextfile( + fname, + delimiter=..., + commentchar=..., + missingchar=..., + varnames=..., + vartypes=..., + # NOTE: deprecated: NumPy 1.22.0, 2021-09-23 + # delimitor=..., +): ... + +def addfield(mrecord, newfield, newfieldname=...): ... 
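For orientation, here is a minimal usage sketch of the mrecords helpers defined above (fromarrays and addfield). It is illustrative only and not part of the diff: the field names 'a' and 'b' and the sample data are made up, but the calls follow the signatures shown in mrecords.py and mrecords.pyi.

# Minimal usage sketch for numpy.ma.mrecords (illustrative, not from the diff).
import numpy.ma as ma
from numpy.ma.mrecords import fromarrays, addfield

# Two masked columns; the field names 'a' and 'b' are arbitrary examples.
x = ma.array([1, 2, 3], mask=[0, 1, 0])
y = ma.array([1.1, 2.2, 3.3], mask=[0, 0, 1])

rec = fromarrays([x, y], names='a,b')   # mrecarray with a per-field mask
# rec.a -> masked_array(data=[1, --, 3], ...)

# addfield() with no name picks 'f%i' % nfields, here 'f2'
# (see the addfield docstring above).
rec2 = addfield(rec, ma.array([10, 20, 30]))
# rec2.dtype.names -> ('a', 'b', 'f2')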
diff --git a/.local/lib/python3.8/site-packages/numpy/ma/setup.py b/.local/lib/python3.8/site-packages/numpy/ma/setup.py new file mode 100644 index 0000000..018d38c --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/ma/setup.py @@ -0,0 +1,12 @@ +#!/usr/bin/env python3 +def configuration(parent_package='',top_path=None): + from numpy.distutils.misc_util import Configuration + config = Configuration('ma', parent_package, top_path) + config.add_subpackage('tests') + config.add_data_files('*.pyi') + return config + +if __name__ == "__main__": + from numpy.distutils.core import setup + config = configuration(top_path='').todict() + setup(**config) diff --git a/.local/lib/python3.8/site-packages/numpy/ma/tests/__init__.py b/.local/lib/python3.8/site-packages/numpy/ma/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/numpy/ma/tests/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/ma/tests/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..3e3b6f7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/ma/tests/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/ma/tests/__pycache__/test_core.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/ma/tests/__pycache__/test_core.cpython-38.pyc new file mode 100644 index 0000000..945528d Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/ma/tests/__pycache__/test_core.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/ma/tests/__pycache__/test_deprecations.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/ma/tests/__pycache__/test_deprecations.cpython-38.pyc new file mode 100644 index 0000000..fc9b7b6 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/ma/tests/__pycache__/test_deprecations.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/ma/tests/__pycache__/test_extras.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/ma/tests/__pycache__/test_extras.cpython-38.pyc new file mode 100644 index 0000000..e1c01e4 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/ma/tests/__pycache__/test_extras.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/ma/tests/__pycache__/test_mrecords.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/ma/tests/__pycache__/test_mrecords.cpython-38.pyc new file mode 100644 index 0000000..6e87f33 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/ma/tests/__pycache__/test_mrecords.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/ma/tests/__pycache__/test_old_ma.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/ma/tests/__pycache__/test_old_ma.cpython-38.pyc new file mode 100644 index 0000000..4b4994e Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/ma/tests/__pycache__/test_old_ma.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/ma/tests/__pycache__/test_regression.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/ma/tests/__pycache__/test_regression.cpython-38.pyc new file mode 100644 index 0000000..5b8d164 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/ma/tests/__pycache__/test_regression.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/ma/tests/__pycache__/test_subclassing.cpython-38.pyc 
b/.local/lib/python3.8/site-packages/numpy/ma/tests/__pycache__/test_subclassing.cpython-38.pyc new file mode 100644 index 0000000..b7b2c5d Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/ma/tests/__pycache__/test_subclassing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/ma/tests/test_core.py b/.local/lib/python3.8/site-packages/numpy/ma/tests/test_core.py new file mode 100644 index 0000000..c8f7f42 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/ma/tests/test_core.py @@ -0,0 +1,5448 @@ +# pylint: disable-msg=W0400,W0511,W0611,W0612,W0614,R0201,E1102 +"""Tests suite for MaskedArray & subclassing. + +:author: Pierre Gerard-Marchant +:contact: pierregm_at_uga_dot_edu +""" +__author__ = "Pierre GF Gerard-Marchant" + +import sys +import warnings +import operator +import itertools +import textwrap +import pytest + +from functools import reduce + + +import numpy as np +import numpy.ma.core +import numpy.core.fromnumeric as fromnumeric +import numpy.core.umath as umath +from numpy.testing import ( + assert_raises, assert_warns, suppress_warnings + ) +from numpy import ndarray +from numpy.compat import asbytes +from numpy.ma.testutils import ( + assert_, assert_array_equal, assert_equal, assert_almost_equal, + assert_equal_records, fail_if_equal, assert_not_equal, + assert_mask_equal + ) +from numpy.ma.core import ( + MAError, MaskError, MaskType, MaskedArray, abs, absolute, add, all, + allclose, allequal, alltrue, angle, anom, arange, arccos, arccosh, arctan2, + arcsin, arctan, argsort, array, asarray, choose, concatenate, + conjugate, cos, cosh, count, default_fill_value, diag, divide, doc_note, + empty, empty_like, equal, exp, flatten_mask, filled, fix_invalid, + flatten_structured_array, fromflex, getmask, getmaskarray, greater, + greater_equal, identity, inner, isMaskedArray, less, less_equal, log, + log10, make_mask, make_mask_descr, mask_or, masked, masked_array, + masked_equal, masked_greater, masked_greater_equal, masked_inside, + masked_less, masked_less_equal, masked_not_equal, masked_outside, + masked_print_option, masked_values, masked_where, max, maximum, + maximum_fill_value, min, minimum, minimum_fill_value, mod, multiply, + mvoid, nomask, not_equal, ones, ones_like, outer, power, product, put, + putmask, ravel, repeat, reshape, resize, shape, sin, sinh, sometrue, sort, + sqrt, subtract, sum, take, tan, tanh, transpose, where, zeros, zeros_like, + ) +from numpy.compat import pickle + +pi = np.pi + + +suppress_copy_mask_on_assignment = suppress_warnings() +suppress_copy_mask_on_assignment.filter( + numpy.ma.core.MaskedArrayFutureWarning, + "setting an item on a masked array which has a shared mask will not copy") + + +# For parametrized numeric testing +num_dts = [np.dtype(dt_) for dt_ in '?bhilqBHILQefdgFD'] +num_ids = [dt_.char for dt_ in num_dts] + + +class TestMaskedArray: + # Base test class for MaskedArrays. + + def setup(self): + # Base data definition. + x = np.array([1., 1., 1., -2., pi/2.0, 4., 5., -10., 10., 1., 2., 3.]) + y = np.array([5., 0., 3., 2., -1., -4., 0., -10., 10., 1., 0., 3.]) + a10 = 10. + m1 = [1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0] + m2 = [0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1] + xm = masked_array(x, mask=m1) + ym = masked_array(y, mask=m2) + z = np.array([-.5, 0., .5, .8]) + zm = masked_array(z, mask=[0, 1, 0, 0]) + xf = np.where(m1, 1e+20, x) + xm.set_fill_value(1e+20) + self.d = (x, y, a10, m1, m2, xm, ym, z, zm, xf) + + def test_basicattributes(self): + # Tests some basic array attributes. 
+ a = array([1, 3, 2]) + b = array([1, 3, 2], mask=[1, 0, 1]) + assert_equal(a.ndim, 1) + assert_equal(b.ndim, 1) + assert_equal(a.size, 3) + assert_equal(b.size, 3) + assert_equal(a.shape, (3,)) + assert_equal(b.shape, (3,)) + + def test_basic0d(self): + # Checks masking a scalar + x = masked_array(0) + assert_equal(str(x), '0') + x = masked_array(0, mask=True) + assert_equal(str(x), str(masked_print_option)) + x = masked_array(0, mask=False) + assert_equal(str(x), '0') + x = array(0, mask=1) + assert_(x.filled().dtype is x._data.dtype) + + def test_basic1d(self): + # Test of basic array creation and properties in 1 dimension. + (x, y, a10, m1, m2, xm, ym, z, zm, xf) = self.d + assert_(not isMaskedArray(x)) + assert_(isMaskedArray(xm)) + assert_((xm - ym).filled(0).any()) + fail_if_equal(xm.mask.astype(int), ym.mask.astype(int)) + s = x.shape + assert_equal(np.shape(xm), s) + assert_equal(xm.shape, s) + assert_equal(xm.dtype, x.dtype) + assert_equal(zm.dtype, z.dtype) + assert_equal(xm.size, reduce(lambda x, y:x * y, s)) + assert_equal(count(xm), len(m1) - reduce(lambda x, y:x + y, m1)) + assert_array_equal(xm, xf) + assert_array_equal(filled(xm, 1.e20), xf) + assert_array_equal(x, xm) + + def test_basic2d(self): + # Test of basic array creation and properties in 2 dimensions. + (x, y, a10, m1, m2, xm, ym, z, zm, xf) = self.d + for s in [(4, 3), (6, 2)]: + x.shape = s + y.shape = s + xm.shape = s + ym.shape = s + xf.shape = s + + assert_(not isMaskedArray(x)) + assert_(isMaskedArray(xm)) + assert_equal(shape(xm), s) + assert_equal(xm.shape, s) + assert_equal(xm.size, reduce(lambda x, y:x * y, s)) + assert_equal(count(xm), len(m1) - reduce(lambda x, y:x + y, m1)) + assert_equal(xm, xf) + assert_equal(filled(xm, 1.e20), xf) + assert_equal(x, xm) + + def test_concatenate_basic(self): + # Tests concatenations. + (x, y, a10, m1, m2, xm, ym, z, zm, xf) = self.d + # basic concatenation + assert_equal(np.concatenate((x, y)), concatenate((xm, ym))) + assert_equal(np.concatenate((x, y)), concatenate((x, y))) + assert_equal(np.concatenate((x, y)), concatenate((xm, y))) + assert_equal(np.concatenate((x, y, x)), concatenate((x, ym, x))) + + def test_concatenate_alongaxis(self): + # Tests concatenations. + (x, y, a10, m1, m2, xm, ym, z, zm, xf) = self.d + # Concatenation along an axis + s = (3, 4) + x.shape = y.shape = xm.shape = ym.shape = s + assert_equal(xm.mask, np.reshape(m1, s)) + assert_equal(ym.mask, np.reshape(m2, s)) + xmym = concatenate((xm, ym), 1) + assert_equal(np.concatenate((x, y), 1), xmym) + assert_equal(np.concatenate((xm.mask, ym.mask), 1), xmym._mask) + + x = zeros(2) + y = array(ones(2), mask=[False, True]) + z = concatenate((x, y)) + assert_array_equal(z, [0, 0, 1, 1]) + assert_array_equal(z.mask, [False, False, False, True]) + z = concatenate((y, x)) + assert_array_equal(z, [1, 1, 0, 0]) + assert_array_equal(z.mask, [False, True, False, False]) + + def test_concatenate_flexible(self): + # Tests the concatenation on flexible arrays. 
+ data = masked_array(list(zip(np.random.rand(10), + np.arange(10))), + dtype=[('a', float), ('b', int)]) + + test = concatenate([data[:5], data[5:]]) + assert_equal_records(test, data) + + def test_creation_ndmin(self): + # Check the use of ndmin + x = array([1, 2, 3], mask=[1, 0, 0], ndmin=2) + assert_equal(x.shape, (1, 3)) + assert_equal(x._data, [[1, 2, 3]]) + assert_equal(x._mask, [[1, 0, 0]]) + + def test_creation_ndmin_from_maskedarray(self): + # Make sure we're not losing the original mask w/ ndmin + x = array([1, 2, 3]) + x[-1] = masked + xx = array(x, ndmin=2, dtype=float) + assert_equal(x.shape, x._mask.shape) + assert_equal(xx.shape, xx._mask.shape) + + def test_creation_maskcreation(self): + # Tests how masks are initialized at the creation of Maskedarrays. + data = arange(24, dtype=float) + data[[3, 6, 15]] = masked + dma_1 = MaskedArray(data) + assert_equal(dma_1.mask, data.mask) + dma_2 = MaskedArray(dma_1) + assert_equal(dma_2.mask, dma_1.mask) + dma_3 = MaskedArray(dma_1, mask=[1, 0, 0, 0] * 6) + fail_if_equal(dma_3.mask, dma_1.mask) + + x = array([1, 2, 3], mask=True) + assert_equal(x._mask, [True, True, True]) + x = array([1, 2, 3], mask=False) + assert_equal(x._mask, [False, False, False]) + y = array([1, 2, 3], mask=x._mask, copy=False) + assert_(np.may_share_memory(x.mask, y.mask)) + y = array([1, 2, 3], mask=x._mask, copy=True) + assert_(not np.may_share_memory(x.mask, y.mask)) + + def test_masked_singleton_array_creation_warns(self): + # The first works, but should not (ideally), there may be no way + # to solve this, however, as long as `np.ma.masked` is an ndarray. + np.array(np.ma.masked) + with pytest.warns(UserWarning): + # Tries to create a float array, using `float(np.ma.masked)`. + # We may want to define this is invalid behaviour in the future! + # (requiring np.ma.masked to be a known NumPy scalar probably + # with a DType.) + np.array([3., np.ma.masked]) + + def test_creation_with_list_of_maskedarrays(self): + # Tests creating a masked array from a list of masked arrays. 
+ x = array(np.arange(5), mask=[1, 0, 0, 0, 0]) + data = array((x, x[::-1])) + assert_equal(data, [[0, 1, 2, 3, 4], [4, 3, 2, 1, 0]]) + assert_equal(data._mask, [[1, 0, 0, 0, 0], [0, 0, 0, 0, 1]]) + + x.mask = nomask + data = array((x, x[::-1])) + assert_equal(data, [[0, 1, 2, 3, 4], [4, 3, 2, 1, 0]]) + assert_(data.mask is nomask) + + def test_creation_with_list_of_maskedarrays_no_bool_cast(self): + # Tests the regression in gh-18551 + masked_str = np.ma.masked_array(['a', 'b'], mask=[True, False]) + normal_int = np.arange(2) + res = np.ma.asarray([masked_str, normal_int], dtype="U21") + assert_array_equal(res.mask, [[True, False], [False, False]]) + + # The above only failed due a long chain of oddity, try also with + # an object array that cannot be converted to bool always: + class NotBool(): + def __bool__(self): + raise ValueError("not a bool!") + masked_obj = np.ma.masked_array([NotBool(), 'b'], mask=[True, False]) + # Check that the NotBool actually fails like we would expect: + with pytest.raises(ValueError, match="not a bool!"): + np.asarray([masked_obj], dtype=bool) + + res = np.ma.asarray([masked_obj, normal_int]) + assert_array_equal(res.mask, [[True, False], [False, False]]) + + def test_creation_from_ndarray_with_padding(self): + x = np.array([('A', 0)], dtype={'names':['f0','f1'], + 'formats':['S4','i8'], + 'offsets':[0,8]}) + array(x) # used to fail due to 'V' padding field in x.dtype.descr + + def test_unknown_keyword_parameter(self): + with pytest.raises(TypeError, match="unexpected keyword argument"): + MaskedArray([1, 2, 3], maks=[0, 1, 0]) # `mask` is misspelled. + + def test_asarray(self): + (x, y, a10, m1, m2, xm, ym, z, zm, xf) = self.d + xm.fill_value = -9999 + xm._hardmask = True + xmm = asarray(xm) + assert_equal(xmm._data, xm._data) + assert_equal(xmm._mask, xm._mask) + assert_equal(xmm.fill_value, xm.fill_value) + assert_equal(xmm._hardmask, xm._hardmask) + + def test_asarray_default_order(self): + # See Issue #6646 + m = np.eye(3).T + assert_(not m.flags.c_contiguous) + + new_m = asarray(m) + assert_(new_m.flags.c_contiguous) + + def test_asarray_enforce_order(self): + # See Issue #6646 + m = np.eye(3).T + assert_(not m.flags.c_contiguous) + + new_m = asarray(m, order='C') + assert_(new_m.flags.c_contiguous) + + def test_fix_invalid(self): + # Checks fix_invalid. + with np.errstate(invalid='ignore'): + data = masked_array([np.nan, 0., 1.], mask=[0, 0, 1]) + data_fixed = fix_invalid(data) + assert_equal(data_fixed._data, [data.fill_value, 0., 1.]) + assert_equal(data_fixed._mask, [1., 0., 1.]) + + def test_maskedelement(self): + # Test of masked element + x = arange(6) + x[1] = masked + assert_(str(masked) == '--') + assert_(x[1] is masked) + assert_equal(filled(x[1], 0), 0) + + def test_set_element_as_object(self): + # Tests setting elements with object + a = empty(1, dtype=object) + x = (1, 2, 3, 4, 5) + a[0] = x + assert_equal(a[0], x) + assert_(a[0] is x) + + import datetime + dt = datetime.datetime.now() + a[0] = dt + assert_(a[0] is dt) + + def test_indexing(self): + # Tests conversions and indexing + x1 = np.array([1, 2, 4, 3]) + x2 = array(x1, mask=[1, 0, 0, 0]) + x3 = array(x1, mask=[0, 1, 0, 1]) + x4 = array(x1) + # test conversion to strings + str(x2) # raises? + repr(x2) # raises? 
+ assert_equal(np.sort(x1), sort(x2, endwith=False)) + # tests of indexing + assert_(type(x2[1]) is type(x1[1])) + assert_(x1[1] == x2[1]) + assert_(x2[0] is masked) + assert_equal(x1[2], x2[2]) + assert_equal(x1[2:5], x2[2:5]) + assert_equal(x1[:], x2[:]) + assert_equal(x1[1:], x3[1:]) + x1[2] = 9 + x2[2] = 9 + assert_equal(x1, x2) + x1[1:3] = 99 + x2[1:3] = 99 + assert_equal(x1, x2) + x2[1] = masked + assert_equal(x1, x2) + x2[1:3] = masked + assert_equal(x1, x2) + x2[:] = x1 + x2[1] = masked + assert_(allequal(getmask(x2), array([0, 1, 0, 0]))) + x3[:] = masked_array([1, 2, 3, 4], [0, 1, 1, 0]) + assert_(allequal(getmask(x3), array([0, 1, 1, 0]))) + x4[:] = masked_array([1, 2, 3, 4], [0, 1, 1, 0]) + assert_(allequal(getmask(x4), array([0, 1, 1, 0]))) + assert_(allequal(x4, array([1, 2, 3, 4]))) + x1 = np.arange(5) * 1.0 + x2 = masked_values(x1, 3.0) + assert_equal(x1, x2) + assert_(allequal(array([0, 0, 0, 1, 0], MaskType), x2.mask)) + assert_equal(3.0, x2.fill_value) + x1 = array([1, 'hello', 2, 3], object) + x2 = np.array([1, 'hello', 2, 3], object) + s1 = x1[1] + s2 = x2[1] + assert_equal(type(s2), str) + assert_equal(type(s1), str) + assert_equal(s1, s2) + assert_(x1[1:1].shape == (0,)) + + @suppress_copy_mask_on_assignment + def test_copy(self): + # Tests of some subtle points of copying and sizing. + n = [0, 0, 1, 0, 0] + m = make_mask(n) + m2 = make_mask(m) + assert_(m is m2) + m3 = make_mask(m, copy=True) + assert_(m is not m3) + + x1 = np.arange(5) + y1 = array(x1, mask=m) + assert_equal(y1._data.__array_interface__, x1.__array_interface__) + assert_(allequal(x1, y1.data)) + assert_equal(y1._mask.__array_interface__, m.__array_interface__) + + y1a = array(y1) + # Default for masked array is not to copy; see gh-10318. + assert_(y1a._data.__array_interface__ == + y1._data.__array_interface__) + assert_(y1a._mask.__array_interface__ == + y1._mask.__array_interface__) + + y2 = array(x1, mask=m3) + assert_(y2._data.__array_interface__ == x1.__array_interface__) + assert_(y2._mask.__array_interface__ == m3.__array_interface__) + assert_(y2[2] is masked) + y2[2] = 9 + assert_(y2[2] is not masked) + assert_(y2._mask.__array_interface__ == m3.__array_interface__) + assert_(allequal(y2.mask, 0)) + + y2a = array(x1, mask=m, copy=1) + assert_(y2a._data.__array_interface__ != x1.__array_interface__) + #assert_( y2a._mask is not m) + assert_(y2a._mask.__array_interface__ != m.__array_interface__) + assert_(y2a[2] is masked) + y2a[2] = 9 + assert_(y2a[2] is not masked) + #assert_( y2a._mask is not m) + assert_(y2a._mask.__array_interface__ != m.__array_interface__) + assert_(allequal(y2a.mask, 0)) + + y3 = array(x1 * 1.0, mask=m) + assert_(filled(y3).dtype is (x1 * 1.0).dtype) + + x4 = arange(4) + x4[2] = masked + y4 = resize(x4, (8,)) + assert_equal(concatenate([x4, x4]), y4) + assert_equal(getmask(y4), [0, 0, 1, 0, 0, 0, 1, 0]) + y5 = repeat(x4, (2, 2, 2, 2), axis=0) + assert_equal(y5, [0, 0, 1, 1, 2, 2, 3, 3]) + y6 = repeat(x4, 2, axis=0) + assert_equal(y5, y6) + y7 = x4.repeat((2, 2, 2, 2), axis=0) + assert_equal(y5, y7) + y8 = x4.repeat(2, 0) + assert_equal(y5, y8) + + y9 = x4.copy() + assert_equal(y9._data, x4._data) + assert_equal(y9._mask, x4._mask) + + x = masked_array([1, 2, 3], mask=[0, 1, 0]) + # Copy is False by default + y = masked_array(x) + assert_equal(y._data.ctypes.data, x._data.ctypes.data) + assert_equal(y._mask.ctypes.data, x._mask.ctypes.data) + y = masked_array(x, copy=True) + assert_not_equal(y._data.ctypes.data, x._data.ctypes.data) + 
assert_not_equal(y._mask.ctypes.data, x._mask.ctypes.data) + + def test_copy_0d(self): + # gh-9430 + x = np.ma.array(43, mask=True) + xc = x.copy() + assert_equal(xc.mask, True) + + def test_copy_on_python_builtins(self): + # Tests copy works on python builtins (issue#8019) + assert_(isMaskedArray(np.ma.copy([1,2,3]))) + assert_(isMaskedArray(np.ma.copy((1,2,3)))) + + def test_copy_immutable(self): + # Tests that the copy method is immutable, GitHub issue #5247 + a = np.ma.array([1, 2, 3]) + b = np.ma.array([4, 5, 6]) + a_copy_method = a.copy + b.copy + assert_equal(a_copy_method(), [1, 2, 3]) + + def test_deepcopy(self): + from copy import deepcopy + a = array([0, 1, 2], mask=[False, True, False]) + copied = deepcopy(a) + assert_equal(copied.mask, a.mask) + assert_not_equal(id(a._mask), id(copied._mask)) + + copied[1] = 1 + assert_equal(copied.mask, [0, 0, 0]) + assert_equal(a.mask, [0, 1, 0]) + + copied = deepcopy(a) + assert_equal(copied.mask, a.mask) + copied.mask[1] = False + assert_equal(copied.mask, [0, 0, 0]) + assert_equal(a.mask, [0, 1, 0]) + + def test_format(self): + a = array([0, 1, 2], mask=[False, True, False]) + assert_equal(format(a), "[0 -- 2]") + assert_equal(format(masked), "--") + assert_equal(format(masked, ""), "--") + + # Postponed from PR #15410, perhaps address in the future. + # assert_equal(format(masked, " >5"), " --") + # assert_equal(format(masked, " <5"), "-- ") + + # Expect a FutureWarning for using format_spec with MaskedElement + with assert_warns(FutureWarning): + with_format_string = format(masked, " >5") + assert_equal(with_format_string, "--") + + def test_str_repr(self): + a = array([0, 1, 2], mask=[False, True, False]) + assert_equal(str(a), '[0 -- 2]') + assert_equal( + repr(a), + textwrap.dedent('''\ + masked_array(data=[0, --, 2], + mask=[False, True, False], + fill_value=999999)''') + ) + + # arrays with a continuation + a = np.ma.arange(2000) + a[1:50] = np.ma.masked + assert_equal( + repr(a), + textwrap.dedent('''\ + masked_array(data=[0, --, --, ..., 1997, 1998, 1999], + mask=[False, True, True, ..., False, False, False], + fill_value=999999)''') + ) + + # line-wrapped 1d arrays are correctly aligned + a = np.ma.arange(20) + assert_equal( + repr(a), + textwrap.dedent('''\ + masked_array(data=[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 15, 16, 17, 18, 19], + mask=False, + fill_value=999999)''') + ) + + # 2d arrays cause wrapping + a = array([[1, 2, 3], [4, 5, 6]], dtype=np.int8) + a[1,1] = np.ma.masked + assert_equal( + repr(a), + textwrap.dedent('''\ + masked_array( + data=[[1, 2, 3], + [4, --, 6]], + mask=[[False, False, False], + [False, True, False]], + fill_value=999999, + dtype=int8)''') + ) + + # but not it they're a row vector + assert_equal( + repr(a[:1]), + textwrap.dedent('''\ + masked_array(data=[[1, 2, 3]], + mask=[[False, False, False]], + fill_value=999999, + dtype=int8)''') + ) + + # dtype=int is implied, so not shown + assert_equal( + repr(a.astype(int)), + textwrap.dedent('''\ + masked_array( + data=[[1, 2, 3], + [4, --, 6]], + mask=[[False, False, False], + [False, True, False]], + fill_value=999999)''') + ) + + def test_str_repr_legacy(self): + oldopts = np.get_printoptions() + np.set_printoptions(legacy='1.13') + try: + a = array([0, 1, 2], mask=[False, True, False]) + assert_equal(str(a), '[0 -- 2]') + assert_equal(repr(a), 'masked_array(data = [0 -- 2],\n' + ' mask = [False True False],\n' + ' fill_value = 999999)\n') + + a = np.ma.arange(2000) + a[1:50] = np.ma.masked + assert_equal( + repr(a), + 
'masked_array(data = [0 -- -- ..., 1997 1998 1999],\n' + ' mask = [False True True ..., False False False],\n' + ' fill_value = 999999)\n' + ) + finally: + np.set_printoptions(**oldopts) + + def test_0d_unicode(self): + u = u'caf\xe9' + utype = type(u) + + arr_nomask = np.ma.array(u) + arr_masked = np.ma.array(u, mask=True) + + assert_equal(utype(arr_nomask), u) + assert_equal(utype(arr_masked), u'--') + + def test_pickling(self): + # Tests pickling + for dtype in (int, float, str, object): + a = arange(10).astype(dtype) + a.fill_value = 999 + + masks = ([0, 0, 0, 1, 0, 1, 0, 1, 0, 1], # partially masked + True, # Fully masked + False) # Fully unmasked + + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + for mask in masks: + a.mask = mask + a_pickled = pickle.loads(pickle.dumps(a, protocol=proto)) + assert_equal(a_pickled._mask, a._mask) + assert_equal(a_pickled._data, a._data) + if dtype in (object, int): + assert_equal(a_pickled.fill_value, 999) + else: + assert_equal(a_pickled.fill_value, dtype(999)) + assert_array_equal(a_pickled.mask, mask) + + def test_pickling_subbaseclass(self): + # Test pickling w/ a subclass of ndarray + x = np.array([(1.0, 2), (3.0, 4)], + dtype=[('x', float), ('y', int)]).view(np.recarray) + a = masked_array(x, mask=[(True, False), (False, True)]) + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + a_pickled = pickle.loads(pickle.dumps(a, protocol=proto)) + assert_equal(a_pickled._mask, a._mask) + assert_equal(a_pickled, a) + assert_(isinstance(a_pickled._data, np.recarray)) + + def test_pickling_maskedconstant(self): + # Test pickling MaskedConstant + mc = np.ma.masked + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + mc_pickled = pickle.loads(pickle.dumps(mc, protocol=proto)) + assert_equal(mc_pickled._baseclass, mc._baseclass) + assert_equal(mc_pickled._mask, mc._mask) + assert_equal(mc_pickled._data, mc._data) + + def test_pickling_wstructured(self): + # Tests pickling w/ structured array + a = array([(1, 1.), (2, 2.)], mask=[(0, 0), (0, 1)], + dtype=[('a', int), ('b', float)]) + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + a_pickled = pickle.loads(pickle.dumps(a, protocol=proto)) + assert_equal(a_pickled._mask, a._mask) + assert_equal(a_pickled, a) + + def test_pickling_keepalignment(self): + # Tests pickling w/ F_CONTIGUOUS arrays + a = arange(10) + a.shape = (-1, 2) + b = a.T + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + test = pickle.loads(pickle.dumps(b, protocol=proto)) + assert_equal(test, b) + + def test_single_element_subscript(self): + # Tests single element subscripts of Maskedarrays. + a = array([1, 3, 2]) + b = array([1, 3, 2], mask=[1, 0, 1]) + assert_equal(a[0].shape, ()) + assert_equal(b[0].shape, ()) + assert_equal(b[1].shape, ()) + + def test_topython(self): + # Tests some communication issues with Python. + assert_equal(1, int(array(1))) + assert_equal(1.0, float(array(1))) + assert_equal(1, int(array([[[1]]]))) + assert_equal(1.0, float(array([[1]]))) + assert_raises(TypeError, float, array([1, 1])) + + with suppress_warnings() as sup: + sup.filter(UserWarning, 'Warning: converting a masked element') + assert_(np.isnan(float(array([1], mask=[1])))) + + a = array([1, 2, 3], mask=[1, 0, 0]) + assert_raises(TypeError, lambda: float(a)) + assert_equal(float(a[-1]), 3.) 
+ assert_(np.isnan(float(a[0]))) + assert_raises(TypeError, int, a) + assert_equal(int(a[-1]), 3) + assert_raises(MAError, lambda:int(a[0])) + + def test_oddfeatures_1(self): + # Test of other odd features + x = arange(20) + x = x.reshape(4, 5) + x.flat[5] = 12 + assert_(x[1, 0] == 12) + z = x + 10j * x + assert_equal(z.real, x) + assert_equal(z.imag, 10 * x) + assert_equal((z * conjugate(z)).real, 101 * x * x) + z.imag[...] = 0.0 + + x = arange(10) + x[3] = masked + assert_(str(x[3]) == str(masked)) + c = x >= 8 + assert_(count(where(c, masked, masked)) == 0) + assert_(shape(where(c, masked, masked)) == c.shape) + + z = masked_where(c, x) + assert_(z.dtype is x.dtype) + assert_(z[3] is masked) + assert_(z[4] is not masked) + assert_(z[7] is not masked) + assert_(z[8] is masked) + assert_(z[9] is masked) + assert_equal(x, z) + + def test_oddfeatures_2(self): + # Tests some more features. + x = array([1., 2., 3., 4., 5.]) + c = array([1, 1, 1, 0, 0]) + x[2] = masked + z = where(c, x, -x) + assert_equal(z, [1., 2., 0., -4., -5]) + c[0] = masked + z = where(c, x, -x) + assert_equal(z, [1., 2., 0., -4., -5]) + assert_(z[0] is masked) + assert_(z[1] is not masked) + assert_(z[2] is masked) + + @suppress_copy_mask_on_assignment + def test_oddfeatures_3(self): + # Tests some generic features + atest = array([10], mask=True) + btest = array([20]) + idx = atest.mask + atest[idx] = btest[idx] + assert_equal(atest, [20]) + + def test_filled_with_object_dtype(self): + a = np.ma.masked_all(1, dtype='O') + assert_equal(a.filled('x')[0], 'x') + + def test_filled_with_flexible_dtype(self): + # Test filled w/ flexible dtype + flexi = array([(1, 1, 1)], + dtype=[('i', int), ('s', '|S8'), ('f', float)]) + flexi[0] = masked + assert_equal(flexi.filled(), + np.array([(default_fill_value(0), + default_fill_value('0'), + default_fill_value(0.),)], dtype=flexi.dtype)) + flexi[0] = masked + assert_equal(flexi.filled(1), + np.array([(1, '1', 1.)], dtype=flexi.dtype)) + + def test_filled_with_mvoid(self): + # Test filled w/ mvoid + ndtype = [('a', int), ('b', float)] + a = mvoid((1, 2.), mask=[(0, 1)], dtype=ndtype) + # Filled using default + test = a.filled() + assert_equal(tuple(test), (1, default_fill_value(1.))) + # Explicit fill_value + test = a.filled((-1, -1)) + assert_equal(tuple(test), (1, -1)) + # Using predefined filling values + a.fill_value = (-999, -999) + assert_equal(tuple(a.filled()), (1, -999)) + + def test_filled_with_nested_dtype(self): + # Test filled w/ nested dtype + ndtype = [('A', int), ('B', [('BA', int), ('BB', int)])] + a = array([(1, (1, 1)), (2, (2, 2))], + mask=[(0, (1, 0)), (0, (0, 1))], dtype=ndtype) + test = a.filled(0) + control = np.array([(1, (0, 1)), (2, (2, 0))], dtype=ndtype) + assert_equal(test, control) + + test = a['B'].filled(0) + control = np.array([(0, 1), (2, 0)], dtype=a['B'].dtype) + assert_equal(test, control) + + # test if mask gets set correctly (see #6760) + Z = numpy.ma.zeros(2, numpy.dtype([("A", "(2,2)i1,(2,2)i1", (2,2))])) + assert_equal(Z.data.dtype, numpy.dtype([('A', [('f0', 'i1', (2, 2)), + ('f1', 'i1', (2, 2))], (2, 2))])) + assert_equal(Z.mask.dtype, numpy.dtype([('A', [('f0', '?', (2, 2)), + ('f1', '?', (2, 2))], (2, 2))])) + + def test_filled_with_f_order(self): + # Test filled w/ F-contiguous array + a = array(np.array([(0, 1, 2), (4, 5, 6)], order='F'), + mask=np.array([(0, 0, 1), (1, 0, 0)], order='F'), + order='F') # this is currently ignored + assert_(a.flags['F_CONTIGUOUS']) + assert_(a.filled(0).flags['F_CONTIGUOUS']) + + def 
test_optinfo_propagation(self): + # Checks that _optinfo dictionary isn't back-propagated + x = array([1, 2, 3, ], dtype=float) + x._optinfo['info'] = '???' + y = x.copy() + assert_equal(y._optinfo['info'], '???') + y._optinfo['info'] = '!!!' + assert_equal(x._optinfo['info'], '???') + + def test_optinfo_forward_propagation(self): + a = array([1,2,2,4]) + a._optinfo["key"] = "value" + assert_equal(a._optinfo["key"], (a == 2)._optinfo["key"]) + assert_equal(a._optinfo["key"], (a != 2)._optinfo["key"]) + assert_equal(a._optinfo["key"], (a > 2)._optinfo["key"]) + assert_equal(a._optinfo["key"], (a >= 2)._optinfo["key"]) + assert_equal(a._optinfo["key"], (a <= 2)._optinfo["key"]) + assert_equal(a._optinfo["key"], (a + 2)._optinfo["key"]) + assert_equal(a._optinfo["key"], (a - 2)._optinfo["key"]) + assert_equal(a._optinfo["key"], (a * 2)._optinfo["key"]) + assert_equal(a._optinfo["key"], (a / 2)._optinfo["key"]) + assert_equal(a._optinfo["key"], a[:2]._optinfo["key"]) + assert_equal(a._optinfo["key"], a[[0,0,2]]._optinfo["key"]) + assert_equal(a._optinfo["key"], np.exp(a)._optinfo["key"]) + assert_equal(a._optinfo["key"], np.abs(a)._optinfo["key"]) + assert_equal(a._optinfo["key"], array(a, copy=True)._optinfo["key"]) + assert_equal(a._optinfo["key"], np.zeros_like(a)._optinfo["key"]) + + def test_fancy_printoptions(self): + # Test printing a masked array w/ fancy dtype. + fancydtype = np.dtype([('x', int), ('y', [('t', int), ('s', float)])]) + test = array([(1, (2, 3.0)), (4, (5, 6.0))], + mask=[(1, (0, 1)), (0, (1, 0))], + dtype=fancydtype) + control = "[(--, (2, --)) (4, (--, 6.0))]" + assert_equal(str(test), control) + + # Test 0-d array with multi-dimensional dtype + t_2d0 = masked_array(data = (0, [[0.0, 0.0, 0.0], + [0.0, 0.0, 0.0]], + 0.0), + mask = (False, [[True, False, True], + [False, False, True]], + False), + dtype = "int, (2,3)float, float") + control = "(0, [[--, 0.0, --], [0.0, 0.0, --]], 0.0)" + assert_equal(str(t_2d0), control) + + def test_flatten_structured_array(self): + # Test flatten_structured_array on arrays + # On ndarray + ndtype = [('a', int), ('b', float)] + a = np.array([(1, 1), (2, 2)], dtype=ndtype) + test = flatten_structured_array(a) + control = np.array([[1., 1.], [2., 2.]], dtype=float) + assert_equal(test, control) + assert_equal(test.dtype, control.dtype) + # On masked_array + a = array([(1, 1), (2, 2)], mask=[(0, 1), (1, 0)], dtype=ndtype) + test = flatten_structured_array(a) + control = array([[1., 1.], [2., 2.]], + mask=[[0, 1], [1, 0]], dtype=float) + assert_equal(test, control) + assert_equal(test.dtype, control.dtype) + assert_equal(test.mask, control.mask) + # On masked array with nested structure + ndtype = [('a', int), ('b', [('ba', int), ('bb', float)])] + a = array([(1, (1, 1.1)), (2, (2, 2.2))], + mask=[(0, (1, 0)), (1, (0, 1))], dtype=ndtype) + test = flatten_structured_array(a) + control = array([[1., 1., 1.1], [2., 2., 2.2]], + mask=[[0, 1, 0], [1, 0, 1]], dtype=float) + assert_equal(test, control) + assert_equal(test.dtype, control.dtype) + assert_equal(test.mask, control.mask) + # Keeping the initial shape + ndtype = [('a', int), ('b', float)] + a = np.array([[(1, 1), ], [(2, 2), ]], dtype=ndtype) + test = flatten_structured_array(a) + control = np.array([[[1., 1.], ], [[2., 2.], ]], dtype=float) + assert_equal(test, control) + assert_equal(test.dtype, control.dtype) + + def test_void0d(self): + # Test creating a mvoid object + ndtype = [('a', int), ('b', int)] + a = np.array([(1, 2,)], dtype=ndtype)[0] + f = mvoid(a) + 
assert_(isinstance(f, mvoid)) + + a = masked_array([(1, 2)], mask=[(1, 0)], dtype=ndtype)[0] + assert_(isinstance(a, mvoid)) + + a = masked_array([(1, 2), (1, 2)], mask=[(1, 0), (0, 0)], dtype=ndtype) + f = mvoid(a._data[0], a._mask[0]) + assert_(isinstance(f, mvoid)) + + def test_mvoid_getitem(self): + # Test mvoid.__getitem__ + ndtype = [('a', int), ('b', int)] + a = masked_array([(1, 2,), (3, 4)], mask=[(0, 0), (1, 0)], + dtype=ndtype) + # w/o mask + f = a[0] + assert_(isinstance(f, mvoid)) + assert_equal((f[0], f['a']), (1, 1)) + assert_equal(f['b'], 2) + # w/ mask + f = a[1] + assert_(isinstance(f, mvoid)) + assert_(f[0] is masked) + assert_(f['a'] is masked) + assert_equal(f[1], 4) + + # exotic dtype + A = masked_array(data=[([0,1],)], + mask=[([True, False],)], + dtype=[("A", ">i2", (2,))]) + assert_equal(A[0]["A"], A["A"][0]) + assert_equal(A[0]["A"], masked_array(data=[0, 1], + mask=[True, False], dtype=">i2")) + + def test_mvoid_iter(self): + # Test iteration on __getitem__ + ndtype = [('a', int), ('b', int)] + a = masked_array([(1, 2,), (3, 4)], mask=[(0, 0), (1, 0)], + dtype=ndtype) + # w/o mask + assert_equal(list(a[0]), [1, 2]) + # w/ mask + assert_equal(list(a[1]), [masked, 4]) + + def test_mvoid_print(self): + # Test printing a mvoid + mx = array([(1, 1), (2, 2)], dtype=[('a', int), ('b', int)]) + assert_equal(str(mx[0]), "(1, 1)") + mx['b'][0] = masked + ini_display = masked_print_option._display + masked_print_option.set_display("-X-") + try: + assert_equal(str(mx[0]), "(1, -X-)") + assert_equal(repr(mx[0]), "(1, -X-)") + finally: + masked_print_option.set_display(ini_display) + + # also check if there are object datatypes (see gh-7493) + mx = array([(1,), (2,)], dtype=[('a', 'O')]) + assert_equal(str(mx[0]), "(1,)") + + def test_mvoid_multidim_print(self): + + # regression test for gh-6019 + t_ma = masked_array(data = [([1, 2, 3],)], + mask = [([False, True, False],)], + fill_value = ([999999, 999999, 999999],), + dtype = [('a', ' 1: + assert_equal(np.concatenate((x, y), 1), concatenate((xm, ym), 1)) + assert_equal(np.add.reduce(x, 1), add.reduce(x, 1)) + assert_equal(np.sum(x, 1), sum(x, 1)) + assert_equal(np.product(x, 1), product(x, 1)) + + def test_binops_d2D(self): + # Test binary operations on 2D data + a = array([[1.], [2.], [3.]], mask=[[False], [True], [True]]) + b = array([[2., 3.], [4., 5.], [6., 7.]]) + + test = a * b + control = array([[2., 3.], [2., 2.], [3., 3.]], + mask=[[0, 0], [1, 1], [1, 1]]) + assert_equal(test, control) + assert_equal(test.data, control.data) + assert_equal(test.mask, control.mask) + + test = b * a + control = array([[2., 3.], [4., 5.], [6., 7.]], + mask=[[0, 0], [1, 1], [1, 1]]) + assert_equal(test, control) + assert_equal(test.data, control.data) + assert_equal(test.mask, control.mask) + + a = array([[1.], [2.], [3.]]) + b = array([[2., 3.], [4., 5.], [6., 7.]], + mask=[[0, 0], [0, 0], [0, 1]]) + test = a * b + control = array([[2, 3], [8, 10], [18, 3]], + mask=[[0, 0], [0, 0], [0, 1]]) + assert_equal(test, control) + assert_equal(test.data, control.data) + assert_equal(test.mask, control.mask) + + test = b * a + control = array([[2, 3], [8, 10], [18, 7]], + mask=[[0, 0], [0, 0], [0, 1]]) + assert_equal(test, control) + assert_equal(test.data, control.data) + assert_equal(test.mask, control.mask) + + def test_domained_binops_d2D(self): + # Test domained binary operations on 2D data + a = array([[1.], [2.], [3.]], mask=[[False], [True], [True]]) + b = array([[2., 3.], [4., 5.], [6., 7.]]) + + test = a / b + control = array([[1. 
/ 2., 1. / 3.], [2., 2.], [3., 3.]], + mask=[[0, 0], [1, 1], [1, 1]]) + assert_equal(test, control) + assert_equal(test.data, control.data) + assert_equal(test.mask, control.mask) + + test = b / a + control = array([[2. / 1., 3. / 1.], [4., 5.], [6., 7.]], + mask=[[0, 0], [1, 1], [1, 1]]) + assert_equal(test, control) + assert_equal(test.data, control.data) + assert_equal(test.mask, control.mask) + + a = array([[1.], [2.], [3.]]) + b = array([[2., 3.], [4., 5.], [6., 7.]], + mask=[[0, 0], [0, 0], [0, 1]]) + test = a / b + control = array([[1. / 2, 1. / 3], [2. / 4, 2. / 5], [3. / 6, 3]], + mask=[[0, 0], [0, 0], [0, 1]]) + assert_equal(test, control) + assert_equal(test.data, control.data) + assert_equal(test.mask, control.mask) + + test = b / a + control = array([[2 / 1., 3 / 1.], [4 / 2., 5 / 2.], [6 / 3., 7]], + mask=[[0, 0], [0, 0], [0, 1]]) + assert_equal(test, control) + assert_equal(test.data, control.data) + assert_equal(test.mask, control.mask) + + def test_noshrinking(self): + # Check that we don't shrink a mask when not wanted + # Binary operations + a = masked_array([1., 2., 3.], mask=[False, False, False], + shrink=False) + b = a + 1 + assert_equal(b.mask, [0, 0, 0]) + # In place binary operation + a += 1 + assert_equal(a.mask, [0, 0, 0]) + # Domained binary operation + b = a / 1. + assert_equal(b.mask, [0, 0, 0]) + # In place binary operation + a /= 1. + assert_equal(a.mask, [0, 0, 0]) + + def test_ufunc_nomask(self): + # check the case ufuncs should set the mask to false + m = np.ma.array([1]) + # check we don't get array([False], dtype=bool) + assert_equal(np.true_divide(m, 5).mask.shape, ()) + + def test_noshink_on_creation(self): + # Check that the mask is not shrunk on array creation when not wanted + a = np.ma.masked_values([1., 2.5, 3.1], 1.5, shrink=False) + assert_equal(a.mask, [0, 0, 0]) + + def test_mod(self): + # Tests mod + (x, y, a10, m1, m2, xm, ym, z, zm, xf) = self.d + assert_equal(mod(x, y), mod(xm, ym)) + test = mod(ym, xm) + assert_equal(test, np.mod(ym, xm)) + assert_equal(test.mask, mask_or(xm.mask, ym.mask)) + test = mod(xm, ym) + assert_equal(test, np.mod(xm, ym)) + assert_equal(test.mask, mask_or(mask_or(xm.mask, ym.mask), (ym == 0))) + + def test_TakeTransposeInnerOuter(self): + # Test of take, transpose, inner, outer products + x = arange(24) + y = np.arange(24) + x[5:6] = masked + x = x.reshape(2, 3, 4) + y = y.reshape(2, 3, 4) + assert_equal(np.transpose(y, (2, 0, 1)), transpose(x, (2, 0, 1))) + assert_equal(np.take(y, (2, 0, 1), 1), take(x, (2, 0, 1), 1)) + assert_equal(np.inner(filled(x, 0), filled(y, 0)), + inner(x, y)) + assert_equal(np.outer(filled(x, 0), filled(y, 0)), + outer(x, y)) + y = array(['abc', 1, 'def', 2, 3], object) + y[2] = masked + t = take(y, [0, 3, 4]) + assert_(t[0] == 'abc') + assert_(t[1] == 2) + assert_(t[2] == 3) + + def test_imag_real(self): + # Check complex + xx = array([1 + 10j, 20 + 2j], mask=[1, 0]) + assert_equal(xx.imag, [10, 2]) + assert_equal(xx.imag.filled(), [1e+20, 2]) + assert_equal(xx.imag.dtype, xx._data.imag.dtype) + assert_equal(xx.real, [1, 20]) + assert_equal(xx.real.filled(), [1e+20, 20]) + assert_equal(xx.real.dtype, xx._data.real.dtype) + + def test_methods_with_output(self): + xm = array(np.random.uniform(0, 10, 12)).reshape(3, 4) + xm[:, 0] = xm[0] = xm[-1, -1] = masked + + funclist = ('sum', 'prod', 'var', 'std', 'max', 'min', 'ptp', 'mean',) + + for funcname in funclist: + npfunc = getattr(np, funcname) + xmmeth = getattr(xm, funcname) + # A ndarray as explicit input + output = np.empty(4, 
dtype=float) + output.fill(-9999) + result = npfunc(xm, axis=0, out=output) + # ... the result should be the given output + assert_(result is output) + assert_equal(result, xmmeth(axis=0, out=output)) + + output = empty(4, dtype=int) + result = xmmeth(axis=0, out=output) + assert_(result is output) + assert_(output[0] is masked) + + def test_eq_on_structured(self): + # Test the equality of structured arrays + ndtype = [('A', int), ('B', int)] + a = array([(1, 1), (2, 2)], mask=[(0, 1), (0, 0)], dtype=ndtype) + + test = (a == a) + assert_equal(test.data, [True, True]) + assert_equal(test.mask, [False, False]) + assert_(test.fill_value == True) + + test = (a == a[0]) + assert_equal(test.data, [True, False]) + assert_equal(test.mask, [False, False]) + assert_(test.fill_value == True) + + b = array([(1, 1), (2, 2)], mask=[(1, 0), (0, 0)], dtype=ndtype) + test = (a == b) + assert_equal(test.data, [False, True]) + assert_equal(test.mask, [True, False]) + assert_(test.fill_value == True) + + test = (a[0] == b) + assert_equal(test.data, [False, False]) + assert_equal(test.mask, [True, False]) + assert_(test.fill_value == True) + + b = array([(1, 1), (2, 2)], mask=[(0, 1), (1, 0)], dtype=ndtype) + test = (a == b) + assert_equal(test.data, [True, True]) + assert_equal(test.mask, [False, False]) + assert_(test.fill_value == True) + + # complicated dtype, 2-dimensional array. + ndtype = [('A', int), ('B', [('BA', int), ('BB', int)])] + a = array([[(1, (1, 1)), (2, (2, 2))], + [(3, (3, 3)), (4, (4, 4))]], + mask=[[(0, (1, 0)), (0, (0, 1))], + [(1, (0, 0)), (1, (1, 1))]], dtype=ndtype) + test = (a[0, 0] == a) + assert_equal(test.data, [[True, False], [False, False]]) + assert_equal(test.mask, [[False, False], [False, True]]) + assert_(test.fill_value == True) + + def test_ne_on_structured(self): + # Test the equality of structured arrays + ndtype = [('A', int), ('B', int)] + a = array([(1, 1), (2, 2)], mask=[(0, 1), (0, 0)], dtype=ndtype) + + test = (a != a) + assert_equal(test.data, [False, False]) + assert_equal(test.mask, [False, False]) + assert_(test.fill_value == True) + + test = (a != a[0]) + assert_equal(test.data, [False, True]) + assert_equal(test.mask, [False, False]) + assert_(test.fill_value == True) + + b = array([(1, 1), (2, 2)], mask=[(1, 0), (0, 0)], dtype=ndtype) + test = (a != b) + assert_equal(test.data, [True, False]) + assert_equal(test.mask, [True, False]) + assert_(test.fill_value == True) + + test = (a[0] != b) + assert_equal(test.data, [True, True]) + assert_equal(test.mask, [True, False]) + assert_(test.fill_value == True) + + b = array([(1, 1), (2, 2)], mask=[(0, 1), (1, 0)], dtype=ndtype) + test = (a != b) + assert_equal(test.data, [False, False]) + assert_equal(test.mask, [False, False]) + assert_(test.fill_value == True) + + # complicated dtype, 2-dimensional array. + ndtype = [('A', int), ('B', [('BA', int), ('BB', int)])] + a = array([[(1, (1, 1)), (2, (2, 2))], + [(3, (3, 3)), (4, (4, 4))]], + mask=[[(0, (1, 0)), (0, (0, 1))], + [(1, (0, 0)), (1, (1, 1))]], dtype=ndtype) + test = (a[0, 0] != a) + assert_equal(test.data, [[False, True], [True, True]]) + assert_equal(test.mask, [[False, False], [False, True]]) + assert_(test.fill_value == True) + + def test_eq_ne_structured_extra(self): + # ensure simple examples are symmetric and make sense. 
+ # from https://github.com/numpy/numpy/pull/8590#discussion_r101126465 + dt = np.dtype('i4,i4') + for m1 in (mvoid((1, 2), mask=(0, 0), dtype=dt), + mvoid((1, 2), mask=(0, 1), dtype=dt), + mvoid((1, 2), mask=(1, 0), dtype=dt), + mvoid((1, 2), mask=(1, 1), dtype=dt)): + ma1 = m1.view(MaskedArray) + r1 = ma1.view('2i4') + for m2 in (np.array((1, 1), dtype=dt), + mvoid((1, 1), dtype=dt), + mvoid((1, 0), mask=(0, 1), dtype=dt), + mvoid((3, 2), mask=(0, 1), dtype=dt)): + ma2 = m2.view(MaskedArray) + r2 = ma2.view('2i4') + eq_expected = (r1 == r2).all() + assert_equal(m1 == m2, eq_expected) + assert_equal(m2 == m1, eq_expected) + assert_equal(ma1 == m2, eq_expected) + assert_equal(m1 == ma2, eq_expected) + assert_equal(ma1 == ma2, eq_expected) + # Also check it is the same if we do it element by element. + el_by_el = [m1[name] == m2[name] for name in dt.names] + assert_equal(array(el_by_el, dtype=bool).all(), eq_expected) + ne_expected = (r1 != r2).any() + assert_equal(m1 != m2, ne_expected) + assert_equal(m2 != m1, ne_expected) + assert_equal(ma1 != m2, ne_expected) + assert_equal(m1 != ma2, ne_expected) + assert_equal(ma1 != ma2, ne_expected) + el_by_el = [m1[name] != m2[name] for name in dt.names] + assert_equal(array(el_by_el, dtype=bool).any(), ne_expected) + + @pytest.mark.parametrize('dt', ['S', 'U']) + @pytest.mark.parametrize('fill', [None, 'A']) + def test_eq_for_strings(self, dt, fill): + # Test the equality of structured arrays + a = array(['a', 'b'], dtype=dt, mask=[0, 1], fill_value=fill) + + test = (a == a) + assert_equal(test.data, [True, True]) + assert_equal(test.mask, [False, True]) + assert_(test.fill_value == True) + + test = (a == a[0]) + assert_equal(test.data, [True, False]) + assert_equal(test.mask, [False, True]) + assert_(test.fill_value == True) + + b = array(['a', 'b'], dtype=dt, mask=[1, 0], fill_value=fill) + test = (a == b) + assert_equal(test.data, [False, False]) + assert_equal(test.mask, [True, True]) + assert_(test.fill_value == True) + + test = (a[0] == b) + assert_equal(test.data, [False, False]) + assert_equal(test.mask, [True, False]) + assert_(test.fill_value == True) + + test = (b == a[0]) + assert_equal(test.data, [False, False]) + assert_equal(test.mask, [True, False]) + assert_(test.fill_value == True) + + @pytest.mark.parametrize('dt', ['S', 'U']) + @pytest.mark.parametrize('fill', [None, 'A']) + def test_ne_for_strings(self, dt, fill): + # Test the equality of structured arrays + a = array(['a', 'b'], dtype=dt, mask=[0, 1], fill_value=fill) + + test = (a != a) + assert_equal(test.data, [False, False]) + assert_equal(test.mask, [False, True]) + assert_(test.fill_value == True) + + test = (a != a[0]) + assert_equal(test.data, [False, True]) + assert_equal(test.mask, [False, True]) + assert_(test.fill_value == True) + + b = array(['a', 'b'], dtype=dt, mask=[1, 0], fill_value=fill) + test = (a != b) + assert_equal(test.data, [True, True]) + assert_equal(test.mask, [True, True]) + assert_(test.fill_value == True) + + test = (a[0] != b) + assert_equal(test.data, [True, True]) + assert_equal(test.mask, [True, False]) + assert_(test.fill_value == True) + + test = (b != a[0]) + assert_equal(test.data, [True, True]) + assert_equal(test.mask, [True, False]) + assert_(test.fill_value == True) + + @pytest.mark.parametrize('dt1', num_dts, ids=num_ids) + @pytest.mark.parametrize('dt2', num_dts, ids=num_ids) + @pytest.mark.parametrize('fill', [None, 1]) + def test_eq_for_numeric(self, dt1, dt2, fill): + # Test the equality of structured arrays + a = array([0, 1], 
dtype=dt1, mask=[0, 1], fill_value=fill) + + test = (a == a) + assert_equal(test.data, [True, True]) + assert_equal(test.mask, [False, True]) + assert_(test.fill_value == True) + + test = (a == a[0]) + assert_equal(test.data, [True, False]) + assert_equal(test.mask, [False, True]) + assert_(test.fill_value == True) + + b = array([0, 1], dtype=dt2, mask=[1, 0], fill_value=fill) + test = (a == b) + assert_equal(test.data, [False, False]) + assert_equal(test.mask, [True, True]) + assert_(test.fill_value == True) + + test = (a[0] == b) + assert_equal(test.data, [False, False]) + assert_equal(test.mask, [True, False]) + assert_(test.fill_value == True) + + test = (b == a[0]) + assert_equal(test.data, [False, False]) + assert_equal(test.mask, [True, False]) + assert_(test.fill_value == True) + + @pytest.mark.parametrize('dt1', num_dts, ids=num_ids) + @pytest.mark.parametrize('dt2', num_dts, ids=num_ids) + @pytest.mark.parametrize('fill', [None, 1]) + def test_ne_for_numeric(self, dt1, dt2, fill): + # Test the equality of structured arrays + a = array([0, 1], dtype=dt1, mask=[0, 1], fill_value=fill) + + test = (a != a) + assert_equal(test.data, [False, False]) + assert_equal(test.mask, [False, True]) + assert_(test.fill_value == True) + + test = (a != a[0]) + assert_equal(test.data, [False, True]) + assert_equal(test.mask, [False, True]) + assert_(test.fill_value == True) + + b = array([0, 1], dtype=dt2, mask=[1, 0], fill_value=fill) + test = (a != b) + assert_equal(test.data, [True, True]) + assert_equal(test.mask, [True, True]) + assert_(test.fill_value == True) + + test = (a[0] != b) + assert_equal(test.data, [True, True]) + assert_equal(test.mask, [True, False]) + assert_(test.fill_value == True) + + test = (b != a[0]) + assert_equal(test.data, [True, True]) + assert_equal(test.mask, [True, False]) + assert_(test.fill_value == True) + + def test_eq_with_None(self): + # Really, comparisons with None should not be done, but check them + # anyway. Note that pep8 will flag these tests. + # Deprecation is in place for arrays, and when it happens this + # test will fail (and have to be changed accordingly). + + # With partial mask + with suppress_warnings() as sup: + sup.filter(FutureWarning, "Comparison to `None`") + a = array([None, 1], mask=[0, 1]) + assert_equal(a == None, array([True, False], mask=[0, 1])) + assert_equal(a.data == None, [True, False]) + assert_equal(a != None, array([False, True], mask=[0, 1])) + # With nomask + a = array([None, 1], mask=False) + assert_equal(a == None, [True, False]) + assert_equal(a != None, [False, True]) + # With complete mask + a = array([None, 2], mask=True) + assert_equal(a == None, array([False, True], mask=True)) + assert_equal(a != None, array([True, False], mask=True)) + # Fully masked, even comparison to None should return "masked" + a = masked + assert_equal(a == None, masked) + + def test_eq_with_scalar(self): + a = array(1) + assert_equal(a == 1, True) + assert_equal(a == 0, False) + assert_equal(a != 1, False) + assert_equal(a != 0, True) + b = array(1, mask=True) + assert_equal(b == 0, masked) + assert_equal(b == 1, masked) + assert_equal(b != 0, masked) + assert_equal(b != 1, masked) + + def test_eq_different_dimensions(self): + m1 = array([1, 1], mask=[0, 1]) + # test comparison with both masked and regular arrays. 
+ for m2 in (array([[0, 1], [1, 2]]), + np.array([[0, 1], [1, 2]])): + test = (m1 == m2) + assert_equal(test.data, [[False, False], + [True, False]]) + assert_equal(test.mask, [[False, True], + [False, True]]) + + def test_numpyarithmetic(self): + # Check that the mask is not back-propagated when using numpy functions + a = masked_array([-1, 0, 1, 2, 3], mask=[0, 0, 0, 0, 1]) + control = masked_array([np.nan, np.nan, 0, np.log(2), -1], + mask=[1, 1, 0, 0, 1]) + + test = log(a) + assert_equal(test, control) + assert_equal(test.mask, control.mask) + assert_equal(a.mask, [0, 0, 0, 0, 1]) + + test = np.log(a) + assert_equal(test, control) + assert_equal(test.mask, control.mask) + assert_equal(a.mask, [0, 0, 0, 0, 1]) + + +class TestMaskedArrayAttributes: + + def test_keepmask(self): + # Tests the keep mask flag + x = masked_array([1, 2, 3], mask=[1, 0, 0]) + mx = masked_array(x) + assert_equal(mx.mask, x.mask) + mx = masked_array(x, mask=[0, 1, 0], keep_mask=False) + assert_equal(mx.mask, [0, 1, 0]) + mx = masked_array(x, mask=[0, 1, 0], keep_mask=True) + assert_equal(mx.mask, [1, 1, 0]) + # We default to true + mx = masked_array(x, mask=[0, 1, 0]) + assert_equal(mx.mask, [1, 1, 0]) + + def test_hardmask(self): + # Test hard_mask + d = arange(5) + n = [0, 0, 0, 1, 1] + m = make_mask(n) + xh = array(d, mask=m, hard_mask=True) + # We need to copy, to avoid updating d in xh ! + xs = array(d, mask=m, hard_mask=False, copy=True) + xh[[1, 4]] = [10, 40] + xs[[1, 4]] = [10, 40] + assert_equal(xh._data, [0, 10, 2, 3, 4]) + assert_equal(xs._data, [0, 10, 2, 3, 40]) + assert_equal(xs.mask, [0, 0, 0, 1, 0]) + assert_(xh._hardmask) + assert_(not xs._hardmask) + xh[1:4] = [10, 20, 30] + xs[1:4] = [10, 20, 30] + assert_equal(xh._data, [0, 10, 20, 3, 4]) + assert_equal(xs._data, [0, 10, 20, 30, 40]) + assert_equal(xs.mask, nomask) + xh[0] = masked + xs[0] = masked + assert_equal(xh.mask, [1, 0, 0, 1, 1]) + assert_equal(xs.mask, [1, 0, 0, 0, 0]) + xh[:] = 1 + xs[:] = 1 + assert_equal(xh._data, [0, 1, 1, 3, 4]) + assert_equal(xs._data, [1, 1, 1, 1, 1]) + assert_equal(xh.mask, [1, 0, 0, 1, 1]) + assert_equal(xs.mask, nomask) + # Switch to soft mask + xh.soften_mask() + xh[:] = arange(5) + assert_equal(xh._data, [0, 1, 2, 3, 4]) + assert_equal(xh.mask, nomask) + # Switch back to hard mask + xh.harden_mask() + xh[xh < 3] = masked + assert_equal(xh._data, [0, 1, 2, 3, 4]) + assert_equal(xh._mask, [1, 1, 1, 0, 0]) + xh[filled(xh > 1, False)] = 5 + assert_equal(xh._data, [0, 1, 2, 5, 5]) + assert_equal(xh._mask, [1, 1, 1, 0, 0]) + + xh = array([[1, 2], [3, 4]], mask=[[1, 0], [0, 0]], hard_mask=True) + xh[0] = 0 + assert_equal(xh._data, [[1, 0], [3, 4]]) + assert_equal(xh._mask, [[1, 0], [0, 0]]) + xh[-1, -1] = 5 + assert_equal(xh._data, [[1, 0], [3, 5]]) + assert_equal(xh._mask, [[1, 0], [0, 0]]) + xh[filled(xh < 5, False)] = 2 + assert_equal(xh._data, [[1, 2], [2, 5]]) + assert_equal(xh._mask, [[1, 0], [0, 0]]) + + def test_hardmask_again(self): + # Another test of hardmask + d = arange(5) + n = [0, 0, 0, 1, 1] + m = make_mask(n) + xh = array(d, mask=m, hard_mask=True) + xh[4:5] = 999 + xh[0:1] = 999 + assert_equal(xh._data, [999, 1, 2, 3, 4]) + + def test_hardmask_oncemore_yay(self): + # OK, yet another test of hardmask + # Make sure that harden_mask/soften_mask//unshare_mask returns self + a = array([1, 2, 3], mask=[1, 0, 0]) + b = a.harden_mask() + assert_equal(a, b) + b[0] = 0 + assert_equal(a, b) + assert_equal(b, array([1, 2, 3], mask=[1, 0, 0])) + a = b.soften_mask() + a[0] = 0 + assert_equal(a, b) + 
assert_equal(b, array([0, 2, 3], mask=[0, 0, 0])) + + def test_smallmask(self): + # Checks the behaviour of _smallmask + a = arange(10) + a[1] = masked + a[1] = 1 + assert_equal(a._mask, nomask) + a = arange(10) + a._smallmask = False + a[1] = masked + a[1] = 1 + assert_equal(a._mask, zeros(10)) + + def test_shrink_mask(self): + # Tests .shrink_mask() + a = array([1, 2, 3], mask=[0, 0, 0]) + b = a.shrink_mask() + assert_equal(a, b) + assert_equal(a.mask, nomask) + + # Mask cannot be shrunk on structured types, so is a no-op + a = np.ma.array([(1, 2.0)], [('a', int), ('b', float)]) + b = a.copy() + a.shrink_mask() + assert_equal(a.mask, b.mask) + + def test_flat(self): + # Test that flat can return all types of items [#4585, #4615] + # test 2-D record array + # ... on structured array w/ masked records + x = array([[(1, 1.1, 'one'), (2, 2.2, 'two'), (3, 3.3, 'thr')], + [(4, 4.4, 'fou'), (5, 5.5, 'fiv'), (6, 6.6, 'six')]], + dtype=[('a', int), ('b', float), ('c', '|S8')]) + x['a'][0, 1] = masked + x['b'][1, 0] = masked + x['c'][0, 2] = masked + x[-1, -1] = masked + xflat = x.flat + assert_equal(xflat[0], x[0, 0]) + assert_equal(xflat[1], x[0, 1]) + assert_equal(xflat[2], x[0, 2]) + assert_equal(xflat[:3], x[0]) + assert_equal(xflat[3], x[1, 0]) + assert_equal(xflat[4], x[1, 1]) + assert_equal(xflat[5], x[1, 2]) + assert_equal(xflat[3:], x[1]) + assert_equal(xflat[-1], x[-1, -1]) + i = 0 + j = 0 + for xf in xflat: + assert_equal(xf, x[j, i]) + i += 1 + if i >= x.shape[-1]: + i = 0 + j += 1 + + def test_assign_dtype(self): + # check that the mask's dtype is updated when dtype is changed + a = np.zeros(4, dtype='f4,i4') + + m = np.ma.array(a) + m.dtype = np.dtype('f4') + repr(m) # raises? + assert_equal(m.dtype, np.dtype('f4')) + + # check that dtype changes that change shape of mask too much + # are not allowed + def assign(): + m = np.ma.array(a) + m.dtype = np.dtype('f8') + assert_raises(ValueError, assign) + + b = a.view(dtype='f4', type=np.ma.MaskedArray) # raises? 
+ assert_equal(b.dtype, np.dtype('f4')) + + # check that nomask is preserved + a = np.zeros(4, dtype='f4') + m = np.ma.array(a) + m.dtype = np.dtype('f4,i4') + assert_equal(m.dtype, np.dtype('f4,i4')) + assert_equal(m._mask, np.ma.nomask) + + +class TestFillingValues: + + def test_check_on_scalar(self): + # Test _check_fill_value set to valid and invalid values + _check_fill_value = np.ma.core._check_fill_value + + fval = _check_fill_value(0, int) + assert_equal(fval, 0) + fval = _check_fill_value(None, int) + assert_equal(fval, default_fill_value(0)) + + fval = _check_fill_value(0, "|S3") + assert_equal(fval, b"0") + fval = _check_fill_value(None, "|S3") + assert_equal(fval, default_fill_value(b"camelot!")) + assert_raises(TypeError, _check_fill_value, 1e+20, int) + assert_raises(TypeError, _check_fill_value, 'stuff', int) + + def test_check_on_fields(self): + # Tests _check_fill_value with records + _check_fill_value = np.ma.core._check_fill_value + ndtype = [('a', int), ('b', float), ('c', "|S3")] + # A check on a list should return a single record + fval = _check_fill_value([-999, -12345678.9, "???"], ndtype) + assert_(isinstance(fval, ndarray)) + assert_equal(fval.item(), [-999, -12345678.9, b"???"]) + # A check on None should output the defaults + fval = _check_fill_value(None, ndtype) + assert_(isinstance(fval, ndarray)) + assert_equal(fval.item(), [default_fill_value(0), + default_fill_value(0.), + asbytes(default_fill_value("0"))]) + #.....Using a structured type as fill_value should work + fill_val = np.array((-999, -12345678.9, "???"), dtype=ndtype) + fval = _check_fill_value(fill_val, ndtype) + assert_(isinstance(fval, ndarray)) + assert_equal(fval.item(), [-999, -12345678.9, b"???"]) + + #.....Using a flexible type w/ a different type shouldn't matter + # BEHAVIOR in 1.5 and earlier, and 1.13 and later: match structured + # types by position + fill_val = np.array((-999, -12345678.9, "???"), + dtype=[("A", int), ("B", float), ("C", "|S3")]) + fval = _check_fill_value(fill_val, ndtype) + assert_(isinstance(fval, ndarray)) + assert_equal(fval.item(), [-999, -12345678.9, b"???"]) + + #.....Using an object-array shouldn't matter either + fill_val = np.ndarray(shape=(1,), dtype=object) + fill_val[0] = (-999, -12345678.9, b"???") + fval = _check_fill_value(fill_val, object) + assert_(isinstance(fval, ndarray)) + assert_equal(fval.item(), [-999, -12345678.9, b"???"]) + # NOTE: This test was never run properly as "fill_value" rather than + # "fill_val" was assigned. Written properly, it fails. 
+ #fill_val = np.array((-999, -12345678.9, "???")) + #fval = _check_fill_value(fill_val, ndtype) + #assert_(isinstance(fval, ndarray)) + #assert_equal(fval.item(), [-999, -12345678.9, b"???"]) + #.....One-field-only flexible type should work as well + ndtype = [("a", int)] + fval = _check_fill_value(-999999999, ndtype) + assert_(isinstance(fval, ndarray)) + assert_equal(fval.item(), (-999999999,)) + + def test_fillvalue_conversion(self): + # Tests the behavior of fill_value during conversion + # We had a tailored comment to make sure special attributes are + # properly dealt with + a = array([b'3', b'4', b'5']) + a._optinfo.update({'comment':"updated!"}) + + b = array(a, dtype=int) + assert_equal(b._data, [3, 4, 5]) + assert_equal(b.fill_value, default_fill_value(0)) + + b = array(a, dtype=float) + assert_equal(b._data, [3, 4, 5]) + assert_equal(b.fill_value, default_fill_value(0.)) + + b = a.astype(int) + assert_equal(b._data, [3, 4, 5]) + assert_equal(b.fill_value, default_fill_value(0)) + assert_equal(b._optinfo['comment'], "updated!") + + b = a.astype([('a', '|S3')]) + assert_equal(b['a']._data, a._data) + assert_equal(b['a'].fill_value, a.fill_value) + + def test_default_fill_value(self): + # check all calling conventions + f1 = default_fill_value(1.) + f2 = default_fill_value(np.array(1.)) + f3 = default_fill_value(np.array(1.).dtype) + assert_equal(f1, f2) + assert_equal(f1, f3) + + def test_default_fill_value_structured(self): + fields = array([(1, 1, 1)], + dtype=[('i', int), ('s', '|S8'), ('f', float)]) + + f1 = default_fill_value(fields) + f2 = default_fill_value(fields.dtype) + expected = np.array((default_fill_value(0), + default_fill_value('0'), + default_fill_value(0.)), dtype=fields.dtype) + assert_equal(f1, expected) + assert_equal(f2, expected) + + def test_default_fill_value_void(self): + dt = np.dtype([('v', 'V7')]) + f = default_fill_value(dt) + assert_equal(f['v'], np.array(default_fill_value(dt['v']), dt['v'])) + + def test_fillvalue(self): + # Yet more fun with the fill_value + data = masked_array([1, 2, 3], fill_value=-999) + series = data[[0, 2, 1]] + assert_equal(series._fill_value, data._fill_value) + + mtype = [('f', float), ('s', '|S3')] + x = array([(1, 'a'), (2, 'b'), (pi, 'pi')], dtype=mtype) + x.fill_value = 999 + assert_equal(x.fill_value.item(), [999., b'999']) + assert_equal(x['f'].fill_value, 999) + assert_equal(x['s'].fill_value, b'999') + + x.fill_value = (9, '???') + assert_equal(x.fill_value.item(), (9, b'???')) + assert_equal(x['f'].fill_value, 9) + assert_equal(x['s'].fill_value, b'???') + + x = array([1, 2, 3.1]) + x.fill_value = 999 + assert_equal(np.asarray(x.fill_value).dtype, float) + assert_equal(x.fill_value, 999.) 
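+ # The public fill_value is backed by a private 0-d ndarray, which the
+ # next assertion inspects directly.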
+ assert_equal(x._fill_value, np.array(999.))
+
+ def test_subarray_fillvalue(self):
+ # gh-10483 test multi-field index fill value
+ fields = array([(1, 1, 1)],
+ dtype=[('i', int), ('s', '|S8'), ('f', float)])
+ with suppress_warnings() as sup:
+ sup.filter(FutureWarning, "Numpy has detected")
+ subfields = fields[['i', 'f']]
+ assert_equal(tuple(subfields.fill_value), (999999, 1.e+20))
+ # test comparison does not raise:
+ subfields[1:] == subfields[:-1]
+
+ def test_fillvalue_exotic_dtype(self):
+ # Tests yet more exotic flexible dtypes
+ _check_fill_value = np.ma.core._check_fill_value
+ ndtype = [('i', int), ('s', '|S8'), ('f', float)]
+ control = np.array((default_fill_value(0),
+ default_fill_value('0'),
+ default_fill_value(0.),),
+ dtype=ndtype)
+ assert_equal(_check_fill_value(None, ndtype), control)
+ # The shape shouldn't matter
+ ndtype = [('f0', float, (2, 2))]
+ control = np.array((default_fill_value(0.),),
+ dtype=[('f0', float)]).astype(ndtype)
+ assert_equal(_check_fill_value(None, ndtype), control)
+ control = np.array((0,), dtype=[('f0', float)]).astype(ndtype)
+ assert_equal(_check_fill_value(0, ndtype), control)
+
+ ndtype = np.dtype("int, (2,3)float, float")
+ control = np.array((default_fill_value(0),
+ default_fill_value(0.),
+ default_fill_value(0.),),
+ dtype="int, float, float").astype(ndtype)
+ test = _check_fill_value(None, ndtype)
+ assert_equal(test, control)
+ control = np.array((0, 0, 0), dtype="int, float, float").astype(ndtype)
+ assert_equal(_check_fill_value(0, ndtype), control)
+ # but when indexing, fill value should become scalar not tuple
+ # See issue #6723
+ M = masked_array(control)
+ assert_equal(M["f1"].fill_value.ndim, 0)
+
+ def test_fillvalue_datetime_timedelta(self):
+ # Test default fillvalue for datetime64 and timedelta64 types.
+ # See issue #4476, this would return '?' which would cause errors
+ # elsewhere
+
+ for timecode in ("as", "fs", "ps", "ns", "us", "ms", "s", "m",
+ "h", "D", "W", "M", "Y"):
+ control = numpy.datetime64("NaT", timecode)
+ test = default_fill_value(numpy.dtype("<M8[" + timecode + "]"))
+ assert_equal(test, control)
+ control = numpy.timedelta64("NaT", timecode)
+ test = default_fill_value(numpy.dtype("<m8[" + timecode + "]"))
+ assert_equal(test, control)
+
+ def test_no_masked_nan_warnings(self):
+ # check that a NaN sitting behind the mask does not trigger
+ # floating point warnings
+ m = np.ma.array([0.5, np.nan], mask=[0, 1])
+
+ with warnings.catch_warnings():
+ warnings.filterwarnings("error")
+
+ # test unary and binary ufuncs
+ exp(m)
+ add(m, 1)
+ m > 0
+
+ # test different unary domains
+ sqrt(m)
+ log(m)
+ tan(m)
+ arcsin(m)
+ arccos(m)
+ arccosh(m)
+
+ # test binary domains
+ divide(m, 2)
+
+ # also check that allclose uses ma ufuncs, to avoid warning
+ allclose(m, 0.5)
+
+class TestMaskedArrayInPlaceArithmetic:
+ # Test MaskedArray Arithmetic
+
+ def setup(self):
+ x = arange(10)
+ y = arange(10)
+ xm = arange(10)
+ xm[2] = masked
+ self.intdata = (x, y, xm)
+ self.floatdata = (x.astype(float), y.astype(float), xm.astype(float))
+ self.othertypes = np.typecodes['AllInteger'] + np.typecodes['AllFloat']
+ self.othertypes = [np.dtype(_).type for _ in self.othertypes]
+ self.uint8data = (
+ x.astype(np.uint8),
+ y.astype(np.uint8),
+ xm.astype(np.uint8)
+ )
+
+ def test_inplace_addition_scalar(self):
+ # Test of inplace additions
+ (x, y, xm) = self.intdata
+ xm[2] = masked
+ x += 1
+ assert_equal(x, y + 1)
+ xm += 1
+ assert_equal(xm, y + 1)
+
+ (x, _, xm) = self.floatdata
+ id1 = x.data.ctypes.data
+ x += 1.
+ assert_(id1 == x.data.ctypes.data)
+ assert_equal(x, y + 1.)
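+ # Illustrative sketch (added for clarity, not part of the upstream
+ # test suite): in-place arithmetic with a masked right-hand side ORs
+ # the two masks, which the array tests below verify via mask_or:
+ #
+ # >>> a = masked_array([1, 2, 3], mask=[0, 0, 1])
+ # >>> a += masked_array([10, 20, 30], mask=[1, 0, 0])
+ # >>> print(a)
+ # [-- 22 --]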
+ + def test_inplace_addition_array(self): + # Test of inplace additions + (x, y, xm) = self.intdata + m = xm.mask + a = arange(10, dtype=np.int16) + a[-1] = masked + x += a + xm += a + assert_equal(x, y + a) + assert_equal(xm, y + a) + assert_equal(xm.mask, mask_or(m, a.mask)) + + def test_inplace_subtraction_scalar(self): + # Test of inplace subtractions + (x, y, xm) = self.intdata + x -= 1 + assert_equal(x, y - 1) + xm -= 1 + assert_equal(xm, y - 1) + + def test_inplace_subtraction_array(self): + # Test of inplace subtractions + (x, y, xm) = self.floatdata + m = xm.mask + a = arange(10, dtype=float) + a[-1] = masked + x -= a + xm -= a + assert_equal(x, y - a) + assert_equal(xm, y - a) + assert_equal(xm.mask, mask_or(m, a.mask)) + + def test_inplace_multiplication_scalar(self): + # Test of inplace multiplication + (x, y, xm) = self.floatdata + x *= 2.0 + assert_equal(x, y * 2) + xm *= 2.0 + assert_equal(xm, y * 2) + + def test_inplace_multiplication_array(self): + # Test of inplace multiplication + (x, y, xm) = self.floatdata + m = xm.mask + a = arange(10, dtype=float) + a[-1] = masked + x *= a + xm *= a + assert_equal(x, y * a) + assert_equal(xm, y * a) + assert_equal(xm.mask, mask_or(m, a.mask)) + + def test_inplace_division_scalar_int(self): + # Test of inplace division + (x, y, xm) = self.intdata + x = arange(10) * 2 + xm = arange(10) * 2 + xm[2] = masked + x //= 2 + assert_equal(x, y) + xm //= 2 + assert_equal(xm, y) + + def test_inplace_division_scalar_float(self): + # Test of inplace division + (x, y, xm) = self.floatdata + x /= 2.0 + assert_equal(x, y / 2.0) + xm /= arange(10) + assert_equal(xm, ones((10,))) + + def test_inplace_division_array_float(self): + # Test of inplace division + (x, y, xm) = self.floatdata + m = xm.mask + a = arange(10, dtype=float) + a[-1] = masked + x /= a + xm /= a + assert_equal(x, y / a) + assert_equal(xm, y / a) + assert_equal(xm.mask, mask_or(mask_or(m, a.mask), (a == 0))) + + def test_inplace_division_misc(self): + + x = [1., 1., 1., -2., pi / 2., 4., 5., -10., 10., 1., 2., 3.] + y = [5., 0., 3., 2., -1., -4., 0., -10., 10., 1., 0., 3.] 
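+ # Note that y contains zeros, so the divisions below exercise the
+ # divide domain mask on top of the propagated input masks.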
+ m1 = [1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0] + m2 = [0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1] + xm = masked_array(x, mask=m1) + ym = masked_array(y, mask=m2) + + z = xm / ym + assert_equal(z._mask, [1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1]) + assert_equal(z._data, + [1., 1., 1., -1., -pi / 2., 4., 5., 1., 1., 1., 2., 3.]) + + xm = xm.copy() + xm /= ym + assert_equal(xm._mask, [1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1]) + assert_equal(z._data, + [1., 1., 1., -1., -pi / 2., 4., 5., 1., 1., 1., 2., 3.]) + + def test_datafriendly_add(self): + # Test keeping data w/ (inplace) addition + x = array([1, 2, 3], mask=[0, 0, 1]) + # Test add w/ scalar + xx = x + 1 + assert_equal(xx.data, [2, 3, 3]) + assert_equal(xx.mask, [0, 0, 1]) + # Test iadd w/ scalar + x += 1 + assert_equal(x.data, [2, 3, 3]) + assert_equal(x.mask, [0, 0, 1]) + # Test add w/ array + x = array([1, 2, 3], mask=[0, 0, 1]) + xx = x + array([1, 2, 3], mask=[1, 0, 0]) + assert_equal(xx.data, [1, 4, 3]) + assert_equal(xx.mask, [1, 0, 1]) + # Test iadd w/ array + x = array([1, 2, 3], mask=[0, 0, 1]) + x += array([1, 2, 3], mask=[1, 0, 0]) + assert_equal(x.data, [1, 4, 3]) + assert_equal(x.mask, [1, 0, 1]) + + def test_datafriendly_sub(self): + # Test keeping data w/ (inplace) subtraction + # Test sub w/ scalar + x = array([1, 2, 3], mask=[0, 0, 1]) + xx = x - 1 + assert_equal(xx.data, [0, 1, 3]) + assert_equal(xx.mask, [0, 0, 1]) + # Test isub w/ scalar + x = array([1, 2, 3], mask=[0, 0, 1]) + x -= 1 + assert_equal(x.data, [0, 1, 3]) + assert_equal(x.mask, [0, 0, 1]) + # Test sub w/ array + x = array([1, 2, 3], mask=[0, 0, 1]) + xx = x - array([1, 2, 3], mask=[1, 0, 0]) + assert_equal(xx.data, [1, 0, 3]) + assert_equal(xx.mask, [1, 0, 1]) + # Test isub w/ array + x = array([1, 2, 3], mask=[0, 0, 1]) + x -= array([1, 2, 3], mask=[1, 0, 0]) + assert_equal(x.data, [1, 0, 3]) + assert_equal(x.mask, [1, 0, 1]) + + def test_datafriendly_mul(self): + # Test keeping data w/ (inplace) multiplication + # Test mul w/ scalar + x = array([1, 2, 3], mask=[0, 0, 1]) + xx = x * 2 + assert_equal(xx.data, [2, 4, 3]) + assert_equal(xx.mask, [0, 0, 1]) + # Test imul w/ scalar + x = array([1, 2, 3], mask=[0, 0, 1]) + x *= 2 + assert_equal(x.data, [2, 4, 3]) + assert_equal(x.mask, [0, 0, 1]) + # Test mul w/ array + x = array([1, 2, 3], mask=[0, 0, 1]) + xx = x * array([10, 20, 30], mask=[1, 0, 0]) + assert_equal(xx.data, [1, 40, 3]) + assert_equal(xx.mask, [1, 0, 1]) + # Test imul w/ array + x = array([1, 2, 3], mask=[0, 0, 1]) + x *= array([10, 20, 30], mask=[1, 0, 0]) + assert_equal(x.data, [1, 40, 3]) + assert_equal(x.mask, [1, 0, 1]) + + def test_datafriendly_div(self): + # Test keeping data w/ (inplace) division + # Test div on scalar + x = array([1, 2, 3], mask=[0, 0, 1]) + xx = x / 2. + assert_equal(xx.data, [1 / 2., 2 / 2., 3]) + assert_equal(xx.mask, [0, 0, 1]) + # Test idiv on scalar + x = array([1., 2., 3.], mask=[0, 0, 1]) + x /= 2. + assert_equal(x.data, [1 / 2., 2 / 2., 3]) + assert_equal(x.mask, [0, 0, 1]) + # Test div on array + x = array([1., 2., 3.], mask=[0, 0, 1]) + xx = x / array([10., 20., 30.], mask=[1, 0, 0]) + assert_equal(xx.data, [1., 2. 
/ 20., 3.]) + assert_equal(xx.mask, [1, 0, 1]) + # Test idiv on array + x = array([1., 2., 3.], mask=[0, 0, 1]) + x /= array([10., 20., 30.], mask=[1, 0, 0]) + assert_equal(x.data, [1., 2 / 20., 3.]) + assert_equal(x.mask, [1, 0, 1]) + + def test_datafriendly_pow(self): + # Test keeping data w/ (inplace) power + # Test pow on scalar + x = array([1., 2., 3.], mask=[0, 0, 1]) + xx = x ** 2.5 + assert_equal(xx.data, [1., 2. ** 2.5, 3.]) + assert_equal(xx.mask, [0, 0, 1]) + # Test ipow on scalar + x **= 2.5 + assert_equal(x.data, [1., 2. ** 2.5, 3]) + assert_equal(x.mask, [0, 0, 1]) + + def test_datafriendly_add_arrays(self): + a = array([[1, 1], [3, 3]]) + b = array([1, 1], mask=[0, 0]) + a += b + assert_equal(a, [[2, 2], [4, 4]]) + if a.mask is not nomask: + assert_equal(a.mask, [[0, 0], [0, 0]]) + + a = array([[1, 1], [3, 3]]) + b = array([1, 1], mask=[0, 1]) + a += b + assert_equal(a, [[2, 2], [4, 4]]) + assert_equal(a.mask, [[0, 1], [0, 1]]) + + def test_datafriendly_sub_arrays(self): + a = array([[1, 1], [3, 3]]) + b = array([1, 1], mask=[0, 0]) + a -= b + assert_equal(a, [[0, 0], [2, 2]]) + if a.mask is not nomask: + assert_equal(a.mask, [[0, 0], [0, 0]]) + + a = array([[1, 1], [3, 3]]) + b = array([1, 1], mask=[0, 1]) + a -= b + assert_equal(a, [[0, 0], [2, 2]]) + assert_equal(a.mask, [[0, 1], [0, 1]]) + + def test_datafriendly_mul_arrays(self): + a = array([[1, 1], [3, 3]]) + b = array([1, 1], mask=[0, 0]) + a *= b + assert_equal(a, [[1, 1], [3, 3]]) + if a.mask is not nomask: + assert_equal(a.mask, [[0, 0], [0, 0]]) + + a = array([[1, 1], [3, 3]]) + b = array([1, 1], mask=[0, 1]) + a *= b + assert_equal(a, [[1, 1], [3, 3]]) + assert_equal(a.mask, [[0, 1], [0, 1]]) + + def test_inplace_addition_scalar_type(self): + # Test of inplace additions + for t in self.othertypes: + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings("always") + (x, y, xm) = (_.astype(t) for _ in self.uint8data) + xm[2] = masked + x += t(1) + assert_equal(x, y + t(1)) + xm += t(1) + assert_equal(xm, y + t(1)) + + assert_equal(len(w), 0, f'Failed on type={t}.') + + def test_inplace_addition_array_type(self): + # Test of inplace additions + for t in self.othertypes: + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings("always") + (x, y, xm) = (_.astype(t) for _ in self.uint8data) + m = xm.mask + a = arange(10, dtype=t) + a[-1] = masked + x += a + xm += a + assert_equal(x, y + a) + assert_equal(xm, y + a) + assert_equal(xm.mask, mask_or(m, a.mask)) + + assert_equal(len(w), 0, f'Failed on type={t}.') + + def test_inplace_subtraction_scalar_type(self): + # Test of inplace subtractions + for t in self.othertypes: + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings("always") + (x, y, xm) = (_.astype(t) for _ in self.uint8data) + x -= t(1) + assert_equal(x, y - t(1)) + xm -= t(1) + assert_equal(xm, y - t(1)) + + assert_equal(len(w), 0, f'Failed on type={t}.') + + def test_inplace_subtraction_array_type(self): + # Test of inplace subtractions + for t in self.othertypes: + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings("always") + (x, y, xm) = (_.astype(t) for _ in self.uint8data) + m = xm.mask + a = arange(10, dtype=t) + a[-1] = masked + x -= a + xm -= a + assert_equal(x, y - a) + assert_equal(xm, y - a) + assert_equal(xm.mask, mask_or(m, a.mask)) + + assert_equal(len(w), 0, f'Failed on type={t}.') + + def test_inplace_multiplication_scalar_type(self): + # Test of inplace multiplication + for t in self.othertypes: + with 
warnings.catch_warnings(record=True) as w: + warnings.filterwarnings("always") + (x, y, xm) = (_.astype(t) for _ in self.uint8data) + x *= t(2) + assert_equal(x, y * t(2)) + xm *= t(2) + assert_equal(xm, y * t(2)) + + assert_equal(len(w), 0, f'Failed on type={t}.') + + def test_inplace_multiplication_array_type(self): + # Test of inplace multiplication + for t in self.othertypes: + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings("always") + (x, y, xm) = (_.astype(t) for _ in self.uint8data) + m = xm.mask + a = arange(10, dtype=t) + a[-1] = masked + x *= a + xm *= a + assert_equal(x, y * a) + assert_equal(xm, y * a) + assert_equal(xm.mask, mask_or(m, a.mask)) + + assert_equal(len(w), 0, f'Failed on type={t}.') + + def test_inplace_floor_division_scalar_type(self): + # Test of inplace division + # Check for TypeError in case of unsupported types + unsupported = {np.dtype(t).type for t in np.typecodes["Complex"]} + for t in self.othertypes: + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings("always") + (x, y, xm) = (_.astype(t) for _ in self.uint8data) + x = arange(10, dtype=t) * t(2) + xm = arange(10, dtype=t) * t(2) + xm[2] = masked + try: + x //= t(2) + xm //= t(2) + assert_equal(x, y) + assert_equal(xm, y) + + assert_equal(len(w), 0, "Failed on type=%s." % t) + except TypeError: + msg = f"Supported type {t} throwing TypeError" + assert t in unsupported, msg + + def test_inplace_floor_division_array_type(self): + # Test of inplace division + # Check for TypeError in case of unsupported types + unsupported = {np.dtype(t).type for t in np.typecodes["Complex"]} + for t in self.othertypes: + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings("always") + (x, y, xm) = (_.astype(t) for _ in self.uint8data) + m = xm.mask + a = arange(10, dtype=t) + a[-1] = masked + try: + x //= a + xm //= a + assert_equal(x, y // a) + assert_equal(xm, y // a) + assert_equal( + xm.mask, + mask_or(mask_or(m, a.mask), (a == t(0))) + ) + + assert_equal(len(w), 0, f'Failed on type={t}.') + except TypeError: + msg = f"Supported type {t} throwing TypeError" + assert t in unsupported, msg + + def test_inplace_division_scalar_type(self): + # Test of inplace division + for t in self.othertypes: + with suppress_warnings() as sup: + sup.record(UserWarning) + + (x, y, xm) = (_.astype(t) for _ in self.uint8data) + x = arange(10, dtype=t) * t(2) + xm = arange(10, dtype=t) * t(2) + xm[2] = masked + + # May get a DeprecationWarning or a TypeError. + # + # This is a consequence of the fact that this is true divide + # and will require casting to float for calculation and + # casting back to the original type. This will only be raised + # with integers. Whether it is an error or warning is only + # dependent on how stringent the casting rules are. + # + # Will handle the same way. 
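+ #
+ # Concretely, with t = np.int8 the true division is computed in
+ # float and has to be cast back to int8, which numpy may reject
+ # (TypeError) or warn about depending on the casting rules.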
+ try: + x /= t(2) + assert_equal(x, y) + except (DeprecationWarning, TypeError) as e: + warnings.warn(str(e), stacklevel=1) + try: + xm /= t(2) + assert_equal(xm, y) + except (DeprecationWarning, TypeError) as e: + warnings.warn(str(e), stacklevel=1) + + if issubclass(t, np.integer): + assert_equal(len(sup.log), 2, f'Failed on type={t}.') + else: + assert_equal(len(sup.log), 0, f'Failed on type={t}.') + + def test_inplace_division_array_type(self): + # Test of inplace division + for t in self.othertypes: + with suppress_warnings() as sup: + sup.record(UserWarning) + (x, y, xm) = (_.astype(t) for _ in self.uint8data) + m = xm.mask + a = arange(10, dtype=t) + a[-1] = masked + + # May get a DeprecationWarning or a TypeError. + # + # This is a consequence of the fact that this is true divide + # and will require casting to float for calculation and + # casting back to the original type. This will only be raised + # with integers. Whether it is an error or warning is only + # dependent on how stringent the casting rules are. + # + # Will handle the same way. + try: + x /= a + assert_equal(x, y / a) + except (DeprecationWarning, TypeError) as e: + warnings.warn(str(e), stacklevel=1) + try: + xm /= a + assert_equal(xm, y / a) + assert_equal( + xm.mask, + mask_or(mask_or(m, a.mask), (a == t(0))) + ) + except (DeprecationWarning, TypeError) as e: + warnings.warn(str(e), stacklevel=1) + + if issubclass(t, np.integer): + assert_equal(len(sup.log), 2, f'Failed on type={t}.') + else: + assert_equal(len(sup.log), 0, f'Failed on type={t}.') + + def test_inplace_pow_type(self): + # Test keeping data w/ (inplace) power + for t in self.othertypes: + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings("always") + # Test pow on scalar + x = array([1, 2, 3], mask=[0, 0, 1], dtype=t) + xx = x ** t(2) + xx_r = array([1, 2 ** 2, 3], mask=[0, 0, 1], dtype=t) + assert_equal(xx.data, xx_r.data) + assert_equal(xx.mask, xx_r.mask) + # Test ipow on scalar + x **= t(2) + assert_equal(x.data, xx_r.data) + assert_equal(x.mask, xx_r.mask) + + assert_equal(len(w), 0, f'Failed on type={t}.') + + +class TestMaskedArrayMethods: + # Test class for miscellaneous MaskedArrays methods. + def setup(self): + # Base data definition. + x = np.array([8.375, 7.545, 8.828, 8.5, 1.757, 5.928, + 8.43, 7.78, 9.865, 5.878, 8.979, 4.732, + 3.012, 6.022, 5.095, 3.116, 5.238, 3.957, + 6.04, 9.63, 7.712, 3.382, 4.489, 6.479, + 7.189, 9.645, 5.395, 4.961, 9.894, 2.893, + 7.357, 9.828, 6.272, 3.758, 6.693, 0.993]) + X = x.reshape(6, 6) + XX = x.reshape(3, 2, 2, 3) + + m = np.array([0, 1, 0, 1, 0, 0, + 1, 0, 1, 1, 0, 1, + 0, 0, 0, 1, 0, 1, + 0, 0, 0, 1, 1, 1, + 1, 0, 0, 1, 0, 0, + 0, 0, 1, 0, 1, 0]) + mx = array(data=x, mask=m) + mX = array(data=X, mask=m.reshape(X.shape)) + mXX = array(data=XX, mask=m.reshape(XX.shape)) + + m2 = np.array([1, 1, 0, 1, 0, 0, + 1, 1, 1, 1, 0, 1, + 0, 0, 1, 1, 0, 1, + 0, 0, 0, 1, 1, 1, + 1, 0, 0, 1, 1, 0, + 0, 0, 1, 0, 1, 1]) + m2x = array(data=x, mask=m2) + m2X = array(data=X, mask=m2.reshape(X.shape)) + m2XX = array(data=XX, mask=m2.reshape(XX.shape)) + self.d = (x, X, XX, m, mx, mX, mXX, m2x, m2X, m2XX) + + def test_generic_methods(self): + # Tests some MaskedArray methods. 
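+ # With nothing masked, every method should simply agree with the
+ # same method called on the underlying ndarray (._data).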
+ a = array([1, 3, 2]) + assert_equal(a.any(), a._data.any()) + assert_equal(a.all(), a._data.all()) + assert_equal(a.argmax(), a._data.argmax()) + assert_equal(a.argmin(), a._data.argmin()) + assert_equal(a.choose(0, 1, 2, 3, 4), a._data.choose(0, 1, 2, 3, 4)) + assert_equal(a.compress([1, 0, 1]), a._data.compress([1, 0, 1])) + assert_equal(a.conj(), a._data.conj()) + assert_equal(a.conjugate(), a._data.conjugate()) + + m = array([[1, 2], [3, 4]]) + assert_equal(m.diagonal(), m._data.diagonal()) + assert_equal(a.sum(), a._data.sum()) + assert_equal(a.take([1, 2]), a._data.take([1, 2])) + assert_equal(m.transpose(), m._data.transpose()) + + def test_allclose(self): + # Tests allclose on arrays + a = np.random.rand(10) + b = a + np.random.rand(10) * 1e-8 + assert_(allclose(a, b)) + # Test allclose w/ infs + a[0] = np.inf + assert_(not allclose(a, b)) + b[0] = np.inf + assert_(allclose(a, b)) + # Test allclose w/ masked + a = masked_array(a) + a[-1] = masked + assert_(allclose(a, b, masked_equal=True)) + assert_(not allclose(a, b, masked_equal=False)) + # Test comparison w/ scalar + a *= 1e-8 + a[0] = 0 + assert_(allclose(a, 0, masked_equal=True)) + + # Test that the function works for MIN_INT integer typed arrays + a = masked_array([np.iinfo(np.int_).min], dtype=np.int_) + assert_(allclose(a, a)) + + def test_allclose_timedelta(self): + # Allclose currently works for timedelta64 as long as `atol` is + # an integer or also a timedelta64 + a = np.array([[1, 2, 3, 4]], dtype="m8[ns]") + assert allclose(a, a, atol=0) + assert allclose(a, a, atol=np.timedelta64(1, "ns")) + + def test_allany(self): + # Checks the any/all methods/functions. + x = np.array([[0.13, 0.26, 0.90], + [0.28, 0.33, 0.63], + [0.31, 0.87, 0.70]]) + m = np.array([[True, False, False], + [False, False, False], + [True, True, False]], dtype=np.bool_) + mx = masked_array(x, mask=m) + mxbig = (mx > 0.5) + mxsmall = (mx < 0.5) + + assert_(not mxbig.all()) + assert_(mxbig.any()) + assert_equal(mxbig.all(0), [False, False, True]) + assert_equal(mxbig.all(1), [False, False, True]) + assert_equal(mxbig.any(0), [False, False, True]) + assert_equal(mxbig.any(1), [True, True, True]) + + assert_(not mxsmall.all()) + assert_(mxsmall.any()) + assert_equal(mxsmall.all(0), [True, True, False]) + assert_equal(mxsmall.all(1), [False, False, False]) + assert_equal(mxsmall.any(0), [True, True, False]) + assert_equal(mxsmall.any(1), [True, True, False]) + + def test_allany_oddities(self): + # Some fun with all and any + store = empty((), dtype=bool) + full = array([1, 2, 3], mask=True) + + assert_(full.all() is masked) + full.all(out=store) + assert_(store) + assert_(store._mask, True) + assert_(store is not masked) + + store = empty((), dtype=bool) + assert_(full.any() is masked) + full.any(out=store) + assert_(not store) + assert_(store._mask, True) + assert_(store is not masked) + + def test_argmax_argmin(self): + # Tests argmin & argmax on MaskedArrays. 
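+ # argmin/argmax skip masked entries, which is why the winning index
+ # differs between the m and m2 maskings of the same data.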
+ (x, X, XX, m, mx, mX, mXX, m2x, m2X, m2XX) = self.d + + assert_equal(mx.argmin(), 35) + assert_equal(mX.argmin(), 35) + assert_equal(m2x.argmin(), 4) + assert_equal(m2X.argmin(), 4) + assert_equal(mx.argmax(), 28) + assert_equal(mX.argmax(), 28) + assert_equal(m2x.argmax(), 31) + assert_equal(m2X.argmax(), 31) + + assert_equal(mX.argmin(0), [2, 2, 2, 5, 0, 5]) + assert_equal(m2X.argmin(0), [2, 2, 4, 5, 0, 4]) + assert_equal(mX.argmax(0), [0, 5, 0, 5, 4, 0]) + assert_equal(m2X.argmax(0), [5, 5, 0, 5, 1, 0]) + + assert_equal(mX.argmin(1), [4, 1, 0, 0, 5, 5, ]) + assert_equal(m2X.argmin(1), [4, 4, 0, 0, 5, 3]) + assert_equal(mX.argmax(1), [2, 4, 1, 1, 4, 1]) + assert_equal(m2X.argmax(1), [2, 4, 1, 1, 1, 1]) + + def test_clip(self): + # Tests clip on MaskedArrays. + x = np.array([8.375, 7.545, 8.828, 8.5, 1.757, 5.928, + 8.43, 7.78, 9.865, 5.878, 8.979, 4.732, + 3.012, 6.022, 5.095, 3.116, 5.238, 3.957, + 6.04, 9.63, 7.712, 3.382, 4.489, 6.479, + 7.189, 9.645, 5.395, 4.961, 9.894, 2.893, + 7.357, 9.828, 6.272, 3.758, 6.693, 0.993]) + m = np.array([0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, + 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, + 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0]) + mx = array(x, mask=m) + clipped = mx.clip(2, 8) + assert_equal(clipped.mask, mx.mask) + assert_equal(clipped._data, x.clip(2, 8)) + assert_equal(clipped._data, mx._data.clip(2, 8)) + + def test_clip_out(self): + # gh-14140 + a = np.arange(10) + m = np.ma.MaskedArray(a, mask=[0, 1] * 5) + m.clip(0, 5, out=m) + assert_equal(m.mask, [0, 1] * 5) + + def test_compress(self): + # test compress + a = masked_array([1., 2., 3., 4., 5.], fill_value=9999) + condition = (a > 1.5) & (a < 3.5) + assert_equal(a.compress(condition), [2., 3.]) + + a[[2, 3]] = masked + b = a.compress(condition) + assert_equal(b._data, [2., 3.]) + assert_equal(b._mask, [0, 1]) + assert_equal(b.fill_value, 9999) + assert_equal(b, a[condition]) + + condition = (a < 4.) 
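+ # The condition is itself partly masked here; compress coerces it to
+ # a plain ndarray, so the underlying True at the masked slot still
+ # selects that (masked) element, giving the mask [0, 0, 1] below.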
+ b = a.compress(condition) + assert_equal(b._data, [1., 2., 3.]) + assert_equal(b._mask, [0, 0, 1]) + assert_equal(b.fill_value, 9999) + assert_equal(b, a[condition]) + + a = masked_array([[10, 20, 30], [40, 50, 60]], + mask=[[0, 0, 1], [1, 0, 0]]) + b = a.compress(a.ravel() >= 22) + assert_equal(b._data, [30, 40, 50, 60]) + assert_equal(b._mask, [1, 1, 0, 0]) + + x = np.array([3, 1, 2]) + b = a.compress(x >= 2, axis=1) + assert_equal(b._data, [[10, 30], [40, 60]]) + assert_equal(b._mask, [[0, 1], [1, 0]]) + + def test_compressed(self): + # Tests compressed + a = array([1, 2, 3, 4], mask=[0, 0, 0, 0]) + b = a.compressed() + assert_equal(b, a) + a[0] = masked + b = a.compressed() + assert_equal(b, [2, 3, 4]) + + def test_empty(self): + # Tests empty/like + datatype = [('a', int), ('b', float), ('c', '|S8')] + a = masked_array([(1, 1.1, '1.1'), (2, 2.2, '2.2'), (3, 3.3, '3.3')], + dtype=datatype) + assert_equal(len(a.fill_value.item()), len(datatype)) + + b = empty_like(a) + assert_equal(b.shape, a.shape) + assert_equal(b.fill_value, a.fill_value) + + b = empty(len(a), dtype=datatype) + assert_equal(b.shape, a.shape) + assert_equal(b.fill_value, a.fill_value) + + # check empty_like mask handling + a = masked_array([1, 2, 3], mask=[False, True, False]) + b = empty_like(a) + assert_(not np.may_share_memory(a.mask, b.mask)) + b = a.view(masked_array) + assert_(np.may_share_memory(a.mask, b.mask)) + + def test_zeros(self): + # Tests zeros/like + datatype = [('a', int), ('b', float), ('c', '|S8')] + a = masked_array([(1, 1.1, '1.1'), (2, 2.2, '2.2'), (3, 3.3, '3.3')], + dtype=datatype) + assert_equal(len(a.fill_value.item()), len(datatype)) + + b = zeros(len(a), dtype=datatype) + assert_equal(b.shape, a.shape) + assert_equal(b.fill_value, a.fill_value) + + b = zeros_like(a) + assert_equal(b.shape, a.shape) + assert_equal(b.fill_value, a.fill_value) + + # check zeros_like mask handling + a = masked_array([1, 2, 3], mask=[False, True, False]) + b = zeros_like(a) + assert_(not np.may_share_memory(a.mask, b.mask)) + b = a.view() + assert_(np.may_share_memory(a.mask, b.mask)) + + def test_ones(self): + # Tests ones/like + datatype = [('a', int), ('b', float), ('c', '|S8')] + a = masked_array([(1, 1.1, '1.1'), (2, 2.2, '2.2'), (3, 3.3, '3.3')], + dtype=datatype) + assert_equal(len(a.fill_value.item()), len(datatype)) + + b = ones(len(a), dtype=datatype) + assert_equal(b.shape, a.shape) + assert_equal(b.fill_value, a.fill_value) + + b = ones_like(a) + assert_equal(b.shape, a.shape) + assert_equal(b.fill_value, a.fill_value) + + # check ones_like mask handling + a = masked_array([1, 2, 3], mask=[False, True, False]) + b = ones_like(a) + assert_(not np.may_share_memory(a.mask, b.mask)) + b = a.view() + assert_(np.may_share_memory(a.mask, b.mask)) + + @suppress_copy_mask_on_assignment + def test_put(self): + # Tests put. 
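+ # On a soft mask, putting an ordinary value into a masked slot both
+ # writes the data and clears the mask there, as checked below.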
+ d = arange(5) + n = [0, 0, 0, 1, 1] + m = make_mask(n) + x = array(d, mask=m) + assert_(x[3] is masked) + assert_(x[4] is masked) + x[[1, 4]] = [10, 40] + assert_(x[3] is masked) + assert_(x[4] is not masked) + assert_equal(x, [0, 10, 2, -1, 40]) + + x = masked_array(arange(10), mask=[1, 0, 0, 0, 0] * 2) + i = [0, 2, 4, 6] + x.put(i, [6, 4, 2, 0]) + assert_equal(x, asarray([6, 1, 4, 3, 2, 5, 0, 7, 8, 9, ])) + assert_equal(x.mask, [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]) + x.put(i, masked_array([0, 2, 4, 6], [1, 0, 1, 0])) + assert_array_equal(x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ]) + assert_equal(x.mask, [1, 0, 0, 0, 1, 1, 0, 0, 0, 0]) + + x = masked_array(arange(10), mask=[1, 0, 0, 0, 0] * 2) + put(x, i, [6, 4, 2, 0]) + assert_equal(x, asarray([6, 1, 4, 3, 2, 5, 0, 7, 8, 9, ])) + assert_equal(x.mask, [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]) + put(x, i, masked_array([0, 2, 4, 6], [1, 0, 1, 0])) + assert_array_equal(x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ]) + assert_equal(x.mask, [1, 0, 0, 0, 1, 1, 0, 0, 0, 0]) + + def test_put_nomask(self): + # GitHub issue 6425 + x = zeros(10) + z = array([3., -1.], mask=[False, True]) + + x.put([1, 2], z) + assert_(x[0] is not masked) + assert_equal(x[0], 0) + assert_(x[1] is not masked) + assert_equal(x[1], 3) + assert_(x[2] is masked) + assert_(x[3] is not masked) + assert_equal(x[3], 0) + + def test_put_hardmask(self): + # Tests put on hardmask + d = arange(5) + n = [0, 0, 0, 1, 1] + m = make_mask(n) + xh = array(d + 1, mask=m, hard_mask=True, copy=True) + xh.put([4, 2, 0, 1, 3], [1, 2, 3, 4, 5]) + assert_equal(xh._data, [3, 4, 2, 4, 5]) + + def test_putmask(self): + x = arange(6) + 1 + mx = array(x, mask=[0, 0, 0, 1, 1, 1]) + mask = [0, 0, 1, 0, 0, 1] + # w/o mask, w/o masked values + xx = x.copy() + putmask(xx, mask, 99) + assert_equal(xx, [1, 2, 99, 4, 5, 99]) + # w/ mask, w/o masked values + mxx = mx.copy() + putmask(mxx, mask, 99) + assert_equal(mxx._data, [1, 2, 99, 4, 5, 99]) + assert_equal(mxx._mask, [0, 0, 0, 1, 1, 0]) + # w/o mask, w/ masked values + values = array([10, 20, 30, 40, 50, 60], mask=[1, 1, 1, 0, 0, 0]) + xx = x.copy() + putmask(xx, mask, values) + assert_equal(xx._data, [1, 2, 30, 4, 5, 60]) + assert_equal(xx._mask, [0, 0, 1, 0, 0, 0]) + # w/ mask, w/ masked values + mxx = mx.copy() + putmask(mxx, mask, values) + assert_equal(mxx._data, [1, 2, 30, 4, 5, 60]) + assert_equal(mxx._mask, [0, 0, 1, 1, 1, 0]) + # w/ mask, w/ masked values + hardmask + mxx = mx.copy() + mxx.harden_mask() + putmask(mxx, mask, values) + assert_equal(mxx, [1, 2, 30, 4, 5, 60]) + + def test_ravel(self): + # Tests ravel + a = array([[1, 2, 3, 4, 5]], mask=[[0, 1, 0, 0, 0]]) + aravel = a.ravel() + assert_equal(aravel._mask.shape, aravel.shape) + a = array([0, 0], mask=[1, 1]) + aravel = a.ravel() + assert_equal(aravel._mask.shape, a.shape) + # Checks that small_mask is preserved + a = array([1, 2, 3, 4], mask=[0, 0, 0, 0], shrink=False) + assert_equal(a.ravel()._mask, [0, 0, 0, 0]) + # Test that the fill_value is preserved + a.fill_value = -99 + a.shape = (2, 2) + ar = a.ravel() + assert_equal(ar._mask, [0, 0, 0, 0]) + assert_equal(ar._data, [1, 2, 3, 4]) + assert_equal(ar.fill_value, -99) + # Test index ordering + assert_equal(a.ravel(order='C'), [1, 2, 3, 4]) + assert_equal(a.ravel(order='F'), [1, 3, 2, 4]) + + def test_reshape(self): + # Tests reshape + x = arange(4) + x[0] = masked + y = x.reshape(2, 2) + assert_equal(y.shape, (2, 2,)) + assert_equal(y._mask.shape, (2, 2,)) + assert_equal(x.shape, (4,)) + assert_equal(x._mask.shape, (4,)) + + def test_sort(self): + # Test sort 
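+ # endwith=True (the default) sorts masked values to the end;
+ # endwith=False moves them to the front instead.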
+ x = array([1, 4, 2, 3], mask=[0, 1, 0, 0], dtype=np.uint8)
+
+ sortedx = sort(x)
+ assert_equal(sortedx._data, [1, 2, 3, 4])
+ assert_equal(sortedx._mask, [0, 0, 0, 1])
+
+ sortedx = sort(x, endwith=False)
+ assert_equal(sortedx._data, [4, 1, 2, 3])
+ assert_equal(sortedx._mask, [1, 0, 0, 0])
+
+ x.sort()
+ assert_equal(x._data, [1, 2, 3, 4])
+ assert_equal(x._mask, [0, 0, 0, 1])
+
+ x = array([1, 4, 2, 3], mask=[0, 1, 0, 0], dtype=np.uint8)
+ x.sort(endwith=False)
+ assert_equal(x._data, [4, 1, 2, 3])
+ assert_equal(x._mask, [1, 0, 0, 0])
+
+ x = [1, 4, 2, 3]
+ sortedx = sort(x)
+ assert_(not isinstance(sortedx, MaskedArray))
+
+ x = array([0, 1, -1, -2, 2], mask=nomask, dtype=np.int8)
+ sortedx = sort(x, endwith=False)
+ assert_equal(sortedx._data, [-2, -1, 0, 1, 2])
+ x = array([0, 1, -1, -2, 2], mask=[0, 1, 0, 0, 1], dtype=np.int8)
+ sortedx = sort(x, endwith=False)
+ assert_equal(sortedx._data, [1, 2, -2, -1, 0])
+ assert_equal(sortedx._mask, [1, 1, 0, 0, 0])
+
+ x = array([0, -1], dtype=np.int8)
+ sortedx = sort(x, kind="stable")
+ assert_equal(sortedx, array([-1, 0], dtype=np.int8))
+
+ def test_stable_sort(self):
+ x = array([1, 2, 3, 1, 2, 3], dtype=np.uint8)
+ expected = array([0, 3, 1, 4, 2, 5])
+ computed = argsort(x, kind='stable')
+ assert_equal(computed, expected)
+
+ def test_argsort_matches_sort(self):
+ x = array([1, 4, 2, 3], mask=[0, 1, 0, 0], dtype=np.uint8)
+
+ for kwargs in [dict(),
+ dict(endwith=True),
+ dict(endwith=False),
+ dict(fill_value=2),
+ dict(fill_value=2, endwith=True),
+ dict(fill_value=2, endwith=False)]:
+ sortedx = sort(x, **kwargs)
+ argsortedx = x[argsort(x, **kwargs)]
+ assert_equal(sortedx._data, argsortedx._data)
+ assert_equal(sortedx._mask, argsortedx._mask)
+
+ def test_sort_2d(self):
+ # Check sort of 2D array.
+ # 2D array w/o mask
+ a = masked_array([[8, 4, 1], [2, 0, 9]])
+ a.sort(0)
+ assert_equal(a, [[2, 0, 1], [8, 4, 9]])
+ a = masked_array([[8, 4, 1], [2, 0, 9]])
+ a.sort(1)
+ assert_equal(a, [[1, 4, 8], [0, 2, 9]])
+ # 2D array w/mask
+ a = masked_array([[8, 4, 1], [2, 0, 9]], mask=[[1, 0, 0], [0, 0, 1]])
+ a.sort(0)
+ assert_equal(a, [[2, 0, 1], [8, 4, 9]])
+ assert_equal(a._mask, [[0, 0, 0], [1, 0, 1]])
+ a = masked_array([[8, 4, 1], [2, 0, 9]], mask=[[1, 0, 0], [0, 0, 1]])
+ a.sort(1)
+ assert_equal(a, [[1, 4, 8], [0, 2, 9]])
+ assert_equal(a._mask, [[0, 0, 1], [0, 0, 1]])
+ # 3D
+ a = masked_array([[[7, 8, 9], [4, 5, 6], [1, 2, 3]],
+ [[1, 2, 3], [7, 8, 9], [4, 5, 6]],
+ [[7, 8, 9], [1, 2, 3], [4, 5, 6]],
+ [[4, 5, 6], [1, 2, 3], [7, 8, 9]]])
+ a[a % 4 == 0] = masked
+ am = a.copy()
+ an = a.filled(99)
+ am.sort(0)
+ an.sort(0)
+ assert_equal(am, an)
+ am = a.copy()
+ an = a.filled(99)
+ am.sort(1)
+ an.sort(1)
+ assert_equal(am, an)
+ am = a.copy()
+ an = a.filled(99)
+ am.sort(2)
+ an.sort(2)
+ assert_equal(am, an)
+
+ def test_sort_flexible(self):
+ # Test sort on structured dtype.
+ a = array( + data=[(3, 3), (3, 2), (2, 2), (2, 1), (1, 0), (1, 1), (1, 2)], + mask=[(0, 0), (0, 1), (0, 0), (0, 0), (1, 0), (0, 0), (0, 0)], + dtype=[('A', int), ('B', int)]) + mask_last = array( + data=[(1, 1), (1, 2), (2, 1), (2, 2), (3, 3), (3, 2), (1, 0)], + mask=[(0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (0, 1), (1, 0)], + dtype=[('A', int), ('B', int)]) + mask_first = array( + data=[(1, 0), (1, 1), (1, 2), (2, 1), (2, 2), (3, 2), (3, 3)], + mask=[(1, 0), (0, 0), (0, 0), (0, 0), (0, 0), (0, 1), (0, 0)], + dtype=[('A', int), ('B', int)]) + + test = sort(a) + assert_equal(test, mask_last) + assert_equal(test.mask, mask_last.mask) + + test = sort(a, endwith=False) + assert_equal(test, mask_first) + assert_equal(test.mask, mask_first.mask) + + # Test sort on dtype with subarray (gh-8069) + # Just check that the sort does not error, structured array subarrays + # are treated as byte strings and that leads to differing behavior + # depending on endianness and `endwith`. + dt = np.dtype([('v', int, 2)]) + a = a.view(dt) + test = sort(a) + test = sort(a, endwith=False) + + def test_argsort(self): + # Test argsort + a = array([1, 5, 2, 4, 3], mask=[1, 0, 0, 1, 0]) + assert_equal(np.argsort(a), argsort(a)) + + def test_squeeze(self): + # Check squeeze + data = masked_array([[1, 2, 3]]) + assert_equal(data.squeeze(), [1, 2, 3]) + data = masked_array([[1, 2, 3]], mask=[[1, 1, 1]]) + assert_equal(data.squeeze(), [1, 2, 3]) + assert_equal(data.squeeze()._mask, [1, 1, 1]) + + # normal ndarrays return a view + arr = np.array([[1]]) + arr_sq = arr.squeeze() + assert_equal(arr_sq, 1) + arr_sq[...] = 2 + assert_equal(arr[0,0], 2) + + # so maskedarrays should too + m_arr = masked_array([[1]], mask=True) + m_arr_sq = m_arr.squeeze() + assert_(m_arr_sq is not np.ma.masked) + assert_equal(m_arr_sq.mask, True) + m_arr_sq[...] = 2 + assert_equal(m_arr[0,0], 2) + + def test_swapaxes(self): + # Tests swapaxes on MaskedArrays. 
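+ # swapaxes must permute the mask together with the data so that the
+ # swapped view stays consistent with the original.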
+ x = np.array([8.375, 7.545, 8.828, 8.5, 1.757, 5.928, + 8.43, 7.78, 9.865, 5.878, 8.979, 4.732, + 3.012, 6.022, 5.095, 3.116, 5.238, 3.957, + 6.04, 9.63, 7.712, 3.382, 4.489, 6.479, + 7.189, 9.645, 5.395, 4.961, 9.894, 2.893, + 7.357, 9.828, 6.272, 3.758, 6.693, 0.993]) + m = np.array([0, 1, 0, 1, 0, 0, + 1, 0, 1, 1, 0, 1, + 0, 0, 0, 1, 0, 1, + 0, 0, 0, 1, 1, 1, + 1, 0, 0, 1, 0, 0, + 0, 0, 1, 0, 1, 0]) + mX = array(x, mask=m).reshape(6, 6) + mXX = mX.reshape(3, 2, 2, 3) + + mXswapped = mX.swapaxes(0, 1) + assert_equal(mXswapped[-1], mX[:, -1]) + + mXXswapped = mXX.swapaxes(0, 2) + assert_equal(mXXswapped.shape, (2, 2, 3, 3)) + + def test_take(self): + # Tests take + x = masked_array([10, 20, 30, 40], [0, 1, 0, 1]) + assert_equal(x.take([0, 0, 3]), masked_array([10, 10, 40], [0, 0, 1])) + assert_equal(x.take([0, 0, 3]), x[[0, 0, 3]]) + assert_equal(x.take([[0, 1], [0, 1]]), + masked_array([[10, 20], [10, 20]], [[0, 1], [0, 1]])) + + # assert_equal crashes when passed np.ma.mask + assert_(x[1] is np.ma.masked) + assert_(x.take(1) is np.ma.masked) + + x = array([[10, 20, 30], [40, 50, 60]], mask=[[0, 0, 1], [1, 0, 0, ]]) + assert_equal(x.take([0, 2], axis=1), + array([[10, 30], [40, 60]], mask=[[0, 1], [1, 0]])) + assert_equal(take(x, [0, 2], axis=1), + array([[10, 30], [40, 60]], mask=[[0, 1], [1, 0]])) + + def test_take_masked_indices(self): + # Test take w/ masked indices + a = np.array((40, 18, 37, 9, 22)) + indices = np.arange(3)[None,:] + np.arange(5)[:, None] + mindices = array(indices, mask=(indices >= len(a))) + # No mask + test = take(a, mindices, mode='clip') + ctrl = array([[40, 18, 37], + [18, 37, 9], + [37, 9, 22], + [9, 22, 22], + [22, 22, 22]]) + assert_equal(test, ctrl) + # Masked indices + test = take(a, mindices) + ctrl = array([[40, 18, 37], + [18, 37, 9], + [37, 9, 22], + [9, 22, 40], + [22, 40, 40]]) + ctrl[3, 2] = ctrl[4, 1] = ctrl[4, 2] = masked + assert_equal(test, ctrl) + assert_equal(test.mask, ctrl.mask) + # Masked input + masked indices + a = array((40, 18, 37, 9, 22), mask=(0, 1, 0, 0, 0)) + test = take(a, mindices) + ctrl[0, 1] = ctrl[1, 0] = masked + assert_equal(test, ctrl) + assert_equal(test.mask, ctrl.mask) + + def test_tolist(self): + # Tests to list + # ... on 1D + x = array(np.arange(12)) + x[[1, -2]] = masked + xlist = x.tolist() + assert_(xlist[1] is None) + assert_(xlist[-2] is None) + # ... on 2D + x.shape = (3, 4) + xlist = x.tolist() + ctrl = [[0, None, 2, 3], [4, 5, 6, 7], [8, 9, None, 11]] + assert_equal(xlist[0], [0, None, 2, 3]) + assert_equal(xlist[1], [4, 5, 6, 7]) + assert_equal(xlist[2], [8, 9, None, 11]) + assert_equal(xlist, ctrl) + # ... on structured array w/ masked records + x = array(list(zip([1, 2, 3], + [1.1, 2.2, 3.3], + ['one', 'two', 'thr'])), + dtype=[('a', int), ('b', float), ('c', '|S8')]) + x[-1] = masked + assert_equal(x.tolist(), + [(1, 1.1, b'one'), + (2, 2.2, b'two'), + (None, None, None)]) + # ... on structured array w/ masked fields + a = array([(1, 2,), (3, 4)], mask=[(0, 1), (0, 0)], + dtype=[('a', int), ('b', int)]) + test = a.tolist() + assert_equal(test, [[1, None], [3, 4]]) + # ... 
on mvoid + a = a[0] + test = a.tolist() + assert_equal(test, [1, None]) + + def test_tolist_specialcase(self): + # Test mvoid.tolist: make sure we return a standard Python object + a = array([(0, 1), (2, 3)], dtype=[('a', int), ('b', int)]) + # w/o mask: each entry is a np.void whose elements are standard Python + for entry in a: + for item in entry.tolist(): + assert_(not isinstance(item, np.generic)) + # w/ mask: each entry is a ma.void whose elements should be + # standard Python + a.mask[0] = (0, 1) + for entry in a: + for item in entry.tolist(): + assert_(not isinstance(item, np.generic)) + + def test_toflex(self): + # Test the conversion to records + data = arange(10) + record = data.toflex() + assert_equal(record['_data'], data._data) + assert_equal(record['_mask'], data._mask) + + data[[0, 1, 2, -1]] = masked + record = data.toflex() + assert_equal(record['_data'], data._data) + assert_equal(record['_mask'], data._mask) + + ndtype = [('i', int), ('s', '|S3'), ('f', float)] + data = array([(i, s, f) for (i, s, f) in zip(np.arange(10), + 'ABCDEFGHIJKLM', + np.random.rand(10))], + dtype=ndtype) + data[[0, 1, 2, -1]] = masked + record = data.toflex() + assert_equal(record['_data'], data._data) + assert_equal(record['_mask'], data._mask) + + ndtype = np.dtype("int, (2,3)float, float") + data = array([(i, f, ff) for (i, f, ff) in zip(np.arange(10), + np.random.rand(10), + np.random.rand(10))], + dtype=ndtype) + data[[0, 1, 2, -1]] = masked + record = data.toflex() + assert_equal_records(record['_data'], data._data) + assert_equal_records(record['_mask'], data._mask) + + def test_fromflex(self): + # Test the reconstruction of a masked_array from a record + a = array([1, 2, 3]) + test = fromflex(a.toflex()) + assert_equal(test, a) + assert_equal(test.mask, a.mask) + + a = array([1, 2, 3], mask=[0, 0, 1]) + test = fromflex(a.toflex()) + assert_equal(test, a) + assert_equal(test.mask, a.mask) + + a = array([(1, 1.), (2, 2.), (3, 3.)], mask=[(1, 0), (0, 0), (0, 1)], + dtype=[('A', int), ('B', float)]) + test = fromflex(a.toflex()) + assert_equal(test, a) + assert_equal(test.data, a.data) + + def test_arraymethod(self): + # Test a _arraymethod w/ n argument + marray = masked_array([[1, 2, 3, 4, 5]], mask=[0, 0, 1, 0, 0]) + control = masked_array([[1], [2], [3], [4], [5]], + mask=[0, 0, 1, 0, 0]) + assert_equal(marray.T, control) + assert_equal(marray.transpose(), control) + + assert_equal(MaskedArray.cumsum(marray.T, 0), control.cumsum(0)) + + def test_arraymethod_0d(self): + # gh-9430 + x = np.ma.array(42, mask=True) + assert_equal(x.T.mask, x.mask) + assert_equal(x.T.data, x.data) + + def test_transpose_view(self): + x = np.ma.array([[1, 2, 3], [4, 5, 6]]) + x[0,1] = np.ma.masked + xt = x.T + + xt[1,0] = 10 + xt[0,1] = np.ma.masked + + assert_equal(x.data, xt.T.data) + assert_equal(x.mask, xt.T.mask) + + def test_diagonal_view(self): + x = np.ma.zeros((3,3)) + x[0,0] = 10 + x[1,1] = np.ma.masked + x[2,2] = 20 + xd = x.diagonal() + x[1,1] = 15 + assert_equal(xd.mask, x.diagonal().mask) + assert_equal(xd.data, x.diagonal().data) + + +class TestMaskedArrayMathMethods: + + def setup(self): + # Base data definition. 
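+ # x is a flat 36-vector; X and XX are 6x6 and 3x2x2x3 reshapes of it,
+ # masked with the same flat patterns m and m2.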
+ x = np.array([8.375, 7.545, 8.828, 8.5, 1.757, 5.928, + 8.43, 7.78, 9.865, 5.878, 8.979, 4.732, + 3.012, 6.022, 5.095, 3.116, 5.238, 3.957, + 6.04, 9.63, 7.712, 3.382, 4.489, 6.479, + 7.189, 9.645, 5.395, 4.961, 9.894, 2.893, + 7.357, 9.828, 6.272, 3.758, 6.693, 0.993]) + X = x.reshape(6, 6) + XX = x.reshape(3, 2, 2, 3) + + m = np.array([0, 1, 0, 1, 0, 0, + 1, 0, 1, 1, 0, 1, + 0, 0, 0, 1, 0, 1, + 0, 0, 0, 1, 1, 1, + 1, 0, 0, 1, 0, 0, + 0, 0, 1, 0, 1, 0]) + mx = array(data=x, mask=m) + mX = array(data=X, mask=m.reshape(X.shape)) + mXX = array(data=XX, mask=m.reshape(XX.shape)) + + m2 = np.array([1, 1, 0, 1, 0, 0, + 1, 1, 1, 1, 0, 1, + 0, 0, 1, 1, 0, 1, + 0, 0, 0, 1, 1, 1, + 1, 0, 0, 1, 1, 0, + 0, 0, 1, 0, 1, 1]) + m2x = array(data=x, mask=m2) + m2X = array(data=X, mask=m2.reshape(X.shape)) + m2XX = array(data=XX, mask=m2.reshape(XX.shape)) + self.d = (x, X, XX, m, mx, mX, mXX, m2x, m2X, m2XX) + + def test_cumsumprod(self): + # Tests cumsum & cumprod on MaskedArrays. + (x, X, XX, m, mx, mX, mXX, m2x, m2X, m2XX) = self.d + mXcp = mX.cumsum(0) + assert_equal(mXcp._data, mX.filled(0).cumsum(0)) + mXcp = mX.cumsum(1) + assert_equal(mXcp._data, mX.filled(0).cumsum(1)) + + mXcp = mX.cumprod(0) + assert_equal(mXcp._data, mX.filled(1).cumprod(0)) + mXcp = mX.cumprod(1) + assert_equal(mXcp._data, mX.filled(1).cumprod(1)) + + def test_cumsumprod_with_output(self): + # Tests cumsum/cumprod w/ output + xm = array(np.random.uniform(0, 10, 12)).reshape(3, 4) + xm[:, 0] = xm[0] = xm[-1, -1] = masked + + for funcname in ('cumsum', 'cumprod'): + npfunc = getattr(np, funcname) + xmmeth = getattr(xm, funcname) + + # A ndarray as explicit input + output = np.empty((3, 4), dtype=float) + output.fill(-9999) + result = npfunc(xm, axis=0, out=output) + # ... the result should be the given output + assert_(result is output) + assert_equal(result, xmmeth(axis=0, out=output)) + + output = empty((3, 4), dtype=int) + result = xmmeth(axis=0, out=output) + assert_(result is output) + + def test_ptp(self): + # Tests ptp on MaskedArrays. 
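+ # ptp (max minus min) must ignore masked values, i.e. agree with the
+ # ptp of the compressed data, per row and per column.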
+ (x, X, XX, m, mx, mX, mXX, m2x, m2X, m2XX) = self.d + (n, m) = X.shape + assert_equal(mx.ptp(), mx.compressed().ptp()) + rows = np.zeros(n, float) + cols = np.zeros(m, float) + for k in range(m): + cols[k] = mX[:, k].compressed().ptp() + for k in range(n): + rows[k] = mX[k].compressed().ptp() + assert_equal(mX.ptp(0), cols) + assert_equal(mX.ptp(1), rows) + + def test_add_object(self): + x = masked_array(['a', 'b'], mask=[1, 0], dtype=object) + y = x + 'x' + assert_equal(y[1], 'bx') + assert_(y.mask[0]) + + def test_sum_object(self): + # Test sum on object dtype + a = masked_array([1, 2, 3], mask=[1, 0, 0], dtype=object) + assert_equal(a.sum(), 5) + a = masked_array([[1, 2, 3], [4, 5, 6]], dtype=object) + assert_equal(a.sum(axis=0), [5, 7, 9]) + + def test_prod_object(self): + # Test prod on object dtype + a = masked_array([1, 2, 3], mask=[1, 0, 0], dtype=object) + assert_equal(a.prod(), 2 * 3) + a = masked_array([[1, 2, 3], [4, 5, 6]], dtype=object) + assert_equal(a.prod(axis=0), [4, 10, 18]) + + def test_meananom_object(self): + # Test mean/anom on object dtype + a = masked_array([1, 2, 3], dtype=object) + assert_equal(a.mean(), 2) + assert_equal(a.anom(), [-1, 0, 1]) + + def test_anom_shape(self): + a = masked_array([1, 2, 3]) + assert_equal(a.anom().shape, a.shape) + a.mask = True + assert_equal(a.anom().shape, a.shape) + assert_(np.ma.is_masked(a.anom())) + + def test_anom(self): + a = masked_array(np.arange(1, 7).reshape(2, 3)) + assert_almost_equal(a.anom(), + [[-2.5, -1.5, -0.5], [0.5, 1.5, 2.5]]) + assert_almost_equal(a.anom(axis=0), + [[-1.5, -1.5, -1.5], [1.5, 1.5, 1.5]]) + assert_almost_equal(a.anom(axis=1), + [[-1., 0., 1.], [-1., 0., 1.]]) + a.mask = [[0, 0, 1], [0, 1, 0]] + mval = -99 + assert_almost_equal(a.anom().filled(mval), + [[-2.25, -1.25, mval], [0.75, mval, 2.75]]) + assert_almost_equal(a.anom(axis=0).filled(mval), + [[-1.5, 0.0, mval], [1.5, mval, 0.0]]) + assert_almost_equal(a.anom(axis=1).filled(mval), + [[-0.5, 0.5, mval], [-1.0, mval, 1.0]]) + + def test_trace(self): + # Tests trace on MaskedArrays. + (x, X, XX, m, mx, mX, mXX, m2x, m2X, m2XX) = self.d + mXdiag = mX.diagonal() + assert_equal(mX.trace(), mX.diagonal().compressed().sum()) + assert_almost_equal(mX.trace(), + X.trace() - sum(mXdiag.mask * X.diagonal(), + axis=0)) + assert_equal(np.trace(mX), mX.trace()) + + # gh-5560 + arr = np.arange(2*4*4).reshape(2,4,4) + m_arr = np.ma.masked_array(arr, False) + assert_equal(arr.trace(axis1=1, axis2=2), m_arr.trace(axis1=1, axis2=2)) + + def test_dot(self): + # Tests dot on MaskedArrays. 
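+ # ma's dot fills masked entries with 0 for the products; an output
+ # cell stays masked only when no fully unmasked pair of terms
+ # contributed to it (hence r.mask is nomask for the 1-D case below).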
+ (x, X, XX, m, mx, mX, mXX, m2x, m2X, m2XX) = self.d + fx = mx.filled(0) + r = mx.dot(mx) + assert_almost_equal(r.filled(0), fx.dot(fx)) + assert_(r.mask is nomask) + + fX = mX.filled(0) + r = mX.dot(mX) + assert_almost_equal(r.filled(0), fX.dot(fX)) + assert_(r.mask[1,3]) + r1 = empty_like(r) + mX.dot(mX, out=r1) + assert_almost_equal(r, r1) + + mYY = mXX.swapaxes(-1, -2) + fXX, fYY = mXX.filled(0), mYY.filled(0) + r = mXX.dot(mYY) + assert_almost_equal(r.filled(0), fXX.dot(fYY)) + r1 = empty_like(r) + mXX.dot(mYY, out=r1) + assert_almost_equal(r, r1) + + def test_dot_shape_mismatch(self): + # regression test + x = masked_array([[1,2],[3,4]], mask=[[0,1],[0,0]]) + y = masked_array([[1,2],[3,4]], mask=[[0,1],[0,0]]) + z = masked_array([[0,1],[3,3]]) + x.dot(y, out=z) + assert_almost_equal(z.filled(0), [[1, 0], [15, 16]]) + assert_almost_equal(z.mask, [[0, 1], [0, 0]]) + + def test_varmean_nomask(self): + # gh-5769 + foo = array([1,2,3,4], dtype='f8') + bar = array([1,2,3,4], dtype='f8') + assert_equal(type(foo.mean()), np.float64) + assert_equal(type(foo.var()), np.float64) + assert((foo.mean() == bar.mean()) is np.bool_(True)) + + # check array type is preserved and out works + foo = array(np.arange(16).reshape((4,4)), dtype='f8') + bar = empty(4, dtype='f4') + assert_equal(type(foo.mean(axis=1)), MaskedArray) + assert_equal(type(foo.var(axis=1)), MaskedArray) + assert_(foo.mean(axis=1, out=bar) is bar) + assert_(foo.var(axis=1, out=bar) is bar) + + def test_varstd(self): + # Tests var & std on MaskedArrays. + (x, X, XX, m, mx, mX, mXX, m2x, m2X, m2XX) = self.d + assert_almost_equal(mX.var(axis=None), mX.compressed().var()) + assert_almost_equal(mX.std(axis=None), mX.compressed().std()) + assert_almost_equal(mX.std(axis=None, ddof=1), + mX.compressed().std(ddof=1)) + assert_almost_equal(mX.var(axis=None, ddof=1), + mX.compressed().var(ddof=1)) + assert_equal(mXX.var(axis=3).shape, XX.var(axis=3).shape) + assert_equal(mX.var().shape, X.var().shape) + (mXvar0, mXvar1) = (mX.var(axis=0), mX.var(axis=1)) + assert_almost_equal(mX.var(axis=None, ddof=2), + mX.compressed().var(ddof=2)) + assert_almost_equal(mX.std(axis=None, ddof=2), + mX.compressed().std(ddof=2)) + for k in range(6): + assert_almost_equal(mXvar1[k], mX[k].compressed().var()) + assert_almost_equal(mXvar0[k], mX[:, k].compressed().var()) + assert_almost_equal(np.sqrt(mXvar0[k]), + mX[:, k].compressed().std()) + + @suppress_copy_mask_on_assignment + def test_varstd_specialcases(self): + # Test a special case for var + nout = np.array(-1, dtype=float) + mout = array(-1, dtype=float) + + x = array(arange(10), mask=True) + for methodname in ('var', 'std'): + method = getattr(x, methodname) + assert_(method() is masked) + assert_(method(0) is masked) + assert_(method(-1) is masked) + # Using a masked array as explicit output + method(out=mout) + assert_(mout is not masked) + assert_equal(mout.mask, True) + # Using a ndarray as explicit output + method(out=nout) + assert_(np.isnan(nout)) + + x = array(arange(10), mask=True) + x[-1] = 9 + for methodname in ('var', 'std'): + method = getattr(x, methodname) + assert_(method(ddof=1) is masked) + assert_(method(0, ddof=1) is masked) + assert_(method(-1, ddof=1) is masked) + # Using a masked array as explicit output + method(out=mout, ddof=1) + assert_(mout is not masked) + assert_equal(mout.mask, True) + # Using a ndarray as explicit output + method(out=nout, ddof=1) + assert_(np.isnan(nout)) + + def test_varstd_ddof(self): + a = array([[1, 1, 0], [1, 1, 0]], mask=[[0, 0, 1], [0, 0, 1]]) + 
test = a.std(axis=0, ddof=0) + assert_equal(test.filled(0), [0, 0, 0]) + assert_equal(test.mask, [0, 0, 1]) + test = a.std(axis=0, ddof=1) + assert_equal(test.filled(0), [0, 0, 0]) + assert_equal(test.mask, [0, 0, 1]) + test = a.std(axis=0, ddof=2) + assert_equal(test.filled(0), [0, 0, 0]) + assert_equal(test.mask, [1, 1, 1]) + + def test_diag(self): + # Test diag + x = arange(9).reshape((3, 3)) + x[1, 1] = masked + out = np.diag(x) + assert_equal(out, [0, 4, 8]) + out = diag(x) + assert_equal(out, [0, 4, 8]) + assert_equal(out.mask, [0, 1, 0]) + out = diag(out) + control = array([[0, 0, 0], [0, 4, 0], [0, 0, 8]], + mask=[[0, 0, 0], [0, 1, 0], [0, 0, 0]]) + assert_equal(out, control) + + def test_axis_methods_nomask(self): + # Test the combination nomask & methods w/ axis + a = array([[1, 2, 3], [4, 5, 6]]) + + assert_equal(a.sum(0), [5, 7, 9]) + assert_equal(a.sum(-1), [6, 15]) + assert_equal(a.sum(1), [6, 15]) + + assert_equal(a.prod(0), [4, 10, 18]) + assert_equal(a.prod(-1), [6, 120]) + assert_equal(a.prod(1), [6, 120]) + + assert_equal(a.min(0), [1, 2, 3]) + assert_equal(a.min(-1), [1, 4]) + assert_equal(a.min(1), [1, 4]) + + assert_equal(a.max(0), [4, 5, 6]) + assert_equal(a.max(-1), [3, 6]) + assert_equal(a.max(1), [3, 6]) + + +class TestMaskedArrayMathMethodsComplex: + # Test class for miscellaneous MaskedArrays methods. + def setup(self): + # Base data definition. + x = np.array([8.375j, 7.545j, 8.828j, 8.5j, 1.757j, 5.928, + 8.43, 7.78, 9.865, 5.878, 8.979, 4.732, + 3.012, 6.022, 5.095, 3.116, 5.238, 3.957, + 6.04, 9.63, 7.712, 3.382, 4.489, 6.479j, + 7.189j, 9.645, 5.395, 4.961, 9.894, 2.893, + 7.357, 9.828, 6.272, 3.758, 6.693, 0.993j]) + X = x.reshape(6, 6) + XX = x.reshape(3, 2, 2, 3) + + m = np.array([0, 1, 0, 1, 0, 0, + 1, 0, 1, 1, 0, 1, + 0, 0, 0, 1, 0, 1, + 0, 0, 0, 1, 1, 1, + 1, 0, 0, 1, 0, 0, + 0, 0, 1, 0, 1, 0]) + mx = array(data=x, mask=m) + mX = array(data=X, mask=m.reshape(X.shape)) + mXX = array(data=XX, mask=m.reshape(XX.shape)) + + m2 = np.array([1, 1, 0, 1, 0, 0, + 1, 1, 1, 1, 0, 1, + 0, 0, 1, 1, 0, 1, + 0, 0, 0, 1, 1, 1, + 1, 0, 0, 1, 1, 0, + 0, 0, 1, 0, 1, 1]) + m2x = array(data=x, mask=m2) + m2X = array(data=X, mask=m2.reshape(X.shape)) + m2XX = array(data=XX, mask=m2.reshape(XX.shape)) + self.d = (x, X, XX, m, mx, mX, mXX, m2x, m2X, m2XX) + + def test_varstd(self): + # Tests var & std on MaskedArrays. + (x, X, XX, m, mx, mX, mXX, m2x, m2X, m2XX) = self.d + assert_almost_equal(mX.var(axis=None), mX.compressed().var()) + assert_almost_equal(mX.std(axis=None), mX.compressed().std()) + assert_equal(mXX.var(axis=3).shape, XX.var(axis=3).shape) + assert_equal(mX.var().shape, X.var().shape) + (mXvar0, mXvar1) = (mX.var(axis=0), mX.var(axis=1)) + assert_almost_equal(mX.var(axis=None, ddof=2), + mX.compressed().var(ddof=2)) + assert_almost_equal(mX.std(axis=None, ddof=2), + mX.compressed().std(ddof=2)) + for k in range(6): + assert_almost_equal(mXvar1[k], mX[k].compressed().var()) + assert_almost_equal(mXvar0[k], mX[:, k].compressed().var()) + assert_almost_equal(np.sqrt(mXvar0[k]), + mX[:, k].compressed().std()) + + +class TestMaskedArrayFunctions: + # Test class for miscellaneous functions. 
+ + def setup(self): + x = np.array([1., 1., 1., -2., pi/2.0, 4., 5., -10., 10., 1., 2., 3.]) + y = np.array([5., 0., 3., 2., -1., -4., 0., -10., 10., 1., 0., 3.]) + m1 = [1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0] + m2 = [0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1] + xm = masked_array(x, mask=m1) + ym = masked_array(y, mask=m2) + xm.set_fill_value(1e+20) + self.info = (xm, ym)
+ + def test_masked_where_bool(self): + x = [1, 2] + y = masked_where(False, x) + assert_equal(y, [1, 2]) + assert_equal(y[1], 2)
+ + def test_masked_equal_wlist(self): + x = [1, 2, 3] + mx = masked_equal(x, 3) + assert_equal(mx, x) + assert_equal(mx._mask, [0, 0, 1]) + mx = masked_not_equal(x, 3) + assert_equal(mx, x) + assert_equal(mx._mask, [1, 1, 0])
+ + def test_masked_equal_fill_value(self): + x = [1, 2, 3] + mx = masked_equal(x, 3) + assert_equal(mx._mask, [0, 0, 1]) + assert_equal(mx.fill_value, 3)
+ + def test_masked_where_condition(self): + # Tests masking functions. + x = array([1., 2., 3., 4., 5.]) + x[2] = masked + assert_equal(masked_where(greater(x, 2), x), masked_greater(x, 2)) + assert_equal(masked_where(greater_equal(x, 2), x), + masked_greater_equal(x, 2)) + assert_equal(masked_where(less(x, 2), x), masked_less(x, 2)) + assert_equal(masked_where(less_equal(x, 2), x), + masked_less_equal(x, 2)) + assert_equal(masked_where(not_equal(x, 2), x), masked_not_equal(x, 2)) + assert_equal(masked_where(equal(x, 2), x), masked_equal(x, 2)) + assert_equal(masked_where(not_equal(x, 2), x), masked_not_equal(x, 2)) + assert_equal(masked_where([1, 1, 0, 0, 0], [1, 2, 3, 4, 5]), + [99, 99, 3, 4, 5])
+ + def test_masked_where_oddities(self): + # Tests some generic features. + atest = ones((10, 10, 10), dtype=float) + btest = zeros(atest.shape, MaskType) + ctest = masked_where(btest, atest) + assert_equal(atest, ctest)
+ + def test_masked_where_shape_constraint(self): + a = arange(10) + with assert_raises(IndexError): + masked_equal(1, a) + test = masked_equal(a, 1) + assert_equal(test.mask, [0, 1, 0, 0, 0, 0, 0, 0, 0, 0])
+ + def test_masked_where_structured(self): + # test that masked_where on a structured array sets a structured + # mask (see issue #2972) + a = np.zeros(10, dtype=[("A", "<f2"), ("B", "<f4")]) + am = np.ma.masked_where(a["A"] < 5, a) + assert_equal(am.mask.dtype.names, am.dtype.names) + assert_equal(am["A"], + np.ma.masked_array(np.zeros(10), np.ones(10)))
+ + def test_masked_where_mismatch(self): + # gh-4520 + x = np.arange(10) + y = np.arange(5) + assert_raises(IndexError, np.ma.masked_where, y > 6, x)
+ + def test_masked_otherfunctions(self): + assert_equal(masked_inside(list(range(5)), 1, 3), + [0, 199, 199, 199, 4]) + assert_equal(masked_outside(list(range(5)), 1, 3), [199, 1, 2, 3, 199]) + assert_equal(masked_inside(array(list(range(5)), + mask=[1, 0, 0, 0, 0]), 1, 3).mask, + [1, 1, 1, 1, 0]) + assert_equal(masked_outside(array(list(range(5)), + mask=[0, 1, 0, 0, 0]), 1, 3).mask, + [1, 1, 0, 0, 1]) + assert_equal(masked_equal(array(list(range(5)), + mask=[1, 0, 0, 0, 0]), 2).mask, + [1, 0, 1, 0, 0]) + assert_equal(masked_not_equal(array([2, 2, 1, 2, 1], + mask=[1, 0, 0, 0, 0]), 2).mask, + [1, 0, 1, 0, 1])
+ + def test_round(self): + a = array([1.23456, 2.34567, 3.45678, 4.56789, 5.67890], + mask=[0, 1, 0, 0, 0]) + assert_equal(a.round(), [1., 2., 3., 5., 6.]) + assert_equal(a.round(1), [1.2, 2.3, 3.5, 4.6, 5.7]) + assert_equal(a.round(3), [1.235, 2.346, 3.457, 4.568, 5.679]) + b = empty_like(a) + a.round(out=b) + assert_equal(b, [1., 2., 3., 5., 6.]) + + x = array([1., 2., 3., 4., 5.]) + c = array([1, 1, 1, 0, 0]) + x[2] = masked + z = where(c, x, -x) + assert_equal(z, [1., 2., 0., -4., -5]) + c[0] = masked + z = where(c, x, -x) + assert_equal(z, [1., 2., 0., -4., -5]) + assert_(z[0] is masked) + assert_(z[1] is not masked) + assert_(z[2] is masked)
+ + def test_round_with_output(self): + # Testing round with an explicit output + +
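+ # NOTE (editorial comment, not part of the original file): both the
+ # np.round free function and the MaskedArray.round method must hand back
+ # the very object supplied as ``out`` (identity, not just equal values),
+ # whether it is a plain ndarray or a masked array.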
xm = array(np.random.uniform(0, 10, 12)).reshape(3, 4) + xm[:, 0] = xm[0] = xm[-1, -1] = masked + + # A ndarray as explicit input + output = np.empty((3, 4), dtype=float) + output.fill(-9999) + result = np.round(xm, decimals=2, out=output) + # ... the result should be the given output + assert_(result is output) + assert_equal(result, xm.round(decimals=2, out=output)) + + output = empty((3, 4), dtype=float) + result = xm.round(decimals=2, out=output) + assert_(result is output) + + def test_round_with_scalar(self): + # Testing round with scalar/zero dimension input + # GH issue 2244 + a = array(1.1, mask=[False]) + assert_equal(a.round(), 1) + + a = array(1.1, mask=[True]) + assert_(a.round() is masked) + + a = array(1.1, mask=[False]) + output = np.empty(1, dtype=float) + output.fill(-9999) + a.round(out=output) + assert_equal(output, 1) + + a = array(1.1, mask=[False]) + output = array(-9999., mask=[True]) + a.round(out=output) + assert_equal(output[()], 1) + + a = array(1.1, mask=[True]) + output = array(-9999., mask=[False]) + a.round(out=output) + assert_(output[()] is masked) + + def test_identity(self): + a = identity(5) + assert_(isinstance(a, MaskedArray)) + assert_equal(a, np.identity(5)) + + def test_power(self): + x = -1.1 + assert_almost_equal(power(x, 2.), 1.21) + assert_(power(x, masked) is masked) + x = array([-1.1, -1.1, 1.1, 1.1, 0.]) + b = array([0.5, 2., 0.5, 2., -1.], mask=[0, 0, 0, 0, 1]) + y = power(x, b) + assert_almost_equal(y, [0, 1.21, 1.04880884817, 1.21, 0.]) + assert_equal(y._mask, [1, 0, 0, 0, 1]) + b.mask = nomask + y = power(x, b) + assert_equal(y._mask, [1, 0, 0, 0, 1]) + z = x ** b + assert_equal(z._mask, y._mask) + assert_almost_equal(z, y) + assert_almost_equal(z._data, y._data) + x **= b + assert_equal(x._mask, y._mask) + assert_almost_equal(x, y) + assert_almost_equal(x._data, y._data) + + def test_power_with_broadcasting(self): + # Test power w/ broadcasting + a2 = np.array([[1., 2., 3.], [4., 5., 6.]]) + a2m = array(a2, mask=[[1, 0, 0], [0, 0, 1]]) + b1 = np.array([2, 4, 3]) + b2 = np.array([b1, b1]) + b2m = array(b2, mask=[[0, 1, 0], [0, 1, 0]]) + + ctrl = array([[1 ** 2, 2 ** 4, 3 ** 3], [4 ** 2, 5 ** 4, 6 ** 3]], + mask=[[1, 1, 0], [0, 1, 1]]) + # No broadcasting, base & exp w/ mask + test = a2m ** b2m + assert_equal(test, ctrl) + assert_equal(test.mask, ctrl.mask) + # No broadcasting, base w/ mask, exp w/o mask + test = a2m ** b2 + assert_equal(test, ctrl) + assert_equal(test.mask, a2m.mask) + # No broadcasting, base w/o mask, exp w/ mask + test = a2 ** b2m + assert_equal(test, ctrl) + assert_equal(test.mask, b2m.mask) + + ctrl = array([[2 ** 2, 4 ** 4, 3 ** 3], [2 ** 2, 4 ** 4, 3 ** 3]], + mask=[[0, 1, 0], [0, 1, 0]]) + test = b1 ** b2m + assert_equal(test, ctrl) + assert_equal(test.mask, ctrl.mask) + test = b2m ** b1 + assert_equal(test, ctrl) + assert_equal(test.mask, ctrl.mask) + + def test_where(self): + # Test the where function + x = np.array([1., 1., 1., -2., pi/2.0, 4., 5., -10., 10., 1., 2., 3.]) + y = np.array([5., 0., 3., 2., -1., -4., 0., -10., 10., 1., 0., 3.]) + m1 = [1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0] + m2 = [0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1] + xm = masked_array(x, mask=m1) + ym = masked_array(y, mask=m2) + xm.set_fill_value(1e+20) + + d = where(xm > 2, xm, -9) + assert_equal(d, [-9., -9., -9., -9., -9., 4., + -9., -9., 10., -9., -9., 3.]) + assert_equal(d._mask, xm._mask) + d = where(xm > 2, -9, ym) + assert_equal(d, [5., 0., 3., 2., -1., -9., + -9., -10., -9., 1., 0., -9.]) + assert_equal(d._mask, [1, 0, 1, 0, 0, 0, 1, 0, 0, 
0, 0, 0]) + d = where(xm > 2, xm, masked) + assert_equal(d, [-9., -9., -9., -9., -9., 4., + -9., -9., 10., -9., -9., 3.]) + tmp = xm._mask.copy() + tmp[(xm <= 2).filled(True)] = True + assert_equal(d._mask, tmp) + + ixm = xm.astype(int) + d = where(ixm > 2, ixm, masked) + assert_equal(d, [-9, -9, -9, -9, -9, 4, -9, -9, 10, -9, -9, 3]) + assert_equal(d.dtype, ixm.dtype)
+ + def test_where_object(self): + a = np.array(None) + b = masked_array(None) + r = b.copy() + assert_equal(np.ma.where(True, a, a), r) + assert_equal(np.ma.where(True, b, b), r)
+ + def test_where_with_masked_choice(self): + x = arange(10) + x[3] = masked + c = x >= 8 + # Set False to masked + z = where(c, x, masked) + assert_(z.dtype is x.dtype) + assert_(z[3] is masked) + assert_(z[4] is masked) + assert_(z[7] is masked) + assert_(z[8] is not masked) + assert_(z[9] is not masked) + assert_equal(x, z) + # Set True to masked + z = where(c, masked, x) + assert_(z.dtype is x.dtype) + assert_(z[3] is masked) + assert_(z[4] is not masked) + assert_(z[7] is not masked) + assert_(z[8] is masked) + assert_(z[9] is masked)
+ + def test_where_with_masked_condition(self): + x = array([1., 2., 3., 4., 5.]) + c = array([1, 1, 1, 0, 0]) + x[2] = masked + z = where(c, x, -x) + assert_equal(z, [1., 2., 0., -4., -5]) + c[0] = masked + z = where(c, x, -x) + assert_equal(z, [1., 2., 0., -4., -5]) + assert_(z[0] is masked) + assert_(z[1] is not masked) + assert_(z[2] is masked) + + x = arange(1, 6) + x[-1] = masked + y = arange(1, 6) * 10 + y[2] = masked + c = array([1, 1, 1, 0, 0], mask=[1, 0, 0, 0, 0]) + cm = c.filled(1) + z = where(c, x, y) + zm = where(cm, x, y) + assert_equal(z, zm) + assert_(getmask(zm) is nomask) + assert_equal(zm, [1, 2, 3, 40, 50]) + z = where(c, masked, 1) + assert_equal(z, [99, 99, 99, 1, 1]) + z = where(c, 1, masked) + assert_equal(z, [99, 1, 1, 99, 99])
+ + def test_where_type(self): + # Test the type conservation with where + x = np.arange(4, dtype=np.int32) + y = np.arange(4, dtype=np.float32) * 2.2 + test = where(x > 1.5, y, x).dtype + control = np.find_common_type([np.int32, np.float32], []) + assert_equal(test, control)
+ + def test_where_broadcast(self): + # Issue 8599 + x = np.arange(9).reshape(3, 3) + y = np.zeros(3) + core = np.where([1, 0, 1], x, y) + ma = where([1, 0, 1], x, y) + + assert_equal(core, ma) + assert_equal(core.dtype, ma.dtype)
+ + def test_where_structured(self): + # Issue 8600 + dt = np.dtype([('a', int), ('b', int)]) + x = np.array([(1, 2), (3, 4), (5, 6)], dtype=dt) + y = np.array((10, 20), dtype=dt) + core = np.where([0, 1, 1], x, y) + ma = where([0, 1, 1], x, y) + + assert_equal(core, ma) + assert_equal(core.dtype, ma.dtype)
+ + def test_where_structured_masked(self): + dt = np.dtype([('a', int), ('b', int)]) + x = np.array([(1, 2), (3, 4), (5, 6)], dtype=dt) + + ma = where([0, 1, 1], x, masked) + expected = masked_where([1, 0, 0], x) + + assert_equal(ma.dtype, expected.dtype) + assert_equal(ma, expected) + assert_equal(ma.mask, expected.mask)
+ + def test_choose(self): + # Test choose + choices = [[0, 1, 2, 3], [10, 11, 12, 13], + [20, 21, 22, 23], [30, 31, 32, 33]] + chosen = choose([2, 3, 1, 0], choices) + assert_equal(chosen, array([20, 31, 12, 3])) + chosen = choose([2, 4, 1, 0], choices, mode='clip') + assert_equal(chosen, array([20, 31, 12, 3])) + chosen = choose([2, 4, 1, 0], choices, mode='wrap') + assert_equal(chosen, array([20, 1, 12, 3])) + # Check with some masked indices + indices_ = array([2, 4, 1, 0], mask=[1, 0, 0, 1]) + chosen = choose(indices_, choices,
mode='wrap') + assert_equal(chosen, array([99, 1, 12, 99])) + assert_equal(chosen.mask, [1, 0, 0, 1]) + # Check with some masked choices + choices = array(choices, mask=[[0, 0, 0, 1], [1, 1, 0, 1], + [1, 0, 0, 0], [0, 0, 0, 0]]) + indices_ = [2, 3, 1, 0] + chosen = choose(indices_, choices, mode='wrap') + assert_equal(chosen, array([20, 31, 12, 3])) + assert_equal(chosen.mask, [1, 0, 0, 1])
+ + def test_choose_with_out(self): + # Test choose with an explicit out keyword + choices = [[0, 1, 2, 3], [10, 11, 12, 13], + [20, 21, 22, 23], [30, 31, 32, 33]] + store = empty(4, dtype=int) + chosen = choose([2, 3, 1, 0], choices, out=store) + assert_equal(store, array([20, 31, 12, 3])) + assert_(store is chosen) + # Check with some masked indices + out + store = empty(4, dtype=int) + indices_ = array([2, 3, 1, 0], mask=[1, 0, 0, 1]) + chosen = choose(indices_, choices, mode='wrap', out=store) + assert_equal(store, array([99, 31, 12, 99])) + assert_equal(store.mask, [1, 0, 0, 1]) + # Check with some masked choices + out in an ndarray + choices = array(choices, mask=[[0, 0, 0, 1], [1, 1, 0, 1], + [1, 0, 0, 0], [0, 0, 0, 0]]) + indices_ = [2, 3, 1, 0] + store = empty(4, dtype=int).view(ndarray) + chosen = choose(indices_, choices, mode='wrap', out=store) + assert_equal(store, array([999999, 31, 12, 999999]))
+ + def test_reshape(self): + a = arange(10) + a[0] = masked + # Try the default + b = a.reshape((5, 2)) + assert_equal(b.shape, (5, 2)) + assert_(b.flags['C']) + # Try w/ arguments as list instead of tuple + b = a.reshape(5, 2) + assert_equal(b.shape, (5, 2)) + assert_(b.flags['C']) + # Try w/ order + b = a.reshape((5, 2), order='F') + assert_equal(b.shape, (5, 2)) + assert_(b.flags['F']) + # Try w/ order + b = a.reshape(5, 2, order='F') + assert_equal(b.shape, (5, 2)) + assert_(b.flags['F']) + + c = np.reshape(a, (2, 5)) + assert_(isinstance(c, MaskedArray)) + assert_equal(c.shape, (2, 5)) + assert_(c[0, 0] is masked) + assert_(c.flags['C'])
+ + def test_make_mask_descr(self): + # Flexible + ntype = [('a', float), ('b', float)] + test = make_mask_descr(ntype) + assert_equal(test, [('a', bool), ('b', bool)]) + assert_(test is make_mask_descr(test)) + + # Standard w/ shape + ntype = (float, 2) + test = make_mask_descr(ntype) + assert_equal(test, (bool, 2)) + assert_(test is make_mask_descr(test)) + + # Standard standard + ntype = float + test = make_mask_descr(ntype) + assert_equal(test, np.dtype(bool)) + assert_(test is make_mask_descr(test)) + + # Nested + ntype = [('a', float), ('b', [('ba', float), ('bb', float)])] + test = make_mask_descr(ntype) + control = np.dtype([('a', 'b1'), ('b', [('ba', 'b1'), ('bb', 'b1')])]) + assert_equal(test, control) + assert_(test is make_mask_descr(test)) + + # Named + shape + ntype = [('a', (float, 2))] + test = make_mask_descr(ntype) + assert_equal(test, np.dtype([('a', (bool, 2))])) + assert_(test is make_mask_descr(test)) + + # 2 names + ntype = [(('A', 'a'), float)] + test = make_mask_descr(ntype) + assert_equal(test, np.dtype([(('A', 'a'), bool)])) + assert_(test is make_mask_descr(test)) + + # nested boolean types should preserve identity + base_type = np.dtype([('a', int, 3)]) + base_mtype = make_mask_descr(base_type) + sub_type = np.dtype([('a', int), ('b', base_mtype)]) + test = make_mask_descr(sub_type) + assert_equal(test, np.dtype([('a', bool), ('b', [('a', bool, 3)])])) + assert_(test.fields['b'][0] is base_mtype)
+ + def test_make_mask(self): + # Test make_mask + # w/ a list as an input + mask = [0, 1] + test = make_mask(mask) +
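+ # NOTE (editorial comment, not part of the original file): make_mask
+ # normalizes list, ndarray and flexible-dtype inputs to a boolean array
+ # of type MaskType (np.bool_) unless an explicit dtype is requested, as
+ # the cases below verify.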
assert_equal(test.dtype, MaskType) + assert_equal(test, [0, 1]) + # w/ a ndarray as an input + mask = np.array([0, 1], dtype=bool) + test = make_mask(mask) + assert_equal(test.dtype, MaskType) + assert_equal(test, [0, 1]) + # w/ a flexible-type ndarray as an input - use default + mdtype = [('a', bool), ('b', bool)] + mask = np.array([(0, 0), (0, 1)], dtype=mdtype) + test = make_mask(mask) + assert_equal(test.dtype, MaskType) + assert_equal(test, [1, 1]) + # w/ a flexible-type ndarray as an input - use input dtype + mdtype = [('a', bool), ('b', bool)] + mask = np.array([(0, 0), (0, 1)], dtype=mdtype) + test = make_mask(mask, dtype=mask.dtype) + assert_equal(test.dtype, mdtype) + assert_equal(test, mask) + # w/ a flexible-type ndarray as an input - use input dtype + mdtype = [('a', float), ('b', float)] + bdtype = [('a', bool), ('b', bool)] + mask = np.array([(0, 0), (0, 1)], dtype=mdtype) + test = make_mask(mask, dtype=mask.dtype) + assert_equal(test.dtype, bdtype) + assert_equal(test, np.array([(0, 0), (0, 1)], dtype=bdtype)) + # Ensure this also works for void + mask = np.array((False, True), dtype='?,?')[()] + assert_(isinstance(mask, np.void)) + test = make_mask(mask, dtype=mask.dtype) + assert_equal(test, mask) + assert_(test is not mask) + mask = np.array((0, 1), dtype='i4,i4')[()] + test2 = make_mask(mask, dtype=mask.dtype) + assert_equal(test2, test) + # test that nomask is returned when m is nomask. + bools = [True, False] + dtypes = [MaskType, float] + msgformat = 'copy=%s, shrink=%s, dtype=%s' + for cpy, shr, dt in itertools.product(bools, bools, dtypes): + res = make_mask(nomask, copy=cpy, shrink=shr, dtype=dt) + assert_(res is nomask, msgformat % (cpy, shr, dt)) + + def test_mask_or(self): + # Initialize + mtype = [('a', bool), ('b', bool)] + mask = np.array([(0, 0), (0, 1), (1, 0), (0, 0)], dtype=mtype) + # Test using nomask as input + test = mask_or(mask, nomask) + assert_equal(test, mask) + test = mask_or(nomask, mask) + assert_equal(test, mask) + # Using False as input + test = mask_or(mask, False) + assert_equal(test, mask) + # Using another array w / the same dtype + other = np.array([(0, 1), (0, 1), (0, 1), (0, 1)], dtype=mtype) + test = mask_or(mask, other) + control = np.array([(0, 1), (0, 1), (1, 1), (0, 1)], dtype=mtype) + assert_equal(test, control) + # Using another array w / a different dtype + othertype = [('A', bool), ('B', bool)] + other = np.array([(0, 1), (0, 1), (0, 1), (0, 1)], dtype=othertype) + try: + test = mask_or(mask, other) + except ValueError: + pass + # Using nested arrays + dtype = [('a', bool), ('b', [('ba', bool), ('bb', bool)])] + amask = np.array([(0, (1, 0)), (0, (1, 0))], dtype=dtype) + bmask = np.array([(1, (0, 1)), (0, (0, 0))], dtype=dtype) + cntrl = np.array([(1, (1, 1)), (0, (1, 0))], dtype=dtype) + assert_equal(mask_or(amask, bmask), cntrl) + + def test_flatten_mask(self): + # Tests flatten mask + # Standard dtype + mask = np.array([0, 0, 1], dtype=bool) + assert_equal(flatten_mask(mask), mask) + # Flexible dtype + mask = np.array([(0, 0), (0, 1)], dtype=[('a', bool), ('b', bool)]) + test = flatten_mask(mask) + control = np.array([0, 0, 0, 1], dtype=bool) + assert_equal(test, control) + + mdtype = [('a', bool), ('b', [('ba', bool), ('bb', bool)])] + data = [(0, (0, 0)), (0, (0, 1))] + mask = np.array(data, dtype=mdtype) + test = flatten_mask(mask) + control = np.array([0, 0, 0, 0, 0, 1], dtype=bool) + assert_equal(test, control) + + def test_on_ndarray(self): + # Test functions on ndarrays + a = np.array([1, 2, 3, 4]) + m = array(a, 
mask=False) + test = anom(a) + assert_equal(test, m.anom()) + test = reshape(a, (2, 2)) + assert_equal(test, m.reshape(2, 2)) + + def test_compress(self): + # Test compress function on ndarray and masked array + # Address Github #2495. + arr = np.arange(8) + arr.shape = 4, 2 + cond = np.array([True, False, True, True]) + control = arr[[0, 2, 3]] + test = np.ma.compress(cond, arr, axis=0) + assert_equal(test, control) + marr = np.ma.array(arr) + test = np.ma.compress(cond, marr, axis=0) + assert_equal(test, control) + + def test_compressed(self): + # Test ma.compressed function. + # Address gh-4026 + a = np.ma.array([1, 2]) + test = np.ma.compressed(a) + assert_(type(test) is np.ndarray) + + # Test case when input data is ndarray subclass + class A(np.ndarray): + pass + + a = np.ma.array(A(shape=0)) + test = np.ma.compressed(a) + assert_(type(test) is A) + + # Test that compress flattens + test = np.ma.compressed([[1],[2]]) + assert_equal(test.ndim, 1) + test = np.ma.compressed([[[[[1]]]]]) + assert_equal(test.ndim, 1) + + # Test case when input is MaskedArray subclass + class M(MaskedArray): + pass + + test = np.ma.compressed(M([[[]], [[]]])) + assert_equal(test.ndim, 1) + + # with .compressed() overridden + class M(MaskedArray): + def compressed(self): + return 42 + + test = np.ma.compressed(M([[[]], [[]]])) + assert_equal(test, 42) + + def test_convolve(self): + a = masked_equal(np.arange(5), 2) + b = np.array([1, 1]) + test = np.ma.convolve(a, b) + assert_equal(test, masked_equal([0, 1, -1, -1, 7, 4], -1)) + + test = np.ma.convolve(a, b, propagate_mask=False) + assert_equal(test, masked_equal([0, 1, 1, 3, 7, 4], -1)) + + test = np.ma.convolve([1, 1], [1, 1, 1]) + assert_equal(test, masked_equal([1, 2, 2, 1], -1)) + + a = [1, 1] + b = masked_equal([1, -1, -1, 1], -1) + test = np.ma.convolve(a, b, propagate_mask=False) + assert_equal(test, masked_equal([1, 1, -1, 1, 1], -1)) + test = np.ma.convolve(a, b, propagate_mask=True) + assert_equal(test, masked_equal([-1, -1, -1, -1, -1], -1)) + + +class TestMaskedFields: + + def setup(self): + ilist = [1, 2, 3, 4, 5] + flist = [1.1, 2.2, 3.3, 4.4, 5.5] + slist = ['one', 'two', 'three', 'four', 'five'] + ddtype = [('a', int), ('b', float), ('c', '|S8')] + mdtype = [('a', bool), ('b', bool), ('c', bool)] + mask = [0, 1, 0, 0, 1] + base = array(list(zip(ilist, flist, slist)), mask=mask, dtype=ddtype) + self.data = dict(base=base, mask=mask, ddtype=ddtype, mdtype=mdtype) + + def test_set_records_masks(self): + base = self.data['base'] + mdtype = self.data['mdtype'] + # Set w/ nomask or masked + base.mask = nomask + assert_equal_records(base._mask, np.zeros(base.shape, dtype=mdtype)) + base.mask = masked + assert_equal_records(base._mask, np.ones(base.shape, dtype=mdtype)) + # Set w/ simple boolean + base.mask = False + assert_equal_records(base._mask, np.zeros(base.shape, dtype=mdtype)) + base.mask = True + assert_equal_records(base._mask, np.ones(base.shape, dtype=mdtype)) + # Set w/ list + base.mask = [0, 0, 0, 1, 1] + assert_equal_records(base._mask, + np.array([(x, x, x) for x in [0, 0, 0, 1, 1]], + dtype=mdtype)) + + def test_set_record_element(self): + # Check setting an element of a record) + base = self.data['base'] + (base_a, base_b, base_c) = (base['a'], base['b'], base['c']) + base[0] = (pi, pi, 'pi') + + assert_equal(base_a.dtype, int) + assert_equal(base_a._data, [3, 2, 3, 4, 5]) + + assert_equal(base_b.dtype, float) + assert_equal(base_b._data, [pi, 2.2, 3.3, 4.4, 5.5]) + + assert_equal(base_c.dtype, '|S8') + assert_equal(base_c._data, 
+ [b'pi', b'two', b'three', b'four', b'five']) + + def test_set_record_slice(self): + base = self.data['base'] + (base_a, base_b, base_c) = (base['a'], base['b'], base['c']) + base[:3] = (pi, pi, 'pi') + + assert_equal(base_a.dtype, int) + assert_equal(base_a._data, [3, 3, 3, 4, 5]) + + assert_equal(base_b.dtype, float) + assert_equal(base_b._data, [pi, pi, pi, 4.4, 5.5]) + + assert_equal(base_c.dtype, '|S8') + assert_equal(base_c._data, + [b'pi', b'pi', b'pi', b'four', b'five']) + + def test_mask_element(self): + "Check record access" + base = self.data['base'] + base[0] = masked + + for n in ('a', 'b', 'c'): + assert_equal(base[n].mask, [1, 1, 0, 0, 1]) + assert_equal(base[n]._data, base._data[n]) + + def test_getmaskarray(self): + # Test getmaskarray on flexible dtype + ndtype = [('a', int), ('b', float)] + test = empty(3, dtype=ndtype) + assert_equal(getmaskarray(test), + np.array([(0, 0), (0, 0), (0, 0)], + dtype=[('a', '|b1'), ('b', '|b1')])) + test[:] = masked + assert_equal(getmaskarray(test), + np.array([(1, 1), (1, 1), (1, 1)], + dtype=[('a', '|b1'), ('b', '|b1')])) + + def test_view(self): + # Test view w/ flexible dtype + iterator = list(zip(np.arange(10), np.random.rand(10))) + data = np.array(iterator) + a = array(iterator, dtype=[('a', float), ('b', float)]) + a.mask[0] = (1, 0) + controlmask = np.array([1] + 19 * [0], dtype=bool) + # Transform globally to simple dtype + test = a.view(float) + assert_equal(test, data.ravel()) + assert_equal(test.mask, controlmask) + # Transform globally to dty + test = a.view((float, 2)) + assert_equal(test, data) + assert_equal(test.mask, controlmask.reshape(-1, 2)) + + def test_getitem(self): + ndtype = [('a', float), ('b', float)] + a = array(list(zip(np.random.rand(10), np.arange(10))), dtype=ndtype) + a.mask = np.array(list(zip([0, 0, 0, 0, 0, 0, 0, 0, 1, 1], + [1, 0, 0, 0, 0, 0, 0, 0, 1, 0])), + dtype=[('a', bool), ('b', bool)]) + + def _test_index(i): + assert_equal(type(a[i]), mvoid) + assert_equal_records(a[i]._data, a._data[i]) + assert_equal_records(a[i]._mask, a._mask[i]) + + assert_equal(type(a[i, ...]), MaskedArray) + assert_equal_records(a[i,...]._data, a._data[i,...]) + assert_equal_records(a[i,...]._mask, a._mask[i,...]) + + _test_index(1) # No mask + _test_index(0) # One element masked + _test_index(-2) # All element masked + + def test_setitem(self): + # Issue 4866: check that one can set individual items in [record][col] + # and [col][record] order + ndtype = np.dtype([('a', float), ('b', int)]) + ma = np.ma.MaskedArray([(1.0, 1), (2.0, 2)], dtype=ndtype) + ma['a'][1] = 3.0 + assert_equal(ma['a'], np.array([1.0, 3.0])) + ma[1]['a'] = 4.0 + assert_equal(ma['a'], np.array([1.0, 4.0])) + # Issue 2403 + mdtype = np.dtype([('a', bool), ('b', bool)]) + # soft mask + control = np.array([(False, True), (True, True)], dtype=mdtype) + a = np.ma.masked_all((2,), dtype=ndtype) + a['a'][0] = 2 + assert_equal(a.mask, control) + a = np.ma.masked_all((2,), dtype=ndtype) + a[0]['a'] = 2 + assert_equal(a.mask, control) + # hard mask + control = np.array([(True, True), (True, True)], dtype=mdtype) + a = np.ma.masked_all((2,), dtype=ndtype) + a.harden_mask() + a['a'][0] = 2 + assert_equal(a.mask, control) + a = np.ma.masked_all((2,), dtype=ndtype) + a.harden_mask() + a[0]['a'] = 2 + assert_equal(a.mask, control) + + def test_setitem_scalar(self): + # 8510 + mask_0d = np.ma.masked_array(1, mask=True) + arr = np.ma.arange(3) + arr[0] = mask_0d + assert_array_equal(arr.mask, [True, False, False]) + + def test_element_len(self): + # check that 
len() works for mvoid (Github issue #576) + for rec in self.data['base']: + assert_equal(len(rec), len(self.data['ddtype'])) + + +class TestMaskedObjectArray: + + def test_getitem(self): + arr = np.ma.array([None, None]) + for dt in [float, object]: + a0 = np.eye(2).astype(dt) + a1 = np.eye(3).astype(dt) + arr[0] = a0 + arr[1] = a1 + + assert_(arr[0] is a0) + assert_(arr[1] is a1) + assert_(isinstance(arr[0,...], MaskedArray)) + assert_(isinstance(arr[1,...], MaskedArray)) + assert_(arr[0,...][()] is a0) + assert_(arr[1,...][()] is a1) + + arr[0] = np.ma.masked + + assert_(arr[1] is a1) + assert_(isinstance(arr[0,...], MaskedArray)) + assert_(isinstance(arr[1,...], MaskedArray)) + assert_equal(arr[0,...].mask, True) + assert_(arr[1,...][()] is a1) + + # gh-5962 - object arrays of arrays do something special + assert_equal(arr[0].data, a0) + assert_equal(arr[0].mask, True) + assert_equal(arr[0,...][()].data, a0) + assert_equal(arr[0,...][()].mask, True) + + def test_nested_ma(self): + + arr = np.ma.array([None, None]) + # set the first object to be an unmasked masked constant. A little fiddly + arr[0,...] = np.array([np.ma.masked], object)[0,...] + + # check the above line did what we were aiming for + assert_(arr.data[0] is np.ma.masked) + + # test that getitem returned the value by identity + assert_(arr[0] is np.ma.masked) + + # now mask the masked value! + arr[0] = np.ma.masked + assert_(arr[0] is np.ma.masked) + + +class TestMaskedView: + + def setup(self): + iterator = list(zip(np.arange(10), np.random.rand(10))) + data = np.array(iterator) + a = array(iterator, dtype=[('a', float), ('b', float)]) + a.mask[0] = (1, 0) + controlmask = np.array([1] + 19 * [0], dtype=bool) + self.data = (data, a, controlmask) + + def test_view_to_nothing(self): + (data, a, controlmask) = self.data + test = a.view() + assert_(isinstance(test, MaskedArray)) + assert_equal(test._data, a._data) + assert_equal(test._mask, a._mask) + + def test_view_to_type(self): + (data, a, controlmask) = self.data + test = a.view(np.ndarray) + assert_(not isinstance(test, MaskedArray)) + assert_equal(test, a._data) + assert_equal_records(test, data.view(a.dtype).squeeze()) + + def test_view_to_simple_dtype(self): + (data, a, controlmask) = self.data + # View globally + test = a.view(float) + assert_(isinstance(test, MaskedArray)) + assert_equal(test, data.ravel()) + assert_equal(test.mask, controlmask) + + def test_view_to_flexible_dtype(self): + (data, a, controlmask) = self.data + + test = a.view([('A', float), ('B', float)]) + assert_equal(test.mask.dtype.names, ('A', 'B')) + assert_equal(test['A'], a['a']) + assert_equal(test['B'], a['b']) + + test = a[0].view([('A', float), ('B', float)]) + assert_(isinstance(test, MaskedArray)) + assert_equal(test.mask.dtype.names, ('A', 'B')) + assert_equal(test['A'], a['a'][0]) + assert_equal(test['B'], a['b'][0]) + + test = a[-1].view([('A', float), ('B', float)]) + assert_(isinstance(test, MaskedArray)) + assert_equal(test.dtype.names, ('A', 'B')) + assert_equal(test['A'], a['a'][-1]) + assert_equal(test['B'], a['b'][-1]) + + def test_view_to_subdtype(self): + (data, a, controlmask) = self.data + # View globally + test = a.view((float, 2)) + assert_(isinstance(test, MaskedArray)) + assert_equal(test, data) + assert_equal(test.mask, controlmask.reshape(-1, 2)) + # View on 1 masked element + test = a[0].view((float, 2)) + assert_(isinstance(test, MaskedArray)) + assert_equal(test, data[0]) + assert_equal(test.mask, (1, 0)) + # View on 1 unmasked element + test = a[-1].view((float, 
2)) + assert_(isinstance(test, MaskedArray)) + assert_equal(test, data[-1]) + + def test_view_to_dtype_and_type(self): + (data, a, controlmask) = self.data + + test = a.view((float, 2), np.recarray) + assert_equal(test, data) + assert_(isinstance(test, np.recarray)) + assert_(not isinstance(test, MaskedArray)) + + +class TestOptionalArgs: + def test_ndarrayfuncs(self): + # test axis arg behaves the same as ndarray (including multiple axes) + + d = np.arange(24.0).reshape((2,3,4)) + m = np.zeros(24, dtype=bool).reshape((2,3,4)) + # mask out last element of last dimension + m[:,:,-1] = True + a = np.ma.array(d, mask=m) + + def testaxis(f, a, d): + numpy_f = numpy.__getattribute__(f) + ma_f = np.ma.__getattribute__(f) + + # test axis arg + assert_equal(ma_f(a, axis=1)[...,:-1], numpy_f(d[...,:-1], axis=1)) + assert_equal(ma_f(a, axis=(0,1))[...,:-1], + numpy_f(d[...,:-1], axis=(0,1))) + + def testkeepdims(f, a, d): + numpy_f = numpy.__getattribute__(f) + ma_f = np.ma.__getattribute__(f) + + # test keepdims arg + assert_equal(ma_f(a, keepdims=True).shape, + numpy_f(d, keepdims=True).shape) + assert_equal(ma_f(a, keepdims=False).shape, + numpy_f(d, keepdims=False).shape) + + # test both at once + assert_equal(ma_f(a, axis=1, keepdims=True)[...,:-1], + numpy_f(d[...,:-1], axis=1, keepdims=True)) + assert_equal(ma_f(a, axis=(0,1), keepdims=True)[...,:-1], + numpy_f(d[...,:-1], axis=(0,1), keepdims=True)) + + for f in ['sum', 'prod', 'mean', 'var', 'std']: + testaxis(f, a, d) + testkeepdims(f, a, d) + + for f in ['min', 'max']: + testaxis(f, a, d) + + d = (np.arange(24).reshape((2,3,4))%2 == 0) + a = np.ma.array(d, mask=m) + for f in ['all', 'any']: + testaxis(f, a, d) + testkeepdims(f, a, d) + + def test_count(self): + # test np.ma.count specially + + d = np.arange(24.0).reshape((2,3,4)) + m = np.zeros(24, dtype=bool).reshape((2,3,4)) + m[:,0,:] = True + a = np.ma.array(d, mask=m) + + assert_equal(count(a), 16) + assert_equal(count(a, axis=1), 2*ones((2,4))) + assert_equal(count(a, axis=(0,1)), 4*ones((4,))) + assert_equal(count(a, keepdims=True), 16*ones((1,1,1))) + assert_equal(count(a, axis=1, keepdims=True), 2*ones((2,1,4))) + assert_equal(count(a, axis=(0,1), keepdims=True), 4*ones((1,1,4))) + assert_equal(count(a, axis=-2), 2*ones((2,4))) + assert_raises(ValueError, count, a, axis=(1,1)) + assert_raises(np.AxisError, count, a, axis=3) + + # check the 'nomask' path + a = np.ma.array(d, mask=nomask) + + assert_equal(count(a), 24) + assert_equal(count(a, axis=1), 3*ones((2,4))) + assert_equal(count(a, axis=(0,1)), 6*ones((4,))) + assert_equal(count(a, keepdims=True), 24*ones((1,1,1))) + assert_equal(np.ndim(count(a, keepdims=True)), 3) + assert_equal(count(a, axis=1, keepdims=True), 3*ones((2,1,4))) + assert_equal(count(a, axis=(0,1), keepdims=True), 6*ones((1,1,4))) + assert_equal(count(a, axis=-2), 3*ones((2,4))) + assert_raises(ValueError, count, a, axis=(1,1)) + assert_raises(np.AxisError, count, a, axis=3) + + # check the 'masked' singleton + assert_equal(count(np.ma.masked), 0) + + # check 0-d arrays do not allow axis > 0 + assert_raises(np.AxisError, count, np.ma.array(1), axis=1) + + +class TestMaskedConstant: + def _do_add_test(self, add): + # sanity check + assert_(add(np.ma.masked, 1) is np.ma.masked) + + # now try with a vector + vector = np.array([1, 2, 3]) + result = add(np.ma.masked, vector) + + # lots of things could go wrong here + assert_(result is not np.ma.masked) + assert_(not isinstance(result, np.ma.core.MaskedConstant)) + assert_equal(result.shape, vector.shape) + 
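+ # NOTE (editorial comment, not part of the original file): adding the
+ # masked constant to a vector must broadcast to a fully masked result of
+ # the vector's shape instead of collapsing to the masked singleton,
+ # which the next assert checks.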
assert_equal(np.ma.getmask(result), np.ones(vector.shape, dtype=bool)) + + def test_ufunc(self): + self._do_add_test(np.add) + + def test_operator(self): + self._do_add_test(lambda a, b: a + b) + + def test_ctor(self): + m = np.ma.array(np.ma.masked) + + # most importantly, we do not want to create a new MaskedConstant + # instance + assert_(not isinstance(m, np.ma.core.MaskedConstant)) + assert_(m is not np.ma.masked) + + def test_repr(self): + # copies should not exist, but if they do, it should be obvious that + # something is wrong + assert_equal(repr(np.ma.masked), 'masked') + + # create a new instance in a weird way + masked2 = np.ma.MaskedArray.__new__(np.ma.core.MaskedConstant) + assert_not_equal(repr(masked2), 'masked') + + def test_pickle(self): + from io import BytesIO + + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + with BytesIO() as f: + pickle.dump(np.ma.masked, f, protocol=proto) + f.seek(0) + res = pickle.load(f) + assert_(res is np.ma.masked) + + def test_copy(self): + # gh-9328 + # copy is a no-op, like it is with np.True_ + assert_equal( + np.ma.masked.copy() is np.ma.masked, + np.True_.copy() is np.True_) + + def test__copy(self): + import copy + assert_( + copy.copy(np.ma.masked) is np.ma.masked) + + def test_deepcopy(self): + import copy + assert_( + copy.deepcopy(np.ma.masked) is np.ma.masked) + + def test_immutable(self): + orig = np.ma.masked + assert_raises(np.ma.core.MaskError, operator.setitem, orig, (), 1) + assert_raises(ValueError,operator.setitem, orig.data, (), 1) + assert_raises(ValueError, operator.setitem, orig.mask, (), False) + + view = np.ma.masked.view(np.ma.MaskedArray) + assert_raises(ValueError, operator.setitem, view, (), 1) + assert_raises(ValueError, operator.setitem, view.data, (), 1) + assert_raises(ValueError, operator.setitem, view.mask, (), False) + + def test_coercion_int(self): + a_i = np.zeros((), int) + assert_raises(MaskError, operator.setitem, a_i, (), np.ma.masked) + assert_raises(MaskError, int, np.ma.masked) + + def test_coercion_float(self): + a_f = np.zeros((), float) + assert_warns(UserWarning, operator.setitem, a_f, (), np.ma.masked) + assert_(np.isnan(a_f[()])) + + @pytest.mark.xfail(reason="See gh-9750") + def test_coercion_unicode(self): + a_u = np.zeros((), 'U10') + a_u[()] = np.ma.masked + assert_equal(a_u[()], u'--') + + @pytest.mark.xfail(reason="See gh-9750") + def test_coercion_bytes(self): + a_b = np.zeros((), 'S10') + a_b[()] = np.ma.masked + assert_equal(a_b[()], b'--') + + def test_subclass(self): + # https://github.com/astropy/astropy/issues/6645 + class Sub(type(np.ma.masked)): pass + + a = Sub() + assert_(a is Sub()) + assert_(a is not np.ma.masked) + assert_not_equal(repr(a), 'masked') + + def test_attributes_readonly(self): + assert_raises(AttributeError, setattr, np.ma.masked, 'shape', (1,)) + assert_raises(AttributeError, setattr, np.ma.masked, 'dtype', np.int64) + + +class TestMaskedWhereAliases: + + # TODO: Test masked_object, masked_equal, ... 
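+
+ # NOTE (editorial sketch, not part of the original file): masked_values
+ # masks entries approximately equal to the given value and stores that
+ # value as the fill_value. The method name is hypothetical.
+ def test_masked_values_editorial_sketch(self):
+     res = masked_values([1.0, 2.0, 3.0], 2.0)
+     assert_equal(res.mask, [False, True, False])
+     assert_equal(res.fill_value, 2.0)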
+ + def test_masked_values(self): + res = masked_values(np.array([-32768.0]), np.int16(-32768)) + assert_equal(res.mask, [True]) + + res = masked_values(np.inf, np.inf) + assert_equal(res.mask, True) + + res = np.ma.masked_values(np.inf, -np.inf) + assert_equal(res.mask, False) + + res = np.ma.masked_values([1, 2, 3, 4], 5, shrink=True) + assert_(res.mask is np.ma.nomask) + + res = np.ma.masked_values([1, 2, 3, 4], 5, shrink=False) + assert_equal(res.mask, [False] * 4)
+ + +def test_masked_array(): + a = np.ma.array([0, 1, 2, 3], mask=[0, 0, 1, 0]) + assert_equal(np.argwhere(a), [[1], [3]])
+ +def test_masked_array_no_copy(): + # check nomask array is updated in place + a = np.ma.array([1, 2, 3, 4]) + _ = np.ma.masked_where(a == 3, a, copy=False) + assert_array_equal(a.mask, [False, False, True, False]) + # check masked array is updated in place + a = np.ma.array([1, 2, 3, 4], mask=[1, 0, 0, 0]) + _ = np.ma.masked_where(a == 3, a, copy=False) + assert_array_equal(a.mask, [True, False, True, False])
+ +def test_append_masked_array(): + a = np.ma.masked_equal([1,2,3], value=2) + b = np.ma.masked_equal([4,3,2], value=2) + + result = np.ma.append(a, b) + expected_data = [1, 2, 3, 4, 3, 2] + expected_mask = [False, True, False, False, False, True] + assert_array_equal(result.data, expected_data) + assert_array_equal(result.mask, expected_mask) + + a = np.ma.masked_all((2,2)) + b = np.ma.ones((3,1)) + + result = np.ma.append(a, b) + expected_data = [1] * 3 + expected_mask = [True] * 4 + [False] * 3 + assert_array_equal(result.data[-3:], expected_data) + assert_array_equal(result.mask, expected_mask) + + result = np.ma.append(a, b, axis=None) + assert_array_equal(result.data[-3:], expected_data) + assert_array_equal(result.mask, expected_mask)
+ + +def test_append_masked_array_along_axis(): + a = np.ma.masked_equal([1,2,3], value=2) + b = np.ma.masked_values([[4, 5, 6], [7, 8, 9]], 7) + + # When `axis` is specified, `values` must have the correct shape. + assert_raises(ValueError, np.ma.append, a, b, axis=0) + + result = np.ma.append(a[np.newaxis,:], b, axis=0) + expected = np.ma.arange(1, 10) + expected[[1, 6]] = np.ma.masked + expected = expected.reshape((3,3)) + assert_array_equal(result.data, expected.data) + assert_array_equal(result.mask, expected.mask)
+ +def test_default_fill_value_complex(): + # regression test for Python 3, where 'unicode' was not defined + assert_(default_fill_value(1 + 1j) == 1.e20 + 0.0j)
+ + +def test_ufunc_with_output(): + # check that giving an output argument always returns that output. + # Regression test for gh-8416.
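+ # NOTE (editorial comment, not part of the original file): the assert
+ # below checks identity (``is``), not equality: np.add must write
+ # through the masked array and return the very object passed as ``out``.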
+ x = array([1., 2., 3.], mask=[0, 0, 1]) + y = np.add(x, 1., out=x) + assert_(y is x) + + +def test_ufunc_with_out_varied(): + """ Test that masked arrays are immune to gh-10459 """ + # the mask of the output should not affect the result, however it is passed + a = array([ 1, 2, 3], mask=[1, 0, 0]) + b = array([10, 20, 30], mask=[1, 0, 0]) + out = array([ 0, 0, 0], mask=[0, 0, 1]) + expected = array([11, 22, 33], mask=[1, 0, 0]) + + out_pos = out.copy() + res_pos = np.add(a, b, out_pos) + + out_kw = out.copy() + res_kw = np.add(a, b, out=out_kw) + + out_tup = out.copy() + res_tup = np.add(a, b, out=(out_tup,)) + + assert_equal(res_kw.mask, expected.mask) + assert_equal(res_kw.data, expected.data) + assert_equal(res_tup.mask, expected.mask) + assert_equal(res_tup.data, expected.data) + assert_equal(res_pos.mask, expected.mask) + assert_equal(res_pos.data, expected.data) + + +def test_astype_mask_ordering(): + descr = [('v', int, 3), ('x', [('y', float)])] + x = array([ + [([1, 2, 3], (1.0,)), ([1, 2, 3], (2.0,))], + [([1, 2, 3], (3.0,)), ([1, 2, 3], (4.0,))]], dtype=descr) + x[0]['v'][0] = np.ma.masked + + x_a = x.astype(descr) + assert x_a.dtype.names == np.dtype(descr).names + assert x_a.mask.dtype.names == np.dtype(descr).names + assert_equal(x, x_a) + + assert_(x is x.astype(x.dtype, copy=False)) + assert_equal(type(x.astype(x.dtype, subok=False)), np.ndarray) + + x_f = x.astype(x.dtype, order='F') + assert_(x_f.flags.f_contiguous) + assert_(x_f.mask.flags.f_contiguous) + + # Also test the same indirectly, via np.array + x_a2 = np.array(x, dtype=descr, subok=True) + assert x_a2.dtype.names == np.dtype(descr).names + assert x_a2.mask.dtype.names == np.dtype(descr).names + assert_equal(x, x_a2) + + assert_(x is np.array(x, dtype=descr, copy=False, subok=True)) + + x_f2 = np.array(x, dtype=x.dtype, order='F', subok=True) + assert_(x_f2.flags.f_contiguous) + assert_(x_f2.mask.flags.f_contiguous) + + +@pytest.mark.parametrize('dt1', num_dts, ids=num_ids) +@pytest.mark.parametrize('dt2', num_dts, ids=num_ids) +@pytest.mark.filterwarnings('ignore::numpy.ComplexWarning') +def test_astype_basic(dt1, dt2): + # See gh-12070 + src = np.ma.array(ones(3, dt1), fill_value=1) + dst = src.astype(dt2) + + assert_(src.fill_value == 1) + assert_(src.dtype == dt1) + assert_(src.fill_value.dtype == dt1) + + assert_(dst.fill_value == 1) + assert_(dst.dtype == dt2) + assert_(dst.fill_value.dtype == dt2) + + assert_equal(src, dst) + + +def test_fieldless_void(): + dt = np.dtype([]) # a void dtype with no fields + x = np.empty(4, dt) + + # these arrays contain no values, so there's little to test - but this + # shouldn't crash + mx = np.ma.array(x) + assert_equal(mx.dtype, x.dtype) + assert_equal(mx.shape, x.shape) + + mx = np.ma.array(x, mask=x) + assert_equal(mx.dtype, x.dtype) + assert_equal(mx.shape, x.shape) + + +def test_mask_shape_assignment_does_not_break_masked(): + a = np.ma.masked + b = np.ma.array(1, mask=a.mask) + b.shape = (1,) + assert_equal(a.mask.shape, ()) + +@pytest.mark.skipif(sys.flags.optimize > 1, + reason="no docstrings present to inspect when PYTHONOPTIMIZE/Py_OptimizeFlag > 1") +def test_doc_note(): + def method(self): + """This docstring + + Has multiple lines + + And notes + + Notes + ----- + original note + """ + pass + + expected_doc = """This docstring + +Has multiple lines + +And notes + +Notes +----- +note + +original note""" + + assert_equal(np.ma.core.doc_note(method.__doc__, "note"), expected_doc) diff --git 
a/.local/lib/python3.8/site-packages/numpy/ma/tests/test_deprecations.py b/.local/lib/python3.8/site-packages/numpy/ma/tests/test_deprecations.py new file mode 100644 index 0000000..3e0e09f --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/ma/tests/test_deprecations.py @@ -0,0 +1,89 @@ +"""Test deprecation and future warnings. + +""" +import pytest +import numpy as np +from numpy.testing import assert_warns +from numpy.ma.testutils import assert_equal +from numpy.ma.core import MaskedArrayFutureWarning +import io +import textwrap + +class TestArgsort: + """ gh-8701 """ + def _test_base(self, argsort, cls): + arr_0d = np.array(1).view(cls) + argsort(arr_0d) + + arr_1d = np.array([1, 2, 3]).view(cls) + argsort(arr_1d) + + # argsort has a bad default for >1d arrays + arr_2d = np.array([[1, 2], [3, 4]]).view(cls) + result = assert_warns( + np.ma.core.MaskedArrayFutureWarning, argsort, arr_2d) + assert_equal(result, argsort(arr_2d, axis=None)) + + # should be no warnings for explicitly specifying it + argsort(arr_2d, axis=None) + argsort(arr_2d, axis=-1) + + def test_function_ndarray(self): + return self._test_base(np.ma.argsort, np.ndarray) + + def test_function_maskedarray(self): + return self._test_base(np.ma.argsort, np.ma.MaskedArray) + + def test_method(self): + return self._test_base(np.ma.MaskedArray.argsort, np.ma.MaskedArray) + + +class TestMinimumMaximum: + def test_minimum(self): + assert_warns(DeprecationWarning, np.ma.minimum, np.ma.array([1, 2])) + + def test_maximum(self): + assert_warns(DeprecationWarning, np.ma.maximum, np.ma.array([1, 2])) + + def test_axis_default(self): + # NumPy 1.13, 2017-05-06 + + data1d = np.ma.arange(6) + data2d = data1d.reshape(2, 3) + + ma_min = np.ma.minimum.reduce + ma_max = np.ma.maximum.reduce + + # check that the default axis is still None, but warns on 2d arrays + result = assert_warns(MaskedArrayFutureWarning, ma_max, data2d) + assert_equal(result, ma_max(data2d, axis=None)) + + result = assert_warns(MaskedArrayFutureWarning, ma_min, data2d) + assert_equal(result, ma_min(data2d, axis=None)) + + # no warnings on 1d, as both new and old defaults are equivalent + result = ma_min(data1d) + assert_equal(result, ma_min(data1d, axis=None)) + assert_equal(result, ma_min(data1d, axis=0)) + + result = ma_max(data1d) + assert_equal(result, ma_max(data1d, axis=None)) + assert_equal(result, ma_max(data1d, axis=0)) + + +class TestFromtextfile: + def test_fromtextfile_delimitor(self): + # NumPy 1.22.0, 2021-09-23 + + textfile = io.StringIO(textwrap.dedent( + """ + A,B,C,D + 'string 1';1;1.0;'mixed column' + 'string 2';2;2.0; + 'string 3';3;3.0;123 + 'string 4';4;4.0;3.14 + """ + )) + + with pytest.warns(DeprecationWarning): + result = np.ma.mrecords.fromtextfile(textfile, delimitor=';') diff --git a/.local/lib/python3.8/site-packages/numpy/ma/tests/test_extras.py b/.local/lib/python3.8/site-packages/numpy/ma/tests/test_extras.py new file mode 100644 index 0000000..e735b9b --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/ma/tests/test_extras.py @@ -0,0 +1,1705 @@ +# pylint: disable-msg=W0611, W0612, W0511 +"""Tests suite for MaskedArray. 
+Adapted from the original test_ma by Pierre Gerard-Marchant + +:author: Pierre Gerard-Marchant +:contact: pierregm_at_uga_dot_edu +:version: $Id: test_extras.py 3473 2007-10-29 15:18:13Z jarrod.millman $ + +""" +import warnings +import itertools +import pytest + +import numpy as np +from numpy.testing import ( + assert_warns, suppress_warnings + ) +from numpy.ma.testutils import ( + assert_, assert_array_equal, assert_equal, assert_almost_equal + ) +from numpy.ma.core import ( + array, arange, masked, MaskedArray, masked_array, getmaskarray, shape, + nomask, ones, zeros, count + ) +from numpy.ma.extras import ( + atleast_1d, atleast_2d, atleast_3d, mr_, dot, polyfit, cov, corrcoef, + median, average, unique, setxor1d, setdiff1d, union1d, intersect1d, in1d, + ediff1d, apply_over_axes, apply_along_axis, compress_nd, compress_rowcols, + mask_rowcols, clump_masked, clump_unmasked, flatnotmasked_contiguous, + notmasked_contiguous, notmasked_edges, masked_all, masked_all_like, isin, + diagflat, stack, vstack + ) + + +class TestGeneric: + # + def test_masked_all(self): + # Tests masked_all + # Standard dtype + test = masked_all((2,), dtype=float) + control = array([1, 1], mask=[1, 1], dtype=float) + assert_equal(test, control) + # Flexible dtype + dt = np.dtype({'names': ['a', 'b'], 'formats': ['f', 'f']}) + test = masked_all((2,), dtype=dt) + control = array([(0, 0), (0, 0)], mask=[(1, 1), (1, 1)], dtype=dt) + assert_equal(test, control) + test = masked_all((2, 2), dtype=dt) + control = array([[(0, 0), (0, 0)], [(0, 0), (0, 0)]], + mask=[[(1, 1), (1, 1)], [(1, 1), (1, 1)]], + dtype=dt) + assert_equal(test, control) + # Nested dtype + dt = np.dtype([('a', 'f'), ('b', [('ba', 'f'), ('bb', 'f')])]) + test = masked_all((2,), dtype=dt) + control = array([(1, (1, 1)), (1, (1, 1))], + mask=[(1, (1, 1)), (1, (1, 1))], dtype=dt) + assert_equal(test, control) + test = masked_all((2,), dtype=dt) + control = array([(1, (1, 1)), (1, (1, 1))], + mask=[(1, (1, 1)), (1, (1, 1))], dtype=dt) + assert_equal(test, control) + test = masked_all((1, 1), dtype=dt) + control = array([[(1, (1, 1))]], mask=[[(1, (1, 1))]], dtype=dt) + assert_equal(test, control) + + def test_masked_all_with_object_nested(self): + # Test masked_all works with nested array with dtype of an 'object' + # refers to issue #15895 + my_dtype = np.dtype([('b', ([('c', object)], (1,)))]) + masked_arr = np.ma.masked_all((1,), my_dtype) + + assert_equal(type(masked_arr['b']), np.ma.core.MaskedArray) + assert_equal(type(masked_arr['b']['c']), np.ma.core.MaskedArray) + assert_equal(len(masked_arr['b']['c']), 1) + assert_equal(masked_arr['b']['c'].shape, (1, 1)) + assert_equal(masked_arr['b']['c']._fill_value.shape, ()) + + def test_masked_all_with_object(self): + # same as above except that the array is not nested + my_dtype = np.dtype([('b', (object, (1,)))]) + masked_arr = np.ma.masked_all((1,), my_dtype) + + assert_equal(type(masked_arr['b']), np.ma.core.MaskedArray) + assert_equal(len(masked_arr['b']), 1) + assert_equal(masked_arr['b'].shape, (1, 1)) + assert_equal(masked_arr['b']._fill_value.shape, ()) + + def test_masked_all_like(self): + # Tests masked_all + # Standard dtype + base = array([1, 2], dtype=float) + test = masked_all_like(base) + control = array([1, 1], mask=[1, 1], dtype=float) + assert_equal(test, control) + # Flexible dtype + dt = np.dtype({'names': ['a', 'b'], 'formats': ['f', 'f']}) + base = array([(0, 0), (0, 0)], mask=[(1, 1), (1, 1)], dtype=dt) + test = masked_all_like(base) + control = array([(10, 10), (10, 10)], mask=[(1, 
1), (1, 1)], dtype=dt) + assert_equal(test, control) + # Nested dtype + dt = np.dtype([('a', 'f'), ('b', [('ba', 'f'), ('bb', 'f')])]) + control = array([(1, (1, 1)), (1, (1, 1))], + mask=[(1, (1, 1)), (1, (1, 1))], dtype=dt) + test = masked_all_like(control) + assert_equal(test, control) + + def check_clump(self, f): + for i in range(1, 7): + for j in range(2**i): + k = np.arange(i, dtype=int) + ja = np.full(i, j, dtype=int) + a = masked_array(2**k) + a.mask = (ja & (2**k)) != 0 + s = 0 + for sl in f(a): + s += a.data[sl].sum() + if f == clump_unmasked: + assert_equal(a.compressed().sum(), s) + else: + a.mask = ~a.mask + assert_equal(a.compressed().sum(), s) + + def test_clump_masked(self): + # Test clump_masked + a = masked_array(np.arange(10)) + a[[0, 1, 2, 6, 8, 9]] = masked + # + test = clump_masked(a) + control = [slice(0, 3), slice(6, 7), slice(8, 10)] + assert_equal(test, control) + + self.check_clump(clump_masked) + + def test_clump_unmasked(self): + # Test clump_unmasked + a = masked_array(np.arange(10)) + a[[0, 1, 2, 6, 8, 9]] = masked + test = clump_unmasked(a) + control = [slice(3, 6), slice(7, 8), ] + assert_equal(test, control) + + self.check_clump(clump_unmasked) + + def test_flatnotmasked_contiguous(self): + # Test flatnotmasked_contiguous + a = arange(10) + # No mask + test = flatnotmasked_contiguous(a) + assert_equal(test, [slice(0, a.size)]) + # mask of all false + a.mask = np.zeros(10, dtype=bool) + assert_equal(test, [slice(0, a.size)]) + # Some mask + a[(a < 3) | (a > 8) | (a == 5)] = masked + test = flatnotmasked_contiguous(a) + assert_equal(test, [slice(3, 5), slice(6, 9)]) + # + a[:] = masked + test = flatnotmasked_contiguous(a) + assert_equal(test, []) + + +class TestAverage: + # Several tests of average. Why so many ? Good point... + def test_testAverage1(self): + # Test of average. + ott = array([0., 1., 2., 3.], mask=[True, False, False, False]) + assert_equal(2.0, average(ott, axis=0)) + assert_equal(2.0, average(ott, weights=[1., 1., 2., 1.])) + result, wts = average(ott, weights=[1., 1., 2., 1.], returned=True) + assert_equal(2.0, result) + assert_(wts == 4.0) + ott[:] = masked + assert_equal(average(ott, axis=0).mask, [True]) + ott = array([0., 1., 2., 3.], mask=[True, False, False, False]) + ott = ott.reshape(2, 2) + ott[:, 1] = masked + assert_equal(average(ott, axis=0), [2.0, 0.0]) + assert_equal(average(ott, axis=1).mask[0], [True]) + assert_equal([2., 0.], average(ott, axis=0)) + result, wts = average(ott, axis=0, returned=True) + assert_equal(wts, [1., 0.]) + + def test_testAverage2(self): + # More tests of average. + w1 = [0, 1, 1, 1, 1, 0] + w2 = [[0, 1, 1, 1, 1, 0], [1, 0, 0, 0, 0, 1]] + x = arange(6, dtype=np.float_) + assert_equal(average(x, axis=0), 2.5) + assert_equal(average(x, axis=0, weights=w1), 2.5) + y = array([arange(6, dtype=np.float_), 2.0 * arange(6)]) + assert_equal(average(y, None), np.add.reduce(np.arange(6)) * 3. / 12.) + assert_equal(average(y, axis=0), np.arange(6) * 3. / 2.) + assert_equal(average(y, axis=1), + [average(x, axis=0), average(x, axis=0) * 2.0]) + assert_equal(average(y, None, weights=w2), 20. / 6.) 
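+ # NOTE (editorial comment, not part of the original file), spelling out
+ # the 20/6 above: w2's first row selects y[0][1:5] = 1+2+3+4 = 10, its
+ # second row selects y[1][0] + y[1][5] = 0 + 10 = 10, giving a weighted
+ # sum of 20 over a total weight of 6.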
+ assert_equal(average(y, axis=0, weights=w2), + [0., 1., 2., 3., 4., 10.]) + assert_equal(average(y, axis=1), + [average(x, axis=0), average(x, axis=0) * 2.0]) + m1 = zeros(6) + m2 = [0, 0, 1, 1, 0, 0] + m3 = [[0, 0, 1, 1, 0, 0], [0, 1, 1, 1, 1, 0]] + m4 = ones(6) + m5 = [0, 1, 1, 1, 1, 1] + assert_equal(average(masked_array(x, m1), axis=0), 2.5) + assert_equal(average(masked_array(x, m2), axis=0), 2.5) + assert_equal(average(masked_array(x, m4), axis=0).mask, [True]) + assert_equal(average(masked_array(x, m5), axis=0), 0.0) + assert_equal(count(average(masked_array(x, m4), axis=0)), 0) + z = masked_array(y, m3) + assert_equal(average(z, None), 20. / 6.) + assert_equal(average(z, axis=0), [0., 1., 99., 99., 4.0, 7.5]) + assert_equal(average(z, axis=1), [2.5, 5.0]) + assert_equal(average(z, axis=0, weights=w2), + [0., 1., 99., 99., 4.0, 10.0]) + + def test_testAverage3(self): + # Yet more tests of average! + a = arange(6) + b = arange(6) * 3 + r1, w1 = average([[a, b], [b, a]], axis=1, returned=True) + assert_equal(shape(r1), shape(w1)) + assert_equal(r1.shape, w1.shape) + r2, w2 = average(ones((2, 2, 3)), axis=0, weights=[3, 1], returned=True) + assert_equal(shape(w2), shape(r2)) + r2, w2 = average(ones((2, 2, 3)), returned=True) + assert_equal(shape(w2), shape(r2)) + r2, w2 = average(ones((2, 2, 3)), weights=ones((2, 2, 3)), returned=True) + assert_equal(shape(w2), shape(r2)) + a2d = array([[1, 2], [0, 4]], float) + a2dm = masked_array(a2d, [[False, False], [True, False]]) + a2da = average(a2d, axis=0) + assert_equal(a2da, [0.5, 3.0]) + a2dma = average(a2dm, axis=0) + assert_equal(a2dma, [1.0, 3.0]) + a2dma = average(a2dm, axis=None) + assert_equal(a2dma, 7. / 3.) + a2dma = average(a2dm, axis=1) + assert_equal(a2dma, [1.5, 4.0]) + + def test_onintegers_with_mask(self): + # Test average on integers with mask + a = average(array([1, 2])) + assert_equal(a, 1.5) + a = average(array([1, 2, 3, 4], mask=[False, False, True, True])) + assert_equal(a, 1.5) + + def test_complex(self): + # Test with complex data. + # (Regression test for https://github.com/numpy/numpy/issues/2684) + mask = np.array([[0, 0, 0, 1, 0], + [0, 1, 0, 0, 0]], dtype=bool) + a = masked_array([[0, 1+2j, 3+4j, 5+6j, 7+8j], + [9j, 0+1j, 2+3j, 4+5j, 7+7j]], + mask=mask) + + av = average(a) + expected = np.average(a.compressed()) + assert_almost_equal(av.real, expected.real) + assert_almost_equal(av.imag, expected.imag) + + av0 = average(a, axis=0) + expected0 = average(a.real, axis=0) + average(a.imag, axis=0)*1j + assert_almost_equal(av0.real, expected0.real) + assert_almost_equal(av0.imag, expected0.imag) + + av1 = average(a, axis=1) + expected1 = average(a.real, axis=1) + average(a.imag, axis=1)*1j + assert_almost_equal(av1.real, expected1.real) + assert_almost_equal(av1.imag, expected1.imag) + + # Test with the 'weights' argument. 
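+ # NOTE (editorial comment, not part of the original file): masked
+ # positions must contribute no weight, so the reference values below are
+ # built from the compressed data and wts[~mask].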
+ wts = np.array([[0.5, 1.0, 2.0, 1.0, 0.5], + [1.0, 1.0, 1.0, 1.0, 1.0]]) + wav = average(a, weights=wts) + expected = np.average(a.compressed(), weights=wts[~mask]) + assert_almost_equal(wav.real, expected.real) + assert_almost_equal(wav.imag, expected.imag) + + wav0 = average(a, weights=wts, axis=0) + expected0 = (average(a.real, weights=wts, axis=0) + + average(a.imag, weights=wts, axis=0)*1j) + assert_almost_equal(wav0.real, expected0.real) + assert_almost_equal(wav0.imag, expected0.imag) + + wav1 = average(a, weights=wts, axis=1) + expected1 = (average(a.real, weights=wts, axis=1) + + average(a.imag, weights=wts, axis=1)*1j) + assert_almost_equal(wav1.real, expected1.real) + assert_almost_equal(wav1.imag, expected1.imag) + + def test_masked_weights(self): + # Test with masked weights. + # (Regression test for https://github.com/numpy/numpy/issues/10438) + a = np.ma.array(np.arange(9).reshape(3, 3), + mask=[[1, 0, 0], [1, 0, 0], [0, 0, 0]]) + weights_unmasked = masked_array([5, 28, 31], mask=False) + weights_masked = masked_array([5, 28, 31], mask=[1, 0, 0]) + + avg_unmasked = average(a, axis=0, + weights=weights_unmasked, returned=False) + expected_unmasked = np.array([6.0, 5.21875, 6.21875]) + assert_almost_equal(avg_unmasked, expected_unmasked) + + avg_masked = average(a, axis=0, weights=weights_masked, returned=False) + expected_masked = np.array([6.0, 5.576271186440678, 6.576271186440678]) + assert_almost_equal(avg_masked, expected_masked) + + +class TestConcatenator: + # Tests for mr_, the equivalent of r_ for masked arrays. + + def test_1d(self): + # Tests mr_ on 1D arrays. + assert_array_equal(mr_[1, 2, 3, 4, 5, 6], array([1, 2, 3, 4, 5, 6])) + b = ones(5) + m = [1, 0, 0, 0, 0] + d = masked_array(b, mask=m) + c = mr_[d, 0, 0, d] + assert_(isinstance(c, MaskedArray)) + assert_array_equal(c, [1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1]) + assert_array_equal(c.mask, mr_[m, 0, 0, m]) + + def test_2d(self): + # Tests mr_ on 2D arrays. + a_1 = np.random.rand(5, 5) + a_2 = np.random.rand(5, 5) + m_1 = np.round_(np.random.rand(5, 5), 0) + m_2 = np.round_(np.random.rand(5, 5), 0) + b_1 = masked_array(a_1, mask=m_1) + b_2 = masked_array(a_2, mask=m_2) + # append columns + d = mr_['1', b_1, b_2] + assert_(d.shape == (5, 10)) + assert_array_equal(d[:, :5], b_1) + assert_array_equal(d[:, 5:], b_2) + assert_array_equal(d.mask, np.r_['1', m_1, m_2]) + d = mr_[b_1, b_2] + assert_(d.shape == (10, 5)) + assert_array_equal(d[:5,:], b_1) + assert_array_equal(d[5:,:], b_2) + assert_array_equal(d.mask, np.r_[m_1, m_2]) + + def test_masked_constant(self): + actual = mr_[np.ma.masked, 1] + assert_equal(actual.mask, [True, False]) + assert_equal(actual.data[1], 1) + + actual = mr_[[1, 2], np.ma.masked] + assert_equal(actual.mask, [False, False, True]) + assert_equal(actual.data[:2], [1, 2]) + + +class TestNotMasked: + # Tests notmasked_edges and notmasked_contiguous. 
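+
+ # NOTE (editorial sketch, not part of the original file): with axis=None,
+ # notmasked_edges gives the flat indices of the first and last unmasked
+ # values. The method name is hypothetical.
+ def test_edges_flat_editorial_sketch(self):
+     a = masked_array(np.arange(5), mask=[1, 0, 0, 0, 1])
+     assert_equal(notmasked_edges(a, None), [1, 3])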
+ + def test_edges(self): + # Tests unmasked_edges + data = masked_array(np.arange(25).reshape(5, 5), + mask=[[0, 0, 1, 0, 0], + [0, 0, 0, 1, 1], + [1, 1, 0, 0, 0], + [0, 0, 0, 0, 0], + [1, 1, 1, 0, 0]],) + test = notmasked_edges(data, None) + assert_equal(test, [0, 24]) + test = notmasked_edges(data, 0) + assert_equal(test[0], [(0, 0, 1, 0, 0), (0, 1, 2, 3, 4)]) + assert_equal(test[1], [(3, 3, 3, 4, 4), (0, 1, 2, 3, 4)]) + test = notmasked_edges(data, 1) + assert_equal(test[0], [(0, 1, 2, 3, 4), (0, 0, 2, 0, 3)]) + assert_equal(test[1], [(0, 1, 2, 3, 4), (4, 2, 4, 4, 4)]) + # + test = notmasked_edges(data.data, None) + assert_equal(test, [0, 24]) + test = notmasked_edges(data.data, 0) + assert_equal(test[0], [(0, 0, 0, 0, 0), (0, 1, 2, 3, 4)]) + assert_equal(test[1], [(4, 4, 4, 4, 4), (0, 1, 2, 3, 4)]) + test = notmasked_edges(data.data, -1) + assert_equal(test[0], [(0, 1, 2, 3, 4), (0, 0, 0, 0, 0)]) + assert_equal(test[1], [(0, 1, 2, 3, 4), (4, 4, 4, 4, 4)]) + # + data[-2] = masked + test = notmasked_edges(data, 0) + assert_equal(test[0], [(0, 0, 1, 0, 0), (0, 1, 2, 3, 4)]) + assert_equal(test[1], [(1, 1, 2, 4, 4), (0, 1, 2, 3, 4)]) + test = notmasked_edges(data, -1) + assert_equal(test[0], [(0, 1, 2, 4), (0, 0, 2, 3)]) + assert_equal(test[1], [(0, 1, 2, 4), (4, 2, 4, 4)]) + + def test_contiguous(self): + # Tests notmasked_contiguous + a = masked_array(np.arange(24).reshape(3, 8), + mask=[[0, 0, 0, 0, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1], + [0, 0, 0, 0, 0, 0, 1, 0]]) + tmp = notmasked_contiguous(a, None) + assert_equal(tmp, [ + slice(0, 4, None), + slice(16, 22, None), + slice(23, 24, None) + ]) + + tmp = notmasked_contiguous(a, 0) + assert_equal(tmp, [ + [slice(0, 1, None), slice(2, 3, None)], + [slice(0, 1, None), slice(2, 3, None)], + [slice(0, 1, None), slice(2, 3, None)], + [slice(0, 1, None), slice(2, 3, None)], + [slice(2, 3, None)], + [slice(2, 3, None)], + [], + [slice(2, 3, None)] + ]) + # + tmp = notmasked_contiguous(a, 1) + assert_equal(tmp, [ + [slice(0, 4, None)], + [], + [slice(0, 6, None), slice(7, 8, None)] + ]) + + +class TestCompressFunctions: + + def test_compress_nd(self): + # Tests compress_nd + x = np.array(list(range(3*4*5))).reshape(3, 4, 5) + m = np.zeros((3,4,5)).astype(bool) + m[1,1,1] = True + x = array(x, mask=m) + + # axis=None + a = compress_nd(x) + assert_equal(a, [[[ 0, 2, 3, 4], + [10, 12, 13, 14], + [15, 17, 18, 19]], + [[40, 42, 43, 44], + [50, 52, 53, 54], + [55, 57, 58, 59]]]) + + # axis=0 + a = compress_nd(x, 0) + assert_equal(a, [[[ 0, 1, 2, 3, 4], + [ 5, 6, 7, 8, 9], + [10, 11, 12, 13, 14], + [15, 16, 17, 18, 19]], + [[40, 41, 42, 43, 44], + [45, 46, 47, 48, 49], + [50, 51, 52, 53, 54], + [55, 56, 57, 58, 59]]]) + + # axis=1 + a = compress_nd(x, 1) + assert_equal(a, [[[ 0, 1, 2, 3, 4], + [10, 11, 12, 13, 14], + [15, 16, 17, 18, 19]], + [[20, 21, 22, 23, 24], + [30, 31, 32, 33, 34], + [35, 36, 37, 38, 39]], + [[40, 41, 42, 43, 44], + [50, 51, 52, 53, 54], + [55, 56, 57, 58, 59]]]) + + a2 = compress_nd(x, (1,)) + a3 = compress_nd(x, -2) + a4 = compress_nd(x, (-2,)) + assert_equal(a, a2) + assert_equal(a, a3) + assert_equal(a, a4) + + # axis=2 + a = compress_nd(x, 2) + assert_equal(a, [[[ 0, 2, 3, 4], + [ 5, 7, 8, 9], + [10, 12, 13, 14], + [15, 17, 18, 19]], + [[20, 22, 23, 24], + [25, 27, 28, 29], + [30, 32, 33, 34], + [35, 37, 38, 39]], + [[40, 42, 43, 44], + [45, 47, 48, 49], + [50, 52, 53, 54], + [55, 57, 58, 59]]]) + + a2 = compress_nd(x, (2,)) + a3 = compress_nd(x, -1) + a4 = compress_nd(x, (-1,)) + assert_equal(a, a2) + assert_equal(a, a3) 
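+ # NOTE (editorial comment, not part of the original file): negative axes
+ # are normalized first, so for this 3-D array, -1 and (-1,) address the
+ # same axis as 2 and (2,).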
+ assert_equal(a, a4) + + # axis=(0, 1) + a = compress_nd(x, (0, 1)) + assert_equal(a, [[[ 0, 1, 2, 3, 4], + [10, 11, 12, 13, 14], + [15, 16, 17, 18, 19]], + [[40, 41, 42, 43, 44], + [50, 51, 52, 53, 54], + [55, 56, 57, 58, 59]]]) + a2 = compress_nd(x, (0, -2)) + assert_equal(a, a2) + + # axis=(1, 2) + a = compress_nd(x, (1, 2)) + assert_equal(a, [[[ 0, 2, 3, 4], + [10, 12, 13, 14], + [15, 17, 18, 19]], + [[20, 22, 23, 24], + [30, 32, 33, 34], + [35, 37, 38, 39]], + [[40, 42, 43, 44], + [50, 52, 53, 54], + [55, 57, 58, 59]]]) + + a2 = compress_nd(x, (-2, 2)) + a3 = compress_nd(x, (1, -1)) + a4 = compress_nd(x, (-2, -1)) + assert_equal(a, a2) + assert_equal(a, a3) + assert_equal(a, a4) + + # axis=(0, 2) + a = compress_nd(x, (0, 2)) + assert_equal(a, [[[ 0, 2, 3, 4], + [ 5, 7, 8, 9], + [10, 12, 13, 14], + [15, 17, 18, 19]], + [[40, 42, 43, 44], + [45, 47, 48, 49], + [50, 52, 53, 54], + [55, 57, 58, 59]]]) + + a2 = compress_nd(x, (0, -1)) + assert_equal(a, a2) + + def test_compress_rowcols(self): + # Tests compress_rowcols + x = array(np.arange(9).reshape(3, 3), + mask=[[1, 0, 0], [0, 0, 0], [0, 0, 0]]) + assert_equal(compress_rowcols(x), [[4, 5], [7, 8]]) + assert_equal(compress_rowcols(x, 0), [[3, 4, 5], [6, 7, 8]]) + assert_equal(compress_rowcols(x, 1), [[1, 2], [4, 5], [7, 8]]) + x = array(x._data, mask=[[0, 0, 0], [0, 1, 0], [0, 0, 0]]) + assert_equal(compress_rowcols(x), [[0, 2], [6, 8]]) + assert_equal(compress_rowcols(x, 0), [[0, 1, 2], [6, 7, 8]]) + assert_equal(compress_rowcols(x, 1), [[0, 2], [3, 5], [6, 8]]) + x = array(x._data, mask=[[1, 0, 0], [0, 1, 0], [0, 0, 0]]) + assert_equal(compress_rowcols(x), [[8]]) + assert_equal(compress_rowcols(x, 0), [[6, 7, 8]]) + assert_equal(compress_rowcols(x, 1,), [[2], [5], [8]]) + x = array(x._data, mask=[[1, 0, 0], [0, 1, 0], [0, 0, 1]]) + assert_equal(compress_rowcols(x).size, 0) + assert_equal(compress_rowcols(x, 0).size, 0) + assert_equal(compress_rowcols(x, 1).size, 0) + + def test_mask_rowcols(self): + # Tests mask_rowcols. 
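# --- editor's aside: illustrative sketch, not part of the committed diff ---
# compress_rowcols, exercised in the tests just above: drop every row
# (axis=0), column (axis=1), or both (default) containing a masked entry.
import numpy as np
x = np.ma.array(np.arange(9).reshape(3, 3),
                mask=[[1, 0, 0], [0, 0, 0], [0, 0, 0]])
print(np.ma.compress_rowcols(x))     # [[4 5] [7 8]]  (row 0 and column 0 dropped)
print(np.ma.compress_rowcols(x, 0))  # [[3 4 5] [6 7 8]]
print(np.ma.compress_rowcols(x, 1))  # [[1 2] [4 5] [7 8]]
# --- end aside ---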
+ x = array(np.arange(9).reshape(3, 3), + mask=[[1, 0, 0], [0, 0, 0], [0, 0, 0]]) + assert_equal(mask_rowcols(x).mask, + [[1, 1, 1], [1, 0, 0], [1, 0, 0]]) + assert_equal(mask_rowcols(x, 0).mask, + [[1, 1, 1], [0, 0, 0], [0, 0, 0]]) + assert_equal(mask_rowcols(x, 1).mask, + [[1, 0, 0], [1, 0, 0], [1, 0, 0]]) + x = array(x._data, mask=[[0, 0, 0], [0, 1, 0], [0, 0, 0]]) + assert_equal(mask_rowcols(x).mask, + [[0, 1, 0], [1, 1, 1], [0, 1, 0]]) + assert_equal(mask_rowcols(x, 0).mask, + [[0, 0, 0], [1, 1, 1], [0, 0, 0]]) + assert_equal(mask_rowcols(x, 1).mask, + [[0, 1, 0], [0, 1, 0], [0, 1, 0]]) + x = array(x._data, mask=[[1, 0, 0], [0, 1, 0], [0, 0, 0]]) + assert_equal(mask_rowcols(x).mask, + [[1, 1, 1], [1, 1, 1], [1, 1, 0]]) + assert_equal(mask_rowcols(x, 0).mask, + [[1, 1, 1], [1, 1, 1], [0, 0, 0]]) + assert_equal(mask_rowcols(x, 1,).mask, + [[1, 1, 0], [1, 1, 0], [1, 1, 0]]) + x = array(x._data, mask=[[1, 0, 0], [0, 1, 0], [0, 0, 1]]) + assert_(mask_rowcols(x).all() is masked) + assert_(mask_rowcols(x, 0).all() is masked) + assert_(mask_rowcols(x, 1).all() is masked) + assert_(mask_rowcols(x).mask.all()) + assert_(mask_rowcols(x, 0).mask.all()) + assert_(mask_rowcols(x, 1).mask.all()) + + @pytest.mark.parametrize("axis", [None, 0, 1]) + @pytest.mark.parametrize(["func", "rowcols_axis"], + [(np.ma.mask_rows, 0), (np.ma.mask_cols, 1)]) + def test_mask_row_cols_axis_deprecation(self, axis, func, rowcols_axis): + # Test deprecation of the axis argument to `mask_rows` and `mask_cols` + x = array(np.arange(9).reshape(3, 3), + mask=[[1, 0, 0], [0, 0, 0], [0, 0, 0]]) + + with assert_warns(DeprecationWarning): + res = func(x, axis=axis) + assert_equal(res, mask_rowcols(x, rowcols_axis)) + + def test_dot(self): + # Tests dot product + n = np.arange(1, 7) + # + m = [1, 0, 0, 0, 0, 0] + a = masked_array(n, mask=m).reshape(2, 3) + b = masked_array(n, mask=m).reshape(3, 2) + c = dot(a, b, strict=True) + assert_equal(c.mask, [[1, 1], [1, 0]]) + c = dot(b, a, strict=True) + assert_equal(c.mask, [[1, 1, 1], [1, 0, 0], [1, 0, 0]]) + c = dot(a, b, strict=False) + assert_equal(c, np.dot(a.filled(0), b.filled(0))) + c = dot(b, a, strict=False) + assert_equal(c, np.dot(b.filled(0), a.filled(0))) + # + m = [0, 0, 0, 0, 0, 1] + a = masked_array(n, mask=m).reshape(2, 3) + b = masked_array(n, mask=m).reshape(3, 2) + c = dot(a, b, strict=True) + assert_equal(c.mask, [[0, 1], [1, 1]]) + c = dot(b, a, strict=True) + assert_equal(c.mask, [[0, 0, 1], [0, 0, 1], [1, 1, 1]]) + c = dot(a, b, strict=False) + assert_equal(c, np.dot(a.filled(0), b.filled(0))) + assert_equal(c, dot(a, b)) + c = dot(b, a, strict=False) + assert_equal(c, np.dot(b.filled(0), a.filled(0))) + # + m = [0, 0, 0, 0, 0, 0] + a = masked_array(n, mask=m).reshape(2, 3) + b = masked_array(n, mask=m).reshape(3, 2) + c = dot(a, b) + assert_equal(c.mask, nomask) + c = dot(b, a) + assert_equal(c.mask, nomask) + # + a = masked_array(n, mask=[1, 0, 0, 0, 0, 0]).reshape(2, 3) + b = masked_array(n, mask=[0, 0, 0, 0, 0, 0]).reshape(3, 2) + c = dot(a, b, strict=True) + assert_equal(c.mask, [[1, 1], [0, 0]]) + c = dot(a, b, strict=False) + assert_equal(c, np.dot(a.filled(0), b.filled(0))) + c = dot(b, a, strict=True) + assert_equal(c.mask, [[1, 0, 0], [1, 0, 0], [1, 0, 0]]) + c = dot(b, a, strict=False) + assert_equal(c, np.dot(b.filled(0), a.filled(0))) + # + a = masked_array(n, mask=[0, 0, 0, 0, 0, 1]).reshape(2, 3) + b = masked_array(n, mask=[0, 0, 0, 0, 0, 0]).reshape(3, 2) + c = dot(a, b, strict=True) + assert_equal(c.mask, [[0, 0], [1, 1]]) + c = dot(a, b) + 
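# --- editor's aside: illustrative sketch, not part of the committed diff ---
# The strict flag of np.ma.dot, as the surrounding test_dot cases check:
# with strict=True a masked entry poisons every sum it contributes to;
# with strict=False (the default) masked entries are filled with 0 first.
import numpy as np
a = np.ma.array([[1, 2], [3, 4]], mask=[[1, 0], [0, 0]])
b = np.ma.array([[1, 0], [0, 1]])
print(np.ma.dot(a, b, strict=True).mask)  # [[ True  True] [False False]]
print(np.ma.dot(a, b))                    # == np.dot(a.filled(0), b.filled(0))
# --- end aside ---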
assert_equal(c, np.dot(a.filled(0), b.filled(0))) + c = dot(b, a, strict=True) + assert_equal(c.mask, [[0, 0, 1], [0, 0, 1], [0, 0, 1]]) + c = dot(b, a, strict=False) + assert_equal(c, np.dot(b.filled(0), a.filled(0))) + # + a = masked_array(n, mask=[0, 0, 0, 0, 0, 1]).reshape(2, 3) + b = masked_array(n, mask=[0, 0, 1, 0, 0, 0]).reshape(3, 2) + c = dot(a, b, strict=True) + assert_equal(c.mask, [[1, 0], [1, 1]]) + c = dot(a, b, strict=False) + assert_equal(c, np.dot(a.filled(0), b.filled(0))) + c = dot(b, a, strict=True) + assert_equal(c.mask, [[0, 0, 1], [1, 1, 1], [0, 0, 1]]) + c = dot(b, a, strict=False) + assert_equal(c, np.dot(b.filled(0), a.filled(0))) + + def test_dot_returns_maskedarray(self): + # See gh-6611 + a = np.eye(3) + b = array(a) + assert_(type(dot(a, a)) is MaskedArray) + assert_(type(dot(a, b)) is MaskedArray) + assert_(type(dot(b, a)) is MaskedArray) + assert_(type(dot(b, b)) is MaskedArray) + + def test_dot_out(self): + a = array(np.eye(3)) + out = array(np.zeros((3, 3))) + res = dot(a, a, out=out) + assert_(res is out) + assert_equal(a, res) + + +class TestApplyAlongAxis: + # Tests 2D functions + def test_3d(self): + a = arange(12.).reshape(2, 2, 3) + + def myfunc(b): + return b[1] + + xa = apply_along_axis(myfunc, 2, a) + assert_equal(xa, [[1, 4], [7, 10]]) + + # Tests kwargs functions + def test_3d_kwargs(self): + a = arange(12).reshape(2, 2, 3) + + def myfunc(b, offset=0): + return b[1+offset] + + xa = apply_along_axis(myfunc, 2, a, offset=1) + assert_equal(xa, [[2, 5], [8, 11]]) + + +class TestApplyOverAxes: + # Tests apply_over_axes + def test_basic(self): + a = arange(24).reshape(2, 3, 4) + test = apply_over_axes(np.sum, a, [0, 2]) + ctrl = np.array([[[60], [92], [124]]]) + assert_equal(test, ctrl) + a[(a % 2).astype(bool)] = masked + test = apply_over_axes(np.sum, a, [0, 2]) + ctrl = np.array([[[28], [44], [60]]]) + assert_equal(test, ctrl) + + +class TestMedian: + def test_pytype(self): + r = np.ma.median([[np.inf, np.inf], [np.inf, np.inf]], axis=-1) + assert_equal(r, np.inf) + + def test_inf(self): + # test that even which computes handles inf / x = masked + r = np.ma.median(np.ma.masked_array([[np.inf, np.inf], + [np.inf, np.inf]]), axis=-1) + assert_equal(r, np.inf) + r = np.ma.median(np.ma.masked_array([[np.inf, np.inf], + [np.inf, np.inf]]), axis=None) + assert_equal(r, np.inf) + # all masked + r = np.ma.median(np.ma.masked_array([[np.inf, np.inf], + [np.inf, np.inf]], mask=True), + axis=-1) + assert_equal(r.mask, True) + r = np.ma.median(np.ma.masked_array([[np.inf, np.inf], + [np.inf, np.inf]], mask=True), + axis=None) + assert_equal(r.mask, True) + + def test_non_masked(self): + x = np.arange(9) + assert_equal(np.ma.median(x), 4.) + assert_(type(np.ma.median(x)) is not MaskedArray) + x = range(8) + assert_equal(np.ma.median(x), 3.5) + assert_(type(np.ma.median(x)) is not MaskedArray) + x = 5 + assert_equal(np.ma.median(x), 5.) 
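# --- editor's aside: illustrative sketch, not part of the committed diff ---
# ma.apply_over_axes as used in TestApplyOverAxes above: the function is
# applied over each listed axis in turn, reduced axes are kept as length-1
# dimensions, and masked entries simply drop out of the sums.
import numpy as np
a = np.ma.arange(24).reshape(2, 3, 4)
a[(a % 2).astype(bool)] = np.ma.masked            # mask all odd values
print(np.ma.apply_over_axes(np.sum, a, [0, 2]))   # [[[28] [44] [60]]]
# --- end aside ---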
+ assert_(type(np.ma.median(x)) is not MaskedArray) + # integer + x = np.arange(9 * 8).reshape(9, 8) + assert_equal(np.ma.median(x, axis=0), np.median(x, axis=0)) + assert_equal(np.ma.median(x, axis=1), np.median(x, axis=1)) + assert_(np.ma.median(x, axis=1) is not MaskedArray) + # float + x = np.arange(9 * 8.).reshape(9, 8) + assert_equal(np.ma.median(x, axis=0), np.median(x, axis=0)) + assert_equal(np.ma.median(x, axis=1), np.median(x, axis=1)) + assert_(np.ma.median(x, axis=1) is not MaskedArray) + + def test_docstring_examples(self): + "test the examples given in the docstring of ma.median" + x = array(np.arange(8), mask=[0]*4 + [1]*4) + assert_equal(np.ma.median(x), 1.5) + assert_equal(np.ma.median(x).shape, (), "shape mismatch") + assert_(type(np.ma.median(x)) is not MaskedArray) + x = array(np.arange(10).reshape(2, 5), mask=[0]*6 + [1]*4) + assert_equal(np.ma.median(x), 2.5) + assert_equal(np.ma.median(x).shape, (), "shape mismatch") + assert_(type(np.ma.median(x)) is not MaskedArray) + ma_x = np.ma.median(x, axis=-1, overwrite_input=True) + assert_equal(ma_x, [2., 5.]) + assert_equal(ma_x.shape, (2,), "shape mismatch") + assert_(type(ma_x) is MaskedArray) + + def test_axis_argument_errors(self): + msg = "mask = %s, ndim = %s, axis = %s, overwrite_input = %s" + for ndmin in range(5): + for mask in [False, True]: + x = array(1, ndmin=ndmin, mask=mask) + + # Valid axis values should not raise exception + args = itertools.product(range(-ndmin, ndmin), [False, True]) + for axis, over in args: + try: + np.ma.median(x, axis=axis, overwrite_input=over) + except Exception: + raise AssertionError(msg % (mask, ndmin, axis, over)) + + # Invalid axis values should raise exception + args = itertools.product([-(ndmin + 1), ndmin], [False, True]) + for axis, over in args: + try: + np.ma.median(x, axis=axis, overwrite_input=over) + except np.AxisError: + pass + else: + raise AssertionError(msg % (mask, ndmin, axis, over)) + + def test_masked_0d(self): + # Check values + x = array(1, mask=False) + assert_equal(np.ma.median(x), 1) + x = array(1, mask=True) + assert_equal(np.ma.median(x), np.ma.masked) + + def test_masked_1d(self): + x = array(np.arange(5), mask=True) + assert_equal(np.ma.median(x), np.ma.masked) + assert_equal(np.ma.median(x).shape, (), "shape mismatch") + assert_(type(np.ma.median(x)) is np.ma.core.MaskedConstant) + x = array(np.arange(5), mask=False) + assert_equal(np.ma.median(x), 2.) + assert_equal(np.ma.median(x).shape, (), "shape mismatch") + assert_(type(np.ma.median(x)) is not MaskedArray) + x = array(np.arange(5), mask=[0,1,0,0,0]) + assert_equal(np.ma.median(x), 2.5) + assert_equal(np.ma.median(x).shape, (), "shape mismatch") + assert_(type(np.ma.median(x)) is not MaskedArray) + x = array(np.arange(5), mask=[0,1,1,1,1]) + assert_equal(np.ma.median(x), 0.) + assert_equal(np.ma.median(x).shape, (), "shape mismatch") + assert_(type(np.ma.median(x)) is not MaskedArray) + # integer + x = array(np.arange(5), mask=[0,1,1,0,0]) + assert_equal(np.ma.median(x), 3.) + assert_equal(np.ma.median(x).shape, (), "shape mismatch") + assert_(type(np.ma.median(x)) is not MaskedArray) + # float + x = array(np.arange(5.), mask=[0,1,1,0,0]) + assert_equal(np.ma.median(x), 3.) 
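# --- editor's aside: illustrative sketch, not part of the committed diff ---
# The return-type contract the assertions above encode: np.ma.median on
# fully unmasked input degrades to a plain scalar/ndarray, while masked
# input is reduced mask-aware.
import numpy as np
x = np.ma.array(np.arange(8), mask=[0] * 4 + [1] * 4)
print(np.ma.median(x))             # 1.5 -- only the unmasked half is used
print(np.ma.median(np.arange(9)))  # 4.0, a plain scalar, not a MaskedArray
# --- end aside ---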
+ assert_equal(np.ma.median(x).shape, (), "shape mismatch") + assert_(type(np.ma.median(x)) is not MaskedArray) + # integer + x = array(np.arange(6), mask=[0,1,1,1,1,0]) + assert_equal(np.ma.median(x), 2.5) + assert_equal(np.ma.median(x).shape, (), "shape mismatch") + assert_(type(np.ma.median(x)) is not MaskedArray) + # float + x = array(np.arange(6.), mask=[0,1,1,1,1,0]) + assert_equal(np.ma.median(x), 2.5) + assert_equal(np.ma.median(x).shape, (), "shape mismatch") + assert_(type(np.ma.median(x)) is not MaskedArray) + + def test_1d_shape_consistency(self): + assert_equal(np.ma.median(array([1,2,3],mask=[0,0,0])).shape, + np.ma.median(array([1,2,3],mask=[0,1,0])).shape ) + + def test_2d(self): + # Tests median w/ 2D + (n, p) = (101, 30) + x = masked_array(np.linspace(-1., 1., n),) + x[:10] = x[-10:] = masked + z = masked_array(np.empty((n, p), dtype=float)) + z[:, 0] = x[:] + idx = np.arange(len(x)) + for i in range(1, p): + np.random.shuffle(idx) + z[:, i] = x[idx] + assert_equal(median(z[:, 0]), 0) + assert_equal(median(z), 0) + assert_equal(median(z, axis=0), np.zeros(p)) + assert_equal(median(z.T, axis=1), np.zeros(p)) + + def test_2d_waxis(self): + # Tests median w/ 2D arrays and different axis. + x = masked_array(np.arange(30).reshape(10, 3)) + x[:3] = x[-3:] = masked + assert_equal(median(x), 14.5) + assert_(type(np.ma.median(x)) is not MaskedArray) + assert_equal(median(x, axis=0), [13.5, 14.5, 15.5]) + assert_(type(np.ma.median(x, axis=0)) is MaskedArray) + assert_equal(median(x, axis=1), [0, 0, 0, 10, 13, 16, 19, 0, 0, 0]) + assert_(type(np.ma.median(x, axis=1)) is MaskedArray) + assert_equal(median(x, axis=1).mask, [1, 1, 1, 0, 0, 0, 0, 1, 1, 1]) + + def test_3d(self): + # Tests median w/ 3D + x = np.ma.arange(24).reshape(3, 4, 2) + x[x % 3 == 0] = masked + assert_equal(median(x, 0), [[12, 9], [6, 15], [12, 9], [18, 15]]) + x.shape = (4, 3, 2) + assert_equal(median(x, 0), [[99, 10], [11, 99], [13, 14]]) + x = np.ma.arange(24).reshape(4, 3, 2) + x[x % 5 == 0] = masked + assert_equal(median(x, 0), [[12, 10], [8, 9], [16, 17]]) + + def test_neg_axis(self): + x = masked_array(np.arange(30).reshape(10, 3)) + x[:3] = x[-3:] = masked + assert_equal(median(x, axis=-1), median(x, axis=1)) + + def test_out_1d(self): + # integer float even odd + for v in (30, 30., 31, 31.): + x = masked_array(np.arange(v)) + x[:3] = x[-3:] = masked + out = masked_array(np.ones(())) + r = median(x, out=out) + if v == 30: + assert_equal(out, 14.5) + else: + assert_equal(out, 15.) + assert_(r is out) + assert_(type(r) is MaskedArray) + + def test_out(self): + # integer float even odd + for v in (40, 40., 30, 30.): + x = masked_array(np.arange(v).reshape(10, -1)) + x[:3] = x[-3:] = masked + out = masked_array(np.ones(10)) + r = median(x, axis=1, out=out) + if v == 30: + e = masked_array([0.]*3 + [10, 13, 16, 19] + [0.]*3, + mask=[True] * 3 + [False] * 4 + [True] * 3) + else: + e = masked_array([0.]*3 + [13.5, 17.5, 21.5, 25.5] + [0.]*3, + mask=[True]*3 + [False]*4 + [True]*3) + assert_equal(r, e) + assert_(r is out) + assert_(type(r) is MaskedArray) + + def test_single_non_masked_value_on_axis(self): + data = [[1., 0.], + [0., 3.], + [0., 0.]] + masked_arr = np.ma.masked_equal(data, 0) + expected = [1., 3.] 
+ assert_array_equal(np.ma.median(masked_arr, axis=0), + expected) + + def test_nan(self): + for mask in (False, np.zeros(6, dtype=bool)): + dm = np.ma.array([[1, np.nan, 3], [1, 2, 3]]) + dm.mask = mask + + # scalar result + r = np.ma.median(dm, axis=None) + assert_(np.isscalar(r)) + assert_array_equal(r, np.nan) + r = np.ma.median(dm.ravel(), axis=0) + assert_(np.isscalar(r)) + assert_array_equal(r, np.nan) + + r = np.ma.median(dm, axis=0) + assert_equal(type(r), MaskedArray) + assert_array_equal(r, [1, np.nan, 3]) + r = np.ma.median(dm, axis=1) + assert_equal(type(r), MaskedArray) + assert_array_equal(r, [np.nan, 2]) + r = np.ma.median(dm, axis=-1) + assert_equal(type(r), MaskedArray) + assert_array_equal(r, [np.nan, 2]) + + dm = np.ma.array([[1, np.nan, 3], [1, 2, 3]]) + dm[:, 2] = np.ma.masked + assert_array_equal(np.ma.median(dm, axis=None), np.nan) + assert_array_equal(np.ma.median(dm, axis=0), [1, np.nan, 3]) + assert_array_equal(np.ma.median(dm, axis=1), [np.nan, 1.5]) + + def test_out_nan(self): + o = np.ma.masked_array(np.zeros((4,))) + d = np.ma.masked_array(np.ones((3, 4))) + d[2, 1] = np.nan + d[2, 2] = np.ma.masked + assert_equal(np.ma.median(d, 0, out=o), o) + o = np.ma.masked_array(np.zeros((3,))) + assert_equal(np.ma.median(d, 1, out=o), o) + o = np.ma.masked_array(np.zeros(())) + assert_equal(np.ma.median(d, out=o), o) + + def test_nan_behavior(self): + a = np.ma.masked_array(np.arange(24, dtype=float)) + a[::3] = np.ma.masked + a[2] = np.nan + assert_array_equal(np.ma.median(a), np.nan) + assert_array_equal(np.ma.median(a, axis=0), np.nan) + + a = np.ma.masked_array(np.arange(24, dtype=float).reshape(2, 3, 4)) + a.mask = np.arange(a.size) % 2 == 1 + aorig = a.copy() + a[1, 2, 3] = np.nan + a[1, 1, 2] = np.nan + + # no axis + assert_array_equal(np.ma.median(a), np.nan) + assert_(np.isscalar(np.ma.median(a))) + + # axis0 + b = np.ma.median(aorig, axis=0) + b[2, 3] = np.nan + b[1, 2] = np.nan + assert_equal(np.ma.median(a, 0), b) + + # axis1 + b = np.ma.median(aorig, axis=1) + b[1, 3] = np.nan + b[1, 2] = np.nan + assert_equal(np.ma.median(a, 1), b) + + # axis02 + b = np.ma.median(aorig, axis=(0, 2)) + b[1] = np.nan + b[2] = np.nan + assert_equal(np.ma.median(a, (0, 2)), b) + + def test_ambigous_fill(self): + # 255 is max value, used as filler for sort + a = np.array([[3, 3, 255], [3, 3, 255]], dtype=np.uint8) + a = np.ma.masked_array(a, mask=a == 3) + assert_array_equal(np.ma.median(a, axis=1), 255) + assert_array_equal(np.ma.median(a, axis=1).mask, False) + assert_array_equal(np.ma.median(a, axis=0), a[0]) + assert_array_equal(np.ma.median(a), 255) + + def test_special(self): + for inf in [np.inf, -np.inf]: + a = np.array([[inf, np.nan], [np.nan, np.nan]]) + a = np.ma.masked_array(a, mask=np.isnan(a)) + assert_equal(np.ma.median(a, axis=0), [inf, np.nan]) + assert_equal(np.ma.median(a, axis=1), [inf, np.nan]) + assert_equal(np.ma.median(a), inf) + + a = np.array([[np.nan, np.nan, inf], [np.nan, np.nan, inf]]) + a = np.ma.masked_array(a, mask=np.isnan(a)) + assert_array_equal(np.ma.median(a, axis=1), inf) + assert_array_equal(np.ma.median(a, axis=1).mask, False) + assert_array_equal(np.ma.median(a, axis=0), a[0]) + assert_array_equal(np.ma.median(a), inf) + + # no mask + a = np.array([[inf, inf], [inf, inf]]) + assert_equal(np.ma.median(a), inf) + assert_equal(np.ma.median(a, axis=0), inf) + assert_equal(np.ma.median(a, axis=1), inf) + + a = np.array([[inf, 7, -inf, -9], + [-10, np.nan, np.nan, 5], + [4, np.nan, np.nan, inf]], + dtype=np.float32) + a = 
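# --- editor's aside: illustrative sketch, not part of the committed diff ---
# The distinction the NaN tests above rely on: an *unmasked* NaN
# propagates through the median exactly as in plain numpy, whereas
# masking it removes it from the computation entirely.
import numpy as np
d = np.ma.array([1.0, np.nan, 3.0])
print(np.ma.median(d))   # nan
d[1] = np.ma.masked
print(np.ma.median(d))   # 2.0
# --- end aside ---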
np.ma.masked_array(a, mask=np.isnan(a)) + if inf > 0: + assert_equal(np.ma.median(a, axis=0), [4., 7., -inf, 5.]) + assert_equal(np.ma.median(a), 4.5) + else: + assert_equal(np.ma.median(a, axis=0), [-10., 7., -inf, -9.]) + assert_equal(np.ma.median(a), -2.5) + assert_equal(np.ma.median(a, axis=1), [-1., -2.5, inf]) + + for i in range(0, 10): + for j in range(1, 10): + a = np.array([([np.nan] * i) + ([inf] * j)] * 2) + a = np.ma.masked_array(a, mask=np.isnan(a)) + assert_equal(np.ma.median(a), inf) + assert_equal(np.ma.median(a, axis=1), inf) + assert_equal(np.ma.median(a, axis=0), + ([np.nan] * i) + [inf] * j) + + def test_empty(self): + # empty arrays + a = np.ma.masked_array(np.array([], dtype=float)) + with suppress_warnings() as w: + w.record(RuntimeWarning) + assert_array_equal(np.ma.median(a), np.nan) + assert_(w.log[0].category is RuntimeWarning) + + # multiple dimensions + a = np.ma.masked_array(np.array([], dtype=float, ndmin=3)) + # no axis + with suppress_warnings() as w: + w.record(RuntimeWarning) + warnings.filterwarnings('always', '', RuntimeWarning) + assert_array_equal(np.ma.median(a), np.nan) + assert_(w.log[0].category is RuntimeWarning) + + # axis 0 and 1 + b = np.ma.masked_array(np.array([], dtype=float, ndmin=2)) + assert_equal(np.ma.median(a, axis=0), b) + assert_equal(np.ma.median(a, axis=1), b) + + # axis 2 + b = np.ma.masked_array(np.array(np.nan, dtype=float, ndmin=2)) + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.ma.median(a, axis=2), b) + assert_(w[0].category is RuntimeWarning) + + def test_object(self): + o = np.ma.masked_array(np.arange(7.)) + assert_(type(np.ma.median(o.astype(object))), float) + o[2] = np.nan + assert_(type(np.ma.median(o.astype(object))), float) + + +class TestCov: + + def setup(self): + self.data = array(np.random.rand(12)) + + def test_1d_without_missing(self): + # Test cov on 1D variable w/o missing values + x = self.data + assert_almost_equal(np.cov(x), cov(x)) + assert_almost_equal(np.cov(x, rowvar=False), cov(x, rowvar=False)) + assert_almost_equal(np.cov(x, rowvar=False, bias=True), + cov(x, rowvar=False, bias=True)) + + def test_2d_without_missing(self): + # Test cov on 1 2D variable w/o missing values + x = self.data.reshape(3, 4) + assert_almost_equal(np.cov(x), cov(x)) + assert_almost_equal(np.cov(x, rowvar=False), cov(x, rowvar=False)) + assert_almost_equal(np.cov(x, rowvar=False, bias=True), + cov(x, rowvar=False, bias=True)) + + def test_1d_with_missing(self): + # Test cov 1 1D variable w/missing values + x = self.data + x[-1] = masked + x -= x.mean() + nx = x.compressed() + assert_almost_equal(np.cov(nx), cov(x)) + assert_almost_equal(np.cov(nx, rowvar=False), cov(x, rowvar=False)) + assert_almost_equal(np.cov(nx, rowvar=False, bias=True), + cov(x, rowvar=False, bias=True)) + # + try: + cov(x, allow_masked=False) + except ValueError: + pass + # + # 2 1D variables w/ missing values + nx = x[1:-1] + assert_almost_equal(np.cov(nx, nx[::-1]), cov(x, x[::-1])) + assert_almost_equal(np.cov(nx, nx[::-1], rowvar=False), + cov(x, x[::-1], rowvar=False)) + assert_almost_equal(np.cov(nx, nx[::-1], rowvar=False, bias=True), + cov(x, x[::-1], rowvar=False, bias=True)) + + def test_2d_with_missing(self): + # Test cov on 2D variable w/ missing value + x = self.data + x[-1] = masked + x = x.reshape(3, 4) + valid = np.logical_not(getmaskarray(x)).astype(int) + frac = np.dot(valid, valid.T) + xf = (x - x.mean(1)[:, None]).filled(0) + assert_almost_equal(cov(x), + 
np.cov(xf) * (x.shape[1] - 1) / (frac - 1.)) + assert_almost_equal(cov(x, bias=True), + np.cov(xf, bias=True) * x.shape[1] / frac) + frac = np.dot(valid.T, valid) + xf = (x - x.mean(0)).filled(0) + assert_almost_equal(cov(x, rowvar=False), + (np.cov(xf, rowvar=False) * + (x.shape[0] - 1) / (frac - 1.))) + assert_almost_equal(cov(x, rowvar=False, bias=True), + (np.cov(xf, rowvar=False, bias=True) * + x.shape[0] / frac)) + + +class TestCorrcoef: + + def setup(self): + self.data = array(np.random.rand(12)) + self.data2 = array(np.random.rand(12)) + + def test_ddof(self): + # ddof raises DeprecationWarning + x, y = self.data, self.data2 + expected = np.corrcoef(x) + expected2 = np.corrcoef(x, y) + with suppress_warnings() as sup: + warnings.simplefilter("always") + assert_warns(DeprecationWarning, corrcoef, x, ddof=-1) + sup.filter(DeprecationWarning, "bias and ddof have no effect") + # ddof has no or negligible effect on the function + assert_almost_equal(np.corrcoef(x, ddof=0), corrcoef(x, ddof=0)) + assert_almost_equal(corrcoef(x, ddof=-1), expected) + assert_almost_equal(corrcoef(x, y, ddof=-1), expected2) + assert_almost_equal(corrcoef(x, ddof=3), expected) + assert_almost_equal(corrcoef(x, y, ddof=3), expected2) + + def test_bias(self): + x, y = self.data, self.data2 + expected = np.corrcoef(x) + # bias raises DeprecationWarning + with suppress_warnings() as sup: + warnings.simplefilter("always") + assert_warns(DeprecationWarning, corrcoef, x, y, True, False) + assert_warns(DeprecationWarning, corrcoef, x, y, True, True) + assert_warns(DeprecationWarning, corrcoef, x, bias=False) + sup.filter(DeprecationWarning, "bias and ddof have no effect") + # bias has no or negligible effect on the function + assert_almost_equal(corrcoef(x, bias=1), expected) + + def test_1d_without_missing(self): + # Test cov on 1D variable w/o missing values + x = self.data + assert_almost_equal(np.corrcoef(x), corrcoef(x)) + assert_almost_equal(np.corrcoef(x, rowvar=False), + corrcoef(x, rowvar=False)) + with suppress_warnings() as sup: + sup.filter(DeprecationWarning, "bias and ddof have no effect") + assert_almost_equal(np.corrcoef(x, rowvar=False, bias=True), + corrcoef(x, rowvar=False, bias=True)) + + def test_2d_without_missing(self): + # Test corrcoef on 1 2D variable w/o missing values + x = self.data.reshape(3, 4) + assert_almost_equal(np.corrcoef(x), corrcoef(x)) + assert_almost_equal(np.corrcoef(x, rowvar=False), + corrcoef(x, rowvar=False)) + with suppress_warnings() as sup: + sup.filter(DeprecationWarning, "bias and ddof have no effect") + assert_almost_equal(np.corrcoef(x, rowvar=False, bias=True), + corrcoef(x, rowvar=False, bias=True)) + + def test_1d_with_missing(self): + # Test corrcoef 1 1D variable w/missing values + x = self.data + x[-1] = masked + x -= x.mean() + nx = x.compressed() + assert_almost_equal(np.corrcoef(nx), corrcoef(x)) + assert_almost_equal(np.corrcoef(nx, rowvar=False), + corrcoef(x, rowvar=False)) + with suppress_warnings() as sup: + sup.filter(DeprecationWarning, "bias and ddof have no effect") + assert_almost_equal(np.corrcoef(nx, rowvar=False, bias=True), + corrcoef(x, rowvar=False, bias=True)) + try: + corrcoef(x, allow_masked=False) + except ValueError: + pass + # 2 1D variables w/ missing values + nx = x[1:-1] + assert_almost_equal(np.corrcoef(nx, nx[::-1]), corrcoef(x, x[::-1])) + assert_almost_equal(np.corrcoef(nx, nx[::-1], rowvar=False), + corrcoef(x, x[::-1], rowvar=False)) + with suppress_warnings() as sup: + sup.filter(DeprecationWarning, "bias and ddof have no 
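# --- editor's aside: illustrative sketch, not part of the committed diff ---
# Why bias/ddof are deprecated no-ops for corrcoef, as asserted above:
# ddof only rescales the covariance matrix by a constant factor, and that
# factor cancels in r[i, j] = c[i, j] / sqrt(c[i, i] * c[j, j]).
import numpy as np
x = np.random.default_rng(0).random((2, 12))
c0, c5 = np.cov(x, ddof=0), np.cov(x, ddof=5)
norm = lambda c: c / np.sqrt(np.outer(np.diag(c), np.diag(c)))
assert np.allclose(norm(c0), norm(c5))   # same correlation either way
# --- end aside ---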
effect") + # ddof and bias have no or negligible effect on the function + assert_almost_equal(np.corrcoef(nx, nx[::-1]), + corrcoef(x, x[::-1], bias=1)) + assert_almost_equal(np.corrcoef(nx, nx[::-1]), + corrcoef(x, x[::-1], ddof=2)) + + def test_2d_with_missing(self): + # Test corrcoef on 2D variable w/ missing value + x = self.data + x[-1] = masked + x = x.reshape(3, 4) + + test = corrcoef(x) + control = np.corrcoef(x) + assert_almost_equal(test[:-1, :-1], control[:-1, :-1]) + with suppress_warnings() as sup: + sup.filter(DeprecationWarning, "bias and ddof have no effect") + # ddof and bias have no or negligible effect on the function + assert_almost_equal(corrcoef(x, ddof=-2)[:-1, :-1], + control[:-1, :-1]) + assert_almost_equal(corrcoef(x, ddof=3)[:-1, :-1], + control[:-1, :-1]) + assert_almost_equal(corrcoef(x, bias=1)[:-1, :-1], + control[:-1, :-1]) + + +class TestPolynomial: + # + def test_polyfit(self): + # Tests polyfit + # On ndarrays + x = np.random.rand(10) + y = np.random.rand(20).reshape(-1, 2) + assert_almost_equal(polyfit(x, y, 3), np.polyfit(x, y, 3)) + # ON 1D maskedarrays + x = x.view(MaskedArray) + x[0] = masked + y = y.view(MaskedArray) + y[0, 0] = y[-1, -1] = masked + # + (C, R, K, S, D) = polyfit(x, y[:, 0], 3, full=True) + (c, r, k, s, d) = np.polyfit(x[1:], y[1:, 0].compressed(), 3, + full=True) + for (a, a_) in zip((C, R, K, S, D), (c, r, k, s, d)): + assert_almost_equal(a, a_) + # + (C, R, K, S, D) = polyfit(x, y[:, -1], 3, full=True) + (c, r, k, s, d) = np.polyfit(x[1:-1], y[1:-1, -1], 3, full=True) + for (a, a_) in zip((C, R, K, S, D), (c, r, k, s, d)): + assert_almost_equal(a, a_) + # + (C, R, K, S, D) = polyfit(x, y, 3, full=True) + (c, r, k, s, d) = np.polyfit(x[1:-1], y[1:-1,:], 3, full=True) + for (a, a_) in zip((C, R, K, S, D), (c, r, k, s, d)): + assert_almost_equal(a, a_) + # + w = np.random.rand(10) + 1 + wo = w.copy() + xs = x[1:-1] + ys = y[1:-1] + ws = w[1:-1] + (C, R, K, S, D) = polyfit(x, y, 3, full=True, w=w) + (c, r, k, s, d) = np.polyfit(xs, ys, 3, full=True, w=ws) + assert_equal(w, wo) + for (a, a_) in zip((C, R, K, S, D), (c, r, k, s, d)): + assert_almost_equal(a, a_) + + def test_polyfit_with_masked_NaNs(self): + x = np.random.rand(10) + y = np.random.rand(20).reshape(-1, 2) + + x[0] = np.nan + y[-1,-1] = np.nan + x = x.view(MaskedArray) + y = y.view(MaskedArray) + x[0] = masked + y[-1,-1] = masked + + (C, R, K, S, D) = polyfit(x, y, 3, full=True) + (c, r, k, s, d) = np.polyfit(x[1:-1], y[1:-1,:], 3, full=True) + for (a, a_) in zip((C, R, K, S, D), (c, r, k, s, d)): + assert_almost_equal(a, a_) + + +class TestArraySetOps: + + def test_unique_onlist(self): + # Test unique on list + data = [1, 1, 1, 2, 2, 3] + test = unique(data, return_index=True, return_inverse=True) + assert_(isinstance(test[0], MaskedArray)) + assert_equal(test[0], masked_array([1, 2, 3], mask=[0, 0, 0])) + assert_equal(test[1], [0, 3, 5]) + assert_equal(test[2], [0, 0, 0, 1, 1, 2]) + + def test_unique_onmaskedarray(self): + # Test unique on masked data w/use_mask=True + data = masked_array([1, 1, 1, 2, 2, 3], mask=[0, 0, 1, 0, 1, 0]) + test = unique(data, return_index=True, return_inverse=True) + assert_equal(test[0], masked_array([1, 2, 3, -1], mask=[0, 0, 0, 1])) + assert_equal(test[1], [0, 3, 5, 2]) + assert_equal(test[2], [0, 0, 3, 1, 3, 2]) + # + data.fill_value = 3 + data = masked_array(data=[1, 1, 1, 2, 2, 3], + mask=[0, 0, 1, 0, 1, 0], fill_value=3) + test = unique(data, return_index=True, return_inverse=True) + assert_equal(test[0], masked_array([1, 2, 3, -1], 
mask=[0, 0, 0, 1])) + assert_equal(test[1], [0, 3, 5, 2]) + assert_equal(test[2], [0, 0, 3, 1, 3, 2]) + + def test_unique_allmasked(self): + # Test all masked + data = masked_array([1, 1, 1], mask=True) + test = unique(data, return_index=True, return_inverse=True) + assert_equal(test[0], masked_array([1, ], mask=[True])) + assert_equal(test[1], [0]) + assert_equal(test[2], [0, 0, 0]) + # + # Test masked + data = masked + test = unique(data, return_index=True, return_inverse=True) + assert_equal(test[0], masked_array(masked)) + assert_equal(test[1], [0]) + assert_equal(test[2], [0]) + + def test_ediff1d(self): + # Tests mediff1d + x = masked_array(np.arange(5), mask=[1, 0, 0, 0, 1]) + control = array([1, 1, 1, 4], mask=[1, 0, 0, 1]) + test = ediff1d(x) + assert_equal(test, control) + assert_equal(test.filled(0), control.filled(0)) + assert_equal(test.mask, control.mask) + + def test_ediff1d_tobegin(self): + # Test ediff1d w/ to_begin + x = masked_array(np.arange(5), mask=[1, 0, 0, 0, 1]) + test = ediff1d(x, to_begin=masked) + control = array([0, 1, 1, 1, 4], mask=[1, 1, 0, 0, 1]) + assert_equal(test, control) + assert_equal(test.filled(0), control.filled(0)) + assert_equal(test.mask, control.mask) + # + test = ediff1d(x, to_begin=[1, 2, 3]) + control = array([1, 2, 3, 1, 1, 1, 4], mask=[0, 0, 0, 1, 0, 0, 1]) + assert_equal(test, control) + assert_equal(test.filled(0), control.filled(0)) + assert_equal(test.mask, control.mask) + + def test_ediff1d_toend(self): + # Test ediff1d w/ to_end + x = masked_array(np.arange(5), mask=[1, 0, 0, 0, 1]) + test = ediff1d(x, to_end=masked) + control = array([1, 1, 1, 4, 0], mask=[1, 0, 0, 1, 1]) + assert_equal(test, control) + assert_equal(test.filled(0), control.filled(0)) + assert_equal(test.mask, control.mask) + # + test = ediff1d(x, to_end=[1, 2, 3]) + control = array([1, 1, 1, 4, 1, 2, 3], mask=[1, 0, 0, 1, 0, 0, 0]) + assert_equal(test, control) + assert_equal(test.filled(0), control.filled(0)) + assert_equal(test.mask, control.mask) + + def test_ediff1d_tobegin_toend(self): + # Test ediff1d w/ to_begin and to_end + x = masked_array(np.arange(5), mask=[1, 0, 0, 0, 1]) + test = ediff1d(x, to_end=masked, to_begin=masked) + control = array([0, 1, 1, 1, 4, 0], mask=[1, 1, 0, 0, 1, 1]) + assert_equal(test, control) + assert_equal(test.filled(0), control.filled(0)) + assert_equal(test.mask, control.mask) + # + test = ediff1d(x, to_end=[1, 2, 3], to_begin=masked) + control = array([0, 1, 1, 1, 4, 1, 2, 3], + mask=[1, 1, 0, 0, 1, 0, 0, 0]) + assert_equal(test, control) + assert_equal(test.filled(0), control.filled(0)) + assert_equal(test.mask, control.mask) + + def test_ediff1d_ndarray(self): + # Test ediff1d w/ a ndarray + x = np.arange(5) + test = ediff1d(x) + control = array([1, 1, 1, 1], mask=[0, 0, 0, 0]) + assert_equal(test, control) + assert_(isinstance(test, MaskedArray)) + assert_equal(test.filled(0), control.filled(0)) + assert_equal(test.mask, control.mask) + # + test = ediff1d(x, to_end=masked, to_begin=masked) + control = array([0, 1, 1, 1, 1, 0], mask=[1, 0, 0, 0, 0, 1]) + assert_(isinstance(test, MaskedArray)) + assert_equal(test.filled(0), control.filled(0)) + assert_equal(test.mask, control.mask) + + def test_intersect1d(self): + # Test intersect1d + x = array([1, 3, 3, 3], mask=[0, 0, 0, 1]) + y = array([3, 1, 1, 1], mask=[0, 0, 0, 1]) + test = intersect1d(x, y) + control = array([1, 3, -1], mask=[0, 0, 1]) + assert_equal(test, control) + + def test_setxor1d(self): + # Test setxor1d + a = array([1, 2, 5, 7, -1], mask=[0, 0, 0, 0, 1]) + b = 
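# --- editor's aside: illustrative sketch, not part of the committed diff ---
# ma.ediff1d as tested above: a difference touching a masked element is
# itself masked, and to_begin/to_end may be the masked constant.
import numpy as np
x = np.ma.array(np.arange(5), mask=[1, 0, 0, 0, 1])
print(np.ma.ediff1d(x))                         # [-- 1 1 --]
print(np.ma.ediff1d(x, to_begin=np.ma.masked))  # [-- -- 1 1 --]
# --- end aside ---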
array([1, 2, 3, 4, 5, -1], mask=[0, 0, 0, 0, 0, 1]) + test = setxor1d(a, b) + assert_equal(test, array([3, 4, 7])) + # + a = array([1, 2, 5, 7, -1], mask=[0, 0, 0, 0, 1]) + b = [1, 2, 3, 4, 5] + test = setxor1d(a, b) + assert_equal(test, array([3, 4, 7, -1], mask=[0, 0, 0, 1])) + # + a = array([1, 2, 3]) + b = array([6, 5, 4]) + test = setxor1d(a, b) + assert_(isinstance(test, MaskedArray)) + assert_equal(test, [1, 2, 3, 4, 5, 6]) + # + a = array([1, 8, 2, 3], mask=[0, 1, 0, 0]) + b = array([6, 5, 4, 8], mask=[0, 0, 0, 1]) + test = setxor1d(a, b) + assert_(isinstance(test, MaskedArray)) + assert_equal(test, [1, 2, 3, 4, 5, 6]) + # + assert_array_equal([], setxor1d([], [])) + + def test_isin(self): + # the tests for in1d cover most of isin's behavior + # if in1d is removed, would need to change those tests to test + # isin instead. + a = np.arange(24).reshape([2, 3, 4]) + mask = np.zeros([2, 3, 4]) + mask[1, 2, 0] = 1 + a = array(a, mask=mask) + b = array(data=[0, 10, 20, 30, 1, 3, 11, 22, 33], + mask=[0, 1, 0, 1, 0, 1, 0, 1, 0]) + ec = zeros((2, 3, 4), dtype=bool) + ec[0, 0, 0] = True + ec[0, 0, 1] = True + ec[0, 2, 3] = True + c = isin(a, b) + assert_(isinstance(c, MaskedArray)) + assert_array_equal(c, ec) + #compare results of np.isin to ma.isin + d = np.isin(a, b[~b.mask]) & ~a.mask + assert_array_equal(c, d) + + def test_in1d(self): + # Test in1d + a = array([1, 2, 5, 7, -1], mask=[0, 0, 0, 0, 1]) + b = array([1, 2, 3, 4, 5, -1], mask=[0, 0, 0, 0, 0, 1]) + test = in1d(a, b) + assert_equal(test, [True, True, True, False, True]) + # + a = array([5, 5, 2, 1, -1], mask=[0, 0, 0, 0, 1]) + b = array([1, 5, -1], mask=[0, 0, 1]) + test = in1d(a, b) + assert_equal(test, [True, True, False, True, True]) + # + assert_array_equal([], in1d([], [])) + + def test_in1d_invert(self): + # Test in1d's invert parameter + a = array([1, 2, 5, 7, -1], mask=[0, 0, 0, 0, 1]) + b = array([1, 2, 3, 4, 5, -1], mask=[0, 0, 0, 0, 0, 1]) + assert_equal(np.invert(in1d(a, b)), in1d(a, b, invert=True)) + + a = array([5, 5, 2, 1, -1], mask=[0, 0, 0, 0, 1]) + b = array([1, 5, -1], mask=[0, 0, 1]) + assert_equal(np.invert(in1d(a, b)), in1d(a, b, invert=True)) + + assert_array_equal([], in1d([], [], invert=True)) + + def test_union1d(self): + # Test union1d + a = array([1, 2, 5, 7, 5, -1], mask=[0, 0, 0, 0, 0, 1]) + b = array([1, 2, 3, 4, 5, -1], mask=[0, 0, 0, 0, 0, 1]) + test = union1d(a, b) + control = array([1, 2, 3, 4, 5, 7, -1], mask=[0, 0, 0, 0, 0, 0, 1]) + assert_equal(test, control) + + # Tests gh-10340, arguments to union1d should be + # flattened if they are not already 1D + x = array([[0, 1, 2], [3, 4, 5]], mask=[[0, 0, 0], [0, 0, 1]]) + y = array([0, 1, 2, 3, 4], mask=[0, 0, 0, 0, 1]) + ez = array([0, 1, 2, 3, 4, 5], mask=[0, 0, 0, 0, 0, 1]) + z = union1d(x, y) + assert_equal(z, ez) + # + assert_array_equal([], union1d([], [])) + + def test_setdiff1d(self): + # Test setdiff1d + a = array([6, 5, 4, 7, 7, 1, 2, 1], mask=[0, 0, 0, 0, 0, 0, 0, 1]) + b = array([2, 4, 3, 3, 2, 1, 5]) + test = setdiff1d(a, b) + assert_equal(test, array([6, 7, -1], mask=[0, 0, 1])) + # + a = arange(10) + b = arange(8) + assert_equal(setdiff1d(a, b), array([8, 9])) + a = array([], np.uint32, mask=[]) + assert_equal(setdiff1d(a, []).dtype, np.uint32) + + def test_setdiff1d_char_array(self): + # Test setdiff1d_charray + a = np.array(['a', 'b', 'c']) + b = np.array(['a', 'b', 's']) + assert_array_equal(setdiff1d(a, b), np.array(['c'])) + + +class TestShapeBase: + + def test_atleast_2d(self): + # Test atleast_2d + a = masked_array([0, 1, 
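# --- editor's aside: illustrative sketch, not part of the committed diff ---
# How the set operations above treat masks: all masked entries compare
# equal to one another, so in1d reports True for a masked element as soon
# as the second array contains any masked value.
import numpy as np
a = np.ma.array([1, 2, 5, 7, -1], mask=[0, 0, 0, 0, 1])
b = np.ma.array([1, 2, 3, 4, 5, -1], mask=[0, 0, 0, 0, 0, 1])
print(np.ma.in1d(a, b))   # [ True  True  True False  True]
# --- end aside ---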
2], mask=[0, 1, 0]) + b = atleast_2d(a) + assert_equal(b.shape, (1, 3)) + assert_equal(b.mask.shape, b.data.shape) + assert_equal(a.shape, (3,)) + assert_equal(a.mask.shape, a.data.shape) + assert_equal(b.mask.shape, b.data.shape) + + def test_shape_scalar(self): + # the atleast and diagflat function should work with scalars + # GitHub issue #3367 + # Additionally, the atleast functions should accept multiple scalars + # correctly + b = atleast_1d(1.0) + assert_equal(b.shape, (1,)) + assert_equal(b.mask.shape, b.shape) + assert_equal(b.data.shape, b.shape) + + b = atleast_1d(1.0, 2.0) + for a in b: + assert_equal(a.shape, (1,)) + assert_equal(a.mask.shape, a.shape) + assert_equal(a.data.shape, a.shape) + + b = atleast_2d(1.0) + assert_equal(b.shape, (1, 1)) + assert_equal(b.mask.shape, b.shape) + assert_equal(b.data.shape, b.shape) + + b = atleast_2d(1.0, 2.0) + for a in b: + assert_equal(a.shape, (1, 1)) + assert_equal(a.mask.shape, a.shape) + assert_equal(a.data.shape, a.shape) + + b = atleast_3d(1.0) + assert_equal(b.shape, (1, 1, 1)) + assert_equal(b.mask.shape, b.shape) + assert_equal(b.data.shape, b.shape) + + b = atleast_3d(1.0, 2.0) + for a in b: + assert_equal(a.shape, (1, 1, 1)) + assert_equal(a.mask.shape, a.shape) + assert_equal(a.data.shape, a.shape) + + + b = diagflat(1.0) + assert_equal(b.shape, (1, 1)) + assert_equal(b.mask.shape, b.data.shape) + + +class TestStack: + + def test_stack_1d(self): + a = masked_array([0, 1, 2], mask=[0, 1, 0]) + b = masked_array([9, 8, 7], mask=[1, 0, 0]) + + c = stack([a, b], axis=0) + assert_equal(c.shape, (2, 3)) + assert_array_equal(a.mask, c[0].mask) + assert_array_equal(b.mask, c[1].mask) + + d = vstack([a, b]) + assert_array_equal(c.data, d.data) + assert_array_equal(c.mask, d.mask) + + c = stack([a, b], axis=1) + assert_equal(c.shape, (3, 2)) + assert_array_equal(a.mask, c[:, 0].mask) + assert_array_equal(b.mask, c[:, 1].mask) + + def test_stack_masks(self): + a = masked_array([0, 1, 2], mask=True) + b = masked_array([9, 8, 7], mask=False) + + c = stack([a, b], axis=0) + assert_equal(c.shape, (2, 3)) + assert_array_equal(a.mask, c[0].mask) + assert_array_equal(b.mask, c[1].mask) + + d = vstack([a, b]) + assert_array_equal(c.data, d.data) + assert_array_equal(c.mask, d.mask) + + c = stack([a, b], axis=1) + assert_equal(c.shape, (3, 2)) + assert_array_equal(a.mask, c[:, 0].mask) + assert_array_equal(b.mask, c[:, 1].mask) + + def test_stack_nd(self): + # 2D + shp = (3, 2) + d1 = np.random.randint(0, 10, shp) + d2 = np.random.randint(0, 10, shp) + m1 = np.random.randint(0, 2, shp).astype(bool) + m2 = np.random.randint(0, 2, shp).astype(bool) + a1 = masked_array(d1, mask=m1) + a2 = masked_array(d2, mask=m2) + + c = stack([a1, a2], axis=0) + c_shp = (2,) + shp + assert_equal(c.shape, c_shp) + assert_array_equal(a1.mask, c[0].mask) + assert_array_equal(a2.mask, c[1].mask) + + c = stack([a1, a2], axis=-1) + c_shp = shp + (2,) + assert_equal(c.shape, c_shp) + assert_array_equal(a1.mask, c[..., 0].mask) + assert_array_equal(a2.mask, c[..., 1].mask) + + # 4D + shp = (3, 2, 4, 5,) + d1 = np.random.randint(0, 10, shp) + d2 = np.random.randint(0, 10, shp) + m1 = np.random.randint(0, 2, shp).astype(bool) + m2 = np.random.randint(0, 2, shp).astype(bool) + a1 = masked_array(d1, mask=m1) + a2 = masked_array(d2, mask=m2) + + c = stack([a1, a2], axis=0) + c_shp = (2,) + shp + assert_equal(c.shape, c_shp) + assert_array_equal(a1.mask, c[0].mask) + assert_array_equal(a2.mask, c[1].mask) + + c = stack([a1, a2], axis=-1) + c_shp = shp + (2,) + 
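# --- editor's aside: illustrative sketch, not part of the committed diff ---
# The property the stack tests above verify: np.ma.stack joins data and
# masks in lockstep, so each input's mask is recoverable from the
# matching slice of the result.
import numpy as np
a = np.ma.array([0, 1, 2], mask=[0, 1, 0])
b = np.ma.array([9, 8, 7], mask=[1, 0, 0])
c = np.ma.stack([a, b], axis=0)
assert c.shape == (2, 3)
assert (c[0].mask == a.mask).all() and (c[1].mask == b.mask).all()
# --- end aside ---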
assert_equal(c.shape, c_shp) + assert_array_equal(a1.mask, c[..., 0].mask) + assert_array_equal(a2.mask, c[..., 1].mask) diff --git a/.local/lib/python3.8/site-packages/numpy/ma/tests/test_mrecords.py b/.local/lib/python3.8/site-packages/numpy/ma/tests/test_mrecords.py new file mode 100644 index 0000000..4b2c01d --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/ma/tests/test_mrecords.py @@ -0,0 +1,493 @@ +# pylint: disable-msg=W0611, W0612, W0511,R0201 +"""Tests suite for mrecords. + +:author: Pierre Gerard-Marchant +:contact: pierregm_at_uga_dot_edu + +""" +import numpy as np +import numpy.ma as ma +from numpy import recarray +from numpy.ma import masked, nomask +from numpy.testing import temppath +from numpy.core.records import ( + fromrecords as recfromrecords, fromarrays as recfromarrays + ) +from numpy.ma.mrecords import ( + MaskedRecords, mrecarray, fromarrays, fromtextfile, fromrecords, + addfield + ) +from numpy.ma.testutils import ( + assert_, assert_equal, + assert_equal_records, + ) +from numpy.compat import pickle + + +class TestMRecords: + + ilist = [1, 2, 3, 4, 5] + flist = [1.1, 2.2, 3.3, 4.4, 5.5] + slist = [b'one', b'two', b'three', b'four', b'five'] + ddtype = [('a', int), ('b', float), ('c', '|S8')] + mask = [0, 1, 0, 0, 1] + base = ma.array(list(zip(ilist, flist, slist)), mask=mask, dtype=ddtype) + + def test_byview(self): + # Test creation by view + base = self.base + mbase = base.view(mrecarray) + assert_equal(mbase.recordmask, base.recordmask) + assert_equal_records(mbase._mask, base._mask) + assert_(isinstance(mbase._data, recarray)) + assert_equal_records(mbase._data, base._data.view(recarray)) + for field in ('a', 'b', 'c'): + assert_equal(base[field], mbase[field]) + assert_equal_records(mbase.view(mrecarray), mbase) + + def test_get(self): + # Tests fields retrieval + base = self.base.copy() + mbase = base.view(mrecarray) + # As fields.......... + for field in ('a', 'b', 'c'): + assert_equal(getattr(mbase, field), mbase[field]) + assert_equal(base[field], mbase[field]) + # as elements ....... + mbase_first = mbase[0] + assert_(isinstance(mbase_first, mrecarray)) + assert_equal(mbase_first.dtype, mbase.dtype) + assert_equal(mbase_first.tolist(), (1, 1.1, b'one')) + # Used to be mask, now it's recordmask + assert_equal(mbase_first.recordmask, nomask) + assert_equal(mbase_first._mask.item(), (False, False, False)) + assert_equal(mbase_first['a'], mbase['a'][0]) + mbase_last = mbase[-1] + assert_(isinstance(mbase_last, mrecarray)) + assert_equal(mbase_last.dtype, mbase.dtype) + assert_equal(mbase_last.tolist(), (None, None, None)) + # Used to be mask, now it's recordmask + assert_equal(mbase_last.recordmask, True) + assert_equal(mbase_last._mask.item(), (True, True, True)) + assert_equal(mbase_last['a'], mbase['a'][-1]) + assert_((mbase_last['a'] is masked)) + # as slice .......... + mbase_sl = mbase[:2] + assert_(isinstance(mbase_sl, mrecarray)) + assert_equal(mbase_sl.dtype, mbase.dtype) + # Used to be mask, now it's recordmask + assert_equal(mbase_sl.recordmask, [0, 1]) + assert_equal_records(mbase_sl.mask, + np.array([(False, False, False), + (True, True, True)], + dtype=mbase._mask.dtype)) + assert_equal_records(mbase_sl, base[:2].view(mrecarray)) + for field in ('a', 'b', 'c'): + assert_equal(getattr(mbase_sl, field), base[:2][field]) + + def test_set_fields(self): + # Tests setting fields. 
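# --- editor's aside: illustrative sketch, not part of the committed diff ---
# A minimal construction matching the TestMRecords fixture above:
# fromarrays builds a mrecarray whose per-field masks come from the input
# masked arrays, and a masked field of a record reads back as the masked
# constant.
import numpy.ma as ma
from numpy.ma.mrecords import fromarrays
ints = ma.array([1, 2, 3], mask=[0, 1, 0])
flts = ma.array([1.1, 2.2, 3.3], mask=[1, 0, 0])
rec = fromarrays([ints, flts], dtype=[('a', int), ('b', float)])
print(rec.a)                      # [1 -- 3]
print(rec[0]['b'] is ma.masked)   # True
# --- end aside ---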
+ base = self.base.copy() + mbase = base.view(mrecarray) + mbase = mbase.copy() + mbase.fill_value = (999999, 1e20, 'N/A') + # Change the data, the mask should be conserved + mbase.a._data[:] = 5 + assert_equal(mbase['a']._data, [5, 5, 5, 5, 5]) + assert_equal(mbase['a']._mask, [0, 1, 0, 0, 1]) + # Change the elements, and the mask will follow + mbase.a = 1 + assert_equal(mbase['a']._data, [1]*5) + assert_equal(ma.getmaskarray(mbase['a']), [0]*5) + # Use to be _mask, now it's recordmask + assert_equal(mbase.recordmask, [False]*5) + assert_equal(mbase._mask.tolist(), + np.array([(0, 0, 0), + (0, 1, 1), + (0, 0, 0), + (0, 0, 0), + (0, 1, 1)], + dtype=bool)) + # Set a field to mask ........................ + mbase.c = masked + # Use to be mask, and now it's still mask ! + assert_equal(mbase.c.mask, [1]*5) + assert_equal(mbase.c.recordmask, [1]*5) + assert_equal(ma.getmaskarray(mbase['c']), [1]*5) + assert_equal(ma.getdata(mbase['c']), [b'N/A']*5) + assert_equal(mbase._mask.tolist(), + np.array([(0, 0, 1), + (0, 1, 1), + (0, 0, 1), + (0, 0, 1), + (0, 1, 1)], + dtype=bool)) + # Set fields by slices ....................... + mbase = base.view(mrecarray).copy() + mbase.a[3:] = 5 + assert_equal(mbase.a, [1, 2, 3, 5, 5]) + assert_equal(mbase.a._mask, [0, 1, 0, 0, 0]) + mbase.b[3:] = masked + assert_equal(mbase.b, base['b']) + assert_equal(mbase.b._mask, [0, 1, 0, 1, 1]) + # Set fields globally.......................... + ndtype = [('alpha', '|S1'), ('num', int)] + data = ma.array([('a', 1), ('b', 2), ('c', 3)], dtype=ndtype) + rdata = data.view(MaskedRecords) + val = ma.array([10, 20, 30], mask=[1, 0, 0]) + + rdata['num'] = val + assert_equal(rdata.num, val) + assert_equal(rdata.num.mask, [1, 0, 0]) + + def test_set_fields_mask(self): + # Tests setting the mask of a field. + base = self.base.copy() + # This one has already a mask.... + mbase = base.view(mrecarray) + mbase['a'][-2] = masked + assert_equal(mbase.a, [1, 2, 3, 4, 5]) + assert_equal(mbase.a._mask, [0, 1, 0, 1, 1]) + # This one has not yet + mbase = fromarrays([np.arange(5), np.random.rand(5)], + dtype=[('a', int), ('b', float)]) + mbase['a'][-2] = masked + assert_equal(mbase.a, [0, 1, 2, 3, 4]) + assert_equal(mbase.a._mask, [0, 0, 0, 1, 0]) + + def test_set_mask(self): + base = self.base.copy() + mbase = base.view(mrecarray) + # Set the mask to True ....................... + mbase.mask = masked + assert_equal(ma.getmaskarray(mbase['b']), [1]*5) + assert_equal(mbase['a']._mask, mbase['b']._mask) + assert_equal(mbase['a']._mask, mbase['c']._mask) + assert_equal(mbase._mask.tolist(), + np.array([(1, 1, 1)]*5, dtype=bool)) + # Delete the mask ............................ + mbase.mask = nomask + assert_equal(ma.getmaskarray(mbase['c']), [0]*5) + assert_equal(mbase._mask.tolist(), + np.array([(0, 0, 0)]*5, dtype=bool)) + + def test_set_mask_fromarray(self): + base = self.base.copy() + mbase = base.view(mrecarray) + # Sets the mask w/ an array + mbase.mask = [1, 0, 0, 0, 1] + assert_equal(mbase.a.mask, [1, 0, 0, 0, 1]) + assert_equal(mbase.b.mask, [1, 0, 0, 0, 1]) + assert_equal(mbase.c.mask, [1, 0, 0, 0, 1]) + # Yay, once more ! 
+ mbase.mask = [0, 0, 0, 0, 1] + assert_equal(mbase.a.mask, [0, 0, 0, 0, 1]) + assert_equal(mbase.b.mask, [0, 0, 0, 0, 1]) + assert_equal(mbase.c.mask, [0, 0, 0, 0, 1]) + + def test_set_mask_fromfields(self): + mbase = self.base.copy().view(mrecarray) + + nmask = np.array( + [(0, 1, 0), (0, 1, 0), (1, 0, 1), (1, 0, 1), (0, 0, 0)], + dtype=[('a', bool), ('b', bool), ('c', bool)]) + mbase.mask = nmask + assert_equal(mbase.a.mask, [0, 0, 1, 1, 0]) + assert_equal(mbase.b.mask, [1, 1, 0, 0, 0]) + assert_equal(mbase.c.mask, [0, 0, 1, 1, 0]) + # Reinitialize and redo + mbase.mask = False + mbase.fieldmask = nmask + assert_equal(mbase.a.mask, [0, 0, 1, 1, 0]) + assert_equal(mbase.b.mask, [1, 1, 0, 0, 0]) + assert_equal(mbase.c.mask, [0, 0, 1, 1, 0]) + + def test_set_elements(self): + base = self.base.copy() + # Set an element to mask ..................... + mbase = base.view(mrecarray).copy() + mbase[-2] = masked + assert_equal( + mbase._mask.tolist(), + np.array([(0, 0, 0), (1, 1, 1), (0, 0, 0), (1, 1, 1), (1, 1, 1)], + dtype=bool)) + # Used to be mask, now it's recordmask! + assert_equal(mbase.recordmask, [0, 1, 0, 1, 1]) + # Set slices ................................. + mbase = base.view(mrecarray).copy() + mbase[:2] = (5, 5, 5) + assert_equal(mbase.a._data, [5, 5, 3, 4, 5]) + assert_equal(mbase.a._mask, [0, 0, 0, 0, 1]) + assert_equal(mbase.b._data, [5., 5., 3.3, 4.4, 5.5]) + assert_equal(mbase.b._mask, [0, 0, 0, 0, 1]) + assert_equal(mbase.c._data, + [b'5', b'5', b'three', b'four', b'five']) + assert_equal(mbase.b._mask, [0, 0, 0, 0, 1]) + + mbase = base.view(mrecarray).copy() + mbase[:2] = masked + assert_equal(mbase.a._data, [1, 2, 3, 4, 5]) + assert_equal(mbase.a._mask, [1, 1, 0, 0, 1]) + assert_equal(mbase.b._data, [1.1, 2.2, 3.3, 4.4, 5.5]) + assert_equal(mbase.b._mask, [1, 1, 0, 0, 1]) + assert_equal(mbase.c._data, + [b'one', b'two', b'three', b'four', b'five']) + assert_equal(mbase.b._mask, [1, 1, 0, 0, 1]) + + def test_setslices_hardmask(self): + # Tests setting slices w/ hardmask. + base = self.base.copy() + mbase = base.view(mrecarray) + mbase.harden_mask() + try: + mbase[-2:] = (5, 5, 5) + assert_equal(mbase.a._data, [1, 2, 3, 5, 5]) + assert_equal(mbase.b._data, [1.1, 2.2, 3.3, 5, 5.5]) + assert_equal(mbase.c._data, + [b'one', b'two', b'three', b'5', b'five']) + assert_equal(mbase.a._mask, [0, 1, 0, 0, 1]) + assert_equal(mbase.b._mask, mbase.a._mask) + assert_equal(mbase.b._mask, mbase.c._mask) + except NotImplementedError: + # OK, not implemented yet... + pass + except AssertionError: + raise + else: + raise Exception("Flexible hard masks should be supported !") + # Not using a tuple should crash + try: + mbase[-2:] = 3 + except (NotImplementedError, TypeError): + pass + else: + raise TypeError("Should have expected a readable buffer object!") + + def test_hardmask(self): + # Test hardmask + base = self.base.copy() + mbase = base.view(mrecarray) + mbase.harden_mask() + assert_(mbase._hardmask) + mbase.mask = nomask + assert_equal_records(mbase._mask, base._mask) + mbase.soften_mask() + assert_(not mbase._hardmask) + mbase.mask = nomask + # So, the mask of a field is no longer set to nomask... 
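# --- editor's aside: illustrative sketch, not part of the committed diff ---
# The hard/soft-mask rule exercised in the hardmask tests above: while
# the mask is hardened, assignment cannot unmask an entry; softening
# makes it writable again.
import numpy as np
x = np.ma.array([1, 2, 3], mask=[0, 1, 0])
x.harden_mask()
x[1] = 99
print(x)   # [1 -- 3] -- the write was ignored
x.soften_mask()
x[1] = 99
print(x)   # [1 99 3]
# --- end aside ---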
+ assert_equal_records(mbase._mask, + ma.make_mask_none(base.shape, base.dtype)) + assert_(ma.make_mask(mbase['b']._mask) is nomask) + assert_equal(mbase['a']._mask, mbase['b']._mask) + + def test_pickling(self): + # Test pickling + base = self.base.copy() + mrec = base.view(mrecarray) + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + _ = pickle.dumps(mrec, protocol=proto) + mrec_ = pickle.loads(_) + assert_equal(mrec_.dtype, mrec.dtype) + assert_equal_records(mrec_._data, mrec._data) + assert_equal(mrec_._mask, mrec._mask) + assert_equal_records(mrec_._mask, mrec._mask) + + def test_filled(self): + # Test filling the array + _a = ma.array([1, 2, 3], mask=[0, 0, 1], dtype=int) + _b = ma.array([1.1, 2.2, 3.3], mask=[0, 0, 1], dtype=float) + _c = ma.array(['one', 'two', 'three'], mask=[0, 0, 1], dtype='|S8') + ddtype = [('a', int), ('b', float), ('c', '|S8')] + mrec = fromarrays([_a, _b, _c], dtype=ddtype, + fill_value=(99999, 99999., 'N/A')) + mrecfilled = mrec.filled() + assert_equal(mrecfilled['a'], np.array((1, 2, 99999), dtype=int)) + assert_equal(mrecfilled['b'], np.array((1.1, 2.2, 99999.), + dtype=float)) + assert_equal(mrecfilled['c'], np.array(('one', 'two', 'N/A'), + dtype='|S8')) + + def test_tolist(self): + # Test tolist. + _a = ma.array([1, 2, 3], mask=[0, 0, 1], dtype=int) + _b = ma.array([1.1, 2.2, 3.3], mask=[0, 0, 1], dtype=float) + _c = ma.array(['one', 'two', 'three'], mask=[1, 0, 0], dtype='|S8') + ddtype = [('a', int), ('b', float), ('c', '|S8')] + mrec = fromarrays([_a, _b, _c], dtype=ddtype, + fill_value=(99999, 99999., 'N/A')) + + assert_equal(mrec.tolist(), + [(1, 1.1, None), (2, 2.2, b'two'), + (None, None, b'three')]) + + def test_withnames(self): + # Test the creation w/ format and names + x = mrecarray(1, formats=float, names='base') + x[0]['base'] = 10 + assert_equal(x['base'][0], 10) + + def test_exotic_formats(self): + # Test that 'exotic' formats are processed properly + easy = mrecarray(1, dtype=[('i', int), ('s', '|S8'), ('f', float)]) + easy[0] = masked + assert_equal(easy.filled(1).item(), (1, b'1', 1.)) + + solo = mrecarray(1, dtype=[('f0', '<f8', (2, 2))]) [...] + if len(s) > 1: + assert_(eq(np.concatenate((x, y), 1), + concatenate((xm, ym), 1))) + assert_(eq(np.add.reduce(x, 1), add.reduce(x, 1))) + assert_(eq(np.sum(x, 1), sum(x, 1))) + assert_(eq(np.product(x, 1), product(x, 1))) + + def test_testCI(self): + # Test of conversions and indexing + x1 = np.array([1, 2, 4, 3]) + x2 = array(x1, mask=[1, 0, 0, 0]) + x3 = array(x1, mask=[0, 1, 0, 1]) + x4 = array(x1) + # test conversion to strings + str(x2) # raises? + repr(x2) # raises?
+ assert_(eq(np.sort(x1), sort(x2, fill_value=0))) + # tests of indexing + assert_(type(x2[1]) is type(x1[1])) + assert_(x1[1] == x2[1]) + assert_(x2[0] is masked) + assert_(eq(x1[2], x2[2])) + assert_(eq(x1[2:5], x2[2:5])) + assert_(eq(x1[:], x2[:])) + assert_(eq(x1[1:], x3[1:])) + x1[2] = 9 + x2[2] = 9 + assert_(eq(x1, x2)) + x1[1:3] = 99 + x2[1:3] = 99 + assert_(eq(x1, x2)) + x2[1] = masked + assert_(eq(x1, x2)) + x2[1:3] = masked + assert_(eq(x1, x2)) + x2[:] = x1 + x2[1] = masked + assert_(allequal(getmask(x2), array([0, 1, 0, 0]))) + x3[:] = masked_array([1, 2, 3, 4], [0, 1, 1, 0]) + assert_(allequal(getmask(x3), array([0, 1, 1, 0]))) + x4[:] = masked_array([1, 2, 3, 4], [0, 1, 1, 0]) + assert_(allequal(getmask(x4), array([0, 1, 1, 0]))) + assert_(allequal(x4, array([1, 2, 3, 4]))) + x1 = np.arange(5) * 1.0 + x2 = masked_values(x1, 3.0) + assert_(eq(x1, x2)) + assert_(allequal(array([0, 0, 0, 1, 0], MaskType), x2.mask)) + assert_(eq(3.0, x2.fill_value)) + x1 = array([1, 'hello', 2, 3], object) + x2 = np.array([1, 'hello', 2, 3], object) + s1 = x1[1] + s2 = x2[1] + assert_equal(type(s2), str) + assert_equal(type(s1), str) + assert_equal(s1, s2) + assert_(x1[1:1].shape == (0,)) + + def test_testCopySize(self): + # Tests of some subtle points of copying and sizing. + n = [0, 0, 1, 0, 0] + m = make_mask(n) + m2 = make_mask(m) + assert_(m is m2) + m3 = make_mask(m, copy=True) + assert_(m is not m3) + + x1 = np.arange(5) + y1 = array(x1, mask=m) + assert_(y1._data is not x1) + assert_(allequal(x1, y1._data)) + assert_(y1._mask is m) + + y1a = array(y1, copy=0) + # For copy=False, one might expect that the array would just + # passed on, i.e., that it would be "is" instead of "==". + # See gh-4043 for discussion. + assert_(y1a._mask.__array_interface__ == + y1._mask.__array_interface__) + + y2 = array(x1, mask=m3, copy=0) + assert_(y2._mask is m3) + assert_(y2[2] is masked) + y2[2] = 9 + assert_(y2[2] is not masked) + assert_(y2._mask is m3) + assert_(allequal(y2.mask, 0)) + + y2a = array(x1, mask=m, copy=1) + assert_(y2a._mask is not m) + assert_(y2a[2] is masked) + y2a[2] = 9 + assert_(y2a[2] is not masked) + assert_(y2a._mask is not m) + assert_(allequal(y2a.mask, 0)) + + y3 = array(x1 * 1.0, mask=m) + assert_(filled(y3).dtype is (x1 * 1.0).dtype) + + x4 = arange(4) + x4[2] = masked + y4 = resize(x4, (8,)) + assert_(eq(concatenate([x4, x4]), y4)) + assert_(eq(getmask(y4), [0, 0, 1, 0, 0, 0, 1, 0])) + y5 = repeat(x4, (2, 2, 2, 2), axis=0) + assert_(eq(y5, [0, 0, 1, 1, 2, 2, 3, 3])) + y6 = repeat(x4, 2, axis=0) + assert_(eq(y5, y6)) + + def test_testPut(self): + # Test of put + d = arange(5) + n = [0, 0, 0, 1, 1] + m = make_mask(n) + m2 = m.copy() + x = array(d, mask=m) + assert_(x[3] is masked) + assert_(x[4] is masked) + x[[1, 4]] = [10, 40] + assert_(x._mask is m) + assert_(x[3] is masked) + assert_(x[4] is not masked) + assert_(eq(x, [0, 10, 2, -1, 40])) + + x = array(d, mask=m2, copy=True) + x.put([0, 1, 2], [-1, 100, 200]) + assert_(x._mask is not m2) + assert_(x[3] is masked) + assert_(x[4] is masked) + assert_(eq(x, [-1, 100, 200, 0, 0])) + + def test_testPut2(self): + # Test of put + d = arange(5) + x = array(d, mask=[0, 0, 0, 0, 0]) + z = array([10, 40], mask=[1, 0]) + assert_(x[2] is not masked) + assert_(x[3] is not masked) + x[2:4] = z + assert_(x[2] is masked) + assert_(x[3] is not masked) + assert_(eq(x, [0, 1, 10, 40, 4])) + + d = arange(5) + x = array(d, mask=[0, 0, 0, 0, 0]) + y = x[2:4] + z = array([10, 40], mask=[1, 0]) + assert_(x[2] is not masked) + assert_(x[3] is 
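# --- editor's aside: illustrative sketch, not part of the committed diff ---
# The subtlety test_testCopySize above turns on: make_mask returns its
# argument unchanged unless a copy is explicitly requested.
import numpy as np
m = np.ma.make_mask([0, 0, 1, 0, 0])
assert np.ma.make_mask(m) is m
assert np.ma.make_mask(m, copy=True) is not m
# --- end aside ---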
not masked) + y[:] = z + assert_(y[0] is masked) + assert_(y[1] is not masked) + assert_(eq(y, [10, 40])) + assert_(x[2] is masked) + assert_(x[3] is not masked) + assert_(eq(x, [0, 1, 10, 40, 4])) + + def test_testMaPut(self): + (x, y, a10, m1, m2, xm, ym, z, zm, xf, s) = self.d + m = [1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1] + i = np.nonzero(m)[0] + put(ym, i, zm) + assert_(all(take(ym, i, axis=0) == zm)) + + def test_testOddFeatures(self): + # Test of other odd features + x = arange(20) + x = x.reshape(4, 5) + x.flat[5] = 12 + assert_(x[1, 0] == 12) + z = x + 10j * x + assert_(eq(z.real, x)) + assert_(eq(z.imag, 10 * x)) + assert_(eq((z * conjugate(z)).real, 101 * x * x)) + z.imag[...] = 0.0 + + x = arange(10) + x[3] = masked + assert_(str(x[3]) == str(masked)) + c = x >= 8 + assert_(count(where(c, masked, masked)) == 0) + assert_(shape(where(c, masked, masked)) == c.shape) + z = where(c, x, masked) + assert_(z.dtype is x.dtype) + assert_(z[3] is masked) + assert_(z[4] is masked) + assert_(z[7] is masked) + assert_(z[8] is not masked) + assert_(z[9] is not masked) + assert_(eq(x, z)) + z = where(c, masked, x) + assert_(z.dtype is x.dtype) + assert_(z[3] is masked) + assert_(z[4] is not masked) + assert_(z[7] is not masked) + assert_(z[8] is masked) + assert_(z[9] is masked) + z = masked_where(c, x) + assert_(z.dtype is x.dtype) + assert_(z[3] is masked) + assert_(z[4] is not masked) + assert_(z[7] is not masked) + assert_(z[8] is masked) + assert_(z[9] is masked) + assert_(eq(x, z)) + x = array([1., 2., 3., 4., 5.]) + c = array([1, 1, 1, 0, 0]) + x[2] = masked + z = where(c, x, -x) + assert_(eq(z, [1., 2., 0., -4., -5])) + c[0] = masked + z = where(c, x, -x) + assert_(eq(z, [1., 2., 0., -4., -5])) + assert_(z[0] is masked) + assert_(z[1] is not masked) + assert_(z[2] is masked) + assert_(eq(masked_where(greater(x, 2), x), masked_greater(x, 2))) + assert_(eq(masked_where(greater_equal(x, 2), x), + masked_greater_equal(x, 2))) + assert_(eq(masked_where(less(x, 2), x), masked_less(x, 2))) + assert_(eq(masked_where(less_equal(x, 2), x), masked_less_equal(x, 2))) + assert_(eq(masked_where(not_equal(x, 2), x), masked_not_equal(x, 2))) + assert_(eq(masked_where(equal(x, 2), x), masked_equal(x, 2))) + assert_(eq(masked_where(not_equal(x, 2), x), masked_not_equal(x, 2))) + assert_(eq(masked_inside(list(range(5)), 1, 3), [0, 199, 199, 199, 4])) + assert_(eq(masked_outside(list(range(5)), 1, 3), [199, 1, 2, 3, 199])) + assert_(eq(masked_inside(array(list(range(5)), + mask=[1, 0, 0, 0, 0]), 1, 3).mask, + [1, 1, 1, 1, 0])) + assert_(eq(masked_outside(array(list(range(5)), + mask=[0, 1, 0, 0, 0]), 1, 3).mask, + [1, 1, 0, 0, 1])) + assert_(eq(masked_equal(array(list(range(5)), + mask=[1, 0, 0, 0, 0]), 2).mask, + [1, 0, 1, 0, 0])) + assert_(eq(masked_not_equal(array([2, 2, 1, 2, 1], + mask=[1, 0, 0, 0, 0]), 2).mask, + [1, 0, 1, 0, 1])) + assert_(eq(masked_where([1, 1, 0, 0, 0], [1, 2, 3, 4, 5]), + [99, 99, 3, 4, 5])) + atest = ones((10, 10, 10), dtype=np.float32) + btest = zeros(atest.shape, MaskType) + ctest = masked_where(btest, atest) + assert_(eq(atest, ctest)) + z = choose(c, (-x, x)) + assert_(eq(z, [1., 2., 0., -4., -5])) + assert_(z[0] is masked) + assert_(z[1] is not masked) + assert_(z[2] is masked) + x = arange(6) + x[5] = masked + y = arange(6) * 10 + y[2] = masked + c = array([1, 1, 1, 0, 0, 0], mask=[1, 0, 0, 0, 0, 0]) + cm = c.filled(1) + z = where(c, x, y) + zm = where(cm, x, y) + assert_(eq(z, zm)) + assert_(getmask(zm) is nomask) + assert_(eq(zm, [0, 1, 2, 30, 40, 50])) + z = where(c, 
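# --- editor's aside: illustrative sketch, not part of the committed diff ---
# ma.where as used in test_testOddFeatures above: the output mask is
# assembled elementwise, so passing the masked constant for one branch
# masks exactly the positions where that branch is selected.
import numpy as np
x = np.ma.arange(5)
c = x >= 3
print(np.ma.where(c, x, -x))            # [0 -1 -2 3 4]
print(np.ma.where(c, np.ma.masked, x))  # [0 1 2 -- --]
# --- end aside ---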
masked, 1) + assert_(eq(z, [99, 99, 99, 1, 1, 1])) + z = where(c, 1, masked) + assert_(eq(z, [99, 1, 1, 99, 99, 99])) + + def test_testMinMax2(self): + # Test of minimum, maximum. + assert_(eq(minimum([1, 2, 3], [4, 0, 9]), [1, 0, 3])) + assert_(eq(maximum([1, 2, 3], [4, 0, 9]), [4, 2, 9])) + x = arange(5) + y = arange(5) - 2 + x[3] = masked + y[0] = masked + assert_(eq(minimum(x, y), where(less(x, y), x, y))) + assert_(eq(maximum(x, y), where(greater(x, y), x, y))) + assert_(minimum.reduce(x) == 0) + assert_(maximum.reduce(x) == 4) + + def test_testTakeTransposeInnerOuter(self): + # Test of take, transpose, inner, outer products + x = arange(24) + y = np.arange(24) + x[5:6] = masked + x = x.reshape(2, 3, 4) + y = y.reshape(2, 3, 4) + assert_(eq(np.transpose(y, (2, 0, 1)), transpose(x, (2, 0, 1)))) + assert_(eq(np.take(y, (2, 0, 1), 1), take(x, (2, 0, 1), 1))) + assert_(eq(np.inner(filled(x, 0), filled(y, 0)), + inner(x, y))) + assert_(eq(np.outer(filled(x, 0), filled(y, 0)), + outer(x, y))) + y = array(['abc', 1, 'def', 2, 3], object) + y[2] = masked + t = take(y, [0, 3, 4]) + assert_(t[0] == 'abc') + assert_(t[1] == 2) + assert_(t[2] == 3) + + def test_testInplace(self): + # Test of inplace operations and rich comparisons + y = arange(10) + + x = arange(10) + xm = arange(10) + xm[2] = masked + x += 1 + assert_(eq(x, y + 1)) + xm += 1 + assert_(eq(x, y + 1)) + + x = arange(10) + xm = arange(10) + xm[2] = masked + x -= 1 + assert_(eq(x, y - 1)) + xm -= 1 + assert_(eq(xm, y - 1)) + + x = arange(10) * 1.0 + xm = arange(10) * 1.0 + xm[2] = masked + x *= 2.0 + assert_(eq(x, y * 2)) + xm *= 2.0 + assert_(eq(xm, y * 2)) + + x = arange(10) * 2 + xm = arange(10) + xm[2] = masked + x //= 2 + assert_(eq(x, y)) + xm //= 2 + assert_(eq(x, y)) + + x = arange(10) * 1.0 + xm = arange(10) * 1.0 + xm[2] = masked + x /= 2.0 + assert_(eq(x, y / 2.0)) + xm /= arange(10) + assert_(eq(xm, ones((10,)))) + + x = arange(10).astype(np.float32) + xm = arange(10) + xm[2] = masked + x += 1. + assert_(eq(x, y + 1.)) + + def test_testPickle(self): + # Test of pickling + x = arange(12) + x[4:10:2] = masked + x = x.reshape(4, 3) + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + s = pickle.dumps(x, protocol=proto) + y = pickle.loads(s) + assert_(eq(x, y)) + + def test_testMasked(self): + # Test of masked element + xx = arange(6) + xx[1] = masked + assert_(str(masked) == '--') + assert_(xx[1] is masked) + assert_equal(filled(xx[1], 0), 0) + + def test_testAverage1(self): + # Test of average. + ott = array([0., 1., 2., 3.], mask=[1, 0, 0, 0]) + assert_(eq(2.0, average(ott, axis=0))) + assert_(eq(2.0, average(ott, weights=[1., 1., 2., 1.]))) + result, wts = average(ott, weights=[1., 1., 2., 1.], returned=True) + assert_(eq(2.0, result)) + assert_(wts == 4.0) + ott[:] = masked + assert_(average(ott, axis=0) is masked) + ott = array([0., 1., 2., 3.], mask=[1, 0, 0, 0]) + ott = ott.reshape(2, 2) + ott[:, 1] = masked + assert_(eq(average(ott, axis=0), [2.0, 0.0])) + assert_(average(ott, axis=1)[0] is masked) + assert_(eq([2., 0.], average(ott, axis=0))) + result, wts = average(ott, axis=0, returned=True) + assert_(eq(wts, [1., 0.])) + + def test_testAverage2(self): + # More tests of average. + w1 = [0, 1, 1, 1, 1, 0] + w2 = [[0, 1, 1, 1, 1, 0], [1, 0, 0, 0, 0, 1]] + x = arange(6) + assert_(allclose(average(x, axis=0), 2.5)) + assert_(allclose(average(x, axis=0, weights=w1), 2.5)) + y = array([arange(6), 2.0 * arange(6)]) + assert_(allclose(average(y, None), + np.add.reduce(np.arange(6)) * 3. 
/ 12.)) + assert_(allclose(average(y, axis=0), np.arange(6) * 3. / 2.)) + assert_(allclose(average(y, axis=1), + [average(x, axis=0), average(x, axis=0)*2.0])) + assert_(allclose(average(y, None, weights=w2), 20. / 6.)) + assert_(allclose(average(y, axis=0, weights=w2), + [0., 1., 2., 3., 4., 10.])) + assert_(allclose(average(y, axis=1), + [average(x, axis=0), average(x, axis=0)*2.0])) + m1 = zeros(6) + m2 = [0, 0, 1, 1, 0, 0] + m3 = [[0, 0, 1, 1, 0, 0], [0, 1, 1, 1, 1, 0]] + m4 = ones(6) + m5 = [0, 1, 1, 1, 1, 1] + assert_(allclose(average(masked_array(x, m1), axis=0), 2.5)) + assert_(allclose(average(masked_array(x, m2), axis=0), 2.5)) + assert_(average(masked_array(x, m4), axis=0) is masked) + assert_equal(average(masked_array(x, m5), axis=0), 0.0) + assert_equal(count(average(masked_array(x, m4), axis=0)), 0) + z = masked_array(y, m3) + assert_(allclose(average(z, None), 20. / 6.)) + assert_(allclose(average(z, axis=0), + [0., 1., 99., 99., 4.0, 7.5])) + assert_(allclose(average(z, axis=1), [2.5, 5.0])) + assert_(allclose(average(z, axis=0, weights=w2), + [0., 1., 99., 99., 4.0, 10.0])) + + a = arange(6) + b = arange(6) * 3 + r1, w1 = average([[a, b], [b, a]], axis=1, returned=True) + assert_equal(shape(r1), shape(w1)) + assert_equal(r1.shape, w1.shape) + r2, w2 = average(ones((2, 2, 3)), axis=0, weights=[3, 1], returned=True) + assert_equal(shape(w2), shape(r2)) + r2, w2 = average(ones((2, 2, 3)), returned=True) + assert_equal(shape(w2), shape(r2)) + r2, w2 = average(ones((2, 2, 3)), weights=ones((2, 2, 3)), returned=True) + assert_(shape(w2) == shape(r2)) + a2d = array([[1, 2], [0, 4]], float) + a2dm = masked_array(a2d, [[0, 0], [1, 0]]) + a2da = average(a2d, axis=0) + assert_(eq(a2da, [0.5, 3.0])) + a2dma = average(a2dm, axis=0) + assert_(eq(a2dma, [1.0, 3.0])) + a2dma = average(a2dm, axis=None) + assert_(eq(a2dma, 7. 
/ 3.)) + a2dma = average(a2dm, axis=1) + assert_(eq(a2dma, [1.5, 4.0])) + + def test_testToPython(self): + assert_equal(1, int(array(1))) + assert_equal(1.0, float(array(1))) + assert_equal(1, int(array([[[1]]]))) + assert_equal(1.0, float(array([[1]]))) + assert_raises(TypeError, float, array([1, 1])) + assert_raises(ValueError, bool, array([0, 1])) + assert_raises(ValueError, bool, array([0, 0], mask=[0, 1])) + + def test_testScalarArithmetic(self): + xm = array(0, mask=1) + #TODO FIXME: Find out what the following raises a warning in r8247 + with np.errstate(divide='ignore'): + assert_((1 / array(0)).mask) + assert_((1 + xm).mask) + assert_((-xm).mask) + assert_((-xm).mask) + assert_(maximum(xm, xm).mask) + assert_(minimum(xm, xm).mask) + assert_(xm.filled().dtype is xm._data.dtype) + x = array(0, mask=0) + assert_(x.filled() == x._data) + assert_equal(str(xm), str(masked_print_option)) + + def test_testArrayMethods(self): + a = array([1, 3, 2]) + assert_(eq(a.any(), a._data.any())) + assert_(eq(a.all(), a._data.all())) + assert_(eq(a.argmax(), a._data.argmax())) + assert_(eq(a.argmin(), a._data.argmin())) + assert_(eq(a.choose(0, 1, 2, 3, 4), + a._data.choose(0, 1, 2, 3, 4))) + assert_(eq(a.compress([1, 0, 1]), a._data.compress([1, 0, 1]))) + assert_(eq(a.conj(), a._data.conj())) + assert_(eq(a.conjugate(), a._data.conjugate())) + m = array([[1, 2], [3, 4]]) + assert_(eq(m.diagonal(), m._data.diagonal())) + assert_(eq(a.sum(), a._data.sum())) + assert_(eq(a.take([1, 2]), a._data.take([1, 2]))) + assert_(eq(m.transpose(), m._data.transpose())) + + def test_testArrayAttributes(self): + a = array([1, 3, 2]) + assert_equal(a.ndim, 1) + + def test_testAPI(self): + assert_(not [m for m in dir(np.ndarray) + if m not in dir(MaskedArray) and + not m.startswith('_')]) + + def test_testSingleElementSubscript(self): + a = array([1, 3, 2]) + b = array([1, 3, 2], mask=[1, 0, 1]) + assert_equal(a[0].shape, ()) + assert_equal(b[0].shape, ()) + assert_equal(b[1].shape, ()) + + def test_assignment_by_condition(self): + # Test for gh-18951 + a = array([1, 2, 3, 4], mask=[1, 0, 1, 0]) + c = a >= 3 + a[c] = 5 + assert_(a[2] is masked) + + def test_assignment_by_condition_2(self): + # gh-19721 + a = masked_array([0, 1], mask=[False, False]) + b = masked_array([0, 1], mask=[True, True]) + mask = a < 1 + b[mask] = a[mask] + expected_mask = [False, True] + assert_equal(b.mask, expected_mask) + + +class TestUfuncs: + def setup(self): + self.d = (array([1.0, 0, -1, pi / 2] * 2, mask=[0, 1] + [0] * 6), + array([1.0, 0, -1, pi / 2] * 2, mask=[1, 0] + [0] * 6),) + + def test_testUfuncRegression(self): + f_invalid_ignore = [ + 'sqrt', 'arctanh', 'arcsin', 'arccos', + 'arccosh', 'arctanh', 'log', 'log10', 'divide', + 'true_divide', 'floor_divide', 'remainder', 'fmod'] + for f in ['sqrt', 'log', 'log10', 'exp', 'conjugate', + 'sin', 'cos', 'tan', + 'arcsin', 'arccos', 'arctan', + 'sinh', 'cosh', 'tanh', + 'arcsinh', + 'arccosh', + 'arctanh', + 'absolute', 'fabs', 'negative', + 'floor', 'ceil', + 'logical_not', + 'add', 'subtract', 'multiply', + 'divide', 'true_divide', 'floor_divide', + 'remainder', 'fmod', 'hypot', 'arctan2', + 'equal', 'not_equal', 'less_equal', 'greater_equal', + 'less', 'greater', + 'logical_and', 'logical_or', 'logical_xor']: + try: + uf = getattr(umath, f) + except AttributeError: + uf = getattr(fromnumeric, f) + mf = getattr(np.ma, f) + args = self.d[:uf.nin] + with np.errstate(): + if f in f_invalid_ignore: + np.seterr(invalid='ignore') + if f in ['arctanh', 'log', 'log10']: + 
np.seterr(divide='ignore') + ur = uf(*args) + mr = mf(*args) + assert_(eq(ur.filled(0), mr.filled(0), f)) + assert_(eqmask(ur.mask, mr.mask)) + + def test_reduce(self): + a = self.d[0] + assert_(not alltrue(a, axis=0)) + assert_(sometrue(a, axis=0)) + assert_equal(sum(a[:3], axis=0), 0) + assert_equal(product(a, axis=0), 0) + + def test_minmax(self): + a = arange(1, 13).reshape(3, 4) + amask = masked_where(a < 5, a) + assert_equal(amask.max(), a.max()) + assert_equal(amask.min(), 5) + assert_((amask.max(0) == a.max(0)).all()) + assert_((amask.min(0) == [5, 6, 7, 8]).all()) + assert_(amask.max(1)[0].mask) + assert_(amask.min(1)[0].mask) + + def test_nonzero(self): + for t in "?bhilqpBHILQPfdgFDGO": + x = array([1, 0, 2, 0], mask=[0, 0, 1, 1]) + assert_(eq(nonzero(x), [0])) + + +class TestArrayMethods: + + def setup(self): + x = np.array([8.375, 7.545, 8.828, 8.5, 1.757, 5.928, + 8.43, 7.78, 9.865, 5.878, 8.979, 4.732, + 3.012, 6.022, 5.095, 3.116, 5.238, 3.957, + 6.04, 9.63, 7.712, 3.382, 4.489, 6.479, + 7.189, 9.645, 5.395, 4.961, 9.894, 2.893, + 7.357, 9.828, 6.272, 3.758, 6.693, 0.993]) + X = x.reshape(6, 6) + XX = x.reshape(3, 2, 2, 3) + + m = np.array([0, 1, 0, 1, 0, 0, + 1, 0, 1, 1, 0, 1, + 0, 0, 0, 1, 0, 1, + 0, 0, 0, 1, 1, 1, + 1, 0, 0, 1, 0, 0, + 0, 0, 1, 0, 1, 0]) + mx = array(data=x, mask=m) + mX = array(data=X, mask=m.reshape(X.shape)) + mXX = array(data=XX, mask=m.reshape(XX.shape)) + + self.d = (x, X, XX, m, mx, mX, mXX) + + def test_trace(self): + (x, X, XX, m, mx, mX, mXX,) = self.d + mXdiag = mX.diagonal() + assert_equal(mX.trace(), mX.diagonal().compressed().sum()) + assert_(eq(mX.trace(), + X.trace() - sum(mXdiag.mask * X.diagonal(), + axis=0))) + + def test_clip(self): + (x, X, XX, m, mx, mX, mXX,) = self.d + clipped = mx.clip(2, 8) + assert_(eq(clipped.mask, mx.mask)) + assert_(eq(clipped._data, x.clip(2, 8))) + assert_(eq(clipped._data, mx._data.clip(2, 8))) + + def test_ptp(self): + (x, X, XX, m, mx, mX, mXX,) = self.d + (n, m) = X.shape + assert_equal(mx.ptp(), mx.compressed().ptp()) + rows = np.zeros(n, np.float_) + cols = np.zeros(m, np.float_) + for k in range(m): + cols[k] = mX[:, k].compressed().ptp() + for k in range(n): + rows[k] = mX[k].compressed().ptp() + assert_(eq(mX.ptp(0), cols)) + assert_(eq(mX.ptp(1), rows)) + + def test_swapaxes(self): + (x, X, XX, m, mx, mX, mXX,) = self.d + mXswapped = mX.swapaxes(0, 1) + assert_(eq(mXswapped[-1], mX[:, -1])) + mXXswapped = mXX.swapaxes(0, 2) + assert_equal(mXXswapped.shape, (2, 2, 3, 3)) + + def test_cumprod(self): + (x, X, XX, m, mx, mX, mXX,) = self.d + mXcp = mX.cumprod(0) + assert_(eq(mXcp._data, mX.filled(1).cumprod(0))) + mXcp = mX.cumprod(1) + assert_(eq(mXcp._data, mX.filled(1).cumprod(1))) + + def test_cumsum(self): + (x, X, XX, m, mx, mX, mXX,) = self.d + mXcp = mX.cumsum(0) + assert_(eq(mXcp._data, mX.filled(0).cumsum(0))) + mXcp = mX.cumsum(1) + assert_(eq(mXcp._data, mX.filled(0).cumsum(1))) + + def test_varstd(self): + (x, X, XX, m, mx, mX, mXX,) = self.d + assert_(eq(mX.var(axis=None), mX.compressed().var())) + assert_(eq(mX.std(axis=None), mX.compressed().std())) + assert_(eq(mXX.var(axis=3).shape, XX.var(axis=3).shape)) + assert_(eq(mX.var().shape, X.var().shape)) + (mXvar0, mXvar1) = (mX.var(axis=0), mX.var(axis=1)) + for k in range(6): + assert_(eq(mXvar1[k], mX[k].compressed().var())) + assert_(eq(mXvar0[k], mX[:, k].compressed().var())) + assert_(eq(np.sqrt(mXvar0[k]), + mX[:, k].compressed().std())) + + +def eqmask(m1, m2): + if m1 is nomask: + return m2 is nomask + if m2 is nomask: + return 
m1 is nomask + return (m1 == m2).all() diff --git a/.local/lib/python3.8/site-packages/numpy/ma/tests/test_regression.py b/.local/lib/python3.8/site-packages/numpy/ma/tests/test_regression.py new file mode 100644 index 0000000..7e76eb0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/ma/tests/test_regression.py @@ -0,0 +1,91 @@ +import numpy as np +from numpy.testing import ( + assert_, assert_array_equal, assert_allclose, suppress_warnings + ) + + +class TestRegression: + def test_masked_array_create(self): + # Ticket #17 + x = np.ma.masked_array([0, 1, 2, 3, 0, 4, 5, 6], + mask=[0, 0, 0, 1, 1, 1, 0, 0]) + assert_array_equal(np.ma.nonzero(x), [[1, 2, 6, 7]]) + + def test_masked_array(self): + # Ticket #61 + np.ma.array(1, mask=[1]) + + def test_mem_masked_where(self): + # Ticket #62 + from numpy.ma import masked_where, MaskType + a = np.zeros((1, 1)) + b = np.zeros(a.shape, MaskType) + c = masked_where(b, a) + a-c + + def test_masked_array_multiply(self): + # Ticket #254 + a = np.ma.zeros((4, 1)) + a[2, 0] = np.ma.masked + b = np.zeros((4, 2)) + a*b + b*a + + def test_masked_array_repeat(self): + # Ticket #271 + np.ma.array([1], mask=False).repeat(10) + + def test_masked_array_repr_unicode(self): + # Ticket #1256 + repr(np.ma.array(u"Unicode")) + + def test_atleast_2d(self): + # Ticket #1559 + a = np.ma.masked_array([0.0, 1.2, 3.5], mask=[False, True, False]) + b = np.atleast_2d(a) + assert_(a.mask.ndim == 1) + assert_(b.mask.ndim == 2) + + def test_set_fill_value_unicode_py3(self): + # Ticket #2733 + a = np.ma.masked_array(['a', 'b', 'c'], mask=[1, 0, 0]) + a.fill_value = 'X' + assert_(a.fill_value == 'X') + + def test_var_sets_maskedarray_scalar(self): + # Issue gh-2757 + a = np.ma.array(np.arange(5), mask=True) + mout = np.ma.array(-1, dtype=float) + a.var(out=mout) + assert_(mout._data == 0) + + def test_ddof_corrcoef(self): + # See gh-3336 + x = np.ma.masked_equal([1, 2, 3, 4, 5], 4) + y = np.array([2, 2.5, 3.1, 3, 5]) + # this test can be removed after deprecation. + with suppress_warnings() as sup: + sup.filter(DeprecationWarning, "bias and ddof have no effect") + r0 = np.ma.corrcoef(x, y, ddof=0) + r1 = np.ma.corrcoef(x, y, ddof=1) + # ddof should not have an effect (it gets cancelled out) + assert_allclose(r0.data, r1.data) + + def test_mask_not_backmangled(self): + # See gh-10314. Test case taken from gh-3140. + a = np.ma.MaskedArray([1., 2.], mask=[False, False]) + assert_(a.mask.shape == (2,)) + b = np.tile(a, (2, 1)) + # Check that the above no longer changes a.shape to (1, 2) + assert_(a.mask.shape == (2,)) + assert_(b.shape == (2, 2)) + assert_(b.mask.shape == (2, 2)) + + def test_empty_list_on_structured(self): + # See gh-12464. Indexing with empty list should give empty result. + ma = np.ma.MaskedArray([(1, 1.), (2, 2.), (3, 3.)], dtype='i4,f4') + assert_array_equal(ma[[]], ma[:0]) + + def test_masked_array_tobytes_fortran(self): + ma = np.ma.arange(4).reshape((2,2)) + assert_array_equal(ma.tobytes(order='F'), ma.T.tobytes()) diff --git a/.local/lib/python3.8/site-packages/numpy/ma/tests/test_subclassing.py b/.local/lib/python3.8/site-packages/numpy/ma/tests/test_subclassing.py new file mode 100644 index 0000000..83a9b2f --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/ma/tests/test_subclassing.py @@ -0,0 +1,387 @@ +# pylint: disable-msg=W0611, W0612, W0511,R0201 +"""Tests suite for MaskedArray & subclassing. 
+ +:author: Pierre Gerard-Marchant +:contact: pierregm_at_uga_dot_edu +:version: $Id: test_subclassing.py 3473 2007-10-29 15:18:13Z jarrod.millman $ + +""" +import numpy as np +from numpy.testing import assert_, assert_raises +from numpy.ma.testutils import assert_equal +from numpy.ma.core import ( + array, arange, masked, MaskedArray, masked_array, log, add, hypot, + divide, asarray, asanyarray, nomask + ) +# from numpy.ma.core import ( + +def assert_startswith(a, b): + # produces a better error message than assert_(a.startswith(b)) + assert_equal(a[:len(b)], b) + +class SubArray(np.ndarray): + # Defines a generic np.ndarray subclass, that stores some metadata + # in the dictionary `info`. + def __new__(cls,arr,info={}): + x = np.asanyarray(arr).view(cls) + x.info = info.copy() + return x + + def __array_finalize__(self, obj): + if callable(getattr(super(), '__array_finalize__', None)): + super().__array_finalize__(obj) + self.info = getattr(obj, 'info', {}).copy() + return + + def __add__(self, other): + result = super().__add__(other) + result.info['added'] = result.info.get('added', 0) + 1 + return result + + def __iadd__(self, other): + result = super().__iadd__(other) + result.info['iadded'] = result.info.get('iadded', 0) + 1 + return result + + +subarray = SubArray + + +class SubMaskedArray(MaskedArray): + """Pure subclass of MaskedArray, keeping some info on subclass.""" + def __new__(cls, info=None, **kwargs): + obj = super().__new__(cls, **kwargs) + obj._optinfo['info'] = info + return obj + + +class MSubArray(SubArray, MaskedArray): + + def __new__(cls, data, info={}, mask=nomask): + subarr = SubArray(data, info) + _data = MaskedArray.__new__(cls, data=subarr, mask=mask) + _data.info = subarr.info + return _data + + @property + def _series(self): + _view = self.view(MaskedArray) + _view._sharedmask = False + return _view + +msubarray = MSubArray + + +# Also a subclass that overrides __str__, __repr__ and __setitem__, disallowing +# setting to non-class values (and thus np.ma.core.masked_print_option) +# and overrides __array_wrap__, updating the info dict, to check that this +# doesn't get destroyed by MaskedArray._update_from. But this one also needs +# its own iterator... 
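+# For orientation, a doctest-style sketch (illustrative only) of the str/repr +# overrides described above; ComplicatedSubArray is defined further below: +# >>> c = ComplicatedSubArray(np.arange(3)) +# >>> str(c) +# 'myprefix [0 1 2] mypostfix' +# >>> repr(c) # does not start with 'name(' +# '<ComplicatedSubArray myprefix [0 1 2] mypostfix>'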
+class CSAIterator: + """ + Flat iterator object that uses its own setter/getter + (works around ndarray.flat not propagating subclass setters/getters + see https://github.com/numpy/numpy/issues/4564) + roughly following MaskedIterator + """ + def __init__(self, a): + self._original = a + self._dataiter = a.view(np.ndarray).flat + + def __iter__(self): + return self + + def __getitem__(self, indx): + out = self._dataiter.__getitem__(indx) + if not isinstance(out, np.ndarray): + out = out.__array__() + out = out.view(type(self._original)) + return out + + def __setitem__(self, index, value): + self._dataiter[index] = self._original._validate_input(value) + + def __next__(self): + return next(self._dataiter).__array__().view(type(self._original)) + + +class ComplicatedSubArray(SubArray): + + def __str__(self): + return f'myprefix {self.view(SubArray)} mypostfix' + + def __repr__(self): + # Return a repr that does not start with 'name(' + return f'<{self.__class__.__name__} {self}>' + + def _validate_input(self, value): + if not isinstance(value, ComplicatedSubArray): + raise ValueError("Can only set to MySubArray values") + return value + + def __setitem__(self, item, value): + # validation ensures direct assignment with ndarray or + # masked_print_option will fail + super().__setitem__(item, self._validate_input(value)) + + def __getitem__(self, item): + # ensure getter returns our own class also for scalars + value = super().__getitem__(item) + if not isinstance(value, np.ndarray): # scalar + value = value.__array__().view(ComplicatedSubArray) + return value + + @property + def flat(self): + return CSAIterator(self) + + @flat.setter + def flat(self, value): + y = self.ravel() + y[:] = value + + def __array_wrap__(self, obj, context=None): + obj = super().__array_wrap__(obj, context) + if context is not None and context[0] is np.multiply: + obj.info['multiplied'] = obj.info.get('multiplied', 0) + 1 + + return obj + + +class TestSubclassing: + # Test suite for masked subclasses of ndarray. + + def setup(self): + x = np.arange(5, dtype='float') + mx = msubarray(x, mask=[0, 1, 0, 0, 0]) + self.data = (x, mx) + + def test_data_subclassing(self): + # Tests whether the subclass is kept. 
+ x = np.arange(5) + m = [0, 0, 1, 0, 0] + xsub = SubArray(x) + xmsub = masked_array(xsub, mask=m) + assert_(isinstance(xmsub, MaskedArray)) + assert_equal(xmsub._data, xsub) + assert_(isinstance(xmsub._data, SubArray)) + + def test_maskedarray_subclassing(self): + # Tests subclassing MaskedArray + (x, mx) = self.data + assert_(isinstance(mx._data, subarray)) + + def test_masked_unary_operations(self): + # Tests masked_unary_operation + (x, mx) = self.data + with np.errstate(divide='ignore'): + assert_(isinstance(log(mx), msubarray)) + assert_equal(log(x), np.log(x)) + + def test_masked_binary_operations(self): + # Tests masked_binary_operation + (x, mx) = self.data + # Result should be a msubarray + assert_(isinstance(add(mx, mx), msubarray)) + assert_(isinstance(add(mx, x), msubarray)) + # Result should work + assert_equal(add(mx, x), mx+x) + assert_(isinstance(add(mx, mx)._data, subarray)) + assert_(isinstance(add.outer(mx, mx), msubarray)) + assert_(isinstance(hypot(mx, mx), msubarray)) + assert_(isinstance(hypot(mx, x), msubarray)) + + def test_masked_binary_operations2(self): + # Tests domained_masked_binary_operation + (x, mx) = self.data + xmx = masked_array(mx.data.__array__(), mask=mx.mask) + assert_(isinstance(divide(mx, mx), msubarray)) + assert_(isinstance(divide(mx, x), msubarray)) + assert_equal(divide(mx, mx), divide(xmx, xmx)) + + def test_attributepropagation(self): + x = array(arange(5), mask=[0]+[1]*4) + my = masked_array(subarray(x)) + ym = msubarray(x) + # + z = (my+1) + assert_(isinstance(z, MaskedArray)) + assert_(not isinstance(z, MSubArray)) + assert_(isinstance(z._data, SubArray)) + assert_equal(z._data.info, {}) + # + z = (ym+1) + assert_(isinstance(z, MaskedArray)) + assert_(isinstance(z, MSubArray)) + assert_(isinstance(z._data, SubArray)) + assert_(z._data.info['added'] > 0) + # Test that inplace methods from data get used (gh-4617) + ym += 1 + assert_(isinstance(ym, MaskedArray)) + assert_(isinstance(ym, MSubArray)) + assert_(isinstance(ym._data, SubArray)) + assert_(ym._data.info['iadded'] > 0) + # + ym._set_mask([1, 0, 0, 0, 1]) + assert_equal(ym._mask, [1, 0, 0, 0, 1]) + ym._series._set_mask([0, 0, 0, 0, 1]) + assert_equal(ym._mask, [0, 0, 0, 0, 1]) + # + xsub = subarray(x, info={'name':'x'}) + mxsub = masked_array(xsub) + assert_(hasattr(mxsub, 'info')) + assert_equal(mxsub.info, xsub.info) + + def test_subclasspreservation(self): + # Checks that masked_array(...,subok=True) preserves the class. 
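+ # The four constructions below come in mirrored pairs: + # masked_array(..., subok=False) and asarray() should downcast to a + # plain MaskedArray, while masked_array(..., subok=True) and + # asanyarray() should keep the MSubArray type and its info metadata.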
+ x = np.arange(5) + m = [0, 0, 1, 0, 0] + xinfo = [(i, j) for (i, j) in zip(x, m)] + xsub = MSubArray(x, mask=m, info={'xsub':xinfo}) + # + mxsub = masked_array(xsub, subok=False) + assert_(not isinstance(mxsub, MSubArray)) + assert_(isinstance(mxsub, MaskedArray)) + assert_equal(mxsub._mask, m) + # + mxsub = asarray(xsub) + assert_(not isinstance(mxsub, MSubArray)) + assert_(isinstance(mxsub, MaskedArray)) + assert_equal(mxsub._mask, m) + # + mxsub = masked_array(xsub, subok=True) + assert_(isinstance(mxsub, MSubArray)) + assert_equal(mxsub.info, xsub.info) + assert_equal(mxsub._mask, xsub._mask) + # + mxsub = asanyarray(xsub) + assert_(isinstance(mxsub, MSubArray)) + assert_equal(mxsub.info, xsub.info) + assert_equal(mxsub._mask, m) + + def test_subclass_items(self): + """test that getter and setter go via baseclass""" + x = np.arange(5) + xcsub = ComplicatedSubArray(x) + mxcsub = masked_array(xcsub, mask=[True, False, True, False, False]) + # getter should return a ComplicatedSubArray, even for single item + # first check we wrote ComplicatedSubArray correctly + assert_(isinstance(xcsub[1], ComplicatedSubArray)) + assert_(isinstance(xcsub[1,...], ComplicatedSubArray)) + assert_(isinstance(xcsub[1:4], ComplicatedSubArray)) + + # now that it propagates inside the MaskedArray + assert_(isinstance(mxcsub[1], ComplicatedSubArray)) + assert_(isinstance(mxcsub[1,...].data, ComplicatedSubArray)) + assert_(mxcsub[0] is masked) + assert_(isinstance(mxcsub[0,...].data, ComplicatedSubArray)) + assert_(isinstance(mxcsub[1:4].data, ComplicatedSubArray)) + + # also for flattened version (which goes via MaskedIterator) + assert_(isinstance(mxcsub.flat[1].data, ComplicatedSubArray)) + assert_(mxcsub.flat[0] is masked) + assert_(isinstance(mxcsub.flat[1:4].base, ComplicatedSubArray)) + + # setter should only work with ComplicatedSubArray input + # first check we wrote ComplicatedSubArray correctly + assert_raises(ValueError, xcsub.__setitem__, 1, x[4]) + # now that it propagates inside the MaskedArray + assert_raises(ValueError, mxcsub.__setitem__, 1, x[4]) + assert_raises(ValueError, mxcsub.__setitem__, slice(1, 4), x[1:4]) + mxcsub[1] = xcsub[4] + mxcsub[1:4] = xcsub[1:4] + # also for flattened version (which goes via MaskedIterator) + assert_raises(ValueError, mxcsub.flat.__setitem__, 1, x[4]) + assert_raises(ValueError, mxcsub.flat.__setitem__, slice(1, 4), x[1:4]) + mxcsub.flat[1] = xcsub[4] + mxcsub.flat[1:4] = xcsub[1:4] + + def test_subclass_nomask_items(self): + x = np.arange(5) + xcsub = ComplicatedSubArray(x) + mxcsub_nomask = masked_array(xcsub) + + assert_(isinstance(mxcsub_nomask[1,...].data, ComplicatedSubArray)) + assert_(isinstance(mxcsub_nomask[0,...].data, ComplicatedSubArray)) + + assert_(isinstance(mxcsub_nomask[1], ComplicatedSubArray)) + assert_(isinstance(mxcsub_nomask[0], ComplicatedSubArray)) + + def test_subclass_repr(self): + """test that repr uses the name of the subclass + and 'array' for np.ndarray""" + x = np.arange(5) + mx = masked_array(x, mask=[True, False, True, False, False]) + assert_startswith(repr(mx), 'masked_array') + xsub = SubArray(x) + mxsub = masked_array(xsub, mask=[True, False, True, False, False]) + assert_startswith(repr(mxsub), + f'masked_{SubArray.__name__}(data=[--, 1, --, 3, 4]') + + def test_subclass_str(self): + """test str with subclass that has overridden str, setitem""" + # first without override + x = np.arange(5) + xsub = SubArray(x) + mxsub = masked_array(xsub, mask=[True, False, True, False, False]) + assert_equal(str(mxsub), '[-- 1 -- 3 
4]') + + xcsub = ComplicatedSubArray(x) + assert_raises(ValueError, xcsub.__setitem__, 0, + np.ma.core.masked_print_option) + mxcsub = masked_array(xcsub, mask=[True, False, True, False, False]) + assert_equal(str(mxcsub), 'myprefix [-- 1 -- 3 4] mypostfix') + + def test_pure_subclass_info_preservation(self): + # Test that ufuncs and methods conserve extra information consistently; + # see gh-7122. + arr1 = SubMaskedArray('test', data=[1,2,3,4,5,6]) + arr2 = SubMaskedArray(data=[0,1,2,3,4,5]) + diff1 = np.subtract(arr1, arr2) + assert_('info' in diff1._optinfo) + assert_(diff1._optinfo['info'] == 'test') + diff2 = arr1 - arr2 + assert_('info' in diff2._optinfo) + assert_(diff2._optinfo['info'] == 'test') + + +class ArrayNoInheritance: + """Quantity-like class that does not inherit from ndarray""" + def __init__(self, data, units): + self.magnitude = data + self.units = units + + def __getattr__(self, attr): + return getattr(self.magnitude, attr) + + +def test_array_no_inheritance(): + data_masked = np.ma.array([1, 2, 3], mask=[True, False, True]) + data_masked_units = ArrayNoInheritance(data_masked, 'meters') + + # Get the masked representation of the Quantity-like class + new_array = np.ma.array(data_masked_units) + assert_equal(data_masked.data, new_array.data) + assert_equal(data_masked.mask, new_array.mask) + # Test sharing the mask + data_masked.mask = [True, False, False] + assert_equal(data_masked.mask, new_array.mask) + assert_(new_array.sharedmask) + + # Get the masked representation of the Quantity-like class + new_array = np.ma.array(data_masked_units, copy=True) + assert_equal(data_masked.data, new_array.data) + assert_equal(data_masked.mask, new_array.mask) + # Test that the mask is not shared when copy=True + data_masked.mask = [True, False, True] + assert_equal([True, False, False], new_array.mask) + assert_(not new_array.sharedmask) + + # Get the masked representation of the Quantity-like class + new_array = np.ma.array(data_masked_units, keep_mask=False) + assert_equal(data_masked.data, new_array.data) + # The change did not affect the original mask + assert_equal(data_masked.mask, [True, False, True]) + # Test that the mask is False and not shared when keep_mask=False + assert_(not new_array.mask) + assert_(not new_array.sharedmask) diff --git a/.local/lib/python3.8/site-packages/numpy/ma/testutils.py b/.local/lib/python3.8/site-packages/numpy/ma/testutils.py new file mode 100644 index 0000000..2dd479a --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/ma/testutils.py @@ -0,0 +1,288 @@ +"""Miscellaneous functions for testing masked arrays and subclasses + +:author: Pierre Gerard-Marchant +:contact: pierregm_at_uga_dot_edu +:version: $Id: testutils.py 3529 2007-11-13 08:01:14Z jarrod.millman $ + +""" +import operator + +import numpy as np +from numpy import ndarray, float_ +import numpy.core.umath as umath +import numpy.testing +from numpy.testing import ( + assert_, assert_allclose, assert_array_almost_equal_nulp, + assert_raises, build_err_msg + ) +from .core import mask_or, getmask, masked_array, nomask, masked, filled + +__all__masked = [ + 'almost', 'approx', 'assert_almost_equal', 'assert_array_almost_equal', + 'assert_array_approx_equal', 'assert_array_compare', + 'assert_array_equal', 'assert_array_less', 'assert_close', + 'assert_equal', 'assert_equal_records', 'assert_mask_equal', + 'assert_not_equal', 'fail_if_array_equal', + ] + +# Include some normal test functions to avoid breaking other projects who +# have mistakenly included them from this file. 
SciPy is one. That is +# unfortunate, as some of these functions are not intended to work with +# masked arrays. But there was no way to tell before. +from unittest import TestCase +__some__from_testing = [ + 'TestCase', 'assert_', 'assert_allclose', 'assert_array_almost_equal_nulp', + 'assert_raises' + ] + +__all__ = __all__masked + __some__from_testing + + +def approx(a, b, fill_value=True, rtol=1e-5, atol=1e-8): + """ + Returns true if all components of a and b are equal to given tolerances. + + If fill_value is True, masked values are considered equal. Otherwise, + masked values are considered unequal. The relative error rtol should + be positive and << 1.0. The absolute error atol comes into play for + those elements of b that are very small or zero; it says how small a + must be also. + + """ + m = mask_or(getmask(a), getmask(b)) + d1 = filled(a) + d2 = filled(b) + if d1.dtype.char == "O" or d2.dtype.char == "O": + return np.equal(d1, d2).ravel() + x = filled(masked_array(d1, copy=False, mask=m), fill_value).astype(float_) + y = filled(masked_array(d2, copy=False, mask=m), 1).astype(float_) + d = np.less_equal(umath.absolute(x - y), atol + rtol * umath.absolute(y)) + return d.ravel() + + +def almost(a, b, decimal=6, fill_value=True): + """ + Returns True if a and b are equal up to decimal places. + + If fill_value is True, masked values are considered equal. Otherwise, + masked values are considered unequal. + + """ + m = mask_or(getmask(a), getmask(b)) + d1 = filled(a) + d2 = filled(b) + if d1.dtype.char == "O" or d2.dtype.char == "O": + return np.equal(d1, d2).ravel() + x = filled(masked_array(d1, copy=False, mask=m), fill_value).astype(float_) + y = filled(masked_array(d2, copy=False, mask=m), 1).astype(float_) + d = np.around(np.abs(x - y), decimal) <= 10.0 ** (-decimal) + return d.ravel() + + +def _assert_equal_on_sequences(actual, desired, err_msg=''): + """ + Asserts the equality of two non-array sequences. + + """ + assert_equal(len(actual), len(desired), err_msg) + for k in range(len(desired)): + assert_equal(actual[k], desired[k], f'item={k!r}\n{err_msg}') + return + + +def assert_equal_records(a, b): + """ + Asserts that two records are equal. + + Pretty crude for now. + + """ + assert_equal(a.dtype, b.dtype) + for f in a.dtype.names: + (af, bf) = (operator.getitem(a, f), operator.getitem(b, f)) + if not (af is masked) and not (bf is masked): + assert_equal(operator.getitem(a, f), operator.getitem(b, f)) + return + + +def assert_equal(actual, desired, err_msg=''): + """ + Asserts that two items are equal. + + """ + # Case #1: dictionary ..... + if isinstance(desired, dict): + if not isinstance(actual, dict): + raise AssertionError(repr(type(actual))) + assert_equal(len(actual), len(desired), err_msg) + for k, i in desired.items(): + if k not in actual: + raise AssertionError(f"{k} not in {actual}") + assert_equal(actual[k], desired[k], f'key={k!r}\n{err_msg}') + return + # Case #2: lists ..... + if isinstance(desired, (list, tuple)) and isinstance(actual, (list, tuple)): + return _assert_equal_on_sequences(actual, desired, err_msg='') + if not (isinstance(actual, ndarray) or isinstance(desired, ndarray)): + msg = build_err_msg([actual, desired], err_msg,) + if not desired == actual: + raise AssertionError(msg) + return + # Case #4. 
arrays or equivalent + if ((actual is masked) and not (desired is masked)) or \ + ((desired is masked) and not (actual is masked)): + msg = build_err_msg([actual, desired], + err_msg, header='', names=('x', 'y')) + raise ValueError(msg) + actual = np.asanyarray(actual) + desired = np.asanyarray(desired) + (actual_dtype, desired_dtype) = (actual.dtype, desired.dtype) + if actual_dtype.char == "S" and desired_dtype.char == "S": + return _assert_equal_on_sequences(actual.tolist(), + desired.tolist(), + err_msg='') + return assert_array_equal(actual, desired, err_msg) + + +def fail_if_equal(actual, desired, err_msg='',): + """ + Raises an assertion error if two items are equal. + + """ + if isinstance(desired, dict): + if not isinstance(actual, dict): + raise AssertionError(repr(type(actual))) + fail_if_equal(len(actual), len(desired), err_msg) + for k, i in desired.items(): + if k not in actual: + raise AssertionError(repr(k)) + fail_if_equal(actual[k], desired[k], f'key={k!r}\n{err_msg}') + return + if isinstance(desired, (list, tuple)) and isinstance(actual, (list, tuple)): + fail_if_equal(len(actual), len(desired), err_msg) + for k in range(len(desired)): + fail_if_equal(actual[k], desired[k], f'item={k!r}\n{err_msg}') + return + if isinstance(actual, np.ndarray) or isinstance(desired, np.ndarray): + return fail_if_array_equal(actual, desired, err_msg) + msg = build_err_msg([actual, desired], err_msg) + if not desired != actual: + raise AssertionError(msg) + + +assert_not_equal = fail_if_equal + + +def assert_almost_equal(actual, desired, decimal=7, err_msg='', verbose=True): + """ + Asserts that two items are almost equal. + + The test is equivalent to abs(desired-actual) < 0.5 * 10**(-decimal). + + """ + if isinstance(actual, np.ndarray) or isinstance(desired, np.ndarray): + return assert_array_almost_equal(actual, desired, decimal=decimal, + err_msg=err_msg, verbose=verbose) + msg = build_err_msg([actual, desired], + err_msg=err_msg, verbose=verbose) + if not round(abs(desired - actual), decimal) == 0: + raise AssertionError(msg) + + +assert_close = assert_almost_equal + + +def assert_array_compare(comparison, x, y, err_msg='', verbose=True, header='', + fill_value=True): + """ + Asserts that comparison between two masked arrays is satisfied. + + The comparison is elementwise. + + """ + # Allocate a common mask and refill + m = mask_or(getmask(x), getmask(y)) + x = masked_array(x, copy=False, mask=m, keep_mask=False, subok=False) + y = masked_array(y, copy=False, mask=m, keep_mask=False, subok=False) + if ((x is masked) and not (y is masked)) or \ + ((y is masked) and not (x is masked)): + msg = build_err_msg([x, y], err_msg=err_msg, verbose=verbose, + header=header, names=('x', 'y')) + raise ValueError(msg) + # OK, now run the basic tests on filled versions + return np.testing.assert_array_compare(comparison, + x.filled(fill_value), + y.filled(fill_value), + err_msg=err_msg, + verbose=verbose, header=header) + + +def assert_array_equal(x, y, err_msg='', verbose=True): + """ + Checks the elementwise equality of two masked arrays. + + """ + assert_array_compare(operator.__eq__, x, y, + err_msg=err_msg, verbose=verbose, + header='Arrays are not equal') + + +def fail_if_array_equal(x, y, err_msg='', verbose=True): + """ + Raises an assertion error if two masked arrays are not equal elementwise. 
+ + """ + def compare(x, y): + return (not np.alltrue(approx(x, y))) + assert_array_compare(compare, x, y, err_msg=err_msg, verbose=verbose, + header='Arrays are not equal') + + +def assert_array_approx_equal(x, y, decimal=6, err_msg='', verbose=True): + """ + Checks the equality of two masked arrays, up to given number odecimals. + + The equality is checked elementwise. + + """ + def compare(x, y): + "Returns the result of the loose comparison between x and y)." + return approx(x, y, rtol=10. ** -decimal) + assert_array_compare(compare, x, y, err_msg=err_msg, verbose=verbose, + header='Arrays are not almost equal') + + +def assert_array_almost_equal(x, y, decimal=6, err_msg='', verbose=True): + """ + Checks the equality of two masked arrays, up to given number odecimals. + + The equality is checked elementwise. + + """ + def compare(x, y): + "Returns the result of the loose comparison between x and y)." + return almost(x, y, decimal) + assert_array_compare(compare, x, y, err_msg=err_msg, verbose=verbose, + header='Arrays are not almost equal') + + +def assert_array_less(x, y, err_msg='', verbose=True): + """ + Checks that x is smaller than y elementwise. + + """ + assert_array_compare(operator.__lt__, x, y, + err_msg=err_msg, verbose=verbose, + header='Arrays are not less-ordered') + + +def assert_mask_equal(m1, m2, err_msg=''): + """ + Asserts the equality of two masks. + + """ + if m1 is nomask: + assert_(m2 is nomask) + if m2 is nomask: + assert_(m1 is nomask) + assert_array_equal(m1, m2, err_msg=err_msg) diff --git a/.local/lib/python3.8/site-packages/numpy/ma/timer_comparison.py b/.local/lib/python3.8/site-packages/numpy/ma/timer_comparison.py new file mode 100644 index 0000000..9eb1a23 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/ma/timer_comparison.py @@ -0,0 +1,443 @@ +import timeit +from functools import reduce + +import numpy as np +from numpy import float_ +import numpy.core.fromnumeric as fromnumeric + +from numpy.testing import build_err_msg + + +pi = np.pi + +class ModuleTester: + def __init__(self, module): + self.module = module + self.allequal = module.allequal + self.arange = module.arange + self.array = module.array + self.concatenate = module.concatenate + self.count = module.count + self.equal = module.equal + self.filled = module.filled + self.getmask = module.getmask + self.getmaskarray = module.getmaskarray + self.id = id + self.inner = module.inner + self.make_mask = module.make_mask + self.masked = module.masked + self.masked_array = module.masked_array + self.masked_values = module.masked_values + self.mask_or = module.mask_or + self.nomask = module.nomask + self.ones = module.ones + self.outer = module.outer + self.repeat = module.repeat + self.resize = module.resize + self.sort = module.sort + self.take = module.take + self.transpose = module.transpose + self.zeros = module.zeros + self.MaskType = module.MaskType + try: + self.umath = module.umath + except AttributeError: + self.umath = module.core.umath + self.testnames = [] + + def assert_array_compare(self, comparison, x, y, err_msg='', header='', + fill_value=True): + """ + Assert that a comparison of two masked arrays is satisfied elementwise. 
+ + """ + xf = self.filled(x) + yf = self.filled(y) + m = self.mask_or(self.getmask(x), self.getmask(y)) + + x = self.filled(self.masked_array(xf, mask=m), fill_value) + y = self.filled(self.masked_array(yf, mask=m), fill_value) + if (x.dtype.char != "O"): + x = x.astype(float_) + if isinstance(x, np.ndarray) and x.size > 1: + x[np.isnan(x)] = 0 + elif np.isnan(x): + x = 0 + if (y.dtype.char != "O"): + y = y.astype(float_) + if isinstance(y, np.ndarray) and y.size > 1: + y[np.isnan(y)] = 0 + elif np.isnan(y): + y = 0 + try: + cond = (x.shape == () or y.shape == ()) or x.shape == y.shape + if not cond: + msg = build_err_msg([x, y], + err_msg + + f'\n(shapes {x.shape}, {y.shape} mismatch)', + header=header, + names=('x', 'y')) + assert cond, msg + val = comparison(x, y) + if m is not self.nomask and fill_value: + val = self.masked_array(val, mask=m) + if isinstance(val, bool): + cond = val + reduced = [0] + else: + reduced = val.ravel() + cond = reduced.all() + reduced = reduced.tolist() + if not cond: + match = 100-100.0*reduced.count(1)/len(reduced) + msg = build_err_msg([x, y], + err_msg + + '\n(mismatch %s%%)' % (match,), + header=header, + names=('x', 'y')) + assert cond, msg + except ValueError as e: + msg = build_err_msg([x, y], err_msg, header=header, names=('x', 'y')) + raise ValueError(msg) from e + + def assert_array_equal(self, x, y, err_msg=''): + """ + Checks the elementwise equality of two masked arrays. + + """ + self.assert_array_compare(self.equal, x, y, err_msg=err_msg, + header='Arrays are not equal') + + @np.errstate(all='ignore') + def test_0(self): + """ + Tests creation + + """ + x = np.array([1., 1., 1., -2., pi/2.0, 4., 5., -10., 10., 1., 2., 3.]) + m = [1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0] + xm = self.masked_array(x, mask=m) + xm[0] + + @np.errstate(all='ignore') + def test_1(self): + """ + Tests creation + + """ + x = np.array([1., 1., 1., -2., pi/2.0, 4., 5., -10., 10., 1., 2., 3.]) + y = np.array([5., 0., 3., 2., -1., -4., 0., -10., 10., 1., 0., 3.]) + m1 = [1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0] + m2 = [0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1] + xm = self.masked_array(x, mask=m1) + ym = self.masked_array(y, mask=m2) + xf = np.where(m1, 1.e+20, x) + xm.set_fill_value(1.e+20) + + assert((xm-ym).filled(0).any()) + s = x.shape + assert(xm.size == reduce(lambda x, y:x*y, s)) + assert(self.count(xm) == len(m1) - reduce(lambda x, y:x+y, m1)) + + for s in [(4, 3), (6, 2)]: + x.shape = s + y.shape = s + xm.shape = s + ym.shape = s + xf.shape = s + assert(self.count(xm) == len(m1) - reduce(lambda x, y:x+y, m1)) + + @np.errstate(all='ignore') + def test_2(self): + """ + Tests conversions and indexing. + + """ + x1 = np.array([1, 2, 4, 3]) + x2 = self.array(x1, mask=[1, 0, 0, 0]) + x3 = self.array(x1, mask=[0, 1, 0, 1]) + x4 = self.array(x1) + # test conversion to strings, no errors + str(x2) + repr(x2) + # tests of indexing + assert type(x2[1]) is type(x1[1]) + assert x1[1] == x2[1] + x1[2] = 9 + x2[2] = 9 + self.assert_array_equal(x1, x2) + x1[1:3] = 99 + x2[1:3] = 99 + x2[1] = self.masked + x2[1:3] = self.masked + x2[:] = x1 + x2[1] = self.masked + x3[:] = self.masked_array([1, 2, 3, 4], [0, 1, 1, 0]) + x4[:] = self.masked_array([1, 2, 3, 4], [0, 1, 1, 0]) + x1 = np.arange(5)*1.0 + x2 = self.masked_values(x1, 3.0) + x1 = self.array([1, 'hello', 2, 3], object) + x2 = np.array([1, 'hello', 2, 3], object) + # check that no error occurs. 
+ x1[1] + x2[1] + assert x1[1:1].shape == (0,) + # Tests copy-size + n = [0, 0, 1, 0, 0] + m = self.make_mask(n) + m2 = self.make_mask(m) + assert(m is m2) + m3 = self.make_mask(m, copy=1) + assert(m is not m3) + + @np.errstate(all='ignore') + def test_3(self): + """ + Tests resize/repeat + + """ + x4 = self.arange(4) + x4[2] = self.masked + y4 = self.resize(x4, (8,)) + assert self.allequal(self.concatenate([x4, x4]), y4) + assert self.allequal(self.getmask(y4), [0, 0, 1, 0, 0, 0, 1, 0]) + y5 = self.repeat(x4, (2, 2, 2, 2), axis=0) + self.assert_array_equal(y5, [0, 0, 1, 1, 2, 2, 3, 3]) + y6 = self.repeat(x4, 2, axis=0) + assert self.allequal(y5, y6) + y7 = x4.repeat((2, 2, 2, 2), axis=0) + assert self.allequal(y5, y7) + y8 = x4.repeat(2, 0) + assert self.allequal(y5, y8) + + @np.errstate(all='ignore') + def test_4(self): + """ + Test of take, transpose, inner, outer products. + + """ + x = self.arange(24) + y = np.arange(24) + x[5:6] = self.masked + x = x.reshape(2, 3, 4) + y = y.reshape(2, 3, 4) + assert self.allequal(np.transpose(y, (2, 0, 1)), self.transpose(x, (2, 0, 1))) + assert self.allequal(np.take(y, (2, 0, 1), 1), self.take(x, (2, 0, 1), 1)) + assert self.allequal(np.inner(self.filled(x, 0), self.filled(y, 0)), + self.inner(x, y)) + assert self.allequal(np.outer(self.filled(x, 0), self.filled(y, 0)), + self.outer(x, y)) + y = self.array(['abc', 1, 'def', 2, 3], object) + y[2] = self.masked + t = self.take(y, [0, 3, 4]) + assert t[0] == 'abc' + assert t[1] == 2 + assert t[2] == 3 + + @np.errstate(all='ignore') + def test_5(self): + """ + Tests inplace w/ scalar + + """ + x = self.arange(10) + y = self.arange(10) + xm = self.arange(10) + xm[2] = self.masked + x += 1 + assert self.allequal(x, y+1) + xm += 1 + assert self.allequal(xm, y+1) + + x = self.arange(10) + xm = self.arange(10) + xm[2] = self.masked + x -= 1 + assert self.allequal(x, y-1) + xm -= 1 + assert self.allequal(xm, y-1) + + x = self.arange(10)*1.0 + xm = self.arange(10)*1.0 + xm[2] = self.masked + x *= 2.0 + assert self.allequal(x, y*2) + xm *= 2.0 + assert self.allequal(xm, y*2) + + x = self.arange(10)*2 + xm = self.arange(10)*2 + xm[2] = self.masked + x /= 2 + assert self.allequal(x, y) + xm /= 2 + assert self.allequal(xm, y) + + x = self.arange(10)*1.0 + xm = self.arange(10)*1.0 + xm[2] = self.masked + x /= 2.0 + assert self.allequal(x, y/2.0) + xm /= self.arange(10) + self.assert_array_equal(xm, self.ones((10,))) + + x = self.arange(10).astype(float_) + xm = self.arange(10) + xm[2] = self.masked + x += 1. + assert self.allequal(x, y + 1.) 
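+ # A short, hedged aside on the final division in test_5 above: + # ``xm /= self.arange(10)`` divides by zero at index 0, which numpy.ma + # treats as a domain error and masks, so the comparison against + # ``self.ones((10,))`` only has to hold at the unmasked positions. + # Doctest-style sketch of the same behaviour with numpy.ma directly: + # >>> import numpy.ma as ma + # >>> xm = ma.arange(10) * 1.0 + # >>> xm /= ma.arange(10) # 0.0/0 at index 0 becomes masked + # >>> xm[0] is ma.masked + # True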
+ + @np.errstate(all='ignore') + def test_6(self): + """ + Tests inplace w/ array + + """ + x = self.arange(10, dtype=float_) + y = self.arange(10) + xm = self.arange(10, dtype=float_) + xm[2] = self.masked + m = xm.mask + a = self.arange(10, dtype=float_) + a[-1] = self.masked + x += a + xm += a + assert self.allequal(x, y+a) + assert self.allequal(xm, y+a) + assert self.allequal(xm.mask, self.mask_or(m, a.mask)) + + x = self.arange(10, dtype=float_) + xm = self.arange(10, dtype=float_) + xm[2] = self.masked + m = xm.mask + a = self.arange(10, dtype=float_) + a[-1] = self.masked + x -= a + xm -= a + assert self.allequal(x, y-a) + assert self.allequal(xm, y-a) + assert self.allequal(xm.mask, self.mask_or(m, a.mask)) + + x = self.arange(10, dtype=float_) + xm = self.arange(10, dtype=float_) + xm[2] = self.masked + m = xm.mask + a = self.arange(10, dtype=float_) + a[-1] = self.masked + x *= a + xm *= a + assert self.allequal(x, y*a) + assert self.allequal(xm, y*a) + assert self.allequal(xm.mask, self.mask_or(m, a.mask)) + + x = self.arange(10, dtype=float_) + xm = self.arange(10, dtype=float_) + xm[2] = self.masked + m = xm.mask + a = self.arange(10, dtype=float_) + a[-1] = self.masked + x /= a + xm /= a + + @np.errstate(all='ignore') + def test_7(self): + "Tests ufunc" + d = (self.array([1.0, 0, -1, pi/2]*2, mask=[0, 1]+[0]*6), + self.array([1.0, 0, -1, pi/2]*2, mask=[1, 0]+[0]*6),) + for f in ['sqrt', 'log', 'log10', 'exp', 'conjugate', +# 'sin', 'cos', 'tan', +# 'arcsin', 'arccos', 'arctan', +# 'sinh', 'cosh', 'tanh', +# 'arcsinh', +# 'arccosh', +# 'arctanh', +# 'absolute', 'fabs', 'negative', +# # 'nonzero', 'around', +# 'floor', 'ceil', +# # 'sometrue', 'alltrue', +# 'logical_not', +# 'add', 'subtract', 'multiply', +# 'divide', 'true_divide', 'floor_divide', +# 'remainder', 'fmod', 'hypot', 'arctan2', +# 'equal', 'not_equal', 'less_equal', 'greater_equal', +# 'less', 'greater', +# 'logical_and', 'logical_or', 'logical_xor', + ]: + try: + uf = getattr(self.umath, f) + except AttributeError: + uf = getattr(fromnumeric, f) + mf = getattr(self.module, f) + args = d[:uf.nin] + ur = uf(*args) + mr = mf(*args) + self.assert_array_equal(ur.filled(0), mr.filled(0), f) + self.assert_array_equal(ur._mask, mr._mask) + + @np.errstate(all='ignore') + def test_99(self): + # test average + ott = self.array([0., 1., 2., 3.], mask=[1, 0, 0, 0]) + self.assert_array_equal(2.0, self.average(ott, axis=0)) + self.assert_array_equal(2.0, self.average(ott, weights=[1., 1., 2., 1.])) + result, wts = self.average(ott, weights=[1., 1., 2., 1.], returned=1) + self.assert_array_equal(2.0, result) + assert(wts == 4.0) + ott[:] = self.masked + assert(self.average(ott, axis=0) is self.masked) + ott = self.array([0., 1., 2., 3.], mask=[1, 0, 0, 0]) + ott = ott.reshape(2, 2) + ott[:, 1] = self.masked + self.assert_array_equal(self.average(ott, axis=0), [2.0, 0.0]) + assert(self.average(ott, axis=1)[0] is self.masked) + self.assert_array_equal([2., 0.], self.average(ott, axis=0)) + result, wts = self.average(ott, axis=0, returned=1) + self.assert_array_equal(wts, [1., 0.]) + w1 = [0, 1, 1, 1, 1, 0] + w2 = [[0, 1, 1, 1, 1, 0], [1, 0, 0, 0, 0, 1]] + x = self.arange(6) + self.assert_array_equal(self.average(x, axis=0), 2.5) + self.assert_array_equal(self.average(x, axis=0, weights=w1), 2.5) + y = self.array([self.arange(6), 2.0*self.arange(6)]) + self.assert_array_equal(self.average(y, None), np.add.reduce(np.arange(6))*3./12.) + self.assert_array_equal(self.average(y, axis=0), np.arange(6) * 3./2.) 
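+ # Worked check (added for clarity) of the two expectations just above: + # y stacks arange(6) and 2*arange(6), so the flat average is + # (1 + 2) * sum(range(6)) / 12 = 3 * 15 / 12 = 3.75, i.e. + # np.add.reduce(np.arange(6)) * 3. / 12., and the column-wise average + # is (1 + 2) / 2 * arange(6) = np.arange(6) * 3. / 2.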
+ self.assert_array_equal(self.average(y, axis=1), [self.average(x, axis=0), self.average(x, axis=0) * 2.0]) + self.assert_array_equal(self.average(y, None, weights=w2), 20./6.) + self.assert_array_equal(self.average(y, axis=0, weights=w2), [0., 1., 2., 3., 4., 10.]) + self.assert_array_equal(self.average(y, axis=1), [self.average(x, axis=0), self.average(x, axis=0) * 2.0]) + m1 = self.zeros(6) + m2 = [0, 0, 1, 1, 0, 0] + m3 = [[0, 0, 1, 1, 0, 0], [0, 1, 1, 1, 1, 0]] + m4 = self.ones(6) + m5 = [0, 1, 1, 1, 1, 1] + self.assert_array_equal(self.average(self.masked_array(x, m1), axis=0), 2.5) + self.assert_array_equal(self.average(self.masked_array(x, m2), axis=0), 2.5) + self.assert_array_equal(self.average(self.masked_array(x, m5), axis=0), 0.0) + self.assert_array_equal(self.count(self.average(self.masked_array(x, m4), axis=0)), 0) + z = self.masked_array(y, m3) + self.assert_array_equal(self.average(z, None), 20./6.) + self.assert_array_equal(self.average(z, axis=0), [0., 1., 99., 99., 4.0, 7.5]) + self.assert_array_equal(self.average(z, axis=1), [2.5, 5.0]) + self.assert_array_equal(self.average(z, axis=0, weights=w2), [0., 1., 99., 99., 4.0, 10.0]) + + @np.errstate(all='ignore') + def test_A(self): + x = self.arange(24) + x[5:6] = self.masked + x = x.reshape(2, 3, 4) + + +if __name__ == '__main__': + setup_base = ("from __main__ import ModuleTester \n" + "import numpy\n" + "tester = ModuleTester(module)\n") + setup_cur = "import numpy.ma.core as module\n" + setup_base + (nrepeat, nloop) = (10, 10) + + for i in range(1, 8): + func = 'tester.test_%i()' % i + cur = timeit.Timer(func, setup_cur).repeat(nrepeat, nloop*10) + cur = np.sort(cur) + print("#%i" % i + 50*'.') + print(eval("ModuleTester.test_%i.__doc__" % i)) + print(f'core_current : {cur[0]:.3f} - {cur[1]:.3f}') diff --git a/.local/lib/python3.8/site-packages/numpy/matlib.py b/.local/lib/python3.8/site-packages/numpy/matlib.py new file mode 100644 index 0000000..bd6b632 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/matlib.py @@ -0,0 +1,376 @@ +import warnings + +# 2018-05-29, PendingDeprecationWarning added to matrix.__new__ +# 2020-01-23, numpy 1.19.0 PendingDeprecatonWarning +warnings.warn("Importing from numpy.matlib is deprecated since 1.19.0. " + "The matrix subclass is not the recommended way to represent " + "matrices or deal with linear algebra (see " + "https://docs.scipy.org/doc/numpy/user/numpy-for-matlab-users.html). " + "Please adjust your code to use regular ndarray. ", + PendingDeprecationWarning, stacklevel=2) + +import numpy as np +from numpy.matrixlib.defmatrix import matrix, asmatrix +# Matlib.py contains all functions in the numpy namespace with a few +# replacements. See doc/source/reference/routines.matlib.rst for details. +# Need * as we're copying the numpy namespace. +from numpy import * # noqa: F403 + +__version__ = np.__version__ + +__all__ = np.__all__[:] # copy numpy namespace +__all__ += ['rand', 'randn', 'repmat'] + +def empty(shape, dtype=None, order='C'): + """Return a new matrix of given shape and type, without initializing entries. + + Parameters + ---------- + shape : int or tuple of int + Shape of the empty matrix. + dtype : data-type, optional + Desired output data-type. + order : {'C', 'F'}, optional + Whether to store multi-dimensional data in row-major + (C-style) or column-major (Fortran-style) order in + memory. 
+ + See Also + -------- + empty_like, zeros + + Notes + ----- + `empty`, unlike `zeros`, does not set the matrix values to zero, + and may therefore be marginally faster. On the other hand, it requires + the user to manually set all the values in the array, and should be + used with caution. + + Examples + -------- + >>> import numpy.matlib + >>> np.matlib.empty((2, 2)) # filled with random data + matrix([[ 6.76425276e-320, 9.79033856e-307], # random + [ 7.39337286e-309, 3.22135945e-309]]) + >>> np.matlib.empty((2, 2), dtype=int) + matrix([[ 6600475, 0], # random + [ 6586976, 22740995]]) + + """ + return ndarray.__new__(matrix, shape, dtype, order=order) + +def ones(shape, dtype=None, order='C'): + """ + Matrix of ones. + + Return a matrix of given shape and type, filled with ones. + + Parameters + ---------- + shape : {sequence of ints, int} + Shape of the matrix + dtype : data-type, optional + The desired data-type for the matrix, default is np.float64. + order : {'C', 'F'}, optional + Whether to store matrix in C- or Fortran-contiguous order, + default is 'C'. + + Returns + ------- + out : matrix + Matrix of ones of given shape, dtype, and order. + + See Also + -------- + ones : Array of ones. + matlib.zeros : Zero matrix. + + Notes + ----- + If `shape` has length one i.e. ``(N,)``, or is a scalar ``N``, + `out` becomes a single row matrix of shape ``(1,N)``. + + Examples + -------- + >>> np.matlib.ones((2,3)) + matrix([[1., 1., 1.], + [1., 1., 1.]]) + + >>> np.matlib.ones(2) + matrix([[1., 1.]]) + + """ + a = ndarray.__new__(matrix, shape, dtype, order=order) + a.fill(1) + return a + +def zeros(shape, dtype=None, order='C'): + """ + Return a matrix of given shape and type, filled with zeros. + + Parameters + ---------- + shape : int or sequence of ints + Shape of the matrix + dtype : data-type, optional + The desired data-type for the matrix, default is float. + order : {'C', 'F'}, optional + Whether to store the result in C- or Fortran-contiguous order, + default is 'C'. + + Returns + ------- + out : matrix + Zero matrix of given shape, dtype, and order. + + See Also + -------- + numpy.zeros : Equivalent array function. + matlib.ones : Return a matrix of ones. + + Notes + ----- + If `shape` has length one i.e. ``(N,)``, or is a scalar ``N``, + `out` becomes a single row matrix of shape ``(1,N)``. + + Examples + -------- + >>> import numpy.matlib + >>> np.matlib.zeros((2, 3)) + matrix([[0., 0., 0.], + [0., 0., 0.]]) + + >>> np.matlib.zeros(2) + matrix([[0., 0.]]) + + """ + a = ndarray.__new__(matrix, shape, dtype, order=order) + a.fill(0) + return a + +def identity(n,dtype=None): + """ + Returns the square identity matrix of given size. + + Parameters + ---------- + n : int + Size of the returned identity matrix. + dtype : data-type, optional + Data-type of the output. Defaults to ``float``. + + Returns + ------- + out : matrix + `n` x `n` matrix with its main diagonal set to one, + and all other elements zero. + + See Also + -------- + numpy.identity : Equivalent array function. + matlib.eye : More general matrix identity function. + + Examples + -------- + >>> import numpy.matlib + >>> np.matlib.identity(3, dtype=int) + matrix([[1, 0, 0], + [0, 1, 0], + [0, 0, 1]]) + + """ + a = array([1]+n*[0], dtype=dtype) + b = empty((n, n), dtype=dtype) + b.flat = a + return b + +def eye(n,M=None, k=0, dtype=float, order='C'): + """ + Return a matrix with ones on the diagonal and zeros elsewhere. + + Parameters + ---------- + n : int + Number of rows in the output. 
+ M : int, optional + Number of columns in the output, defaults to `n`. + k : int, optional + Index of the diagonal: 0 refers to the main diagonal, + a positive value refers to an upper diagonal, + and a negative value to a lower diagonal. + dtype : dtype, optional + Data-type of the returned matrix. + order : {'C', 'F'}, optional + Whether the output should be stored in row-major (C-style) or + column-major (Fortran-style) order in memory. + + .. versionadded:: 1.14.0 + + Returns + ------- + I : matrix + A `n` x `M` matrix where all elements are equal to zero, + except for the `k`-th diagonal, whose values are equal to one. + + See Also + -------- + numpy.eye : Equivalent array function. + identity : Square identity matrix. + + Examples + -------- + >>> import numpy.matlib + >>> np.matlib.eye(3, k=1, dtype=float) + matrix([[0., 1., 0.], + [0., 0., 1.], + [0., 0., 0.]]) + + """ + return asmatrix(np.eye(n, M=M, k=k, dtype=dtype, order=order)) + +def rand(*args): + """ + Return a matrix of random values with given shape. + + Create a matrix of the given shape and propagate it with + random samples from a uniform distribution over ``[0, 1)``. + + Parameters + ---------- + \\*args : Arguments + Shape of the output. + If given as N integers, each integer specifies the size of one + dimension. + If given as a tuple, this tuple gives the complete shape. + + Returns + ------- + out : ndarray + The matrix of random values with shape given by `\\*args`. + + See Also + -------- + randn, numpy.random.RandomState.rand + + Examples + -------- + >>> np.random.seed(123) + >>> import numpy.matlib + >>> np.matlib.rand(2, 3) + matrix([[0.69646919, 0.28613933, 0.22685145], + [0.55131477, 0.71946897, 0.42310646]]) + >>> np.matlib.rand((2, 3)) + matrix([[0.9807642 , 0.68482974, 0.4809319 ], + [0.39211752, 0.34317802, 0.72904971]]) + + If the first argument is a tuple, other arguments are ignored: + + >>> np.matlib.rand((2, 3), 4) + matrix([[0.43857224, 0.0596779 , 0.39804426], + [0.73799541, 0.18249173, 0.17545176]]) + + """ + if isinstance(args[0], tuple): + args = args[0] + return asmatrix(np.random.rand(*args)) + +def randn(*args): + """ + Return a random matrix with data from the "standard normal" distribution. + + `randn` generates a matrix filled with random floats sampled from a + univariate "normal" (Gaussian) distribution of mean 0 and variance 1. + + Parameters + ---------- + \\*args : Arguments + Shape of the output. + If given as N integers, each integer specifies the size of one + dimension. If given as a tuple, this tuple gives the complete shape. + + Returns + ------- + Z : matrix of floats + A matrix of floating-point samples drawn from the standard normal + distribution. + + See Also + -------- + rand, numpy.random.RandomState.randn + + Notes + ----- + For random samples from :math:`N(\\mu, \\sigma^2)`, use: + + ``sigma * np.matlib.randn(...) + mu`` + + Examples + -------- + >>> np.random.seed(123) + >>> import numpy.matlib + >>> np.matlib.randn(1) + matrix([[-1.0856306]]) + >>> np.matlib.randn(1, 2, 3) + matrix([[ 0.99734545, 0.2829785 , -1.50629471], + [-0.57860025, 1.65143654, -2.42667924]]) + + Two-by-four matrix of samples from :math:`N(3, 6.25)`: + + >>> 2.5 * np.matlib.randn((2, 4)) + 3 + matrix([[1.92771843, 6.16484065, 0.83314899, 1.30278462], + [2.76322758, 6.72847407, 1.40274501, 1.8900451 ]]) + + """ + if isinstance(args[0], tuple): + args = args[0] + return asmatrix(np.random.randn(*args)) + +def repmat(a, m, n): + """ + Repeat a 0-D to 2-D array or matrix MxN times. 
+ + Parameters + ---------- + a : array_like + The array or matrix to be repeated. + m, n : int + The number of times `a` is repeated along the first and second axes. + + Returns + ------- + out : ndarray + The result of repeating `a`. + + Examples + -------- + >>> import numpy.matlib + >>> a0 = np.array(1) + >>> np.matlib.repmat(a0, 2, 3) + array([[1, 1, 1], + [1, 1, 1]]) + + >>> a1 = np.arange(4) + >>> np.matlib.repmat(a1, 2, 2) + array([[0, 1, 2, 3, 0, 1, 2, 3], + [0, 1, 2, 3, 0, 1, 2, 3]]) + + >>> a2 = np.asmatrix(np.arange(6).reshape(2, 3)) + >>> np.matlib.repmat(a2, 2, 3) + matrix([[0, 1, 2, 0, 1, 2, 0, 1, 2], + [3, 4, 5, 3, 4, 5, 3, 4, 5], + [0, 1, 2, 0, 1, 2, 0, 1, 2], + [3, 4, 5, 3, 4, 5, 3, 4, 5]]) + + """ + a = asanyarray(a) + ndim = a.ndim + if ndim == 0: + origrows, origcols = (1, 1) + elif ndim == 1: + origrows, origcols = (1, a.shape[0]) + else: + origrows, origcols = a.shape + rows = origrows * m + cols = origcols * n + c = a.reshape(1, a.size).repeat(m, 0).reshape(rows, origcols).repeat(n, 0) + return c.reshape(rows, cols) diff --git a/.local/lib/python3.8/site-packages/numpy/matrixlib/__init__.py b/.local/lib/python3.8/site-packages/numpy/matrixlib/__init__.py new file mode 100644 index 0000000..54154d1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/matrixlib/__init__.py @@ -0,0 +1,10 @@ +"""Sub-package containing the matrix class and related functions. + +""" +from .defmatrix import * + +__all__ = defmatrix.__all__ + +from numpy._pytesttester import PytestTester +test = PytestTester(__name__) +del PytestTester diff --git a/.local/lib/python3.8/site-packages/numpy/matrixlib/__init__.pyi b/.local/lib/python3.8/site-packages/numpy/matrixlib/__init__.pyi new file mode 100644 index 0000000..c1b82d2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/matrixlib/__init__.pyi @@ -0,0 +1,17 @@ +from typing import List + +from numpy._pytesttester import PytestTester + +from numpy import ( + matrix as matrix, +) + +from numpy.matrixlib.defmatrix import ( + bmat as bmat, + mat as mat, + asmatrix as asmatrix, +) + +__all__: List[str] +__path__: List[str] +test: PytestTester diff --git a/.local/lib/python3.8/site-packages/numpy/matrixlib/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/matrixlib/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..4523686 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/matrixlib/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/matrixlib/__pycache__/defmatrix.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/matrixlib/__pycache__/defmatrix.cpython-38.pyc new file mode 100644 index 0000000..4b8236c Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/matrixlib/__pycache__/defmatrix.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/matrixlib/__pycache__/setup.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/matrixlib/__pycache__/setup.cpython-38.pyc new file mode 100644 index 0000000..add897b Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/matrixlib/__pycache__/setup.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/matrixlib/defmatrix.py b/.local/lib/python3.8/site-packages/numpy/matrixlib/defmatrix.py new file mode 100644 index 0000000..a414ee9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/matrixlib/defmatrix.py @@ -0,0 +1,1113 @@ +__all__ = ['matrix', 'bmat', 'mat', 'asmatrix'] + +import sys 
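+# Note on the MATLAB-style string syntax parsed by _convert_from_string
+# below: surrounding brackets are ignored, semicolons separate rows, and
+# commas or whitespace separate columns, so (for example)
+#     '1 2; 3 4'  and  '[1, 2; 3, 4]'
+# both describe [[1, 2], [3, 4]].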
+import warnings +import ast +import numpy.core.numeric as N +from numpy.core.numeric import concatenate, isscalar +from numpy.core.overrides import set_module +# While not in __all__, matrix_power used to be defined here, so we import +# it for backward compatibility. +from numpy.linalg import matrix_power + + +def _convert_from_string(data): + for char in '[]': + data = data.replace(char, '') + + rows = data.split(';') + newdata = [] + count = 0 + for row in rows: + trow = row.split(',') + newrow = [] + for col in trow: + temp = col.split() + newrow.extend(map(ast.literal_eval, temp)) + if count == 0: + Ncols = len(newrow) + elif len(newrow) != Ncols: + raise ValueError("Rows not the same size.") + count += 1 + newdata.append(newrow) + return newdata + + +@set_module('numpy') +def asmatrix(data, dtype=None): + """ + Interpret the input as a matrix. + + Unlike `matrix`, `asmatrix` does not make a copy if the input is already + a matrix or an ndarray. Equivalent to ``matrix(data, copy=False)``. + + Parameters + ---------- + data : array_like + Input data. + dtype : data-type + Data-type of the output matrix. + + Returns + ------- + mat : matrix + `data` interpreted as a matrix. + + Examples + -------- + >>> x = np.array([[1, 2], [3, 4]]) + + >>> m = np.asmatrix(x) + + >>> x[0,0] = 5 + + >>> m + matrix([[5, 2], + [3, 4]]) + + """ + return matrix(data, dtype=dtype, copy=False) + + +@set_module('numpy') +class matrix(N.ndarray): + """ + matrix(data, dtype=None, copy=True) + + .. note:: It is no longer recommended to use this class, even for linear + algebra. Instead use regular arrays. The class may be removed + in the future. + + Returns a matrix from an array-like object, or from a string of data. + A matrix is a specialized 2-D array that retains its 2-D nature + through operations. It has certain special operators, such as ``*`` + (matrix multiplication) and ``**`` (matrix power). + + Parameters + ---------- + data : array_like or string + If `data` is a string, it is interpreted as a matrix with commas + or spaces separating columns, and semicolons separating rows. + dtype : data-type + Data-type of the output matrix. + copy : bool + If `data` is already an `ndarray`, then this flag determines + whether the data is copied (the default), or whether a view is + constructed. + + See Also + -------- + array + + Examples + -------- + >>> a = np.matrix('1 2; 3 4') + >>> a + matrix([[1, 2], + [3, 4]]) + + >>> np.matrix([[1, 2], [3, 4]]) + matrix([[1, 2], + [3, 4]]) + + """ + __array_priority__ = 10.0 + def __new__(subtype, data, dtype=None, copy=True): + warnings.warn('the matrix subclass is not the recommended way to ' + 'represent matrices or deal with linear algebra (see ' + 'https://docs.scipy.org/doc/numpy/user/' + 'numpy-for-matlab-users.html). 
' + 'Please adjust your code to use regular ndarray.', + PendingDeprecationWarning, stacklevel=2) + if isinstance(data, matrix): + dtype2 = data.dtype + if (dtype is None): + dtype = dtype2 + if (dtype2 == dtype) and (not copy): + return data + return data.astype(dtype) + + if isinstance(data, N.ndarray): + if dtype is None: + intype = data.dtype + else: + intype = N.dtype(dtype) + new = data.view(subtype) + if intype != data.dtype: + return new.astype(intype) + if copy: return new.copy() + else: return new + + if isinstance(data, str): + data = _convert_from_string(data) + + # now convert data to an array + arr = N.array(data, dtype=dtype, copy=copy) + ndim = arr.ndim + shape = arr.shape + if (ndim > 2): + raise ValueError("matrix must be 2-dimensional") + elif ndim == 0: + shape = (1, 1) + elif ndim == 1: + shape = (1, shape[0]) + + order = 'C' + if (ndim == 2) and arr.flags.fortran: + order = 'F' + + if not (order or arr.flags.contiguous): + arr = arr.copy() + + ret = N.ndarray.__new__(subtype, shape, arr.dtype, + buffer=arr, + order=order) + return ret + + def __array_finalize__(self, obj): + self._getitem = False + if (isinstance(obj, matrix) and obj._getitem): return + ndim = self.ndim + if (ndim == 2): + return + if (ndim > 2): + newshape = tuple([x for x in self.shape if x > 1]) + ndim = len(newshape) + if ndim == 2: + self.shape = newshape + return + elif (ndim > 2): + raise ValueError("shape too large to be a matrix.") + else: + newshape = self.shape + if ndim == 0: + self.shape = (1, 1) + elif ndim == 1: + self.shape = (1, newshape[0]) + return + + def __getitem__(self, index): + self._getitem = True + + try: + out = N.ndarray.__getitem__(self, index) + finally: + self._getitem = False + + if not isinstance(out, N.ndarray): + return out + + if out.ndim == 0: + return out[()] + if out.ndim == 1: + sh = out.shape[0] + # Determine when we should have a column array + try: + n = len(index) + except Exception: + n = 0 + if n > 1 and isscalar(index[1]): + out.shape = (sh, 1) + else: + out.shape = (1, sh) + return out + + def __mul__(self, other): + if isinstance(other, (N.ndarray, list, tuple)) : + # This promotes 1-D vectors to row vectors + return N.dot(self, asmatrix(other)) + if isscalar(other) or not hasattr(other, '__rmul__') : + return N.dot(self, other) + return NotImplemented + + def __rmul__(self, other): + return N.dot(other, self) + + def __imul__(self, other): + self[:] = self * other + return self + + def __pow__(self, other): + return matrix_power(self, other) + + def __ipow__(self, other): + self[:] = self ** other + return self + + def __rpow__(self, other): + return NotImplemented + + def _align(self, axis): + """A convenience function for operations that need to preserve axis + orientation. + """ + if axis is None: + return self[0, 0] + elif axis==0: + return self + elif axis==1: + return self.transpose() + else: + raise ValueError("unsupported axis") + + def _collapse(self, axis): + """A convenience function for operations that want to collapse + to a scalar like _align, but are using keepdims=True + """ + if axis is None: + return self[0, 0] + else: + return self + + # Necessary because base-class tolist expects dimension + # reduction by x[0] + def tolist(self): + """ + Return the matrix as a (possibly nested) list. + + See `ndarray.tolist` for full documentation. 
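+
+        Because a matrix is always 2-D, the result is always a list of
+        row lists; even a (1, N) matrix becomes a nested ``[[...]]``,
+        not a flat list.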
+ + See Also + -------- + ndarray.tolist + + Examples + -------- + >>> x = np.matrix(np.arange(12).reshape((3,4))); x + matrix([[ 0, 1, 2, 3], + [ 4, 5, 6, 7], + [ 8, 9, 10, 11]]) + >>> x.tolist() + [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]] + + """ + return self.__array__().tolist() + + # To preserve orientation of result... + def sum(self, axis=None, dtype=None, out=None): + """ + Returns the sum of the matrix elements, along the given axis. + + Refer to `numpy.sum` for full documentation. + + See Also + -------- + numpy.sum + + Notes + ----- + This is the same as `ndarray.sum`, except that where an `ndarray` would + be returned, a `matrix` object is returned instead. + + Examples + -------- + >>> x = np.matrix([[1, 2], [4, 3]]) + >>> x.sum() + 10 + >>> x.sum(axis=1) + matrix([[3], + [7]]) + >>> x.sum(axis=1, dtype='float') + matrix([[3.], + [7.]]) + >>> out = np.zeros((2, 1), dtype='float') + >>> x.sum(axis=1, dtype='float', out=np.asmatrix(out)) + matrix([[3.], + [7.]]) + + """ + return N.ndarray.sum(self, axis, dtype, out, keepdims=True)._collapse(axis) + + + # To update docstring from array to matrix... + def squeeze(self, axis=None): + """ + Return a possibly reshaped matrix. + + Refer to `numpy.squeeze` for more documentation. + + Parameters + ---------- + axis : None or int or tuple of ints, optional + Selects a subset of the axes of length one in the shape. + If an axis is selected with shape entry greater than one, + an error is raised. + + Returns + ------- + squeezed : matrix + The matrix, but as a (1, N) matrix if it had shape (N, 1). + + See Also + -------- + numpy.squeeze : related function + + Notes + ----- + If `m` has a single column then that column is returned + as the single row of a matrix. Otherwise `m` is returned. + The returned matrix is always either `m` itself or a view into `m`. + Supplying an axis keyword argument will not affect the returned matrix + but it may cause an error to be raised. + + Examples + -------- + >>> c = np.matrix([[1], [2]]) + >>> c + matrix([[1], + [2]]) + >>> c.squeeze() + matrix([[1, 2]]) + >>> r = c.T + >>> r + matrix([[1, 2]]) + >>> r.squeeze() + matrix([[1, 2]]) + >>> m = np.matrix([[1, 2], [3, 4]]) + >>> m.squeeze() + matrix([[1, 2], + [3, 4]]) + + """ + return N.ndarray.squeeze(self, axis=axis) + + + # To update docstring from array to matrix... + def flatten(self, order='C'): + """ + Return a flattened copy of the matrix. + + All `N` elements of the matrix are placed into a single row. + + Parameters + ---------- + order : {'C', 'F', 'A', 'K'}, optional + 'C' means to flatten in row-major (C-style) order. 'F' means to + flatten in column-major (Fortran-style) order. 'A' means to + flatten in column-major order if `m` is Fortran *contiguous* in + memory, row-major order otherwise. 'K' means to flatten `m` in + the order the elements occur in memory. The default is 'C'. + + Returns + ------- + y : matrix + A copy of the matrix, flattened to a `(1, N)` matrix where `N` + is the number of elements in the original matrix. + + See Also + -------- + ravel : Return a flattened array. + flat : A 1-D flat iterator over the matrix. + + Examples + -------- + >>> m = np.matrix([[1,2], [3,4]]) + >>> m.flatten() + matrix([[1, 2, 3, 4]]) + >>> m.flatten('F') + matrix([[1, 3, 2, 4]]) + + """ + return N.ndarray.flatten(self, order=order) + + def mean(self, axis=None, dtype=None, out=None): + """ + Returns the average of the matrix elements along the given axis. + + Refer to `numpy.mean` for full documentation. 
+ + See Also + -------- + numpy.mean + + Notes + ----- + Same as `ndarray.mean` except that, where that returns an `ndarray`, + this returns a `matrix` object. + + Examples + -------- + >>> x = np.matrix(np.arange(12).reshape((3, 4))) + >>> x + matrix([[ 0, 1, 2, 3], + [ 4, 5, 6, 7], + [ 8, 9, 10, 11]]) + >>> x.mean() + 5.5 + >>> x.mean(0) + matrix([[4., 5., 6., 7.]]) + >>> x.mean(1) + matrix([[ 1.5], + [ 5.5], + [ 9.5]]) + + """ + return N.ndarray.mean(self, axis, dtype, out, keepdims=True)._collapse(axis) + + def std(self, axis=None, dtype=None, out=None, ddof=0): + """ + Return the standard deviation of the array elements along the given axis. + + Refer to `numpy.std` for full documentation. + + See Also + -------- + numpy.std + + Notes + ----- + This is the same as `ndarray.std`, except that where an `ndarray` would + be returned, a `matrix` object is returned instead. + + Examples + -------- + >>> x = np.matrix(np.arange(12).reshape((3, 4))) + >>> x + matrix([[ 0, 1, 2, 3], + [ 4, 5, 6, 7], + [ 8, 9, 10, 11]]) + >>> x.std() + 3.4520525295346629 # may vary + >>> x.std(0) + matrix([[ 3.26598632, 3.26598632, 3.26598632, 3.26598632]]) # may vary + >>> x.std(1) + matrix([[ 1.11803399], + [ 1.11803399], + [ 1.11803399]]) + + """ + return N.ndarray.std(self, axis, dtype, out, ddof, keepdims=True)._collapse(axis) + + def var(self, axis=None, dtype=None, out=None, ddof=0): + """ + Returns the variance of the matrix elements, along the given axis. + + Refer to `numpy.var` for full documentation. + + See Also + -------- + numpy.var + + Notes + ----- + This is the same as `ndarray.var`, except that where an `ndarray` would + be returned, a `matrix` object is returned instead. + + Examples + -------- + >>> x = np.matrix(np.arange(12).reshape((3, 4))) + >>> x + matrix([[ 0, 1, 2, 3], + [ 4, 5, 6, 7], + [ 8, 9, 10, 11]]) + >>> x.var() + 11.916666666666666 + >>> x.var(0) + matrix([[ 10.66666667, 10.66666667, 10.66666667, 10.66666667]]) # may vary + >>> x.var(1) + matrix([[1.25], + [1.25], + [1.25]]) + + """ + return N.ndarray.var(self, axis, dtype, out, ddof, keepdims=True)._collapse(axis) + + def prod(self, axis=None, dtype=None, out=None): + """ + Return the product of the array elements over the given axis. + + Refer to `prod` for full documentation. + + See Also + -------- + prod, ndarray.prod + + Notes + ----- + Same as `ndarray.prod`, except, where that returns an `ndarray`, this + returns a `matrix` object instead. + + Examples + -------- + >>> x = np.matrix(np.arange(12).reshape((3,4))); x + matrix([[ 0, 1, 2, 3], + [ 4, 5, 6, 7], + [ 8, 9, 10, 11]]) + >>> x.prod() + 0 + >>> x.prod(0) + matrix([[ 0, 45, 120, 231]]) + >>> x.prod(1) + matrix([[ 0], + [ 840], + [7920]]) + + """ + return N.ndarray.prod(self, axis, dtype, out, keepdims=True)._collapse(axis) + + def any(self, axis=None, out=None): + """ + Test whether any array element along a given axis evaluates to True. + + Refer to `numpy.any` for full documentation. + + Parameters + ---------- + axis : int, optional + Axis along which logical OR is performed + out : ndarray, optional + Output to existing array instead of creating new one, must have + same shape as expected output + + Returns + ------- + any : bool, ndarray + Returns a single bool if `axis` is ``None``; otherwise, + returns `ndarray` + + """ + return N.ndarray.any(self, axis, out, keepdims=True)._collapse(axis) + + def all(self, axis=None, out=None): + """ + Test whether all matrix elements along a given axis evaluate to True. 
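+
+        As with the other reductions on this class, the result stays 2-D:
+        reducing along an axis yields a row or a column matrix rather than
+        a 1-D array, via the same ``keepdims=True`` / ``_collapse`` pattern
+        used by `sum` above.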
+ + Parameters + ---------- + See `numpy.all` for complete descriptions + + See Also + -------- + numpy.all + + Notes + ----- + This is the same as `ndarray.all`, but it returns a `matrix` object. + + Examples + -------- + >>> x = np.matrix(np.arange(12).reshape((3,4))); x + matrix([[ 0, 1, 2, 3], + [ 4, 5, 6, 7], + [ 8, 9, 10, 11]]) + >>> y = x[0]; y + matrix([[0, 1, 2, 3]]) + >>> (x == y) + matrix([[ True, True, True, True], + [False, False, False, False], + [False, False, False, False]]) + >>> (x == y).all() + False + >>> (x == y).all(0) + matrix([[False, False, False, False]]) + >>> (x == y).all(1) + matrix([[ True], + [False], + [False]]) + + """ + return N.ndarray.all(self, axis, out, keepdims=True)._collapse(axis) + + def max(self, axis=None, out=None): + """ + Return the maximum value along an axis. + + Parameters + ---------- + See `amax` for complete descriptions + + See Also + -------- + amax, ndarray.max + + Notes + ----- + This is the same as `ndarray.max`, but returns a `matrix` object + where `ndarray.max` would return an ndarray. + + Examples + -------- + >>> x = np.matrix(np.arange(12).reshape((3,4))); x + matrix([[ 0, 1, 2, 3], + [ 4, 5, 6, 7], + [ 8, 9, 10, 11]]) + >>> x.max() + 11 + >>> x.max(0) + matrix([[ 8, 9, 10, 11]]) + >>> x.max(1) + matrix([[ 3], + [ 7], + [11]]) + + """ + return N.ndarray.max(self, axis, out, keepdims=True)._collapse(axis) + + def argmax(self, axis=None, out=None): + """ + Indexes of the maximum values along an axis. + + Return the indexes of the first occurrences of the maximum values + along the specified axis. If axis is None, the index is for the + flattened matrix. + + Parameters + ---------- + See `numpy.argmax` for complete descriptions + + See Also + -------- + numpy.argmax + + Notes + ----- + This is the same as `ndarray.argmax`, but returns a `matrix` object + where `ndarray.argmax` would return an `ndarray`. + + Examples + -------- + >>> x = np.matrix(np.arange(12).reshape((3,4))); x + matrix([[ 0, 1, 2, 3], + [ 4, 5, 6, 7], + [ 8, 9, 10, 11]]) + >>> x.argmax() + 11 + >>> x.argmax(0) + matrix([[2, 2, 2, 2]]) + >>> x.argmax(1) + matrix([[3], + [3], + [3]]) + + """ + return N.ndarray.argmax(self, axis, out)._align(axis) + + def min(self, axis=None, out=None): + """ + Return the minimum value along an axis. + + Parameters + ---------- + See `amin` for complete descriptions. + + See Also + -------- + amin, ndarray.min + + Notes + ----- + This is the same as `ndarray.min`, but returns a `matrix` object + where `ndarray.min` would return an ndarray. + + Examples + -------- + >>> x = -np.matrix(np.arange(12).reshape((3,4))); x + matrix([[ 0, -1, -2, -3], + [ -4, -5, -6, -7], + [ -8, -9, -10, -11]]) + >>> x.min() + -11 + >>> x.min(0) + matrix([[ -8, -9, -10, -11]]) + >>> x.min(1) + matrix([[ -3], + [ -7], + [-11]]) + + """ + return N.ndarray.min(self, axis, out, keepdims=True)._collapse(axis) + + def argmin(self, axis=None, out=None): + """ + Indexes of the minimum values along an axis. + + Return the indexes of the first occurrences of the minimum values + along the specified axis. If axis is None, the index is for the + flattened matrix. + + Parameters + ---------- + See `numpy.argmin` for complete descriptions. + + See Also + -------- + numpy.argmin + + Notes + ----- + This is the same as `ndarray.argmin`, but returns a `matrix` object + where `ndarray.argmin` would return an `ndarray`. 
+ + Examples + -------- + >>> x = -np.matrix(np.arange(12).reshape((3,4))); x + matrix([[ 0, -1, -2, -3], + [ -4, -5, -6, -7], + [ -8, -9, -10, -11]]) + >>> x.argmin() + 11 + >>> x.argmin(0) + matrix([[2, 2, 2, 2]]) + >>> x.argmin(1) + matrix([[3], + [3], + [3]]) + + """ + return N.ndarray.argmin(self, axis, out)._align(axis) + + def ptp(self, axis=None, out=None): + """ + Peak-to-peak (maximum - minimum) value along the given axis. + + Refer to `numpy.ptp` for full documentation. + + See Also + -------- + numpy.ptp + + Notes + ----- + Same as `ndarray.ptp`, except, where that would return an `ndarray` object, + this returns a `matrix` object. + + Examples + -------- + >>> x = np.matrix(np.arange(12).reshape((3,4))); x + matrix([[ 0, 1, 2, 3], + [ 4, 5, 6, 7], + [ 8, 9, 10, 11]]) + >>> x.ptp() + 11 + >>> x.ptp(0) + matrix([[8, 8, 8, 8]]) + >>> x.ptp(1) + matrix([[3], + [3], + [3]]) + + """ + return N.ndarray.ptp(self, axis, out)._align(axis) + + @property + def I(self): + """ + Returns the (multiplicative) inverse of invertible `self`. + + Parameters + ---------- + None + + Returns + ------- + ret : matrix object + If `self` is non-singular, `ret` is such that ``ret * self`` == + ``self * ret`` == ``np.matrix(np.eye(self[0,:].size))`` all return + ``True``. + + Raises + ------ + numpy.linalg.LinAlgError: Singular matrix + If `self` is singular. + + See Also + -------- + linalg.inv + + Examples + -------- + >>> m = np.matrix('[1, 2; 3, 4]'); m + matrix([[1, 2], + [3, 4]]) + >>> m.getI() + matrix([[-2. , 1. ], + [ 1.5, -0.5]]) + >>> m.getI() * m + matrix([[ 1., 0.], # may vary + [ 0., 1.]]) + + """ + M, N = self.shape + if M == N: + from numpy.linalg import inv as func + else: + from numpy.linalg import pinv as func + return asmatrix(func(self)) + + @property + def A(self): + """ + Return `self` as an `ndarray` object. + + Equivalent to ``np.asarray(self)``. + + Parameters + ---------- + None + + Returns + ------- + ret : ndarray + `self` as an `ndarray` + + Examples + -------- + >>> x = np.matrix(np.arange(12).reshape((3,4))); x + matrix([[ 0, 1, 2, 3], + [ 4, 5, 6, 7], + [ 8, 9, 10, 11]]) + >>> x.getA() + array([[ 0, 1, 2, 3], + [ 4, 5, 6, 7], + [ 8, 9, 10, 11]]) + + """ + return self.__array__() + + @property + def A1(self): + """ + Return `self` as a flattened `ndarray`. + + Equivalent to ``np.asarray(x).ravel()`` + + Parameters + ---------- + None + + Returns + ------- + ret : ndarray + `self`, 1-D, as an `ndarray` + + Examples + -------- + >>> x = np.matrix(np.arange(12).reshape((3,4))); x + matrix([[ 0, 1, 2, 3], + [ 4, 5, 6, 7], + [ 8, 9, 10, 11]]) + >>> x.getA1() + array([ 0, 1, 2, ..., 9, 10, 11]) + + + """ + return self.__array__().ravel() + + + def ravel(self, order='C'): + """ + Return a flattened matrix. + + Refer to `numpy.ravel` for more documentation. + + Parameters + ---------- + order : {'C', 'F', 'A', 'K'}, optional + The elements of `m` are read using this index order. 'C' means to + index the elements in C-like order, with the last axis index + changing fastest, back to the first axis index changing slowest. + 'F' means to index the elements in Fortran-like index order, with + the first index changing fastest, and the last index changing + slowest. Note that the 'C' and 'F' options take no account of the + memory layout of the underlying array, and only refer to the order + of axis indexing. 'A' means to read the elements in Fortran-like + index order if `m` is Fortran *contiguous* in memory, C-like order + otherwise. 
'K' means to read the elements in the order they occur + in memory, except for reversing the data when strides are negative. + By default, 'C' index order is used. + + Returns + ------- + ret : matrix + Return the matrix flattened to shape `(1, N)` where `N` + is the number of elements in the original matrix. + A copy is made only if necessary. + + See Also + -------- + matrix.flatten : returns a similar output matrix but always a copy + matrix.flat : a flat iterator on the array. + numpy.ravel : related function which returns an ndarray + + """ + return N.ndarray.ravel(self, order=order) + + @property + def T(self): + """ + Returns the transpose of the matrix. + + Does *not* conjugate! For the complex conjugate transpose, use ``.H``. + + Parameters + ---------- + None + + Returns + ------- + ret : matrix object + The (non-conjugated) transpose of the matrix. + + See Also + -------- + transpose, getH + + Examples + -------- + >>> m = np.matrix('[1, 2; 3, 4]') + >>> m + matrix([[1, 2], + [3, 4]]) + >>> m.getT() + matrix([[1, 3], + [2, 4]]) + + """ + return self.transpose() + + @property + def H(self): + """ + Returns the (complex) conjugate transpose of `self`. + + Equivalent to ``np.transpose(self)`` if `self` is real-valued. + + Parameters + ---------- + None + + Returns + ------- + ret : matrix object + complex conjugate transpose of `self` + + Examples + -------- + >>> x = np.matrix(np.arange(12).reshape((3,4))) + >>> z = x - 1j*x; z + matrix([[ 0. +0.j, 1. -1.j, 2. -2.j, 3. -3.j], + [ 4. -4.j, 5. -5.j, 6. -6.j, 7. -7.j], + [ 8. -8.j, 9. -9.j, 10.-10.j, 11.-11.j]]) + >>> z.getH() + matrix([[ 0. -0.j, 4. +4.j, 8. +8.j], + [ 1. +1.j, 5. +5.j, 9. +9.j], + [ 2. +2.j, 6. +6.j, 10.+10.j], + [ 3. +3.j, 7. +7.j, 11.+11.j]]) + + """ + if issubclass(self.dtype.type, N.complexfloating): + return self.transpose().conjugate() + else: + return self.transpose() + + # kept for compatibility + getT = T.fget + getA = A.fget + getA1 = A1.fget + getH = H.fget + getI = I.fget + +def _from_string(str, gdict, ldict): + rows = str.split(';') + rowtup = [] + for row in rows: + trow = row.split(',') + newrow = [] + for x in trow: + newrow.extend(x.split()) + trow = newrow + coltup = [] + for col in trow: + col = col.strip() + try: + thismat = ldict[col] + except KeyError: + try: + thismat = gdict[col] + except KeyError as e: + raise NameError(f"name {col!r} is not defined") from None + + coltup.append(thismat) + rowtup.append(concatenate(coltup, axis=-1)) + return concatenate(rowtup, axis=0) + + +@set_module('numpy') +def bmat(obj, ldict=None, gdict=None): + """ + Build a matrix object from a string, nested sequence, or array. + + Parameters + ---------- + obj : str or array_like + Input data. If a string, variables in the current scope may be + referenced by name. + ldict : dict, optional + A dictionary that replaces local operands in current frame. + Ignored if `obj` is not a string or `gdict` is None. + gdict : dict, optional + A dictionary that replaces global operands in current frame. + Ignored if `obj` is not a string. + + Returns + ------- + out : matrix + Returns a matrix object, which is a specialized 2-D array. + + See Also + -------- + block : + A generalization of this function for N-d arrays, that returns normal + ndarrays. 
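+
+    Notes
+    -----
+    In the string form, each referenced name is looked up first in
+    `ldict` and then in `gdict`; when `gdict` is omitted, the caller's
+    local and global namespaces are used instead (via the frame
+    inspection visible in the implementation below).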
+ + Examples + -------- + >>> A = np.mat('1 1; 1 1') + >>> B = np.mat('2 2; 2 2') + >>> C = np.mat('3 4; 5 6') + >>> D = np.mat('7 8; 9 0') + + All the following expressions construct the same block matrix: + + >>> np.bmat([[A, B], [C, D]]) + matrix([[1, 1, 2, 2], + [1, 1, 2, 2], + [3, 4, 7, 8], + [5, 6, 9, 0]]) + >>> np.bmat(np.r_[np.c_[A, B], np.c_[C, D]]) + matrix([[1, 1, 2, 2], + [1, 1, 2, 2], + [3, 4, 7, 8], + [5, 6, 9, 0]]) + >>> np.bmat('A,B; C,D') + matrix([[1, 1, 2, 2], + [1, 1, 2, 2], + [3, 4, 7, 8], + [5, 6, 9, 0]]) + + """ + if isinstance(obj, str): + if gdict is None: + # get previous frame + frame = sys._getframe().f_back + glob_dict = frame.f_globals + loc_dict = frame.f_locals + else: + glob_dict = gdict + loc_dict = ldict + + return matrix(_from_string(obj, glob_dict, loc_dict)) + + if isinstance(obj, (tuple, list)): + # [[A,B],[C,D]] + arr_rows = [] + for row in obj: + if isinstance(row, N.ndarray): # not 2-d + return matrix(concatenate(obj, axis=-1)) + else: + arr_rows.append(concatenate(row, axis=-1)) + return matrix(concatenate(arr_rows, axis=0)) + if isinstance(obj, N.ndarray): + return matrix(obj) + +mat = asmatrix diff --git a/.local/lib/python3.8/site-packages/numpy/matrixlib/defmatrix.pyi b/.local/lib/python3.8/site-packages/numpy/matrixlib/defmatrix.pyi new file mode 100644 index 0000000..6c86ea1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/matrixlib/defmatrix.pyi @@ -0,0 +1,15 @@ +from typing import List, Any, Sequence, Mapping +from numpy import matrix as matrix +from numpy.typing import ArrayLike, DTypeLike, NDArray + +__all__: List[str] + +def bmat( + obj: str | Sequence[ArrayLike] | NDArray[Any], + ldict: None | Mapping[str, Any] = ..., + gdict: None | Mapping[str, Any] = ..., +) -> matrix[Any, Any]: ... + +def asmatrix(data: ArrayLike, dtype: DTypeLike = ...) -> matrix[Any, Any]: ... 
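+# `mat` below is a runtime alias for `asmatrix`; it is declared in this
+# stub as well so type checkers see the same public name the module
+# exports.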
+ +mat = asmatrix diff --git a/.local/lib/python3.8/site-packages/numpy/matrixlib/setup.py b/.local/lib/python3.8/site-packages/numpy/matrixlib/setup.py new file mode 100644 index 0000000..4fed75d --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/matrixlib/setup.py @@ -0,0 +1,12 @@ +#!/usr/bin/env python3 +def configuration(parent_package='', top_path=None): + from numpy.distutils.misc_util import Configuration + config = Configuration('matrixlib', parent_package, top_path) + config.add_subpackage('tests') + config.add_data_files('*.pyi') + return config + +if __name__ == "__main__": + from numpy.distutils.core import setup + config = configuration(top_path='').todict() + setup(**config) diff --git a/.local/lib/python3.8/site-packages/numpy/matrixlib/tests/__init__.py b/.local/lib/python3.8/site-packages/numpy/matrixlib/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/numpy/matrixlib/tests/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/matrixlib/tests/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..45db7e9 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/matrixlib/tests/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/matrixlib/tests/__pycache__/test_defmatrix.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/matrixlib/tests/__pycache__/test_defmatrix.cpython-38.pyc new file mode 100644 index 0000000..06ecc65 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/matrixlib/tests/__pycache__/test_defmatrix.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/matrixlib/tests/__pycache__/test_interaction.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/matrixlib/tests/__pycache__/test_interaction.cpython-38.pyc new file mode 100644 index 0000000..b2bbbc7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/matrixlib/tests/__pycache__/test_interaction.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/matrixlib/tests/__pycache__/test_masked_matrix.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/matrixlib/tests/__pycache__/test_masked_matrix.cpython-38.pyc new file mode 100644 index 0000000..909f8b2 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/matrixlib/tests/__pycache__/test_masked_matrix.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/matrixlib/tests/__pycache__/test_matrix_linalg.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/matrixlib/tests/__pycache__/test_matrix_linalg.cpython-38.pyc new file mode 100644 index 0000000..75972e7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/matrixlib/tests/__pycache__/test_matrix_linalg.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/matrixlib/tests/__pycache__/test_multiarray.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/matrixlib/tests/__pycache__/test_multiarray.cpython-38.pyc new file mode 100644 index 0000000..78aa760 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/matrixlib/tests/__pycache__/test_multiarray.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/matrixlib/tests/__pycache__/test_numeric.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/matrixlib/tests/__pycache__/test_numeric.cpython-38.pyc new file mode 100644 index 0000000..b2acfce Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/numpy/matrixlib/tests/__pycache__/test_numeric.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/matrixlib/tests/__pycache__/test_regression.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/matrixlib/tests/__pycache__/test_regression.cpython-38.pyc new file mode 100644 index 0000000..c4447f6 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/matrixlib/tests/__pycache__/test_regression.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/matrixlib/tests/test_defmatrix.py b/.local/lib/python3.8/site-packages/numpy/matrixlib/tests/test_defmatrix.py new file mode 100644 index 0000000..4cb5f3a --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/matrixlib/tests/test_defmatrix.py @@ -0,0 +1,453 @@ +import collections.abc + +import numpy as np +from numpy import matrix, asmatrix, bmat +from numpy.testing import ( + assert_, assert_equal, assert_almost_equal, assert_array_equal, + assert_array_almost_equal, assert_raises + ) +from numpy.linalg import matrix_power +from numpy.matrixlib import mat + +class TestCtor: + def test_basic(self): + A = np.array([[1, 2], [3, 4]]) + mA = matrix(A) + assert_(np.all(mA.A == A)) + + B = bmat("A,A;A,A") + C = bmat([[A, A], [A, A]]) + D = np.array([[1, 2, 1, 2], + [3, 4, 3, 4], + [1, 2, 1, 2], + [3, 4, 3, 4]]) + assert_(np.all(B.A == D)) + assert_(np.all(C.A == D)) + + E = np.array([[5, 6], [7, 8]]) + AEresult = matrix([[1, 2, 5, 6], [3, 4, 7, 8]]) + assert_(np.all(bmat([A, E]) == AEresult)) + + vec = np.arange(5) + mvec = matrix(vec) + assert_(mvec.shape == (1, 5)) + + def test_exceptions(self): + # Check for ValueError when called with invalid string data. + assert_raises(ValueError, matrix, "invalid") + + def test_bmat_nondefault_str(self): + A = np.array([[1, 2], [3, 4]]) + B = np.array([[5, 6], [7, 8]]) + Aresult = np.array([[1, 2, 1, 2], + [3, 4, 3, 4], + [1, 2, 1, 2], + [3, 4, 3, 4]]) + mixresult = np.array([[1, 2, 5, 6], + [3, 4, 7, 8], + [5, 6, 1, 2], + [7, 8, 3, 4]]) + assert_(np.all(bmat("A,A;A,A") == Aresult)) + assert_(np.all(bmat("A,A;A,A", ldict={'A':B}) == Aresult)) + assert_raises(TypeError, bmat, "A,A;A,A", gdict={'A':B}) + assert_( + np.all(bmat("A,A;A,A", ldict={'A':A}, gdict={'A':B}) == Aresult)) + b2 = bmat("A,B;C,D", ldict={'A':A,'B':B}, gdict={'C':B,'D':A}) + assert_(np.all(b2 == mixresult)) + + +class TestProperties: + def test_sum(self): + """Test whether matrix.sum(axis=1) preserves orientation. + Fails in NumPy <= 0.9.6.2127. 
+ """ + M = matrix([[1, 2, 0, 0], + [3, 4, 0, 0], + [1, 2, 1, 2], + [3, 4, 3, 4]]) + sum0 = matrix([8, 12, 4, 6]) + sum1 = matrix([3, 7, 6, 14]).T + sumall = 30 + assert_array_equal(sum0, M.sum(axis=0)) + assert_array_equal(sum1, M.sum(axis=1)) + assert_equal(sumall, M.sum()) + + assert_array_equal(sum0, np.sum(M, axis=0)) + assert_array_equal(sum1, np.sum(M, axis=1)) + assert_equal(sumall, np.sum(M)) + + def test_prod(self): + x = matrix([[1, 2, 3], [4, 5, 6]]) + assert_equal(x.prod(), 720) + assert_equal(x.prod(0), matrix([[4, 10, 18]])) + assert_equal(x.prod(1), matrix([[6], [120]])) + + assert_equal(np.prod(x), 720) + assert_equal(np.prod(x, axis=0), matrix([[4, 10, 18]])) + assert_equal(np.prod(x, axis=1), matrix([[6], [120]])) + + y = matrix([0, 1, 3]) + assert_(y.prod() == 0) + + def test_max(self): + x = matrix([[1, 2, 3], [4, 5, 6]]) + assert_equal(x.max(), 6) + assert_equal(x.max(0), matrix([[4, 5, 6]])) + assert_equal(x.max(1), matrix([[3], [6]])) + + assert_equal(np.max(x), 6) + assert_equal(np.max(x, axis=0), matrix([[4, 5, 6]])) + assert_equal(np.max(x, axis=1), matrix([[3], [6]])) + + def test_min(self): + x = matrix([[1, 2, 3], [4, 5, 6]]) + assert_equal(x.min(), 1) + assert_equal(x.min(0), matrix([[1, 2, 3]])) + assert_equal(x.min(1), matrix([[1], [4]])) + + assert_equal(np.min(x), 1) + assert_equal(np.min(x, axis=0), matrix([[1, 2, 3]])) + assert_equal(np.min(x, axis=1), matrix([[1], [4]])) + + def test_ptp(self): + x = np.arange(4).reshape((2, 2)) + assert_(x.ptp() == 3) + assert_(np.all(x.ptp(0) == np.array([2, 2]))) + assert_(np.all(x.ptp(1) == np.array([1, 1]))) + + def test_var(self): + x = np.arange(9).reshape((3, 3)) + mx = x.view(np.matrix) + assert_equal(x.var(ddof=0), mx.var(ddof=0)) + assert_equal(x.var(ddof=1), mx.var(ddof=1)) + + def test_basic(self): + import numpy.linalg as linalg + + A = np.array([[1., 2.], + [3., 4.]]) + mA = matrix(A) + assert_(np.allclose(linalg.inv(A), mA.I)) + assert_(np.all(np.array(np.transpose(A) == mA.T))) + assert_(np.all(np.array(np.transpose(A) == mA.H))) + assert_(np.all(A == mA.A)) + + B = A + 2j*A + mB = matrix(B) + assert_(np.allclose(linalg.inv(B), mB.I)) + assert_(np.all(np.array(np.transpose(B) == mB.T))) + assert_(np.all(np.array(np.transpose(B).conj() == mB.H))) + + def test_pinv(self): + x = matrix(np.arange(6).reshape(2, 3)) + xpinv = matrix([[-0.77777778, 0.27777778], + [-0.11111111, 0.11111111], + [ 0.55555556, -0.05555556]]) + assert_almost_equal(x.I, xpinv) + + def test_comparisons(self): + A = np.arange(100).reshape(10, 10) + mA = matrix(A) + mB = matrix(A) + 0.1 + assert_(np.all(mB == A+0.1)) + assert_(np.all(mB == matrix(A+0.1))) + assert_(not np.any(mB == matrix(A-0.1))) + assert_(np.all(mA < mB)) + assert_(np.all(mA <= mB)) + assert_(np.all(mA <= mA)) + assert_(not np.any(mA < mA)) + + assert_(not np.any(mB < mA)) + assert_(np.all(mB >= mA)) + assert_(np.all(mB >= mB)) + assert_(not np.any(mB > mB)) + + assert_(np.all(mA == mA)) + assert_(not np.any(mA == mB)) + assert_(np.all(mB != mA)) + + assert_(not np.all(abs(mA) > 0)) + assert_(np.all(abs(mB > 0))) + + def test_asmatrix(self): + A = np.arange(100).reshape(10, 10) + mA = asmatrix(A) + A[0, 0] = -10 + assert_(A[0, 0] == mA[0, 0]) + + def test_noaxis(self): + A = matrix([[1, 0], [0, 1]]) + assert_(A.sum() == matrix(2)) + assert_(A.mean() == matrix(0.5)) + + def test_repr(self): + A = matrix([[1, 0], [0, 1]]) + assert_(repr(A) == "matrix([[1, 0],\n [0, 1]])") + + def test_make_bool_matrix_from_str(self): + A = matrix('True; True; False') + B = 
matrix([[True], [True], [False]]) + assert_array_equal(A, B) + +class TestCasting: + def test_basic(self): + A = np.arange(100).reshape(10, 10) + mA = matrix(A) + + mB = mA.copy() + O = np.ones((10, 10), np.float64) * 0.1 + mB = mB + O + assert_(mB.dtype.type == np.float64) + assert_(np.all(mA != mB)) + assert_(np.all(mB == mA+0.1)) + + mC = mA.copy() + O = np.ones((10, 10), np.complex128) + mC = mC * O + assert_(mC.dtype.type == np.complex128) + assert_(np.all(mA != mB)) + + +class TestAlgebra: + def test_basic(self): + import numpy.linalg as linalg + + A = np.array([[1., 2.], [3., 4.]]) + mA = matrix(A) + + B = np.identity(2) + for i in range(6): + assert_(np.allclose((mA ** i).A, B)) + B = np.dot(B, A) + + Ainv = linalg.inv(A) + B = np.identity(2) + for i in range(6): + assert_(np.allclose((mA ** -i).A, B)) + B = np.dot(B, Ainv) + + assert_(np.allclose((mA * mA).A, np.dot(A, A))) + assert_(np.allclose((mA + mA).A, (A + A))) + assert_(np.allclose((3*mA).A, (3*A))) + + mA2 = matrix(A) + mA2 *= 3 + assert_(np.allclose(mA2.A, 3*A)) + + def test_pow(self): + """Test raising a matrix to an integer power works as expected.""" + m = matrix("1. 2.; 3. 4.") + m2 = m.copy() + m2 **= 2 + mi = m.copy() + mi **= -1 + m4 = m2.copy() + m4 **= 2 + assert_array_almost_equal(m2, m**2) + assert_array_almost_equal(m4, np.dot(m2, m2)) + assert_array_almost_equal(np.dot(mi, m), np.eye(2)) + + def test_scalar_type_pow(self): + m = matrix([[1, 2], [3, 4]]) + for scalar_t in [np.int8, np.uint8]: + two = scalar_t(2) + assert_array_almost_equal(m ** 2, m ** two) + + def test_notimplemented(self): + '''Check that 'not implemented' operations produce a failure.''' + A = matrix([[1., 2.], + [3., 4.]]) + + # __rpow__ + with assert_raises(TypeError): + 1.0**A + + # __mul__ with something not a list, ndarray, tuple, or scalar + with assert_raises(TypeError): + A*object() + + +class TestMatrixReturn: + def test_instance_methods(self): + a = matrix([1.0], dtype='f8') + methodargs = { + 'astype': ('intc',), + 'clip': (0.0, 1.0), + 'compress': ([1],), + 'repeat': (1,), + 'reshape': (1,), + 'swapaxes': (0, 0), + 'dot': np.array([1.0]), + } + excluded_methods = [ + 'argmin', 'choose', 'dump', 'dumps', 'fill', 'getfield', + 'getA', 'getA1', 'item', 'nonzero', 'put', 'putmask', 'resize', + 'searchsorted', 'setflags', 'setfield', 'sort', + 'partition', 'argpartition', + 'take', 'tofile', 'tolist', 'tostring', 'tobytes', 'all', 'any', + 'sum', 'argmax', 'argmin', 'min', 'max', 'mean', 'var', 'ptp', + 'prod', 'std', 'ctypes', 'itemset', + ] + for attrib in dir(a): + if attrib.startswith('_') or attrib in excluded_methods: + continue + f = getattr(a, attrib) + if isinstance(f, collections.abc.Callable): + # reset contents of a + a.astype('f8') + a.fill(1.0) + if attrib in methodargs: + args = methodargs[attrib] + else: + args = () + b = f(*args) + assert_(type(b) is matrix, "%s" % attrib) + assert_(type(a.real) is matrix) + assert_(type(a.imag) is matrix) + c, d = matrix([0.0]).nonzero() + assert_(type(c) is np.ndarray) + assert_(type(d) is np.ndarray) + + +class TestIndexing: + def test_basic(self): + x = asmatrix(np.zeros((3, 2), float)) + y = np.zeros((3, 1), float) + y[:, 0] = [0.8, 0.2, 0.3] + x[:, 1] = y > 0.5 + assert_equal(x, [[0, 1], [0, 0], [0, 0]]) + + +class TestNewScalarIndexing: + a = matrix([[1, 2], [3, 4]]) + + def test_dimesions(self): + a = self.a + x = a[0] + assert_equal(x.ndim, 2) + + def test_array_from_matrix_list(self): + a = self.a + x = np.array([a, a]) + assert_equal(x.shape, [2, 2, 2]) + + def 
test_array_to_list(self): + a = self.a + assert_equal(a.tolist(), [[1, 2], [3, 4]]) + + def test_fancy_indexing(self): + a = self.a + x = a[1, [0, 1, 0]] + assert_(isinstance(x, matrix)) + assert_equal(x, matrix([[3, 4, 3]])) + x = a[[1, 0]] + assert_(isinstance(x, matrix)) + assert_equal(x, matrix([[3, 4], [1, 2]])) + x = a[[[1], [0]], [[1, 0], [0, 1]]] + assert_(isinstance(x, matrix)) + assert_equal(x, matrix([[4, 3], [1, 2]])) + + def test_matrix_element(self): + x = matrix([[1, 2, 3], [4, 5, 6]]) + assert_equal(x[0][0], matrix([[1, 2, 3]])) + assert_equal(x[0][0].shape, (1, 3)) + assert_equal(x[0].shape, (1, 3)) + assert_equal(x[:, 0].shape, (2, 1)) + + x = matrix(0) + assert_equal(x[0, 0], 0) + assert_equal(x[0], 0) + assert_equal(x[:, 0].shape, x.shape) + + def test_scalar_indexing(self): + x = asmatrix(np.zeros((3, 2), float)) + assert_equal(x[0, 0], x[0][0]) + + def test_row_column_indexing(self): + x = asmatrix(np.eye(2)) + assert_array_equal(x[0,:], [[1, 0]]) + assert_array_equal(x[1,:], [[0, 1]]) + assert_array_equal(x[:, 0], [[1], [0]]) + assert_array_equal(x[:, 1], [[0], [1]]) + + def test_boolean_indexing(self): + A = np.arange(6) + A.shape = (3, 2) + x = asmatrix(A) + assert_array_equal(x[:, np.array([True, False])], x[:, 0]) + assert_array_equal(x[np.array([True, False, False]),:], x[0,:]) + + def test_list_indexing(self): + A = np.arange(6) + A.shape = (3, 2) + x = asmatrix(A) + assert_array_equal(x[:, [1, 0]], x[:, ::-1]) + assert_array_equal(x[[2, 1, 0],:], x[::-1,:]) + + +class TestPower: + def test_returntype(self): + a = np.array([[0, 1], [0, 0]]) + assert_(type(matrix_power(a, 2)) is np.ndarray) + a = mat(a) + assert_(type(matrix_power(a, 2)) is matrix) + + def test_list(self): + assert_array_equal(matrix_power([[0, 1], [0, 0]], 2), [[0, 0], [0, 0]]) + + +class TestShape: + + a = np.array([[1], [2]]) + m = matrix([[1], [2]]) + + def test_shape(self): + assert_equal(self.a.shape, (2, 1)) + assert_equal(self.m.shape, (2, 1)) + + def test_numpy_ravel(self): + assert_equal(np.ravel(self.a).shape, (2,)) + assert_equal(np.ravel(self.m).shape, (2,)) + + def test_member_ravel(self): + assert_equal(self.a.ravel().shape, (2,)) + assert_equal(self.m.ravel().shape, (1, 2)) + + def test_member_flatten(self): + assert_equal(self.a.flatten().shape, (2,)) + assert_equal(self.m.flatten().shape, (1, 2)) + + def test_numpy_ravel_order(self): + x = np.array([[1, 2, 3], [4, 5, 6]]) + assert_equal(np.ravel(x), [1, 2, 3, 4, 5, 6]) + assert_equal(np.ravel(x, order='F'), [1, 4, 2, 5, 3, 6]) + assert_equal(np.ravel(x.T), [1, 4, 2, 5, 3, 6]) + assert_equal(np.ravel(x.T, order='A'), [1, 2, 3, 4, 5, 6]) + x = matrix([[1, 2, 3], [4, 5, 6]]) + assert_equal(np.ravel(x), [1, 2, 3, 4, 5, 6]) + assert_equal(np.ravel(x, order='F'), [1, 4, 2, 5, 3, 6]) + assert_equal(np.ravel(x.T), [1, 4, 2, 5, 3, 6]) + assert_equal(np.ravel(x.T, order='A'), [1, 2, 3, 4, 5, 6]) + + def test_matrix_ravel_order(self): + x = matrix([[1, 2, 3], [4, 5, 6]]) + assert_equal(x.ravel(), [[1, 2, 3, 4, 5, 6]]) + assert_equal(x.ravel(order='F'), [[1, 4, 2, 5, 3, 6]]) + assert_equal(x.T.ravel(), [[1, 4, 2, 5, 3, 6]]) + assert_equal(x.T.ravel(order='A'), [[1, 2, 3, 4, 5, 6]]) + + def test_array_memory_sharing(self): + assert_(np.may_share_memory(self.a, self.a.ravel())) + assert_(not np.may_share_memory(self.a, self.a.flatten())) + + def test_matrix_memory_sharing(self): + assert_(np.may_share_memory(self.m, self.m.ravel())) + assert_(not np.may_share_memory(self.m, self.m.flatten())) + + def test_expand_dims_matrix(self): + # 
matrices are always 2d - so expand_dims only makes sense when the + # type is changed away from matrix. + a = np.arange(10).reshape((2, 5)).view(np.matrix) + expanded = np.expand_dims(a, axis=1) + assert_equal(expanded.ndim, 3) + assert_(not isinstance(expanded, np.matrix)) diff --git a/.local/lib/python3.8/site-packages/numpy/matrixlib/tests/test_interaction.py b/.local/lib/python3.8/site-packages/numpy/matrixlib/tests/test_interaction.py new file mode 100644 index 0000000..5154bd6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/matrixlib/tests/test_interaction.py @@ -0,0 +1,354 @@ +"""Tests of interaction of matrix with other parts of numpy. + +Note that tests with MaskedArray and linalg are done in separate files. +""" +import pytest + +import textwrap +import warnings + +import numpy as np +from numpy.testing import (assert_, assert_equal, assert_raises, + assert_raises_regex, assert_array_equal, + assert_almost_equal, assert_array_almost_equal) + + +def test_fancy_indexing(): + # The matrix class messes with the shape. While this is always + # weird (getitem is not used, it does not have setitem nor knows + # about fancy indexing), this tests gh-3110 + # 2018-04-29: moved here from core.tests.test_index. + m = np.matrix([[1, 2], [3, 4]]) + + assert_(isinstance(m[[0, 1, 0], :], np.matrix)) + + # gh-3110. Note the transpose currently because matrices do *not* + # support dimension fixing for fancy indexing correctly. + x = np.asmatrix(np.arange(50).reshape(5, 10)) + assert_equal(x[:2, np.array(-1)], x[:2, -1].T) + + +def test_polynomial_mapdomain(): + # test that polynomial preserved matrix subtype. + # 2018-04-29: moved here from polynomial.tests.polyutils. + dom1 = [0, 4] + dom2 = [1, 3] + x = np.matrix([dom1, dom1]) + res = np.polynomial.polyutils.mapdomain(x, dom1, dom2) + assert_(isinstance(res, np.matrix)) + + +def test_sort_matrix_none(): + # 2018-04-29: moved here from core.tests.test_multiarray + a = np.matrix([[2, 1, 0]]) + actual = np.sort(a, axis=None) + expected = np.matrix([[0, 1, 2]]) + assert_equal(actual, expected) + assert_(type(expected) is np.matrix) + + +def test_partition_matrix_none(): + # gh-4301 + # 2018-04-29: moved here from core.tests.test_multiarray + a = np.matrix([[2, 1, 0]]) + actual = np.partition(a, 1, axis=None) + expected = np.matrix([[0, 1, 2]]) + assert_equal(actual, expected) + assert_(type(expected) is np.matrix) + + +def test_dot_scalar_and_matrix_of_objects(): + # Ticket #2469 + # 2018-04-29: moved here from core.tests.test_multiarray + arr = np.matrix([1, 2], dtype=object) + desired = np.matrix([[3, 6]], dtype=object) + assert_equal(np.dot(arr, 3), desired) + assert_equal(np.dot(3, arr), desired) + + +def test_inner_scalar_and_matrix(): + # 2018-04-29: moved here from core.tests.test_multiarray + for dt in np.typecodes['AllInteger'] + np.typecodes['AllFloat'] + '?': + sca = np.array(3, dtype=dt)[()] + arr = np.matrix([[1, 2], [3, 4]], dtype=dt) + desired = np.matrix([[3, 6], [9, 12]], dtype=dt) + assert_equal(np.inner(arr, sca), desired) + assert_equal(np.inner(sca, arr), desired) + + +def test_inner_scalar_and_matrix_of_objects(): + # Ticket #4482 + # 2018-04-29: moved here from core.tests.test_multiarray + arr = np.matrix([1, 2], dtype=object) + desired = np.matrix([[3, 6]], dtype=object) + assert_equal(np.inner(arr, 3), desired) + assert_equal(np.inner(3, arr), desired) + + +def test_iter_allocate_output_subtype(): + # Make sure that the subtype with priority wins + # 2018-04-29: moved here from core.tests.test_nditer, given the 
+ # matrix specific shape test. + + # matrix vs ndarray + a = np.matrix([[1, 2], [3, 4]]) + b = np.arange(4).reshape(2, 2).T + i = np.nditer([a, b, None], [], + [['readonly'], ['readonly'], ['writeonly', 'allocate']]) + assert_(type(i.operands[2]) is np.matrix) + assert_(type(i.operands[2]) is not np.ndarray) + assert_equal(i.operands[2].shape, (2, 2)) + + # matrix always wants things to be 2D + b = np.arange(4).reshape(1, 2, 2) + assert_raises(RuntimeError, np.nditer, [a, b, None], [], + [['readonly'], ['readonly'], ['writeonly', 'allocate']]) + # but if subtypes are disabled, the result can still work + i = np.nditer([a, b, None], [], + [['readonly'], ['readonly'], + ['writeonly', 'allocate', 'no_subtype']]) + assert_(type(i.operands[2]) is np.ndarray) + assert_(type(i.operands[2]) is not np.matrix) + assert_equal(i.operands[2].shape, (1, 2, 2)) + + +def like_function(): + # 2018-04-29: moved here from core.tests.test_numeric + a = np.matrix([[1, 2], [3, 4]]) + for like_function in np.zeros_like, np.ones_like, np.empty_like: + b = like_function(a) + assert_(type(b) is np.matrix) + + c = like_function(a, subok=False) + assert_(type(c) is not np.matrix) + + +def test_array_astype(): + # 2018-04-29: copied here from core.tests.test_api + # subok=True passes through a matrix + a = np.matrix([[0, 1, 2], [3, 4, 5]], dtype='f4') + b = a.astype('f4', subok=True, copy=False) + assert_(a is b) + + # subok=True is default, and creates a subtype on a cast + b = a.astype('i4', copy=False) + assert_equal(a, b) + assert_equal(type(b), np.matrix) + + # subok=False never returns a matrix + b = a.astype('f4', subok=False, copy=False) + assert_equal(a, b) + assert_(not (a is b)) + assert_(type(b) is not np.matrix) + + +def test_stack(): + # 2018-04-29: copied here from core.tests.test_shape_base + # check np.matrix cannot be stacked + m = np.matrix([[1, 2], [3, 4]]) + assert_raises_regex(ValueError, 'shape too large to be a matrix', + np.stack, [m, m]) + + +def test_object_scalar_multiply(): + # Tickets #2469 and #4482 + # 2018-04-29: moved here from core.tests.test_ufunc + arr = np.matrix([1, 2], dtype=object) + desired = np.matrix([[3, 6]], dtype=object) + assert_equal(np.multiply(arr, 3), desired) + assert_equal(np.multiply(3, arr), desired) + + +def test_nanfunctions_matrices(): + # Check that it works and that type and + # shape are preserved + # 2018-04-29: moved here from core.tests.test_nanfunctions + mat = np.matrix(np.eye(3)) + for f in [np.nanmin, np.nanmax]: + res = f(mat, axis=0) + assert_(isinstance(res, np.matrix)) + assert_(res.shape == (1, 3)) + res = f(mat, axis=1) + assert_(isinstance(res, np.matrix)) + assert_(res.shape == (3, 1)) + res = f(mat) + assert_(np.isscalar(res)) + # check that rows of nan are dealt with for subclasses (#4628) + mat[1] = np.nan + for f in [np.nanmin, np.nanmax]: + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + res = f(mat, axis=0) + assert_(isinstance(res, np.matrix)) + assert_(not np.any(np.isnan(res))) + assert_(len(w) == 0) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + res = f(mat, axis=1) + assert_(isinstance(res, np.matrix)) + assert_(np.isnan(res[1, 0]) and not np.isnan(res[0, 0]) + and not np.isnan(res[2, 0])) + assert_(len(w) == 1, 'no warning raised') + assert_(issubclass(w[0].category, RuntimeWarning)) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + res = f(mat) + assert_(np.isscalar(res)) + assert_(res != np.nan) + assert_(len(w) == 
0) + + +def test_nanfunctions_matrices_general(): + # Check that it works and that type and + # shape are preserved + # 2018-04-29: moved here from core.tests.test_nanfunctions + mat = np.matrix(np.eye(3)) + for f in (np.nanargmin, np.nanargmax, np.nansum, np.nanprod, + np.nanmean, np.nanvar, np.nanstd): + res = f(mat, axis=0) + assert_(isinstance(res, np.matrix)) + assert_(res.shape == (1, 3)) + res = f(mat, axis=1) + assert_(isinstance(res, np.matrix)) + assert_(res.shape == (3, 1)) + res = f(mat) + assert_(np.isscalar(res)) + + for f in np.nancumsum, np.nancumprod: + res = f(mat, axis=0) + assert_(isinstance(res, np.matrix)) + assert_(res.shape == (3, 3)) + res = f(mat, axis=1) + assert_(isinstance(res, np.matrix)) + assert_(res.shape == (3, 3)) + res = f(mat) + assert_(isinstance(res, np.matrix)) + assert_(res.shape == (1, 3*3)) + + +def test_average_matrix(): + # 2018-04-29: moved here from core.tests.test_function_base. + y = np.matrix(np.random.rand(5, 5)) + assert_array_equal(y.mean(0), np.average(y, 0)) + + a = np.matrix([[1, 2], [3, 4]]) + w = np.matrix([[1, 2], [3, 4]]) + + r = np.average(a, axis=0, weights=w) + assert_equal(type(r), np.matrix) + assert_equal(r, [[2.5, 10.0/3]]) + + +def test_trapz_matrix(): + # Test to make sure matrices give the same answer as ndarrays + # 2018-04-29: moved here from core.tests.test_function_base. + x = np.linspace(0, 5) + y = x * x + r = np.trapz(y, x) + mx = np.matrix(x) + my = np.matrix(y) + mr = np.trapz(my, mx) + assert_almost_equal(mr, r) + + +def test_ediff1d_matrix(): + # 2018-04-29: moved here from core.tests.test_arraysetops. + assert(isinstance(np.ediff1d(np.matrix(1)), np.matrix)) + assert(isinstance(np.ediff1d(np.matrix(1), to_begin=1), np.matrix)) + + +def test_apply_along_axis_matrix(): + # this test is particularly malicious because matrix + # refuses to become 1d + # 2018-04-29: moved here from core.tests.test_shape_base. + def double(row): + return row * 2 + + m = np.matrix([[0, 1], [2, 3]]) + expected = np.matrix([[0, 2], [4, 6]]) + + result = np.apply_along_axis(double, 0, m) + assert_(isinstance(result, np.matrix)) + assert_array_equal(result, expected) + + result = np.apply_along_axis(double, 1, m) + assert_(isinstance(result, np.matrix)) + assert_array_equal(result, expected) + + +def test_kron_matrix(): + # 2018-04-29: moved here from core.tests.test_shape_base. + a = np.ones([2, 2]) + m = np.asmatrix(a) + assert_equal(type(np.kron(a, a)), np.ndarray) + assert_equal(type(np.kron(m, m)), np.matrix) + assert_equal(type(np.kron(a, m)), np.matrix) + assert_equal(type(np.kron(m, a)), np.matrix) + + +class TestConcatenatorMatrix: + # 2018-04-29: moved here from core.tests.test_index_tricks. 
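+    # For orientation (an illustrative sketch, not one of the original
+    # tests): the 'r' and 'c' directives below make np.r_ build a row- or
+    # column-matrix from its arguments, e.g.
+    #     np.r_['r', [1, 2], [3, 4]]  ->  matrix([[1, 2, 3, 4]])
+    #     np.r_['c', [1, 2], [3, 4]]  ->  matrix([[1], [2], [3], [4]])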
+ def test_matrix(self): + a = [1, 2] + b = [3, 4] + + ab_r = np.r_['r', a, b] + ab_c = np.r_['c', a, b] + + assert_equal(type(ab_r), np.matrix) + assert_equal(type(ab_c), np.matrix) + + assert_equal(np.array(ab_r), [[1, 2, 3, 4]]) + assert_equal(np.array(ab_c), [[1], [2], [3], [4]]) + + assert_raises(ValueError, lambda: np.r_['rc', a, b]) + + def test_matrix_scalar(self): + r = np.r_['r', [1, 2], 3] + assert_equal(type(r), np.matrix) + assert_equal(np.array(r), [[1, 2, 3]]) + + def test_matrix_builder(self): + a = np.array([1]) + b = np.array([2]) + c = np.array([3]) + d = np.array([4]) + actual = np.r_['a, b; c, d'] + expected = np.bmat([[a, b], [c, d]]) + + assert_equal(actual, expected) + assert_equal(type(actual), type(expected)) + + +def test_array_equal_error_message_matrix(): + # 2018-04-29: moved here from testing.tests.test_utils. + with pytest.raises(AssertionError) as exc_info: + assert_equal(np.array([1, 2]), np.matrix([1, 2])) + msg = str(exc_info.value) + msg_reference = textwrap.dedent("""\ + + Arrays are not equal + + (shapes (2,), (1, 2) mismatch) + x: array([1, 2]) + y: matrix([[1, 2]])""") + assert_equal(msg, msg_reference) + + +def test_array_almost_equal_matrix(): + # Matrix slicing keeps things 2-D, while array does not necessarily. + # See gh-8452. + # 2018-04-29: moved here from testing.tests.test_utils. + m1 = np.matrix([[1., 2.]]) + m2 = np.matrix([[1., np.nan]]) + m3 = np.matrix([[1., -np.inf]]) + m4 = np.matrix([[np.nan, np.inf]]) + m5 = np.matrix([[1., 2.], [np.nan, np.inf]]) + for assert_func in assert_array_almost_equal, assert_almost_equal: + for m in m1, m2, m3, m4, m5: + assert_func(m, m) + a = np.array(m) + assert_func(a, m) + assert_func(m, a) diff --git a/.local/lib/python3.8/site-packages/numpy/matrixlib/tests/test_masked_matrix.py b/.local/lib/python3.8/site-packages/numpy/matrixlib/tests/test_masked_matrix.py new file mode 100644 index 0000000..95d3f44 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/matrixlib/tests/test_masked_matrix.py @@ -0,0 +1,231 @@ +import numpy as np +from numpy.testing import assert_warns +from numpy.ma.testutils import (assert_, assert_equal, assert_raises, + assert_array_equal) +from numpy.ma.core import (masked_array, masked_values, masked, allequal, + MaskType, getmask, MaskedArray, nomask, + log, add, hypot, divide) +from numpy.ma.extras import mr_ +from numpy.compat import pickle + + +class MMatrix(MaskedArray, np.matrix,): + + def __new__(cls, data, mask=nomask): + mat = np.matrix(data) + _data = MaskedArray.__new__(cls, data=mat, mask=mask) + return _data + + def __array_finalize__(self, obj): + np.matrix.__array_finalize__(self, obj) + MaskedArray.__array_finalize__(self, obj) + return + + @property + def _series(self): + _view = self.view(MaskedArray) + _view._sharedmask = False + return _view + + +class TestMaskedMatrix: + def test_matrix_indexing(self): + # Tests conversions and indexing + x1 = np.matrix([[1, 2, 3], [4, 3, 2]]) + x2 = masked_array(x1, mask=[[1, 0, 0], [0, 1, 0]]) + x3 = masked_array(x1, mask=[[0, 1, 0], [1, 0, 0]]) + x4 = masked_array(x1) + # test conversion to strings + str(x2) # raises? + repr(x2) # raises? 
+ # tests of indexing + assert_(type(x2[1, 0]) is type(x1[1, 0])) + assert_(x1[1, 0] == x2[1, 0]) + assert_(x2[1, 1] is masked) + assert_equal(x1[0, 2], x2[0, 2]) + assert_equal(x1[0, 1:], x2[0, 1:]) + assert_equal(x1[:, 2], x2[:, 2]) + assert_equal(x1[:], x2[:]) + assert_equal(x1[1:], x3[1:]) + x1[0, 2] = 9 + x2[0, 2] = 9 + assert_equal(x1, x2) + x1[0, 1:] = 99 + x2[0, 1:] = 99 + assert_equal(x1, x2) + x2[0, 1] = masked + assert_equal(x1, x2) + x2[0, 1:] = masked + assert_equal(x1, x2) + x2[0, :] = x1[0, :] + x2[0, 1] = masked + assert_(allequal(getmask(x2), np.array([[0, 1, 0], [0, 1, 0]]))) + x3[1, :] = masked_array([1, 2, 3], [1, 1, 0]) + assert_(allequal(getmask(x3)[1], masked_array([1, 1, 0]))) + assert_(allequal(getmask(x3[1]), masked_array([1, 1, 0]))) + x4[1, :] = masked_array([1, 2, 3], [1, 1, 0]) + assert_(allequal(getmask(x4[1]), masked_array([1, 1, 0]))) + assert_(allequal(x4[1], masked_array([1, 2, 3]))) + x1 = np.matrix(np.arange(5) * 1.0) + x2 = masked_values(x1, 3.0) + assert_equal(x1, x2) + assert_(allequal(masked_array([0, 0, 0, 1, 0], dtype=MaskType), + x2.mask)) + assert_equal(3.0, x2.fill_value) + + def test_pickling_subbaseclass(self): + # Test pickling w/ a subclass of ndarray + a = masked_array(np.matrix(list(range(10))), mask=[1, 0, 1, 0, 0] * 2) + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + a_pickled = pickle.loads(pickle.dumps(a, protocol=proto)) + assert_equal(a_pickled._mask, a._mask) + assert_equal(a_pickled, a) + assert_(isinstance(a_pickled._data, np.matrix)) + + def test_count_mean_with_matrix(self): + m = masked_array(np.matrix([[1, 2], [3, 4]]), mask=np.zeros((2, 2))) + + assert_equal(m.count(axis=0).shape, (1, 2)) + assert_equal(m.count(axis=1).shape, (2, 1)) + + # Make sure broadcasting inside mean and var work + assert_equal(m.mean(axis=0), [[2., 3.]]) + assert_equal(m.mean(axis=1), [[1.5], [3.5]]) + + def test_flat(self): + # Test that flat can return items even for matrices [#4585, #4615] + # test simple access + test = masked_array(np.matrix([[1, 2, 3]]), mask=[0, 0, 1]) + assert_equal(test.flat[1], 2) + assert_equal(test.flat[2], masked) + assert_(np.all(test.flat[0:2] == test[0, 0:2])) + # Test flat on masked_matrices + test = masked_array(np.matrix([[1, 2, 3]]), mask=[0, 0, 1]) + test.flat = masked_array([3, 2, 1], mask=[1, 0, 0]) + control = masked_array(np.matrix([[3, 2, 1]]), mask=[1, 0, 0]) + assert_equal(test, control) + # Test setting + test = masked_array(np.matrix([[1, 2, 3]]), mask=[0, 0, 1]) + testflat = test.flat + testflat[:] = testflat[[2, 1, 0]] + assert_equal(test, control) + testflat[0] = 9 + # test that matrices keep the correct shape (#4615) + a = masked_array(np.matrix(np.eye(2)), mask=0) + b = a.flat + b01 = b[:2] + assert_equal(b01.data, np.array([[1., 0.]])) + assert_equal(b01.mask, np.array([[False, False]])) + + def test_allany_onmatrices(self): + x = np.array([[0.13, 0.26, 0.90], + [0.28, 0.33, 0.63], + [0.31, 0.87, 0.70]]) + X = np.matrix(x) + m = np.array([[True, False, False], + [False, False, False], + [True, True, False]], dtype=np.bool_) + mX = masked_array(X, mask=m) + mXbig = (mX > 0.5) + mXsmall = (mX < 0.5) + + assert_(not mXbig.all()) + assert_(mXbig.any()) + assert_equal(mXbig.all(0), np.matrix([False, False, True])) + assert_equal(mXbig.all(1), np.matrix([False, False, True]).T) + assert_equal(mXbig.any(0), np.matrix([False, False, True])) + assert_equal(mXbig.any(1), np.matrix([True, True, True]).T) + + assert_(not mXsmall.all()) + assert_(mXsmall.any()) + assert_equal(mXsmall.all(0), 
np.matrix([True, True, False])) + assert_equal(mXsmall.all(1), np.matrix([False, False, False]).T) + assert_equal(mXsmall.any(0), np.matrix([True, True, False])) + assert_equal(mXsmall.any(1), np.matrix([True, True, False]).T) + + def test_compressed(self): + a = masked_array(np.matrix([1, 2, 3, 4]), mask=[0, 0, 0, 0]) + b = a.compressed() + assert_equal(b, a) + assert_(isinstance(b, np.matrix)) + a[0, 0] = masked + b = a.compressed() + assert_equal(b, [[2, 3, 4]]) + + def test_ravel(self): + a = masked_array(np.matrix([1, 2, 3, 4, 5]), mask=[[0, 1, 0, 0, 0]]) + aravel = a.ravel() + assert_equal(aravel.shape, (1, 5)) + assert_equal(aravel._mask.shape, a.shape) + + def test_view(self): + # Test view w/ flexible dtype + iterator = list(zip(np.arange(10), np.random.rand(10))) + data = np.array(iterator) + a = masked_array(iterator, dtype=[('a', float), ('b', float)]) + a.mask[0] = (1, 0) + test = a.view((float, 2), np.matrix) + assert_equal(test, data) + assert_(isinstance(test, np.matrix)) + assert_(not isinstance(test, MaskedArray)) + + +class TestSubclassing: + # Test suite for masked subclasses of ndarray. + + def setup(self): + x = np.arange(5, dtype='float') + mx = MMatrix(x, mask=[0, 1, 0, 0, 0]) + self.data = (x, mx) + + def test_maskedarray_subclassing(self): + # Tests subclassing MaskedArray + (x, mx) = self.data + assert_(isinstance(mx._data, np.matrix)) + + def test_masked_unary_operations(self): + # Tests masked_unary_operation + (x, mx) = self.data + with np.errstate(divide='ignore'): + assert_(isinstance(log(mx), MMatrix)) + assert_equal(log(x), np.log(x)) + + def test_masked_binary_operations(self): + # Tests masked_binary_operation + (x, mx) = self.data + # Result should be a MMatrix + assert_(isinstance(add(mx, mx), MMatrix)) + assert_(isinstance(add(mx, x), MMatrix)) + # Result should work + assert_equal(add(mx, x), mx+x) + assert_(isinstance(add(mx, mx)._data, np.matrix)) + with assert_warns(DeprecationWarning): + assert_(isinstance(add.outer(mx, mx), MMatrix)) + assert_(isinstance(hypot(mx, mx), MMatrix)) + assert_(isinstance(hypot(mx, x), MMatrix)) + + def test_masked_binary_operations2(self): + # Tests domained_masked_binary_operation + (x, mx) = self.data + xmx = masked_array(mx.data.__array__(), mask=mx.mask) + assert_(isinstance(divide(mx, mx), MMatrix)) + assert_(isinstance(divide(mx, x), MMatrix)) + assert_equal(divide(mx, mx), divide(xmx, xmx)) + +class TestConcatenator: + # Tests for mr_, the equivalent of r_ for masked arrays. + + def test_matrix_builder(self): + assert_raises(np.ma.MAError, lambda: mr_['1, 2; 3, 4']) + + def test_matrix(self): + # Test consistency with unmasked version. If we ever deprecate + # matrix, this test should either still pass, or both actual and + # expected should fail to be build. 
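+        # For reference (illustrative): mr_['r', 1, 2, 3] is expected to
+        # produce a masked array whose .data is matrix([[1, 2, 3]]).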
+        actual = mr_['r', 1, 2, 3]
+        expected = np.ma.array(np.r_['r', 1, 2, 3])
+        assert_array_equal(actual, expected)
+
+        # outer type is masked array, inner type is matrix
+        assert_equal(type(actual), type(expected))
+        assert_equal(type(actual.data), type(expected.data))
diff --git a/.local/lib/python3.8/site-packages/numpy/matrixlib/tests/test_matrix_linalg.py b/.local/lib/python3.8/site-packages/numpy/matrixlib/tests/test_matrix_linalg.py
new file mode 100644
index 0000000..106c2e3
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/matrixlib/tests/test_matrix_linalg.py
@@ -0,0 +1,93 @@
+""" Test functions for linalg module using the matrix class."""
+import numpy as np
+
+from numpy.linalg.tests.test_linalg import (
+    LinalgCase, apply_tag, TestQR as _TestQR, LinalgTestCase,
+    _TestNorm2D, _TestNormDoubleBase, _TestNormSingleBase, _TestNormInt64Base,
+    SolveCases, InvCases, EigvalsCases, EigCases, SVDCases, CondCases,
+    PinvCases, DetCases, LstsqCases)
+
+
+CASES = []
+
+# square test cases
+CASES += apply_tag('square', [
+    LinalgCase("0x0_matrix",
+               np.empty((0, 0), dtype=np.double).view(np.matrix),
+               np.empty((0, 1), dtype=np.double).view(np.matrix),
+               tags={'size-0'}),
+    LinalgCase("matrix_b_only",
+               np.array([[1., 2.], [3., 4.]]),
+               np.matrix([2., 1.]).T),
+    LinalgCase("matrix_a_and_b",
+               np.matrix([[1., 2.], [3., 4.]]),
+               np.matrix([2., 1.]).T),
+])
+
+# hermitian test-cases
+CASES += apply_tag('hermitian', [
+    LinalgCase("hmatrix_a_and_b",
+               np.matrix([[1., 2.], [2., 1.]]),
+               None),
+])
+# No need to make generalized or strided cases for matrices.
+
+
+class MatrixTestCase(LinalgTestCase):
+    TEST_CASES = CASES
+
+
+class TestSolveMatrix(SolveCases, MatrixTestCase):
+    pass
+
+
+class TestInvMatrix(InvCases, MatrixTestCase):
+    pass
+
+
+class TestEigvalsMatrix(EigvalsCases, MatrixTestCase):
+    pass
+
+
+class TestEigMatrix(EigCases, MatrixTestCase):
+    pass
+
+
+class TestSVDMatrix(SVDCases, MatrixTestCase):
+    pass
+
+
+class TestCondMatrix(CondCases, MatrixTestCase):
+    pass
+
+
+class TestPinvMatrix(PinvCases, MatrixTestCase):
+    pass
+
+
+class TestDetMatrix(DetCases, MatrixTestCase):
+    pass
+
+
+class TestLstsqMatrix(LstsqCases, MatrixTestCase):
+    pass
+
+
+class _TestNorm2DMatrix(_TestNorm2D):
+    array = np.matrix
+
+
+class TestNormDoubleMatrix(_TestNorm2DMatrix, _TestNormDoubleBase):
+    pass
+
+
+class TestNormSingleMatrix(_TestNorm2DMatrix, _TestNormSingleBase):
+    pass
+
+
+class TestNormInt64Matrix(_TestNorm2DMatrix, _TestNormInt64Base):
+    pass
+
+
+class TestQRMatrix(_TestQR):
+    array = np.matrix
diff --git a/.local/lib/python3.8/site-packages/numpy/matrixlib/tests/test_multiarray.py b/.local/lib/python3.8/site-packages/numpy/matrixlib/tests/test_multiarray.py
new file mode 100644
index 0000000..638d0d1
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/matrixlib/tests/test_multiarray.py
@@ -0,0 +1,16 @@
+import numpy as np
+from numpy.testing import assert_, assert_equal, assert_array_equal
+
+class TestView:
+    def test_type(self):
+        x = np.array([1, 2, 3])
+        assert_(isinstance(x.view(np.matrix), np.matrix))
+
+    def test_keywords(self):
+        x = np.array([(1, 2)], dtype=[('a', np.int8), ('b', np.int8)])
+        # We must be specific about the endianness here:
+        y = x.view(dtype='<i2', type=np.matrix)
+        assert_array_equal(y, [[513]])
+
+        assert_(isinstance(y, np.matrix))
+        assert_equal(y.dtype, np.dtype('<i2'))
diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/__init__.py b/.local/lib/python3.8/site-packages/numpy/polynomial/__init__.py
new file mode 100644
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/polynomial/__init__.py
+    >>> from numpy.polynomial import Chebyshev
+    >>> c = Chebyshev.fit(xdata, ydata, deg=1)
+
+is preferred over the `chebyshev.chebfit` function from the
+``np.polynomial.chebyshev`` module::
+
+    >>> from numpy.polynomial.chebyshev import chebfit
+    >>> c = chebfit(xdata, ydata, deg=1)
+
+See
:doc:`routines.polynomials.classes` for more details. + +Convenience Classes +=================== + +The following lists the various constants and methods common to all of +the classes representing the various kinds of polynomials. In the following, +the term ``Poly`` represents any one of the convenience classes (e.g. +`~polynomial.Polynomial`, `~chebyshev.Chebyshev`, `~hermite.Hermite`, etc.) +while the lowercase ``p`` represents an **instance** of a polynomial class. + +Constants +--------- + +- ``Poly.domain`` -- Default domain +- ``Poly.window`` -- Default window +- ``Poly.basis_name`` -- String used to represent the basis +- ``Poly.maxpower`` -- Maximum value ``n`` such that ``p**n`` is allowed +- ``Poly.nickname`` -- String used in printing + +Creation +-------- + +Methods for creating polynomial instances. + +- ``Poly.basis(degree)`` -- Basis polynomial of given degree +- ``Poly.identity()`` -- ``p`` where ``p(x) = x`` for all ``x`` +- ``Poly.fit(x, y, deg)`` -- ``p`` of degree ``deg`` with coefficients + determined by the least-squares fit to the data ``x``, ``y`` +- ``Poly.fromroots(roots)`` -- ``p`` with specified roots +- ``p.copy()`` -- Create a copy of ``p`` + +Conversion +---------- + +Methods for converting a polynomial instance of one kind to another. + +- ``p.cast(Poly)`` -- Convert ``p`` to instance of kind ``Poly`` +- ``p.convert(Poly)`` -- Convert ``p`` to instance of kind ``Poly`` or map + between ``domain`` and ``window`` + +Calculus +-------- +- ``p.deriv()`` -- Take the derivative of ``p`` +- ``p.integ()`` -- Integrate ``p`` + +Validation +---------- +- ``Poly.has_samecoef(p1, p2)`` -- Check if coefficients match +- ``Poly.has_samedomain(p1, p2)`` -- Check if domains match +- ``Poly.has_sametype(p1, p2)`` -- Check if types match +- ``Poly.has_samewindow(p1, p2)`` -- Check if windows match + +Misc +---- +- ``p.linspace()`` -- Return ``x, p(x)`` at equally-spaced points in ``domain`` +- ``p.mapparms()`` -- Return the parameters for the linear mapping between + ``domain`` and ``window``. +- ``p.roots()`` -- Return the roots of `p`. +- ``p.trim()`` -- Remove trailing coefficients. +- ``p.cutdeg(degree)`` -- Truncate p to given degree +- ``p.truncate(size)`` -- Truncate p to given size + +""" +from .polynomial import Polynomial +from .chebyshev import Chebyshev +from .legendre import Legendre +from .hermite import Hermite +from .hermite_e import HermiteE +from .laguerre import Laguerre + +__all__ = [ + "set_default_printstyle", + "polynomial", "Polynomial", + "chebyshev", "Chebyshev", + "legendre", "Legendre", + "hermite", "Hermite", + "hermite_e", "HermiteE", + "laguerre", "Laguerre", +] + + +def set_default_printstyle(style): + """ + Set the default format for the string representation of polynomials. + + Values for ``style`` must be valid inputs to ``__format__``, i.e. 'ascii' + or 'unicode'. + + Parameters + ---------- + style : str + Format string for default printing style. Must be either 'ascii' or + 'unicode'. + + Notes + ----- + The default format depends on the platform: 'unicode' is used on + Unix-based systems and 'ascii' on Windows. This determination is based on + default font support for the unicode superscript and subscript ranges. 
+ + Examples + -------- + >>> p = np.polynomial.Polynomial([1, 2, 3]) + >>> c = np.polynomial.Chebyshev([1, 2, 3]) + >>> np.polynomial.set_default_printstyle('unicode') + >>> print(p) + 1.0 + 2.0·x¹ + 3.0·x² + >>> print(c) + 1.0 + 2.0·T₁(x) + 3.0·T₂(x) + >>> np.polynomial.set_default_printstyle('ascii') + >>> print(p) + 1.0 + 2.0 x**1 + 3.0 x**2 + >>> print(c) + 1.0 + 2.0 T_1(x) + 3.0 T_2(x) + >>> # Formatting supersedes all class/package-level defaults + >>> print(f"{p:unicode}") + 1.0 + 2.0·x¹ + 3.0·x² + """ + if style not in ('unicode', 'ascii'): + raise ValueError( + f"Unsupported format string '{style}'. Valid options are 'ascii' " + f"and 'unicode'" + ) + _use_unicode = True + if style == 'ascii': + _use_unicode = False + from ._polybase import ABCPolyBase + ABCPolyBase._use_unicode = _use_unicode + + +from numpy._pytesttester import PytestTester +test = PytestTester(__name__) +del PytestTester diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/__init__.pyi b/.local/lib/python3.8/site-packages/numpy/polynomial/__init__.pyi new file mode 100644 index 0000000..e0cfedd --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/polynomial/__init__.pyi @@ -0,0 +1,24 @@ +from typing import List + +from numpy._pytesttester import PytestTester + +from numpy.polynomial import ( + chebyshev as chebyshev, + hermite as hermite, + hermite_e as hermite_e, + laguerre as laguerre, + legendre as legendre, + polynomial as polynomial, +) +from numpy.polynomial.chebyshev import Chebyshev as Chebyshev +from numpy.polynomial.hermite import Hermite as Hermite +from numpy.polynomial.hermite_e import HermiteE as HermiteE +from numpy.polynomial.laguerre import Laguerre as Laguerre +from numpy.polynomial.legendre import Legendre as Legendre +from numpy.polynomial.polynomial import Polynomial as Polynomial + +__all__: List[str] +__path__: List[str] +test: PytestTester + +def set_default_printstyle(style): ... 
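The convenience-class API catalogued above can be exercised end to end. A
minimal sketch follows (the sample arrays `xdata`/`ydata` and the `rng` seed
are illustrative assumptions, not from the source; only methods documented
above are used):

    import numpy as np
    from numpy.polynomial import Chebyshev

    # Illustrative sample data: noisy points on a line.
    rng = np.random.default_rng(0)
    xdata = np.linspace(-1, 1, 50)
    ydata = 2.0 * xdata + 0.5 + 0.01 * rng.normal(size=50)

    # Least-squares fit via the class method, as recommended over chebfit.
    c = Chebyshev.fit(xdata, ydata, deg=1)

    dc = c.deriv()          # derivative, still a Chebyshev instance
    xs, ys = c.linspace(9)  # x, p(x) pairs across the domain

    # Force the ascii printing style documented above.
    np.polynomial.set_default_printstyle('ascii')
    print(c)                # renders using T_n(x) basis terms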
diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/polynomial/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..67566a0 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/polynomial/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/__pycache__/_polybase.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/polynomial/__pycache__/_polybase.cpython-38.pyc new file mode 100644 index 0000000..fb81f18 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/polynomial/__pycache__/_polybase.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/__pycache__/chebyshev.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/polynomial/__pycache__/chebyshev.cpython-38.pyc new file mode 100644 index 0000000..1b4b082 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/polynomial/__pycache__/chebyshev.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/__pycache__/hermite.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/polynomial/__pycache__/hermite.cpython-38.pyc new file mode 100644 index 0000000..d333592 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/polynomial/__pycache__/hermite.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/__pycache__/hermite_e.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/polynomial/__pycache__/hermite_e.cpython-38.pyc new file mode 100644 index 0000000..4976258 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/polynomial/__pycache__/hermite_e.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/__pycache__/laguerre.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/polynomial/__pycache__/laguerre.cpython-38.pyc new file mode 100644 index 0000000..3a054c7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/polynomial/__pycache__/laguerre.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/__pycache__/legendre.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/polynomial/__pycache__/legendre.cpython-38.pyc new file mode 100644 index 0000000..163a5be Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/polynomial/__pycache__/legendre.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/__pycache__/polynomial.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/polynomial/__pycache__/polynomial.cpython-38.pyc new file mode 100644 index 0000000..4a1e8e5 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/polynomial/__pycache__/polynomial.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/__pycache__/polyutils.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/polynomial/__pycache__/polyutils.cpython-38.pyc new file mode 100644 index 0000000..1e6de60 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/polynomial/__pycache__/polyutils.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/__pycache__/setup.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/polynomial/__pycache__/setup.cpython-38.pyc new file mode 100644 index 0000000..a7d0f4b Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/numpy/polynomial/__pycache__/setup.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/_polybase.py b/.local/lib/python3.8/site-packages/numpy/polynomial/_polybase.py new file mode 100644 index 0000000..155d728 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/polynomial/_polybase.py @@ -0,0 +1,1141 @@ +""" +Abstract base class for the various polynomial Classes. + +The ABCPolyBase class provides the methods needed to implement the common API +for the various polynomial classes. It operates as a mixin, but uses the +abc module from the stdlib, hence it is only available for Python >= 2.6. + +""" +import os +import abc +import numbers + +import numpy as np +from . import polyutils as pu + +__all__ = ['ABCPolyBase'] + +class ABCPolyBase(abc.ABC): + """An abstract base class for immutable series classes. + + ABCPolyBase provides the standard Python numerical methods + '+', '-', '*', '//', '%', 'divmod', '**', and '()' along with the + methods listed below. + + .. versionadded:: 1.9.0 + + Parameters + ---------- + coef : array_like + Series coefficients in order of increasing degree, i.e., + ``(1, 2, 3)`` gives ``1*P_0(x) + 2*P_1(x) + 3*P_2(x)``, where + ``P_i`` is the basis polynomials of degree ``i``. + domain : (2,) array_like, optional + Domain to use. The interval ``[domain[0], domain[1]]`` is mapped + to the interval ``[window[0], window[1]]`` by shifting and scaling. + The default value is the derived class domain. + window : (2,) array_like, optional + Window, see domain for its use. The default value is the + derived class window. + + Attributes + ---------- + coef : (N,) ndarray + Series coefficients in order of increasing degree. + domain : (2,) ndarray + Domain that is mapped to window. + window : (2,) ndarray + Window that domain is mapped to. + + Class Attributes + ---------------- + maxpower : int + Maximum power allowed, i.e., the largest number ``n`` such that + ``p(x)**n`` is allowed. This is to limit runaway polynomial size. + domain : (2,) ndarray + Default domain of the class. + window : (2,) ndarray + Default window of the class. + + """ + + # Not hashable + __hash__ = None + + # Opt out of numpy ufuncs and Python ops with ndarray subclasses. + __array_ufunc__ = None + + # Limit runaway size. T_n^m has degree n*m + maxpower = 100 + + # Unicode character mappings for improved __str__ + _superscript_mapping = str.maketrans({ + "0": "⁰", + "1": "¹", + "2": "²", + "3": "³", + "4": "⁴", + "5": "⁵", + "6": "⁶", + "7": "⁷", + "8": "⁸", + "9": "⁹" + }) + _subscript_mapping = str.maketrans({ + "0": "₀", + "1": "₁", + "2": "₂", + "3": "₃", + "4": "₄", + "5": "₅", + "6": "₆", + "7": "₇", + "8": "₈", + "9": "₉" + }) + # Some fonts don't support full unicode character ranges necessary for + # the full set of superscripts and subscripts, including common/default + # fonts in Windows shells/terminals. Therefore, default to ascii-only + # printing on windows. 
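+    # For reference, the package-level examples show the two styles:
+    # unicode "1.0 + 2.0·x¹ + 3.0·x²" vs ascii "1.0 + 2.0 x**1 + 3.0 x**2".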
+ _use_unicode = not os.name == 'nt' + + @property + @abc.abstractmethod + def domain(self): + pass + + @property + @abc.abstractmethod + def window(self): + pass + + @property + @abc.abstractmethod + def basis_name(self): + pass + + @staticmethod + @abc.abstractmethod + def _add(c1, c2): + pass + + @staticmethod + @abc.abstractmethod + def _sub(c1, c2): + pass + + @staticmethod + @abc.abstractmethod + def _mul(c1, c2): + pass + + @staticmethod + @abc.abstractmethod + def _div(c1, c2): + pass + + @staticmethod + @abc.abstractmethod + def _pow(c, pow, maxpower=None): + pass + + @staticmethod + @abc.abstractmethod + def _val(x, c): + pass + + @staticmethod + @abc.abstractmethod + def _int(c, m, k, lbnd, scl): + pass + + @staticmethod + @abc.abstractmethod + def _der(c, m, scl): + pass + + @staticmethod + @abc.abstractmethod + def _fit(x, y, deg, rcond, full): + pass + + @staticmethod + @abc.abstractmethod + def _line(off, scl): + pass + + @staticmethod + @abc.abstractmethod + def _roots(c): + pass + + @staticmethod + @abc.abstractmethod + def _fromroots(r): + pass + + def has_samecoef(self, other): + """Check if coefficients match. + + .. versionadded:: 1.6.0 + + Parameters + ---------- + other : class instance + The other class must have the ``coef`` attribute. + + Returns + ------- + bool : boolean + True if the coefficients are the same, False otherwise. + + """ + if len(self.coef) != len(other.coef): + return False + elif not np.all(self.coef == other.coef): + return False + else: + return True + + def has_samedomain(self, other): + """Check if domains match. + + .. versionadded:: 1.6.0 + + Parameters + ---------- + other : class instance + The other class must have the ``domain`` attribute. + + Returns + ------- + bool : boolean + True if the domains are the same, False otherwise. + + """ + return np.all(self.domain == other.domain) + + def has_samewindow(self, other): + """Check if windows match. + + .. versionadded:: 1.6.0 + + Parameters + ---------- + other : class instance + The other class must have the ``window`` attribute. + + Returns + ------- + bool : boolean + True if the windows are the same, False otherwise. + + """ + return np.all(self.window == other.window) + + def has_sametype(self, other): + """Check if types match. + + .. versionadded:: 1.7.0 + + Parameters + ---------- + other : object + Class instance. + + Returns + ------- + bool : boolean + True if other is same class as self + + """ + return isinstance(other, self.__class__) + + def _get_coefficients(self, other): + """Interpret other as polynomial coefficients. + + The `other` argument is checked to see if it is of the same + class as self with identical domain and window. If so, + return its coefficients, otherwise return `other`. + + .. versionadded:: 1.9.0 + + Parameters + ---------- + other : anything + Object to be checked. + + Returns + ------- + coef + The coefficients of`other` if it is a compatible instance, + of ABCPolyBase, otherwise `other`. + + Raises + ------ + TypeError + When `other` is an incompatible instance of ABCPolyBase. 
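+            For example (illustrative), adding a Chebyshev instance to a
+            Legendre instance trips this error, while a plain sequence of
+            coefficients is returned unchanged.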
+ + """ + if isinstance(other, ABCPolyBase): + if not isinstance(other, self.__class__): + raise TypeError("Polynomial types differ") + elif not np.all(self.domain == other.domain): + raise TypeError("Domains differ") + elif not np.all(self.window == other.window): + raise TypeError("Windows differ") + return other.coef + return other + + def __init__(self, coef, domain=None, window=None): + [coef] = pu.as_series([coef], trim=False) + self.coef = coef + + if domain is not None: + [domain] = pu.as_series([domain], trim=False) + if len(domain) != 2: + raise ValueError("Domain has wrong number of elements.") + self.domain = domain + + if window is not None: + [window] = pu.as_series([window], trim=False) + if len(window) != 2: + raise ValueError("Window has wrong number of elements.") + self.window = window + + def __repr__(self): + coef = repr(self.coef)[6:-1] + domain = repr(self.domain)[6:-1] + window = repr(self.window)[6:-1] + name = self.__class__.__name__ + return f"{name}({coef}, domain={domain}, window={window})" + + def __format__(self, fmt_str): + if fmt_str == '': + return self.__str__() + if fmt_str not in ('ascii', 'unicode'): + raise ValueError( + f"Unsupported format string '{fmt_str}' passed to " + f"{self.__class__}.__format__. Valid options are " + f"'ascii' and 'unicode'" + ) + if fmt_str == 'ascii': + return self._generate_string(self._str_term_ascii) + return self._generate_string(self._str_term_unicode) + + def __str__(self): + if self._use_unicode: + return self._generate_string(self._str_term_unicode) + return self._generate_string(self._str_term_ascii) + + def _generate_string(self, term_method): + """ + Generate the full string representation of the polynomial, using + ``term_method`` to generate each polynomial term. + """ + # Get configuration for line breaks + linewidth = np.get_printoptions().get('linewidth', 75) + if linewidth < 1: + linewidth = 1 + out = f"{self.coef[0]}" + for i, coef in enumerate(self.coef[1:]): + out += " " + power = str(i + 1) + # Polynomial coefficient + # The coefficient array can be an object array with elements that + # will raise a TypeError with >= 0 (e.g. strings or Python + # complex). In this case, represent the coefficient as-is. + try: + if coef >= 0: + next_term = f"+ {coef}" + else: + next_term = f"- {-coef}" + except TypeError: + next_term = f"+ {coef}" + # Polynomial term + next_term += term_method(power, "x") + # Length of the current line with next term added + line_len = len(out.split('\n')[-1]) + len(next_term) + # If not the last term in the polynomial, it will be two + # characters longer due to the +/- with the next term + if i < len(self.coef[1:]) - 1: + line_len += 2 + # Handle linebreaking + if line_len >= linewidth: + next_term = next_term.replace(" ", "\n", 1) + out += next_term + return out + + @classmethod + def _str_term_unicode(cls, i, arg_str): + """ + String representation of single polynomial term using unicode + characters for superscripts and subscripts. + """ + if cls.basis_name is None: + raise NotImplementedError( + "Subclasses must define either a basis_name, or override " + "_str_term_unicode(cls, i, arg_str)" + ) + return (f"·{cls.basis_name}{i.translate(cls._subscript_mapping)}" + f"({arg_str})") + + @classmethod + def _str_term_ascii(cls, i, arg_str): + """ + String representation of a single polynomial term using ** and _ to + represent superscripts and subscripts, respectively. 
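+
+        For example (illustrative), a degree-2 Chebyshev term renders as
+        `` T_2(x)``, matching the ascii examples in the package docstring.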
+ """ + if cls.basis_name is None: + raise NotImplementedError( + "Subclasses must define either a basis_name, or override " + "_str_term_ascii(cls, i, arg_str)" + ) + return f" {cls.basis_name}_{i}({arg_str})" + + @classmethod + def _repr_latex_term(cls, i, arg_str, needs_parens): + if cls.basis_name is None: + raise NotImplementedError( + "Subclasses must define either a basis name, or override " + "_repr_latex_term(i, arg_str, needs_parens)") + # since we always add parens, we don't care if the expression needs them + return f"{{{cls.basis_name}}}_{{{i}}}({arg_str})" + + @staticmethod + def _repr_latex_scalar(x): + # TODO: we're stuck with disabling math formatting until we handle + # exponents in this function + return r'\text{{{}}}'.format(x) + + def _repr_latex_(self): + # get the scaled argument string to the basis functions + off, scale = self.mapparms() + if off == 0 and scale == 1: + term = 'x' + needs_parens = False + elif scale == 1: + term = f"{self._repr_latex_scalar(off)} + x" + needs_parens = True + elif off == 0: + term = f"{self._repr_latex_scalar(scale)}x" + needs_parens = True + else: + term = ( + f"{self._repr_latex_scalar(off)} + " + f"{self._repr_latex_scalar(scale)}x" + ) + needs_parens = True + + mute = r"\color{{LightGray}}{{{}}}".format + + parts = [] + for i, c in enumerate(self.coef): + # prevent duplication of + and - signs + if i == 0: + coef_str = f"{self._repr_latex_scalar(c)}" + elif not isinstance(c, numbers.Real): + coef_str = f" + ({self._repr_latex_scalar(c)})" + elif not np.signbit(c): + coef_str = f" + {self._repr_latex_scalar(c)}" + else: + coef_str = f" - {self._repr_latex_scalar(-c)}" + + # produce the string for the term + term_str = self._repr_latex_term(i, term, needs_parens) + if term_str == '1': + part = coef_str + else: + part = rf"{coef_str}\,{term_str}" + + if c == 0: + part = mute(part) + + parts.append(part) + + if parts: + body = ''.join(parts) + else: + # in case somehow there are no coefficients at all + body = '0' + + return rf"$x \mapsto {body}$" + + + + # Pickle and copy + + def __getstate__(self): + ret = self.__dict__.copy() + ret['coef'] = self.coef.copy() + ret['domain'] = self.domain.copy() + ret['window'] = self.window.copy() + return ret + + def __setstate__(self, dict): + self.__dict__ = dict + + # Call + + def __call__(self, arg): + off, scl = pu.mapparms(self.domain, self.window) + arg = off + scl*arg + return self._val(arg, self.coef) + + def __iter__(self): + return iter(self.coef) + + def __len__(self): + return len(self.coef) + + # Numeric properties. + + def __neg__(self): + return self.__class__(-self.coef, self.domain, self.window) + + def __pos__(self): + return self + + def __add__(self, other): + othercoef = self._get_coefficients(other) + try: + coef = self._add(self.coef, othercoef) + except Exception: + return NotImplemented + return self.__class__(coef, self.domain, self.window) + + def __sub__(self, other): + othercoef = self._get_coefficients(other) + try: + coef = self._sub(self.coef, othercoef) + except Exception: + return NotImplemented + return self.__class__(coef, self.domain, self.window) + + def __mul__(self, other): + othercoef = self._get_coefficients(other) + try: + coef = self._mul(self.coef, othercoef) + except Exception: + return NotImplemented + return self.__class__(coef, self.domain, self.window) + + def __truediv__(self, other): + # there is no true divide if the rhs is not a Number, although it + # could return the first n elements of an infinite series. 
+ # It is hard to see where n would come from, though. + if not isinstance(other, numbers.Number) or isinstance(other, bool): + raise TypeError( + f"unsupported types for true division: " + f"'{type(self)}', '{type(other)}'" + ) + return self.__floordiv__(other) + + def __floordiv__(self, other): + res = self.__divmod__(other) + if res is NotImplemented: + return res + return res[0] + + def __mod__(self, other): + res = self.__divmod__(other) + if res is NotImplemented: + return res + return res[1] + + def __divmod__(self, other): + othercoef = self._get_coefficients(other) + try: + quo, rem = self._div(self.coef, othercoef) + except ZeroDivisionError: + raise + except Exception: + return NotImplemented + quo = self.__class__(quo, self.domain, self.window) + rem = self.__class__(rem, self.domain, self.window) + return quo, rem + + def __pow__(self, other): + coef = self._pow(self.coef, other, maxpower=self.maxpower) + res = self.__class__(coef, self.domain, self.window) + return res + + def __radd__(self, other): + try: + coef = self._add(other, self.coef) + except Exception: + return NotImplemented + return self.__class__(coef, self.domain, self.window) + + def __rsub__(self, other): + try: + coef = self._sub(other, self.coef) + except Exception: + return NotImplemented + return self.__class__(coef, self.domain, self.window) + + def __rmul__(self, other): + try: + coef = self._mul(other, self.coef) + except Exception: + return NotImplemented + return self.__class__(coef, self.domain, self.window) + + def __rdiv__(self, other): + # set to __floordiv__ /. + return self.__rfloordiv__(other) + + def __rtruediv__(self, other): + # An instance of ABCPolyBase is not considered a + # Number. + return NotImplemented + + def __rfloordiv__(self, other): + res = self.__rdivmod__(other) + if res is NotImplemented: + return res + return res[0] + + def __rmod__(self, other): + res = self.__rdivmod__(other) + if res is NotImplemented: + return res + return res[1] + + def __rdivmod__(self, other): + try: + quo, rem = self._div(other, self.coef) + except ZeroDivisionError: + raise + except Exception: + return NotImplemented + quo = self.__class__(quo, self.domain, self.window) + rem = self.__class__(rem, self.domain, self.window) + return quo, rem + + def __eq__(self, other): + res = (isinstance(other, self.__class__) and + np.all(self.domain == other.domain) and + np.all(self.window == other.window) and + (self.coef.shape == other.coef.shape) and + np.all(self.coef == other.coef)) + return res + + def __ne__(self, other): + return not self.__eq__(other) + + # + # Extra methods. + # + + def copy(self): + """Return a copy. + + Returns + ------- + new_series : series + Copy of self. + + """ + return self.__class__(self.coef, self.domain, self.window) + + def degree(self): + """The degree of the series. + + .. versionadded:: 1.5.0 + + Returns + ------- + degree : int + Degree of the series, one less than the number of coefficients. + + """ + return len(self) - 1 + + def cutdeg(self, deg): + """Truncate series to the given degree. + + Reduce the degree of the series to `deg` by discarding the + high order terms. If `deg` is greater than the current degree a + copy of the current series is returned. This can be useful in least + squares where the coefficients of the high degree terms may be very + small. + + .. versionadded:: 1.5.0 + + Parameters + ---------- + deg : non-negative int + The series is reduced to degree `deg` by discarding the high + order terms. The value of `deg` must be a non-negative integer. 
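+            For example (illustrative),
+            ``Polynomial([1, 2, 3]).cutdeg(1)`` keeps the coefficients
+            ``[1., 2.]``.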
+ + Returns + ------- + new_series : series + New instance of series with reduced degree. + + """ + return self.truncate(deg + 1) + + def trim(self, tol=0): + """Remove trailing coefficients + + Remove trailing coefficients until a coefficient is reached whose + absolute value greater than `tol` or the beginning of the series is + reached. If all the coefficients would be removed the series is set + to ``[0]``. A new series instance is returned with the new + coefficients. The current instance remains unchanged. + + Parameters + ---------- + tol : non-negative number. + All trailing coefficients less than `tol` will be removed. + + Returns + ------- + new_series : series + New instance of series with trimmed coefficients. + + """ + coef = pu.trimcoef(self.coef, tol) + return self.__class__(coef, self.domain, self.window) + + def truncate(self, size): + """Truncate series to length `size`. + + Reduce the series to length `size` by discarding the high + degree terms. The value of `size` must be a positive integer. This + can be useful in least squares where the coefficients of the + high degree terms may be very small. + + Parameters + ---------- + size : positive int + The series is reduced to length `size` by discarding the high + degree terms. The value of `size` must be a positive integer. + + Returns + ------- + new_series : series + New instance of series with truncated coefficients. + + """ + isize = int(size) + if isize != size or isize < 1: + raise ValueError("size must be a positive integer") + if isize >= len(self.coef): + coef = self.coef + else: + coef = self.coef[:isize] + return self.__class__(coef, self.domain, self.window) + + def convert(self, domain=None, kind=None, window=None): + """Convert series to a different kind and/or domain and/or window. + + Parameters + ---------- + domain : array_like, optional + The domain of the converted series. If the value is None, + the default domain of `kind` is used. + kind : class, optional + The polynomial series type class to which the current instance + should be converted. If kind is None, then the class of the + current instance is used. + window : array_like, optional + The window of the converted series. If the value is None, + the default window of `kind` is used. + + Returns + ------- + new_series : series + The returned class can be of different type than the current + instance and/or have a different domain and/or different + window. + + Notes + ----- + Conversion between domains and class types can result in + numerically ill defined series. + + """ + if kind is None: + kind = self.__class__ + if domain is None: + domain = kind.domain + if window is None: + window = kind.window + return self(kind.identity(domain, window=window)) + + def mapparms(self): + """Return the mapping parameters. + + The returned values define a linear map ``off + scl*x`` that is + applied to the input arguments before the series is evaluated. The + map depends on the ``domain`` and ``window``; if the current + ``domain`` is equal to the ``window`` the resulting map is the + identity. If the coefficients of the series instance are to be + used by themselves outside this class, then the linear function + must be substituted for the ``x`` in the standard representation of + the base polynomials. + + Returns + ------- + off, scl : float or complex + The mapping function is defined by ``off + scl*x``. 
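+            For example (illustrative), a series with ``domain=[0, 2]``
+            and ``window=[-1, 1]`` gives ``off, scl = -1.0, 1.0``.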
+
+        Notes
+        -----
+        If the current domain is the interval ``[l1, r1]`` and the window
+        is ``[l2, r2]``, then the linear mapping function ``L`` is
+        defined by the equations::
+
+            L(l1) = l2
+            L(r1) = r2
+
+        """
+        return pu.mapparms(self.domain, self.window)
+
+    def integ(self, m=1, k=[], lbnd=None):
+        """Integrate.
+
+        Return a series instance that is the definite integral of the
+        current series.
+
+        Parameters
+        ----------
+        m : non-negative int
+            The number of integrations to perform.
+        k : array_like
+            Integration constants. The first constant is applied to the
+            first integration, the second to the second, and so on. The
+            list of values must be less than or equal to `m` in length and
+            any missing values are set to zero.
+        lbnd : Scalar
+            The lower bound of the definite integral.
+
+        Returns
+        -------
+        new_series : series
+            A new series representing the integral. The domain is the same
+            as the domain of the integrated series.
+
+        """
+        off, scl = self.mapparms()
+        if lbnd is None:
+            lbnd = 0
+        else:
+            lbnd = off + scl*lbnd
+        coef = self._int(self.coef, m, k, lbnd, 1./scl)
+        return self.__class__(coef, self.domain, self.window)
+
+    def deriv(self, m=1):
+        """Differentiate.
+
+        Return a series instance that is the derivative of the current
+        series.
+
+        Parameters
+        ----------
+        m : non-negative int
+            Find the derivative of order `m`.
+
+        Returns
+        -------
+        new_series : series
+            A new series representing the derivative. The domain is the same
+            as the domain of the differentiated series.
+
+        """
+        off, scl = self.mapparms()
+        coef = self._der(self.coef, m, scl)
+        return self.__class__(coef, self.domain, self.window)
+
+    def roots(self):
+        """Return the roots of the series polynomial.
+
+        Compute the roots for the series. Note that the accuracy of the
+        roots decreases the further outside the domain they lie.
+
+        Returns
+        -------
+        roots : ndarray
+            Array containing the roots of the series.
+
+        """
+        roots = self._roots(self.coef)
+        return pu.mapdomain(roots, self.window, self.domain)
+
+    def linspace(self, n=100, domain=None):
+        """Return x, y values at equally spaced points in domain.
+
+        Returns the x, y values at `n` linearly spaced points across the
+        domain. Here y is the value of the polynomial at the points x. By
+        default the domain is the same as that of the series instance.
+        This method is intended mostly as a plotting aid.
+
+        .. versionadded:: 1.5.0
+
+        Parameters
+        ----------
+        n : int, optional
+            Number of point pairs to return. The default value is 100.
+        domain : {None, array_like}, optional
+            If not None, the specified domain is used instead of that of
+            the calling instance. It should be of the form ``[beg,end]``.
+            The default is None, in which case the class domain is used.
+
+        Returns
+        -------
+        x, y : ndarray
+            x is equal to linspace(self.domain[0], self.domain[1], n) and
+            y is the series evaluated at each element of x.
+
+        """
+        if domain is None:
+            domain = self.domain
+        x = np.linspace(domain[0], domain[1], n)
+        y = self(x)
+        return x, y
+
+    @classmethod
+    def fit(cls, x, y, deg, domain=None, rcond=None, full=False, w=None,
+            window=None):
+        """Least squares fit to data.
+
+        Return a series instance that is the least squares fit to the data
+        `y` sampled at `x`. The domain of the returned instance can be
+        specified and this will often result in a superior fit with less
+        chance of ill conditioning.
+
+        Parameters
+        ----------
+        x : array_like, shape (M,)
+            x-coordinates of the M sample points ``(x[i], y[i])``.
+ y : array_like, shape (M,) + y-coordinates of the M sample points ``(x[i], y[i])``. + deg : int or 1-D array_like + Degree(s) of the fitting polynomials. If `deg` is a single integer + all terms up to and including the `deg`'th term are included in the + fit. For NumPy versions >= 1.11.0 a list of integers specifying the + degrees of the terms to include may be used instead. + domain : {None, [beg, end], []}, optional + Domain to use for the returned series. If ``None``, + then a minimal domain that covers the points `x` is chosen. If + ``[]`` the class domain is used. The default value was the + class domain in NumPy 1.4 and ``None`` in later versions. + The ``[]`` option was added in numpy 1.5.0. + rcond : float, optional + Relative condition number of the fit. Singular values smaller + than this relative to the largest singular value will be + ignored. The default value is len(x)*eps, where eps is the + relative precision of the float type, about 2e-16 in most + cases. + full : bool, optional + Switch determining nature of return value. When it is False + (the default) just the coefficients are returned, when True + diagnostic information from the singular value decomposition is + also returned. + w : array_like, shape (M,), optional + Weights. If not None, the weight ``w[i]`` applies to the unsquared + residual ``y[i] - y_hat[i]`` at ``x[i]``. Ideally the weights are + chosen so that the errors of the products ``w[i]*y[i]`` all have + the same variance. When using inverse-variance weighting, use + ``w[i] = 1/sigma(y[i])``. The default value is None. + + .. versionadded:: 1.5.0 + window : {[beg, end]}, optional + Window to use for the returned series. The default + value is the default class domain + + .. versionadded:: 1.6.0 + + Returns + ------- + new_series : series + A series that represents the least squares fit to the data and + has the domain and window specified in the call. If the + coefficients for the unscaled and unshifted basis polynomials are + of interest, do ``new_series.convert().coef``. + + [resid, rank, sv, rcond] : list + These values are only returned if ``full == True`` + + - resid -- sum of squared residuals of the least squares fit + - rank -- the numerical rank of the scaled Vandermonde matrix + - sv -- singular values of the scaled Vandermonde matrix + - rcond -- value of `rcond`. + + For more details, see `linalg.lstsq`. + + """ + if domain is None: + domain = pu.getdomain(x) + elif type(domain) is list and len(domain) == 0: + domain = cls.domain + + if window is None: + window = cls.window + + xnew = pu.mapdomain(x, domain, window) + res = cls._fit(xnew, y, deg, w=w, rcond=rcond, full=full) + if full: + [coef, status] = res + return cls(coef, domain=domain, window=window), status + else: + coef = res + return cls(coef, domain=domain, window=window) + + @classmethod + def fromroots(cls, roots, domain=[], window=None): + """Return series instance that has the specified roots. + + Returns a series representing the product + ``(x - r[0])*(x - r[1])*...*(x - r[n-1])``, where ``r`` is a + list of roots. + + Parameters + ---------- + roots : array_like + List of roots. + domain : {[], None, array_like}, optional + Domain for the resulting series. If None the domain is the + interval from the smallest root to the largest. If [] the + domain is the class domain. The default is []. + window : {None, array_like}, optional + Window for the returned series. If None the class window is + used. The default is None. 
+
+        Returns
+        -------
+        new_series : series
+            Series with the specified roots.
+
+        """
+        [roots] = pu.as_series([roots], trim=False)
+        if domain is None:
+            domain = pu.getdomain(roots)
+        elif type(domain) is list and len(domain) == 0:
+            domain = cls.domain
+
+        if window is None:
+            window = cls.window
+
+        deg = len(roots)
+        off, scl = pu.mapparms(domain, window)
+        rnew = off + scl*roots
+        coef = cls._fromroots(rnew) / scl**deg
+        return cls(coef, domain=domain, window=window)
+
+    @classmethod
+    def identity(cls, domain=None, window=None):
+        """Identity function.
+
+        If ``p`` is the returned series, then ``p(x) == x`` for all
+        values of x.
+
+        Parameters
+        ----------
+        domain : {None, array_like}, optional
+            If given, the array must be of the form ``[beg, end]``, where
+            ``beg`` and ``end`` are the endpoints of the domain. If None is
+            given then the class domain is used. The default is None.
+        window : {None, array_like}, optional
+            If given, the resulting array must be of the form
+            ``[beg, end]``, where ``beg`` and ``end`` are the endpoints of
+            the window. If None is given then the class window is used. The
+            default is None.
+
+        Returns
+        -------
+        new_series : series
+            Series representing the identity.
+
+        """
+        if domain is None:
+            domain = cls.domain
+        if window is None:
+            window = cls.window
+        off, scl = pu.mapparms(window, domain)
+        coef = cls._line(off, scl)
+        return cls(coef, domain, window)
+
+    @classmethod
+    def basis(cls, deg, domain=None, window=None):
+        """Series basis polynomial of degree `deg`.
+
+        Returns the series representing the basis polynomial of degree `deg`.
+
+        .. versionadded:: 1.7.0
+
+        Parameters
+        ----------
+        deg : int
+            Degree of the basis polynomial for the series. Must be >= 0.
+        domain : {None, array_like}, optional
+            If given, the array must be of the form ``[beg, end]``, where
+            ``beg`` and ``end`` are the endpoints of the domain. If None is
+            given then the class domain is used. The default is None.
+        window : {None, array_like}, optional
+            If given, the resulting array must be of the form
+            ``[beg, end]``, where ``beg`` and ``end`` are the endpoints of
+            the window. If None is given then the class window is used. The
+            default is None.
+
+        Returns
+        -------
+        new_series : series
+            A series with the coefficient of the `deg` term set to one and
+            all others zero.
+
+        """
+        if domain is None:
+            domain = cls.domain
+        if window is None:
+            window = cls.window
+        ideg = int(deg)
+
+        if ideg != deg or ideg < 0:
+            raise ValueError("deg must be non-negative integer")
+        return cls([0]*ideg + [1], domain, window)
+
+    @classmethod
+    def cast(cls, series, domain=None, window=None):
+        """Convert series to series of this class.
+
+        The `series` is expected to be an instance of some polynomial
+        series of one of the types supported by the numpy.polynomial
+        module, but could be some other class that supports the convert
+        method.
+
+        .. versionadded:: 1.7.0
+
+        Parameters
+        ----------
+        series : series
+            The series instance to be converted.
+        domain : {None, array_like}, optional
+            If given, the array must be of the form ``[beg, end]``, where
+            ``beg`` and ``end`` are the endpoints of the domain. If None is
+            given then the class domain is used. The default is None.
+        window : {None, array_like}, optional
+            If given, the resulting array must be of the form
+            ``[beg, end]``, where ``beg`` and ``end`` are the endpoints of
+            the window. If None is given then the class window is used. The
+            default is None.
+ + Returns + ------- + new_series : series + A series of the same kind as the calling class and equal to + `series` when evaluated. + + See Also + -------- + convert : similar instance method + + """ + if domain is None: + domain = cls.domain + if window is None: + window = cls.window + return series.convert(domain, cls, window) diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/_polybase.pyi b/.local/lib/python3.8/site-packages/numpy/polynomial/_polybase.pyi new file mode 100644 index 0000000..c416014 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/polynomial/_polybase.pyi @@ -0,0 +1,69 @@ +import abc +from typing import Any, List, ClassVar + +__all__: List[str] + +class ABCPolyBase(abc.ABC): + __hash__: ClassVar[None] # type: ignore[assignment] + __array_ufunc__: ClassVar[None] + maxpower: ClassVar[int] + coef: Any + @property + @abc.abstractmethod + def domain(self): ... + @property + @abc.abstractmethod + def window(self): ... + @property + @abc.abstractmethod + def basis_name(self): ... + def has_samecoef(self, other): ... + def has_samedomain(self, other): ... + def has_samewindow(self, other): ... + def has_sametype(self, other): ... + def __init__(self, coef, domain=..., window=...): ... + def __format__(self, fmt_str): ... + def __call__(self, arg): ... + def __iter__(self): ... + def __len__(self): ... + def __neg__(self): ... + def __pos__(self): ... + def __add__(self, other): ... + def __sub__(self, other): ... + def __mul__(self, other): ... + def __truediv__(self, other): ... + def __floordiv__(self, other): ... + def __mod__(self, other): ... + def __divmod__(self, other): ... + def __pow__(self, other): ... + def __radd__(self, other): ... + def __rsub__(self, other): ... + def __rmul__(self, other): ... + def __rdiv__(self, other): ... + def __rtruediv__(self, other): ... + def __rfloordiv__(self, other): ... + def __rmod__(self, other): ... + def __rdivmod__(self, other): ... + def __eq__(self, other): ... + def __ne__(self, other): ... + def copy(self): ... + def degree(self): ... + def cutdeg(self, deg): ... + def trim(self, tol=...): ... + def truncate(self, size): ... + def convert(self, domain=..., kind=..., window=...): ... + def mapparms(self): ... + def integ(self, m=..., k = ..., lbnd=...): ... + def deriv(self, m=...): ... + def roots(self): ... + def linspace(self, n=..., domain=...): ... + @classmethod + def fit(cls, x, y, deg, domain=..., rcond=..., full=..., w=..., window=...): ... + @classmethod + def fromroots(cls, roots, domain = ..., window=...): ... + @classmethod + def identity(cls, domain=..., window=...): ... + @classmethod + def basis(cls, deg, domain=..., window=...): ... + @classmethod + def cast(cls, series, domain=..., window=...): ... diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/chebyshev.py b/.local/lib/python3.8/site-packages/numpy/polynomial/chebyshev.py new file mode 100644 index 0000000..89ce815 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/polynomial/chebyshev.py @@ -0,0 +1,2075 @@ +""" +==================================================== +Chebyshev Series (:mod:`numpy.polynomial.chebyshev`) +==================================================== + +This module provides a number of objects (mostly functions) useful for +dealing with Chebyshev series, including a `Chebyshev` class that +encapsulates the usual arithmetic operations. 
(General information +on how this module represents and works with such polynomials is in the +docstring for its "parent" sub-package, `numpy.polynomial`). + +Classes +------- + +.. autosummary:: + :toctree: generated/ + + Chebyshev + + +Constants +--------- + +.. autosummary:: + :toctree: generated/ + + chebdomain + chebzero + chebone + chebx + +Arithmetic +---------- + +.. autosummary:: + :toctree: generated/ + + chebadd + chebsub + chebmulx + chebmul + chebdiv + chebpow + chebval + chebval2d + chebval3d + chebgrid2d + chebgrid3d + +Calculus +-------- + +.. autosummary:: + :toctree: generated/ + + chebder + chebint + +Misc Functions +-------------- + +.. autosummary:: + :toctree: generated/ + + chebfromroots + chebroots + chebvander + chebvander2d + chebvander3d + chebgauss + chebweight + chebcompanion + chebfit + chebpts1 + chebpts2 + chebtrim + chebline + cheb2poly + poly2cheb + chebinterpolate + +See also +-------- +`numpy.polynomial` + +Notes +----- +The implementations of multiplication, division, integration, and +differentiation use the algebraic identities [1]_: + +.. math:: + T_n(x) = \\frac{z^n + z^{-n}}{2} \\\\ + z\\frac{dx}{dz} = \\frac{z - z^{-1}}{2}. + +where + +.. math:: x = \\frac{z + z^{-1}}{2}. + +These identities allow a Chebyshev series to be expressed as a finite, +symmetric Laurent series. In this module, this sort of Laurent series +is referred to as a "z-series." + +References +---------- +.. [1] A. T. Benjamin, et al., "Combinatorial Trigonometry with Chebyshev + Polynomials," *Journal of Statistical Planning and Inference 14*, 2008 + (https://web.archive.org/web/20080221202153/https://www.math.hmc.edu/~benjamin/papers/CombTrig.pdf, pg. 4) + +""" +import numpy as np +import numpy.linalg as la +from numpy.core.multiarray import normalize_axis_index + +from . import polyutils as pu +from ._polybase import ABCPolyBase + +__all__ = [ + 'chebzero', 'chebone', 'chebx', 'chebdomain', 'chebline', 'chebadd', + 'chebsub', 'chebmulx', 'chebmul', 'chebdiv', 'chebpow', 'chebval', + 'chebder', 'chebint', 'cheb2poly', 'poly2cheb', 'chebfromroots', + 'chebvander', 'chebfit', 'chebtrim', 'chebroots', 'chebpts1', + 'chebpts2', 'Chebyshev', 'chebval2d', 'chebval3d', 'chebgrid2d', + 'chebgrid3d', 'chebvander2d', 'chebvander3d', 'chebcompanion', + 'chebgauss', 'chebweight', 'chebinterpolate'] + +chebtrim = pu.trimcoef + +# +# A collection of functions for manipulating z-series. These are private +# functions and do minimal error checking. +# + +def _cseries_to_zseries(c): + """Convert Chebyshev series to z-series. + + Convert a Chebyshev series to the equivalent z-series. The result is + never an empty array. The dtype of the return is the same as that of + the input. No checks are run on the arguments as this routine is for + internal use. + + Parameters + ---------- + c : 1-D ndarray + Chebyshev coefficients, ordered from low to high + + Returns + ------- + zs : 1-D ndarray + Odd length symmetric z-series, ordered from low to high. + + """ + n = c.size + zs = np.zeros(2*n-1, dtype=c.dtype) + zs[n-1:] = c/2 + return zs + zs[::-1] + + +def _zseries_to_cseries(zs): + """Convert z-series to a Chebyshev series. + + Convert a z series to the equivalent Chebyshev series. The result is + never an empty array. The dtype of the return is the same as that of + the input. No checks are run on the arguments as this routine is for + internal use. + + Parameters + ---------- + zs : 1-D ndarray + Odd length symmetric z-series, ordered from low to high. 
+ + Returns + ------- + c : 1-D ndarray + Chebyshev coefficients, ordered from low to high. + + """ + n = (zs.size + 1)//2 + c = zs[n-1:].copy() + c[1:n] *= 2 + return c + + +def _zseries_mul(z1, z2): + """Multiply two z-series. + + Multiply two z-series to produce a z-series. + + Parameters + ---------- + z1, z2 : 1-D ndarray + The arrays must be 1-D but this is not checked. + + Returns + ------- + product : 1-D ndarray + The product z-series. + + Notes + ----- + This is simply convolution. If symmetric/anti-symmetric z-series are + denoted by S/A then the following rules apply: + + S*S, A*A -> S + S*A, A*S -> A + + """ + return np.convolve(z1, z2) + + +def _zseries_div(z1, z2): + """Divide the first z-series by the second. + + Divide `z1` by `z2` and return the quotient and remainder as z-series. + Warning: this implementation only applies when both z1 and z2 have the + same symmetry, which is sufficient for present purposes. + + Parameters + ---------- + z1, z2 : 1-D ndarray + The arrays must be 1-D and have the same symmetry, but this is not + checked. + + Returns + ------- + + (quotient, remainder) : 1-D ndarrays + Quotient and remainder as z-series. + + Notes + ----- + This is not the same as polynomial division on account of the desired form + of the remainder. If symmetric/anti-symmetric z-series are denoted by S/A + then the following rules apply: + + S/S -> S,S + A/A -> S,A + + The restriction to types of the same symmetry could be fixed but seems like + unneeded generality. There is no natural form for the remainder in the case + where there is no symmetry. + + """ + z1 = z1.copy() + z2 = z2.copy() + lc1 = len(z1) + lc2 = len(z2) + if lc2 == 1: + z1 /= z2 + return z1, z1[:1]*0 + elif lc1 < lc2: + return z1[:1]*0, z1 + else: + dlen = lc1 - lc2 + scl = z2[0] + z2 /= scl + quo = np.empty(dlen + 1, dtype=z1.dtype) + i = 0 + j = dlen + while i < j: + r = z1[i] + quo[i] = z1[i] + quo[dlen - i] = r + tmp = r*z2 + z1[i:i+lc2] -= tmp + z1[j:j+lc2] -= tmp + i += 1 + j -= 1 + r = z1[i] + quo[i] = r + tmp = r*z2 + z1[i:i+lc2] -= tmp + quo /= scl + rem = z1[i+1:i-1+lc2].copy() + return quo, rem + + +def _zseries_der(zs): + """Differentiate a z-series. + + The derivative is with respect to x, not z. This is achieved using the + chain rule and the value of dx/dz given in the module notes. + + Parameters + ---------- + zs : z-series + The z-series to differentiate. + + Returns + ------- + derivative : z-series + The derivative + + Notes + ----- + The zseries for x (ns) has been multiplied by two in order to avoid + using floats that are incompatible with Decimal and likely other + specialized scalar types. This scaling has been compensated by + multiplying the value of zs by two also so that the two cancels in the + division. + + """ + n = len(zs)//2 + ns = np.array([-1, 0, 1], dtype=zs.dtype) + zs *= np.arange(-n, n+1)*2 + d, r = _zseries_div(zs, ns) + return d + + +def _zseries_int(zs): + """Integrate a z-series. + + The integral is with respect to x, not z. This is achieved by a change + of variable using dx/dz given in the module notes. + + Parameters + ---------- + zs : z-series + The z-series to integrate + + Returns + ------- + integral : z-series + The indefinite integral + + Notes + ----- + The zseries for x (ns) has been multiplied by two in order to avoid + using floats that are incompatible with Decimal and likely other + specialized scalar types. This scaling has been compensated by + dividing the resulting zs by two. 
+ + """ + n = 1 + len(zs)//2 + ns = np.array([-1, 0, 1], dtype=zs.dtype) + zs = _zseries_mul(zs, ns) + div = np.arange(-n, n+1)*2 + zs[:n] /= div[:n] + zs[n+1:] /= div[n+1:] + zs[n] = 0 + return zs + +# +# Chebyshev series functions +# + + +def poly2cheb(pol): + """ + Convert a polynomial to a Chebyshev series. + + Convert an array representing the coefficients of a polynomial (relative + to the "standard" basis) ordered from lowest degree to highest, to an + array of the coefficients of the equivalent Chebyshev series, ordered + from lowest to highest degree. + + Parameters + ---------- + pol : array_like + 1-D array containing the polynomial coefficients + + Returns + ------- + c : ndarray + 1-D array containing the coefficients of the equivalent Chebyshev + series. + + See Also + -------- + cheb2poly + + Notes + ----- + The easy way to do conversions between polynomial basis sets + is to use the convert method of a class instance. + + Examples + -------- + >>> from numpy import polynomial as P + >>> p = P.Polynomial(range(4)) + >>> p + Polynomial([0., 1., 2., 3.], domain=[-1, 1], window=[-1, 1]) + >>> c = p.convert(kind=P.Chebyshev) + >>> c + Chebyshev([1. , 3.25, 1. , 0.75], domain=[-1., 1.], window=[-1., 1.]) + >>> P.chebyshev.poly2cheb(range(4)) + array([1. , 3.25, 1. , 0.75]) + + """ + [pol] = pu.as_series([pol]) + deg = len(pol) - 1 + res = 0 + for i in range(deg, -1, -1): + res = chebadd(chebmulx(res), pol[i]) + return res + + +def cheb2poly(c): + """ + Convert a Chebyshev series to a polynomial. + + Convert an array representing the coefficients of a Chebyshev series, + ordered from lowest degree to highest, to an array of the coefficients + of the equivalent polynomial (relative to the "standard" basis) ordered + from lowest to highest degree. + + Parameters + ---------- + c : array_like + 1-D array containing the Chebyshev series coefficients, ordered + from lowest order term to highest. + + Returns + ------- + pol : ndarray + 1-D array containing the coefficients of the equivalent polynomial + (relative to the "standard" basis) ordered from lowest order term + to highest. + + See Also + -------- + poly2cheb + + Notes + ----- + The easy way to do conversions between polynomial basis sets + is to use the convert method of a class instance. + + Examples + -------- + >>> from numpy import polynomial as P + >>> c = P.Chebyshev(range(4)) + >>> c + Chebyshev([0., 1., 2., 3.], domain=[-1, 1], window=[-1, 1]) + >>> p = c.convert(kind=P.Polynomial) + >>> p + Polynomial([-2., -8., 4., 12.], domain=[-1., 1.], window=[-1., 1.]) + >>> P.chebyshev.cheb2poly(range(4)) + array([-2., -8., 4., 12.]) + + """ + from .polynomial import polyadd, polysub, polymulx + + [c] = pu.as_series([c]) + n = len(c) + if n < 3: + return c + else: + c0 = c[-2] + c1 = c[-1] + # i is the current degree of c1 + for i in range(n - 1, 1, -1): + tmp = c0 + c0 = polysub(c[i - 2], c1) + c1 = polyadd(tmp, polymulx(c1)*2) + return polyadd(c0, polymulx(c1)) + + +# +# These are constant arrays are of integer type so as to be compatible +# with the widest range of other types, such as Decimal. +# + +# Chebyshev default domain. +chebdomain = np.array([-1, 1]) + +# Chebyshev coefficients representing zero. +chebzero = np.array([0]) + +# Chebyshev coefficients representing one. +chebone = np.array([1]) + +# Chebyshev coefficients representing the identity x. +chebx = np.array([0, 1]) + + +def chebline(off, scl): + """ + Chebyshev series whose graph is a straight line. 
+ + Parameters + ---------- + off, scl : scalars + The specified line is given by ``off + scl*x``. + + Returns + ------- + y : ndarray + This module's representation of the Chebyshev series for + ``off + scl*x``. + + See Also + -------- + numpy.polynomial.polynomial.polyline + numpy.polynomial.legendre.legline + numpy.polynomial.laguerre.lagline + numpy.polynomial.hermite.hermline + numpy.polynomial.hermite_e.hermeline + + Examples + -------- + >>> import numpy.polynomial.chebyshev as C + >>> C.chebline(3,2) + array([3, 2]) + >>> C.chebval(-3, C.chebline(3,2)) # should be -3 + -3.0 + + """ + if scl != 0: + return np.array([off, scl]) + else: + return np.array([off]) + + +def chebfromroots(roots): + """ + Generate a Chebyshev series with given roots. + + The function returns the coefficients of the polynomial + + .. math:: p(x) = (x - r_0) * (x - r_1) * ... * (x - r_n), + + in Chebyshev form, where the `r_n` are the roots specified in `roots`. + If a zero has multiplicity n, then it must appear in `roots` n times. + For instance, if 2 is a root of multiplicity three and 3 is a root of + multiplicity 2, then `roots` looks something like [2, 2, 2, 3, 3]. The + roots can appear in any order. + + If the returned coefficients are `c`, then + + .. math:: p(x) = c_0 + c_1 * T_1(x) + ... + c_n * T_n(x) + + The coefficient of the last term is not generally 1 for monic + polynomials in Chebyshev form. + + Parameters + ---------- + roots : array_like + Sequence containing the roots. + + Returns + ------- + out : ndarray + 1-D array of coefficients. If all roots are real then `out` is a + real array, if some of the roots are complex, then `out` is complex + even if all the coefficients in the result are real (see Examples + below). + + See Also + -------- + numpy.polynomial.polynomial.polyfromroots + numpy.polynomial.legendre.legfromroots + numpy.polynomial.laguerre.lagfromroots + numpy.polynomial.hermite.hermfromroots + numpy.polynomial.hermite_e.hermefromroots + + Examples + -------- + >>> import numpy.polynomial.chebyshev as C + >>> C.chebfromroots((-1,0,1)) # x^3 - x relative to the standard basis + array([ 0. , -0.25, 0. , 0.25]) + >>> j = complex(0,1) + >>> C.chebfromroots((-j,j)) # x^2 + 1 relative to the standard basis + array([1.5+0.j, 0. +0.j, 0.5+0.j]) + + """ + return pu._fromroots(chebline, chebmul, roots) + + +def chebadd(c1, c2): + """ + Add one Chebyshev series to another. + + Returns the sum of two Chebyshev series `c1` + `c2`. The arguments + are sequences of coefficients ordered from lowest order term to + highest, i.e., [1,2,3] represents the series ``T_0 + 2*T_1 + 3*T_2``. + + Parameters + ---------- + c1, c2 : array_like + 1-D arrays of Chebyshev series coefficients ordered from low to + high. + + Returns + ------- + out : ndarray + Array representing the Chebyshev series of their sum. + + See Also + -------- + chebsub, chebmulx, chebmul, chebdiv, chebpow + + Notes + ----- + Unlike multiplication, division, etc., the sum of two Chebyshev series + is a Chebyshev series (without having to "reproject" the result onto + the basis set) so addition, just like that of "standard" polynomials, + is simply "component-wise." + + Examples + -------- + >>> from numpy.polynomial import chebyshev as C + >>> c1 = (1,2,3) + >>> c2 = (3,2,1) + >>> C.chebadd(c1,c2) + array([4., 4., 4.]) + + """ + return pu._add(c1, c2) + + +def chebsub(c1, c2): + """ + Subtract one Chebyshev series from another. + + Returns the difference of two Chebyshev series `c1` - `c2`. 
The
+    sequences of coefficients are from lowest order term to highest, i.e.,
+    [1,2,3] represents the series ``T_0 + 2*T_1 + 3*T_2``.
+
+    Parameters
+    ----------
+    c1, c2 : array_like
+        1-D arrays of Chebyshev series coefficients ordered from low to
+        high.
+
+    Returns
+    -------
+    out : ndarray
+        Of Chebyshev series coefficients representing their difference.
+
+    See Also
+    --------
+    chebadd, chebmulx, chebmul, chebdiv, chebpow
+
+    Notes
+    -----
+    Unlike multiplication, division, etc., the difference of two Chebyshev
+    series is a Chebyshev series (without having to "reproject" the result
+    onto the basis set) so subtraction, just like that of "standard"
+    polynomials, is simply "component-wise."
+
+    Examples
+    --------
+    >>> from numpy.polynomial import chebyshev as C
+    >>> c1 = (1,2,3)
+    >>> c2 = (3,2,1)
+    >>> C.chebsub(c1,c2)
+    array([-2.,  0.,  2.])
+    >>> C.chebsub(c2,c1) # -C.chebsub(c1,c2)
+    array([ 2.,  0., -2.])
+
+    """
+    return pu._sub(c1, c2)
+
+
+def chebmulx(c):
+    """Multiply a Chebyshev series by x.
+
+    Multiply the polynomial `c` by x, where x is the independent
+    variable.
+
+
+    Parameters
+    ----------
+    c : array_like
+        1-D array of Chebyshev series coefficients ordered from low to
+        high.
+
+    Returns
+    -------
+    out : ndarray
+        Array representing the result of the multiplication.
+
+    Notes
+    -----
+
+    .. versionadded:: 1.5.0
+
+    Examples
+    --------
+    >>> from numpy.polynomial import chebyshev as C
+    >>> C.chebmulx([1,2,3])
+    array([1. , 2.5, 1. , 1.5])
+
+    """
+    # c is a trimmed copy
+    [c] = pu.as_series([c])
+    # The zero series needs special treatment
+    if len(c) == 1 and c[0] == 0:
+        return c
+
+    prd = np.empty(len(c) + 1, dtype=c.dtype)
+    prd[0] = c[0]*0
+    prd[1] = c[0]
+    if len(c) > 1:
+        tmp = c[1:]/2
+        prd[2:] = tmp
+        prd[0:-2] += tmp
+    return prd
+
+
+def chebmul(c1, c2):
+    """
+    Multiply one Chebyshev series by another.
+
+    Returns the product of two Chebyshev series `c1` * `c2`. The arguments
+    are sequences of coefficients, from lowest order "term" to highest,
+    e.g., [1,2,3] represents the series ``T_0 + 2*T_1 + 3*T_2``.
+
+    Parameters
+    ----------
+    c1, c2 : array_like
+        1-D arrays of Chebyshev series coefficients ordered from low to
+        high.
+
+    Returns
+    -------
+    out : ndarray
+        Of Chebyshev series coefficients representing their product.
+
+    See Also
+    --------
+    chebadd, chebsub, chebmulx, chebdiv, chebpow
+
+    Notes
+    -----
+    In general, the (polynomial) product of two C-series results in terms
+    that are not in the Chebyshev polynomial basis set. Thus, to express
+    the product as a C-series, it is typically necessary to "reproject"
+    the product onto said basis set, which typically produces
+    "unintuitive" (but correct) results; see Examples section below.
+
+    Examples
+    --------
+    >>> from numpy.polynomial import chebyshev as C
+    >>> c1 = (1,2,3)
+    >>> c2 = (3,2,1)
+    >>> C.chebmul(c1,c2) # multiplication requires "reprojection"
+    array([ 6.5, 12. , 12. ,  4. ,  1.5])
+
+    """
+    # c1, c2 are trimmed copies
+    [c1, c2] = pu.as_series([c1, c2])
+    z1 = _cseries_to_zseries(c1)
+    z2 = _cseries_to_zseries(c2)
+    prd = _zseries_mul(z1, z2)
+    ret = _zseries_to_cseries(prd)
+    return pu.trimseq(ret)
+
+
+def chebdiv(c1, c2):
+    """
+    Divide one Chebyshev series by another.
+
+    Returns the quotient-with-remainder of two Chebyshev series
+    `c1` / `c2`. The arguments are sequences of coefficients from lowest
+    order "term" to highest, e.g., [1,2,3] represents the series
+    ``T_0 + 2*T_1 + 3*T_2``.
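+
+    The returned pair satisfies ``chebadd(chebmul(quo, c2), rem) == c1``
+    up to roundoff.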
+ + Parameters + ---------- + c1, c2 : array_like + 1-D arrays of Chebyshev series coefficients ordered from low to + high. + + Returns + ------- + [quo, rem] : ndarrays + Of Chebyshev series coefficients representing the quotient and + remainder. + + See Also + -------- + chebadd, chebsub, chebmulx, chebmul, chebpow + + Notes + ----- + In general, the (polynomial) division of one C-series by another + results in quotient and remainder terms that are not in the Chebyshev + polynomial basis set. Thus, to express these results as C-series, it + is typically necessary to "reproject" the results onto said basis + set, which typically produces "unintuitive" (but correct) results; + see Examples section below. + + Examples + -------- + >>> from numpy.polynomial import chebyshev as C + >>> c1 = (1,2,3) + >>> c2 = (3,2,1) + >>> C.chebdiv(c1,c2) # quotient "intuitive," remainder not + (array([3.]), array([-8., -4.])) + >>> c2 = (0,1,2,3) + >>> C.chebdiv(c2,c1) # neither "intuitive" + (array([0., 2.]), array([-2., -4.])) + + """ + # c1, c2 are trimmed copies + [c1, c2] = pu.as_series([c1, c2]) + if c2[-1] == 0: + raise ZeroDivisionError() + + # note: this is more efficient than `pu._div(chebmul, c1, c2)` + lc1 = len(c1) + lc2 = len(c2) + if lc1 < lc2: + return c1[:1]*0, c1 + elif lc2 == 1: + return c1/c2[-1], c1[:1]*0 + else: + z1 = _cseries_to_zseries(c1) + z2 = _cseries_to_zseries(c2) + quo, rem = _zseries_div(z1, z2) + quo = pu.trimseq(_zseries_to_cseries(quo)) + rem = pu.trimseq(_zseries_to_cseries(rem)) + return quo, rem + + +def chebpow(c, pow, maxpower=16): + """Raise a Chebyshev series to a power. + + Returns the Chebyshev series `c` raised to the power `pow`. The + argument `c` is a sequence of coefficients ordered from low to high. + i.e., [1,2,3] is the series ``T_0 + 2*T_1 + 3*T_2.`` + + Parameters + ---------- + c : array_like + 1-D array of Chebyshev series coefficients ordered from low to + high. + pow : integer + Power to which the series will be raised + maxpower : integer, optional + Maximum power allowed. This is mainly to limit growth of the series + to unmanageable size. Default is 16 + + Returns + ------- + coef : ndarray + Chebyshev series of power. + + See Also + -------- + chebadd, chebsub, chebmulx, chebmul, chebdiv + + Examples + -------- + >>> from numpy.polynomial import chebyshev as C + >>> C.chebpow([1, 2, 3, 4], 2) + array([15.5, 22. , 16. , ..., 12.5, 12. , 8. ]) + + """ + # note: this is more efficient than `pu._pow(chebmul, c1, c2)`, as it + # avoids converting between z and c series repeatedly + + # c is a trimmed copy + [c] = pu.as_series([c]) + power = int(pow) + if power != pow or power < 0: + raise ValueError("Power must be a non-negative integer.") + elif maxpower is not None and power > maxpower: + raise ValueError("Power is too large") + elif power == 0: + return np.array([1], dtype=c.dtype) + elif power == 1: + return c + else: + # This can be made more efficient by using powers of two + # in the usual way. + zs = _cseries_to_zseries(c) + prd = zs + for i in range(2, power + 1): + prd = np.convolve(prd, zs) + return _zseries_to_cseries(prd) + + +def chebder(c, m=1, scl=1, axis=0): + """ + Differentiate a Chebyshev series. + + Returns the Chebyshev series coefficients `c` differentiated `m` times + along `axis`. At each iteration the result is multiplied by `scl` (the + scaling factor is for use in a linear change of variable). 
The argument + `c` is an array of coefficients from low to high degree along each + axis, e.g., [1,2,3] represents the series ``1*T_0 + 2*T_1 + 3*T_2`` + while [[1,2],[1,2]] represents ``1*T_0(x)*T_0(y) + 1*T_1(x)*T_0(y) + + 2*T_0(x)*T_1(y) + 2*T_1(x)*T_1(y)`` if axis=0 is ``x`` and axis=1 is + ``y``. + + Parameters + ---------- + c : array_like + Array of Chebyshev series coefficients. If c is multidimensional + the different axis correspond to different variables with the + degree in each axis given by the corresponding index. + m : int, optional + Number of derivatives taken, must be non-negative. (Default: 1) + scl : scalar, optional + Each differentiation is multiplied by `scl`. The end result is + multiplication by ``scl**m``. This is for use in a linear change of + variable. (Default: 1) + axis : int, optional + Axis over which the derivative is taken. (Default: 0). + + .. versionadded:: 1.7.0 + + Returns + ------- + der : ndarray + Chebyshev series of the derivative. + + See Also + -------- + chebint + + Notes + ----- + In general, the result of differentiating a C-series needs to be + "reprojected" onto the C-series basis set. Thus, typically, the + result of this function is "unintuitive," albeit correct; see Examples + section below. + + Examples + -------- + >>> from numpy.polynomial import chebyshev as C + >>> c = (1,2,3,4) + >>> C.chebder(c) + array([14., 12., 24.]) + >>> C.chebder(c,3) + array([96.]) + >>> C.chebder(c,scl=-1) + array([-14., -12., -24.]) + >>> C.chebder(c,2,-1) + array([12., 96.]) + + """ + c = np.array(c, ndmin=1, copy=True) + if c.dtype.char in '?bBhHiIlLqQpP': + c = c.astype(np.double) + cnt = pu._deprecate_as_int(m, "the order of derivation") + iaxis = pu._deprecate_as_int(axis, "the axis") + if cnt < 0: + raise ValueError("The order of derivation must be non-negative") + iaxis = normalize_axis_index(iaxis, c.ndim) + + if cnt == 0: + return c + + c = np.moveaxis(c, iaxis, 0) + n = len(c) + if cnt >= n: + c = c[:1]*0 + else: + for i in range(cnt): + n = n - 1 + c *= scl + der = np.empty((n,) + c.shape[1:], dtype=c.dtype) + for j in range(n, 2, -1): + der[j - 1] = (2*j)*c[j] + c[j - 2] += (j*c[j])/(j - 2) + if n > 1: + der[1] = 4*c[2] + der[0] = c[1] + c = der + c = np.moveaxis(c, 0, iaxis) + return c + + +def chebint(c, m=1, k=[], lbnd=0, scl=1, axis=0): + """ + Integrate a Chebyshev series. + + Returns the Chebyshev series coefficients `c` integrated `m` times from + `lbnd` along `axis`. At each iteration the resulting series is + **multiplied** by `scl` and an integration constant, `k`, is added. + The scaling factor is for use in a linear change of variable. ("Buyer + beware": note that, depending on what one is doing, one may want `scl` + to be the reciprocal of what one might expect; for more information, + see the Notes section below.) The argument `c` is an array of + coefficients from low to high degree along each axis, e.g., [1,2,3] + represents the series ``T_0 + 2*T_1 + 3*T_2`` while [[1,2],[1,2]] + represents ``1*T_0(x)*T_0(y) + 1*T_1(x)*T_0(y) + 2*T_0(x)*T_1(y) + + 2*T_1(x)*T_1(y)`` if axis=0 is ``x`` and axis=1 is ``y``. + + Parameters + ---------- + c : array_like + Array of Chebyshev series coefficients. If c is multidimensional + the different axis correspond to different variables with the + degree in each axis given by the corresponding index. + m : int, optional + Order of integration, must be positive. (Default: 1) + k : {[], list, scalar}, optional + Integration constant(s). 
The value of the first integral at zero + is the first value in the list, the value of the second integral + at zero is the second value, etc. If ``k == []`` (the default), + all constants are set to zero. If ``m == 1``, a single scalar can + be given instead of a list. + lbnd : scalar, optional + The lower bound of the integral. (Default: 0) + scl : scalar, optional + Following each integration the result is *multiplied* by `scl` + before the integration constant is added. (Default: 1) + axis : int, optional + Axis over which the integral is taken. (Default: 0). + + .. versionadded:: 1.7.0 + + Returns + ------- + S : ndarray + C-series coefficients of the integral. + + Raises + ------ + ValueError + If ``m < 1``, ``len(k) > m``, ``np.ndim(lbnd) != 0``, or + ``np.ndim(scl) != 0``. + + See Also + -------- + chebder + + Notes + ----- + Note that the result of each integration is *multiplied* by `scl`. + Why is this important to note? Say one is making a linear change of + variable :math:`u = ax + b` in an integral relative to `x`. Then + :math:`dx = du/a`, so one will need to set `scl` equal to + :math:`1/a`- perhaps not what one would have first thought. + + Also note that, in general, the result of integrating a C-series needs + to be "reprojected" onto the C-series basis set. Thus, typically, + the result of this function is "unintuitive," albeit correct; see + Examples section below. + + Examples + -------- + >>> from numpy.polynomial import chebyshev as C + >>> c = (1,2,3) + >>> C.chebint(c) + array([ 0.5, -0.5, 0.5, 0.5]) + >>> C.chebint(c,3) + array([ 0.03125 , -0.1875 , 0.04166667, -0.05208333, 0.01041667, # may vary + 0.00625 ]) + >>> C.chebint(c, k=3) + array([ 3.5, -0.5, 0.5, 0.5]) + >>> C.chebint(c,lbnd=-2) + array([ 8.5, -0.5, 0.5, 0.5]) + >>> C.chebint(c,scl=-2) + array([-1., 1., -1., -1.]) + + """ + c = np.array(c, ndmin=1, copy=True) + if c.dtype.char in '?bBhHiIlLqQpP': + c = c.astype(np.double) + if not np.iterable(k): + k = [k] + cnt = pu._deprecate_as_int(m, "the order of integration") + iaxis = pu._deprecate_as_int(axis, "the axis") + if cnt < 0: + raise ValueError("The order of integration must be non-negative") + if len(k) > cnt: + raise ValueError("Too many integration constants") + if np.ndim(lbnd) != 0: + raise ValueError("lbnd must be a scalar.") + if np.ndim(scl) != 0: + raise ValueError("scl must be a scalar.") + iaxis = normalize_axis_index(iaxis, c.ndim) + + if cnt == 0: + return c + + c = np.moveaxis(c, iaxis, 0) + k = list(k) + [0]*(cnt - len(k)) + for i in range(cnt): + n = len(c) + c *= scl + if n == 1 and np.all(c[0] == 0): + c[0] += k[i] + else: + tmp = np.empty((n + 1,) + c.shape[1:], dtype=c.dtype) + tmp[0] = c[0]*0 + tmp[1] = c[0] + if n > 1: + tmp[2] = c[1]/4 + for j in range(2, n): + tmp[j + 1] = c[j]/(2*(j + 1)) + tmp[j - 1] -= c[j]/(2*(j - 1)) + tmp[0] += k[i] - chebval(lbnd, tmp) + c = tmp + c = np.moveaxis(c, 0, iaxis) + return c + + +def chebval(x, c, tensor=True): + """ + Evaluate a Chebyshev series at points x. + + If `c` is of length `n + 1`, this function returns the value: + + .. math:: p(x) = c_0 * T_0(x) + c_1 * T_1(x) + ... + c_n * T_n(x) + + The parameter `x` is converted to an array only if it is a tuple or a + list, otherwise it is treated as a scalar. In either case, either `x` + or its elements must support multiplication and addition both with + themselves and with the elements of `c`. + + If `c` is a 1-D array, then `p(x)` will have the same shape as `x`. 
If + `c` is multidimensional, then the shape of the result depends on the + value of `tensor`. If `tensor` is true the shape will be c.shape[1:] + + x.shape. If `tensor` is false the shape will be c.shape[1:]. Note that + scalars have shape (,). + + Trailing zeros in the coefficients will be used in the evaluation, so + they should be avoided if efficiency is a concern. + + Parameters + ---------- + x : array_like, compatible object + If `x` is a list or tuple, it is converted to an ndarray, otherwise + it is left unchanged and treated as a scalar. In either case, `x` + or its elements must support addition and multiplication with + with themselves and with the elements of `c`. + c : array_like + Array of coefficients ordered so that the coefficients for terms of + degree n are contained in c[n]. If `c` is multidimensional the + remaining indices enumerate multiple polynomials. In the two + dimensional case the coefficients may be thought of as stored in + the columns of `c`. + tensor : boolean, optional + If True, the shape of the coefficient array is extended with ones + on the right, one for each dimension of `x`. Scalars have dimension 0 + for this action. The result is that every column of coefficients in + `c` is evaluated for every element of `x`. If False, `x` is broadcast + over the columns of `c` for the evaluation. This keyword is useful + when `c` is multidimensional. The default value is True. + + .. versionadded:: 1.7.0 + + Returns + ------- + values : ndarray, algebra_like + The shape of the return value is described above. + + See Also + -------- + chebval2d, chebgrid2d, chebval3d, chebgrid3d + + Notes + ----- + The evaluation uses Clenshaw recursion, aka synthetic division. + + """ + c = np.array(c, ndmin=1, copy=True) + if c.dtype.char in '?bBhHiIlLqQpP': + c = c.astype(np.double) + if isinstance(x, (tuple, list)): + x = np.asarray(x) + if isinstance(x, np.ndarray) and tensor: + c = c.reshape(c.shape + (1,)*x.ndim) + + if len(c) == 1: + c0 = c[0] + c1 = 0 + elif len(c) == 2: + c0 = c[0] + c1 = c[1] + else: + x2 = 2*x + c0 = c[-2] + c1 = c[-1] + for i in range(3, len(c) + 1): + tmp = c0 + c0 = c[-i] - c1 + c1 = tmp + c1*x2 + return c0 + c1*x + + +def chebval2d(x, y, c): + """ + Evaluate a 2-D Chebyshev series at points (x, y). + + This function returns the values: + + .. math:: p(x,y) = \\sum_{i,j} c_{i,j} * T_i(x) * T_j(y) + + The parameters `x` and `y` are converted to arrays only if they are + tuples or a lists, otherwise they are treated as a scalars and they + must have the same shape after conversion. In either case, either `x` + and `y` or their elements must support multiplication and addition both + with themselves and with the elements of `c`. + + If `c` is a 1-D array a one is implicitly appended to its shape to make + it 2-D. The shape of the result will be c.shape[2:] + x.shape. + + Parameters + ---------- + x, y : array_like, compatible objects + The two dimensional series is evaluated at the points `(x, y)`, + where `x` and `y` must have the same shape. If `x` or `y` is a list + or tuple, it is first converted to an ndarray, otherwise it is left + unchanged and if it isn't an ndarray it is treated as a scalar. + c : array_like + Array of coefficients ordered so that the coefficient of the term + of multi-degree i,j is contained in ``c[i,j]``. If `c` has + dimension greater than 2 the remaining indices enumerate multiple + sets of coefficients. 
+ + Returns + ------- + values : ndarray, compatible object + The values of the two dimensional Chebyshev series at points formed + from pairs of corresponding values from `x` and `y`. + + See Also + -------- + chebval, chebgrid2d, chebval3d, chebgrid3d + + Notes + ----- + + .. versionadded:: 1.7.0 + + """ + return pu._valnd(chebval, c, x, y) + + +def chebgrid2d(x, y, c): + """ + Evaluate a 2-D Chebyshev series on the Cartesian product of x and y. + + This function returns the values: + + .. math:: p(a,b) = \\sum_{i,j} c_{i,j} * T_i(a) * T_j(b), + + where the points `(a, b)` consist of all pairs formed by taking + `a` from `x` and `b` from `y`. The resulting points form a grid with + `x` in the first dimension and `y` in the second. + + The parameters `x` and `y` are converted to arrays only if they are + tuples or a lists, otherwise they are treated as a scalars. In either + case, either `x` and `y` or their elements must support multiplication + and addition both with themselves and with the elements of `c`. + + If `c` has fewer than two dimensions, ones are implicitly appended to + its shape to make it 2-D. The shape of the result will be c.shape[2:] + + x.shape + y.shape. + + Parameters + ---------- + x, y : array_like, compatible objects + The two dimensional series is evaluated at the points in the + Cartesian product of `x` and `y`. If `x` or `y` is a list or + tuple, it is first converted to an ndarray, otherwise it is left + unchanged and, if it isn't an ndarray, it is treated as a scalar. + c : array_like + Array of coefficients ordered so that the coefficient of the term of + multi-degree i,j is contained in `c[i,j]`. If `c` has dimension + greater than two the remaining indices enumerate multiple sets of + coefficients. + + Returns + ------- + values : ndarray, compatible object + The values of the two dimensional Chebyshev series at points in the + Cartesian product of `x` and `y`. + + See Also + -------- + chebval, chebval2d, chebval3d, chebgrid3d + + Notes + ----- + + .. versionadded:: 1.7.0 + + """ + return pu._gridnd(chebval, c, x, y) + + +def chebval3d(x, y, z, c): + """ + Evaluate a 3-D Chebyshev series at points (x, y, z). + + This function returns the values: + + .. math:: p(x,y,z) = \\sum_{i,j,k} c_{i,j,k} * T_i(x) * T_j(y) * T_k(z) + + The parameters `x`, `y`, and `z` are converted to arrays only if + they are tuples or a lists, otherwise they are treated as a scalars and + they must have the same shape after conversion. In either case, either + `x`, `y`, and `z` or their elements must support multiplication and + addition both with themselves and with the elements of `c`. + + If `c` has fewer than 3 dimensions, ones are implicitly appended to its + shape to make it 3-D. The shape of the result will be c.shape[3:] + + x.shape. + + Parameters + ---------- + x, y, z : array_like, compatible object + The three dimensional series is evaluated at the points + `(x, y, z)`, where `x`, `y`, and `z` must have the same shape. If + any of `x`, `y`, or `z` is a list or tuple, it is first converted + to an ndarray, otherwise it is left unchanged and if it isn't an + ndarray it is treated as a scalar. + c : array_like + Array of coefficients ordered so that the coefficient of the term of + multi-degree i,j,k is contained in ``c[i,j,k]``. If `c` has dimension + greater than 3 the remaining indices enumerate multiple sets of + coefficients. 
+
+    Returns
+    -------
+    values : ndarray, compatible object
+        The values of the multidimensional polynomial on points formed with
+        triples of corresponding values from `x`, `y`, and `z`.
+
+    See Also
+    --------
+    chebval, chebval2d, chebgrid2d, chebgrid3d
+
+    Notes
+    -----
+
+    .. versionadded:: 1.7.0
+
+    """
+    return pu._valnd(chebval, c, x, y, z)
+
+
+def chebgrid3d(x, y, z, c):
+    """
+    Evaluate a 3-D Chebyshev series on the Cartesian product of x, y, and z.
+
+    This function returns the values:
+
+    .. math:: p(a,b,c) = \\sum_{i,j,k} c_{i,j,k} * T_i(a) * T_j(b) * T_k(c)
+
+    where the points `(a, b, c)` consist of all triples formed by taking
+    `a` from `x`, `b` from `y`, and `c` from `z`. The resulting points form
+    a grid with `x` in the first dimension, `y` in the second, and `z` in
+    the third.
+
+    The parameters `x`, `y`, and `z` are converted to arrays only if they
+    are tuples or lists, otherwise they are treated as scalars. In
+    either case, either `x`, `y`, and `z` or their elements must support
+    multiplication and addition both with themselves and with the elements
+    of `c`.
+
+    If `c` has fewer than three dimensions, ones are implicitly appended to
+    its shape to make it 3-D. The shape of the result will be c.shape[3:] +
+    x.shape + y.shape + z.shape.
+
+    Parameters
+    ----------
+    x, y, z : array_like, compatible objects
+        The three dimensional series is evaluated at the points in the
+        Cartesian product of `x`, `y`, and `z`. If `x`, `y`, or `z` is a
+        list or tuple, it is first converted to an ndarray, otherwise it is
+        left unchanged and, if it isn't an ndarray, it is treated as a
+        scalar.
+    c : array_like
+        Array of coefficients ordered so that the coefficient of the term of
+        multi-degree i,j,k is contained in ``c[i,j,k]``. If `c` has dimension
+        greater than three the remaining indices enumerate multiple sets of
+        coefficients.
+
+    Returns
+    -------
+    values : ndarray, compatible object
+        The values of the three dimensional Chebyshev series at points in
+        the Cartesian product of `x`, `y`, and `z`.
+
+    See Also
+    --------
+    chebval, chebval2d, chebgrid2d, chebval3d
+
+    Notes
+    -----
+
+    .. versionadded:: 1.7.0
+
+    """
+    return pu._gridnd(chebval, c, x, y, z)
+
+
+def chebvander(x, deg):
+    """Pseudo-Vandermonde matrix of given degree.
+
+    Returns the pseudo-Vandermonde matrix of degree `deg` and sample points
+    `x`. The pseudo-Vandermonde matrix is defined by
+
+    .. math:: V[..., i] = T_i(x),
+
+    where `0 <= i <= deg`. The leading indices of `V` index the elements of
+    `x` and the last index is the degree of the Chebyshev polynomial.
+
+    If `c` is a 1-D array of coefficients of length `n + 1` and `V` is the
+    matrix ``V = chebvander(x, n)``, then ``np.dot(V, c)`` and
+    ``chebval(x, c)`` are the same up to roundoff. This equivalence is
+    useful both for least squares fitting and for the evaluation of a large
+    number of Chebyshev series of the same degree and sample points.
+
+    Parameters
+    ----------
+    x : array_like
+        Array of points. The dtype is converted to float64 or complex128
+        depending on whether any of the elements are complex. If `x` is
+        scalar it is converted to a 1-D array.
+    deg : int
+        Degree of the resulting matrix.
+
+    Returns
+    -------
+    vander : ndarray
+        The pseudo Vandermonde matrix. The shape of the returned matrix is
+        ``x.shape + (deg + 1,)``, where the last index is the degree of the
+        corresponding Chebyshev polynomial. The dtype will be the same as
+        the converted `x`.
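+
+    Examples
+    --------
+    A small worked check (each row holds ``T_0, T_1, T_2`` evaluated at
+    one sample point):
+
+    >>> from numpy.polynomial import chebyshev as C
+    >>> C.chebvander([-1., 0., 1.], 2)
+    array([[ 1., -1.,  1.],
+           [ 1.,  0., -1.],
+           [ 1.,  1.,  1.]])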
+ + """ + ideg = pu._deprecate_as_int(deg, "deg") + if ideg < 0: + raise ValueError("deg must be non-negative") + + x = np.array(x, copy=False, ndmin=1) + 0.0 + dims = (ideg + 1,) + x.shape + dtyp = x.dtype + v = np.empty(dims, dtype=dtyp) + # Use forward recursion to generate the entries. + v[0] = x*0 + 1 + if ideg > 0: + x2 = 2*x + v[1] = x + for i in range(2, ideg + 1): + v[i] = v[i-1]*x2 - v[i-2] + return np.moveaxis(v, 0, -1) + + +def chebvander2d(x, y, deg): + """Pseudo-Vandermonde matrix of given degrees. + + Returns the pseudo-Vandermonde matrix of degrees `deg` and sample + points `(x, y)`. The pseudo-Vandermonde matrix is defined by + + .. math:: V[..., (deg[1] + 1)*i + j] = T_i(x) * T_j(y), + + where `0 <= i <= deg[0]` and `0 <= j <= deg[1]`. The leading indices of + `V` index the points `(x, y)` and the last index encodes the degrees of + the Chebyshev polynomials. + + If ``V = chebvander2d(x, y, [xdeg, ydeg])``, then the columns of `V` + correspond to the elements of a 2-D coefficient array `c` of shape + (xdeg + 1, ydeg + 1) in the order + + .. math:: c_{00}, c_{01}, c_{02} ... , c_{10}, c_{11}, c_{12} ... + + and ``np.dot(V, c.flat)`` and ``chebval2d(x, y, c)`` will be the same + up to roundoff. This equivalence is useful both for least squares + fitting and for the evaluation of a large number of 2-D Chebyshev + series of the same degrees and sample points. + + Parameters + ---------- + x, y : array_like + Arrays of point coordinates, all of the same shape. The dtypes + will be converted to either float64 or complex128 depending on + whether any of the elements are complex. Scalars are converted to + 1-D arrays. + deg : list of ints + List of maximum degrees of the form [x_deg, y_deg]. + + Returns + ------- + vander2d : ndarray + The shape of the returned matrix is ``x.shape + (order,)``, where + :math:`order = (deg[0]+1)*(deg[1]+1)`. The dtype will be the same + as the converted `x` and `y`. + + See Also + -------- + chebvander, chebvander3d, chebval2d, chebval3d + + Notes + ----- + + .. versionadded:: 1.7.0 + + """ + return pu._vander_nd_flat((chebvander, chebvander), (x, y), deg) + + +def chebvander3d(x, y, z, deg): + """Pseudo-Vandermonde matrix of given degrees. + + Returns the pseudo-Vandermonde matrix of degrees `deg` and sample + points `(x, y, z)`. If `l, m, n` are the given degrees in `x, y, z`, + then The pseudo-Vandermonde matrix is defined by + + .. math:: V[..., (m+1)(n+1)i + (n+1)j + k] = T_i(x)*T_j(y)*T_k(z), + + where `0 <= i <= l`, `0 <= j <= m`, and `0 <= j <= n`. The leading + indices of `V` index the points `(x, y, z)` and the last index encodes + the degrees of the Chebyshev polynomials. + + If ``V = chebvander3d(x, y, z, [xdeg, ydeg, zdeg])``, then the columns + of `V` correspond to the elements of a 3-D coefficient array `c` of + shape (xdeg + 1, ydeg + 1, zdeg + 1) in the order + + .. math:: c_{000}, c_{001}, c_{002},... , c_{010}, c_{011}, c_{012},... + + and ``np.dot(V, c.flat)`` and ``chebval3d(x, y, z, c)`` will be the + same up to roundoff. This equivalence is useful both for least squares + fitting and for the evaluation of a large number of 3-D Chebyshev + series of the same degrees and sample points. + + Parameters + ---------- + x, y, z : array_like + Arrays of point coordinates, all of the same shape. The dtypes will + be converted to either float64 or complex128 depending on whether + any of the elements are complex. Scalars are converted to 1-D + arrays. + deg : list of ints + List of maximum degrees of the form [x_deg, y_deg, z_deg]. 
+
+    Returns
+    -------
+    vander3d : ndarray
+        The shape of the returned matrix is ``x.shape + (order,)``, where
+        :math:`order = (deg[0]+1)*(deg[1]+1)*(deg[2]+1)`. The dtype will
+        be the same as the converted `x`, `y`, and `z`.
+
+    See Also
+    --------
+    chebvander, chebvander2d, chebval2d, chebval3d
+
+    Notes
+    -----
+
+    .. versionadded:: 1.7.0
+
+    """
+    return pu._vander_nd_flat((chebvander, chebvander, chebvander), (x, y, z), deg)
+
+
+def chebfit(x, y, deg, rcond=None, full=False, w=None):
+    """
+    Least squares fit of Chebyshev series to data.
+
+    Return the coefficients of a Chebyshev series of degree `deg` that is the
+    least squares fit to the data values `y` given at points `x`. If `y` is
+    1-D the returned coefficients will also be 1-D. If `y` is 2-D multiple
+    fits are done, one for each column of `y`, and the resulting
+    coefficients are stored in the corresponding columns of a 2-D return.
+    The fitted polynomial(s) are in the form
+
+    .. math:: p(x) = c_0 + c_1 * T_1(x) + ... + c_n * T_n(x),
+
+    where `n` is `deg`.
+
+    Parameters
+    ----------
+    x : array_like, shape (M,)
+        x-coordinates of the M sample points ``(x[i], y[i])``.
+    y : array_like, shape (M,) or (M, K)
+        y-coordinates of the sample points. Several data sets of sample
+        points sharing the same x-coordinates can be fitted at once by
+        passing in a 2D-array that contains one dataset per column.
+    deg : int or 1-D array_like
+        Degree(s) of the fitting polynomials. If `deg` is a single integer,
+        all terms up to and including the `deg`'th term are included in the
+        fit. For NumPy versions >= 1.11.0 a list of integers specifying the
+        degrees of the terms to include may be used instead.
+    rcond : float, optional
+        Relative condition number of the fit. Singular values smaller than
+        this relative to the largest singular value will be ignored. The
+        default value is len(x)*eps, where eps is the relative precision of
+        the float type, about 2e-16 in most cases.
+    full : bool, optional
+        Switch determining nature of return value. When it is False (the
+        default) just the coefficients are returned, when True diagnostic
+        information from the singular value decomposition is also returned.
+    w : array_like, shape (`M`,), optional
+        Weights. If not None, the weight ``w[i]`` applies to the unsquared
+        residual ``y[i] - y_hat[i]`` at ``x[i]``. Ideally the weights are
+        chosen so that the errors of the products ``w[i]*y[i]`` all have the
+        same variance. When using inverse-variance weighting, use
+        ``w[i] = 1/sigma(y[i])``. The default value is None.
+
+        .. versionadded:: 1.5.0
+
+    Returns
+    -------
+    coef : ndarray, shape (deg + 1,) or (deg + 1, K)
+        Chebyshev coefficients ordered from low to high. If `y` was 2-D,
+        the coefficients for the data in column k of `y` are in column
+        `k`.
+
+    [residuals, rank, singular_values, rcond] : list
+        These values are only returned if ``full == True``
+
+        - residuals -- sum of squared residuals of the least squares fit
+        - rank -- the numerical rank of the scaled Vandermonde matrix
+        - singular_values -- singular values of the scaled Vandermonde matrix
+        - rcond -- value of `rcond`.
+
+        For more details, see `numpy.linalg.lstsq`.
+
+    Warns
+    -----
+    RankWarning
+        The rank of the coefficient matrix in the least-squares fit is
+        deficient. The warning is only raised if ``full == False``.
The + warnings can be turned off by + + >>> import warnings + >>> warnings.simplefilter('ignore', np.RankWarning) + + See Also + -------- + numpy.polynomial.polynomial.polyfit + numpy.polynomial.legendre.legfit + numpy.polynomial.laguerre.lagfit + numpy.polynomial.hermite.hermfit + numpy.polynomial.hermite_e.hermefit + chebval : Evaluates a Chebyshev series. + chebvander : Vandermonde matrix of Chebyshev series. + chebweight : Chebyshev weight function. + numpy.linalg.lstsq : Computes a least-squares fit from the matrix. + scipy.interpolate.UnivariateSpline : Computes spline fits. + + Notes + ----- + The solution is the coefficients of the Chebyshev series `p` that + minimizes the sum of the weighted squared errors + + .. math:: E = \\sum_j w_j^2 * |y_j - p(x_j)|^2, + + where :math:`w_j` are the weights. This problem is solved by setting up + as the (typically) overdetermined matrix equation + + .. math:: V(x) * c = w * y, + + where `V` is the weighted pseudo Vandermonde matrix of `x`, `c` are the + coefficients to be solved for, `w` are the weights, and `y` are the + observed values. This equation is then solved using the singular value + decomposition of `V`. + + If some of the singular values of `V` are so small that they are + neglected, then a `RankWarning` will be issued. This means that the + coefficient values may be poorly determined. Using a lower order fit + will usually get rid of the warning. The `rcond` parameter can also be + set to a value smaller than its default, but the resulting fit may be + spurious and have large contributions from roundoff error. + + Fits using Chebyshev series are usually better conditioned than fits + using power series, but much can depend on the distribution of the + sample points and the smoothness of the data. If the quality of the fit + is inadequate splines may be a good alternative. + + References + ---------- + .. [1] Wikipedia, "Curve fitting", + https://en.wikipedia.org/wiki/Curve_fitting + + Examples + -------- + + """ + return pu._fit(chebvander, x, y, deg, rcond, full, w) + + +def chebcompanion(c): + """Return the scaled companion matrix of c. + + The basis polynomials are scaled so that the companion matrix is + symmetric when `c` is a Chebyshev basis polynomial. This provides + better eigenvalue estimates than the unscaled case and for basis + polynomials the eigenvalues are guaranteed to be real if + `numpy.linalg.eigvalsh` is used to obtain them. + + Parameters + ---------- + c : array_like + 1-D array of Chebyshev series coefficients ordered from low to high + degree. + + Returns + ------- + mat : ndarray + Scaled companion matrix of dimensions (deg, deg). + + Notes + ----- + + .. versionadded:: 1.7.0 + + """ + # c is a trimmed copy + [c] = pu.as_series([c]) + if len(c) < 2: + raise ValueError('Series must have maximum degree of at least 1.') + if len(c) == 2: + return np.array([[-c[0]/c[1]]]) + + n = len(c) - 1 + mat = np.zeros((n, n), dtype=c.dtype) + scl = np.array([1.] + [np.sqrt(.5)]*(n-1)) + top = mat.reshape(-1)[1::n+1] + bot = mat.reshape(-1)[n::n+1] + top[0] = np.sqrt(.5) + top[1:] = 1/2 + bot[...] = top + mat[:, -1] -= (c[:-1]/c[-1])*(scl/scl[-1])*.5 + return mat + + +def chebroots(c): + """ + Compute the roots of a Chebyshev series. + + Return the roots (a.k.a. "zeros") of the polynomial + + .. math:: p(x) = \\sum_i c[i] * T_i(x). + + Parameters + ---------- + c : 1-D array_like + 1-D array of coefficients. + + Returns + ------- + out : ndarray + Array of the roots of the series. 
If all the roots are real,
+        then `out` is also real, otherwise it is complex.
+
+    See Also
+    --------
+    numpy.polynomial.polynomial.polyroots
+    numpy.polynomial.legendre.legroots
+    numpy.polynomial.laguerre.lagroots
+    numpy.polynomial.hermite.hermroots
+    numpy.polynomial.hermite_e.hermeroots
+
+    Notes
+    -----
+    The root estimates are obtained as the eigenvalues of the companion
+    matrix. Roots far from the origin of the complex plane may have large
+    errors due to the numerical instability of the series for such
+    values. Roots with multiplicity greater than 1 will also show larger
+    errors as the value of the series near such points is relatively
+    insensitive to errors in the roots. Isolated roots near the origin can
+    be improved by a few iterations of Newton's method.
+
+    The Chebyshev series basis polynomials aren't powers of `x` so the
+    results of this function may seem unintuitive.
+
+    Examples
+    --------
+    >>> import numpy.polynomial.chebyshev as cheb
+    >>> cheb.chebroots((-1, 1,-1, 1)) # T3 - T2 + T1 - T0 has real roots
+    array([ -5.00000000e-01,   2.60860684e-17,   1.00000000e+00]) # may vary
+
+    """
+    # c is a trimmed copy
+    [c] = pu.as_series([c])
+    if len(c) < 2:
+        return np.array([], dtype=c.dtype)
+    if len(c) == 2:
+        return np.array([-c[0]/c[1]])
+
+    # rotated companion matrix reduces error
+    m = chebcompanion(c)[::-1,::-1]
+    r = la.eigvals(m)
+    r.sort()
+    return r
+
+
+def chebinterpolate(func, deg, args=()):
+    """Interpolate a function at the Chebyshev points of the first kind.
+
+    Returns the Chebyshev series that interpolates `func` at the Chebyshev
+    points of the first kind in the interval [-1, 1]. The interpolating
+    series tends to a minmax approximation to `func` with increasing `deg`
+    if the function is continuous in the interval.
+
+    .. versionadded:: 1.14.0
+
+    Parameters
+    ----------
+    func : function
+        The function to be approximated. It must be a function of a single
+        variable of the form ``f(x, a, b, c...)``, where ``a, b, c...`` are
+        extra arguments passed in the `args` parameter.
+    deg : int
+        Degree of the interpolating polynomial
+    args : tuple, optional
+        Extra arguments to be used in the function call. Default is no extra
+        arguments.
+
+    Returns
+    -------
+    coef : ndarray, shape (deg + 1,)
+        Chebyshev coefficients of the interpolating series ordered from low to
+        high.
+
+    Examples
+    --------
+    >>> import numpy.polynomial.chebyshev as C
+    >>> C.chebinterpolate(lambda x: np.tanh(x) + 0.5, 8)
+    array([  5.00000000e-01,   8.11675684e-01,  -9.86864911e-17,
+            -5.42457905e-02,  -2.71387850e-16,   4.51658839e-03,
+             2.46716228e-17,  -3.79694221e-04,  -3.26899002e-16])
+
+    Notes
+    -----
+
+    The Chebyshev polynomials used in the interpolation are orthogonal when
+    sampled at the Chebyshev points of the first kind. If it is desired to
+    constrain some of the coefficients they can simply be set to the desired
+    value after the interpolation, no new interpolation or fit is needed. This
+    is especially useful if it is known a priori that some of the
+    coefficients are zero. For instance, if the function is even then the
+    coefficients of the terms of odd degree in the result can be set to zero.
+
+    """
+    deg = np.asarray(deg)
+
+    # check arguments.
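+    # deg must be a scalar non-negative integer; the np.asarray call above
+    # means a 0-d integer array is accepted as well.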
+ if deg.ndim > 0 or deg.dtype.kind not in 'iu' or deg.size == 0: + raise TypeError("deg must be an int") + if deg < 0: + raise ValueError("expected deg >= 0") + + order = deg + 1 + xcheb = chebpts1(order) + yfunc = func(xcheb, *args) + m = chebvander(xcheb, deg) + c = np.dot(m.T, yfunc) + c[0] /= order + c[1:] /= 0.5*order + + return c + + +def chebgauss(deg): + """ + Gauss-Chebyshev quadrature. + + Computes the sample points and weights for Gauss-Chebyshev quadrature. + These sample points and weights will correctly integrate polynomials of + degree :math:`2*deg - 1` or less over the interval :math:`[-1, 1]` with + the weight function :math:`f(x) = 1/\\sqrt{1 - x^2}`. + + Parameters + ---------- + deg : int + Number of sample points and weights. It must be >= 1. + + Returns + ------- + x : ndarray + 1-D ndarray containing the sample points. + y : ndarray + 1-D ndarray containing the weights. + + Notes + ----- + + .. versionadded:: 1.7.0 + + The results have only been tested up to degree 100, higher degrees may + be problematic. For Gauss-Chebyshev there are closed form solutions for + the sample points and weights. If n = `deg`, then + + .. math:: x_i = \\cos(\\pi (2 i - 1) / (2 n)) + + .. math:: w_i = \\pi / n + + """ + ideg = pu._deprecate_as_int(deg, "deg") + if ideg <= 0: + raise ValueError("deg must be a positive integer") + + x = np.cos(np.pi * np.arange(1, 2*ideg, 2) / (2.0*ideg)) + w = np.ones(ideg)*(np.pi/ideg) + + return x, w + + +def chebweight(x): + """ + The weight function of the Chebyshev polynomials. + + The weight function is :math:`1/\\sqrt{1 - x^2}` and the interval of + integration is :math:`[-1, 1]`. The Chebyshev polynomials are + orthogonal, but not normalized, with respect to this weight function. + + Parameters + ---------- + x : array_like + Values at which the weight function will be computed. + + Returns + ------- + w : ndarray + The weight function at `x`. + + Notes + ----- + + .. versionadded:: 1.7.0 + + """ + w = 1./(np.sqrt(1. + x) * np.sqrt(1. - x)) + return w + + +def chebpts1(npts): + """ + Chebyshev points of the first kind. + + The Chebyshev points of the first kind are the points ``cos(x)``, + where ``x = [pi*(k + .5)/npts for k in range(npts)]``. + + Parameters + ---------- + npts : int + Number of sample points desired. + + Returns + ------- + pts : ndarray + The Chebyshev points of the first kind. + + See Also + -------- + chebpts2 + + Notes + ----- + + .. versionadded:: 1.5.0 + + """ + _npts = int(npts) + if _npts != npts: + raise ValueError("npts must be integer") + if _npts < 1: + raise ValueError("npts must be >= 1") + + x = 0.5 * np.pi / _npts * np.arange(-_npts+1, _npts+1, 2) + return np.sin(x) + + +def chebpts2(npts): + """ + Chebyshev points of the second kind. + + The Chebyshev points of the second kind are the points ``cos(x)``, + where ``x = [pi*k/(npts - 1) for k in range(npts)]``. + + Parameters + ---------- + npts : int + Number of sample points desired. + + Returns + ------- + pts : ndarray + The Chebyshev points of the second kind. + + Notes + ----- + + .. versionadded:: 1.5.0 + + """ + _npts = int(npts) + if _npts != npts: + raise ValueError("npts must be integer") + if _npts < 2: + raise ValueError("npts must be >= 2") + + x = np.linspace(-np.pi, 0, _npts) + return np.cos(x) + + +# +# Chebyshev series class +# + +class Chebyshev(ABCPolyBase): + """A Chebyshev series class. 
+
+    The Chebyshev class provides the standard Python numerical methods
+    '+', '-', '*', '//', '%', 'divmod', '**', and '()' as well as the
+    methods listed below.
+
+    Parameters
+    ----------
+    coef : array_like
+        Chebyshev coefficients in order of increasing degree, i.e.,
+        ``(1, 2, 3)`` gives ``1*T_0(x) + 2*T_1(x) + 3*T_2(x)``.
+    domain : (2,) array_like, optional
+        Domain to use. The interval ``[domain[0], domain[1]]`` is mapped
+        to the interval ``[window[0], window[1]]`` by shifting and scaling.
+        The default value is [-1, 1].
+    window : (2,) array_like, optional
+        Window, see `domain` for its use. The default value is [-1, 1].
+
+        .. versionadded:: 1.6.0
+
+    """
+    # Virtual Functions
+    _add = staticmethod(chebadd)
+    _sub = staticmethod(chebsub)
+    _mul = staticmethod(chebmul)
+    _div = staticmethod(chebdiv)
+    _pow = staticmethod(chebpow)
+    _val = staticmethod(chebval)
+    _int = staticmethod(chebint)
+    _der = staticmethod(chebder)
+    _fit = staticmethod(chebfit)
+    _line = staticmethod(chebline)
+    _roots = staticmethod(chebroots)
+    _fromroots = staticmethod(chebfromroots)
+
+    @classmethod
+    def interpolate(cls, func, deg, domain=None, args=()):
+        """Interpolate a function at the Chebyshev points of the first kind.
+
+        Returns the series that interpolates `func` at the Chebyshev points of
+        the first kind scaled and shifted to the `domain`. The resulting series
+        tends to a minmax approximation of `func` when the function is
+        continuous in the domain.
+
+        .. versionadded:: 1.14.0
+
+        Parameters
+        ----------
+        func : function
+            The function to be interpolated. It must be a function of a single
+            variable of the form ``f(x, a, b, c...)``, where ``a, b, c...`` are
+            extra arguments passed in the `args` parameter.
+        deg : int
+            Degree of the interpolating polynomial.
+        domain : {None, [beg, end]}, optional
+            Domain over which `func` is interpolated. The default is None, in
+            which case the domain is [-1, 1].
+        args : tuple, optional
+            Extra arguments to be used in the function call. Default is no
+            extra arguments.
+
+        Returns
+        -------
+        polynomial : Chebyshev instance
+            Interpolating Chebyshev instance.
+
+        Notes
+        -----
+        See `numpy.polynomial.chebyshev.chebinterpolate` for more details.
+
+        """
+        if domain is None:
+            domain = cls.domain
+        xfunc = lambda x: func(pu.mapdomain(x, cls.window, domain), *args)
+        coef = chebinterpolate(xfunc, deg)
+        return cls(coef, domain=domain)
+
+    # Virtual properties
+    domain = np.array(chebdomain)
+    window = np.array(chebdomain)
+    basis_name = 'T'
diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/chebyshev.pyi b/.local/lib/python3.8/site-packages/numpy/polynomial/chebyshev.pyi
new file mode 100644
index 0000000..841c085
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/polynomial/chebyshev.pyi
@@ -0,0 +1,51 @@
+from typing import Any, List
+
+from numpy import ndarray, dtype, int_
+from numpy.polynomial._polybase import ABCPolyBase
+from numpy.polynomial.polyutils import trimcoef
+
+__all__: List[str]
+
+chebtrim = trimcoef
+
+def poly2cheb(pol): ...
+def cheb2poly(c): ...
+
+chebdomain: ndarray[Any, dtype[int_]]
+chebzero: ndarray[Any, dtype[int_]]
+chebone: ndarray[Any, dtype[int_]]
+chebx: ndarray[Any, dtype[int_]]
+
+def chebline(off, scl): ...
+def chebfromroots(roots): ...
+def chebadd(c1, c2): ...
+def chebsub(c1, c2): ...
+def chebmulx(c): ...
+def chebmul(c1, c2): ...
+def chebdiv(c1, c2): ...
+def chebpow(c, pow, maxpower=...): ...
+def chebder(c, m=..., scl=..., axis=...): ...
+def chebint(c, m=..., k = ..., lbnd=..., scl=..., axis=...): ... +def chebval(x, c, tensor=...): ... +def chebval2d(x, y, c): ... +def chebgrid2d(x, y, c): ... +def chebval3d(x, y, z, c): ... +def chebgrid3d(x, y, z, c): ... +def chebvander(x, deg): ... +def chebvander2d(x, y, deg): ... +def chebvander3d(x, y, z, deg): ... +def chebfit(x, y, deg, rcond=..., full=..., w=...): ... +def chebcompanion(c): ... +def chebroots(c): ... +def chebinterpolate(func, deg, args = ...): ... +def chebgauss(deg): ... +def chebweight(x): ... +def chebpts1(npts): ... +def chebpts2(npts): ... + +class Chebyshev(ABCPolyBase): + @classmethod + def interpolate(cls, func, deg, domain=..., args = ...): ... + domain: Any + window: Any + basis_name: Any diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/hermite.py b/.local/lib/python3.8/site-packages/numpy/polynomial/hermite.py new file mode 100644 index 0000000..9b0735a --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/polynomial/hermite.py @@ -0,0 +1,1697 @@ +""" +============================================================== +Hermite Series, "Physicists" (:mod:`numpy.polynomial.hermite`) +============================================================== + +This module provides a number of objects (mostly functions) useful for +dealing with Hermite series, including a `Hermite` class that +encapsulates the usual arithmetic operations. (General information +on how this module represents and works with such polynomials is in the +docstring for its "parent" sub-package, `numpy.polynomial`). + +Classes +------- +.. autosummary:: + :toctree: generated/ + + Hermite + +Constants +--------- +.. autosummary:: + :toctree: generated/ + + hermdomain + hermzero + hermone + hermx + +Arithmetic +---------- +.. autosummary:: + :toctree: generated/ + + hermadd + hermsub + hermmulx + hermmul + hermdiv + hermpow + hermval + hermval2d + hermval3d + hermgrid2d + hermgrid3d + +Calculus +-------- +.. autosummary:: + :toctree: generated/ + + hermder + hermint + +Misc Functions +-------------- +.. autosummary:: + :toctree: generated/ + + hermfromroots + hermroots + hermvander + hermvander2d + hermvander3d + hermgauss + hermweight + hermcompanion + hermfit + hermtrim + hermline + herm2poly + poly2herm + +See also +-------- +`numpy.polynomial` + +""" +import numpy as np +import numpy.linalg as la +from numpy.core.multiarray import normalize_axis_index + +from . import polyutils as pu +from ._polybase import ABCPolyBase + +__all__ = [ + 'hermzero', 'hermone', 'hermx', 'hermdomain', 'hermline', 'hermadd', + 'hermsub', 'hermmulx', 'hermmul', 'hermdiv', 'hermpow', 'hermval', + 'hermder', 'hermint', 'herm2poly', 'poly2herm', 'hermfromroots', + 'hermvander', 'hermfit', 'hermtrim', 'hermroots', 'Hermite', + 'hermval2d', 'hermval3d', 'hermgrid2d', 'hermgrid3d', 'hermvander2d', + 'hermvander3d', 'hermcompanion', 'hermgauss', 'hermweight'] + +hermtrim = pu.trimcoef + + +def poly2herm(pol): + """ + poly2herm(pol) + + Convert a polynomial to a Hermite series. + + Convert an array representing the coefficients of a polynomial (relative + to the "standard" basis) ordered from lowest degree to highest, to an + array of the coefficients of the equivalent Hermite series, ordered + from lowest to highest degree. + + Parameters + ---------- + pol : array_like + 1-D array containing the polynomial coefficients + + Returns + ------- + c : ndarray + 1-D array containing the coefficients of the equivalent Hermite + series. 
+ + See Also + -------- + herm2poly + + Notes + ----- + The easy way to do conversions between polynomial basis sets + is to use the convert method of a class instance. + + Examples + -------- + >>> from numpy.polynomial.hermite import poly2herm + >>> poly2herm(np.arange(4)) + array([1. , 2.75 , 0.5 , 0.375]) + + """ + [pol] = pu.as_series([pol]) + deg = len(pol) - 1 + res = 0 + for i in range(deg, -1, -1): + res = hermadd(hermmulx(res), pol[i]) + return res + + +def herm2poly(c): + """ + Convert a Hermite series to a polynomial. + + Convert an array representing the coefficients of a Hermite series, + ordered from lowest degree to highest, to an array of the coefficients + of the equivalent polynomial (relative to the "standard" basis) ordered + from lowest to highest degree. + + Parameters + ---------- + c : array_like + 1-D array containing the Hermite series coefficients, ordered + from lowest order term to highest. + + Returns + ------- + pol : ndarray + 1-D array containing the coefficients of the equivalent polynomial + (relative to the "standard" basis) ordered from lowest order term + to highest. + + See Also + -------- + poly2herm + + Notes + ----- + The easy way to do conversions between polynomial basis sets + is to use the convert method of a class instance. + + Examples + -------- + >>> from numpy.polynomial.hermite import herm2poly + >>> herm2poly([ 1. , 2.75 , 0.5 , 0.375]) + array([0., 1., 2., 3.]) + + """ + from .polynomial import polyadd, polysub, polymulx + + [c] = pu.as_series([c]) + n = len(c) + if n == 1: + return c + if n == 2: + c[1] *= 2 + return c + else: + c0 = c[-2] + c1 = c[-1] + # i is the current degree of c1 + for i in range(n - 1, 1, -1): + tmp = c0 + c0 = polysub(c[i - 2], c1*(2*(i - 1))) + c1 = polyadd(tmp, polymulx(c1)*2) + return polyadd(c0, polymulx(c1)*2) + +# +# These are constant arrays are of integer type so as to be compatible +# with the widest range of other types, such as Decimal. +# + +# Hermite +hermdomain = np.array([-1, 1]) + +# Hermite coefficients representing zero. +hermzero = np.array([0]) + +# Hermite coefficients representing one. +hermone = np.array([1]) + +# Hermite coefficients representing the identity x. +hermx = np.array([0, 1/2]) + + +def hermline(off, scl): + """ + Hermite series whose graph is a straight line. + + + + Parameters + ---------- + off, scl : scalars + The specified line is given by ``off + scl*x``. + + Returns + ------- + y : ndarray + This module's representation of the Hermite series for + ``off + scl*x``. + + See Also + -------- + numpy.polynomial.polynomial.polyline + numpy.polynomial.chebyshev.chebline + numpy.polynomial.legendre.legline + numpy.polynomial.laguerre.lagline + numpy.polynomial.hermite_e.hermeline + + Examples + -------- + >>> from numpy.polynomial.hermite import hermline, hermval + >>> hermval(0,hermline(3, 2)) + 3.0 + >>> hermval(1,hermline(3, 2)) + 5.0 + + """ + if scl != 0: + return np.array([off, scl/2]) + else: + return np.array([off]) + + +def hermfromroots(roots): + """ + Generate a Hermite series with given roots. + + The function returns the coefficients of the polynomial + + .. math:: p(x) = (x - r_0) * (x - r_1) * ... * (x - r_n), + + in Hermite form, where the `r_n` are the roots specified in `roots`. + If a zero has multiplicity n, then it must appear in `roots` n times. + For instance, if 2 is a root of multiplicity three and 3 is a root of + multiplicity 2, then `roots` looks something like [2, 2, 2, 3, 3]. The + roots can appear in any order. 
+ + If the returned coefficients are `c`, then + + .. math:: p(x) = c_0 + c_1 * H_1(x) + ... + c_n * H_n(x) + + The coefficient of the last term is not generally 1 for monic + polynomials in Hermite form. + + Parameters + ---------- + roots : array_like + Sequence containing the roots. + + Returns + ------- + out : ndarray + 1-D array of coefficients. If all roots are real then `out` is a + real array, if some of the roots are complex, then `out` is complex + even if all the coefficients in the result are real (see Examples + below). + + See Also + -------- + numpy.polynomial.polynomial.polyfromroots + numpy.polynomial.legendre.legfromroots + numpy.polynomial.laguerre.lagfromroots + numpy.polynomial.chebyshev.chebfromroots + numpy.polynomial.hermite_e.hermefromroots + + Examples + -------- + >>> from numpy.polynomial.hermite import hermfromroots, hermval + >>> coef = hermfromroots((-1, 0, 1)) + >>> hermval((-1, 0, 1), coef) + array([0., 0., 0.]) + >>> coef = hermfromroots((-1j, 1j)) + >>> hermval((-1j, 1j), coef) + array([0.+0.j, 0.+0.j]) + + """ + return pu._fromroots(hermline, hermmul, roots) + + +def hermadd(c1, c2): + """ + Add one Hermite series to another. + + Returns the sum of two Hermite series `c1` + `c2`. The arguments + are sequences of coefficients ordered from lowest order term to + highest, i.e., [1,2,3] represents the series ``P_0 + 2*P_1 + 3*P_2``. + + Parameters + ---------- + c1, c2 : array_like + 1-D arrays of Hermite series coefficients ordered from low to + high. + + Returns + ------- + out : ndarray + Array representing the Hermite series of their sum. + + See Also + -------- + hermsub, hermmulx, hermmul, hermdiv, hermpow + + Notes + ----- + Unlike multiplication, division, etc., the sum of two Hermite series + is a Hermite series (without having to "reproject" the result onto + the basis set) so addition, just like that of "standard" polynomials, + is simply "component-wise." + + Examples + -------- + >>> from numpy.polynomial.hermite import hermadd + >>> hermadd([1, 2, 3], [1, 2, 3, 4]) + array([2., 4., 6., 4.]) + + """ + return pu._add(c1, c2) + + +def hermsub(c1, c2): + """ + Subtract one Hermite series from another. + + Returns the difference of two Hermite series `c1` - `c2`. The + sequences of coefficients are from lowest order term to highest, i.e., + [1,2,3] represents the series ``P_0 + 2*P_1 + 3*P_2``. + + Parameters + ---------- + c1, c2 : array_like + 1-D arrays of Hermite series coefficients ordered from low to + high. + + Returns + ------- + out : ndarray + Of Hermite series coefficients representing their difference. + + See Also + -------- + hermadd, hermmulx, hermmul, hermdiv, hermpow + + Notes + ----- + Unlike multiplication, division, etc., the difference of two Hermite + series is a Hermite series (without having to "reproject" the result + onto the basis set) so subtraction, just like that of "standard" + polynomials, is simply "component-wise." + + Examples + -------- + >>> from numpy.polynomial.hermite import hermsub + >>> hermsub([1, 2, 3, 4], [1, 2, 3]) + array([0., 0., 0., 4.]) + + """ + return pu._sub(c1, c2) + + +def hermmulx(c): + """Multiply a Hermite series by x. + + Multiply the Hermite series `c` by x, where x is the independent + variable. + + + Parameters + ---------- + c : array_like + 1-D array of Hermite series coefficients ordered from low to + high. + + Returns + ------- + out : ndarray + Array representing the result of the multiplication. 
+ + See Also + -------- + hermadd, hermsub, hermmul, hermdiv, hermpow + + Notes + ----- + The multiplication uses the recursion relationship for Hermite + polynomials in the form + + .. math:: + + xP_i(x) = (P_{i + 1}(x)/2 + i*P_{i - 1}(x)) + + Examples + -------- + >>> from numpy.polynomial.hermite import hermmulx + >>> hermmulx([1, 2, 3]) + array([2. , 6.5, 1. , 1.5]) + + """ + # c is a trimmed copy + [c] = pu.as_series([c]) + # The zero series needs special treatment + if len(c) == 1 and c[0] == 0: + return c + + prd = np.empty(len(c) + 1, dtype=c.dtype) + prd[0] = c[0]*0 + prd[1] = c[0]/2 + for i in range(1, len(c)): + prd[i + 1] = c[i]/2 + prd[i - 1] += c[i]*i + return prd + + +def hermmul(c1, c2): + """ + Multiply one Hermite series by another. + + Returns the product of two Hermite series `c1` * `c2`. The arguments + are sequences of coefficients, from lowest order "term" to highest, + e.g., [1,2,3] represents the series ``P_0 + 2*P_1 + 3*P_2``. + + Parameters + ---------- + c1, c2 : array_like + 1-D arrays of Hermite series coefficients ordered from low to + high. + + Returns + ------- + out : ndarray + Of Hermite series coefficients representing their product. + + See Also + -------- + hermadd, hermsub, hermmulx, hermdiv, hermpow + + Notes + ----- + In general, the (polynomial) product of two C-series results in terms + that are not in the Hermite polynomial basis set. Thus, to express + the product as a Hermite series, it is necessary to "reproject" the + product onto said basis set, which may produce "unintuitive" (but + correct) results; see Examples section below. + + Examples + -------- + >>> from numpy.polynomial.hermite import hermmul + >>> hermmul([1, 2, 3], [0, 1, 2]) + array([52., 29., 52., 7., 6.]) + + """ + # s1, s2 are trimmed copies + [c1, c2] = pu.as_series([c1, c2]) + + if len(c1) > len(c2): + c = c2 + xs = c1 + else: + c = c1 + xs = c2 + + if len(c) == 1: + c0 = c[0]*xs + c1 = 0 + elif len(c) == 2: + c0 = c[0]*xs + c1 = c[1]*xs + else: + nd = len(c) + c0 = c[-2]*xs + c1 = c[-1]*xs + for i in range(3, len(c) + 1): + tmp = c0 + nd = nd - 1 + c0 = hermsub(c[-i]*xs, c1*(2*(nd - 1))) + c1 = hermadd(tmp, hermmulx(c1)*2) + return hermadd(c0, hermmulx(c1)*2) + + +def hermdiv(c1, c2): + """ + Divide one Hermite series by another. + + Returns the quotient-with-remainder of two Hermite series + `c1` / `c2`. The arguments are sequences of coefficients from lowest + order "term" to highest, e.g., [1,2,3] represents the series + ``P_0 + 2*P_1 + 3*P_2``. + + Parameters + ---------- + c1, c2 : array_like + 1-D arrays of Hermite series coefficients ordered from low to + high. + + Returns + ------- + [quo, rem] : ndarrays + Of Hermite series coefficients representing the quotient and + remainder. + + See Also + -------- + hermadd, hermsub, hermmulx, hermmul, hermpow + + Notes + ----- + In general, the (polynomial) division of one Hermite series by another + results in quotient and remainder terms that are not in the Hermite + polynomial basis set. Thus, to express these results as a Hermite + series, it is necessary to "reproject" the results onto the Hermite + basis set, which may produce "unintuitive" (but correct) results; see + Examples section below. 
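+ + One way to sanity-check a division, sketched here with the other arithmetic helpers in this module (the values follow the `hermmul` example above), is that the quotient times the divisor plus the remainder recovers the dividend: + + >>> from numpy.polynomial.hermite import hermadd, hermdiv, hermmul + >>> quo, rem = hermdiv([52., 29., 52., 7., 6.], [0, 1, 2]) + >>> hermadd(hermmul(quo, [0, 1, 2]), rem) + array([52., 29., 52., 7., 6.])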
+ + Examples + -------- + >>> from numpy.polynomial.hermite import hermdiv + >>> hermdiv([ 52., 29., 52., 7., 6.], [0, 1, 2]) + (array([1., 2., 3.]), array([0.])) + >>> hermdiv([ 54., 31., 52., 7., 6.], [0, 1, 2]) + (array([1., 2., 3.]), array([2., 2.])) + >>> hermdiv([ 53., 30., 52., 7., 6.], [0, 1, 2]) + (array([1., 2., 3.]), array([1., 1.])) + + """ + return pu._div(hermmul, c1, c2) + + +def hermpow(c, pow, maxpower=16): + """Raise a Hermite series to a power. + + Returns the Hermite series `c` raised to the power `pow`. The + argument `c` is a sequence of coefficients ordered from low to high. + i.e., [1,2,3] is the series ``P_0 + 2*P_1 + 3*P_2.`` + + Parameters + ---------- + c : array_like + 1-D array of Hermite series coefficients ordered from low to + high. + pow : integer + Power to which the series will be raised + maxpower : integer, optional + Maximum power allowed. This is mainly to limit growth of the series + to unmanageable size. Default is 16 + + Returns + ------- + coef : ndarray + Hermite series of power. + + See Also + -------- + hermadd, hermsub, hermmulx, hermmul, hermdiv + + Examples + -------- + >>> from numpy.polynomial.hermite import hermpow + >>> hermpow([1, 2, 3], 2) + array([81., 52., 82., 12., 9.]) + + """ + return pu._pow(hermmul, c, pow, maxpower) + + +def hermder(c, m=1, scl=1, axis=0): + """ + Differentiate a Hermite series. + + Returns the Hermite series coefficients `c` differentiated `m` times + along `axis`. At each iteration the result is multiplied by `scl` (the + scaling factor is for use in a linear change of variable). The argument + `c` is an array of coefficients from low to high degree along each + axis, e.g., [1,2,3] represents the series ``1*H_0 + 2*H_1 + 3*H_2`` + while [[1,2],[1,2]] represents ``1*H_0(x)*H_0(y) + 1*H_1(x)*H_0(y) + + 2*H_0(x)*H_1(y) + 2*H_1(x)*H_1(y)`` if axis=0 is ``x`` and axis=1 is + ``y``. + + Parameters + ---------- + c : array_like + Array of Hermite series coefficients. If `c` is multidimensional the + different axis correspond to different variables with the degree in + each axis given by the corresponding index. + m : int, optional + Number of derivatives taken, must be non-negative. (Default: 1) + scl : scalar, optional + Each differentiation is multiplied by `scl`. The end result is + multiplication by ``scl**m``. This is for use in a linear change of + variable. (Default: 1) + axis : int, optional + Axis over which the derivative is taken. (Default: 0). + + .. versionadded:: 1.7.0 + + Returns + ------- + der : ndarray + Hermite series of the derivative. + + See Also + -------- + hermint + + Notes + ----- + In general, the result of differentiating a Hermite series does not + resemble the same operation on a power series. Thus the result of this + function may be "unintuitive," albeit correct; see Examples section + below. + + Examples + -------- + >>> from numpy.polynomial.hermite import hermder + >>> hermder([ 1. 
, 0.5, 0.5, 0.5]) + array([1., 2., 3.]) + >>> hermder([-0.5, 1./2., 1./8., 1./12., 1./16.], m=2) + array([1., 2., 3.]) + + """ + c = np.array(c, ndmin=1, copy=True) + if c.dtype.char in '?bBhHiIlLqQpP': + c = c.astype(np.double) + cnt = pu._deprecate_as_int(m, "the order of derivation") + iaxis = pu._deprecate_as_int(axis, "the axis") + if cnt < 0: + raise ValueError("The order of derivation must be non-negative") + iaxis = normalize_axis_index(iaxis, c.ndim) + + if cnt == 0: + return c + + c = np.moveaxis(c, iaxis, 0) + n = len(c) + if cnt >= n: + c = c[:1]*0 + else: + for i in range(cnt): + n = n - 1 + c *= scl + der = np.empty((n,) + c.shape[1:], dtype=c.dtype) + for j in range(n, 0, -1): + der[j - 1] = (2*j)*c[j] + c = der + c = np.moveaxis(c, 0, iaxis) + return c + + +def hermint(c, m=1, k=[], lbnd=0, scl=1, axis=0): + """ + Integrate a Hermite series. + + Returns the Hermite series coefficients `c` integrated `m` times from + `lbnd` along `axis`. At each iteration the resulting series is + **multiplied** by `scl` and an integration constant, `k`, is added. + The scaling factor is for use in a linear change of variable. ("Buyer + beware": note that, depending on what one is doing, one may want `scl` + to be the reciprocal of what one might expect; for more information, + see the Notes section below.) The argument `c` is an array of + coefficients from low to high degree along each axis, e.g., [1,2,3] + represents the series ``H_0 + 2*H_1 + 3*H_2`` while [[1,2],[1,2]] + represents ``1*H_0(x)*H_0(y) + 1*H_1(x)*H_0(y) + 2*H_0(x)*H_1(y) + + 2*H_1(x)*H_1(y)`` if axis=0 is ``x`` and axis=1 is ``y``. + + Parameters + ---------- + c : array_like + Array of Hermite series coefficients. If c is multidimensional the + different axis correspond to different variables with the degree in + each axis given by the corresponding index. + m : int, optional + Order of integration, must be positive. (Default: 1) + k : {[], list, scalar}, optional + Integration constant(s). The value of the first integral at + ``lbnd`` is the first value in the list, the value of the second + integral at ``lbnd`` is the second value, etc. If ``k == []`` (the + default), all constants are set to zero. If ``m == 1``, a single + scalar can be given instead of a list. + lbnd : scalar, optional + The lower bound of the integral. (Default: 0) + scl : scalar, optional + Following each integration the result is *multiplied* by `scl` + before the integration constant is added. (Default: 1) + axis : int, optional + Axis over which the integral is taken. (Default: 0). + + .. versionadded:: 1.7.0 + + Returns + ------- + S : ndarray + Hermite series coefficients of the integral. + + Raises + ------ + ValueError + If ``m < 0``, ``len(k) > m``, ``np.ndim(lbnd) != 0``, or + ``np.ndim(scl) != 0``. + + See Also + -------- + hermder + + Notes + ----- + Note that the result of each integration is *multiplied* by `scl`. + Why is this important to note? Say one is making a linear change of + variable :math:`u = ax + b` in an integral relative to `x`. Then + :math:`dx = du/a`, so one will need to set `scl` equal to + :math:`1/a` - perhaps not what one would have first thought. + + Also note that, in general, the result of integrating a C-series needs + to be "reprojected" onto the C-series basis set. Thus, typically, + the result of this function is "unintuitive," albeit correct; see + Examples section below. + + Examples + -------- + >>> from numpy.polynomial.hermite import hermint + >>> hermint([1,2,3]) # integrate once, value 0 at 0. 
+ array([1. , 0.5, 0.5, 0.5]) + >>> hermint([1,2,3], m=2) # integrate twice, value & deriv 0 at 0 + array([-0.5 , 0.5 , 0.125 , 0.08333333, 0.0625 ]) # may vary + >>> hermint([1,2,3], k=1) # integrate once, value 1 at 0. + array([2. , 0.5, 0.5, 0.5]) + >>> hermint([1,2,3], lbnd=-1) # integrate once, value 0 at -1 + array([-2. , 0.5, 0.5, 0.5]) + >>> hermint([1,2,3], m=2, k=[1,2], lbnd=-1) + array([ 1.66666667, -0.5 , 0.125 , 0.08333333, 0.0625 ]) # may vary + + """ + c = np.array(c, ndmin=1, copy=True) + if c.dtype.char in '?bBhHiIlLqQpP': + c = c.astype(np.double) + if not np.iterable(k): + k = [k] + cnt = pu._deprecate_as_int(m, "the order of integration") + iaxis = pu._deprecate_as_int(axis, "the axis") + if cnt < 0: + raise ValueError("The order of integration must be non-negative") + if len(k) > cnt: + raise ValueError("Too many integration constants") + if np.ndim(lbnd) != 0: + raise ValueError("lbnd must be a scalar.") + if np.ndim(scl) != 0: + raise ValueError("scl must be a scalar.") + iaxis = normalize_axis_index(iaxis, c.ndim) + + if cnt == 0: + return c + + c = np.moveaxis(c, iaxis, 0) + k = list(k) + [0]*(cnt - len(k)) + for i in range(cnt): + n = len(c) + c *= scl + if n == 1 and np.all(c[0] == 0): + c[0] += k[i] + else: + tmp = np.empty((n + 1,) + c.shape[1:], dtype=c.dtype) + tmp[0] = c[0]*0 + tmp[1] = c[0]/2 + for j in range(1, n): + tmp[j + 1] = c[j]/(2*(j + 1)) + tmp[0] += k[i] - hermval(lbnd, tmp) + c = tmp + c = np.moveaxis(c, 0, iaxis) + return c + + +def hermval(x, c, tensor=True): + """ + Evaluate a Hermite series at points x. + + If `c` is of length `n + 1`, this function returns the value: + + .. math:: p(x) = c_0 * H_0(x) + c_1 * H_1(x) + ... + c_n * H_n(x) + + The parameter `x` is converted to an array only if it is a tuple or a + list, otherwise it is treated as a scalar. In either case, either `x` + or its elements must support multiplication and addition both with + themselves and with the elements of `c`. + + If `c` is a 1-D array, then `p(x)` will have the same shape as `x`. If + `c` is multidimensional, then the shape of the result depends on the + value of `tensor`. If `tensor` is true the shape will be c.shape[1:] + + x.shape. If `tensor` is false the shape will be c.shape[1:]. Note that + scalars have shape (). + + Trailing zeros in the coefficients will be used in the evaluation, so + they should be avoided if efficiency is a concern. + + Parameters + ---------- + x : array_like, compatible object + If `x` is a list or tuple, it is converted to an ndarray, otherwise + it is left unchanged and treated as a scalar. In either case, `x` + or its elements must support addition and multiplication with + themselves and with the elements of `c`. + c : array_like + Array of coefficients ordered so that the coefficients for terms of + degree n are contained in c[n]. If `c` is multidimensional the + remaining indices enumerate multiple polynomials. In the two + dimensional case the coefficients may be thought of as stored in + the columns of `c`. + tensor : boolean, optional + If True, the shape of the coefficient array is extended with ones + on the right, one for each dimension of `x`. Scalars have dimension 0 + for this action. The result is that every column of coefficients in + `c` is evaluated for every element of `x`. If False, `x` is broadcast + over the columns of `c` for the evaluation. This keyword is useful + when `c` is multidimensional. The default value is True. + + ..
versionadded:: 1.7.0 + + Returns + ------- + values : ndarray, algebra_like + The shape of the return value is described above. + + See Also + -------- + hermval2d, hermgrid2d, hermval3d, hermgrid3d + + Notes + ----- + The evaluation uses Clenshaw recursion, aka synthetic division. + + Examples + -------- + >>> from numpy.polynomial.hermite import hermval + >>> coef = [1,2,3] + >>> hermval(1, coef) + 11.0 + >>> hermval([[1,2],[3,4]], coef) + array([[ 11., 51.], + [115., 203.]]) + + """ + c = np.array(c, ndmin=1, copy=False) + if c.dtype.char in '?bBhHiIlLqQpP': + c = c.astype(np.double) + if isinstance(x, (tuple, list)): + x = np.asarray(x) + if isinstance(x, np.ndarray) and tensor: + c = c.reshape(c.shape + (1,)*x.ndim) + + x2 = x*2 + if len(c) == 1: + c0 = c[0] + c1 = 0 + elif len(c) == 2: + c0 = c[0] + c1 = c[1] + else: + nd = len(c) + c0 = c[-2] + c1 = c[-1] + for i in range(3, len(c) + 1): + tmp = c0 + nd = nd - 1 + c0 = c[-i] - c1*(2*(nd - 1)) + c1 = tmp + c1*x2 + return c0 + c1*x2 + + +def hermval2d(x, y, c): + """ + Evaluate a 2-D Hermite series at points (x, y). + + This function returns the values: + + .. math:: p(x,y) = \\sum_{i,j} c_{i,j} * H_i(x) * H_j(y) + + The parameters `x` and `y` are converted to arrays only if they are + tuples or lists, otherwise they are treated as scalars and they + must have the same shape after conversion. In either case, either `x` + and `y` or their elements must support multiplication and addition both + with themselves and with the elements of `c`. + + If `c` is a 1-D array a one is implicitly appended to its shape to make + it 2-D. The shape of the result will be c.shape[2:] + x.shape. + + Parameters + ---------- + x, y : array_like, compatible objects + The two dimensional series is evaluated at the points `(x, y)`, + where `x` and `y` must have the same shape. If `x` or `y` is a list + or tuple, it is first converted to an ndarray, otherwise it is left + unchanged and if it isn't an ndarray it is treated as a scalar. + c : array_like + Array of coefficients ordered so that the coefficient of the term + of multi-degree i,j is contained in ``c[i,j]``. If `c` has + dimension greater than two the remaining indices enumerate multiple + sets of coefficients. + + Returns + ------- + values : ndarray, compatible object + The values of the two dimensional polynomial at points formed with + pairs of corresponding values from `x` and `y`. + + See Also + -------- + hermval, hermgrid2d, hermval3d, hermgrid3d + + Notes + ----- + + .. versionadded:: 1.7.0 + + """ + return pu._valnd(hermval, c, x, y) + + +def hermgrid2d(x, y, c): + """ + Evaluate a 2-D Hermite series on the Cartesian product of x and y. + + This function returns the values: + + .. math:: p(a,b) = \\sum_{i,j} c_{i,j} * H_i(a) * H_j(b) + + where the points `(a, b)` consist of all pairs formed by taking + `a` from `x` and `b` from `y`. The resulting points form a grid with + `x` in the first dimension and `y` in the second. + + The parameters `x` and `y` are converted to arrays only if they are + tuples or lists, otherwise they are treated as scalars. In either + case, either `x` and `y` or their elements must support multiplication + and addition both with themselves and with the elements of `c`. + + If `c` has fewer than two dimensions, ones are implicitly appended to + its shape to make it 2-D. The shape of the result will be c.shape[2:] + + x.shape.
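+ + As a minimal sketch of the shape behavior, two points in `x`, three points in `y`, and a 2x2 coefficient array give a 2x3 grid of values: + + >>> from numpy.polynomial.hermite import hermgrid2d + >>> import numpy as np + >>> hermgrid2d([0., 1.], [0., 1., 2.], np.ones((2, 2))).shape + (2, 3)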
+ + Parameters + ---------- + x, y : array_like, compatible objects + The two dimensional series is evaluated at the points in the + Cartesian product of `x` and `y`. If `x` or `y` is a list or + tuple, it is first converted to an ndarray, otherwise it is left + unchanged and, if it isn't an ndarray, it is treated as a scalar. + c : array_like + Array of coefficients ordered so that the coefficients for terms of + degree i,j are contained in ``c[i,j]``. If `c` has dimension + greater than two the remaining indices enumerate multiple sets of + coefficients. + + Returns + ------- + values : ndarray, compatible object + The values of the two dimensional polynomial at points in the Cartesian + product of `x` and `y`. + + See Also + -------- + hermval, hermval2d, hermval3d, hermgrid3d + + Notes + ----- + + .. versionadded:: 1.7.0 + + """ + return pu._gridnd(hermval, c, x, y) + + +def hermval3d(x, y, z, c): + """ + Evaluate a 3-D Hermite series at points (x, y, z). + + This function returns the values: + + .. math:: p(x,y,z) = \\sum_{i,j,k} c_{i,j,k} * H_i(x) * H_j(y) * H_k(z) + + The parameters `x`, `y`, and `z` are converted to arrays only if + they are tuples or lists, otherwise they are treated as scalars and + they must have the same shape after conversion. In either case, either + `x`, `y`, and `z` or their elements must support multiplication and + addition both with themselves and with the elements of `c`. + + If `c` has fewer than 3 dimensions, ones are implicitly appended to its + shape to make it 3-D. The shape of the result will be c.shape[3:] + + x.shape. + + Parameters + ---------- + x, y, z : array_like, compatible object + The three dimensional series is evaluated at the points + `(x, y, z)`, where `x`, `y`, and `z` must have the same shape. If + any of `x`, `y`, or `z` is a list or tuple, it is first converted + to an ndarray, otherwise it is left unchanged and if it isn't an + ndarray it is treated as a scalar. + c : array_like + Array of coefficients ordered so that the coefficient of the term of + multi-degree i,j,k is contained in ``c[i,j,k]``. If `c` has dimension + greater than 3 the remaining indices enumerate multiple sets of + coefficients. + + Returns + ------- + values : ndarray, compatible object + The values of the multidimensional polynomial on points formed with + triples of corresponding values from `x`, `y`, and `z`. + + See Also + -------- + hermval, hermval2d, hermgrid2d, hermgrid3d + + Notes + ----- + + .. versionadded:: 1.7.0 + + """ + return pu._valnd(hermval, c, x, y, z) + + +def hermgrid3d(x, y, z, c): + """ + Evaluate a 3-D Hermite series on the Cartesian product of x, y, and z. + + This function returns the values: + + .. math:: p(a,b,c) = \\sum_{i,j,k} c_{i,j,k} * H_i(a) * H_j(b) * H_k(c) + + where the points `(a, b, c)` consist of all triples formed by taking + `a` from `x`, `b` from `y`, and `c` from `z`. The resulting points form + a grid with `x` in the first dimension, `y` in the second, and `z` in + the third. + + The parameters `x`, `y`, and `z` are converted to arrays only if they + are tuples or lists, otherwise they are treated as scalars. In + either case, either `x`, `y`, and `z` or their elements must support + multiplication and addition both with themselves and with the elements + of `c`. + + If `c` has fewer than three dimensions, ones are implicitly appended to + its shape to make it 3-D. The shape of the result will be c.shape[3:] + + x.shape + y.shape + z.shape.
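+ + As a minimal sketch of the shape behavior, grids of two, three, and one sample points with a 2x2x2 coefficient array give a result of shape ``(2, 3, 1)``: + + >>> from numpy.polynomial.hermite import hermgrid3d + >>> import numpy as np + >>> hermgrid3d([0., 1.], [0., 1., 2.], [0.], np.ones((2, 2, 2))).shape + (2, 3, 1)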
+ + Parameters + ---------- + x, y, z : array_like, compatible objects + The three dimensional series is evaluated at the points in the + Cartesian product of `x`, `y`, and `z`. If `x`,`y`, or `z` is a + list or tuple, it is first converted to an ndarray, otherwise it is + left unchanged and, if it isn't an ndarray, it is treated as a + scalar. + c : array_like + Array of coefficients ordered so that the coefficients for terms of + degree i,j,k are contained in ``c[i,j,k]``. If `c` has dimension + greater than three the remaining indices enumerate multiple sets of + coefficients. + + Returns + ------- + values : ndarray, compatible object + The values of the three dimensional polynomial at points in the + Cartesian product of `x`, `y`, and `z`. + + See Also + -------- + hermval, hermval2d, hermgrid2d, hermval3d + + Notes + ----- + + .. versionadded:: 1.7.0 + + """ + return pu._gridnd(hermval, c, x, y, z) + + +def hermvander(x, deg): + """Pseudo-Vandermonde matrix of given degree. + + Returns the pseudo-Vandermonde matrix of degree `deg` and sample points + `x`. The pseudo-Vandermonde matrix is defined by + + .. math:: V[..., i] = H_i(x), + + where `0 <= i <= deg`. The leading indices of `V` index the elements of + `x` and the last index is the degree of the Hermite polynomial. + + If `c` is a 1-D array of coefficients of length `n + 1` and `V` is the + array ``V = hermvander(x, n)``, then ``np.dot(V, c)`` and + ``hermval(x, c)`` are the same up to roundoff. This equivalence is + useful both for least squares fitting and for the evaluation of a large + number of Hermite series of the same degree and sample points. + + Parameters + ---------- + x : array_like + Array of points. The dtype is converted to float64 or complex128 + depending on whether any of the elements are complex. If `x` is + scalar it is converted to a 1-D array. + deg : int + Degree of the resulting matrix. + + Returns + ------- + vander : ndarray + The pseudo-Vandermonde matrix. The shape of the returned matrix is + ``x.shape + (deg + 1,)``, where the last index is the degree of the + corresponding Hermite polynomial. The dtype will be the same as + the converted `x`. + + Examples + -------- + >>> from numpy.polynomial.hermite import hermvander + >>> x = np.array([-1, 0, 1]) + >>> hermvander(x, 3) + array([[ 1., -2., 2., 4.], + [ 1., 0., -2., -0.], + [ 1., 2., 2., -4.]]) + + """ + ideg = pu._deprecate_as_int(deg, "deg") + if ideg < 0: + raise ValueError("deg must be non-negative") + + x = np.array(x, copy=False, ndmin=1) + 0.0 + dims = (ideg + 1,) + x.shape + dtyp = x.dtype + v = np.empty(dims, dtype=dtyp) + v[0] = x*0 + 1 + if ideg > 0: + x2 = x*2 + v[1] = x2 + for i in range(2, ideg + 1): + v[i] = (v[i-1]*x2 - v[i-2]*(2*(i - 1))) + return np.moveaxis(v, 0, -1) + + +def hermvander2d(x, y, deg): + """Pseudo-Vandermonde matrix of given degrees. + + Returns the pseudo-Vandermonde matrix of degrees `deg` and sample + points `(x, y)`. The pseudo-Vandermonde matrix is defined by + + .. math:: V[..., (deg[1] + 1)*i + j] = H_i(x) * H_j(y), + + where `0 <= i <= deg[0]` and `0 <= j <= deg[1]`. The leading indices of + `V` index the points `(x, y)` and the last index encodes the degrees of + the Hermite polynomials. + + If ``V = hermvander2d(x, y, [xdeg, ydeg])``, then the columns of `V` + correspond to the elements of a 2-D coefficient array `c` of shape + (xdeg + 1, ydeg + 1) in the order + + .. math:: c_{00}, c_{01}, c_{02} ... , c_{10}, c_{11}, c_{12} ...
+ + and ``np.dot(V, c.flat)`` and ``hermval2d(x, y, c)`` will be the same + up to roundoff. This equivalence is useful both for least squares + fitting and for the evaluation of a large number of 2-D Hermite + series of the same degrees and sample points. + + Parameters + ---------- + x, y : array_like + Arrays of point coordinates, all of the same shape. The dtypes + will be converted to either float64 or complex128 depending on + whether any of the elements are complex. Scalars are converted to 1-D + arrays. + deg : list of ints + List of maximum degrees of the form [x_deg, y_deg]. + + Returns + ------- + vander2d : ndarray + The shape of the returned matrix is ``x.shape + (order,)``, where + :math:`order = (deg[0]+1)*(deg[1]+1)`. The dtype will be the same + as the converted `x` and `y`. + + See Also + -------- + hermvander, hermvander3d, hermval2d, hermval3d + + Notes + ----- + + .. versionadded:: 1.7.0 + + """ + return pu._vander_nd_flat((hermvander, hermvander), (x, y), deg) + + +def hermvander3d(x, y, z, deg): + """Pseudo-Vandermonde matrix of given degrees. + + Returns the pseudo-Vandermonde matrix of degrees `deg` and sample + points `(x, y, z)`. If `l, m, n` are the given degrees in `x, y, z`, + then the pseudo-Vandermonde matrix is defined by + + .. math:: V[..., (m+1)(n+1)i + (n+1)j + k] = H_i(x)*H_j(y)*H_k(z), + + where `0 <= i <= l`, `0 <= j <= m`, and `0 <= k <= n`. The leading + indices of `V` index the points `(x, y, z)` and the last index encodes + the degrees of the Hermite polynomials. + + If ``V = hermvander3d(x, y, z, [xdeg, ydeg, zdeg])``, then the columns + of `V` correspond to the elements of a 3-D coefficient array `c` of + shape (xdeg + 1, ydeg + 1, zdeg + 1) in the order + + .. math:: c_{000}, c_{001}, c_{002},... , c_{010}, c_{011}, c_{012},... + + and ``np.dot(V, c.flat)`` and ``hermval3d(x, y, z, c)`` will be the + same up to roundoff. This equivalence is useful both for least squares + fitting and for the evaluation of a large number of 3-D Hermite + series of the same degrees and sample points. + + Parameters + ---------- + x, y, z : array_like + Arrays of point coordinates, all of the same shape. The dtypes will + be converted to either float64 or complex128 depending on whether + any of the elements are complex. Scalars are converted to 1-D + arrays. + deg : list of ints + List of maximum degrees of the form [x_deg, y_deg, z_deg]. + + Returns + ------- + vander3d : ndarray + The shape of the returned matrix is ``x.shape + (order,)``, where + :math:`order = (deg[0]+1)*(deg[1]+1)*(deg[2]+1)`. The dtype will + be the same as the converted `x`, `y`, and `z`. + + See Also + -------- + hermvander, hermvander2d, hermval2d, hermval3d + + Notes + ----- + + .. versionadded:: 1.7.0 + + """ + return pu._vander_nd_flat((hermvander, hermvander, hermvander), (x, y, z), deg) + + +def hermfit(x, y, deg, rcond=None, full=False, w=None): + """ + Least squares fit of Hermite series to data. + + Return the coefficients of a Hermite series of degree `deg` that is the + least squares fit to the data values `y` given at points `x`. If `y` is + 1-D the returned coefficients will also be 1-D. If `y` is 2-D multiple + fits are done, one for each column of `y`, and the resulting + coefficients are stored in the corresponding columns of a 2-D return. + The fitted polynomial(s) are in the form + + .. math:: p(x) = c_0 + c_1 * H_1(x) + ... + c_n * H_n(x), + + where `n` is `deg`.
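+ + For an unweighted fit with a single integer `deg`, this is equivalent, up to the column scaling applied internally, to a plain least squares solve against the pseudo-Vandermonde matrix (a rough sketch of the idea, not the exact code path):: + + coef, *_ = np.linalg.lstsq(hermvander(x, deg), y, rcond=None)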
+ + Parameters + ---------- + x : array_like, shape (M,) + x-coordinates of the M sample points ``(x[i], y[i])``. + y : array_like, shape (M,) or (M, K) + y-coordinates of the sample points. Several data sets of sample + points sharing the same x-coordinates can be fitted at once by + passing in a 2D-array that contains one dataset per column. + deg : int or 1-D array_like + Degree(s) of the fitting polynomials. If `deg` is a single integer + all terms up to and including the `deg`'th term are included in the + fit. For NumPy versions >= 1.11.0 a list of integers specifying the + degrees of the terms to include may be used instead. + rcond : float, optional + Relative condition number of the fit. Singular values smaller than + this relative to the largest singular value will be ignored. The + default value is len(x)*eps, where eps is the relative precision of + the float type, about 2e-16 in most cases. + full : bool, optional + Switch determining nature of return value. When it is False (the + default) just the coefficients are returned, when True diagnostic + information from the singular value decomposition is also returned. + w : array_like, shape (`M`,), optional + Weights. If not None, the weight ``w[i]`` applies to the unsquared + residual ``y[i] - y_hat[i]`` at ``x[i]``. Ideally the weights are + chosen so that the errors of the products ``w[i]*y[i]`` all have the + same variance. When using inverse-variance weighting, use + ``w[i] = 1/sigma(y[i])``. The default value is None. + + Returns + ------- + coef : ndarray, shape (M,) or (M, K) + Hermite coefficients ordered from low to high. If `y` was 2-D, + the coefficients for the data in column k of `y` are in column + `k`. + + [residuals, rank, singular_values, rcond] : list + These values are only returned if ``full == True`` + + - residuals -- sum of squared residuals of the least squares fit + - rank -- the numerical rank of the scaled Vandermonde matrix + - singular_values -- singular values of the scaled Vandermonde matrix + - rcond -- value of `rcond`. + + For more details, see `numpy.linalg.lstsq`. + + Warns + ----- + RankWarning + The rank of the coefficient matrix in the least-squares fit is + deficient. The warning is only raised if ``full == False``. The + warnings can be turned off by + + >>> import warnings + >>> warnings.simplefilter('ignore', np.RankWarning) + + See Also + -------- + numpy.polynomial.chebyshev.chebfit + numpy.polynomial.legendre.legfit + numpy.polynomial.laguerre.lagfit + numpy.polynomial.polynomial.polyfit + numpy.polynomial.hermite_e.hermefit + hermval : Evaluates a Hermite series. + hermvander : Vandermonde matrix of Hermite series. + hermweight : Hermite weight function + numpy.linalg.lstsq : Computes a least-squares fit from the matrix. + scipy.interpolate.UnivariateSpline : Computes spline fits. + + Notes + ----- + The solution is the coefficients of the Hermite series `p` that + minimizes the sum of the weighted squared errors + + .. math:: E = \\sum_j w_j^2 * |y_j - p(x_j)|^2, + + where the :math:`w_j` are the weights. This problem is solved by + setting up the (typically) overdetermined matrix equation + + .. math:: V(x) * c = w * y, + + where `V` is the weighted pseudo Vandermonde matrix of `x`, `c` are the + coefficients to be solved for, `w` are the weights, `y` are the + observed values. This equation is then solved using the singular value + decomposition of `V`. + + If some of the singular values of `V` are so small that they are + neglected, then a `RankWarning` will be issued. 
This means that the + coefficient values may be poorly determined. Using a lower order fit + will usually get rid of the warning. The `rcond` parameter can also be + set to a value smaller than its default, but the resulting fit may be + spurious and have large contributions from roundoff error. + + Fits using Hermite series are probably most useful when the data can be + approximated by ``sqrt(w(x)) * p(x)``, where `w(x)` is the Hermite + weight. In that case the weight ``sqrt(w(x[i]))`` should be used + together with data values ``y[i]/sqrt(w(x[i]))``. The weight function is + available as `hermweight`. + + References + ---------- + .. [1] Wikipedia, "Curve fitting", + https://en.wikipedia.org/wiki/Curve_fitting + + Examples + -------- + >>> from numpy.polynomial.hermite import hermfit, hermval + >>> x = np.linspace(-10, 10) + >>> err = np.random.randn(len(x))/10 + >>> y = hermval(x, [1, 2, 3]) + err + >>> hermfit(x, y, 2) + array([1.0218, 1.9986, 2.9999]) # may vary + + """ + return pu._fit(hermvander, x, y, deg, rcond, full, w) + + +def hermcompanion(c): + """Return the scaled companion matrix of c. + + The basis polynomials are scaled so that the companion matrix is + symmetric when `c` is a Hermite basis polynomial. This provides + better eigenvalue estimates than the unscaled case and for basis + polynomials the eigenvalues are guaranteed to be real if + `numpy.linalg.eigvalsh` is used to obtain them. + + Parameters + ---------- + c : array_like + 1-D array of Hermite series coefficients ordered from low to high + degree. + + Returns + ------- + mat : ndarray + Scaled companion matrix of dimensions (deg, deg). + + Notes + ----- + + .. versionadded:: 1.7.0 + + """ + # c is a trimmed copy + [c] = pu.as_series([c]) + if len(c) < 2: + raise ValueError('Series must have maximum degree of at least 1.') + if len(c) == 2: + return np.array([[-.5*c[0]/c[1]]]) + + n = len(c) - 1 + mat = np.zeros((n, n), dtype=c.dtype) + scl = np.hstack((1., 1./np.sqrt(2.*np.arange(n - 1, 0, -1)))) + scl = np.multiply.accumulate(scl)[::-1] + top = mat.reshape(-1)[1::n+1] + bot = mat.reshape(-1)[n::n+1] + top[...] = np.sqrt(.5*np.arange(1, n)) + bot[...] = top + mat[:, -1] -= scl*c[:-1]/(2.0*c[-1]) + return mat + + +def hermroots(c): + """ + Compute the roots of a Hermite series. + + Return the roots (a.k.a. "zeros") of the polynomial + + .. math:: p(x) = \\sum_i c[i] * H_i(x). + + Parameters + ---------- + c : 1-D array_like + 1-D array of coefficients. + + Returns + ------- + out : ndarray + Array of the roots of the series. If all the roots are real, + then `out` is also real, otherwise it is complex. + + See Also + -------- + numpy.polynomial.polynomial.polyroots + numpy.polynomial.legendre.legroots + numpy.polynomial.laguerre.lagroots + numpy.polynomial.chebyshev.chebroots + numpy.polynomial.hermite_e.hermeroots + + Notes + ----- + The root estimates are obtained as the eigenvalues of the companion + matrix. Roots far from the origin of the complex plane may have large + errors due to the numerical instability of the series for such + values. Roots with multiplicity greater than 1 will also show larger + errors as the value of the series near such points is relatively + insensitive to errors in the roots. Isolated roots near the origin can + be improved by a few iterations of Newton's method. + + The Hermite series basis polynomials aren't powers of `x` so the + results of this function may seem unintuitive.
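+ + The Newton refinement mentioned above can be sketched with `hermval` and `hermder`; given estimates ``r`` of the roots of the series ``c``, one step is:: + + r = r - hermval(r, c)/hermval(r, hermder(c))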
+ + Examples + -------- + >>> from numpy.polynomial.hermite import hermroots, hermfromroots + >>> coef = hermfromroots([-1, 0, 1]) + >>> coef + array([0. , 0.25 , 0. , 0.125]) + >>> hermroots(coef) + array([-1.00000000e+00, -1.38777878e-17, 1.00000000e+00]) + + """ + # c is a trimmed copy + [c] = pu.as_series([c]) + if len(c) <= 1: + return np.array([], dtype=c.dtype) + if len(c) == 2: + return np.array([-.5*c[0]/c[1]]) + + # rotated companion matrix reduces error + m = hermcompanion(c)[::-1,::-1] + r = la.eigvals(m) + r.sort() + return r + + +def _normed_hermite_n(x, n): + """ + Evaluate a normalized Hermite polynomial. + + Compute the value of the normalized Hermite polynomial of degree ``n`` + at the points ``x``. + + + Parameters + ---------- + x : ndarray of double. + Points at which to evaluate the function + n : int + Degree of the normalized Hermite function to be evaluated. + + Returns + ------- + values : ndarray + The shape of the return value is described above. + + Notes + ----- + .. versionadded:: 1.10.0 + + This function is needed for finding the Gauss points and integration + weights for high degrees. The values of the standard Hermite functions + overflow when n >= 207. + + """ + if n == 0: + return np.full(x.shape, 1/np.sqrt(np.sqrt(np.pi))) + + c0 = 0. + c1 = 1./np.sqrt(np.sqrt(np.pi)) + nd = float(n) + for i in range(n - 1): + tmp = c0 + c0 = -c1*np.sqrt((nd - 1.)/nd) + c1 = tmp + c1*x*np.sqrt(2./nd) + nd = nd - 1.0 + return c0 + c1*x*np.sqrt(2) + + +def hermgauss(deg): + """ + Gauss-Hermite quadrature. + + Computes the sample points and weights for Gauss-Hermite quadrature. + These sample points and weights will correctly integrate polynomials of + degree :math:`2*deg - 1` or less over the interval :math:`[-\\inf, \\inf]` + with the weight function :math:`f(x) = \\exp(-x^2)`. + + Parameters + ---------- + deg : int + Number of sample points and weights. It must be >= 1. + + Returns + ------- + x : ndarray + 1-D ndarray containing the sample points. + y : ndarray + 1-D ndarray containing the weights. + + Notes + ----- + + .. versionadded:: 1.7.0 + + The results have only been tested up to degree 100, higher degrees may + be problematic. The weights are determined by using the fact that + + .. math:: w_k = c / (H'_n(x_k) * H_{n-1}(x_k)) + + where :math:`c` is a constant independent of :math:`k` and :math:`x_k` + is the k'th root of :math:`H_n`, and then scaling the results to get + the right value when integrating 1. + + """ + ideg = pu._deprecate_as_int(deg, "deg") + if ideg <= 0: + raise ValueError("deg must be a positive integer") + + # first approximation of roots. We use the fact that the companion + # matrix is symmetric in this case in order to obtain better zeros. + c = np.array([0]*deg + [1], dtype=np.float64) + m = hermcompanion(c) + x = la.eigvalsh(m) + + # improve roots by one application of Newton + dy = _normed_hermite_n(x, ideg) + df = _normed_hermite_n(x, ideg - 1) * np.sqrt(2*ideg) + x -= dy/df + + # compute the weights. We scale the factor to avoid possible numerical + # overflow. + fm = _normed_hermite_n(x, ideg - 1) + fm /= np.abs(fm).max() + w = 1/(fm * fm) + + # for Hermite we can also symmetrize + w = (w + w[::-1])/2 + x = (x - x[::-1])/2 + + # scale w to get the right value + w *= np.sqrt(np.pi) / w.sum() + + return x, w + + +def hermweight(x): + """ + Weight function of the Hermite polynomials. + + The weight function is :math:`\\exp(-x^2)` and the interval of + integration is :math:`[-\\inf, \\inf]`. 
The Hermite polynomials are + orthogonal, but not normalized, with respect to this weight function. + + Parameters + ---------- + x : array_like + Values at which the weight function will be computed. + + Returns + ------- + w : ndarray + The weight function at `x`. + + Notes + ----- + + .. versionadded:: 1.7.0 + + """ + w = np.exp(-x**2) + return w + + +# +# Hermite series class +# + +class Hermite(ABCPolyBase): + """A Hermite series class. + + The Hermite class provides the standard Python numerical methods + '+', '-', '*', '//', '%', 'divmod', '**', and '()' as well as the + attributes and methods listed in the `ABCPolyBase` documentation. + + Parameters + ---------- + coef : array_like + Hermite coefficients in order of increasing degree, i.e., + ``(1, 2, 3)`` gives ``1*H_0(x) + 2*H_1(x) + 3*H_2(x)``. + domain : (2,) array_like, optional + Domain to use. The interval ``[domain[0], domain[1]]`` is mapped + to the interval ``[window[0], window[1]]`` by shifting and scaling. + The default value is [-1, 1]. + window : (2,) array_like, optional + Window, see `domain` for its use. The default value is [-1, 1]. + + .. versionadded:: 1.6.0 + + """ + # Virtual Functions + _add = staticmethod(hermadd) + _sub = staticmethod(hermsub) + _mul = staticmethod(hermmul) + _div = staticmethod(hermdiv) + _pow = staticmethod(hermpow) + _val = staticmethod(hermval) + _int = staticmethod(hermint) + _der = staticmethod(hermder) + _fit = staticmethod(hermfit) + _line = staticmethod(hermline) + _roots = staticmethod(hermroots) + _fromroots = staticmethod(hermfromroots) + + # Virtual properties + domain = np.array(hermdomain) + window = np.array(hermdomain) + basis_name = 'H' diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/hermite.pyi b/.local/lib/python3.8/site-packages/numpy/polynomial/hermite.pyi new file mode 100644 index 0000000..8364a5b --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/polynomial/hermite.pyi @@ -0,0 +1,46 @@ +from typing import Any, List + +from numpy import ndarray, dtype, int_, float_ +from numpy.polynomial._polybase import ABCPolyBase +from numpy.polynomial.polyutils import trimcoef + +__all__: list[str] + +hermtrim = trimcoef + +def poly2herm(pol): ... +def herm2poly(c): ... + +hermdomain: ndarray[Any, dtype[int_]] +hermzero: ndarray[Any, dtype[int_]] +hermone: ndarray[Any, dtype[int_]] +hermx: ndarray[Any, dtype[float_]] + +def hermline(off, scl): ... +def hermfromroots(roots): ... +def hermadd(c1, c2): ... +def hermsub(c1, c2): ... +def hermmulx(c): ... +def hermmul(c1, c2): ... +def hermdiv(c1, c2): ... +def hermpow(c, pow, maxpower=...): ... +def hermder(c, m=..., scl=..., axis=...): ... +def hermint(c, m=..., k=..., lbnd=..., scl=..., axis=...): ... +def hermval(x, c, tensor=...): ... +def hermval2d(x, y, c): ... +def hermgrid2d(x, y, c): ... +def hermval3d(x, y, z, c): ... +def hermgrid3d(x, y, z, c): ... +def hermvander(x, deg): ... +def hermvander2d(x, y, deg): ... +def hermvander3d(x, y, z, deg): ... +def hermfit(x, y, deg, rcond=..., full=..., w=...): ... +def hermcompanion(c): ... +def hermroots(c): ... +def hermgauss(deg): ... +def hermweight(x): ...
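+ +# A minimal usage sketch of the class stubbed below; the expected value +# follows the hermval example in hermite.py: +# +# >>> from numpy.polynomial import Hermite +# >>> p = Hermite([1, 2, 3]) # 1*H_0(x) + 2*H_1(x) + 3*H_2(x) +# >>> p(1) # same as hermval(1, [1, 2, 3]) +# 11.0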
+ +class Hermite(ABCPolyBase): + domain: Any + window: Any + basis_name: Any diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/hermite_e.py b/.local/lib/python3.8/site-packages/numpy/polynomial/hermite_e.py new file mode 100644 index 0000000..182c562 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/polynomial/hermite_e.py @@ -0,0 +1,1689 @@ +""" +=================================================================== +HermiteE Series, "Probabilists" (:mod:`numpy.polynomial.hermite_e`) +=================================================================== + +This module provides a number of objects (mostly functions) useful for +dealing with Hermite_e series, including a `HermiteE` class that +encapsulates the usual arithmetic operations. (General information +on how this module represents and works with such polynomials is in the +docstring for its "parent" sub-package, `numpy.polynomial`). + +Classes +------- +.. autosummary:: + :toctree: generated/ + + HermiteE + +Constants +--------- +.. autosummary:: + :toctree: generated/ + + hermedomain + hermezero + hermeone + hermex + +Arithmetic +---------- +.. autosummary:: + :toctree: generated/ + + hermeadd + hermesub + hermemulx + hermemul + hermediv + hermepow + hermeval + hermeval2d + hermeval3d + hermegrid2d + hermegrid3d + +Calculus +-------- +.. autosummary:: + :toctree: generated/ + + hermeder + hermeint + +Misc Functions +-------------- +.. autosummary:: + :toctree: generated/ + + hermefromroots + hermeroots + hermevander + hermevander2d + hermevander3d + hermegauss + hermeweight + hermecompanion + hermefit + hermetrim + hermeline + herme2poly + poly2herme + +See also +-------- +`numpy.polynomial` + +""" +import numpy as np +import numpy.linalg as la +from numpy.core.multiarray import normalize_axis_index + +from . import polyutils as pu +from ._polybase import ABCPolyBase + +__all__ = [ + 'hermezero', 'hermeone', 'hermex', 'hermedomain', 'hermeline', + 'hermeadd', 'hermesub', 'hermemulx', 'hermemul', 'hermediv', + 'hermepow', 'hermeval', 'hermeder', 'hermeint', 'herme2poly', + 'poly2herme', 'hermefromroots', 'hermevander', 'hermefit', 'hermetrim', + 'hermeroots', 'HermiteE', 'hermeval2d', 'hermeval3d', 'hermegrid2d', + 'hermegrid3d', 'hermevander2d', 'hermevander3d', 'hermecompanion', + 'hermegauss', 'hermeweight'] + +hermetrim = pu.trimcoef + + +def poly2herme(pol): + """ + poly2herme(pol) + + Convert a polynomial to a Hermite series. + + Convert an array representing the coefficients of a polynomial (relative + to the "standard" basis) ordered from lowest degree to highest, to an + array of the coefficients of the equivalent Hermite series, ordered + from lowest to highest degree. + + Parameters + ---------- + pol : array_like + 1-D array containing the polynomial coefficients + + Returns + ------- + c : ndarray + 1-D array containing the coefficients of the equivalent Hermite + series. + + See Also + -------- + herme2poly + + Notes + ----- + The easy way to do conversions between polynomial basis sets + is to use the convert method of a class instance. + + Examples + -------- + >>> from numpy.polynomial.hermite_e import poly2herme + >>> poly2herme(np.arange(4)) + array([ 2., 10., 2., 3.]) + + """ + [pol] = pu.as_series([pol]) + deg = len(pol) - 1 + res = 0 + for i in range(deg, -1, -1): + res = hermeadd(hermemulx(res), pol[i]) + return res + + +def herme2poly(c): + """ + Convert a Hermite series to a polynomial. 
+ + Convert an array representing the coefficients of a Hermite series, + ordered from lowest degree to highest, to an array of the coefficients + of the equivalent polynomial (relative to the "standard" basis) ordered + from lowest to highest degree. + + Parameters + ---------- + c : array_like + 1-D array containing the Hermite series coefficients, ordered + from lowest order term to highest. + + Returns + ------- + pol : ndarray + 1-D array containing the coefficients of the equivalent polynomial + (relative to the "standard" basis) ordered from lowest order term + to highest. + + See Also + -------- + poly2herme + + Notes + ----- + The easy way to do conversions between polynomial basis sets + is to use the convert method of a class instance. + + Examples + -------- + >>> from numpy.polynomial.hermite_e import herme2poly + >>> herme2poly([ 2., 10., 2., 3.]) + array([0., 1., 2., 3.]) + + """ + from .polynomial import polyadd, polysub, polymulx + + [c] = pu.as_series([c]) + n = len(c) + if n == 1: + return c + if n == 2: + return c + else: + c0 = c[-2] + c1 = c[-1] + # i is the current degree of c1 + for i in range(n - 1, 1, -1): + tmp = c0 + c0 = polysub(c[i - 2], c1*(i - 1)) + c1 = polyadd(tmp, polymulx(c1)) + return polyadd(c0, polymulx(c1)) + +# +# These constant arrays are of integer type so as to be compatible +# with the widest range of other types, such as Decimal. +# + +# Hermite +hermedomain = np.array([-1, 1]) + +# Hermite coefficients representing zero. +hermezero = np.array([0]) + +# Hermite coefficients representing one. +hermeone = np.array([1]) + +# Hermite coefficients representing the identity x. +hermex = np.array([0, 1]) + + +def hermeline(off, scl): + """ + Hermite series whose graph is a straight line. + + Parameters + ---------- + off, scl : scalars + The specified line is given by ``off + scl*x``. + + Returns + ------- + y : ndarray + This module's representation of the Hermite series for + ``off + scl*x``. + + See Also + -------- + numpy.polynomial.polynomial.polyline + numpy.polynomial.chebyshev.chebline + numpy.polynomial.legendre.legline + numpy.polynomial.laguerre.lagline + numpy.polynomial.hermite.hermline + + Examples + -------- + >>> from numpy.polynomial.hermite_e import hermeline, hermeval + >>> hermeval(0,hermeline(3, 2)) + 3.0 + >>> hermeval(1,hermeline(3, 2)) + 5.0 + + """ + if scl != 0: + return np.array([off, scl]) + else: + return np.array([off]) + + +def hermefromroots(roots): + """ + Generate a HermiteE series with given roots. + + The function returns the coefficients of the polynomial + + .. math:: p(x) = (x - r_0) * (x - r_1) * ... * (x - r_n), + + in HermiteE form, where the `r_n` are the roots specified in `roots`. + If a zero has multiplicity n, then it must appear in `roots` n times. + For instance, if 2 is a root of multiplicity three and 3 is a root of + multiplicity 2, then `roots` looks something like [2, 2, 2, 3, 3]. The + roots can appear in any order. + + If the returned coefficients are `c`, then + + .. math:: p(x) = c_0 + c_1 * He_1(x) + ... + c_n * He_n(x) + + The coefficient of the last term is not generally 1 for monic + polynomials in HermiteE form. + + Parameters + ---------- + roots : array_like + Sequence containing the roots. + + Returns + ------- + out : ndarray + 1-D array of coefficients.
If all roots are real then `out` is a + real array, if some of the roots are complex, then `out` is complex + even if all the coefficients in the result are real (see Examples + below). + + See Also + -------- + numpy.polynomial.polynomial.polyfromroots + numpy.polynomial.legendre.legfromroots + numpy.polynomial.laguerre.lagfromroots + numpy.polynomial.hermite.hermfromroots + numpy.polynomial.chebyshev.chebfromroots + + Examples + -------- + >>> from numpy.polynomial.hermite_e import hermefromroots, hermeval + >>> coef = hermefromroots((-1, 0, 1)) + >>> hermeval((-1, 0, 1), coef) + array([0., 0., 0.]) + >>> coef = hermefromroots((-1j, 1j)) + >>> hermeval((-1j, 1j), coef) + array([0.+0.j, 0.+0.j]) + + """ + return pu._fromroots(hermeline, hermemul, roots) + + +def hermeadd(c1, c2): + """ + Add one Hermite series to another. + + Returns the sum of two Hermite series `c1` + `c2`. The arguments + are sequences of coefficients ordered from lowest order term to + highest, i.e., [1,2,3] represents the series ``P_0 + 2*P_1 + 3*P_2``. + + Parameters + ---------- + c1, c2 : array_like + 1-D arrays of Hermite series coefficients ordered from low to + high. + + Returns + ------- + out : ndarray + Array representing the Hermite series of their sum. + + See Also + -------- + hermesub, hermemulx, hermemul, hermediv, hermepow + + Notes + ----- + Unlike multiplication, division, etc., the sum of two Hermite series + is a Hermite series (without having to "reproject" the result onto + the basis set) so addition, just like that of "standard" polynomials, + is simply "component-wise." + + Examples + -------- + >>> from numpy.polynomial.hermite_e import hermeadd + >>> hermeadd([1, 2, 3], [1, 2, 3, 4]) + array([2., 4., 6., 4.]) + + """ + return pu._add(c1, c2) + + +def hermesub(c1, c2): + """ + Subtract one Hermite series from another. + + Returns the difference of two Hermite series `c1` - `c2`. The + sequences of coefficients are from lowest order term to highest, i.e., + [1,2,3] represents the series ``P_0 + 2*P_1 + 3*P_2``. + + Parameters + ---------- + c1, c2 : array_like + 1-D arrays of Hermite series coefficients ordered from low to + high. + + Returns + ------- + out : ndarray + Of Hermite series coefficients representing their difference. + + See Also + -------- + hermeadd, hermemulx, hermemul, hermediv, hermepow + + Notes + ----- + Unlike multiplication, division, etc., the difference of two Hermite + series is a Hermite series (without having to "reproject" the result + onto the basis set) so subtraction, just like that of "standard" + polynomials, is simply "component-wise." + + Examples + -------- + >>> from numpy.polynomial.hermite_e import hermesub + >>> hermesub([1, 2, 3, 4], [1, 2, 3]) + array([0., 0., 0., 4.]) + + """ + return pu._sub(c1, c2) + + +def hermemulx(c): + """Multiply a Hermite series by x. + + Multiply the Hermite series `c` by x, where x is the independent + variable. + + + Parameters + ---------- + c : array_like + 1-D array of Hermite series coefficients ordered from low to + high. + + Returns + ------- + out : ndarray + Array representing the result of the multiplication. + + Notes + ----- + The multiplication uses the recursion relationship for Hermite + polynomials in the form + + .. 
math:: + + xP_i(x) = (P_{i + 1}(x) + iP_{i - 1}(x)) + + Examples + -------- + >>> from numpy.polynomial.hermite_e import hermemulx + >>> hermemulx([1, 2, 3]) + array([2., 7., 2., 3.]) + + """ + # c is a trimmed copy + [c] = pu.as_series([c]) + # The zero series needs special treatment + if len(c) == 1 and c[0] == 0: + return c + + prd = np.empty(len(c) + 1, dtype=c.dtype) + prd[0] = c[0]*0 + prd[1] = c[0] + for i in range(1, len(c)): + prd[i + 1] = c[i] + prd[i - 1] += c[i]*i + return prd + + +def hermemul(c1, c2): + """ + Multiply one Hermite series by another. + + Returns the product of two Hermite series `c1` * `c2`. The arguments + are sequences of coefficients, from lowest order "term" to highest, + e.g., [1,2,3] represents the series ``P_0 + 2*P_1 + 3*P_2``. + + Parameters + ---------- + c1, c2 : array_like + 1-D arrays of Hermite series coefficients ordered from low to + high. + + Returns + ------- + out : ndarray + Of Hermite series coefficients representing their product. + + See Also + -------- + hermeadd, hermesub, hermemulx, hermediv, hermepow + + Notes + ----- + In general, the (polynomial) product of two C-series results in terms + that are not in the Hermite polynomial basis set. Thus, to express + the product as a Hermite series, it is necessary to "reproject" the + product onto said basis set, which may produce "unintuitive" (but + correct) results; see Examples section below. + + Examples + -------- + >>> from numpy.polynomial.hermite_e import hermemul + >>> hermemul([1, 2, 3], [0, 1, 2]) + array([14., 15., 28., 7., 6.]) + + """ + # s1, s2 are trimmed copies + [c1, c2] = pu.as_series([c1, c2]) + + if len(c1) > len(c2): + c = c2 + xs = c1 + else: + c = c1 + xs = c2 + + if len(c) == 1: + c0 = c[0]*xs + c1 = 0 + elif len(c) == 2: + c0 = c[0]*xs + c1 = c[1]*xs + else: + nd = len(c) + c0 = c[-2]*xs + c1 = c[-1]*xs + for i in range(3, len(c) + 1): + tmp = c0 + nd = nd - 1 + c0 = hermesub(c[-i]*xs, c1*(nd - 1)) + c1 = hermeadd(tmp, hermemulx(c1)) + return hermeadd(c0, hermemulx(c1)) + + +def hermediv(c1, c2): + """ + Divide one Hermite series by another. + + Returns the quotient-with-remainder of two Hermite series + `c1` / `c2`. The arguments are sequences of coefficients from lowest + order "term" to highest, e.g., [1,2,3] represents the series + ``P_0 + 2*P_1 + 3*P_2``. + + Parameters + ---------- + c1, c2 : array_like + 1-D arrays of Hermite series coefficients ordered from low to + high. + + Returns + ------- + [quo, rem] : ndarrays + Of Hermite series coefficients representing the quotient and + remainder. + + See Also + -------- + hermeadd, hermesub, hermemulx, hermemul, hermepow + + Notes + ----- + In general, the (polynomial) division of one Hermite series by another + results in quotient and remainder terms that are not in the Hermite + polynomial basis set. Thus, to express these results as a Hermite + series, it is necessary to "reproject" the results onto the Hermite + basis set, which may produce "unintuitive" (but correct) results; see + Examples section below. + + Examples + -------- + >>> from numpy.polynomial.hermite_e import hermediv + >>> hermediv([ 14., 15., 28., 7., 6.], [0, 1, 2]) + (array([1., 2., 3.]), array([0.])) + >>> hermediv([ 15., 17., 28., 7., 6.], [0, 1, 2]) + (array([1., 2., 3.]), array([1., 2.])) + + """ + return pu._div(hermemul, c1, c2) + + +def hermepow(c, pow, maxpower=16): + """Raise a Hermite series to a power.
The + argument `c` is a sequence of coefficients ordered from low to high. + i.e., [1,2,3] is the series ``P_0 + 2*P_1 + 3*P_2.`` + + Parameters + ---------- + c : array_like + 1-D array of Hermite series coefficients ordered from low to + high. + pow : integer + Power to which the series will be raised + maxpower : integer, optional + Maximum power allowed. This is mainly to limit growth of the series + to unmanageable size. Default is 16 + + Returns + ------- + coef : ndarray + Hermite series of power. + + See Also + -------- + hermeadd, hermesub, hermemulx, hermemul, hermediv + + Examples + -------- + >>> from numpy.polynomial.hermite_e import hermepow + >>> hermepow([1, 2, 3], 2) + array([23., 28., 46., 12., 9.]) + + """ + return pu._pow(hermemul, c, pow, maxpower) + + +def hermeder(c, m=1, scl=1, axis=0): + """ + Differentiate a Hermite_e series. + + Returns the series coefficients `c` differentiated `m` times along + `axis`. At each iteration the result is multiplied by `scl` (the + scaling factor is for use in a linear change of variable). The argument + `c` is an array of coefficients from low to high degree along each + axis, e.g., [1,2,3] represents the series ``1*He_0 + 2*He_1 + 3*He_2`` + while [[1,2],[1,2]] represents ``1*He_0(x)*He_0(y) + 1*He_1(x)*He_0(y) + + 2*He_0(x)*He_1(y) + 2*He_1(x)*He_1(y)`` if axis=0 is ``x`` and axis=1 + is ``y``. + + Parameters + ---------- + c : array_like + Array of Hermite_e series coefficients. If `c` is multidimensional + the different axis correspond to different variables with the + degree in each axis given by the corresponding index. + m : int, optional + Number of derivatives taken, must be non-negative. (Default: 1) + scl : scalar, optional + Each differentiation is multiplied by `scl`. The end result is + multiplication by ``scl**m``. This is for use in a linear change of + variable. (Default: 1) + axis : int, optional + Axis over which the derivative is taken. (Default: 0). + + .. versionadded:: 1.7.0 + + Returns + ------- + der : ndarray + Hermite series of the derivative. + + See Also + -------- + hermeint + + Notes + ----- + In general, the result of differentiating a Hermite series does not + resemble the same operation on a power series. Thus the result of this + function may be "unintuitive," albeit correct; see Examples section + below. + + Examples + -------- + >>> from numpy.polynomial.hermite_e import hermeder + >>> hermeder([ 1., 1., 1., 1.]) + array([1., 2., 3.]) + >>> hermeder([-0.25, 1., 1./2., 1./3., 1./4 ], m=2) + array([1., 2., 3.]) + + """ + c = np.array(c, ndmin=1, copy=True) + if c.dtype.char in '?bBhHiIlLqQpP': + c = c.astype(np.double) + cnt = pu._deprecate_as_int(m, "the order of derivation") + iaxis = pu._deprecate_as_int(axis, "the axis") + if cnt < 0: + raise ValueError("The order of derivation must be non-negative") + iaxis = normalize_axis_index(iaxis, c.ndim) + + if cnt == 0: + return c + + c = np.moveaxis(c, iaxis, 0) + n = len(c) + if cnt >= n: + return c[:1]*0 + else: + for i in range(cnt): + n = n - 1 + c *= scl + der = np.empty((n,) + c.shape[1:], dtype=c.dtype) + for j in range(n, 0, -1): + der[j - 1] = j*c[j] + c = der + c = np.moveaxis(c, 0, iaxis) + return c + + +def hermeint(c, m=1, k=[], lbnd=0, scl=1, axis=0): + """ + Integrate a Hermite_e series. + + Returns the Hermite_e series coefficients `c` integrated `m` times from + `lbnd` along `axis`. At each iteration the resulting series is + **multiplied** by `scl` and an integration constant, `k`, is added. 
+ The scaling factor is for use in a linear change of variable. ("Buyer + beware": note that, depending on what one is doing, one may want `scl` + to be the reciprocal of what one might expect; for more information, + see the Notes section below.) The argument `c` is an array of + coefficients from low to high degree along each axis, e.g., [1,2,3] + represents the series ``H_0 + 2*H_1 + 3*H_2`` while [[1,2],[1,2]] + represents ``1*H_0(x)*H_0(y) + 1*H_1(x)*H_0(y) + 2*H_0(x)*H_1(y) + + 2*H_1(x)*H_1(y)`` if axis=0 is ``x`` and axis=1 is ``y``. + + Parameters + ---------- + c : array_like + Array of Hermite_e series coefficients. If c is multidimensional + the different axis correspond to different variables with the + degree in each axis given by the corresponding index. + m : int, optional + Order of integration, must be positive. (Default: 1) + k : {[], list, scalar}, optional + Integration constant(s). The value of the first integral at + ``lbnd`` is the first value in the list, the value of the second + integral at ``lbnd`` is the second value, etc. If ``k == []`` (the + default), all constants are set to zero. If ``m == 1``, a single + scalar can be given instead of a list. + lbnd : scalar, optional + The lower bound of the integral. (Default: 0) + scl : scalar, optional + Following each integration the result is *multiplied* by `scl` + before the integration constant is added. (Default: 1) + axis : int, optional + Axis over which the integral is taken. (Default: 0). + + .. versionadded:: 1.7.0 + + Returns + ------- + S : ndarray + Hermite_e series coefficients of the integral. + + Raises + ------ + ValueError + If ``m < 0``, ``len(k) > m``, ``np.ndim(lbnd) != 0``, or + ``np.ndim(scl) != 0``. + + See Also + -------- + hermeder + + Notes + ----- + Note that the result of each integration is *multiplied* by `scl`. + Why is this important to note? Say one is making a linear change of + variable :math:`u = ax + b` in an integral relative to `x`. Then + :math:`dx = du/a`, so one will need to set `scl` equal to + :math:`1/a` - perhaps not what one would have first thought. + + Also note that, in general, the result of integrating a C-series needs + to be "reprojected" onto the C-series basis set. Thus, typically, + the result of this function is "unintuitive," albeit correct; see + Examples section below. + + Examples + -------- + >>> from numpy.polynomial.hermite_e import hermeint + >>> hermeint([1, 2, 3]) # integrate once, value 0 at 0. + array([1., 1., 1., 1.]) + >>> hermeint([1, 2, 3], m=2) # integrate twice, value & deriv 0 at 0 + array([-0.25 , 1. , 0.5 , 0.33333333, 0.25 ]) # may vary + >>> hermeint([1, 2, 3], k=1) # integrate once, value 1 at 0. + array([2., 1., 1., 1.]) + >>> hermeint([1, 2, 3], lbnd=-1) # integrate once, value 0 at -1 + array([-1., 1., 1., 1.]) + >>> hermeint([1, 2, 3], m=2, k=[1, 2], lbnd=-1) + array([ 1.83333333, 0. 
, 0.5 , 0.33333333, 0.25 ]) # may vary + + """ + c = np.array(c, ndmin=1, copy=True) + if c.dtype.char in '?bBhHiIlLqQpP': + c = c.astype(np.double) + if not np.iterable(k): + k = [k] + cnt = pu._deprecate_as_int(m, "the order of integration") + iaxis = pu._deprecate_as_int(axis, "the axis") + if cnt < 0: + raise ValueError("The order of integration must be non-negative") + if len(k) > cnt: + raise ValueError("Too many integration constants") + if np.ndim(lbnd) != 0: + raise ValueError("lbnd must be a scalar.") + if np.ndim(scl) != 0: + raise ValueError("scl must be a scalar.") + iaxis = normalize_axis_index(iaxis, c.ndim) + + if cnt == 0: + return c + + c = np.moveaxis(c, iaxis, 0) + k = list(k) + [0]*(cnt - len(k)) + for i in range(cnt): + n = len(c) + c *= scl + if n == 1 and np.all(c[0] == 0): + c[0] += k[i] + else: + tmp = np.empty((n + 1,) + c.shape[1:], dtype=c.dtype) + tmp[0] = c[0]*0 + tmp[1] = c[0] + for j in range(1, n): + tmp[j + 1] = c[j]/(j + 1) + tmp[0] += k[i] - hermeval(lbnd, tmp) + c = tmp + c = np.moveaxis(c, 0, iaxis) + return c + + +def hermeval(x, c, tensor=True): + """ + Evaluate an HermiteE series at points x. + + If `c` is of length `n + 1`, this function returns the value: + + .. math:: p(x) = c_0 * He_0(x) + c_1 * He_1(x) + ... + c_n * He_n(x) + + The parameter `x` is converted to an array only if it is a tuple or a + list, otherwise it is treated as a scalar. In either case, either `x` + or its elements must support multiplication and addition both with + themselves and with the elements of `c`. + + If `c` is a 1-D array, then `p(x)` will have the same shape as `x`. If + `c` is multidimensional, then the shape of the result depends on the + value of `tensor`. If `tensor` is true the shape will be c.shape[1:] + + x.shape. If `tensor` is false the shape will be c.shape[1:]. Note that + scalars have shape (,). + + Trailing zeros in the coefficients will be used in the evaluation, so + they should be avoided if efficiency is a concern. + + Parameters + ---------- + x : array_like, compatible object + If `x` is a list or tuple, it is converted to an ndarray, otherwise + it is left unchanged and treated as a scalar. In either case, `x` + or its elements must support addition and multiplication with + with themselves and with the elements of `c`. + c : array_like + Array of coefficients ordered so that the coefficients for terms of + degree n are contained in c[n]. If `c` is multidimensional the + remaining indices enumerate multiple polynomials. In the two + dimensional case the coefficients may be thought of as stored in + the columns of `c`. + tensor : boolean, optional + If True, the shape of the coefficient array is extended with ones + on the right, one for each dimension of `x`. Scalars have dimension 0 + for this action. The result is that every column of coefficients in + `c` is evaluated for every element of `x`. If False, `x` is broadcast + over the columns of `c` for the evaluation. This keyword is useful + when `c` is multidimensional. The default value is True. + + .. versionadded:: 1.7.0 + + Returns + ------- + values : ndarray, algebra_like + The shape of the return value is described above. + + See Also + -------- + hermeval2d, hermegrid2d, hermeval3d, hermegrid3d + + Notes + ----- + The evaluation uses Clenshaw recursion, aka synthetic division. 
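A quick way to unpack what that recursion computes, as a hand-expanded sketch (not part of the library; ``He_0(x) = 1``, ``He_1(x) = x``, and ``He_2(x) = x**2 - 1`` are the first probabilists' Hermite polynomials): for ``c = [1, 2, 3]`` the value at a point is ``1*He_0(x) + 2*He_1(x) + 3*He_2(x)``, so

>>> x = 2.0
>>> 1*1 + 2*x + 3*(x**2 - 1)  # hand-expanded hermeval(2.0, [1, 2, 3])
14.0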
+ + Examples + -------- + >>> from numpy.polynomial.hermite_e import hermeval + >>> coef = [1,2,3] + >>> hermeval(1, coef) + 3.0 + >>> hermeval([[1,2],[3,4]], coef) + array([[ 3., 14.], + [31., 54.]]) + + """ + c = np.array(c, ndmin=1, copy=False) + if c.dtype.char in '?bBhHiIlLqQpP': + c = c.astype(np.double) + if isinstance(x, (tuple, list)): + x = np.asarray(x) + if isinstance(x, np.ndarray) and tensor: + c = c.reshape(c.shape + (1,)*x.ndim) + + if len(c) == 1: + c0 = c[0] + c1 = 0 + elif len(c) == 2: + c0 = c[0] + c1 = c[1] + else: + nd = len(c) + c0 = c[-2] + c1 = c[-1] + for i in range(3, len(c) + 1): + tmp = c0 + nd = nd - 1 + c0 = c[-i] - c1*(nd - 1) + c1 = tmp + c1*x + return c0 + c1*x + + +def hermeval2d(x, y, c): + """ + Evaluate a 2-D HermiteE series at points (x, y). + + This function returns the values: + + .. math:: p(x,y) = \\sum_{i,j} c_{i,j} * He_i(x) * He_j(y) + + The parameters `x` and `y` are converted to arrays only if they are + tuples or lists, otherwise they are treated as scalars and they + must have the same shape after conversion. In either case, either `x` + and `y` or their elements must support multiplication and addition both + with themselves and with the elements of `c`. + + If `c` is a 1-D array a one is implicitly appended to its shape to make + it 2-D. The shape of the result will be c.shape[2:] + x.shape. + + Parameters + ---------- + x, y : array_like, compatible objects + The two dimensional series is evaluated at the points `(x, y)`, + where `x` and `y` must have the same shape. If `x` or `y` is a list + or tuple, it is first converted to an ndarray, otherwise it is left + unchanged and if it isn't an ndarray it is treated as a scalar. + c : array_like + Array of coefficients ordered so that the coefficient of the term + of multi-degree i,j is contained in ``c[i,j]``. If `c` has + dimension greater than two the remaining indices enumerate multiple + sets of coefficients. + + Returns + ------- + values : ndarray, compatible object + The values of the two dimensional polynomial at points formed with + pairs of corresponding values from `x` and `y`. + + See Also + -------- + hermeval, hermegrid2d, hermeval3d, hermegrid3d + + Notes + ----- + + .. versionadded:: 1.7.0 + + """ + return pu._valnd(hermeval, c, x, y) + + +def hermegrid2d(x, y, c): + """ + Evaluate a 2-D HermiteE series on the Cartesian product of x and y. + + This function returns the values: + + .. math:: p(a,b) = \\sum_{i,j} c_{i,j} * He_i(a) * He_j(b) + + where the points `(a, b)` consist of all pairs formed by taking + `a` from `x` and `b` from `y`. The resulting points form a grid with + `x` in the first dimension and `y` in the second. + + The parameters `x` and `y` are converted to arrays only if they are + tuples or lists, otherwise they are treated as scalars. In either + case, either `x` and `y` or their elements must support multiplication + and addition both with themselves and with the elements of `c`. + + If `c` has fewer than two dimensions, ones are implicitly appended to + its shape to make it 2-D. The shape of the result will be c.shape[2:] + + x.shape + y.shape. + + Parameters + ---------- + x, y : array_like, compatible objects + The two dimensional series is evaluated at the points in the + Cartesian product of `x` and `y`. If `x` or `y` is a list or + tuple, it is first converted to an ndarray, otherwise it is left + unchanged and, if it isn't an ndarray, it is treated as a scalar.
+ c : array_like + Array of coefficients ordered so that the coefficients for terms of + degree i,j are contained in ``c[i,j]``. If `c` has dimension + greater than two the remaining indices enumerate multiple sets of + coefficients. + + Returns + ------- + values : ndarray, compatible object + The values of the two dimensional polynomial at points in the Cartesian + product of `x` and `y`. + + See Also + -------- + hermeval, hermeval2d, hermeval3d, hermegrid3d + + Notes + ----- + + .. versionadded:: 1.7.0 + + """ + return pu._gridnd(hermeval, c, x, y) + + +def hermeval3d(x, y, z, c): + """ + Evaluate a 3-D Hermite_e series at points (x, y, z). + + This function returns the values: + + .. math:: p(x,y,z) = \\sum_{i,j,k} c_{i,j,k} * He_i(x) * He_j(y) * He_k(z) + + The parameters `x`, `y`, and `z` are converted to arrays only if + they are tuples or a lists, otherwise they are treated as a scalars and + they must have the same shape after conversion. In either case, either + `x`, `y`, and `z` or their elements must support multiplication and + addition both with themselves and with the elements of `c`. + + If `c` has fewer than 3 dimensions, ones are implicitly appended to its + shape to make it 3-D. The shape of the result will be c.shape[3:] + + x.shape. + + Parameters + ---------- + x, y, z : array_like, compatible object + The three dimensional series is evaluated at the points + `(x, y, z)`, where `x`, `y`, and `z` must have the same shape. If + any of `x`, `y`, or `z` is a list or tuple, it is first converted + to an ndarray, otherwise it is left unchanged and if it isn't an + ndarray it is treated as a scalar. + c : array_like + Array of coefficients ordered so that the coefficient of the term of + multi-degree i,j,k is contained in ``c[i,j,k]``. If `c` has dimension + greater than 3 the remaining indices enumerate multiple sets of + coefficients. + + Returns + ------- + values : ndarray, compatible object + The values of the multidimensional polynomial on points formed with + triples of corresponding values from `x`, `y`, and `z`. + + See Also + -------- + hermeval, hermeval2d, hermegrid2d, hermegrid3d + + Notes + ----- + + .. versionadded:: 1.7.0 + + """ + return pu._valnd(hermeval, c, x, y, z) + + +def hermegrid3d(x, y, z, c): + """ + Evaluate a 3-D HermiteE series on the Cartesian product of x, y, and z. + + This function returns the values: + + .. math:: p(a,b,c) = \\sum_{i,j,k} c_{i,j,k} * He_i(a) * He_j(b) * He_k(c) + + where the points `(a, b, c)` consist of all triples formed by taking + `a` from `x`, `b` from `y`, and `c` from `z`. The resulting points form + a grid with `x` in the first dimension, `y` in the second, and `z` in + the third. + + The parameters `x`, `y`, and `z` are converted to arrays only if they + are tuples or a lists, otherwise they are treated as a scalars. In + either case, either `x`, `y`, and `z` or their elements must support + multiplication and addition both with themselves and with the elements + of `c`. + + If `c` has fewer than three dimensions, ones are implicitly appended to + its shape to make it 3-D. The shape of the result will be c.shape[3:] + + x.shape + y.shape + z.shape. + + Parameters + ---------- + x, y, z : array_like, compatible objects + The three dimensional series is evaluated at the points in the + Cartesian product of `x`, `y`, and `z`. If `x`,`y`, or `z` is a + list or tuple, it is first converted to an ndarray, otherwise it is + left unchanged and, if it isn't an ndarray, it is treated as a + scalar. 
+ c : array_like + Array of coefficients ordered so that the coefficients for terms of + degree i,j,k are contained in ``c[i,j,k]``. If `c` has dimension + greater than three the remaining indices enumerate multiple sets of + coefficients. + + Returns + ------- + values : ndarray, compatible object + The values of the three dimensional polynomial at points in the Cartesian + product of `x`, `y`, and `z`. + + See Also + -------- + hermeval, hermeval2d, hermegrid2d, hermeval3d + + Notes + ----- + + .. versionadded:: 1.7.0 + + """ + return pu._gridnd(hermeval, c, x, y, z) + + +def hermevander(x, deg): + """Pseudo-Vandermonde matrix of given degree. + + Returns the pseudo-Vandermonde matrix of degree `deg` and sample points + `x`. The pseudo-Vandermonde matrix is defined by + + .. math:: V[..., i] = He_i(x), + + where `0 <= i <= deg`. The leading indices of `V` index the elements of + `x` and the last index is the degree of the HermiteE polynomial. + + If `c` is a 1-D array of coefficients of length `n + 1` and `V` is the + array ``V = hermevander(x, n)``, then ``np.dot(V, c)`` and + ``hermeval(x, c)`` are the same up to roundoff. This equivalence is + useful both for least squares fitting and for the evaluation of a large + number of HermiteE series of the same degree and sample points. + + Parameters + ---------- + x : array_like + Array of points. The dtype is converted to float64 or complex128 + depending on whether any of the elements are complex. If `x` is + scalar it is converted to a 1-D array. + deg : int + Degree of the resulting matrix. + + Returns + ------- + vander : ndarray + The pseudo-Vandermonde matrix. The shape of the returned matrix is + ``x.shape + (deg + 1,)``, where the last index is the degree of the + corresponding HermiteE polynomial. The dtype will be the same as + the converted `x`. + + Examples + -------- + >>> from numpy.polynomial.hermite_e import hermevander + >>> x = np.array([-1, 0, 1]) + >>> hermevander(x, 3) + array([[ 1., -1., 0., 2.], + [ 1., 0., -1., -0.], + [ 1., 1., 0., -2.]]) + + """ + ideg = pu._deprecate_as_int(deg, "deg") + if ideg < 0: + raise ValueError("deg must be non-negative") + + x = np.array(x, copy=False, ndmin=1) + 0.0 + dims = (ideg + 1,) + x.shape + dtyp = x.dtype + v = np.empty(dims, dtype=dtyp) + v[0] = x*0 + 1 + if ideg > 0: + v[1] = x + for i in range(2, ideg + 1): + v[i] = (v[i-1]*x - v[i-2]*(i - 1)) + return np.moveaxis(v, 0, -1) + + +def hermevander2d(x, y, deg): + """Pseudo-Vandermonde matrix of given degrees. + + Returns the pseudo-Vandermonde matrix of degrees `deg` and sample + points `(x, y)`. The pseudo-Vandermonde matrix is defined by + + .. math:: V[..., (deg[1] + 1)*i + j] = He_i(x) * He_j(y), + + where `0 <= i <= deg[0]` and `0 <= j <= deg[1]`. The leading indices of + `V` index the points `(x, y)` and the last index encodes the degrees of + the HermiteE polynomials. + + If ``V = hermevander2d(x, y, [xdeg, ydeg])``, then the columns of `V` + correspond to the elements of a 2-D coefficient array `c` of shape + (xdeg + 1, ydeg + 1) in the order + + .. math:: c_{00}, c_{01}, c_{02} ... , c_{10}, c_{11}, c_{12} ... + + and ``np.dot(V, c.flat)`` and ``hermeval2d(x, y, c)`` will be the same + up to roundoff. This equivalence is useful both for least squares + fitting and for the evaluation of a large number of 2-D HermiteE + series of the same degrees and sample points. + + Parameters + ---------- + x, y : array_like + Arrays of point coordinates, all of the same shape.
The dtypes + will be converted to either float64 or complex128 depending on + whether any of the elements are complex. Scalars are converted to + 1-D arrays. + deg : list of ints + List of maximum degrees of the form [x_deg, y_deg]. + + Returns + ------- + vander2d : ndarray + The shape of the returned matrix is ``x.shape + (order,)``, where + :math:`order = (deg[0]+1)*(deg[1]+1)`. The dtype will be the same + as the converted `x` and `y`. + + See Also + -------- + hermevander, hermevander3d, hermeval2d, hermeval3d + + Notes + ----- + + .. versionadded:: 1.7.0 + + """ + return pu._vander_nd_flat((hermevander, hermevander), (x, y), deg) + + +def hermevander3d(x, y, z, deg): + """Pseudo-Vandermonde matrix of given degrees. + + Returns the pseudo-Vandermonde matrix of degrees `deg` and sample + points `(x, y, z)`. If `l, m, n` are the given degrees in `x, y, z`, + then the pseudo-Vandermonde matrix is defined by + + .. math:: V[..., (m+1)(n+1)i + (n+1)j + k] = He_i(x)*He_j(y)*He_k(z), + + where `0 <= i <= l`, `0 <= j <= m`, and `0 <= k <= n`. The leading + indices of `V` index the points `(x, y, z)` and the last index encodes + the degrees of the HermiteE polynomials. + + If ``V = hermevander3d(x, y, z, [xdeg, ydeg, zdeg])``, then the columns + of `V` correspond to the elements of a 3-D coefficient array `c` of + shape (xdeg + 1, ydeg + 1, zdeg + 1) in the order + + .. math:: c_{000}, c_{001}, c_{002},... , c_{010}, c_{011}, c_{012},... + + and ``np.dot(V, c.flat)`` and ``hermeval3d(x, y, z, c)`` will be the + same up to roundoff. This equivalence is useful both for least squares + fitting and for the evaluation of a large number of 3-D HermiteE + series of the same degrees and sample points. + + Parameters + ---------- + x, y, z : array_like + Arrays of point coordinates, all of the same shape. The dtypes will + be converted to either float64 or complex128 depending on whether + any of the elements are complex. Scalars are converted to 1-D + arrays. + deg : list of ints + List of maximum degrees of the form [x_deg, y_deg, z_deg]. + + Returns + ------- + vander3d : ndarray + The shape of the returned matrix is ``x.shape + (order,)``, where + :math:`order = (deg[0]+1)*(deg[1]+1)*(deg[2]+1)`. The dtype will + be the same as the converted `x`, `y`, and `z`. + + See Also + -------- + hermevander, hermevander2d, hermeval2d, hermeval3d + + Notes + ----- + + .. versionadded:: 1.7.0 + + """ + return pu._vander_nd_flat((hermevander, hermevander, hermevander), (x, y, z), deg) + + +def hermefit(x, y, deg, rcond=None, full=False, w=None): + """ + Least squares fit of Hermite series to data. + + Return the coefficients of a HermiteE series of degree `deg` that is + the least squares fit to the data values `y` given at points `x`. If + `y` is 1-D the returned coefficients will also be 1-D. If `y` is 2-D + multiple fits are done, one for each column of `y`, and the resulting + coefficients are stored in the corresponding columns of a 2-D return. + The fitted polynomial(s) are in the form + + .. math:: p(x) = c_0 + c_1 * He_1(x) + ... + c_n * He_n(x), + + where `n` is `deg`. + + Parameters + ---------- + x : array_like, shape (M,) + x-coordinates of the M sample points ``(x[i], y[i])``. + y : array_like, shape (M,) or (M, K) + y-coordinates of the sample points. Several data sets of sample + points sharing the same x-coordinates can be fitted at once by + passing in a 2D-array that contains one dataset per column. + deg : int or 1-D array_like + Degree(s) of the fitting polynomials.
If `deg` is a single integer + all terms up to and including the `deg`'th term are included in the + fit. For NumPy versions >= 1.11.0 a list of integers specifying the + degrees of the terms to include may be used instead. + rcond : float, optional + Relative condition number of the fit. Singular values smaller than + this relative to the largest singular value will be ignored. The + default value is len(x)*eps, where eps is the relative precision of + the float type, about 2e-16 in most cases. + full : bool, optional + Switch determining nature of return value. When it is False (the + default) just the coefficients are returned, when True diagnostic + information from the singular value decomposition is also returned. + w : array_like, shape (`M`,), optional + Weights. If not None, the weight ``w[i]`` applies to the unsquared + residual ``y[i] - y_hat[i]`` at ``x[i]``. Ideally the weights are + chosen so that the errors of the products ``w[i]*y[i]`` all have the + same variance. When using inverse-variance weighting, use + ``w[i] = 1/sigma(y[i])``. The default value is None. + + Returns + ------- + coef : ndarray, shape (M,) or (M, K) + Hermite coefficients ordered from low to high. If `y` was 2-D, + the coefficients for the data in column k of `y` are in column + `k`. + + [residuals, rank, singular_values, rcond] : list + These values are only returned if ``full == True`` + + - residuals -- sum of squared residuals of the least squares fit + - rank -- the numerical rank of the scaled Vandermonde matrix + - singular_values -- singular values of the scaled Vandermonde matrix + - rcond -- value of `rcond`. + + For more details, see `numpy.linalg.lstsq`. + + Warns + ----- + RankWarning + The rank of the coefficient matrix in the least-squares fit is + deficient. The warning is only raised if ``full = False``. The + warnings can be turned off by + + >>> import warnings + >>> warnings.simplefilter('ignore', np.RankWarning) + + See Also + -------- + numpy.polynomial.chebyshev.chebfit + numpy.polynomial.legendre.legfit + numpy.polynomial.polynomial.polyfit + numpy.polynomial.hermite.hermfit + numpy.polynomial.laguerre.lagfit + hermeval : Evaluates a Hermite series. + hermevander : pseudo Vandermonde matrix of Hermite series. + hermeweight : HermiteE weight function. + numpy.linalg.lstsq : Computes a least-squares fit from the matrix. + scipy.interpolate.UnivariateSpline : Computes spline fits. + + Notes + ----- + The solution is the coefficients of the HermiteE series `p` that + minimizes the sum of the weighted squared errors + + .. math:: E = \\sum_j w_j^2 * |y_j - p(x_j)|^2, + + where the :math:`w_j` are the weights. This problem is solved by + setting up the (typically) overdetermined matrix equation + + .. math:: V(x) * c = w * y, + + where `V` is the pseudo Vandermonde matrix of `x`, the elements of `c` + are the coefficients to be solved for, and the elements of `y` are the + observed values. This equation is then solved using the singular value + decomposition of `V`. + + If some of the singular values of `V` are so small that they are + neglected, then a `RankWarning` will be issued. This means that the + coefficient values may be poorly determined. Using a lower order fit + will usually get rid of the warning. The `rcond` parameter can also be + set to a value smaller than its default, but the resulting fit may be + spurious and have large contributions from roundoff error. 
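As a rough sketch of the matrix equation above (assuming ``hermevander``, ``hermeval``, and NumPy are imported; the actual implementation additionally rescales the columns of the Vandermonde matrix before solving):

>>> x = np.linspace(-1, 1, 9)
>>> y = hermeval(x, [1, 2, 3])
>>> V = hermevander(x, 2)
>>> np.linalg.lstsq(V, y, rcond=None)[0]
array([1., 2., 3.]) # may vary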
+ + Fits using HermiteE series are probably most useful when the data can + be approximated by ``sqrt(w(x)) * p(x)``, where `w(x)` is the HermiteE + weight. In that case the weight ``sqrt(w(x[i]))`` should be used + together with data values ``y[i]/sqrt(w(x[i]))``. The weight function is + available as `hermeweight`. + + References + ---------- + .. [1] Wikipedia, "Curve fitting", + https://en.wikipedia.org/wiki/Curve_fitting + + Examples + -------- + >>> from numpy.polynomial.hermite_e import hermefit, hermeval + >>> x = np.linspace(-10, 10) + >>> np.random.seed(123) + >>> err = np.random.randn(len(x))/10 + >>> y = hermeval(x, [1, 2, 3]) + err + >>> hermefit(x, y, 2) + array([ 1.01690445, 1.99951418, 2.99948696]) # may vary + + """ + return pu._fit(hermevander, x, y, deg, rcond, full, w) + + +def hermecompanion(c): + """ + Return the scaled companion matrix of c. + + The basis polynomials are scaled so that the companion matrix is + symmetric when `c` is an HermiteE basis polynomial. This provides + better eigenvalue estimates than the unscaled case and for basis + polynomials the eigenvalues are guaranteed to be real if + `numpy.linalg.eigvalsh` is used to obtain them. + + Parameters + ---------- + c : array_like + 1-D array of HermiteE series coefficients ordered from low to high + degree. + + Returns + ------- + mat : ndarray + Scaled companion matrix of dimensions (deg, deg). + + Notes + ----- + + .. versionadded:: 1.7.0 + + """ + # c is a trimmed copy + [c] = pu.as_series([c]) + if len(c) < 2: + raise ValueError('Series must have maximum degree of at least 1.') + if len(c) == 2: + return np.array([[-c[0]/c[1]]]) + + n = len(c) - 1 + mat = np.zeros((n, n), dtype=c.dtype) + scl = np.hstack((1., 1./np.sqrt(np.arange(n - 1, 0, -1)))) + scl = np.multiply.accumulate(scl)[::-1] + top = mat.reshape(-1)[1::n+1] + bot = mat.reshape(-1)[n::n+1] + top[...] = np.sqrt(np.arange(1, n)) + bot[...] = top + mat[:, -1] -= scl*c[:-1]/c[-1] + return mat + + +def hermeroots(c): + """ + Compute the roots of a HermiteE series. + + Return the roots (a.k.a. "zeros") of the polynomial + + .. math:: p(x) = \\sum_i c[i] * He_i(x). + + Parameters + ---------- + c : 1-D array_like + 1-D array of coefficients. + + Returns + ------- + out : ndarray + Array of the roots of the series. If all the roots are real, + then `out` is also real, otherwise it is complex. + + See Also + -------- + numpy.polynomial.polynomial.polyroots + numpy.polynomial.legendre.legroots + numpy.polynomial.laguerre.lagroots + numpy.polynomial.hermite.hermroots + numpy.polynomial.chebyshev.chebroots + + Notes + ----- + The root estimates are obtained as the eigenvalues of the companion + matrix, Roots far from the origin of the complex plane may have large + errors due to the numerical instability of the series for such + values. Roots with multiplicity greater than 1 will also show larger + errors as the value of the series near such points is relatively + insensitive to errors in the roots. Isolated roots near the origin can + be improved by a few iterations of Newton's method. + + The HermiteE series basis polynomials aren't powers of `x` so the + results of this function may seem unintuitive. 
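As an illustrative sketch of the companion-matrix step described above (``hermecompanion`` is defined earlier in this module; ``np.linalg.eigvals`` stands in for the module-level ``la.eigvals``):

>>> m = hermecompanion([0., 2., 0., 1.])
>>> np.sort(np.linalg.eigvals(m))
array([-1., 0., 1.]) # may vary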
+ + Examples + -------- + >>> from numpy.polynomial.hermite_e import hermeroots, hermefromroots + >>> coef = hermefromroots([-1, 0, 1]) + >>> coef + array([0., 2., 0., 1.]) + >>> hermeroots(coef) + array([-1., 0., 1.]) # may vary + + """ + # c is a trimmed copy + [c] = pu.as_series([c]) + if len(c) <= 1: + return np.array([], dtype=c.dtype) + if len(c) == 2: + return np.array([-c[0]/c[1]]) + + # rotated companion matrix reduces error + m = hermecompanion(c)[::-1,::-1] + r = la.eigvals(m) + r.sort() + return r + + +def _normed_hermite_e_n(x, n): + """ + Evaluate a normalized HermiteE polynomial. + + Compute the value of the normalized HermiteE polynomial of degree ``n`` + at the points ``x``. + + + Parameters + ---------- + x : ndarray of double. + Points at which to evaluate the function + n : int + Degree of the normalized HermiteE function to be evaluated. + + Returns + ------- + values : ndarray + The shape of the return value is described above. + + Notes + ----- + .. versionadded:: 1.10.0 + + This function is needed for finding the Gauss points and integration + weights for high degrees. The values of the standard HermiteE functions + overflow when n >= 207. + + """ + if n == 0: + return np.full(x.shape, 1/np.sqrt(np.sqrt(2*np.pi))) + + c0 = 0. + c1 = 1./np.sqrt(np.sqrt(2*np.pi)) + nd = float(n) + for i in range(n - 1): + tmp = c0 + c0 = -c1*np.sqrt((nd - 1.)/nd) + c1 = tmp + c1*x*np.sqrt(1./nd) + nd = nd - 1.0 + return c0 + c1*x + + +def hermegauss(deg): + """ + Gauss-HermiteE quadrature. + + Computes the sample points and weights for Gauss-HermiteE quadrature. + These sample points and weights will correctly integrate polynomials of + degree :math:`2*deg - 1` or less over the interval :math:`[-\\inf, \\inf]` + with the weight function :math:`f(x) = \\exp(-x^2/2)`. + + Parameters + ---------- + deg : int + Number of sample points and weights. It must be >= 1. + + Returns + ------- + x : ndarray + 1-D ndarray containing the sample points. + y : ndarray + 1-D ndarray containing the weights. + + Notes + ----- + + .. versionadded:: 1.7.0 + + The results have only been tested up to degree 100, higher degrees may + be problematic. The weights are determined by using the fact that + + .. math:: w_k = c / (He'_n(x_k) * He_{n-1}(x_k)) + + where :math:`c` is a constant independent of :math:`k` and :math:`x_k` + is the k'th root of :math:`He_n`, and then scaling the results to get + the right value when integrating 1. + + """ + ideg = pu._deprecate_as_int(deg, "deg") + if ideg <= 0: + raise ValueError("deg must be a positive integer") + + # first approximation of roots. We use the fact that the companion + # matrix is symmetric in this case in order to obtain better zeros. + c = np.array([0]*deg + [1]) + m = hermecompanion(c) + x = la.eigvalsh(m) + + # improve roots by one application of Newton + dy = _normed_hermite_e_n(x, ideg) + df = _normed_hermite_e_n(x, ideg - 1) * np.sqrt(ideg) + x -= dy/df + + # compute the weights. We scale the factor to avoid possible numerical + # overflow. + fm = _normed_hermite_e_n(x, ideg - 1) + fm /= np.abs(fm).max() + w = 1/(fm * fm) + + # for Hermite_e we can also symmetrize + w = (w + w[::-1])/2 + x = (x - x[::-1])/2 + + # scale w to get the right value + w *= np.sqrt(2*np.pi) / w.sum() + + return x, w + + +def hermeweight(x): + """Weight function of the Hermite_e polynomials. + + The weight function is :math:`\\exp(-x^2/2)` and the interval of + integration is :math:`[-\\inf, \\inf]`. 
The HermiteE polynomials are + orthogonal, but not normalized, with respect to this weight function. + + Parameters + ---------- + x : array_like + Values at which the weight function will be computed. + + Returns + ------- + w : ndarray + The weight function at `x`. + + Notes + ----- + + .. versionadded:: 1.7.0 + + """ + w = np.exp(-.5*x**2) + return w + + +# +# HermiteE series class +# + +class HermiteE(ABCPolyBase): + """An HermiteE series class. + + The HermiteE class provides the standard Python numerical methods + '+', '-', '*', '//', '%', 'divmod', '**', and '()' as well as the + attributes and methods listed in the `ABCPolyBase` documentation. + + Parameters + ---------- + coef : array_like + HermiteE coefficients in order of increasing degree, i.e., + ``(1, 2, 3)`` gives ``1*He_0(x) + 2*He_1(x) + 3*He_2(x)``. + domain : (2,) array_like, optional + Domain to use. The interval ``[domain[0], domain[1]]`` is mapped + to the interval ``[window[0], window[1]]`` by shifting and scaling. + The default value is [-1, 1]. + window : (2,) array_like, optional + Window, see `domain` for its use. The default value is [-1, 1]. + + .. versionadded:: 1.6.0 + + """ + # Virtual Functions + _add = staticmethod(hermeadd) + _sub = staticmethod(hermesub) + _mul = staticmethod(hermemul) + _div = staticmethod(hermediv) + _pow = staticmethod(hermepow) + _val = staticmethod(hermeval) + _int = staticmethod(hermeint) + _der = staticmethod(hermeder) + _fit = staticmethod(hermefit) + _line = staticmethod(hermeline) + _roots = staticmethod(hermeroots) + _fromroots = staticmethod(hermefromroots) + + # Virtual properties + domain = np.array(hermedomain) + window = np.array(hermedomain) + basis_name = 'He' diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/hermite_e.pyi b/.local/lib/python3.8/site-packages/numpy/polynomial/hermite_e.pyi new file mode 100644 index 0000000..c029bfd --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/polynomial/hermite_e.pyi @@ -0,0 +1,46 @@ +from typing import Any, List + +from numpy import ndarray, dtype, int_ +from numpy.polynomial._polybase import ABCPolyBase +from numpy.polynomial.polyutils import trimcoef + +__all__: list[str] + +hermetrim = trimcoef + +def poly2herme(pol): ... +def herme2poly(c): ... + +hermedomain: ndarray[Any, dtype[int_]] +hermezero: ndarray[Any, dtype[int_]] +hermeone: ndarray[Any, dtype[int_]] +hermex: ndarray[Any, dtype[int_]] + +def hermeline(off, scl): ... +def hermefromroots(roots): ... +def hermeadd(c1, c2): ... +def hermesub(c1, c2): ... +def hermemulx(c): ... +def hermemul(c1, c2): ... +def hermediv(c1, c2): ... +def hermepow(c, pow, maxpower=...): ... +def hermeder(c, m=..., scl=..., axis=...): ... +def hermeint(c, m=..., k = ..., lbnd=..., scl=..., axis=...): ... +def hermeval(x, c, tensor=...): ... +def hermeval2d(x, y, c): ... +def hermegrid2d(x, y, c): ... +def hermeval3d(x, y, z, c): ... +def hermegrid3d(x, y, z, c): ... +def hermevander(x, deg): ... +def hermevander2d(x, y, deg): ... +def hermevander3d(x, y, z, deg): ... +def hermefit(x, y, deg, rcond=..., full=..., w=...): ... +def hermecompanion(c): ... +def hermeroots(c): ... +def hermegauss(deg): ... +def hermeweight(x): ...
+ +class HermiteE(ABCPolyBase): + domain: Any + window: Any + basis_name: Any diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/laguerre.py b/.local/lib/python3.8/site-packages/numpy/polynomial/laguerre.py new file mode 100644 index 0000000..d9ca373 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/polynomial/laguerre.py @@ -0,0 +1,1645 @@ +""" +================================================== +Laguerre Series (:mod:`numpy.polynomial.laguerre`) +================================================== + +This module provides a number of objects (mostly functions) useful for +dealing with Laguerre series, including a `Laguerre` class that +encapsulates the usual arithmetic operations. (General information +on how this module represents and works with such polynomials is in the +docstring for its "parent" sub-package, `numpy.polynomial`). + +Classes +------- +.. autosummary:: + :toctree: generated/ + + Laguerre + +Constants +--------- +.. autosummary:: + :toctree: generated/ + + lagdomain + lagzero + lagone + lagx + +Arithmetic +---------- +.. autosummary:: + :toctree: generated/ + + lagadd + lagsub + lagmulx + lagmul + lagdiv + lagpow + lagval + lagval2d + lagval3d + laggrid2d + laggrid3d + +Calculus +-------- +.. autosummary:: + :toctree: generated/ + + lagder + lagint + +Misc Functions +-------------- +.. autosummary:: + :toctree: generated/ + + lagfromroots + lagroots + lagvander + lagvander2d + lagvander3d + laggauss + lagweight + lagcompanion + lagfit + lagtrim + lagline + lag2poly + poly2lag + +See also +-------- +`numpy.polynomial` + +""" +import numpy as np +import numpy.linalg as la +from numpy.core.multiarray import normalize_axis_index + +from . import polyutils as pu +from ._polybase import ABCPolyBase + +__all__ = [ + 'lagzero', 'lagone', 'lagx', 'lagdomain', 'lagline', 'lagadd', + 'lagsub', 'lagmulx', 'lagmul', 'lagdiv', 'lagpow', 'lagval', 'lagder', + 'lagint', 'lag2poly', 'poly2lag', 'lagfromroots', 'lagvander', + 'lagfit', 'lagtrim', 'lagroots', 'Laguerre', 'lagval2d', 'lagval3d', + 'laggrid2d', 'laggrid3d', 'lagvander2d', 'lagvander3d', 'lagcompanion', + 'laggauss', 'lagweight'] + +lagtrim = pu.trimcoef + + +def poly2lag(pol): + """ + poly2lag(pol) + + Convert a polynomial to a Laguerre series. + + Convert an array representing the coefficients of a polynomial (relative + to the "standard" basis) ordered from lowest degree to highest, to an + array of the coefficients of the equivalent Laguerre series, ordered + from lowest to highest degree. + + Parameters + ---------- + pol : array_like + 1-D array containing the polynomial coefficients + + Returns + ------- + c : ndarray + 1-D array containing the coefficients of the equivalent Laguerre + series. + + See Also + -------- + lag2poly + + Notes + ----- + The easy way to do conversions between polynomial basis sets + is to use the convert method of a class instance. + + Examples + -------- + >>> from numpy.polynomial.laguerre import poly2lag + >>> poly2lag(np.arange(4)) + array([ 23., -63., 58., -18.]) + + """ + [pol] = pu.as_series([pol]) + res = 0 + for p in pol[::-1]: + res = lagadd(lagmulx(res), p) + return res + + +def lag2poly(c): + """ + Convert a Laguerre series to a polynomial. + + Convert an array representing the coefficients of a Laguerre series, + ordered from lowest degree to highest, to an array of the coefficients + of the equivalent polynomial (relative to the "standard" basis) ordered + from lowest to highest degree. 
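As a hedged sanity check that the two conversions are inverses (the input and output values here are taken from the documented examples of `poly2lag` and this function):

>>> from numpy.polynomial.laguerre import poly2lag, lag2poly
>>> lag2poly(poly2lag([0., 1., 2., 3.]))
array([0., 1., 2., 3.]) # may vary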
+ + Parameters + ---------- + c : array_like + 1-D array containing the Laguerre series coefficients, ordered + from lowest order term to highest. + + Returns + ------- + pol : ndarray + 1-D array containing the coefficients of the equivalent polynomial + (relative to the "standard" basis) ordered from lowest order term + to highest. + + See Also + -------- + poly2lag + + Notes + ----- + The easy way to do conversions between polynomial basis sets + is to use the convert method of a class instance. + + Examples + -------- + >>> from numpy.polynomial.laguerre import lag2poly + >>> lag2poly([ 23., -63., 58., -18.]) + array([0., 1., 2., 3.]) + + """ + from .polynomial import polyadd, polysub, polymulx + + [c] = pu.as_series([c]) + n = len(c) + if n == 1: + return c + else: + c0 = c[-2] + c1 = c[-1] + # i is the current degree of c1 + for i in range(n - 1, 1, -1): + tmp = c0 + c0 = polysub(c[i - 2], (c1*(i - 1))/i) + c1 = polyadd(tmp, polysub((2*i - 1)*c1, polymulx(c1))/i) + return polyadd(c0, polysub(c1, polymulx(c1))) + +# +# These constant arrays are of integer type so as to be compatible +# with the widest range of other types, such as Decimal. +# + +# Laguerre +lagdomain = np.array([0, 1]) + +# Laguerre coefficients representing zero. +lagzero = np.array([0]) + +# Laguerre coefficients representing one. +lagone = np.array([1]) + +# Laguerre coefficients representing the identity x. +lagx = np.array([1, -1]) + + +def lagline(off, scl): + """ + Laguerre series whose graph is a straight line. + + Parameters + ---------- + off, scl : scalars + The specified line is given by ``off + scl*x``. + + Returns + ------- + y : ndarray + This module's representation of the Laguerre series for + ``off + scl*x``. + + See Also + -------- + numpy.polynomial.polynomial.polyline + numpy.polynomial.chebyshev.chebline + numpy.polynomial.legendre.legline + numpy.polynomial.hermite.hermline + numpy.polynomial.hermite_e.hermeline + + Examples + -------- + >>> from numpy.polynomial.laguerre import lagline, lagval + >>> lagval(0,lagline(3, 2)) + 3.0 + >>> lagval(1,lagline(3, 2)) + 5.0 + + """ + if scl != 0: + return np.array([off + scl, -scl]) + else: + return np.array([off]) + + +def lagfromroots(roots): + """ + Generate a Laguerre series with given roots. + + The function returns the coefficients of the polynomial + + .. math:: p(x) = (x - r_0) * (x - r_1) * ... * (x - r_n), + + in Laguerre form, where the `r_n` are the roots specified in `roots`. + If a zero has multiplicity n, then it must appear in `roots` n times. + For instance, if 2 is a root of multiplicity three and 3 is a root of + multiplicity 2, then `roots` looks something like [2, 2, 2, 3, 3]. The + roots can appear in any order. + + If the returned coefficients are `c`, then + + .. math:: p(x) = c_0 + c_1 * L_1(x) + ... + c_n * L_n(x) + + The coefficient of the last term is not generally 1 for monic + polynomials in Laguerre form. + + Parameters + ---------- + roots : array_like + Sequence containing the roots. + + Returns + ------- + out : ndarray + 1-D array of coefficients. If all roots are real then `out` is a + real array; if some of the roots are complex, then `out` is complex + even if all the coefficients in the result are real (see Examples + below).
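A hedged cross-check of the monic-product claim above (assuming `lag2poly` from this module): converting the returned series back to the power basis recovers the monic polynomial ``x**3 - x`` for roots ``(-1, 0, 1)``:

>>> from numpy.polynomial.laguerre import lagfromroots, lag2poly
>>> lag2poly(lagfromroots((-1, 0, 1)))
array([ 0., -1., 0., 1.]) # may vary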
+ + See Also + -------- + numpy.polynomial.polynomial.polyfromroots + numpy.polynomial.legendre.legfromroots + numpy.polynomial.chebyshev.chebfromroots + numpy.polynomial.hermite.hermfromroots + numpy.polynomial.hermite_e.hermefromroots + + Examples + -------- + >>> from numpy.polynomial.laguerre import lagfromroots, lagval + >>> coef = lagfromroots((-1, 0, 1)) + >>> lagval((-1, 0, 1), coef) + array([0., 0., 0.]) + >>> coef = lagfromroots((-1j, 1j)) + >>> lagval((-1j, 1j), coef) + array([0.+0.j, 0.+0.j]) + + """ + return pu._fromroots(lagline, lagmul, roots) + + +def lagadd(c1, c2): + """ + Add one Laguerre series to another. + + Returns the sum of two Laguerre series `c1` + `c2`. The arguments + are sequences of coefficients ordered from lowest order term to + highest, i.e., [1,2,3] represents the series ``P_0 + 2*P_1 + 3*P_2``. + + Parameters + ---------- + c1, c2 : array_like + 1-D arrays of Laguerre series coefficients ordered from low to + high. + + Returns + ------- + out : ndarray + Array representing the Laguerre series of their sum. + + See Also + -------- + lagsub, lagmulx, lagmul, lagdiv, lagpow + + Notes + ----- + Unlike multiplication, division, etc., the sum of two Laguerre series + is a Laguerre series (without having to "reproject" the result onto + the basis set) so addition, just like that of "standard" polynomials, + is simply "component-wise." + + Examples + -------- + >>> from numpy.polynomial.laguerre import lagadd + >>> lagadd([1, 2, 3], [1, 2, 3, 4]) + array([2., 4., 6., 4.]) + + + """ + return pu._add(c1, c2) + + +def lagsub(c1, c2): + """ + Subtract one Laguerre series from another. + + Returns the difference of two Laguerre series `c1` - `c2`. The + sequences of coefficients are from lowest order term to highest, i.e., + [1,2,3] represents the series ``P_0 + 2*P_1 + 3*P_2``. + + Parameters + ---------- + c1, c2 : array_like + 1-D arrays of Laguerre series coefficients ordered from low to + high. + + Returns + ------- + out : ndarray + Of Laguerre series coefficients representing their difference. + + See Also + -------- + lagadd, lagmulx, lagmul, lagdiv, lagpow + + Notes + ----- + Unlike multiplication, division, etc., the difference of two Laguerre + series is a Laguerre series (without having to "reproject" the result + onto the basis set) so subtraction, just like that of "standard" + polynomials, is simply "component-wise." + + Examples + -------- + >>> from numpy.polynomial.laguerre import lagsub + >>> lagsub([1, 2, 3, 4], [1, 2, 3]) + array([0., 0., 0., 4.]) + + """ + return pu._sub(c1, c2) + + +def lagmulx(c): + """Multiply a Laguerre series by x. + + Multiply the Laguerre series `c` by x, where x is the independent + variable. + + + Parameters + ---------- + c : array_like + 1-D array of Laguerre series coefficients ordered from low to + high. + + Returns + ------- + out : ndarray + Array representing the result of the multiplication. + + See Also + -------- + lagadd, lagsub, lagmul, lagdiv, lagpow + + Notes + ----- + The multiplication uses the recursion relationship for Laguerre + polynomials in the form + + .. 
math:: + + xP_i(x) = (-(i + 1)*P_{i + 1}(x) + (2i + 1)P_{i}(x) - iP_{i - 1}(x)) + + Examples + -------- + >>> from numpy.polynomial.laguerre import lagmulx + >>> lagmulx([1, 2, 3]) + array([-1., -1., 11., -9.]) + + """ + # c is a trimmed copy + [c] = pu.as_series([c]) + # The zero series needs special treatment + if len(c) == 1 and c[0] == 0: + return c + + prd = np.empty(len(c) + 1, dtype=c.dtype) + prd[0] = c[0] + prd[1] = -c[0] + for i in range(1, len(c)): + prd[i + 1] = -c[i]*(i + 1) + prd[i] += c[i]*(2*i + 1) + prd[i - 1] -= c[i]*i + return prd + + +def lagmul(c1, c2): + """ + Multiply one Laguerre series by another. + + Returns the product of two Laguerre series `c1` * `c2`. The arguments + are sequences of coefficients, from lowest order "term" to highest, + e.g., [1,2,3] represents the series ``P_0 + 2*P_1 + 3*P_2``. + + Parameters + ---------- + c1, c2 : array_like + 1-D arrays of Laguerre series coefficients ordered from low to + high. + + Returns + ------- + out : ndarray + Of Laguerre series coefficients representing their product. + + See Also + -------- + lagadd, lagsub, lagmulx, lagdiv, lagpow + + Notes + ----- + In general, the (polynomial) product of two C-series results in terms + that are not in the Laguerre polynomial basis set. Thus, to express + the product as a Laguerre series, it is necessary to "reproject" the + product onto said basis set, which may produce "unintuitive" (but + correct) results; see Examples section below. + + Examples + -------- + >>> from numpy.polynomial.laguerre import lagmul + >>> lagmul([1, 2, 3], [0, 1, 2]) + array([ 8., -13., 38., -51., 36.]) + + """ + # s1, s2 are trimmed copies + [c1, c2] = pu.as_series([c1, c2]) + + if len(c1) > len(c2): + c = c2 + xs = c1 + else: + c = c1 + xs = c2 + + if len(c) == 1: + c0 = c[0]*xs + c1 = 0 + elif len(c) == 2: + c0 = c[0]*xs + c1 = c[1]*xs + else: + nd = len(c) + c0 = c[-2]*xs + c1 = c[-1]*xs + for i in range(3, len(c) + 1): + tmp = c0 + nd = nd - 1 + c0 = lagsub(c[-i]*xs, (c1*(nd - 1))/nd) + c1 = lagadd(tmp, lagsub((2*nd - 1)*c1, lagmulx(c1))/nd) + return lagadd(c0, lagsub(c1, lagmulx(c1))) + + +def lagdiv(c1, c2): + """ + Divide one Laguerre series by another. + + Returns the quotient-with-remainder of two Laguerre series + `c1` / `c2`. The arguments are sequences of coefficients from lowest + order "term" to highest, e.g., [1,2,3] represents the series + ``P_0 + 2*P_1 + 3*P_2``. + + Parameters + ---------- + c1, c2 : array_like + 1-D arrays of Laguerre series coefficients ordered from low to + high. + + Returns + ------- + [quo, rem] : ndarrays + Of Laguerre series coefficients representing the quotient and + remainder. + + See Also + -------- + lagadd, lagsub, lagmulx, lagmul, lagpow + + Notes + ----- + In general, the (polynomial) division of one Laguerre series by another + results in quotient and remainder terms that are not in the Laguerre + polynomial basis set. Thus, to express these results as a Laguerre + series, it is necessary to "reproject" the results onto the Laguerre + basis set, which may produce "unintuitive" (but correct) results; see + Examples section below. + + Examples + -------- + >>> from numpy.polynomial.laguerre import lagdiv + >>> lagdiv([ 8., -13., 38., -51., 36.], [0, 1, 2]) + (array([1., 2., 3.]), array([0.])) + >>> lagdiv([ 9., -12., 38., -51., 36.], [0, 1, 2]) + (array([1., 2., 3.]), array([1., 1.])) + + """ + return pu._div(lagmul, c1, c2) + + +def lagpow(c, pow, maxpower=16): + """Raise a Laguerre series to a power. 
+ + Returns the Laguerre series `c` raised to the power `pow`. The + argument `c` is a sequence of coefficients ordered from low to high. + i.e., [1,2,3] is the series ``P_0 + 2*P_1 + 3*P_2.`` + + Parameters + ---------- + c : array_like + 1-D array of Laguerre series coefficients ordered from low to + high. + pow : integer + Power to which the series will be raised + maxpower : integer, optional + Maximum power allowed. This is mainly to limit growth of the series + to unmanageable size. Default is 16 + + Returns + ------- + coef : ndarray + Laguerre series of power. + + See Also + -------- + lagadd, lagsub, lagmulx, lagmul, lagdiv + + Examples + -------- + >>> from numpy.polynomial.laguerre import lagpow + >>> lagpow([1, 2, 3], 2) + array([ 14., -16., 56., -72., 54.]) + + """ + return pu._pow(lagmul, c, pow, maxpower) + + +def lagder(c, m=1, scl=1, axis=0): + """ + Differentiate a Laguerre series. + + Returns the Laguerre series coefficients `c` differentiated `m` times + along `axis`. At each iteration the result is multiplied by `scl` (the + scaling factor is for use in a linear change of variable). The argument + `c` is an array of coefficients from low to high degree along each + axis, e.g., [1,2,3] represents the series ``1*L_0 + 2*L_1 + 3*L_2`` + while [[1,2],[1,2]] represents ``1*L_0(x)*L_0(y) + 1*L_1(x)*L_0(y) + + 2*L_0(x)*L_1(y) + 2*L_1(x)*L_1(y)`` if axis=0 is ``x`` and axis=1 is + ``y``. + + Parameters + ---------- + c : array_like + Array of Laguerre series coefficients. If `c` is multidimensional + the different axis correspond to different variables with the + degree in each axis given by the corresponding index. + m : int, optional + Number of derivatives taken, must be non-negative. (Default: 1) + scl : scalar, optional + Each differentiation is multiplied by `scl`. The end result is + multiplication by ``scl**m``. This is for use in a linear change of + variable. (Default: 1) + axis : int, optional + Axis over which the derivative is taken. (Default: 0). + + .. versionadded:: 1.7.0 + + Returns + ------- + der : ndarray + Laguerre series of the derivative. + + See Also + -------- + lagint + + Notes + ----- + In general, the result of differentiating a Laguerre series does not + resemble the same operation on a power series. Thus the result of this + function may be "unintuitive," albeit correct; see Examples section + below. + + Examples + -------- + >>> from numpy.polynomial.laguerre import lagder + >>> lagder([ 1., 1., 1., -3.]) + array([1., 2., 3.]) + >>> lagder([ 1., 0., 0., -4., 3.], m=2) + array([1., 2., 3.]) + + """ + c = np.array(c, ndmin=1, copy=True) + if c.dtype.char in '?bBhHiIlLqQpP': + c = c.astype(np.double) + + cnt = pu._deprecate_as_int(m, "the order of derivation") + iaxis = pu._deprecate_as_int(axis, "the axis") + if cnt < 0: + raise ValueError("The order of derivation must be non-negative") + iaxis = normalize_axis_index(iaxis, c.ndim) + + if cnt == 0: + return c + + c = np.moveaxis(c, iaxis, 0) + n = len(c) + if cnt >= n: + c = c[:1]*0 + else: + for i in range(cnt): + n = n - 1 + c *= scl + der = np.empty((n,) + c.shape[1:], dtype=c.dtype) + for j in range(n, 1, -1): + der[j - 1] = -c[j] + c[j - 1] += c[j] + der[0] = -c[1] + c = der + c = np.moveaxis(c, 0, iaxis) + return c + + +def lagint(c, m=1, k=[], lbnd=0, scl=1, axis=0): + """ + Integrate a Laguerre series. + + Returns the Laguerre series coefficients `c` integrated `m` times from + `lbnd` along `axis`. 
At each iteration the resulting series is + **multiplied** by `scl` and an integration constant, `k`, is added. + The scaling factor is for use in a linear change of variable. ("Buyer + beware": note that, depending on what one is doing, one may want `scl` + to be the reciprocal of what one might expect; for more information, + see the Notes section below.) The argument `c` is an array of + coefficients from low to high degree along each axis, e.g., [1,2,3] + represents the series ``L_0 + 2*L_1 + 3*L_2`` while [[1,2],[1,2]] + represents ``1*L_0(x)*L_0(y) + 1*L_1(x)*L_0(y) + 2*L_0(x)*L_1(y) + + 2*L_1(x)*L_1(y)`` if axis=0 is ``x`` and axis=1 is ``y``. + + + Parameters + ---------- + c : array_like + Array of Laguerre series coefficients. If `c` is multidimensional + the different axis correspond to different variables with the + degree in each axis given by the corresponding index. + m : int, optional + Order of integration, must be positive. (Default: 1) + k : {[], list, scalar}, optional + Integration constant(s). The value of the first integral at + ``lbnd`` is the first value in the list, the value of the second + integral at ``lbnd`` is the second value, etc. If ``k == []`` (the + default), all constants are set to zero. If ``m == 1``, a single + scalar can be given instead of a list. + lbnd : scalar, optional + The lower bound of the integral. (Default: 0) + scl : scalar, optional + Following each integration the result is *multiplied* by `scl` + before the integration constant is added. (Default: 1) + axis : int, optional + Axis over which the integral is taken. (Default: 0). + + .. versionadded:: 1.7.0 + + Returns + ------- + S : ndarray + Laguerre series coefficients of the integral. + + Raises + ------ + ValueError + If ``m < 0``, ``len(k) > m``, ``np.ndim(lbnd) != 0``, or + ``np.ndim(scl) != 0``. + + See Also + -------- + lagder + + Notes + ----- + Note that the result of each integration is *multiplied* by `scl`. + Why is this important to note? Say one is making a linear change of + variable :math:`u = ax + b` in an integral relative to `x`. Then + :math:`dx = du/a`, so one will need to set `scl` equal to + :math:`1/a` - perhaps not what one would have first thought. + + Also note that, in general, the result of integrating a C-series needs + to be "reprojected" onto the C-series basis set. Thus, typically, + the result of this function is "unintuitive," albeit correct; see + Examples section below. + + Examples + -------- + >>> from numpy.polynomial.laguerre import lagint + >>> lagint([1,2,3]) + array([ 1., 1., 1., -3.]) + >>> lagint([1,2,3], m=2) + array([ 1., 0., 0., -4., 3.]) + >>> lagint([1,2,3], k=1) + array([ 2., 1., 1., -3.]) + >>> lagint([1,2,3], lbnd=-1) + array([11.5, 1. , 1. , -3. ]) + >>> lagint([1,2], m=2, k=[1,2], lbnd=-1) + array([ 11.16666667, -5. , -3. , 2. 
]) # may vary + + """ + c = np.array(c, ndmin=1, copy=True) + if c.dtype.char in '?bBhHiIlLqQpP': + c = c.astype(np.double) + if not np.iterable(k): + k = [k] + cnt = pu._deprecate_as_int(m, "the order of integration") + iaxis = pu._deprecate_as_int(axis, "the axis") + if cnt < 0: + raise ValueError("The order of integration must be non-negative") + if len(k) > cnt: + raise ValueError("Too many integration constants") + if np.ndim(lbnd) != 0: + raise ValueError("lbnd must be a scalar.") + if np.ndim(scl) != 0: + raise ValueError("scl must be a scalar.") + iaxis = normalize_axis_index(iaxis, c.ndim) + + if cnt == 0: + return c + + c = np.moveaxis(c, iaxis, 0) + k = list(k) + [0]*(cnt - len(k)) + for i in range(cnt): + n = len(c) + c *= scl + if n == 1 and np.all(c[0] == 0): + c[0] += k[i] + else: + tmp = np.empty((n + 1,) + c.shape[1:], dtype=c.dtype) + tmp[0] = c[0] + tmp[1] = -c[0] + for j in range(1, n): + tmp[j] += c[j] + tmp[j + 1] = -c[j] + tmp[0] += k[i] - lagval(lbnd, tmp) + c = tmp + c = np.moveaxis(c, 0, iaxis) + return c + + +def lagval(x, c, tensor=True): + """ + Evaluate a Laguerre series at points x. + + If `c` is of length `n + 1`, this function returns the value: + + .. math:: p(x) = c_0 * L_0(x) + c_1 * L_1(x) + ... + c_n * L_n(x) + + The parameter `x` is converted to an array only if it is a tuple or a + list, otherwise it is treated as a scalar. In either case, either `x` + or its elements must support multiplication and addition both with + themselves and with the elements of `c`. + + If `c` is a 1-D array, then `p(x)` will have the same shape as `x`. If + `c` is multidimensional, then the shape of the result depends on the + value of `tensor`. If `tensor` is true the shape will be c.shape[1:] + + x.shape. If `tensor` is false the shape will be c.shape[1:]. Note that + scalars have shape (,). + + Trailing zeros in the coefficients will be used in the evaluation, so + they should be avoided if efficiency is a concern. + + Parameters + ---------- + x : array_like, compatible object + If `x` is a list or tuple, it is converted to an ndarray, otherwise + it is left unchanged and treated as a scalar. In either case, `x` + or its elements must support addition and multiplication with + with themselves and with the elements of `c`. + c : array_like + Array of coefficients ordered so that the coefficients for terms of + degree n are contained in c[n]. If `c` is multidimensional the + remaining indices enumerate multiple polynomials. In the two + dimensional case the coefficients may be thought of as stored in + the columns of `c`. + tensor : boolean, optional + If True, the shape of the coefficient array is extended with ones + on the right, one for each dimension of `x`. Scalars have dimension 0 + for this action. The result is that every column of coefficients in + `c` is evaluated for every element of `x`. If False, `x` is broadcast + over the columns of `c` for the evaluation. This keyword is useful + when `c` is multidimensional. The default value is True. + + .. versionadded:: 1.7.0 + + Returns + ------- + values : ndarray, algebra_like + The shape of the return value is described above. + + See Also + -------- + lagval2d, laggrid2d, lagval3d, laggrid3d + + Notes + ----- + The evaluation uses Clenshaw recursion, aka synthetic division. + + Examples + -------- + >>> from numpy.polynomial.laguerre import lagval + >>> coef = [1,2,3] + >>> lagval(1, coef) + -0.5 + >>> lagval([[1,2],[3,4]], coef) + array([[-0.5, -4. ], + [-4.5, -2. 
]])
+
+    """
+    c = np.array(c, ndmin=1, copy=False)
+    if c.dtype.char in '?bBhHiIlLqQpP':
+        c = c.astype(np.double)
+    if isinstance(x, (tuple, list)):
+        x = np.asarray(x)
+    if isinstance(x, np.ndarray) and tensor:
+        c = c.reshape(c.shape + (1,)*x.ndim)
+
+    if len(c) == 1:
+        c0 = c[0]
+        c1 = 0
+    elif len(c) == 2:
+        c0 = c[0]
+        c1 = c[1]
+    else:
+        nd = len(c)
+        c0 = c[-2]
+        c1 = c[-1]
+        for i in range(3, len(c) + 1):
+            tmp = c0
+            nd = nd - 1
+            c0 = c[-i] - (c1*(nd - 1))/nd
+            c1 = tmp + (c1*((2*nd - 1) - x))/nd
+    return c0 + c1*(1 - x)
+
+
+def lagval2d(x, y, c):
+    """
+    Evaluate a 2-D Laguerre series at points (x, y).
+
+    This function returns the values:
+
+    .. math:: p(x,y) = \\sum_{i,j} c_{i,j} * L_i(x) * L_j(y)
+
+    The parameters `x` and `y` are converted to arrays only if they are
+    tuples or lists, otherwise they are treated as scalars and they
+    must have the same shape after conversion. In either case, either `x`
+    and `y` or their elements must support multiplication and addition both
+    with themselves and with the elements of `c`.
+
+    If `c` is a 1-D array a one is implicitly appended to its shape to make
+    it 2-D. The shape of the result will be c.shape[2:] + x.shape.
+
+    Parameters
+    ----------
+    x, y : array_like, compatible objects
+        The two dimensional series is evaluated at the points `(x, y)`,
+        where `x` and `y` must have the same shape. If `x` or `y` is a list
+        or tuple, it is first converted to an ndarray, otherwise it is left
+        unchanged and if it isn't an ndarray it is treated as a scalar.
+    c : array_like
+        Array of coefficients ordered so that the coefficient of the term
+        of multi-degree i,j is contained in ``c[i,j]``. If `c` has
+        dimension greater than two the remaining indices enumerate multiple
+        sets of coefficients.
+
+    Returns
+    -------
+    values : ndarray, compatible object
+        The values of the two dimensional polynomial at points formed with
+        pairs of corresponding values from `x` and `y`.
+
+    See Also
+    --------
+    lagval, laggrid2d, lagval3d, laggrid3d
+
+    Notes
+    -----
+
+    .. versionadded:: 1.7.0
+
+    """
+    return pu._valnd(lagval, c, x, y)
+
+
+def laggrid2d(x, y, c):
+    """
+    Evaluate a 2-D Laguerre series on the Cartesian product of x and y.
+
+    This function returns the values:
+
+    .. math:: p(a,b) = \\sum_{i,j} c_{i,j} * L_i(a) * L_j(b)
+
+    where the points `(a, b)` consist of all pairs formed by taking
+    `a` from `x` and `b` from `y`. The resulting points form a grid with
+    `x` in the first dimension and `y` in the second.
+
+    The parameters `x` and `y` are converted to arrays only if they are
+    tuples or lists, otherwise they are treated as scalars. In either
+    case, either `x` and `y` or their elements must support multiplication
+    and addition both with themselves and with the elements of `c`.
+
+    If `c` has fewer than two dimensions, ones are implicitly appended to
+    its shape to make it 2-D. The shape of the result will be c.shape[2:] +
+    x.shape + y.shape.
+
+    Parameters
+    ----------
+    x, y : array_like, compatible objects
+        The two dimensional series is evaluated at the points in the
+        Cartesian product of `x` and `y`. If `x` or `y` is a list or
+        tuple, it is first converted to an ndarray, otherwise it is left
+        unchanged and, if it isn't an ndarray, it is treated as a scalar.
+    c : array_like
+        Array of coefficients ordered so that the coefficient of the term of
+        multi-degree i,j is contained in `c[i,j]`. If `c` has dimension
+        greater than two the remaining indices enumerate multiple sets of
+        coefficients.
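+        For example, in the hypothetical 2x2 case ``c = [[1, 2], [3, 4]]``,
+        the entry ``c[1][0]`` (here 3) is the coefficient of the term
+        ``L_1(x)*L_0(y)``.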
+
+    Returns
+    -------
+    values : ndarray, compatible object
+        The values of the two dimensional Laguerre series at points in the
+        Cartesian product of `x` and `y`.
+
+    See Also
+    --------
+    lagval, lagval2d, lagval3d, laggrid3d
+
+    Notes
+    -----
+
+    .. versionadded:: 1.7.0
+
+    """
+    return pu._gridnd(lagval, c, x, y)
+
+
+def lagval3d(x, y, z, c):
+    """
+    Evaluate a 3-D Laguerre series at points (x, y, z).
+
+    This function returns the values:
+
+    .. math:: p(x,y,z) = \\sum_{i,j,k} c_{i,j,k} * L_i(x) * L_j(y) * L_k(z)
+
+    The parameters `x`, `y`, and `z` are converted to arrays only if
+    they are tuples or lists, otherwise they are treated as scalars and
+    they must have the same shape after conversion. In either case, either
+    `x`, `y`, and `z` or their elements must support multiplication and
+    addition both with themselves and with the elements of `c`.
+
+    If `c` has fewer than 3 dimensions, ones are implicitly appended to its
+    shape to make it 3-D. The shape of the result will be c.shape[3:] +
+    x.shape.
+
+    Parameters
+    ----------
+    x, y, z : array_like, compatible object
+        The three dimensional series is evaluated at the points
+        `(x, y, z)`, where `x`, `y`, and `z` must have the same shape. If
+        any of `x`, `y`, or `z` is a list or tuple, it is first converted
+        to an ndarray, otherwise it is left unchanged and if it isn't an
+        ndarray it is treated as a scalar.
+    c : array_like
+        Array of coefficients ordered so that the coefficient of the term of
+        multi-degree i,j,k is contained in ``c[i,j,k]``. If `c` has dimension
+        greater than 3 the remaining indices enumerate multiple sets of
+        coefficients.
+
+    Returns
+    -------
+    values : ndarray, compatible object
+        The values of the multidimensional polynomial on points formed with
+        triples of corresponding values from `x`, `y`, and `z`.
+
+    See Also
+    --------
+    lagval, lagval2d, laggrid2d, laggrid3d
+
+    Notes
+    -----
+
+    .. versionadded:: 1.7.0
+
+    """
+    return pu._valnd(lagval, c, x, y, z)
+
+
+def laggrid3d(x, y, z, c):
+    """
+    Evaluate a 3-D Laguerre series on the Cartesian product of x, y, and z.
+
+    This function returns the values:
+
+    .. math:: p(a,b,c) = \\sum_{i,j,k} c_{i,j,k} * L_i(a) * L_j(b) * L_k(c)
+
+    where the points `(a, b, c)` consist of all triples formed by taking
+    `a` from `x`, `b` from `y`, and `c` from `z`. The resulting points form
+    a grid with `x` in the first dimension, `y` in the second, and `z` in
+    the third.
+
+    The parameters `x`, `y`, and `z` are converted to arrays only if they
+    are tuples or lists, otherwise they are treated as scalars. In
+    either case, either `x`, `y`, and `z` or their elements must support
+    multiplication and addition both with themselves and with the elements
+    of `c`.
+
+    If `c` has fewer than three dimensions, ones are implicitly appended to
+    its shape to make it 3-D. The shape of the result will be c.shape[3:] +
+    x.shape + y.shape + z.shape.
+
+    Parameters
+    ----------
+    x, y, z : array_like, compatible objects
+        The three dimensional series is evaluated at the points in the
+        Cartesian product of `x`, `y`, and `z`. If `x`, `y`, or `z` is a
+        list or tuple, it is first converted to an ndarray, otherwise it is
+        left unchanged and, if it isn't an ndarray, it is treated as a
+        scalar.
+    c : array_like
+        Array of coefficients ordered so that the coefficient of the term of
+        multi-degree i,j,k is contained in ``c[i,j,k]``. If `c` has dimension
+        greater than three the remaining indices enumerate multiple sets of
+        coefficients.
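+        If, for instance, `x`, `y`, and `z` are each of shape (3,), the
+        result described below will have shape ``c.shape[3:] + (3, 3, 3)``.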
+
+    Returns
+    -------
+    values : ndarray, compatible object
+        The values of the three dimensional polynomial at points in the
+        Cartesian product of `x`, `y`, and `z`.
+
+    See Also
+    --------
+    lagval, lagval2d, laggrid2d, lagval3d
+
+    Notes
+    -----
+
+    .. versionadded:: 1.7.0
+
+    """
+    return pu._gridnd(lagval, c, x, y, z)
+
+
+def lagvander(x, deg):
+    """Pseudo-Vandermonde matrix of given degree.
+
+    Returns the pseudo-Vandermonde matrix of degree `deg` and sample points
+    `x`. The pseudo-Vandermonde matrix is defined by
+
+    .. math:: V[..., i] = L_i(x)
+
+    where `0 <= i <= deg`. The leading indices of `V` index the elements of
+    `x` and the last index is the degree of the Laguerre polynomial.
+
+    If `c` is a 1-D array of coefficients of length `n + 1` and `V` is the
+    array ``V = lagvander(x, n)``, then ``np.dot(V, c)`` and
+    ``lagval(x, c)`` are the same up to roundoff. This equivalence is
+    useful both for least squares fitting and for the evaluation of a large
+    number of Laguerre series of the same degree and sample points.
+
+    Parameters
+    ----------
+    x : array_like
+        Array of points. The dtype is converted to float64 or complex128
+        depending on whether any of the elements are complex. If `x` is
+        scalar it is converted to a 1-D array.
+    deg : int
+        Degree of the resulting matrix.
+
+    Returns
+    -------
+    vander : ndarray
+        The pseudo-Vandermonde matrix. The shape of the returned matrix is
+        ``x.shape + (deg + 1,)``, where the last index is the degree of the
+        corresponding Laguerre polynomial. The dtype will be the same as
+        the converted `x`.
+
+    Examples
+    --------
+    >>> from numpy.polynomial.laguerre import lagvander
+    >>> x = np.array([0, 1, 2])
+    >>> lagvander(x, 3)
+    array([[ 1.        ,  1.        ,  1.        ,  1.        ],
+           [ 1.        ,  0.        , -0.5       , -0.66666667],
+           [ 1.        , -1.        , -1.        , -0.33333333]])
+
+    """
+    ideg = pu._deprecate_as_int(deg, "deg")
+    if ideg < 0:
+        raise ValueError("deg must be non-negative")
+
+    x = np.array(x, copy=False, ndmin=1) + 0.0
+    dims = (ideg + 1,) + x.shape
+    dtyp = x.dtype
+    v = np.empty(dims, dtype=dtyp)
+    v[0] = x*0 + 1
+    if ideg > 0:
+        v[1] = 1 - x
+        for i in range(2, ideg + 1):
+            v[i] = (v[i-1]*(2*i - 1 - x) - v[i-2]*(i - 1))/i
+    return np.moveaxis(v, 0, -1)
+
+
+def lagvander2d(x, y, deg):
+    """Pseudo-Vandermonde matrix of given degrees.
+
+    Returns the pseudo-Vandermonde matrix of degrees `deg` and sample
+    points `(x, y)`. The pseudo-Vandermonde matrix is defined by
+
+    .. math:: V[..., (deg[1] + 1)*i + j] = L_i(x) * L_j(y),
+
+    where `0 <= i <= deg[0]` and `0 <= j <= deg[1]`. The leading indices of
+    `V` index the points `(x, y)` and the last index encodes the degrees of
+    the Laguerre polynomials.
+
+    If ``V = lagvander2d(x, y, [xdeg, ydeg])``, then the columns of `V`
+    correspond to the elements of a 2-D coefficient array `c` of shape
+    (xdeg + 1, ydeg + 1) in the order
+
+    .. math:: c_{00}, c_{01}, c_{02} ... , c_{10}, c_{11}, c_{12} ...
+
+    and ``np.dot(V, c.flat)`` and ``lagval2d(x, y, c)`` will be the same
+    up to roundoff. This equivalence is useful both for least squares
+    fitting and for the evaluation of a large number of 2-D Laguerre
+    series of the same degrees and sample points.
+
+    Parameters
+    ----------
+    x, y : array_like
+        Arrays of point coordinates, all of the same shape. The dtypes
+        will be converted to either float64 or complex128 depending on
+        whether any of the elements are complex. Scalars are converted to
+        1-D arrays.
+    deg : list of ints
+        List of maximum degrees of the form [x_deg, y_deg].
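+        For example, the hypothetical call ``lagvander2d(x, y, [1, 2])``
+        produces a matrix with ``(1 + 1)*(2 + 1) = 6`` columns.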
+ + Returns + ------- + vander2d : ndarray + The shape of the returned matrix is ``x.shape + (order,)``, where + :math:`order = (deg[0]+1)*(deg[1]+1)`. The dtype will be the same + as the converted `x` and `y`. + + See Also + -------- + lagvander, lagvander3d, lagval2d, lagval3d + + Notes + ----- + + .. versionadded:: 1.7.0 + + """ + return pu._vander_nd_flat((lagvander, lagvander), (x, y), deg) + + +def lagvander3d(x, y, z, deg): + """Pseudo-Vandermonde matrix of given degrees. + + Returns the pseudo-Vandermonde matrix of degrees `deg` and sample + points `(x, y, z)`. If `l, m, n` are the given degrees in `x, y, z`, + then The pseudo-Vandermonde matrix is defined by + + .. math:: V[..., (m+1)(n+1)i + (n+1)j + k] = L_i(x)*L_j(y)*L_k(z), + + where `0 <= i <= l`, `0 <= j <= m`, and `0 <= j <= n`. The leading + indices of `V` index the points `(x, y, z)` and the last index encodes + the degrees of the Laguerre polynomials. + + If ``V = lagvander3d(x, y, z, [xdeg, ydeg, zdeg])``, then the columns + of `V` correspond to the elements of a 3-D coefficient array `c` of + shape (xdeg + 1, ydeg + 1, zdeg + 1) in the order + + .. math:: c_{000}, c_{001}, c_{002},... , c_{010}, c_{011}, c_{012},... + + and ``np.dot(V, c.flat)`` and ``lagval3d(x, y, z, c)`` will be the + same up to roundoff. This equivalence is useful both for least squares + fitting and for the evaluation of a large number of 3-D Laguerre + series of the same degrees and sample points. + + Parameters + ---------- + x, y, z : array_like + Arrays of point coordinates, all of the same shape. The dtypes will + be converted to either float64 or complex128 depending on whether + any of the elements are complex. Scalars are converted to 1-D + arrays. + deg : list of ints + List of maximum degrees of the form [x_deg, y_deg, z_deg]. + + Returns + ------- + vander3d : ndarray + The shape of the returned matrix is ``x.shape + (order,)``, where + :math:`order = (deg[0]+1)*(deg[1]+1)*(deg[2]+1)`. The dtype will + be the same as the converted `x`, `y`, and `z`. + + See Also + -------- + lagvander, lagvander3d, lagval2d, lagval3d + + Notes + ----- + + .. versionadded:: 1.7.0 + + """ + return pu._vander_nd_flat((lagvander, lagvander, lagvander), (x, y, z), deg) + + +def lagfit(x, y, deg, rcond=None, full=False, w=None): + """ + Least squares fit of Laguerre series to data. + + Return the coefficients of a Laguerre series of degree `deg` that is the + least squares fit to the data values `y` given at points `x`. If `y` is + 1-D the returned coefficients will also be 1-D. If `y` is 2-D multiple + fits are done, one for each column of `y`, and the resulting + coefficients are stored in the corresponding columns of a 2-D return. + The fitted polynomial(s) are in the form + + .. math:: p(x) = c_0 + c_1 * L_1(x) + ... + c_n * L_n(x), + + where `n` is `deg`. + + Parameters + ---------- + x : array_like, shape (M,) + x-coordinates of the M sample points ``(x[i], y[i])``. + y : array_like, shape (M,) or (M, K) + y-coordinates of the sample points. Several data sets of sample + points sharing the same x-coordinates can be fitted at once by + passing in a 2D-array that contains one dataset per column. + deg : int or 1-D array_like + Degree(s) of the fitting polynomials. If `deg` is a single integer + all terms up to and including the `deg`'th term are included in the + fit. For NumPy versions >= 1.11.0 a list of integers specifying the + degrees of the terms to include may be used instead. + rcond : float, optional + Relative condition number of the fit. 
Singular values smaller than + this relative to the largest singular value will be ignored. The + default value is len(x)*eps, where eps is the relative precision of + the float type, about 2e-16 in most cases. + full : bool, optional + Switch determining nature of return value. When it is False (the + default) just the coefficients are returned, when True diagnostic + information from the singular value decomposition is also returned. + w : array_like, shape (`M`,), optional + Weights. If not None, the weight ``w[i]`` applies to the unsquared + residual ``y[i] - y_hat[i]`` at ``x[i]``. Ideally the weights are + chosen so that the errors of the products ``w[i]*y[i]`` all have the + same variance. When using inverse-variance weighting, use + ``w[i] = 1/sigma(y[i])``. The default value is None. + + Returns + ------- + coef : ndarray, shape (M,) or (M, K) + Laguerre coefficients ordered from low to high. If `y` was 2-D, + the coefficients for the data in column k of `y` are in column + `k`. + + [residuals, rank, singular_values, rcond] : list + These values are only returned if ``full == True`` + + - residuals -- sum of squared residuals of the least squares fit + - rank -- the numerical rank of the scaled Vandermonde matrix + - singular_values -- singular values of the scaled Vandermonde matrix + - rcond -- value of `rcond`. + + For more details, see `numpy.linalg.lstsq`. + + Warns + ----- + RankWarning + The rank of the coefficient matrix in the least-squares fit is + deficient. The warning is only raised if ``full == False``. The + warnings can be turned off by + + >>> import warnings + >>> warnings.simplefilter('ignore', np.RankWarning) + + See Also + -------- + numpy.polynomial.polynomial.polyfit + numpy.polynomial.legendre.legfit + numpy.polynomial.chebyshev.chebfit + numpy.polynomial.hermite.hermfit + numpy.polynomial.hermite_e.hermefit + lagval : Evaluates a Laguerre series. + lagvander : pseudo Vandermonde matrix of Laguerre series. + lagweight : Laguerre weight function. + numpy.linalg.lstsq : Computes a least-squares fit from the matrix. + scipy.interpolate.UnivariateSpline : Computes spline fits. + + Notes + ----- + The solution is the coefficients of the Laguerre series `p` that + minimizes the sum of the weighted squared errors + + .. math:: E = \\sum_j w_j^2 * |y_j - p(x_j)|^2, + + where the :math:`w_j` are the weights. This problem is solved by + setting up as the (typically) overdetermined matrix equation + + .. math:: V(x) * c = w * y, + + where `V` is the weighted pseudo Vandermonde matrix of `x`, `c` are the + coefficients to be solved for, `w` are the weights, and `y` are the + observed values. This equation is then solved using the singular value + decomposition of `V`. + + If some of the singular values of `V` are so small that they are + neglected, then a `RankWarning` will be issued. This means that the + coefficient values may be poorly determined. Using a lower order fit + will usually get rid of the warning. The `rcond` parameter can also be + set to a value smaller than its default, but the resulting fit may be + spurious and have large contributions from roundoff error. + + Fits using Laguerre series are probably most useful when the data can + be approximated by ``sqrt(w(x)) * p(x)``, where `w(x)` is the Laguerre + weight. In that case the weight ``sqrt(w(x[i]))`` should be used + together with data values ``y[i]/sqrt(w(x[i]))``. The weight function is + available as `lagweight`. + + References + ---------- + .. 
[1] Wikipedia, "Curve fitting", + https://en.wikipedia.org/wiki/Curve_fitting + + Examples + -------- + >>> from numpy.polynomial.laguerre import lagfit, lagval + >>> x = np.linspace(0, 10) + >>> err = np.random.randn(len(x))/10 + >>> y = lagval(x, [1, 2, 3]) + err + >>> lagfit(x, y, 2) + array([ 0.96971004, 2.00193749, 3.00288744]) # may vary + + """ + return pu._fit(lagvander, x, y, deg, rcond, full, w) + + +def lagcompanion(c): + """ + Return the companion matrix of c. + + The usual companion matrix of the Laguerre polynomials is already + symmetric when `c` is a basis Laguerre polynomial, so no scaling is + applied. + + Parameters + ---------- + c : array_like + 1-D array of Laguerre series coefficients ordered from low to high + degree. + + Returns + ------- + mat : ndarray + Companion matrix of dimensions (deg, deg). + + Notes + ----- + + .. versionadded:: 1.7.0 + + """ + # c is a trimmed copy + [c] = pu.as_series([c]) + if len(c) < 2: + raise ValueError('Series must have maximum degree of at least 1.') + if len(c) == 2: + return np.array([[1 + c[0]/c[1]]]) + + n = len(c) - 1 + mat = np.zeros((n, n), dtype=c.dtype) + top = mat.reshape(-1)[1::n+1] + mid = mat.reshape(-1)[0::n+1] + bot = mat.reshape(-1)[n::n+1] + top[...] = -np.arange(1, n) + mid[...] = 2.*np.arange(n) + 1. + bot[...] = top + mat[:, -1] += (c[:-1]/c[-1])*n + return mat + + +def lagroots(c): + """ + Compute the roots of a Laguerre series. + + Return the roots (a.k.a. "zeros") of the polynomial + + .. math:: p(x) = \\sum_i c[i] * L_i(x). + + Parameters + ---------- + c : 1-D array_like + 1-D array of coefficients. + + Returns + ------- + out : ndarray + Array of the roots of the series. If all the roots are real, + then `out` is also real, otherwise it is complex. + + See Also + -------- + numpy.polynomial.polynomial.polyroots + numpy.polynomial.legendre.legroots + numpy.polynomial.chebyshev.chebroots + numpy.polynomial.hermite.hermroots + numpy.polynomial.hermite_e.hermeroots + + Notes + ----- + The root estimates are obtained as the eigenvalues of the companion + matrix, Roots far from the origin of the complex plane may have large + errors due to the numerical instability of the series for such + values. Roots with multiplicity greater than 1 will also show larger + errors as the value of the series near such points is relatively + insensitive to errors in the roots. Isolated roots near the origin can + be improved by a few iterations of Newton's method. + + The Laguerre series basis polynomials aren't powers of `x` so the + results of this function may seem unintuitive. + + Examples + -------- + >>> from numpy.polynomial.laguerre import lagroots, lagfromroots + >>> coef = lagfromroots([0, 1, 2]) + >>> coef + array([ 2., -8., 12., -6.]) + >>> lagroots(coef) + array([-4.4408921e-16, 1.0000000e+00, 2.0000000e+00]) + + """ + # c is a trimmed copy + [c] = pu.as_series([c]) + if len(c) <= 1: + return np.array([], dtype=c.dtype) + if len(c) == 2: + return np.array([1 + c[0]/c[1]]) + + # rotated companion matrix reduces error + m = lagcompanion(c)[::-1,::-1] + r = la.eigvals(m) + r.sort() + return r + + +def laggauss(deg): + """ + Gauss-Laguerre quadrature. + + Computes the sample points and weights for Gauss-Laguerre quadrature. + These sample points and weights will correctly integrate polynomials of + degree :math:`2*deg - 1` or less over the interval :math:`[0, \\inf]` + with the weight function :math:`f(x) = \\exp(-x)`. + + Parameters + ---------- + deg : int + Number of sample points and weights. It must be >= 1. 
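+        For example, ``deg = 2`` yields two points that integrate every
+        polynomial of degree 3 or less exactly against the weight
+        ``exp(-x)``.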
+
+    Returns
+    -------
+    x : ndarray
+        1-D ndarray containing the sample points.
+    w : ndarray
+        1-D ndarray containing the weights.
+
+    Notes
+    -----
+
+    .. versionadded:: 1.7.0
+
+    The results have only been tested up to degree 100; higher degrees may
+    be problematic. The weights are determined by using the fact that
+
+    .. math:: w_k = c / (L'_n(x_k) * L_{n-1}(x_k))
+
+    where :math:`c` is a constant independent of :math:`k` and :math:`x_k`
+    is the k'th root of :math:`L_n`, and then scaling the results to get
+    the right value when integrating 1.
+
+    """
+    ideg = pu._deprecate_as_int(deg, "deg")
+    if ideg <= 0:
+        raise ValueError("deg must be a positive integer")
+
+    # first approximation of roots. We use the fact that the companion
+    # matrix is symmetric in this case in order to obtain better zeros.
+    c = np.array([0]*deg + [1])
+    m = lagcompanion(c)
+    x = la.eigvalsh(m)
+
+    # improve roots by one application of Newton
+    dy = lagval(x, c)
+    df = lagval(x, lagder(c))
+    x -= dy/df
+
+    # compute the weights. We scale the factor to avoid possible numerical
+    # overflow.
+    fm = lagval(x, c[1:])
+    fm /= np.abs(fm).max()
+    df /= np.abs(df).max()
+    w = 1/(fm * df)
+
+    # scale w to get the right value, 1 in this case
+    w /= w.sum()
+
+    return x, w
+
+
+def lagweight(x):
+    """Weight function of the Laguerre polynomials.
+
+    The weight function is :math:`exp(-x)` and the interval of integration
+    is :math:`[0, \\infty]`. The Laguerre polynomials are orthogonal, but
+    not normalized, with respect to this weight function.
+
+    Parameters
+    ----------
+    x : array_like
+        Values at which the weight function will be computed.
+
+    Returns
+    -------
+    w : ndarray
+        The weight function at `x`.
+
+    Notes
+    -----
+
+    .. versionadded:: 1.7.0
+
+    """
+    w = np.exp(-x)
+    return w
+
+#
+# Laguerre series class
+#
+
+class Laguerre(ABCPolyBase):
+    """A Laguerre series class.
+
+    The Laguerre class provides the standard Python numerical methods
+    '+', '-', '*', '//', '%', 'divmod', '**', and '()' as well as the
+    attributes and methods listed in the `ABCPolyBase` documentation.
+
+    Parameters
+    ----------
+    coef : array_like
+        Laguerre coefficients in order of increasing degree, i.e.,
+        ``(1, 2, 3)`` gives ``1*L_0(x) + 2*L_1(x) + 3*L_2(x)``.
+    domain : (2,) array_like, optional
+        Domain to use. The interval ``[domain[0], domain[1]]`` is mapped
+        to the interval ``[window[0], window[1]]`` by shifting and scaling.
+        The default value is [0, 1].
+    window : (2,) array_like, optional
+        Window, see `domain` for its use. The default value is [0, 1].
+
+        ..
versionadded:: 1.6.0 + + """ + # Virtual Functions + _add = staticmethod(lagadd) + _sub = staticmethod(lagsub) + _mul = staticmethod(lagmul) + _div = staticmethod(lagdiv) + _pow = staticmethod(lagpow) + _val = staticmethod(lagval) + _int = staticmethod(lagint) + _der = staticmethod(lagder) + _fit = staticmethod(lagfit) + _line = staticmethod(lagline) + _roots = staticmethod(lagroots) + _fromroots = staticmethod(lagfromroots) + + # Virtual properties + domain = np.array(lagdomain) + window = np.array(lagdomain) + basis_name = 'L' diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/laguerre.pyi b/.local/lib/python3.8/site-packages/numpy/polynomial/laguerre.pyi new file mode 100644 index 0000000..2b9ab34 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/polynomial/laguerre.pyi @@ -0,0 +1,46 @@ +from typing import Any, List + +from numpy import ndarray, dtype, int_ +from numpy.polynomial._polybase import ABCPolyBase +from numpy.polynomial.polyutils import trimcoef + +__all__: list[str] + +lagtrim = trimcoef + +def poly2lag(pol): ... +def lag2poly(c): ... + +lagdomain: ndarray[Any, dtype[int_]] +lagzero: ndarray[Any, dtype[int_]] +lagone: ndarray[Any, dtype[int_]] +lagx: ndarray[Any, dtype[int_]] + +def lagline(off, scl): ... +def lagfromroots(roots): ... +def lagadd(c1, c2): ... +def lagsub(c1, c2): ... +def lagmulx(c): ... +def lagmul(c1, c2): ... +def lagdiv(c1, c2): ... +def lagpow(c, pow, maxpower=...): ... +def lagder(c, m=..., scl=..., axis=...): ... +def lagint(c, m=..., k = ..., lbnd=..., scl=..., axis=...): ... +def lagval(x, c, tensor=...): ... +def lagval2d(x, y, c): ... +def laggrid2d(x, y, c): ... +def lagval3d(x, y, z, c): ... +def laggrid3d(x, y, z, c): ... +def lagvander(x, deg): ... +def lagvander2d(x, y, deg): ... +def lagvander3d(x, y, z, deg): ... +def lagfit(x, y, deg, rcond=..., full=..., w=...): ... +def lagcompanion(c): ... +def lagroots(c): ... +def laggauss(deg): ... +def lagweight(x): ... + +class Laguerre(ABCPolyBase): + domain: Any + window: Any + basis_name: Any diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/legendre.py b/.local/lib/python3.8/site-packages/numpy/polynomial/legendre.py new file mode 100644 index 0000000..2e8052e --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/polynomial/legendre.py @@ -0,0 +1,1658 @@ +""" +================================================== +Legendre Series (:mod:`numpy.polynomial.legendre`) +================================================== + +This module provides a number of objects (mostly functions) useful for +dealing with Legendre series, including a `Legendre` class that +encapsulates the usual arithmetic operations. (General information +on how this module represents and works with such polynomials is in the +docstring for its "parent" sub-package, `numpy.polynomial`). + +Classes +------- +.. autosummary:: + :toctree: generated/ + + Legendre + +Constants +--------- + +.. autosummary:: + :toctree: generated/ + + legdomain + legzero + legone + legx + +Arithmetic +---------- + +.. autosummary:: + :toctree: generated/ + + legadd + legsub + legmulx + legmul + legdiv + legpow + legval + legval2d + legval3d + leggrid2d + leggrid3d + +Calculus +-------- + +.. autosummary:: + :toctree: generated/ + + legder + legint + +Misc Functions +-------------- + +.. 
autosummary:: + :toctree: generated/ + + legfromroots + legroots + legvander + legvander2d + legvander3d + leggauss + legweight + legcompanion + legfit + legtrim + legline + leg2poly + poly2leg + +See also +-------- +numpy.polynomial + +""" +import numpy as np +import numpy.linalg as la +from numpy.core.multiarray import normalize_axis_index + +from . import polyutils as pu +from ._polybase import ABCPolyBase + +__all__ = [ + 'legzero', 'legone', 'legx', 'legdomain', 'legline', 'legadd', + 'legsub', 'legmulx', 'legmul', 'legdiv', 'legpow', 'legval', 'legder', + 'legint', 'leg2poly', 'poly2leg', 'legfromroots', 'legvander', + 'legfit', 'legtrim', 'legroots', 'Legendre', 'legval2d', 'legval3d', + 'leggrid2d', 'leggrid3d', 'legvander2d', 'legvander3d', 'legcompanion', + 'leggauss', 'legweight'] + +legtrim = pu.trimcoef + + +def poly2leg(pol): + """ + Convert a polynomial to a Legendre series. + + Convert an array representing the coefficients of a polynomial (relative + to the "standard" basis) ordered from lowest degree to highest, to an + array of the coefficients of the equivalent Legendre series, ordered + from lowest to highest degree. + + Parameters + ---------- + pol : array_like + 1-D array containing the polynomial coefficients + + Returns + ------- + c : ndarray + 1-D array containing the coefficients of the equivalent Legendre + series. + + See Also + -------- + leg2poly + + Notes + ----- + The easy way to do conversions between polynomial basis sets + is to use the convert method of a class instance. + + Examples + -------- + >>> from numpy import polynomial as P + >>> p = P.Polynomial(np.arange(4)) + >>> p + Polynomial([0., 1., 2., 3.], domain=[-1, 1], window=[-1, 1]) + >>> c = P.Legendre(P.legendre.poly2leg(p.coef)) + >>> c + Legendre([ 1. , 3.25, 1. , 0.75], domain=[-1, 1], window=[-1, 1]) # may vary + + """ + [pol] = pu.as_series([pol]) + deg = len(pol) - 1 + res = 0 + for i in range(deg, -1, -1): + res = legadd(legmulx(res), pol[i]) + return res + + +def leg2poly(c): + """ + Convert a Legendre series to a polynomial. + + Convert an array representing the coefficients of a Legendre series, + ordered from lowest degree to highest, to an array of the coefficients + of the equivalent polynomial (relative to the "standard" basis) ordered + from lowest to highest degree. + + Parameters + ---------- + c : array_like + 1-D array containing the Legendre series coefficients, ordered + from lowest order term to highest. + + Returns + ------- + pol : ndarray + 1-D array containing the coefficients of the equivalent polynomial + (relative to the "standard" basis) ordered from lowest order term + to highest. + + See Also + -------- + poly2leg + + Notes + ----- + The easy way to do conversions between polynomial basis sets + is to use the convert method of a class instance. + + Examples + -------- + >>> from numpy import polynomial as P + >>> c = P.Legendre(range(4)) + >>> c + Legendre([0., 1., 2., 3.], domain=[-1, 1], window=[-1, 1]) + >>> p = c.convert(kind=P.Polynomial) + >>> p + Polynomial([-1. , -3.5, 3. , 7.5], domain=[-1., 1.], window=[-1., 1.]) + >>> P.leg2poly(range(4)) + array([-1. , -3.5, 3. 
, 7.5]) + + + """ + from .polynomial import polyadd, polysub, polymulx + + [c] = pu.as_series([c]) + n = len(c) + if n < 3: + return c + else: + c0 = c[-2] + c1 = c[-1] + # i is the current degree of c1 + for i in range(n - 1, 1, -1): + tmp = c0 + c0 = polysub(c[i - 2], (c1*(i - 1))/i) + c1 = polyadd(tmp, (polymulx(c1)*(2*i - 1))/i) + return polyadd(c0, polymulx(c1)) + +# +# These are constant arrays are of integer type so as to be compatible +# with the widest range of other types, such as Decimal. +# + +# Legendre +legdomain = np.array([-1, 1]) + +# Legendre coefficients representing zero. +legzero = np.array([0]) + +# Legendre coefficients representing one. +legone = np.array([1]) + +# Legendre coefficients representing the identity x. +legx = np.array([0, 1]) + + +def legline(off, scl): + """ + Legendre series whose graph is a straight line. + + + + Parameters + ---------- + off, scl : scalars + The specified line is given by ``off + scl*x``. + + Returns + ------- + y : ndarray + This module's representation of the Legendre series for + ``off + scl*x``. + + See Also + -------- + numpy.polynomial.polynomial.polyline + numpy.polynomial.chebyshev.chebline + numpy.polynomial.laguerre.lagline + numpy.polynomial.hermite.hermline + numpy.polynomial.hermite_e.hermeline + + Examples + -------- + >>> import numpy.polynomial.legendre as L + >>> L.legline(3,2) + array([3, 2]) + >>> L.legval(-3, L.legline(3,2)) # should be -3 + -3.0 + + """ + if scl != 0: + return np.array([off, scl]) + else: + return np.array([off]) + + +def legfromroots(roots): + """ + Generate a Legendre series with given roots. + + The function returns the coefficients of the polynomial + + .. math:: p(x) = (x - r_0) * (x - r_1) * ... * (x - r_n), + + in Legendre form, where the `r_n` are the roots specified in `roots`. + If a zero has multiplicity n, then it must appear in `roots` n times. + For instance, if 2 is a root of multiplicity three and 3 is a root of + multiplicity 2, then `roots` looks something like [2, 2, 2, 3, 3]. The + roots can appear in any order. + + If the returned coefficients are `c`, then + + .. math:: p(x) = c_0 + c_1 * L_1(x) + ... + c_n * L_n(x) + + The coefficient of the last term is not generally 1 for monic + polynomials in Legendre form. + + Parameters + ---------- + roots : array_like + Sequence containing the roots. + + Returns + ------- + out : ndarray + 1-D array of coefficients. If all roots are real then `out` is a + real array, if some of the roots are complex, then `out` is complex + even if all the coefficients in the result are real (see Examples + below). + + See Also + -------- + numpy.polynomial.polynomial.polyfromroots + numpy.polynomial.chebyshev.chebfromroots + numpy.polynomial.laguerre.lagfromroots + numpy.polynomial.hermite.hermfromroots + numpy.polynomial.hermite_e.hermefromroots + + Examples + -------- + >>> import numpy.polynomial.legendre as L + >>> L.legfromroots((-1,0,1)) # x^3 - x relative to the standard basis + array([ 0. , -0.4, 0. , 0.4]) + >>> j = complex(0,1) + >>> L.legfromroots((-j,j)) # x^2 + 1 relative to the standard basis + array([ 1.33333333+0.j, 0.00000000+0.j, 0.66666667+0.j]) # may vary + + """ + return pu._fromroots(legline, legmul, roots) + + +def legadd(c1, c2): + """ + Add one Legendre series to another. + + Returns the sum of two Legendre series `c1` + `c2`. The arguments + are sequences of coefficients ordered from lowest order term to + highest, i.e., [1,2,3] represents the series ``P_0 + 2*P_1 + 3*P_2``. 
+ + Parameters + ---------- + c1, c2 : array_like + 1-D arrays of Legendre series coefficients ordered from low to + high. + + Returns + ------- + out : ndarray + Array representing the Legendre series of their sum. + + See Also + -------- + legsub, legmulx, legmul, legdiv, legpow + + Notes + ----- + Unlike multiplication, division, etc., the sum of two Legendre series + is a Legendre series (without having to "reproject" the result onto + the basis set) so addition, just like that of "standard" polynomials, + is simply "component-wise." + + Examples + -------- + >>> from numpy.polynomial import legendre as L + >>> c1 = (1,2,3) + >>> c2 = (3,2,1) + >>> L.legadd(c1,c2) + array([4., 4., 4.]) + + """ + return pu._add(c1, c2) + + +def legsub(c1, c2): + """ + Subtract one Legendre series from another. + + Returns the difference of two Legendre series `c1` - `c2`. The + sequences of coefficients are from lowest order term to highest, i.e., + [1,2,3] represents the series ``P_0 + 2*P_1 + 3*P_2``. + + Parameters + ---------- + c1, c2 : array_like + 1-D arrays of Legendre series coefficients ordered from low to + high. + + Returns + ------- + out : ndarray + Of Legendre series coefficients representing their difference. + + See Also + -------- + legadd, legmulx, legmul, legdiv, legpow + + Notes + ----- + Unlike multiplication, division, etc., the difference of two Legendre + series is a Legendre series (without having to "reproject" the result + onto the basis set) so subtraction, just like that of "standard" + polynomials, is simply "component-wise." + + Examples + -------- + >>> from numpy.polynomial import legendre as L + >>> c1 = (1,2,3) + >>> c2 = (3,2,1) + >>> L.legsub(c1,c2) + array([-2., 0., 2.]) + >>> L.legsub(c2,c1) # -C.legsub(c1,c2) + array([ 2., 0., -2.]) + + """ + return pu._sub(c1, c2) + + +def legmulx(c): + """Multiply a Legendre series by x. + + Multiply the Legendre series `c` by x, where x is the independent + variable. + + + Parameters + ---------- + c : array_like + 1-D array of Legendre series coefficients ordered from low to + high. + + Returns + ------- + out : ndarray + Array representing the result of the multiplication. + + See Also + -------- + legadd, legmul, legdiv, legpow + + Notes + ----- + The multiplication uses the recursion relationship for Legendre + polynomials in the form + + .. math:: + + xP_i(x) = ((i + 1)*P_{i + 1}(x) + i*P_{i - 1}(x))/(2i + 1) + + Examples + -------- + >>> from numpy.polynomial import legendre as L + >>> L.legmulx([1,2,3]) + array([ 0.66666667, 2.2, 1.33333333, 1.8]) # may vary + + """ + # c is a trimmed copy + [c] = pu.as_series([c]) + # The zero series needs special treatment + if len(c) == 1 and c[0] == 0: + return c + + prd = np.empty(len(c) + 1, dtype=c.dtype) + prd[0] = c[0]*0 + prd[1] = c[0] + for i in range(1, len(c)): + j = i + 1 + k = i - 1 + s = i + j + prd[j] = (c[i]*j)/s + prd[k] += (c[i]*i)/s + return prd + + +def legmul(c1, c2): + """ + Multiply one Legendre series by another. + + Returns the product of two Legendre series `c1` * `c2`. The arguments + are sequences of coefficients, from lowest order "term" to highest, + e.g., [1,2,3] represents the series ``P_0 + 2*P_1 + 3*P_2``. + + Parameters + ---------- + c1, c2 : array_like + 1-D arrays of Legendre series coefficients ordered from low to + high. + + Returns + ------- + out : ndarray + Of Legendre series coefficients representing their product. 
+ + See Also + -------- + legadd, legsub, legmulx, legdiv, legpow + + Notes + ----- + In general, the (polynomial) product of two C-series results in terms + that are not in the Legendre polynomial basis set. Thus, to express + the product as a Legendre series, it is necessary to "reproject" the + product onto said basis set, which may produce "unintuitive" (but + correct) results; see Examples section below. + + Examples + -------- + >>> from numpy.polynomial import legendre as L + >>> c1 = (1,2,3) + >>> c2 = (3,2) + >>> L.legmul(c1,c2) # multiplication requires "reprojection" + array([ 4.33333333, 10.4 , 11.66666667, 3.6 ]) # may vary + + """ + # s1, s2 are trimmed copies + [c1, c2] = pu.as_series([c1, c2]) + + if len(c1) > len(c2): + c = c2 + xs = c1 + else: + c = c1 + xs = c2 + + if len(c) == 1: + c0 = c[0]*xs + c1 = 0 + elif len(c) == 2: + c0 = c[0]*xs + c1 = c[1]*xs + else: + nd = len(c) + c0 = c[-2]*xs + c1 = c[-1]*xs + for i in range(3, len(c) + 1): + tmp = c0 + nd = nd - 1 + c0 = legsub(c[-i]*xs, (c1*(nd - 1))/nd) + c1 = legadd(tmp, (legmulx(c1)*(2*nd - 1))/nd) + return legadd(c0, legmulx(c1)) + + +def legdiv(c1, c2): + """ + Divide one Legendre series by another. + + Returns the quotient-with-remainder of two Legendre series + `c1` / `c2`. The arguments are sequences of coefficients from lowest + order "term" to highest, e.g., [1,2,3] represents the series + ``P_0 + 2*P_1 + 3*P_2``. + + Parameters + ---------- + c1, c2 : array_like + 1-D arrays of Legendre series coefficients ordered from low to + high. + + Returns + ------- + quo, rem : ndarrays + Of Legendre series coefficients representing the quotient and + remainder. + + See Also + -------- + legadd, legsub, legmulx, legmul, legpow + + Notes + ----- + In general, the (polynomial) division of one Legendre series by another + results in quotient and remainder terms that are not in the Legendre + polynomial basis set. Thus, to express these results as a Legendre + series, it is necessary to "reproject" the results onto the Legendre + basis set, which may produce "unintuitive" (but correct) results; see + Examples section below. + + Examples + -------- + >>> from numpy.polynomial import legendre as L + >>> c1 = (1,2,3) + >>> c2 = (3,2,1) + >>> L.legdiv(c1,c2) # quotient "intuitive," remainder not + (array([3.]), array([-8., -4.])) + >>> c2 = (0,1,2,3) + >>> L.legdiv(c2,c1) # neither "intuitive" + (array([-0.07407407, 1.66666667]), array([-1.03703704, -2.51851852])) # may vary + + """ + return pu._div(legmul, c1, c2) + + +def legpow(c, pow, maxpower=16): + """Raise a Legendre series to a power. + + Returns the Legendre series `c` raised to the power `pow`. The + argument `c` is a sequence of coefficients ordered from low to high. + i.e., [1,2,3] is the series ``P_0 + 2*P_1 + 3*P_2.`` + + Parameters + ---------- + c : array_like + 1-D array of Legendre series coefficients ordered from low to + high. + pow : integer + Power to which the series will be raised + maxpower : integer, optional + Maximum power allowed. This is mainly to limit growth of the series + to unmanageable size. Default is 16 + + Returns + ------- + coef : ndarray + Legendre series of power. + + See Also + -------- + legadd, legsub, legmulx, legmul, legdiv + + """ + return pu._pow(legmul, c, pow, maxpower) + + +def legder(c, m=1, scl=1, axis=0): + """ + Differentiate a Legendre series. + + Returns the Legendre series coefficients `c` differentiated `m` times + along `axis`. 
At each iteration the result is multiplied by `scl` (the + scaling factor is for use in a linear change of variable). The argument + `c` is an array of coefficients from low to high degree along each + axis, e.g., [1,2,3] represents the series ``1*L_0 + 2*L_1 + 3*L_2`` + while [[1,2],[1,2]] represents ``1*L_0(x)*L_0(y) + 1*L_1(x)*L_0(y) + + 2*L_0(x)*L_1(y) + 2*L_1(x)*L_1(y)`` if axis=0 is ``x`` and axis=1 is + ``y``. + + Parameters + ---------- + c : array_like + Array of Legendre series coefficients. If c is multidimensional the + different axis correspond to different variables with the degree in + each axis given by the corresponding index. + m : int, optional + Number of derivatives taken, must be non-negative. (Default: 1) + scl : scalar, optional + Each differentiation is multiplied by `scl`. The end result is + multiplication by ``scl**m``. This is for use in a linear change of + variable. (Default: 1) + axis : int, optional + Axis over which the derivative is taken. (Default: 0). + + .. versionadded:: 1.7.0 + + Returns + ------- + der : ndarray + Legendre series of the derivative. + + See Also + -------- + legint + + Notes + ----- + In general, the result of differentiating a Legendre series does not + resemble the same operation on a power series. Thus the result of this + function may be "unintuitive," albeit correct; see Examples section + below. + + Examples + -------- + >>> from numpy.polynomial import legendre as L + >>> c = (1,2,3,4) + >>> L.legder(c) + array([ 6., 9., 20.]) + >>> L.legder(c, 3) + array([60.]) + >>> L.legder(c, scl=-1) + array([ -6., -9., -20.]) + >>> L.legder(c, 2,-1) + array([ 9., 60.]) + + """ + c = np.array(c, ndmin=1, copy=True) + if c.dtype.char in '?bBhHiIlLqQpP': + c = c.astype(np.double) + cnt = pu._deprecate_as_int(m, "the order of derivation") + iaxis = pu._deprecate_as_int(axis, "the axis") + if cnt < 0: + raise ValueError("The order of derivation must be non-negative") + iaxis = normalize_axis_index(iaxis, c.ndim) + + if cnt == 0: + return c + + c = np.moveaxis(c, iaxis, 0) + n = len(c) + if cnt >= n: + c = c[:1]*0 + else: + for i in range(cnt): + n = n - 1 + c *= scl + der = np.empty((n,) + c.shape[1:], dtype=c.dtype) + for j in range(n, 2, -1): + der[j - 1] = (2*j - 1)*c[j] + c[j - 2] += c[j] + if n > 1: + der[1] = 3*c[2] + der[0] = c[1] + c = der + c = np.moveaxis(c, 0, iaxis) + return c + + +def legint(c, m=1, k=[], lbnd=0, scl=1, axis=0): + """ + Integrate a Legendre series. + + Returns the Legendre series coefficients `c` integrated `m` times from + `lbnd` along `axis`. At each iteration the resulting series is + **multiplied** by `scl` and an integration constant, `k`, is added. + The scaling factor is for use in a linear change of variable. ("Buyer + beware": note that, depending on what one is doing, one may want `scl` + to be the reciprocal of what one might expect; for more information, + see the Notes section below.) The argument `c` is an array of + coefficients from low to high degree along each axis, e.g., [1,2,3] + represents the series ``L_0 + 2*L_1 + 3*L_2`` while [[1,2],[1,2]] + represents ``1*L_0(x)*L_0(y) + 1*L_1(x)*L_0(y) + 2*L_0(x)*L_1(y) + + 2*L_1(x)*L_1(y)`` if axis=0 is ``x`` and axis=1 is ``y``. + + Parameters + ---------- + c : array_like + Array of Legendre series coefficients. If c is multidimensional the + different axis correspond to different variables with the degree in + each axis given by the corresponding index. + m : int, optional + Order of integration, must be positive. 
(Default: 1) + k : {[], list, scalar}, optional + Integration constant(s). The value of the first integral at + ``lbnd`` is the first value in the list, the value of the second + integral at ``lbnd`` is the second value, etc. If ``k == []`` (the + default), all constants are set to zero. If ``m == 1``, a single + scalar can be given instead of a list. + lbnd : scalar, optional + The lower bound of the integral. (Default: 0) + scl : scalar, optional + Following each integration the result is *multiplied* by `scl` + before the integration constant is added. (Default: 1) + axis : int, optional + Axis over which the integral is taken. (Default: 0). + + .. versionadded:: 1.7.0 + + Returns + ------- + S : ndarray + Legendre series coefficient array of the integral. + + Raises + ------ + ValueError + If ``m < 0``, ``len(k) > m``, ``np.ndim(lbnd) != 0``, or + ``np.ndim(scl) != 0``. + + See Also + -------- + legder + + Notes + ----- + Note that the result of each integration is *multiplied* by `scl`. + Why is this important to note? Say one is making a linear change of + variable :math:`u = ax + b` in an integral relative to `x`. Then + :math:`dx = du/a`, so one will need to set `scl` equal to + :math:`1/a` - perhaps not what one would have first thought. + + Also note that, in general, the result of integrating a C-series needs + to be "reprojected" onto the C-series basis set. Thus, typically, + the result of this function is "unintuitive," albeit correct; see + Examples section below. + + Examples + -------- + >>> from numpy.polynomial import legendre as L + >>> c = (1,2,3) + >>> L.legint(c) + array([ 0.33333333, 0.4 , 0.66666667, 0.6 ]) # may vary + >>> L.legint(c, 3) + array([ 1.66666667e-02, -1.78571429e-02, 4.76190476e-02, # may vary + -1.73472348e-18, 1.90476190e-02, 9.52380952e-03]) + >>> L.legint(c, k=3) + array([ 3.33333333, 0.4 , 0.66666667, 0.6 ]) # may vary + >>> L.legint(c, lbnd=-2) + array([ 7.33333333, 0.4 , 0.66666667, 0.6 ]) # may vary + >>> L.legint(c, scl=2) + array([ 0.66666667, 0.8 , 1.33333333, 1.2 ]) # may vary + + """ + c = np.array(c, ndmin=1, copy=True) + if c.dtype.char in '?bBhHiIlLqQpP': + c = c.astype(np.double) + if not np.iterable(k): + k = [k] + cnt = pu._deprecate_as_int(m, "the order of integration") + iaxis = pu._deprecate_as_int(axis, "the axis") + if cnt < 0: + raise ValueError("The order of integration must be non-negative") + if len(k) > cnt: + raise ValueError("Too many integration constants") + if np.ndim(lbnd) != 0: + raise ValueError("lbnd must be a scalar.") + if np.ndim(scl) != 0: + raise ValueError("scl must be a scalar.") + iaxis = normalize_axis_index(iaxis, c.ndim) + + if cnt == 0: + return c + + c = np.moveaxis(c, iaxis, 0) + k = list(k) + [0]*(cnt - len(k)) + for i in range(cnt): + n = len(c) + c *= scl + if n == 1 and np.all(c[0] == 0): + c[0] += k[i] + else: + tmp = np.empty((n + 1,) + c.shape[1:], dtype=c.dtype) + tmp[0] = c[0]*0 + tmp[1] = c[0] + if n > 1: + tmp[2] = c[1]/3 + for j in range(2, n): + t = c[j]/(2*j + 1) + tmp[j + 1] = t + tmp[j - 1] -= t + tmp[0] += k[i] - legval(lbnd, tmp) + c = tmp + c = np.moveaxis(c, 0, iaxis) + return c + + +def legval(x, c, tensor=True): + """ + Evaluate a Legendre series at points x. + + If `c` is of length `n + 1`, this function returns the value: + + .. math:: p(x) = c_0 * L_0(x) + c_1 * L_1(x) + ... + c_n * L_n(x) + + The parameter `x` is converted to an array only if it is a tuple or a + list, otherwise it is treated as a scalar. 
In either case, either `x` + or its elements must support multiplication and addition both with + themselves and with the elements of `c`. + + If `c` is a 1-D array, then `p(x)` will have the same shape as `x`. If + `c` is multidimensional, then the shape of the result depends on the + value of `tensor`. If `tensor` is true the shape will be c.shape[1:] + + x.shape. If `tensor` is false the shape will be c.shape[1:]. Note that + scalars have shape (,). + + Trailing zeros in the coefficients will be used in the evaluation, so + they should be avoided if efficiency is a concern. + + Parameters + ---------- + x : array_like, compatible object + If `x` is a list or tuple, it is converted to an ndarray, otherwise + it is left unchanged and treated as a scalar. In either case, `x` + or its elements must support addition and multiplication with + with themselves and with the elements of `c`. + c : array_like + Array of coefficients ordered so that the coefficients for terms of + degree n are contained in c[n]. If `c` is multidimensional the + remaining indices enumerate multiple polynomials. In the two + dimensional case the coefficients may be thought of as stored in + the columns of `c`. + tensor : boolean, optional + If True, the shape of the coefficient array is extended with ones + on the right, one for each dimension of `x`. Scalars have dimension 0 + for this action. The result is that every column of coefficients in + `c` is evaluated for every element of `x`. If False, `x` is broadcast + over the columns of `c` for the evaluation. This keyword is useful + when `c` is multidimensional. The default value is True. + + .. versionadded:: 1.7.0 + + Returns + ------- + values : ndarray, algebra_like + The shape of the return value is described above. + + See Also + -------- + legval2d, leggrid2d, legval3d, leggrid3d + + Notes + ----- + The evaluation uses Clenshaw recursion, aka synthetic division. + + """ + c = np.array(c, ndmin=1, copy=False) + if c.dtype.char in '?bBhHiIlLqQpP': + c = c.astype(np.double) + if isinstance(x, (tuple, list)): + x = np.asarray(x) + if isinstance(x, np.ndarray) and tensor: + c = c.reshape(c.shape + (1,)*x.ndim) + + if len(c) == 1: + c0 = c[0] + c1 = 0 + elif len(c) == 2: + c0 = c[0] + c1 = c[1] + else: + nd = len(c) + c0 = c[-2] + c1 = c[-1] + for i in range(3, len(c) + 1): + tmp = c0 + nd = nd - 1 + c0 = c[-i] - (c1*(nd - 1))/nd + c1 = tmp + (c1*x*(2*nd - 1))/nd + return c0 + c1*x + + +def legval2d(x, y, c): + """ + Evaluate a 2-D Legendre series at points (x, y). + + This function returns the values: + + .. math:: p(x,y) = \\sum_{i,j} c_{i,j} * L_i(x) * L_j(y) + + The parameters `x` and `y` are converted to arrays only if they are + tuples or a lists, otherwise they are treated as a scalars and they + must have the same shape after conversion. In either case, either `x` + and `y` or their elements must support multiplication and addition both + with themselves and with the elements of `c`. + + If `c` is a 1-D array a one is implicitly appended to its shape to make + it 2-D. The shape of the result will be c.shape[2:] + x.shape. + + Parameters + ---------- + x, y : array_like, compatible objects + The two dimensional series is evaluated at the points `(x, y)`, + where `x` and `y` must have the same shape. If `x` or `y` is a list + or tuple, it is first converted to an ndarray, otherwise it is left + unchanged and if it isn't an ndarray it is treated as a scalar. 
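+        For example, the hypothetical call with ``x = y = [0.5, 1.0]``
+        evaluates the series at the two points ``(0.5, 0.5)`` and
+        ``(1.0, 1.0)``.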
+    c : array_like
+        Array of coefficients ordered so that the coefficient of the term
+        of multi-degree i,j is contained in ``c[i,j]``. If `c` has
+        dimension greater than two the remaining indices enumerate multiple
+        sets of coefficients.
+
+    Returns
+    -------
+    values : ndarray, compatible object
+        The values of the two dimensional Legendre series at points formed
+        from pairs of corresponding values from `x` and `y`.
+
+    See Also
+    --------
+    legval, leggrid2d, legval3d, leggrid3d
+
+    Notes
+    -----
+
+    .. versionadded:: 1.7.0
+
+    """
+    return pu._valnd(legval, c, x, y)
+
+
+def leggrid2d(x, y, c):
+    """
+    Evaluate a 2-D Legendre series on the Cartesian product of x and y.
+
+    This function returns the values:
+
+    .. math:: p(a,b) = \\sum_{i,j} c_{i,j} * L_i(a) * L_j(b)
+
+    where the points `(a, b)` consist of all pairs formed by taking
+    `a` from `x` and `b` from `y`. The resulting points form a grid with
+    `x` in the first dimension and `y` in the second.
+
+    The parameters `x` and `y` are converted to arrays only if they are
+    tuples or lists, otherwise they are treated as scalars. In either
+    case, either `x` and `y` or their elements must support multiplication
+    and addition both with themselves and with the elements of `c`.
+
+    If `c` has fewer than two dimensions, ones are implicitly appended to
+    its shape to make it 2-D. The shape of the result will be c.shape[2:] +
+    x.shape + y.shape.
+
+    Parameters
+    ----------
+    x, y : array_like, compatible objects
+        The two dimensional series is evaluated at the points in the
+        Cartesian product of `x` and `y`. If `x` or `y` is a list or
+        tuple, it is first converted to an ndarray, otherwise it is left
+        unchanged and, if it isn't an ndarray, it is treated as a scalar.
+    c : array_like
+        Array of coefficients ordered so that the coefficient of the term of
+        multi-degree i,j is contained in `c[i,j]`. If `c` has dimension
+        greater than two the remaining indices enumerate multiple sets of
+        coefficients.
+
+    Returns
+    -------
+    values : ndarray, compatible object
+        The values of the two dimensional Legendre series at points in the
+        Cartesian product of `x` and `y`.
+
+    See Also
+    --------
+    legval, legval2d, legval3d, leggrid3d
+
+    Notes
+    -----
+
+    .. versionadded:: 1.7.0
+
+    """
+    return pu._gridnd(legval, c, x, y)
+
+
+def legval3d(x, y, z, c):
+    """
+    Evaluate a 3-D Legendre series at points (x, y, z).
+
+    This function returns the values:
+
+    .. math:: p(x,y,z) = \\sum_{i,j,k} c_{i,j,k} * L_i(x) * L_j(y) * L_k(z)
+
+    The parameters `x`, `y`, and `z` are converted to arrays only if
+    they are tuples or lists, otherwise they are treated as scalars and
+    they must have the same shape after conversion. In either case, either
+    `x`, `y`, and `z` or their elements must support multiplication and
+    addition both with themselves and with the elements of `c`.
+
+    If `c` has fewer than 3 dimensions, ones are implicitly appended to its
+    shape to make it 3-D. The shape of the result will be c.shape[3:] +
+    x.shape.
+
+    Parameters
+    ----------
+    x, y, z : array_like, compatible object
+        The three dimensional series is evaluated at the points
+        `(x, y, z)`, where `x`, `y`, and `z` must have the same shape. If
+        any of `x`, `y`, or `z` is a list or tuple, it is first converted
+        to an ndarray, otherwise it is left unchanged and if it isn't an
+        ndarray it is treated as a scalar.
+    c : array_like
+        Array of coefficients ordered so that the coefficient of the term of
+        multi-degree i,j,k is contained in ``c[i,j,k]``.
If `c` has dimension
+        greater than 3 the remaining indices enumerate multiple sets of
+        coefficients.
+
+    Returns
+    -------
+    values : ndarray, compatible object
+        The values of the multidimensional polynomial on points formed with
+        triples of corresponding values from `x`, `y`, and `z`.
+
+    See Also
+    --------
+    legval, legval2d, leggrid2d, leggrid3d
+
+    Notes
+    -----
+
+    .. versionadded:: 1.7.0
+
+    """
+    return pu._valnd(legval, c, x, y, z)
+
+
+def leggrid3d(x, y, z, c):
+    """
+    Evaluate a 3-D Legendre series on the Cartesian product of x, y, and z.
+
+    This function returns the values:
+
+    .. math:: p(a,b,c) = \\sum_{i,j,k} c_{i,j,k} * L_i(a) * L_j(b) * L_k(c)
+
+    where the points `(a, b, c)` consist of all triples formed by taking
+    `a` from `x`, `b` from `y`, and `c` from `z`. The resulting points form
+    a grid with `x` in the first dimension, `y` in the second, and `z` in
+    the third.
+
+    The parameters `x`, `y`, and `z` are converted to arrays only if they
+    are tuples or lists, otherwise they are treated as scalars. In
+    either case, either `x`, `y`, and `z` or their elements must support
+    multiplication and addition both with themselves and with the elements
+    of `c`.
+
+    If `c` has fewer than three dimensions, ones are implicitly appended to
+    its shape to make it 3-D. The shape of the result will be c.shape[3:] +
+    x.shape + y.shape + z.shape.
+
+    Parameters
+    ----------
+    x, y, z : array_like, compatible objects
+        The three dimensional series is evaluated at the points in the
+        Cartesian product of `x`, `y`, and `z`. If `x`, `y`, or `z` is a
+        list or tuple, it is first converted to an ndarray, otherwise it is
+        left unchanged and, if it isn't an ndarray, it is treated as a
+        scalar.
+    c : array_like
+        Array of coefficients ordered so that the coefficient of the term of
+        multi-degree i,j,k is contained in ``c[i,j,k]``. If `c` has dimension
+        greater than three the remaining indices enumerate multiple sets of
+        coefficients.
+
+    Returns
+    -------
+    values : ndarray, compatible object
+        The values of the three dimensional polynomial at points in the
+        Cartesian product of `x`, `y`, and `z`.
+
+    See Also
+    --------
+    legval, legval2d, leggrid2d, legval3d
+
+    Notes
+    -----
+
+    .. versionadded:: 1.7.0
+
+    """
+    return pu._gridnd(legval, c, x, y, z)
+
+
+def legvander(x, deg):
+    """Pseudo-Vandermonde matrix of given degree.
+
+    Returns the pseudo-Vandermonde matrix of degree `deg` and sample points
+    `x`. The pseudo-Vandermonde matrix is defined by
+
+    .. math:: V[..., i] = L_i(x)
+
+    where `0 <= i <= deg`. The leading indices of `V` index the elements of
+    `x` and the last index is the degree of the Legendre polynomial.
+
+    If `c` is a 1-D array of coefficients of length `n + 1` and `V` is the
+    array ``V = legvander(x, n)``, then ``np.dot(V, c)`` and
+    ``legval(x, c)`` are the same up to roundoff. This equivalence is
+    useful both for least squares fitting and for the evaluation of a large
+    number of Legendre series of the same degree and sample points.
+
+    Parameters
+    ----------
+    x : array_like
+        Array of points. The dtype is converted to float64 or complex128
+        depending on whether any of the elements are complex. If `x` is
+        scalar it is converted to a 1-D array.
+    deg : int
+        Degree of the resulting matrix.
+
+    Returns
+    -------
+    vander : ndarray
+        The pseudo-Vandermonde matrix. The shape of the returned matrix is
+        ``x.shape + (deg + 1,)``, where the last index is the degree of the
+        corresponding Legendre polynomial. The dtype will be the same as
+        the converted `x`.
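+
+    Examples
+    --------
+    A small illustrative check, hand-computed from the first three
+    Legendre polynomials ``1``, ``x``, and ``(3*x**2 - 1)/2``:
+
+    >>> from numpy.polynomial.legendre import legvander
+    >>> legvander([-1, 0, 1], 2)
+    array([[ 1. , -1. ,  1. ],
+           [ 1. ,  0. , -0.5],
+           [ 1. ,  1. ,  1. ]])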
+ + """ + ideg = pu._deprecate_as_int(deg, "deg") + if ideg < 0: + raise ValueError("deg must be non-negative") + + x = np.array(x, copy=False, ndmin=1) + 0.0 + dims = (ideg + 1,) + x.shape + dtyp = x.dtype + v = np.empty(dims, dtype=dtyp) + # Use forward recursion to generate the entries. This is not as accurate + # as reverse recursion in this application but it is more efficient. + v[0] = x*0 + 1 + if ideg > 0: + v[1] = x + for i in range(2, ideg + 1): + v[i] = (v[i-1]*x*(2*i - 1) - v[i-2]*(i - 1))/i + return np.moveaxis(v, 0, -1) + + +def legvander2d(x, y, deg): + """Pseudo-Vandermonde matrix of given degrees. + + Returns the pseudo-Vandermonde matrix of degrees `deg` and sample + points `(x, y)`. The pseudo-Vandermonde matrix is defined by + + .. math:: V[..., (deg[1] + 1)*i + j] = L_i(x) * L_j(y), + + where `0 <= i <= deg[0]` and `0 <= j <= deg[1]`. The leading indices of + `V` index the points `(x, y)` and the last index encodes the degrees of + the Legendre polynomials. + + If ``V = legvander2d(x, y, [xdeg, ydeg])``, then the columns of `V` + correspond to the elements of a 2-D coefficient array `c` of shape + (xdeg + 1, ydeg + 1) in the order + + .. math:: c_{00}, c_{01}, c_{02} ... , c_{10}, c_{11}, c_{12} ... + + and ``np.dot(V, c.flat)`` and ``legval2d(x, y, c)`` will be the same + up to roundoff. This equivalence is useful both for least squares + fitting and for the evaluation of a large number of 2-D Legendre + series of the same degrees and sample points. + + Parameters + ---------- + x, y : array_like + Arrays of point coordinates, all of the same shape. The dtypes + will be converted to either float64 or complex128 depending on + whether any of the elements are complex. Scalars are converted to + 1-D arrays. + deg : list of ints + List of maximum degrees of the form [x_deg, y_deg]. + + Returns + ------- + vander2d : ndarray + The shape of the returned matrix is ``x.shape + (order,)``, where + :math:`order = (deg[0]+1)*(deg[1]+1)`. The dtype will be the same + as the converted `x` and `y`. + + See Also + -------- + legvander, legvander3d, legval2d, legval3d + + Notes + ----- + + .. versionadded:: 1.7.0 + + """ + return pu._vander_nd_flat((legvander, legvander), (x, y), deg) + + +def legvander3d(x, y, z, deg): + """Pseudo-Vandermonde matrix of given degrees. + + Returns the pseudo-Vandermonde matrix of degrees `deg` and sample + points `(x, y, z)`. If `l, m, n` are the given degrees in `x, y, z`, + then The pseudo-Vandermonde matrix is defined by + + .. math:: V[..., (m+1)(n+1)i + (n+1)j + k] = L_i(x)*L_j(y)*L_k(z), + + where `0 <= i <= l`, `0 <= j <= m`, and `0 <= j <= n`. The leading + indices of `V` index the points `(x, y, z)` and the last index encodes + the degrees of the Legendre polynomials. + + If ``V = legvander3d(x, y, z, [xdeg, ydeg, zdeg])``, then the columns + of `V` correspond to the elements of a 3-D coefficient array `c` of + shape (xdeg + 1, ydeg + 1, zdeg + 1) in the order + + .. math:: c_{000}, c_{001}, c_{002},... , c_{010}, c_{011}, c_{012},... + + and ``np.dot(V, c.flat)`` and ``legval3d(x, y, z, c)`` will be the + same up to roundoff. This equivalence is useful both for least squares + fitting and for the evaluation of a large number of 3-D Legendre + series of the same degrees and sample points. + + Parameters + ---------- + x, y, z : array_like + Arrays of point coordinates, all of the same shape. The dtypes will + be converted to either float64 or complex128 depending on whether + any of the elements are complex. 
+    deg : list of ints
+        List of maximum degrees of the form [x_deg, y_deg, z_deg].
+
+    Returns
+    -------
+    vander3d : ndarray
+        The shape of the returned matrix is ``x.shape + (order,)``, where
+        :math:`order = (deg[0]+1)*(deg[1]+1)*(deg[2]+1)`.  The dtype will
+        be the same as the converted `x`, `y`, and `z`.
+
+    See Also
+    --------
+    legvander, legvander2d, legval2d, legval3d
+
+    Notes
+    -----
+
+    .. versionadded:: 1.7.0
+
+    """
+    return pu._vander_nd_flat((legvander, legvander, legvander), (x, y, z), deg)
+
+
+def legfit(x, y, deg, rcond=None, full=False, w=None):
+    """
+    Least squares fit of Legendre series to data.
+
+    Return the coefficients of a Legendre series of degree `deg` that is the
+    least squares fit to the data values `y` given at points `x`. If `y` is
+    1-D the returned coefficients will also be 1-D. If `y` is 2-D multiple
+    fits are done, one for each column of `y`, and the resulting
+    coefficients are stored in the corresponding columns of a 2-D return.
+    The fitted polynomial(s) are in the form
+
+    .. math:: p(x) = c_0 + c_1 * L_1(x) + ... + c_n * L_n(x),
+
+    where `n` is `deg`.
+
+    Parameters
+    ----------
+    x : array_like, shape (M,)
+        x-coordinates of the M sample points ``(x[i], y[i])``.
+    y : array_like, shape (M,) or (M, K)
+        y-coordinates of the sample points. Several data sets of sample
+        points sharing the same x-coordinates can be fitted at once by
+        passing in a 2D-array that contains one dataset per column.
+    deg : int or 1-D array_like
+        Degree(s) of the fitting polynomials. If `deg` is a single integer
+        all terms up to and including the `deg`'th term are included in the
+        fit. For NumPy versions >= 1.11.0 a list of integers specifying the
+        degrees of the terms to include may be used instead.
+    rcond : float, optional
+        Relative condition number of the fit. Singular values smaller than
+        this relative to the largest singular value will be ignored. The
+        default value is len(x)*eps, where eps is the relative precision of
+        the float type, about 2e-16 in most cases.
+    full : bool, optional
+        Switch determining nature of return value. When it is False (the
+        default) just the coefficients are returned, when True diagnostic
+        information from the singular value decomposition is also returned.
+    w : array_like, shape (`M`,), optional
+        Weights. If not None, the weight ``w[i]`` applies to the unsquared
+        residual ``y[i] - y_hat[i]`` at ``x[i]``. Ideally the weights are
+        chosen so that the errors of the products ``w[i]*y[i]`` all have the
+        same variance. When using inverse-variance weighting, use
+        ``w[i] = 1/sigma(y[i])``. The default value is None.
+
+        .. versionadded:: 1.5.0
+
+    Returns
+    -------
+    coef : ndarray, shape (deg + 1,) or (deg + 1, K)
+        Legendre coefficients ordered from low to high. If `y` was
+        2-D, the coefficients for the data in column k of `y` are in
+        column `k`. If `deg` is specified as a list, coefficients for
+        terms not included in the fit are set equal to zero in the
+        returned `coef`.
+
+    [residuals, rank, singular_values, rcond] : list
+        These values are only returned if ``full == True``
+
+        - residuals -- sum of squared residuals of the least squares fit
+        - rank -- the numerical rank of the scaled Vandermonde matrix
+        - singular_values -- singular values of the scaled Vandermonde matrix
+        - rcond -- value of `rcond`.
+
+        For more details, see `numpy.linalg.lstsq`.
+
+    Warns
+    -----
+    RankWarning
+        The rank of the coefficient matrix in the least-squares fit is
+        deficient. The warning is only raised if ``full == False``. The
+        warnings can be turned off by
+
+        >>> import warnings
+        >>> warnings.simplefilter('ignore', np.RankWarning)
+
+    See Also
+    --------
+    numpy.polynomial.polynomial.polyfit
+    numpy.polynomial.chebyshev.chebfit
+    numpy.polynomial.laguerre.lagfit
+    numpy.polynomial.hermite.hermfit
+    numpy.polynomial.hermite_e.hermefit
+    legval : Evaluates a Legendre series.
+    legvander : Vandermonde matrix of Legendre series.
+    legweight : Legendre weight function (= 1).
+    numpy.linalg.lstsq : Computes a least-squares fit from the matrix.
+    scipy.interpolate.UnivariateSpline : Computes spline fits.
+
+    Notes
+    -----
+    The solution is the coefficients of the Legendre series `p` that
+    minimizes the sum of the weighted squared errors
+
+    .. math:: E = \\sum_j w_j^2 * |y_j - p(x_j)|^2,
+
+    where :math:`w_j` are the weights. This problem is solved by setting up
+    the (typically) overdetermined matrix equation
+
+    .. math:: V(x) * c = w * y,
+
+    where `V` is the weighted pseudo Vandermonde matrix of `x`, `c` are the
+    coefficients to be solved for, `w` are the weights, and `y` are the
+    observed values. This equation is then solved using the singular value
+    decomposition of `V`.
+
+    If some of the singular values of `V` are so small that they are
+    neglected, then a `RankWarning` will be issued. This means that the
+    coefficient values may be poorly determined. Using a lower order fit
+    will usually get rid of the warning. The `rcond` parameter can also be
+    set to a value smaller than its default, but the resulting fit may be
+    spurious and have large contributions from roundoff error.
+
+    Fits using Legendre series are usually better conditioned than fits
+    using power series, but much can depend on the distribution of the
+    sample points and the smoothness of the data. If the quality of the fit
+    is inadequate splines may be a good alternative.
+
+    References
+    ----------
+    .. [1] Wikipedia, "Curve fitting",
+           https://en.wikipedia.org/wiki/Curve_fitting
+
+    Examples
+    --------
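+    A short sketch added for illustration (the exact printed values may
+    vary with roundoff). Since ``x**3 = 0.6*L_1(x) + 0.4*L_3(x)``, the
+    noise-free data ``y = x**3 - x`` has Legendre coefficients
+    ``[0, -0.4, 0, 0.4]``:
+
+    >>> import numpy as np
+    >>> from numpy.polynomial import legendre as L
+    >>> x = np.linspace(-1, 1, 51)
+    >>> y = x**3 - x
+    >>> L.legfit(x, y, 3)
+    array([ 0. , -0.4,  0. ,  0.4]) # may vary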
+
+    """
+    return pu._fit(legvander, x, y, deg, rcond, full, w)
+
+
+def legcompanion(c):
+    """Return the scaled companion matrix of c.
+
+    The basis polynomials are scaled so that the companion matrix is
+    symmetric when `c` is a Legendre basis polynomial. This provides
+    better eigenvalue estimates than the unscaled case and for basis
+    polynomials the eigenvalues are guaranteed to be real if
+    `numpy.linalg.eigvalsh` is used to obtain them.
+
+    Parameters
+    ----------
+    c : array_like
+        1-D array of Legendre series coefficients ordered from low to high
+        degree.
+
+    Returns
+    -------
+    mat : ndarray
+        Scaled companion matrix of dimensions (deg, deg).
+
+    Notes
+    -----
+
+    .. versionadded:: 1.7.0
+
+    """
+    # c is a trimmed copy
+    [c] = pu.as_series([c])
+    if len(c) < 2:
+        raise ValueError('Series must have maximum degree of at least 1.')
+    if len(c) == 2:
+        return np.array([[-c[0]/c[1]]])
+
+    n = len(c) - 1
+    mat = np.zeros((n, n), dtype=c.dtype)
+    scl = 1./np.sqrt(2*np.arange(n) + 1)
+    top = mat.reshape(-1)[1::n+1]
+    bot = mat.reshape(-1)[n::n+1]
+    top[...] = np.arange(1, n)*scl[:n-1]*scl[1:n]
+    bot[...] = top
+    mat[:, -1] -= (c[:-1]/c[-1])*(scl/scl[-1])*(n/(2*n - 1))
+    return mat
+
+
+def legroots(c):
+    """
+    Compute the roots of a Legendre series.
+
+    Return the roots (a.k.a. "zeros") of the polynomial
+
+    .. math:: p(x) = \\sum_i c[i] * L_i(x).
+
+    Parameters
+    ----------
+    c : 1-D array_like
+        1-D array of coefficients.
+
+    Returns
+    -------
+    out : ndarray
+        Array of the roots of the series. If all the roots are real,
+        then `out` is also real, otherwise it is complex.
+
+    See Also
+    --------
+    numpy.polynomial.polynomial.polyroots
+    numpy.polynomial.chebyshev.chebroots
+    numpy.polynomial.laguerre.lagroots
+    numpy.polynomial.hermite.hermroots
+    numpy.polynomial.hermite_e.hermeroots
+
+    Notes
+    -----
+    The root estimates are obtained as the eigenvalues of the companion
+    matrix; roots far from the origin of the complex plane may have large
+    errors due to the numerical instability of the series for such values.
+    Roots with multiplicity greater than 1 will also show larger errors as
+    the value of the series near such points is relatively insensitive to
+    errors in the roots. Isolated roots near the origin can be improved by
+    a few iterations of Newton's method.
+
+    The Legendre series basis polynomials aren't powers of ``x`` so the
+    results of this function may seem unintuitive.
+
+    Examples
+    --------
+    >>> import numpy.polynomial.legendre as leg
+    >>> leg.legroots((1, 2, 3, 4)) # 4L_3 + 3L_2 + 2L_1 + 1L_0, all real roots
+    array([-0.85099543, -0.11407192,  0.51506735]) # may vary
+
+    """
+    # c is a trimmed copy
+    [c] = pu.as_series([c])
+    if len(c) < 2:
+        return np.array([], dtype=c.dtype)
+    if len(c) == 2:
+        return np.array([-c[0]/c[1]])
+
+    # rotated companion matrix reduces error
+    m = legcompanion(c)[::-1,::-1]
+    r = la.eigvals(m)
+    r.sort()
+    return r
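+
+
+# A quick check one can run against the quadrature rule implemented below
+# (an editor's sketch, not part of the upstream file): a 3-point
+# Gauss-Legendre rule integrates polynomials of degree <= 5 exactly over
+# [-1, 1], e.g. the integral of x**4 is 2/5:
+#
+#     >>> from numpy.polynomial.legendre import leggauss
+#     >>> x, w = leggauss(3)
+#     >>> round(float((w * x**4).sum()), 12)
+#     0.4
+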
+def leggauss(deg):
+    """
+    Gauss-Legendre quadrature.
+
+    Computes the sample points and weights for Gauss-Legendre quadrature.
+    These sample points and weights will correctly integrate polynomials of
+    degree :math:`2*deg - 1` or less over the interval :math:`[-1, 1]` with
+    the weight function :math:`f(x) = 1`.
+
+    Parameters
+    ----------
+    deg : int
+        Number of sample points and weights. It must be >= 1.
+
+    Returns
+    -------
+    x : ndarray
+        1-D ndarray containing the sample points.
+    w : ndarray
+        1-D ndarray containing the weights.
+
+    Notes
+    -----
+
+    .. versionadded:: 1.7.0
+
+    The results have only been tested up to degree 100, higher degrees may
+    be problematic. The weights are determined by using the fact that
+
+    .. math:: w_k = c / (L'_n(x_k) * L_{n-1}(x_k))
+
+    where :math:`c` is a constant independent of :math:`k` and :math:`x_k`
+    is the k'th root of :math:`L_n`, and then scaling the results to get
+    the right value when integrating 1.
+
+    """
+    ideg = pu._deprecate_as_int(deg, "deg")
+    if ideg <= 0:
+        raise ValueError("deg must be a positive integer")
+
+    # first approximation of roots. We use the fact that the companion
+    # matrix is symmetric in this case in order to obtain better zeros.
+    c = np.array([0]*deg + [1])
+    m = legcompanion(c)
+    x = la.eigvalsh(m)
+
+    # improve roots by one application of Newton
+    dy = legval(x, c)
+    df = legval(x, legder(c))
+    x -= dy/df
+
+    # compute the weights. We scale the factor to avoid possible numerical
+    # overflow.
+    fm = legval(x, c[1:])
+    fm /= np.abs(fm).max()
+    df /= np.abs(df).max()
+    w = 1/(fm * df)
+
+    # for Legendre we can also symmetrize
+    w = (w + w[::-1])/2
+    x = (x - x[::-1])/2
+
+    # scale w to get the right value
+    w *= 2. / w.sum()
+
+    return x, w
+
+
+def legweight(x):
+    """
+    Weight function of the Legendre polynomials.
+
+    The weight function is :math:`1` and the interval of integration is
+    :math:`[-1, 1]`. The Legendre polynomials are orthogonal, but not
+    normalized, with respect to this weight function.
+ + Parameters + ---------- + x : array_like + Values at which the weight function will be computed. + + Returns + ------- + w : ndarray + The weight function at `x`. + + Notes + ----- + + .. versionadded:: 1.7.0 + + """ + w = x*0.0 + 1.0 + return w + +# +# Legendre series class +# + +class Legendre(ABCPolyBase): + """A Legendre series class. + + The Legendre class provides the standard Python numerical methods + '+', '-', '*', '//', '%', 'divmod', '**', and '()' as well as the + attributes and methods listed in the `ABCPolyBase` documentation. + + Parameters + ---------- + coef : array_like + Legendre coefficients in order of increasing degree, i.e., + ``(1, 2, 3)`` gives ``1*P_0(x) + 2*P_1(x) + 3*P_2(x)``. + domain : (2,) array_like, optional + Domain to use. The interval ``[domain[0], domain[1]]`` is mapped + to the interval ``[window[0], window[1]]`` by shifting and scaling. + The default value is [-1, 1]. + window : (2,) array_like, optional + Window, see `domain` for its use. The default value is [-1, 1]. + + .. versionadded:: 1.6.0 + + """ + # Virtual Functions + _add = staticmethod(legadd) + _sub = staticmethod(legsub) + _mul = staticmethod(legmul) + _div = staticmethod(legdiv) + _pow = staticmethod(legpow) + _val = staticmethod(legval) + _int = staticmethod(legint) + _der = staticmethod(legder) + _fit = staticmethod(legfit) + _line = staticmethod(legline) + _roots = staticmethod(legroots) + _fromroots = staticmethod(legfromroots) + + # Virtual properties + domain = np.array(legdomain) + window = np.array(legdomain) + basis_name = 'P' diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/legendre.pyi b/.local/lib/python3.8/site-packages/numpy/polynomial/legendre.pyi new file mode 100644 index 0000000..86aef17 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/polynomial/legendre.pyi @@ -0,0 +1,46 @@ +from typing import Any, List + +from numpy import ndarray, dtype, int_ +from numpy.polynomial._polybase import ABCPolyBase +from numpy.polynomial.polyutils import trimcoef + +__all__: list[str] + +legtrim = trimcoef + +def poly2leg(pol): ... +def leg2poly(c): ... + +legdomain: ndarray[Any, dtype[int_]] +legzero: ndarray[Any, dtype[int_]] +legone: ndarray[Any, dtype[int_]] +legx: ndarray[Any, dtype[int_]] + +def legline(off, scl): ... +def legfromroots(roots): ... +def legadd(c1, c2): ... +def legsub(c1, c2): ... +def legmulx(c): ... +def legmul(c1, c2): ... +def legdiv(c1, c2): ... +def legpow(c, pow, maxpower=...): ... +def legder(c, m=..., scl=..., axis=...): ... +def legint(c, m=..., k = ..., lbnd=..., scl=..., axis=...): ... +def legval(x, c, tensor=...): ... +def legval2d(x, y, c): ... +def leggrid2d(x, y, c): ... +def legval3d(x, y, z, c): ... +def leggrid3d(x, y, z, c): ... +def legvander(x, deg): ... +def legvander2d(x, y, deg): ... +def legvander3d(x, y, z, deg): ... +def legfit(x, y, deg, rcond=..., full=..., w=...): ... +def legcompanion(c): ... +def legroots(c): ... +def leggauss(deg): ... +def legweight(x): ... 
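+
+# Usage sketch for the class stubbed below (an editor's illustration, not
+# part of the generated stub): with the default domain and window the
+# identity series ``Legendre([0, 1])`` is just ``x``:
+#
+#     >>> import numpy as np
+#     >>> from numpy.polynomial import Legendre
+#     >>> p = Legendre([0., 1.])
+#     >>> p(np.array([0.0, 0.5]))
+#     array([0. , 0.5])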
+ +class Legendre(ABCPolyBase): + domain: Any + window: Any + basis_name: Any diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/polynomial.py b/.local/lib/python3.8/site-packages/numpy/polynomial/polynomial.py new file mode 100644 index 0000000..3c2663b --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/polynomial/polynomial.py @@ -0,0 +1,1530 @@ +""" +================================================= +Power Series (:mod:`numpy.polynomial.polynomial`) +================================================= + +This module provides a number of objects (mostly functions) useful for +dealing with polynomials, including a `Polynomial` class that +encapsulates the usual arithmetic operations. (General information +on how this module represents and works with polynomial objects is in +the docstring for its "parent" sub-package, `numpy.polynomial`). + +Classes +------- +.. autosummary:: + :toctree: generated/ + + Polynomial + +Constants +--------- +.. autosummary:: + :toctree: generated/ + + polydomain + polyzero + polyone + polyx + +Arithmetic +---------- +.. autosummary:: + :toctree: generated/ + + polyadd + polysub + polymulx + polymul + polydiv + polypow + polyval + polyval2d + polyval3d + polygrid2d + polygrid3d + +Calculus +-------- +.. autosummary:: + :toctree: generated/ + + polyder + polyint + +Misc Functions +-------------- +.. autosummary:: + :toctree: generated/ + + polyfromroots + polyroots + polyvalfromroots + polyvander + polyvander2d + polyvander3d + polycompanion + polyfit + polytrim + polyline + +See Also +-------- +`numpy.polynomial` + +""" +__all__ = [ + 'polyzero', 'polyone', 'polyx', 'polydomain', 'polyline', 'polyadd', + 'polysub', 'polymulx', 'polymul', 'polydiv', 'polypow', 'polyval', + 'polyvalfromroots', 'polyder', 'polyint', 'polyfromroots', 'polyvander', + 'polyfit', 'polytrim', 'polyroots', 'Polynomial', 'polyval2d', 'polyval3d', + 'polygrid2d', 'polygrid3d', 'polyvander2d', 'polyvander3d'] + +import numpy as np +import numpy.linalg as la +from numpy.core.multiarray import normalize_axis_index + +from . import polyutils as pu +from ._polybase import ABCPolyBase + +polytrim = pu.trimcoef + +# +# These are constant arrays are of integer type so as to be compatible +# with the widest range of other types, such as Decimal. +# + +# Polynomial default domain. +polydomain = np.array([-1, 1]) + +# Polynomial coefficients representing zero. +polyzero = np.array([0]) + +# Polynomial coefficients representing one. +polyone = np.array([1]) + +# Polynomial coefficients representing the identity x. +polyx = np.array([0, 1]) + +# +# Polynomial series functions +# + + +def polyline(off, scl): + """ + Returns an array representing a linear polynomial. + + Parameters + ---------- + off, scl : scalars + The "y-intercept" and "slope" of the line, respectively. + + Returns + ------- + y : ndarray + This module's representation of the linear polynomial ``off + + scl*x``. + + See Also + -------- + numpy.polynomial.chebyshev.chebline + numpy.polynomial.legendre.legline + numpy.polynomial.laguerre.lagline + numpy.polynomial.hermite.hermline + numpy.polynomial.hermite_e.hermeline + + Examples + -------- + >>> from numpy.polynomial import polynomial as P + >>> P.polyline(1,-1) + array([ 1, -1]) + >>> P.polyval(1, P.polyline(1,-1)) # should be 0 + 0.0 + + """ + if scl != 0: + return np.array([off, scl]) + else: + return np.array([off]) + + +def polyfromroots(roots): + """ + Generate a monic polynomial with given roots. + + Return the coefficients of the polynomial + + .. 
math:: p(x) = (x - r_0) * (x - r_1) * ... * (x - r_n), + + where the ``r_n`` are the roots specified in `roots`. If a zero has + multiplicity n, then it must appear in `roots` n times. For instance, + if 2 is a root of multiplicity three and 3 is a root of multiplicity 2, + then `roots` looks something like [2, 2, 2, 3, 3]. The roots can appear + in any order. + + If the returned coefficients are `c`, then + + .. math:: p(x) = c_0 + c_1 * x + ... + x^n + + The coefficient of the last term is 1 for monic polynomials in this + form. + + Parameters + ---------- + roots : array_like + Sequence containing the roots. + + Returns + ------- + out : ndarray + 1-D array of the polynomial's coefficients If all the roots are + real, then `out` is also real, otherwise it is complex. (see + Examples below). + + See Also + -------- + numpy.polynomial.chebyshev.chebfromroots + numpy.polynomial.legendre.legfromroots + numpy.polynomial.laguerre.lagfromroots + numpy.polynomial.hermite.hermfromroots + numpy.polynomial.hermite_e.hermefromroots + + Notes + ----- + The coefficients are determined by multiplying together linear factors + of the form ``(x - r_i)``, i.e. + + .. math:: p(x) = (x - r_0) (x - r_1) ... (x - r_n) + + where ``n == len(roots) - 1``; note that this implies that ``1`` is always + returned for :math:`a_n`. + + Examples + -------- + >>> from numpy.polynomial import polynomial as P + >>> P.polyfromroots((-1,0,1)) # x(x - 1)(x + 1) = x^3 - x + array([ 0., -1., 0., 1.]) + >>> j = complex(0,1) + >>> P.polyfromroots((-j,j)) # complex returned, though values are real + array([1.+0.j, 0.+0.j, 1.+0.j]) + + """ + return pu._fromroots(polyline, polymul, roots) + + +def polyadd(c1, c2): + """ + Add one polynomial to another. + + Returns the sum of two polynomials `c1` + `c2`. The arguments are + sequences of coefficients from lowest order term to highest, i.e., + [1,2,3] represents the polynomial ``1 + 2*x + 3*x**2``. + + Parameters + ---------- + c1, c2 : array_like + 1-D arrays of polynomial coefficients ordered from low to high. + + Returns + ------- + out : ndarray + The coefficient array representing their sum. + + See Also + -------- + polysub, polymulx, polymul, polydiv, polypow + + Examples + -------- + >>> from numpy.polynomial import polynomial as P + >>> c1 = (1,2,3) + >>> c2 = (3,2,1) + >>> sum = P.polyadd(c1,c2); sum + array([4., 4., 4.]) + >>> P.polyval(2, sum) # 4 + 4(2) + 4(2**2) + 28.0 + + """ + return pu._add(c1, c2) + + +def polysub(c1, c2): + """ + Subtract one polynomial from another. + + Returns the difference of two polynomials `c1` - `c2`. The arguments + are sequences of coefficients from lowest order term to highest, i.e., + [1,2,3] represents the polynomial ``1 + 2*x + 3*x**2``. + + Parameters + ---------- + c1, c2 : array_like + 1-D arrays of polynomial coefficients ordered from low to + high. + + Returns + ------- + out : ndarray + Of coefficients representing their difference. + + See Also + -------- + polyadd, polymulx, polymul, polydiv, polypow + + Examples + -------- + >>> from numpy.polynomial import polynomial as P + >>> c1 = (1,2,3) + >>> c2 = (3,2,1) + >>> P.polysub(c1,c2) + array([-2., 0., 2.]) + >>> P.polysub(c2,c1) # -P.polysub(c1,c2) + array([ 2., 0., -2.]) + + """ + return pu._sub(c1, c2) + + +def polymulx(c): + """Multiply a polynomial by x. + + Multiply the polynomial `c` by x, where x is the independent + variable. + + + Parameters + ---------- + c : array_like + 1-D array of polynomial coefficients ordered from low to + high. 
+
+    Returns
+    -------
+    out : ndarray
+        Array representing the result of the multiplication.
+
+    See Also
+    --------
+    polyadd, polysub, polymul, polydiv, polypow
+
+    Notes
+    -----
+
+    .. versionadded:: 1.5.0
+
+    """
+    # c is a trimmed copy
+    [c] = pu.as_series([c])
+    # The zero series needs special treatment
+    if len(c) == 1 and c[0] == 0:
+        return c
+
+    prd = np.empty(len(c) + 1, dtype=c.dtype)
+    prd[0] = c[0]*0
+    prd[1:] = c
+    return prd
+
+
+def polymul(c1, c2):
+    """
+    Multiply one polynomial by another.
+
+    Returns the product of two polynomials `c1` * `c2`. The arguments are
+    sequences of coefficients, from lowest order term to highest, e.g.,
+    [1,2,3] represents the polynomial ``1 + 2*x + 3*x**2``.
+
+    Parameters
+    ----------
+    c1, c2 : array_like
+        1-D arrays of coefficients representing a polynomial, relative to the
+        "standard" basis, and ordered from lowest order term to highest.
+
+    Returns
+    -------
+    out : ndarray
+        Of the coefficients of their product.
+
+    See Also
+    --------
+    polyadd, polysub, polymulx, polydiv, polypow
+
+    Examples
+    --------
+    >>> from numpy.polynomial import polynomial as P
+    >>> c1 = (1,2,3)
+    >>> c2 = (3,2,1)
+    >>> P.polymul(c1,c2)
+    array([ 3.,  8., 14.,  8.,  3.])
+
+    """
+    # c1, c2 are trimmed copies
+    [c1, c2] = pu.as_series([c1, c2])
+    ret = np.convolve(c1, c2)
+    return pu.trimseq(ret)
+
+
+def polydiv(c1, c2):
+    """
+    Divide one polynomial by another.
+
+    Returns the quotient-with-remainder of two polynomials `c1` / `c2`.
+    The arguments are sequences of coefficients, from lowest order term
+    to highest, e.g., [1,2,3] represents ``1 + 2*x + 3*x**2``.
+
+    Parameters
+    ----------
+    c1, c2 : array_like
+        1-D arrays of polynomial coefficients ordered from low to high.
+
+    Returns
+    -------
+    [quo, rem] : ndarrays
+        Of coefficient series representing the quotient and remainder.
+
+    See Also
+    --------
+    polyadd, polysub, polymulx, polymul, polypow
+
+    Examples
+    --------
+    >>> from numpy.polynomial import polynomial as P
+    >>> c1 = (1,2,3)
+    >>> c2 = (3,2,1)
+    >>> P.polydiv(c1,c2)
+    (array([3.]), array([-8., -4.]))
+    >>> P.polydiv(c2,c1)
+    (array([ 0.33333333]), array([ 2.66666667,  1.33333333])) # may vary
+
+    """
+    # c1, c2 are trimmed copies
+    [c1, c2] = pu.as_series([c1, c2])
+    if c2[-1] == 0:
+        raise ZeroDivisionError()
+
+    # note: this is more efficient than `pu._div(polymul, c1, c2)`
+    lc1 = len(c1)
+    lc2 = len(c2)
+    if lc1 < lc2:
+        return c1[:1]*0, c1
+    elif lc2 == 1:
+        return c1/c2[-1], c1[:1]*0
+    else:
+        dlen = lc1 - lc2
+        scl = c2[-1]
+        c2 = c2[:-1]/scl
+        i = dlen
+        j = lc1 - 1
+        while i >= 0:
+            c1[i:j] -= c2*c1[j]
+            i -= 1
+            j -= 1
+        return c1[j+1:]/scl, pu.trimseq(c1[:j+1])
+
+
+def polypow(c, pow, maxpower=None):
+    """Raise a polynomial to a power.
+
+    Returns the polynomial `c` raised to the power `pow`. The argument
+    `c` is a sequence of coefficients ordered from low to high, i.e.,
+    [1,2,3] is the series ``1 + 2*x + 3*x**2``.
+
+    Parameters
+    ----------
+    c : array_like
+        1-D array of series coefficients ordered from low to
+        high degree.
+    pow : integer
+        Power to which the series will be raised.
+    maxpower : integer, optional
+        Maximum power allowed. This is mainly to limit growth of the series
+        to unmanageable size. Default is 16.
+
+    Returns
+    -------
+    coef : ndarray
+        Power series of the given power.
+ + See Also + -------- + polyadd, polysub, polymulx, polymul, polydiv + + Examples + -------- + >>> from numpy.polynomial import polynomial as P + >>> P.polypow([1,2,3], 2) + array([ 1., 4., 10., 12., 9.]) + + """ + # note: this is more efficient than `pu._pow(polymul, c1, c2)`, as it + # avoids calling `as_series` repeatedly + return pu._pow(np.convolve, c, pow, maxpower) + + +def polyder(c, m=1, scl=1, axis=0): + """ + Differentiate a polynomial. + + Returns the polynomial coefficients `c` differentiated `m` times along + `axis`. At each iteration the result is multiplied by `scl` (the + scaling factor is for use in a linear change of variable). The + argument `c` is an array of coefficients from low to high degree along + each axis, e.g., [1,2,3] represents the polynomial ``1 + 2*x + 3*x**2`` + while [[1,2],[1,2]] represents ``1 + 1*x + 2*y + 2*x*y`` if axis=0 is + ``x`` and axis=1 is ``y``. + + Parameters + ---------- + c : array_like + Array of polynomial coefficients. If c is multidimensional the + different axis correspond to different variables with the degree + in each axis given by the corresponding index. + m : int, optional + Number of derivatives taken, must be non-negative. (Default: 1) + scl : scalar, optional + Each differentiation is multiplied by `scl`. The end result is + multiplication by ``scl**m``. This is for use in a linear change + of variable. (Default: 1) + axis : int, optional + Axis over which the derivative is taken. (Default: 0). + + .. versionadded:: 1.7.0 + + Returns + ------- + der : ndarray + Polynomial coefficients of the derivative. + + See Also + -------- + polyint + + Examples + -------- + >>> from numpy.polynomial import polynomial as P + >>> c = (1,2,3,4) # 1 + 2x + 3x**2 + 4x**3 + >>> P.polyder(c) # (d/dx)(c) = 2 + 6x + 12x**2 + array([ 2., 6., 12.]) + >>> P.polyder(c,3) # (d**3/dx**3)(c) = 24 + array([24.]) + >>> P.polyder(c,scl=-1) # (d/d(-x))(c) = -2 - 6x - 12x**2 + array([ -2., -6., -12.]) + >>> P.polyder(c,2,-1) # (d**2/d(-x)**2)(c) = 6 + 24x + array([ 6., 24.]) + + """ + c = np.array(c, ndmin=1, copy=True) + if c.dtype.char in '?bBhHiIlLqQpP': + # astype fails with NA + c = c + 0.0 + cdt = c.dtype + cnt = pu._deprecate_as_int(m, "the order of derivation") + iaxis = pu._deprecate_as_int(axis, "the axis") + if cnt < 0: + raise ValueError("The order of derivation must be non-negative") + iaxis = normalize_axis_index(iaxis, c.ndim) + + if cnt == 0: + return c + + c = np.moveaxis(c, iaxis, 0) + n = len(c) + if cnt >= n: + c = c[:1]*0 + else: + for i in range(cnt): + n = n - 1 + c *= scl + der = np.empty((n,) + c.shape[1:], dtype=cdt) + for j in range(n, 0, -1): + der[j - 1] = j*c[j] + c = der + c = np.moveaxis(c, 0, iaxis) + return c + + +def polyint(c, m=1, k=[], lbnd=0, scl=1, axis=0): + """ + Integrate a polynomial. + + Returns the polynomial coefficients `c` integrated `m` times from + `lbnd` along `axis`. At each iteration the resulting series is + **multiplied** by `scl` and an integration constant, `k`, is added. + The scaling factor is for use in a linear change of variable. ("Buyer + beware": note that, depending on what one is doing, one may want `scl` + to be the reciprocal of what one might expect; for more information, + see the Notes section below.) The argument `c` is an array of + coefficients, from low to high degree along each axis, e.g., [1,2,3] + represents the polynomial ``1 + 2*x + 3*x**2`` while [[1,2],[1,2]] + represents ``1 + 1*x + 2*y + 2*x*y`` if axis=0 is ``x`` and axis=1 is + ``y``. 
+ + Parameters + ---------- + c : array_like + 1-D array of polynomial coefficients, ordered from low to high. + m : int, optional + Order of integration, must be positive. (Default: 1) + k : {[], list, scalar}, optional + Integration constant(s). The value of the first integral at zero + is the first value in the list, the value of the second integral + at zero is the second value, etc. If ``k == []`` (the default), + all constants are set to zero. If ``m == 1``, a single scalar can + be given instead of a list. + lbnd : scalar, optional + The lower bound of the integral. (Default: 0) + scl : scalar, optional + Following each integration the result is *multiplied* by `scl` + before the integration constant is added. (Default: 1) + axis : int, optional + Axis over which the integral is taken. (Default: 0). + + .. versionadded:: 1.7.0 + + Returns + ------- + S : ndarray + Coefficient array of the integral. + + Raises + ------ + ValueError + If ``m < 1``, ``len(k) > m``, ``np.ndim(lbnd) != 0``, or + ``np.ndim(scl) != 0``. + + See Also + -------- + polyder + + Notes + ----- + Note that the result of each integration is *multiplied* by `scl`. Why + is this important to note? Say one is making a linear change of + variable :math:`u = ax + b` in an integral relative to `x`. Then + :math:`dx = du/a`, so one will need to set `scl` equal to + :math:`1/a` - perhaps not what one would have first thought. + + Examples + -------- + >>> from numpy.polynomial import polynomial as P + >>> c = (1,2,3) + >>> P.polyint(c) # should return array([0, 1, 1, 1]) + array([0., 1., 1., 1.]) + >>> P.polyint(c,3) # should return array([0, 0, 0, 1/6, 1/12, 1/20]) + array([ 0. , 0. , 0. , 0.16666667, 0.08333333, # may vary + 0.05 ]) + >>> P.polyint(c,k=3) # should return array([3, 1, 1, 1]) + array([3., 1., 1., 1.]) + >>> P.polyint(c,lbnd=-2) # should return array([6, 1, 1, 1]) + array([6., 1., 1., 1.]) + >>> P.polyint(c,scl=-2) # should return array([0, -2, -2, -2]) + array([ 0., -2., -2., -2.]) + + """ + c = np.array(c, ndmin=1, copy=True) + if c.dtype.char in '?bBhHiIlLqQpP': + # astype doesn't preserve mask attribute. + c = c + 0.0 + cdt = c.dtype + if not np.iterable(k): + k = [k] + cnt = pu._deprecate_as_int(m, "the order of integration") + iaxis = pu._deprecate_as_int(axis, "the axis") + if cnt < 0: + raise ValueError("The order of integration must be non-negative") + if len(k) > cnt: + raise ValueError("Too many integration constants") + if np.ndim(lbnd) != 0: + raise ValueError("lbnd must be a scalar.") + if np.ndim(scl) != 0: + raise ValueError("scl must be a scalar.") + iaxis = normalize_axis_index(iaxis, c.ndim) + + if cnt == 0: + return c + + k = list(k) + [0]*(cnt - len(k)) + c = np.moveaxis(c, iaxis, 0) + for i in range(cnt): + n = len(c) + c *= scl + if n == 1 and np.all(c[0] == 0): + c[0] += k[i] + else: + tmp = np.empty((n + 1,) + c.shape[1:], dtype=cdt) + tmp[0] = c[0]*0 + tmp[1] = c[0] + for j in range(1, n): + tmp[j + 1] = c[j]/(j + 1) + tmp[0] += k[i] - polyval(lbnd, tmp) + c = tmp + c = np.moveaxis(c, 0, iaxis) + return c + + +def polyval(x, c, tensor=True): + """ + Evaluate a polynomial at points x. + + If `c` is of length `n + 1`, this function returns the value + + .. math:: p(x) = c_0 + c_1 * x + ... + c_n * x^n + + The parameter `x` is converted to an array only if it is a tuple or a + list, otherwise it is treated as a scalar. In either case, either `x` + or its elements must support multiplication and addition both with + themselves and with the elements of `c`. 
+
+    If `c` is a 1-D array, then `p(x)` will have the same shape as `x`. If
+    `c` is multidimensional, then the shape of the result depends on the
+    value of `tensor`. If `tensor` is true the shape will be c.shape[1:] +
+    x.shape. If `tensor` is false the shape will be c.shape[1:]. Note that
+    scalars have shape ().
+
+    Trailing zeros in the coefficients will be used in the evaluation, so
+    they should be avoided if efficiency is a concern.
+
+    Parameters
+    ----------
+    x : array_like, compatible object
+        If `x` is a list or tuple, it is converted to an ndarray, otherwise
+        it is left unchanged and treated as a scalar. In either case, `x`
+        or its elements must support addition and multiplication with
+        themselves and with the elements of `c`.
+    c : array_like
+        Array of coefficients ordered so that the coefficients for terms of
+        degree n are contained in c[n]. If `c` is multidimensional the
+        remaining indices enumerate multiple polynomials. In the two
+        dimensional case the coefficients may be thought of as stored in
+        the columns of `c`.
+    tensor : boolean, optional
+        If True, the shape of the coefficient array is extended with ones
+        on the right, one for each dimension of `x`. Scalars have dimension 0
+        for this action. The result is that every column of coefficients in
+        `c` is evaluated for every element of `x`. If False, `x` is broadcast
+        over the columns of `c` for the evaluation. This keyword is useful
+        when `c` is multidimensional. The default value is True.
+
+        .. versionadded:: 1.7.0
+
+    Returns
+    -------
+    values : ndarray, compatible object
+        The shape of the returned array is described above.
+
+    See Also
+    --------
+    polyval2d, polygrid2d, polyval3d, polygrid3d
+
+    Notes
+    -----
+    The evaluation uses Horner's method.
+
+    Examples
+    --------
+    >>> from numpy.polynomial.polynomial import polyval
+    >>> polyval(1, [1,2,3])
+    6.0
+    >>> a = np.arange(4).reshape(2,2)
+    >>> a
+    array([[0, 1],
+           [2, 3]])
+    >>> polyval(a, [1,2,3])
+    array([[ 1.,  6.],
+           [17., 34.]])
+    >>> coef = np.arange(4).reshape(2,2) # multidimensional coefficients
+    >>> coef
+    array([[0, 1],
+           [2, 3]])
+    >>> polyval([1,2], coef, tensor=True)
+    array([[2., 4.],
+           [4., 7.]])
+    >>> polyval([1,2], coef, tensor=False)
+    array([2., 7.])
+
+    """
+    c = np.array(c, ndmin=1, copy=False)
+    if c.dtype.char in '?bBhHiIlLqQpP':
+        # astype fails with NA
+        c = c + 0.0
+    if isinstance(x, (tuple, list)):
+        x = np.asarray(x)
+    if isinstance(x, np.ndarray) and tensor:
+        c = c.reshape(c.shape + (1,)*x.ndim)
+
+    c0 = c[-1] + x*0
+    for i in range(2, len(c) + 1):
+        c0 = c[-i] + c0*x
+    return c0
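+
+
+# The Horner recursion used in polyval above, made concrete for a quadratic
+# (an editor's sketch, not part of the upstream file):
+# p(x) = c0 + x*(c1 + x*c2)
+#
+#     >>> from numpy.polynomial.polynomial import polyval
+#     >>> c0, c1, c2, x = 1.0, 2.0, 3.0, 5.0
+#     >>> float(polyval(x, [c0, c1, c2])) == c0 + x*(c1 + x*c2)
+#     True
+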
+def polyvalfromroots(x, r, tensor=True):
+    """
+    Evaluate a polynomial specified by its roots at points x.
+
+    If `r` is of length `N`, this function returns the value
+
+    .. math:: p(x) = \\prod_{n=1}^{N} (x - r_n)
+
+    The parameter `x` is converted to an array only if it is a tuple or a
+    list, otherwise it is treated as a scalar. In either case, either `x`
+    or its elements must support multiplication and addition both with
+    themselves and with the elements of `r`.
+
+    If `r` is a 1-D array, then `p(x)` will have the same shape as `x`. If `r`
+    is multidimensional, then the shape of the result depends on the value of
+    `tensor`. If `tensor` is ``True`` the shape will be r.shape[1:] + x.shape;
+    that is, each polynomial is evaluated at every value of `x`. If `tensor` is
+    ``False``, the shape will be r.shape[1:]; that is, each polynomial is
+    evaluated only for the corresponding broadcast value of `x`. Note that
+    scalars have shape ().
+
+    .. versionadded:: 1.12
+
+    Parameters
+    ----------
+    x : array_like, compatible object
+        If `x` is a list or tuple, it is converted to an ndarray, otherwise
+        it is left unchanged and treated as a scalar. In either case, `x`
+        or its elements must support addition and multiplication with
+        themselves and with the elements of `r`.
+    r : array_like
+        Array of roots. If `r` is multidimensional the first index is the
+        root index, while the remaining indices enumerate multiple
+        polynomials. For instance, in the two dimensional case the roots
+        of each polynomial may be thought of as stored in the columns of `r`.
+    tensor : boolean, optional
+        If True, the shape of the roots array is extended with ones on the
+        right, one for each dimension of `x`. Scalars have dimension 0 for this
+        action. The result is that every column of coefficients in `r` is
+        evaluated for every element of `x`. If False, `x` is broadcast over the
+        columns of `r` for the evaluation. This keyword is useful when `r` is
+        multidimensional. The default value is True.
+
+    Returns
+    -------
+    values : ndarray, compatible object
+        The shape of the returned array is described above.
+
+    See Also
+    --------
+    polyroots, polyfromroots, polyval
+
+    Examples
+    --------
+    >>> from numpy.polynomial.polynomial import polyvalfromroots
+    >>> polyvalfromroots(1, [1,2,3])
+    0.0
+    >>> a = np.arange(4).reshape(2,2)
+    >>> a
+    array([[0, 1],
+           [2, 3]])
+    >>> polyvalfromroots(a, [-1, 0, 1])
+    array([[-0.,  0.],
+           [ 6., 24.]])
+    >>> r = np.arange(-2, 2).reshape(2,2) # multidimensional roots
+    >>> r # each column of r defines one polynomial
+    array([[-2, -1],
+           [ 0,  1]])
+    >>> b = [-2, 1]
+    >>> polyvalfromroots(b, r, tensor=True)
+    array([[-0.,  3.],
+           [ 3.,  0.]])
+    >>> polyvalfromroots(b, r, tensor=False)
+    array([-0.,  0.])
+    """
+    r = np.array(r, ndmin=1, copy=False)
+    if r.dtype.char in '?bBhHiIlLqQpP':
+        r = r.astype(np.double)
+    if isinstance(x, (tuple, list)):
+        x = np.asarray(x)
+    if isinstance(x, np.ndarray):
+        if tensor:
+            r = r.reshape(r.shape + (1,)*x.ndim)
+        elif x.ndim >= r.ndim:
+            raise ValueError("x.ndim must be < r.ndim when tensor == False")
+    return np.prod(x - r, axis=0)
+
+
+def polyval2d(x, y, c):
+    """
+    Evaluate a 2-D polynomial at points (x, y).
+
+    This function returns the value
+
+    .. math:: p(x,y) = \\sum_{i,j} c_{i,j} * x^i * y^j
+
+    The parameters `x` and `y` are converted to arrays only if they are
+    tuples or lists, otherwise they are treated as scalars and they
+    must have the same shape after conversion. In either case, either `x`
+    and `y` or their elements must support multiplication and addition both
+    with themselves and with the elements of `c`.
+
+    If `c` has fewer than two dimensions, ones are implicitly appended to
+    its shape to make it 2-D. The shape of the result will be c.shape[2:] +
+    x.shape.
+
+    Parameters
+    ----------
+    x, y : array_like, compatible objects
+        The two dimensional series is evaluated at the points `(x, y)`,
+        where `x` and `y` must have the same shape. If `x` or `y` is a list
+        or tuple, it is first converted to an ndarray, otherwise it is left
+        unchanged and, if it isn't an ndarray, it is treated as a scalar.
+    c : array_like
+        Array of coefficients ordered so that the coefficient of the term
+        of multi-degree i,j is contained in `c[i,j]`. If `c` has
+        dimension greater than two the remaining indices enumerate multiple
+        sets of coefficients.
+ + Returns + ------- + values : ndarray, compatible object + The values of the two dimensional polynomial at points formed with + pairs of corresponding values from `x` and `y`. + + See Also + -------- + polyval, polygrid2d, polyval3d, polygrid3d + + Notes + ----- + + .. versionadded:: 1.7.0 + + """ + return pu._valnd(polyval, c, x, y) + + +def polygrid2d(x, y, c): + """ + Evaluate a 2-D polynomial on the Cartesian product of x and y. + + This function returns the values: + + .. math:: p(a,b) = \\sum_{i,j} c_{i,j} * a^i * b^j + + where the points `(a, b)` consist of all pairs formed by taking + `a` from `x` and `b` from `y`. The resulting points form a grid with + `x` in the first dimension and `y` in the second. + + The parameters `x` and `y` are converted to arrays only if they are + tuples or a lists, otherwise they are treated as a scalars. In either + case, either `x` and `y` or their elements must support multiplication + and addition both with themselves and with the elements of `c`. + + If `c` has fewer than two dimensions, ones are implicitly appended to + its shape to make it 2-D. The shape of the result will be c.shape[2:] + + x.shape + y.shape. + + Parameters + ---------- + x, y : array_like, compatible objects + The two dimensional series is evaluated at the points in the + Cartesian product of `x` and `y`. If `x` or `y` is a list or + tuple, it is first converted to an ndarray, otherwise it is left + unchanged and, if it isn't an ndarray, it is treated as a scalar. + c : array_like + Array of coefficients ordered so that the coefficients for terms of + degree i,j are contained in ``c[i,j]``. If `c` has dimension + greater than two the remaining indices enumerate multiple sets of + coefficients. + + Returns + ------- + values : ndarray, compatible object + The values of the two dimensional polynomial at points in the Cartesian + product of `x` and `y`. + + See Also + -------- + polyval, polyval2d, polyval3d, polygrid3d + + Notes + ----- + + .. versionadded:: 1.7.0 + + """ + return pu._gridnd(polyval, c, x, y) + + +def polyval3d(x, y, z, c): + """ + Evaluate a 3-D polynomial at points (x, y, z). + + This function returns the values: + + .. math:: p(x,y,z) = \\sum_{i,j,k} c_{i,j,k} * x^i * y^j * z^k + + The parameters `x`, `y`, and `z` are converted to arrays only if + they are tuples or a lists, otherwise they are treated as a scalars and + they must have the same shape after conversion. In either case, either + `x`, `y`, and `z` or their elements must support multiplication and + addition both with themselves and with the elements of `c`. + + If `c` has fewer than 3 dimensions, ones are implicitly appended to its + shape to make it 3-D. The shape of the result will be c.shape[3:] + + x.shape. + + Parameters + ---------- + x, y, z : array_like, compatible object + The three dimensional series is evaluated at the points + `(x, y, z)`, where `x`, `y`, and `z` must have the same shape. If + any of `x`, `y`, or `z` is a list or tuple, it is first converted + to an ndarray, otherwise it is left unchanged and if it isn't an + ndarray it is treated as a scalar. + c : array_like + Array of coefficients ordered so that the coefficient of the term of + multi-degree i,j,k is contained in ``c[i,j,k]``. If `c` has dimension + greater than 3 the remaining indices enumerate multiple sets of + coefficients. + + Returns + ------- + values : ndarray, compatible object + The values of the multidimensional polynomial on points formed with + triples of corresponding values from `x`, `y`, and `z`. 
+
+    See Also
+    --------
+    polyval, polyval2d, polygrid2d, polygrid3d
+
+    Notes
+    -----
+
+    .. versionadded:: 1.7.0
+
+    """
+    return pu._valnd(polyval, c, x, y, z)
+
+
+def polygrid3d(x, y, z, c):
+    """
+    Evaluate a 3-D polynomial on the Cartesian product of x, y and z.
+
+    This function returns the values:
+
+    .. math:: p(a,b,c) = \\sum_{i,j,k} c_{i,j,k} * a^i * b^j * c^k
+
+    where the points `(a, b, c)` consist of all triples formed by taking
+    `a` from `x`, `b` from `y`, and `c` from `z`. The resulting points form
+    a grid with `x` in the first dimension, `y` in the second, and `z` in
+    the third.
+
+    The parameters `x`, `y`, and `z` are converted to arrays only if they
+    are tuples or lists, otherwise they are treated as scalars. In
+    either case, either `x`, `y`, and `z` or their elements must support
+    multiplication and addition both with themselves and with the elements
+    of `c`.
+
+    If `c` has fewer than three dimensions, ones are implicitly appended to
+    its shape to make it 3-D. The shape of the result will be c.shape[3:] +
+    x.shape + y.shape + z.shape.
+
+    Parameters
+    ----------
+    x, y, z : array_like, compatible objects
+        The three dimensional series is evaluated at the points in the
+        Cartesian product of `x`, `y`, and `z`. If `x`, `y`, or `z` is a
+        list or tuple, it is first converted to an ndarray, otherwise it is
+        left unchanged and, if it isn't an ndarray, it is treated as a
+        scalar.
+    c : array_like
+        Array of coefficients ordered so that the coefficients for terms of
+        multi-degree i,j,k are contained in ``c[i,j,k]``. If `c` has
+        dimension greater than three the remaining indices enumerate
+        multiple sets of coefficients.
+
+    Returns
+    -------
+    values : ndarray, compatible object
+        The values of the three dimensional polynomial at points in the
+        Cartesian product of `x`, `y`, and `z`.
+
+    See Also
+    --------
+    polyval, polyval2d, polygrid2d, polyval3d
+
+    Notes
+    -----
+
+    .. versionadded:: 1.7.0
+
+    """
+    return pu._gridnd(polyval, c, x, y, z)
+
+
+def polyvander(x, deg):
+    """Vandermonde matrix of given degree.
+
+    Returns the Vandermonde matrix of degree `deg` and sample points
+    `x`. The Vandermonde matrix is defined by
+
+    .. math:: V[..., i] = x^i,
+
+    where `0 <= i <= deg`. The leading indices of `V` index the elements of
+    `x` and the last index is the power of `x`.
+
+    If `c` is a 1-D array of coefficients of length `n + 1` and `V` is the
+    matrix ``V = polyvander(x, n)``, then ``np.dot(V, c)`` and
+    ``polyval(x, c)`` are the same up to roundoff. This equivalence is
+    useful both for least squares fitting and for the evaluation of a large
+    number of polynomials of the same degree and sample points.
+
+    Parameters
+    ----------
+    x : array_like
+        Array of points. The dtype is converted to float64 or complex128
+        depending on whether any of the elements are complex. If `x` is
+        scalar it is converted to a 1-D array.
+    deg : int
+        Degree of the resulting matrix.
+
+    Returns
+    -------
+    vander : ndarray
+        The Vandermonde matrix. The shape of the returned matrix is
+        ``x.shape + (deg + 1,)``, where the last index is the power of `x`.
+        The dtype will be the same as the converted `x`.
+
+    See Also
+    --------
+    polyvander2d, polyvander3d
+
+    """
+    ideg = pu._deprecate_as_int(deg, "deg")
+    if ideg < 0:
+        raise ValueError("deg must be non-negative")
+
+    x = np.array(x, copy=False, ndmin=1) + 0.0
+    dims = (ideg + 1,) + x.shape
+    dtyp = x.dtype
+    v = np.empty(dims, dtype=dtyp)
+    v[0] = x*0 + 1
+    if ideg > 0:
+        v[1] = x
+        for i in range(2, ideg + 1):
+            v[i] = v[i-1]*x
+    return np.moveaxis(v, 0, -1)
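+
+
+# A short sanity check of the equivalence stated in the polyvander
+# docstring (an editor's sketch, not part of the upstream file):
+# np.dot(V, c) agrees with polyval(x, c) up to roundoff.
+#
+#     >>> import numpy as np
+#     >>> from numpy.polynomial.polynomial import polyvander, polyval
+#     >>> x = np.linspace(-1, 1, 5)
+#     >>> c = np.array([1., -2., 3.])
+#     >>> np.allclose(np.dot(polyvander(x, 2), c), polyval(x, c))
+#     True
+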
+def polyvander2d(x, y, deg):
+    """Pseudo-Vandermonde matrix of given degrees.
+
+    Returns the pseudo-Vandermonde matrix of degrees `deg` and sample
+    points `(x, y)`. The pseudo-Vandermonde matrix is defined by
+
+    .. math:: V[..., (deg[1] + 1)*i + j] = x^i * y^j,
+
+    where `0 <= i <= deg[0]` and `0 <= j <= deg[1]`. The leading indices of
+    `V` index the points `(x, y)` and the last index encodes the powers of
+    `x` and `y`.
+
+    If ``V = polyvander2d(x, y, [xdeg, ydeg])``, then the columns of `V`
+    correspond to the elements of a 2-D coefficient array `c` of shape
+    (xdeg + 1, ydeg + 1) in the order
+
+    .. math:: c_{00}, c_{01}, c_{02} ... , c_{10}, c_{11}, c_{12} ...
+
+    and ``np.dot(V, c.flat)`` and ``polyval2d(x, y, c)`` will be the same
+    up to roundoff. This equivalence is useful both for least squares
+    fitting and for the evaluation of a large number of 2-D polynomials
+    of the same degrees and sample points.
+
+    Parameters
+    ----------
+    x, y : array_like
+        Arrays of point coordinates, all of the same shape. The dtypes
+        will be converted to either float64 or complex128 depending on
+        whether any of the elements are complex. Scalars are converted to
+        1-D arrays.
+    deg : list of ints
+        List of maximum degrees of the form [x_deg, y_deg].
+
+    Returns
+    -------
+    vander2d : ndarray
+        The shape of the returned matrix is ``x.shape + (order,)``, where
+        :math:`order = (deg[0]+1)*(deg[1]+1)`.  The dtype will be the same
+        as the converted `x` and `y`.
+
+    See Also
+    --------
+    polyvander, polyvander3d, polyval2d, polyval3d
+
+    """
+    return pu._vander_nd_flat((polyvander, polyvander), (x, y), deg)
+
+
+def polyvander3d(x, y, z, deg):
+    """Pseudo-Vandermonde matrix of given degrees.
+
+    Returns the pseudo-Vandermonde matrix of degrees `deg` and sample
+    points `(x, y, z)`. If `l, m, n` are the given degrees in `x, y, z`,
+    then the pseudo-Vandermonde matrix is defined by
+
+    .. math:: V[..., (m+1)(n+1)i + (n+1)j + k] = x^i * y^j * z^k,
+
+    where `0 <= i <= l`, `0 <= j <= m`, and `0 <= k <= n`. The leading
+    indices of `V` index the points `(x, y, z)` and the last index encodes
+    the powers of `x`, `y`, and `z`.
+
+    If ``V = polyvander3d(x, y, z, [xdeg, ydeg, zdeg])``, then the columns
+    of `V` correspond to the elements of a 3-D coefficient array `c` of
+    shape (xdeg + 1, ydeg + 1, zdeg + 1) in the order
+
+    .. math:: c_{000}, c_{001}, c_{002},... , c_{010}, c_{011}, c_{012},...
+
+    and ``np.dot(V, c.flat)`` and ``polyval3d(x, y, z, c)`` will be the
+    same up to roundoff. This equivalence is useful both for least squares
+    fitting and for the evaluation of a large number of 3-D polynomials
+    of the same degrees and sample points.
+
+    Parameters
+    ----------
+    x, y, z : array_like
+        Arrays of point coordinates, all of the same shape. The dtypes will
+        be converted to either float64 or complex128 depending on whether
+        any of the elements are complex. Scalars are converted to 1-D
+        arrays.
+    deg : list of ints
+        List of maximum degrees of the form [x_deg, y_deg, z_deg].
+
+    Returns
+    -------
+    vander3d : ndarray
+        The shape of the returned matrix is ``x.shape + (order,)``, where
+        :math:`order = (deg[0]+1)*(deg[1]+1)*(deg[2]+1)`.  The dtype will
+        be the same as the converted `x`, `y`, and `z`.
+
+    See Also
+    --------
+    polyvander, polyvander2d, polyval2d, polyval3d
+
+    Notes
+    -----
+
+    .. versionadded:: 1.7.0
+
+    """
+    return pu._vander_nd_flat((polyvander, polyvander, polyvander), (x, y, z), deg)
+
+
+def polyfit(x, y, deg, rcond=None, full=False, w=None):
+    """
+    Least-squares fit of a polynomial to data.
+
+    Return the coefficients of a polynomial of degree `deg` that is the
+    least squares fit to the data values `y` given at points `x`. If `y` is
+    1-D the returned coefficients will also be 1-D. If `y` is 2-D multiple
+    fits are done, one for each column of `y`, and the resulting
+    coefficients are stored in the corresponding columns of a 2-D return.
+    The fitted polynomial(s) are in the form
+
+    .. math:: p(x) = c_0 + c_1 * x + ... + c_n * x^n,
+
+    where `n` is `deg`.
+
+    Parameters
+    ----------
+    x : array_like, shape (`M`,)
+        x-coordinates of the `M` sample (data) points ``(x[i], y[i])``.
+    y : array_like, shape (`M`,) or (`M`, `K`)
+        y-coordinates of the sample points. Several sets of sample points
+        sharing the same x-coordinates can be (independently) fit with one
+        call to `polyfit` by passing in for `y` a 2-D array that contains
+        one data set per column.
+    deg : int or 1-D array_like
+        Degree(s) of the fitting polynomials. If `deg` is a single integer
+        all terms up to and including the `deg`'th term are included in the
+        fit. For NumPy versions >= 1.11.0 a list of integers specifying the
+        degrees of the terms to include may be used instead.
+    rcond : float, optional
+        Relative condition number of the fit. Singular values smaller
+        than `rcond`, relative to the largest singular value, will be
+        ignored. The default value is ``len(x)*eps``, where `eps` is the
+        relative precision of the platform's float type, about 2e-16 in
+        most cases.
+    full : bool, optional
+        Switch determining the nature of the return value. When ``False``
+        (the default) just the coefficients are returned; when ``True``,
+        diagnostic information from the singular value decomposition (used
+        to solve the fit's matrix equation) is also returned.
+    w : array_like, shape (`M`,), optional
+        Weights. If not None, the weight ``w[i]`` applies to the unsquared
+        residual ``y[i] - y_hat[i]`` at ``x[i]``. Ideally the weights are
+        chosen so that the errors of the products ``w[i]*y[i]`` all have the
+        same variance. When using inverse-variance weighting, use
+        ``w[i] = 1/sigma(y[i])``. The default value is None.
+
+        .. versionadded:: 1.5.0
+
+    Returns
+    -------
+    coef : ndarray, shape (`deg` + 1,) or (`deg` + 1, `K`)
+        Polynomial coefficients ordered from low to high. If `y` was 2-D,
+        the coefficients in column `k` of `coef` represent the polynomial
+        fit to the data in `y`'s `k`-th column.
+
+    [residuals, rank, singular_values, rcond] : list
+        These values are only returned if ``full == True``
+
+        - residuals -- sum of squared residuals of the least squares fit
+        - rank -- the numerical rank of the scaled Vandermonde matrix
+        - singular_values -- singular values of the scaled Vandermonde matrix
+        - rcond -- value of `rcond`.
+
+        For more details, see `numpy.linalg.lstsq`.
+
+    Raises
+    ------
+    RankWarning
+        Raised if the matrix in the least-squares fit is rank deficient.
+        The warning is only raised if ``full == False``.
The warnings can + be turned off by: + + >>> import warnings + >>> warnings.simplefilter('ignore', np.RankWarning) + + See Also + -------- + numpy.polynomial.chebyshev.chebfit + numpy.polynomial.legendre.legfit + numpy.polynomial.laguerre.lagfit + numpy.polynomial.hermite.hermfit + numpy.polynomial.hermite_e.hermefit + polyval : Evaluates a polynomial. + polyvander : Vandermonde matrix for powers. + numpy.linalg.lstsq : Computes a least-squares fit from the matrix. + scipy.interpolate.UnivariateSpline : Computes spline fits. + + Notes + ----- + The solution is the coefficients of the polynomial `p` that minimizes + the sum of the weighted squared errors + + .. math:: E = \\sum_j w_j^2 * |y_j - p(x_j)|^2, + + where the :math:`w_j` are the weights. This problem is solved by + setting up the (typically) over-determined matrix equation: + + .. math:: V(x) * c = w * y, + + where `V` is the weighted pseudo Vandermonde matrix of `x`, `c` are the + coefficients to be solved for, `w` are the weights, and `y` are the + observed values. This equation is then solved using the singular value + decomposition of `V`. + + If some of the singular values of `V` are so small that they are + neglected (and `full` == ``False``), a `RankWarning` will be raised. + This means that the coefficient values may be poorly determined. + Fitting to a lower order polynomial will usually get rid of the warning + (but may not be what you want, of course; if you have independent + reason(s) for choosing the degree which isn't working, you may have to: + a) reconsider those reasons, and/or b) reconsider the quality of your + data). The `rcond` parameter can also be set to a value smaller than + its default, but the resulting fit may be spurious and have large + contributions from roundoff error. + + Polynomial fits using double precision tend to "fail" at about + (polynomial) degree 20. Fits using Chebyshev or Legendre series are + generally better conditioned, but much can still depend on the + distribution of the sample points and the smoothness of the data. If + the quality of the fit is inadequate, splines may be a good + alternative. + + Examples + -------- + >>> np.random.seed(123) + >>> from numpy.polynomial import polynomial as P + >>> x = np.linspace(-1,1,51) # x "data": [-1, -0.96, ..., 0.96, 1] + >>> y = x**3 - x + np.random.randn(len(x)) # x^3 - x + N(0,1) "noise" + >>> c, stats = P.polyfit(x,y,3,full=True) + >>> np.random.seed(123) + >>> c # c[0], c[2] should be approx. 0, c[1] approx. -1, c[3] approx. 1 + array([ 0.01909725, -1.30598256, -0.00577963, 1.02644286]) # may vary + >>> stats # note the large SSR, explaining the rather poor results + [array([ 38.06116253]), 4, array([ 1.38446749, 1.32119158, 0.50443316, # may vary + 0.28853036]), 1.1324274851176597e-014] + + Same thing without the added noise + + >>> y = x**3 - x + >>> c, stats = P.polyfit(x,y,3,full=True) + >>> c # c[0], c[2] should be "very close to 0", c[1] ~= -1, c[3] ~= 1 + array([-6.36925336e-18, -1.00000000e+00, -4.08053781e-16, 1.00000000e+00]) + >>> stats # note the minuscule SSR + [array([ 7.46346754e-31]), 4, array([ 1.38446749, 1.32119158, # may vary + 0.50443316, 0.28853036]), 1.1324274851176597e-014] + + """ + return pu._fit(polyvander, x, y, deg, rcond, full, w) + + +def polycompanion(c): + """ + Return the companion matrix of c. + + The companion matrix for power series cannot be made symmetric by + scaling the basis, so this function differs from those for the + orthogonal polynomials. 
+ + Parameters + ---------- + c : array_like + 1-D array of polynomial coefficients ordered from low to high + degree. + + Returns + ------- + mat : ndarray + Companion matrix of dimensions (deg, deg). + + Notes + ----- + + .. versionadded:: 1.7.0 + + """ + # c is a trimmed copy + [c] = pu.as_series([c]) + if len(c) < 2: + raise ValueError('Series must have maximum degree of at least 1.') + if len(c) == 2: + return np.array([[-c[0]/c[1]]]) + + n = len(c) - 1 + mat = np.zeros((n, n), dtype=c.dtype) + bot = mat.reshape(-1)[n::n+1] + bot[...] = 1 + mat[:, -1] -= c[:-1]/c[-1] + return mat + + +def polyroots(c): + """ + Compute the roots of a polynomial. + + Return the roots (a.k.a. "zeros") of the polynomial + + .. math:: p(x) = \\sum_i c[i] * x^i. + + Parameters + ---------- + c : 1-D array_like + 1-D array of polynomial coefficients. + + Returns + ------- + out : ndarray + Array of the roots of the polynomial. If all the roots are real, + then `out` is also real, otherwise it is complex. + + See Also + -------- + numpy.polynomial.chebyshev.chebroots + numpy.polynomial.legendre.legroots + numpy.polynomial.laguerre.lagroots + numpy.polynomial.hermite.hermroots + numpy.polynomial.hermite_e.hermeroots + + Notes + ----- + The root estimates are obtained as the eigenvalues of the companion + matrix. Roots far from the origin of the complex plane may have large + errors due to the numerical instability of the power series for such + values. Roots with multiplicity greater than 1 will also show larger + errors as the value of the series near such points is relatively + insensitive to errors in the roots. Isolated roots near the origin can + be improved by a few iterations of Newton's method. + + Examples + -------- + >>> import numpy.polynomial.polynomial as poly + >>> poly.polyroots(poly.polyfromroots((-1,0,1))) + array([-1., 0., 1.]) + >>> poly.polyroots(poly.polyfromroots((-1,0,1))).dtype + dtype('float64') + >>> j = complex(0,1) + >>> poly.polyroots(poly.polyfromroots((-j,0,j))) + array([ 0.00000000e+00+0.j, 0.00000000e+00+1.j, 2.77555756e-17-1.j]) # may vary + + """ + # c is a trimmed copy + [c] = pu.as_series([c]) + if len(c) < 2: + return np.array([], dtype=c.dtype) + if len(c) == 2: + return np.array([-c[0]/c[1]]) + + # rotated companion matrix reduces error + m = polycompanion(c)[::-1,::-1] + r = la.eigvals(m) + r.sort() + return r + + +# +# polynomial class +# + +class Polynomial(ABCPolyBase): + """A power series class. + + The Polynomial class provides the standard Python numerical methods + '+', '-', '*', '//', '%', 'divmod', '**', and '()' as well as the + attributes and methods listed in the `ABCPolyBase` documentation. + + Parameters + ---------- + coef : array_like + Polynomial coefficients in order of increasing degree, i.e., + ``(1, 2, 3)`` give ``1 + 2*x + 3*x**2``. + domain : (2,) array_like, optional + Domain to use. The interval ``[domain[0], domain[1]]`` is mapped + to the interval ``[window[0], window[1]]`` by shifting and scaling. + The default value is [-1, 1]. + window : (2,) array_like, optional + Window, see `domain` for its use. The default value is [-1, 1]. + + ..
versionadded:: 1.6.0 + + """ + # Virtual Functions + _add = staticmethod(polyadd) + _sub = staticmethod(polysub) + _mul = staticmethod(polymul) + _div = staticmethod(polydiv) + _pow = staticmethod(polypow) + _val = staticmethod(polyval) + _int = staticmethod(polyint) + _der = staticmethod(polyder) + _fit = staticmethod(polyfit) + _line = staticmethod(polyline) + _roots = staticmethod(polyroots) + _fromroots = staticmethod(polyfromroots) + + # Virtual properties + domain = np.array(polydomain) + window = np.array(polydomain) + basis_name = None + + @classmethod + def _str_term_unicode(cls, i, arg_str): + return f"·{arg_str}{i.translate(cls._superscript_mapping)}" + + @staticmethod + def _str_term_ascii(i, arg_str): + return f" {arg_str}**{i}" + + @staticmethod + def _repr_latex_term(i, arg_str, needs_parens): + if needs_parens: + arg_str = rf"\left({arg_str}\right)" + if i == 0: + return '1' + elif i == 1: + return arg_str + else: + return f"{arg_str}^{{{i}}}" diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/polynomial.pyi b/.local/lib/python3.8/site-packages/numpy/polynomial/polynomial.pyi new file mode 100644 index 0000000..f779300 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/polynomial/polynomial.pyi @@ -0,0 +1,41 @@ +from typing import Any, List + +from numpy import ndarray, dtype, int_ +from numpy.polynomial._polybase import ABCPolyBase +from numpy.polynomial.polyutils import trimcoef + +__all__: list[str] + +polytrim = trimcoef + +polydomain: ndarray[Any, dtype[int_]] +polyzero: ndarray[Any, dtype[int_]] +polyone: ndarray[Any, dtype[int_]] +polyx: ndarray[Any, dtype[int_]] + +def polyline(off, scl): ... +def polyfromroots(roots): ... +def polyadd(c1, c2): ... +def polysub(c1, c2): ... +def polymulx(c): ... +def polymul(c1, c2): ... +def polydiv(c1, c2): ... +def polypow(c, pow, maxpower=...): ... +def polyder(c, m=..., scl=..., axis=...): ... +def polyint(c, m=..., k=..., lbnd=..., scl=..., axis=...): ... +def polyval(x, c, tensor=...): ... +def polyvalfromroots(x, r, tensor=...): ... +def polyval2d(x, y, c): ... +def polygrid2d(x, y, c): ... +def polyval3d(x, y, z, c): ... +def polygrid3d(x, y, z, c): ... +def polyvander(x, deg): ... +def polyvander2d(x, y, deg): ... +def polyvander3d(x, y, z, deg): ... +def polyfit(x, y, deg, rcond=..., full=..., w=...): ... +def polyroots(c): ... + +class Polynomial(ABCPolyBase): + domain: Any + window: Any + basis_name: Any diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/polyutils.py b/.local/lib/python3.8/site-packages/numpy/polynomial/polyutils.py new file mode 100644 index 0000000..a2bc75a --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/polynomial/polyutils.py @@ -0,0 +1,750 @@ +""" +Utility classes and functions for the polynomial modules. + +This module provides: error and warning objects; a polynomial base class; +and some routines used in both the `polynomial` and `chebyshev` modules. + +Warning objects +--------------- + +.. autosummary:: + :toctree: generated/ + + RankWarning raised in least-squares fit for rank-deficient matrix. + +Functions +--------- + +.. autosummary:: + :toctree: generated/ + + as_series convert list of array_likes into 1-D arrays of common type. + trimseq remove trailing zeros. + trimcoef remove small trailing coefficients. + getdomain return the domain appropriate for a given set of abscissae. + mapdomain maps points between domains. + mapparms parameters of the linear map between domains. 
+ +""" +import operator +import functools +import warnings + +import numpy as np + +__all__ = [ + 'RankWarning', 'as_series', 'trimseq', + 'trimcoef', 'getdomain', 'mapdomain', 'mapparms'] + +# +# Warnings and Exceptions +# + +class RankWarning(UserWarning): + """Issued by chebfit when the design matrix is rank deficient.""" + pass + +# +# Helper functions to convert inputs to 1-D arrays +# +def trimseq(seq): + """Remove small Poly series coefficients. + + Parameters + ---------- + seq : sequence + Sequence of Poly series coefficients. This routine fails for + empty sequences. + + Returns + ------- + series : sequence + Subsequence with trailing zeros removed. If the resulting sequence + would be empty, return the first element. The returned sequence may + or may not be a view. + + Notes + ----- + Do not lose the type info if the sequence contains unknown objects. + + """ + if len(seq) == 0: + return seq + else: + for i in range(len(seq) - 1, -1, -1): + if seq[i] != 0: + break + return seq[:i+1] + + +def as_series(alist, trim=True): + """ + Return argument as a list of 1-d arrays. + + The returned list contains array(s) of dtype double, complex double, or + object. A 1-d argument of shape ``(N,)`` is parsed into ``N`` arrays of + size one; a 2-d argument of shape ``(M,N)`` is parsed into ``M`` arrays + of size ``N`` (i.e., is "parsed by row"); and a higher dimensional array + raises a Value Error if it is not first reshaped into either a 1-d or 2-d + array. + + Parameters + ---------- + alist : array_like + A 1- or 2-d array_like + trim : boolean, optional + When True, trailing zeros are removed from the inputs. + When False, the inputs are passed through intact. + + Returns + ------- + [a1, a2,...] : list of 1-D arrays + A copy of the input data as a list of 1-d arrays. + + Raises + ------ + ValueError + Raised when `as_series` cannot convert its input to 1-d arrays, or at + least one of the resulting arrays is empty. + + Examples + -------- + >>> from numpy.polynomial import polyutils as pu + >>> a = np.arange(4) + >>> pu.as_series(a) + [array([0.]), array([1.]), array([2.]), array([3.])] + >>> b = np.arange(6).reshape((2,3)) + >>> pu.as_series(b) + [array([0., 1., 2.]), array([3., 4., 5.])] + + >>> pu.as_series((1, np.arange(3), np.arange(2, dtype=np.float16))) + [array([1.]), array([0., 1., 2.]), array([0., 1.])] + + >>> pu.as_series([2, [1.1, 0.]]) + [array([2.]), array([1.1])] + + >>> pu.as_series([2, [1.1, 0.]], trim=False) + [array([2.]), array([1.1, 0. ])] + + """ + arrays = [np.array(a, ndmin=1, copy=False) for a in alist] + if min([a.size for a in arrays]) == 0: + raise ValueError("Coefficient array is empty") + if any(a.ndim != 1 for a in arrays): + raise ValueError("Coefficient array is not 1-d") + if trim: + arrays = [trimseq(a) for a in arrays] + + if any(a.dtype == np.dtype(object) for a in arrays): + ret = [] + for a in arrays: + if a.dtype != np.dtype(object): + tmp = np.empty(len(a), dtype=np.dtype(object)) + tmp[:] = a[:] + ret.append(tmp) + else: + ret.append(a.copy()) + else: + try: + dtype = np.common_type(*arrays) + except Exception as e: + raise ValueError("Coefficient arrays have no common type") from e + ret = [np.array(a, copy=True, dtype=dtype) for a in arrays] + return ret + + +def trimcoef(c, tol=0): + """ + Remove "small" "trailing" coefficients from a polynomial. 
+ + "Small" means "small in absolute value" and is controlled by the + parameter `tol`; "trailing" means highest order coefficient(s), e.g., in + ``[0, 1, 1, 0, 0]`` (which represents ``0 + x + x**2 + 0*x**3 + 0*x**4``) + both the 3-rd and 4-th order coefficients would be "trimmed." + + Parameters + ---------- + c : array_like + 1-d array of coefficients, ordered from lowest order to highest. + tol : number, optional + Trailing (i.e., highest order) elements with absolute value less + than or equal to `tol` (default value is zero) are removed. + + Returns + ------- + trimmed : ndarray + 1-d array with trailing zeros removed. If the resulting series + would be empty, a series containing a single zero is returned. + + Raises + ------ + ValueError + If `tol` < 0 + + See Also + -------- + trimseq + + Examples + -------- + >>> from numpy.polynomial import polyutils as pu + >>> pu.trimcoef((0,0,3,0,5,0,0)) + array([0., 0., 3., 0., 5.]) + >>> pu.trimcoef((0,0,1e-3,0,1e-5,0,0),1e-3) # item == tol is trimmed + array([0.]) + >>> i = complex(0,1) # works for complex + >>> pu.trimcoef((3e-4,1e-3*(1-i),5e-4,2e-5*(1+i)), 1e-3) + array([0.0003+0.j , 0.001 -0.001j]) + + """ + if tol < 0: + raise ValueError("tol must be non-negative") + + [c] = as_series([c]) + [ind] = np.nonzero(np.abs(c) > tol) + if len(ind) == 0: + return c[:1]*0 + else: + return c[:ind[-1] + 1].copy() + +def getdomain(x): + """ + Return a domain suitable for given abscissae. + + Find a domain suitable for a polynomial or Chebyshev series + defined at the values supplied. + + Parameters + ---------- + x : array_like + 1-d array of abscissae whose domain will be determined. + + Returns + ------- + domain : ndarray + 1-d array containing two values. If the inputs are complex, then + the two returned points are the lower left and upper right corners + of the smallest rectangle (aligned with the axes) in the complex + plane containing the points `x`. If the inputs are real, then the + two points are the ends of the smallest interval containing the + points `x`. + + See Also + -------- + mapparms, mapdomain + + Examples + -------- + >>> from numpy.polynomial import polyutils as pu + >>> points = np.arange(4)**2 - 5; points + array([-5, -4, -1, 4]) + >>> pu.getdomain(points) + array([-5., 4.]) + >>> c = np.exp(complex(0,1)*np.pi*np.arange(12)/6) # unit circle + >>> pu.getdomain(c) + array([-1.-1.j, 1.+1.j]) + + """ + [x] = as_series([x], trim=False) + if x.dtype.char in np.typecodes['Complex']: + rmin, rmax = x.real.min(), x.real.max() + imin, imax = x.imag.min(), x.imag.max() + return np.array((complex(rmin, imin), complex(rmax, imax))) + else: + return np.array((x.min(), x.max())) + +def mapparms(old, new): + """ + Linear map parameters between domains. + + Return the parameters of the linear map ``offset + scale*x`` that maps + `old` to `new` such that ``old[i] -> new[i]``, ``i = 0, 1``. + + Parameters + ---------- + old, new : array_like + Domains. Each domain must (successfully) convert to a 1-d array + containing precisely two values. + + Returns + ------- + offset, scale : scalars + The map ``L(x) = offset + scale*x`` maps the first domain to the + second. + + See Also + -------- + getdomain, mapdomain + + Notes + ----- + Also works for complex numbers, and thus can be used to calculate the + parameters required to map any line in the complex plane to any other + line therein. 
+ + Examples + -------- + >>> from numpy.polynomial import polyutils as pu + >>> pu.mapparms((-1,1),(-1,1)) + (0.0, 1.0) + >>> pu.mapparms((1,-1),(-1,1)) + (-0.0, -1.0) + >>> i = complex(0,1) + >>> pu.mapparms((-i,-1),(1,i)) + ((1+1j), (1-0j)) + + """ + oldlen = old[1] - old[0] + newlen = new[1] - new[0] + off = (old[1]*new[0] - old[0]*new[1])/oldlen + scl = newlen/oldlen + return off, scl + +def mapdomain(x, old, new): + """ + Apply linear map to input points. + + The linear map ``offset + scale*x`` that maps the domain `old` to + the domain `new` is applied to the points `x`. + + Parameters + ---------- + x : array_like + Points to be mapped. If `x` is a subtype of ndarray the subtype + will be preserved. + old, new : array_like + The two domains that determine the map. Each must (successfully) + convert to 1-d arrays containing precisely two values. + + Returns + ------- + x_out : ndarray + Array of points of the same shape as `x`, after application of the + linear map between the two domains. + + See Also + -------- + getdomain, mapparms + + Notes + ----- + Effectively, this implements: + + .. math:: + x\\_out = new[0] + m(x - old[0]) + + where + + .. math:: + m = \\frac{new[1]-new[0]}{old[1]-old[0]} + + Examples + -------- + >>> from numpy.polynomial import polyutils as pu + >>> old_domain = (-1,1) + >>> new_domain = (0,2*np.pi) + >>> x = np.linspace(-1,1,6); x + array([-1. , -0.6, -0.2, 0.2, 0.6, 1. ]) + >>> x_out = pu.mapdomain(x, old_domain, new_domain); x_out + array([ 0. , 1.25663706, 2.51327412, 3.76991118, 5.02654825, # may vary + 6.28318531]) + >>> x - pu.mapdomain(x_out, new_domain, old_domain) + array([0., 0., 0., 0., 0., 0.]) + + Also works for complex numbers (and thus can be used to map any line in + the complex plane to any other line therein). + + >>> i = complex(0,1) + >>> old = (-1 - i, 1 + i) + >>> new = (-1 + i, 1 - i) + >>> z = np.linspace(old[0], old[1], 6); z + array([-1. -1.j , -0.6-0.6j, -0.2-0.2j, 0.2+0.2j, 0.6+0.6j, 1. +1.j ]) + >>> new_z = pu.mapdomain(z, old, new); new_z + array([-1.0+1.j , -0.6+0.6j, -0.2+0.2j, 0.2-0.2j, 0.6-0.6j, 1.0-1.j ]) # may vary + + """ + x = np.asanyarray(x) + off, scl = mapparms(old, new) + return off + scl*x + + +def _nth_slice(i, ndim): + sl = [np.newaxis] * ndim + sl[i] = slice(None) + return tuple(sl) + + +def _vander_nd(vander_fs, points, degrees): + r""" + A generalization of the Vandermonde matrix for N dimensions + + The result is built by combining the results of 1d Vandermonde matrices, + + .. math:: + W[i_0, \ldots, i_M, j_0, \ldots, j_N] = \prod_{k=0}^N{V_k(x_k)[i_0, \ldots, i_M, j_k]} + + where + + .. math:: + N &= \texttt{len(points)} = \texttt{len(degrees)} = \texttt{len(vander\_fs)} \\ + M &= \texttt{points[k].ndim} \\ + V_k &= \texttt{vander\_fs[k]} \\ + x_k &= \texttt{points[k]} \\ + 0 \le j_k &\le \texttt{degrees[k]} + + Expanding the one-dimensional :math:`V_k` functions gives: + + .. math:: + W[i_0, \ldots, i_M, j_0, \ldots, j_N] = \prod_{k=0}^N{B_{k, j_k}(x_k[i_0, \ldots, i_M])} + + where :math:`B_{k,m}` is the m'th basis of the polynomial construction used along + dimension :math:`k`. For a regular polynomial, :math:`B_{k, m}(x) = P_m(x) = x^m`. + + Parameters + ---------- + vander_fs : Sequence[function(array_like, int) -> ndarray] + The 1d vander function to use for each axis, such as ``polyvander`` + points : Sequence[array_like] + Arrays of point coordinates, all of the same shape. The dtypes + will be converted to either float64 or complex128 depending on + whether any of the elements are complex. 
Scalars are converted to + 1-D arrays. + This must be the same length as `vander_fs`. + degrees : Sequence[int] + The maximum degree (inclusive) to use for each axis. + This must be the same length as `vander_fs`. + + Returns + ------- + vander_nd : ndarray + An array of shape ``points[0].shape + tuple(d + 1 for d in degrees)``. + """ + n_dims = len(vander_fs) + if n_dims != len(points): + raise ValueError( + f"Expected {n_dims} dimensions of sample points, got {len(points)}") + if n_dims != len(degrees): + raise ValueError( + f"Expected {n_dims} dimensions of degrees, got {len(degrees)}") + if n_dims == 0: + raise ValueError("Unable to guess a dtype or shape when no points are given") + + # convert to the same shape and type + points = tuple(np.array(tuple(points), copy=False) + 0.0) + + # produce the vandermonde matrix for each dimension, placing the last + # axis of each in an independent trailing axis of the output + vander_arrays = ( + vander_fs[i](points[i], degrees[i])[(...,) + _nth_slice(i, n_dims)] + for i in range(n_dims) + ) + + # we checked this wasn't empty already, so no `initial` needed + return functools.reduce(operator.mul, vander_arrays) + + +def _vander_nd_flat(vander_fs, points, degrees): + """ + Like `_vander_nd`, but flattens the last ``len(degrees)`` axes into a single axis + + Used to implement the public ``<type>vander<n>d`` functions. + """ + v = _vander_nd(vander_fs, points, degrees) + return v.reshape(v.shape[:-len(degrees)] + (-1,)) + + +def _fromroots(line_f, mul_f, roots): + """ + Helper function used to implement the ``<type>fromroots`` functions. + + Parameters + ---------- + line_f : function(float, float) -> ndarray + The ``<type>line`` function, such as ``polyline`` + mul_f : function(array_like, array_like) -> ndarray + The ``<type>mul`` function, such as ``polymul`` + roots + See the ``<type>fromroots`` functions for more detail + """ + if len(roots) == 0: + return np.ones(1) + else: + [roots] = as_series([roots], trim=False) + roots.sort() + p = [line_f(-r, 1) for r in roots] + n = len(p) + while n > 1: + m, r = divmod(n, 2) + tmp = [mul_f(p[i], p[i+m]) for i in range(m)] + if r: + tmp[0] = mul_f(tmp[0], p[-1]) + p = tmp + n = m + return p[0] + + +def _valnd(val_f, c, *args): + """ + Helper function used to implement the ``<type>val<n>d`` functions. + + Parameters + ---------- + val_f : function(array_like, array_like, tensor: bool) -> array_like + The ``<type>val`` function, such as ``polyval`` + c, args + See the ``<type>val<n>d`` functions for more detail + """ + args = [np.asanyarray(a) for a in args] + shape0 = args[0].shape + if not all((a.shape == shape0 for a in args[1:])): + if len(args) == 3: + raise ValueError('x, y, z are incompatible') + elif len(args) == 2: + raise ValueError('x, y are incompatible') + else: + raise ValueError('ordinates are incompatible') + it = iter(args) + x0 = next(it) + + # use tensor on only the first + c = val_f(x0, c) + for xi in it: + c = val_f(xi, c, tensor=False) + return c + + +def _gridnd(val_f, c, *args): + """ + Helper function used to implement the ``<type>grid<n>d`` functions. + + Parameters + ---------- + val_f : function(array_like, array_like, tensor: bool) -> array_like + The ``<type>val`` function, such as ``polyval`` + c, args + See the ``<type>grid<n>d`` functions for more detail + """ + for xi in args: + c = val_f(xi, c) + return c + + +def _div(mul_f, c1, c2): + """ + Helper function used to implement the ``<type>div`` functions. + + Implementation uses repeated subtraction of c2 multiplied by the nth basis. + For some polynomial types, a more efficient approach may be possible.
+ + Parameters + ---------- + mul_f : function(array_like, array_like) -> array_like + The ``<type>mul`` function, such as ``polymul`` + c1, c2 + See the ``<type>div`` functions for more detail + """ + # c1, c2 are trimmed copies + [c1, c2] = as_series([c1, c2]) + if c2[-1] == 0: + raise ZeroDivisionError() + + lc1 = len(c1) + lc2 = len(c2) + if lc1 < lc2: + return c1[:1]*0, c1 + elif lc2 == 1: + return c1/c2[-1], c1[:1]*0 + else: + quo = np.empty(lc1 - lc2 + 1, dtype=c1.dtype) + rem = c1 + for i in range(lc1 - lc2, - 1, -1): + p = mul_f([0]*i + [1], c2) + q = rem[-1]/p[-1] + rem = rem[:-1] - q*p[:-1] + quo[i] = q + return quo, trimseq(rem) + + +def _add(c1, c2): + """ Helper function used to implement the ``<type>add`` functions. """ + # c1, c2 are trimmed copies + [c1, c2] = as_series([c1, c2]) + if len(c1) > len(c2): + c1[:c2.size] += c2 + ret = c1 + else: + c2[:c1.size] += c1 + ret = c2 + return trimseq(ret) + + +def _sub(c1, c2): + """ Helper function used to implement the ``<type>sub`` functions. """ + # c1, c2 are trimmed copies + [c1, c2] = as_series([c1, c2]) + if len(c1) > len(c2): + c1[:c2.size] -= c2 + ret = c1 + else: + c2 = -c2 + c2[:c1.size] += c1 + ret = c2 + return trimseq(ret) + + +def _fit(vander_f, x, y, deg, rcond=None, full=False, w=None): + """ + Helper function used to implement the ``<type>fit`` functions. + + Parameters + ---------- + vander_f : function(array_like, int) -> ndarray + The 1d vander function, such as ``polyvander`` + c1, c2 + See the ``<type>fit`` functions for more detail + """ + x = np.asarray(x) + 0.0 + y = np.asarray(y) + 0.0 + deg = np.asarray(deg) + + # check arguments. + if deg.ndim > 1 or deg.dtype.kind not in 'iu' or deg.size == 0: + raise TypeError("deg must be an int or non-empty 1-D array of int") + if deg.min() < 0: + raise ValueError("expected deg >= 0") + if x.ndim != 1: + raise TypeError("expected 1D vector for x") + if x.size == 0: + raise TypeError("expected non-empty vector for x") + if y.ndim < 1 or y.ndim > 2: + raise TypeError("expected 1D or 2D array for y") + if len(x) != len(y): + raise TypeError("expected x and y to have same length") + + if deg.ndim == 0: + lmax = deg + order = lmax + 1 + van = vander_f(x, lmax) + else: + deg = np.sort(deg) + lmax = deg[-1] + order = len(deg) + van = vander_f(x, lmax)[:, deg] + + # set up the least squares matrices in transposed form + lhs = van.T + rhs = y.T + if w is not None: + w = np.asarray(w) + 0.0 + if w.ndim != 1: + raise TypeError("expected 1D vector for w") + if len(x) != len(w): + raise TypeError("expected x and w to have same length") + # apply weights. Don't use inplace operations as they + # can cause problems with NA. + lhs = lhs * w + rhs = rhs * w + + # set rcond + if rcond is None: + rcond = len(x)*np.finfo(x.dtype).eps + + # Determine the norms of the design matrix columns. + if issubclass(lhs.dtype.type, np.complexfloating): + scl = np.sqrt((np.square(lhs.real) + np.square(lhs.imag)).sum(1)) + else: + scl = np.sqrt(np.square(lhs).sum(1)) + scl[scl == 0] = 1 + + # Solve the least squares problem.
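+ # (Dividing by `scl` below equilibrates the columns of the design matrix; + # the solution is rescaled by 1/scl immediately afterwards, so the + # returned coefficients are unchanged while conditioning improves.)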
+ c, resids, rank, s = np.linalg.lstsq(lhs.T/scl, rhs.T, rcond) + c = (c.T/scl).T + + # Expand c to include non-fitted coefficients which are set to zero + if deg.ndim > 0: + if c.ndim == 2: + cc = np.zeros((lmax+1, c.shape[1]), dtype=c.dtype) + else: + cc = np.zeros(lmax+1, dtype=c.dtype) + cc[deg] = c + c = cc + + # warn on rank reduction + if rank != order and not full: + msg = "The fit may be poorly conditioned" + warnings.warn(msg, RankWarning, stacklevel=2) + + if full: + return c, [resids, rank, s, rcond] + else: + return c + + +def _pow(mul_f, c, pow, maxpower): + """ + Helper function used to implement the ``<type>pow`` functions. + + Parameters + ---------- + mul_f : function(array_like, array_like) -> ndarray + The ``<type>mul`` function, such as ``polymul`` + c : array_like + 1-D array of series coefficients + pow, maxpower + See the ``<type>pow`` functions for more detail + """ + # c is a trimmed copy + [c] = as_series([c]) + power = int(pow) + if power != pow or power < 0: + raise ValueError("Power must be a non-negative integer.") + elif maxpower is not None and power > maxpower: + raise ValueError("Power is too large") + elif power == 0: + return np.array([1], dtype=c.dtype) + elif power == 1: + return c + else: + # This can be made more efficient by using powers of two + # in the usual way. + prd = c + for i in range(2, power + 1): + prd = mul_f(prd, c) + return prd + + +def _deprecate_as_int(x, desc): + """ + Like `operator.index`, but emits a deprecation warning when passed a float + + Parameters + ---------- + x : int-like, or float with integral value + Value to interpret as an integer + desc : str + description to include in any error message + + Raises + ------ + TypeError : if x is a non-integral float or non-numeric + DeprecationWarning : if x is an integral float + """ + try: + return operator.index(x) + except TypeError as e: + # Numpy 1.17.0, 2019-03-11 + try: + ix = int(x) + except TypeError: + pass + else: + if ix == x: + warnings.warn( + f"In future, this will raise TypeError, as {desc} will " + "need to be an integer not just an integral float.", + DeprecationWarning, + stacklevel=3 + ) + return ix + + raise TypeError(f"{desc} must be an integer") from e diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/polyutils.pyi b/.local/lib/python3.8/site-packages/numpy/polynomial/polyutils.pyi new file mode 100644 index 0000000..52c9cfc --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/polynomial/polyutils.pyi @@ -0,0 +1,12 @@ +from typing import List + +__all__: List[str] + +class RankWarning(UserWarning): ... + +def trimseq(seq): ... +def as_series(alist, trim=...): ... +def trimcoef(c, tol=...): ... +def getdomain(x): ... +def mapparms(old, new): ... +def mapdomain(x, old, new): ...
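A quick doctest-style illustration of how the domain-mapping helpers above compose (an editorial sketch, not part of the vendored sources): `mapparms` returns the offset and scale of the affine map between two domains, and `mapdomain` applies that map to points.

>>> import numpy as np
>>> from numpy.polynomial import polyutils as pu
>>> off, scl = pu.mapparms((-1, 1), (0, 4))  # L(x) = off + scl*x
>>> off, scl
(2.0, 2.0)
>>> pu.mapdomain(np.array([-1., 0., 1.]), (-1, 1), (0, 4))
array([0., 2., 4.])

This is the same machinery `ABCPolyBase` uses to shift a series between its `domain` and its `window`.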
diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/setup.py b/.local/lib/python3.8/site-packages/numpy/polynomial/setup.py new file mode 100644 index 0000000..b58e867 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/polynomial/setup.py @@ -0,0 +1,10 @@ +def configuration(parent_package='',top_path=None): + from numpy.distutils.misc_util import Configuration + config = Configuration('polynomial', parent_package, top_path) + config.add_subpackage('tests') + config.add_data_files('*.pyi') + return config + +if __name__ == '__main__': + from numpy.distutils.core import setup + setup(configuration=configuration) diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/tests/__init__.py b/.local/lib/python3.8/site-packages/numpy/polynomial/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/tests/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/polynomial/tests/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..41f7f8c Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/polynomial/tests/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/tests/__pycache__/test_chebyshev.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/polynomial/tests/__pycache__/test_chebyshev.cpython-38.pyc new file mode 100644 index 0000000..9d15fa8 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/polynomial/tests/__pycache__/test_chebyshev.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/tests/__pycache__/test_classes.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/polynomial/tests/__pycache__/test_classes.cpython-38.pyc new file mode 100644 index 0000000..26b6e3c Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/polynomial/tests/__pycache__/test_classes.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/tests/__pycache__/test_hermite.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/polynomial/tests/__pycache__/test_hermite.cpython-38.pyc new file mode 100644 index 0000000..b6d9af1 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/polynomial/tests/__pycache__/test_hermite.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/tests/__pycache__/test_hermite_e.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/polynomial/tests/__pycache__/test_hermite_e.cpython-38.pyc new file mode 100644 index 0000000..93b155f Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/polynomial/tests/__pycache__/test_hermite_e.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/tests/__pycache__/test_laguerre.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/polynomial/tests/__pycache__/test_laguerre.cpython-38.pyc new file mode 100644 index 0000000..23eb982 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/polynomial/tests/__pycache__/test_laguerre.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/tests/__pycache__/test_legendre.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/polynomial/tests/__pycache__/test_legendre.cpython-38.pyc new file mode 100644 index 0000000..4b64d80 Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/numpy/polynomial/tests/__pycache__/test_legendre.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/tests/__pycache__/test_polynomial.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/polynomial/tests/__pycache__/test_polynomial.cpython-38.pyc new file mode 100644 index 0000000..3cd9e30 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/polynomial/tests/__pycache__/test_polynomial.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/tests/__pycache__/test_polyutils.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/polynomial/tests/__pycache__/test_polyutils.cpython-38.pyc new file mode 100644 index 0000000..1daf828 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/polynomial/tests/__pycache__/test_polyutils.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/tests/__pycache__/test_printing.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/polynomial/tests/__pycache__/test_printing.cpython-38.pyc new file mode 100644 index 0000000..8dc98d1 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/polynomial/tests/__pycache__/test_printing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/tests/test_chebyshev.py b/.local/lib/python3.8/site-packages/numpy/polynomial/tests/test_chebyshev.py new file mode 100644 index 0000000..2f54beb --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/polynomial/tests/test_chebyshev.py @@ -0,0 +1,619 @@ +"""Tests for chebyshev module. + +""" +from functools import reduce + +import numpy as np +import numpy.polynomial.chebyshev as cheb +from numpy.polynomial.polynomial import polyval +from numpy.testing import ( + assert_almost_equal, assert_raises, assert_equal, assert_, + ) + + +def trim(x): + return cheb.chebtrim(x, tol=1e-6) + +T0 = [1] +T1 = [0, 1] +T2 = [-1, 0, 2] +T3 = [0, -3, 0, 4] +T4 = [1, 0, -8, 0, 8] +T5 = [0, 5, 0, -20, 0, 16] +T6 = [-1, 0, 18, 0, -48, 0, 32] +T7 = [0, -7, 0, 56, 0, -112, 0, 64] +T8 = [1, 0, -32, 0, 160, 0, -256, 0, 128] +T9 = [0, 9, 0, -120, 0, 432, 0, -576, 0, 256] + +Tlist = [T0, T1, T2, T3, T4, T5, T6, T7, T8, T9] + + +class TestPrivate: + + def test__cseries_to_zseries(self): + for i in range(5): + inp = np.array([2] + [1]*i, np.double) + tgt = np.array([.5]*i + [2] + [.5]*i, np.double) + res = cheb._cseries_to_zseries(inp) + assert_equal(res, tgt) + + def test__zseries_to_cseries(self): + for i in range(5): + inp = np.array([.5]*i + [2] + [.5]*i, np.double) + tgt = np.array([2] + [1]*i, np.double) + res = cheb._zseries_to_cseries(inp) + assert_equal(res, tgt) + + +class TestConstants: + + def test_chebdomain(self): + assert_equal(cheb.chebdomain, [-1, 1]) + + def test_chebzero(self): + assert_equal(cheb.chebzero, [0]) + + def test_chebone(self): + assert_equal(cheb.chebone, [1]) + + def test_chebx(self): + assert_equal(cheb.chebx, [0, 1]) + + +class TestArithmetic: + + def test_chebadd(self): + for i in range(5): + for j in range(5): + msg = f"At i={i}, j={j}" + tgt = np.zeros(max(i, j) + 1) + tgt[i] += 1 + tgt[j] += 1 + res = cheb.chebadd([0]*i + [1], [0]*j + [1]) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + def test_chebsub(self): + for i in range(5): + for j in range(5): + msg = f"At i={i}, j={j}" + tgt = np.zeros(max(i, j) + 1) + tgt[i] += 1 + tgt[j] -= 1 + res = cheb.chebsub([0]*i + [1], [0]*j + [1]) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + 
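# multiplying by x maps basis terms to their neighbors, x*T_n = (T_(n-1) + T_(n+1))/2 for n >= 1 with x*T_0 = T_1, which is why the target below is [0]*(i - 1) + [.5, 0, .5] +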
def test_chebmulx(self): + assert_equal(cheb.chebmulx([0]), [0]) + assert_equal(cheb.chebmulx([1]), [0, 1]) + for i in range(1, 5): + ser = [0]*i + [1] + tgt = [0]*(i - 1) + [.5, 0, .5] + assert_equal(cheb.chebmulx(ser), tgt) + + def test_chebmul(self): + for i in range(5): + for j in range(5): + msg = f"At i={i}, j={j}" + tgt = np.zeros(i + j + 1) + tgt[i + j] += .5 + tgt[abs(i - j)] += .5 + res = cheb.chebmul([0]*i + [1], [0]*j + [1]) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + def test_chebdiv(self): + for i in range(5): + for j in range(5): + msg = f"At i={i}, j={j}" + ci = [0]*i + [1] + cj = [0]*j + [1] + tgt = cheb.chebadd(ci, cj) + quo, rem = cheb.chebdiv(tgt, ci) + res = cheb.chebadd(cheb.chebmul(quo, ci), rem) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + def test_chebpow(self): + for i in range(5): + for j in range(5): + msg = f"At i={i}, j={j}" + c = np.arange(i + 1) + tgt = reduce(cheb.chebmul, [c]*j, np.array([1])) + res = cheb.chebpow(c, j) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + +class TestEvaluation: + # coefficients of 1 + 2*x + 3*x**2 + c1d = np.array([2.5, 2., 1.5]) + c2d = np.einsum('i,j->ij', c1d, c1d) + c3d = np.einsum('i,j,k->ijk', c1d, c1d, c1d) + + # some random values in [-1, 1) + x = np.random.random((3, 5))*2 - 1 + y = polyval(x, [1., 2., 3.]) + + def test_chebval(self): + #check empty input + assert_equal(cheb.chebval([], [1]).size, 0) + + #check normal input) + x = np.linspace(-1, 1) + y = [polyval(x, c) for c in Tlist] + for i in range(10): + msg = f"At i={i}" + tgt = y[i] + res = cheb.chebval(x, [0]*i + [1]) + assert_almost_equal(res, tgt, err_msg=msg) + + #check that shape is preserved + for i in range(3): + dims = [2]*i + x = np.zeros(dims) + assert_equal(cheb.chebval(x, [1]).shape, dims) + assert_equal(cheb.chebval(x, [1, 0]).shape, dims) + assert_equal(cheb.chebval(x, [1, 0, 0]).shape, dims) + + def test_chebval2d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test exceptions + assert_raises(ValueError, cheb.chebval2d, x1, x2[:2], self.c2d) + + #test values + tgt = y1*y2 + res = cheb.chebval2d(x1, x2, self.c2d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = cheb.chebval2d(z, z, self.c2d) + assert_(res.shape == (2, 3)) + + def test_chebval3d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test exceptions + assert_raises(ValueError, cheb.chebval3d, x1, x2, x3[:2], self.c3d) + + #test values + tgt = y1*y2*y3 + res = cheb.chebval3d(x1, x2, x3, self.c3d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = cheb.chebval3d(z, z, z, self.c3d) + assert_(res.shape == (2, 3)) + + def test_chebgrid2d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test values + tgt = np.einsum('i,j->ij', y1, y2) + res = cheb.chebgrid2d(x1, x2, self.c2d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = cheb.chebgrid2d(z, z, self.c2d) + assert_(res.shape == (2, 3)*2) + + def test_chebgrid3d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test values + tgt = np.einsum('i,j,k->ijk', y1, y2, y3) + res = cheb.chebgrid3d(x1, x2, x3, self.c3d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = cheb.chebgrid3d(z, z, z, self.c3d) + assert_(res.shape == (2, 3)*3) + + +class TestIntegral: + + def test_chebint(self): + # check exceptions + assert_raises(TypeError, cheb.chebint, [0], .5) + assert_raises(ValueError, cheb.chebint, [0], -1) + assert_raises(ValueError, cheb.chebint, [0], 1, [0, 0]) + assert_raises(ValueError, 
cheb.chebint, [0], lbnd=[0]) + assert_raises(ValueError, cheb.chebint, [0], scl=[0]) + assert_raises(TypeError, cheb.chebint, [0], axis=.5) + + # test integration of zero polynomial + for i in range(2, 5): + k = [0]*(i - 2) + [1] + res = cheb.chebint([0], m=i, k=k) + assert_almost_equal(res, [0, 1]) + + # check single integration with integration constant + for i in range(5): + scl = i + 1 + pol = [0]*i + [1] + tgt = [i] + [0]*i + [1/scl] + chebpol = cheb.poly2cheb(pol) + chebint = cheb.chebint(chebpol, m=1, k=[i]) + res = cheb.cheb2poly(chebint) + assert_almost_equal(trim(res), trim(tgt)) + + # check single integration with integration constant and lbnd + for i in range(5): + scl = i + 1 + pol = [0]*i + [1] + chebpol = cheb.poly2cheb(pol) + chebint = cheb.chebint(chebpol, m=1, k=[i], lbnd=-1) + assert_almost_equal(cheb.chebval(-1, chebint), i) + + # check single integration with integration constant and scaling + for i in range(5): + scl = i + 1 + pol = [0]*i + [1] + tgt = [i] + [0]*i + [2/scl] + chebpol = cheb.poly2cheb(pol) + chebint = cheb.chebint(chebpol, m=1, k=[i], scl=2) + res = cheb.cheb2poly(chebint) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with default k + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = cheb.chebint(tgt, m=1) + res = cheb.chebint(pol, m=j) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with defined k + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = cheb.chebint(tgt, m=1, k=[k]) + res = cheb.chebint(pol, m=j, k=list(range(j))) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with lbnd + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = cheb.chebint(tgt, m=1, k=[k], lbnd=-1) + res = cheb.chebint(pol, m=j, k=list(range(j)), lbnd=-1) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with scaling + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = cheb.chebint(tgt, m=1, k=[k], scl=2) + res = cheb.chebint(pol, m=j, k=list(range(j)), scl=2) + assert_almost_equal(trim(res), trim(tgt)) + + def test_chebint_axis(self): + # check that axis keyword works + c2d = np.random.random((3, 4)) + + tgt = np.vstack([cheb.chebint(c) for c in c2d.T]).T + res = cheb.chebint(c2d, axis=0) + assert_almost_equal(res, tgt) + + tgt = np.vstack([cheb.chebint(c) for c in c2d]) + res = cheb.chebint(c2d, axis=1) + assert_almost_equal(res, tgt) + + tgt = np.vstack([cheb.chebint(c, k=3) for c in c2d]) + res = cheb.chebint(c2d, k=3, axis=1) + assert_almost_equal(res, tgt) + + +class TestDerivative: + + def test_chebder(self): + # check exceptions + assert_raises(TypeError, cheb.chebder, [0], .5) + assert_raises(ValueError, cheb.chebder, [0], -1) + + # check that zeroth derivative does nothing + for i in range(5): + tgt = [0]*i + [1] + res = cheb.chebder(tgt, m=0) + assert_equal(trim(res), trim(tgt)) + + # check that derivation is the inverse of integration + for i in range(5): + for j in range(2, 5): + tgt = [0]*i + [1] + res = cheb.chebder(cheb.chebint(tgt, m=j), m=j) + assert_almost_equal(trim(res), trim(tgt)) + + # check derivation with scaling + for i in range(5): + for j in range(2, 5): + tgt = [0]*i + [1] + res = cheb.chebder(cheb.chebint(tgt, m=j, scl=2), m=j, scl=.5) + assert_almost_equal(trim(res), trim(tgt)) + + def 
test_chebder_axis(self): + # check that axis keyword works + c2d = np.random.random((3, 4)) + + tgt = np.vstack([cheb.chebder(c) for c in c2d.T]).T + res = cheb.chebder(c2d, axis=0) + assert_almost_equal(res, tgt) + + tgt = np.vstack([cheb.chebder(c) for c in c2d]) + res = cheb.chebder(c2d, axis=1) + assert_almost_equal(res, tgt) + + +class TestVander: + # some random values in [-1, 1) + x = np.random.random((3, 5))*2 - 1 + + def test_chebvander(self): + # check for 1d x + x = np.arange(3) + v = cheb.chebvander(x, 3) + assert_(v.shape == (3, 4)) + for i in range(4): + coef = [0]*i + [1] + assert_almost_equal(v[..., i], cheb.chebval(x, coef)) + + # check for 2d x + x = np.array([[1, 2], [3, 4], [5, 6]]) + v = cheb.chebvander(x, 3) + assert_(v.shape == (3, 2, 4)) + for i in range(4): + coef = [0]*i + [1] + assert_almost_equal(v[..., i], cheb.chebval(x, coef)) + + def test_chebvander2d(self): + # also tests chebval2d for non-square coefficient array + x1, x2, x3 = self.x + c = np.random.random((2, 3)) + van = cheb.chebvander2d(x1, x2, [1, 2]) + tgt = cheb.chebval2d(x1, x2, c) + res = np.dot(van, c.flat) + assert_almost_equal(res, tgt) + + # check shape + van = cheb.chebvander2d([x1], [x2], [1, 2]) + assert_(van.shape == (1, 5, 6)) + + def test_chebvander3d(self): + # also tests chebval3d for non-square coefficient array + x1, x2, x3 = self.x + c = np.random.random((2, 3, 4)) + van = cheb.chebvander3d(x1, x2, x3, [1, 2, 3]) + tgt = cheb.chebval3d(x1, x2, x3, c) + res = np.dot(van, c.flat) + assert_almost_equal(res, tgt) + + # check shape + van = cheb.chebvander3d([x1], [x2], [x3], [1, 2, 3]) + assert_(van.shape == (1, 5, 24)) + + +class TestFitting: + + def test_chebfit(self): + def f(x): + return x*(x - 1)*(x - 2) + + def f2(x): + return x**4 + x**2 + 1 + + # Test exceptions + assert_raises(ValueError, cheb.chebfit, [1], [1], -1) + assert_raises(TypeError, cheb.chebfit, [[1]], [1], 0) + assert_raises(TypeError, cheb.chebfit, [], [1], 0) + assert_raises(TypeError, cheb.chebfit, [1], [[[1]]], 0) + assert_raises(TypeError, cheb.chebfit, [1, 2], [1], 0) + assert_raises(TypeError, cheb.chebfit, [1], [1, 2], 0) + assert_raises(TypeError, cheb.chebfit, [1], [1], 0, w=[[1]]) + assert_raises(TypeError, cheb.chebfit, [1], [1], 0, w=[1, 1]) + assert_raises(ValueError, cheb.chebfit, [1], [1], [-1,]) + assert_raises(ValueError, cheb.chebfit, [1], [1], [2, -1, 6]) + assert_raises(TypeError, cheb.chebfit, [1], [1], []) + + # Test fit + x = np.linspace(0, 2) + y = f(x) + # + coef3 = cheb.chebfit(x, y, 3) + assert_equal(len(coef3), 4) + assert_almost_equal(cheb.chebval(x, coef3), y) + coef3 = cheb.chebfit(x, y, [0, 1, 2, 3]) + assert_equal(len(coef3), 4) + assert_almost_equal(cheb.chebval(x, coef3), y) + # + coef4 = cheb.chebfit(x, y, 4) + assert_equal(len(coef4), 5) + assert_almost_equal(cheb.chebval(x, coef4), y) + coef4 = cheb.chebfit(x, y, [0, 1, 2, 3, 4]) + assert_equal(len(coef4), 5) + assert_almost_equal(cheb.chebval(x, coef4), y) + # check things still work if deg is not in strict increasing + coef4 = cheb.chebfit(x, y, [2, 3, 4, 1, 0]) + assert_equal(len(coef4), 5) + assert_almost_equal(cheb.chebval(x, coef4), y) + # + coef2d = cheb.chebfit(x, np.array([y, y]).T, 3) + assert_almost_equal(coef2d, np.array([coef3, coef3]).T) + coef2d = cheb.chebfit(x, np.array([y, y]).T, [0, 1, 2, 3]) + assert_almost_equal(coef2d, np.array([coef3, coef3]).T) + # test weighting + w = np.zeros_like(x) + yw = y.copy() + w[1::2] = 1 + yw[0::2] = 0 + wcoef3 = cheb.chebfit(x, yw, 3, w=w) + assert_almost_equal(wcoef3, coef3) +
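# repeat the weighted fit with the degree given as an explicit list of terms +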
wcoef3 = cheb.chebfit(x, yw, [0, 1, 2, 3], w=w) + assert_almost_equal(wcoef3, coef3) + # + wcoef2d = cheb.chebfit(x, np.array([yw, yw]).T, 3, w=w) + assert_almost_equal(wcoef2d, np.array([coef3, coef3]).T) + wcoef2d = cheb.chebfit(x, np.array([yw, yw]).T, [0, 1, 2, 3], w=w) + assert_almost_equal(wcoef2d, np.array([coef3, coef3]).T) + # test scaling with complex values x points whose square + # is zero when summed. + x = [1, 1j, -1, -1j] + assert_almost_equal(cheb.chebfit(x, x, 1), [0, 1]) + assert_almost_equal(cheb.chebfit(x, x, [0, 1]), [0, 1]) + # test fitting only even polynomials + x = np.linspace(-1, 1) + y = f2(x) + coef1 = cheb.chebfit(x, y, 4) + assert_almost_equal(cheb.chebval(x, coef1), y) + coef2 = cheb.chebfit(x, y, [0, 2, 4]) + assert_almost_equal(cheb.chebval(x, coef2), y) + assert_almost_equal(coef1, coef2) + + +class TestInterpolate: + + def f(self, x): + return x * (x - 1) * (x - 2) + + def test_raises(self): + assert_raises(ValueError, cheb.chebinterpolate, self.f, -1) + assert_raises(TypeError, cheb.chebinterpolate, self.f, 10.) + + def test_dimensions(self): + for deg in range(1, 5): + assert_(cheb.chebinterpolate(self.f, deg).shape == (deg + 1,)) + + def test_approximation(self): + + def powx(x, p): + return x**p + + x = np.linspace(-1, 1, 10) + for deg in range(0, 10): + for p in range(0, deg + 1): + c = cheb.chebinterpolate(powx, deg, (p,)) + assert_almost_equal(cheb.chebval(x, c), powx(x, p), decimal=12) + + +class TestCompanion: + + def test_raises(self): + assert_raises(ValueError, cheb.chebcompanion, []) + assert_raises(ValueError, cheb.chebcompanion, [1]) + + def test_dimensions(self): + for i in range(1, 5): + coef = [0]*i + [1] + assert_(cheb.chebcompanion(coef).shape == (i, i)) + + def test_linear_root(self): + assert_(cheb.chebcompanion([1, 2])[0, 0] == -.5) + + +class TestGauss: + + def test_100(self): + x, w = cheb.chebgauss(100) + + # test orthogonality. Note that the results need to be normalized, + # otherwise the huge values that can arise from fast growing + # functions like Laguerre can be very confusing. 
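+ # vv below is the Gram matrix V^T diag(w) V of the sampled basis; scaling + # both sides by vd = 1/sqrt(diag(vv)) normalizes it, so orthogonality of + # the Chebyshev basis under its weight shows up as the identity matrix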
+ v = cheb.chebvander(x, 99) + vv = np.dot(v.T * w, v) + vd = 1/np.sqrt(vv.diagonal()) + vv = vd[:, None] * vv * vd + assert_almost_equal(vv, np.eye(100)) + + # check that the integral of 1 is correct + tgt = np.pi + assert_almost_equal(w.sum(), tgt) + + +class TestMisc: + + def test_chebfromroots(self): + res = cheb.chebfromroots([]) + assert_almost_equal(trim(res), [1]) + for i in range(1, 5): + roots = np.cos(np.linspace(-np.pi, 0, 2*i + 1)[1::2]) + tgt = [0]*i + [1] + res = cheb.chebfromroots(roots)*2**(i-1) + assert_almost_equal(trim(res), trim(tgt)) + + def test_chebroots(self): + assert_almost_equal(cheb.chebroots([1]), []) + assert_almost_equal(cheb.chebroots([1, 2]), [-.5]) + for i in range(2, 5): + tgt = np.linspace(-1, 1, i) + res = cheb.chebroots(cheb.chebfromroots(tgt)) + assert_almost_equal(trim(res), trim(tgt)) + + def test_chebtrim(self): + coef = [2, -1, 1, 0] + + # Test exceptions + assert_raises(ValueError, cheb.chebtrim, coef, -1) + + # Test results + assert_equal(cheb.chebtrim(coef), coef[:-1]) + assert_equal(cheb.chebtrim(coef, 1), coef[:-3]) + assert_equal(cheb.chebtrim(coef, 2), [0]) + + def test_chebline(self): + assert_equal(cheb.chebline(3, 4), [3, 4]) + + def test_cheb2poly(self): + for i in range(10): + assert_almost_equal(cheb.cheb2poly([0]*i + [1]), Tlist[i]) + + def test_poly2cheb(self): + for i in range(10): + assert_almost_equal(cheb.poly2cheb(Tlist[i]), [0]*i + [1]) + + def test_weight(self): + x = np.linspace(-1, 1, 11)[1:-1] + tgt = 1./(np.sqrt(1 + x) * np.sqrt(1 - x)) + res = cheb.chebweight(x) + assert_almost_equal(res, tgt) + + def test_chebpts1(self): + #test exceptions + assert_raises(ValueError, cheb.chebpts1, 1.5) + assert_raises(ValueError, cheb.chebpts1, 0) + + #test points + tgt = [0] + assert_almost_equal(cheb.chebpts1(1), tgt) + tgt = [-0.70710678118654746, 0.70710678118654746] + assert_almost_equal(cheb.chebpts1(2), tgt) + tgt = [-0.86602540378443871, 0, 0.86602540378443871] + assert_almost_equal(cheb.chebpts1(3), tgt) + tgt = [-0.9238795325, -0.3826834323, 0.3826834323, 0.9238795325] + assert_almost_equal(cheb.chebpts1(4), tgt) + + def test_chebpts2(self): + #test exceptions + assert_raises(ValueError, cheb.chebpts2, 1.5) + assert_raises(ValueError, cheb.chebpts2, 1) + + #test points + tgt = [-1, 1] + assert_almost_equal(cheb.chebpts2(2), tgt) + tgt = [-1, 0, 1] + assert_almost_equal(cheb.chebpts2(3), tgt) + tgt = [-1, -0.5, .5, 1] + assert_almost_equal(cheb.chebpts2(4), tgt) + tgt = [-1.0, -0.707106781187, 0, 0.707106781187, 1.0] + assert_almost_equal(cheb.chebpts2(5), tgt) diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/tests/test_classes.py b/.local/lib/python3.8/site-packages/numpy/polynomial/tests/test_classes.py new file mode 100644 index 0000000..6322062 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/polynomial/tests/test_classes.py @@ -0,0 +1,600 @@ +"""Test inter-conversion of different polynomial classes. + +This tests the convert and cast methods of all the polynomial classes. 
+ +""" +import operator as op +from numbers import Number + +import pytest +import numpy as np +from numpy.polynomial import ( + Polynomial, Legendre, Chebyshev, Laguerre, Hermite, HermiteE) +from numpy.testing import ( + assert_almost_equal, assert_raises, assert_equal, assert_, + ) +from numpy.polynomial.polyutils import RankWarning + +# +# fixtures +# + +classes = ( + Polynomial, Legendre, Chebyshev, Laguerre, + Hermite, HermiteE + ) +classids = tuple(cls.__name__ for cls in classes) + +@pytest.fixture(params=classes, ids=classids) +def Poly(request): + return request.param + +# +# helper functions +# +random = np.random.random + + +def assert_poly_almost_equal(p1, p2, msg=""): + try: + assert_(np.all(p1.domain == p2.domain)) + assert_(np.all(p1.window == p2.window)) + assert_almost_equal(p1.coef, p2.coef) + except AssertionError: + msg = f"Result: {p1}\nTarget: {p2}" + raise AssertionError(msg) + + +# +# Test conversion methods that depend on combinations of two classes. +# + +Poly1 = Poly +Poly2 = Poly + + +def test_conversion(Poly1, Poly2): + x = np.linspace(0, 1, 10) + coef = random((3,)) + + d1 = Poly1.domain + random((2,))*.25 + w1 = Poly1.window + random((2,))*.25 + p1 = Poly1(coef, domain=d1, window=w1) + + d2 = Poly2.domain + random((2,))*.25 + w2 = Poly2.window + random((2,))*.25 + p2 = p1.convert(kind=Poly2, domain=d2, window=w2) + + assert_almost_equal(p2.domain, d2) + assert_almost_equal(p2.window, w2) + assert_almost_equal(p2(x), p1(x)) + + +def test_cast(Poly1, Poly2): + x = np.linspace(0, 1, 10) + coef = random((3,)) + + d1 = Poly1.domain + random((2,))*.25 + w1 = Poly1.window + random((2,))*.25 + p1 = Poly1(coef, domain=d1, window=w1) + + d2 = Poly2.domain + random((2,))*.25 + w2 = Poly2.window + random((2,))*.25 + p2 = Poly2.cast(p1, domain=d2, window=w2) + + assert_almost_equal(p2.domain, d2) + assert_almost_equal(p2.window, w2) + assert_almost_equal(p2(x), p1(x)) + + +# +# test methods that depend on one class +# + + +def test_identity(Poly): + d = Poly.domain + random((2,))*.25 + w = Poly.window + random((2,))*.25 + x = np.linspace(d[0], d[1], 11) + p = Poly.identity(domain=d, window=w) + assert_equal(p.domain, d) + assert_equal(p.window, w) + assert_almost_equal(p(x), x) + + +def test_basis(Poly): + d = Poly.domain + random((2,))*.25 + w = Poly.window + random((2,))*.25 + p = Poly.basis(5, domain=d, window=w) + assert_equal(p.domain, d) + assert_equal(p.window, w) + assert_equal(p.coef, [0]*5 + [1]) + + +def test_fromroots(Poly): + # check that requested roots are zeros of a polynomial + # of correct degree, domain, and window. + d = Poly.domain + random((2,))*.25 + w = Poly.window + random((2,))*.25 + r = random((5,)) + p1 = Poly.fromroots(r, domain=d, window=w) + assert_equal(p1.degree(), len(r)) + assert_equal(p1.domain, d) + assert_equal(p1.window, w) + assert_almost_equal(p1(r), 0) + + # check that polynomial is monic + pdom = Polynomial.domain + pwin = Polynomial.window + p2 = Polynomial.cast(p1, domain=pdom, window=pwin) + assert_almost_equal(p2.coef[-1], 1) + + +def test_bad_conditioned_fit(Poly): + + x = [0., 0., 1.] + y = [1., 2., 3.] 
+ + # check RankWarning is raised + with pytest.warns(RankWarning) as record: + Poly.fit(x, y, 2) + assert record[0].message.args[0] == "The fit may be poorly conditioned" + + +def test_fit(Poly): + + def f(x): + return x*(x - 1)*(x - 2) + x = np.linspace(0, 3) + y = f(x) + + # check default value of domain and window + p = Poly.fit(x, y, 3) + assert_almost_equal(p.domain, [0, 3]) + assert_almost_equal(p(x), y) + assert_equal(p.degree(), 3) + + # check with given domains and window + d = Poly.domain + random((2,))*.25 + w = Poly.window + random((2,))*.25 + p = Poly.fit(x, y, 3, domain=d, window=w) + assert_almost_equal(p(x), y) + assert_almost_equal(p.domain, d) + assert_almost_equal(p.window, w) + p = Poly.fit(x, y, [0, 1, 2, 3], domain=d, window=w) + assert_almost_equal(p(x), y) + assert_almost_equal(p.domain, d) + assert_almost_equal(p.window, w) + + # check with class domain default + p = Poly.fit(x, y, 3, []) + assert_equal(p.domain, Poly.domain) + assert_equal(p.window, Poly.window) + p = Poly.fit(x, y, [0, 1, 2, 3], []) + assert_equal(p.domain, Poly.domain) + assert_equal(p.window, Poly.window) + + # check that fit accepts weights. + w = np.zeros_like(x) + z = y + random(y.shape)*.25 + w[::2] = 1 + p1 = Poly.fit(x[::2], z[::2], 3) + p2 = Poly.fit(x, z, 3, w=w) + p3 = Poly.fit(x, z, [0, 1, 2, 3], w=w) + assert_almost_equal(p1(x), p2(x)) + assert_almost_equal(p2(x), p3(x)) + + +def test_equal(Poly): + p1 = Poly([1, 2, 3], domain=[0, 1], window=[2, 3]) + p2 = Poly([1, 1, 1], domain=[0, 1], window=[2, 3]) + p3 = Poly([1, 2, 3], domain=[1, 2], window=[2, 3]) + p4 = Poly([1, 2, 3], domain=[0, 1], window=[1, 2]) + assert_(p1 == p1) + assert_(not p1 == p2) + assert_(not p1 == p3) + assert_(not p1 == p4) + + +def test_not_equal(Poly): + p1 = Poly([1, 2, 3], domain=[0, 1], window=[2, 3]) + p2 = Poly([1, 1, 1], domain=[0, 1], window=[2, 3]) + p3 = Poly([1, 2, 3], domain=[1, 2], window=[2, 3]) + p4 = Poly([1, 2, 3], domain=[0, 1], window=[1, 2]) + assert_(not p1 != p1) + assert_(p1 != p2) + assert_(p1 != p3) + assert_(p1 != p4) + + +def test_add(Poly): + # This checks commutation, not numerical correctness + c1 = list(random((4,)) + .5) + c2 = list(random((3,)) + .5) + p1 = Poly(c1) + p2 = Poly(c2) + p3 = p1 + p2 + assert_poly_almost_equal(p2 + p1, p3) + assert_poly_almost_equal(p1 + c2, p3) + assert_poly_almost_equal(c2 + p1, p3) + assert_poly_almost_equal(p1 + tuple(c2), p3) + assert_poly_almost_equal(tuple(c2) + p1, p3) + assert_poly_almost_equal(p1 + np.array(c2), p3) + assert_poly_almost_equal(np.array(c2) + p1, p3) + assert_raises(TypeError, op.add, p1, Poly([0], domain=Poly.domain + 1)) + assert_raises(TypeError, op.add, p1, Poly([0], window=Poly.window + 1)) + if Poly is Polynomial: + assert_raises(TypeError, op.add, p1, Chebyshev([0])) + else: + assert_raises(TypeError, op.add, p1, Polynomial([0])) + + +def test_sub(Poly): + # This checks commutation, not numerical correctness + c1 = list(random((4,)) + .5) + c2 = list(random((3,)) + .5) + p1 = Poly(c1) + p2 = Poly(c2) + p3 = p1 - p2 + assert_poly_almost_equal(p2 - p1, -p3) + assert_poly_almost_equal(p1 - c2, p3) + assert_poly_almost_equal(c2 - p1, -p3) + assert_poly_almost_equal(p1 - tuple(c2), p3) + assert_poly_almost_equal(tuple(c2) - p1, -p3) + assert_poly_almost_equal(p1 - np.array(c2), p3) + assert_poly_almost_equal(np.array(c2) - p1, -p3) + assert_raises(TypeError, op.sub, p1, Poly([0], domain=Poly.domain + 1)) + assert_raises(TypeError, op.sub, p1, Poly([0], window=Poly.window + 1)) + if Poly is Polynomial: + 
assert_raises(TypeError, op.sub, p1, Chebyshev([0])) + else: + assert_raises(TypeError, op.sub, p1, Polynomial([0])) + + +def test_mul(Poly): + c1 = list(random((4,)) + .5) + c2 = list(random((3,)) + .5) + p1 = Poly(c1) + p2 = Poly(c2) + p3 = p1 * p2 + assert_poly_almost_equal(p2 * p1, p3) + assert_poly_almost_equal(p1 * c2, p3) + assert_poly_almost_equal(c2 * p1, p3) + assert_poly_almost_equal(p1 * tuple(c2), p3) + assert_poly_almost_equal(tuple(c2) * p1, p3) + assert_poly_almost_equal(p1 * np.array(c2), p3) + assert_poly_almost_equal(np.array(c2) * p1, p3) + assert_poly_almost_equal(p1 * 2, p1 * Poly([2])) + assert_poly_almost_equal(2 * p1, p1 * Poly([2])) + assert_raises(TypeError, op.mul, p1, Poly([0], domain=Poly.domain + 1)) + assert_raises(TypeError, op.mul, p1, Poly([0], window=Poly.window + 1)) + if Poly is Polynomial: + assert_raises(TypeError, op.mul, p1, Chebyshev([0])) + else: + assert_raises(TypeError, op.mul, p1, Polynomial([0])) + + +def test_floordiv(Poly): + c1 = list(random((4,)) + .5) + c2 = list(random((3,)) + .5) + c3 = list(random((2,)) + .5) + p1 = Poly(c1) + p2 = Poly(c2) + p3 = Poly(c3) + p4 = p1 * p2 + p3 + c4 = list(p4.coef) + assert_poly_almost_equal(p4 // p2, p1) + assert_poly_almost_equal(p4 // c2, p1) + assert_poly_almost_equal(c4 // p2, p1) + assert_poly_almost_equal(p4 // tuple(c2), p1) + assert_poly_almost_equal(tuple(c4) // p2, p1) + assert_poly_almost_equal(p4 // np.array(c2), p1) + assert_poly_almost_equal(np.array(c4) // p2, p1) + assert_poly_almost_equal(2 // p2, Poly([0])) + assert_poly_almost_equal(p2 // 2, 0.5*p2) + assert_raises( + TypeError, op.floordiv, p1, Poly([0], domain=Poly.domain + 1)) + assert_raises( + TypeError, op.floordiv, p1, Poly([0], window=Poly.window + 1)) + if Poly is Polynomial: + assert_raises(TypeError, op.floordiv, p1, Chebyshev([0])) + else: + assert_raises(TypeError, op.floordiv, p1, Polynomial([0])) + + +def test_truediv(Poly): + # true division is valid only if the denominator is a Number and + # not a python bool. 
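+ # (bool is excluded because it is a subclass of int, and hence of Number)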
+ p1 = Poly([1,2,3]) + p2 = p1 * 5 + + for stype in np.ScalarType: + if not issubclass(stype, Number) or issubclass(stype, bool): + continue + s = stype(5) + assert_poly_almost_equal(op.truediv(p2, s), p1) + assert_raises(TypeError, op.truediv, s, p2) + for stype in (int, float): + s = stype(5) + assert_poly_almost_equal(op.truediv(p2, s), p1) + assert_raises(TypeError, op.truediv, s, p2) + for stype in [complex]: + s = stype(5, 0) + assert_poly_almost_equal(op.truediv(p2, s), p1) + assert_raises(TypeError, op.truediv, s, p2) + for s in [tuple(), list(), dict(), bool(), np.array([1])]: + assert_raises(TypeError, op.truediv, p2, s) + assert_raises(TypeError, op.truediv, s, p2) + for ptype in classes: + assert_raises(TypeError, op.truediv, p2, ptype(1)) + + +def test_mod(Poly): + # This checks commutation, not numerical correctness + c1 = list(random((4,)) + .5) + c2 = list(random((3,)) + .5) + c3 = list(random((2,)) + .5) + p1 = Poly(c1) + p2 = Poly(c2) + p3 = Poly(c3) + p4 = p1 * p2 + p3 + c4 = list(p4.coef) + assert_poly_almost_equal(p4 % p2, p3) + assert_poly_almost_equal(p4 % c2, p3) + assert_poly_almost_equal(c4 % p2, p3) + assert_poly_almost_equal(p4 % tuple(c2), p3) + assert_poly_almost_equal(tuple(c4) % p2, p3) + assert_poly_almost_equal(p4 % np.array(c2), p3) + assert_poly_almost_equal(np.array(c4) % p2, p3) + assert_poly_almost_equal(2 % p2, Poly([2])) + assert_poly_almost_equal(p2 % 2, Poly([0])) + assert_raises(TypeError, op.mod, p1, Poly([0], domain=Poly.domain + 1)) + assert_raises(TypeError, op.mod, p1, Poly([0], window=Poly.window + 1)) + if Poly is Polynomial: + assert_raises(TypeError, op.mod, p1, Chebyshev([0])) + else: + assert_raises(TypeError, op.mod, p1, Polynomial([0])) + + +def test_divmod(Poly): + # This checks commutation, not numerical correctness + c1 = list(random((4,)) + .5) + c2 = list(random((3,)) + .5) + c3 = list(random((2,)) + .5) + p1 = Poly(c1) + p2 = Poly(c2) + p3 = Poly(c3) + p4 = p1 * p2 + p3 + c4 = list(p4.coef) + quo, rem = divmod(p4, p2) + assert_poly_almost_equal(quo, p1) + assert_poly_almost_equal(rem, p3) + quo, rem = divmod(p4, c2) + assert_poly_almost_equal(quo, p1) + assert_poly_almost_equal(rem, p3) + quo, rem = divmod(c4, p2) + assert_poly_almost_equal(quo, p1) + assert_poly_almost_equal(rem, p3) + quo, rem = divmod(p4, tuple(c2)) + assert_poly_almost_equal(quo, p1) + assert_poly_almost_equal(rem, p3) + quo, rem = divmod(tuple(c4), p2) + assert_poly_almost_equal(quo, p1) + assert_poly_almost_equal(rem, p3) + quo, rem = divmod(p4, np.array(c2)) + assert_poly_almost_equal(quo, p1) + assert_poly_almost_equal(rem, p3) + quo, rem = divmod(np.array(c4), p2) + assert_poly_almost_equal(quo, p1) + assert_poly_almost_equal(rem, p3) + quo, rem = divmod(p2, 2) + assert_poly_almost_equal(quo, 0.5*p2) + assert_poly_almost_equal(rem, Poly([0])) + quo, rem = divmod(2, p2) + assert_poly_almost_equal(quo, Poly([0])) + assert_poly_almost_equal(rem, Poly([2])) + assert_raises(TypeError, divmod, p1, Poly([0], domain=Poly.domain + 1)) + assert_raises(TypeError, divmod, p1, Poly([0], window=Poly.window + 1)) + if Poly is Polynomial: + assert_raises(TypeError, divmod, p1, Chebyshev([0])) + else: + assert_raises(TypeError, divmod, p1, Polynomial([0])) + + +def test_roots(Poly): + d = Poly.domain * 1.25 + .25 + w = Poly.window + tgt = np.linspace(d[0], d[1], 5) + res = np.sort(Poly.fromroots(tgt, domain=d, window=w).roots()) + assert_almost_equal(res, tgt) + # default domain and window + res = np.sort(Poly.fromroots(tgt).roots()) + assert_almost_equal(res, tgt) + + 
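+# A minimal sketch (illustrative only, not part of the suite) of the +# fromroots/roots round trip that test_roots above exercises, spelled out +# for the plain Polynomial class: +# +# p = Polynomial.fromroots([1, 2, 3]) +# np.sort(p.roots()) # approximately [1., 2., 3.] + +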
+def test_degree(Poly): + p = Poly.basis(5) + assert_equal(p.degree(), 5) + + +def test_copy(Poly): + p1 = Poly.basis(5) + p2 = p1.copy() + assert_(p1 == p2) + assert_(p1 is not p2) + assert_(p1.coef is not p2.coef) + assert_(p1.domain is not p2.domain) + assert_(p1.window is not p2.window) + + +def test_integ(Poly): + P = Polynomial + # Check defaults + p0 = Poly.cast(P([1*2, 2*3, 3*4])) + p1 = P.cast(p0.integ()) + p2 = P.cast(p0.integ(2)) + assert_poly_almost_equal(p1, P([0, 2, 3, 4])) + assert_poly_almost_equal(p2, P([0, 0, 1, 1, 1])) + # Check with k + p0 = Poly.cast(P([1*2, 2*3, 3*4])) + p1 = P.cast(p0.integ(k=1)) + p2 = P.cast(p0.integ(2, k=[1, 1])) + assert_poly_almost_equal(p1, P([1, 2, 3, 4])) + assert_poly_almost_equal(p2, P([1, 1, 1, 1, 1])) + # Check with lbnd + p0 = Poly.cast(P([1*2, 2*3, 3*4])) + p1 = P.cast(p0.integ(lbnd=1)) + p2 = P.cast(p0.integ(2, lbnd=1)) + assert_poly_almost_equal(p1, P([-9, 2, 3, 4])) + assert_poly_almost_equal(p2, P([6, -9, 1, 1, 1])) + # Check scaling + d = 2*Poly.domain + p0 = Poly.cast(P([1*2, 2*3, 3*4]), domain=d) + p1 = P.cast(p0.integ()) + p2 = P.cast(p0.integ(2)) + assert_poly_almost_equal(p1, P([0, 2, 3, 4])) + assert_poly_almost_equal(p2, P([0, 0, 1, 1, 1])) + + +def test_deriv(Poly): + # Check that the derivative is the inverse of integration. It is + assumed that the integration has been checked elsewhere. + d = Poly.domain + random((2,))*.25 + w = Poly.window + random((2,))*.25 + p1 = Poly([1, 2, 3], domain=d, window=w) + p2 = p1.integ(2, k=[1, 2]) + p3 = p1.integ(1, k=[1]) + assert_almost_equal(p2.deriv(1).coef, p3.coef) + assert_almost_equal(p2.deriv(2).coef, p1.coef) + # default domain and window + p1 = Poly([1, 2, 3]) + p2 = p1.integ(2, k=[1, 2]) + p3 = p1.integ(1, k=[1]) + assert_almost_equal(p2.deriv(1).coef, p3.coef) + assert_almost_equal(p2.deriv(2).coef, p1.coef) + + +def test_linspace(Poly): + d = Poly.domain + random((2,))*.25 + w = Poly.window + random((2,))*.25 + p = Poly([1, 2, 3], domain=d, window=w) + # check default domain + xtgt = np.linspace(d[0], d[1], 20) + ytgt = p(xtgt) + xres, yres = p.linspace(20) + assert_almost_equal(xres, xtgt) + assert_almost_equal(yres, ytgt) + # check specified domain + xtgt = np.linspace(0, 2, 20) + ytgt = p(xtgt) + xres, yres = p.linspace(20, domain=[0, 2]) + assert_almost_equal(xres, xtgt) + assert_almost_equal(yres, ytgt) + + +def test_pow(Poly): + d = Poly.domain + random((2,))*.25 + w = Poly.window + random((2,))*.25 + tgt = Poly([1], domain=d, window=w) + tst = Poly([1, 2, 3], domain=d, window=w) + for i in range(5): + assert_poly_almost_equal(tst**i, tgt) + tgt = tgt * tst + # default domain and window + tgt = Poly([1]) + tst = Poly([1, 2, 3]) + for i in range(5): + assert_poly_almost_equal(tst**i, tgt) + tgt = tgt * tst + # check error for invalid powers + assert_raises(ValueError, op.pow, tgt, 1.5) + assert_raises(ValueError, op.pow, tgt, -1) + + +def test_call(Poly): + P = Polynomial + d = Poly.domain + x = np.linspace(d[0], d[1], 11) + + # Check defaults + p = Poly.cast(P([1, 2, 3])) + tgt = 1 + x*(2 + 3*x) + res = p(x) + assert_almost_equal(res, tgt) + + +def test_cutdeg(Poly): + p = Poly([1, 2, 3]) + assert_raises(ValueError, p.cutdeg, .5) + assert_raises(ValueError, p.cutdeg, -1) + assert_equal(len(p.cutdeg(3)), 3) + assert_equal(len(p.cutdeg(2)), 3) + assert_equal(len(p.cutdeg(1)), 2) + assert_equal(len(p.cutdeg(0)), 1) + + +def test_truncate(Poly): + p = Poly([1, 2, 3]) + assert_raises(ValueError, p.truncate, .5) + assert_raises(ValueError, p.truncate, 0) +
assert_equal(len(p.truncate(4)), 3) + assert_equal(len(p.truncate(3)), 3) + assert_equal(len(p.truncate(2)), 2) + assert_equal(len(p.truncate(1)), 1) + + +def test_trim(Poly): + c = [1, 1e-6, 1e-12, 0] + p = Poly(c) + assert_equal(p.trim().coef, c[:3]) + assert_equal(p.trim(1e-10).coef, c[:2]) + assert_equal(p.trim(1e-5).coef, c[:1]) + + +def test_mapparms(Poly): + # check with defaults. Should be identity. + d = Poly.domain + w = Poly.window + p = Poly([1], domain=d, window=w) + assert_almost_equal([0, 1], p.mapparms()) + # + w = 2*d + 1 + p = Poly([1], domain=d, window=w) + assert_almost_equal([1, 2], p.mapparms()) + + +def test_ufunc_override(Poly): + p = Poly([1, 2, 3]) + x = np.ones(3) + assert_raises(TypeError, np.add, p, x) + assert_raises(TypeError, np.add, x, p) + + +# +# Test class method that only exists for some classes +# + + +class TestInterpolate: + + def f(self, x): + return x * (x - 1) * (x - 2) + + def test_raises(self): + assert_raises(ValueError, Chebyshev.interpolate, self.f, -1) + assert_raises(TypeError, Chebyshev.interpolate, self.f, 10.) + + def test_dimensions(self): + for deg in range(1, 5): + assert_(Chebyshev.interpolate(self.f, deg).degree() == deg) + + def test_approximation(self): + + def powx(x, p): + return x**p + + x = np.linspace(0, 2, 10) + for deg in range(0, 10): + for t in range(0, deg + 1): + p = Chebyshev.interpolate(powx, deg, domain=[0, 2], args=(t,)) + assert_almost_equal(p(x), powx(x, t), decimal=11) diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/tests/test_hermite.py b/.local/lib/python3.8/site-packages/numpy/polynomial/tests/test_hermite.py new file mode 100644 index 0000000..53ee084 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/polynomial/tests/test_hermite.py @@ -0,0 +1,555 @@ +"""Tests for hermite module. 
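+ +As a quick orientation (an illustrative sketch; TestMisc below is the real +check), the H0..H9 tables in this file are the power-basis expansions of the +Hermite polynomials, so for example:: + + import numpy.polynomial.hermite as herm + herm.herm2poly([0, 0, 1]) # -> H2 = [-2., 0., 4.]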
+ +""" +from functools import reduce + +import numpy as np +import numpy.polynomial.hermite as herm +from numpy.polynomial.polynomial import polyval +from numpy.testing import ( + assert_almost_equal, assert_raises, assert_equal, assert_, + ) + +H0 = np.array([1]) +H1 = np.array([0, 2]) +H2 = np.array([-2, 0, 4]) +H3 = np.array([0, -12, 0, 8]) +H4 = np.array([12, 0, -48, 0, 16]) +H5 = np.array([0, 120, 0, -160, 0, 32]) +H6 = np.array([-120, 0, 720, 0, -480, 0, 64]) +H7 = np.array([0, -1680, 0, 3360, 0, -1344, 0, 128]) +H8 = np.array([1680, 0, -13440, 0, 13440, 0, -3584, 0, 256]) +H9 = np.array([0, 30240, 0, -80640, 0, 48384, 0, -9216, 0, 512]) + +Hlist = [H0, H1, H2, H3, H4, H5, H6, H7, H8, H9] + + +def trim(x): + return herm.hermtrim(x, tol=1e-6) + + +class TestConstants: + + def test_hermdomain(self): + assert_equal(herm.hermdomain, [-1, 1]) + + def test_hermzero(self): + assert_equal(herm.hermzero, [0]) + + def test_hermone(self): + assert_equal(herm.hermone, [1]) + + def test_hermx(self): + assert_equal(herm.hermx, [0, .5]) + + +class TestArithmetic: + x = np.linspace(-3, 3, 100) + + def test_hermadd(self): + for i in range(5): + for j in range(5): + msg = f"At i={i}, j={j}" + tgt = np.zeros(max(i, j) + 1) + tgt[i] += 1 + tgt[j] += 1 + res = herm.hermadd([0]*i + [1], [0]*j + [1]) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + def test_hermsub(self): + for i in range(5): + for j in range(5): + msg = f"At i={i}, j={j}" + tgt = np.zeros(max(i, j) + 1) + tgt[i] += 1 + tgt[j] -= 1 + res = herm.hermsub([0]*i + [1], [0]*j + [1]) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + def test_hermmulx(self): + assert_equal(herm.hermmulx([0]), [0]) + assert_equal(herm.hermmulx([1]), [0, .5]) + for i in range(1, 5): + ser = [0]*i + [1] + tgt = [0]*(i - 1) + [i, 0, .5] + assert_equal(herm.hermmulx(ser), tgt) + + def test_hermmul(self): + # check values of result + for i in range(5): + pol1 = [0]*i + [1] + val1 = herm.hermval(self.x, pol1) + for j in range(5): + msg = f"At i={i}, j={j}" + pol2 = [0]*j + [1] + val2 = herm.hermval(self.x, pol2) + pol3 = herm.hermmul(pol1, pol2) + val3 = herm.hermval(self.x, pol3) + assert_(len(pol3) == i + j + 1, msg) + assert_almost_equal(val3, val1*val2, err_msg=msg) + + def test_hermdiv(self): + for i in range(5): + for j in range(5): + msg = f"At i={i}, j={j}" + ci = [0]*i + [1] + cj = [0]*j + [1] + tgt = herm.hermadd(ci, cj) + quo, rem = herm.hermdiv(tgt, ci) + res = herm.hermadd(herm.hermmul(quo, ci), rem) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + def test_hermpow(self): + for i in range(5): + for j in range(5): + msg = f"At i={i}, j={j}" + c = np.arange(i + 1) + tgt = reduce(herm.hermmul, [c]*j, np.array([1])) + res = herm.hermpow(c, j) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + +class TestEvaluation: + # coefficients of 1 + 2*x + 3*x**2 + c1d = np.array([2.5, 1., .75]) + c2d = np.einsum('i,j->ij', c1d, c1d) + c3d = np.einsum('i,j,k->ijk', c1d, c1d, c1d) + + # some random values in [-1, 1) + x = np.random.random((3, 5))*2 - 1 + y = polyval(x, [1., 2., 3.]) + + def test_hermval(self): + #check empty input + assert_equal(herm.hermval([], [1]).size, 0) + + #check normal input) + x = np.linspace(-1, 1) + y = [polyval(x, c) for c in Hlist] + for i in range(10): + msg = f"At i={i}" + tgt = y[i] + res = herm.hermval(x, [0]*i + [1]) + assert_almost_equal(res, tgt, err_msg=msg) + + #check that shape is preserved + for i in range(3): + dims = [2]*i + x = np.zeros(dims) + assert_equal(herm.hermval(x, [1]).shape, dims) + 
assert_equal(herm.hermval(x, [1, 0]).shape, dims) + assert_equal(herm.hermval(x, [1, 0, 0]).shape, dims) + + def test_hermval2d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test exceptions + assert_raises(ValueError, herm.hermval2d, x1, x2[:2], self.c2d) + + #test values + tgt = y1*y2 + res = herm.hermval2d(x1, x2, self.c2d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = herm.hermval2d(z, z, self.c2d) + assert_(res.shape == (2, 3)) + + def test_hermval3d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test exceptions + assert_raises(ValueError, herm.hermval3d, x1, x2, x3[:2], self.c3d) + + #test values + tgt = y1*y2*y3 + res = herm.hermval3d(x1, x2, x3, self.c3d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = herm.hermval3d(z, z, z, self.c3d) + assert_(res.shape == (2, 3)) + + def test_hermgrid2d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test values + tgt = np.einsum('i,j->ij', y1, y2) + res = herm.hermgrid2d(x1, x2, self.c2d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = herm.hermgrid2d(z, z, self.c2d) + assert_(res.shape == (2, 3)*2) + + def test_hermgrid3d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test values + tgt = np.einsum('i,j,k->ijk', y1, y2, y3) + res = herm.hermgrid3d(x1, x2, x3, self.c3d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = herm.hermgrid3d(z, z, z, self.c3d) + assert_(res.shape == (2, 3)*3) + + +class TestIntegral: + + def test_hermint(self): + # check exceptions + assert_raises(TypeError, herm.hermint, [0], .5) + assert_raises(ValueError, herm.hermint, [0], -1) + assert_raises(ValueError, herm.hermint, [0], 1, [0, 0]) + assert_raises(ValueError, herm.hermint, [0], lbnd=[0]) + assert_raises(ValueError, herm.hermint, [0], scl=[0]) + assert_raises(TypeError, herm.hermint, [0], axis=.5) + + # test integration of zero polynomial + for i in range(2, 5): + k = [0]*(i - 2) + [1] + res = herm.hermint([0], m=i, k=k) + assert_almost_equal(res, [0, .5]) + + # check single integration with integration constant + for i in range(5): + scl = i + 1 + pol = [0]*i + [1] + tgt = [i] + [0]*i + [1/scl] + hermpol = herm.poly2herm(pol) + hermint = herm.hermint(hermpol, m=1, k=[i]) + res = herm.herm2poly(hermint) + assert_almost_equal(trim(res), trim(tgt)) + + # check single integration with integration constant and lbnd + for i in range(5): + scl = i + 1 + pol = [0]*i + [1] + hermpol = herm.poly2herm(pol) + hermint = herm.hermint(hermpol, m=1, k=[i], lbnd=-1) + assert_almost_equal(herm.hermval(-1, hermint), i) + + # check single integration with integration constant and scaling + for i in range(5): + scl = i + 1 + pol = [0]*i + [1] + tgt = [i] + [0]*i + [2/scl] + hermpol = herm.poly2herm(pol) + hermint = herm.hermint(hermpol, m=1, k=[i], scl=2) + res = herm.herm2poly(hermint) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with default k + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = herm.hermint(tgt, m=1) + res = herm.hermint(pol, m=j) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with defined k + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = herm.hermint(tgt, m=1, k=[k]) + res = herm.hermint(pol, m=j, k=list(range(j))) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with lbnd + for i in range(5): + for j in 
range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = herm.hermint(tgt, m=1, k=[k], lbnd=-1) + res = herm.hermint(pol, m=j, k=list(range(j)), lbnd=-1) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with scaling + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = herm.hermint(tgt, m=1, k=[k], scl=2) + res = herm.hermint(pol, m=j, k=list(range(j)), scl=2) + assert_almost_equal(trim(res), trim(tgt)) + + def test_hermint_axis(self): + # check that axis keyword works + c2d = np.random.random((3, 4)) + + tgt = np.vstack([herm.hermint(c) for c in c2d.T]).T + res = herm.hermint(c2d, axis=0) + assert_almost_equal(res, tgt) + + tgt = np.vstack([herm.hermint(c) for c in c2d]) + res = herm.hermint(c2d, axis=1) + assert_almost_equal(res, tgt) + + tgt = np.vstack([herm.hermint(c, k=3) for c in c2d]) + res = herm.hermint(c2d, k=3, axis=1) + assert_almost_equal(res, tgt) + + +class TestDerivative: + + def test_hermder(self): + # check exceptions + assert_raises(TypeError, herm.hermder, [0], .5) + assert_raises(ValueError, herm.hermder, [0], -1) + + # check that zeroth derivative does nothing + for i in range(5): + tgt = [0]*i + [1] + res = herm.hermder(tgt, m=0) + assert_equal(trim(res), trim(tgt)) + + # check that derivation is the inverse of integration + for i in range(5): + for j in range(2, 5): + tgt = [0]*i + [1] + res = herm.hermder(herm.hermint(tgt, m=j), m=j) + assert_almost_equal(trim(res), trim(tgt)) + + # check derivation with scaling + for i in range(5): + for j in range(2, 5): + tgt = [0]*i + [1] + res = herm.hermder(herm.hermint(tgt, m=j, scl=2), m=j, scl=.5) + assert_almost_equal(trim(res), trim(tgt)) + + def test_hermder_axis(self): + # check that axis keyword works + c2d = np.random.random((3, 4)) + + tgt = np.vstack([herm.hermder(c) for c in c2d.T]).T + res = herm.hermder(c2d, axis=0) + assert_almost_equal(res, tgt) + + tgt = np.vstack([herm.hermder(c) for c in c2d]) + res = herm.hermder(c2d, axis=1) + assert_almost_equal(res, tgt) + + +class TestVander: + # some random values in [-1, 1) + x = np.random.random((3, 5))*2 - 1 + + def test_hermvander(self): + # check for 1d x + x = np.arange(3) + v = herm.hermvander(x, 3) + assert_(v.shape == (3, 4)) + for i in range(4): + coef = [0]*i + [1] + assert_almost_equal(v[..., i], herm.hermval(x, coef)) + + # check for 2d x + x = np.array([[1, 2], [3, 4], [5, 6]]) + v = herm.hermvander(x, 3) + assert_(v.shape == (3, 2, 4)) + for i in range(4): + coef = [0]*i + [1] + assert_almost_equal(v[..., i], herm.hermval(x, coef)) + + def test_hermvander2d(self): + # also tests hermval2d for non-square coefficient array + x1, x2, x3 = self.x + c = np.random.random((2, 3)) + van = herm.hermvander2d(x1, x2, [1, 2]) + tgt = herm.hermval2d(x1, x2, c) + res = np.dot(van, c.flat) + assert_almost_equal(res, tgt) + + # check shape + van = herm.hermvander2d([x1], [x2], [1, 2]) + assert_(van.shape == (1, 5, 6)) + + def test_hermvander3d(self): + # also tests hermval3d for non-square coefficient array + x1, x2, x3 = self.x + c = np.random.random((2, 3, 4)) + van = herm.hermvander3d(x1, x2, x3, [1, 2, 3]) + tgt = herm.hermval3d(x1, x2, x3, c) + res = np.dot(van, c.flat) + assert_almost_equal(res, tgt) + + # check shape + van = herm.hermvander3d([x1], [x2], [x3], [1, 2, 3]) + assert_(van.shape == (1, 5, 24)) + + +class TestFitting: + + def test_hermfit(self): + def f(x): + return x*(x - 1)*(x - 2) + + def f2(x): + return x**4 + x**2 + 1 + + # Test exceptions + 
assert_raises(ValueError, herm.hermfit, [1], [1], -1) + assert_raises(TypeError, herm.hermfit, [[1]], [1], 0) + assert_raises(TypeError, herm.hermfit, [], [1], 0) + assert_raises(TypeError, herm.hermfit, [1], [[[1]]], 0) + assert_raises(TypeError, herm.hermfit, [1, 2], [1], 0) + assert_raises(TypeError, herm.hermfit, [1], [1, 2], 0) + assert_raises(TypeError, herm.hermfit, [1], [1], 0, w=[[1]]) + assert_raises(TypeError, herm.hermfit, [1], [1], 0, w=[1, 1]) + assert_raises(ValueError, herm.hermfit, [1], [1], [-1,]) + assert_raises(ValueError, herm.hermfit, [1], [1], [2, -1, 6]) + assert_raises(TypeError, herm.hermfit, [1], [1], []) + + # Test fit + x = np.linspace(0, 2) + y = f(x) + # + coef3 = herm.hermfit(x, y, 3) + assert_equal(len(coef3), 4) + assert_almost_equal(herm.hermval(x, coef3), y) + coef3 = herm.hermfit(x, y, [0, 1, 2, 3]) + assert_equal(len(coef3), 4) + assert_almost_equal(herm.hermval(x, coef3), y) + # + coef4 = herm.hermfit(x, y, 4) + assert_equal(len(coef4), 5) + assert_almost_equal(herm.hermval(x, coef4), y) + coef4 = herm.hermfit(x, y, [0, 1, 2, 3, 4]) + assert_equal(len(coef4), 5) + assert_almost_equal(herm.hermval(x, coef4), y) + # check things still work if deg is not in strictly increasing order + coef4 = herm.hermfit(x, y, [2, 3, 4, 1, 0]) + assert_equal(len(coef4), 5) + assert_almost_equal(herm.hermval(x, coef4), y) + # + coef2d = herm.hermfit(x, np.array([y, y]).T, 3) + assert_almost_equal(coef2d, np.array([coef3, coef3]).T) + coef2d = herm.hermfit(x, np.array([y, y]).T, [0, 1, 2, 3]) + assert_almost_equal(coef2d, np.array([coef3, coef3]).T) + # test weighting + w = np.zeros_like(x) + yw = y.copy() + w[1::2] = 1 + y[0::2] = 0 + wcoef3 = herm.hermfit(x, yw, 3, w=w) + assert_almost_equal(wcoef3, coef3) + wcoef3 = herm.hermfit(x, yw, [0, 1, 2, 3], w=w) + assert_almost_equal(wcoef3, coef3) + # + wcoef2d = herm.hermfit(x, np.array([yw, yw]).T, 3, w=w) + assert_almost_equal(wcoef2d, np.array([coef3, coef3]).T) + wcoef2d = herm.hermfit(x, np.array([yw, yw]).T, [0, 1, 2, 3], w=w) + assert_almost_equal(wcoef2d, np.array([coef3, coef3]).T) + # test scaling with complex x values whose squares + sum to zero. + x = [1, 1j, -1, -1j] + assert_almost_equal(herm.hermfit(x, x, 1), [0, .5]) + assert_almost_equal(herm.hermfit(x, x, [0, 1]), [0, .5]) + # test fitting only even Hermite polynomials + x = np.linspace(-1, 1) + y = f2(x) + coef1 = herm.hermfit(x, y, 4) + assert_almost_equal(herm.hermval(x, coef1), y) + coef2 = herm.hermfit(x, y, [0, 2, 4]) + assert_almost_equal(herm.hermval(x, coef2), y) + assert_almost_equal(coef1, coef2) + + +class TestCompanion: + + def test_raises(self): + assert_raises(ValueError, herm.hermcompanion, []) + assert_raises(ValueError, herm.hermcompanion, [1]) + + def test_dimensions(self): + for i in range(1, 5): + coef = [0]*i + [1] + assert_(herm.hermcompanion(coef).shape == (i, i)) + + def test_linear_root(self): + assert_(herm.hermcompanion([1, 2])[0, 0] == -.25) + + +class TestGauss: + + def test_100(self): + x, w = herm.hermgauss(100) + + # test orthogonality. Note that the results need to be normalized, + otherwise the huge values that can arise from fast growing + functions like Laguerre can be very confusing.
+ v = herm.hermvander(x, 99) + vv = np.dot(v.T * w, v) + vd = 1/np.sqrt(vv.diagonal()) + vv = vd[:, None] * vv * vd + assert_almost_equal(vv, np.eye(100)) + + # check that the integral of 1 is correct + tgt = np.sqrt(np.pi) + assert_almost_equal(w.sum(), tgt) + + +class TestMisc: + + def test_hermfromroots(self): + res = herm.hermfromroots([]) + assert_almost_equal(trim(res), [1]) + for i in range(1, 5): + roots = np.cos(np.linspace(-np.pi, 0, 2*i + 1)[1::2]) + pol = herm.hermfromroots(roots) + res = herm.hermval(roots, pol) + tgt = 0 + assert_(len(pol) == i + 1) + assert_almost_equal(herm.herm2poly(pol)[-1], 1) + assert_almost_equal(res, tgt) + + def test_hermroots(self): + assert_almost_equal(herm.hermroots([1]), []) + assert_almost_equal(herm.hermroots([1, 1]), [-.5]) + for i in range(2, 5): + tgt = np.linspace(-1, 1, i) + res = herm.hermroots(herm.hermfromroots(tgt)) + assert_almost_equal(trim(res), trim(tgt)) + + def test_hermtrim(self): + coef = [2, -1, 1, 0] + + # Test exceptions + assert_raises(ValueError, herm.hermtrim, coef, -1) + + # Test results + assert_equal(herm.hermtrim(coef), coef[:-1]) + assert_equal(herm.hermtrim(coef, 1), coef[:-3]) + assert_equal(herm.hermtrim(coef, 2), [0]) + + def test_hermline(self): + assert_equal(herm.hermline(3, 4), [3, 2]) + + def test_herm2poly(self): + for i in range(10): + assert_almost_equal(herm.herm2poly([0]*i + [1]), Hlist[i]) + + def test_poly2herm(self): + for i in range(10): + assert_almost_equal(herm.poly2herm(Hlist[i]), [0]*i + [1]) + + def test_weight(self): + x = np.linspace(-5, 5, 11) + tgt = np.exp(-x**2) + res = herm.hermweight(x) + assert_almost_equal(res, tgt) diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/tests/test_hermite_e.py b/.local/lib/python3.8/site-packages/numpy/polynomial/tests/test_hermite_e.py new file mode 100644 index 0000000..2d262a3 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/polynomial/tests/test_hermite_e.py @@ -0,0 +1,556 @@ +"""Tests for hermite_e module. 
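+ +As a quick orientation (an illustrative sketch; TestMisc below is the real +check), the He0..He9 tables are the power-basis expansions of the HermiteE +polynomials, so for example:: + + import numpy.polynomial.hermite_e as herme + herme.herme2poly([0, 0, 1]) # -> He2 = [-1., 0., 1.]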
+ +""" +from functools import reduce + +import numpy as np +import numpy.polynomial.hermite_e as herme +from numpy.polynomial.polynomial import polyval +from numpy.testing import ( + assert_almost_equal, assert_raises, assert_equal, assert_, + ) + +He0 = np.array([1]) +He1 = np.array([0, 1]) +He2 = np.array([-1, 0, 1]) +He3 = np.array([0, -3, 0, 1]) +He4 = np.array([3, 0, -6, 0, 1]) +He5 = np.array([0, 15, 0, -10, 0, 1]) +He6 = np.array([-15, 0, 45, 0, -15, 0, 1]) +He7 = np.array([0, -105, 0, 105, 0, -21, 0, 1]) +He8 = np.array([105, 0, -420, 0, 210, 0, -28, 0, 1]) +He9 = np.array([0, 945, 0, -1260, 0, 378, 0, -36, 0, 1]) + +Helist = [He0, He1, He2, He3, He4, He5, He6, He7, He8, He9] + + +def trim(x): + return herme.hermetrim(x, tol=1e-6) + + +class TestConstants: + + def test_hermedomain(self): + assert_equal(herme.hermedomain, [-1, 1]) + + def test_hermezero(self): + assert_equal(herme.hermezero, [0]) + + def test_hermeone(self): + assert_equal(herme.hermeone, [1]) + + def test_hermex(self): + assert_equal(herme.hermex, [0, 1]) + + +class TestArithmetic: + x = np.linspace(-3, 3, 100) + + def test_hermeadd(self): + for i in range(5): + for j in range(5): + msg = f"At i={i}, j={j}" + tgt = np.zeros(max(i, j) + 1) + tgt[i] += 1 + tgt[j] += 1 + res = herme.hermeadd([0]*i + [1], [0]*j + [1]) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + def test_hermesub(self): + for i in range(5): + for j in range(5): + msg = f"At i={i}, j={j}" + tgt = np.zeros(max(i, j) + 1) + tgt[i] += 1 + tgt[j] -= 1 + res = herme.hermesub([0]*i + [1], [0]*j + [1]) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + def test_hermemulx(self): + assert_equal(herme.hermemulx([0]), [0]) + assert_equal(herme.hermemulx([1]), [0, 1]) + for i in range(1, 5): + ser = [0]*i + [1] + tgt = [0]*(i - 1) + [i, 0, 1] + assert_equal(herme.hermemulx(ser), tgt) + + def test_hermemul(self): + # check values of result + for i in range(5): + pol1 = [0]*i + [1] + val1 = herme.hermeval(self.x, pol1) + for j in range(5): + msg = f"At i={i}, j={j}" + pol2 = [0]*j + [1] + val2 = herme.hermeval(self.x, pol2) + pol3 = herme.hermemul(pol1, pol2) + val3 = herme.hermeval(self.x, pol3) + assert_(len(pol3) == i + j + 1, msg) + assert_almost_equal(val3, val1*val2, err_msg=msg) + + def test_hermediv(self): + for i in range(5): + for j in range(5): + msg = f"At i={i}, j={j}" + ci = [0]*i + [1] + cj = [0]*j + [1] + tgt = herme.hermeadd(ci, cj) + quo, rem = herme.hermediv(tgt, ci) + res = herme.hermeadd(herme.hermemul(quo, ci), rem) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + def test_hermepow(self): + for i in range(5): + for j in range(5): + msg = f"At i={i}, j={j}" + c = np.arange(i + 1) + tgt = reduce(herme.hermemul, [c]*j, np.array([1])) + res = herme.hermepow(c, j) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + +class TestEvaluation: + # coefficients of 1 + 2*x + 3*x**2 + c1d = np.array([4., 2., 3.]) + c2d = np.einsum('i,j->ij', c1d, c1d) + c3d = np.einsum('i,j,k->ijk', c1d, c1d, c1d) + + # some random values in [-1, 1) + x = np.random.random((3, 5))*2 - 1 + y = polyval(x, [1., 2., 3.]) + + def test_hermeval(self): + #check empty input + assert_equal(herme.hermeval([], [1]).size, 0) + + #check normal input) + x = np.linspace(-1, 1) + y = [polyval(x, c) for c in Helist] + for i in range(10): + msg = f"At i={i}" + tgt = y[i] + res = herme.hermeval(x, [0]*i + [1]) + assert_almost_equal(res, tgt, err_msg=msg) + + #check that shape is preserved + for i in range(3): + dims = [2]*i + x = np.zeros(dims) + assert_equal(herme.hermeval(x, 
[1]).shape, dims) + assert_equal(herme.hermeval(x, [1, 0]).shape, dims) + assert_equal(herme.hermeval(x, [1, 0, 0]).shape, dims) + + def test_hermeval2d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test exceptions + assert_raises(ValueError, herme.hermeval2d, x1, x2[:2], self.c2d) + + #test values + tgt = y1*y2 + res = herme.hermeval2d(x1, x2, self.c2d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = herme.hermeval2d(z, z, self.c2d) + assert_(res.shape == (2, 3)) + + def test_hermeval3d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test exceptions + assert_raises(ValueError, herme.hermeval3d, x1, x2, x3[:2], self.c3d) + + #test values + tgt = y1*y2*y3 + res = herme.hermeval3d(x1, x2, x3, self.c3d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = herme.hermeval3d(z, z, z, self.c3d) + assert_(res.shape == (2, 3)) + + def test_hermegrid2d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test values + tgt = np.einsum('i,j->ij', y1, y2) + res = herme.hermegrid2d(x1, x2, self.c2d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = herme.hermegrid2d(z, z, self.c2d) + assert_(res.shape == (2, 3)*2) + + def test_hermegrid3d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test values + tgt = np.einsum('i,j,k->ijk', y1, y2, y3) + res = herme.hermegrid3d(x1, x2, x3, self.c3d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = herme.hermegrid3d(z, z, z, self.c3d) + assert_(res.shape == (2, 3)*3) + + +class TestIntegral: + + def test_hermeint(self): + # check exceptions + assert_raises(TypeError, herme.hermeint, [0], .5) + assert_raises(ValueError, herme.hermeint, [0], -1) + assert_raises(ValueError, herme.hermeint, [0], 1, [0, 0]) + assert_raises(ValueError, herme.hermeint, [0], lbnd=[0]) + assert_raises(ValueError, herme.hermeint, [0], scl=[0]) + assert_raises(TypeError, herme.hermeint, [0], axis=.5) + + # test integration of zero polynomial + for i in range(2, 5): + k = [0]*(i - 2) + [1] + res = herme.hermeint([0], m=i, k=k) + assert_almost_equal(res, [0, 1]) + + # check single integration with integration constant + for i in range(5): + scl = i + 1 + pol = [0]*i + [1] + tgt = [i] + [0]*i + [1/scl] + hermepol = herme.poly2herme(pol) + hermeint = herme.hermeint(hermepol, m=1, k=[i]) + res = herme.herme2poly(hermeint) + assert_almost_equal(trim(res), trim(tgt)) + + # check single integration with integration constant and lbnd + for i in range(5): + scl = i + 1 + pol = [0]*i + [1] + hermepol = herme.poly2herme(pol) + hermeint = herme.hermeint(hermepol, m=1, k=[i], lbnd=-1) + assert_almost_equal(herme.hermeval(-1, hermeint), i) + + # check single integration with integration constant and scaling + for i in range(5): + scl = i + 1 + pol = [0]*i + [1] + tgt = [i] + [0]*i + [2/scl] + hermepol = herme.poly2herme(pol) + hermeint = herme.hermeint(hermepol, m=1, k=[i], scl=2) + res = herme.herme2poly(hermeint) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with default k + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = herme.hermeint(tgt, m=1) + res = herme.hermeint(pol, m=j) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with defined k + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = herme.hermeint(tgt, m=1, k=[k]) + res = herme.hermeint(pol, m=j, k=list(range(j))) + 
assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with lbnd + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = herme.hermeint(tgt, m=1, k=[k], lbnd=-1) + res = herme.hermeint(pol, m=j, k=list(range(j)), lbnd=-1) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with scaling + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = herme.hermeint(tgt, m=1, k=[k], scl=2) + res = herme.hermeint(pol, m=j, k=list(range(j)), scl=2) + assert_almost_equal(trim(res), trim(tgt)) + + def test_hermeint_axis(self): + # check that axis keyword works + c2d = np.random.random((3, 4)) + + tgt = np.vstack([herme.hermeint(c) for c in c2d.T]).T + res = herme.hermeint(c2d, axis=0) + assert_almost_equal(res, tgt) + + tgt = np.vstack([herme.hermeint(c) for c in c2d]) + res = herme.hermeint(c2d, axis=1) + assert_almost_equal(res, tgt) + + tgt = np.vstack([herme.hermeint(c, k=3) for c in c2d]) + res = herme.hermeint(c2d, k=3, axis=1) + assert_almost_equal(res, tgt) + + +class TestDerivative: + + def test_hermeder(self): + # check exceptions + assert_raises(TypeError, herme.hermeder, [0], .5) + assert_raises(ValueError, herme.hermeder, [0], -1) + + # check that zeroth derivative does nothing + for i in range(5): + tgt = [0]*i + [1] + res = herme.hermeder(tgt, m=0) + assert_equal(trim(res), trim(tgt)) + + # check that derivation is the inverse of integration + for i in range(5): + for j in range(2, 5): + tgt = [0]*i + [1] + res = herme.hermeder(herme.hermeint(tgt, m=j), m=j) + assert_almost_equal(trim(res), trim(tgt)) + + # check derivation with scaling + for i in range(5): + for j in range(2, 5): + tgt = [0]*i + [1] + res = herme.hermeder( + herme.hermeint(tgt, m=j, scl=2), m=j, scl=.5) + assert_almost_equal(trim(res), trim(tgt)) + + def test_hermeder_axis(self): + # check that axis keyword works + c2d = np.random.random((3, 4)) + + tgt = np.vstack([herme.hermeder(c) for c in c2d.T]).T + res = herme.hermeder(c2d, axis=0) + assert_almost_equal(res, tgt) + + tgt = np.vstack([herme.hermeder(c) for c in c2d]) + res = herme.hermeder(c2d, axis=1) + assert_almost_equal(res, tgt) + + +class TestVander: + # some random values in [-1, 1) + x = np.random.random((3, 5))*2 - 1 + + def test_hermevander(self): + # check for 1d x + x = np.arange(3) + v = herme.hermevander(x, 3) + assert_(v.shape == (3, 4)) + for i in range(4): + coef = [0]*i + [1] + assert_almost_equal(v[..., i], herme.hermeval(x, coef)) + + # check for 2d x + x = np.array([[1, 2], [3, 4], [5, 6]]) + v = herme.hermevander(x, 3) + assert_(v.shape == (3, 2, 4)) + for i in range(4): + coef = [0]*i + [1] + assert_almost_equal(v[..., i], herme.hermeval(x, coef)) + + def test_hermevander2d(self): + # also tests hermeval2d for non-square coefficient array + x1, x2, x3 = self.x + c = np.random.random((2, 3)) + van = herme.hermevander2d(x1, x2, [1, 2]) + tgt = herme.hermeval2d(x1, x2, c) + res = np.dot(van, c.flat) + assert_almost_equal(res, tgt) + + # check shape + van = herme.hermevander2d([x1], [x2], [1, 2]) + assert_(van.shape == (1, 5, 6)) + + def test_hermevander3d(self): + # also tests hermeval3d for non-square coefficient array + x1, x2, x3 = self.x + c = np.random.random((2, 3, 4)) + van = herme.hermevander3d(x1, x2, x3, [1, 2, 3]) + tgt = herme.hermeval3d(x1, x2, x3, c) + res = np.dot(van, c.flat) + assert_almost_equal(res, tgt) + + # check shape + van = herme.hermevander3d([x1], [x2], [x3], [1, 2, 3]) 
+ assert_(van.shape == (1, 5, 24)) + + +class TestFitting: + + def test_hermefit(self): + def f(x): + return x*(x - 1)*(x - 2) + + def f2(x): + return x**4 + x**2 + 1 + + # Test exceptions + assert_raises(ValueError, herme.hermefit, [1], [1], -1) + assert_raises(TypeError, herme.hermefit, [[1]], [1], 0) + assert_raises(TypeError, herme.hermefit, [], [1], 0) + assert_raises(TypeError, herme.hermefit, [1], [[[1]]], 0) + assert_raises(TypeError, herme.hermefit, [1, 2], [1], 0) + assert_raises(TypeError, herme.hermefit, [1], [1, 2], 0) + assert_raises(TypeError, herme.hermefit, [1], [1], 0, w=[[1]]) + assert_raises(TypeError, herme.hermefit, [1], [1], 0, w=[1, 1]) + assert_raises(ValueError, herme.hermefit, [1], [1], [-1,]) + assert_raises(ValueError, herme.hermefit, [1], [1], [2, -1, 6]) + assert_raises(TypeError, herme.hermefit, [1], [1], []) + + # Test fit + x = np.linspace(0, 2) + y = f(x) + # + coef3 = herme.hermefit(x, y, 3) + assert_equal(len(coef3), 4) + assert_almost_equal(herme.hermeval(x, coef3), y) + coef3 = herme.hermefit(x, y, [0, 1, 2, 3]) + assert_equal(len(coef3), 4) + assert_almost_equal(herme.hermeval(x, coef3), y) + # + coef4 = herme.hermefit(x, y, 4) + assert_equal(len(coef4), 5) + assert_almost_equal(herme.hermeval(x, coef4), y) + coef4 = herme.hermefit(x, y, [0, 1, 2, 3, 4]) + assert_equal(len(coef4), 5) + assert_almost_equal(herme.hermeval(x, coef4), y) + # check things still work if deg is not in strictly increasing order + coef4 = herme.hermefit(x, y, [2, 3, 4, 1, 0]) + assert_equal(len(coef4), 5) + assert_almost_equal(herme.hermeval(x, coef4), y) + # + coef2d = herme.hermefit(x, np.array([y, y]).T, 3) + assert_almost_equal(coef2d, np.array([coef3, coef3]).T) + coef2d = herme.hermefit(x, np.array([y, y]).T, [0, 1, 2, 3]) + assert_almost_equal(coef2d, np.array([coef3, coef3]).T) + # test weighting + w = np.zeros_like(x) + yw = y.copy() + w[1::2] = 1 + y[0::2] = 0 + wcoef3 = herme.hermefit(x, yw, 3, w=w) + assert_almost_equal(wcoef3, coef3) + wcoef3 = herme.hermefit(x, yw, [0, 1, 2, 3], w=w) + assert_almost_equal(wcoef3, coef3) + # + wcoef2d = herme.hermefit(x, np.array([yw, yw]).T, 3, w=w) + assert_almost_equal(wcoef2d, np.array([coef3, coef3]).T) + wcoef2d = herme.hermefit(x, np.array([yw, yw]).T, [0, 1, 2, 3], w=w) + assert_almost_equal(wcoef2d, np.array([coef3, coef3]).T) + # test scaling with complex x values whose squares + sum to zero. + x = [1, 1j, -1, -1j] + assert_almost_equal(herme.hermefit(x, x, 1), [0, 1]) + assert_almost_equal(herme.hermefit(x, x, [0, 1]), [0, 1]) + # test fitting only even HermiteE polynomials + x = np.linspace(-1, 1) + y = f2(x) + coef1 = herme.hermefit(x, y, 4) + assert_almost_equal(herme.hermeval(x, coef1), y) + coef2 = herme.hermefit(x, y, [0, 2, 4]) + assert_almost_equal(herme.hermeval(x, coef2), y) + assert_almost_equal(coef1, coef2) + + +class TestCompanion: + + def test_raises(self): + assert_raises(ValueError, herme.hermecompanion, []) + assert_raises(ValueError, herme.hermecompanion, [1]) + + def test_dimensions(self): + for i in range(1, 5): + coef = [0]*i + [1] + assert_(herme.hermecompanion(coef).shape == (i, i)) + + def test_linear_root(self): + assert_(herme.hermecompanion([1, 2])[0, 0] == -.5) + + +class TestGauss: + + def test_100(self): + x, w = herme.hermegauss(100) + + # test orthogonality. Note that the results need to be normalized, + otherwise the huge values that can arise from fast growing + functions like Laguerre can be very confusing.
+ v = herme.hermevander(x, 99) + vv = np.dot(v.T * w, v) + vd = 1/np.sqrt(vv.diagonal()) + vv = vd[:, None] * vv * vd + assert_almost_equal(vv, np.eye(100)) + + # check that the integral of 1 is correct + tgt = np.sqrt(2*np.pi) + assert_almost_equal(w.sum(), tgt) + + +class TestMisc: + + def test_hermefromroots(self): + res = herme.hermefromroots([]) + assert_almost_equal(trim(res), [1]) + for i in range(1, 5): + roots = np.cos(np.linspace(-np.pi, 0, 2*i + 1)[1::2]) + pol = herme.hermefromroots(roots) + res = herme.hermeval(roots, pol) + tgt = 0 + assert_(len(pol) == i + 1) + assert_almost_equal(herme.herme2poly(pol)[-1], 1) + assert_almost_equal(res, tgt) + + def test_hermeroots(self): + assert_almost_equal(herme.hermeroots([1]), []) + assert_almost_equal(herme.hermeroots([1, 1]), [-1]) + for i in range(2, 5): + tgt = np.linspace(-1, 1, i) + res = herme.hermeroots(herme.hermefromroots(tgt)) + assert_almost_equal(trim(res), trim(tgt)) + + def test_hermetrim(self): + coef = [2, -1, 1, 0] + + # Test exceptions + assert_raises(ValueError, herme.hermetrim, coef, -1) + + # Test results + assert_equal(herme.hermetrim(coef), coef[:-1]) + assert_equal(herme.hermetrim(coef, 1), coef[:-3]) + assert_equal(herme.hermetrim(coef, 2), [0]) + + def test_hermeline(self): + assert_equal(herme.hermeline(3, 4), [3, 4]) + + def test_herme2poly(self): + for i in range(10): + assert_almost_equal(herme.herme2poly([0]*i + [1]), Helist[i]) + + def test_poly2herme(self): + for i in range(10): + assert_almost_equal(herme.poly2herme(Helist[i]), [0]*i + [1]) + + def test_weight(self): + x = np.linspace(-5, 5, 11) + tgt = np.exp(-.5*x**2) + res = herme.hermeweight(x) + assert_almost_equal(res, tgt) diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/tests/test_laguerre.py b/.local/lib/python3.8/site-packages/numpy/polynomial/tests/test_laguerre.py new file mode 100644 index 0000000..227ef3c --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/polynomial/tests/test_laguerre.py @@ -0,0 +1,537 @@ +"""Tests for laguerre module. 
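+ +As a quick orientation (an illustrative sketch; TestMisc below is the real +check), the L0..L6 tables are the power-basis expansions of the Laguerre +polynomials, so for example:: + + import numpy.polynomial.laguerre as lag + lag.lag2poly([0, 0, 1]) # -> L2 = [1., -2., 0.5]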
+ +""" +from functools import reduce + +import numpy as np +import numpy.polynomial.laguerre as lag +from numpy.polynomial.polynomial import polyval +from numpy.testing import ( + assert_almost_equal, assert_raises, assert_equal, assert_, + ) + +L0 = np.array([1])/1 +L1 = np.array([1, -1])/1 +L2 = np.array([2, -4, 1])/2 +L3 = np.array([6, -18, 9, -1])/6 +L4 = np.array([24, -96, 72, -16, 1])/24 +L5 = np.array([120, -600, 600, -200, 25, -1])/120 +L6 = np.array([720, -4320, 5400, -2400, 450, -36, 1])/720 + +Llist = [L0, L1, L2, L3, L4, L5, L6] + + +def trim(x): + return lag.lagtrim(x, tol=1e-6) + + +class TestConstants: + + def test_lagdomain(self): + assert_equal(lag.lagdomain, [0, 1]) + + def test_lagzero(self): + assert_equal(lag.lagzero, [0]) + + def test_lagone(self): + assert_equal(lag.lagone, [1]) + + def test_lagx(self): + assert_equal(lag.lagx, [1, -1]) + + +class TestArithmetic: + x = np.linspace(-3, 3, 100) + + def test_lagadd(self): + for i in range(5): + for j in range(5): + msg = f"At i={i}, j={j}" + tgt = np.zeros(max(i, j) + 1) + tgt[i] += 1 + tgt[j] += 1 + res = lag.lagadd([0]*i + [1], [0]*j + [1]) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + def test_lagsub(self): + for i in range(5): + for j in range(5): + msg = f"At i={i}, j={j}" + tgt = np.zeros(max(i, j) + 1) + tgt[i] += 1 + tgt[j] -= 1 + res = lag.lagsub([0]*i + [1], [0]*j + [1]) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + def test_lagmulx(self): + assert_equal(lag.lagmulx([0]), [0]) + assert_equal(lag.lagmulx([1]), [1, -1]) + for i in range(1, 5): + ser = [0]*i + [1] + tgt = [0]*(i - 1) + [-i, 2*i + 1, -(i + 1)] + assert_almost_equal(lag.lagmulx(ser), tgt) + + def test_lagmul(self): + # check values of result + for i in range(5): + pol1 = [0]*i + [1] + val1 = lag.lagval(self.x, pol1) + for j in range(5): + msg = f"At i={i}, j={j}" + pol2 = [0]*j + [1] + val2 = lag.lagval(self.x, pol2) + pol3 = lag.lagmul(pol1, pol2) + val3 = lag.lagval(self.x, pol3) + assert_(len(pol3) == i + j + 1, msg) + assert_almost_equal(val3, val1*val2, err_msg=msg) + + def test_lagdiv(self): + for i in range(5): + for j in range(5): + msg = f"At i={i}, j={j}" + ci = [0]*i + [1] + cj = [0]*j + [1] + tgt = lag.lagadd(ci, cj) + quo, rem = lag.lagdiv(tgt, ci) + res = lag.lagadd(lag.lagmul(quo, ci), rem) + assert_almost_equal(trim(res), trim(tgt), err_msg=msg) + + def test_lagpow(self): + for i in range(5): + for j in range(5): + msg = f"At i={i}, j={j}" + c = np.arange(i + 1) + tgt = reduce(lag.lagmul, [c]*j, np.array([1])) + res = lag.lagpow(c, j) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + +class TestEvaluation: + # coefficients of 1 + 2*x + 3*x**2 + c1d = np.array([9., -14., 6.]) + c2d = np.einsum('i,j->ij', c1d, c1d) + c3d = np.einsum('i,j,k->ijk', c1d, c1d, c1d) + + # some random values in [-1, 1) + x = np.random.random((3, 5))*2 - 1 + y = polyval(x, [1., 2., 3.]) + + def test_lagval(self): + #check empty input + assert_equal(lag.lagval([], [1]).size, 0) + + #check normal input) + x = np.linspace(-1, 1) + y = [polyval(x, c) for c in Llist] + for i in range(7): + msg = f"At i={i}" + tgt = y[i] + res = lag.lagval(x, [0]*i + [1]) + assert_almost_equal(res, tgt, err_msg=msg) + + #check that shape is preserved + for i in range(3): + dims = [2]*i + x = np.zeros(dims) + assert_equal(lag.lagval(x, [1]).shape, dims) + assert_equal(lag.lagval(x, [1, 0]).shape, dims) + assert_equal(lag.lagval(x, [1, 0, 0]).shape, dims) + + def test_lagval2d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test exceptions + 
assert_raises(ValueError, lag.lagval2d, x1, x2[:2], self.c2d) + + #test values + tgt = y1*y2 + res = lag.lagval2d(x1, x2, self.c2d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = lag.lagval2d(z, z, self.c2d) + assert_(res.shape == (2, 3)) + + def test_lagval3d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test exceptions + assert_raises(ValueError, lag.lagval3d, x1, x2, x3[:2], self.c3d) + + #test values + tgt = y1*y2*y3 + res = lag.lagval3d(x1, x2, x3, self.c3d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = lag.lagval3d(z, z, z, self.c3d) + assert_(res.shape == (2, 3)) + + def test_laggrid2d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test values + tgt = np.einsum('i,j->ij', y1, y2) + res = lag.laggrid2d(x1, x2, self.c2d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = lag.laggrid2d(z, z, self.c2d) + assert_(res.shape == (2, 3)*2) + + def test_laggrid3d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test values + tgt = np.einsum('i,j,k->ijk', y1, y2, y3) + res = lag.laggrid3d(x1, x2, x3, self.c3d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = lag.laggrid3d(z, z, z, self.c3d) + assert_(res.shape == (2, 3)*3) + + +class TestIntegral: + + def test_lagint(self): + # check exceptions + assert_raises(TypeError, lag.lagint, [0], .5) + assert_raises(ValueError, lag.lagint, [0], -1) + assert_raises(ValueError, lag.lagint, [0], 1, [0, 0]) + assert_raises(ValueError, lag.lagint, [0], lbnd=[0]) + assert_raises(ValueError, lag.lagint, [0], scl=[0]) + assert_raises(TypeError, lag.lagint, [0], axis=.5) + + # test integration of zero polynomial + for i in range(2, 5): + k = [0]*(i - 2) + [1] + res = lag.lagint([0], m=i, k=k) + assert_almost_equal(res, [1, -1]) + + # check single integration with integration constant + for i in range(5): + scl = i + 1 + pol = [0]*i + [1] + tgt = [i] + [0]*i + [1/scl] + lagpol = lag.poly2lag(pol) + lagint = lag.lagint(lagpol, m=1, k=[i]) + res = lag.lag2poly(lagint) + assert_almost_equal(trim(res), trim(tgt)) + + # check single integration with integration constant and lbnd + for i in range(5): + scl = i + 1 + pol = [0]*i + [1] + lagpol = lag.poly2lag(pol) + lagint = lag.lagint(lagpol, m=1, k=[i], lbnd=-1) + assert_almost_equal(lag.lagval(-1, lagint), i) + + # check single integration with integration constant and scaling + for i in range(5): + scl = i + 1 + pol = [0]*i + [1] + tgt = [i] + [0]*i + [2/scl] + lagpol = lag.poly2lag(pol) + lagint = lag.lagint(lagpol, m=1, k=[i], scl=2) + res = lag.lag2poly(lagint) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with default k + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = lag.lagint(tgt, m=1) + res = lag.lagint(pol, m=j) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with defined k + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = lag.lagint(tgt, m=1, k=[k]) + res = lag.lagint(pol, m=j, k=list(range(j))) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with lbnd + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = lag.lagint(tgt, m=1, k=[k], lbnd=-1) + res = lag.lagint(pol, m=j, k=list(range(j)), lbnd=-1) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with scaling + for i in 
range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = lag.lagint(tgt, m=1, k=[k], scl=2) + res = lag.lagint(pol, m=j, k=list(range(j)), scl=2) + assert_almost_equal(trim(res), trim(tgt)) + + def test_lagint_axis(self): + # check that axis keyword works + c2d = np.random.random((3, 4)) + + tgt = np.vstack([lag.lagint(c) for c in c2d.T]).T + res = lag.lagint(c2d, axis=0) + assert_almost_equal(res, tgt) + + tgt = np.vstack([lag.lagint(c) for c in c2d]) + res = lag.lagint(c2d, axis=1) + assert_almost_equal(res, tgt) + + tgt = np.vstack([lag.lagint(c, k=3) for c in c2d]) + res = lag.lagint(c2d, k=3, axis=1) + assert_almost_equal(res, tgt) + + +class TestDerivative: + + def test_lagder(self): + # check exceptions + assert_raises(TypeError, lag.lagder, [0], .5) + assert_raises(ValueError, lag.lagder, [0], -1) + + # check that zeroth derivative does nothing + for i in range(5): + tgt = [0]*i + [1] + res = lag.lagder(tgt, m=0) + assert_equal(trim(res), trim(tgt)) + + # check that derivation is the inverse of integration + for i in range(5): + for j in range(2, 5): + tgt = [0]*i + [1] + res = lag.lagder(lag.lagint(tgt, m=j), m=j) + assert_almost_equal(trim(res), trim(tgt)) + + # check derivation with scaling + for i in range(5): + for j in range(2, 5): + tgt = [0]*i + [1] + res = lag.lagder(lag.lagint(tgt, m=j, scl=2), m=j, scl=.5) + assert_almost_equal(trim(res), trim(tgt)) + + def test_lagder_axis(self): + # check that axis keyword works + c2d = np.random.random((3, 4)) + + tgt = np.vstack([lag.lagder(c) for c in c2d.T]).T + res = lag.lagder(c2d, axis=0) + assert_almost_equal(res, tgt) + + tgt = np.vstack([lag.lagder(c) for c in c2d]) + res = lag.lagder(c2d, axis=1) + assert_almost_equal(res, tgt) + + +class TestVander: + # some random values in [-1, 1) + x = np.random.random((3, 5))*2 - 1 + + def test_lagvander(self): + # check for 1d x + x = np.arange(3) + v = lag.lagvander(x, 3) + assert_(v.shape == (3, 4)) + for i in range(4): + coef = [0]*i + [1] + assert_almost_equal(v[..., i], lag.lagval(x, coef)) + + # check for 2d x + x = np.array([[1, 2], [3, 4], [5, 6]]) + v = lag.lagvander(x, 3) + assert_(v.shape == (3, 2, 4)) + for i in range(4): + coef = [0]*i + [1] + assert_almost_equal(v[..., i], lag.lagval(x, coef)) + + def test_lagvander2d(self): + # also tests lagval2d for non-square coefficient array + x1, x2, x3 = self.x + c = np.random.random((2, 3)) + van = lag.lagvander2d(x1, x2, [1, 2]) + tgt = lag.lagval2d(x1, x2, c) + res = np.dot(van, c.flat) + assert_almost_equal(res, tgt) + + # check shape + van = lag.lagvander2d([x1], [x2], [1, 2]) + assert_(van.shape == (1, 5, 6)) + + def test_lagvander3d(self): + # also tests lagval3d for non-square coefficient array + x1, x2, x3 = self.x + c = np.random.random((2, 3, 4)) + van = lag.lagvander3d(x1, x2, x3, [1, 2, 3]) + tgt = lag.lagval3d(x1, x2, x3, c) + res = np.dot(van, c.flat) + assert_almost_equal(res, tgt) + + # check shape + van = lag.lagvander3d([x1], [x2], [x3], [1, 2, 3]) + assert_(van.shape == (1, 5, 24)) + + +class TestFitting: + + def test_lagfit(self): + def f(x): + return x*(x - 1)*(x - 2) + + # Test exceptions + assert_raises(ValueError, lag.lagfit, [1], [1], -1) + assert_raises(TypeError, lag.lagfit, [[1]], [1], 0) + assert_raises(TypeError, lag.lagfit, [], [1], 0) + assert_raises(TypeError, lag.lagfit, [1], [[[1]]], 0) + assert_raises(TypeError, lag.lagfit, [1, 2], [1], 0) + assert_raises(TypeError, lag.lagfit, [1], [1, 2], 0) + assert_raises(TypeError, lag.lagfit, [1], [1], 0, w=[[1]]) 
+ assert_raises(TypeError, lag.lagfit, [1], [1], 0, w=[1, 1]) + assert_raises(ValueError, lag.lagfit, [1], [1], [-1,]) + assert_raises(ValueError, lag.lagfit, [1], [1], [2, -1, 6]) + assert_raises(TypeError, lag.lagfit, [1], [1], []) + + # Test fit + x = np.linspace(0, 2) + y = f(x) + # + coef3 = lag.lagfit(x, y, 3) + assert_equal(len(coef3), 4) + assert_almost_equal(lag.lagval(x, coef3), y) + coef3 = lag.lagfit(x, y, [0, 1, 2, 3]) + assert_equal(len(coef3), 4) + assert_almost_equal(lag.lagval(x, coef3), y) + # + coef4 = lag.lagfit(x, y, 4) + assert_equal(len(coef4), 5) + assert_almost_equal(lag.lagval(x, coef4), y) + coef4 = lag.lagfit(x, y, [0, 1, 2, 3, 4]) + assert_equal(len(coef4), 5) + assert_almost_equal(lag.lagval(x, coef4), y) + # + coef2d = lag.lagfit(x, np.array([y, y]).T, 3) + assert_almost_equal(coef2d, np.array([coef3, coef3]).T) + coef2d = lag.lagfit(x, np.array([y, y]).T, [0, 1, 2, 3]) + assert_almost_equal(coef2d, np.array([coef3, coef3]).T) + # test weighting + w = np.zeros_like(x) + yw = y.copy() + w[1::2] = 1 + y[0::2] = 0 + wcoef3 = lag.lagfit(x, yw, 3, w=w) + assert_almost_equal(wcoef3, coef3) + wcoef3 = lag.lagfit(x, yw, [0, 1, 2, 3], w=w) + assert_almost_equal(wcoef3, coef3) + # + wcoef2d = lag.lagfit(x, np.array([yw, yw]).T, 3, w=w) + assert_almost_equal(wcoef2d, np.array([coef3, coef3]).T) + wcoef2d = lag.lagfit(x, np.array([yw, yw]).T, [0, 1, 2, 3], w=w) + assert_almost_equal(wcoef2d, np.array([coef3, coef3]).T) + # test scaling with complex x values whose squares + sum to zero. + x = [1, 1j, -1, -1j] + assert_almost_equal(lag.lagfit(x, x, 1), [1, -1]) + assert_almost_equal(lag.lagfit(x, x, [0, 1]), [1, -1]) + + +class TestCompanion: + + def test_raises(self): + assert_raises(ValueError, lag.lagcompanion, []) + assert_raises(ValueError, lag.lagcompanion, [1]) + + def test_dimensions(self): + for i in range(1, 5): + coef = [0]*i + [1] + assert_(lag.lagcompanion(coef).shape == (i, i)) + + def test_linear_root(self): + assert_(lag.lagcompanion([1, 2])[0, 0] == 1.5) + + +class TestGauss: + + def test_100(self): + x, w = lag.laggauss(100) + + # test orthogonality. Note that the results need to be normalized, + otherwise the huge values that can arise from fast growing + functions like Laguerre can be very confusing.
+ v = lag.lagvander(x, 99) + vv = np.dot(v.T * w, v) + vd = 1/np.sqrt(vv.diagonal()) + vv = vd[:, None] * vv * vd + assert_almost_equal(vv, np.eye(100)) + + # check that the integral of 1 is correct + tgt = 1.0 + assert_almost_equal(w.sum(), tgt) + + +class TestMisc: + + def test_lagfromroots(self): + res = lag.lagfromroots([]) + assert_almost_equal(trim(res), [1]) + for i in range(1, 5): + roots = np.cos(np.linspace(-np.pi, 0, 2*i + 1)[1::2]) + pol = lag.lagfromroots(roots) + res = lag.lagval(roots, pol) + tgt = 0 + assert_(len(pol) == i + 1) + assert_almost_equal(lag.lag2poly(pol)[-1], 1) + assert_almost_equal(res, tgt) + + def test_lagroots(self): + assert_almost_equal(lag.lagroots([1]), []) + assert_almost_equal(lag.lagroots([0, 1]), [1]) + for i in range(2, 5): + tgt = np.linspace(0, 3, i) + res = lag.lagroots(lag.lagfromroots(tgt)) + assert_almost_equal(trim(res), trim(tgt)) + + def test_lagtrim(self): + coef = [2, -1, 1, 0] + + # Test exceptions + assert_raises(ValueError, lag.lagtrim, coef, -1) + + # Test results + assert_equal(lag.lagtrim(coef), coef[:-1]) + assert_equal(lag.lagtrim(coef, 1), coef[:-3]) + assert_equal(lag.lagtrim(coef, 2), [0]) + + def test_lagline(self): + assert_equal(lag.lagline(3, 4), [7, -4]) + + def test_lag2poly(self): + for i in range(7): + assert_almost_equal(lag.lag2poly([0]*i + [1]), Llist[i]) + + def test_poly2lag(self): + for i in range(7): + assert_almost_equal(lag.poly2lag(Llist[i]), [0]*i + [1]) + + def test_weight(self): + x = np.linspace(0, 10, 11) + tgt = np.exp(-x) + res = lag.lagweight(x) + assert_almost_equal(res, tgt) diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/tests/test_legendre.py b/.local/lib/python3.8/site-packages/numpy/polynomial/tests/test_legendre.py new file mode 100644 index 0000000..92399c1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/polynomial/tests/test_legendre.py @@ -0,0 +1,568 @@ +"""Tests for legendre module. 
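+ +As a quick orientation (an illustrative sketch, not executed by the suite), +the L0..L9 tables are the power-basis expansions of the Legendre +polynomials, so for example:: + + import numpy.polynomial.legendre as leg + leg.leg2poly([0, 0, 1]) # -> L2 = [-0.5, 0., 1.5]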
+ +""" +from functools import reduce + +import numpy as np +import numpy.polynomial.legendre as leg +from numpy.polynomial.polynomial import polyval +from numpy.testing import ( + assert_almost_equal, assert_raises, assert_equal, assert_, + ) + +L0 = np.array([1]) +L1 = np.array([0, 1]) +L2 = np.array([-1, 0, 3])/2 +L3 = np.array([0, -3, 0, 5])/2 +L4 = np.array([3, 0, -30, 0, 35])/8 +L5 = np.array([0, 15, 0, -70, 0, 63])/8 +L6 = np.array([-5, 0, 105, 0, -315, 0, 231])/16 +L7 = np.array([0, -35, 0, 315, 0, -693, 0, 429])/16 +L8 = np.array([35, 0, -1260, 0, 6930, 0, -12012, 0, 6435])/128 +L9 = np.array([0, 315, 0, -4620, 0, 18018, 0, -25740, 0, 12155])/128 + +Llist = [L0, L1, L2, L3, L4, L5, L6, L7, L8, L9] + + +def trim(x): + return leg.legtrim(x, tol=1e-6) + + +class TestConstants: + + def test_legdomain(self): + assert_equal(leg.legdomain, [-1, 1]) + + def test_legzero(self): + assert_equal(leg.legzero, [0]) + + def test_legone(self): + assert_equal(leg.legone, [1]) + + def test_legx(self): + assert_equal(leg.legx, [0, 1]) + + +class TestArithmetic: + x = np.linspace(-1, 1, 100) + + def test_legadd(self): + for i in range(5): + for j in range(5): + msg = f"At i={i}, j={j}" + tgt = np.zeros(max(i, j) + 1) + tgt[i] += 1 + tgt[j] += 1 + res = leg.legadd([0]*i + [1], [0]*j + [1]) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + def test_legsub(self): + for i in range(5): + for j in range(5): + msg = f"At i={i}, j={j}" + tgt = np.zeros(max(i, j) + 1) + tgt[i] += 1 + tgt[j] -= 1 + res = leg.legsub([0]*i + [1], [0]*j + [1]) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + def test_legmulx(self): + assert_equal(leg.legmulx([0]), [0]) + assert_equal(leg.legmulx([1]), [0, 1]) + for i in range(1, 5): + tmp = 2*i + 1 + ser = [0]*i + [1] + tgt = [0]*(i - 1) + [i/tmp, 0, (i + 1)/tmp] + assert_equal(leg.legmulx(ser), tgt) + + def test_legmul(self): + # check values of result + for i in range(5): + pol1 = [0]*i + [1] + val1 = leg.legval(self.x, pol1) + for j in range(5): + msg = f"At i={i}, j={j}" + pol2 = [0]*j + [1] + val2 = leg.legval(self.x, pol2) + pol3 = leg.legmul(pol1, pol2) + val3 = leg.legval(self.x, pol3) + assert_(len(pol3) == i + j + 1, msg) + assert_almost_equal(val3, val1*val2, err_msg=msg) + + def test_legdiv(self): + for i in range(5): + for j in range(5): + msg = f"At i={i}, j={j}" + ci = [0]*i + [1] + cj = [0]*j + [1] + tgt = leg.legadd(ci, cj) + quo, rem = leg.legdiv(tgt, ci) + res = leg.legadd(leg.legmul(quo, ci), rem) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + def test_legpow(self): + for i in range(5): + for j in range(5): + msg = f"At i={i}, j={j}" + c = np.arange(i + 1) + tgt = reduce(leg.legmul, [c]*j, np.array([1])) + res = leg.legpow(c, j) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + +class TestEvaluation: + # coefficients of 1 + 2*x + 3*x**2 + c1d = np.array([2., 2., 2.]) + c2d = np.einsum('i,j->ij', c1d, c1d) + c3d = np.einsum('i,j,k->ijk', c1d, c1d, c1d) + + # some random values in [-1, 1) + x = np.random.random((3, 5))*2 - 1 + y = polyval(x, [1., 2., 3.]) + + def test_legval(self): + #check empty input + assert_equal(leg.legval([], [1]).size, 0) + + #check normal input) + x = np.linspace(-1, 1) + y = [polyval(x, c) for c in Llist] + for i in range(10): + msg = f"At i={i}" + tgt = y[i] + res = leg.legval(x, [0]*i + [1]) + assert_almost_equal(res, tgt, err_msg=msg) + + #check that shape is preserved + for i in range(3): + dims = [2]*i + x = np.zeros(dims) + assert_equal(leg.legval(x, [1]).shape, dims) + assert_equal(leg.legval(x, [1, 0]).shape, 
dims) + assert_equal(leg.legval(x, [1, 0, 0]).shape, dims) + + def test_legval2d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test exceptions + assert_raises(ValueError, leg.legval2d, x1, x2[:2], self.c2d) + + #test values + tgt = y1*y2 + res = leg.legval2d(x1, x2, self.c2d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = leg.legval2d(z, z, self.c2d) + assert_(res.shape == (2, 3)) + + def test_legval3d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test exceptions + assert_raises(ValueError, leg.legval3d, x1, x2, x3[:2], self.c3d) + + #test values + tgt = y1*y2*y3 + res = leg.legval3d(x1, x2, x3, self.c3d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = leg.legval3d(z, z, z, self.c3d) + assert_(res.shape == (2, 3)) + + def test_leggrid2d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test values + tgt = np.einsum('i,j->ij', y1, y2) + res = leg.leggrid2d(x1, x2, self.c2d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = leg.leggrid2d(z, z, self.c2d) + assert_(res.shape == (2, 3)*2) + + def test_leggrid3d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test values + tgt = np.einsum('i,j,k->ijk', y1, y2, y3) + res = leg.leggrid3d(x1, x2, x3, self.c3d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = leg.leggrid3d(z, z, z, self.c3d) + assert_(res.shape == (2, 3)*3) + + +class TestIntegral: + + def test_legint(self): + # check exceptions + assert_raises(TypeError, leg.legint, [0], .5) + assert_raises(ValueError, leg.legint, [0], -1) + assert_raises(ValueError, leg.legint, [0], 1, [0, 0]) + assert_raises(ValueError, leg.legint, [0], lbnd=[0]) + assert_raises(ValueError, leg.legint, [0], scl=[0]) + assert_raises(TypeError, leg.legint, [0], axis=.5) + + # test integration of zero polynomial + for i in range(2, 5): + k = [0]*(i - 2) + [1] + res = leg.legint([0], m=i, k=k) + assert_almost_equal(res, [0, 1]) + + # check single integration with integration constant + for i in range(5): + scl = i + 1 + pol = [0]*i + [1] + tgt = [i] + [0]*i + [1/scl] + legpol = leg.poly2leg(pol) + legint = leg.legint(legpol, m=1, k=[i]) + res = leg.leg2poly(legint) + assert_almost_equal(trim(res), trim(tgt)) + + # check single integration with integration constant and lbnd + for i in range(5): + scl = i + 1 + pol = [0]*i + [1] + legpol = leg.poly2leg(pol) + legint = leg.legint(legpol, m=1, k=[i], lbnd=-1) + assert_almost_equal(leg.legval(-1, legint), i) + + # check single integration with integration constant and scaling + for i in range(5): + scl = i + 1 + pol = [0]*i + [1] + tgt = [i] + [0]*i + [2/scl] + legpol = leg.poly2leg(pol) + legint = leg.legint(legpol, m=1, k=[i], scl=2) + res = leg.leg2poly(legint) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with default k + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = leg.legint(tgt, m=1) + res = leg.legint(pol, m=j) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with defined k + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = leg.legint(tgt, m=1, k=[k]) + res = leg.legint(pol, m=j, k=list(range(j))) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with lbnd + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = leg.legint(tgt, m=1, k=[k], lbnd=-1) + res = 
leg.legint(pol, m=j, k=list(range(j)), lbnd=-1) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with scaling + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = leg.legint(tgt, m=1, k=[k], scl=2) + res = leg.legint(pol, m=j, k=list(range(j)), scl=2) + assert_almost_equal(trim(res), trim(tgt)) + + def test_legint_axis(self): + # check that axis keyword works + c2d = np.random.random((3, 4)) + + tgt = np.vstack([leg.legint(c) for c in c2d.T]).T + res = leg.legint(c2d, axis=0) + assert_almost_equal(res, tgt) + + tgt = np.vstack([leg.legint(c) for c in c2d]) + res = leg.legint(c2d, axis=1) + assert_almost_equal(res, tgt) + + tgt = np.vstack([leg.legint(c, k=3) for c in c2d]) + res = leg.legint(c2d, k=3, axis=1) + assert_almost_equal(res, tgt) + + def test_legint_zerointord(self): + assert_equal(leg.legint((1, 2, 3), 0), (1, 2, 3)) + + +class TestDerivative: + + def test_legder(self): + # check exceptions + assert_raises(TypeError, leg.legder, [0], .5) + assert_raises(ValueError, leg.legder, [0], -1) + + # check that zeroth derivative does nothing + for i in range(5): + tgt = [0]*i + [1] + res = leg.legder(tgt, m=0) + assert_equal(trim(res), trim(tgt)) + + # check that derivation is the inverse of integration + for i in range(5): + for j in range(2, 5): + tgt = [0]*i + [1] + res = leg.legder(leg.legint(tgt, m=j), m=j) + assert_almost_equal(trim(res), trim(tgt)) + + # check derivation with scaling + for i in range(5): + for j in range(2, 5): + tgt = [0]*i + [1] + res = leg.legder(leg.legint(tgt, m=j, scl=2), m=j, scl=.5) + assert_almost_equal(trim(res), trim(tgt)) + + def test_legder_axis(self): + # check that axis keyword works + c2d = np.random.random((3, 4)) + + tgt = np.vstack([leg.legder(c) for c in c2d.T]).T + res = leg.legder(c2d, axis=0) + assert_almost_equal(res, tgt) + + tgt = np.vstack([leg.legder(c) for c in c2d]) + res = leg.legder(c2d, axis=1) + assert_almost_equal(res, tgt) + + def test_legder_orderhigherthancoeff(self): + c = (1, 2, 3, 4) + assert_equal(leg.legder(c, 4), [0]) + +class TestVander: + # some random values in [-1, 1) + x = np.random.random((3, 5))*2 - 1 + + def test_legvander(self): + # check for 1d x + x = np.arange(3) + v = leg.legvander(x, 3) + assert_(v.shape == (3, 4)) + for i in range(4): + coef = [0]*i + [1] + assert_almost_equal(v[..., i], leg.legval(x, coef)) + + # check for 2d x + x = np.array([[1, 2], [3, 4], [5, 6]]) + v = leg.legvander(x, 3) + assert_(v.shape == (3, 2, 4)) + for i in range(4): + coef = [0]*i + [1] + assert_almost_equal(v[..., i], leg.legval(x, coef)) + + def test_legvander2d(self): + # also tests polyval2d for non-square coefficient array + x1, x2, x3 = self.x + c = np.random.random((2, 3)) + van = leg.legvander2d(x1, x2, [1, 2]) + tgt = leg.legval2d(x1, x2, c) + res = np.dot(van, c.flat) + assert_almost_equal(res, tgt) + + # check shape + van = leg.legvander2d([x1], [x2], [1, 2]) + assert_(van.shape == (1, 5, 6)) + + def test_legvander3d(self): + # also tests polyval3d for non-square coefficient array + x1, x2, x3 = self.x + c = np.random.random((2, 3, 4)) + van = leg.legvander3d(x1, x2, x3, [1, 2, 3]) + tgt = leg.legval3d(x1, x2, x3, c) + res = np.dot(van, c.flat) + assert_almost_equal(res, tgt) + + # check shape + van = leg.legvander3d([x1], [x2], [x3], [1, 2, 3]) + assert_(van.shape == (1, 5, 24)) + + def test_legvander_negdeg(self): + assert_raises(ValueError, leg.legvander, (1, 2, 3), -1) + + +class TestFitting: + + def test_legfit(self): + def 
f(x): + return x*(x - 1)*(x - 2) + + def f2(x): + return x**4 + x**2 + 1 + + # Test exceptions + assert_raises(ValueError, leg.legfit, [1], [1], -1) + assert_raises(TypeError, leg.legfit, [[1]], [1], 0) + assert_raises(TypeError, leg.legfit, [], [1], 0) + assert_raises(TypeError, leg.legfit, [1], [[[1]]], 0) + assert_raises(TypeError, leg.legfit, [1, 2], [1], 0) + assert_raises(TypeError, leg.legfit, [1], [1, 2], 0) + assert_raises(TypeError, leg.legfit, [1], [1], 0, w=[[1]]) + assert_raises(TypeError, leg.legfit, [1], [1], 0, w=[1, 1]) + assert_raises(ValueError, leg.legfit, [1], [1], [-1,]) + assert_raises(ValueError, leg.legfit, [1], [1], [2, -1, 6]) + assert_raises(TypeError, leg.legfit, [1], [1], []) + + # Test fit + x = np.linspace(0, 2) + y = f(x) + # + coef3 = leg.legfit(x, y, 3) + assert_equal(len(coef3), 4) + assert_almost_equal(leg.legval(x, coef3), y) + coef3 = leg.legfit(x, y, [0, 1, 2, 3]) + assert_equal(len(coef3), 4) + assert_almost_equal(leg.legval(x, coef3), y) + # + coef4 = leg.legfit(x, y, 4) + assert_equal(len(coef4), 5) + assert_almost_equal(leg.legval(x, coef4), y) + coef4 = leg.legfit(x, y, [0, 1, 2, 3, 4]) + assert_equal(len(coef4), 5) + assert_almost_equal(leg.legval(x, coef4), y) + # check things still work if deg is not in strictly increasing order + coef4 = leg.legfit(x, y, [2, 3, 4, 1, 0]) + assert_equal(len(coef4), 5) + assert_almost_equal(leg.legval(x, coef4), y) + # + coef2d = leg.legfit(x, np.array([y, y]).T, 3) + assert_almost_equal(coef2d, np.array([coef3, coef3]).T) + coef2d = leg.legfit(x, np.array([y, y]).T, [0, 1, 2, 3]) + assert_almost_equal(coef2d, np.array([coef3, coef3]).T) + # test weighting; points with zero weight get corrupted y values + w = np.zeros_like(x) + yw = y.copy() + w[1::2] = 1 + yw[0::2] = 0 + wcoef3 = leg.legfit(x, yw, 3, w=w) + assert_almost_equal(wcoef3, coef3) + wcoef3 = leg.legfit(x, yw, [0, 1, 2, 3], w=w) + assert_almost_equal(wcoef3, coef3) + # + wcoef2d = leg.legfit(x, np.array([yw, yw]).T, 3, w=w) + assert_almost_equal(wcoef2d, np.array([coef3, coef3]).T) + wcoef2d = leg.legfit(x, np.array([yw, yw]).T, [0, 1, 2, 3], w=w) + assert_almost_equal(wcoef2d, np.array([coef3, coef3]).T) + # test scaling with complex x values whose squares + # sum to zero. + x = [1, 1j, -1, -1j] + assert_almost_equal(leg.legfit(x, x, 1), [0, 1]) + assert_almost_equal(leg.legfit(x, x, [0, 1]), [0, 1]) + # test fitting only even Legendre polynomials + x = np.linspace(-1, 1) + y = f2(x) + coef1 = leg.legfit(x, y, 4) + assert_almost_equal(leg.legval(x, coef1), y) + coef2 = leg.legfit(x, y, [0, 2, 4]) + assert_almost_equal(leg.legval(x, coef2), y) + assert_almost_equal(coef1, coef2) + + +class TestCompanion: + + def test_raises(self): + assert_raises(ValueError, leg.legcompanion, []) + assert_raises(ValueError, leg.legcompanion, [1]) + + def test_dimensions(self): + for i in range(1, 5): + coef = [0]*i + [1] + assert_(leg.legcompanion(coef).shape == (i, i)) + + def test_linear_root(self): + assert_(leg.legcompanion([1, 2])[0, 0] == -.5) + + +class TestGauss: + + def test_100(self): + x, w = leg.leggauss(100) + + # test orthogonality. Note that the results need to be normalized, + # otherwise the huge values that can arise from fast growing + # functions like Laguerre can be very confusing.
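+ # (The check below forms np.dot(v.T * w, v), the Gram matrix of the + # basis under the L2 inner product on [-1, 1]; for Legendre its + # diagonal is 2/(2*k + 1), so rescaling rows and columns by + # 1/sqrt(diagonal) should recover the identity matrix.)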
+ v = leg.legvander(x, 99) + vv = np.dot(v.T * w, v) + vd = 1/np.sqrt(vv.diagonal()) + vv = vd[:, None] * vv * vd + assert_almost_equal(vv, np.eye(100)) + + # check that the integral of 1 is correct + tgt = 2.0 + assert_almost_equal(w.sum(), tgt) + + +class TestMisc: + + def test_legfromroots(self): + res = leg.legfromroots([]) + assert_almost_equal(trim(res), [1]) + for i in range(1, 5): + roots = np.cos(np.linspace(-np.pi, 0, 2*i + 1)[1::2]) + pol = leg.legfromroots(roots) + res = leg.legval(roots, pol) + tgt = 0 + assert_(len(pol) == i + 1) + assert_almost_equal(leg.leg2poly(pol)[-1], 1) + assert_almost_equal(res, tgt) + + def test_legroots(self): + assert_almost_equal(leg.legroots([1]), []) + assert_almost_equal(leg.legroots([1, 2]), [-.5]) + for i in range(2, 5): + tgt = np.linspace(-1, 1, i) + res = leg.legroots(leg.legfromroots(tgt)) + assert_almost_equal(trim(res), trim(tgt)) + + def test_legtrim(self): + coef = [2, -1, 1, 0] + + # Test exceptions + assert_raises(ValueError, leg.legtrim, coef, -1) + + # Test results + assert_equal(leg.legtrim(coef), coef[:-1]) + assert_equal(leg.legtrim(coef, 1), coef[:-3]) + assert_equal(leg.legtrim(coef, 2), [0]) + + def test_legline(self): + assert_equal(leg.legline(3, 4), [3, 4]) + + def test_legline_zeroscl(self): + assert_equal(leg.legline(3, 0), [3]) + + def test_leg2poly(self): + for i in range(10): + assert_almost_equal(leg.leg2poly([0]*i + [1]), Llist[i]) + + def test_poly2leg(self): + for i in range(10): + assert_almost_equal(leg.poly2leg(Llist[i]), [0]*i + [1]) + + def test_weight(self): + x = np.linspace(-1, 1, 11) + tgt = 1. + res = leg.legweight(x) + assert_almost_equal(res, tgt) diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/tests/test_polynomial.py b/.local/lib/python3.8/site-packages/numpy/polynomial/tests/test_polynomial.py new file mode 100644 index 0000000..a0a09fc --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/polynomial/tests/test_polynomial.py @@ -0,0 +1,600 @@ +"""Tests for polynomial module. 
+ +""" +from functools import reduce + +import numpy as np +import numpy.polynomial.polynomial as poly +from numpy.testing import ( + assert_almost_equal, assert_raises, assert_equal, assert_, + assert_warns, assert_array_equal, assert_raises_regex) + + +def trim(x): + return poly.polytrim(x, tol=1e-6) + +T0 = [1] +T1 = [0, 1] +T2 = [-1, 0, 2] +T3 = [0, -3, 0, 4] +T4 = [1, 0, -8, 0, 8] +T5 = [0, 5, 0, -20, 0, 16] +T6 = [-1, 0, 18, 0, -48, 0, 32] +T7 = [0, -7, 0, 56, 0, -112, 0, 64] +T8 = [1, 0, -32, 0, 160, 0, -256, 0, 128] +T9 = [0, 9, 0, -120, 0, 432, 0, -576, 0, 256] + +Tlist = [T0, T1, T2, T3, T4, T5, T6, T7, T8, T9] + + +class TestConstants: + + def test_polydomain(self): + assert_equal(poly.polydomain, [-1, 1]) + + def test_polyzero(self): + assert_equal(poly.polyzero, [0]) + + def test_polyone(self): + assert_equal(poly.polyone, [1]) + + def test_polyx(self): + assert_equal(poly.polyx, [0, 1]) + + +class TestArithmetic: + + def test_polyadd(self): + for i in range(5): + for j in range(5): + msg = f"At i={i}, j={j}" + tgt = np.zeros(max(i, j) + 1) + tgt[i] += 1 + tgt[j] += 1 + res = poly.polyadd([0]*i + [1], [0]*j + [1]) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + def test_polysub(self): + for i in range(5): + for j in range(5): + msg = f"At i={i}, j={j}" + tgt = np.zeros(max(i, j) + 1) + tgt[i] += 1 + tgt[j] -= 1 + res = poly.polysub([0]*i + [1], [0]*j + [1]) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + def test_polymulx(self): + assert_equal(poly.polymulx([0]), [0]) + assert_equal(poly.polymulx([1]), [0, 1]) + for i in range(1, 5): + ser = [0]*i + [1] + tgt = [0]*(i + 1) + [1] + assert_equal(poly.polymulx(ser), tgt) + + def test_polymul(self): + for i in range(5): + for j in range(5): + msg = f"At i={i}, j={j}" + tgt = np.zeros(i + j + 1) + tgt[i + j] += 1 + res = poly.polymul([0]*i + [1], [0]*j + [1]) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + def test_polydiv(self): + # check zero division + assert_raises(ZeroDivisionError, poly.polydiv, [1], [0]) + + # check scalar division + quo, rem = poly.polydiv([2], [2]) + assert_equal((quo, rem), (1, 0)) + quo, rem = poly.polydiv([2, 2], [2]) + assert_equal((quo, rem), ((1, 1), 0)) + + # check rest. 
+ for i in range(5): + for j in range(5): + msg = f"At i={i}, j={j}" + ci = [0]*i + [1, 2] + cj = [0]*j + [1, 2] + tgt = poly.polyadd(ci, cj) + quo, rem = poly.polydiv(tgt, ci) + res = poly.polyadd(poly.polymul(quo, ci), rem) + assert_equal(res, tgt, err_msg=msg) + + def test_polypow(self): + for i in range(5): + for j in range(5): + msg = f"At i={i}, j={j}" + c = np.arange(i + 1) + tgt = reduce(poly.polymul, [c]*j, np.array([1])) + res = poly.polypow(c, j) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + +class TestEvaluation: + # coefficients of 1 + 2*x + 3*x**2 + c1d = np.array([1., 2., 3.]) + c2d = np.einsum('i,j->ij', c1d, c1d) + c3d = np.einsum('i,j,k->ijk', c1d, c1d, c1d) + + # some random values in [-1, 1) + x = np.random.random((3, 5))*2 - 1 + y = poly.polyval(x, [1., 2., 3.]) + + def test_polyval(self): + #check empty input + assert_equal(poly.polyval([], [1]).size, 0) + + #check normal input) + x = np.linspace(-1, 1) + y = [x**i for i in range(5)] + for i in range(5): + tgt = y[i] + res = poly.polyval(x, [0]*i + [1]) + assert_almost_equal(res, tgt) + tgt = x*(x**2 - 1) + res = poly.polyval(x, [0, -1, 0, 1]) + assert_almost_equal(res, tgt) + + #check that shape is preserved + for i in range(3): + dims = [2]*i + x = np.zeros(dims) + assert_equal(poly.polyval(x, [1]).shape, dims) + assert_equal(poly.polyval(x, [1, 0]).shape, dims) + assert_equal(poly.polyval(x, [1, 0, 0]).shape, dims) + + #check masked arrays are processed correctly + mask = [False, True, False] + mx = np.ma.array([1, 2, 3], mask=mask) + res = np.polyval([7, 5, 3], mx) + assert_array_equal(res.mask, mask) + + #check subtypes of ndarray are preserved + class C(np.ndarray): + pass + + cx = np.array([1, 2, 3]).view(C) + assert_equal(type(np.polyval([2, 3, 4], cx)), C) + + def test_polyvalfromroots(self): + # check exception for broadcasting x values over root array with + # too few dimensions + assert_raises(ValueError, poly.polyvalfromroots, + [1], [1], tensor=False) + + # check empty input + assert_equal(poly.polyvalfromroots([], [1]).size, 0) + assert_(poly.polyvalfromroots([], [1]).shape == (0,)) + + # check empty input + multidimensional roots + assert_equal(poly.polyvalfromroots([], [[1] * 5]).size, 0) + assert_(poly.polyvalfromroots([], [[1] * 5]).shape == (5, 0)) + + # check scalar input + assert_equal(poly.polyvalfromroots(1, 1), 0) + assert_(poly.polyvalfromroots(1, np.ones((3, 3))).shape == (3,)) + + # check normal input) + x = np.linspace(-1, 1) + y = [x**i for i in range(5)] + for i in range(1, 5): + tgt = y[i] + res = poly.polyvalfromroots(x, [0]*i) + assert_almost_equal(res, tgt) + tgt = x*(x - 1)*(x + 1) + res = poly.polyvalfromroots(x, [-1, 0, 1]) + assert_almost_equal(res, tgt) + + # check that shape is preserved + for i in range(3): + dims = [2]*i + x = np.zeros(dims) + assert_equal(poly.polyvalfromroots(x, [1]).shape, dims) + assert_equal(poly.polyvalfromroots(x, [1, 0]).shape, dims) + assert_equal(poly.polyvalfromroots(x, [1, 0, 0]).shape, dims) + + # check compatibility with factorization + ptest = [15, 2, -16, -2, 1] + r = poly.polyroots(ptest) + x = np.linspace(-1, 1) + assert_almost_equal(poly.polyval(x, ptest), + poly.polyvalfromroots(x, r)) + + # check multidimensional arrays of roots and values + # check tensor=False + rshape = (3, 5) + x = np.arange(-3, 2) + r = np.random.randint(-5, 5, size=rshape) + res = poly.polyvalfromroots(x, r, tensor=False) + tgt = np.empty(r.shape[1:]) + for ii in range(tgt.size): + tgt[ii] = poly.polyvalfromroots(x[ii], r[:, ii]) + assert_equal(res, tgt) + + 
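# (with tensor=False above, x is broadcast against the trailing + # axis of r, pairing the value x[ii] with the root column r[:, ii]; + # tensor=True below instead evaluates every element of x at every + # column of roots) +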
# check tensor=True + x = np.vstack([x, 2*x]) + res = poly.polyvalfromroots(x, r, tensor=True) + tgt = np.empty(r.shape[1:] + x.shape) + for ii in range(r.shape[1]): + for jj in range(x.shape[0]): + tgt[ii, jj, :] = poly.polyvalfromroots(x[jj], r[:, ii]) + assert_equal(res, tgt) + + def test_polyval2d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test exceptions + assert_raises_regex(ValueError, 'incompatible', + poly.polyval2d, x1, x2[:2], self.c2d) + + #test values + tgt = y1*y2 + res = poly.polyval2d(x1, x2, self.c2d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = poly.polyval2d(z, z, self.c2d) + assert_(res.shape == (2, 3)) + + def test_polyval3d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test exceptions + assert_raises_regex(ValueError, 'incompatible', + poly.polyval3d, x1, x2, x3[:2], self.c3d) + + #test values + tgt = y1*y2*y3 + res = poly.polyval3d(x1, x2, x3, self.c3d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = poly.polyval3d(z, z, z, self.c3d) + assert_(res.shape == (2, 3)) + + def test_polygrid2d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test values + tgt = np.einsum('i,j->ij', y1, y2) + res = poly.polygrid2d(x1, x2, self.c2d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = poly.polygrid2d(z, z, self.c2d) + assert_(res.shape == (2, 3)*2) + + def test_polygrid3d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test values + tgt = np.einsum('i,j,k->ijk', y1, y2, y3) + res = poly.polygrid3d(x1, x2, x3, self.c3d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = poly.polygrid3d(z, z, z, self.c3d) + assert_(res.shape == (2, 3)*3) + + +class TestIntegral: + + def test_polyint(self): + # check exceptions + assert_raises(TypeError, poly.polyint, [0], .5) + assert_raises(ValueError, poly.polyint, [0], -1) + assert_raises(ValueError, poly.polyint, [0], 1, [0, 0]) + assert_raises(ValueError, poly.polyint, [0], lbnd=[0]) + assert_raises(ValueError, poly.polyint, [0], scl=[0]) + assert_raises(TypeError, poly.polyint, [0], axis=.5) + with assert_warns(DeprecationWarning): + poly.polyint([1, 1], 1.) 
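+ # (a non-integer integration order such as 1. is deprecated; the + # order m must be a non-negative integer)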
+ + # test integration of zero polynomial + for i in range(2, 5): + k = [0]*(i - 2) + [1] + res = poly.polyint([0], m=i, k=k) + assert_almost_equal(res, [0, 1]) + + # check single integration with integration constant + for i in range(5): + scl = i + 1 + pol = [0]*i + [1] + tgt = [i] + [0]*i + [1/scl] + res = poly.polyint(pol, m=1, k=[i]) + assert_almost_equal(trim(res), trim(tgt)) + + # check single integration with integration constant and lbnd + for i in range(5): + scl = i + 1 + pol = [0]*i + [1] + res = poly.polyint(pol, m=1, k=[i], lbnd=-1) + assert_almost_equal(poly.polyval(-1, res), i) + + # check single integration with integration constant and scaling + for i in range(5): + scl = i + 1 + pol = [0]*i + [1] + tgt = [i] + [0]*i + [2/scl] + res = poly.polyint(pol, m=1, k=[i], scl=2) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with default k + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = poly.polyint(tgt, m=1) + res = poly.polyint(pol, m=j) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with defined k + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = poly.polyint(tgt, m=1, k=[k]) + res = poly.polyint(pol, m=j, k=list(range(j))) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with lbnd + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = poly.polyint(tgt, m=1, k=[k], lbnd=-1) + res = poly.polyint(pol, m=j, k=list(range(j)), lbnd=-1) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with scaling + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = poly.polyint(tgt, m=1, k=[k], scl=2) + res = poly.polyint(pol, m=j, k=list(range(j)), scl=2) + assert_almost_equal(trim(res), trim(tgt)) + + def test_polyint_axis(self): + # check that axis keyword works + c2d = np.random.random((3, 4)) + + tgt = np.vstack([poly.polyint(c) for c in c2d.T]).T + res = poly.polyint(c2d, axis=0) + assert_almost_equal(res, tgt) + + tgt = np.vstack([poly.polyint(c) for c in c2d]) + res = poly.polyint(c2d, axis=1) + assert_almost_equal(res, tgt) + + tgt = np.vstack([poly.polyint(c, k=3) for c in c2d]) + res = poly.polyint(c2d, k=3, axis=1) + assert_almost_equal(res, tgt) + + +class TestDerivative: + + def test_polyder(self): + # check exceptions + assert_raises(TypeError, poly.polyder, [0], .5) + assert_raises(ValueError, poly.polyder, [0], -1) + + # check that zeroth derivative does nothing + for i in range(5): + tgt = [0]*i + [1] + res = poly.polyder(tgt, m=0) + assert_equal(trim(res), trim(tgt)) + + # check that derivation is the inverse of integration + for i in range(5): + for j in range(2, 5): + tgt = [0]*i + [1] + res = poly.polyder(poly.polyint(tgt, m=j), m=j) + assert_almost_equal(trim(res), trim(tgt)) + + # check derivation with scaling + for i in range(5): + for j in range(2, 5): + tgt = [0]*i + [1] + res = poly.polyder(poly.polyint(tgt, m=j, scl=2), m=j, scl=.5) + assert_almost_equal(trim(res), trim(tgt)) + + def test_polyder_axis(self): + # check that axis keyword works + c2d = np.random.random((3, 4)) + + tgt = np.vstack([poly.polyder(c) for c in c2d.T]).T + res = poly.polyder(c2d, axis=0) + assert_almost_equal(res, tgt) + + tgt = np.vstack([poly.polyder(c) for c in c2d]) + res = poly.polyder(c2d, axis=1) + assert_almost_equal(res, tgt) + + +class 
TestVander: + # some random values in [-1, 1) + x = np.random.random((3, 5))*2 - 1 + + def test_polyvander(self): + # check for 1d x + x = np.arange(3) + v = poly.polyvander(x, 3) + assert_(v.shape == (3, 4)) + for i in range(4): + coef = [0]*i + [1] + assert_almost_equal(v[..., i], poly.polyval(x, coef)) + + # check for 2d x + x = np.array([[1, 2], [3, 4], [5, 6]]) + v = poly.polyvander(x, 3) + assert_(v.shape == (3, 2, 4)) + for i in range(4): + coef = [0]*i + [1] + assert_almost_equal(v[..., i], poly.polyval(x, coef)) + + def test_polyvander2d(self): + # also tests polyval2d for non-square coefficient array + x1, x2, x3 = self.x + c = np.random.random((2, 3)) + van = poly.polyvander2d(x1, x2, [1, 2]) + tgt = poly.polyval2d(x1, x2, c) + res = np.dot(van, c.flat) + assert_almost_equal(res, tgt) + + # check shape + van = poly.polyvander2d([x1], [x2], [1, 2]) + assert_(van.shape == (1, 5, 6)) + + def test_polyvander3d(self): + # also tests polyval3d for non-square coefficient array + x1, x2, x3 = self.x + c = np.random.random((2, 3, 4)) + van = poly.polyvander3d(x1, x2, x3, [1, 2, 3]) + tgt = poly.polyval3d(x1, x2, x3, c) + res = np.dot(van, c.flat) + assert_almost_equal(res, tgt) + + # check shape + van = poly.polyvander3d([x1], [x2], [x3], [1, 2, 3]) + assert_(van.shape == (1, 5, 24)) + + def test_polyvandernegdeg(self): + x = np.arange(3) + assert_raises(ValueError, poly.polyvander, x, -1) + + +class TestCompanion: + + def test_raises(self): + assert_raises(ValueError, poly.polycompanion, []) + assert_raises(ValueError, poly.polycompanion, [1]) + + def test_dimensions(self): + for i in range(1, 5): + coef = [0]*i + [1] + assert_(poly.polycompanion(coef).shape == (i, i)) + + def test_linear_root(self): + assert_(poly.polycompanion([1, 2])[0, 0] == -.5) + + +class TestMisc: + + def test_polyfromroots(self): + res = poly.polyfromroots([]) + assert_almost_equal(trim(res), [1]) + for i in range(1, 5): + roots = np.cos(np.linspace(-np.pi, 0, 2*i + 1)[1::2]) + tgt = Tlist[i] + res = poly.polyfromroots(roots)*2**(i-1) + assert_almost_equal(trim(res), trim(tgt)) + + def test_polyroots(self): + assert_almost_equal(poly.polyroots([1]), []) + assert_almost_equal(poly.polyroots([1, 2]), [-.5]) + for i in range(2, 5): + tgt = np.linspace(-1, 1, i) + res = poly.polyroots(poly.polyfromroots(tgt)) + assert_almost_equal(trim(res), trim(tgt)) + + def test_polyfit(self): + def f(x): + return x*(x - 1)*(x - 2) + + def f2(x): + return x**4 + x**2 + 1 + + # Test exceptions + assert_raises(ValueError, poly.polyfit, [1], [1], -1) + assert_raises(TypeError, poly.polyfit, [[1]], [1], 0) + assert_raises(TypeError, poly.polyfit, [], [1], 0) + assert_raises(TypeError, poly.polyfit, [1], [[[1]]], 0) + assert_raises(TypeError, poly.polyfit, [1, 2], [1], 0) + assert_raises(TypeError, poly.polyfit, [1], [1, 2], 0) + assert_raises(TypeError, poly.polyfit, [1], [1], 0, w=[[1]]) + assert_raises(TypeError, poly.polyfit, [1], [1], 0, w=[1, 1]) + assert_raises(ValueError, poly.polyfit, [1], [1], [-1,]) + assert_raises(ValueError, poly.polyfit, [1], [1], [2, -1, 6]) + assert_raises(TypeError, poly.polyfit, [1], [1], []) + + # Test fit + x = np.linspace(0, 2) + y = f(x) + # + coef3 = poly.polyfit(x, y, 3) + assert_equal(len(coef3), 4) + assert_almost_equal(poly.polyval(x, coef3), y) + coef3 = poly.polyfit(x, y, [0, 1, 2, 3]) + assert_equal(len(coef3), 4) + assert_almost_equal(poly.polyval(x, coef3), y) + # + coef4 = poly.polyfit(x, y, 4) + assert_equal(len(coef4), 5) + assert_almost_equal(poly.polyval(x, coef4), y) + coef4 = 
poly.polyfit(x, y, [0, 1, 2, 3, 4]) + assert_equal(len(coef4), 5) + assert_almost_equal(poly.polyval(x, coef4), y) + # + coef2d = poly.polyfit(x, np.array([y, y]).T, 3) + assert_almost_equal(coef2d, np.array([coef3, coef3]).T) + coef2d = poly.polyfit(x, np.array([y, y]).T, [0, 1, 2, 3]) + assert_almost_equal(coef2d, np.array([coef3, coef3]).T) + # test weighting; points with zero weight get corrupted y values + w = np.zeros_like(x) + yw = y.copy() + w[1::2] = 1 + yw[0::2] = 0 + wcoef3 = poly.polyfit(x, yw, 3, w=w) + assert_almost_equal(wcoef3, coef3) + wcoef3 = poly.polyfit(x, yw, [0, 1, 2, 3], w=w) + assert_almost_equal(wcoef3, coef3) + # + wcoef2d = poly.polyfit(x, np.array([yw, yw]).T, 3, w=w) + assert_almost_equal(wcoef2d, np.array([coef3, coef3]).T) + wcoef2d = poly.polyfit(x, np.array([yw, yw]).T, [0, 1, 2, 3], w=w) + assert_almost_equal(wcoef2d, np.array([coef3, coef3]).T) + # test scaling with complex x values whose squares + # sum to zero. + x = [1, 1j, -1, -1j] + assert_almost_equal(poly.polyfit(x, x, 1), [0, 1]) + assert_almost_equal(poly.polyfit(x, x, [0, 1]), [0, 1]) + # test fitting only even polynomials + x = np.linspace(-1, 1) + y = f2(x) + coef1 = poly.polyfit(x, y, 4) + assert_almost_equal(poly.polyval(x, coef1), y) + coef2 = poly.polyfit(x, y, [0, 2, 4]) + assert_almost_equal(poly.polyval(x, coef2), y) + assert_almost_equal(coef1, coef2) + + def test_polytrim(self): + coef = [2, -1, 1, 0] + + # Test exceptions + assert_raises(ValueError, poly.polytrim, coef, -1) + + # Test results + assert_equal(poly.polytrim(coef), coef[:-1]) + assert_equal(poly.polytrim(coef, 1), coef[:-3]) + assert_equal(poly.polytrim(coef, 2), [0]) + + def test_polyline(self): + assert_equal(poly.polyline(3, 4), [3, 4]) + + def test_polyline_zero(self): + assert_equal(poly.polyline(3, 0), [3]) diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/tests/test_polyutils.py b/.local/lib/python3.8/site-packages/numpy/polynomial/tests/test_polyutils.py new file mode 100644 index 0000000..cc63079 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/polynomial/tests/test_polyutils.py @@ -0,0 +1,121 @@ +"""Tests for polyutils module.
+ +""" +import numpy as np +import numpy.polynomial.polyutils as pu +from numpy.testing import ( + assert_almost_equal, assert_raises, assert_equal, assert_, + ) + + +class TestMisc: + + def test_trimseq(self): + for i in range(5): + tgt = [1] + res = pu.trimseq([1] + [0]*5) + assert_equal(res, tgt) + + def test_as_series(self): + # check exceptions + assert_raises(ValueError, pu.as_series, [[]]) + assert_raises(ValueError, pu.as_series, [[[1, 2]]]) + assert_raises(ValueError, pu.as_series, [[1], ['a']]) + # check common types + types = ['i', 'd', 'O'] + for i in range(len(types)): + for j in range(i): + ci = np.ones(1, types[i]) + cj = np.ones(1, types[j]) + [resi, resj] = pu.as_series([ci, cj]) + assert_(resi.dtype.char == resj.dtype.char) + assert_(resj.dtype.char == types[i]) + + def test_trimcoef(self): + coef = [2, -1, 1, 0] + # Test exceptions + assert_raises(ValueError, pu.trimcoef, coef, -1) + # Test results + assert_equal(pu.trimcoef(coef), coef[:-1]) + assert_equal(pu.trimcoef(coef, 1), coef[:-3]) + assert_equal(pu.trimcoef(coef, 2), [0]) + + def test_vander_nd_exception(self): + # n_dims != len(points) + assert_raises(ValueError, pu._vander_nd, (), (1, 2, 3), [90]) + # n_dims != len(degrees) + assert_raises(ValueError, pu._vander_nd, (), (), [90.65]) + # n_dims == 0 + assert_raises(ValueError, pu._vander_nd, (), (), []) + + def test_div_zerodiv(self): + # c2[-1] == 0 + assert_raises(ZeroDivisionError, pu._div, pu._div, (1, 2, 3), [0]) + + def test_pow_too_large(self): + # power > maxpower + assert_raises(ValueError, pu._pow, (), [1, 2, 3], 5, 4) + +class TestDomain: + + def test_getdomain(self): + # test for real values + x = [1, 10, 3, -1] + tgt = [-1, 10] + res = pu.getdomain(x) + assert_almost_equal(res, tgt) + + # test for complex values + x = [1 + 1j, 1 - 1j, 0, 2] + tgt = [-1j, 2 + 1j] + res = pu.getdomain(x) + assert_almost_equal(res, tgt) + + def test_mapdomain(self): + # test for real values + dom1 = [0, 4] + dom2 = [1, 3] + tgt = dom2 + res = pu.mapdomain(dom1, dom1, dom2) + assert_almost_equal(res, tgt) + + # test for complex values + dom1 = [0 - 1j, 2 + 1j] + dom2 = [-2, 2] + tgt = dom2 + x = dom1 + res = pu.mapdomain(x, dom1, dom2) + assert_almost_equal(res, tgt) + + # test for multidimensional arrays + dom1 = [0, 4] + dom2 = [1, 3] + tgt = np.array([dom2, dom2]) + x = np.array([dom1, dom1]) + res = pu.mapdomain(x, dom1, dom2) + assert_almost_equal(res, tgt) + + # test that subtypes are preserved. + class MyNDArray(np.ndarray): + pass + + dom1 = [0, 4] + dom2 = [1, 3] + x = np.array([dom1, dom1]).view(MyNDArray) + res = pu.mapdomain(x, dom1, dom2) + assert_(isinstance(res, MyNDArray)) + + def test_mapparms(self): + # test for real values + dom1 = [0, 4] + dom2 = [1, 3] + tgt = [1, .5] + res = pu. 
mapparms(dom1, dom2) + assert_almost_equal(res, tgt) + + # test for complex values + dom1 = [0 - 1j, 2 + 1j] + dom2 = [-2, 2] + tgt = [-1 + 1j, 1 - 1j] + res = pu.mapparms(dom1, dom2) + assert_almost_equal(res, tgt) diff --git a/.local/lib/python3.8/site-packages/numpy/polynomial/tests/test_printing.py b/.local/lib/python3.8/site-packages/numpy/polynomial/tests/test_printing.py new file mode 100644 index 0000000..4e9902a --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/polynomial/tests/test_printing.py @@ -0,0 +1,390 @@ +import pytest +from numpy.core import array, arange, printoptions +import numpy.polynomial as poly +from numpy.testing import assert_equal, assert_ + +# For testing polynomial printing with object arrays +from fractions import Fraction +from decimal import Decimal + + +class TestStrUnicodeSuperSubscripts: + + @pytest.fixture(scope='class', autouse=True) + def use_unicode(self): + poly.set_default_printstyle('unicode') + + @pytest.mark.parametrize(('inp', 'tgt'), ( + ([1, 2, 3], "1.0 + 2.0·x¹ + 3.0·x²"), + ([-1, 0, 3, -1], "-1.0 + 0.0·x¹ + 3.0·x² - 1.0·x³"), + (arange(12), ("0.0 + 1.0·x¹ + 2.0·x² + 3.0·x³ + 4.0·x⁴ + 5.0·x⁵ + " + "6.0·x⁶ + 7.0·x⁷ +\n8.0·x⁸ + 9.0·x⁹ + 10.0·x¹⁰ + " + "11.0·x¹¹")), + )) + def test_polynomial_str(self, inp, tgt): + res = str(poly.Polynomial(inp)) + assert_equal(res, tgt) + + @pytest.mark.parametrize(('inp', 'tgt'), ( + ([1, 2, 3], "1.0 + 2.0·T₁(x) + 3.0·T₂(x)"), + ([-1, 0, 3, -1], "-1.0 + 0.0·T₁(x) + 3.0·T₂(x) - 1.0·T₃(x)"), + (arange(12), ("0.0 + 1.0·T₁(x) + 2.0·T₂(x) + 3.0·T₃(x) + 4.0·T₄(x) + " + "5.0·T₅(x) +\n6.0·T₆(x) + 7.0·T₇(x) + 8.0·T₈(x) + " + "9.0·T₉(x) + 10.0·T₁₀(x) + 11.0·T₁₁(x)")), + )) + def test_chebyshev_str(self, inp, tgt): + res = str(poly.Chebyshev(inp)) + assert_equal(res, tgt) + + @pytest.mark.parametrize(('inp', 'tgt'), ( + ([1, 2, 3], "1.0 + 2.0·P₁(x) + 3.0·P₂(x)"), + ([-1, 0, 3, -1], "-1.0 + 0.0·P₁(x) + 3.0·P₂(x) - 1.0·P₃(x)"), + (arange(12), ("0.0 + 1.0·P₁(x) + 2.0·P₂(x) + 3.0·P₃(x) + 4.0·P₄(x) + " + "5.0·P₅(x) +\n6.0·P₆(x) + 7.0·P₇(x) + 8.0·P₈(x) + " + "9.0·P₉(x) + 10.0·P₁₀(x) + 11.0·P₁₁(x)")), + )) + def test_legendre_str(self, inp, tgt): + res = str(poly.Legendre(inp)) + assert_equal(res, tgt) + + @pytest.mark.parametrize(('inp', 'tgt'), ( + ([1, 2, 3], "1.0 + 2.0·H₁(x) + 3.0·H₂(x)"), + ([-1, 0, 3, -1], "-1.0 + 0.0·H₁(x) + 3.0·H₂(x) - 1.0·H₃(x)"), + (arange(12), ("0.0 + 1.0·H₁(x) + 2.0·H₂(x) + 3.0·H₃(x) + 4.0·H₄(x) + " + "5.0·H₅(x) +\n6.0·H₆(x) + 7.0·H₇(x) + 8.0·H₈(x) + " + "9.0·H₉(x) + 10.0·H₁₀(x) + 11.0·H₁₁(x)")), + )) + def test_hermite_str(self, inp, tgt): + res = str(poly.Hermite(inp)) + assert_equal(res, tgt) + + @pytest.mark.parametrize(('inp', 'tgt'), ( + ([1, 2, 3], "1.0 + 2.0·He₁(x) + 3.0·He₂(x)"), + ([-1, 0, 3, -1], "-1.0 + 0.0·He₁(x) + 3.0·He₂(x) - 1.0·He₃(x)"), + (arange(12), ("0.0 + 1.0·He₁(x) + 2.0·He₂(x) + 3.0·He₃(x) + " + "4.0·He₄(x) + 5.0·He₅(x) +\n6.0·He₆(x) + 7.0·He₇(x) + " + "8.0·He₈(x) + 9.0·He₉(x) + 10.0·He₁₀(x) +\n" + "11.0·He₁₁(x)")), + )) + def test_hermiteE_str(self, inp, tgt): + res = str(poly.HermiteE(inp)) + assert_equal(res, tgt) + + @pytest.mark.parametrize(('inp', 'tgt'), ( + ([1, 2, 3], "1.0 + 2.0·L₁(x) + 3.0·L₂(x)"), + ([-1, 0, 3, -1], "-1.0 + 0.0·L₁(x) + 3.0·L₂(x) - 1.0·L₃(x)"), + (arange(12), ("0.0 + 1.0·L₁(x) + 2.0·L₂(x) + 3.0·L₃(x) + 4.0·L₄(x) + " + "5.0·L₅(x) +\n6.0·L₆(x) + 7.0·L₇(x) + 8.0·L₈(x) + " + "9.0·L₉(x) + 10.0·L₁₀(x) + 11.0·L₁₁(x)")), + )) + def test_laguerre_str(self, inp, tgt): + res = str(poly.Laguerre(inp)) + assert_equal(res, tgt) + + +class 
TestStrAscii: + + @pytest.fixture(scope='class', autouse=True) + def use_ascii(self): + poly.set_default_printstyle('ascii') + + @pytest.mark.parametrize(('inp', 'tgt'), ( + ([1, 2, 3], "1.0 + 2.0 x**1 + 3.0 x**2"), + ([-1, 0, 3, -1], "-1.0 + 0.0 x**1 + 3.0 x**2 - 1.0 x**3"), + (arange(12), ("0.0 + 1.0 x**1 + 2.0 x**2 + 3.0 x**3 + 4.0 x**4 + " + "5.0 x**5 + 6.0 x**6 +\n7.0 x**7 + 8.0 x**8 + " + "9.0 x**9 + 10.0 x**10 + 11.0 x**11")), + )) + def test_polynomial_str(self, inp, tgt): + res = str(poly.Polynomial(inp)) + assert_equal(res, tgt) + + @pytest.mark.parametrize(('inp', 'tgt'), ( + ([1, 2, 3], "1.0 + 2.0 T_1(x) + 3.0 T_2(x)"), + ([-1, 0, 3, -1], "-1.0 + 0.0 T_1(x) + 3.0 T_2(x) - 1.0 T_3(x)"), + (arange(12), ("0.0 + 1.0 T_1(x) + 2.0 T_2(x) + 3.0 T_3(x) + " + "4.0 T_4(x) + 5.0 T_5(x) +\n6.0 T_6(x) + 7.0 T_7(x) + " + "8.0 T_8(x) + 9.0 T_9(x) + 10.0 T_10(x) +\n" + "11.0 T_11(x)")), + )) + def test_chebyshev_str(self, inp, tgt): + res = str(poly.Chebyshev(inp)) + assert_equal(res, tgt) + + @pytest.mark.parametrize(('inp', 'tgt'), ( + ([1, 2, 3], "1.0 + 2.0 P_1(x) + 3.0 P_2(x)"), + ([-1, 0, 3, -1], "-1.0 + 0.0 P_1(x) + 3.0 P_2(x) - 1.0 P_3(x)"), + (arange(12), ("0.0 + 1.0 P_1(x) + 2.0 P_2(x) + 3.0 P_3(x) + " + "4.0 P_4(x) + 5.0 P_5(x) +\n6.0 P_6(x) + 7.0 P_7(x) + " + "8.0 P_8(x) + 9.0 P_9(x) + 10.0 P_10(x) +\n" + "11.0 P_11(x)")), + )) + def test_legendre_str(self, inp, tgt): + res = str(poly.Legendre(inp)) + assert_equal(res, tgt) + + @pytest.mark.parametrize(('inp', 'tgt'), ( + ([1, 2, 3], "1.0 + 2.0 H_1(x) + 3.0 H_2(x)"), + ([-1, 0, 3, -1], "-1.0 + 0.0 H_1(x) + 3.0 H_2(x) - 1.0 H_3(x)"), + (arange(12), ("0.0 + 1.0 H_1(x) + 2.0 H_2(x) + 3.0 H_3(x) + " + "4.0 H_4(x) + 5.0 H_5(x) +\n6.0 H_6(x) + 7.0 H_7(x) + " + "8.0 H_8(x) + 9.0 H_9(x) + 10.0 H_10(x) +\n" + "11.0 H_11(x)")), + )) + def test_hermite_str(self, inp, tgt): + res = str(poly.Hermite(inp)) + assert_equal(res, tgt) + + @pytest.mark.parametrize(('inp', 'tgt'), ( + ([1, 2, 3], "1.0 + 2.0 He_1(x) + 3.0 He_2(x)"), + ([-1, 0, 3, -1], "-1.0 + 0.0 He_1(x) + 3.0 He_2(x) - 1.0 He_3(x)"), + (arange(12), ("0.0 + 1.0 He_1(x) + 2.0 He_2(x) + 3.0 He_3(x) + " + "4.0 He_4(x) +\n5.0 He_5(x) + 6.0 He_6(x) + " + "7.0 He_7(x) + 8.0 He_8(x) + 9.0 He_9(x) +\n" + "10.0 He_10(x) + 11.0 He_11(x)")), + )) + def test_hermiteE_str(self, inp, tgt): + res = str(poly.HermiteE(inp)) + assert_equal(res, tgt) + + @pytest.mark.parametrize(('inp', 'tgt'), ( + ([1, 2, 3], "1.0 + 2.0 L_1(x) + 3.0 L_2(x)"), + ([-1, 0, 3, -1], "-1.0 + 0.0 L_1(x) + 3.0 L_2(x) - 1.0 L_3(x)"), + (arange(12), ("0.0 + 1.0 L_1(x) + 2.0 L_2(x) + 3.0 L_3(x) + " + "4.0 L_4(x) + 5.0 L_5(x) +\n6.0 L_6(x) + 7.0 L_7(x) + " + "8.0 L_8(x) + 9.0 L_9(x) + 10.0 L_10(x) +\n" + "11.0 L_11(x)")), + )) + def test_laguerre_str(self, inp, tgt): + res = str(poly.Laguerre(inp)) + assert_equal(res, tgt) + + +class TestLinebreaking: + + @pytest.fixture(scope='class', autouse=True) + def use_ascii(self): + poly.set_default_printstyle('ascii') + + def test_single_line_one_less(self): + # With 'ascii' style, len(str(p)) is default linewidth - 1 (i.e. 
74) + p = poly.Polynomial([123456789, 123456789, 123456789, 1234, 1]) + assert_equal(len(str(p)), 74) + assert_equal(str(p), ( + '123456789.0 + 123456789.0 x**1 + 123456789.0 x**2 + ' + '1234.0 x**3 + 1.0 x**4' + )) + + def test_num_chars_is_linewidth(self): + # len(str(p)) == default linewidth == 75 + p = poly.Polynomial([123456789, 123456789, 123456789, 1234, 10]) + assert_equal(len(str(p)), 75) + assert_equal(str(p), ( + '123456789.0 + 123456789.0 x**1 + 123456789.0 x**2 + ' + '1234.0 x**3 +\n10.0 x**4' + )) + + def test_first_linebreak_multiline_one_less_than_linewidth(self): + # Multiline str where len(first_line) + len(next_term) == lw - 1 == 74 + p = poly.Polynomial( + [123456789, 123456789, 123456789, 12, 1, 123456789] + ) + assert_equal(len(str(p).split('\n')[0]), 74) + assert_equal(str(p), ( + '123456789.0 + 123456789.0 x**1 + 123456789.0 x**2 + ' + '12.0 x**3 + 1.0 x**4 +\n123456789.0 x**5' + )) + + def test_first_linebreak_multiline_on_linewidth(self): + # First line is one character longer than previous test + p = poly.Polynomial( + [123456789, 123456789, 123456789, 123, 1, 123456789] + ) + assert_equal(str(p), ( + '123456789.0 + 123456789.0 x**1 + 123456789.0 x**2 + ' + '123.0 x**3 +\n1.0 x**4 + 123456789.0 x**5' + )) + + @pytest.mark.parametrize(('lw', 'tgt'), ( + (75, ('0.0 + 10.0 x**1 + 200.0 x**2 + 3000.0 x**3 + 40000.0 x**4 +\n' + '500000.0 x**5 + 600000.0 x**6 + 70000.0 x**7 + 8000.0 x**8 + ' + '900.0 x**9')), + (45, ('0.0 + 10.0 x**1 + 200.0 x**2 + 3000.0 x**3 +\n40000.0 x**4 + ' + '500000.0 x**5 +\n600000.0 x**6 + 70000.0 x**7 + 8000.0 x**8 +\n' + '900.0 x**9')), + (132, ('0.0 + 10.0 x**1 + 200.0 x**2 + 3000.0 x**3 + 40000.0 x**4 + ' + '500000.0 x**5 + 600000.0 x**6 + 70000.0 x**7 + 8000.0 x**8 + ' + '900.0 x**9')), + )) + def test_linewidth_printoption(self, lw, tgt): + p = poly.Polynomial( + [0, 10, 200, 3000, 40000, 500000, 600000, 70000, 8000, 900] + ) + with printoptions(linewidth=lw): + assert_equal(str(p), tgt) + for line in str(p).split('\n'): + assert_(len(line) < lw) + + +def test_set_default_printoptions(): + p = poly.Polynomial([1, 2, 3]) + c = poly.Chebyshev([1, 2, 3]) + poly.set_default_printstyle('ascii') + assert_equal(str(p), "1.0 + 2.0 x**1 + 3.0 x**2") + assert_equal(str(c), "1.0 + 2.0 T_1(x) + 3.0 T_2(x)") + poly.set_default_printstyle('unicode') + assert_equal(str(p), "1.0 + 2.0·x¹ + 3.0·x²") + assert_equal(str(c), "1.0 + 2.0·T₁(x) + 3.0·T₂(x)") + with pytest.raises(ValueError): + poly.set_default_printstyle('invalid_input') + + +def test_complex_coefficients(): + """Test both numpy and built-in complex.""" + coefs = [0+1j, 1+1j, -2+2j, 3+0j] + # numpy complex + p1 = poly.Polynomial(coefs) + # Python complex + p2 = poly.Polynomial(array(coefs, dtype=object)) + poly.set_default_printstyle('unicode') + assert_equal(str(p1), "1j + (1+1j)·x¹ - (2-2j)·x² + (3+0j)·x³") + assert_equal(str(p2), "1j + (1+1j)·x¹ + (-2+2j)·x² + (3+0j)·x³") + poly.set_default_printstyle('ascii') + assert_equal(str(p1), "1j + (1+1j) x**1 - (2-2j) x**2 + (3+0j) x**3") + assert_equal(str(p2), "1j + (1+1j) x**1 + (-2+2j) x**2 + (3+0j) x**3") + + +@pytest.mark.parametrize(('coefs', 'tgt'), ( + (array([Fraction(1, 2), Fraction(3, 4)], dtype=object), ( + "1/2 + 3/4·x¹" + )), + (array([1, 2, Fraction(5, 7)], dtype=object), ( + "1 + 2·x¹ + 5/7·x²" + )), + (array([Decimal('1.00'), Decimal('2.2'), 3], dtype=object), ( + "1.00 + 2.2·x¹ + 3·x²" + )), +)) +def test_numeric_object_coefficients(coefs, tgt): + p = poly.Polynomial(coefs) + poly.set_default_printstyle('unicode') + 
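# object-dtype coefficients such as Fraction and Decimal keep their + # own str() representation instead of being cast to float +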
assert_equal(str(p), tgt) + + +@pytest.mark.parametrize(('coefs', 'tgt'), ( + (array([1, 2, 'f'], dtype=object), '1 + 2·x¹ + f·x²'), + (array([1, 2, [3, 4]], dtype=object), '1 + 2·x¹ + [3, 4]·x²'), +)) +def test_nonnumeric_object_coefficients(coefs, tgt): + """ + Test coef fallback for object arrays of non-numeric coefficients. + """ + p = poly.Polynomial(coefs) + poly.set_default_printstyle('unicode') + assert_equal(str(p), tgt) + + +class TestFormat: + def test_format_unicode(self): + poly.set_default_printstyle('ascii') + p = poly.Polynomial([1, 2, 0, -1]) + assert_equal(format(p, 'unicode'), "1.0 + 2.0·x¹ + 0.0·x² - 1.0·x³") + + def test_format_ascii(self): + poly.set_default_printstyle('unicode') + p = poly.Polynomial([1, 2, 0, -1]) + assert_equal( + format(p, 'ascii'), "1.0 + 2.0 x**1 + 0.0 x**2 - 1.0 x**3" + ) + + def test_empty_formatstr(self): + poly.set_default_printstyle('ascii') + p = poly.Polynomial([1, 2, 3]) + assert_equal(format(p), "1.0 + 2.0 x**1 + 3.0 x**2") + assert_equal(f"{p}", "1.0 + 2.0 x**1 + 3.0 x**2") + + def test_bad_formatstr(self): + p = poly.Polynomial([1, 2, 0, -1]) + with pytest.raises(ValueError): + format(p, '.2f') + + +class TestRepr: + def test_polynomial_str(self): + res = repr(poly.Polynomial([0, 1])) + tgt = 'Polynomial([0., 1.], domain=[-1, 1], window=[-1, 1])' + assert_equal(res, tgt) + + def test_chebyshev_str(self): + res = repr(poly.Chebyshev([0, 1])) + tgt = 'Chebyshev([0., 1.], domain=[-1, 1], window=[-1, 1])' + assert_equal(res, tgt) + + def test_legendre_repr(self): + res = repr(poly.Legendre([0, 1])) + tgt = 'Legendre([0., 1.], domain=[-1, 1], window=[-1, 1])' + assert_equal(res, tgt) + + def test_hermite_repr(self): + res = repr(poly.Hermite([0, 1])) + tgt = 'Hermite([0., 1.], domain=[-1, 1], window=[-1, 1])' + assert_equal(res, tgt) + + def test_hermiteE_repr(self): + res = repr(poly.HermiteE([0, 1])) + tgt = 'HermiteE([0., 1.], domain=[-1, 1], window=[-1, 1])' + assert_equal(res, tgt) + + def test_laguerre_repr(self): + res = repr(poly.Laguerre([0, 1])) + tgt = 'Laguerre([0., 1.], domain=[0, 1], window=[0, 1])' + assert_equal(res, tgt) + + +class TestLatexRepr: + """Test the latex repr used by Jupyter""" + + def as_latex(self, obj): + # right now we ignore the formatting of scalars in our tests, since + # it makes them too verbose. 
Ideally, the formatting of scalars will + # be fixed such that tests below continue to pass + obj._repr_latex_scalar = lambda x: str(x) + try: + return obj._repr_latex_() + finally: + del obj._repr_latex_scalar + + def test_simple_polynomial(self): + # default input + p = poly.Polynomial([1, 2, 3]) + assert_equal(self.as_latex(p), + r'$x \mapsto 1.0 + 2.0\,x + 3.0\,x^{2}$') + + # translated input + p = poly.Polynomial([1, 2, 3], domain=[-2, 0]) + assert_equal(self.as_latex(p), + r'$x \mapsto 1.0 + 2.0\,\left(1.0 + x\right) + 3.0\,\left(1.0 + x\right)^{2}$') + + # scaled input + p = poly.Polynomial([1, 2, 3], domain=[-0.5, 0.5]) + assert_equal(self.as_latex(p), + r'$x \mapsto 1.0 + 2.0\,\left(2.0x\right) + 3.0\,\left(2.0x\right)^{2}$') + + # affine input + p = poly.Polynomial([1, 2, 3], domain=[-1, 0]) + assert_equal(self.as_latex(p), + r'$x \mapsto 1.0 + 2.0\,\left(1.0 + 2.0x\right) + 3.0\,\left(1.0 + 2.0x\right)^{2}$') + + def test_basis_func(self): + p = poly.Chebyshev([1, 2, 3]) + assert_equal(self.as_latex(p), + r'$x \mapsto 1.0\,{T}_{0}(x) + 2.0\,{T}_{1}(x) + 3.0\,{T}_{2}(x)$') + # affine input - check no surplus parens are added + p = poly.Chebyshev([1, 2, 3], domain=[-1, 0]) + assert_equal(self.as_latex(p), + r'$x \mapsto 1.0\,{T}_{0}(1.0 + 2.0x) + 2.0\,{T}_{1}(1.0 + 2.0x) + 3.0\,{T}_{2}(1.0 + 2.0x)$') + + def test_multichar_basis_func(self): + p = poly.HermiteE([1, 2, 3]) + assert_equal(self.as_latex(p), + r'$x \mapsto 1.0\,{He}_{0}(x) + 2.0\,{He}_{1}(x) + 3.0\,{He}_{2}(x)$') diff --git a/.local/lib/python3.8/site-packages/numpy/py.typed b/.local/lib/python3.8/site-packages/numpy/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/numpy/random/__init__.pxd b/.local/lib/python3.8/site-packages/numpy/random/__init__.pxd new file mode 100644 index 0000000..1f90572 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/__init__.pxd @@ -0,0 +1,14 @@ +cimport numpy as np +from libc.stdint cimport uint32_t, uint64_t + +cdef extern from "numpy/random/bitgen.h": + struct bitgen: + void *state + uint64_t (*next_uint64)(void *st) nogil + uint32_t (*next_uint32)(void *st) nogil + double (*next_double)(void *st) nogil + uint64_t (*next_raw)(void *st) nogil + + ctypedef bitgen bitgen_t + +from numpy.random.bit_generator cimport BitGenerator, SeedSequence diff --git a/.local/lib/python3.8/site-packages/numpy/random/__init__.py b/.local/lib/python3.8/site-packages/numpy/random/__init__.py new file mode 100644 index 0000000..2e8f99f --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/__init__.py @@ -0,0 +1,215 @@ +""" +======================== +Random Number Generation +======================== + +Use ``default_rng()`` to create a `Generator` and call its methods. 
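+ +For example (a minimal usage sketch):: + + >>> import numpy as np + >>> rng = np.random.default_rng(12345) # seeded for reproducibility + >>> vals = rng.standard_normal(3) # three draws from N(0, 1)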
+ +=============== ========================================================= +Generator +--------------- --------------------------------------------------------- +Generator Class implementing all of the random number distributions +default_rng Default constructor for ``Generator`` +=============== ========================================================= + +============================================= === +BitGenerator Streams that work with Generator +--------------------------------------------- --- +MT19937 +PCG64 +PCG64DXSM +Philox +SFC64 +============================================= === + +============================================= === +Getting entropy to initialize a BitGenerator +--------------------------------------------- --- +SeedSequence +============================================= === + + +Legacy +------ + +For backwards compatibility with previous versions of numpy before 1.17, the +various aliases to the global `RandomState` methods are left alone and do not +use the new `Generator` API. + +==================== ========================================================= +Utility functions +-------------------- --------------------------------------------------------- +random Uniformly distributed floats over ``[0, 1)`` +bytes Uniformly distributed random bytes. +permutation Randomly permute a sequence / generate a random sequence. +shuffle Randomly permute a sequence in place. +choice Random sample from 1-D array. +==================== ========================================================= + +==================== ========================================================= +Compatibility +functions - removed +in the new API +-------------------- --------------------------------------------------------- +rand Uniformly distributed values. +randn Normally distributed values. +ranf Uniformly distributed floating point numbers. +random_integers Uniformly distributed integers in a given range. + (deprecated, use ``integers(..., closed=True)`` instead) +random_sample Alias for `random_sample` +randint Uniformly distributed integers in a given range +seed Seed the legacy random number generator. +==================== ========================================================= + +==================== ========================================================= +Univariate +distributions +-------------------- --------------------------------------------------------- +beta Beta distribution over ``[0, 1]``. +binomial Binomial distribution. +chisquare :math:`\\chi^2` distribution. +exponential Exponential distribution. +f F (Fisher-Snedecor) distribution. +gamma Gamma distribution. +geometric Geometric distribution. +gumbel Gumbel distribution. +hypergeometric Hypergeometric distribution. +laplace Laplace distribution. +logistic Logistic distribution. +lognormal Log-normal distribution. +logseries Logarithmic series distribution. +negative_binomial Negative binomial distribution. +noncentral_chisquare Non-central chi-square distribution. +noncentral_f Non-central F distribution. +normal Normal / Gaussian distribution. +pareto Pareto distribution. +poisson Poisson distribution. +power Power distribution. +rayleigh Rayleigh distribution. +triangular Triangular distribution. +uniform Uniform distribution. +vonmises Von Mises circular distribution. +wald Wald (inverse Gaussian) distribution. +weibull Weibull distribution. +zipf Zipf's distribution over ranked data. 
+==================== ========================================================= + +==================== ========================================================== +Multivariate +distributions +-------------------- ---------------------------------------------------------- +dirichlet Multivariate generalization of Beta distribution. +multinomial Multivariate generalization of the binomial distribution. +multivariate_normal Multivariate generalization of the normal distribution. +==================== ========================================================== + +==================== ========================================================= +Standard +distributions +-------------------- --------------------------------------------------------- +standard_cauchy Standard Cauchy-Lorentz distribution. +standard_exponential Standard exponential distribution. +standard_gamma Standard Gamma distribution. +standard_normal Standard normal distribution. +standard_t Standard Student's t-distribution. +==================== ========================================================= + +==================== ========================================================= +Internal functions +-------------------- --------------------------------------------------------- +get_state Get tuple representing internal state of generator. +set_state Set state of generator. +==================== ========================================================= + + +""" +__all__ = [ + 'beta', + 'binomial', + 'bytes', + 'chisquare', + 'choice', + 'dirichlet', + 'exponential', + 'f', + 'gamma', + 'geometric', + 'get_state', + 'gumbel', + 'hypergeometric', + 'laplace', + 'logistic', + 'lognormal', + 'logseries', + 'multinomial', + 'multivariate_normal', + 'negative_binomial', + 'noncentral_chisquare', + 'noncentral_f', + 'normal', + 'pareto', + 'permutation', + 'poisson', + 'power', + 'rand', + 'randint', + 'randn', + 'random', + 'random_integers', + 'random_sample', + 'ranf', + 'rayleigh', + 'sample', + 'seed', + 'set_state', + 'shuffle', + 'standard_cauchy', + 'standard_exponential', + 'standard_gamma', + 'standard_normal', + 'standard_t', + 'triangular', + 'uniform', + 'vonmises', + 'wald', + 'weibull', + 'zipf', +] + +# add these for module-freeze analysis (like PyInstaller) +from . import _pickle +from . import _common +from . import _bounded_integers + +from ._generator import Generator, default_rng +from .bit_generator import SeedSequence, BitGenerator +from ._mt19937 import MT19937 +from ._pcg64 import PCG64, PCG64DXSM +from ._philox import Philox +from ._sfc64 import SFC64 +from .mtrand import * + +__all__ += ['Generator', 'RandomState', 'SeedSequence', 'MT19937', + 'Philox', 'PCG64', 'PCG64DXSM', 'SFC64', 'default_rng', + 'BitGenerator'] + + +def __RandomState_ctor(): + """Return a RandomState instance. + + This function exists solely to assist (un)pickling. + + Note that the state of the RandomState returned here is irrelevant, as this + function's entire purpose is to return a newly allocated RandomState whose + state pickle can set. Consequently the RandomState returned by this function + is a freshly allocated copy with a seed=0. 
+ + See https://github.com/numpy/numpy/issues/4763 for a detailed discussion + + """ + return RandomState(seed=0) + + +from numpy._pytesttester import PytestTester +test = PytestTester(__name__) +del PytestTester diff --git a/.local/lib/python3.8/site-packages/numpy/random/__init__.pyi b/.local/lib/python3.8/site-packages/numpy/random/__init__.pyi new file mode 100644 index 0000000..bf61476 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/__init__.pyi @@ -0,0 +1,72 @@ +from typing import List + +from numpy._pytesttester import PytestTester + +from numpy.random._generator import Generator as Generator +from numpy.random._generator import default_rng as default_rng +from numpy.random._mt19937 import MT19937 as MT19937 +from numpy.random._pcg64 import ( + PCG64 as PCG64, + PCG64DXSM as PCG64DXSM, +) +from numpy.random._philox import Philox as Philox +from numpy.random._sfc64 import SFC64 as SFC64 +from numpy.random.bit_generator import BitGenerator as BitGenerator +from numpy.random.bit_generator import SeedSequence as SeedSequence +from numpy.random.mtrand import ( + RandomState as RandomState, + beta as beta, + binomial as binomial, + bytes as bytes, + chisquare as chisquare, + choice as choice, + dirichlet as dirichlet, + exponential as exponential, + f as f, + gamma as gamma, + geometric as geometric, + get_state as get_state, + gumbel as gumbel, + hypergeometric as hypergeometric, + laplace as laplace, + logistic as logistic, + lognormal as lognormal, + logseries as logseries, + multinomial as multinomial, + multivariate_normal as multivariate_normal, + negative_binomial as negative_binomial, + noncentral_chisquare as noncentral_chisquare, + noncentral_f as noncentral_f, + normal as normal, + pareto as pareto, + permutation as permutation, + poisson as poisson, + power as power, + rand as rand, + randint as randint, + randn as randn, + random as random, + random_integers as random_integers, + random_sample as random_sample, + ranf as ranf, + rayleigh as rayleigh, + sample as sample, + seed as seed, + set_state as set_state, + shuffle as shuffle, + standard_cauchy as standard_cauchy, + standard_exponential as standard_exponential, + standard_gamma as standard_gamma, + standard_normal as standard_normal, + standard_t as standard_t, + triangular as triangular, + uniform as uniform, + vonmises as vonmises, + wald as wald, + weibull as weibull, + zipf as zipf, +) + +__all__: List[str] +__path__: List[str] +test: PytestTester diff --git a/.local/lib/python3.8/site-packages/numpy/random/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/random/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..cb9b823 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/random/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/random/__pycache__/_pickle.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/random/__pycache__/_pickle.cpython-38.pyc new file mode 100644 index 0000000..e04ce9c Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/random/__pycache__/_pickle.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/random/__pycache__/setup.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/random/__pycache__/setup.cpython-38.pyc new file mode 100644 index 0000000..0404a4b Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/random/__pycache__/setup.cpython-38.pyc differ diff --git 
a/.local/lib/python3.8/site-packages/numpy/random/_bounded_integers.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/numpy/random/_bounded_integers.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..6c2d5b0 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/random/_bounded_integers.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/numpy/random/_bounded_integers.pxd b/.local/lib/python3.8/site-packages/numpy/random/_bounded_integers.pxd new file mode 100644 index 0000000..7e41463 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/_bounded_integers.pxd @@ -0,0 +1,29 @@ +from libc.stdint cimport (uint8_t, uint16_t, uint32_t, uint64_t, + int8_t, int16_t, int32_t, int64_t, intptr_t) +import numpy as np +cimport numpy as np +ctypedef np.npy_bool bool_t + +from numpy.random cimport bitgen_t + +cdef inline uint64_t _gen_mask(uint64_t max_val) nogil: + """Mask generator for use in bounded random numbers""" + # Smallest bit mask >= max + cdef uint64_t mask = max_val + mask |= mask >> 1 + mask |= mask >> 2 + mask |= mask >> 4 + mask |= mask >> 8 + mask |= mask >> 16 + mask |= mask >> 32 + return mask + +cdef object _rand_uint64(object low, object high, object size, bint use_masked, bint closed, bitgen_t *state, object lock) +cdef object _rand_uint32(object low, object high, object size, bint use_masked, bint closed, bitgen_t *state, object lock) +cdef object _rand_uint16(object low, object high, object size, bint use_masked, bint closed, bitgen_t *state, object lock) +cdef object _rand_uint8(object low, object high, object size, bint use_masked, bint closed, bitgen_t *state, object lock) +cdef object _rand_bool(object low, object high, object size, bint use_masked, bint closed, bitgen_t *state, object lock) +cdef object _rand_int64(object low, object high, object size, bint use_masked, bint closed, bitgen_t *state, object lock) +cdef object _rand_int32(object low, object high, object size, bint use_masked, bint closed, bitgen_t *state, object lock) +cdef object _rand_int16(object low, object high, object size, bint use_masked, bint closed, bitgen_t *state, object lock) +cdef object _rand_int8(object low, object high, object size, bint use_masked, bint closed, bitgen_t *state, object lock) diff --git a/.local/lib/python3.8/site-packages/numpy/random/_common.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/numpy/random/_common.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..ceed83b Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/random/_common.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/numpy/random/_common.pxd b/.local/lib/python3.8/site-packages/numpy/random/_common.pxd new file mode 100644 index 0000000..9f2e8c3 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/_common.pxd @@ -0,0 +1,106 @@ +#cython: language_level=3 + +from libc.stdint cimport uint32_t, uint64_t, int32_t, int64_t + +import numpy as np +cimport numpy as np + +from numpy.random cimport bitgen_t + +cdef double POISSON_LAM_MAX +cdef double LEGACY_POISSON_LAM_MAX +cdef uint64_t MAXSIZE + +cdef enum ConstraintType: + CONS_NONE + CONS_NON_NEGATIVE + CONS_POSITIVE + CONS_POSITIVE_NOT_NAN + CONS_BOUNDED_0_1 + CONS_BOUNDED_0_1_NOTNAN + CONS_BOUNDED_GT_0_1 + CONS_GT_1 + CONS_GTE_1 + CONS_POISSON + LEGACY_CONS_POISSON + +ctypedef ConstraintType constraint_type + +cdef object benchmark(bitgen_t *bitgen, object lock, 
Py_ssize_t cnt, object method) +cdef object random_raw(bitgen_t *bitgen, object lock, object size, object output) +cdef object prepare_cffi(bitgen_t *bitgen) +cdef object prepare_ctypes(bitgen_t *bitgen) +cdef int check_constraint(double val, object name, constraint_type cons) except -1 +cdef int check_array_constraint(np.ndarray val, object name, constraint_type cons) except -1 + +cdef extern from "include/aligned_malloc.h": + cdef void *PyArray_realloc_aligned(void *p, size_t n) + cdef void *PyArray_malloc_aligned(size_t n) + cdef void *PyArray_calloc_aligned(size_t n, size_t s) + cdef void PyArray_free_aligned(void *p) + +ctypedef void (*random_double_fill)(bitgen_t *state, np.npy_intp count, double* out) nogil +ctypedef double (*random_double_0)(void *state) nogil +ctypedef double (*random_double_1)(void *state, double a) nogil +ctypedef double (*random_double_2)(void *state, double a, double b) nogil +ctypedef double (*random_double_3)(void *state, double a, double b, double c) nogil + +ctypedef double (*random_float_fill)(bitgen_t *state, np.npy_intp count, float* out) nogil +ctypedef float (*random_float_0)(bitgen_t *state) nogil +ctypedef float (*random_float_1)(bitgen_t *state, float a) nogil + +ctypedef int64_t (*random_uint_0)(void *state) nogil +ctypedef int64_t (*random_uint_d)(void *state, double a) nogil +ctypedef int64_t (*random_uint_dd)(void *state, double a, double b) nogil +ctypedef int64_t (*random_uint_di)(void *state, double a, uint64_t b) nogil +ctypedef int64_t (*random_uint_i)(void *state, int64_t a) nogil +ctypedef int64_t (*random_uint_iii)(void *state, int64_t a, int64_t b, int64_t c) nogil + +ctypedef uint32_t (*random_uint_0_32)(bitgen_t *state) nogil +ctypedef uint32_t (*random_uint_1_i_32)(bitgen_t *state, uint32_t a) nogil + +ctypedef int32_t (*random_int_2_i_32)(bitgen_t *state, int32_t a, int32_t b) nogil +ctypedef int64_t (*random_int_2_i)(bitgen_t *state, int64_t a, int64_t b) nogil + +cdef double kahan_sum(double *darr, np.npy_intp n) + +cdef inline double uint64_to_double(uint64_t rnd) nogil: + return (rnd >> 11) * (1.0 / 9007199254740992.0) + +cdef object double_fill(void *func, bitgen_t *state, object size, object lock, object out) + +cdef object float_fill(void *func, bitgen_t *state, object size, object lock, object out) + +cdef object float_fill_from_double(void *func, bitgen_t *state, object size, object lock, object out) + +cdef object wrap_int(object val, object bits) + +cdef np.ndarray int_to_array(object value, object name, object bits, object uint_size) + +cdef validate_output_shape(iter_shape, np.ndarray output) + +cdef object cont(void *func, void *state, object size, object lock, int narg, + object a, object a_name, constraint_type a_constraint, + object b, object b_name, constraint_type b_constraint, + object c, object c_name, constraint_type c_constraint, + object out) + +cdef object disc(void *func, void *state, object size, object lock, + int narg_double, int narg_int64, + object a, object a_name, constraint_type a_constraint, + object b, object b_name, constraint_type b_constraint, + object c, object c_name, constraint_type c_constraint) + +cdef object cont_f(void *func, bitgen_t *state, object size, object lock, + object a, object a_name, constraint_type a_constraint, + object out) + +cdef object cont_broadcast_3(void *func, void *state, object size, object lock, + np.ndarray a_arr, object a_name, constraint_type a_constraint, + np.ndarray b_arr, object b_name, constraint_type b_constraint, + np.ndarray c_arr, object c_name, 
constraint_type c_constraint) + +cdef object discrete_broadcast_iii(void *func, void *state, object size, object lock, + np.ndarray a_arr, object a_name, constraint_type a_constraint, + np.ndarray b_arr, object b_name, constraint_type b_constraint, + np.ndarray c_arr, object c_name, constraint_type c_constraint) diff --git a/.local/lib/python3.8/site-packages/numpy/random/_examples/cffi/__pycache__/extending.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/random/_examples/cffi/__pycache__/extending.cpython-38.pyc new file mode 100644 index 0000000..54065bf Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/random/_examples/cffi/__pycache__/extending.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/random/_examples/cffi/__pycache__/parse.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/random/_examples/cffi/__pycache__/parse.cpython-38.pyc new file mode 100644 index 0000000..d7ce749 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/random/_examples/cffi/__pycache__/parse.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/random/_examples/cffi/extending.py b/.local/lib/python3.8/site-packages/numpy/random/_examples/cffi/extending.py new file mode 100644 index 0000000..8440d40 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/_examples/cffi/extending.py @@ -0,0 +1,40 @@ +""" +Use cffi to access any of the underlying C functions from distributions.h +""" +import os +import numpy as np +import cffi +from .parse import parse_distributions_h +ffi = cffi.FFI() + +inc_dir = os.path.join(np.get_include(), 'numpy') + +# Basic numpy types +ffi.cdef(''' + typedef intptr_t npy_intp; + typedef unsigned char npy_bool; + +''') + +parse_distributions_h(ffi, inc_dir) + +lib = ffi.dlopen(np.random._generator.__file__) + +# Compare the distributions.h random_standard_normal_fill to +# Generator.standard_normal +bit_gen = np.random.PCG64() +rng = np.random.Generator(bit_gen) +state = bit_gen.state + +interface = rng.bit_generator.cffi +n = 100 +vals_cffi = ffi.new('double[%d]' % n) +lib.random_standard_normal_fill(interface.bit_generator, n, vals_cffi) + +# reset the state +bit_gen.state = state + +vals = rng.standard_normal(n) + +for i in range(n): + assert vals[i] == vals_cffi[i] diff --git a/.local/lib/python3.8/site-packages/numpy/random/_examples/cffi/parse.py b/.local/lib/python3.8/site-packages/numpy/random/_examples/cffi/parse.py new file mode 100644 index 0000000..daff6bd --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/_examples/cffi/parse.py @@ -0,0 +1,55 @@ +import os + + +def parse_distributions_h(ffi, inc_dir): + """ + Parse distributions.h located in inc_dir for CFFI, filling in the ffi.cdef + + Read the function declarations without the "#define ..." macros that will + be filled in when loading the library.
+ """ + + with open(os.path.join(inc_dir, 'random', 'bitgen.h')) as fid: + s = [] + for line in fid: + # massage the include file + if line.strip().startswith('#'): + continue + s.append(line) + ffi.cdef('\n'.join(s)) + + with open(os.path.join(inc_dir, 'random', 'distributions.h')) as fid: + s = [] + in_skip = 0 + ignoring = False + for line in fid: + # check for and remove extern "C" guards + if ignoring: + if line.strip().startswith('#endif'): + ignoring = False + continue + if line.strip().startswith('#ifdef __cplusplus'): + ignoring = True + + # massage the include file + if line.strip().startswith('#'): + continue + + # skip any inlined function definition + # which starts with 'static NPY_INLINE xxx(...) {' + # and ends with a closing '}' + if line.strip().startswith('static NPY_INLINE'): + in_skip += line.count('{') + continue + elif in_skip > 0: + in_skip += line.count('{') + in_skip -= line.count('}') + continue + + # replace defines with their value or remove them + line = line.replace('DECLDIR', '') + line = line.replace('NPY_INLINE', '') + line = line.replace('RAND_INT_TYPE', 'int64_t') + s.append(line) + ffi.cdef('\n'.join(s)) + diff --git a/.local/lib/python3.8/site-packages/numpy/random/_examples/cython/__pycache__/setup.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/random/_examples/cython/__pycache__/setup.cpython-38.pyc new file mode 100644 index 0000000..5b38a82 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/random/_examples/cython/__pycache__/setup.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/random/_examples/cython/extending.pyx b/.local/lib/python3.8/site-packages/numpy/random/_examples/cython/extending.pyx new file mode 100644 index 0000000..3a7f81a --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/_examples/cython/extending.pyx @@ -0,0 +1,78 @@ +#!/usr/bin/env python3 +#cython: language_level=3 + +from libc.stdint cimport uint32_t +from cpython.pycapsule cimport PyCapsule_IsValid, PyCapsule_GetPointer + +import numpy as np +cimport numpy as np +cimport cython + +from numpy.random cimport bitgen_t +from numpy.random import PCG64 + +np.import_array() + + +@cython.boundscheck(False) +@cython.wraparound(False) +def uniform_mean(Py_ssize_t n): + cdef Py_ssize_t i + cdef bitgen_t *rng + cdef const char *capsule_name = "BitGenerator" + cdef double[::1] random_values + cdef np.ndarray randoms + + x = PCG64() + capsule = x.capsule + if not PyCapsule_IsValid(capsule, capsule_name): + raise ValueError("Invalid pointer to anon_func_state") + rng = PyCapsule_GetPointer(capsule, capsule_name) + random_values = np.empty(n) + # Best practice is to acquire the lock whenever generating random values. + # This prevents other threads from modifying the state. Acquiring the lock + # is only necessary if if the GIL is also released, as in this example. 
+ with x.lock, nogil: + for i in range(n): + random_values[i] = rng.next_double(rng.state) + randoms = np.asarray(random_values) + return randoms.mean() + + +# This function is declared nogil so it can be used without the GIL below +cdef uint32_t bounded_uint(uint32_t lb, uint32_t ub, bitgen_t *rng) nogil: + cdef uint32_t mask, delta, val + mask = delta = ub - lb + mask |= mask >> 1 + mask |= mask >> 2 + mask |= mask >> 4 + mask |= mask >> 8 + mask |= mask >> 16 + + val = rng.next_uint32(rng.state) & mask + while val > delta: + val = rng.next_uint32(rng.state) & mask + + return lb + val + + +@cython.boundscheck(False) +@cython.wraparound(False) +def bounded_uints(uint32_t lb, uint32_t ub, Py_ssize_t n): + cdef Py_ssize_t i + cdef bitgen_t *rng + cdef uint32_t[::1] out + cdef const char *capsule_name = "BitGenerator" + + x = PCG64() + out = np.empty(n, dtype=np.uint32) + capsule = x.capsule + + if not PyCapsule_IsValid(capsule, capsule_name): + raise ValueError("Invalid pointer to anon_func_state") + rng = <bitgen_t *> PyCapsule_GetPointer(capsule, capsule_name) + + with x.lock, nogil: + for i in range(n): + out[i] = bounded_uint(lb, ub, rng) + return np.asarray(out) diff --git a/.local/lib/python3.8/site-packages/numpy/random/_examples/cython/extending_distributions.pyx b/.local/lib/python3.8/site-packages/numpy/random/_examples/cython/extending_distributions.pyx new file mode 100644 index 0000000..d908e92 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/_examples/cython/extending_distributions.pyx @@ -0,0 +1,120 @@ +#!/usr/bin/env python3 +#cython: language_level=3 +""" +This file shows how to use a BitGenerator to create a distribution. +""" +import numpy as np +cimport numpy as np +cimport cython +from cpython.pycapsule cimport PyCapsule_IsValid, PyCapsule_GetPointer +from libc.stdint cimport uint16_t, uint64_t +from numpy.random cimport bitgen_t +from numpy.random import PCG64 +from numpy.random.c_distributions cimport ( + random_standard_uniform_fill, random_standard_uniform_fill_f) + + +@cython.boundscheck(False) +@cython.wraparound(False) +def uniforms(Py_ssize_t n): + """ + Create an array of `n` uniformly distributed doubles.
+ A 'real' distribution would want to process the values into + some non-uniform distribution + """ + cdef Py_ssize_t i + cdef bitgen_t *rng + cdef const char *capsule_name = "BitGenerator" + cdef double[::1] random_values + + x = PCG64() + capsule = x.capsule + # Optional check that the capsule is from a BitGenerator + if not PyCapsule_IsValid(capsule, capsule_name): + raise ValueError("Invalid pointer to anon_func_state") + # Cast the pointer + rng = <bitgen_t *> PyCapsule_GetPointer(capsule, capsule_name) + random_values = np.empty(n, dtype='float64') + with x.lock, nogil: + for i in range(n): + # Call the function + random_values[i] = rng.next_double(rng.state) + randoms = np.asarray(random_values) + + return randoms + +# cython example 2 +@cython.boundscheck(False) +@cython.wraparound(False) +def uint10_uniforms(Py_ssize_t n): + """Uniform 10 bit integers stored as 16-bit unsigned integers""" + cdef Py_ssize_t i + cdef bitgen_t *rng + cdef const char *capsule_name = "BitGenerator" + cdef uint16_t[::1] random_values + cdef int bits_remaining + cdef int width = 10 + cdef uint64_t buff, mask = 0x3FF + + x = PCG64() + capsule = x.capsule + if not PyCapsule_IsValid(capsule, capsule_name): + raise ValueError("Invalid pointer to anon_func_state") + rng = <bitgen_t *> PyCapsule_GetPointer(capsule, capsule_name) + random_values = np.empty(n, dtype='uint16') + # Best practice is to release the GIL and acquire the lock + bits_remaining = 0 + with x.lock, nogil: + for i in range(n): + if bits_remaining < width: + # refill the 64-bit buffer once its bits are used up + buff = rng.next_uint64(rng.state) + bits_remaining = 64 + random_values[i] = buff & mask + buff >>= width + bits_remaining -= width + + randoms = np.asarray(random_values) + return randoms + +# cython example 3 +def uniforms_ex(bit_generator, Py_ssize_t n, dtype=np.float64): + """ + Create an array of `n` uniformly distributed doubles via a "fill" function. + + A 'real' distribution would want to process the values into + some non-uniform distribution + + Parameters + ---------- + bit_generator: BitGenerator instance + n: int + Output vector length + dtype: {str, dtype}, optional + Desired dtype, either 'd' (or 'float64') or 'f' (or 'float32').
The + default dtype value is 'd' + """ + cdef Py_ssize_t i + cdef bitgen_t *rng + cdef const char *capsule_name = "BitGenerator" + cdef np.ndarray randoms + + capsule = bit_generator.capsule + # Optional check that the capsule is from a BitGenerator + if not PyCapsule_IsValid(capsule, capsule_name): + raise ValueError("Invalid pointer to anon_func_state") + # Cast the pointer + rng = <bitgen_t *> PyCapsule_GetPointer(capsule, capsule_name) + + _dtype = np.dtype(dtype) + randoms = np.empty(n, dtype=_dtype) + if _dtype == np.float32: + with bit_generator.lock: + random_standard_uniform_fill_f(rng, n, <float*> np.PyArray_DATA(randoms)) + elif _dtype == np.float64: + with bit_generator.lock: + random_standard_uniform_fill(rng, n, <double*> np.PyArray_DATA(randoms)) + else: + raise TypeError('Unsupported dtype %r for random' % _dtype) + return randoms + diff --git a/.local/lib/python3.8/site-packages/numpy/random/_examples/cython/setup.py b/.local/lib/python3.8/site-packages/numpy/random/_examples/cython/setup.py new file mode 100644 index 0000000..f41150f --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/_examples/cython/setup.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python3 +""" +Build the Cython demonstrations of low-level access to NumPy random + +Usage: python setup.py build_ext -i +""" +import setuptools # triggers monkeypatching distutils +from distutils.core import setup +from os.path import dirname, join, abspath + +import numpy as np +from Cython.Build import cythonize +from numpy.distutils.misc_util import get_info +from setuptools.extension import Extension + +path = dirname(__file__) +src_dir = join(dirname(path), '..', 'src') +defs = [('NPY_NO_DEPRECATED_API', 0)] +inc_path = np.get_include() +lib_path = [abspath(join(np.get_include(), '..', '..', 'random', 'lib'))] +lib_path += get_info('npymath')['library_dirs'] + +extending = Extension("extending", + sources=[join('.', 'extending.pyx')], + include_dirs=[ + np.get_include(), + join(path, '..', '..') + ], + define_macros=defs, + ) +distributions = Extension("extending_distributions", + sources=[join('.', 'extending_distributions.pyx')], + include_dirs=[inc_path], + library_dirs=lib_path, + libraries=['npyrandom', 'npymath'], + define_macros=defs, + ) + +extensions = [extending, distributions] + +setup( + ext_modules=cythonize(extensions) +) diff --git a/.local/lib/python3.8/site-packages/numpy/random/_examples/numba/__pycache__/extending.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/random/_examples/numba/__pycache__/extending.cpython-38.pyc new file mode 100644 index 0000000..ed5a499 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/random/_examples/numba/__pycache__/extending.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/random/_examples/numba/__pycache__/extending_distributions.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/random/_examples/numba/__pycache__/extending_distributions.cpython-38.pyc new file mode 100644 index 0000000..17e49a3 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/random/_examples/numba/__pycache__/extending_distributions.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/random/_examples/numba/extending.py b/.local/lib/python3.8/site-packages/numpy/random/_examples/numba/extending.py new file mode 100644 index 0000000..f387db6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/_examples/numba/extending.py @@ -0,0 +1,84 @@ +import numpy as np +import numba as nb + +from numpy.random
import PCG64 +from timeit import timeit + +bit_gen = PCG64() +next_d = bit_gen.cffi.next_double +state_addr = bit_gen.cffi.state_address + +def normals(n, state): + out = np.empty(n) + for i in range((n + 1) // 2): + x1 = 2.0 * next_d(state) - 1.0 + x2 = 2.0 * next_d(state) - 1.0 + r2 = x1 * x1 + x2 * x2 + while r2 >= 1.0 or r2 == 0.0: + x1 = 2.0 * next_d(state) - 1.0 + x2 = 2.0 * next_d(state) - 1.0 + r2 = x1 * x1 + x2 * x2 + f = np.sqrt(-2.0 * np.log(r2) / r2) + out[2 * i] = f * x1 + if 2 * i + 1 < n: + out[2 * i + 1] = f * x2 + return out + +# Compile using Numba +normalsj = nb.jit(normals, nopython=True) +# Must use state address not state with numba +n = 10000 + +def numbacall(): + return normalsj(n, state_addr) + +rg = np.random.Generator(PCG64()) + +def numpycall(): + return rg.normal(size=n) + +# Check that the functions work +r1 = numbacall() +r2 = numpycall() +assert r1.shape == (n,) +assert r1.shape == r2.shape + +t1 = timeit(numbacall, number=1000) +print(f'{t1:.2f} secs for {n} PCG64 (Numba/PCG64) gaussian randoms') +t2 = timeit(numpycall, number=1000) +print(f'{t2:.2f} secs for {n} PCG64 (NumPy/PCG64) gaussian randoms') + +# example 2 + +next_u32 = bit_gen.ctypes.next_uint32 +ctypes_state = bit_gen.ctypes.state + +@nb.jit(nopython=True) +def bounded_uint(lb, ub, state): + mask = delta = ub - lb + mask |= mask >> 1 + mask |= mask >> 2 + mask |= mask >> 4 + mask |= mask >> 8 + mask |= mask >> 16 + + val = next_u32(state) & mask + while val > delta: + val = next_u32(state) & mask + + return lb + val + + +print(bounded_uint(323, 2394691, ctypes_state.value)) + + +@nb.jit(nopython=True) +def bounded_uints(lb, ub, n, state): + out = np.empty(n, dtype=np.uint32) + for i in range(n): + out[i] = bounded_uint(lb, ub, state) + + +bounded_uints(323, 2394691, 10000000, ctypes_state.value) + + diff --git a/.local/lib/python3.8/site-packages/numpy/random/_examples/numba/extending_distributions.py b/.local/lib/python3.8/site-packages/numpy/random/_examples/numba/extending_distributions.py new file mode 100644 index 0000000..7cf8bf0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/_examples/numba/extending_distributions.py @@ -0,0 +1,67 @@ +r""" +Building the required library in this example requires a source distribution +of NumPy or clone of the NumPy git repository since distributions.c is not +included in binary distributions. 
+ +On *nix, execute in numpy/random/src/distributions + +export PYTHON_VERSION=3.8 # Python version +export PYTHON_INCLUDE=#path to Python's include folder, usually \ + ${PYTHON_HOME}/include/python${PYTHON_VERSION}m +export NUMPY_INCLUDE=#path to numpy's include folder, usually \ + ${PYTHON_HOME}/lib/python${PYTHON_VERSION}/site-packages/numpy/core/include +gcc -shared -o libdistributions.so -fPIC distributions.c \ + -I${NUMPY_INCLUDE} -I${PYTHON_INCLUDE} +mv libdistributions.so ../../_examples/numba/ + +On Windows + +rem PYTHON_HOME and PYTHON_VERSION are setup dependent, this is an example +set PYTHON_HOME=c:\Anaconda +set PYTHON_VERSION=38 +cl.exe /LD .\distributions.c -DDLL_EXPORT \ + -I%PYTHON_HOME%\lib\site-packages\numpy\core\include \ + -I%PYTHON_HOME%\include %PYTHON_HOME%\libs\python%PYTHON_VERSION%.lib +move distributions.dll ../../_examples/numba/ +""" +import os + +import numba as nb +import numpy as np +from cffi import FFI + +from numpy.random import PCG64 + +ffi = FFI() +if os.path.exists('./distributions.dll'): + lib = ffi.dlopen('./distributions.dll') +elif os.path.exists('./libdistributions.so'): + lib = ffi.dlopen('./libdistributions.so') +else: + raise RuntimeError('Required DLL/so file was not found.') + +ffi.cdef(""" +double random_standard_normal(void *bitgen_state); +""") +x = PCG64() +xffi = x.cffi +bit_generator = xffi.bit_generator + +random_standard_normal = lib.random_standard_normal + + +def normals(n, bit_generator): + out = np.empty(n) + for i in range(n): + out[i] = random_standard_normal(bit_generator) + return out + + +normalsj = nb.jit(normals, nopython=True) + +# Numba requires a memory address for void * +# Can also get address from x.ctypes.bit_generator.value +bit_generator_address = int(ffi.cast('uintptr_t', bit_generator)) + +norm = normalsj(1000, bit_generator_address) +print(norm[:12]) diff --git a/.local/lib/python3.8/site-packages/numpy/random/_generator.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/numpy/random/_generator.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..2b9d636 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/random/_generator.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/numpy/random/_generator.pyi b/.local/lib/python3.8/site-packages/numpy/random/_generator.pyi new file mode 100644 index 0000000..c574bef --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/_generator.pyi @@ -0,0 +1,647 @@ +from typing import Any, Callable, Dict, Optional, Tuple, Type, Union, overload, TypeVar, Literal + +from numpy import ( + bool_, + dtype, + float32, + float64, + int8, + int16, + int32, + int64, + int_, + ndarray, + uint, + uint8, + uint16, + uint32, + uint64, +) +from numpy.random import BitGenerator, SeedSequence +from numpy.typing import ( + ArrayLike, + _ArrayLikeFloat_co, + _ArrayLikeInt_co, + _DoubleCodes, + _DTypeLikeBool, + _DTypeLikeInt, + _DTypeLikeUInt, + _Float32Codes, + _Float64Codes, + _Int8Codes, + _Int16Codes, + _Int32Codes, + _Int64Codes, + _IntCodes, + _ShapeLike, + _SingleCodes, + _SupportsDType, + _UInt8Codes, + _UInt16Codes, + _UInt32Codes, + _UInt64Codes, + _UIntCodes, +) + +_ArrayType = TypeVar("_ArrayType", bound=ndarray[Any, Any]) + +_DTypeLikeFloat32 = Union[ + dtype[float32], + _SupportsDType[dtype[float32]], + Type[float32], + _Float32Codes, + _SingleCodes, +] + +_DTypeLikeFloat64 = Union[ + dtype[float64], + _SupportsDType[dtype[float64]], + Type[float], + Type[float64], + _Float64Codes, +
_DoubleCodes, +] + +class Generator: + def __init__(self, bit_generator: BitGenerator) -> None: ... + def __repr__(self) -> str: ... + def __str__(self) -> str: ... + def __getstate__(self) -> Dict[str, Any]: ... + def __setstate__(self, state: Dict[str, Any]) -> None: ... + def __reduce__(self) -> Tuple[Callable[[str], Generator], Tuple[str], Dict[str, Any]]: ... + @property + def bit_generator(self) -> BitGenerator: ... + def bytes(self, length: int) -> bytes: ... + @overload + def standard_normal( # type: ignore[misc] + self, + size: None = ..., + dtype: Union[_DTypeLikeFloat32, _DTypeLikeFloat64] = ..., + out: None = ..., + ) -> float: ... + @overload + def standard_normal( # type: ignore[misc] + self, + size: _ShapeLike = ..., + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def standard_normal( # type: ignore[misc] + self, + *, + out: ndarray[Any, dtype[float64]] = ..., + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def standard_normal( # type: ignore[misc] + self, + size: _ShapeLike = ..., + dtype: _DTypeLikeFloat32 = ..., + out: Optional[ndarray[Any, dtype[float32]]] = ..., + ) -> ndarray[Any, dtype[float32]]: ... + @overload + def standard_normal( # type: ignore[misc] + self, + size: _ShapeLike = ..., + dtype: _DTypeLikeFloat64 = ..., + out: Optional[ndarray[Any, dtype[float64]]] = ..., + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def permutation(self, x: int, axis: int = ...) -> ndarray[Any, dtype[int64]]: ... + @overload + def permutation(self, x: ArrayLike, axis: int = ...) -> ndarray[Any, Any]: ... + @overload + def standard_exponential( # type: ignore[misc] + self, + size: None = ..., + dtype: Union[_DTypeLikeFloat32, _DTypeLikeFloat64] = ..., + method: Literal["zig", "inv"] = ..., + out: None = ..., + ) -> float: ... + @overload + def standard_exponential( + self, + size: _ShapeLike = ..., + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def standard_exponential( + self, + *, + out: ndarray[Any, dtype[float64]] = ..., + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def standard_exponential( + self, + size: _ShapeLike = ..., + *, + method: Literal["zig", "inv"] = ..., + out: Optional[ndarray[Any, dtype[float64]]] = ..., + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def standard_exponential( + self, + size: _ShapeLike = ..., + dtype: _DTypeLikeFloat32 = ..., + method: Literal["zig", "inv"] = ..., + out: Optional[ndarray[Any, dtype[float32]]] = ..., + ) -> ndarray[Any, dtype[float32]]: ... + @overload + def standard_exponential( + self, + size: _ShapeLike = ..., + dtype: _DTypeLikeFloat64 = ..., + method: Literal["zig", "inv"] = ..., + out: Optional[ndarray[Any, dtype[float64]]] = ..., + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def random( # type: ignore[misc] + self, + size: None = ..., + dtype: Union[_DTypeLikeFloat32, _DTypeLikeFloat64] = ..., + out: None = ..., + ) -> float: ... + @overload + def random( + self, + *, + out: ndarray[Any, dtype[float64]] = ..., + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def random( + self, + size: _ShapeLike = ..., + *, + out: Optional[ndarray[Any, dtype[float64]]] = ..., + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def random( + self, + size: _ShapeLike = ..., + dtype: _DTypeLikeFloat32 = ..., + out: Optional[ndarray[Any, dtype[float32]]] = ..., + ) -> ndarray[Any, dtype[float32]]: ... 
+ @overload + def random( + self, + size: _ShapeLike = ..., + dtype: _DTypeLikeFloat64 = ..., + out: Optional[ndarray[Any, dtype[float64]]] = ..., + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def beta(self, a: float, b: float, size: None = ...) -> float: ... # type: ignore[misc] + @overload + def beta( + self, a: _ArrayLikeFloat_co, b: _ArrayLikeFloat_co, size: Optional[_ShapeLike] = ... + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def exponential(self, scale: float = ..., size: None = ...) -> float: ... # type: ignore[misc] + @overload + def exponential( + self, scale: _ArrayLikeFloat_co = ..., size: Optional[_ShapeLike] = ... + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def integers( # type: ignore[misc] + self, + low: int, + high: Optional[int] = ..., + ) -> int: ... + @overload + def integers( # type: ignore[misc] + self, + low: int, + high: Optional[int] = ..., + size: None = ..., + dtype: _DTypeLikeBool = ..., + endpoint: bool = ..., + ) -> bool: ... + @overload + def integers( # type: ignore[misc] + self, + low: int, + high: Optional[int] = ..., + size: None = ..., + dtype: Union[_DTypeLikeInt, _DTypeLikeUInt] = ..., + endpoint: bool = ..., + ) -> int: ... + @overload + def integers( # type: ignore[misc] + self, + low: _ArrayLikeInt_co, + high: Optional[_ArrayLikeInt_co] = ..., + size: Optional[_ShapeLike] = ..., + ) -> ndarray[Any, dtype[int64]]: ... + @overload + def integers( # type: ignore[misc] + self, + low: _ArrayLikeInt_co, + high: Optional[_ArrayLikeInt_co] = ..., + size: Optional[_ShapeLike] = ..., + dtype: _DTypeLikeBool = ..., + endpoint: bool = ..., + ) -> ndarray[Any, dtype[bool_]]: ... + @overload + def integers( # type: ignore[misc] + self, + low: _ArrayLikeInt_co, + high: Optional[_ArrayLikeInt_co] = ..., + size: Optional[_ShapeLike] = ..., + dtype: Union[dtype[int8], Type[int8], _Int8Codes, _SupportsDType[dtype[int8]]] = ..., + endpoint: bool = ..., + ) -> ndarray[Any, dtype[int8]]: ... + @overload + def integers( # type: ignore[misc] + self, + low: _ArrayLikeInt_co, + high: Optional[_ArrayLikeInt_co] = ..., + size: Optional[_ShapeLike] = ..., + dtype: Union[dtype[int16], Type[int16], _Int16Codes, _SupportsDType[dtype[int16]]] = ..., + endpoint: bool = ..., + ) -> ndarray[Any, dtype[int16]]: ... + @overload + def integers( # type: ignore[misc] + self, + low: _ArrayLikeInt_co, + high: Optional[_ArrayLikeInt_co] = ..., + size: Optional[_ShapeLike] = ..., + dtype: Union[dtype[int32], Type[int32], _Int32Codes, _SupportsDType[dtype[int32]]] = ..., + endpoint: bool = ..., + ) -> ndarray[Any, dtype[Union[int32]]]: ... + @overload + def integers( # type: ignore[misc] + self, + low: _ArrayLikeInt_co, + high: Optional[_ArrayLikeInt_co] = ..., + size: Optional[_ShapeLike] = ..., + dtype: Optional[ + Union[dtype[int64], Type[int64], _Int64Codes, _SupportsDType[dtype[int64]]] + ] = ..., + endpoint: bool = ..., + ) -> ndarray[Any, dtype[int64]]: ... + @overload + def integers( # type: ignore[misc] + self, + low: _ArrayLikeInt_co, + high: Optional[_ArrayLikeInt_co] = ..., + size: Optional[_ShapeLike] = ..., + dtype: Union[dtype[uint8], Type[uint8], _UInt8Codes, _SupportsDType[dtype[uint8]]] = ..., + endpoint: bool = ..., + ) -> ndarray[Any, dtype[uint8]]: ... 
+ @overload + def integers( # type: ignore[misc] + self, + low: _ArrayLikeInt_co, + high: Optional[_ArrayLikeInt_co] = ..., + size: Optional[_ShapeLike] = ..., + dtype: Union[ + dtype[uint16], Type[uint16], _UInt16Codes, _SupportsDType[dtype[uint16]] + ] = ..., + endpoint: bool = ..., + ) -> ndarray[Any, dtype[Union[uint16]]]: ... + @overload + def integers( # type: ignore[misc] + self, + low: _ArrayLikeInt_co, + high: Optional[_ArrayLikeInt_co] = ..., + size: Optional[_ShapeLike] = ..., + dtype: Union[ + dtype[uint32], Type[uint32], _UInt32Codes, _SupportsDType[dtype[uint32]] + ] = ..., + endpoint: bool = ..., + ) -> ndarray[Any, dtype[uint32]]: ... + @overload + def integers( # type: ignore[misc] + self, + low: _ArrayLikeInt_co, + high: Optional[_ArrayLikeInt_co] = ..., + size: Optional[_ShapeLike] = ..., + dtype: Union[ + dtype[uint64], Type[uint64], _UInt64Codes, _SupportsDType[dtype[uint64]] + ] = ..., + endpoint: bool = ..., + ) -> ndarray[Any, dtype[uint64]]: ... + @overload + def integers( # type: ignore[misc] + self, + low: _ArrayLikeInt_co, + high: Optional[_ArrayLikeInt_co] = ..., + size: Optional[_ShapeLike] = ..., + dtype: Union[ + dtype[int_], Type[int], Type[int_], _IntCodes, _SupportsDType[dtype[int_]] + ] = ..., + endpoint: bool = ..., + ) -> ndarray[Any, dtype[int_]]: ... + @overload + def integers( # type: ignore[misc] + self, + low: _ArrayLikeInt_co, + high: Optional[_ArrayLikeInt_co] = ..., + size: Optional[_ShapeLike] = ..., + dtype: Union[dtype[uint], Type[uint], _UIntCodes, _SupportsDType[dtype[uint]]] = ..., + endpoint: bool = ..., + ) -> ndarray[Any, dtype[uint]]: ... + # TODO: Use a TypeVar _T here to get away from Any output? Should be int->ndarray[Any,dtype[int64]], ArrayLike[_T] -> Union[_T, ndarray[Any,Any]] + @overload + def choice( + self, + a: int, + size: None = ..., + replace: bool = ..., + p: Optional[_ArrayLikeFloat_co] = ..., + axis: int = ..., + shuffle: bool = ..., + ) -> int: ... + @overload + def choice( + self, + a: int, + size: _ShapeLike = ..., + replace: bool = ..., + p: Optional[_ArrayLikeFloat_co] = ..., + axis: int = ..., + shuffle: bool = ..., + ) -> ndarray[Any, dtype[int64]]: ... + @overload + def choice( + self, + a: ArrayLike, + size: None = ..., + replace: bool = ..., + p: Optional[_ArrayLikeFloat_co] = ..., + axis: int = ..., + shuffle: bool = ..., + ) -> Any: ... + @overload + def choice( + self, + a: ArrayLike, + size: _ShapeLike = ..., + replace: bool = ..., + p: Optional[_ArrayLikeFloat_co] = ..., + axis: int = ..., + shuffle: bool = ..., + ) -> ndarray[Any, Any]: ... + @overload + def uniform(self, low: float = ..., high: float = ..., size: None = ...) -> float: ... # type: ignore[misc] + @overload + def uniform( + self, + low: _ArrayLikeFloat_co = ..., + high: _ArrayLikeFloat_co = ..., + size: Optional[_ShapeLike] = ..., + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def normal(self, loc: float = ..., scale: float = ..., size: None = ...) -> float: ... # type: ignore[misc] + @overload + def normal( + self, + loc: _ArrayLikeFloat_co = ..., + scale: _ArrayLikeFloat_co = ..., + size: Optional[_ShapeLike] = ..., + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def standard_gamma( # type: ignore[misc] + self, + shape: float, + size: None = ..., + dtype: Union[_DTypeLikeFloat32, _DTypeLikeFloat64] = ..., + out: None = ..., + ) -> float: ... + @overload + def standard_gamma( + self, + shape: _ArrayLikeFloat_co, + size: Optional[_ShapeLike] = ..., + ) -> ndarray[Any, dtype[float64]]: ... 
+ @overload + def standard_gamma( + self, + shape: _ArrayLikeFloat_co, + *, + out: ndarray[Any, dtype[float64]] = ..., + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def standard_gamma( + self, + shape: _ArrayLikeFloat_co, + size: Optional[_ShapeLike] = ..., + dtype: _DTypeLikeFloat32 = ..., + out: Optional[ndarray[Any, dtype[float32]]] = ..., + ) -> ndarray[Any, dtype[float32]]: ... + @overload + def standard_gamma( + self, + shape: _ArrayLikeFloat_co, + size: Optional[_ShapeLike] = ..., + dtype: _DTypeLikeFloat64 = ..., + out: Optional[ndarray[Any, dtype[float64]]] = ..., + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def gamma(self, shape: float, scale: float = ..., size: None = ...) -> float: ... # type: ignore[misc] + @overload + def gamma( + self, + shape: _ArrayLikeFloat_co, + scale: _ArrayLikeFloat_co = ..., + size: Optional[_ShapeLike] = ..., + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def f(self, dfnum: float, dfden: float, size: None = ...) -> float: ... # type: ignore[misc] + @overload + def f( + self, dfnum: _ArrayLikeFloat_co, dfden: _ArrayLikeFloat_co, size: Optional[_ShapeLike] = ... + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def noncentral_f(self, dfnum: float, dfden: float, nonc: float, size: None = ...) -> float: ... # type: ignore[misc] + @overload + def noncentral_f( + self, + dfnum: _ArrayLikeFloat_co, + dfden: _ArrayLikeFloat_co, + nonc: _ArrayLikeFloat_co, + size: Optional[_ShapeLike] = ..., + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def chisquare(self, df: float, size: None = ...) -> float: ... # type: ignore[misc] + @overload + def chisquare( + self, df: _ArrayLikeFloat_co, size: Optional[_ShapeLike] = ... + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def noncentral_chisquare(self, df: float, nonc: float, size: None = ...) -> float: ... # type: ignore[misc] + @overload + def noncentral_chisquare( + self, df: _ArrayLikeFloat_co, nonc: _ArrayLikeFloat_co, size: Optional[_ShapeLike] = ... + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def standard_t(self, df: float, size: None = ...) -> float: ... # type: ignore[misc] + @overload + def standard_t( + self, df: _ArrayLikeFloat_co, size: None = ... + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def standard_t( + self, df: _ArrayLikeFloat_co, size: _ShapeLike = ... + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def vonmises(self, mu: float, kappa: float, size: None = ...) -> float: ... # type: ignore[misc] + @overload + def vonmises( + self, mu: _ArrayLikeFloat_co, kappa: _ArrayLikeFloat_co, size: Optional[_ShapeLike] = ... + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def pareto(self, a: float, size: None = ...) -> float: ... # type: ignore[misc] + @overload + def pareto( + self, a: _ArrayLikeFloat_co, size: Optional[_ShapeLike] = ... + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def weibull(self, a: float, size: None = ...) -> float: ... # type: ignore[misc] + @overload + def weibull( + self, a: _ArrayLikeFloat_co, size: Optional[_ShapeLike] = ... + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def power(self, a: float, size: None = ...) -> float: ... # type: ignore[misc] + @overload + def power( + self, a: _ArrayLikeFloat_co, size: Optional[_ShapeLike] = ... + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def standard_cauchy(self, size: None = ...) -> float: ... # type: ignore[misc] + @overload + def standard_cauchy(self, size: _ShapeLike = ...) -> ndarray[Any, dtype[float64]]: ... 
+ @overload + def laplace(self, loc: float = ..., scale: float = ..., size: None = ...) -> float: ... # type: ignore[misc] + @overload + def laplace( + self, + loc: _ArrayLikeFloat_co = ..., + scale: _ArrayLikeFloat_co = ..., + size: Optional[_ShapeLike] = ..., + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def gumbel(self, loc: float = ..., scale: float = ..., size: None = ...) -> float: ... # type: ignore[misc] + @overload + def gumbel( + self, + loc: _ArrayLikeFloat_co = ..., + scale: _ArrayLikeFloat_co = ..., + size: Optional[_ShapeLike] = ..., + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def logistic(self, loc: float = ..., scale: float = ..., size: None = ...) -> float: ... # type: ignore[misc] + @overload + def logistic( + self, + loc: _ArrayLikeFloat_co = ..., + scale: _ArrayLikeFloat_co = ..., + size: Optional[_ShapeLike] = ..., + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def lognormal(self, mean: float = ..., sigma: float = ..., size: None = ...) -> float: ... # type: ignore[misc] + @overload + def lognormal( + self, + mean: _ArrayLikeFloat_co = ..., + sigma: _ArrayLikeFloat_co = ..., + size: Optional[_ShapeLike] = ..., + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def rayleigh(self, scale: float = ..., size: None = ...) -> float: ... # type: ignore[misc] + @overload + def rayleigh( + self, scale: _ArrayLikeFloat_co = ..., size: Optional[_ShapeLike] = ... + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def wald(self, mean: float, scale: float, size: None = ...) -> float: ... # type: ignore[misc] + @overload + def wald( + self, mean: _ArrayLikeFloat_co, scale: _ArrayLikeFloat_co, size: Optional[_ShapeLike] = ... + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def triangular(self, left: float, mode: float, right: float, size: None = ...) -> float: ... # type: ignore[misc] + @overload + def triangular( + self, + left: _ArrayLikeFloat_co, + mode: _ArrayLikeFloat_co, + right: _ArrayLikeFloat_co, + size: Optional[_ShapeLike] = ..., + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def binomial(self, n: int, p: float, size: None = ...) -> int: ... # type: ignore[misc] + @overload + def binomial( + self, n: _ArrayLikeInt_co, p: _ArrayLikeFloat_co, size: Optional[_ShapeLike] = ... + ) -> ndarray[Any, dtype[int64]]: ... + @overload + def negative_binomial(self, n: float, p: float, size: None = ...) -> int: ... # type: ignore[misc] + @overload + def negative_binomial( + self, n: _ArrayLikeFloat_co, p: _ArrayLikeFloat_co, size: Optional[_ShapeLike] = ... + ) -> ndarray[Any, dtype[int64]]: ... + @overload + def poisson(self, lam: float = ..., size: None = ...) -> int: ... # type: ignore[misc] + @overload + def poisson( + self, lam: _ArrayLikeFloat_co = ..., size: Optional[_ShapeLike] = ... + ) -> ndarray[Any, dtype[int64]]: ... + @overload + def zipf(self, a: float, size: None = ...) -> int: ... # type: ignore[misc] + @overload + def zipf( + self, a: _ArrayLikeFloat_co, size: Optional[_ShapeLike] = ... + ) -> ndarray[Any, dtype[int64]]: ... + @overload + def geometric(self, p: float, size: None = ...) -> int: ... # type: ignore[misc] + @overload + def geometric( + self, p: _ArrayLikeFloat_co, size: Optional[_ShapeLike] = ... + ) -> ndarray[Any, dtype[int64]]: ... + @overload + def hypergeometric(self, ngood: int, nbad: int, nsample: int, size: None = ...) -> int: ... 
# type: ignore[misc] + @overload + def hypergeometric( + self, + ngood: _ArrayLikeInt_co, + nbad: _ArrayLikeInt_co, + nsample: _ArrayLikeInt_co, + size: Optional[_ShapeLike] = ..., + ) -> ndarray[Any, dtype[int64]]: ... + @overload + def logseries(self, p: float, size: None = ...) -> int: ... # type: ignore[misc] + @overload + def logseries( + self, p: _ArrayLikeFloat_co, size: Optional[_ShapeLike] = ... + ) -> ndarray[Any, dtype[int64]]: ... + def multivariate_normal( + self, + mean: _ArrayLikeFloat_co, + cov: _ArrayLikeFloat_co, + size: Optional[_ShapeLike] = ..., + check_valid: Literal["warn", "raise", "ignore"] = ..., + tol: float = ..., + *, + method: Literal["svd", "eigh", "cholesky"] = ..., + ) -> ndarray[Any, dtype[float64]]: ... + def multinomial( + self, n: _ArrayLikeInt_co, + pvals: _ArrayLikeFloat_co, + size: Optional[_ShapeLike] = ... + ) -> ndarray[Any, dtype[int64]]: ... + def multivariate_hypergeometric( + self, + colors: _ArrayLikeInt_co, + nsample: int, + size: Optional[_ShapeLike] = ..., + method: Literal["marginals", "count"] = ..., + ) -> ndarray[Any, dtype[int64]]: ... + def dirichlet( + self, alpha: _ArrayLikeFloat_co, size: Optional[_ShapeLike] = ... + ) -> ndarray[Any, dtype[float64]]: ... + def permuted( + self, x: ArrayLike, *, axis: Optional[int] = ..., out: Optional[ndarray[Any, Any]] = ... + ) -> ndarray[Any, Any]: ... + def shuffle(self, x: ArrayLike, axis: int = ...) -> None: ... + +def default_rng( + seed: Union[None, _ArrayLikeInt_co, SeedSequence, BitGenerator, Generator] = ... +) -> Generator: ... diff --git a/.local/lib/python3.8/site-packages/numpy/random/_mt19937.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/numpy/random/_mt19937.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..081de15 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/random/_mt19937.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/numpy/random/_mt19937.pyi b/.local/lib/python3.8/site-packages/numpy/random/_mt19937.pyi new file mode 100644 index 0000000..820f273 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/_mt19937.pyi @@ -0,0 +1,22 @@ +from typing import Any, Union, TypedDict + +from numpy import dtype, ndarray, uint32 +from numpy.random.bit_generator import BitGenerator, SeedSequence +from numpy.typing import _ArrayLikeInt_co + +class _MT19937Internal(TypedDict): + key: ndarray[Any, dtype[uint32]] + pos: int + +class _MT19937State(TypedDict): + bit_generator: str + state: _MT19937Internal + +class MT19937(BitGenerator): + def __init__(self, seed: Union[None, _ArrayLikeInt_co, SeedSequence] = ...) -> None: ... + def _legacy_seeding(self, seed: _ArrayLikeInt_co) -> None: ... + def jumped(self, jumps: int = ...) -> MT19937: ... + @property + def state(self) -> _MT19937State: ... + @state.setter + def state(self, value: _MT19937State) -> None: ... 
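The MT19937 stub above types a jumped method: each call returns a new bit generator advanced as though 2**128 draws had been made, which is the supported way to carve out non-overlapping streams for parallel work. A minimal sketch of that pattern, not part of the packaged files (the seed 5678 and the stream count are arbitrary illustrations):

from numpy.random import Generator, MT19937

bit_generator = MT19937(5678)  # arbitrary illustrative seed
streams = []
for _ in range(4):
    # each Generator keeps its own MT19937; the next one is jumped
    # 2**128 steps past it, so the streams never overlap
    streams.append(Generator(bit_generator))
    bit_generator = bit_generator.jumped()
draws = [rng.standard_normal(2) for rng in streams]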
diff --git a/.local/lib/python3.8/site-packages/numpy/random/_pcg64.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/numpy/random/_pcg64.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..8de5ad9 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/random/_pcg64.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/numpy/random/_pcg64.pyi b/.local/lib/python3.8/site-packages/numpy/random/_pcg64.pyi new file mode 100644 index 0000000..4881a98 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/_pcg64.pyi @@ -0,0 +1,42 @@ +from typing import Union, TypedDict + +from numpy.random.bit_generator import BitGenerator, SeedSequence +from numpy.typing import _ArrayLikeInt_co + +class _PCG64Internal(TypedDict): + state: int + inc: int + +class _PCG64State(TypedDict): + bit_generator: str + state: _PCG64Internal + has_uint32: int + uinteger: int + +class PCG64(BitGenerator): + def __init__(self, seed: Union[None, _ArrayLikeInt_co, SeedSequence] = ...) -> None: ... + def jumped(self, jumps: int = ...) -> PCG64: ... + @property + def state( + self, + ) -> _PCG64State: ... + @state.setter + def state( + self, + value: _PCG64State, + ) -> None: ... + def advance(self, delta: int) -> PCG64: ... + +class PCG64DXSM(BitGenerator): + def __init__(self, seed: Union[None, _ArrayLikeInt_co, SeedSequence] = ...) -> None: ... + def jumped(self, jumps: int = ...) -> PCG64DXSM: ... + @property + def state( + self, + ) -> _PCG64State: ... + @state.setter + def state( + self, + value: _PCG64State, + ) -> None: ... + def advance(self, delta: int) -> PCG64DXSM: ... diff --git a/.local/lib/python3.8/site-packages/numpy/random/_philox.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/numpy/random/_philox.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..57b1c4a Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/random/_philox.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/numpy/random/_philox.pyi b/.local/lib/python3.8/site-packages/numpy/random/_philox.pyi new file mode 100644 index 0000000..dd1c5e6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/_philox.pyi @@ -0,0 +1,36 @@ +from typing import Any, Union, TypedDict + +from numpy import dtype, ndarray, uint64 +from numpy.random.bit_generator import BitGenerator, SeedSequence +from numpy.typing import _ArrayLikeInt_co + +class _PhiloxInternal(TypedDict): + counter: ndarray[Any, dtype[uint64]] + key: ndarray[Any, dtype[uint64]] + +class _PhiloxState(TypedDict): + bit_generator: str + state: _PhiloxInternal + buffer: ndarray[Any, dtype[uint64]] + buffer_pos: int + has_uint32: int + uinteger: int + +class Philox(BitGenerator): + def __init__( + self, + seed: Union[None, _ArrayLikeInt_co, SeedSequence] = ..., + counter: Union[None, _ArrayLikeInt_co] = ..., + key: Union[None, _ArrayLikeInt_co] = ..., + ) -> None: ... + @property + def state( + self, + ) -> _PhiloxState: ... + @state.setter + def state( + self, + value: _PhiloxState, + ) -> None: ... + def jumped(self, jumps: int = ...) -> Philox: ... + def advance(self, delta: int) -> Philox: ... 
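Philox, typed above, is counter-based: besides jumped it accepts explicit key and counter arguments and exposes advance, so independent streams can be derived simply from distinct keys. A small sketch under that assumption (the root key and the skip count are made-up values for illustration):

from numpy.random import Generator, Philox

root_key = 0x1234  # arbitrary illustrative root key
# distinct keys yield statistically independent Philox streams
streams = [Generator(Philox(key=root_key + i)) for i in range(4)]
# advance(delta) skips ahead as if delta draws had been made
streams[0].bit_generator.advance(10**6)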
diff --git a/.local/lib/python3.8/site-packages/numpy/random/_pickle.py b/.local/lib/python3.8/site-packages/numpy/random/_pickle.py new file mode 100644 index 0000000..a32f64f --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/_pickle.py @@ -0,0 +1,83 @@ +from .mtrand import RandomState +from ._philox import Philox +from ._pcg64 import PCG64, PCG64DXSM +from ._sfc64 import SFC64 + +from ._generator import Generator +from ._mt19937 import MT19937 + +BitGenerators = {'MT19937': MT19937, + 'PCG64': PCG64, + 'PCG64DXSM': PCG64DXSM, + 'Philox': Philox, + 'SFC64': SFC64, + } + + +def __generator_ctor(bit_generator_name='MT19937'): + """ + Pickling helper function that returns a Generator object + + Parameters + ---------- + bit_generator_name : str + String containing the core BitGenerator + + Returns + ------- + rg: Generator + Generator using the named core BitGenerator + """ + if bit_generator_name in BitGenerators: + bit_generator = BitGenerators[bit_generator_name] + else: + raise ValueError(str(bit_generator_name) + ' is not a known ' + 'BitGenerator module.') + + return Generator(bit_generator()) + + +def __bit_generator_ctor(bit_generator_name='MT19937'): + """ + Pickling helper function that returns a bit generator object + + Parameters + ---------- + bit_generator_name : str + String containing the name of the BitGenerator + + Returns + ------- + bit_generator: BitGenerator + BitGenerator instance + """ + if bit_generator_name in BitGenerators: + bit_generator = BitGenerators[bit_generator_name] + else: + raise ValueError(str(bit_generator_name) + ' is not a known ' + 'BitGenerator module.') + + return bit_generator() + + +def __randomstate_ctor(bit_generator_name='MT19937'): + """ + Pickling helper function that returns a legacy RandomState-like object + + Parameters + ---------- + bit_generator_name : str + String containing the core BitGenerator + + Returns + ------- + rs: RandomState + Legacy RandomState using the named core BitGenerator + """ + if bit_generator_name in BitGenerators: + bit_generator = BitGenerators[bit_generator_name] + else: + raise ValueError(str(bit_generator_name) + ' is not a known ' + 'BitGenerator module.') + + return RandomState(bit_generator()) diff --git a/.local/lib/python3.8/site-packages/numpy/random/_sfc64.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/numpy/random/_sfc64.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..fa34c28 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/random/_sfc64.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/numpy/random/_sfc64.pyi b/.local/lib/python3.8/site-packages/numpy/random/_sfc64.pyi new file mode 100644 index 0000000..94d11a2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/_sfc64.pyi @@ -0,0 +1,28 @@ +from typing import Any, Union, TypedDict + +from numpy import dtype as dtype +from numpy import ndarray as ndarray +from numpy import uint64 +from numpy.random.bit_generator import BitGenerator, SeedSequence +from numpy.typing import _ArrayLikeInt_co + +class _SFC64Internal(TypedDict): + state: ndarray[Any, dtype[uint64]] + +class _SFC64State(TypedDict): + bit_generator: str + state: _SFC64Internal + has_uint32: int + uinteger: int + +class SFC64(BitGenerator): + def __init__(self, seed: Union[None, _ArrayLikeInt_co, SeedSequence] = ...) -> None: ... + @property + def state( + self, + ) -> _SFC64State: ... + @state.setter + def state( + self, + value: _SFC64State, + ) -> None: ... 
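The _pickle.py helpers above (__generator_ctor, __bit_generator_ctor, __randomstate_ctor) reconstruct objects by BitGenerator name, which is what lets a Generator survive a pickle round trip with its state intact. A minimal sketch of the behaviour they enable (seed and draw counts are arbitrary):

import pickle
from numpy.random import Generator, PCG64

rng = Generator(PCG64(42))   # arbitrary illustrative seed
rng.standard_normal(3)       # advance the state a little
clone = pickle.loads(pickle.dumps(rng))
# the clone carries the same bit generator state, so it replays the stream
assert clone.bit_generator.state == rng.bit_generator.state
assert clone.standard_normal() == rng.standard_normal()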
diff --git a/.local/lib/python3.8/site-packages/numpy/random/bit_generator.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/numpy/random/bit_generator.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..00fdb8e Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/random/bit_generator.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/numpy/random/bit_generator.pxd b/.local/lib/python3.8/site-packages/numpy/random/bit_generator.pxd new file mode 100644 index 0000000..dfa7d0a --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/bit_generator.pxd @@ -0,0 +1,35 @@ +cimport numpy as np +from libc.stdint cimport uint32_t, uint64_t + +cdef extern from "numpy/random/bitgen.h": + struct bitgen: + void *state + uint64_t (*next_uint64)(void *st) nogil + uint32_t (*next_uint32)(void *st) nogil + double (*next_double)(void *st) nogil + uint64_t (*next_raw)(void *st) nogil + + ctypedef bitgen bitgen_t + +cdef class BitGenerator(): + cdef readonly object _seed_seq + cdef readonly object lock + cdef bitgen_t _bitgen + cdef readonly object _ctypes + cdef readonly object _cffi + cdef readonly object capsule + + +cdef class SeedSequence(): + cdef readonly object entropy + cdef readonly tuple spawn_key + cdef readonly Py_ssize_t pool_size + cdef readonly object pool + cdef readonly uint32_t n_children_spawned + + cdef mix_entropy(self, np.ndarray[np.npy_uint32, ndim=1] mixer, + np.ndarray[np.npy_uint32, ndim=1] entropy_array) + cdef get_assembled_entropy(self) + +cdef class SeedlessSequence(): + pass diff --git a/.local/lib/python3.8/site-packages/numpy/random/bit_generator.pyi b/.local/lib/python3.8/site-packages/numpy/random/bit_generator.pyi new file mode 100644 index 0000000..fa2f1ab --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/bit_generator.pyi @@ -0,0 +1,116 @@ +import abc +from threading import Lock +from typing import ( + Any, + Callable, + Dict, + List, + Mapping, + NamedTuple, + Optional, + Sequence, + Tuple, + Type, + TypedDict, + TypeVar, + Union, + overload, + Literal, +) + +from numpy import dtype, ndarray, uint32, uint64 +from numpy.typing import _ArrayLikeInt_co, _ShapeLike, _SupportsDType, _UInt32Codes, _UInt64Codes + +_T = TypeVar("_T") + +_DTypeLikeUint32 = Union[ + dtype[uint32], + _SupportsDType[dtype[uint32]], + Type[uint32], + _UInt32Codes, +] +_DTypeLikeUint64 = Union[ + dtype[uint64], + _SupportsDType[dtype[uint64]], + Type[uint64], + _UInt64Codes, +] + +class _SeedSeqState(TypedDict): + entropy: Union[None, int, Sequence[int]] + spawn_key: Tuple[int, ...] + pool_size: int + n_children_spawned: int + +class _Interface(NamedTuple): + state_address: Any + state: Any + next_uint64: Any + next_uint32: Any + next_double: Any + bit_generator: Any + +class ISeedSequence(abc.ABC): + @abc.abstractmethod + def generate_state( + self, n_words: int, dtype: Union[_DTypeLikeUint32, _DTypeLikeUint64] = ... + ) -> ndarray[Any, dtype[Union[uint32, uint64]]]: ... + +class ISpawnableSeedSequence(ISeedSequence): + @abc.abstractmethod + def spawn(self: _T, n_children: int) -> List[_T]: ... + +class SeedlessSeedSequence(ISpawnableSeedSequence): + def generate_state( + self, n_words: int, dtype: Union[_DTypeLikeUint32, _DTypeLikeUint64] = ... + ) -> ndarray[Any, dtype[Union[uint32, uint64]]]: ... + def spawn(self: _T, n_children: int) -> List[_T]: ... + +class SeedSequence(ISpawnableSeedSequence): + entropy: Union[None, int, Sequence[int]] + spawn_key: Tuple[int, ...] 
+ pool_size: int + n_children_spawned: int + pool: ndarray[Any, dtype[uint32]] + def __init__( + self, + entropy: Union[None, int, Sequence[int], _ArrayLikeInt_co] = ..., + *, + spawn_key: Sequence[int] = ..., + pool_size: int = ..., + n_children_spawned: int = ..., + ) -> None: ... + def __repr__(self) -> str: ... + @property + def state( + self, + ) -> _SeedSeqState: ... + def generate_state( + self, n_words: int, dtype: Union[_DTypeLikeUint32, _DTypeLikeUint64] = ... + ) -> ndarray[Any, dtype[Union[uint32, uint64]]]: ... + def spawn(self, n_children: int) -> List[SeedSequence]: ... + +class BitGenerator(abc.ABC): + lock: Lock + def __init__(self, seed: Union[None, _ArrayLikeInt_co, SeedSequence] = ...) -> None: ... + def __getstate__(self) -> Dict[str, Any]: ... + def __setstate__(self, state: Dict[str, Any]) -> None: ... + def __reduce__( + self, + ) -> Tuple[Callable[[str], BitGenerator], Tuple[str], Tuple[Dict[str, Any]]]: ... + @abc.abstractmethod + @property + def state(self) -> Mapping[str, Any]: ... + @state.setter + def state(self, value: Mapping[str, Any]) -> None: ... + @overload + def random_raw(self, size: None = ..., output: Literal[True] = ...) -> int: ... # type: ignore[misc] + @overload + def random_raw(self, size: _ShapeLike = ..., output: Literal[True] = ...) -> ndarray[Any, dtype[uint64]]: ... # type: ignore[misc] + @overload + def random_raw(self, size: Optional[_ShapeLike] = ..., output: Literal[False] = ...) -> None: ... # type: ignore[misc] + def _benchmark(self, cnt: int, method: str = ...) -> None: ... + @property + def ctypes(self) -> _Interface: ... + @property + def cffi(self) -> _Interface: ... diff --git a/.local/lib/python3.8/site-packages/numpy/random/c_distributions.pxd b/.local/lib/python3.8/site-packages/numpy/random/c_distributions.pxd new file mode 100644 index 0000000..6f905ed --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/c_distributions.pxd @@ -0,0 +1,114 @@ +#!python +#cython: wraparound=False, nonecheck=False, boundscheck=False, cdivision=True, language_level=3 +from numpy cimport npy_intp + +from libc.stdint cimport (uint64_t, int32_t, int64_t) +from numpy.random cimport bitgen_t + +cdef extern from "numpy/random/distributions.h": + + struct s_binomial_t: + int has_binomial + double psave + int64_t nsave + double r + double q + double fm + int64_t m + double p1 + double xm + double xl + double xr + double c + double laml + double lamr + double p2 + double p3 + double p4 + + ctypedef s_binomial_t binomial_t + + double random_standard_uniform(bitgen_t *bitgen_state) nogil + void random_standard_uniform_fill(bitgen_t* bitgen_state, npy_intp cnt, double *out) nogil + double random_standard_exponential(bitgen_t *bitgen_state) nogil + double random_standard_exponential_f(bitgen_t *bitgen_state) nogil + void random_standard_exponential_fill(bitgen_t *bitgen_state, npy_intp cnt, double *out) nogil + void random_standard_exponential_fill_f(bitgen_t *bitgen_state, npy_intp cnt, double *out) nogil + void random_standard_exponential_inv_fill(bitgen_t *bitgen_state, npy_intp cnt, double *out) nogil + void random_standard_exponential_inv_fill_f(bitgen_t *bitgen_state, npy_intp cnt, double *out) nogil + double random_standard_normal(bitgen_t* bitgen_state) nogil + void random_standard_normal_fill(bitgen_t *bitgen_state, npy_intp count, double *out) nogil + void random_standard_normal_fill_f(bitgen_t *bitgen_state, npy_intp count, float *out) nogil + double random_standard_gamma(bitgen_t *bitgen_state, double shape) nogil + + float 
random_standard_uniform_f(bitgen_t *bitgen_state) nogil + void random_standard_uniform_fill_f(bitgen_t* bitgen_state, npy_intp cnt, float *out) nogil + float random_standard_normal_f(bitgen_t* bitgen_state) nogil + float random_standard_gamma_f(bitgen_t *bitgen_state, float shape) nogil + + int64_t random_positive_int64(bitgen_t *bitgen_state) nogil + int32_t random_positive_int32(bitgen_t *bitgen_state) nogil + int64_t random_positive_int(bitgen_t *bitgen_state) nogil + uint64_t random_uint(bitgen_t *bitgen_state) nogil + + double random_normal(bitgen_t *bitgen_state, double loc, double scale) nogil + + double random_gamma(bitgen_t *bitgen_state, double shape, double scale) nogil + float random_gamma_f(bitgen_t *bitgen_state, float shape, float scale) nogil + + double random_exponential(bitgen_t *bitgen_state, double scale) nogil + double random_uniform(bitgen_t *bitgen_state, double lower, double range) nogil + double random_beta(bitgen_t *bitgen_state, double a, double b) nogil + double random_chisquare(bitgen_t *bitgen_state, double df) nogil + double random_f(bitgen_t *bitgen_state, double dfnum, double dfden) nogil + double random_standard_cauchy(bitgen_t *bitgen_state) nogil + double random_pareto(bitgen_t *bitgen_state, double a) nogil + double random_weibull(bitgen_t *bitgen_state, double a) nogil + double random_power(bitgen_t *bitgen_state, double a) nogil + double random_laplace(bitgen_t *bitgen_state, double loc, double scale) nogil + double random_gumbel(bitgen_t *bitgen_state, double loc, double scale) nogil + double random_logistic(bitgen_t *bitgen_state, double loc, double scale) nogil + double random_lognormal(bitgen_t *bitgen_state, double mean, double sigma) nogil + double random_rayleigh(bitgen_t *bitgen_state, double mode) nogil + double random_standard_t(bitgen_t *bitgen_state, double df) nogil + double random_noncentral_chisquare(bitgen_t *bitgen_state, double df, + double nonc) nogil + double random_noncentral_f(bitgen_t *bitgen_state, double dfnum, + double dfden, double nonc) nogil + double random_wald(bitgen_t *bitgen_state, double mean, double scale) nogil + double random_vonmises(bitgen_t *bitgen_state, double mu, double kappa) nogil + double random_triangular(bitgen_t *bitgen_state, double left, double mode, + double right) nogil + + int64_t random_poisson(bitgen_t *bitgen_state, double lam) nogil + int64_t random_negative_binomial(bitgen_t *bitgen_state, double n, double p) nogil + int64_t random_binomial(bitgen_t *bitgen_state, double p, int64_t n, binomial_t *binomial) nogil + int64_t random_logseries(bitgen_t *bitgen_state, double p) nogil + int64_t random_geometric_search(bitgen_t *bitgen_state, double p) nogil + int64_t random_geometric_inversion(bitgen_t *bitgen_state, double p) nogil + int64_t random_geometric(bitgen_t *bitgen_state, double p) nogil + int64_t random_zipf(bitgen_t *bitgen_state, double a) nogil + int64_t random_hypergeometric(bitgen_t *bitgen_state, int64_t good, int64_t bad, + int64_t sample) nogil + + uint64_t random_interval(bitgen_t *bitgen_state, uint64_t max) nogil + + # Generate random uint64 numbers in closed interval [off, off + rng]. 
+ uint64_t random_bounded_uint64(bitgen_t *bitgen_state, + uint64_t off, uint64_t rng, + uint64_t mask, bint use_masked) nogil + + void random_multinomial(bitgen_t *bitgen_state, int64_t n, int64_t *mnix, + double *pix, npy_intp d, binomial_t *binomial) nogil + + int random_multivariate_hypergeometric_count(bitgen_t *bitgen_state, + int64_t total, + size_t num_colors, int64_t *colors, + int64_t nsample, + size_t num_variates, int64_t *variates) nogil + void random_multivariate_hypergeometric_marginals(bitgen_t *bitgen_state, + int64_t total, + size_t num_colors, int64_t *colors, + int64_t nsample, + size_t num_variates, int64_t *variates) nogil + diff --git a/.local/lib/python3.8/site-packages/numpy/random/lib/libnpyrandom.a b/.local/lib/python3.8/site-packages/numpy/random/lib/libnpyrandom.a new file mode 100644 index 0000000..1f85872 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/random/lib/libnpyrandom.a differ diff --git a/.local/lib/python3.8/site-packages/numpy/random/mtrand.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/numpy/random/mtrand.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..5dbbc0d Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/random/mtrand.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/numpy/random/mtrand.pyi b/.local/lib/python3.8/site-packages/numpy/random/mtrand.pyi new file mode 100644 index 0000000..cbe87a2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/mtrand.pyi @@ -0,0 +1,573 @@ +from typing import Any, Callable, Dict, Optional, Tuple, Type, Union, overload, Literal + +from numpy import ( + bool_, + dtype, + float32, + float64, + int8, + int16, + int32, + int64, + int_, + ndarray, + uint, + uint8, + uint16, + uint32, + uint64, +) +from numpy.random.bit_generator import BitGenerator +from numpy.typing import ( + ArrayLike, + _ArrayLikeFloat_co, + _ArrayLikeInt_co, + _DoubleCodes, + _DTypeLikeBool, + _DTypeLikeInt, + _DTypeLikeUInt, + _Float32Codes, + _Float64Codes, + _Int8Codes, + _Int16Codes, + _Int32Codes, + _Int64Codes, + _IntCodes, + _ShapeLike, + _SingleCodes, + _SupportsDType, + _UInt8Codes, + _UInt16Codes, + _UInt32Codes, + _UInt64Codes, + _UIntCodes, +) + +_DTypeLikeFloat32 = Union[ + dtype[float32], + _SupportsDType[dtype[float32]], + Type[float32], + _Float32Codes, + _SingleCodes, +] + +_DTypeLikeFloat64 = Union[ + dtype[float64], + _SupportsDType[dtype[float64]], + Type[float], + Type[float64], + _Float64Codes, + _DoubleCodes, +] + +class RandomState: + _bit_generator: BitGenerator + def __init__(self, seed: Union[None, _ArrayLikeInt_co, BitGenerator] = ...) -> None: ... + def __repr__(self) -> str: ... + def __str__(self) -> str: ... + def __getstate__(self) -> Dict[str, Any]: ... + def __setstate__(self, state: Dict[str, Any]) -> None: ... + def __reduce__(self) -> Tuple[Callable[[str], RandomState], Tuple[str], Dict[str, Any]]: ... + def seed(self, seed: Optional[_ArrayLikeFloat_co] = ...) -> None: ... + @overload + def get_state(self, legacy: Literal[False] = ...) -> Dict[str, Any]: ... + @overload + def get_state( + self, legacy: Literal[True] = ... + ) -> Union[Dict[str, Any], Tuple[str, ndarray[Any, dtype[uint32]], int, int, float]]: ... + def set_state( + self, state: Union[Dict[str, Any], Tuple[str, ndarray[Any, dtype[uint32]], int, int, float]] + ) -> None: ... + @overload + def random_sample(self, size: None = ...) -> float: ... 
# type: ignore[misc] + @overload + def random_sample(self, size: _ShapeLike = ...) -> ndarray[Any, dtype[float64]]: ... + @overload + def random(self, size: None = ...) -> float: ... # type: ignore[misc] + @overload + def random(self, size: _ShapeLike = ...) -> ndarray[Any, dtype[float64]]: ... + @overload + def beta(self, a: float, b: float, size: None = ...) -> float: ... # type: ignore[misc] + @overload + def beta( + self, a: _ArrayLikeFloat_co, b: _ArrayLikeFloat_co, size: Optional[_ShapeLike] = ... + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def exponential(self, scale: float = ..., size: None = ...) -> float: ... # type: ignore[misc] + @overload + def exponential( + self, scale: _ArrayLikeFloat_co = ..., size: Optional[_ShapeLike] = ... + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def standard_exponential(self, size: None = ...) -> float: ... # type: ignore[misc] + @overload + def standard_exponential(self, size: _ShapeLike = ...) -> ndarray[Any, dtype[float64]]: ... + @overload + def tomaxint(self, size: None = ...) -> int: ... # type: ignore[misc] + @overload + def tomaxint(self, size: _ShapeLike = ...) -> ndarray[Any, dtype[int_]]: ... + @overload + def randint( # type: ignore[misc] + self, + low: int, + high: Optional[int] = ..., + ) -> int: ... + @overload + def randint( # type: ignore[misc] + self, + low: int, + high: Optional[int] = ..., + size: None = ..., + dtype: _DTypeLikeBool = ..., + ) -> bool: ... + @overload + def randint( # type: ignore[misc] + self, + low: int, + high: Optional[int] = ..., + size: None = ..., + dtype: Union[_DTypeLikeInt, _DTypeLikeUInt] = ..., + ) -> int: ... + @overload + def randint( # type: ignore[misc] + self, + low: _ArrayLikeInt_co, + high: Optional[_ArrayLikeInt_co] = ..., + size: Optional[_ShapeLike] = ..., + ) -> ndarray[Any, dtype[int_]]: ... + @overload + def randint( # type: ignore[misc] + self, + low: _ArrayLikeInt_co, + high: Optional[_ArrayLikeInt_co] = ..., + size: Optional[_ShapeLike] = ..., + dtype: _DTypeLikeBool = ..., + ) -> ndarray[Any, dtype[bool_]]: ... + @overload + def randint( # type: ignore[misc] + self, + low: _ArrayLikeInt_co, + high: Optional[_ArrayLikeInt_co] = ..., + size: Optional[_ShapeLike] = ..., + dtype: Union[dtype[int8], Type[int8], _Int8Codes, _SupportsDType[dtype[int8]]] = ..., + ) -> ndarray[Any, dtype[int8]]: ... + @overload + def randint( # type: ignore[misc] + self, + low: _ArrayLikeInt_co, + high: Optional[_ArrayLikeInt_co] = ..., + size: Optional[_ShapeLike] = ..., + dtype: Union[dtype[int16], Type[int16], _Int16Codes, _SupportsDType[dtype[int16]]] = ..., + ) -> ndarray[Any, dtype[int16]]: ... + @overload + def randint( # type: ignore[misc] + self, + low: _ArrayLikeInt_co, + high: Optional[_ArrayLikeInt_co] = ..., + size: Optional[_ShapeLike] = ..., + dtype: Union[dtype[int32], Type[int32], _Int32Codes, _SupportsDType[dtype[int32]]] = ..., + ) -> ndarray[Any, dtype[Union[int32]]]: ... + @overload + def randint( # type: ignore[misc] + self, + low: _ArrayLikeInt_co, + high: Optional[_ArrayLikeInt_co] = ..., + size: Optional[_ShapeLike] = ..., + dtype: Optional[ + Union[dtype[int64], Type[int64], _Int64Codes, _SupportsDType[dtype[int64]]] + ] = ..., + ) -> ndarray[Any, dtype[int64]]: ... + @overload + def randint( # type: ignore[misc] + self, + low: _ArrayLikeInt_co, + high: Optional[_ArrayLikeInt_co] = ..., + size: Optional[_ShapeLike] = ..., + dtype: Union[dtype[uint8], Type[uint8], _UInt8Codes, _SupportsDType[dtype[uint8]]] = ..., + ) -> ndarray[Any, dtype[uint8]]: ... 
+ @overload + def randint( # type: ignore[misc] + self, + low: _ArrayLikeInt_co, + high: Optional[_ArrayLikeInt_co] = ..., + size: Optional[_ShapeLike] = ..., + dtype: Union[ + dtype[uint16], Type[uint16], _UInt16Codes, _SupportsDType[dtype[uint16]] + ] = ..., + ) -> ndarray[Any, dtype[Union[uint16]]]: ... + @overload + def randint( # type: ignore[misc] + self, + low: _ArrayLikeInt_co, + high: Optional[_ArrayLikeInt_co] = ..., + size: Optional[_ShapeLike] = ..., + dtype: Union[ + dtype[uint32], Type[uint32], _UInt32Codes, _SupportsDType[dtype[uint32]] + ] = ..., + ) -> ndarray[Any, dtype[uint32]]: ... + @overload + def randint( # type: ignore[misc] + self, + low: _ArrayLikeInt_co, + high: Optional[_ArrayLikeInt_co] = ..., + size: Optional[_ShapeLike] = ..., + dtype: Union[ + dtype[uint64], Type[uint64], _UInt64Codes, _SupportsDType[dtype[uint64]] + ] = ..., + ) -> ndarray[Any, dtype[uint64]]: ... + @overload + def randint( # type: ignore[misc] + self, + low: _ArrayLikeInt_co, + high: Optional[_ArrayLikeInt_co] = ..., + size: Optional[_ShapeLike] = ..., + dtype: Union[ + dtype[int_], Type[int], Type[int_], _IntCodes, _SupportsDType[dtype[int_]] + ] = ..., + ) -> ndarray[Any, dtype[int_]]: ... + @overload + def randint( # type: ignore[misc] + self, + low: _ArrayLikeInt_co, + high: Optional[_ArrayLikeInt_co] = ..., + size: Optional[_ShapeLike] = ..., + dtype: Union[dtype[uint], Type[uint], _UIntCodes, _SupportsDType[dtype[uint]]] = ..., + ) -> ndarray[Any, dtype[uint]]: ... + def bytes(self, length: int) -> bytes: ... + @overload + def choice( + self, + a: int, + size: None = ..., + replace: bool = ..., + p: Optional[_ArrayLikeFloat_co] = ..., + ) -> int: ... + @overload + def choice( + self, + a: int, + size: _ShapeLike = ..., + replace: bool = ..., + p: Optional[_ArrayLikeFloat_co] = ..., + ) -> ndarray[Any, dtype[int_]]: ... + @overload + def choice( + self, + a: ArrayLike, + size: None = ..., + replace: bool = ..., + p: Optional[_ArrayLikeFloat_co] = ..., + ) -> Any: ... + @overload + def choice( + self, + a: ArrayLike, + size: _ShapeLike = ..., + replace: bool = ..., + p: Optional[_ArrayLikeFloat_co] = ..., + ) -> ndarray[Any, Any]: ... + @overload + def uniform(self, low: float = ..., high: float = ..., size: None = ...) -> float: ... # type: ignore[misc] + @overload + def uniform( + self, + low: _ArrayLikeFloat_co = ..., + high: _ArrayLikeFloat_co = ..., + size: Optional[_ShapeLike] = ..., + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def rand(self) -> float: ... + @overload + def rand(self, *args: int) -> ndarray[Any, dtype[float64]]: ... + @overload + def randn(self) -> float: ... + @overload + def randn(self, *args: int) -> ndarray[Any, dtype[float64]]: ... + @overload + def random_integers(self, low: int, high: Optional[int] = ..., size: None = ...) -> int: ... # type: ignore[misc] + @overload + def random_integers( + self, + low: _ArrayLikeInt_co, + high: Optional[_ArrayLikeInt_co] = ..., + size: Optional[_ShapeLike] = ..., + ) -> ndarray[Any, dtype[int_]]: ... + @overload + def standard_normal(self, size: None = ...) -> float: ... # type: ignore[misc] + @overload + def standard_normal( # type: ignore[misc] + self, size: _ShapeLike = ... + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def normal(self, loc: float = ..., scale: float = ..., size: None = ...) -> float: ... 
# type: ignore[misc] + @overload + def normal( + self, + loc: _ArrayLikeFloat_co = ..., + scale: _ArrayLikeFloat_co = ..., + size: Optional[_ShapeLike] = ..., + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def standard_gamma( # type: ignore[misc] + self, + shape: float, + size: None = ..., + ) -> float: ... + @overload + def standard_gamma( + self, + shape: _ArrayLikeFloat_co, + size: Optional[_ShapeLike] = ..., + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def gamma(self, shape: float, scale: float = ..., size: None = ...) -> float: ... # type: ignore[misc] + @overload + def gamma( + self, + shape: _ArrayLikeFloat_co, + scale: _ArrayLikeFloat_co = ..., + size: Optional[_ShapeLike] = ..., + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def f(self, dfnum: float, dfden: float, size: None = ...) -> float: ... # type: ignore[misc] + @overload + def f( + self, dfnum: _ArrayLikeFloat_co, dfden: _ArrayLikeFloat_co, size: Optional[_ShapeLike] = ... + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def noncentral_f(self, dfnum: float, dfden: float, nonc: float, size: None = ...) -> float: ... # type: ignore[misc] + @overload + def noncentral_f( + self, + dfnum: _ArrayLikeFloat_co, + dfden: _ArrayLikeFloat_co, + nonc: _ArrayLikeFloat_co, + size: Optional[_ShapeLike] = ..., + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def chisquare(self, df: float, size: None = ...) -> float: ... # type: ignore[misc] + @overload + def chisquare( + self, df: _ArrayLikeFloat_co, size: Optional[_ShapeLike] = ... + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def noncentral_chisquare(self, df: float, nonc: float, size: None = ...) -> float: ... # type: ignore[misc] + @overload + def noncentral_chisquare( + self, df: _ArrayLikeFloat_co, nonc: _ArrayLikeFloat_co, size: Optional[_ShapeLike] = ... + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def standard_t(self, df: float, size: None = ...) -> float: ... # type: ignore[misc] + @overload + def standard_t( + self, df: _ArrayLikeFloat_co, size: None = ... + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def standard_t( + self, df: _ArrayLikeFloat_co, size: _ShapeLike = ... + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def vonmises(self, mu: float, kappa: float, size: None = ...) -> float: ... # type: ignore[misc] + @overload + def vonmises( + self, mu: _ArrayLikeFloat_co, kappa: _ArrayLikeFloat_co, size: Optional[_ShapeLike] = ... + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def pareto(self, a: float, size: None = ...) -> float: ... # type: ignore[misc] + @overload + def pareto( + self, a: _ArrayLikeFloat_co, size: Optional[_ShapeLike] = ... + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def weibull(self, a: float, size: None = ...) -> float: ... # type: ignore[misc] + @overload + def weibull( + self, a: _ArrayLikeFloat_co, size: Optional[_ShapeLike] = ... + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def power(self, a: float, size: None = ...) -> float: ... # type: ignore[misc] + @overload + def power( + self, a: _ArrayLikeFloat_co, size: Optional[_ShapeLike] = ... + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def standard_cauchy(self, size: None = ...) -> float: ... # type: ignore[misc] + @overload + def standard_cauchy(self, size: _ShapeLike = ...) -> ndarray[Any, dtype[float64]]: ... + @overload + def laplace(self, loc: float = ..., scale: float = ..., size: None = ...) -> float: ... 
# type: ignore[misc] + @overload + def laplace( + self, + loc: _ArrayLikeFloat_co = ..., + scale: _ArrayLikeFloat_co = ..., + size: Optional[_ShapeLike] = ..., + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def gumbel(self, loc: float = ..., scale: float = ..., size: None = ...) -> float: ... # type: ignore[misc] + @overload + def gumbel( + self, + loc: _ArrayLikeFloat_co = ..., + scale: _ArrayLikeFloat_co = ..., + size: Optional[_ShapeLike] = ..., + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def logistic(self, loc: float = ..., scale: float = ..., size: None = ...) -> float: ... # type: ignore[misc] + @overload + def logistic( + self, + loc: _ArrayLikeFloat_co = ..., + scale: _ArrayLikeFloat_co = ..., + size: Optional[_ShapeLike] = ..., + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def lognormal(self, mean: float = ..., sigma: float = ..., size: None = ...) -> float: ... # type: ignore[misc] + @overload + def lognormal( + self, + mean: _ArrayLikeFloat_co = ..., + sigma: _ArrayLikeFloat_co = ..., + size: Optional[_ShapeLike] = ..., + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def rayleigh(self, scale: float = ..., size: None = ...) -> float: ... # type: ignore[misc] + @overload + def rayleigh( + self, scale: _ArrayLikeFloat_co = ..., size: Optional[_ShapeLike] = ... + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def wald(self, mean: float, scale: float, size: None = ...) -> float: ... # type: ignore[misc] + @overload + def wald( + self, mean: _ArrayLikeFloat_co, scale: _ArrayLikeFloat_co, size: Optional[_ShapeLike] = ... + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def triangular(self, left: float, mode: float, right: float, size: None = ...) -> float: ... # type: ignore[misc] + @overload + def triangular( + self, + left: _ArrayLikeFloat_co, + mode: _ArrayLikeFloat_co, + right: _ArrayLikeFloat_co, + size: Optional[_ShapeLike] = ..., + ) -> ndarray[Any, dtype[float64]]: ... + @overload + def binomial(self, n: int, p: float, size: None = ...) -> int: ... # type: ignore[misc] + @overload + def binomial( + self, n: _ArrayLikeInt_co, p: _ArrayLikeFloat_co, size: Optional[_ShapeLike] = ... + ) -> ndarray[Any, dtype[int_]]: ... + @overload + def negative_binomial(self, n: float, p: float, size: None = ...) -> int: ... # type: ignore[misc] + @overload + def negative_binomial( + self, n: _ArrayLikeFloat_co, p: _ArrayLikeFloat_co, size: Optional[_ShapeLike] = ... + ) -> ndarray[Any, dtype[int_]]: ... + @overload + def poisson(self, lam: float = ..., size: None = ...) -> int: ... # type: ignore[misc] + @overload + def poisson( + self, lam: _ArrayLikeFloat_co = ..., size: Optional[_ShapeLike] = ... + ) -> ndarray[Any, dtype[int_]]: ... + @overload + def zipf(self, a: float, size: None = ...) -> int: ... # type: ignore[misc] + @overload + def zipf( + self, a: _ArrayLikeFloat_co, size: Optional[_ShapeLike] = ... + ) -> ndarray[Any, dtype[int_]]: ... + @overload + def geometric(self, p: float, size: None = ...) -> int: ... # type: ignore[misc] + @overload + def geometric( + self, p: _ArrayLikeFloat_co, size: Optional[_ShapeLike] = ... + ) -> ndarray[Any, dtype[int_]]: ... + @overload + def hypergeometric(self, ngood: int, nbad: int, nsample: int, size: None = ...) -> int: ... # type: ignore[misc] + @overload + def hypergeometric( + self, + ngood: _ArrayLikeInt_co, + nbad: _ArrayLikeInt_co, + nsample: _ArrayLikeInt_co, + size: Optional[_ShapeLike] = ..., + ) -> ndarray[Any, dtype[int_]]: ... 
+ @overload + def logseries(self, p: float, size: None = ...) -> int: ... # type: ignore[misc] + @overload + def logseries( + self, p: _ArrayLikeFloat_co, size: Optional[_ShapeLike] = ... + ) -> ndarray[Any, dtype[int_]]: ... + def multivariate_normal( + self, + mean: _ArrayLikeFloat_co, + cov: _ArrayLikeFloat_co, + size: Optional[_ShapeLike] = ..., + check_valid: Literal["warn", "raise", "ignore"] = ..., + tol: float = ..., + ) -> ndarray[Any, dtype[float64]]: ... + def multinomial( + self, n: _ArrayLikeInt_co, pvals: _ArrayLikeFloat_co, size: Optional[_ShapeLike] = ... + ) -> ndarray[Any, dtype[int_]]: ... + def dirichlet( + self, alpha: _ArrayLikeFloat_co, size: Optional[_ShapeLike] = ... + ) -> ndarray[Any, dtype[float64]]: ... + def shuffle(self, x: ArrayLike) -> None: ... + @overload + def permutation(self, x: int) -> ndarray[Any, dtype[int_]]: ... + @overload + def permutation(self, x: ArrayLike) -> ndarray[Any, Any]: ... + +_rand: RandomState + +beta = _rand.beta +binomial = _rand.binomial +bytes = _rand.bytes +chisquare = _rand.chisquare +choice = _rand.choice +dirichlet = _rand.dirichlet +exponential = _rand.exponential +f = _rand.f +gamma = _rand.gamma +get_state = _rand.get_state +geometric = _rand.geometric +gumbel = _rand.gumbel +hypergeometric = _rand.hypergeometric +laplace = _rand.laplace +logistic = _rand.logistic +lognormal = _rand.lognormal +logseries = _rand.logseries +multinomial = _rand.multinomial +multivariate_normal = _rand.multivariate_normal +negative_binomial = _rand.negative_binomial +noncentral_chisquare = _rand.noncentral_chisquare +noncentral_f = _rand.noncentral_f +normal = _rand.normal +pareto = _rand.pareto +permutation = _rand.permutation +poisson = _rand.poisson +power = _rand.power +rand = _rand.rand +randint = _rand.randint +randn = _rand.randn +random = _rand.random +random_integers = _rand.random_integers +random_sample = _rand.random_sample +rayleigh = _rand.rayleigh +seed = _rand.seed +set_state = _rand.set_state +shuffle = _rand.shuffle +standard_cauchy = _rand.standard_cauchy +standard_exponential = _rand.standard_exponential +standard_gamma = _rand.standard_gamma +standard_normal = _rand.standard_normal +standard_t = _rand.standard_t +triangular = _rand.triangular +uniform = _rand.uniform +vonmises = _rand.vonmises +wald = _rand.wald +weibull = _rand.weibull +zipf = _rand.zipf +# Two legacy that are trivial wrappers around random_sample +sample = _rand.random_sample +ranf = _rand.random_sample diff --git a/.local/lib/python3.8/site-packages/numpy/random/setup.py b/.local/lib/python3.8/site-packages/numpy/random/setup.py new file mode 100644 index 0000000..866c0cb --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/setup.py @@ -0,0 +1,162 @@ +import os +import platform +import sys +from os.path import join + +from numpy.distutils.system_info import platform_bits + +is_msvc = (platform.platform().startswith('Windows') and + platform.python_compiler().startswith('MS')) + + +def configuration(parent_package='', top_path=None): + from numpy.distutils.misc_util import Configuration, get_mathlibs + config = Configuration('random', parent_package, top_path) + + def generate_libraries(ext, build_dir): + config_cmd = config.get_config_cmd() + libs = get_mathlibs() + if sys.platform == 'win32': + libs.extend(['Advapi32', 'Kernel32']) + ext.libraries.extend(libs) + return None + + # enable unix large file support on 32 bit systems + # (64 bit off_t, lseek -> lseek64 etc.) 
+ if sys.platform[:3] == 'aix': + defs = [('_LARGE_FILES', None)] + else: + defs = [('_FILE_OFFSET_BITS', '64'), + ('_LARGEFILE_SOURCE', '1'), + ('_LARGEFILE64_SOURCE', '1')] + + defs.append(('NPY_NO_DEPRECATED_API', 0)) + config.add_subpackage('tests') + config.add_data_dir('tests/data') + config.add_data_dir('_examples') + + EXTRA_LINK_ARGS = [] + EXTRA_LIBRARIES = ['npyrandom'] + if os.name != 'nt': + # Math lib + EXTRA_LIBRARIES.append('m') + # Some bit generators exclude GCC inlining + EXTRA_COMPILE_ARGS = ['-U__GNUC_GNU_INLINE__'] + + if is_msvc and platform_bits == 32: + # 32-bit windows requires explicit sse2 option + EXTRA_COMPILE_ARGS += ['/arch:SSE2'] + elif not is_msvc: + # Some bit generators require c99 + EXTRA_COMPILE_ARGS += ['-std=c99'] + + # Use legacy integer variable sizes + LEGACY_DEFS = [('NP_RANDOM_LEGACY', '1')] + PCG64_DEFS = [] + # One can force emulated 128-bit arithmetic if one wants. + #PCG64_DEFS += [('PCG_FORCE_EMULATED_128BIT_MATH', '1')] + depends = ['__init__.pxd', 'c_distributions.pxd', 'bit_generator.pxd'] + + # npyrandom - a library like npymath + npyrandom_sources = [ + 'src/distributions/logfactorial.c', + 'src/distributions/distributions.c', + 'src/distributions/random_mvhg_count.c', + 'src/distributions/random_mvhg_marginals.c', + 'src/distributions/random_hypergeometric.c', + ] + + def gl_if_msvc(build_cmd): + """ Add flag if we are using MSVC compiler + + We can't see this in our scope, because we have not initialized the + distutils build command, so use this deferred calculation to run when + we are building the library. + """ + # Keep in sync with numpy/core/setup.py + if build_cmd.compiler.compiler_type == 'msvc': + # explicitly disable whole-program optimization + return ['/GL-'] + return [] + + config.add_installed_library('npyrandom', + sources=npyrandom_sources, + install_dir='lib', + build_info={ + 'include_dirs' : [], # empty list required for creating npyrandom.h + 'extra_compiler_args': [gl_if_msvc], + }) + + for gen in ['mt19937']: + # gen.pyx, src/gen/gen.c, src/gen/gen-jump.c + config.add_extension(f'_{gen}', + sources=[f'_{gen}.c', + f'src/{gen}/{gen}.c', + f'src/{gen}/{gen}-jump.c'], + include_dirs=['.', 'src', join('src', gen)], + libraries=EXTRA_LIBRARIES, + extra_compile_args=EXTRA_COMPILE_ARGS, + extra_link_args=EXTRA_LINK_ARGS, + depends=depends + [f'_{gen}.pyx'], + define_macros=defs, + ) + for gen in ['philox', 'pcg64', 'sfc64']: + # gen.pyx, src/gen/gen.c + _defs = defs + PCG64_DEFS if gen == 'pcg64' else defs + config.add_extension(f'_{gen}', + sources=[f'_{gen}.c', + f'src/{gen}/{gen}.c'], + include_dirs=['.', 'src', join('src', gen)], + libraries=EXTRA_LIBRARIES, + extra_compile_args=EXTRA_COMPILE_ARGS, + extra_link_args=EXTRA_LINK_ARGS, + depends=depends + [f'_{gen}.pyx', + 'bit_generator.pyx', 'bit_generator.pxd'], + define_macros=_defs, + ) + for gen in ['_common', 'bit_generator']: + # gen.pyx + config.add_extension(gen, + sources=[f'{gen}.c'], + libraries=EXTRA_LIBRARIES, + extra_compile_args=EXTRA_COMPILE_ARGS, + extra_link_args=EXTRA_LINK_ARGS, + include_dirs=['.', 'src'], + depends=depends + [f'{gen}.pyx', f'{gen}.pxd',], + define_macros=defs, + ) + config.add_data_files(f'{gen}.pxd') + for gen in ['_generator', '_bounded_integers']: + # gen.pyx, src/distributions/distributions.c + config.add_extension(gen, + sources=[f'{gen}.c'], + libraries=EXTRA_LIBRARIES + ['npymath'], + extra_compile_args=EXTRA_COMPILE_ARGS, + include_dirs=['.', 'src'], + extra_link_args=EXTRA_LINK_ARGS, + depends=depends + [f'{gen}.pyx'], 
+ define_macros=defs, + ) + config.add_data_files('_bounded_integers.pxd') + mtrand_libs = ['m', 'npymath'] if os.name != 'nt' else ['npymath'] + config.add_extension('mtrand', + sources=['mtrand.c', + 'src/legacy/legacy-distributions.c', + 'src/distributions/distributions.c', + ], + include_dirs=['.', 'src', 'src/legacy'], + libraries=mtrand_libs, + extra_compile_args=EXTRA_COMPILE_ARGS, + extra_link_args=EXTRA_LINK_ARGS, + depends=depends + ['mtrand.pyx'], + define_macros=defs + LEGACY_DEFS, + ) + config.add_data_files(*depends) + config.add_data_files('*.pyi') + return config + + +if __name__ == '__main__': + from numpy.distutils.core import setup + + setup(configuration=configuration) diff --git a/.local/lib/python3.8/site-packages/numpy/random/tests/__init__.py b/.local/lib/python3.8/site-packages/numpy/random/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/numpy/random/tests/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/random/tests/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..0ca54ca Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/random/tests/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/random/tests/__pycache__/test_direct.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/random/tests/__pycache__/test_direct.cpython-38.pyc new file mode 100644 index 0000000..8708974 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/random/tests/__pycache__/test_direct.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/random/tests/__pycache__/test_extending.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/random/tests/__pycache__/test_extending.cpython-38.pyc new file mode 100644 index 0000000..fb2791b Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/random/tests/__pycache__/test_extending.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/random/tests/__pycache__/test_generator_mt19937.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/random/tests/__pycache__/test_generator_mt19937.cpython-38.pyc new file mode 100644 index 0000000..b54ef2e Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/random/tests/__pycache__/test_generator_mt19937.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/random/tests/__pycache__/test_generator_mt19937_regressions.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/random/tests/__pycache__/test_generator_mt19937_regressions.cpython-38.pyc new file mode 100644 index 0000000..b8386a8 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/random/tests/__pycache__/test_generator_mt19937_regressions.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/random/tests/__pycache__/test_random.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/random/tests/__pycache__/test_random.cpython-38.pyc new file mode 100644 index 0000000..192227b Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/random/tests/__pycache__/test_random.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/random/tests/__pycache__/test_randomstate.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/random/tests/__pycache__/test_randomstate.cpython-38.pyc new file mode 100644 index 0000000..d36b95a Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/numpy/random/tests/__pycache__/test_randomstate.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/random/tests/__pycache__/test_randomstate_regression.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/random/tests/__pycache__/test_randomstate_regression.cpython-38.pyc new file mode 100644 index 0000000..2080fde Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/random/tests/__pycache__/test_randomstate_regression.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/random/tests/__pycache__/test_regression.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/random/tests/__pycache__/test_regression.cpython-38.pyc new file mode 100644 index 0000000..d5f90d9 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/random/tests/__pycache__/test_regression.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/random/tests/__pycache__/test_seed_sequence.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/random/tests/__pycache__/test_seed_sequence.cpython-38.pyc new file mode 100644 index 0000000..4ef068f Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/random/tests/__pycache__/test_seed_sequence.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/random/tests/__pycache__/test_smoke.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/random/tests/__pycache__/test_smoke.cpython-38.pyc new file mode 100644 index 0000000..13bded3 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/random/tests/__pycache__/test_smoke.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/random/tests/data/__init__.py b/.local/lib/python3.8/site-packages/numpy/random/tests/data/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/numpy/random/tests/data/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/random/tests/data/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..cab68fe Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/random/tests/data/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/random/tests/data/mt19937-testset-1.csv b/.local/lib/python3.8/site-packages/numpy/random/tests/data/mt19937-testset-1.csv new file mode 100644 index 0000000..b97bfa6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/tests/data/mt19937-testset-1.csv @@ -0,0 +1,1001 @@ +seed, 0xdeadbeaf +0, 0xc816921f +1, 0xb3623c6d +2, 0x5fa391bb +3, 0x40178d9 +4, 0x7dcc9811 +5, 0x548eb8e6 +6, 0x92ba3125 +7, 0x65fde68d +8, 0x2f81ec95 +9, 0xbd94f7a2 +10, 0xdc4d9bcc +11, 0xa672bf13 +12, 0xb41113e +13, 0xec7e0066 +14, 0x50239372 +15, 0xd9d66b1d +16, 0xab72a161 +17, 0xddc2e29f +18, 0x7ea29ab4 +19, 0x80d141ba +20, 0xb1c7edf1 +21, 0x44d29203 +22, 0xe224d98 +23, 0x5b3e9d26 +24, 0x14fd567c +25, 0x27d98c96 +26, 0x838779fc +27, 0x92a138a +28, 0x5d08965b +29, 0x531e0ad6 +30, 0x984ee8f4 +31, 0x1ed78539 +32, 0x32bd6d8d +33, 0xc37c8516 +34, 0x9aef5c6b +35, 0x3aacd139 +36, 0xd96ed154 +37, 0x489cd1ed +38, 0x2cba4b3b +39, 0x76c6ae72 +40, 0x2dae02b9 +41, 0x52ac5fd6 +42, 0xc2b5e265 +43, 0x630e6a28 +44, 0x3f560d5d +45, 0x9315bdf3 +46, 0xf1055aba +47, 0x840e42c6 +48, 0xf2099c6b +49, 0x15ff7696 +50, 0x7948d146 +51, 0x97342961 +52, 0x7a7a21c +53, 0xc66f4fb1 +54, 0x23c4103e +55, 0xd7321f98 +56, 0xeb7efb75 +57, 0xe02490b5 +58, 0x2aa02de +59, 
0x8bee0bf7 +60, 0xfc2da059 +61, 0xae835034 +62, 0x678f2075 +63, 0x6d03094b +64, 0x56455e05 +65, 0x18b32373 +66, 0x8ff0356b +67, 0x1fe442fb +68, 0x3f1ab6c3 +69, 0xb6fd21b +70, 0xfc310eb2 +71, 0xb19e9a4d +72, 0x17ddee72 +73, 0xfd534251 +74, 0x9e500564 +75, 0x9013a036 +76, 0xcf08f118 +77, 0x6b6d5969 +78, 0x3ccf1977 +79, 0x7cc11497 +80, 0x651c6ac9 +81, 0x4d6b104b +82, 0x9a28314e +83, 0x14c237be +84, 0x9cfc8d52 +85, 0x2947fad5 +86, 0xd71eff49 +87, 0x5188730e +88, 0x4b894614 +89, 0xf4fa2a34 +90, 0x42f7cc69 +91, 0x4089c9e8 +92, 0xbf0bbfe4 +93, 0x3cea65c +94, 0xc6221207 +95, 0x1bb71a8f +96, 0x54843fe7 +97, 0xbc59de4c +98, 0x79c6ee64 +99, 0x14e57a26 +100, 0x68d88fe +101, 0x2b86ef64 +102, 0x8ffff3c1 +103, 0x5bdd573f +104, 0x85671813 +105, 0xefe32ca2 +106, 0x105ded1e +107, 0x90ca2769 +108, 0xb33963ac +109, 0x363fbbc3 +110, 0x3b3763ae +111, 0x1d50ab88 +112, 0xc9ec01eb +113, 0xc8bbeada +114, 0x5d704692 +115, 0x5fd9e40 +116, 0xe61c125 +117, 0x2fe05792 +118, 0xda8afb72 +119, 0x4cbaa653 +120, 0xdd2243df +121, 0x896fd3f5 +122, 0x5bc23db +123, 0xa1c4e807 +124, 0x57d1a24d +125, 0x66503ddc +126, 0xcf7c0838 +127, 0x19e034fc +128, 0x66807450 +129, 0xfc219b3b +130, 0xe8a843e7 +131, 0x9ce61f08 +132, 0x92b950d6 +133, 0xce955ec4 +134, 0xda0d1f0d +135, 0x960c6250 +136, 0x39552432 +137, 0xde845e84 +138, 0xff3b4b11 +139, 0x5d918e6f +140, 0xbb930df2 +141, 0x7cfb0993 +142, 0x5400e1e9 +143, 0x3bfa0954 +144, 0x7e2605fb +145, 0x11941591 +146, 0x887e6994 +147, 0xdc8bed45 +148, 0x45b3fb50 +149, 0xfbdf8358 +150, 0x41507468 +151, 0x34c87166 +152, 0x17f64d77 +153, 0x3bbaf4f8 +154, 0x4f26f37e +155, 0x4a56ebf2 +156, 0x81100f1 +157, 0x96d94eae +158, 0xca88fda5 +159, 0x2eef3a60 +160, 0x952afbd3 +161, 0x2bec88c7 +162, 0x52335c4b +163, 0x8296db8e +164, 0x4da7d00a +165, 0xc00ac899 +166, 0xadff8c72 +167, 0xbecf26cf +168, 0x8835c83c +169, 0x1d13c804 +170, 0xaa940ddc +171, 0x68222cfe +172, 0x4569c0e1 +173, 0x29077976 +174, 0x32d4a5af +175, 0xd31fcdef +176, 0xdc60682b +177, 0x7c95c368 +178, 0x75a70213 +179, 0x43021751 +180, 0x5e52e0a6 +181, 0xf7e190b5 +182, 0xee3e4bb +183, 0x2fe3b150 +184, 0xcf419c07 +185, 0x478a4570 +186, 0xe5c3ea50 +187, 0x417f30a8 +188, 0xf0cfdaa0 +189, 0xd1f7f738 +190, 0x2c70fc23 +191, 0x54fc89f9 +192, 0x444dcf01 +193, 0xec2a002d +194, 0xef0c3a88 +195, 0xde21be9 +196, 0x88ab3296 +197, 0x3028897c +198, 0x264b200b +199, 0xd8ae0706 +200, 0x9eef901a +201, 0xbd1b96e0 +202, 0xea71366c +203, 0x1465b694 +204, 0x5a794650 +205, 0x83df52d4 +206, 0x8262413d +207, 0x5bc148c0 +208, 0xe0ecd80c +209, 0x40649571 +210, 0xb4d2ee5f +211, 0xedfd7d09 +212, 0xa082e25f +213, 0xc62992d1 +214, 0xbc7e65ee +215, 0x5499cf8a +216, 0xac28f775 +217, 0x649840fb +218, 0xd4c54805 +219, 0x1d166ba6 +220, 0xbeb1171f +221, 0x45b66703 +222, 0x78c03349 +223, 0x38d2a6ff +224, 0x935cae8b +225, 0x1d07dc3f +226, 0x6c1ed365 +227, 0x579fc585 +228, 0x1320c0ec +229, 0x632757eb +230, 0xd265a397 +231, 0x70e9b6c2 +232, 0xc81e322c +233, 0xa27153cf +234, 0x2118ba19 +235, 0x514ec400 +236, 0x2bd0ecd6 +237, 0xc3e7dae3 +238, 0xfa39355e +239, 0x48f23cc1 +240, 0xbcf75948 +241, 0x53ccc70c +242, 0x75346423 +243, 0x951181e0 +244, 0x348e90df +245, 0x14365d7f +246, 0xfbc95d7a +247, 0xdc98a9e6 +248, 0xed202df7 +249, 0xa59ec913 +250, 0x6b6e9ae2 +251, 0x1697f265 +252, 0x15d322d0 +253, 0xa2e7ee0a +254, 0x88860b7e +255, 0x455d8b9d +256, 0x2f5c59cb +257, 0xac49c9f1 +258, 0xa6a6a039 +259, 0xc057f56b +260, 0xf1ff1208 +261, 0x5eb8dc9d +262, 0xe6702509 +263, 0xe238b0ed +264, 0x5ae32e3d +265, 0xa88ebbdf +266, 0xef885ae7 +267, 0xafa6d49b +268, 0xc94499e0 +269, 0x1a196325 +270, 0x88938da3 +271, 
0x14f4345 +272, 0xd8e33637 +273, 0xa3551bd5 +274, 0x73fe35c7 +275, 0x9561e94b +276, 0xd673bf68 +277, 0x16134872 +278, 0x68c42f9f +279, 0xdf7574c8 +280, 0x8809bab9 +281, 0x1432cf69 +282, 0xafb66bf1 +283, 0xc184aa7b +284, 0xedbf2007 +285, 0xbd420ce1 +286, 0x761033a0 +287, 0xff7e351f +288, 0xd6c3780e +289, 0x5844416f +290, 0xc6c0ee1c +291, 0xd2e147db +292, 0x92ac601a +293, 0x393e846b +294, 0x18196cca +295, 0x54a22be +296, 0x32bab1c4 +297, 0x60365183 +298, 0x64fa342 +299, 0xca24a493 +300, 0xd8cc8b83 +301, 0x3faf102b +302, 0x6e09bb58 +303, 0x812f0ea +304, 0x592c95d8 +305, 0xe45ea4c5 +306, 0x23aebf83 +307, 0xbd9691d4 +308, 0xf47b4baa +309, 0x4ac7b487 +310, 0xcce18803 +311, 0x3377556e +312, 0x3ff8e6b6 +313, 0x99d22063 +314, 0x23250bec +315, 0x4e1f9861 +316, 0x8554249b +317, 0x8635c2fc +318, 0xe8426e8a +319, 0x966c29d8 +320, 0x270b6082 +321, 0x3180a8a1 +322, 0xe7e1668b +323, 0x7f868dc +324, 0xcf4c17cf +325, 0xe31de4d1 +326, 0xc8c8aff4 +327, 0xae8db704 +328, 0x3c928cc2 +329, 0xe12cd48 +330, 0xb33ecd04 +331, 0xb93d7cbe +332, 0x49c69d6a +333, 0x7d3bce64 +334, 0x86bc219 +335, 0x8408233b +336, 0x44dc7479 +337, 0xdf80d538 +338, 0xf3db02c3 +339, 0xbbbd31d7 +340, 0x121281f +341, 0x7521e9a3 +342, 0x8859675a +343, 0x75aa6502 +344, 0x430ed15b +345, 0xecf0a28d +346, 0x659774fd +347, 0xd58a2311 +348, 0x512389a9 +349, 0xff65e1ff +350, 0xb6ddf222 +351, 0xe3458895 +352, 0x8b13cd6e +353, 0xd4a22870 +354, 0xe604c50c +355, 0x27f54f26 +356, 0x8f7f422f +357, 0x9735b4cf +358, 0x414072b0 +359, 0x76a1c6d5 +360, 0xa2208c06 +361, 0x83cd0f61 +362, 0x6c4f7ead +363, 0x6553cf76 +364, 0xeffcf44 +365, 0x7f434a3f +366, 0x9dc364bd +367, 0x3cdf52ed +368, 0xad597594 +369, 0x9c3e211b +370, 0x6c04a33f +371, 0x885dafa6 +372, 0xbbdaca71 +373, 0x7ae5dd5c +374, 0x37675644 +375, 0x251853c6 +376, 0x130b086b +377, 0x143fa54b +378, 0x54cdc282 +379, 0x9faff5b3 +380, 0x502a5c8b +381, 0xd9524550 +382, 0xae221aa6 +383, 0x55cf759b +384, 0x24782da4 +385, 0xd715d815 +386, 0x250ea09a +387, 0x4e0744ac +388, 0x11e15814 +389, 0xabe5f9df +390, 0xc8146350 +391, 0xfba67d9b +392, 0x2b82e42f +393, 0xd4ea96fc +394, 0x5ffc179e +395, 0x1598bafe +396, 0x7fb6d662 +397, 0x1a12a0db +398, 0x450cee4a +399, 0x85f8e12 +400, 0xce71b594 +401, 0xd4bb1d19 +402, 0x968f379d +403, 0x54cc1d52 +404, 0x467e6066 +405, 0x7da5f9a9 +406, 0x70977034 +407, 0x49e65c4b +408, 0xd08570d1 +409, 0x7acdf60b +410, 0xdffa038b +411, 0x9ce14e4c +412, 0x107cbbf8 +413, 0xdd746ca0 +414, 0xc6370a46 +415, 0xe7f83312 +416, 0x373fa9ce +417, 0xd822a2c6 +418, 0x1d4efea6 +419, 0xc53dcadb +420, 0x9b4e898f +421, 0x71daa6bf +422, 0x7a0bc78b +423, 0xd7b86f50 +424, 0x1b8b3286 +425, 0xcf9425dd +426, 0xd5263220 +427, 0x4ea0b647 +428, 0xc767fe64 +429, 0xcfc5e67 +430, 0xcc6a2942 +431, 0xa51eff00 +432, 0x76092e1b +433, 0xf606e80f +434, 0x824b5e20 +435, 0xebb55e14 +436, 0x783d96a6 +437, 0x10696512 +438, 0x17ee510a +439, 0x3ab70a1f +440, 0xcce6b210 +441, 0x8f72f0fb +442, 0xf0610b41 +443, 0x83d01fb5 +444, 0x6b3de36 +445, 0xe4c2e84f +446, 0x9c43bb15 +447, 0xddf2905 +448, 0x7dd63556 +449, 0x3662ca09 +450, 0xfb81f35b +451, 0xc2c8a72a +452, 0x8e93c37 +453, 0xa93da2d4 +454, 0xa03af8f1 +455, 0x8d75159a +456, 0x15f010b0 +457, 0xa296ab06 +458, 0xe55962ba +459, 0xeae700a9 +460, 0xe388964a +461, 0x917f2bec +462, 0x1c203fea +463, 0x792a01ba +464, 0xa93a80ac +465, 0x9eb8a197 +466, 0x56c0bc73 +467, 0xb8f05799 +468, 0xf429a8c8 +469, 0xb92cee42 +470, 0xf8864ec +471, 0x62f2518a +472, 0x3a7bfa3e +473, 0x12e56e6d +474, 0xd7a18313 +475, 0x41fa3899 +476, 0xa09c4956 +477, 0xebcfd94a +478, 0xc485f90b +479, 0x4391ce40 +480, 0x742a3333 +481, 
0xc932f9e5 +482, 0x75c6c263 +483, 0x80937f0 +484, 0xcf21833c +485, 0x16027520 +486, 0xd42e669f +487, 0xb0f01fb7 +488, 0xb35896f1 +489, 0x763737a9 +490, 0x1bb20209 +491, 0x3551f189 +492, 0x56bc2602 +493, 0xb6eacf4 +494, 0x42ec4d11 +495, 0x245cc68 +496, 0xc27ac43b +497, 0x9d903466 +498, 0xce3f0c05 +499, 0xb708c31c +500, 0xc0fd37eb +501, 0x95938b2c +502, 0xf20175a7 +503, 0x4a86ee9b +504, 0xbe039a58 +505, 0xd41cabe7 +506, 0x83bc99ba +507, 0x761d60e1 +508, 0x7737cc2e +509, 0x2b82fc4b +510, 0x375aa401 +511, 0xfe9597a0 +512, 0x5543806a +513, 0x44f31238 +514, 0x7df31538 +515, 0x74cfa770 +516, 0x8755d881 +517, 0x1fde665a +518, 0xda8bf315 +519, 0x973d8e95 +520, 0x72205228 +521, 0x8fe59717 +522, 0x7bb90b34 +523, 0xef6ed945 +524, 0x16fd4a38 +525, 0x5db44de1 +526, 0xf09f93b3 +527, 0xe84824cc +528, 0x945bb50e +529, 0xd0be4aa5 +530, 0x47c277c2 +531, 0xd3800c28 +532, 0xac1c33ec +533, 0xd3dacce +534, 0x811c8387 +535, 0x6761b36 +536, 0x70d3882f +537, 0xd6e62e3a +538, 0xea25daa2 +539, 0xb07f39d1 +540, 0x391d89d7 +541, 0x84b6fb5e +542, 0x3dda3fca +543, 0x229e80a4 +544, 0x3d94a4b7 +545, 0x5d3d576a +546, 0xad7818a0 +547, 0xce23b03a +548, 0x7aa2079c +549, 0x9a6be555 +550, 0x83f3b34a +551, 0x1848f9d9 +552, 0xd8fefc1c +553, 0x48e6ce48 +554, 0x52e55750 +555, 0xf41a71cf +556, 0xba08e259 +557, 0xfaf06a15 +558, 0xeaaac0fb +559, 0x34f90098 +560, 0xb1dfffbb +561, 0x718daec2 +562, 0xab4dda21 +563, 0xd27cc1ee +564, 0x4aafbc4c +565, 0x356dfb4f +566, 0x83fcdfd6 +567, 0x8f0bcde0 +568, 0x4363f844 +569, 0xadc0f4d5 +570, 0x3bde994e +571, 0x3884d452 +572, 0x21876b4a +573, 0x9c985398 +574, 0xca55a226 +575, 0x3a88c583 +576, 0x916dc33c +577, 0x8f67d1d7 +578, 0x3b26a667 +579, 0xe4ddeb4b +580, 0x1a9d8c33 +581, 0x81c9b74f +582, 0x9ed1e9df +583, 0x6e61aecf +584, 0x95e95a5d +585, 0x68864ff5 +586, 0xb8fa5b9 +587, 0x72b1b3de +588, 0x5e18a86b +589, 0xd7f2337d +590, 0xd70e0925 +591, 0xb573a4c1 +592, 0xc77b3f8a +593, 0x389b20de +594, 0x16cf6afb +595, 0xa39bd275 +596, 0xf491cf01 +597, 0x6f88a802 +598, 0x8510af05 +599, 0xe7cd549a +600, 0x8603179a +601, 0xef43f191 +602, 0xf9b64c60 +603, 0xb00254a7 +604, 0xd7c06a2d +605, 0x17e9380b +606, 0x529e727b +607, 0xaaa8fe0a +608, 0xfb64ff4c +609, 0xcd75af26 +610, 0xfb717c87 +611, 0xa0789899 +612, 0x10391ec9 +613, 0x7e9b40b3 +614, 0x18536554 +615, 0x728c05f7 +616, 0x787dca98 +617, 0xad948d1 +618, 0x44c18def +619, 0x3303f2ec +620, 0xa15acb5 +621, 0xb58d38f4 +622, 0xfe041ef8 +623, 0xd151a956 +624, 0x7b9168e8 +625, 0x5ebeca06 +626, 0x90fe95df +627, 0xf76875aa +628, 0xb2e0d664 +629, 0x2e3253b7 +630, 0x68e34469 +631, 0x1f0c2d89 +632, 0x13a34ac2 +633, 0x5ffeb841 +634, 0xe381e91c +635, 0xb8549a92 +636, 0x3f35cf1 +637, 0xda0f9dcb +638, 0xdd9828a6 +639, 0xe1428f29 +640, 0xf4db80b5 +641, 0xdac30af5 +642, 0x1af1dd17 +643, 0x9a540254 +644, 0xcab68a38 +645, 0x33560361 +646, 0x2fbf3886 +647, 0xbc785923 +648, 0xe081cd10 +649, 0x8e473356 +650, 0xd102c357 +651, 0xeea4fe48 +652, 0x248d3453 +653, 0x1da79ac +654, 0x815a65ff +655, 0x27693e76 +656, 0xb7d5af40 +657, 0x6d245d30 +658, 0x9e06fa8f +659, 0xb0570dcb +660, 0x469f0005 +661, 0x3e0ca132 +662, 0xd89bbf3 +663, 0xd61ccd47 +664, 0x6383878 +665, 0x62b5956 +666, 0x4dc83675 +667, 0x93fd8492 +668, 0x5a0091f5 +669, 0xc9f9bc3 +670, 0xa26e7778 +671, 0xeabf2d01 +672, 0xe612dc06 +673, 0x85d89ff9 +674, 0xd1763179 +675, 0xcb88947b +676, 0x9e8757a5 +677, 0xe100e85c +678, 0x904166eb +679, 0x4996243d +680, 0x4038e1cb +681, 0x2be2c63d +682, 0x77017e81 +683, 0x3b1f556b +684, 0x1c785c77 +685, 0x6869b8bd +686, 0xe1217ed4 +687, 0x4012ab2f +688, 0xc06c0d8e +689, 0x2122eb68 +690, 0xad1783fd 
+691, 0x5f0c80e3 +692, 0x828f7efa +693, 0x29328399 +694, 0xeadf1087 +695, 0x85dc0037 +696, 0x9691ef26 +697, 0xc0947a53 +698, 0x2a178d2a +699, 0x2a2c7e8f +700, 0x90378380 +701, 0xaad8d326 +702, 0x9cf1c3c8 +703, 0x84eccd44 +704, 0x79e61808 +705, 0x8b3f454e +706, 0x209e6e1 +707, 0x51f88378 +708, 0xc210226f +709, 0xd982adb5 +710, 0x55d44a31 +711, 0x9817d443 +712, 0xa328c626 +713, 0x13455966 +714, 0xb8f681d3 +715, 0x2a3c713b +716, 0xc186959b +717, 0x814a74b0 +718, 0xed7bc90 +719, 0xa88d3d6d +720, 0x88a9f561 +721, 0x73aa1c0a +722, 0xdfeff404 +723, 0xec037e4b +724, 0xa5c209f0 +725, 0xb3a223b4 +726, 0x24ce3709 +727, 0x3184c790 +728, 0xa1398c62 +729, 0x2f92034e +730, 0xbb37a79a +731, 0x605287b4 +732, 0x8faa772c +733, 0x6ce56c1d +734, 0xc035fb4c +735, 0x7cf5b316 +736, 0x6502645 +737, 0xa283d810 +738, 0x778bc2f1 +739, 0xfdf99313 +740, 0x1f513265 +741, 0xbd3837e2 +742, 0x9b84a9a +743, 0x2139ce91 +744, 0x61a8e890 +745, 0xf9ff12db +746, 0xb43d2ea7 +747, 0x88532e61 +748, 0x175a6655 +749, 0x7a6c4f72 +750, 0x6dafc1b7 +751, 0x449b1459 +752, 0x514f654f +753, 0x9a6731e2 +754, 0x8632da43 +755, 0xc81b0422 +756, 0x81fe9005 +757, 0x15b79618 +758, 0xb5fa629f +759, 0x987a474f +760, 0x1c74f54e +761, 0xf9743232 +762, 0xec4b55f +763, 0x87d761e5 +764, 0xd1ad78b7 +765, 0x453d9350 +766, 0xc7a7d85 +767, 0xb2576ff5 +768, 0xcdde49b7 +769, 0x8e1f763e +770, 0x1338583e +771, 0xfd65b9dc +772, 0x4f19c4f4 +773, 0x3a52d73d +774, 0xd3509c4c +775, 0xda24fe31 +776, 0xe2de56ba +777, 0x2db5e540 +778, 0x23172734 +779, 0x4db572f +780, 0xeb941718 +781, 0x84c2649a +782, 0x3b1e5b6a +783, 0x4c9c61b9 +784, 0x3bccd11 +785, 0xb4d7b78e +786, 0x48580ae5 +787, 0xd273ab68 +788, 0x25c11615 +789, 0x470b53f6 +790, 0x329c2068 +791, 0x1693721b +792, 0xf8c9aacf +793, 0x4c3d5693 +794, 0xd778284e +795, 0xae1cb24f +796, 0x3c11d1b3 +797, 0xddd2b0c0 +798, 0x90269fa7 +799, 0x5666e0a2 +800, 0xf9f195a4 +801, 0x61d78eb2 +802, 0xada5a7c0 +803, 0xaa272fbe +804, 0xba3bae2f +805, 0xd0b70fc2 +806, 0x529f32b +807, 0xda7a3e21 +808, 0x9a776a20 +809, 0xb21f9635 +810, 0xb3acc14e +811, 0xac55f56 +812, 0x29dccf41 +813, 0x32dabdb3 +814, 0xaa032f58 +815, 0xfa406af4 +816, 0xce3c415d +817, 0xb44fb4d9 +818, 0x32248d1c +819, 0x680c6440 +820, 0xae2337b +821, 0x294cb597 +822, 0x5bca48fe +823, 0xaef19f40 +824, 0xad60406 +825, 0x4781f090 +826, 0xfd691ffc +827, 0xb6568268 +828, 0xa56c72cb +829, 0xf8a9e0fc +830, 0x9af4fd02 +831, 0x2cd30932 +832, 0x776cefd7 +833, 0xe31f476e +834, 0x6d94a437 +835, 0xb3cab598 +836, 0xf582d13f +837, 0x3bf8759d +838, 0xc3777dc +839, 0x5e425ea8 +840, 0x1c7ff4ed +841, 0x1c2e97d1 +842, 0xc062d2b4 +843, 0x46dc80e0 +844, 0xbcdb47e6 +845, 0x32282fe0 +846, 0xaba89063 +847, 0x5e94e9bb +848, 0x3e667f78 +849, 0xea6eb21a +850, 0xe56e54e8 +851, 0xa0383510 +852, 0x6768fe2b +853, 0xb53ac3e0 +854, 0x779569a0 +855, 0xeca83c6a +856, 0x24db4d2d +857, 0x4585f696 +858, 0xf84748b2 +859, 0xf6a4dd5b +860, 0x31fb524d +861, 0x67ab39fe +862, 0x5882a899 +863, 0x9a05fcf6 +864, 0x712b5674 +865, 0xe8c6958f +866, 0x4b448bb3 +867, 0x530b9abf +868, 0xb491f491 +869, 0x98352c62 +870, 0x2d0a50e3 +871, 0xeb4384da +872, 0x36246f07 +873, 0xcbc5c1a +874, 0xae24031d +875, 0x44d11ed6 +876, 0xf07f1608 +877, 0xf296aadd +878, 0x3bcfe3be +879, 0x8fa1e7df +880, 0xfd317a6e +881, 0xe4975c44 +882, 0x15205892 +883, 0xa762d4df +884, 0xf1167365 +885, 0x6811cc00 +886, 0x8315f23 +887, 0xe045b4b1 +888, 0xa8496414 +889, 0xbed313ae +890, 0xcdae3ddb +891, 0xa9c22c9 +892, 0x275fab1a +893, 0xedd65fa +894, 0x4c188229 +895, 0x63a83e58 +896, 0x18aa9207 +897, 0xa41f2e78 +898, 0xd9f63653 +899, 0xbe2be73b +900, 0xa3364d39 
+901, 0x896d5428 +902, 0xc737539e +903, 0x745a78c6 +904, 0xf0b2b042 +905, 0x510773b4 +906, 0x92ad8e37 +907, 0x27f2f8c4 +908, 0x23704cc8 +909, 0x3d95a77f +910, 0xf08587a4 +911, 0xbd696a25 +912, 0x948924f3 +913, 0x8cddb634 +914, 0xcd2a4910 +915, 0x8e0e300e +916, 0x83815a9b +917, 0x67383510 +918, 0x3c18f0d0 +919, 0xc7a7bccc +920, 0x7cc2d3a2 +921, 0x52eb2eeb +922, 0xe4a257e5 +923, 0xec76160e +924, 0x63f9ad68 +925, 0x36d0bbbf +926, 0x957bc4e4 +927, 0xc9ed90ff +928, 0x4cb6059d +929, 0x2f86eca1 +930, 0x3e3665a3 +931, 0x9b7eb6f4 +932, 0x492e7e18 +933, 0xa098aa51 +934, 0x7eb568b2 +935, 0x3fd639ba +936, 0x7bebcf1 +937, 0x99c844ad +938, 0x43cb5ec7 +939, 0x8dfbbef5 +940, 0x5be413ff +941, 0xd93b976d +942, 0xc1c7a86d +943, 0x1f0e93d0 +944, 0x498204a2 +945, 0xe8fe832a +946, 0x2236bd7 +947, 0x89953769 +948, 0x2acc3491 +949, 0x2c4f22c6 +950, 0xd7996277 +951, 0x3bcdc349 +952, 0xfc286630 +953, 0x5f8909fd +954, 0x242677c0 +955, 0x4cb34104 +956, 0xa6ff8100 +957, 0x39ea47ec +958, 0x9bd54140 +959, 0x7502ffe8 +960, 0x7ebef8ae +961, 0x1ed8abe4 +962, 0xfaba8450 +963, 0xc197b65f +964, 0x19431455 +965, 0xe229c176 +966, 0xeb2967da +967, 0xe0c5dc05 +968, 0xa84e3227 +969, 0x10dd9e0f +970, 0xbdb70b02 +971, 0xce24808a +972, 0x423edab8 +973, 0x194caf71 +974, 0x144f150d +975, 0xf811c2d2 +976, 0xc224ee85 +977, 0x2b217a5b +978, 0xf78a5a79 +979, 0x6554a4b1 +980, 0x769582df +981, 0xf4b2cf93 +982, 0x89648483 +983, 0xb3283a3e +984, 0x82b895db +985, 0x79388ef0 +986, 0x54bc42a6 +987, 0xc4dd39d9 +988, 0x45b33b7d +989, 0x8703b2c1 +990, 0x1cc94806 +991, 0xe0f43e49 +992, 0xcaa7b6bc +993, 0x4f88e9af +994, 0x1477cce5 +995, 0x347dd115 +996, 0x36e335fa +997, 0xb93c9a31 +998, 0xaac3a175 +999, 0x68a19647 diff --git a/.local/lib/python3.8/site-packages/numpy/random/tests/data/mt19937-testset-2.csv b/.local/lib/python3.8/site-packages/numpy/random/tests/data/mt19937-testset-2.csv new file mode 100644 index 0000000..cdb8e47 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/tests/data/mt19937-testset-2.csv @@ -0,0 +1,1001 @@ +seed, 0x0 +0, 0x7ab4ea94 +1, 0x9b561119 +2, 0x4957d02e +3, 0x7dd3fdc2 +4, 0x5affe54 +5, 0x5a01741c +6, 0x8b9e8c1f +7, 0xda5bf11a +8, 0x509226 +9, 0x64e2ea17 +10, 0x82c6dab5 +11, 0xe4302515 +12, 0x8198b873 +13, 0xc3ec9a82 +14, 0x829dff28 +15, 0x5278e44f +16, 0x994a7d2c +17, 0xf1c89398 +18, 0xaf2fddec +19, 0x22abc6ee +20, 0x963dbd43 +21, 0xc29edffb +22, 0x41c1ce07 +23, 0x9c90034d +24, 0x1f17a796 +25, 0x3833caa8 +26, 0xb8795528 +27, 0xebc595a2 +28, 0xf8f5b5dd +29, 0xc2881f72 +30, 0x18e5d3f0 +31, 0x9b19ac7a +32, 0xb9992436 +33, 0xc00052b3 +34, 0xb63f4475 +35, 0x962642d9 +36, 0x63506c10 +37, 0x2be6b127 +38, 0x569bdbc6 +39, 0x7f185e01 +40, 0xebb55f53 +41, 0x1c30198c +42, 0x7c8d75c6 +43, 0xd3f2186b +44, 0xaca5b9b1 +45, 0xbc49ff45 +46, 0xc4a802af +47, 0x2cecd86f +48, 0x8e0da529 +49, 0x1f22b00e +50, 0x4559ea80 +51, 0x60f587d8 +52, 0x7c7460e9 +53, 0x67be0a4a +54, 0x987a0183 +55, 0x7bd30f1 +56, 0xab18c4ac +57, 0xffdbfb64 +58, 0x9ea917f9 +59, 0x1239dab7 +60, 0x38efabeb +61, 0x5da91888 +62, 0x8f49ed62 +63, 0x83f60b1e +64, 0x5950a3fc +65, 0xd8911104 +66, 0x19e8859e +67, 0x1a4d89ec +68, 0x968ca180 +69, 0x9e1b6da3 +70, 0x3d99c2c +71, 0x55f76289 +72, 0x8fa28b9e +73, 0x9fe01d33 +74, 0xdade4e38 +75, 0x1ea04290 +76, 0xa7263313 +77, 0xaafc762e +78, 0x460476d6 +79, 0x31226e12 +80, 0x451d3f05 +81, 0xd0d2764b +82, 0xd06e1ab3 +83, 0x1394e3f4 +84, 0x2fc04ea3 +85, 0x5b8401c +86, 0xebd6c929 +87, 0xe881687c +88, 0x94bdd66a +89, 0xabf85983 +90, 0x223ad12d +91, 0x2aaeeaa3 +92, 0x1f704934 +93, 0x2db2efb6 +94, 0xf49b8dfb +95, 0x5bdbbb9d 
+96, 0xba0cd0db +97, 0x4ec4674e +98, 0xad0129e +99, 0x7a66129b +100, 0x50d12c5e +101, 0x85b1d335 +102, 0x3efda58a +103, 0xecd886fb +104, 0x8ecadd3d +105, 0x60ebac0f +106, 0x5e10fe79 +107, 0xa84f7e5d +108, 0x43931288 +109, 0xfacf448 +110, 0x4ee01997 +111, 0xcdc0a651 +112, 0x33c87037 +113, 0x8b50fc03 +114, 0xf52aad34 +115, 0xda6cd856 +116, 0x7585bea0 +117, 0xe947c762 +118, 0x4ddff5d8 +119, 0xe0e79b3b +120, 0xb804cf09 +121, 0x84765c44 +122, 0x3ff666b4 +123, 0xe31621ad +124, 0x816f2236 +125, 0x228176bc +126, 0xfdc14904 +127, 0x635f5077 +128, 0x6981a817 +129, 0xfd9a0300 +130, 0xd3fa8a24 +131, 0xd67c1a77 +132, 0x903fe97a +133, 0xf7c4a4d5 +134, 0x109f2058 +135, 0x48ab87fe +136, 0xfd6f1928 +137, 0x707e9452 +138, 0xf327db9e +139, 0x7b80d76d +140, 0xfb6ba193 +141, 0x454a1ad0 +142, 0xe20b51e +143, 0xb774d085 +144, 0x6b1ed574 +145, 0xb1e77de4 +146, 0xe2a83b37 +147, 0x33d3176f +148, 0x2f0ca0fc +149, 0x17f51e2 +150, 0x7c1fbf55 +151, 0xf09e9cd0 +152, 0xe3d9bacd +153, 0x4244db0a +154, 0x876c09fc +155, 0x9db4fc2f +156, 0xd3771d60 +157, 0x25fc6a75 +158, 0xb309915c +159, 0xc50ee027 +160, 0xaa5b7b38 +161, 0x4c650ded +162, 0x1acb2879 +163, 0x50db5887 +164, 0x90054847 +165, 0xfef23e5b +166, 0x2dd7b7d5 +167, 0x990b8c2e +168, 0x6001a601 +169, 0xb5d314c4 +170, 0xfbfb7bf9 +171, 0x1aba997d +172, 0x814e7304 +173, 0x989d956a +174, 0x86d5a29c +175, 0x70a9fa08 +176, 0xc4ccba87 +177, 0x7e9cb366 +178, 0xee18eb0a +179, 0x44f5be58 +180, 0x91d4af2d +181, 0x5ab6e593 +182, 0x9fd6bb4d +183, 0x85894ce +184, 0x728a2401 +185, 0xf006f6d4 +186, 0xd782741e +187, 0x842cd5bd +188, 0xfb5883aa +189, 0x7e5a471 +190, 0x83ff6965 +191, 0xc9675c6b +192, 0xb6ced3c7 +193, 0x3de6425b +194, 0x25e14db4 +195, 0x69ca3dec +196, 0x81342d13 +197, 0xd7cd8417 +198, 0x88d15e69 +199, 0xefba17c9 +200, 0x43d595e6 +201, 0x89d4cf25 +202, 0x7cae9b9b +203, 0x2242c621 +204, 0x27fc3598 +205, 0x467b1d84 +206, 0xe84d4622 +207, 0xa26bf980 +208, 0x80411010 +209, 0xe2c2bfea +210, 0xbc6ca25a +211, 0x3ddb592a +212, 0xdd46eb9e +213, 0xdfe8f657 +214, 0x2cedc974 +215, 0xf0dc546b +216, 0xd46be68f +217, 0x26d8a5aa +218, 0x76e96ba3 +219, 0x7d5b5353 +220, 0xf532237c +221, 0x6478b79 +222, 0x9b81a5e5 +223, 0x5fc68e5c +224, 0x68436e70 +225, 0x2a0043f9 +226, 0x108d523c +227, 0x7a4c32a3 +228, 0x9c84c742 +229, 0x6f813dae +230, 0xfcc5bbcc +231, 0x215b6f3a +232, 0x84cb321d +233, 0x7913a248 +234, 0xb1e6b585 +235, 0x49376b31 +236, 0x1dc896b0 +237, 0x347051ad +238, 0x5524c042 +239, 0xda0eef9d +240, 0xf2e73342 +241, 0xbeee2f9d +242, 0x7c702874 +243, 0x9eb3bd34 +244, 0x97b09700 +245, 0xcdbab1d4 +246, 0x4a2f6ed1 +247, 0x2047bda5 +248, 0x3ecc7005 +249, 0x8d0d5e67 +250, 0x40876fb5 +251, 0xb5fd2187 +252, 0xe915d8af +253, 0x9a2351c7 +254, 0xccc658ae +255, 0xebb1eddc +256, 0xc4a83671 +257, 0xffb2548f +258, 0xe4fe387a +259, 0x477aaab4 +260, 0x8475a4e4 +261, 0xf8823e46 +262, 0xe4130f71 +263, 0xbdb54482 +264, 0x98fe0462 +265, 0xf36b27b8 +266, 0xed7733da +267, 0x5f428afc +268, 0x43a3a21a +269, 0xf8370b55 +270, 0xfade1de1 +271, 0xd9a038ea +272, 0x3c69af23 +273, 0x24df7dd0 +274, 0xf66d9353 +275, 0x71d811be +276, 0xcc4d024b +277, 0xb8c30bf0 +278, 0x4198509d +279, 0x8b37ba36 +280, 0xa41ae29a +281, 0x8cf7799e +282, 0x5cd0136a +283, 0xa11324ef +284, 0x2f8b6d4b +285, 0x3657cf17 +286, 0x35b6873f +287, 0xee6e5bd7 +288, 0xbeeaa98 +289, 0x9ad3c581 +290, 0xe2376c3f +291, 0x738027cc +292, 0x536ac839 +293, 0xf066227 +294, 0x6c9cb0f9 +295, 0x84082ae6 +296, 0xab38ae9d +297, 0x493eade9 +298, 0xcb630b3a +299, 0x64d44250 +300, 0xe5efb557 +301, 0xea2424d9 +302, 0x11a690ba +303, 0x30a48ae4 +304, 0x58987e53 +305, 
0x94ec6076 +306, 0x5d3308fa +307, 0xf1635ebb +308, 0x56a5ab90 +309, 0x2b2f2ee4 +310, 0x6f9e6483 +311, 0x8b93e327 +312, 0xa7ce140b +313, 0x4c8aa42 +314, 0x7657bb3f +315, 0xf250fd75 +316, 0x1edfcb0f +317, 0xdb42ace3 +318, 0xf8147e16 +319, 0xd1992bd +320, 0x64bb14d1 +321, 0x423e724d +322, 0x7b172f7c +323, 0x17171696 +324, 0x4acaf83b +325, 0x7a83527e +326, 0xfc980c60 +327, 0xc8b56bb +328, 0x2453f77f +329, 0x85ad1bf9 +330, 0x62a85dfe +331, 0x48238c4d +332, 0xbb3ec1eb +333, 0x4c1c039c +334, 0x1f37f571 +335, 0x98aecb63 +336, 0xc3b3ddd6 +337, 0xd22dad4 +338, 0xe49671a3 +339, 0xe3baf945 +340, 0xb9e21680 +341, 0xda562856 +342, 0xe8b88ce4 +343, 0x86f88de2 +344, 0x986faf76 +345, 0x6f0025c3 +346, 0x3fe21234 +347, 0xd8d3f729 +348, 0xc2d11c6f +349, 0xd4f9e8f +350, 0xf61a0aa +351, 0xc48bb313 +352, 0xe944e940 +353, 0xf1801b2e +354, 0x253590be +355, 0x981f069d +356, 0x891454d8 +357, 0xa4f824ad +358, 0x6dd2cc48 +359, 0x3018827e +360, 0x3fb329e6 +361, 0x65276517 +362, 0x8d2c0dd2 +363, 0xc965b48e +364, 0x85d14d90 +365, 0x5a51623c +366, 0xa9573d6a +367, 0x82d00edf +368, 0x5ed7ce07 +369, 0x1d946abc +370, 0x24fa567b +371, 0x83ef5ecc +372, 0x9001724a +373, 0xc4fe48f3 +374, 0x1e07c25c +375, 0xf4d5e65e +376, 0xb734f6e9 +377, 0x327a2df8 +378, 0x766d59b7 +379, 0x625e6b61 +380, 0xe82f32d7 +381, 0x1566c638 +382, 0x2e815871 +383, 0x606514aa +384, 0x36b7386e +385, 0xcaa8ce08 +386, 0xb453fe9c +387, 0x48574e23 +388, 0x71f0da06 +389, 0xa8a79463 +390, 0x6b590210 +391, 0x86e989db +392, 0x42899f4f +393, 0x7a654ef9 +394, 0x4c4fe932 +395, 0x77b2fd10 +396, 0xb6b4565c +397, 0xa2e537a3 +398, 0xef5a3dca +399, 0x41235ea8 +400, 0x95c90541 +401, 0x50ad32c4 +402, 0xc1b8e0a4 +403, 0x498e9aab +404, 0xffc965f1 +405, 0x72633485 +406, 0x3a731aef +407, 0x7cfddd0b +408, 0xb04d4129 +409, 0x184fc28e +410, 0x424369b0 +411, 0xf9ae13a1 +412, 0xaf357c8d +413, 0x7a19228e +414, 0xb46de2a8 +415, 0xeff2ac76 +416, 0xa6c9357b +417, 0x614f19c1 +418, 0x8ee1a53f +419, 0xbe1257b1 +420, 0xf72651fe +421, 0xd347c298 +422, 0x96dd2f23 +423, 0x5bb1d63e +424, 0x32e10887 +425, 0x36a144da +426, 0x9d70e791 +427, 0x5e535a25 +428, 0x214253da +429, 0x2e43dd40 +430, 0xfc0413f4 +431, 0x1f5ea409 +432, 0x1754c126 +433, 0xcdbeebbe +434, 0x1fb44a14 +435, 0xaec7926 +436, 0xb9d9a1e +437, 0x9e4a6577 +438, 0x8b1f04c5 +439, 0x19854e8a +440, 0x531080cd +441, 0xc0cbd73 +442, 0x20399d77 +443, 0x7d8e9ed5 +444, 0x66177598 +445, 0x4d18a5c2 +446, 0xe08ebf58 +447, 0xb1f9c87b +448, 0x66bedb10 +449, 0x26670d21 +450, 0x7a7892da +451, 0x69b69d86 +452, 0xd04f1d1c +453, 0xaf469625 +454, 0x7946b813 +455, 0x1ee596bd +456, 0x7f365d85 +457, 0x795b662b +458, 0x194ad02d +459, 0x5a9649b5 +460, 0x6085e278 +461, 0x2cf54550 +462, 0x9c77ea0b +463, 0x3c6ff8b +464, 0x2141cd34 +465, 0xb90bc671 +466, 0x35037c4b +467, 0xd04c0d76 +468, 0xc75bff8 +469, 0x8f52003b +470, 0xfad3d031 +471, 0x667024bc +472, 0xcb04ea36 +473, 0x3e03d587 +474, 0x2644d3a0 +475, 0xa8fe99ba +476, 0x2b9a55fc +477, 0x45c4d44a +478, 0xd059881 +479, 0xe07fcd20 +480, 0x4e22046c +481, 0x7c2cbf81 +482, 0xbf7f23de +483, 0x69d924c3 +484, 0xe53cd01 +485, 0x3879017c +486, 0xa590e558 +487, 0x263bc076 +488, 0x245465b1 +489, 0x449212c6 +490, 0x249dcb29 +491, 0x703d42d7 +492, 0x140eb9ec +493, 0xc86c5741 +494, 0x7992aa5b +495, 0xb8b76a91 +496, 0x771dac3d +497, 0x4ecd81e3 +498, 0xe5ac30b3 +499, 0xf4d7a5a6 +500, 0xac24b97 +501, 0x63494d78 +502, 0x627ffa89 +503, 0xfa4f330 +504, 0x8098a1aa +505, 0xcc0c61dc +506, 0x34749fa0 +507, 0x7f217822 +508, 0x418d6f15 +509, 0xa4b6e51e +510, 0x1036de68 +511, 0x1436986e +512, 0x44df961d +513, 0x368e4651 +514, 0x6a9e5d8c +515, 
0x27d1597e +516, 0xa1926c62 +517, 0x8d1f2b55 +518, 0x5797eb42 +519, 0xa90f9e81 +520, 0x57547b10 +521, 0xdbbcca8e +522, 0x9edd2d86 +523, 0xbb0a7527 +524, 0x7662380c +525, 0xe7c98590 +526, 0x950fbf3f +527, 0xdc2b76b3 +528, 0x8a945102 +529, 0x3f0a1a85 +530, 0xeb215834 +531, 0xc59f2802 +532, 0xe2a4610 +533, 0x8b5a8665 +534, 0x8b2d9933 +535, 0x40a4f0bc +536, 0xaab5bc67 +537, 0x1442a69e +538, 0xdf531193 +539, 0x698d3db4 +540, 0x2d40324e +541, 0x1a25feb2 +542, 0xe8cc898f +543, 0xf12e98f5 +544, 0xc03ad34c +545, 0xf62fceff +546, 0xdd827e1e +547, 0x7d8ccb3b +548, 0xab2d6bc1 +549, 0xc323a124 +550, 0x8184a19a +551, 0xc3c4e934 +552, 0x5487424d +553, 0xd6a81a44 +554, 0x90a8689d +555, 0xe69c4c67 +556, 0xbdae02dd +557, 0x72a18a79 +558, 0x2a88e907 +559, 0x31cf4b5d +560, 0xb157772f +561, 0x206ba601 +562, 0x18529232 +563, 0x7dac90d8 +564, 0x3a5f8a09 +565, 0x9f4b64a3 +566, 0xae373af9 +567, 0x1d79447c +568, 0x2a23684b +569, 0x41fb7ba4 +570, 0x55e4bb9e +571, 0xd7619d3e +572, 0xc04e4dd8 +573, 0x8418d516 +574, 0x2b2ca585 +575, 0xfa8eedf +576, 0x5bafd977 +577, 0x31974fb0 +578, 0x9eb6697b +579, 0xc8be22f5 +580, 0x173b126a +581, 0x8809becf +582, 0x3e41efe1 +583, 0x3d6cbbb8 +584, 0x278c81d8 +585, 0xa6f08434 +586, 0xa0e6601d +587, 0x2fccd88d +588, 0x3cbc8beb +589, 0x5f65d864 +590, 0xa1ff8ddf +591, 0x609dcb7c +592, 0x4a4e1663 +593, 0xeae5531 +594, 0x962a7c85 +595, 0x1e110607 +596, 0x8c5db5d0 +597, 0xc7f2337e +598, 0xc94fcc9c +599, 0xe7f62629 +600, 0x6c9aa9f8 +601, 0x2e27fe0e +602, 0x4d0dae12 +603, 0x9eecf588 +604, 0x977ba3f2 +605, 0xed0a51af +606, 0x3f3ec633 +607, 0xc174b2ec +608, 0x590be8a9 +609, 0x4f630d18 +610, 0xf579e989 +611, 0xe2a55584 +612, 0xee11edcd +613, 0x150a4833 +614, 0xc0a0535c +615, 0xb5e00993 +616, 0xb6435700 +617, 0xa98dbff +618, 0x315716af +619, 0x94395776 +620, 0x6cbd48d9 +621, 0xab17f8fc +622, 0xa794ffb7 +623, 0x6b55e231 +624, 0x89ff5783 +625, 0x431dcb26 +626, 0x270f9bf8 +627, 0x2af1b8d0 +628, 0x881745ed +629, 0x17e1be4e +630, 0x132a0ec4 +631, 0x5712df17 +632, 0x2dfb3334 +633, 0xf5a35519 +634, 0xcafbdac6 +635, 0x73b6189d +636, 0x10107cac +637, 0x18c1045e +638, 0xbc19bbad +639, 0x8b4f05ac +640, 0x5830d038 +641, 0x468cd98a +642, 0x5b83a201 +643, 0xf0ccdd9c +644, 0xcb20c4bd +645, 0x1ff186c9 +646, 0xcdddb47f +647, 0x5c65ce6 +648, 0xb748c580 +649, 0x23b6f262 +650, 0xe2ba8e5c +651, 0x9a164a03 +652, 0x62d3322e +653, 0x918d8b43 +654, 0x45c8b49d +655, 0xce172c6e +656, 0x23febc6 +657, 0x84fdc5b7 +658, 0xe7d1fd82 +659, 0xf0ddf3a6 +660, 0x87050436 +661, 0x13d46375 +662, 0x5b191c78 +663, 0x2cbd99c0 +664, 0x7686c7f +665, 0xcff56c84 +666, 0x7f9b4486 +667, 0xefc997fe +668, 0x984d4588 +669, 0xfa44f36a +670, 0x7a5276c1 +671, 0xcfde6176 +672, 0xcacf7b1d +673, 0xcffae9a7 +674, 0xe98848d5 +675, 0xd4346001 +676, 0xa2196cac +677, 0x217f07dc +678, 0x42d5bef +679, 0x6f2e8838 +680, 0x4677a24 +681, 0x4ad9cd54 +682, 0x43df42af +683, 0x2dde417 +684, 0xaef5acb1 +685, 0xf377f4b3 +686, 0x7d870d40 +687, 0xe53df1c2 +688, 0xaeb5be50 +689, 0x7c92eac0 +690, 0x4f00838c +691, 0x91e05e84 +692, 0x23856c80 +693, 0xc4266fa6 +694, 0x912fddb +695, 0x34d42d22 +696, 0x6c02ffa +697, 0xe47d093 +698, 0x183c55b3 +699, 0xc161d142 +700, 0x3d43ff5f +701, 0xc944a36 +702, 0x27bb9fc6 +703, 0x75c91080 +704, 0x2460d0dc +705, 0xd2174558 +706, 0x68062dbf +707, 0x778e5c6e +708, 0xa4dc9a +709, 0x7a191e69 +710, 0xc084b2ba +711, 0xbb391d2 +712, 0x88849be +713, 0x69c02714 +714, 0x69d4a389 +715, 0x8f51854d +716, 0xaf10bb82 +717, 0x4d5d1c77 +718, 0x53b53109 +719, 0xa0a92aa0 +720, 0x83ecb757 +721, 0x5325752a +722, 0x114e466e +723, 0x4b3f2780 +724, 0xa7a6a39c +725, 
0x5e723357 +726, 0xa6b8be9b +727, 0x157c32ff +728, 0x8b898012 +729, 0xd7ff2b1e +730, 0x69cd8444 +731, 0x6ad8030c +732, 0xa08a49ec +733, 0xfbc055d3 +734, 0xedf17e46 +735, 0xc9526200 +736, 0x3849b88a +737, 0x2746860b +738, 0xae13d0c1 +739, 0x4f15154f +740, 0xd65c3975 +741, 0x6a377278 +742, 0x54d501f7 +743, 0x81a054ea +744, 0x143592ba +745, 0x97714ad6 +746, 0x4f9926d9 +747, 0x4f7ac56d +748, 0xe87ca939 +749, 0x58b76f6f +750, 0x60901ad8 +751, 0x3e401bb6 +752, 0xa058468e +753, 0xc0bb14f6 +754, 0x2cb8f02a +755, 0x7c2cf756 +756, 0x34c31de5 +757, 0x9b243e83 +758, 0xa5c85ab4 +759, 0x2741e3b3 +760, 0x1249000e +761, 0x3fc4e72b +762, 0xa3e038a2 +763, 0x952dd92c +764, 0x2b821966 +765, 0xfa81b365 +766, 0x530919b9 +767, 0x4486d66f +768, 0xccf4f3c1 +769, 0xa8bddd1d +770, 0xcc295eb9 +771, 0xfccbe42f +772, 0x38bacd8d +773, 0x2261854f +774, 0x56068c62 +775, 0x9bdaeb8 +776, 0x555fa5b6 +777, 0x20fe615e +778, 0x49fb23d3 +779, 0xd093bad6 +780, 0x54919e86 +781, 0x7373eb24 +782, 0xfbaa7a98 +783, 0x5f62fb39 +784, 0xe03bc9ec +785, 0xa5074d41 +786, 0xa1cefb1 +787, 0x13912d74 +788, 0xf6421b8 +789, 0xfcb48812 +790, 0x8f1db50b +791, 0xc1654b87 +792, 0x948b43c2 +793, 0xf503ef77 +794, 0x117d891d +795, 0x5493ffa +796, 0x171313b1 +797, 0xa4b62e1e +798, 0x77454ea6 +799, 0xbea0aff0 +800, 0x13c36389 +801, 0xe3b60bac +802, 0xa176bed3 +803, 0x2863d428 +804, 0xe2314f46 +805, 0xa85cd3d4 +806, 0x7866e57 +807, 0x8f03f5bc +808, 0x239ae +809, 0x46f279fb +810, 0xcca00559 +811, 0xaa07a104 +812, 0x89123d08 +813, 0x2e6856ba +814, 0x43a9780d +815, 0x676cff25 +816, 0x6744b87d +817, 0xee260d4f +818, 0xb98d8b77 +819, 0x9b0ca455 +820, 0x659f6fe +821, 0x28d20d1c +822, 0x601f2657 +823, 0xdec3073e +824, 0x61263863 +825, 0x1a13435a +826, 0x27497d1e +827, 0x17a8458e +828, 0xdddc407d +829, 0x4bb2e8ac +830, 0x16b2aedb +831, 0x77ccd696 +832, 0x9d108fcd +833, 0x25ad233e +834, 0xaa9bc370 +835, 0xa873ab50 +836, 0xaf19c9d9 +837, 0x696e1e6b +838, 0x1fdc4bf4 +839, 0x4c2ebc81 +840, 0xde4929ed +841, 0xf4d0c10c +842, 0xb6595b76 +843, 0x75cbb1b3 +844, 0xbcb6de49 +845, 0xe23157fd +846, 0x5e596078 +847, 0xa69b0d29 +848, 0x2118a41 +849, 0x7088c16 +850, 0xc75e1e1 +851, 0x6a4af2d6 +852, 0xf19c6521 +853, 0xaff7b3b1 +854, 0x615295c7 +855, 0xbda3a8d7 +856, 0x5b5ca72e +857, 0xdad9d80f +858, 0xfa81c084 +859, 0xf4703fa +860, 0x3ca54540 +861, 0xa8961d51 +862, 0x53d1ecc2 +863, 0x808d83b6 +864, 0x68e8c48e +865, 0x89be2039 +866, 0x9088ea11 +867, 0xb8665d12 +868, 0x91272f9 +869, 0x53dddff2 +870, 0xb7a54ab +871, 0xd2b645ca +872, 0x99fb8590 +873, 0x5315c8e +874, 0x2a913806 +875, 0x7f15eb2b +876, 0xa7f1cc5d +877, 0xbb2ee836 +878, 0xd9fafd60 +879, 0x17448d6f +880, 0x999ec436 +881, 0x482ec606 +882, 0x9b403c0e +883, 0x569eb51b +884, 0xb275d1a6 +885, 0xadd29c31 +886, 0xb7ebdb15 +887, 0xdfef3662 +888, 0x51aba6db +889, 0x6d41946d +890, 0x77bf8896 +891, 0xcafa6fab +892, 0x976ab40f +893, 0x49a6d86b +894, 0x56639e55 +895, 0x9945b996 +896, 0x81459b50 +897, 0xbce97542 +898, 0xe397c9c9 +899, 0x247a5955 +900, 0xb72b1573 +901, 0x86306f86 +902, 0x34f65dc5 +903, 0x909360c0 +904, 0xf3f696ef +905, 0xcb9faae5 +906, 0x93daecd9 +907, 0xde1af7af +908, 0x43a1f2d +909, 0x6d75cde5 +910, 0x9e412b6 +911, 0x5673fed +912, 0x16bb511a +913, 0x35ef4cca +914, 0x4e615aca +915, 0x5cdaf47a +916, 0x26676047 +917, 0x8c199325 +918, 0x2adf0cb9 +919, 0x84f2e6fd +920, 0x5e627f64 +921, 0xb7cee354 +922, 0x542ab4a6 +923, 0xe59cd83b +924, 0x89cc3f10 +925, 0x92b0f5f +926, 0xc1328370 +927, 0x8208d9f7 +928, 0x68eb00cf +929, 0xfadd4ac4 +930, 0x2517784f +931, 0x4042b99 +932, 0x75ce0230 +933, 0x97c5a1b4 +934, 0x1a97f709 +935, 
0x4c62781e +936, 0xf530a83 +937, 0x75776413 +938, 0x321c7240 +939, 0x6afe4e36 +940, 0xad00a2b4 +941, 0xbc05477d +942, 0xb0911e80 +943, 0x9935b87d +944, 0xd535eec5 +945, 0x149af45e +946, 0x786934b0 +947, 0xbc13cdac +948, 0x208bfa2e +949, 0xcf4b39cc +950, 0x6ac6c172 +951, 0xbfa9a37 +952, 0x42d28db6 +953, 0x2bf1ea63 +954, 0xbed6e677 +955, 0x50325d27 +956, 0xa79d3b8b +957, 0x52448bb1 +958, 0xefaad1bd +959, 0x833a2e54 +960, 0xd9de549a +961, 0x9f59672f +962, 0x9d5f5f16 +963, 0x1c914489 +964, 0xc08fa058 +965, 0xb188698b +966, 0xdc4672b5 +967, 0x594f720e +968, 0x56ed428f +969, 0x9b0898af +970, 0x8a64d3d5 +971, 0x773308d6 +972, 0x84d62098 +973, 0x46da7cf9 +974, 0x1114eae7 +975, 0xf9f2a092 +976, 0x5363a28 +977, 0xf2db7b3a +978, 0x102c71a9 +979, 0xe8e76aaf +980, 0x77a97b3b +981, 0x77b090d +982, 0x1099620e +983, 0xa6daaae6 +984, 0x86ff4713 +985, 0xc0ef85b8 +986, 0xf621d409 +987, 0xfd1561e2 +988, 0x4bcc687d +989, 0x596f760 +990, 0x7c8819f9 +991, 0x8cb865b8 +992, 0xadea115a +993, 0x56609348 +994, 0xb321ac14 +995, 0x1bac7db2 +996, 0x5fe6ee2 +997, 0xe9bfe072 +998, 0x15549e74 +999, 0xad8c191b diff --git a/.local/lib/python3.8/site-packages/numpy/random/tests/data/pcg64-testset-1.csv b/.local/lib/python3.8/site-packages/numpy/random/tests/data/pcg64-testset-1.csv new file mode 100644 index 0000000..0c8271f --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/tests/data/pcg64-testset-1.csv @@ -0,0 +1,1001 @@ +seed, 0xdeadbeaf +0, 0x60d24054e17a0698 +1, 0xd5e79d89856e4f12 +2, 0xd254972fe64bd782 +3, 0xf1e3072a53c72571 +4, 0xd7c1d7393d4115c9 +5, 0x77b75928b763e1e2 +6, 0xee6dee05190f7909 +7, 0x15f7b1c51d7fa319 +8, 0x27e44105f26ac2d7 +9, 0xcc0d88b29e5b415 +10, 0xe07b1a90c685e361 +11, 0xd2e430240de95e38 +12, 0x3260bca9a24ca9da +13, 0x9b3cf2e92385adb7 +14, 0x30b5514548271976 +15, 0xa3a1fa16c124faf9 +16, 0xf53e17e918e45bb6 +17, 0x26f19faaeb833bfc +18, 0x95e1d605730cce1b +19, 0xa7b520c5c093c1aa +20, 0x4b68c010c9b106a3 +21, 0x25e19fe91df703f0 +22, 0x898364bb0bf593cb +23, 0x5bd6ab7dbaa125db +24, 0xd1fe47f25152045c +25, 0x3bb11919addf2409 +26, 0x26a8cb7b3f54af8 +27, 0xe6a27ee11200aa24 +28, 0x7cb585ab01e22000 +29, 0x78e60028676d2ef3 +30, 0x5c32535e5a899528 +31, 0x83e8b6f8c4a46fb3 +32, 0xe56ef7668a161246 +33, 0x36dcbc15aeb73055 +34, 0x5ea247f0bd188acb +35, 0x438b547b84601a80 +36, 0x8acda2a1273e9e3d +37, 0x2b05e30a4b40c24c +38, 0xfd87236bd13af032 +39, 0x471df211d8d985ef +40, 0x18e8a5609a793292 +41, 0x46f0951fab6dc4e3 +42, 0x6c199c4e700f6795 +43, 0xf04aa16bfb7d22cb +44, 0xd763d269fbaffc89 +45, 0x9991930cefbe5c2b +46, 0xb2a11b953f824c96 +47, 0x63fd9f52172c44b0 +48, 0x183bdad907b1d848 +49, 0xe17953cddb931c52 +50, 0x515cf16726ec205a +51, 0x88c327605150711a +52, 0xc7090dd79cbc8dc3 +53, 0xcb487cedeb00a350 +54, 0xc8abf254d87b657 +55, 0xd43cc4cbfb493d1a +56, 0x8705452e5d9ed1e +57, 0xcecd11446769cf43 +58, 0xde72156c8d65bc69 +59, 0x796a8f0f47d52ee8 +60, 0xb4c0da443917d6c3 +61, 0xe07ad7568a8e3dc3 +62, 0xc24a8da39ce6dc21 +63, 0x92b21ea80a8556eb +64, 0x572f21e531edf3af +65, 0x9b917ed56bbed198 +66, 0xe65fd8ddc5ab3d7d +67, 0xf55a80a8ec84fa18 +68, 0x18fc22e1a5227b61 +69, 0x72305dc7eeaa79d3 +70, 0x47ce58a36e7592cf +71, 0x14c6374340c0f7cc +72, 0x6f98273d4eb5a2c +73, 0x59a8702c46fe8f8a +74, 0xb67cbd8113cfe57f +75, 0xaa03c5db5f5b7690 +76, 0x3fb0f77ea4568013 +77, 0x756530990398b26e +78, 0x4c1952b2a3a6a343 +79, 0x1da15c5383074582 +80, 0xb405b21c81c274f7 +81, 0xbe664677a16788b +82, 0x9d2e37550bcee656 +83, 0x8b4589f0d9defe02 +84, 0x2935f018ee06a59 +85, 0x3834bf88be97ed11 +86, 0xa610d049cea79b6d +87, 0xd49ffc0d09a59ea9 +88, 
0x4073365b76567adf +89, 0x499eefb9bb7513e2 +90, 0x74a743ee6b0138a9 +91, 0x3bf0880f2d947594 +92, 0x555d1c0498600a99 +93, 0x923b32a88ef2ffa4 +94, 0x7325411065fbedea +95, 0x9f4129ff8b79d300 +96, 0xab2b0a9b8a3785dc +97, 0x11734bdfba3a1713 +98, 0xc8333398841ba585 +99, 0xee2409cc234e6742 +100, 0xf6638e700872ecd2 +101, 0x10875300c13cd284 +102, 0x27a9bbed7c15b2d3 +103, 0x3c87f8fef31ce9bd +104, 0x92be263cd0914a95 +105, 0xa7b0f11bc742307e +106, 0x4a56f788cc1c1a3c +107, 0x4a130fa32257a48b +108, 0x5d4d9eda16e90286 +109, 0x7cc2af564844bedc +110, 0x2532867bfe7cda1a +111, 0xb1c504676611fd17 +112, 0xce8e86cfb4189aee +113, 0x99685898980d1970 +114, 0x8c3b67db23bcf1e +115, 0x73e14c93905b135f +116, 0xf0271b64ac2bd4d3 +117, 0xf4beba82f3ec1b2d +118, 0x1cdbf3ee9f210af +119, 0x2e938557c09c3ea6 +120, 0x2d314ccfa6ffd81d +121, 0x31ad47079950ade4 +122, 0x342b27547b900872 +123, 0x171b0e20b9ef1a76 +124, 0xdf10ce6318b03654 +125, 0x1d625df4aa718897 +126, 0x8712715a9f6e02ec +127, 0xb4a072da725bca3b +128, 0x19d346cb7734bd42 +129, 0xfd4281d311cb2958 +130, 0x58274c9519fc8789 +131, 0x4cacf29d885fd544 +132, 0x784b14d1c2523b80 +133, 0x2d25242131bb2373 +134, 0xcd2a5e43a7d9abf9 +135, 0x15eda3806e650ecb +136, 0xdaac5e277d764d96 +137, 0xdc5a5dd59aaa94e0 +138, 0x40d00237a46d5999 +139, 0x6205dd35a692743f +140, 0xbbd8236740361f09 +141, 0x1625c9f4e7288bf9 +142, 0xb74f12df1479e3ce +143, 0xb2d72a51b43d7131 +144, 0xf006a324b3707c83 +145, 0x28e8ab4abe7655b8 +146, 0xfb480093ad7ab55 +147, 0x3f8abd0d6ff8d272 +148, 0xc81a94177ac26bb7 +149, 0x3cdc178307751b14 +150, 0x9de84cc2b10ba025 +151, 0x3f8ab5aefcd046e2 +152, 0x43bdb894e1ee83b2 +153, 0xe288a40f3f06ac9d +154, 0xdab62a7d04b4f30f +155, 0x49f4e20295e1a805 +156, 0x3643764805e0edef +157, 0x9449954618b6b +158, 0x6c87e0d4508e0ce0 +159, 0x3a334be688a9dd7b +160, 0xb35c39228776e499 +161, 0xc4118bfff938490e +162, 0x88cbde3dcbb034b2 +163, 0xf91b287793c417c3 +164, 0x42b15f731a59f5b3 +165, 0xffa27104bbe4814d +166, 0x1b6789d138beccde +167, 0x542c2c1440d0ceb9 +168, 0x367294504d18fa0d +169, 0xf918b60e804a1b58 +170, 0xd390964e33a9d0e3 +171, 0x23bb1be7c4030fe8 +172, 0x9731054d039a8afb +173, 0x1a6205026b9d139b +174, 0x2fa13b318254a07e +175, 0x69571de7d8520626 +176, 0x641a13d7c03332b7 +177, 0x76a6237818f7a441 +178, 0x4e77860d0c660d81 +179, 0x4441448a1c1cbdb2 +180, 0xccd7783a042046e5 +181, 0xf620d8e0805e3200 +182, 0x7de02971367fdd0c +183, 0x539c263c5914cab1 +184, 0x9c3b9ba1a87bbf08 +185, 0x6d95baa34cda215f +186, 0x2db3f83ace0bac5f +187, 0x7f5af1da2dc670a4 +188, 0xfcc098d16c891bfb +189, 0x81a33df1d7a5ab12 +190, 0x767b0f863c8e9882 +191, 0x7a92983830de483d +192, 0xfa7598c37a79ac25 +193, 0xb89b3ca42ce03053 +194, 0x457a542b8efed4f7 +195, 0x571b7737fd0eeda7 +196, 0xa0f59e524485c0a +197, 0x82dca766b7901efd +198, 0xa68243caf6a3bd5d +199, 0x1bac981c6c740e5e +200, 0xbcd51bedf9103e44 +201, 0x4e197efd3ae5a7bf +202, 0x523568efd782268b +203, 0x5ec4ef1191fef09 +204, 0xed751ed5e31c9ab +205, 0x44eac24de03e1b29 +206, 0x9237d57c011d3fb3 +207, 0xa8c6da0f7692f235 +208, 0x9f9eb6bc15d6cac7 +209, 0x34bb8e0c93427aad +210, 0x115febd738eaac4a +211, 0xa439991ed139d27a +212, 0x45c7c2633d8710a2 +213, 0x48b7475f3405a3ce +214, 0x80158497c77bd00b +215, 0x935c316a5b1657cb +216, 0x59c5d54440e9695e +217, 0x337c78c5b3d0ede2 +218, 0x8c46bb956b93790d +219, 0xbf1dd03e471d71c5 +220, 0x2d375e90a4bef583 +221, 0xd0365428331b3790 +222, 0xfcd3969ac827ecd4 +223, 0x392fb6c580498410 +224, 0x6d6db4ceab5ea6c0 +225, 0x9bf84f1972e24786 +226, 0x798dfd820959dcc5 +227, 0x2e425095e65e8bfb +228, 0x8c1aa11536b1c9c3 +229, 0xd28e2ef9b12f6f74 +230, 0x86583bc98c8f78d2 +231, 
0x489877530e3f93e7 +232, 0xb1d9430631104a15 +233, 0x1814f6098e6263bd +234, 0x8e2658a4e0d4cd53 +235, 0x5afe20e2531cdb2a +236, 0x30d02f7c4755c9bf +237, 0xe1e217cda16ed2d2 +238, 0xccb4913a42e3b791 +239, 0xfff21363ac183226 +240, 0xe788690bbda147a7 +241, 0x76905cf5917bfc6a +242, 0x2a8fa58f7916f52c +243, 0xf903c0cc0357815a +244, 0x15d20f243a4998d2 +245, 0x5b7decee5a86ea44 +246, 0x114f7fc421211185 +247, 0x328eb21715764c50 +248, 0xaffaa3f45c0678fd +249, 0x2579e6ef50378393 +250, 0x7610ab7743c19795 +251, 0xf9923d2bd101b197 +252, 0x57e42e7a62ba7e53 +253, 0x9f1dc217b4f02901 +254, 0x88a9ebd86509b234 +255, 0x867fc926aecc8591 +256, 0xaf22c1bfef04c718 +257, 0x39f701f0313f4288 +258, 0x6171ad397e6faab2 +259, 0x239bb5b9abdec4fc +260, 0xd9a591e25dd01c6e +261, 0x826dc4a75b628e49 +262, 0xf112b152c408f47 +263, 0x6843a06110f86c0 +264, 0x965e56a7185c1332 +265, 0x8d84492edbc71710 +266, 0xeee8ec111cfd1319 +267, 0xf2858e94ad98e458 +268, 0xbc9589fdf5f3a97e +269, 0xaf0ceef3bc375130 +270, 0x48f4aaf13fa75c1e +271, 0x111e9db47bee758f +272, 0xea3171df130164ba +273, 0x2a7bbe30bf827ab6 +274, 0xc516c3fdbf758c35 +275, 0xec55097754b04be5 +276, 0x374a997d52b6d3e6 +277, 0x487df5456085ffbc +278, 0x528883b84df8eafe +279, 0x805f77ab5ba26f86 +280, 0x8eb81477dc04f213 +281, 0x471ea08ec6794d72 +282, 0x69d3667ecc4d2176 +283, 0x98b7b6e295548a66 +284, 0x3877713c173f8f2 +285, 0xa00542570d0e8de3 +286, 0xf534b1bfa4033e50 +287, 0x7e1fedeac8bf6b26 +288, 0x8043f37c89628af4 +289, 0x1dd7039ec295e86d +290, 0xce9c05b763a40cc4 +291, 0x246926481e61028f +292, 0xb7cb0f1babf5893b +293, 0xefe6b777f37fc63e +294, 0xebbcabb4cb35cdcb +295, 0x39fa63cd711eeea9 +296, 0xad5d3ba7aaf30c8d +297, 0x8e9e78fe46021990 +298, 0xc7eaef6e7d5a3c62 +299, 0xefccdd5495d3f386 +300, 0x2179557ee8cfc76a +301, 0x88a77f621f0885ce +302, 0xafda62674543d90c +303, 0xb8e6fbe2e13e56c0 +304, 0x8bfbbe26a14f9b1a +305, 0x1404f59f5851f8c3 +306, 0x1140c53a0489566d +307, 0x3edf2d138b5c3f1d +308, 0x75d6bb275d817dc +309, 0x8e660ae27107664e +310, 0x7a8021038ee303e1 +311, 0x2042ef5eefa9079f +312, 0xe3e7b90bbf6d457a +313, 0xf3f819d2bb9405b +314, 0x522e42155cae0c10 +315, 0xf5bfbb975b40e233 +316, 0x2cf82b614dd95cfa +317, 0x183ef4a96bc40e55 +318, 0x9f6e351c5ba4e752 +319, 0x37c1110683c90846 +320, 0x1d89b7a996d8a977 +321, 0x18a444f77c7cb4d9 +322, 0xd0a8a971b78dc893 +323, 0x860232fb9e6543f1 +324, 0x60b6097f51002555 +325, 0xca1e5214123e3894 +326, 0xe03fe695c95f99bb +327, 0x2c7c6779d5f03622 +328, 0xafeeee42f63055d1 +329, 0x670dde905515936a +330, 0x9a922f42b59fb094 +331, 0xddb5ff49af5a651a +332, 0xe61b04c9e58ebbf8 +333, 0x4e459dcf272e7fc4 +334, 0xd549e92c16adceeb +335, 0x7a17dba1299d4a9c +336, 0x825d756109f2b585 +337, 0xba142e61a9cb203e +338, 0xc2a19f00e9c04a30 +339, 0x2d0f8140d23d0652 +340, 0x8b866d4d4d6caaf4 +341, 0x4f11d90dd91f8217 +342, 0xf6efc37373b9e0d +343, 0x248493d6cd6a4736 +344, 0xd12b6ae74a951a3e +345, 0x56e34722070b70a7 +346, 0x22d3f201cc9fa0eb +347, 0xbfdcc320008291b7 +348, 0x1a7a6922e9204fbd +349, 0x831421e0c4945ae4 +350, 0x66316feddddf0e11 +351, 0xa8c86a1517456554 +352, 0x14a9049ad989e335 +353, 0x837022259f141ecd +354, 0xcb71793a06c261f7 +355, 0x4aeefc07ebe09a79 +356, 0x8982f15aa3b6594b +357, 0x67bccfa7ed9b0d5b +358, 0xb377463b523e9dec +359, 0x53d3d594870fecb7 +360, 0xa5274b1caec5a60a +361, 0xd6316d0cb643db39 +362, 0xabc1a9b536de88ce +363, 0xed2fdb1383d2a077 +364, 0x12319c6feb97221b +365, 0x7e0f6cd40ef47403 +366, 0x86135c84fe26dbf8 +367, 0xc96622d3fbbee19b +368, 0xe3989d8d8511573f +369, 0x42cc365554d1fdc7 +370, 0x4c1a1eb8bbce8b4f +371, 0xfc4e30e7ef2034c1 +372, 0xc490444317a91e76 +373, 
0x7ccdf469ff5dc81c +374, 0xf5a0da4110cc09d7 +375, 0x505227baf34c0fb5 +376, 0xbe58737e8a35cc88 +377, 0xd449bee91b3e8c41 +378, 0x3e590e23299d0e6 +379, 0x291a7d9e0a64caf7 +380, 0xdc6fafbdfebd2293 +381, 0x8223f1e259fe8a65 +382, 0x6186fbc9efd9e3df +383, 0xfda39b07e4007ffb +384, 0xfc19aea98574dc02 +385, 0xd0e10d354fcacd8c +386, 0xc9619916544a55a5 +387, 0xd454d50a8c8558cd +388, 0xcd94a246712d91e +389, 0x76a771f5d1231cce +390, 0xdd20cb2b7b370ee5 +391, 0xa6f4f50feca57c49 +392, 0x78c8fb431f17ab9c +393, 0x1b692b79a59b43cc +394, 0x4c45045d287da7e6 +395, 0x522132e18bf43928 +396, 0x25c458983138b41c +397, 0x2a1fb426ef229796 +398, 0x74dc324c74e5dd3d +399, 0x6df75e3eb6eb5374 +400, 0xb63f2f4f9ca25b61 +401, 0xac72286112ee54d6 +402, 0x5a966f3d0a6863c4 +403, 0x8d7046bc64a46fc2 +404, 0xa7b740fd6e3087eb +405, 0xcdbcbe0340cfcdf5 +406, 0xcb632613bf312b65 +407, 0xa91b3f2c2aac238b +408, 0xa06deb3f5ae555a3 +409, 0x29d72e1f8db69 +410, 0x2d004bae09728ea6 +411, 0xc6eee5dce0736cc1 +412, 0xa7493145500ff60f +413, 0xc4d68c4aa18ab93c +414, 0x8210c29e79d48d7f +415, 0xd0999d7889ecbef6 +416, 0x6e3bd61e66e93566 +417, 0xe6cc13d47d7d7b1f +418, 0x3d6f181f42e03979 +419, 0xbed4e14fd867604a +420, 0xbe511c84067bd86d +421, 0x49a876d89e697d38 +422, 0xc04c3dde8f889c98 +423, 0xaf293eeab0f53e3f +424, 0x9f6291dd65732cd6 +425, 0xd7811ac01de78c01 +426, 0xe385cf0261d50ec2 +427, 0x5a64134b3542bbf +428, 0xf9d1302bc6f13a68 +429, 0x5d2aabbea37d8c31 +430, 0xd9842e99a5192970 +431, 0x713eadc4cd30e837 +432, 0xb7b002fc72abb413 +433, 0x276cfeea526af1cf +434, 0x8519fe79b633a0ce +435, 0x2f0e87363705a3e2 +436, 0x9adbac0be3c371e7 +437, 0xf3f44ba899a6173c +438, 0x782d6c29618fde2b +439, 0x7f61062acec408f +440, 0x6e79cd836359258f +441, 0x5c8e9b138df5785a +442, 0xa54359c9f39a9a84 +443, 0xeec3f033135084b0 +444, 0x883ee717787a535c +445, 0x9a2422b513a73b00 +446, 0x2dd4beddcdd64a58 +447, 0x90c8a13202239c7b +448, 0x85b352ab759646d9 +449, 0x139f5cb2e46c53aa +450, 0xe1d3ba6c721c66d1 +451, 0xaa66e0edc4b60a98 +452, 0x3521275c75be29b6 +453, 0x490a5190b3edfa5d +454, 0xd2abcdd2ccb2f14e +455, 0x9d9be8bef4a5857d +456, 0xde19676f13ef7755 +457, 0xdac2fee2e42615f3 +458, 0xf4239801cb02f2ab +459, 0xaa8bf923ed91875c +460, 0x61d18a1940e4c7c0 +461, 0x1eb6aa3d5f077a6d +462, 0xee7374c063bf29d8 +463, 0x2f0a59e34d76268d +464, 0xc92e80e17d1eb3e9 +465, 0xafd05b3ec3d2ca72 +466, 0x28a61ad8d6c497b8 +467, 0xa7094d6834ad7d47 +468, 0x57d80ea9eccbb4f +469, 0xb047e0fee6cdaf16 +470, 0x44f41b5eb48c00bb +471, 0xd6dc8e1eb9c8c9ba +472, 0x47adfd2c638c7849 +473, 0x365d63db7d526c68 +474, 0xc21cda439016135d +475, 0x14d10c3f0f98863c +476, 0xa93e56f74e037602 +477, 0x3b4e9c8915bdc9 +478, 0xb46f5ae155e54aa2 +479, 0x8e470d21ce1943e1 +480, 0x60b96301b5ba2e8d +481, 0x1b473a41d381f9ff +482, 0xabcf5a8e3269e73f +483, 0xd410f6e94fb21fa1 +484, 0x65d1a47eebf87e5e +485, 0x48eaa201c61cb843 +486, 0x212c1abc2499bfc5 +487, 0x4255ad8377d2d8d +488, 0x44caeef472010612 +489, 0xffae764524f572f2 +490, 0x78d374d20c9ee550 +491, 0x6e003206c0511cee +492, 0x7998a159145bfb82 +493, 0x921239650bda1d4d +494, 0xae05025509bcfdc5 +495, 0xc6430c980be407b4 +496, 0x78524f1744b153f1 +497, 0x84089e6f468181fe +498, 0x8d0d21d7dfb6c254 +499, 0x90bad90502a33603 +500, 0x3072a403cbd16315 +501, 0xdfadddf3f1c040c2 +502, 0x22f0b0639d9ff975 +503, 0xb49e48a4cad0765b +504, 0x95a0a04f8239709d +505, 0x56e147a24a4c481f +506, 0xacf16ef61dea4c7e +507, 0x424040afd2700de6 +508, 0xc67e8096a3c717a9 +509, 0x39f164181dd0a399 +510, 0x2449cedc1d62198c +511, 0x7a53df11a1f1a61c +512, 0x5596f1d4a3badae3 +513, 0x38ed4c822072b3d0 +514, 0xf07ef346b3fd730a +515, 
0xfd349c35c3ed51fd +516, 0x2f15c9c7890f8f32 +517, 0x3b470df52b173c29 +518, 0xd31bfc8981281af7 +519, 0xbbcc9bdf561215bb +520, 0x5782fffea326574f +521, 0xb0ebdcfcc5e03290 +522, 0x7fd89d93d2b3fbef +523, 0x280ea1865d9ba2 +524, 0xe726959845b2c100 +525, 0xd0361f032cd7dbb1 +526, 0x3c65ec2028b81a22 +527, 0x5221e9b2188920bf +528, 0xeb5ab27c4125ec20 +529, 0x80a32dd48b54f0a4 +530, 0x369b5ced1012bebb +531, 0x582d35d76530bc6f +532, 0x7b50dc9b48e1e37d +533, 0x37fdfe8bbacf8dad +534, 0x7a0cb7e6e93840ea +535, 0xa1132c870be0b2ce +536, 0x9d8ac2c68267cd1a +537, 0x470969b647fa7df4 +538, 0xabcb7d8adf7e2d24 +539, 0xacdebec9bdf9eb1c +540, 0xe30f4cbf7eb6a59 +541, 0x746673836c4df41d +542, 0x75120a6b647bb326 +543, 0x2f4eab556c3f6878 +544, 0xd84651ab05405b7a +545, 0x9e695808b9622284 +546, 0xc93b71e56aa6e1a5 +547, 0x2be7f3be4a7b7050 +548, 0x6497e910b6733241 +549, 0xcf7050dfd08076fc +550, 0x4e3cc156eca183f7 +551, 0xf801a33d9326c265 +552, 0x6aa293c8a47d40e6 +553, 0x28c429755faa6230 +554, 0x82b818651f54e7bb +555, 0xa84d726d7acdbead +556, 0x5cfa535d5774965d +557, 0x4a34b7b1cb48d53 +558, 0x86a7b5bce426de84 +559, 0xfcd2307cecdb7318 +560, 0x16dbaaa71181a038 +561, 0x88e7e8cd261c2547 +562, 0x3c09ba6d1d5ea913 +563, 0x5dd3d643734ee5b6 +564, 0x326d725fe8cbb33 +565, 0x7bcca9ca2da8e784 +566, 0x482dcf6b11d7f9a4 +567, 0x1291b605b4cd3e04 +568, 0x6988181b50e2f4a8 +569, 0x649e3c37131fc292 +570, 0x4eeb67b9e21eba54 +571, 0xc051d39073dec45f +572, 0xc99c52e110270d67 +573, 0xcb813d5d77868add +574, 0x423a5f13573e7ac0 +575, 0x231ac4cc4fe73616 +576, 0x4c22b888a6e600ea +577, 0x8059a6dc7c9e25c6 +578, 0x49f498a5b8ad22de +579, 0xf1e812cc6d1826c8 +580, 0xbbaf60abe8b11e00 +581, 0x1d31d7f4d8be9a6a +582, 0xfeadce70a9a10c14 +583, 0xb47c635bc136996a +584, 0xd88e694c8da030cb +585, 0xc41bbe132aff1364 +586, 0x34249ab18a4b0800 +587, 0xf14b5c825aa736cc +588, 0x2710be6b08df78e +589, 0x2ab56bcc9bf9e740 +590, 0x9b7f6e591b5f648 +591, 0xfb665c3772f34135 +592, 0x628a0a5d2db5d8d5 +593, 0xb3e3f251e61b5259 +594, 0x82310ae33faf1b23 +595, 0x24af8723a65cbd0b +596, 0x671c93282fc4ad97 +597, 0x6cabeaac77270cad +598, 0xef4643fe38b02b7f +599, 0x7b011549d1ac6653 +600, 0xe2af87b9fccfe89 +601, 0x36b71ad67197ac8a +602, 0xdbba55d06f2fd93b +603, 0xf571dbd764b7f7e5 +604, 0x38ea402501cdbd45 +605, 0xb8ab5b5b1bab2913 +606, 0xfab973c4d45f32bd +607, 0x9364f1717c2636b9 +608, 0xfad00f4d983e00fe +609, 0xc90c532a11aef75a +610, 0x64a6eda96e44783c +611, 0x35891f2eb84520be +612, 0x28d216080caed43 +613, 0x129629cc5bd206f6 +614, 0x22c3d39822cbb4b3 +615, 0xf1efbf4cce1eaa2b +616, 0x7070cba12524ed08 +617, 0xa7ed0be9deabf20d +618, 0x8ddb4cd6b454f76b +619, 0xb82814b1db37b63 +620, 0x418e83b36de01876 +621, 0x9a538c7f39c6413 +622, 0xee0cd7abf8a2ecb9 +623, 0xa9222b07e95590f3 +624, 0x6296a415d68341e6 +625, 0x981e0a5a8f811929 +626, 0x4bb372d3b0de283d +627, 0xa9805b5971866e16 +628, 0xaf3b5f5183497657 +629, 0x2152b0fd23c3d9f +630, 0xb730c325b7173180 +631, 0x1e3439d231608c19 +632, 0x1c5ba6031379823c +633, 0x87f5d12d6d365cbc +634, 0xd3bc7f29614bc594 +635, 0x63102214bb391268 +636, 0x482bbd5bba648a44 +637, 0x6a23604690759dc4 +638, 0x4091d41408d3a39e +639, 0x7cd017f922101b15 +640, 0x7ce9004ac5f9231 +641, 0x978bc3d8ec7f7fdf +642, 0x5bd0c4d780580c11 +643, 0x4313c068bb040153 +644, 0x3ab7dab7bc38bf80 +645, 0x3aaf9c187728deea +646, 0x6633a4ce8efb88d9 +647, 0x7263b089878f00fc +648, 0xd0d767e96fe00eb8 +649, 0x184a7c0c01908028 +650, 0x1ebdf41e6f76e186 +651, 0xeb740ee1d0402083 +652, 0xfccf4974edb1c339 +653, 0x16e2707aa28306d +654, 0x1684f0bdb018c3a5 +655, 0x887b6b67b88aa862 +656, 0x923d7810a2bea33a +657, 0x56b3560babef5d6b 
+658, 0xb39a14614c54b8c6 +659, 0x33e4dc545a509fc8 +660, 0x26e21f84142da9b +661, 0xdd07598125756855 +662, 0x572d49a071d7ae0a +663, 0xba3c7e3baea28760 +664, 0x7ecdb2d714db4b61 +665, 0x1c62b4920e1b2fe2 +666, 0x71bfafb70092834a +667, 0xd710a4228f60d56a +668, 0xeb16277d4ce4e95b +669, 0x968168c90b16d3a1 +670, 0xac3439dfe8ad0062 +671, 0x5a8226f9dd5876ad +672, 0xb843affe917291b0 +673, 0xd76d1e67051f8259 +674, 0xb73a6638cce8ccde +675, 0xa0e6afd3c7295f9 +676, 0xff8857b4bbb5f4c6 +677, 0x99becf78938f0426 +678, 0xfcd17edc1e70f004 +679, 0x6223b8b23f2f50 +680, 0xca875f3e84587b4c +681, 0x7d1e81e589f87fb9 +682, 0x9eb621586aa826fc +683, 0xf46fb9ef5b9c2086 +684, 0x2882c9b7092725f3 +685, 0x5493f099bbedcd02 +686, 0x90c1ec979ffa811d +687, 0x963f765025bcc53 +688, 0x56194e3ec3d9d4e9 +689, 0x7ec4720954cac1f0 +690, 0xfab3145171af7f90 +691, 0x52a0b4e41a13b593 +692, 0x740e2d4d5909d126 +693, 0x98f5339c09c94a28 +694, 0x1700e462fe8dec76 +695, 0x3dbffc2aa4695ac3 +696, 0x5763edacabdfe2a1 +697, 0x7b5b623ce49ef21d +698, 0x30addc66f49860df +699, 0xcc7511a6c31bceda +700, 0x1b25b61ca75db43b +701, 0x416bc4c298e59046 +702, 0x4cd11fe2d74e4649 +703, 0xb54458a9229fc978 +704, 0x8c21a27882b6ca35 +705, 0x57887c8b5e01639b +706, 0xf4e893da996680bb +707, 0x8d601297702c9c0d +708, 0x2a27904a30aa53af +709, 0x497800f6917ea8d0 +710, 0xe96db3340ada9c00 +711, 0xcc23166f14c010ee +712, 0x782690d78fa65ec9 +713, 0xf3e00d74a0878eda +714, 0xa7cbb683decca0a3 +715, 0xdd2e038e683a94aa +716, 0xe2096ff8da896ca5 +717, 0xf7c83400afdabe11 +718, 0x395b8c6f6a4086a4 +719, 0x4a164ec05bee71d4 +720, 0xe87aa5d1ca0462fe +721, 0x8dbc5aed6dff9ceb +722, 0x12120d1e9552707b +723, 0x877dca6889b3e6cd +724, 0xbd65605c01e900fb +725, 0xbd6b82c4157c3115 +726, 0x8b60282732caf78a +727, 0x279fcf5e5de9e57f +728, 0x34b34ebfb6a37eae +729, 0xd258cc1a14e03b7b +730, 0x9a528ba3db4a13fb +731, 0xffa0aea59d057746 +732, 0x27fa7f456cd37c4e +733, 0xe1117a57a6fdce63 +734, 0xdc8fc903970a1551 +735, 0x492dd104f30faf29 +736, 0x110def0959e5652b +737, 0x7f8d1997636fdd15 +738, 0xfb77b05e538a9b59 +739, 0x2e41fa35b4b01fc6 +740, 0xbc35ae69a3374085 +741, 0x192c2a681c2d9b4b +742, 0x12566b8866c189d6 +743, 0x9d88ea785c5185c8 +744, 0x30a621ad5f983c4 +745, 0x8b875efe1206f587 +746, 0x224d25c3af6e3423 +747, 0x7503e976a1ac7bcc +748, 0x3c98aa869e823859 +749, 0x3d8835304b646892 +750, 0xf6353330ff970bc2 +751, 0x8a673f5e2edb8acb +752, 0xf2fdcc53493838b9 +753, 0x85ddcd526236af16 +754, 0x60afb99814c676c5 +755, 0x32a1c2749e281ca8 +756, 0x2367a92ae3bee9ca +757, 0x219fe082703743cc +758, 0x34d8b74dc85182a9 +759, 0xdd04164c72db23f +760, 0xe293ac28fe2671a9 +761, 0x9ca7d169cbda6f45 +762, 0x705c47972b4240ed +763, 0xc10eda9eeb536209 +764, 0xc36ddacd0c94e85d +765, 0x8eb592c27e8cd0d2 +766, 0x3e815991c76e7cc4 +767, 0xac9cfce31acf7580 +768, 0xbf7a4cb31c7aee94 +769, 0x663077444aceecf6 +770, 0xe7f614ff386eb568 +771, 0x79d7a229c66912c0 +772, 0x161ed4311f63e1f3 +773, 0x308a5faeb9982ede +774, 0x7b38ddb9b7efd10 +775, 0x1e103a2589b27ecf +776, 0x67b02baf4259f27e +777, 0x868921c115ea2eee +778, 0x959791912200f71e +779, 0x4dd55f36dec10557 +780, 0xe3464d90080cb99d +781, 0xfb2d4f6accce652f +782, 0x109900a9257d77ba +783, 0x3c4bda8e2c83684c +784, 0xc9ae040fb7f868c6 +785, 0x78098ffe994f4905 +786, 0x7a94c33eca77f0b4 +787, 0xbe6a2a95e9b5c0e8 +788, 0x797d39cf963f4837 +789, 0x8d2e249e4425d06d +790, 0x6ae2c30cd5da06f4 +791, 0x904489de762b179f +792, 0x84713e2dfb591e3b +793, 0x6405a40da3f6f51b +794, 0x976b560d663a2df1 +795, 0xed1c544784ba1e22 +796, 0xca658e995ed9344c +797, 0x2b1c6b8e4db49025 +798, 0x52b1513da528bad +799, 0x3c63406d256d9968 +800, 
0x63a31ca3d423f85e +801, 0xb05a81f55789a720 +802, 0xd04412992c476c8e +803, 0x828ec2f77a150a3d +804, 0xee50926671bb60c6 +805, 0x5aa70f93e2df61b4 +806, 0x94d60fa2e8655858 +807, 0x3f5e5b770703cc7d +808, 0xc62dfb2688ca7784 +809, 0xaaf02e1e8ba89fe4 +810, 0x4ab74e0d8c047405 +811, 0x31ee04fbac6fcead +812, 0x1203b78b8228f5af +813, 0x412a70836f9aa71a +814, 0xab51cf98c03f1819 +815, 0x783a3ce9ce137f65 +816, 0x8897085b0a072cf2 +817, 0x685dd9bde8798cb +818, 0x9a1fac7b1705e2c1 +819, 0xf3e9ff98de48e9cb +820, 0x5c2d3eb1a1fbe917 +821, 0x3bda718b6b54d82e +822, 0x29f2dd18f22f0821 +823, 0xb992da1572ac3597 +824, 0xacb69e7aa14b34f7 +825, 0xcd36e3ad14f088d1 +826, 0x6aaacc96a1ec55e8 +827, 0xf8ac593f154fe68f +828, 0x18fc9cbff012339f +829, 0x2f3368ccbbb99899 +830, 0x7cec7d17f37031f7 +831, 0x96e86bfaadcb8fc2 +832, 0x74f9e7ee3d42a752 +833, 0xbd52f6c7d9b0733 +834, 0xa48e6d96bb6ce1c9 +835, 0xaefa058254b82133 +836, 0xb7a19edfd0929107 +837, 0x6160ce9125b26e26 +838, 0x6537dbbde1d2aed +839, 0xc567f9a6bec52dde +840, 0xca29fd3f22443342 +841, 0x7732aa6db6a1c476 +842, 0x8f5a4d7df6b11b3 +843, 0x76649262aa7e31e1 +844, 0x60a13eb125fbc829 +845, 0xc81e4d123dd21ac1 +846, 0x643cbb09bb72f86b +847, 0xf971a98fb25555a6 +848, 0xffa2774c66692d56 +849, 0xcb33c16c50b13ea9 +850, 0xfabf388dffda0e9b +851, 0x55d41ec12ca24b9f +852, 0x91cf693a3467e807 +853, 0x6be2c00b2c31d6dd +854, 0xc5cf513b5251ae28 +855, 0xffc4384212403dec +856, 0x45d4e1865255a69d +857, 0xfb1dcf956972086a +858, 0xcae946a55c4c55b8 +859, 0x7351ac7720e385c1 +860, 0x19aa8ffd86240254 +861, 0x8f515ae78f4040da +862, 0x1e1ed2058de50fce +863, 0x22d006dcdb374243 +864, 0x6e0f0ede7c95b441 +865, 0x70e8aa81b53b4d25 +866, 0x998f309ea41e3814 +867, 0x89ed6598fb66f390 +868, 0xb5997dc3278060df +869, 0xb2a021eac4f7e046 +870, 0x3705b60aa2fd0768 +871, 0xfc415079ab9200e +872, 0xf2871ac4cf45ecc9 +873, 0x24bf758d2246175f +874, 0xac503dd6f8141b3 +875, 0x4e879d12d9f03b3 +876, 0x82034af8cf93b644 +877, 0x59899dd7e478a6c7 +878, 0xae90addb6eb11507 +879, 0x1524ddf76730cdef +880, 0x6fd4afd5456b1c9d +881, 0xcddb9221ea001cbc +882, 0x64ff400bbf2e8604 +883, 0x6dda10549b06ed9b +884, 0xed2c85104c261527 +885, 0xc7e09217d29929a8 +886, 0x56284df611a428b1 +887, 0x1a7608289c0a61 +888, 0x7cb63db15166ff66 +889, 0xc6013c76fcdcdc72 +890, 0x8e5dd566c7a5a676 +891, 0x5a8e8565f40d133b +892, 0xe465973455848c44 +893, 0xf92eecbfe0f3c2c0 +894, 0x7d64155d4dcc5cac +895, 0xf17595706f988dad +896, 0xd590a001a6a19c5c +897, 0x82a164475758db3d +898, 0x6b144993ea1bbe32 +899, 0x22a81a7a6e453779 +900, 0x8e8c298df1a68a73 +901, 0x78056afd6d936b4c +902, 0xaaceef0325faaf62 +903, 0xe78bb7699f82266f +904, 0x523a2d283c5a5166 +905, 0x7076d87088f6c6db +906, 0x6087dd54cff5aeb2 +907, 0x7ef82e62cb851680 +908, 0x4e8bcc8ed84d03d8 +909, 0xd12fa0361df3cfd3 +910, 0xefb89c79f8127297 +911, 0xa9af4e2fbce0b1f8 +912, 0x462136685b70331e +913, 0xe9e74c93da699b77 +914, 0x9ec69215fb11d0c3 +915, 0xc10f229939e3e111 +916, 0x3f67fa79e41d2374 +917, 0xd5e7c1a9a7185162 +918, 0xa1dcce9ec91492fe +919, 0xd4e61f0727b5d21b +920, 0xdf6cdce46551800a +921, 0xa3f256ce906982d3 +922, 0x209742a6b9ffc27 +923, 0x4006c96958526a57 +924, 0x9606aebc75a1967e +925, 0x91b9f42fb64189df +926, 0xb27119defcb938bc +927, 0x128cc7a84ba05597 +928, 0x6c3df613c62d0d30 +929, 0x3adf69d48b629ec7 +930, 0xda42ee493837b128 +931, 0xb8e770480e760bb5 +932, 0x9feb55d57c99c626 +933, 0x29812d80afdae3ed +934, 0xae4222a64276a8c7 +935, 0xe3897212a5b4ed53 +936, 0x98bedfd13886e669 +937, 0xca858675d7fc0d0e +938, 0x28a359f665354234 +939, 0xfac2ccabe4128b35 +940, 0x61373cc5d11ca180 +941, 0x7007605a4512a87a +942, 
0xe71f8eade7b30b3d +943, 0x3a9e77f9b99bd04d +944, 0x70d3e42488098866 +945, 0xd30fc159c7cd4d99 +946, 0xe4d3f6600d2e2d6f +947, 0x1088324dfa955c25 +948, 0x516437acd4764623 +949, 0x38a31abe50d0aa03 +950, 0x72e1054e9dc02ba +951, 0xe6971dd664d1a2e2 +952, 0xf6698cb095d3b702 +953, 0xad995a5a8c19bd92 +954, 0x34e53c6936f656e6 +955, 0x10de240bc07c757a +956, 0x3e3b9a6861c2bd1c +957, 0x9c0b0b97d3712ec9 +958, 0xabf1505a75043aed +959, 0xbdf93d3de3274179 +960, 0x28fa5904d3f62c28 +961, 0xc3b97b39ef6c5133 +962, 0xf2b2219225b8679d +963, 0x8be4ec0f930c0aaa +964, 0x47de5a56aa590643 +965, 0xb6f871b304129856 +966, 0x80a61c06233ab0f9 +967, 0x3ce6c3af8101b055 +968, 0x85b911708274e7d1 +969, 0x4cab65d093a488b7 +970, 0xaabc4b10661fe28e +971, 0x35b16dea64474a68 +972, 0x1d6eb5b093361223 +973, 0xc39107b92f0fe1fb +974, 0x1d09e048073c4841 +975, 0xc6a02f43aca8cb2f +976, 0xaf6613dbc7da909c +977, 0x5ac2a40c230aa756 +978, 0x33afb5e7c01c39a5 +979, 0xc7b0b20ea8b7d0ef +980, 0xdf7306c8ccb1bbea +981, 0x9710efc0c188b2a0 +982, 0xd6303eadb72c873e +983, 0xa38ca609b118f35a +984, 0x8390613065c6e535 +985, 0xdf9a0106757e431f +986, 0x8bcf77039788e143 +987, 0x6026806a986b378e +988, 0x482ff3b1394cb1dc +989, 0x2a27d0ccac9ede9c +990, 0x53c77f26e271b3ab +991, 0x1ba004cf276cf3f +992, 0xc135b0517dc81f7c +993, 0x5d137838db75e442 +994, 0x3fe505f93d1dbdd7 +995, 0x351654ae7d598294 +996, 0x173f8d182af9d84d +997, 0xf97dfcd164fe11c5 +998, 0xcda423e5ad43b290 +999, 0xa5cb380b8de10d10 diff --git a/.local/lib/python3.8/site-packages/numpy/random/tests/data/pcg64-testset-2.csv b/.local/lib/python3.8/site-packages/numpy/random/tests/data/pcg64-testset-2.csv new file mode 100644 index 0000000..7c13e31 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/tests/data/pcg64-testset-2.csv @@ -0,0 +1,1001 @@ +seed, 0x0 +0, 0xa30febcfd9c2825f +1, 0x4510bdf882d9d721 +2, 0xa7d3da94ecde8b8 +3, 0x43b27b61342f01d +4, 0xd0327a782cde513b +5, 0xe9aa5979a6401c4e +6, 0x9b4c7b7180edb27f +7, 0xbac0495ff8829a45 +8, 0x8b2b01e7a1dc7fbf +9, 0xef60e8078f56bfed +10, 0xd0dbc74d4700374c +11, 0xb37868abbe90b0 +12, 0xdb7ed8bf64e6f5f0 +13, 0x89910738de7951f +14, 0xbacab307c3cfd379 +15, 0x2cf7c449d8b927a6 +16, 0xdcf94b3a16db7f0e +17, 0x8a9d33d905a8792e +18, 0x4cb9eb2014951238 +19, 0x6c353acf7b26d6f1 +20, 0x73ff53d673aa30c +21, 0x1fd10760015eca68 +22, 0xabae0aa9021eeba8 +23, 0xa5ae363a868ee2bb +24, 0x9d89e0f041de6631 +25, 0x6238b133c3991a65 +26, 0xff49267d75fef51a +27, 0xfb180656ce13c53f +28, 0xaf7fadf36128712d +29, 0xa6847fc6f339c63e +30, 0xb03e0b80d71ea5bc +31, 0x63905abcb43969af +32, 0x2295af3ee00a3bba +33, 0xb8b375b994330415 +34, 0x867d9ef1d8716a3b +35, 0x4f6c02f5601b4e18 +36, 0x7c5fb4c16c470d18 +37, 0xe3b57986b804b343 +38, 0xef1d79d212aca692 +39, 0x5b98774c8806209c +40, 0x924fc76bac38a5d1 +41, 0x5266084c412ddeed +42, 0x98240bf9b831d6a3 +43, 0x5681599e81219442 +44, 0x6441248fc2ba92bc +45, 0xe3e9051a540349ea +46, 0x3a2700034390baa3 +47, 0x9f893155b6d402bc +48, 0x158207910c6d8aef +49, 0xd5282ab7608c2cbc +50, 0xc97f4651669dee4f +51, 0x3d4750d95103ed60 +52, 0xe0614542caac1f04 +53, 0xefe5092144cfc6c +54, 0x560bc486abd7e9ae +55, 0x2678b71392daa4b8 +56, 0x734970d3dc2ba416 +57, 0xcbdbe849e51e4aaf +58, 0x3b0b5e28b491556c +59, 0xd51449ac45abd88 +60, 0x6790b59991f1b7ab +61, 0x32d1c039ff2415bc +62, 0x173b9772f24f72e0 +63, 0x9490a9ca9f883b1b +64, 0x4c775989e6214222 +65, 0xac07db37e6ee6114 +66, 0x331371b2e3f10aee +67, 0xf12e5326c21c28e4 +68, 0x5d77dc280c70d614 +69, 0x1b01bd17a2f281ec +70, 0xa10d3b5882938487 +71, 0xed5a0033c394ae8f +72, 0x70bc8ea568ea44b4 +73, 0xf4600ae77965e730 +74, 
0x7ff92c0b321ce233 +75, 0x6cdbc87d0cc1d670 +76, 0x9ec64f0cf2000eb1 +77, 0xfebea50259800f68 +78, 0xf2edf9019a8fd343 +79, 0x75c584ac042e5468 +80, 0xc1fa8481d5bf9a1d +81, 0x7f57180168514ac2 +82, 0x878100716b94f81e +83, 0xc929406e3af17fd2 +84, 0x6a26e2c013e4bf4d +85, 0xbc071d8848280955 +86, 0xb60d75abbfd1bdac +87, 0xee9b76afeca9fa69 +88, 0x1d6c399d2f452810 +89, 0xbaa0bc1621e25c83 +90, 0xed6ba792f8671ba5 +91, 0xf7ca02c2ab11d8d7 +92, 0x3c3cadadf0b21e3 +93, 0xdd1784571e864e9c +94, 0xfb2f992015157509 +95, 0xf50bb9f0d3ced743 +96, 0x261565f75c3e185f +97, 0xf8fe33b284513e60 +98, 0xe3d2d10b5e024664 +99, 0xd28717566242cf35 +100, 0x7ae07d133ac5b789 +101, 0x3b7ccaaa53ac338e +102, 0xcd480bace4871650 +103, 0xec6c78f923c080e9 +104, 0x44211d0ff8919d59 +105, 0x89f79af76d2a45fe +106, 0x71583fd8a837548b +107, 0xee57269c261511f5 +108, 0xa5ee8f3b128c5d1 +109, 0xbb64c20ed0765a17 +110, 0x9d4790ab2eeaf7e4 +111, 0x742f3db806d9e98 +112, 0xb81ec97aed6a0d1b +113, 0x41808b34f6a8a23 +114, 0xc20913af175dfd4d +115, 0x834427db263b22bb +116, 0xedd9c632e611828a +117, 0x10eac8524496f571 +118, 0xd76091b97eb00ab7 +119, 0x111298ae9fe95666 +120, 0x5824b2e2a6719c43 +121, 0x6e280ec539e934ed +122, 0xf74fd832df90083e +123, 0x8fee6d0f241c2e97 +124, 0x4244f331c2f19c3c +125, 0x3dde75a845cce97f +126, 0xe35bb8e635a9915b +127, 0x39d2943037f7932e +128, 0x1fe2d134201d0970 +129, 0x49d00b63c749b804 +130, 0x960c2942cd4e4e04 +131, 0x8dd8e009dbc0435f +132, 0xcf493495c3a055cd +133, 0x8f7b5a1c0f9fe9cd +134, 0x49d5f90374641a25 +135, 0x69b3932073d3524c +136, 0xd170603e7de84ee2 +137, 0xa062ba3ed3539948 +138, 0xf5861cc5b5d56c82 +139, 0x5e914998a30c7e76 +140, 0x8d77f2ad1503c0f1 +141, 0x980b6a9e3b4181fb +142, 0xd9299cd50694c084 +143, 0x253dc0f8f1cec4c5 +144, 0x68110fb9d1b3e695 +145, 0xe8f3120d0aabc461 +146, 0xb066e7df0dfb042 +147, 0xd29ce0f797e6b60b +148, 0x6a569bb7ca33bd42 +149, 0xd46e08b2dc2385f8 +150, 0x28c61d11d055767 +151, 0x5d73aa3d1a2bb725 +152, 0x1421191e1c14829a +153, 0xa711bfb6423df35e +154, 0x461af97a86308006 +155, 0xb3e1018ff3519367 +156, 0xf19cf866a268ef2b +157, 0x207715eac9199d1d +158, 0xdd621c410975b78c +159, 0xf390aea68683610 +160, 0x617a2d107a0047d9 +161, 0x6e05ac416e5bebf0 +162, 0x7d253e70506c1bed +163, 0xf9f96f4a7dd53810 +164, 0xc693b29cb1573f73 +165, 0x4f1146b0020ea544 +166, 0x45140608fbd40579 +167, 0xdcf57219828ce6be +168, 0xe19d58cca37b5b32 +169, 0x82bda95b2a161235 +170, 0x5823c3d8a2b6c9ba +171, 0xfeb2e74092fdf89a +172, 0x50e1ad1abc8f869d +173, 0x2ec63d0c105eb8da +174, 0xe14e1c4845a3264a +175, 0xcff53670455eb6aa +176, 0xaafaccd24619fa3e +177, 0xf55a988486e2422a +178, 0xecfba16a90ff4d04 +179, 0xbf8d36c2f644757a +180, 0xdc56ed75a0dd6249 +181, 0x3f45023eff17c3bb +182, 0x2428bbfe90023fab +183, 0xab892c611adcb70c +184, 0xb6f13d8c0c2b9d74 +185, 0x2ac3fb11d224f2a8 +186, 0x65433dcfae2d9351 +187, 0xe906859ae4b45f82 +188, 0x8fb7f5f093d76a3b +189, 0x940dd290b5e88d1a +190, 0x31b27d21bef116e7 +191, 0x86a964e2c83b5296 +192, 0x85ffd17bc079a9e8 +193, 0x16c47c724e7ab7f1 +194, 0xfb6098a9867e7d7f +195, 0x9246fb69092c6cb2 +196, 0x1a4033572760f32 +197, 0xc5cc568a8b273b84 +198, 0xfa6f9f2fbdd44abc +199, 0x9701b8e087718ba3 +200, 0x51d6a7dcf73f8f3a +201, 0x30008172cc6a972d +202, 0xac2ab49a5ca6ac81 +203, 0x31f28ef79461e54c +204, 0x93e35a8da8cc6132 +205, 0x9a2c58beeba3d5b9 +206, 0xf6615c1de266ac39 +207, 0x127ff9f8166b766b +208, 0x7ffe380e80a69556 +209, 0xbe7d2c228e1542f7 +210, 0x2d5ebb4e50ba1746 +211, 0x63585761ae1bf684 +212, 0x1019eb5cee022fea +213, 0xb9d3540ab58da30d +214, 0x1677f4cb45620eb9 +215, 0x6524baee51783822 +216, 0xdf9f2ddcfabb0adc +217, 
0x78e8acc43b287935 +218, 0xe9a1974e999222b5 +219, 0xc41324ec2291e780 +220, 0xea52abc9ecdcbc9f +221, 0x209d7bcd46ec6b04 +222, 0x12d504c09803db2e +223, 0x1200e6bf21475d81 +224, 0xde6d3c2b35fd2cfc +225, 0xa2526900ac33bd3c +226, 0x7f1f5290fc432bc5 +227, 0x29ddfb380a3d69c8 +228, 0xac79cb6942a2909d +229, 0x516996685b67a92a +230, 0xb5fc39041cb828bb +231, 0x75d9d8ca0644a276 +232, 0x81e98b76be92a3e9 +233, 0xca27888fafe12179 +234, 0x17be2ae039925765 +235, 0x9429846c0e6d0342 +236, 0x327dfd50439815e9 +237, 0xcee20cd7bc254aeb +238, 0x7d250389f453f29e +239, 0xfd1b232a85c95569 +240, 0x2ed55fac80f3e9e9 +241, 0xf6886c20417a1be7 +242, 0xcd08e61f0b0fdfde +243, 0x7b33e34da5c27bff +244, 0xd043c4b7d5603dd5 +245, 0x9a544e4c70a3b686 +246, 0xa7b60398c381f771 +247, 0xe9e7a3487c4bd4f2 +248, 0x10b58fdfe1ff112c +249, 0xd5c1c9748c0f4ceb +250, 0x61be9d09159d54ff +251, 0x5356f51e8239f510 +252, 0xfe7889d9b202ecef +253, 0xc7fc19ca5d263d5d +254, 0x7c4c07e61dfd9f69 +255, 0x6c315fe5015f300a +256, 0xe0a5bc00039747b4 +257, 0x16397fdcf829ee80 +258, 0xb55aee80d16a5169 +259, 0xca0609944d007eea +260, 0xcc982249f65a02ce +261, 0x528161feb149c148 +262, 0xcbf08ba49b41c006 +263, 0x39af1ff0b6f14138 +264, 0x5cc036be69799aec +265, 0x6adde125b1db21c5 +266, 0x8a99d83d6b613b67 +267, 0x1cd43fca9451f74c +268, 0x682dbb26ecc96365 +269, 0x13b4be2ceb43e3 +270, 0xbe8fbc3b6f4f581e +271, 0xda148a2f4bda5719 +272, 0x239106ca3319f393 +273, 0xb42b4dde641f0dd5 +274, 0xd233cfdf4cb0af74 +275, 0xfb5919d905589afc +276, 0xd802a8860c10b66a +277, 0x6c923e1d00e7b5bc +278, 0xfacce1134f383b89 +279, 0xf9570abda7a6d553 +280, 0x80f0f9796a208f18 +281, 0xc0e1df5280951c57 +282, 0xe9f143f08257bbe0 +283, 0x79e4c6463123d588 +284, 0xdd2118583f2b1684 +285, 0xb399ff5f2329fa18 +286, 0x4b3e9ebae96f813c +287, 0xc484dbf247787384 +288, 0x921865eb97603f2c +289, 0x18063c68e257d300 +290, 0x643181f345e7fc26 +291, 0x12e0b0e8eadf9fa7 +292, 0x79e613fe73dfa354 +293, 0x6db4c59203b7217a +294, 0x6c7a0e9ba6139eaf +295, 0x9617c7ac4e3f6d97 +296, 0x1f68a7b4fb1b4b75 +297, 0xef0b7ab24944f466 +298, 0xaf1dee1f4be1bc89 +299, 0xd2e355c959f5fd8d +300, 0xe594c3fb95d96efc +301, 0x9554766ca3342906 +302, 0xa4bbdc77d12842c +303, 0xb62400211ee489a8 +304, 0x91abadaaa3bbe67c +305, 0xd371eeb91deb42bb +306, 0x883bab35cbd2b6e5 +307, 0xd030c3d9411a9041 +308, 0xff3c110a858ff000 +309, 0x59bdf5ca47d0bde7 +310, 0x2bc80fa3cdba1853 +311, 0x6444ccb652662cb8 +312, 0xc0c7e256b9e90339 +313, 0x70714ea9c9d72302 +314, 0x96a0142f9d897d27 +315, 0x209a9097c5a91ef7 +316, 0xb9e33afc5171e009 +317, 0x47b37af433a58d40 +318, 0x30cc4ffbfa831d26 +319, 0xdcea4a85ff815466 +320, 0x907d5bd027f2e5cc +321, 0x7c081f6852e04a4b +322, 0xe61950749c1d502b +323, 0x1604e937ee69834a +324, 0xb2372d952dd25309 +325, 0x53f6a5b834c72577 +326, 0x2ce7a74395e0b694 +327, 0xacbf9ab4fe91f225 +328, 0x5ce1e63d3a2bb90f +329, 0x54740da3a5ed139b +330, 0xf194ddb39f29880b +331, 0x3305374f5d8ec08b +332, 0x831dd0164927ff4a +333, 0x625baa78e4458cf +334, 0x29d27dc0a4a71152 +335, 0xe227bae9a1401034 +336, 0xca0c209831846b2b +337, 0x8e8cc54b08b5a411 +338, 0x38f2b4acaac27db6 +339, 0x8ec88baac814e86b +340, 0x31c08e46b007bde +341, 0xb686c02722794c09 +342, 0xb77cf8fc682e3907 +343, 0xa56334e7f606f4b2 +344, 0x9c80b127bddd5f4f +345, 0x12df14834cd858bf +346, 0x3f14762a9cf5fb9f +347, 0x930a70941ef5779e +348, 0x64e96c849c30c080 +349, 0xfdf53bfba1300484 +350, 0xec7a9363c21bc616 +351, 0x26e9fd6a115ecb47 +352, 0x9707a84b5bc77fbb +353, 0xb23b2737b20d5903 +354, 0x22f4825ae80f6501 +355, 0x500644b12be6a01b +356, 0xb746645b2af082db +357, 0xe6af051f697892f8 +358, 0x577c724248a1cfc6 +359, 
0x3d2b6a434c84eed3 +360, 0xd260f5efd7328314 +361, 0x95c16cc84bb3f55c +362, 0x7a01b2e4e0e80ca7 +363, 0x41930c3ce70a0935 +364, 0x1299bccf39d4e110 +365, 0x494883ba1a8a87f +366, 0x9478ecfe2d918e60 +367, 0x30ec9a5670cda8af +368, 0xf9bc877e833e2b99 +369, 0x1b83a0acfbb4a8db +370, 0x73bc1740c0d18880 +371, 0x65086ca9773cb3e1 +372, 0x3b78c3ccd63cff2e +373, 0xbfae748795acfb31 +374, 0xa4c9d5d56a15ba20 +375, 0xb9cb41721e52b71e +376, 0x1532f15d4dc47748 +377, 0x5a4d647a4b9ee632 +378, 0x8513c7c5a50898d9 +379, 0x6d3d98ccd5461b2e +380, 0xa65e99be2fe98d6 +381, 0x31abc8855334a0e5 +382, 0xf1ed22a661dca5b8 +383, 0x299e2b63229e03be +384, 0xda201a06687bce48 +385, 0xd27794b302142c55 +386, 0x642bd3e1c7898a9d +387, 0x777f1ff00afa1a87 +388, 0xd2f1c84fb3877baa +389, 0xae417583289191fd +390, 0xd641f1d88e0e2d55 +391, 0xc1f1d98fb5d18ebf +392, 0xb0f72aecdadce97b +393, 0xe9b8abc764f6018a +394, 0xd2a37cff8e890594 +395, 0x2dd70d631a528771 +396, 0xbf8ba0478c18e336 +397, 0x1630bf47f372ce0a +398, 0x6d04ea20dc3f46b8 +399, 0x6591881bf34337f2 +400, 0x33c149c7eb5b4103 +401, 0xf01a8c9857c86748 +402, 0x184348cdfc16d215 +403, 0x141168b253d2ed7 +404, 0x52aaf012ef50a6f1 +405, 0xfda1722387e16f4c +406, 0x43c30f57d6c038fa +407, 0xd4a8611f5f96d214 +408, 0x2c512ce17e987f2c +409, 0x961ce450f0fa2822 +410, 0xf55a506ec6cea9cd +411, 0xb76d694d9c7f5ef6 +412, 0xfb029216dbd8e988 +413, 0x93162501896a0081 +414, 0xfbbbd2c5ab300f5c +415, 0xd648b6da7387d491 +416, 0xc73b4697471d9d98 +417, 0xe37412bf1c93ee76 +418, 0xa1a96d96570e6637 +419, 0x5b3ab4f82428f65c +420, 0x873d849b188aa36f +421, 0x39fbee0ffc9fa9ff +422, 0xc70d21b744d677fe +423, 0x2b8a43c23043d209 +424, 0x93c33eaa37370d16 +425, 0x8930ac1880f2b0ef +426, 0xac01d27707036af0 +427, 0xc2af3fee504343a0 +428, 0x1c1dae2ad5535d97 +429, 0x9ffc21804b76a480 +430, 0x69f903412cc13563 +431, 0x9d3c4e2759a0c47d +432, 0xb1a8f894be6302b9 +433, 0x95e1fd7951479506 +434, 0xbb9e6c03cd4ae8e3 +435, 0x85206010c9b737cf +436, 0x767e813694d6238c +437, 0x4969af329ccbb30a +438, 0x3aa9af1075aaea5c +439, 0xb1ff519e8118a993 +440, 0xb21a23a3c91180fe +441, 0x320b24582ca3fd88 +442, 0xf8ca56415fb4e453 +443, 0xabd0899c07205e77 +444, 0x87fdc7a44b4ad50f +445, 0xd75744911641a278 +446, 0x7c8c9a65df6fcb95 +447, 0x79d785e3c7a5b695 +448, 0x421e4565ba1f592f +449, 0x27f87eb2517835cf +450, 0xb62cc4297441c83e +451, 0xd817a80ac815ca6d +452, 0xad84388130df2aa8 +453, 0x5e6b1640452d6ac8 +454, 0x936285e15edce2a3 +455, 0x903bccc4969768e8 +456, 0xefc2cb7b109d3140 +457, 0x633e9dfdda2d903a +458, 0x2a2f3225925678a1 +459, 0xe07eac91a27f8547 +460, 0xe50ced40eda78cb3 +461, 0xc5b22500e1c7441 +462, 0x32becf61bca3aa72 +463, 0xa2e37c4b30671344 +464, 0xc9f1c1910f45d544 +465, 0x9b50333b2dcdf730 +466, 0x310bfd53a1684b94 +467, 0x1e1dc21e66ac6455 +468, 0x81876c2bfb1ed5a1 +469, 0xd0c54a3e25eadc7b +470, 0x3791b6fbbd5c7ba0 +471, 0x133be57356c599fc +472, 0x8d1148eb8e83fdea +473, 0x311aedba0d8b42cc +474, 0x1142ae52745f94bb +475, 0xc5f4ab2fbde8c4a3 +476, 0xd23be827b5b24f6d +477, 0x65f95194cd122715 +478, 0x4b48969d73125922 +479, 0x46f165052b8ff988 +480, 0x5c689f94b9275ff4 +481, 0x93b03823ff2d536b +482, 0x871f3775aa4e3523 +483, 0x5af829f7cc0f66a5 +484, 0xa32e05739cbeac8c +485, 0xacff1856ddace0fe +486, 0x8eeb5e7f991a5322 +487, 0x6325c2720e0dbdea +488, 0x9fb817bc4fdf5200 +489, 0x9786f0d850e43d78 +490, 0x571f76dd7f9fb77a +491, 0x4d9e94e181cbc63f +492, 0x8bb632d3376c547a +493, 0x9cc26d9efd1c88b9 +494, 0x9c5d49579df52b0b +495, 0x6201abf7e1cda07b +496, 0x90d68f0c6c884963 +497, 0xfc5b66188ef7f561 +498, 0x6d9303cf2e0e0f95 +499, 0xd7cfcff535f5ed07 +500, 0x14d1a1228daa4ac6 +501, 
0xe00ef5762f66ae50 +502, 0xf113a79471582978 +503, 0x430985281785dc7a +504, 0x31914108c206ed5 +505, 0x7ba6707b6419971c +506, 0x2ec63b033ce112e5 +507, 0xf8bcd36ced3b41e3 +508, 0xe5cf908c8010414b +509, 0xf5ee224b7c703e30 +510, 0x9a9733af0b12338b +511, 0x83e18cc00ace34f8 +512, 0xd52cff39e23008b8 +513, 0xa700578136b9c0c5 +514, 0x3fa179d32ac51f99 +515, 0xef2d5eab6d4ad380 +516, 0x709024a5abd032df +517, 0xc607c7ee349ede87 +518, 0x803d784e9731eb5f +519, 0x2ef06f4ba769282d +520, 0x4bc1dca1e9f07eb9 +521, 0x930c958a7a72f94d +522, 0x249bc8db2cc7a3bf +523, 0x3845305798f9a5d +524, 0x6f137eca9ab6f948 +525, 0xc31f5a963d31bd67 +526, 0x9d39693d5383626f +527, 0x52fb41c335a8b98e +528, 0xb79d1a29a06006ec +529, 0x7c0926a7a3eda2cc +530, 0xffdf5214406fd53e +531, 0xc6aa02a7e94282b9 +532, 0xd4a4431b4aa301ee +533, 0x4271cc0f9420d3ab +534, 0x26fccd7cc7fc2485 +535, 0x330594bb945b8d5a +536, 0x6ea8eaad12e5cb8c +537, 0x831c3467726bede3 +538, 0x31d1eb10017eaa61 +539, 0xc7aa75e41508f5cb +540, 0xde51810f0cadd0b5 +541, 0x50e5b3e73692f80b +542, 0x82107ec55636e188 +543, 0x9828ef175d843ab4 +544, 0xb8edc6a860dd421e +545, 0x25c0c138fd537ac3 +546, 0x47e72a771e8eb563 +547, 0xbb0f8c5333f4a2cc +548, 0x91750d2fb9b2d479 +549, 0xe662d8f6fe38df36 +550, 0x72a6d879fb5619f0 +551, 0x6817c7878dcbf077 +552, 0x4e7741cb484661e8 +553, 0x3b3b3ba0be5711bf +554, 0xa6989f5d25868765 +555, 0x43c276398997e4e0 +556, 0xdcbe16a94da28870 +557, 0x454936980a699c99 +558, 0xac614bfa8f0266c6 +559, 0x9174841392e213d5 +560, 0xa0e2acffc5fc9d1f +561, 0xe53a08a7a0e6521a +562, 0x2b845cf7c24172e0 +563, 0x265a4fc5f7adec0d +564, 0x1f34fbe5f1e49420 +565, 0x139181f6fb647f20 +566, 0x88c35d46e2fcd05e +567, 0x2a6d5b55903c0459 +568, 0xcea28eb621ad7bf1 +569, 0x5c9cdc13e7aaa30 +570, 0x5fe63e14746e7103 +571, 0x7923e53d73835db9 +572, 0x376e661210bf1b06 +573, 0x5b1cab85450efdd5 +574, 0x3908dc096c70b452 +575, 0x4825e303cd1f396f +576, 0xed476bfd702957c3 +577, 0x6acc013aff5db743 +578, 0x62c80b776343d488 +579, 0x9c75edcd5b012697 +580, 0xaa053362a3b9770a +581, 0xa907e236c7c07e94 +582, 0x15b2c380451692c0 +583, 0x94f79142697bd61f +584, 0xbc657d31ea98d44f +585, 0xcbaa5e52517a1f5e +586, 0x96aa2e44a7c4a03f +587, 0x216d3c66db2b515d +588, 0x157001807e3ca88a +589, 0x52b3a596bdd3859a +590, 0xed747e7fc5e3adac +591, 0x78fd765ddb2c448d +592, 0xe53dc7299ed8614e +593, 0x75ad41fb1d7a790a +594, 0xc14f6b944b0e6cb1 +595, 0x7c314b69fce3df1c +596, 0xb56d82eb740d7abc +597, 0x5132a93c41251fdb +598, 0xe3ce35bd2a82f958 +599, 0x440571a981c722f2 +600, 0x194cdfd9f186bc9 +601, 0xb89e522a5db00939 +602, 0xad35f339f68df3c8 +603, 0xa82ab18420322293 +604, 0xaffa6df9b72b27c4 +605, 0x9615694d23beaa2c +606, 0x1d82ebe563abad91 +607, 0xab50ef65fbd94385 +608, 0x1b070dbd70a9a14 +609, 0x2ececa796abbadf0 +610, 0x6bbeafe9e81ab2a2 +611, 0x60dcd0d2a9b76914 +612, 0x1e748039ef05c33f +613, 0x6d4d17f2213ccdff +614, 0x9fa56132957bc987 +615, 0x60a17185de2428eb +616, 0xb56038ddf306479c +617, 0x3b1db5df92d06d8b +618, 0x24d1bba8bdedf580 +619, 0xbfb7e6740ebaa4d9 +620, 0xab31c4473e46f61d +621, 0x6deb3cdd8fd5869f +622, 0x23032e47746d72d6 +623, 0xa9e72d734e10f2e8 +624, 0xbffd199b6157bc23 +625, 0x29f8254df273fb62 +626, 0xb076142130ee55ec +627, 0x5b0b08374126c309 +628, 0xea4536aae979521f +629, 0xc064e7abec91a174 +630, 0x46133ef80c59d935 +631, 0xf0227e2da1b14160 +632, 0x675a76641e1af5a +633, 0x2f50a069b33d198c +634, 0x3ded5a65e1d657eb +635, 0xbb6999b020694f6b +636, 0x86b2f2b33487aed7 +637, 0x76e14e85f8bfb4cf +638, 0x38f7f1e44bd4e0db +639, 0xc1a7d41b7e80d4ae +640, 0x1dfaaf80bbceb42e +641, 0x3f51c11497720c2b +642, 0xce6da1415ddb8b80 +643, 
0x7377d8bcd359b5f3 +644, 0xe077208f3f810aca +645, 0x9a06a8a2dacbffce +646, 0xca1f99156b09b735 +647, 0x2ff9a93064d91451 +648, 0x50f3ea93f351a7ef +649, 0x606fceccb07054de +650, 0x7e83d6d2f8f6685d +651, 0x78f3995291c5d407 +652, 0xd28d2460e22d0228 +653, 0x2c5636f68a0054dd +654, 0xd9fafb1c56c8f6cb +655, 0xe39889b5f9d74464 +656, 0x1355372bf5db2cc1 +657, 0x26768426b9ac323 +658, 0x4af1dbdc1111fd89 +659, 0x66973587943b927f +660, 0xf86f5f50684dfb1d +661, 0x1247d574ff79b534 +662, 0xc8039f3259210fe2 +663, 0x79b573235c92a9f5 +664, 0x213f642d8450e2f0 +665, 0x5db7706973376566 +666, 0x6182c12e69b373d7 +667, 0x3e5ac47300aec07f +668, 0x4b5b6c57b1574376 +669, 0x6b7fcceefd56b17c +670, 0xf656c3455cb9d4b8 +671, 0x7577e2e13329721f +672, 0xf33c0c53ce956e8d +673, 0x7d0f328ee356174 +674, 0x10ec9a168088686e +675, 0x71ef1776d062dfa +676, 0xaa7b590a488a6bc4 +677, 0x38612b6dd8049a1c +678, 0x939045e36874f731 +679, 0xcb9d1d74c56d5ac9 +680, 0x54f1c1c8fef1d8ff +681, 0x3ee4b85c8c7e939e +682, 0xb9b4608e019f352c +683, 0x79d4701275d12e6a +684, 0x2632a2d9835c7f19 +685, 0x1662cd9fba293692 +686, 0xbcb70265115ee944 +687, 0xdc43fb9761468604 +688, 0xe3eec4e7d3871352 +689, 0x829531753226989d +690, 0x2748cc67f540e074 +691, 0x39c4af25d607837d +692, 0x741a243f4cb5df99 +693, 0xda1353287e18b49a +694, 0xa6735689d751ea74 +695, 0x46326d587340ce0b +696, 0xc18531df4550012b +697, 0x6f7901e05dd4b818 +698, 0xfb966afc4c001d63 +699, 0x6dc10fca67a9cfdb +700, 0xd6527ffadf0feaae +701, 0x3b900172045e25d +702, 0xb7dd594cdded6a46 +703, 0x6602aee7ec1599fc +704, 0x7fbf12f23747546a +705, 0x32e63f662bd2de0d +706, 0xedf47770b67ed641 +707, 0x331bef83481c5c2a +708, 0x8fc4256fdf05158c +709, 0x98eba48dabccf5e0 +710, 0xdbc2f2cdb7b1c154 +711, 0x7777755616517ad3 +712, 0xd473c147d2628ac1 +713, 0x861e15d1d760b5a7 +714, 0xf4d25926405ecb07 +715, 0xb7739c69effff86e +716, 0xe97fbafa6f96830c +717, 0xf13e8a334e8bede1 +718, 0xcd60010cba4ee4f9 +719, 0x1f537ac2b82e6008 +720, 0x1fda8d781a89140a +721, 0x9dc204f3f4a463f0 +722, 0x456dcd18eb56a1ab +723, 0x629957bc87bd16a1 +724, 0x2c8000ddb8c75253 +725, 0xc31dae9ec8449284 +726, 0xdac05c8baa2b691a +727, 0x21ff7be9ffa3e7ac +728, 0x844f4b5ed4ee08d0 +729, 0x651f913fd636c994 +730, 0xca3e71a2110b2d49 +731, 0x7709bc42253ed09d +732, 0xbb164d45b6569d43 +733, 0x90ec2f040c20a112 +734, 0xfa6e77e9166f5be4 +735, 0x6b6d12c1842d587d +736, 0xfcd7ff8466e25e2a +737, 0x6a5a2ed8bd971297 +738, 0x2ec35f6bba5adcbc +739, 0xc83676e16651249a +740, 0x458f6064cefe10ba +741, 0x90d54d527e6cd028 +742, 0xa5613e88db27c388 +743, 0x331e0c7d85aa1abc +744, 0x8cee4977e210358 +745, 0xfcae379aa6cbff8e +746, 0xd1407afc97a57e86 +747, 0x1fab25c864f094ae +748, 0xd914864a63004552 +749, 0x4214d226a20f1384 +750, 0x3f4e0d80c488b715 +751, 0xc5ca2f654024b7c8 +752, 0xc1e27a124e7c821c +753, 0xd890a915ffc7918c +754, 0x22fba040ce51a9f8 +755, 0xbf61cebd8891617a +756, 0x7846609ee228e319 +757, 0x536d1854375509b8 +758, 0xbbfb45fc6e666f50 +759, 0xd85b4c0527f9d7d6 +760, 0x528cc9c7fa2a84c8 +761, 0x27a1baece647f2cb +762, 0xfddf0cb92fe09dc3 +763, 0xeb5008fe965d8d96 +764, 0x4a3307937eb2e5c8 +765, 0xd07d74c240c6c363 +766, 0x16f62290179d1bbf +767, 0xe99c9bcc9cb1ece7 +768, 0xc64f9be03c8a93be +769, 0x32659effaf666c1f +770, 0x4bb228cfb30b6672 +771, 0x98764870842068a5 +772, 0x5b12ef2d2cd8bdcc +773, 0xbc79d1c1b41f28b8 +774, 0x97a517cf3279fc9a +775, 0x34ffd46c1d4d6025 +776, 0x9c302307ee25c8f0 +777, 0x399604eed1f18a8 +778, 0x1c9b813c2043142a +779, 0x2944ea5e55267fe9 +780, 0x5a8a9f5e728ea667 +781, 0x30c8440adb804a0 +782, 0xee0e6b627099a937 +783, 0x3d50757ada3c52da +784, 0x4548916b32c813ab +785, 
0x602a186fe5bf109b +786, 0xf0d440a2227ba304 +787, 0x5a10d4e0ca9ea32b +788, 0x6e5eb90da13ba64c +789, 0x4c6af8fd04241ab2 +790, 0xf9eb31d26e093006 +791, 0x5d674878839fe3ea +792, 0x1562b55b2484e47c +793, 0xa87188c099c1cb61 +794, 0xb7736b8aa02a3392 +795, 0x5f4b301125abb20f +796, 0x361d566984637f44 +797, 0x68c4b3feac8bd0c3 +798, 0x7066c634dd2503c1 +799, 0xfecbf7c9441eb6ea +800, 0xdbc26ae0fc81436b +801, 0x9ef3e2b48252e7a4 +802, 0x31a49b4c339b37c7 +803, 0xb01b2a83cf346cf4 +804, 0xc24dc2347f82fbe3 +805, 0x134cad272dcd410f +806, 0x61260742823ba59c +807, 0x53ac4c193a97c730 +808, 0x9207c9833af34b52 +809, 0xa72e7ee77078d1f5 +810, 0x2e6f6e1b05936885 +811, 0x783b99ce5dbf9464 +812, 0xfdfeb6f0d027bb44 +813, 0x40eeb27096f92b0 +814, 0x5ef96ff5d4a4521f +815, 0x5595806ae873718a +816, 0x67d449eecf4ca1c3 +817, 0xde837ab611364f3f +818, 0x7034c24d2b139be9 +819, 0xe21166603e0a9c86 +820, 0x935694435c1f0d51 +821, 0x6cb3bec90c126088 +822, 0x4096ef662b7a9f89 +823, 0xd2d85b8d238d8c15 +824, 0xa4ea533ce3ec59b2 +825, 0x3654729d80a2db29 +826, 0x214c4cc3906d29d4 +827, 0x201c447e7588e373 +828, 0xe8b8f0ae25f683eb +829, 0x6744aaf5754e38af +830, 0xd1ffb10d6f27a061 +831, 0xe536733a7b3a6c30 +832, 0x39f0f66e47cbf2c9 +833, 0x856a9593526fde2 +834, 0x2e2a817a0098ea4b +835, 0xc5e1eeb551a0e3d3 +836, 0x3f21e2f5e2d50b2 +837, 0x906af56c66dd9f8c +838, 0x30f6dbd70329fac8 +839, 0xc443dfddf3c01a60 +840, 0x7ab85d9aa9675470 +841, 0x8c9080bd39717bfc +842, 0x4b1ccdb3c3597f6f +843, 0x74e2542d70ab5d67 +844, 0xbb3d236aad00f74 +845, 0xcf3cadf9a2804774 +846, 0xe851d9750e42bd07 +847, 0xc0ad82029b1c371f +848, 0x7ee119eb552d6c07 +849, 0xd8024049bd1d784a +850, 0xfa67a899760363 +851, 0xaa7c2f438b178197 +852, 0xc473674a47ffe064 +853, 0x539fbe3fc674c270 +854, 0xdb48484748a76f3b +855, 0xc73b2b092060d +856, 0xa1d2a15345016f5d +857, 0x4d0fe8599f9bba47 +858, 0xa0edc275e6f8f1d1 +859, 0x40590a8655bc8d72 +860, 0x35b4223161f05f75 +861, 0xa04c0c0f616752dc +862, 0x7f371ed2ca45432d +863, 0x2ff1a08f75ac6438 +864, 0xe2dc5c3682282f48 +865, 0xe1e4179fa98d9013 +866, 0x8cb083d6843a73d5 +867, 0xb4c2b5921b706854 +868, 0x738e14c0e7352445 +869, 0xcd2b646f91afd8c7 +870, 0xd5779a5b57a264fd +871, 0xc39ff855586c7d07 +872, 0x3e3f0098c631a859 +873, 0x644e02fae032110 +874, 0xa8834613c0a45278 +875, 0x69482f2c08e10657 +876, 0xe4ee475bdb87e69a +877, 0xdc1ef7b25c0d0019 +878, 0x88a3fa2be18d8744 +879, 0x60a02e0b21c5bec7 +880, 0xb6867b88aa19bc1a +881, 0xb599409affcf10eb +882, 0xaeaa1778a5e59daa +883, 0xd7a91a52c16663e3 +884, 0x93cb269affe07b1c +885, 0x841b6ced3a4ba815 +886, 0x84541768e1540a5c +887, 0xe3943c84f83b3020 +888, 0x5de366fbd7b45258 +889, 0xd787cc3bde91a661 +890, 0x814071446edecb57 +891, 0x15d8c602a1141514 +892, 0x72f07bc8002d1d0d +893, 0x4a8bd8dc9a1f0f3e +894, 0x8723796ae0f20d35 +895, 0xda7283c2051f73b2 +896, 0x2df0cc247f90bd3b +897, 0x79a8522b968f990a +898, 0x951ede190c8b9d02 +899, 0xc512f1a5b14b018a +900, 0xf0e3ddc03b9a4259 +901, 0x8cf4a35ad312e15f +902, 0xebef28926b11094b +903, 0x5628ba687325921c +904, 0xc3aa75e57edc49c3 +905, 0xc38382fa98e762ba +906, 0x8d209e896285848e +907, 0x2c7d6adf592b4a3e +908, 0x62de48e36f8338f3 +909, 0x4a752741e00de30e +910, 0xf7855b70f1f6ec2b +911, 0xa505fa4428199e43 +912, 0xe8b6b423b826bbac +913, 0x4bd1206cf8786d05 +914, 0x6dcf040391fe3bf4 +915, 0x913f500f87e1bba3 +916, 0x5acf775aa180a5d5 +917, 0x74dd28d9432ce739 +918, 0x996c2ff2f0dc2495 +919, 0x73dbfe6c56effe4 +920, 0x56fddd25196f5e40 +921, 0xe87810158f5b7 +922, 0x7b8795e996383f1f +923, 0x9ba5ee7c777c4c82 +924, 0x17ce3908d270fe1c +925, 0x3df9e613c1aedfae +926, 0xcdd26871b32fc8e1 +927, 0xd71cb13afc633979 
+928, 0x63427c8ea9b1c79e +929, 0xd070f7664d3b405d +930, 0x46f2a9e32d9fb769 +931, 0xb4c3822a45e9fe9b +932, 0x8ba30b97fe6f5ec7 +933, 0x70aa554ee2fc11f9 +934, 0xa80c99dbe0cfcfaf +935, 0x36d9250cb2d68ed +936, 0x2995e4b9e1cd1db4 +937, 0x4b3803ba57fc570f +938, 0xae3959e7d740eaa5 +939, 0xb4cbd6662adbae08 +940, 0xae46576446e8dbc4 +941, 0xc4828e008a9a8a54 +942, 0x145d7db8e6554b2f +943, 0x1b1b8916a730c371 +944, 0xdaf84b2bebe31963 +945, 0x5b59b80ef23a2403 +946, 0x9180c7e89cab6fd3 +947, 0x80e58f5411babf34 +948, 0xa06cf55185b9b005 +949, 0x13b2c798424173ad +950, 0xc510f8e706311d49 +951, 0x1f974b83b6046d3a +952, 0xae6e8e85e822d1c3 +953, 0x66f2c8dc3274a31a +954, 0x7e04dbcbf65bd377 +955, 0xabf41ede01ec20a4 +956, 0x5efa0948f6bbb2ea +957, 0xbc91c99d8592255 +958, 0xf6d6917911d86d75 +959, 0x85ce273d54e9097a +960, 0xbdfd30f2420fff92 +961, 0x8802f02f610b537c +962, 0xd1d70037ed543229 +963, 0x908aaf97f9693a46 +964, 0x1f6cfeaa0834d53a +965, 0xa453fd1648ce04d2 +966, 0x2c38bb85ebc64af9 +967, 0xd2daff551c90c4f8 +968, 0xae5a0d949797d784 +969, 0xf0974c8552ac9593 +970, 0xa10b70499f65c693 +971, 0x39a449ebd594ddff +972, 0x8ea090f2b17b9b49 +973, 0xc592de318090fd83 +974, 0xb63e4fbc467b6912 +975, 0x57a0c1c5ce0e4dcc +976, 0xa7c517cf3d436b35 +977, 0xef6dcb0f3fad038b +978, 0xaf4fb60315b91287 +979, 0x5e0776f67304f331 +980, 0xe927753b8e6f7932 +981, 0xd3df2dd92559e304 +982, 0xdaed52aa6af44413 +983, 0x1b59f4dac1e181f8 +984, 0x4a73c2293877ef39 +985, 0xca45d0d015fe44de +986, 0x4659c8b7853735a8 +987, 0x12de6466bdf8adeb +988, 0xaeea857a09bfec15 +989, 0xcc9cf4b3c0b88a23 +990, 0xa44ae52396a5e1bf +991, 0x5847a724305d137f +992, 0x8f4d4de223956182 +993, 0x58254dfada867a8 +994, 0x900a98222c2f339e +995, 0xdb575260935d51d5 +996, 0x13fb4bfbbc0d7b53 +997, 0x62213850186bb92b +998, 0x2a34823312c00388 +999, 0x6148329042f743b0 diff --git a/.local/lib/python3.8/site-packages/numpy/random/tests/data/pcg64dxsm-testset-1.csv b/.local/lib/python3.8/site-packages/numpy/random/tests/data/pcg64dxsm-testset-1.csv new file mode 100644 index 0000000..39cef05 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/tests/data/pcg64dxsm-testset-1.csv @@ -0,0 +1,1001 @@ +seed, 0xdeadbeaf +0, 0xdf1ddcf1e22521fe +1, 0xc71b2f9c706cf151 +2, 0x6922a8cc24ad96b2 +3, 0x82738c549beccc30 +4, 0x5e8415cdb1f17580 +5, 0x64c54ad0c09cb43 +6, 0x361a17a607dce278 +7, 0x4346f6afb7acad68 +8, 0x6e9f14d4f6398d6b +9, 0xf818d4343f8ed822 +10, 0x6327647daf508ed6 +11, 0xe1d1dbe5496a262a +12, 0xfc081e619076b2e0 +13, 0x37126563a956ab1 +14, 0x8bb46e155db16b9 +15, 0x56449f006c9f3fb4 +16, 0x34a9273550941803 +17, 0x5b4df62660f99462 +18, 0xb8665cad532e3018 +19, 0x72fc3e5f7f84216a +20, 0x71d3c47f6fd59939 +21, 0xfd4218afa1de463b +22, 0xc84054c78e0a9a71 +23, 0xae59034726be61a8 +24, 0xa6a5f21de983654d +25, 0x3b633acf572009da +26, 0x6a0884f347ab54c8 +27, 0x7a907ebe9adcab50 +28, 0xbe779be53d7b8d4a +29, 0xf5976e8c69b9dcd1 +30, 0x1d8302f114699e11 +31, 0x7d37e43042c038a0 +32, 0x2cc1d4edc2a40f35 +33, 0x83e3347bb2d581f1 +34, 0x253f8698651a844d +35, 0x4312dea0dd4e32f6 +36, 0x10f106439964ea3a +37, 0x810eb374844868cc +38, 0x366342a54b1978cc +39, 0x9fb39b13aaddfb5e +40, 0xdb91fd0d9482bed7 +41, 0x89f6ea4ca9c68204 +42, 0x146b31ccca461792 +43, 0x203fd9724deb2486 +44, 0x58a84f23748e25cb +45, 0x2f20eb6aeb94e88 +46, 0x14d3581460e473c +47, 0xad5bd0d25f37d047 +48, 0x1cf88fa16de258b2 +49, 0x3bcab6485b7a341 +50, 0xb2433b37f227d90c +51, 0x2cffd7e0a8360cc8 +52, 0x5d2eeff7c9ebc847 +53, 0x6fd7c7ae23f9f64b +54, 0x381650b2d00f175d +55, 0x9d93edcedc873cae +56, 0x56e369a033d4cb49 +57, 0x7547997116a3bac +58, 
0x11debaa897fd4665 +59, 0xdf799d2b73bd6fb8 +60, 0x3747d299c66624d +61, 0xac9346701afd0cfa +62, 0xac90e150fa13c7bf +63, 0x85c56ad2248c2871 +64, 0xdea66bf35c45f195 +65, 0x59cf910ea079fb74 +66, 0x2f841bb782274586 +67, 0x9814df4384d92bd9 +68, 0x15bc70824be09925 +69, 0x16d4d0524c0503a3 +70, 0xf04ea249135c0cc7 +71, 0xa707ab509b7e3032 +72, 0x465459efa869e372 +73, 0x64cbf70a783fab67 +74, 0x36b3541a14ca8ed7 +75, 0x9a4dfae8f4c596bf +76, 0x11d9a04224281be3 +77, 0xe09bbe6d5e98ec32 +78, 0xa6c60d908973aa0d +79, 0x7c524c57dd5915c8 +80, 0xa810c170b27f1fdc +81, 0xce5d409819621583 +82, 0xfe2ee3d5332a3525 +83, 0x162fb7c8b32045eb +84, 0x4a3327156b0b2d83 +85, 0x808d0282f971064 +86, 0x2e6f04cf5ed27e60 +87, 0xaf6800699cca67a9 +88, 0xc7590aae7244c3bf +89, 0x7824345f4713f5f9 +90, 0x8f713505f8fd059b +91, 0x3d5b5b9bb6b1e80e +92, 0x8674f45e5dc40d79 +93, 0xcb1e36846aa14773 +94, 0xe0ae45b2b9b778c1 +95, 0xd7254ce931eefcfb +96, 0xef34e15e4f55ac0a +97, 0xf17cc0ba15a99bc4 +98, 0x77bb0f7ffe7b31f1 +99, 0x6ee86438d2e71d38 +100, 0x584890f86829a455 +101, 0x7baf0d8d30ba70fe +102, 0xb1ac8f326b8403ae +103, 0xcc1963435c874ba7 +104, 0x9c483b953d1334ce +105, 0xc0924bcbf3e10941 +106, 0x21bcc581558717b1 +107, 0x2c5ad1623f8d292b +108, 0xa8ea110f6124557e +109, 0x15f24a6c5c4c591 +110, 0x40fe0d9cd7629126 +111, 0xcfe8f2b3b081484d +112, 0x891383f4b4cac284 +113, 0x76f2fcdef7fa845 +114, 0x4edd12133aed0584 +115, 0xd53c06d12308873d +116, 0xf7f22882c17f86bf +117, 0xfbaa4aad72f35e10 +118, 0x627610da2e3c0cc3 +119, 0x582b16a143634d9a +120, 0x9b4a7f69ed38f4a0 +121, 0x2df694974d1e1cbe +122, 0xe5be6eaafed5d4b +123, 0xc48e2a288ad6605e +124, 0xbcb088149ce27c2b +125, 0x3cb6a7fb06ceecbe +126, 0x516735fff3b9e3ac +127, 0x5cbafc551ee5008d +128, 0xee27d1ab855c5fd5 +129, 0xc99fb341f6baf846 +130, 0x7ad8891b92058e6d +131, 0xf50310d03c1ac6c7 +132, 0x947e281d998cbd3e +133, 0x1d4d94a93824fe80 +134, 0x5568b77289e7ee73 +135, 0x7d82d1b2b41e3c8b +136, 0x1af462c7abc787b +137, 0xcfd8dfe80bfae1ef +138, 0xd314caeb723a63ea +139, 0x1c63ddcfc1145429 +140, 0x3801b7cc6cbf2437 +141, 0xc327d5b9fdafddd3 +142, 0xe140278430ca3c78 +143, 0x4d0345a685cb6ef8 +144, 0x47640dc86e261ff9 +145, 0xab817f158523ebf4 +146, 0x37c51e35fbe65a6b +147, 0xab090f475d30a178 +148, 0x4d3ec225bf599fc1 +149, 0xefd517b0041679b1 +150, 0x20ad50bca4da32c5 +151, 0x75e1f7cd07fad86d +152, 0x348cf781ee655f4b +153, 0x9375f0e5ffc2d2ec +154, 0x7689082fd5f7279c +155, 0x633e56f763561e77 +156, 0x9d1752d70861f9fd +157, 0xa3c994b4e70b0b0f +158, 0xabf7276a58701b88 +159, 0xbfa18d1a0540d000 +160, 0xc6a28a2475646d26 +161, 0x7cdf108583f65085 +162, 0x82dcefb9f32104be +163, 0xc6baadd0adc6b446 +164, 0x7a63cff01075b1b4 +165, 0x67ac62e575c89919 +166, 0x96fa4320a0942035 +167, 0xc4658859385b325f +168, 0xde22c17ff47808f6 +169, 0xbb952c4d89e2f2ec +170, 0x638251fbc55bdc37 +171, 0x38918b307a03b3ea +172, 0xccb60f2cedbb570b +173, 0x3c06f4086a28f012 +174, 0x4e8d238388986e33 +175, 0x1760b7793514a143 +176, 0xa3f924efe49ee7d6 +177, 0xaf6be2dbaebc0bdf +178, 0x6782682090dffe09 +179, 0xb63a4d90d848e8ef +180, 0x5f649c7eaf4c54c5 +181, 0xbe57582426a085ba +182, 0xb5dd825aa52fb76d +183, 0x74cb4e6ca4039617 +184, 0x382e578bf0a49588 +185, 0xc043e8ea6e1dcdae +186, 0xf902addd5c04fa7c +187, 0xf3337994612528db +188, 0x4e8fd48d6d15b4e6 +189, 0x7190a509927c07ab +190, 0x864c2dee5b7108ae +191, 0xbb9972ddc196f467 +192, 0x1ea02ab3ca10a448 +193, 0xe50a8ffde35ddef9 +194, 0x7bd2f59a67183541 +195, 0x5a940b30d8fcd27a +196, 0x82b4cea62623d4d3 +197, 0x6fbda76d4afef445 +198, 0x8b1f6880f418328e +199, 0x8b69a025c72c54b7 +200, 0xb71e0f3986a3835f +201, 0xa4a7ddb8b9816825 +202, 
0x945dcda28228b1d8 +203, 0xb471abf2f8044d72 +204, 0xf07d4af64742b1ba +205, 0xfca5190bc4dd6a2a +206, 0xd681497262e11bc5 +207, 0xbe95d5f00c577028 +208, 0x56313439fd8bde19 +209, 0x3f3d9ac9b5ee6522 +210, 0x7b8d457dd2b49bbe +211, 0xe76b5747885d214b +212, 0xa8a695b3deb493ea +213, 0x5292446548c95d71 +214, 0xbf5cdf0d436412df +215, 0x7936abaed779d28d +216, 0x659c6e8073b3a06d +217, 0x86c9ff28f5543b71 +218, 0x6faa748445a99146 +219, 0xdcc1e6ab57904fd7 +220, 0x770bd61233addc5f +221, 0x16963e041e46d94f +222, 0x158e6cb2934157ac +223, 0xb65088a8fd246441 +224, 0x2b12ced6ce8a68c3 +225, 0x59a18d02cd6082b3 +226, 0x4ddbc318cb5488ee +227, 0x3d4cf520b3ed20a1 +228, 0x7028b3a92e2b292d +229, 0xf141da264a250e4d +230, 0x9788d53e86041c37 +231, 0x1bb91238a7c97dbf +232, 0x81953d0ddb634309 +233, 0xfa39ccfe14d2d46 +234, 0xf7c7861c9b7e8399 +235, 0x18d27ca50d9dc249 +236, 0x258dfdf38510d0d9 +237, 0x9e72d8af910ea76f +238, 0x4f8ef24b96de50ad +239, 0xb9d9c12297e03dc9 +240, 0x91994e41b4a1929c +241, 0x8defa79b2ccc83b9 +242, 0x948566748706dac5 +243, 0x7b0454946e70e4cf +244, 0x340b7cb298c70ed7 +245, 0x6602005330cebd95 +246, 0xf71cb803aa61f722 +247, 0x4683fb07fc70ae8a +248, 0xc6db9f0c4de3ed88 +249, 0x3e8dfae2a593cef9 +250, 0x615f7c38e3862b33 +251, 0x676c7996550d857 +252, 0xc6d520d54a5c266a +253, 0x202b1e8eef14aa2e +254, 0xa3a84891a27a582 +255, 0x84dbee451658d47f +256, 0x254c7cd97e777e3a +257, 0xf50b6e977f0eba50 +258, 0x2898b1d3062a4798 +259, 0x4096f7cbbb019773 +260, 0x9fb8e75548062c50 +261, 0x4647071e5ca318ec +262, 0x2b4750bdb3b3b01 +263, 0x88ac41cc69a39786 +264, 0x705e25476ef46fa3 +265, 0xc0c1db19884a48a6 +266, 0x1364c0afdbb465e5 +267, 0x58e98534701272a6 +268, 0x746a5ea9701517c0 +269, 0x523a70bc6b300b67 +270, 0x9b1c098eda8564ad +271, 0xfbaeb28d3637067f +272, 0xddd9a13551fdba65 +273, 0x56461a670559e832 +274, 0xab4fd79be85570ad +275, 0xd4b691ecaff8ca55 +276, 0x11a4495939e7f004 +277, 0x40d069d19477eb47 +278, 0xe790783d285cd81e +279, 0xde8218b16d935bc7 +280, 0x2635e8c65cd4182d +281, 0xeae402623e3454 +282, 0x9f99c833184e0279 +283, 0x3d0f79a0d52d84e7 +284, 0xc1f8edb10c625b90 +285, 0x9b4546363d1f0489 +286, 0x98d86d0b1212a282 +287, 0x386b53863161200d +288, 0xbe1165c7fe48a135 +289, 0xb9658b04dbbfdc8c +290, 0xcea14eddfe84d71a +291, 0x55d03298be74abe7 +292, 0x5be3b50d961ffd7e +293, 0xc76b1045dc4b78e1 +294, 0x7830e3ff3f6c3d4c +295, 0xb617adb36ca3729 +296, 0x4a51bdb194f14aa9 +297, 0x246024e54e6b682a +298, 0x33d42fc9c6d33083 +299, 0xadccba149f31e1d +300, 0x5183e66b9002f8b +301, 0x70eb2416404d51b7 +302, 0x26c25eb225535351 +303, 0xbc2d5b0d23076561 +304, 0x5823019ddead1da +305, 0x85cfa109fca69f62 +306, 0x26017933e7e1efd9 +307, 0x3ec7be9a32212753 +308, 0x697e8a0697cd6f60 +309, 0x44735f6cca03920f +310, 0x8cc655eb94ee212e +311, 0x8b8b74eba84929a0 +312, 0x7708ccedd0c98c80 +313, 0x1b6f21f19777cbe1 +314, 0x363e564bd5fadedb +315, 0x5921543a641591fe +316, 0xc390786d68ea8a1b +317, 0x9b293138dc033fca +318, 0x45447ca8dc843345 +319, 0xee6ef6755bc49c5e +320, 0x70a3a1f5163c3be5 +321, 0xf05e25448b6343b0 +322, 0x4739f4f8717b7e69 +323, 0xb006141975bf957 +324, 0x31874a91b707f452 +325, 0x3a07f2c90bae2869 +326, 0xb73dae5499a55c5e +327, 0x489070893bb51575 +328, 0x7129acf423940575 +329, 0x38c41f4b90130972 +330, 0xc5260ca65f5a84a1 +331, 0x6e76194f39563932 +332, 0x62ca1f9ca3de3ca6 +333, 0xb4a97874e640853f +334, 0x38ed0f71e311cc02 +335, 0xde183b81099e8f47 +336, 0x9bb8bf8e6694346 +337, 0xd15497b6bf81e0f2 +338, 0xaaae52536c00111 +339, 0x4e4e60d1435aaafd +340, 0x5a15512e5d6ea721 +341, 0xff0f1ffabfc6664f +342, 0xba3ffcedc5f97fec +343, 0xef87f391c0c6bfb6 +344, 
0x4a888c5d31eb0f98 +345, 0x559a3fbfd7946e95 +346, 0xe45b44a0db5a9bad +347, 0x9457898964190af1 +348, 0xd9357dfaab76cd9e +349, 0xa60e907178d965a1 +350, 0x76b2dc3032dc2f4a +351, 0x13549b9c2802120 +352, 0x8656b965a66a1800 +353, 0x16802e6e22456a23 +354, 0x23b62edc60efaa9 +355, 0x6832a366e1e4ea3b +356, 0x46b1b41093ff2b1e +357, 0x55c857128143f219 +358, 0x7fc35ddf5e138200 +359, 0x790abe78be67467e +360, 0xa4446fc08babd466 +361, 0xc23d70327999b855 +362, 0x2e019d1597148196 +363, 0xfefd98e560403ab8 +364, 0xbe5f0a33da330d58 +365, 0x3078a4e9d43ca395 +366, 0x511bfedd6f12f2b3 +367, 0x8bc138e335be987c +368, 0x24640f803465716d +369, 0xf6530b04d0bd618f +370, 0x9b7833e5aa782716 +371, 0x778cd35aea5841b1 +372, 0xecea3c458cefbc60 +373, 0x5107ae83fc527f46 +374, 0x278ad83d44bd2d1a +375, 0x7014a382295aeb16 +376, 0xf326dd762048743f +377, 0x858633d56279e553 +378, 0x76408154085f01bc +379, 0x3e77d3364d02e746 +380, 0x2f26cea26cadd50b +381, 0x6d6846a4ecb84273 +382, 0x4847e96f2df5f76 +383, 0x5a8610f46e13ff61 +384, 0x4e7a7cac403e10dd +385, 0x754bdf2e20c7bc90 +386, 0x8bdd80e6c51bd0be +387, 0x61c655fae2b4bc52 +388, 0x60873ef48e3d2f03 +389, 0x9d7d8d3698a0b4a4 +390, 0xdf48e9c355cd5d4b +391, 0x69ecf03e20be99ac +392, 0xc1a0c5a339bd1815 +393, 0x2e3263a6a3adccb +394, 0x23557459719adbdc +395, 0xd1b709a3b330e5a +396, 0xade5ab00a5d88b9d +397, 0x69a6bd644120cfad +398, 0x40187ecceee92342 +399, 0x1c41964ba1ac78da +400, 0x9ac5c51cbecabe67 +401, 0xbdc075781cf36d55 +402, 0xeaf5a32246ded56 +403, 0xcda0b67e39c0fb71 +404, 0x4839ee456ef7cc95 +405, 0xf17092fdd41d5658 +406, 0x2b5d422e60ae3253 +407, 0x3effe71102008551 +408, 0x20a47108e83934b7 +409, 0xd02da65fe768a88f +410, 0xeb046bd56afa4026 +411, 0x70c0509c08e0fbe0 +412, 0x1d35c38d4f8bac6c +413, 0x9aa8eb6466f392e0 +414, 0x587bd4a430740f30 +415, 0x82978fe4bad4195 +416, 0xdc4ebc4c0feb50ab +417, 0xd3b7164d0240c06f +418, 0x6e2ad6e5a5003a63 +419, 0xa24b430e2ee6b59c +420, 0x2905f49fd5073094 +421, 0x5f209e4de03aa941 +422, 0x57b7da3e0bedb1dc +423, 0x5e054018875b01f5 +424, 0xb2f2da6145658db3 +425, 0xbd9c94a69a8eb651 +426, 0x9c5f9a07cd6ac749 +427, 0x2296c4af4d529c38 +428, 0x522ed800fafdefab +429, 0xe2a447ced0c66791 +430, 0x937f10d45e455fef +431, 0xc882987d9e29a24 +432, 0x4610bfd6a247ee1a +433, 0x562ba3e50870059 +434, 0x59d8d58793602189 +435, 0xfe9a606e3e34abe +436, 0x6825f7932a5e9282 +437, 0xe77f7061bab476ad +438, 0xbf42001da340ace3 +439, 0x9c3e9230f5e47960 +440, 0x2c0f700d96d5ad58 +441, 0x330048b7cd18f1f9 +442, 0xffc08785eca5cca9 +443, 0xb5879046915f07a5 +444, 0xef51fe26f83c988e +445, 0xfa4c2968e7881a9a +446, 0xc0a9744455a4aad +447, 0xbd2ad686d6313928 +448, 0x6b9f0984c127682a +449, 0xc9aaa00a5da59ed8 +450, 0x762a0c4b98980dbf +451, 0x52d1a2393d3ca2d1 +452, 0x1e9308f2861db15c +453, 0xe7b3c74fe4b4a844 +454, 0x485e15704a7fc594 +455, 0x9e7f67ea44c221f6 +456, 0xbab9ad47fde916e0 +457, 0x50e383912b7fc1f4 +458, 0xaad63db8abcef62d +459, 0xc2f0c5699f47f013 +460, 0xee15b36ada826812 +461, 0x2a1b1cf1e1777142 +462, 0x8adb03ede79e937d +463, 0xf14105ef65643bf3 +464, 0x752bbaefc374a3c7 +465, 0xa4980a08a5a21d23 +466, 0x418a1c05194b2db7 +467, 0xdd6ff32efe1c3cd6 +468, 0x272473ed1f0d3aa2 +469, 0x1e7fdebadabe6c06 +470, 0xd1baa90c17b3842f +471, 0xd3d3a778e9c8404a +472, 0x781ae7fda49fa1a0 +473, 0x61c44fdbdacc672d +474, 0x6d447d0a1404f257 +475, 0x9303e8bdfbfb894d +476, 0x3b3482cdec016244 +477, 0xb149bf245d062e7b +478, 0x96f8d54b14cf992d +479, 0x4741549a01f8c3d0 +480, 0x48270811b2992af +481, 0x7b58f175cd25d147 +482, 0x8f19a840b56f4be9 +483, 0x84a77f43c0951a93 +484, 0x34e1a69381f0c374 +485, 0xb158383c9b4040f +486, 
0x372f1abc7cf3a9fa +487, 0x5439819a84571763 +488, 0xabf8515e9084e2fa +489, 0xb02312b9387ff99 +490, 0x238a85bb47a68b12 +491, 0x2068cb83857c49bb +492, 0xc6170e743083664c +493, 0x745cf8470bcb8467 +494, 0xe3a759a301670300 +495, 0x292c7686ad3e67da +496, 0x359efedaff192a45 +497, 0x511f2c31a2d8c475 +498, 0x97fd041bf21c20b3 +499, 0x25ef1fe841b7b3f6 +500, 0xbb71739e656f262d +501, 0x2729b0e989b6b7b8 +502, 0xd2142702ec7dbabf +503, 0x7008decd2488ee3f +504, 0x69daa95e303298d7 +505, 0xc35eca4efb8baa5a +506, 0xf3f16d261cec3b6c +507, 0x22371c1d75396bd3 +508, 0x7aefa08eccae857e +509, 0x255b493c5e3c2a2f +510, 0x779474a077d34241 +511, 0x5199c42686bea241 +512, 0x16c83931e293b8d3 +513, 0xa57fe8db8c0302c7 +514, 0xd7ace619e5312eb1 +515, 0x8740f013306d217c +516, 0xb6a1ad5e29f4d453 +517, 0x31abf7c964688597 +518, 0xbc3d791daed71e7 +519, 0x31ee4ca67b7056ed +520, 0x1ab5416bfe290ea3 +521, 0x93db416f6d3b843a +522, 0xed83bbe5b1dd2fed +523, 0xece38271470d9b6d +524, 0x3a620f42663cd8ae +525, 0x50c87e02acafee5d +526, 0xcabeb8bedbc6dab5 +527, 0x2880a6d09970c729 +528, 0x4aba5dd3bfc81bc +529, 0xaba54edf41080cec +530, 0xb86bb916fc85a169 +531, 0x4c41de87bc79d8ca +532, 0xcce2a202622945fe +533, 0x513f086fad94c107 +534, 0x18b3960c11f8cc96 +535, 0x2f0d1cfd1896e236 +536, 0x1702ae3880d79b15 +537, 0x88923749029ae81 +538, 0x84810d4bdec668eb +539, 0xf85b0a123f4fc68d +540, 0x93efd68974b6e4d1 +541, 0x5d16d6d993a071c9 +542, 0x94436858f94ca43b +543, 0xb3dbb9ed0cb180b6 +544, 0x6447030a010b8c99 +545, 0xd7224897c62925d8 +546, 0xb0c13c1d50605d3a +547, 0xdff02c7cb9d45f30 +548, 0xe8103179f983570d +549, 0xbc552037d6d0a24e +550, 0x775e500b01486b0d +551, 0x2050ac632c694dd6 +552, 0x218910387c4d7ae7 +553, 0xf83e8b68ff885d5d +554, 0xe3374ec25fca51a3 +555, 0xfa750ffa3a60f3af +556, 0x29ee40ba6df5592e +557, 0x70e21a68f48260d2 +558, 0x3805ca72cd40886e +559, 0x2f23e73f8eabf062 +560, 0x2296f80cdf6531ae +561, 0x903099ed968db43a +562, 0xf044445cf9f2929f +563, 0xcd47fdc2de1b7a1 +564, 0xaab1cbd4f849da99 +565, 0x5fc990688da01acb +566, 0xa9cee52ea7dab392 +567, 0xecefc3a4349283a8 +568, 0xdd6b572972e3fafc +569, 0xc1f0b1a2ffb155da +570, 0xc30d53fc17bd25c8 +571, 0x8afa89c77834db28 +572, 0x5569a596fb32896c +573, 0x36f207fc8df3e3d4 +574, 0x57c2bd58517d81db +575, 0xb524693e73d0061c +576, 0xb69f6eb233f5c48b +577, 0x4f0fb23cab8dc695 +578, 0x492c1ad0a48df8df +579, 0xf6dcc348ec8dec1f +580, 0xa4d8708d6eb2e262 +581, 0x4c2072c2c9766ff1 +582, 0xa9bf27c4304875f0 +583, 0xfc8fb8066d4f9ae2 +584, 0x188095f6235fec3c +585, 0x1d8227a2938c2864 +586, 0x89ea50c599010378 +587, 0xcac86df0a7c6d56d +588, 0x47a8c5df84c7d78 +589, 0xe607ae24ea228bfa +590, 0x36624a7996efe104 +591, 0x5d72881c1227d810 +592, 0x78694a6750374c8 +593, 0x7b9a217d4ab5ff45 +594, 0xd53e5d6f7504becc +595, 0x197a72d3f4889a0e +596, 0xfdc70c4755a8df36 +597, 0xd0fda83748c77f74 +598, 0x7ddc919ac9d6dcc9 +599, 0x785c810a6a2dc08b +600, 0xba4be83e7e36896c +601, 0x379d6fe80cf2bffe +602, 0x74cae2dabc429206 +603, 0x1efac32d5d34c917 +604, 0x3cb64e2f98d36e70 +605, 0xc0a7c3cdc3c60aa7 +606, 0x699dfadd38790ebe +607, 0x4861e61b3ecfbeac +608, 0x531744826c345baa +609, 0x5ec26427ad450cba +610, 0xf2c1741479abdcae +611, 0xe9328a78b2595458 +612, 0x30cd1bdf087acd7f +613, 0x7491ced4e009adbe +614, 0xdcd942df1e2e7023 +615, 0xfe63f01689fee35 +616, 0x80282dfe5eaedc42 +617, 0x6ecdea86495f8427 +618, 0xe0adfdd5e9ed31c3 +619, 0xf32bd2a7418127e +620, 0x8aabba078db6ee2 +621, 0xa8a8e60499145aca +622, 0xf76b086ac4e8a0f2 +623, 0x6e55b3c452ff27f8 +624, 0xe18fa7cd025a71bf +625, 0xeed7b685fde0fa25 +626, 0xba9b6c95867fa721 +627, 0x4c2603bc69de2df2 +628, 
0xaac87eee1b58cd66 +629, 0x3c9af6656e01282c +630, 0x2dfa05ce8ff476b6 +631, 0xeae9143fcf92f23d +632, 0x3f0699f631be3bc8 +633, 0xa0f5f79f2492bd67 +634, 0x59c47722388131ed +635, 0x5f6e9d2941cef1de +636, 0xe9ad915c09788b7b +637, 0x92c6d37e4f9482f5 +638, 0x57d301b7fdadd911 +639, 0x7e952d23d2a8443 +640, 0xbb2fa5e0704b3871 +641, 0xe5642199be36e2d5 +642, 0x5020b60d54358291 +643, 0xa0b6317ec3f60343 +644, 0xb57b08b99540bc5c +645, 0x21f1890adc997a88 +646, 0xfcf824200dd9da2d +647, 0x8146293d83d425d1 +648, 0xdadfbf5fbb99d420 +649, 0x1eb9bbc5e6482b7d +650, 0xd40ff44f1bbd0f1c +651, 0xa9f948ba2d08afa5 +652, 0x638cc07c5301e601 +653, 0x1f984baa606e14e8 +654, 0x44e153671081f398 +655, 0xb17882eeb1d77a5d +656, 0x5fd8dbee995f14c +657, 0xff3533e87f81b7fe +658, 0x2f44124293c49795 +659, 0x3bf6b51e9360248 +660, 0x72d615edf1436371 +661, 0x8fc5cf4a38adab9d +662, 0xfa517e9022078374 +663, 0xf356733f3e26f4d8 +664, 0x20ea099cdc6aad40 +665, 0xe15b977deb37637d +666, 0xcc85601b89dae88d +667, 0x5768c62f8dd4905c +668, 0xa43cc632b4e56ea +669, 0xc4240cf980e82458 +670, 0xb194e8ffb4b3eeb6 +671, 0xee753cf2219c5fa1 +672, 0xfe2500192181d44d +673, 0x2d03d7d6493dd821 +674, 0xff0e787bb98e7f9b +675, 0xa05cf8d3bd810ce7 +676, 0x718d5d6dcbbdcd65 +677, 0x8d0b5343a06931c +678, 0xae3a00a932e7eaf9 +679, 0x7ed3d8f18f983e18 +680, 0x3bb778ee466dc143 +681, 0x711c685c4e9062c0 +682, 0x104c3af5d7ac9834 +683, 0x17bdbb671fb5d5cf +684, 0xabf26caead4d2292 +685, 0xa45f02866467c005 +686, 0xf3769a32dc945d2d +687, 0xe78d0007f6aabb66 +688, 0x34b60be4acbd8d4b +689, 0x58c0b04b69359084 +690, 0x3a8bb354c212b1 +691, 0x6b82a8f3d70058d5 +692, 0x405bdef80a276a4a +693, 0xe20ca40ee9195cad +694, 0xf5dd96ba2446fefd +695, 0xc1e180c55fe55e3c +696, 0xa329caf6daa952b3 +697, 0xb4809dd0c84a6b0a +698, 0xd27f82661070cee7 +699, 0xa7121f15ee2b0d8a +700, 0x4bdaea70d6b34583 +701, 0xe821dc2f310f7a49 +702, 0x4c00a5a68e76f647 +703, 0x331065b064a2d5ea +704, 0xac0c2ce3dc04fa37 +705, 0x56b32b37b8229008 +706, 0xe757cdb51534fcfa +707, 0xd3ff183576b2fad7 +708, 0x179e1f4190f197a7 +709, 0xf874c626a7c9aae5 +710, 0xd58514ffc37c80e4 +711, 0xc65de31d33fa7fd3 +712, 0x6f6637052025769b +713, 0xca1c6bdadb519cc0 +714, 0xd1f3534cde37828a +715, 0xc858c339eee4830a +716, 0x2371eacc215e02f4 +717, 0x84e5022db85bbbe9 +718, 0x5f71c50bba48610e +719, 0xe420192dad9c323f +720, 0x2889342721fca003 +721, 0x83e64f63334f501d +722, 0xac2617172953f2c +723, 0xfa1f78d8433938ff +724, 0x5578382760051462 +725, 0x375d7a2e3b90af16 +726, 0xb93ff44e6c07552d +727, 0xded1d5ad811e818c +728, 0x7cf256b3b29e3a8c +729, 0x78d581b8e7bf95e8 +730, 0x5b69192f2caa6ad3 +731, 0xa9e25855a52de3ce +732, 0x69d8e8fc45cc188d +733, 0x5dd012c139ad347d +734, 0xfcb01c07b77db606 +735, 0x56253e36ab3d1cce +736, 0x1181edbb3ea2192 +737, 0x325bef47ff19a08d +738, 0xd3e231ceb27e5f7 +739, 0x8e819dd2de7956d2 +740, 0x34a9689fe6f84a51 +741, 0x3e4eeb719a9c2927 +742, 0x5c3b3440581d0aaf +743, 0x57caf51897d7c920 +744, 0xec6a458130464b40 +745, 0xe98f044e0da40e9b +746, 0xbe38662020eeb8e7 +747, 0x7b8c407c632724ae +748, 0x16c7cfa97b33a544 +749, 0xd23359e2e978ae5a +750, 0x4fdba458250933dd +751, 0x3c9e0713cfe616ba +752, 0x6f0df87b13163b42 +753, 0xc460902cb852cc97 +754, 0x289df8fefd6b0bce +755, 0x4ac2a2a1c3fb8029 +756, 0x2fc3e24d8b68eef7 +757, 0x34564386a59aab9a +758, 0x31047391ebd67ce4 +759, 0x6c23d070a0564d41 +760, 0xba6387b2b72545f7 +761, 0xcdcf1008058387af +762, 0xc9308fa98db05192 +763, 0xdbdbb5abd01a9d84 +764, 0x937088275c7804ab +765, 0x6f6accfefe34ee81 +766, 0x5c33c74c49cfdb2c +767, 0x5e1a771edfb92bd3 +768, 0x6e89b009069ecae7 +769, 0x34d64e17ec0e8968 +770, 
0x841203d0cde0c330 +771, 0x7642cc9d7eb9e9cb +772, 0xca01d2e8c128b97e +773, 0x5b8390617b3304ab +774, 0x52ec4ed10de1eb2d +775, 0xb90f288b9616f237 +776, 0x5bd43cd49617b2e2 +777, 0x1a53e21d25230596 +778, 0x36ccd15207a21cd6 +779, 0xc8263d780618fd3c +780, 0x6eb520598c6ce1cb +781, 0x493c99a3b341564f +782, 0xab999e9c5aa8764f +783, 0xab2fa4ceaba84b +784, 0xbbd2f17e5cb2331b +785, 0xc8b4d377c0cc4e81 +786, 0x31f71a6e165c4b1e +787, 0xd1011e55fb3addaa +788, 0x5f7ec34728dfa59 +789, 0x2aef59e60a84eb0f +790, 0x5dde6f09aec9ad5f +791, 0x968c6cdbc0ef0438 +792, 0x1957133afa15b13a +793, 0xbaf28f27573a64c2 +794, 0xc6f6ddd543ebf862 +795, 0xdd7534315ec9ae1e +796, 0xd2b80cd2758dd3b +797, 0xa38c3da00cc81538 +798, 0x15c95b82d3f9b0f9 +799, 0x6704930287ce2571 +800, 0x9c40cc2f6f4ecb0c +801, 0xc8de91f50b22e94e +802, 0x39272e8fddbfdf0a +803, 0x879e0aa810a117d +804, 0xa312fff4e9e5f3bd +805, 0x10dd747f2835dfec +806, 0xeb8466db7171cdae +807, 0xaa808d87b9ad040a +808, 0xab4d2229a329243a +809, 0x7c622f70d46f789c +810, 0x5d41cef5965b2a8e +811, 0xce97ec4702410d99 +812, 0x5beba2812c91211b +813, 0xf134b46c93a3fec7 +814, 0x76401d5630127226 +815, 0xc55fc9d9eacd4ec1 +816, 0xaec8cefaa12f813f +817, 0x2f845dcfd7b00722 +818, 0x3380ab4c20885921 +819, 0xdb68ad2597691b74 +820, 0x8a7e4951455f563f +821, 0x2372d007ed761c53 +822, 0xcab691907714c4f1 +823, 0x16bc31d6f3abec1a +824, 0x7dff639fbcf1824 +825, 0x6666985fbcff543d +826, 0xb618948e3d8e6d0c +827, 0x77b87837c794e068 +828, 0xcd48288d54fcb5a8 +829, 0x47a773ed6ae30dc3 +830, 0xba85ae44e203c942 +831, 0xa7a7b21791a25b2d +832, 0x4029dd92e63f19e0 +833, 0xc2ad66ab85e7d5aa +834, 0xa0f237c96fdab0db +835, 0xffefb0ab1ca18ed +836, 0x90cb4500785fd7d5 +837, 0xa7dd3120f4876435 +838, 0x53f7872624694300 +839, 0xea111326ff0040d9 +840, 0x5f83cb4cce40c83b +841, 0x918e04936c3b504d +842, 0x87a8db4c0e15e87c +843, 0x7cff39da6a0dedd0 +844, 0x36f7de2037f85381 +845, 0xd1d8d94022a1e9a7 +846, 0x2c9930127dc33ec9 +847, 0x6cb4719dcd0101c6 +848, 0xc01868cde76935f7 +849, 0x6b86f2ec1ab50143 +850, 0x68af607d8d94ae61 +851, 0xe216c5b95feedf34 +852, 0x4b866bd91efe2e4b +853, 0x4bff79df08f92c99 +854, 0x6ff664ea806acfd1 +855, 0x7fce0b3f9ece39bc +856, 0x29bc90b59cb3db97 +857, 0x833c4b419198607d +858, 0xf3573e36ca4d4768 +859, 0x50d71c0a3c2a3fa8 +860, 0xd754591aea2017e7 +861, 0x3f9126f1ee1ebf3 +862, 0xe775d7f4b1e43de8 +863, 0xe93d51628c263060 +864, 0x83e77f6fb32d6d82 +865, 0x43dd7eef823408e4 +866, 0x1c843c2c90180662 +867, 0xe924dafb9a16066b +868, 0x6af3ee96e7b7fbd9 +869, 0x94d5c4f37befcd1f +870, 0x40ffb04bedef4236 +871, 0x71c17bbc20e553e +872, 0x101f7a0a6208729f +873, 0x5ca34570cf923548 +874, 0x8e3139db2e96e814 +875, 0x3ab96d96263d048d +876, 0x97f3c0bbc6755c3c +877, 0x31fc72daedaef3dc +878, 0x71f8d7855d10789b +879, 0xce6dc97b4662333b +880, 0xfddc2aabd342bc61 +881, 0xefbd4007ff8c7d2e +882, 0xf72cd6c689ef8758 +883, 0x932c8b0c0e755137 +884, 0x94cc4dedd58ff69 +885, 0xde4dfd6890535979 +886, 0xdb00dcd2dcb4a50a +887, 0xb0466240b4548107 +888, 0x9cb9264c7b90d1a3 +889, 0x357e378e9be5766b +890, 0x6e0316ef03367bbf +891, 0x201ea18839544ca +892, 0x803ff3406be5f338 +893, 0xf9d5e82fd4144bb2 +894, 0x1b6b88ca701e9f47 +895, 0xd1fe5ab8e1f89cc0 +896, 0x14171fe176c4bece +897, 0x887948bdef78beaa +898, 0x80449ddc3eb9b977 +899, 0x5f4e1f900fb4bcf3 +900, 0xbe30f8701909f8e2 +901, 0xd1f2a2fb5503306d +902, 0x6b1c77238dc23803 +903, 0x102156a6c9860f66 +904, 0x4cd446e099edf4c1 +905, 0xc79ac6cbc911f33b +906, 0x3ee096ffe3384f1c +907, 0xb58f83b18a306dc7 +908, 0x9f76582141de56b2 +909, 0x9ddfa85e02c13866 +910, 0x4d9a19d4ce90a543 +911, 0xbf81ab39fd17d376 +912, 
0x5327e5054c6a74f1 +913, 0xd5062dd31db1a9b7 +914, 0x645853735527edc +915, 0x485393967f91af08 +916, 0xeff9667dcf77ca68 +917, 0xd012313f5fbec464 +918, 0xbeae35bdfae55144 +919, 0x302c41ebac8444a0 +920, 0x9ccdb6c2fe58fba8 +921, 0x567753af68ed23f8 +922, 0xff90f790e43efec3 +923, 0x970cc756fb799696 +924, 0xe59239d1c44915 +925, 0x4d2d189fb3941f05 +926, 0x96f23085db165a9c +927, 0xa1202dec7a37b1a5 +928, 0xc0c1ee74bcd7dc1a +929, 0x9edcf2048b30333a +930, 0xd848588ba7e865fb +931, 0x8d9f0897317cab40 +932, 0x67b96f15e25924fb +933, 0xefc8d8536619ee42 +934, 0xf3f621d22bdde0c2 +935, 0x68610a0de862ae32 +936, 0xa22ca5142de24cbd +937, 0x8815452f4e6b4801 +938, 0x4e9c1b607b2750e5 +939, 0x19b3c09ba6fc9b25 +940, 0x9b2543c8836780ac +941, 0xe702b8f950e56431 +942, 0xb357cc329cac3917 +943, 0x387bf86a17a31e08 +944, 0x9940b983d331b163 +945, 0xf5d89d7fe9095e18 +946, 0x4362682329e5c4d1 +947, 0xd2132573f6ae7b42 +948, 0xc0a5849e23a61606 +949, 0xdadbddf47265bc02 +950, 0x1b96f00339a705f7 +951, 0x94e6642329288913 +952, 0x825ab3f10e6d330b +953, 0x1a1c31ac9d883ea0 +954, 0xb49076b7155c6f47 +955, 0x920cf3085dfe3ccb +956, 0x9743407c9f28e825 +957, 0x6ce8a28622402719 +958, 0xce2fe67e06baf8a6 +959, 0x3a16b34784ecf5e6 +960, 0x140467cc1d162a0c +961, 0x32d4772692ab625 +962, 0xa4f4b28562f43336 +963, 0x885b4335457bd84a +964, 0x499d3ed26c87ad8a +965, 0xc7328bcedb9a545e +966, 0xc6dd76a6cbf5d2b2 +967, 0xba9c22be404ee1aa +968, 0x70e6aee45f23521d +969, 0x61e03a798593c177 +970, 0x171671f809c68213 +971, 0x28d54872fc1d914c +972, 0x43c2fcd9bd098b53 +973, 0x172ad4c4a98b9d37 +974, 0x330860c9460f2516 +975, 0x49547f472df984f4 +976, 0x873b2436d3f0e114 +977, 0x6f99accf4ea050b6 +978, 0x5968ac874ed51613 +979, 0x4939d70d29a3c611 +980, 0x11f381ed28738d3d +981, 0xa97430d36ab3a869 +982, 0xe6fa880801129e22 +983, 0xf84decbd8f48c913 +984, 0x4425c0ed1e9a82a5 +985, 0x7a1f9485e9929d5a +986, 0xc7c51f155dfce1c6 +987, 0x9619a39501d74f2b +988, 0x7c7035955dbf4c1b +989, 0xc61ee569cf57c2c9 +990, 0x3eaf7c5b0df734e1 +991, 0xe71cb4064d1ede05 +992, 0x356e3cec80e418b2 +993, 0xca04306243a15be6 +994, 0x941cf3881fa18896 +995, 0x30dbb0e819d644e0 +996, 0xaae22c0bef02859a +997, 0x7bd30917bbaa8a94 +998, 0x2672547bc8d7d329 +999, 0x4955c92aaa231578 diff --git a/.local/lib/python3.8/site-packages/numpy/random/tests/data/pcg64dxsm-testset-2.csv b/.local/lib/python3.8/site-packages/numpy/random/tests/data/pcg64dxsm-testset-2.csv new file mode 100644 index 0000000..878c5ea --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/tests/data/pcg64dxsm-testset-2.csv @@ -0,0 +1,1001 @@ +seed, 0x0 +0, 0xd97e4a147f788a70 +1, 0x8dfa7bce56e3a253 +2, 0x13556ed9f53d3c10 +3, 0x55dbf1c241341e98 +4, 0xa2cd98f722eb0e0a +5, 0x83dfc407203ade8 +6, 0xeaa083df518f030d +7, 0x44968c87e432852b +8, 0x573107b9cb8d9ecc +9, 0x9eedd1da50b9daca +10, 0xb33a6735ca451e3c +11, 0x72830d2b39677262 +12, 0x9da8c512fd0207e8 +13, 0x1fc5c91954a2672b +14, 0xd33479437116e08 +15, 0x9ccdd9390cee46f3 +16, 0x1fd39bb01acd9e76 +17, 0xedc1869a42ff7fe5 +18, 0xbd68ca0b42a6e7e9 +19, 0x620b67df09621b1f +20, 0xfa11d51bd6950221 +21, 0xc8c45b36e7d28d08 +22, 0xe9c91272fbaad777 +23, 0x2dc87a143f220e90 +24, 0x6376a7c82361f49d +25, 0x552c5e434232fe75 +26, 0x468f7f872ac195bc +27, 0x32bed6858125cf89 +28, 0xe4f06111494d09d3 +29, 0xa5c166ffea248b80 +30, 0x4e26605b97064a3f +31, 0xceafd9f6fc5569d +32, 0xb772f2f9eed9e106 +33, 0x672c65e6a93534e2 +34, 0xcdc5e1a28d1bd6a0 +35, 0x1ed9c96daeebd3e3 +36, 0x4d189dcfc0c93c3f +37, 0x50df5a95c62f4b43 +38, 0xcccf4949fa65bbb8 +39, 0x19b8073d53cdc984 +40, 0x6fb40bba35483703 +41, 0xb02de4aef86b515a +42, 
0x4d90c63655350310 +43, 0xea44e4089825b16c +44, 0x8d676958b1f9da2b +45, 0x6d313940917ae195 +46, 0x1b1d35a4c1dd19f4 +47, 0x117720f8397337ef +48, 0xcc073cf3ac11eeaa +49, 0x8331ec58a9ff8acb +50, 0xf3dc2a308b6b866f +51, 0x7eba1202663382b6 +52, 0x8269839debeb4e5a +53, 0x87fd3dc0f9181a8e +54, 0xabe62ddd3c925f03 +55, 0x7f56f146944fe8d4 +56, 0xc535972150852068 +57, 0x60b252d453bd3a68 +58, 0x4251f0134634490a +59, 0x338950da210dfeb2 +60, 0xcadfe932971c9471 +61, 0xfb7049457fab470e +62, 0x9bfb8145a4459dff +63, 0x4a89dda3898f9d8a +64, 0x88cc560151483929 +65, 0x277dc820f4b6796e +66, 0x3524bd07ea0afb88 +67, 0x92eb6ffb2bf14311 +68, 0xf6559be0783f3fe9 +69, 0xf0844f9af54af00d +70, 0xdd5e0b59adcef8a +71, 0x4ff7e4f2ab18554c +72, 0x3fa22c8a02634587 +73, 0x1db8e1a9442fe300 +74, 0x40cf15953ad3d3e7 +75, 0x92af15fe1a9f6f0a +76, 0xab4a0e466fb0cfd +77, 0x944f1555a06cca82 +78, 0x10cf48412f1f6066 +79, 0x7f51f9a455f9e8e1 +80, 0x47ee93530f024c7e +81, 0x36cf2f0413e0f6f2 +82, 0xa315e23731969407 +83, 0xd8e2796327cf5f87 +84, 0xa86072696a555c34 +85, 0xee3f0b8804feaab7 +86, 0x41e80dc858f8360b +87, 0x31ec2e9b78f5b29 +88, 0xd397fb9b8561344c +89, 0x28081e724e649b74 +90, 0x5c135fc3fc672348 +91, 0x9a276ca70ce9caa0 +92, 0x9216da059229050a +93, 0xcf7d375ed68007b0 +94, 0xa68ad1963724a770 +95, 0xd4350de8d3b6787c +96, 0xee7d2c2cc275b6d2 +97, 0x71645ec738749735 +98, 0x45abdf8c68d33dbb +99, 0xe71cadb692c705ea +100, 0x60af6f061fd90622 +101, 0x1eabe2072632c99d +102, 0x947dda995a402cb6 +103, 0xbb19f49a3454f3b +104, 0xe6e43e907407758c +105, 0xfe2b67016bd6873a +106, 0x7fdb4dd8ab30a722 +107, 0x39d3265b0ff1a45b +108, 0xed24c0e4fce8d0c2 +109, 0xf6e074f86faf669d +110, 0x9142040df8dc2a79 +111, 0x9682ab16bc939a9c +112, 0x6a4e80c378d971c8 +113, 0x31309c2c7fc2d3d6 +114, 0xb7237ec682993339 +115, 0x6a30c06bb83dccd9 +116, 0x21c8e9b6d8e7c382 +117, 0x258a24ae6f086a19 +118, 0xb76edb5be7df5c35 +119, 0x3c11d7d5c16e7175 +120, 0xbdfc34c31eff66e1 +121, 0x8af66e44be8bf3a2 +122, 0x3053292e193dec28 +123, 0xd0cc44545b454995 +124, 0x408ac01a9289d56 +125, 0x4e02d34318ec2e85 +126, 0x9413ff3777c6eb6b +127, 0xa3a301f8e37eb3df +128, 0x14e6306bd8d8f9f9 +129, 0xd3ea06ce16c4a653 +130, 0x170abe5429122982 +131, 0x7f9e6fddc6cacb85 +132, 0xa41b93e10a10a4c8 +133, 0x239216f9d5b6d0b5 +134, 0x985fcb6cb4190d98 +135, 0xb45e3e7c68f480c6 +136, 0xc1b2fc2e0446211c +137, 0x4596adb28858c498 +138, 0x2dd706f3458ddc75 +139, 0x29c988c86f75464 +140, 0xac33a65aa679a60 +141, 0xa28fef762d39d938 +142, 0x541e6fa48647f53 +143, 0x27838d56b2649735 +144, 0x8e143d318a796212 +145, 0xaea6097745f586b8 +146, 0x636143330f8ee2e6 +147, 0xc2d05fd8b945b172 +148, 0x6e355f9eb4353055 +149, 0xeb64ca42e8bf282e +150, 0xe8202dfd9da0fe5 +151, 0x7305689c9d790cba +152, 0xf122f8b1bef32970 +153, 0x9562887e38c32ba5 +154, 0xf9cd9be121b738d +155, 0x6238e0c398307913 +156, 0x5f2e79bb07c30f47 +157, 0x8ce8e45c465006e +158, 0x39281fe1e99e2441 +159, 0xafb10c2ca2874fea +160, 0x6e52f91633f83cf +161, 0x8ff12c1ac73c4494 +162, 0xe48608a09365af59 +163, 0xefd9bbc7e76e6a33 +164, 0xbe16a39d5c38ec92 +165, 0x6a6ffbcaf5a2330f +166, 0xdd5d6ac7d998d43d +167, 0x207bf978226d4f11 +168, 0xf8eec56bd2a0f62e +169, 0xa5bccf05dce0d975 +170, 0x93cf3ec1afe457a6 +171, 0x38651466d201f736 +172, 0x3ad21473985c9184 +173, 0xc6407a3bd38c92a6 +174, 0xb1ec42c7afa90a25 +175, 0xbdeca984df8b7dd3 +176, 0xb6926b1d00aa6c55 +177, 0x86141d0022352d49 +178, 0x169316256135ee09 +179, 0xffb1c7767af02a5c +180, 0x502af38ad19f5c91 +181, 0xfbf6cbc080086658 +182, 0x33cf9b219edae501 +183, 0x46e69bebd77b8862 +184, 0xf11e0cc91125d041 +185, 0xb4cd1649f85e078f +186, 0xb49be408db4e952 +187, 
0xb0b8db46140cce3c +188, 0xba647f2174012be7 +189, 0x4f0a09e406970ac9 +190, 0xf868c7aec9890a5c +191, 0xde4c8fa7498ea090 +192, 0x872ceb197978c1d4 +193, 0x1eb5cd9c3269b258 +194, 0x3ea189f91724f014 +195, 0x41379656f7746f2c +196, 0x7bd18493aca60e51 +197, 0x5380c23b0cbbf15e +198, 0x920b72835f88246b +199, 0x24d7f734a4548b8e +200, 0x9944edb57e5aa145 +201, 0x4628e136ebb8afe1 +202, 0xb4ee6a776356e2a7 +203, 0x481cbe9744ccf7d7 +204, 0x7e8d67e8b0b995d9 +205, 0xeeacde100af7b47e +206, 0x103da08f2487dab7 +207, 0x6b9890a91d831459 +208, 0xd0c5beae37b572c7 +209, 0xfdccc371ee73fcc +210, 0x65438f0a367a2003 +211, 0x5d23b2c818a7e943 +212, 0x9a8ed45ac04b58b3 +213, 0xdaf3c3f1695dce10 +214, 0x5960eec706fa2bc0 +215, 0x98ca652facb80d40 +216, 0x72970ae5e2194143 +217, 0x18c6374d878c5c94 +218, 0x20fa51f997381900 +219, 0x3af253dba26d6e1d +220, 0x1b23d65db15c7f78 +221, 0x9f53ae976259b0e3 +222, 0x9a6addb28dc92d49 +223, 0x1e085c4accd0a7d7 +224, 0xe9d3f4cc9bad6ce5 +225, 0xe018fad78b5b1059 +226, 0x5ef7682232b4b95 +227, 0xb2242aa649f5de80 +228, 0x8f3e6d8dd99b9e4e +229, 0xb9be6cc22949d62a +230, 0xecbdc7beaa5ff1fe +231, 0xd388db43a855bdf0 +232, 0xd71ee3238852568d +233, 0x85ab3056304c04b5 +234, 0x2ed7ae7ad3cfc3cb +235, 0x781d1b03d40b6c48 +236, 0x7d3c740886657e6d +237, 0x982cfa6828daa6b0 +238, 0x278579599c529464 +239, 0x773adecfae9f0e08 +240, 0x63a243ea4b85c5d7 +241, 0x59940074fc3709e1 +242, 0xc914a2eed58a6363 +243, 0x2602b04274dd724c +244, 0xdf636eb7636c2c42 +245, 0x891a334d0d26c547 +246, 0xde8cd586d499e22d +247, 0x3ea1aa4d9b7035b6 +248, 0xd085cff6f9501523 +249, 0xe82a872f374959e +250, 0x55cb495bbd42cc53 +251, 0x5f42b3226e56ca97 +252, 0xea463f6f203493a3 +253, 0xeef3718e57731737 +254, 0x1bd4f9d62b7f9f3c +255, 0x19284f5e74817511 +256, 0xaf6e842c7450ca87 +257, 0x1d27d2b08a6b3600 +258, 0xfb4b912b396a52e3 +259, 0x30804d4c5c710121 +260, 0x4907e82564e36338 +261, 0x6441cf3b2900ddb7 +262, 0xd76de6f51988dc66 +263, 0x4f298ef96fd5e6d2 +264, 0x65432960c009f83d +265, 0x65ebed07e1d2e3df +266, 0xf83ee8078febca20 +267, 0x7bb18e9d74fc5b29 +268, 0x597b5fbc2261d91 +269, 0xea4f8ed0732b15b2 +270, 0xba2267f74f458268 +271, 0x3f304acabd746bbb +272, 0x7bd187af85659a82 +273, 0x88e20dbdb7a08ea3 +274, 0x2a2dc948c772fcb4 +275, 0x87784fec2993c867 +276, 0x89163933cd362d4e +277, 0xfd7b24f04302f957 +278, 0x9bdd544405dfb153 +279, 0xddee0fac58ffc611 +280, 0xa8e8993417e71ec1 +281, 0x55e0ab46ff7757af +282, 0x53e7645f08d3d7df +283, 0xbf78e563bc656ba2 +284, 0x1d162253b45ee2de +285, 0x15e2bfefedf29eb4 +286, 0x4e2a4584aa394702 +287, 0xa89fb12b01525897 +288, 0x825bd98f0544e4df +289, 0xfc6c50da6750700 +290, 0xc24aaabde7d28423 +291, 0x79d6f4660fcb19e5 +292, 0xee7d4fb40c8d659f +293, 0x70bc281b462e811d +294, 0x23ed4dc9636519a7 +295, 0xcb7c3f5a5711b935 +296, 0xe73090e0508c5d9d +297, 0xb25a331f375952a6 +298, 0xa64c86e0c04740f6 +299, 0xb8f3ffc8d56ac124 +300, 0x2479266fc5ee6b15 +301, 0x8d5792d27f5ffbcb +302, 0xb064298be946cd52 +303, 0xf0934a98912ffe26 +304, 0xbe805682c6634d98 +305, 0xe0e6e2c010012b4f +306, 0x58c47d475f75976 +307, 0x358c9a6e646b2b4a +308, 0x7e7c4ffca5b17ba7 +309, 0x43585c8c9a24a04c +310, 0x5154ddbcd68d5c2c +311, 0x4a2b062d3742a5e +312, 0xca5691191da2b946 +313, 0x696a542109457466 +314, 0x9eb5d658a5022ba5 +315, 0x8158cf6b599ab8dc +316, 0x1b95391eaa4af4a6 +317, 0x9953e79bd0fc3107 +318, 0x8639690086748123 +319, 0x2d35781c287c6842 +320, 0x393ef0001cd7bc8f +321, 0xe3a61be8c5f2c22a +322, 0x5e4ff21b847cc29b +323, 0x4c9c9389a370eb84 +324, 0xd43a25a8fc3635fa +325, 0xf6790e4a85385508 +326, 0x37edf0c81cb95e1d +327, 0x52db00d6e6e79af8 +328, 0x3b202bceeb7f096 +329, 
0x2a164a1c776136bb +330, 0x73e03ee3fd80fd1b +331, 0xd2c58c0746b8d858 +332, 0x2ed2cb0038153d22 +333, 0x98996d0fc8ceeacc +334, 0xa4ed0589936b37f +335, 0x5f61cf41a6d2c172 +336, 0xa6d4afb538c110d7 +337, 0xe85834541baadf1a +338, 0x4c8967107fd49212 +339, 0x49bafb762ab1a8c1 +340, 0x45d540e2a834bf17 +341, 0x1c0ec8b4ed671dac +342, 0x3d503ce2c83fe883 +343, 0x437bfffd95f42022 +344, 0xc82d1e3d5c2bc8d2 +345, 0x7a0a9cbfcb0d3f24 +346, 0xc0a4f00251b7a3be +347, 0xb5be24e74bb6a1c6 +348, 0xa3104b94b57545b1 +349, 0x86de7d0c4b97b361 +350, 0x879c1483f26538a6 +351, 0xd74c87557f6accfb +352, 0x2f9be40dbf0fe8a1 +353, 0x445a93398f608d89 +354, 0x7b3cb8a7211d7fdc +355, 0xe86cc51290d031e7 +356, 0x33ef3594052ad79f +357, 0xc61911d241dbb590 +358, 0x37cccb0c0e3de461 +359, 0xb75259124080b48b +360, 0xd81e8961beb4abe5 +361, 0xf4542deb84a754e +362, 0x6ea036d00385f02e +363, 0xa7b60b0ac3b88681 +364, 0x108a6c36ca30baf5 +365, 0x4a2adc5bbfe2bf07 +366, 0x4079501f892a5342 +367, 0x55e113963c5448f0 +368, 0x8019ff4903b37242 +369, 0x109c6dcdb7ec6618 +370, 0x1239ac50944da450 +371, 0xe1399c7f94c651c1 +372, 0x5a6bbbae388d365a +373, 0x4d72be57b8810929 +374, 0x3f067df24384e1fb +375, 0x4f8b9e0f7f6c7be +376, 0x202492c342a3b08 +377, 0x250753192af93a3 +378, 0xfba1159d9de2cb8e +379, 0xba964497ab05505c +380, 0x1329ec5d8a709dca +381, 0x32927cacb6cd22bb +382, 0x6b4d7db904187d56 +383, 0xe76adccf8e841e02 +384, 0x8c4bf4b6a788202 +385, 0x3013a3b409831651 +386, 0x7427d125c475412f +387, 0x84dcc4bb2bf43202 +388, 0x117526f1101372a5 +389, 0xfe95d64b8984bd72 +390, 0x524e129934cc55c1 +391, 0xc3db4b0418c36d30 +392, 0xe1cb2047e9c19f7a +393, 0xea43d6c8d8982795 +394, 0xe80ac8a37df89ed +395, 0xfecc2104329ed306 +396, 0xa5c38aac9c1d51ea +397, 0x3abe5d1c01e4fe17 +398, 0x717a805d97fcc7ac +399, 0x94441f8207a1fb78 +400, 0x22d7869c5f002607 +401, 0x349e899f28c3a1b9 +402, 0x5639950cdea92b75 +403, 0x7e08450497c375b +404, 0x94bf898b475d211d +405, 0x75c761a402375104 +406, 0x1930920ec9d2a1e7 +407, 0xb774ba1bc6f6e4e2 +408, 0xf715602412e5d900 +409, 0x87bb995f4a13f0ba +410, 0xa3c787868dfa9c8d +411, 0xa17fd42a5a4f0987 +412, 0x4a9f7d435242b86 +413, 0x240364aff88f8aef +414, 0xe7cd4cf4bf39f144 +415, 0xd030f313ca4c2692 +416, 0xc46696f4e03ec1e9 +417, 0x22c60f1ec21060b3 +418, 0x16c88058fd68986f +419, 0x69ca448e8e6bde3f +420, 0x3466c2cdec218abd +421, 0x837ac4d05e6b117d +422, 0x911210e154690191 +423, 0x9ece851d6fa358b7 +424, 0x42f79cb0c45e7897 +425, 0xbf7583babd7c499b +426, 0x2059fe8031c6e0b9 +427, 0xabbec8fc00f7e51d +428, 0x88809d86a3a256e1 +429, 0xd36056df829fdcb5 +430, 0x515632b6cb914c64 +431, 0xba76d06c2558874 +432, 0x632c54ca4214d253 +433, 0xadec487adf2cb215 +434, 0x521e663e1940513d +435, 0xb1b638b548806694 +436, 0xbe2d5bfbe57d2c72 +437, 0x8b89e7719db02f7 +438, 0x90ba5281c1d56e63 +439, 0x899e1b92fceea102 +440, 0xf90d918e15182fa6 +441, 0x94a489ce96c948c4 +442, 0xad34db453517fcd4 +443, 0xc5264eb2de15930f +444, 0x101b4e6603a21cee +445, 0xef9b6258d6e85fff +446, 0x6075c7d6c048bd7a +447, 0x6f03232c64e438aa +448, 0x18c983d7105ee469 +449, 0x3ffc23f5c1375879 +450, 0xbc1b4a00afb1f9f +451, 0x5afa6b2bb8c6b46e +452, 0xe7fce4af2f2c152a +453, 0x5b00ab5c4b3982c7 +454, 0x2d4b0c9c0eb4bd0c +455, 0x61d926270642f1f2 +456, 0x7219c485c23a2377 +457, 0x7e471c752fecd895 +458, 0x23c4d30a4d17ba1f +459, 0x65cb277fe565ca22 +460, 0xcbb56ed9c701363b +461, 0xfd04ab3a6eba8282 +462, 0x19c9e5c8bab38500 +463, 0xea4c15227676b65b +464, 0x20f3412606c8da6f +465, 0xb06782d3bf61a239 +466, 0xf96e02d5276a9a31 +467, 0x835d256b42aa52a6 +468, 0x25b09151747f39c1 +469, 0x64507386e1103eda +470, 0x51cbc05716ef88e4 +471, 
0x998cd9b7989e81cc +472, 0x9d7115416bec28d1 +473, 0xc992ca39de97906b +474, 0xd571e6f7ca598214 +475, 0xafc7fb6ccd9abbf8 +476, 0x88ef456febff7bf4 +477, 0xdbe87ccc55b157d2 +478, 0xaab95e405f8a4f6d +479, 0xad586a385e74af4f +480, 0x23cd15225c8485aa +481, 0x370940bf47900ac7 +482, 0xefd6afda1a4b0ead +483, 0x9cb1a4c90993dd7a +484, 0xff7893e8b2f70b11 +485, 0xb09e1807c0638e8e +486, 0xb10915dcb4978f74 +487, 0x88212ab0051a85eb +488, 0x7af41b76e1ec793f +489, 0x2e5c486406d3fefd +490, 0xebe54eff67f513cc +491, 0xab6c90d0876a79b8 +492, 0x224df82f93fe9089 +493, 0xc51c1ce053dc9cd2 +494, 0x5ef35a4d8a633ee7 +495, 0x4aca033459c2585f +496, 0xd066932c6eefb23d +497, 0x5309768aab9a7591 +498, 0xa2a3e33823df37f9 +499, 0xcec77ff6a359ee9 +500, 0x784dc62d999d3483 +501, 0x84e789fb8acc985d +502, 0xd590237e86aa60f +503, 0x737e2ffe1c8ad600 +504, 0xc019c3a39a99eab8 +505, 0x6a39e9836964c516 +506, 0xe0fe43129535d9da +507, 0xdfc5f603d639d4de +508, 0x7b9a7d048a9c03b6 +509, 0xbb5aa520faa27fdd +510, 0x2a09b4200f398fa2 +511, 0x38cc88107904064e +512, 0xa9a90d0b2d92bb25 +513, 0x9419762f87e987e3 +514, 0x1a52c525153dedcd +515, 0xc26d9973dd65ae99 +516, 0x8e89bd9d0dc6e6a1 +517, 0x2f30868dc01bfb53 +518, 0x20f09d99b46501c4 +519, 0x78b468a563b8f1e9 +520, 0xcccf34b0b6c380c7 +521, 0xf554e7dc815297e6 +522, 0x332a585cfb4a50ef +523, 0xa9fb64a2b6da41d7 +524, 0xdcd2a5a337391ce0 +525, 0x8a9bd3e324c6463d +526, 0x9f4487d725503bdd +527, 0xf72282d82f1d0ff +528, 0x308f4160abb72d42 +529, 0x648de1db3a601b08 +530, 0x36cab5192e7ebd39 +531, 0x7975fbe4ab6a1c66 +532, 0xd515b4d72243864e +533, 0x43a568f8b915e895 +534, 0x15fa9f2057bdb91d +535, 0x7a43858ef7a222dc +536, 0x17b4a9175ac074fe +537, 0xa932c833b8d0f8f8 +538, 0x1d2db93a9a587678 +539, 0x98abd1d146124d27 +540, 0xf0ab0431671740aa +541, 0xa9d182467540ad33 +542, 0x41c8a6cfc331b7fc +543, 0xa52c6bd0fcd1d228 +544, 0x2773c29a34dc6fa3 +545, 0x3098230746fc1f37 +546, 0xd63311bb4f23fabe +547, 0x6712bf530cd2faec +548, 0x342e8f342e42c4dd +549, 0xfbd83331851cdcad +550, 0xe903be1361bbc34d +551, 0xd94372e5077e3ef9 +552, 0x95aaa234f194bd8 +553, 0x20c0c8fb11e27538 +554, 0xfaf47dc90462b30b +555, 0x8ddc6d144147682a +556, 0xf626833fd926af55 +557, 0x5df93c34290d1793 +558, 0xb06a903e6e9fca5e +559, 0x10c792dc851d77ca +560, 0xd9b1b817b18e56cb +561, 0x3a81730c408eb408 +562, 0x65052c04a8d4b63c +563, 0x3328546598e33742 +564, 0xeca44a13f62d156d +565, 0x69f83d1d86b20170 +566, 0x937764200412027d +567, 0xc57eb1b58df0f191 +568, 0xa1c7d67dce81bc41 +569, 0x8e709c59a6a579ce +570, 0x776a2f5155d46c70 +571, 0xd92906fbbc373aa5 +572, 0xe97ad478a2a98bf6 +573, 0xc296c8819ac815f +574, 0x613ede67ba70e93e +575, 0xe145222498f99cde +576, 0xafcdfa7a3c1cf9bf +577, 0x1c89252176db670d +578, 0xad245eda5c0865ff +579, 0x249463d3053eb917 +580, 0xc9be16d337517c0b +581, 0xefcc82bf67b8f731 +582, 0x1e01577d029e0d00 +583, 0xad9c24b2a4f3d418 +584, 0xed2cceb510db4d0f +585, 0xbddadcdb92400c70 +586, 0x67d6b0476ef82186 +587, 0xbc7662ff7bf19f73 +588, 0x9d94452a729e6e92 +589, 0x6b278d8594f55428 +590, 0x6c4b31cceb1b2109 +591, 0xccc6c3a726701e9 +592, 0x6bc28ece07df8925 +593, 0xc0422b7bf150ccc4 +594, 0xab7158f044e73479 +595, 0xdf3347546d9ed83f +596, 0x3b3235a02c70dff4 +597, 0x2551c49c14ea8d77 +598, 0xee2f7f5bb3cc228e +599, 0x39b87bfe8c882d39 +600, 0x7dd420fad380b51c +601, 0xffe64976af093f96 +602, 0x4a4f48dc6e7eaa5f +603, 0x85f2514d32fdc8cc +604, 0x1ab1215fd7f94801 +605, 0x4cd1200fc795b774 +606, 0xcf8af463a38942ee +607, 0x319caa7ce3022721 +608, 0x8cd9798a76d1aea4 +609, 0x2bd3933ac7afd34e +610, 0x85d4c323403cf811 +611, 0xd7b956d3064efa30 +612, 0x67a078dbf1f13068 +613, 
0x665fa6c83e87c290 +614, 0x9333ac2416d2469b +615, 0xdfb1fd21a0094977 +616, 0xa1962a6e2c25f8ff +617, 0x1f3b10a7ed5287cf +618, 0x70641efb3d362713 +619, 0xe527a2cf85d00918 +620, 0x9741e45d3f9890a3 +621, 0x6cb74b5d4d36db4b +622, 0xf24734d622bd2209 +623, 0xadd6d94f78e9d378 +624, 0xc3bbdb59225cca7f +625, 0x5ad36614275b30cd +626, 0x495568dd74eea434 +627, 0xf35de47e0ffe1f2d +628, 0xefa209dca719ab18 +629, 0x844ddcaeb5b99ae8 +630, 0x37449670a1dc7b19 +631, 0x5a4612c166f845c1 +632, 0xe70f7782f2087947 +633, 0x98d484deac365721 +634, 0x705302198cf52457 +635, 0x7135ae0f5b77df41 +636, 0x342ac6e44a9b6fc3 +637, 0x2713fd2a59af5826 +638, 0x6e1a3f90f84efa75 +639, 0x9fb3b4dd446ca040 +640, 0x530044ae91e6bd49 +641, 0xe984c4183974dc3e +642, 0x40c1fa961997d066 +643, 0xb7868250d8c21559 +644, 0x8bc929fa085fd1de +645, 0x7bdb63288dc8733e +646, 0xac4faad24326a468 +647, 0x1c6e799833aea0b1 +648, 0xcc8a749e94f20f36 +649, 0x4e7abfd0443547c5 +650, 0xb661c73bb8caa358 +651, 0x4a800f5728ff2351 +652, 0x8c15e15189b9f7ed +653, 0xab367846b811362c +654, 0x4ba7508f0851ca2a +655, 0xe9af891acbafc356 +656, 0xbdebe183989601f8 +657, 0x4c665ea496afc061 +658, 0x3ca1d14a5f2ed7c +659, 0xfbdff10a1027dd21 +660, 0xdfd28f77c8cff968 +661, 0xc4fbaadf8a3e9c77 +662, 0xdac7e448b218c589 +663, 0xb26390b5befd19e2 +664, 0xd2ef14916c66dba9 +665, 0xfab600284b0ff86b +666, 0xf04a1c229b58dabb +667, 0xc21c45637e452476 +668, 0xd1435966f75e0791 +669, 0xc1f28522eda4a2d0 +670, 0x52332ae8f1222185 +671, 0x81c6c0790c0bf47e +672, 0xfebd215e7d8ffb86 +673, 0x68c5dce55dbe962b +674, 0x231d09cb0d2531d1 +675, 0x3218fba199dbbc6b +676, 0x8f23c535f8ea0bf6 +677, 0x6c228963e1df8bd9 +678, 0x9843c7722ed153e3 +679, 0xd032d99e419bddec +680, 0xe2dca88aa7814cab +681, 0x4d53fb8c6a59cdc2 +682, 0x8fb3abc46157b68b +683, 0xa3e733087e09b8e +684, 0x6bdc1aee029d6b96 +685, 0x4089667a8906d65b +686, 0x8f3026a52d39dd03 +687, 0x6d2e0ccb567bae84 +688, 0x74bad450199e464 +689, 0xf114fb68a8f300d5 +690, 0xc7a5cc7b374c7d10 +691, 0xf0e93da639b279d1 +692, 0xb9943841ad493166 +693, 0x77a69290455a3664 +694, 0x41530da2ebea054b +695, 0xe8f9fab03ea24abf +696, 0xaa931f0c9f55a57a +697, 0xb4d68a75d56f97ae +698, 0x3d58ff898b6ba297 +699, 0x49d81e08faf5a3f5 +700, 0xfc5207b9f3697f3b +701, 0xa25911abb3cf19b7 +702, 0x6b8908eb67c3a41 +703, 0xd63ef402e2e3fa33 +704, 0x728e75d3f33b14c5 +705, 0x248cb1b8bc6f379a +706, 0x3aa3d6d2b8c72996 +707, 0x49cc50bd2d3d2860 +708, 0xb4e1387647c72075 +709, 0x435a1630a4a81ed3 +710, 0xa5ea13005d2460cf +711, 0xc7a613df37d159ec +712, 0x95721ccc218b857e +713, 0xd4b70d8c86b124d3 +714, 0x2b82bcc4b612d494 +715, 0xaf13062885276050 +716, 0xcbd8fcf571a33d9c +717, 0x3f7f67ca1125fc15 +718, 0xddf4bb45aac81b4c +719, 0x23606da62de9c040 +720, 0xa3a172375666b636 +721, 0x292f87387a6c6c3c +722, 0xd1d10d00c5496fe1 +723, 0x86b0411ce8a25550 +724, 0x38e0487872e33976 +725, 0x363e49f88ddfd42c +726, 0x45bdf1e9f6b66b0a +727, 0x8a6fff3de394f9b5 +728, 0x8502158bb03f6209 +729, 0x22e24d16dba42907 +730, 0x3fe3ba427cc2b779 +731, 0x77144793f66b3d7e +732, 0xcf8912ccb29b8af9 +733, 0xdc856caff2abd670 +734, 0xe6d3ae0b0d9d4c8b +735, 0xb8f5d40e454c539f +736, 0x79ca953114fbc6b7 +737, 0x478d6f4bbfa38837 +738, 0x9babae1a3ffdc340 +739, 0x40edd56802bae613 +740, 0x97a56c2dcccf0641 +741, 0xafc250257f027f8e +742, 0x8da41ef1edf69125 +743, 0x6574b0280ff9d309 +744, 0x197c776151b8f820 +745, 0x6b03e077c9dac3b6 +746, 0x24a40ebbc5c341c5 +747, 0x50e585169a6a1c4b +748, 0x37783a5a6a3e4e02 +749, 0xb3de81ee6fbad647 +750, 0xf4f292f57ca4591e +751, 0x6214e9e7d44d30a +752, 0x5920190c56d21c12 +753, 0x9ac163419b5e0c9b +754, 0xfc2328761ae8ed93 +755, 
0xc68f945b545508c6 +756, 0x687c49a17ce0a5e2 +757, 0x276d8f53d30d4ab4 +758, 0x8201804970343ce1 +759, 0x1b5d323cc2e7fb7e +760, 0x6f351ef04fd904b +761, 0x6c793a7d455d5198 +762, 0x46f5d108430ae91f +763, 0xac16a15b2a0cf77f +764, 0xa0d479d9e4122b9d +765, 0x3afd94604307f19 +766, 0x2573ed6d39d38dbf +767, 0xa58e14ba60b4294b +768, 0xe69c1aed5840d156 +769, 0x4cf6fda7f04855c2 +770, 0x2fb65a56ef5f22da +771, 0xf95819434d5dc220 +772, 0x29c65133623dafba +773, 0x8e997bd018467523 +774, 0xfd08ba9d498461a7 +775, 0xdd52243bc78a5592 +776, 0x39c30108f6db88b3 +777, 0x38af8e1894f259b9 +778, 0x97eedf3b4ae5f6de +779, 0x757825add80c5ece +780, 0xf0fdd90ac14edb14 +781, 0xbbb19d4cc8cac6d4 +782, 0x9a82234edfae05e3 +783, 0x704401c61d1edf1c +784, 0x8b0eb481fb3a1fb2 +785, 0xef6f36e7cc06c002 +786, 0x7a208b17e04b8cd7 +787, 0xf20e33d498838fe9 +788, 0xc2bdb22117058326 +789, 0x6ec31939eb4ca543 +790, 0x6f1654838f507a21 +791, 0xc65ab81a955d2b93 +792, 0x40b1420fdd9531b8 +793, 0xe31f221cab9f4f40 +794, 0x798cdd414c1deb7a +795, 0x9c84e9c7d41cd983 +796, 0x63d6b1ae3b60b7fa +797, 0xb42bfdd1a2f78ffa +798, 0x37e431eaccaaa8e9 +799, 0x7508142a0f73eac9 +800, 0x91662a023df5893a +801, 0x59782070e2fe3031 +802, 0xb2acd589a8ce7961 +803, 0xa224743fa877b292 +804, 0xaa5362aa27e6ed9e +805, 0xa394a4e520c0c1c7 +806, 0xe49b16d2018ffb6f +807, 0xb8074b9f2f1e762b +808, 0xcf5f86143d5c23a7 +809, 0xfd838785db987087 +810, 0x31b1889df389aff8 +811, 0x30aaca876a4383b +812, 0x1731bb71c4c38d4f +813, 0x9a83a65395e05458 +814, 0x99cd0c8d67c8f4fc +815, 0xfbd9fdc849b761a5 +816, 0x82c04834fc466889 +817, 0xdeef9d6e715e8c97 +818, 0x549c281c16da6078 +819, 0x2d70661254ad599d +820, 0x57995793a72acac +821, 0xf1727005116183ba +822, 0xa22bb38945285de3 +823, 0x4f2d687fe45131ff +824, 0x5666c87ddbbc981f +825, 0xbcb4b2d4e7a517d0 +826, 0x5e794dd2e20b785d +827, 0x449ad020149e093c +828, 0x7704ee0412d106f5 +829, 0x83cbdf257b072ac1 +830, 0xae5c4fc9f638b0da +831, 0x7b9e5a64e372ed47 +832, 0x7eddbbb22c2cdf57 +833, 0x3f19ebfa155b08e +834, 0x91d991154dfd7177 +835, 0x611ae74b952d387f +836, 0x3fdf7a335bda36ee +837, 0xdf182433fc7a7c05 +838, 0x62c78598d1f8db0a +839, 0xc3750c69d2c5c1f0 +840, 0xf1318024709efdee +841, 0xaa3fd360d224dc29 +842, 0x62af53b2f307c19 +843, 0xdf527683c58120c2 +844, 0x3281deecc496f93d +845, 0x4f704ad31527ef08 +846, 0x127a14a5e07cfdfc +847, 0x90d0b1f549255c92 +848, 0xbc3406b212c5e1fc +849, 0x4e89f39379dba91d +850, 0x1290ef43c4998e6e +851, 0xecfeb1a1cb1c6e1b +852, 0x2067e90403003bf1 +853, 0x38ae04be30bdbeba +854, 0x8a3537f298baedda +855, 0xd07f3b825cdb2936 +856, 0xea020b5aebae8b45 +857, 0xfcd614ab031132b0 +858, 0x5fb682a4ff2268f5 +859, 0xd1c4662ce65596f4 +860, 0x7026b8270dd0b8dc +861, 0x8101ec4b4beae45a +862, 0xa0e9dc87940610a6 +863, 0x83ec33679d83165b +864, 0x981847ca82e86d41 +865, 0xda84c188a304a0b7 +866, 0x3c37529c5a5bbbb8 +867, 0x34a8491ce3e19a5a +868, 0xd36ad716a2fa6cb8 +869, 0xfd1d1d6a5189a15c +870, 0x9716eb47851e8d8d +871, 0x7dfb13ea3b15c5aa +872, 0xbdf6e707f45113a5 +873, 0xb8118261b04bd097 +874, 0x6191f9895881bec6 +875, 0x7aac257ae11acf9b +876, 0x35a491e1537ff120 +877, 0xe078943432efa71c +878, 0xb3338485dd3dc2b9 +879, 0x456060975d2bb3b5 +880, 0xaddc4c451bdfc44c +881, 0x18bfa7beacf96430 +882, 0x8802ebcaf0f67498 +883, 0xad922a5a825bd780 +884, 0x9fb4587d748f4efa +885, 0xdb2a445136cd5e7 +886, 0xb98b3676ea8e96ac +887, 0xb02d8d244d784878 +888, 0xa1a8442b18860abb +889, 0x6a3029ba1361e5d1 +890, 0xf426d5fac161eb1 +891, 0xfa5ac2b87acecb23 +892, 0xaa659896e50535df +893, 0xf40dd7a3d3c5c8ed +894, 0x3f8367abecb705bc +895, 0x2d60e7525873358f +896, 0xc4a9d3948a0c3937 +897, 
0x5ecc04fef6003909 +898, 0x7a865004918cba2 +899, 0x47ae110a678ec10b +900, 0xa0f02f629d91aa67 +901, 0x4848b99e7fac9347 +902, 0xaa858346d63b80ac +903, 0xeb5bf42ee161eeef +904, 0x4d35d723d3c6ba37 +905, 0xdf22ca6ca93b64a7 +906, 0x9d198520f97b25b1 +907, 0x3068415350778efe +908, 0xf3709f2e8793c2fe +909, 0xd1517bac8dd9f16f +910, 0xfb99bccaa15861dc +911, 0xa9ad607d796a2521 +912, 0x55d3793d36bd22e4 +913, 0xf99270d891ff7401 +914, 0x401750a5c4aa8238 +915, 0xd84b3003e6f28309 +916, 0x8a23798b5fa7c98b +917, 0xadd58bbc8f43e399 +918, 0xbd8c741ada62c6a8 +919, 0xbdc6937bc55b49fa +920, 0x4aefa82201b8502 +921, 0x17adf29a717b303 +922, 0xa6ed2197be168f6c +923, 0x1ba47543f4359a95 +924, 0xe34299949ac01ae9 +925, 0x711c76cffc9b62f3 +926, 0xbac259895508a4b7 +927, 0x3c8b3b3626b0d900 +928, 0x1a8d23fbe2ae71bf +929, 0xca984fa3b5a5c3a1 +930, 0xb1986ab7521a9c93 +931, 0xd6b5b2c8d47a75b5 +932, 0xc7f1c4a88afb4957 +933, 0xdeb58033a3acd6cc +934, 0xabe49ddfe1167e67 +935, 0x8d559c10205c06e3 +936, 0xea07a1a7de67a651 +937, 0xcbef60db15b6fef8 +938, 0xbfca142cff280e7 +939, 0x362693eba0732221 +940, 0x7463237e134db103 +941, 0x45574ddb5035e17a +942, 0xfc65e0cb9b94a1aa +943, 0x3154c55f1d86b36d +944, 0x2d93a96dd6ab2d8b +945, 0xbe3bc1d1f2542a25 +946, 0xdd4b541f7385bdaa +947, 0x3b56b919d914e3f8 +948, 0x82fd51468a21895f +949, 0x8988cf120731b916 +950, 0xa06a61db5fb93e32 +951, 0x6ed66c1b36f68623 +952, 0x875ae844d2f01c59 +953, 0x17ccd7ac912e5925 +954, 0x12fe2a66b8e40cb1 +955, 0xf843e5e3923ad791 +956, 0xa17560f2fd4ef48 +957, 0x27a2968191a8ee07 +958, 0xa9aab4d22ff44a3c +959, 0x63cd0dcc3bb083ae +960, 0x7a30b48c6160bf85 +961, 0x956160fb572503b3 +962, 0xc47f6b7546640257 +963, 0xaf4b625f7f49153 +964, 0x2f5c86a790e0c7e8 +965, 0xb52e0610ae07f0b8 +966, 0x38a589292c3d849e +967, 0xc3e9ef655d30b4ef +968, 0xb5695f765cda998a +969, 0xde5d5e692a028e91 +970, 0x839476721555f72e +971, 0x48b20679b17d9ebf +972, 0xe3d4c6b2c26fb0df +973, 0xce5a9834f0b4e71f +974, 0x533abb253d5d420e +975, 0x9eac5ad9aed34627 +976, 0xc0f2a01ab3c90dbb +977, 0x6528eda93f6a066c +978, 0xc16a1b625e467ade +979, 0x1a4a320fb5e8b098 +980, 0x8819cccd8b4ab32f +981, 0x42daa88531fd0bfd +982, 0xcf732226409be17c +983, 0xfddcdb25ccbf378c +984, 0x9b15b603bf589fc1 +985, 0x2436066b95d366fe +986, 0x8d42eff2e9cbda90 +987, 0x694b2fc8a4e8303c +988, 0x8e207f98aaea3ccd +989, 0x4730d7a620f822d9 +990, 0x468dc9ca30fe2fd4 +991, 0x74b36d8a1c0f031b +992, 0x3c1aac1c488c1a94 +993, 0x19d0101042444585 +994, 0x8ec50c56d0c8adf4 +995, 0x721ec629e4d66394 +996, 0x3ca5ad93abeac4a4 +997, 0xaaebc76e71592623 +998, 0x969cc319e3ed6058 +999, 0xc0a277e3b2bfc3de diff --git a/.local/lib/python3.8/site-packages/numpy/random/tests/data/philox-testset-1.csv b/.local/lib/python3.8/site-packages/numpy/random/tests/data/philox-testset-1.csv new file mode 100644 index 0000000..e448cbf --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/tests/data/philox-testset-1.csv @@ -0,0 +1,1001 @@ +seed, 0xdeadbeaf +0, 0xedc95200e2bd66a5 +1, 0x581d4e43b7682352 +2, 0x4be7278f5e373eab +3, 0xee47f17991a9e7ea +4, 0x38a7d2ae422f2e2c +5, 0xe2a6730a3b4a8a15 +6, 0x1588b7a841486442 +7, 0x13ad777246700504 +8, 0x14d157e0f5e18204 +9, 0xd87c22a7ee8c13f1 +10, 0x30cc389ce3542ba1 +11, 0xb8a53348955bb2e9 +12, 0xc08802e3c454f74f +13, 0xb444f627671a5780 +14, 0x4b6dd42b29cbf567 +15, 0x6109c7dc0bc5f7d5 +16, 0x85c954715d6b5b1e +17, 0x646178d3d9a3a5d5 +18, 0xebbde42b1cd83465 +19, 0x3d015102f6bc9c1a +20, 0x720fe2ec3798d5fd +21, 0x93120961289ceb2e +22, 0xc9207e960a56fae2 +23, 0xa7f042f31d991b98 +24, 0x5fac117415fae74b +25, 0xd0a970ba8dddc287 +26, 
0x84b4e7e51b43106 +27, 0x6ad02bf525ea265f +28, 0xcdc7e5992b36ef8f +29, 0x44d4985209261d60 +30, 0x628c02d50f4b902e +31, 0xc7b1914922d1e76d +32, 0xfde99ff895cba51d +33, 0x175a0be050fa985f +34, 0x47297d3699e03228 +35, 0xccf1e9aeaa3339cd +36, 0x9fdd18ebeeaf15b1 +37, 0x7c94c9ab68747011 +38, 0x612d8ef22c1fa80f +39, 0x13f52b860de89ab5 +40, 0x81f264b8c139c43b +41, 0x8d017ba4ef1e85ba +42, 0x6d0556f46219951e +43, 0x8ee7b85663cf67b6 +44, 0x2432fc707645fe67 +45, 0xaf814046051e5941 +46, 0x4d432a83739ac76f +47, 0x59e5060d0983ccdd +48, 0xdd20e828b83d9b53 +49, 0x1b891800d7385f4c +50, 0x10e86a026c52ff5e +51, 0xb932f11723f7b90c +52, 0xb2413d0a1f3582d0 +53, 0xe7cd4edda65fc6b5 +54, 0x6d3808848d56593b +55, 0x192a727c3c7f47d9 +56, 0x9659d8aea5db8c16 +57, 0x4242c79fe2c77c16 +58, 0x605f90c913827cea +59, 0x53e153c8bfc2138a +60, 0xed2158fbdef5910e +61, 0xae9e6e29d4cb5060 +62, 0x7dd51afaad3b11ce +63, 0x2b9ba533d01a5453 +64, 0x7e0e9cf2b6c72c8 +65, 0x1cc8b3c7747ed147 +66, 0x9b102651e2e11b48 +67, 0x30b0b53cbaac33ea +68, 0x70c28aec39b99b85 +69, 0x5f1417ff536fdb75 +70, 0x3a1d91abd53acf58 +71, 0xba116a1772168259 +72, 0xf5369bc9bd284151 +73, 0x67bf11373bf183ca +74, 0xef0b2d44dbd33dc7 +75, 0xbfd567ee1a2953ed +76, 0x7d373f2579b5e5c6 +77, 0x756eeae7bcdd99be +78, 0x75f16eb9faa56f3b +79, 0x96d55ded2b54b9a5 +80, 0x94495191db692c24 +81, 0x32358bdd56bab38c +82, 0x3f6b64078576579 +83, 0x7177e7948bc064c9 +84, 0x2cbf23f09ba9bc91 +85, 0x9b97cc31c26645f5 +86, 0x5af2d239ff9028b1 +87, 0x316fa920e0332abe +88, 0x46535b7d1cae10a0 +89, 0x21f0a6869298022c +90, 0xf395c623b12deb14 +91, 0x8573995180675aa7 +92, 0xc3076509f4dc42d5 +93, 0x15e11e49760c6066 +94, 0xe8a6d311e67a021d +95, 0x7482f389c883339b +96, 0xda6f881573cba403 +97, 0xb110ffb847e42f07 +98, 0x2c3393140605ccf9 +99, 0xba1c8ba37d8bdc33 +100, 0x59adf43db7a86fe0 +101, 0xb4fcbf6aa585ca85 +102, 0xd794a93c18033fa6 +103, 0x6e839c01985f9d4 +104, 0x64065bf28222b2c7 +105, 0x6a6359b293fa0640 +106, 0x5ff610969e383e44 +107, 0xa8172c263f05c7f7 +108, 0x62a0172e8bd75d07 +109, 0x7be66e3c453b65ac +110, 0x6a3b8d5a14014292 +111, 0xa2583e6087450020 +112, 0xd5d3ecc480c627d2 +113, 0xa24e83f1eec8a27c +114, 0xa23febd2a99ee75a +115, 0x9a5fbf91c7310366 +116, 0x5b63156932e039b +117, 0x942af3c569908505 +118, 0x89a850f71ab6a912 +119, 0xfeadc803ac132fe9 +120, 0x67bf60e758250f3 +121, 0x533c25103466a697 +122, 0xb7deede3482f9769 +123, 0x325e043b53bba915 +124, 0x9e8d9e7fde132006 +125, 0x6bacc6860bbc436e +126, 0xb3ea0534c42b1c53 +127, 0xb2389334db583172 +128, 0xa74b1bfbf5242ee4 +129, 0x53a487e2dc51d15c +130, 0xe5a3b538d2c7a82e +131, 0x7b6c70bb0c4cadaf +132, 0xae20791b2081df1 +133, 0xc685c12e3c61d32c +134, 0x60110e6b0286e882 +135, 0x49682119c774045c +136, 0x53dc11a3bbd072e +137, 0xbdc87c6e732d9c2d +138, 0xcc4620861ebac8fd +139, 0x7e9c3558759350cc +140, 0x157408dee34891ba +141, 0x9bcad1855b80651b +142, 0xd81b29141d636908 +143, 0x1ed041a9f319c69d +144, 0x805b2f541208b490 +145, 0x484ef3bba2eb7c66 +146, 0xb6b5e37d50a99691 +147, 0xabc26a7d9e97e85f +148, 0xcba2a3cce0417c2f +149, 0xa030dfffd701993c +150, 0x2bf2dc50582ebf33 +151, 0xd9df13dd3eb9993e +152, 0x31ca28b757232ae5 +153, 0x614562a0ccf37263 +154, 0x44d635b01725afbb +155, 0x5ae230bc9ca9cd +156, 0xb23a124eb98705c6 +157, 0x6395675444981b11 +158, 0xd97314c34119f9ca +159, 0x9de61048327dd980 +160, 0x16bac6bded819707 +161, 0xcea3700e3e84b8c7 +162, 0xaa96955e2ee9c408 +163, 0x95361dcc93b5bc99 +164, 0x306921aed3713287 +165, 0x4df87f3130cd302a +166, 0x37c451daeb6a4af5 +167, 0x8dbbe35f911d5cc1 +168, 0x518157ce61cb10f9 +169, 0x669f577aebc7b35b +170, 0x4b0a5824a8786040 +171, 
0x519bc3528de379f5 +172, 0x6128012516b54e02 +173, 0x98e4f165e5e6a6dd +174, 0x6404d03618a9b882 +175, 0x15b6aeb3d9cd8dc5 +176, 0x87ed2c1bae83c35b +177, 0x8377fc0252d41278 +178, 0x843f89d257a9ba02 +179, 0xcdda696ea95d0180 +180, 0xcfc4b23a50a89def +181, 0xf37fd270d5e29902 +182, 0xafe14418f76b7efa +183, 0xf984b81577076842 +184, 0xe8c60649ccb5458d +185, 0x3b7be8e50f8ff27b +186, 0xaa7506f25cef1464 +187, 0x5e513da59f106688 +188, 0x3c585e1f21a90d91 +189, 0x1df0e2075af292a +190, 0x29fdd36d4f72795f +191, 0xb162fe6c24cb4741 +192, 0x45073a8c02bd12c4 +193, 0xcbaaa395c2106f34 +194, 0x5db3c4c6011bc21c +195, 0x1b02aac4f752e377 +196, 0xa2dfb583eb7bec5 +197, 0xfe1d728805d34bb1 +198, 0xf647fb78bb4601ec +199, 0xd17be06f0d1f51ef +200, 0x39ec97c26e3d18a0 +201, 0xb7117c6037e142c8 +202, 0xe3a6ce6e6c71a028 +203, 0xe70a265e5db90bb2 +204, 0x24da4480530def1e +205, 0xfd82b28ce11d9a90 +206, 0x5bf61ead55074a1d +207, 0xbe9899c61dec480d +208, 0xae7d66d21e51ec9e +209, 0x384ee62c26a08419 +210, 0x6648dccb7c2f4abf +211, 0xc72aa0c2c708bdc9 +212, 0x205c5946b2b5ba71 +213, 0xd4d8d0b01890a812 +214, 0x56f185493625378d +215, 0x92f8072c81d39bd0 +216, 0xa60b3ceecb3e4979 +217, 0xfcf41d88b63b5896 +218, 0xf5a49aa845c14003 +219, 0xffcc7e99eee1e705 +220, 0xdd98312a7a43b32d +221, 0xa6339bd7730b004 +222, 0xdac7874ba7e30386 +223, 0xadf6f0b0d321c8 +224, 0x126a173ae4ffa39f +225, 0x5c854b137385c1e7 +226, 0x8173d471b1e69c00 +227, 0x23fa34de43581e27 +228, 0x343b373aef4507b1 +229, 0xa482d262b4ea919c +230, 0xf7fbef1b6f7fbba +231, 0xd8ce559487976613 +232, 0xbf3c8dd1e6ebc654 +233, 0xda41ed375451e988 +234, 0xf54906371fd4b9b3 +235, 0x5b6bb41231a04230 +236, 0x866d816482b29c17 +237, 0x11315b96941f27dc +238, 0xff95c79205c47d50 +239, 0x19c4fff96fbdac98 +240, 0xbfb1ae6e4131d0f4 +241, 0x9d20923f3cdb82c9 +242, 0x282175507c865dff +243, 0xdfd5e58a40fe29be +244, 0xedbd906ff40c8e4f +245, 0x11b04fc82614ccb3 +246, 0xeceb8afda76ae49f +247, 0xa4856913847c2cdf +248, 0x6f1425f15a627f2a +249, 0xdf144ffedf60349e +250, 0x392d7ecfd77cc65f +251, 0x72b8e2531049b2c6 +252, 0x5a7eb2bdb0ec9529 +253, 0xdcfd4306443e78c1 +254, 0x89ad67ed86cd7583 +255, 0x276b06c0779a6c8f +256, 0xb2dbb723196a0ac3 +257, 0x66c86a3b65906016 +258, 0x938348768a730b47 +259, 0x5f5282de938d1a96 +260, 0xa4d4588c4b473b1f +261, 0x8daed5962be4796f +262, 0x9dde8d796985a56e +263, 0x46be06dbd9ed9543 +264, 0xdf98286ceb9c5955 +265, 0xa1da1f52d7a7ca2b +266, 0x5a7f1449f24bbd62 +267, 0x3aedc4e324e525fd +268, 0xced62464cd0154e1 +269, 0x148fc035e7d88ce3 +270, 0x82f8878948f40d4c +271, 0x4c04d9cdd6135c17 +272, 0xdf046948d86b3b93 +273, 0x2f0dec84f403fe40 +274, 0xa61954fb71e63c0d +275, 0x616d8496f00382e8 +276, 0x162c622472746e27 +277, 0x43bcfe48731d2ceb +278, 0xff22432f9ff16d85 +279, 0xc033ed32bb0ad5a4 +280, 0x5d3717cc91c0ce09 +281, 0x7a39a4852d251075 +282, 0x61cd73d71d6e6a6 +283, 0xe37e2ea4783ab1a5 +284, 0x60e1882162579ea8 +285, 0x9258ec33f1a88e00 +286, 0x24b32acf029f0407 +287, 0x1410fc9aea6d3fac +288, 0x6054cf2a3c71d8f7 +289, 0x82f7605157a66183 +290, 0x3b34c1c0dff9eac5 +291, 0xfebe01b6d5c61819 +292, 0x7372187c68b777f2 +293, 0xc6923812cda479f0 +294, 0x386613be41b45156 +295, 0x92cfebe8cc4014b +296, 0x8e13c4595849828b +297, 0x90e47390d412291f +298, 0x6b21a1d93d285138 +299, 0xbf5b1f5922f04b12 +300, 0x21e65d1643b3cb69 +301, 0xf7683b131948ac3c +302, 0xe5d99fc926196ed2 +303, 0x7b138debbec90116 +304, 0x8a2650a75c2c2a5c +305, 0x20689a768f9b347b +306, 0xdfa2900cfb72dc6e +307, 0x98959c3855611cc2 +308, 0x5fdb71b89596cc7c +309, 0x1c14ac5c49568c7b +310, 0x958c4293016091fe +311, 0x7484522eb0087243 +312, 0xc4018dfb34fc190f +313, 
0xca638567e9888860 +314, 0x102cd4805f0c0e89 +315, 0xcc3bc438e04548f8 +316, 0xb808944bb56ea5be +317, 0xffd4778dbf945c57 +318, 0xfe42617784c0233b +319, 0x3eccbfeae9b42d3c +320, 0xd9f1b585fd0bfa60 +321, 0x5c063d1b2705d5dd +322, 0x8e8bec3519941b64 +323, 0x9e94c36cbec2a42 +324, 0x1cd19f5b64ffd3ad +325, 0x9632e3aebfc68e66 +326, 0x98960c2d9da4ae45 +327, 0xb76994b1f2bbfc1f +328, 0xca184a737d3971cc +329, 0x964d31b07183adfb +330, 0xe9e0ff351cd276d4 +331, 0xb5747c860b05bbe4 +332, 0x5549ddc3bd3862e2 +333, 0x495496677b27873b +334, 0x53910baa26e3ea18 +335, 0xaa07a07ad0a688d3 +336, 0xbb43bd1f09ecdb1e +337, 0xe2ebc105699dd84 +338, 0x6e815a2729584035 +339, 0x2caab1713b17948a +340, 0x43d39d209fa41c90 +341, 0xfe3e71089d5d1c3a +342, 0xa778646c32f81177 +343, 0x8d42bfb86e6e92d5 +344, 0x175571f70b4fcfbe +345, 0x2a66a6fe10dc3b5b +346, 0xd9545e85235ca709 +347, 0x5642781c77ced48a +348, 0x24facc40b72ccd09 +349, 0xa800fbacce33f6f8 +350, 0x675f58a0ff19fba +351, 0x35aedf57bb5cde1b +352, 0xe5535a6b63f6d068 +353, 0x84dffd0102aaa85d +354, 0x621faad65467aaa7 +355, 0x596ad85b556b112f +356, 0x837545fff8894c7a +357, 0x3d9a4ae1356bc6a6 +358, 0xcd8b7153205d4ad0 +359, 0x98afdd40f1ed09a6 +360, 0xa38b2dc55a5cf87f +361, 0x484aecce2b6838bc +362, 0x6af05c26bdab18d9 +363, 0xf418b7399dcf2e4b +364, 0x1cfa38789b0d2445 +365, 0xfbed23c34166ee67 +366, 0x38e6820039e4912a +367, 0x1fe94911e963591e +368, 0x1291c79aee29ad70 +369, 0x65eccfc89506f963 +370, 0x7d14de3b2f55b1f6 +371, 0x82eb79c36cd2a739 +372, 0x41ffe3b75ea0def5 +373, 0x9eba9156470a51d9 +374, 0xd17c00b981db37d1 +375, 0xf688769a75601aa7 +376, 0xbcf738e9e03d571e +377, 0x14712e56df8f919b +378, 0xab14e227d156e310 +379, 0xf53d193e993e351e +380, 0x857fae46bd312141 +381, 0xc2dd71e41b639966 +382, 0x74f8b987a3d00ad1 +383, 0x5bce8526dc527981 +384, 0x94910926c172a379 +385, 0x503c45557688a9d5 +386, 0x244d03834e05807f +387, 0x6e014cbab9c7a31f +388, 0xae544c638530facf +389, 0x9b853aaaf9cbc22d +390, 0xfb42ab7024d060ed +391, 0x74cc3fba0dfd7ff2 +392, 0x24ec9e8f62144ad5 +393, 0x72f082954307bbe7 +394, 0x36feda21bbf67577 +395, 0x3222191611b832f1 +396, 0xd0584e81bcac8b0b +397, 0xdce8d793ef75e771 +398, 0x978824c6c2578fc +399, 0x6e8f77503b3c2ee4 +400, 0xc85d2d86fecf5d03 +401, 0x3d35b4a5d4d723c4 +402, 0xd3987dfd4727fff3 +403, 0xd3cde63fb6a31add +404, 0xf6699e86165bdaeb +405, 0x9d60ba158ec364c4 +406, 0x920c3c18b346bfc9 +407, 0x770fd1fdfbc236ca +408, 0x45998cfc5fc12ddd +409, 0xd74a3454e888834b +410, 0xbf2aa68081a4a28f +411, 0xea41b26a6f1da1b3 +412, 0x5560a2d24b9d5903 +413, 0xe3791f652a228d8b +414, 0x365116d3b5a8520c +415, 0xb1b2bd46528f8969 +416, 0xfcfe14943ef16ae7 +417, 0xf4d43425e8a535dc +418, 0xe6cf10a78782a7e0 +419, 0x9c7ac0de46556e3e +420, 0xc667ae0856eed9ef +421, 0x47dbb532e16f9c7e +422, 0xdf4785a5d89ee82e +423, 0xbd014925ce79dbcf +424, 0xea0d663fb58fa5be +425, 0x51af07d5cc3821fb +426, 0x27a1bdcdc4159a9d +427, 0x520c986c59b1e140 +428, 0x50b73fd9bacd5b39 +429, 0xae5240641f51e4f3 +430, 0x71faecc164ed9681 +431, 0xda95aa35529a7ee +432, 0xe25ba29b853c1c6d +433, 0x9871a925cda53735 +434, 0xde481ad8540e114d +435, 0xa2997f540e8abca0 +436, 0xc9683c5035e28185 +437, 0x1082471b57182bac +438, 0xbd3ecf0f0b788988 +439, 0xf479760776fbb342 +440, 0x3730929200d91f44 +441, 0xc1762d79ae72809c +442, 0xfaa0a4c7b1686cb3 +443, 0xd581e6d55afdafcd +444, 0x6cf57bdfba2dcf6d +445, 0xdef79d9fe6a5bcef +446, 0x13ed376e18132bd3 +447, 0xbe67efd72defa2a +448, 0x5acc176c468966ea +449, 0x8b35b626af139187 +450, 0x446de3fac0d973ac +451, 0xe1d49e06dc890317 +452, 0x817bc3fd21fc09b7 +453, 0xb71c3958a13d5579 +454, 0x8746e010f73d7148 +455, 
0x1b61c06009922e83 +456, 0xba17e62e6b092316 +457, 0x1375fa23c4db8290 +458, 0x3f071230f51245a6 +459, 0x51c99a086a61cd13 +460, 0x5f0f2ae78589e1fd +461, 0x604834e114bbbc27 +462, 0x5eb2a7a34814e9a9 +463, 0x77a6907f386bf11e +464, 0x99525de2bd407eeb +465, 0xb818348c57b3b98f +466, 0x25f5f9e702fbe78d +467, 0x8f66669e6f884473 +468, 0x1e47d46e2af4f919 +469, 0xf6a19df846476833 +470, 0xff00c67bcd06621f +471, 0xe3dfe069795d72d8 +472, 0x8affc88b2fea4d73 +473, 0x66df747e5f827168 +474, 0xf368ec338d898a0e +475, 0x9e1f1a739c5984a2 +476, 0x46a1c90e1ca32cbc +477, 0xc261bc305ed8d762 +478, 0x754d7949f7da9e72 +479, 0x4c8fbbb14ef47b17 +480, 0xccbdc67a3848d80d +481, 0x3c25e6f58bae751d +482, 0x7078b163b936d9b6 +483, 0x440e27463c134ecf +484, 0x6c83ee39f324db0f +485, 0x27cf901b22aea535 +486, 0x57262dec79a3f366 +487, 0x91db09f1dbb524fb +488, 0xd7436eefba865df2 +489, 0x16c86b0a275a3f43 +490, 0x689493e6681deaa9 +491, 0x7e1dc536c1a9ac42 +492, 0x1145beac3ac7f5cc +493, 0x3d05e211a104b2b0 +494, 0x4f9e77ced3c52f44 +495, 0x53de1369354add72 +496, 0x1fb60f835f47cdeb +497, 0x6ab36f089e40c106 +498, 0xaabffcb0d3d04c7 +499, 0xaa399686d921bd25 +500, 0x2bf8dd8b6d6fa7f0 +501, 0x1ddbf4e124329613 +502, 0x466a740241466a72 +503, 0x98d7381eb68a761 +504, 0x817691510bc4857a +505, 0x8837622c0171fe33 +506, 0xcba078873179ee16 +507, 0x13adad1ab7b75af4 +508, 0x3bac3f502428840c +509, 0xbeb3cce138de9a91 +510, 0x30ef556e40b5f0b4 +511, 0x19c22abdf3bbb108 +512, 0x977e66ea4ddc7cf +513, 0x9f4a505f223d3bf3 +514, 0x6bc3f42ac79ec87b +515, 0x31e77712158d6c23 +516, 0x6d8de4295a28af0d +517, 0xee1807dbda72adb7 +518, 0xda54140179cd038f +519, 0x715aa5cdac38e062 +520, 0x5a7e55e99a22fa16 +521, 0xf190c36aa8edbe4f +522, 0xccadd93a82c1d044 +523, 0x7070e6d5012c3f15 +524, 0x50a83341a26c1ba5 +525, 0x11bca7cc634142e5 +526, 0x623a0d27867d8b04 +527, 0x75c18acff54fbf6e +528, 0x455ae7d933497a6f +529, 0xf624cf27d030c3d3 +530, 0x7a852716f8758bac +531, 0xe7a497ac1fa2b5b4 +532, 0xf84f097498f57562 +533, 0xc4bb392f87f65943 +534, 0x618e79a5d499fbfb +535, 0xb3c0b61d82b48b8 +536, 0x4750a10815c78ea7 +537, 0x9cf09cca3ddece69 +538, 0x2a69f1c94cc901a2 +539, 0x347a0e446e1ce86d +540, 0xb06f3a5a5ab37bb1 +541, 0x8035bd0713d591db +542, 0x539c9637042c3a1f +543, 0xd7ba4dc6b273cbd7 +544, 0x12f3f99933444f85 +545, 0x4a9517b9783fb9a4 +546, 0x6422b2ea95093bc5 +547, 0x3a5ecff0f996c2a6 +548, 0x31de504efc76a723 +549, 0x7ccb7c5233c21a9f +550, 0xc687d9e6ce4186e8 +551, 0x6e40769d6940376a +552, 0xf51207314f1f7528 +553, 0x67ee3acb190865e3 +554, 0xe08d586270588761 +555, 0xe387fa489af1a75c +556, 0x73414a52d29d8375 +557, 0x671a38191cf2a357 +558, 0xe00fb25b1aa54008 +559, 0x11a0610e22cf549b +560, 0xc90cc865d57c75be +561, 0x90d0863cc15f2b79 +562, 0x8b3e60d32ebcb856 +563, 0xb28cc55af621e04a +564, 0xcf60bd3cb2a5ab1d +565, 0x212cb5d421948f86 +566, 0xee297b96e0a3363f +567, 0x4e9392ff998760d1 +568, 0x61940c8d0105ba3e +569, 0x14ebcbae72a59a16 +570, 0xdf0f39a3d10c02af +571, 0xfc047b2b3c1c549d +572, 0x91718b5b98e3b286 +573, 0x9ea9539b1547d326 +574, 0x7a5a624a89a165e6 +575, 0x145b37dcaa8c4166 +576, 0x63814bbb90e5616c +577, 0xc4bc3ca6c38bb739 +578, 0x853c3a61ddc6626c +579, 0xa7ce8481c433829a +580, 0x8aff426941cc07b +581, 0x2dc3347ca68d8b95 +582, 0xce69f44f349e9917 +583, 0x2fa5cb8aca009b11 +584, 0xf26bb012115d9aca +585, 0xafa01c2f2d27235a +586, 0xabcba21f1b40305e +587, 0xfec20c896c0c1128 +588, 0xc5f7a71ebacadfa0 +589, 0xc8479ad14bab4eef +590, 0xad86ec9a3e7d3dc +591, 0xbbecd65292b915c5 +592, 0xb1f9e28149e67446 +593, 0x708d081c03dad352 +594, 0xaa8a84dbd1de916c +595, 0x9aa3efb29ba9480b +596, 0xd3c63969ff11443e +597, 
0x1e9e9ac861315919 +598, 0x4fe227f91e66b41d +599, 0xefc0212d43d253ab +600, 0x98341437727c42d1 +601, 0x5ea85c0fe9008adc +602, 0x7891b15faa808613 +603, 0x32db2d63989aacfd +604, 0xc92f7f28e88fd7bc +605, 0x3513545eb6549475 +606, 0x49abe0082906fbf8 +607, 0xcee1e1a6551e729c +608, 0x38556672b592a28e +609, 0xc3e61409c4ec2d45 +610, 0x96c67ce2995a0fd4 +611, 0x9b9b0cada870293 +612, 0x82d6dd5dada48037 +613, 0xeea4f415299f1706 +614, 0x371107895f152ab3 +615, 0x2f6686159f4396bb +616, 0x61005a2ff3680089 +617, 0x9d2f2cafb595e6b6 +618, 0x4a812a920f011672 +619, 0x317554d3a77385d7 +620, 0x24c01086727eb74b +621, 0xa15ff76d618a3a9e +622, 0x2121bfd983859940 +623, 0x384d11577eea8114 +624, 0xab0f4299f3c44d88 +625, 0x136fd4b07cfa14d9 +626, 0x665fe45cbfaa972a +627, 0x76c5a23398a314e9 +628, 0x5507036357ccda98 +629, 0xd9b8c5ac9dce632b +630, 0x366bc71781da6e27 +631, 0xdd2b2ba1d6be6d15 +632, 0xf33ed0d50ea6f1a6 +633, 0xf05a9b1900174c18 +634, 0x3947e1419e2787cf +635, 0x6c742b1e029637d0 +636, 0x32aba12196a0d2e8 +637, 0x1b94aab2e82e7df +638, 0x68b617db19229d6 +639, 0x6c88a95ac0a33f98 +640, 0xdc9b95fd60c2d23e +641, 0x999e6971d3afc8b3 +642, 0x7071fc6ad8b60129 +643, 0x41a8184ef62485f6 +644, 0xb68e0605c7d5e713 +645, 0x272b961a1d1bbee +646, 0x23f04e76446187b0 +647, 0x999a7a8f6d33f260 +648, 0xdbd6318df4f168d +649, 0x8f5e74c84c40711e +650, 0x8ccc6b04393a19d6 +651, 0xadcd24b782dd8d3d +652, 0x1a966b4f80ef9499 +653, 0xcb6d4f9ff5a280f0 +654, 0x8095ff2b8484018a +655, 0xbfd3389611b8e771 +656, 0x278eb670b7d12d51 +657, 0x31df54ca8d65c20f +658, 0x121c7fb38af6985e +659, 0x84fb94f38fe1d0a +660, 0x15ae8af1a6d48f02 +661, 0x8d51e4a62cba1a28 +662, 0x58e6b6b3ae0f9e42 +663, 0x9365a0a85669cc99 +664, 0xe56e92f65a2106df +665, 0x68fa299c66b428fc +666, 0x55e51bb0b0a832c6 +667, 0x48b565293f9bc494 +668, 0x73d8132b1cbabb57 +669, 0x9178ac3926c36cbc +670, 0xe2f22c7b28ea5e0f +671, 0x6af45322a99afb12 +672, 0x59072fcb486a46f4 +673, 0x166b717b08d3d8e +674, 0xd4e627a2dfacc4ab +675, 0x33dad6f2921dedaa +676, 0x4b13b806834a6704 +677, 0xe5f7971b398ed54d +678, 0x20bfae65e3e6899b +679, 0x881dab45d2b4fc98 +680, 0x6f248126b5b885be +681, 0x7aeb39e986f9deee +682, 0xf819f9574b8c3a03 +683, 0xff3d93ed6bd9781a +684, 0x3a31e2e24a2f6385 +685, 0x7888a88f8944a5e +686, 0x4faee12f5de95537 +687, 0x7f3e4efccdb2ed67 +688, 0x91e0f2fc12593af5 +689, 0xb5be8a4b886a40d3 +690, 0x998e8288ac3a9b1b +691, 0x85c48fc8b1349e7b +692, 0xf03af25222d8fae5 +693, 0x45467e805b242c2e +694, 0xa2350db793dbebdc +695, 0xfebe5b61d2174553 +696, 0xa9a331f02c54ad0b +697, 0xe94e49a0f905aef3 +698, 0xe54b4c812b55e3da +699, 0xdc454114c6bc0278 +700, 0x99c7765ab476baa2 +701, 0xccd9590e47fdff7c +702, 0xfa2bcae7afd6cb71 +703, 0x2c1bf1a433a6f0f7 +704, 0x53882c62ff0aab28 +705, 0x80ac900f844dacc +706, 0x27ba8eb5c4a44d54 +707, 0x78f3dfb072a46004 +708, 0x34e00e6ec629edce +709, 0x5b88d19b552d1fbd +710, 0xe4df375dc79df432 +711, 0x37446312ff79c3b4 +712, 0xb72256900a95fa6d +713, 0x89f3171fbdff0bfc +714, 0xd37885b048687eba +715, 0xbb033213b283b60e +716, 0xcf10b523ee769030 +717, 0xbf8070b6cfd7bafb +718, 0xb7194da81fd1763b +719, 0xbfc303de88e68d24 +720, 0xb949c7a5aea8a072 +721, 0x844216e7bae90455 +722, 0xf1e7f20840049a33 +723, 0x96e3263ad0cae794 +724, 0x10772d51f6e9ba49 +725, 0xcea24fccae9d23b3 +726, 0xefd378add9dde040 +727, 0xba0c7c5275805976 +728, 0x2e2a04608f64fa8c +729, 0xafb42ec43aa0fa7 +730, 0x30444b84241ac465 +731, 0x19ef384bac4493ab +732, 0xfd1ac615d3ba5ab9 +733, 0x6cc781ba38643aff +734, 0x30ff27ebed875cfd +735, 0xee1a261aca97ae62 +736, 0xc5a92715202bc940 +737, 0x9e6ec76f93c657ff +738, 0x9b9fd55f55191ca5 +739, 
0x654b13af008d8f03 +740, 0x1b7f030d9bd0719f +741, 0x6d622e277550cb7f +742, 0x3f8ee6b8830d0538 +743, 0x475462bcd0de190f +744, 0x21380e8a513bdbcd +745, 0x629bf3771b1bd7a4 +746, 0x3b5fd0b62c353709 +747, 0xf95634006ec3867e +748, 0x1be8bb584a6653c2 +749, 0x2e2d3cfa85320ce8 +750, 0x5b904b692252d11d +751, 0x4bfd76631d527990 +752, 0xc019571ca2bec4a0 +753, 0xf2eb730cea4cd751 +754, 0xd4571d709530191a +755, 0x3b5bd947061f5a7d +756, 0x56e2322cd2d1d1c0 +757, 0xa8830a5f62019f83 +758, 0x901d130c1b873cf3 +759, 0xb5dd29b363c61299 +760, 0xbb710bec3a17b26d +761, 0xc0c464daca0f2328 +762, 0x4dc8055df02650f5 +763, 0x3d3cd9bbe8b957af +764, 0xdb79612c2635b828 +765, 0xe25b3a8ad8fa3040 +766, 0xd5875c563cbf236b +767, 0x46861c1c3849c9bc +768, 0xf84bf1a2814dff43 +769, 0x6d8103902e0ad5e6 +770, 0x99f51c9be8af79e5 +771, 0xb0bfa8540ff94a96 +772, 0xaf45109a4e06f7d0 +773, 0x281df3e55aea9bfc +774, 0x6a1155ca8aa40e60 +775, 0x754d32c5de1f5da +776, 0xce1eafb1c6ca916f +777, 0xc4f2185fa8577bd1 +778, 0x4a188e9bdb5501d9 +779, 0xbb14107e99bd5550 +780, 0xf0381d8425ec2962 +781, 0x213dbfffc16ec4f6 +782, 0x7a999c5a28ea65bc +783, 0x23758c2aba7709ff +784, 0xea7e4bb205e93b44 +785, 0x9c5a31e53911c658 +786, 0x7f04d0bbdc689ddc +787, 0xe3ed89ab8d78dcb3 +788, 0x73c38bfb43986210 +789, 0x740c7d787eb8e158 +790, 0x5284fafdfb3fb9ec +791, 0x2e91a58ac1fb1409 +792, 0xb94a600bf0a09af3 +793, 0x533ea4dbe07d81dd +794, 0x48c3f1a736b3c5fd +795, 0x56ae3499fa8720ce +796, 0x526f2def663ca818 +797, 0x2f085759c65665c4 +798, 0xf715f042c69e0db4 +799, 0x110889c399231e60 +800, 0x64584a244866f3a0 +801, 0xf02ec101a39405d3 +802, 0xe73cd5e9a7f17283 +803, 0xfea64869e7028234 +804, 0x97559974ad877891 +805, 0xc8695aba1dc9f2e5 +806, 0x7b62b76ffc2264ec +807, 0xf5e1df172ec5ccd +808, 0xafaeb68765e443bd +809, 0xd3870eb2e8337623 +810, 0x4f944d684138fb39 +811, 0x6977c575038916ad +812, 0x8ada1a225df95a56 +813, 0xe4044c6c58d15e54 +814, 0x4e5121366681cf2 +815, 0xcf8640b079357b0d +816, 0xcd5b157d44106fa3 +817, 0x9d7a5481279e25a1 +818, 0xe10e9db41fb4b34f +819, 0x1052607be1eadff9 +820, 0x3403d67232fe2265 +821, 0xac9358f498c34afc +822, 0x820172da0dc39c9 +823, 0xe186e91a3b826b6a +824, 0x1a838e2a40284445 +825, 0x1870b617ebd7bce6 +826, 0xcb7cba4424be1ed7 +827, 0x6a2e56e40fdf9041 +828, 0xace93bbe108f97ee +829, 0xfeb9bc74ac41ca08 +830, 0x8cb2d05b0f6a1f51 +831, 0x73792309f3fac0a9 +832, 0x2507343d431308ca +833, 0xd0ea1197be615412 +834, 0xb1870812f1d2fa94 +835, 0x6d067b6935dcd23e +836, 0xaf161014e5492c31 +837, 0xd4be0dce97064be4 +838, 0xf8edfe3fc75c20f1 +839, 0x894751dc442d2d9c +840, 0xb4a95f6a6663456c +841, 0x74e93162e2d805db +842, 0x784bc5f3a7a2f645 +843, 0xd234d7c5b0582ea9 +844, 0x491f28d0ab6cb97c +845, 0xa79419e5cf4336c3 +846, 0x66b00141978c849 +847, 0xa7ddbd64698d563f +848, 0xefc33a4a5d97d4b2 +849, 0x95075514a65aebdc +850, 0x40eca5b3e28cd25e +851, 0x90ec7d00e9c9e35d +852, 0x63e84104d5af417a +853, 0xdaca0ea32df5744 +854, 0x7ed54f2587795881 +855, 0x5a73931760af4ee0 +856, 0x857d1a185a3081ec +857, 0x6eac2aabe67fb463 +858, 0xd1f86155d8bfc55f +859, 0x6d56398f3e7877ef +860, 0x7642f61dfc62bc17 +861, 0x1d76b12843246ffa +862, 0xde7817809b8a31d0 +863, 0xbcca9cd091198f9d +864, 0xf71ca566dddcdfd4 +865, 0xea4386ee8b61d082 +866, 0xe351729d6010bac4 +867, 0xfd685d8a49910dd6 +868, 0xa7a20ea6c686bd3 +869, 0x1cdaf82f4dbd5536 +870, 0xa3da1d1e77dda3e0 +871, 0x4f723b3818ff8b2a +872, 0x1290669eca152469 +873, 0xb54158b52d30651b +874, 0xc06b74f2c7f0fee +875, 0x7d5840bcbf702379 +876, 0x19fa4c1254a82ed +877, 0xcf5ce090ad0b38ea +878, 0xd4edd6ac9437e16d +879, 0xc6ebf25eb623b426 +880, 0xd2b6dbdf00d8fea2 +881, 
0x949cf98391cc59e1 +882, 0x380a0c7d0356f7b3 +883, 0x8ffefe32465473bf +884, 0x637b6542d27c861e +885, 0x347d12ffc664ecd9 +886, 0xea66e3a0c75a6b37 +887, 0xc3aff6f34fb537a1 +888, 0x67bdf3579959bf49 +889, 0xa17a348e3a74b723 +890, 0x93c9ef26ddadd569 +891, 0x483909059a5ac0b2 +892, 0x26ec9074b56d5a0d +893, 0x6216000d9a48403a +894, 0x79b43909eab1ec05 +895, 0xe4a8e8d03649e0de +896, 0x1435d666f3ccdc08 +897, 0xb9e22ba902650a0e +898, 0x44dffcccc68b41f8 +899, 0x23e60dcc7a559a17 +900, 0x6fd1735eacd81266 +901, 0xf6bda0745ea20c8e +902, 0x85efcaefe271e07c +903, 0x9be996ee931cef42 +904, 0xe78b41c158611d64 +905, 0xd6201df605839830 +906, 0x702e8e47d2769fd3 +907, 0xb8dcf70e18cf14c +908, 0xac2690bab1bf5c17 +909, 0x92b166b71205d696 +910, 0xb0e73c795fc6df28 +911, 0x4bf2322c8b6b6f0d +912, 0xa842fbe67918cea0 +913, 0xb01a8675d9294e54 +914, 0xfbe3c94f03ca5af2 +915, 0x51a5c089600c441f +916, 0x60f0fd7512d85ded +917, 0xef3113d3bc2cadb0 +918, 0xe1ea128ade300d60 +919, 0xde413b7f8d92d746 +920, 0xfc32c6d43f47c5d8 +921, 0x69d551d8c2b54c68 +922, 0xb9bc68c175777943 +923, 0xb9c79c687f0dae90 +924, 0xd799421ef883c06e +925, 0xbff553ca95a29a3e +926, 0xfc9ffac46bd0aca1 +927, 0x4f6c3a30c80c3e5a +928, 0x8b7245bc6dc4a0a +929, 0xaf4e191a4575ff60 +930, 0x41218c4a76b90f0b +931, 0x986052aa51b8e89b +932, 0x284b464ed5622f9 +933, 0xba6bded912626b40 +934, 0x43cad3ed7443cb5c +935, 0x21641fa95725f328 +936, 0x6d99d6d09d755822 +937, 0x8246dfa2d4838492 +938, 0xd2ee70b9056f4726 +939, 0x87db515a786fbb8b +940, 0x7c63e4c1d7786e7d +941, 0xd1a9d548f10b3e88 +942, 0xa00856475f3b74c9 +943, 0x7f1964ce67148bf4 +944, 0x446650ec71e6018c +945, 0xb1805ca07d1b6345 +946, 0x869c0a1625b7271b +947, 0x79d6da06ce2ecfe2 +948, 0xec7b3cafc5e3c85f +949, 0x1745ce21e39f2c3d +950, 0xd9a0a7af6ee97825 +951, 0x680e0e52a6e11d5c +952, 0xd86b3f344ff7f4cd +953, 0xab56af117c840b9c +954, 0x5c5404c7e333a10e +955, 0x4f1eb462f35d990d +956, 0xf857605a5644458e +957, 0x3bb87cdf09262f86 +958, 0xd57295baf6da64b +959, 0xb5993f48472f2894 +960, 0x7d1a501608c060b2 +961, 0x45fabe2d0e54adf0 +962, 0xbb41c3806afb4efe +963, 0xbfbc506049424c8 +964, 0xb7dd6b67f2203344 +965, 0x389ce52eff883b81 +966, 0xe259c55c0cf6d000 +967, 0x70fb3e3824f7d213 +968, 0x9f36d5599ed55f4b +969, 0xd14cf6f12f83c4f7 +970, 0x570a09d56aaa0b66 +971, 0x8accafd527f4598 +972, 0xa42d64c62175adfd +973, 0xddb9c6a87b6e1558 +974, 0xd80b6c69fa1cde2a +975, 0x44ebaac10082207b +976, 0xf99be8889552fa1a +977, 0x38253cd4b38b5dc5 +978, 0x85356c8b02675791 +979, 0xbf91677b2ecdcf55 +980, 0x2316cb85e93f366e +981, 0x9abf35954db6b053 +982, 0xf49f7425e086b45a +983, 0x8f5b625e074afde2 +984, 0xe0d614559791b080 +985, 0xbf7b866afab2a525 +986, 0xde89d7e1641a6412 +987, 0x1d10687d8ae5b86f +988, 0x1f034caa0e904cbd +989, 0x2086357aec8a7a2c +990, 0x22dc476b80c56e1e +991, 0xbef5a73cc0e3a493 +992, 0xddfa3829b26ed797 +993, 0x8917a87ec3d4dc78 +994, 0xfeabe390628c365e +995, 0x581b0c4f6fb2d642 +996, 0x1ef8c590adbf5b9a +997, 0x4d8e13aac0cce879 +998, 0xfe38f71e5977fad0 +999, 0x1f83a32d4adfd2ed diff --git a/.local/lib/python3.8/site-packages/numpy/random/tests/data/philox-testset-2.csv b/.local/lib/python3.8/site-packages/numpy/random/tests/data/philox-testset-2.csv new file mode 100644 index 0000000..69d24c3 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/tests/data/philox-testset-2.csv @@ -0,0 +1,1001 @@ +seed, 0x0 +0, 0x399e5b222b82fa9 +1, 0x41fd08c1f00f3bc5 +2, 0x78b8824162ee4d04 +3, 0x176747919e02739d +4, 0xfaa88f002a8d3596 +5, 0x418eb6f592e6c227 +6, 0xef83020b8344dd45 +7, 0x30a74a1a6eaa064b +8, 0x93d43bf97a490c3 +9, 0xe4ba28b442194cc +10, 
0xc829083a168a8656 +11, 0x73f45d50f8e22849 +12, 0xf912db57352824cc +13, 0xf524216927b12ada +14, 0x22b7697473b1dfda +15, 0x311e2a936414b39f +16, 0xb905abfdcc425be6 +17, 0x4b14630d031eac9c +18, 0x1cf0c4ae01222bc8 +19, 0xa6c33efc6e82ef3 +20, 0x43b3576937ba0948 +21, 0x1e483d17cdde108a +22, 0x6722784cac11ac88 +23, 0xee87569a48fc45d7 +24, 0xb821dcbe74d18661 +25, 0xa5d1876ef3da1a81 +26, 0xe4121c2af72a483 +27, 0x2d747e355a52cf43 +28, 0x609059957bd03725 +29, 0xc3327244b49e16c5 +30, 0xb5ae6cb000dde769 +31, 0x774315003209017 +32, 0xa2013397ba8db605 +33, 0x73b228945dbcd957 +34, 0x801af7190375d3c0 +35, 0xae6dca29f24c9c67 +36, 0xd1cc0bcb1ca26249 +37, 0x1defa62a5bd853be +38, 0x67c2f5557fa89462 +39, 0xf1729b58122fab02 +40, 0xb67eb71949ec6c42 +41, 0x5456366ec1f8f7d7 +42, 0x44492b32eb7966f5 +43, 0xa801804159f175f1 +44, 0x5a416f23cac70d84 +45, 0x186f55293302303d +46, 0x7339d5d7b6a43639 +47, 0xfc6df38d6a566121 +48, 0xed2fe018f150b39e +49, 0x508e0b04a781fa1b +50, 0x8bee9d50f32eaf50 +51, 0x9870015d37e63cc +52, 0x93c6b12309c14f2d +53, 0xb571cf798abe93ff +54, 0x85c35a297a88ae6e +55, 0x9b1b79afe497a2ae +56, 0x1ca02e5b95d96b8d +57, 0x5bb695a666c0a94a +58, 0x4e3caf9bbab0b208 +59, 0x44a44be1a89f2dc1 +60, 0x4ff37c33445758d1 +61, 0xd0e02875322f35da +62, 0xfd449a91fb92646b +63, 0xbe0b49096b95db4d +64, 0xffa3647cad13ef5d +65, 0x75c127a61acd10c8 +66, 0xd65f697756f5f98e +67, 0x3ced84be93d94434 +68, 0x4da3095c2fc46d68 +69, 0x67564e2a771ee9ac +70, 0x36944775180644a9 +71, 0xf458db1c177cdb60 +72, 0x5b58406dcd034c8 +73, 0x793301a3fdab2a73 +74, 0x1c2a1a16d6db6128 +75, 0xc2dacd4ddddbe56c +76, 0x2e7d15be2301a111 +77, 0xd4f4a6341b3bcd18 +78, 0x3622996bbe6a9e3b +79, 0xaf29aa9a7d6d47da +80, 0x6d7dbb74a4cd68ae +81, 0xc260a17e0f39f841 +82, 0xdee0170f2af66f0d +83, 0xf84ae780d7b5a06e +84, 0x8326247b73f43c3a +85, 0xd44eef44b4f98b84 +86, 0x3d10aee62ec895e3 +87, 0x4f23fef01bf703b3 +88, 0xf8e50aa57d888df6 +89, 0x7da67411e3bef261 +90, 0x1d00f2769b2f96d7 +91, 0x7ef9a15b7444b84e +92, 0xcfa16436cc2b7e21 +93, 0x29ab8cfac00460ff +94, 0x23613de8608b0e70 +95, 0xb1aa0980625798a8 +96, 0xb9256fd29db7df99 +97, 0xdacf311bf3e7fa18 +98, 0xa013c8f9fada20d8 +99, 0xaf5fd4fe8230fe3e +100, 0xd3d59ca55102bc5c +101, 0x9d08e2aa5242767f +102, 0x40278fe131e83b53 +103, 0x56397d03c7c14c98 +104, 0xe874b77b119359b3 +105, 0x926a1ba4304ab19f +106, 0x1e115d5aa695a91d +107, 0xc6a459df441f2fe3 +108, 0x2ca842bc1b0b3c6a +109, 0x24c804cf8e5eed16 +110, 0x7ca00fc4a4c3ebd3 +111, 0x546af7cecc4a4ba6 +112, 0x8faae1fa18fd6e3 +113, 0x40420b0089641a6a +114, 0x88175a35d9abcb83 +115, 0xf7d746d1b8b1357c +116, 0x7dae771a651be970 +117, 0x2f6485247ee4df84 +118, 0x6883702fab2d8ec5 +119, 0xeb7eea829a67f9a6 +120, 0x60d5880b485562ed +121, 0x7d4ca3d7e41a4e7e +122, 0xbb7fef961ab8de18 +123, 0x3b92452fb810c164 +124, 0x5f4b4755348b338 +125, 0xca45a715a7539806 +126, 0xc33efd9da5399dd +127, 0x593d665a51d4aedd +128, 0x75d6b8636563036b +129, 0x7b57caa55e262082 +130, 0x4ede7427969e0dd5 +131, 0xc3f19b6f78ea00b +132, 0xeea7bab9be2181ea +133, 0x652c45fe9c420c04 +134, 0x14ba9e3d175670ee +135, 0xd2ad156ba6490474 +136, 0x4d65ae41065f614 +137, 0x6ff911c8afa28eb1 +138, 0xedc2b33588f3cb68 +139, 0x437c8bc324666a2f +140, 0x828cee25457a3f0 +141, 0x530c986091f31b9b +142, 0x2f34671e8326ade7 +143, 0x4f686a8f4d77f6da +144, 0xa4c1987083498895 +145, 0xbce5a88b672b0fb1 +146, 0x8476115a9e6a00cc +147, 0x16de18a55dd2c238 +148, 0xdf38cf4c416232bc +149, 0x2cb837924e7559f3 +150, 0xfad4727484e982ed +151, 0x32a55d4b7801e4f +152, 0x8b9ef96804bd10a5 +153, 0xa1fd422c9b5cf2a9 +154, 0xf46ddb122eb7e442 +155, 0x6e3842547afa3b33 +156, 
0x863dee1c34afe5c4 +157, 0x6a43a1935b6db171 +158, 0x1060a5c2f8145821 +159, 0xf783ec9ed34c4607 +160, 0x1da4a86bf5f8c0b0 +161, 0x4c7714041ba12af8 +162, 0x580da7010be2f192 +163, 0xad682fe795a7ea7a +164, 0x6687b6cb88a9ed2c +165, 0x3c8d4b175517cd18 +166, 0xe9247c3a524a6b6b +167, 0x337ca9cfaa02658 +168, 0xed95399481c6feec +169, 0x58726a088e606062 +170, 0xfe7588a5b4ee342a +171, 0xee434c7ed146fdee +172, 0xe2ade8b60fdc4ba5 +173, 0xd57e4c155de4eaab +174, 0xdefeae12de1137cb +175, 0xb7a276a241316ac1 +176, 0xeb838b1b1df4ca15 +177, 0x6f78965edea32f6f +178, 0x18bebd264d7a5d53 +179, 0x3641c691d77005ec +180, 0xbe70ed7efea8c24c +181, 0x33047fa8d03ca560 +182, 0x3bed0d2221ff0f87 +183, 0x23083a6ffbcf38a2 +184, 0xc23eb827073d3fa5 +185, 0xc873bb3415e9fb9b +186, 0xa4645179e54147fe +187, 0x2c72fb443f66e207 +188, 0x98084915dd89d8f4 +189, 0x88baa2de12c99037 +190, 0x85c74ab238cb795f +191, 0xe122186469ea3a26 +192, 0x4c3bba99b3249292 +193, 0x85d6845d9a015234 +194, 0x147ddd69c13e6a31 +195, 0x255f4d678c9a570b +196, 0x2d7c0c410bf962b4 +197, 0x58eb7649e0aa16ca +198, 0x9d240bf662fe0783 +199, 0x5f74f6fa32d293cc +200, 0x4928e52f0f79d9b9 +201, 0xe61c2b87146b706d +202, 0xcfcd90d100cf5431 +203, 0xf15ea8138e6aa178 +204, 0x6ab8287024f9a819 +205, 0xed8942593db74e01 +206, 0xefc00e4ec2ae36dd +207, 0xc21429fb9387f334 +208, 0xf9a3389e285a9bce +209, 0xacdee8c43aae49b3 +210, 0xefc382f02ad55c25 +211, 0x1153b50e8d406b72 +212, 0xb00d39ebcc2f89d8 +213, 0xde62f0b9831c8850 +214, 0xc076994662eef6c7 +215, 0x66f08f4752f1e3ef +216, 0x283b90619796249a +217, 0x4e4869bc4227499e +218, 0xb45ad78a49efd7ed +219, 0xffe19aa77abf5f4b +220, 0xfce11a0daf913aef +221, 0x7e4e64450d5cdceb +222, 0xe9621997cfd62762 +223, 0x4d2c9e156868081 +224, 0x4e2d96eb7cc9a08 +225, 0xda74849bba6e3bd3 +226, 0x6f4621da935e7fde +227, 0xb94b914aa0497259 +228, 0xd50d03e8b8db1563 +229, 0x1a45c1ce5dca422e +230, 0xc8d30d33276f843f +231, 0xb57245774e4176b4 +232, 0x8d36342c05abbbb1 +233, 0x3591ad893ecf9e78 +234, 0x62f4717239ee0ac8 +235, 0x9b71148a1a1d4200 +236, 0x65f8e0f56dd94463 +237, 0x453b1fcfd4fac8c2 +238, 0x4c25e48e54a55865 +239, 0xa866baa05112ace2 +240, 0x7741d3c69c6e79c5 +241, 0x7deb375e8f4f7a8a +242, 0xc242087ede42abd8 +243, 0x2fa9d1d488750c4b +244, 0xe8940137a935d3d3 +245, 0x1dab4918ca24b2f2 +246, 0xe2368c782168fe3e +247, 0x6e8b2d1d73695909 +248, 0x70455ebea268b33e +249, 0x656a919202e28da1 +250, 0x5a5a8935647da999 +251, 0x428c6f77e118c13c +252, 0xa87aee2b675bb083 +253, 0x3873a6412b239969 +254, 0x5f72c1e91cb8a2ee +255, 0xa25af80a1beb5679 +256, 0x1af65d27c7b4abc3 +257, 0x133437060670e067 +258, 0xb1990fa39a97d32e +259, 0x724adc89ae10ed17 +260, 0x3f682a3f2363a240 +261, 0x29198f8dbd343499 +262, 0xdfaeeaa42bc51105 +263, 0x5baff3901b9480c2 +264, 0x3f760a67043e77f5 +265, 0x610fa7aa355a43ba +266, 0x394856ac09c4f7a7 +267, 0x1d9229d058aee82e +268, 0x19c674804c41aeec +269, 0x74cf12372012f4aa +270, 0xa5d89b353fa2f6ca +271, 0x697e4f672ac363dd +272, 0xde6f55ba73df5af9 +273, 0x679cf537510bd68f +274, 0x3dc916114ae9ef7e +275, 0xd7e31a66ec2ee7ba +276, 0xc21bebb968728495 +277, 0xc5e0781414e2adfd +278, 0x71147b5412ddd4bd +279, 0x3b864b410625cca9 +280, 0x433d67c0036cdc6 +281, 0x48083afa0ae20b1b +282, 0x2d80beecd64ac4e8 +283, 0x2a753c27c3a3ee3e +284, 0xb2c5e6afd1fe051a +285, 0xea677930cd66c46b +286, 0x4c3960932f92810a +287, 0xf1b367a9e527eaba +288, 0xb7d92a8a9a69a98e +289, 0x9f9ad3210bd6b453 +290, 0x817f2889db2dcbd8 +291, 0x4270a665ac15813c +292, 0x90b85353bd2be4dd +293, 0x10c0460f7b2d68d +294, 0x11cef32b94f947f5 +295, 0x3cf29ed8e7d477e8 +296, 0x793aaa9bd50599ef +297, 0xbac15d1190014aad +298, 
0x987944ae80b5cb13 +299, 0x460aa51f8d57c484 +300, 0xc77df0385f97c2d3 +301, 0x92e743b7293a3822 +302, 0xbc3458bcfbcbb8c0 +303, 0xe277bcf3d04b4ed7 +304, 0xa537ae5cf1c9a31c +305, 0x95eb00d30bd8cfb2 +306, 0x6376361c24e4f2dd +307, 0x374477fe87b9ea8e +308, 0x8210f1a9a039902e +309, 0xe7628f7031321f68 +310, 0x8b8e9c0888fc1d3d +311, 0x306be461fdc9e0ed +312, 0x510009372f9b56f5 +313, 0xa6e6fa486b7a027a +314, 0x9d3f002025203b5a +315, 0x7a46e0e81ecbef86 +316, 0x41e280c611d04df0 +317, 0xedcec10418a99e8a +318, 0x5c27b6327e0b9dbd +319, 0xa81ed2035b509f07 +320, 0x3581e855983a4cc4 +321, 0x4744594b25e9809d +322, 0xc737ac7c27fbd0ed +323, 0x1b523a307045433a +324, 0x8b4ce9171076f1d9 +325, 0x2db02d817cd5eec0 +326, 0x24a1f1229af50288 +327, 0x5550c0dcf583ff16 +328, 0x3587baaa122ec422 +329, 0xf9d3dc894229e510 +330, 0xf3100430d5cf8e87 +331, 0xc31af79862f8e2fb +332, 0xd20582063b9f3537 +333, 0xac5e90ac95fcc7ad +334, 0x107c4c704d5109d4 +335, 0xebc8628906dbfd70 +336, 0x215242776da8c531 +337, 0xa98002f1dcf08b51 +338, 0xbc3bdc07f3b09718 +339, 0x238677062495b512 +340, 0x53b4796f2a3c49e8 +341, 0x6424286467e22f0e +342, 0x14d0952a11a71bac +343, 0x2f97098149b82514 +344, 0x3777f2fdc425ad2 +345, 0xa32f2382938876d4 +346, 0xda8a39a021f20ae3 +347, 0x364361ef0a6ac32c +348, 0x4413eede008ff05a +349, 0x8dda8ace851aa327 +350, 0x4303cabbdcecd1ee +351, 0x2e69f06d74aa549f +352, 0x4797079cd4d9275c +353, 0xc7b1890917e98307 +354, 0x34031b0e822a4b4c +355, 0xfc79f76b566303ea +356, 0x77014adbe255a930 +357, 0xab6c43dd162f3be5 +358, 0xa430041f3463f6b9 +359, 0x5c191a32ada3f84a +360, 0xe8674a0781645a31 +361, 0x3a11cb667b8d0916 +362, 0xaedc73e80c39fd8a +363, 0xfde12c1b42328765 +364, 0x97abb7dcccdc1a0b +365, 0x52475c14d2167bc8 +366, 0x540e8811196d5aff +367, 0xa867e4ccdb2b4b77 +368, 0x2be04af61e5bcfb9 +369, 0x81b645102bfc5dfd +370, 0x96a52c9a66c6450f +371, 0x632ec2d136889234 +372, 0x4ed530c0b36a6c25 +373, 0x6f4851225546b75 +374, 0x2c065d6ba46a1144 +375, 0xf8a3613ff416551d +376, 0xb5f0fd60e9c971a9 +377, 0x339011a03bb4be65 +378, 0x9439f72b6995ded6 +379, 0xc1b03f3ef3b2292d +380, 0xad12fd221daab3ae +381, 0xf615b770f2cf996f +382, 0x269d0fdcb764172 +383, 0x67837025e8039256 +384, 0x6402831fc823fafa +385, 0x22854146a4abb964 +386, 0x7b5ad9b5a1bad7a8 +387, 0x67170e7beb6ac935 +388, 0xfc2d1e8e24adfaaa +389, 0x7ded4395345ff40d +390, 0x418981760a80dd07 +391, 0xc03bef38022c1d2 +392, 0x3a11850b26eade29 +393, 0xaa56d02c7175c5f4 +394, 0xd83b7917b9bfbff5 +395, 0x3c1df2f8fa6fced3 +396, 0xf3d6e2999c0bb760 +397, 0xc66d683a59a950e3 +398, 0x8e3972a9d73ffabf +399, 0x97720a0443edffd9 +400, 0xa85f5d2fe198444a +401, 0xfc5f0458e1b0de5e +402, 0xe3973f03df632b87 +403, 0xe151073c84c594b3 +404, 0x68eb4e22e7ff8ecf +405, 0x274f36eaed7cae27 +406, 0x3b87b1eb60896b13 +407, 0xbe0b2f831442d70a +408, 0x2782ed7a48a1b328 +409, 0xb3619d890310f704 +410, 0xb03926b11b55921a +411, 0xdb46fc44aa6a0ce4 +412, 0x4b063e2ef2e9453a +413, 0xe1584f1aeec60fb5 +414, 0x7092bd6a879c5a49 +415, 0xb84e1e7c7d52b0e6 +416, 0x29d09ca48db64dfb +417, 0x8f6c4a402066e905 +418, 0x77390795eabc36b +419, 0xcc2dc2e4141cc69f +420, 0x2727f83beb9e3c7c +421, 0x1b29868619331de0 +422, 0xd38c571e192c246f +423, 0x535327479fe37b6f +424, 0xaff9ce5758617eb3 +425, 0x5658539e9288a4e4 +426, 0x8df91d87126c4c6d +427, 0xe931cf8fdba6e255 +428, 0x815dfdf25fbee9e8 +429, 0x5c61f4c7cba91697 +430, 0xdd5f5512fe2313a1 +431, 0x499dd918a92a53cd +432, 0xa7e969d007c97dfd +433, 0xb8d39c6fc81ac0bb +434, 0x1d646983def5746c +435, 0x44d4b3b17432a60c +436, 0x65664232a14db1e3 +437, 0xda8fae6433e7500b +438, 0xbe51b94ff2a3fe94 +439, 0xe9b1bd9a9098ef9f +440, 
0xfe47d54176297ef5 +441, 0xb8ab99bc03bb7135 +442, 0xcfad97f608565b38 +443, 0xf05da71f6760d9c1 +444, 0xef8da40a7c70e7b +445, 0xe0465d58dbd5d138 +446, 0xb54a2d70eb1a938 +447, 0xfdd50c905958f2d8 +448, 0x3c41933c90a57d43 +449, 0x678f6d894c6ad0bb +450, 0x403e8f4582274e8 +451, 0x5cbbe975668df6b0 +452, 0x297e6520a7902f03 +453, 0x8f6dded33cd1efd7 +454, 0x8e903c97be8d783b +455, 0x10bd015577e30f77 +456, 0x3fcd69d1c36eab0c +457, 0xb45989f3ca198d3 +458, 0x507655ce02b491a9 +459, 0xa92cf99bb78602ce +460, 0xebfb82055fbc2f0f +461, 0x3334256279289b7a +462, 0xc19d2a0f740ee0ac +463, 0x8bb070dea3934905 +464, 0xa4ab57d3a8d1b3eb +465, 0xfee1b09bcacf7ff4 +466, 0xccc7fb41ceec41fa +467, 0xd4da49094eb5a74d +468, 0xed5c693770af02ed +469, 0x369dabc9bbfaa8e4 +470, 0x7eab9f360d054199 +471, 0xe36dbebf5ee94076 +472, 0xd30840e499b23d7 +473, 0x8678e6cb545015ff +474, 0x3a47932ca0b336e +475, 0xeb7c742b6e93d6fe +476, 0x1404ea51fe5a62a9 +477, 0xa72cd49db978e288 +478, 0xfd7bada020173dcf +479, 0xc9e74fc7abe50054 +480, 0x93197847bb66808d +481, 0x25fd5f053dce5698 +482, 0xe198a9b18cc21f4 +483, 0x5cc27b1689452d5d +484, 0x8b3657af955a98dc +485, 0xc17f7584f54aa1c0 +486, 0xe821b088246b1427 +487, 0x32b5a9f6b45b6fa0 +488, 0x2aef7c315c2bae0c +489, 0xe1af8129846b705a +490, 0x4123b4c091b34614 +491, 0x6999d61ec341c073 +492, 0x14b9a8fcf86831ea +493, 0xfd4cff6548f46c9f +494, 0x350c3b7e6cc8d7d6 +495, 0x202a5047fecafcd5 +496, 0xa82509fe496bb57d +497, 0x835e4b2608b575fe +498, 0xf3abe3da919f54ec +499, 0x8705a21e2c9b8796 +500, 0xfd02d1427005c314 +501, 0xa38458faa637f49b +502, 0x61622f2360e7622a +503, 0xe89335a773c2963b +504, 0x481264b659b0e0d0 +505, 0x1e82ae94ebf62f15 +506, 0x8ea7812de49209d4 +507, 0xff963d764680584 +508, 0x418a68bef717f4af +509, 0x581f0e7621a8ab91 +510, 0x840337e9a0ec4150 +511, 0x951ef61b344be505 +512, 0xc8b1b899feb61ec2 +513, 0x8b78ca13c56f6ed9 +514, 0x3d2fd793715a946f +515, 0xf1c04fabcd0f4084 +516, 0x92b602614a9a9fcc +517, 0x7991bd7a94a65be7 +518, 0x5dead10b06cad2d7 +519, 0xda7719b33f722f06 +520, 0x9d87a722b7bff71e +521, 0xb038e479071409e9 +522, 0xf4e8bbec48054775 +523, 0x4fec2cd7a28a88ea +524, 0x839e28526aad3e56 +525, 0xd37ec57852a98bf0 +526, 0xdef2cbbe00f3a02d +527, 0x1aecfe01a9e4d801 +528, 0x59018d3c8beaf067 +529, 0x892753e6ac8bf3cd +530, 0xefdd3437023d2d1c +531, 0x447bfbd148c8cb88 +532, 0x282380221bd442b8 +533, 0xfce8658d1347384a +534, 0x60b211a7ec6bfa8 +535, 0xd21729cfcc692974 +536, 0x162087ecd5038a47 +537, 0x2b17000c4bce39d2 +538, 0x3a1f75ff6adcdce0 +539, 0x721a411d312f1a2c +540, 0x9c13b6133f66934d +541, 0xaa975d14978980e5 +542, 0x9403dbd4754203fa +543, 0x588c15762fdd643 +544, 0xdd1290f8d0ada73a +545, 0xd9b77380936103f4 +546, 0xb2e2047a356eb829 +547, 0x7019e5e7f76f7a47 +548, 0x3c29a461f62b001d +549, 0xa07dc6cfab59c116 +550, 0x9b97e278433f8eb +551, 0x6affc714e7236588 +552, 0x36170aeb32911a73 +553, 0x4a665104d364a789 +554, 0x4be01464ec276c9c +555, 0x71bb10271a8b4ecf +556, 0xbf62e1d068bc018 +557, 0xc9ada5db2cbbb413 +558, 0x2bded75e726650e5 +559, 0x33d5a7af2f34385d +560, 0x8179c46661d85657 +561, 0x324ebcfd29267359 +562, 0xac4c9311dc9f9110 +563, 0xc14bb6a52f9f9c0 +564, 0xc430abe15e7fb9db +565, 0xf1cce5c14df91c38 +566, 0x651e3efa2c0750d3 +567, 0x38a33604a8be5c75 +568, 0x7aaf77fe7ff56a49 +569, 0xc0d1cc56bbf27706 +570, 0x887aa47324e156c6 +571, 0x12547c004b085e8d +572, 0xd86a8d6fbbbfd011 +573, 0x57c860188c92d7b4 +574, 0xcd5d3843d361b8ca +575, 0x8f586ef05a9cb3ef +576, 0x174456e1ba6267d5 +577, 0xf5dc302c62fe583c +578, 0xa349442fabcdb71 +579, 0xe5123c1a8b6fd08e +580, 0x80681552aa318593 +581, 0xb295396deaef1e31 +582, 0xabb626e0b900e32b 
+583, 0xf024db8d3f19c15e +584, 0x1d04bb9548e2fb6c +585, 0xd8ed2b2214936c2b +586, 0x618ca1e430a52bc9 +587, 0xccbca44a6088136b +588, 0xd0481855c8b9ccbe +589, 0x3c92a2fade28bdf7 +590, 0x855e9fefc38c0816 +591, 0x1269bbfe55a7b27c +592, 0x1d6c853d83726d43 +593, 0xc8655511cc7fcafc +594, 0x301503eb125a9b0e +595, 0xb3108e4532016b11 +596, 0xbb7ab6245da9cb3d +597, 0x18004c49116d85eb +598, 0x3480849c20f61129 +599, 0xe28f45157463937b +600, 0x8e85e61060f2ce1 +601, 0x1673da4ec589ba5e +602, 0x74b9a6bd1b194712 +603, 0xed39e147fa8b7601 +604, 0x28ce54019102ca77 +605, 0x42e0347f6d7a2f30 +606, 0xb6a908d1c4814731 +607, 0x16c3435e4e9a126d +608, 0x8880190514c1ad54 +609, 0xfffd86229a6f773c +610, 0x4f2420cdb0aa1a93 +611, 0xf8e1acb4120fc1fa +612, 0x63a8c553ab36a2f2 +613, 0x86b88cf3c0a6a190 +614, 0x44d8b2801622c792 +615, 0xf6eae14e93082ff1 +616, 0xd9ed4f5d1b8fac61 +617, 0x1808ce17f4e1f70 +618, 0x446e83ea336f262f +619, 0xc7c802b04c0917b7 +620, 0x626f45fd64968b73 +621, 0x9ffa540edc9b2c5c +622, 0xa96a1e219e486af8 +623, 0x2bb8963884e887a1 +624, 0xba7f68a5d029e3c4 +625, 0xefc45f44392d9ca0 +626, 0x98d77762503c5eab +627, 0xd89bcf62f2da627c +628, 0xa3cab8347f833151 +629, 0xa095b7595907d5c7 +630, 0x3b3041274286181 +631, 0xb518db8919eb71fa +632, 0x187036c14fdc9a36 +633, 0xd06e28301e696f5d +634, 0xdbc71184e0c56492 +635, 0xfe51e9cae6125bfd +636, 0x3b12d17cd014df24 +637, 0x3b95e4e2c986ac1a +638, 0x29c1cce59fb2dea2 +639, 0x58c05793182a49d6 +640, 0xc016477e330d8c00 +641, 0x79ef335133ada5d +642, 0x168e2cad941203f3 +643, 0xf99d0f219d702ef0 +644, 0x655628068f8f135b +645, 0xdcdea51910ae3f92 +646, 0x8e4505039c567892 +647, 0x91a9ec7e947c89ae +648, 0x8717172530f93949 +649, 0x1c80aba9a440171a +650, 0x9c8f83f6ebe7441e +651, 0x6c05e1efea4aa7f9 +652, 0x10af696b777c01b +653, 0x5892e9d9a92fc309 +654, 0xd2ba7da71e709432 +655, 0x46378c7c3269a466 +656, 0x942c63dfe18e772c +657, 0x6245cf02ef2476f +658, 0x6f265b2759ea2aea +659, 0x5aa757f17d17f4a6 +660, 0x1ad6a3c44fa09be6 +661, 0xe861af14e7015fb8 +662, 0x86be2e7db388c77 +663, 0x5c7bba32b519e9a0 +664, 0x3feb314850c4437b +665, 0x97955add60cfb45b +666, 0xfdb536230a540bdc +667, 0xdac9d7bf6e58512e +668, 0x4894c00e474e8120 +669, 0xa1918a37739da366 +670, 0xa8097f2096532807 +671, 0x592afe50e6c5e643 +672, 0xd69050ee6dcb33dc +673, 0xa6956b262dd3c561 +674, 0x1a55c815555e63f7 +675, 0x2ec7fd37516de2bb +676, 0x8ec251d9c70e76ba +677, 0x9b76e4abafd2689 +678, 0x9ce3f5c751a57df1 +679, 0x915c4818bf287bc7 +680, 0x2293a0d1fe07c735 +681, 0x7627dcd5d5a66d3d +682, 0xb5e4f92cc49c7138 +683, 0x6fc51298731d268c +684, 0xd19800aa95441f87 +685, 0x14f70f31162fa115 +686, 0x41a3da3752936f59 +687, 0xbec0652be95652ee +688, 0x7aa4bdb1020a290f +689, 0x4382d0d9bee899ef +690, 0xe6d988ae4277d6ff +691, 0xe618088ccb2a32d1 +692, 0x411669dfaa899e90 +693, 0x234e2bf4ba76d9f +694, 0xe109fe4cb7828687 +695, 0x1fb96b5022b0b360 +696, 0x6b24ad76c061a716 +697, 0x7e1781d4d7ecee15 +698, 0xf20c2dbe82ba38ba +699, 0xeda8e8ae1d943655 +700, 0xa58d196e2a77eaec +701, 0x44564765a5995a0b +702, 0x11902fe871ecae21 +703, 0x2ea60279900e675d +704, 0x38427227c18a9a96 +705, 0xe0af01490a1b1b48 +706, 0x826f91997e057824 +707, 0x1e57308e6e50451 +708, 0xb42d469bbbfdc350 +709, 0xb9734cff1109c49b +710, 0x98967559bb9d364f +711, 0xd6be360041907c12 +712, 0xa86a1279122a1e21 +713, 0x26f99a8527bfc698 +714, 0xfa8b85758f28f5d6 +715, 0xe3057429940806ae +716, 0x4bee2d7e84f93b2b +717, 0x948350a76ea506f4 +718, 0xa139154488045e74 +719, 0x8893579ba5e78085 +720, 0x5f21c215c6a9e397 +721, 0x456134f3a59641dc +722, 0x92c0273f8e97a9c6 +723, 0xd2936c9c3f0c6936 +724, 0xcfa4221e752c4735 +725, 
0x28cd5a7457355dca +726, 0xecdfdde23d90999f +727, 0x60631b2d494d032b +728, 0xf67289df269a827f +729, 0xcbe8011ef0f5b7ef +730, 0x20eea973c70a84f5 +731, 0xbe1fd200398557ce +732, 0xd2279ee030191bba +733, 0xf2bd4291dedaf819 +734, 0xfc6d167dbe8c402 +735, 0x39ac298da5d0044b +736, 0xceac026f5f561ce +737, 0x10a5b0bdd8ad60e6 +738, 0xdeb3c626df6d4bcb +739, 0x3c128962e77ff6ca +740, 0xc786262e9c67a0e5 +741, 0x4332855b3febcdc0 +742, 0x7bda9724d1c0e020 +743, 0x6a8c93399bc4df22 +744, 0xa9b20100ac707396 +745, 0xa11a3458502c4eb5 +746, 0xb185461c60478941 +747, 0x13131d56195b7ff6 +748, 0x8d55875ddbd4aa1c +749, 0xc09b67425f469aa5 +750, 0x39e33786cc7594c4 +751, 0x75e96db8e4b08b93 +752, 0xda01cd12a3275d1e +753, 0x2c49e7822344fab5 +754, 0x9bd5f10612514ca7 +755, 0x1c801a5c828e7332 +756, 0x29797d3f4f6c7b4c +757, 0xac992715e21e4e53 +758, 0xe40e89ee887ddb37 +759, 0x15189a2b265a783b +760, 0xa854159a52af5c5 +761, 0xb9d8a5a81c12bead +762, 0x3240cdc9d59e2a58 +763, 0x1d0b872234cf8e23 +764, 0xc01224cf6ce12cff +765, 0x2601e9f3905c8663 +766, 0xd4ecf9890168d6b4 +767, 0xa45db796d89bfdd5 +768, 0x9f389406dad64ab4 +769, 0xa5a851adce43ffe3 +770, 0xd0962c41c26e5aa9 +771, 0x8a671679e48510a4 +772, 0xc196dc0924a6bfeb +773, 0x3ead661043b549cb +774, 0x51af4ca737d405ac +775, 0xf4425b5c62275fb6 +776, 0x71e69d1f818c10f5 +777, 0xacaf4af2d3c70162 +778, 0x2e1f1d4fd7524244 +779, 0xe54fdd8f388890e8 +780, 0xfda0d33e84eb2b83 +781, 0x53965c5e392b81da +782, 0x5c92288267263097 +783, 0xcac1b431c878c66c +784, 0x36c0e1cf417241c6 +785, 0x5cc4d9cd1a36bf2c +786, 0x32e4257bb5d3e470 +787, 0x4aecff904adb44fb +788, 0x4d91a8e0d1d60cac +789, 0xa3b478388385b038 +790, 0x48d955f24eba70be +791, 0x310e4deb07f24f68 +792, 0x8853e73b1f30a5a +793, 0x278aee45c2a65c5 +794, 0xf6932eedbd62fb0b +795, 0xafb95958c82fafad +796, 0x78e807c18616c16c +797, 0xd7abadda7488ed9f +798, 0x2dd72e2572aa2ae6 +799, 0x6ec3791982c2be09 +800, 0x6865bb314fac478f +801, 0xa14dc0ce09000d1a +802, 0xb8081ad134da10f2 +803, 0xc4ac1534aa825ef5 +804, 0xd83aeb48ae2d538f +805, 0x38052027e3074be4 +806, 0xa9833e06ef136582 +807, 0x4f02d790ec9fd78 +808, 0xec2f60bc711c5bdc +809, 0x9253b0d12268e561 +810, 0xa8ac607fdd62c206 +811, 0x895e28ebc920289f +812, 0xe2fd42b154243ac7 +813, 0xc69cac2f776eee19 +814, 0xf4d4ac11db56d0dc +815, 0xa8d37049b9f39833 +816, 0x75abbf8a196c337c +817, 0xb115bb76750d27b8 +818, 0x39426d187839154 +819, 0xd488423e7f38bf83 +820, 0xbb92e0c76ecb6a62 +821, 0x3055a018ce39f4e3 +822, 0xc93fe0e907729bfb +823, 0x65985d17c5863340 +824, 0x2088ae081b2028e1 +825, 0x6e628de873314057 +826, 0x864377cccf573f0e +827, 0xae03f4c9aa63d132 +828, 0xb1db766d6404c66d +829, 0xdce5a22414a374b +830, 0x622155b777819997 +831, 0x69fe96e620371f3c +832, 0xa9c67dbc326d94fc +833, 0x932a84ae5dd43bab +834, 0xe2301a20f6c48c3f +835, 0x795d2e79c6477300 +836, 0xd8e3e631289521e7 +837, 0xae2684979002dfd6 +838, 0xc9c2392377550f89 +839, 0xa1b0c99d508ef7ec +840, 0x593aef3c5a5272ec +841, 0xe32e511a4b7162cd +842, 0xab3b81655f5a2857 +843, 0x1b535e1a0aaf053e +844, 0x5b33f56c1b6a07e2 +845, 0x782dc8cfcac4ef36 +846, 0xb3d4f256eecfd202 +847, 0xf73a6598f58c4f7e +848, 0xd5722189524870ae +849, 0x707878de6b995fc0 +850, 0xc3eb6ba73e3d7e8a +851, 0xca75c017655b75a7 +852, 0x1b29369ea3541e5f +853, 0x352e98858bdb58a3 +854, 0x1e4412d184b6b27d +855, 0x2d375ba0304b2d17 +856, 0x56c30fce69a5d08e +857, 0x6b8c2b0c06584bda +858, 0xde4dfff228c8c91f +859, 0xb7c9edd574e6287f +860, 0xf6078281c9fca2b2 +861, 0xb9b9a51de02a2f1e +862, 0xa411bef31c0103b0 +863, 0xc5facd8fc5e1d7a3 +864, 0x54e631c05ddf7359 +865, 0x815b42b3fd06c474 +866, 0xc9ac07566fda18ec +867, 
0xd84ea62957bd8e15 +868, 0x5575f74b5cfd8803 +869, 0x5779a8d460c2e304 +870, 0xfd6e87e264a85587 +871, 0xa1d674daa320b26d +872, 0x2c3c3ec64b35afc4 +873, 0x393a274ff03e6935 +874, 0x1f40ecbac52c50ea +875, 0xc3de64fa324ffc0c +876, 0x56ae828b7f9deb04 +877, 0xe7c1a77b5c1f2cb3 +878, 0xa4c4aab19ea921cc +879, 0xec164c238825822c +880, 0xa6a3304770c03b03 +881, 0x3a63641d5b1e8123 +882, 0x42677be3a54617ef +883, 0xa2680423e3a200c0 +884, 0x8b17cf75f3f37277 +885, 0xe7ce65a49242be3d +886, 0x7f85934271323e4b +887, 0xcfb0f431f79a4fab +888, 0x392e4041a8505b65 +889, 0xd3e5daf0d8b25ea6 +890, 0x9447eff675d80f53 +891, 0xea27a9d53cfaeea8 +892, 0xe3f2335945a83ba +893, 0x8875a43ce216413b +894, 0xe49941f9eabce33e +895, 0x9357c1296683a5b1 +896, 0xf0f16439e81ee701 +897, 0x3181515295ffd79a +898, 0x9d7150fffd169ed8 +899, 0x2d6a1d281e255a72 +900, 0x81bf1286fb3a92b6 +901, 0x566d3079b499e279 +902, 0xc7939ca8f047341 +903, 0xb1f8050e7c2d59f6 +904, 0x605701045e7be192 +905, 0x51b73360e8e31a1c +906, 0x9f4ad54483ba9fe0 +907, 0xd3085b8fcf69d1c8 +908, 0xc3e7475026dc5f0b +909, 0x5800f8554b157354 +910, 0x37dfdf858cfcd963 +911, 0x3a1fce05ce385072 +912, 0xf495c062645c20c3 +913, 0xdcbeec2c3492c773 +914, 0xc38f427589d1d0b4 +915, 0x681ead60216a8184 +916, 0x4bd569c40cc88c41 +917, 0x49b0d442e130b7a2 +918, 0xee349156b7d1fa3f +919, 0x2bde2d2db055135b +920, 0xc6a460d2fbcb2378 +921, 0xd0f170494ff3dbb +922, 0xb294422492528a23 +923, 0xfc95873c854e7b86 +924, 0x6c9c3ad1797bb19c +925, 0xe0c06f2aab65062d +926, 0x58e32ce0f11e3a81 +927, 0xa745fcd729ff5036 +928, 0x599b249b2fc2cdb2 +929, 0x78f23b5b0dd5b082 +930, 0x6de3e957f549ecfc +931, 0x9d0712fa6d878756 +932, 0x9076e8554e4a413a +933, 0xf3185818c0294de8 +934, 0x5de7cdf4b455b9b6 +935, 0xb15f6908ed703f7d +936, 0x98c654dfedc6818 +937, 0x120502ab0e93ae42 +938, 0x67966a98a58dc120 +939, 0x1caa0fc628989482 +940, 0xd8b2c3cd480a8625 +941, 0x85c70071b3aed671 +942, 0xff385f8473714662 +943, 0xe2868e4bf3773b63 +944, 0x96cf8019b279298e +945, 0x8511cc930bd74800 +946, 0x5312e48fdd55f5ab +947, 0xfcdae564b52df78d +948, 0x9eee48373e652176 +949, 0x953788f6bcbc56b0 +950, 0xd1a3855dbd2f6b37 +951, 0x3ad32acf77f4d1e9 +952, 0x917c7be81b003e30 +953, 0x9ce817da1e2e9dfb +954, 0x2968983db162d44d +955, 0x1e005decef5828ad +956, 0xc38fe59d1aa4f3d5 +957, 0xf357f1710dc02f1d +958, 0x2613912a4c83ec67 +959, 0x832a11470b9a17cb +960, 0x5e85508a611f0dad +961, 0x2781131677f59d56 +962, 0xa82358d7d4b0237f +963, 0xfbf8b3cc030c3af6 +964, 0x68b2f68ac8a55adb +965, 0x3b6fcf353add0ada +966, 0xd1956049bcd15bd5 +967, 0x95b76f31c7f98b6d +968, 0x814b6690df971a84 +969, 0xdcf7959cddd819e4 +970, 0xcf8c72c5d804fc88 +971, 0x56883769c8945a22 +972, 0x1f034652f658cf46 +973, 0x41df1324cda235a1 +974, 0xeccd32524504a054 +975, 0x974e0910a04ec02c +976, 0x72104507b821f6db +977, 0x791f8d089f273044 +978, 0xe0f79a4f567f73c3 +979, 0x52fe5bea3997f024 +980, 0x5f8b9b446494f78 +981, 0xfd9f511947059190 +982, 0x3aea9dac6063bce3 +983, 0xbfdae4dfc24aee60 +984, 0xa82cdbbf0a280318 +985, 0xf460aae18d70aa9d +986, 0x997367cb204a57c4 +987, 0x616e21ab95ba05ef +988, 0x9bfc93bec116769f +989, 0x2b2ee27c37a3fa5b +990, 0xb25c6ed54006ee38 +991, 0xab04d4a5c69e69a5 +992, 0x6d2f6b45f2d8438f +993, 0x4ad2f32afc82f092 +994, 0x513d718908f709c0 +995, 0x5272aadc4fffca51 +996, 0xeb3f87e66156ef5d +997, 0xf8a3d5a46a86ba85 +998, 0xdb4548a86f27abfd +999, 0x57c05f47ff62380d diff --git a/.local/lib/python3.8/site-packages/numpy/random/tests/data/sfc64-testset-1.csv b/.local/lib/python3.8/site-packages/numpy/random/tests/data/sfc64-testset-1.csv new file mode 100644 index 0000000..4fffe69 --- /dev/null +++ 
b/.local/lib/python3.8/site-packages/numpy/random/tests/data/sfc64-testset-1.csv @@ -0,0 +1,1001 @@ +seed, 0xdeadbeaf +0, 0xa475f55fbb6bc638 +1, 0xb2d594b6c29d971c +2, 0x275bc4ece4484fb1 +3, 0x569be72d9b3492fb +4, 0x89a5bb9b206a670c +5, 0xd951bfa06afdc3f9 +6, 0x7ee2e1029d52a265 +7, 0x12ef1d4de0cb4d4c +8, 0x41658ba8f0ef0280 +9, 0x5b650c82e4fe09c5 +10, 0x638a9f3e30ec4e94 +11, 0x147487fb2ba9233e +12, 0x89ef035603d2d1fb +13, 0xe66ca57a190e6cbe +14, 0x330f673740dd61fc +15, 0xc71d3dce2f8bb34e +16, 0x3c07c39ff150b185 +17, 0x5df952b6cae8f099 +18, 0x9f09f2b1f0ceac80 +19, 0x19598eee2d0c4c67 +20, 0x64e06483702e0ebd +21, 0xda04d1fdb545f7fa +22, 0xf2cf53b61a0c4f9b +23, 0xf0bb724ce196f66e +24, 0x71cefde55d9cf0f +25, 0x6323f62824a20048 +26, 0x1e93604680f14b4e +27, 0xd9d8fad1d4654025 +28, 0xf4ee25af2e76ca08 +29, 0x6af3325896befa98 +30, 0xad9e43abf5e04053 +31, 0xbf930e318ce09de3 +32, 0x61f9583b4f9ffe76 +33, 0x9b69d0b3d5ec8958 +34, 0xa608f250f9b2ca41 +35, 0x6fdba7073dc2bb5d +36, 0xa9d57601efea6d26 +37, 0xc24a88a994954105 +38, 0xc728b1f78d88fe5b +39, 0x88da88c2b083b3b2 +40, 0xa9e27f7303c76cfd +41, 0xc4c24608c29176eb +42, 0x5420b58466b972fd +43, 0xd2018a661b6756c8 +44, 0x7caed83d9573fc7 +45, 0x562a3d81b849a06a +46, 0x16588af120c21f2c +47, 0x658109a7e0eb4837 +48, 0x877aabb14d3822e1 +49, 0x95704c342c3745fe +50, 0xeeb8a0dc81603616 +51, 0x431bf94889290419 +52, 0xe4a9410ab92a5863 +53, 0xbc6be64ea60f12ba +54, 0x328a2da920015063 +55, 0x40f6b3bf8271ae07 +56, 0x4068ff00a0e854f8 +57, 0x1b287572ca13fa78 +58, 0xa11624a600490b99 +59, 0x4a04ef29eb7150fa +60, 0xcc9469ab5ffb739 +61, 0x99a6a9f8d95e782 +62, 0x8e90356573e7a070 +63, 0xa740b8fb415c81c4 +64, 0x47eccef67447f3da +65, 0x2c720afe3a62a49b +66, 0xe2a747f0a43eacf4 +67, 0xba063a87ab165576 +68, 0xbc1c78ed27feb5a3 +69, 0x285a19fa3974f9d +70, 0x489c61e704f5f0e3 +71, 0xf5ab04f6b03f238b +72, 0x7e25f88138a110dd +73, 0xc3d1cef3d7c1f1d1 +74, 0xc3de6ec64d0d8e00 +75, 0x73682a15b6cc5088 +76, 0x6fecbeb319163dc5 +77, 0x7e100d5defe570a1 +78, 0xad2af9af076dce57 +79, 0x3c65100e23cd3a9a +80, 0x4b442cc6cfe521bb +81, 0xe89dc50f8ab1ef75 +82, 0x8b3c6fdc2496566 +83, 0xdfc50042bc2c308c +84, 0xe39c5f158b33d2b2 +85, 0x92f6adefdfeb0ac +86, 0xdf5808a949c85b3e +87, 0x437384021c9dace9 +88, 0xa7b5ed0d3d67d8f +89, 0xe1408f8b21da3c34 +90, 0xa1bba125c1e80522 +91, 0x7611dc4710385264 +92, 0xb00a46ea84082917 +93, 0x51bf8002ffa87cef +94, 0x9bb81013e9810adc +95, 0xd28f6600013541cd +96, 0xc2ca3b1fa7791c1f +97, 0x47f9ad58f099c82c +98, 0x4d1bb9458469caf9 +99, 0xca0b165b2844257 +100, 0xc3b2e667d075dc66 +101, 0xde22f71136a3dbb1 +102, 0x23b4e3b6f219e4c3 +103, 0x327e0db4c9782f66 +104, 0x9365506a6c7a1807 +105, 0x3e868382dedd3be7 +106, 0xff04fa6534bcaa99 +107, 0x96621a8862995305 +108, 0x81bf39cb5f8e1df7 +109, 0x79b684bb8c37af7a +110, 0xae3bc073c3cde33c +111, 0x7805674112c899ac +112, 0xd95a27995abb20f2 +113, 0x71a503c57b105c40 +114, 0x5ff00d6a73ec8acc +115, 0x12f96391d91e47c2 +116, 0xd55ca097b3bd4947 +117, 0x794d79d20468b04 +118, 0x35d814efb0d7a07d +119, 0xfa9ac9bd0aae76d3 +120, 0xa77b8a3711e175cd +121, 0xe6694fbf421f9489 +122, 0xd8f1756525a1a0aa +123, 0xe38dfa8426277433 +124, 0x16b640c269bbcd44 +125, 0x2a7a5a67ca24cfeb +126, 0x669039c28d5344b4 +127, 0x2a445ee81fd596bb +128, 0x600df94cf25607e0 +129, 0x9358561a7579abff +130, 0xee1d52ea179fc274 +131, 0x21a8b325e89d31be +132, 0x36fc0917486eec0a +133, 0x3d99f40717a6be9f +134, 0x39ac140051ca55ff +135, 0xcef7447c26711575 +136, 0xf22666870eff441d +137, 0x4a53c6134e1c7268 +138, 0xd26de518ad6bdb1b +139, 0x1a736bf75b8b0e55 +140, 0xef1523f4e6bd0219 +141, 0xb287b32fd615ad92 +142, 
0x2583d6af5e841dd5 +143, 0x4b9294aae7ca670c +144, 0xf5aa4a84174f3ca9 +145, 0x886300f9e0dc6376 +146, 0x3611401e475ef130 +147, 0x69b56432b367e1ac +148, 0x30c330e9ab36b7c4 +149, 0x1e0e73079a85b8d5 +150, 0x40fdfc7a5bfaecf +151, 0xd7760f3e8e75a085 +152, 0x1cc1891f7f625313 +153, 0xeece1fe6165b4272 +154, 0xe61111b0c166a3c1 +155, 0x2f1201563312f185 +156, 0xfd10e8ecdd2a57cb +157, 0x51cdc8c9dd3a89bf +158, 0xed13cc93938b5496 +159, 0x843816129750526b +160, 0xd09995cd6819ada +161, 0x4601e778d40607df +162, 0xef9df06bd66c2ea0 +163, 0xae0bdecd3db65d69 +164, 0xbb921a3c65a4ae9a +165, 0xd66698ce8e9361be +166, 0xacdc91647b6068f4 +167, 0xe505ef68f2a5c1c0 +168, 0xd6e62fd27c6ab137 +169, 0x6a2ba2c6a4641d86 +170, 0x9c89143715c3b81 +171, 0xe408c4e00362601a +172, 0x986155cbf5d4bd9d +173, 0xb9e6831728c893a7 +174, 0xb985497c3bf88d8c +175, 0xd0d729214b727bec +176, 0x4e557f75fece38a +177, 0x6572067fdfd623ca +178, 0x178d49bb4d5cd794 +179, 0xe6baf59f60445d82 +180, 0x5607d53518e3a8d2 +181, 0xba7931adb6ebbd61 +182, 0xe853576172611329 +183, 0xe945daff96000c44 +184, 0x565b9ba3d952a176 +185, 0xcdb54d4f88c584c8 +186, 0x482a7499bee9b5e5 +187, 0x76560dd0affe825b +188, 0x2a56221faa5ca22c +189, 0x7729be5b361f5a25 +190, 0xd6f2195795764876 +191, 0x59ef7f8f423f18c5 +192, 0x7ebefed6d02adde1 +193, 0xcfec7265329c73e5 +194, 0x4fd8606a5e59881c +195, 0x95860982ae370b73 +196, 0xdecfa33b1f902acc +197, 0xf9b8a57400b7c0a6 +198, 0xd20b822672ec857b +199, 0x4eb81084096c7364 +200, 0xe535c29a44d9b6ad +201, 0xdef8b48ebacb2e29 +202, 0x1063bc2b8ba0e915 +203, 0xe4e837fb53d76d02 +204, 0x4df935db53579fb8 +205, 0xa30a0c8053869a89 +206, 0xe891ee58a388a7b5 +207, 0x17931a0c64b8a985 +208, 0xaf2d350b494ce1b3 +209, 0x2ab9345ffbcfed82 +210, 0x7de3fe628a2592f0 +211, 0x85cf54fab8b7e79d +212, 0x42d221520edab71b +213, 0x17b695b3af36c233 +214, 0xa4ffe50fe53eb485 +215, 0x1102d242db800e4d +216, 0xc8dc01f0233b3b6 +217, 0x984a030321053d36 +218, 0x27fa8dc7b7112c0e +219, 0xba634dd8294e177f +220, 0xe67ce34b36332eb +221, 0x8f1351e1894fb41a +222, 0xb522a3048761fd30 +223, 0xc350ad9bc6729edc +224, 0xe0ed105bd3c805e1 +225, 0xa14043d2b0825aa7 +226, 0xee7779ce7fc11fdf +227, 0xc0fa8ba23a60ab25 +228, 0xb596d1ce259afbad +229, 0xaa9b8445537fdf62 +230, 0x770ab2c700762e13 +231, 0xe812f1183e40cc1 +232, 0x44bc898e57aefbbd +233, 0xdd8a871df785c996 +234, 0x88836a5e371eb36b +235, 0xb6081c9152623f27 +236, 0x895acbcd6528ca96 +237, 0xfb67e33ddfbed435 +238, 0xaf7af47d323ce26 +239, 0xe354a510c3c39b2d +240, 0x5cacdedda0672ba3 +241, 0xa440d9a2c6c22b09 +242, 0x6395099f48d64304 +243, 0xc11cf04c75f655b5 +244, 0x1c4e054d144ddb30 +245, 0x3e0c2db89d336636 +246, 0x127ecf18a5b0b9a7 +247, 0x3b50551a88ea7a73 +248, 0xbd27003e47f1f684 +249, 0xf32d657782baac9b +250, 0x727f5cabf020bc9 +251, 0x39c1c1c226197dc7 +252, 0x5552c87b35deeb69 +253, 0x64d54067b5ce493f +254, 0x3494b091fe28dda0 +255, 0xdf0278bc85ee2965 +256, 0xdef16fec25efbd66 +257, 0xe2be09f578c4ce28 +258, 0xd27a9271979d3019 +259, 0x427f6fcd71845e3 +260, 0x26b52c5f81ec142b +261, 0x98267efc3986ad46 +262, 0x7bf4165ddb7e4374 +263, 0xd05f7996d7941010 +264, 0x3b3991de97b45f14 +265, 0x9068217fb4f27a30 +266, 0xd8fe295160afc7f3 +267, 0x8a159fab4c3bc06f +268, 0x57855506d19080b6 +269, 0x7636df6b3f2367a4 +270, 0x2844ee3abd1d5ec9 +271, 0xe5788de061f51c16 +272, 0x69e78cc9132a164 +273, 0xacd53cde6d8cd421 +274, 0xb23f3100068e91da +275, 0x4140070a47f53891 +276, 0xe4a422225a96e53a +277, 0xb82a8925a272a2ac +278, 0x7c2f9573590fe3b7 +279, 0xbaf80764db170575 +280, 0x955abffa54358368 +281, 0x355ce7460614a869 +282, 0x3700ede779a4afbf +283, 0x10a6ec01d92d68cd +284, 
0x3308f5a0a4c0afef +285, 0x97b892d7601136c9 +286, 0x4955c3b941b8552e +287, 0xca85aa67e941961d +288, 0xb1859ae5db28e9d2 +289, 0x305d072ac1521fbd +290, 0xed52a868996085bb +291, 0x723bfa6a76358852 +292, 0x78d946ecd97c5fb3 +293, 0x39205b30a8e23e79 +294, 0xb927e3d086baadbe +295, 0xa18d6946136e1ff5 +296, 0xdab6f0b51c1eb5ff +297, 0xf0a640bf7a1af60c +298, 0xf0e81db09004d0d4 +299, 0xfe76cebdbe5a4dde +300, 0x2dafe9cc3decc376 +301, 0x4c871fdf1af34205 +302, 0xe79617d0c8fa893b +303, 0xee658aaad3a141f7 +304, 0xfd91aa74863e19f1 +305, 0x841b8f55c103cc22 +306, 0x22766ed65444ad5d +307, 0x56d03d1beca6c17a +308, 0x5fd4c112c92036ae +309, 0x75466ae58a5616dc +310, 0xfbf98b1081e802a9 +311, 0xdc325e957bf6d8f5 +312, 0xb08da7015ebd19b7 +313, 0xf25a9c0944f0c073 +314, 0xf4625bafa0ced718 +315, 0x4349c9e093a9e692 +316, 0x75a9ccd4dd8935cb +317, 0x7e6cf9e539361e91 +318, 0x20fdd22fb6edd475 +319, 0x5973021b57c2311f +320, 0x75392403667edc15 +321, 0xed9b2156ea70d9f1 +322, 0xf40c114db50b64a0 +323, 0xe26bb2c9eef20c62 +324, 0x409c1e3037869f03 +325, 0xcdfd71fdda3b7f91 +326, 0xa0dfae46816777d6 +327, 0xde060a8f61a8deb8 +328, 0x890e082a8b0ca4fc +329, 0xb9f2958eddf2d0db +330, 0xd17c148020d20e30 +331, 0xffdc9cc176fe7201 +332, 0xffb83d925b764c1 +333, 0x817ea639e313da8d +334, 0xa4dd335dd891ca91 +335, 0x1342d25a5e81f488 +336, 0xfa7eb9c3cf466b03 +337, 0xfe0a423d44b185d0 +338, 0x101cfd430ab96049 +339, 0x7b5d3eda9c4504b +340, 0xe20ccc006e0193f1 +341, 0xf54ccddedebc5df0 +342, 0xc0edd142bd58f1db +343, 0x3831f40d378d2430 +344, 0x80132353f0a88289 +345, 0x688f23c419d03ef8 +346, 0x4c6837e697884066 +347, 0x699387bb2e9a3a8f +348, 0x8996f860342448d8 +349, 0xb0f80dff99bfa5cc +350, 0x3e927a7f9ea12c8e +351, 0xd7e498d1e5f9dff3 +352, 0x78ecb97bb3f864cc +353, 0x3c4ffd069a014d38 +354, 0xf8d5073a1e09b4d4 +355, 0x8717e854f9faef23 +356, 0xfbcc5478d8d0ad7 +357, 0xd3cd8b233ca274ff +358, 0x8bd8f11f79beb265 +359, 0xf64498a832d8fd0e +360, 0xb01bba75112131ec +361, 0x55572445a7869781 +362, 0x7b56622f18cb3d7a +363, 0x7f192c9e075bdb83 +364, 0xd9a112f836b83ff3 +365, 0x68673b37269653dc +366, 0xe46a9433fb6a0879 +367, 0x127d756ca4779001 +368, 0xc1378e8b1e8eab94 +369, 0x1006edb0f51d078c +370, 0xc6dd53961232d926 +371, 0x9a4aeef44038256d +372, 0xd357f4fa652d4f5f +373, 0x59f3d2cc3378598 +374, 0xe76e6207a824a7fc +375, 0x5fc5e33712ceffef +376, 0x77d24aeb0ccb1adc +377, 0x5be4b2826805659e +378, 0x257c69d787e64634 +379, 0x58dd52ca6bc727b1 +380, 0x3ab997767235ea33 +381, 0x986a2a7a966fad14 +382, 0xc900f8b27761dcc4 +383, 0x44991bdb13795700 +384, 0xe5c145a4fe733b2 +385, 0x56f041b56bffe0d3 +386, 0x5779c4fef8067996 +387, 0xa0fe8748e829532d +388, 0x840c1277d78d9dd4 +389, 0x37ebcb315432acbc +390, 0xf4bc8738433ba3be +391, 0x8b122993f2e10062 +392, 0xe1fe8481f2681ed5 +393, 0x8e23f1630d9f494a +394, 0xda24661a01b7d0b3 +395, 0x7a02942a179cee36 +396, 0xf1e08a3c09b71ac +397, 0x3dec2cc7ee0bd8fd +398, 0x1f3e480113d805d4 +399, 0xc061b973ad4e3f2c +400, 0x6bea750f17a66836 +401, 0xbc2add72eac84c25 +402, 0xcff058d3f97934ca +403, 0x54ccc30987778ec2 +404, 0x93449ec1e1469558 +405, 0xe2ff369eb0c6836 +406, 0x41c2df2d63bf8e55 +407, 0xf9302629b6c71be2 +408, 0xdd30376b8e5ab29a +409, 0x12db9e04f911d754 +410, 0x8d03d6cd359f1b97 +411, 0xe15956511abf1cee +412, 0x9b68e10e2c2fd940 +413, 0x2e28de6491c1ce53 +414, 0x52b329b72d0c109d +415, 0xc2c0b115f9da2a60 +416, 0x6ca084105271bbff +417, 0x49b92b8676058c1e +418, 0x767fc92a70f7e5a3 +419, 0x87ba4ed4b65a6aa0 +420, 0xf70b052e0a3975e9 +421, 0x3e925c3306db9eec +422, 0x43253f1d96ac9513 +423, 0xe3e04f1a1ea454c4 +424, 0x763e3f4cc81ba0c8 +425, 0x2a2721ac69265705 +426, 
0xdf3b0ac6416ea214 +427, 0xa6a6b57450f3e000 +428, 0xc3d3b1ac7dbfe6ac +429, 0xb66e5e6f7d2e4ec0 +430, 0x43c65296f98f0f04 +431, 0xdb0f6e3ff974d842 +432, 0x3d6b48e02ebb203b +433, 0xd74674ebf09d8f27 +434, 0xbe65243c58fc1200 +435, 0x55eb210a68d42625 +436, 0x87badab097dbe883 +437, 0xada3fda85a53824f +438, 0xef2791e8f48cd37a +439, 0x3fe7fceb927a641a +440, 0xd3bffd3ff031ac78 +441, 0xb94efe03da4d18fb +442, 0x162a0ad8da65ea68 +443, 0x300f234ef5b7e4a6 +444, 0xa2a8b4c77024e4fb +445, 0x5950f095ddd7b109 +446, 0xded66dd2b1bb02ba +447, 0x8ec24b7fa509bcb6 +448, 0x9bede53d924bdad6 +449, 0xa9c3f46423be1930 +450, 0x6dfc90597f8de8b4 +451, 0xb7419ebc65b434f0 +452, 0xa6596949238f58b9 +453, 0x966cbade640829b8 +454, 0x58c74877bdcbf65e +455, 0xaa103b8f89b0c453 +456, 0x219f0a86e41179a4 +457, 0x90f534fc06ddc57f +458, 0x8db7cdd644f1affa +459, 0x38f91de0167127ac +460, 0xdcd2a65e4df43daa +461, 0x3e04f34a7e01f834 +462, 0x5b237eea68007768 +463, 0x7ff4d2b015921768 +464, 0xf786b286549d3d51 +465, 0xaefa053fc2c3884c +466, 0x8e6a8ff381515d36 +467, 0x35b94f3d0a1fce3c +468, 0x165266d19e9abb64 +469, 0x1deb5caa5f9d8076 +470, 0x13ab91290c7cfe9d +471, 0x3651ca9856be3e05 +472, 0xe7b705f6e9cccc19 +473, 0xd6e7f79668c127ed +474, 0xa9faf37154896f92 +475, 0x89fbf190603e0ab1 +476, 0xb34d155a86f942d0 +477, 0xb2d4400a78bfdd76 +478, 0x7c0946aca8cfb3f0 +479, 0x7492771591c9d0e8 +480, 0xd084d95c5ca2eb28 +481, 0xb18d12bd3a6023e +482, 0xea217ed7b864d80b +483, 0xe52f69a755dd5c6f +484, 0x127133993d81c4aa +485, 0xe07188fcf1670bfb +486, 0x178fbfe668e4661d +487, 0x1c9ee14bb0cda154 +488, 0x8d043b96b6668f98 +489, 0xbc858986ec96ca2b +490, 0x7660f779d528b6b7 +491, 0xd448c6a1f74ae1d3 +492, 0x178e122cfc2a6862 +493, 0x236f000abaf2d23b +494, 0x171b27f3f0921915 +495, 0x4c3ff07652f50a70 +496, 0x18663e5e7d3a66ca +497, 0xb38c97946c750cc9 +498, 0xc5031aae6f78f909 +499, 0x4d1514e2925e95c1 +500, 0x4c2184a741dabfbb +501, 0xfd410364edf77182 +502, 0xc228157f863ee873 +503, 0x9856fdc735cc09fc +504, 0x660496cd1e41d60e +505, 0x2edf1d7e01954c32 +506, 0xd32e94639bdd98cf +507, 0x8e153f48709a77d +508, 0x89357f332d2d6561 +509, 0x1840d512c97085e6 +510, 0x2f18d035c9e26a85 +511, 0x77b88b1448b26d5b +512, 0xc1ca6ef4cdae0799 +513, 0xcc203f9e4508165f +514, 0xeaf762fbc9e0cbbe +515, 0xc070c687f3c4a290 +516, 0xd49ed321068d5c15 +517, 0x84a55eec17ee64ee +518, 0x4d8ee685298a8871 +519, 0x9ff5f17d7e029793 +520, 0x791d7d0d62e46302 +521, 0xab218b9114e22bc6 +522, 0x4902b7ab3f7119a7 +523, 0x694930f2e29b049e +524, 0x1a3c90650848999f +525, 0x79f1b9d8499c932b +526, 0xfacb6d3d55e3c92f +527, 0x8fd8b4f25a5da9f5 +528, 0xd037dcc3a7e62ae7 +529, 0xfecf57300d8f84f4 +530, 0x32079b1e1dc12d48 +531, 0xe5f8f1e62b288f54 +532, 0x97feba3a9c108894 +533, 0xd279a51e1899a9a0 +534, 0xd68eea8e8e363fa8 +535, 0x7394cf2deeca9386 +536, 0x5f70b0c80f1dbf10 +537, 0x8d646916ed40462 +538, 0xd253bb1c8a12bbb6 +539, 0x38f399a821fbd73e +540, 0x947523a26333ac90 +541, 0xb52e90affbc52a37 +542, 0xcf899cd964654da4 +543, 0xdf66ae9cca8d99e7 +544, 0x6051478e57c21b6a +545, 0xffa7dc975af3c1da +546, 0x195c7bff2d1a8f5 +547, 0x64f12b6575cf984d +548, 0x536034cb842cf9e1 +549, 0x180f247ce5bbfad +550, 0x8ced45081b134867 +551, 0x532bbfdf426710f3 +552, 0x4747933e74c4f54d +553, 0x197a890dc4793401 +554, 0x76c7cc2bd42fae2 +555, 0xdabfd67f69675dd0 +556, 0x85c690a68cdb3197 +557, 0xe482cec89ce8f92 +558, 0x20bc9fb7797011b1 +559, 0x76dc85a2185782ad +560, 0x3df37c164422117a +561, 0x99211f5d231e0ab0 +562, 0xef7fd794a0a91f4 +563, 0x419577151915f5fe +564, 0x3ce14a0a7135dae3 +565, 0x389b57598a075d6a +566, 0x8cc2a9d51b5af9aa +567, 0xe80a9beffbd13f13 +568, 
0x65e96b22ea8a54d8 +569, 0x79f38c4164138ede +570, 0xd1955846cba03d81 +571, 0x60359fe58e4f26d6 +572, 0x4ea724f585f8d13e +573, 0x316dfdbadc801a3c +574, 0x20aa29b7c6dd66fe +575, 0x65eaf83a6a008caa +576, 0x407000aff1b9e8cb +577, 0xb4d49bfb2b268c40 +578, 0xd4e6fe8a7a0f14a9 +579, 0xe34afef924e8f58e +580, 0xe377b0c891844824 +581, 0x29c2e20c112d30c8 +582, 0x906aad1fe0c18a95 +583, 0x308385f0efbb6474 +584, 0xf23900481bf70445 +585, 0xfdfe3ade7f937a55 +586, 0xf37aae71c33c4f97 +587, 0x1c81e3775a8bed85 +588, 0x7eb5013882ce35ea +589, 0x37a1c1692495818d +590, 0x3f90ae118622a0ba +591, 0x58e4fe6fea29b037 +592, 0xd10ff1d269808825 +593, 0xbce30edb60c21bba +594, 0x123732329afd6fee +595, 0x429b4059f797d840 +596, 0x421166568a8c4be1 +597, 0x88f895c424c1bd7f +598, 0x2adaf7a7b9f781cb +599, 0xa425644b26cb698 +600, 0x8cc44d2486cc5743 +601, 0xdb9f357a33abf6ba +602, 0x1a57c4ea77a4d70c +603, 0x1dea29be75239e44 +604, 0x463141a137121a06 +605, 0x8fecfbbe0b8a9517 +606, 0x92c83984b3566123 +607, 0x3b1c69180ed28665 +608, 0x14a6073425ea8717 +609, 0x71f4c2b3283238d7 +610, 0xb3d491e3152f19f +611, 0x3a0ba3a11ebac5d2 +612, 0xddb4d1dd4c0f54ac +613, 0xdb8f36fe02414035 +614, 0x1cf5df5031b1902c +615, 0x23a20ed12ef95870 +616, 0xf113e573b2dedcbb +617, 0x308e2395cde0a9fa +618, 0xd377a22581c3a7da +619, 0xe0ced97a947a66fb +620, 0xe44f4de9cd754b00 +621, 0x2344943337d9d1bf +622, 0x4b5ae5e2ea6e749c +623, 0x9b8d2e3ef41d1c01 +624, 0x59a5a53ebbd24c6b +625, 0x4f7611bf9e8a06fb +626, 0xea38c7b61361cd06 +627, 0xf125a2bfdd2c0c7 +628, 0x2df8dcb5926b9ebb +629, 0x233e18720cc56988 +630, 0x974c61379b4aa95e +631, 0xc7fe24c1c868910b +632, 0x818fd1affc82a842 +633, 0xcee92a952a26d38e +634, 0x8962f575ebcbf43 +635, 0x7770687e3678c460 +636, 0xdfb1db4ed1298117 +637, 0xb9db54cb03d434d3 +638, 0x34aebbf2244257ad +639, 0xd836db0cb210c490 +640, 0x935daed7138957cd +641, 0x3cd914b14e7948fd +642, 0xd0472e9ed0a0f7f0 +643, 0xa9df33dca697f75e +644, 0x15e9ea259398721a +645, 0x23eeba0f970abd60 +646, 0x2217fdf8bbe99a12 +647, 0x5ea490a95717b198 +648, 0xf4e2bfc28280b639 +649, 0x9d19916072d6f05c +650, 0x5e0387cab1734c6a +651, 0x93c2c8ac26e5f01e +652, 0xb0d934354d957eb1 +653, 0xee5099a1eef3188c +654, 0x8be0abca8edc1115 +655, 0x989a60845dbf5aa3 +656, 0x181c7ed964eee892 +657, 0x49838ea07481288d +658, 0x17dbc75d66116b2e +659, 0xa4cafb7a87c0117e +660, 0xab2d0ae44cdc2e6e +661, 0xdf802f2457e7da6 +662, 0x4b966c4b9187e124 +663, 0x62de9db6f4811e1a +664, 0x1e20485968bc62 +665, 0xe9ac288265caca94 +666, 0xc5c694d349aa8c1a +667, 0x3d67f2083d9bdf10 +668, 0x9a2468e503085486 +669, 0x9d6acd3dc152d1a3 +670, 0xca951e2aeee8df77 +671, 0x2707371af9cdd7b0 +672, 0x2347ae6a4eb5ecbd +673, 0x16abe5582cb426f +674, 0x523af4ff980bbccb +675, 0xb07a0f043e3694aa +676, 0x14d7c3da81b2de7 +677, 0xf471f1b8ac22305b +678, 0xdb087ffff9e18520 +679, 0x1a352db3574359e8 +680, 0x48d5431502cc7476 +681, 0x7c9b7e7003dfd1bf +682, 0x4f43a48aae987169 +683, 0x9a5d3eb66dedb3e9 +684, 0xa7b331af76a9f817 +685, 0xba440154b118ab2d +686, 0x64d22344ce24c9c6 +687, 0xa22377bd52bd043 +688, 0x9dfa1bb18ca6c5f7 +689, 0xdccf44a92f644c8b +690, 0xf623d0a49fd18145 +691, 0x556d5c37978e28b3 +692, 0xad96e32ce9d2bb8b +693, 0x2e479c120be52798 +694, 0x7501cf871af7b2f7 +695, 0xd02536a5d026a5b8 +696, 0x4b37ff53e76ab5a4 +697, 0xdb3a4039caaeab13 +698, 0x6cbd65e3b700c7be +699, 0x7367abd98761a147 +700, 0xf4f9ba216a35aa77 +701, 0xf88ca25ce921eb86 +702, 0xb211de082ec2cbf2 +703, 0xdd94aa46ec57e12e +704, 0xa967d74ad8210240 +705, 0xdaa1fada8cfa887 +706, 0x85901d081c4488ee +707, 0xcf67f79a699ef06 +708, 0x7f2f1f0de921ee14 +709, 0x28bc61e9d3f2328b +710, 
0x3332f2963faf18e5 +711, 0x4167ac71fcf43a6 +712, 0x843c1746b0160b74 +713, 0xd9be80070c578a5e +714, 0xbd7250c9af1473e7 +715, 0x43f78afaa3647899 +716, 0x91c6b5dd715a75a5 +717, 0x29cc66c8a07bfef3 +718, 0x3f5c667311dc22be +719, 0x4f49cd47958260cd +720, 0xbef8be43d920b64e +721, 0x7a892a5f13061d8b +722, 0x9532f40125c819b1 +723, 0x924fca3045f8a564 +724, 0x9b2c6442453b0c20 +725, 0x7e21009085b8e793 +726, 0x9b98c17e17af59d2 +727, 0xba61acb73e3ae89a +728, 0xb9d61a710555c138 +729, 0xc2a425d80978974b +730, 0xa275e13592da7d67 +731, 0xe962103202d9ad0f +732, 0xbdf8367a4d6f33fd +733, 0xe59beb2f8648bdc8 +734, 0xb4c387d8fbc4ac1c +735, 0x5e3f276b63054b75 +736, 0xf27e616aa54d8464 +737, 0x3f271661d1cd7426 +738, 0x43a69dbee7502c78 +739, 0x8066fcea6df059a1 +740, 0x3c10f19409bdc993 +741, 0x6ba6f43fb21f23e0 +742, 0x9e182d70a5bccf09 +743, 0x1520783d2a63a199 +744, 0xba1dcc0c70b9cace +745, 0x1009e1e9b1032d8 +746, 0xf632f6a95fb0315 +747, 0x48e711c7114cbfff +748, 0xef281dcec67debf7 +749, 0x33789894d6abf59b +750, 0x6c8e541fffbe7f9c +751, 0x85417f13b08e0a88 +752, 0x9a581e36d589608f +753, 0x461dca50b1befd35 +754, 0x5a3231680dde6462 +755, 0xcc57acf729780b97 +756, 0x50301efef62e1054 +757, 0x675d042cd4f6bbc9 +758, 0x1652fdd3794384c9 +759, 0x1c93bbeeb763cd4d +760, 0x44b7240c4b105242 +761, 0x4c6af2a1b606ccfb +762, 0x18fc43ece2ec1a40 +763, 0x859a5511aeae8acb +764, 0x2f56826f1996ad2f +765, 0xa8e95ce8bb363bdf +766, 0xf4da396054e50e4b +767, 0x5493865e9895883c +768, 0x768e4c8b332ac0e3 +769, 0x32195d2aa583fca5 +770, 0xf2f353f21266bc15 +771, 0x43cddf1d021307d +772, 0x6031e3aa30300e4a +773, 0x4f1298469ac6088f +774, 0x4b4d450bafac574e +775, 0x23e1cf9c0582a22b +776, 0x2e9036980db49cd0 +777, 0xe4e228b113c411b2 +778, 0x8bddcdb82b51706 +779, 0xd2a7ea8288593629 +780, 0x67fe90e98fdda61 +781, 0x7b63494dba95717b +782, 0x105625904510d782 +783, 0xdf4aa2242454e50a +784, 0x32541d6cd7d6c7e3 +785, 0x5661fb432591cf3b +786, 0xce920a5ed047bce7 +787, 0xed4178a3c96eea8f +788, 0xe378cd996e39863b +789, 0x169e1fdc8e2b05e1 +790, 0xaee1812ef7149a96 +791, 0x648571c7453d12c5 +792, 0xb7b6bc9328573c43 +793, 0xe7fb969078e270d7 +794, 0xdfc2b1b8985f6e6f +795, 0x862b6527ee39a1aa +796, 0x1ee329aea91d7882 +797, 0x20d25324f2fe704 +798, 0xbfcc47401fc3bbfd +799, 0x1515cdc8d48b2904 +800, 0xbd6eefe86284261c +801, 0x9b1f28e3b35f22ee +802, 0x842a29d35e5aecda +803, 0xf2346109ad370765 +804, 0x24d68add5a71afd9 +805, 0x4a691421613d91e2 +806, 0x60e3058b3c244051 +807, 0x79194905cdaa5de8 +808, 0xe0e2df35c01e8987 +809, 0xe29b78beffbb5e4a +810, 0xcdcdbc020218c19e +811, 0x5ae0af8c16feae43 +812, 0x8109292feeaf14fa +813, 0x34113f7508dfa521 +814, 0xc062ac163f56730a +815, 0xf1660e66ec6d4c4c +816, 0x5966c55f60151c80 +817, 0x3865ae8ec934b17 +818, 0x472a7314afb055ec +819, 0x7a24277309a44a44 +820, 0x556e02dd35d38baa +821, 0x9849611a1bc96ec1 +822, 0xd176f5d5a8eb0843 +823, 0x44db12ec60510030 +824, 0x272e3a06a0030078 +825, 0x7c4764dbefc075ea +826, 0x910712f3735c1183 +827, 0xd49a2da74ae7aff6 +828, 0xcf9b3e6e8f776d71 +829, 0x27789fe3ec481a02 +830, 0x86659f82c6b5912b +831, 0xe044b3dbf339158c +832, 0x99d81f6bb62a37b0 +833, 0x5f5830c246fada9a +834, 0xe68abab1eeb432cb +835, 0x49c5c5ace04e104 +836, 0x1ac3871b3fc6771b +837, 0x773b39f32d070652 +838, 0x9c4138c2ae58b1f3 +839, 0xac41c63d7452ac60 +840, 0x9248826b245359e1 +841, 0x99bba1c7a64f1670 +842, 0xe0dc99ff4ebb92f2 +843, 0x113638652740f87c +844, 0xebf51e94da88cfc +845, 0x5441c344b81b2585 +846, 0xe1e69e0bc2de652a +847, 0xe9ab6d64ae42ed1e +848, 0x879af8730e305f31 +849, 0x36b9ad912c7e00d6 +850, 0x83ef5e9fca853886 +851, 0xda54d48bb20ea974 +852, 
0x32c6d93aefa92aa2 +853, 0x4e887b2c3391847d +854, 0x50966e815f42b1b8 +855, 0x53411ac087832837 +856, 0x46f64fef79df4f29 +857, 0xb34aae3924cd272c +858, 0xf5ad455869a0adbe +859, 0x8351ded7144edac8 +860, 0xeb558af089677494 +861, 0x36ed71d69293a8d6 +862, 0x659f90bf5431b254 +863, 0x53349102b7519949 +864, 0x3db83e20b1713610 +865, 0x6d63f96090556254 +866, 0x4cc0467e8f45c645 +867, 0xb8840c4bd5cd4091 +868, 0xbd381463cc93d584 +869, 0x203410d878c2066d +870, 0x2ebea06213cf71c8 +871, 0x598e8fb75e3fceb4 +872, 0xdcca41ceba0fce02 +873, 0x61bf69212b56aae5 +874, 0x97eed7f70c9114fa +875, 0xf46f37a8b7a063f9 +876, 0x66c8f4ffe5bd6efa +877, 0xe43fd6efda2d4e32 +878, 0x12d6c799e5ad01de +879, 0x9ac83e7f8b709360 +880, 0xbbb7bb3c1957513d +881, 0x7f87c08d4b3796b0 +882, 0x9a7d1d74b6aa4a5c +883, 0xa4314530ff741b6f +884, 0x99a80c6b6f15fca8 +885, 0xd2fec81d6d5fc3ce +886, 0x15a98be1cc40cea +887, 0x98693eb7719366f3 +888, 0x36ccdc2a9e9d4de8 +889, 0x3c8208f63d77df25 +890, 0xca2e376e2343df6 +891, 0xcc9b17cbb54420c6 +892, 0x8724c44a64d7dcb8 +893, 0x9d00c6949ff33869 +894, 0xf4f8e584d2699372 +895, 0x88f4748cdd5a2d53 +896, 0xe215072a1205bc6d +897, 0x190934fe6d740442 +898, 0x7fac5c0ab2af106d +899, 0x1b86633a0bd84fa1 +900, 0x1293e54318492dfb +901, 0x433324fd390f34b9 +902, 0x4c5eb2c67a44643b +903, 0x59a6e281c388b0dd +904, 0xe78e03f9c44623b7 +905, 0x91307a93c768fc3d +906, 0xde8867b004d8e3ff +907, 0xdf52c3f57b7c5862 +908, 0x993f3e1d10358a92 +909, 0x9ccb10bc3e18662d +910, 0x45093ce48a114c73 +911, 0xd59d05979d26330a +912, 0x417c0e03300119a9 +913, 0x1c336500f90cde81 +914, 0x1c8ccd29ead9b85b +915, 0xb76baf3e55d4d950 +916, 0x133ad6196c75fd7e +917, 0x34200b0cde7ed560 +918, 0x9c7c3dacb213c8d9 +919, 0xd97563c4fd9bf1b6 +920, 0x5d910e871835b6cb +921, 0x7d46c4733a16bdf9 +922, 0xe41d73194ddc87b2 +923, 0x7d3d8a0855a465a9 +924, 0x70c2a8b5d3f90c0f +925, 0x9e7565ca5dccfe12 +926, 0x2c0acb4577aa51b1 +927, 0x3d2cd211145b79c7 +928, 0x15a7b17aa6da7732 +929, 0xab44a3730c27d780 +930, 0xf008bd6c802bde3a +931, 0x82ed86ddf3619f77 +932, 0xaabe982ab15c49f9 +933, 0x9bcad8fa6d8e58a4 +934, 0x8f39ed8243718aa1 +935, 0xe9489340e03e3cb6 +936, 0xc722314f5eefb8d0 +937, 0x870e8869a436df59 +938, 0x4dae75b8087a8204 +939, 0xe1d790f6ec6e425b +940, 0xafd39ea1b1d0ed09 +941, 0xdf2c99e464ddf08f +942, 0x74936d859ab9644d +943, 0x3871302164250e73 +944, 0x764b68921e911886 +945, 0x2a1d024b26bb9d66 +946, 0x797fba43918e75b4 +947, 0x62ec6d24ccca335b +948, 0xf4bd8b951762b520 +949, 0x9d450dede9119397 +950, 0x5393a26d10f8c124 +951, 0x6b74769392896b57 +952, 0x7f61dbcc0e328581 +953, 0x64e1df3884d0d94 +954, 0xba77dcdf23738c37 +955, 0xf8e288bc0a177475 +956, 0x4a8abfd1702ecb7d +957, 0x53f22886694736a7 +958, 0x8fc982597ced3e3 +959, 0x1bc46090f820fff7 +960, 0x8bd31f965d02229f +961, 0x65cd0cb29996ee53 +962, 0x702e0f4fcf8c2e9f +963, 0x293b77bff307a9a0 +964, 0x125a986b8b305788 +965, 0x416b0eea428ebf3c +966, 0xeac85421ab0e8469 +967, 0x7f5496095019aa68 +968, 0x1a96d7afbc708e0 +969, 0xb91262e6766e01e1 +970, 0xd0a549cc4ccc6954 +971, 0x75a9a073f50c8a0d +972, 0xae275d2c1c6cd23c +973, 0xcf159b5ec5d28fd4 +974, 0x75d0838ce9b92b +975, 0xd4eddcee6dc4677f +976, 0x6a0a8ad5df6b75b8 +977, 0x6f3fd0ef0f13ecc4 +978, 0xb75a5826c1a8f8a8 +979, 0xd47098bbc7943766 +980, 0x3d4ddd62d5f23dd1 +981, 0x760a904e4583841c +982, 0x2afeb5022b4cf1f +983, 0x66d5f653729f0a13 +984, 0x9a6a5ab62980d30f +985, 0xc332f5643bbf8d5b +986, 0x848fb702e4056a90 +987, 0xa057beaf3f9e8c5f +988, 0x6cc603e4560a6c6a +989, 0xec761811a7b23211 +990, 0xb14aa4090a82aaa5 +991, 0xe29d9d028a5b2dbb +992, 0x5564e53738d68f97 +993, 0xfabca36542eaaf3b +994, 
0xb9912fcb782020a2 +995, 0xe865e01b349284fd +996, 0x540b5ff11c5f9274 +997, 0x3463f64e1e7451dc +998, 0xe15d3e2f33b735f8 +999, 0xf5433336eadef6e diff --git a/.local/lib/python3.8/site-packages/numpy/random/tests/data/sfc64-testset-2.csv b/.local/lib/python3.8/site-packages/numpy/random/tests/data/sfc64-testset-2.csv new file mode 100644 index 0000000..70aebd5 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/tests/data/sfc64-testset-2.csv @@ -0,0 +1,1001 @@ +seed, 0x0 +0, 0x91959e5fb96a6332 +1, 0x3c1dd8a25a7e9f21 +2, 0x657bdffc99798d9e +3, 0x1a04de320b19e022 +4, 0x65b92af0e5f3c61c +5, 0x9c84070ce8f743c0 +6, 0xbb10e573693cdb25 +7, 0xd65ea9e76b37fb6b +8, 0x503efd0e76c8ae66 +9, 0xd711dcd04c26d0f +10, 0x12f53f435814ac8c +11, 0xb392cd402cfc82bd +12, 0x461764550e06c889 +13, 0x716a48b3514e6979 +14, 0xdd0a322213c18ad7 +15, 0x6673a8ca0a05c4d7 +16, 0x2992ef333437f844 +17, 0xc4aaf7e8240b2aad +18, 0x6ab0a1af1f41474f +19, 0xb0bae400c226941d +20, 0xe5f80c2eeeab48c6 +21, 0x3832c6a93a4024bf +22, 0x280bd824fabe8368 +23, 0x66b626228321e5ff +24, 0xe0bdfba5325a307e +25, 0x3a5f65c6ef254e05 +26, 0x99ea12503cb02f94 +27, 0x5d01fd2db77d420b +28, 0x6959bf5f36b2368d +29, 0xd856e30c62b5f5be +30, 0xe33233e1d8140e66 +31, 0xb78be619d415fa8d +32, 0x4f943bb2cc63d3b +33, 0x9b1460b290952d81 +34, 0x19205d794826740e +35, 0x64617bd9d7a6a1ff +36, 0x30442124b55ea76a +37, 0xebbbc3b29d0333fc +38, 0x39235a0fe359751c +39, 0xf9629768891121aa +40, 0x32052f53f366e05a +41, 0x60cc5b412c925bc8 +42, 0xf8b7ecda1c0e5a9 +43, 0x195f036e170a2568 +44, 0xfe06d0381a9ca782 +45, 0x919d89e8b88eebbf +46, 0xa47fb30148cf0d43 +47, 0x5c983e99d5f9fd56 +48, 0xe7492cdb6a1d42cd +49, 0xf9cfe5c865b0cfd8 +50, 0x35b653367bbc3b99 +51, 0xb1d92f6f4d4e440b +52, 0x737e1d5bd87ed9c0 +53, 0x7a880ca1498f8e17 +54, 0x687dae8494f9a3f7 +55, 0x6bae1989f441d5d7 +56, 0x71ad3fa5a9195c2e +57, 0x16b3969779f5d03 +58, 0xd1bce2ac973f15b3 +59, 0xa114b1ee2ce0dcdd +60, 0x270d75c11eb1b8d5 +61, 0xc48ffa087c0a7bc +62, 0xaaf9dc48cda9848d +63, 0x8111cf10ef6e584d +64, 0x6736df6af40ee6f4 +65, 0x1a1a111682fbf98d +66, 0xeb217658e1cb3b5d +67, 0xcaf58a8b79de9dec +68, 0x25d0ffd63c88d7a1 +69, 0x4c498cd871b7f176 +70, 0x4069a6156eb0cf3c +71, 0xdf012f12edcdd867 +72, 0x7734c0ac8edb1689 +73, 0xed6960ac53dbc245 +74, 0x305e20da8868c661 +75, 0x5f0c7a3719956f95 +76, 0x66842bbe3b28895 +77, 0xb608bc9a31eac410 +78, 0xfcb17d5529503abd +79, 0x829ae5cbc29b92ee +80, 0x17f2f0027bc24f3a +81, 0x435926c33d8f44cc +82, 0x3ab899327098dbec +83, 0xaf78573b27f8ead8 +84, 0xa8b334fabcf8dc60 +85, 0xcdf3b366a6a303db +86, 0x8da9379dd62b34c8 +87, 0xb0ba511955f264a7 +88, 0x9d72e21a644f961d +89, 0xfac28382e2e7e710 +90, 0xd457065f048410aa +91, 0x1cae57d952563969 +92, 0x5a160a6223253e03 +93, 0x2c45df736d73c8bd +94, 0x7f651ebc6ad9cec5 +95, 0x77a6be96c7d2e7e7 +96, 0x1721fb1dbfd6546a +97, 0xf73f433ecff3c997 +98, 0xed1e80f680965bfe +99, 0x6705ad67a3003b30 +100, 0xac21134efcadb9f7 +101, 0x4d2ba0a91d456ac +102, 0x59da7b59434eb52b +103, 0x26c1d070fd414b5f +104, 0xed7079ddfce83d9a +105, 0x9277d21f88e0fb7a +106, 0xfae16b9a8d53d282 +107, 0xb08a0e2e405fdf7d +108, 0x2ea20df44229d6ec +109, 0x80e4634cd3612825 +110, 0xbe62e8aeba8f8a1a +111, 0x4981209769c190fb +112, 0xcec96ef14c7e1f65 +113, 0x73fe4457b47e7b53 +114, 0x1d66300677315c31 +115, 0xe26821290498c4cc +116, 0xf6110248fd8fb1c5 +117, 0x30fd7fe32dbd8be3 +118, 0x534ec9b910a2bd72 +119, 0x8f9bfe878bbf7382 +120, 0x4f4eb5295c0c2193 +121, 0xdeb22f03a913be9e +122, 0x40f716f8e2a8886c +123, 0xc65007d0e386cdb1 +124, 0x9bdd26d92b143a14 +125, 0xf644b0b77ea44625 +126, 0x75f5a53f6b01993a +127, 
0xfe803e347bf41010 +128, 0x594bff5fa17bc360 +129, 0x3551edfb450373c7 +130, 0x898f9dad433615db +131, 0x923d2406daa26d49 +132, 0x99e07faccbc33426 +133, 0x7389f9ff4470f807 +134, 0xdc2a25957c6df90b +135, 0x33c6d8965ef3053f +136, 0x51a8f07e838f1ab +137, 0x91c5db369380274f +138, 0xc37de65ac56b207e +139, 0xfcc6d2375dde7f14 +140, 0xa4e6418bff505958 +141, 0x4b8b9f78e46953c4 +142, 0x255ab2e0f93cf278 +143, 0xdf650717af3d96ef +144, 0x2caa21cba3aae2b2 +145, 0xce7e46c6f393daa4 +146, 0x1d5b3573f9997ac7 +147, 0x5280c556e850847d +148, 0x32edc31bef920ad7 +149, 0xefaa6b0b08cf2c6 +150, 0x5151c99d97b111c5 +151, 0x35ccf4bf53d17590 +152, 0xa210d7bd8697b385 +153, 0xa9419f95738fbe61 +154, 0xdeccf93a1a4fdc90 +155, 0xd0ea3365b18e7a05 +156, 0x84122df6dcd31b9a +157, 0x33040a2125cea5f5 +158, 0xfe18306a862f6d86 +159, 0xdb97c8392e5c4457 +160, 0xc3e0fa735e80e422 +161, 0x7d106ff36467a0c1 +162, 0xb9825eecc720a76d +163, 0x7fefc6f771647081 +164, 0xf5df3f5b3977bf13 +165, 0x18fb22736d36f1e0 +166, 0xadc4637b4953abfc +167, 0x174e66d3e17974bd +168, 0xf1614c51df4db5db +169, 0x6664ecde5717b293 +170, 0xd5bc5b6839265c26 +171, 0xf6ca9ce1af3f1832 +172, 0xca696789a9d506ea +173, 0x7399c246c8f9d53 +174, 0xadf49049626417e2 +175, 0xbcd84af37d09ab91 +176, 0xbb41c177f3a3fa45 +177, 0x592becc814d55302 +178, 0xa88b4e65f6cfe5f7 +179, 0xa0a55e34ff879426 +180, 0x3c2ea6aa725b42b7 +181, 0x65ac4a407b1f9521 +182, 0xde63d53f7e88b556 +183, 0x18bc76696d015f40 +184, 0xd1363f2cd4c116a8 +185, 0x2fe859be19a48e4a +186, 0x83d6099b1415e656 +187, 0x43f2cbc1a4ee6410 +188, 0xb2eca3d3421c533d +189, 0xc52b98ea3f031f5d +190, 0xfe57eb01da07e9d1 +191, 0xf9377883537a6031 +192, 0x364030c05dac7add +193, 0x6815cb06b35d4404 +194, 0xceae2d4ce31894be +195, 0xc602bcdf6062bf6a +196, 0xc8e4bd8dcc6062e3 +197, 0x9c29e87b92a1a791 +198, 0x41e626b871ca9651 +199, 0x325c3d1fb8efbcd8 +200, 0x7dbbacf8e3419fb3 +201, 0x3602e72516bb7319 +202, 0x537a008ebd94d24b +203, 0xda7714fc9d4d161d +204, 0x1c8c73700e1b621b +205, 0x2749b80937d6c939 +206, 0x76ee6abac5b14d33 +207, 0xf18d1e92cb6a8b5c +208, 0x6ce9579d9291c721 +209, 0x60523c745a40e58 +210, 0x637f837fcc901757 +211, 0x2ff71b19661dc5b3 +212, 0x393ab586326ad16f +213, 0xa0970ea30fe742b7 +214, 0x570222d7f27fe5ae +215, 0x3b5806d43fd38629 +216, 0x129a0ad7420180c5 +217, 0x1c4726355778d52c +218, 0x7c1459cf77656499 +219, 0xfe038a0932132069 +220, 0x4c4cc317a937483a +221, 0xa333d24067e926ba +222, 0x401d9b6ab37f6ef2 +223, 0x87ad0e491ebe4a2a +224, 0xfc02f312e72d121d +225, 0xfde715b3b99767b2 +226, 0xd111c342ba521c92 +227, 0x83b221b10879c617 +228, 0x6a1bf5c01fdf4277 +229, 0x166bfc0c3f5892ee +230, 0x4608d556d7c57856 +231, 0x8d786857c95ece49 +232, 0x2d357445a1aca4ac +233, 0x79620dae28ecd796 +234, 0x90e715dc0f2201c4 +235, 0x173b68b4c9f4b665 +236, 0x4e14d040ebac4eef +237, 0xbd25960b4b892e +238, 0x911a199db6f1989d +239, 0xfe822d7c601fd2e0 +240, 0x9b4c1d58d8223a69 +241, 0x907c1891283843b0 +242, 0xf4868bf54061c4b2 +243, 0x17f8cd1fc24efd85 +244, 0xd44253f9af14c3aa +245, 0x16d0da0cb911d43c +246, 0x3c6a46615828e79a +247, 0x498591c1138e11a5 +248, 0xcc0f26336d0d6141 +249, 0x4d3ebc873212309a +250, 0x16bad7792d5c2c6a +251, 0x474215a80b2bbd11 +252, 0x7159848abd8492fc +253, 0x359341c50973685f +254, 0x27512ee7bf784a4a +255, 0x45228ea080f70447 +256, 0x880cab616500d50e +257, 0x12fae93f9830d56e +258, 0x6744ee64348d9acd +259, 0x484dada28cd2a828 +260, 0x98491d0729e41863 +261, 0x2f15aac43c2863b0 +262, 0x5727a34d77a1da0f +263, 0xa435cebef6a62eed +264, 0xd211697d57b053b0 +265, 0x65aa757b68bd557 +266, 0xe3a1b7a2d8a3e06a +267, 0x2adf64e67252a7a9 +268, 0xadadcb75cadee276 +269, 
0x7934bc57ac8d97bf +270, 0xccff0d0f412e0606 +271, 0x101a82aa3e8f3db9 +272, 0xb0f2498094b4575c +273, 0xba2561d9ef26ed8a +274, 0xfbcd1268fc3febe1 +275, 0x9aa10bb19eb152e0 +276, 0xf496217a601a6d72 +277, 0xe4be1e4f2fa91363 +278, 0x473a602bf3dd68eb +279, 0xfe8ed2a48c26f4b5 +280, 0x20e94b1a00159476 +281, 0x93e1cb1c6af86ec7 +282, 0x4fcba3898f7442ba +283, 0x5150c3a3d94891df +284, 0x91cfce6c85b033ea +285, 0x625e8a832a806491 +286, 0x28c97ba72e3ec0b2 +287, 0x8e172de217c71ea1 +288, 0x926b80216c732639 +289, 0x28b19431a649ae3d +290, 0x57c039a6e95a3795 +291, 0xfbc354182fe52718 +292, 0x819dfd7c7d534cef +293, 0xabb4093a619ed44f +294, 0xe785b7ac6f656745 +295, 0xb647b4588b2f942f +296, 0x64cf870a14c72d27 +297, 0x6d4a4a2a0ba9b37e +298, 0x78bfb0427d7ce6b0 +299, 0x8dcc72b8bfc79ac6 +300, 0x1c14d915d5e76c99 +301, 0xaf48ddea6f096d79 +302, 0x51b39b67aa130d8 +303, 0x1aeeb39d4def06de +304, 0xd678092ffedfdd27 +305, 0x8f54787f325111d3 +306, 0xf2ca2e827beaa6bc +307, 0x339d134099e98545 +308, 0x1f6a8a7b33942e43 +309, 0x952c8065dbef669a +310, 0xe066aeb6690147f7 +311, 0xed25aa92cf58ebb6 +312, 0x7601edce215ef521 +313, 0xed1c5b396abd9434 +314, 0x4fd1e407535de9d5 +315, 0xccc8315a0d4d1441 +316, 0x85753e250bb86976 +317, 0xf232e469378761c3 +318, 0x81d691b8e9aef3c6 +319, 0x224a2f9cab0ad0e +320, 0x978f3d3e50007f4e +321, 0xd3713e6a6c0cbe60 +322, 0xcce8f1eadd41f80d +323, 0x34bda028a97d469 +324, 0x90e242fdf0f59183 +325, 0x4d749754fbc5f092 +326, 0x4399f5b7851cc87b +327, 0xcb921a5f25f6c5d7 +328, 0x120bf5d0162101 +329, 0x1304cc2aa352735a +330, 0xf7236c5d0d5d417b +331, 0xc31b320fc1654306 +332, 0xb468c6b23f3fb4e7 +333, 0xb5985b5bfaca4166 +334, 0x898285a1cd2f8375 +335, 0xa13493da372aa7c9 +336, 0x15c80c09c12634e7 +337, 0x9b765c5cc9d438bd +338, 0xee7da816a9201dcb +339, 0x92e269f73b5a248e +340, 0xa8086c5de81400ce +341, 0xe0053901853d42be +342, 0x821df32c012f433e +343, 0x17a6d69ca37387c7 +344, 0x2b10044bfba3501f +345, 0x8dfd262afc2e8515 +346, 0xd68c2c7b60226371 +347, 0xe81ac114e4416774 +348, 0x5896d60061ebc471 +349, 0xa996e3147811dbd1 +350, 0xa819c7b80ecb3661 +351, 0x982ad71b38afbc01 +352, 0xab152b65aa17b7fe +353, 0x4582bc282ef187ef +354, 0xab5a17fe8d9bc669 +355, 0x83664fa9cb0284b7 +356, 0x234c4b0091968f52 +357, 0x8ab5f51805688d37 +358, 0xe9e11186e0c53eda +359, 0x10df37ef1de2eccf +360, 0x780f1b0d52db968f +361, 0x50bd4ff292872cd5 +362, 0x51e681c265f5ad0 +363, 0x842c49660a527566 +364, 0x6e56ee026e9eda87 +365, 0x4cf39e40d8c80393 +366, 0x13e466df371f7e1f +367, 0xf2ce1799f38e028e +368, 0x833c8db7adc6ff0e +369, 0xc6e189abc2ec98f +370, 0xafebb3721283fec5 +371, 0xb49bc1eb5cc17bdc +372, 0xf1d02e818f5e4488 +373, 0xe5e9d5b41a1dd815 +374, 0xce8aca6573b1bfe5 +375, 0x9b0a5d70e268b1d5 +376, 0xf3c0503a8358f4de +377, 0x2681605dd755669d +378, 0xea265ca7601efc70 +379, 0xa93747f0a159439f +380, 0x62a86ede78a23e50 +381, 0xac8a18935c3d063c +382, 0x729c0a298f5059f5 +383, 0xbbf195e5b54399f4 +384, 0x38aa9d551f968900 +385, 0x3b3e700c58778caa +386, 0x68e6e33c4443957a +387, 0x7c56fc13eb269815 +388, 0xaf7daca39711804a +389, 0x50fde6d10f9544b3 +390, 0xf3d37159f6f6c03d +391, 0x82d298f5c1a71685 +392, 0x478661ac54c5002c +393, 0x6053768e1a324ae0 +394, 0xde8fb4a7e56707ea +395, 0xaa2809301faa8cf4 +396, 0x690a8d49fedd0722 +397, 0xe17c481b9c217de9 +398, 0x60d1d8a2b57288e3 +399, 0x149adfaadc6b0886 +400, 0xa3c18b6eb79cd5fa +401, 0x5774e3a091af5f58 +402, 0x2acca57ff30e5712 +403, 0x94454d67367c4b0c +404, 0x581b2985ac2df5ca +405, 0x71618e50744f3e70 +406, 0x270a7f3bd9a94ae6 +407, 0x3ef81af9bb36cd7b +408, 0x8a4a2592875254aa +409, 0x704ac6086fbb414a +410, 0xda774d5d3f57414d +411, 
0xe20d3358b918ae9e +412, 0x934a6b9f7b91e247 +413, 0xf91649cde87ec42c +414, 0x248cec5f9b6ced30 +415, 0x56791809fd8d64ba +416, 0xf502b2765c1395f +417, 0x6b04ec973d75aa7f +418, 0xb0339f2794bb26f +419, 0x4c524636efbaea49 +420, 0x6bbf3876e9738748 +421, 0xf686524e754e9e24 +422, 0x8dafa05a42d19cd3 +423, 0xc5f069ab2434008e +424, 0x4fd64cc713cba76 +425, 0xdbf93450c881ed5f +426, 0x492e278ebabb59a2 +427, 0x993fddfde4542642 +428, 0xecde68a72c8d4e52 +429, 0xe0760b3074c311fd +430, 0x68dc0e7e06528707 +431, 0x52b50edf49c0fdc7 +432, 0xb2bd4185c138f412 +433, 0x431496d7e1d86f3 +434, 0xa4e605b037e26c44 +435, 0x58236ae1f0aca2b5 +436, 0x26c72c420fc314d8 +437, 0x20134e982ab99a2b +438, 0x544b59b8b211374b +439, 0x1301c42f3a14d993 +440, 0x52a6ea740f763b0f +441, 0xf209d70c2bebf119 +442, 0xac66a4ebc2aa1be +443, 0x683713ed35878788 +444, 0x2b5578acec06b80c +445, 0x86428efa11c45b36 +446, 0xb49010adb17d291e +447, 0x73b686bd8664b6be +448, 0x6d28ebf57b6884cc +449, 0x9712091230ff58d9 +450, 0xc9c91f74c38b286 +451, 0x776310ac41dc008e +452, 0x2f3739df0bf6a88e +453, 0x5792dc62b94db675 +454, 0x5715910d024b06af +455, 0xeb1dd745458da08 +456, 0xfce7b07ccfa851a7 +457, 0xc305f1e983ac368 +458, 0x485aa9519ac00bb0 +459, 0xa5354f6589fb0ea0 +460, 0x32fee02dfdbf4454 +461, 0x4d1ddc304bbefaaa +462, 0x789a270a1737e57e +463, 0x9f3072f4b1ed8156 +464, 0x4de3c00e89058120 +465, 0xb00a02529e0a86fa +466, 0x539f6f0edd845d9a +467, 0x85e578fe15a8c001 +468, 0xa12c8e1a72cce7d8 +469, 0xc6908abbc2b1828 +470, 0xcf70090774cbb38c +471, 0x3b636a6977b45d4a +472, 0xf0a731b220680b57 +473, 0x18973929f51443a8 +474, 0xe93e1fbe7eadabe +475, 0x8233730f0a6dfa02 +476, 0x66e50b6919b0ab74 +477, 0xb1aba87c97fd08a2 +478, 0xd4dffc1fbc117ad6 +479, 0x6f7fa65724b96e6a +480, 0x4bd5800dee92e0fa +481, 0xe18a959db6256da +482, 0xe53a291bc66df487 +483, 0xb7ec306a08651806 +484, 0x1847a6b80d2821e1 +485, 0xda50391283b14d39 +486, 0xacc4d3cd7cceb97a +487, 0x57f70185165b7bc6 +488, 0x302b6d597c3aaba7 +489, 0xa47f32d037eab51e +490, 0xe1509b4408abc559 +491, 0x4f30a1d7c2934157 +492, 0x2ad03e6c60b650b2 +493, 0x334d9c337b0a9064 +494, 0xc7f442821e7aac12 +495, 0xbcdeb09298694cdd +496, 0xe42402389f8f0fb4 +497, 0xe5de56af539df727 +498, 0x7017f9b2101ee240 +499, 0x1ee5e68d5b10001d +500, 0x436229051836387a +501, 0xcd532d6d6ec38fb7 +502, 0x30a66606fdf38272 +503, 0xfdaa2ab9cf798496 +504, 0x4277b4adec70e7df +505, 0x72cfc30256e0eaef +506, 0x3c3359fd9bd34917 +507, 0xb7aa89598856efb0 +508, 0xf72226f8bf299ef5 +509, 0x258c499275a4356f +510, 0x999a56bfc7f20d76 +511, 0x2b3e7432e20c18b +512, 0x2d1251332f760cb5 +513, 0x7420e0eea62157c5 +514, 0xe85c895aa27cec3d +515, 0x27a0545c7020d57c +516, 0xc68638a65b4fff0d +517, 0xfda473983a4ea747 +518, 0xd19fe65fb4c06062 +519, 0x6b1374e050ee15e4 +520, 0x80065ecd49bc4bef +521, 0x4ee655954bc838de +522, 0xe8fb777504a72299 +523, 0x86b652ea70f4bdde +524, 0xcdc9e0fbde7e4f33 +525, 0x352c0a50cd3ac56 +526, 0x4b8605d368be75dc +527, 0x1ac9ea8129efbc37 +528, 0x470325faa99f39c5 +529, 0x25dd7ef9adccf7a1 +530, 0x5ae2c7a03e965816 +531, 0xf733d2df59dacc7d +532, 0xa05bbf0a8a1a7a70 +533, 0xe8aa3f102846ef5f +534, 0xc9b85ec49ae71789 +535, 0xb904c14ed1cb1936 +536, 0x5ae618230b5f0444 +537, 0x97987fe47b5d7467 +538, 0xabb3aca8865ca761 +539, 0x38bfdf29d4508228 +540, 0x353654f408353330 +541, 0xeb7e92930ae4ef0d +542, 0xec50f1a7ca526b96 +543, 0xd5e2dc08b5697544 +544, 0x24c7fd69d5ec32df +545, 0x6f7e1095568b8620 +546, 0x6ed9c16ca13b3c8 +547, 0xe676ef460002130f +548, 0xa3a01a3992c4b430 +549, 0xe2130406c3b1f202 +550, 0xa8f7263e2aedcd20 +551, 0xc45d71ef2e35f507 +552, 0x37155594021da7ba +553, 0x22dc94f19de73159 
+554, 0x7969fc6bffc5443f +555, 0x97def7e44faa6bfe +556, 0x8b940f5e8931d71f +557, 0xd95b1dd3f1a3fdd5 +558, 0x1c83bfdca615701a +559, 0xb7fcb56279ceca6b +560, 0xd84f8950f20dcd0 +561, 0xb03343698de3cbe0 +562, 0xf64565d448d71f71 +563, 0xda52b4676e0ae662 +564, 0xda39c2c05b4ffb91 +565, 0xb35e2560421f6a85 +566, 0x1a7b108d48ac3646 +567, 0xc4e264dc390d79ed +568, 0xa10727dfd9813256 +569, 0x40d23154e720e4f7 +570, 0xd9fa7cd7e313e119 +571, 0xcbf29107859e6013 +572, 0xc357338553d940b7 +573, 0x2641b7ab0bdfcbaa +574, 0xd12f2b6060533ae7 +575, 0xd0435aa626411c56 +576, 0x44af4a488a9cec72 +577, 0xb934232ea8fa5696 +578, 0x760a8b12072b572d +579, 0xfab18f9942cfa9b3 +580, 0x5676834c1fe84d16 +581, 0x9c54e4fddb353236 +582, 0xab49edfc9551f293 +583, 0x567f1fb45a871d +584, 0x32a967c873998834 +585, 0x99240aad380ef8d1 +586, 0x7f66cbd432859a64 +587, 0x4cdc8a4658166822 +588, 0x984e3984a5766492 +589, 0xa3b2d0a3d64d3d94 +590, 0x177f667172f2affc +591, 0xb1a90607a73a303f +592, 0xe600b6c36427f878 +593, 0xf758f9834cb7f466 +594, 0x8ee9fce4a3f36449 +595, 0xcb8f11533e7da347 +596, 0xe7cf647794dabd7c +597, 0xc9d92cfe6110806 +598, 0xea1335fa9145a1ec +599, 0xbc6c29821d094552 +600, 0x37b9d6a858cc8bc3 +601, 0xf24e4c694929893e +602, 0x55d025ce2d7d0004 +603, 0xccdc69acccf4267b +604, 0xc491c04340c222eb +605, 0xba50f75ecec9befb +606, 0x1ec7bd85b8fe3bb9 +607, 0xe4de66498c59ae8a +608, 0x38aa9e912712c889 +609, 0xcee0e43c5cc31566 +610, 0x72b69aa708fc7ed +611, 0xdff70b7f6fa96679 +612, 0xd6d71d82112aadc3 +613, 0x365177892cb78531 +614, 0xa54852b39de4f72c +615, 0x11dd5832bf16dd59 +616, 0x248a0f3369c97097 +617, 0xa14cec0260e26792 +618, 0x3517616ff142bed1 +619, 0x9b693ad39dab7636 +620, 0x739dff825e994434 +621, 0x67711e7356098c9 +622, 0xa81f8515d2fdf458 +623, 0xdac2908113fe568e +624, 0xe99944ebc6e2806a +625, 0x671728ca5b030975 +626, 0xfdad20edb2b4a789 +627, 0xedc6e466bd0369d2 +628, 0x88b5d469821f7e1b +629, 0x2eabf94049a522a5 +630, 0x247794b7a2f5a8e3 +631, 0x278942bdbe02c649 +632, 0xbe5a9a9196ab99c1 +633, 0x75955060866da1b5 +634, 0xdedcfa149273c0b5 +635, 0xdbeb7a57758f3867 +636, 0x7b9053347a2c8d5a +637, 0xa059b3f2eed338a5 +638, 0x59401a46ded3b79f +639, 0x38044ba56a6d19fb +640, 0x72c7221b4e77e779 +641, 0x526df3491a3a34da +642, 0xc3b31184ba16c0c2 +643, 0xd94c7144488624af +644, 0xcf966ee4dc373f91 +645, 0x62049e65dd416266 +646, 0x7c2adccb925bf8f +647, 0xd5fa5c22ed4ef8e1 +648, 0xd00134ebd11f2cd1 +649, 0xfbdf81767bed3634 +650, 0x62e8cc8ff66b6e26 +651, 0x3a72d6bcd4f2dcf7 +652, 0xf1cd45b1b46a86ed +653, 0x1271f98e0938bb9a +654, 0x82e6927e83dc31fa +655, 0x7b9b0e0acb67b92d +656, 0x6df503e397b2e701 +657, 0x93888f6fb561e0c3 +658, 0x393fb6069a40291 +659, 0x967a7d894cc0754d +660, 0x6e298996ad866333 +661, 0x5ff3cf5559d6ab46 +662, 0xd0d70508c40349f5 +663, 0xc64c66c0dd426b33 +664, 0x8fea340ee35c64dd +665, 0xf9cd381eb3060005 +666, 0xfcc37c2799fc0b11 +667, 0x6a37c91d65b489fa +668, 0x57231000fa0a0c9d +669, 0x55f6e292c6703f9a +670, 0xd0508ffbfa55a7a6 +671, 0x885db543276bdac8 +672, 0xc26dbe6a26b0e704 +673, 0x21f884874ebd709e +674, 0x711f0b6c8f732220 +675, 0x354d0a361eaee195 +676, 0x721344d8d30b006a +677, 0xa0e090a0d3a56f07 +678, 0x16b3d5d823a4952b +679, 0x59d7874bc9eae7b6 +680, 0x9bbb32710076455f +681, 0xd4fb22242ffabafd +682, 0xe1d4ac6770be1d89 +683, 0xb259cedebc73dc8a +684, 0x35faaa3b4246ab69 +685, 0x5d26addefdaee89 +686, 0x8e7ec350da0f3545 +687, 0xd0f316eed9f8fc79 +688, 0x98b2a52c9bf291b2 +689, 0xe4d294a8aca6a314 +690, 0x25bd554e6aa7673c +691, 0xcfde5dcba5be2a6c +692, 0xb5e01fb48d2d2107 +693, 0xe1caf28948028536 +694, 0xd434aa0a26f3ee9b +695, 0xd17723381641b8f6 +696, 
0xfe73bd1f3f3768a2 +697, 0x1cc6b1abd08d67e9 +698, 0x247e328371a28de0 +699, 0x502e7942e5a9104a +700, 0x6a030fd242eb4502 +701, 0xa2ffe02744014ce8 +702, 0x59290763b18fe04e +703, 0xcf14241564271436 +704, 0xb0fb73c3c1503aff +705, 0x94e27c622f82137a +706, 0x747a5b406ac3e1f0 +707, 0x9a914e96a732031d +708, 0x59f68c6c8f078835 +709, 0x809d012c73eb4724 +710, 0x5b3c3b73e1b37d74 +711, 0xdde60ef3ba49cdf7 +712, 0x87a14e1f9c761986 +713, 0x4109b960604522af +714, 0x122d0e1ed0eb6bb9 +715, 0xadc0d29e80bfe33 +716, 0xa25b1b44f5fc8e4e +717, 0xbab85d8a9b793f20 +718, 0x825f4cbced0e7d1e +719, 0x2d6ae8807acb37ea +720, 0x8234420adce2e39 +721, 0x4a8ad4da6b804807 +722, 0x1e19f9bc215e5245 +723, 0x1d6f4848a916dd5e +724, 0x9ac40dfcdc2d39cc +725, 0x9f3524e3086155ec +726, 0x861fffc43124b2ef +727, 0xe640e3b756396372 +728, 0x41cb0f0c5e149669 +729, 0xe0bd37e1192e4205 +730, 0x62917d3858f4ce47 +731, 0xa36e7eb4d855820a +732, 0x204b90255a3bf724 +733, 0x66ee83a0175535bc +734, 0x2c14ce7c6b0c1423 +735, 0x85d9495fa514f70d +736, 0x5a4fe45ead874dbc +737, 0xe72248dcb8cfc863 +738, 0xfc21ff2932ed98cd +739, 0xcbba1edd735b5cad +740, 0x91ddc32809679bf5 +741, 0x192cdf2c7631ea1f +742, 0xbbc451ddf2ea286f +743, 0xad9e80cae2397a64 +744, 0x6918f0119b95d0e5 +745, 0xa40379017a27d70a +746, 0x1aaeddb600e61e1 +747, 0x15afd93cbd7adda9 +748, 0x156719bc2b757ff4 +749, 0x13d9a59e2b2df49d +750, 0x9a490986eaddf0a +751, 0xef9a350f0b3eb6b4 +752, 0x5de7f6295ba4fa4d +753, 0x7f37fd087c3fdb49 +754, 0xa9fe3749d6f3f209 +755, 0x50912ac036d9bfb +756, 0x982cb4d726a441f8 +757, 0x8ca8d8af59b872d0 +758, 0x7f8adfb0ceeade8a +759, 0xdad390ec742be44 +760, 0xa637944d0045be5b +761, 0x3569a3b3af807061 +762, 0x9599da8eae14511d +763, 0xc333e8d19589b01a +764, 0xfb9b524a20b571e1 +765, 0xbd9dc8b37ce5c3e1 +766, 0x142333005fa389ac +767, 0x1368bc37cd5bcce1 +768, 0x16094907ad6ecf73 +769, 0xb32c90dbba4c1130 +770, 0x82761d97c1747dd0 +771, 0x599f9f267ae3444d +772, 0x79ad3382994852e1 +773, 0x2511f06d9ef06e54 +774, 0xb35e6ab7d5bbddae +775, 0xfca9fa83a2988732 +776, 0x7d4350f0394ac3ba +777, 0xa52a9527bb176ea3 +778, 0xb49fa0ceb2aa8353 +779, 0x1f62e504d1468cc0 +780, 0xe1a77bfccce6efc3 +781, 0x776cdff4dc0d6797 +782, 0x56612e39b652c1f2 +783, 0x5f096a29294eda04 +784, 0x7978abc3aabd8b23 +785, 0x79dd875e0485b979 +786, 0x8a98aa4d5735d778 +787, 0xcca43940f69d2388 +788, 0xb2d4b156f144f93a +789, 0xbd528a676e9a862 +790, 0x2a394939c8e7ec5e +791, 0xb1da900c6efe4abc +792, 0x9869af479de4c034 +793, 0x78dbdfb88ac7c1db +794, 0x18cb169143088041 +795, 0xe69e5461c51a3e13 +796, 0x5389fa16ea98183c +797, 0xed7c80d1be1ea520 +798, 0x87246fc359758ced +799, 0xab323eba95fae4ed +800, 0xbc4c0dde7f8a1828 +801, 0xdb739f7955610b1a +802, 0xecd8c68c3434cc +803, 0x138c2eb88c477f44 +804, 0x28a65f96727aae41 +805, 0xdee879f2cf5629d +806, 0x684f0c90ef20070f +807, 0xa24a819ef5621800 +808, 0x8d0054f870e4fdcb +809, 0x99e8c6e695b600b +810, 0x50b705245891f7c3 +811, 0xc02eed3a6e58e51a +812, 0x443d64e95443606c +813, 0xca24959cfbd2d120 +814, 0xe072609ea48815bc +815, 0xbcc715026590315b +816, 0x3e76df24d7aa5938 +817, 0xd8ff04940d9b79ae +818, 0x54474ce790059bcd +819, 0x278390dd6aa70e81 +820, 0xf4df619fe35414e4 +821, 0x757d71270264e615 +822, 0x1e8a373699c11b23 +823, 0xef68c82046e67dd6 +824, 0xe280006599972620 +825, 0x234e095183b0f4d6 +826, 0xe3b7560ed9839749 +827, 0xcd5ec4086572332e +828, 0xc41c0d4aaa279108 +829, 0x4b9cd6126bc16a6d +830, 0x4a7252734f3e3dd0 +831, 0xb3132df156cc103a +832, 0xf9e4abbf7b64464a +833, 0xf936df27fb3c47b7 +834, 0x9142960873f6d71a +835, 0x4ba6aa3235cdb10d +836, 0x3237a2e765ba7766 +837, 0xd62f0b94c8e99e54 +838, 
0x26b682f90a3ae41b +839, 0x40ad5e82072b6f81 +840, 0xd0198101f5484000 +841, 0xe4fac60ba11c332 +842, 0x472d0b0a95ef9d38 +843, 0x8512557aec5a3d8f +844, 0xef83169d3efd4de9 +845, 0x53fe89283e7a7676 +846, 0x2f50933053d69fc4 +847, 0x76f5e4362e2e53a2 +848, 0x8676fdccce28874a +849, 0x2737764c1fb1f821 +850, 0x4a6f70afc066ab55 +851, 0x27f8e151e310fca4 +852, 0xd606960ccbe85161 +853, 0xcce51d7ddd270a32 +854, 0xb4235999794875c2 +855, 0x580084e358e884 +856, 0x2159d5e6dc8586d7 +857, 0x87bd54d8599b3ba4 +858, 0x3e9ade6a2181664 +859, 0x5e6e140406d97623 +860, 0x511545d5aa0080a2 +861, 0xf49d78ed219aac57 +862, 0xbece1f9c90b8ea87 +863, 0x1c741cac36a2c514 +864, 0x7453c141047db967 +865, 0xd751832a5037eba2 +866, 0x71370a3f30ada1f7 +867, 0x7c01cf2dcb408631 +868, 0x1052a4fbdccc0fa1 +869, 0x13d525c9df3fb6c +870, 0xa3aa8dbfee760c55 +871, 0xc0288d200f5155cf +872, 0x79f4bcd12af567c3 +873, 0x8160d163bb548755 +874, 0x5cf2995fb69fd2df +875, 0xcc98ed01396639df +876, 0xad95f1d9cfc8256e +877, 0xa3df27d9fbdbfb9d +878, 0x83e5f5dda4d52929 +879, 0x9adc05043009f55b +880, 0xdfe8329dfde1c001 +881, 0x9980ccdd5298e6a2 +882, 0x636a7bd134f6ef56 +883, 0xef5ff780c4be6ba4 +884, 0x290d71dc77a56d16 +885, 0x6d65db9ff58de1e6 +886, 0x944b063b3805a696 +887, 0xce468ca2cce33008 +888, 0x5ba1ccb840f80f48 +889, 0x28ddce36fc9ad268 +890, 0x4f77ef254d507a21 +891, 0xce9b4057fadf3ab +892, 0xb518bc68298730e6 +893, 0xd2eb5b8e2ec665b0 +894, 0xe1583303a4f87344 +895, 0x9d5a0df4fbe1bed5 +896, 0x2ba9bc03ec8cfd07 +897, 0x479ed880a96ca669 +898, 0xcedf96338324771a +899, 0x312f4fc2da41ffaa +900, 0xa0eb9cf23b5e1ed8 +901, 0xf8f88f975dc3f539 +902, 0x4a37e185d0e96e0f +903, 0xf829654a5c0b46f9 +904, 0x3909cca7a7f8c7fb +905, 0x4c2e1d66ceb45105 +906, 0xaffaa19e1db8af87 +907, 0x9ec498246bd18c76 +908, 0x21d51558edc089da +909, 0xe8984112cd1b1561 +910, 0x7de1d2cf54b0c0e1 +911, 0xa06729aed50bfb9d +912, 0xcf19f733e5db19e1 +913, 0x70edf2624ab777cd +914, 0x46685becad10e078 +915, 0x825e0f6add46785 +916, 0x66d4af3b15f70de4 +917, 0xc676614b0666b21 +918, 0x282a916c864f5cb7 +919, 0x2707283a3f512167 +920, 0x37ff3afda7461623 +921, 0xc767eb1205e4ca86 +922, 0x46b359aecc4ea25b +923, 0x67fbbb797a16dbb1 +924, 0x64fd4ba57122290e +925, 0x8acc2a8ae59d8fac +926, 0x64a49298599acc67 +927, 0xedf00de67177ce30 +928, 0x1ea9d8d7e76d2d2c +929, 0x363fcac323f70eb2 +930, 0x19e6e3ec8a9712eb +931, 0xca541e96b0961f09 +932, 0x4d8fd34c2822ec46 +933, 0x2fdd56a50b32f705 +934, 0xaac2fcf251e3fd3 +935, 0xb0c600299e57045c +936, 0xd951ec589e909e38 +937, 0x4dc8414390cae508 +938, 0x537ef9d5e2321344 +939, 0xa57bc21fd31aa2dc +940, 0xa3a60df564183750 +941, 0xbe69a5ce2e369fb6 +942, 0x7744601f4c053ec8 +943, 0x3838452af42f2612 +944, 0xd4f0dad7115a54e9 +945, 0x629cf68d8009a624 +946, 0x2211c8fa34cb98cb +947, 0x8040b19e2213db83 +948, 0xb2a86d3ba2384fd +949, 0x4b85cec4f93f0dab +950, 0xc8d212d21ea6845d +951, 0x5b271a03a4fe2be0 +952, 0xff4f671319ad8434 +953, 0x8e615a919d5afa96 +954, 0xea7f47c53161160a +955, 0x33273930b13c6efc +956, 0x98eedda27fb59c3c +957, 0x188dc5e92e939677 +958, 0x9dbd0fa0911430f1 +959, 0x5b3dcf3fa75dfd2b +960, 0x3f03846febdb275d +961, 0x20cc24faea9e9cf6 +962, 0x854f3ac66199ff5d +963, 0x31169ac99d341e6f +964, 0xa85daed3c0bc1bbe +965, 0x64633711e71ba5dd +966, 0x530e79978dc73334 +967, 0x636f2ee6e20aef13 +968, 0xf6220f8b6d9a58fb +969, 0x425db8fa32141a7b +970, 0xac7c210f4b02be95 +971, 0x5fe8cfbe197a7754 +972, 0xfff7d40c79420ea +973, 0x5f8bab9ef4697b77 +974, 0xaf6fe54e45b23fe8 +975, 0xce79456ccc70bbce +976, 0x645ef680f48f1c00 +977, 0xa4dfac46e2028595 +978, 0x6bece4c41effc5df +979, 0xd316df886442641f +980, 
0xa4f6ff994edd2a6 +981, 0x30281ae3cc49abe4 +982, 0x39acb7b663dea974 +983, 0x5e8829b01a7c06fb +984, 0x87bdb08cf027f13e +985, 0xdfa5ede784e802f6 +986, 0x46d03d55711c38cc +987, 0xa55a961fc9788306 +988, 0xbf09ded495a2e57a +989, 0xcd601b29a639cc16 +990, 0x2193ce026bfd1085 +991, 0x25ba27f3f225be13 +992, 0x6f685be82f64f2fe +993, 0xec8454108229c450 +994, 0x6e79d8d205447a44 +995, 0x9ed7b6a96b9ccd68 +996, 0xae7134b3b7f8ee37 +997, 0x66963de0e5ebcc02 +998, 0x29c8dcd0d17c423f +999, 0xfb8482c827eb90bc diff --git a/.local/lib/python3.8/site-packages/numpy/random/tests/test_direct.py b/.local/lib/python3.8/site-packages/numpy/random/tests/test_direct.py new file mode 100644 index 0000000..58d966a --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/tests/test_direct.py @@ -0,0 +1,478 @@ +import os +from os.path import join +import sys + +import numpy as np +from numpy.testing import (assert_equal, assert_allclose, assert_array_equal, + assert_raises) +import pytest + +from numpy.random import ( + Generator, MT19937, PCG64, PCG64DXSM, Philox, RandomState, SeedSequence, + SFC64, default_rng +) +from numpy.random._common import interface + +try: + import cffi # noqa: F401 + + MISSING_CFFI = False +except ImportError: + MISSING_CFFI = True + +try: + import ctypes # noqa: F401 + + MISSING_CTYPES = False +except ImportError: + MISSING_CTYPES = True + +if sys.flags.optimize > 1: + # no docstrings present to inspect when PYTHONOPTIMIZE/Py_OptimizeFlag > 1 + # cffi cannot succeed + MISSING_CFFI = True + + +pwd = os.path.dirname(os.path.abspath(__file__)) + + +def assert_state_equal(actual, target): + for key in actual: + if isinstance(actual[key], dict): + assert_state_equal(actual[key], target[key]) + elif isinstance(actual[key], np.ndarray): + assert_array_equal(actual[key], target[key]) + else: + assert actual[key] == target[key] + + +def uint32_to_float32(u): + return ((u >> np.uint32(8)) * (1.0 / 2**24)).astype(np.float32) + + +def uniform32_from_uint64(x): + x = np.uint64(x) + upper = np.array(x >> np.uint64(32), dtype=np.uint32) + lower = np.uint64(0xffffffff) + lower = np.array(x & lower, dtype=np.uint32) + joined = np.column_stack([lower, upper]).ravel() + return uint32_to_float32(joined) + + +def uniform32_from_uint53(x): + x = np.uint64(x) >> np.uint64(16) + x = np.uint32(x & np.uint64(0xffffffff)) + return uint32_to_float32(x) + + +def uniform32_from_uint32(x): + return uint32_to_float32(x) + + +def uniform32_from_uint(x, bits): + if bits == 64: + return uniform32_from_uint64(x) + elif bits == 53: + return uniform32_from_uint53(x) + elif bits == 32: + return uniform32_from_uint32(x) + else: + raise NotImplementedError + + +def uniform_from_uint(x, bits): + if bits in (64, 63, 53): + return uniform_from_uint64(x) + elif bits == 32: + return uniform_from_uint32(x) + + +def uniform_from_uint64(x): + return (x >> np.uint64(11)) * (1.0 / 9007199254740992.0) + + +def uniform_from_uint32(x): + out = np.empty(len(x) // 2) + for i in range(0, len(x), 2): + a = x[i] >> 5 + b = x[i + 1] >> 6 + out[i // 2] = (a * 67108864.0 + b) / 9007199254740992.0 + return out + + +def uniform_from_dsfmt(x): + return x.view(np.double) - 1.0 + + +def gauss_from_uint(x, n, bits): + if bits in (64, 63): + doubles = uniform_from_uint64(x) + elif bits == 32: + doubles = uniform_from_uint32(x) + else: # bits == 'dsfmt' + doubles = uniform_from_dsfmt(x) + gauss = [] + loc = 0 + x1 = x2 = 0.0 + while len(gauss) < n: + r2 = 2 + while r2 >= 1.0 or r2 == 0.0: + x1 = 2.0 * doubles[loc] - 1.0 + x2 = 2.0 * doubles[loc + 1] - 
1.0 + r2 = x1 * x1 + x2 * x2 + loc += 2 + + f = np.sqrt(-2.0 * np.log(r2) / r2) + gauss.append(f * x2) + gauss.append(f * x1) + + return gauss[:n] + + +def test_seedsequence(): + from numpy.random.bit_generator import (ISeedSequence, + ISpawnableSeedSequence, + SeedlessSeedSequence) + + s1 = SeedSequence(range(10), spawn_key=(1, 2), pool_size=6) + s1.spawn(10) + s2 = SeedSequence(**s1.state) + assert_equal(s1.state, s2.state) + assert_equal(s1.n_children_spawned, s2.n_children_spawned) + + # The interfaces cannot be instantiated themselves. + assert_raises(TypeError, ISeedSequence) + assert_raises(TypeError, ISpawnableSeedSequence) + dummy = SeedlessSeedSequence() + assert_raises(NotImplementedError, dummy.generate_state, 10) + assert len(dummy.spawn(10)) == 10 + + +class Base: + dtype = np.uint64 + data2 = data1 = {} + + @classmethod + def setup_class(cls): + cls.bit_generator = PCG64 + cls.bits = 64 + cls.dtype = np.uint64 + cls.seed_error_type = TypeError + cls.invalid_init_types = [] + cls.invalid_init_values = [] + + @classmethod + def _read_csv(cls, filename): + with open(filename) as csv: + seed = csv.readline() + seed = seed.split(',') + seed = [int(s.strip(), 0) for s in seed[1:]] + data = [] + for line in csv: + data.append(int(line.split(',')[-1].strip(), 0)) + return {'seed': seed, 'data': np.array(data, dtype=cls.dtype)} + + def test_raw(self): + bit_generator = self.bit_generator(*self.data1['seed']) + uints = bit_generator.random_raw(1000) + assert_equal(uints, self.data1['data']) + + bit_generator = self.bit_generator(*self.data1['seed']) + uints = bit_generator.random_raw() + assert_equal(uints, self.data1['data'][0]) + + bit_generator = self.bit_generator(*self.data2['seed']) + uints = bit_generator.random_raw(1000) + assert_equal(uints, self.data2['data']) + + def test_random_raw(self): + bit_generator = self.bit_generator(*self.data1['seed']) + uints = bit_generator.random_raw(output=False) + assert uints is None + uints = bit_generator.random_raw(1000, output=False) + assert uints is None + + def test_gauss_inv(self): + n = 25 + rs = RandomState(self.bit_generator(*self.data1['seed'])) + gauss = rs.standard_normal(n) + assert_allclose(gauss, + gauss_from_uint(self.data1['data'], n, self.bits)) + + rs = RandomState(self.bit_generator(*self.data2['seed'])) + gauss = rs.standard_normal(25) + assert_allclose(gauss, + gauss_from_uint(self.data2['data'], n, self.bits)) + + def test_uniform_double(self): + rs = Generator(self.bit_generator(*self.data1['seed'])) + vals = uniform_from_uint(self.data1['data'], self.bits) + uniforms = rs.random(len(vals)) + assert_allclose(uniforms, vals) + assert_equal(uniforms.dtype, np.float64) + + rs = Generator(self.bit_generator(*self.data2['seed'])) + vals = uniform_from_uint(self.data2['data'], self.bits) + uniforms = rs.random(len(vals)) + assert_allclose(uniforms, vals) + assert_equal(uniforms.dtype, np.float64) + + def test_uniform_float(self): + rs = Generator(self.bit_generator(*self.data1['seed'])) + vals = uniform32_from_uint(self.data1['data'], self.bits) + uniforms = rs.random(len(vals), dtype=np.float32) + assert_allclose(uniforms, vals) + assert_equal(uniforms.dtype, np.float32) + + rs = Generator(self.bit_generator(*self.data2['seed'])) + vals = uniform32_from_uint(self.data2['data'], self.bits) + uniforms = rs.random(len(vals), dtype=np.float32) + assert_allclose(uniforms, vals) + assert_equal(uniforms.dtype, np.float32) + + def test_repr(self): + rs = Generator(self.bit_generator(*self.data1['seed'])) + assert 'Generator' in 
repr(rs) + assert f'{id(rs):#x}'.upper().replace('X', 'x') in repr(rs) + + def test_str(self): + rs = Generator(self.bit_generator(*self.data1['seed'])) + assert 'Generator' in str(rs) + assert str(self.bit_generator.__name__) in str(rs) + assert f'{id(rs):#x}'.upper().replace('X', 'x') not in str(rs) + + def test_pickle(self): + import pickle + + bit_generator = self.bit_generator(*self.data1['seed']) + state = bit_generator.state + bitgen_pkl = pickle.dumps(bit_generator) + reloaded = pickle.loads(bitgen_pkl) + reloaded_state = reloaded.state + assert_array_equal(Generator(bit_generator).standard_normal(1000), + Generator(reloaded).standard_normal(1000)) + assert bit_generator is not reloaded + assert_state_equal(reloaded_state, state) + + ss = SeedSequence(100) + aa = pickle.loads(pickle.dumps(ss)) + assert_equal(ss.state, aa.state) + + def test_invalid_state_type(self): + bit_generator = self.bit_generator(*self.data1['seed']) + with pytest.raises(TypeError): + bit_generator.state = {'1'} + + def test_invalid_state_value(self): + bit_generator = self.bit_generator(*self.data1['seed']) + state = bit_generator.state + state['bit_generator'] = 'otherBitGenerator' + with pytest.raises(ValueError): + bit_generator.state = state + + def test_invalid_init_type(self): + bit_generator = self.bit_generator + for st in self.invalid_init_types: + with pytest.raises(TypeError): + bit_generator(*st) + + def test_invalid_init_values(self): + bit_generator = self.bit_generator + for st in self.invalid_init_values: + with pytest.raises((ValueError, OverflowError)): + bit_generator(*st) + + def test_benchmark(self): + bit_generator = self.bit_generator(*self.data1['seed']) + bit_generator._benchmark(1) + bit_generator._benchmark(1, 'double') + with pytest.raises(ValueError): + bit_generator._benchmark(1, 'int32') + + @pytest.mark.skipif(MISSING_CFFI, reason='cffi not available') + def test_cffi(self): + bit_generator = self.bit_generator(*self.data1['seed']) + cffi_interface = bit_generator.cffi + assert isinstance(cffi_interface, interface) + other_cffi_interface = bit_generator.cffi + assert other_cffi_interface is cffi_interface + + @pytest.mark.skipif(MISSING_CTYPES, reason='ctypes not available') + def test_ctypes(self): + bit_generator = self.bit_generator(*self.data1['seed']) + ctypes_interface = bit_generator.ctypes + assert isinstance(ctypes_interface, interface) + other_ctypes_interface = bit_generator.ctypes + assert other_ctypes_interface is ctypes_interface + + def test_getstate(self): + bit_generator = self.bit_generator(*self.data1['seed']) + state = bit_generator.state + alt_state = bit_generator.__getstate__() + assert_state_equal(state, alt_state) + + +class TestPhilox(Base): + @classmethod + def setup_class(cls): + cls.bit_generator = Philox + cls.bits = 64 + cls.dtype = np.uint64 + cls.data1 = cls._read_csv( + join(pwd, './data/philox-testset-1.csv')) + cls.data2 = cls._read_csv( + join(pwd, './data/philox-testset-2.csv')) + cls.seed_error_type = TypeError + cls.invalid_init_types = [] + cls.invalid_init_values = [(1, None, 1), (-1,), (None, None, 2 ** 257 + 1)] + + def test_set_key(self): + bit_generator = self.bit_generator(*self.data1['seed']) + state = bit_generator.state + keyed = self.bit_generator(counter=state['state']['counter'], + key=state['state']['key']) + assert_state_equal(bit_generator.state, keyed.state) + + +class TestPCG64(Base): + @classmethod + def setup_class(cls): + cls.bit_generator = PCG64 + cls.bits = 64 + cls.dtype = np.uint64 + cls.data1 = 
cls._read_csv(join(pwd, './data/pcg64-testset-1.csv')) + cls.data2 = cls._read_csv(join(pwd, './data/pcg64-testset-2.csv')) + cls.seed_error_type = (ValueError, TypeError) + cls.invalid_init_types = [(3.2,), ([None],), (1, None)] + cls.invalid_init_values = [(-1,)] + + def test_advance_symmetry(self): + rs = Generator(self.bit_generator(*self.data1['seed'])) + state = rs.bit_generator.state + step = -0x9e3779b97f4a7c150000000000000000 + rs.bit_generator.advance(step) + val_neg = rs.integers(10) + rs.bit_generator.state = state + rs.bit_generator.advance(2**128 + step) + val_pos = rs.integers(10) + rs.bit_generator.state = state + rs.bit_generator.advance(10 * 2**128 + step) + val_big = rs.integers(10) + assert val_neg == val_pos + assert val_big == val_pos + + def test_advance_large(self): + rs = Generator(self.bit_generator(38219308213743)) + pcg = rs.bit_generator + state = pcg.state["state"] + initial_state = 287608843259529770491897792873167516365 + assert state["state"] == initial_state + pcg.advance(sum(2**i for i in (96, 64, 32, 16, 8, 4, 2, 1))) + state = pcg.state["state"] + advanced_state = 135275564607035429730177404003164635391 + assert state["state"] == advanced_state + + +class TestPCG64DXSM(Base): + @classmethod + def setup_class(cls): + cls.bit_generator = PCG64DXSM + cls.bits = 64 + cls.dtype = np.uint64 + cls.data1 = cls._read_csv(join(pwd, './data/pcg64dxsm-testset-1.csv')) + cls.data2 = cls._read_csv(join(pwd, './data/pcg64dxsm-testset-2.csv')) + cls.seed_error_type = (ValueError, TypeError) + cls.invalid_init_types = [(3.2,), ([None],), (1, None)] + cls.invalid_init_values = [(-1,)] + + def test_advance_symmetry(self): + rs = Generator(self.bit_generator(*self.data1['seed'])) + state = rs.bit_generator.state + step = -0x9e3779b97f4a7c150000000000000000 + rs.bit_generator.advance(step) + val_neg = rs.integers(10) + rs.bit_generator.state = state + rs.bit_generator.advance(2**128 + step) + val_pos = rs.integers(10) + rs.bit_generator.state = state + rs.bit_generator.advance(10 * 2**128 + step) + val_big = rs.integers(10) + assert val_neg == val_pos + assert val_big == val_pos + + def test_advance_large(self): + rs = Generator(self.bit_generator(38219308213743)) + pcg = rs.bit_generator + state = pcg.state + initial_state = 287608843259529770491897792873167516365 + assert state["state"]["state"] == initial_state + pcg.advance(sum(2**i for i in (96, 64, 32, 16, 8, 4, 2, 1))) + state = pcg.state["state"] + advanced_state = 277778083536782149546677086420637664879 + assert state["state"] == advanced_state + + +class TestMT19937(Base): + @classmethod + def setup_class(cls): + cls.bit_generator = MT19937 + cls.bits = 32 + cls.dtype = np.uint32 + cls.data1 = cls._read_csv(join(pwd, './data/mt19937-testset-1.csv')) + cls.data2 = cls._read_csv(join(pwd, './data/mt19937-testset-2.csv')) + cls.seed_error_type = ValueError + cls.invalid_init_types = [] + cls.invalid_init_values = [(-1,)] + + def test_seed_float_array(self): + assert_raises(TypeError, self.bit_generator, np.array([np.pi])) + assert_raises(TypeError, self.bit_generator, np.array([-np.pi])) + assert_raises(TypeError, self.bit_generator, np.array([np.pi, -np.pi])) + assert_raises(TypeError, self.bit_generator, np.array([0, np.pi])) + assert_raises(TypeError, self.bit_generator, [np.pi]) + assert_raises(TypeError, self.bit_generator, [0, np.pi]) + + def test_state_tuple(self): + rs = Generator(self.bit_generator(*self.data1['seed'])) + bit_generator = rs.bit_generator + state = bit_generator.state + desired = 
rs.integers(2 ** 16) + tup = (state['bit_generator'], state['state']['key'], + state['state']['pos']) + bit_generator.state = tup + actual = rs.integers(2 ** 16) + assert_equal(actual, desired) + tup = tup + (0, 0.0) + bit_generator.state = tup + actual = rs.integers(2 ** 16) + assert_equal(actual, desired) + + +class TestSFC64(Base): + @classmethod + def setup_class(cls): + cls.bit_generator = SFC64 + cls.bits = 64 + cls.dtype = np.uint64 + cls.data1 = cls._read_csv( + join(pwd, './data/sfc64-testset-1.csv')) + cls.data2 = cls._read_csv( + join(pwd, './data/sfc64-testset-2.csv')) + cls.seed_error_type = (ValueError, TypeError) + cls.invalid_init_types = [(3.2,), ([None],), (1, None)] + cls.invalid_init_values = [(-1,)] + + +class TestDefaultRNG: + def test_seed(self): + for args in [(), (None,), (1234,), ([1234, 5678],)]: + rg = default_rng(*args) + assert isinstance(rg.bit_generator, PCG64) + + def test_passthrough(self): + bg = Philox() + rg = default_rng(bg) + assert rg.bit_generator is bg + rg2 = default_rng(rg) + assert rg2 is rg + assert rg2.bit_generator is bg diff --git a/.local/lib/python3.8/site-packages/numpy/random/tests/test_extending.py b/.local/lib/python3.8/site-packages/numpy/random/tests/test_extending.py new file mode 100644 index 0000000..d362092 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/tests/test_extending.py @@ -0,0 +1,95 @@ +import os +import pytest +import shutil +import subprocess +import sys +import warnings +import numpy as np +from numpy.distutils.misc_util import exec_mod_from_location + +try: + import cffi +except ImportError: + cffi = None + +if sys.flags.optimize > 1: + # no docstrings present to inspect when PYTHONOPTIMIZE/Py_OptimizeFlag > 1 + # cffi cannot succeed + cffi = None + +try: + with warnings.catch_warnings(record=True) as w: + # numba issue gh-4733 + warnings.filterwarnings('always', '', DeprecationWarning) + import numba +except ImportError: + numba = None + +try: + import cython + from Cython.Compiler.Version import version as cython_version +except ImportError: + cython = None +else: + from distutils.version import LooseVersion + # Cython 0.29.21 is required for Python 3.9 and there are + # other fixes in the 0.29 series that are needed even for earlier + # Python versions. 
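+ # An outdated Cython cannot compile the example extensions, so it is treated as absent and the Cython test below is skipped (via skipif) rather than reported as a failure.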
+ # Note: keep in sync with the one in pyproject.toml + required_version = LooseVersion('0.29.21') + if LooseVersion(cython_version) < required_version: + # too old or wrong cython, skip the test + cython = None + +@pytest.mark.skipif(cython is None, reason="requires cython") +@pytest.mark.slow +def test_cython(tmp_path): + srcdir = os.path.join(os.path.dirname(__file__), '..') + shutil.copytree(srcdir, tmp_path / 'random') + # build the examples and "install" them into a temporary directory + build_dir = tmp_path / 'random' / '_examples' / 'cython' + subprocess.check_call([sys.executable, 'setup.py', 'build', 'install', + '--prefix', str(tmp_path / 'installdir'), + '--single-version-externally-managed', + '--record', str(tmp_path/ 'tmp_install_log.txt'), + ], + cwd=str(build_dir), + ) + # gh-16162: make sure numpy's __init__.pxd was used for cython + # not really part of this test, but it is a convenient place to check + with open(build_dir / 'extending.c') as fid: + txt_to_find = 'NumPy API declarations from "numpy/__init__.pxd"' + for i, line in enumerate(fid): + if txt_to_find in line: + break + else: + assert False, ("Could not find '{}' in C file, " + "wrong pxd used".format(txt_to_find)) + # get the path to the so's + so1 = so2 = None + with open(tmp_path /'tmp_install_log.txt') as fid: + for line in fid: + if 'extending.' in line: + so1 = line.strip() + if 'extending_distributions' in line: + so2 = line.strip() + assert so1 is not None + assert so2 is not None + # import the so's without adding the directory to sys.path + exec_mod_from_location('extending', so1) + extending_distributions = exec_mod_from_location( + 'extending_distributions', so2) + # actually test the cython c-extension + from numpy.random import PCG64 + values = extending_distributions.uniforms_ex(PCG64(0), 10, 'd') + assert values.shape == (10,) + assert values.dtype == np.float64 + +@pytest.mark.skipif(numba is None or cffi is None, + reason="requires numba and cffi") +def test_numba(): + from numpy.random._examples.numba import extending # noqa: F401 + +@pytest.mark.skipif(cffi is None, reason="requires cffi") +def test_cffi(): + from numpy.random._examples.cffi import extending # noqa: F401 diff --git a/.local/lib/python3.8/site-packages/numpy/random/tests/test_generator_mt19937.py b/.local/lib/python3.8/site-packages/numpy/random/tests/test_generator_mt19937.py new file mode 100644 index 0000000..e5411b8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/tests/test_generator_mt19937.py @@ -0,0 +1,2671 @@ +import sys +import hashlib + +import pytest + +import numpy as np +from numpy.linalg import LinAlgError +from numpy.testing import ( + assert_, assert_raises, assert_equal, assert_allclose, + assert_warns, assert_no_warnings, assert_array_equal, + assert_array_almost_equal, suppress_warnings) + +from numpy.random import Generator, MT19937, SeedSequence, RandomState + +random = Generator(MT19937()) + +JUMP_TEST_DATA = [ + { + "seed": 0, + "steps": 10, + "initial": {"key_sha256": "bb1636883c2707b51c5b7fc26c6927af4430f2e0785a8c7bc886337f919f9edf", "pos": 9}, + "jumped": {"key_sha256": "ff682ac12bb140f2d72fba8d3506cf4e46817a0db27aae1683867629031d8d55", "pos": 598}, + }, + { + "seed":384908324, + "steps":312, + "initial": {"key_sha256": "16b791a1e04886ccbbb4d448d6ff791267dc458ae599475d08d5cced29d11614", "pos": 311}, + "jumped": {"key_sha256": "a0110a2cf23b56be0feaed8f787a7fc84bef0cb5623003d75b26bdfa1c18002c", "pos": 276}, + }, + { + "seed": [839438204, 980239840, 859048019, 821], + "steps": 511, 
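+ # key_sha256 fingerprints the 624-word MT19937 key; the "jumped" entries give the expected state after MT19937.jumped(), which advances the generator as if 2**128 draws had been made.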
+ "initial": {"key_sha256": "d306cf01314d51bd37892d874308200951a35265ede54d200f1e065004c3e9ea", "pos": 510}, + "jumped": {"key_sha256": "0e00ab449f01a5195a83b4aee0dfbc2ce8d46466a640b92e33977d2e42f777f8", "pos": 475}, + }, +] + +@pytest.fixture(scope='module', params=[True, False]) +def endpoint(request): + return request.param + + +class TestSeed: + def test_scalar(self): + s = Generator(MT19937(0)) + assert_equal(s.integers(1000), 479) + s = Generator(MT19937(4294967295)) + assert_equal(s.integers(1000), 324) + + def test_array(self): + s = Generator(MT19937(range(10))) + assert_equal(s.integers(1000), 465) + s = Generator(MT19937(np.arange(10))) + assert_equal(s.integers(1000), 465) + s = Generator(MT19937([0])) + assert_equal(s.integers(1000), 479) + s = Generator(MT19937([4294967295])) + assert_equal(s.integers(1000), 324) + + def test_seedsequence(self): + s = MT19937(SeedSequence(0)) + assert_equal(s.random_raw(1), 2058676884) + + def test_invalid_scalar(self): + # seed must be an unsigned 32 bit integer + assert_raises(TypeError, MT19937, -0.5) + assert_raises(ValueError, MT19937, -1) + + def test_invalid_array(self): + # seed must be an unsigned integer + assert_raises(TypeError, MT19937, [-0.5]) + assert_raises(ValueError, MT19937, [-1]) + assert_raises(ValueError, MT19937, [1, -2, 4294967296]) + + def test_noninstantized_bitgen(self): + assert_raises(ValueError, Generator, MT19937) + + +class TestBinomial: + def test_n_zero(self): + # Tests the corner case of n == 0 for the binomial distribution. + # binomial(0, p) should be zero for any p in [0, 1]. + # This test addresses issue #3480. + zeros = np.zeros(2, dtype='int') + for p in [0, .5, 1]: + assert_(random.binomial(0, p) == 0) + assert_array_equal(random.binomial(zeros, p), zeros) + + def test_p_is_nan(self): + # Issue #4571. + assert_raises(ValueError, random.binomial, 1, np.nan) + + +class TestMultinomial: + def test_basic(self): + random.multinomial(100, [0.2, 0.8]) + + def test_zero_probability(self): + random.multinomial(100, [0.2, 0.8, 0.0, 0.0, 0.0]) + + def test_int_negative_interval(self): + assert_(-5 <= random.integers(-5, -1) < -1) + x = random.integers(-5, -1, 5) + assert_(np.all(-5 <= x)) + assert_(np.all(x < -1)) + + def test_size(self): + # gh-3173 + p = [0.5, 0.5] + assert_equal(random.multinomial(1, p, np.uint32(1)).shape, (1, 2)) + assert_equal(random.multinomial(1, p, np.uint32(1)).shape, (1, 2)) + assert_equal(random.multinomial(1, p, np.uint32(1)).shape, (1, 2)) + assert_equal(random.multinomial(1, p, [2, 2]).shape, (2, 2, 2)) + assert_equal(random.multinomial(1, p, (2, 2)).shape, (2, 2, 2)) + assert_equal(random.multinomial(1, p, np.array((2, 2))).shape, + (2, 2, 2)) + + assert_raises(TypeError, random.multinomial, 1, p, + float(1)) + + def test_invalid_prob(self): + assert_raises(ValueError, random.multinomial, 100, [1.1, 0.2]) + assert_raises(ValueError, random.multinomial, 100, [-.1, 0.9]) + + def test_invalid_n(self): + assert_raises(ValueError, random.multinomial, -1, [0.8, 0.2]) + assert_raises(ValueError, random.multinomial, [-1] * 10, [0.8, 0.2]) + + def test_p_non_contiguous(self): + p = np.arange(15.) 
+ p /= np.sum(p[1::3]) + pvals = p[1::3] + random = Generator(MT19937(1432985819)) + non_contig = random.multinomial(100, pvals=pvals) + random = Generator(MT19937(1432985819)) + contig = random.multinomial(100, pvals=np.ascontiguousarray(pvals)) + assert_array_equal(non_contig, contig) + + def test_multinomial_pvals_float32(self): + x = np.array([9.9e-01, 9.9e-01, 1.0e-09, 1.0e-09, 1.0e-09, 1.0e-09, + 1.0e-09, 1.0e-09, 1.0e-09, 1.0e-09], dtype=np.float32) + pvals = x / x.sum() + random = Generator(MT19937(1432985819)) + match = r"[\w\s]*pvals array is cast to 64-bit floating" + with pytest.raises(ValueError, match=match): + random.multinomial(1, pvals) + +class TestMultivariateHypergeometric: + + def setup(self): + self.seed = 8675309 + + def test_argument_validation(self): + # Error cases... + + # `colors` must be a 1-d sequence + assert_raises(ValueError, random.multivariate_hypergeometric, + 10, 4) + + # Negative nsample + assert_raises(ValueError, random.multivariate_hypergeometric, + [2, 3, 4], -1) + + # Negative color + assert_raises(ValueError, random.multivariate_hypergeometric, + [-1, 2, 3], 2) + + # nsample exceeds sum(colors) + assert_raises(ValueError, random.multivariate_hypergeometric, + [2, 3, 4], 10) + + # nsample exceeds sum(colors) (edge case of empty colors) + assert_raises(ValueError, random.multivariate_hypergeometric, + [], 1) + + # Validation errors associated with very large values in colors. + assert_raises(ValueError, random.multivariate_hypergeometric, + [999999999, 101], 5, 1, 'marginals') + + int64_info = np.iinfo(np.int64) + max_int64 = int64_info.max + max_int64_index = max_int64 // int64_info.dtype.itemsize + assert_raises(ValueError, random.multivariate_hypergeometric, + [max_int64_index - 100, 101], 5, 1, 'count') + + @pytest.mark.parametrize('method', ['count', 'marginals']) + def test_edge_cases(self, method): + # Set the seed, but in fact, all the results in this test are + # deterministic, so we don't really need this. 
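+ # Each case below admits exactly one outcome (no colors, nsample == 0, or nsample == sum(colors)), so the results are fully determined whatever the seed.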
+ random = Generator(MT19937(self.seed)) + + x = random.multivariate_hypergeometric([0, 0, 0], 0, method=method) + assert_array_equal(x, [0, 0, 0]) + + x = random.multivariate_hypergeometric([], 0, method=method) + assert_array_equal(x, []) + + x = random.multivariate_hypergeometric([], 0, size=1, method=method) + assert_array_equal(x, np.empty((1, 0), dtype=np.int64)) + + x = random.multivariate_hypergeometric([1, 2, 3], 0, method=method) + assert_array_equal(x, [0, 0, 0]) + + x = random.multivariate_hypergeometric([9, 0, 0], 3, method=method) + assert_array_equal(x, [3, 0, 0]) + + colors = [1, 1, 0, 1, 1] + x = random.multivariate_hypergeometric(colors, sum(colors), + method=method) + assert_array_equal(x, colors) + + x = random.multivariate_hypergeometric([3, 4, 5], 12, size=3, + method=method) + assert_array_equal(x, [[3, 4, 5]]*3) + + # Cases for nsample: + # nsample < 10 + # 10 <= nsample < colors.sum()/2 + # colors.sum()/2 < nsample < colors.sum() - 10 + # colors.sum() - 10 < nsample < colors.sum() + @pytest.mark.parametrize('nsample', [8, 25, 45, 55]) + @pytest.mark.parametrize('method', ['count', 'marginals']) + @pytest.mark.parametrize('size', [5, (2, 3), 150000]) + def test_typical_cases(self, nsample, method, size): + random = Generator(MT19937(self.seed)) + + colors = np.array([10, 5, 20, 25]) + sample = random.multivariate_hypergeometric(colors, nsample, size, + method=method) + if isinstance(size, int): + expected_shape = (size,) + colors.shape + else: + expected_shape = size + colors.shape + assert_equal(sample.shape, expected_shape) + assert_((sample >= 0).all()) + assert_((sample <= colors).all()) + assert_array_equal(sample.sum(axis=-1), + np.full(size, fill_value=nsample, dtype=int)) + if isinstance(size, int) and size >= 100000: + # This sample is large enough to compare its mean to + # the expected values. + assert_allclose(sample.mean(axis=0), + nsample * colors / colors.sum(), + rtol=1e-3, atol=0.005) + + def test_repeatability1(self): + random = Generator(MT19937(self.seed)) + sample = random.multivariate_hypergeometric([3, 4, 5], 5, size=5, + method='count') + expected = np.array([[2, 1, 2], + [2, 1, 2], + [1, 1, 3], + [2, 0, 3], + [2, 1, 2]]) + assert_array_equal(sample, expected) + + def test_repeatability2(self): + random = Generator(MT19937(self.seed)) + sample = random.multivariate_hypergeometric([20, 30, 50], 50, + size=5, + method='marginals') + expected = np.array([[ 9, 17, 24], + [ 7, 13, 30], + [ 9, 15, 26], + [ 9, 17, 24], + [12, 14, 24]]) + assert_array_equal(sample, expected) + + def test_repeatability3(self): + random = Generator(MT19937(self.seed)) + sample = random.multivariate_hypergeometric([20, 30, 50], 12, + size=5, + method='marginals') + expected = np.array([[2, 3, 7], + [5, 3, 4], + [2, 5, 5], + [5, 3, 4], + [1, 5, 6]]) + assert_array_equal(sample, expected) + + +class TestSetState: + def setup(self): + self.seed = 1234567890 + self.rg = Generator(MT19937(self.seed)) + self.bit_generator = self.rg.bit_generator + self.state = self.bit_generator.state + self.legacy_state = (self.state['bit_generator'], + self.state['state']['key'], + self.state['state']['pos']) + + def test_gaussian_reset(self): + # Make sure the cached every-other-Gaussian is reset. + old = self.rg.standard_normal(size=3) + self.bit_generator.state = self.state + new = self.rg.standard_normal(size=3) + assert_(np.all(old == new)) + + def test_gaussian_reset_in_media_res(self): + # When the state is saved with a cached Gaussian, make sure the + # cached Gaussian is restored. 
+ + self.rg.standard_normal() + state = self.bit_generator.state + old = self.rg.standard_normal(size=3) + self.bit_generator.state = state + new = self.rg.standard_normal(size=3) + assert_(np.all(old == new)) + + def test_negative_binomial(self): + # Ensure that the negative binomial results take floating point + # arguments without truncation. + self.rg.negative_binomial(0.5, 0.5) + + +class TestIntegers: + rfunc = random.integers + + # valid integer/boolean types + itype = [bool, np.int8, np.uint8, np.int16, np.uint16, + np.int32, np.uint32, np.int64, np.uint64] + + def test_unsupported_type(self, endpoint): + assert_raises(TypeError, self.rfunc, 1, endpoint=endpoint, dtype=float) + + def test_bounds_checking(self, endpoint): + for dt in self.itype: + lbnd = 0 if dt is bool else np.iinfo(dt).min + ubnd = 2 if dt is bool else np.iinfo(dt).max + 1 + ubnd = ubnd - 1 if endpoint else ubnd + assert_raises(ValueError, self.rfunc, lbnd - 1, ubnd, + endpoint=endpoint, dtype=dt) + assert_raises(ValueError, self.rfunc, lbnd, ubnd + 1, + endpoint=endpoint, dtype=dt) + assert_raises(ValueError, self.rfunc, ubnd, lbnd, + endpoint=endpoint, dtype=dt) + assert_raises(ValueError, self.rfunc, 1, 0, endpoint=endpoint, + dtype=dt) + + assert_raises(ValueError, self.rfunc, [lbnd - 1], ubnd, + endpoint=endpoint, dtype=dt) + assert_raises(ValueError, self.rfunc, [lbnd], [ubnd + 1], + endpoint=endpoint, dtype=dt) + assert_raises(ValueError, self.rfunc, [ubnd], [lbnd], + endpoint=endpoint, dtype=dt) + assert_raises(ValueError, self.rfunc, 1, [0], + endpoint=endpoint, dtype=dt) + + def test_bounds_checking_array(self, endpoint): + for dt in self.itype: + lbnd = 0 if dt is bool else np.iinfo(dt).min + ubnd = 2 if dt is bool else np.iinfo(dt).max + (not endpoint) + + assert_raises(ValueError, self.rfunc, [lbnd - 1] * 2, [ubnd] * 2, + endpoint=endpoint, dtype=dt) + assert_raises(ValueError, self.rfunc, [lbnd] * 2, + [ubnd + 1] * 2, endpoint=endpoint, dtype=dt) + assert_raises(ValueError, self.rfunc, ubnd, [lbnd] * 2, + endpoint=endpoint, dtype=dt) + assert_raises(ValueError, self.rfunc, [1] * 2, 0, + endpoint=endpoint, dtype=dt) + + def test_rng_zero_and_extremes(self, endpoint): + for dt in self.itype: + lbnd = 0 if dt is bool else np.iinfo(dt).min + ubnd = 2 if dt is bool else np.iinfo(dt).max + 1 + ubnd = ubnd - 1 if endpoint else ubnd + is_open = not endpoint + + tgt = ubnd - 1 + assert_equal(self.rfunc(tgt, tgt + is_open, size=1000, + endpoint=endpoint, dtype=dt), tgt) + assert_equal(self.rfunc([tgt], tgt + is_open, size=1000, + endpoint=endpoint, dtype=dt), tgt) + + tgt = lbnd + assert_equal(self.rfunc(tgt, tgt + is_open, size=1000, + endpoint=endpoint, dtype=dt), tgt) + assert_equal(self.rfunc(tgt, [tgt + is_open], size=1000, + endpoint=endpoint, dtype=dt), tgt) + + tgt = (lbnd + ubnd) // 2 + assert_equal(self.rfunc(tgt, tgt + is_open, size=1000, + endpoint=endpoint, dtype=dt), tgt) + assert_equal(self.rfunc([tgt], [tgt + is_open], + size=1000, endpoint=endpoint, dtype=dt), + tgt) + + def test_rng_zero_and_extremes_array(self, endpoint): + size = 1000 + for dt in self.itype: + lbnd = 0 if dt is bool else np.iinfo(dt).min + ubnd = 2 if dt is bool else np.iinfo(dt).max + 1 + ubnd = ubnd - 1 if endpoint else ubnd + + tgt = ubnd - 1 + assert_equal(self.rfunc([tgt], [tgt + 1], + size=size, dtype=dt), tgt) + assert_equal(self.rfunc( + [tgt] * size, [tgt + 1] * size, dtype=dt), tgt) + assert_equal(self.rfunc( + [tgt] * size, [tgt + 1] * size, size=size, dtype=dt), tgt) + + tgt = lbnd + 
assert_equal(self.rfunc([tgt], [tgt + 1], + size=size, dtype=dt), tgt) + assert_equal(self.rfunc( + [tgt] * size, [tgt + 1] * size, dtype=dt), tgt) + assert_equal(self.rfunc( + [tgt] * size, [tgt + 1] * size, size=size, dtype=dt), tgt) + + tgt = (lbnd + ubnd) // 2 + assert_equal(self.rfunc([tgt], [tgt + 1], + size=size, dtype=dt), tgt) + assert_equal(self.rfunc( + [tgt] * size, [tgt + 1] * size, dtype=dt), tgt) + assert_equal(self.rfunc( + [tgt] * size, [tgt + 1] * size, size=size, dtype=dt), tgt) + + def test_full_range(self, endpoint): + # Test for ticket #1690 + + for dt in self.itype: + lbnd = 0 if dt is bool else np.iinfo(dt).min + ubnd = 2 if dt is bool else np.iinfo(dt).max + 1 + ubnd = ubnd - 1 if endpoint else ubnd + + try: + self.rfunc(lbnd, ubnd, endpoint=endpoint, dtype=dt) + except Exception as e: + raise AssertionError("No error should have been raised, " + "but one was with the following " + "message:\n\n%s" % str(e)) + + def test_full_range_array(self, endpoint): + # Test for ticket #1690 + + for dt in self.itype: + lbnd = 0 if dt is bool else np.iinfo(dt).min + ubnd = 2 if dt is bool else np.iinfo(dt).max + 1 + ubnd = ubnd - 1 if endpoint else ubnd + + try: + self.rfunc([lbnd] * 2, [ubnd], endpoint=endpoint, dtype=dt) + except Exception as e: + raise AssertionError("No error should have been raised, " + "but one was with the following " + "message:\n\n%s" % str(e)) + + def test_in_bounds_fuzz(self, endpoint): + # Don't use fixed seed + random = Generator(MT19937()) + + for dt in self.itype[1:]: + for ubnd in [4, 8, 16]: + vals = self.rfunc(2, ubnd - endpoint, size=2 ** 16, + endpoint=endpoint, dtype=dt) + assert_(vals.max() < ubnd) + assert_(vals.min() >= 2) + + vals = self.rfunc(0, 2 - endpoint, size=2 ** 16, endpoint=endpoint, + dtype=bool) + assert_(vals.max() < 2) + assert_(vals.min() >= 0) + + def test_scalar_array_equiv(self, endpoint): + for dt in self.itype: + lbnd = 0 if dt is bool else np.iinfo(dt).min + ubnd = 2 if dt is bool else np.iinfo(dt).max + 1 + ubnd = ubnd - 1 if endpoint else ubnd + + size = 1000 + random = Generator(MT19937(1234)) + scalar = random.integers(lbnd, ubnd, size=size, endpoint=endpoint, + dtype=dt) + + random = Generator(MT19937(1234)) + scalar_array = random.integers([lbnd], [ubnd], size=size, + endpoint=endpoint, dtype=dt) + + random = Generator(MT19937(1234)) + array = random.integers([lbnd] * size, [ubnd] * + size, size=size, endpoint=endpoint, dtype=dt) + assert_array_equal(scalar, scalar_array) + assert_array_equal(scalar, array) + + def test_repeatability(self, endpoint): + # We use a sha256 hash of generated sequences of 1000 samples + # in the range [0, 6) for all but bool, where the range + # is [0, 2). Hashes are for little endian numbers. 
+ tgt = {'bool': '053594a9b82d656f967c54869bc6970aa0358cf94ad469c81478459c6a90eee3', + 'int16': '54de9072b6ee9ff7f20b58329556a46a447a8a29d67db51201bf88baa6e4e5d4', + 'int32': 'd3a0d5efb04542b25ac712e50d21f39ac30f312a5052e9bbb1ad3baa791ac84b', + 'int64': '14e224389ac4580bfbdccb5697d6190b496f91227cf67df60989de3d546389b1', + 'int8': '0e203226ff3fbbd1580f15da4621e5f7164d0d8d6b51696dd42d004ece2cbec1', + 'uint16': '54de9072b6ee9ff7f20b58329556a46a447a8a29d67db51201bf88baa6e4e5d4', + 'uint32': 'd3a0d5efb04542b25ac712e50d21f39ac30f312a5052e9bbb1ad3baa791ac84b', + 'uint64': '14e224389ac4580bfbdccb5697d6190b496f91227cf67df60989de3d546389b1', + 'uint8': '0e203226ff3fbbd1580f15da4621e5f7164d0d8d6b51696dd42d004ece2cbec1'} + + for dt in self.itype[1:]: + random = Generator(MT19937(1234)) + + # view as little endian for hash + if sys.byteorder == 'little': + val = random.integers(0, 6 - endpoint, size=1000, endpoint=endpoint, + dtype=dt) + else: + val = random.integers(0, 6 - endpoint, size=1000, endpoint=endpoint, + dtype=dt).byteswap() + + res = hashlib.sha256(val).hexdigest() + assert_(tgt[np.dtype(dt).name] == res) + + # bools do not depend on endianness + random = Generator(MT19937(1234)) + val = random.integers(0, 2 - endpoint, size=1000, endpoint=endpoint, + dtype=bool).view(np.int8) + res = hashlib.sha256(val).hexdigest() + assert_(tgt[np.dtype(bool).name] == res) + + def test_repeatability_broadcasting(self, endpoint): + for dt in self.itype: + lbnd = 0 if dt in (bool, np.bool_) else np.iinfo(dt).min + ubnd = 2 if dt in (bool, np.bool_) else np.iinfo(dt).max + 1 + ubnd = ubnd - 1 if endpoint else ubnd + + # view as little endian for hash + random = Generator(MT19937(1234)) + val = random.integers(lbnd, ubnd, size=1000, endpoint=endpoint, + dtype=dt) + + random = Generator(MT19937(1234)) + val_bc = random.integers([lbnd] * 1000, ubnd, endpoint=endpoint, + dtype=dt) + + assert_array_equal(val, val_bc) + + random = Generator(MT19937(1234)) + val_bc = random.integers([lbnd] * 1000, [ubnd] * 1000, + endpoint=endpoint, dtype=dt) + + assert_array_equal(val, val_bc) + + @pytest.mark.parametrize( + 'bound, expected', + [(2**32 - 1, np.array([517043486, 1364798665, 1733884389, 1353720612, + 3769704066, 1170797179, 4108474671])), + (2**32, np.array([517043487, 1364798666, 1733884390, 1353720613, + 3769704067, 1170797180, 4108474672])), + (2**32 + 1, np.array([517043487, 1733884390, 3769704068, 4108474673, + 1831631863, 1215661561, 3869512430]))] + ) + def test_repeatability_32bit_boundary(self, bound, expected): + for size in [None, len(expected)]: + random = Generator(MT19937(1234)) + x = random.integers(bound, size=size) + assert_equal(x, expected if size is not None else expected[0]) + + def test_repeatability_32bit_boundary_broadcasting(self): + desired = np.array([[[1622936284, 3620788691, 1659384060], + [1417365545, 760222891, 1909653332], + [3788118662, 660249498, 4092002593]], + [[3625610153, 2979601262, 3844162757], + [ 685800658, 120261497, 2694012896], + [1207779440, 1586594375, 3854335050]], + [[3004074748, 2310761796, 3012642217], + [2067714190, 2786677879, 1363865881], + [ 791663441, 1867303284, 2169727960]], + [[1939603804, 1250951100, 298950036], + [1040128489, 3791912209, 3317053765], + [3155528714, 61360675, 2305155588]], + [[ 817688762, 1335621943, 3288952434], + [1770890872, 1102951817, 1957607470], + [3099996017, 798043451, 48334215]]]) + for size in [None, (5, 3, 3)]: + random = Generator(MT19937(12345)) + x = random.integers([[-1], [0], [1]], + [2**32 - 1, 2**32, 2**32 + 1], + 
                             size=size)
+        assert_array_equal(x, desired if size is not None else desired[0])
+
+    def test_int64_uint64_broadcast_exceptions(self, endpoint):
+        configs = {np.uint64: ((0, 2**65), (-1, 2**62), (10, 9), (0, 0)),
+                   np.int64: ((0, 2**64), (-(2**64), 2**62), (10, 9), (0, 0),
+                              (-2**63-1, -2**63-1))}
+        for dtype in configs:
+            for config in configs[dtype]:
+                low, high = config
+                high = high - endpoint
+                low_a = np.array([[low]*10])
+                high_a = np.array([high] * 10)
+                assert_raises(ValueError, random.integers, low, high,
+                              endpoint=endpoint, dtype=dtype)
+                assert_raises(ValueError, random.integers, low_a, high,
+                              endpoint=endpoint, dtype=dtype)
+                assert_raises(ValueError, random.integers, low, high_a,
+                              endpoint=endpoint, dtype=dtype)
+                assert_raises(ValueError, random.integers, low_a, high_a,
+                              endpoint=endpoint, dtype=dtype)
+
+                low_o = np.array([[low]*10], dtype=object)
+                high_o = np.array([high] * 10, dtype=object)
+                assert_raises(ValueError, random.integers, low_o, high,
+                              endpoint=endpoint, dtype=dtype)
+                assert_raises(ValueError, random.integers, low, high_o,
+                              endpoint=endpoint, dtype=dtype)
+                assert_raises(ValueError, random.integers, low_o, high_o,
+                              endpoint=endpoint, dtype=dtype)
+
+    def test_int64_uint64_corner_case(self, endpoint):
+        # When stored in NumPy arrays, `lbnd` is cast to np.int64 and
+        # `ubnd` is cast to np.uint64. Checking whether `lbnd` >= `ubnd`
+        # used to be done solely via direct comparison, which is incorrect:
+        # when NumPy compares the two numbers it casts both to np.float64,
+        # because there is no integer superset of np.int64 and np.uint64.
+        # However, `ubnd` is too large to be represented exactly in
+        # np.float64, causing it to be rounded down to
+        # np.iinfo(np.int64).max, which led to a ValueError because `lbnd`
+        # then equalled the new `ubnd`.
+
+        dt = np.int64
+        tgt = np.iinfo(np.int64).max
+        lbnd = np.int64(np.iinfo(np.int64).max)
+        ubnd = np.uint64(np.iinfo(np.int64).max + 1 - endpoint)
+
+        # None of these function calls should
+        # generate a ValueError now.
+        actual = random.integers(lbnd, ubnd, endpoint=endpoint, dtype=dt)
+        assert_equal(actual, tgt)
+
+    def test_respect_dtype_singleton(self, endpoint):
+        # See gh-7203
+        for dt in self.itype:
+            lbnd = 0 if dt is bool else np.iinfo(dt).min
+            ubnd = 2 if dt is bool else np.iinfo(dt).max + 1
+            ubnd = ubnd - 1 if endpoint else ubnd
+            dt = np.bool_ if dt is bool else dt
+
+            sample = self.rfunc(lbnd, ubnd, endpoint=endpoint, dtype=dt)
+            assert_equal(sample.dtype, dt)
+
+        for dt in (bool, int, np.compat.long):
+            lbnd = 0 if dt is bool else np.iinfo(dt).min
+            ubnd = 2 if dt is bool else np.iinfo(dt).max + 1
+            ubnd = ubnd - 1 if endpoint else ubnd
+
+            # gh-7284: Ensure that we get Python data types
+            sample = self.rfunc(lbnd, ubnd, endpoint=endpoint, dtype=dt)
+            assert not hasattr(sample, 'dtype')
+            assert_equal(type(sample), dt)
+
+    def test_respect_dtype_array(self, endpoint):
+        # See gh-7203
+        for dt in self.itype:
+            lbnd = 0 if dt is bool else np.iinfo(dt).min
+            ubnd = 2 if dt is bool else np.iinfo(dt).max + 1
+            ubnd = ubnd - 1 if endpoint else ubnd
+            dt = np.bool_ if dt is bool else dt
+
+            sample = self.rfunc([lbnd], [ubnd], endpoint=endpoint, dtype=dt)
+            assert_equal(sample.dtype, dt)
+            sample = self.rfunc([lbnd] * 2, [ubnd] * 2, endpoint=endpoint,
+                                dtype=dt)
+            assert_equal(sample.dtype, dt)
+
+    def test_zero_size(self, endpoint):
+        # See gh-7203
+        for dt in self.itype:
+            sample = self.rfunc(0, 0, (3, 0, 4), endpoint=endpoint, dtype=dt)
+            assert sample.shape == (3, 0, 4)
+            assert sample.dtype == dt
+            assert self.rfunc(0, -10, 0, endpoint=endpoint,
+                              dtype=dt).shape == (0,)
+            assert_equal(random.integers(0, 0, size=(3, 0, 4)).shape,
+                         (3, 0, 4))
+            assert_equal(random.integers(0, -10, size=0).shape, (0,))
+            assert_equal(random.integers(10, 10, size=0).shape, (0,))
+
+    def test_error_byteorder(self):
+        # The non-native byte order ('<i4' on big-endian systems, '>i4'
+        # otherwise) must be rejected.
+        other_byteord_dt = '<i4' if sys.byteorder == 'big' else '>i4'
+        with pytest.raises(ValueError):
+            random.integers(0, 200, size=10, dtype=other_byteord_dt)
+
+    # chi2max is the maximum acceptable chi-squared value.
+    @pytest.mark.slow
+    @pytest.mark.parametrize('sample_size,high,dtype,chi2max',
+                             [(5000000, 5, np.int8, 125.0),  # p-value ~4.6e-25
+                              (5000000, 7, np.uint8, 150.0),  # p-value ~7.7e-30
+                              (10000000, 2500, np.int16, 3300.0),  # p-value ~3.0e-25
+                              (50000000, 5000, np.uint16, 6500.0),  # p-value ~3.5e-25
+                              ])
+    def test_integers_small_dtype_chisquared(self, sample_size, high,
+                                             dtype, chi2max):
+        # Regression test for gh-14774.
+        samples = random.integers(high, size=sample_size, dtype=dtype)
+
+        values, counts = np.unique(samples, return_counts=True)
+        expected = sample_size / high
+        chi2 = ((counts - expected)**2 / expected).sum()
+        assert chi2 < chi2max
+
+
+class TestRandomDist:
+    # Make sure the random distribution returns the correct value for a
+    # given seed
+
+    def setup(self):
+        self.seed = 1234567890
+
+    def test_integers(self):
+        random = Generator(MT19937(self.seed))
+        actual = random.integers(-99, 99, size=(3, 2))
+        desired = np.array([[-80, -56], [41, 37], [-83, -16]])
+        assert_array_equal(actual, desired)
+
+    def test_integers_masked(self):
+        # Test masked rejection sampling algorithm to generate array of
+        # uint32 in an interval.
+ random = Generator(MT19937(self.seed)) + actual = random.integers(0, 99, size=(3, 2), dtype=np.uint32) + desired = np.array([[9, 21], [70, 68], [8, 41]], dtype=np.uint32) + assert_array_equal(actual, desired) + + def test_integers_closed(self): + random = Generator(MT19937(self.seed)) + actual = random.integers(-99, 99, size=(3, 2), endpoint=True) + desired = np.array([[-80, -56], [ 41, 38], [-83, -15]]) + assert_array_equal(actual, desired) + + def test_integers_max_int(self): + # Tests whether integers with closed=True can generate the + # maximum allowed Python int that can be converted + # into a C long. Previous implementations of this + # method have thrown an OverflowError when attempting + # to generate this integer. + actual = random.integers(np.iinfo('l').max, np.iinfo('l').max, + endpoint=True) + + desired = np.iinfo('l').max + assert_equal(actual, desired) + + def test_random(self): + random = Generator(MT19937(self.seed)) + actual = random.random((3, 2)) + desired = np.array([[0.096999199829214, 0.707517457682192], + [0.084364834598269, 0.767731206553125], + [0.665069021359413, 0.715487190596693]]) + assert_array_almost_equal(actual, desired, decimal=15) + + random = Generator(MT19937(self.seed)) + actual = random.random() + assert_array_almost_equal(actual, desired[0, 0], decimal=15) + + def test_random_float(self): + random = Generator(MT19937(self.seed)) + actual = random.random((3, 2)) + desired = np.array([[0.0969992 , 0.70751746], + [0.08436483, 0.76773121], + [0.66506902, 0.71548719]]) + assert_array_almost_equal(actual, desired, decimal=7) + + def test_random_float_scalar(self): + random = Generator(MT19937(self.seed)) + actual = random.random(dtype=np.float32) + desired = 0.0969992 + assert_array_almost_equal(actual, desired, decimal=7) + + @pytest.mark.parametrize('dtype, uint_view_type', + [(np.float32, np.uint32), + (np.float64, np.uint64)]) + def test_random_distribution_of_lsb(self, dtype, uint_view_type): + random = Generator(MT19937(self.seed)) + sample = random.random(100000, dtype=dtype) + num_ones_in_lsb = np.count_nonzero(sample.view(uint_view_type) & 1) + # The probability of a 1 in the least significant bit is 0.25. + # With a sample size of 100000, the probability that num_ones_in_lsb + # is outside the following range is less than 5e-11. 
+ assert 24100 < num_ones_in_lsb < 25900 + + def test_random_unsupported_type(self): + assert_raises(TypeError, random.random, dtype='int32') + + def test_choice_uniform_replace(self): + random = Generator(MT19937(self.seed)) + actual = random.choice(4, 4) + desired = np.array([0, 0, 2, 2], dtype=np.int64) + assert_array_equal(actual, desired) + + def test_choice_nonuniform_replace(self): + random = Generator(MT19937(self.seed)) + actual = random.choice(4, 4, p=[0.4, 0.4, 0.1, 0.1]) + desired = np.array([0, 1, 0, 1], dtype=np.int64) + assert_array_equal(actual, desired) + + def test_choice_uniform_noreplace(self): + random = Generator(MT19937(self.seed)) + actual = random.choice(4, 3, replace=False) + desired = np.array([2, 0, 3], dtype=np.int64) + assert_array_equal(actual, desired) + actual = random.choice(4, 4, replace=False, shuffle=False) + desired = np.arange(4, dtype=np.int64) + assert_array_equal(actual, desired) + + def test_choice_nonuniform_noreplace(self): + random = Generator(MT19937(self.seed)) + actual = random.choice(4, 3, replace=False, p=[0.1, 0.3, 0.5, 0.1]) + desired = np.array([0, 2, 3], dtype=np.int64) + assert_array_equal(actual, desired) + + def test_choice_noninteger(self): + random = Generator(MT19937(self.seed)) + actual = random.choice(['a', 'b', 'c', 'd'], 4) + desired = np.array(['a', 'a', 'c', 'c']) + assert_array_equal(actual, desired) + + def test_choice_multidimensional_default_axis(self): + random = Generator(MT19937(self.seed)) + actual = random.choice([[0, 1], [2, 3], [4, 5], [6, 7]], 3) + desired = np.array([[0, 1], [0, 1], [4, 5]]) + assert_array_equal(actual, desired) + + def test_choice_multidimensional_custom_axis(self): + random = Generator(MT19937(self.seed)) + actual = random.choice([[0, 1], [2, 3], [4, 5], [6, 7]], 1, axis=1) + desired = np.array([[0], [2], [4], [6]]) + assert_array_equal(actual, desired) + + def test_choice_exceptions(self): + sample = random.choice + assert_raises(ValueError, sample, -1, 3) + assert_raises(ValueError, sample, 3., 3) + assert_raises(ValueError, sample, [], 3) + assert_raises(ValueError, sample, [1, 2, 3, 4], 3, + p=[[0.25, 0.25], [0.25, 0.25]]) + assert_raises(ValueError, sample, [1, 2], 3, p=[0.4, 0.4, 0.2]) + assert_raises(ValueError, sample, [1, 2], 3, p=[1.1, -0.1]) + assert_raises(ValueError, sample, [1, 2], 3, p=[0.4, 0.4]) + assert_raises(ValueError, sample, [1, 2, 3], 4, replace=False) + # gh-13087 + assert_raises(ValueError, sample, [1, 2, 3], -2, replace=False) + assert_raises(ValueError, sample, [1, 2, 3], (-1,), replace=False) + assert_raises(ValueError, sample, [1, 2, 3], (-1, 1), replace=False) + assert_raises(ValueError, sample, [1, 2, 3], 2, + replace=False, p=[1, 0, 0]) + + def test_choice_return_shape(self): + p = [0.1, 0.9] + # Check scalar + assert_(np.isscalar(random.choice(2, replace=True))) + assert_(np.isscalar(random.choice(2, replace=False))) + assert_(np.isscalar(random.choice(2, replace=True, p=p))) + assert_(np.isscalar(random.choice(2, replace=False, p=p))) + assert_(np.isscalar(random.choice([1, 2], replace=True))) + assert_(random.choice([None], replace=True) is None) + a = np.array([1, 2]) + arr = np.empty(1, dtype=object) + arr[0] = a + assert_(random.choice(arr, replace=True) is a) + + # Check 0-d array + s = tuple() + assert_(not np.isscalar(random.choice(2, s, replace=True))) + assert_(not np.isscalar(random.choice(2, s, replace=False))) + assert_(not np.isscalar(random.choice(2, s, replace=True, p=p))) + assert_(not np.isscalar(random.choice(2, s, replace=False, p=p))) + 
assert_(not np.isscalar(random.choice([1, 2], s, replace=True))) + assert_(random.choice([None], s, replace=True).ndim == 0) + a = np.array([1, 2]) + arr = np.empty(1, dtype=object) + arr[0] = a + assert_(random.choice(arr, s, replace=True).item() is a) + + # Check multi dimensional array + s = (2, 3) + p = [0.1, 0.1, 0.1, 0.1, 0.4, 0.2] + assert_equal(random.choice(6, s, replace=True).shape, s) + assert_equal(random.choice(6, s, replace=False).shape, s) + assert_equal(random.choice(6, s, replace=True, p=p).shape, s) + assert_equal(random.choice(6, s, replace=False, p=p).shape, s) + assert_equal(random.choice(np.arange(6), s, replace=True).shape, s) + + # Check zero-size + assert_equal(random.integers(0, 0, size=(3, 0, 4)).shape, (3, 0, 4)) + assert_equal(random.integers(0, -10, size=0).shape, (0,)) + assert_equal(random.integers(10, 10, size=0).shape, (0,)) + assert_equal(random.choice(0, size=0).shape, (0,)) + assert_equal(random.choice([], size=(0,)).shape, (0,)) + assert_equal(random.choice(['a', 'b'], size=(3, 0, 4)).shape, + (3, 0, 4)) + assert_raises(ValueError, random.choice, [], 10) + + def test_choice_nan_probabilities(self): + a = np.array([42, 1, 2]) + p = [None, None, None] + assert_raises(ValueError, random.choice, a, p=p) + + def test_choice_p_non_contiguous(self): + p = np.ones(10) / 5 + p[1::2] = 3.0 + random = Generator(MT19937(self.seed)) + non_contig = random.choice(5, 3, p=p[::2]) + random = Generator(MT19937(self.seed)) + contig = random.choice(5, 3, p=np.ascontiguousarray(p[::2])) + assert_array_equal(non_contig, contig) + + def test_choice_return_type(self): + # gh 9867 + p = np.ones(4) / 4. + actual = random.choice(4, 2) + assert actual.dtype == np.int64 + actual = random.choice(4, 2, replace=False) + assert actual.dtype == np.int64 + actual = random.choice(4, 2, p=p) + assert actual.dtype == np.int64 + actual = random.choice(4, 2, p=p, replace=False) + assert actual.dtype == np.int64 + + def test_choice_large_sample(self): + choice_hash = '4266599d12bfcfb815213303432341c06b4349f5455890446578877bb322e222' + random = Generator(MT19937(self.seed)) + actual = random.choice(10000, 5000, replace=False) + if sys.byteorder != 'little': + actual = actual.byteswap() + res = hashlib.sha256(actual.view(np.int8)).hexdigest() + assert_(choice_hash == res) + + def test_bytes(self): + random = Generator(MT19937(self.seed)) + actual = random.bytes(10) + desired = b'\x86\xf0\xd4\x18\xe1\x81\t8%\xdd' + assert_equal(actual, desired) + + def test_shuffle(self): + # Test lists, arrays (of various dtypes), and multidimensional versions + # of both, c-contiguous or not: + for conv in [lambda x: np.array([]), + lambda x: x, + lambda x: np.asarray(x).astype(np.int8), + lambda x: np.asarray(x).astype(np.float32), + lambda x: np.asarray(x).astype(np.complex64), + lambda x: np.asarray(x).astype(object), + lambda x: [(i, i) for i in x], + lambda x: np.asarray([[i, i] for i in x]), + lambda x: np.vstack([x, x]).T, + # gh-11442 + lambda x: (np.asarray([(i, i) for i in x], + [("a", int), ("b", int)]) + .view(np.recarray)), + # gh-4270 + lambda x: np.asarray([(i, i) for i in x], + [("a", object, (1,)), + ("b", np.int32, (1,))])]: + random = Generator(MT19937(self.seed)) + alist = conv([1, 2, 3, 4, 5, 6, 7, 8, 9, 0]) + random.shuffle(alist) + actual = alist + desired = conv([4, 1, 9, 8, 0, 5, 3, 6, 2, 7]) + assert_array_equal(actual, desired) + + def test_shuffle_custom_axis(self): + random = Generator(MT19937(self.seed)) + actual = np.arange(16).reshape((4, 4)) + random.shuffle(actual, axis=1) + 
desired = np.array([[ 0, 3, 1, 2], + [ 4, 7, 5, 6], + [ 8, 11, 9, 10], + [12, 15, 13, 14]]) + assert_array_equal(actual, desired) + random = Generator(MT19937(self.seed)) + actual = np.arange(16).reshape((4, 4)) + random.shuffle(actual, axis=-1) + assert_array_equal(actual, desired) + + def test_shuffle_custom_axis_empty(self): + random = Generator(MT19937(self.seed)) + desired = np.array([]).reshape((0, 6)) + for axis in (0, 1): + actual = np.array([]).reshape((0, 6)) + random.shuffle(actual, axis=axis) + assert_array_equal(actual, desired) + + def test_shuffle_axis_nonsquare(self): + y1 = np.arange(20).reshape(2, 10) + y2 = y1.copy() + random = Generator(MT19937(self.seed)) + random.shuffle(y1, axis=1) + random = Generator(MT19937(self.seed)) + random.shuffle(y2.T) + assert_array_equal(y1, y2) + + def test_shuffle_masked(self): + # gh-3263 + a = np.ma.masked_values(np.reshape(range(20), (5, 4)) % 3 - 1, -1) + b = np.ma.masked_values(np.arange(20) % 3 - 1, -1) + a_orig = a.copy() + b_orig = b.copy() + for i in range(50): + random.shuffle(a) + assert_equal( + sorted(a.data[~a.mask]), sorted(a_orig.data[~a_orig.mask])) + random.shuffle(b) + assert_equal( + sorted(b.data[~b.mask]), sorted(b_orig.data[~b_orig.mask])) + + def test_shuffle_exceptions(self): + random = Generator(MT19937(self.seed)) + arr = np.arange(10) + assert_raises(np.AxisError, random.shuffle, arr, 1) + arr = np.arange(9).reshape((3, 3)) + assert_raises(np.AxisError, random.shuffle, arr, 3) + assert_raises(TypeError, random.shuffle, arr, slice(1, 2, None)) + arr = [[1, 2, 3], [4, 5, 6]] + assert_raises(NotImplementedError, random.shuffle, arr, 1) + + arr = np.array(3) + assert_raises(TypeError, random.shuffle, arr) + arr = np.ones((3, 2)) + assert_raises(np.AxisError, random.shuffle, arr, 2) + + def test_permutation(self): + random = Generator(MT19937(self.seed)) + alist = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] + actual = random.permutation(alist) + desired = [4, 1, 9, 8, 0, 5, 3, 6, 2, 7] + assert_array_equal(actual, desired) + + random = Generator(MT19937(self.seed)) + arr_2d = np.atleast_2d([1, 2, 3, 4, 5, 6, 7, 8, 9, 0]).T + actual = random.permutation(arr_2d) + assert_array_equal(actual, np.atleast_2d(desired).T) + + bad_x_str = "abcd" + assert_raises(np.AxisError, random.permutation, bad_x_str) + + bad_x_float = 1.2 + assert_raises(np.AxisError, random.permutation, bad_x_float) + + random = Generator(MT19937(self.seed)) + integer_val = 10 + desired = [3, 0, 8, 7, 9, 4, 2, 5, 1, 6] + + actual = random.permutation(integer_val) + assert_array_equal(actual, desired) + + def test_permutation_custom_axis(self): + a = np.arange(16).reshape((4, 4)) + desired = np.array([[ 0, 3, 1, 2], + [ 4, 7, 5, 6], + [ 8, 11, 9, 10], + [12, 15, 13, 14]]) + random = Generator(MT19937(self.seed)) + actual = random.permutation(a, axis=1) + assert_array_equal(actual, desired) + random = Generator(MT19937(self.seed)) + actual = random.permutation(a, axis=-1) + assert_array_equal(actual, desired) + + def test_permutation_exceptions(self): + random = Generator(MT19937(self.seed)) + arr = np.arange(10) + assert_raises(np.AxisError, random.permutation, arr, 1) + arr = np.arange(9).reshape((3, 3)) + assert_raises(np.AxisError, random.permutation, arr, 3) + assert_raises(TypeError, random.permutation, arr, slice(1, 2, None)) + + @pytest.mark.parametrize("dtype", [int, object]) + @pytest.mark.parametrize("axis, expected", + [(None, np.array([[3, 7, 0, 9, 10, 11], + [8, 4, 2, 5, 1, 6]])), + (0, np.array([[6, 1, 2, 9, 10, 11], + [0, 7, 8, 3, 4, 5]])), + (1, 
np.array([[ 5, 3, 4, 0, 2, 1], + [11, 9, 10, 6, 8, 7]]))]) + def test_permuted(self, dtype, axis, expected): + random = Generator(MT19937(self.seed)) + x = np.arange(12).reshape(2, 6).astype(dtype) + random.permuted(x, axis=axis, out=x) + assert_array_equal(x, expected) + + random = Generator(MT19937(self.seed)) + x = np.arange(12).reshape(2, 6).astype(dtype) + y = random.permuted(x, axis=axis) + assert y.dtype == dtype + assert_array_equal(y, expected) + + def test_permuted_with_strides(self): + random = Generator(MT19937(self.seed)) + x0 = np.arange(22).reshape(2, 11) + x1 = x0.copy() + x = x0[:, ::3] + y = random.permuted(x, axis=1, out=x) + expected = np.array([[0, 9, 3, 6], + [14, 20, 11, 17]]) + assert_array_equal(y, expected) + x1[:, ::3] = expected + # Verify that the original x0 was modified in-place as expected. + assert_array_equal(x1, x0) + + def test_permuted_empty(self): + y = random.permuted([]) + assert_array_equal(y, []) + + @pytest.mark.parametrize('outshape', [(2, 3), 5]) + def test_permuted_out_with_wrong_shape(self, outshape): + a = np.array([1, 2, 3]) + out = np.zeros(outshape, dtype=a.dtype) + with pytest.raises(ValueError, match='same shape'): + random.permuted(a, out=out) + + def test_permuted_out_with_wrong_type(self): + out = np.zeros((3, 5), dtype=np.int32) + x = np.ones((3, 5)) + with pytest.raises(TypeError, match='Cannot cast'): + random.permuted(x, axis=1, out=out) + + def test_beta(self): + random = Generator(MT19937(self.seed)) + actual = random.beta(.1, .9, size=(3, 2)) + desired = np.array( + [[1.083029353267698e-10, 2.449965303168024e-11], + [2.397085162969853e-02, 3.590779671820755e-08], + [2.830254190078299e-04, 1.744709918330393e-01]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_binomial(self): + random = Generator(MT19937(self.seed)) + actual = random.binomial(100.123, .456, size=(3, 2)) + desired = np.array([[42, 41], + [42, 48], + [44, 50]]) + assert_array_equal(actual, desired) + + random = Generator(MT19937(self.seed)) + actual = random.binomial(100.123, .456) + desired = 42 + assert_array_equal(actual, desired) + + def test_chisquare(self): + random = Generator(MT19937(self.seed)) + actual = random.chisquare(50, size=(3, 2)) + desired = np.array([[32.9850547060149, 39.0219480493301], + [56.2006134779419, 57.3474165711485], + [55.4243733880198, 55.4209797925213]]) + assert_array_almost_equal(actual, desired, decimal=13) + + def test_dirichlet(self): + random = Generator(MT19937(self.seed)) + alpha = np.array([51.72840233779265162, 39.74494232180943953]) + actual = random.dirichlet(alpha, size=(3, 2)) + desired = np.array([[[0.5439892869558927, 0.45601071304410745], + [0.5588917345860708, 0.4411082654139292 ]], + [[0.5632074165063435, 0.43679258349365657], + [0.54862581112627, 0.45137418887373015]], + [[0.49961831357047226, 0.5003816864295278 ], + [0.52374806183482, 0.47625193816517997]]]) + assert_array_almost_equal(actual, desired, decimal=15) + bad_alpha = np.array([5.4e-01, -1.0e-16]) + assert_raises(ValueError, random.dirichlet, bad_alpha) + + random = Generator(MT19937(self.seed)) + alpha = np.array([51.72840233779265162, 39.74494232180943953]) + actual = random.dirichlet(alpha) + assert_array_almost_equal(actual, desired[0, 0], decimal=15) + + def test_dirichlet_size(self): + # gh-3173 + p = np.array([51.72840233779265162, 39.74494232180943953]) + assert_equal(random.dirichlet(p, np.uint32(1)).shape, (1, 2)) + assert_equal(random.dirichlet(p, np.uint32(1)).shape, (1, 2)) + assert_equal(random.dirichlet(p, 
np.uint32(1)).shape, (1, 2)) + assert_equal(random.dirichlet(p, [2, 2]).shape, (2, 2, 2)) + assert_equal(random.dirichlet(p, (2, 2)).shape, (2, 2, 2)) + assert_equal(random.dirichlet(p, np.array((2, 2))).shape, (2, 2, 2)) + + assert_raises(TypeError, random.dirichlet, p, float(1)) + + def test_dirichlet_bad_alpha(self): + # gh-2089 + alpha = np.array([5.4e-01, -1.0e-16]) + assert_raises(ValueError, random.dirichlet, alpha) + + # gh-15876 + assert_raises(ValueError, random.dirichlet, [[5, 1]]) + assert_raises(ValueError, random.dirichlet, [[5], [1]]) + assert_raises(ValueError, random.dirichlet, [[[5], [1]], [[1], [5]]]) + assert_raises(ValueError, random.dirichlet, np.array([[5, 1], [1, 5]])) + + def test_dirichlet_alpha_non_contiguous(self): + a = np.array([51.72840233779265162, -1.0, 39.74494232180943953]) + alpha = a[::2] + random = Generator(MT19937(self.seed)) + non_contig = random.dirichlet(alpha, size=(3, 2)) + random = Generator(MT19937(self.seed)) + contig = random.dirichlet(np.ascontiguousarray(alpha), + size=(3, 2)) + assert_array_almost_equal(non_contig, contig) + + def test_dirichlet_small_alpha(self): + eps = 1.0e-9 # 1.0e-10 -> runtime x 10; 1e-11 -> runtime x 200, etc. + alpha = eps * np.array([1., 1.0e-3]) + random = Generator(MT19937(self.seed)) + actual = random.dirichlet(alpha, size=(3, 2)) + expected = np.array([ + [[1., 0.], + [1., 0.]], + [[1., 0.], + [1., 0.]], + [[1., 0.], + [1., 0.]] + ]) + assert_array_almost_equal(actual, expected, decimal=15) + + @pytest.mark.slow + def test_dirichlet_moderately_small_alpha(self): + # Use alpha.max() < 0.1 to trigger stick breaking code path + alpha = np.array([0.02, 0.04, 0.03]) + exact_mean = alpha / alpha.sum() + random = Generator(MT19937(self.seed)) + sample = random.dirichlet(alpha, size=20000000) + sample_mean = sample.mean(axis=0) + assert_allclose(sample_mean, exact_mean, rtol=1e-3) + + def test_exponential(self): + random = Generator(MT19937(self.seed)) + actual = random.exponential(1.1234, size=(3, 2)) + desired = np.array([[0.098845481066258, 1.560752510746964], + [0.075730916041636, 1.769098974710777], + [1.488602544592235, 2.49684815275751 ]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_exponential_0(self): + assert_equal(random.exponential(scale=0), 0) + assert_raises(ValueError, random.exponential, scale=-0.) + + def test_f(self): + random = Generator(MT19937(self.seed)) + actual = random.f(12, 77, size=(3, 2)) + desired = np.array([[0.461720027077085, 1.100441958872451], + [1.100337455217484, 0.91421736740018 ], + [0.500811891303113, 0.826802454552058]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_gamma(self): + random = Generator(MT19937(self.seed)) + actual = random.gamma(5, 3, size=(3, 2)) + desired = np.array([[ 5.03850858902096, 7.9228656732049 ], + [18.73983605132985, 19.57961681699238], + [18.17897755150825, 18.17653912505234]]) + assert_array_almost_equal(actual, desired, decimal=14) + + def test_gamma_0(self): + assert_equal(random.gamma(shape=0, scale=0), 0) + assert_raises(ValueError, random.gamma, shape=-0., scale=-0.) 
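+
+    # Illustrative sketch (not an upstream test): every test in this class
+    # relies on the same repeatability contract, namely that two Generators
+    # seeded with the same MT19937 state produce identical streams. A
+    # minimal statement of that premise:
+    def test_same_seed_same_stream_sketch(self):
+        g1 = Generator(MT19937(self.seed))
+        g2 = Generator(MT19937(self.seed))
+        assert_array_equal(g1.integers(0, 100, size=5),
+                           g2.integers(0, 100, size=5))
+        assert_array_equal(g1.random(5), g2.random(5))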
+ + def test_geometric(self): + random = Generator(MT19937(self.seed)) + actual = random.geometric(.123456789, size=(3, 2)) + desired = np.array([[1, 11], + [1, 12], + [11, 17]]) + assert_array_equal(actual, desired) + + def test_geometric_exceptions(self): + assert_raises(ValueError, random.geometric, 1.1) + assert_raises(ValueError, random.geometric, [1.1] * 10) + assert_raises(ValueError, random.geometric, -0.1) + assert_raises(ValueError, random.geometric, [-0.1] * 10) + with np.errstate(invalid='ignore'): + assert_raises(ValueError, random.geometric, np.nan) + assert_raises(ValueError, random.geometric, [np.nan] * 10) + + def test_gumbel(self): + random = Generator(MT19937(self.seed)) + actual = random.gumbel(loc=.123456789, scale=2.0, size=(3, 2)) + desired = np.array([[ 4.688397515056245, -0.289514845417841], + [ 4.981176042584683, -0.633224272589149], + [-0.055915275687488, -0.333962478257953]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_gumbel_0(self): + assert_equal(random.gumbel(scale=0), 0) + assert_raises(ValueError, random.gumbel, scale=-0.) + + def test_hypergeometric(self): + random = Generator(MT19937(self.seed)) + actual = random.hypergeometric(10.1, 5.5, 14, size=(3, 2)) + desired = np.array([[ 9, 9], + [ 9, 9], + [10, 9]]) + assert_array_equal(actual, desired) + + # Test nbad = 0 + actual = random.hypergeometric(5, 0, 3, size=4) + desired = np.array([3, 3, 3, 3]) + assert_array_equal(actual, desired) + + actual = random.hypergeometric(15, 0, 12, size=4) + desired = np.array([12, 12, 12, 12]) + assert_array_equal(actual, desired) + + # Test ngood = 0 + actual = random.hypergeometric(0, 5, 3, size=4) + desired = np.array([0, 0, 0, 0]) + assert_array_equal(actual, desired) + + actual = random.hypergeometric(0, 15, 12, size=4) + desired = np.array([0, 0, 0, 0]) + assert_array_equal(actual, desired) + + def test_laplace(self): + random = Generator(MT19937(self.seed)) + actual = random.laplace(loc=.123456789, scale=2.0, size=(3, 2)) + desired = np.array([[-3.156353949272393, 1.195863024830054], + [-3.435458081645966, 1.656882398925444], + [ 0.924824032467446, 1.251116432209336]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_laplace_0(self): + assert_equal(random.laplace(scale=0), 0) + assert_raises(ValueError, random.laplace, scale=-0.) + + def test_logistic(self): + random = Generator(MT19937(self.seed)) + actual = random.logistic(loc=.123456789, scale=2.0, size=(3, 2)) + desired = np.array([[-4.338584631510999, 1.890171436749954], + [-4.64547787337966 , 2.514545562919217], + [ 1.495389489198666, 1.967827627577474]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_lognormal(self): + random = Generator(MT19937(self.seed)) + actual = random.lognormal(mean=.123456789, sigma=2.0, size=(3, 2)) + desired = np.array([[ 0.0268252166335, 13.9534486483053], + [ 0.1204014788936, 2.2422077497792], + [ 4.2484199496128, 12.0093343977523]]) + assert_array_almost_equal(actual, desired, decimal=13) + + def test_lognormal_0(self): + assert_equal(random.lognormal(sigma=0), 1) + assert_raises(ValueError, random.lognormal, sigma=-0.) 
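+
+    # Illustrative sketch (not an upstream test): the *_0 tests in this
+    # class all encode one convention: a zero scale collapses the
+    # distribution to a constant, while negative zero is rejected. A
+    # table-driven restatement for the scale-keyword distributions:
+    def test_zero_scale_convention_sketch(self):
+        for dist in [random.exponential, random.gumbel,
+                     random.laplace, random.rayleigh]:
+            assert_equal(dist(scale=0), 0)
+            assert_raises(ValueError, dist, scale=-0.)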
+
+    def test_logseries(self):
+        random = Generator(MT19937(self.seed))
+        actual = random.logseries(p=.923456789, size=(3, 2))
+        desired = np.array([[14, 17],
+                            [3, 18],
+                            [5, 1]])
+        assert_array_equal(actual, desired)
+
+    def test_logseries_exceptions(self):
+        with np.errstate(invalid='ignore'):
+            assert_raises(ValueError, random.logseries, np.nan)
+            assert_raises(ValueError, random.logseries, [np.nan] * 10)
+
+    def test_multinomial(self):
+        random = Generator(MT19937(self.seed))
+        actual = random.multinomial(20, [1 / 6.] * 6, size=(3, 2))
+        desired = np.array([[[1, 5, 1, 6, 4, 3],
+                             [4, 2, 6, 2, 4, 2]],
+                            [[5, 3, 2, 6, 3, 1],
+                             [4, 4, 0, 2, 3, 7]],
+                            [[6, 3, 1, 5, 3, 2],
+                             [5, 5, 3, 1, 2, 4]]])
+        assert_array_equal(actual, desired)
+
+    @pytest.mark.parametrize("method", ["svd", "eigh", "cholesky"])
+    def test_multivariate_normal(self, method):
+        random = Generator(MT19937(self.seed))
+        mean = (.123456789, 10)
+        cov = [[1, 0], [0, 1]]
+        size = (3, 2)
+        actual = random.multivariate_normal(mean, cov, size, method=method)
+        desired = np.array([[[-1.747478062846581, 11.25613495182354 ],
+                             [-0.9967333370066214, 10.342002097029821 ]],
+                            [[ 0.7850019631242964, 11.181113712443013 ],
+                             [ 0.8901349653255224, 8.873825399642492 ]],
+                            [[ 0.7130260107430003, 9.551628690083056 ],
+                             [ 0.7127098726541128, 11.991709234143173 ]]])
+
+        assert_array_almost_equal(actual, desired, decimal=15)
+
+        # Check for default size, was raising deprecation warning
+        actual = random.multivariate_normal(mean, cov, method=method)
+        desired = np.array([0.233278563284287, 9.424140804347195])
+        assert_array_almost_equal(actual, desired, decimal=15)
+        # Check that a non-symmetric covariance matrix raises an exception
+        # when check_valid='raise' with the default svd method.
+        mean = [0, 0]
+        cov = [[1, 2], [1, 2]]
+        assert_raises(ValueError, random.multivariate_normal, mean, cov,
+                      check_valid='raise')
+
+        # Check that a non-positive-semidefinite covariance matrix warns
+        # with RuntimeWarning
+        cov = [[1, 2], [2, 1]]
+        assert_warns(RuntimeWarning, random.multivariate_normal, mean, cov)
+        assert_warns(RuntimeWarning, random.multivariate_normal, mean, cov,
+                     method='eigh')
+        assert_raises(LinAlgError, random.multivariate_normal, mean, cov,
+                      method='cholesky')
+
+        # and that it doesn't warn when check_valid='ignore'
+        assert_no_warnings(random.multivariate_normal, mean, cov,
+                           check_valid='ignore')
+
+        # and that it raises instead of warning when check_valid='raise'
+        assert_raises(ValueError, random.multivariate_normal, mean, cov,
+                      check_valid='raise')
+        assert_raises(ValueError, random.multivariate_normal, mean, cov,
+                      check_valid='raise', method='eigh')
+
+        # check degenerate samples from singular covariance matrix
+        cov = [[1, 1], [1, 1]]
+        if method in ('svd', 'eigh'):
+            samples = random.multivariate_normal(mean, cov, size=(3, 2),
+                                                 method=method)
+            assert_array_almost_equal(samples[..., 0], samples[..., 1],
+                                      decimal=6)
+        else:
+            assert_raises(LinAlgError, random.multivariate_normal, mean, cov,
+                          method='cholesky')
+
+        cov = np.array([[1, 0.1], [0.1, 1]], dtype=np.float32)
+        with suppress_warnings() as sup:
+            random.multivariate_normal(mean, cov, method=method)
+            w = sup.record(RuntimeWarning)
+            assert len(w) == 0
+
+        mu = np.zeros(2)
+        cov = np.eye(2)
+        assert_raises(ValueError, random.multivariate_normal, mean, cov,
+                      check_valid='other')
+        assert_raises(ValueError, random.multivariate_normal,
+                      np.zeros((2, 1, 1)), cov)
+        assert_raises(ValueError, random.multivariate_normal,
+                      mu, np.empty((3, 2)))
+
assert_raises(ValueError, random.multivariate_normal, + mu, np.eye(3)) + + @pytest.mark.parametrize("method", ["svd", "eigh", "cholesky"]) + def test_multivariate_normal_basic_stats(self, method): + random = Generator(MT19937(self.seed)) + n_s = 1000 + mean = np.array([1, 2]) + cov = np.array([[2, 1], [1, 2]]) + s = random.multivariate_normal(mean, cov, size=(n_s,), method=method) + s_center = s - mean + cov_emp = (s_center.T @ s_center) / (n_s - 1) + # these are pretty loose and are only designed to detect major errors + assert np.all(np.abs(s_center.mean(-2)) < 0.1) + assert np.all(np.abs(cov_emp - cov) < 0.2) + + def test_negative_binomial(self): + random = Generator(MT19937(self.seed)) + actual = random.negative_binomial(n=100, p=.12345, size=(3, 2)) + desired = np.array([[543, 727], + [775, 760], + [600, 674]]) + assert_array_equal(actual, desired) + + def test_negative_binomial_exceptions(self): + with np.errstate(invalid='ignore'): + assert_raises(ValueError, random.negative_binomial, 100, np.nan) + assert_raises(ValueError, random.negative_binomial, 100, + [np.nan] * 10) + + def test_negative_binomial_p0_exception(self): + # Verify that p=0 raises an exception. + with assert_raises(ValueError): + x = random.negative_binomial(1, 0) + + def test_noncentral_chisquare(self): + random = Generator(MT19937(self.seed)) + actual = random.noncentral_chisquare(df=5, nonc=5, size=(3, 2)) + desired = np.array([[ 1.70561552362133, 15.97378184942111], + [13.71483425173724, 20.17859633310629], + [11.3615477156643 , 3.67891108738029]]) + assert_array_almost_equal(actual, desired, decimal=14) + + actual = random.noncentral_chisquare(df=.5, nonc=.2, size=(3, 2)) + desired = np.array([[9.41427665607629e-04, 1.70473157518850e-04], + [1.14554372041263e+00, 1.38187755933435e-03], + [1.90659181905387e+00, 1.21772577941822e+00]]) + assert_array_almost_equal(actual, desired, decimal=14) + + random = Generator(MT19937(self.seed)) + actual = random.noncentral_chisquare(df=5, nonc=0, size=(3, 2)) + desired = np.array([[0.82947954590419, 1.80139670767078], + [6.58720057417794, 7.00491463609814], + [6.31101879073157, 6.30982307753005]]) + assert_array_almost_equal(actual, desired, decimal=14) + + def test_noncentral_f(self): + random = Generator(MT19937(self.seed)) + actual = random.noncentral_f(dfnum=5, dfden=2, nonc=1, + size=(3, 2)) + desired = np.array([[0.060310671139 , 0.23866058175939], + [0.86860246709073, 0.2668510459738 ], + [0.23375780078364, 1.88922102885943]]) + assert_array_almost_equal(actual, desired, decimal=14) + + def test_noncentral_f_nan(self): + random = Generator(MT19937(self.seed)) + actual = random.noncentral_f(dfnum=5, dfden=2, nonc=np.nan) + assert np.isnan(actual) + + def test_normal(self): + random = Generator(MT19937(self.seed)) + actual = random.normal(loc=.123456789, scale=2.0, size=(3, 2)) + desired = np.array([[-3.618412914693162, 2.635726692647081], + [-2.116923463013243, 0.807460983059643], + [ 1.446547137248593, 2.485684213886024]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_normal_0(self): + assert_equal(random.normal(scale=0), 0) + assert_raises(ValueError, random.normal, scale=-0.) 
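+
+    # Illustrative sketch (not an upstream test): test_pareto below
+    # compares values in units in the last place (nulps) rather than
+    # decimal digits. np.spacing gives the size of one ulp, so the
+    # nulp=30 tolerance used there is still a very small relative error:
+    def test_nulp_tolerance_scale_sketch(self):
+        x = 7.7142534343505773e+04
+        # 30 ulps at this magnitude is a relative error below 1e-12
+        assert 30 * np.spacing(x) / x < 1e-12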
+ + def test_pareto(self): + random = Generator(MT19937(self.seed)) + actual = random.pareto(a=.123456789, size=(3, 2)) + desired = np.array([[1.0394926776069018e+00, 7.7142534343505773e+04], + [7.2640150889064703e-01, 3.4650454783825594e+05], + [4.5852344481994740e+04, 6.5851383009539105e+07]]) + # For some reason on 32-bit x86 Ubuntu 12.10 the [1, 0] entry in this + # matrix differs by 24 nulps. Discussion: + # https://mail.python.org/pipermail/numpy-discussion/2012-September/063801.html + # Consensus is that this is probably some gcc quirk that affects + # rounding but not in any important way, so we just use a looser + # tolerance on this test: + np.testing.assert_array_almost_equal_nulp(actual, desired, nulp=30) + + def test_poisson(self): + random = Generator(MT19937(self.seed)) + actual = random.poisson(lam=.123456789, size=(3, 2)) + desired = np.array([[0, 0], + [0, 0], + [0, 0]]) + assert_array_equal(actual, desired) + + def test_poisson_exceptions(self): + lambig = np.iinfo('int64').max + lamneg = -1 + assert_raises(ValueError, random.poisson, lamneg) + assert_raises(ValueError, random.poisson, [lamneg] * 10) + assert_raises(ValueError, random.poisson, lambig) + assert_raises(ValueError, random.poisson, [lambig] * 10) + with np.errstate(invalid='ignore'): + assert_raises(ValueError, random.poisson, np.nan) + assert_raises(ValueError, random.poisson, [np.nan] * 10) + + def test_power(self): + random = Generator(MT19937(self.seed)) + actual = random.power(a=.123456789, size=(3, 2)) + desired = np.array([[1.977857368842754e-09, 9.806792196620341e-02], + [2.482442984543471e-10, 1.527108843266079e-01], + [8.188283434244285e-02, 3.950547209346948e-01]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_rayleigh(self): + random = Generator(MT19937(self.seed)) + actual = random.rayleigh(scale=10, size=(3, 2)) + desired = np.array([[4.19494429102666, 16.66920198906598], + [3.67184544902662, 17.74695521962917], + [16.27935397855501, 21.08355560691792]]) + assert_array_almost_equal(actual, desired, decimal=14) + + def test_rayleigh_0(self): + assert_equal(random.rayleigh(scale=0), 0) + assert_raises(ValueError, random.rayleigh, scale=-0.) 
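+
+    # Illustrative sketch (not an upstream test): test_poisson_exceptions
+    # above rejects lam at the int64 ceiling. As a counterpoint, a large
+    # but in-range lam (1e9 here is our choice, far below the internal
+    # cap) should sample without error:
+    def test_poisson_large_valid_lam_sketch(self):
+        random = Generator(MT19937(self.seed))
+        sample = random.poisson(lam=1e9, size=3)
+        assert_(np.all(sample > 0))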
+ + def test_standard_cauchy(self): + random = Generator(MT19937(self.seed)) + actual = random.standard_cauchy(size=(3, 2)) + desired = np.array([[-1.489437778266206, -3.275389641569784], + [ 0.560102864910406, -0.680780916282552], + [-1.314912905226277, 0.295852965660225]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_standard_exponential(self): + random = Generator(MT19937(self.seed)) + actual = random.standard_exponential(size=(3, 2), method='inv') + desired = np.array([[0.102031839440643, 1.229350298474972], + [0.088137284693098, 1.459859985522667], + [1.093830802293668, 1.256977002164613]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_standard_expoential_type_error(self): + assert_raises(TypeError, random.standard_exponential, dtype=np.int32) + + def test_standard_gamma(self): + random = Generator(MT19937(self.seed)) + actual = random.standard_gamma(shape=3, size=(3, 2)) + desired = np.array([[0.62970724056362, 1.22379851271008], + [3.899412530884 , 4.12479964250139], + [3.74994102464584, 3.74929307690815]]) + assert_array_almost_equal(actual, desired, decimal=14) + + def test_standard_gammma_scalar_float(self): + random = Generator(MT19937(self.seed)) + actual = random.standard_gamma(3, dtype=np.float32) + desired = 2.9242148399353027 + assert_array_almost_equal(actual, desired, decimal=6) + + def test_standard_gamma_float(self): + random = Generator(MT19937(self.seed)) + actual = random.standard_gamma(shape=3, size=(3, 2)) + desired = np.array([[0.62971, 1.2238 ], + [3.89941, 4.1248 ], + [3.74994, 3.74929]]) + assert_array_almost_equal(actual, desired, decimal=5) + + def test_standard_gammma_float_out(self): + actual = np.zeros((3, 2), dtype=np.float32) + random = Generator(MT19937(self.seed)) + random.standard_gamma(10.0, out=actual, dtype=np.float32) + desired = np.array([[10.14987, 7.87012], + [ 9.46284, 12.56832], + [13.82495, 7.81533]], dtype=np.float32) + assert_array_almost_equal(actual, desired, decimal=5) + + random = Generator(MT19937(self.seed)) + random.standard_gamma(10.0, out=actual, size=(3, 2), dtype=np.float32) + assert_array_almost_equal(actual, desired, decimal=5) + + def test_standard_gamma_unknown_type(self): + assert_raises(TypeError, random.standard_gamma, 1., + dtype='int32') + + def test_out_size_mismatch(self): + out = np.zeros(10) + assert_raises(ValueError, random.standard_gamma, 10.0, size=20, + out=out) + assert_raises(ValueError, random.standard_gamma, 10.0, size=(10, 1), + out=out) + + def test_standard_gamma_0(self): + assert_equal(random.standard_gamma(shape=0), 0) + assert_raises(ValueError, random.standard_gamma, shape=-0.) 
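+
+    # Illustrative sketch (not an upstream test, and it assumes the 'inv'
+    # path exercised by test_standard_exponential above is the inverse-CDF
+    # transform -log1p(-U) applied to the same uniform stream):
+    def test_standard_exponential_inv_sketch(self):
+        g1 = Generator(MT19937(self.seed))
+        g2 = Generator(MT19937(self.seed))
+        assert_allclose(g1.standard_exponential(5, method='inv'),
+                        -np.log1p(-g2.random(5)))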
+ + def test_standard_normal(self): + random = Generator(MT19937(self.seed)) + actual = random.standard_normal(size=(3, 2)) + desired = np.array([[-1.870934851846581, 1.25613495182354 ], + [-1.120190126006621, 0.342002097029821], + [ 0.661545174124296, 1.181113712443012]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_standard_normal_unsupported_type(self): + assert_raises(TypeError, random.standard_normal, dtype=np.int32) + + def test_standard_t(self): + random = Generator(MT19937(self.seed)) + actual = random.standard_t(df=10, size=(3, 2)) + desired = np.array([[-1.484666193042647, 0.30597891831161 ], + [ 1.056684299648085, -0.407312602088507], + [ 0.130704414281157, -2.038053410490321]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_triangular(self): + random = Generator(MT19937(self.seed)) + actual = random.triangular(left=5.12, mode=10.23, right=20.34, + size=(3, 2)) + desired = np.array([[ 7.86664070590917, 13.6313848513185 ], + [ 7.68152445215983, 14.36169131136546], + [13.16105603911429, 13.72341621856971]]) + assert_array_almost_equal(actual, desired, decimal=14) + + def test_uniform(self): + random = Generator(MT19937(self.seed)) + actual = random.uniform(low=1.23, high=10.54, size=(3, 2)) + desired = np.array([[2.13306255040998 , 7.816987531021207], + [2.015436610109887, 8.377577533009589], + [7.421792588856135, 7.891185744455209]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_uniform_range_bounds(self): + fmin = np.finfo('float').min + fmax = np.finfo('float').max + + func = random.uniform + assert_raises(OverflowError, func, -np.inf, 0) + assert_raises(OverflowError, func, 0, np.inf) + assert_raises(OverflowError, func, fmin, fmax) + assert_raises(OverflowError, func, [-np.inf], [0]) + assert_raises(OverflowError, func, [0], [np.inf]) + + # (fmax / 1e17) - fmin is within range, so this should not throw + # account for i386 extended precision DBL_MAX / 1e17 + DBL_MAX > + # DBL_MAX by increasing fmin a bit + random.uniform(low=np.nextafter(fmin, 1), high=fmax / 1e17) + + def test_uniform_zero_range(self): + func = random.uniform + result = func(1.5, 1.5) + assert_allclose(result, 1.5) + result = func([0.0, np.pi], [0.0, np.pi]) + assert_allclose(result, [0.0, np.pi]) + result = func([[2145.12], [2145.12]], [2145.12, 2145.12]) + assert_allclose(result, 2145.12 + np.zeros((2, 2))) + + def test_uniform_neg_range(self): + func = random.uniform + assert_raises(ValueError, func, 2, 1) + assert_raises(ValueError, func, [1, 2], [1, 1]) + assert_raises(ValueError, func, [[0, 1],[2, 3]], 2) + + def test_scalar_exception_propagation(self): + # Tests that exceptions are correctly propagated in distributions + # when called with objects that throw exceptions when converted to + # scalars. 
+ # + # Regression test for gh: 8865 + + class ThrowingFloat(np.ndarray): + def __float__(self): + raise TypeError + + throwing_float = np.array(1.0).view(ThrowingFloat) + assert_raises(TypeError, random.uniform, throwing_float, + throwing_float) + + class ThrowingInteger(np.ndarray): + def __int__(self): + raise TypeError + + throwing_int = np.array(1).view(ThrowingInteger) + assert_raises(TypeError, random.hypergeometric, throwing_int, 1, 1) + + def test_vonmises(self): + random = Generator(MT19937(self.seed)) + actual = random.vonmises(mu=1.23, kappa=1.54, size=(3, 2)) + desired = np.array([[ 1.107972248690106, 2.841536476232361], + [ 1.832602376042457, 1.945511926976032], + [-0.260147475776542, 2.058047492231698]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_vonmises_small(self): + # check infinite loop, gh-4720 + random = Generator(MT19937(self.seed)) + r = random.vonmises(mu=0., kappa=1.1e-8, size=10**6) + assert_(np.isfinite(r).all()) + + def test_vonmises_nan(self): + random = Generator(MT19937(self.seed)) + r = random.vonmises(mu=0., kappa=np.nan) + assert_(np.isnan(r)) + + @pytest.mark.parametrize("kappa", [1e4, 1e15]) + def test_vonmises_large_kappa(self, kappa): + random = Generator(MT19937(self.seed)) + rs = RandomState(random.bit_generator) + state = random.bit_generator.state + + random_state_vals = rs.vonmises(0, kappa, size=10) + random.bit_generator.state = state + gen_vals = random.vonmises(0, kappa, size=10) + if kappa < 1e6: + assert_allclose(random_state_vals, gen_vals) + else: + assert np.all(random_state_vals != gen_vals) + + @pytest.mark.parametrize("mu", [-7., -np.pi, -3.1, np.pi, 3.2]) + @pytest.mark.parametrize("kappa", [1e-9, 1e-6, 1, 1e3, 1e15]) + def test_vonmises_large_kappa_range(self, mu, kappa): + random = Generator(MT19937(self.seed)) + r = random.vonmises(mu, kappa, 50) + assert_(np.all(r > -np.pi) and np.all(r <= np.pi)) + + def test_wald(self): + random = Generator(MT19937(self.seed)) + actual = random.wald(mean=1.23, scale=1.54, size=(3, 2)) + desired = np.array([[0.26871721804551, 3.2233942732115 ], + [2.20328374987066, 2.40958405189353], + [2.07093587449261, 0.73073890064369]]) + assert_array_almost_equal(actual, desired, decimal=14) + + def test_weibull(self): + random = Generator(MT19937(self.seed)) + actual = random.weibull(a=1.23, size=(3, 2)) + desired = np.array([[0.138613914769468, 1.306463419753191], + [0.111623365934763, 1.446570494646721], + [1.257145775276011, 1.914247725027957]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_weibull_0(self): + random = Generator(MT19937(self.seed)) + assert_equal(random.weibull(a=0, size=12), np.zeros(12)) + assert_raises(ValueError, random.weibull, a=-0.) 
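+
+    # Illustrative sketch (not an upstream test, and it assumes Weibull
+    # variates are drawn as a standard exponential raised to the power
+    # 1/a on the same underlying stream):
+    def test_weibull_exponential_relation_sketch(self):
+        a = 1.23
+        g1 = Generator(MT19937(self.seed))
+        g2 = Generator(MT19937(self.seed))
+        assert_allclose(g1.weibull(a, size=5),
+                        g2.standard_exponential(5) ** (1 / a))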
+ + def test_zipf(self): + random = Generator(MT19937(self.seed)) + actual = random.zipf(a=1.23, size=(3, 2)) + desired = np.array([[ 1, 1], + [ 10, 867], + [354, 2]]) + assert_array_equal(actual, desired) + + +class TestBroadcast: + # tests that functions that broadcast behave + # correctly when presented with non-scalar arguments + def setup(self): + self.seed = 123456789 + + + def test_uniform(self): + random = Generator(MT19937(self.seed)) + low = [0] + high = [1] + uniform = random.uniform + desired = np.array([0.16693771389729, 0.19635129550675, 0.75563050964095]) + + random = Generator(MT19937(self.seed)) + actual = random.uniform(low * 3, high) + assert_array_almost_equal(actual, desired, decimal=14) + + random = Generator(MT19937(self.seed)) + actual = random.uniform(low, high * 3) + assert_array_almost_equal(actual, desired, decimal=14) + + def test_normal(self): + loc = [0] + scale = [1] + bad_scale = [-1] + random = Generator(MT19937(self.seed)) + desired = np.array([-0.38736406738527, 0.79594375042255, 0.0197076236097]) + + random = Generator(MT19937(self.seed)) + actual = random.normal(loc * 3, scale) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, random.normal, loc * 3, bad_scale) + + random = Generator(MT19937(self.seed)) + normal = random.normal + actual = normal(loc, scale * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, normal, loc, bad_scale * 3) + + def test_beta(self): + a = [1] + b = [2] + bad_a = [-1] + bad_b = [-2] + desired = np.array([0.18719338682602, 0.73234824491364, 0.17928615186455]) + + random = Generator(MT19937(self.seed)) + beta = random.beta + actual = beta(a * 3, b) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, beta, bad_a * 3, b) + assert_raises(ValueError, beta, a * 3, bad_b) + + random = Generator(MT19937(self.seed)) + actual = random.beta(a, b * 3) + assert_array_almost_equal(actual, desired, decimal=14) + + def test_exponential(self): + scale = [1] + bad_scale = [-1] + desired = np.array([0.67245993212806, 0.21380495318094, 0.7177848928629]) + + random = Generator(MT19937(self.seed)) + actual = random.exponential(scale * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, random.exponential, bad_scale * 3) + + def test_standard_gamma(self): + shape = [1] + bad_shape = [-1] + desired = np.array([0.67245993212806, 0.21380495318094, 0.7177848928629]) + + random = Generator(MT19937(self.seed)) + std_gamma = random.standard_gamma + actual = std_gamma(shape * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, std_gamma, bad_shape * 3) + + def test_gamma(self): + shape = [1] + scale = [2] + bad_shape = [-1] + bad_scale = [-2] + desired = np.array([1.34491986425611, 0.42760990636187, 1.4355697857258]) + + random = Generator(MT19937(self.seed)) + gamma = random.gamma + actual = gamma(shape * 3, scale) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, gamma, bad_shape * 3, scale) + assert_raises(ValueError, gamma, shape * 3, bad_scale) + + random = Generator(MT19937(self.seed)) + gamma = random.gamma + actual = gamma(shape, scale * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, gamma, bad_shape, scale * 3) + assert_raises(ValueError, gamma, shape, bad_scale * 3) + + def test_f(self): + dfnum = [1] + dfden = [2] + bad_dfnum = [-1] + bad_dfden = [-2] + desired = np.array([0.07765056244107, 
7.72951397913186, 0.05786093891763]) + + random = Generator(MT19937(self.seed)) + f = random.f + actual = f(dfnum * 3, dfden) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, f, bad_dfnum * 3, dfden) + assert_raises(ValueError, f, dfnum * 3, bad_dfden) + + random = Generator(MT19937(self.seed)) + f = random.f + actual = f(dfnum, dfden * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, f, bad_dfnum, dfden * 3) + assert_raises(ValueError, f, dfnum, bad_dfden * 3) + + def test_noncentral_f(self): + dfnum = [2] + dfden = [3] + nonc = [4] + bad_dfnum = [0] + bad_dfden = [-1] + bad_nonc = [-2] + desired = np.array([2.02434240411421, 12.91838601070124, 1.24395160354629]) + + random = Generator(MT19937(self.seed)) + nonc_f = random.noncentral_f + actual = nonc_f(dfnum * 3, dfden, nonc) + assert_array_almost_equal(actual, desired, decimal=14) + assert np.all(np.isnan(nonc_f(dfnum, dfden, [np.nan] * 3))) + + assert_raises(ValueError, nonc_f, bad_dfnum * 3, dfden, nonc) + assert_raises(ValueError, nonc_f, dfnum * 3, bad_dfden, nonc) + assert_raises(ValueError, nonc_f, dfnum * 3, dfden, bad_nonc) + + random = Generator(MT19937(self.seed)) + nonc_f = random.noncentral_f + actual = nonc_f(dfnum, dfden * 3, nonc) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, nonc_f, bad_dfnum, dfden * 3, nonc) + assert_raises(ValueError, nonc_f, dfnum, bad_dfden * 3, nonc) + assert_raises(ValueError, nonc_f, dfnum, dfden * 3, bad_nonc) + + random = Generator(MT19937(self.seed)) + nonc_f = random.noncentral_f + actual = nonc_f(dfnum, dfden, nonc * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, nonc_f, bad_dfnum, dfden, nonc * 3) + assert_raises(ValueError, nonc_f, dfnum, bad_dfden, nonc * 3) + assert_raises(ValueError, nonc_f, dfnum, dfden, bad_nonc * 3) + + def test_noncentral_f_small_df(self): + random = Generator(MT19937(self.seed)) + desired = np.array([0.04714867120827, 0.1239390327694]) + actual = random.noncentral_f(0.9, 0.9, 2, size=2) + assert_array_almost_equal(actual, desired, decimal=14) + + def test_chisquare(self): + df = [1] + bad_df = [-1] + desired = np.array([0.05573640064251, 1.47220224353539, 2.9469379318589]) + + random = Generator(MT19937(self.seed)) + actual = random.chisquare(df * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, random.chisquare, bad_df * 3) + + def test_noncentral_chisquare(self): + df = [1] + nonc = [2] + bad_df = [-1] + bad_nonc = [-2] + desired = np.array([0.07710766249436, 5.27829115110304, 0.630732147399]) + + random = Generator(MT19937(self.seed)) + nonc_chi = random.noncentral_chisquare + actual = nonc_chi(df * 3, nonc) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, nonc_chi, bad_df * 3, nonc) + assert_raises(ValueError, nonc_chi, df * 3, bad_nonc) + + random = Generator(MT19937(self.seed)) + nonc_chi = random.noncentral_chisquare + actual = nonc_chi(df, nonc * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, nonc_chi, bad_df, nonc * 3) + assert_raises(ValueError, nonc_chi, df, bad_nonc * 3) + + def test_standard_t(self): + df = [1] + bad_df = [-1] + desired = np.array([-1.39498829447098, -1.23058658835223, 0.17207021065983]) + + random = Generator(MT19937(self.seed)) + actual = random.standard_t(df * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, random.standard_t, 
bad_df * 3) + + def test_vonmises(self): + mu = [2] + kappa = [1] + bad_kappa = [-1] + desired = np.array([2.25935584988528, 2.23326261461399, -2.84152146503326]) + + random = Generator(MT19937(self.seed)) + actual = random.vonmises(mu * 3, kappa) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, random.vonmises, mu * 3, bad_kappa) + + random = Generator(MT19937(self.seed)) + actual = random.vonmises(mu, kappa * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, random.vonmises, mu, bad_kappa * 3) + + def test_pareto(self): + a = [1] + bad_a = [-1] + desired = np.array([0.95905052946317, 0.2383810889437 , 1.04988745750013]) + + random = Generator(MT19937(self.seed)) + actual = random.pareto(a * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, random.pareto, bad_a * 3) + + def test_weibull(self): + a = [1] + bad_a = [-1] + desired = np.array([0.67245993212806, 0.21380495318094, 0.7177848928629]) + + random = Generator(MT19937(self.seed)) + actual = random.weibull(a * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, random.weibull, bad_a * 3) + + def test_power(self): + a = [1] + bad_a = [-1] + desired = np.array([0.48954864361052, 0.19249412888486, 0.51216834058807]) + + random = Generator(MT19937(self.seed)) + actual = random.power(a * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, random.power, bad_a * 3) + + def test_laplace(self): + loc = [0] + scale = [1] + bad_scale = [-1] + desired = np.array([-1.09698732625119, -0.93470271947368, 0.71592671378202]) + + random = Generator(MT19937(self.seed)) + laplace = random.laplace + actual = laplace(loc * 3, scale) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, laplace, loc * 3, bad_scale) + + random = Generator(MT19937(self.seed)) + laplace = random.laplace + actual = laplace(loc, scale * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, laplace, loc, bad_scale * 3) + + def test_gumbel(self): + loc = [0] + scale = [1] + bad_scale = [-1] + desired = np.array([1.70020068231762, 1.52054354273631, -0.34293267607081]) + + random = Generator(MT19937(self.seed)) + gumbel = random.gumbel + actual = gumbel(loc * 3, scale) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, gumbel, loc * 3, bad_scale) + + random = Generator(MT19937(self.seed)) + gumbel = random.gumbel + actual = gumbel(loc, scale * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, gumbel, loc, bad_scale * 3) + + def test_logistic(self): + loc = [0] + scale = [1] + bad_scale = [-1] + desired = np.array([-1.607487640433, -1.40925686003678, 1.12887112820397]) + + random = Generator(MT19937(self.seed)) + actual = random.logistic(loc * 3, scale) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, random.logistic, loc * 3, bad_scale) + + random = Generator(MT19937(self.seed)) + actual = random.logistic(loc, scale * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, random.logistic, loc, bad_scale * 3) + assert_equal(random.logistic(1.0, 0.0), 1.0) + + def test_lognormal(self): + mean = [0] + sigma = [1] + bad_sigma = [-1] + desired = np.array([0.67884390500697, 2.21653186290321, 1.01990310084276]) + + random = Generator(MT19937(self.seed)) + lognormal = random.lognormal + actual = 
lognormal(mean * 3, sigma) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, lognormal, mean * 3, bad_sigma) + + random = Generator(MT19937(self.seed)) + actual = random.lognormal(mean, sigma * 3) + assert_raises(ValueError, random.lognormal, mean, bad_sigma * 3) + + def test_rayleigh(self): + scale = [1] + bad_scale = [-1] + desired = np.array( + [1.1597068009872629, + 0.6539188836253857, + 1.1981526554349398] + ) + + random = Generator(MT19937(self.seed)) + actual = random.rayleigh(scale * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, random.rayleigh, bad_scale * 3) + + def test_wald(self): + mean = [0.5] + scale = [1] + bad_mean = [0] + bad_scale = [-2] + desired = np.array([0.38052407392905, 0.50701641508592, 0.484935249864]) + + random = Generator(MT19937(self.seed)) + actual = random.wald(mean * 3, scale) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, random.wald, bad_mean * 3, scale) + assert_raises(ValueError, random.wald, mean * 3, bad_scale) + + random = Generator(MT19937(self.seed)) + actual = random.wald(mean, scale * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, random.wald, bad_mean, scale * 3) + assert_raises(ValueError, random.wald, mean, bad_scale * 3) + + def test_triangular(self): + left = [1] + right = [3] + mode = [2] + bad_left_one = [3] + bad_mode_one = [4] + bad_left_two, bad_mode_two = right * 2 + desired = np.array([1.57781954604754, 1.62665986867957, 2.30090130831326]) + + random = Generator(MT19937(self.seed)) + triangular = random.triangular + actual = triangular(left * 3, mode, right) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, triangular, bad_left_one * 3, mode, right) + assert_raises(ValueError, triangular, left * 3, bad_mode_one, right) + assert_raises(ValueError, triangular, bad_left_two * 3, bad_mode_two, + right) + + random = Generator(MT19937(self.seed)) + triangular = random.triangular + actual = triangular(left, mode * 3, right) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, triangular, bad_left_one, mode * 3, right) + assert_raises(ValueError, triangular, left, bad_mode_one * 3, right) + assert_raises(ValueError, triangular, bad_left_two, bad_mode_two * 3, + right) + + random = Generator(MT19937(self.seed)) + triangular = random.triangular + actual = triangular(left, mode, right * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, triangular, bad_left_one, mode, right * 3) + assert_raises(ValueError, triangular, left, bad_mode_one, right * 3) + assert_raises(ValueError, triangular, bad_left_two, bad_mode_two, + right * 3) + + assert_raises(ValueError, triangular, 10., 0., 20.) + assert_raises(ValueError, triangular, 10., 25., 20.) + assert_raises(ValueError, triangular, 10., 10., 10.) 
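+ # Sketch (hypothetical test, not upstream): the broadcasting contract these
+ # tests exercise, stated directly, length-1 and scalar arguments broadcast
+ # against length-3 ones to give a length-3 result.
+ def test_broadcast_shape_sketch(self):
+ random = Generator(MT19937(self.seed))
+ assert random.uniform([0., 0., 0.], 1.).shape == (3,)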
+ + def test_binomial(self): + n = [1] + p = [0.5] + bad_n = [-1] + bad_p_one = [-1] + bad_p_two = [1.5] + desired = np.array([0, 0, 1]) + + random = Generator(MT19937(self.seed)) + binom = random.binomial + actual = binom(n * 3, p) + assert_array_equal(actual, desired) + assert_raises(ValueError, binom, bad_n * 3, p) + assert_raises(ValueError, binom, n * 3, bad_p_one) + assert_raises(ValueError, binom, n * 3, bad_p_two) + + random = Generator(MT19937(self.seed)) + actual = random.binomial(n, p * 3) + assert_array_equal(actual, desired) + assert_raises(ValueError, binom, bad_n, p * 3) + assert_raises(ValueError, binom, n, bad_p_one * 3) + assert_raises(ValueError, binom, n, bad_p_two * 3) + + def test_negative_binomial(self): + n = [1] + p = [0.5] + bad_n = [-1] + bad_p_one = [-1] + bad_p_two = [1.5] + desired = np.array([0, 2, 1], dtype=np.int64) + + random = Generator(MT19937(self.seed)) + neg_binom = random.negative_binomial + actual = neg_binom(n * 3, p) + assert_array_equal(actual, desired) + assert_raises(ValueError, neg_binom, bad_n * 3, p) + assert_raises(ValueError, neg_binom, n * 3, bad_p_one) + assert_raises(ValueError, neg_binom, n * 3, bad_p_two) + + random = Generator(MT19937(self.seed)) + neg_binom = random.negative_binomial + actual = neg_binom(n, p * 3) + assert_array_equal(actual, desired) + assert_raises(ValueError, neg_binom, bad_n, p * 3) + assert_raises(ValueError, neg_binom, n, bad_p_one * 3) + assert_raises(ValueError, neg_binom, n, bad_p_two * 3) + + def test_poisson(self): + + lam = [1] + bad_lam_one = [-1] + desired = np.array([0, 0, 3]) + + random = Generator(MT19937(self.seed)) + max_lam = random._poisson_lam_max + bad_lam_two = [max_lam * 2] + poisson = random.poisson + actual = poisson(lam * 3) + assert_array_equal(actual, desired) + assert_raises(ValueError, poisson, bad_lam_one * 3) + assert_raises(ValueError, poisson, bad_lam_two * 3) + + def test_zipf(self): + a = [2] + bad_a = [0] + desired = np.array([1, 8, 1]) + + random = Generator(MT19937(self.seed)) + zipf = random.zipf + actual = zipf(a * 3) + assert_array_equal(actual, desired) + assert_raises(ValueError, zipf, bad_a * 3) + with np.errstate(invalid='ignore'): + assert_raises(ValueError, zipf, np.nan) + assert_raises(ValueError, zipf, [0, 0, np.nan]) + + def test_geometric(self): + p = [0.5] + bad_p_one = [-1] + bad_p_two = [1.5] + desired = np.array([1, 1, 3]) + + random = Generator(MT19937(self.seed)) + geometric = random.geometric + actual = geometric(p * 3) + assert_array_equal(actual, desired) + assert_raises(ValueError, geometric, bad_p_one * 3) + assert_raises(ValueError, geometric, bad_p_two * 3) + + def test_hypergeometric(self): + ngood = [1] + nbad = [2] + nsample = [2] + bad_ngood = [-1] + bad_nbad = [-2] + bad_nsample_one = [-1] + bad_nsample_two = [4] + desired = np.array([0, 0, 1]) + + random = Generator(MT19937(self.seed)) + actual = random.hypergeometric(ngood * 3, nbad, nsample) + assert_array_equal(actual, desired) + assert_raises(ValueError, random.hypergeometric, bad_ngood * 3, nbad, nsample) + assert_raises(ValueError, random.hypergeometric, ngood * 3, bad_nbad, nsample) + assert_raises(ValueError, random.hypergeometric, ngood * 3, nbad, bad_nsample_one) + assert_raises(ValueError, random.hypergeometric, ngood * 3, nbad, bad_nsample_two) + + random = Generator(MT19937(self.seed)) + actual = random.hypergeometric(ngood, nbad * 3, nsample) + assert_array_equal(actual, desired) + assert_raises(ValueError, random.hypergeometric, bad_ngood, nbad * 3, nsample) + 
assert_raises(ValueError, random.hypergeometric, ngood, bad_nbad * 3, nsample) + assert_raises(ValueError, random.hypergeometric, ngood, nbad * 3, bad_nsample_one) + assert_raises(ValueError, random.hypergeometric, ngood, nbad * 3, bad_nsample_two) + + random = Generator(MT19937(self.seed)) + hypergeom = random.hypergeometric + actual = hypergeom(ngood, nbad, nsample * 3) + assert_array_equal(actual, desired) + assert_raises(ValueError, hypergeom, bad_ngood, nbad, nsample * 3) + assert_raises(ValueError, hypergeom, ngood, bad_nbad, nsample * 3) + assert_raises(ValueError, hypergeom, ngood, nbad, bad_nsample_one * 3) + assert_raises(ValueError, hypergeom, ngood, nbad, bad_nsample_two * 3) + + assert_raises(ValueError, hypergeom, -1, 10, 20) + assert_raises(ValueError, hypergeom, 10, -1, 20) + assert_raises(ValueError, hypergeom, 10, 10, -1) + assert_raises(ValueError, hypergeom, 10, 10, 25) + + # ValueError for arguments that are too big. + assert_raises(ValueError, hypergeom, 2**30, 10, 20) + assert_raises(ValueError, hypergeom, 999, 2**31, 50) + assert_raises(ValueError, hypergeom, 999, [2**29, 2**30], 1000) + + def test_logseries(self): + p = [0.5] + bad_p_one = [2] + bad_p_two = [-1] + desired = np.array([1, 1, 1]) + + random = Generator(MT19937(self.seed)) + logseries = random.logseries + actual = logseries(p * 3) + assert_array_equal(actual, desired) + assert_raises(ValueError, logseries, bad_p_one * 3) + assert_raises(ValueError, logseries, bad_p_two * 3) + + def test_multinomial(self): + random = Generator(MT19937(self.seed)) + actual = random.multinomial([5, 20], [1 / 6.] * 6, size=(3, 2)) + desired = np.array([[[0, 0, 2, 1, 2, 0], + [2, 3, 6, 4, 2, 3]], + [[1, 0, 1, 0, 2, 1], + [7, 2, 2, 1, 4, 4]], + [[0, 2, 0, 1, 2, 0], + [3, 2, 3, 3, 4, 5]]], dtype=np.int64) + assert_array_equal(actual, desired) + + random = Generator(MT19937(self.seed)) + actual = random.multinomial([5, 20], [1 / 6.] * 6) + desired = np.array([[0, 0, 2, 1, 2, 0], + [2, 3, 6, 4, 2, 3]], dtype=np.int64) + assert_array_equal(actual, desired) + + random = Generator(MT19937(self.seed)) + actual = random.multinomial([5, 20], [[1 / 6.] * 6] * 2) + desired = np.array([[0, 0, 2, 1, 2, 0], + [2, 3, 6, 4, 2, 3]], dtype=np.int64) + assert_array_equal(actual, desired) + + random = Generator(MT19937(self.seed)) + actual = random.multinomial([[5], [20]], [[1 / 6.] 
* 6] * 2) + desired = np.array([[[0, 0, 2, 1, 2, 0], + [0, 0, 2, 1, 1, 1]], + [[4, 2, 3, 3, 5, 3], + [7, 2, 2, 1, 4, 4]]], dtype=np.int64) + assert_array_equal(actual, desired) + + @pytest.mark.parametrize("n", [10, + np.array([10, 10]), + np.array([[[10]], [[10]]]) + ] + ) + def test_multinomial_pval_broadcast(self, n): + random = Generator(MT19937(self.seed)) + pvals = np.array([1 / 4] * 4) + actual = random.multinomial(n, pvals) + n_shape = tuple() if isinstance(n, int) else n.shape + expected_shape = n_shape + (4,) + assert actual.shape == expected_shape + pvals = np.vstack([pvals, pvals]) + actual = random.multinomial(n, pvals) + expected_shape = np.broadcast_shapes(n_shape, pvals.shape[:-1]) + (4,) + assert actual.shape == expected_shape + + pvals = np.vstack([[pvals], [pvals]]) + actual = random.multinomial(n, pvals) + expected_shape = np.broadcast_shapes(n_shape, pvals.shape[:-1]) + assert actual.shape == expected_shape + (4,) + actual = random.multinomial(n, pvals, size=(3, 2) + expected_shape) + assert actual.shape == (3, 2) + expected_shape + (4,) + + with pytest.raises(ValueError): + # Ensure that size is not broadcast + actual = random.multinomial(n, pvals, size=(1,) * 6) + + def test_invalid_pvals_broadcast(self): + random = Generator(MT19937(self.seed)) + pvals = [[1 / 6] * 6, [1 / 4] * 6] + assert_raises(ValueError, random.multinomial, 1, pvals) + assert_raises(ValueError, random.multinomial, 6, 0.5) + + def test_empty_outputs(self): + random = Generator(MT19937(self.seed)) + actual = random.multinomial(np.empty((10, 0, 6), "i8"), [1 / 6] * 6) + assert actual.shape == (10, 0, 6, 6) + actual = random.multinomial(12, np.empty((10, 0, 10))) + assert actual.shape == (10, 0, 10) + actual = random.multinomial(np.empty((3, 0, 7), "i8"), + np.empty((3, 0, 7, 4))) + assert actual.shape == (3, 0, 7, 4) + + +class TestThread: + # make sure each state produces the same sequence even in threads + def setup(self): + self.seeds = range(4) + + def check_function(self, function, sz): + from threading import Thread + + out1 = np.empty((len(self.seeds),) + sz) + out2 = np.empty((len(self.seeds),) + sz) + + # threaded generation + t = [Thread(target=function, args=(Generator(MT19937(s)), o)) + for s, o in zip(self.seeds, out1)] + [x.start() for x in t] + [x.join() for x in t] + + # the same serial + for s, o in zip(self.seeds, out2): + function(Generator(MT19937(s)), o) + + # these platforms change x87 fpu precision mode in threads + if np.intp().dtype.itemsize == 4 and sys.platform == "win32": + assert_array_almost_equal(out1, out2) + else: + assert_array_equal(out1, out2) + + def test_normal(self): + def gen_random(state, out): + out[...] = state.normal(size=10000) + + self.check_function(gen_random, sz=(10000,)) + + def test_exp(self): + def gen_random(state, out): + out[...] = state.exponential(scale=np.ones((100, 1000))) + + self.check_function(gen_random, sz=(100, 1000)) + + def test_multinomial(self): + def gen_random(state, out): + out[...] = state.multinomial(10, [1 / 6.] 
* 6, size=10000) + + self.check_function(gen_random, sz=(10000, 6)) + + +# See Issue #4263 +class TestSingleEltArrayInput: + def setup(self): + self.argOne = np.array([2]) + self.argTwo = np.array([3]) + self.argThree = np.array([4]) + self.tgtShape = (1,) + + def test_one_arg_funcs(self): + funcs = (random.exponential, random.standard_gamma, + random.chisquare, random.standard_t, + random.pareto, random.weibull, + random.power, random.rayleigh, + random.poisson, random.zipf, + random.geometric, random.logseries) + + probfuncs = (random.geometric, random.logseries) + + for func in funcs: + if func in probfuncs: # p < 1.0 + out = func(np.array([0.5])) + + else: + out = func(self.argOne) + + assert_equal(out.shape, self.tgtShape) + + def test_two_arg_funcs(self): + funcs = (random.uniform, random.normal, + random.beta, random.gamma, + random.f, random.noncentral_chisquare, + random.vonmises, random.laplace, + random.gumbel, random.logistic, + random.lognormal, random.wald, + random.binomial, random.negative_binomial) + + probfuncs = (random.binomial, random.negative_binomial) + + for func in funcs: + if func in probfuncs: # p <= 1 + argTwo = np.array([0.5]) + + else: + argTwo = self.argTwo + + out = func(self.argOne, argTwo) + assert_equal(out.shape, self.tgtShape) + + out = func(self.argOne[0], argTwo) + assert_equal(out.shape, self.tgtShape) + + out = func(self.argOne, argTwo[0]) + assert_equal(out.shape, self.tgtShape) + + def test_integers(self, endpoint): + itype = [np.bool_, np.int8, np.uint8, np.int16, np.uint16, + np.int32, np.uint32, np.int64, np.uint64] + func = random.integers + high = np.array([1]) + low = np.array([0]) + + for dt in itype: + out = func(low, high, endpoint=endpoint, dtype=dt) + assert_equal(out.shape, self.tgtShape) + + out = func(low[0], high, endpoint=endpoint, dtype=dt) + assert_equal(out.shape, self.tgtShape) + + out = func(low, high[0], endpoint=endpoint, dtype=dt) + assert_equal(out.shape, self.tgtShape) + + def test_three_arg_funcs(self): + funcs = [random.noncentral_f, random.triangular, + random.hypergeometric] + + for func in funcs: + out = func(self.argOne, self.argTwo, self.argThree) + assert_equal(out.shape, self.tgtShape) + + out = func(self.argOne[0], self.argTwo, self.argThree) + assert_equal(out.shape, self.tgtShape) + + out = func(self.argOne, self.argTwo[0], self.argThree) + assert_equal(out.shape, self.tgtShape) + + +@pytest.mark.parametrize("config", JUMP_TEST_DATA) +def test_jumped(config): + # Each config contains the initial seed, a number of raw steps, + # the sha256 hashes of the initial and the final states' keys and + # the position of the initial and the final state. + # These were produced using the original C implementation.
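+ # (Context, hedged: MT19937.jumped() returns a new bit generator whose
+ # state is advanced as if 2**128 draws had been made, so the "jumped"
+ # hashes below pin down a stream disjoint from the original one.)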
+ seed = config["seed"] + steps = config["steps"] + + mt19937 = MT19937(seed) + # Burn step + mt19937.random_raw(steps) + key = mt19937.state["state"]["key"] + if sys.byteorder == 'big': + key = key.byteswap() + sha256 = hashlib.sha256(key) + assert mt19937.state["state"]["pos"] == config["initial"]["pos"] + assert sha256.hexdigest() == config["initial"]["key_sha256"] + + jumped = mt19937.jumped() + key = jumped.state["state"]["key"] + if sys.byteorder == 'big': + key = key.byteswap() + sha256 = hashlib.sha256(key) + assert jumped.state["state"]["pos"] == config["jumped"]["pos"] + assert sha256.hexdigest() == config["jumped"]["key_sha256"] + + +def test_broadcast_size_error(): + mu = np.ones(3) + sigma = np.ones((4, 3)) + size = (10, 4, 2) + assert random.normal(mu, sigma, size=(5, 4, 3)).shape == (5, 4, 3) + with pytest.raises(ValueError): + random.normal(mu, sigma, size=size) + with pytest.raises(ValueError): + random.normal(mu, sigma, size=(1, 3)) + with pytest.raises(ValueError): + random.normal(mu, sigma, size=(4, 1, 1)) + # 1 arg + shape = np.ones((4, 3)) + with pytest.raises(ValueError): + random.standard_gamma(shape, size=size) + with pytest.raises(ValueError): + random.standard_gamma(shape, size=(3,)) + with pytest.raises(ValueError): + random.standard_gamma(shape, size=3) + # Check out + out = np.empty(size) + with pytest.raises(ValueError): + random.standard_gamma(shape, out=out) + + # 2 arg + with pytest.raises(ValueError): + random.binomial(1, [0.3, 0.7], size=(2, 1)) + with pytest.raises(ValueError): + random.binomial([1, 2], 0.3, size=(2, 1)) + with pytest.raises(ValueError): + random.binomial([1, 2], [0.3, 0.7], size=(2, 1)) + with pytest.raises(ValueError): + random.multinomial([2, 2], [.3, .7], size=(2, 1)) + + # 3 arg + a = random.chisquare(5, size=3) + b = random.chisquare(5, size=(4, 3)) + c = random.chisquare(5, size=(5, 4, 3)) + assert random.noncentral_f(a, b, c).shape == (5, 4, 3) + with pytest.raises(ValueError, match=r"Output size \(6, 5, 1, 1\) is"): + random.noncentral_f(a, b, c, size=(6, 5, 1, 1)) + + +def test_broadcast_size_scalar(): + mu = np.ones(3) + sigma = np.ones(3) + random.normal(mu, sigma, size=3) + with pytest.raises(ValueError): + random.normal(mu, sigma, size=2) + + +def test_ragged_shuffle(): + # GH 18142 + seq = [[], [], 1] + gen = Generator(MT19937(0)) + assert_no_warnings(gen.shuffle, seq) + assert seq == [1, [], []] + + +@pytest.mark.parametrize("high", [-2, [-2]]) +@pytest.mark.parametrize("endpoint", [True, False]) +def test_single_arg_integer_exception(high, endpoint): + # GH 14333 + gen = Generator(MT19937(0)) + msg = 'high < 0' if endpoint else 'high <= 0' + with pytest.raises(ValueError, match=msg): + gen.integers(high, endpoint=endpoint) + msg = 'low > high' if endpoint else 'low >= high' + with pytest.raises(ValueError, match=msg): + gen.integers(-1, high, endpoint=endpoint) + with pytest.raises(ValueError, match=msg): + gen.integers([-1], high, endpoint=endpoint) + + +@pytest.mark.parametrize("dtype", ["f4", "f8"]) +def test_c_contig_req_out(dtype): + # GH 18704 + out = np.empty((2, 3), order="F", dtype=dtype) + shape = [1, 2, 3] + with pytest.raises(ValueError, match="Supplied output array"): + random.standard_gamma(shape, out=out, dtype=dtype) + with pytest.raises(ValueError, match="Supplied output array"): + random.standard_gamma(shape, out=out, size=out.shape, dtype=dtype) + + +@pytest.mark.parametrize("dtype", ["f4", "f8"]) +@pytest.mark.parametrize("order", ["F", "C"]) +@pytest.mark.parametrize("dist", [random.standard_normal, 
random.random]) +def test_contig_req_out(dist, order, dtype): + # GH 18704 + out = np.empty((2, 3), dtype=dtype, order=order) + variates = dist(out=out, dtype=dtype) + assert variates is out + variates = dist(out=out, dtype=dtype, size=out.shape) + assert variates is out diff --git a/.local/lib/python3.8/site-packages/numpy/random/tests/test_generator_mt19937_regressions.py b/.local/lib/python3.8/site-packages/numpy/random/tests/test_generator_mt19937_regressions.py new file mode 100644 index 0000000..0227d65 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/tests/test_generator_mt19937_regressions.py @@ -0,0 +1,150 @@ +from numpy.testing import (assert_, assert_array_equal) +import numpy as np +import pytest +from numpy.random import Generator, MT19937 + +mt19937 = Generator(MT19937()) + + +class TestRegression: + + def test_vonmises_range(self): + # Make sure generated random variables are in [-pi, pi]. + # Regression test for ticket #986. + for mu in np.linspace(-7., 7., 5): + r = mt19937.vonmises(mu, 1, 50) + assert_(np.all(r > -np.pi) and np.all(r <= np.pi)) + + def test_hypergeometric_range(self): + # Test for ticket #921 + assert_(np.all(mt19937.hypergeometric(3, 18, 11, size=10) < 4)) + assert_(np.all(mt19937.hypergeometric(18, 3, 11, size=10) > 0)) + + # Test for ticket #5623 + args = (2**20 - 2, 2**20 - 2, 2**20 - 2) # Check for 32-bit systems + assert_(mt19937.hypergeometric(*args) > 0) + + def test_logseries_convergence(self): + # Test for ticket #923 + N = 1000 + mt19937 = Generator(MT19937(0)) + rvsn = mt19937.logseries(0.8, size=N) + # these two frequency counts should be close to theoretical + # numbers with this large sample + # theoretical large N result is 0.49706795 + freq = np.sum(rvsn == 1) / N + msg = f'Frequency was {freq:f}, should be > 0.45' + assert_(freq > 0.45, msg) + # theoretical large N result is 0.19882718 + freq = np.sum(rvsn == 2) / N + msg = f'Frequency was {freq:f}, should be < 0.23' + assert_(freq < 0.23, msg) + + def test_shuffle_mixed_dimension(self): + # Test for trac ticket #2074 + for t in [[1, 2, 3, None], + [(1, 1), (2, 2), (3, 3), None], + [1, (2, 2), (3, 3), None], + [(1, 1), 2, 3, None]]: + mt19937 = Generator(MT19937(12345)) + shuffled = np.array(t, dtype=object) + mt19937.shuffle(shuffled) + expected = np.array([t[2], t[0], t[3], t[1]], dtype=object) + assert_array_equal(np.array(shuffled, dtype=object), expected) + + def test_call_within_randomstate(self): + # Check that custom BitGenerator does not call into global state + res = np.array([1, 8, 0, 1, 5, 3, 3, 8, 1, 4]) + for i in range(3): + mt19937 = Generator(MT19937(i)) + m = Generator(MT19937(4321)) + # If m.state is not honored, the result will change + assert_array_equal(m.choice(10, size=10, p=np.ones(10)/10.), res) + + def test_multivariate_normal_size_types(self): + # Test for multivariate_normal issue with 'size' argument. + # Check that the multivariate_normal size argument can be a + # numpy integer. 
+ mt19937.multivariate_normal([0], [[0]], size=1) + mt19937.multivariate_normal([0], [[0]], size=np.int_(1)) + mt19937.multivariate_normal([0], [[0]], size=np.int64(1)) + + def test_beta_small_parameters(self): + # Test that beta with small a and b parameters does not produce + # NaNs due to roundoff errors causing 0 / 0, gh-5851 + mt19937 = Generator(MT19937(1234567890)) + x = mt19937.beta(0.0001, 0.0001, size=100) + assert_(not np.any(np.isnan(x)), 'Nans in mt19937.beta') + + def test_choice_sum_of_probs_tolerance(self): + # The sum of probs should be 1.0 with some tolerance. + # For low precision dtypes the tolerance was too tight. + # See numpy github issue 6123. + mt19937 = Generator(MT19937(1234)) + a = [1, 2, 3] + counts = [4, 4, 2] + for dt in np.float16, np.float32, np.float64: + probs = np.array(counts, dtype=dt) / sum(counts) + c = mt19937.choice(a, p=probs) + assert_(c in a) + with pytest.raises(ValueError): + mt19937.choice(a, p=probs*0.9) + + def test_shuffle_of_array_of_different_length_strings(self): + # Test that permuting an array of different length strings + # will not cause a segfault on garbage collection + # Tests gh-7710 + mt19937 = Generator(MT19937(1234)) + + a = np.array(['a', 'a' * 1000]) + + for _ in range(100): + mt19937.shuffle(a) + + # Force Garbage Collection - should not segfault. + import gc + gc.collect() + + def test_shuffle_of_array_of_objects(self): + # Test that permuting an array of objects will not cause + # a segfault on garbage collection. + # See gh-7719 + mt19937 = Generator(MT19937(1234)) + a = np.array([np.arange(1), np.arange(4)], dtype=object) + + for _ in range(1000): + mt19937.shuffle(a) + + # Force Garbage Collection - should not segfault. + import gc + gc.collect() + + def test_permutation_subclass(self): + class N(np.ndarray): + pass + + mt19937 = Generator(MT19937(1)) + orig = np.arange(3).view(N) + perm = mt19937.permutation(orig) + assert_array_equal(perm, np.array([2, 0, 1])) + assert_array_equal(orig, np.arange(3).view(N)) + + class M: + a = np.arange(5) + + def __array__(self): + return self.a + + mt19937 = Generator(MT19937(1)) + m = M() + perm = mt19937.permutation(m) + assert_array_equal(perm, np.array([4, 1, 3, 0, 2])) + assert_array_equal(m.__array__(), np.arange(5)) + + def test_gamma_0(self): + assert mt19937.standard_gamma(0.0) == 0.0 + assert_array_equal(mt19937.standard_gamma([0.0]), 0.0) + + actual = mt19937.standard_gamma([0.0], dtype='float') + expected = np.array([0.], dtype=np.float32) + assert_array_equal(actual, expected) diff --git a/.local/lib/python3.8/site-packages/numpy/random/tests/test_random.py b/.local/lib/python3.8/site-packages/numpy/random/tests/test_random.py new file mode 100644 index 0000000..6a584a5 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/tests/test_random.py @@ -0,0 +1,1739 @@ +import warnings + +import pytest + +import numpy as np +from numpy.testing import ( + assert_, assert_raises, assert_equal, assert_warns, + assert_no_warnings, assert_array_equal, assert_array_almost_equal, + suppress_warnings + ) +from numpy import random +import sys + + +class TestSeed: + def test_scalar(self): + s = np.random.RandomState(0) + assert_equal(s.randint(1000), 684) + s = np.random.RandomState(4294967295) + assert_equal(s.randint(1000), 419) + + def test_array(self): + s = np.random.RandomState(range(10)) + assert_equal(s.randint(1000), 468) + s = np.random.RandomState(np.arange(10)) + assert_equal(s.randint(1000), 468) + s = np.random.RandomState([0]) + 
assert_equal(s.randint(1000), 973) + s = np.random.RandomState([4294967295]) + assert_equal(s.randint(1000), 265) + + def test_invalid_scalar(self): + # seed must be an unsigned 32 bit integer + assert_raises(TypeError, np.random.RandomState, -0.5) + assert_raises(ValueError, np.random.RandomState, -1) + + def test_invalid_array(self): + # seed must be an unsigned 32 bit integer + assert_raises(TypeError, np.random.RandomState, [-0.5]) + assert_raises(ValueError, np.random.RandomState, [-1]) + assert_raises(ValueError, np.random.RandomState, [4294967296]) + assert_raises(ValueError, np.random.RandomState, [1, 2, 4294967296]) + assert_raises(ValueError, np.random.RandomState, [1, -2, 4294967296]) + + def test_invalid_array_shape(self): + # gh-9832 + assert_raises(ValueError, np.random.RandomState, + np.array([], dtype=np.int64)) + assert_raises(ValueError, np.random.RandomState, [[1, 2, 3]]) + assert_raises(ValueError, np.random.RandomState, [[1, 2, 3], + [4, 5, 6]]) + + +class TestBinomial: + def test_n_zero(self): + # Tests the corner case of n == 0 for the binomial distribution. + # binomial(0, p) should be zero for any p in [0, 1]. + # This test addresses issue #3480. + zeros = np.zeros(2, dtype='int') + for p in [0, .5, 1]: + assert_(random.binomial(0, p) == 0) + assert_array_equal(random.binomial(zeros, p), zeros) + + def test_p_is_nan(self): + # Issue #4571. + assert_raises(ValueError, random.binomial, 1, np.nan) + + +class TestMultinomial: + def test_basic(self): + random.multinomial(100, [0.2, 0.8]) + + def test_zero_probability(self): + random.multinomial(100, [0.2, 0.8, 0.0, 0.0, 0.0]) + + def test_int_negative_interval(self): + assert_(-5 <= random.randint(-5, -1) < -1) + x = random.randint(-5, -1, 5) + assert_(np.all(-5 <= x)) + assert_(np.all(x < -1)) + + def test_size(self): + # gh-3173 + p = [0.5, 0.5] + assert_equal(np.random.multinomial(1, p, np.uint32(1)).shape, (1, 2)) + assert_equal(np.random.multinomial(1, p, np.uint32(1)).shape, (1, 2)) + assert_equal(np.random.multinomial(1, p, np.uint32(1)).shape, (1, 2)) + assert_equal(np.random.multinomial(1, p, [2, 2]).shape, (2, 2, 2)) + assert_equal(np.random.multinomial(1, p, (2, 2)).shape, (2, 2, 2)) + assert_equal(np.random.multinomial(1, p, np.array((2, 2))).shape, + (2, 2, 2)) + + assert_raises(TypeError, np.random.multinomial, 1, p, + float(1)) + + def test_multidimensional_pvals(self): + assert_raises(ValueError, np.random.multinomial, 10, [[0, 1]]) + assert_raises(ValueError, np.random.multinomial, 10, [[0], [1]]) + assert_raises(ValueError, np.random.multinomial, 10, [[[0], [1]], [[1], [0]]]) + assert_raises(ValueError, np.random.multinomial, 10, np.array([[0, 1], [1, 0]])) + + +class TestSetState: + def setup(self): + self.seed = 1234567890 + self.prng = random.RandomState(self.seed) + self.state = self.prng.get_state() + + def test_basic(self): + old = self.prng.tomaxint(16) + self.prng.set_state(self.state) + new = self.prng.tomaxint(16) + assert_(np.all(old == new)) + + def test_gaussian_reset(self): + # Make sure the cached every-other-Gaussian is reset. + old = self.prng.standard_normal(size=3) + self.prng.set_state(self.state) + new = self.prng.standard_normal(size=3) + assert_(np.all(old == new)) + + def test_gaussian_reset_in_media_res(self): + # When the state is saved with a cached Gaussian, make sure the + # cached Gaussian is restored. 
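+ # (Background: the legacy normal path generates Gaussians in pairs and
+ # caches the unused one; the last two entries of the get_state() tuple,
+ # has_gauss and cached_gaussian, carry that cache, which is also why
+ # test_backwards_compatibility below slices the state with [:-2].)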
+ + self.prng.standard_normal() + state = self.prng.get_state() + old = self.prng.standard_normal(size=3) + self.prng.set_state(state) + new = self.prng.standard_normal(size=3) + assert_(np.all(old == new)) + + def test_backwards_compatibility(self): + # Make sure we can accept old state tuples that do not have the + # cached Gaussian value. + old_state = self.state[:-2] + x1 = self.prng.standard_normal(size=16) + self.prng.set_state(old_state) + x2 = self.prng.standard_normal(size=16) + self.prng.set_state(self.state) + x3 = self.prng.standard_normal(size=16) + assert_(np.all(x1 == x2)) + assert_(np.all(x1 == x3)) + + def test_negative_binomial(self): + # Ensure that the negative binomial results take floating point + # arguments without truncation. + self.prng.negative_binomial(0.5, 0.5) + + +class TestRandint: + + rfunc = np.random.randint + + # valid integer/boolean types + itype = [np.bool_, np.int8, np.uint8, np.int16, np.uint16, + np.int32, np.uint32, np.int64, np.uint64] + + def test_unsupported_type(self): + assert_raises(TypeError, self.rfunc, 1, dtype=float) + + def test_bounds_checking(self): + for dt in self.itype: + lbnd = 0 if dt is np.bool_ else np.iinfo(dt).min + ubnd = 2 if dt is np.bool_ else np.iinfo(dt).max + 1 + assert_raises(ValueError, self.rfunc, lbnd - 1, ubnd, dtype=dt) + assert_raises(ValueError, self.rfunc, lbnd, ubnd + 1, dtype=dt) + assert_raises(ValueError, self.rfunc, ubnd, lbnd, dtype=dt) + assert_raises(ValueError, self.rfunc, 1, 0, dtype=dt) + + def test_rng_zero_and_extremes(self): + for dt in self.itype: + lbnd = 0 if dt is np.bool_ else np.iinfo(dt).min + ubnd = 2 if dt is np.bool_ else np.iinfo(dt).max + 1 + + tgt = ubnd - 1 + assert_equal(self.rfunc(tgt, tgt + 1, size=1000, dtype=dt), tgt) + + tgt = lbnd + assert_equal(self.rfunc(tgt, tgt + 1, size=1000, dtype=dt), tgt) + + tgt = (lbnd + ubnd)//2 + assert_equal(self.rfunc(tgt, tgt + 1, size=1000, dtype=dt), tgt) + + def test_full_range(self): + # Test for ticket #1690 + + for dt in self.itype: + lbnd = 0 if dt is np.bool_ else np.iinfo(dt).min + ubnd = 2 if dt is np.bool_ else np.iinfo(dt).max + 1 + + try: + self.rfunc(lbnd, ubnd, dtype=dt) + except Exception as e: + raise AssertionError("No error should have been raised, " + "but one was with the following " + "message:\n\n%s" % str(e)) + + def test_in_bounds_fuzz(self): + # Don't use fixed seed + np.random.seed() + + for dt in self.itype[1:]: + for ubnd in [4, 8, 16]: + vals = self.rfunc(2, ubnd, size=2**16, dtype=dt) + assert_(vals.max() < ubnd) + assert_(vals.min() >= 2) + + vals = self.rfunc(0, 2, size=2**16, dtype=np.bool_) + + assert_(vals.max() < 2) + assert_(vals.min() >= 0) + + def test_repeatability(self): + import hashlib + # We use a sha256 hash of generated sequences of 1000 samples + # in the range [0, 6) for all but bool, where the range + # is [0, 2). Hashes are for little endian numbers. 
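+ # (The mechanism, in one hedged line: a pinned digest such as
+ # hashlib.sha256(val.view(np.int8)).hexdigest() changes whenever the
+ # stream or the integer-generation path changes, so regressions surface
+ # as a mismatch against the table below.)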
+ tgt = {'bool': '509aea74d792fb931784c4b0135392c65aec64beee12b0cc167548a2c3d31e71', + 'int16': '7b07f1a920e46f6d0fe02314155a2330bcfd7635e708da50e536c5ebb631a7d4', + 'int32': 'e577bfed6c935de944424667e3da285012e741892dcb7051a8f1ce68ab05c92f', + 'int64': '0fbead0b06759df2cfb55e43148822d4a1ff953c7eb19a5b08445a63bb64fa9e', + 'int8': '001aac3a5acb935a9b186cbe14a1ca064b8bb2dd0b045d48abeacf74d0203404', + 'uint16': '7b07f1a920e46f6d0fe02314155a2330bcfd7635e708da50e536c5ebb631a7d4', + 'uint32': 'e577bfed6c935de944424667e3da285012e741892dcb7051a8f1ce68ab05c92f', + 'uint64': '0fbead0b06759df2cfb55e43148822d4a1ff953c7eb19a5b08445a63bb64fa9e', + 'uint8': '001aac3a5acb935a9b186cbe14a1ca064b8bb2dd0b045d48abeacf74d0203404'} + + for dt in self.itype[1:]: + np.random.seed(1234) + + # view as little endian for hash + if sys.byteorder == 'little': + val = self.rfunc(0, 6, size=1000, dtype=dt) + else: + val = self.rfunc(0, 6, size=1000, dtype=dt).byteswap() + + res = hashlib.sha256(val.view(np.int8)).hexdigest() + assert_(tgt[np.dtype(dt).name] == res) + + # bools do not depend on endianness + np.random.seed(1234) + val = self.rfunc(0, 2, size=1000, dtype=bool).view(np.int8) + res = hashlib.sha256(val).hexdigest() + assert_(tgt[np.dtype(bool).name] == res) + + def test_int64_uint64_corner_case(self): + # When stored in Numpy arrays, `lbnd` is cast + # as np.int64, and `ubnd` is cast as np.uint64. + # Checking whether `lbnd` >= `ubnd` used to be + # done solely via direct comparison, which is incorrect + # because when Numpy compares the two numbers, + # it casts both to np.float64, there being + # no integer superset of np.int64 and np.uint64. + # `lbnd` is not exactly representable as np.float64 + # and rounds up to the same double as `ubnd`, + # so the comparison wrongly concluded that `lbnd` + # equaled `ubnd`, raising a spurious ValueError. + + dt = np.int64 + tgt = np.iinfo(np.int64).max + lbnd = np.int64(np.iinfo(np.int64).max) + ubnd = np.uint64(np.iinfo(np.int64).max + 1) + + # None of these function calls should + # generate a ValueError now.
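+ # (Worked example of the pitfall: both np.float64(2**63 - 1) and
+ # np.float64(2**63) round to the same double, 9223372036854775808.0,
+ # so a float-mediated comparison concludes lbnd == ubnd even though
+ # lbnd < ubnd as integers.)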
+ actual = np.random.randint(lbnd, ubnd, dtype=dt) + assert_equal(actual, tgt) + + def test_respect_dtype_singleton(self): + # See gh-7203 + for dt in self.itype: + lbnd = 0 if dt is np.bool_ else np.iinfo(dt).min + ubnd = 2 if dt is np.bool_ else np.iinfo(dt).max + 1 + + sample = self.rfunc(lbnd, ubnd, dtype=dt) + assert_equal(sample.dtype, np.dtype(dt)) + + for dt in (bool, int, np.compat.long): + lbnd = 0 if dt is bool else np.iinfo(dt).min + ubnd = 2 if dt is bool else np.iinfo(dt).max + 1 + + # gh-7284: Ensure that we get Python data types + sample = self.rfunc(lbnd, ubnd, dtype=dt) + assert_(not hasattr(sample, 'dtype')) + assert_equal(type(sample), dt) + + +class TestRandomDist: + # Make sure the random distribution returns the correct value for a + # given seed + + def setup(self): + self.seed = 1234567890 + + def test_rand(self): + np.random.seed(self.seed) + actual = np.random.rand(3, 2) + desired = np.array([[0.61879477158567997, 0.59162362775974664], + [0.88868358904449662, 0.89165480011560816], + [0.4575674820298663, 0.7781880808593471]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_randn(self): + np.random.seed(self.seed) + actual = np.random.randn(3, 2) + desired = np.array([[1.34016345771863121, 1.73759122771936081], + [1.498988344300628, -0.2286433324536169], + [2.031033998682787, 2.17032494605655257]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_randint(self): + np.random.seed(self.seed) + actual = np.random.randint(-99, 99, size=(3, 2)) + desired = np.array([[31, 3], + [-52, 41], + [-48, -66]]) + assert_array_equal(actual, desired) + + def test_random_integers(self): + np.random.seed(self.seed) + with suppress_warnings() as sup: + w = sup.record(DeprecationWarning) + actual = np.random.random_integers(-99, 99, size=(3, 2)) + assert_(len(w) == 1) + desired = np.array([[31, 3], + [-52, 41], + [-48, -66]]) + assert_array_equal(actual, desired) + + def test_random_integers_max_int(self): + # Tests whether random_integers can generate the + # maximum allowed Python int that can be converted + # into a C long. Previous implementations of this + # method have thrown an OverflowError when attempting + # to generate this integer. 
+ with suppress_warnings() as sup: + w = sup.record(DeprecationWarning) + actual = np.random.random_integers(np.iinfo('l').max, + np.iinfo('l').max) + assert_(len(w) == 1) + + desired = np.iinfo('l').max + assert_equal(actual, desired) + + def test_random_integers_deprecated(self): + with warnings.catch_warnings(): + warnings.simplefilter("error", DeprecationWarning) + + # DeprecationWarning raised with high == None + assert_raises(DeprecationWarning, + np.random.random_integers, + np.iinfo('l').max) + + # DeprecationWarning raised with high != None + assert_raises(DeprecationWarning, + np.random.random_integers, + np.iinfo('l').max, np.iinfo('l').max) + + def test_random(self): + np.random.seed(self.seed) + actual = np.random.random((3, 2)) + desired = np.array([[0.61879477158567997, 0.59162362775974664], + [0.88868358904449662, 0.89165480011560816], + [0.4575674820298663, 0.7781880808593471]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_choice_uniform_replace(self): + np.random.seed(self.seed) + actual = np.random.choice(4, 4) + desired = np.array([2, 3, 2, 3]) + assert_array_equal(actual, desired) + + def test_choice_nonuniform_replace(self): + np.random.seed(self.seed) + actual = np.random.choice(4, 4, p=[0.4, 0.4, 0.1, 0.1]) + desired = np.array([1, 1, 2, 2]) + assert_array_equal(actual, desired) + + def test_choice_uniform_noreplace(self): + np.random.seed(self.seed) + actual = np.random.choice(4, 3, replace=False) + desired = np.array([0, 1, 3]) + assert_array_equal(actual, desired) + + def test_choice_nonuniform_noreplace(self): + np.random.seed(self.seed) + actual = np.random.choice(4, 3, replace=False, + p=[0.1, 0.3, 0.5, 0.1]) + desired = np.array([2, 3, 1]) + assert_array_equal(actual, desired) + + def test_choice_noninteger(self): + np.random.seed(self.seed) + actual = np.random.choice(['a', 'b', 'c', 'd'], 4) + desired = np.array(['c', 'd', 'c', 'd']) + assert_array_equal(actual, desired) + + def test_choice_exceptions(self): + sample = np.random.choice + assert_raises(ValueError, sample, -1, 3) + assert_raises(ValueError, sample, 3., 3) + assert_raises(ValueError, sample, [[1, 2], [3, 4]], 3) + assert_raises(ValueError, sample, [], 3) + assert_raises(ValueError, sample, [1, 2, 3, 4], 3, + p=[[0.25, 0.25], [0.25, 0.25]]) + assert_raises(ValueError, sample, [1, 2], 3, p=[0.4, 0.4, 0.2]) + assert_raises(ValueError, sample, [1, 2], 3, p=[1.1, -0.1]) + assert_raises(ValueError, sample, [1, 2], 3, p=[0.4, 0.4]) + assert_raises(ValueError, sample, [1, 2, 3], 4, replace=False) + # gh-13087 + assert_raises(ValueError, sample, [1, 2, 3], -2, replace=False) + assert_raises(ValueError, sample, [1, 2, 3], (-1,), replace=False) + assert_raises(ValueError, sample, [1, 2, 3], (-1, 1), replace=False) + assert_raises(ValueError, sample, [1, 2, 3], 2, + replace=False, p=[1, 0, 0]) + + def test_choice_return_shape(self): + p = [0.1, 0.9] + # Check scalar + assert_(np.isscalar(np.random.choice(2, replace=True))) + assert_(np.isscalar(np.random.choice(2, replace=False))) + assert_(np.isscalar(np.random.choice(2, replace=True, p=p))) + assert_(np.isscalar(np.random.choice(2, replace=False, p=p))) + assert_(np.isscalar(np.random.choice([1, 2], replace=True))) + assert_(np.random.choice([None], replace=True) is None) + a = np.array([1, 2]) + arr = np.empty(1, dtype=object) + arr[0] = a + assert_(np.random.choice(arr, replace=True) is a) + + # Check 0-d array + s = tuple() + assert_(not np.isscalar(np.random.choice(2, s, replace=True))) + assert_(not 
np.isscalar(np.random.choice(2, s, replace=False))) + assert_(not np.isscalar(np.random.choice(2, s, replace=True, p=p))) + assert_(not np.isscalar(np.random.choice(2, s, replace=False, p=p))) + assert_(not np.isscalar(np.random.choice([1, 2], s, replace=True))) + assert_(np.random.choice([None], s, replace=True).ndim == 0) + a = np.array([1, 2]) + arr = np.empty(1, dtype=object) + arr[0] = a + assert_(np.random.choice(arr, s, replace=True).item() is a) + + # Check multi dimensional array + s = (2, 3) + p = [0.1, 0.1, 0.1, 0.1, 0.4, 0.2] + assert_equal(np.random.choice(6, s, replace=True).shape, s) + assert_equal(np.random.choice(6, s, replace=False).shape, s) + assert_equal(np.random.choice(6, s, replace=True, p=p).shape, s) + assert_equal(np.random.choice(6, s, replace=False, p=p).shape, s) + assert_equal(np.random.choice(np.arange(6), s, replace=True).shape, s) + + # Check zero-size + assert_equal(np.random.randint(0, 0, size=(3, 0, 4)).shape, (3, 0, 4)) + assert_equal(np.random.randint(0, -10, size=0).shape, (0,)) + assert_equal(np.random.randint(10, 10, size=0).shape, (0,)) + assert_equal(np.random.choice(0, size=0).shape, (0,)) + assert_equal(np.random.choice([], size=(0,)).shape, (0,)) + assert_equal(np.random.choice(['a', 'b'], size=(3, 0, 4)).shape, + (3, 0, 4)) + assert_raises(ValueError, np.random.choice, [], 10) + + def test_choice_nan_probabilities(self): + a = np.array([42, 1, 2]) + p = [None, None, None] + assert_raises(ValueError, np.random.choice, a, p=p) + + def test_bytes(self): + np.random.seed(self.seed) + actual = np.random.bytes(10) + desired = b'\x82Ui\x9e\xff\x97+Wf\xa5' + assert_equal(actual, desired) + + def test_shuffle(self): + # Test lists, arrays (of various dtypes), and multidimensional versions + # of both, c-contiguous or not: + for conv in [lambda x: np.array([]), + lambda x: x, + lambda x: np.asarray(x).astype(np.int8), + lambda x: np.asarray(x).astype(np.float32), + lambda x: np.asarray(x).astype(np.complex64), + lambda x: np.asarray(x).astype(object), + lambda x: [(i, i) for i in x], + lambda x: np.asarray([[i, i] for i in x]), + lambda x: np.vstack([x, x]).T, + # gh-11442 + lambda x: (np.asarray([(i, i) for i in x], + [("a", int), ("b", int)]) + .view(np.recarray)), + # gh-4270 + lambda x: np.asarray([(i, i) for i in x], + [("a", object), ("b", np.int32)])]: + np.random.seed(self.seed) + alist = conv([1, 2, 3, 4, 5, 6, 7, 8, 9, 0]) + np.random.shuffle(alist) + actual = alist + desired = conv([0, 1, 9, 6, 2, 4, 5, 8, 7, 3]) + assert_array_equal(actual, desired) + + def test_shuffle_masked(self): + # gh-3263 + a = np.ma.masked_values(np.reshape(range(20), (5, 4)) % 3 - 1, -1) + b = np.ma.masked_values(np.arange(20) % 3 - 1, -1) + a_orig = a.copy() + b_orig = b.copy() + for i in range(50): + np.random.shuffle(a) + assert_equal( + sorted(a.data[~a.mask]), sorted(a_orig.data[~a_orig.mask])) + np.random.shuffle(b) + assert_equal( + sorted(b.data[~b.mask]), sorted(b_orig.data[~b_orig.mask])) + + @pytest.mark.parametrize("random", + [np.random, np.random.RandomState(), np.random.default_rng()]) + def test_shuffle_untyped_warning(self, random): + # Create a dict that works like a sequence but isn't one + values = {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6} + with pytest.warns(UserWarning, + match="you are shuffling a 'dict' object") as rec: + random.shuffle(values) + assert "test_random" in rec[0].filename + + @pytest.mark.parametrize("random", + [np.random, np.random.RandomState(), np.random.default_rng()]) + @pytest.mark.parametrize("use_array_like", [True,
False]) + def test_shuffle_no_object_unpacking(self, random, use_array_like): + class MyArr(np.ndarray): + pass + + items = [ + None, np.array([3]), np.float64(3), np.array(10), np.float64(7) + ] + arr = np.array(items, dtype=object) + item_ids = {id(i) for i in items} + if use_array_like: + arr = arr.view(MyArr) + + # The array was created fine, and did not modify any objects: + assert all(id(i) in item_ids for i in arr) + + if use_array_like and not isinstance(random, np.random.Generator): + # The old API gives incorrect results, but warns about it. + with pytest.warns(UserWarning, + match="Shuffling a one dimensional array.*"): + random.shuffle(arr) + else: + random.shuffle(arr) + assert all(id(i) in item_ids for i in arr) + + def test_shuffle_memoryview(self): + # gh-18273 + # allow graceful handling of memoryviews + # (treat the same as arrays) + np.random.seed(self.seed) + a = np.arange(5).data + np.random.shuffle(a) + assert_equal(np.asarray(a), [0, 1, 4, 3, 2]) + rng = np.random.RandomState(self.seed) + rng.shuffle(a) + assert_equal(np.asarray(a), [0, 1, 2, 3, 4]) + rng = np.random.default_rng(self.seed) + rng.shuffle(a) + assert_equal(np.asarray(a), [4, 1, 0, 3, 2]) + + def test_beta(self): + np.random.seed(self.seed) + actual = np.random.beta(.1, .9, size=(3, 2)) + desired = np.array( + [[1.45341850513746058e-02, 5.31297615662868145e-04], + [1.85366619058432324e-06, 4.19214516800110563e-03], + [1.58405155108498093e-04, 1.26252891949397652e-04]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_binomial(self): + np.random.seed(self.seed) + actual = np.random.binomial(100, .456, size=(3, 2)) + desired = np.array([[37, 43], + [42, 48], + [46, 45]]) + assert_array_equal(actual, desired) + + def test_chisquare(self): + np.random.seed(self.seed) + actual = np.random.chisquare(50, size=(3, 2)) + desired = np.array([[63.87858175501090585, 68.68407748911370447], + [65.77116116901505904, 47.09686762438974483], + [72.3828403199695174, 74.18408615260374006]]) + assert_array_almost_equal(actual, desired, decimal=13) + + def test_dirichlet(self): + np.random.seed(self.seed) + alpha = np.array([51.72840233779265162, 39.74494232180943953]) + actual = np.random.mtrand.dirichlet(alpha, size=(3, 2)) + desired = np.array([[[0.54539444573611562, 0.45460555426388438], + [0.62345816822039413, 0.37654183177960598]], + [[0.55206000085785778, 0.44793999914214233], + [0.58964023305154301, 0.41035976694845688]], + [[0.59266909280647828, 0.40733090719352177], + [0.56974431743975207, 0.43025568256024799]]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_dirichlet_size(self): + # gh-3173 + p = np.array([51.72840233779265162, 39.74494232180943953]) + assert_equal(np.random.dirichlet(p, np.uint32(1)).shape, (1, 2)) + assert_equal(np.random.dirichlet(p, np.uint32(1)).shape, (1, 2)) + assert_equal(np.random.dirichlet(p, np.uint32(1)).shape, (1, 2)) + assert_equal(np.random.dirichlet(p, [2, 2]).shape, (2, 2, 2)) + assert_equal(np.random.dirichlet(p, (2, 2)).shape, (2, 2, 2)) + assert_equal(np.random.dirichlet(p, np.array((2, 2))).shape, (2, 2, 2)) + + assert_raises(TypeError, np.random.dirichlet, p, float(1)) + + def test_dirichlet_bad_alpha(self): + # gh-2089 + alpha = np.array([5.4e-01, -1.0e-16]) + assert_raises(ValueError, np.random.mtrand.dirichlet, alpha) + + # gh-15876 + assert_raises(ValueError, random.dirichlet, [[5, 1]]) + assert_raises(ValueError, random.dirichlet, [[5], [1]]) + assert_raises(ValueError, random.dirichlet, [[[5], [1]], [[1], [5]]]) + 
assert_raises(ValueError, random.dirichlet, np.array([[5, 1], [1, 5]])) + + def test_exponential(self): + np.random.seed(self.seed) + actual = np.random.exponential(1.1234, size=(3, 2)) + desired = np.array([[1.08342649775011624, 1.00607889924557314], + [2.46628830085216721, 2.49668106809923884], + [0.68717433461363442, 1.69175666993575979]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_exponential_0(self): + assert_equal(np.random.exponential(scale=0), 0) + assert_raises(ValueError, np.random.exponential, scale=-0.) + + def test_f(self): + np.random.seed(self.seed) + actual = np.random.f(12, 77, size=(3, 2)) + desired = np.array([[1.21975394418575878, 1.75135759791559775], + [1.44803115017146489, 1.22108959480396262], + [1.02176975757740629, 1.34431827623300415]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_gamma(self): + np.random.seed(self.seed) + actual = np.random.gamma(5, 3, size=(3, 2)) + desired = np.array([[24.60509188649287182, 28.54993563207210627], + [26.13476110204064184, 12.56988482927716078], + [31.71863275789960568, 33.30143302795922011]]) + assert_array_almost_equal(actual, desired, decimal=14) + + def test_gamma_0(self): + assert_equal(np.random.gamma(shape=0, scale=0), 0) + assert_raises(ValueError, np.random.gamma, shape=-0., scale=-0.) + + def test_geometric(self): + np.random.seed(self.seed) + actual = np.random.geometric(.123456789, size=(3, 2)) + desired = np.array([[8, 7], + [17, 17], + [5, 12]]) + assert_array_equal(actual, desired) + + def test_gumbel(self): + np.random.seed(self.seed) + actual = np.random.gumbel(loc=.123456789, scale=2.0, size=(3, 2)) + desired = np.array([[0.19591898743416816, 0.34405539668096674], + [-1.4492522252274278, -1.47374816298446865], + [1.10651090478803416, -0.69535848626236174]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_gumbel_0(self): + assert_equal(np.random.gumbel(scale=0), 0) + assert_raises(ValueError, np.random.gumbel, scale=-0.) + + def test_hypergeometric(self): + np.random.seed(self.seed) + actual = np.random.hypergeometric(10, 5, 14, size=(3, 2)) + desired = np.array([[10, 10], + [10, 10], + [9, 9]]) + assert_array_equal(actual, desired) + + # Test nbad = 0 + actual = np.random.hypergeometric(5, 0, 3, size=4) + desired = np.array([3, 3, 3, 3]) + assert_array_equal(actual, desired) + + actual = np.random.hypergeometric(15, 0, 12, size=4) + desired = np.array([12, 12, 12, 12]) + assert_array_equal(actual, desired) + + # Test ngood = 0 + actual = np.random.hypergeometric(0, 5, 3, size=4) + desired = np.array([0, 0, 0, 0]) + assert_array_equal(actual, desired) + + actual = np.random.hypergeometric(0, 15, 12, size=4) + desired = np.array([0, 0, 0, 0]) + assert_array_equal(actual, desired) + + def test_laplace(self): + np.random.seed(self.seed) + actual = np.random.laplace(loc=.123456789, scale=2.0, size=(3, 2)) + desired = np.array([[0.66599721112760157, 0.52829452552221945], + [3.12791959514407125, 3.18202813572992005], + [-0.05391065675859356, 1.74901336242837324]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_laplace_0(self): + assert_equal(np.random.laplace(scale=0), 0) + assert_raises(ValueError, np.random.laplace, scale=-0.) 
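+ # Sketch (hypothetical test, not upstream): the reseed-then-compare
+ # pattern this whole class relies on, in its smallest form.
+ def test_reseed_reproduces_sketch(self):
+ np.random.seed(self.seed)
+ first = np.random.laplace(size=2)
+ np.random.seed(self.seed)
+ assert_array_equal(np.random.laplace(size=2), first)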
+
+    def test_logistic(self):
+        np.random.seed(self.seed)
+        actual = np.random.logistic(loc=.123456789, scale=2.0, size=(3, 2))
+        desired = np.array([[1.09232835305011444, 0.8648196662399954],
+                            [4.27818590694950185, 4.33897006346929714],
+                            [-0.21682183359214885, 2.63373365386060332]])
+        assert_array_almost_equal(actual, desired, decimal=15)
+
+    def test_lognormal(self):
+        np.random.seed(self.seed)
+        actual = np.random.lognormal(mean=.123456789, sigma=2.0, size=(3, 2))
+        desired = np.array([[16.50698631688883822, 36.54846706092654784],
+                            [22.67886599981281748, 0.71617561058995771],
+                            [65.72798501792723869, 86.84341601437161273]])
+        assert_array_almost_equal(actual, desired, decimal=13)
+
+    def test_lognormal_0(self):
+        assert_equal(np.random.lognormal(sigma=0), 1)
+        assert_raises(ValueError, np.random.lognormal, sigma=-0.)
+
+    def test_logseries(self):
+        np.random.seed(self.seed)
+        actual = np.random.logseries(p=.923456789, size=(3, 2))
+        desired = np.array([[2, 2],
+                            [6, 17],
+                            [3, 6]])
+        assert_array_equal(actual, desired)
+
+    def test_multinomial(self):
+        np.random.seed(self.seed)
+        actual = np.random.multinomial(20, [1/6.]*6, size=(3, 2))
+        desired = np.array([[[4, 3, 5, 4, 2, 2],
+                             [5, 2, 8, 2, 2, 1]],
+                            [[3, 4, 3, 6, 0, 4],
+                             [2, 1, 4, 3, 6, 4]],
+                            [[4, 4, 2, 5, 2, 3],
+                             [4, 3, 4, 2, 3, 4]]])
+        assert_array_equal(actual, desired)
+
+    def test_multivariate_normal(self):
+        np.random.seed(self.seed)
+        mean = (.123456789, 10)
+        cov = [[1, 0], [0, 1]]
+        size = (3, 2)
+        actual = np.random.multivariate_normal(mean, cov, size)
+        desired = np.array([[[1.463620246718631, 11.73759122771936],
+                             [1.622445133300628, 9.771356667546383]],
+                            [[2.154490787682787, 12.170324946056553],
+                             [1.719909438201865, 9.230548443648306]],
+                            [[0.689515026297799, 9.880729819607714],
+                             [-0.023054015651998, 9.201096623542879]]])
+
+        assert_array_almost_equal(actual, desired, decimal=15)
+
+        # Check for default size, was raising deprecation warning
+        actual = np.random.multivariate_normal(mean, cov)
+        desired = np.array([0.895289569463708, 9.17180864067987])
+        assert_array_almost_equal(actual, desired, decimal=15)
+
+        # Check that non positive-semidefinite covariance warns with
+        # RuntimeWarning
+        mean = [0, 0]
+        cov = [[1, 2], [2, 1]]
+        assert_warns(RuntimeWarning, np.random.multivariate_normal, mean, cov)
+
+        # and that it doesn't warn with RuntimeWarning check_valid='ignore'
+        assert_no_warnings(np.random.multivariate_normal, mean, cov,
+                           check_valid='ignore')
+
+        # and that it raises a ValueError with check_valid='raise'
+        assert_raises(ValueError, np.random.multivariate_normal, mean, cov,
+                      check_valid='raise')
+
+        cov = np.array([[1, 0.1], [0.1, 1]], dtype=np.float32)
+        with suppress_warnings() as sup:
+            np.random.multivariate_normal(mean, cov)
+            w = sup.record(RuntimeWarning)
+            assert len(w) == 0
+
+    def test_negative_binomial(self):
+        np.random.seed(self.seed)
+        actual = np.random.negative_binomial(n=100, p=.12345, size=(3, 2))
+        desired = np.array([[848, 841],
+                            [892, 611],
+                            [779, 647]])
+        assert_array_equal(actual, desired)
+
+    def test_noncentral_chisquare(self):
+        np.random.seed(self.seed)
+        actual = np.random.noncentral_chisquare(df=5, nonc=5, size=(3, 2))
+        desired = np.array([[23.91905354498517511, 13.35324692733826346],
+                            [31.22452661329736401, 16.60047399466177254],
+                            [5.03461598262724586, 17.94973089023519464]])
+        assert_array_almost_equal(actual, desired, decimal=14)
+
+        actual = np.random.noncentral_chisquare(df=.5, nonc=.2, size=(3, 2))
+        desired = np.array([[1.47145377828516666,
0.15052899268012659], + [0.00943803056963588, 1.02647251615666169], + [0.332334982684171, 0.15451287602753125]]) + assert_array_almost_equal(actual, desired, decimal=14) + + np.random.seed(self.seed) + actual = np.random.noncentral_chisquare(df=5, nonc=0, size=(3, 2)) + desired = np.array([[9.597154162763948, 11.725484450296079], + [10.413711048138335, 3.694475922923986], + [13.484222138963087, 14.377255424602957]]) + assert_array_almost_equal(actual, desired, decimal=14) + + def test_noncentral_f(self): + np.random.seed(self.seed) + actual = np.random.noncentral_f(dfnum=5, dfden=2, nonc=1, + size=(3, 2)) + desired = np.array([[1.40598099674926669, 0.34207973179285761], + [3.57715069265772545, 7.92632662577829805], + [0.43741599463544162, 1.1774208752428319]]) + assert_array_almost_equal(actual, desired, decimal=14) + + def test_normal(self): + np.random.seed(self.seed) + actual = np.random.normal(loc=.123456789, scale=2.0, size=(3, 2)) + desired = np.array([[2.80378370443726244, 3.59863924443872163], + [3.121433477601256, -0.33382987590723379], + [4.18552478636557357, 4.46410668111310471]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_normal_0(self): + assert_equal(np.random.normal(scale=0), 0) + assert_raises(ValueError, np.random.normal, scale=-0.) + + def test_pareto(self): + np.random.seed(self.seed) + actual = np.random.pareto(a=.123456789, size=(3, 2)) + desired = np.array( + [[2.46852460439034849e+03, 1.41286880810518346e+03], + [5.28287797029485181e+07, 6.57720981047328785e+07], + [1.40840323350391515e+02, 1.98390255135251704e+05]]) + # For some reason on 32-bit x86 Ubuntu 12.10 the [1, 0] entry in this + # matrix differs by 24 nulps. Discussion: + # https://mail.python.org/pipermail/numpy-discussion/2012-September/063801.html + # Consensus is that this is probably some gcc quirk that affects + # rounding but not in any important way, so we just use a looser + # tolerance on this test: + np.testing.assert_array_almost_equal_nulp(actual, desired, nulp=30) + + def test_poisson(self): + np.random.seed(self.seed) + actual = np.random.poisson(lam=.123456789, size=(3, 2)) + desired = np.array([[0, 0], + [1, 0], + [0, 0]]) + assert_array_equal(actual, desired) + + def test_poisson_exceptions(self): + lambig = np.iinfo('l').max + lamneg = -1 + assert_raises(ValueError, np.random.poisson, lamneg) + assert_raises(ValueError, np.random.poisson, [lamneg]*10) + assert_raises(ValueError, np.random.poisson, lambig) + assert_raises(ValueError, np.random.poisson, [lambig]*10) + + def test_power(self): + np.random.seed(self.seed) + actual = np.random.power(a=.123456789, size=(3, 2)) + desired = np.array([[0.02048932883240791, 0.01424192241128213], + [0.38446073748535298, 0.39499689943484395], + [0.00177699707563439, 0.13115505880863756]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_rayleigh(self): + np.random.seed(self.seed) + actual = np.random.rayleigh(scale=10, size=(3, 2)) + desired = np.array([[13.8882496494248393, 13.383318339044731], + [20.95413364294492098, 21.08285015800712614], + [11.06066537006854311, 17.35468505778271009]]) + assert_array_almost_equal(actual, desired, decimal=14) + + def test_rayleigh_0(self): + assert_equal(np.random.rayleigh(scale=0), 0) + assert_raises(ValueError, np.random.rayleigh, scale=-0.) 
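+
+    # --- Editor's sketch (not part of the upstream suite) ---
+    # test_pareto above swaps a decimal tolerance for a ULP budget via
+    # assert_array_almost_equal_nulp.  The helper below shows what that
+    # buys: np.spacing(x) is exactly one unit in the last place at x, so
+    # a one-ULP neighbour passes with nulp=1 even at magnitudes where any
+    # fixed decimal tolerance would be meaningless.
+    def _sketch_nulp_comparison(self):
+        x = np.float64(1e18)
+        np.testing.assert_array_almost_equal_nulp(x, x + np.spacing(x),
+                                                  nulp=1)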
+ + def test_standard_cauchy(self): + np.random.seed(self.seed) + actual = np.random.standard_cauchy(size=(3, 2)) + desired = np.array([[0.77127660196445336, -6.55601161955910605], + [0.93582023391158309, -2.07479293013759447], + [-4.74601644297011926, 0.18338989290760804]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_standard_exponential(self): + np.random.seed(self.seed) + actual = np.random.standard_exponential(size=(3, 2)) + desired = np.array([[0.96441739162374596, 0.89556604882105506], + [2.1953785836319808, 2.22243285392490542], + [0.6116915921431676, 1.50592546727413201]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_standard_gamma(self): + np.random.seed(self.seed) + actual = np.random.standard_gamma(shape=3, size=(3, 2)) + desired = np.array([[5.50841531318455058, 6.62953470301903103], + [5.93988484943779227, 2.31044849402133989], + [7.54838614231317084, 8.012756093271868]]) + assert_array_almost_equal(actual, desired, decimal=14) + + def test_standard_gamma_0(self): + assert_equal(np.random.standard_gamma(shape=0), 0) + assert_raises(ValueError, np.random.standard_gamma, shape=-0.) + + def test_standard_normal(self): + np.random.seed(self.seed) + actual = np.random.standard_normal(size=(3, 2)) + desired = np.array([[1.34016345771863121, 1.73759122771936081], + [1.498988344300628, -0.2286433324536169], + [2.031033998682787, 2.17032494605655257]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_standard_t(self): + np.random.seed(self.seed) + actual = np.random.standard_t(df=10, size=(3, 2)) + desired = np.array([[0.97140611862659965, -0.08830486548450577], + [1.36311143689505321, -0.55317463909867071], + [-0.18473749069684214, 0.61181537341755321]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_triangular(self): + np.random.seed(self.seed) + actual = np.random.triangular(left=5.12, mode=10.23, right=20.34, + size=(3, 2)) + desired = np.array([[12.68117178949215784, 12.4129206149193152], + [16.20131377335158263, 16.25692138747600524], + [11.20400690911820263, 14.4978144835829923]]) + assert_array_almost_equal(actual, desired, decimal=14) + + def test_uniform(self): + np.random.seed(self.seed) + actual = np.random.uniform(low=1.23, high=10.54, size=(3, 2)) + desired = np.array([[6.99097932346268003, 6.73801597444323974], + [9.50364421400426274, 9.53130618907631089], + [5.48995325769805476, 8.47493103280052118]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_uniform_range_bounds(self): + fmin = np.finfo('float').min + fmax = np.finfo('float').max + + func = np.random.uniform + assert_raises(OverflowError, func, -np.inf, 0) + assert_raises(OverflowError, func, 0, np.inf) + assert_raises(OverflowError, func, fmin, fmax) + assert_raises(OverflowError, func, [-np.inf], [0]) + assert_raises(OverflowError, func, [0], [np.inf]) + + # (fmax / 1e17) - fmin is within range, so this should not throw + # account for i386 extended precision DBL_MAX / 1e17 + DBL_MAX > + # DBL_MAX by increasing fmin a bit + np.random.uniform(low=np.nextafter(fmin, 1), high=fmax / 1e17) + + def test_scalar_exception_propagation(self): + # Tests that exceptions are correctly propagated in distributions + # when called with objects that throw exceptions when converted to + # scalars. 
+ # + # Regression test for gh: 8865 + + class ThrowingFloat(np.ndarray): + def __float__(self): + raise TypeError + + throwing_float = np.array(1.0).view(ThrowingFloat) + assert_raises(TypeError, np.random.uniform, throwing_float, + throwing_float) + + class ThrowingInteger(np.ndarray): + def __int__(self): + raise TypeError + + __index__ = __int__ + + throwing_int = np.array(1).view(ThrowingInteger) + assert_raises(TypeError, np.random.hypergeometric, throwing_int, 1, 1) + + def test_vonmises(self): + np.random.seed(self.seed) + actual = np.random.vonmises(mu=1.23, kappa=1.54, size=(3, 2)) + desired = np.array([[2.28567572673902042, 2.89163838442285037], + [0.38198375564286025, 2.57638023113890746], + [1.19153771588353052, 1.83509849681825354]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_vonmises_small(self): + # check infinite loop, gh-4720 + np.random.seed(self.seed) + r = np.random.vonmises(mu=0., kappa=1.1e-8, size=10**6) + np.testing.assert_(np.isfinite(r).all()) + + def test_wald(self): + np.random.seed(self.seed) + actual = np.random.wald(mean=1.23, scale=1.54, size=(3, 2)) + desired = np.array([[3.82935265715889983, 5.13125249184285526], + [0.35045403618358717, 1.50832396872003538], + [0.24124319895843183, 0.22031101461955038]]) + assert_array_almost_equal(actual, desired, decimal=14) + + def test_weibull(self): + np.random.seed(self.seed) + actual = np.random.weibull(a=1.23, size=(3, 2)) + desired = np.array([[0.97097342648766727, 0.91422896443565516], + [1.89517770034962929, 1.91414357960479564], + [0.67057783752390987, 1.39494046635066793]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_weibull_0(self): + np.random.seed(self.seed) + assert_equal(np.random.weibull(a=0, size=12), np.zeros(12)) + assert_raises(ValueError, np.random.weibull, a=-0.) 
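+
+    # --- Editor's sketch (not part of the upstream suite) ---
+    # test_scalar_exception_propagation works because parameter validation
+    # funnels through Python's scalar-conversion protocol; anything raised
+    # by __float__ must surface unchanged.  A minimal standalone version:
+    def _sketch_float_protocol_propagates(self):
+        class Boom:
+            def __float__(self):
+                raise TypeError("refusing scalar conversion")
+        # float() dispatches to __float__, so the TypeError escapes intact
+        assert_raises(TypeError, float, Boom())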
+ + def test_zipf(self): + np.random.seed(self.seed) + actual = np.random.zipf(a=1.23, size=(3, 2)) + desired = np.array([[66, 29], + [1, 1], + [3, 13]]) + assert_array_equal(actual, desired) + + +class TestBroadcast: + # tests that functions that broadcast behave + # correctly when presented with non-scalar arguments + def setup(self): + self.seed = 123456789 + + def setSeed(self): + np.random.seed(self.seed) + + # TODO: Include test for randint once it can broadcast + # Can steal the test written in PR #6938 + + def test_uniform(self): + low = [0] + high = [1] + uniform = np.random.uniform + desired = np.array([0.53283302478975902, + 0.53413660089041659, + 0.50955303552646702]) + + self.setSeed() + actual = uniform(low * 3, high) + assert_array_almost_equal(actual, desired, decimal=14) + + self.setSeed() + actual = uniform(low, high * 3) + assert_array_almost_equal(actual, desired, decimal=14) + + def test_normal(self): + loc = [0] + scale = [1] + bad_scale = [-1] + normal = np.random.normal + desired = np.array([2.2129019979039612, + 2.1283977976520019, + 1.8417114045748335]) + + self.setSeed() + actual = normal(loc * 3, scale) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, normal, loc * 3, bad_scale) + + self.setSeed() + actual = normal(loc, scale * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, normal, loc, bad_scale * 3) + + def test_beta(self): + a = [1] + b = [2] + bad_a = [-1] + bad_b = [-2] + beta = np.random.beta + desired = np.array([0.19843558305989056, + 0.075230336409423643, + 0.24976865978980844]) + + self.setSeed() + actual = beta(a * 3, b) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, beta, bad_a * 3, b) + assert_raises(ValueError, beta, a * 3, bad_b) + + self.setSeed() + actual = beta(a, b * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, beta, bad_a, b * 3) + assert_raises(ValueError, beta, a, bad_b * 3) + + def test_exponential(self): + scale = [1] + bad_scale = [-1] + exponential = np.random.exponential + desired = np.array([0.76106853658845242, + 0.76386282278691653, + 0.71243813125891797]) + + self.setSeed() + actual = exponential(scale * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, exponential, bad_scale * 3) + + def test_standard_gamma(self): + shape = [1] + bad_shape = [-1] + std_gamma = np.random.standard_gamma + desired = np.array([0.76106853658845242, + 0.76386282278691653, + 0.71243813125891797]) + + self.setSeed() + actual = std_gamma(shape * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, std_gamma, bad_shape * 3) + + def test_gamma(self): + shape = [1] + scale = [2] + bad_shape = [-1] + bad_scale = [-2] + gamma = np.random.gamma + desired = np.array([1.5221370731769048, + 1.5277256455738331, + 1.4248762625178359]) + + self.setSeed() + actual = gamma(shape * 3, scale) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, gamma, bad_shape * 3, scale) + assert_raises(ValueError, gamma, shape * 3, bad_scale) + + self.setSeed() + actual = gamma(shape, scale * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, gamma, bad_shape, scale * 3) + assert_raises(ValueError, gamma, shape, bad_scale * 3) + + def test_f(self): + dfnum = [1] + dfden = [2] + bad_dfnum = [-1] + bad_dfden = [-2] + f = np.random.f + desired = np.array([0.80038951638264799, + 
0.86768719635363512, + 2.7251095168386801]) + + self.setSeed() + actual = f(dfnum * 3, dfden) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, f, bad_dfnum * 3, dfden) + assert_raises(ValueError, f, dfnum * 3, bad_dfden) + + self.setSeed() + actual = f(dfnum, dfden * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, f, bad_dfnum, dfden * 3) + assert_raises(ValueError, f, dfnum, bad_dfden * 3) + + def test_noncentral_f(self): + dfnum = [2] + dfden = [3] + nonc = [4] + bad_dfnum = [0] + bad_dfden = [-1] + bad_nonc = [-2] + nonc_f = np.random.noncentral_f + desired = np.array([9.1393943263705211, + 13.025456344595602, + 8.8018098359100545]) + + self.setSeed() + actual = nonc_f(dfnum * 3, dfden, nonc) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, nonc_f, bad_dfnum * 3, dfden, nonc) + assert_raises(ValueError, nonc_f, dfnum * 3, bad_dfden, nonc) + assert_raises(ValueError, nonc_f, dfnum * 3, dfden, bad_nonc) + + self.setSeed() + actual = nonc_f(dfnum, dfden * 3, nonc) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, nonc_f, bad_dfnum, dfden * 3, nonc) + assert_raises(ValueError, nonc_f, dfnum, bad_dfden * 3, nonc) + assert_raises(ValueError, nonc_f, dfnum, dfden * 3, bad_nonc) + + self.setSeed() + actual = nonc_f(dfnum, dfden, nonc * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, nonc_f, bad_dfnum, dfden, nonc * 3) + assert_raises(ValueError, nonc_f, dfnum, bad_dfden, nonc * 3) + assert_raises(ValueError, nonc_f, dfnum, dfden, bad_nonc * 3) + + def test_noncentral_f_small_df(self): + self.setSeed() + desired = np.array([6.869638627492048, 0.785880199263955]) + actual = np.random.noncentral_f(0.9, 0.9, 2, size=2) + assert_array_almost_equal(actual, desired, decimal=14) + + def test_chisquare(self): + df = [1] + bad_df = [-1] + chisquare = np.random.chisquare + desired = np.array([0.57022801133088286, + 0.51947702108840776, + 0.1320969254923558]) + + self.setSeed() + actual = chisquare(df * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, chisquare, bad_df * 3) + + def test_noncentral_chisquare(self): + df = [1] + nonc = [2] + bad_df = [-1] + bad_nonc = [-2] + nonc_chi = np.random.noncentral_chisquare + desired = np.array([9.0015599467913763, + 4.5804135049718742, + 6.0872302432834564]) + + self.setSeed() + actual = nonc_chi(df * 3, nonc) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, nonc_chi, bad_df * 3, nonc) + assert_raises(ValueError, nonc_chi, df * 3, bad_nonc) + + self.setSeed() + actual = nonc_chi(df, nonc * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, nonc_chi, bad_df, nonc * 3) + assert_raises(ValueError, nonc_chi, df, bad_nonc * 3) + + def test_standard_t(self): + df = [1] + bad_df = [-1] + t = np.random.standard_t + desired = np.array([3.0702872575217643, + 5.8560725167361607, + 1.0274791436474273]) + + self.setSeed() + actual = t(df * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, t, bad_df * 3) + + def test_vonmises(self): + mu = [2] + kappa = [1] + bad_kappa = [-1] + vonmises = np.random.vonmises + desired = np.array([2.9883443664201312, + -2.7064099483995943, + -1.8672476700665914]) + + self.setSeed() + actual = vonmises(mu * 3, kappa) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, vonmises, mu * 3, 
bad_kappa) + + self.setSeed() + actual = vonmises(mu, kappa * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, vonmises, mu, bad_kappa * 3) + + def test_pareto(self): + a = [1] + bad_a = [-1] + pareto = np.random.pareto + desired = np.array([1.1405622680198362, + 1.1465519762044529, + 1.0389564467453547]) + + self.setSeed() + actual = pareto(a * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, pareto, bad_a * 3) + + def test_weibull(self): + a = [1] + bad_a = [-1] + weibull = np.random.weibull + desired = np.array([0.76106853658845242, + 0.76386282278691653, + 0.71243813125891797]) + + self.setSeed() + actual = weibull(a * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, weibull, bad_a * 3) + + def test_power(self): + a = [1] + bad_a = [-1] + power = np.random.power + desired = np.array([0.53283302478975902, + 0.53413660089041659, + 0.50955303552646702]) + + self.setSeed() + actual = power(a * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, power, bad_a * 3) + + def test_laplace(self): + loc = [0] + scale = [1] + bad_scale = [-1] + laplace = np.random.laplace + desired = np.array([0.067921356028507157, + 0.070715642226971326, + 0.019290950698972624]) + + self.setSeed() + actual = laplace(loc * 3, scale) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, laplace, loc * 3, bad_scale) + + self.setSeed() + actual = laplace(loc, scale * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, laplace, loc, bad_scale * 3) + + def test_gumbel(self): + loc = [0] + scale = [1] + bad_scale = [-1] + gumbel = np.random.gumbel + desired = np.array([0.2730318639556768, + 0.26936705726291116, + 0.33906220393037939]) + + self.setSeed() + actual = gumbel(loc * 3, scale) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, gumbel, loc * 3, bad_scale) + + self.setSeed() + actual = gumbel(loc, scale * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, gumbel, loc, bad_scale * 3) + + def test_logistic(self): + loc = [0] + scale = [1] + bad_scale = [-1] + logistic = np.random.logistic + desired = np.array([0.13152135837586171, + 0.13675915696285773, + 0.038216792802833396]) + + self.setSeed() + actual = logistic(loc * 3, scale) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, logistic, loc * 3, bad_scale) + + self.setSeed() + actual = logistic(loc, scale * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, logistic, loc, bad_scale * 3) + + def test_lognormal(self): + mean = [0] + sigma = [1] + bad_sigma = [-1] + lognormal = np.random.lognormal + desired = np.array([9.1422086044848427, + 8.4013952870126261, + 6.3073234116578671]) + + self.setSeed() + actual = lognormal(mean * 3, sigma) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, lognormal, mean * 3, bad_sigma) + + self.setSeed() + actual = lognormal(mean, sigma * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, lognormal, mean, bad_sigma * 3) + + def test_rayleigh(self): + scale = [1] + bad_scale = [-1] + rayleigh = np.random.rayleigh + desired = np.array([1.2337491937897689, + 1.2360119924878694, + 1.1936818095781789]) + + self.setSeed() + actual = rayleigh(scale * 3) + assert_array_almost_equal(actual, desired, decimal=14) + 
assert_raises(ValueError, rayleigh, bad_scale * 3) + + def test_wald(self): + mean = [0.5] + scale = [1] + bad_mean = [0] + bad_scale = [-2] + wald = np.random.wald + desired = np.array([0.11873681120271318, + 0.12450084820795027, + 0.9096122728408238]) + + self.setSeed() + actual = wald(mean * 3, scale) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, wald, bad_mean * 3, scale) + assert_raises(ValueError, wald, mean * 3, bad_scale) + + self.setSeed() + actual = wald(mean, scale * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, wald, bad_mean, scale * 3) + assert_raises(ValueError, wald, mean, bad_scale * 3) + assert_raises(ValueError, wald, 0.0, 1) + assert_raises(ValueError, wald, 0.5, 0.0) + + def test_triangular(self): + left = [1] + right = [3] + mode = [2] + bad_left_one = [3] + bad_mode_one = [4] + bad_left_two, bad_mode_two = right * 2 + triangular = np.random.triangular + desired = np.array([2.03339048710429, + 2.0347400359389356, + 2.0095991069536208]) + + self.setSeed() + actual = triangular(left * 3, mode, right) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, triangular, bad_left_one * 3, mode, right) + assert_raises(ValueError, triangular, left * 3, bad_mode_one, right) + assert_raises(ValueError, triangular, bad_left_two * 3, bad_mode_two, + right) + + self.setSeed() + actual = triangular(left, mode * 3, right) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, triangular, bad_left_one, mode * 3, right) + assert_raises(ValueError, triangular, left, bad_mode_one * 3, right) + assert_raises(ValueError, triangular, bad_left_two, bad_mode_two * 3, + right) + + self.setSeed() + actual = triangular(left, mode, right * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, triangular, bad_left_one, mode, right * 3) + assert_raises(ValueError, triangular, left, bad_mode_one, right * 3) + assert_raises(ValueError, triangular, bad_left_two, bad_mode_two, + right * 3) + + def test_binomial(self): + n = [1] + p = [0.5] + bad_n = [-1] + bad_p_one = [-1] + bad_p_two = [1.5] + binom = np.random.binomial + desired = np.array([1, 1, 1]) + + self.setSeed() + actual = binom(n * 3, p) + assert_array_equal(actual, desired) + assert_raises(ValueError, binom, bad_n * 3, p) + assert_raises(ValueError, binom, n * 3, bad_p_one) + assert_raises(ValueError, binom, n * 3, bad_p_two) + + self.setSeed() + actual = binom(n, p * 3) + assert_array_equal(actual, desired) + assert_raises(ValueError, binom, bad_n, p * 3) + assert_raises(ValueError, binom, n, bad_p_one * 3) + assert_raises(ValueError, binom, n, bad_p_two * 3) + + def test_negative_binomial(self): + n = [1] + p = [0.5] + bad_n = [-1] + bad_p_one = [-1] + bad_p_two = [1.5] + neg_binom = np.random.negative_binomial + desired = np.array([1, 0, 1]) + + self.setSeed() + actual = neg_binom(n * 3, p) + assert_array_equal(actual, desired) + assert_raises(ValueError, neg_binom, bad_n * 3, p) + assert_raises(ValueError, neg_binom, n * 3, bad_p_one) + assert_raises(ValueError, neg_binom, n * 3, bad_p_two) + + self.setSeed() + actual = neg_binom(n, p * 3) + assert_array_equal(actual, desired) + assert_raises(ValueError, neg_binom, bad_n, p * 3) + assert_raises(ValueError, neg_binom, n, bad_p_one * 3) + assert_raises(ValueError, neg_binom, n, bad_p_two * 3) + + def test_poisson(self): + max_lam = np.random.RandomState()._poisson_lam_max + + lam = [1] + bad_lam_one = [-1] + 
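+        # Editor's aside (hedged, not part of the upstream test):
+        # _poisson_lam_max, read above, is the largest lam the legacy
+        # implementation accepts -- beyond it the integer return value
+        # could overflow -- so doubling it below yields a
+        # guaranteed-invalid argument.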
bad_lam_two = [max_lam * 2] + poisson = np.random.poisson + desired = np.array([1, 1, 0]) + + self.setSeed() + actual = poisson(lam * 3) + assert_array_equal(actual, desired) + assert_raises(ValueError, poisson, bad_lam_one * 3) + assert_raises(ValueError, poisson, bad_lam_two * 3) + + def test_zipf(self): + a = [2] + bad_a = [0] + zipf = np.random.zipf + desired = np.array([2, 2, 1]) + + self.setSeed() + actual = zipf(a * 3) + assert_array_equal(actual, desired) + assert_raises(ValueError, zipf, bad_a * 3) + with np.errstate(invalid='ignore'): + assert_raises(ValueError, zipf, np.nan) + assert_raises(ValueError, zipf, [0, 0, np.nan]) + + def test_geometric(self): + p = [0.5] + bad_p_one = [-1] + bad_p_two = [1.5] + geom = np.random.geometric + desired = np.array([2, 2, 2]) + + self.setSeed() + actual = geom(p * 3) + assert_array_equal(actual, desired) + assert_raises(ValueError, geom, bad_p_one * 3) + assert_raises(ValueError, geom, bad_p_two * 3) + + def test_hypergeometric(self): + ngood = [1] + nbad = [2] + nsample = [2] + bad_ngood = [-1] + bad_nbad = [-2] + bad_nsample_one = [0] + bad_nsample_two = [4] + hypergeom = np.random.hypergeometric + desired = np.array([1, 1, 1]) + + self.setSeed() + actual = hypergeom(ngood * 3, nbad, nsample) + assert_array_equal(actual, desired) + assert_raises(ValueError, hypergeom, bad_ngood * 3, nbad, nsample) + assert_raises(ValueError, hypergeom, ngood * 3, bad_nbad, nsample) + assert_raises(ValueError, hypergeom, ngood * 3, nbad, bad_nsample_one) + assert_raises(ValueError, hypergeom, ngood * 3, nbad, bad_nsample_two) + + self.setSeed() + actual = hypergeom(ngood, nbad * 3, nsample) + assert_array_equal(actual, desired) + assert_raises(ValueError, hypergeom, bad_ngood, nbad * 3, nsample) + assert_raises(ValueError, hypergeom, ngood, bad_nbad * 3, nsample) + assert_raises(ValueError, hypergeom, ngood, nbad * 3, bad_nsample_one) + assert_raises(ValueError, hypergeom, ngood, nbad * 3, bad_nsample_two) + + self.setSeed() + actual = hypergeom(ngood, nbad, nsample * 3) + assert_array_equal(actual, desired) + assert_raises(ValueError, hypergeom, bad_ngood, nbad, nsample * 3) + assert_raises(ValueError, hypergeom, ngood, bad_nbad, nsample * 3) + assert_raises(ValueError, hypergeom, ngood, nbad, bad_nsample_one * 3) + assert_raises(ValueError, hypergeom, ngood, nbad, bad_nsample_two * 3) + + def test_logseries(self): + p = [0.5] + bad_p_one = [2] + bad_p_two = [-1] + logseries = np.random.logseries + desired = np.array([1, 1, 1]) + + self.setSeed() + actual = logseries(p * 3) + assert_array_equal(actual, desired) + assert_raises(ValueError, logseries, bad_p_one * 3) + assert_raises(ValueError, logseries, bad_p_two * 3) + + +class TestThread: + # make sure each state produces the same sequence even in threads + def setup(self): + self.seeds = range(4) + + def check_function(self, function, sz): + from threading import Thread + + out1 = np.empty((len(self.seeds),) + sz) + out2 = np.empty((len(self.seeds),) + sz) + + # threaded generation + t = [Thread(target=function, args=(np.random.RandomState(s), o)) + for s, o in zip(self.seeds, out1)] + [x.start() for x in t] + [x.join() for x in t] + + # the same serial + for s, o in zip(self.seeds, out2): + function(np.random.RandomState(s), o) + + # these platforms change x87 fpu precision mode in threads + if np.intp().dtype.itemsize == 4 and sys.platform == "win32": + assert_array_almost_equal(out1, out2) + else: + assert_array_equal(out1, out2) + + def test_normal(self): + def gen_random(state, out): + out[...] 
= state.normal(size=10000) + self.check_function(gen_random, sz=(10000,)) + + def test_exp(self): + def gen_random(state, out): + out[...] = state.exponential(scale=np.ones((100, 1000))) + self.check_function(gen_random, sz=(100, 1000)) + + def test_multinomial(self): + def gen_random(state, out): + out[...] = state.multinomial(10, [1/6.]*6, size=10000) + self.check_function(gen_random, sz=(10000, 6)) + + +# See Issue #4263 +class TestSingleEltArrayInput: + def setup(self): + self.argOne = np.array([2]) + self.argTwo = np.array([3]) + self.argThree = np.array([4]) + self.tgtShape = (1,) + + def test_one_arg_funcs(self): + funcs = (np.random.exponential, np.random.standard_gamma, + np.random.chisquare, np.random.standard_t, + np.random.pareto, np.random.weibull, + np.random.power, np.random.rayleigh, + np.random.poisson, np.random.zipf, + np.random.geometric, np.random.logseries) + + probfuncs = (np.random.geometric, np.random.logseries) + + for func in funcs: + if func in probfuncs: # p < 1.0 + out = func(np.array([0.5])) + + else: + out = func(self.argOne) + + assert_equal(out.shape, self.tgtShape) + + def test_two_arg_funcs(self): + funcs = (np.random.uniform, np.random.normal, + np.random.beta, np.random.gamma, + np.random.f, np.random.noncentral_chisquare, + np.random.vonmises, np.random.laplace, + np.random.gumbel, np.random.logistic, + np.random.lognormal, np.random.wald, + np.random.binomial, np.random.negative_binomial) + + probfuncs = (np.random.binomial, np.random.negative_binomial) + + for func in funcs: + if func in probfuncs: # p <= 1 + argTwo = np.array([0.5]) + + else: + argTwo = self.argTwo + + out = func(self.argOne, argTwo) + assert_equal(out.shape, self.tgtShape) + + out = func(self.argOne[0], argTwo) + assert_equal(out.shape, self.tgtShape) + + out = func(self.argOne, argTwo[0]) + assert_equal(out.shape, self.tgtShape) + +# TODO: Uncomment once randint can broadcast arguments +# def test_randint(self): +# itype = [bool, np.int8, np.uint8, np.int16, np.uint16, +# np.int32, np.uint32, np.int64, np.uint64] +# func = np.random.randint +# high = np.array([1]) +# low = np.array([0]) +# +# for dt in itype: +# out = func(low, high, dtype=dt) +# self.assert_equal(out.shape, self.tgtShape) +# +# out = func(low[0], high, dtype=dt) +# self.assert_equal(out.shape, self.tgtShape) +# +# out = func(low, high[0], dtype=dt) +# self.assert_equal(out.shape, self.tgtShape) + + def test_three_arg_funcs(self): + funcs = [np.random.noncentral_f, np.random.triangular, + np.random.hypergeometric] + + for func in funcs: + out = func(self.argOne, self.argTwo, self.argThree) + assert_equal(out.shape, self.tgtShape) + + out = func(self.argOne[0], self.argTwo, self.argThree) + assert_equal(out.shape, self.tgtShape) + + out = func(self.argOne, self.argTwo[0], self.argThree) + assert_equal(out.shape, self.tgtShape) diff --git a/.local/lib/python3.8/site-packages/numpy/random/tests/test_randomstate.py b/.local/lib/python3.8/site-packages/numpy/random/tests/test_randomstate.py new file mode 100644 index 0000000..861813a --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/tests/test_randomstate.py @@ -0,0 +1,2022 @@ +import hashlib +import pickle +import sys +import warnings + +import numpy as np +import pytest +from numpy.testing import ( + assert_, assert_raises, assert_equal, assert_warns, + assert_no_warnings, assert_array_equal, assert_array_almost_equal, + suppress_warnings + ) + +from numpy.random import MT19937, PCG64 +from numpy import random + +INT_FUNCS = {'binomial': (100.0, 
0.6), + 'geometric': (.5,), + 'hypergeometric': (20, 20, 10), + 'logseries': (.5,), + 'multinomial': (20, np.ones(6) / 6.0), + 'negative_binomial': (100, .5), + 'poisson': (10.0,), + 'zipf': (2,), + } + +if np.iinfo(int).max < 2**32: + # Windows and some 32-bit platforms, e.g., ARM + INT_FUNC_HASHES = {'binomial': '2fbead005fc63942decb5326d36a1f32fe2c9d32c904ee61e46866b88447c263', + 'logseries': '23ead5dcde35d4cfd4ef2c105e4c3d43304b45dc1b1444b7823b9ee4fa144ebb', + 'geometric': '0d764db64f5c3bad48c8c33551c13b4d07a1e7b470f77629bef6c985cac76fcf', + 'hypergeometric': '7b59bf2f1691626c5815cdcd9a49e1dd68697251d4521575219e4d2a1b8b2c67', + 'multinomial': 'd754fa5b92943a38ec07630de92362dd2e02c43577fc147417dc5b9db94ccdd3', + 'negative_binomial': '8eb216f7cb2a63cf55605422845caaff002fddc64a7dc8b2d45acd477a49e824', + 'poisson': '70c891d76104013ebd6f6bcf30d403a9074b886ff62e4e6b8eb605bf1a4673b7', + 'zipf': '01f074f97517cd5d21747148ac6ca4074dde7fcb7acbaec0a936606fecacd93f', + } +else: + INT_FUNC_HASHES = {'binomial': '8626dd9d052cb608e93d8868de0a7b347258b199493871a1dc56e2a26cacb112', + 'geometric': '8edd53d272e49c4fc8fbbe6c7d08d563d62e482921f3131d0a0e068af30f0db9', + 'hypergeometric': '83496cc4281c77b786c9b7ad88b74d42e01603a55c60577ebab81c3ba8d45657', + 'logseries': '65878a38747c176bc00e930ebafebb69d4e1e16cd3a704e264ea8f5e24f548db', + 'multinomial': '7a984ae6dca26fd25374479e118b22f55db0aedccd5a0f2584ceada33db98605', + 'negative_binomial': 'd636d968e6a24ae92ab52fe11c46ac45b0897e98714426764e820a7d77602a61', + 'poisson': '956552176f77e7c9cb20d0118fc9cf690be488d790ed4b4c4747b965e61b0bb4', + 'zipf': 'f84ba7feffda41e606e20b28dfc0f1ea9964a74574513d4a4cbc98433a8bfa45', + } + + +@pytest.fixture(scope='module', params=INT_FUNCS) +def int_func(request): + return (request.param, INT_FUNCS[request.param], + INT_FUNC_HASHES[request.param]) + + +def assert_mt19937_state_equal(a, b): + assert_equal(a['bit_generator'], b['bit_generator']) + assert_array_equal(a['state']['key'], b['state']['key']) + assert_array_equal(a['state']['pos'], b['state']['pos']) + assert_equal(a['has_gauss'], b['has_gauss']) + assert_equal(a['gauss'], b['gauss']) + + +class TestSeed: + def test_scalar(self): + s = random.RandomState(0) + assert_equal(s.randint(1000), 684) + s = random.RandomState(4294967295) + assert_equal(s.randint(1000), 419) + + def test_array(self): + s = random.RandomState(range(10)) + assert_equal(s.randint(1000), 468) + s = random.RandomState(np.arange(10)) + assert_equal(s.randint(1000), 468) + s = random.RandomState([0]) + assert_equal(s.randint(1000), 973) + s = random.RandomState([4294967295]) + assert_equal(s.randint(1000), 265) + + def test_invalid_scalar(self): + # seed must be an unsigned 32 bit integer + assert_raises(TypeError, random.RandomState, -0.5) + assert_raises(ValueError, random.RandomState, -1) + + def test_invalid_array(self): + # seed must be an unsigned 32 bit integer + assert_raises(TypeError, random.RandomState, [-0.5]) + assert_raises(ValueError, random.RandomState, [-1]) + assert_raises(ValueError, random.RandomState, [4294967296]) + assert_raises(ValueError, random.RandomState, [1, 2, 4294967296]) + assert_raises(ValueError, random.RandomState, [1, -2, 4294967296]) + + def test_invalid_array_shape(self): + # gh-9832 + assert_raises(ValueError, random.RandomState, np.array([], + dtype=np.int64)) + assert_raises(ValueError, random.RandomState, [[1, 2, 3]]) + assert_raises(ValueError, random.RandomState, [[1, 2, 3], + [4, 5, 6]]) + + def test_cannot_seed(self): + rs = random.RandomState(PCG64(0)) + 
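+        # Editor's aside (hedged, not part of the upstream test): the
+        # legacy seed() path only knows how to re-seed MT19937; wrapping
+        # any other bit generator (here PCG64) leaves it nowhere to go,
+        # so it raises TypeError.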
with assert_raises(TypeError): + rs.seed(1234) + + def test_invalid_initialization(self): + assert_raises(ValueError, random.RandomState, MT19937) + + +class TestBinomial: + def test_n_zero(self): + # Tests the corner case of n == 0 for the binomial distribution. + # binomial(0, p) should be zero for any p in [0, 1]. + # This test addresses issue #3480. + zeros = np.zeros(2, dtype='int') + for p in [0, .5, 1]: + assert_(random.binomial(0, p) == 0) + assert_array_equal(random.binomial(zeros, p), zeros) + + def test_p_is_nan(self): + # Issue #4571. + assert_raises(ValueError, random.binomial, 1, np.nan) + + +class TestMultinomial: + def test_basic(self): + random.multinomial(100, [0.2, 0.8]) + + def test_zero_probability(self): + random.multinomial(100, [0.2, 0.8, 0.0, 0.0, 0.0]) + + def test_int_negative_interval(self): + assert_(-5 <= random.randint(-5, -1) < -1) + x = random.randint(-5, -1, 5) + assert_(np.all(-5 <= x)) + assert_(np.all(x < -1)) + + def test_size(self): + # gh-3173 + p = [0.5, 0.5] + assert_equal(random.multinomial(1, p, np.uint32(1)).shape, (1, 2)) + assert_equal(random.multinomial(1, p, np.uint32(1)).shape, (1, 2)) + assert_equal(random.multinomial(1, p, np.uint32(1)).shape, (1, 2)) + assert_equal(random.multinomial(1, p, [2, 2]).shape, (2, 2, 2)) + assert_equal(random.multinomial(1, p, (2, 2)).shape, (2, 2, 2)) + assert_equal(random.multinomial(1, p, np.array((2, 2))).shape, + (2, 2, 2)) + + assert_raises(TypeError, random.multinomial, 1, p, + float(1)) + + def test_invalid_prob(self): + assert_raises(ValueError, random.multinomial, 100, [1.1, 0.2]) + assert_raises(ValueError, random.multinomial, 100, [-.1, 0.9]) + + def test_invalid_n(self): + assert_raises(ValueError, random.multinomial, -1, [0.8, 0.2]) + + def test_p_non_contiguous(self): + p = np.arange(15.) + p /= np.sum(p[1::3]) + pvals = p[1::3] + random.seed(1432985819) + non_contig = random.multinomial(100, pvals=pvals) + random.seed(1432985819) + contig = random.multinomial(100, pvals=np.ascontiguousarray(pvals)) + assert_array_equal(non_contig, contig) + + def test_multinomial_pvals_float32(self): + x = np.array([9.9e-01, 9.9e-01, 1.0e-09, 1.0e-09, 1.0e-09, 1.0e-09, + 1.0e-09, 1.0e-09, 1.0e-09, 1.0e-09], dtype=np.float32) + pvals = x / x.sum() + match = r"[\w\s]*pvals array is cast to 64-bit floating" + with pytest.raises(ValueError, match=match): + random.multinomial(1, pvals) + + +class TestSetState: + def setup(self): + self.seed = 1234567890 + self.random_state = random.RandomState(self.seed) + self.state = self.random_state.get_state() + + def test_basic(self): + old = self.random_state.tomaxint(16) + self.random_state.set_state(self.state) + new = self.random_state.tomaxint(16) + assert_(np.all(old == new)) + + def test_gaussian_reset(self): + # Make sure the cached every-other-Gaussian is reset. + old = self.random_state.standard_normal(size=3) + self.random_state.set_state(self.state) + new = self.random_state.standard_normal(size=3) + assert_(np.all(old == new)) + + def test_gaussian_reset_in_media_res(self): + # When the state is saved with a cached Gaussian, make sure the + # cached Gaussian is restored. + + self.random_state.standard_normal() + state = self.random_state.get_state() + old = self.random_state.standard_normal(size=3) + self.random_state.set_state(state) + new = self.random_state.standard_normal(size=3) + assert_(np.all(old == new)) + + def test_backwards_compatibility(self): + # Make sure we can accept old state tuples that do not have the + # cached Gaussian value. 
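+        # Editor's aside (hedged, not part of the upstream test): a full
+        # legacy state tuple is ('MT19937', key, pos, has_gauss,
+        # cached_gaussian); slicing off the last two entries below emulates
+        # the old three-element format that predates the cached-Gaussian
+        # fields.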
+ old_state = self.state[:-2] + x1 = self.random_state.standard_normal(size=16) + self.random_state.set_state(old_state) + x2 = self.random_state.standard_normal(size=16) + self.random_state.set_state(self.state) + x3 = self.random_state.standard_normal(size=16) + assert_(np.all(x1 == x2)) + assert_(np.all(x1 == x3)) + + def test_negative_binomial(self): + # Ensure that the negative binomial results take floating point + # arguments without truncation. + self.random_state.negative_binomial(0.5, 0.5) + + def test_get_state_warning(self): + rs = random.RandomState(PCG64()) + with suppress_warnings() as sup: + w = sup.record(RuntimeWarning) + state = rs.get_state() + assert_(len(w) == 1) + assert isinstance(state, dict) + assert state['bit_generator'] == 'PCG64' + + def test_invalid_legacy_state_setting(self): + state = self.random_state.get_state() + new_state = ('Unknown', ) + state[1:] + assert_raises(ValueError, self.random_state.set_state, new_state) + assert_raises(TypeError, self.random_state.set_state, + np.array(new_state, dtype=object)) + state = self.random_state.get_state(legacy=False) + del state['bit_generator'] + assert_raises(ValueError, self.random_state.set_state, state) + + def test_pickle(self): + self.random_state.seed(0) + self.random_state.random_sample(100) + self.random_state.standard_normal() + pickled = self.random_state.get_state(legacy=False) + assert_equal(pickled['has_gauss'], 1) + rs_unpick = pickle.loads(pickle.dumps(self.random_state)) + unpickled = rs_unpick.get_state(legacy=False) + assert_mt19937_state_equal(pickled, unpickled) + + def test_state_setting(self): + attr_state = self.random_state.__getstate__() + self.random_state.standard_normal() + self.random_state.__setstate__(attr_state) + state = self.random_state.get_state(legacy=False) + assert_mt19937_state_equal(attr_state, state) + + def test_repr(self): + assert repr(self.random_state).startswith('RandomState(MT19937)') + + +class TestRandint: + + rfunc = random.randint + + # valid integer/boolean types + itype = [np.bool_, np.int8, np.uint8, np.int16, np.uint16, + np.int32, np.uint32, np.int64, np.uint64] + + def test_unsupported_type(self): + assert_raises(TypeError, self.rfunc, 1, dtype=float) + + def test_bounds_checking(self): + for dt in self.itype: + lbnd = 0 if dt is np.bool_ else np.iinfo(dt).min + ubnd = 2 if dt is np.bool_ else np.iinfo(dt).max + 1 + assert_raises(ValueError, self.rfunc, lbnd - 1, ubnd, dtype=dt) + assert_raises(ValueError, self.rfunc, lbnd, ubnd + 1, dtype=dt) + assert_raises(ValueError, self.rfunc, ubnd, lbnd, dtype=dt) + assert_raises(ValueError, self.rfunc, 1, 0, dtype=dt) + + def test_rng_zero_and_extremes(self): + for dt in self.itype: + lbnd = 0 if dt is np.bool_ else np.iinfo(dt).min + ubnd = 2 if dt is np.bool_ else np.iinfo(dt).max + 1 + + tgt = ubnd - 1 + assert_equal(self.rfunc(tgt, tgt + 1, size=1000, dtype=dt), tgt) + + tgt = lbnd + assert_equal(self.rfunc(tgt, tgt + 1, size=1000, dtype=dt), tgt) + + tgt = (lbnd + ubnd)//2 + assert_equal(self.rfunc(tgt, tgt + 1, size=1000, dtype=dt), tgt) + + def test_full_range(self): + # Test for ticket #1690 + + for dt in self.itype: + lbnd = 0 if dt is np.bool_ else np.iinfo(dt).min + ubnd = 2 if dt is np.bool_ else np.iinfo(dt).max + 1 + + try: + self.rfunc(lbnd, ubnd, dtype=dt) + except Exception as e: + raise AssertionError("No error should have been raised, " + "but one was with the following " + "message:\n\n%s" % str(e)) + + def test_in_bounds_fuzz(self): + # Don't use fixed seed + random.seed() + + for dt in 
self.itype[1:]: + for ubnd in [4, 8, 16]: + vals = self.rfunc(2, ubnd, size=2**16, dtype=dt) + assert_(vals.max() < ubnd) + assert_(vals.min() >= 2) + + vals = self.rfunc(0, 2, size=2**16, dtype=np.bool_) + + assert_(vals.max() < 2) + assert_(vals.min() >= 0) + + def test_repeatability(self): + # We use a sha256 hash of generated sequences of 1000 samples + # in the range [0, 6) for all but bool, where the range + # is [0, 2). Hashes are for little endian numbers. + tgt = {'bool': '509aea74d792fb931784c4b0135392c65aec64beee12b0cc167548a2c3d31e71', + 'int16': '7b07f1a920e46f6d0fe02314155a2330bcfd7635e708da50e536c5ebb631a7d4', + 'int32': 'e577bfed6c935de944424667e3da285012e741892dcb7051a8f1ce68ab05c92f', + 'int64': '0fbead0b06759df2cfb55e43148822d4a1ff953c7eb19a5b08445a63bb64fa9e', + 'int8': '001aac3a5acb935a9b186cbe14a1ca064b8bb2dd0b045d48abeacf74d0203404', + 'uint16': '7b07f1a920e46f6d0fe02314155a2330bcfd7635e708da50e536c5ebb631a7d4', + 'uint32': 'e577bfed6c935de944424667e3da285012e741892dcb7051a8f1ce68ab05c92f', + 'uint64': '0fbead0b06759df2cfb55e43148822d4a1ff953c7eb19a5b08445a63bb64fa9e', + 'uint8': '001aac3a5acb935a9b186cbe14a1ca064b8bb2dd0b045d48abeacf74d0203404'} + + for dt in self.itype[1:]: + random.seed(1234) + + # view as little endian for hash + if sys.byteorder == 'little': + val = self.rfunc(0, 6, size=1000, dtype=dt) + else: + val = self.rfunc(0, 6, size=1000, dtype=dt).byteswap() + + res = hashlib.sha256(val.view(np.int8)).hexdigest() + assert_(tgt[np.dtype(dt).name] == res) + + # bools do not depend on endianness + random.seed(1234) + val = self.rfunc(0, 2, size=1000, dtype=bool).view(np.int8) + res = hashlib.sha256(val).hexdigest() + assert_(tgt[np.dtype(bool).name] == res) + + @pytest.mark.skipif(np.iinfo('l').max < 2**32, + reason='Cannot test with 32-bit C long') + def test_repeatability_32bit_boundary_broadcasting(self): + desired = np.array([[[3992670689, 2438360420, 2557845020], + [4107320065, 4142558326, 3216529513], + [1605979228, 2807061240, 665605495]], + [[3211410639, 4128781000, 457175120], + [1712592594, 1282922662, 3081439808], + [3997822960, 2008322436, 1563495165]], + [[1398375547, 4269260146, 115316740], + [3414372578, 3437564012, 2112038651], + [3572980305, 2260248732, 3908238631]], + [[2561372503, 223155946, 3127879445], + [ 441282060, 3514786552, 2148440361], + [1629275283, 3479737011, 3003195987]], + [[ 412181688, 940383289, 3047321305], + [2978368172, 764731833, 2282559898], + [ 105711276, 720447391, 3596512484]]]) + for size in [None, (5, 3, 3)]: + random.seed(12345) + x = self.rfunc([[-1], [0], [1]], [2**32 - 1, 2**32, 2**32 + 1], + size=size) + assert_array_equal(x, desired if size is not None else desired[0]) + + def test_int64_uint64_corner_case(self): + # When stored in Numpy arrays, `lbnd` is casted + # as np.int64, and `ubnd` is casted as np.uint64. + # Checking whether `lbnd` >= `ubnd` used to be + # done solely via direct comparison, which is incorrect + # because when Numpy tries to compare both numbers, + # it casts both to np.float64 because there is + # no integer superset of np.int64 and np.uint64. However, + # `ubnd` is too large to be represented in np.float64, + # causing it be round down to np.iinfo(np.int64).max, + # leading to a ValueError because `lbnd` now equals + # the new `ubnd`. + + dt = np.int64 + tgt = np.iinfo(np.int64).max + lbnd = np.int64(np.iinfo(np.int64).max) + ubnd = np.uint64(np.iinfo(np.int64).max + 1) + + # None of these function calls should + # generate a ValueError now. 
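+        # Editor's aside (hedged, not part of the upstream test): the
+        # failure mode described above is visible without randint -- on
+        # the NumPy versions this test targets, an int64/uint64 comparison
+        # promotes both sides to float64, where 2**63 - 1 and 2**63 round
+        # to the same value, e.g.
+        #     np.int64(2**63 - 1) < np.uint64(2**63)   # -> False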
+ actual = random.randint(lbnd, ubnd, dtype=dt) + assert_equal(actual, tgt) + + def test_respect_dtype_singleton(self): + # See gh-7203 + for dt in self.itype: + lbnd = 0 if dt is np.bool_ else np.iinfo(dt).min + ubnd = 2 if dt is np.bool_ else np.iinfo(dt).max + 1 + + sample = self.rfunc(lbnd, ubnd, dtype=dt) + assert_equal(sample.dtype, np.dtype(dt)) + + for dt in (bool, int, np.compat.long): + lbnd = 0 if dt is bool else np.iinfo(dt).min + ubnd = 2 if dt is bool else np.iinfo(dt).max + 1 + + # gh-7284: Ensure that we get Python data types + sample = self.rfunc(lbnd, ubnd, dtype=dt) + assert_(not hasattr(sample, 'dtype')) + assert_equal(type(sample), dt) + + +class TestRandomDist: + # Make sure the random distribution returns the correct value for a + # given seed + + def setup(self): + self.seed = 1234567890 + + def test_rand(self): + random.seed(self.seed) + actual = random.rand(3, 2) + desired = np.array([[0.61879477158567997, 0.59162362775974664], + [0.88868358904449662, 0.89165480011560816], + [0.4575674820298663, 0.7781880808593471]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_rand_singleton(self): + random.seed(self.seed) + actual = random.rand() + desired = 0.61879477158567997 + assert_array_almost_equal(actual, desired, decimal=15) + + def test_randn(self): + random.seed(self.seed) + actual = random.randn(3, 2) + desired = np.array([[1.34016345771863121, 1.73759122771936081], + [1.498988344300628, -0.2286433324536169], + [2.031033998682787, 2.17032494605655257]]) + assert_array_almost_equal(actual, desired, decimal=15) + + random.seed(self.seed) + actual = random.randn() + assert_array_almost_equal(actual, desired[0, 0], decimal=15) + + def test_randint(self): + random.seed(self.seed) + actual = random.randint(-99, 99, size=(3, 2)) + desired = np.array([[31, 3], + [-52, 41], + [-48, -66]]) + assert_array_equal(actual, desired) + + def test_random_integers(self): + random.seed(self.seed) + with suppress_warnings() as sup: + w = sup.record(DeprecationWarning) + actual = random.random_integers(-99, 99, size=(3, 2)) + assert_(len(w) == 1) + desired = np.array([[31, 3], + [-52, 41], + [-48, -66]]) + assert_array_equal(actual, desired) + + random.seed(self.seed) + with suppress_warnings() as sup: + w = sup.record(DeprecationWarning) + actual = random.random_integers(198, size=(3, 2)) + assert_(len(w) == 1) + assert_array_equal(actual, desired + 100) + + def test_tomaxint(self): + random.seed(self.seed) + rs = random.RandomState(self.seed) + actual = rs.tomaxint(size=(3, 2)) + if np.iinfo(int).max == 2147483647: + desired = np.array([[1328851649, 731237375], + [1270502067, 320041495], + [1908433478, 499156889]], dtype=np.int64) + else: + desired = np.array([[5707374374421908479, 5456764827585442327], + [8196659375100692377, 8224063923314595285], + [4220315081820346526, 7177518203184491332]], + dtype=np.int64) + + assert_equal(actual, desired) + + rs.seed(self.seed) + actual = rs.tomaxint() + assert_equal(actual, desired[0, 0]) + + def test_random_integers_max_int(self): + # Tests whether random_integers can generate the + # maximum allowed Python int that can be converted + # into a C long. Previous implementations of this + # method have thrown an OverflowError when attempting + # to generate this integer. 
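+        # Editor's aside (hedged, not part of the upstream test):
+        # np.iinfo('l') describes the platform C long, so this boundary is
+        # 2**31 - 1 on Windows and 32-bit builds but 2**63 - 1 on typical
+        # 64-bit Unix builds.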
+ with suppress_warnings() as sup: + w = sup.record(DeprecationWarning) + actual = random.random_integers(np.iinfo('l').max, + np.iinfo('l').max) + assert_(len(w) == 1) + + desired = np.iinfo('l').max + assert_equal(actual, desired) + with suppress_warnings() as sup: + w = sup.record(DeprecationWarning) + typer = np.dtype('l').type + actual = random.random_integers(typer(np.iinfo('l').max), + typer(np.iinfo('l').max)) + assert_(len(w) == 1) + assert_equal(actual, desired) + + def test_random_integers_deprecated(self): + with warnings.catch_warnings(): + warnings.simplefilter("error", DeprecationWarning) + + # DeprecationWarning raised with high == None + assert_raises(DeprecationWarning, + random.random_integers, + np.iinfo('l').max) + + # DeprecationWarning raised with high != None + assert_raises(DeprecationWarning, + random.random_integers, + np.iinfo('l').max, np.iinfo('l').max) + + def test_random_sample(self): + random.seed(self.seed) + actual = random.random_sample((3, 2)) + desired = np.array([[0.61879477158567997, 0.59162362775974664], + [0.88868358904449662, 0.89165480011560816], + [0.4575674820298663, 0.7781880808593471]]) + assert_array_almost_equal(actual, desired, decimal=15) + + random.seed(self.seed) + actual = random.random_sample() + assert_array_almost_equal(actual, desired[0, 0], decimal=15) + + def test_choice_uniform_replace(self): + random.seed(self.seed) + actual = random.choice(4, 4) + desired = np.array([2, 3, 2, 3]) + assert_array_equal(actual, desired) + + def test_choice_nonuniform_replace(self): + random.seed(self.seed) + actual = random.choice(4, 4, p=[0.4, 0.4, 0.1, 0.1]) + desired = np.array([1, 1, 2, 2]) + assert_array_equal(actual, desired) + + def test_choice_uniform_noreplace(self): + random.seed(self.seed) + actual = random.choice(4, 3, replace=False) + desired = np.array([0, 1, 3]) + assert_array_equal(actual, desired) + + def test_choice_nonuniform_noreplace(self): + random.seed(self.seed) + actual = random.choice(4, 3, replace=False, p=[0.1, 0.3, 0.5, 0.1]) + desired = np.array([2, 3, 1]) + assert_array_equal(actual, desired) + + def test_choice_noninteger(self): + random.seed(self.seed) + actual = random.choice(['a', 'b', 'c', 'd'], 4) + desired = np.array(['c', 'd', 'c', 'd']) + assert_array_equal(actual, desired) + + def test_choice_exceptions(self): + sample = random.choice + assert_raises(ValueError, sample, -1, 3) + assert_raises(ValueError, sample, 3., 3) + assert_raises(ValueError, sample, [[1, 2], [3, 4]], 3) + assert_raises(ValueError, sample, [], 3) + assert_raises(ValueError, sample, [1, 2, 3, 4], 3, + p=[[0.25, 0.25], [0.25, 0.25]]) + assert_raises(ValueError, sample, [1, 2], 3, p=[0.4, 0.4, 0.2]) + assert_raises(ValueError, sample, [1, 2], 3, p=[1.1, -0.1]) + assert_raises(ValueError, sample, [1, 2], 3, p=[0.4, 0.4]) + assert_raises(ValueError, sample, [1, 2, 3], 4, replace=False) + # gh-13087 + assert_raises(ValueError, sample, [1, 2, 3], -2, replace=False) + assert_raises(ValueError, sample, [1, 2, 3], (-1,), replace=False) + assert_raises(ValueError, sample, [1, 2, 3], (-1, 1), replace=False) + assert_raises(ValueError, sample, [1, 2, 3], 2, + replace=False, p=[1, 0, 0]) + + def test_choice_return_shape(self): + p = [0.1, 0.9] + # Check scalar + assert_(np.isscalar(random.choice(2, replace=True))) + assert_(np.isscalar(random.choice(2, replace=False))) + assert_(np.isscalar(random.choice(2, replace=True, p=p))) + assert_(np.isscalar(random.choice(2, replace=False, p=p))) + assert_(np.isscalar(random.choice([1, 2], replace=True))) + 
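+        # Editor's aside (hedged, not part of the upstream test): with
+        # size omitted, choice unwraps its result to a scalar; passing
+        # size=() instead (checked in the 0-d block just below) yields a
+        # 0-d array, mirroring the size convention of the other
+        # distribution methods.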
assert_(random.choice([None], replace=True) is None) + a = np.array([1, 2]) + arr = np.empty(1, dtype=object) + arr[0] = a + assert_(random.choice(arr, replace=True) is a) + + # Check 0-d array + s = tuple() + assert_(not np.isscalar(random.choice(2, s, replace=True))) + assert_(not np.isscalar(random.choice(2, s, replace=False))) + assert_(not np.isscalar(random.choice(2, s, replace=True, p=p))) + assert_(not np.isscalar(random.choice(2, s, replace=False, p=p))) + assert_(not np.isscalar(random.choice([1, 2], s, replace=True))) + assert_(random.choice([None], s, replace=True).ndim == 0) + a = np.array([1, 2]) + arr = np.empty(1, dtype=object) + arr[0] = a + assert_(random.choice(arr, s, replace=True).item() is a) + + # Check multi dimensional array + s = (2, 3) + p = [0.1, 0.1, 0.1, 0.1, 0.4, 0.2] + assert_equal(random.choice(6, s, replace=True).shape, s) + assert_equal(random.choice(6, s, replace=False).shape, s) + assert_equal(random.choice(6, s, replace=True, p=p).shape, s) + assert_equal(random.choice(6, s, replace=False, p=p).shape, s) + assert_equal(random.choice(np.arange(6), s, replace=True).shape, s) + + # Check zero-size + assert_equal(random.randint(0, 0, size=(3, 0, 4)).shape, (3, 0, 4)) + assert_equal(random.randint(0, -10, size=0).shape, (0,)) + assert_equal(random.randint(10, 10, size=0).shape, (0,)) + assert_equal(random.choice(0, size=0).shape, (0,)) + assert_equal(random.choice([], size=(0,)).shape, (0,)) + assert_equal(random.choice(['a', 'b'], size=(3, 0, 4)).shape, + (3, 0, 4)) + assert_raises(ValueError, random.choice, [], 10) + + def test_choice_nan_probabilities(self): + a = np.array([42, 1, 2]) + p = [None, None, None] + assert_raises(ValueError, random.choice, a, p=p) + + def test_choice_p_non_contiguous(self): + p = np.ones(10) / 5 + p[1::2] = 3.0 + random.seed(self.seed) + non_contig = random.choice(5, 3, p=p[::2]) + random.seed(self.seed) + contig = random.choice(5, 3, p=np.ascontiguousarray(p[::2])) + assert_array_equal(non_contig, contig) + + def test_bytes(self): + random.seed(self.seed) + actual = random.bytes(10) + desired = b'\x82Ui\x9e\xff\x97+Wf\xa5' + assert_equal(actual, desired) + + def test_shuffle(self): + # Test lists, arrays (of various dtypes), and multidimensional versions + # of both, c-contiguous or not: + for conv in [lambda x: np.array([]), + lambda x: x, + lambda x: np.asarray(x).astype(np.int8), + lambda x: np.asarray(x).astype(np.float32), + lambda x: np.asarray(x).astype(np.complex64), + lambda x: np.asarray(x).astype(object), + lambda x: [(i, i) for i in x], + lambda x: np.asarray([[i, i] for i in x]), + lambda x: np.vstack([x, x]).T, + # gh-11442 + lambda x: (np.asarray([(i, i) for i in x], + [("a", int), ("b", int)]) + .view(np.recarray)), + # gh-4270 + lambda x: np.asarray([(i, i) for i in x], + [("a", object, (1,)), + ("b", np.int32, (1,))])]: + random.seed(self.seed) + alist = conv([1, 2, 3, 4, 5, 6, 7, 8, 9, 0]) + random.shuffle(alist) + actual = alist + desired = conv([0, 1, 9, 6, 2, 4, 5, 8, 7, 3]) + assert_array_equal(actual, desired) + + def test_shuffle_masked(self): + # gh-3263 + a = np.ma.masked_values(np.reshape(range(20), (5, 4)) % 3 - 1, -1) + b = np.ma.masked_values(np.arange(20) % 3 - 1, -1) + a_orig = a.copy() + b_orig = b.copy() + for i in range(50): + random.shuffle(a) + assert_equal( + sorted(a.data[~a.mask]), sorted(a_orig.data[~a_orig.mask])) + random.shuffle(b) + assert_equal( + sorted(b.data[~b.mask]), sorted(b_orig.data[~b_orig.mask])) + + def test_shuffle_invalid_objects(self): + x = np.array(3) + 
assert_raises(TypeError, random.shuffle, x) + + def test_permutation(self): + random.seed(self.seed) + alist = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] + actual = random.permutation(alist) + desired = [0, 1, 9, 6, 2, 4, 5, 8, 7, 3] + assert_array_equal(actual, desired) + + random.seed(self.seed) + arr_2d = np.atleast_2d([1, 2, 3, 4, 5, 6, 7, 8, 9, 0]).T + actual = random.permutation(arr_2d) + assert_array_equal(actual, np.atleast_2d(desired).T) + + random.seed(self.seed) + bad_x_str = "abcd" + assert_raises(IndexError, random.permutation, bad_x_str) + + random.seed(self.seed) + bad_x_float = 1.2 + assert_raises(IndexError, random.permutation, bad_x_float) + + integer_val = 10 + desired = [9, 0, 8, 5, 1, 3, 4, 7, 6, 2] + + random.seed(self.seed) + actual = random.permutation(integer_val) + assert_array_equal(actual, desired) + + def test_beta(self): + random.seed(self.seed) + actual = random.beta(.1, .9, size=(3, 2)) + desired = np.array( + [[1.45341850513746058e-02, 5.31297615662868145e-04], + [1.85366619058432324e-06, 4.19214516800110563e-03], + [1.58405155108498093e-04, 1.26252891949397652e-04]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_binomial(self): + random.seed(self.seed) + actual = random.binomial(100.123, .456, size=(3, 2)) + desired = np.array([[37, 43], + [42, 48], + [46, 45]]) + assert_array_equal(actual, desired) + + random.seed(self.seed) + actual = random.binomial(100.123, .456) + desired = 37 + assert_array_equal(actual, desired) + + def test_chisquare(self): + random.seed(self.seed) + actual = random.chisquare(50, size=(3, 2)) + desired = np.array([[63.87858175501090585, 68.68407748911370447], + [65.77116116901505904, 47.09686762438974483], + [72.3828403199695174, 74.18408615260374006]]) + assert_array_almost_equal(actual, desired, decimal=13) + + def test_dirichlet(self): + random.seed(self.seed) + alpha = np.array([51.72840233779265162, 39.74494232180943953]) + actual = random.dirichlet(alpha, size=(3, 2)) + desired = np.array([[[0.54539444573611562, 0.45460555426388438], + [0.62345816822039413, 0.37654183177960598]], + [[0.55206000085785778, 0.44793999914214233], + [0.58964023305154301, 0.41035976694845688]], + [[0.59266909280647828, 0.40733090719352177], + [0.56974431743975207, 0.43025568256024799]]]) + assert_array_almost_equal(actual, desired, decimal=15) + bad_alpha = np.array([5.4e-01, -1.0e-16]) + assert_raises(ValueError, random.dirichlet, bad_alpha) + + random.seed(self.seed) + alpha = np.array([51.72840233779265162, 39.74494232180943953]) + actual = random.dirichlet(alpha) + assert_array_almost_equal(actual, desired[0, 0], decimal=15) + + def test_dirichlet_size(self): + # gh-3173 + p = np.array([51.72840233779265162, 39.74494232180943953]) + assert_equal(random.dirichlet(p, np.uint32(1)).shape, (1, 2)) + assert_equal(random.dirichlet(p, np.uint32(1)).shape, (1, 2)) + assert_equal(random.dirichlet(p, np.uint32(1)).shape, (1, 2)) + assert_equal(random.dirichlet(p, [2, 2]).shape, (2, 2, 2)) + assert_equal(random.dirichlet(p, (2, 2)).shape, (2, 2, 2)) + assert_equal(random.dirichlet(p, np.array((2, 2))).shape, (2, 2, 2)) + + assert_raises(TypeError, random.dirichlet, p, float(1)) + + def test_dirichlet_bad_alpha(self): + # gh-2089 + alpha = np.array([5.4e-01, -1.0e-16]) + assert_raises(ValueError, random.dirichlet, alpha) + + def test_dirichlet_alpha_non_contiguous(self): + a = np.array([51.72840233779265162, -1.0, 39.74494232180943953]) + alpha = a[::2] + random.seed(self.seed) + non_contig = random.dirichlet(alpha, size=(3, 2)) + 
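# re-seeding and passing a contiguous copy of alpha must reproduce the draws exactly +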
random.seed(self.seed) + contig = random.dirichlet(np.ascontiguousarray(alpha), + size=(3, 2)) + assert_array_almost_equal(non_contig, contig) + + def test_exponential(self): + random.seed(self.seed) + actual = random.exponential(1.1234, size=(3, 2)) + desired = np.array([[1.08342649775011624, 1.00607889924557314], + [2.46628830085216721, 2.49668106809923884], + [0.68717433461363442, 1.69175666993575979]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_exponential_0(self): + assert_equal(random.exponential(scale=0), 0) + assert_raises(ValueError, random.exponential, scale=-0.) + + def test_f(self): + random.seed(self.seed) + actual = random.f(12, 77, size=(3, 2)) + desired = np.array([[1.21975394418575878, 1.75135759791559775], + [1.44803115017146489, 1.22108959480396262], + [1.02176975757740629, 1.34431827623300415]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_gamma(self): + random.seed(self.seed) + actual = random.gamma(5, 3, size=(3, 2)) + desired = np.array([[24.60509188649287182, 28.54993563207210627], + [26.13476110204064184, 12.56988482927716078], + [31.71863275789960568, 33.30143302795922011]]) + assert_array_almost_equal(actual, desired, decimal=14) + + def test_gamma_0(self): + assert_equal(random.gamma(shape=0, scale=0), 0) + assert_raises(ValueError, random.gamma, shape=-0., scale=-0.) + + def test_geometric(self): + random.seed(self.seed) + actual = random.geometric(.123456789, size=(3, 2)) + desired = np.array([[8, 7], + [17, 17], + [5, 12]]) + assert_array_equal(actual, desired) + + def test_geometric_exceptions(self): + assert_raises(ValueError, random.geometric, 1.1) + assert_raises(ValueError, random.geometric, [1.1] * 10) + assert_raises(ValueError, random.geometric, -0.1) + assert_raises(ValueError, random.geometric, [-0.1] * 10) + with suppress_warnings() as sup: + sup.record(RuntimeWarning) + assert_raises(ValueError, random.geometric, np.nan) + assert_raises(ValueError, random.geometric, [np.nan] * 10) + + def test_gumbel(self): + random.seed(self.seed) + actual = random.gumbel(loc=.123456789, scale=2.0, size=(3, 2)) + desired = np.array([[0.19591898743416816, 0.34405539668096674], + [-1.4492522252274278, -1.47374816298446865], + [1.10651090478803416, -0.69535848626236174]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_gumbel_0(self): + assert_equal(random.gumbel(scale=0), 0) + assert_raises(ValueError, random.gumbel, scale=-0.) 
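+ # scale=-0. counts as negative and is rejected like any other negative scale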
+ + def test_hypergeometric(self): + random.seed(self.seed) + actual = random.hypergeometric(10.1, 5.5, 14, size=(3, 2)) + desired = np.array([[10, 10], + [10, 10], + [9, 9]]) + assert_array_equal(actual, desired) + + # Test nbad = 0 + actual = random.hypergeometric(5, 0, 3, size=4) + desired = np.array([3, 3, 3, 3]) + assert_array_equal(actual, desired) + + actual = random.hypergeometric(15, 0, 12, size=4) + desired = np.array([12, 12, 12, 12]) + assert_array_equal(actual, desired) + + # Test ngood = 0 + actual = random.hypergeometric(0, 5, 3, size=4) + desired = np.array([0, 0, 0, 0]) + assert_array_equal(actual, desired) + + actual = random.hypergeometric(0, 15, 12, size=4) + desired = np.array([0, 0, 0, 0]) + assert_array_equal(actual, desired) + + def test_laplace(self): + random.seed(self.seed) + actual = random.laplace(loc=.123456789, scale=2.0, size=(3, 2)) + desired = np.array([[0.66599721112760157, 0.52829452552221945], + [3.12791959514407125, 3.18202813572992005], + [-0.05391065675859356, 1.74901336242837324]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_laplace_0(self): + assert_equal(random.laplace(scale=0), 0) + assert_raises(ValueError, random.laplace, scale=-0.) + + def test_logistic(self): + random.seed(self.seed) + actual = random.logistic(loc=.123456789, scale=2.0, size=(3, 2)) + desired = np.array([[1.09232835305011444, 0.8648196662399954], + [4.27818590694950185, 4.33897006346929714], + [-0.21682183359214885, 2.63373365386060332]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_lognormal(self): + random.seed(self.seed) + actual = random.lognormal(mean=.123456789, sigma=2.0, size=(3, 2)) + desired = np.array([[16.50698631688883822, 36.54846706092654784], + [22.67886599981281748, 0.71617561058995771], + [65.72798501792723869, 86.84341601437161273]]) + assert_array_almost_equal(actual, desired, decimal=13) + + def test_lognormal_0(self): + assert_equal(random.lognormal(sigma=0), 1) + assert_raises(ValueError, random.lognormal, sigma=-0.) + + def test_logseries(self): + random.seed(self.seed) + actual = random.logseries(p=.923456789, size=(3, 2)) + desired = np.array([[2, 2], + [6, 17], + [3, 6]]) + assert_array_equal(actual, desired) + + def test_logseries_exceptions(self): + with suppress_warnings() as sup: + sup.record(RuntimeWarning) + assert_raises(ValueError, random.logseries, np.nan) + assert_raises(ValueError, random.logseries, [np.nan] * 10) + + def test_multinomial(self): + random.seed(self.seed) + actual = random.multinomial(20, [1 / 6.] 
* 6, size=(3, 2)) + desired = np.array([[[4, 3, 5, 4, 2, 2], + [5, 2, 8, 2, 2, 1]], + [[3, 4, 3, 6, 0, 4], + [2, 1, 4, 3, 6, 4]], + [[4, 4, 2, 5, 2, 3], + [4, 3, 4, 2, 3, 4]]]) + assert_array_equal(actual, desired) + + def test_multivariate_normal(self): + random.seed(self.seed) + mean = (.123456789, 10) + cov = [[1, 0], [0, 1]] + size = (3, 2) + actual = random.multivariate_normal(mean, cov, size) + desired = np.array([[[1.463620246718631, 11.73759122771936], + [1.622445133300628, 9.771356667546383]], + [[2.154490787682787, 12.170324946056553], + [1.719909438201865, 9.230548443648306]], + [[0.689515026297799, 9.880729819607714], + [-0.023054015651998, 9.201096623542879]]]) + + assert_array_almost_equal(actual, desired, decimal=15) + + # Check for default size, was raising deprecation warning + actual = random.multivariate_normal(mean, cov) + desired = np.array([0.895289569463708, 9.17180864067987]) + assert_array_almost_equal(actual, desired, decimal=15) + + # Check that a non-positive-semidefinite covariance warns with + # RuntimeWarning + mean = [0, 0] + cov = [[1, 2], [2, 1]] + assert_warns(RuntimeWarning, random.multivariate_normal, mean, cov) + + # and that it does not warn when check_valid='ignore' + assert_no_warnings(random.multivariate_normal, mean, cov, + check_valid='ignore') + + # and that it raises a ValueError when check_valid='raise' + assert_raises(ValueError, random.multivariate_normal, mean, cov, + check_valid='raise') + + cov = np.array([[1, 0.1], [0.1, 1]], dtype=np.float32) + with suppress_warnings() as sup: + random.multivariate_normal(mean, cov) + w = sup.record(RuntimeWarning) + assert len(w) == 0 + + mu = np.zeros(2) + cov = np.eye(2) + assert_raises(ValueError, random.multivariate_normal, mean, cov, + check_valid='other') + assert_raises(ValueError, random.multivariate_normal, + np.zeros((2, 1, 1)), cov) + assert_raises(ValueError, random.multivariate_normal, + mu, np.empty((3, 2))) + assert_raises(ValueError, random.multivariate_normal, + mu, np.eye(3)) + + def test_negative_binomial(self): + random.seed(self.seed) + actual = random.negative_binomial(n=100, p=.12345, size=(3, 2)) + desired = np.array([[848, 841], + [892, 611], + [779, 647]]) + assert_array_equal(actual, desired) + + def test_negative_binomial_exceptions(self): + with suppress_warnings() as sup: + sup.record(RuntimeWarning) + assert_raises(ValueError, random.negative_binomial, 100, np.nan) + assert_raises(ValueError, random.negative_binomial, 100, + [np.nan] * 10) + + def test_noncentral_chisquare(self): + random.seed(self.seed) + actual = random.noncentral_chisquare(df=5, nonc=5, size=(3, 2)) + desired = np.array([[23.91905354498517511, 13.35324692733826346], + [31.22452661329736401, 16.60047399466177254], + [5.03461598262724586, 17.94973089023519464]]) + assert_array_almost_equal(actual, desired, decimal=14) + + actual = random.noncentral_chisquare(df=.5, nonc=.2, size=(3, 2)) + desired = np.array([[1.47145377828516666, 0.15052899268012659], + [0.00943803056963588, 1.02647251615666169], + [0.332334982684171, 0.15451287602753125]]) + assert_array_almost_equal(actual, desired, decimal=14) + + random.seed(self.seed) + actual = random.noncentral_chisquare(df=5, nonc=0, size=(3, 2)) + desired = np.array([[9.597154162763948, 11.725484450296079], + [10.413711048138335, 3.694475922923986], + [13.484222138963087, 14.377255424602957]]) + assert_array_almost_equal(actual, desired, decimal=14) + + def test_noncentral_f(self): + random.seed(self.seed) + actual = random.noncentral_f(dfnum=5, 
dfden=2, nonc=1, + size=(3, 2)) + desired = np.array([[1.40598099674926669, 0.34207973179285761], + [3.57715069265772545, 7.92632662577829805], + [0.43741599463544162, 1.1774208752428319]]) + assert_array_almost_equal(actual, desired, decimal=14) + + def test_noncentral_f_nan(self): + random.seed(self.seed) + actual = random.noncentral_f(dfnum=5, dfden=2, nonc=np.nan) + assert np.isnan(actual) + + def test_normal(self): + random.seed(self.seed) + actual = random.normal(loc=.123456789, scale=2.0, size=(3, 2)) + desired = np.array([[2.80378370443726244, 3.59863924443872163], + [3.121433477601256, -0.33382987590723379], + [4.18552478636557357, 4.46410668111310471]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_normal_0(self): + assert_equal(random.normal(scale=0), 0) + assert_raises(ValueError, random.normal, scale=-0.) + + def test_pareto(self): + random.seed(self.seed) + actual = random.pareto(a=.123456789, size=(3, 2)) + desired = np.array( + [[2.46852460439034849e+03, 1.41286880810518346e+03], + [5.28287797029485181e+07, 6.57720981047328785e+07], + [1.40840323350391515e+02, 1.98390255135251704e+05]]) + # For some reason on 32-bit x86 Ubuntu 12.10 the [1, 0] entry in this + # matrix differs by 24 nulps. Discussion: + # https://mail.python.org/pipermail/numpy-discussion/2012-September/063801.html + # Consensus is that this is probably some gcc quirk that affects + # rounding but not in any important way, so we just use a looser + # tolerance on this test: + np.testing.assert_array_almost_equal_nulp(actual, desired, nulp=30) + + def test_poisson(self): + random.seed(self.seed) + actual = random.poisson(lam=.123456789, size=(3, 2)) + desired = np.array([[0, 0], + [1, 0], + [0, 0]]) + assert_array_equal(actual, desired) + + def test_poisson_exceptions(self): + lambig = np.iinfo('l').max + lamneg = -1 + assert_raises(ValueError, random.poisson, lamneg) + assert_raises(ValueError, random.poisson, [lamneg] * 10) + assert_raises(ValueError, random.poisson, lambig) + assert_raises(ValueError, random.poisson, [lambig] * 10) + with suppress_warnings() as sup: + sup.record(RuntimeWarning) + assert_raises(ValueError, random.poisson, np.nan) + assert_raises(ValueError, random.poisson, [np.nan] * 10) + + def test_power(self): + random.seed(self.seed) + actual = random.power(a=.123456789, size=(3, 2)) + desired = np.array([[0.02048932883240791, 0.01424192241128213], + [0.38446073748535298, 0.39499689943484395], + [0.00177699707563439, 0.13115505880863756]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_rayleigh(self): + random.seed(self.seed) + actual = random.rayleigh(scale=10, size=(3, 2)) + desired = np.array([[13.8882496494248393, 13.383318339044731], + [20.95413364294492098, 21.08285015800712614], + [11.06066537006854311, 17.35468505778271009]]) + assert_array_almost_equal(actual, desired, decimal=14) + + def test_rayleigh_0(self): + assert_equal(random.rayleigh(scale=0), 0) + assert_raises(ValueError, random.rayleigh, scale=-0.) 
+ + def test_standard_cauchy(self): + random.seed(self.seed) + actual = random.standard_cauchy(size=(3, 2)) + desired = np.array([[0.77127660196445336, -6.55601161955910605], + [0.93582023391158309, -2.07479293013759447], + [-4.74601644297011926, 0.18338989290760804]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_standard_exponential(self): + random.seed(self.seed) + actual = random.standard_exponential(size=(3, 2)) + desired = np.array([[0.96441739162374596, 0.89556604882105506], + [2.1953785836319808, 2.22243285392490542], + [0.6116915921431676, 1.50592546727413201]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_standard_gamma(self): + random.seed(self.seed) + actual = random.standard_gamma(shape=3, size=(3, 2)) + desired = np.array([[5.50841531318455058, 6.62953470301903103], + [5.93988484943779227, 2.31044849402133989], + [7.54838614231317084, 8.012756093271868]]) + assert_array_almost_equal(actual, desired, decimal=14) + + def test_standard_gamma_0(self): + assert_equal(random.standard_gamma(shape=0), 0) + assert_raises(ValueError, random.standard_gamma, shape=-0.) + + def test_standard_normal(self): + random.seed(self.seed) + actual = random.standard_normal(size=(3, 2)) + desired = np.array([[1.34016345771863121, 1.73759122771936081], + [1.498988344300628, -0.2286433324536169], + [2.031033998682787, 2.17032494605655257]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_randn_singleton(self): + random.seed(self.seed) + actual = random.randn() + desired = np.array(1.34016345771863121) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_standard_t(self): + random.seed(self.seed) + actual = random.standard_t(df=10, size=(3, 2)) + desired = np.array([[0.97140611862659965, -0.08830486548450577], + [1.36311143689505321, -0.55317463909867071], + [-0.18473749069684214, 0.61181537341755321]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_triangular(self): + random.seed(self.seed) + actual = random.triangular(left=5.12, mode=10.23, right=20.34, + size=(3, 2)) + desired = np.array([[12.68117178949215784, 12.4129206149193152], + [16.20131377335158263, 16.25692138747600524], + [11.20400690911820263, 14.4978144835829923]]) + assert_array_almost_equal(actual, desired, decimal=14) + + def test_uniform(self): + random.seed(self.seed) + actual = random.uniform(low=1.23, high=10.54, size=(3, 2)) + desired = np.array([[6.99097932346268003, 6.73801597444323974], + [9.50364421400426274, 9.53130618907631089], + [5.48995325769805476, 8.47493103280052118]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_uniform_range_bounds(self): + fmin = np.finfo('float').min + fmax = np.finfo('float').max + + func = random.uniform + assert_raises(OverflowError, func, -np.inf, 0) + assert_raises(OverflowError, func, 0, np.inf) + assert_raises(OverflowError, func, fmin, fmax) + assert_raises(OverflowError, func, [-np.inf], [0]) + assert_raises(OverflowError, func, [0], [np.inf]) + + # (fmax / 1e17) - fmin is within range, so this should not throw + # account for i386 extended precision DBL_MAX / 1e17 + DBL_MAX > + # DBL_MAX by increasing fmin a bit + random.uniform(low=np.nextafter(fmin, 1), high=fmax / 1e17) + + def test_scalar_exception_propagation(self): + # Tests that exceptions are correctly propagated in distributions + # when called with objects that throw exceptions when converted to + # scalars. 
+ # + # Regression test for gh: 8865 + + class ThrowingFloat(np.ndarray): + def __float__(self): + raise TypeError + + throwing_float = np.array(1.0).view(ThrowingFloat) + assert_raises(TypeError, random.uniform, throwing_float, + throwing_float) + + class ThrowingInteger(np.ndarray): + def __int__(self): + raise TypeError + + throwing_int = np.array(1).view(ThrowingInteger) + assert_raises(TypeError, random.hypergeometric, throwing_int, 1, 1) + + def test_vonmises(self): + random.seed(self.seed) + actual = random.vonmises(mu=1.23, kappa=1.54, size=(3, 2)) + desired = np.array([[2.28567572673902042, 2.89163838442285037], + [0.38198375564286025, 2.57638023113890746], + [1.19153771588353052, 1.83509849681825354]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_vonmises_small(self): + # check infinite loop, gh-4720 + random.seed(self.seed) + r = random.vonmises(mu=0., kappa=1.1e-8, size=10**6) + assert_(np.isfinite(r).all()) + + def test_vonmises_large(self): + # guard against changes in RandomState when Generator is fixed + random.seed(self.seed) + actual = random.vonmises(mu=0., kappa=1e7, size=3) + desired = np.array([4.634253748521111e-04, + 3.558873596114509e-04, + -2.337119622577433e-04]) + assert_array_almost_equal(actual, desired, decimal=8) + + def test_vonmises_nan(self): + random.seed(self.seed) + r = random.vonmises(mu=0., kappa=np.nan) + assert_(np.isnan(r)) + + def test_wald(self): + random.seed(self.seed) + actual = random.wald(mean=1.23, scale=1.54, size=(3, 2)) + desired = np.array([[3.82935265715889983, 5.13125249184285526], + [0.35045403618358717, 1.50832396872003538], + [0.24124319895843183, 0.22031101461955038]]) + assert_array_almost_equal(actual, desired, decimal=14) + + def test_weibull(self): + random.seed(self.seed) + actual = random.weibull(a=1.23, size=(3, 2)) + desired = np.array([[0.97097342648766727, 0.91422896443565516], + [1.89517770034962929, 1.91414357960479564], + [0.67057783752390987, 1.39494046635066793]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_weibull_0(self): + random.seed(self.seed) + assert_equal(random.weibull(a=0, size=12), np.zeros(12)) + assert_raises(ValueError, random.weibull, a=-0.) 
+ + def test_zipf(self): + random.seed(self.seed) + actual = random.zipf(a=1.23, size=(3, 2)) + desired = np.array([[66, 29], + [1, 1], + [3, 13]]) + assert_array_equal(actual, desired) + + +class TestBroadcast: + # tests that functions that broadcast behave + # correctly when presented with non-scalar arguments + def setup(self): + self.seed = 123456789 + + def set_seed(self): + random.seed(self.seed) + + def test_uniform(self): + low = [0] + high = [1] + uniform = random.uniform + desired = np.array([0.53283302478975902, + 0.53413660089041659, + 0.50955303552646702]) + + self.set_seed() + actual = uniform(low * 3, high) + assert_array_almost_equal(actual, desired, decimal=14) + + self.set_seed() + actual = uniform(low, high * 3) + assert_array_almost_equal(actual, desired, decimal=14) + + def test_normal(self): + loc = [0] + scale = [1] + bad_scale = [-1] + normal = random.normal + desired = np.array([2.2129019979039612, + 2.1283977976520019, + 1.8417114045748335]) + + self.set_seed() + actual = normal(loc * 3, scale) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, normal, loc * 3, bad_scale) + + self.set_seed() + actual = normal(loc, scale * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, normal, loc, bad_scale * 3) + + def test_beta(self): + a = [1] + b = [2] + bad_a = [-1] + bad_b = [-2] + beta = random.beta + desired = np.array([0.19843558305989056, + 0.075230336409423643, + 0.24976865978980844]) + + self.set_seed() + actual = beta(a * 3, b) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, beta, bad_a * 3, b) + assert_raises(ValueError, beta, a * 3, bad_b) + + self.set_seed() + actual = beta(a, b * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, beta, bad_a, b * 3) + assert_raises(ValueError, beta, a, bad_b * 3) + + def test_exponential(self): + scale = [1] + bad_scale = [-1] + exponential = random.exponential + desired = np.array([0.76106853658845242, + 0.76386282278691653, + 0.71243813125891797]) + + self.set_seed() + actual = exponential(scale * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, exponential, bad_scale * 3) + + def test_standard_gamma(self): + shape = [1] + bad_shape = [-1] + std_gamma = random.standard_gamma + desired = np.array([0.76106853658845242, + 0.76386282278691653, + 0.71243813125891797]) + + self.set_seed() + actual = std_gamma(shape * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, std_gamma, bad_shape * 3) + + def test_gamma(self): + shape = [1] + scale = [2] + bad_shape = [-1] + bad_scale = [-2] + gamma = random.gamma + desired = np.array([1.5221370731769048, + 1.5277256455738331, + 1.4248762625178359]) + + self.set_seed() + actual = gamma(shape * 3, scale) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, gamma, bad_shape * 3, scale) + assert_raises(ValueError, gamma, shape * 3, bad_scale) + + self.set_seed() + actual = gamma(shape, scale * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, gamma, bad_shape, scale * 3) + assert_raises(ValueError, gamma, shape, bad_scale * 3) + + def test_f(self): + dfnum = [1] + dfden = [2] + bad_dfnum = [-1] + bad_dfden = [-2] + f = random.f + desired = np.array([0.80038951638264799, + 0.86768719635363512, + 2.7251095168386801]) + + self.set_seed() + actual = f(dfnum * 3, dfden) + assert_array_almost_equal(actual, 
desired, decimal=14) + assert_raises(ValueError, f, bad_dfnum * 3, dfden) + assert_raises(ValueError, f, dfnum * 3, bad_dfden) + + self.set_seed() + actual = f(dfnum, dfden * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, f, bad_dfnum, dfden * 3) + assert_raises(ValueError, f, dfnum, bad_dfden * 3) + + def test_noncentral_f(self): + dfnum = [2] + dfden = [3] + nonc = [4] + bad_dfnum = [0] + bad_dfden = [-1] + bad_nonc = [-2] + nonc_f = random.noncentral_f + desired = np.array([9.1393943263705211, + 13.025456344595602, + 8.8018098359100545]) + + self.set_seed() + actual = nonc_f(dfnum * 3, dfden, nonc) + assert_array_almost_equal(actual, desired, decimal=14) + assert np.all(np.isnan(nonc_f(dfnum, dfden, [np.nan] * 3))) + + assert_raises(ValueError, nonc_f, bad_dfnum * 3, dfden, nonc) + assert_raises(ValueError, nonc_f, dfnum * 3, bad_dfden, nonc) + assert_raises(ValueError, nonc_f, dfnum * 3, dfden, bad_nonc) + + self.set_seed() + actual = nonc_f(dfnum, dfden * 3, nonc) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, nonc_f, bad_dfnum, dfden * 3, nonc) + assert_raises(ValueError, nonc_f, dfnum, bad_dfden * 3, nonc) + assert_raises(ValueError, nonc_f, dfnum, dfden * 3, bad_nonc) + + self.set_seed() + actual = nonc_f(dfnum, dfden, nonc * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, nonc_f, bad_dfnum, dfden, nonc * 3) + assert_raises(ValueError, nonc_f, dfnum, bad_dfden, nonc * 3) + assert_raises(ValueError, nonc_f, dfnum, dfden, bad_nonc * 3) + + def test_noncentral_f_small_df(self): + self.set_seed() + desired = np.array([6.869638627492048, 0.785880199263955]) + actual = random.noncentral_f(0.9, 0.9, 2, size=2) + assert_array_almost_equal(actual, desired, decimal=14) + + def test_chisquare(self): + df = [1] + bad_df = [-1] + chisquare = random.chisquare + desired = np.array([0.57022801133088286, + 0.51947702108840776, + 0.1320969254923558]) + + self.set_seed() + actual = chisquare(df * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, chisquare, bad_df * 3) + + def test_noncentral_chisquare(self): + df = [1] + nonc = [2] + bad_df = [-1] + bad_nonc = [-2] + nonc_chi = random.noncentral_chisquare + desired = np.array([9.0015599467913763, + 4.5804135049718742, + 6.0872302432834564]) + + self.set_seed() + actual = nonc_chi(df * 3, nonc) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, nonc_chi, bad_df * 3, nonc) + assert_raises(ValueError, nonc_chi, df * 3, bad_nonc) + + self.set_seed() + actual = nonc_chi(df, nonc * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, nonc_chi, bad_df, nonc * 3) + assert_raises(ValueError, nonc_chi, df, bad_nonc * 3) + + def test_standard_t(self): + df = [1] + bad_df = [-1] + t = random.standard_t + desired = np.array([3.0702872575217643, + 5.8560725167361607, + 1.0274791436474273]) + + self.set_seed() + actual = t(df * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, t, bad_df * 3) + assert_raises(ValueError, random.standard_t, bad_df * 3) + + def test_vonmises(self): + mu = [2] + kappa = [1] + bad_kappa = [-1] + vonmises = random.vonmises + desired = np.array([2.9883443664201312, + -2.7064099483995943, + -1.8672476700665914]) + + self.set_seed() + actual = vonmises(mu * 3, kappa) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, vonmises, mu * 3, bad_kappa) + + 
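# broadcasting kappa instead of mu from the same seed must give identical draws +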
self.set_seed() + actual = vonmises(mu, kappa * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, vonmises, mu, bad_kappa * 3) + + def test_pareto(self): + a = [1] + bad_a = [-1] + pareto = random.pareto + desired = np.array([1.1405622680198362, + 1.1465519762044529, + 1.0389564467453547]) + + self.set_seed() + actual = pareto(a * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, pareto, bad_a * 3) + assert_raises(ValueError, random.pareto, bad_a * 3) + + def test_weibull(self): + a = [1] + bad_a = [-1] + weibull = random.weibull + desired = np.array([0.76106853658845242, + 0.76386282278691653, + 0.71243813125891797]) + + self.set_seed() + actual = weibull(a * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, weibull, bad_a * 3) + assert_raises(ValueError, random.weibull, bad_a * 3) + + def test_power(self): + a = [1] + bad_a = [-1] + power = random.power + desired = np.array([0.53283302478975902, + 0.53413660089041659, + 0.50955303552646702]) + + self.set_seed() + actual = power(a * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, power, bad_a * 3) + assert_raises(ValueError, random.power, bad_a * 3) + + def test_laplace(self): + loc = [0] + scale = [1] + bad_scale = [-1] + laplace = random.laplace + desired = np.array([0.067921356028507157, + 0.070715642226971326, + 0.019290950698972624]) + + self.set_seed() + actual = laplace(loc * 3, scale) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, laplace, loc * 3, bad_scale) + + self.set_seed() + actual = laplace(loc, scale * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, laplace, loc, bad_scale * 3) + + def test_gumbel(self): + loc = [0] + scale = [1] + bad_scale = [-1] + gumbel = random.gumbel + desired = np.array([0.2730318639556768, + 0.26936705726291116, + 0.33906220393037939]) + + self.set_seed() + actual = gumbel(loc * 3, scale) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, gumbel, loc * 3, bad_scale) + + self.set_seed() + actual = gumbel(loc, scale * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, gumbel, loc, bad_scale * 3) + + def test_logistic(self): + loc = [0] + scale = [1] + bad_scale = [-1] + logistic = random.logistic + desired = np.array([0.13152135837586171, + 0.13675915696285773, + 0.038216792802833396]) + + self.set_seed() + actual = logistic(loc * 3, scale) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, logistic, loc * 3, bad_scale) + + self.set_seed() + actual = logistic(loc, scale * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, logistic, loc, bad_scale * 3) + assert_equal(random.logistic(1.0, 0.0), 1.0) + + def test_lognormal(self): + mean = [0] + sigma = [1] + bad_sigma = [-1] + lognormal = random.lognormal + desired = np.array([9.1422086044848427, + 8.4013952870126261, + 6.3073234116578671]) + + self.set_seed() + actual = lognormal(mean * 3, sigma) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, lognormal, mean * 3, bad_sigma) + assert_raises(ValueError, random.lognormal, mean * 3, bad_sigma) + + self.set_seed() + actual = lognormal(mean, sigma * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, lognormal, mean, bad_sigma * 3) + assert_raises(ValueError, 
random.lognormal, mean, bad_sigma * 3) + + def test_rayleigh(self): + scale = [1] + bad_scale = [-1] + rayleigh = random.rayleigh + desired = np.array([1.2337491937897689, + 1.2360119924878694, + 1.1936818095781789]) + + self.set_seed() + actual = rayleigh(scale * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, rayleigh, bad_scale * 3) + + def test_wald(self): + mean = [0.5] + scale = [1] + bad_mean = [0] + bad_scale = [-2] + wald = random.wald + desired = np.array([0.11873681120271318, + 0.12450084820795027, + 0.9096122728408238]) + + self.set_seed() + actual = wald(mean * 3, scale) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, wald, bad_mean * 3, scale) + assert_raises(ValueError, wald, mean * 3, bad_scale) + assert_raises(ValueError, random.wald, bad_mean * 3, scale) + assert_raises(ValueError, random.wald, mean * 3, bad_scale) + + self.set_seed() + actual = wald(mean, scale * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, wald, bad_mean, scale * 3) + assert_raises(ValueError, wald, mean, bad_scale * 3) + assert_raises(ValueError, wald, 0.0, 1) + assert_raises(ValueError, wald, 0.5, 0.0) + + def test_triangular(self): + left = [1] + right = [3] + mode = [2] + bad_left_one = [3] + bad_mode_one = [4] + bad_left_two, bad_mode_two = right * 2 + triangular = random.triangular + desired = np.array([2.03339048710429, + 2.0347400359389356, + 2.0095991069536208]) + + self.set_seed() + actual = triangular(left * 3, mode, right) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, triangular, bad_left_one * 3, mode, right) + assert_raises(ValueError, triangular, left * 3, bad_mode_one, right) + assert_raises(ValueError, triangular, bad_left_two * 3, bad_mode_two, + right) + + self.set_seed() + actual = triangular(left, mode * 3, right) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, triangular, bad_left_one, mode * 3, right) + assert_raises(ValueError, triangular, left, bad_mode_one * 3, right) + assert_raises(ValueError, triangular, bad_left_two, bad_mode_two * 3, + right) + + self.set_seed() + actual = triangular(left, mode, right * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, triangular, bad_left_one, mode, right * 3) + assert_raises(ValueError, triangular, left, bad_mode_one, right * 3) + assert_raises(ValueError, triangular, bad_left_two, bad_mode_two, + right * 3) + + assert_raises(ValueError, triangular, 10., 0., 20.) + assert_raises(ValueError, triangular, 10., 25., 20.) + assert_raises(ValueError, triangular, 10., 10., 10.) 
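+ # a mode outside [left, right] or a zero-width interval is invalid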
+ + def test_binomial(self): + n = [1] + p = [0.5] + bad_n = [-1] + bad_p_one = [-1] + bad_p_two = [1.5] + binom = random.binomial + desired = np.array([1, 1, 1]) + + self.set_seed() + actual = binom(n * 3, p) + assert_array_equal(actual, desired) + assert_raises(ValueError, binom, bad_n * 3, p) + assert_raises(ValueError, binom, n * 3, bad_p_one) + assert_raises(ValueError, binom, n * 3, bad_p_two) + + self.set_seed() + actual = binom(n, p * 3) + assert_array_equal(actual, desired) + assert_raises(ValueError, binom, bad_n, p * 3) + assert_raises(ValueError, binom, n, bad_p_one * 3) + assert_raises(ValueError, binom, n, bad_p_two * 3) + + def test_negative_binomial(self): + n = [1] + p = [0.5] + bad_n = [-1] + bad_p_one = [-1] + bad_p_two = [1.5] + neg_binom = random.negative_binomial + desired = np.array([1, 0, 1]) + + self.set_seed() + actual = neg_binom(n * 3, p) + assert_array_equal(actual, desired) + assert_raises(ValueError, neg_binom, bad_n * 3, p) + assert_raises(ValueError, neg_binom, n * 3, bad_p_one) + assert_raises(ValueError, neg_binom, n * 3, bad_p_two) + + self.set_seed() + actual = neg_binom(n, p * 3) + assert_array_equal(actual, desired) + assert_raises(ValueError, neg_binom, bad_n, p * 3) + assert_raises(ValueError, neg_binom, n, bad_p_one * 3) + assert_raises(ValueError, neg_binom, n, bad_p_two * 3) + + def test_poisson(self): + max_lam = random.RandomState()._poisson_lam_max + + lam = [1] + bad_lam_one = [-1] + bad_lam_two = [max_lam * 2] + poisson = random.poisson + desired = np.array([1, 1, 0]) + + self.set_seed() + actual = poisson(lam * 3) + assert_array_equal(actual, desired) + assert_raises(ValueError, poisson, bad_lam_one * 3) + assert_raises(ValueError, poisson, bad_lam_two * 3) + + def test_zipf(self): + a = [2] + bad_a = [0] + zipf = random.zipf + desired = np.array([2, 2, 1]) + + self.set_seed() + actual = zipf(a * 3) + assert_array_equal(actual, desired) + assert_raises(ValueError, zipf, bad_a * 3) + with np.errstate(invalid='ignore'): + assert_raises(ValueError, zipf, np.nan) + assert_raises(ValueError, zipf, [0, 0, np.nan]) + + def test_geometric(self): + p = [0.5] + bad_p_one = [-1] + bad_p_two = [1.5] + geom = random.geometric + desired = np.array([2, 2, 2]) + + self.set_seed() + actual = geom(p * 3) + assert_array_equal(actual, desired) + assert_raises(ValueError, geom, bad_p_one * 3) + assert_raises(ValueError, geom, bad_p_two * 3) + + def test_hypergeometric(self): + ngood = [1] + nbad = [2] + nsample = [2] + bad_ngood = [-1] + bad_nbad = [-2] + bad_nsample_one = [0] + bad_nsample_two = [4] + hypergeom = random.hypergeometric + desired = np.array([1, 1, 1]) + + self.set_seed() + actual = hypergeom(ngood * 3, nbad, nsample) + assert_array_equal(actual, desired) + assert_raises(ValueError, hypergeom, bad_ngood * 3, nbad, nsample) + assert_raises(ValueError, hypergeom, ngood * 3, bad_nbad, nsample) + assert_raises(ValueError, hypergeom, ngood * 3, nbad, bad_nsample_one) + assert_raises(ValueError, hypergeom, ngood * 3, nbad, bad_nsample_two) + + self.set_seed() + actual = hypergeom(ngood, nbad * 3, nsample) + assert_array_equal(actual, desired) + assert_raises(ValueError, hypergeom, bad_ngood, nbad * 3, nsample) + assert_raises(ValueError, hypergeom, ngood, bad_nbad * 3, nsample) + assert_raises(ValueError, hypergeom, ngood, nbad * 3, bad_nsample_one) + assert_raises(ValueError, hypergeom, ngood, nbad * 3, bad_nsample_two) + + self.set_seed() + actual = hypergeom(ngood, nbad, nsample * 3) + assert_array_equal(actual, desired) + assert_raises(ValueError, 
hypergeom, bad_ngood, nbad, nsample * 3) + assert_raises(ValueError, hypergeom, ngood, bad_nbad, nsample * 3) + assert_raises(ValueError, hypergeom, ngood, nbad, bad_nsample_one * 3) + assert_raises(ValueError, hypergeom, ngood, nbad, bad_nsample_two * 3) + + assert_raises(ValueError, hypergeom, -1, 10, 20) + assert_raises(ValueError, hypergeom, 10, -1, 20) + assert_raises(ValueError, hypergeom, 10, 10, 0) + assert_raises(ValueError, hypergeom, 10, 10, 25) + + def test_logseries(self): + p = [0.5] + bad_p_one = [2] + bad_p_two = [-1] + logseries = random.logseries + desired = np.array([1, 1, 1]) + + self.set_seed() + actual = logseries(p * 3) + assert_array_equal(actual, desired) + assert_raises(ValueError, logseries, bad_p_one * 3) + assert_raises(ValueError, logseries, bad_p_two * 3) + + +class TestThread: + # make sure each state produces the same sequence even in threads + def setup(self): + self.seeds = range(4) + + def check_function(self, function, sz): + from threading import Thread + + out1 = np.empty((len(self.seeds),) + sz) + out2 = np.empty((len(self.seeds),) + sz) + + # threaded generation + t = [Thread(target=function, args=(random.RandomState(s), o)) + for s, o in zip(self.seeds, out1)] + [x.start() for x in t] + [x.join() for x in t] + + # the same serial + for s, o in zip(self.seeds, out2): + function(random.RandomState(s), o) + + # these platforms change x87 fpu precision mode in threads + if np.intp().dtype.itemsize == 4 and sys.platform == "win32": + assert_array_almost_equal(out1, out2) + else: + assert_array_equal(out1, out2) + + def test_normal(self): + def gen_random(state, out): + out[...] = state.normal(size=10000) + + self.check_function(gen_random, sz=(10000,)) + + def test_exp(self): + def gen_random(state, out): + out[...] = state.exponential(scale=np.ones((100, 1000))) + + self.check_function(gen_random, sz=(100, 1000)) + + def test_multinomial(self): + def gen_random(state, out): + out[...] = state.multinomial(10, [1 / 6.] 
* 6, size=10000) + + self.check_function(gen_random, sz=(10000, 6)) + + +# See Issue #4263 +class TestSingleEltArrayInput: + def setup(self): + self.argOne = np.array([2]) + self.argTwo = np.array([3]) + self.argThree = np.array([4]) + self.tgtShape = (1,) + + def test_one_arg_funcs(self): + funcs = (random.exponential, random.standard_gamma, + random.chisquare, random.standard_t, + random.pareto, random.weibull, + random.power, random.rayleigh, + random.poisson, random.zipf, + random.geometric, random.logseries) + + probfuncs = (random.geometric, random.logseries) + + for func in funcs: + if func in probfuncs: # p < 1.0 + out = func(np.array([0.5])) + + else: + out = func(self.argOne) + + assert_equal(out.shape, self.tgtShape) + + def test_two_arg_funcs(self): + funcs = (random.uniform, random.normal, + random.beta, random.gamma, + random.f, random.noncentral_chisquare, + random.vonmises, random.laplace, + random.gumbel, random.logistic, + random.lognormal, random.wald, + random.binomial, random.negative_binomial) + + probfuncs = (random.binomial, random.negative_binomial) + + for func in funcs: + if func in probfuncs: # p <= 1 + argTwo = np.array([0.5]) + + else: + argTwo = self.argTwo + + out = func(self.argOne, argTwo) + assert_equal(out.shape, self.tgtShape) + + out = func(self.argOne[0], argTwo) + assert_equal(out.shape, self.tgtShape) + + out = func(self.argOne, argTwo[0]) + assert_equal(out.shape, self.tgtShape) + + def test_three_arg_funcs(self): + funcs = [random.noncentral_f, random.triangular, + random.hypergeometric] + + for func in funcs: + out = func(self.argOne, self.argTwo, self.argThree) + assert_equal(out.shape, self.tgtShape) + + out = func(self.argOne[0], self.argTwo, self.argThree) + assert_equal(out.shape, self.tgtShape) + + out = func(self.argOne, self.argTwo[0], self.argThree) + assert_equal(out.shape, self.tgtShape) + + +# Ensure returned array dtype is correct for platform +def test_integer_dtype(int_func): + random.seed(123456789) + fname, args, sha256 = int_func + f = getattr(random, fname) + actual = f(*args, size=2) + assert_(actual.dtype == np.dtype('l')) + + +def test_integer_repeat(int_func): + random.seed(123456789) + fname, args, sha256 = int_func + f = getattr(random, fname) + val = f(*args, size=1000000) + if sys.byteorder != 'little': + val = val.byteswap() + res = hashlib.sha256(val.view(np.int8)).hexdigest() + assert_(res == sha256) + + +def test_broadcast_size_error(): + # GH-16833 + with pytest.raises(ValueError): + random.binomial(1, [0.3, 0.7], size=(2, 1)) + with pytest.raises(ValueError): + random.binomial([1, 2], 0.3, size=(2, 1)) + with pytest.raises(ValueError): + random.binomial([1, 2], [0.3, 0.7], size=(2, 1)) diff --git a/.local/lib/python3.8/site-packages/numpy/random/tests/test_randomstate_regression.py b/.local/lib/python3.8/site-packages/numpy/random/tests/test_randomstate_regression.py new file mode 100644 index 0000000..7ad19ab --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/tests/test_randomstate_regression.py @@ -0,0 +1,216 @@ +import sys + +import pytest + +from numpy.testing import ( + assert_, assert_array_equal, assert_raises, + ) +import numpy as np + +from numpy import random + + +class TestRegression: + + def test_VonMises_range(self): + # Make sure generated random variables are in [-pi, pi]. + # Regression test for ticket #986. 
+ for mu in np.linspace(-7., 7., 5): + r = random.vonmises(mu, 1, 50) + assert_(np.all(r > -np.pi) and np.all(r <= np.pi)) + + def test_hypergeometric_range(self): + # Test for ticket #921 + assert_(np.all(random.hypergeometric(3, 18, 11, size=10) < 4)) + assert_(np.all(random.hypergeometric(18, 3, 11, size=10) > 0)) + + # Test for ticket #5623 + args = [ + (2**20 - 2, 2**20 - 2, 2**20 - 2), # Check for 32-bit systems + ] + is_64bits = sys.maxsize > 2**32 + if is_64bits and sys.platform != 'win32': + # Check for 64-bit systems + args.append((2**40 - 2, 2**40 - 2, 2**40 - 2)) + for arg in args: + assert_(random.hypergeometric(*arg) > 0) + + def test_logseries_convergence(self): + # Test for ticket #923 + N = 1000 + random.seed(0) + rvsn = random.logseries(0.8, size=N) + # these two frequency counts should be close to theoretical + # numbers with this large sample + # theoretical large N result is 0.49706795 + freq = np.sum(rvsn == 1) / N + msg = f'Frequency was {freq:f}, should be > 0.45' + assert_(freq > 0.45, msg) + # theoretical large N result is 0.19882718 + freq = np.sum(rvsn == 2) / N + msg = f'Frequency was {freq:f}, should be < 0.23' + assert_(freq < 0.23, msg) + + def test_shuffle_mixed_dimension(self): + # Test for trac ticket #2074 + for t in [[1, 2, 3, None], + [(1, 1), (2, 2), (3, 3), None], + [1, (2, 2), (3, 3), None], + [(1, 1), 2, 3, None]]: + random.seed(12345) + shuffled = list(t) + random.shuffle(shuffled) + expected = np.array([t[0], t[3], t[1], t[2]], dtype=object) + assert_array_equal(np.array(shuffled, dtype=object), expected) + + def test_call_within_randomstate(self): + # Check that custom RandomState does not call into global state + m = random.RandomState() + res = np.array([0, 8, 7, 2, 1, 9, 4, 7, 0, 3]) + for i in range(3): + random.seed(i) + m.seed(4321) + # If m.state is not honored, the result will change + assert_array_equal(m.choice(10, size=10, p=np.ones(10)/10.), res) + + def test_multivariate_normal_size_types(self): + # Test for multivariate_normal issue with 'size' argument. + # Check that the multivariate_normal size argument can be a + # numpy integer. + random.multivariate_normal([0], [[0]], size=1) + random.multivariate_normal([0], [[0]], size=np.int_(1)) + random.multivariate_normal([0], [[0]], size=np.int64(1)) + + def test_beta_small_parameters(self): + # Test that beta with small a and b parameters does not produce + # NaNs due to roundoff errors causing 0 / 0, gh-5851 + random.seed(1234567890) + x = random.beta(0.0001, 0.0001, size=100) + assert_(not np.any(np.isnan(x)), 'Nans in random.beta') + + def test_choice_sum_of_probs_tolerance(self): + # The sum of probs should be 1.0 with some tolerance. + # For low precision dtypes the tolerance was too tight. + # See numpy github issue 6123. + random.seed(1234) + a = [1, 2, 3] + counts = [4, 4, 2] + for dt in np.float16, np.float32, np.float64: + probs = np.array(counts, dtype=dt) / sum(counts) + c = random.choice(a, p=probs) + assert_(c in a) + assert_raises(ValueError, random.choice, a, p=probs*0.9) + + def test_shuffle_of_array_of_different_length_strings(self): + # Test that permuting an array of different length strings + # will not cause a segfault on garbage collection + # Tests gh-7710 + random.seed(1234) + + a = np.array(['a', 'a' * 1000]) + + for _ in range(100): + random.shuffle(a) + + # Force Garbage Collection - should not segfault. 
+ import gc + gc.collect() + + def test_shuffle_of_array_of_objects(self): + # Test that permuting an array of objects will not cause + # a segfault on garbage collection. + # See gh-7719 + random.seed(1234) + a = np.array([np.arange(1), np.arange(4)], dtype=object) + + for _ in range(1000): + random.shuffle(a) + + # Force Garbage Collection - should not segfault. + import gc + gc.collect() + + def test_permutation_subclass(self): + class N(np.ndarray): + pass + + random.seed(1) + orig = np.arange(3).view(N) + perm = random.permutation(orig) + assert_array_equal(perm, np.array([0, 2, 1])) + assert_array_equal(orig, np.arange(3).view(N)) + + class M: + a = np.arange(5) + + def __array__(self): + return self.a + + random.seed(1) + m = M() + perm = random.permutation(m) + assert_array_equal(perm, np.array([2, 1, 4, 0, 3])) + assert_array_equal(m.__array__(), np.arange(5)) + + def test_warns_byteorder(self): + # GH 13159 + other_byteord_dt = 'i4' + with pytest.deprecated_call(match='non-native byteorder is not'): + random.randint(0, 200, size=10, dtype=other_byteord_dt) + + def test_named_argument_initialization(self): + # GH 13669 + rs1 = np.random.RandomState(123456789) + rs2 = np.random.RandomState(seed=123456789) + assert rs1.randint(0, 100) == rs2.randint(0, 100) + + def test_choice_retun_dtype(self): + # GH 9867 + c = np.random.choice(10, p=[.1]*10, size=2) + assert c.dtype == np.dtype(int) + c = np.random.choice(10, p=[.1]*10, replace=False, size=2) + assert c.dtype == np.dtype(int) + c = np.random.choice(10, size=2) + assert c.dtype == np.dtype(int) + c = np.random.choice(10, replace=False, size=2) + assert c.dtype == np.dtype(int) + + @pytest.mark.skipif(np.iinfo('l').max < 2**32, + reason='Cannot test with 32-bit C long') + def test_randint_117(self): + # GH 14189 + random.seed(0) + expected = np.array([2357136044, 2546248239, 3071714933, 3626093760, + 2588848963, 3684848379, 2340255427, 3638918503, + 1819583497, 2678185683], dtype='int64') + actual = random.randint(2**32, size=10) + assert_array_equal(actual, expected) + + def test_p_zero_stream(self): + # Regression test for gh-14522. Ensure that future versions + # generate the same variates as version 1.16. + np.random.seed(12345) + assert_array_equal(random.binomial(1, [0, 0.25, 0.5, 0.75, 1]), + [0, 0, 0, 1, 1]) + + def test_n_zero_stream(self): + # Regression test for gh-14522. Ensure that future versions + # generate the same variates as version 1.16. 
+ np.random.seed(8675309) + expected = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [3, 4, 2, 3, 3, 1, 5, 3, 1, 3]]) + assert_array_equal(random.binomial([[0], [10]], 0.25, size=(2, 10)), + expected) + + +def test_multinomial_empty(): + # gh-20483 + # Ensure that empty p-vals are correctly handled + assert random.multinomial(10, []).shape == (0,) + assert random.multinomial(3, [], size=(7, 5, 3)).shape == (7, 5, 3, 0) + + +def test_multinomial_1d_pval(): + # gh-20483 + with pytest.raises(TypeError, match="pvals must be a 1-d"): + random.multinomial(10, 0.3) diff --git a/.local/lib/python3.8/site-packages/numpy/random/tests/test_regression.py b/.local/lib/python3.8/site-packages/numpy/random/tests/test_regression.py new file mode 100644 index 0000000..8bf4198 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/tests/test_regression.py @@ -0,0 +1,149 @@ +import sys +from numpy.testing import ( + assert_, assert_array_equal, assert_raises, + ) +from numpy import random +import numpy as np + + +class TestRegression: + + def test_VonMises_range(self): + # Make sure generated random variables are in [-pi, pi]. + # Regression test for ticket #986. + for mu in np.linspace(-7., 7., 5): + r = random.mtrand.vonmises(mu, 1, 50) + assert_(np.all(r > -np.pi) and np.all(r <= np.pi)) + + def test_hypergeometric_range(self): + # Test for ticket #921 + assert_(np.all(np.random.hypergeometric(3, 18, 11, size=10) < 4)) + assert_(np.all(np.random.hypergeometric(18, 3, 11, size=10) > 0)) + + # Test for ticket #5623 + args = [ + (2**20 - 2, 2**20 - 2, 2**20 - 2), # Check for 32-bit systems + ] + is_64bits = sys.maxsize > 2**32 + if is_64bits and sys.platform != 'win32': + # Check for 64-bit systems + args.append((2**40 - 2, 2**40 - 2, 2**40 - 2)) + for arg in args: + assert_(np.random.hypergeometric(*arg) > 0) + + def test_logseries_convergence(self): + # Test for ticket #923 + N = 1000 + np.random.seed(0) + rvsn = np.random.logseries(0.8, size=N) + # these two frequency counts should be close to theoretical + # numbers with this large sample + # theoretical large N result is 0.49706795 + freq = np.sum(rvsn == 1) / N + msg = f'Frequency was {freq:f}, should be > 0.45' + assert_(freq > 0.45, msg) + # theoretical large N result is 0.19882718 + freq = np.sum(rvsn == 2) / N + msg = f'Frequency was {freq:f}, should be < 0.23' + assert_(freq < 0.23, msg) + + def test_shuffle_mixed_dimension(self): + # Test for trac ticket #2074 + for t in [[1, 2, 3, None], + [(1, 1), (2, 2), (3, 3), None], + [1, (2, 2), (3, 3), None], + [(1, 1), 2, 3, None]]: + np.random.seed(12345) + shuffled = list(t) + random.shuffle(shuffled) + expected = np.array([t[0], t[3], t[1], t[2]], dtype=object) + assert_array_equal(np.array(shuffled, dtype=object), expected) + + def test_call_within_randomstate(self): + # Check that custom RandomState does not call into global state + m = np.random.RandomState() + res = np.array([0, 8, 7, 2, 1, 9, 4, 7, 0, 3]) + for i in range(3): + np.random.seed(i) + m.seed(4321) + # If m.state is not honored, the result will change + assert_array_equal(m.choice(10, size=10, p=np.ones(10)/10.), res) + + def test_multivariate_normal_size_types(self): + # Test for multivariate_normal issue with 'size' argument. + # Check that the multivariate_normal size argument can be a + # numpy integer. 
+ np.random.multivariate_normal([0], [[0]], size=1) + np.random.multivariate_normal([0], [[0]], size=np.int_(1)) + np.random.multivariate_normal([0], [[0]], size=np.int64(1)) + + def test_beta_small_parameters(self): + # Test that beta with small a and b parameters does not produce + # NaNs due to roundoff errors causing 0 / 0, gh-5851 + np.random.seed(1234567890) + x = np.random.beta(0.0001, 0.0001, size=100) + assert_(not np.any(np.isnan(x)), 'Nans in np.random.beta') + + def test_choice_sum_of_probs_tolerance(self): + # The sum of probs should be 1.0 with some tolerance. + # For low precision dtypes the tolerance was too tight. + # See numpy github issue 6123. + np.random.seed(1234) + a = [1, 2, 3] + counts = [4, 4, 2] + for dt in np.float16, np.float32, np.float64: + probs = np.array(counts, dtype=dt) / sum(counts) + c = np.random.choice(a, p=probs) + assert_(c in a) + assert_raises(ValueError, np.random.choice, a, p=probs*0.9) + + def test_shuffle_of_array_of_different_length_strings(self): + # Test that permuting an array of different length strings + # will not cause a segfault on garbage collection + # Tests gh-7710 + np.random.seed(1234) + + a = np.array(['a', 'a' * 1000]) + + for _ in range(100): + np.random.shuffle(a) + + # Force Garbage Collection - should not segfault. + import gc + gc.collect() + + def test_shuffle_of_array_of_objects(self): + # Test that permuting an array of objects will not cause + # a segfault on garbage collection. + # See gh-7719 + np.random.seed(1234) + a = np.array([np.arange(1), np.arange(4)], dtype=object) + + for _ in range(1000): + np.random.shuffle(a) + + # Force Garbage Collection - should not segfault. + import gc + gc.collect() + + def test_permutation_subclass(self): + class N(np.ndarray): + pass + + np.random.seed(1) + orig = np.arange(3).view(N) + perm = np.random.permutation(orig) + assert_array_equal(perm, np.array([0, 2, 1])) + assert_array_equal(orig, np.arange(3).view(N)) + + class M: + a = np.arange(5) + + def __array__(self): + return self.a + + np.random.seed(1) + m = M() + perm = np.random.permutation(m) + assert_array_equal(perm, np.array([2, 1, 4, 0, 3])) + assert_array_equal(m.__array__(), np.arange(5)) diff --git a/.local/lib/python3.8/site-packages/numpy/random/tests/test_seed_sequence.py b/.local/lib/python3.8/site-packages/numpy/random/tests/test_seed_sequence.py new file mode 100644 index 0000000..f08cf80 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/tests/test_seed_sequence.py @@ -0,0 +1,80 @@ +import numpy as np +from numpy.testing import assert_array_equal, assert_array_compare + +from numpy.random import SeedSequence + + +def test_reference_data(): + """ Check that SeedSequence generates data the same as the C++ reference. 
+ + https://gist.github.com/imneme/540829265469e673d045 + """ + inputs = [ + [3735928559, 195939070, 229505742, 305419896], + [3668361503, 4165561550, 1661411377, 3634257570], + [164546577, 4166754639, 1765190214, 1303880213], + [446610472, 3941463886, 522937693, 1882353782], + [1864922766, 1719732118, 3882010307, 1776744564], + [4141682960, 3310988675, 553637289, 902896340], + [1134851934, 2352871630, 3699409824, 2648159817], + [1240956131, 3107113773, 1283198141, 1924506131], + [2669565031, 579818610, 3042504477, 2774880435], + [2766103236, 2883057919, 4029656435, 862374500], + ] + outputs = [ + [3914649087, 576849849, 3593928901, 2229911004], + [2240804226, 3691353228, 1365957195, 2654016646], + [3562296087, 3191708229, 1147942216, 3726991905], + [1403443605, 3591372999, 1291086759, 441919183], + [1086200464, 2191331643, 560336446, 3658716651], + [3249937430, 2346751812, 847844327, 2996632307], + [2584285912, 4034195531, 3523502488, 169742686], + [959045797, 3875435559, 1886309314, 359682705], + [3978441347, 432478529, 3223635119, 138903045], + [296367413, 4262059219, 13109864, 3283683422], + ] + outputs64 = [ + [2477551240072187391, 9577394838764454085], + [15854241394484835714, 11398914698975566411], + [13708282465491374871, 16007308345579681096], + [15424829579845884309, 1898028439751125927], + [9411697742461147792, 15714068361935982142], + [10079222287618677782, 12870437757549876199], + [17326737873898640088, 729039288628699544], + [16644868984619524261, 1544825456798124994], + [1857481142255628931, 596584038813451439], + [18305404959516669237, 14103312907920476776], + ] + for seed, expected, expected64 in zip(inputs, outputs, outputs64): + expected = np.array(expected, dtype=np.uint32) + ss = SeedSequence(seed) + state = ss.generate_state(len(expected)) + assert_array_equal(state, expected) + state64 = ss.generate_state(len(expected64), dtype=np.uint64) + assert_array_equal(state64, expected64) + + +def test_zero_padding(): + """ Ensure that the implicit zero-padding does not cause problems. + """ + # Ensure that large integers are inserted in little-endian fashion to avoid + # trailing 0s. + ss0 = SeedSequence(42) + ss1 = SeedSequence(42 << 32) + assert_array_compare( + np.not_equal, + ss0.generate_state(4), + ss1.generate_state(4)) + + # Ensure backwards compatibility with the original 0.17 release for small + # integers and no spawn key. + expected42 = np.array([3444837047, 2669555309, 2046530742, 3581440988], + dtype=np.uint32) + assert_array_equal(SeedSequence(42).generate_state(4), expected42) + + # Regression test for gh-16539 to ensure that the implicit 0s don't + # conflict with spawn keys. 
+ assert_array_compare( + np.not_equal, + SeedSequence(42, spawn_key=(0,)).generate_state(4), + expected42) diff --git a/.local/lib/python3.8/site-packages/numpy/random/tests/test_smoke.py b/.local/lib/python3.8/site-packages/numpy/random/tests/test_smoke.py new file mode 100644 index 0000000..9becc43 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/random/tests/test_smoke.py @@ -0,0 +1,818 @@ +import pickle +from functools import partial + +import numpy as np +import pytest +from numpy.testing import assert_equal, assert_, assert_array_equal +from numpy.random import (Generator, MT19937, PCG64, PCG64DXSM, Philox, SFC64) + +@pytest.fixture(scope='module', + params=(np.bool_, np.int8, np.int16, np.int32, np.int64, + np.uint8, np.uint16, np.uint32, np.uint64)) +def dtype(request): + return request.param + + +def params_0(f): + val = f() + assert_(np.isscalar(val)) + val = f(10) + assert_(val.shape == (10,)) + val = f((10, 10)) + assert_(val.shape == (10, 10)) + val = f((10, 10, 10)) + assert_(val.shape == (10, 10, 10)) + val = f(size=(5, 5)) + assert_(val.shape == (5, 5)) + + +def params_1(f, bounded=False): + a = 5.0 + b = np.arange(2.0, 12.0) + c = np.arange(2.0, 102.0).reshape((10, 10)) + d = np.arange(2.0, 1002.0).reshape((10, 10, 10)) + e = np.array([2.0, 3.0]) + g = np.arange(2.0, 12.0).reshape((1, 10, 1)) + if bounded: + a = 0.5 + b = b / (1.5 * b.max()) + c = c / (1.5 * c.max()) + d = d / (1.5 * d.max()) + e = e / (1.5 * e.max()) + g = g / (1.5 * g.max()) + + # Scalar + f(a) + # Scalar - size + f(a, size=(10, 10)) + # 1d + f(b) + # 2d + f(c) + # 3d + f(d) + # 1d size + f(b, size=10) + # 2d - size - broadcast + f(e, size=(10, 2)) + # 3d - size + f(g, size=(10, 10, 10)) + + +def comp_state(state1, state2): + identical = True + if isinstance(state1, dict): + for key in state1: + identical &= comp_state(state1[key], state2[key]) + elif type(state1) != type(state2): + identical &= type(state1) == type(state2) + else: + if (isinstance(state1, (list, tuple, np.ndarray)) and isinstance( + state2, (list, tuple, np.ndarray))): + for s1, s2 in zip(state1, state2): + identical &= comp_state(s1, s2) + else: + identical &= state1 == state2 + return identical + + +def warmup(rg, n=None): + if n is None: + n = 11 + np.random.randint(0, 20) + rg.standard_normal(n) + rg.standard_normal(n) + rg.standard_normal(n, dtype=np.float32) + rg.standard_normal(n, dtype=np.float32) + rg.integers(0, 2 ** 24, n, dtype=np.uint64) + rg.integers(0, 2 ** 48, n, dtype=np.uint64) + rg.standard_gamma(11.0, n) + rg.standard_gamma(11.0, n, dtype=np.float32) + rg.random(n, dtype=np.float64) + rg.random(n, dtype=np.float32) + + +class RNG: + @classmethod + def setup_class(cls): + # Overridden in test classes. 
Place holder to silence IDE noise + cls.bit_generator = PCG64 + cls.advance = None + cls.seed = [12345] + cls.rg = Generator(cls.bit_generator(*cls.seed)) + cls.initial_state = cls.rg.bit_generator.state + cls.seed_vector_bits = 64 + cls._extra_setup() + + @classmethod + def _extra_setup(cls): + cls.vec_1d = np.arange(2.0, 102.0) + cls.vec_2d = np.arange(2.0, 102.0)[None, :] + cls.mat = np.arange(2.0, 102.0, 0.01).reshape((100, 100)) + cls.seed_error = TypeError + + def _reset_state(self): + self.rg.bit_generator.state = self.initial_state + + def test_init(self): + rg = Generator(self.bit_generator()) + state = rg.bit_generator.state + rg.standard_normal(1) + rg.standard_normal(1) + rg.bit_generator.state = state + new_state = rg.bit_generator.state + assert_(comp_state(state, new_state)) + + def test_advance(self): + state = self.rg.bit_generator.state + if hasattr(self.rg.bit_generator, 'advance'): + self.rg.bit_generator.advance(self.advance) + assert_(not comp_state(state, self.rg.bit_generator.state)) + else: + bitgen_name = self.rg.bit_generator.__class__.__name__ + pytest.skip(f'Advance is not supported by {bitgen_name}') + + def test_jump(self): + state = self.rg.bit_generator.state + if hasattr(self.rg.bit_generator, 'jumped'): + bit_gen2 = self.rg.bit_generator.jumped() + jumped_state = bit_gen2.state + assert_(not comp_state(state, jumped_state)) + self.rg.random(2 * 3 * 5 * 7 * 11 * 13 * 17) + self.rg.bit_generator.state = state + bit_gen3 = self.rg.bit_generator.jumped() + rejumped_state = bit_gen3.state + assert_(comp_state(jumped_state, rejumped_state)) + else: + bitgen_name = self.rg.bit_generator.__class__.__name__ + if bitgen_name not in ('SFC64',): + raise AttributeError(f'no "jumped" in {bitgen_name}') + pytest.skip(f'Jump is not supported by {bitgen_name}') + + def test_uniform(self): + r = self.rg.uniform(-1.0, 0.0, size=10) + assert_(len(r) == 10) + assert_((r > -1).all()) + assert_((r <= 0).all()) + + def test_uniform_array(self): + r = self.rg.uniform(np.array([-1.0] * 10), 0.0, size=10) + assert_(len(r) == 10) + assert_((r > -1).all()) + assert_((r <= 0).all()) + r = self.rg.uniform(np.array([-1.0] * 10), + np.array([0.0] * 10), size=10) + assert_(len(r) == 10) + assert_((r > -1).all()) + assert_((r <= 0).all()) + r = self.rg.uniform(-1.0, np.array([0.0] * 10), size=10) + assert_(len(r) == 10) + assert_((r > -1).all()) + assert_((r <= 0).all()) + + def test_random(self): + assert_(len(self.rg.random(10)) == 10) + params_0(self.rg.random) + + def test_standard_normal_zig(self): + assert_(len(self.rg.standard_normal(10)) == 10) + + def test_standard_normal(self): + assert_(len(self.rg.standard_normal(10)) == 10) + params_0(self.rg.standard_normal) + + def test_standard_gamma(self): + assert_(len(self.rg.standard_gamma(10, 10)) == 10) + assert_(len(self.rg.standard_gamma(np.array([10] * 10), 10)) == 10) + params_1(self.rg.standard_gamma) + + def test_standard_exponential(self): + assert_(len(self.rg.standard_exponential(10)) == 10) + params_0(self.rg.standard_exponential) + + def test_standard_exponential_float(self): + randoms = self.rg.standard_exponential(10, dtype='float32') + assert_(len(randoms) == 10) + assert randoms.dtype == np.float32 + params_0(partial(self.rg.standard_exponential, dtype='float32')) + + def test_standard_exponential_float_log(self): + randoms = self.rg.standard_exponential(10, dtype='float32', + method='inv') + assert_(len(randoms) == 10) + assert randoms.dtype == np.float32 + params_0(partial(self.rg.standard_exponential, 
dtype='float32', + method='inv')) + + def test_standard_cauchy(self): + assert_(len(self.rg.standard_cauchy(10)) == 10) + params_0(self.rg.standard_cauchy) + + def test_standard_t(self): + assert_(len(self.rg.standard_t(10, 10)) == 10) + params_1(self.rg.standard_t) + + def test_binomial(self): + assert_(self.rg.binomial(10, .5) >= 0) + assert_(self.rg.binomial(1000, .5) >= 0) + + def test_reset_state(self): + state = self.rg.bit_generator.state + int_1 = self.rg.integers(2**31) + self.rg.bit_generator.state = state + int_2 = self.rg.integers(2**31) + assert_(int_1 == int_2) + + def test_entropy_init(self): + rg = Generator(self.bit_generator()) + rg2 = Generator(self.bit_generator()) + assert_(not comp_state(rg.bit_generator.state, + rg2.bit_generator.state)) + + def test_seed(self): + rg = Generator(self.bit_generator(*self.seed)) + rg2 = Generator(self.bit_generator(*self.seed)) + rg.random() + rg2.random() + assert_(comp_state(rg.bit_generator.state, rg2.bit_generator.state)) + + def test_reset_state_gauss(self): + rg = Generator(self.bit_generator(*self.seed)) + rg.standard_normal() + state = rg.bit_generator.state + n1 = rg.standard_normal(size=10) + rg2 = Generator(self.bit_generator()) + rg2.bit_generator.state = state + n2 = rg2.standard_normal(size=10) + assert_array_equal(n1, n2) + + def test_reset_state_uint32(self): + rg = Generator(self.bit_generator(*self.seed)) + rg.integers(0, 2 ** 24, 120, dtype=np.uint32) + state = rg.bit_generator.state + n1 = rg.integers(0, 2 ** 24, 10, dtype=np.uint32) + rg2 = Generator(self.bit_generator()) + rg2.bit_generator.state = state + n2 = rg2.integers(0, 2 ** 24, 10, dtype=np.uint32) + assert_array_equal(n1, n2) + + def test_reset_state_float(self): + rg = Generator(self.bit_generator(*self.seed)) + rg.random(dtype='float32') + state = rg.bit_generator.state + n1 = rg.random(size=10, dtype='float32') + rg2 = Generator(self.bit_generator()) + rg2.bit_generator.state = state + n2 = rg2.random(size=10, dtype='float32') + assert_((n1 == n2).all()) + + def test_shuffle(self): + original = np.arange(200, 0, -1) + permuted = self.rg.permutation(original) + assert_((original != permuted).any()) + + def test_permutation(self): + original = np.arange(200, 0, -1) + permuted = self.rg.permutation(original) + assert_((original != permuted).any()) + + def test_beta(self): + vals = self.rg.beta(2.0, 2.0, 10) + assert_(len(vals) == 10) + vals = self.rg.beta(np.array([2.0] * 10), 2.0) + assert_(len(vals) == 10) + vals = self.rg.beta(2.0, np.array([2.0] * 10)) + assert_(len(vals) == 10) + vals = self.rg.beta(np.array([2.0] * 10), np.array([2.0] * 10)) + assert_(len(vals) == 10) + vals = self.rg.beta(np.array([2.0] * 10), np.array([[2.0]] * 10)) + assert_(vals.shape == (10, 10)) + + def test_bytes(self): + vals = self.rg.bytes(10) + assert_(len(vals) == 10) + + def test_chisquare(self): + vals = self.rg.chisquare(2.0, 10) + assert_(len(vals) == 10) + params_1(self.rg.chisquare) + + def test_exponential(self): + vals = self.rg.exponential(2.0, 10) + assert_(len(vals) == 10) + params_1(self.rg.exponential) + + def test_f(self): + vals = self.rg.f(3, 1000, 10) + assert_(len(vals) == 10) + + def test_gamma(self): + vals = self.rg.gamma(3, 2, 10) + assert_(len(vals) == 10) + + def test_geometric(self): + vals = self.rg.geometric(0.5, 10) + assert_(len(vals) == 10) + params_1(self.rg.exponential, bounded=True) + + def test_gumbel(self): + vals = self.rg.gumbel(2.0, 2.0, 10) + assert_(len(vals) == 10) + + def test_laplace(self): + vals = self.rg.laplace(2.0, 2.0, 10) 
+        assert_(len(vals) == 10)
+
+    def test_logistic(self):
+        vals = self.rg.logistic(2.0, 2.0, 10)
+        assert_(len(vals) == 10)
+
+    def test_logseries(self):
+        vals = self.rg.logseries(0.5, 10)
+        assert_(len(vals) == 10)
+
+    def test_negative_binomial(self):
+        vals = self.rg.negative_binomial(10, 0.2, 10)
+        assert_(len(vals) == 10)
+
+    def test_noncentral_chisquare(self):
+        vals = self.rg.noncentral_chisquare(10, 2, 10)
+        assert_(len(vals) == 10)
+
+    def test_noncentral_f(self):
+        vals = self.rg.noncentral_f(3, 1000, 2, 10)
+        assert_(len(vals) == 10)
+        vals = self.rg.noncentral_f(np.array([3] * 10), 1000, 2)
+        assert_(len(vals) == 10)
+        vals = self.rg.noncentral_f(3, np.array([1000] * 10), 2)
+        assert_(len(vals) == 10)
+        vals = self.rg.noncentral_f(3, 1000, np.array([2] * 10))
+        assert_(len(vals) == 10)
+
+    def test_normal(self):
+        vals = self.rg.normal(10, 0.2, 10)
+        assert_(len(vals) == 10)
+
+    def test_pareto(self):
+        vals = self.rg.pareto(3.0, 10)
+        assert_(len(vals) == 10)
+
+    def test_poisson(self):
+        vals = self.rg.poisson(10, 10)
+        assert_(len(vals) == 10)
+        vals = self.rg.poisson(np.array([10] * 10))
+        assert_(len(vals) == 10)
+        params_1(self.rg.poisson)
+
+    def test_power(self):
+        vals = self.rg.power(0.2, 10)
+        assert_(len(vals) == 10)
+
+    def test_integers(self):
+        vals = self.rg.integers(10, 20, 10)
+        assert_(len(vals) == 10)
+
+    def test_rayleigh(self):
+        vals = self.rg.rayleigh(0.2, 10)
+        assert_(len(vals) == 10)
+        params_1(self.rg.rayleigh, bounded=True)
+
+    def test_vonmises(self):
+        vals = self.rg.vonmises(10, 0.2, 10)
+        assert_(len(vals) == 10)
+
+    def test_wald(self):
+        vals = self.rg.wald(1.0, 1.0, 10)
+        assert_(len(vals) == 10)
+
+    def test_weibull(self):
+        vals = self.rg.weibull(1.0, 10)
+        assert_(len(vals) == 10)
+
+    def test_zipf(self):
+        vals = self.rg.zipf(10, 10)
+        assert_(len(vals) == 10)
+        vals = self.rg.zipf(self.vec_1d)
+        assert_(len(vals) == 100)
+        vals = self.rg.zipf(self.vec_2d)
+        assert_(vals.shape == (1, 100))
+        vals = self.rg.zipf(self.mat)
+        assert_(vals.shape == (100, 100))
+
+    def test_hypergeometric(self):
+        vals = self.rg.hypergeometric(25, 25, 20)
+        assert_(np.isscalar(vals))
+        vals = self.rg.hypergeometric(np.array([25] * 10), 25, 20)
+        assert_(vals.shape == (10,))
+
+    def test_triangular(self):
+        vals = self.rg.triangular(-5, 0, 5)
+        assert_(np.isscalar(vals))
+        vals = self.rg.triangular(-5, np.array([0] * 10), 5)
+        assert_(vals.shape == (10,))
+
+    def test_multivariate_normal(self):
+        mean = [0, 0]
+        cov = [[1, 0], [0, 100]]  # diagonal covariance
+        x = self.rg.multivariate_normal(mean, cov, 5000)
+        assert_(x.shape == (5000, 2))
+        x_zig = self.rg.multivariate_normal(mean, cov, 5000)
+        assert_(x_zig.shape == (5000, 2))
+        x_inv = self.rg.multivariate_normal(mean, cov, 5000)
+        assert_(x_inv.shape == (5000, 2))
+        assert_((x_zig != x_inv).any())
+
+    def test_multinomial(self):
+        vals = self.rg.multinomial(100, [1.0 / 3, 2.0 / 3])
+        assert_(vals.shape == (2,))
+        vals = self.rg.multinomial(100, [1.0 / 3, 2.0 / 3], size=10)
+        assert_(vals.shape == (10, 2))
+
+    def test_dirichlet(self):
+        s = self.rg.dirichlet((10, 5, 3), 20)
+        assert_(s.shape == (20, 3))
+
+    def test_pickle(self):
+        pick = pickle.dumps(self.rg)
+        unpick = pickle.loads(pick)
+        assert_((type(self.rg) == type(unpick)))
+        assert_(comp_state(self.rg.bit_generator.state,
+                           unpick.bit_generator.state))
+
+        pick = pickle.dumps(self.rg)
+        unpick = pickle.loads(pick)
+        assert_((type(self.rg) == type(unpick)))
+        assert_(comp_state(self.rg.bit_generator.state,
+                           unpick.bit_generator.state))
+
+    def 
test_seed_array(self): + if self.seed_vector_bits is None: + bitgen_name = self.bit_generator.__name__ + pytest.skip(f'Vector seeding is not supported by {bitgen_name}') + + if self.seed_vector_bits == 32: + dtype = np.uint32 + else: + dtype = np.uint64 + seed = np.array([1], dtype=dtype) + bg = self.bit_generator(seed) + state1 = bg.state + bg = self.bit_generator(1) + state2 = bg.state + assert_(comp_state(state1, state2)) + + seed = np.arange(4, dtype=dtype) + bg = self.bit_generator(seed) + state1 = bg.state + bg = self.bit_generator(seed[0]) + state2 = bg.state + assert_(not comp_state(state1, state2)) + + seed = np.arange(1500, dtype=dtype) + bg = self.bit_generator(seed) + state1 = bg.state + bg = self.bit_generator(seed[0]) + state2 = bg.state + assert_(not comp_state(state1, state2)) + + seed = 2 ** np.mod(np.arange(1500, dtype=dtype), + self.seed_vector_bits - 1) + 1 + bg = self.bit_generator(seed) + state1 = bg.state + bg = self.bit_generator(seed[0]) + state2 = bg.state + assert_(not comp_state(state1, state2)) + + def test_uniform_float(self): + rg = Generator(self.bit_generator(12345)) + warmup(rg) + state = rg.bit_generator.state + r1 = rg.random(11, dtype=np.float32) + rg2 = Generator(self.bit_generator()) + warmup(rg2) + rg2.bit_generator.state = state + r2 = rg2.random(11, dtype=np.float32) + assert_array_equal(r1, r2) + assert_equal(r1.dtype, np.float32) + assert_(comp_state(rg.bit_generator.state, rg2.bit_generator.state)) + + def test_gamma_floats(self): + rg = Generator(self.bit_generator()) + warmup(rg) + state = rg.bit_generator.state + r1 = rg.standard_gamma(4.0, 11, dtype=np.float32) + rg2 = Generator(self.bit_generator()) + warmup(rg2) + rg2.bit_generator.state = state + r2 = rg2.standard_gamma(4.0, 11, dtype=np.float32) + assert_array_equal(r1, r2) + assert_equal(r1.dtype, np.float32) + assert_(comp_state(rg.bit_generator.state, rg2.bit_generator.state)) + + def test_normal_floats(self): + rg = Generator(self.bit_generator()) + warmup(rg) + state = rg.bit_generator.state + r1 = rg.standard_normal(11, dtype=np.float32) + rg2 = Generator(self.bit_generator()) + warmup(rg2) + rg2.bit_generator.state = state + r2 = rg2.standard_normal(11, dtype=np.float32) + assert_array_equal(r1, r2) + assert_equal(r1.dtype, np.float32) + assert_(comp_state(rg.bit_generator.state, rg2.bit_generator.state)) + + def test_normal_zig_floats(self): + rg = Generator(self.bit_generator()) + warmup(rg) + state = rg.bit_generator.state + r1 = rg.standard_normal(11, dtype=np.float32) + rg2 = Generator(self.bit_generator()) + warmup(rg2) + rg2.bit_generator.state = state + r2 = rg2.standard_normal(11, dtype=np.float32) + assert_array_equal(r1, r2) + assert_equal(r1.dtype, np.float32) + assert_(comp_state(rg.bit_generator.state, rg2.bit_generator.state)) + + def test_output_fill(self): + rg = self.rg + state = rg.bit_generator.state + size = (31, 7, 97) + existing = np.empty(size) + rg.bit_generator.state = state + rg.standard_normal(out=existing) + rg.bit_generator.state = state + direct = rg.standard_normal(size=size) + assert_equal(direct, existing) + + sized = np.empty(size) + rg.bit_generator.state = state + rg.standard_normal(out=sized, size=sized.shape) + + existing = np.empty(size, dtype=np.float32) + rg.bit_generator.state = state + rg.standard_normal(out=existing, dtype=np.float32) + rg.bit_generator.state = state + direct = rg.standard_normal(size=size, dtype=np.float32) + assert_equal(direct, existing) + + def test_output_filling_uniform(self): + rg = self.rg + state = 
rg.bit_generator.state + size = (31, 7, 97) + existing = np.empty(size) + rg.bit_generator.state = state + rg.random(out=existing) + rg.bit_generator.state = state + direct = rg.random(size=size) + assert_equal(direct, existing) + + existing = np.empty(size, dtype=np.float32) + rg.bit_generator.state = state + rg.random(out=existing, dtype=np.float32) + rg.bit_generator.state = state + direct = rg.random(size=size, dtype=np.float32) + assert_equal(direct, existing) + + def test_output_filling_exponential(self): + rg = self.rg + state = rg.bit_generator.state + size = (31, 7, 97) + existing = np.empty(size) + rg.bit_generator.state = state + rg.standard_exponential(out=existing) + rg.bit_generator.state = state + direct = rg.standard_exponential(size=size) + assert_equal(direct, existing) + + existing = np.empty(size, dtype=np.float32) + rg.bit_generator.state = state + rg.standard_exponential(out=existing, dtype=np.float32) + rg.bit_generator.state = state + direct = rg.standard_exponential(size=size, dtype=np.float32) + assert_equal(direct, existing) + + def test_output_filling_gamma(self): + rg = self.rg + state = rg.bit_generator.state + size = (31, 7, 97) + existing = np.zeros(size) + rg.bit_generator.state = state + rg.standard_gamma(1.0, out=existing) + rg.bit_generator.state = state + direct = rg.standard_gamma(1.0, size=size) + assert_equal(direct, existing) + + existing = np.zeros(size, dtype=np.float32) + rg.bit_generator.state = state + rg.standard_gamma(1.0, out=existing, dtype=np.float32) + rg.bit_generator.state = state + direct = rg.standard_gamma(1.0, size=size, dtype=np.float32) + assert_equal(direct, existing) + + def test_output_filling_gamma_broadcast(self): + rg = self.rg + state = rg.bit_generator.state + size = (31, 7, 97) + mu = np.arange(97.0) + 1.0 + existing = np.zeros(size) + rg.bit_generator.state = state + rg.standard_gamma(mu, out=existing) + rg.bit_generator.state = state + direct = rg.standard_gamma(mu, size=size) + assert_equal(direct, existing) + + existing = np.zeros(size, dtype=np.float32) + rg.bit_generator.state = state + rg.standard_gamma(mu, out=existing, dtype=np.float32) + rg.bit_generator.state = state + direct = rg.standard_gamma(mu, size=size, dtype=np.float32) + assert_equal(direct, existing) + + def test_output_fill_error(self): + rg = self.rg + size = (31, 7, 97) + existing = np.empty(size) + with pytest.raises(TypeError): + rg.standard_normal(out=existing, dtype=np.float32) + with pytest.raises(ValueError): + rg.standard_normal(out=existing[::3]) + existing = np.empty(size, dtype=np.float32) + with pytest.raises(TypeError): + rg.standard_normal(out=existing, dtype=np.float64) + + existing = np.zeros(size, dtype=np.float32) + with pytest.raises(TypeError): + rg.standard_gamma(1.0, out=existing, dtype=np.float64) + with pytest.raises(ValueError): + rg.standard_gamma(1.0, out=existing[::3], dtype=np.float32) + existing = np.zeros(size, dtype=np.float64) + with pytest.raises(TypeError): + rg.standard_gamma(1.0, out=existing, dtype=np.float32) + with pytest.raises(ValueError): + rg.standard_gamma(1.0, out=existing[::3]) + + def test_integers_broadcast(self, dtype): + if dtype == np.bool_: + upper = 2 + lower = 0 + else: + info = np.iinfo(dtype) + upper = int(info.max) + 1 + lower = info.min + self._reset_state() + a = self.rg.integers(lower, [upper] * 10, dtype=dtype) + self._reset_state() + b = self.rg.integers([lower] * 10, upper, dtype=dtype) + assert_equal(a, b) + self._reset_state() + c = self.rg.integers(lower, upper, size=10, dtype=dtype) 
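+        # scalar bounds with an explicit size should reproduce the
+        # broadcast array-bound draws above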
+ assert_equal(a, c) + self._reset_state() + d = self.rg.integers(np.array( + [lower] * 10), np.array([upper], dtype=object), size=10, + dtype=dtype) + assert_equal(a, d) + self._reset_state() + e = self.rg.integers( + np.array([lower] * 10), np.array([upper] * 10), size=10, + dtype=dtype) + assert_equal(a, e) + + self._reset_state() + a = self.rg.integers(0, upper, size=10, dtype=dtype) + self._reset_state() + b = self.rg.integers([upper] * 10, dtype=dtype) + assert_equal(a, b) + + def test_integers_numpy(self, dtype): + high = np.array([1]) + low = np.array([0]) + + out = self.rg.integers(low, high, dtype=dtype) + assert out.shape == (1,) + + out = self.rg.integers(low[0], high, dtype=dtype) + assert out.shape == (1,) + + out = self.rg.integers(low, high[0], dtype=dtype) + assert out.shape == (1,) + + def test_integers_broadcast_errors(self, dtype): + if dtype == np.bool_: + upper = 2 + lower = 0 + else: + info = np.iinfo(dtype) + upper = int(info.max) + 1 + lower = info.min + with pytest.raises(ValueError): + self.rg.integers(lower, [upper + 1] * 10, dtype=dtype) + with pytest.raises(ValueError): + self.rg.integers(lower - 1, [upper] * 10, dtype=dtype) + with pytest.raises(ValueError): + self.rg.integers([lower - 1], [upper] * 10, dtype=dtype) + with pytest.raises(ValueError): + self.rg.integers([0], [0], dtype=dtype) + + +class TestMT19937(RNG): + @classmethod + def setup_class(cls): + cls.bit_generator = MT19937 + cls.advance = None + cls.seed = [2 ** 21 + 2 ** 16 + 2 ** 5 + 1] + cls.rg = Generator(cls.bit_generator(*cls.seed)) + cls.initial_state = cls.rg.bit_generator.state + cls.seed_vector_bits = 32 + cls._extra_setup() + cls.seed_error = ValueError + + def test_numpy_state(self): + nprg = np.random.RandomState() + nprg.standard_normal(99) + state = nprg.get_state() + self.rg.bit_generator.state = state + state2 = self.rg.bit_generator.state + assert_((state[1] == state2['state']['key']).all()) + assert_((state[2] == state2['state']['pos'])) + + +class TestPhilox(RNG): + @classmethod + def setup_class(cls): + cls.bit_generator = Philox + cls.advance = 2**63 + 2**31 + 2**15 + 1 + cls.seed = [12345] + cls.rg = Generator(cls.bit_generator(*cls.seed)) + cls.initial_state = cls.rg.bit_generator.state + cls.seed_vector_bits = 64 + cls._extra_setup() + + +class TestSFC64(RNG): + @classmethod + def setup_class(cls): + cls.bit_generator = SFC64 + cls.advance = None + cls.seed = [12345] + cls.rg = Generator(cls.bit_generator(*cls.seed)) + cls.initial_state = cls.rg.bit_generator.state + cls.seed_vector_bits = 192 + cls._extra_setup() + + +class TestPCG64(RNG): + @classmethod + def setup_class(cls): + cls.bit_generator = PCG64 + cls.advance = 2**63 + 2**31 + 2**15 + 1 + cls.seed = [12345] + cls.rg = Generator(cls.bit_generator(*cls.seed)) + cls.initial_state = cls.rg.bit_generator.state + cls.seed_vector_bits = 64 + cls._extra_setup() + + +class TestPCG64DXSM(RNG): + @classmethod + def setup_class(cls): + cls.bit_generator = PCG64DXSM + cls.advance = 2**63 + 2**31 + 2**15 + 1 + cls.seed = [12345] + cls.rg = Generator(cls.bit_generator(*cls.seed)) + cls.initial_state = cls.rg.bit_generator.state + cls.seed_vector_bits = 64 + cls._extra_setup() + + +class TestDefaultRNG(RNG): + @classmethod + def setup_class(cls): + # This will duplicate some tests that directly instantiate a fresh + # Generator(), but that's okay. 
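+        # default_rng is expected to wrap a PCG64 bit generator (see
+        # test_default_is_pcg64 below), so mirror the TestPCG64 settings.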
+ cls.bit_generator = PCG64 + cls.advance = 2**63 + 2**31 + 2**15 + 1 + cls.seed = [12345] + cls.rg = np.random.default_rng(*cls.seed) + cls.initial_state = cls.rg.bit_generator.state + cls.seed_vector_bits = 64 + cls._extra_setup() + + def test_default_is_pcg64(self): + # In order to change the default BitGenerator, we'll go through + # a deprecation cycle to move to a different function. + assert_(isinstance(self.rg.bit_generator, PCG64)) + + def test_seed(self): + np.random.default_rng() + np.random.default_rng(None) + np.random.default_rng(12345) + np.random.default_rng(0) + np.random.default_rng(43660444402423911716352051725018508569) + np.random.default_rng([43660444402423911716352051725018508569, + 279705150948142787361475340226491943209]) + with pytest.raises(ValueError): + np.random.default_rng(-1) + with pytest.raises(ValueError): + np.random.default_rng([12345, -1]) diff --git a/.local/lib/python3.8/site-packages/numpy/setup.py b/.local/lib/python3.8/site-packages/numpy/setup.py new file mode 100644 index 0000000..a0ca999 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/setup.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 + +def configuration(parent_package='',top_path=None): + from numpy.distutils.misc_util import Configuration + config = Configuration('numpy', parent_package, top_path) + + config.add_subpackage('array_api') + config.add_subpackage('compat') + config.add_subpackage('core') + config.add_subpackage('distutils') + config.add_subpackage('doc') + config.add_subpackage('f2py') + config.add_subpackage('fft') + config.add_subpackage('lib') + config.add_subpackage('linalg') + config.add_subpackage('ma') + config.add_subpackage('matrixlib') + config.add_subpackage('polynomial') + config.add_subpackage('random') + config.add_subpackage('testing') + config.add_subpackage('typing') + config.add_data_dir('doc') + config.add_data_files('py.typed') + config.add_data_files('*.pyi') + config.add_subpackage('tests') + config.make_config_py() # installs __config__.py + return config + +if __name__ == '__main__': + print('This is the wrong setup.py file to run') diff --git a/.local/lib/python3.8/site-packages/numpy/testing/__init__.py b/.local/lib/python3.8/site-packages/numpy/testing/__init__.py new file mode 100644 index 0000000..6e06c5b --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/testing/__init__.py @@ -0,0 +1,21 @@ +"""Common test support for all numpy test scripts. + +This single module should provide all the common functionality for numpy tests +in a single location, so that test scripts can just import it and work right +away. 
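+
+For example, a minimal test module might look like::
+
+    from numpy.testing import assert_allclose
+
+    def test_example():
+        assert_allclose([1.0, 2.0], [1.0, 2.0], rtol=1e-7)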
+ +""" +from unittest import TestCase + +from ._private.utils import * +from ._private.utils import (_assert_valid_refcount, _gen_alignment_data) +from ._private import extbuild, decorators as dec +from ._private.nosetester import ( + run_module_suite, NoseTester as Tester + ) + +__all__ = _private.utils.__all__ + ['TestCase', 'run_module_suite'] + +from numpy._pytesttester import PytestTester +test = PytestTester(__name__) +del PytestTester diff --git a/.local/lib/python3.8/site-packages/numpy/testing/__init__.pyi b/.local/lib/python3.8/site-packages/numpy/testing/__init__.pyi new file mode 100644 index 0000000..def0f9f --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/testing/__init__.pyi @@ -0,0 +1,58 @@ +from typing import List + +from numpy._pytesttester import PytestTester + +from unittest import ( + TestCase as TestCase, +) + +from numpy.testing._private.utils import ( + assert_equal as assert_equal, + assert_almost_equal as assert_almost_equal, + assert_approx_equal as assert_approx_equal, + assert_array_equal as assert_array_equal, + assert_array_less as assert_array_less, + assert_string_equal as assert_string_equal, + assert_array_almost_equal as assert_array_almost_equal, + assert_raises as assert_raises, + build_err_msg as build_err_msg, + decorate_methods as decorate_methods, + jiffies as jiffies, + memusage as memusage, + print_assert_equal as print_assert_equal, + raises as raises, + rundocs as rundocs, + runstring as runstring, + verbose as verbose, + measure as measure, + assert_ as assert_, + assert_array_almost_equal_nulp as assert_array_almost_equal_nulp, + assert_raises_regex as assert_raises_regex, + assert_array_max_ulp as assert_array_max_ulp, + assert_warns as assert_warns, + assert_no_warnings as assert_no_warnings, + assert_allclose as assert_allclose, + IgnoreException as IgnoreException, + clear_and_catch_warnings as clear_and_catch_warnings, + SkipTest as SkipTest, + KnownFailureException as KnownFailureException, + temppath as temppath, + tempdir as tempdir, + IS_PYPY as IS_PYPY, + IS_PYSTON as IS_PYSTON, + HAS_REFCOUNT as HAS_REFCOUNT, + suppress_warnings as suppress_warnings, + assert_array_compare as assert_array_compare, + assert_no_gc_cycles as assert_no_gc_cycles, + break_cycles as break_cycles, + HAS_LAPACK64 as HAS_LAPACK64, +) + +__all__: List[str] +__path__: List[str] +test: PytestTester + +def run_module_suite( + file_to_run: None | str = ..., + argv: None | List[str] = ..., +) -> None: ... 
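+
+# Note: the `name as name` import form above marks each name as an explicit
+# re-export for static type checkers.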
diff --git a/.local/lib/python3.8/site-packages/numpy/testing/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/testing/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..33355af Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/testing/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/testing/__pycache__/print_coercion_tables.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/testing/__pycache__/print_coercion_tables.cpython-38.pyc new file mode 100644 index 0000000..19a3068 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/testing/__pycache__/print_coercion_tables.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/testing/__pycache__/setup.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/testing/__pycache__/setup.cpython-38.pyc new file mode 100644 index 0000000..b671f04 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/testing/__pycache__/setup.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/testing/__pycache__/utils.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/testing/__pycache__/utils.cpython-38.pyc new file mode 100644 index 0000000..3582014 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/testing/__pycache__/utils.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/testing/_private/__init__.py b/.local/lib/python3.8/site-packages/numpy/testing/_private/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/numpy/testing/_private/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/testing/_private/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..c4b5731 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/testing/_private/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/testing/_private/__pycache__/decorators.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/testing/_private/__pycache__/decorators.cpython-38.pyc new file mode 100644 index 0000000..ac80bff Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/testing/_private/__pycache__/decorators.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/testing/_private/__pycache__/extbuild.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/testing/_private/__pycache__/extbuild.cpython-38.pyc new file mode 100644 index 0000000..bd6529d Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/testing/_private/__pycache__/extbuild.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/testing/_private/__pycache__/noseclasses.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/testing/_private/__pycache__/noseclasses.cpython-38.pyc new file mode 100644 index 0000000..e89c9f6 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/testing/_private/__pycache__/noseclasses.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/testing/_private/__pycache__/nosetester.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/testing/_private/__pycache__/nosetester.cpython-38.pyc new file mode 100644 index 0000000..336480e Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/testing/_private/__pycache__/nosetester.cpython-38.pyc differ diff --git 
a/.local/lib/python3.8/site-packages/numpy/testing/_private/__pycache__/parameterized.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/testing/_private/__pycache__/parameterized.cpython-38.pyc new file mode 100644 index 0000000..61db415 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/testing/_private/__pycache__/parameterized.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/testing/_private/__pycache__/utils.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/testing/_private/__pycache__/utils.cpython-38.pyc new file mode 100644 index 0000000..ef7a8ef Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/testing/_private/__pycache__/utils.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/testing/_private/decorators.py b/.local/lib/python3.8/site-packages/numpy/testing/_private/decorators.py new file mode 100644 index 0000000..cb49d9a --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/testing/_private/decorators.py @@ -0,0 +1,331 @@ +""" +Decorators for labeling and modifying behavior of test objects. + +Decorators that merely return a modified version of the original +function object are straightforward. Decorators that return a new +function object need to use +:: + + nose.tools.make_decorator(original_function)(decorator) + +in returning the decorator, in order to preserve meta-data such as +function name, setup and teardown functions and so on - see +``nose.tools`` for more information. + +""" +import collections.abc +import warnings + +from .utils import SkipTest, assert_warns, HAS_REFCOUNT + +__all__ = ['slow', 'setastest', 'skipif', 'knownfailureif', 'deprecated', + 'parametrize', '_needs_refcount',] + + +def slow(t): + """ + .. deprecated:: 1.21 + This decorator is retained for compatibility with the nose testing framework, which is being phased out. + Please use the nose2 or pytest frameworks instead. + + Label a test as 'slow'. + + The exact definition of a slow test is obviously both subjective and + hardware-dependent, but in general any individual test that requires more + than a second or two should be labeled as slow (the whole suite consists of + thousands of tests, so even a second is significant). + + Parameters + ---------- + t : callable + The test to label as slow. + + Returns + ------- + t : callable + The decorated test `t`. + + Examples + -------- + The `numpy.testing` module includes ``import decorators as dec``. + A test can be decorated as slow like this:: + + from numpy.testing import * + + @dec.slow + def test_big(self): + print('Big, slow test') + + """ + # Numpy 1.21, 2020-12-20 + warnings.warn('the np.testing.dec decorators are included for nose support, and are ' + 'deprecated since NumPy v1.21. Use the nose2 or pytest frameworks instead.', DeprecationWarning, stacklevel=2) + + t.slow = True + return t + +def setastest(tf=True): + """ + .. deprecated:: 1.21 + This decorator is retained for compatibility with the nose testing framework, which is being phased out. + Please use the nose2 or pytest frameworks instead. + + Signals to nose that this function is or is not a test. + + Parameters + ---------- + tf : bool + If True, specifies that the decorated callable is a test. + If False, specifies that the decorated callable is not a test. + Default is True. + + Notes + ----- + This decorator can't use the nose namespace, because it can be + called from a non-test module. See also ``istest`` and ``nottest`` in + ``nose.tools``. 
+ + Examples + -------- + `setastest` can be used in the following way:: + + from numpy.testing import dec + + @dec.setastest(False) + def func_with_test_in_name(arg1, arg2): + pass + + """ + # Numpy 1.21, 2020-12-20 + warnings.warn('the np.testing.dec decorators are included for nose support, and are ' + 'deprecated since NumPy v1.21. Use the nose2 or pytest frameworks instead.', DeprecationWarning, stacklevel=2) + def set_test(t): + t.__test__ = tf + return t + return set_test + +def skipif(skip_condition, msg=None): + """ + .. deprecated:: 1.21 + This decorator is retained for compatibility with the nose testing framework, which is being phased out. + Please use the nose2 or pytest frameworks instead. + + Make function raise SkipTest exception if a given condition is true. + + If the condition is a callable, it is used at runtime to dynamically + make the decision. This is useful for tests that may require costly + imports, to delay the cost until the test suite is actually executed. + + Parameters + ---------- + skip_condition : bool or callable + Flag to determine whether to skip the decorated test. + msg : str, optional + Message to give on raising a SkipTest exception. Default is None. + + Returns + ------- + decorator : function + Decorator which, when applied to a function, causes SkipTest + to be raised when `skip_condition` is True, and the function + to be called normally otherwise. + + Notes + ----- + The decorator itself is decorated with the ``nose.tools.make_decorator`` + function in order to transmit function name, and various other metadata. + + """ + + def skip_decorator(f): + # Local import to avoid a hard nose dependency and only incur the + # import time overhead at actual test-time. + import nose + + # Numpy 1.21, 2020-12-20 + warnings.warn('the np.testing.dec decorators are included for nose support, and are ' + 'deprecated since NumPy v1.21. Use the nose2 or pytest frameworks instead.', DeprecationWarning, stacklevel=2) + + # Allow for both boolean or callable skip conditions. + if isinstance(skip_condition, collections.abc.Callable): + skip_val = lambda: skip_condition() + else: + skip_val = lambda: skip_condition + + def get_msg(func,msg=None): + """Skip message with information about function being skipped.""" + if msg is None: + out = 'Test skipped due to test condition' + else: + out = msg + + return f'Skipping test: {func.__name__}: {out}' + + # We need to define *two* skippers because Python doesn't allow both + # return with value and yield inside the same function. + def skipper_func(*args, **kwargs): + """Skipper for normal test functions.""" + if skip_val(): + raise SkipTest(get_msg(f, msg)) + else: + return f(*args, **kwargs) + + def skipper_gen(*args, **kwargs): + """Skipper for test generators.""" + if skip_val(): + raise SkipTest(get_msg(f, msg)) + else: + yield from f(*args, **kwargs) + + # Choose the right skipper to use when building the actual decorator. + if nose.util.isgenerator(f): + skipper = skipper_gen + else: + skipper = skipper_func + + return nose.tools.make_decorator(f)(skipper) + + return skip_decorator + + +def knownfailureif(fail_condition, msg=None): + """ + .. deprecated:: 1.21 + This decorator is retained for compatibility with the nose testing framework, which is being phased out. + Please use the nose2 or pytest frameworks instead. + + Make function raise KnownFailureException exception if given condition is true. + + If the condition is a callable, it is used at runtime to dynamically + make the decision. 
This is useful for tests that may require costly + imports, to delay the cost until the test suite is actually executed. + + Parameters + ---------- + fail_condition : bool or callable + Flag to determine whether to mark the decorated test as a known + failure (if True) or not (if False). + msg : str, optional + Message to give on raising a KnownFailureException exception. + Default is None. + + Returns + ------- + decorator : function + Decorator, which, when applied to a function, causes + KnownFailureException to be raised when `fail_condition` is True, + and the function to be called normally otherwise. + + Notes + ----- + The decorator itself is decorated with the ``nose.tools.make_decorator`` + function in order to transmit function name, and various other metadata. + + """ + # Numpy 1.21, 2020-12-20 + warnings.warn('the np.testing.dec decorators are included for nose support, and are ' + 'deprecated since NumPy v1.21. Use the nose2 or pytest frameworks instead.', DeprecationWarning, stacklevel=2) + + if msg is None: + msg = 'Test skipped due to known failure' + + # Allow for both boolean or callable known failure conditions. + if isinstance(fail_condition, collections.abc.Callable): + fail_val = lambda: fail_condition() + else: + fail_val = lambda: fail_condition + + def knownfail_decorator(f): + # Local import to avoid a hard nose dependency and only incur the + # import time overhead at actual test-time. + import nose + from .noseclasses import KnownFailureException + + def knownfailer(*args, **kwargs): + if fail_val(): + raise KnownFailureException(msg) + else: + return f(*args, **kwargs) + return nose.tools.make_decorator(f)(knownfailer) + + return knownfail_decorator + +def deprecated(conditional=True): + """ + .. deprecated:: 1.21 + This decorator is retained for compatibility with the nose testing framework, which is being phased out. + Please use the nose2 or pytest frameworks instead. + + Filter deprecation warnings while running the test suite. + + This decorator can be used to filter DeprecationWarning's, to avoid + printing them during the test suite run, while checking that the test + actually raises a DeprecationWarning. + + Parameters + ---------- + conditional : bool or callable, optional + Flag to determine whether to mark test as deprecated or not. If the + condition is a callable, it is used at runtime to dynamically make the + decision. Default is True. + + Returns + ------- + decorator : function + The `deprecated` decorator itself. + + Notes + ----- + .. versionadded:: 1.4.0 + + """ + def deprecate_decorator(f): + # Local import to avoid a hard nose dependency and only incur the + # import time overhead at actual test-time. + import nose + + # Numpy 1.21, 2020-12-20 + warnings.warn('the np.testing.dec decorators are included for nose support, and are ' + 'deprecated since NumPy v1.21. Use the nose2 or pytest frameworks instead.', DeprecationWarning, stacklevel=2) + + def _deprecated_imp(*args, **kwargs): + # Poor man's replacement for the with statement + with assert_warns(DeprecationWarning): + f(*args, **kwargs) + + if isinstance(conditional, collections.abc.Callable): + cond = conditional() + else: + cond = conditional + if cond: + return nose.tools.make_decorator(f)(_deprecated_imp) + else: + return f + return deprecate_decorator + + +def parametrize(vars, input): + """ + .. deprecated:: 1.21 + This decorator is retained for compatibility with the nose testing framework, which is being phased out. + Please use the nose2 or pytest frameworks instead. 
+
+    Pytest compatibility class. This implements the simplest level of
+    pytest.mark.parametrize for use in nose as an aid in making the transition
+    to pytest. It achieves that by adding a dummy var parameter and ignoring
+    the doc_func parameter of the base class. It does not support variable
+    substitution by name, nor does it support nesting or classes. See the
+    pytest documentation for usage.
+
+    .. versionadded:: 1.14.0
+
+    """
+    from .parameterized import parameterized
+
+    # Numpy 1.21, 2020-12-20
+    warnings.warn('the np.testing.dec decorators are included for nose support, and are '
+                  'deprecated since NumPy v1.21. Use the nose2 or pytest frameworks instead.', DeprecationWarning, stacklevel=2)
+
+    return parameterized(input)
+
+_needs_refcount = skipif(not HAS_REFCOUNT, "python has no sys.getrefcount")
diff --git a/.local/lib/python3.8/site-packages/numpy/testing/_private/extbuild.py b/.local/lib/python3.8/site-packages/numpy/testing/_private/extbuild.py
new file mode 100644
index 0000000..fc39163
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/testing/_private/extbuild.py
@@ -0,0 +1,251 @@
+"""
+Build a c-extension module on-the-fly in tests.
+See build_and_import_extension for usage hints
+
+"""
+
+import os
+import pathlib
+import sys
+import sysconfig
+
+__all__ = ['build_and_import_extension', 'compile_extension_module']
+
+
+def build_and_import_extension(
+        modname, functions, *, prologue="", build_dir=None,
+        include_dirs=[], more_init=""):
+    """
+    Build and import a c-extension module `modname` from a list of function
+    fragments `functions`.
+
+
+    Parameters
+    ----------
+    functions : list of fragments
+        Each fragment is a sequence of func_name, calling convention, snippet.
+    prologue : string
+        Code to precede the rest, usually extra ``#include`` or ``#define``
+        macros.
+    build_dir : pathlib.Path
+        Where to build the module, usually a temporary directory
+    include_dirs : list
+        Extra directories to find include files when compiling
+    more_init : string
+        Code to appear in the module PyMODINIT_FUNC
+
+    Returns
+    -------
+    out: module
+        The module will have been loaded and is ready for use
+
+    Examples
+    --------
+    >>> functions = [("test_bytes", "METH_O", \"\"\"
+        if ( !PyBytes_Check(args)) {
+            Py_RETURN_FALSE;
+        }
+        Py_RETURN_TRUE;
+    \"\"\")]
+    >>> mod = build_and_import_extension("testme", functions)
+    >>> assert not mod.test_bytes(u'abc')
+    >>> assert mod.test_bytes(b'abc')
+    """
+    from distutils.errors import CompileError
+
+    body = prologue + _make_methods(functions, modname)
+    init = """PyObject *mod = PyModule_Create(&moduledef);
+           """
+    if not build_dir:
+        build_dir = pathlib.Path('.')
+    if more_init:
+        init += """#define INITERROR return NULL
+                """
+        init += more_init
+    init += "\nreturn mod;"
+    source_string = _make_source(modname, init, body)
+    try:
+        mod_so = compile_extension_module(
+            modname, build_dir, include_dirs, source_string)
+    except CompileError as e:
+        # shorten the exception chain
+        raise RuntimeError(f"could not compile in {build_dir}:") from e
+    import importlib.util
+    spec = importlib.util.spec_from_file_location(modname, mod_so)
+    foo = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(foo)
+    return foo
+
+
+def compile_extension_module(
+        name, builddir, include_dirs,
+        source_string, libraries=[], library_dirs=[]):
+    """
+    Build an extension module and return the filename of the resulting
+    native code file.
+
+    Parameters
+    ----------
+    name : string
+        name of the module, possibly including dots if it is a module inside a
+        package.
+    builddir : pathlib.Path
+        Where to build the module, usually a temporary directory
+    include_dirs : list
+        Extra directories to find include files when compiling
+    libraries : list
+        Libraries to link into the extension module
+    library_dirs: list
+        Where to find the libraries, ``-L`` passed to the linker
+    """
+    modname = name.split('.')[-1]
+    dirname = builddir / name
+    dirname.mkdir(exist_ok=True)
+    cfile = _convert_str_to_file(source_string, dirname)
+    include_dirs = [sysconfig.get_config_var('INCLUDEPY')] + include_dirs
+
+    return _c_compile(
+        cfile, outputfilename=dirname / modname,
+        include_dirs=include_dirs, libraries=[], library_dirs=[],
+        )
+
+
+def _convert_str_to_file(source, dirname):
+    """Helper function to create a file ``source.c`` in `dirname` that contains
+    the string in `source`. Returns the file name
+    """
+    filename = dirname / 'source.c'
+    with filename.open('w') as f:
+        f.write(str(source))
+    return filename
+
+
+def _make_methods(functions, modname):
+    """ Turns the name, signature, code in functions into complete functions
+    and lists them in a methods_table. Then turns the methods_table into a
+    ``PyMethodDef`` structure and returns the resulting code fragment ready
+    for compilation
+    """
+    methods_table = []
+    codes = []
+    for funcname, flags, code in functions:
+        cfuncname = "%s_%s" % (modname, funcname)
+        if 'METH_KEYWORDS' in flags:
+            signature = '(PyObject *self, PyObject *args, PyObject *kwargs)'
+        else:
+            signature = '(PyObject *self, PyObject *args)'
+        methods_table.append(
+            "{\"%s\", (PyCFunction)%s, %s}," % (funcname, cfuncname, flags))
+        func_code = """
+        static PyObject* {cfuncname}{signature}
+        {{
+        {code}
+        }}
+        """.format(cfuncname=cfuncname, signature=signature, code=code)
+        codes.append(func_code)
+
+    body = "\n".join(codes) + """
+    static PyMethodDef methods[] = {
+    %(methods)s
+    { NULL }
+    };
+    static struct PyModuleDef moduledef = {
+        PyModuleDef_HEAD_INIT,
+        "%(modname)s", /* m_name */
+        NULL, /* m_doc */
+        -1, /* m_size */
+        methods, /* m_methods */
+    };
+    """ % dict(methods='\n'.join(methods_table), modname=modname)
+    return body
+
+
+def _make_source(name, init, body):
+    """ Combines the code fragments into source code ready to be compiled
+    """
+    code = """
+    #include <Python.h>
+
+    %(body)s
+
+    PyMODINIT_FUNC
+    PyInit_%(name)s(void) {
+    %(init)s
+    }
+    """ % dict(
+        name=name, init=init, body=body,
+    )
+    return code
+
+
+def _c_compile(cfile, outputfilename, include_dirs=[], libraries=[],
+               library_dirs=[]):
+    if sys.platform == 'win32':
+        compile_extra = ["/we4013"]
+        link_extra = ["/LIBPATH:" + os.path.join(sys.base_prefix, 'libs')]
+    elif sys.platform.startswith('linux'):
+        compile_extra = [
+            "-O0", "-g", "-Werror=implicit-function-declaration", "-fPIC"]
+        link_extra = None
+    else:
+        compile_extra = link_extra = None
+    if sys.platform == 'win32':
+        link_extra = link_extra + ['/DEBUG']  # generate .pdb file
+    if sys.platform == 'darwin':
+        # support Fink & Darwinports
+        for s in ('/sw/', '/opt/local/'):
+            if (s + 'include' not in include_dirs
+                    and os.path.exists(s + 'include')):
+                include_dirs.append(s + 'include')
+            if s + 'lib' not in library_dirs and os.path.exists(s + 'lib'):
+                library_dirs.append(s + 'lib')
+
+    outputfilename = outputfilename.with_suffix(get_so_suffix())
+    saved_environ = os.environ.copy()
+    try:
+        build(
+            cfile, outputfilename,
+            compile_extra, link_extra,
+            include_dirs, libraries, library_dirs)
+    finally:
+        # workaround for a distutils bug where some env vars can
+        # become longer and longer every time it is used
+        for key, value in saved_environ.items():
+            if os.environ.get(key) != value:
+                os.environ[key] = value
+    return outputfilename
+
+
+def build(cfile, outputfilename, compile_extra, link_extra,
+          include_dirs, libraries, library_dirs):
+    "cd into the directory where the cfile is, use distutils to build"
+    from numpy.distutils.ccompiler import new_compiler
+
+    compiler = new_compiler(force=1, verbose=2)
+    compiler.customize('')
+    objects = []
+
+    old = os.getcwd()
+    os.chdir(cfile.parent)
+    try:
+        res = compiler.compile(
+            [str(cfile.name)],
+            include_dirs=include_dirs,
+            extra_preargs=compile_extra
+            )
+        objects += [str(cfile.parent / r) for r in res]
+    finally:
+        os.chdir(old)
+
+    compiler.link_shared_object(
+        objects, str(outputfilename),
+        libraries=libraries,
+        extra_preargs=link_extra,
+        library_dirs=library_dirs)
+
+
+def get_so_suffix():
+    ret = sysconfig.get_config_var('EXT_SUFFIX')
+    assert ret
+    return ret
diff --git a/.local/lib/python3.8/site-packages/numpy/testing/_private/noseclasses.py b/.local/lib/python3.8/site-packages/numpy/testing/_private/noseclasses.py
new file mode 100644
index 0000000..48fa4dc
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/testing/_private/noseclasses.py
@@ -0,0 +1,364 @@
+# These classes implement a doctest runner plugin for nose, a "known failure"
+# error class, and a customized TestProgram for NumPy.
+
+# Because this module imports nose directly, it should not
+# be used except by nosetester.py to avoid a general NumPy
+# dependency on nose.
+import os
+import sys
+import doctest
+import inspect
+
+import numpy
+import nose
+from nose.plugins import doctests as npd
+from nose.plugins.errorclass import ErrorClass, ErrorClassPlugin
+from nose.plugins.base import Plugin
+from nose.util import src
+from .nosetester import get_package_name
+from .utils import KnownFailureException, KnownFailureTest
+
+
+# Some of the classes in this module begin with 'Numpy' to clearly distinguish
+# them from the plethora of very similar names from nose/unittest/doctest
+
+#-----------------------------------------------------------------------------
+# Modified version of the one in the stdlib, that fixes a python bug (doctests
+# not found in extension modules, https://bugs.python.org/issue3158)
+class NumpyDocTestFinder(doctest.DocTestFinder):
+
+    def _from_module(self, module, object):
+        """
+        Return true if the given object is defined in the given
+        module.
+        """
+        if module is None:
+            return True
+        elif inspect.isfunction(object):
+            return module.__dict__ is object.__globals__
+        elif inspect.isbuiltin(object):
+            return module.__name__ == object.__module__
+        elif inspect.isclass(object):
+            return module.__name__ == object.__module__
+        elif inspect.ismethod(object):
+            # This one may be a bug in cython that fails to correctly set the
+            # __module__ attribute of methods, but since the same error is easy
+            # to make by extension code writers, having this safety in place
+            # isn't such a bad idea
+            return module.__name__ == object.__self__.__class__.__module__
+        elif inspect.getmodule(object) is not None:
+            return module is inspect.getmodule(object)
+        elif hasattr(object, '__module__'):
+            return module.__name__ == object.__module__
+        elif isinstance(object, property):
+            return True  # [XX] no way to be sure.
+        else:
+            raise ValueError("object must be a class or function")
+
+    def _find(self, tests, obj, name, module, source_lines, globs, seen):
+        """
+        Find tests for the given object and any contained objects, and
+        add them to `tests`.
+        """
+
+        doctest.DocTestFinder._find(self, tests, obj, name, module,
+                                    source_lines, globs, seen)
+
+        # Below we re-run pieces of the above method with manual modifications,
+        # because the original code is buggy and fails to correctly identify
+        # doctests in extension modules.
+
+        # Local shorthands
+        from inspect import (
+            isroutine, isclass, ismodule, isfunction, ismethod
+        )
+
+        # Look for tests in a module's contained objects.
+        if ismodule(obj) and self._recurse:
+            for valname, val in obj.__dict__.items():
+                valname1 = f'{name}.{valname}'
+                if ( (isroutine(val) or isclass(val))
+                     and self._from_module(module, val)):
+
+                    self._find(tests, val, valname1, module, source_lines,
+                               globs, seen)
+
+        # Look for tests in a class's contained objects.
+        if isclass(obj) and self._recurse:
+            for valname, val in obj.__dict__.items():
+                # Special handling for staticmethod/classmethod.
+                if isinstance(val, staticmethod):
+                    val = getattr(obj, valname)
+                if isinstance(val, classmethod):
+                    val = getattr(obj, valname).__func__
+
+                # Recurse to methods, properties, and nested classes.
+                if ((isfunction(val) or isclass(val) or
+                     ismethod(val) or isinstance(val, property)) and
+                      self._from_module(module, val)):
+                    valname = f'{name}.{valname}'
+                    self._find(tests, val, valname, module, source_lines,
+                               globs, seen)
+
+
+# second-chance checker; if the default comparison doesn't
+# pass, then see if the expected output string contains flags that
+# tell us to ignore the output
+class NumpyOutputChecker(doctest.OutputChecker):
+    def check_output(self, want, got, optionflags):
+        ret = doctest.OutputChecker.check_output(self, want, got,
+                                                 optionflags)
+        if not ret:
+            if "#random" in want:
+                return True
+
+            # it would be useful to normalize endianness so that
+            # bigendian machines don't fail all the tests (and there are
+            # actually some bigendian examples in the doctests). Let's try
+            # making them all little endian
+            got = got.replace("'>", "'<")
+            want = want.replace("'>", "'<")
+
+            # try to normalize out 32 and 64 bit default int sizes
+            for sz in [4, 8]:
+                got = got.replace("'<i%d'" % sz, "int")
+                want = want.replace("'<i%d'" % sz, "int")
+
+                ret = doctest.OutputChecker.check_output(self, want, got,
+                                                         optionflags)
+                if ret:
+                    break
+
+        return ret
+    >>> np.testing.nosetester.get_package_name('nonsense')
+    'numpy'
+
+    """
+
+    fullpath = filepath[:]
+    pkg_name = []
+    while 'site-packages' in filepath or 'dist-packages' in filepath:
+        filepath, p2 = os.path.split(filepath)
+        if p2 in ('site-packages', 'dist-packages'):
+            break
+        pkg_name.append(p2)
+
+    # if package name determination failed, just default to numpy/scipy
+    if not pkg_name:
+        if 'scipy' in fullpath:
+            return 'scipy'
+        else:
+            return 'numpy'
+
+    # otherwise, reverse to get correct order and return
+    pkg_name.reverse()
+
+    # don't include the outer egg directory
+    if pkg_name[0].endswith('.egg'):
+        pkg_name.pop(0)
+
+    return '.'.join(pkg_name)
+
+
+def run_module_suite(file_to_run=None, argv=None):
+    """
+    Run a test module.
+
+    Equivalent to calling ``$ nosetests <argv> <file_to_run>`` from
+    the command line
+
+    Parameters
+    ----------
+    file_to_run : str, optional
+        Path to test module, or None.
+        By default, run the module from which this function is called.
+    argv : list of strings
+        Arguments to be passed to the nose test runner. ``argv[0]`` is
+        ignored. All command line arguments accepted by ``nosetests``
+        will work. If it is the default value None, sys.argv is used.
+
+    .. 
versionadded:: 1.9.0 + + Examples + -------- + Adding the following:: + + if __name__ == "__main__" : + run_module_suite(argv=sys.argv) + + at the end of a test module will run the tests when that module is + called in the python interpreter. + + Alternatively, calling:: + + >>> run_module_suite(file_to_run="numpy/tests/test_matlib.py") # doctest: +SKIP + + from an interpreter will run all the test routine in 'test_matlib.py'. + """ + if file_to_run is None: + f = sys._getframe(1) + file_to_run = f.f_locals.get('__file__', None) + if file_to_run is None: + raise AssertionError + + if argv is None: + argv = sys.argv + [file_to_run] + else: + argv = argv + [file_to_run] + + nose = import_nose() + from .noseclasses import KnownFailurePlugin + nose.run(argv=argv, addplugins=[KnownFailurePlugin()]) + + +class NoseTester: + """ + Nose test runner. + + This class is made available as numpy.testing.Tester, and a test function + is typically added to a package's __init__.py like so:: + + from numpy.testing import Tester + test = Tester().test + + Calling this test function finds and runs all tests associated with the + package and all its sub-packages. + + Attributes + ---------- + package_path : str + Full path to the package to test. + package_name : str + Name of the package to test. + + Parameters + ---------- + package : module, str or None, optional + The package to test. If a string, this should be the full path to + the package. If None (default), `package` is set to the module from + which `NoseTester` is initialized. + raise_warnings : None, str or sequence of warnings, optional + This specifies which warnings to configure as 'raise' instead + of being shown once during the test execution. Valid strings are: + + - "develop" : equals ``(Warning,)`` + - "release" : equals ``()``, don't raise on any warnings. + + Default is "release". + depth : int, optional + If `package` is None, then this can be used to initialize from the + module of the caller of (the caller of (...)) the code that + initializes `NoseTester`. Default of 0 means the module of the + immediate caller; higher values are useful for utility routines that + want to initialize `NoseTester` objects on behalf of other code. + + """ + def __init__(self, package=None, raise_warnings="release", depth=0, + check_fpu_mode=False): + # Back-compat: 'None' used to mean either "release" or "develop" + # depending on whether this was a release or develop version of + # numpy. Those semantics were fine for testing numpy, but not so + # helpful for downstream projects like scipy that use + # numpy.testing. (They want to set this based on whether *they* are a + # release or develop version, not whether numpy is.) So we continue to + # accept 'None' for back-compat, but it's now just an alias for the + # default "release". + if raise_warnings is None: + raise_warnings = "release" + + package_name = None + if package is None: + f = sys._getframe(1 + depth) + package_path = f.f_locals.get('__file__', None) + if package_path is None: + raise AssertionError + package_path = os.path.dirname(package_path) + package_name = f.f_locals.get('__name__', None) + elif isinstance(package, type(os)): + package_path = os.path.dirname(package.__file__) + package_name = getattr(package, '__name__', None) + else: + package_path = str(package) + + self.package_path = package_path + + # Find the package name under test; this name is used to limit coverage + # reporting (if enabled). 
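The Tester pattern described in the docstring above looks like this in a package's __init__.py (a minimal sketch; ``mypackage`` is a hypothetical name)::

    # hypothetical mypackage/__init__.py
    from numpy.testing import Tester

    test = Tester().test    # mypackage.test() now discovers and runs the tests
    del Tester              # keep the package namespace clean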
+ if package_name is None: + package_name = get_package_name(package_path) + self.package_name = package_name + + # Set to "release" in constructor in maintenance branches. + self.raise_warnings = raise_warnings + + # Whether to check for FPU mode changes + self.check_fpu_mode = check_fpu_mode + + def _test_argv(self, label, verbose, extra_argv): + ''' Generate argv for nosetest command + + Parameters + ---------- + label : {'fast', 'full', '', attribute identifier}, optional + see ``test`` docstring + verbose : int, optional + Verbosity value for test outputs, in the range 1-10. Default is 1. + extra_argv : list, optional + List with any extra arguments to pass to nosetests. + + Returns + ------- + argv : list + command line arguments that will be passed to nose + ''' + argv = [__file__, self.package_path, '-s'] + if label and label != 'full': + if not isinstance(label, str): + raise TypeError('Selection label should be a string') + if label == 'fast': + label = 'not slow' + argv += ['-A', label] + argv += ['--verbosity', str(verbose)] + + # When installing with setuptools, and also in some other cases, the + # test_*.py files end up marked +x executable. Nose, by default, does + # not run files marked with +x as they might be scripts. However, in + # our case nose only looks for test_*.py files under the package + # directory, which should be safe. + argv += ['--exe'] + + if extra_argv: + argv += extra_argv + return argv + + def _show_system_info(self): + nose = import_nose() + + import numpy + print(f'NumPy version {numpy.__version__}') + relaxed_strides = numpy.ones((10, 1), order="C").flags.f_contiguous + print("NumPy relaxed strides checking option:", relaxed_strides) + npdir = os.path.dirname(numpy.__file__) + print(f'NumPy is installed in {npdir}') + + if 'scipy' in self.package_name: + import scipy + print(f'SciPy version {scipy.__version__}') + spdir = os.path.dirname(scipy.__file__) + print(f'SciPy is installed in {spdir}') + + pyversion = sys.version.replace('\n', '') + print(f'Python version {pyversion}') + print("nose version %d.%d.%d" % nose.__versioninfo__) + + def _get_custom_doctester(self): + """ Return instantiated plugin for doctests + + Allows subclassing of this class to override doctester + + A return value of None means use the nose builtin doctest plugin + """ + from .noseclasses import NumpyDoctest + return NumpyDoctest() + + def prepare_test_args(self, label='fast', verbose=1, extra_argv=None, + doctests=False, coverage=False, timer=False): + """ + Run tests for module using nose. + + This method does the heavy lifting for the `test` method. It takes all + the same arguments, for details see `test`. 
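To make _test_argv concrete: with label='fast' and verbose=2 it assembles roughly the following list (illustration only; the first two entries depend on the installation)::

    argv = [__file__, self.package_path, '-s',
            '-A', 'not slow',    # the 'fast' label is rewritten to 'not slow'
            '--verbosity', '2',
            '--exe']             # run test files even if they are marked +x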
+ + See Also + -------- + test + + """ + # fail with nice error message if nose is not present + import_nose() + # compile argv + argv = self._test_argv(label, verbose, extra_argv) + # our way of doing coverage + if coverage: + argv += [f'--cover-package={self.package_name}', '--with-coverage', + '--cover-tests', '--cover-erase'] + + if timer: + if timer is True: + argv += ['--with-timer'] + elif isinstance(timer, int): + argv += ['--with-timer', '--timer-top-n', str(timer)] + + # construct list of plugins + import nose.plugins.builtin + from nose.plugins import EntryPointPluginManager + from .noseclasses import (KnownFailurePlugin, Unplugger, + FPUModeCheckPlugin) + plugins = [KnownFailurePlugin()] + plugins += [p() for p in nose.plugins.builtin.plugins] + if self.check_fpu_mode: + plugins += [FPUModeCheckPlugin()] + argv += ["--with-fpumodecheckplugin"] + try: + # External plugins (like nose-timer) + entrypoint_manager = EntryPointPluginManager() + entrypoint_manager.loadPlugins() + plugins += [p for p in entrypoint_manager.plugins] + except ImportError: + # Relies on pkg_resources, not a hard dependency + pass + + # add doctesting if required + doctest_argv = '--with-doctest' in argv + if doctests == False and doctest_argv: + doctests = True + plug = self._get_custom_doctester() + if plug is None: + # use standard doctesting + if doctests and not doctest_argv: + argv += ['--with-doctest'] + else: # custom doctesting + if doctest_argv: # in fact the unplugger would take care of this + argv.remove('--with-doctest') + plugins += [Unplugger('doctest'), plug] + if doctests: + argv += ['--with-' + plug.name] + return argv, plugins + + def test(self, label='fast', verbose=1, extra_argv=None, + doctests=False, coverage=False, raise_warnings=None, + timer=False): + """ + Run tests for module using nose. + + Parameters + ---------- + label : {'fast', 'full', '', attribute identifier}, optional + Identifies the tests to run. This can be a string to pass to + the nosetests executable with the '-A' option, or one of several + special values. Special values are: + + * 'fast' - the default - which corresponds to the ``nosetests -A`` + option of 'not slow'. + * 'full' - fast (as above) and slow tests as in the + 'no -A' option to nosetests - this is the same as ''. + * None or '' - run all tests. + * attribute_identifier - string passed directly to nosetests as '-A'. + + verbose : int, optional + Verbosity value for test outputs, in the range 1-10. Default is 1. + extra_argv : list, optional + List with any extra arguments to pass to nosetests. + doctests : bool, optional + If True, run doctests in module. Default is False. + coverage : bool, optional + If True, report coverage of NumPy code. Default is False. + (This requires the + `coverage module `_). + raise_warnings : None, str or sequence of warnings, optional + This specifies which warnings to configure as 'raise' instead + of being shown once during the test execution. Valid strings are: + + * "develop" : equals ``(Warning,)`` + * "release" : equals ``()``, do not raise on any warnings. + timer : bool or int, optional + Timing of individual tests with ``nose-timer`` (which needs to be + installed). If True, time tests and report on all of them. + If an integer (say ``N``), report timing results for ``N`` slowest + tests. + + Returns + ------- + result : object + Returns the result of running the tests as a + ``nose.result.TextTestResult`` object. + + Notes + ----- + Each NumPy module exposes `test` in its namespace to run all tests for it. 
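Following the branches of prepare_test_args above, coverage=True and timer=5 would append the following options (an illustration, assuming the package under test is numpy)::

    argv += ['--cover-package=numpy', '--with-coverage',
             '--cover-tests', '--cover-erase']        # coverage=True
    argv += ['--with-timer', '--timer-top-n', '5']    # timer=5 (an int)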
+ For example, to run all tests for numpy.lib: + + >>> np.lib.test() #doctest: +SKIP + + Examples + -------- + >>> result = np.lib.test() #doctest: +SKIP + Running unit tests for numpy.lib + ... + Ran 976 tests in 3.933s + + OK + + >>> result.errors #doctest: +SKIP + [] + >>> result.knownfail #doctest: +SKIP + [] + """ + + # cap verbosity at 3 because nose becomes *very* verbose beyond that + verbose = min(verbose, 3) + + from . import utils + utils.verbose = verbose + + argv, plugins = self.prepare_test_args( + label, verbose, extra_argv, doctests, coverage, timer) + + if doctests: + print(f'Running unit tests and doctests for {self.package_name}') + else: + print(f'Running unit tests for {self.package_name}') + + self._show_system_info() + + # reset doctest state on every run + import doctest + doctest.master = None + + if raise_warnings is None: + raise_warnings = self.raise_warnings + + _warn_opts = dict(develop=(Warning,), + release=()) + if isinstance(raise_warnings, str): + raise_warnings = _warn_opts[raise_warnings] + + with suppress_warnings("location") as sup: + # Reset the warning filters to the default state, + # so that running the tests is more repeatable. + warnings.resetwarnings() + # Set all warnings to 'warn', this is because the default 'once' + # has the bad property of possibly shadowing later warnings. + warnings.filterwarnings('always') + # Force the requested warnings to raise + for warningtype in raise_warnings: + warnings.filterwarnings('error', category=warningtype) + # Filter out annoying import messages. + sup.filter(message='Not importing directory') + sup.filter(message="numpy.dtype size changed") + sup.filter(message="numpy.ufunc size changed") + sup.filter(category=np.ModuleDeprecationWarning) + # Filter out boolean '-' deprecation messages. This allows + # older versions of scipy to test without a flood of messages. + sup.filter(message=".*boolean negative.*") + sup.filter(message=".*boolean subtract.*") + # Filter out distutils cpu warnings (could be localized to + # distutils tests). ASV has problems with top level import, + # so fetch module for suppression here. + with warnings.catch_warnings(): + warnings.simplefilter("always") + from ...distutils import cpuinfo + sup.filter(category=UserWarning, module=cpuinfo) + # Filter out some deprecation warnings inside nose 1.3.7 when run + # on python 3.5b2. See + # https://github.com/nose-devs/nose/issues/929 + # Note: it is hard to filter based on module for sup (lineno could + # be implemented). + warnings.filterwarnings("ignore", message=".*getargspec.*", + category=DeprecationWarning, + module=r"nose\.") + + from .noseclasses import NumpyTestProgram + + t = NumpyTestProgram(argv=argv, exit=False, plugins=plugins) + + return t.result + + def bench(self, label='fast', verbose=1, extra_argv=None): + """ + Run benchmarks for module using nose. + + Parameters + ---------- + label : {'fast', 'full', '', attribute identifier}, optional + Identifies the benchmarks to run. This can be a string to pass to + the nosetests executable with the '-A' option, or one of several + special values. Special values are: + + * 'fast' - the default - which corresponds to the ``nosetests -A`` + option of 'not slow'. + * 'full' - fast (as above) and slow benchmarks as in the + 'no -A' option to nosetests - this is the same as ''. + * None or '' - run all tests. + * attribute_identifier - string passed directly to nosetests as '-A'. + + verbose : int, optional + Verbosity value for benchmark outputs, in the range 1-10. 
Default is 1. + extra_argv : list, optional + List with any extra arguments to pass to nosetests. + + Returns + ------- + success : bool + Returns True if running the benchmarks works, False if an error + occurred. + + Notes + ----- + Benchmarks are like tests, but have names starting with "bench" instead + of "test", and can be found under the "benchmarks" sub-directory of the + module. + + Each NumPy module exposes `bench` in its namespace to run all benchmarks + for it. + + Examples + -------- + >>> success = np.lib.bench() #doctest: +SKIP + Running benchmarks for numpy.lib + ... + using 562341 items: + unique: + 0.11 + unique1d: + 0.11 + ratio: 1.0 + nUnique: 56230 == 56230 + ... + OK + + >>> success #doctest: +SKIP + True + + """ + + print(f'Running benchmarks for {self.package_name}') + self._show_system_info() + + argv = self._test_argv(label, verbose, extra_argv) + argv += ['--match', r'(?:^|[\\b_\\.%s-])[Bb]ench' % os.sep] + + # import nose or make informative error + nose = import_nose() + + # get plugin to disable doctests + from .noseclasses import Unplugger + add_plugins = [Unplugger('doctest')] + + return nose.run(argv=argv, addplugins=add_plugins) + + +def _numpy_tester(): + if hasattr(np, "__version__") and ".dev0" in np.__version__: + mode = "develop" + else: + mode = "release" + return NoseTester(raise_warnings=mode, depth=1, + check_fpu_mode=True) diff --git a/.local/lib/python3.8/site-packages/numpy/testing/_private/parameterized.py b/.local/lib/python3.8/site-packages/numpy/testing/_private/parameterized.py new file mode 100644 index 0000000..db9629a --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/testing/_private/parameterized.py @@ -0,0 +1,432 @@ +""" +tl;dr: all code code is licensed under simplified BSD, unless stated otherwise. + +Unless stated otherwise in the source files, all code is copyright 2010 David +Wolever . All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +EVENT SHALL OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +The views and conclusions contained in the software and documentation are those +of the authors and should not be interpreted as representing official policies, +either expressed or implied, of David Wolever. 
+ +""" +import re +import inspect +import warnings +from functools import wraps +from types import MethodType +from collections import namedtuple + +from unittest import TestCase + +_param = namedtuple("param", "args kwargs") + +class param(_param): + """ Represents a single parameter to a test case. + + For example:: + + >>> p = param("foo", bar=16) + >>> p + param("foo", bar=16) + >>> p.args + ('foo', ) + >>> p.kwargs + {'bar': 16} + + Intended to be used as an argument to ``@parameterized``:: + + @parameterized([ + param("foo", bar=16), + ]) + def test_stuff(foo, bar=16): + pass + """ + + def __new__(cls, *args , **kwargs): + return _param.__new__(cls, args, kwargs) + + @classmethod + def explicit(cls, args=None, kwargs=None): + """ Creates a ``param`` by explicitly specifying ``args`` and + ``kwargs``:: + + >>> param.explicit([1,2,3]) + param(*(1, 2, 3)) + >>> param.explicit(kwargs={"foo": 42}) + param(*(), **{"foo": "42"}) + """ + args = args or () + kwargs = kwargs or {} + return cls(*args, **kwargs) + + @classmethod + def from_decorator(cls, args): + """ Returns an instance of ``param()`` for ``@parameterized`` argument + ``args``:: + + >>> param.from_decorator((42, )) + param(args=(42, ), kwargs={}) + >>> param.from_decorator("foo") + param(args=("foo", ), kwargs={}) + """ + if isinstance(args, param): + return args + elif isinstance(args, (str,)): + args = (args, ) + try: + return cls(*args) + except TypeError as e: + if "after * must be" not in str(e): + raise + raise TypeError( + "Parameters must be tuples, but %r is not (hint: use '(%r, )')" + %(args, args), + ) + + def __repr__(self): + return "param(*%r, **%r)" %self + + +def parameterized_argument_value_pairs(func, p): + """Return tuples of parameterized arguments and their values. + + This is useful if you are writing your own doc_func + function and need to know the values for each parameter name:: + + >>> def func(a, foo=None, bar=42, **kwargs): pass + >>> p = param(1, foo=7, extra=99) + >>> parameterized_argument_value_pairs(func, p) + [("a", 1), ("foo", 7), ("bar", 42), ("**kwargs", {"extra": 99})] + + If the function's first argument is named ``self`` then it will be + ignored:: + + >>> def func(self, a): pass + >>> p = param(1) + >>> parameterized_argument_value_pairs(func, p) + [("a", 1)] + + Additionally, empty ``*args`` or ``**kwargs`` will be ignored:: + + >>> def func(foo, *args): pass + >>> p = param(1) + >>> parameterized_argument_value_pairs(func, p) + [("foo", 1)] + >>> p = param(1, 16) + >>> parameterized_argument_value_pairs(func, p) + [("foo", 1), ("*args", (16, ))] + """ + argspec = inspect.getargspec(func) + arg_offset = 1 if argspec.args[:1] == ["self"] else 0 + + named_args = argspec.args[arg_offset:] + + result = list(zip(named_args, p.args)) + named_args = argspec.args[len(result) + arg_offset:] + varargs = p.args[len(result):] + + result.extend([ + (name, p.kwargs.get(name, default)) + for (name, default) + in zip(named_args, argspec.defaults or []) + ]) + + seen_arg_names = {n for (n, _) in result} + keywords = dict(sorted([ + (name, p.kwargs[name]) + for name in p.kwargs + if name not in seen_arg_names + ])) + + if varargs: + result.append(("*%s" %(argspec.varargs, ), tuple(varargs))) + + if keywords: + result.append(("**%s" %(argspec.keywords, ), keywords)) + + return result + +def short_repr(x, n=64): + """ A shortened repr of ``x`` which is guaranteed to be ``unicode``:: + + >>> short_repr("foo") + u"foo" + >>> short_repr("123456789", n=4) + u"12...89" + """ + + x_repr = repr(x) + if 
isinstance(x_repr, bytes): + try: + x_repr = str(x_repr, "utf-8") + except UnicodeDecodeError: + x_repr = str(x_repr, "latin1") + if len(x_repr) > n: + x_repr = x_repr[:n//2] + "..." + x_repr[len(x_repr) - n//2:] + return x_repr + +def default_doc_func(func, num, p): + if func.__doc__ is None: + return None + + all_args_with_values = parameterized_argument_value_pairs(func, p) + + # Assumes that the function passed is a bound method. + descs = [f'{n}={short_repr(v)}' for n, v in all_args_with_values] + + # The documentation might be a multiline string, so split it + # and just work with the first string, ignoring the period + # at the end if there is one. + first, nl, rest = func.__doc__.lstrip().partition("\n") + suffix = "" + if first.endswith("."): + suffix = "." + first = first[:-1] + args = "%s[with %s]" %(len(first) and " " or "", ", ".join(descs)) + return "".join([first.rstrip(), args, suffix, nl, rest]) + +def default_name_func(func, num, p): + base_name = func.__name__ + name_suffix = "_%s" %(num, ) + if len(p.args) > 0 and isinstance(p.args[0], (str,)): + name_suffix += "_" + parameterized.to_safe_name(p.args[0]) + return base_name + name_suffix + + +# force nose for numpy purposes. +_test_runner_override = 'nose' +_test_runner_guess = False +_test_runners = set(["unittest", "unittest2", "nose", "nose2", "pytest"]) +_test_runner_aliases = { + "_pytest": "pytest", +} + +def set_test_runner(name): + global _test_runner_override + if name not in _test_runners: + raise TypeError( + "Invalid test runner: %r (must be one of: %s)" + %(name, ", ".join(_test_runners)), + ) + _test_runner_override = name + +def detect_runner(): + """ Guess which test runner we're using by traversing the stack and looking + for the first matching module. This *should* be reasonably safe, as + it's done during test discovery where the test runner should be the + stack frame immediately outside. 
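A sketch of how default_name_func above composes generated test names (the sample function is hypothetical)::

    def test_add(a, b, expected):
        pass

    default_name_func(test_add, 0, param("two", 2, 3))   # -> 'test_add_0_two'
    default_name_func(test_add, 1, param(3, 5, 8))       # -> 'test_add_1' (no str arg)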
""" + if _test_runner_override is not None: + return _test_runner_override + global _test_runner_guess + if _test_runner_guess is False: + stack = inspect.stack() + for record in reversed(stack): + frame = record[0] + module = frame.f_globals.get("__name__").partition(".")[0] + if module in _test_runner_aliases: + module = _test_runner_aliases[module] + if module in _test_runners: + _test_runner_guess = module + break + else: + _test_runner_guess = None + return _test_runner_guess + +class parameterized: + """ Parameterize a test case:: + + class TestInt: + @parameterized([ + ("A", 10), + ("F", 15), + param("10", 42, base=42) + ]) + def test_int(self, input, expected, base=16): + actual = int(input, base=base) + assert_equal(actual, expected) + + @parameterized([ + (2, 3, 5) + (3, 5, 8), + ]) + def test_add(a, b, expected): + assert_equal(a + b, expected) + """ + + def __init__(self, input, doc_func=None): + self.get_input = self.input_as_callable(input) + self.doc_func = doc_func or default_doc_func + + def __call__(self, test_func): + self.assert_not_in_testcase_subclass() + + @wraps(test_func) + def wrapper(test_self=None): + test_cls = test_self and type(test_self) + + original_doc = wrapper.__doc__ + for num, args in enumerate(wrapper.parameterized_input): + p = param.from_decorator(args) + unbound_func, nose_tuple = self.param_as_nose_tuple(test_self, test_func, num, p) + try: + wrapper.__doc__ = nose_tuple[0].__doc__ + # Nose uses `getattr(instance, test_func.__name__)` to get + # a method bound to the test instance (as opposed to a + # method bound to the instance of the class created when + # tests were being enumerated). Set a value here to make + # sure nose can get the correct test method. + if test_self is not None: + setattr(test_cls, test_func.__name__, unbound_func) + yield nose_tuple + finally: + if test_self is not None: + delattr(test_cls, test_func.__name__) + wrapper.__doc__ = original_doc + wrapper.parameterized_input = self.get_input() + wrapper.parameterized_func = test_func + test_func.__name__ = "_parameterized_original_%s" %(test_func.__name__, ) + return wrapper + + def param_as_nose_tuple(self, test_self, func, num, p): + nose_func = wraps(func)(lambda *args: func(*args[:-1], **args[-1])) + nose_func.__doc__ = self.doc_func(func, num, p) + # Track the unbound function because we need to setattr the unbound + # function onto the class for nose to work (see comments above), and + # Python 3 doesn't let us pull the function out of a bound method. + unbound_func = nose_func + if test_self is not None: + nose_func = MethodType(nose_func, test_self) + return unbound_func, (nose_func, ) + p.args + (p.kwargs or {}, ) + + def assert_not_in_testcase_subclass(self): + parent_classes = self._terrible_magic_get_defining_classes() + if any(issubclass(cls, TestCase) for cls in parent_classes): + raise Exception("Warning: '@parameterized' tests won't work " + "inside subclasses of 'TestCase' - use " + "'@parameterized.expand' instead.") + + def _terrible_magic_get_defining_classes(self): + """ Returns the list of parent classes of the class currently being defined. + Will likely only work if called from the ``parameterized`` decorator. 
+ This function is entirely @brandon_rhodes's fault, as he suggested + the implementation: http://stackoverflow.com/a/8793684/71522 + """ + stack = inspect.stack() + if len(stack) <= 4: + return [] + frame = stack[4] + code_context = frame[4] and frame[4][0].strip() + if not (code_context and code_context.startswith("class ")): + return [] + _, _, parents = code_context.partition("(") + parents, _, _ = parents.partition(")") + return eval("[" + parents + "]", frame[0].f_globals, frame[0].f_locals) + + @classmethod + def input_as_callable(cls, input): + if callable(input): + return lambda: cls.check_input_values(input()) + input_values = cls.check_input_values(input) + return lambda: input_values + + @classmethod + def check_input_values(cls, input_values): + # Explicitly convert non-list inputs to a list so that: + # 1. A helpful exception will be raised if they aren't iterable, and + # 2. Generators are unwrapped exactly once (otherwise `nosetests + # --processes=n` has issues; see: + # https://github.com/wolever/nose-parameterized/pull/31) + if not isinstance(input_values, list): + input_values = list(input_values) + return [ param.from_decorator(p) for p in input_values ] + + @classmethod + def expand(cls, input, name_func=None, doc_func=None, **legacy): + """ A "brute force" method of parameterizing test cases. Creates new + test cases and injects them into the namespace that the wrapped + function is being defined in. Useful for parameterizing tests in + subclasses of 'UnitTest', where Nose test generators don't work. + + >>> @parameterized.expand([("foo", 1, 2)]) + ... def test_add1(name, input, expected): + ... actual = add1(input) + ... assert_equal(actual, expected) + ... + >>> locals() + ... 'test_add1_foo_0': ... + >>> + """ + + if "testcase_func_name" in legacy: + warnings.warn("testcase_func_name= is deprecated; use name_func=", + DeprecationWarning, stacklevel=2) + if not name_func: + name_func = legacy["testcase_func_name"] + + if "testcase_func_doc" in legacy: + warnings.warn("testcase_func_doc= is deprecated; use doc_func=", + DeprecationWarning, stacklevel=2) + if not doc_func: + doc_func = legacy["testcase_func_doc"] + + doc_func = doc_func or default_doc_func + name_func = name_func or default_name_func + + def parameterized_expand_wrapper(f, instance=None): + stack = inspect.stack() + frame = stack[1] + frame_locals = frame[0].f_locals + + parameters = cls.input_as_callable(input)() + for num, p in enumerate(parameters): + name = name_func(f, num, p) + frame_locals[name] = cls.param_as_standalone_func(p, f, name) + frame_locals[name].__doc__ = doc_func(f, num, p) + + f.__test__ = False + return parameterized_expand_wrapper + + @classmethod + def param_as_standalone_func(cls, p, func, name): + @wraps(func) + def standalone_func(*a): + return func(*(a + p.args), **p.kwargs) + standalone_func.__name__ = name + + # place_as is used by py.test to determine what source file should be + # used for this test. + standalone_func.place_as = func + + # Remove __wrapped__ because py.test will try to look at __wrapped__ + # to determine which parameters should be used with this test case, + # and obviously we don't need it to do any parameterization. 
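A self-contained sketch of parameterized.expand in use, assuming the vendored module is importable under its private path::

    from unittest import TestCase
    from numpy.testing._private.parameterized import parameterized

    class TestAdd1(TestCase):
        # expand() injects test_add1_0_two and test_add1_1_three into the
        # class body, so each parameter set runs as a separate test.
        @parameterized.expand([
            ("two", 1, 2),
            ("three", 2, 3),
        ])
        def test_add1(self, name, value, expected):
            self.assertEqual(value + 1, expected)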
+ try: + del standalone_func.__wrapped__ + except AttributeError: + pass + return standalone_func + + @classmethod + def to_safe_name(cls, s): + return str(re.sub("[^a-zA-Z0-9_]+", "_", s)) diff --git a/.local/lib/python3.8/site-packages/numpy/testing/_private/utils.py b/.local/lib/python3.8/site-packages/numpy/testing/_private/utils.py new file mode 100644 index 0000000..0eb945d --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/testing/_private/utils.py @@ -0,0 +1,2520 @@ +""" +Utility function to facilitate testing. + +""" +import os +import sys +import platform +import re +import gc +import operator +import warnings +from functools import partial, wraps +import shutil +import contextlib +from tempfile import mkdtemp, mkstemp +from unittest.case import SkipTest +from warnings import WarningMessage +import pprint + +import numpy as np +from numpy.core import( + intp, float32, empty, arange, array_repr, ndarray, isnat, array) +import numpy.linalg.lapack_lite + +from io import StringIO + +__all__ = [ + 'assert_equal', 'assert_almost_equal', 'assert_approx_equal', + 'assert_array_equal', 'assert_array_less', 'assert_string_equal', + 'assert_array_almost_equal', 'assert_raises', 'build_err_msg', + 'decorate_methods', 'jiffies', 'memusage', 'print_assert_equal', + 'raises', 'rundocs', 'runstring', 'verbose', 'measure', + 'assert_', 'assert_array_almost_equal_nulp', 'assert_raises_regex', + 'assert_array_max_ulp', 'assert_warns', 'assert_no_warnings', + 'assert_allclose', 'IgnoreException', 'clear_and_catch_warnings', + 'SkipTest', 'KnownFailureException', 'temppath', 'tempdir', 'IS_PYPY', + 'HAS_REFCOUNT', 'suppress_warnings', 'assert_array_compare', + 'assert_no_gc_cycles', 'break_cycles', 'HAS_LAPACK64', 'IS_PYSTON', + ] + + +class KnownFailureException(Exception): + '''Raise this exception to mark a test as a known failing test.''' + pass + + +KnownFailureTest = KnownFailureException # backwards compat +verbose = 0 + +IS_PYPY = platform.python_implementation() == 'PyPy' +IS_PYSTON = hasattr(sys, "pyston_version_info") +HAS_REFCOUNT = getattr(sys, 'getrefcount', None) is not None and not IS_PYSTON +HAS_LAPACK64 = numpy.linalg.lapack_lite._ilp64 + + +def import_nose(): + """ Import nose only when needed. + """ + nose_is_good = True + minimum_nose_version = (1, 0, 0) + try: + import nose + except ImportError: + nose_is_good = False + else: + if nose.__versioninfo__ < minimum_nose_version: + nose_is_good = False + + if not nose_is_good: + msg = ('Need nose >= %d.%d.%d for tests - see ' + 'https://nose.readthedocs.io' % + minimum_nose_version) + raise ImportError(msg) + + return nose + + +def assert_(val, msg=''): + """ + Assert that works in release mode. + Accepts callable msg to allow deferring evaluation until failure. + + The Python built-in ``assert`` does not work when executing code in + optimized mode (the ``-O`` flag) - no byte-code is generated for it. + + For documentation on usage, refer to the Python documentation. + + """ + __tracebackhide__ = True # Hide traceback for py.test + if not val: + try: + smsg = msg() + except TypeError: + smsg = msg + raise AssertionError(smsg) + + +def gisnan(x): + """like isnan, but always raise an error if type not supported instead of + returning a TypeError object. + + Notes + ----- + isnan and other ufunc sometimes return a NotImplementedType object instead + of raising any exception. This function is a wrapper to make sure an + exception is always raised. 
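Because assert_ above accepts a callable message, an expensive failure message is only built when the check actually fails; for example::

    from numpy.testing import assert_

    x = 1
    assert_(x == 1, msg=lambda: f"expected 1, got {x!r}")  # msg() called only on failure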
+ + This should be removed once this problem is solved at the Ufunc level.""" + from numpy.core import isnan + st = isnan(x) + if isinstance(st, type(NotImplemented)): + raise TypeError("isnan not supported for this type") + return st + + +def gisfinite(x): + """like isfinite, but always raise an error if type not supported instead + of returning a TypeError object. + + Notes + ----- + isfinite and other ufunc sometimes return a NotImplementedType object + instead of raising any exception. This function is a wrapper to make sure + an exception is always raised. + + This should be removed once this problem is solved at the Ufunc level.""" + from numpy.core import isfinite, errstate + with errstate(invalid='ignore'): + st = isfinite(x) + if isinstance(st, type(NotImplemented)): + raise TypeError("isfinite not supported for this type") + return st + + +def gisinf(x): + """like isinf, but always raise an error if type not supported instead of + returning a TypeError object. + + Notes + ----- + isinf and other ufunc sometimes return a NotImplementedType object instead + of raising any exception. This function is a wrapper to make sure an + exception is always raised. + + This should be removed once this problem is solved at the Ufunc level.""" + from numpy.core import isinf, errstate + with errstate(invalid='ignore'): + st = isinf(x) + if isinstance(st, type(NotImplemented)): + raise TypeError("isinf not supported for this type") + return st + + +if os.name == 'nt': + # Code "stolen" from enthought/debug/memusage.py + def GetPerformanceAttributes(object, counter, instance=None, + inum=-1, format=None, machine=None): + # NOTE: Many counters require 2 samples to give accurate results, + # including "% Processor Time" (as by definition, at any instant, a + # thread's CPU usage is either 0 or 100). To read counters like this, + # you should copy this function, but keep the counter open, and call + # CollectQueryData() each time you need to know. + # See http://msdn.microsoft.com/library/en-us/dnperfmo/html/perfmonpt2.asp (dead link) + # My older explanation for this was that the "AddCounter" process + # forced the CPU to 100%, but the above makes more sense :) + import win32pdh + if format is None: + format = win32pdh.PDH_FMT_LONG + path = win32pdh.MakeCounterPath( (machine, object, instance, None, + inum, counter)) + hq = win32pdh.OpenQuery() + try: + hc = win32pdh.AddCounter(hq, path) + try: + win32pdh.CollectQueryData(hq) + type, val = win32pdh.GetFormattedCounterValue(hc, format) + return val + finally: + win32pdh.RemoveCounter(hc) + finally: + win32pdh.CloseQuery(hq) + + def memusage(processName="python", instance=0): + # from win32pdhutil, part of the win32all package + import win32pdh + return GetPerformanceAttributes("Process", "Virtual Bytes", + processName, instance, + win32pdh.PDH_FMT_LONG, None) +elif sys.platform[:5] == 'linux': + + def memusage(_proc_pid_stat=f'/proc/{os.getpid()}/stat'): + """ + Return virtual memory size in bytes of the running python. + + """ + try: + with open(_proc_pid_stat, 'r') as f: + l = f.readline().split(' ') + return int(l[22]) + except Exception: + return +else: + def memusage(): + """ + Return memory usage of running python. [Not implemented] + + """ + raise NotImplementedError + + +if sys.platform[:5] == 'linux': + def jiffies(_proc_pid_stat=f'/proc/{os.getpid()}/stat', _load_time=[]): + """ + Return number of jiffies elapsed. + + Return number of jiffies (1/100ths of a second) that this + process has been scheduled in user mode. See man 5 proc. 
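Both Linux branches above parse /proc/<pid>/stat; field 22 holds the virtual memory size and field 13 the user-mode jiffies (a standalone sketch, Linux only)::

    import os

    with open(f'/proc/{os.getpid()}/stat') as f:
        fields = f.readline().split(' ')
    vsize = int(fields[22])   # bytes, what memusage() returns
    utime = int(fields[13])   # user-mode jiffies, what jiffies() reads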
+ + """ + import time + if not _load_time: + _load_time.append(time.time()) + try: + with open(_proc_pid_stat, 'r') as f: + l = f.readline().split(' ') + return int(l[13]) + except Exception: + return int(100*(time.time()-_load_time[0])) +else: + # os.getpid is not in all platforms available. + # Using time is safe but inaccurate, especially when process + # was suspended or sleeping. + def jiffies(_load_time=[]): + """ + Return number of jiffies elapsed. + + Return number of jiffies (1/100ths of a second) that this + process has been scheduled in user mode. See man 5 proc. + + """ + import time + if not _load_time: + _load_time.append(time.time()) + return int(100*(time.time()-_load_time[0])) + + +def build_err_msg(arrays, err_msg, header='Items are not equal:', + verbose=True, names=('ACTUAL', 'DESIRED'), precision=8): + msg = ['\n' + header] + if err_msg: + if err_msg.find('\n') == -1 and len(err_msg) < 79-len(header): + msg = [msg[0] + ' ' + err_msg] + else: + msg.append(err_msg) + if verbose: + for i, a in enumerate(arrays): + + if isinstance(a, ndarray): + # precision argument is only needed if the objects are ndarrays + r_func = partial(array_repr, precision=precision) + else: + r_func = repr + + try: + r = r_func(a) + except Exception as exc: + r = f'[repr failed for <{type(a).__name__}>: {exc}]' + if r.count('\n') > 3: + r = '\n'.join(r.splitlines()[:3]) + r += '...' + msg.append(f' {names[i]}: {r}') + return '\n'.join(msg) + + +def assert_equal(actual, desired, err_msg='', verbose=True): + """ + Raises an AssertionError if two objects are not equal. + + Given two objects (scalars, lists, tuples, dictionaries or numpy arrays), + check that all elements of these objects are equal. An exception is raised + at the first conflicting values. + + When one of `actual` and `desired` is a scalar and the other is array_like, + the function checks that each element of the array_like object is equal to + the scalar. + + This function handles NaN comparisons as if NaN was a "normal" number. + That is, AssertionError is not raised if both objects have NaNs in the same + positions. This is in contrast to the IEEE standard on NaNs, which says + that NaN compared to anything must return False. + + Parameters + ---------- + actual : array_like + The object to check. + desired : array_like + The expected object. + err_msg : str, optional + The error message to be printed in case of failure. + verbose : bool, optional + If True, the conflicting values are appended to the error message. + + Raises + ------ + AssertionError + If actual and desired are not equal. + + Examples + -------- + >>> np.testing.assert_equal([4,5], [4,6]) + Traceback (most recent call last): + ... + AssertionError: + Items are not equal: + item=1 + ACTUAL: 5 + DESIRED: 6 + + The following comparison does not raise an exception. There are NaNs + in the inputs, but they are in the same positions. 
+ + >>> np.testing.assert_equal(np.array([1.0, 2.0, np.nan]), [1, 2, np.nan]) + + """ + __tracebackhide__ = True # Hide traceback for py.test + if isinstance(desired, dict): + if not isinstance(actual, dict): + raise AssertionError(repr(type(actual))) + assert_equal(len(actual), len(desired), err_msg, verbose) + for k, i in desired.items(): + if k not in actual: + raise AssertionError(repr(k)) + assert_equal(actual[k], desired[k], f'key={k!r}\n{err_msg}', + verbose) + return + if isinstance(desired, (list, tuple)) and isinstance(actual, (list, tuple)): + assert_equal(len(actual), len(desired), err_msg, verbose) + for k in range(len(desired)): + assert_equal(actual[k], desired[k], f'item={k!r}\n{err_msg}', + verbose) + return + from numpy.core import ndarray, isscalar, signbit + from numpy.lib import iscomplexobj, real, imag + if isinstance(actual, ndarray) or isinstance(desired, ndarray): + return assert_array_equal(actual, desired, err_msg, verbose) + msg = build_err_msg([actual, desired], err_msg, verbose=verbose) + + # Handle complex numbers: separate into real/imag to handle + # nan/inf/negative zero correctly + # XXX: catch ValueError for subclasses of ndarray where iscomplex fail + try: + usecomplex = iscomplexobj(actual) or iscomplexobj(desired) + except (ValueError, TypeError): + usecomplex = False + + if usecomplex: + if iscomplexobj(actual): + actualr = real(actual) + actuali = imag(actual) + else: + actualr = actual + actuali = 0 + if iscomplexobj(desired): + desiredr = real(desired) + desiredi = imag(desired) + else: + desiredr = desired + desiredi = 0 + try: + assert_equal(actualr, desiredr) + assert_equal(actuali, desiredi) + except AssertionError: + raise AssertionError(msg) + + # isscalar test to check cases such as [np.nan] != np.nan + if isscalar(desired) != isscalar(actual): + raise AssertionError(msg) + + try: + isdesnat = isnat(desired) + isactnat = isnat(actual) + dtypes_match = (np.asarray(desired).dtype.type == + np.asarray(actual).dtype.type) + if isdesnat and isactnat: + # If both are NaT (and have the same dtype -- datetime or + # timedelta) they are considered equal. + if dtypes_match: + return + else: + raise AssertionError(msg) + + except (TypeError, ValueError, NotImplementedError): + pass + + # Inf/nan/negative zero handling + try: + isdesnan = gisnan(desired) + isactnan = gisnan(actual) + if isdesnan and isactnan: + return # both nan, so equal + + # handle signed zero specially for floats + array_actual = np.asarray(actual) + array_desired = np.asarray(desired) + if (array_actual.dtype.char in 'Mm' or + array_desired.dtype.char in 'Mm'): + # version 1.18 + # until this version, gisnan failed for datetime64 and timedelta64. + # Now it succeeds but comparison to scalar with a different type + # emits a DeprecationWarning. 
+ # Avoid that by skipping the next check + raise NotImplementedError('cannot compare to a scalar ' + 'with a different type') + + if desired == 0 and actual == 0: + if not signbit(desired) == signbit(actual): + raise AssertionError(msg) + + except (TypeError, ValueError, NotImplementedError): + pass + + try: + # Explicitly use __eq__ for comparison, gh-2552 + if not (desired == actual): + raise AssertionError(msg) + + except (DeprecationWarning, FutureWarning) as e: + # this handles the case when the two types are not even comparable + if 'elementwise == comparison' in e.args[0]: + raise AssertionError(msg) + else: + raise + + +def print_assert_equal(test_string, actual, desired): + """ + Test if two objects are equal, and print an error message if test fails. + + The test is performed with ``actual == desired``. + + Parameters + ---------- + test_string : str + The message supplied to AssertionError. + actual : object + The object to test for equality against `desired`. + desired : object + The expected result. + + Examples + -------- + >>> np.testing.print_assert_equal('Test XYZ of func xyz', [0, 1], [0, 1]) + >>> np.testing.print_assert_equal('Test XYZ of func xyz', [0, 1], [0, 2]) + Traceback (most recent call last): + ... + AssertionError: Test XYZ of func xyz failed + ACTUAL: + [0, 1] + DESIRED: + [0, 2] + + """ + __tracebackhide__ = True # Hide traceback for py.test + import pprint + + if not (actual == desired): + msg = StringIO() + msg.write(test_string) + msg.write(' failed\nACTUAL: \n') + pprint.pprint(actual, msg) + msg.write('DESIRED: \n') + pprint.pprint(desired, msg) + raise AssertionError(msg.getvalue()) + + +def assert_almost_equal(actual,desired,decimal=7,err_msg='',verbose=True): + """ + Raises an AssertionError if two items are not equal up to desired + precision. + + .. note:: It is recommended to use one of `assert_allclose`, + `assert_array_almost_equal_nulp` or `assert_array_max_ulp` + instead of this function for more consistent floating point + comparisons. + + The test verifies that the elements of `actual` and `desired` satisfy. + + ``abs(desired-actual) < 1.5 * 10**(-decimal)`` + + That is a looser test than originally documented, but agrees with what the + actual implementation in `assert_array_almost_equal` did up to rounding + vagaries. An exception is raised at conflicting values. For ndarrays this + delegates to assert_array_almost_equal + + Parameters + ---------- + actual : array_like + The object to check. + desired : array_like + The expected object. + decimal : int, optional + Desired precision, default is 7. + err_msg : str, optional + The error message to be printed in case of failure. + verbose : bool, optional + If True, the conflicting values are appended to the error message. + + Raises + ------ + AssertionError + If actual and desired are not equal up to specified precision. + + See Also + -------- + assert_allclose: Compare two array_like objects for equality with desired + relative and/or absolute precision. + assert_array_almost_equal_nulp, assert_array_max_ulp, assert_equal + + Examples + -------- + >>> from numpy.testing import assert_almost_equal + >>> assert_almost_equal(2.3333333333333, 2.33333334) + >>> assert_almost_equal(2.3333333333333, 2.33333334, decimal=10) + Traceback (most recent call last): + ... + AssertionError: + Arrays are not almost equal to 10 decimals + ACTUAL: 2.3333333333333 + DESIRED: 2.33333334 + + >>> assert_almost_equal(np.array([1.0,2.3333333333333]), + ... 
np.array([1.0,2.33333334]), decimal=9) + Traceback (most recent call last): + ... + AssertionError: + Arrays are not almost equal to 9 decimals + + Mismatched elements: 1 / 2 (50%) + Max absolute difference: 6.66669964e-09 + Max relative difference: 2.85715698e-09 + x: array([1. , 2.333333333]) + y: array([1. , 2.33333334]) + + """ + __tracebackhide__ = True # Hide traceback for py.test + from numpy.core import ndarray + from numpy.lib import iscomplexobj, real, imag + + # Handle complex numbers: separate into real/imag to handle + # nan/inf/negative zero correctly + # XXX: catch ValueError for subclasses of ndarray where iscomplex fail + try: + usecomplex = iscomplexobj(actual) or iscomplexobj(desired) + except ValueError: + usecomplex = False + + def _build_err_msg(): + header = ('Arrays are not almost equal to %d decimals' % decimal) + return build_err_msg([actual, desired], err_msg, verbose=verbose, + header=header) + + if usecomplex: + if iscomplexobj(actual): + actualr = real(actual) + actuali = imag(actual) + else: + actualr = actual + actuali = 0 + if iscomplexobj(desired): + desiredr = real(desired) + desiredi = imag(desired) + else: + desiredr = desired + desiredi = 0 + try: + assert_almost_equal(actualr, desiredr, decimal=decimal) + assert_almost_equal(actuali, desiredi, decimal=decimal) + except AssertionError: + raise AssertionError(_build_err_msg()) + + if isinstance(actual, (ndarray, tuple, list)) \ + or isinstance(desired, (ndarray, tuple, list)): + return assert_array_almost_equal(actual, desired, decimal, err_msg) + try: + # If one of desired/actual is not finite, handle it specially here: + # check that both are nan if any is a nan, and test for equality + # otherwise + if not (gisfinite(desired) and gisfinite(actual)): + if gisnan(desired) or gisnan(actual): + if not (gisnan(desired) and gisnan(actual)): + raise AssertionError(_build_err_msg()) + else: + if not desired == actual: + raise AssertionError(_build_err_msg()) + return + except (NotImplementedError, TypeError): + pass + if abs(desired - actual) >= 1.5 * 10.0**(-decimal): + raise AssertionError(_build_err_msg()) + + +def assert_approx_equal(actual,desired,significant=7,err_msg='',verbose=True): + """ + Raises an AssertionError if two items are not equal up to significant + digits. + + .. note:: It is recommended to use one of `assert_allclose`, + `assert_array_almost_equal_nulp` or `assert_array_max_ulp` + instead of this function for more consistent floating point + comparisons. + + Given two numbers, check that they are approximately equal. + Approximately equal is defined as the number of significant digits + that agree. + + Parameters + ---------- + actual : scalar + The object to check. + desired : scalar + The expected object. + significant : int, optional + Desired precision, default is 7. + err_msg : str, optional + The error message to be printed in case of failure. + verbose : bool, optional + If True, the conflicting values are appended to the error message. + + Raises + ------ + AssertionError + If actual and desired are not equal up to specified precision. + + See Also + -------- + assert_allclose: Compare two array_like objects for equality with desired + relative and/or absolute precision. + assert_array_almost_equal_nulp, assert_array_max_ulp, assert_equal + + Examples + -------- + >>> np.testing.assert_approx_equal(0.12345677777777e-20, 0.1234567e-20) + >>> np.testing.assert_approx_equal(0.12345670e-20, 0.12345671e-20, + ... 
significant=8) + >>> np.testing.assert_approx_equal(0.12345670e-20, 0.12345672e-20, + ... significant=8) + Traceback (most recent call last): + ... + AssertionError: + Items are not equal to 8 significant digits: + ACTUAL: 1.234567e-21 + DESIRED: 1.2345672e-21 + + the evaluated condition that raises the exception is + + >>> abs(0.12345670e-20/1e-21 - 0.12345672e-20/1e-21) >= 10**-(8-1) + True + + """ + __tracebackhide__ = True # Hide traceback for py.test + import numpy as np + + (actual, desired) = map(float, (actual, desired)) + if desired == actual: + return + # Normalized the numbers to be in range (-10.0,10.0) + # scale = float(pow(10,math.floor(math.log10(0.5*(abs(desired)+abs(actual)))))) + with np.errstate(invalid='ignore'): + scale = 0.5*(np.abs(desired) + np.abs(actual)) + scale = np.power(10, np.floor(np.log10(scale))) + try: + sc_desired = desired/scale + except ZeroDivisionError: + sc_desired = 0.0 + try: + sc_actual = actual/scale + except ZeroDivisionError: + sc_actual = 0.0 + msg = build_err_msg( + [actual, desired], err_msg, + header='Items are not equal to %d significant digits:' % significant, + verbose=verbose) + try: + # If one of desired/actual is not finite, handle it specially here: + # check that both are nan if any is a nan, and test for equality + # otherwise + if not (gisfinite(desired) and gisfinite(actual)): + if gisnan(desired) or gisnan(actual): + if not (gisnan(desired) and gisnan(actual)): + raise AssertionError(msg) + else: + if not desired == actual: + raise AssertionError(msg) + return + except (TypeError, NotImplementedError): + pass + if np.abs(sc_desired - sc_actual) >= np.power(10., -(significant-1)): + raise AssertionError(msg) + + +def assert_array_compare(comparison, x, y, err_msg='', verbose=True, header='', + precision=6, equal_nan=True, equal_inf=True): + __tracebackhide__ = True # Hide traceback for py.test + from numpy.core import array, array2string, isnan, inf, bool_, errstate, all, max, object_ + + x = np.asanyarray(x) + y = np.asanyarray(y) + + # original array for output formatting + ox, oy = x, y + + def isnumber(x): + return x.dtype.char in '?bhilqpBHILQPefdgFDG' + + def istime(x): + return x.dtype.char in "Mm" + + def func_assert_same_pos(x, y, func=isnan, hasval='nan'): + """Handling nan/inf. + + Combine results of running func on x and y, checking that they are True + at the same locations. + + """ + __tracebackhide__ = True # Hide traceback for py.test + + x_id = func(x) + y_id = func(y) + # We include work-arounds here to handle three types of slightly + # pathological ndarray subclasses: + # (1) all() on `masked` array scalars can return masked arrays, so we + # use != True + # (2) __eq__ on some ndarray subclasses returns Python booleans + # instead of element-wise comparisons, so we cast to bool_() and + # use isinstance(..., bool) checks + # (3) subclasses with bare-bones __array_function__ implementations may + # not implement np.all(), so favor using the .all() method + # We are not committed to supporting such subclasses, but it's nice to + # support them if possible. + if bool_(x_id == y_id).all() != True: + msg = build_err_msg([x, y], + err_msg + '\nx and y %s location mismatch:' + % (hasval), verbose=verbose, header=header, + names=('x', 'y'), precision=precision) + raise AssertionError(msg) + # If there is a scalar, then here we know the array has the same + # flag as it everywhere, so we should return the scalar flag. 
+ if isinstance(x_id, bool) or x_id.ndim == 0: + return bool_(x_id) + elif isinstance(y_id, bool) or y_id.ndim == 0: + return bool_(y_id) + else: + return y_id + + try: + cond = (x.shape == () or y.shape == ()) or x.shape == y.shape + if not cond: + msg = build_err_msg([x, y], + err_msg + + f'\n(shapes {x.shape}, {y.shape} mismatch)', + verbose=verbose, header=header, + names=('x', 'y'), precision=precision) + raise AssertionError(msg) + + flagged = bool_(False) + if isnumber(x) and isnumber(y): + if equal_nan: + flagged = func_assert_same_pos(x, y, func=isnan, hasval='nan') + + if equal_inf: + flagged |= func_assert_same_pos(x, y, + func=lambda xy: xy == +inf, + hasval='+inf') + flagged |= func_assert_same_pos(x, y, + func=lambda xy: xy == -inf, + hasval='-inf') + + elif istime(x) and istime(y): + # If one is datetime64 and the other timedelta64 there is no point + if equal_nan and x.dtype.type == y.dtype.type: + flagged = func_assert_same_pos(x, y, func=isnat, hasval="NaT") + + if flagged.ndim > 0: + x, y = x[~flagged], y[~flagged] + # Only do the comparison if actual values are left + if x.size == 0: + return + elif flagged: + # no sense doing comparison if everything is flagged. + return + + val = comparison(x, y) + + if isinstance(val, bool): + cond = val + reduced = array([val]) + else: + reduced = val.ravel() + cond = reduced.all() + + # The below comparison is a hack to ensure that fully masked + # results, for which val.ravel().all() returns np.ma.masked, + # do not trigger a failure (np.ma.masked != True evaluates as + # np.ma.masked, which is falsy). + if cond != True: + n_mismatch = reduced.size - reduced.sum(dtype=intp) + n_elements = flagged.size if flagged.ndim != 0 else reduced.size + percent_mismatch = 100 * n_mismatch / n_elements + remarks = [ + 'Mismatched elements: {} / {} ({:.3g}%)'.format( + n_mismatch, n_elements, percent_mismatch)] + + with errstate(invalid='ignore', divide='ignore'): + # ignore errors for non-numeric types + with contextlib.suppress(TypeError): + error = abs(x - y) + max_abs_error = max(error) + if getattr(error, 'dtype', object_) == object_: + remarks.append('Max absolute difference: ' + + str(max_abs_error)) + else: + remarks.append('Max absolute difference: ' + + array2string(max_abs_error)) + + # note: this definition of relative error matches that one + # used by assert_allclose (found in np.isclose) + # Filter values where the divisor would be zero + nonzero = bool_(y != 0) + if all(~nonzero): + max_rel_error = array(inf) + else: + max_rel_error = max(error[nonzero] / abs(y[nonzero])) + if getattr(error, 'dtype', object_) == object_: + remarks.append('Max relative difference: ' + + str(max_rel_error)) + else: + remarks.append('Max relative difference: ' + + array2string(max_rel_error)) + + err_msg += '\n' + '\n'.join(remarks) + msg = build_err_msg([ox, oy], err_msg, + verbose=verbose, header=header, + names=('x', 'y'), precision=precision) + raise AssertionError(msg) + except ValueError: + import traceback + efmt = traceback.format_exc() + header = f'error during assertion:\n\n{efmt}\n\n{header}' + + msg = build_err_msg([x, y], err_msg, verbose=verbose, header=header, + names=('x', 'y'), precision=precision) + raise ValueError(msg) + + +def assert_array_equal(x, y, err_msg='', verbose=True): + """ + Raises an AssertionError if two array_like objects are not equal. + + Given two array_like objects, check that the shape is equal and all + elements of these objects are equal (but see the Notes for the special + handling of a scalar). 
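The mismatch summary assembled above reduces to the following arithmetic (a minimal illustration)::

    import numpy as np

    x = np.array([1.0, 2.0])
    y = np.array([1.0, 2.5])
    error = abs(x - y)
    max_abs = error.max()                               # 0.5
    nonzero = y != 0                                    # avoid dividing by zero
    max_rel = (error[nonzero] / abs(y[nonzero])).max()  # 0.2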
An exception is raised at shape mismatch or + conflicting values. In contrast to the standard usage in numpy, NaNs + are compared like numbers, no assertion is raised if both objects have + NaNs in the same positions. + + The usual caution for verifying equality with floating point numbers is + advised. + + Parameters + ---------- + x : array_like + The actual object to check. + y : array_like + The desired, expected object. + err_msg : str, optional + The error message to be printed in case of failure. + verbose : bool, optional + If True, the conflicting values are appended to the error message. + + Raises + ------ + AssertionError + If actual and desired objects are not equal. + + See Also + -------- + assert_allclose: Compare two array_like objects for equality with desired + relative and/or absolute precision. + assert_array_almost_equal_nulp, assert_array_max_ulp, assert_equal + + Notes + ----- + When one of `x` and `y` is a scalar and the other is array_like, the + function checks that each element of the array_like object is equal to + the scalar. + + Examples + -------- + The first assert does not raise an exception: + + >>> np.testing.assert_array_equal([1.0,2.33333,np.nan], + ... [np.exp(0),2.33333, np.nan]) + + Assert fails with numerical imprecision with floats: + + >>> np.testing.assert_array_equal([1.0,np.pi,np.nan], + ... [1, np.sqrt(np.pi)**2, np.nan]) + Traceback (most recent call last): + ... + AssertionError: + Arrays are not equal + + Mismatched elements: 1 / 3 (33.3%) + Max absolute difference: 4.4408921e-16 + Max relative difference: 1.41357986e-16 + x: array([1. , 3.141593, nan]) + y: array([1. , 3.141593, nan]) + + Use `assert_allclose` or one of the nulp (number of floating point values) + functions for these cases instead: + + >>> np.testing.assert_allclose([1.0,np.pi,np.nan], + ... [1, np.sqrt(np.pi)**2, np.nan], + ... rtol=1e-10, atol=0) + + As mentioned in the Notes section, `assert_array_equal` has special + handling for scalars. Here the test checks that each value in `x` is 3: + + >>> x = np.full((2, 5), fill_value=3) + >>> np.testing.assert_array_equal(x, 3) + + """ + __tracebackhide__ = True # Hide traceback for py.test + assert_array_compare(operator.__eq__, x, y, err_msg=err_msg, + verbose=verbose, header='Arrays are not equal') + + +def assert_array_almost_equal(x, y, decimal=6, err_msg='', verbose=True): + """ + Raises an AssertionError if two objects are not equal up to desired + precision. + + .. note:: It is recommended to use one of `assert_allclose`, + `assert_array_almost_equal_nulp` or `assert_array_max_ulp` + instead of this function for more consistent floating point + comparisons. + + The test verifies identical shapes and that the elements of ``actual`` and + ``desired`` satisfy. + + ``abs(desired-actual) < 1.5 * 10**(-decimal)`` + + That is a looser test than originally documented, but agrees with what the + actual implementation did up to rounding vagaries. An exception is raised + at shape mismatch or conflicting values. In contrast to the standard usage + in numpy, NaNs are compared like numbers, no assertion is raised if both + objects have NaNs in the same positions. + + Parameters + ---------- + x : array_like + The actual object to check. + y : array_like + The desired, expected object. + decimal : int, optional + Desired precision, default is 6. + err_msg : str, optional + The error message to be printed in case of failure. + verbose : bool, optional + If True, the conflicting values are appended to the error message. 
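The ``abs(desired-actual) < 1.5 * 10**(-decimal)`` rule can be checked by hand; with decimal=5, the failing docstring example below fails because::

    abs(2.33333 - 2.33339)                         # 6e-05
    1.5 * 10.0 ** (-5)                             # 1.5e-05
    abs(2.33333 - 2.33339) < 1.5 * 10.0 ** (-5)    # False -> AssertionError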
+ + Raises + ------ + AssertionError + If actual and desired are not equal up to specified precision. + + See Also + -------- + assert_allclose: Compare two array_like objects for equality with desired + relative and/or absolute precision. + assert_array_almost_equal_nulp, assert_array_max_ulp, assert_equal + + Examples + -------- + the first assert does not raise an exception + + >>> np.testing.assert_array_almost_equal([1.0,2.333,np.nan], + ... [1.0,2.333,np.nan]) + + >>> np.testing.assert_array_almost_equal([1.0,2.33333,np.nan], + ... [1.0,2.33339,np.nan], decimal=5) + Traceback (most recent call last): + ... + AssertionError: + Arrays are not almost equal to 5 decimals + + Mismatched elements: 1 / 3 (33.3%) + Max absolute difference: 6.e-05 + Max relative difference: 2.57136612e-05 + x: array([1. , 2.33333, nan]) + y: array([1. , 2.33339, nan]) + + >>> np.testing.assert_array_almost_equal([1.0,2.33333,np.nan], + ... [1.0,2.33333, 5], decimal=5) + Traceback (most recent call last): + ... + AssertionError: + Arrays are not almost equal to 5 decimals + + x and y nan location mismatch: + x: array([1. , 2.33333, nan]) + y: array([1. , 2.33333, 5. ]) + + """ + __tracebackhide__ = True # Hide traceback for py.test + from numpy.core import number, float_, result_type, array + from numpy.core.numerictypes import issubdtype + from numpy.core.fromnumeric import any as npany + + def compare(x, y): + try: + if npany(gisinf(x)) or npany( gisinf(y)): + xinfid = gisinf(x) + yinfid = gisinf(y) + if not (xinfid == yinfid).all(): + return False + # if one item, x and y is +- inf + if x.size == y.size == 1: + return x == y + x = x[~xinfid] + y = y[~yinfid] + except (TypeError, NotImplementedError): + pass + + # make sure y is an inexact type to avoid abs(MIN_INT); will cause + # casting of x later. + dtype = result_type(y, 1.) + y = np.asanyarray(y, dtype) + z = abs(x - y) + + if not issubdtype(z.dtype, number): + z = z.astype(float_) # handle object arrays + + return z < 1.5 * 10.0**(-decimal) + + assert_array_compare(compare, x, y, err_msg=err_msg, verbose=verbose, + header=('Arrays are not almost equal to %d decimals' % decimal), + precision=decimal) + + +def assert_array_less(x, y, err_msg='', verbose=True): + """ + Raises an AssertionError if two array_like objects are not ordered by less + than. + + Given two array_like objects, check that the shape is equal and all + elements of the first object are strictly smaller than those of the + second object. An exception is raised at shape mismatch or incorrectly + ordered values. Shape mismatch does not raise if an object has zero + dimension. In contrast to the standard usage in numpy, NaNs are + compared, no assertion is raised if both objects have NaNs in the same + positions. + + + + Parameters + ---------- + x : array_like + The smaller object to check. + y : array_like + The larger object to compare. + err_msg : string + The error message to be printed in case of failure. + verbose : bool + If True, the conflicting values are appended to the error message. + + Raises + ------ + AssertionError + If actual and desired objects are not equal. + + See Also + -------- + assert_array_equal: tests objects for equality + assert_array_almost_equal: test objects for equality up to precision + + + + Examples + -------- + >>> np.testing.assert_array_less([1.0, 1.0, np.nan], [1.1, 2.0, np.nan]) + >>> np.testing.assert_array_less([1.0, 1.0, np.nan], [1, 2.0, np.nan]) + Traceback (most recent call last): + ... 
+    AssertionError:
+    Arrays are not less-ordered
+
+    Mismatched elements: 1 / 3 (33.3%)
+    Max absolute difference: 1.
+    Max relative difference: 0.5
+    x: array([ 1., 1., nan])
+    y: array([ 1., 2., nan])
+
+    >>> np.testing.assert_array_less([1.0, 4.0], 3)
+    Traceback (most recent call last):
+        ...
+    AssertionError:
+    Arrays are not less-ordered
+
+    Mismatched elements: 1 / 2 (50%)
+    Max absolute difference: 2.
+    Max relative difference: 0.66666667
+    x: array([1., 4.])
+    y: array(3)
+
+    >>> np.testing.assert_array_less([1.0, 2.0, 3.0], [4])
+    Traceback (most recent call last):
+        ...
+    AssertionError:
+    Arrays are not less-ordered
+
+    (shapes (3,), (1,) mismatch)
+    x: array([1., 2., 3.])
+    y: array([4])
+
+    """
+    __tracebackhide__ = True  # Hide traceback for py.test
+    assert_array_compare(operator.__lt__, x, y, err_msg=err_msg,
+                         verbose=verbose,
+                         header='Arrays are not less-ordered',
+                         equal_inf=False)
+
+
+def runstring(astr, dict):
+    exec(astr, dict)
+
+
+def assert_string_equal(actual, desired):
+    """
+    Test if two strings are equal.
+
+    If the given strings are equal, `assert_string_equal` does nothing.
+    If they are not equal, an AssertionError is raised, and the diff
+    between the strings is shown.
+
+    Parameters
+    ----------
+    actual : str
+        The string to test for equality against the expected string.
+    desired : str
+        The expected string.
+
+    Examples
+    --------
+    >>> np.testing.assert_string_equal('abc', 'abc')
+    >>> np.testing.assert_string_equal('abc', 'abcd')
+    Traceback (most recent call last):
+      File "<stdin>", line 1, in <module>
+    ...
+    AssertionError: Differences in strings:
+    - abc+ abcd?    +
+
+    """
+    # delay import of difflib to reduce startup time
+    __tracebackhide__ = True  # Hide traceback for py.test
+    import difflib
+
+    if not isinstance(actual, str):
+        raise AssertionError(repr(type(actual)))
+    if not isinstance(desired, str):
+        raise AssertionError(repr(type(desired)))
+    if desired == actual:
+        return
+
+    diff = list(difflib.Differ().compare(actual.splitlines(True),
+                                         desired.splitlines(True)))
+    diff_list = []
+    while diff:
+        d1 = diff.pop(0)
+        if d1.startswith('  '):
+            continue
+        if d1.startswith('- '):
+            l = [d1]
+            d2 = diff.pop(0)
+            if d2.startswith('? '):
+                l.append(d2)
+                d2 = diff.pop(0)
+            if not d2.startswith('+ '):
+                raise AssertionError(repr(d2))
+            l.append(d2)
+            if diff:
+                d3 = diff.pop(0)
+                if d3.startswith('? '):
+                    l.append(d3)
+                else:
+                    diff.insert(0, d3)
+            if d2[2:] == d1[2:]:
+                continue
+            diff_list.extend(l)
+            continue
+        raise AssertionError(repr(d1))
+    if not diff_list:
+        return
+    msg = f"Differences in strings:\n{''.join(diff_list).rstrip()}"
+    if actual != desired:
+        raise AssertionError(msg)
+
+
+def rundocs(filename=None, raise_on_error=True):
+    """
+    Run doctests found in the given file.
+
+    By default `rundocs` raises an AssertionError on failure.
+
+    Parameters
+    ----------
+    filename : str
+        The path to the file for which the doctests are run.
+    raise_on_error : bool
+        Whether to raise an AssertionError when a doctest fails. Default is
+        True.
+
+    Notes
+    -----
+    The doctests can be run by the user/developer by adding the ``doctests``
+    argument to the ``test()`` call.
For example, to run all tests (including + doctests) for `numpy.lib`: + + >>> np.lib.test(doctests=True) # doctest: +SKIP + """ + from numpy.distutils.misc_util import exec_mod_from_location + import doctest + if filename is None: + f = sys._getframe(1) + filename = f.f_globals['__file__'] + name = os.path.splitext(os.path.basename(filename))[0] + m = exec_mod_from_location(name, filename) + + tests = doctest.DocTestFinder().find(m) + runner = doctest.DocTestRunner(verbose=False) + + msg = [] + if raise_on_error: + out = lambda s: msg.append(s) + else: + out = None + + for test in tests: + runner.run(test, out=out) + + if runner.failures > 0 and raise_on_error: + raise AssertionError("Some doctests failed:\n%s" % "\n".join(msg)) + + +def raises(*args): + """Decorator to check for raised exceptions. + + The decorated test function must raise one of the passed exceptions to + pass. If you want to test many assertions about exceptions in a single + test, you may want to use `assert_raises` instead. + + .. warning:: + This decorator is nose specific, do not use it if you are using a + different test framework. + + Parameters + ---------- + args : exceptions + The test passes if any of the passed exceptions is raised. + + Raises + ------ + AssertionError + + Examples + -------- + + Usage:: + + @raises(TypeError, ValueError) + def test_raises_type_error(): + raise TypeError("This test passes") + + @raises(Exception) + def test_that_fails_by_passing(): + pass + + """ + nose = import_nose() + return nose.tools.raises(*args) + +# +# assert_raises and assert_raises_regex are taken from unittest. +# +import unittest + + +class _Dummy(unittest.TestCase): + def nop(self): + pass + +_d = _Dummy('nop') + +def assert_raises(*args, **kwargs): + """ + assert_raises(exception_class, callable, *args, **kwargs) + assert_raises(exception_class) + + Fail unless an exception of class exception_class is thrown + by callable when invoked with arguments args and keyword + arguments kwargs. If a different type of exception is + thrown, it will not be caught, and the test case will be + deemed to have suffered an error, exactly as for an + unexpected exception. + + Alternatively, `assert_raises` can be used as a context manager: + + >>> from numpy.testing import assert_raises + >>> with assert_raises(ZeroDivisionError): + ... 1 / 0 + + is equivalent to + + >>> def div(x, y): + ... return x / y + >>> assert_raises(ZeroDivisionError, div, 1, 0) + + """ + __tracebackhide__ = True # Hide traceback for py.test + return _d.assertRaises(*args,**kwargs) + + +def assert_raises_regex(exception_class, expected_regexp, *args, **kwargs): + """ + assert_raises_regex(exception_class, expected_regexp, callable, *args, + **kwargs) + assert_raises_regex(exception_class, expected_regexp) + + Fail unless an exception of class exception_class and with message that + matches expected_regexp is thrown by callable when invoked with arguments + args and keyword arguments kwargs. + + Alternatively, can be used as a context manager like `assert_raises`. + + Name of this function adheres to Python 3.2+ reference, but should work in + all versions down to 2.6. + + Notes + ----- + .. versionadded:: 1.9.0 + + """ + __tracebackhide__ = True # Hide traceback for py.test + return _d.assertRaisesRegex(exception_class, expected_regexp, *args, **kwargs) + + +def decorate_methods(cls, decorator, testmatch=None): + """ + Apply a decorator to all methods in a class matching a regular expression. 
+ + The given decorator is applied to all public methods of `cls` that are + matched by the regular expression `testmatch` + (``testmatch.search(methodname)``). Methods that are private, i.e. start + with an underscore, are ignored. + + Parameters + ---------- + cls : class + Class whose methods to decorate. + decorator : function + Decorator to apply to methods + testmatch : compiled regexp or str, optional + The regular expression. Default value is None, in which case the + nose default (``re.compile(r'(?:^|[\\b_\\.%s-])[Tt]est' % os.sep)``) + is used. + If `testmatch` is a string, it is compiled to a regular expression + first. + + """ + if testmatch is None: + testmatch = re.compile(r'(?:^|[\\b_\\.%s-])[Tt]est' % os.sep) + else: + testmatch = re.compile(testmatch) + cls_attr = cls.__dict__ + + # delayed import to reduce startup time + from inspect import isfunction + + methods = [_m for _m in cls_attr.values() if isfunction(_m)] + for function in methods: + try: + if hasattr(function, 'compat_func_name'): + funcname = function.compat_func_name + else: + funcname = function.__name__ + except AttributeError: + # not a function + continue + if testmatch.search(funcname) and not funcname.startswith('_'): + setattr(cls, funcname, decorator(function)) + return + + +def measure(code_str, times=1, label=None): + """ + Return elapsed time for executing code in the namespace of the caller. + + The supplied code string is compiled with the Python builtin ``compile``. + The precision of the timing is 10 milli-seconds. If the code will execute + fast on this timescale, it can be executed many times to get reasonable + timing accuracy. + + Parameters + ---------- + code_str : str + The code to be timed. + times : int, optional + The number of times the code is executed. Default is 1. The code is + only compiled once. + label : str, optional + A label to identify `code_str` with. This is passed into ``compile`` + as the second argument (for run-time error messages). + + Returns + ------- + elapsed : float + Total elapsed time in seconds for executing `code_str` `times` times. + + Examples + -------- + >>> times = 10 + >>> etime = np.testing.measure('for i in range(1000): np.sqrt(i**2)', times=times) + >>> print("Time for a single execution : ", etime / times, "s") # doctest: +SKIP + Time for a single execution : 0.005 s + + """ + frame = sys._getframe(1) + locs, globs = frame.f_locals, frame.f_globals + + code = compile(code_str, f'Test name: {label} ', 'exec') + i = 0 + elapsed = jiffies() + while i < times: + i += 1 + exec(code, globs, locs) + elapsed = jiffies() - elapsed + return 0.01*elapsed + + +def _assert_valid_refcount(op): + """ + Check that ufuncs don't mishandle refcount of object `1`. + Used in a few regression tests. + """ + if not HAS_REFCOUNT: + return True + + import gc + import numpy as np + + b = np.arange(100*100).reshape(100, 100) + c = b + i = 1 + + gc.disable() + try: + rc = sys.getrefcount(i) + for j in range(15): + d = op(b, c) + assert_(sys.getrefcount(i) >= rc) + finally: + gc.enable() + del d # for pyflakes + + +def assert_allclose(actual, desired, rtol=1e-7, atol=0, equal_nan=True, + err_msg='', verbose=True): + """ + Raises an AssertionError if two objects are not equal up to desired + tolerance. + + The test is equivalent to ``allclose(actual, desired, rtol, atol)`` (note + that ``allclose`` has different default values). It compares the difference + between `actual` and `desired` to ``atol + rtol * abs(desired)``. + + .. 
versionadded:: 1.5.0 + + Parameters + ---------- + actual : array_like + Array obtained. + desired : array_like + Array desired. + rtol : float, optional + Relative tolerance. + atol : float, optional + Absolute tolerance. + equal_nan : bool, optional. + If True, NaNs will compare equal. + err_msg : str, optional + The error message to be printed in case of failure. + verbose : bool, optional + If True, the conflicting values are appended to the error message. + + Raises + ------ + AssertionError + If actual and desired are not equal up to specified precision. + + See Also + -------- + assert_array_almost_equal_nulp, assert_array_max_ulp + + Examples + -------- + >>> x = [1e-5, 1e-3, 1e-1] + >>> y = np.arccos(np.cos(x)) + >>> np.testing.assert_allclose(x, y, rtol=1e-5, atol=0) + + """ + __tracebackhide__ = True # Hide traceback for py.test + import numpy as np + + def compare(x, y): + return np.core.numeric.isclose(x, y, rtol=rtol, atol=atol, + equal_nan=equal_nan) + + actual, desired = np.asanyarray(actual), np.asanyarray(desired) + header = f'Not equal to tolerance rtol={rtol:g}, atol={atol:g}' + assert_array_compare(compare, actual, desired, err_msg=str(err_msg), + verbose=verbose, header=header, equal_nan=equal_nan) + + +def assert_array_almost_equal_nulp(x, y, nulp=1): + """ + Compare two arrays relatively to their spacing. + + This is a relatively robust method to compare two arrays whose amplitude + is variable. + + Parameters + ---------- + x, y : array_like + Input arrays. + nulp : int, optional + The maximum number of unit in the last place for tolerance (see Notes). + Default is 1. + + Returns + ------- + None + + Raises + ------ + AssertionError + If the spacing between `x` and `y` for one or more elements is larger + than `nulp`. + + See Also + -------- + assert_array_max_ulp : Check that all items of arrays differ in at most + N Units in the Last Place. + spacing : Return the distance between x and the nearest adjacent number. + + Notes + ----- + An assertion is raised if the following condition is not met:: + + abs(x - y) <= nulps * spacing(maximum(abs(x), abs(y))) + + Examples + -------- + >>> x = np.array([1., 1e-10, 1e-20]) + >>> eps = np.finfo(x.dtype).eps + >>> np.testing.assert_array_almost_equal_nulp(x, x*eps/2 + x) + + >>> np.testing.assert_array_almost_equal_nulp(x, x*eps + x) + Traceback (most recent call last): + ... + AssertionError: X and Y are not equal to 1 ULP (max is 2) + + """ + __tracebackhide__ = True # Hide traceback for py.test + import numpy as np + ax = np.abs(x) + ay = np.abs(y) + ref = nulp * np.spacing(np.where(ax > ay, ax, ay)) + if not np.all(np.abs(x-y) <= ref): + if np.iscomplexobj(x) or np.iscomplexobj(y): + msg = "X and Y are not equal to %d ULP" % nulp + else: + max_nulp = np.max(nulp_diff(x, y)) + msg = "X and Y are not equal to %d ULP (max is %g)" % (nulp, max_nulp) + raise AssertionError(msg) + + +def assert_array_max_ulp(a, b, maxulp=1, dtype=None): + """ + Check that all items of arrays differ in at most N Units in the Last Place. + + Parameters + ---------- + a, b : array_like + Input arrays to be compared. + maxulp : int, optional + The maximum number of units in the last place that elements of `a` and + `b` can differ. Default is 1. + dtype : dtype, optional + Data-type to convert `a` and `b` to if given. Default is None. + + Returns + ------- + ret : ndarray + Array containing number of representable floating point numbers between + items in `a` and `b`. 
+ + Raises + ------ + AssertionError + If one or more elements differ by more than `maxulp`. + + Notes + ----- + For computing the ULP difference, this API does not differentiate between + various representations of NAN (ULP difference between 0x7fc00000 and 0xffc00000 + is zero). + + See Also + -------- + assert_array_almost_equal_nulp : Compare two arrays relatively to their + spacing. + + Examples + -------- + >>> a = np.linspace(0., 1., 100) + >>> res = np.testing.assert_array_max_ulp(a, np.arcsin(np.sin(a))) + + """ + __tracebackhide__ = True # Hide traceback for py.test + import numpy as np + ret = nulp_diff(a, b, dtype) + if not np.all(ret <= maxulp): + raise AssertionError("Arrays are not almost equal up to %g " + "ULP (max difference is %g ULP)" % + (maxulp, np.max(ret))) + return ret + + +def nulp_diff(x, y, dtype=None): + """For each item in x and y, return the number of representable floating + points between them. + + Parameters + ---------- + x : array_like + first input array + y : array_like + second input array + dtype : dtype, optional + Data-type to convert `x` and `y` to if given. Default is None. + + Returns + ------- + nulp : array_like + number of representable floating point numbers between each item in x + and y. + + Notes + ----- + For computing the ULP difference, this API does not differentiate between + various representations of NAN (ULP difference between 0x7fc00000 and 0xffc00000 + is zero). + + Examples + -------- + # By definition, epsilon is the smallest number such as 1 + eps != 1, so + # there should be exactly one ULP between 1 and 1 + eps + >>> nulp_diff(1, 1 + np.finfo(x.dtype).eps) + 1.0 + """ + import numpy as np + if dtype: + x = np.asarray(x, dtype=dtype) + y = np.asarray(y, dtype=dtype) + else: + x = np.asarray(x) + y = np.asarray(y) + + t = np.common_type(x, y) + if np.iscomplexobj(x) or np.iscomplexobj(y): + raise NotImplementedError("_nulp not implemented for complex array") + + x = np.array([x], dtype=t) + y = np.array([y], dtype=t) + + x[np.isnan(x)] = np.nan + y[np.isnan(y)] = np.nan + + if not x.shape == y.shape: + raise ValueError("x and y do not have the same shape: %s - %s" % + (x.shape, y.shape)) + + def _diff(rx, ry, vdt): + diff = np.asarray(rx-ry, dtype=vdt) + return np.abs(diff) + + rx = integer_repr(x) + ry = integer_repr(y) + return _diff(rx, ry, t) + + +def _integer_repr(x, vdt, comp): + # Reinterpret binary representation of the float as sign-magnitude: + # take into account two-complement representation + # See also + # https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/ + rx = x.view(vdt) + if not (rx.size == 1): + rx[rx < 0] = comp - rx[rx < 0] + else: + if rx < 0: + rx = comp - rx + + return rx + + +def integer_repr(x): + """Return the signed-magnitude interpretation of the binary representation + of x.""" + import numpy as np + if x.dtype == np.float16: + return _integer_repr(x, np.int16, np.int16(-2**15)) + elif x.dtype == np.float32: + return _integer_repr(x, np.int32, np.int32(-2**31)) + elif x.dtype == np.float64: + return _integer_repr(x, np.int64, np.int64(-2**63)) + else: + raise ValueError(f'Unsupported dtype {x.dtype}') + + +@contextlib.contextmanager +def _assert_warns_context(warning_class, name=None): + __tracebackhide__ = True # Hide traceback for py.test + with suppress_warnings() as sup: + l = sup.record(warning_class) + yield + if not len(l) > 0: + name_str = f' when calling {name}' if name is not None else '' + raise AssertionError("No warning raised" + name_str) + 
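+# Editor's note -- an illustrative, self-contained sketch (not upstream
+# NumPy code) of the signed-magnitude trick used by `_integer_repr` and
+# `nulp_diff` earlier in this file: once the float bits are reinterpreted
+# this way, adjacent representable float64 values map to integers that
+# differ by exactly 1. The helper name `int_repr64` is the editor's own.
+#
+#     import numpy as np
+#
+#     def int_repr64(x):
+#         r = np.atleast_1d(np.asarray(x, dtype=np.float64)).view(np.int64).copy()
+#         r[r < 0] = np.int64(-2**63) - r[r < 0]  # two's complement -> signed magnitude
+#         return r
+#
+#     a = np.float64(1.0)
+#     b = np.nextafter(a, np.inf)   # next representable float64 after 1.0
+#     assert int_repr64(b)[0] - int_repr64(a)[0] == 1   # exactly 1 ULP apart
+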
+ +def assert_warns(warning_class, *args, **kwargs): + """ + Fail unless the given callable throws the specified warning. + + A warning of class warning_class should be thrown by the callable when + invoked with arguments args and keyword arguments kwargs. + If a different type of warning is thrown, it will not be caught. + + If called with all arguments other than the warning class omitted, may be + used as a context manager: + + with assert_warns(SomeWarning): + do_something() + + The ability to be used as a context manager is new in NumPy v1.11.0. + + .. versionadded:: 1.4.0 + + Parameters + ---------- + warning_class : class + The class defining the warning that `func` is expected to throw. + func : callable, optional + Callable to test + *args : Arguments + Arguments for `func`. + **kwargs : Kwargs + Keyword arguments for `func`. + + Returns + ------- + The value returned by `func`. + + Examples + -------- + >>> import warnings + >>> def deprecated_func(num): + ... warnings.warn("Please upgrade", DeprecationWarning) + ... return num*num + >>> with np.testing.assert_warns(DeprecationWarning): + ... assert deprecated_func(4) == 16 + >>> # or passing a func + >>> ret = np.testing.assert_warns(DeprecationWarning, deprecated_func, 4) + >>> assert ret == 16 + """ + if not args: + return _assert_warns_context(warning_class) + + func = args[0] + args = args[1:] + with _assert_warns_context(warning_class, name=func.__name__): + return func(*args, **kwargs) + + +@contextlib.contextmanager +def _assert_no_warnings_context(name=None): + __tracebackhide__ = True # Hide traceback for py.test + with warnings.catch_warnings(record=True) as l: + warnings.simplefilter('always') + yield + if len(l) > 0: + name_str = f' when calling {name}' if name is not None else '' + raise AssertionError(f'Got warnings{name_str}: {l}') + + +def assert_no_warnings(*args, **kwargs): + """ + Fail if the given callable produces any warnings. + + If called with all arguments omitted, may be used as a context manager: + + with assert_no_warnings(): + do_something() + + The ability to be used as a context manager is new in NumPy v1.11.0. + + .. versionadded:: 1.7.0 + + Parameters + ---------- + func : callable + The callable to test. + \\*args : Arguments + Arguments passed to `func`. + \\*\\*kwargs : Kwargs + Keyword arguments passed to `func`. + + Returns + ------- + The value returned by `func`. 
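+
+    Example (an illustrative sketch added by the editor, not upstream
+    docstring text; it assumes only NumPy itself)::
+
+        with np.testing.assert_no_warnings():
+            np.add(1., 1.)  # passes, as np.add emits no warning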
+ + """ + if not args: + return _assert_no_warnings_context() + + func = args[0] + args = args[1:] + with _assert_no_warnings_context(name=func.__name__): + return func(*args, **kwargs) + + +def _gen_alignment_data(dtype=float32, type='binary', max_size=24): + """ + generator producing data with different alignment and offsets + to test simd vectorization + + Parameters + ---------- + dtype : dtype + data type to produce + type : string + 'unary': create data for unary operations, creates one input + and output array + 'binary': create data for unary operations, creates two input + and output array + max_size : integer + maximum size of data to produce + + Returns + ------- + if type is 'unary' yields one output, one input array and a message + containing information on the data + if type is 'binary' yields one output array, two input array and a message + containing information on the data + + """ + ufmt = 'unary offset=(%d, %d), size=%d, dtype=%r, %s' + bfmt = 'binary offset=(%d, %d, %d), size=%d, dtype=%r, %s' + for o in range(3): + for s in range(o + 2, max(o + 3, max_size)): + if type == 'unary': + inp = lambda: arange(s, dtype=dtype)[o:] + out = empty((s,), dtype=dtype)[o:] + yield out, inp(), ufmt % (o, o, s, dtype, 'out of place') + d = inp() + yield d, d, ufmt % (o, o, s, dtype, 'in place') + yield out[1:], inp()[:-1], ufmt % \ + (o + 1, o, s - 1, dtype, 'out of place') + yield out[:-1], inp()[1:], ufmt % \ + (o, o + 1, s - 1, dtype, 'out of place') + yield inp()[:-1], inp()[1:], ufmt % \ + (o, o + 1, s - 1, dtype, 'aliased') + yield inp()[1:], inp()[:-1], ufmt % \ + (o + 1, o, s - 1, dtype, 'aliased') + if type == 'binary': + inp1 = lambda: arange(s, dtype=dtype)[o:] + inp2 = lambda: arange(s, dtype=dtype)[o:] + out = empty((s,), dtype=dtype)[o:] + yield out, inp1(), inp2(), bfmt % \ + (o, o, o, s, dtype, 'out of place') + d = inp1() + yield d, d, inp2(), bfmt % \ + (o, o, o, s, dtype, 'in place1') + d = inp2() + yield d, inp1(), d, bfmt % \ + (o, o, o, s, dtype, 'in place2') + yield out[1:], inp1()[:-1], inp2()[:-1], bfmt % \ + (o + 1, o, o, s - 1, dtype, 'out of place') + yield out[:-1], inp1()[1:], inp2()[:-1], bfmt % \ + (o, o + 1, o, s - 1, dtype, 'out of place') + yield out[:-1], inp1()[:-1], inp2()[1:], bfmt % \ + (o, o, o + 1, s - 1, dtype, 'out of place') + yield inp1()[1:], inp1()[:-1], inp2()[:-1], bfmt % \ + (o + 1, o, o, s - 1, dtype, 'aliased') + yield inp1()[:-1], inp1()[1:], inp2()[:-1], bfmt % \ + (o, o + 1, o, s - 1, dtype, 'aliased') + yield inp1()[:-1], inp1()[:-1], inp2()[1:], bfmt % \ + (o, o, o + 1, s - 1, dtype, 'aliased') + + +class IgnoreException(Exception): + "Ignoring this exception due to disabled feature" + pass + + +@contextlib.contextmanager +def tempdir(*args, **kwargs): + """Context manager to provide a temporary test folder. + + All arguments are passed as this to the underlying tempfile.mkdtemp + function. + + """ + tmpdir = mkdtemp(*args, **kwargs) + try: + yield tmpdir + finally: + shutil.rmtree(tmpdir) + + +@contextlib.contextmanager +def temppath(*args, **kwargs): + """Context manager for temporary files. + + Context manager that returns the path to a closed temporary file. Its + parameters are the same as for tempfile.mkstemp and are passed directly + to that function. The underlying file is removed when the context is + exited, so it should be closed at that time. + + Windows does not allow a temporary file to be opened if it is already + open, so the underlying file must be closed after opening before it + can be opened again. 
+ + """ + fd, path = mkstemp(*args, **kwargs) + os.close(fd) + try: + yield path + finally: + os.remove(path) + + +class clear_and_catch_warnings(warnings.catch_warnings): + """ Context manager that resets warning registry for catching warnings + + Warnings can be slippery, because, whenever a warning is triggered, Python + adds a ``__warningregistry__`` member to the *calling* module. This makes + it impossible to retrigger the warning in this module, whatever you put in + the warnings filters. This context manager accepts a sequence of `modules` + as a keyword argument to its constructor and: + + * stores and removes any ``__warningregistry__`` entries in given `modules` + on entry; + * resets ``__warningregistry__`` to its previous state on exit. + + This makes it possible to trigger any warning afresh inside the context + manager without disturbing the state of warnings outside. + + For compatibility with Python 3.0, please consider all arguments to be + keyword-only. + + Parameters + ---------- + record : bool, optional + Specifies whether warnings should be captured by a custom + implementation of ``warnings.showwarning()`` and be appended to a list + returned by the context manager. Otherwise None is returned by the + context manager. The objects appended to the list are arguments whose + attributes mirror the arguments to ``showwarning()``. + modules : sequence, optional + Sequence of modules for which to reset warnings registry on entry and + restore on exit. To work correctly, all 'ignore' filters should + filter by one of these modules. + + Examples + -------- + >>> import warnings + >>> with np.testing.clear_and_catch_warnings( + ... modules=[np.core.fromnumeric]): + ... warnings.simplefilter('always') + ... warnings.filterwarnings('ignore', module='np.core.fromnumeric') + ... # do something that raises a warning but ignore those in + ... # np.core.fromnumeric + """ + class_modules = () + + def __init__(self, record=False, modules=()): + self.modules = set(modules).union(self.class_modules) + self._warnreg_copies = {} + super().__init__(record=record) + + def __enter__(self): + for mod in self.modules: + if hasattr(mod, '__warningregistry__'): + mod_reg = mod.__warningregistry__ + self._warnreg_copies[mod] = mod_reg.copy() + mod_reg.clear() + return super().__enter__() + + def __exit__(self, *exc_info): + super().__exit__(*exc_info) + for mod in self.modules: + if hasattr(mod, '__warningregistry__'): + mod.__warningregistry__.clear() + if mod in self._warnreg_copies: + mod.__warningregistry__.update(self._warnreg_copies[mod]) + + +class suppress_warnings: + """ + Context manager and decorator doing much the same as + ``warnings.catch_warnings``. + + However, it also provides a filter mechanism to work around + https://bugs.python.org/issue4180. + + This bug causes Python before 3.4 to not reliably show warnings again + after they have been ignored once (even within catch_warnings). It + means that no "ignore" filter can be used easily, since following + tests might need to see the warning. Additionally it allows easier + specificity for testing warnings and can be nested. + + Parameters + ---------- + forwarding_rule : str, optional + One of "always", "once", "module", or "location". Analogous to + the usual warnings module filter mode, it is useful to reduce + noise mostly on the outmost level. Unsuppressed and unrecorded + warnings will be forwarded based on this rule. Defaults to "always". 
+ "location" is equivalent to the warnings "default", match by exact + location the warning warning originated from. + + Notes + ----- + Filters added inside the context manager will be discarded again + when leaving it. Upon entering all filters defined outside a + context will be applied automatically. + + When a recording filter is added, matching warnings are stored in the + ``log`` attribute as well as in the list returned by ``record``. + + If filters are added and the ``module`` keyword is given, the + warning registry of this module will additionally be cleared when + applying it, entering the context, or exiting it. This could cause + warnings to appear a second time after leaving the context if they + were configured to be printed once (default) and were already + printed before the context was entered. + + Nesting this context manager will work as expected when the + forwarding rule is "always" (default). Unfiltered and unrecorded + warnings will be passed out and be matched by the outer level. + On the outmost level they will be printed (or caught by another + warnings context). The forwarding rule argument can modify this + behaviour. + + Like ``catch_warnings`` this context manager is not threadsafe. + + Examples + -------- + + With a context manager:: + + with np.testing.suppress_warnings() as sup: + sup.filter(DeprecationWarning, "Some text") + sup.filter(module=np.ma.core) + log = sup.record(FutureWarning, "Does this occur?") + command_giving_warnings() + # The FutureWarning was given once, the filtered warnings were + # ignored. All other warnings abide outside settings (may be + # printed/error) + assert_(len(log) == 1) + assert_(len(sup.log) == 1) # also stored in log attribute + + Or as a decorator:: + + sup = np.testing.suppress_warnings() + sup.filter(module=np.ma.core) # module must match exactly + @sup + def some_function(): + # do something which causes a warning in np.ma.core + pass + """ + def __init__(self, forwarding_rule="always"): + self._entered = False + + # Suppressions are either instance or defined inside one with block: + self._suppressions = [] + + if forwarding_rule not in {"always", "module", "once", "location"}: + raise ValueError("unsupported forwarding rule.") + self._forwarding_rule = forwarding_rule + + def _clear_registries(self): + if hasattr(warnings, "_filters_mutated"): + # clearing the registry should not be necessary on new pythons, + # instead the filters should be mutated. + warnings._filters_mutated() + return + # Simply clear the registry, this should normally be harmless, + # note that on new pythons it would be invalidated anyway. 
+ for module in self._tmp_modules: + if hasattr(module, "__warningregistry__"): + module.__warningregistry__.clear() + + def _filter(self, category=Warning, message="", module=None, record=False): + if record: + record = [] # The log where to store warnings + else: + record = None + if self._entered: + if module is None: + warnings.filterwarnings( + "always", category=category, message=message) + else: + module_regex = module.__name__.replace('.', r'\.') + '$' + warnings.filterwarnings( + "always", category=category, message=message, + module=module_regex) + self._tmp_modules.add(module) + self._clear_registries() + + self._tmp_suppressions.append( + (category, message, re.compile(message, re.I), module, record)) + else: + self._suppressions.append( + (category, message, re.compile(message, re.I), module, record)) + + return record + + def filter(self, category=Warning, message="", module=None): + """ + Add a new suppressing filter or apply it if the state is entered. + + Parameters + ---------- + category : class, optional + Warning class to filter + message : string, optional + Regular expression matching the warning message. + module : module, optional + Module to filter for. Note that the module (and its file) + must match exactly and cannot be a submodule. This may make + it unreliable for external modules. + + Notes + ----- + When added within a context, filters are only added inside + the context and will be forgotten when the context is exited. + """ + self._filter(category=category, message=message, module=module, + record=False) + + def record(self, category=Warning, message="", module=None): + """ + Append a new recording filter or apply it if the state is entered. + + All warnings matching will be appended to the ``log`` attribute. + + Parameters + ---------- + category : class, optional + Warning class to filter + message : string, optional + Regular expression matching the warning message. + module : module, optional + Module to filter for. Note that the module (and its file) + must match exactly and cannot be a submodule. This may make + it unreliable for external modules. + + Returns + ------- + log : list + A list which will be filled with all matched warnings. + + Notes + ----- + When added within a context, filters are only added inside + the context and will be forgotten when the context is exited. 
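+
+        Example (an illustrative sketch added by the editor, not upstream
+        docstring text)::
+
+            import warnings
+            with np.testing.suppress_warnings() as sup:
+                log = sup.record(UserWarning, "matched")
+                warnings.warn("matched message", UserWarning)
+            assert len(log) == 1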
+ """ + return self._filter(category=category, message=message, module=module, + record=True) + + def __enter__(self): + if self._entered: + raise RuntimeError("cannot enter suppress_warnings twice.") + + self._orig_show = warnings.showwarning + self._filters = warnings.filters + warnings.filters = self._filters[:] + + self._entered = True + self._tmp_suppressions = [] + self._tmp_modules = set() + self._forwarded = set() + + self.log = [] # reset global log (no need to keep same list) + + for cat, mess, _, mod, log in self._suppressions: + if log is not None: + del log[:] # clear the log + if mod is None: + warnings.filterwarnings( + "always", category=cat, message=mess) + else: + module_regex = mod.__name__.replace('.', r'\.') + '$' + warnings.filterwarnings( + "always", category=cat, message=mess, + module=module_regex) + self._tmp_modules.add(mod) + warnings.showwarning = self._showwarning + self._clear_registries() + + return self + + def __exit__(self, *exc_info): + warnings.showwarning = self._orig_show + warnings.filters = self._filters + self._clear_registries() + self._entered = False + del self._orig_show + del self._filters + + def _showwarning(self, message, category, filename, lineno, + *args, use_warnmsg=None, **kwargs): + for cat, _, pattern, mod, rec in ( + self._suppressions + self._tmp_suppressions)[::-1]: + if (issubclass(category, cat) and + pattern.match(message.args[0]) is not None): + if mod is None: + # Message and category match, either recorded or ignored + if rec is not None: + msg = WarningMessage(message, category, filename, + lineno, **kwargs) + self.log.append(msg) + rec.append(msg) + return + # Use startswith, because warnings strips the c or o from + # .pyc/.pyo files. + elif mod.__file__.startswith(filename): + # The message and module (filename) match + if rec is not None: + msg = WarningMessage(message, category, filename, + lineno, **kwargs) + self.log.append(msg) + rec.append(msg) + return + + # There is no filter in place, so pass to the outside handler + # unless we should only pass it once + if self._forwarding_rule == "always": + if use_warnmsg is None: + self._orig_show(message, category, filename, lineno, + *args, **kwargs) + else: + self._orig_showmsg(use_warnmsg) + return + + if self._forwarding_rule == "once": + signature = (message.args, category) + elif self._forwarding_rule == "module": + signature = (message.args, category, filename) + elif self._forwarding_rule == "location": + signature = (message.args, category, filename, lineno) + + if signature in self._forwarded: + return + self._forwarded.add(signature) + if use_warnmsg is None: + self._orig_show(message, category, filename, lineno, *args, + **kwargs) + else: + self._orig_showmsg(use_warnmsg) + + def __call__(self, func): + """ + Function decorator to apply certain suppressions to a whole + function. 
+ """ + @wraps(func) + def new_func(*args, **kwargs): + with self: + return func(*args, **kwargs) + + return new_func + + +@contextlib.contextmanager +def _assert_no_gc_cycles_context(name=None): + __tracebackhide__ = True # Hide traceback for py.test + + # not meaningful to test if there is no refcounting + if not HAS_REFCOUNT: + yield + return + + assert_(gc.isenabled()) + gc.disable() + gc_debug = gc.get_debug() + try: + for i in range(100): + if gc.collect() == 0: + break + else: + raise RuntimeError( + "Unable to fully collect garbage - perhaps a __del__ method " + "is creating more reference cycles?") + + gc.set_debug(gc.DEBUG_SAVEALL) + yield + # gc.collect returns the number of unreachable objects in cycles that + # were found -- we are checking that no cycles were created in the context + n_objects_in_cycles = gc.collect() + objects_in_cycles = gc.garbage[:] + finally: + del gc.garbage[:] + gc.set_debug(gc_debug) + gc.enable() + + if n_objects_in_cycles: + name_str = f' when calling {name}' if name is not None else '' + raise AssertionError( + "Reference cycles were found{}: {} objects were collected, " + "of which {} are shown below:{}" + .format( + name_str, + n_objects_in_cycles, + len(objects_in_cycles), + ''.join( + "\n {} object with id={}:\n {}".format( + type(o).__name__, + id(o), + pprint.pformat(o).replace('\n', '\n ') + ) for o in objects_in_cycles + ) + ) + ) + + +def assert_no_gc_cycles(*args, **kwargs): + """ + Fail if the given callable produces any reference cycles. + + If called with all arguments omitted, may be used as a context manager: + + with assert_no_gc_cycles(): + do_something() + + .. versionadded:: 1.15.0 + + Parameters + ---------- + func : callable + The callable to test. + \\*args : Arguments + Arguments passed to `func`. + \\*\\*kwargs : Kwargs + Keyword arguments passed to `func`. + + Returns + ------- + Nothing. The result is deliberately discarded to ensure that all cycles + are found. + + """ + if not args: + return _assert_no_gc_cycles_context() + + func = args[0] + args = args[1:] + with _assert_no_gc_cycles_context(name=func.__name__): + func(*args, **kwargs) + +def break_cycles(): + """ + Break reference cycles by calling gc.collect + Objects can call other objects' methods (for instance, another object's + __del__) inside their own __del__. On PyPy, the interpreter only runs + between calls to gc.collect, so multiple calls are needed to completely + release all cycles. + """ + + gc.collect() + if IS_PYPY: + # a few more, just to make sure all the finalizers are called + gc.collect() + gc.collect() + gc.collect() + gc.collect() + + +def requires_memory(free_bytes): + """Decorator to skip a test if not enough memory is available""" + import pytest + + def decorator(func): + @wraps(func) + def wrapper(*a, **kw): + msg = check_free_memory(free_bytes) + if msg is not None: + pytest.skip(msg) + + try: + return func(*a, **kw) + except MemoryError: + # Probably ran out of memory regardless: don't regard as failure + pytest.xfail("MemoryError raised") + + return wrapper + + return decorator + + +def check_free_memory(free_bytes): + """ + Check whether `free_bytes` amount of memory is currently free. 
+ Returns: None if enough memory available, otherwise error message + """ + env_var = 'NPY_AVAILABLE_MEM' + env_value = os.environ.get(env_var) + if env_value is not None: + try: + mem_free = _parse_size(env_value) + except ValueError as exc: + raise ValueError(f'Invalid environment variable {env_var}: {exc}') + + msg = (f'{free_bytes/1e9} GB memory required, but environment variable ' + f'NPY_AVAILABLE_MEM={env_value} set') + else: + mem_free = _get_mem_available() + + if mem_free is None: + msg = ("Could not determine available memory; set NPY_AVAILABLE_MEM " + "environment variable (e.g. NPY_AVAILABLE_MEM=16GB) to run " + "the test.") + mem_free = -1 + else: + msg = f'{free_bytes/1e9} GB memory required, but {mem_free/1e9} GB available' + + return msg if mem_free < free_bytes else None + + +def _parse_size(size_str): + """Convert memory size strings ('12 GB' etc.) to float""" + suffixes = {'': 1, 'b': 1, + 'k': 1000, 'm': 1000**2, 'g': 1000**3, 't': 1000**4, + 'kb': 1000, 'mb': 1000**2, 'gb': 1000**3, 'tb': 1000**4, + 'kib': 1024, 'mib': 1024**2, 'gib': 1024**3, 'tib': 1024**4} + + size_re = re.compile(r'^\s*(\d+|\d+\.\d+)\s*({0})\s*$'.format( + '|'.join(suffixes.keys())), re.I) + + m = size_re.match(size_str.lower()) + if not m or m.group(2) not in suffixes: + raise ValueError(f'value {size_str!r} not a valid size') + return int(float(m.group(1)) * suffixes[m.group(2)]) + + +def _get_mem_available(): + """Return available memory in bytes, or None if unknown.""" + try: + import psutil + return psutil.virtual_memory().available + except (ImportError, AttributeError): + pass + + if sys.platform.startswith('linux'): + info = {} + with open('/proc/meminfo', 'r') as f: + for line in f: + p = line.split() + info[p[0].strip(':').lower()] = int(p[1]) * 1024 + + if 'memavailable' in info: + # Linux >= 3.14 + return info['memavailable'] + else: + return info['memfree'] + info['cached'] + + return None + + +def _no_tracing(func): + """ + Decorator to temporarily turn off tracing for the duration of a test. 
+ Needed in tests that check refcounting, otherwise the tracing itself + influences the refcounts + """ + if not hasattr(sys, 'gettrace'): + return func + else: + @wraps(func) + def wrapper(*args, **kwargs): + original_trace = sys.gettrace() + try: + sys.settrace(None) + return func(*args, **kwargs) + finally: + sys.settrace(original_trace) + return wrapper diff --git a/.local/lib/python3.8/site-packages/numpy/testing/_private/utils.pyi b/.local/lib/python3.8/site-packages/numpy/testing/_private/utils.pyi new file mode 100644 index 0000000..4ba5d82 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/testing/_private/utils.pyi @@ -0,0 +1,400 @@ +import os +import sys +import ast +import types +import warnings +import unittest +import contextlib +from typing import ( + Literal as L, + Any, + AnyStr, + Callable, + ClassVar, + Dict, + Iterable, + List, + NoReturn, + overload, + Pattern, + Sequence, + Set, + Tuple, + Type, + type_check_only, + TypeVar, + Union, + Final, + SupportsIndex, +) + +from numpy import generic, dtype, number, object_, bool_, _FloatValue +from numpy.typing import ( + NDArray, + ArrayLike, + DTypeLike, + _ArrayLikeNumber_co, + _ArrayLikeObject_co, + _ArrayLikeTD64_co, + _ArrayLikeDT64_co, +) + +from unittest.case import ( + SkipTest as SkipTest, +) + +_T = TypeVar("_T") +_ET = TypeVar("_ET", bound=BaseException) +_FT = TypeVar("_FT", bound=Callable[..., Any]) + +# Must return a bool or an ndarray/generic type +# that is supported by `np.logical_and.reduce` +_ComparisonFunc = Callable[ + [NDArray[Any], NDArray[Any]], + Union[ + bool, + bool_, + number[Any], + NDArray[Union[bool_, number[Any], object_]], + ], +] + +__all__: List[str] + +class KnownFailureException(Exception): ... +class IgnoreException(Exception): ... + +class clear_and_catch_warnings(warnings.catch_warnings): + class_modules: ClassVar[Tuple[types.ModuleType, ...]] + modules: Set[types.ModuleType] + @overload + def __new__( + cls, + record: L[False] = ..., + modules: Iterable[types.ModuleType] = ..., + ) -> _clear_and_catch_warnings_without_records: ... + @overload + def __new__( + cls, + record: L[True], + modules: Iterable[types.ModuleType] = ..., + ) -> _clear_and_catch_warnings_with_records: ... + @overload + def __new__( + cls, + record: bool, + modules: Iterable[types.ModuleType] = ..., + ) -> clear_and_catch_warnings: ... + def __enter__(self) -> None | List[warnings.WarningMessage]: ... + def __exit__( + self, + __exc_type: None | Type[BaseException] = ..., + __exc_val: None | BaseException = ..., + __exc_tb: None | types.TracebackType = ..., + ) -> None: ... + +# Type-check only `clear_and_catch_warnings` subclasses for both values of the +# `record` parameter. Copied from the stdlib `warnings` stubs. + +@type_check_only +class _clear_and_catch_warnings_with_records(clear_and_catch_warnings): + def __enter__(self) -> List[warnings.WarningMessage]: ... + +@type_check_only +class _clear_and_catch_warnings_without_records(clear_and_catch_warnings): + def __enter__(self) -> None: ... + +class suppress_warnings: + log: List[warnings.WarningMessage] + def __init__( + self, + forwarding_rule: L["always", "module", "once", "location"] = ..., + ) -> None: ... + def filter( + self, + category: Type[Warning] = ..., + message: str = ..., + module: None | types.ModuleType = ..., + ) -> None: ... + def record( + self, + category: Type[Warning] = ..., + message: str = ..., + module: None | types.ModuleType = ..., + ) -> List[warnings.WarningMessage]: ... + def __enter__(self: _T) -> _T: ... 
+ def __exit__( + self, + __exc_type: None | Type[BaseException] = ..., + __exc_val: None | BaseException = ..., + __exc_tb: None | types.TracebackType = ..., + ) -> None: ... + def __call__(self, func: _FT) -> _FT: ... + +verbose: int +IS_PYPY: Final[bool] +IS_PYSTON: Final[bool] +HAS_REFCOUNT: Final[bool] +HAS_LAPACK64: Final[bool] + +def assert_(val: object, msg: str | Callable[[], str] = ...) -> None: ... + +# Contrary to runtime we can't do `os.name` checks while type checking, +# only `sys.platform` checks +if sys.platform == "win32" or sys.platform == "cygwin": + def memusage(processName: str = ..., instance: int = ...) -> int: ... +elif sys.platform == "linux": + def memusage(_proc_pid_stat: str | bytes | os.PathLike[Any] = ...) -> None | int: ... +else: + def memusage() -> NoReturn: ... + +if sys.platform == "linux": + def jiffies( + _proc_pid_stat: str | bytes | os.PathLike[Any] = ..., + _load_time: List[float] = ..., + ) -> int: ... +else: + def jiffies(_load_time: List[float] = ...) -> int: ... + +def build_err_msg( + arrays: Iterable[object], + err_msg: str, + header: str = ..., + verbose: bool = ..., + names: Sequence[str] = ..., + precision: None | SupportsIndex = ..., +) -> str: ... + +def assert_equal( + actual: object, + desired: object, + err_msg: str = ..., + verbose: bool = ..., +) -> None: ... + +def print_assert_equal( + test_string: str, + actual: object, + desired: object, +) -> None: ... + +def assert_almost_equal( + actual: _ArrayLikeNumber_co | _ArrayLikeObject_co, + desired: _ArrayLikeNumber_co | _ArrayLikeObject_co, + decimal: int = ..., + err_msg: str = ..., + verbose: bool = ..., +) -> None: ... + +# Anything that can be coerced into `builtins.float` +def assert_approx_equal( + actual: _FloatValue, + desired: _FloatValue, + significant: int = ..., + err_msg: str = ..., + verbose: bool = ..., +) -> None: ... + +def assert_array_compare( + comparison: _ComparisonFunc, + x: ArrayLike, + y: ArrayLike, + err_msg: str = ..., + verbose: bool = ..., + header: str = ..., + precision: SupportsIndex = ..., + equal_nan: bool = ..., + equal_inf: bool = ..., +) -> None: ... + +def assert_array_equal( + x: ArrayLike, + y: ArrayLike, + err_msg: str = ..., + verbose: bool = ..., +) -> None: ... + +def assert_array_almost_equal( + x: _ArrayLikeNumber_co | _ArrayLikeObject_co, + y: _ArrayLikeNumber_co | _ArrayLikeObject_co, + decimal: float = ..., + err_msg: str = ..., + verbose: bool = ..., +) -> None: ... + +@overload +def assert_array_less( + x: _ArrayLikeNumber_co | _ArrayLikeObject_co, + y: _ArrayLikeNumber_co | _ArrayLikeObject_co, + err_msg: str = ..., + verbose: bool = ..., +) -> None: ... +@overload +def assert_array_less( + x: _ArrayLikeTD64_co, + y: _ArrayLikeTD64_co, + err_msg: str = ..., + verbose: bool = ..., +) -> None: ... +@overload +def assert_array_less( + x: _ArrayLikeDT64_co, + y: _ArrayLikeDT64_co, + err_msg: str = ..., + verbose: bool = ..., +) -> None: ... + +def runstring( + astr: str | bytes | types.CodeType, + dict: None | Dict[str, Any], +) -> Any: ... + +def assert_string_equal(actual: str, desired: str) -> None: ... + +def rundocs( + filename: None | str | os.PathLike[str] = ..., + raise_on_error: bool = ..., +) -> None: ... + +def raises(*args: Type[BaseException]) -> Callable[[_FT], _FT]: ... + +@overload +def assert_raises( # type: ignore + expected_exception: Type[BaseException] | Tuple[Type[BaseException], ...], + callable: Callable[..., Any], + /, + *args: Any, + **kwargs: Any, +) -> None: ... 
+@overload +def assert_raises( + expected_exception: Type[_ET] | Tuple[Type[_ET], ...], + *, + msg: None | str = ..., +) -> unittest.case._AssertRaisesContext[_ET]: ... + +@overload +def assert_raises_regex( + expected_exception: Type[BaseException] | Tuple[Type[BaseException], ...], + expected_regex: str | bytes | Pattern[Any], + callable: Callable[..., Any], + /, + *args: Any, + **kwargs: Any, +) -> None: ... +@overload +def assert_raises_regex( + expected_exception: Type[_ET] | Tuple[Type[_ET], ...], + expected_regex: str | bytes | Pattern[Any], + *, + msg: None | str = ..., +) -> unittest.case._AssertRaisesContext[_ET]: ... + +def decorate_methods( + cls: Type[Any], + decorator: Callable[[Callable[..., Any]], Any], + testmatch: None | str | bytes | Pattern[Any] = ..., +) -> None: ... + +def measure( + code_str: str | bytes | ast.mod | ast.AST, + times: int = ..., + label: None | str = ..., +) -> float: ... + +@overload +def assert_allclose( + actual: _ArrayLikeNumber_co | _ArrayLikeObject_co, + desired: _ArrayLikeNumber_co | _ArrayLikeObject_co, + rtol: float = ..., + atol: float = ..., + equal_nan: bool = ..., + err_msg: str = ..., + verbose: bool = ..., +) -> None: ... +@overload +def assert_allclose( + actual: _ArrayLikeTD64_co, + desired: _ArrayLikeTD64_co, + rtol: float = ..., + atol: float = ..., + equal_nan: bool = ..., + err_msg: str = ..., + verbose: bool = ..., +) -> None: ... + +def assert_array_almost_equal_nulp( + x: _ArrayLikeNumber_co, + y: _ArrayLikeNumber_co, + nulp: float = ..., +) -> None: ... + +def assert_array_max_ulp( + a: _ArrayLikeNumber_co, + b: _ArrayLikeNumber_co, + maxulp: float = ..., + dtype: DTypeLike = ..., +) -> NDArray[Any]: ... + +@overload +def assert_warns( + warning_class: Type[Warning], +) -> contextlib._GeneratorContextManager[None]: ... +@overload +def assert_warns( + warning_class: Type[Warning], + func: Callable[..., _T], + /, + *args: Any, + **kwargs: Any, +) -> _T: ... + +@overload +def assert_no_warnings() -> contextlib._GeneratorContextManager[None]: ... +@overload +def assert_no_warnings( + func: Callable[..., _T], + /, + *args: Any, + **kwargs: Any, +) -> _T: ... + +@overload +def tempdir( + suffix: None = ..., + prefix: None = ..., + dir: None = ..., +) -> contextlib._GeneratorContextManager[str]: ... +@overload +def tempdir( + suffix: None | AnyStr = ..., + prefix: None | AnyStr = ..., + dir: None | AnyStr | os.PathLike[AnyStr] = ..., +) -> contextlib._GeneratorContextManager[AnyStr]: ... + +@overload +def temppath( + suffix: None = ..., + prefix: None = ..., + dir: None = ..., + text: bool = ..., +) -> contextlib._GeneratorContextManager[str]: ... +@overload +def temppath( + suffix: None | AnyStr = ..., + prefix: None | AnyStr = ..., + dir: None | AnyStr | os.PathLike[AnyStr] = ..., + text: bool = ..., +) -> contextlib._GeneratorContextManager[AnyStr]: ... + +@overload +def assert_no_gc_cycles() -> contextlib._GeneratorContextManager[None]: ... +@overload +def assert_no_gc_cycles( + func: Callable[..., Any], + /, + *args: Any, + **kwargs: Any, +) -> None: ... + +def break_cycles() -> None: ... 
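+
+# Editor's note -- an illustrative sketch (not part of the stub file) of
+# how the paired @overload declarations above are exercised at runtime:
+#
+#     import warnings
+#     from numpy.testing import assert_raises, assert_warns
+#
+#     # callable form: exception class, then the callable and its arguments
+#     assert_raises(ZeroDivisionError, lambda a, b: a / b, 1, 0)
+#
+#     # context-manager form: no callable is passed
+#     with assert_raises(ZeroDivisionError):
+#         1 / 0
+#
+#     with assert_warns(UserWarning):
+#         warnings.warn("example", UserWarning)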
diff --git a/.local/lib/python3.8/site-packages/numpy/testing/print_coercion_tables.py b/.local/lib/python3.8/site-packages/numpy/testing/print_coercion_tables.py new file mode 100644 index 0000000..3a447cd --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/testing/print_coercion_tables.py @@ -0,0 +1,199 @@ +#!/usr/bin/env python3 +"""Prints type-coercion tables for the built-in NumPy types + +""" +import numpy as np +from collections import namedtuple + +# Generic object that can be added, but doesn't do anything else +class GenericObject: + def __init__(self, v): + self.v = v + + def __add__(self, other): + return self + + def __radd__(self, other): + return self + + dtype = np.dtype('O') + +def print_cancast_table(ntypes): + print('X', end=' ') + for char in ntypes: + print(char, end=' ') + print() + for row in ntypes: + print(row, end=' ') + for col in ntypes: + if np.can_cast(row, col, "equiv"): + cast = "#" + elif np.can_cast(row, col, "safe"): + cast = "=" + elif np.can_cast(row, col, "same_kind"): + cast = "~" + elif np.can_cast(row, col, "unsafe"): + cast = "." + else: + cast = " " + print(cast, end=' ') + print() + +def print_coercion_table(ntypes, inputfirstvalue, inputsecondvalue, firstarray, use_promote_types=False): + print('+', end=' ') + for char in ntypes: + print(char, end=' ') + print() + for row in ntypes: + if row == 'O': + rowtype = GenericObject + else: + rowtype = np.obj2sctype(row) + + print(row, end=' ') + for col in ntypes: + if col == 'O': + coltype = GenericObject + else: + coltype = np.obj2sctype(col) + try: + if firstarray: + rowvalue = np.array([rowtype(inputfirstvalue)], dtype=rowtype) + else: + rowvalue = rowtype(inputfirstvalue) + colvalue = coltype(inputsecondvalue) + if use_promote_types: + char = np.promote_types(rowvalue.dtype, colvalue.dtype).char + else: + value = np.add(rowvalue, colvalue) + if isinstance(value, np.ndarray): + char = value.dtype.char + else: + char = np.dtype(type(value)).char + except ValueError: + char = '!' + except OverflowError: + char = '@' + except TypeError: + char = '#' + print(char, end=' ') + print() + + +def print_new_cast_table(*, can_cast=True, legacy=False, flags=False): + """Prints new casts, the values given are default "can-cast" values, not + actual ones. + """ + from numpy.core._multiarray_tests import get_all_cast_information + + cast_table = { + 0 : "#", # No cast (classify as equivalent here) + 1 : "#", # equivalent casting + 2 : "=", # safe casting + 3 : "~", # same-kind casting + 4 : ".", # unsafe casting + } + flags_table = { + 0 : "▗", 7: "█", + 1: "▚", 2: "▐", 4: "▄", + 3: "▜", 5: "▙", + 6: "▟", + } + + cast_info = namedtuple("cast_info", ["can_cast", "legacy", "flags"]) + no_cast_info = cast_info(" ", " ", " ") + + casts = get_all_cast_information() + table = {} + dtypes = set() + for cast in casts: + dtypes.add(cast["from"]) + dtypes.add(cast["to"]) + + if cast["from"] not in table: + table[cast["from"]] = {} + to_dict = table[cast["from"]] + + can_cast = cast_table[cast["casting"]] + legacy = "L" if cast["legacy"] else "." + flags = 0 + if cast["requires_pyapi"]: + flags |= 1 + if cast["supports_unaligned"]: + flags |= 2 + if cast["no_floatingpoint_errors"]: + flags |= 4 + + flags = flags_table[flags] + to_dict[cast["to"]] = cast_info(can_cast=can_cast, legacy=legacy, flags=flags) + + # The np.dtype(x.type) is a bit strange, because dtype classes do + # not expose much yet. 
+ types = np.typecodes["All"] + def sorter(x): + # This is a bit weird hack, to get a table as close as possible to + # the one printing all typecodes (but expecting user-dtypes). + dtype = np.dtype(x.type) + try: + indx = types.index(dtype.char) + except ValueError: + indx = np.inf + return (indx, dtype.char) + + dtypes = sorted(dtypes, key=sorter) + + def print_table(field="can_cast"): + print('X', end=' ') + for dt in dtypes: + print(np.dtype(dt.type).char, end=' ') + print() + for from_dt in dtypes: + print(np.dtype(from_dt.type).char, end=' ') + row = table.get(from_dt, {}) + for to_dt in dtypes: + print(getattr(row.get(to_dt, no_cast_info), field), end=' ') + print() + + if can_cast: + # Print the actual table: + print() + print("Casting: # is equivalent, = is safe, ~ is same-kind, and . is unsafe") + print() + print_table("can_cast") + + if legacy: + print() + print("L denotes a legacy cast . a non-legacy one.") + print() + print_table("legacy") + + if flags: + print() + print(f"{flags_table[0]}: no flags, {flags_table[1]}: PyAPI, " + f"{flags_table[2]}: supports unaligned, {flags_table[4]}: no-float-errors") + print() + print_table("flags") + + +if __name__ == '__main__': + print("can cast") + print_cancast_table(np.typecodes['All']) + print() + print("In these tables, ValueError is '!', OverflowError is '@', TypeError is '#'") + print() + print("scalar + scalar") + print_coercion_table(np.typecodes['All'], 0, 0, False) + print() + print("scalar + neg scalar") + print_coercion_table(np.typecodes['All'], 0, -1, False) + print() + print("array + scalar") + print_coercion_table(np.typecodes['All'], 0, 0, True) + print() + print("array + neg scalar") + print_coercion_table(np.typecodes['All'], 0, -1, True) + print() + print("promote_types") + print_coercion_table(np.typecodes['All'], 0, 0, False, True) + print("New casting type promotion:") + print_new_cast_table(can_cast=True, legacy=True, flags=True) diff --git a/.local/lib/python3.8/site-packages/numpy/testing/setup.py b/.local/lib/python3.8/site-packages/numpy/testing/setup.py new file mode 100644 index 0000000..6f203e8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/testing/setup.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python3 + +def configuration(parent_package='',top_path=None): + from numpy.distutils.misc_util import Configuration + config = Configuration('testing', parent_package, top_path) + + config.add_subpackage('_private') + config.add_subpackage('tests') + config.add_data_files('*.pyi') + config.add_data_files('_private/*.pyi') + return config + +if __name__ == '__main__': + from numpy.distutils.core import setup + setup(maintainer="NumPy Developers", + maintainer_email="numpy-dev@numpy.org", + description="NumPy test module", + url="https://www.numpy.org", + license="NumPy License (BSD Style)", + configuration=configuration, + ) diff --git a/.local/lib/python3.8/site-packages/numpy/testing/tests/__init__.py b/.local/lib/python3.8/site-packages/numpy/testing/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/numpy/testing/tests/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/testing/tests/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..1422e6b Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/testing/tests/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/testing/tests/__pycache__/test_doctesting.cpython-38.pyc 
b/.local/lib/python3.8/site-packages/numpy/testing/tests/__pycache__/test_doctesting.cpython-38.pyc new file mode 100644 index 0000000..c86f6e6 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/testing/tests/__pycache__/test_doctesting.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/testing/tests/__pycache__/test_utils.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/testing/tests/__pycache__/test_utils.cpython-38.pyc new file mode 100644 index 0000000..2278748 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/testing/tests/__pycache__/test_utils.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/testing/tests/test_doctesting.py b/.local/lib/python3.8/site-packages/numpy/testing/tests/test_doctesting.py new file mode 100644 index 0000000..92c2156 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/testing/tests/test_doctesting.py @@ -0,0 +1,57 @@ +""" Doctests for NumPy-specific nose/doctest modifications + +""" +#FIXME: None of these tests is run, because 'check' is not a recognized +# testing prefix. + +# try the #random directive on the output line +def check_random_directive(): + ''' + >>> 2+2 + #random: may vary on your system + ''' + +# check the implicit "import numpy as np" +def check_implicit_np(): + ''' + >>> np.array([1,2,3]) + array([1, 2, 3]) + ''' + +# there's some extraneous whitespace around the correct responses +def check_whitespace_enabled(): + ''' + # whitespace after the 3 + >>> 1+2 + 3 + + # whitespace before the 7 + >>> 3+4 + 7 + ''' + +def check_empty_output(): + """ Check that no output does not cause an error. + + This is related to nose bug 445; the numpy plugin changed the + doctest-result-variable default and therefore hit this bug: + http://code.google.com/p/python-nose/issues/detail?id=445 + + >>> a = 10 + """ + +def check_skip(): + """ Check skip directive + + The test below should not run + + >>> 1/0 #doctest: +SKIP + """ + + +if __name__ == '__main__': + # Run tests outside numpy test rig + import nose + from numpy.testing.noseclasses import NumpyDoctest + argv = ['', __file__, '--with-numpydoctest'] + nose.core.TestProgram(argv=argv, addplugins=[NumpyDoctest()]) diff --git a/.local/lib/python3.8/site-packages/numpy/testing/tests/test_utils.py b/.local/lib/python3.8/site-packages/numpy/testing/tests/test_utils.py new file mode 100644 index 0000000..31d2cdc --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/testing/tests/test_utils.py @@ -0,0 +1,1614 @@ +import warnings +import sys +import os +import itertools +import pytest +import weakref + +import numpy as np +from numpy.testing import ( + assert_equal, assert_array_equal, assert_almost_equal, + assert_array_almost_equal, assert_array_less, build_err_msg, raises, + assert_raises, assert_warns, assert_no_warnings, assert_allclose, + assert_approx_equal, assert_array_almost_equal_nulp, assert_array_max_ulp, + clear_and_catch_warnings, suppress_warnings, assert_string_equal, assert_, + tempdir, temppath, assert_no_gc_cycles, HAS_REFCOUNT + ) +from numpy.core.overrides import ARRAY_FUNCTION_ENABLED + + +class _GenericTest: + + def _test_equal(self, a, b): + self._assert_func(a, b) + + def _test_not_equal(self, a, b): + with assert_raises(AssertionError): + self._assert_func(a, b) + + def test_array_rank1_eq(self): + """Test that two equal arrays of rank 1 are found equal.""" + a = np.array([1, 2]) + b = np.array([1, 2]) + + self._test_equal(a, b) + + def test_array_rank1_noteq(self): + """Test 
that two different arrays of rank 1 are found not equal.""" + a = np.array([1, 2]) + b = np.array([2, 2]) + + self._test_not_equal(a, b) + + def test_array_rank2_eq(self): + """Test that two equal arrays of rank 2 are found equal.""" + a = np.array([[1, 2], [3, 4]]) + b = np.array([[1, 2], [3, 4]]) + + self._test_equal(a, b) + + def test_array_diffshape(self): + """Test that two arrays with different shapes are found not equal.""" + a = np.array([1, 2]) + b = np.array([[1, 2], [1, 2]]) + + self._test_not_equal(a, b) + + def test_objarray(self): + """Test object arrays.""" + a = np.array([1, 1], dtype=object) + self._test_equal(a, 1) + + def test_array_likes(self): + self._test_equal([1, 2, 3], (1, 2, 3)) + + +class TestArrayEqual(_GenericTest): + + def setup(self): + self._assert_func = assert_array_equal + + def test_generic_rank1(self): + """Test rank 1 array for all dtypes.""" + def foo(t): + a = np.empty(2, t) + a.fill(1) + b = a.copy() + c = a.copy() + c.fill(0) + self._test_equal(a, b) + self._test_not_equal(c, b) + + # Test numeric types and object + for t in '?bhilqpBHILQPfdgFDG': + foo(t) + + # Test strings + for t in ['S1', 'U1']: + foo(t) + + def test_0_ndim_array(self): + x = np.array(473963742225900817127911193656584771) + y = np.array(18535119325151578301457182298393896) + assert_raises(AssertionError, self._assert_func, x, y) + + y = x + self._assert_func(x, y) + + x = np.array(43) + y = np.array(10) + assert_raises(AssertionError, self._assert_func, x, y) + + y = x + self._assert_func(x, y) + + def test_generic_rank3(self): + """Test rank 3 array for all dtypes.""" + def foo(t): + a = np.empty((4, 2, 3), t) + a.fill(1) + b = a.copy() + c = a.copy() + c.fill(0) + self._test_equal(a, b) + self._test_not_equal(c, b) + + # Test numeric types and object + for t in '?bhilqpBHILQPfdgFDG': + foo(t) + + # Test strings + for t in ['S1', 'U1']: + foo(t) + + def test_nan_array(self): + """Test arrays with nan values in them.""" + a = np.array([1, 2, np.nan]) + b = np.array([1, 2, np.nan]) + + self._test_equal(a, b) + + c = np.array([1, 2, 3]) + self._test_not_equal(c, b) + + def test_string_arrays(self): + """Test string arrays.""" + a = np.array(['floupi', 'floupa']) + b = np.array(['floupi', 'floupa']) + + self._test_equal(a, b) + + c = np.array(['floupipi', 'floupa']) + + self._test_not_equal(c, b) + + def test_recarrays(self): + """Test record arrays.""" + a = np.empty(2, [('floupi', float), ('floupa', float)]) + a['floupi'] = [1, 2] + a['floupa'] = [1, 2] + b = a.copy() + + self._test_equal(a, b) + + c = np.empty(2, [('floupipi', float), ('floupa', float)]) + c['floupipi'] = a['floupi'].copy() + c['floupa'] = a['floupa'].copy() + + with suppress_warnings() as sup: + l = sup.record(FutureWarning, message="elementwise == ") + self._test_not_equal(c, b) + assert_equal(len(l), 1) + + def test_masked_nan_inf(self): + # Regression test for gh-11121 + a = np.ma.MaskedArray([3., 4., 6.5], mask=[False, True, False]) + b = np.array([3., np.nan, 6.5]) + self._test_equal(a, b) + self._test_equal(b, a) + a = np.ma.MaskedArray([3., 4., 6.5], mask=[True, False, False]) + b = np.array([np.inf, 4., 6.5]) + self._test_equal(a, b) + self._test_equal(b, a) + + def test_subclass_that_overrides_eq(self): + # While we cannot guarantee testing functions will always work for + # subclasses, the tests should ideally rely only on subclasses having + # comparison operators, not on them being able to store booleans + # (which, e.g., astropy Quantity cannot usefully do). See gh-8452. 
+ class MyArray(np.ndarray): + def __eq__(self, other): + return bool(np.equal(self, other).all()) + + def __ne__(self, other): + return not self == other + + a = np.array([1., 2.]).view(MyArray) + b = np.array([2., 3.]).view(MyArray) + assert_(type(a == a) is bool) + assert_(a == a) + assert_(a != b) + self._test_equal(a, a) + self._test_not_equal(a, b) + self._test_not_equal(b, a) + + @pytest.mark.skipif( + not ARRAY_FUNCTION_ENABLED, reason='requires __array_function__') + def test_subclass_that_does_not_implement_npall(self): + class MyArray(np.ndarray): + def __array_function__(self, *args, **kwargs): + return NotImplemented + + a = np.array([1., 2.]).view(MyArray) + b = np.array([2., 3.]).view(MyArray) + with assert_raises(TypeError): + np.all(a) + self._test_equal(a, a) + self._test_not_equal(a, b) + self._test_not_equal(b, a) + + +class TestBuildErrorMessage: + + def test_build_err_msg_defaults(self): + x = np.array([1.00001, 2.00002, 3.00003]) + y = np.array([1.00002, 2.00003, 3.00004]) + err_msg = 'There is a mismatch' + + a = build_err_msg([x, y], err_msg) + b = ('\nItems are not equal: There is a mismatch\n ACTUAL: array([' + '1.00001, 2.00002, 3.00003])\n DESIRED: array([1.00002, ' + '2.00003, 3.00004])') + assert_equal(a, b) + + def test_build_err_msg_no_verbose(self): + x = np.array([1.00001, 2.00002, 3.00003]) + y = np.array([1.00002, 2.00003, 3.00004]) + err_msg = 'There is a mismatch' + + a = build_err_msg([x, y], err_msg, verbose=False) + b = '\nItems are not equal: There is a mismatch' + assert_equal(a, b) + + def test_build_err_msg_custom_names(self): + x = np.array([1.00001, 2.00002, 3.00003]) + y = np.array([1.00002, 2.00003, 3.00004]) + err_msg = 'There is a mismatch' + + a = build_err_msg([x, y], err_msg, names=('FOO', 'BAR')) + b = ('\nItems are not equal: There is a mismatch\n FOO: array([' + '1.00001, 2.00002, 3.00003])\n BAR: array([1.00002, 2.00003, ' + '3.00004])') + assert_equal(a, b) + + def test_build_err_msg_custom_precision(self): + x = np.array([1.000000001, 2.00002, 3.00003]) + y = np.array([1.000000002, 2.00003, 3.00004]) + err_msg = 'There is a mismatch' + + a = build_err_msg([x, y], err_msg, precision=10) + b = ('\nItems are not equal: There is a mismatch\n ACTUAL: array([' + '1.000000001, 2.00002 , 3.00003 ])\n DESIRED: array([' + '1.000000002, 2.00003 , 3.00004 ])') + assert_equal(a, b) + + +class TestEqual(TestArrayEqual): + + def setup(self): + self._assert_func = assert_equal + + def test_nan_items(self): + self._assert_func(np.nan, np.nan) + self._assert_func([np.nan], [np.nan]) + self._test_not_equal(np.nan, [np.nan]) + self._test_not_equal(np.nan, 1) + + def test_inf_items(self): + self._assert_func(np.inf, np.inf) + self._assert_func([np.inf], [np.inf]) + self._test_not_equal(np.inf, [np.inf]) + + def test_datetime(self): + self._test_equal( + np.datetime64("2017-01-01", "s"), + np.datetime64("2017-01-01", "s") + ) + self._test_equal( + np.datetime64("2017-01-01", "s"), + np.datetime64("2017-01-01", "m") + ) + + # gh-10081 + self._test_not_equal( + np.datetime64("2017-01-01", "s"), + np.datetime64("2017-01-02", "s") + ) + self._test_not_equal( + np.datetime64("2017-01-01", "s"), + np.datetime64("2017-01-02", "m") + ) + + def test_nat_items(self): + # not a datetime + nadt_no_unit = np.datetime64("NaT") + nadt_s = np.datetime64("NaT", "s") + nadt_d = np.datetime64("NaT", "ns") + # not a timedelta + natd_no_unit = np.timedelta64("NaT") + natd_s = np.timedelta64("NaT", "s") + natd_d = np.timedelta64("NaT", "ns") + + dts = [nadt_no_unit, nadt_s, 
nadt_d] + tds = [natd_no_unit, natd_s, natd_d] + for a, b in itertools.product(dts, dts): + self._assert_func(a, b) + self._assert_func([a], [b]) + self._test_not_equal([a], b) + + for a, b in itertools.product(tds, tds): + self._assert_func(a, b) + self._assert_func([a], [b]) + self._test_not_equal([a], b) + + for a, b in itertools.product(tds, dts): + self._test_not_equal(a, b) + self._test_not_equal(a, [b]) + self._test_not_equal([a], [b]) + self._test_not_equal([a], np.datetime64("2017-01-01", "s")) + self._test_not_equal([b], np.datetime64("2017-01-01", "s")) + self._test_not_equal([a], np.timedelta64(123, "s")) + self._test_not_equal([b], np.timedelta64(123, "s")) + + def test_non_numeric(self): + self._assert_func('ab', 'ab') + self._test_not_equal('ab', 'abb') + + def test_complex_item(self): + self._assert_func(complex(1, 2), complex(1, 2)) + self._assert_func(complex(1, np.nan), complex(1, np.nan)) + self._test_not_equal(complex(1, np.nan), complex(1, 2)) + self._test_not_equal(complex(np.nan, 1), complex(1, np.nan)) + self._test_not_equal(complex(np.nan, np.inf), complex(np.nan, 2)) + + def test_negative_zero(self): + self._test_not_equal(np.PZERO, np.NZERO) + + def test_complex(self): + x = np.array([complex(1, 2), complex(1, np.nan)]) + y = np.array([complex(1, 2), complex(1, 2)]) + self._assert_func(x, x) + self._test_not_equal(x, y) + + def test_object(self): + #gh-12942 + import datetime + a = np.array([datetime.datetime(2000, 1, 1), + datetime.datetime(2000, 1, 2)]) + self._test_not_equal(a, a[::-1]) + + +class TestArrayAlmostEqual(_GenericTest): + + def setup(self): + self._assert_func = assert_array_almost_equal + + def test_closeness(self): + # Note that in the course of time we ended up with + # `abs(x - y) < 1.5 * 10**(-decimal)` + # instead of the previously documented + # `abs(x - y) < 0.5 * 10**(-decimal)` + # so this check serves to preserve the wrongness. + + # test scalars + self._assert_func(1.499999, 0.0, decimal=0) + assert_raises(AssertionError, + lambda: self._assert_func(1.5, 0.0, decimal=0)) + + # test arrays + self._assert_func([1.499999], [0.0], decimal=0) + assert_raises(AssertionError, + lambda: self._assert_func([1.5], [0.0], decimal=0)) + + def test_simple(self): + x = np.array([1234.2222]) + y = np.array([1234.2223]) + + self._assert_func(x, y, decimal=3) + self._assert_func(x, y, decimal=4) + assert_raises(AssertionError, + lambda: self._assert_func(x, y, decimal=5)) + + def test_nan(self): + anan = np.array([np.nan]) + aone = np.array([1]) + ainf = np.array([np.inf]) + self._assert_func(anan, anan) + assert_raises(AssertionError, + lambda: self._assert_func(anan, aone)) + assert_raises(AssertionError, + lambda: self._assert_func(anan, ainf)) + assert_raises(AssertionError, + lambda: self._assert_func(ainf, anan)) + + def test_inf(self): + a = np.array([[1., 2.], [3., 4.]]) + b = a.copy() + a[0, 0] = np.inf + assert_raises(AssertionError, + lambda: self._assert_func(a, b)) + b[0, 0] = -np.inf + assert_raises(AssertionError, + lambda: self._assert_func(a, b)) + + def test_subclass(self): + a = np.array([[1., 2.], [3., 4.]]) + b = np.ma.masked_array([[1., 2.], [0., 4.]], + [[False, False], [True, False]]) + self._assert_func(a, b) + self._assert_func(b, a) + self._assert_func(b, b) + + # Test fully masked as well (see gh-11123). 
+ a = np.ma.MaskedArray(3.5, mask=True) + b = np.array([3., 4., 6.5]) + self._test_equal(a, b) + self._test_equal(b, a) + a = np.ma.masked + b = np.array([3., 4., 6.5]) + self._test_equal(a, b) + self._test_equal(b, a) + a = np.ma.MaskedArray([3., 4., 6.5], mask=[True, True, True]) + b = np.array([1., 2., 3.]) + self._test_equal(a, b) + self._test_equal(b, a) + a = np.ma.MaskedArray([3., 4., 6.5], mask=[True, True, True]) + b = np.array(1.) + self._test_equal(a, b) + self._test_equal(b, a) + + def test_subclass_that_cannot_be_bool(self): + # While we cannot guarantee testing functions will always work for + # subclasses, the tests should ideally rely only on subclasses having + # comparison operators, not on them being able to store booleans + # (which, e.g., astropy Quantity cannot usefully do). See gh-8452. + class MyArray(np.ndarray): + def __eq__(self, other): + return super().__eq__(other).view(np.ndarray) + + def __lt__(self, other): + return super().__lt__(other).view(np.ndarray) + + def all(self, *args, **kwargs): + raise NotImplementedError + + a = np.array([1., 2.]).view(MyArray) + self._assert_func(a, a) + + +class TestAlmostEqual(_GenericTest): + + def setup(self): + self._assert_func = assert_almost_equal + + def test_closeness(self): + # Note that in the course of time we ended up with + # `abs(x - y) < 1.5 * 10**(-decimal)` + # instead of the previously documented + # `abs(x - y) < 0.5 * 10**(-decimal)` + # so this check serves to preserve the wrongness. + + # test scalars + self._assert_func(1.499999, 0.0, decimal=0) + assert_raises(AssertionError, + lambda: self._assert_func(1.5, 0.0, decimal=0)) + + # test arrays + self._assert_func([1.499999], [0.0], decimal=0) + assert_raises(AssertionError, + lambda: self._assert_func([1.5], [0.0], decimal=0)) + + def test_nan_item(self): + self._assert_func(np.nan, np.nan) + assert_raises(AssertionError, + lambda: self._assert_func(np.nan, 1)) + assert_raises(AssertionError, + lambda: self._assert_func(np.nan, np.inf)) + assert_raises(AssertionError, + lambda: self._assert_func(np.inf, np.nan)) + + def test_inf_item(self): + self._assert_func(np.inf, np.inf) + self._assert_func(-np.inf, -np.inf) + assert_raises(AssertionError, + lambda: self._assert_func(np.inf, 1)) + assert_raises(AssertionError, + lambda: self._assert_func(-np.inf, np.inf)) + + def test_simple_item(self): + self._test_not_equal(1, 2) + + def test_complex_item(self): + self._assert_func(complex(1, 2), complex(1, 2)) + self._assert_func(complex(1, np.nan), complex(1, np.nan)) + self._assert_func(complex(np.inf, np.nan), complex(np.inf, np.nan)) + self._test_not_equal(complex(1, np.nan), complex(1, 2)) + self._test_not_equal(complex(np.nan, 1), complex(1, np.nan)) + self._test_not_equal(complex(np.nan, np.inf), complex(np.nan, 2)) + + def test_complex(self): + x = np.array([complex(1, 2), complex(1, np.nan)]) + z = np.array([complex(1, 2), complex(np.nan, 1)]) + y = np.array([complex(1, 2), complex(1, 2)]) + self._assert_func(x, x) + self._test_not_equal(x, y) + self._test_not_equal(x, z) + + def test_error_message(self): + """Check the message is formatted correctly for the decimal value. 
+ Also check the message when input includes inf or nan (gh12200)""" + x = np.array([1.00000000001, 2.00000000002, 3.00003]) + y = np.array([1.00000000002, 2.00000000003, 3.00004]) + + # Test with a different amount of decimal digits + with pytest.raises(AssertionError) as exc_info: + self._assert_func(x, y, decimal=12) + msgs = str(exc_info.value).split('\n') + assert_equal(msgs[3], 'Mismatched elements: 3 / 3 (100%)') + assert_equal(msgs[4], 'Max absolute difference: 1.e-05') + assert_equal(msgs[5], 'Max relative difference: 3.33328889e-06') + assert_equal( + msgs[6], + ' x: array([1.00000000001, 2.00000000002, 3.00003 ])') + assert_equal( + msgs[7], + ' y: array([1.00000000002, 2.00000000003, 3.00004 ])') + + # With the default value of decimal digits, only the 3rd element + # differs. Note that we only check for the formatting of the arrays + # themselves. + with pytest.raises(AssertionError) as exc_info: + self._assert_func(x, y) + msgs = str(exc_info.value).split('\n') + assert_equal(msgs[3], 'Mismatched elements: 1 / 3 (33.3%)') + assert_equal(msgs[4], 'Max absolute difference: 1.e-05') + assert_equal(msgs[5], 'Max relative difference: 3.33328889e-06') + assert_equal(msgs[6], ' x: array([1. , 2. , 3.00003])') + assert_equal(msgs[7], ' y: array([1. , 2. , 3.00004])') + + # Check the error message when input includes inf + x = np.array([np.inf, 0]) + y = np.array([np.inf, 1]) + with pytest.raises(AssertionError) as exc_info: + self._assert_func(x, y) + msgs = str(exc_info.value).split('\n') + assert_equal(msgs[3], 'Mismatched elements: 1 / 2 (50%)') + assert_equal(msgs[4], 'Max absolute difference: 1.') + assert_equal(msgs[5], 'Max relative difference: 1.') + assert_equal(msgs[6], ' x: array([inf, 0.])') + assert_equal(msgs[7], ' y: array([inf, 1.])') + + # Check the error message when dividing by zero + x = np.array([1, 2]) + y = np.array([0, 0]) + with pytest.raises(AssertionError) as exc_info: + self._assert_func(x, y) + msgs = str(exc_info.value).split('\n') + assert_equal(msgs[3], 'Mismatched elements: 2 / 2 (100%)') + assert_equal(msgs[4], 'Max absolute difference: 2') + assert_equal(msgs[5], 'Max relative difference: inf') + + def test_error_message_2(self): + """Check the message is formatted correctly when either x or y is a scalar.""" + x = 2 + y = np.ones(20) + with pytest.raises(AssertionError) as exc_info: + self._assert_func(x, y) + msgs = str(exc_info.value).split('\n') + assert_equal(msgs[3], 'Mismatched elements: 20 / 20 (100%)') + assert_equal(msgs[4], 'Max absolute difference: 1.') + assert_equal(msgs[5], 'Max relative difference: 1.') + + y = 2 + x = np.ones(20) + with pytest.raises(AssertionError) as exc_info: + self._assert_func(x, y) + msgs = str(exc_info.value).split('\n') + assert_equal(msgs[3], 'Mismatched elements: 20 / 20 (100%)') + assert_equal(msgs[4], 'Max absolute difference: 1.') + assert_equal(msgs[5], 'Max relative difference: 0.5') + + def test_subclass_that_cannot_be_bool(self): + # While we cannot guarantee testing functions will always work for + # subclasses, the tests should ideally rely only on subclasses having + # comparison operators, not on them being able to store booleans + # (which, e.g., astropy Quantity cannot usefully do). See gh-8452. 
+ class MyArray(np.ndarray): + def __eq__(self, other): + return super().__eq__(other).view(np.ndarray) + + def __lt__(self, other): + return super().__lt__(other).view(np.ndarray) + + def all(self, *args, **kwargs): + raise NotImplementedError + + a = np.array([1., 2.]).view(MyArray) + self._assert_func(a, a) + + +class TestApproxEqual: + + def setup(self): + self._assert_func = assert_approx_equal + + def test_simple_0d_arrays(self): + x = np.array(1234.22) + y = np.array(1234.23) + + self._assert_func(x, y, significant=5) + self._assert_func(x, y, significant=6) + assert_raises(AssertionError, + lambda: self._assert_func(x, y, significant=7)) + + def test_simple_items(self): + x = 1234.22 + y = 1234.23 + + self._assert_func(x, y, significant=4) + self._assert_func(x, y, significant=5) + self._assert_func(x, y, significant=6) + assert_raises(AssertionError, + lambda: self._assert_func(x, y, significant=7)) + + def test_nan_array(self): + anan = np.array(np.nan) + aone = np.array(1) + ainf = np.array(np.inf) + self._assert_func(anan, anan) + assert_raises(AssertionError, lambda: self._assert_func(anan, aone)) + assert_raises(AssertionError, lambda: self._assert_func(anan, ainf)) + assert_raises(AssertionError, lambda: self._assert_func(ainf, anan)) + + def test_nan_items(self): + anan = np.array(np.nan) + aone = np.array(1) + ainf = np.array(np.inf) + self._assert_func(anan, anan) + assert_raises(AssertionError, lambda: self._assert_func(anan, aone)) + assert_raises(AssertionError, lambda: self._assert_func(anan, ainf)) + assert_raises(AssertionError, lambda: self._assert_func(ainf, anan)) + + +class TestArrayAssertLess: + + def setup(self): + self._assert_func = assert_array_less + + def test_simple_arrays(self): + x = np.array([1.1, 2.2]) + y = np.array([1.2, 2.3]) + + self._assert_func(x, y) + assert_raises(AssertionError, lambda: self._assert_func(y, x)) + + y = np.array([1.0, 2.3]) + + assert_raises(AssertionError, lambda: self._assert_func(x, y)) + assert_raises(AssertionError, lambda: self._assert_func(y, x)) + + def test_rank2(self): + x = np.array([[1.1, 2.2], [3.3, 4.4]]) + y = np.array([[1.2, 2.3], [3.4, 4.5]]) + + self._assert_func(x, y) + assert_raises(AssertionError, lambda: self._assert_func(y, x)) + + y = np.array([[1.0, 2.3], [3.4, 4.5]]) + + assert_raises(AssertionError, lambda: self._assert_func(x, y)) + assert_raises(AssertionError, lambda: self._assert_func(y, x)) + + def test_rank3(self): + x = np.ones(shape=(2, 2, 2)) + y = np.ones(shape=(2, 2, 2))+1 + + self._assert_func(x, y) + assert_raises(AssertionError, lambda: self._assert_func(y, x)) + + y[0, 0, 0] = 0 + + assert_raises(AssertionError, lambda: self._assert_func(x, y)) + assert_raises(AssertionError, lambda: self._assert_func(y, x)) + + def test_simple_items(self): + x = 1.1 + y = 2.2 + + self._assert_func(x, y) + assert_raises(AssertionError, lambda: self._assert_func(y, x)) + + y = np.array([2.2, 3.3]) + + self._assert_func(x, y) + assert_raises(AssertionError, lambda: self._assert_func(y, x)) + + y = np.array([1.0, 3.3]) + + assert_raises(AssertionError, lambda: self._assert_func(x, y)) + + def test_nan_noncompare(self): + anan = np.array(np.nan) + aone = np.array(1) + ainf = np.array(np.inf) + self._assert_func(anan, anan) + assert_raises(AssertionError, lambda: self._assert_func(aone, anan)) + assert_raises(AssertionError, lambda: self._assert_func(anan, aone)) + assert_raises(AssertionError, lambda: self._assert_func(anan, ainf)) + assert_raises(AssertionError, lambda: self._assert_func(ainf, anan)) + + 
def test_nan_noncompare_array(self): + x = np.array([1.1, 2.2, 3.3]) + anan = np.array(np.nan) + + assert_raises(AssertionError, lambda: self._assert_func(x, anan)) + assert_raises(AssertionError, lambda: self._assert_func(anan, x)) + + x = np.array([1.1, 2.2, np.nan]) + + assert_raises(AssertionError, lambda: self._assert_func(x, anan)) + assert_raises(AssertionError, lambda: self._assert_func(anan, x)) + + y = np.array([1.0, 2.0, np.nan]) + + self._assert_func(y, x) + assert_raises(AssertionError, lambda: self._assert_func(x, y)) + + def test_inf_compare(self): + aone = np.array(1) + ainf = np.array(np.inf) + + self._assert_func(aone, ainf) + self._assert_func(-ainf, aone) + self._assert_func(-ainf, ainf) + assert_raises(AssertionError, lambda: self._assert_func(ainf, aone)) + assert_raises(AssertionError, lambda: self._assert_func(aone, -ainf)) + assert_raises(AssertionError, lambda: self._assert_func(ainf, ainf)) + assert_raises(AssertionError, lambda: self._assert_func(ainf, -ainf)) + assert_raises(AssertionError, lambda: self._assert_func(-ainf, -ainf)) + + def test_inf_compare_array(self): + x = np.array([1.1, 2.2, np.inf]) + ainf = np.array(np.inf) + + assert_raises(AssertionError, lambda: self._assert_func(x, ainf)) + assert_raises(AssertionError, lambda: self._assert_func(ainf, x)) + assert_raises(AssertionError, lambda: self._assert_func(x, -ainf)) + assert_raises(AssertionError, lambda: self._assert_func(-x, -ainf)) + assert_raises(AssertionError, lambda: self._assert_func(-ainf, -x)) + self._assert_func(-ainf, x) + + +@pytest.mark.skip(reason="The raises decorator depends on Nose") +class TestRaises: + + def setup(self): + class MyException(Exception): + pass + + self.e = MyException + + def raises_exception(self, e): + raise e + + def does_not_raise_exception(self): + pass + + def test_correct_catch(self): + raises(self.e)(self.raises_exception)(self.e) # raises? + + def test_wrong_exception(self): + try: + raises(self.e)(self.raises_exception)(RuntimeError) # raises? + except RuntimeError: + return + else: + raise AssertionError("should have caught RuntimeError") + + def test_catch_no_raise(self): + try: + raises(self.e)(self.does_not_raise_exception)() # raises? 
+ except AssertionError: + return + else: + raise AssertionError("should have raised an AssertionError") + + +class TestWarns: + + def test_warn(self): + def f(): + warnings.warn("yo") + return 3 + + before_filters = sys.modules['warnings'].filters[:] + assert_equal(assert_warns(UserWarning, f), 3) + after_filters = sys.modules['warnings'].filters + + assert_raises(AssertionError, assert_no_warnings, f) + assert_equal(assert_no_warnings(lambda x: x, 1), 1) + + # Check that the warnings state is unchanged + assert_equal(before_filters, after_filters, + "assert_warns does not preserve warnings state") + + def test_context_manager(self): + + before_filters = sys.modules['warnings'].filters[:] + with assert_warns(UserWarning): + warnings.warn("yo") + after_filters = sys.modules['warnings'].filters + + def no_warnings(): + with assert_no_warnings(): + warnings.warn("yo") + + assert_raises(AssertionError, no_warnings) + assert_equal(before_filters, after_filters, + "assert_warns does not preserve warnings state") + + def test_warn_wrong_warning(self): + def f(): + warnings.warn("yo", DeprecationWarning) + + failed = False + with warnings.catch_warnings(): + warnings.simplefilter("error", DeprecationWarning) + try: + # Should raise a DeprecationWarning + assert_warns(UserWarning, f) + failed = True + except DeprecationWarning: + pass + + if failed: + raise AssertionError("wrong warning caught by assert_warns") + + +class TestAssertAllclose: + + def test_simple(self): + x = 1e-3 + y = 1e-9 + + assert_allclose(x, y, atol=1) + assert_raises(AssertionError, assert_allclose, x, y) + + a = np.array([x, y, x, y]) + b = np.array([x, y, x, x]) + + assert_allclose(a, b, atol=1) + assert_raises(AssertionError, assert_allclose, a, b) + + b[-1] = y * (1 + 1e-8) + assert_allclose(a, b) + assert_raises(AssertionError, assert_allclose, a, b, rtol=1e-9) + + assert_allclose(6, 10, rtol=0.5) + assert_raises(AssertionError, assert_allclose, 10, 6, rtol=0.5) + + def test_min_int(self): + a = np.array([np.iinfo(np.int_).min], dtype=np.int_) + # Should not raise: + assert_allclose(a, a) + + def test_report_fail_percentage(self): + a = np.array([1, 1, 1, 1]) + b = np.array([1, 1, 1, 2]) + + with pytest.raises(AssertionError) as exc_info: + assert_allclose(a, b) + msg = str(exc_info.value) + assert_('Mismatched elements: 1 / 4 (25%)\n' + 'Max absolute difference: 1\n' + 'Max relative difference: 0.5' in msg) + + def test_equal_nan(self): + a = np.array([np.nan]) + b = np.array([np.nan]) + # Should not raise: + assert_allclose(a, b, equal_nan=True) + + def test_not_equal_nan(self): + a = np.array([np.nan]) + b = np.array([np.nan]) + assert_raises(AssertionError, assert_allclose, a, b, equal_nan=False) + + def test_equal_nan_default(self): + # Make sure equal_nan default behavior remains unchanged. (All + # of these functions use assert_array_compare under the hood.) + # None of these should raise. 
+ a = np.array([np.nan]) + b = np.array([np.nan]) + assert_array_equal(a, b) + assert_array_almost_equal(a, b) + assert_array_less(a, b) + assert_allclose(a, b) + + def test_report_max_relative_error(self): + a = np.array([0, 1]) + b = np.array([0, 2]) + + with pytest.raises(AssertionError) as exc_info: + assert_allclose(a, b) + msg = str(exc_info.value) + assert_('Max relative difference: 0.5' in msg) + + def test_timedelta(self): + # see gh-18286 + a = np.array([[1, 2, 3, "NaT"]], dtype="m8[ns]") + assert_allclose(a, a) + + +class TestArrayAlmostEqualNulp: + + def test_float64_pass(self): + # The number of units of least precision + # In this case, use a few places above the lowest level (ie nulp=1) + nulp = 5 + x = np.linspace(-20, 20, 50, dtype=np.float64) + x = 10**x + x = np.r_[-x, x] + + # Addition + eps = np.finfo(x.dtype).eps + y = x + x*eps*nulp/2. + assert_array_almost_equal_nulp(x, y, nulp) + + # Subtraction + epsneg = np.finfo(x.dtype).epsneg + y = x - x*epsneg*nulp/2. + assert_array_almost_equal_nulp(x, y, nulp) + + def test_float64_fail(self): + nulp = 5 + x = np.linspace(-20, 20, 50, dtype=np.float64) + x = 10**x + x = np.r_[-x, x] + + eps = np.finfo(x.dtype).eps + y = x + x*eps*nulp*2. + assert_raises(AssertionError, assert_array_almost_equal_nulp, + x, y, nulp) + + epsneg = np.finfo(x.dtype).epsneg + y = x - x*epsneg*nulp*2. + assert_raises(AssertionError, assert_array_almost_equal_nulp, + x, y, nulp) + + def test_float64_ignore_nan(self): + # Ignore ULP differences between various NAN's + # Note that MIPS may reverse quiet and signaling nans + # so we use the builtin version as a base. + offset = np.uint64(0xffffffff) + nan1_i64 = np.array(np.nan, dtype=np.float64).view(np.uint64) + nan2_i64 = nan1_i64 ^ offset # nan payload on MIPS is all ones. + nan1_f64 = nan1_i64.view(np.float64) + nan2_f64 = nan2_i64.view(np.float64) + assert_array_max_ulp(nan1_f64, nan2_f64, 0) + + def test_float32_pass(self): + nulp = 5 + x = np.linspace(-20, 20, 50, dtype=np.float32) + x = 10**x + x = np.r_[-x, x] + + eps = np.finfo(x.dtype).eps + y = x + x*eps*nulp/2. + assert_array_almost_equal_nulp(x, y, nulp) + + epsneg = np.finfo(x.dtype).epsneg + y = x - x*epsneg*nulp/2. + assert_array_almost_equal_nulp(x, y, nulp) + + def test_float32_fail(self): + nulp = 5 + x = np.linspace(-20, 20, 50, dtype=np.float32) + x = 10**x + x = np.r_[-x, x] + + eps = np.finfo(x.dtype).eps + y = x + x*eps*nulp*2. + assert_raises(AssertionError, assert_array_almost_equal_nulp, + x, y, nulp) + + epsneg = np.finfo(x.dtype).epsneg + y = x - x*epsneg*nulp*2. + assert_raises(AssertionError, assert_array_almost_equal_nulp, + x, y, nulp) + + def test_float32_ignore_nan(self): + # Ignore ULP differences between various NAN's + # Note that MIPS may reverse quiet and signaling nans + # so we use the builtin version as a base. + offset = np.uint32(0xffff) + nan1_i32 = np.array(np.nan, dtype=np.float32).view(np.uint32) + nan2_i32 = nan1_i32 ^ offset # nan payload on MIPS is all ones. + nan1_f32 = nan1_i32.view(np.float32) + nan2_f32 = nan2_i32.view(np.float32) + assert_array_max_ulp(nan1_f32, nan2_f32, 0) + + def test_float16_pass(self): + nulp = 5 + x = np.linspace(-4, 4, 10, dtype=np.float16) + x = 10**x + x = np.r_[-x, x] + + eps = np.finfo(x.dtype).eps + y = x + x*eps*nulp/2. + assert_array_almost_equal_nulp(x, y, nulp) + + epsneg = np.finfo(x.dtype).epsneg + y = x - x*epsneg*nulp/2. 
+ assert_array_almost_equal_nulp(x, y, nulp) + + def test_float16_fail(self): + nulp = 5 + x = np.linspace(-4, 4, 10, dtype=np.float16) + x = 10**x + x = np.r_[-x, x] + + eps = np.finfo(x.dtype).eps + y = x + x*eps*nulp*2. + assert_raises(AssertionError, assert_array_almost_equal_nulp, + x, y, nulp) + + epsneg = np.finfo(x.dtype).epsneg + y = x - x*epsneg*nulp*2. + assert_raises(AssertionError, assert_array_almost_equal_nulp, + x, y, nulp) + + def test_float16_ignore_nan(self): + # Ignore ULP differences between various NAN's + # Note that MIPS may reverse quiet and signaling nans + # so we use the builtin version as a base. + offset = np.uint16(0xff) + nan1_i16 = np.array(np.nan, dtype=np.float16).view(np.uint16) + nan2_i16 = nan1_i16 ^ offset # nan payload on MIPS is all ones. + nan1_f16 = nan1_i16.view(np.float16) + nan2_f16 = nan2_i16.view(np.float16) + assert_array_max_ulp(nan1_f16, nan2_f16, 0) + + def test_complex128_pass(self): + nulp = 5 + x = np.linspace(-20, 20, 50, dtype=np.float64) + x = 10**x + x = np.r_[-x, x] + xi = x + x*1j + + eps = np.finfo(x.dtype).eps + y = x + x*eps*nulp/2. + assert_array_almost_equal_nulp(xi, x + y*1j, nulp) + assert_array_almost_equal_nulp(xi, y + x*1j, nulp) + # The test condition needs to be at least a factor of sqrt(2) smaller + # because the real and imaginary parts both change + y = x + x*eps*nulp/4. + assert_array_almost_equal_nulp(xi, y + y*1j, nulp) + + epsneg = np.finfo(x.dtype).epsneg + y = x - x*epsneg*nulp/2. + assert_array_almost_equal_nulp(xi, x + y*1j, nulp) + assert_array_almost_equal_nulp(xi, y + x*1j, nulp) + y = x - x*epsneg*nulp/4. + assert_array_almost_equal_nulp(xi, y + y*1j, nulp) + + def test_complex128_fail(self): + nulp = 5 + x = np.linspace(-20, 20, 50, dtype=np.float64) + x = 10**x + x = np.r_[-x, x] + xi = x + x*1j + + eps = np.finfo(x.dtype).eps + y = x + x*eps*nulp*2. + assert_raises(AssertionError, assert_array_almost_equal_nulp, + xi, x + y*1j, nulp) + assert_raises(AssertionError, assert_array_almost_equal_nulp, + xi, y + x*1j, nulp) + # The test condition needs to be at least a factor of sqrt(2) smaller + # because the real and imaginary parts both change + y = x + x*eps*nulp + assert_raises(AssertionError, assert_array_almost_equal_nulp, + xi, y + y*1j, nulp) + + epsneg = np.finfo(x.dtype).epsneg + y = x - x*epsneg*nulp*2. + assert_raises(AssertionError, assert_array_almost_equal_nulp, + xi, x + y*1j, nulp) + assert_raises(AssertionError, assert_array_almost_equal_nulp, + xi, y + x*1j, nulp) + y = x - x*epsneg*nulp + assert_raises(AssertionError, assert_array_almost_equal_nulp, + xi, y + y*1j, nulp) + + def test_complex64_pass(self): + nulp = 5 + x = np.linspace(-20, 20, 50, dtype=np.float32) + x = 10**x + x = np.r_[-x, x] + xi = x + x*1j + + eps = np.finfo(x.dtype).eps + y = x + x*eps*nulp/2. + assert_array_almost_equal_nulp(xi, x + y*1j, nulp) + assert_array_almost_equal_nulp(xi, y + x*1j, nulp) + y = x + x*eps*nulp/4. + assert_array_almost_equal_nulp(xi, y + y*1j, nulp) + + epsneg = np.finfo(x.dtype).epsneg + y = x - x*epsneg*nulp/2. + assert_array_almost_equal_nulp(xi, x + y*1j, nulp) + assert_array_almost_equal_nulp(xi, y + x*1j, nulp) + y = x - x*epsneg*nulp/4. + assert_array_almost_equal_nulp(xi, y + y*1j, nulp) + + def test_complex64_fail(self): + nulp = 5 + x = np.linspace(-20, 20, 50, dtype=np.float32) + x = 10**x + x = np.r_[-x, x] + xi = x + x*1j + + eps = np.finfo(x.dtype).eps + y = x + x*eps*nulp*2. 
+ assert_raises(AssertionError, assert_array_almost_equal_nulp, + xi, x + y*1j, nulp) + assert_raises(AssertionError, assert_array_almost_equal_nulp, + xi, y + x*1j, nulp) + y = x + x*eps*nulp + assert_raises(AssertionError, assert_array_almost_equal_nulp, + xi, y + y*1j, nulp) + + epsneg = np.finfo(x.dtype).epsneg + y = x - x*epsneg*nulp*2. + assert_raises(AssertionError, assert_array_almost_equal_nulp, + xi, x + y*1j, nulp) + assert_raises(AssertionError, assert_array_almost_equal_nulp, + xi, y + x*1j, nulp) + y = x - x*epsneg*nulp + assert_raises(AssertionError, assert_array_almost_equal_nulp, + xi, y + y*1j, nulp) + + +class TestULP: + + def test_equal(self): + x = np.random.randn(10) + assert_array_max_ulp(x, x, maxulp=0) + + def test_single(self): + # Generate 1 + small deviation, check that adding eps gives a few ULP + x = np.ones(10).astype(np.float32) + x += 0.01 * np.random.randn(10).astype(np.float32) + eps = np.finfo(np.float32).eps + assert_array_max_ulp(x, x+eps, maxulp=20) + + def test_double(self): + # Generate 1 + small deviation, check that adding eps gives a few ULP + x = np.ones(10).astype(np.float64) + x += 0.01 * np.random.randn(10).astype(np.float64) + eps = np.finfo(np.float64).eps + assert_array_max_ulp(x, x+eps, maxulp=200) + + def test_inf(self): + for dt in [np.float32, np.float64]: + inf = np.array([np.inf]).astype(dt) + big = np.array([np.finfo(dt).max]) + assert_array_max_ulp(inf, big, maxulp=200) + + def test_nan(self): + # Test that nan is 'far' from small, tiny, inf, max and min + for dt in [np.float32, np.float64]: + if dt == np.float32: + maxulp = 1e6 + else: + maxulp = 1e12 + inf = np.array([np.inf]).astype(dt) + nan = np.array([np.nan]).astype(dt) + big = np.array([np.finfo(dt).max]) + tiny = np.array([np.finfo(dt).tiny]) + zero = np.array([np.PZERO]).astype(dt) + nzero = np.array([np.NZERO]).astype(dt) + assert_raises(AssertionError, + lambda: assert_array_max_ulp(nan, inf, + maxulp=maxulp)) + assert_raises(AssertionError, + lambda: assert_array_max_ulp(nan, big, + maxulp=maxulp)) + assert_raises(AssertionError, + lambda: assert_array_max_ulp(nan, tiny, + maxulp=maxulp)) + assert_raises(AssertionError, + lambda: assert_array_max_ulp(nan, zero, + maxulp=maxulp)) + assert_raises(AssertionError, + lambda: assert_array_max_ulp(nan, nzero, + maxulp=maxulp)) + + +class TestStringEqual: + def test_simple(self): + assert_string_equal("hello", "hello") + assert_string_equal("hello\nmultiline", "hello\nmultiline") + + with pytest.raises(AssertionError) as exc_info: + assert_string_equal("foo\nbar", "hello\nbar") + msg = str(exc_info.value) + assert_equal(msg, "Differences in strings:\n- foo\n+ hello") + + assert_raises(AssertionError, + lambda: assert_string_equal("foo", "hello")) + + def test_regex(self): + assert_string_equal("a+*b", "a+*b") + + assert_raises(AssertionError, + lambda: assert_string_equal("aaa", "a+b")) + + +def assert_warn_len_equal(mod, n_in_context, py34=None, py37=None): + try: + mod_warns = mod.__warningregistry__ + except AttributeError: + # the lack of a __warningregistry__ + # attribute means that no warning has + # occurred; this can be triggered in + # a parallel test scenario, while in + # a serial test scenario an initial + # warning (and therefore the attribute) + # are always created first + mod_warns = {} + + num_warns = len(mod_warns) + # Python 3.4 appears to clear any pre-existing warnings of the same type, + # when raising warnings inside a catch_warnings block. 
So, there is a + # warning generated by the tests within the context manager, but no + # previous warnings. + if 'version' in mod_warns: + # Python 3 adds a 'version' entry to the registry, + # do not count it. + num_warns -= 1 + + # Behavior of warnings is Python version dependent. Adjust the + # expected result to compensate. In particular, Python 3.7 does + # not make an entry for ignored warnings. + if sys.version_info[:2] >= (3, 7): + if py37 is not None: + n_in_context = py37 + else: + if py34 is not None: + n_in_context = py34 + assert_equal(num_warns, n_in_context) + +def test_warn_len_equal_call_scenarios(): + # assert_warn_len_equal is called under + # varying circumstances depending on serial + # vs. parallel test scenarios; this test + # simply aims to probe both code paths and + # check that no assertion is uncaught + + # parallel scenario -- no warning issued yet + class mod: + pass + + mod_inst = mod() + + assert_warn_len_equal(mod=mod_inst, + n_in_context=0) + + # serial test scenario -- the __warningregistry__ + # attribute should be present + class mod: + def __init__(self): + self.__warningregistry__ = {'warning1':1, + 'warning2':2} + + mod_inst = mod() + assert_warn_len_equal(mod=mod_inst, + n_in_context=2) + + +def _get_fresh_mod(): + # Get this module, with warning registry empty + my_mod = sys.modules[__name__] + try: + my_mod.__warningregistry__.clear() + except AttributeError: + # will not have a __warningregistry__ unless warning has been + # raised in the module at some point + pass + return my_mod + + +def test_clear_and_catch_warnings(): + # Initial state of module, no warnings + my_mod = _get_fresh_mod() + assert_equal(getattr(my_mod, '__warningregistry__', {}), {}) + with clear_and_catch_warnings(modules=[my_mod]): + warnings.simplefilter('ignore') + warnings.warn('Some warning') + assert_equal(my_mod.__warningregistry__, {}) + # Without specified modules, don't clear warnings during context + # Python 3.7 catch_warnings doesn't make an entry for 'ignore'. + with clear_and_catch_warnings(): + warnings.simplefilter('ignore') + warnings.warn('Some warning') + assert_warn_len_equal(my_mod, 1, py37=0) + # Confirm that specifying module keeps old warning, does not add new + with clear_and_catch_warnings(modules=[my_mod]): + warnings.simplefilter('ignore') + warnings.warn('Another warning') + assert_warn_len_equal(my_mod, 1, py37=0) + # Another warning, no module spec does add to warnings dict, except on + # Python 3.4 (see comments in `assert_warn_len_equal`) + # Python 3.7 catch_warnings doesn't make an entry for 'ignore'. + with clear_and_catch_warnings(): + warnings.simplefilter('ignore') + warnings.warn('Another warning') + assert_warn_len_equal(my_mod, 2, py34=1, py37=0) + + +def test_suppress_warnings_module(): + # Initial state of module, no warnings + my_mod = _get_fresh_mod() + assert_equal(getattr(my_mod, '__warningregistry__', {}), {}) + + def warn_other_module(): + # Apply along axis is implemented in python; stacklevel=2 means + # we end up inside its module, not ours. + def warn(arr): + warnings.warn("Some warning 2", stacklevel=2) + return arr + np.apply_along_axis(warn, 0, [0]) + + # Test module based warning suppression: + assert_warn_len_equal(my_mod, 0) + with suppress_warnings() as sup: + sup.record(UserWarning) + # suppress warning from other module (may have .pyc ending), + # if apply_along_axis is moved, had to be changed. 
+ sup.filter(module=np.lib.shape_base) + warnings.warn("Some warning") + warn_other_module() + # Check that the suppression did test the file correctly (this module + # got filtered) + assert_equal(len(sup.log), 1) + assert_equal(sup.log[0].message.args[0], "Some warning") + assert_warn_len_equal(my_mod, 0, py37=0) + sup = suppress_warnings() + # Will have to be changed if apply_along_axis is moved: + sup.filter(module=my_mod) + with sup: + warnings.warn('Some warning') + assert_warn_len_equal(my_mod, 0) + # And test repeat works: + sup.filter(module=my_mod) + with sup: + warnings.warn('Some warning') + assert_warn_len_equal(my_mod, 0) + + # Without specified modules, don't clear warnings during context + # Python 3.7 does not add ignored warnings. + with suppress_warnings(): + warnings.simplefilter('ignore') + warnings.warn('Some warning') + assert_warn_len_equal(my_mod, 1, py37=0) + +def test_suppress_warnings_type(): + # Initial state of module, no warnings + my_mod = _get_fresh_mod() + assert_equal(getattr(my_mod, '__warningregistry__', {}), {}) + + # Test module based warning suppression: + with suppress_warnings() as sup: + sup.filter(UserWarning) + warnings.warn('Some warning') + assert_warn_len_equal(my_mod, 0) + sup = suppress_warnings() + sup.filter(UserWarning) + with sup: + warnings.warn('Some warning') + assert_warn_len_equal(my_mod, 0) + # And test repeat works: + sup.filter(module=my_mod) + with sup: + warnings.warn('Some warning') + assert_warn_len_equal(my_mod, 0) + + # Without specified modules, don't clear warnings during context + # Python 3.7 does not add ignored warnings. + with suppress_warnings(): + warnings.simplefilter('ignore') + warnings.warn('Some warning') + assert_warn_len_equal(my_mod, 1, py37=0) + + +def test_suppress_warnings_decorate_no_record(): + sup = suppress_warnings() + sup.filter(UserWarning) + + @sup + def warn(category): + warnings.warn('Some warning', category) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + warn(UserWarning) # should be suppressed + warn(RuntimeWarning) + assert_equal(len(w), 1) + + +def test_suppress_warnings_record(): + sup = suppress_warnings() + log1 = sup.record() + + with sup: + log2 = sup.record(message='Some other warning 2') + sup.filter(message='Some warning') + warnings.warn('Some warning') + warnings.warn('Some other warning') + warnings.warn('Some other warning 2') + + assert_equal(len(sup.log), 2) + assert_equal(len(log1), 1) + assert_equal(len(log2), 1) + assert_equal(log2[0].message.args[0], 'Some other warning 2') + + # Do it again, with the same context to see if some warnings survived: + with sup: + log2 = sup.record(message='Some other warning 2') + sup.filter(message='Some warning') + warnings.warn('Some warning') + warnings.warn('Some other warning') + warnings.warn('Some other warning 2') + + assert_equal(len(sup.log), 2) + assert_equal(len(log1), 1) + assert_equal(len(log2), 1) + assert_equal(log2[0].message.args[0], 'Some other warning 2') + + # Test nested: + with suppress_warnings() as sup: + sup.record() + with suppress_warnings() as sup2: + sup2.record(message='Some warning') + warnings.warn('Some warning') + warnings.warn('Some other warning') + assert_equal(len(sup2.log), 1) + assert_equal(len(sup.log), 1) + + +def test_suppress_warnings_forwarding(): + def warn_other_module(): + # Apply along axis is implemented in python; stacklevel=2 means + # we end up inside its module, not ours. 
+ def warn(arr): + warnings.warn("Some warning", stacklevel=2) + return arr + np.apply_along_axis(warn, 0, [0]) + + with suppress_warnings() as sup: + sup.record() + with suppress_warnings("always"): + for i in range(2): + warnings.warn("Some warning") + + assert_equal(len(sup.log), 2) + + with suppress_warnings() as sup: + sup.record() + with suppress_warnings("location"): + for i in range(2): + warnings.warn("Some warning") + warnings.warn("Some warning") + + assert_equal(len(sup.log), 2) + + with suppress_warnings() as sup: + sup.record() + with suppress_warnings("module"): + for i in range(2): + warnings.warn("Some warning") + warnings.warn("Some warning") + warn_other_module() + + assert_equal(len(sup.log), 2) + + with suppress_warnings() as sup: + sup.record() + with suppress_warnings("once"): + for i in range(2): + warnings.warn("Some warning") + warnings.warn("Some other warning") + warn_other_module() + + assert_equal(len(sup.log), 2) + + +def test_tempdir(): + with tempdir() as tdir: + fpath = os.path.join(tdir, 'tmp') + with open(fpath, 'w'): + pass + assert_(not os.path.isdir(tdir)) + + raised = False + try: + with tempdir() as tdir: + raise ValueError() + except ValueError: + raised = True + assert_(raised) + assert_(not os.path.isdir(tdir)) + + +def test_temppath(): + with temppath() as fpath: + with open(fpath, 'w'): + pass + assert_(not os.path.isfile(fpath)) + + raised = False + try: + with temppath() as fpath: + raise ValueError() + except ValueError: + raised = True + assert_(raised) + assert_(not os.path.isfile(fpath)) + + +class my_cacw(clear_and_catch_warnings): + + class_modules = (sys.modules[__name__],) + + +def test_clear_and_catch_warnings_inherit(): + # Test can subclass and add default modules + my_mod = _get_fresh_mod() + with my_cacw(): + warnings.simplefilter('ignore') + warnings.warn('Some warning') + assert_equal(my_mod.__warningregistry__, {}) + + +@pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") +class TestAssertNoGcCycles: + """ Test assert_no_gc_cycles """ + def test_passes(self): + def no_cycle(): + b = [] + b.append([]) + return b + + with assert_no_gc_cycles(): + no_cycle() + + assert_no_gc_cycles(no_cycle) + + def test_asserts(self): + def make_cycle(): + a = [] + a.append(a) + a.append(a) + return a + + with assert_raises(AssertionError): + with assert_no_gc_cycles(): + make_cycle() + + with assert_raises(AssertionError): + assert_no_gc_cycles(make_cycle) + + @pytest.mark.slow + def test_fails(self): + """ + Test that in cases where the garbage cannot be collected, we raise an + error, instead of hanging forever trying to clear it. + """ + + class ReferenceCycleInDel: + """ + An object that not only contains a reference cycle, but creates new + cycles whenever it's garbage-collected and its __del__ runs + """ + make_cycle = True + + def __init__(self): + self.cycle = self + + def __del__(self): + # break the current cycle so that `self` can be freed + self.cycle = None + + if ReferenceCycleInDel.make_cycle: + # but create a new one so that the garbage collector has more + # work to do. + ReferenceCycleInDel() + + try: + w = weakref.ref(ReferenceCycleInDel()) + try: + with assert_raises(RuntimeError): + # this will be unable to get a baseline empty garbage + assert_no_gc_cycles(lambda: None) + except AssertionError: + # the above test is only necessary if the GC actually tried to free + # our object anyway, which python 2.7 does not. 
+ if w() is not None: + pytest.skip("GC does not call __del__ on cyclic objects") + raise + + finally: + # make sure that we stop creating reference cycles + ReferenceCycleInDel.make_cycle = False diff --git a/.local/lib/python3.8/site-packages/numpy/testing/utils.py b/.local/lib/python3.8/site-packages/numpy/testing/utils.py new file mode 100644 index 0000000..20a8833 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/testing/utils.py @@ -0,0 +1,29 @@ +""" +Back compatibility utils module. It will import the appropriate +set of tools. + +""" +import warnings + +# 2018-04-04, numpy 1.15.0 ImportWarning +# 2019-09-18, numpy 1.18.0 DeprecationWarning (changed) +warnings.warn("Importing from numpy.testing.utils is deprecated " + "since 1.15.0, import from numpy.testing instead.", + DeprecationWarning, stacklevel=2) + +from ._private.utils import * +from ._private.utils import _assert_valid_refcount, _gen_alignment_data + +__all__ = [ + 'assert_equal', 'assert_almost_equal', 'assert_approx_equal', + 'assert_array_equal', 'assert_array_less', 'assert_string_equal', + 'assert_array_almost_equal', 'assert_raises', 'build_err_msg', + 'decorate_methods', 'jiffies', 'memusage', 'print_assert_equal', + 'raises', 'rundocs', 'runstring', 'verbose', 'measure', + 'assert_', 'assert_array_almost_equal_nulp', 'assert_raises_regex', + 'assert_array_max_ulp', 'assert_warns', 'assert_no_warnings', + 'assert_allclose', 'IgnoreException', 'clear_and_catch_warnings', + 'SkipTest', 'KnownFailureException', 'temppath', 'tempdir', 'IS_PYPY', + 'HAS_REFCOUNT', 'suppress_warnings', 'assert_array_compare', + 'assert_no_gc_cycles' + ] diff --git a/.local/lib/python3.8/site-packages/numpy/tests/__init__.py b/.local/lib/python3.8/site-packages/numpy/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/numpy/tests/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/tests/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..ddb09ba Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/tests/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/tests/__pycache__/test__all__.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/tests/__pycache__/test__all__.cpython-38.pyc new file mode 100644 index 0000000..dbf3beb Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/tests/__pycache__/test__all__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/tests/__pycache__/test_ctypeslib.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/tests/__pycache__/test_ctypeslib.cpython-38.pyc new file mode 100644 index 0000000..135ca99 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/tests/__pycache__/test_ctypeslib.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/tests/__pycache__/test_matlib.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/tests/__pycache__/test_matlib.cpython-38.pyc new file mode 100644 index 0000000..b0ae095 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/tests/__pycache__/test_matlib.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/tests/__pycache__/test_numpy_version.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/tests/__pycache__/test_numpy_version.cpython-38.pyc new file mode 100644 index 0000000..8823af4 Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/numpy/tests/__pycache__/test_numpy_version.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/tests/__pycache__/test_public_api.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/tests/__pycache__/test_public_api.cpython-38.pyc new file mode 100644 index 0000000..86cd9c2 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/tests/__pycache__/test_public_api.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/tests/__pycache__/test_reloading.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/tests/__pycache__/test_reloading.cpython-38.pyc new file mode 100644 index 0000000..3114613 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/tests/__pycache__/test_reloading.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/tests/__pycache__/test_scripts.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/tests/__pycache__/test_scripts.cpython-38.pyc new file mode 100644 index 0000000..ea40cea Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/tests/__pycache__/test_scripts.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/tests/__pycache__/test_warnings.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/tests/__pycache__/test_warnings.cpython-38.pyc new file mode 100644 index 0000000..f05a8b5 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/tests/__pycache__/test_warnings.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/tests/test__all__.py b/.local/lib/python3.8/site-packages/numpy/tests/test__all__.py new file mode 100644 index 0000000..e44bda3 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/tests/test__all__.py @@ -0,0 +1,9 @@ + +import collections +import numpy as np + + +def test_no_duplicates_in_np__all__(): + # Regression test for gh-10198. 
+ dups = {k: v for k, v in collections.Counter(np.__all__).items() if v > 1} + assert len(dups) == 0 diff --git a/.local/lib/python3.8/site-packages/numpy/tests/test_ctypeslib.py b/.local/lib/python3.8/site-packages/numpy/tests/test_ctypeslib.py new file mode 100644 index 0000000..1ea0837 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/tests/test_ctypeslib.py @@ -0,0 +1,368 @@ +import sys +import pytest +import weakref +from pathlib import Path + +import numpy as np +from numpy.ctypeslib import ndpointer, load_library, as_array +from numpy.distutils.misc_util import get_shared_lib_extension +from numpy.testing import assert_, assert_array_equal, assert_raises, assert_equal + +try: + import ctypes +except ImportError: + ctypes = None +else: + cdll = None + test_cdll = None + if hasattr(sys, 'gettotalrefcount'): + try: + cdll = load_library('_multiarray_umath_d', np.core._multiarray_umath.__file__) + except OSError: + pass + try: + test_cdll = load_library('_multiarray_tests', np.core._multiarray_tests.__file__) + except OSError: + pass + if cdll is None: + cdll = load_library('_multiarray_umath', np.core._multiarray_umath.__file__) + if test_cdll is None: + test_cdll = load_library('_multiarray_tests', np.core._multiarray_tests.__file__) + + c_forward_pointer = test_cdll.forward_pointer + + +@pytest.mark.skipif(ctypes is None, + reason="ctypes not available in this python") +@pytest.mark.skipif(sys.platform == 'cygwin', + reason="Known to fail on cygwin") +class TestLoadLibrary: + def test_basic(self): + loader_path = np.core._multiarray_umath.__file__ + + out1 = load_library('_multiarray_umath', loader_path) + out2 = load_library(Path('_multiarray_umath'), loader_path) + out3 = load_library('_multiarray_umath', Path(loader_path)) + out4 = load_library(b'_multiarray_umath', loader_path) + + assert isinstance(out1, ctypes.CDLL) + assert out1 is out2 is out3 is out4 + + def test_basic2(self): + # Regression for #801: load_library with a full library name + # (including extension) does not work. 
+ try: + try: + so = get_shared_lib_extension(is_python_ext=True) + # Should succeed + load_library('_multiarray_umath%s' % so, np.core._multiarray_umath.__file__) + except ImportError: + print("No distutils available, skipping test.") + except ImportError as e: + msg = ("ctypes is not available on this python: skipping the test" + " (import error was: %s)" % str(e)) + print(msg) + + +class TestNdpointer: + def test_dtype(self): + dt = np.intc + p = ndpointer(dtype=dt) + assert_(p.from_param(np.array([1], dt))) + dt = '<i4' + p = ndpointer(dtype=dt) + assert_(p.from_param(np.array([1], dt))) + dt = np.dtype('>i4') + p = ndpointer(dtype=dt) + p.from_param(np.array([1], dt)) + assert_raises(TypeError, p.from_param, + np.array([1], dt.newbyteorder('swap'))) + dtnames = ['x', 'y'] + dtformats = [np.intc, np.float64] + dtdescr = {'names': dtnames, 'formats': dtformats} + dt = np.dtype(dtdescr) + p = ndpointer(dtype=dt) + assert_(p.from_param(np.zeros((10,), dt))) + samedt = np.dtype(dtdescr) + p = ndpointer(dtype=samedt) + assert_(p.from_param(np.zeros((10,), dt))) + dt2 = np.dtype(dtdescr, align=True) + if dt.itemsize != dt2.itemsize: + assert_raises(TypeError, p.from_param, np.zeros((10,), dt2)) + else: + assert_(p.from_param(np.zeros((10,), dt2))) + + def test_ndim(self): + p = ndpointer(ndim=0) + assert_(p.from_param(np.array(1))) + assert_raises(TypeError, p.from_param, np.array([1])) + p = ndpointer(ndim=1) + assert_raises(TypeError, p.from_param, np.array(1)) + assert_(p.from_param(np.array([1]))) + p = ndpointer(ndim=2) + assert_(p.from_param(np.array([[1]]))) + + def test_shape(self): + p = ndpointer(shape=(1, 2)) + assert_(p.from_param(np.array([[1, 2]]))) + assert_raises(TypeError, p.from_param, np.array([[1], [2]])) + p = ndpointer(shape=()) + assert_(p.from_param(np.array(1))) + + def test_flags(self): + x = np.array([[1, 2], [3, 4]], order='F') + p = ndpointer(flags='FORTRAN') + assert_(p.from_param(x)) + p = ndpointer(flags='CONTIGUOUS') + assert_raises(TypeError, p.from_param, x) + p = ndpointer(flags=x.flags.num) + assert_(p.from_param(x)) + assert_raises(TypeError, p.from_param, np.array([[1, 2], [3, 4]])) + + def test_cache(self): + assert_(ndpointer(dtype=np.float64) is ndpointer(dtype=np.float64)) + + # shapes are normalized + assert_(ndpointer(shape=2) is ndpointer(shape=(2,))) + + # 1.12 <= v < 1.16 had a bug that made these fail + assert_(ndpointer(shape=2) is not ndpointer(ndim=2)) + assert_(ndpointer(ndim=2) is not ndpointer(shape=2)) + +@pytest.mark.skipif(ctypes is None, + reason="ctypes not available on this python installation") +class TestNdpointerCFunc: + def test_arguments(self): + """ Test that arguments are coerced from arrays """ + c_forward_pointer.restype = ctypes.c_void_p + c_forward_pointer.argtypes = (ndpointer(ndim=2),) + + c_forward_pointer(np.zeros((2, 3))) + # too many dimensions + assert_raises( + ctypes.ArgumentError, c_forward_pointer, np.zeros((2, 3, 4))) + + @pytest.mark.parametrize( + 'dt', [ + float, + np.dtype(dict( + formats=['<i4', '<i4'], + +class TestAsCtypesType: + def test_scalar(self): + dt = np.dtype('<u2') + ct = np.ctypeslib.as_ctypes_type(dt) + assert_equal(ct, ctypes.c_uint16.__ctype_le__) + + dt = np.dtype('>u2') + ct = np.ctypeslib.as_ctypes_type(dt) + assert_equal(ct, ctypes.c_uint16.__ctype_be__) + + dt = np.dtype('u2') + ct = np.ctypeslib.as_ctypes_type(dt) + assert_equal(ct, ctypes.c_uint16) + + def test_subarray(self): + dt = np.dtype((np.int32, (2, 3))) + ct = np.ctypeslib.as_ctypes_type(dt) + assert_equal(ct, 2 * (3 * ctypes.c_int32)) + + def test_structure(self): + dt = np.dtype([ + ('a', np.uint16), + ('b', np.uint32), + ]) + + ct = np.ctypeslib.as_ctypes_type(dt) + assert_(issubclass(ct, ctypes.Structure)) + assert_equal(ctypes.sizeof(ct), dt.itemsize) + assert_equal(ct._fields_, [ + ('a',
ctypes.c_uint16), + ('b', ctypes.c_uint32), + ]) + + def test_structure_aligned(self): + dt = np.dtype([ + ('a', np.uint16), + ('b', np.uint32), + ], align=True) + + ct = np.ctypeslib.as_ctypes_type(dt) + assert_(issubclass(ct, ctypes.Structure)) + assert_equal(ctypes.sizeof(ct), dt.itemsize) + assert_equal(ct._fields_, [ + ('a', ctypes.c_uint16), + ('', ctypes.c_char * 2), # padding + ('b', ctypes.c_uint32), + ]) + + def test_union(self): + dt = np.dtype(dict( + names=['a', 'b'], + offsets=[0, 0], + formats=[np.uint16, np.uint32] + )) + + ct = np.ctypeslib.as_ctypes_type(dt) + assert_(issubclass(ct, ctypes.Union)) + assert_equal(ctypes.sizeof(ct), dt.itemsize) + assert_equal(ct._fields_, [ + ('a', ctypes.c_uint16), + ('b', ctypes.c_uint32), + ]) + + def test_padded_union(self): + dt = np.dtype(dict( + names=['a', 'b'], + offsets=[0, 0], + formats=[np.uint16, np.uint32], + itemsize=5, + )) + + ct = np.ctypeslib.as_ctypes_type(dt) + assert_(issubclass(ct, ctypes.Union)) + assert_equal(ctypes.sizeof(ct), dt.itemsize) + assert_equal(ct._fields_, [ + ('a', ctypes.c_uint16), + ('b', ctypes.c_uint32), + ('', ctypes.c_char * 5), # padding + ]) + + def test_overlapping(self): + dt = np.dtype(dict( + names=['a', 'b'], + offsets=[0, 2], + formats=[np.uint32, np.uint32] + )) + assert_raises(NotImplementedError, np.ctypeslib.as_ctypes_type, dt) diff --git a/.local/lib/python3.8/site-packages/numpy/tests/test_matlib.py b/.local/lib/python3.8/site-packages/numpy/tests/test_matlib.py new file mode 100644 index 0000000..0e93c48 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/tests/test_matlib.py @@ -0,0 +1,58 @@ +import numpy as np +import numpy.matlib +from numpy.testing import assert_array_equal, assert_ + +def test_empty(): + x = numpy.matlib.empty((2,)) + assert_(isinstance(x, np.matrix)) + assert_(x.shape, (1, 2)) + +def test_ones(): + assert_array_equal(numpy.matlib.ones((2, 3)), + np.matrix([[ 1., 1., 1.], + [ 1., 1., 1.]])) + + assert_array_equal(numpy.matlib.ones(2), np.matrix([[ 1., 1.]])) + +def test_zeros(): + assert_array_equal(numpy.matlib.zeros((2, 3)), + np.matrix([[ 0., 0., 0.], + [ 0., 0., 0.]])) + + assert_array_equal(numpy.matlib.zeros(2), np.matrix([[ 0., 0.]])) + +def test_identity(): + x = numpy.matlib.identity(2, dtype=int) + assert_array_equal(x, np.matrix([[1, 0], [0, 1]])) + +def test_eye(): + xc = numpy.matlib.eye(3, k=1, dtype=int) + assert_array_equal(xc, np.matrix([[ 0, 1, 0], + [ 0, 0, 1], + [ 0, 0, 0]])) + assert xc.flags.c_contiguous + assert not xc.flags.f_contiguous + + xf = numpy.matlib.eye(3, 4, dtype=int, order='F') + assert_array_equal(xf, np.matrix([[ 1, 0, 0, 0], + [ 0, 1, 0, 0], + [ 0, 0, 1, 0]])) + assert not xf.flags.c_contiguous + assert xf.flags.f_contiguous + +def test_rand(): + x = numpy.matlib.rand(3) + # check matrix type, array would have shape (3,) + assert_(x.ndim == 2) + +def test_randn(): + x = np.matlib.randn(3) + # check matrix type, array would have shape (3,) + assert_(x.ndim == 2) + +def test_repmat(): + a1 = np.arange(4) + x = numpy.matlib.repmat(a1, 2, 2) + y = np.array([[0, 1, 2, 3, 0, 1, 2, 3], + [0, 1, 2, 3, 0, 1, 2, 3]]) + assert_array_equal(x, y) diff --git a/.local/lib/python3.8/site-packages/numpy/tests/test_numpy_version.py b/.local/lib/python3.8/site-packages/numpy/tests/test_numpy_version.py new file mode 100644 index 0000000..bccbcb8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/tests/test_numpy_version.py @@ -0,0 +1,44 @@ +""" +Check the numpy version is valid. 
+ +Note that a development version is marked by the presence of 'dev0' or '+' +in the version string, all else is treated as a release. The version string +itself is set from the output of ``git describe`` which relies on tags. + +Examples +-------- + +Valid Development: 1.22.0.dev0 1.22.0.dev0+5-g7999db4df2 1.22.0+5-g7999db4df2 +Valid Release: 1.21.0.rc1, 1.21.0.b1, 1.21.0 +Invalid: 1.22.0.dev, 1.22.0.dev0-5-g7999db4dfB, 1.21.0.d1, 1.21.a + +Note that a release is determined by the version string, which in turn +is controlled by the result of the ``git describe`` command. +""" +import re + +import numpy as np +from numpy.testing import assert_ + + +def test_valid_numpy_version(): + # Verify that the numpy version is a valid one (no .post suffix or other + # nonsense). See gh-6431 for an issue caused by an invalid version. + version_pattern = r"^[0-9]+\.[0-9]+\.[0-9]+(a[0-9]|b[0-9]|rc[0-9]|)" + dev_suffix = r"(\.dev0|)(\+[0-9]*\.g[0-9a-f]+|)" + if np.version.release: + res = re.match(version_pattern + '$', np.__version__) + else: + res = re.match(version_pattern + dev_suffix + '$', np.__version__) + + assert_(res is not None, np.__version__) + + +def test_short_version(): + # Check numpy.short_version actually exists + if np.version.release: + assert_(np.__version__ == np.version.short_version, + "short_version mismatch in release version") + else: + assert_(np.__version__.split("+")[0] == np.version.short_version, + "short_version mismatch in development version") diff --git a/.local/lib/python3.8/site-packages/numpy/tests/test_public_api.py b/.local/lib/python3.8/site-packages/numpy/tests/test_public_api.py new file mode 100644 index 0000000..bb15e10 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/tests/test_public_api.py @@ -0,0 +1,501 @@ +import sys +import sysconfig +import subprocess +import pkgutil +import types +import importlib +import warnings + +import numpy as np +import numpy +import pytest + +try: + import ctypes +except ImportError: + ctypes = None + + +def check_dir(module, module_name=None): + """Returns a mapping of all objects with the wrong __module__ attribute.""" + if module_name is None: + module_name = module.__name__ + results = {} + for name in dir(module): + item = getattr(module, name) + if (hasattr(item, '__module__') and hasattr(item, '__name__') + and item.__module__ != module_name): + results[name] = item.__module__ + '.' 
+ item.__name__ + return results + + +def test_numpy_namespace(): + # None of these objects are publicly documented to be part of the main + # NumPy namespace (some are useful though, others need to be cleaned up) + undocumented = { + 'Tester': 'numpy.testing._private.nosetester.NoseTester', + '_add_newdoc_ufunc': 'numpy.core._multiarray_umath._add_newdoc_ufunc', + 'add_docstring': 'numpy.core._multiarray_umath.add_docstring', + 'add_newdoc': 'numpy.core.function_base.add_newdoc', + 'add_newdoc_ufunc': 'numpy.core._multiarray_umath._add_newdoc_ufunc', + 'byte_bounds': 'numpy.lib.utils.byte_bounds', + 'compare_chararrays': 'numpy.core._multiarray_umath.compare_chararrays', + 'deprecate': 'numpy.lib.utils.deprecate', + 'deprecate_with_doc': 'numpy.lib.utils.deprecate_with_doc', + 'disp': 'numpy.lib.function_base.disp', + 'fastCopyAndTranspose': 'numpy.core._multiarray_umath._fastCopyAndTranspose', + 'get_array_wrap': 'numpy.lib.shape_base.get_array_wrap', + 'get_include': 'numpy.lib.utils.get_include', + 'recfromcsv': 'numpy.lib.npyio.recfromcsv', + 'recfromtxt': 'numpy.lib.npyio.recfromtxt', + 'safe_eval': 'numpy.lib.utils.safe_eval', + 'set_string_function': 'numpy.core.arrayprint.set_string_function', + 'show_config': 'numpy.__config__.show', + 'who': 'numpy.lib.utils.who', + } + # We override dir to not show these members + allowlist = undocumented + bad_results = check_dir(np) + # pytest gives better error messages with the builtin assert than with + # assert_equal + assert bad_results == allowlist + + +@pytest.mark.parametrize('name', ['testing', 'Tester']) +def test_import_lazy_import(name): + """Make sure we can actually use the modules we lazy load. + + While not exported as part of the public API, it was accessible. With the + use of __getattr__ and __dir__, this isn't always true: it can happen that + an infinite recursion occurs. + + This is the only way I found that would force the failure to appear on the + badly implemented code. + + We also test for the presence of the lazily imported modules in dir. + + """ + exe = (sys.executable, '-c', "import numpy; numpy." + name) + result = subprocess.check_output(exe) + assert not result + + # Make sure they are still in the __dir__ + assert name in dir(np) + + +def test_dir_testing(): + """Assert that output of dir has only one "testing/tester" + attribute without duplicate""" + assert len(dir(np)) == len(set(dir(np))) + + +def test_numpy_linalg(): + bad_results = check_dir(np.linalg) + assert bad_results == {} + + +def test_numpy_fft(): + bad_results = check_dir(np.fft) + assert bad_results == {} + + +@pytest.mark.skipif(ctypes is None, + reason="ctypes not available in this python") +def test_NPY_NO_EXPORT(): + cdll = ctypes.CDLL(np.core._multiarray_tests.__file__) + # Make sure an arbitrary NPY_NO_EXPORT function is actually hidden + f = getattr(cdll, 'test_not_exported', None) + assert f is None, ("'test_not_exported' is mistakenly exported, " + "NPY_NO_EXPORT does not work") + + +# Historically NumPy has not used leading underscores for private submodules +# much. This has resulted in lots of things that look like public modules +# (i.e. things that can be imported as `import numpy.somesubmodule.somefile`), +# but were never intended to be public. The PUBLIC_MODULES list contains +# modules that are either public because they were meant to be, or because they +# contain public functions/objects that aren't present in any other namespace +# for whatever reason and therefore should be treated as public.
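A minimal sketch of the filename-based walk this policy is enforced with further down (the filters mirror the is_unexpected heuristic below; nothing here is specific to one NumPy version):

import pkgutil
import numpy as np

# Enumerate every importable submodule under numpy/ without importing it;
# names containing '._', '.tests' or '.setup' are private by convention.
public_looking = [
    modname
    for _, modname, _ in pkgutil.walk_packages(path=np.__path__,
                                               prefix=np.__name__ + '.')
    if '._' not in modname and '.tests' not in modname
       and '.setup' not in modname
]
print(len(public_looking))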
+# +# The PRIVATE_BUT_PRESENT_MODULES list contains modules that look public (lack +# of underscores) but should not be used. For many of those modules the +# current status is fine. For others it may make sense to work on making them +# private, to clean up our public API and avoid confusion. +PUBLIC_MODULES = ['numpy.' + s for s in [ + "array_api", + "array_api.linalg", + "ctypeslib", + "distutils", + "distutils.cpuinfo", + "distutils.exec_command", + "distutils.misc_util", + "distutils.log", + "distutils.system_info", + "doc", + "doc.constants", + "doc.ufuncs", + "f2py", + "fft", + "lib", + "lib.format", # was this meant to be public? + "lib.mixins", + "lib.recfunctions", + "lib.scimath", + "lib.stride_tricks", + "linalg", + "ma", + "ma.extras", + "ma.mrecords", + "matlib", + "polynomial", + "polynomial.chebyshev", + "polynomial.hermite", + "polynomial.hermite_e", + "polynomial.laguerre", + "polynomial.legendre", + "polynomial.polynomial", + "random", + "testing", + "typing", + "typing.mypy_plugin", + "version", +]] + + +PUBLIC_ALIASED_MODULES = [ + "numpy.char", + "numpy.emath", + "numpy.rec", +] + + +PRIVATE_BUT_PRESENT_MODULES = ['numpy.' + s for s in [ + "compat", + "compat.py3k", + "conftest", + "core", + "core.arrayprint", + "core.defchararray", + "core.einsumfunc", + "core.fromnumeric", + "core.function_base", + "core.getlimits", + "core.memmap", + "core.multiarray", + "core.numeric", + "core.numerictypes", + "core.overrides", + "core.records", + "core.shape_base", + "core.umath", + "core.umath_tests", + "distutils.armccompiler", + "distutils.ccompiler", + 'distutils.ccompiler_opt', + "distutils.command", + "distutils.command.autodist", + "distutils.command.bdist_rpm", + "distutils.command.build", + "distutils.command.build_clib", + "distutils.command.build_ext", + "distutils.command.build_py", + "distutils.command.build_scripts", + "distutils.command.build_src", + "distutils.command.config", + "distutils.command.config_compiler", + "distutils.command.develop", + "distutils.command.egg_info", + "distutils.command.install", + "distutils.command.install_clib", + "distutils.command.install_data", + "distutils.command.install_headers", + "distutils.command.sdist", + "distutils.conv_template", + "distutils.core", + "distutils.extension", + "distutils.fcompiler", + "distutils.fcompiler.absoft", + "distutils.fcompiler.arm", + "distutils.fcompiler.compaq", + "distutils.fcompiler.environment", + "distutils.fcompiler.g95", + "distutils.fcompiler.gnu", + "distutils.fcompiler.hpux", + "distutils.fcompiler.ibm", + "distutils.fcompiler.intel", + "distutils.fcompiler.lahey", + "distutils.fcompiler.mips", + "distutils.fcompiler.nag", + "distutils.fcompiler.none", + "distutils.fcompiler.pathf95", + "distutils.fcompiler.pg", + "distutils.fcompiler.nv", + "distutils.fcompiler.sun", + "distutils.fcompiler.vast", + "distutils.fcompiler.fujitsu", + "distutils.from_template", + "distutils.intelccompiler", + "distutils.lib2def", + "distutils.line_endings", + "distutils.mingw32ccompiler", + "distutils.msvccompiler", + "distutils.npy_pkg_config", + "distutils.numpy_distribution", + "distutils.pathccompiler", + "distutils.unixccompiler", + "dual", + "f2py.auxfuncs", + "f2py.capi_maps", + "f2py.cb_rules", + "f2py.cfuncs", + "f2py.common_rules", + "f2py.crackfortran", + "f2py.diagnose", + "f2py.f2py2e", + "f2py.f2py_testing", + "f2py.f90mod_rules", + "f2py.func2subr", + "f2py.rules", + "f2py.symbolic", + "f2py.use_rules", + "fft.helper", + "lib.arraypad", + "lib.arraysetops", + "lib.arrayterator", + 
"lib.function_base", + "lib.histograms", + "lib.index_tricks", + "lib.nanfunctions", + "lib.npyio", + "lib.polynomial", + "lib.shape_base", + "lib.twodim_base", + "lib.type_check", + "lib.ufunclike", + "lib.user_array", # note: not in np.lib, but probably should just be deleted + "lib.utils", + "linalg.lapack_lite", + "linalg.linalg", + "ma.bench", + "ma.core", + "ma.testutils", + "ma.timer_comparison", + "matrixlib", + "matrixlib.defmatrix", + "polynomial.polyutils", + "random.mtrand", + "random.bit_generator", + "testing.print_coercion_tables", + "testing.utils", +]] + + +def is_unexpected(name): + """Check if this needs to be considered.""" + if '._' in name or '.tests' in name or '.setup' in name: + return False + + if name in PUBLIC_MODULES: + return False + + if name in PUBLIC_ALIASED_MODULES: + return False + + if name in PRIVATE_BUT_PRESENT_MODULES: + return False + + return True + + +# These are present in a directory with an __init__.py but cannot be imported +# code_generators/ isn't installed, but present for an inplace build +SKIP_LIST = [ + "numpy.core.code_generators", + "numpy.core.code_generators.genapi", + "numpy.core.code_generators.generate_umath", + "numpy.core.code_generators.ufunc_docstrings", + "numpy.core.code_generators.generate_numpy_api", + "numpy.core.code_generators.generate_ufunc_api", + "numpy.core.code_generators.numpy_api", + "numpy.core.cversions", + "numpy.core.generate_numpy_api", + "numpy.distutils.msvc9compiler", +] + + +def test_all_modules_are_expected(): + """ + Test that we don't add anything that looks like a new public module by + accident. Check is based on filenames. + """ + + modnames = [] + for _, modname, ispkg in pkgutil.walk_packages(path=np.__path__, + prefix=np.__name__ + '.', + onerror=None): + if is_unexpected(modname) and modname not in SKIP_LIST: + # We have a name that is new. If that's on purpose, add it to + # PUBLIC_MODULES. We don't expect to have to add anything to + # PRIVATE_BUT_PRESENT_MODULES. Use an underscore in the name! + modnames.append(modname) + + if modnames: + raise AssertionError(f'Found unexpected modules: {modnames}') + + +# Stuff that clearly shouldn't be in the API and is detected by the next test +# below +SKIP_LIST_2 = [ + 'numpy.math', + 'numpy.distutils.log.sys', + 'numpy.doc.constants.re', + 'numpy.doc.constants.textwrap', + 'numpy.lib.emath', + 'numpy.lib.math', + 'numpy.matlib.char', + 'numpy.matlib.rec', + 'numpy.matlib.emath', + 'numpy.matlib.math', + 'numpy.matlib.linalg', + 'numpy.matlib.fft', + 'numpy.matlib.random', + 'numpy.matlib.ctypeslib', + 'numpy.matlib.ma', +] + + +def test_all_modules_are_expected_2(): + """ + Method checking all objects. The pkgutil-based method in + `test_all_modules_are_expected` does not catch imports into a namespace, + only filenames. So this test is more thorough, and checks this like: + + import .lib.scimath as emath + + To check if something in a module is (effectively) public, one can check if + there's anything in that namespace that's a public function/object but is + not exposed in a higher-level namespace. 
For example for a `numpy.lib` + submodule:: + + mod = np.lib.mixins + for obj in mod.__all__: + if obj in np.__all__: + continue + elif obj in np.lib.__all__: + continue + + else: + print(obj) + + """ + + def find_unexpected_members(mod_name): + members = [] + module = importlib.import_module(mod_name) + if hasattr(module, '__all__'): + objnames = module.__all__ + else: + objnames = dir(module) + + for objname in objnames: + if not objname.startswith('_'): + fullobjname = mod_name + '.' + objname + if isinstance(getattr(module, objname), types.ModuleType): + if is_unexpected(fullobjname): + if fullobjname not in SKIP_LIST_2: + members.append(fullobjname) + + return members + + unexpected_members = find_unexpected_members("numpy") + for modname in PUBLIC_MODULES: + unexpected_members.extend(find_unexpected_members(modname)) + + if unexpected_members: + raise AssertionError("Found unexpected object(s) that look like " + "modules: {}".format(unexpected_members)) + + +def test_api_importable(): + """ + Check that all submodules listed higher up in this file can be imported + + Note that if a PRIVATE_BUT_PRESENT_MODULES entry goes missing, it may + simply need to be removed from the list (deprecation may or may not be + needed - apply common sense). + """ + def check_importable(module_name): + try: + importlib.import_module(module_name) + except (ImportError, AttributeError): + return False + + return True + + module_names = [] + for module_name in PUBLIC_MODULES: + if not check_importable(module_name): + module_names.append(module_name) + + if module_names: + raise AssertionError("Modules in the public API that cannot be " + "imported: {}".format(module_names)) + + for module_name in PUBLIC_ALIASED_MODULES: + try: + eval(module_name) + except AttributeError: + module_names.append(module_name) + + if module_names: + raise AssertionError("Modules in the public API that were not " + "found: {}".format(module_names)) + + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', category=DeprecationWarning) + warnings.filterwarnings('always', category=ImportWarning) + for module_name in PRIVATE_BUT_PRESENT_MODULES: + if not check_importable(module_name): + module_names.append(module_name) + + if module_names: + raise AssertionError("Modules that are not really public but looked " + "public and can not be imported: " + "{}".format(module_names)) + + +@pytest.mark.xfail( + sysconfig.get_config_var("Py_DEBUG") is not None, + reason=( + "NumPy possibly built with `USE_DEBUG=True ./tools/travis-test.sh`, " + "which does not expose the `array_api` entry point. " + "See https://github.com/numpy/numpy/pull/19800" + ), +) +def test_array_api_entry_point(): + """ + Entry point for Array API implementation can be found with importlib and + returns the numpy.array_api namespace. + """ + eps = importlib.metadata.entry_points() + try: + xp_eps = eps.select(group="array_api") + except AttributeError: + # The select interface for entry_points was introduced in py3.10, + # deprecating its dict interface. We fallback to dict keys for finding + # Array API entry points so that running this test in <=3.9 will + # still work - see https://github.com/numpy/numpy/pull/19800. 
+ xp_eps = eps.get("array_api", []) + assert len(xp_eps) > 0, "No entry points for 'array_api' found" + + try: + ep = next(ep for ep in xp_eps if ep.name == "numpy") + except StopIteration: + raise AssertionError("'numpy' not in array_api entry points") from None + + xp = ep.load() + msg = ( + f"numpy entry point value '{ep.value}' " + "does not point to our Array API implementation" + ) + assert xp is numpy.array_api, msg diff --git a/.local/lib/python3.8/site-packages/numpy/tests/test_reloading.py b/.local/lib/python3.8/site-packages/numpy/tests/test_reloading.py new file mode 100644 index 0000000..8d8c8aa --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/tests/test_reloading.py @@ -0,0 +1,64 @@ +from numpy.testing import assert_raises, assert_warns, assert_, assert_equal +from numpy.compat import pickle + +import sys +import subprocess +import textwrap +from importlib import reload + + +def test_numpy_reloading(): + # gh-7844. Also check that relevant globals retain their identity. + import numpy as np + import numpy._globals + + _NoValue = np._NoValue + VisibleDeprecationWarning = np.VisibleDeprecationWarning + ModuleDeprecationWarning = np.ModuleDeprecationWarning + + with assert_warns(UserWarning): + reload(np) + assert_(_NoValue is np._NoValue) + assert_(ModuleDeprecationWarning is np.ModuleDeprecationWarning) + assert_(VisibleDeprecationWarning is np.VisibleDeprecationWarning) + + assert_raises(RuntimeError, reload, numpy._globals) + with assert_warns(UserWarning): + reload(np) + assert_(_NoValue is np._NoValue) + assert_(ModuleDeprecationWarning is np.ModuleDeprecationWarning) + assert_(VisibleDeprecationWarning is np.VisibleDeprecationWarning) + +def test_novalue(): + import numpy as np + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + assert_equal(repr(np._NoValue), '<no value>') + assert_(pickle.loads(pickle.dumps(np._NoValue, + protocol=proto)) is np._NoValue) + + +def test_full_reimport(): + """At the time of writing this, it is *not* truly supported, but + apparently enough users rely on it, for it to be an annoying change + when it started failing previously. + """ + # Test within a new process, to ensure that we do not mess with the + # global state during the test run (could lead to cryptic test failures). + # This is generally unsafe, especially, since we also reload the C-modules. + code = textwrap.dedent(r""" + import sys + from pytest import warns + import numpy as np + + for k in list(sys.modules.keys()): + if "numpy" in k: + del sys.modules[k] + + with warns(UserWarning): + import numpy as np + """) + p = subprocess.run([sys.executable, '-c', code], capture_output=True) + if p.returncode: + raise AssertionError( + f"Non-zero return code: {p.returncode!r}\n\n{p.stderr.decode()}" + ) diff --git a/.local/lib/python3.8/site-packages/numpy/tests/test_scripts.py b/.local/lib/python3.8/site-packages/numpy/tests/test_scripts.py new file mode 100644 index 0000000..e67a829 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/tests/test_scripts.py @@ -0,0 +1,46 @@ +""" Test scripts + +Test that we can run executable scripts that have been installed with numpy.
+""" +import sys +import os +import pytest +from os.path import join as pathjoin, isfile, dirname +import subprocess + +import numpy as np +from numpy.testing import assert_equal + +is_inplace = isfile(pathjoin(dirname(np.__file__), '..', 'setup.py')) + + +def find_f2py_commands(): + if sys.platform == 'win32': + exe_dir = dirname(sys.executable) + if exe_dir.endswith('Scripts'): # virtualenv + return [os.path.join(exe_dir, 'f2py')] + else: + return [os.path.join(exe_dir, "Scripts", 'f2py')] + else: + # Three scripts are installed in Unix-like systems: + # 'f2py', 'f2py{major}', and 'f2py{major.minor}'. For example, + # if installed with python3.7 the scripts would be named + # 'f2py', 'f2py3', and 'f2py3.7'. + version = sys.version_info + major = str(version.major) + minor = str(version.minor) + return ['f2py', 'f2py' + major, 'f2py' + major + '.' + minor] + + +@pytest.mark.skipif(is_inplace, reason="Cannot test f2py command inplace") +@pytest.mark.xfail(reason="Test is unreliable") +@pytest.mark.parametrize('f2py_cmd', find_f2py_commands()) +def test_f2py(f2py_cmd): + # test that we can run f2py script + stdout = subprocess.check_output([f2py_cmd, '-v']) + assert_equal(stdout.strip(), np.__version__.encode('ascii')) + + +def test_pep338(): + stdout = subprocess.check_output([sys.executable, '-mnumpy.f2py', '-v']) + assert_equal(stdout.strip(), np.__version__.encode('ascii')) diff --git a/.local/lib/python3.8/site-packages/numpy/tests/test_warnings.py b/.local/lib/python3.8/site-packages/numpy/tests/test_warnings.py new file mode 100644 index 0000000..d7a6d88 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/tests/test_warnings.py @@ -0,0 +1,74 @@ +""" +Tests which scan for certain occurrences in the code, they may not find +all of these occurrences but should catch almost all. +""" +import pytest + +from pathlib import Path +import ast +import tokenize +import numpy + +class ParseCall(ast.NodeVisitor): + def __init__(self): + self.ls = [] + + def visit_Attribute(self, node): + ast.NodeVisitor.generic_visit(self, node) + self.ls.append(node.attr) + + def visit_Name(self, node): + self.ls.append(node.id) + + +class FindFuncs(ast.NodeVisitor): + def __init__(self, filename): + super().__init__() + self.__filename = filename + + def visit_Call(self, node): + p = ParseCall() + p.visit(node.func) + ast.NodeVisitor.generic_visit(self, node) + + if p.ls[-1] == 'simplefilter' or p.ls[-1] == 'filterwarnings': + if node.args[0].s == "ignore": + raise AssertionError( + "warnings should have an appropriate stacklevel; found in " + "{} on line {}".format(self.__filename, node.lineno)) + + if p.ls[-1] == 'warn' and ( + len(p.ls) == 1 or p.ls[-2] == 'warnings'): + + if "testing/tests/test_warnings.py" == self.__filename: + # This file + return + + # See if stacklevel exists: + if len(node.args) == 3: + return + args = {kw.arg for kw in node.keywords} + if "stacklevel" in args: + return + raise AssertionError( + "warnings should have an appropriate stacklevel; found in " + "{} on line {}".format(self.__filename, node.lineno)) + + +@pytest.mark.slow +def test_warning_calls(): + # combined "ignore" and stacklevel error + base = Path(numpy.__file__).parent + + for path in base.rglob("*.py"): + if base / "testing" in path.parents: + continue + if path == base / "__init__.py": + continue + if path == base / "random" / "__init__.py": + continue + # use tokenize to auto-detect encoding on systems where no + # default encoding is defined (e.g. 
LANG='C') + with tokenize.open(str(path)) as file: + tree = ast.parse(file.read()) + FindFuncs(path).visit(tree) diff --git a/.local/lib/python3.8/site-packages/numpy/typing/__init__.py b/.local/lib/python3.8/site-packages/numpy/typing/__init__.py new file mode 100644 index 0000000..d5cfbf5 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/__init__.py @@ -0,0 +1,394 @@ +""" +============================ +Typing (:mod:`numpy.typing`) +============================ + +.. versionadded:: 1.20 + +Large parts of the NumPy API have PEP-484-style type annotations. In +addition a number of type aliases are available to users, most prominently +the two below: + +- `ArrayLike`: objects that can be converted to arrays +- `DTypeLike`: objects that can be converted to dtypes + +.. _typing-extensions: https://pypi.org/project/typing-extensions/ + +Mypy plugin +----------- + +.. versionadded:: 1.21 + +.. automodule:: numpy.typing.mypy_plugin + +.. currentmodule:: numpy.typing + +Differences from the runtime NumPy API +-------------------------------------- + +NumPy is very flexible. Trying to describe the full range of +possibilities statically would result in types that are not very +helpful. For that reason, the typed NumPy API is often stricter than +the runtime NumPy API. This section describes some notable +differences. + +ArrayLike +~~~~~~~~~ + +The `ArrayLike` type tries to avoid creating object arrays. For +example, + +.. code-block:: python + + >>> np.array(x**2 for x in range(10)) + array(<generator object <genexpr> at ...>, dtype=object) + +is valid NumPy code which will create a 0-dimensional object +array. Type checkers will complain about the above example when using +the NumPy types however. If you really intended to do the above, then +you can either use a ``# type: ignore`` comment: + +.. code-block:: python + + >>> np.array(x**2 for x in range(10)) # type: ignore + +or explicitly type the array like object as `~typing.Any`: + +.. code-block:: python + + >>> from typing import Any + >>> array_like: Any = (x**2 for x in range(10)) + >>> np.array(array_like) + array(<generator object <genexpr> at ...>, dtype=object) + +ndarray +~~~~~~~ + +It's possible to mutate the dtype of an array at runtime. For example, +the following code is valid: + +.. code-block:: python + + >>> x = np.array([1, 2]) + >>> x.dtype = np.bool_ + +This sort of mutation is not allowed by the types. Users who want to +write statically typed code should instead use the `numpy.ndarray.view` +method to create a view of the array with a different dtype. + +DTypeLike +~~~~~~~~~ + +The `DTypeLike` type tries to avoid creation of dtype objects using +dictionary of fields like below: + +.. code-block:: python + + >>> x = np.dtype({"field1": (float, 1), "field2": (int, 3)}) + +Although this is valid NumPy code, the type checker will complain about it, +since its usage is discouraged. +Please see : :ref:`Data type objects <arrays.dtypes>` + +Number precision +~~~~~~~~~~~~~~~~ + +The precision of `numpy.number` subclasses is treated as a covariant generic +parameter (see :class:`~NBitBase`), simplifying the annotating of processes +involving precision-based casting. + +.. code-block:: python + + >>> from typing import TypeVar + >>> import numpy as np + >>> import numpy.typing as npt + + >>> T = TypeVar("T", bound=npt.NBitBase) + >>> def func(a: "np.floating[T]", b: "np.floating[T]") -> "np.floating[T]": + ... ...
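The example above is doctest-style; here is a runnable variant (the function name `double` is illustrative, and the precision claim is enforced by a type checker, not at runtime):

from typing import TypeVar
import numpy as np
import numpy.typing as npt

T = TypeVar("T", bound=npt.NBitBase)

def double(a: "np.floating[T]") -> "np.floating[T]":
    # floating[T] + floating[T] stays floating[T], so under a checker
    # a float16 argument yields a float16-precision result.
    return a + a

print(double(np.float16(1.5)), double(np.float64(1.5)))  # 3.0 3.0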
+ +Consequently, the likes of `~numpy.float16`, `~numpy.float32` and +`~numpy.float64` are still sub-types of +`~numpy.floating`, but, contrary to +runtime, they're not necessarily considered as sub-classes. + +Timedelta64 +~~~~~~~~~~~ + +The `~numpy.timedelta64` class is not considered a subclass of +`~numpy.signedinteger`, the former only inheriting from `~numpy.generic` +during static type checking. + +0D arrays +~~~~~~~~~ + +During runtime numpy aggressively casts any passed 0D arrays into their +corresponding `~numpy.generic` instance. Until the introduction of shape +typing (see :pep:`646`) it is unfortunately not possible to make the +necessary distinction between 0D and >0D arrays. While thus not strictly +correct, all operations that can potentially perform a 0D-array -> scalar +cast are currently annotated as exclusively returning an `ndarray`. + +If it is known in advance that an operation _will_ perform a +0D-array -> scalar cast, then one can consider manually remedying the +situation with either `typing.cast` or a ``# type: ignore`` comment. + +Record array dtypes +~~~~~~~~~~~~~~~~~~~ + +The dtype of `numpy.recarray`, and the `numpy.rec` functions in general, +can be specified in one of two ways: + +* Directly via the ``dtype`` argument. +* With up to five helper arguments that operate via `numpy.format_parser`: + ``formats``, ``names``, ``titles``, ``aligned`` and ``byteorder``. + +These two approaches are currently typed as being mutually exclusive, +*i.e.* if ``dtype`` is specified then one may not specify ``formats``. +While this mutual exclusivity is not (strictly) enforced during runtime, +combining both dtype specifiers can lead to unexpected or even downright +buggy behavior. + +API +--- + +""" +# NOTE: The API section will be appended with additional entries +# further down in this file + +from __future__ import annotations + +from numpy import ufunc +from typing import TYPE_CHECKING, final + +if not TYPE_CHECKING: + __all__ = ["ArrayLike", "DTypeLike", "NBitBase", "NDArray"] +else: + # Ensure that all objects within this module are accessible while + # static type checking. This includes private ones, as we need them + # for internal use. + # + # Declare to mypy that `__all__` is a list of strings without assigning + # an explicit value + __all__: list[str] + __path__: list[str] + + +@final # Disallow the creation of arbitrary `NBitBase` subclasses +class NBitBase: + """ + A type representing `numpy.number` precision during static type checking. + + Used exclusively for the purpose of static type checking, `NBitBase` + represents the base of a hierarchical set of subclasses. + Each subsequent subclass is herein used for representing a lower level + of precision, *e.g.* ``64Bit > 32Bit > 16Bit``. + + .. versionadded:: 1.20 + + Examples + -------- + Below is a typical usage example: `NBitBase` is herein used for annotating + a function that takes a float and integer of arbitrary precision + as arguments and returns a new float of whichever precision is largest + (*e.g.* ``np.float16 + np.int64 -> np.float64``). + + .. code-block:: python + + >>> from __future__ import annotations + >>> from typing import TypeVar, TYPE_CHECKING + >>> import numpy as np + >>> import numpy.typing as npt + + >>> T1 = TypeVar("T1", bound=npt.NBitBase) + >>> T2 = TypeVar("T2", bound=npt.NBitBase) + + >>> def add(a: np.floating[T1], b: np.integer[T2]) -> np.floating[T1 | T2]: + ... return a + b + + >>> a = np.float16() + >>> b = np.int64() + >>> out = add(a, b) + + >>> if TYPE_CHECKING: + ...
reveal_locals() + ... # note: Revealed local types are: + ... # note: a: numpy.floating[numpy.typing._16Bit*] + ... # note: b: numpy.signedinteger[numpy.typing._64Bit*] + ... # note: out: numpy.floating[numpy.typing._64Bit*] + + """ + + def __init_subclass__(cls) -> None: + allowed_names = { + "NBitBase", "_256Bit", "_128Bit", "_96Bit", "_80Bit", + "_64Bit", "_32Bit", "_16Bit", "_8Bit", + } + if cls.__name__ not in allowed_names: + raise TypeError('cannot inherit from final class "NBitBase"') + super().__init_subclass__() + + +# Silence errors about subclassing a `@final`-decorated class +class _256Bit(NBitBase): # type: ignore[misc] + pass + +class _128Bit(_256Bit): # type: ignore[misc] + pass + +class _96Bit(_128Bit): # type: ignore[misc] + pass + +class _80Bit(_96Bit): # type: ignore[misc] + pass + +class _64Bit(_80Bit): # type: ignore[misc] + pass + +class _32Bit(_64Bit): # type: ignore[misc] + pass + +class _16Bit(_32Bit): # type: ignore[misc] + pass + +class _8Bit(_16Bit): # type: ignore[misc] + pass + + +from ._nested_sequence import _NestedSequence +from ._nbit import ( + _NBitByte, + _NBitShort, + _NBitIntC, + _NBitIntP, + _NBitInt, + _NBitLongLong, + _NBitHalf, + _NBitSingle, + _NBitDouble, + _NBitLongDouble, +) +from ._char_codes import ( + _BoolCodes, + _UInt8Codes, + _UInt16Codes, + _UInt32Codes, + _UInt64Codes, + _Int8Codes, + _Int16Codes, + _Int32Codes, + _Int64Codes, + _Float16Codes, + _Float32Codes, + _Float64Codes, + _Complex64Codes, + _Complex128Codes, + _ByteCodes, + _ShortCodes, + _IntCCodes, + _IntPCodes, + _IntCodes, + _LongLongCodes, + _UByteCodes, + _UShortCodes, + _UIntCCodes, + _UIntPCodes, + _UIntCodes, + _ULongLongCodes, + _HalfCodes, + _SingleCodes, + _DoubleCodes, + _LongDoubleCodes, + _CSingleCodes, + _CDoubleCodes, + _CLongDoubleCodes, + _DT64Codes, + _TD64Codes, + _StrCodes, + _BytesCodes, + _VoidCodes, + _ObjectCodes, +) +from ._scalars import ( + _CharLike_co, + _BoolLike_co, + _UIntLike_co, + _IntLike_co, + _FloatLike_co, + _ComplexLike_co, + _TD64Like_co, + _NumberLike_co, + _ScalarLike_co, + _VoidLike_co, +) +from ._shape import _Shape, _ShapeLike +from ._dtype_like import ( + DTypeLike as DTypeLike, + _SupportsDType, + _VoidDTypeLike, + _DTypeLikeBool, + _DTypeLikeUInt, + _DTypeLikeInt, + _DTypeLikeFloat, + _DTypeLikeComplex, + _DTypeLikeTD64, + _DTypeLikeDT64, + _DTypeLikeObject, + _DTypeLikeVoid, + _DTypeLikeStr, + _DTypeLikeBytes, + _DTypeLikeComplex_co, +) +from ._array_like import ( + ArrayLike as ArrayLike, + _ArrayLike, + _FiniteNestedSequence, + _SupportsArray, + _ArrayLikeInt, + _ArrayLikeBool_co, + _ArrayLikeUInt_co, + _ArrayLikeInt_co, + _ArrayLikeFloat_co, + _ArrayLikeComplex_co, + _ArrayLikeNumber_co, + _ArrayLikeTD64_co, + _ArrayLikeDT64_co, + _ArrayLikeObject_co, + _ArrayLikeVoid_co, + _ArrayLikeStr_co, + _ArrayLikeBytes_co, +) +from ._generic_alias import ( + NDArray as NDArray, + _DType, + _GenericAlias, +) + +if TYPE_CHECKING: + from ._ufunc import ( + _UFunc_Nin1_Nout1, + _UFunc_Nin2_Nout1, + _UFunc_Nin1_Nout2, + _UFunc_Nin2_Nout2, + _GUFunc_Nin2_Nout1, + ) +else: + # Declare the (type-check-only) ufunc subclasses as ufunc aliases during + # runtime; this helps autocompletion tools such as Jedi (numpy/numpy#19834) + _UFunc_Nin1_Nout1 = ufunc + _UFunc_Nin2_Nout1 = ufunc + _UFunc_Nin1_Nout2 = ufunc + _UFunc_Nin2_Nout2 = ufunc + _GUFunc_Nin2_Nout1 = ufunc + +# Clean up the namespace +del TYPE_CHECKING, final, ufunc + +if __doc__ is not None: + from ._add_docstring import _docstrings + __doc__ += _docstrings + __doc__ += '\n.. 
autoclass:: numpy.typing.NBitBase\n' + del _docstrings + +from numpy._pytesttester import PytestTester +test = PytestTester(__name__) +del PytestTester diff --git a/.local/lib/python3.8/site-packages/numpy/typing/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/typing/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..767c9d7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/typing/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/typing/__pycache__/_add_docstring.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/typing/__pycache__/_add_docstring.cpython-38.pyc new file mode 100644 index 0000000..3014f0c Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/typing/__pycache__/_add_docstring.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/typing/__pycache__/_array_like.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/typing/__pycache__/_array_like.cpython-38.pyc new file mode 100644 index 0000000..5ba78fc Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/typing/__pycache__/_array_like.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/typing/__pycache__/_char_codes.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/typing/__pycache__/_char_codes.cpython-38.pyc new file mode 100644 index 0000000..c73ba60 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/typing/__pycache__/_char_codes.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/typing/__pycache__/_dtype_like.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/typing/__pycache__/_dtype_like.cpython-38.pyc new file mode 100644 index 0000000..49c8af9 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/typing/__pycache__/_dtype_like.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/typing/__pycache__/_extended_precision.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/typing/__pycache__/_extended_precision.cpython-38.pyc new file mode 100644 index 0000000..83100f3 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/typing/__pycache__/_extended_precision.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/typing/__pycache__/_generic_alias.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/typing/__pycache__/_generic_alias.cpython-38.pyc new file mode 100644 index 0000000..469f7a9 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/typing/__pycache__/_generic_alias.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/typing/__pycache__/_nbit.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/typing/__pycache__/_nbit.cpython-38.pyc new file mode 100644 index 0000000..0112e3d Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/typing/__pycache__/_nbit.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/typing/__pycache__/_nested_sequence.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/typing/__pycache__/_nested_sequence.cpython-38.pyc new file mode 100644 index 0000000..a6c0041 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/typing/__pycache__/_nested_sequence.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/typing/__pycache__/_scalars.cpython-38.pyc 
b/.local/lib/python3.8/site-packages/numpy/typing/__pycache__/_scalars.cpython-38.pyc new file mode 100644 index 0000000..efd114f Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/typing/__pycache__/_scalars.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/typing/__pycache__/_shape.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/typing/__pycache__/_shape.cpython-38.pyc new file mode 100644 index 0000000..2579fc8 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/typing/__pycache__/_shape.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/typing/__pycache__/mypy_plugin.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/typing/__pycache__/mypy_plugin.cpython-38.pyc new file mode 100644 index 0000000..b444c13 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/typing/__pycache__/mypy_plugin.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/typing/__pycache__/setup.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/typing/__pycache__/setup.cpython-38.pyc new file mode 100644 index 0000000..a30d93c Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/typing/__pycache__/setup.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/typing/_add_docstring.py b/.local/lib/python3.8/site-packages/numpy/typing/_add_docstring.py new file mode 100644 index 0000000..10d77f5 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/_add_docstring.py @@ -0,0 +1,152 @@ +"""A module for creating docstrings for sphinx ``data`` domains.""" + +import re +import textwrap + +from ._generic_alias import NDArray + +_docstrings_list = [] + + +def add_newdoc(name: str, value: str, doc: str) -> None: + """Append ``_docstrings_list`` with a docstring for `name`. + + Parameters + ---------- + name : str + The name of the object. + value : str + A string-representation of the object. + doc : str + The docstring of the object. + + """ + _docstrings_list.append((name, value, doc)) + + +def _parse_docstrings() -> str: + """Convert all docstrings in ``_docstrings_list`` into a single + sphinx-legible text block. + + """ + type_list_ret = [] + for name, value, doc in _docstrings_list: + s = textwrap.dedent(doc).replace("\n", "\n ") + + # Replace sections by rubrics + lines = s.split("\n") + new_lines = [] + indent = "" + for line in lines: + m = re.match(r'^(\s+)[-=]+\s*$', line) + if m and new_lines: + prev = textwrap.dedent(new_lines.pop()) + if prev == "Examples": + indent = "" + new_lines.append(f'{m.group(1)}.. rubric:: {prev}') + else: + indent = 4 * " " + new_lines.append(f'{m.group(1)}.. admonition:: {prev}') + new_lines.append("") + else: + new_lines.append(f"{indent}{line}") + + s = "\n".join(new_lines) + s_block = f""".. data:: {name}\n :value: {value}\n {s}""" + type_list_ret.append(s_block) + return "\n".join(type_list_ret) + + +add_newdoc('ArrayLike', 'typing.Union[...]', + """ + A `~typing.Union` representing objects that can be coerced + into an `~numpy.ndarray`. + + Among others this includes the likes of: + + * Scalars. + * (Nested) sequences. + * Objects implementing the `~class.__array__` protocol. + + .. versionadded:: 1.20 + + See Also + -------- + :term:`array_like`: + Any scalar or sequence that can be interpreted as an ndarray. + + Examples + -------- + .. code-block:: python + + >>> import numpy as np + >>> import numpy.typing as npt + + >>> def as_array(a: npt.ArrayLike) -> np.ndarray: + ... 
return np.array(a) + + """) + +add_newdoc('DTypeLike', 'typing.Union[...]', + """ + A `~typing.Union` representing objects that can be coerced + into a `~numpy.dtype`. + + Among others this includes the likes of: + + * :class:`type` objects. + * Character codes or the names of :class:`type` objects. + * Objects with the ``.dtype`` attribute. + + .. versionadded:: 1.20 + + See Also + -------- + :ref:`Specifying and constructing data types <arrays.dtypes.constructing>` + A comprehensive overview of all objects that can be coerced + into data types. + + Examples + -------- + .. code-block:: python + + >>> import numpy as np + >>> import numpy.typing as npt + + >>> def as_dtype(d: npt.DTypeLike) -> np.dtype: + ... return np.dtype(d) + + """) + +add_newdoc('NDArray', repr(NDArray), + """ + A :term:`generic <generic type>` version of + `np.ndarray[Any, np.dtype[+ScalarType]] <numpy.ndarray>`. + + Can be used during runtime for typing arrays with a given dtype + and unspecified shape. + + .. versionadded:: 1.21 + + Examples + -------- + .. code-block:: python + + >>> import numpy as np + >>> import numpy.typing as npt + + >>> print(npt.NDArray) + numpy.ndarray[typing.Any, numpy.dtype[+ScalarType]] + + >>> print(npt.NDArray[np.float64]) + numpy.ndarray[typing.Any, numpy.dtype[numpy.float64]] + + >>> NDArrayInt = npt.NDArray[np.int_] + >>> a: NDArrayInt = np.arange(10) + + >>> def func(a: npt.ArrayLike) -> npt.NDArray[Any]: + ... return np.array(a) + + """) + +_docstrings = _parse_docstrings() diff --git a/.local/lib/python3.8/site-packages/numpy/typing/_array_like.py b/.local/lib/python3.8/site-packages/numpy/typing/_array_like.py new file mode 100644 index 0000000..02e5ee5 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/_array_like.py @@ -0,0 +1,123 @@ +from __future__ import annotations + +from typing import Any, Sequence, Protocol, Union, TypeVar +from numpy import ( + ndarray, + dtype, + generic, + bool_, + unsignedinteger, + integer, + floating, + complexfloating, + number, + timedelta64, + datetime64, + object_, + void, + str_, + bytes_, +) +from ._nested_sequence import _NestedSequence + +_T = TypeVar("_T") +_ScalarType = TypeVar("_ScalarType", bound=generic) +_DType = TypeVar("_DType", bound="dtype[Any]") +_DType_co = TypeVar("_DType_co", covariant=True, bound="dtype[Any]") + +# The `_SupportsArray` protocol only cares about the default dtype +# (i.e. `dtype=None` or no `dtype` parameter at all) of the to-be returned +# array. +# Concrete implementations of the protocol are responsible for adding +# any and all remaining overloads +class _SupportsArray(Protocol[_DType_co]): + def __array__(self) -> ndarray[Any, _DType_co]: ... + + +# TODO: Wait until mypy supports recursive objects in combination with typevars +_FiniteNestedSequence = Union[ + _T, + Sequence[_T], + Sequence[Sequence[_T]], + Sequence[Sequence[Sequence[_T]]], + Sequence[Sequence[Sequence[Sequence[_T]]]], +] + +# A union representing array-like objects; consists of two typevars: +# One representing types that can be parametrized w.r.t. `np.dtype` +# and another one for the rest +_ArrayLike = Union[ + _SupportsArray[_DType], + _NestedSequence[_SupportsArray[_DType]], + _T, + _NestedSequence[_T], +] + +# TODO: support buffer protocols once +# +# https://bugs.python.org/issue27501 +# +# is resolved.
See also the mypy issue: +# +# https://github.com/python/typing/issues/593 +ArrayLike = _ArrayLike[ + dtype, + Union[bool, int, float, complex, str, bytes], +] + +# `ArrayLike_co`: array-like objects that can be coerced into `X` +# given the casting rules `same_kind` +_ArrayLikeBool_co = _ArrayLike[ + "dtype[bool_]", + bool, +] +_ArrayLikeUInt_co = _ArrayLike[ + "dtype[Union[bool_, unsignedinteger[Any]]]", + bool, +] +_ArrayLikeInt_co = _ArrayLike[ + "dtype[Union[bool_, integer[Any]]]", + Union[bool, int], +] +_ArrayLikeFloat_co = _ArrayLike[ + "dtype[Union[bool_, integer[Any], floating[Any]]]", + Union[bool, int, float], +] +_ArrayLikeComplex_co = _ArrayLike[ + "dtype[Union[bool_, integer[Any], floating[Any], complexfloating[Any, Any]]]", + Union[bool, int, float, complex], +] +_ArrayLikeNumber_co = _ArrayLike[ + "dtype[Union[bool_, number[Any]]]", + Union[bool, int, float, complex], +] +_ArrayLikeTD64_co = _ArrayLike[ + "dtype[Union[bool_, integer[Any], timedelta64]]", + Union[bool, int], +] +_ArrayLikeDT64_co = Union[ + _SupportsArray["dtype[datetime64]"], + _NestedSequence[_SupportsArray["dtype[datetime64]"]], +] +_ArrayLikeObject_co = Union[ + _SupportsArray["dtype[object_]"], + _NestedSequence[_SupportsArray["dtype[object_]"]], +] + +_ArrayLikeVoid_co = Union[ + _SupportsArray["dtype[void]"], + _NestedSequence[_SupportsArray["dtype[void]"]], +] +_ArrayLikeStr_co = _ArrayLike[ + "dtype[str_]", + str, +] +_ArrayLikeBytes_co = _ArrayLike[ + "dtype[bytes_]", + bytes, +] + +_ArrayLikeInt = _ArrayLike[ + "dtype[integer[Any]]", + int, +] diff --git a/.local/lib/python3.8/site-packages/numpy/typing/_callable.pyi b/.local/lib/python3.8/site-packages/numpy/typing/_callable.pyi new file mode 100644 index 0000000..e1149f2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/_callable.pyi @@ -0,0 +1,327 @@ +""" +A module with various ``typing.Protocol`` subclasses that implement +the ``__call__`` magic method. + +See the `Mypy documentation`_ on protocols for more details. + +.. _`Mypy documentation`: https://mypy.readthedocs.io/en/stable/protocols.html#callback-protocols + +""" + +from __future__ import annotations + +from typing import ( + Union, + TypeVar, + overload, + Any, + Tuple, + NoReturn, + Protocol, +) + +from numpy import ( + ndarray, + dtype, + generic, + bool_, + timedelta64, + number, + integer, + unsignedinteger, + signedinteger, + int8, + int_, + floating, + float64, + complexfloating, + complex128, +) +from ._nbit import _NBitInt, _NBitDouble +from ._scalars import ( + _BoolLike_co, + _IntLike_co, + _FloatLike_co, + _NumberLike_co, +) +from . import NBitBase +from ._generic_alias import NDArray + +_T1 = TypeVar("_T1") +_T2 = TypeVar("_T2") +_T1_contra = TypeVar("_T1_contra", contravariant=True) +_T2_contra = TypeVar("_T2_contra", contravariant=True) +_2Tuple = Tuple[_T1, _T1] + +_NBit1 = TypeVar("_NBit1", bound=NBitBase) +_NBit2 = TypeVar("_NBit2", bound=NBitBase) + +_IntType = TypeVar("_IntType", bound=integer) +_FloatType = TypeVar("_FloatType", bound=floating) +_NumberType = TypeVar("_NumberType", bound=number) +_NumberType_co = TypeVar("_NumberType_co", covariant=True, bound=number) +_GenericType_co = TypeVar("_GenericType_co", covariant=True, bound=generic) + +class _BoolOp(Protocol[_GenericType_co]): + @overload + def __call__(self, other: _BoolLike_co, /) -> _GenericType_co: ... + @overload # platform dependent + def __call__(self, other: int, /) -> int_: ... + @overload + def __call__(self, other: float, /) -> float64: ... 
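The classes in this file are callback protocols: a single callable type with several value-dependent signatures. A minimal sketch of the same pattern with hypothetical names, outside numpy.typing:

from typing import Protocol, overload

class _Doubler(Protocol):
    # As with _BoolOp above, each overload maps an argument type to its
    # own return type; only type checkers ever read these signatures.
    @overload
    def __call__(self, other: int, /) -> int: ...
    @overload
    def __call__(self, other: float, /) -> float: ...
    def __call__(self, other, /): ...

def double(other, /):
    return other * 2

d: _Doubler = double  # a plain function can satisfy the protocol
print(d(2), d(2.5))   # 4 5.0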
+ @overload + def __call__(self, other: complex, /) -> complex128: ... + @overload + def __call__(self, other: _NumberType, /) -> _NumberType: ... + +class _BoolBitOp(Protocol[_GenericType_co]): + @overload + def __call__(self, other: _BoolLike_co, /) -> _GenericType_co: ... + @overload # platform dependent + def __call__(self, other: int, /) -> int_: ... + @overload + def __call__(self, other: _IntType, /) -> _IntType: ... + +class _BoolSub(Protocol): + # Note that `other: bool_` is absent here + @overload + def __call__(self, other: bool, /) -> NoReturn: ... + @overload # platform dependent + def __call__(self, other: int, /) -> int_: ... + @overload + def __call__(self, other: float, /) -> float64: ... + @overload + def __call__(self, other: complex, /) -> complex128: ... + @overload + def __call__(self, other: _NumberType, /) -> _NumberType: ... + +class _BoolTrueDiv(Protocol): + @overload + def __call__(self, other: float | _IntLike_co, /) -> float64: ... + @overload + def __call__(self, other: complex, /) -> complex128: ... + @overload + def __call__(self, other: _NumberType, /) -> _NumberType: ... + +class _BoolMod(Protocol): + @overload + def __call__(self, other: _BoolLike_co, /) -> int8: ... + @overload # platform dependent + def __call__(self, other: int, /) -> int_: ... + @overload + def __call__(self, other: float, /) -> float64: ... + @overload + def __call__(self, other: _IntType, /) -> _IntType: ... + @overload + def __call__(self, other: _FloatType, /) -> _FloatType: ... + +class _BoolDivMod(Protocol): + @overload + def __call__(self, other: _BoolLike_co, /) -> _2Tuple[int8]: ... + @overload # platform dependent + def __call__(self, other: int, /) -> _2Tuple[int_]: ... + @overload + def __call__(self, other: float, /) -> _2Tuple[floating[_NBit1 | _NBitDouble]]: ... + @overload + def __call__(self, other: _IntType, /) -> _2Tuple[_IntType]: ... + @overload + def __call__(self, other: _FloatType, /) -> _2Tuple[_FloatType]: ... + +class _TD64Div(Protocol[_NumberType_co]): + @overload + def __call__(self, other: timedelta64, /) -> _NumberType_co: ... + @overload + def __call__(self, other: _BoolLike_co, /) -> NoReturn: ... + @overload + def __call__(self, other: _FloatLike_co, /) -> timedelta64: ... + +class _IntTrueDiv(Protocol[_NBit1]): + @overload + def __call__(self, other: bool, /) -> floating[_NBit1]: ... + @overload + def __call__(self, other: int, /) -> floating[_NBit1 | _NBitInt]: ... + @overload + def __call__(self, other: float, /) -> floating[_NBit1 | _NBitDouble]: ... + @overload + def __call__( + self, other: complex, /, + ) -> complexfloating[_NBit1 | _NBitDouble, _NBit1 | _NBitDouble]: ... + @overload + def __call__(self, other: integer[_NBit2], /) -> floating[_NBit1 | _NBit2]: ... + +class _UnsignedIntOp(Protocol[_NBit1]): + # NOTE: `uint64 + signedinteger -> float64` + @overload + def __call__(self, other: bool, /) -> unsignedinteger[_NBit1]: ... + @overload + def __call__( + self, other: int | signedinteger[Any], / + ) -> Any: ... + @overload + def __call__(self, other: float, /) -> floating[_NBit1 | _NBitDouble]: ... + @overload + def __call__( + self, other: complex, /, + ) -> complexfloating[_NBit1 | _NBitDouble, _NBit1 | _NBitDouble]: ... + @overload + def __call__( + self, other: unsignedinteger[_NBit2], / + ) -> unsignedinteger[_NBit1 | _NBit2]: ... + +class _UnsignedIntBitOp(Protocol[_NBit1]): + @overload + def __call__(self, other: bool, /) -> unsignedinteger[_NBit1]: ... + @overload + def __call__(self, other: int, /) -> signedinteger[Any]: ... 
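The `uint64 + signedinteger -> float64` note above describes real runtime behaviour, which is why that overload returns `Any`; a quick check:

import numpy as np

# No 64-bit integer dtype can represent both the uint64 and int64 ranges,
# so NumPy promotes the mixed operation to float64.
print(type(np.uint64(1) + np.int64(1)))  # <class 'numpy.float64'>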
+ @overload + def __call__(self, other: signedinteger[Any], /) -> signedinteger[Any]: ... + @overload + def __call__( + self, other: unsignedinteger[_NBit2], / + ) -> unsignedinteger[_NBit1 | _NBit2]: ... + +class _UnsignedIntMod(Protocol[_NBit1]): + @overload + def __call__(self, other: bool, /) -> unsignedinteger[_NBit1]: ... + @overload + def __call__( + self, other: int | signedinteger[Any], / + ) -> Any: ... + @overload + def __call__(self, other: float, /) -> floating[_NBit1 | _NBitDouble]: ... + @overload + def __call__( + self, other: unsignedinteger[_NBit2], / + ) -> unsignedinteger[_NBit1 | _NBit2]: ... + +class _UnsignedIntDivMod(Protocol[_NBit1]): + @overload + def __call__(self, other: bool, /) -> _2Tuple[signedinteger[_NBit1]]: ... + @overload + def __call__( + self, other: int | signedinteger[Any], / + ) -> _2Tuple[Any]: ... + @overload + def __call__(self, other: float, /) -> _2Tuple[floating[_NBit1 | _NBitDouble]]: ... + @overload + def __call__( + self, other: unsignedinteger[_NBit2], / + ) -> _2Tuple[unsignedinteger[_NBit1 | _NBit2]]: ... + +class _SignedIntOp(Protocol[_NBit1]): + @overload + def __call__(self, other: bool, /) -> signedinteger[_NBit1]: ... + @overload + def __call__(self, other: int, /) -> signedinteger[_NBit1 | _NBitInt]: ... + @overload + def __call__(self, other: float, /) -> floating[_NBit1 | _NBitDouble]: ... + @overload + def __call__( + self, other: complex, /, + ) -> complexfloating[_NBit1 | _NBitDouble, _NBit1 | _NBitDouble]: ... + @overload + def __call__( + self, other: signedinteger[_NBit2], /, + ) -> signedinteger[_NBit1 | _NBit2]: ... + +class _SignedIntBitOp(Protocol[_NBit1]): + @overload + def __call__(self, other: bool, /) -> signedinteger[_NBit1]: ... + @overload + def __call__(self, other: int, /) -> signedinteger[_NBit1 | _NBitInt]: ... + @overload + def __call__( + self, other: signedinteger[_NBit2], /, + ) -> signedinteger[_NBit1 | _NBit2]: ... + +class _SignedIntMod(Protocol[_NBit1]): + @overload + def __call__(self, other: bool, /) -> signedinteger[_NBit1]: ... + @overload + def __call__(self, other: int, /) -> signedinteger[_NBit1 | _NBitInt]: ... + @overload + def __call__(self, other: float, /) -> floating[_NBit1 | _NBitDouble]: ... + @overload + def __call__( + self, other: signedinteger[_NBit2], /, + ) -> signedinteger[_NBit1 | _NBit2]: ... + +class _SignedIntDivMod(Protocol[_NBit1]): + @overload + def __call__(self, other: bool, /) -> _2Tuple[signedinteger[_NBit1]]: ... + @overload + def __call__(self, other: int, /) -> _2Tuple[signedinteger[_NBit1 | _NBitInt]]: ... + @overload + def __call__(self, other: float, /) -> _2Tuple[floating[_NBit1 | _NBitDouble]]: ... + @overload + def __call__( + self, other: signedinteger[_NBit2], /, + ) -> _2Tuple[signedinteger[_NBit1 | _NBit2]]: ... + +class _FloatOp(Protocol[_NBit1]): + @overload + def __call__(self, other: bool, /) -> floating[_NBit1]: ... + @overload + def __call__(self, other: int, /) -> floating[_NBit1 | _NBitInt]: ... + @overload + def __call__(self, other: float, /) -> floating[_NBit1 | _NBitDouble]: ... + @overload + def __call__( + self, other: complex, /, + ) -> complexfloating[_NBit1 | _NBitDouble, _NBit1 | _NBitDouble]: ... + @overload + def __call__( + self, other: integer[_NBit2] | floating[_NBit2], / + ) -> floating[_NBit1 | _NBit2]: ... + +class _FloatMod(Protocol[_NBit1]): + @overload + def __call__(self, other: bool, /) -> floating[_NBit1]: ... + @overload + def __call__(self, other: int, /) -> floating[_NBit1 | _NBitInt]: ... 
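# [Editor's note, not part of the vendored diff] The recurring
# `_NBit1 | _NBit2` return types in these float protocols express "the result
# precision is the wider of the two operands". A minimal runtime sketch of the
# behaviour the stubs approximate:
#
#     import numpy as np
#     print((np.float32(1) % np.float16(1)).dtype)  # float32 (32 bits win over 16)
#     print((np.float32(1) % np.float64(1)).dtype)  # float64 (64 bits win over 32)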
+ @overload + def __call__(self, other: float, /) -> floating[_NBit1 | _NBitDouble]: ... + @overload + def __call__( + self, other: integer[_NBit2] | floating[_NBit2], / + ) -> floating[_NBit1 | _NBit2]: ... + +class _FloatDivMod(Protocol[_NBit1]): + @overload + def __call__(self, other: bool, /) -> _2Tuple[floating[_NBit1]]: ... + @overload + def __call__(self, other: int, /) -> _2Tuple[floating[_NBit1 | _NBitInt]]: ... + @overload + def __call__(self, other: float, /) -> _2Tuple[floating[_NBit1 | _NBitDouble]]: ... + @overload + def __call__( + self, other: integer[_NBit2] | floating[_NBit2], / + ) -> _2Tuple[floating[_NBit1 | _NBit2]]: ... + +class _ComplexOp(Protocol[_NBit1]): + @overload + def __call__(self, other: bool, /) -> complexfloating[_NBit1, _NBit1]: ... + @overload + def __call__(self, other: int, /) -> complexfloating[_NBit1 | _NBitInt, _NBit1 | _NBitInt]: ... + @overload + def __call__( + self, other: complex, /, + ) -> complexfloating[_NBit1 | _NBitDouble, _NBit1 | _NBitDouble]: ... + @overload + def __call__( + self, + other: Union[ + integer[_NBit2], + floating[_NBit2], + complexfloating[_NBit2, _NBit2], + ], /, + ) -> complexfloating[_NBit1 | _NBit2, _NBit1 | _NBit2]: ... + +class _NumberOp(Protocol): + def __call__(self, other: _NumberLike_co, /) -> Any: ... + +class _ComparisonOp(Protocol[_T1_contra, _T2_contra]): + @overload + def __call__(self, other: _T1_contra, /) -> bool_: ... + @overload + def __call__(self, other: _T2_contra, /) -> NDArray[bool_]: ... diff --git a/.local/lib/python3.8/site-packages/numpy/typing/_char_codes.py b/.local/lib/python3.8/site-packages/numpy/typing/_char_codes.py new file mode 100644 index 0000000..1394710 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/_char_codes.py @@ -0,0 +1,111 @@ +from typing import Literal + +_BoolCodes = Literal["?", "=?", "<?", ">?", "bool", "bool_", "bool8"] + +_UInt8Codes = Literal["uint8", "u1", "=u1", "<u1", ">u1"] +_UInt16Codes = Literal["uint16", "u2", "=u2", "<u2", ">u2"] +_UInt32Codes = Literal["uint32", "u4", "=u4", "<u4", ">u4"] +_UInt64Codes = Literal["uint64", "u8", "=u8", "<u8", ">u8"] + +_Int8Codes = Literal["int8", "i1", "=i1", "<i1", ">i1"] +_Int16Codes = Literal["int16", "i2", "=i2", "<i2", ">i2"] +_Int32Codes = Literal["int32", "i4", "=i4", "<i4", ">i4"] +_Int64Codes = Literal["int64", "i8", "=i8", "<i8", ">i8"] + +_Float16Codes = Literal["float16", "f2", "=f2", "<f2", ">f2"] +_Float32Codes = Literal["float32", "f4", "=f4", "<f4", ">f4"] +_Float64Codes = Literal["float64", "f8", "=f8", "<f8", ">f8"] + +_Complex64Codes = Literal["complex64", "c8", "=c8", "<c8", ">c8"] +_Complex128Codes = Literal["complex128", "c16", "=c16", "<c16", ">c16"] + +_ByteCodes = Literal["byte", "b", "=b", "<b", ">b"] +_ShortCodes = Literal["short", "h", "=h", "<h", ">h"] +_IntCCodes = Literal["intc", "i", "=i", "<i", ">i"] +_IntPCodes = Literal["intp", "int0", "p", "=p", "<p", ">p"] +_IntCodes = Literal["long", "int", "int_", "l", "=l", "<l", ">l"] +_LongLongCodes = Literal["longlong", "q", "=q", "<q", ">q"] + +_UByteCodes = Literal["ubyte", "B", "=B", "<B", ">B"] +_UShortCodes = Literal["ushort", "H", "=H", "<H", ">H"] +_UIntCCodes = Literal["uintc", "I", "=I", "<I", ">I"] +_UIntPCodes = Literal["uintp", "uint0", "P", "=P", "<P", ">P"] +_UIntCodes = Literal["uint", "L", "=L", "<L", ">L"] +_ULongLongCodes = Literal["ulonglong", "Q", "=Q", "<Q", ">Q"] + +_HalfCodes = Literal["half", "e", "=e", "<e", ">e"] +_SingleCodes = Literal["single", "f", "=f", "<f", ">f"] +_DoubleCodes = Literal["double", "float", "float_", "d", "=d", "<d", ">d"] +_LongDoubleCodes = Literal["longdouble", "longfloat", "g", "=g", "<g", ">g"] + +_CSingleCodes = Literal["csingle", "singlecomplex", "F", "=F", "<F", ">F"] +_CDoubleCodes = Literal["cdouble", "complex", "complex_", "cfloat", "D", "=D", "<D", ">D"] +_CLongDoubleCodes = Literal["clongdouble", "clongfloat", "longcomplex", "G", "=G", "<G", ">G"] + +_StrCodes = Literal["str", "str_", "str0", "unicode", "unicode_", "U", "=U", "<U", ">U"] +_BytesCodes = Literal["bytes", "bytes_", "bytes0", "S", "=S", "<S", ">S"] +_VoidCodes = Literal["void", "void0", "V", "=V", "<V", ">V"] +_ObjectCodes = Literal["object", "object_", "O", "=O", "<O", ">O"] + +_DT64Codes = Literal[ + "datetime64", "=datetime64", "<datetime64", ">datetime64", + "datetime64[Y]", "=datetime64[Y]", "<datetime64[Y]", ">datetime64[Y]", + "datetime64[M]", "=datetime64[M]", "<datetime64[M]", ">datetime64[M]", + "datetime64[W]", "=datetime64[W]", "<datetime64[W]", ">datetime64[W]", + "datetime64[D]", "=datetime64[D]", "<datetime64[D]", ">datetime64[D]", + "datetime64[h]", "=datetime64[h]", "<datetime64[h]", ">datetime64[h]", + "datetime64[m]", "=datetime64[m]", "<datetime64[m]", ">datetime64[m]", + "datetime64[s]", "=datetime64[s]", "<datetime64[s]", ">datetime64[s]", + "datetime64[ms]", "=datetime64[ms]", "<datetime64[ms]", ">datetime64[ms]", + "datetime64[us]", "=datetime64[us]", "<datetime64[us]", ">datetime64[us]", + "datetime64[ns]", "=datetime64[ns]", "<datetime64[ns]", ">datetime64[ns]", + "datetime64[ps]", "=datetime64[ps]", "<datetime64[ps]", ">datetime64[ps]", + "datetime64[fs]", "=datetime64[fs]", "<datetime64[fs]", ">datetime64[fs]", + "datetime64[as]", "=datetime64[as]", "<datetime64[as]", ">datetime64[as]", + "M", "=M", "<M", ">M", + "M8", "=M8", "<M8", ">M8", + "M8[Y]", "=M8[Y]", "<M8[Y]", ">M8[Y]", + "M8[M]", "=M8[M]", "<M8[M]", ">M8[M]", + "M8[W]", "=M8[W]", "<M8[W]", ">M8[W]", + "M8[D]", "=M8[D]", "<M8[D]", ">M8[D]", + "M8[h]", "=M8[h]", "<M8[h]", ">M8[h]", + "M8[m]", "=M8[m]", "<M8[m]", ">M8[m]", + "M8[s]", "=M8[s]", "<M8[s]", ">M8[s]", + "M8[ms]", "=M8[ms]", "<M8[ms]", ">M8[ms]", + "M8[us]", "=M8[us]", "<M8[us]", ">M8[us]", + "M8[ns]", "=M8[ns]", "<M8[ns]", ">M8[ns]", + "M8[ps]", "=M8[ps]", "<M8[ps]", ">M8[ps]", + "M8[fs]", "=M8[fs]", "<M8[fs]", ">M8[fs]", + "M8[as]", "=M8[as]", "<M8[as]", ">M8[as]", +] +_TD64Codes = Literal[ + "timedelta64", "=timedelta64", "<timedelta64", ">timedelta64", + "timedelta64[Y]", "=timedelta64[Y]", "<timedelta64[Y]", ">timedelta64[Y]", + "timedelta64[M]", "=timedelta64[M]", "<timedelta64[M]", ">timedelta64[M]", + "timedelta64[W]", "=timedelta64[W]", "<timedelta64[W]", ">timedelta64[W]", + "timedelta64[D]", "=timedelta64[D]", "<timedelta64[D]", ">timedelta64[D]", + "timedelta64[h]", "=timedelta64[h]", "<timedelta64[h]", ">timedelta64[h]", + "timedelta64[m]", "=timedelta64[m]", "<timedelta64[m]", ">timedelta64[m]", + "timedelta64[s]", "=timedelta64[s]", "<timedelta64[s]", ">timedelta64[s]", + "timedelta64[ms]", "=timedelta64[ms]", "<timedelta64[ms]", ">timedelta64[ms]", + "timedelta64[us]", "=timedelta64[us]", "<timedelta64[us]", ">timedelta64[us]", + "timedelta64[ns]", "=timedelta64[ns]", "<timedelta64[ns]", ">timedelta64[ns]", + "timedelta64[ps]", "=timedelta64[ps]", "<timedelta64[ps]", ">timedelta64[ps]", + "timedelta64[fs]", "=timedelta64[fs]", "<timedelta64[fs]", ">timedelta64[fs]", + "timedelta64[as]", "=timedelta64[as]", "<timedelta64[as]", ">timedelta64[as]", + "m", "=m", "<m", ">m", + "m8", "=m8", "<m8", ">m8", + "m8[Y]", "=m8[Y]", "<m8[Y]", ">m8[Y]", + "m8[M]", "=m8[M]", "<m8[M]", ">m8[M]", + "m8[W]", "=m8[W]", "<m8[W]", ">m8[W]", + "m8[D]", "=m8[D]", "<m8[D]", ">m8[D]", + "m8[h]", "=m8[h]", "<m8[h]", ">m8[h]", + "m8[m]", "=m8[m]", "<m8[m]", ">m8[m]", + "m8[s]", "=m8[s]", "<m8[s]", ">m8[s]", + "m8[ms]", "=m8[ms]", "<m8[ms]", ">m8[ms]", + "m8[us]", "=m8[us]", "<m8[us]", ">m8[us]", + "m8[ns]", "=m8[ns]", "<m8[ns]", ">m8[ns]", + "m8[ps]", "=m8[ps]", "<m8[ps]", ">m8[ps]", + "m8[fs]", "=m8[fs]", "<m8[fs]", ">m8[fs]", + "m8[as]", "=m8[as]", "<m8[as]", ">m8[as]", +] diff --git a/.local/lib/python3.8/site-packages/numpy/typing/_dtype_like.py b/.local/lib/python3.8/site-packages/numpy/typing/_dtype_like.py new file mode 100644 index 0000000..c9bf1a1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/_dtype_like.py @@ -0,0 +1,236 @@ +from typing import ( + Any, + List, + Sequence, + Tuple, + Union, + Type, + TypeVar, + Protocol, + TypedDict, +) + +import numpy as np + +from ._shape import _ShapeLike +from ._generic_alias import _DType as DType + +from ._char_codes import ( + _BoolCodes, + _UInt8Codes, + _UInt16Codes, + _UInt32Codes, + _UInt64Codes,
+ _Int8Codes, + _Int16Codes, + _Int32Codes, + _Int64Codes, + _Float16Codes, + _Float32Codes, + _Float64Codes, + _Complex64Codes, + _Complex128Codes, + _ByteCodes, + _ShortCodes, + _IntCCodes, + _IntPCodes, + _IntCodes, + _LongLongCodes, + _UByteCodes, + _UShortCodes, + _UIntCCodes, + _UIntPCodes, + _UIntCodes, + _ULongLongCodes, + _HalfCodes, + _SingleCodes, + _DoubleCodes, + _LongDoubleCodes, + _CSingleCodes, + _CDoubleCodes, + _CLongDoubleCodes, + _DT64Codes, + _TD64Codes, + _StrCodes, + _BytesCodes, + _VoidCodes, + _ObjectCodes, +) + +_DTypeLikeNested = Any # TODO: wait for support for recursive types +_DType_co = TypeVar("_DType_co", covariant=True, bound=DType[Any]) + +# Mandatory keys +class _DTypeDictBase(TypedDict): + names: Sequence[str] + formats: Sequence[_DTypeLikeNested] + + +# Mandatory + optional keys +class _DTypeDict(_DTypeDictBase, total=False): + # Only `str` elements are usable as indexing aliases, + # but `titles` can in principle accept any object + offsets: Sequence[int] + titles: Sequence[Any] + itemsize: int + aligned: bool + + +# A protocol for anything with the dtype attribute +class _SupportsDType(Protocol[_DType_co]): + @property + def dtype(self) -> _DType_co: ... + + +# Would create a dtype[np.void] +_VoidDTypeLike = Union[ + # (flexible_dtype, itemsize) + Tuple[_DTypeLikeNested, int], + # (fixed_dtype, shape) + Tuple[_DTypeLikeNested, _ShapeLike], + # [(field_name, field_dtype, field_shape), ...] + # + # The type here is quite broad because NumPy accepts quite a wide + # range of inputs inside the list; see the tests for some + # examples. + List[Any], + # {'names': ..., 'formats': ..., 'offsets': ..., 'titles': ..., + # 'itemsize': ...} + _DTypeDict, + # (base_dtype, new_dtype) + Tuple[_DTypeLikeNested, _DTypeLikeNested], +] + +# Anything that can be coerced into numpy.dtype. +# Reference: https://docs.scipy.org/doc/numpy/reference/arrays.dtypes.html +DTypeLike = Union[ + DType[Any], + # default data type (float64) + None, + # array-scalar types and generic types + Type[Any], # NOTE: We're stuck with `Type[Any]` due to object dtypes + # anything with a dtype attribute + _SupportsDType[DType[Any]], + # character codes, type strings or comma-separated fields, e.g., 'float64' + str, + _VoidDTypeLike, +] + +# NOTE: while it is possible to provide the dtype as a dict of +# dtype-like objects (e.g. `{'field1': ..., 'field2': ..., ...}`), +# this syntax is officially discouraged and +# therefore not included in the Union defining `DTypeLike`. +# +# See https://github.com/numpy/numpy/issues/16891 for more details. + +# Aliases for commonly used dtype-like objects. +# Note that the precision of `np.number` subclasses is ignored herein.
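# [Editor's note, not part of the vendored diff] Before the aliases below, a
# brief illustration of the `DTypeLike` union defined above. A minimal sketch,
# assuming stock NumPy behaviour; every value here is a valid `DTypeLike` and
# is accepted by `np.dtype(...)`:
#
#     import numpy as np
#     np.dtype(np.int64)                             # scalar type object
#     np.dtype(None)                                 # None -> the float64 default
#     np.dtype("i8")                                 # character code / type string
#     np.dtype(("i4", (2, 2)))                       # (fixed_dtype, shape) subarray
#     np.dtype([("x", "f8"), ("y", "f8")])           # list of (name, format) fields
#     np.dtype({"names": ["a"], "formats": ["f8"]})  # the `_DTypeDict` form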
+_DTypeLikeBool = Union[ + Type[bool], + Type[np.bool_], + DType[np.bool_], + _SupportsDType[DType[np.bool_]], + _BoolCodes, +] +_DTypeLikeUInt = Union[ + Type[np.unsignedinteger], + DType[np.unsignedinteger], + _SupportsDType[DType[np.unsignedinteger]], + _UInt8Codes, + _UInt16Codes, + _UInt32Codes, + _UInt64Codes, + _UByteCodes, + _UShortCodes, + _UIntCCodes, + _UIntPCodes, + _UIntCodes, + _ULongLongCodes, +] +_DTypeLikeInt = Union[ + Type[int], + Type[np.signedinteger], + DType[np.signedinteger], + _SupportsDType[DType[np.signedinteger]], + _Int8Codes, + _Int16Codes, + _Int32Codes, + _Int64Codes, + _ByteCodes, + _ShortCodes, + _IntCCodes, + _IntPCodes, + _IntCodes, + _LongLongCodes, +] +_DTypeLikeFloat = Union[ + Type[float], + Type[np.floating], + DType[np.floating], + _SupportsDType[DType[np.floating]], + _Float16Codes, + _Float32Codes, + _Float64Codes, + _HalfCodes, + _SingleCodes, + _DoubleCodes, + _LongDoubleCodes, +] +_DTypeLikeComplex = Union[ + Type[complex], + Type[np.complexfloating], + DType[np.complexfloating], + _SupportsDType[DType[np.complexfloating]], + _Complex64Codes, + _Complex128Codes, + _CSingleCodes, + _CDoubleCodes, + _CLongDoubleCodes, +] +_DTypeLikeTD64 = Union[ + Type[np.timedelta64], + DType[np.timedelta64], + _SupportsDType[DType[np.timedelta64]], + _TD64Codes, +] +_DTypeLikeDT64 = Union[ + Type[np.datetime64], + DType[np.datetime64], + _SupportsDType[DType[np.datetime64]], + _DT64Codes, +] +_DTypeLikeStr = Union[ + Type[str], + Type[np.str_], + DType[np.str_], + _SupportsDType[DType[np.str_]], + _StrCodes, +] +_DTypeLikeBytes = Union[ + Type[bytes], + Type[np.bytes_], + DType[np.bytes_], + _SupportsDType[DType[np.bytes_]], + _BytesCodes, +] +_DTypeLikeVoid = Union[ + Type[np.void], + DType[np.void], + _SupportsDType[DType[np.void]], + _VoidCodes, + _VoidDTypeLike, +] +_DTypeLikeObject = Union[ + type, + DType[np.object_], + _SupportsDType[DType[np.object_]], + _ObjectCodes, +] + +_DTypeLikeComplex_co = Union[ + _DTypeLikeBool, + _DTypeLikeUInt, + _DTypeLikeInt, + _DTypeLikeFloat, + _DTypeLikeComplex, +] diff --git a/.local/lib/python3.8/site-packages/numpy/typing/_extended_precision.py b/.local/lib/python3.8/site-packages/numpy/typing/_extended_precision.py new file mode 100644 index 0000000..edc1778 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/_extended_precision.py @@ -0,0 +1,43 @@ +"""A module with platform-specific extended precision +`numpy.number` subclasses. + +The subclasses are defined here (instead of ``__init__.pyi``) such +that they can be imported conditionally via numpy's mypy plugin. +""" + +from typing import TYPE_CHECKING, Any + +import numpy as np +from .
import ( + _80Bit, + _96Bit, + _128Bit, + _256Bit, +) + +if TYPE_CHECKING: + uint128 = np.unsignedinteger[_128Bit] + uint256 = np.unsignedinteger[_256Bit] + int128 = np.signedinteger[_128Bit] + int256 = np.signedinteger[_256Bit] + float80 = np.floating[_80Bit] + float96 = np.floating[_96Bit] + float128 = np.floating[_128Bit] + float256 = np.floating[_256Bit] + complex160 = np.complexfloating[_80Bit, _80Bit] + complex192 = np.complexfloating[_96Bit, _96Bit] + complex256 = np.complexfloating[_128Bit, _128Bit] + complex512 = np.complexfloating[_256Bit, _256Bit] +else: + uint128 = Any + uint256 = Any + int128 = Any + int256 = Any + float80 = Any + float96 = Any + float128 = Any + float256 = Any + complex160 = Any + complex192 = Any + complex256 = Any + complex512 = Any diff --git a/.local/lib/python3.8/site-packages/numpy/typing/_generic_alias.py b/.local/lib/python3.8/site-packages/numpy/typing/_generic_alias.py new file mode 100644 index 0000000..1eb2c8c --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/_generic_alias.py @@ -0,0 +1,215 @@ +from __future__ import annotations + +import sys +import types +from typing import ( + Any, + ClassVar, + FrozenSet, + Generator, + Iterable, + Iterator, + List, + NoReturn, + Tuple, + Type, + TypeVar, + TYPE_CHECKING, +) + +import numpy as np + +__all__ = ["_GenericAlias", "NDArray"] + +_T = TypeVar("_T", bound="_GenericAlias") + + +def _to_str(obj: object) -> str: + """Helper function for `_GenericAlias.__repr__`.""" + if obj is Ellipsis: + return '...' + elif isinstance(obj, type) and not isinstance(obj, _GENERIC_ALIAS_TYPE): + if obj.__module__ == 'builtins': + return obj.__qualname__ + else: + return f'{obj.__module__}.{obj.__qualname__}' + else: + return repr(obj) + + +def _parse_parameters(args: Iterable[Any]) -> Generator[TypeVar, None, None]: + """Search for all typevars and typevar-containing objects in `args`. + + Helper function for `_GenericAlias.__init__`. + + """ + for i in args: + if hasattr(i, "__parameters__"): + yield from i.__parameters__ + elif isinstance(i, TypeVar): + yield i + + +def _reconstruct_alias(alias: _T, parameters: Iterator[TypeVar]) -> _T: + """Recursively replace all typevars with those from `parameters`. + + Helper function for `_GenericAlias.__getitem__`. + + """ + args = [] + for i in alias.__args__: + if isinstance(i, TypeVar): + value: Any = next(parameters) + elif isinstance(i, _GenericAlias): + value = _reconstruct_alias(i, parameters) + elif hasattr(i, "__parameters__"): + prm_tup = tuple(next(parameters) for _ in i.__parameters__) + value = i[prm_tup] + else: + value = i + args.append(value) + + cls = type(alias) + return cls(alias.__origin__, tuple(args)) + + +class _GenericAlias: + """A python-based backport of the `types.GenericAlias` class. + + E.g. for ``t = list[int]``, ``t.__origin__`` is ``list`` and + ``t.__args__`` is ``(int,)``. + + See Also + -------- + :pep:`585` + The PEP responsible for introducing `types.GenericAlias`. 
+ + """ + + __slots__ = ("__weakref__", "_origin", "_args", "_parameters", "_hash") + + @property + def __origin__(self) -> type: + return super().__getattribute__("_origin") + + @property + def __args__(self) -> Tuple[object, ...]: + return super().__getattribute__("_args") + + @property + def __parameters__(self) -> Tuple[TypeVar, ...]: + """Type variables in the ``GenericAlias``.""" + return super().__getattribute__("_parameters") + + def __init__( + self, + origin: type, + args: object | Tuple[object, ...], + ) -> None: + self._origin = origin + self._args = args if isinstance(args, tuple) else (args,) + self._parameters = tuple(_parse_parameters(self.__args__)) + + @property + def __call__(self) -> type: + return self.__origin__ + + def __reduce__(self: _T) -> Tuple[ + Type[_T], + Tuple[type, Tuple[object, ...]], + ]: + cls = type(self) + return cls, (self.__origin__, self.__args__) + + def __mro_entries__(self, bases: Iterable[object]) -> Tuple[type]: + return (self.__origin__,) + + def __dir__(self) -> List[str]: + """Implement ``dir(self)``.""" + cls = type(self) + dir_origin = set(dir(self.__origin__)) + return sorted(cls._ATTR_EXCEPTIONS | dir_origin) + + def __hash__(self) -> int: + """Return ``hash(self)``.""" + # Attempt to use the cached hash + try: + return super().__getattribute__("_hash") + except AttributeError: + self._hash: int = hash(self.__origin__) ^ hash(self.__args__) + return super().__getattribute__("_hash") + + def __instancecheck__(self, obj: object) -> NoReturn: + """Check if an `obj` is an instance.""" + raise TypeError("isinstance() argument 2 cannot be a " + "parameterized generic") + + def __subclasscheck__(self, cls: type) -> NoReturn: + """Check if a `cls` is a subclass.""" + raise TypeError("issubclass() argument 2 cannot be a " + "parameterized generic") + + def __repr__(self) -> str: + """Return ``repr(self)``.""" + args = ", ".join(_to_str(i) for i in self.__args__) + origin = _to_str(self.__origin__) + return f"{origin}[{args}]" + + def __getitem__(self: _T, key: object | Tuple[object, ...]) -> _T: + """Return ``self[key]``.""" + key_tup = key if isinstance(key, tuple) else (key,) + + if len(self.__parameters__) == 0: + raise TypeError(f"There are no type variables left in {self}") + elif len(key_tup) > len(self.__parameters__): + raise TypeError(f"Too many arguments for {self}") + elif len(key_tup) < len(self.__parameters__): + raise TypeError(f"Too few arguments for {self}") + + key_iter = iter(key_tup) + return _reconstruct_alias(self, key_iter) + + def __eq__(self, value: object) -> bool: + """Return ``self == value``.""" + if not isinstance(value, _GENERIC_ALIAS_TYPE): + return NotImplemented + return ( + self.__origin__ == value.__origin__ and + self.__args__ == value.__args__ + ) + + _ATTR_EXCEPTIONS: ClassVar[FrozenSet[str]] = frozenset({ + "__origin__", + "__args__", + "__parameters__", + "__mro_entries__", + "__reduce__", + "__reduce_ex__", + "__copy__", + "__deepcopy__", + }) + + def __getattribute__(self, name: str) -> Any: + """Return ``getattr(self, name)``.""" + # Pull the attribute from `__origin__` unless its + # name is in `_ATTR_EXCEPTIONS` + cls = type(self) + if name in cls._ATTR_EXCEPTIONS: + return super().__getattribute__(name) + return getattr(self.__origin__, name) + + +# See `_GenericAlias.__eq__` +if sys.version_info >= (3, 9): + _GENERIC_ALIAS_TYPE = (_GenericAlias, types.GenericAlias) +else: + _GENERIC_ALIAS_TYPE = (_GenericAlias,) + +ScalarType = TypeVar("ScalarType", bound=np.generic, covariant=True) + +if 
TYPE_CHECKING or sys.version_info >= (3, 9): + _DType = np.dtype[ScalarType] + NDArray = np.ndarray[Any, np.dtype[ScalarType]] +else: + _DType = _GenericAlias(np.dtype, (ScalarType,)) + NDArray = _GenericAlias(np.ndarray, (Any, _DType)) diff --git a/.local/lib/python3.8/site-packages/numpy/typing/_nbit.py b/.local/lib/python3.8/site-packages/numpy/typing/_nbit.py new file mode 100644 index 0000000..b8d35db --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/_nbit.py @@ -0,0 +1,16 @@ +"""A module with the precisions of platform-specific `~numpy.number`s.""" + +from typing import Any + +# To-be replaced with a `npt.NBitBase` subclass by numpy's mypy plugin +_NBitByte = Any +_NBitShort = Any +_NBitIntC = Any +_NBitIntP = Any +_NBitInt = Any +_NBitLongLong = Any + +_NBitHalf = Any +_NBitSingle = Any +_NBitDouble = Any +_NBitLongDouble = Any diff --git a/.local/lib/python3.8/site-packages/numpy/typing/_nested_sequence.py b/.local/lib/python3.8/site-packages/numpy/typing/_nested_sequence.py new file mode 100644 index 0000000..a853303 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/_nested_sequence.py @@ -0,0 +1,90 @@ +"""A module containing the `_NestedSequence` protocol.""" + +from __future__ import annotations + +from typing import ( + Any, + Iterator, + overload, + TypeVar, + Protocol, +) + +__all__ = ["_NestedSequence"] + +_T_co = TypeVar("_T_co", covariant=True) + + +class _NestedSequence(Protocol[_T_co]): + """A protocol for representing nested sequences. + + Warning + ------- + `_NestedSequence` currently does not work in combination with typevars, + *e.g.* ``def func(a: _NestedSequence[T]) -> T: ...``. + + See Also + -------- + `collections.abc.Sequence` + ABCs for read-only and mutable :term:`sequences`. + + Examples + -------- + .. code-block:: python + + >>> from __future__ import annotations + + >>> from typing import TYPE_CHECKING + >>> import numpy as np + >>> from numpy.typing import _NestedSequence + + >>> def get_dtype(seq: _NestedSequence[float]) -> np.dtype[np.float64]: + ... return np.asarray(seq).dtype + + >>> a = get_dtype([1.0]) + >>> b = get_dtype([[1.0]]) + >>> c = get_dtype([[[1.0]]]) + >>> d = get_dtype([[[[1.0]]]]) + + >>> if TYPE_CHECKING: + ... reveal_locals() + ... # note: Revealed local types are: + ... # note: a: numpy.dtype[numpy.floating[numpy.typing._64Bit]] + ... # note: b: numpy.dtype[numpy.floating[numpy.typing._64Bit]] + ... # note: c: numpy.dtype[numpy.floating[numpy.typing._64Bit]] + ... # note: d: numpy.dtype[numpy.floating[numpy.typing._64Bit]] + + """ + + def __len__(self, /) -> int: + """Implement ``len(self)``.""" + raise NotImplementedError + + @overload + def __getitem__(self, index: int, /) -> _T_co | _NestedSequence[_T_co]: ... + @overload + def __getitem__(self, index: slice, /) -> _NestedSequence[_T_co]: ...
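# [Editor's note, not part of the vendored diff] The two `__getitem__`
# overloads above mirror `Sequence` semantics: integer indexing may strip one
# level of nesting, while slicing preserves the nesting level. A sketch for a
# value that structurally matches `_NestedSequence[float]`:
#
#     seq = [[1.0, 2.0], [3.0, 4.0]]
#     seq[0]   # list[float]        -> matches `_T_co | _NestedSequence[_T_co]`
#     seq[:1]  # list[list[float]]  -> matches `_NestedSequence[_T_co]`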
+ + def __getitem__(self, index, /): + """Implement ``self[x]``.""" + raise NotImplementedError + + def __contains__(self, x: object, /) -> bool: + """Implement ``x in self``.""" + raise NotImplementedError + + def __iter__(self, /) -> Iterator[_T_co | _NestedSequence[_T_co]]: + """Implement ``iter(self)``.""" + raise NotImplementedError + + def __reversed__(self, /) -> Iterator[_T_co | _NestedSequence[_T_co]]: + """Implement ``reversed(self)``.""" + raise NotImplementedError + + def count(self, value: Any, /) -> int: + """Return the number of occurrences of `value`.""" + raise NotImplementedError + + def index(self, value: Any, /) -> int: + """Return the first index of `value`.""" + raise NotImplementedError diff --git a/.local/lib/python3.8/site-packages/numpy/typing/_scalars.py b/.local/lib/python3.8/site-packages/numpy/typing/_scalars.py new file mode 100644 index 0000000..516b996 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/_scalars.py @@ -0,0 +1,30 @@ +from typing import Union, Tuple, Any + +import numpy as np + +# NOTE: `_StrLike_co` and `_BytesLike_co` are pointless, as `np.str_` and +# `np.bytes_` are already subclasses of their builtin counterpart + +_CharLike_co = Union[str, bytes] + +# The 6 `<X>Like_co` type-aliases below represent all scalars that can be +# coerced into `<X>` (with the casting rule `same_kind`) +_BoolLike_co = Union[bool, np.bool_] +_UIntLike_co = Union[_BoolLike_co, np.unsignedinteger] +_IntLike_co = Union[_BoolLike_co, int, np.integer] +_FloatLike_co = Union[_IntLike_co, float, np.floating] +_ComplexLike_co = Union[_FloatLike_co, complex, np.complexfloating] +_TD64Like_co = Union[_IntLike_co, np.timedelta64] + +_NumberLike_co = Union[int, float, complex, np.number, np.bool_] +_ScalarLike_co = Union[ + int, + float, + complex, + str, + bytes, + np.generic, +] + +# `_VoidLike_co` is technically not a scalar, but it's close enough +_VoidLike_co = Union[Tuple[Any, ...], np.void] diff --git a/.local/lib/python3.8/site-packages/numpy/typing/_shape.py b/.local/lib/python3.8/site-packages/numpy/typing/_shape.py new file mode 100644 index 0000000..c28859b --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/_shape.py @@ -0,0 +1,6 @@ +from typing import Sequence, Tuple, Union, SupportsIndex + +_Shape = Tuple[int, ...] + +# Anything that can be coerced to a shape tuple +_ShapeLike = Union[SupportsIndex, Sequence[SupportsIndex]] diff --git a/.local/lib/python3.8/site-packages/numpy/typing/_ufunc.pyi b/.local/lib/python3.8/site-packages/numpy/typing/_ufunc.pyi new file mode 100644 index 0000000..1be3500 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/_ufunc.pyi @@ -0,0 +1,405 @@ +"""A module with private type-check-only `numpy.ufunc` subclasses. + +The signatures of the ufuncs are too varied to reasonably type +with a single class. So instead, `ufunc` has been expanded into +four private subclasses, one for each combination of +`~ufunc.nin` and `~ufunc.nout`.
+ +""" + +from typing import ( + Any, + Generic, + List, + overload, + Tuple, + TypeVar, + Literal, + SupportsIndex, +) + +from numpy import ufunc, _CastingKind, _OrderKACF +from numpy.typing import NDArray + +from ._shape import _ShapeLike +from ._scalars import _ScalarLike_co +from ._array_like import ArrayLike, _ArrayLikeBool_co, _ArrayLikeInt_co +from ._dtype_like import DTypeLike + +_T = TypeVar("_T") +_2Tuple = Tuple[_T, _T] +_3Tuple = Tuple[_T, _T, _T] +_4Tuple = Tuple[_T, _T, _T, _T] + +_NTypes = TypeVar("_NTypes", bound=int) +_IDType = TypeVar("_IDType", bound=Any) +_NameType = TypeVar("_NameType", bound=str) + +# NOTE: In reality `extobj` should be a length of list 3 containing an +# int, an int, and a callable, but there's no way to properly express +# non-homogenous lists. +# Use `Any` over `Union` to avoid issues related to lists invariance. + +# NOTE: `reduce`, `accumulate`, `reduceat` and `outer` raise a ValueError for +# ufuncs that don't accept two input arguments and return one output argument. +# In such cases the respective methods are simply typed as `None`. + +# NOTE: Similarly, `at` won't be defined for ufuncs that return +# multiple outputs; in such cases `at` is typed as `None` + +# NOTE: If 2 output types are returned then `out` must be a +# 2-tuple of arrays. Otherwise `None` or a plain array are also acceptable + +class _UFunc_Nin1_Nout1(ufunc, Generic[_NameType, _NTypes, _IDType]): + @property + def __name__(self) -> _NameType: ... + @property + def ntypes(self) -> _NTypes: ... + @property + def identity(self) -> _IDType: ... + @property + def nin(self) -> Literal[1]: ... + @property + def nout(self) -> Literal[1]: ... + @property + def nargs(self) -> Literal[2]: ... + @property + def signature(self) -> None: ... + @property + def reduce(self) -> None: ... + @property + def accumulate(self) -> None: ... + @property + def reduceat(self) -> None: ... + @property + def outer(self) -> None: ... + + @overload + def __call__( + self, + __x1: _ScalarLike_co, + out: None = ..., + *, + where: None | _ArrayLikeBool_co = ..., + casting: _CastingKind = ..., + order: _OrderKACF = ..., + dtype: DTypeLike = ..., + subok: bool = ..., + signature: str | _2Tuple[None | str] = ..., + extobj: List[Any] = ..., + ) -> Any: ... + @overload + def __call__( + self, + __x1: ArrayLike, + out: None | NDArray[Any] | Tuple[NDArray[Any]] = ..., + *, + where: None | _ArrayLikeBool_co = ..., + casting: _CastingKind = ..., + order: _OrderKACF = ..., + dtype: DTypeLike = ..., + subok: bool = ..., + signature: str | _2Tuple[None | str] = ..., + extobj: List[Any] = ..., + ) -> NDArray[Any]: ... + + def at( + self, + a: NDArray[Any], + indices: _ArrayLikeInt_co, + /, + ) -> None: ... + +class _UFunc_Nin2_Nout1(ufunc, Generic[_NameType, _NTypes, _IDType]): + @property + def __name__(self) -> _NameType: ... + @property + def ntypes(self) -> _NTypes: ... + @property + def identity(self) -> _IDType: ... + @property + def nin(self) -> Literal[2]: ... + @property + def nout(self) -> Literal[1]: ... + @property + def nargs(self) -> Literal[3]: ... + @property + def signature(self) -> None: ... + + @overload + def __call__( + self, + __x1: _ScalarLike_co, + __x2: _ScalarLike_co, + out: None = ..., + *, + where: None | _ArrayLikeBool_co = ..., + casting: _CastingKind = ..., + order: _OrderKACF = ..., + dtype: DTypeLike = ..., + subok: bool = ..., + signature: str | _3Tuple[None | str] = ..., + extobj: List[Any] = ..., + ) -> Any: ... 
+ @overload + def __call__( + self, + __x1: ArrayLike, + __x2: ArrayLike, + out: None | NDArray[Any] | Tuple[NDArray[Any]] = ..., + *, + where: None | _ArrayLikeBool_co = ..., + casting: _CastingKind = ..., + order: _OrderKACF = ..., + dtype: DTypeLike = ..., + subok: bool = ..., + signature: str | _3Tuple[None | str] = ..., + extobj: List[Any] = ..., + ) -> NDArray[Any]: ... + + def at( + self, + a: NDArray[Any], + indices: _ArrayLikeInt_co, + b: ArrayLike, + /, + ) -> None: ... + + def reduce( + self, + array: ArrayLike, + axis: None | _ShapeLike = ..., + dtype: DTypeLike = ..., + out: None | NDArray[Any] = ..., + keepdims: bool = ..., + initial: Any = ..., + where: _ArrayLikeBool_co = ..., + ) -> Any: ... + + def accumulate( + self, + array: ArrayLike, + axis: SupportsIndex = ..., + dtype: DTypeLike = ..., + out: None | NDArray[Any] = ..., + ) -> NDArray[Any]: ... + + def reduceat( + self, + array: ArrayLike, + indices: _ArrayLikeInt_co, + axis: SupportsIndex = ..., + dtype: DTypeLike = ..., + out: None | NDArray[Any] = ..., + ) -> NDArray[Any]: ... + + # Expand `**kwargs` into explicit keyword-only arguments + @overload + def outer( + self, + A: _ScalarLike_co, + B: _ScalarLike_co, + /, *, + out: None = ..., + where: None | _ArrayLikeBool_co = ..., + casting: _CastingKind = ..., + order: _OrderKACF = ..., + dtype: DTypeLike = ..., + subok: bool = ..., + signature: str | _3Tuple[None | str] = ..., + extobj: List[Any] = ..., + ) -> Any: ... + @overload + def outer( # type: ignore[misc] + self, + A: ArrayLike, + B: ArrayLike, + /, *, + out: None | NDArray[Any] | Tuple[NDArray[Any]] = ..., + where: None | _ArrayLikeBool_co = ..., + casting: _CastingKind = ..., + order: _OrderKACF = ..., + dtype: DTypeLike = ..., + subok: bool = ..., + signature: str | _3Tuple[None | str] = ..., + extobj: List[Any] = ..., + ) -> NDArray[Any]: ... + +class _UFunc_Nin1_Nout2(ufunc, Generic[_NameType, _NTypes, _IDType]): + @property + def __name__(self) -> _NameType: ... + @property + def ntypes(self) -> _NTypes: ... + @property + def identity(self) -> _IDType: ... + @property + def nin(self) -> Literal[1]: ... + @property + def nout(self) -> Literal[2]: ... + @property + def nargs(self) -> Literal[3]: ... + @property + def signature(self) -> None: ... + @property + def at(self) -> None: ... + @property + def reduce(self) -> None: ... + @property + def accumulate(self) -> None: ... + @property + def reduceat(self) -> None: ... + @property + def outer(self) -> None: ... + + @overload + def __call__( + self, + __x1: _ScalarLike_co, + __out1: None = ..., + __out2: None = ..., + *, + where: None | _ArrayLikeBool_co = ..., + casting: _CastingKind = ..., + order: _OrderKACF = ..., + dtype: DTypeLike = ..., + subok: bool = ..., + signature: str | _3Tuple[None | str] = ..., + extobj: List[Any] = ..., + ) -> _2Tuple[Any]: ... + @overload + def __call__( + self, + __x1: ArrayLike, + __out1: None | NDArray[Any] = ..., + __out2: None | NDArray[Any] = ..., + *, + out: _2Tuple[NDArray[Any]] = ..., + where: None | _ArrayLikeBool_co = ..., + casting: _CastingKind = ..., + order: _OrderKACF = ..., + dtype: DTypeLike = ..., + subok: bool = ..., + signature: str | _3Tuple[None | str] = ..., + extobj: List[Any] = ..., + ) -> _2Tuple[NDArray[Any]]: ... + +class _UFunc_Nin2_Nout2(ufunc, Generic[_NameType, _NTypes, _IDType]): + @property + def __name__(self) -> _NameType: ... + @property + def ntypes(self) -> _NTypes: ... + @property + def identity(self) -> _IDType: ... + @property + def nin(self) -> Literal[2]: ... 
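# [Editor's note, not part of the vendored diff] Concrete ufuncs matching the
# nin/nout combinations typed in this file, assuming the stock NumPy namespace:
#
#     import numpy as np
#     np.abs(-1)                   # _UFunc_Nin1_Nout1
#     np.add(1, 2)                 # _UFunc_Nin2_Nout1
#     m, e = np.frexp([1.0, 8.0])  # _UFunc_Nin1_Nout2
#     q, r = np.divmod([7, 8], 3)  # _UFunc_Nin2_Nout2
#     np.matmul([[1]], [[2]])      # _GUFunc_Nin2_Nout1 (generalized signature)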
+ @property + def nout(self) -> Literal[2]: ... + @property + def nargs(self) -> Literal[4]: ... + @property + def signature(self) -> None: ... + @property + def at(self) -> None: ... + @property + def reduce(self) -> None: ... + @property + def accumulate(self) -> None: ... + @property + def reduceat(self) -> None: ... + @property + def outer(self) -> None: ... + + @overload + def __call__( + self, + __x1: _ScalarLike_co, + __x2: _ScalarLike_co, + __out1: None = ..., + __out2: None = ..., + *, + where: None | _ArrayLikeBool_co = ..., + casting: _CastingKind = ..., + order: _OrderKACF = ..., + dtype: DTypeLike = ..., + subok: bool = ..., + signature: str | _4Tuple[None | str] = ..., + extobj: List[Any] = ..., + ) -> _2Tuple[Any]: ... + @overload + def __call__( + self, + __x1: ArrayLike, + __x2: ArrayLike, + __out1: None | NDArray[Any] = ..., + __out2: None | NDArray[Any] = ..., + *, + out: _2Tuple[NDArray[Any]] = ..., + where: None | _ArrayLikeBool_co = ..., + casting: _CastingKind = ..., + order: _OrderKACF = ..., + dtype: DTypeLike = ..., + subok: bool = ..., + signature: str | _4Tuple[None | str] = ..., + extobj: List[Any] = ..., + ) -> _2Tuple[NDArray[Any]]: ... + +class _GUFunc_Nin2_Nout1(ufunc, Generic[_NameType, _NTypes, _IDType]): + @property + def __name__(self) -> _NameType: ... + @property + def ntypes(self) -> _NTypes: ... + @property + def identity(self) -> _IDType: ... + @property + def nin(self) -> Literal[2]: ... + @property + def nout(self) -> Literal[1]: ... + @property + def nargs(self) -> Literal[3]: ... + + # NOTE: In practice the only gufunc in the main namespace is `matmul`, + # so we can use its signature here + @property + def signature(self) -> Literal["(n?,k),(k,m?)->(n?,m?)"]: ... + @property + def reduce(self) -> None: ... + @property + def accumulate(self) -> None: ... + @property + def reduceat(self) -> None: ... + @property + def outer(self) -> None: ... + @property + def at(self) -> None: ... + + # Scalar for 1D array-likes; ndarray otherwise + @overload + def __call__( + self, + __x1: ArrayLike, + __x2: ArrayLike, + out: None = ..., + *, + casting: _CastingKind = ..., + order: _OrderKACF = ..., + dtype: DTypeLike = ..., + subok: bool = ..., + signature: str | _3Tuple[None | str] = ..., + extobj: List[Any] = ..., + axes: List[_2Tuple[SupportsIndex]] = ..., + ) -> Any: ... + @overload + def __call__( + self, + __x1: ArrayLike, + __x2: ArrayLike, + out: NDArray[Any] | Tuple[NDArray[Any]], + *, + casting: _CastingKind = ..., + order: _OrderKACF = ..., + dtype: DTypeLike = ..., + subok: bool = ..., + signature: str | _3Tuple[None | str] = ..., + extobj: List[Any] = ..., + axes: List[_2Tuple[SupportsIndex]] = ..., + ) -> NDArray[Any]: ... diff --git a/.local/lib/python3.8/site-packages/numpy/typing/mypy_plugin.py b/.local/lib/python3.8/site-packages/numpy/typing/mypy_plugin.py new file mode 100644 index 0000000..5ac75f9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/mypy_plugin.py @@ -0,0 +1,197 @@ +"""A mypy_ plugin for managing a number of platform-specific annotations. +Its functionality can be split into three distinct parts: + +* Assigning the (platform-dependent) precisions of certain `~numpy.number` + subclasses, including the likes of `~numpy.int_`, `~numpy.intp` and + `~numpy.longlong`. See the documentation on + :ref:`scalar types <arrays.scalars.built-in>` for a comprehensive overview + of the affected classes. Without the plugin the precision of all relevant + classes will be inferred as `~typing.Any`.
+* Removing all extended-precision `~numpy.number` subclasses that are + unavailable for the platform in question. Most notably this includes the + likes of `~numpy.float128` and `~numpy.complex256`. Without the plugin *all* + extended-precision types will, as far as mypy is concerned, be available + to all platforms. +* Assigning the (platform-dependent) precision of `~numpy.ctypeslib.c_intp`. + Without the plugin the type will default to `ctypes.c_int64`. + + .. versionadded:: 1.22 + +Examples +-------- +To enable the plugin, one must add it to their mypy `configuration file`_: + +.. code-block:: ini + + [mypy] + plugins = numpy.typing.mypy_plugin + +.. _mypy: http://mypy-lang.org/ +.. _configuration file: https://mypy.readthedocs.io/en/stable/config_file.html + +""" + +from __future__ import annotations + +from collections.abc import Iterable +from typing import Final, TYPE_CHECKING, Callable + +import numpy as np + +try: + import mypy.types + from mypy.types import Type + from mypy.plugin import Plugin, AnalyzeTypeContext + from mypy.nodes import MypyFile, ImportFrom, Statement + from mypy.build import PRI_MED + + _HookFunc = Callable[[AnalyzeTypeContext], Type] + MYPY_EX: None | ModuleNotFoundError = None +except ModuleNotFoundError as ex: + MYPY_EX = ex + +__all__: list[str] = [] + + +def _get_precision_dict() -> dict[str, str]: + names = [ + ("_NBitByte", np.byte), + ("_NBitShort", np.short), + ("_NBitIntC", np.intc), + ("_NBitIntP", np.intp), + ("_NBitInt", np.int_), + ("_NBitLongLong", np.longlong), + + ("_NBitHalf", np.half), + ("_NBitSingle", np.single), + ("_NBitDouble", np.double), + ("_NBitLongDouble", np.longdouble), + ] + ret = {} + for name, typ in names: + n: int = 8 * typ().dtype.itemsize + ret[f'numpy.typing._nbit.{name}'] = f"numpy._{n}Bit" + return ret + + +def _get_extended_precision_list() -> list[str]: + extended_types = [np.ulonglong, np.longlong, np.longdouble, np.clongdouble] + extended_names = { + "uint128", + "uint256", + "int128", + "int256", + "float80", + "float96", + "float128", + "float256", + "complex160", + "complex192", + "complex256", + "complex512", + } + return [i.__name__ for i in extended_types if i.__name__ in extended_names] + + +def _get_c_intp_name() -> str: + # Adapted from `np.core._internal._getintp_ctype` + char = np.dtype('p').char + if char == 'i': + return "c_int" + elif char == 'l': + return "c_long" + elif char == 'q': + return "c_longlong" + else: + return "c_long" + + +#: A dictionary mapping type-aliases in `numpy.typing._nbit` to +#: concrete `numpy.typing.NBitBase` subclasses. +_PRECISION_DICT: Final = _get_precision_dict() + +#: A list with the names of all extended precision `np.number` subclasses. 
+_EXTENDED_PRECISION_LIST: Final = _get_extended_precision_list() + +#: The name of the ctypes equivalent of `np.intp` +_C_INTP: Final = _get_c_intp_name() + + +def _hook(ctx: AnalyzeTypeContext) -> Type: + """Replace a type-alias with a concrete ``NBitBase`` subclass.""" + typ, _, api = ctx + name = typ.name.split(".")[-1] + name_new = _PRECISION_DICT[f"numpy.typing._nbit.{name}"] + return api.named_type(name_new) + + +if TYPE_CHECKING or MYPY_EX is None: + def _index(iterable: Iterable[Statement], id: str) -> int: + """Identify the first ``ImportFrom`` instance with the specified `id`.""" + for i, value in enumerate(iterable): + if getattr(value, "id", None) == id: + return i + raise ValueError("Failed to identify an `ImportFrom` instance " + f"with the following id: {id!r}") + + def _override_imports( + file: MypyFile, + module: str, + imports: list[tuple[str, None | str]], + ) -> None: + """Override the first `module`-based import with new `imports`.""" + # Construct a new `from module import y` statement + import_obj = ImportFrom(module, 0, names=imports) + import_obj.is_top_level = True + + # Replace the first `module`-based import statement with `import_obj` + for lst in [file.defs, file.imports]: # type: list[Statement] + i = _index(lst, module) + lst[i] = import_obj + + class _NumpyPlugin(Plugin): + """A mypy plugin for handling various numpy-specific typing tasks.""" + + def get_type_analyze_hook(self, fullname: str) -> None | _HookFunc: + """Set the precision of platform-specific `numpy.number` + subclasses. + + For example: `numpy.int_`, `numpy.longlong` and `numpy.longdouble`. + """ + if fullname in _PRECISION_DICT: + return _hook + return None + + def get_additional_deps( + self, file: MypyFile + ) -> list[tuple[int, str, int]]: + """Handle all import-based overrides. + + * Import platform-specific extended-precision `numpy.number` + subclasses (*e.g.* `numpy.float96`, `numpy.float128` and + `numpy.complex256`). + * Import the appropriate `ctypes` equivalent to `numpy.intp`.
+ + """ + ret = [(PRI_MED, file.fullname, -1)] + + if file.fullname == "numpy": + _override_imports( + file, "numpy.typing._extended_precision", + imports=[(v, v) for v in _EXTENDED_PRECISION_LIST], + ) + elif file.fullname == "numpy.ctypeslib": + _override_imports( + file, "ctypes", + imports=[(_C_INTP, "_c_intp")], + ) + return ret + + def plugin(version: str) -> type[_NumpyPlugin]: + """An entry-point for mypy.""" + return _NumpyPlugin + +else: + def plugin(version: str) -> type[_NumpyPlugin]: + """An entry-point for mypy.""" + raise MYPY_EX diff --git a/.local/lib/python3.8/site-packages/numpy/typing/setup.py b/.local/lib/python3.8/site-packages/numpy/typing/setup.py new file mode 100644 index 0000000..694a756 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/setup.py @@ -0,0 +1,12 @@ +def configuration(parent_package='', top_path=None): + from numpy.distutils.misc_util import Configuration + config = Configuration('typing', parent_package, top_path) + config.add_subpackage('tests') + config.add_data_dir('tests/data') + config.add_data_files('*.pyi') + return config + + +if __name__ == '__main__': + from numpy.distutils.core import setup + setup(configuration=configuration) diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/__init__.py b/.local/lib/python3.8/site-packages/numpy/typing/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/typing/tests/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..953e537 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/typing/tests/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/__pycache__/test_generic_alias.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/typing/tests/__pycache__/test_generic_alias.cpython-38.pyc new file mode 100644 index 0000000..ed73c69 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/typing/tests/__pycache__/test_generic_alias.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/__pycache__/test_isfile.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/typing/tests/__pycache__/test_isfile.cpython-38.pyc new file mode 100644 index 0000000..f164732 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/typing/tests/__pycache__/test_isfile.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/__pycache__/test_runtime.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/typing/tests/__pycache__/test_runtime.cpython-38.pyc new file mode 100644 index 0000000..6542c36 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/typing/tests/__pycache__/test_runtime.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/__pycache__/test_typing.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/typing/tests/__pycache__/test_typing.cpython-38.pyc new file mode 100644 index 0000000..706cbfa Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/typing/tests/__pycache__/test_typing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/arithmetic.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/arithmetic.pyi new file mode 100644 index 0000000..b99b24c --- /dev/null +++ 
b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/arithmetic.pyi @@ -0,0 +1,121 @@ +from typing import List, Any +import numpy as np + +b_ = np.bool_() +dt = np.datetime64(0, "D") +td = np.timedelta64(0, "D") + +AR_b: np.ndarray[Any, np.dtype[np.bool_]] +AR_u: np.ndarray[Any, np.dtype[np.uint32]] +AR_i: np.ndarray[Any, np.dtype[np.int64]] +AR_f: np.ndarray[Any, np.dtype[np.float64]] +AR_c: np.ndarray[Any, np.dtype[np.complex128]] +AR_m: np.ndarray[Any, np.dtype[np.timedelta64]] +AR_M: np.ndarray[Any, np.dtype[np.datetime64]] + +ANY: Any + +AR_LIKE_b: List[bool] +AR_LIKE_u: List[np.uint32] +AR_LIKE_i: List[int] +AR_LIKE_f: List[float] +AR_LIKE_c: List[complex] +AR_LIKE_m: List[np.timedelta64] +AR_LIKE_M: List[np.datetime64] + +# Array subtraction + +# NOTE: mypys `NoReturn` errors are, unfortunately, not that great +_1 = AR_b - AR_LIKE_b # E: Need type annotation +_2 = AR_LIKE_b - AR_b # E: Need type annotation +AR_i - bytes() # E: No overload variant + +AR_f - AR_LIKE_m # E: Unsupported operand types +AR_f - AR_LIKE_M # E: Unsupported operand types +AR_c - AR_LIKE_m # E: Unsupported operand types +AR_c - AR_LIKE_M # E: Unsupported operand types + +AR_m - AR_LIKE_f # E: Unsupported operand types +AR_M - AR_LIKE_f # E: Unsupported operand types +AR_m - AR_LIKE_c # E: Unsupported operand types +AR_M - AR_LIKE_c # E: Unsupported operand types + +AR_m - AR_LIKE_M # E: Unsupported operand types +AR_LIKE_m - AR_M # E: Unsupported operand types + +# array floor division + +AR_M // AR_LIKE_b # E: Unsupported operand types +AR_M // AR_LIKE_u # E: Unsupported operand types +AR_M // AR_LIKE_i # E: Unsupported operand types +AR_M // AR_LIKE_f # E: Unsupported operand types +AR_M // AR_LIKE_c # E: Unsupported operand types +AR_M // AR_LIKE_m # E: Unsupported operand types +AR_M // AR_LIKE_M # E: Unsupported operand types + +AR_b // AR_LIKE_M # E: Unsupported operand types +AR_u // AR_LIKE_M # E: Unsupported operand types +AR_i // AR_LIKE_M # E: Unsupported operand types +AR_f // AR_LIKE_M # E: Unsupported operand types +AR_c // AR_LIKE_M # E: Unsupported operand types +AR_m // AR_LIKE_M # E: Unsupported operand types +AR_M // AR_LIKE_M # E: Unsupported operand types + +_3 = AR_m // AR_LIKE_b # E: Need type annotation +AR_m // AR_LIKE_c # E: Unsupported operand types + +AR_b // AR_LIKE_m # E: Unsupported operand types +AR_u // AR_LIKE_m # E: Unsupported operand types +AR_i // AR_LIKE_m # E: Unsupported operand types +AR_f // AR_LIKE_m # E: Unsupported operand types +AR_c // AR_LIKE_m # E: Unsupported operand types + +# Array multiplication + +AR_b *= AR_LIKE_u # E: incompatible type +AR_b *= AR_LIKE_i # E: incompatible type +AR_b *= AR_LIKE_f # E: incompatible type +AR_b *= AR_LIKE_c # E: incompatible type +AR_b *= AR_LIKE_m # E: incompatible type + +AR_u *= AR_LIKE_i # E: incompatible type +AR_u *= AR_LIKE_f # E: incompatible type +AR_u *= AR_LIKE_c # E: incompatible type +AR_u *= AR_LIKE_m # E: incompatible type + +AR_i *= AR_LIKE_f # E: incompatible type +AR_i *= AR_LIKE_c # E: incompatible type +AR_i *= AR_LIKE_m # E: incompatible type + +AR_f *= AR_LIKE_c # E: incompatible type +AR_f *= AR_LIKE_m # E: incompatible type + +# Array power + +AR_b **= AR_LIKE_b # E: Invalid self argument +AR_b **= AR_LIKE_u # E: Invalid self argument +AR_b **= AR_LIKE_i # E: Invalid self argument +AR_b **= AR_LIKE_f # E: Invalid self argument +AR_b **= AR_LIKE_c # E: Invalid self argument + +AR_u **= AR_LIKE_i # E: incompatible type +AR_u **= AR_LIKE_f # E: incompatible type +AR_u **= AR_LIKE_c # E: 
incompatible type + +AR_i **= AR_LIKE_f # E: incompatible type +AR_i **= AR_LIKE_c # E: incompatible type + +AR_f **= AR_LIKE_c # E: incompatible type + +# Scalars + +b_ - b_ # E: No overload variant + +dt + dt # E: Unsupported operand types +td - dt # E: Unsupported operand types +td % 1 # E: Unsupported operand types +td / dt # E: No overload +td % dt # E: Unsupported operand types + +-b_ # E: Unsupported operand type ++b_ # E: Unsupported operand type diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/array_constructors.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/array_constructors.pyi new file mode 100644 index 0000000..4f0a60b --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/array_constructors.pyi @@ -0,0 +1,31 @@ +import numpy as np + +a: np.ndarray +generator = (i for i in range(10)) + +np.require(a, requirements=1) # E: No overload variant +np.require(a, requirements="TEST") # E: incompatible type + +np.zeros("test") # E: incompatible type +np.zeros() # E: require at least one argument + +np.ones("test") # E: incompatible type +np.ones() # E: require at least one argument + +np.array(0, float, True) # E: No overload variant + +np.linspace(None, 'bob') # E: No overload variant +np.linspace(0, 2, num=10.0) # E: No overload variant +np.linspace(0, 2, endpoint='True') # E: No overload variant +np.linspace(0, 2, retstep=b'False') # E: No overload variant +np.linspace(0, 2, dtype=0) # E: No overload variant +np.linspace(0, 2, axis=None) # E: No overload variant + +np.logspace(None, 'bob') # E: Argument 1 +np.logspace(0, 2, base=None) # E: Argument "base" + +np.geomspace(None, 'bob') # E: Argument 1 + +np.stack(generator) # E: No overload variant +np.hstack({1, 2}) # E: No overload variant +np.vstack(1) # E: No overload variant diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/array_like.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/array_like.pyi new file mode 100644 index 0000000..3bbd290 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/array_like.pyi @@ -0,0 +1,16 @@ +import numpy as np +from numpy.typing import ArrayLike + + +class A: + pass + + +x1: ArrayLike = (i for i in range(10)) # E: Incompatible types in assignment +x2: ArrayLike = A() # E: Incompatible types in assignment +x3: ArrayLike = {1: "foo", 2: "bar"} # E: Incompatible types in assignment + +scalar = np.int64(1) +scalar.__array__(dtype=np.float64) # E: No overload variant +array = np.array([1]) +array.__array__(dtype=np.float64) # E: No overload variant diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/array_pad.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/array_pad.pyi new file mode 100644 index 0000000..2be51a8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/array_pad.pyi @@ -0,0 +1,6 @@ +import numpy as np +import numpy.typing as npt + +AR_i8: npt.NDArray[np.int64] + +np.pad(AR_i8, 2, mode="bob") # E: No overload variant diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/arrayprint.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/arrayprint.pyi new file mode 100644 index 0000000..86297a0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/arrayprint.pyi @@ -0,0 +1,13 @@ +from typing import Callable, Any +import numpy as np + +AR: np.ndarray +func1: Callable[[Any], str] +func2: 
Callable[[np.integer[Any]], str] + +np.array2string(AR, style=None) # E: Unexpected keyword argument +np.array2string(AR, legacy="1.14") # E: incompatible type +np.array2string(AR, sign="*") # E: incompatible type +np.array2string(AR, floatmode="default") # E: incompatible type +np.array2string(AR, formatter={"A": func1}) # E: incompatible type +np.array2string(AR, formatter={"float": func2}) # E: Incompatible types diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/arrayterator.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/arrayterator.pyi new file mode 100644 index 0000000..c50fb2e --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/arrayterator.pyi @@ -0,0 +1,14 @@ +from typing import Any +import numpy as np + +AR_i8: np.ndarray[Any, np.dtype[np.int64]] +ar_iter = np.lib.Arrayterator(AR_i8) + +np.lib.Arrayterator(np.int64()) # E: incompatible type +ar_iter.shape = (10, 5) # E: is read-only +ar_iter[None] # E: Invalid index type +ar_iter[None, 1] # E: Invalid index type +ar_iter[np.intp()] # E: Invalid index type +ar_iter[np.intp(), ...] # E: Invalid index type +ar_iter[AR_i8] # E: Invalid index type +ar_iter[AR_i8, :] # E: Invalid index type diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/bitwise_ops.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/bitwise_ops.pyi new file mode 100644 index 0000000..ee90900 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/bitwise_ops.pyi @@ -0,0 +1,20 @@ +import numpy as np + +i8 = np.int64() +i4 = np.int32() +u8 = np.uint64() +b_ = np.bool_() +i = int() + +f8 = np.float64() + +b_ >> f8 # E: No overload variant +i8 << f8 # E: No overload variant +i | f8 # E: Unsupported operand types +i8 ^ f8 # E: No overload variant +u8 & f8 # E: No overload variant +~f8 # E: Unsupported operand type + +# mypys' error message for `NoReturn` is unfortunately pretty bad +# TODO: Re-enable this once we add support for numerical precision for `number`s +# a = u8 | 0 # E: Need type annotation diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/char.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/char.pyi new file mode 100644 index 0000000..320f05d --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/char.pyi @@ -0,0 +1,66 @@ +import numpy as np +import numpy.typing as npt + +AR_U: npt.NDArray[np.str_] +AR_S: npt.NDArray[np.bytes_] + +np.char.equal(AR_U, AR_S) # E: incompatible type + +np.char.not_equal(AR_U, AR_S) # E: incompatible type + +np.char.greater_equal(AR_U, AR_S) # E: incompatible type + +np.char.less_equal(AR_U, AR_S) # E: incompatible type + +np.char.greater(AR_U, AR_S) # E: incompatible type + +np.char.less(AR_U, AR_S) # E: incompatible type + +np.char.encode(AR_S) # E: incompatible type +np.char.decode(AR_U) # E: incompatible type + +np.char.join(AR_U, b"_") # E: incompatible type +np.char.join(AR_S, "_") # E: incompatible type + +np.char.ljust(AR_U, 5, fillchar=b"a") # E: incompatible type +np.char.ljust(AR_S, 5, fillchar="a") # E: incompatible type +np.char.rjust(AR_U, 5, fillchar=b"a") # E: incompatible type +np.char.rjust(AR_S, 5, fillchar="a") # E: incompatible type + +np.char.lstrip(AR_U, chars=b"a") # E: incompatible type +np.char.lstrip(AR_S, chars="a") # E: incompatible type +np.char.strip(AR_U, chars=b"a") # E: incompatible type +np.char.strip(AR_S, chars="a") # E: incompatible type +np.char.rstrip(AR_U, 
chars=b"a") # E: incompatible type +np.char.rstrip(AR_S, chars="a") # E: incompatible type + +np.char.partition(AR_U, b"a") # E: incompatible type +np.char.partition(AR_S, "a") # E: incompatible type +np.char.rpartition(AR_U, b"a") # E: incompatible type +np.char.rpartition(AR_S, "a") # E: incompatible type + +np.char.replace(AR_U, b"_", b"-") # E: incompatible type +np.char.replace(AR_S, "_", "-") # E: incompatible type + +np.char.split(AR_U, b"_") # E: incompatible type +np.char.split(AR_S, "_") # E: incompatible type +np.char.rsplit(AR_U, b"_") # E: incompatible type +np.char.rsplit(AR_S, "_") # E: incompatible type + +np.char.count(AR_U, b"a", start=[1, 2, 3]) # E: incompatible type +np.char.count(AR_S, "a", end=9) # E: incompatible type + +np.char.endswith(AR_U, b"a", start=[1, 2, 3]) # E: incompatible type +np.char.endswith(AR_S, "a", end=9) # E: incompatible type +np.char.startswith(AR_U, b"a", start=[1, 2, 3]) # E: incompatible type +np.char.startswith(AR_S, "a", end=9) # E: incompatible type + +np.char.find(AR_U, b"a", start=[1, 2, 3]) # E: incompatible type +np.char.find(AR_S, "a", end=9) # E: incompatible type +np.char.rfind(AR_U, b"a", start=[1, 2, 3]) # E: incompatible type +np.char.rfind(AR_S, "a", end=9) # E: incompatible type + +np.char.index(AR_U, b"a", start=[1, 2, 3]) # E: incompatible type +np.char.index(AR_S, "a", end=9) # E: incompatible type +np.char.rindex(AR_U, b"a", start=[1, 2, 3]) # E: incompatible type +np.char.rindex(AR_S, "a", end=9) # E: incompatible type diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/chararray.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/chararray.pyi new file mode 100644 index 0000000..ebc182e --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/chararray.pyi @@ -0,0 +1,62 @@ +import numpy as np +from typing import Any + +AR_U: np.chararray[Any, np.dtype[np.str_]] +AR_S: np.chararray[Any, np.dtype[np.bytes_]] + +AR_S.encode() # E: Invalid self argument +AR_U.decode() # E: Invalid self argument + +AR_U.join(b"_") # E: incompatible type +AR_S.join("_") # E: incompatible type + +AR_U.ljust(5, fillchar=b"a") # E: incompatible type +AR_S.ljust(5, fillchar="a") # E: incompatible type +AR_U.rjust(5, fillchar=b"a") # E: incompatible type +AR_S.rjust(5, fillchar="a") # E: incompatible type + +AR_U.lstrip(chars=b"a") # E: incompatible type +AR_S.lstrip(chars="a") # E: incompatible type +AR_U.strip(chars=b"a") # E: incompatible type +AR_S.strip(chars="a") # E: incompatible type +AR_U.rstrip(chars=b"a") # E: incompatible type +AR_S.rstrip(chars="a") # E: incompatible type + +AR_U.partition(b"a") # E: incompatible type +AR_S.partition("a") # E: incompatible type +AR_U.rpartition(b"a") # E: incompatible type +AR_S.rpartition("a") # E: incompatible type + +AR_U.replace(b"_", b"-") # E: incompatible type +AR_S.replace("_", "-") # E: incompatible type + +AR_U.split(b"_") # E: incompatible type +AR_S.split("_") # E: incompatible type +AR_S.split(1) # E: incompatible type +AR_U.rsplit(b"_") # E: incompatible type +AR_S.rsplit("_") # E: incompatible type + +AR_U.count(b"a", start=[1, 2, 3]) # E: incompatible type +AR_S.count("a", end=9) # E: incompatible type + +AR_U.endswith(b"a", start=[1, 2, 3]) # E: incompatible type +AR_S.endswith("a", end=9) # E: incompatible type +AR_U.startswith(b"a", start=[1, 2, 3]) # E: incompatible type +AR_S.startswith("a", end=9) # E: incompatible type + +AR_U.find(b"a", start=[1, 2, 3]) # E: incompatible type +AR_S.find("a", end=9) # E: 
incompatible type +AR_U.rfind(b"a", start=[1, 2, 3]) # E: incompatible type +AR_S.rfind("a", end=9) # E: incompatible type + +AR_U.index(b"a", start=[1, 2, 3]) # E: incompatible type +AR_S.index("a", end=9) # E: incompatible type +AR_U.rindex(b"a", start=[1, 2, 3]) # E: incompatible type +AR_S.rindex("a", end=9) # E: incompatible type + +AR_U == AR_S # E: Unsupported operand types +AR_U != AR_S # E: Unsupported operand types +AR_U >= AR_S # E: Unsupported operand types +AR_U <= AR_S # E: Unsupported operand types +AR_U > AR_S # E: Unsupported operand types +AR_U < AR_S # E: Unsupported operand types diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/comparisons.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/comparisons.pyi new file mode 100644 index 0000000..febd0a1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/comparisons.pyi @@ -0,0 +1,27 @@ +from typing import Any +import numpy as np + +AR_i: np.ndarray[Any, np.dtype[np.int64]] +AR_f: np.ndarray[Any, np.dtype[np.float64]] +AR_c: np.ndarray[Any, np.dtype[np.complex128]] +AR_m: np.ndarray[Any, np.dtype[np.timedelta64]] +AR_M: np.ndarray[Any, np.dtype[np.datetime64]] + +AR_f > AR_m # E: Unsupported operand types +AR_c > AR_m # E: Unsupported operand types + +AR_m > AR_f # E: Unsupported operand types +AR_m > AR_c # E: Unsupported operand types + +AR_i > AR_M # E: Unsupported operand types +AR_f > AR_M # E: Unsupported operand types +AR_m > AR_M # E: Unsupported operand types + +AR_M > AR_i # E: Unsupported operand types +AR_M > AR_f # E: Unsupported operand types +AR_M > AR_m # E: Unsupported operand types + +AR_i > str() # E: No overload variant +AR_i > bytes() # E: No overload variant +str() > AR_M # E: Unsupported operand types +bytes() > AR_M # E: Unsupported operand types diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/constants.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/constants.pyi new file mode 100644 index 0000000..324cbe9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/constants.pyi @@ -0,0 +1,7 @@ +import numpy as np + +np.Inf = np.Inf # E: Cannot assign to final +np.ALLOW_THREADS = np.ALLOW_THREADS # E: Cannot assign to final +np.little_endian = np.little_endian # E: Cannot assign to final +np.UFUNC_PYVALS_NAME = "bob" # E: Incompatible types +np.CLIP = 2 # E: Incompatible types diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/datasource.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/datasource.pyi new file mode 100644 index 0000000..345277d --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/datasource.pyi @@ -0,0 +1,15 @@ +from pathlib import Path +import numpy as np + +path: Path +d1: np.DataSource + +d1.abspath(path) # E: incompatible type +d1.abspath(b"...") # E: incompatible type + +d1.exists(path) # E: incompatible type +d1.exists(b"...") # E: incompatible type + +d1.open(path, "r") # E: incompatible type +d1.open(b"...", encoding="utf8") # E: incompatible type +d1.open(None, newline="/n") # E: incompatible type diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/dtype.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/dtype.pyi new file mode 100644 index 0000000..0f3810f --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/dtype.pyi @@ -0,0 +1,20 @@ +import numpy as np + + +class Test1: + 
not_dtype = np.dtype(float) + + +class Test2: + dtype = float + + +np.dtype(Test1()) # E: No overload variant of "dtype" matches +np.dtype(Test2()) # E: incompatible type + +np.dtype( # E: No overload variant of "dtype" matches + { + "field1": (float, 1), + "field2": (int, 3), + } +) diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/einsumfunc.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/einsumfunc.pyi new file mode 100644 index 0000000..33722f8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/einsumfunc.pyi @@ -0,0 +1,15 @@ +from typing import List, Any +import numpy as np + +AR_i: np.ndarray[Any, np.dtype[np.int64]] +AR_f: np.ndarray[Any, np.dtype[np.float64]] +AR_m: np.ndarray[Any, np.dtype[np.timedelta64]] +AR_O: np.ndarray[Any, np.dtype[np.object_]] +AR_U: np.ndarray[Any, np.dtype[np.str_]] + +np.einsum("i,i->i", AR_i, AR_m) # E: incompatible type +np.einsum("i,i->i", AR_O, AR_O) # E: incompatible type +np.einsum("i,i->i", AR_f, AR_f, dtype=np.int32) # E: incompatible type +np.einsum("i,i->i", AR_i, AR_i, dtype=np.timedelta64, casting="unsafe") # E: No overload variant +np.einsum("i,i->i", AR_i, AR_i, out=AR_U) # E: Value of type variable "_ArrayType" of "einsum" cannot be +np.einsum("i,i->i", AR_i, AR_i, out=AR_U, casting="unsafe") # E: No overload variant diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/false_positives.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/false_positives.pyi new file mode 100644 index 0000000..7e79230 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/false_positives.pyi @@ -0,0 +1,11 @@ +import numpy as np +import numpy.typing as npt + +AR_f8: npt.NDArray[np.float64] + +# NOTE: Mypy bug presumably due to the special-casing of heterogeneous tuples; +# xref numpy/numpy#20901 +# +# The expected output should be no different than, e.g., when using a +# list instead of a tuple +np.concatenate(([1], AR_f8)) # E: Argument 1 to "concatenate" has incompatible type diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/flatiter.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/flatiter.pyi new file mode 100644 index 0000000..544ffbe --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/flatiter.pyi @@ -0,0 +1,25 @@ +from typing import Any + +import numpy as np +from numpy.typing import _SupportsArray + + +class Index: + def __index__(self) -> int: + ... 
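
# A brief runtime sketch (editorial addition, not part of the vendored numpy
# files) of the false positive noted in false_positives.pyi above: passing a
# heterogeneous tuple to np.concatenate is valid at runtime and behaves the
# same as the list form that mypy accepts. The array name is illustrative.
import numpy as np

arr = np.array([2.0, 3.0])
print(np.concatenate(([1], arr)))  # tuple of array-likes: flagged by mypy
print(np.concatenate([[1], arr]))  # list of array-likes: accepted
# both print [1. 2. 3.]
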
+ + +a: "np.flatiter[np.ndarray]" +supports_array: _SupportsArray + +a.base = Any # E: Property "base" defined in "flatiter" is read-only +a.coords = Any # E: Property "coords" defined in "flatiter" is read-only +a.index = Any # E: Property "index" defined in "flatiter" is read-only +a.copy(order='C') # E: Unexpected keyword argument + +# NOTE: Contrary to `ndarray.__getitem__` its counterpart in `flatiter` +# does not accept objects with the `__array__` or `__index__` protocols; +# boolean indexing is just plain broken (gh-17175) +a[np.bool_()] # E: No overload variant of "__getitem__" +a[Index()] # E: No overload variant of "__getitem__" +a[supports_array] # E: No overload variant of "__getitem__" diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/fromnumeric.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/fromnumeric.pyi new file mode 100644 index 0000000..8fafed1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/fromnumeric.pyi @@ -0,0 +1,154 @@ +"""Tests for :mod:`numpy.core.fromnumeric`.""" + +import numpy as np + +A = np.array(True, ndmin=2, dtype=bool) +A.setflags(write=False) + +a = np.bool_(True) + +np.take(a, None) # E: incompatible type +np.take(a, axis=1.0) # E: incompatible type +np.take(A, out=1) # E: incompatible type +np.take(A, mode="bob") # E: incompatible type + +np.reshape(a, None) # E: Argument 2 to "reshape" has incompatible type +np.reshape(A, 1, order="bob") # E: Argument "order" to "reshape" has incompatible type + +np.choose(a, None) # E: incompatible type +np.choose(a, out=1.0) # E: incompatible type +np.choose(A, mode="bob") # E: incompatible type + +np.repeat(a, None) # E: Argument 2 to "repeat" has incompatible type +np.repeat(A, 1, axis=1.0) # E: Argument "axis" to "repeat" has incompatible type + +np.swapaxes(A, None, 1) # E: Argument 2 to "swapaxes" has incompatible type +np.swapaxes(A, 1, [0]) # E: Argument 3 to "swapaxes" has incompatible type + +np.transpose(A, axes=1.0) # E: Argument "axes" to "transpose" has incompatible type + +np.partition(a, None) # E: Argument 2 to "partition" has incompatible type +np.partition( + a, 0, axis="bob" # E: Argument "axis" to "partition" has incompatible type +) +np.partition( + A, 0, kind="bob" # E: Argument "kind" to "partition" has incompatible type +) +np.partition( + A, 0, order=range(5) # E: Argument "order" to "partition" has incompatible type +) + +np.argpartition( + a, None # E: incompatible type +) +np.argpartition( + a, 0, axis="bob" # E: incompatible type +) +np.argpartition( + A, 0, kind="bob" # E: incompatible type +) +np.argpartition( + A, 0, order=range(5) # E: Argument "order" to "argpartition" has incompatible type +) + +np.sort(A, axis="bob") # E: Argument "axis" to "sort" has incompatible type +np.sort(A, kind="bob") # E: Argument "kind" to "sort" has incompatible type +np.sort(A, order=range(5)) # E: Argument "order" to "sort" has incompatible type + +np.argsort(A, axis="bob") # E: Argument "axis" to "argsort" has incompatible type +np.argsort(A, kind="bob") # E: Argument "kind" to "argsort" has incompatible type +np.argsort(A, order=range(5)) # E: Argument "order" to "argsort" has incompatible type + +np.argmax(A, axis="bob") # E: No overload variant of "argmax" matches argument type +np.argmax(A, kind="bob") # E: No overload variant of "argmax" matches argument type + +np.argmin(A, axis="bob") # E: No overload variant of "argmin" matches argument type +np.argmin(A, kind="bob") # E: No overload variant of "argmin" 
matches argument type + +np.searchsorted( # E: No overload variant of "searchsorted" matches argument type + A[0], 0, side="bob" +) +np.searchsorted( # E: No overload variant of "searchsorted" matches argument type + A[0], 0, sorter=1.0 +) + +np.resize(A, 1.0) # E: Argument 2 to "resize" has incompatible type + +np.squeeze(A, 1.0) # E: No overload variant of "squeeze" matches argument type + +np.diagonal(A, offset=None) # E: Argument "offset" to "diagonal" has incompatible type +np.diagonal(A, axis1="bob") # E: Argument "axis1" to "diagonal" has incompatible type +np.diagonal(A, axis2=[]) # E: Argument "axis2" to "diagonal" has incompatible type + +np.trace(A, offset=None) # E: Argument "offset" to "trace" has incompatible type +np.trace(A, axis1="bob") # E: Argument "axis1" to "trace" has incompatible type +np.trace(A, axis2=[]) # E: Argument "axis2" to "trace" has incompatible type + +np.ravel(a, order="bob") # E: Argument "order" to "ravel" has incompatible type + +np.compress( + [True], A, axis=1.0 # E: Argument "axis" to "compress" has incompatible type +) + +np.clip(a, 1, 2, out=1) # E: No overload variant of "clip" matches argument type +np.clip(1, None, None) # E: No overload variant of "clip" matches argument type + +np.sum(a, axis=1.0) # E: incompatible type +np.sum(a, keepdims=1.0) # E: incompatible type +np.sum(a, initial=[1]) # E: incompatible type + +np.all(a, axis=1.0) # E: No overload variant +np.all(a, keepdims=1.0) # E: No overload variant +np.all(a, out=1.0) # E: No overload variant + +np.any(a, axis=1.0) # E: No overload variant +np.any(a, keepdims=1.0) # E: No overload variant +np.any(a, out=1.0) # E: No overload variant + +np.cumsum(a, axis=1.0) # E: incompatible type +np.cumsum(a, dtype=1.0) # E: incompatible type +np.cumsum(a, out=1.0) # E: incompatible type + +np.ptp(a, axis=1.0) # E: incompatible type +np.ptp(a, keepdims=1.0) # E: incompatible type +np.ptp(a, out=1.0) # E: incompatible type + +np.amax(a, axis=1.0) # E: incompatible type +np.amax(a, keepdims=1.0) # E: incompatible type +np.amax(a, out=1.0) # E: incompatible type +np.amax(a, initial=[1.0]) # E: incompatible type +np.amax(a, where=[1.0]) # E: incompatible type + +np.amin(a, axis=1.0) # E: incompatible type +np.amin(a, keepdims=1.0) # E: incompatible type +np.amin(a, out=1.0) # E: incompatible type +np.amin(a, initial=[1.0]) # E: incompatible type +np.amin(a, where=[1.0]) # E: incompatible type + +np.prod(a, axis=1.0) # E: incompatible type +np.prod(a, out=False) # E: incompatible type +np.prod(a, keepdims=1.0) # E: incompatible type +np.prod(a, initial=int) # E: incompatible type +np.prod(a, where=1.0) # E: incompatible type + +np.cumprod(a, axis=1.0) # E: Argument "axis" to "cumprod" has incompatible type +np.cumprod(a, out=False) # E: Argument "out" to "cumprod" has incompatible type + +np.size(a, axis=1.0) # E: Argument "axis" to "size" has incompatible type + +np.around(a, decimals=1.0) # E: incompatible type +np.around(a, out=type) # E: incompatible type + +np.mean(a, axis=1.0) # E: incompatible type +np.mean(a, out=False) # E: incompatible type +np.mean(a, keepdims=1.0) # E: incompatible type + +np.std(a, axis=1.0) # E: incompatible type +np.std(a, out=False) # E: incompatible type +np.std(a, ddof='test') # E: incompatible type +np.std(a, keepdims=1.0) # E: incompatible type + +np.var(a, axis=1.0) # E: incompatible type +np.var(a, out=False) # E: incompatible type +np.var(a, ddof='test') # E: incompatible type +np.var(a, keepdims=1.0) # E: incompatible type diff --git 
a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/histograms.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/histograms.pyi new file mode 100644 index 0000000..ad15148 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/histograms.pyi @@ -0,0 +1,13 @@ +import numpy as np +import numpy.typing as npt + +AR_i8: npt.NDArray[np.int64] +AR_f8: npt.NDArray[np.float64] + +np.histogram_bin_edges(AR_i8, range=(0, 1, 2)) # E: incompatible type + +np.histogram(AR_i8, range=(0, 1, 2)) # E: incompatible type +np.histogram(AR_i8, normed=True) # E: incompatible type + +np.histogramdd(AR_i8, range=(0, 1)) # E: incompatible type +np.histogramdd(AR_i8, range=[(0, 1, 2)]) # E: incompatible type diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/index_tricks.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/index_tricks.pyi new file mode 100644 index 0000000..565e81a --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/index_tricks.pyi @@ -0,0 +1,15 @@ +from typing import List +import numpy as np + +AR_LIKE_i: List[int] +AR_LIKE_f: List[float] + +np.ndindex([1, 2, 3]) # E: No overload variant +np.unravel_index(AR_LIKE_f, (1, 2, 3)) # E: incompatible type +np.ravel_multi_index(AR_LIKE_i, (1, 2, 3), mode="bob") # E: No overload variant +np.mgrid[1] # E: Invalid index type +np.mgrid[...] # E: Invalid index type +np.ogrid[1] # E: Invalid index type +np.ogrid[...] # E: Invalid index type +np.fill_diagonal(AR_LIKE_f, 2) # E: incompatible type +np.diag_indices(1.0) # E: incompatible type diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/lib_function_base.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/lib_function_base.pyi new file mode 100644 index 0000000..9cad2da --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/lib_function_base.pyi @@ -0,0 +1,53 @@ +from typing import Any + +import numpy as np +import numpy.typing as npt + +AR_f8: npt.NDArray[np.float64] +AR_c16: npt.NDArray[np.complex128] +AR_m: npt.NDArray[np.timedelta64] +AR_M: npt.NDArray[np.datetime64] +AR_O: npt.NDArray[np.object_] + +def func(a: int) -> None: ... 
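
# A hedged runtime sketch (editorial addition) of the `range` shapes that the
# histograms.pyi cases above reject: np.histogram expects a single
# (lower, upper) pair, while np.histogramdd expects one such pair per
# dimension. Variable names are illustrative.
import numpy as np

samples = np.arange(10)
counts, edges = np.histogram(samples, bins=5, range=(0, 9))  # 2-tuple: OK

points = np.random.rand(100, 2)
hist, _ = np.histogramdd(points, bins=4, range=[(0, 1), (0, 1)])  # one pair per axis
print(counts.sum(), hist.shape)  # 10 (4, 4)
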
+ +np.average(AR_m) # E: incompatible type +np.select(1, [AR_f8]) # E: incompatible type +np.angle(AR_m) # E: incompatible type +np.unwrap(AR_m) # E: incompatible type +np.unwrap(AR_c16) # E: incompatible type +np.trim_zeros(1) # E: incompatible type +np.place(1, [True], 1.5) # E: incompatible type +np.vectorize(1) # E: incompatible type +np.add_newdoc("__main__", 1.5, "docstring") # E: incompatible type +np.place(AR_f8, slice(None), 5) # E: incompatible type + +np.interp(AR_f8, AR_c16, AR_f8) # E: incompatible type +np.interp(AR_c16, AR_f8, AR_f8) # E: incompatible type +np.interp(AR_f8, AR_f8, AR_f8, period=AR_c16) # E: No overload variant +np.interp(AR_f8, AR_f8, AR_O) # E: incompatible type + +np.cov(AR_m) # E: incompatible type +np.cov(AR_O) # E: incompatible type +np.corrcoef(AR_m) # E: incompatible type +np.corrcoef(AR_O) # E: incompatible type +np.corrcoef(AR_f8, bias=True) # E: No overload variant +np.corrcoef(AR_f8, ddof=2) # E: No overload variant +np.blackman(1j) # E: incompatible type +np.bartlett(1j) # E: incompatible type +np.hanning(1j) # E: incompatible type +np.hamming(1j) # E: incompatible type +np.hamming(AR_c16) # E: incompatible type +np.kaiser(1j, 1) # E: incompatible type +np.sinc(AR_O) # E: incompatible type +np.median(AR_M) # E: incompatible type + +np.add_newdoc_ufunc(func, "docstring") # E: incompatible type +np.percentile(AR_f8, 50j) # E: No overload variant +np.percentile(AR_f8, 50, interpolation="bob") # E: No overload variant +np.quantile(AR_f8, 0.5j) # E: No overload variant +np.quantile(AR_f8, 0.5, interpolation="bob") # E: No overload variant +np.meshgrid(AR_f8, AR_f8, indexing="bob") # E: incompatible type +np.delete(AR_f8, AR_f8) # E: incompatible type +np.insert(AR_f8, AR_f8, 1.5) # E: incompatible type +np.digitize(AR_f8, 1j) # E: No overload variant diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/lib_polynomial.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/lib_polynomial.pyi new file mode 100644 index 0000000..ca02d7b --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/lib_polynomial.pyi @@ -0,0 +1,29 @@ +import numpy as np +import numpy.typing as npt + +AR_f8: npt.NDArray[np.float64] +AR_c16: npt.NDArray[np.complex128] +AR_O: npt.NDArray[np.object_] +AR_U: npt.NDArray[np.str_] + +poly_obj: np.poly1d + +np.polyint(AR_U) # E: incompatible type +np.polyint(AR_f8, m=1j) # E: No overload variant + +np.polyder(AR_U) # E: incompatible type +np.polyder(AR_f8, m=1j) # E: No overload variant + +np.polyfit(AR_O, AR_f8, 1) # E: incompatible type +np.polyfit(AR_f8, AR_f8, 1, rcond=1j) # E: No overload variant +np.polyfit(AR_f8, AR_f8, 1, w=AR_c16) # E: incompatible type +np.polyfit(AR_f8, AR_f8, 1, cov="bob") # E: No overload variant + +np.polyval(AR_f8, AR_U) # E: incompatible type +np.polyadd(AR_f8, AR_U) # E: incompatible type +np.polysub(AR_f8, AR_U) # E: incompatible type +np.polymul(AR_f8, AR_U) # E: incompatible type +np.polydiv(AR_f8, AR_U) # E: incompatible type + +5**poly_obj # E: No overload variant +hash(poly_obj) diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/lib_utils.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/lib_utils.pyi new file mode 100644 index 0000000..e16c926 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/lib_utils.pyi @@ -0,0 +1,13 @@ +import numpy as np + +np.deprecate(1) # E: No overload variant + +np.deprecate_with_doc(1) # E: incompatible type + 
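
# Editorial sketch of the poly1d behaviour behind the lib_polynomial.pyi
# cases above: only a scalar base raised to a poly1d power is rejected;
# raising the polynomial itself to an integer power is fine, as is calling it.
import numpy as np

p = np.poly1d([1.0, 0.0])  # p(x) = x
print(p ** 2)              # x**2: supported
print(p(3.0))              # evaluates to 3.0
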
+np.byte_bounds(1) # E: incompatible type + +np.who(1) # E: incompatible type + +np.lookfor(None) # E: incompatible type + +np.safe_eval(None) # E: incompatible type diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/lib_version.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/lib_version.pyi new file mode 100644 index 0000000..2758cfe --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/lib_version.pyi @@ -0,0 +1,6 @@ +from numpy.lib import NumpyVersion + +version: NumpyVersion + +NumpyVersion(b"1.8.0") # E: incompatible type +version >= b"1.8.0" # E: Unsupported operand types diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/linalg.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/linalg.pyi new file mode 100644 index 0000000..da93903 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/linalg.pyi @@ -0,0 +1,48 @@ +import numpy as np +import numpy.typing as npt + +AR_f8: npt.NDArray[np.float64] +AR_O: npt.NDArray[np.object_] +AR_M: npt.NDArray[np.datetime64] + +np.linalg.tensorsolve(AR_O, AR_O) # E: incompatible type + +np.linalg.solve(AR_O, AR_O) # E: incompatible type + +np.linalg.tensorinv(AR_O) # E: incompatible type + +np.linalg.inv(AR_O) # E: incompatible type + +np.linalg.matrix_power(AR_M, 5) # E: incompatible type + +np.linalg.cholesky(AR_O) # E: incompatible type + +np.linalg.qr(AR_O) # E: incompatible type +np.linalg.qr(AR_f8, mode="bob") # E: No overload variant + +np.linalg.eigvals(AR_O) # E: incompatible type + +np.linalg.eigvalsh(AR_O) # E: incompatible type +np.linalg.eigvalsh(AR_O, UPLO="bob") # E: No overload variant + +np.linalg.eig(AR_O) # E: incompatible type + +np.linalg.eigh(AR_O) # E: incompatible type +np.linalg.eigh(AR_O, UPLO="bob") # E: No overload variant + +np.linalg.svd(AR_O) # E: incompatible type + +np.linalg.cond(AR_O) # E: incompatible type +np.linalg.cond(AR_f8, p="bob") # E: incompatible type + +np.linalg.matrix_rank(AR_O) # E: incompatible type + +np.linalg.pinv(AR_O) # E: incompatible type + +np.linalg.slogdet(AR_O) # E: incompatible type + +np.linalg.det(AR_O) # E: incompatible type + +np.linalg.norm(AR_f8, ord="bob") # E: No overload variant + +np.linalg.multi_dot([AR_M]) # E: incompatible type diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/memmap.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/memmap.pyi new file mode 100644 index 0000000..434870b --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/memmap.pyi @@ -0,0 +1,5 @@ +import numpy as np + +with open("file.txt", "r") as f: + np.memmap(f) # E: No overload variant +np.memmap("test.txt", shape=[10, 5]) # E: No overload variant diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/modules.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/modules.pyi new file mode 100644 index 0000000..59e724f --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/modules.pyi @@ -0,0 +1,18 @@ +import numpy as np + +np.testing.bob # E: Module has no attribute +np.bob # E: Module has no attribute + +# Stdlib modules in the namespace by accident +np.warnings # E: Module has no attribute +np.sys # E: Module has no attribute +np.os # E: Module has no attribute +np.math # E: Module has no attribute + +# Public sub-modules that are not imported to their parent module by default; +# e.g. 
one must first execute `import numpy.lib.recfunctions` +np.lib.recfunctions # E: Module has no attribute + +np.__NUMPY_SETUP__ # E: Module has no attribute +np.__deprecated_attrs__ # E: Module has no attribute +np.__expired_functions__ # E: Module has no attribute diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/multiarray.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/multiarray.pyi new file mode 100644 index 0000000..22bcf8c --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/multiarray.pyi @@ -0,0 +1,56 @@ +from typing import List +import numpy as np +import numpy.typing as npt + +i8: np.int64 + +AR_b: npt.NDArray[np.bool_] +AR_u1: npt.NDArray[np.uint8] +AR_i8: npt.NDArray[np.int64] +AR_f8: npt.NDArray[np.float64] +AR_M: npt.NDArray[np.datetime64] + +M: np.datetime64 + +AR_LIKE_f: List[float] + +def func(a: int) -> None: ... + +np.where(AR_b, 1) # E: No overload variant + +np.can_cast(AR_f8, 1) # E: incompatible type + +np.vdot(AR_M, AR_M) # E: incompatible type + +np.copyto(AR_LIKE_f, AR_f8) # E: incompatible type + +np.putmask(AR_LIKE_f, [True, True, False], 1.5) # E: incompatible type + +np.packbits(AR_f8) # E: incompatible type +np.packbits(AR_u1, bitorder=">") # E: incompatible type + +np.unpackbits(AR_i8) # E: incompatible type +np.unpackbits(AR_u1, bitorder=">") # E: incompatible type + +np.shares_memory(1, 1, max_work=i8) # E: incompatible type +np.may_share_memory(1, 1, max_work=i8) # E: incompatible type + +np.arange(M) # E: No overload variant +np.arange(stop=10) # E: No overload variant + +np.datetime_data(int) # E: incompatible type + +np.busday_offset("2012", 10) # E: incompatible type + +np.datetime_as_string("2012") # E: No overload variant + +np.compare_chararrays("a", b"a", "==", False) # E: No overload variant + +np.add_docstring(func, None) # E: incompatible type + +np.nested_iters([AR_i8, AR_i8]) # E: Missing positional argument +np.nested_iters([AR_i8, AR_i8], 0) # E: incompatible type +np.nested_iters([AR_i8, AR_i8], [0]) # E: incompatible type +np.nested_iters([AR_i8, AR_i8], [[0], [1]], flags=["test"]) # E: incompatible type +np.nested_iters([AR_i8, AR_i8], [[0], [1]], op_flags=[["test"]]) # E: incompatible type +np.nested_iters([AR_i8, AR_i8], [[0], [1]], buffersize=1.0) # E: incompatible type diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/ndarray.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/ndarray.pyi new file mode 100644 index 0000000..5a5130d --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/ndarray.pyi @@ -0,0 +1,11 @@ +import numpy as np + +# Ban setting dtype since mutating the type of the array in place +# makes having ndarray be generic over dtype impossible. Generally +# users should use `ndarray.view` in this situation anyway. See +# +# https://github.com/numpy/numpy-stubs/issues/7 +# +# for more context. +float_array = np.array([1.0]) +float_array.dtype = np.bool_ # E: Property "dtype" defined in "ndarray" is read-only diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/ndarray_misc.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/ndarray_misc.pyi new file mode 100644 index 0000000..8320a44 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/ndarray_misc.pyi @@ -0,0 +1,41 @@ +""" +Tests for miscellaneous (non-magic) ``np.ndarray``/``np.generic`` methods. 
+ +More extensive tests are performed for the methods' +function-based counterpart in `../from_numeric.py`. + +""" + +from typing import Any +import numpy as np + +f8: np.float64 +AR_f8: np.ndarray[Any, np.dtype[np.float64]] +AR_M: np.ndarray[Any, np.dtype[np.datetime64]] +AR_b: np.ndarray[Any, np.dtype[np.bool_]] + +ctypes_obj = AR_f8.ctypes + +reveal_type(ctypes_obj.get_data()) # E: has no attribute +reveal_type(ctypes_obj.get_shape()) # E: has no attribute +reveal_type(ctypes_obj.get_strides()) # E: has no attribute +reveal_type(ctypes_obj.get_as_parameter()) # E: has no attribute + +f8.argpartition(0) # E: has no attribute +f8.diagonal() # E: has no attribute +f8.dot(1) # E: has no attribute +f8.nonzero() # E: has no attribute +f8.partition(0) # E: has no attribute +f8.put(0, 2) # E: has no attribute +f8.setfield(2, np.float64) # E: has no attribute +f8.sort() # E: has no attribute +f8.trace() # E: has no attribute + +AR_M.__int__() # E: Invalid self argument +AR_M.__float__() # E: Invalid self argument +AR_M.__complex__() # E: Invalid self argument +AR_b.__index__() # E: Invalid self argument + +AR_f8[1.5] # E: No overload variant +AR_f8["field_a"] # E: No overload variant +AR_f8[["field_a", "field_b"]] # E: Invalid index type diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/nditer.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/nditer.pyi new file mode 100644 index 0000000..1e8e37e --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/nditer.pyi @@ -0,0 +1,8 @@ +import numpy as np + +class Test(np.nditer): ... # E: Cannot inherit from final class + +np.nditer([0, 1], flags=["test"]) # E: incompatible type +np.nditer([0, 1], op_flags=[["test"]]) # E: incompatible type +np.nditer([0, 1], itershape=(1.0,)) # E: incompatible type +np.nditer([0, 1], buffersize=1.0) # E: incompatible type diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/nested_sequence.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/nested_sequence.pyi new file mode 100644 index 0000000..e28661a --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/nested_sequence.pyi @@ -0,0 +1,17 @@ +from typing import Sequence, Tuple, List +import numpy.typing as npt + +a: Sequence[float] +b: List[complex] +c: Tuple[str, ...] +d: int +e: str + +def func(a: npt._NestedSequence[int]) -> None: + ... 
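
# Editorial sketch of what `npt._NestedSequence[int]` is meant to accept,
# complementing the rejections below: arbitrarily nested sequences whose
# leaves are int. `_NestedSequence` is a private numpy protocol, so this only
# illustrates the intent; the annotation is quoted to keep the snippet runnable.
from typing import List
import numpy.typing as npt

def takes_nested(a: "npt._NestedSequence[int]") -> None: ...

flat: List[int] = [1, 2, 3]
nested: List[List[int]] = [[1, 2], [3, 4]]
takes_nested(flat)    # Sequence[int]: accepted
takes_nested(nested)  # Sequence[Sequence[int]]: accepted
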
+ +reveal_type(func(a)) # E: incompatible type +reveal_type(func(b)) # E: incompatible type +reveal_type(func(c)) # E: incompatible type +reveal_type(func(d)) # E: incompatible type +reveal_type(func(e)) # E: incompatible type diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/npyio.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/npyio.pyi new file mode 100644 index 0000000..c91b4c9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/npyio.pyi @@ -0,0 +1,30 @@ +import pathlib +from typing import IO + +import numpy.typing as npt +import numpy as np + +str_path: str +bytes_path: bytes +pathlib_path: pathlib.Path +str_file: IO[str] +AR_i8: npt.NDArray[np.int64] + +np.load(str_file) # E: incompatible type + +np.save(bytes_path, AR_i8) # E: incompatible type +np.save(str_file, AR_i8) # E: incompatible type + +np.savez(bytes_path, AR_i8) # E: incompatible type +np.savez(str_file, AR_i8) # E: incompatible type + +np.savez_compressed(bytes_path, AR_i8) # E: incompatible type +np.savez_compressed(str_file, AR_i8) # E: incompatible type + +np.loadtxt(bytes_path) # E: incompatible type + +np.fromregex(bytes_path, ".", np.int64) # E: No overload variant + +np.recfromtxt(bytes_path) # E: incompatible type + +np.recfromcsv(bytes_path) # E: incompatible type diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/numerictypes.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/numerictypes.pyi new file mode 100644 index 0000000..a5c2814 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/numerictypes.pyi @@ -0,0 +1,13 @@ +import numpy as np + +# Technically this works, but probably shouldn't. See +# +# https://github.com/numpy/numpy/issues/16366 +# +np.maximum_sctype(1) # E: No overload variant + +np.issubsctype(1, np.int64) # E: incompatible type + +np.issubdtype(1, np.int64) # E: incompatible type + +np.find_common_type(np.int64, np.int64) # E: incompatible type diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/random.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/random.pyi new file mode 100644 index 0000000..c4d1e3e --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/random.pyi @@ -0,0 +1,61 @@ +import numpy as np +from typing import Any, List + +SEED_FLOAT: float = 457.3 +SEED_ARR_FLOAT: np.ndarray[Any, np.dtype[np.float64]] = np.array([1.0, 2, 3, 4]) +SEED_ARRLIKE_FLOAT: List[float] = [1.0, 2.0, 3.0, 4.0] +SEED_SEED_SEQ: np.random.SeedSequence = np.random.SeedSequence(0) +SEED_STR: str = "String seeding not allowed" +# default rng +np.random.default_rng(SEED_FLOAT) # E: incompatible type +np.random.default_rng(SEED_ARR_FLOAT) # E: incompatible type +np.random.default_rng(SEED_ARRLIKE_FLOAT) # E: incompatible type +np.random.default_rng(SEED_STR) # E: incompatible type + +# Seed Sequence +np.random.SeedSequence(SEED_FLOAT) # E: incompatible type +np.random.SeedSequence(SEED_ARR_FLOAT) # E: incompatible type +np.random.SeedSequence(SEED_ARRLIKE_FLOAT) # E: incompatible type +np.random.SeedSequence(SEED_SEED_SEQ) # E: incompatible type +np.random.SeedSequence(SEED_STR) # E: incompatible type + +seed_seq: np.random.bit_generator.SeedSequence = np.random.SeedSequence() +seed_seq.spawn(11.5) # E: incompatible type +seed_seq.generate_state(3.14) # E: incompatible type +seed_seq.generate_state(3, np.uint8) # E: incompatible type +seed_seq.generate_state(3, "uint8") # E: 
incompatible type +seed_seq.generate_state(3, "u1") # E: incompatible type +seed_seq.generate_state(3, np.uint16) # E: incompatible type +seed_seq.generate_state(3, "uint16") # E: incompatible type +seed_seq.generate_state(3, "u2") # E: incompatible type +seed_seq.generate_state(3, np.int32) # E: incompatible type +seed_seq.generate_state(3, "int32") # E: incompatible type +seed_seq.generate_state(3, "i4") # E: incompatible type + +# Bit Generators +np.random.MT19937(SEED_FLOAT) # E: incompatible type +np.random.MT19937(SEED_ARR_FLOAT) # E: incompatible type +np.random.MT19937(SEED_ARRLIKE_FLOAT) # E: incompatible type +np.random.MT19937(SEED_STR) # E: incompatible type + +np.random.PCG64(SEED_FLOAT) # E: incompatible type +np.random.PCG64(SEED_ARR_FLOAT) # E: incompatible type +np.random.PCG64(SEED_ARRLIKE_FLOAT) # E: incompatible type +np.random.PCG64(SEED_STR) # E: incompatible type + +np.random.Philox(SEED_FLOAT) # E: incompatible type +np.random.Philox(SEED_ARR_FLOAT) # E: incompatible type +np.random.Philox(SEED_ARRLIKE_FLOAT) # E: incompatible type +np.random.Philox(SEED_STR) # E: incompatible type + +np.random.SFC64(SEED_FLOAT) # E: incompatible type +np.random.SFC64(SEED_ARR_FLOAT) # E: incompatible type +np.random.SFC64(SEED_ARRLIKE_FLOAT) # E: incompatible type +np.random.SFC64(SEED_STR) # E: incompatible type + +# Generator +np.random.Generator(None) # E: incompatible type +np.random.Generator(12333283902830213) # E: incompatible type +np.random.Generator("OxFEEDF00D") # E: incompatible type +np.random.Generator([123, 234]) # E: incompatible type +np.random.Generator(np.array([123, 234], dtype="u4")) # E: incompatible type diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/rec.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/rec.pyi new file mode 100644 index 0000000..a57f1ba --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/rec.pyi @@ -0,0 +1,17 @@ +import numpy as np +import numpy.typing as npt + +AR_i8: npt.NDArray[np.int64] + +np.rec.fromarrays(1) # E: No overload variant +np.rec.fromarrays([1, 2, 3], dtype=[("f8", "f8")], formats=["f8", "f8"]) # E: No overload variant + +np.rec.fromrecords(AR_i8) # E: incompatible type +np.rec.fromrecords([(1.5,)], dtype=[("f8", "f8")], formats=["f8", "f8"]) # E: No overload variant + +np.rec.fromstring("string", dtype=[("f8", "f8")]) # E: No overload variant +np.rec.fromstring(b"bytes") # E: No overload variant +np.rec.fromstring(b"(1.5,)", dtype=[("f8", "f8")], formats=["f8", "f8"]) # E: No overload variant + +with open("test", "r") as f: + np.rec.fromfile(f, dtype=[("f8", "f8")]) # E: No overload variant diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/scalars.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/scalars.pyi new file mode 100644 index 0000000..9644705 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/scalars.pyi @@ -0,0 +1,93 @@ +import sys +import numpy as np + +f2: np.float16 +f8: np.float64 +c8: np.complex64 + +# Construction + +np.float32(3j) # E: incompatible type + +# Technically the following examples are valid NumPy code. But they +# are not considered a best practice, and people who wish to use the +# stubs should instead do +# +# np.array([1.0, 0.0, 0.0], dtype=np.float32) +# np.array([], dtype=np.complex64) +# +# See e.g. 
the discussion on the mailing list +# +# https://mail.python.org/pipermail/numpy-discussion/2020-April/080566.html +# +# and the issue +# +# https://github.com/numpy/numpy-stubs/issues/41 +# +# for more context. +np.float32([1.0, 0.0, 0.0]) # E: incompatible type +np.complex64([]) # E: incompatible type + +np.complex64(1, 2) # E: Too many arguments +# TODO: protocols (can't check for non-existent protocols w/ __getattr__) + +np.datetime64(0) # E: No overload variant + +class A: + def __float__(self): + return 1.0 + + +np.int8(A()) # E: incompatible type +np.int16(A()) # E: incompatible type +np.int32(A()) # E: incompatible type +np.int64(A()) # E: incompatible type +np.uint8(A()) # E: incompatible type +np.uint16(A()) # E: incompatible type +np.uint32(A()) # E: incompatible type +np.uint64(A()) # E: incompatible type + +np.void("test") # E: incompatible type + +np.generic(1) # E: Cannot instantiate abstract class +np.number(1) # E: Cannot instantiate abstract class +np.integer(1) # E: Cannot instantiate abstract class +np.inexact(1) # E: Cannot instantiate abstract class +np.character("test") # E: Cannot instantiate abstract class +np.flexible(b"test") # E: Cannot instantiate abstract class + +np.float64(value=0.0) # E: Unexpected keyword argument +np.int64(value=0) # E: Unexpected keyword argument +np.uint64(value=0) # E: Unexpected keyword argument +np.complex128(value=0.0j) # E: Unexpected keyword argument +np.str_(value='bob') # E: No overload variant +np.bytes_(value=b'test') # E: No overload variant +np.void(value=b'test') # E: Unexpected keyword argument +np.bool_(value=True) # E: Unexpected keyword argument +np.datetime64(value="2019") # E: No overload variant +np.timedelta64(value=0) # E: Unexpected keyword argument + +np.bytes_(b"hello", encoding='utf-8') # E: No overload variant +np.str_("hello", encoding='utf-8') # E: No overload variant + +complex(np.bytes_("1")) # E: No overload variant + +f8.item(1) # E: incompatible type +f8.item((0, 1)) # E: incompatible type +f8.squeeze(axis=1) # E: incompatible type +f8.squeeze(axis=(0, 1)) # E: incompatible type +f8.transpose(1) # E: incompatible type + +def func(a: np.float32) -> None: ... 
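
# Editorial sketch of the best-practice construction recommended in the note
# above: build arrays with an explicit dtype via np.array rather than calling
# the scalar type on a sequence.
import numpy as np

good_f32 = np.array([1.0, 0.0, 0.0], dtype=np.float32)
good_c64 = np.array([], dtype=np.complex64)
print(good_f32.dtype, good_c64.dtype)  # float32 complex64
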
+ +func(f2) # E: incompatible type +func(f8) # E: incompatible type + +round(c8) # E: No overload variant + +c8.__getnewargs__() # E: Invalid self argument +f2.__getnewargs__() # E: Invalid self argument +f2.hex() # E: Invalid self argument +np.float16.fromhex("0x0.0p+0") # E: Invalid self argument +f2.__trunc__() # E: Invalid self argument +f2.__getformat__("float") # E: Invalid self argument diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/shape_base.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/shape_base.pyi new file mode 100644 index 0000000..e709741 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/shape_base.pyi @@ -0,0 +1,8 @@ +import numpy as np + +class DTypeLike: + dtype: np.dtype[np.int_] + +dtype_like: DTypeLike + +np.expand_dims(dtype_like, (5, 10)) # E: No overload variant diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/stride_tricks.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/stride_tricks.pyi new file mode 100644 index 0000000..f2bfba7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/stride_tricks.pyi @@ -0,0 +1,9 @@ +import numpy as np +import numpy.typing as npt + +AR_f8: npt.NDArray[np.float64] + +np.lib.stride_tricks.as_strided(AR_f8, shape=8) # E: No overload variant +np.lib.stride_tricks.as_strided(AR_f8, strides=8) # E: No overload variant + +np.lib.stride_tricks.sliding_window_view(AR_f8, axis=(1,)) # E: No overload variant diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/testing.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/testing.pyi new file mode 100644 index 0000000..e753a98 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/testing.pyi @@ -0,0 +1,26 @@ +import numpy as np +import numpy.typing as npt + +AR_U: npt.NDArray[np.str_] + +def func() -> bool: ... 
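
# Editorial sketch (runtime, not part of the vendored tests) of the accepted
# calling conventions behind the assert_raises cases below: the callable is
# passed positionally, or the assertion is used as a context manager.
import numpy as np

def boom() -> None:
    raise TypeError("boom")

np.testing.assert_raises(TypeError, boom)   # positional callable: OK
with np.testing.assert_raises(TypeError):   # context-manager form: OK
    raise TypeError("boom")
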
+ +np.testing.assert_(True, msg=1) # E: incompatible type +np.testing.build_err_msg(1, "test") # E: incompatible type +np.testing.assert_almost_equal(AR_U, AR_U) # E: incompatible type +np.testing.assert_approx_equal([1, 2, 3], [1, 2, 3]) # E: incompatible type +np.testing.assert_array_almost_equal(AR_U, AR_U) # E: incompatible type +np.testing.assert_array_less(AR_U, AR_U) # E: incompatible type +np.testing.assert_string_equal(b"a", b"a") # E: incompatible type + +np.testing.assert_raises(expected_exception=TypeError, callable=func) # E: No overload variant +np.testing.assert_raises_regex(expected_exception=TypeError, expected_regex="T", callable=func) # E: No overload variant + +np.testing.assert_allclose(AR_U, AR_U) # E: incompatible type +np.testing.assert_array_almost_equal_nulp(AR_U, AR_U) # E: incompatible type +np.testing.assert_array_max_ulp(AR_U, AR_U) # E: incompatible type + +np.testing.assert_warns(warning_class=RuntimeWarning, func=func) # E: No overload variant +np.testing.assert_no_warnings(func=func) # E: No overload variant + +np.testing.assert_no_gc_cycles(func=func) # E: No overload variant diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/twodim_base.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/twodim_base.pyi new file mode 100644 index 0000000..ab34a37 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/twodim_base.pyi @@ -0,0 +1,37 @@ +from typing import Any, List, TypeVar + +import numpy as np +import numpy.typing as npt + + +def func1(ar: npt.NDArray[Any], a: int) -> npt.NDArray[np.str_]: + pass + + +def func2(ar: npt.NDArray[Any], a: float) -> float: + pass + + +AR_b: npt.NDArray[np.bool_] +AR_m: npt.NDArray[np.timedelta64] + +AR_LIKE_b: List[bool] + +np.eye(10, M=20.0) # E: No overload variant +np.eye(10, k=2.5, dtype=int) # E: No overload variant + +np.diag(AR_b, k=0.5) # E: No overload variant +np.diagflat(AR_b, k=0.5) # E: No overload variant + +np.tri(10, M=20.0) # E: No overload variant +np.tri(10, k=2.5, dtype=int) # E: No overload variant + +np.tril(AR_b, k=0.5) # E: No overload variant +np.triu(AR_b, k=0.5) # E: No overload variant + +np.vander(AR_m) # E: incompatible type + +np.histogram2d(AR_m) # E: No overload variant + +np.mask_indices(10, func1) # E: incompatible type +np.mask_indices(10, func2, 10.5) # E: incompatible type diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/type_check.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/type_check.pyi new file mode 100644 index 0000000..95f52bf --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/type_check.pyi @@ -0,0 +1,13 @@ +import numpy as np +import numpy.typing as npt + +DTYPE_i8: np.dtype[np.int64] + +np.mintypecode(DTYPE_i8) # E: incompatible type +np.iscomplexobj(DTYPE_i8) # E: incompatible type +np.isrealobj(DTYPE_i8) # E: incompatible type + +np.typename(DTYPE_i8) # E: No overload variant +np.typename("invalid") # E: No overload variant + +np.common_type(np.timedelta64()) # E: incompatible type diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/ufunc_config.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/ufunc_config.pyi new file mode 100644 index 0000000..f547fbb --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/ufunc_config.pyi @@ -0,0 +1,21 @@ +"""Typing tests for `numpy.core._ufunc_config`.""" + +import numpy as np + +def func1(a: str, b: int, c: 
float) -> None: ... +def func2(a: str, *, b: int) -> None: ... + +class Write1: + def write1(self, a: str) -> None: ... + +class Write2: + def write(self, a: str, b: str) -> None: ... + +class Write3: + def write(self, *, a: str) -> None: ... + +np.seterrcall(func1) # E: Argument 1 to "seterrcall" has incompatible type +np.seterrcall(func2) # E: Argument 1 to "seterrcall" has incompatible type +np.seterrcall(Write1()) # E: Argument 1 to "seterrcall" has incompatible type +np.seterrcall(Write2()) # E: Argument 1 to "seterrcall" has incompatible type +np.seterrcall(Write3()) # E: Argument 1 to "seterrcall" has incompatible type diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/ufunclike.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/ufunclike.pyi new file mode 100644 index 0000000..82a5f3a --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/ufunclike.pyi @@ -0,0 +1,21 @@ +from typing import List, Any +import numpy as np + +AR_c: np.ndarray[Any, np.dtype[np.complex128]] +AR_m: np.ndarray[Any, np.dtype[np.timedelta64]] +AR_M: np.ndarray[Any, np.dtype[np.datetime64]] +AR_O: np.ndarray[Any, np.dtype[np.object_]] + +np.fix(AR_c) # E: incompatible type +np.fix(AR_m) # E: incompatible type +np.fix(AR_M) # E: incompatible type + +np.isposinf(AR_c) # E: incompatible type +np.isposinf(AR_m) # E: incompatible type +np.isposinf(AR_M) # E: incompatible type +np.isposinf(AR_O) # E: incompatible type + +np.isneginf(AR_c) # E: incompatible type +np.isneginf(AR_m) # E: incompatible type +np.isneginf(AR_M) # E: incompatible type +np.isneginf(AR_O) # E: incompatible type diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/ufuncs.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/ufuncs.pyi new file mode 100644 index 0000000..e827267 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/ufuncs.pyi @@ -0,0 +1,41 @@ +import numpy as np +import numpy.typing as npt + +AR_f8: npt.NDArray[np.float64] + +np.sin.nin + "foo" # E: Unsupported operand types +np.sin(1, foo="bar") # E: No overload variant + +np.abs(None) # E: No overload variant + +np.add(1, 1, 1) # E: No overload variant +np.add(1, 1, axis=0) # E: No overload variant + +np.matmul(AR_f8, AR_f8, where=True) # E: No overload variant + +np.frexp(AR_f8, out=None) # E: No overload variant +np.frexp(AR_f8, out=AR_f8) # E: No overload variant + +np.absolute.outer() # E: "None" not callable +np.frexp.outer() # E: "None" not callable +np.divmod.outer() # E: "None" not callable +np.matmul.outer() # E: "None" not callable + +np.absolute.reduceat() # E: "None" not callable +np.frexp.reduceat() # E: "None" not callable +np.divmod.reduceat() # E: "None" not callable +np.matmul.reduceat() # E: "None" not callable + +np.absolute.reduce() # E: "None" not callable +np.frexp.reduce() # E: "None" not callable +np.divmod.reduce() # E: "None" not callable +np.matmul.reduce() # E: "None" not callable + +np.absolute.accumulate() # E: "None" not callable +np.frexp.accumulate() # E: "None" not callable +np.divmod.accumulate() # E: "None" not callable +np.matmul.accumulate() # E: "None" not callable + +np.frexp.at() # E: "None" not callable +np.divmod.at() # E: "None" not callable +np.matmul.at() # E: "None" not callable diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/warnings_and_errors.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/warnings_and_errors.pyi new file mode 100644 
index 0000000..f4fa382 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/fail/warnings_and_errors.pyi @@ -0,0 +1,5 @@ +import numpy as np + +np.AxisError(1.0) # E: No overload variant +np.AxisError(1, ndim=2.0) # E: No overload variant +np.AxisError(2, msg_prefix=404) # E: No overload variant diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/misc/extended_precision.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/misc/extended_precision.pyi new file mode 100644 index 0000000..1e495e4 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/misc/extended_precision.pyi @@ -0,0 +1,17 @@ +import numpy as np + +reveal_type(np.uint128()) +reveal_type(np.uint256()) + +reveal_type(np.int128()) +reveal_type(np.int256()) + +reveal_type(np.float80()) +reveal_type(np.float96()) +reveal_type(np.float128()) +reveal_type(np.float256()) + +reveal_type(np.complex160()) +reveal_type(np.complex192()) +reveal_type(np.complex256()) +reveal_type(np.complex512()) diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/mypy.ini b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/mypy.ini new file mode 100644 index 0000000..548f762 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/mypy.ini @@ -0,0 +1,9 @@ +[mypy] +plugins = numpy.typing.mypy_plugin +show_absolute_path = True + +[mypy-numpy] +ignore_errors = True + +[mypy-numpy.*] +ignore_errors = True diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/__pycache__/arithmetic.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/__pycache__/arithmetic.cpython-38.pyc new file mode 100644 index 0000000..01d54b0 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/__pycache__/arithmetic.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/__pycache__/array_constructors.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/__pycache__/array_constructors.cpython-38.pyc new file mode 100644 index 0000000..38c2fd7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/__pycache__/array_constructors.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/__pycache__/array_like.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/__pycache__/array_like.cpython-38.pyc new file mode 100644 index 0000000..927c43d Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/__pycache__/array_like.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/__pycache__/arrayprint.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/__pycache__/arrayprint.cpython-38.pyc new file mode 100644 index 0000000..e60e0d4 Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/__pycache__/arrayprint.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/__pycache__/arrayterator.cpython-38.pyc b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/__pycache__/arrayterator.cpython-38.pyc new file mode 100644 index 0000000..d954b5a Binary files /dev/null and b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/__pycache__/arrayterator.cpython-38.pyc differ diff --git 
diff --git
a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/arithmetic.py b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/arithmetic.py new file mode 100644 index 0000000..fe16129 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/arithmetic.py @@ -0,0 +1,588 @@ +from __future__ import annotations + +from typing import Any +import numpy as np + +c16 = np.complex128(1) +f8 = np.float64(1) +i8 = np.int64(1) +u8 = np.uint64(1) + +c8 = np.complex64(1) +f4 = np.float32(1) +i4 = np.int32(1) +u4 = np.uint32(1) + +dt = np.datetime64(1, "D") +td = np.timedelta64(1, "D") + +b_ = np.bool_(1) + +b = bool(1) +c = complex(1) +f = float(1) +i = int(1) + + +class Object: + def __array__(self) -> np.ndarray[Any, np.dtype[np.object_]]: + ret = np.empty((), dtype=object) + ret[()] = self + return ret + + def __sub__(self, value: Any) -> Object: + return self + + def __rsub__(self, value: Any) -> Object: + return self + + def __floordiv__(self, value: Any) -> Object: + return self + + def __rfloordiv__(self, value: Any) -> Object: + return self + + def __mul__(self, value: Any) -> Object: + return self + + def __rmul__(self, value: Any) -> Object: + return self + + def __pow__(self, value: Any) -> Object: + return self + + def __rpow__(self, value: Any) -> Object: + return self + + +AR_b: np.ndarray[Any, np.dtype[np.bool_]] = np.array([True]) +AR_u: np.ndarray[Any, np.dtype[np.uint32]] = np.array([1], dtype=np.uint32) +AR_i: np.ndarray[Any, np.dtype[np.int64]] = np.array([1]) +AR_f: np.ndarray[Any, np.dtype[np.float64]] = np.array([1.0]) +AR_c: np.ndarray[Any, np.dtype[np.complex128]] = np.array([1j]) +AR_m: np.ndarray[Any, np.dtype[np.timedelta64]] = np.array([np.timedelta64(1, "D")]) +AR_M: np.ndarray[Any, np.dtype[np.datetime64]] = np.array([np.datetime64(1, "D")]) +AR_O: np.ndarray[Any, np.dtype[np.object_]] = np.array([Object()]) + +AR_LIKE_b = [True] +AR_LIKE_u = [np.uint32(1)] +AR_LIKE_i = [1] +AR_LIKE_f = [1.0] +AR_LIKE_c = [1j] +AR_LIKE_m = [np.timedelta64(1, "D")] +AR_LIKE_M = [np.datetime64(1, "D")] +AR_LIKE_O = [Object()] + +# Array subtractions + +AR_b - AR_LIKE_u +AR_b - AR_LIKE_i +AR_b - AR_LIKE_f +AR_b - AR_LIKE_c +AR_b - AR_LIKE_m +AR_b - AR_LIKE_O + +AR_LIKE_u - AR_b +AR_LIKE_i - AR_b +AR_LIKE_f - AR_b +AR_LIKE_c - AR_b +AR_LIKE_m - AR_b +AR_LIKE_M - AR_b +AR_LIKE_O - AR_b + +AR_u - AR_LIKE_b +AR_u - AR_LIKE_u +AR_u - AR_LIKE_i +AR_u - AR_LIKE_f +AR_u - AR_LIKE_c +AR_u - AR_LIKE_m +AR_u - AR_LIKE_O + +AR_LIKE_b - AR_u +AR_LIKE_u - AR_u +AR_LIKE_i - AR_u +AR_LIKE_f - AR_u +AR_LIKE_c - AR_u +AR_LIKE_m - AR_u +AR_LIKE_M - AR_u +AR_LIKE_O - AR_u + +AR_i - AR_LIKE_b +AR_i - AR_LIKE_u +AR_i - AR_LIKE_i +AR_i - AR_LIKE_f +AR_i - AR_LIKE_c +AR_i - AR_LIKE_m +AR_i - AR_LIKE_O + +AR_LIKE_b - AR_i +AR_LIKE_u - AR_i +AR_LIKE_i - AR_i +AR_LIKE_f - AR_i +AR_LIKE_c - AR_i +AR_LIKE_m - AR_i +AR_LIKE_M - AR_i +AR_LIKE_O - AR_i + +AR_f - AR_LIKE_b +AR_f - AR_LIKE_u +AR_f - AR_LIKE_i +AR_f - AR_LIKE_f +AR_f - AR_LIKE_c +AR_f - AR_LIKE_O + +AR_LIKE_b - AR_f +AR_LIKE_u - AR_f +AR_LIKE_i - AR_f +AR_LIKE_f - AR_f +AR_LIKE_c - AR_f +AR_LIKE_O - AR_f + +AR_c - AR_LIKE_b +AR_c - AR_LIKE_u +AR_c - AR_LIKE_i +AR_c - AR_LIKE_f +AR_c - AR_LIKE_c +AR_c - AR_LIKE_O + +AR_LIKE_b - AR_c +AR_LIKE_u - AR_c +AR_LIKE_i - AR_c +AR_LIKE_f - AR_c +AR_LIKE_c - AR_c +AR_LIKE_O - AR_c + +AR_m - AR_LIKE_b +AR_m - AR_LIKE_u +AR_m - AR_LIKE_i +AR_m - AR_LIKE_m + +AR_LIKE_b - AR_m +AR_LIKE_u - AR_m +AR_LIKE_i - AR_m +AR_LIKE_m - AR_m +AR_LIKE_M - AR_m + +AR_M - AR_LIKE_b +AR_M - 
AR_LIKE_u +AR_M - AR_LIKE_i +AR_M - AR_LIKE_m +AR_M - AR_LIKE_M + +AR_LIKE_M - AR_M + +AR_O - AR_LIKE_b +AR_O - AR_LIKE_u +AR_O - AR_LIKE_i +AR_O - AR_LIKE_f +AR_O - AR_LIKE_c +AR_O - AR_LIKE_O + +AR_LIKE_b - AR_O +AR_LIKE_u - AR_O +AR_LIKE_i - AR_O +AR_LIKE_f - AR_O +AR_LIKE_c - AR_O +AR_LIKE_O - AR_O + +# Array floor division + +AR_b // AR_LIKE_b +AR_b // AR_LIKE_u +AR_b // AR_LIKE_i +AR_b // AR_LIKE_f +AR_b // AR_LIKE_O + +AR_LIKE_b // AR_b +AR_LIKE_u // AR_b +AR_LIKE_i // AR_b +AR_LIKE_f // AR_b +AR_LIKE_O // AR_b + +AR_u // AR_LIKE_b +AR_u // AR_LIKE_u +AR_u // AR_LIKE_i +AR_u // AR_LIKE_f +AR_u // AR_LIKE_O + +AR_LIKE_b // AR_u +AR_LIKE_u // AR_u +AR_LIKE_i // AR_u +AR_LIKE_f // AR_u +AR_LIKE_m // AR_u +AR_LIKE_O // AR_u + +AR_i // AR_LIKE_b +AR_i // AR_LIKE_u +AR_i // AR_LIKE_i +AR_i // AR_LIKE_f +AR_i // AR_LIKE_O + +AR_LIKE_b // AR_i +AR_LIKE_u // AR_i +AR_LIKE_i // AR_i +AR_LIKE_f // AR_i +AR_LIKE_m // AR_i +AR_LIKE_O // AR_i + +AR_f // AR_LIKE_b +AR_f // AR_LIKE_u +AR_f // AR_LIKE_i +AR_f // AR_LIKE_f +AR_f // AR_LIKE_O + +AR_LIKE_b // AR_f +AR_LIKE_u // AR_f +AR_LIKE_i // AR_f +AR_LIKE_f // AR_f +AR_LIKE_m // AR_f +AR_LIKE_O // AR_f + +AR_m // AR_LIKE_u +AR_m // AR_LIKE_i +AR_m // AR_LIKE_f +AR_m // AR_LIKE_m + +AR_LIKE_m // AR_m + +AR_O // AR_LIKE_b +AR_O // AR_LIKE_u +AR_O // AR_LIKE_i +AR_O // AR_LIKE_f +AR_O // AR_LIKE_O + +AR_LIKE_b // AR_O +AR_LIKE_u // AR_O +AR_LIKE_i // AR_O +AR_LIKE_f // AR_O +AR_LIKE_O // AR_O + +# Inplace multiplication + +AR_b *= AR_LIKE_b + +AR_u *= AR_LIKE_b +AR_u *= AR_LIKE_u + +AR_i *= AR_LIKE_b +AR_i *= AR_LIKE_u +AR_i *= AR_LIKE_i + +AR_f *= AR_LIKE_b +AR_f *= AR_LIKE_u +AR_f *= AR_LIKE_i +AR_f *= AR_LIKE_f + +AR_c *= AR_LIKE_b +AR_c *= AR_LIKE_u +AR_c *= AR_LIKE_i +AR_c *= AR_LIKE_f +AR_c *= AR_LIKE_c + +AR_m *= AR_LIKE_b +AR_m *= AR_LIKE_u +AR_m *= AR_LIKE_i +AR_m *= AR_LIKE_f + +AR_O *= AR_LIKE_b +AR_O *= AR_LIKE_u +AR_O *= AR_LIKE_i +AR_O *= AR_LIKE_f +AR_O *= AR_LIKE_c +AR_O *= AR_LIKE_O + +# Inplace power + +AR_u **= AR_LIKE_b +AR_u **= AR_LIKE_u + +AR_i **= AR_LIKE_b +AR_i **= AR_LIKE_u +AR_i **= AR_LIKE_i + +AR_f **= AR_LIKE_b +AR_f **= AR_LIKE_u +AR_f **= AR_LIKE_i +AR_f **= AR_LIKE_f + +AR_c **= AR_LIKE_b +AR_c **= AR_LIKE_u +AR_c **= AR_LIKE_i +AR_c **= AR_LIKE_f +AR_c **= AR_LIKE_c + +AR_O **= AR_LIKE_b +AR_O **= AR_LIKE_u +AR_O **= AR_LIKE_i +AR_O **= AR_LIKE_f +AR_O **= AR_LIKE_c +AR_O **= AR_LIKE_O + +# unary ops + +-c16 +-c8 +-f8 +-f4 +-i8 +-i4 +-u8 +-u4 +-td +-AR_f + ++c16 ++c8 ++f8 ++f4 ++i8 ++i4 ++u8 ++u4 ++td ++AR_f + +abs(c16) +abs(c8) +abs(f8) +abs(f4) +abs(i8) +abs(i4) +abs(u8) +abs(u4) +abs(td) +abs(b_) +abs(AR_f) + +# Time structures + +dt + td +dt + i +dt + i4 +dt + i8 +dt - dt +dt - i +dt - i4 +dt - i8 + +td + td +td + i +td + i4 +td + i8 +td - td +td - i +td - i4 +td - i8 +td / f +td / f4 +td / f8 +td / td +td // td +td % td + + +# boolean + +b_ / b +b_ / b_ +b_ / i +b_ / i8 +b_ / i4 +b_ / u8 +b_ / u4 +b_ / f +b_ / f8 +b_ / f4 +b_ / c +b_ / c16 +b_ / c8 + +b / b_ +b_ / b_ +i / b_ +i8 / b_ +i4 / b_ +u8 / b_ +u4 / b_ +f / b_ +f8 / b_ +f4 / b_ +c / b_ +c16 / b_ +c8 / b_ + +# Complex + +c16 + c16 +c16 + f8 +c16 + i8 +c16 + c8 +c16 + f4 +c16 + i4 +c16 + b_ +c16 + b +c16 + c +c16 + f +c16 + i +c16 + AR_f + +c16 + c16 +f8 + c16 +i8 + c16 +c8 + c16 +f4 + c16 +i4 + c16 +b_ + c16 +b + c16 +c + c16 +f + c16 +i + c16 +AR_f + c16 + +c8 + c16 +c8 + f8 +c8 + i8 +c8 + c8 +c8 + f4 +c8 + i4 +c8 + b_ +c8 + b +c8 + c +c8 + f +c8 + i +c8 + AR_f + +c16 + c8 +f8 + c8 +i8 + c8 +c8 + c8 +f4 + c8 +i4 + c8 +b_ + c8 +b + c8 +c + c8 +f + c8 +i 
+ c8 +AR_f + c8 + +# Float + +f8 + f8 +f8 + i8 +f8 + f4 +f8 + i4 +f8 + b_ +f8 + b +f8 + c +f8 + f +f8 + i +f8 + AR_f + +f8 + f8 +i8 + f8 +f4 + f8 +i4 + f8 +b_ + f8 +b + f8 +c + f8 +f + f8 +i + f8 +AR_f + f8 + +f4 + f8 +f4 + i8 +f4 + f4 +f4 + i4 +f4 + b_ +f4 + b +f4 + c +f4 + f +f4 + i +f4 + AR_f + +f8 + f4 +i8 + f4 +f4 + f4 +i4 + f4 +b_ + f4 +b + f4 +c + f4 +f + f4 +i + f4 +AR_f + f4 + +# Int + +i8 + i8 +i8 + u8 +i8 + i4 +i8 + u4 +i8 + b_ +i8 + b +i8 + c +i8 + f +i8 + i +i8 + AR_f + +u8 + u8 +u8 + i4 +u8 + u4 +u8 + b_ +u8 + b +u8 + c +u8 + f +u8 + i +u8 + AR_f + +i8 + i8 +u8 + i8 +i4 + i8 +u4 + i8 +b_ + i8 +b + i8 +c + i8 +f + i8 +i + i8 +AR_f + i8 + +u8 + u8 +i4 + u8 +u4 + u8 +b_ + u8 +b + u8 +c + u8 +f + u8 +i + u8 +AR_f + u8 + +i4 + i8 +i4 + i4 +i4 + i +i4 + b_ +i4 + b +i4 + AR_f + +u4 + i8 +u4 + i4 +u4 + u8 +u4 + u4 +u4 + i +u4 + b_ +u4 + b +u4 + AR_f + +i8 + i4 +i4 + i4 +i + i4 +b_ + i4 +b + i4 +AR_f + i4 + +i8 + u4 +i4 + u4 +u8 + u4 +u4 + u4 +b_ + u4 +b + u4 +i + u4 +AR_f + u4 diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/array_constructors.py b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/array_constructors.py new file mode 100644 index 0000000..2763d9c --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/array_constructors.py @@ -0,0 +1,138 @@ +import sys +from typing import Any +import numpy as np + + +class Index: + def __index__(self) -> int: + return 0 + + +class SubClass(np.ndarray): + pass + + +def func(i: int, j: int, **kwargs: Any) -> SubClass: + return B + + +i8 = np.int64(1) + +A = np.array([1]) +B = A.view(SubClass).copy() +B_stack = np.array([[1], [1]]).view(SubClass) +C = [1] + +if sys.version_info >= (3, 8): + np.ndarray(Index()) + np.ndarray([Index()]) + +np.array(1, dtype=float) +np.array(1, copy=False) +np.array(1, order='F') +np.array(1, order=None) +np.array(1, subok=True) +np.array(1, ndmin=3) +np.array(1, str, copy=True, order='C', subok=False, ndmin=2) + +np.asarray(A) +np.asarray(B) +np.asarray(C) + +np.asanyarray(A) +np.asanyarray(B) +np.asanyarray(B, dtype=int) +np.asanyarray(C) + +np.ascontiguousarray(A) +np.ascontiguousarray(B) +np.ascontiguousarray(C) + +np.asfortranarray(A) +np.asfortranarray(B) +np.asfortranarray(C) + +np.require(A) +np.require(B) +np.require(B, dtype=int) +np.require(B, requirements=None) +np.require(B, requirements="E") +np.require(B, requirements=["ENSUREARRAY"]) +np.require(B, requirements={"F", "E"}) +np.require(B, requirements=["C", "OWNDATA"]) +np.require(B, requirements="W") +np.require(B, requirements="A") +np.require(C) + +np.linspace(0, 2) +np.linspace(0.5, [0, 1, 2]) +np.linspace([0, 1, 2], 3) +np.linspace(0j, 2) +np.linspace(0, 2, num=10) +np.linspace(0, 2, endpoint=True) +np.linspace(0, 2, retstep=True) +np.linspace(0j, 2j, retstep=True) +np.linspace(0, 2, dtype=bool) +np.linspace([0, 1], [2, 3], axis=Index()) + +np.logspace(0, 2, base=2) +np.logspace(0, 2, base=2) +np.logspace(0, 2, base=[1j, 2j], num=2) + +np.geomspace(1, 2) + +np.zeros_like(A) +np.zeros_like(C) +np.zeros_like(B) +np.zeros_like(B, dtype=np.int64) + +np.ones_like(A) +np.ones_like(C) +np.ones_like(B) +np.ones_like(B, dtype=np.int64) + +np.empty_like(A) +np.empty_like(C) +np.empty_like(B) +np.empty_like(B, dtype=np.int64) + +np.full_like(A, i8) +np.full_like(C, i8) +np.full_like(B, i8) +np.full_like(B, i8, dtype=np.int64) + +np.ones(1) +np.ones([1, 1, 1]) + +np.full(1, i8) +np.full([1, 1, 1], i8) + +np.indices([1, 2, 3]) +np.indices([1, 2, 3], sparse=True) + 
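# Editorial sketch, not part of the original test file (np is imported at
# its top): these "pass" cases must both type-check and execute. One
# behaviour the stubs have to encode is that retstep=True switches
# np.linspace to returning a (samples, step) tuple instead of a bare array:
_samples, _step = np.linspace(0, 2, num=5, retstep=True)
assert _samples.shape == (5,) and float(_step) == 0.5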
+np.fromfunction(func, (3, 5)) + +np.identity(10) + +np.atleast_1d(C) +np.atleast_1d(A) +np.atleast_1d(C, C) +np.atleast_1d(C, A) +np.atleast_1d(A, A) + +np.atleast_2d(C) + +np.atleast_3d(C) + +np.vstack([C, C]) +np.vstack([C, A]) +np.vstack([A, A]) + +np.hstack([C, C]) + +np.stack([C, C]) +np.stack([C, C], axis=0) +np.stack([C, C], out=B_stack) + +np.block([[C, C], [C, C]]) +np.block(A) diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/array_like.py b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/array_like.py new file mode 100644 index 0000000..5bd2fda --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/array_like.py @@ -0,0 +1,39 @@ +from typing import Any, Optional + +import numpy as np +from numpy.typing import ArrayLike, _SupportsArray + +x1: ArrayLike = True +x2: ArrayLike = 5 +x3: ArrayLike = 1.0 +x4: ArrayLike = 1 + 1j +x5: ArrayLike = np.int8(1) +x6: ArrayLike = np.float64(1) +x7: ArrayLike = np.complex128(1) +x8: ArrayLike = np.array([1, 2, 3]) +x9: ArrayLike = [1, 2, 3] +x10: ArrayLike = (1, 2, 3) +x11: ArrayLike = "foo" +x12: ArrayLike = memoryview(b'foo') + + +class A: + def __array__(self, dtype: Optional[np.dtype] = None) -> np.ndarray: + return np.array([1, 2, 3]) + + +x13: ArrayLike = A() + +scalar: _SupportsArray = np.int64(1) +scalar.__array__() +array: _SupportsArray = np.array(1) +array.__array__() + +a: _SupportsArray = A() +a.__array__() +a.__array__() + +# Escape hatch for when you mean to make something like an object +# array. +object_array_scalar: Any = (i for i in range(10)) +np.array(object_array_scalar) diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/arrayprint.py b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/arrayprint.py new file mode 100644 index 0000000..6c704c7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/arrayprint.py @@ -0,0 +1,37 @@ +import numpy as np + +AR = np.arange(10) +AR.setflags(write=False) + +with np.printoptions(): + np.set_printoptions( + precision=1, + threshold=2, + edgeitems=3, + linewidth=4, + suppress=False, + nanstr="Bob", + infstr="Bill", + formatter={}, + sign="+", + floatmode="unique", + ) + np.get_printoptions() + str(AR) + + np.array2string( + AR, + max_line_width=5, + precision=2, + suppress_small=True, + separator=";", + prefix="test", + threshold=5, + floatmode="fixed", + suffix="?", + legacy="1.13", + ) + np.format_float_scientific(1, precision=5) + np.format_float_positional(1, trim="k") + np.array_repr(AR) + np.array_str(AR) diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/arrayterator.py b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/arrayterator.py new file mode 100644 index 0000000..572be5e --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/arrayterator.py @@ -0,0 +1,27 @@ + +from __future__ import annotations + +from typing import Any +import numpy as np + +AR_i8: np.ndarray[Any, np.dtype[np.int_]] = np.arange(10) +ar_iter = np.lib.Arrayterator(AR_i8) + +ar_iter.var +ar_iter.buf_size +ar_iter.start +ar_iter.stop +ar_iter.step +ar_iter.shape +ar_iter.flat + +ar_iter.__array__() + +for i in ar_iter: + pass + +ar_iter[0] +ar_iter[...] 
+ar_iter[:] +ar_iter[0, 0, 0] +ar_iter[..., 0, :] diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/bitwise_ops.py b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/bitwise_ops.py new file mode 100644 index 0000000..67449e2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/bitwise_ops.py @@ -0,0 +1,131 @@ +import numpy as np + +i8 = np.int64(1) +u8 = np.uint64(1) + +i4 = np.int32(1) +u4 = np.uint32(1) + +b_ = np.bool_(1) + +b = bool(1) +i = int(1) + +AR = np.array([0, 1, 2], dtype=np.int32) +AR.setflags(write=False) + + +i8 << i8 +i8 >> i8 +i8 | i8 +i8 ^ i8 +i8 & i8 + +i8 << AR +i8 >> AR +i8 | AR +i8 ^ AR +i8 & AR + +i4 << i4 +i4 >> i4 +i4 | i4 +i4 ^ i4 +i4 & i4 + +i8 << i4 +i8 >> i4 +i8 | i4 +i8 ^ i4 +i8 & i4 + +i8 << i +i8 >> i +i8 | i +i8 ^ i +i8 & i + +i8 << b_ +i8 >> b_ +i8 | b_ +i8 ^ b_ +i8 & b_ + +i8 << b +i8 >> b +i8 | b +i8 ^ b +i8 & b + +u8 << u8 +u8 >> u8 +u8 | u8 +u8 ^ u8 +u8 & u8 + +u8 << AR +u8 >> AR +u8 | AR +u8 ^ AR +u8 & AR + +u4 << u4 +u4 >> u4 +u4 | u4 +u4 ^ u4 +u4 & u4 + +u4 << i4 +u4 >> i4 +u4 | i4 +u4 ^ i4 +u4 & i4 + +u4 << i +u4 >> i +u4 | i +u4 ^ i +u4 & i + +u8 << b_ +u8 >> b_ +u8 | b_ +u8 ^ b_ +u8 & b_ + +u8 << b +u8 >> b +u8 | b +u8 ^ b +u8 & b + +b_ << b_ +b_ >> b_ +b_ | b_ +b_ ^ b_ +b_ & b_ + +b_ << AR +b_ >> AR +b_ | AR +b_ ^ AR +b_ & AR + +b_ << b +b_ >> b +b_ | b +b_ ^ b +b_ & b + +b_ << i +b_ >> i +b_ | i +b_ ^ i +b_ & i + +~i8 +~i4 +~u8 +~u4 +~b_ +~AR diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/comparisons.py b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/comparisons.py new file mode 100644 index 0000000..ce41de4 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/comparisons.py @@ -0,0 +1,301 @@ +from __future__ import annotations + +from typing import Any +import numpy as np + +c16 = np.complex128() +f8 = np.float64() +i8 = np.int64() +u8 = np.uint64() + +c8 = np.complex64() +f4 = np.float32() +i4 = np.int32() +u4 = np.uint32() + +dt = np.datetime64(0, "D") +td = np.timedelta64(0, "D") + +b_ = np.bool_() + +b = bool() +c = complex() +f = float() +i = int() + +SEQ = (0, 1, 2, 3, 4) + +AR_b: np.ndarray[Any, np.dtype[np.bool_]] = np.array([True]) +AR_u: np.ndarray[Any, np.dtype[np.uint32]] = np.array([1], dtype=np.uint32) +AR_i: np.ndarray[Any, np.dtype[np.int_]] = np.array([1]) +AR_f: np.ndarray[Any, np.dtype[np.float_]] = np.array([1.0]) +AR_c: np.ndarray[Any, np.dtype[np.complex_]] = np.array([1.0j]) +AR_m: np.ndarray[Any, np.dtype[np.timedelta64]] = np.array([np.timedelta64("1")]) +AR_M: np.ndarray[Any, np.dtype[np.datetime64]] = np.array([np.datetime64("1")]) +AR_O: np.ndarray[Any, np.dtype[np.object_]] = np.array([1], dtype=object) + +# Arrays + +AR_b > AR_b +AR_b > AR_u +AR_b > AR_i +AR_b > AR_f +AR_b > AR_c + +AR_u > AR_b +AR_u > AR_u +AR_u > AR_i +AR_u > AR_f +AR_u > AR_c + +AR_i > AR_b +AR_i > AR_u +AR_i > AR_i +AR_i > AR_f +AR_i > AR_c + +AR_f > AR_b +AR_f > AR_u +AR_f > AR_i +AR_f > AR_f +AR_f > AR_c + +AR_c > AR_b +AR_c > AR_u +AR_c > AR_i +AR_c > AR_f +AR_c > AR_c + +AR_m > AR_b +AR_m > AR_u +AR_m > AR_i +AR_b > AR_m +AR_u > AR_m +AR_i > AR_m + +AR_M > AR_M + +AR_O > AR_O +1 > AR_O +AR_O > 1 + +# Time structures + +dt > dt + +td > td +td > i +td > i4 +td > i8 +td > AR_i +td > SEQ + +# boolean + +b_ > b +b_ > b_ +b_ > i +b_ > i8 +b_ > i4 +b_ > u8 +b_ > u4 +b_ > f +b_ > f8 +b_ > f4 +b_ > c +b_ > c16 +b_ > c8 +b_ > AR_i +b_ > SEQ + +# Complex + +c16 > c16 +c16 > f8 +c16 > i8 +c16 > c8 
+c16 > f4 +c16 > i4 +c16 > b_ +c16 > b +c16 > c +c16 > f +c16 > i +c16 > AR_i +c16 > SEQ + +c16 > c16 +f8 > c16 +i8 > c16 +c8 > c16 +f4 > c16 +i4 > c16 +b_ > c16 +b > c16 +c > c16 +f > c16 +i > c16 +AR_i > c16 +SEQ > c16 + +c8 > c16 +c8 > f8 +c8 > i8 +c8 > c8 +c8 > f4 +c8 > i4 +c8 > b_ +c8 > b +c8 > c +c8 > f +c8 > i +c8 > AR_i +c8 > SEQ + +c16 > c8 +f8 > c8 +i8 > c8 +c8 > c8 +f4 > c8 +i4 > c8 +b_ > c8 +b > c8 +c > c8 +f > c8 +i > c8 +AR_i > c8 +SEQ > c8 + +# Float + +f8 > f8 +f8 > i8 +f8 > f4 +f8 > i4 +f8 > b_ +f8 > b +f8 > c +f8 > f +f8 > i +f8 > AR_i +f8 > SEQ + +f8 > f8 +i8 > f8 +f4 > f8 +i4 > f8 +b_ > f8 +b > f8 +c > f8 +f > f8 +i > f8 +AR_i > f8 +SEQ > f8 + +f4 > f8 +f4 > i8 +f4 > f4 +f4 > i4 +f4 > b_ +f4 > b +f4 > c +f4 > f +f4 > i +f4 > AR_i +f4 > SEQ + +f8 > f4 +i8 > f4 +f4 > f4 +i4 > f4 +b_ > f4 +b > f4 +c > f4 +f > f4 +i > f4 +AR_i > f4 +SEQ > f4 + +# Int + +i8 > i8 +i8 > u8 +i8 > i4 +i8 > u4 +i8 > b_ +i8 > b +i8 > c +i8 > f +i8 > i +i8 > AR_i +i8 > SEQ + +u8 > u8 +u8 > i4 +u8 > u4 +u8 > b_ +u8 > b +u8 > c +u8 > f +u8 > i +u8 > AR_i +u8 > SEQ + +i8 > i8 +u8 > i8 +i4 > i8 +u4 > i8 +b_ > i8 +b > i8 +c > i8 +f > i8 +i > i8 +AR_i > i8 +SEQ > i8 + +u8 > u8 +i4 > u8 +u4 > u8 +b_ > u8 +b > u8 +c > u8 +f > u8 +i > u8 +AR_i > u8 +SEQ > u8 + +i4 > i8 +i4 > i4 +i4 > i +i4 > b_ +i4 > b +i4 > AR_i +i4 > SEQ + +u4 > i8 +u4 > i4 +u4 > u8 +u4 > u4 +u4 > i +u4 > b_ +u4 > b +u4 > AR_i +u4 > SEQ + +i8 > i4 +i4 > i4 +i > i4 +b_ > i4 +b > i4 +AR_i > i4 +SEQ > i4 + +i8 > u4 +i4 > u4 +u8 > u4 +u4 > u4 +b_ > u4 +b > u4 +i > u4 +AR_i > u4 +SEQ > u4 diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/dtype.py b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/dtype.py new file mode 100644 index 0000000..e849cfd --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/dtype.py @@ -0,0 +1,57 @@ +import numpy as np + +dtype_obj = np.dtype(np.str_) +void_dtype_obj = np.dtype([("f0", np.float64), ("f1", np.float32)]) + +np.dtype(dtype=np.int64) +np.dtype(int) +np.dtype("int") +np.dtype(None) + +np.dtype((int, 2)) +np.dtype((int, (1,))) + +np.dtype({"names": ["a", "b"], "formats": [int, float]}) +np.dtype({"names": ["a"], "formats": [int], "titles": [object]}) +np.dtype({"names": ["a"], "formats": [int], "titles": [object()]}) + +np.dtype([("name", np.unicode_, 16), ("grades", np.float64, (2,)), ("age", "int32")]) + +np.dtype( + { + "names": ["a", "b"], + "formats": [int, float], + "itemsize": 9, + "aligned": False, + "titles": ["x", "y"], + "offsets": [0, 1], + } +) + +np.dtype((np.float_, float)) + + +class Test: + dtype = np.dtype(float) + + +np.dtype(Test()) + +# Methods and attributes +dtype_obj.base +dtype_obj.subdtype +dtype_obj.newbyteorder() +dtype_obj.type +dtype_obj.name +dtype_obj.names + +dtype_obj * 0 +dtype_obj * 2 + +0 * dtype_obj +2 * dtype_obj + +void_dtype_obj["f0"] +void_dtype_obj[0] +void_dtype_obj[["f0", "f1"]] +void_dtype_obj[["f0"]] diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/einsumfunc.py b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/einsumfunc.py new file mode 100644 index 0000000..429764e --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/einsumfunc.py @@ -0,0 +1,36 @@ +from __future__ import annotations + +from typing import Any + +import numpy as np + +AR_LIKE_b = [True, True, True] +AR_LIKE_u = [np.uint32(1), np.uint32(2), np.uint32(3)] +AR_LIKE_i = [1, 2, 3] +AR_LIKE_f = [1.0, 2.0, 3.0] +AR_LIKE_c = [1j, 2j, 3j] +AR_LIKE_U = 
["1", "2", "3"] + +OUT_f: np.ndarray[Any, np.dtype[np.float64]] = np.empty(3, dtype=np.float64) +OUT_c: np.ndarray[Any, np.dtype[np.complex128]] = np.empty(3, dtype=np.complex128) + +np.einsum("i,i->i", AR_LIKE_b, AR_LIKE_b) +np.einsum("i,i->i", AR_LIKE_u, AR_LIKE_u) +np.einsum("i,i->i", AR_LIKE_i, AR_LIKE_i) +np.einsum("i,i->i", AR_LIKE_f, AR_LIKE_f) +np.einsum("i,i->i", AR_LIKE_c, AR_LIKE_c) +np.einsum("i,i->i", AR_LIKE_b, AR_LIKE_i) +np.einsum("i,i,i,i->i", AR_LIKE_b, AR_LIKE_u, AR_LIKE_i, AR_LIKE_c) + +np.einsum("i,i->i", AR_LIKE_f, AR_LIKE_f, dtype="c16") +np.einsum("i,i->i", AR_LIKE_U, AR_LIKE_U, dtype=bool, casting="unsafe") +np.einsum("i,i->i", AR_LIKE_f, AR_LIKE_f, out=OUT_c) +np.einsum("i,i->i", AR_LIKE_U, AR_LIKE_U, dtype=int, casting="unsafe", out=OUT_f) + +np.einsum_path("i,i->i", AR_LIKE_b, AR_LIKE_b) +np.einsum_path("i,i->i", AR_LIKE_u, AR_LIKE_u) +np.einsum_path("i,i->i", AR_LIKE_i, AR_LIKE_i) +np.einsum_path("i,i->i", AR_LIKE_f, AR_LIKE_f) +np.einsum_path("i,i->i", AR_LIKE_c, AR_LIKE_c) +np.einsum_path("i,i->i", AR_LIKE_b, AR_LIKE_i) +np.einsum_path("i,i,i,i->i", AR_LIKE_b, AR_LIKE_u, AR_LIKE_i, AR_LIKE_c) diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/flatiter.py b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/flatiter.py new file mode 100644 index 0000000..63c839a --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/flatiter.py @@ -0,0 +1,16 @@ +import numpy as np + +a = np.empty((2, 2)).flat + +a.base +a.copy() +a.coords +a.index +iter(a) +next(a) +a[0] +a[[0, 1, 2]] +a[...] +a[:] +a.__array__() +a.__array__(np.dtype(np.float64)) diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/fromnumeric.py b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/fromnumeric.py new file mode 100644 index 0000000..9e936e6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/fromnumeric.py @@ -0,0 +1,260 @@ +"""Tests for :mod:`numpy.core.fromnumeric`.""" + +import numpy as np + +A = np.array(True, ndmin=2, dtype=bool) +B = np.array(1.0, ndmin=2, dtype=np.float32) +A.setflags(write=False) +B.setflags(write=False) + +a = np.bool_(True) +b = np.float32(1.0) +c = 1.0 +d = np.array(1.0, dtype=np.float32) # writeable + +np.take(a, 0) +np.take(b, 0) +np.take(c, 0) +np.take(A, 0) +np.take(B, 0) +np.take(A, [0]) +np.take(B, [0]) + +np.reshape(a, 1) +np.reshape(b, 1) +np.reshape(c, 1) +np.reshape(A, 1) +np.reshape(B, 1) + +np.choose(a, [True, True]) +np.choose(A, [1.0, 1.0]) + +np.repeat(a, 1) +np.repeat(b, 1) +np.repeat(c, 1) +np.repeat(A, 1) +np.repeat(B, 1) + +np.swapaxes(A, 0, 0) +np.swapaxes(B, 0, 0) + +np.transpose(a) +np.transpose(b) +np.transpose(c) +np.transpose(A) +np.transpose(B) + +np.partition(a, 0, axis=None) +np.partition(b, 0, axis=None) +np.partition(c, 0, axis=None) +np.partition(A, 0) +np.partition(B, 0) + +np.argpartition(a, 0) +np.argpartition(b, 0) +np.argpartition(c, 0) +np.argpartition(A, 0) +np.argpartition(B, 0) + +np.sort(A, 0) +np.sort(B, 0) + +np.argsort(A, 0) +np.argsort(B, 0) + +np.argmax(A) +np.argmax(B) +np.argmax(A, axis=0) +np.argmax(B, axis=0) + +np.argmin(A) +np.argmin(B) +np.argmin(A, axis=0) +np.argmin(B, axis=0) + +np.searchsorted(A[0], 0) +np.searchsorted(B[0], 0) +np.searchsorted(A[0], [0]) +np.searchsorted(B[0], [0]) + +np.resize(a, (5, 5)) +np.resize(b, (5, 5)) +np.resize(c, (5, 5)) +np.resize(A, (5, 5)) +np.resize(B, (5, 5)) + +np.squeeze(a) +np.squeeze(b) +np.squeeze(c) +np.squeeze(A) +np.squeeze(B) + 
+np.diagonal(A) +np.diagonal(B) + +np.trace(A) +np.trace(B) + +np.ravel(a) +np.ravel(b) +np.ravel(c) +np.ravel(A) +np.ravel(B) + +np.nonzero(A) +np.nonzero(B) + +np.shape(a) +np.shape(b) +np.shape(c) +np.shape(A) +np.shape(B) + +np.compress([True], a) +np.compress([True], b) +np.compress([True], c) +np.compress([True], A) +np.compress([True], B) + +np.clip(a, 0, 1.0) +np.clip(b, -1, 1) +np.clip(a, 0, None) +np.clip(b, None, 1) +np.clip(c, 0, 1) +np.clip(A, 0, 1) +np.clip(B, 0, 1) +np.clip(B, [0, 1], [1, 2]) + +np.sum(a) +np.sum(b) +np.sum(c) +np.sum(A) +np.sum(B) +np.sum(A, axis=0) +np.sum(B, axis=0) + +np.all(a) +np.all(b) +np.all(c) +np.all(A) +np.all(B) +np.all(A, axis=0) +np.all(B, axis=0) +np.all(A, keepdims=True) +np.all(B, keepdims=True) + +np.any(a) +np.any(b) +np.any(c) +np.any(A) +np.any(B) +np.any(A, axis=0) +np.any(B, axis=0) +np.any(A, keepdims=True) +np.any(B, keepdims=True) + +np.cumsum(a) +np.cumsum(b) +np.cumsum(c) +np.cumsum(A) +np.cumsum(B) + +np.ptp(b) +np.ptp(c) +np.ptp(B) +np.ptp(B, axis=0) +np.ptp(B, keepdims=True) + +np.amax(a) +np.amax(b) +np.amax(c) +np.amax(A) +np.amax(B) +np.amax(A, axis=0) +np.amax(B, axis=0) +np.amax(A, keepdims=True) +np.amax(B, keepdims=True) + +np.amin(a) +np.amin(b) +np.amin(c) +np.amin(A) +np.amin(B) +np.amin(A, axis=0) +np.amin(B, axis=0) +np.amin(A, keepdims=True) +np.amin(B, keepdims=True) + +np.prod(a) +np.prod(b) +np.prod(c) +np.prod(A) +np.prod(B) +np.prod(a, dtype=None) +np.prod(A, dtype=None) +np.prod(A, axis=0) +np.prod(B, axis=0) +np.prod(A, keepdims=True) +np.prod(B, keepdims=True) +np.prod(b, out=d) +np.prod(B, out=d) + +np.cumprod(a) +np.cumprod(b) +np.cumprod(c) +np.cumprod(A) +np.cumprod(B) + +np.ndim(a) +np.ndim(b) +np.ndim(c) +np.ndim(A) +np.ndim(B) + +np.size(a) +np.size(b) +np.size(c) +np.size(A) +np.size(B) + +np.around(a) +np.around(b) +np.around(c) +np.around(A) +np.around(B) + +np.mean(a) +np.mean(b) +np.mean(c) +np.mean(A) +np.mean(B) +np.mean(A, axis=0) +np.mean(B, axis=0) +np.mean(A, keepdims=True) +np.mean(B, keepdims=True) +np.mean(b, out=d) +np.mean(B, out=d) + +np.std(a) +np.std(b) +np.std(c) +np.std(A) +np.std(B) +np.std(A, axis=0) +np.std(B, axis=0) +np.std(A, keepdims=True) +np.std(B, keepdims=True) +np.std(b, out=d) +np.std(B, out=d) + +np.var(a) +np.var(b) +np.var(c) +np.var(A) +np.var(B) +np.var(A, axis=0) +np.var(B, axis=0) +np.var(A, keepdims=True) +np.var(B, keepdims=True) +np.var(b, out=d) +np.var(B, out=d) diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/index_tricks.py b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/index_tricks.py new file mode 100644 index 0000000..4c4c119 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/index_tricks.py @@ -0,0 +1,64 @@ +from __future__ import annotations +from typing import Any +import numpy as np + +AR_LIKE_b = [[True, True], [True, True]] +AR_LIKE_i = [[1, 2], [3, 4]] +AR_LIKE_f = [[1.0, 2.0], [3.0, 4.0]] +AR_LIKE_U = [["1", "2"], ["3", "4"]] + +AR_i8: np.ndarray[Any, np.dtype[np.int64]] = np.array(AR_LIKE_i, dtype=np.int64) + +np.ndenumerate(AR_i8) +np.ndenumerate(AR_LIKE_f) +np.ndenumerate(AR_LIKE_U) + +np.ndenumerate(AR_i8).iter +np.ndenumerate(AR_LIKE_f).iter +np.ndenumerate(AR_LIKE_U).iter + +next(np.ndenumerate(AR_i8)) +next(np.ndenumerate(AR_LIKE_f)) +next(np.ndenumerate(AR_LIKE_U)) + +iter(np.ndenumerate(AR_i8)) +iter(np.ndenumerate(AR_LIKE_f)) +iter(np.ndenumerate(AR_LIKE_U)) + +iter(np.ndindex(1, 2, 3)) +next(np.ndindex(1, 2, 3)) + +np.unravel_index([22, 41, 37], (7, 
6)) +np.unravel_index([31, 41, 13], (7, 6), order='F') +np.unravel_index(1621, (6, 7, 8, 9)) + +np.ravel_multi_index(AR_LIKE_i, (7, 6)) +np.ravel_multi_index(AR_LIKE_i, (7, 6), order='F') +np.ravel_multi_index(AR_LIKE_i, (4, 6), mode='clip') +np.ravel_multi_index(AR_LIKE_i, (4, 4), mode=('clip', 'wrap')) +np.ravel_multi_index((3, 1, 4, 1), (6, 7, 8, 9)) + +np.mgrid[1:1:2] +np.mgrid[1:1:2, None:10] + +np.ogrid[1:1:2] +np.ogrid[1:1:2, None:10] + +np.index_exp[0:1] +np.index_exp[0:1, None:3] +np.index_exp[0, 0:1, ..., [0, 1, 3]] + +np.s_[0:1] +np.s_[0:1, None:3] +np.s_[0, 0:1, ..., [0, 1, 3]] + +np.ix_(AR_LIKE_b[0]) +np.ix_(AR_LIKE_i[0], AR_LIKE_f[0]) +np.ix_(AR_i8[0]) + +np.fill_diagonal(AR_i8, 5) + +np.diag_indices(4) +np.diag_indices(2, 3) + +np.diag_indices_from(AR_i8) diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/lib_utils.py b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/lib_utils.py new file mode 100644 index 0000000..65640c2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/lib_utils.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +from io import StringIO + +import numpy as np + +FILE = StringIO() +AR = np.arange(10, dtype=np.float64) + +def func(a: int) -> bool: ... + +np.deprecate(func) +np.deprecate() + +np.deprecate_with_doc("test") +np.deprecate_with_doc(None) + +np.byte_bounds(AR) +np.byte_bounds(np.float64()) + +np.info(1, output=FILE) + +np.source(np.interp, output=FILE) + +np.lookfor("binary representation", output=FILE) diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/lib_version.py b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/lib_version.py new file mode 100644 index 0000000..f3825ec --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/lib_version.py @@ -0,0 +1,18 @@ +from numpy.lib import NumpyVersion + +version = NumpyVersion("1.8.0") + +version.vstring +version.version +version.major +version.minor +version.bugfix +version.pre_release +version.is_devversion + +version == version +version != version +version < "1.8.0" +version <= version +version > version +version >= "1.8.0" diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/literal.py b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/literal.py new file mode 100644 index 0000000..8eaeb6a --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/literal.py @@ -0,0 +1,45 @@ +from functools import partial +from typing import Callable, List, Tuple + +import pytest # type: ignore +import numpy as np + +AR = np.array(0) +AR.setflags(write=False) + +KACF = frozenset({None, "K", "A", "C", "F"}) +ACF = frozenset({None, "A", "C", "F"}) +CF = frozenset({None, "C", "F"}) + +order_list: List[Tuple[frozenset, Callable]] = [ + (KACF, partial(np.ndarray, 1)), + (KACF, AR.tobytes), + (KACF, partial(AR.astype, int)), + (KACF, AR.copy), + (ACF, partial(AR.reshape, 1)), + (KACF, AR.flatten), + (KACF, AR.ravel), + (KACF, partial(np.array, 1)), + (CF, partial(np.zeros, 1)), + (CF, partial(np.ones, 1)), + (CF, partial(np.empty, 1)), + (CF, partial(np.full, 1, 1)), + (KACF, partial(np.zeros_like, AR)), + (KACF, partial(np.ones_like, AR)), + (KACF, partial(np.empty_like, AR)), + (KACF, partial(np.full_like, AR, 1)), + (KACF, partial(np.add, 1, 1)), # i.e. 
np.ufunc.__call__ + (ACF, partial(np.reshape, AR, 1)), + (KACF, partial(np.ravel, AR)), + (KACF, partial(np.asarray, 1)), + (KACF, partial(np.asanyarray, 1)), +] + +for order_set, func in order_list: + for order in order_set: + func(order=order) + + invalid_orders = KACF - order_set + for order in invalid_orders: + with pytest.raises(ValueError): + func(order=order) diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/mod.py b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/mod.py new file mode 100644 index 0000000..b5b9afb --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/mod.py @@ -0,0 +1,149 @@ +import numpy as np + +f8 = np.float64(1) +i8 = np.int64(1) +u8 = np.uint64(1) + +f4 = np.float32(1) +i4 = np.int32(1) +u4 = np.uint32(1) + +td = np.timedelta64(1, "D") +b_ = np.bool_(1) + +b = bool(1) +f = float(1) +i = int(1) + +AR = np.array([1], dtype=np.bool_) +AR.setflags(write=False) + +AR2 = np.array([1], dtype=np.timedelta64) +AR2.setflags(write=False) + +# Time structures + +td % td +td % AR2 +AR2 % td + +divmod(td, td) +divmod(td, AR2) +divmod(AR2, td) + +# Bool + +b_ % b +b_ % i +b_ % f +b_ % b_ +b_ % i8 +b_ % u8 +b_ % f8 +b_ % AR + +divmod(b_, b) +divmod(b_, i) +divmod(b_, f) +divmod(b_, b_) +divmod(b_, i8) +divmod(b_, u8) +divmod(b_, f8) +divmod(b_, AR) + +b % b_ +i % b_ +f % b_ +b_ % b_ +i8 % b_ +u8 % b_ +f8 % b_ +AR % b_ + +divmod(b, b_) +divmod(i, b_) +divmod(f, b_) +divmod(b_, b_) +divmod(i8, b_) +divmod(u8, b_) +divmod(f8, b_) +divmod(AR, b_) + +# int + +i8 % b +i8 % i +i8 % f +i8 % i8 +i8 % f8 +i4 % i8 +i4 % f8 +i4 % i4 +i4 % f4 +i8 % AR + +divmod(i8, b) +divmod(i8, i) +divmod(i8, f) +divmod(i8, i8) +divmod(i8, f8) +divmod(i8, i4) +divmod(i8, f4) +divmod(i4, i4) +divmod(i4, f4) +divmod(i8, AR) + +b % i8 +i % i8 +f % i8 +i8 % i8 +f8 % i8 +i8 % i4 +f8 % i4 +i4 % i4 +f4 % i4 +AR % i8 + +divmod(b, i8) +divmod(i, i8) +divmod(f, i8) +divmod(i8, i8) +divmod(f8, i8) +divmod(i4, i8) +divmod(f4, i8) +divmod(i4, i4) +divmod(f4, i4) +divmod(AR, i8) + +# float + +f8 % b +f8 % i +f8 % f +i8 % f4 +f4 % f4 +f8 % AR + +divmod(f8, b) +divmod(f8, i) +divmod(f8, f) +divmod(f8, f8) +divmod(f8, f4) +divmod(f4, f4) +divmod(f8, AR) + +b % f8 +i % f8 +f % f8 +f8 % f8 +f8 % f8 +f4 % f4 +AR % f8 + +divmod(b, f8) +divmod(i, f8) +divmod(f, f8) +divmod(f8, f8) +divmod(f4, f8) +divmod(f4, f4) +divmod(AR, f8) diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/modules.py b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/modules.py new file mode 100644 index 0000000..9261874 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/modules.py @@ -0,0 +1,43 @@ +import numpy as np +from numpy import f2py + +np.char +np.ctypeslib +np.emath +np.fft +np.lib +np.linalg +np.ma +np.matrixlib +np.polynomial +np.random +np.rec +np.testing +np.version + +np.lib.format +np.lib.mixins +np.lib.scimath +np.lib.stride_tricks +np.ma.extras +np.polynomial.chebyshev +np.polynomial.hermite +np.polynomial.hermite_e +np.polynomial.laguerre +np.polynomial.legendre +np.polynomial.polynomial + +np.__path__ +np.__version__ +np.__git_version__ + +np.__all__ +np.char.__all__ +np.ctypeslib.__all__ +np.emath.__all__ +np.lib.__all__ +np.ma.__all__ +np.random.__all__ +np.rec.__all__ +np.testing.__all__ +f2py.__all__ diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/multiarray.py b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/multiarray.py new file mode 
100644 index 0000000..26cedfd --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/multiarray.py @@ -0,0 +1,76 @@ +import numpy as np +import numpy.typing as npt + +AR_f8: npt.NDArray[np.float64] = np.array([1.0]) +AR_i4 = np.array([1], dtype=np.int32) +AR_u1 = np.array([1], dtype=np.uint8) + +AR_LIKE_f = [1.5] +AR_LIKE_i = [1] + +b_f8 = np.broadcast(AR_f8) +b_i4_f8_f8 = np.broadcast(AR_i4, AR_f8, AR_f8) + +next(b_f8) +b_f8.reset() +b_f8.index +b_f8.iters +b_f8.nd +b_f8.ndim +b_f8.numiter +b_f8.shape +b_f8.size + +next(b_i4_f8_f8) +b_i4_f8_f8.reset() +b_i4_f8_f8.ndim +b_i4_f8_f8.index +b_i4_f8_f8.iters +b_i4_f8_f8.nd +b_i4_f8_f8.numiter +b_i4_f8_f8.shape +b_i4_f8_f8.size + +np.inner(AR_f8, AR_i4) + +np.where([True, True, False]) +np.where([True, True, False], 1, 0) + +np.lexsort([0, 1, 2]) + +np.can_cast(np.dtype("i8"), int) +np.can_cast(AR_f8, "f8") +np.can_cast(AR_f8, np.complex128, casting="unsafe") + +np.min_scalar_type([1]) +np.min_scalar_type(AR_f8) + +np.result_type(int, AR_i4) +np.result_type(AR_f8, AR_u1) +np.result_type(AR_f8, np.complex128) + +np.dot(AR_LIKE_f, AR_i4) +np.dot(AR_u1, 1) +np.dot(1.5j, 1) +np.dot(AR_u1, 1, out=AR_f8) + +np.vdot(AR_LIKE_f, AR_i4) +np.vdot(AR_u1, 1) +np.vdot(1.5j, 1) + +np.bincount(AR_i4) + +np.copyto(AR_f8, [1.6]) + +np.putmask(AR_f8, [True], 1.5) + +np.packbits(AR_i4) +np.packbits(AR_u1) + +np.unpackbits(AR_u1) + +np.shares_memory(1, 2) +np.shares_memory(AR_f8, AR_f8, max_work=1) + +np.may_share_memory(1, 2) +np.may_share_memory(AR_f8, AR_f8, max_work=1) diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/ndarray_conversion.py b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/ndarray_conversion.py new file mode 100644 index 0000000..303cf53 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/ndarray_conversion.py @@ -0,0 +1,94 @@ +import os +import tempfile + +import numpy as np + +nd = np.array([[1, 2], [3, 4]]) +scalar_array = np.array(1) + +# item +scalar_array.item() +nd.item(1) +nd.item(0, 1) +nd.item((0, 1)) + +# tolist is pretty simple + +# itemset +scalar_array.itemset(3) +nd.itemset(3, 0) +nd.itemset((0, 0), 3) + +# tobytes +nd.tobytes() +nd.tobytes("C") +nd.tobytes(None) + +# tofile +if os.name != "nt": + with tempfile.NamedTemporaryFile(suffix=".txt") as tmp: + nd.tofile(tmp.name) + nd.tofile(tmp.name, "") + nd.tofile(tmp.name, sep="") + + nd.tofile(tmp.name, "", "%s") + nd.tofile(tmp.name, format="%s") + + nd.tofile(tmp) + +# dump is pretty simple +# dumps is pretty simple + +# astype +nd.astype("float") +nd.astype(float) + +nd.astype(float, "K") +nd.astype(float, order="K") + +nd.astype(float, "K", "unsafe") +nd.astype(float, casting="unsafe") + +nd.astype(float, "K", "unsafe", True) +nd.astype(float, subok=True) + +nd.astype(float, "K", "unsafe", True, True) +nd.astype(float, copy=True) + +# byteswap +nd.byteswap() +nd.byteswap(True) + +# copy +nd.copy() +nd.copy("C") + +# view +nd.view() +nd.view(np.int64) +nd.view(dtype=np.int64) +nd.view(np.int64, np.matrix) +nd.view(type=np.matrix) + +# getfield +complex_array = np.array([[1 + 1j, 0], [0, 1 - 1j]], dtype=np.complex128) + +complex_array.getfield("float") +complex_array.getfield(float) + +complex_array.getfield("float", 8) +complex_array.getfield(float, offset=8) + +# setflags +nd.setflags() + +nd.setflags(True) +nd.setflags(write=True) + +nd.setflags(True, True) +nd.setflags(write=True, align=True) + +nd.setflags(True, True, False) +nd.setflags(write=True, align=True, uic=False) 
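# Editorial note, not part of the original test file: each method above is
# deliberately called twice, positionally and with keywords, so the stubs
# must accept both spellings. A quick runtime check that one such pair is
# equivalent:
_a = np.zeros(3)
assert (_a.astype(float, "K", "unsafe", True, True).dtype
        == _a.astype(float, casting="unsafe", subok=True, copy=True).dtype)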
+ +# fill is pretty simple diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/ndarray_misc.py b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/ndarray_misc.py new file mode 100644 index 0000000..6202460 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/ndarray_misc.py @@ -0,0 +1,185 @@ +""" +Tests for miscellaneous (non-magic) ``np.ndarray``/``np.generic`` methods. + +More extensive tests are performed for the methods' +function-based counterpart in `../from_numeric.py`. + +""" + +from __future__ import annotations + +import operator +from typing import cast, Any + +import numpy as np + +class SubClass(np.ndarray): ... + +i4 = np.int32(1) +A: np.ndarray[Any, np.dtype[np.int32]] = np.array([[1]], dtype=np.int32) +B0 = np.empty((), dtype=np.int32).view(SubClass) +B1 = np.empty((1,), dtype=np.int32).view(SubClass) +B2 = np.empty((1, 1), dtype=np.int32).view(SubClass) +C: np.ndarray[Any, np.dtype[np.int32]] = np.array([0, 1, 2], dtype=np.int32) +D = np.empty(3).view(SubClass) + +i4.all() +A.all() +A.all(axis=0) +A.all(keepdims=True) +A.all(out=B0) + +i4.any() +A.any() +A.any(axis=0) +A.any(keepdims=True) +A.any(out=B0) + +i4.argmax() +A.argmax() +A.argmax(axis=0) +A.argmax(out=B0) + +i4.argmin() +A.argmin() +A.argmin(axis=0) +A.argmin(out=B0) + +i4.argsort() +A.argsort() + +i4.choose([()]) +_choices = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]], dtype=np.int32) +C.choose(_choices) +C.choose(_choices, out=D) + +i4.clip(1) +A.clip(1) +A.clip(None, 1) +A.clip(1, out=B2) +A.clip(None, 1, out=B2) + +i4.compress([1]) +A.compress([1]) +A.compress([1], out=B1) + +i4.conj() +A.conj() +B0.conj() + +i4.conjugate() +A.conjugate() +B0.conjugate() + +i4.cumprod() +A.cumprod() +A.cumprod(out=B1) + +i4.cumsum() +A.cumsum() +A.cumsum(out=B1) + +i4.max() +A.max() +A.max(axis=0) +A.max(keepdims=True) +A.max(out=B0) + +i4.mean() +A.mean() +A.mean(axis=0) +A.mean(keepdims=True) +A.mean(out=B0) + +i4.min() +A.min() +A.min(axis=0) +A.min(keepdims=True) +A.min(out=B0) + +i4.newbyteorder() +A.newbyteorder() +B0.newbyteorder('|') + +i4.prod() +A.prod() +A.prod(axis=0) +A.prod(keepdims=True) +A.prod(out=B0) + +i4.ptp() +A.ptp() +A.ptp(axis=0) +A.ptp(keepdims=True) +A.astype(int).ptp(out=B0) + +i4.round() +A.round() +A.round(out=B2) + +i4.repeat(1) +A.repeat(1) +B0.repeat(1) + +i4.std() +A.std() +A.std(axis=0) +A.std(keepdims=True) +A.std(out=B0.astype(np.float64)) + +i4.sum() +A.sum() +A.sum(axis=0) +A.sum(keepdims=True) +A.sum(out=B0) + +i4.take(0) +A.take(0) +A.take([0]) +A.take(0, out=B0) +A.take([0], out=B1) + +i4.var() +A.var() +A.var(axis=0) +A.var(keepdims=True) +A.var(out=B0) + +A.argpartition([0]) + +A.diagonal() + +A.dot(1) +A.dot(1, out=B0) + +A.nonzero() + +C.searchsorted(1) + +A.trace() +A.trace(out=B0) + +void = cast(np.void, np.array(1, dtype=[("f", np.float64)]).take(0)) +void.setfield(10, np.float64) + +A.item(0) +C.item(0) + +A.ravel() +C.ravel() + +A.flatten() +C.flatten() + +A.reshape(1) +C.reshape(3) + +int(np.array(1.0, dtype=np.float64)) +int(np.array("1", dtype=np.str_)) + +float(np.array(1.0, dtype=np.float64)) +float(np.array("1", dtype=np.str_)) + +complex(np.array(1.0, dtype=np.float64)) + +operator.index(np.array(1, dtype=np.int64)) diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/ndarray_shape_manipulation.py b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/ndarray_shape_manipulation.py new file mode 100644 index 0000000..0ca3dff --- /dev/null +++ 
b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/ndarray_shape_manipulation.py @@ -0,0 +1,47 @@ +import numpy as np + +nd1 = np.array([[1, 2], [3, 4]]) + +# reshape +nd1.reshape(4) +nd1.reshape(2, 2) +nd1.reshape((2, 2)) + +nd1.reshape((2, 2), order="C") +nd1.reshape(4, order="C") + +# resize +nd1.resize() +nd1.resize(4) +nd1.resize(2, 2) +nd1.resize((2, 2)) + +nd1.resize((2, 2), refcheck=True) +nd1.resize(4, refcheck=True) + +nd2 = np.array([[1, 2], [3, 4]]) + +# transpose +nd2.transpose() +nd2.transpose(1, 0) +nd2.transpose((1, 0)) + +# swapaxes +nd2.swapaxes(0, 1) + +# flatten +nd2.flatten() +nd2.flatten("C") + +# ravel +nd2.ravel() +nd2.ravel("C") + +# squeeze +nd2.squeeze() + +nd3 = np.array([[1, 2]]) +nd3.squeeze(0) + +nd4 = np.array([[[1, 2]]]) +nd4.squeeze((0, 1)) diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/numeric.py b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/numeric.py new file mode 100644 index 0000000..34fef72 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/numeric.py @@ -0,0 +1,89 @@ +""" +Tests for :mod:`numpy.core.numeric`. + +Does not include tests which fall under ``array_constructors``. + +""" + +from typing import List +import numpy as np + +class SubClass(np.ndarray): + ... + +i8 = np.int64(1) + +A = np.arange(27).reshape(3, 3, 3) +B: List[List[List[int]]] = A.tolist() +C = np.empty((27, 27)).view(SubClass) + +np.count_nonzero(i8) +np.count_nonzero(A) +np.count_nonzero(B) +np.count_nonzero(A, keepdims=True) +np.count_nonzero(A, axis=0) + +np.isfortran(i8) +np.isfortran(A) + +np.argwhere(i8) +np.argwhere(A) + +np.flatnonzero(i8) +np.flatnonzero(A) + +np.correlate(B[0][0], A.ravel(), mode="valid") +np.correlate(A.ravel(), A.ravel(), mode="same") + +np.convolve(B[0][0], A.ravel(), mode="valid") +np.convolve(A.ravel(), A.ravel(), mode="same") + +np.outer(i8, A) +np.outer(B, A) +np.outer(A, A) +np.outer(A, A, out=C) + +np.tensordot(B, A) +np.tensordot(A, A) +np.tensordot(A, A, axes=0) +np.tensordot(A, A, axes=(0, 1)) + +np.isscalar(i8) +np.isscalar(A) +np.isscalar(B) + +np.roll(A, 1) +np.roll(A, (1, 2)) +np.roll(B, 1) + +np.rollaxis(A, 0, 1) + +np.moveaxis(A, 0, 1) +np.moveaxis(A, (0, 1), (1, 2)) + +np.cross(B, A) +np.cross(A, A) + +np.indices([0, 1, 2]) +np.indices([0, 1, 2], sparse=False) +np.indices([0, 1, 2], sparse=True) + +np.binary_repr(1) + +np.base_repr(1) + +np.allclose(i8, A) +np.allclose(B, A) +np.allclose(A, A) + +np.isclose(i8, A) +np.isclose(B, A) +np.isclose(A, A) + +np.array_equal(i8, A) +np.array_equal(B, A) +np.array_equal(A, A) + +np.array_equiv(i8, A) +np.array_equiv(B, A) +np.array_equiv(A, A) diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/numerictypes.py b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/numerictypes.py new file mode 100644 index 0000000..5af0d17 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/numerictypes.py @@ -0,0 +1,47 @@ +import numpy as np + +np.maximum_sctype("S8") +np.maximum_sctype(object) + +np.issctype(object) +np.issctype("S8") + +np.obj2sctype(list) +np.obj2sctype(list, default=None) +np.obj2sctype(list, default=np.string_) + +np.issubclass_(np.int32, int) +np.issubclass_(np.float64, float) +np.issubclass_(np.float64, (int, float)) + +np.issubsctype("int64", int) +np.issubsctype(np.array([1]), np.array([1])) + +np.issubdtype("S1", np.string_) +np.issubdtype(np.float64, np.float32) + +np.sctype2char("S1") +np.sctype2char(list) + 
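# Editorial note, not part of the original test file: these helpers date
# from the old numeric-types introspection API; several of them (np.issctype,
# np.find_common_type, np.cast, np.maximum_sctype, ...) were deprecated in
# later 1.x releases and removed in NumPy 2.0, so this file is tied to the
# vendored 1.x NumPy. On 1.x:
assert np.sctype2char(np.float64) == "d"  # 'd' is the C double typecode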
+np.find_common_type([], [np.int64, np.float32, complex]) +np.find_common_type((), (np.int64, np.float32, complex)) +np.find_common_type([np.int64, np.float32], []) +np.find_common_type([np.float32], [np.int64, np.float64]) + +np.cast[int] +np.cast["i8"] +np.cast[np.int64] + +np.nbytes[int] +np.nbytes["i8"] +np.nbytes[np.int64] + +np.ScalarType +np.ScalarType[0] +np.ScalarType[4] +np.ScalarType[9] +np.ScalarType[11] + +np.typecodes["Character"] +np.typecodes["Complex"] +np.typecodes["All"] diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/random.py b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/random.py new file mode 100644 index 0000000..05bd621 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/random.py @@ -0,0 +1,1497 @@ +from __future__ import annotations + +from typing import Any, List, Dict + +import numpy as np + +SEED_NONE = None +SEED_INT = 4579435749574957634658964293569 +SEED_ARR: np.ndarray[Any, np.dtype[np.int64]] = np.array([1, 2, 3, 4], dtype=np.int64) +SEED_ARRLIKE: List[int] = [1, 2, 3, 4] +SEED_SEED_SEQ: np.random.SeedSequence = np.random.SeedSequence(0) +SEED_MT19937: np.random.MT19937 = np.random.MT19937(0) +SEED_PCG64: np.random.PCG64 = np.random.PCG64(0) +SEED_PHILOX: np.random.Philox = np.random.Philox(0) +SEED_SFC64: np.random.SFC64 = np.random.SFC64(0) + +# default rng +np.random.default_rng() +np.random.default_rng(SEED_NONE) +np.random.default_rng(SEED_INT) +np.random.default_rng(SEED_ARR) +np.random.default_rng(SEED_ARRLIKE) +np.random.default_rng(SEED_SEED_SEQ) +np.random.default_rng(SEED_MT19937) +np.random.default_rng(SEED_PCG64) +np.random.default_rng(SEED_PHILOX) +np.random.default_rng(SEED_SFC64) + +# Seed Sequence +np.random.SeedSequence(SEED_NONE) +np.random.SeedSequence(SEED_INT) +np.random.SeedSequence(SEED_ARR) +np.random.SeedSequence(SEED_ARRLIKE) + +# Bit Generators +np.random.MT19937(SEED_NONE) +np.random.MT19937(SEED_INT) +np.random.MT19937(SEED_ARR) +np.random.MT19937(SEED_ARRLIKE) +np.random.MT19937(SEED_SEED_SEQ) + +np.random.PCG64(SEED_NONE) +np.random.PCG64(SEED_INT) +np.random.PCG64(SEED_ARR) +np.random.PCG64(SEED_ARRLIKE) +np.random.PCG64(SEED_SEED_SEQ) + +np.random.Philox(SEED_NONE) +np.random.Philox(SEED_INT) +np.random.Philox(SEED_ARR) +np.random.Philox(SEED_ARRLIKE) +np.random.Philox(SEED_SEED_SEQ) + +np.random.SFC64(SEED_NONE) +np.random.SFC64(SEED_INT) +np.random.SFC64(SEED_ARR) +np.random.SFC64(SEED_ARRLIKE) +np.random.SFC64(SEED_SEED_SEQ) + +seed_seq: np.random.bit_generator.SeedSequence = np.random.SeedSequence(SEED_NONE) +seed_seq.spawn(10) +seed_seq.generate_state(3) +seed_seq.generate_state(3, "u4") +seed_seq.generate_state(3, "uint32") +seed_seq.generate_state(3, "u8") +seed_seq.generate_state(3, "uint64") +seed_seq.generate_state(3, np.uint32) +seed_seq.generate_state(3, np.uint64) + + +def_gen: np.random.Generator = np.random.default_rng() + +D_arr_0p1: np.ndarray[Any, np.dtype[np.float64]] = np.array([0.1]) +D_arr_0p5: np.ndarray[Any, np.dtype[np.float64]] = np.array([0.5]) +D_arr_0p9: np.ndarray[Any, np.dtype[np.float64]] = np.array([0.9]) +D_arr_1p5: np.ndarray[Any, np.dtype[np.float64]] = np.array([1.5]) +I_arr_10: np.ndarray[Any, np.dtype[np.int_]] = np.array([10], dtype=np.int_) +I_arr_20: np.ndarray[Any, np.dtype[np.int_]] = np.array([20], dtype=np.int_) +D_arr_like_0p1: List[float] = [0.1] +D_arr_like_0p5: List[float] = [0.5] +D_arr_like_0p9: List[float] = [0.9] +D_arr_like_1p5: List[float] = [1.5] +I_arr_like_10: List[int] = [10] 
+I_arr_like_20: List[int] = [20] +D_2D_like: List[List[float]] = [[1, 2], [2, 3], [3, 4], [4, 5.1]] +D_2D: np.ndarray[Any, np.dtype[np.float64]] = np.array(D_2D_like) + +S_out: np.ndarray[Any, np.dtype[np.float32]] = np.empty(1, dtype=np.float32) +D_out: np.ndarray[Any, np.dtype[np.float64]] = np.empty(1) + +def_gen.standard_normal() +def_gen.standard_normal(dtype=np.float32) +def_gen.standard_normal(dtype="float32") +def_gen.standard_normal(dtype="double") +def_gen.standard_normal(dtype=np.float64) +def_gen.standard_normal(size=None) +def_gen.standard_normal(size=1) +def_gen.standard_normal(size=1, dtype=np.float32) +def_gen.standard_normal(size=1, dtype="f4") +def_gen.standard_normal(size=1, dtype="float32", out=S_out) +def_gen.standard_normal(dtype=np.float32, out=S_out) +def_gen.standard_normal(size=1, dtype=np.float64) +def_gen.standard_normal(size=1, dtype="float64") +def_gen.standard_normal(size=1, dtype="f8") +def_gen.standard_normal(out=D_out) +def_gen.standard_normal(size=1, dtype="float64") +def_gen.standard_normal(size=1, dtype="float64", out=D_out) + +def_gen.random() +def_gen.random(dtype=np.float32) +def_gen.random(dtype="float32") +def_gen.random(dtype="double") +def_gen.random(dtype=np.float64) +def_gen.random(size=None) +def_gen.random(size=1) +def_gen.random(size=1, dtype=np.float32) +def_gen.random(size=1, dtype="f4") +def_gen.random(size=1, dtype="float32", out=S_out) +def_gen.random(dtype=np.float32, out=S_out) +def_gen.random(size=1, dtype=np.float64) +def_gen.random(size=1, dtype="float64") +def_gen.random(size=1, dtype="f8") +def_gen.random(out=D_out) +def_gen.random(size=1, dtype="float64") +def_gen.random(size=1, dtype="float64", out=D_out) + +def_gen.standard_cauchy() +def_gen.standard_cauchy(size=None) +def_gen.standard_cauchy(size=1) + +def_gen.standard_exponential() +def_gen.standard_exponential(method="inv") +def_gen.standard_exponential(dtype=np.float32) +def_gen.standard_exponential(dtype="float32") +def_gen.standard_exponential(dtype="double") +def_gen.standard_exponential(dtype=np.float64) +def_gen.standard_exponential(size=None) +def_gen.standard_exponential(size=None, method="inv") +def_gen.standard_exponential(size=1, method="inv") +def_gen.standard_exponential(size=1, dtype=np.float32) +def_gen.standard_exponential(size=1, dtype="f4", method="inv") +def_gen.standard_exponential(size=1, dtype="float32", out=S_out) +def_gen.standard_exponential(dtype=np.float32, out=S_out) +def_gen.standard_exponential(size=1, dtype=np.float64, method="inv") +def_gen.standard_exponential(size=1, dtype="float64") +def_gen.standard_exponential(size=1, dtype="f8") +def_gen.standard_exponential(out=D_out) +def_gen.standard_exponential(size=1, dtype="float64") +def_gen.standard_exponential(size=1, dtype="float64", out=D_out) + +def_gen.zipf(1.5) +def_gen.zipf(1.5, size=None) +def_gen.zipf(1.5, size=1) +def_gen.zipf(D_arr_1p5) +def_gen.zipf(D_arr_1p5, size=1) +def_gen.zipf(D_arr_like_1p5) +def_gen.zipf(D_arr_like_1p5, size=1) + +def_gen.weibull(0.5) +def_gen.weibull(0.5, size=None) +def_gen.weibull(0.5, size=1) +def_gen.weibull(D_arr_0p5) +def_gen.weibull(D_arr_0p5, size=1) +def_gen.weibull(D_arr_like_0p5) +def_gen.weibull(D_arr_like_0p5, size=1) + +def_gen.standard_t(0.5) +def_gen.standard_t(0.5, size=None) +def_gen.standard_t(0.5, size=1) +def_gen.standard_t(D_arr_0p5) +def_gen.standard_t(D_arr_0p5, size=1) +def_gen.standard_t(D_arr_like_0p5) +def_gen.standard_t(D_arr_like_0p5, size=1) + +def_gen.poisson(0.5) +def_gen.poisson(0.5, size=None) +def_gen.poisson(0.5, size=1) 
+def_gen.poisson(D_arr_0p5) +def_gen.poisson(D_arr_0p5, size=1) +def_gen.poisson(D_arr_like_0p5) +def_gen.poisson(D_arr_like_0p5, size=1) + +def_gen.power(0.5) +def_gen.power(0.5, size=None) +def_gen.power(0.5, size=1) +def_gen.power(D_arr_0p5) +def_gen.power(D_arr_0p5, size=1) +def_gen.power(D_arr_like_0p5) +def_gen.power(D_arr_like_0p5, size=1) + +def_gen.pareto(0.5) +def_gen.pareto(0.5, size=None) +def_gen.pareto(0.5, size=1) +def_gen.pareto(D_arr_0p5) +def_gen.pareto(D_arr_0p5, size=1) +def_gen.pareto(D_arr_like_0p5) +def_gen.pareto(D_arr_like_0p5, size=1) + +def_gen.chisquare(0.5) +def_gen.chisquare(0.5, size=None) +def_gen.chisquare(0.5, size=1) +def_gen.chisquare(D_arr_0p5) +def_gen.chisquare(D_arr_0p5, size=1) +def_gen.chisquare(D_arr_like_0p5) +def_gen.chisquare(D_arr_like_0p5, size=1) + +def_gen.exponential(0.5) +def_gen.exponential(0.5, size=None) +def_gen.exponential(0.5, size=1) +def_gen.exponential(D_arr_0p5) +def_gen.exponential(D_arr_0p5, size=1) +def_gen.exponential(D_arr_like_0p5) +def_gen.exponential(D_arr_like_0p5, size=1) + +def_gen.geometric(0.5) +def_gen.geometric(0.5, size=None) +def_gen.geometric(0.5, size=1) +def_gen.geometric(D_arr_0p5) +def_gen.geometric(D_arr_0p5, size=1) +def_gen.geometric(D_arr_like_0p5) +def_gen.geometric(D_arr_like_0p5, size=1) + +def_gen.logseries(0.5) +def_gen.logseries(0.5, size=None) +def_gen.logseries(0.5, size=1) +def_gen.logseries(D_arr_0p5) +def_gen.logseries(D_arr_0p5, size=1) +def_gen.logseries(D_arr_like_0p5) +def_gen.logseries(D_arr_like_0p5, size=1) + +def_gen.rayleigh(0.5) +def_gen.rayleigh(0.5, size=None) +def_gen.rayleigh(0.5, size=1) +def_gen.rayleigh(D_arr_0p5) +def_gen.rayleigh(D_arr_0p5, size=1) +def_gen.rayleigh(D_arr_like_0p5) +def_gen.rayleigh(D_arr_like_0p5, size=1) + +def_gen.standard_gamma(0.5) +def_gen.standard_gamma(0.5, size=None) +def_gen.standard_gamma(0.5, dtype="float32") +def_gen.standard_gamma(0.5, size=None, dtype="float32") +def_gen.standard_gamma(0.5, size=1) +def_gen.standard_gamma(D_arr_0p5) +def_gen.standard_gamma(D_arr_0p5, dtype="f4") +def_gen.standard_gamma(0.5, size=1, dtype="float32", out=S_out) +def_gen.standard_gamma(D_arr_0p5, dtype=np.float32, out=S_out) +def_gen.standard_gamma(D_arr_0p5, size=1) +def_gen.standard_gamma(D_arr_like_0p5) +def_gen.standard_gamma(D_arr_like_0p5, size=1) +def_gen.standard_gamma(0.5, out=D_out) +def_gen.standard_gamma(D_arr_like_0p5, out=D_out) +def_gen.standard_gamma(D_arr_like_0p5, size=1) +def_gen.standard_gamma(D_arr_like_0p5, size=1, out=D_out, dtype=np.float64) + +def_gen.vonmises(0.5, 0.5) +def_gen.vonmises(0.5, 0.5, size=None) +def_gen.vonmises(0.5, 0.5, size=1) +def_gen.vonmises(D_arr_0p5, 0.5) +def_gen.vonmises(0.5, D_arr_0p5) +def_gen.vonmises(D_arr_0p5, 0.5, size=1) +def_gen.vonmises(0.5, D_arr_0p5, size=1) +def_gen.vonmises(D_arr_like_0p5, 0.5) +def_gen.vonmises(0.5, D_arr_like_0p5) +def_gen.vonmises(D_arr_0p5, D_arr_0p5) +def_gen.vonmises(D_arr_like_0p5, D_arr_like_0p5) +def_gen.vonmises(D_arr_0p5, D_arr_0p5, size=1) +def_gen.vonmises(D_arr_like_0p5, D_arr_like_0p5, size=1) + +def_gen.wald(0.5, 0.5) +def_gen.wald(0.5, 0.5, size=None) +def_gen.wald(0.5, 0.5, size=1) +def_gen.wald(D_arr_0p5, 0.5) +def_gen.wald(0.5, D_arr_0p5) +def_gen.wald(D_arr_0p5, 0.5, size=1) +def_gen.wald(0.5, D_arr_0p5, size=1) +def_gen.wald(D_arr_like_0p5, 0.5) +def_gen.wald(0.5, D_arr_like_0p5) +def_gen.wald(D_arr_0p5, D_arr_0p5) +def_gen.wald(D_arr_like_0p5, D_arr_like_0p5) +def_gen.wald(D_arr_0p5, D_arr_0p5, size=1) +def_gen.wald(D_arr_like_0p5, D_arr_like_0p5, size=1) + 
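# Every distribution method is exercised with each mix of scalar,
# ndarray, and list parameters because the annotations accept either a
# float or an array-like for each shape parameter; at runtime the array
# arguments broadcast against `size`. A small illustrative sketch:
_draws = def_gen.gamma([1.0, 2.0], 0.5, size=(3, 2))  # (2,) broadcasts to (3, 2)
assert _draws.shape == (3, 2)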
+def_gen.uniform(0.5, 0.5) +def_gen.uniform(0.5, 0.5, size=None) +def_gen.uniform(0.5, 0.5, size=1) +def_gen.uniform(D_arr_0p5, 0.5) +def_gen.uniform(0.5, D_arr_0p5) +def_gen.uniform(D_arr_0p5, 0.5, size=1) +def_gen.uniform(0.5, D_arr_0p5, size=1) +def_gen.uniform(D_arr_like_0p5, 0.5) +def_gen.uniform(0.5, D_arr_like_0p5) +def_gen.uniform(D_arr_0p5, D_arr_0p5) +def_gen.uniform(D_arr_like_0p5, D_arr_like_0p5) +def_gen.uniform(D_arr_0p5, D_arr_0p5, size=1) +def_gen.uniform(D_arr_like_0p5, D_arr_like_0p5, size=1) + +def_gen.beta(0.5, 0.5) +def_gen.beta(0.5, 0.5, size=None) +def_gen.beta(0.5, 0.5, size=1) +def_gen.beta(D_arr_0p5, 0.5) +def_gen.beta(0.5, D_arr_0p5) +def_gen.beta(D_arr_0p5, 0.5, size=1) +def_gen.beta(0.5, D_arr_0p5, size=1) +def_gen.beta(D_arr_like_0p5, 0.5) +def_gen.beta(0.5, D_arr_like_0p5) +def_gen.beta(D_arr_0p5, D_arr_0p5) +def_gen.beta(D_arr_like_0p5, D_arr_like_0p5) +def_gen.beta(D_arr_0p5, D_arr_0p5, size=1) +def_gen.beta(D_arr_like_0p5, D_arr_like_0p5, size=1) + +def_gen.f(0.5, 0.5) +def_gen.f(0.5, 0.5, size=None) +def_gen.f(0.5, 0.5, size=1) +def_gen.f(D_arr_0p5, 0.5) +def_gen.f(0.5, D_arr_0p5) +def_gen.f(D_arr_0p5, 0.5, size=1) +def_gen.f(0.5, D_arr_0p5, size=1) +def_gen.f(D_arr_like_0p5, 0.5) +def_gen.f(0.5, D_arr_like_0p5) +def_gen.f(D_arr_0p5, D_arr_0p5) +def_gen.f(D_arr_like_0p5, D_arr_like_0p5) +def_gen.f(D_arr_0p5, D_arr_0p5, size=1) +def_gen.f(D_arr_like_0p5, D_arr_like_0p5, size=1) + +def_gen.gamma(0.5, 0.5) +def_gen.gamma(0.5, 0.5, size=None) +def_gen.gamma(0.5, 0.5, size=1) +def_gen.gamma(D_arr_0p5, 0.5) +def_gen.gamma(0.5, D_arr_0p5) +def_gen.gamma(D_arr_0p5, 0.5, size=1) +def_gen.gamma(0.5, D_arr_0p5, size=1) +def_gen.gamma(D_arr_like_0p5, 0.5) +def_gen.gamma(0.5, D_arr_like_0p5) +def_gen.gamma(D_arr_0p5, D_arr_0p5) +def_gen.gamma(D_arr_like_0p5, D_arr_like_0p5) +def_gen.gamma(D_arr_0p5, D_arr_0p5, size=1) +def_gen.gamma(D_arr_like_0p5, D_arr_like_0p5, size=1) + +def_gen.gumbel(0.5, 0.5) +def_gen.gumbel(0.5, 0.5, size=None) +def_gen.gumbel(0.5, 0.5, size=1) +def_gen.gumbel(D_arr_0p5, 0.5) +def_gen.gumbel(0.5, D_arr_0p5) +def_gen.gumbel(D_arr_0p5, 0.5, size=1) +def_gen.gumbel(0.5, D_arr_0p5, size=1) +def_gen.gumbel(D_arr_like_0p5, 0.5) +def_gen.gumbel(0.5, D_arr_like_0p5) +def_gen.gumbel(D_arr_0p5, D_arr_0p5) +def_gen.gumbel(D_arr_like_0p5, D_arr_like_0p5) +def_gen.gumbel(D_arr_0p5, D_arr_0p5, size=1) +def_gen.gumbel(D_arr_like_0p5, D_arr_like_0p5, size=1) + +def_gen.laplace(0.5, 0.5) +def_gen.laplace(0.5, 0.5, size=None) +def_gen.laplace(0.5, 0.5, size=1) +def_gen.laplace(D_arr_0p5, 0.5) +def_gen.laplace(0.5, D_arr_0p5) +def_gen.laplace(D_arr_0p5, 0.5, size=1) +def_gen.laplace(0.5, D_arr_0p5, size=1) +def_gen.laplace(D_arr_like_0p5, 0.5) +def_gen.laplace(0.5, D_arr_like_0p5) +def_gen.laplace(D_arr_0p5, D_arr_0p5) +def_gen.laplace(D_arr_like_0p5, D_arr_like_0p5) +def_gen.laplace(D_arr_0p5, D_arr_0p5, size=1) +def_gen.laplace(D_arr_like_0p5, D_arr_like_0p5, size=1) + +def_gen.logistic(0.5, 0.5) +def_gen.logistic(0.5, 0.5, size=None) +def_gen.logistic(0.5, 0.5, size=1) +def_gen.logistic(D_arr_0p5, 0.5) +def_gen.logistic(0.5, D_arr_0p5) +def_gen.logistic(D_arr_0p5, 0.5, size=1) +def_gen.logistic(0.5, D_arr_0p5, size=1) +def_gen.logistic(D_arr_like_0p5, 0.5) +def_gen.logistic(0.5, D_arr_like_0p5) +def_gen.logistic(D_arr_0p5, D_arr_0p5) +def_gen.logistic(D_arr_like_0p5, D_arr_like_0p5) +def_gen.logistic(D_arr_0p5, D_arr_0p5, size=1) +def_gen.logistic(D_arr_like_0p5, D_arr_like_0p5, size=1) + +def_gen.lognormal(0.5, 0.5) +def_gen.lognormal(0.5, 0.5, size=None) 
+def_gen.lognormal(0.5, 0.5, size=1) +def_gen.lognormal(D_arr_0p5, 0.5) +def_gen.lognormal(0.5, D_arr_0p5) +def_gen.lognormal(D_arr_0p5, 0.5, size=1) +def_gen.lognormal(0.5, D_arr_0p5, size=1) +def_gen.lognormal(D_arr_like_0p5, 0.5) +def_gen.lognormal(0.5, D_arr_like_0p5) +def_gen.lognormal(D_arr_0p5, D_arr_0p5) +def_gen.lognormal(D_arr_like_0p5, D_arr_like_0p5) +def_gen.lognormal(D_arr_0p5, D_arr_0p5, size=1) +def_gen.lognormal(D_arr_like_0p5, D_arr_like_0p5, size=1) + +def_gen.noncentral_chisquare(0.5, 0.5) +def_gen.noncentral_chisquare(0.5, 0.5, size=None) +def_gen.noncentral_chisquare(0.5, 0.5, size=1) +def_gen.noncentral_chisquare(D_arr_0p5, 0.5) +def_gen.noncentral_chisquare(0.5, D_arr_0p5) +def_gen.noncentral_chisquare(D_arr_0p5, 0.5, size=1) +def_gen.noncentral_chisquare(0.5, D_arr_0p5, size=1) +def_gen.noncentral_chisquare(D_arr_like_0p5, 0.5) +def_gen.noncentral_chisquare(0.5, D_arr_like_0p5) +def_gen.noncentral_chisquare(D_arr_0p5, D_arr_0p5) +def_gen.noncentral_chisquare(D_arr_like_0p5, D_arr_like_0p5) +def_gen.noncentral_chisquare(D_arr_0p5, D_arr_0p5, size=1) +def_gen.noncentral_chisquare(D_arr_like_0p5, D_arr_like_0p5, size=1) + +def_gen.normal(0.5, 0.5) +def_gen.normal(0.5, 0.5, size=None) +def_gen.normal(0.5, 0.5, size=1) +def_gen.normal(D_arr_0p5, 0.5) +def_gen.normal(0.5, D_arr_0p5) +def_gen.normal(D_arr_0p5, 0.5, size=1) +def_gen.normal(0.5, D_arr_0p5, size=1) +def_gen.normal(D_arr_like_0p5, 0.5) +def_gen.normal(0.5, D_arr_like_0p5) +def_gen.normal(D_arr_0p5, D_arr_0p5) +def_gen.normal(D_arr_like_0p5, D_arr_like_0p5) +def_gen.normal(D_arr_0p5, D_arr_0p5, size=1) +def_gen.normal(D_arr_like_0p5, D_arr_like_0p5, size=1) + +def_gen.triangular(0.1, 0.5, 0.9) +def_gen.triangular(0.1, 0.5, 0.9, size=None) +def_gen.triangular(0.1, 0.5, 0.9, size=1) +def_gen.triangular(D_arr_0p1, 0.5, 0.9) +def_gen.triangular(0.1, D_arr_0p5, 0.9) +def_gen.triangular(D_arr_0p1, 0.5, D_arr_like_0p9, size=1) +def_gen.triangular(0.1, D_arr_0p5, 0.9, size=1) +def_gen.triangular(D_arr_like_0p1, 0.5, D_arr_0p9) +def_gen.triangular(0.5, D_arr_like_0p5, 0.9) +def_gen.triangular(D_arr_0p1, D_arr_0p5, 0.9) +def_gen.triangular(D_arr_like_0p1, D_arr_like_0p5, 0.9) +def_gen.triangular(D_arr_0p1, D_arr_0p5, D_arr_0p9, size=1) +def_gen.triangular(D_arr_like_0p1, D_arr_like_0p5, D_arr_like_0p9, size=1) + +def_gen.noncentral_f(0.1, 0.5, 0.9) +def_gen.noncentral_f(0.1, 0.5, 0.9, size=None) +def_gen.noncentral_f(0.1, 0.5, 0.9, size=1) +def_gen.noncentral_f(D_arr_0p1, 0.5, 0.9) +def_gen.noncentral_f(0.1, D_arr_0p5, 0.9) +def_gen.noncentral_f(D_arr_0p1, 0.5, D_arr_like_0p9, size=1) +def_gen.noncentral_f(0.1, D_arr_0p5, 0.9, size=1) +def_gen.noncentral_f(D_arr_like_0p1, 0.5, D_arr_0p9) +def_gen.noncentral_f(0.5, D_arr_like_0p5, 0.9) +def_gen.noncentral_f(D_arr_0p1, D_arr_0p5, 0.9) +def_gen.noncentral_f(D_arr_like_0p1, D_arr_like_0p5, 0.9) +def_gen.noncentral_f(D_arr_0p1, D_arr_0p5, D_arr_0p9, size=1) +def_gen.noncentral_f(D_arr_like_0p1, D_arr_like_0p5, D_arr_like_0p9, size=1) + +def_gen.binomial(10, 0.5) +def_gen.binomial(10, 0.5, size=None) +def_gen.binomial(10, 0.5, size=1) +def_gen.binomial(I_arr_10, 0.5) +def_gen.binomial(10, D_arr_0p5) +def_gen.binomial(I_arr_10, 0.5, size=1) +def_gen.binomial(10, D_arr_0p5, size=1) +def_gen.binomial(I_arr_like_10, 0.5) +def_gen.binomial(10, D_arr_like_0p5) +def_gen.binomial(I_arr_10, D_arr_0p5) +def_gen.binomial(I_arr_like_10, D_arr_like_0p5) +def_gen.binomial(I_arr_10, D_arr_0p5, size=1) +def_gen.binomial(I_arr_like_10, D_arr_like_0p5, size=1) + +def_gen.negative_binomial(10, 
0.5) +def_gen.negative_binomial(10, 0.5, size=None) +def_gen.negative_binomial(10, 0.5, size=1) +def_gen.negative_binomial(I_arr_10, 0.5) +def_gen.negative_binomial(10, D_arr_0p5) +def_gen.negative_binomial(I_arr_10, 0.5, size=1) +def_gen.negative_binomial(10, D_arr_0p5, size=1) +def_gen.negative_binomial(I_arr_like_10, 0.5) +def_gen.negative_binomial(10, D_arr_like_0p5) +def_gen.negative_binomial(I_arr_10, D_arr_0p5) +def_gen.negative_binomial(I_arr_like_10, D_arr_like_0p5) +def_gen.negative_binomial(I_arr_10, D_arr_0p5, size=1) +def_gen.negative_binomial(I_arr_like_10, D_arr_like_0p5, size=1) + +def_gen.hypergeometric(20, 20, 10) +def_gen.hypergeometric(20, 20, 10, size=None) +def_gen.hypergeometric(20, 20, 10, size=1) +def_gen.hypergeometric(I_arr_20, 20, 10) +def_gen.hypergeometric(20, I_arr_20, 10) +def_gen.hypergeometric(I_arr_20, 20, I_arr_like_10, size=1) +def_gen.hypergeometric(20, I_arr_20, 10, size=1) +def_gen.hypergeometric(I_arr_like_20, 20, I_arr_10) +def_gen.hypergeometric(20, I_arr_like_20, 10) +def_gen.hypergeometric(I_arr_20, I_arr_20, 10) +def_gen.hypergeometric(I_arr_like_20, I_arr_like_20, 10) +def_gen.hypergeometric(I_arr_20, I_arr_20, I_arr_10, size=1) +def_gen.hypergeometric(I_arr_like_20, I_arr_like_20, I_arr_like_10, size=1) + +I_int64_100: np.ndarray[Any, np.dtype[np.int64]] = np.array([100], dtype=np.int64) + +def_gen.integers(0, 100) +def_gen.integers(100) +def_gen.integers([100]) +def_gen.integers(0, [100]) + +I_bool_low: np.ndarray[Any, np.dtype[np.bool_]] = np.array([0], dtype=np.bool_) +I_bool_low_like: List[int] = [0] +I_bool_high_open: np.ndarray[Any, np.dtype[np.bool_]] = np.array([1], dtype=np.bool_) +I_bool_high_closed: np.ndarray[Any, np.dtype[np.bool_]] = np.array([1], dtype=np.bool_) + +def_gen.integers(2, dtype=bool) +def_gen.integers(0, 2, dtype=bool) +def_gen.integers(1, dtype=bool, endpoint=True) +def_gen.integers(0, 1, dtype=bool, endpoint=True) +def_gen.integers(I_bool_low_like, 1, dtype=bool, endpoint=True) +def_gen.integers(I_bool_high_open, dtype=bool) +def_gen.integers(I_bool_low, I_bool_high_open, dtype=bool) +def_gen.integers(0, I_bool_high_open, dtype=bool) +def_gen.integers(I_bool_high_closed, dtype=bool, endpoint=True) +def_gen.integers(I_bool_low, I_bool_high_closed, dtype=bool, endpoint=True) +def_gen.integers(0, I_bool_high_closed, dtype=bool, endpoint=True) + +def_gen.integers(2, dtype=np.bool_) +def_gen.integers(0, 2, dtype=np.bool_) +def_gen.integers(1, dtype=np.bool_, endpoint=True) +def_gen.integers(0, 1, dtype=np.bool_, endpoint=True) +def_gen.integers(I_bool_low_like, 1, dtype=np.bool_, endpoint=True) +def_gen.integers(I_bool_high_open, dtype=np.bool_) +def_gen.integers(I_bool_low, I_bool_high_open, dtype=np.bool_) +def_gen.integers(0, I_bool_high_open, dtype=np.bool_) +def_gen.integers(I_bool_high_closed, dtype=np.bool_, endpoint=True) +def_gen.integers(I_bool_low, I_bool_high_closed, dtype=np.bool_, endpoint=True) +def_gen.integers(0, I_bool_high_closed, dtype=np.bool_, endpoint=True) + +I_u1_low: np.ndarray[Any, np.dtype[np.uint8]] = np.array([0], dtype=np.uint8) +I_u1_low_like: List[int] = [0] +I_u1_high_open: np.ndarray[Any, np.dtype[np.uint8]] = np.array([255], dtype=np.uint8) +I_u1_high_closed: np.ndarray[Any, np.dtype[np.uint8]] = np.array([255], dtype=np.uint8) + +def_gen.integers(256, dtype="u1") +def_gen.integers(0, 256, dtype="u1") +def_gen.integers(255, dtype="u1", endpoint=True) +def_gen.integers(0, 255, dtype="u1", endpoint=True) +def_gen.integers(I_u1_low_like, 255, dtype="u1", endpoint=True) 
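# The *_high_open fixtures hold the largest value usable as an exclusive
# upper bound, while *_high_closed pairs with endpoint=True, which makes
# the upper bound inclusive. For uint8, both spellings cover the full
# 0..255 range (illustrative asserts):
assert int(def_gen.integers(256, dtype="u1")) <= 255                 # high exclusive
assert int(def_gen.integers(255, dtype="u1", endpoint=True)) <= 255  # high inclusive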
+def_gen.integers(I_u1_high_open, dtype="u1") +def_gen.integers(I_u1_low, I_u1_high_open, dtype="u1") +def_gen.integers(0, I_u1_high_open, dtype="u1") +def_gen.integers(I_u1_high_closed, dtype="u1", endpoint=True) +def_gen.integers(I_u1_low, I_u1_high_closed, dtype="u1", endpoint=True) +def_gen.integers(0, I_u1_high_closed, dtype="u1", endpoint=True) + +def_gen.integers(256, dtype="uint8") +def_gen.integers(0, 256, dtype="uint8") +def_gen.integers(255, dtype="uint8", endpoint=True) +def_gen.integers(0, 255, dtype="uint8", endpoint=True) +def_gen.integers(I_u1_low_like, 255, dtype="uint8", endpoint=True) +def_gen.integers(I_u1_high_open, dtype="uint8") +def_gen.integers(I_u1_low, I_u1_high_open, dtype="uint8") +def_gen.integers(0, I_u1_high_open, dtype="uint8") +def_gen.integers(I_u1_high_closed, dtype="uint8", endpoint=True) +def_gen.integers(I_u1_low, I_u1_high_closed, dtype="uint8", endpoint=True) +def_gen.integers(0, I_u1_high_closed, dtype="uint8", endpoint=True) + +def_gen.integers(256, dtype=np.uint8) +def_gen.integers(0, 256, dtype=np.uint8) +def_gen.integers(255, dtype=np.uint8, endpoint=True) +def_gen.integers(0, 255, dtype=np.uint8, endpoint=True) +def_gen.integers(I_u1_low_like, 255, dtype=np.uint8, endpoint=True) +def_gen.integers(I_u1_high_open, dtype=np.uint8) +def_gen.integers(I_u1_low, I_u1_high_open, dtype=np.uint8) +def_gen.integers(0, I_u1_high_open, dtype=np.uint8) +def_gen.integers(I_u1_high_closed, dtype=np.uint8, endpoint=True) +def_gen.integers(I_u1_low, I_u1_high_closed, dtype=np.uint8, endpoint=True) +def_gen.integers(0, I_u1_high_closed, dtype=np.uint8, endpoint=True) + +I_u2_low: np.ndarray[Any, np.dtype[np.uint16]] = np.array([0], dtype=np.uint16) +I_u2_low_like: List[int] = [0] +I_u2_high_open: np.ndarray[Any, np.dtype[np.uint16]] = np.array([65535], dtype=np.uint16) +I_u2_high_closed: np.ndarray[Any, np.dtype[np.uint16]] = np.array([65535], dtype=np.uint16) + +def_gen.integers(65536, dtype="u2") +def_gen.integers(0, 65536, dtype="u2") +def_gen.integers(65535, dtype="u2", endpoint=True) +def_gen.integers(0, 65535, dtype="u2", endpoint=True) +def_gen.integers(I_u2_low_like, 65535, dtype="u2", endpoint=True) +def_gen.integers(I_u2_high_open, dtype="u2") +def_gen.integers(I_u2_low, I_u2_high_open, dtype="u2") +def_gen.integers(0, I_u2_high_open, dtype="u2") +def_gen.integers(I_u2_high_closed, dtype="u2", endpoint=True) +def_gen.integers(I_u2_low, I_u2_high_closed, dtype="u2", endpoint=True) +def_gen.integers(0, I_u2_high_closed, dtype="u2", endpoint=True) + +def_gen.integers(65536, dtype="uint16") +def_gen.integers(0, 65536, dtype="uint16") +def_gen.integers(65535, dtype="uint16", endpoint=True) +def_gen.integers(0, 65535, dtype="uint16", endpoint=True) +def_gen.integers(I_u2_low_like, 65535, dtype="uint16", endpoint=True) +def_gen.integers(I_u2_high_open, dtype="uint16") +def_gen.integers(I_u2_low, I_u2_high_open, dtype="uint16") +def_gen.integers(0, I_u2_high_open, dtype="uint16") +def_gen.integers(I_u2_high_closed, dtype="uint16", endpoint=True) +def_gen.integers(I_u2_low, I_u2_high_closed, dtype="uint16", endpoint=True) +def_gen.integers(0, I_u2_high_closed, dtype="uint16", endpoint=True) + +def_gen.integers(65536, dtype=np.uint16) +def_gen.integers(0, 65536, dtype=np.uint16) +def_gen.integers(65535, dtype=np.uint16, endpoint=True) +def_gen.integers(0, 65535, dtype=np.uint16, endpoint=True) +def_gen.integers(I_u2_low_like, 65535, dtype=np.uint16, endpoint=True) +def_gen.integers(I_u2_high_open, dtype=np.uint16) +def_gen.integers(I_u2_low, I_u2_high_open, 
dtype=np.uint16) +def_gen.integers(0, I_u2_high_open, dtype=np.uint16) +def_gen.integers(I_u2_high_closed, dtype=np.uint16, endpoint=True) +def_gen.integers(I_u2_low, I_u2_high_closed, dtype=np.uint16, endpoint=True) +def_gen.integers(0, I_u2_high_closed, dtype=np.uint16, endpoint=True) + +I_u4_low: np.ndarray[Any, np.dtype[np.uint32]] = np.array([0], dtype=np.uint32) +I_u4_low_like: List[int] = [0] +I_u4_high_open: np.ndarray[Any, np.dtype[np.uint32]] = np.array([4294967295], dtype=np.uint32) +I_u4_high_closed: np.ndarray[Any, np.dtype[np.uint32]] = np.array([4294967295], dtype=np.uint32) + +def_gen.integers(4294967296, dtype="u4") +def_gen.integers(0, 4294967296, dtype="u4") +def_gen.integers(4294967295, dtype="u4", endpoint=True) +def_gen.integers(0, 4294967295, dtype="u4", endpoint=True) +def_gen.integers(I_u4_low_like, 4294967295, dtype="u4", endpoint=True) +def_gen.integers(I_u4_high_open, dtype="u4") +def_gen.integers(I_u4_low, I_u4_high_open, dtype="u4") +def_gen.integers(0, I_u4_high_open, dtype="u4") +def_gen.integers(I_u4_high_closed, dtype="u4", endpoint=True) +def_gen.integers(I_u4_low, I_u4_high_closed, dtype="u4", endpoint=True) +def_gen.integers(0, I_u4_high_closed, dtype="u4", endpoint=True) + +def_gen.integers(4294967296, dtype="uint32") +def_gen.integers(0, 4294967296, dtype="uint32") +def_gen.integers(4294967295, dtype="uint32", endpoint=True) +def_gen.integers(0, 4294967295, dtype="uint32", endpoint=True) +def_gen.integers(I_u4_low_like, 4294967295, dtype="uint32", endpoint=True) +def_gen.integers(I_u4_high_open, dtype="uint32") +def_gen.integers(I_u4_low, I_u4_high_open, dtype="uint32") +def_gen.integers(0, I_u4_high_open, dtype="uint32") +def_gen.integers(I_u4_high_closed, dtype="uint32", endpoint=True) +def_gen.integers(I_u4_low, I_u4_high_closed, dtype="uint32", endpoint=True) +def_gen.integers(0, I_u4_high_closed, dtype="uint32", endpoint=True) + +def_gen.integers(4294967296, dtype=np.uint32) +def_gen.integers(0, 4294967296, dtype=np.uint32) +def_gen.integers(4294967295, dtype=np.uint32, endpoint=True) +def_gen.integers(0, 4294967295, dtype=np.uint32, endpoint=True) +def_gen.integers(I_u4_low_like, 4294967295, dtype=np.uint32, endpoint=True) +def_gen.integers(I_u4_high_open, dtype=np.uint32) +def_gen.integers(I_u4_low, I_u4_high_open, dtype=np.uint32) +def_gen.integers(0, I_u4_high_open, dtype=np.uint32) +def_gen.integers(I_u4_high_closed, dtype=np.uint32, endpoint=True) +def_gen.integers(I_u4_low, I_u4_high_closed, dtype=np.uint32, endpoint=True) +def_gen.integers(0, I_u4_high_closed, dtype=np.uint32, endpoint=True) + +I_u8_low: np.ndarray[Any, np.dtype[np.uint64]] = np.array([0], dtype=np.uint64) +I_u8_low_like: List[int] = [0] +I_u8_high_open: np.ndarray[Any, np.dtype[np.uint64]] = np.array([18446744073709551615], dtype=np.uint64) +I_u8_high_closed: np.ndarray[Any, np.dtype[np.uint64]] = np.array([18446744073709551615], dtype=np.uint64) + +def_gen.integers(18446744073709551616, dtype="u8") +def_gen.integers(0, 18446744073709551616, dtype="u8") +def_gen.integers(18446744073709551615, dtype="u8", endpoint=True) +def_gen.integers(0, 18446744073709551615, dtype="u8", endpoint=True) +def_gen.integers(I_u8_low_like, 18446744073709551615, dtype="u8", endpoint=True) +def_gen.integers(I_u8_high_open, dtype="u8") +def_gen.integers(I_u8_low, I_u8_high_open, dtype="u8") +def_gen.integers(0, I_u8_high_open, dtype="u8") +def_gen.integers(I_u8_high_closed, dtype="u8", endpoint=True) +def_gen.integers(I_u8_low, I_u8_high_closed, dtype="u8", endpoint=True) +def_gen.integers(0, 
I_u8_high_closed, dtype="u8", endpoint=True) + +def_gen.integers(18446744073709551616, dtype="uint64") +def_gen.integers(0, 18446744073709551616, dtype="uint64") +def_gen.integers(18446744073709551615, dtype="uint64", endpoint=True) +def_gen.integers(0, 18446744073709551615, dtype="uint64", endpoint=True) +def_gen.integers(I_u8_low_like, 18446744073709551615, dtype="uint64", endpoint=True) +def_gen.integers(I_u8_high_open, dtype="uint64") +def_gen.integers(I_u8_low, I_u8_high_open, dtype="uint64") +def_gen.integers(0, I_u8_high_open, dtype="uint64") +def_gen.integers(I_u8_high_closed, dtype="uint64", endpoint=True) +def_gen.integers(I_u8_low, I_u8_high_closed, dtype="uint64", endpoint=True) +def_gen.integers(0, I_u8_high_closed, dtype="uint64", endpoint=True) + +def_gen.integers(18446744073709551616, dtype=np.uint64) +def_gen.integers(0, 18446744073709551616, dtype=np.uint64) +def_gen.integers(18446744073709551615, dtype=np.uint64, endpoint=True) +def_gen.integers(0, 18446744073709551615, dtype=np.uint64, endpoint=True) +def_gen.integers(I_u8_low_like, 18446744073709551615, dtype=np.uint64, endpoint=True) +def_gen.integers(I_u8_high_open, dtype=np.uint64) +def_gen.integers(I_u8_low, I_u8_high_open, dtype=np.uint64) +def_gen.integers(0, I_u8_high_open, dtype=np.uint64) +def_gen.integers(I_u8_high_closed, dtype=np.uint64, endpoint=True) +def_gen.integers(I_u8_low, I_u8_high_closed, dtype=np.uint64, endpoint=True) +def_gen.integers(0, I_u8_high_closed, dtype=np.uint64, endpoint=True) + +I_i1_low: np.ndarray[Any, np.dtype[np.int8]] = np.array([-128], dtype=np.int8) +I_i1_low_like: List[int] = [-128] +I_i1_high_open: np.ndarray[Any, np.dtype[np.int8]] = np.array([127], dtype=np.int8) +I_i1_high_closed: np.ndarray[Any, np.dtype[np.int8]] = np.array([127], dtype=np.int8) + +def_gen.integers(128, dtype="i1") +def_gen.integers(-128, 128, dtype="i1") +def_gen.integers(127, dtype="i1", endpoint=True) +def_gen.integers(-128, 127, dtype="i1", endpoint=True) +def_gen.integers(I_i1_low_like, 127, dtype="i1", endpoint=True) +def_gen.integers(I_i1_high_open, dtype="i1") +def_gen.integers(I_i1_low, I_i1_high_open, dtype="i1") +def_gen.integers(-128, I_i1_high_open, dtype="i1") +def_gen.integers(I_i1_high_closed, dtype="i1", endpoint=True) +def_gen.integers(I_i1_low, I_i1_high_closed, dtype="i1", endpoint=True) +def_gen.integers(-128, I_i1_high_closed, dtype="i1", endpoint=True) + +def_gen.integers(128, dtype="int8") +def_gen.integers(-128, 128, dtype="int8") +def_gen.integers(127, dtype="int8", endpoint=True) +def_gen.integers(-128, 127, dtype="int8", endpoint=True) +def_gen.integers(I_i1_low_like, 127, dtype="int8", endpoint=True) +def_gen.integers(I_i1_high_open, dtype="int8") +def_gen.integers(I_i1_low, I_i1_high_open, dtype="int8") +def_gen.integers(-128, I_i1_high_open, dtype="int8") +def_gen.integers(I_i1_high_closed, dtype="int8", endpoint=True) +def_gen.integers(I_i1_low, I_i1_high_closed, dtype="int8", endpoint=True) +def_gen.integers(-128, I_i1_high_closed, dtype="int8", endpoint=True) + +def_gen.integers(128, dtype=np.int8) +def_gen.integers(-128, 128, dtype=np.int8) +def_gen.integers(127, dtype=np.int8, endpoint=True) +def_gen.integers(-128, 127, dtype=np.int8, endpoint=True) +def_gen.integers(I_i1_low_like, 127, dtype=np.int8, endpoint=True) +def_gen.integers(I_i1_high_open, dtype=np.int8) +def_gen.integers(I_i1_low, I_i1_high_open, dtype=np.int8) +def_gen.integers(-128, I_i1_high_open, dtype=np.int8) +def_gen.integers(I_i1_high_closed, dtype=np.int8, endpoint=True) +def_gen.integers(I_i1_low, 
I_i1_high_closed, dtype=np.int8, endpoint=True) +def_gen.integers(-128, I_i1_high_closed, dtype=np.int8, endpoint=True) + +I_i2_low: np.ndarray[Any, np.dtype[np.int16]] = np.array([-32768], dtype=np.int16) +I_i2_low_like: List[int] = [-32768] +I_i2_high_open: np.ndarray[Any, np.dtype[np.int16]] = np.array([32767], dtype=np.int16) +I_i2_high_closed: np.ndarray[Any, np.dtype[np.int16]] = np.array([32767], dtype=np.int16) + +def_gen.integers(32768, dtype="i2") +def_gen.integers(-32768, 32768, dtype="i2") +def_gen.integers(32767, dtype="i2", endpoint=True) +def_gen.integers(-32768, 32767, dtype="i2", endpoint=True) +def_gen.integers(I_i2_low_like, 32767, dtype="i2", endpoint=True) +def_gen.integers(I_i2_high_open, dtype="i2") +def_gen.integers(I_i2_low, I_i2_high_open, dtype="i2") +def_gen.integers(-32768, I_i2_high_open, dtype="i2") +def_gen.integers(I_i2_high_closed, dtype="i2", endpoint=True) +def_gen.integers(I_i2_low, I_i2_high_closed, dtype="i2", endpoint=True) +def_gen.integers(-32768, I_i2_high_closed, dtype="i2", endpoint=True) + +def_gen.integers(32768, dtype="int16") +def_gen.integers(-32768, 32768, dtype="int16") +def_gen.integers(32767, dtype="int16", endpoint=True) +def_gen.integers(-32768, 32767, dtype="int16", endpoint=True) +def_gen.integers(I_i2_low_like, 32767, dtype="int16", endpoint=True) +def_gen.integers(I_i2_high_open, dtype="int16") +def_gen.integers(I_i2_low, I_i2_high_open, dtype="int16") +def_gen.integers(-32768, I_i2_high_open, dtype="int16") +def_gen.integers(I_i2_high_closed, dtype="int16", endpoint=True) +def_gen.integers(I_i2_low, I_i2_high_closed, dtype="int16", endpoint=True) +def_gen.integers(-32768, I_i2_high_closed, dtype="int16", endpoint=True) + +def_gen.integers(32768, dtype=np.int16) +def_gen.integers(-32768, 32768, dtype=np.int16) +def_gen.integers(32767, dtype=np.int16, endpoint=True) +def_gen.integers(-32768, 32767, dtype=np.int16, endpoint=True) +def_gen.integers(I_i2_low_like, 32767, dtype=np.int16, endpoint=True) +def_gen.integers(I_i2_high_open, dtype=np.int16) +def_gen.integers(I_i2_low, I_i2_high_open, dtype=np.int16) +def_gen.integers(-32768, I_i2_high_open, dtype=np.int16) +def_gen.integers(I_i2_high_closed, dtype=np.int16, endpoint=True) +def_gen.integers(I_i2_low, I_i2_high_closed, dtype=np.int16, endpoint=True) +def_gen.integers(-32768, I_i2_high_closed, dtype=np.int16, endpoint=True) + +I_i4_low: np.ndarray[Any, np.dtype[np.int32]] = np.array([-2147483648], dtype=np.int32) +I_i4_low_like: List[int] = [-2147483648] +I_i4_high_open: np.ndarray[Any, np.dtype[np.int32]] = np.array([2147483647], dtype=np.int32) +I_i4_high_closed: np.ndarray[Any, np.dtype[np.int32]] = np.array([2147483647], dtype=np.int32) + +def_gen.integers(2147483648, dtype="i4") +def_gen.integers(-2147483648, 2147483648, dtype="i4") +def_gen.integers(2147483647, dtype="i4", endpoint=True) +def_gen.integers(-2147483648, 2147483647, dtype="i4", endpoint=True) +def_gen.integers(I_i4_low_like, 2147483647, dtype="i4", endpoint=True) +def_gen.integers(I_i4_high_open, dtype="i4") +def_gen.integers(I_i4_low, I_i4_high_open, dtype="i4") +def_gen.integers(-2147483648, I_i4_high_open, dtype="i4") +def_gen.integers(I_i4_high_closed, dtype="i4", endpoint=True) +def_gen.integers(I_i4_low, I_i4_high_closed, dtype="i4", endpoint=True) +def_gen.integers(-2147483648, I_i4_high_closed, dtype="i4", endpoint=True) + +def_gen.integers(2147483648, dtype="int32") +def_gen.integers(-2147483648, 2147483648, dtype="int32") +def_gen.integers(2147483647, dtype="int32", endpoint=True) 
+def_gen.integers(-2147483648, 2147483647, dtype="int32", endpoint=True) +def_gen.integers(I_i4_low_like, 2147483647, dtype="int32", endpoint=True) +def_gen.integers(I_i4_high_open, dtype="int32") +def_gen.integers(I_i4_low, I_i4_high_open, dtype="int32") +def_gen.integers(-2147483648, I_i4_high_open, dtype="int32") +def_gen.integers(I_i4_high_closed, dtype="int32", endpoint=True) +def_gen.integers(I_i4_low, I_i4_high_closed, dtype="int32", endpoint=True) +def_gen.integers(-2147483648, I_i4_high_closed, dtype="int32", endpoint=True) + +def_gen.integers(2147483648, dtype=np.int32) +def_gen.integers(-2147483648, 2147483648, dtype=np.int32) +def_gen.integers(2147483647, dtype=np.int32, endpoint=True) +def_gen.integers(-2147483648, 2147483647, dtype=np.int32, endpoint=True) +def_gen.integers(I_i4_low_like, 2147483647, dtype=np.int32, endpoint=True) +def_gen.integers(I_i4_high_open, dtype=np.int32) +def_gen.integers(I_i4_low, I_i4_high_open, dtype=np.int32) +def_gen.integers(-2147483648, I_i4_high_open, dtype=np.int32) +def_gen.integers(I_i4_high_closed, dtype=np.int32, endpoint=True) +def_gen.integers(I_i4_low, I_i4_high_closed, dtype=np.int32, endpoint=True) +def_gen.integers(-2147483648, I_i4_high_closed, dtype=np.int32, endpoint=True) + +I_i8_low: np.ndarray[Any, np.dtype[np.int64]] = np.array([-9223372036854775808], dtype=np.int64) +I_i8_low_like: List[int] = [-9223372036854775808] +I_i8_high_open: np.ndarray[Any, np.dtype[np.int64]] = np.array([9223372036854775807], dtype=np.int64) +I_i8_high_closed: np.ndarray[Any, np.dtype[np.int64]] = np.array([9223372036854775807], dtype=np.int64) + +def_gen.integers(9223372036854775808, dtype="i8") +def_gen.integers(-9223372036854775808, 9223372036854775808, dtype="i8") +def_gen.integers(9223372036854775807, dtype="i8", endpoint=True) +def_gen.integers(-9223372036854775808, 9223372036854775807, dtype="i8", endpoint=True) +def_gen.integers(I_i8_low_like, 9223372036854775807, dtype="i8", endpoint=True) +def_gen.integers(I_i8_high_open, dtype="i8") +def_gen.integers(I_i8_low, I_i8_high_open, dtype="i8") +def_gen.integers(-9223372036854775808, I_i8_high_open, dtype="i8") +def_gen.integers(I_i8_high_closed, dtype="i8", endpoint=True) +def_gen.integers(I_i8_low, I_i8_high_closed, dtype="i8", endpoint=True) +def_gen.integers(-9223372036854775808, I_i8_high_closed, dtype="i8", endpoint=True) + +def_gen.integers(9223372036854775808, dtype="int64") +def_gen.integers(-9223372036854775808, 9223372036854775808, dtype="int64") +def_gen.integers(9223372036854775807, dtype="int64", endpoint=True) +def_gen.integers(-9223372036854775808, 9223372036854775807, dtype="int64", endpoint=True) +def_gen.integers(I_i8_low_like, 9223372036854775807, dtype="int64", endpoint=True) +def_gen.integers(I_i8_high_open, dtype="int64") +def_gen.integers(I_i8_low, I_i8_high_open, dtype="int64") +def_gen.integers(-9223372036854775808, I_i8_high_open, dtype="int64") +def_gen.integers(I_i8_high_closed, dtype="int64", endpoint=True) +def_gen.integers(I_i8_low, I_i8_high_closed, dtype="int64", endpoint=True) +def_gen.integers(-9223372036854775808, I_i8_high_closed, dtype="int64", endpoint=True) + +def_gen.integers(9223372036854775808, dtype=np.int64) +def_gen.integers(-9223372036854775808, 9223372036854775808, dtype=np.int64) +def_gen.integers(9223372036854775807, dtype=np.int64, endpoint=True) +def_gen.integers(-9223372036854775808, 9223372036854775807, dtype=np.int64, endpoint=True) +def_gen.integers(I_i8_low_like, 9223372036854775807, dtype=np.int64, endpoint=True) 
+def_gen.integers(I_i8_high_open, dtype=np.int64) +def_gen.integers(I_i8_low, I_i8_high_open, dtype=np.int64) +def_gen.integers(-9223372036854775808, I_i8_high_open, dtype=np.int64) +def_gen.integers(I_i8_high_closed, dtype=np.int64, endpoint=True) +def_gen.integers(I_i8_low, I_i8_high_closed, dtype=np.int64, endpoint=True) +def_gen.integers(-9223372036854775808, I_i8_high_closed, dtype=np.int64, endpoint=True) + + +def_gen.bit_generator + +def_gen.bytes(2) + +def_gen.choice(5) +def_gen.choice(5, 3) +def_gen.choice(5, 3, replace=True) +def_gen.choice(5, 3, p=[1 / 5] * 5) +def_gen.choice(5, 3, p=[1 / 5] * 5, replace=False) + +def_gen.choice(["pooh", "rabbit", "piglet", "Christopher"]) +def_gen.choice(["pooh", "rabbit", "piglet", "Christopher"], 3) +def_gen.choice(["pooh", "rabbit", "piglet", "Christopher"], 3, p=[1 / 4] * 4) +def_gen.choice(["pooh", "rabbit", "piglet", "Christopher"], 3, replace=True) +def_gen.choice(["pooh", "rabbit", "piglet", "Christopher"], 3, replace=False, p=np.array([1 / 8, 1 / 8, 1 / 2, 1 / 4])) + +def_gen.dirichlet([0.5, 0.5]) +def_gen.dirichlet(np.array([0.5, 0.5])) +def_gen.dirichlet(np.array([0.5, 0.5]), size=3) + +def_gen.multinomial(20, [1 / 6.0] * 6) +def_gen.multinomial(20, np.array([0.5, 0.5])) +def_gen.multinomial(20, [1 / 6.0] * 6, size=2) +def_gen.multinomial([[10], [20]], [1 / 6.0] * 6, size=(2, 2)) +def_gen.multinomial(np.array([[10], [20]]), np.array([0.5, 0.5]), size=(2, 2)) + +def_gen.multivariate_hypergeometric([3, 5, 7], 2) +def_gen.multivariate_hypergeometric(np.array([3, 5, 7]), 2) +def_gen.multivariate_hypergeometric(np.array([3, 5, 7]), 2, size=4) +def_gen.multivariate_hypergeometric(np.array([3, 5, 7]), 2, size=(4, 7)) +def_gen.multivariate_hypergeometric([3, 5, 7], 2, method="count") +def_gen.multivariate_hypergeometric(np.array([3, 5, 7]), 2, method="marginals") + +def_gen.multivariate_normal([0.0], [[1.0]]) +def_gen.multivariate_normal([0.0], np.array([[1.0]])) +def_gen.multivariate_normal(np.array([0.0]), [[1.0]]) +def_gen.multivariate_normal([0.0], np.array([[1.0]])) + +def_gen.permutation(10) +def_gen.permutation([1, 2, 3, 4]) +def_gen.permutation(np.array([1, 2, 3, 4])) +def_gen.permutation(D_2D, axis=1) +def_gen.permuted(D_2D) +def_gen.permuted(D_2D_like) +def_gen.permuted(D_2D, axis=1) +def_gen.permuted(D_2D, out=D_2D) +def_gen.permuted(D_2D_like, out=D_2D) +def_gen.permuted(D_2D_like, out=D_2D) +def_gen.permuted(D_2D, axis=1, out=D_2D) + +def_gen.shuffle(np.arange(10)) +def_gen.shuffle([1, 2, 3, 4, 5]) +def_gen.shuffle(D_2D, axis=1) + +def_gen.__str__() +def_gen.__repr__() +def_gen_state: Dict[str, Any] +def_gen_state = def_gen.__getstate__() +def_gen.__setstate__(def_gen_state) + +# RandomState +random_st: np.random.RandomState = np.random.RandomState() + +random_st.standard_normal() +random_st.standard_normal(size=None) +random_st.standard_normal(size=1) + +random_st.random() +random_st.random(size=None) +random_st.random(size=1) + +random_st.standard_cauchy() +random_st.standard_cauchy(size=None) +random_st.standard_cauchy(size=1) + +random_st.standard_exponential() +random_st.standard_exponential(size=None) +random_st.standard_exponential(size=1) + +random_st.zipf(1.5) +random_st.zipf(1.5, size=None) +random_st.zipf(1.5, size=1) +random_st.zipf(D_arr_1p5) +random_st.zipf(D_arr_1p5, size=1) +random_st.zipf(D_arr_like_1p5) +random_st.zipf(D_arr_like_1p5, size=1) + +random_st.weibull(0.5) +random_st.weibull(0.5, size=None) +random_st.weibull(0.5, size=1) +random_st.weibull(D_arr_0p5) +random_st.weibull(D_arr_0p5, size=1) 
+random_st.weibull(D_arr_like_0p5) +random_st.weibull(D_arr_like_0p5, size=1) + +random_st.standard_t(0.5) +random_st.standard_t(0.5, size=None) +random_st.standard_t(0.5, size=1) +random_st.standard_t(D_arr_0p5) +random_st.standard_t(D_arr_0p5, size=1) +random_st.standard_t(D_arr_like_0p5) +random_st.standard_t(D_arr_like_0p5, size=1) + +random_st.poisson(0.5) +random_st.poisson(0.5, size=None) +random_st.poisson(0.5, size=1) +random_st.poisson(D_arr_0p5) +random_st.poisson(D_arr_0p5, size=1) +random_st.poisson(D_arr_like_0p5) +random_st.poisson(D_arr_like_0p5, size=1) + +random_st.power(0.5) +random_st.power(0.5, size=None) +random_st.power(0.5, size=1) +random_st.power(D_arr_0p5) +random_st.power(D_arr_0p5, size=1) +random_st.power(D_arr_like_0p5) +random_st.power(D_arr_like_0p5, size=1) + +random_st.pareto(0.5) +random_st.pareto(0.5, size=None) +random_st.pareto(0.5, size=1) +random_st.pareto(D_arr_0p5) +random_st.pareto(D_arr_0p5, size=1) +random_st.pareto(D_arr_like_0p5) +random_st.pareto(D_arr_like_0p5, size=1) + +random_st.chisquare(0.5) +random_st.chisquare(0.5, size=None) +random_st.chisquare(0.5, size=1) +random_st.chisquare(D_arr_0p5) +random_st.chisquare(D_arr_0p5, size=1) +random_st.chisquare(D_arr_like_0p5) +random_st.chisquare(D_arr_like_0p5, size=1) + +random_st.exponential(0.5) +random_st.exponential(0.5, size=None) +random_st.exponential(0.5, size=1) +random_st.exponential(D_arr_0p5) +random_st.exponential(D_arr_0p5, size=1) +random_st.exponential(D_arr_like_0p5) +random_st.exponential(D_arr_like_0p5, size=1) + +random_st.geometric(0.5) +random_st.geometric(0.5, size=None) +random_st.geometric(0.5, size=1) +random_st.geometric(D_arr_0p5) +random_st.geometric(D_arr_0p5, size=1) +random_st.geometric(D_arr_like_0p5) +random_st.geometric(D_arr_like_0p5, size=1) + +random_st.logseries(0.5) +random_st.logseries(0.5, size=None) +random_st.logseries(0.5, size=1) +random_st.logseries(D_arr_0p5) +random_st.logseries(D_arr_0p5, size=1) +random_st.logseries(D_arr_like_0p5) +random_st.logseries(D_arr_like_0p5, size=1) + +random_st.rayleigh(0.5) +random_st.rayleigh(0.5, size=None) +random_st.rayleigh(0.5, size=1) +random_st.rayleigh(D_arr_0p5) +random_st.rayleigh(D_arr_0p5, size=1) +random_st.rayleigh(D_arr_like_0p5) +random_st.rayleigh(D_arr_like_0p5, size=1) + +random_st.standard_gamma(0.5) +random_st.standard_gamma(0.5, size=None) +random_st.standard_gamma(0.5, size=1) +random_st.standard_gamma(D_arr_0p5) +random_st.standard_gamma(D_arr_0p5, size=1) +random_st.standard_gamma(D_arr_like_0p5) +random_st.standard_gamma(D_arr_like_0p5, size=1) +random_st.standard_gamma(D_arr_like_0p5, size=1) + +random_st.vonmises(0.5, 0.5) +random_st.vonmises(0.5, 0.5, size=None) +random_st.vonmises(0.5, 0.5, size=1) +random_st.vonmises(D_arr_0p5, 0.5) +random_st.vonmises(0.5, D_arr_0p5) +random_st.vonmises(D_arr_0p5, 0.5, size=1) +random_st.vonmises(0.5, D_arr_0p5, size=1) +random_st.vonmises(D_arr_like_0p5, 0.5) +random_st.vonmises(0.5, D_arr_like_0p5) +random_st.vonmises(D_arr_0p5, D_arr_0p5) +random_st.vonmises(D_arr_like_0p5, D_arr_like_0p5) +random_st.vonmises(D_arr_0p5, D_arr_0p5, size=1) +random_st.vonmises(D_arr_like_0p5, D_arr_like_0p5, size=1) + +random_st.wald(0.5, 0.5) +random_st.wald(0.5, 0.5, size=None) +random_st.wald(0.5, 0.5, size=1) +random_st.wald(D_arr_0p5, 0.5) +random_st.wald(0.5, D_arr_0p5) +random_st.wald(D_arr_0p5, 0.5, size=1) +random_st.wald(0.5, D_arr_0p5, size=1) +random_st.wald(D_arr_like_0p5, 0.5) +random_st.wald(0.5, D_arr_like_0p5) +random_st.wald(D_arr_0p5, 
D_arr_0p5) +random_st.wald(D_arr_like_0p5, D_arr_like_0p5) +random_st.wald(D_arr_0p5, D_arr_0p5, size=1) +random_st.wald(D_arr_like_0p5, D_arr_like_0p5, size=1) + +random_st.uniform(0.5, 0.5) +random_st.uniform(0.5, 0.5, size=None) +random_st.uniform(0.5, 0.5, size=1) +random_st.uniform(D_arr_0p5, 0.5) +random_st.uniform(0.5, D_arr_0p5) +random_st.uniform(D_arr_0p5, 0.5, size=1) +random_st.uniform(0.5, D_arr_0p5, size=1) +random_st.uniform(D_arr_like_0p5, 0.5) +random_st.uniform(0.5, D_arr_like_0p5) +random_st.uniform(D_arr_0p5, D_arr_0p5) +random_st.uniform(D_arr_like_0p5, D_arr_like_0p5) +random_st.uniform(D_arr_0p5, D_arr_0p5, size=1) +random_st.uniform(D_arr_like_0p5, D_arr_like_0p5, size=1) + +random_st.beta(0.5, 0.5) +random_st.beta(0.5, 0.5, size=None) +random_st.beta(0.5, 0.5, size=1) +random_st.beta(D_arr_0p5, 0.5) +random_st.beta(0.5, D_arr_0p5) +random_st.beta(D_arr_0p5, 0.5, size=1) +random_st.beta(0.5, D_arr_0p5, size=1) +random_st.beta(D_arr_like_0p5, 0.5) +random_st.beta(0.5, D_arr_like_0p5) +random_st.beta(D_arr_0p5, D_arr_0p5) +random_st.beta(D_arr_like_0p5, D_arr_like_0p5) +random_st.beta(D_arr_0p5, D_arr_0p5, size=1) +random_st.beta(D_arr_like_0p5, D_arr_like_0p5, size=1) + +random_st.f(0.5, 0.5) +random_st.f(0.5, 0.5, size=None) +random_st.f(0.5, 0.5, size=1) +random_st.f(D_arr_0p5, 0.5) +random_st.f(0.5, D_arr_0p5) +random_st.f(D_arr_0p5, 0.5, size=1) +random_st.f(0.5, D_arr_0p5, size=1) +random_st.f(D_arr_like_0p5, 0.5) +random_st.f(0.5, D_arr_like_0p5) +random_st.f(D_arr_0p5, D_arr_0p5) +random_st.f(D_arr_like_0p5, D_arr_like_0p5) +random_st.f(D_arr_0p5, D_arr_0p5, size=1) +random_st.f(D_arr_like_0p5, D_arr_like_0p5, size=1) + +random_st.gamma(0.5, 0.5) +random_st.gamma(0.5, 0.5, size=None) +random_st.gamma(0.5, 0.5, size=1) +random_st.gamma(D_arr_0p5, 0.5) +random_st.gamma(0.5, D_arr_0p5) +random_st.gamma(D_arr_0p5, 0.5, size=1) +random_st.gamma(0.5, D_arr_0p5, size=1) +random_st.gamma(D_arr_like_0p5, 0.5) +random_st.gamma(0.5, D_arr_like_0p5) +random_st.gamma(D_arr_0p5, D_arr_0p5) +random_st.gamma(D_arr_like_0p5, D_arr_like_0p5) +random_st.gamma(D_arr_0p5, D_arr_0p5, size=1) +random_st.gamma(D_arr_like_0p5, D_arr_like_0p5, size=1) + +random_st.gumbel(0.5, 0.5) +random_st.gumbel(0.5, 0.5, size=None) +random_st.gumbel(0.5, 0.5, size=1) +random_st.gumbel(D_arr_0p5, 0.5) +random_st.gumbel(0.5, D_arr_0p5) +random_st.gumbel(D_arr_0p5, 0.5, size=1) +random_st.gumbel(0.5, D_arr_0p5, size=1) +random_st.gumbel(D_arr_like_0p5, 0.5) +random_st.gumbel(0.5, D_arr_like_0p5) +random_st.gumbel(D_arr_0p5, D_arr_0p5) +random_st.gumbel(D_arr_like_0p5, D_arr_like_0p5) +random_st.gumbel(D_arr_0p5, D_arr_0p5, size=1) +random_st.gumbel(D_arr_like_0p5, D_arr_like_0p5, size=1) + +random_st.laplace(0.5, 0.5) +random_st.laplace(0.5, 0.5, size=None) +random_st.laplace(0.5, 0.5, size=1) +random_st.laplace(D_arr_0p5, 0.5) +random_st.laplace(0.5, D_arr_0p5) +random_st.laplace(D_arr_0p5, 0.5, size=1) +random_st.laplace(0.5, D_arr_0p5, size=1) +random_st.laplace(D_arr_like_0p5, 0.5) +random_st.laplace(0.5, D_arr_like_0p5) +random_st.laplace(D_arr_0p5, D_arr_0p5) +random_st.laplace(D_arr_like_0p5, D_arr_like_0p5) +random_st.laplace(D_arr_0p5, D_arr_0p5, size=1) +random_st.laplace(D_arr_like_0p5, D_arr_like_0p5, size=1) + +random_st.logistic(0.5, 0.5) +random_st.logistic(0.5, 0.5, size=None) +random_st.logistic(0.5, 0.5, size=1) +random_st.logistic(D_arr_0p5, 0.5) +random_st.logistic(0.5, D_arr_0p5) +random_st.logistic(D_arr_0p5, 0.5, size=1) +random_st.logistic(0.5, D_arr_0p5, size=1) 
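# The random_st calls in this stretch deliberately mirror the def_gen
# block method for method: RandomState is the legacy, stream-compatible
# generator, and its distribution methods take the same scalar/array/
# array-like parameter combinations. Illustrative sketch:
_legacy = np.random.RandomState(0)
_modern = np.random.Generator(np.random.PCG64(0))
# both expose e.g. .normal(loc, scale, size) with identical call shapes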
+random_st.logistic(D_arr_like_0p5, 0.5) +random_st.logistic(0.5, D_arr_like_0p5) +random_st.logistic(D_arr_0p5, D_arr_0p5) +random_st.logistic(D_arr_like_0p5, D_arr_like_0p5) +random_st.logistic(D_arr_0p5, D_arr_0p5, size=1) +random_st.logistic(D_arr_like_0p5, D_arr_like_0p5, size=1) + +random_st.lognormal(0.5, 0.5) +random_st.lognormal(0.5, 0.5, size=None) +random_st.lognormal(0.5, 0.5, size=1) +random_st.lognormal(D_arr_0p5, 0.5) +random_st.lognormal(0.5, D_arr_0p5) +random_st.lognormal(D_arr_0p5, 0.5, size=1) +random_st.lognormal(0.5, D_arr_0p5, size=1) +random_st.lognormal(D_arr_like_0p5, 0.5) +random_st.lognormal(0.5, D_arr_like_0p5) +random_st.lognormal(D_arr_0p5, D_arr_0p5) +random_st.lognormal(D_arr_like_0p5, D_arr_like_0p5) +random_st.lognormal(D_arr_0p5, D_arr_0p5, size=1) +random_st.lognormal(D_arr_like_0p5, D_arr_like_0p5, size=1) + +random_st.noncentral_chisquare(0.5, 0.5) +random_st.noncentral_chisquare(0.5, 0.5, size=None) +random_st.noncentral_chisquare(0.5, 0.5, size=1) +random_st.noncentral_chisquare(D_arr_0p5, 0.5) +random_st.noncentral_chisquare(0.5, D_arr_0p5) +random_st.noncentral_chisquare(D_arr_0p5, 0.5, size=1) +random_st.noncentral_chisquare(0.5, D_arr_0p5, size=1) +random_st.noncentral_chisquare(D_arr_like_0p5, 0.5) +random_st.noncentral_chisquare(0.5, D_arr_like_0p5) +random_st.noncentral_chisquare(D_arr_0p5, D_arr_0p5) +random_st.noncentral_chisquare(D_arr_like_0p5, D_arr_like_0p5) +random_st.noncentral_chisquare(D_arr_0p5, D_arr_0p5, size=1) +random_st.noncentral_chisquare(D_arr_like_0p5, D_arr_like_0p5, size=1) + +random_st.normal(0.5, 0.5) +random_st.normal(0.5, 0.5, size=None) +random_st.normal(0.5, 0.5, size=1) +random_st.normal(D_arr_0p5, 0.5) +random_st.normal(0.5, D_arr_0p5) +random_st.normal(D_arr_0p5, 0.5, size=1) +random_st.normal(0.5, D_arr_0p5, size=1) +random_st.normal(D_arr_like_0p5, 0.5) +random_st.normal(0.5, D_arr_like_0p5) +random_st.normal(D_arr_0p5, D_arr_0p5) +random_st.normal(D_arr_like_0p5, D_arr_like_0p5) +random_st.normal(D_arr_0p5, D_arr_0p5, size=1) +random_st.normal(D_arr_like_0p5, D_arr_like_0p5, size=1) + +random_st.triangular(0.1, 0.5, 0.9) +random_st.triangular(0.1, 0.5, 0.9, size=None) +random_st.triangular(0.1, 0.5, 0.9, size=1) +random_st.triangular(D_arr_0p1, 0.5, 0.9) +random_st.triangular(0.1, D_arr_0p5, 0.9) +random_st.triangular(D_arr_0p1, 0.5, D_arr_like_0p9, size=1) +random_st.triangular(0.1, D_arr_0p5, 0.9, size=1) +random_st.triangular(D_arr_like_0p1, 0.5, D_arr_0p9) +random_st.triangular(0.5, D_arr_like_0p5, 0.9) +random_st.triangular(D_arr_0p1, D_arr_0p5, 0.9) +random_st.triangular(D_arr_like_0p1, D_arr_like_0p5, 0.9) +random_st.triangular(D_arr_0p1, D_arr_0p5, D_arr_0p9, size=1) +random_st.triangular(D_arr_like_0p1, D_arr_like_0p5, D_arr_like_0p9, size=1) + +random_st.noncentral_f(0.1, 0.5, 0.9) +random_st.noncentral_f(0.1, 0.5, 0.9, size=None) +random_st.noncentral_f(0.1, 0.5, 0.9, size=1) +random_st.noncentral_f(D_arr_0p1, 0.5, 0.9) +random_st.noncentral_f(0.1, D_arr_0p5, 0.9) +random_st.noncentral_f(D_arr_0p1, 0.5, D_arr_like_0p9, size=1) +random_st.noncentral_f(0.1, D_arr_0p5, 0.9, size=1) +random_st.noncentral_f(D_arr_like_0p1, 0.5, D_arr_0p9) +random_st.noncentral_f(0.5, D_arr_like_0p5, 0.9) +random_st.noncentral_f(D_arr_0p1, D_arr_0p5, 0.9) +random_st.noncentral_f(D_arr_like_0p1, D_arr_like_0p5, 0.9) +random_st.noncentral_f(D_arr_0p1, D_arr_0p5, D_arr_0p9, size=1) +random_st.noncentral_f(D_arr_like_0p1, D_arr_like_0p5, D_arr_like_0p9, size=1) + +random_st.binomial(10, 0.5) +random_st.binomial(10, 0.5, 
size=None) +random_st.binomial(10, 0.5, size=1) +random_st.binomial(I_arr_10, 0.5) +random_st.binomial(10, D_arr_0p5) +random_st.binomial(I_arr_10, 0.5, size=1) +random_st.binomial(10, D_arr_0p5, size=1) +random_st.binomial(I_arr_like_10, 0.5) +random_st.binomial(10, D_arr_like_0p5) +random_st.binomial(I_arr_10, D_arr_0p5) +random_st.binomial(I_arr_like_10, D_arr_like_0p5) +random_st.binomial(I_arr_10, D_arr_0p5, size=1) +random_st.binomial(I_arr_like_10, D_arr_like_0p5, size=1) + +random_st.negative_binomial(10, 0.5) +random_st.negative_binomial(10, 0.5, size=None) +random_st.negative_binomial(10, 0.5, size=1) +random_st.negative_binomial(I_arr_10, 0.5) +random_st.negative_binomial(10, D_arr_0p5) +random_st.negative_binomial(I_arr_10, 0.5, size=1) +random_st.negative_binomial(10, D_arr_0p5, size=1) +random_st.negative_binomial(I_arr_like_10, 0.5) +random_st.negative_binomial(10, D_arr_like_0p5) +random_st.negative_binomial(I_arr_10, D_arr_0p5) +random_st.negative_binomial(I_arr_like_10, D_arr_like_0p5) +random_st.negative_binomial(I_arr_10, D_arr_0p5, size=1) +random_st.negative_binomial(I_arr_like_10, D_arr_like_0p5, size=1) + +random_st.hypergeometric(20, 20, 10) +random_st.hypergeometric(20, 20, 10, size=None) +random_st.hypergeometric(20, 20, 10, size=1) +random_st.hypergeometric(I_arr_20, 20, 10) +random_st.hypergeometric(20, I_arr_20, 10) +random_st.hypergeometric(I_arr_20, 20, I_arr_like_10, size=1) +random_st.hypergeometric(20, I_arr_20, 10, size=1) +random_st.hypergeometric(I_arr_like_20, 20, I_arr_10) +random_st.hypergeometric(20, I_arr_like_20, 10) +random_st.hypergeometric(I_arr_20, I_arr_20, 10) +random_st.hypergeometric(I_arr_like_20, I_arr_like_20, 10) +random_st.hypergeometric(I_arr_20, I_arr_20, I_arr_10, size=1) +random_st.hypergeometric(I_arr_like_20, I_arr_like_20, I_arr_like_10, size=1) + +random_st.randint(0, 100) +random_st.randint(100) +random_st.randint([100]) +random_st.randint(0, [100]) + +random_st.randint(2, dtype=bool) +random_st.randint(0, 2, dtype=bool) +random_st.randint(I_bool_high_open, dtype=bool) +random_st.randint(I_bool_low, I_bool_high_open, dtype=bool) +random_st.randint(0, I_bool_high_open, dtype=bool) + +random_st.randint(2, dtype=np.bool_) +random_st.randint(0, 2, dtype=np.bool_) +random_st.randint(I_bool_high_open, dtype=np.bool_) +random_st.randint(I_bool_low, I_bool_high_open, dtype=np.bool_) +random_st.randint(0, I_bool_high_open, dtype=np.bool_) + +random_st.randint(256, dtype="u1") +random_st.randint(0, 256, dtype="u1") +random_st.randint(I_u1_high_open, dtype="u1") +random_st.randint(I_u1_low, I_u1_high_open, dtype="u1") +random_st.randint(0, I_u1_high_open, dtype="u1") + +random_st.randint(256, dtype="uint8") +random_st.randint(0, 256, dtype="uint8") +random_st.randint(I_u1_high_open, dtype="uint8") +random_st.randint(I_u1_low, I_u1_high_open, dtype="uint8") +random_st.randint(0, I_u1_high_open, dtype="uint8") + +random_st.randint(256, dtype=np.uint8) +random_st.randint(0, 256, dtype=np.uint8) +random_st.randint(I_u1_high_open, dtype=np.uint8) +random_st.randint(I_u1_low, I_u1_high_open, dtype=np.uint8) +random_st.randint(0, I_u1_high_open, dtype=np.uint8) + +random_st.randint(65536, dtype="u2") +random_st.randint(0, 65536, dtype="u2") +random_st.randint(I_u2_high_open, dtype="u2") +random_st.randint(I_u2_low, I_u2_high_open, dtype="u2") +random_st.randint(0, I_u2_high_open, dtype="u2") + +random_st.randint(65536, dtype="uint16") +random_st.randint(0, 65536, dtype="uint16") +random_st.randint(I_u2_high_open, dtype="uint16") 
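# randint is the legacy spelling of Generator.integers: high is always
# exclusive and there is no endpoint keyword, which is why only the
# *_high_open fixtures appear in this block. Illustrative bound check:
assert 0 <= random_st.randint(256, dtype="u1") < 256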
+random_st.randint(I_u2_low, I_u2_high_open, dtype="uint16") +random_st.randint(0, I_u2_high_open, dtype="uint16") + +random_st.randint(65536, dtype=np.uint16) +random_st.randint(0, 65536, dtype=np.uint16) +random_st.randint(I_u2_high_open, dtype=np.uint16) +random_st.randint(I_u2_low, I_u2_high_open, dtype=np.uint16) +random_st.randint(0, I_u2_high_open, dtype=np.uint16) + +random_st.randint(4294967296, dtype="u4") +random_st.randint(0, 4294967296, dtype="u4") +random_st.randint(I_u4_high_open, dtype="u4") +random_st.randint(I_u4_low, I_u4_high_open, dtype="u4") +random_st.randint(0, I_u4_high_open, dtype="u4") + +random_st.randint(4294967296, dtype="uint32") +random_st.randint(0, 4294967296, dtype="uint32") +random_st.randint(I_u4_high_open, dtype="uint32") +random_st.randint(I_u4_low, I_u4_high_open, dtype="uint32") +random_st.randint(0, I_u4_high_open, dtype="uint32") + +random_st.randint(4294967296, dtype=np.uint32) +random_st.randint(0, 4294967296, dtype=np.uint32) +random_st.randint(I_u4_high_open, dtype=np.uint32) +random_st.randint(I_u4_low, I_u4_high_open, dtype=np.uint32) +random_st.randint(0, I_u4_high_open, dtype=np.uint32) + + +random_st.randint(18446744073709551616, dtype="u8") +random_st.randint(0, 18446744073709551616, dtype="u8") +random_st.randint(I_u8_high_open, dtype="u8") +random_st.randint(I_u8_low, I_u8_high_open, dtype="u8") +random_st.randint(0, I_u8_high_open, dtype="u8") + +random_st.randint(18446744073709551616, dtype="uint64") +random_st.randint(0, 18446744073709551616, dtype="uint64") +random_st.randint(I_u8_high_open, dtype="uint64") +random_st.randint(I_u8_low, I_u8_high_open, dtype="uint64") +random_st.randint(0, I_u8_high_open, dtype="uint64") + +random_st.randint(18446744073709551616, dtype=np.uint64) +random_st.randint(0, 18446744073709551616, dtype=np.uint64) +random_st.randint(I_u8_high_open, dtype=np.uint64) +random_st.randint(I_u8_low, I_u8_high_open, dtype=np.uint64) +random_st.randint(0, I_u8_high_open, dtype=np.uint64) + +random_st.randint(128, dtype="i1") +random_st.randint(-128, 128, dtype="i1") +random_st.randint(I_i1_high_open, dtype="i1") +random_st.randint(I_i1_low, I_i1_high_open, dtype="i1") +random_st.randint(-128, I_i1_high_open, dtype="i1") + +random_st.randint(128, dtype="int8") +random_st.randint(-128, 128, dtype="int8") +random_st.randint(I_i1_high_open, dtype="int8") +random_st.randint(I_i1_low, I_i1_high_open, dtype="int8") +random_st.randint(-128, I_i1_high_open, dtype="int8") + +random_st.randint(128, dtype=np.int8) +random_st.randint(-128, 128, dtype=np.int8) +random_st.randint(I_i1_high_open, dtype=np.int8) +random_st.randint(I_i1_low, I_i1_high_open, dtype=np.int8) +random_st.randint(-128, I_i1_high_open, dtype=np.int8) + +random_st.randint(32768, dtype="i2") +random_st.randint(-32768, 32768, dtype="i2") +random_st.randint(I_i2_high_open, dtype="i2") +random_st.randint(I_i2_low, I_i2_high_open, dtype="i2") +random_st.randint(-32768, I_i2_high_open, dtype="i2") +random_st.randint(32768, dtype="int16") +random_st.randint(-32768, 32768, dtype="int16") +random_st.randint(I_i2_high_open, dtype="int16") +random_st.randint(I_i2_low, I_i2_high_open, dtype="int16") +random_st.randint(-32768, I_i2_high_open, dtype="int16") +random_st.randint(32768, dtype=np.int16) +random_st.randint(-32768, 32768, dtype=np.int16) +random_st.randint(I_i2_high_open, dtype=np.int16) +random_st.randint(I_i2_low, I_i2_high_open, dtype=np.int16) +random_st.randint(-32768, I_i2_high_open, dtype=np.int16) + +random_st.randint(2147483648, dtype="i4") 
+random_st.randint(-2147483648, 2147483648, dtype="i4") +random_st.randint(I_i4_high_open, dtype="i4") +random_st.randint(I_i4_low, I_i4_high_open, dtype="i4") +random_st.randint(-2147483648, I_i4_high_open, dtype="i4") + +random_st.randint(2147483648, dtype="int32") +random_st.randint(-2147483648, 2147483648, dtype="int32") +random_st.randint(I_i4_high_open, dtype="int32") +random_st.randint(I_i4_low, I_i4_high_open, dtype="int32") +random_st.randint(-2147483648, I_i4_high_open, dtype="int32") + +random_st.randint(2147483648, dtype=np.int32) +random_st.randint(-2147483648, 2147483648, dtype=np.int32) +random_st.randint(I_i4_high_open, dtype=np.int32) +random_st.randint(I_i4_low, I_i4_high_open, dtype=np.int32) +random_st.randint(-2147483648, I_i4_high_open, dtype=np.int32) + +random_st.randint(9223372036854775808, dtype="i8") +random_st.randint(-9223372036854775808, 9223372036854775808, dtype="i8") +random_st.randint(I_i8_high_open, dtype="i8") +random_st.randint(I_i8_low, I_i8_high_open, dtype="i8") +random_st.randint(-9223372036854775808, I_i8_high_open, dtype="i8") + +random_st.randint(9223372036854775808, dtype="int64") +random_st.randint(-9223372036854775808, 9223372036854775808, dtype="int64") +random_st.randint(I_i8_high_open, dtype="int64") +random_st.randint(I_i8_low, I_i8_high_open, dtype="int64") +random_st.randint(-9223372036854775808, I_i8_high_open, dtype="int64") + +random_st.randint(9223372036854775808, dtype=np.int64) +random_st.randint(-9223372036854775808, 9223372036854775808, dtype=np.int64) +random_st.randint(I_i8_high_open, dtype=np.int64) +random_st.randint(I_i8_low, I_i8_high_open, dtype=np.int64) +random_st.randint(-9223372036854775808, I_i8_high_open, dtype=np.int64) + +bg: np.random.BitGenerator = random_st._bit_generator + +random_st.bytes(2) + +random_st.choice(5) +random_st.choice(5, 3) +random_st.choice(5, 3, replace=True) +random_st.choice(5, 3, p=[1 / 5] * 5) +random_st.choice(5, 3, p=[1 / 5] * 5, replace=False) + +random_st.choice(["pooh", "rabbit", "piglet", "Christopher"]) +random_st.choice(["pooh", "rabbit", "piglet", "Christopher"], 3) +random_st.choice(["pooh", "rabbit", "piglet", "Christopher"], 3, p=[1 / 4] * 4) +random_st.choice(["pooh", "rabbit", "piglet", "Christopher"], 3, replace=True) +random_st.choice(["pooh", "rabbit", "piglet", "Christopher"], 3, replace=False, p=np.array([1 / 8, 1 / 8, 1 / 2, 1 / 4])) + +random_st.dirichlet([0.5, 0.5]) +random_st.dirichlet(np.array([0.5, 0.5])) +random_st.dirichlet(np.array([0.5, 0.5]), size=3) + +random_st.multinomial(20, [1 / 6.0] * 6) +random_st.multinomial(20, np.array([0.5, 0.5])) +random_st.multinomial(20, [1 / 6.0] * 6, size=2) + +random_st.multivariate_normal([0.0], [[1.0]]) +random_st.multivariate_normal([0.0], np.array([[1.0]])) +random_st.multivariate_normal(np.array([0.0]), [[1.0]]) +random_st.multivariate_normal([0.0], np.array([[1.0]])) + +random_st.permutation(10) +random_st.permutation([1, 2, 3, 4]) +random_st.permutation(np.array([1, 2, 3, 4])) +random_st.permutation(D_2D) + +random_st.shuffle(np.arange(10)) +random_st.shuffle([1, 2, 3, 4, 5]) +random_st.shuffle(D_2D) + +np.random.RandomState(SEED_PCG64) +np.random.RandomState(0) +np.random.RandomState([0, 1, 2]) +random_st.__str__() +random_st.__repr__() +random_st_state = random_st.__getstate__() +random_st.__setstate__(random_st_state) +random_st.seed() +random_st.seed(1) +random_st.seed([0, 1]) +random_st_get_state = random_st.get_state() +random_st_get_state_legacy = random_st.get_state(legacy=True) 
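# get_state()/set_state() round-trip the Mersenne Twister state, so a
# restored state replays the identical stream (illustrative sketch):
_state_before = random_st.get_state()
_draw = random_st.rand()
random_st.set_state(_state_before)
assert random_st.rand() == _draw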
+random_st.set_state(random_st_get_state) + +random_st.rand() +random_st.rand(1) +random_st.rand(1, 2) +random_st.randn() +random_st.randn(1) +random_st.randn(1, 2) +random_st.random_sample() +random_st.random_sample(1) +random_st.random_sample(size=(1, 2)) + +random_st.tomaxint() +random_st.tomaxint(1) +random_st.tomaxint((1,)) diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/scalars.py b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/scalars.py new file mode 100644 index 0000000..b258db4 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/scalars.py @@ -0,0 +1,254 @@ +import sys +import datetime as dt + +import pytest +import numpy as np + +b = np.bool_() +u8 = np.uint64() +i8 = np.int64() +f8 = np.float64() +c16 = np.complex128() +U = np.str_() +S = np.bytes_() + + +# Construction +class D: + def __index__(self) -> int: + return 0 + + +class C: + def __complex__(self) -> complex: + return 3j + + +class B: + def __int__(self) -> int: + return 4 + + +class A: + def __float__(self) -> float: + return 4.0 + + +np.complex64(3j) +np.complex64(A()) +np.complex64(C()) +np.complex128(3j) +np.complex128(C()) +np.complex128(None) +np.complex64("1.2") +np.complex128(b"2j") + +np.int8(4) +np.int16(3.4) +np.int32(4) +np.int64(-1) +np.uint8(B()) +np.uint32() +np.int32("1") +np.int64(b"2") + +np.float16(A()) +np.float32(16) +np.float64(3.0) +np.float64(None) +np.float32("1") +np.float16(b"2.5") + +if sys.version_info >= (3, 8): + np.uint64(D()) + np.float32(D()) + np.complex64(D()) + +np.bytes_(b"hello") +np.bytes_("hello", 'utf-8') +np.bytes_("hello", encoding='utf-8') +np.str_("hello") +np.str_(b"hello", 'utf-8') +np.str_(b"hello", encoding='utf-8') + +# Array-ish semantics +np.int8().real +np.int16().imag +np.int32().data +np.int64().flags + +np.uint8().itemsize * 2 +np.uint16().ndim + 1 +np.uint32().strides +np.uint64().shape + +# Time structures +np.datetime64() +np.datetime64(0, "D") +np.datetime64(0, b"D") +np.datetime64(0, ('ms', 3)) +np.datetime64("2019") +np.datetime64(b"2019") +np.datetime64("2019", "D") +np.datetime64(np.datetime64()) +np.datetime64(dt.datetime(2000, 5, 3)) +np.datetime64(dt.date(2000, 5, 3)) +np.datetime64(None) +np.datetime64(None, "D") + +np.timedelta64() +np.timedelta64(0) +np.timedelta64(0, "D") +np.timedelta64(0, ('ms', 3)) +np.timedelta64(0, b"D") +np.timedelta64("3") +np.timedelta64(b"5") +np.timedelta64(np.timedelta64(2)) +np.timedelta64(dt.timedelta(2)) +np.timedelta64(None) +np.timedelta64(None, "D") + +np.void(1) +np.void(np.int64(1)) +np.void(True) +np.void(np.bool_(True)) +np.void(b"test") +np.void(np.bytes_("test")) + +# Protocols +i8 = np.int64() +u8 = np.uint64() +f8 = np.float64() +c16 = np.complex128() +b_ = np.bool_() +td = np.timedelta64() +U = np.str_("1") +S = np.bytes_("1") +AR = np.array(1, dtype=np.float64) + +int(i8) +int(u8) +int(f8) +int(b_) +int(td) +int(U) +int(S) +int(AR) +with pytest.warns(np.ComplexWarning): + int(c16) + +float(i8) +float(u8) +float(f8) +float(b_) +float(td) +float(U) +float(S) +float(AR) +with pytest.warns(np.ComplexWarning): + float(c16) + +complex(i8) +complex(u8) +complex(f8) +complex(c16) +complex(b_) +complex(td) +complex(U) +complex(AR) + + +# Misc +c16.dtype +c16.real +c16.imag +c16.real.real +c16.real.imag +c16.ndim +c16.size +c16.itemsize +c16.shape +c16.strides +c16.squeeze() +c16.byteswap() +c16.transpose() + +# Aliases +np.str0() +np.bool8() +np.bytes0() +np.string_() +np.object0() +np.void0(0) + +np.byte() +np.short() +np.intc() 
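# The calls around here smoke-test NumPy's C-style scalar aliases
# (byte == int8, short == int16, intc == C int, intp == pointer-sized
# int, and so on). The numbered aliases above (str0, bool8, int0, uint0,
# and friends) were later deprecated in NumPy 1.24 and removed in 2.0;
# on the 1.x NumPy vendored here they still resolve, e.g.:
assert np.byte is np.int8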
+np.intp() +np.int0() +np.int_() +np.longlong() + +np.ubyte() +np.ushort() +np.uintc() +np.uintp() +np.uint0() +np.uint() +np.ulonglong() + +np.half() +np.single() +np.double() +np.float_() +np.longdouble() +np.longfloat() + +np.csingle() +np.singlecomplex() +np.cdouble() +np.complex_() +np.cfloat() +np.clongdouble() +np.clongfloat() +np.longcomplex() + +b.item() +i8.item() +u8.item() +f8.item() +c16.item() +U.item() +S.item() + +b.tolist() +i8.tolist() +u8.tolist() +f8.tolist() +c16.tolist() +U.tolist() +S.tolist() + +b.ravel() +i8.ravel() +u8.ravel() +f8.ravel() +c16.ravel() +U.ravel() +S.ravel() + +b.flatten() +i8.flatten() +u8.flatten() +f8.flatten() +c16.flatten() +U.flatten() +S.flatten() + +b.reshape(1) +i8.reshape(1) +u8.reshape(1) +f8.reshape(1) +c16.reshape(1) +U.reshape(1) +S.reshape(1) diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/simple.py b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/simple.py new file mode 100644 index 0000000..85965e0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/simple.py @@ -0,0 +1,165 @@ +"""Simple expression that should pass with mypy.""" +import operator + +import numpy as np +from typing import Iterable # noqa: F401 + +# Basic checks +array = np.array([1, 2]) + + +def ndarray_func(x): + # type: (np.ndarray) -> np.ndarray + return x + + +ndarray_func(np.array([1, 2])) +array == 1 +array.dtype == float + +# Dtype construction +np.dtype(float) +np.dtype(np.float64) +np.dtype(None) +np.dtype("float64") +np.dtype(np.dtype(float)) +np.dtype(("U", 10)) +np.dtype((np.int32, (2, 2))) +# Define the arguments on the previous line to prevent bidirectional +# type inference in mypy from broadening the types. +two_tuples_dtype = [("R", "u1"), ("G", "u1"), ("B", "u1")] +np.dtype(two_tuples_dtype) + +three_tuples_dtype = [("R", "u1", 2)] +np.dtype(three_tuples_dtype) + +mixed_tuples_dtype = [("R", "u1"), ("G", np.unicode_, 1)] +np.dtype(mixed_tuples_dtype) + +shape_tuple_dtype = [("R", "u1", (2, 2))] +np.dtype(shape_tuple_dtype) + +shape_like_dtype = [("R", "u1", (2, 2)), ("G", np.unicode_, 1)] +np.dtype(shape_like_dtype) + +object_dtype = [("field1", object)] +np.dtype(object_dtype) + +np.dtype((np.int32, (np.int8, 4))) + +# Dtype comparison +np.dtype(float) == float +np.dtype(float) != np.float64 +np.dtype(float) < None +np.dtype(float) <= "float64" +np.dtype(float) > np.dtype(float) +np.dtype(float) >= np.dtype(("U", 10)) + +# Iteration and indexing +def iterable_func(x): + # type: (Iterable) -> Iterable + return x + + +iterable_func(array) +[element for element in array] +iter(array) +zip(array, array) +array[1] +array[:] +array[...] 
+array[:] = 0 + +array_2d = np.ones((3, 3)) +array_2d[:2, :2] +array_2d[..., 0] +array_2d[:2, :2] = 0 + +# Other special methods +len(array) +str(array) +array_scalar = np.array(1) +int(array_scalar) +float(array_scalar) +# currently does not work due to https://github.com/python/typeshed/issues/1904 +# complex(array_scalar) +bytes(array_scalar) +operator.index(array_scalar) +bool(array_scalar) + +# comparisons +array < 1 +array <= 1 +array == 1 +array != 1 +array > 1 +array >= 1 +1 < array +1 <= array +1 == array +1 != array +1 > array +1 >= array + +# binary arithmetic +array + 1 +1 + array +array += 1 + +array - 1 +1 - array +array -= 1 + +array * 1 +1 * array +array *= 1 + +nonzero_array = np.array([1, 2]) +array / 1 +1 / nonzero_array +float_array = np.array([1.0, 2.0]) +float_array /= 1 + +array // 1 +1 // nonzero_array +array //= 1 + +array % 1 +1 % nonzero_array +array %= 1 + +divmod(array, 1) +divmod(1, nonzero_array) + +array ** 1 +1 ** array +array **= 1 + +array << 1 +1 << array +array <<= 1 + +array >> 1 +1 >> array +array >>= 1 + +array & 1 +1 & array +array &= 1 + +array ^ 1 +1 ^ array +array ^= 1 + +array | 1 +1 | array +array |= 1 + +# unary arithmetic +-array ++array +abs(array) +~array + +# Other methods +np.array([1, 2]).transpose() diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/simple_py3.py b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/simple_py3.py new file mode 100644 index 0000000..c05a1ce --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/simple_py3.py @@ -0,0 +1,6 @@ +import numpy as np + +array = np.array([1, 2]) + +# The @ operator is not in python 2 +array @ array diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/ufunc_config.py b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/ufunc_config.py new file mode 100644 index 0000000..2d13142 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/ufunc_config.py @@ -0,0 +1,50 @@ +"""Typing tests for `numpy.core._ufunc_config`.""" + +import numpy as np + +def func1(a: str, b: int) -> None: ... +def func2(a: str, b: int, c: float = ...) -> None: ... +def func3(a: str, b: int) -> int: ... + +class Write1: + def write(self, a: str) -> None: ... + +class Write2: + def write(self, a: str, b: int = ...) -> None: ... + +class Write3: + def write(self, a: str) -> int: ... 
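The `func1`..`func3` and `Write1`..`Write3` stubs above cover the two shapes `np.seterrcall` accepts: a callable `f(err, flag)` or any object exposing a `write` method. A small runnable sketch of the callback path (the handler name and message are illustrative):

import numpy as np

def on_fp_error(err: str, flag: int) -> None:
    # invoked once per offending ufunc call while the error mode is "call"
    print(f"caught floating-point error: {err} (status flag {flag})")

previous = np.seterrcall(on_fp_error)
with np.errstate(divide="call"):
    np.float64(1.0) / np.float64(0.0)  # -> inf, and on_fp_error fires
np.seterrcall(previous)                # restore whatever was registered before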
+ + +_err_default = np.geterr() +_bufsize_default = np.getbufsize() +_errcall_default = np.geterrcall() + +try: + np.seterr(all=None) + np.seterr(divide="ignore") + np.seterr(over="warn") + np.seterr(under="call") + np.seterr(invalid="raise") + np.geterr() + + np.setbufsize(4096) + np.getbufsize() + + np.seterrcall(func1) + np.seterrcall(func2) + np.seterrcall(func3) + np.seterrcall(Write1()) + np.seterrcall(Write2()) + np.seterrcall(Write3()) + np.geterrcall() + + with np.errstate(call=func1, all="call"): + pass + with np.errstate(call=Write1(), divide="log", over="log"): + pass + +finally: + np.seterr(**_err_default) + np.setbufsize(_bufsize_default) + np.seterrcall(_errcall_default) diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/ufunclike.py b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/ufunclike.py new file mode 100644 index 0000000..7eac89e --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/ufunclike.py @@ -0,0 +1,46 @@ +from __future__ import annotations +from typing import Any +import numpy as np + + +class Object: + def __ceil__(self) -> Object: + return self + + def __floor__(self) -> Object: + return self + + def __ge__(self, value: object) -> bool: + return True + + def __array__(self) -> np.ndarray[Any, np.dtype[np.object_]]: + ret = np.empty((), dtype=object) + ret[()] = self + return ret + + +AR_LIKE_b = [True, True, False] +AR_LIKE_u = [np.uint32(1), np.uint32(2), np.uint32(3)] +AR_LIKE_i = [1, 2, 3] +AR_LIKE_f = [1.0, 2.0, 3.0] +AR_LIKE_O = [Object(), Object(), Object()] +AR_U: np.ndarray[Any, np.dtype[np.str_]] = np.zeros(3, dtype="U5") + +np.fix(AR_LIKE_b) +np.fix(AR_LIKE_u) +np.fix(AR_LIKE_i) +np.fix(AR_LIKE_f) +np.fix(AR_LIKE_O) +np.fix(AR_LIKE_f, out=AR_U) + +np.isposinf(AR_LIKE_b) +np.isposinf(AR_LIKE_u) +np.isposinf(AR_LIKE_i) +np.isposinf(AR_LIKE_f) +np.isposinf(AR_LIKE_f, out=AR_U) + +np.isneginf(AR_LIKE_b) +np.isneginf(AR_LIKE_u) +np.isneginf(AR_LIKE_i) +np.isneginf(AR_LIKE_f) +np.isneginf(AR_LIKE_f, out=AR_U) diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/ufuncs.py b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/ufuncs.py new file mode 100644 index 0000000..3cc31ae --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/ufuncs.py @@ -0,0 +1,17 @@ +import numpy as np + +np.sin(1) +np.sin([1, 2, 3]) +np.sin(1, out=np.empty(1)) +np.matmul(np.ones((2, 2, 2)), np.ones((2, 2, 2)), axes=[(0, 1), (0, 1), (0, 1)]) +np.sin(1, signature="D->D") +np.sin(1, extobj=[16, 1, lambda: None]) +# NOTE: `np.generic` subclasses are not guaranteed to support addition; +# re-enable this once we can infer the exact return type of `np.sin(...)`. 
+# +# np.sin(1) + np.sin(1) +np.sin.types[0] +np.sin.__name__ +np.sin.__doc__ + +np.abs(np.array([1])) diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/warnings_and_errors.py b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/warnings_and_errors.py new file mode 100644 index 0000000..a556bf6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/pass/warnings_and_errors.py @@ -0,0 +1,6 @@ +import numpy as np + +np.AxisError("test") +np.AxisError(1, ndim=2) +np.AxisError(1, ndim=2, msg_prefix="error") +np.AxisError(1, ndim=2, msg_prefix=None) diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/arithmetic.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/arithmetic.pyi new file mode 100644 index 0000000..c5b4674 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/arithmetic.pyi @@ -0,0 +1,522 @@ +from typing import Any, List +import numpy as np +import numpy.typing as npt + +# Can't directly import `np.float128` as it is not available on all platforms +f16: np.floating[npt._128Bit] + +c16 = np.complex128() +f8 = np.float64() +i8 = np.int64() +u8 = np.uint64() + +c8 = np.complex64() +f4 = np.float32() +i4 = np.int32() +u4 = np.uint32() + +dt = np.datetime64(0, "D") +td = np.timedelta64(0, "D") + +b_ = np.bool_() + +b = bool() +c = complex() +f = float() +i = int() + +AR_b: np.ndarray[Any, np.dtype[np.bool_]] +AR_u: np.ndarray[Any, np.dtype[np.uint32]] +AR_i: np.ndarray[Any, np.dtype[np.int64]] +AR_f: np.ndarray[Any, np.dtype[np.float64]] +AR_c: np.ndarray[Any, np.dtype[np.complex128]] +AR_m: np.ndarray[Any, np.dtype[np.timedelta64]] +AR_M: np.ndarray[Any, np.dtype[np.datetime64]] +AR_O: np.ndarray[Any, np.dtype[np.object_]] + +AR_LIKE_b: List[bool] +AR_LIKE_u: List[np.uint32] +AR_LIKE_i: List[int] +AR_LIKE_f: List[float] +AR_LIKE_c: List[complex] +AR_LIKE_m: List[np.timedelta64] +AR_LIKE_M: List[np.datetime64] +AR_LIKE_O: List[np.object_] + +# Array subtraction + +reveal_type(AR_b - AR_LIKE_u) # E: ndarray[Any, dtype[unsignedinteger[Any]]] +reveal_type(AR_b - AR_LIKE_i) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(AR_b - AR_LIKE_f) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(AR_b - AR_LIKE_c) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] +reveal_type(AR_b - AR_LIKE_m) # E: ndarray[Any, dtype[timedelta64]] +reveal_type(AR_b - AR_LIKE_O) # E: Any + +reveal_type(AR_LIKE_u - AR_b) # E: ndarray[Any, dtype[unsignedinteger[Any]]] +reveal_type(AR_LIKE_i - AR_b) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(AR_LIKE_f - AR_b) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(AR_LIKE_c - AR_b) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] +reveal_type(AR_LIKE_m - AR_b) # E: ndarray[Any, dtype[timedelta64]] +reveal_type(AR_LIKE_M - AR_b) # E: ndarray[Any, dtype[datetime64]] +reveal_type(AR_LIKE_O - AR_b) # E: Any + +reveal_type(AR_u - AR_LIKE_b) # E: ndarray[Any, dtype[unsignedinteger[Any]]] +reveal_type(AR_u - AR_LIKE_u) # E: ndarray[Any, dtype[unsignedinteger[Any]]] +reveal_type(AR_u - AR_LIKE_i) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(AR_u - AR_LIKE_f) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(AR_u - AR_LIKE_c) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] +reveal_type(AR_u - AR_LIKE_m) # E: ndarray[Any, dtype[timedelta64]] +reveal_type(AR_u - AR_LIKE_O) # E: Any + +reveal_type(AR_LIKE_b - AR_u) # E: ndarray[Any, dtype[unsignedinteger[Any]]] +reveal_type(AR_LIKE_u - AR_u) 
# E: ndarray[Any, dtype[unsignedinteger[Any]]] +reveal_type(AR_LIKE_i - AR_u) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(AR_LIKE_f - AR_u) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(AR_LIKE_c - AR_u) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] +reveal_type(AR_LIKE_m - AR_u) # E: ndarray[Any, dtype[timedelta64]] +reveal_type(AR_LIKE_M - AR_u) # E: ndarray[Any, dtype[datetime64]] +reveal_type(AR_LIKE_O - AR_u) # E: Any + +reveal_type(AR_i - AR_LIKE_b) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(AR_i - AR_LIKE_u) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(AR_i - AR_LIKE_i) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(AR_i - AR_LIKE_f) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(AR_i - AR_LIKE_c) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] +reveal_type(AR_i - AR_LIKE_m) # E: ndarray[Any, dtype[timedelta64]] +reveal_type(AR_i - AR_LIKE_O) # E: Any + +reveal_type(AR_LIKE_b - AR_i) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(AR_LIKE_u - AR_i) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(AR_LIKE_i - AR_i) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(AR_LIKE_f - AR_i) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(AR_LIKE_c - AR_i) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] +reveal_type(AR_LIKE_m - AR_i) # E: ndarray[Any, dtype[timedelta64]] +reveal_type(AR_LIKE_M - AR_i) # E: ndarray[Any, dtype[datetime64]] +reveal_type(AR_LIKE_O - AR_i) # E: Any + +reveal_type(AR_f - AR_LIKE_b) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(AR_f - AR_LIKE_u) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(AR_f - AR_LIKE_i) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(AR_f - AR_LIKE_f) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(AR_f - AR_LIKE_c) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] +reveal_type(AR_f - AR_LIKE_O) # E: Any + +reveal_type(AR_LIKE_b - AR_f) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(AR_LIKE_u - AR_f) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(AR_LIKE_i - AR_f) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(AR_LIKE_f - AR_f) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(AR_LIKE_c - AR_f) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] +reveal_type(AR_LIKE_O - AR_f) # E: Any + +reveal_type(AR_c - AR_LIKE_b) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] +reveal_type(AR_c - AR_LIKE_u) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] +reveal_type(AR_c - AR_LIKE_i) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] +reveal_type(AR_c - AR_LIKE_f) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] +reveal_type(AR_c - AR_LIKE_c) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] +reveal_type(AR_c - AR_LIKE_O) # E: Any + +reveal_type(AR_LIKE_b - AR_c) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] +reveal_type(AR_LIKE_u - AR_c) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] +reveal_type(AR_LIKE_i - AR_c) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] +reveal_type(AR_LIKE_f - AR_c) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] +reveal_type(AR_LIKE_c - AR_c) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] +reveal_type(AR_LIKE_O - AR_c) # E: Any + +reveal_type(AR_m - AR_LIKE_b) # E: ndarray[Any, dtype[timedelta64]] +reveal_type(AR_m - AR_LIKE_u) # E: ndarray[Any, dtype[timedelta64]] +reveal_type(AR_m - AR_LIKE_i) # E: ndarray[Any, dtype[timedelta64]] +reveal_type(AR_m - AR_LIKE_m) # E: ndarray[Any, dtype[timedelta64]] +reveal_type(AR_m - AR_LIKE_O) # E: 
Any + +reveal_type(AR_LIKE_b - AR_m) # E: ndarray[Any, dtype[timedelta64]] +reveal_type(AR_LIKE_u - AR_m) # E: ndarray[Any, dtype[timedelta64]] +reveal_type(AR_LIKE_i - AR_m) # E: ndarray[Any, dtype[timedelta64]] +reveal_type(AR_LIKE_m - AR_m) # E: ndarray[Any, dtype[timedelta64]] +reveal_type(AR_LIKE_M - AR_m) # E: ndarray[Any, dtype[datetime64]] +reveal_type(AR_LIKE_O - AR_m) # E: Any + +reveal_type(AR_M - AR_LIKE_b) # E: ndarray[Any, dtype[datetime64]] +reveal_type(AR_M - AR_LIKE_u) # E: ndarray[Any, dtype[datetime64]] +reveal_type(AR_M - AR_LIKE_i) # E: ndarray[Any, dtype[datetime64]] +reveal_type(AR_M - AR_LIKE_m) # E: ndarray[Any, dtype[datetime64]] +reveal_type(AR_M - AR_LIKE_M) # E: ndarray[Any, dtype[timedelta64]] +reveal_type(AR_M - AR_LIKE_O) # E: Any + +reveal_type(AR_LIKE_M - AR_M) # E: ndarray[Any, dtype[timedelta64]] +reveal_type(AR_LIKE_O - AR_M) # E: Any + +reveal_type(AR_O - AR_LIKE_b) # E: Any +reveal_type(AR_O - AR_LIKE_u) # E: Any +reveal_type(AR_O - AR_LIKE_i) # E: Any +reveal_type(AR_O - AR_LIKE_f) # E: Any +reveal_type(AR_O - AR_LIKE_c) # E: Any +reveal_type(AR_O - AR_LIKE_m) # E: Any +reveal_type(AR_O - AR_LIKE_M) # E: Any +reveal_type(AR_O - AR_LIKE_O) # E: Any + +reveal_type(AR_LIKE_b - AR_O) # E: Any +reveal_type(AR_LIKE_u - AR_O) # E: Any +reveal_type(AR_LIKE_i - AR_O) # E: Any +reveal_type(AR_LIKE_f - AR_O) # E: Any +reveal_type(AR_LIKE_c - AR_O) # E: Any +reveal_type(AR_LIKE_m - AR_O) # E: Any +reveal_type(AR_LIKE_M - AR_O) # E: Any +reveal_type(AR_LIKE_O - AR_O) # E: Any + +# Array floor division + +reveal_type(AR_b // AR_LIKE_b) # E: ndarray[Any, dtype[{int8}]] +reveal_type(AR_b // AR_LIKE_u) # E: ndarray[Any, dtype[unsignedinteger[Any]]] +reveal_type(AR_b // AR_LIKE_i) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(AR_b // AR_LIKE_f) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(AR_b // AR_LIKE_O) # E: Any + +reveal_type(AR_LIKE_b // AR_b) # E: ndarray[Any, dtype[{int8}]] +reveal_type(AR_LIKE_u // AR_b) # E: ndarray[Any, dtype[unsignedinteger[Any]]] +reveal_type(AR_LIKE_i // AR_b) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(AR_LIKE_f // AR_b) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(AR_LIKE_O // AR_b) # E: Any + +reveal_type(AR_u // AR_LIKE_b) # E: ndarray[Any, dtype[unsignedinteger[Any]]] +reveal_type(AR_u // AR_LIKE_u) # E: ndarray[Any, dtype[unsignedinteger[Any]]] +reveal_type(AR_u // AR_LIKE_i) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(AR_u // AR_LIKE_f) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(AR_u // AR_LIKE_O) # E: Any + +reveal_type(AR_LIKE_b // AR_u) # E: ndarray[Any, dtype[unsignedinteger[Any]]] +reveal_type(AR_LIKE_u // AR_u) # E: ndarray[Any, dtype[unsignedinteger[Any]]] +reveal_type(AR_LIKE_i // AR_u) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(AR_LIKE_f // AR_u) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(AR_LIKE_m // AR_u) # E: ndarray[Any, dtype[timedelta64]] +reveal_type(AR_LIKE_O // AR_u) # E: Any + +reveal_type(AR_i // AR_LIKE_b) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(AR_i // AR_LIKE_u) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(AR_i // AR_LIKE_i) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(AR_i // AR_LIKE_f) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(AR_i // AR_LIKE_O) # E: Any + +reveal_type(AR_LIKE_b // AR_i) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(AR_LIKE_u // AR_i) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(AR_LIKE_i // AR_i) # E: ndarray[Any, 
dtype[signedinteger[Any]]] +reveal_type(AR_LIKE_f // AR_i) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(AR_LIKE_m // AR_i) # E: ndarray[Any, dtype[timedelta64]] +reveal_type(AR_LIKE_O // AR_i) # E: Any + +reveal_type(AR_f // AR_LIKE_b) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(AR_f // AR_LIKE_u) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(AR_f // AR_LIKE_i) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(AR_f // AR_LIKE_f) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(AR_f // AR_LIKE_O) # E: Any + +reveal_type(AR_LIKE_b // AR_f) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(AR_LIKE_u // AR_f) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(AR_LIKE_i // AR_f) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(AR_LIKE_f // AR_f) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(AR_LIKE_m // AR_f) # E: ndarray[Any, dtype[timedelta64]] +reveal_type(AR_LIKE_O // AR_f) # E: Any + +reveal_type(AR_m // AR_LIKE_u) # E: ndarray[Any, dtype[timedelta64]] +reveal_type(AR_m // AR_LIKE_i) # E: ndarray[Any, dtype[timedelta64]] +reveal_type(AR_m // AR_LIKE_f) # E: ndarray[Any, dtype[timedelta64]] +reveal_type(AR_m // AR_LIKE_m) # E: ndarray[Any, dtype[{int64}]] +reveal_type(AR_m // AR_LIKE_O) # E: Any + +reveal_type(AR_LIKE_m // AR_m) # E: ndarray[Any, dtype[{int64}]] +reveal_type(AR_LIKE_O // AR_m) # E: Any + +reveal_type(AR_O // AR_LIKE_b) # E: Any +reveal_type(AR_O // AR_LIKE_u) # E: Any +reveal_type(AR_O // AR_LIKE_i) # E: Any +reveal_type(AR_O // AR_LIKE_f) # E: Any +reveal_type(AR_O // AR_LIKE_m) # E: Any +reveal_type(AR_O // AR_LIKE_M) # E: Any +reveal_type(AR_O // AR_LIKE_O) # E: Any + +reveal_type(AR_LIKE_b // AR_O) # E: Any +reveal_type(AR_LIKE_u // AR_O) # E: Any +reveal_type(AR_LIKE_i // AR_O) # E: Any +reveal_type(AR_LIKE_f // AR_O) # E: Any +reveal_type(AR_LIKE_m // AR_O) # E: Any +reveal_type(AR_LIKE_M // AR_O) # E: Any +reveal_type(AR_LIKE_O // AR_O) # E: Any + +# unary ops + +reveal_type(-f16) # E: {float128} +reveal_type(-c16) # E: {complex128} +reveal_type(-c8) # E: {complex64} +reveal_type(-f8) # E: {float64} +reveal_type(-f4) # E: {float32} +reveal_type(-i8) # E: {int64} +reveal_type(-i4) # E: {int32} +reveal_type(-u8) # E: {uint64} +reveal_type(-u4) # E: {uint32} +reveal_type(-td) # E: timedelta64 +reveal_type(-AR_f) # E: Any + +reveal_type(+f16) # E: {float128} +reveal_type(+c16) # E: {complex128} +reveal_type(+c8) # E: {complex64} +reveal_type(+f8) # E: {float64} +reveal_type(+f4) # E: {float32} +reveal_type(+i8) # E: {int64} +reveal_type(+i4) # E: {int32} +reveal_type(+u8) # E: {uint64} +reveal_type(+u4) # E: {uint32} +reveal_type(+td) # E: timedelta64 +reveal_type(+AR_f) # E: Any + +reveal_type(abs(f16)) # E: {float128} +reveal_type(abs(c16)) # E: {float64} +reveal_type(abs(c8)) # E: {float32} +reveal_type(abs(f8)) # E: {float64} +reveal_type(abs(f4)) # E: {float32} +reveal_type(abs(i8)) # E: {int64} +reveal_type(abs(i4)) # E: {int32} +reveal_type(abs(u8)) # E: {uint64} +reveal_type(abs(u4)) # E: {uint32} +reveal_type(abs(td)) # E: timedelta64 +reveal_type(abs(b_)) # E: bool_ +reveal_type(abs(AR_f)) # E: Any + +# Time structures + +reveal_type(dt + td) # E: datetime64 +reveal_type(dt + i) # E: datetime64 +reveal_type(dt + i4) # E: datetime64 +reveal_type(dt + i8) # E: datetime64 +reveal_type(dt - dt) # E: timedelta64 +reveal_type(dt - i) # E: datetime64 +reveal_type(dt - i4) # E: datetime64 +reveal_type(dt - i8) # E: datetime64 + +reveal_type(td + td) # E: timedelta64 +reveal_type(td + i) # E: timedelta64 +reveal_type(td + 
i4) # E: timedelta64 +reveal_type(td + i8) # E: timedelta64 +reveal_type(td - td) # E: timedelta64 +reveal_type(td - i) # E: timedelta64 +reveal_type(td - i4) # E: timedelta64 +reveal_type(td - i8) # E: timedelta64 +reveal_type(td / f) # E: timedelta64 +reveal_type(td / f4) # E: timedelta64 +reveal_type(td / f8) # E: timedelta64 +reveal_type(td / td) # E: {float64} +reveal_type(td // td) # E: {int64} + +# boolean + +reveal_type(b_ / b) # E: {float64} +reveal_type(b_ / b_) # E: {float64} +reveal_type(b_ / i) # E: {float64} +reveal_type(b_ / i8) # E: {float64} +reveal_type(b_ / i4) # E: {float64} +reveal_type(b_ / u8) # E: {float64} +reveal_type(b_ / u4) # E: {float64} +reveal_type(b_ / f) # E: {float64} +reveal_type(b_ / f16) # E: {float128} +reveal_type(b_ / f8) # E: {float64} +reveal_type(b_ / f4) # E: {float32} +reveal_type(b_ / c) # E: {complex128} +reveal_type(b_ / c16) # E: {complex128} +reveal_type(b_ / c8) # E: {complex64} + +reveal_type(b / b_) # E: {float64} +reveal_type(b_ / b_) # E: {float64} +reveal_type(i / b_) # E: {float64} +reveal_type(i8 / b_) # E: {float64} +reveal_type(i4 / b_) # E: {float64} +reveal_type(u8 / b_) # E: {float64} +reveal_type(u4 / b_) # E: {float64} +reveal_type(f / b_) # E: {float64} +reveal_type(f16 / b_) # E: {float128} +reveal_type(f8 / b_) # E: {float64} +reveal_type(f4 / b_) # E: {float32} +reveal_type(c / b_) # E: {complex128} +reveal_type(c16 / b_) # E: {complex128} +reveal_type(c8 / b_) # E: {complex64} + +# Complex + +reveal_type(c16 + f16) # E: {complex256} +reveal_type(c16 + c16) # E: {complex128} +reveal_type(c16 + f8) # E: {complex128} +reveal_type(c16 + i8) # E: {complex128} +reveal_type(c16 + c8) # E: {complex128} +reveal_type(c16 + f4) # E: {complex128} +reveal_type(c16 + i4) # E: {complex128} +reveal_type(c16 + b_) # E: {complex128} +reveal_type(c16 + b) # E: {complex128} +reveal_type(c16 + c) # E: {complex128} +reveal_type(c16 + f) # E: {complex128} +reveal_type(c16 + i) # E: {complex128} +reveal_type(c16 + AR_f) # E: Any + +reveal_type(f16 + c16) # E: {complex256} +reveal_type(c16 + c16) # E: {complex128} +reveal_type(f8 + c16) # E: {complex128} +reveal_type(i8 + c16) # E: {complex128} +reveal_type(c8 + c16) # E: {complex128} +reveal_type(f4 + c16) # E: {complex128} +reveal_type(i4 + c16) # E: {complex128} +reveal_type(b_ + c16) # E: {complex128} +reveal_type(b + c16) # E: {complex128} +reveal_type(c + c16) # E: {complex128} +reveal_type(f + c16) # E: {complex128} +reveal_type(i + c16) # E: {complex128} +reveal_type(AR_f + c16) # E: Any + +reveal_type(c8 + f16) # E: {complex256} +reveal_type(c8 + c16) # E: {complex128} +reveal_type(c8 + f8) # E: {complex128} +reveal_type(c8 + i8) # E: {complex128} +reveal_type(c8 + c8) # E: {complex64} +reveal_type(c8 + f4) # E: {complex64} +reveal_type(c8 + i4) # E: {complex64} +reveal_type(c8 + b_) # E: {complex64} +reveal_type(c8 + b) # E: {complex64} +reveal_type(c8 + c) # E: {complex128} +reveal_type(c8 + f) # E: {complex128} +reveal_type(c8 + i) # E: complexfloating[{_NBitInt}, {_NBitInt}] +reveal_type(c8 + AR_f) # E: Any + +reveal_type(f16 + c8) # E: {complex256} +reveal_type(c16 + c8) # E: {complex128} +reveal_type(f8 + c8) # E: {complex128} +reveal_type(i8 + c8) # E: {complex128} +reveal_type(c8 + c8) # E: {complex64} +reveal_type(f4 + c8) # E: {complex64} +reveal_type(i4 + c8) # E: {complex64} +reveal_type(b_ + c8) # E: {complex64} +reveal_type(b + c8) # E: {complex64} +reveal_type(c + c8) # E: {complex128} +reveal_type(f + c8) # E: {complex128} +reveal_type(i + c8) # E: 
complexfloating[{_NBitInt}, {_NBitInt}] +reveal_type(AR_f + c8) # E: Any + +# Float + +reveal_type(f8 + f16) # E: {float128} +reveal_type(f8 + f8) # E: {float64} +reveal_type(f8 + i8) # E: {float64} +reveal_type(f8 + f4) # E: {float64} +reveal_type(f8 + i4) # E: {float64} +reveal_type(f8 + b_) # E: {float64} +reveal_type(f8 + b) # E: {float64} +reveal_type(f8 + c) # E: {complex128} +reveal_type(f8 + f) # E: {float64} +reveal_type(f8 + i) # E: {float64} +reveal_type(f8 + AR_f) # E: Any + +reveal_type(f16 + f8) # E: {float128} +reveal_type(f8 + f8) # E: {float64} +reveal_type(i8 + f8) # E: {float64} +reveal_type(f4 + f8) # E: {float64} +reveal_type(i4 + f8) # E: {float64} +reveal_type(b_ + f8) # E: {float64} +reveal_type(b + f8) # E: {float64} +reveal_type(c + f8) # E: {complex128} +reveal_type(f + f8) # E: {float64} +reveal_type(i + f8) # E: {float64} +reveal_type(AR_f + f8) # E: Any + +reveal_type(f4 + f16) # E: {float128} +reveal_type(f4 + f8) # E: {float64} +reveal_type(f4 + i8) # E: {float64} +reveal_type(f4 + f4) # E: {float32} +reveal_type(f4 + i4) # E: {float32} +reveal_type(f4 + b_) # E: {float32} +reveal_type(f4 + b) # E: {float32} +reveal_type(f4 + c) # E: {complex128} +reveal_type(f4 + f) # E: {float64} +reveal_type(f4 + i) # E: floating[{_NBitInt}] +reveal_type(f4 + AR_f) # E: Any + +reveal_type(f16 + f4) # E: {float128} +reveal_type(f8 + f4) # E: {float64} +reveal_type(i8 + f4) # E: {float64} +reveal_type(f4 + f4) # E: {float32} +reveal_type(i4 + f4) # E: {float32} +reveal_type(b_ + f4) # E: {float32} +reveal_type(b + f4) # E: {float32} +reveal_type(c + f4) # E: {complex128} +reveal_type(f + f4) # E: {float64} +reveal_type(i + f4) # E: floating[{_NBitInt}] +reveal_type(AR_f + f4) # E: Any + +# Int + +reveal_type(i8 + i8) # E: {int64} +reveal_type(i8 + u8) # E: Any +reveal_type(i8 + i4) # E: {int64} +reveal_type(i8 + u4) # E: Any +reveal_type(i8 + b_) # E: {int64} +reveal_type(i8 + b) # E: {int64} +reveal_type(i8 + c) # E: {complex128} +reveal_type(i8 + f) # E: {float64} +reveal_type(i8 + i) # E: {int64} +reveal_type(i8 + AR_f) # E: Any + +reveal_type(u8 + u8) # E: {uint64} +reveal_type(u8 + i4) # E: Any +reveal_type(u8 + u4) # E: {uint64} +reveal_type(u8 + b_) # E: {uint64} +reveal_type(u8 + b) # E: {uint64} +reveal_type(u8 + c) # E: {complex128} +reveal_type(u8 + f) # E: {float64} +reveal_type(u8 + i) # E: Any +reveal_type(u8 + AR_f) # E: Any + +reveal_type(i8 + i8) # E: {int64} +reveal_type(u8 + i8) # E: Any +reveal_type(i4 + i8) # E: {int64} +reveal_type(u4 + i8) # E: Any +reveal_type(b_ + i8) # E: {int64} +reveal_type(b + i8) # E: {int64} +reveal_type(c + i8) # E: {complex128} +reveal_type(f + i8) # E: {float64} +reveal_type(i + i8) # E: {int64} +reveal_type(AR_f + i8) # E: Any + +reveal_type(u8 + u8) # E: {uint64} +reveal_type(i4 + u8) # E: Any +reveal_type(u4 + u8) # E: {uint64} +reveal_type(b_ + u8) # E: {uint64} +reveal_type(b + u8) # E: {uint64} +reveal_type(c + u8) # E: {complex128} +reveal_type(f + u8) # E: {float64} +reveal_type(i + u8) # E: Any +reveal_type(AR_f + u8) # E: Any + +reveal_type(i4 + i8) # E: {int64} +reveal_type(i4 + i4) # E: {int32} +reveal_type(i4 + i) # E: {int_} +reveal_type(i4 + b_) # E: {int32} +reveal_type(i4 + b) # E: {int32} +reveal_type(i4 + AR_f) # E: Any + +reveal_type(u4 + i8) # E: Any +reveal_type(u4 + i4) # E: Any +reveal_type(u4 + u8) # E: {uint64} +reveal_type(u4 + u4) # E: {uint32} +reveal_type(u4 + i) # E: Any +reveal_type(u4 + b_) # E: {uint32} +reveal_type(u4 + b) # E: {uint32} +reveal_type(u4 + AR_f) # E: Any + +reveal_type(i8 + 
i4) # E: {int64} +reveal_type(i4 + i4) # E: {int32} +reveal_type(i + i4) # E: {int_} +reveal_type(b_ + i4) # E: {int32} +reveal_type(b + i4) # E: {int32} +reveal_type(AR_f + i4) # E: Any + +reveal_type(i8 + u4) # E: Any +reveal_type(i4 + u4) # E: Any +reveal_type(u8 + u4) # E: {uint64} +reveal_type(u4 + u4) # E: {uint32} +reveal_type(b_ + u4) # E: {uint32} +reveal_type(b + u4) # E: {uint32} +reveal_type(i + u4) # E: Any +reveal_type(AR_f + u4) # E: Any diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/array_constructors.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/array_constructors.pyi new file mode 100644 index 0000000..6df1c7c --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/array_constructors.pyi @@ -0,0 +1,187 @@ +from typing import List, Any, TypeVar +from pathlib import Path + +import numpy as np +import numpy.typing as npt + +_SCT = TypeVar("_SCT", bound=np.generic, covariant=True) + +class SubClass(np.ndarray[Any, np.dtype[_SCT]]): ... + +i8: np.int64 + +A: npt.NDArray[np.float64] +B: SubClass[np.float64] +C: List[int] + +def func(i: int, j: int, **kwargs: Any) -> SubClass[np.float64]: ... + +reveal_type(np.empty_like(A)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.empty_like(B)) # E: SubClass[{float64}] +reveal_type(np.empty_like([1, 1.0])) # E: ndarray[Any, dtype[Any]] +reveal_type(np.empty_like(A, dtype=np.int64)) # E: ndarray[Any, dtype[{int64}]] +reveal_type(np.empty_like(A, dtype='c16')) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.array(A)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.array(B)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.array(B, subok=True)) # E: SubClass[{float64}] +reveal_type(np.array([1, 1.0])) # E: ndarray[Any, dtype[Any]] +reveal_type(np.array(A, dtype=np.int64)) # E: ndarray[Any, dtype[{int64}]] +reveal_type(np.array(A, dtype='c16')) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.zeros([1, 5, 6])) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.zeros([1, 5, 6], dtype=np.int64)) # E: ndarray[Any, dtype[{int64}]] +reveal_type(np.zeros([1, 5, 6], dtype='c16')) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.empty([1, 5, 6])) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.empty([1, 5, 6], dtype=np.int64)) # E: ndarray[Any, dtype[{int64}]] +reveal_type(np.empty([1, 5, 6], dtype='c16')) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.concatenate(A)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.concatenate([A, A])) # E: Any +reveal_type(np.concatenate([[1], A])) # E: ndarray[Any, dtype[Any]] +reveal_type(np.concatenate([[1], [1]])) # E: ndarray[Any, dtype[Any]] +reveal_type(np.concatenate((A, A))) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.concatenate(([1], [1]))) # E: ndarray[Any, dtype[Any]] +reveal_type(np.concatenate([1, 1.0])) # E: ndarray[Any, dtype[Any]] +reveal_type(np.concatenate(A, dtype=np.int64)) # E: ndarray[Any, dtype[{int64}]] +reveal_type(np.concatenate(A, dtype='c16')) # E: ndarray[Any, dtype[Any]] +reveal_type(np.concatenate([1, 1.0], out=A)) # E: ndarray[Any, dtype[{float64}]] + +reveal_type(np.asarray(A)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.asarray(B)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.asarray([1, 1.0])) # E: ndarray[Any, dtype[Any]] +reveal_type(np.asarray(A, dtype=np.int64)) # E: ndarray[Any, dtype[{int64}]] +reveal_type(np.asarray(A, dtype='c16')) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.asanyarray(A)) # E: ndarray[Any, dtype[{float64}]] 
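The contrast asserted here, and in the `asanyarray(B)` line that follows, is behavioral as well as typed: `np.asarray` always hands back a base `ndarray`, while `np.asanyarray` lets `ndarray` subclasses pass through untouched. A quick illustration with a throwaway subclass standing in for the typed `SubClass` defined earlier:

import numpy as np

class MySub(np.ndarray):
    pass

b = np.zeros(3).view(MySub)
print(type(np.asarray(b)).__name__)     # ndarray (subclass stripped)
print(type(np.asanyarray(b)).__name__)  # MySub (subclass preserved)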
+reveal_type(np.asanyarray(B)) # E: SubClass[{float64}] +reveal_type(np.asanyarray([1, 1.0])) # E: ndarray[Any, dtype[Any]] +reveal_type(np.asanyarray(A, dtype=np.int64)) # E: ndarray[Any, dtype[{int64}]] +reveal_type(np.asanyarray(A, dtype='c16')) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.ascontiguousarray(A)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.ascontiguousarray(B)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.ascontiguousarray([1, 1.0])) # E: ndarray[Any, dtype[Any]] +reveal_type(np.ascontiguousarray(A, dtype=np.int64)) # E: ndarray[Any, dtype[{int64}]] +reveal_type(np.ascontiguousarray(A, dtype='c16')) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.asfortranarray(A)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.asfortranarray(B)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.asfortranarray([1, 1.0])) # E: ndarray[Any, dtype[Any]] +reveal_type(np.asfortranarray(A, dtype=np.int64)) # E: ndarray[Any, dtype[{int64}]] +reveal_type(np.asfortranarray(A, dtype='c16')) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.fromstring("1 1 1", sep=" ")) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.fromstring(b"1 1 1", sep=" ")) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.fromstring("1 1 1", dtype=np.int64, sep=" ")) # E: ndarray[Any, dtype[{int64}]] +reveal_type(np.fromstring(b"1 1 1", dtype=np.int64, sep=" ")) # E: ndarray[Any, dtype[{int64}]] +reveal_type(np.fromstring("1 1 1", dtype="c16", sep=" ")) # E: ndarray[Any, dtype[Any]] +reveal_type(np.fromstring(b"1 1 1", dtype="c16", sep=" ")) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.fromfile("test.txt", sep=" ")) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.fromfile("test.txt", dtype=np.int64, sep=" ")) # E: ndarray[Any, dtype[{int64}]] +reveal_type(np.fromfile("test.txt", dtype="c16", sep=" ")) # E: ndarray[Any, dtype[Any]] +with open("test.txt") as f: + reveal_type(np.fromfile(f, sep=" ")) # E: ndarray[Any, dtype[{float64}]] + reveal_type(np.fromfile(b"test.txt", sep=" ")) # E: ndarray[Any, dtype[{float64}]] + reveal_type(np.fromfile(Path("test.txt"), sep=" ")) # E: ndarray[Any, dtype[{float64}]] + +reveal_type(np.fromiter("12345", np.float64)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.fromiter("12345", float)) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.frombuffer(A)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.frombuffer(A, dtype=np.int64)) # E: ndarray[Any, dtype[{int64}]] +reveal_type(np.frombuffer(A, dtype="c16")) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.arange(False, True)) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(np.arange(10)) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(np.arange(0, 10, step=2)) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(np.arange(10.0)) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(np.arange(start=0, stop=10.0)) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(np.arange(np.timedelta64(0))) # E: ndarray[Any, dtype[timedelta64]] +reveal_type(np.arange(0, np.timedelta64(10))) # E: ndarray[Any, dtype[timedelta64]] +reveal_type(np.arange(np.datetime64("0"), np.datetime64("10"))) # E: ndarray[Any, dtype[datetime64]] +reveal_type(np.arange(10, dtype=np.float64)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.arange(0, 10, step=2, dtype=np.int16)) # E: ndarray[Any, dtype[{int16}]] +reveal_type(np.arange(10, dtype=int)) # E: ndarray[Any, dtype[Any]] +reveal_type(np.arange(0, 10, dtype="f8")) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.require(A)) # E: ndarray[Any, 
dtype[{float64}]] +reveal_type(np.require(B)) # E: SubClass[{float64}] +reveal_type(np.require(B, requirements=None)) # E: SubClass[{float64}] +reveal_type(np.require(B, dtype=int)) # E: ndarray[Any, Any] +reveal_type(np.require(B, requirements="E")) # E: ndarray[Any, Any] +reveal_type(np.require(B, requirements=["ENSUREARRAY"])) # E: ndarray[Any, Any] +reveal_type(np.require(B, requirements={"F", "E"})) # E: ndarray[Any, Any] +reveal_type(np.require(B, requirements=["C", "OWNDATA"])) # E: SubClass[{float64}] +reveal_type(np.require(B, requirements="W")) # E: SubClass[{float64}] +reveal_type(np.require(B, requirements="A")) # E: SubClass[{float64}] +reveal_type(np.require(C)) # E: ndarray[Any, Any] + +reveal_type(np.linspace(0, 10)) # E: ndarray[Any, Any] +reveal_type(np.linspace(0, 10, retstep=True)) # E: Tuple[ndarray[Any, Any], Any] +reveal_type(np.logspace(0, 10)) # E: ndarray[Any, Any] +reveal_type(np.geomspace(1, 10)) # E: ndarray[Any, Any] + +reveal_type(np.zeros_like(A)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.zeros_like(C)) # E: ndarray[Any, dtype[Any]] +reveal_type(np.zeros_like(A, dtype=float)) # E: ndarray[Any, dtype[Any]] +reveal_type(np.zeros_like(B)) # E: SubClass[{float64}] +reveal_type(np.zeros_like(B, dtype=np.int64)) # E: ndarray[Any, dtype[{int64}]] + +reveal_type(np.ones_like(A)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.ones_like(C)) # E: ndarray[Any, dtype[Any]] +reveal_type(np.ones_like(A, dtype=float)) # E: ndarray[Any, dtype[Any]] +reveal_type(np.ones_like(B)) # E: SubClass[{float64}] +reveal_type(np.ones_like(B, dtype=np.int64)) # E: ndarray[Any, dtype[{int64}]] + +reveal_type(np.full_like(A, i8)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.full_like(C, i8)) # E: ndarray[Any, dtype[Any]] +reveal_type(np.full_like(A, i8, dtype=int)) # E: ndarray[Any, dtype[Any]] +reveal_type(np.full_like(B, i8)) # E: SubClass[{float64}] +reveal_type(np.full_like(B, i8, dtype=np.int64)) # E: ndarray[Any, dtype[{int64}]] + +reveal_type(np.ones(1)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.ones([1, 1, 1])) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.ones(5, dtype=np.int64)) # E: ndarray[Any, dtype[{int64}]] +reveal_type(np.ones(5, dtype=int)) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.full(1, i8)) # E: ndarray[Any, dtype[Any]] +reveal_type(np.full([1, 1, 1], i8)) # E: ndarray[Any, dtype[Any]] +reveal_type(np.full(1, i8, dtype=np.float64)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.full(1, i8, dtype=float)) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.indices([1, 2, 3])) # E: ndarray[Any, dtype[{int_}]] +reveal_type(np.indices([1, 2, 3], sparse=True)) # E: tuple[ndarray[Any, dtype[{int_}]]] + +reveal_type(np.fromfunction(func, (3, 5))) # E: SubClass[{float64}] + +reveal_type(np.identity(10)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.identity(10, dtype=np.int64)) # E: ndarray[Any, dtype[{int64}]] +reveal_type(np.identity(10, dtype=int)) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.atleast_1d(A)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.atleast_1d(C)) # E: ndarray[Any, dtype[Any]] +reveal_type(np.atleast_1d(A, A)) # E: list[ndarray[Any, dtype[Any]]] +reveal_type(np.atleast_1d(A, C)) # E: list[ndarray[Any, dtype[Any]]] +reveal_type(np.atleast_1d(C, C)) # E: list[ndarray[Any, dtype[Any]]] + +reveal_type(np.atleast_2d(A)) # E: ndarray[Any, dtype[{float64}]] + +reveal_type(np.atleast_3d(A)) # E: ndarray[Any, dtype[{float64}]] + +reveal_type(np.vstack([A, A])) # E: ndarray[Any, Any] 
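The stacking checks continuing below rest on a shape distinction worth spelling out: `vstack` and `hstack` concatenate along an existing axis, whereas `stack` inserts a brand-new one. A short runnable sketch:

import numpy as np

a = np.ones((2, 3))
print(np.vstack([a, a]).shape)         # (4, 3): joined along axis 0
print(np.hstack([a, a]).shape)         # (2, 6): joined along axis 1
print(np.stack([a, a]).shape)          # (2, 2, 3): new leading axis
print(np.stack([a, a], axis=2).shape)  # (2, 3, 2)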
+reveal_type(np.vstack([A, C])) # E: ndarray[Any, dtype[Any]] +reveal_type(np.vstack([C, C])) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.hstack([A, A])) # E: ndarray[Any, Any] + +reveal_type(np.stack([A, A])) # E: Any +reveal_type(np.stack([A, C])) # E: ndarray[Any, dtype[Any]] +reveal_type(np.stack([C, C])) # E: ndarray[Any, dtype[Any]] +reveal_type(np.stack([A, A], axis=0)) # E: Any +reveal_type(np.stack([A, A], out=B)) # E: SubClass[{float64}] + +reveal_type(np.block([[A, A], [A, A]])) # E: ndarray[Any, Any] +reveal_type(np.block(C)) # E: ndarray[Any, dtype[Any]] diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/arraypad.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/arraypad.pyi new file mode 100644 index 0000000..995f82b --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/arraypad.pyi @@ -0,0 +1,21 @@ +from typing import List, Any, Mapping, Tuple, SupportsIndex + +import numpy as np +import numpy.typing as npt + +def mode_func( + ar: npt.NDArray[np.number[Any]], + width: Tuple[int, int], + iaxis: SupportsIndex, + kwargs: Mapping[str, Any], +) -> None: ... + +AR_i8: npt.NDArray[np.int64] +AR_f8: npt.NDArray[np.float64] +AR_LIKE: List[int] + +reveal_type(np.pad(AR_i8, (2, 3), "constant")) # E: ndarray[Any, dtype[{int64}]] +reveal_type(np.pad(AR_LIKE, (2, 3), "constant")) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.pad(AR_f8, (2, 3), mode_func)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.pad(AR_f8, (2, 3), mode_func, a=1, b=2)) # E: ndarray[Any, dtype[{float64}]] diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/arrayprint.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/arrayprint.pyi new file mode 100644 index 0000000..e797097 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/arrayprint.pyi @@ -0,0 +1,19 @@ +from typing import Any, Callable +import numpy as np + +AR: np.ndarray[Any, Any] +func_float: Callable[[np.floating[Any]], str] +func_int: Callable[[np.integer[Any]], str] + +reveal_type(np.get_printoptions()) # E: TypedDict +reveal_type(np.array2string( # E: str + AR, formatter={'float_kind': func_float, 'int_kind': func_int} +)) +reveal_type(np.format_float_scientific(1.0)) # E: str +reveal_type(np.format_float_positional(1)) # E: str +reveal_type(np.array_repr(AR)) # E: str +reveal_type(np.array_str(AR)) # E: str + +reveal_type(np.printoptions()) # E: contextlib._GeneratorContextManager +with np.printoptions() as dct: + reveal_type(dct) # E: TypedDict diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/arraysetops.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/arraysetops.pyi new file mode 100644 index 0000000..9deff8a --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/arraysetops.pyi @@ -0,0 +1,60 @@ +import numpy as np +import numpy.typing as npt + +AR_b: npt.NDArray[np.bool_] +AR_i8: npt.NDArray[np.int64] +AR_f8: npt.NDArray[np.float64] +AR_M: npt.NDArray[np.datetime64] +AR_O: npt.NDArray[np.object_] + +AR_LIKE_f8: list[float] + +reveal_type(np.ediff1d(AR_b)) # E: ndarray[Any, dtype[{int8}]] +reveal_type(np.ediff1d(AR_i8, to_end=[1, 2, 3])) # E: ndarray[Any, dtype[{int64}]] +reveal_type(np.ediff1d(AR_M)) # E: ndarray[Any, dtype[timedelta64]] +reveal_type(np.ediff1d(AR_O)) # E: ndarray[Any, dtype[object_]] +reveal_type(np.ediff1d(AR_LIKE_f8, to_begin=[1, 1.5])) # E: ndarray[Any, dtype[Any]] + 
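Beyond the dtypes, the `ediff1d` reveals above encode simple value semantics: the result holds consecutive differences, `to_begin`/`to_end` are spliced onto the ends, and the input dtype carries through (hence the `{int64}` reveal). A runnable check under those assumptions:

import numpy as np

x = np.array([1, 4, 9, 16], dtype=np.int64)
print(np.ediff1d(x))                           # [3 5 7]
print(np.ediff1d(x, to_begin=0, to_end=[99]))  # [ 0  3  5  7 99]
print(np.ediff1d(x).dtype)                     # int64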
+reveal_type(np.intersect1d(AR_i8, AR_i8)) # E: ndarray[Any, dtype[{int64}]] +reveal_type(np.intersect1d(AR_M, AR_M, assume_unique=True)) # E: ndarray[Any, dtype[datetime64]] +reveal_type(np.intersect1d(AR_f8, AR_i8)) # E: ndarray[Any, dtype[Any]] +reveal_type(np.intersect1d(AR_f8, AR_f8, return_indices=True)) # E: Tuple[ndarray[Any, dtype[{float64}]], ndarray[Any, dtype[{intp}]], ndarray[Any, dtype[{intp}]]] + +reveal_type(np.setxor1d(AR_i8, AR_i8)) # E: ndarray[Any, dtype[{int64}]] +reveal_type(np.setxor1d(AR_M, AR_M, assume_unique=True)) # E: ndarray[Any, dtype[datetime64]] +reveal_type(np.setxor1d(AR_f8, AR_i8)) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.in1d(AR_i8, AR_i8)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.in1d(AR_M, AR_M, assume_unique=True)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.in1d(AR_f8, AR_i8)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.in1d(AR_f8, AR_LIKE_f8, invert=True)) # E: ndarray[Any, dtype[bool_]] + +reveal_type(np.isin(AR_i8, AR_i8)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.isin(AR_M, AR_M, assume_unique=True)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.isin(AR_f8, AR_i8)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.isin(AR_f8, AR_LIKE_f8, invert=True)) # E: ndarray[Any, dtype[bool_]] + +reveal_type(np.union1d(AR_i8, AR_i8)) # E: ndarray[Any, dtype[{int64}]] +reveal_type(np.union1d(AR_M, AR_M)) # E: ndarray[Any, dtype[datetime64]] +reveal_type(np.union1d(AR_f8, AR_i8)) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.setdiff1d(AR_i8, AR_i8)) # E: ndarray[Any, dtype[{int64}]] +reveal_type(np.setdiff1d(AR_M, AR_M, assume_unique=True)) # E: ndarray[Any, dtype[datetime64]] +reveal_type(np.setdiff1d(AR_f8, AR_i8)) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.unique(AR_f8)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.unique(AR_LIKE_f8, axis=0)) # E: ndarray[Any, dtype[Any]] +reveal_type(np.unique(AR_f8, return_index=True)) # E: Tuple[ndarray[Any, dtype[{float64}]], ndarray[Any, dtype[{intp}]]] +reveal_type(np.unique(AR_LIKE_f8, return_index=True)) # E: Tuple[ndarray[Any, dtype[Any]], ndarray[Any, dtype[{intp}]]] +reveal_type(np.unique(AR_f8, return_inverse=True)) # E: Tuple[ndarray[Any, dtype[{float64}]], ndarray[Any, dtype[{intp}]]] +reveal_type(np.unique(AR_LIKE_f8, return_inverse=True)) # E: Tuple[ndarray[Any, dtype[Any]], ndarray[Any, dtype[{intp}]]] +reveal_type(np.unique(AR_f8, return_counts=True)) # E: Tuple[ndarray[Any, dtype[{float64}]], ndarray[Any, dtype[{intp}]]] +reveal_type(np.unique(AR_LIKE_f8, return_counts=True)) # E: Tuple[ndarray[Any, dtype[Any]], ndarray[Any, dtype[{intp}]]] +reveal_type(np.unique(AR_f8, return_index=True, return_inverse=True)) # E: Tuple[ndarray[Any, dtype[{float64}]], ndarray[Any, dtype[{intp}]], ndarray[Any, dtype[{intp}]]] +reveal_type(np.unique(AR_LIKE_f8, return_index=True, return_inverse=True)) # E: Tuple[ndarray[Any, dtype[Any]], ndarray[Any, dtype[{intp}]], ndarray[Any, dtype[{intp}]]] +reveal_type(np.unique(AR_f8, return_index=True, return_counts=True)) # E: Tuple[ndarray[Any, dtype[{float64}]], ndarray[Any, dtype[{intp}]], ndarray[Any, dtype[{intp}]]] +reveal_type(np.unique(AR_LIKE_f8, return_index=True, return_counts=True)) # E: Tuple[ndarray[Any, dtype[Any]], ndarray[Any, dtype[{intp}]], ndarray[Any, dtype[{intp}]]] +reveal_type(np.unique(AR_f8, return_inverse=True, return_counts=True)) # E: Tuple[ndarray[Any, dtype[{float64}]], ndarray[Any, dtype[{intp}]], ndarray[Any, dtype[{intp}]]] +reveal_type(np.unique(AR_LIKE_f8, return_inverse=True, return_counts=True)) # E: 
Tuple[ndarray[Any, dtype[Any]], ndarray[Any, dtype[{intp}]], ndarray[Any, dtype[{intp}]]] +reveal_type(np.unique(AR_f8, return_index=True, return_inverse=True, return_counts=True)) # E: Tuple[ndarray[Any, dtype[{float64}]], ndarray[Any, dtype[{intp}]], ndarray[Any, dtype[{intp}]], ndarray[Any, dtype[{intp}]]] +reveal_type(np.unique(AR_LIKE_f8, return_index=True, return_inverse=True, return_counts=True)) # E: Tuple[ndarray[Any, dtype[Any]], ndarray[Any, dtype[{intp}]], ndarray[Any, dtype[{intp}]], ndarray[Any, dtype[{intp}]]] diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/arrayterator.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/arrayterator.pyi new file mode 100644 index 0000000..2dab9d0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/arrayterator.pyi @@ -0,0 +1,24 @@ +from typing import Any +import numpy as np + +AR_i8: np.ndarray[Any, np.dtype[np.int64]] +ar_iter = np.lib.Arrayterator(AR_i8) + +reveal_type(ar_iter.var) # E: ndarray[Any, dtype[{int64}]] +reveal_type(ar_iter.buf_size) # E: Union[None, builtins.int] +reveal_type(ar_iter.start) # E: builtins.list[builtins.int] +reveal_type(ar_iter.stop) # E: builtins.list[builtins.int] +reveal_type(ar_iter.step) # E: builtins.list[builtins.int] +reveal_type(ar_iter.shape) # E: builtins.tuple[builtins.int] +reveal_type(ar_iter.flat) # E: typing.Generator[{int64}, None, None] + +reveal_type(ar_iter.__array__()) # E: ndarray[Any, dtype[{int64}]] + +for i in ar_iter: + reveal_type(i) # E: ndarray[Any, dtype[{int64}]] + +reveal_type(ar_iter[0]) # E: lib.arrayterator.Arrayterator[Any, dtype[{int64}]] +reveal_type(ar_iter[...]) # E: lib.arrayterator.Arrayterator[Any, dtype[{int64}]] +reveal_type(ar_iter[:]) # E: lib.arrayterator.Arrayterator[Any, dtype[{int64}]] +reveal_type(ar_iter[0, 0, 0]) # E: lib.arrayterator.Arrayterator[Any, dtype[{int64}]] +reveal_type(ar_iter[..., 0, :]) # E: lib.arrayterator.Arrayterator[Any, dtype[{int64}]] diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/bitwise_ops.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/bitwise_ops.pyi new file mode 100644 index 0000000..f293ef6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/bitwise_ops.pyi @@ -0,0 +1,131 @@ +import numpy as np + +i8 = np.int64(1) +u8 = np.uint64(1) + +i4 = np.int32(1) +u4 = np.uint32(1) + +b_ = np.bool_(1) + +b = bool(1) +i = int(1) + +AR = np.array([0, 1, 2], dtype=np.int32) +AR.setflags(write=False) + + +reveal_type(i8 << i8) # E: {int64} +reveal_type(i8 >> i8) # E: {int64} +reveal_type(i8 | i8) # E: {int64} +reveal_type(i8 ^ i8) # E: {int64} +reveal_type(i8 & i8) # E: {int64} + +reveal_type(i8 << AR) # E: Any +reveal_type(i8 >> AR) # E: Any +reveal_type(i8 | AR) # E: Any +reveal_type(i8 ^ AR) # E: Any +reveal_type(i8 & AR) # E: Any + +reveal_type(i4 << i4) # E: {int32} +reveal_type(i4 >> i4) # E: {int32} +reveal_type(i4 | i4) # E: {int32} +reveal_type(i4 ^ i4) # E: {int32} +reveal_type(i4 & i4) # E: {int32} + +reveal_type(i8 << i4) # E: {int64} +reveal_type(i8 >> i4) # E: {int64} +reveal_type(i8 | i4) # E: {int64} +reveal_type(i8 ^ i4) # E: {int64} +reveal_type(i8 & i4) # E: {int64} + +reveal_type(i8 << i) # E: {int64} +reveal_type(i8 >> i) # E: {int64} +reveal_type(i8 | i) # E: {int64} +reveal_type(i8 ^ i) # E: {int64} +reveal_type(i8 & i) # E: {int64} + +reveal_type(i8 << b_) # E: {int64} +reveal_type(i8 >> b_) # E: {int64} +reveal_type(i8 | b_) # E: {int64} 
+reveal_type(i8 ^ b_) # E: {int64} +reveal_type(i8 & b_) # E: {int64} + +reveal_type(i8 << b) # E: {int64} +reveal_type(i8 >> b) # E: {int64} +reveal_type(i8 | b) # E: {int64} +reveal_type(i8 ^ b) # E: {int64} +reveal_type(i8 & b) # E: {int64} + +reveal_type(u8 << u8) # E: {uint64} +reveal_type(u8 >> u8) # E: {uint64} +reveal_type(u8 | u8) # E: {uint64} +reveal_type(u8 ^ u8) # E: {uint64} +reveal_type(u8 & u8) # E: {uint64} + +reveal_type(u8 << AR) # E: Any +reveal_type(u8 >> AR) # E: Any +reveal_type(u8 | AR) # E: Any +reveal_type(u8 ^ AR) # E: Any +reveal_type(u8 & AR) # E: Any + +reveal_type(u4 << u4) # E: {uint32} +reveal_type(u4 >> u4) # E: {uint32} +reveal_type(u4 | u4) # E: {uint32} +reveal_type(u4 ^ u4) # E: {uint32} +reveal_type(u4 & u4) # E: {uint32} + +reveal_type(u4 << i4) # E: signedinteger[Any] +reveal_type(u4 >> i4) # E: signedinteger[Any] +reveal_type(u4 | i4) # E: signedinteger[Any] +reveal_type(u4 ^ i4) # E: signedinteger[Any] +reveal_type(u4 & i4) # E: signedinteger[Any] + +reveal_type(u4 << i) # E: signedinteger[Any] +reveal_type(u4 >> i) # E: signedinteger[Any] +reveal_type(u4 | i) # E: signedinteger[Any] +reveal_type(u4 ^ i) # E: signedinteger[Any] +reveal_type(u4 & i) # E: signedinteger[Any] + +reveal_type(u8 << b_) # E: {uint64} +reveal_type(u8 >> b_) # E: {uint64} +reveal_type(u8 | b_) # E: {uint64} +reveal_type(u8 ^ b_) # E: {uint64} +reveal_type(u8 & b_) # E: {uint64} + +reveal_type(u8 << b) # E: {uint64} +reveal_type(u8 >> b) # E: {uint64} +reveal_type(u8 | b) # E: {uint64} +reveal_type(u8 ^ b) # E: {uint64} +reveal_type(u8 & b) # E: {uint64} + +reveal_type(b_ << b_) # E: {int8} +reveal_type(b_ >> b_) # E: {int8} +reveal_type(b_ | b_) # E: bool_ +reveal_type(b_ ^ b_) # E: bool_ +reveal_type(b_ & b_) # E: bool_ + +reveal_type(b_ << AR) # E: Any +reveal_type(b_ >> AR) # E: Any +reveal_type(b_ | AR) # E: Any +reveal_type(b_ ^ AR) # E: Any +reveal_type(b_ & AR) # E: Any + +reveal_type(b_ << b) # E: {int8} +reveal_type(b_ >> b) # E: {int8} +reveal_type(b_ | b) # E: bool_ +reveal_type(b_ ^ b) # E: bool_ +reveal_type(b_ & b) # E: bool_ + +reveal_type(b_ << i) # E: {int_} +reveal_type(b_ >> i) # E: {int_} +reveal_type(b_ | i) # E: {int_} +reveal_type(b_ ^ i) # E: {int_} +reveal_type(b_ & i) # E: {int_} + +reveal_type(~i8) # E: {int64} +reveal_type(~i4) # E: {int32} +reveal_type(~u8) # E: {uint64} +reveal_type(~u4) # E: {uint32} +reveal_type(~b_) # E: bool_ +reveal_type(~AR) # E: Any diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/char.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/char.pyi new file mode 100644 index 0000000..ce8c1b2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/char.pyi @@ -0,0 +1,147 @@ +import numpy as np +import numpy.typing as npt +from typing import Sequence + +AR_U: npt.NDArray[np.str_] +AR_S: npt.NDArray[np.bytes_] + +reveal_type(np.char.equal(AR_U, AR_U)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.char.equal(AR_S, AR_S)) # E: ndarray[Any, dtype[bool_]] + +reveal_type(np.char.not_equal(AR_U, AR_U)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.char.not_equal(AR_S, AR_S)) # E: ndarray[Any, dtype[bool_]] + +reveal_type(np.char.greater_equal(AR_U, AR_U)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.char.greater_equal(AR_S, AR_S)) # E: ndarray[Any, dtype[bool_]] + +reveal_type(np.char.less_equal(AR_U, AR_U)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.char.less_equal(AR_S, AR_S)) # E: ndarray[Any, dtype[bool_]] + 
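These `np.char` comparisons (the family continues below with `greater` and `less`) operate element-wise and case-sensitively, always producing a boolean array. For instance, with illustrative inputs:

import numpy as np

u = np.array(["apple", "Banana", "apple"])
s = np.array(["apple", "banana", "cherry"])
print(np.char.equal(u, s))  # [ True False False]
print(np.char.less(u, s))   # [False  True  True] ('B' sorts before 'b')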
+reveal_type(np.char.greater(AR_U, AR_U)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.char.greater(AR_S, AR_S)) # E: ndarray[Any, dtype[bool_]] + +reveal_type(np.char.less(AR_U, AR_U)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.char.less(AR_S, AR_S)) # E: ndarray[Any, dtype[bool_]] + +reveal_type(np.char.multiply(AR_U, 5)) # E: ndarray[Any, dtype[str_]] +reveal_type(np.char.multiply(AR_S, [5, 4, 3])) # E: ndarray[Any, dtype[bytes_]] + +reveal_type(np.char.mod(AR_U, "test")) # E: ndarray[Any, dtype[str_]] +reveal_type(np.char.mod(AR_S, "test")) # E: ndarray[Any, dtype[bytes_]] + +reveal_type(np.char.capitalize(AR_U)) # E: ndarray[Any, dtype[str_]] +reveal_type(np.char.capitalize(AR_S)) # E: ndarray[Any, dtype[bytes_]] + +reveal_type(np.char.center(AR_U, 5)) # E: ndarray[Any, dtype[str_]] +reveal_type(np.char.center(AR_S, [2, 3, 4], b"a")) # E: ndarray[Any, dtype[bytes_]] + +reveal_type(np.char.encode(AR_U)) # E: ndarray[Any, dtype[bytes_]] +reveal_type(np.char.decode(AR_S)) # E: ndarray[Any, dtype[str_]] + +reveal_type(np.char.expandtabs(AR_U)) # E: ndarray[Any, dtype[str_]] +reveal_type(np.char.expandtabs(AR_S, tabsize=4)) # E: ndarray[Any, dtype[bytes_]] + +reveal_type(np.char.join(AR_U, "_")) # E: ndarray[Any, dtype[str_]] +reveal_type(np.char.join(AR_S, [b"_", b""])) # E: ndarray[Any, dtype[bytes_]] + +reveal_type(np.char.ljust(AR_U, 5)) # E: ndarray[Any, dtype[str_]] +reveal_type(np.char.ljust(AR_S, [4, 3, 1], fillchar=[b"a", b"b", b"c"])) # E: ndarray[Any, dtype[bytes_]] +reveal_type(np.char.rjust(AR_U, 5)) # E: ndarray[Any, dtype[str_]] +reveal_type(np.char.rjust(AR_S, [4, 3, 1], fillchar=[b"a", b"b", b"c"])) # E: ndarray[Any, dtype[bytes_]] + +reveal_type(np.char.lstrip(AR_U)) # E: ndarray[Any, dtype[str_]] +reveal_type(np.char.lstrip(AR_S, chars=b"_")) # E: ndarray[Any, dtype[bytes_]] +reveal_type(np.char.rstrip(AR_U)) # E: ndarray[Any, dtype[str_]] +reveal_type(np.char.rstrip(AR_S, chars=b"_")) # E: ndarray[Any, dtype[bytes_]] +reveal_type(np.char.strip(AR_U)) # E: ndarray[Any, dtype[str_]] +reveal_type(np.char.strip(AR_S, chars=b"_")) # E: ndarray[Any, dtype[bytes_]] + +reveal_type(np.char.partition(AR_U, "\n")) # E: ndarray[Any, dtype[str_]] +reveal_type(np.char.partition(AR_S, [b"a", b"b", b"c"])) # E: ndarray[Any, dtype[bytes_]] +reveal_type(np.char.rpartition(AR_U, "\n")) # E: ndarray[Any, dtype[str_]] +reveal_type(np.char.rpartition(AR_S, [b"a", b"b", b"c"])) # E: ndarray[Any, dtype[bytes_]] + +reveal_type(np.char.replace(AR_U, "_", "-")) # E: ndarray[Any, dtype[str_]] +reveal_type(np.char.replace(AR_S, [b"_", b""], [b"a", b"b"])) # E: ndarray[Any, dtype[bytes_]] + +reveal_type(np.char.split(AR_U, "_")) # E: ndarray[Any, dtype[object_]] +reveal_type(np.char.split(AR_S, maxsplit=[1, 2, 3])) # E: ndarray[Any, dtype[object_]] +reveal_type(np.char.rsplit(AR_U, "_")) # E: ndarray[Any, dtype[object_]] +reveal_type(np.char.rsplit(AR_S, maxsplit=[1, 2, 3])) # E: ndarray[Any, dtype[object_]] + +reveal_type(np.char.splitlines(AR_U)) # E: ndarray[Any, dtype[object_]] +reveal_type(np.char.splitlines(AR_S, keepends=[True, True, False])) # E: ndarray[Any, dtype[object_]] + +reveal_type(np.char.swapcase(AR_U)) # E: ndarray[Any, dtype[str_]] +reveal_type(np.char.swapcase(AR_S)) # E: ndarray[Any, dtype[bytes_]] + +reveal_type(np.char.title(AR_U)) # E: ndarray[Any, dtype[str_]] +reveal_type(np.char.title(AR_S)) # E: ndarray[Any, dtype[bytes_]] + +reveal_type(np.char.upper(AR_U)) # E: ndarray[Any, dtype[str_]] +reveal_type(np.char.upper(AR_S)) # E: ndarray[Any, dtype[bytes_]] + 
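The transforms above, `capitalize` through `upper`, are vectorized counterparts of the plain `str`/`bytes` methods: each returns a fresh array of the same kind, widening the itemsize where needed rather than mutating in place. A brief example:

import numpy as np

u = np.array(["pooh", "piglet"])
print(np.char.capitalize(u))       # ['Pooh' 'Piglet']
print(np.char.center(u, 10, "*"))  # ['***pooh***' '**piglet**']
print(np.char.upper(u))            # ['POOH' 'PIGLET']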
+reveal_type(np.char.zfill(AR_U, 5)) # E: ndarray[Any, dtype[str_]] +reveal_type(np.char.zfill(AR_S, [2, 3, 4])) # E: ndarray[Any, dtype[bytes_]] + +reveal_type(np.char.count(AR_U, "a", start=[1, 2, 3])) # E: ndarray[Any, dtype[{int_}]] +reveal_type(np.char.count(AR_S, [b"a", b"b", b"c"], end=9)) # E: ndarray[Any, dtype[{int_}]] + +reveal_type(np.char.endswith(AR_U, "a", start=[1, 2, 3])) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.char.endswith(AR_S, [b"a", b"b", b"c"], end=9)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.char.startswith(AR_U, "a", start=[1, 2, 3])) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.char.startswith(AR_S, [b"a", b"b", b"c"], end=9)) # E: ndarray[Any, dtype[bool_]] + +reveal_type(np.char.find(AR_U, "a", start=[1, 2, 3])) # E: ndarray[Any, dtype[{int_}]] +reveal_type(np.char.find(AR_S, [b"a", b"b", b"c"], end=9)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(np.char.rfind(AR_U, "a", start=[1, 2, 3])) # E: ndarray[Any, dtype[{int_}]] +reveal_type(np.char.rfind(AR_S, [b"a", b"b", b"c"], end=9)) # E: ndarray[Any, dtype[{int_}]] + +reveal_type(np.char.index(AR_U, "a", start=[1, 2, 3])) # E: ndarray[Any, dtype[{int_}]] +reveal_type(np.char.index(AR_S, [b"a", b"b", b"c"], end=9)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(np.char.rindex(AR_U, "a", start=[1, 2, 3])) # E: ndarray[Any, dtype[{int_}]] +reveal_type(np.char.rindex(AR_S, [b"a", b"b", b"c"], end=9)) # E: ndarray[Any, dtype[{int_}]] + +reveal_type(np.char.isalpha(AR_U)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.char.isalpha(AR_S)) # E: ndarray[Any, dtype[bool_]] + +reveal_type(np.char.isalnum(AR_U)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.char.isalnum(AR_S)) # E: ndarray[Any, dtype[bool_]] + +reveal_type(np.char.isdecimal(AR_U)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.char.isdecimal(AR_S)) # E: ndarray[Any, dtype[bool_]] + +reveal_type(np.char.isdigit(AR_U)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.char.isdigit(AR_S)) # E: ndarray[Any, dtype[bool_]] + +reveal_type(np.char.islower(AR_U)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.char.islower(AR_S)) # E: ndarray[Any, dtype[bool_]] + +reveal_type(np.char.isnumeric(AR_U)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.char.isnumeric(AR_S)) # E: ndarray[Any, dtype[bool_]] + +reveal_type(np.char.isspace(AR_U)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.char.isspace(AR_S)) # E: ndarray[Any, dtype[bool_]] + +reveal_type(np.char.istitle(AR_U)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.char.istitle(AR_S)) # E: ndarray[Any, dtype[bool_]] + +reveal_type(np.char.isupper(AR_U)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.char.isupper(AR_S)) # E: ndarray[Any, dtype[bool_]] + +reveal_type(np.char.str_len(AR_U)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(np.char.str_len(AR_S)) # E: ndarray[Any, dtype[{int_}]] + +reveal_type(np.char.array(AR_U)) # E: chararray[Any, dtype[str_]] +reveal_type(np.char.array(AR_S, order="K")) # E: chararray[Any, dtype[bytes_]] +reveal_type(np.char.array("bob", copy=True)) # E: chararray[Any, dtype[str_]] +reveal_type(np.char.array(b"bob", itemsize=5)) # E: chararray[Any, dtype[bytes_]] +reveal_type(np.char.array(1, unicode=False)) # E: chararray[Any, dtype[bytes_]] +reveal_type(np.char.array(1, unicode=True)) # E: chararray[Any, dtype[str_]] + +reveal_type(np.char.asarray(AR_U)) # E: chararray[Any, dtype[str_]] +reveal_type(np.char.asarray(AR_S, order="K")) # E: chararray[Any, dtype[bytes_]] +reveal_type(np.char.asarray("bob")) # E: chararray[Any, dtype[str_]] 
+reveal_type(np.char.asarray(b"bob", itemsize=5)) # E: chararray[Any, dtype[bytes_]] +reveal_type(np.char.asarray(1, unicode=False)) # E: chararray[Any, dtype[bytes_]] +reveal_type(np.char.asarray(1, unicode=True)) # E: chararray[Any, dtype[str_]] diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/chararray.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/chararray.pyi new file mode 100644 index 0000000..3da2e15 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/chararray.pyi @@ -0,0 +1,129 @@ +import numpy as np +from typing import Any + +AR_U: np.chararray[Any, np.dtype[np.str_]] +AR_S: np.chararray[Any, np.dtype[np.bytes_]] + +reveal_type(AR_U == AR_U) # E: ndarray[Any, dtype[bool_]] +reveal_type(AR_S == AR_S) # E: ndarray[Any, dtype[bool_]] + +reveal_type(AR_U != AR_U) # E: ndarray[Any, dtype[bool_]] +reveal_type(AR_S != AR_S) # E: ndarray[Any, dtype[bool_]] + +reveal_type(AR_U >= AR_U) # E: ndarray[Any, dtype[bool_]] +reveal_type(AR_S >= AR_S) # E: ndarray[Any, dtype[bool_]] + +reveal_type(AR_U <= AR_U) # E: ndarray[Any, dtype[bool_]] +reveal_type(AR_S <= AR_S) # E: ndarray[Any, dtype[bool_]] + +reveal_type(AR_U > AR_U) # E: ndarray[Any, dtype[bool_]] +reveal_type(AR_S > AR_S) # E: ndarray[Any, dtype[bool_]] + +reveal_type(AR_U < AR_U) # E: ndarray[Any, dtype[bool_]] +reveal_type(AR_S < AR_S) # E: ndarray[Any, dtype[bool_]] + +reveal_type(AR_U * 5) # E: chararray[Any, dtype[str_]] +reveal_type(AR_S * [5]) # E: chararray[Any, dtype[bytes_]] + +reveal_type(AR_U % "test") # E: chararray[Any, dtype[str_]] +reveal_type(AR_S % b"test") # E: chararray[Any, dtype[bytes_]] + +reveal_type(AR_U.capitalize()) # E: chararray[Any, dtype[str_]] +reveal_type(AR_S.capitalize()) # E: chararray[Any, dtype[bytes_]] + +reveal_type(AR_U.center(5)) # E: chararray[Any, dtype[str_]] +reveal_type(AR_S.center([2, 3, 4], b"a")) # E: chararray[Any, dtype[bytes_]] + +reveal_type(AR_U.encode()) # E: chararray[Any, dtype[bytes_]] +reveal_type(AR_S.decode()) # E: chararray[Any, dtype[str_]] + +reveal_type(AR_U.expandtabs()) # E: chararray[Any, dtype[str_]] +reveal_type(AR_S.expandtabs(tabsize=4)) # E: chararray[Any, dtype[bytes_]] + +reveal_type(AR_U.join("_")) # E: chararray[Any, dtype[str_]] +reveal_type(AR_S.join([b"_", b""])) # E: chararray[Any, dtype[bytes_]] + +reveal_type(AR_U.ljust(5)) # E: chararray[Any, dtype[str_]] +reveal_type(AR_S.ljust([4, 3, 1], fillchar=[b"a", b"b", b"c"])) # E: chararray[Any, dtype[bytes_]] +reveal_type(AR_U.rjust(5)) # E: chararray[Any, dtype[str_]] +reveal_type(AR_S.rjust([4, 3, 1], fillchar=[b"a", b"b", b"c"])) # E: chararray[Any, dtype[bytes_]] + +reveal_type(AR_U.lstrip()) # E: chararray[Any, dtype[str_]] +reveal_type(AR_S.lstrip(chars=b"_")) # E: chararray[Any, dtype[bytes_]] +reveal_type(AR_U.rstrip()) # E: chararray[Any, dtype[str_]] +reveal_type(AR_S.rstrip(chars=b"_")) # E: chararray[Any, dtype[bytes_]] +reveal_type(AR_U.strip()) # E: chararray[Any, dtype[str_]] +reveal_type(AR_S.strip(chars=b"_")) # E: chararray[Any, dtype[bytes_]] + +reveal_type(AR_U.partition("\n")) # E: chararray[Any, dtype[str_]] +reveal_type(AR_S.partition([b"a", b"b", b"c"])) # E: chararray[Any, dtype[bytes_]] +reveal_type(AR_U.rpartition("\n")) # E: chararray[Any, dtype[str_]] +reveal_type(AR_S.rpartition([b"a", b"b", b"c"])) # E: chararray[Any, dtype[bytes_]] + +reveal_type(AR_U.replace("_", "-")) # E: chararray[Any, dtype[str_]] +reveal_type(AR_S.replace([b"_", b""], [b"a", b"b"])) # E: chararray[Any, 
dtype[bytes_]] + +reveal_type(AR_U.split("_")) # E: ndarray[Any, dtype[object_]] +reveal_type(AR_S.split(maxsplit=[1, 2, 3])) # E: ndarray[Any, dtype[object_]] +reveal_type(AR_U.rsplit("_")) # E: ndarray[Any, dtype[object_]] +reveal_type(AR_S.rsplit(maxsplit=[1, 2, 3])) # E: ndarray[Any, dtype[object_]] + +reveal_type(AR_U.splitlines()) # E: ndarray[Any, dtype[object_]] +reveal_type(AR_S.splitlines(keepends=[True, True, False])) # E: ndarray[Any, dtype[object_]] + +reveal_type(AR_U.swapcase()) # E: chararray[Any, dtype[str_]] +reveal_type(AR_S.swapcase()) # E: chararray[Any, dtype[bytes_]] + +reveal_type(AR_U.title()) # E: chararray[Any, dtype[str_]] +reveal_type(AR_S.title()) # E: chararray[Any, dtype[bytes_]] + +reveal_type(AR_U.upper()) # E: chararray[Any, dtype[str_]] +reveal_type(AR_S.upper()) # E: chararray[Any, dtype[bytes_]] + +reveal_type(AR_U.zfill(5)) # E: chararray[Any, dtype[str_]] +reveal_type(AR_S.zfill([2, 3, 4])) # E: chararray[Any, dtype[bytes_]] + +reveal_type(AR_U.count("a", start=[1, 2, 3])) # E: ndarray[Any, dtype[{int_}]] +reveal_type(AR_S.count([b"a", b"b", b"c"], end=9)) # E: ndarray[Any, dtype[{int_}]] + +reveal_type(AR_U.endswith("a", start=[1, 2, 3])) # E: ndarray[Any, dtype[bool_]] +reveal_type(AR_S.endswith([b"a", b"b", b"c"], end=9)) # E: ndarray[Any, dtype[bool_]] +reveal_type(AR_U.startswith("a", start=[1, 2, 3])) # E: ndarray[Any, dtype[bool_]] +reveal_type(AR_S.startswith([b"a", b"b", b"c"], end=9)) # E: ndarray[Any, dtype[bool_]] + +reveal_type(AR_U.find("a", start=[1, 2, 3])) # E: ndarray[Any, dtype[{int_}]] +reveal_type(AR_S.find([b"a", b"b", b"c"], end=9)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(AR_U.rfind("a", start=[1, 2, 3])) # E: ndarray[Any, dtype[{int_}]] +reveal_type(AR_S.rfind([b"a", b"b", b"c"], end=9)) # E: ndarray[Any, dtype[{int_}]] + +reveal_type(AR_U.index("a", start=[1, 2, 3])) # E: ndarray[Any, dtype[{int_}]] +reveal_type(AR_S.index([b"a", b"b", b"c"], end=9)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(AR_U.rindex("a", start=[1, 2, 3])) # E: ndarray[Any, dtype[{int_}]] +reveal_type(AR_S.rindex([b"a", b"b", b"c"], end=9)) # E: ndarray[Any, dtype[{int_}]] + +reveal_type(AR_U.isalpha()) # E: ndarray[Any, dtype[bool_]] +reveal_type(AR_S.isalpha()) # E: ndarray[Any, dtype[bool_]] + +reveal_type(AR_U.isalnum()) # E: ndarray[Any, dtype[bool_]] +reveal_type(AR_S.isalnum()) # E: ndarray[Any, dtype[bool_]] + +reveal_type(AR_U.isdecimal()) # E: ndarray[Any, dtype[bool_]] +reveal_type(AR_S.isdecimal()) # E: ndarray[Any, dtype[bool_]] + +reveal_type(AR_U.isdigit()) # E: ndarray[Any, dtype[bool_]] +reveal_type(AR_S.isdigit()) # E: ndarray[Any, dtype[bool_]] + +reveal_type(AR_U.islower()) # E: ndarray[Any, dtype[bool_]] +reveal_type(AR_S.islower()) # E: ndarray[Any, dtype[bool_]] + +reveal_type(AR_U.isnumeric()) # E: ndarray[Any, dtype[bool_]] +reveal_type(AR_S.isnumeric()) # E: ndarray[Any, dtype[bool_]] + +reveal_type(AR_U.isspace()) # E: ndarray[Any, dtype[bool_]] +reveal_type(AR_S.isspace()) # E: ndarray[Any, dtype[bool_]] + +reveal_type(AR_U.istitle()) # E: ndarray[Any, dtype[bool_]] +reveal_type(AR_S.istitle()) # E: ndarray[Any, dtype[bool_]] + +reveal_type(AR_U.isupper()) # E: ndarray[Any, dtype[bool_]] +reveal_type(AR_S.isupper()) # E: ndarray[Any, dtype[bool_]] diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/comparisons.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/comparisons.pyi new file mode 100644 index 0000000..ecd8ea6 --- /dev/null +++ 
b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/comparisons.pyi @@ -0,0 +1,252 @@ +import numpy as np + +c16 = np.complex128() +f8 = np.float64() +i8 = np.int64() +u8 = np.uint64() + +c8 = np.complex64() +f4 = np.float32() +i4 = np.int32() +u4 = np.uint32() + +dt = np.datetime64(0, "D") +td = np.timedelta64(0, "D") + +b_ = np.bool_() + +b = bool() +c = complex() +f = float() +i = int() + +AR = np.array([0], dtype=np.int64) +AR.setflags(write=False) + +SEQ = (0, 1, 2, 3, 4) + +# Time structures + +reveal_type(dt > dt) # E: bool_ + +reveal_type(td > td) # E: bool_ +reveal_type(td > i) # E: bool_ +reveal_type(td > i4) # E: bool_ +reveal_type(td > i8) # E: bool_ + +reveal_type(td > AR) # E: ndarray[Any, dtype[bool_]] +reveal_type(td > SEQ) # E: ndarray[Any, dtype[bool_]] +reveal_type(AR > SEQ) # E: ndarray[Any, dtype[bool_]] +reveal_type(AR > td) # E: ndarray[Any, dtype[bool_]] +reveal_type(SEQ > td) # E: ndarray[Any, dtype[bool_]] +reveal_type(SEQ > AR) # E: ndarray[Any, dtype[bool_]] + +# boolean + +reveal_type(b_ > b) # E: bool_ +reveal_type(b_ > b_) # E: bool_ +reveal_type(b_ > i) # E: bool_ +reveal_type(b_ > i8) # E: bool_ +reveal_type(b_ > i4) # E: bool_ +reveal_type(b_ > u8) # E: bool_ +reveal_type(b_ > u4) # E: bool_ +reveal_type(b_ > f) # E: bool_ +reveal_type(b_ > f8) # E: bool_ +reveal_type(b_ > f4) # E: bool_ +reveal_type(b_ > c) # E: bool_ +reveal_type(b_ > c16) # E: bool_ +reveal_type(b_ > c8) # E: bool_ +reveal_type(b_ > AR) # E: ndarray[Any, dtype[bool_]] +reveal_type(b_ > SEQ) # E: ndarray[Any, dtype[bool_]] + +# Complex + +reveal_type(c16 > c16) # E: bool_ +reveal_type(c16 > f8) # E: bool_ +reveal_type(c16 > i8) # E: bool_ +reveal_type(c16 > c8) # E: bool_ +reveal_type(c16 > f4) # E: bool_ +reveal_type(c16 > i4) # E: bool_ +reveal_type(c16 > b_) # E: bool_ +reveal_type(c16 > b) # E: bool_ +reveal_type(c16 > c) # E: bool_ +reveal_type(c16 > f) # E: bool_ +reveal_type(c16 > i) # E: bool_ +reveal_type(c16 > AR) # E: ndarray[Any, dtype[bool_]] +reveal_type(c16 > SEQ) # E: ndarray[Any, dtype[bool_]] + +reveal_type(c16 > c16) # E: bool_ +reveal_type(f8 > c16) # E: bool_ +reveal_type(i8 > c16) # E: bool_ +reveal_type(c8 > c16) # E: bool_ +reveal_type(f4 > c16) # E: bool_ +reveal_type(i4 > c16) # E: bool_ +reveal_type(b_ > c16) # E: bool_ +reveal_type(b > c16) # E: bool_ +reveal_type(c > c16) # E: bool_ +reveal_type(f > c16) # E: bool_ +reveal_type(i > c16) # E: bool_ +reveal_type(AR > c16) # E: ndarray[Any, dtype[bool_]] +reveal_type(SEQ > c16) # E: ndarray[Any, dtype[bool_]] + +reveal_type(c8 > c16) # E: bool_ +reveal_type(c8 > f8) # E: bool_ +reveal_type(c8 > i8) # E: bool_ +reveal_type(c8 > c8) # E: bool_ +reveal_type(c8 > f4) # E: bool_ +reveal_type(c8 > i4) # E: bool_ +reveal_type(c8 > b_) # E: bool_ +reveal_type(c8 > b) # E: bool_ +reveal_type(c8 > c) # E: bool_ +reveal_type(c8 > f) # E: bool_ +reveal_type(c8 > i) # E: bool_ +reveal_type(c8 > AR) # E: ndarray[Any, dtype[bool_]] +reveal_type(c8 > SEQ) # E: ndarray[Any, dtype[bool_]] + +reveal_type(c16 > c8) # E: bool_ +reveal_type(f8 > c8) # E: bool_ +reveal_type(i8 > c8) # E: bool_ +reveal_type(c8 > c8) # E: bool_ +reveal_type(f4 > c8) # E: bool_ +reveal_type(i4 > c8) # E: bool_ +reveal_type(b_ > c8) # E: bool_ +reveal_type(b > c8) # E: bool_ +reveal_type(c > c8) # E: bool_ +reveal_type(f > c8) # E: bool_ +reveal_type(i > c8) # E: bool_ +reveal_type(AR > c8) # E: ndarray[Any, dtype[bool_]] +reveal_type(SEQ > c8) # E: ndarray[Any, dtype[bool_]] + +# Float + +reveal_type(f8 > f8) # E: bool_ +reveal_type(f8 > 
i8) # E: bool_ +reveal_type(f8 > f4) # E: bool_ +reveal_type(f8 > i4) # E: bool_ +reveal_type(f8 > b_) # E: bool_ +reveal_type(f8 > b) # E: bool_ +reveal_type(f8 > c) # E: bool_ +reveal_type(f8 > f) # E: bool_ +reveal_type(f8 > i) # E: bool_ +reveal_type(f8 > AR) # E: ndarray[Any, dtype[bool_]] +reveal_type(f8 > SEQ) # E: ndarray[Any, dtype[bool_]] + +reveal_type(f8 > f8) # E: bool_ +reveal_type(i8 > f8) # E: bool_ +reveal_type(f4 > f8) # E: bool_ +reveal_type(i4 > f8) # E: bool_ +reveal_type(b_ > f8) # E: bool_ +reveal_type(b > f8) # E: bool_ +reveal_type(c > f8) # E: bool_ +reveal_type(f > f8) # E: bool_ +reveal_type(i > f8) # E: bool_ +reveal_type(AR > f8) # E: ndarray[Any, dtype[bool_]] +reveal_type(SEQ > f8) # E: ndarray[Any, dtype[bool_]] + +reveal_type(f4 > f8) # E: bool_ +reveal_type(f4 > i8) # E: bool_ +reveal_type(f4 > f4) # E: bool_ +reveal_type(f4 > i4) # E: bool_ +reveal_type(f4 > b_) # E: bool_ +reveal_type(f4 > b) # E: bool_ +reveal_type(f4 > c) # E: bool_ +reveal_type(f4 > f) # E: bool_ +reveal_type(f4 > i) # E: bool_ +reveal_type(f4 > AR) # E: ndarray[Any, dtype[bool_]] +reveal_type(f4 > SEQ) # E: ndarray[Any, dtype[bool_]] + +reveal_type(f8 > f4) # E: bool_ +reveal_type(i8 > f4) # E: bool_ +reveal_type(f4 > f4) # E: bool_ +reveal_type(i4 > f4) # E: bool_ +reveal_type(b_ > f4) # E: bool_ +reveal_type(b > f4) # E: bool_ +reveal_type(c > f4) # E: bool_ +reveal_type(f > f4) # E: bool_ +reveal_type(i > f4) # E: bool_ +reveal_type(AR > f4) # E: ndarray[Any, dtype[bool_]] +reveal_type(SEQ > f4) # E: ndarray[Any, dtype[bool_]] + +# Int + +reveal_type(i8 > i8) # E: bool_ +reveal_type(i8 > u8) # E: bool_ +reveal_type(i8 > i4) # E: bool_ +reveal_type(i8 > u4) # E: bool_ +reveal_type(i8 > b_) # E: bool_ +reveal_type(i8 > b) # E: bool_ +reveal_type(i8 > c) # E: bool_ +reveal_type(i8 > f) # E: bool_ +reveal_type(i8 > i) # E: bool_ +reveal_type(i8 > AR) # E: ndarray[Any, dtype[bool_]] +reveal_type(i8 > SEQ) # E: ndarray[Any, dtype[bool_]] + +reveal_type(u8 > u8) # E: bool_ +reveal_type(u8 > i4) # E: bool_ +reveal_type(u8 > u4) # E: bool_ +reveal_type(u8 > b_) # E: bool_ +reveal_type(u8 > b) # E: bool_ +reveal_type(u8 > c) # E: bool_ +reveal_type(u8 > f) # E: bool_ +reveal_type(u8 > i) # E: bool_ +reveal_type(u8 > AR) # E: ndarray[Any, dtype[bool_]] +reveal_type(u8 > SEQ) # E: ndarray[Any, dtype[bool_]] + +reveal_type(i8 > i8) # E: bool_ +reveal_type(u8 > i8) # E: bool_ +reveal_type(i4 > i8) # E: bool_ +reveal_type(u4 > i8) # E: bool_ +reveal_type(b_ > i8) # E: bool_ +reveal_type(b > i8) # E: bool_ +reveal_type(c > i8) # E: bool_ +reveal_type(f > i8) # E: bool_ +reveal_type(i > i8) # E: bool_ +reveal_type(AR > i8) # E: ndarray[Any, dtype[bool_]] +reveal_type(SEQ > i8) # E: ndarray[Any, dtype[bool_]] + +reveal_type(u8 > u8) # E: bool_ +reveal_type(i4 > u8) # E: bool_ +reveal_type(u4 > u8) # E: bool_ +reveal_type(b_ > u8) # E: bool_ +reveal_type(b > u8) # E: bool_ +reveal_type(c > u8) # E: bool_ +reveal_type(f > u8) # E: bool_ +reveal_type(i > u8) # E: bool_ +reveal_type(AR > u8) # E: ndarray[Any, dtype[bool_]] +reveal_type(SEQ > u8) # E: ndarray[Any, dtype[bool_]] + +reveal_type(i4 > i8) # E: bool_ +reveal_type(i4 > i4) # E: bool_ +reveal_type(i4 > i) # E: bool_ +reveal_type(i4 > b_) # E: bool_ +reveal_type(i4 > b) # E: bool_ +reveal_type(i4 > AR) # E: ndarray[Any, dtype[bool_]] +reveal_type(i4 > SEQ) # E: ndarray[Any, dtype[bool_]] + +reveal_type(u4 > i8) # E: bool_ +reveal_type(u4 > i4) # E: bool_ +reveal_type(u4 > u8) # E: bool_ +reveal_type(u4 > u4) # E: bool_ +reveal_type(u4 > i) # 
E: bool_ +reveal_type(u4 > b_) # E: bool_ +reveal_type(u4 > b) # E: bool_ +reveal_type(u4 > AR) # E: ndarray[Any, dtype[bool_]] +reveal_type(u4 > SEQ) # E: ndarray[Any, dtype[bool_]] + +reveal_type(i8 > i4) # E: bool_ +reveal_type(i4 > i4) # E: bool_ +reveal_type(i > i4) # E: bool_ +reveal_type(b_ > i4) # E: bool_ +reveal_type(b > i4) # E: bool_ +reveal_type(AR > i4) # E: ndarray[Any, dtype[bool_]] +reveal_type(SEQ > i4) # E: ndarray[Any, dtype[bool_]] + +reveal_type(i8 > u4) # E: bool_ +reveal_type(i4 > u4) # E: bool_ +reveal_type(u8 > u4) # E: bool_ +reveal_type(u4 > u4) # E: bool_ +reveal_type(b_ > u4) # E: bool_ +reveal_type(b > u4) # E: bool_ +reveal_type(i > u4) # E: bool_ +reveal_type(AR > u4) # E: ndarray[Any, dtype[bool_]] +reveal_type(SEQ > u4) # E: ndarray[Any, dtype[bool_]] diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/constants.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/constants.pyi new file mode 100644 index 0000000..37f54cc --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/constants.pyi @@ -0,0 +1,52 @@ +import numpy as np + +reveal_type(np.Inf) # E: float +reveal_type(np.Infinity) # E: float +reveal_type(np.NAN) # E: float +reveal_type(np.NINF) # E: float +reveal_type(np.NZERO) # E: float +reveal_type(np.NaN) # E: float +reveal_type(np.PINF) # E: float +reveal_type(np.PZERO) # E: float +reveal_type(np.e) # E: float +reveal_type(np.euler_gamma) # E: float +reveal_type(np.inf) # E: float +reveal_type(np.infty) # E: float +reveal_type(np.nan) # E: float +reveal_type(np.pi) # E: float + +reveal_type(np.ALLOW_THREADS) # E: int +reveal_type(np.BUFSIZE) # E: Literal[8192] +reveal_type(np.CLIP) # E: Literal[0] +reveal_type(np.ERR_CALL) # E: Literal[3] +reveal_type(np.ERR_DEFAULT) # E: Literal[521] +reveal_type(np.ERR_IGNORE) # E: Literal[0] +reveal_type(np.ERR_LOG) # E: Literal[5] +reveal_type(np.ERR_PRINT) # E: Literal[4] +reveal_type(np.ERR_RAISE) # E: Literal[2] +reveal_type(np.ERR_WARN) # E: Literal[1] +reveal_type(np.FLOATING_POINT_SUPPORT) # E: Literal[1] +reveal_type(np.FPE_DIVIDEBYZERO) # E: Literal[1] +reveal_type(np.FPE_INVALID) # E: Literal[8] +reveal_type(np.FPE_OVERFLOW) # E: Literal[2] +reveal_type(np.FPE_UNDERFLOW) # E: Literal[4] +reveal_type(np.MAXDIMS) # E: Literal[32] +reveal_type(np.MAY_SHARE_BOUNDS) # E: Literal[0] +reveal_type(np.MAY_SHARE_EXACT) # E: Literal[-1] +reveal_type(np.RAISE) # E: Literal[2] +reveal_type(np.SHIFT_DIVIDEBYZERO) # E: Literal[0] +reveal_type(np.SHIFT_INVALID) # E: Literal[9] +reveal_type(np.SHIFT_OVERFLOW) # E: Literal[3] +reveal_type(np.SHIFT_UNDERFLOW) # E: Literal[6] +reveal_type(np.UFUNC_BUFSIZE_DEFAULT) # E: Literal[8192] +reveal_type(np.WRAP) # E: Literal[1] +reveal_type(np.tracemalloc_domain) # E: Literal[389047] + +reveal_type(np.little_endian) # E: bool +reveal_type(np.True_) # E: bool_ +reveal_type(np.False_) # E: bool_ + +reveal_type(np.UFUNC_PYVALS_NAME) # E: Literal['UFUNC_PYVALS'] + +reveal_type(np.sctypeDict) # E: dict +reveal_type(np.sctypes) # E: TypedDict diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/ctypeslib.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/ctypeslib.pyi new file mode 100644 index 0000000..ccbdfe3 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/ctypeslib.pyi @@ -0,0 +1,87 @@ +import ctypes +from typing import Any + +import numpy as np +import numpy.typing as npt + +AR_bool: npt.NDArray[np.bool_] +AR_ubyte: 
npt.NDArray[np.ubyte] +AR_ushort: npt.NDArray[np.ushort] +AR_uintc: npt.NDArray[np.uintc] +AR_uint: npt.NDArray[np.uint] +AR_ulonglong: npt.NDArray[np.ulonglong] +AR_byte: npt.NDArray[np.byte] +AR_short: npt.NDArray[np.short] +AR_intc: npt.NDArray[np.intc] +AR_int: npt.NDArray[np.int_] +AR_longlong: npt.NDArray[np.longlong] +AR_single: npt.NDArray[np.single] +AR_double: npt.NDArray[np.double] +AR_longdouble: npt.NDArray[np.longdouble] +AR_void: npt.NDArray[np.void] + +pointer: ctypes.pointer[Any] + +reveal_type(np.ctypeslib.c_intp()) # E: {c_intp} + +reveal_type(np.ctypeslib.ndpointer()) # E: Type[ctypeslib._ndptr[None]] +reveal_type(np.ctypeslib.ndpointer(dtype=np.float64)) # E: Type[ctypeslib._ndptr[dtype[{float64}]]] +reveal_type(np.ctypeslib.ndpointer(dtype=float)) # E: Type[ctypeslib._ndptr[dtype[Any]]] +reveal_type(np.ctypeslib.ndpointer(shape=(10, 3))) # E: Type[ctypeslib._ndptr[None]] +reveal_type(np.ctypeslib.ndpointer(np.int64, shape=(10, 3))) # E: Type[ctypeslib._concrete_ndptr[dtype[{int64}]]] +reveal_type(np.ctypeslib.ndpointer(int, shape=(1,))) # E: Type[ctypeslib._concrete_ndptr[dtype[Any]]] + +reveal_type(np.ctypeslib.as_ctypes_type(np.bool_)) # E: Type[ctypes.c_bool] +reveal_type(np.ctypeslib.as_ctypes_type(np.ubyte)) # E: Type[{c_ubyte}] +reveal_type(np.ctypeslib.as_ctypes_type(np.ushort)) # E: Type[{c_ushort}] +reveal_type(np.ctypeslib.as_ctypes_type(np.uintc)) # E: Type[{c_uint}] +reveal_type(np.ctypeslib.as_ctypes_type(np.uint)) # E: Type[{c_ulong}] +reveal_type(np.ctypeslib.as_ctypes_type(np.ulonglong)) # E: Type[{c_ulonglong}] +reveal_type(np.ctypeslib.as_ctypes_type(np.byte)) # E: Type[{c_byte}] +reveal_type(np.ctypeslib.as_ctypes_type(np.short)) # E: Type[{c_short}] +reveal_type(np.ctypeslib.as_ctypes_type(np.intc)) # E: Type[{c_int}] +reveal_type(np.ctypeslib.as_ctypes_type(np.int_)) # E: Type[{c_long}] +reveal_type(np.ctypeslib.as_ctypes_type(np.longlong)) # E: Type[{c_longlong}] +reveal_type(np.ctypeslib.as_ctypes_type(np.single)) # E: Type[{c_float}] +reveal_type(np.ctypeslib.as_ctypes_type(np.double)) # E: Type[{c_double}] +reveal_type(np.ctypeslib.as_ctypes_type(np.longdouble)) # E: Type[{c_longdouble}] +reveal_type(np.ctypeslib.as_ctypes_type(ctypes.c_double)) # E: Type[{c_double}] +reveal_type(np.ctypeslib.as_ctypes_type("q")) # E: Type[ctypes.c_longlong] +reveal_type(np.ctypeslib.as_ctypes_type([("i8", np.int64), ("f8", np.float64)])) # E: Type[Any] +reveal_type(np.ctypeslib.as_ctypes_type("i8")) # E: Type[Any] +reveal_type(np.ctypeslib.as_ctypes_type("f8")) # E: Type[Any] + +reveal_type(np.ctypeslib.as_ctypes(AR_bool.take(0))) # E: ctypes.c_bool +reveal_type(np.ctypeslib.as_ctypes(AR_ubyte.take(0))) # E: {c_ubyte} +reveal_type(np.ctypeslib.as_ctypes(AR_ushort.take(0))) # E: {c_ushort} +reveal_type(np.ctypeslib.as_ctypes(AR_uintc.take(0))) # E: {c_uint} +reveal_type(np.ctypeslib.as_ctypes(AR_uint.take(0))) # E: {c_ulong} +reveal_type(np.ctypeslib.as_ctypes(AR_ulonglong.take(0))) # E: {c_ulonglong} +reveal_type(np.ctypeslib.as_ctypes(AR_byte.take(0))) # E: {c_byte} +reveal_type(np.ctypeslib.as_ctypes(AR_short.take(0))) # E: {c_short} +reveal_type(np.ctypeslib.as_ctypes(AR_intc.take(0))) # E: {c_int} +reveal_type(np.ctypeslib.as_ctypes(AR_int.take(0))) # E: {c_long} +reveal_type(np.ctypeslib.as_ctypes(AR_longlong.take(0))) # E: {c_longlong} +reveal_type(np.ctypeslib.as_ctypes(AR_single.take(0))) # E: {c_float} +reveal_type(np.ctypeslib.as_ctypes(AR_double.take(0))) # E: {c_double} +reveal_type(np.ctypeslib.as_ctypes(AR_longdouble.take(0))) # E: 
{c_longdouble} +reveal_type(np.ctypeslib.as_ctypes(AR_void.take(0))) # E: Any +reveal_type(np.ctypeslib.as_ctypes(AR_bool)) # E: ctypes.Array[ctypes.c_bool] +reveal_type(np.ctypeslib.as_ctypes(AR_ubyte)) # E: ctypes.Array[{c_ubyte}] +reveal_type(np.ctypeslib.as_ctypes(AR_ushort)) # E: ctypes.Array[{c_ushort}] +reveal_type(np.ctypeslib.as_ctypes(AR_uintc)) # E: ctypes.Array[{c_uint}] +reveal_type(np.ctypeslib.as_ctypes(AR_uint)) # E: ctypes.Array[{c_ulong}] +reveal_type(np.ctypeslib.as_ctypes(AR_ulonglong)) # E: ctypes.Array[{c_ulonglong}] +reveal_type(np.ctypeslib.as_ctypes(AR_byte)) # E: ctypes.Array[{c_byte}] +reveal_type(np.ctypeslib.as_ctypes(AR_short)) # E: ctypes.Array[{c_short}] +reveal_type(np.ctypeslib.as_ctypes(AR_intc)) # E: ctypes.Array[{c_int}] +reveal_type(np.ctypeslib.as_ctypes(AR_int)) # E: ctypes.Array[{c_long}] +reveal_type(np.ctypeslib.as_ctypes(AR_longlong)) # E: ctypes.Array[{c_longlong}] +reveal_type(np.ctypeslib.as_ctypes(AR_single)) # E: ctypes.Array[{c_float}] +reveal_type(np.ctypeslib.as_ctypes(AR_double)) # E: ctypes.Array[{c_double}] +reveal_type(np.ctypeslib.as_ctypes(AR_longdouble)) # E: ctypes.Array[{c_longdouble}] +reveal_type(np.ctypeslib.as_ctypes(AR_void)) # E: ctypes.Array[Any] + +reveal_type(np.ctypeslib.as_array(AR_ubyte)) # E: ndarray[Any, dtype[{ubyte}]] +reveal_type(np.ctypeslib.as_array(1)) # E: ndarray[Any, dtype[Any]] +reveal_type(np.ctypeslib.as_array(pointer)) # E: ndarray[Any, dtype[Any]] diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/datasource.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/datasource.pyi new file mode 100644 index 0000000..245ac76 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/datasource.pyi @@ -0,0 +1,21 @@ +from pathlib import Path +import numpy as np + +path1: Path +path2: str + +d1 = np.DataSource(path1) +d2 = np.DataSource(path2) +d3 = np.DataSource(None) + +reveal_type(d1.abspath("...")) # E: str +reveal_type(d2.abspath("...")) # E: str +reveal_type(d3.abspath("...")) # E: str + +reveal_type(d1.exists("...")) # E: bool +reveal_type(d2.exists("...")) # E: bool +reveal_type(d3.exists("...")) # E: bool + +reveal_type(d1.open("...", "r")) # E: IO[Any] +reveal_type(d2.open("...", encoding="utf8")) # E: IO[Any] +reveal_type(d3.open("...", newline="\n")) # E: IO[Any] diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/dtype.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/dtype.pyi new file mode 100644 index 0000000..934d7da --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/dtype.pyi @@ -0,0 +1,76 @@ +import ctypes as ct +import numpy as np + +dtype_U: np.dtype[np.str_] +dtype_V: np.dtype[np.void] +dtype_i8: np.dtype[np.int64] + +reveal_type(np.dtype(np.float64)) # E: dtype[{float64}] +reveal_type(np.dtype(np.int64)) # E: dtype[{int64}] + +# String aliases +reveal_type(np.dtype("float64")) # E: dtype[{float64}] +reveal_type(np.dtype("float32")) # E: dtype[{float32}] +reveal_type(np.dtype("int64")) # E: dtype[{int64}] +reveal_type(np.dtype("int32")) # E: dtype[{int32}] +reveal_type(np.dtype("bool")) # E: dtype[bool_] +reveal_type(np.dtype("bytes")) # E: dtype[bytes_] +reveal_type(np.dtype("str")) # E: dtype[str_] + +# Python types +reveal_type(np.dtype(complex)) # E: dtype[{cdouble}] +reveal_type(np.dtype(float)) # E: dtype[{double}] +reveal_type(np.dtype(int)) # E: dtype[{int_}] +reveal_type(np.dtype(bool)) # E: dtype[bool_]
+reveal_type(np.dtype(str)) # E: dtype[str_] +reveal_type(np.dtype(bytes)) # E: dtype[bytes_] +reveal_type(np.dtype(object)) # E: dtype[object_] + +# ctypes +reveal_type(np.dtype(ct.c_double)) # E: dtype[{double}] +reveal_type(np.dtype(ct.c_longlong)) # E: dtype[{longlong}] +reveal_type(np.dtype(ct.c_uint32)) # E: dtype[{uint32}] +reveal_type(np.dtype(ct.c_bool)) # E: dtype[bool_] +reveal_type(np.dtype(ct.c_char)) # E: dtype[bytes_] +reveal_type(np.dtype(ct.py_object)) # E: dtype[object_] + +# Special case for None +reveal_type(np.dtype(None)) # E: dtype[{double}] + +# Dtypes of dtypes +reveal_type(np.dtype(np.dtype(np.float64))) # E: dtype[{float64}] + +# Parameterized dtypes +reveal_type(np.dtype("S8")) # E: dtype + +# Void +reveal_type(np.dtype(("U", 10))) # E: dtype[void] + +# Methods and attributes +reveal_type(dtype_U.base) # E: dtype[Any] +reveal_type(dtype_U.subdtype) # E: Union[None, Tuple[dtype[Any], builtins.tuple[builtins.int]]] +reveal_type(dtype_U.newbyteorder()) # E: dtype[str_] +reveal_type(dtype_U.type) # E: Type[str_] +reveal_type(dtype_U.name) # E: str +reveal_type(dtype_U.names) # E: Union[None, builtins.tuple[builtins.str]] + +reveal_type(dtype_U * 0) # E: dtype[str_] +reveal_type(dtype_U * 1) # E: dtype[str_] +reveal_type(dtype_U * 2) # E: dtype[str_] + +reveal_type(dtype_i8 * 0) # E: dtype[void] +reveal_type(dtype_i8 * 1) # E: dtype[{int64}] +reveal_type(dtype_i8 * 2) # E: dtype[void] + +reveal_type(0 * dtype_U) # E: dtype[str_] +reveal_type(1 * dtype_U) # E: dtype[str_] +reveal_type(2 * dtype_U) # E: dtype[str_] + +reveal_type(0 * dtype_i8) # E: dtype[Any] +reveal_type(1 * dtype_i8) # E: dtype[Any] +reveal_type(2 * dtype_i8) # E: dtype[Any] + +reveal_type(dtype_V["f0"]) # E: dtype[Any] +reveal_type(dtype_V[0]) # E: dtype[Any] +reveal_type(dtype_V[["f0", "f1"]]) # E: dtype[void] +reveal_type(dtype_V[["f0"]]) # E: dtype[void] diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/einsumfunc.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/einsumfunc.pyi new file mode 100644 index 0000000..5b07e6d --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/einsumfunc.pyi @@ -0,0 +1,32 @@ +from typing import List, Any +import numpy as np + +AR_LIKE_b: List[bool] +AR_LIKE_u: List[np.uint32] +AR_LIKE_i: List[int] +AR_LIKE_f: List[float] +AR_LIKE_c: List[complex] +AR_LIKE_U: List[str] + +OUT_f: np.ndarray[Any, np.dtype[np.float64]] + +reveal_type(np.einsum("i,i->i", AR_LIKE_b, AR_LIKE_b)) # E: Any +reveal_type(np.einsum("i,i->i", AR_LIKE_u, AR_LIKE_u)) # E: Any +reveal_type(np.einsum("i,i->i", AR_LIKE_i, AR_LIKE_i)) # E: Any +reveal_type(np.einsum("i,i->i", AR_LIKE_f, AR_LIKE_f)) # E: Any +reveal_type(np.einsum("i,i->i", AR_LIKE_c, AR_LIKE_c)) # E: Any +reveal_type(np.einsum("i,i->i", AR_LIKE_b, AR_LIKE_i)) # E: Any +reveal_type(np.einsum("i,i,i,i->i", AR_LIKE_b, AR_LIKE_u, AR_LIKE_i, AR_LIKE_c)) # E: Any + +reveal_type(np.einsum("i,i->i", AR_LIKE_c, AR_LIKE_c, out=OUT_f)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.einsum("i,i->i", AR_LIKE_U, AR_LIKE_U, dtype=bool, casting="unsafe", out=OUT_f)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.einsum("i,i->i", AR_LIKE_f, AR_LIKE_f, dtype="c16")) # E: Any +reveal_type(np.einsum("i,i->i", AR_LIKE_U, AR_LIKE_U, dtype=bool, casting="unsafe")) # E: Any + +reveal_type(np.einsum_path("i,i->i", AR_LIKE_b, AR_LIKE_b)) # E: Tuple[builtins.list[Any], builtins.str] +reveal_type(np.einsum_path("i,i->i", AR_LIKE_u, AR_LIKE_u)) # E: 
Tuple[builtins.list[Any], builtins.str] +reveal_type(np.einsum_path("i,i->i", AR_LIKE_i, AR_LIKE_i)) # E: Tuple[builtins.list[Any], builtins.str] +reveal_type(np.einsum_path("i,i->i", AR_LIKE_f, AR_LIKE_f)) # E: Tuple[builtins.list[Any], builtins.str] +reveal_type(np.einsum_path("i,i->i", AR_LIKE_c, AR_LIKE_c)) # E: Tuple[builtins.list[Any], builtins.str] +reveal_type(np.einsum_path("i,i->i", AR_LIKE_b, AR_LIKE_i)) # E: Tuple[builtins.list[Any], builtins.str] +reveal_type(np.einsum_path("i,i,i,i->i", AR_LIKE_b, AR_LIKE_u, AR_LIKE_i, AR_LIKE_c)) # E: Tuple[builtins.list[Any], builtins.str] diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/false_positives.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/false_positives.pyi new file mode 100644 index 0000000..2d71566 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/false_positives.pyi @@ -0,0 +1,10 @@ +from typing import Any +import numpy.typing as npt + +AR_Any: npt.NDArray[Any] + +# Mypy bug where overload ambiguity is ignored for `Any`-parametrized types; +# xref numpy/numpy#20099 and python/mypy#11347 +# +# The expected output would be something akin to `ndarray[Any, dtype[Any]]` +reveal_type(AR_Any + 2) # E: ndarray[Any, dtype[signedinteger[Any]]] diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/fft.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/fft.pyi new file mode 100644 index 0000000..0667938 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/fft.pyi @@ -0,0 +1,35 @@ +import numpy as np +import numpy.typing as npt + +AR_f8: npt.NDArray[np.float64] +AR_c16: npt.NDArray[np.complex128] +AR_LIKE_f8: list[float] + +reveal_type(np.fft.fftshift(AR_f8)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.fft.fftshift(AR_LIKE_f8, axes=0)) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.fft.ifftshift(AR_f8)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.fft.ifftshift(AR_LIKE_f8, axes=0)) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.fft.fftfreq(5, AR_f8)) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(np.fft.fftfreq(np.int64(), AR_c16)) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] + +reveal_type(np.fft.rfftfreq(5, AR_f8)) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(np.fft.rfftfreq(np.int64(), AR_c16)) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] + +reveal_type(np.fft.fft(AR_f8)) # E: ndarray[Any, dtype[{complex128}]] +reveal_type(np.fft.ifft(AR_f8, axis=1)) # E: ndarray[Any, dtype[{complex128}]] +reveal_type(np.fft.rfft(AR_f8, n=None)) # E: ndarray[Any, dtype[{complex128}]] +reveal_type(np.fft.irfft(AR_f8, norm="ortho")) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.fft.hfft(AR_f8, n=2)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.fft.ihfft(AR_f8)) # E: ndarray[Any, dtype[{complex128}]] + +reveal_type(np.fft.fftn(AR_f8)) # E: ndarray[Any, dtype[{complex128}]] +reveal_type(np.fft.ifftn(AR_f8)) # E: ndarray[Any, dtype[{complex128}]] +reveal_type(np.fft.rfftn(AR_f8)) # E: ndarray[Any, dtype[{complex128}]] +reveal_type(np.fft.irfftn(AR_f8)) # E: ndarray[Any, dtype[{float64}]] + +reveal_type(np.fft.rfft2(AR_f8)) # E: ndarray[Any, dtype[{complex128}]] +reveal_type(np.fft.ifft2(AR_f8)) # E: ndarray[Any, dtype[{complex128}]] +reveal_type(np.fft.fft2(AR_f8)) # E: ndarray[Any, dtype[{complex128}]] +reveal_type(np.fft.irfft2(AR_f8)) # E: ndarray[Any, dtype[{float64}]] diff --git 
a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/flatiter.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/flatiter.pyi new file mode 100644 index 0000000..9e7d6e5 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/flatiter.pyi @@ -0,0 +1,23 @@ +from typing import Any +import numpy as np + +a: np.flatiter[np.ndarray[Any, np.dtype[np.str_]]] + +reveal_type(a.base) # E: ndarray[Any, dtype[str_]] +reveal_type(a.copy()) # E: ndarray[Any, dtype[str_]] +reveal_type(a.coords) # E: tuple[builtins.int] +reveal_type(a.index) # E: int +reveal_type(iter(a)) # E: Iterator[str_] +reveal_type(next(a)) # E: str_ +reveal_type(a[0]) # E: str_ +reveal_type(a[[0, 1, 2]]) # E: ndarray[Any, dtype[str_]] +reveal_type(a[...]) # E: ndarray[Any, dtype[str_]] +reveal_type(a[:]) # E: ndarray[Any, dtype[str_]] +reveal_type(a[(...,)]) # E: ndarray[Any, dtype[str_]] +reveal_type(a[(0,)]) # E: str_ +reveal_type(a.__array__()) # E: ndarray[Any, dtype[str_]] +reveal_type(a.__array__(np.dtype(np.float64))) # E: ndarray[Any, dtype[{float64}]] +a[0] = "a" +a[:5] = "a" +a[...] = "a" +a[(...,)] = "a" diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/fromnumeric.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/fromnumeric.pyi new file mode 100644 index 0000000..2ee1952 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/fromnumeric.pyi @@ -0,0 +1,264 @@ +"""Tests for :mod:`core.fromnumeric`.""" + +import numpy as np + +A = np.array(True, ndmin=2, dtype=bool) +B = np.array(1.0, ndmin=2, dtype=np.float32) +A.setflags(write=False) +B.setflags(write=False) + +a = np.bool_(True) +b = np.float32(1.0) +c = 1.0 +d = np.array(1.0, dtype=np.float32) # writeable + +reveal_type(np.take(a, 0)) # E: Any +reveal_type(np.take(b, 0)) # E: Any +reveal_type(np.take(c, 0)) # E: Any +reveal_type(np.take(A, 0)) # E: Any +reveal_type(np.take(B, 0)) # E: Any +reveal_type(np.take(A, [0])) # E: Any +reveal_type(np.take(B, [0])) # E: Any + +reveal_type(np.reshape(a, 1)) # E: ndarray[Any, Any] +reveal_type(np.reshape(b, 1)) # E: ndarray[Any, Any] +reveal_type(np.reshape(c, 1)) # E: ndarray[Any, Any] +reveal_type(np.reshape(A, 1)) # E: ndarray[Any, Any] +reveal_type(np.reshape(B, 1)) # E: ndarray[Any, Any] + +reveal_type(np.choose(a, [True, True])) # E: Any +reveal_type(np.choose(A, [True, True])) # E: Any + +reveal_type(np.repeat(a, 1)) # E: ndarray[Any, Any] +reveal_type(np.repeat(b, 1)) # E: ndarray[Any, Any] +reveal_type(np.repeat(c, 1)) # E: ndarray[Any, Any] +reveal_type(np.repeat(A, 1)) # E: ndarray[Any, Any] +reveal_type(np.repeat(B, 1)) # E: ndarray[Any, Any] + +# TODO: Add tests for np.put() + +reveal_type(np.swapaxes(A, 0, 0)) # E: ndarray[Any, Any] +reveal_type(np.swapaxes(B, 0, 0)) # E: ndarray[Any, Any] + +reveal_type(np.transpose(a)) # E: ndarray[Any, Any] +reveal_type(np.transpose(b)) # E: ndarray[Any, Any] +reveal_type(np.transpose(c)) # E: ndarray[Any, Any] +reveal_type(np.transpose(A)) # E: ndarray[Any, Any] +reveal_type(np.transpose(B)) # E: ndarray[Any, Any] + +reveal_type(np.partition(a, 0, axis=None)) # E: ndarray[Any, Any] +reveal_type(np.partition(b, 0, axis=None)) # E: ndarray[Any, Any] +reveal_type(np.partition(c, 0, axis=None)) # E: ndarray[Any, Any] +reveal_type(np.partition(A, 0)) # E: ndarray[Any, Any] +reveal_type(np.partition(B, 0)) # E: ndarray[Any, Any] + +reveal_type(np.argpartition(a, 0)) # E: Any +reveal_type(np.argpartition(b, 0)) # E: Any 
+reveal_type(np.argpartition(c, 0)) # E: Any +reveal_type(np.argpartition(A, 0)) # E: Any +reveal_type(np.argpartition(B, 0)) # E: Any + +reveal_type(np.sort(A, 0)) # E: ndarray[Any, Any] +reveal_type(np.sort(B, 0)) # E: ndarray[Any, Any] + +reveal_type(np.argsort(A, 0)) # E: ndarray[Any, Any] +reveal_type(np.argsort(B, 0)) # E: ndarray[Any, Any] + +reveal_type(np.argmax(A)) # E: {intp} +reveal_type(np.argmax(B)) # E: {intp} +reveal_type(np.argmax(A, axis=0)) # E: Any +reveal_type(np.argmax(B, axis=0)) # E: Any + +reveal_type(np.argmin(A)) # E: {intp} +reveal_type(np.argmin(B)) # E: {intp} +reveal_type(np.argmin(A, axis=0)) # E: Any +reveal_type(np.argmin(B, axis=0)) # E: Any + +reveal_type(np.searchsorted(A[0], 0)) # E: {intp} +reveal_type(np.searchsorted(B[0], 0)) # E: {intp} +reveal_type(np.searchsorted(A[0], [0])) # E: ndarray[Any, Any] +reveal_type(np.searchsorted(B[0], [0])) # E: ndarray[Any, Any] + +reveal_type(np.resize(a, (5, 5))) # E: ndarray[Any, Any] +reveal_type(np.resize(b, (5, 5))) # E: ndarray[Any, Any] +reveal_type(np.resize(c, (5, 5))) # E: ndarray[Any, Any] +reveal_type(np.resize(A, (5, 5))) # E: ndarray[Any, Any] +reveal_type(np.resize(B, (5, 5))) # E: ndarray[Any, Any] + +reveal_type(np.squeeze(a)) # E: bool_ +reveal_type(np.squeeze(b)) # E: {float32} +reveal_type(np.squeeze(c)) # E: ndarray[Any, Any] +reveal_type(np.squeeze(A)) # E: ndarray[Any, Any] +reveal_type(np.squeeze(B)) # E: ndarray[Any, Any] + +reveal_type(np.diagonal(A)) # E: ndarray[Any, Any] +reveal_type(np.diagonal(B)) # E: ndarray[Any, Any] + +reveal_type(np.trace(A)) # E: Any +reveal_type(np.trace(B)) # E: Any + +reveal_type(np.ravel(a)) # E: ndarray[Any, Any] +reveal_type(np.ravel(b)) # E: ndarray[Any, Any] +reveal_type(np.ravel(c)) # E: ndarray[Any, Any] +reveal_type(np.ravel(A)) # E: ndarray[Any, Any] +reveal_type(np.ravel(B)) # E: ndarray[Any, Any] + +reveal_type(np.nonzero(a)) # E: tuple[ndarray[Any, Any]] +reveal_type(np.nonzero(b)) # E: tuple[ndarray[Any, Any]] +reveal_type(np.nonzero(c)) # E: tuple[ndarray[Any, Any]] +reveal_type(np.nonzero(A)) # E: tuple[ndarray[Any, Any]] +reveal_type(np.nonzero(B)) # E: tuple[ndarray[Any, Any]] + +reveal_type(np.shape(a)) # E: tuple[builtins.int] +reveal_type(np.shape(b)) # E: tuple[builtins.int] +reveal_type(np.shape(c)) # E: tuple[builtins.int] +reveal_type(np.shape(A)) # E: tuple[builtins.int] +reveal_type(np.shape(B)) # E: tuple[builtins.int] + +reveal_type(np.compress([True], a)) # E: ndarray[Any, Any] +reveal_type(np.compress([True], b)) # E: ndarray[Any, Any] +reveal_type(np.compress([True], c)) # E: ndarray[Any, Any] +reveal_type(np.compress([True], A)) # E: ndarray[Any, Any] +reveal_type(np.compress([True], B)) # E: ndarray[Any, Any] + +reveal_type(np.clip(a, 0, 1.0)) # E: Any +reveal_type(np.clip(b, -1, 1)) # E: Any +reveal_type(np.clip(c, 0, 1)) # E: Any +reveal_type(np.clip(A, 0, 1)) # E: Any +reveal_type(np.clip(B, 0, 1)) # E: Any + +reveal_type(np.sum(a)) # E: Any +reveal_type(np.sum(b)) # E: Any +reveal_type(np.sum(c)) # E: Any +reveal_type(np.sum(A)) # E: Any +reveal_type(np.sum(B)) # E: Any +reveal_type(np.sum(A, axis=0)) # E: Any +reveal_type(np.sum(B, axis=0)) # E: Any + +reveal_type(np.all(a)) # E: bool_ +reveal_type(np.all(b)) # E: bool_ +reveal_type(np.all(c)) # E: bool_ +reveal_type(np.all(A)) # E: bool_ +reveal_type(np.all(B)) # E: bool_ +reveal_type(np.all(A, axis=0)) # E: Any +reveal_type(np.all(B, axis=0)) # E: Any +reveal_type(np.all(A, keepdims=True)) # E: Any +reveal_type(np.all(B, keepdims=True)) # E: Any + 
+reveal_type(np.any(a)) # E: bool_ +reveal_type(np.any(b)) # E: bool_ +reveal_type(np.any(c)) # E: bool_ +reveal_type(np.any(A)) # E: bool_ +reveal_type(np.any(B)) # E: bool_ +reveal_type(np.any(A, axis=0)) # E: Any +reveal_type(np.any(B, axis=0)) # E: Any +reveal_type(np.any(A, keepdims=True)) # E: Any +reveal_type(np.any(B, keepdims=True)) # E: Any + +reveal_type(np.cumsum(a)) # E: ndarray[Any, Any] +reveal_type(np.cumsum(b)) # E: ndarray[Any, Any] +reveal_type(np.cumsum(c)) # E: ndarray[Any, Any] +reveal_type(np.cumsum(A)) # E: ndarray[Any, Any] +reveal_type(np.cumsum(B)) # E: ndarray[Any, Any] + +reveal_type(np.ptp(a)) # E: Any +reveal_type(np.ptp(b)) # E: Any +reveal_type(np.ptp(c)) # E: Any +reveal_type(np.ptp(A)) # E: Any +reveal_type(np.ptp(B)) # E: Any +reveal_type(np.ptp(A, axis=0)) # E: Any +reveal_type(np.ptp(B, axis=0)) # E: Any +reveal_type(np.ptp(A, keepdims=True)) # E: Any +reveal_type(np.ptp(B, keepdims=True)) # E: Any + +reveal_type(np.amax(a)) # E: Any +reveal_type(np.amax(b)) # E: Any +reveal_type(np.amax(c)) # E: Any +reveal_type(np.amax(A)) # E: Any +reveal_type(np.amax(B)) # E: Any +reveal_type(np.amax(A, axis=0)) # E: Any +reveal_type(np.amax(B, axis=0)) # E: Any +reveal_type(np.amax(A, keepdims=True)) # E: Any +reveal_type(np.amax(B, keepdims=True)) # E: Any + +reveal_type(np.amin(a)) # E: Any +reveal_type(np.amin(b)) # E: Any +reveal_type(np.amin(c)) # E: Any +reveal_type(np.amin(A)) # E: Any +reveal_type(np.amin(B)) # E: Any +reveal_type(np.amin(A, axis=0)) # E: Any +reveal_type(np.amin(B, axis=0)) # E: Any +reveal_type(np.amin(A, keepdims=True)) # E: Any +reveal_type(np.amin(B, keepdims=True)) # E: Any + +reveal_type(np.prod(a)) # E: Any +reveal_type(np.prod(b)) # E: Any +reveal_type(np.prod(c)) # E: Any +reveal_type(np.prod(A)) # E: Any +reveal_type(np.prod(B)) # E: Any +reveal_type(np.prod(A, axis=0)) # E: Any +reveal_type(np.prod(B, axis=0)) # E: Any +reveal_type(np.prod(A, keepdims=True)) # E: Any +reveal_type(np.prod(B, keepdims=True)) # E: Any +reveal_type(np.prod(b, out=d)) # E: Any +reveal_type(np.prod(B, out=d)) # E: Any + +reveal_type(np.cumprod(a)) # E: ndarray[Any, Any] +reveal_type(np.cumprod(b)) # E: ndarray[Any, Any] +reveal_type(np.cumprod(c)) # E: ndarray[Any, Any] +reveal_type(np.cumprod(A)) # E: ndarray[Any, Any] +reveal_type(np.cumprod(B)) # E: ndarray[Any, Any] + +reveal_type(np.ndim(a)) # E: int +reveal_type(np.ndim(b)) # E: int +reveal_type(np.ndim(c)) # E: int +reveal_type(np.ndim(A)) # E: int +reveal_type(np.ndim(B)) # E: int + +reveal_type(np.size(a)) # E: int +reveal_type(np.size(b)) # E: int +reveal_type(np.size(c)) # E: int +reveal_type(np.size(A)) # E: int +reveal_type(np.size(B)) # E: int + +reveal_type(np.around(a)) # E: Any +reveal_type(np.around(b)) # E: Any +reveal_type(np.around(c)) # E: Any +reveal_type(np.around(A)) # E: Any +reveal_type(np.around(B)) # E: Any + +reveal_type(np.mean(a)) # E: Any +reveal_type(np.mean(b)) # E: Any +reveal_type(np.mean(c)) # E: Any +reveal_type(np.mean(A)) # E: Any +reveal_type(np.mean(B)) # E: Any +reveal_type(np.mean(A, axis=0)) # E: Any +reveal_type(np.mean(B, axis=0)) # E: Any +reveal_type(np.mean(A, keepdims=True)) # E: Any +reveal_type(np.mean(B, keepdims=True)) # E: Any +reveal_type(np.mean(b, out=d)) # E: Any +reveal_type(np.mean(B, out=d)) # E: Any + +reveal_type(np.std(a)) # E: Any +reveal_type(np.std(b)) # E: Any +reveal_type(np.std(c)) # E: Any +reveal_type(np.std(A)) # E: Any +reveal_type(np.std(B)) # E: Any +reveal_type(np.std(A, axis=0)) # E: Any +reveal_type(np.std(B, axis=0)) # 
E: Any +reveal_type(np.std(A, keepdims=True)) # E: Any +reveal_type(np.std(B, keepdims=True)) # E: Any +reveal_type(np.std(b, out=d)) # E: Any +reveal_type(np.std(B, out=d)) # E: Any + +reveal_type(np.var(a)) # E: Any +reveal_type(np.var(b)) # E: Any +reveal_type(np.var(c)) # E: Any +reveal_type(np.var(A)) # E: Any +reveal_type(np.var(B)) # E: Any +reveal_type(np.var(A, axis=0)) # E: Any +reveal_type(np.var(B, axis=0)) # E: Any +reveal_type(np.var(A, keepdims=True)) # E: Any +reveal_type(np.var(B, keepdims=True)) # E: Any +reveal_type(np.var(b, out=d)) # E: Any +reveal_type(np.var(B, out=d)) # E: Any diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/getlimits.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/getlimits.pyi new file mode 100644 index 0000000..1614b57 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/getlimits.pyi @@ -0,0 +1,47 @@ +import numpy as np +f: float +f8: np.float64 +c8: np.complex64 + +i: int +i8: np.int64 +u4: np.uint32 + +finfo_f8: np.finfo[np.float64] +iinfo_i8: np.iinfo[np.int64] + +reveal_type(np.finfo(f)) # E: finfo[{double}] +reveal_type(np.finfo(f8)) # E: finfo[{float64}] +reveal_type(np.finfo(c8)) # E: finfo[{float32}] +reveal_type(np.finfo('f2')) # E: finfo[floating[Any]] + +reveal_type(finfo_f8.dtype) # E: dtype[{float64}] +reveal_type(finfo_f8.bits) # E: int +reveal_type(finfo_f8.eps) # E: {float64} +reveal_type(finfo_f8.epsneg) # E: {float64} +reveal_type(finfo_f8.iexp) # E: int +reveal_type(finfo_f8.machep) # E: int +reveal_type(finfo_f8.max) # E: {float64} +reveal_type(finfo_f8.maxexp) # E: int +reveal_type(finfo_f8.min) # E: {float64} +reveal_type(finfo_f8.minexp) # E: int +reveal_type(finfo_f8.negep) # E: int +reveal_type(finfo_f8.nexp) # E: int +reveal_type(finfo_f8.nmant) # E: int +reveal_type(finfo_f8.precision) # E: int +reveal_type(finfo_f8.resolution) # E: {float64} +reveal_type(finfo_f8.tiny) # E: {float64} +reveal_type(finfo_f8.smallest_normal) # E: {float64} +reveal_type(finfo_f8.smallest_subnormal) # E: {float64} + +reveal_type(np.iinfo(i)) # E: iinfo[{int_}] +reveal_type(np.iinfo(i8)) # E: iinfo[{int64}] +reveal_type(np.iinfo(u4)) # E: iinfo[{uint32}] +reveal_type(np.iinfo('i2')) # E: iinfo[Any] + +reveal_type(iinfo_i8.dtype) # E: dtype[{int64}] +reveal_type(iinfo_i8.kind) # E: str +reveal_type(iinfo_i8.bits) # E: int +reveal_type(iinfo_i8.key) # E: str +reveal_type(iinfo_i8.min) # E: int +reveal_type(iinfo_i8.max) # E: int diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/histograms.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/histograms.pyi new file mode 100644 index 0000000..d96e44f --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/histograms.pyi @@ -0,0 +1,19 @@ +import numpy as np +import numpy.typing as npt + +AR_i8: npt.NDArray[np.int64] +AR_f8: npt.NDArray[np.float64] + +reveal_type(np.histogram_bin_edges(AR_i8, bins="auto")) # E: ndarray[Any, dtype[Any]] +reveal_type(np.histogram_bin_edges(AR_i8, bins="rice", range=(0, 3))) # E: ndarray[Any, dtype[Any]] +reveal_type(np.histogram_bin_edges(AR_i8, bins="scott", weights=AR_f8)) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.histogram(AR_i8, bins="auto")) # E: Tuple[ndarray[Any, dtype[Any]], ndarray[Any, dtype[Any]]] +reveal_type(np.histogram(AR_i8, bins="rice", range=(0, 3))) # E: Tuple[ndarray[Any, dtype[Any]], ndarray[Any, dtype[Any]]] +reveal_type(np.histogram(AR_i8, bins="scott", weights=AR_f8)) # E: 
Tuple[ndarray[Any, dtype[Any]], ndarray[Any, dtype[Any]]] +reveal_type(np.histogram(AR_f8, bins=1, density=True)) # E: Tuple[ndarray[Any, dtype[Any]], ndarray[Any, dtype[Any]]] + +reveal_type(np.histogramdd(AR_i8, bins=[1])) # E: Tuple[ndarray[Any, dtype[Any]], builtins.list[ndarray[Any, dtype[Any]]]] +reveal_type(np.histogramdd(AR_i8, range=[(0, 3)])) # E: Tuple[ndarray[Any, dtype[Any]], builtins.list[ndarray[Any, dtype[Any]]]] +reveal_type(np.histogramdd(AR_i8, weights=AR_f8)) # E: Tuple[ndarray[Any, dtype[Any]], builtins.list[ndarray[Any, dtype[Any]]]] +reveal_type(np.histogramdd(AR_f8, density=True)) # E: Tuple[ndarray[Any, dtype[Any]], builtins.list[ndarray[Any, dtype[Any]]]] diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/index_tricks.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/index_tricks.pyi new file mode 100644 index 0000000..55c033f --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/index_tricks.pyi @@ -0,0 +1,66 @@ +from typing import Any, List +import numpy as np + +AR_LIKE_b: List[bool] +AR_LIKE_i: List[int] +AR_LIKE_f: List[float] +AR_LIKE_U: List[str] + +AR_i8: np.ndarray[Any, np.dtype[np.int64]] + +reveal_type(np.ndenumerate(AR_i8)) # E: ndenumerate[{int64}] +reveal_type(np.ndenumerate(AR_LIKE_f)) # E: ndenumerate[{double}] +reveal_type(np.ndenumerate(AR_LIKE_U)) # E: ndenumerate[str_] + +reveal_type(np.ndenumerate(AR_i8).iter) # E: flatiter[ndarray[Any, dtype[{int64}]]] +reveal_type(np.ndenumerate(AR_LIKE_f).iter) # E: flatiter[ndarray[Any, dtype[{double}]]] +reveal_type(np.ndenumerate(AR_LIKE_U).iter) # E: flatiter[ndarray[Any, dtype[str_]]] + +reveal_type(next(np.ndenumerate(AR_i8))) # E: Tuple[builtins.tuple[builtins.int], {int64}] +reveal_type(next(np.ndenumerate(AR_LIKE_f))) # E: Tuple[builtins.tuple[builtins.int], {double}] +reveal_type(next(np.ndenumerate(AR_LIKE_U))) # E: Tuple[builtins.tuple[builtins.int], str_] + +reveal_type(iter(np.ndenumerate(AR_i8))) # E: Iterator[Tuple[builtins.tuple[builtins.int], {int64}]] +reveal_type(iter(np.ndenumerate(AR_LIKE_f))) # E: Iterator[Tuple[builtins.tuple[builtins.int], {double}]] +reveal_type(iter(np.ndenumerate(AR_LIKE_U))) # E: Iterator[Tuple[builtins.tuple[builtins.int], str_]] + +reveal_type(np.ndindex(1, 2, 3)) # E: numpy.ndindex +reveal_type(np.ndindex((1, 2, 3))) # E: numpy.ndindex +reveal_type(iter(np.ndindex(1, 2, 3))) # E: Iterator[builtins.tuple[builtins.int]] +reveal_type(next(np.ndindex(1, 2, 3))) # E: builtins.tuple[builtins.int] + +reveal_type(np.unravel_index([22, 41, 37], (7, 6))) # E: tuple[ndarray[Any, dtype[{intp}]]] +reveal_type(np.unravel_index([31, 41, 13], (7, 6), order="F")) # E: tuple[ndarray[Any, dtype[{intp}]]] +reveal_type(np.unravel_index(1621, (6, 7, 8, 9))) # E: tuple[{intp}] + +reveal_type(np.ravel_multi_index([[1]], (7, 6))) # E: ndarray[Any, dtype[{intp}]] +reveal_type(np.ravel_multi_index(AR_LIKE_i, (7, 6))) # E: {intp} +reveal_type(np.ravel_multi_index(AR_LIKE_i, (7, 6), order="F")) # E: {intp} +reveal_type(np.ravel_multi_index(AR_LIKE_i, (4, 6), mode="clip")) # E: {intp} +reveal_type(np.ravel_multi_index(AR_LIKE_i, (4, 4), mode=("clip", "wrap"))) # E: {intp} +reveal_type(np.ravel_multi_index((3, 1, 4, 1), (6, 7, 8, 9))) # E: {intp} + +reveal_type(np.mgrid[1:1:2]) # E: ndarray[Any, dtype[Any]] +reveal_type(np.mgrid[1:1:2, None:10]) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.ogrid[1:1:2]) # E: list[ndarray[Any, dtype[Any]]] +reveal_type(np.ogrid[1:1:2, None:10]) # E: list[ndarray[Any, 
dtype[Any]]] + +reveal_type(np.index_exp[0:1]) # E: Tuple[builtins.slice] +reveal_type(np.index_exp[0:1, None:3]) # E: Tuple[builtins.slice, builtins.slice] +reveal_type(np.index_exp[0, 0:1, ..., [0, 1, 3]]) # E: Tuple[Literal[0]?, builtins.slice, builtins.ellipsis, builtins.list[builtins.int]] + +reveal_type(np.s_[0:1]) # E: builtins.slice +reveal_type(np.s_[0:1, None:3]) # E: Tuple[builtins.slice, builtins.slice] +reveal_type(np.s_[0, 0:1, ..., [0, 1, 3]]) # E: Tuple[Literal[0]?, builtins.slice, builtins.ellipsis, builtins.list[builtins.int]] + +reveal_type(np.ix_(AR_LIKE_b)) # E: tuple[ndarray[Any, dtype[bool_]]] +reveal_type(np.ix_(AR_LIKE_i, AR_LIKE_f)) # E: tuple[ndarray[Any, dtype[{double}]]] +reveal_type(np.ix_(AR_i8)) # E: tuple[ndarray[Any, dtype[{int64}]]] + +reveal_type(np.fill_diagonal(AR_i8, 5)) # E: None + +reveal_type(np.diag_indices(4)) # E: tuple[ndarray[Any, dtype[{int_}]]] +reveal_type(np.diag_indices(2, 3)) # E: tuple[ndarray[Any, dtype[{int_}]]] + +reveal_type(np.diag_indices_from(AR_i8)) # E: tuple[ndarray[Any, dtype[{int_}]]] diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/lib_function_base.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/lib_function_base.pyi new file mode 100644 index 0000000..eebe9fb --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/lib_function_base.pyi @@ -0,0 +1,180 @@ +from typing import Any + +import numpy as np +import numpy.typing as npt + +vectorized_func: np.vectorize + +f8: np.float64 +AR_LIKE_f8: list[float] + +AR_i8: npt.NDArray[np.int64] +AR_f8: npt.NDArray[np.float64] +AR_c16: npt.NDArray[np.complex128] +AR_m: npt.NDArray[np.timedelta64] +AR_M: npt.NDArray[np.datetime64] +AR_O: npt.NDArray[np.object_] +AR_b: npt.NDArray[np.bool_] +AR_U: npt.NDArray[np.str_] +CHAR_AR_U: np.chararray[Any, np.dtype[np.str_]] + +def func(*args: Any, **kwargs: Any) -> Any: ... 
+ +reveal_type(vectorized_func.pyfunc) # E: def (*Any, **Any) -> Any +reveal_type(vectorized_func.cache) # E: bool +reveal_type(vectorized_func.signature) # E: Union[None, builtins.str] +reveal_type(vectorized_func.otypes) # E: Union[None, builtins.str] +reveal_type(vectorized_func.excluded) # E: set[Union[builtins.int, builtins.str]] +reveal_type(vectorized_func.__doc__) # E: Union[None, builtins.str] +reveal_type(vectorized_func([1])) # E: Any +reveal_type(np.vectorize(int)) # E: vectorize +reveal_type(np.vectorize( # E: vectorize + int, otypes="i", doc="doc", excluded=(), cache=True, signature=None +)) + +reveal_type(np.add_newdoc("__main__", "blabla", doc="test doc")) # E: None +reveal_type(np.add_newdoc("__main__", "blabla", doc=("meth", "test doc"))) # E: None +reveal_type(np.add_newdoc("__main__", "blabla", doc=[("meth", "test doc")])) # E: None + +reveal_type(np.rot90(AR_f8, k=2)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.rot90(AR_LIKE_f8, axes=(0, 1))) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.flip(f8)) # E: {float64} +reveal_type(np.flip(1.0)) # E: Any +reveal_type(np.flip(AR_f8, axis=(0, 1))) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.flip(AR_LIKE_f8, axis=0)) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.iterable(1)) # E: bool +reveal_type(np.iterable([1])) # E: bool + +reveal_type(np.average(AR_f8)) # E: floating[Any] +reveal_type(np.average(AR_f8, weights=AR_c16)) # E: complexfloating[Any, Any] +reveal_type(np.average(AR_O)) # E: Any +reveal_type(np.average(AR_f8, returned=True)) # E: Tuple[floating[Any], floating[Any]] +reveal_type(np.average(AR_f8, weights=AR_c16, returned=True)) # E: Tuple[complexfloating[Any, Any], complexfloating[Any, Any]] +reveal_type(np.average(AR_O, returned=True)) # E: Tuple[Any, Any] +reveal_type(np.average(AR_f8, axis=0)) # E: Any +reveal_type(np.average(AR_f8, axis=0, returned=True)) # E: Tuple[Any, Any] + +reveal_type(np.asarray_chkfinite(AR_f8)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.asarray_chkfinite(AR_LIKE_f8)) # E: ndarray[Any, dtype[Any]] +reveal_type(np.asarray_chkfinite(AR_f8, dtype=np.float64)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.asarray_chkfinite(AR_f8, dtype=float)) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.piecewise(AR_f8, AR_b, [func])) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.piecewise(AR_LIKE_f8, AR_b, [func])) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.select([AR_f8], [AR_f8])) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.copy(AR_LIKE_f8)) # E: ndarray[Any, dtype[Any]] +reveal_type(np.copy(AR_U)) # E: ndarray[Any, dtype[str_]] +reveal_type(np.copy(CHAR_AR_U)) # E: ndarray[Any, Any] +reveal_type(np.copy(CHAR_AR_U, "K", subok=True)) # E: chararray[Any, dtype[str_]] +reveal_type(np.copy(CHAR_AR_U, subok=True)) # E: chararray[Any, dtype[str_]] + +reveal_type(np.gradient(AR_f8, axis=None)) # E: Any +reveal_type(np.gradient(AR_LIKE_f8, edge_order=2)) # E: Any + +reveal_type(np.diff("bob", n=0)) # E: str +reveal_type(np.diff(AR_f8, axis=0)) # E: ndarray[Any, dtype[Any]] +reveal_type(np.diff(AR_LIKE_f8, prepend=1.5)) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.angle(AR_f8)) # E: floating[Any] +reveal_type(np.angle(AR_c16, deg=True)) # E: complexfloating[Any, Any] +reveal_type(np.angle(AR_O)) # E: Any + +reveal_type(np.unwrap(AR_f8)) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(np.unwrap(AR_O)) # E: ndarray[Any, dtype[object_]] + +reveal_type(np.sort_complex(AR_f8)) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] + 
+reveal_type(np.trim_zeros(AR_f8)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.trim_zeros(AR_LIKE_f8)) # E: list[builtins.float] + +reveal_type(np.extract(AR_i8, AR_f8)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.extract(AR_i8, AR_LIKE_f8)) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.place(AR_f8, mask=AR_i8, vals=5.0)) # E: None + +reveal_type(np.disp(1, linefeed=True)) # E: None +with open("test", "w") as f: + reveal_type(np.disp("message", device=f)) # E: None + +reveal_type(np.cov(AR_f8, bias=True)) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(np.cov(AR_f8, AR_c16, ddof=1)) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] +reveal_type(np.cov(AR_f8, aweights=AR_f8, dtype=np.float32)) # E: ndarray[Any, dtype[{float32}]] +reveal_type(np.cov(AR_f8, fweights=AR_f8, dtype=float)) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.corrcoef(AR_f8, rowvar=True)) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(np.corrcoef(AR_f8, AR_c16)) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] +reveal_type(np.corrcoef(AR_f8, dtype=np.float32)) # E: ndarray[Any, dtype[{float32}]] +reveal_type(np.corrcoef(AR_f8, dtype=float)) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.blackman(5)) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(np.bartlett(6)) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(np.hanning(4.5)) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(np.hamming(0)) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(np.i0(AR_i8)) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(np.kaiser(4, 5.9)) # E: ndarray[Any, dtype[floating[Any]]] + +reveal_type(np.sinc(1.0)) # E: floating[Any] +reveal_type(np.sinc(1j)) # E: complexfloating[Any, Any] +reveal_type(np.sinc(AR_f8)) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(np.sinc(AR_c16)) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] + +reveal_type(np.msort(CHAR_AR_U)) # E: Any +reveal_type(np.msort(AR_U)) # E: ndarray[Any, dtype[str_]] +reveal_type(np.msort(AR_LIKE_f8)) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.median(AR_f8, keepdims=False)) # E: floating[Any] +reveal_type(np.median(AR_c16, overwrite_input=True)) # E: complexfloating[Any, Any] +reveal_type(np.median(AR_m)) # E: timedelta64 +reveal_type(np.median(AR_O)) # E: Any +reveal_type(np.median(AR_f8, keepdims=True)) # E: Any +reveal_type(np.median(AR_c16, axis=0)) # E: Any +reveal_type(np.median(AR_LIKE_f8, out=AR_c16)) # E: ndarray[Any, dtype[{complex128}]] + +reveal_type(np.add_newdoc_ufunc(np.add, "docstring")) # E: None + +reveal_type(np.percentile(AR_f8, 50)) # E: floating[Any] +reveal_type(np.percentile(AR_c16, 50)) # E: complexfloating[Any, Any] +reveal_type(np.percentile(AR_m, 50)) # E: timedelta64 +reveal_type(np.percentile(AR_M, 50, overwrite_input=True)) # E: datetime64 +reveal_type(np.percentile(AR_O, 50)) # E: Any +reveal_type(np.percentile(AR_f8, [50])) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(np.percentile(AR_c16, [50])) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] +reveal_type(np.percentile(AR_m, [50])) # E: ndarray[Any, dtype[timedelta64]] +reveal_type(np.percentile(AR_M, [50], method="nearest")) # E: ndarray[Any, dtype[datetime64]] +reveal_type(np.percentile(AR_O, [50])) # E: ndarray[Any, dtype[object_]] +reveal_type(np.percentile(AR_f8, [50], keepdims=True)) # E: Any +reveal_type(np.percentile(AR_f8, [50], axis=[1])) # E: Any +reveal_type(np.percentile(AR_f8, [50], out=AR_c16)) # E: ndarray[Any, dtype[{complex128}]] + +reveal_type(np.quantile(AR_f8, 0.5)) # E: floating[Any] 
+reveal_type(np.quantile(AR_c16, 0.5)) # E: complexfloating[Any, Any] +reveal_type(np.quantile(AR_m, 0.5)) # E: timedelta64 +reveal_type(np.quantile(AR_M, 0.5, overwrite_input=True)) # E: datetime64 +reveal_type(np.quantile(AR_O, 0.5)) # E: Any +reveal_type(np.quantile(AR_f8, [0.5])) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(np.quantile(AR_c16, [0.5])) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] +reveal_type(np.quantile(AR_m, [0.5])) # E: ndarray[Any, dtype[timedelta64]] +reveal_type(np.quantile(AR_M, [0.5], method="nearest")) # E: ndarray[Any, dtype[datetime64]] +reveal_type(np.quantile(AR_O, [0.5])) # E: ndarray[Any, dtype[object_]] +reveal_type(np.quantile(AR_f8, [0.5], keepdims=True)) # E: Any +reveal_type(np.quantile(AR_f8, [0.5], axis=[1])) # E: Any +reveal_type(np.quantile(AR_f8, [0.5], out=AR_c16)) # E: ndarray[Any, dtype[{complex128}]] + +reveal_type(np.meshgrid(AR_f8, AR_i8, copy=False)) # E: list[ndarray[Any, dtype[Any]]] +reveal_type(np.meshgrid(AR_f8, AR_i8, AR_c16, indexing="ij")) # E: list[ndarray[Any, dtype[Any]]] + +reveal_type(np.delete(AR_f8, np.s_[:5])) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.delete(AR_LIKE_f8, [0, 4, 9], axis=0)) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.insert(AR_f8, np.s_[:5], 5)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.insert(AR_LIKE_f8, [0, 4, 9], [0.5, 9.2, 7], axis=0)) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.append(AR_f8, 5)) # E: ndarray[Any, dtype[Any]] +reveal_type(np.append(AR_LIKE_f8, 1j, axis=0)) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.digitize(4.5, [1])) # E: {intp} +reveal_type(np.digitize(AR_f8, [1, 2, 3])) # E: ndarray[Any, dtype[{intp}]] diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/lib_polynomial.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/lib_polynomial.pyi new file mode 100644 index 0000000..de89507 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/lib_polynomial.pyi @@ -0,0 +1,111 @@ +import numpy as np +import numpy.typing as npt + +AR_b: npt.NDArray[np.bool_] +AR_u4: npt.NDArray[np.uint32] +AR_i8: npt.NDArray[np.int64] +AR_f8: npt.NDArray[np.float64] +AR_c16: npt.NDArray[np.complex128] +AR_O: npt.NDArray[np.object_] + +poly_obj: np.poly1d + +reveal_type(poly_obj.variable) # E: str +reveal_type(poly_obj.order) # E: int +reveal_type(poly_obj.o) # E: int +reveal_type(poly_obj.roots) # E: ndarray[Any, dtype[Any]] +reveal_type(poly_obj.r) # E: ndarray[Any, dtype[Any]] +reveal_type(poly_obj.coeffs) # E: ndarray[Any, dtype[Any]] +reveal_type(poly_obj.c) # E: ndarray[Any, dtype[Any]] +reveal_type(poly_obj.coef) # E: ndarray[Any, dtype[Any]] +reveal_type(poly_obj.coefficients) # E: ndarray[Any, dtype[Any]] +reveal_type(poly_obj.__hash__) # E: None + +reveal_type(poly_obj(1)) # E: Any +reveal_type(poly_obj([1])) # E: ndarray[Any, dtype[Any]] +reveal_type(poly_obj(poly_obj)) # E: poly1d + +reveal_type(len(poly_obj)) # E: int +reveal_type(-poly_obj) # E: poly1d +reveal_type(+poly_obj) # E: poly1d + +reveal_type(poly_obj * 5) # E: poly1d +reveal_type(5 * poly_obj) # E: poly1d +reveal_type(poly_obj + 5) # E: poly1d +reveal_type(5 + poly_obj) # E: poly1d +reveal_type(poly_obj - 5) # E: poly1d +reveal_type(5 - poly_obj) # E: poly1d +reveal_type(poly_obj**1) # E: poly1d +reveal_type(poly_obj**1.0) # E: poly1d +reveal_type(poly_obj / 5) # E: poly1d +reveal_type(5 / poly_obj) # E: poly1d + +reveal_type(poly_obj[0]) # E: Any +poly_obj[0] = 5 +reveal_type(iter(poly_obj)) # E: Iterator[Any] 
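+
+# Aside (runtime sketch, not part of the vendored test data): the poly1d
+# magic methods revealed above at runtime; ``p`` represents x**2 + 2*x + 3.
+p = np.poly1d([1, 2, 3])
+assert p(0) == 3                      # __call__ evaluates the polynomial
+assert len(p) == 2                    # len() equals the polynomial order
+assert isinstance(p * 2, np.poly1d)   # arithmetic stays within poly1d
+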
+reveal_type(poly_obj.deriv()) # E: poly1d +reveal_type(poly_obj.integ()) # E: poly1d + +reveal_type(np.poly(poly_obj)) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(np.poly(AR_f8)) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(np.poly(AR_c16)) # E: ndarray[Any, dtype[floating[Any]]] + +reveal_type(np.polyint(poly_obj)) # E: poly1d +reveal_type(np.polyint(AR_f8)) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(np.polyint(AR_f8, k=AR_c16)) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] +reveal_type(np.polyint(AR_O, m=2)) # E: ndarray[Any, dtype[object_]] + +reveal_type(np.polyder(poly_obj)) # E: poly1d +reveal_type(np.polyder(AR_f8)) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(np.polyder(AR_c16)) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] +reveal_type(np.polyder(AR_O, m=2)) # E: ndarray[Any, dtype[object_]] + +reveal_type(np.polyfit(AR_f8, AR_f8, 2)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.polyfit(AR_f8, AR_i8, 1, full=True)) # E: Tuple[ndarray[Any, dtype[{float64}]], ndarray[Any, dtype[{float64}]], ndarray[Any, dtype[signedinteger[typing._32Bit]]], ndarray[Any, dtype[{float64}]], ndarray[Any, dtype[{float64}]]] +reveal_type(np.polyfit(AR_u4, AR_f8, 1.0, cov="unscaled")) # E: Tuple[ndarray[Any, dtype[{float64}]], ndarray[Any, dtype[{float64}]]] +reveal_type(np.polyfit(AR_c16, AR_f8, 2)) # E: ndarray[Any, dtype[{complex128}]] +reveal_type(np.polyfit(AR_f8, AR_c16, 1, full=True)) # E: Tuple[ndarray[Any, dtype[{complex128}]], ndarray[Any, dtype[{float64}]], ndarray[Any, dtype[signedinteger[typing._32Bit]]], ndarray[Any, dtype[{float64}]], ndarray[Any, dtype[{float64}]]] +reveal_type(np.polyfit(AR_u4, AR_c16, 1.0, cov=True)) # E: Tuple[ndarray[Any, dtype[{complex128}]], ndarray[Any, dtype[{complex128}]]] + +reveal_type(np.polyval(AR_b, AR_b)) # E: ndarray[Any, dtype[{int64}]] +reveal_type(np.polyval(AR_u4, AR_b)) # E: ndarray[Any, dtype[unsignedinteger[Any]]] +reveal_type(np.polyval(AR_i8, AR_i8)) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(np.polyval(AR_f8, AR_i8)) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(np.polyval(AR_i8, AR_c16)) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] +reveal_type(np.polyval(AR_O, AR_O)) # E: ndarray[Any, dtype[object_]] + +reveal_type(np.polyadd(poly_obj, AR_i8)) # E: poly1d +reveal_type(np.polyadd(AR_f8, poly_obj)) # E: poly1d +reveal_type(np.polyadd(AR_b, AR_b)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.polyadd(AR_u4, AR_b)) # E: ndarray[Any, dtype[unsignedinteger[Any]]] +reveal_type(np.polyadd(AR_i8, AR_i8)) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(np.polyadd(AR_f8, AR_i8)) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(np.polyadd(AR_i8, AR_c16)) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] +reveal_type(np.polyadd(AR_O, AR_O)) # E: ndarray[Any, dtype[object_]] + +reveal_type(np.polysub(poly_obj, AR_i8)) # E: poly1d +reveal_type(np.polysub(AR_f8, poly_obj)) # E: poly1d +reveal_type(np.polysub(AR_b, AR_b)) # E: +reveal_type(np.polysub(AR_u4, AR_b)) # E: ndarray[Any, dtype[unsignedinteger[Any]]] +reveal_type(np.polysub(AR_i8, AR_i8)) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(np.polysub(AR_f8, AR_i8)) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(np.polysub(AR_i8, AR_c16)) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] +reveal_type(np.polysub(AR_O, AR_O)) # E: ndarray[Any, dtype[object_]] + +reveal_type(np.polymul(poly_obj, AR_i8)) # E: poly1d +reveal_type(np.polymul(AR_f8, poly_obj)) # E: poly1d 
+reveal_type(np.polymul(AR_b, AR_b)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.polymul(AR_u4, AR_b)) # E: ndarray[Any, dtype[unsignedinteger[Any]]] +reveal_type(np.polymul(AR_i8, AR_i8)) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(np.polymul(AR_f8, AR_i8)) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(np.polymul(AR_i8, AR_c16)) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] +reveal_type(np.polymul(AR_O, AR_O)) # E: ndarray[Any, dtype[object_]] + +reveal_type(np.polydiv(poly_obj, AR_i8)) # E: poly1d +reveal_type(np.polydiv(AR_f8, poly_obj)) # E: poly1d +reveal_type(np.polydiv(AR_b, AR_b)) # E: Tuple[ndarray[Any, dtype[floating[Any]]], ndarray[Any, dtype[floating[Any]]]] +reveal_type(np.polydiv(AR_u4, AR_b)) # E: Tuple[ndarray[Any, dtype[floating[Any]]], ndarray[Any, dtype[floating[Any]]]] +reveal_type(np.polydiv(AR_i8, AR_i8)) # E: Tuple[ndarray[Any, dtype[floating[Any]]], ndarray[Any, dtype[floating[Any]]]] +reveal_type(np.polydiv(AR_f8, AR_i8)) # E: Tuple[ndarray[Any, dtype[floating[Any]]], ndarray[Any, dtype[floating[Any]]]] +reveal_type(np.polydiv(AR_i8, AR_c16)) # E: Tuple[ndarray[Any, dtype[complexfloating[Any, Any]]], ndarray[Any, dtype[complexfloating[Any, Any]]]] +reveal_type(np.polydiv(AR_O, AR_O)) # E: Tuple[ndarray[Any, dtype[Any]], ndarray[Any, dtype[Any]]] diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/lib_utils.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/lib_utils.pyi new file mode 100644 index 0000000..d820127 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/lib_utils.pyi @@ -0,0 +1,30 @@ +from io import StringIO +from typing import Any, Dict + +import numpy as np + +AR: np.ndarray[Any, np.dtype[np.float64]] +AR_DICT: Dict[str, np.ndarray[Any, np.dtype[np.float64]]] +FILE: StringIO + +def func(a: int) -> bool: ... 
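+
+# Aside (runtime sketch, not part of the vendored test data): as the reveal
+# below shows, np.deprecate applied to a function hands back a callable of
+# the same signature; the wrapper emits a DeprecationWarning before
+# delegating to the original. ``shiny_func`` is a made-up replacement name.
+old = np.deprecate(func, old_name="func", new_name="shiny_func")
+# old(1) warns that ``func`` is deprecated in favour of ``shiny_func``,
+# then returns func(1).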
+ +reveal_type(np.deprecate(func)) # E: def (a: builtins.int) -> builtins.bool +reveal_type(np.deprecate()) # E: _Deprecate + +reveal_type(np.deprecate_with_doc("test")) # E: _Deprecate +reveal_type(np.deprecate_with_doc(None)) # E: _Deprecate + +reveal_type(np.byte_bounds(AR)) # E: Tuple[builtins.int, builtins.int] +reveal_type(np.byte_bounds(np.float64())) # E: Tuple[builtins.int, builtins.int] + +reveal_type(np.who(None)) # E: None +reveal_type(np.who(AR_DICT)) # E: None + +reveal_type(np.info(1, output=FILE)) # E: None + +reveal_type(np.source(np.interp, output=FILE)) # E: None + +reveal_type(np.lookfor("binary representation", output=FILE)) # E: None + +reveal_type(np.safe_eval("1 + 1")) # E: Any diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/lib_version.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/lib_version.pyi new file mode 100644 index 0000000..e6f6955 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/lib_version.pyi @@ -0,0 +1,18 @@ +from numpy.lib import NumpyVersion + +version = NumpyVersion("1.8.0") + +reveal_type(version.vstring) # E: str +reveal_type(version.version) # E: str +reveal_type(version.major) # E: int +reveal_type(version.minor) # E: int +reveal_type(version.bugfix) # E: int +reveal_type(version.pre_release) # E: str +reveal_type(version.is_devversion) # E: bool + +reveal_type(version == version) # E: bool +reveal_type(version != version) # E: bool +reveal_type(version < "1.8.0") # E: bool +reveal_type(version <= version) # E: bool +reveal_type(version > version) # E: bool +reveal_type(version >= "1.8.0") # E: bool diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/linalg.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/linalg.pyi new file mode 100644 index 0000000..19e13ae --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/linalg.pyi @@ -0,0 +1,97 @@ +import numpy as np +import numpy.typing as npt + +AR_i8: npt.NDArray[np.int64] +AR_f8: npt.NDArray[np.float64] +AR_c16: npt.NDArray[np.complex128] +AR_O: npt.NDArray[np.object_] +AR_m: npt.NDArray[np.timedelta64] +AR_S: npt.NDArray[np.str_] + +reveal_type(np.linalg.tensorsolve(AR_i8, AR_i8)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.linalg.tensorsolve(AR_i8, AR_f8)) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(np.linalg.tensorsolve(AR_c16, AR_f8)) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] + +reveal_type(np.linalg.solve(AR_i8, AR_i8)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.linalg.solve(AR_i8, AR_f8)) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(np.linalg.solve(AR_c16, AR_f8)) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] + +reveal_type(np.linalg.tensorinv(AR_i8)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.linalg.tensorinv(AR_f8)) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(np.linalg.tensorinv(AR_c16)) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] + +reveal_type(np.linalg.inv(AR_i8)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.linalg.inv(AR_f8)) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(np.linalg.inv(AR_c16)) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] + +reveal_type(np.linalg.matrix_power(AR_i8, -1)) # E: ndarray[Any, dtype[Any]] +reveal_type(np.linalg.matrix_power(AR_f8, 0)) # E: ndarray[Any, dtype[Any]] +reveal_type(np.linalg.matrix_power(AR_c16, 1)) # E: ndarray[Any, dtype[Any]] +reveal_type(np.linalg.matrix_power(AR_O, 2)) # E: 
ndarray[Any, dtype[Any]] + +reveal_type(np.linalg.cholesky(AR_i8)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.linalg.cholesky(AR_f8)) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(np.linalg.cholesky(AR_c16)) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] + +reveal_type(np.linalg.qr(AR_i8)) # E: Tuple[ndarray[Any, dtype[{float64}]], ndarray[Any, dtype[{float64}]]] +reveal_type(np.linalg.qr(AR_f8)) # E: Tuple[ndarray[Any, dtype[floating[Any]]], ndarray[Any, dtype[floating[Any]]]] +reveal_type(np.linalg.qr(AR_c16)) # E: Tuple[ndarray[Any, dtype[complexfloating[Any, Any]]], ndarray[Any, dtype[complexfloating[Any, Any]]]] + +reveal_type(np.linalg.eigvals(AR_i8)) # E: Union[ndarray[Any, dtype[{float64}]], ndarray[Any, dtype[{complex128}]]] +reveal_type(np.linalg.eigvals(AR_f8)) # E: Union[ndarray[Any, dtype[floating[Any]]], ndarray[Any, dtype[complexfloating[Any, Any]]]] +reveal_type(np.linalg.eigvals(AR_c16)) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] + +reveal_type(np.linalg.eigvalsh(AR_i8)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.linalg.eigvalsh(AR_f8)) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(np.linalg.eigvalsh(AR_c16)) # E: ndarray[Any, dtype[floating[Any]]] + +reveal_type(np.linalg.eig(AR_i8)) # E: Union[Tuple[ndarray[Any, dtype[{float64}]], ndarray[Any, dtype[{float64}]]], Tuple[ndarray[Any, dtype[{complex128}]], ndarray[Any, dtype[{complex128}]]]] +reveal_type(np.linalg.eig(AR_f8)) # E: Union[Tuple[ndarray[Any, dtype[floating[Any]]], ndarray[Any, dtype[floating[Any]]]], Tuple[ndarray[Any, dtype[complexfloating[Any, Any]]], ndarray[Any, dtype[complexfloating[Any, Any]]]]] +reveal_type(np.linalg.eig(AR_c16)) # E: Tuple[ndarray[Any, dtype[complexfloating[Any, Any]]], ndarray[Any, dtype[complexfloating[Any, Any]]]] + +reveal_type(np.linalg.eigh(AR_i8)) # E: Tuple[ndarray[Any, dtype[{float64}]], ndarray[Any, dtype[{float64}]]] +reveal_type(np.linalg.eigh(AR_f8)) # E: Tuple[ndarray[Any, dtype[floating[Any]]], ndarray[Any, dtype[floating[Any]]]] +reveal_type(np.linalg.eigh(AR_c16)) # E: Tuple[ndarray[Any, dtype[floating[Any]]], ndarray[Any, dtype[complexfloating[Any, Any]]]] + +reveal_type(np.linalg.svd(AR_i8)) # E: Tuple[ndarray[Any, dtype[{float64}]], ndarray[Any, dtype[{float64}]], ndarray[Any, dtype[{float64}]]] +reveal_type(np.linalg.svd(AR_f8)) # E: Tuple[ndarray[Any, dtype[floating[Any]]], ndarray[Any, dtype[floating[Any]]], ndarray[Any, dtype[floating[Any]]]] +reveal_type(np.linalg.svd(AR_c16)) # E: Tuple[ndarray[Any, dtype[complexfloating[Any, Any]]], ndarray[Any, dtype[floating[Any]]], ndarray[Any, dtype[complexfloating[Any, Any]]]] +reveal_type(np.linalg.svd(AR_i8, compute_uv=False)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.linalg.svd(AR_f8, compute_uv=False)) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(np.linalg.svd(AR_c16, compute_uv=False)) # E: ndarray[Any, dtype[floating[Any]]] + +reveal_type(np.linalg.cond(AR_i8)) # E: Any +reveal_type(np.linalg.cond(AR_f8)) # E: Any +reveal_type(np.linalg.cond(AR_c16)) # E: Any + +reveal_type(np.linalg.matrix_rank(AR_i8)) # E: Any +reveal_type(np.linalg.matrix_rank(AR_f8)) # E: Any +reveal_type(np.linalg.matrix_rank(AR_c16)) # E: Any + +reveal_type(np.linalg.pinv(AR_i8)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.linalg.pinv(AR_f8)) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(np.linalg.pinv(AR_c16)) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] + +reveal_type(np.linalg.slogdet(AR_i8)) # E: Tuple[Any, Any] +reveal_type(np.linalg.slogdet(AR_f8)) # 
E: Tuple[Any, Any] +reveal_type(np.linalg.slogdet(AR_c16)) # E: Tuple[Any, Any] + +reveal_type(np.linalg.det(AR_i8)) # E: Any +reveal_type(np.linalg.det(AR_f8)) # E: Any +reveal_type(np.linalg.det(AR_c16)) # E: Any + +reveal_type(np.linalg.lstsq(AR_i8, AR_i8)) # E: Tuple[ndarray[Any, dtype[{float64}]], ndarray[Any, dtype[{float64}]], {int32}, ndarray[Any, dtype[{float64}]]] +reveal_type(np.linalg.lstsq(AR_i8, AR_f8)) # E: Tuple[ndarray[Any, dtype[floating[Any]]], ndarray[Any, dtype[floating[Any]]], {int32}, ndarray[Any, dtype[floating[Any]]]] +reveal_type(np.linalg.lstsq(AR_f8, AR_c16)) # E: Tuple[ndarray[Any, dtype[complexfloating[Any, Any]]], ndarray[Any, dtype[floating[Any]]], {int32}, ndarray[Any, dtype[floating[Any]]]] + +reveal_type(np.linalg.norm(AR_i8)) # E: floating[Any] +reveal_type(np.linalg.norm(AR_f8)) # E: floating[Any] +reveal_type(np.linalg.norm(AR_c16)) # E: floating[Any] +reveal_type(np.linalg.norm(AR_S)) # E: floating[Any] +reveal_type(np.linalg.norm(AR_f8, axis=0)) # E: Any + +reveal_type(np.linalg.multi_dot([AR_i8, AR_i8])) # E: Any +reveal_type(np.linalg.multi_dot([AR_i8, AR_f8])) # E: Any +reveal_type(np.linalg.multi_dot([AR_f8, AR_c16])) # E: Any +reveal_type(np.linalg.multi_dot([AR_O, AR_O])) # E: Any +reveal_type(np.linalg.multi_dot([AR_m, AR_m])) # E: Any diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/matrix.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/matrix.pyi new file mode 100644 index 0000000..21c3906 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/matrix.pyi @@ -0,0 +1,69 @@ +from typing import Any +import numpy as np +import numpy.typing as npt + +mat: np.matrix[Any, np.dtype[np.int64]] +ar_f8: npt.NDArray[np.float64] + +reveal_type(mat * 5) # E: matrix[Any, Any] +reveal_type(5 * mat) # E: matrix[Any, Any] +mat *= 5 + +reveal_type(mat**5) # E: matrix[Any, Any] +mat **= 5 + +reveal_type(mat.sum()) # E: Any +reveal_type(mat.mean()) # E: Any +reveal_type(mat.std()) # E: Any +reveal_type(mat.var()) # E: Any +reveal_type(mat.prod()) # E: Any +reveal_type(mat.any()) # E: bool_ +reveal_type(mat.all()) # E: bool_ +reveal_type(mat.max()) # E: {int64} +reveal_type(mat.min()) # E: {int64} +reveal_type(mat.argmax()) # E: {intp} +reveal_type(mat.argmin()) # E: {intp} +reveal_type(mat.ptp()) # E: {int64} + +reveal_type(mat.sum(axis=0)) # E: matrix[Any, Any] +reveal_type(mat.mean(axis=0)) # E: matrix[Any, Any] +reveal_type(mat.std(axis=0)) # E: matrix[Any, Any] +reveal_type(mat.var(axis=0)) # E: matrix[Any, Any] +reveal_type(mat.prod(axis=0)) # E: matrix[Any, Any] +reveal_type(mat.any(axis=0)) # E: matrix[Any, dtype[bool_]] +reveal_type(mat.all(axis=0)) # E: matrix[Any, dtype[bool_]] +reveal_type(mat.max(axis=0)) # E: matrix[Any, dtype[{int64}]] +reveal_type(mat.min(axis=0)) # E: matrix[Any, dtype[{int64}]] +reveal_type(mat.argmax(axis=0)) # E: matrix[Any, dtype[{intp}]] +reveal_type(mat.argmin(axis=0)) # E: matrix[Any, dtype[{intp}]] +reveal_type(mat.ptp(axis=0)) # E: matrix[Any, dtype[{int64}]] + +reveal_type(mat.sum(out=ar_f8)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(mat.mean(out=ar_f8)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(mat.std(out=ar_f8)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(mat.var(out=ar_f8)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(mat.prod(out=ar_f8)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(mat.any(out=ar_f8)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(mat.all(out=ar_f8)) # E: ndarray[Any, dtype[{float64}]] 
+reveal_type(mat.max(out=ar_f8)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(mat.min(out=ar_f8)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(mat.argmax(out=ar_f8)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(mat.argmin(out=ar_f8)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(mat.ptp(out=ar_f8)) # E: ndarray[Any, dtype[{float64}]] + +reveal_type(mat.T) # E: matrix[Any, dtype[{int64}]] +reveal_type(mat.I) # E: matrix[Any, Any] +reveal_type(mat.A) # E: ndarray[Any, dtype[{int64}]] +reveal_type(mat.A1) # E: ndarray[Any, dtype[{int64}]] +reveal_type(mat.H) # E: matrix[Any, dtype[{int64}]] +reveal_type(mat.getT()) # E: matrix[Any, dtype[{int64}]] +reveal_type(mat.getI()) # E: matrix[Any, Any] +reveal_type(mat.getA()) # E: ndarray[Any, dtype[{int64}]] +reveal_type(mat.getA1()) # E: ndarray[Any, dtype[{int64}]] +reveal_type(mat.getH()) # E: matrix[Any, dtype[{int64}]] + +reveal_type(np.bmat(ar_f8)) # E: matrix[Any, Any] +reveal_type(np.bmat([[0, 1, 2]])) # E: matrix[Any, Any] +reveal_type(np.bmat("mat")) # E: matrix[Any, Any] + +reveal_type(np.asmatrix(ar_f8, dtype=np.int64)) # E: matrix[Any, Any] diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/memmap.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/memmap.pyi new file mode 100644 index 0000000..86de8eb --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/memmap.pyi @@ -0,0 +1,16 @@ +import numpy as np +from typing import Any + +memmap_obj: np.memmap[Any, np.dtype[np.str_]] + +reveal_type(np.memmap.__array_priority__) # E: float +reveal_type(memmap_obj.__array_priority__) # E: float +reveal_type(memmap_obj.filename) # E: Union[builtins.str, None] +reveal_type(memmap_obj.offset) # E: int +reveal_type(memmap_obj.mode) # E: str +reveal_type(memmap_obj.flush()) # E: None + +reveal_type(np.memmap("file.txt", offset=5)) # E: memmap[Any, dtype[{uint8}]] +reveal_type(np.memmap(b"file.txt", dtype=np.float64, shape=(10, 3))) # E: memmap[Any, dtype[{float64}]] +with open("file.txt", "rb") as f: + reveal_type(np.memmap(f, dtype=float, order="K")) # E: memmap[Any, dtype[Any]] diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/mod.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/mod.pyi new file mode 100644 index 0000000..b2790b7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/mod.pyi @@ -0,0 +1,147 @@ +from typing import Any +import numpy as np + +f8 = np.float64() +i8 = np.int64() +u8 = np.uint64() + +f4 = np.float32() +i4 = np.int32() +u4 = np.uint32() + +td = np.timedelta64(0, "D") +b_ = np.bool_() + +b = bool() +f = float() +i = int() + +AR_b: np.ndarray[Any, np.dtype[np.bool_]] +AR_m: np.ndarray[Any, np.dtype[np.timedelta64]] + +# Time structures + +reveal_type(td % td) # E: timedelta64 +reveal_type(AR_m % td) # E: Any +reveal_type(td % AR_m) # E: Any + +reveal_type(divmod(td, td)) # E: Tuple[{int64}, timedelta64] +reveal_type(divmod(AR_m, td)) # E: Tuple[ndarray[Any, dtype[signedinteger[typing._64Bit]]], ndarray[Any, dtype[timedelta64]]] +reveal_type(divmod(td, AR_m)) # E: Tuple[ndarray[Any, dtype[signedinteger[typing._64Bit]]], ndarray[Any, dtype[timedelta64]]] + +# Bool + +reveal_type(b_ % b) # E: {int8} +reveal_type(b_ % i) # E: {int_} +reveal_type(b_ % f) # E: {float64} +reveal_type(b_ % b_) # E: {int8} +reveal_type(b_ % i8) # E: {int64} +reveal_type(b_ % u8) # E: {uint64} +reveal_type(b_ % f8) # E: {float64} +reveal_type(b_ % AR_b) # E: ndarray[Any, 
dtype[{int8}]]
+
+reveal_type(divmod(b_, b)) # E: Tuple[{int8}, {int8}]
+reveal_type(divmod(b_, i)) # E: Tuple[{int_}, {int_}]
+reveal_type(divmod(b_, f)) # E: Tuple[{float64}, {float64}]
+reveal_type(divmod(b_, b_)) # E: Tuple[{int8}, {int8}]
+reveal_type(divmod(b_, i8)) # E: Tuple[{int64}, {int64}]
+reveal_type(divmod(b_, u8)) # E: Tuple[{uint64}, {uint64}]
+reveal_type(divmod(b_, f8)) # E: Tuple[{float64}, {float64}]
+reveal_type(divmod(b_, AR_b)) # E: Tuple[ndarray[Any, dtype[{int8}]], ndarray[Any, dtype[{int8}]]]
+
+reveal_type(b % b_) # E: {int8}
+reveal_type(i % b_) # E: {int_}
+reveal_type(f % b_) # E: {float64}
+reveal_type(b_ % b_) # E: {int8}
+reveal_type(i8 % b_) # E: {int64}
+reveal_type(u8 % b_) # E: {uint64}
+reveal_type(f8 % b_) # E: {float64}
+reveal_type(AR_b % b_) # E: ndarray[Any, dtype[{int8}]]
+
+reveal_type(divmod(b, b_)) # E: Tuple[{int8}, {int8}]
+reveal_type(divmod(i, b_)) # E: Tuple[{int_}, {int_}]
+reveal_type(divmod(f, b_)) # E: Tuple[{float64}, {float64}]
+reveal_type(divmod(b_, b_)) # E: Tuple[{int8}, {int8}]
+reveal_type(divmod(i8, b_)) # E: Tuple[{int64}, {int64}]
+reveal_type(divmod(u8, b_)) # E: Tuple[{uint64}, {uint64}]
+reveal_type(divmod(f8, b_)) # E: Tuple[{float64}, {float64}]
+reveal_type(divmod(AR_b, b_)) # E: Tuple[ndarray[Any, dtype[{int8}]], ndarray[Any, dtype[{int8}]]]
+
+# int
+
+reveal_type(i8 % b) # E: {int64}
+reveal_type(i8 % i) # E: {int64}
+reveal_type(i8 % f) # E: {float64}
+reveal_type(i8 % i8) # E: {int64}
+reveal_type(i8 % f8) # E: {float64}
+reveal_type(i4 % i8) # E: {int64}
+reveal_type(i4 % f8) # E: {float64}
+reveal_type(i4 % i4) # E: {int32}
+reveal_type(i4 % f4) # E: {float32}
+reveal_type(i8 % AR_b) # E: ndarray[Any, dtype[signedinteger[Any]]]
+
+reveal_type(divmod(i8, b)) # E: Tuple[{int64}, {int64}]
+reveal_type(divmod(i8, i)) # E: Tuple[{int64}, {int64}]
+reveal_type(divmod(i8, f)) # E: Tuple[{float64}, {float64}]
+reveal_type(divmod(i8, i8)) # E: Tuple[{int64}, {int64}]
+reveal_type(divmod(i8, f8)) # E: Tuple[{float64}, {float64}]
+reveal_type(divmod(i8, i4)) # E: Tuple[{int64}, {int64}]
+reveal_type(divmod(i8, f4)) # E: Tuple[{float64}, {float64}]
+reveal_type(divmod(i4, i4)) # E: Tuple[{int32}, {int32}]
+reveal_type(divmod(i4, f4)) # E: Tuple[{float32}, {float32}]
+reveal_type(divmod(i8, AR_b)) # E: Tuple[ndarray[Any, dtype[signedinteger[Any]]], ndarray[Any, dtype[signedinteger[Any]]]]
+
+reveal_type(b % i8) # E: {int64}
+reveal_type(i % i8) # E: {int64}
+reveal_type(f % i8) # E: {float64}
+reveal_type(i8 % i8) # E: {int64}
+reveal_type(f8 % i8) # E: {float64}
+reveal_type(i8 % i4) # E: {int64}
+reveal_type(f8 % i4) # E: {float64}
+reveal_type(i4 % i4) # E: {int32}
+reveal_type(f4 % i4) # E: {float32}
+reveal_type(AR_b % i8) # E: ndarray[Any, dtype[signedinteger[Any]]]
+
+reveal_type(divmod(b, i8)) # E: Tuple[{int64}, {int64}]
+reveal_type(divmod(i, i8)) # E: Tuple[{int64}, {int64}]
+reveal_type(divmod(f, i8)) # E: Tuple[{float64}, {float64}]
+reveal_type(divmod(i8, i8)) # E: Tuple[{int64}, {int64}]
+reveal_type(divmod(f8, i8)) # E: Tuple[{float64}, {float64}]
+reveal_type(divmod(i4, i8)) # E: Tuple[{int64}, {int64}]
+reveal_type(divmod(f4, i8)) # E: Tuple[{float64}, {float64}]
+reveal_type(divmod(i4, i4)) # E: Tuple[{int32}, {int32}]
+reveal_type(divmod(f4, i4)) # E: Tuple[{float32}, {float32}]
+reveal_type(divmod(AR_b, i8)) # E: Tuple[ndarray[Any, dtype[signedinteger[Any]]], ndarray[Any, dtype[signedinteger[Any]]]]
+
+# float
+
+reveal_type(f8 % b) # E: {float64}
+reveal_type(f8 % i) # E: {float64}
+reveal_type(f8 % f) # E:
{float64} +reveal_type(i8 % f4) # E: {float64} +reveal_type(f4 % f4) # E: {float32} +reveal_type(f8 % AR_b) # E: ndarray[Any, dtype[floating[Any]]] + +reveal_type(divmod(f8, b)) # E: Tuple[{float64}, {float64}] +reveal_type(divmod(f8, i)) # E: Tuple[{float64}, {float64}] +reveal_type(divmod(f8, f)) # E: Tuple[{float64}, {float64}] +reveal_type(divmod(f8, f8)) # E: Tuple[{float64}, {float64}] +reveal_type(divmod(f8, f4)) # E: Tuple[{float64}, {float64}] +reveal_type(divmod(f4, f4)) # E: Tuple[{float32}, {float32}] +reveal_type(divmod(f8, AR_b)) # E: Tuple[ndarray[Any, dtype[floating[Any]]], ndarray[Any, dtype[floating[Any]]]] + +reveal_type(b % f8) # E: {float64} +reveal_type(i % f8) # E: {float64} +reveal_type(f % f8) # E: {float64} +reveal_type(f8 % f8) # E: {float64} +reveal_type(f8 % f8) # E: {float64} +reveal_type(f4 % f4) # E: {float32} +reveal_type(AR_b % f8) # E: ndarray[Any, dtype[floating[Any]]] + +reveal_type(divmod(b, f8)) # E: Tuple[{float64}, {float64}] +reveal_type(divmod(i, f8)) # E: Tuple[{float64}, {float64}] +reveal_type(divmod(f, f8)) # E: Tuple[{float64}, {float64}] +reveal_type(divmod(f8, f8)) # E: Tuple[{float64}, {float64}] +reveal_type(divmod(f4, f8)) # E: Tuple[{float64}, {float64}] +reveal_type(divmod(f4, f4)) # E: Tuple[{float32}, {float32}] +reveal_type(divmod(AR_b, f8)) # E: Tuple[ndarray[Any, dtype[floating[Any]]], ndarray[Any, dtype[floating[Any]]]] diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/modules.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/modules.pyi new file mode 100644 index 0000000..ba830eb --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/modules.pyi @@ -0,0 +1,47 @@ +import numpy as np +from numpy import f2py + +reveal_type(np) # E: ModuleType + +reveal_type(np.char) # E: ModuleType +reveal_type(np.ctypeslib) # E: ModuleType +reveal_type(np.emath) # E: ModuleType +reveal_type(np.fft) # E: ModuleType +reveal_type(np.lib) # E: ModuleType +reveal_type(np.linalg) # E: ModuleType +reveal_type(np.ma) # E: ModuleType +reveal_type(np.matrixlib) # E: ModuleType +reveal_type(np.polynomial) # E: ModuleType +reveal_type(np.random) # E: ModuleType +reveal_type(np.rec) # E: ModuleType +reveal_type(np.testing) # E: ModuleType +reveal_type(np.version) # E: ModuleType + +reveal_type(np.lib.format) # E: ModuleType +reveal_type(np.lib.mixins) # E: ModuleType +reveal_type(np.lib.scimath) # E: ModuleType +reveal_type(np.lib.stride_tricks) # E: ModuleType +reveal_type(np.ma.extras) # E: ModuleType +reveal_type(np.polynomial.chebyshev) # E: ModuleType +reveal_type(np.polynomial.hermite) # E: ModuleType +reveal_type(np.polynomial.hermite_e) # E: ModuleType +reveal_type(np.polynomial.laguerre) # E: ModuleType +reveal_type(np.polynomial.legendre) # E: ModuleType +reveal_type(np.polynomial.polynomial) # E: ModuleType + +reveal_type(np.__path__) # E: list[builtins.str] +reveal_type(np.__version__) # E: str +reveal_type(np.__git_version__) # E: str +reveal_type(np.test) # E: _pytesttester.PytestTester +reveal_type(np.test.module_name) # E: str + +reveal_type(np.__all__) # E: list[builtins.str] +reveal_type(np.char.__all__) # E: list[builtins.str] +reveal_type(np.ctypeslib.__all__) # E: list[builtins.str] +reveal_type(np.emath.__all__) # E: list[builtins.str] +reveal_type(np.lib.__all__) # E: list[builtins.str] +reveal_type(np.ma.__all__) # E: list[builtins.str] +reveal_type(np.random.__all__) # E: list[builtins.str] +reveal_type(np.rec.__all__) # E: list[builtins.str] 
+reveal_type(np.testing.__all__) # E: list[builtins.str] +reveal_type(f2py.__all__) # E: list[builtins.str] diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/multiarray.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/multiarray.pyi new file mode 100644 index 0000000..56706d8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/multiarray.pyi @@ -0,0 +1,144 @@ +import datetime as dt +from typing import Any, List, TypeVar +from pathlib import Path + +import numpy as np +import numpy.typing as npt + +_SCT = TypeVar("_SCT", bound=np.generic, covariant=True) + +class SubClass(np.ndarray[Any, np.dtype[_SCT]]): ... + +subclass: SubClass[np.float64] + +AR_f8: npt.NDArray[np.float64] +AR_i8: npt.NDArray[np.int64] +AR_u1: npt.NDArray[np.uint8] +AR_m: npt.NDArray[np.timedelta64] +AR_M: npt.NDArray[np.datetime64] + +AR_LIKE_f: List[float] +AR_LIKE_i: List[int] + +m: np.timedelta64 +M: np.datetime64 + +b_f8 = np.broadcast(AR_f8) +b_i8_f8_f8 = np.broadcast(AR_i8, AR_f8, AR_f8) + +nditer_obj: np.nditer + +date_scalar: dt.date +date_seq: list[dt.date] +timedelta_seq: list[dt.timedelta] + +def func(a: int) -> bool: ... + +reveal_type(next(b_f8)) # E: tuple[Any] +reveal_type(b_f8.reset()) # E: None +reveal_type(b_f8.index) # E: int +reveal_type(b_f8.iters) # E: tuple[flatiter[Any]] +reveal_type(b_f8.nd) # E: int +reveal_type(b_f8.ndim) # E: int +reveal_type(b_f8.numiter) # E: int +reveal_type(b_f8.shape) # E: tuple[builtins.int] +reveal_type(b_f8.size) # E: int + +reveal_type(next(b_i8_f8_f8)) # E: tuple[Any] +reveal_type(b_i8_f8_f8.reset()) # E: None +reveal_type(b_i8_f8_f8.index) # E: int +reveal_type(b_i8_f8_f8.iters) # E: tuple[flatiter[Any]] +reveal_type(b_i8_f8_f8.nd) # E: int +reveal_type(b_i8_f8_f8.ndim) # E: int +reveal_type(b_i8_f8_f8.numiter) # E: int +reveal_type(b_i8_f8_f8.shape) # E: tuple[builtins.int] +reveal_type(b_i8_f8_f8.size) # E: int + +reveal_type(np.inner(AR_f8, AR_i8)) # E: Any + +reveal_type(np.where([True, True, False])) # E: tuple[ndarray[Any, dtype[{intp}]]] +reveal_type(np.where([True, True, False], 1, 0)) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.lexsort([0, 1, 2])) # E: Any + +reveal_type(np.can_cast(np.dtype("i8"), int)) # E: bool +reveal_type(np.can_cast(AR_f8, "f8")) # E: bool +reveal_type(np.can_cast(AR_f8, np.complex128, casting="unsafe")) # E: bool + +reveal_type(np.min_scalar_type([1])) # E: dtype[Any] +reveal_type(np.min_scalar_type(AR_f8)) # E: dtype[Any] + +reveal_type(np.result_type(int, [1])) # E: dtype[Any] +reveal_type(np.result_type(AR_f8, AR_u1)) # E: dtype[Any] +reveal_type(np.result_type(AR_f8, np.complex128)) # E: dtype[Any] + +reveal_type(np.dot(AR_LIKE_f, AR_i8)) # E: Any +reveal_type(np.dot(AR_u1, 1)) # E: Any +reveal_type(np.dot(1.5j, 1)) # E: Any +reveal_type(np.dot(AR_u1, 1, out=AR_f8)) # E: ndarray[Any, dtype[{float64}]] + +reveal_type(np.vdot(AR_LIKE_f, AR_i8)) # E: floating[Any] +reveal_type(np.vdot(AR_u1, 1)) # E: signedinteger[Any] +reveal_type(np.vdot(1.5j, 1)) # E: complexfloating[Any, Any] + +reveal_type(np.bincount(AR_i8)) # E: ndarray[Any, dtype[{intp}]] + +reveal_type(np.copyto(AR_f8, [1., 1.5, 1.6])) # E: None + +reveal_type(np.putmask(AR_f8, [True, True, False], 1.5)) # E: None + +reveal_type(np.packbits(AR_i8)) # ndarray[Any, dtype[{uint8}]] +reveal_type(np.packbits(AR_u1)) # ndarray[Any, dtype[{uint8}]] + +reveal_type(np.unpackbits(AR_u1)) # ndarray[Any, dtype[{uint8}]] + +reveal_type(np.shares_memory(1, 2)) # E: bool 
+reveal_type(np.shares_memory(AR_f8, AR_f8, max_work=1)) # E: bool + +reveal_type(np.may_share_memory(1, 2)) # E: bool +reveal_type(np.may_share_memory(AR_f8, AR_f8, max_work=1)) # E: bool + +reveal_type(np.geterrobj()) # E: list[Any] + +reveal_type(np.seterrobj([8192, 521, None])) # E: None + +reveal_type(np.promote_types(np.int32, np.int64)) # E: dtype[Any] +reveal_type(np.promote_types("f4", float)) # E: dtype[Any] + +reveal_type(np.frompyfunc(func, 1, 1, identity=None)) # ufunc + +reveal_type(np.datetime_data("m8[D]")) # E: Tuple[builtins.str, builtins.int] +reveal_type(np.datetime_data(np.datetime64)) # E: Tuple[builtins.str, builtins.int] +reveal_type(np.datetime_data(np.dtype(np.timedelta64))) # E: Tuple[builtins.str, builtins.int] + +reveal_type(np.busday_count("2011-01", "2011-02")) # E: {int_} +reveal_type(np.busday_count(["2011-01"], "2011-02")) # E: ndarray[Any, dtype[{int_}]] +reveal_type(np.busday_count(["2011-01"], date_scalar)) # E: ndarray[Any, dtype[{int_}]] + +reveal_type(np.busday_offset(M, m)) # E: datetime64 +reveal_type(np.busday_offset(date_scalar, m)) # E: datetime64 +reveal_type(np.busday_offset(M, 5)) # E: datetime64 +reveal_type(np.busday_offset(AR_M, m)) # E: ndarray[Any, dtype[datetime64]] +reveal_type(np.busday_offset(M, timedelta_seq)) # E: ndarray[Any, dtype[datetime64]] +reveal_type(np.busday_offset("2011-01", "2011-02", roll="forward")) # E: datetime64 +reveal_type(np.busday_offset(["2011-01"], "2011-02", roll="forward")) # E: ndarray[Any, dtype[datetime64]] + +reveal_type(np.is_busday("2012")) # E: bool_ +reveal_type(np.is_busday(date_scalar)) # E: bool_ +reveal_type(np.is_busday(["2012"])) # E: ndarray[Any, dtype[bool_]] + +reveal_type(np.datetime_as_string(M)) # E: str_ +reveal_type(np.datetime_as_string(AR_M)) # E: ndarray[Any, dtype[str_]] + +reveal_type(np.busdaycalendar(holidays=date_seq)) # E: busdaycalendar +reveal_type(np.busdaycalendar(holidays=[M])) # E: busdaycalendar + +reveal_type(np.compare_chararrays("a", "b", "!=", rstrip=False)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.compare_chararrays(b"a", b"a", "==", True)) # E: ndarray[Any, dtype[bool_]] + +reveal_type(np.add_docstring(func, "test")) # E: None + +reveal_type(np.nested_iters([AR_i8, AR_i8], [[0], [1]], flags=["c_index"])) # E: tuple[nditer] +reveal_type(np.nested_iters([AR_i8, AR_i8], [[0], [1]], op_flags=[["readonly", "readonly"]])) # E: tuple[nditer] +reveal_type(np.nested_iters([AR_i8, AR_i8], [[0], [1]], op_dtypes=np.int_)) # E: tuple[nditer] +reveal_type(np.nested_iters([AR_i8, AR_i8], [[0], [1]], order="C", casting="no")) # E: tuple[nditer] diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/nbit_base_example.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/nbit_base_example.pyi new file mode 100644 index 0000000..d34f6f6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/nbit_base_example.pyi @@ -0,0 +1,19 @@ +from typing import TypeVar, Union +import numpy as np +import numpy.typing as npt + +T1 = TypeVar("T1", bound=npt.NBitBase) +T2 = TypeVar("T2", bound=npt.NBitBase) + +def add(a: np.floating[T1], b: np.integer[T2]) -> np.floating[Union[T1, T2]]: + return a + b + +i8: np.int64 +i4: np.int32 +f8: np.float64 +f4: np.float32 + +reveal_type(add(f8, i8)) # E: {float64} +reveal_type(add(f4, i8)) # E: {float64} +reveal_type(add(f8, i4)) # E: {float64} +reveal_type(add(f4, i4)) # E: {float32} diff --git 
a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/ndarray_conversion.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/ndarray_conversion.pyi new file mode 100644 index 0000000..6885d4f --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/ndarray_conversion.pyi @@ -0,0 +1,51 @@ +import numpy as np +import numpy.typing as npt + +nd: npt.NDArray[np.int_] = np.array([[1, 2], [3, 4]]) + +# item +reveal_type(nd.item()) # E: int +reveal_type(nd.item(1)) # E: int +reveal_type(nd.item(0, 1)) # E: int +reveal_type(nd.item((0, 1))) # E: int + +# tolist +reveal_type(nd.tolist()) # E: Any + +# itemset does not return a value +# tostring is pretty simple +# tobytes is pretty simple +# tofile does not return a value +# dump does not return a value +# dumps is pretty simple + +# astype +reveal_type(nd.astype("float")) # E: ndarray[Any, dtype[Any]] +reveal_type(nd.astype(float)) # E: ndarray[Any, dtype[Any]] +reveal_type(nd.astype(np.float64)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(nd.astype(np.float64, "K")) # E: ndarray[Any, dtype[{float64}]] +reveal_type(nd.astype(np.float64, "K", "unsafe")) # E: ndarray[Any, dtype[{float64}]] +reveal_type(nd.astype(np.float64, "K", "unsafe", True)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(nd.astype(np.float64, "K", "unsafe", True, True)) # E: ndarray[Any, dtype[{float64}]] + +# byteswap +reveal_type(nd.byteswap()) # E: ndarray[Any, dtype[{int_}]] +reveal_type(nd.byteswap(True)) # E: ndarray[Any, dtype[{int_}]] + +# copy +reveal_type(nd.copy()) # E: ndarray[Any, dtype[{int_}]] +reveal_type(nd.copy("C")) # E: ndarray[Any, dtype[{int_}]] + +reveal_type(nd.view()) # E: ndarray[Any, dtype[{int_}]] +reveal_type(nd.view(np.float64)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(nd.view(float)) # E: ndarray[Any, dtype[Any]] +reveal_type(nd.view(np.float64, np.matrix)) # E: matrix[Any, Any] + +# getfield +reveal_type(nd.getfield("float")) # E: ndarray[Any, dtype[Any]] +reveal_type(nd.getfield(float)) # E: ndarray[Any, dtype[Any]] +reveal_type(nd.getfield(np.float64)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(nd.getfield(np.float64, 8)) # E: ndarray[Any, dtype[{float64}]] + +# setflags does not return a value +# fill does not return a value diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/ndarray_misc.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/ndarray_misc.pyi new file mode 100644 index 0000000..f91d635 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/ndarray_misc.pyi @@ -0,0 +1,214 @@ +""" +Tests for miscellaneous (non-magic) ``np.ndarray``/``np.generic`` methods. + +More extensive tests are performed for the methods' +function-based counterpart in `../from_numeric.py`. + +""" + +import operator +import ctypes as ct +from typing import Any + +import numpy as np +from numpy.typing import NDArray + +class SubClass(NDArray[np.object_]): ... 
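+
+# Aside (runtime sketch, not part of the vendored test data): the ``out=B``
+# reveals below rely on ndarray methods returning the ``out`` argument
+# itself, which preserves its subclass; ``Wrapper`` is a made-up stand-in.
+class Wrapper(np.ndarray): ...
+buf = np.empty((), dtype=np.float64).view(Wrapper)
+res = np.arange(4.0).max(out=buf)
+assert res is buf and type(res) is Wrapper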
+ +f8: np.float64 +B: SubClass +AR_f8: NDArray[np.float64] +AR_i8: NDArray[np.int64] +AR_U: NDArray[np.str_] +AR_V: NDArray[np.void] + +ctypes_obj = AR_f8.ctypes + +reveal_type(AR_f8.__dlpack__()) # E: Any +reveal_type(AR_f8.__dlpack_device__()) # E: Tuple[int, Literal[0]] + +reveal_type(ctypes_obj.data) # E: int +reveal_type(ctypes_obj.shape) # E: ctypes.Array[{c_intp}] +reveal_type(ctypes_obj.strides) # E: ctypes.Array[{c_intp}] +reveal_type(ctypes_obj._as_parameter_) # E: ctypes.c_void_p + +reveal_type(ctypes_obj.data_as(ct.c_void_p)) # E: ctypes.c_void_p +reveal_type(ctypes_obj.shape_as(ct.c_longlong)) # E: ctypes.Array[ctypes.c_longlong] +reveal_type(ctypes_obj.strides_as(ct.c_ubyte)) # E: ctypes.Array[ctypes.c_ubyte] + +reveal_type(f8.all()) # E: bool_ +reveal_type(AR_f8.all()) # E: bool_ +reveal_type(AR_f8.all(axis=0)) # E: Any +reveal_type(AR_f8.all(keepdims=True)) # E: Any +reveal_type(AR_f8.all(out=B)) # E: SubClass + +reveal_type(f8.any()) # E: bool_ +reveal_type(AR_f8.any()) # E: bool_ +reveal_type(AR_f8.any(axis=0)) # E: Any +reveal_type(AR_f8.any(keepdims=True)) # E: Any +reveal_type(AR_f8.any(out=B)) # E: SubClass + +reveal_type(f8.argmax()) # E: {intp} +reveal_type(AR_f8.argmax()) # E: {intp} +reveal_type(AR_f8.argmax(axis=0)) # E: Any +reveal_type(AR_f8.argmax(out=B)) # E: SubClass + +reveal_type(f8.argmin()) # E: {intp} +reveal_type(AR_f8.argmin()) # E: {intp} +reveal_type(AR_f8.argmin(axis=0)) # E: Any +reveal_type(AR_f8.argmin(out=B)) # E: SubClass + +reveal_type(f8.argsort()) # E: ndarray[Any, Any] +reveal_type(AR_f8.argsort()) # E: ndarray[Any, Any] + +reveal_type(f8.astype(np.int64).choose([()])) # E: ndarray[Any, Any] +reveal_type(AR_f8.choose([0])) # E: ndarray[Any, Any] +reveal_type(AR_f8.choose([0], out=B)) # E: SubClass + +reveal_type(f8.clip(1)) # E: Any +reveal_type(AR_f8.clip(1)) # E: Any +reveal_type(AR_f8.clip(None, 1)) # E: Any +reveal_type(AR_f8.clip(1, out=B)) # E: SubClass +reveal_type(AR_f8.clip(None, 1, out=B)) # E: SubClass + +reveal_type(f8.compress([0])) # E: ndarray[Any, Any] +reveal_type(AR_f8.compress([0])) # E: ndarray[Any, Any] +reveal_type(AR_f8.compress([0], out=B)) # E: SubClass + +reveal_type(f8.conj()) # E: {float64} +reveal_type(AR_f8.conj()) # E: ndarray[Any, dtype[{float64}]] +reveal_type(B.conj()) # E: SubClass + +reveal_type(f8.conjugate()) # E: {float64} +reveal_type(AR_f8.conjugate()) # E: ndarray[Any, dtype[{float64}]] +reveal_type(B.conjugate()) # E: SubClass + +reveal_type(f8.cumprod()) # E: ndarray[Any, Any] +reveal_type(AR_f8.cumprod()) # E: ndarray[Any, Any] +reveal_type(AR_f8.cumprod(out=B)) # E: SubClass + +reveal_type(f8.cumsum()) # E: ndarray[Any, Any] +reveal_type(AR_f8.cumsum()) # E: ndarray[Any, Any] +reveal_type(AR_f8.cumsum(out=B)) # E: SubClass + +reveal_type(f8.max()) # E: Any +reveal_type(AR_f8.max()) # E: Any +reveal_type(AR_f8.max(axis=0)) # E: Any +reveal_type(AR_f8.max(keepdims=True)) # E: Any +reveal_type(AR_f8.max(out=B)) # E: SubClass + +reveal_type(f8.mean()) # E: Any +reveal_type(AR_f8.mean()) # E: Any +reveal_type(AR_f8.mean(axis=0)) # E: Any +reveal_type(AR_f8.mean(keepdims=True)) # E: Any +reveal_type(AR_f8.mean(out=B)) # E: SubClass + +reveal_type(f8.min()) # E: Any +reveal_type(AR_f8.min()) # E: Any +reveal_type(AR_f8.min(axis=0)) # E: Any +reveal_type(AR_f8.min(keepdims=True)) # E: Any +reveal_type(AR_f8.min(out=B)) # E: SubClass + +reveal_type(f8.newbyteorder()) # E: {float64} +reveal_type(AR_f8.newbyteorder()) # E: ndarray[Any, dtype[{float64}]] +reveal_type(B.newbyteorder('|')) # E: SubClass + 
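+# Aside (runtime sketch, not part of the vendored test data): newbyteorder,
+# revealed just above, returns a view of the same data with the dtype's byte
+# order flipped -- the underlying buffer is untouched.
+x = np.array([1.0])
+y = x.newbyteorder()
+assert y.dtype == x.dtype.newbyteorder()
+assert y.tobytes() == x.tobytes()   # same bytes, reinterpreted
+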
+reveal_type(f8.prod()) # E: Any +reveal_type(AR_f8.prod()) # E: Any +reveal_type(AR_f8.prod(axis=0)) # E: Any +reveal_type(AR_f8.prod(keepdims=True)) # E: Any +reveal_type(AR_f8.prod(out=B)) # E: SubClass + +reveal_type(f8.ptp()) # E: Any +reveal_type(AR_f8.ptp()) # E: Any +reveal_type(AR_f8.ptp(axis=0)) # E: Any +reveal_type(AR_f8.ptp(keepdims=True)) # E: Any +reveal_type(AR_f8.ptp(out=B)) # E: SubClass + +reveal_type(f8.round()) # E: {float64} +reveal_type(AR_f8.round()) # E: ndarray[Any, dtype[{float64}]] +reveal_type(AR_f8.round(out=B)) # E: SubClass + +reveal_type(f8.repeat(1)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(AR_f8.repeat(1)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(B.repeat(1)) # E: ndarray[Any, dtype[object_]] + +reveal_type(f8.std()) # E: Any +reveal_type(AR_f8.std()) # E: Any +reveal_type(AR_f8.std(axis=0)) # E: Any +reveal_type(AR_f8.std(keepdims=True)) # E: Any +reveal_type(AR_f8.std(out=B)) # E: SubClass + +reveal_type(f8.sum()) # E: Any +reveal_type(AR_f8.sum()) # E: Any +reveal_type(AR_f8.sum(axis=0)) # E: Any +reveal_type(AR_f8.sum(keepdims=True)) # E: Any +reveal_type(AR_f8.sum(out=B)) # E: SubClass + +reveal_type(f8.take(0)) # E: {float64} +reveal_type(AR_f8.take(0)) # E: {float64} +reveal_type(AR_f8.take([0])) # E: ndarray[Any, dtype[{float64}]] +reveal_type(AR_f8.take(0, out=B)) # E: SubClass +reveal_type(AR_f8.take([0], out=B)) # E: SubClass + +reveal_type(f8.var()) # E: Any +reveal_type(AR_f8.var()) # E: Any +reveal_type(AR_f8.var(axis=0)) # E: Any +reveal_type(AR_f8.var(keepdims=True)) # E: Any +reveal_type(AR_f8.var(out=B)) # E: SubClass + +reveal_type(AR_f8.argpartition([0])) # E: ndarray[Any, dtype[{intp}]] + +reveal_type(AR_f8.diagonal()) # E: ndarray[Any, dtype[{float64}]] + +reveal_type(AR_f8.dot(1)) # E: ndarray[Any, Any] +reveal_type(AR_f8.dot([1])) # E: Any +reveal_type(AR_f8.dot(1, out=B)) # E: SubClass + +reveal_type(AR_f8.nonzero()) # E: tuple[ndarray[Any, dtype[{intp}]]] + +reveal_type(AR_f8.searchsorted(1)) # E: {intp} +reveal_type(AR_f8.searchsorted([1])) # E: ndarray[Any, dtype[{intp}]] + +reveal_type(AR_f8.trace()) # E: Any +reveal_type(AR_f8.trace(out=B)) # E: SubClass + +reveal_type(AR_f8.item()) # E: float +reveal_type(AR_U.item()) # E: str + +reveal_type(AR_f8.ravel()) # E: ndarray[Any, dtype[{float64}]] +reveal_type(AR_U.ravel()) # E: ndarray[Any, dtype[str_]] + +reveal_type(AR_f8.flatten()) # E: ndarray[Any, dtype[{float64}]] +reveal_type(AR_U.flatten()) # E: ndarray[Any, dtype[str_]] + +reveal_type(AR_f8.reshape(1)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(AR_U.reshape(1)) # E: ndarray[Any, dtype[str_]] + +reveal_type(int(AR_f8)) # E: int +reveal_type(int(AR_U)) # E: int + +reveal_type(float(AR_f8)) # E: float +reveal_type(float(AR_U)) # E: float + +reveal_type(complex(AR_f8)) # E: complex + +reveal_type(operator.index(AR_i8)) # E: int + +reveal_type(AR_f8.__array_prepare__(B)) # E: ndarray[Any, dtype[object_]] +reveal_type(AR_f8.__array_wrap__(B)) # E: ndarray[Any, dtype[object_]] + +reveal_type(AR_V[0]) # E: Any +reveal_type(AR_V[0, 0]) # E: Any +reveal_type(AR_V[AR_i8]) # E: Any +reveal_type(AR_V[AR_i8, AR_i8]) # E: Any +reveal_type(AR_V[AR_i8, None]) # E: ndarray[Any, dtype[void]] +reveal_type(AR_V[0, ...]) # E: ndarray[Any, dtype[void]] +reveal_type(AR_V[:]) # E: ndarray[Any, dtype[void]] +reveal_type(AR_V["a"]) # E: ndarray[Any, dtype[Any]] +reveal_type(AR_V[["a", "b"]]) # E: ndarray[Any, dtype[void]] + +reveal_type(AR_f8.dump("test_file")) # E: None +reveal_type(AR_f8.dump(b"test_file")) # E: None +with 
open("test_file", "wb") as f: + reveal_type(AR_f8.dump(f)) # E: None diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/ndarray_shape_manipulation.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/ndarray_shape_manipulation.pyi new file mode 100644 index 0000000..c000bf4 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/ndarray_shape_manipulation.pyi @@ -0,0 +1,35 @@ +import numpy as np + +nd = np.array([[1, 2], [3, 4]]) + +# reshape +reveal_type(nd.reshape()) # E: ndarray +reveal_type(nd.reshape(4)) # E: ndarray +reveal_type(nd.reshape(2, 2)) # E: ndarray +reveal_type(nd.reshape((2, 2))) # E: ndarray + +reveal_type(nd.reshape((2, 2), order="C")) # E: ndarray +reveal_type(nd.reshape(4, order="C")) # E: ndarray + +# resize does not return a value + +# transpose +reveal_type(nd.transpose()) # E: ndarray +reveal_type(nd.transpose(1, 0)) # E: ndarray +reveal_type(nd.transpose((1, 0))) # E: ndarray + +# swapaxes +reveal_type(nd.swapaxes(0, 1)) # E: ndarray + +# flatten +reveal_type(nd.flatten()) # E: ndarray +reveal_type(nd.flatten("C")) # E: ndarray + +# ravel +reveal_type(nd.ravel()) # E: ndarray +reveal_type(nd.ravel("C")) # E: ndarray + +# squeeze +reveal_type(nd.squeeze()) # E: ndarray +reveal_type(nd.squeeze(0)) # E: ndarray +reveal_type(nd.squeeze((0, 2))) # E: ndarray diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/nditer.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/nditer.pyi new file mode 100644 index 0000000..65861da --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/nditer.pyi @@ -0,0 +1,46 @@ +import numpy as np + +nditer_obj: np.nditer + +reveal_type(np.nditer([0, 1], flags=["c_index"])) # E: nditer +reveal_type(np.nditer([0, 1], op_flags=[["readonly", "readonly"]])) # E: nditer +reveal_type(np.nditer([0, 1], op_dtypes=np.int_)) # E: nditer +reveal_type(np.nditer([0, 1], order="C", casting="no")) # E: nditer + +reveal_type(nditer_obj.dtypes) # E: tuple[dtype[Any]] +reveal_type(nditer_obj.finished) # E: bool +reveal_type(nditer_obj.has_delayed_bufalloc) # E: bool +reveal_type(nditer_obj.has_index) # E: bool +reveal_type(nditer_obj.has_multi_index) # E: bool +reveal_type(nditer_obj.index) # E: int +reveal_type(nditer_obj.iterationneedsapi) # E: bool +reveal_type(nditer_obj.iterindex) # E: int +reveal_type(nditer_obj.iterrange) # E: tuple[builtins.int] +reveal_type(nditer_obj.itersize) # E: int +reveal_type(nditer_obj.itviews) # E: tuple[ndarray[Any, dtype[Any]]] +reveal_type(nditer_obj.multi_index) # E: tuple[builtins.int] +reveal_type(nditer_obj.ndim) # E: int +reveal_type(nditer_obj.nop) # E: int +reveal_type(nditer_obj.operands) # E: tuple[ndarray[Any, dtype[Any]]] +reveal_type(nditer_obj.shape) # E: tuple[builtins.int] +reveal_type(nditer_obj.value) # E: tuple[ndarray[Any, dtype[Any]]] + +reveal_type(nditer_obj.close()) # E: None +reveal_type(nditer_obj.copy()) # E: nditer +reveal_type(nditer_obj.debug_print()) # E: None +reveal_type(nditer_obj.enable_external_loop()) # E: None +reveal_type(nditer_obj.iternext()) # E: bool +reveal_type(nditer_obj.remove_axis(0)) # E: None +reveal_type(nditer_obj.remove_multi_index()) # E: None +reveal_type(nditer_obj.reset()) # E: None + +reveal_type(len(nditer_obj)) # E: int +reveal_type(iter(nditer_obj)) # E: Iterator[builtins.tuple[ndarray[Any, dtype[Any]]]] +reveal_type(next(nditer_obj)) # E: tuple[ndarray[Any, dtype[Any]]] +reveal_type(nditer_obj.__copy__()) # E: 
nditer +with nditer_obj as f: + reveal_type(f) # E: nditer +reveal_type(nditer_obj[0]) # E: ndarray[Any, dtype[Any]] +reveal_type(nditer_obj[:]) # E: tuple[ndarray[Any, dtype[Any]]] +nditer_obj[0] = 0 +nditer_obj[:] = [0, 1] diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/nested_sequence.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/nested_sequence.pyi new file mode 100644 index 0000000..4d3aad4 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/nested_sequence.pyi @@ -0,0 +1,24 @@ +from typing import Sequence, Tuple, List, Any +import numpy.typing as npt + +a: Sequence[int] +b: Sequence[Sequence[int]] +c: Sequence[Sequence[Sequence[int]]] +d: Sequence[Sequence[Sequence[Sequence[int]]]] +e: Sequence[bool] +f: Tuple[int, ...] +g: List[int] +h: Sequence[Any] + +def func(a: npt._NestedSequence[int]) -> None: + ... + +reveal_type(func(a)) # E: None +reveal_type(func(b)) # E: None +reveal_type(func(c)) # E: None +reveal_type(func(d)) # E: None +reveal_type(func(e)) # E: None +reveal_type(func(f)) # E: None +reveal_type(func(g)) # E: None +reveal_type(func(h)) # E: None +reveal_type(func(range(15))) # E: None diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/npyio.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/npyio.pyi new file mode 100644 index 0000000..637bdb6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/npyio.pyi @@ -0,0 +1,92 @@ +import re +import pathlib +from typing import IO, List + +import numpy.typing as npt +import numpy as np + +str_path: str +pathlib_path: pathlib.Path +str_file: IO[str] +bytes_file: IO[bytes] + +bag_obj: np.lib.npyio.BagObj[int] +npz_file: np.lib.npyio.NpzFile + +AR_i8: npt.NDArray[np.int64] +AR_LIKE_f8: List[float] + +class BytesWriter: + def write(self, data: bytes) -> None: ... + +class BytesReader: + def read(self, n: int = ...) -> bytes: ... + def seek(self, offset: int, whence: int = ...) -> int: ... 
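+
+# Aside (runtime sketch, not part of the vendored test data): any object with
+# the write()/read()/seek() shape of the protocols above can be passed to
+# np.save and np.load -- io.BytesIO is the standard-library example.
+import io
+_buf = io.BytesIO()
+np.save(_buf, np.arange(3))
+_buf.seek(0)
+assert np.load(_buf).tolist() == [0, 1, 2]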
+ +bytes_writer: BytesWriter +bytes_reader: BytesReader + +reveal_type(bag_obj.a) # E: int +reveal_type(bag_obj.b) # E: int + +reveal_type(npz_file.zip) # E: zipfile.ZipFile +reveal_type(npz_file.fid) # E: Union[None, typing.IO[builtins.str]] +reveal_type(npz_file.files) # E: list[builtins.str] +reveal_type(npz_file.allow_pickle) # E: bool +reveal_type(npz_file.pickle_kwargs) # E: Union[None, typing.Mapping[builtins.str, Any]] +reveal_type(npz_file.f) # E: lib.npyio.BagObj[lib.npyio.NpzFile] +reveal_type(npz_file["test"]) # E: ndarray[Any, dtype[Any]] +reveal_type(len(npz_file)) # E: int +with npz_file as f: + reveal_type(f) # E: lib.npyio.NpzFile + +reveal_type(np.load(bytes_file)) # E: Any +reveal_type(np.load(pathlib_path, allow_pickle=True)) # E: Any +reveal_type(np.load(str_path, encoding="bytes")) # E: Any +reveal_type(np.load(bytes_reader)) # E: Any + +reveal_type(np.save(bytes_file, AR_LIKE_f8)) # E: None +reveal_type(np.save(pathlib_path, AR_i8, allow_pickle=True)) # E: None +reveal_type(np.save(str_path, AR_LIKE_f8)) # E: None +reveal_type(np.save(bytes_writer, AR_LIKE_f8)) # E: None + +reveal_type(np.savez(bytes_file, AR_LIKE_f8)) # E: None +reveal_type(np.savez(pathlib_path, ar1=AR_i8, ar2=AR_i8)) # E: None +reveal_type(np.savez(str_path, AR_LIKE_f8, ar1=AR_i8)) # E: None +reveal_type(np.savez(bytes_writer, AR_LIKE_f8, ar1=AR_i8)) # E: None + +reveal_type(np.savez_compressed(bytes_file, AR_LIKE_f8)) # E: None +reveal_type(np.savez_compressed(pathlib_path, ar1=AR_i8, ar2=AR_i8)) # E: None +reveal_type(np.savez_compressed(str_path, AR_LIKE_f8, ar1=AR_i8)) # E: None +reveal_type(np.savez_compressed(bytes_writer, AR_LIKE_f8, ar1=AR_i8)) # E: None + +reveal_type(np.loadtxt(bytes_file)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.loadtxt(pathlib_path, dtype=np.str_)) # E: ndarray[Any, dtype[str_]] +reveal_type(np.loadtxt(str_path, dtype=str, skiprows=2)) # E: ndarray[Any, dtype[Any]] +reveal_type(np.loadtxt(str_file, comments="test")) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.loadtxt(str_file, comments=None)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.loadtxt(str_path, delimiter="\n")) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.loadtxt(str_path, ndmin=2)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.loadtxt(["1", "2", "3"])) # E: ndarray[Any, dtype[{float64}]] + +reveal_type(np.fromregex(bytes_file, "test", np.float64)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.fromregex(str_file, b"test", dtype=float)) # E: ndarray[Any, dtype[Any]] +reveal_type(np.fromregex(str_path, re.compile("test"), dtype=np.str_, encoding="utf8")) # E: ndarray[Any, dtype[str_]] +reveal_type(np.fromregex(pathlib_path, "test", np.float64)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.fromregex(bytes_reader, "test", np.float64)) # E: ndarray[Any, dtype[{float64}]] + +reveal_type(np.genfromtxt(bytes_file)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.genfromtxt(pathlib_path, dtype=np.str_)) # E: ndarray[Any, dtype[str_]] +reveal_type(np.genfromtxt(str_path, dtype=str, skiprows=2)) # E: ndarray[Any, dtype[Any]] +reveal_type(np.genfromtxt(str_file, comments="test")) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.genfromtxt(str_path, delimiter="\n")) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.genfromtxt(str_path, ndmin=2)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.genfromtxt(["1", "2", "3"], ndmin=2)) # E: ndarray[Any, dtype[{float64}]] + +reveal_type(np.recfromtxt(bytes_file)) # E: recarray[Any, dtype[record]] 
+reveal_type(np.recfromtxt(pathlib_path, usemask=True)) # E: ma.mrecords.MaskedRecords[Any, dtype[void]] +reveal_type(np.recfromtxt(["1", "2", "3"])) # E: recarray[Any, dtype[record]] + +reveal_type(np.recfromcsv(bytes_file)) # E: recarray[Any, dtype[record]] +reveal_type(np.recfromcsv(pathlib_path, usemask=True)) # E: ma.mrecords.MaskedRecords[Any, dtype[void]] +reveal_type(np.recfromcsv(["1", "2", "3"])) # E: recarray[Any, dtype[record]] diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/numeric.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/numeric.pyi new file mode 100644 index 0000000..bf56539 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/numeric.pyi @@ -0,0 +1,134 @@ +""" +Tests for :mod:`core.numeric`. + +Does not include tests which fall under ``array_constructors``. + +""" + +from typing import List +import numpy as np +import numpy.typing as npt + +class SubClass(npt.NDArray[np.int64]): + ... + +i8: np.int64 + +AR_b: npt.NDArray[np.bool_] +AR_u8: npt.NDArray[np.uint64] +AR_i8: npt.NDArray[np.int64] +AR_f8: npt.NDArray[np.float64] +AR_c16: npt.NDArray[np.complex128] +AR_m: npt.NDArray[np.timedelta64] +AR_O: npt.NDArray[np.object_] + +B: List[int] +C: SubClass + +reveal_type(np.count_nonzero(i8)) # E: int +reveal_type(np.count_nonzero(AR_i8)) # E: int +reveal_type(np.count_nonzero(B)) # E: int +reveal_type(np.count_nonzero(AR_i8, keepdims=True)) # E: Any +reveal_type(np.count_nonzero(AR_i8, axis=0)) # E: Any + +reveal_type(np.isfortran(i8)) # E: bool +reveal_type(np.isfortran(AR_i8)) # E: bool + +reveal_type(np.argwhere(i8)) # E: ndarray[Any, dtype[{intp}]] +reveal_type(np.argwhere(AR_i8)) # E: ndarray[Any, dtype[{intp}]] + +reveal_type(np.flatnonzero(i8)) # E: ndarray[Any, dtype[{intp}]] +reveal_type(np.flatnonzero(AR_i8)) # E: ndarray[Any, dtype[{intp}]] + +reveal_type(np.correlate(B, AR_i8, mode="valid")) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(np.correlate(AR_i8, AR_i8, mode="same")) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(np.correlate(AR_b, AR_b)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.correlate(AR_b, AR_u8)) # E: ndarray[Any, dtype[unsignedinteger[Any]]] +reveal_type(np.correlate(AR_i8, AR_b)) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(np.correlate(AR_i8, AR_f8)) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(np.correlate(AR_i8, AR_c16)) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] +reveal_type(np.correlate(AR_i8, AR_m)) # E: ndarray[Any, dtype[timedelta64]] +reveal_type(np.correlate(AR_O, AR_O)) # E: ndarray[Any, dtype[object_]] + +reveal_type(np.convolve(B, AR_i8, mode="valid")) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(np.convolve(AR_i8, AR_i8, mode="same")) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(np.convolve(AR_b, AR_b)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.convolve(AR_b, AR_u8)) # E: ndarray[Any, dtype[unsignedinteger[Any]]] +reveal_type(np.convolve(AR_i8, AR_b)) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(np.convolve(AR_i8, AR_f8)) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(np.convolve(AR_i8, AR_c16)) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] +reveal_type(np.convolve(AR_i8, AR_m)) # E: ndarray[Any, dtype[timedelta64]] +reveal_type(np.convolve(AR_O, AR_O)) # E: ndarray[Any, dtype[object_]] + +reveal_type(np.outer(i8, AR_i8)) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(np.outer(B, AR_i8)) # E: ndarray[Any, 
dtype[signedinteger[Any]]] +reveal_type(np.outer(AR_i8, AR_i8)) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(np.outer(AR_i8, AR_i8, out=C)) # E: SubClass +reveal_type(np.outer(AR_b, AR_b)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.outer(AR_b, AR_u8)) # E: ndarray[Any, dtype[unsignedinteger[Any]]] +reveal_type(np.outer(AR_i8, AR_b)) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(np.outer(AR_i8, AR_f8)) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(np.outer(AR_i8, AR_c16)) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] +reveal_type(np.outer(AR_i8, AR_m)) # E: ndarray[Any, dtype[timedelta64]] +reveal_type(np.outer(AR_O, AR_O)) # E: ndarray[Any, dtype[object_]] + +reveal_type(np.tensordot(B, AR_i8)) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(np.tensordot(AR_i8, AR_i8)) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(np.tensordot(AR_i8, AR_i8, axes=0)) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(np.tensordot(AR_i8, AR_i8, axes=(0, 1))) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(np.tensordot(AR_b, AR_b)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.tensordot(AR_b, AR_u8)) # E: ndarray[Any, dtype[unsignedinteger[Any]]] +reveal_type(np.tensordot(AR_i8, AR_b)) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(np.tensordot(AR_i8, AR_f8)) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(np.tensordot(AR_i8, AR_c16)) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] +reveal_type(np.tensordot(AR_i8, AR_m)) # E: ndarray[Any, dtype[timedelta64]] +reveal_type(np.tensordot(AR_O, AR_O)) # E: ndarray[Any, dtype[object_]] + +reveal_type(np.isscalar(i8)) # E: bool +reveal_type(np.isscalar(AR_i8)) # E: bool +reveal_type(np.isscalar(B)) # E: bool + +reveal_type(np.roll(AR_i8, 1)) # E: ndarray[Any, dtype[{int64}]] +reveal_type(np.roll(AR_i8, (1, 2))) # E: ndarray[Any, dtype[{int64}]] +reveal_type(np.roll(B, 1)) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.rollaxis(AR_i8, 0, 1)) # E: ndarray[Any, dtype[{int64}]] + +reveal_type(np.moveaxis(AR_i8, 0, 1)) # E: ndarray[Any, dtype[{int64}]] +reveal_type(np.moveaxis(AR_i8, (0, 1), (1, 2))) # E: ndarray[Any, dtype[{int64}]] + +reveal_type(np.cross(B, AR_i8)) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(np.cross(AR_i8, AR_i8)) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(np.cross(AR_b, AR_u8)) # E: ndarray[Any, dtype[unsignedinteger[Any]]] +reveal_type(np.cross(AR_i8, AR_b)) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(np.cross(AR_i8, AR_f8)) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(np.cross(AR_i8, AR_c16)) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] +reveal_type(np.cross(AR_O, AR_O)) # E: ndarray[Any, dtype[object_]] + +reveal_type(np.indices([0, 1, 2])) # E: ndarray[Any, dtype[{int_}]] +reveal_type(np.indices([0, 1, 2], sparse=True)) # E: tuple[ndarray[Any, dtype[{int_}]]] +reveal_type(np.indices([0, 1, 2], dtype=np.float64)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.indices([0, 1, 2], sparse=True, dtype=np.float64)) # E: tuple[ndarray[Any, dtype[{float64}]]] +reveal_type(np.indices([0, 1, 2], dtype=float)) # E: ndarray[Any, dtype[Any]] +reveal_type(np.indices([0, 1, 2], sparse=True, dtype=float)) # E: tuple[ndarray[Any, dtype[Any]]] + +reveal_type(np.binary_repr(1)) # E: str + +reveal_type(np.base_repr(1)) # E: str + +reveal_type(np.allclose(i8, AR_i8)) # E: bool +reveal_type(np.allclose(B, AR_i8)) # E: bool +reveal_type(np.allclose(AR_i8, AR_i8)) # E: bool +
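+# NOTE (illustrative sketch): np.allclose above collapses to a single
+# builtins.bool regardless of input shape, while np.isclose below keeps
+# elementwise structure -- two scalars reveal np.bool_, and any
+# array-like operand reveals ndarray[Any, dtype[bool_]]:
+#
+#     verdict: bool = np.allclose(AR_i8, AR_i8)  # one aggregated answer
+#     mask = np.isclose(AR_i8, AR_i8)            # per-element boolean array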
+reveal_type(np.isclose(i8, i8)) # E: bool_ +reveal_type(np.isclose(i8, AR_i8)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.isclose(B, AR_i8)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.isclose(AR_i8, AR_i8)) # E: ndarray[Any, dtype[bool_]] + +reveal_type(np.array_equal(i8, AR_i8)) # E: bool +reveal_type(np.array_equal(B, AR_i8)) # E: bool +reveal_type(np.array_equal(AR_i8, AR_i8)) # E: bool + +reveal_type(np.array_equiv(i8, AR_i8)) # E: bool +reveal_type(np.array_equiv(B, AR_i8)) # E: bool +reveal_type(np.array_equiv(AR_i8, AR_i8)) # E: bool diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/numerictypes.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/numerictypes.pyi new file mode 100644 index 0000000..cc23352 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/numerictypes.pyi @@ -0,0 +1,42 @@ +import numpy as np + +reveal_type(np.maximum_sctype(np.float64)) # E: Type[{float64}] +reveal_type(np.maximum_sctype("f8")) # E: Type[Any] + +reveal_type(np.issctype(np.float64)) # E: bool +reveal_type(np.issctype("foo")) # E: Literal[False] + +reveal_type(np.obj2sctype(np.float64)) # E: Union[None, Type[{float64}]] +reveal_type(np.obj2sctype(np.float64, default=False)) # E: Union[builtins.bool, Type[{float64}]] +reveal_type(np.obj2sctype("S8")) # E: Union[None, Type[Any]] +reveal_type(np.obj2sctype("S8", default=None)) # E: Union[None, Type[Any]] +reveal_type(np.obj2sctype("foo", default=False)) # E: Union[builtins.bool, Type[Any]] +reveal_type(np.obj2sctype(1)) # E: None +reveal_type(np.obj2sctype(1, default=False)) # E: bool + +reveal_type(np.issubclass_(np.float64, float)) # E: bool +reveal_type(np.issubclass_(np.float64, (int, float))) # E: bool +reveal_type(np.issubclass_(1, 1)) # E: Literal[False] + +reveal_type(np.sctype2char("S8")) # E: str +reveal_type(np.sctype2char(list)) # E: str + +reveal_type(np.find_common_type([np.int64], [np.int64])) # E: dtype[Any] + +reveal_type(np.cast[int]) # E: _CastFunc +reveal_type(np.cast["i8"]) # E: _CastFunc +reveal_type(np.cast[np.int64]) # E: _CastFunc + +reveal_type(np.nbytes[int]) # E: int +reveal_type(np.nbytes["i8"]) # E: int +reveal_type(np.nbytes[np.int64]) # E: int + +reveal_type(np.ScalarType) # E: Tuple +reveal_type(np.ScalarType[0]) # E: Type[builtins.int] +reveal_type(np.ScalarType[4]) # E: Type[builtins.bool] +reveal_type(np.ScalarType[9]) # E: Type[{csingle}] +reveal_type(np.ScalarType[11]) # E: Type[{clongdouble}] + +reveal_type(np.typecodes["Character"]) # E: Literal['c'] +reveal_type(np.typecodes["Complex"]) # E: Literal['FDG'] +reveal_type(np.typecodes["All"]) # E: Literal['?bhilqpBHILQPefdgFDGSUVOMm'] diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/random.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/random.pyi new file mode 100644 index 0000000..4e06aa7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/random.pyi @@ -0,0 +1,1539 @@ +from __future__ import annotations + +from typing import Any, List + +import numpy as np + +def_rng = np.random.default_rng() +seed_seq = np.random.SeedSequence() +mt19937 = np.random.MT19937() +pcg64 = np.random.PCG64() +sfc64 = np.random.SFC64() +philox = np.random.Philox() +seedless_seq = np.random.bit_generator.SeedlessSeedSequence() + +reveal_type(def_rng) # E: random._generator.Generator +reveal_type(mt19937) # E: random._mt19937.MT19937 +reveal_type(pcg64) # E: random._pcg64.PCG64 +reveal_type(sfc64) # E: 
random._sfc64.SFC64 +reveal_type(philox) # E: random._philox.Philox +reveal_type(seed_seq) # E: random.bit_generator.SeedSequence +reveal_type(seedless_seq) # E: random.bit_generator.SeedlessSeedSequence + +mt19937_jumped = mt19937.jumped() +mt19937_jumped3 = mt19937.jumped(3) +mt19937_raw = mt19937.random_raw() +mt19937_raw_arr = mt19937.random_raw(5) + +reveal_type(mt19937_jumped) # E: random._mt19937.MT19937 +reveal_type(mt19937_jumped3) # E: random._mt19937.MT19937 +reveal_type(mt19937_raw) # E: int +reveal_type(mt19937_raw_arr) # E: ndarray[Any, dtype[unsignedinteger[typing._64Bit]]] +reveal_type(mt19937.lock) # E: threading.Lock + +pcg64_jumped = pcg64.jumped() +pcg64_jumped3 = pcg64.jumped(3) +pcg64_adv = pcg64.advance(3) +pcg64_raw = pcg64.random_raw() +pcg64_raw_arr = pcg64.random_raw(5) + +reveal_type(pcg64_jumped) # E: random._pcg64.PCG64 +reveal_type(pcg64_jumped3) # E: random._pcg64.PCG64 +reveal_type(pcg64_adv) # E: random._pcg64.PCG64 +reveal_type(pcg64_raw) # E: int +reveal_type(pcg64_raw_arr) # E: ndarray[Any, dtype[unsignedinteger[typing._64Bit]]] +reveal_type(pcg64.lock) # E: threading.Lock + +philox_jumped = philox.jumped() +philox_jumped3 = philox.jumped(3) +philox_adv = philox.advance(3) +philox_raw = philox.random_raw() +philox_raw_arr = philox.random_raw(5) + +reveal_type(philox_jumped) # E: random._philox.Philox +reveal_type(philox_jumped3) # E: random._philox.Philox +reveal_type(philox_adv) # E: random._philox.Philox +reveal_type(philox_raw) # E: int +reveal_type(philox_raw_arr) # E: ndarray[Any, dtype[unsignedinteger[typing._64Bit]]] +reveal_type(philox.lock) # E: threading.Lock + +sfc64_raw = sfc64.random_raw() +sfc64_raw_arr = sfc64.random_raw(5) + +reveal_type(sfc64_raw) # E: int +reveal_type(sfc64_raw_arr) # E: ndarray[Any, dtype[unsignedinteger[typing._64Bit]]] +reveal_type(sfc64.lock) # E: threading.Lock + +reveal_type(seed_seq.pool) # E: ndarray[Any, dtype[unsignedinteger[typing._32Bit]]] +reveal_type(seed_seq.entropy) # E: Union[None, int, Sequence[int]] +reveal_type(seed_seq.spawn(1)) # E: list[random.bit_generator.SeedSequence] +reveal_type(seed_seq.generate_state(8, "uint32")) # E: ndarray[Any, dtype[Union[unsignedinteger[typing._32Bit], unsignedinteger[typing._64Bit]]]] +reveal_type(seed_seq.generate_state(8, "uint64")) # E: ndarray[Any, dtype[Union[unsignedinteger[typing._32Bit], unsignedinteger[typing._64Bit]]]] + + +def_gen: np.random.Generator = np.random.default_rng() + +D_arr_0p1: np.ndarray[Any, np.dtype[np.float64]] = np.array([0.1]) +D_arr_0p5: np.ndarray[Any, np.dtype[np.float64]] = np.array([0.5]) +D_arr_0p9: np.ndarray[Any, np.dtype[np.float64]] = np.array([0.9]) +D_arr_1p5: np.ndarray[Any, np.dtype[np.float64]] = np.array([1.5]) +I_arr_10: np.ndarray[Any, np.dtype[np.int_]] = np.array([10], dtype=np.int_) +I_arr_20: np.ndarray[Any, np.dtype[np.int_]] = np.array([20], dtype=np.int_) +D_arr_like_0p1: List[float] = [0.1] +D_arr_like_0p5: List[float] = [0.5] +D_arr_like_0p9: List[float] = [0.9] +D_arr_like_1p5: List[float] = [1.5] +I_arr_like_10: List[int] = [10] +I_arr_like_20: List[int] = [20] +D_2D_like: List[List[float]] = [[1, 2], [2, 3], [3, 4], [4, 5.1]] +D_2D: np.ndarray[Any, np.dtype[np.float64]] = np.array(D_2D_like) +S_out: np.ndarray[Any, np.dtype[np.float32]] = np.empty(1, dtype=np.float32) +D_out: np.ndarray[Any, np.dtype[np.float64]] = np.empty(1) + +reveal_type(def_gen.standard_normal()) # E: float +reveal_type(def_gen.standard_normal(dtype=np.float32)) # E: float +reveal_type(def_gen.standard_normal(dtype="float32")) # E: float
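+# NOTE: the pattern repeated for every Generator method below --
+# size=None (the default) selects the scalar overload (float or int),
+# any concrete size selects the ndarray overload, and dtype/out narrow
+# the revealed precision. A minimal sketch using the fixtures above:
+#
+#     scalar = def_gen.standard_normal()     # reveals float
+#     arr = def_gen.standard_normal(size=1)  # reveals a float64 ndarray
+#     arr32 = def_gen.standard_normal(size=1, dtype=np.float32, out=S_out)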
+reveal_type(def_gen.standard_normal(dtype="double")) # E: float +reveal_type(def_gen.standard_normal(dtype=np.float64)) # E: float +reveal_type(def_gen.standard_normal(size=None)) # E: float +reveal_type(def_gen.standard_normal(size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]]] +reveal_type(def_gen.standard_normal(size=1, dtype=np.float32)) # E: ndarray[Any, dtype[floating[typing._32Bit]]] +reveal_type(def_gen.standard_normal(size=1, dtype="f4")) # E: ndarray[Any, dtype[floating[typing._32Bit]]] +reveal_type(def_gen.standard_normal(size=1, dtype="float32", out=S_out)) # E: ndarray[Any, dtype[floating[typing._32Bit]]] +reveal_type(def_gen.standard_normal(dtype=np.float32, out=S_out)) # E: ndarray[Any, dtype[floating[typing._32Bit]]] +reveal_type(def_gen.standard_normal(size=1, dtype=np.float64)) # E: ndarray[Any, dtype[floating[typing._64Bit]]] +reveal_type(def_gen.standard_normal(size=1, dtype="float64")) # E: ndarray[Any, dtype[floating[typing._64Bit]]] +reveal_type(def_gen.standard_normal(size=1, dtype="f8")) # E: ndarray[Any, dtype[floating[typing._64Bit]]] +reveal_type(def_gen.standard_normal(out=D_out)) # E: ndarray[Any, dtype[floating[typing._64Bit]]] +reveal_type(def_gen.standard_normal(size=1, dtype="float64")) # E: ndarray[Any, dtype[floating[typing._64Bit]]] +reveal_type(def_gen.standard_normal(size=1, dtype="float64", out=D_out)) # E: ndarray[Any, dtype[floating[typing._64Bit]]] + +reveal_type(def_gen.random()) # E: float +reveal_type(def_gen.random(dtype=np.float32)) # E: float +reveal_type(def_gen.random(dtype="float32")) # E: float +reveal_type(def_gen.random(dtype="double")) # E: float +reveal_type(def_gen.random(dtype=np.float64)) # E: float +reveal_type(def_gen.random(size=None)) # E: float +reveal_type(def_gen.random(size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]]] +reveal_type(def_gen.random(size=1, dtype=np.float32)) # E: ndarray[Any, dtype[floating[typing._32Bit]]] +reveal_type(def_gen.random(size=1, dtype="f4")) # E: ndarray[Any, dtype[floating[typing._32Bit]]] +reveal_type(def_gen.random(size=1, dtype="float32", out=S_out)) # E: ndarray[Any, dtype[floating[typing._32Bit]]] +reveal_type(def_gen.random(dtype=np.float32, out=S_out)) # E: ndarray[Any, dtype[floating[typing._32Bit]]] +reveal_type(def_gen.random(size=1, dtype=np.float64)) # E: ndarray[Any, dtype[floating[typing._64Bit]]] +reveal_type(def_gen.random(size=1, dtype="float64")) # E: ndarray[Any, dtype[floating[typing._64Bit]]] +reveal_type(def_gen.random(size=1, dtype="f8")) # E: ndarray[Any, dtype[floating[typing._64Bit]]] +reveal_type(def_gen.random(out=D_out)) # E: ndarray[Any, dtype[floating[typing._64Bit]]] +reveal_type(def_gen.random(size=1, dtype="float64")) # E: ndarray[Any, dtype[floating[typing._64Bit]]] +reveal_type(def_gen.random(size=1, dtype="float64", out=D_out)) # E: ndarray[Any, dtype[floating[typing._64Bit]]] + +reveal_type(def_gen.standard_cauchy()) # E: float +reveal_type(def_gen.standard_cauchy(size=None)) # E: float +reveal_type(def_gen.standard_cauchy(size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(def_gen.standard_exponential()) # E: float +reveal_type(def_gen.standard_exponential(method="inv")) # E: float +reveal_type(def_gen.standard_exponential(dtype=np.float32)) # E: float +reveal_type(def_gen.standard_exponential(dtype="float32")) # E: float +reveal_type(def_gen.standard_exponential(dtype="double")) # E: float +reveal_type(def_gen.standard_exponential(dtype=np.float64)) # E: float +reveal_type(def_gen.standard_exponential(size=None)) # E: 
float +reveal_type(def_gen.standard_exponential(size=None, method="inv")) # E: float +reveal_type(def_gen.standard_exponential(size=1, method="inv")) # E: ndarray[Any, dtype[floating[typing._64Bit]]] +reveal_type(def_gen.standard_exponential(size=1, dtype=np.float32)) # E: ndarray[Any, dtype[floating[typing._32Bit]]] +reveal_type(def_gen.standard_exponential(size=1, dtype="f4", method="inv")) # E: ndarray[Any, dtype[floating[typing._32Bit]]] +reveal_type(def_gen.standard_exponential(size=1, dtype="float32", out=S_out)) # E: ndarray[Any, dtype[floating[typing._32Bit]]] +reveal_type(def_gen.standard_exponential(dtype=np.float32, out=S_out)) # E: ndarray[Any, dtype[floating[typing._32Bit]]] +reveal_type(def_gen.standard_exponential(size=1, dtype=np.float64, method="inv")) # E: ndarray[Any, dtype[floating[typing._64Bit]]] +reveal_type(def_gen.standard_exponential(size=1, dtype="float64")) # E: ndarray[Any, dtype[floating[typing._64Bit]]] +reveal_type(def_gen.standard_exponential(size=1, dtype="f8")) # E: ndarray[Any, dtype[floating[typing._64Bit]]] +reveal_type(def_gen.standard_exponential(out=D_out)) # E: ndarray[Any, dtype[floating[typing._64Bit]]] +reveal_type(def_gen.standard_exponential(size=1, dtype="float64")) # E: ndarray[Any, dtype[floating[typing._64Bit]]] +reveal_type(def_gen.standard_exponential(size=1, dtype="float64", out=D_out)) # E: ndarray[Any, dtype[floating[typing._64Bit]]] + +reveal_type(def_gen.zipf(1.5)) # E: int +reveal_type(def_gen.zipf(1.5, size=None)) # E: int +reveal_type(def_gen.zipf(1.5, size=1)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.zipf(D_arr_1p5)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.zipf(D_arr_1p5, size=1)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.zipf(D_arr_like_1p5)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.zipf(D_arr_like_1p5, size=1)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] + +reveal_type(def_gen.weibull(0.5)) # E: float +reveal_type(def_gen.weibull(0.5, size=None)) # E: float +reveal_type(def_gen.weibull(0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.weibull(D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.weibull(D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.weibull(D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.weibull(D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(def_gen.standard_t(0.5)) # E: float +reveal_type(def_gen.standard_t(0.5, size=None)) # E: float +reveal_type(def_gen.standard_t(0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.standard_t(D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.standard_t(D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.standard_t(D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.standard_t(D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(def_gen.poisson(0.5)) # E: int +reveal_type(def_gen.poisson(0.5, size=None)) # E: int +reveal_type(def_gen.poisson(0.5, size=1)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.poisson(D_arr_0p5)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.poisson(D_arr_0p5, size=1)) # E: ndarray[Any, 
dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.poisson(D_arr_like_0p5)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.poisson(D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] + +reveal_type(def_gen.power(0.5)) # E: float +reveal_type(def_gen.power(0.5, size=None)) # E: float +reveal_type(def_gen.power(0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.power(D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.power(D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.power(D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.power(D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(def_gen.pareto(0.5)) # E: float +reveal_type(def_gen.pareto(0.5, size=None)) # E: float +reveal_type(def_gen.pareto(0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.pareto(D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.pareto(D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.pareto(D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.pareto(D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(def_gen.chisquare(0.5)) # E: float +reveal_type(def_gen.chisquare(0.5, size=None)) # E: float +reveal_type(def_gen.chisquare(0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.chisquare(D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.chisquare(D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.chisquare(D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.chisquare(D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(def_gen.exponential(0.5)) # E: float +reveal_type(def_gen.exponential(0.5, size=None)) # E: float +reveal_type(def_gen.exponential(0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.exponential(D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.exponential(D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.exponential(D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.exponential(D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(def_gen.geometric(0.5)) # E: int +reveal_type(def_gen.geometric(0.5, size=None)) # E: int +reveal_type(def_gen.geometric(0.5, size=1)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.geometric(D_arr_0p5)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.geometric(D_arr_0p5, size=1)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.geometric(D_arr_like_0p5)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.geometric(D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] + +reveal_type(def_gen.logseries(0.5)) # E: int +reveal_type(def_gen.logseries(0.5, size=None)) # E: int +reveal_type(def_gen.logseries(0.5, size=1)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.logseries(D_arr_0p5)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] 
+reveal_type(def_gen.logseries(D_arr_0p5, size=1)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.logseries(D_arr_like_0p5)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.logseries(D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] + +reveal_type(def_gen.rayleigh(0.5)) # E: float +reveal_type(def_gen.rayleigh(0.5, size=None)) # E: float +reveal_type(def_gen.rayleigh(0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.rayleigh(D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.rayleigh(D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.rayleigh(D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.rayleigh(D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(def_gen.standard_gamma(0.5)) # E: float +reveal_type(def_gen.standard_gamma(0.5, size=None)) # E: float +reveal_type(def_gen.standard_gamma(0.5, dtype="float32")) # E: float +reveal_type(def_gen.standard_gamma(0.5, size=None, dtype="float32")) # E: float +reveal_type(def_gen.standard_gamma(0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]]] +reveal_type(def_gen.standard_gamma(D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]]] +reveal_type(def_gen.standard_gamma(D_arr_0p5, dtype="f4")) # E: ndarray[Any, dtype[floating[typing._32Bit]]] +reveal_type(def_gen.standard_gamma(0.5, size=1, dtype="float32", out=S_out)) # E: ndarray[Any, dtype[floating[typing._32Bit]]] +reveal_type(def_gen.standard_gamma(D_arr_0p5, dtype=np.float32, out=S_out)) # E: ndarray[Any, dtype[floating[typing._32Bit]]] +reveal_type(def_gen.standard_gamma(D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]]] +reveal_type(def_gen.standard_gamma(D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]]] +reveal_type(def_gen.standard_gamma(D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]]] +reveal_type(def_gen.standard_gamma(0.5, out=D_out)) # E: ndarray[Any, dtype[floating[typing._64Bit]]] +reveal_type(def_gen.standard_gamma(D_arr_like_0p5, out=D_out)) # E: ndarray[Any, dtype[floating[typing._64Bit]]] +reveal_type(def_gen.standard_gamma(D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]]] +reveal_type(def_gen.standard_gamma(D_arr_like_0p5, size=1, out=D_out, dtype=np.float64)) # E: ndarray[Any, dtype[floating[typing._64Bit]]] + +reveal_type(def_gen.vonmises(0.5, 0.5)) # E: float +reveal_type(def_gen.vonmises(0.5, 0.5, size=None)) # E: float +reveal_type(def_gen.vonmises(0.5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.vonmises(D_arr_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.vonmises(0.5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.vonmises(D_arr_0p5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.vonmises(0.5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.vonmises(D_arr_like_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.vonmises(0.5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.vonmises(D_arr_0p5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.vonmises(D_arr_like_0p5, D_arr_like_0p5)) # E: ndarray[Any, 
dtype[floating[typing._64Bit]] +reveal_type(def_gen.vonmises(D_arr_0p5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.vonmises(D_arr_like_0p5, D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(def_gen.wald(0.5, 0.5)) # E: float +reveal_type(def_gen.wald(0.5, 0.5, size=None)) # E: float +reveal_type(def_gen.wald(0.5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.wald(D_arr_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.wald(0.5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.wald(D_arr_0p5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.wald(0.5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.wald(D_arr_like_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.wald(0.5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.wald(D_arr_0p5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.wald(D_arr_like_0p5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.wald(D_arr_0p5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.wald(D_arr_like_0p5, D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(def_gen.uniform(0.5, 0.5)) # E: float +reveal_type(def_gen.uniform(0.5, 0.5, size=None)) # E: float +reveal_type(def_gen.uniform(0.5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.uniform(D_arr_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.uniform(0.5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.uniform(D_arr_0p5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.uniform(0.5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.uniform(D_arr_like_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.uniform(0.5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.uniform(D_arr_0p5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.uniform(D_arr_like_0p5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.uniform(D_arr_0p5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.uniform(D_arr_like_0p5, D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(def_gen.beta(0.5, 0.5)) # E: float +reveal_type(def_gen.beta(0.5, 0.5, size=None)) # E: float +reveal_type(def_gen.beta(0.5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.beta(D_arr_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.beta(0.5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.beta(D_arr_0p5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.beta(0.5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.beta(D_arr_like_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.beta(0.5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.beta(D_arr_0p5, D_arr_0p5)) # E: 
ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.beta(D_arr_like_0p5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.beta(D_arr_0p5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.beta(D_arr_like_0p5, D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(def_gen.f(0.5, 0.5)) # E: float +reveal_type(def_gen.f(0.5, 0.5, size=None)) # E: float +reveal_type(def_gen.f(0.5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.f(D_arr_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.f(0.5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.f(D_arr_0p5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.f(0.5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.f(D_arr_like_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.f(0.5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.f(D_arr_0p5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.f(D_arr_like_0p5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.f(D_arr_0p5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.f(D_arr_like_0p5, D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(def_gen.gamma(0.5, 0.5)) # E: float +reveal_type(def_gen.gamma(0.5, 0.5, size=None)) # E: float +reveal_type(def_gen.gamma(0.5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.gamma(D_arr_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.gamma(0.5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.gamma(D_arr_0p5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.gamma(0.5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.gamma(D_arr_like_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.gamma(0.5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.gamma(D_arr_0p5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.gamma(D_arr_like_0p5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.gamma(D_arr_0p5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.gamma(D_arr_like_0p5, D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(def_gen.gumbel(0.5, 0.5)) # E: float +reveal_type(def_gen.gumbel(0.5, 0.5, size=None)) # E: float +reveal_type(def_gen.gumbel(0.5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.gumbel(D_arr_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.gumbel(0.5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.gumbel(D_arr_0p5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.gumbel(0.5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.gumbel(D_arr_like_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.gumbel(0.5, D_arr_like_0p5)) # E: ndarray[Any, 
dtype[floating[typing._64Bit]] +reveal_type(def_gen.gumbel(D_arr_0p5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.gumbel(D_arr_like_0p5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.gumbel(D_arr_0p5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.gumbel(D_arr_like_0p5, D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(def_gen.laplace(0.5, 0.5)) # E: float +reveal_type(def_gen.laplace(0.5, 0.5, size=None)) # E: float +reveal_type(def_gen.laplace(0.5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.laplace(D_arr_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.laplace(0.5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.laplace(D_arr_0p5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.laplace(0.5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.laplace(D_arr_like_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.laplace(0.5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.laplace(D_arr_0p5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.laplace(D_arr_like_0p5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.laplace(D_arr_0p5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.laplace(D_arr_like_0p5, D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(def_gen.logistic(0.5, 0.5)) # E: float +reveal_type(def_gen.logistic(0.5, 0.5, size=None)) # E: float +reveal_type(def_gen.logistic(0.5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.logistic(D_arr_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.logistic(0.5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.logistic(D_arr_0p5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.logistic(0.5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.logistic(D_arr_like_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.logistic(0.5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.logistic(D_arr_0p5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.logistic(D_arr_like_0p5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.logistic(D_arr_0p5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.logistic(D_arr_like_0p5, D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(def_gen.lognormal(0.5, 0.5)) # E: float +reveal_type(def_gen.lognormal(0.5, 0.5, size=None)) # E: float +reveal_type(def_gen.lognormal(0.5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.lognormal(D_arr_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.lognormal(0.5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.lognormal(D_arr_0p5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.lognormal(0.5, D_arr_0p5, size=1)) # E: 
ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.lognormal(D_arr_like_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.lognormal(0.5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.lognormal(D_arr_0p5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.lognormal(D_arr_like_0p5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.lognormal(D_arr_0p5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.lognormal(D_arr_like_0p5, D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(def_gen.noncentral_chisquare(0.5, 0.5)) # E: float +reveal_type(def_gen.noncentral_chisquare(0.5, 0.5, size=None)) # E: float +reveal_type(def_gen.noncentral_chisquare(0.5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.noncentral_chisquare(D_arr_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.noncentral_chisquare(0.5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.noncentral_chisquare(D_arr_0p5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.noncentral_chisquare(0.5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.noncentral_chisquare(D_arr_like_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.noncentral_chisquare(0.5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.noncentral_chisquare(D_arr_0p5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.noncentral_chisquare(D_arr_like_0p5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.noncentral_chisquare(D_arr_0p5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.noncentral_chisquare(D_arr_like_0p5, D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(def_gen.normal(0.5, 0.5)) # E: float +reveal_type(def_gen.normal(0.5, 0.5, size=None)) # E: float +reveal_type(def_gen.normal(0.5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.normal(D_arr_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.normal(0.5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.normal(D_arr_0p5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.normal(0.5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.normal(D_arr_like_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.normal(0.5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.normal(D_arr_0p5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.normal(D_arr_like_0p5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.normal(D_arr_0p5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.normal(D_arr_like_0p5, D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(def_gen.triangular(0.1, 0.5, 0.9)) # E: float +reveal_type(def_gen.triangular(0.1, 0.5, 0.9, size=None)) # E: float +reveal_type(def_gen.triangular(0.1, 0.5, 0.9, size=1)) # E: ndarray[Any, 
dtype[floating[typing._64Bit]] +reveal_type(def_gen.triangular(D_arr_0p1, 0.5, 0.9)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.triangular(0.1, D_arr_0p5, 0.9)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.triangular(D_arr_0p1, 0.5, D_arr_like_0p9, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.triangular(0.1, D_arr_0p5, 0.9, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.triangular(D_arr_like_0p1, 0.5, D_arr_0p9)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.triangular(0.5, D_arr_like_0p5, 0.9)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.triangular(D_arr_0p1, D_arr_0p5, 0.9)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.triangular(D_arr_like_0p1, D_arr_like_0p5, 0.9)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.triangular(D_arr_0p1, D_arr_0p5, D_arr_0p9, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.triangular(D_arr_like_0p1, D_arr_like_0p5, D_arr_like_0p9, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(def_gen.noncentral_f(0.1, 0.5, 0.9)) # E: float +reveal_type(def_gen.noncentral_f(0.1, 0.5, 0.9, size=None)) # E: float +reveal_type(def_gen.noncentral_f(0.1, 0.5, 0.9, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.noncentral_f(D_arr_0p1, 0.5, 0.9)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.noncentral_f(0.1, D_arr_0p5, 0.9)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.noncentral_f(D_arr_0p1, 0.5, D_arr_like_0p9, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.noncentral_f(0.1, D_arr_0p5, 0.9, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.noncentral_f(D_arr_like_0p1, 0.5, D_arr_0p9)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.noncentral_f(0.5, D_arr_like_0p5, 0.9)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.noncentral_f(D_arr_0p1, D_arr_0p5, 0.9)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.noncentral_f(D_arr_like_0p1, D_arr_like_0p5, 0.9)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.noncentral_f(D_arr_0p1, D_arr_0p5, D_arr_0p9, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.noncentral_f(D_arr_like_0p1, D_arr_like_0p5, D_arr_like_0p9, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(def_gen.binomial(10, 0.5)) # E: int +reveal_type(def_gen.binomial(10, 0.5, size=None)) # E: int +reveal_type(def_gen.binomial(10, 0.5, size=1)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.binomial(I_arr_10, 0.5)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.binomial(10, D_arr_0p5)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.binomial(I_arr_10, 0.5, size=1)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.binomial(10, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.binomial(I_arr_like_10, 0.5)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.binomial(10, D_arr_like_0p5)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.binomial(I_arr_10, D_arr_0p5)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] 
+reveal_type(def_gen.binomial(I_arr_like_10, D_arr_like_0p5)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.binomial(I_arr_10, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.binomial(I_arr_like_10, D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] + +reveal_type(def_gen.negative_binomial(10, 0.5)) # E: int +reveal_type(def_gen.negative_binomial(10, 0.5, size=None)) # E: int +reveal_type(def_gen.negative_binomial(10, 0.5, size=1)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.negative_binomial(I_arr_10, 0.5)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.negative_binomial(10, D_arr_0p5)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.negative_binomial(I_arr_10, 0.5, size=1)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.negative_binomial(10, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.negative_binomial(I_arr_like_10, 0.5)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.negative_binomial(10, D_arr_like_0p5)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.negative_binomial(I_arr_10, D_arr_0p5)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.negative_binomial(I_arr_like_10, D_arr_like_0p5)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.negative_binomial(I_arr_10, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.negative_binomial(I_arr_like_10, D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] + +reveal_type(def_gen.hypergeometric(20, 20, 10)) # E: int +reveal_type(def_gen.hypergeometric(20, 20, 10, size=None)) # E: int +reveal_type(def_gen.hypergeometric(20, 20, 10, size=1)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.hypergeometric(I_arr_20, 20, 10)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.hypergeometric(20, I_arr_20, 10)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.hypergeometric(I_arr_20, 20, I_arr_like_10, size=1)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.hypergeometric(20, I_arr_20, 10, size=1)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.hypergeometric(I_arr_like_20, 20, I_arr_10)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.hypergeometric(20, I_arr_like_20, 10)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.hypergeometric(I_arr_20, I_arr_20, 10)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.hypergeometric(I_arr_like_20, I_arr_like_20, 10)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.hypergeometric(I_arr_20, I_arr_20, I_arr_10, size=1)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.hypergeometric(I_arr_like_20, I_arr_like_20, I_arr_like_10, size=1)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] + +I_int64_100: np.ndarray[Any, np.dtype[np.int64]] = np.array([100], dtype=np.int64) + +reveal_type(def_gen.integers(0, 100)) # E: int +reveal_type(def_gen.integers(100)) # E: int +reveal_type(def_gen.integers([100])) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.integers(0, 
[100])) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] + +I_bool_low: np.ndarray[Any, np.dtype[np.bool_]] = np.array([0], dtype=np.bool_) +I_bool_low_like: List[int] = [0] +I_bool_high_open: np.ndarray[Any, np.dtype[np.bool_]] = np.array([1], dtype=np.bool_) +I_bool_high_closed: np.ndarray[Any, np.dtype[np.bool_]] = np.array([1], dtype=np.bool_) + +reveal_type(def_gen.integers(2, dtype=bool)) # E: builtins.bool +reveal_type(def_gen.integers(0, 2, dtype=bool)) # E: builtins.bool +reveal_type(def_gen.integers(1, dtype=bool, endpoint=True)) # E: builtins.bool +reveal_type(def_gen.integers(0, 1, dtype=bool, endpoint=True)) # E: builtins.bool +reveal_type(def_gen.integers(I_bool_low_like, 1, dtype=bool, endpoint=True)) # E: ndarray[Any, dtype[bool_] +reveal_type(def_gen.integers(I_bool_high_open, dtype=bool)) # E: ndarray[Any, dtype[bool_] +reveal_type(def_gen.integers(I_bool_low, I_bool_high_open, dtype=bool)) # E: ndarray[Any, dtype[bool_] +reveal_type(def_gen.integers(0, I_bool_high_open, dtype=bool)) # E: ndarray[Any, dtype[bool_] +reveal_type(def_gen.integers(I_bool_high_closed, dtype=bool, endpoint=True)) # E: ndarray[Any, dtype[bool_] +reveal_type(def_gen.integers(I_bool_low, I_bool_high_closed, dtype=bool, endpoint=True)) # E: ndarray[Any, dtype[bool_] +reveal_type(def_gen.integers(0, I_bool_high_closed, dtype=bool, endpoint=True)) # E: ndarray[Any, dtype[bool_] + +reveal_type(def_gen.integers(2, dtype=np.bool_)) # E: builtins.bool +reveal_type(def_gen.integers(0, 2, dtype=np.bool_)) # E: builtins.bool +reveal_type(def_gen.integers(1, dtype=np.bool_, endpoint=True)) # E: builtins.bool +reveal_type(def_gen.integers(0, 1, dtype=np.bool_, endpoint=True)) # E: builtins.bool +reveal_type(def_gen.integers(I_bool_low_like, 1, dtype=np.bool_, endpoint=True)) # E: ndarray[Any, dtype[bool_] +reveal_type(def_gen.integers(I_bool_high_open, dtype=np.bool_)) # E: ndarray[Any, dtype[bool_] +reveal_type(def_gen.integers(I_bool_low, I_bool_high_open, dtype=np.bool_)) # E: ndarray[Any, dtype[bool_] +reveal_type(def_gen.integers(0, I_bool_high_open, dtype=np.bool_)) # E: ndarray[Any, dtype[bool_] +reveal_type(def_gen.integers(I_bool_high_closed, dtype=np.bool_, endpoint=True)) # E: ndarray[Any, dtype[bool_] +reveal_type(def_gen.integers(I_bool_low, I_bool_high_closed, dtype=np.bool_, endpoint=True)) # E: ndarray[Any, dtype[bool_] +reveal_type(def_gen.integers(0, I_bool_high_closed, dtype=np.bool_, endpoint=True)) # E: ndarray[Any, dtype[bool_] + +I_u1_low: np.ndarray[Any, np.dtype[np.uint8]] = np.array([0], dtype=np.uint8) +I_u1_low_like: List[int] = [0] +I_u1_high_open: np.ndarray[Any, np.dtype[np.uint8]] = np.array([255], dtype=np.uint8) +I_u1_high_closed: np.ndarray[Any, np.dtype[np.uint8]] = np.array([255], dtype=np.uint8) + +reveal_type(def_gen.integers(256, dtype="u1")) # E: int +reveal_type(def_gen.integers(0, 256, dtype="u1")) # E: int +reveal_type(def_gen.integers(255, dtype="u1", endpoint=True)) # E: int +reveal_type(def_gen.integers(0, 255, dtype="u1", endpoint=True)) # E: int +reveal_type(def_gen.integers(I_u1_low_like, 255, dtype="u1", endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._8Bit]]] +reveal_type(def_gen.integers(I_u1_high_open, dtype="u1")) # E: ndarray[Any, dtype[unsignedinteger[typing._8Bit]]] +reveal_type(def_gen.integers(I_u1_low, I_u1_high_open, dtype="u1")) # E: ndarray[Any, dtype[unsignedinteger[typing._8Bit]]] +reveal_type(def_gen.integers(0, I_u1_high_open, dtype="u1")) # E: ndarray[Any, dtype[unsignedinteger[typing._8Bit]]] 
+reveal_type(def_gen.integers(I_u1_high_closed, dtype="u1", endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._8Bit]]] +reveal_type(def_gen.integers(I_u1_low, I_u1_high_closed, dtype="u1", endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._8Bit]]] +reveal_type(def_gen.integers(0, I_u1_high_closed, dtype="u1", endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._8Bit]]] + +reveal_type(def_gen.integers(256, dtype="uint8")) # E: int +reveal_type(def_gen.integers(0, 256, dtype="uint8")) # E: int +reveal_type(def_gen.integers(255, dtype="uint8", endpoint=True)) # E: int +reveal_type(def_gen.integers(0, 255, dtype="uint8", endpoint=True)) # E: int +reveal_type(def_gen.integers(I_u1_low_like, 255, dtype="uint8", endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._8Bit]]] +reveal_type(def_gen.integers(I_u1_high_open, dtype="uint8")) # E: ndarray[Any, dtype[unsignedinteger[typing._8Bit]]] +reveal_type(def_gen.integers(I_u1_low, I_u1_high_open, dtype="uint8")) # E: ndarray[Any, dtype[unsignedinteger[typing._8Bit]]] +reveal_type(def_gen.integers(0, I_u1_high_open, dtype="uint8")) # E: ndarray[Any, dtype[unsignedinteger[typing._8Bit]]] +reveal_type(def_gen.integers(I_u1_high_closed, dtype="uint8", endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._8Bit]]] +reveal_type(def_gen.integers(I_u1_low, I_u1_high_closed, dtype="uint8", endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._8Bit]]] +reveal_type(def_gen.integers(0, I_u1_high_closed, dtype="uint8", endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._8Bit]]] + +reveal_type(def_gen.integers(256, dtype=np.uint8)) # E: int +reveal_type(def_gen.integers(0, 256, dtype=np.uint8)) # E: int +reveal_type(def_gen.integers(255, dtype=np.uint8, endpoint=True)) # E: int +reveal_type(def_gen.integers(0, 255, dtype=np.uint8, endpoint=True)) # E: int +reveal_type(def_gen.integers(I_u1_low_like, 255, dtype=np.uint8, endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._8Bit]]] +reveal_type(def_gen.integers(I_u1_high_open, dtype=np.uint8)) # E: ndarray[Any, dtype[unsignedinteger[typing._8Bit]]] +reveal_type(def_gen.integers(I_u1_low, I_u1_high_open, dtype=np.uint8)) # E: ndarray[Any, dtype[unsignedinteger[typing._8Bit]]] +reveal_type(def_gen.integers(0, I_u1_high_open, dtype=np.uint8)) # E: ndarray[Any, dtype[unsignedinteger[typing._8Bit]]] +reveal_type(def_gen.integers(I_u1_high_closed, dtype=np.uint8, endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._8Bit]]] +reveal_type(def_gen.integers(I_u1_low, I_u1_high_closed, dtype=np.uint8, endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._8Bit]]] +reveal_type(def_gen.integers(0, I_u1_high_closed, dtype=np.uint8, endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._8Bit]]] + +I_u2_low: np.ndarray[Any, np.dtype[np.uint16]] = np.array([0], dtype=np.uint16) +I_u2_low_like: List[int] = [0] +I_u2_high_open: np.ndarray[Any, np.dtype[np.uint16]] = np.array([65535], dtype=np.uint16) +I_u2_high_closed: np.ndarray[Any, np.dtype[np.uint16]] = np.array([65535], dtype=np.uint16) + +reveal_type(def_gen.integers(65536, dtype="u2")) # E: int +reveal_type(def_gen.integers(0, 65536, dtype="u2")) # E: int +reveal_type(def_gen.integers(65535, dtype="u2", endpoint=True)) # E: int +reveal_type(def_gen.integers(0, 65535, dtype="u2", endpoint=True)) # E: int +reveal_type(def_gen.integers(I_u2_low_like, 65535, dtype="u2", endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._16Bit]]] 
+reveal_type(def_gen.integers(I_u2_high_open, dtype="u2")) # E: ndarray[Any, dtype[unsignedinteger[typing._16Bit]]] +reveal_type(def_gen.integers(I_u2_low, I_u2_high_open, dtype="u2")) # E: ndarray[Any, dtype[unsignedinteger[typing._16Bit]]] +reveal_type(def_gen.integers(0, I_u2_high_open, dtype="u2")) # E: ndarray[Any, dtype[unsignedinteger[typing._16Bit]]] +reveal_type(def_gen.integers(I_u2_high_closed, dtype="u2", endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._16Bit]]] +reveal_type(def_gen.integers(I_u2_low, I_u2_high_closed, dtype="u2", endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._16Bit]]] +reveal_type(def_gen.integers(0, I_u2_high_closed, dtype="u2", endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._16Bit]]] + +reveal_type(def_gen.integers(65536, dtype="uint16")) # E: int +reveal_type(def_gen.integers(0, 65536, dtype="uint16")) # E: int +reveal_type(def_gen.integers(65535, dtype="uint16", endpoint=True)) # E: int +reveal_type(def_gen.integers(0, 65535, dtype="uint16", endpoint=True)) # E: int +reveal_type(def_gen.integers(I_u2_low_like, 65535, dtype="uint16", endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._16Bit]]] +reveal_type(def_gen.integers(I_u2_high_open, dtype="uint16")) # E: ndarray[Any, dtype[unsignedinteger[typing._16Bit]]] +reveal_type(def_gen.integers(I_u2_low, I_u2_high_open, dtype="uint16")) # E: ndarray[Any, dtype[unsignedinteger[typing._16Bit]]] +reveal_type(def_gen.integers(0, I_u2_high_open, dtype="uint16")) # E: ndarray[Any, dtype[unsignedinteger[typing._16Bit]]] +reveal_type(def_gen.integers(I_u2_high_closed, dtype="uint16", endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._16Bit]]] +reveal_type(def_gen.integers(I_u2_low, I_u2_high_closed, dtype="uint16", endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._16Bit]]] +reveal_type(def_gen.integers(0, I_u2_high_closed, dtype="uint16", endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._16Bit]]] + +reveal_type(def_gen.integers(65536, dtype=np.uint16)) # E: int +reveal_type(def_gen.integers(0, 65536, dtype=np.uint16)) # E: int +reveal_type(def_gen.integers(65535, dtype=np.uint16, endpoint=True)) # E: int +reveal_type(def_gen.integers(0, 65535, dtype=np.uint16, endpoint=True)) # E: int +reveal_type(def_gen.integers(I_u2_low_like, 65535, dtype=np.uint16, endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._16Bit]]] +reveal_type(def_gen.integers(I_u2_high_open, dtype=np.uint16)) # E: ndarray[Any, dtype[unsignedinteger[typing._16Bit]]] +reveal_type(def_gen.integers(I_u2_low, I_u2_high_open, dtype=np.uint16)) # E: ndarray[Any, dtype[unsignedinteger[typing._16Bit]]] +reveal_type(def_gen.integers(0, I_u2_high_open, dtype=np.uint16)) # E: ndarray[Any, dtype[unsignedinteger[typing._16Bit]]] +reveal_type(def_gen.integers(I_u2_high_closed, dtype=np.uint16, endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._16Bit]]] +reveal_type(def_gen.integers(I_u2_low, I_u2_high_closed, dtype=np.uint16, endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._16Bit]]] +reveal_type(def_gen.integers(0, I_u2_high_closed, dtype=np.uint16, endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._16Bit]]] + +I_u4_low: np.ndarray[Any, np.dtype[np.uint32]] = np.array([0], dtype=np.uint32) +I_u4_low_like: List[int] = [0] +I_u4_high_open: np.ndarray[Any, np.dtype[np.uint32]] = np.array([4294967295], dtype=np.uint32) +I_u4_high_closed: np.ndarray[Any, np.dtype[np.uint32]] = np.array([4294967295], dtype=np.uint32) + 
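+# NOTE on the fixture naming used throughout the integers() tests:
+# *_high_open holds the largest value valid as an exclusive upper bound
+# (the default), while *_high_closed is only valid together with
+# endpoint=True, which makes the bound inclusive, e.g.:
+#
+#     def_gen.integers(0, 256, dtype="u1")                 # high exclusive
+#     def_gen.integers(0, 255, dtype="u1", endpoint=True)  # high inclusive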
+reveal_type(def_gen.integers(4294967296, dtype=np.int_)) # E: int +reveal_type(def_gen.integers(0, 4294967296, dtype=np.int_)) # E: int +reveal_type(def_gen.integers(4294967295, dtype=np.int_, endpoint=True)) # E: int +reveal_type(def_gen.integers(0, 4294967295, dtype=np.int_, endpoint=True)) # E: int +reveal_type(def_gen.integers(I_u4_low_like, 4294967295, dtype=np.int_, endpoint=True)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(def_gen.integers(I_u4_high_open, dtype=np.int_)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(def_gen.integers(I_u4_low, I_u4_high_open, dtype=np.int_)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(def_gen.integers(0, I_u4_high_open, dtype=np.int_)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(def_gen.integers(I_u4_high_closed, dtype=np.int_, endpoint=True)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(def_gen.integers(I_u4_low, I_u4_high_closed, dtype=np.int_, endpoint=True)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(def_gen.integers(0, I_u4_high_closed, dtype=np.int_, endpoint=True)) # E: ndarray[Any, dtype[{int_}]] + + +reveal_type(def_gen.integers(4294967296, dtype="u4")) # E: int +reveal_type(def_gen.integers(0, 4294967296, dtype="u4")) # E: int +reveal_type(def_gen.integers(4294967295, dtype="u4", endpoint=True)) # E: int +reveal_type(def_gen.integers(0, 4294967295, dtype="u4", endpoint=True)) # E: int +reveal_type(def_gen.integers(I_u4_low_like, 4294967295, dtype="u4", endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._32Bit]]] +reveal_type(def_gen.integers(I_u4_high_open, dtype="u4")) # E: ndarray[Any, dtype[unsignedinteger[typing._32Bit]]] +reveal_type(def_gen.integers(I_u4_low, I_u4_high_open, dtype="u4")) # E: ndarray[Any, dtype[unsignedinteger[typing._32Bit]]] +reveal_type(def_gen.integers(0, I_u4_high_open, dtype="u4")) # E: ndarray[Any, dtype[unsignedinteger[typing._32Bit]]] +reveal_type(def_gen.integers(I_u4_high_closed, dtype="u4", endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._32Bit]]] +reveal_type(def_gen.integers(I_u4_low, I_u4_high_closed, dtype="u4", endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._32Bit]]] +reveal_type(def_gen.integers(0, I_u4_high_closed, dtype="u4", endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._32Bit]]] + +reveal_type(def_gen.integers(4294967296, dtype="uint32")) # E: int +reveal_type(def_gen.integers(0, 4294967296, dtype="uint32")) # E: int +reveal_type(def_gen.integers(4294967295, dtype="uint32", endpoint=True)) # E: int +reveal_type(def_gen.integers(0, 4294967295, dtype="uint32", endpoint=True)) # E: int +reveal_type(def_gen.integers(I_u4_low_like, 4294967295, dtype="uint32", endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._32Bit]]] +reveal_type(def_gen.integers(I_u4_high_open, dtype="uint32")) # E: ndarray[Any, dtype[unsignedinteger[typing._32Bit]]] +reveal_type(def_gen.integers(I_u4_low, I_u4_high_open, dtype="uint32")) # E: ndarray[Any, dtype[unsignedinteger[typing._32Bit]]] +reveal_type(def_gen.integers(0, I_u4_high_open, dtype="uint32")) # E: ndarray[Any, dtype[unsignedinteger[typing._32Bit]]] +reveal_type(def_gen.integers(I_u4_high_closed, dtype="uint32", endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._32Bit]]] +reveal_type(def_gen.integers(I_u4_low, I_u4_high_closed, dtype="uint32", endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._32Bit]]] +reveal_type(def_gen.integers(0, I_u4_high_closed, dtype="uint32", endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._32Bit]]] + 
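+# Each integer width is probed with three equivalent dtype spellings (character code "u4", dtype name "uint32",
+# and scalar type np.uint32); all three should resolve to the same overload and reveal the same type.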
+reveal_type(def_gen.integers(4294967296, dtype=np.uint32)) # E: int +reveal_type(def_gen.integers(0, 4294967296, dtype=np.uint32)) # E: int +reveal_type(def_gen.integers(4294967295, dtype=np.uint32, endpoint=True)) # E: int +reveal_type(def_gen.integers(0, 4294967295, dtype=np.uint32, endpoint=True)) # E: int +reveal_type(def_gen.integers(I_u4_low_like, 4294967295, dtype=np.uint32, endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._32Bit]]] +reveal_type(def_gen.integers(I_u4_high_open, dtype=np.uint32)) # E: ndarray[Any, dtype[unsignedinteger[typing._32Bit]]] +reveal_type(def_gen.integers(I_u4_low, I_u4_high_open, dtype=np.uint32)) # E: ndarray[Any, dtype[unsignedinteger[typing._32Bit]]] +reveal_type(def_gen.integers(0, I_u4_high_open, dtype=np.uint32)) # E: ndarray[Any, dtype[unsignedinteger[typing._32Bit]]] +reveal_type(def_gen.integers(I_u4_high_closed, dtype=np.uint32, endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._32Bit]]] +reveal_type(def_gen.integers(I_u4_low, I_u4_high_closed, dtype=np.uint32, endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._32Bit]]] +reveal_type(def_gen.integers(0, I_u4_high_closed, dtype=np.uint32, endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._32Bit]]] + +reveal_type(def_gen.integers(4294967296, dtype=np.uint)) # E: int +reveal_type(def_gen.integers(0, 4294967296, dtype=np.uint)) # E: int +reveal_type(def_gen.integers(4294967295, dtype=np.uint, endpoint=True)) # E: int +reveal_type(def_gen.integers(0, 4294967295, dtype=np.uint, endpoint=True)) # E: int +reveal_type(def_gen.integers(I_u4_low_like, 4294967295, dtype=np.uint, endpoint=True)) # E: ndarray[Any, dtype[{uint}]] +reveal_type(def_gen.integers(I_u4_high_open, dtype=np.uint)) # E: ndarray[Any, dtype[{uint}]] +reveal_type(def_gen.integers(I_u4_low, I_u4_high_open, dtype=np.uint)) # E: ndarray[Any, dtype[{uint}]] +reveal_type(def_gen.integers(0, I_u4_high_open, dtype=np.uint)) # E: ndarray[Any, dtype[{uint}]] +reveal_type(def_gen.integers(I_u4_high_closed, dtype=np.uint, endpoint=True)) # E: ndarray[Any, dtype[{uint}]] +reveal_type(def_gen.integers(I_u4_low, I_u4_high_closed, dtype=np.uint, endpoint=True)) # E: ndarray[Any, dtype[{uint}]] +reveal_type(def_gen.integers(0, I_u4_high_closed, dtype=np.uint, endpoint=True)) # E: ndarray[Any, dtype[{uint}]] + +I_u8_low: np.ndarray[Any, np.dtype[np.uint64]] = np.array([0], dtype=np.uint64) +I_u8_low_like: List[int] = [0] +I_u8_high_open: np.ndarray[Any, np.dtype[np.uint64]] = np.array([18446744073709551615], dtype=np.uint64) +I_u8_high_closed: np.ndarray[Any, np.dtype[np.uint64]] = np.array([18446744073709551615], dtype=np.uint64) + +reveal_type(def_gen.integers(18446744073709551616, dtype="u8")) # E: int +reveal_type(def_gen.integers(0, 18446744073709551616, dtype="u8")) # E: int +reveal_type(def_gen.integers(18446744073709551615, dtype="u8", endpoint=True)) # E: int +reveal_type(def_gen.integers(0, 18446744073709551615, dtype="u8", endpoint=True)) # E: int +reveal_type(def_gen.integers(I_u8_low_like, 18446744073709551615, dtype="u8", endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._64Bit]]] +reveal_type(def_gen.integers(I_u8_high_open, dtype="u8")) # E: ndarray[Any, dtype[unsignedinteger[typing._64Bit]]] +reveal_type(def_gen.integers(I_u8_low, I_u8_high_open, dtype="u8")) # E: ndarray[Any, dtype[unsignedinteger[typing._64Bit]]] +reveal_type(def_gen.integers(0, I_u8_high_open, dtype="u8")) # E: ndarray[Any, dtype[unsignedinteger[typing._64Bit]]] 
+reveal_type(def_gen.integers(I_u8_high_closed, dtype="u8", endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._64Bit]]] +reveal_type(def_gen.integers(I_u8_low, I_u8_high_closed, dtype="u8", endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._64Bit]]] +reveal_type(def_gen.integers(0, I_u8_high_closed, dtype="u8", endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._64Bit]]] + +reveal_type(def_gen.integers(18446744073709551616, dtype="uint64")) # E: int +reveal_type(def_gen.integers(0, 18446744073709551616, dtype="uint64")) # E: int +reveal_type(def_gen.integers(18446744073709551615, dtype="uint64", endpoint=True)) # E: int +reveal_type(def_gen.integers(0, 18446744073709551615, dtype="uint64", endpoint=True)) # E: int +reveal_type(def_gen.integers(I_u8_low_like, 18446744073709551615, dtype="uint64", endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._64Bit]]] +reveal_type(def_gen.integers(I_u8_high_open, dtype="uint64")) # E: ndarray[Any, dtype[unsignedinteger[typing._64Bit]]] +reveal_type(def_gen.integers(I_u8_low, I_u8_high_open, dtype="uint64")) # E: ndarray[Any, dtype[unsignedinteger[typing._64Bit]]] +reveal_type(def_gen.integers(0, I_u8_high_open, dtype="uint64")) # E: ndarray[Any, dtype[unsignedinteger[typing._64Bit]]] +reveal_type(def_gen.integers(I_u8_high_closed, dtype="uint64", endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._64Bit]]] +reveal_type(def_gen.integers(I_u8_low, I_u8_high_closed, dtype="uint64", endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._64Bit]]] +reveal_type(def_gen.integers(0, I_u8_high_closed, dtype="uint64", endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._64Bit]]] + +reveal_type(def_gen.integers(18446744073709551616, dtype=np.uint64)) # E: int +reveal_type(def_gen.integers(0, 18446744073709551616, dtype=np.uint64)) # E: int +reveal_type(def_gen.integers(18446744073709551615, dtype=np.uint64, endpoint=True)) # E: int +reveal_type(def_gen.integers(0, 18446744073709551615, dtype=np.uint64, endpoint=True)) # E: int +reveal_type(def_gen.integers(I_u8_low_like, 18446744073709551615, dtype=np.uint64, endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._64Bit]]] +reveal_type(def_gen.integers(I_u8_high_open, dtype=np.uint64)) # E: ndarray[Any, dtype[unsignedinteger[typing._64Bit]]] +reveal_type(def_gen.integers(I_u8_low, I_u8_high_open, dtype=np.uint64)) # E: ndarray[Any, dtype[unsignedinteger[typing._64Bit]]] +reveal_type(def_gen.integers(0, I_u8_high_open, dtype=np.uint64)) # E: ndarray[Any, dtype[unsignedinteger[typing._64Bit]]] +reveal_type(def_gen.integers(I_u8_high_closed, dtype=np.uint64, endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._64Bit]]] +reveal_type(def_gen.integers(I_u8_low, I_u8_high_closed, dtype=np.uint64, endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._64Bit]]] +reveal_type(def_gen.integers(0, I_u8_high_closed, dtype=np.uint64, endpoint=True)) # E: ndarray[Any, dtype[unsignedinteger[typing._64Bit]]] + +I_i1_low: np.ndarray[Any, np.dtype[np.int8]] = np.array([-128], dtype=np.int8) +I_i1_low_like: List[int] = [-128] +I_i1_high_open: np.ndarray[Any, np.dtype[np.int8]] = np.array([127], dtype=np.int8) +I_i1_high_closed: np.ndarray[Any, np.dtype[np.int8]] = np.array([127], dtype=np.int8) + +reveal_type(def_gen.integers(128, dtype="i1")) # E: int +reveal_type(def_gen.integers(-128, 128, dtype="i1")) # E: int +reveal_type(def_gen.integers(127, dtype="i1", endpoint=True)) # E: int +reveal_type(def_gen.integers(-128, 127, 
dtype="i1", endpoint=True)) # E: int +reveal_type(def_gen.integers(I_i1_low_like, 127, dtype="i1", endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._8Bit]]] +reveal_type(def_gen.integers(I_i1_high_open, dtype="i1")) # E: ndarray[Any, dtype[signedinteger[typing._8Bit]]] +reveal_type(def_gen.integers(I_i1_low, I_i1_high_open, dtype="i1")) # E: ndarray[Any, dtype[signedinteger[typing._8Bit]]] +reveal_type(def_gen.integers(-128, I_i1_high_open, dtype="i1")) # E: ndarray[Any, dtype[signedinteger[typing._8Bit]]] +reveal_type(def_gen.integers(I_i1_high_closed, dtype="i1", endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._8Bit]]] +reveal_type(def_gen.integers(I_i1_low, I_i1_high_closed, dtype="i1", endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._8Bit]]] +reveal_type(def_gen.integers(-128, I_i1_high_closed, dtype="i1", endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._8Bit]]] + +reveal_type(def_gen.integers(128, dtype="int8")) # E: int +reveal_type(def_gen.integers(-128, 128, dtype="int8")) # E: int +reveal_type(def_gen.integers(127, dtype="int8", endpoint=True)) # E: int +reveal_type(def_gen.integers(-128, 127, dtype="int8", endpoint=True)) # E: int +reveal_type(def_gen.integers(I_i1_low_like, 127, dtype="int8", endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._8Bit]]] +reveal_type(def_gen.integers(I_i1_high_open, dtype="int8")) # E: ndarray[Any, dtype[signedinteger[typing._8Bit]]] +reveal_type(def_gen.integers(I_i1_low, I_i1_high_open, dtype="int8")) # E: ndarray[Any, dtype[signedinteger[typing._8Bit]]] +reveal_type(def_gen.integers(-128, I_i1_high_open, dtype="int8")) # E: ndarray[Any, dtype[signedinteger[typing._8Bit]]] +reveal_type(def_gen.integers(I_i1_high_closed, dtype="int8", endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._8Bit]]] +reveal_type(def_gen.integers(I_i1_low, I_i1_high_closed, dtype="int8", endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._8Bit]]] +reveal_type(def_gen.integers(-128, I_i1_high_closed, dtype="int8", endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._8Bit]]] + +reveal_type(def_gen.integers(128, dtype=np.int8)) # E: int +reveal_type(def_gen.integers(-128, 128, dtype=np.int8)) # E: int +reveal_type(def_gen.integers(127, dtype=np.int8, endpoint=True)) # E: int +reveal_type(def_gen.integers(-128, 127, dtype=np.int8, endpoint=True)) # E: int +reveal_type(def_gen.integers(I_i1_low_like, 127, dtype=np.int8, endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._8Bit]]] +reveal_type(def_gen.integers(I_i1_high_open, dtype=np.int8)) # E: ndarray[Any, dtype[signedinteger[typing._8Bit]]] +reveal_type(def_gen.integers(I_i1_low, I_i1_high_open, dtype=np.int8)) # E: ndarray[Any, dtype[signedinteger[typing._8Bit]]] +reveal_type(def_gen.integers(-128, I_i1_high_open, dtype=np.int8)) # E: ndarray[Any, dtype[signedinteger[typing._8Bit]]] +reveal_type(def_gen.integers(I_i1_high_closed, dtype=np.int8, endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._8Bit]]] +reveal_type(def_gen.integers(I_i1_low, I_i1_high_closed, dtype=np.int8, endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._8Bit]]] +reveal_type(def_gen.integers(-128, I_i1_high_closed, dtype=np.int8, endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._8Bit]]] + +I_i2_low: np.ndarray[Any, np.dtype[np.int16]] = np.array([-32768], dtype=np.int16) +I_i2_low_like: List[int] = [-32768] +I_i2_high_open: np.ndarray[Any, np.dtype[np.int16]] = np.array([32767], dtype=np.int16) +I_i2_high_closed: 
np.ndarray[Any, np.dtype[np.int16]] = np.array([32767], dtype=np.int16) + +reveal_type(def_gen.integers(32768, dtype="i2")) # E: int +reveal_type(def_gen.integers(-32768, 32768, dtype="i2")) # E: int +reveal_type(def_gen.integers(32767, dtype="i2", endpoint=True)) # E: int +reveal_type(def_gen.integers(-32768, 32767, dtype="i2", endpoint=True)) # E: int +reveal_type(def_gen.integers(I_i2_low_like, 32767, dtype="i2", endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._16Bit]]] +reveal_type(def_gen.integers(I_i2_high_open, dtype="i2")) # E: ndarray[Any, dtype[signedinteger[typing._16Bit]]] +reveal_type(def_gen.integers(I_i2_low, I_i2_high_open, dtype="i2")) # E: ndarray[Any, dtype[signedinteger[typing._16Bit]]] +reveal_type(def_gen.integers(-32768, I_i2_high_open, dtype="i2")) # E: ndarray[Any, dtype[signedinteger[typing._16Bit]]] +reveal_type(def_gen.integers(I_i2_high_closed, dtype="i2", endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._16Bit]]] +reveal_type(def_gen.integers(I_i2_low, I_i2_high_closed, dtype="i2", endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._16Bit]]] +reveal_type(def_gen.integers(-32768, I_i2_high_closed, dtype="i2", endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._16Bit]]] + +reveal_type(def_gen.integers(32768, dtype="int16")) # E: int +reveal_type(def_gen.integers(-32768, 32768, dtype="int16")) # E: int +reveal_type(def_gen.integers(32767, dtype="int16", endpoint=True)) # E: int +reveal_type(def_gen.integers(-32768, 32767, dtype="int16", endpoint=True)) # E: int +reveal_type(def_gen.integers(I_i2_low_like, 32767, dtype="int16", endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._16Bit]]] +reveal_type(def_gen.integers(I_i2_high_open, dtype="int16")) # E: ndarray[Any, dtype[signedinteger[typing._16Bit]]] +reveal_type(def_gen.integers(I_i2_low, I_i2_high_open, dtype="int16")) # E: ndarray[Any, dtype[signedinteger[typing._16Bit]]] +reveal_type(def_gen.integers(-32768, I_i2_high_open, dtype="int16")) # E: ndarray[Any, dtype[signedinteger[typing._16Bit]]] +reveal_type(def_gen.integers(I_i2_high_closed, dtype="int16", endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._16Bit]]] +reveal_type(def_gen.integers(I_i2_low, I_i2_high_closed, dtype="int16", endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._16Bit]]] +reveal_type(def_gen.integers(-32768, I_i2_high_closed, dtype="int16", endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._16Bit]]] + +reveal_type(def_gen.integers(32768, dtype=np.int16)) # E: int +reveal_type(def_gen.integers(-32768, 32768, dtype=np.int16)) # E: int +reveal_type(def_gen.integers(32767, dtype=np.int16, endpoint=True)) # E: int +reveal_type(def_gen.integers(-32768, 32767, dtype=np.int16, endpoint=True)) # E: int +reveal_type(def_gen.integers(I_i2_low_like, 32767, dtype=np.int16, endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._16Bit]]] +reveal_type(def_gen.integers(I_i2_high_open, dtype=np.int16)) # E: ndarray[Any, dtype[signedinteger[typing._16Bit]]] +reveal_type(def_gen.integers(I_i2_low, I_i2_high_open, dtype=np.int16)) # E: ndarray[Any, dtype[signedinteger[typing._16Bit]]] +reveal_type(def_gen.integers(-32768, I_i2_high_open, dtype=np.int16)) # E: ndarray[Any, dtype[signedinteger[typing._16Bit]]] +reveal_type(def_gen.integers(I_i2_high_closed, dtype=np.int16, endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._16Bit]]] +reveal_type(def_gen.integers(I_i2_low, I_i2_high_closed, dtype=np.int16, endpoint=True)) # E: ndarray[Any, 
dtype[signedinteger[typing._16Bit]]] +reveal_type(def_gen.integers(-32768, I_i2_high_closed, dtype=np.int16, endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._16Bit]]] + +I_i4_low: np.ndarray[Any, np.dtype[np.int32]] = np.array([-2147483648], dtype=np.int32) +I_i4_low_like: List[int] = [-2147483648] +I_i4_high_open: np.ndarray[Any, np.dtype[np.int32]] = np.array([2147483647], dtype=np.int32) +I_i4_high_closed: np.ndarray[Any, np.dtype[np.int32]] = np.array([2147483647], dtype=np.int32) + +reveal_type(def_gen.integers(2147483648, dtype="i4")) # E: int +reveal_type(def_gen.integers(-2147483648, 2147483648, dtype="i4")) # E: int +reveal_type(def_gen.integers(2147483647, dtype="i4", endpoint=True)) # E: int +reveal_type(def_gen.integers(-2147483648, 2147483647, dtype="i4", endpoint=True)) # E: int +reveal_type(def_gen.integers(I_i4_low_like, 2147483647, dtype="i4", endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._32Bit]]] +reveal_type(def_gen.integers(I_i4_high_open, dtype="i4")) # E: ndarray[Any, dtype[signedinteger[typing._32Bit]]] +reveal_type(def_gen.integers(I_i4_low, I_i4_high_open, dtype="i4")) # E: ndarray[Any, dtype[signedinteger[typing._32Bit]]] +reveal_type(def_gen.integers(-2147483648, I_i4_high_open, dtype="i4")) # E: ndarray[Any, dtype[signedinteger[typing._32Bit]]] +reveal_type(def_gen.integers(I_i4_high_closed, dtype="i4", endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._32Bit]]] +reveal_type(def_gen.integers(I_i4_low, I_i4_high_closed, dtype="i4", endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._32Bit]]] +reveal_type(def_gen.integers(-2147483648, I_i4_high_closed, dtype="i4", endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._32Bit]]] + +reveal_type(def_gen.integers(2147483648, dtype="int32")) # E: int +reveal_type(def_gen.integers(-2147483648, 2147483648, dtype="int32")) # E: int +reveal_type(def_gen.integers(2147483647, dtype="int32", endpoint=True)) # E: int +reveal_type(def_gen.integers(-2147483648, 2147483647, dtype="int32", endpoint=True)) # E: int +reveal_type(def_gen.integers(I_i4_low_like, 2147483647, dtype="int32", endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._32Bit]]] +reveal_type(def_gen.integers(I_i4_high_open, dtype="int32")) # E: ndarray[Any, dtype[signedinteger[typing._32Bit]]] +reveal_type(def_gen.integers(I_i4_low, I_i4_high_open, dtype="int32")) # E: ndarray[Any, dtype[signedinteger[typing._32Bit]]] +reveal_type(def_gen.integers(-2147483648, I_i4_high_open, dtype="int32")) # E: ndarray[Any, dtype[signedinteger[typing._32Bit]]] +reveal_type(def_gen.integers(I_i4_high_closed, dtype="int32", endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._32Bit]]] +reveal_type(def_gen.integers(I_i4_low, I_i4_high_closed, dtype="int32", endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._32Bit]]] +reveal_type(def_gen.integers(-2147483648, I_i4_high_closed, dtype="int32", endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._32Bit]]] + +reveal_type(def_gen.integers(2147483648, dtype=np.int32)) # E: int +reveal_type(def_gen.integers(-2147483648, 2147483648, dtype=np.int32)) # E: int +reveal_type(def_gen.integers(2147483647, dtype=np.int32, endpoint=True)) # E: int +reveal_type(def_gen.integers(-2147483648, 2147483647, dtype=np.int32, endpoint=True)) # E: int +reveal_type(def_gen.integers(I_i4_low_like, 2147483647, dtype=np.int32, endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._32Bit]]] +reveal_type(def_gen.integers(I_i4_high_open, dtype=np.int32)) # E: 
ndarray[Any, dtype[signedinteger[typing._32Bit]]] +reveal_type(def_gen.integers(I_i4_low, I_i4_high_open, dtype=np.int32)) # E: ndarray[Any, dtype[signedinteger[typing._32Bit]]] +reveal_type(def_gen.integers(-2147483648, I_i4_high_open, dtype=np.int32)) # E: ndarray[Any, dtype[signedinteger[typing._32Bit]]] +reveal_type(def_gen.integers(I_i4_high_closed, dtype=np.int32, endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._32Bit]]] +reveal_type(def_gen.integers(I_i4_low, I_i4_high_closed, dtype=np.int32, endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._32Bit]]] +reveal_type(def_gen.integers(-2147483648, I_i4_high_closed, dtype=np.int32, endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._32Bit]]] + +I_i8_low: np.ndarray[Any, np.dtype[np.int64]] = np.array([-9223372036854775808], dtype=np.int64) +I_i8_low_like: List[int] = [-9223372036854775808] +I_i8_high_open: np.ndarray[Any, np.dtype[np.int64]] = np.array([9223372036854775807], dtype=np.int64) +I_i8_high_closed: np.ndarray[Any, np.dtype[np.int64]] = np.array([9223372036854775807], dtype=np.int64) + +reveal_type(def_gen.integers(9223372036854775808, dtype="i8")) # E: int +reveal_type(def_gen.integers(-9223372036854775808, 9223372036854775808, dtype="i8")) # E: int +reveal_type(def_gen.integers(9223372036854775807, dtype="i8", endpoint=True)) # E: int +reveal_type(def_gen.integers(-9223372036854775808, 9223372036854775807, dtype="i8", endpoint=True)) # E: int +reveal_type(def_gen.integers(I_i8_low_like, 9223372036854775807, dtype="i8", endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.integers(I_i8_high_open, dtype="i8")) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.integers(I_i8_low, I_i8_high_open, dtype="i8")) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.integers(-9223372036854775808, I_i8_high_open, dtype="i8")) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.integers(I_i8_high_closed, dtype="i8", endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.integers(I_i8_low, I_i8_high_closed, dtype="i8", endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.integers(-9223372036854775808, I_i8_high_closed, dtype="i8", endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] + +reveal_type(def_gen.integers(9223372036854775808, dtype="int64")) # E: int +reveal_type(def_gen.integers(-9223372036854775808, 9223372036854775808, dtype="int64")) # E: int +reveal_type(def_gen.integers(9223372036854775807, dtype="int64", endpoint=True)) # E: int +reveal_type(def_gen.integers(-9223372036854775808, 9223372036854775807, dtype="int64", endpoint=True)) # E: int +reveal_type(def_gen.integers(I_i8_low_like, 9223372036854775807, dtype="int64", endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.integers(I_i8_high_open, dtype="int64")) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.integers(I_i8_low, I_i8_high_open, dtype="int64")) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.integers(-9223372036854775808, I_i8_high_open, dtype="int64")) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.integers(I_i8_high_closed, dtype="int64", endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.integers(I_i8_low, I_i8_high_closed, dtype="int64", endpoint=True)) # E: 
ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.integers(-9223372036854775808, I_i8_high_closed, dtype="int64", endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] + +reveal_type(def_gen.integers(9223372036854775808, dtype=np.int64)) # E: int +reveal_type(def_gen.integers(-9223372036854775808, 9223372036854775808, dtype=np.int64)) # E: int +reveal_type(def_gen.integers(9223372036854775807, dtype=np.int64, endpoint=True)) # E: int +reveal_type(def_gen.integers(-9223372036854775808, 9223372036854775807, dtype=np.int64, endpoint=True)) # E: int +reveal_type(def_gen.integers(I_i8_low_like, 9223372036854775807, dtype=np.int64, endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.integers(I_i8_high_open, dtype=np.int64)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.integers(I_i8_low, I_i8_high_open, dtype=np.int64)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.integers(-9223372036854775808, I_i8_high_open, dtype=np.int64)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.integers(I_i8_high_closed, dtype=np.int64, endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.integers(I_i8_low, I_i8_high_closed, dtype=np.int64, endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.integers(-9223372036854775808, I_i8_high_closed, dtype=np.int64, endpoint=True)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] + + +reveal_type(def_gen.bit_generator) # E: BitGenerator + +reveal_type(def_gen.bytes(2)) # E: bytes + +reveal_type(def_gen.choice(5)) # E: int +reveal_type(def_gen.choice(5, 3)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.choice(5, 3, replace=True)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.choice(5, 3, p=[1 / 5] * 5)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.choice(5, 3, p=[1 / 5] * 5, replace=False)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] + +reveal_type(def_gen.choice(["pooh", "rabbit", "piglet", "Christopher"])) # E: Any +reveal_type(def_gen.choice(["pooh", "rabbit", "piglet", "Christopher"], 3)) # E: ndarray[Any, Any] +reveal_type(def_gen.choice(["pooh", "rabbit", "piglet", "Christopher"], 3, p=[1 / 4] * 4)) # E: ndarray[Any, Any] +reveal_type(def_gen.choice(["pooh", "rabbit", "piglet", "Christopher"], 3, replace=True)) # E: ndarray[Any, Any] +reveal_type(def_gen.choice(["pooh", "rabbit", "piglet", "Christopher"], 3, replace=False, p=np.array([1 / 8, 1 / 8, 1 / 2, 1 / 4]))) # E: ndarray[Any, Any] + +reveal_type(def_gen.dirichlet([0.5, 0.5])) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.dirichlet(np.array([0.5, 0.5]))) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(def_gen.dirichlet(np.array([0.5, 0.5]), size=3)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(def_gen.multinomial(20, [1 / 6.0] * 6)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.multinomial(20, np.array([0.5, 0.5]))) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.multinomial(20, [1 / 6.0] * 6, size=2)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.multinomial([[10], [20]], [1 / 6.0] * 6, size=(2, 2))) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(def_gen.multinomial(np.array([[10], [20]]), np.array([0.5, 
0.5]), size=(2, 2))) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]]
+
+reveal_type(def_gen.multivariate_hypergeometric([3, 5, 7], 2)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]]
+reveal_type(def_gen.multivariate_hypergeometric(np.array([3, 5, 7]), 2)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]]
+reveal_type(def_gen.multivariate_hypergeometric(np.array([3, 5, 7]), 2, size=4)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]]
+reveal_type(def_gen.multivariate_hypergeometric(np.array([3, 5, 7]), 2, size=(4, 7))) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]]
+reveal_type(def_gen.multivariate_hypergeometric([3, 5, 7], 2, method="count")) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]]
+reveal_type(def_gen.multivariate_hypergeometric(np.array([3, 5, 7]), 2, method="marginals")) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]]
+
+reveal_type(def_gen.multivariate_normal([0.0], [[1.0]])) # E: ndarray[Any, dtype[floating[typing._64Bit]]
+reveal_type(def_gen.multivariate_normal([0.0], np.array([[1.0]]))) # E: ndarray[Any, dtype[floating[typing._64Bit]]
+reveal_type(def_gen.multivariate_normal(np.array([0.0]), [[1.0]])) # E: ndarray[Any, dtype[floating[typing._64Bit]]
+
+reveal_type(def_gen.permutation(10)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]]
+reveal_type(def_gen.permutation([1, 2, 3, 4])) # E: ndarray[Any, Any]
+reveal_type(def_gen.permutation(np.array([1, 2, 3, 4]))) # E: ndarray[Any, Any]
+reveal_type(def_gen.permutation(D_2D, axis=1)) # E: ndarray[Any, Any]
+reveal_type(def_gen.permuted(D_2D)) # E: ndarray[Any, Any]
+reveal_type(def_gen.permuted(D_2D_like)) # E: ndarray[Any, Any]
+reveal_type(def_gen.permuted(D_2D, axis=1)) # E: ndarray[Any, Any]
+reveal_type(def_gen.permuted(D_2D, out=D_2D)) # E: ndarray[Any, Any]
+reveal_type(def_gen.permuted(D_2D_like, out=D_2D)) # E: ndarray[Any, Any]
+reveal_type(def_gen.permuted(D_2D, axis=1, out=D_2D)) # E: ndarray[Any, Any]
+
+reveal_type(def_gen.shuffle(np.arange(10))) # E: None
+reveal_type(def_gen.shuffle([1, 2, 3, 4, 5])) # E: None
+reveal_type(def_gen.shuffle(D_2D, axis=1)) # E: None
+
+reveal_type(np.random.Generator(pcg64)) # E: Generator
+reveal_type(def_gen.__str__()) # E: str
+reveal_type(def_gen.__repr__()) # E: str
+def_gen_state = def_gen.__getstate__()
+reveal_type(def_gen_state) # E: builtins.dict[builtins.str, Any]
+reveal_type(def_gen.__setstate__(def_gen_state)) # E: None
+
+# RandomState
+random_st: np.random.RandomState = np.random.RandomState()
+
+reveal_type(random_st.standard_normal()) # E: float
+reveal_type(random_st.standard_normal(size=None)) # E: float
+reveal_type(random_st.standard_normal(size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]]]
+
+reveal_type(random_st.random()) # E: float
+reveal_type(random_st.random(size=None)) # E: float
+reveal_type(random_st.random(size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]]]
+
+reveal_type(random_st.standard_cauchy()) # E: float
+reveal_type(random_st.standard_cauchy(size=None)) # E: float
+reveal_type(random_st.standard_cauchy(size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]]
+
+reveal_type(random_st.standard_exponential()) # E: float
+reveal_type(random_st.standard_exponential(size=None)) # E: float
+reveal_type(random_st.standard_exponential(size=1)) # E: ndarray[Any,
dtype[floating[typing._64Bit]]] + +reveal_type(random_st.zipf(1.5)) # E: int +reveal_type(random_st.zipf(1.5, size=None)) # E: int +reveal_type(random_st.zipf(1.5, size=1)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.zipf(D_arr_1p5)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.zipf(D_arr_1p5, size=1)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.zipf(D_arr_like_1p5)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.zipf(D_arr_like_1p5, size=1)) # E: ndarray[Any, dtype[{int_}]] + +reveal_type(random_st.weibull(0.5)) # E: float +reveal_type(random_st.weibull(0.5, size=None)) # E: float +reveal_type(random_st.weibull(0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.weibull(D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.weibull(D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.weibull(D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.weibull(D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(random_st.standard_t(0.5)) # E: float +reveal_type(random_st.standard_t(0.5, size=None)) # E: float +reveal_type(random_st.standard_t(0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.standard_t(D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.standard_t(D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.standard_t(D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.standard_t(D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(random_st.poisson(0.5)) # E: int +reveal_type(random_st.poisson(0.5, size=None)) # E: int +reveal_type(random_st.poisson(0.5, size=1)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.poisson(D_arr_0p5)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.poisson(D_arr_0p5, size=1)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.poisson(D_arr_like_0p5)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.poisson(D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[{int_}]] + +reveal_type(random_st.power(0.5)) # E: float +reveal_type(random_st.power(0.5, size=None)) # E: float +reveal_type(random_st.power(0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.power(D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.power(D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.power(D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.power(D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(random_st.pareto(0.5)) # E: float +reveal_type(random_st.pareto(0.5, size=None)) # E: float +reveal_type(random_st.pareto(0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.pareto(D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.pareto(D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.pareto(D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.pareto(D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(random_st.chisquare(0.5)) # E: float +reveal_type(random_st.chisquare(0.5, size=None)) # E: float 
+reveal_type(random_st.chisquare(0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]]
+reveal_type(random_st.chisquare(D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]]
+reveal_type(random_st.chisquare(D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]]
+reveal_type(random_st.chisquare(D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]]
+reveal_type(random_st.chisquare(D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]]
+
+reveal_type(random_st.exponential(0.5)) # E: float
+reveal_type(random_st.exponential(0.5, size=None)) # E: float
+reveal_type(random_st.exponential(0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]]
+reveal_type(random_st.exponential(D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]]
+reveal_type(random_st.exponential(D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]]
+reveal_type(random_st.exponential(D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]]
+reveal_type(random_st.exponential(D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]]
+
+reveal_type(random_st.geometric(0.5)) # E: int
+reveal_type(random_st.geometric(0.5, size=None)) # E: int
+reveal_type(random_st.geometric(0.5, size=1)) # E: ndarray[Any, dtype[{int_}]]
+reveal_type(random_st.geometric(D_arr_0p5)) # E: ndarray[Any, dtype[{int_}]]
+reveal_type(random_st.geometric(D_arr_0p5, size=1)) # E: ndarray[Any, dtype[{int_}]]
+reveal_type(random_st.geometric(D_arr_like_0p5)) # E: ndarray[Any, dtype[{int_}]]
+reveal_type(random_st.geometric(D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[{int_}]]
+
+reveal_type(random_st.logseries(0.5)) # E: int
+reveal_type(random_st.logseries(0.5, size=None)) # E: int
+reveal_type(random_st.logseries(0.5, size=1)) # E: ndarray[Any, dtype[{int_}]]
+reveal_type(random_st.logseries(D_arr_0p5)) # E: ndarray[Any, dtype[{int_}]]
+reveal_type(random_st.logseries(D_arr_0p5, size=1)) # E: ndarray[Any, dtype[{int_}]]
+reveal_type(random_st.logseries(D_arr_like_0p5)) # E: ndarray[Any, dtype[{int_}]]
+reveal_type(random_st.logseries(D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[{int_}]]
+
+reveal_type(random_st.rayleigh(0.5)) # E: float
+reveal_type(random_st.rayleigh(0.5, size=None)) # E: float
+reveal_type(random_st.rayleigh(0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]]
+reveal_type(random_st.rayleigh(D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]]
+reveal_type(random_st.rayleigh(D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]]
+reveal_type(random_st.rayleigh(D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]]
+reveal_type(random_st.rayleigh(D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]]
+
+reveal_type(random_st.standard_gamma(0.5)) # E: float
+reveal_type(random_st.standard_gamma(0.5, size=None)) # E: float
+reveal_type(random_st.standard_gamma(0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]]]
+reveal_type(random_st.standard_gamma(D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]]]
+reveal_type(random_st.standard_gamma(D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]]]
+reveal_type(random_st.standard_gamma(D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]]]
+reveal_type(random_st.standard_gamma(D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]]]
+
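+# The two-parameter RandomState distributions below all follow the same pattern as the expected types show:
+# scalar arguments yield a plain float, while any array or array-like argument (or an explicit size) promotes
+# the result to a float64 ndarray.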
+reveal_type(random_st.vonmises(0.5, 0.5)) # E: float +reveal_type(random_st.vonmises(0.5, 0.5, size=None)) # E: float +reveal_type(random_st.vonmises(0.5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.vonmises(D_arr_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.vonmises(0.5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.vonmises(D_arr_0p5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.vonmises(0.5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.vonmises(D_arr_like_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.vonmises(0.5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.vonmises(D_arr_0p5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.vonmises(D_arr_like_0p5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.vonmises(D_arr_0p5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.vonmises(D_arr_like_0p5, D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(random_st.wald(0.5, 0.5)) # E: float +reveal_type(random_st.wald(0.5, 0.5, size=None)) # E: float +reveal_type(random_st.wald(0.5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.wald(D_arr_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.wald(0.5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.wald(D_arr_0p5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.wald(0.5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.wald(D_arr_like_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.wald(0.5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.wald(D_arr_0p5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.wald(D_arr_like_0p5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.wald(D_arr_0p5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.wald(D_arr_like_0p5, D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(random_st.uniform(0.5, 0.5)) # E: float +reveal_type(random_st.uniform(0.5, 0.5, size=None)) # E: float +reveal_type(random_st.uniform(0.5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.uniform(D_arr_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.uniform(0.5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.uniform(D_arr_0p5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.uniform(0.5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.uniform(D_arr_like_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.uniform(0.5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.uniform(D_arr_0p5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.uniform(D_arr_like_0p5, D_arr_like_0p5)) # E: ndarray[Any, 
dtype[floating[typing._64Bit]] +reveal_type(random_st.uniform(D_arr_0p5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.uniform(D_arr_like_0p5, D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(random_st.beta(0.5, 0.5)) # E: float +reveal_type(random_st.beta(0.5, 0.5, size=None)) # E: float +reveal_type(random_st.beta(0.5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.beta(D_arr_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.beta(0.5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.beta(D_arr_0p5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.beta(0.5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.beta(D_arr_like_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.beta(0.5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.beta(D_arr_0p5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.beta(D_arr_like_0p5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.beta(D_arr_0p5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.beta(D_arr_like_0p5, D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(random_st.f(0.5, 0.5)) # E: float +reveal_type(random_st.f(0.5, 0.5, size=None)) # E: float +reveal_type(random_st.f(0.5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.f(D_arr_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.f(0.5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.f(D_arr_0p5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.f(0.5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.f(D_arr_like_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.f(0.5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.f(D_arr_0p5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.f(D_arr_like_0p5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.f(D_arr_0p5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.f(D_arr_like_0p5, D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(random_st.gamma(0.5, 0.5)) # E: float +reveal_type(random_st.gamma(0.5, 0.5, size=None)) # E: float +reveal_type(random_st.gamma(0.5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.gamma(D_arr_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.gamma(0.5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.gamma(D_arr_0p5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.gamma(0.5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.gamma(D_arr_like_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.gamma(0.5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.gamma(D_arr_0p5, D_arr_0p5)) 
# E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.gamma(D_arr_like_0p5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.gamma(D_arr_0p5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.gamma(D_arr_like_0p5, D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(random_st.gumbel(0.5, 0.5)) # E: float +reveal_type(random_st.gumbel(0.5, 0.5, size=None)) # E: float +reveal_type(random_st.gumbel(0.5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.gumbel(D_arr_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.gumbel(0.5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.gumbel(D_arr_0p5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.gumbel(0.5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.gumbel(D_arr_like_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.gumbel(0.5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.gumbel(D_arr_0p5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.gumbel(D_arr_like_0p5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.gumbel(D_arr_0p5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.gumbel(D_arr_like_0p5, D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(random_st.laplace(0.5, 0.5)) # E: float +reveal_type(random_st.laplace(0.5, 0.5, size=None)) # E: float +reveal_type(random_st.laplace(0.5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.laplace(D_arr_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.laplace(0.5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.laplace(D_arr_0p5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.laplace(0.5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.laplace(D_arr_like_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.laplace(0.5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.laplace(D_arr_0p5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.laplace(D_arr_like_0p5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.laplace(D_arr_0p5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.laplace(D_arr_like_0p5, D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(random_st.logistic(0.5, 0.5)) # E: float +reveal_type(random_st.logistic(0.5, 0.5, size=None)) # E: float +reveal_type(random_st.logistic(0.5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.logistic(D_arr_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.logistic(0.5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.logistic(D_arr_0p5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.logistic(0.5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] 
+reveal_type(random_st.logistic(D_arr_like_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.logistic(0.5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.logistic(D_arr_0p5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.logistic(D_arr_like_0p5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.logistic(D_arr_0p5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.logistic(D_arr_like_0p5, D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(random_st.lognormal(0.5, 0.5)) # E: float +reveal_type(random_st.lognormal(0.5, 0.5, size=None)) # E: float +reveal_type(random_st.lognormal(0.5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.lognormal(D_arr_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.lognormal(0.5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.lognormal(D_arr_0p5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.lognormal(0.5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.lognormal(D_arr_like_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.lognormal(0.5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.lognormal(D_arr_0p5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.lognormal(D_arr_like_0p5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.lognormal(D_arr_0p5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.lognormal(D_arr_like_0p5, D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(random_st.noncentral_chisquare(0.5, 0.5)) # E: float +reveal_type(random_st.noncentral_chisquare(0.5, 0.5, size=None)) # E: float +reveal_type(random_st.noncentral_chisquare(0.5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.noncentral_chisquare(D_arr_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.noncentral_chisquare(0.5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.noncentral_chisquare(D_arr_0p5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.noncentral_chisquare(0.5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.noncentral_chisquare(D_arr_like_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.noncentral_chisquare(0.5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.noncentral_chisquare(D_arr_0p5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.noncentral_chisquare(D_arr_like_0p5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.noncentral_chisquare(D_arr_0p5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.noncentral_chisquare(D_arr_like_0p5, D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(random_st.normal(0.5, 0.5)) # E: float +reveal_type(random_st.normal(0.5, 0.5, size=None)) # E: float +reveal_type(random_st.normal(0.5, 
0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.normal(D_arr_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.normal(0.5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.normal(D_arr_0p5, 0.5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.normal(0.5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.normal(D_arr_like_0p5, 0.5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.normal(0.5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.normal(D_arr_0p5, D_arr_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.normal(D_arr_like_0p5, D_arr_like_0p5)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.normal(D_arr_0p5, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.normal(D_arr_like_0p5, D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(random_st.triangular(0.1, 0.5, 0.9)) # E: float +reveal_type(random_st.triangular(0.1, 0.5, 0.9, size=None)) # E: float +reveal_type(random_st.triangular(0.1, 0.5, 0.9, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.triangular(D_arr_0p1, 0.5, 0.9)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.triangular(0.1, D_arr_0p5, 0.9)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.triangular(D_arr_0p1, 0.5, D_arr_like_0p9, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.triangular(0.1, D_arr_0p5, 0.9, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.triangular(D_arr_like_0p1, 0.5, D_arr_0p9)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.triangular(0.5, D_arr_like_0p5, 0.9)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.triangular(D_arr_0p1, D_arr_0p5, 0.9)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.triangular(D_arr_like_0p1, D_arr_like_0p5, 0.9)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.triangular(D_arr_0p1, D_arr_0p5, D_arr_0p9, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.triangular(D_arr_like_0p1, D_arr_like_0p5, D_arr_like_0p9, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(random_st.noncentral_f(0.1, 0.5, 0.9)) # E: float +reveal_type(random_st.noncentral_f(0.1, 0.5, 0.9, size=None)) # E: float +reveal_type(random_st.noncentral_f(0.1, 0.5, 0.9, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.noncentral_f(D_arr_0p1, 0.5, 0.9)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.noncentral_f(0.1, D_arr_0p5, 0.9)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.noncentral_f(D_arr_0p1, 0.5, D_arr_like_0p9, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.noncentral_f(0.1, D_arr_0p5, 0.9, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.noncentral_f(D_arr_like_0p1, 0.5, D_arr_0p9)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.noncentral_f(0.5, D_arr_like_0p5, 0.9)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.noncentral_f(D_arr_0p1, D_arr_0p5, 0.9)) # E: ndarray[Any, 
dtype[floating[typing._64Bit]] +reveal_type(random_st.noncentral_f(D_arr_like_0p1, D_arr_like_0p5, 0.9)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.noncentral_f(D_arr_0p1, D_arr_0p5, D_arr_0p9, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.noncentral_f(D_arr_like_0p1, D_arr_like_0p5, D_arr_like_0p9, size=1)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(random_st.binomial(10, 0.5)) # E: int +reveal_type(random_st.binomial(10, 0.5, size=None)) # E: int +reveal_type(random_st.binomial(10, 0.5, size=1)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.binomial(I_arr_10, 0.5)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.binomial(10, D_arr_0p5)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.binomial(I_arr_10, 0.5, size=1)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.binomial(10, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.binomial(I_arr_like_10, 0.5)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.binomial(10, D_arr_like_0p5)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.binomial(I_arr_10, D_arr_0p5)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.binomial(I_arr_like_10, D_arr_like_0p5)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.binomial(I_arr_10, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.binomial(I_arr_like_10, D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[{int_}]] + +reveal_type(random_st.negative_binomial(10, 0.5)) # E: int +reveal_type(random_st.negative_binomial(10, 0.5, size=None)) # E: int +reveal_type(random_st.negative_binomial(10, 0.5, size=1)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.negative_binomial(I_arr_10, 0.5)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.negative_binomial(10, D_arr_0p5)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.negative_binomial(I_arr_10, 0.5, size=1)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.negative_binomial(10, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.negative_binomial(I_arr_like_10, 0.5)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.negative_binomial(10, D_arr_like_0p5)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.negative_binomial(I_arr_10, D_arr_0p5)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.negative_binomial(I_arr_like_10, D_arr_like_0p5)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.negative_binomial(I_arr_10, D_arr_0p5, size=1)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.negative_binomial(I_arr_like_10, D_arr_like_0p5, size=1)) # E: ndarray[Any, dtype[{int_}]] + +reveal_type(random_st.hypergeometric(20, 20, 10)) # E: int +reveal_type(random_st.hypergeometric(20, 20, 10, size=None)) # E: int +reveal_type(random_st.hypergeometric(20, 20, 10, size=1)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.hypergeometric(I_arr_20, 20, 10)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.hypergeometric(20, I_arr_20, 10)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.hypergeometric(I_arr_20, 20, I_arr_like_10, size=1)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.hypergeometric(20, I_arr_20, 10, size=1)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.hypergeometric(I_arr_like_20, 20, I_arr_10)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.hypergeometric(20, I_arr_like_20, 10)) # E: ndarray[Any, dtype[{int_}]] 
+reveal_type(random_st.hypergeometric(I_arr_20, I_arr_20, 10)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.hypergeometric(I_arr_like_20, I_arr_like_20, 10)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.hypergeometric(I_arr_20, I_arr_20, I_arr_10, size=1)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.hypergeometric(I_arr_like_20, I_arr_like_20, I_arr_like_10, size=1)) # E: ndarray[Any, dtype[{int_}]] + +reveal_type(random_st.randint(0, 100)) # E: int +reveal_type(random_st.randint(100)) # E: int +reveal_type(random_st.randint([100])) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.randint(0, [100])) # E: ndarray[Any, dtype[{int_}]] + +reveal_type(random_st.randint(2, dtype=bool)) # E: builtins.bool +reveal_type(random_st.randint(0, 2, dtype=bool)) # E: builtins.bool +reveal_type(random_st.randint(I_bool_high_open, dtype=bool)) # E: ndarray[Any, dtype[bool_] +reveal_type(random_st.randint(I_bool_low, I_bool_high_open, dtype=bool)) # E: ndarray[Any, dtype[bool_] +reveal_type(random_st.randint(0, I_bool_high_open, dtype=bool)) # E: ndarray[Any, dtype[bool_] + +reveal_type(random_st.randint(2, dtype=np.bool_)) # E: builtins.bool +reveal_type(random_st.randint(0, 2, dtype=np.bool_)) # E: builtins.bool +reveal_type(random_st.randint(I_bool_high_open, dtype=np.bool_)) # E: ndarray[Any, dtype[bool_] +reveal_type(random_st.randint(I_bool_low, I_bool_high_open, dtype=np.bool_)) # E: ndarray[Any, dtype[bool_] +reveal_type(random_st.randint(0, I_bool_high_open, dtype=np.bool_)) # E: ndarray[Any, dtype[bool_] + +reveal_type(random_st.randint(256, dtype="u1")) # E: int +reveal_type(random_st.randint(0, 256, dtype="u1")) # E: int +reveal_type(random_st.randint(I_u1_high_open, dtype="u1")) # E: ndarray[Any, dtype[unsignedinteger[typing._8Bit]]] +reveal_type(random_st.randint(I_u1_low, I_u1_high_open, dtype="u1")) # E: ndarray[Any, dtype[unsignedinteger[typing._8Bit]]] +reveal_type(random_st.randint(0, I_u1_high_open, dtype="u1")) # E: ndarray[Any, dtype[unsignedinteger[typing._8Bit]]] + +reveal_type(random_st.randint(256, dtype="uint8")) # E: int +reveal_type(random_st.randint(0, 256, dtype="uint8")) # E: int +reveal_type(random_st.randint(I_u1_high_open, dtype="uint8")) # E: ndarray[Any, dtype[unsignedinteger[typing._8Bit]]] +reveal_type(random_st.randint(I_u1_low, I_u1_high_open, dtype="uint8")) # E: ndarray[Any, dtype[unsignedinteger[typing._8Bit]]] +reveal_type(random_st.randint(0, I_u1_high_open, dtype="uint8")) # E: ndarray[Any, dtype[unsignedinteger[typing._8Bit]]] + +reveal_type(random_st.randint(256, dtype=np.uint8)) # E: int +reveal_type(random_st.randint(0, 256, dtype=np.uint8)) # E: int +reveal_type(random_st.randint(I_u1_high_open, dtype=np.uint8)) # E: ndarray[Any, dtype[unsignedinteger[typing._8Bit]]] +reveal_type(random_st.randint(I_u1_low, I_u1_high_open, dtype=np.uint8)) # E: ndarray[Any, dtype[unsignedinteger[typing._8Bit]]] +reveal_type(random_st.randint(0, I_u1_high_open, dtype=np.uint8)) # E: ndarray[Any, dtype[unsignedinteger[typing._8Bit]]] + +reveal_type(random_st.randint(65536, dtype="u2")) # E: int +reveal_type(random_st.randint(0, 65536, dtype="u2")) # E: int +reveal_type(random_st.randint(I_u2_high_open, dtype="u2")) # E: ndarray[Any, dtype[unsignedinteger[typing._16Bit]]] +reveal_type(random_st.randint(I_u2_low, I_u2_high_open, dtype="u2")) # E: ndarray[Any, dtype[unsignedinteger[typing._16Bit]]] +reveal_type(random_st.randint(0, I_u2_high_open, dtype="u2")) # E: ndarray[Any, dtype[unsignedinteger[typing._16Bit]]] + 
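+# A minimal runtime sketch of what the randint blocks above assert (editor's
+# illustration, not one of the original reveal checks; `_rs` is a throwaway
+# name). Every spelling of a dtype -- shorthand "u1", canonical "uint8", or
+# the type np.uint8 -- selects the same overload, so the arrays agree in dtype:
+_rs = np.random.RandomState(0)
+assert _rs.randint(0, 256, size=4, dtype="u1").dtype == np.dtype(np.uint8)
+assert _rs.randint(0, 256, size=4, dtype=np.uint8).dtype == np.dtype(np.uint8)
+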
+reveal_type(random_st.randint(65536, dtype="uint16")) # E: int +reveal_type(random_st.randint(0, 65536, dtype="uint16")) # E: int +reveal_type(random_st.randint(I_u2_high_open, dtype="uint16")) # E: ndarray[Any, dtype[unsignedinteger[typing._16Bit]]] +reveal_type(random_st.randint(I_u2_low, I_u2_high_open, dtype="uint16")) # E: ndarray[Any, dtype[unsignedinteger[typing._16Bit]]] +reveal_type(random_st.randint(0, I_u2_high_open, dtype="uint16")) # E: ndarray[Any, dtype[unsignedinteger[typing._16Bit]]] + +reveal_type(random_st.randint(65536, dtype=np.uint16)) # E: int +reveal_type(random_st.randint(0, 65536, dtype=np.uint16)) # E: int +reveal_type(random_st.randint(I_u2_high_open, dtype=np.uint16)) # E: ndarray[Any, dtype[unsignedinteger[typing._16Bit]]] +reveal_type(random_st.randint(I_u2_low, I_u2_high_open, dtype=np.uint16)) # E: ndarray[Any, dtype[unsignedinteger[typing._16Bit]]] +reveal_type(random_st.randint(0, I_u2_high_open, dtype=np.uint16)) # E: ndarray[Any, dtype[unsignedinteger[typing._16Bit]]] + +reveal_type(random_st.randint(4294967296, dtype="u4")) # E: int +reveal_type(random_st.randint(0, 4294967296, dtype="u4")) # E: int +reveal_type(random_st.randint(I_u4_high_open, dtype="u4")) # E: ndarray[Any, dtype[unsignedinteger[typing._32Bit]]] +reveal_type(random_st.randint(I_u4_low, I_u4_high_open, dtype="u4")) # E: ndarray[Any, dtype[unsignedinteger[typing._32Bit]]] +reveal_type(random_st.randint(0, I_u4_high_open, dtype="u4")) # E: ndarray[Any, dtype[unsignedinteger[typing._32Bit]]] + +reveal_type(random_st.randint(4294967296, dtype="uint32")) # E: int +reveal_type(random_st.randint(0, 4294967296, dtype="uint32")) # E: int +reveal_type(random_st.randint(I_u4_high_open, dtype="uint32")) # E: ndarray[Any, dtype[unsignedinteger[typing._32Bit]]] +reveal_type(random_st.randint(I_u4_low, I_u4_high_open, dtype="uint32")) # E: ndarray[Any, dtype[unsignedinteger[typing._32Bit]]] +reveal_type(random_st.randint(0, I_u4_high_open, dtype="uint32")) # E: ndarray[Any, dtype[unsignedinteger[typing._32Bit]]] + +reveal_type(random_st.randint(4294967296, dtype=np.uint32)) # E: int +reveal_type(random_st.randint(0, 4294967296, dtype=np.uint32)) # E: int +reveal_type(random_st.randint(I_u4_high_open, dtype=np.uint32)) # E: ndarray[Any, dtype[unsignedinteger[typing._32Bit]]] +reveal_type(random_st.randint(I_u4_low, I_u4_high_open, dtype=np.uint32)) # E: ndarray[Any, dtype[unsignedinteger[typing._32Bit]]] +reveal_type(random_st.randint(0, I_u4_high_open, dtype=np.uint32)) # E: ndarray[Any, dtype[unsignedinteger[typing._32Bit]]] + +reveal_type(random_st.randint(4294967296, dtype=np.uint)) # E: int +reveal_type(random_st.randint(0, 4294967296, dtype=np.uint)) # E: int +reveal_type(random_st.randint(I_u4_high_open, dtype=np.uint)) # E: ndarray[Any, dtype[{uint}]] +reveal_type(random_st.randint(I_u4_low, I_u4_high_open, dtype=np.uint)) # E: ndarray[Any, dtype[{uint}]] +reveal_type(random_st.randint(0, I_u4_high_open, dtype=np.uint)) # E: ndarray[Any, dtype[{uint}]] + +reveal_type(random_st.randint(18446744073709551616, dtype="u8")) # E: int +reveal_type(random_st.randint(0, 18446744073709551616, dtype="u8")) # E: int +reveal_type(random_st.randint(I_u8_high_open, dtype="u8")) # E: ndarray[Any, dtype[unsignedinteger[typing._64Bit]]] +reveal_type(random_st.randint(I_u8_low, I_u8_high_open, dtype="u8")) # E: ndarray[Any, dtype[unsignedinteger[typing._64Bit]]] +reveal_type(random_st.randint(0, I_u8_high_open, dtype="u8")) # E: ndarray[Any, dtype[unsignedinteger[typing._64Bit]]] + 
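+# Independent of dtype, the return kind tracks the arguments: plain Python-int
+# bounds with the default size=None give a single builtin int, while array-like
+# bounds give an ndarray. A runtime sketch (illustrative only):
+assert isinstance(np.random.RandomState(1).randint(10), int)
+assert isinstance(np.random.RandomState(1).randint([10, 20]), np.ndarray)
+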
+reveal_type(random_st.randint(18446744073709551616, dtype="uint64")) # E: int +reveal_type(random_st.randint(0, 18446744073709551616, dtype="uint64")) # E: int +reveal_type(random_st.randint(I_u8_high_open, dtype="uint64")) # E: ndarray[Any, dtype[unsignedinteger[typing._64Bit]]] +reveal_type(random_st.randint(I_u8_low, I_u8_high_open, dtype="uint64")) # E: ndarray[Any, dtype[unsignedinteger[typing._64Bit]]] +reveal_type(random_st.randint(0, I_u8_high_open, dtype="uint64")) # E: ndarray[Any, dtype[unsignedinteger[typing._64Bit]]] + +reveal_type(random_st.randint(18446744073709551616, dtype=np.uint64)) # E: int +reveal_type(random_st.randint(0, 18446744073709551616, dtype=np.uint64)) # E: int +reveal_type(random_st.randint(I_u8_high_open, dtype=np.uint64)) # E: ndarray[Any, dtype[unsignedinteger[typing._64Bit]]] +reveal_type(random_st.randint(I_u8_low, I_u8_high_open, dtype=np.uint64)) # E: ndarray[Any, dtype[unsignedinteger[typing._64Bit]]] +reveal_type(random_st.randint(0, I_u8_high_open, dtype=np.uint64)) # E: ndarray[Any, dtype[unsignedinteger[typing._64Bit]]] + +reveal_type(random_st.randint(128, dtype="i1")) # E: int +reveal_type(random_st.randint(-128, 128, dtype="i1")) # E: int +reveal_type(random_st.randint(I_i1_high_open, dtype="i1")) # E: ndarray[Any, dtype[signedinteger[typing._8Bit]]] +reveal_type(random_st.randint(I_i1_low, I_i1_high_open, dtype="i1")) # E: ndarray[Any, dtype[signedinteger[typing._8Bit]]] +reveal_type(random_st.randint(-128, I_i1_high_open, dtype="i1")) # E: ndarray[Any, dtype[signedinteger[typing._8Bit]]] + +reveal_type(random_st.randint(128, dtype="int8")) # E: int +reveal_type(random_st.randint(-128, 128, dtype="int8")) # E: int +reveal_type(random_st.randint(I_i1_high_open, dtype="int8")) # E: ndarray[Any, dtype[signedinteger[typing._8Bit]]] +reveal_type(random_st.randint(I_i1_low, I_i1_high_open, dtype="int8")) # E: ndarray[Any, dtype[signedinteger[typing._8Bit]]] +reveal_type(random_st.randint(-128, I_i1_high_open, dtype="int8")) # E: ndarray[Any, dtype[signedinteger[typing._8Bit]]] + +reveal_type(random_st.randint(128, dtype=np.int8)) # E: int +reveal_type(random_st.randint(-128, 128, dtype=np.int8)) # E: int +reveal_type(random_st.randint(I_i1_high_open, dtype=np.int8)) # E: ndarray[Any, dtype[signedinteger[typing._8Bit]]] +reveal_type(random_st.randint(I_i1_low, I_i1_high_open, dtype=np.int8)) # E: ndarray[Any, dtype[signedinteger[typing._8Bit]]] +reveal_type(random_st.randint(-128, I_i1_high_open, dtype=np.int8)) # E: ndarray[Any, dtype[signedinteger[typing._8Bit]]] + +reveal_type(random_st.randint(32768, dtype="i2")) # E: int +reveal_type(random_st.randint(-32768, 32768, dtype="i2")) # E: int +reveal_type(random_st.randint(I_i2_high_open, dtype="i2")) # E: ndarray[Any, dtype[signedinteger[typing._16Bit]]] +reveal_type(random_st.randint(I_i2_low, I_i2_high_open, dtype="i2")) # E: ndarray[Any, dtype[signedinteger[typing._16Bit]]] +reveal_type(random_st.randint(-32768, I_i2_high_open, dtype="i2")) # E: ndarray[Any, dtype[signedinteger[typing._16Bit]]] +reveal_type(random_st.randint(32768, dtype="int16")) # E: int +reveal_type(random_st.randint(-32768, 32768, dtype="int16")) # E: int +reveal_type(random_st.randint(I_i2_high_open, dtype="int16")) # E: ndarray[Any, dtype[signedinteger[typing._16Bit]]] +reveal_type(random_st.randint(I_i2_low, I_i2_high_open, dtype="int16")) # E: ndarray[Any, dtype[signedinteger[typing._16Bit]]] +reveal_type(random_st.randint(-32768, I_i2_high_open, dtype="int16")) # E: ndarray[Any, dtype[signedinteger[typing._16Bit]]] 
+reveal_type(random_st.randint(32768, dtype=np.int16)) # E: int +reveal_type(random_st.randint(-32768, 32768, dtype=np.int16)) # E: int +reveal_type(random_st.randint(I_i2_high_open, dtype=np.int16)) # E: ndarray[Any, dtype[signedinteger[typing._16Bit]]] +reveal_type(random_st.randint(I_i2_low, I_i2_high_open, dtype=np.int16)) # E: ndarray[Any, dtype[signedinteger[typing._16Bit]]] +reveal_type(random_st.randint(-32768, I_i2_high_open, dtype=np.int16)) # E: ndarray[Any, dtype[signedinteger[typing._16Bit]]] + +reveal_type(random_st.randint(2147483648, dtype="i4")) # E: int +reveal_type(random_st.randint(-2147483648, 2147483648, dtype="i4")) # E: int +reveal_type(random_st.randint(I_i4_high_open, dtype="i4")) # E: ndarray[Any, dtype[signedinteger[typing._32Bit]]] +reveal_type(random_st.randint(I_i4_low, I_i4_high_open, dtype="i4")) # E: ndarray[Any, dtype[signedinteger[typing._32Bit]]] +reveal_type(random_st.randint(-2147483648, I_i4_high_open, dtype="i4")) # E: ndarray[Any, dtype[signedinteger[typing._32Bit]]] + +reveal_type(random_st.randint(2147483648, dtype="int32")) # E: int +reveal_type(random_st.randint(-2147483648, 2147483648, dtype="int32")) # E: int +reveal_type(random_st.randint(I_i4_high_open, dtype="int32")) # E: ndarray[Any, dtype[signedinteger[typing._32Bit]]] +reveal_type(random_st.randint(I_i4_low, I_i4_high_open, dtype="int32")) # E: ndarray[Any, dtype[signedinteger[typing._32Bit]]] +reveal_type(random_st.randint(-2147483648, I_i4_high_open, dtype="int32")) # E: ndarray[Any, dtype[signedinteger[typing._32Bit]]] + +reveal_type(random_st.randint(2147483648, dtype=np.int32)) # E: int +reveal_type(random_st.randint(-2147483648, 2147483648, dtype=np.int32)) # E: int +reveal_type(random_st.randint(I_i4_high_open, dtype=np.int32)) # E: ndarray[Any, dtype[signedinteger[typing._32Bit]]] +reveal_type(random_st.randint(I_i4_low, I_i4_high_open, dtype=np.int32)) # E: ndarray[Any, dtype[signedinteger[typing._32Bit]]] +reveal_type(random_st.randint(-2147483648, I_i4_high_open, dtype=np.int32)) # E: ndarray[Any, dtype[signedinteger[typing._32Bit]]] + +reveal_type(random_st.randint(2147483648, dtype=np.int_)) # E: int +reveal_type(random_st.randint(-2147483648, 2147483648, dtype=np.int_)) # E: int +reveal_type(random_st.randint(I_i4_high_open, dtype=np.int_)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.randint(I_i4_low, I_i4_high_open, dtype=np.int_)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.randint(-2147483648, I_i4_high_open, dtype=np.int_)) # E: ndarray[Any, dtype[{int_}]] + +reveal_type(random_st.randint(9223372036854775808, dtype="i8")) # E: int +reveal_type(random_st.randint(-9223372036854775808, 9223372036854775808, dtype="i8")) # E: int +reveal_type(random_st.randint(I_i8_high_open, dtype="i8")) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(random_st.randint(I_i8_low, I_i8_high_open, dtype="i8")) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(random_st.randint(-9223372036854775808, I_i8_high_open, dtype="i8")) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] + +reveal_type(random_st.randint(9223372036854775808, dtype="int64")) # E: int +reveal_type(random_st.randint(-9223372036854775808, 9223372036854775808, dtype="int64")) # E: int +reveal_type(random_st.randint(I_i8_high_open, dtype="int64")) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(random_st.randint(I_i8_low, I_i8_high_open, dtype="int64")) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] 
+reveal_type(random_st.randint(-9223372036854775808, I_i8_high_open, dtype="int64")) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] + +reveal_type(random_st.randint(9223372036854775808, dtype=np.int64)) # E: int +reveal_type(random_st.randint(-9223372036854775808, 9223372036854775808, dtype=np.int64)) # E: int +reveal_type(random_st.randint(I_i8_high_open, dtype=np.int64)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(random_st.randint(I_i8_low, I_i8_high_open, dtype=np.int64)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] +reveal_type(random_st.randint(-9223372036854775808, I_i8_high_open, dtype=np.int64)) # E: ndarray[Any, dtype[signedinteger[typing._64Bit]]] + +reveal_type(random_st._bit_generator) # E: BitGenerator + +reveal_type(random_st.bytes(2)) # E: bytes + +reveal_type(random_st.choice(5)) # E: int +reveal_type(random_st.choice(5, 3)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.choice(5, 3, replace=True)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.choice(5, 3, p=[1 / 5] * 5)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.choice(5, 3, p=[1 / 5] * 5, replace=False)) # E: ndarray[Any, dtype[{int_}]] + +reveal_type(random_st.choice(["pooh", "rabbit", "piglet", "Christopher"])) # E: Any +reveal_type(random_st.choice(["pooh", "rabbit", "piglet", "Christopher"], 3)) # E: ndarray[Any, Any] +reveal_type(random_st.choice(["pooh", "rabbit", "piglet", "Christopher"], 3, p=[1 / 4] * 4)) # E: ndarray[Any, Any] +reveal_type(random_st.choice(["pooh", "rabbit", "piglet", "Christopher"], 3, replace=True)) # E: ndarray[Any, Any] +reveal_type(random_st.choice(["pooh", "rabbit", "piglet", "Christopher"], 3, replace=False, p=np.array([1 / 8, 1 / 8, 1 / 2, 1 / 4]))) # E: ndarray[Any, Any] + +reveal_type(random_st.dirichlet([0.5, 0.5])) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.dirichlet(np.array([0.5, 0.5]))) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.dirichlet(np.array([0.5, 0.5]), size=3)) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(random_st.multinomial(20, [1 / 6.0] * 6)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.multinomial(20, np.array([0.5, 0.5]))) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.multinomial(20, [1 / 6.0] * 6, size=2)) # E: ndarray[Any, dtype[{int_}]] + +reveal_type(random_st.multivariate_normal([0.0], [[1.0]])) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.multivariate_normal([0.0], np.array([[1.0]]))) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.multivariate_normal(np.array([0.0]), [[1.0]])) # E: ndarray[Any, dtype[floating[typing._64Bit]] +reveal_type(random_st.multivariate_normal([0.0], np.array([[1.0]]))) # E: ndarray[Any, dtype[floating[typing._64Bit]] + +reveal_type(random_st.permutation(10)) # E: ndarray[Any, dtype[{int_}]] +reveal_type(random_st.permutation([1, 2, 3, 4])) # E: ndarray[Any, Any] +reveal_type(random_st.permutation(np.array([1, 2, 3, 4]))) # E: ndarray[Any, Any] +reveal_type(random_st.permutation(D_2D)) # E: ndarray[Any, Any] + +reveal_type(random_st.shuffle(np.arange(10))) # E: None +reveal_type(random_st.shuffle([1, 2, 3, 4, 5])) # E: None +reveal_type(random_st.shuffle(D_2D)) # E: None + +reveal_type(np.random.RandomState(pcg64)) # E: RandomState +reveal_type(np.random.RandomState(0)) # E: RandomState +reveal_type(np.random.RandomState([0, 1, 2])) # E: RandomState +reveal_type(random_st.__str__()) # E: str 
+reveal_type(random_st.__repr__()) # E: str
+random_st_state = random_st.__getstate__()
+reveal_type(random_st_state) # E: builtins.dict[builtins.str, Any]
+reveal_type(random_st.__setstate__(random_st_state)) # E: None
+reveal_type(random_st.seed()) # E: None
+reveal_type(random_st.seed(1)) # E: None
+reveal_type(random_st.seed([0, 1])) # E: None
+random_st_get_state = random_st.get_state()
+reveal_type(random_st_get_state) # E: builtins.dict[builtins.str, Any]
+random_st_get_state_legacy = random_st.get_state(legacy=True)
+reveal_type(random_st_get_state_legacy) # E: Union[builtins.dict[builtins.str, Any], Tuple[builtins.str, ndarray[Any, dtype[unsignedinteger[typing._32Bit]]], builtins.int, builtins.int, builtins.float]]
+reveal_type(random_st.set_state(random_st_get_state)) # E: None
+
+reveal_type(random_st.rand()) # E: float
+reveal_type(random_st.rand(1)) # E: ndarray[Any, dtype[floating[typing._64Bit]]
+reveal_type(random_st.rand(1, 2)) # E: ndarray[Any, dtype[floating[typing._64Bit]]
+reveal_type(random_st.randn()) # E: float
+reveal_type(random_st.randn(1)) # E: ndarray[Any, dtype[floating[typing._64Bit]]
+reveal_type(random_st.randn(1, 2)) # E: ndarray[Any, dtype[floating[typing._64Bit]]
+reveal_type(random_st.random_sample()) # E: float
+reveal_type(random_st.random_sample(1)) # E: ndarray[Any, dtype[floating[typing._64Bit]]
+reveal_type(random_st.random_sample(size=(1, 2))) # E: ndarray[Any, dtype[floating[typing._64Bit]]
+
+reveal_type(random_st.tomaxint()) # E: int
+reveal_type(random_st.tomaxint(1)) # E: ndarray[Any, dtype[{int_}]]
+reveal_type(random_st.tomaxint((1,))) # E: ndarray[Any, dtype[{int_}]]
diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/rec.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/rec.pyi
new file mode 100644
index 0000000..9921621
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/rec.pyi
@@ -0,0 +1,127 @@
+import io
+from typing import Any, List
+
+import numpy as np
+import numpy.typing as npt
+
+AR_i8: npt.NDArray[np.int64]
+REC_AR_V: np.recarray[Any, np.dtype[np.record]]
+AR_LIST: List[npt.NDArray[np.int64]]
+
+format_parser: np.format_parser
+record: np.record
+file_obj: io.BufferedIOBase
+
+reveal_type(np.format_parser( # E: format_parser
+    formats=[np.float64, np.int64, np.bool_],
+    names=["f8", "i8", "?"],
+    titles=None,
+    aligned=True,
+))
+reveal_type(format_parser.dtype) # E: dtype[void]
+
+reveal_type(record.field_a) # E: Any
+reveal_type(record.field_b) # E: Any
+reveal_type(record["field_a"]) # E: Any
+reveal_type(record["field_b"]) # E: Any
+reveal_type(record.pprint()) # E: str
+record.field_c = 5
+
+reveal_type(REC_AR_V.field(0)) # E: Any
+reveal_type(REC_AR_V.field("field_a")) # E: Any
+reveal_type(REC_AR_V.field(0, AR_i8)) # E: None
+reveal_type(REC_AR_V.field("field_a", AR_i8)) # E: None
+reveal_type(REC_AR_V["field_a"]) # E: Any
+reveal_type(REC_AR_V.field_a) # E: Any
+
+reveal_type(np.recarray( # recarray[Any, dtype[record]]
+    shape=(10, 5),
+    formats=[np.float64, np.int64, np.bool_],
+    order="K",
+    byteorder="|",
+))
+reveal_type(np.recarray( # recarray[Any, dtype[Any]]
+    shape=(10, 5),
+    dtype=[("f8", np.float64), ("i8", np.int64)],
+    strides=(5, 5),
+))
+
+reveal_type(np.rec.fromarrays( # recarray[Any, dtype[record]]
+    AR_LIST,
+))
+reveal_type(np.rec.fromarrays( # recarray[Any, dtype[Any]]
+    AR_LIST,
+    dtype=np.int64,
+))
+reveal_type(np.rec.fromarrays( # recarray[Any, dtype[Any]]
+    AR_LIST,
+    formats=[np.int64, np.float64],
+    names=["i8",
"f8"] +)) + +reveal_type(np.rec.fromrecords( # recarray[Any, dtype[record]] + (1, 1.5), +)) +reveal_type(np.rec.fromrecords( # recarray[Any, dtype[record]] + [(1, 1.5)], + dtype=[("i8", np.int64), ("f8", np.float64)], +)) +reveal_type(np.rec.fromrecords( # recarray[Any, dtype[record]] + REC_AR_V, + formats=[np.int64, np.float64], + names=["i8", "f8"] +)) + +reveal_type(np.rec.fromstring( # recarray[Any, dtype[record]] + b"(1, 1.5)", + dtype=[("i8", np.int64), ("f8", np.float64)], +)) +reveal_type(np.rec.fromstring( # recarray[Any, dtype[record]] + REC_AR_V, + formats=[np.int64, np.float64], + names=["i8", "f8"] +)) + +reveal_type(np.rec.fromfile( # recarray[Any, dtype[Any]] + "test_file.txt", + dtype=[("i8", np.int64), ("f8", np.float64)], +)) +reveal_type(np.rec.fromfile( # recarray[Any, dtype[record]] + file_obj, + formats=[np.int64, np.float64], + names=["i8", "f8"] +)) + +reveal_type(np.rec.array( # recarray[Any, dtype[{int64}]] + AR_i8, +)) +reveal_type(np.rec.array( # recarray[Any, dtype[Any]] + [(1, 1.5)], + dtype=[("i8", np.int64), ("f8", np.float64)], +)) +reveal_type(np.rec.array( # recarray[Any, dtype[record]] + [(1, 1.5)], + formats=[np.int64, np.float64], + names=["i8", "f8"] +)) + +reveal_type(np.rec.array( # recarray[Any, dtype[Any]] + None, + dtype=np.float64, + shape=(10, 3), +)) +reveal_type(np.rec.array( # recarray[Any, dtype[Any]] + None, + formats=[np.int64, np.float64], + names=["i8", "f8"], + shape=(10, 3), +)) +reveal_type(np.rec.array( # recarray[Any, dtype[Any]] + file_obj, + dtype=np.float64, +)) +reveal_type(np.rec.array( # recarray[Any, dtype[Any]] + file_obj, + formats=[np.int64, np.float64], + names=["i8", "f8"], +)) diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/scalars.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/scalars.pyi new file mode 100644 index 0000000..383e40e --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/scalars.pyi @@ -0,0 +1,166 @@ +import sys +import numpy as np + +b: np.bool_ +u8: np.uint64 +i8: np.int64 +f8: np.float64 +c8: np.complex64 +c16: np.complex128 +m: np.timedelta64 +U: np.str_ +S: np.bytes_ +V: np.void + +reveal_type(c8.real) # E: {float32} +reveal_type(c8.imag) # E: {float32} + +reveal_type(c8.real.real) # E: {float32} +reveal_type(c8.real.imag) # E: {float32} + +reveal_type(c8.itemsize) # E: int +reveal_type(c8.shape) # E: Tuple[] +reveal_type(c8.strides) # E: Tuple[] + +reveal_type(c8.ndim) # E: Literal[0] +reveal_type(c8.size) # E: Literal[1] + +reveal_type(c8.squeeze()) # E: {complex64} +reveal_type(c8.byteswap()) # E: {complex64} +reveal_type(c8.transpose()) # E: {complex64} + +reveal_type(c8.dtype) # E: dtype[{complex64}] + +reveal_type(c8.real) # E: {float32} +reveal_type(c16.imag) # E: {float64} + +reveal_type(np.unicode_('foo')) # E: str_ +reveal_type(np.str0('foo')) # E: str_ + +reveal_type(V[0]) # E: Any +reveal_type(V["field1"]) # E: Any +reveal_type(V[["field1", "field2"]]) # E: void +V[0] = 5 + +# Aliases +reveal_type(np.unicode_()) # E: str_ +reveal_type(np.str0()) # E: str_ +reveal_type(np.bool8()) # E: bool_ +reveal_type(np.bytes0()) # E: bytes_ +reveal_type(np.string_()) # E: bytes_ +reveal_type(np.object0()) # E: object_ +reveal_type(np.void0(0)) # E: void + +reveal_type(np.byte()) # E: {byte} +reveal_type(np.short()) # E: {short} +reveal_type(np.intc()) # E: {intc} +reveal_type(np.intp()) # E: {intp} +reveal_type(np.int0()) # E: {intp} +reveal_type(np.int_()) # E: {int_} +reveal_type(np.longlong()) # E: {longlong} + 
+reveal_type(np.ubyte()) # E: {ubyte} +reveal_type(np.ushort()) # E: {ushort} +reveal_type(np.uintc()) # E: {uintc} +reveal_type(np.uintp()) # E: {uintp} +reveal_type(np.uint0()) # E: {uintp} +reveal_type(np.uint()) # E: {uint} +reveal_type(np.ulonglong()) # E: {ulonglong} + +reveal_type(np.half()) # E: {half} +reveal_type(np.single()) # E: {single} +reveal_type(np.double()) # E: {double} +reveal_type(np.float_()) # E: {double} +reveal_type(np.longdouble()) # E: {longdouble} +reveal_type(np.longfloat()) # E: {longdouble} + +reveal_type(np.csingle()) # E: {csingle} +reveal_type(np.singlecomplex()) # E: {csingle} +reveal_type(np.cdouble()) # E: {cdouble} +reveal_type(np.complex_()) # E: {cdouble} +reveal_type(np.cfloat()) # E: {cdouble} +reveal_type(np.clongdouble()) # E: {clongdouble} +reveal_type(np.clongfloat()) # E: {clongdouble} +reveal_type(np.longcomplex()) # E: {clongdouble} + +reveal_type(b.item()) # E: bool +reveal_type(i8.item()) # E: int +reveal_type(u8.item()) # E: int +reveal_type(f8.item()) # E: float +reveal_type(c16.item()) # E: complex +reveal_type(U.item()) # E: str +reveal_type(S.item()) # E: bytes + +reveal_type(b.tolist()) # E: bool +reveal_type(i8.tolist()) # E: int +reveal_type(u8.tolist()) # E: int +reveal_type(f8.tolist()) # E: float +reveal_type(c16.tolist()) # E: complex +reveal_type(U.tolist()) # E: str +reveal_type(S.tolist()) # E: bytes + +reveal_type(b.ravel()) # E: ndarray[Any, dtype[bool_]] +reveal_type(i8.ravel()) # E: ndarray[Any, dtype[{int64}]] +reveal_type(u8.ravel()) # E: ndarray[Any, dtype[{uint64}]] +reveal_type(f8.ravel()) # E: ndarray[Any, dtype[{float64}]] +reveal_type(c16.ravel()) # E: ndarray[Any, dtype[{complex128}]] +reveal_type(U.ravel()) # E: ndarray[Any, dtype[str_]] +reveal_type(S.ravel()) # E: ndarray[Any, dtype[bytes_]] + +reveal_type(b.flatten()) # E: ndarray[Any, dtype[bool_]] +reveal_type(i8.flatten()) # E: ndarray[Any, dtype[{int64}]] +reveal_type(u8.flatten()) # E: ndarray[Any, dtype[{uint64}]] +reveal_type(f8.flatten()) # E: ndarray[Any, dtype[{float64}]] +reveal_type(c16.flatten()) # E: ndarray[Any, dtype[{complex128}]] +reveal_type(U.flatten()) # E: ndarray[Any, dtype[str_]] +reveal_type(S.flatten()) # E: ndarray[Any, dtype[bytes_]] + +reveal_type(b.reshape(1)) # E: ndarray[Any, dtype[bool_]] +reveal_type(i8.reshape(1)) # E: ndarray[Any, dtype[{int64}]] +reveal_type(u8.reshape(1)) # E: ndarray[Any, dtype[{uint64}]] +reveal_type(f8.reshape(1)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(c16.reshape(1)) # E: ndarray[Any, dtype[{complex128}]] +reveal_type(U.reshape(1)) # E: ndarray[Any, dtype[str_]] +reveal_type(S.reshape(1)) # E: ndarray[Any, dtype[bytes_]] + +reveal_type(i8.astype(float)) # E: Any +reveal_type(i8.astype(np.float64)) # E: {float64} + +reveal_type(i8.view()) # E: {int64} +reveal_type(i8.view(np.float64)) # E: {float64} +reveal_type(i8.view(float)) # E: Any +reveal_type(i8.view(np.float64, np.ndarray)) # E: {float64} + +reveal_type(i8.getfield(float)) # E: Any +reveal_type(i8.getfield(np.float64)) # E: {float64} +reveal_type(i8.getfield(np.float64, 8)) # E: {float64} + +reveal_type(f8.as_integer_ratio()) # E: Tuple[builtins.int, builtins.int] +reveal_type(f8.is_integer()) # E: bool +reveal_type(f8.__trunc__()) # E: int +reveal_type(f8.__getformat__("float")) # E: str +reveal_type(f8.hex()) # E: str +reveal_type(np.float64.fromhex("0x0.0p+0")) # E: {float64} + +reveal_type(f8.__getnewargs__()) # E: Tuple[builtins.float] +reveal_type(c16.__getnewargs__()) # E: Tuple[builtins.float, builtins.float] + 
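+# np.float64 deliberately mirrors the builtin float API, which is why
+# as_integer_ratio, is_integer, hex and fromhex above reveal exactly like
+# their builtins.float counterparts. Runtime sketch (illustrative only):
+assert np.float64(0.5).as_integer_ratio() == (1, 2)
+assert np.float64.fromhex("0x1.8p+1") == 3.0
+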
+reveal_type(i8.numerator) # E: {int64} +reveal_type(i8.denominator) # E: Literal[1] +reveal_type(u8.numerator) # E: {uint64} +reveal_type(u8.denominator) # E: Literal[1] +reveal_type(m.numerator) # E: timedelta64 +reveal_type(m.denominator) # E: Literal[1] + +reveal_type(round(i8)) # E: int +reveal_type(round(i8, 3)) # E: {int64} +reveal_type(round(u8)) # E: int +reveal_type(round(u8, 3)) # E: {uint64} +reveal_type(round(f8)) # E: int +reveal_type(round(f8, 3)) # E: {float64} + +if sys.version_info >= (3, 9): + reveal_type(f8.__ceil__()) # E: int + reveal_type(f8.__floor__()) # E: int + +reveal_type(i8.is_integer()) # E: Literal[True] diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/shape_base.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/shape_base.pyi new file mode 100644 index 0000000..f13678c --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/shape_base.pyi @@ -0,0 +1,57 @@ +import numpy as np +from numpy.typing import NDArray +from typing import Any, List + +i8: np.int64 +f8: np.float64 + +AR_b: NDArray[np.bool_] +AR_i8: NDArray[np.int64] +AR_f8: NDArray[np.float64] + +AR_LIKE_f8: List[float] + +reveal_type(np.take_along_axis(AR_f8, AR_i8, axis=1)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.take_along_axis(f8, AR_i8, axis=None)) # E: ndarray[Any, dtype[{float64}]] + +reveal_type(np.put_along_axis(AR_f8, AR_i8, "1.0", axis=1)) # E: None + +reveal_type(np.expand_dims(AR_i8, 2)) # E: ndarray[Any, dtype[{int64}]] +reveal_type(np.expand_dims(AR_LIKE_f8, 2)) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.column_stack([AR_i8])) # E: ndarray[Any, dtype[{int64}]] +reveal_type(np.column_stack([AR_LIKE_f8])) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.dstack([AR_i8])) # E: ndarray[Any, dtype[{int64}]] +reveal_type(np.dstack([AR_LIKE_f8])) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.row_stack([AR_i8])) # E: ndarray[Any, dtype[{int64}]] +reveal_type(np.row_stack([AR_LIKE_f8])) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.array_split(AR_i8, [3, 5, 6, 10])) # E: list[ndarray[Any, dtype[{int64}]]] +reveal_type(np.array_split(AR_LIKE_f8, [3, 5, 6, 10])) # E: list[ndarray[Any, dtype[Any]]] + +reveal_type(np.split(AR_i8, [3, 5, 6, 10])) # E: list[ndarray[Any, dtype[{int64}]]] +reveal_type(np.split(AR_LIKE_f8, [3, 5, 6, 10])) # E: list[ndarray[Any, dtype[Any]]] + +reveal_type(np.hsplit(AR_i8, [3, 5, 6, 10])) # E: list[ndarray[Any, dtype[{int64}]]] +reveal_type(np.hsplit(AR_LIKE_f8, [3, 5, 6, 10])) # E: list[ndarray[Any, dtype[Any]]] + +reveal_type(np.vsplit(AR_i8, [3, 5, 6, 10])) # E: list[ndarray[Any, dtype[{int64}]]] +reveal_type(np.vsplit(AR_LIKE_f8, [3, 5, 6, 10])) # E: list[ndarray[Any, dtype[Any]]] + +reveal_type(np.dsplit(AR_i8, [3, 5, 6, 10])) # E: list[ndarray[Any, dtype[{int64}]]] +reveal_type(np.dsplit(AR_LIKE_f8, [3, 5, 6, 10])) # E: list[ndarray[Any, dtype[Any]]] + +reveal_type(np.lib.shape_base.get_array_prepare(AR_i8)) # E: lib.shape_base._ArrayPrepare +reveal_type(np.lib.shape_base.get_array_prepare(AR_i8, 1)) # E: Union[None, lib.shape_base._ArrayPrepare] + +reveal_type(np.get_array_wrap(AR_i8)) # E: lib.shape_base._ArrayWrap +reveal_type(np.get_array_wrap(AR_i8, 1)) # E: Union[None, lib.shape_base._ArrayWrap] + +reveal_type(np.kron(AR_b, AR_b)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.kron(AR_b, AR_i8)) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(np.kron(AR_f8, AR_f8)) # E: ndarray[Any, dtype[floating[Any]]] + +reveal_type(np.tile(AR_i8, 5)) # E: 
ndarray[Any, dtype[{int64}]] +reveal_type(np.tile(AR_LIKE_f8, [2, 2])) # E: ndarray[Any, dtype[Any]] diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/stride_tricks.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/stride_tricks.pyi new file mode 100644 index 0000000..0d6dcd3 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/stride_tricks.pyi @@ -0,0 +1,28 @@ +from typing import List, Dict, Any +import numpy as np +import numpy.typing as npt + +AR_f8: npt.NDArray[np.float64] +AR_LIKE_f: List[float] +interface_dict: Dict[str, Any] + +reveal_type(np.lib.stride_tricks.DummyArray(interface_dict)) # E: lib.stride_tricks.DummyArray + +reveal_type(np.lib.stride_tricks.as_strided(AR_f8)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.lib.stride_tricks.as_strided(AR_LIKE_f)) # E: ndarray[Any, dtype[Any]] +reveal_type(np.lib.stride_tricks.as_strided(AR_f8, strides=(1, 5))) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.lib.stride_tricks.as_strided(AR_f8, shape=[9, 20])) # E: ndarray[Any, dtype[{float64}]] + +reveal_type(np.lib.stride_tricks.sliding_window_view(AR_f8, 5)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.lib.stride_tricks.sliding_window_view(AR_LIKE_f, (1, 5))) # E: ndarray[Any, dtype[Any]] +reveal_type(np.lib.stride_tricks.sliding_window_view(AR_f8, [9], axis=1)) # E: ndarray[Any, dtype[{float64}]] + +reveal_type(np.broadcast_to(AR_f8, 5)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.broadcast_to(AR_LIKE_f, (1, 5))) # E: ndarray[Any, dtype[Any]] +reveal_type(np.broadcast_to(AR_f8, [4, 6], subok=True)) # E: ndarray[Any, dtype[{float64}]] + +reveal_type(np.broadcast_shapes((1, 2), [3, 1], (3, 2))) # E: tuple[builtins.int] +reveal_type(np.broadcast_shapes((6, 7), (5, 6, 1), 7, (5, 1, 7))) # E: tuple[builtins.int] + +reveal_type(np.broadcast_arrays(AR_f8, AR_f8)) # E: list[ndarray[Any, dtype[Any]]] +reveal_type(np.broadcast_arrays(AR_f8, AR_LIKE_f)) # E: list[ndarray[Any, dtype[Any]]] diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/testing.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/testing.pyi new file mode 100644 index 0000000..9813dc7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/testing.pyi @@ -0,0 +1,173 @@ +from __future__ import annotations + +import re +import sys +from typing import Any, Callable, TypeVar +from pathlib import Path + +import numpy as np +import numpy.typing as npt + +AR_f8: npt.NDArray[np.float64] +AR_i8: npt.NDArray[np.int64] + +bool_obj: bool +suppress_obj: np.testing.suppress_warnings +FT = TypeVar("FT", bound=Callable[..., Any]) + +def func() -> int: ... + +def func2( + x: npt.NDArray[np.number[Any]], + y: npt.NDArray[np.number[Any]], +) -> npt.NDArray[np.bool_]: ... 
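+
+# func above stands in for an arbitrary decorated callable, while func2 models
+# the elementwise comparison callable that assert_array_compare further down
+# expects: two numeric arrays in, a boolean array out. A hypothetical example
+# of such a comparison function (editor's sketch, not original test data):
+def _close_enough(
+    x: npt.NDArray[np.number[Any]],
+    y: npt.NDArray[np.number[Any]],
+) -> npt.NDArray[np.bool_]:
+    # "equal to within 1e-8", elementwise
+    return np.abs(x - y) < 1e-8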
+ +reveal_type(np.testing.KnownFailureException()) # E: KnownFailureException +reveal_type(np.testing.IgnoreException()) # E: IgnoreException + +reveal_type(np.testing.clear_and_catch_warnings(modules=[np.testing])) # E: _clear_and_catch_warnings_without_records +reveal_type(np.testing.clear_and_catch_warnings(True)) # E: _clear_and_catch_warnings_with_records +reveal_type(np.testing.clear_and_catch_warnings(False)) # E: _clear_and_catch_warnings_without_records +reveal_type(np.testing.clear_and_catch_warnings(bool_obj)) # E: clear_and_catch_warnings +reveal_type(np.testing.clear_and_catch_warnings.class_modules) # E: tuple[types.ModuleType] +reveal_type(np.testing.clear_and_catch_warnings.modules) # E: set[types.ModuleType] + +with np.testing.clear_and_catch_warnings(True) as c1: + reveal_type(c1) # E: builtins.list[warnings.WarningMessage] +with np.testing.clear_and_catch_warnings() as c2: + reveal_type(c2) # E: None + +reveal_type(np.testing.suppress_warnings("once")) # E: suppress_warnings +reveal_type(np.testing.suppress_warnings()(func)) # E: def () -> builtins.int +reveal_type(suppress_obj.filter(RuntimeWarning)) # E: None +reveal_type(suppress_obj.record(RuntimeWarning)) # E: list[warnings.WarningMessage] +with suppress_obj as c3: + reveal_type(c3) # E: suppress_warnings + +reveal_type(np.testing.verbose) # E: int +reveal_type(np.testing.IS_PYPY) # E: bool +reveal_type(np.testing.HAS_REFCOUNT) # E: bool +reveal_type(np.testing.HAS_LAPACK64) # E: bool + +reveal_type(np.testing.assert_(1, msg="test")) # E: None +reveal_type(np.testing.assert_(2, msg=lambda: "test")) # E: None + +if sys.platform == "win32" or sys.platform == "cygwin": + reveal_type(np.testing.memusage()) # E: builtins.int +elif sys.platform == "linux": + reveal_type(np.testing.memusage()) # E: Union[None, builtins.int] +else: + reveal_type(np.testing.memusage()) # E: + +reveal_type(np.testing.jiffies()) # E: builtins.int + +reveal_type(np.testing.build_err_msg([0, 1, 2], "test")) # E: str +reveal_type(np.testing.build_err_msg(range(2), "test", header="header")) # E: str +reveal_type(np.testing.build_err_msg(np.arange(9).reshape(3, 3), "test", verbose=False)) # E: str +reveal_type(np.testing.build_err_msg("abc", "test", names=["x", "y"])) # E: str +reveal_type(np.testing.build_err_msg([1.0, 2.0], "test", precision=5)) # E: str + +reveal_type(np.testing.assert_equal({1}, {1})) # E: None +reveal_type(np.testing.assert_equal([1, 2, 3], [1, 2, 3], err_msg="fail")) # E: None +reveal_type(np.testing.assert_equal(1, 1.0, verbose=True)) # E: None + +reveal_type(np.testing.print_assert_equal('Test XYZ of func xyz', [0, 1], [0, 1])) # E: None + +reveal_type(np.testing.assert_almost_equal(1.0, 1.1)) # E: None +reveal_type(np.testing.assert_almost_equal([1, 2, 3], [1, 2, 3], err_msg="fail")) # E: None +reveal_type(np.testing.assert_almost_equal(1, 1.0, verbose=True)) # E: None +reveal_type(np.testing.assert_almost_equal(1, 1.0001, decimal=2)) # E: None + +reveal_type(np.testing.assert_approx_equal(1.0, 1.1)) # E: None +reveal_type(np.testing.assert_approx_equal("1", "2", err_msg="fail")) # E: None +reveal_type(np.testing.assert_approx_equal(1, 1.0, verbose=True)) # E: None +reveal_type(np.testing.assert_approx_equal(1, 1.0001, significant=2)) # E: None + +reveal_type(np.testing.assert_array_compare(func2, AR_i8, AR_f8, err_msg="test")) # E: None +reveal_type(np.testing.assert_array_compare(func2, AR_i8, AR_f8, verbose=True)) # E: None +reveal_type(np.testing.assert_array_compare(func2, AR_i8, AR_f8, header="header")) # E: None 
+reveal_type(np.testing.assert_array_compare(func2, AR_i8, AR_f8, precision=np.int64())) # E: None +reveal_type(np.testing.assert_array_compare(func2, AR_i8, AR_f8, equal_nan=False)) # E: None +reveal_type(np.testing.assert_array_compare(func2, AR_i8, AR_f8, equal_inf=True)) # E: None + +reveal_type(np.testing.assert_array_equal(AR_i8, AR_f8)) # E: None +reveal_type(np.testing.assert_array_equal(AR_i8, AR_f8, err_msg="test")) # E: None +reveal_type(np.testing.assert_array_equal(AR_i8, AR_f8, verbose=True)) # E: None + +reveal_type(np.testing.assert_array_almost_equal(AR_i8, AR_f8)) # E: None +reveal_type(np.testing.assert_array_almost_equal(AR_i8, AR_f8, err_msg="test")) # E: None +reveal_type(np.testing.assert_array_almost_equal(AR_i8, AR_f8, verbose=True)) # E: None +reveal_type(np.testing.assert_array_almost_equal(AR_i8, AR_f8, decimal=1)) # E: None + +reveal_type(np.testing.assert_array_less(AR_i8, AR_f8)) # E: None +reveal_type(np.testing.assert_array_less(AR_i8, AR_f8, err_msg="test")) # E: None +reveal_type(np.testing.assert_array_less(AR_i8, AR_f8, verbose=True)) # E: None + +reveal_type(np.testing.runstring("1 + 1", {})) # E: Any +reveal_type(np.testing.runstring("int64() + 1", {"int64": np.int64})) # E: Any + +reveal_type(np.testing.assert_string_equal("1", "1")) # E: None + +reveal_type(np.testing.rundocs()) # E: None +reveal_type(np.testing.rundocs("test.py")) # E: None +reveal_type(np.testing.rundocs(Path("test.py"), raise_on_error=True)) # E: None + +@np.testing.raises(RuntimeError, RuntimeWarning) +def func3(a: int) -> bool: ... + +reveal_type(func3) # E: def (a: builtins.int) -> builtins.bool + +reveal_type(np.testing.assert_raises(RuntimeWarning)) # E: _AssertRaisesContext[builtins.RuntimeWarning] +reveal_type(np.testing.assert_raises(RuntimeWarning, func3, 5)) # E: None + +reveal_type(np.testing.assert_raises_regex(RuntimeWarning, r"test")) # E: _AssertRaisesContext[builtins.RuntimeWarning] +reveal_type(np.testing.assert_raises_regex(RuntimeWarning, b"test", func3, 5)) # E: None +reveal_type(np.testing.assert_raises_regex(RuntimeWarning, re.compile(b"test"), func3, 5)) # E: None + +class Test: ... 
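+
+# assert_raises and assert_raises_regex double as context managers when no
+# callable is passed, which is what the _AssertRaisesContext reveals above
+# check. Runtime sketch (illustrative only):
+with np.testing.assert_raises_regex(ValueError, "invalid literal"):
+    int("not a number")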
+ +def decorate(a: FT) -> FT: + return a + +reveal_type(np.testing.decorate_methods(Test, decorate)) # E: None +reveal_type(np.testing.decorate_methods(Test, decorate, None)) # E: None +reveal_type(np.testing.decorate_methods(Test, decorate, "test")) # E: None +reveal_type(np.testing.decorate_methods(Test, decorate, b"test")) # E: None +reveal_type(np.testing.decorate_methods(Test, decorate, re.compile("test"))) # E: None + +reveal_type(np.testing.measure("for i in range(1000): np.sqrt(i**2)")) # E: float +reveal_type(np.testing.measure(b"for i in range(1000): np.sqrt(i**2)", times=5)) # E: float + +reveal_type(np.testing.assert_allclose(AR_i8, AR_f8)) # E: None +reveal_type(np.testing.assert_allclose(AR_i8, AR_f8, rtol=0.005)) # E: None +reveal_type(np.testing.assert_allclose(AR_i8, AR_f8, atol=1)) # E: None +reveal_type(np.testing.assert_allclose(AR_i8, AR_f8, equal_nan=True)) # E: None +reveal_type(np.testing.assert_allclose(AR_i8, AR_f8, err_msg="err")) # E: None +reveal_type(np.testing.assert_allclose(AR_i8, AR_f8, verbose=False)) # E: None + +reveal_type(np.testing.assert_array_almost_equal_nulp(AR_i8, AR_f8, nulp=2)) # E: None + +reveal_type(np.testing.assert_array_max_ulp(AR_i8, AR_f8, maxulp=2)) # E: ndarray[Any, dtype[Any]] +reveal_type(np.testing.assert_array_max_ulp(AR_i8, AR_f8, dtype=np.float32)) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.testing.assert_warns(RuntimeWarning)) # E: _GeneratorContextManager[None] +reveal_type(np.testing.assert_warns(RuntimeWarning, func3, 5)) # E: bool + +reveal_type(np.testing.assert_no_warnings()) # E: _GeneratorContextManager[None] +reveal_type(np.testing.assert_no_warnings(func3, 5)) # E: bool + +reveal_type(np.testing.tempdir("test_dir")) # E: _GeneratorContextManager[builtins.str] +reveal_type(np.testing.tempdir(prefix=b"test")) # E: _GeneratorContextManager[builtins.bytes] +reveal_type(np.testing.tempdir("test_dir", dir=Path("here"))) # E: _GeneratorContextManager[builtins.str] + +reveal_type(np.testing.temppath("test_dir", text=True)) # E: _GeneratorContextManager[builtins.str] +reveal_type(np.testing.temppath(prefix=b"test")) # E: _GeneratorContextManager[builtins.bytes] +reveal_type(np.testing.temppath("test_dir", dir=Path("here"))) # E: _GeneratorContextManager[builtins.str] + +reveal_type(np.testing.assert_no_gc_cycles()) # E: _GeneratorContextManager[None] +reveal_type(np.testing.assert_no_gc_cycles(func3, 5)) # E: None + +reveal_type(np.testing.break_cycles()) # E: None + +reveal_type(np.testing.TestCase()) # E: unittest.case.TestCase +reveal_type(np.testing.run_module_suite(file_to_run="numpy/tests/test_matlib.py")) # E: None diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/twodim_base.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/twodim_base.pyi new file mode 100644 index 0000000..0318c3c --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/twodim_base.pyi @@ -0,0 +1,72 @@ +from typing import Any, List, TypeVar + +import numpy as np +import numpy.typing as npt + +_SCT = TypeVar("_SCT", bound=np.generic) + + +def func1(ar: npt.NDArray[_SCT], a: int) -> npt.NDArray[_SCT]: + pass + + +def func2(ar: npt.NDArray[np.number[Any]], a: str) -> npt.NDArray[np.float64]: + pass + + +AR_b: npt.NDArray[np.bool_] +AR_u: npt.NDArray[np.uint64] +AR_i: npt.NDArray[np.int64] +AR_f: npt.NDArray[np.float64] +AR_c: npt.NDArray[np.complex128] +AR_O: npt.NDArray[np.object_] + +AR_LIKE_b: List[bool] + +reveal_type(np.fliplr(AR_b)) # E: ndarray[Any, 
dtype[bool_]] +reveal_type(np.fliplr(AR_LIKE_b)) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.flipud(AR_b)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.flipud(AR_LIKE_b)) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.eye(10)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.eye(10, M=20, dtype=np.int64)) # E: ndarray[Any, dtype[{int64}]] +reveal_type(np.eye(10, k=2, dtype=int)) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.diag(AR_b)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.diag(AR_LIKE_b, k=0)) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.diagflat(AR_b)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.diagflat(AR_LIKE_b, k=0)) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.tri(10)) # E: ndarray[Any, dtype[{float64}]] +reveal_type(np.tri(10, M=20, dtype=np.int64)) # E: ndarray[Any, dtype[{int64}]] +reveal_type(np.tri(10, k=2, dtype=int)) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.tril(AR_b)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.tril(AR_LIKE_b, k=0)) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.triu(AR_b)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.triu(AR_LIKE_b, k=0)) # E: ndarray[Any, dtype[Any]] + +reveal_type(np.vander(AR_b)) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(np.vander(AR_u)) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(np.vander(AR_i, N=2)) # E: ndarray[Any, dtype[signedinteger[Any]]] +reveal_type(np.vander(AR_f, increasing=True)) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(np.vander(AR_c)) # E: ndarray[Any, dtype[complexfloating[Any, Any]]] +reveal_type(np.vander(AR_O)) # E: ndarray[Any, dtype[object_]] + +reveal_type(np.histogram2d(AR_i, AR_b)) # E: Tuple[ndarray[Any, dtype[{float64}]], ndarray[Any, dtype[floating[Any]]], ndarray[Any, dtype[floating[Any]]]] +reveal_type(np.histogram2d(AR_f, AR_f)) # E: Tuple[ndarray[Any, dtype[{float64}]], ndarray[Any, dtype[floating[Any]]], ndarray[Any, dtype[floating[Any]]]] +reveal_type(np.histogram2d(AR_f, AR_c, weights=AR_LIKE_b)) # E: Tuple[ndarray[Any, dtype[{float64}]], ndarray[Any, dtype[complexfloating[Any, Any]]], ndarray[Any, dtype[complexfloating[Any, Any]]]] + +reveal_type(np.mask_indices(10, func1)) # E: Tuple[ndarray[Any, dtype[{intp}]], ndarray[Any, dtype[{intp}]]] +reveal_type(np.mask_indices(8, func2, "0")) # E: Tuple[ndarray[Any, dtype[{intp}]], ndarray[Any, dtype[{intp}]]] + +reveal_type(np.tril_indices(10)) # E: Tuple[ndarray[Any, dtype[{int_}]], ndarray[Any, dtype[{int_}]]] + +reveal_type(np.tril_indices_from(AR_b)) # E: Tuple[ndarray[Any, dtype[{int_}]], ndarray[Any, dtype[{int_}]]] + +reveal_type(np.triu_indices(10)) # E: Tuple[ndarray[Any, dtype[{int_}]], ndarray[Any, dtype[{int_}]]] + +reveal_type(np.triu_indices_from(AR_b)) # E: Tuple[ndarray[Any, dtype[{int_}]], ndarray[Any, dtype[{int_}]]] diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/type_check.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/type_check.pyi new file mode 100644 index 0000000..13d41d8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/type_check.pyi @@ -0,0 +1,73 @@ +from typing import List +import numpy as np +import numpy.typing as npt + +f8: np.float64 +f: float + +# NOTE: Avoid importing the platform specific `np.float128` type +AR_i8: npt.NDArray[np.int64] +AR_i4: npt.NDArray[np.int32] +AR_f2: npt.NDArray[np.float16] +AR_f8: npt.NDArray[np.float64] +AR_f16: npt.NDArray[np.floating[npt._128Bit]] +AR_c8: npt.NDArray[np.complex64] +AR_c16: npt.NDArray[np.complex128] + 
+AR_LIKE_f: List[float]
+
+class RealObj:
+    real: slice
+
+class ImagObj:
+    imag: slice
+
+reveal_type(np.mintypecode(["f8"], typeset="qfQF")) # E: str
+
+reveal_type(np.asfarray(AR_f8)) # E: ndarray[Any, dtype[{float64}]]
+reveal_type(np.asfarray(AR_LIKE_f)) # E: ndarray[Any, dtype[{float64}]]
+reveal_type(np.asfarray(AR_f8, dtype="c16")) # E: ndarray[Any, dtype[complexfloating[Any, Any]]]
+reveal_type(np.asfarray(AR_f8, dtype="i8")) # E: ndarray[Any, dtype[floating[Any]]]
+
+reveal_type(np.real(RealObj())) # E: slice
+reveal_type(np.real(AR_f8)) # E: ndarray[Any, dtype[{float64}]]
+reveal_type(np.real(AR_c16)) # E: ndarray[Any, dtype[{float64}]]
+reveal_type(np.real(AR_LIKE_f)) # E: ndarray[Any, dtype[Any]]
+
+reveal_type(np.imag(ImagObj())) # E: slice
+reveal_type(np.imag(AR_f8)) # E: ndarray[Any, dtype[{float64}]]
+reveal_type(np.imag(AR_c16)) # E: ndarray[Any, dtype[{float64}]]
+reveal_type(np.imag(AR_LIKE_f)) # E: ndarray[Any, dtype[Any]]
+
+reveal_type(np.iscomplex(f8)) # E: bool_
+reveal_type(np.iscomplex(AR_f8)) # E: ndarray[Any, dtype[bool_]]
+reveal_type(np.iscomplex(AR_LIKE_f)) # E: ndarray[Any, dtype[bool_]]
+
+reveal_type(np.isreal(f8)) # E: bool_
+reveal_type(np.isreal(AR_f8)) # E: ndarray[Any, dtype[bool_]]
+reveal_type(np.isreal(AR_LIKE_f)) # E: ndarray[Any, dtype[bool_]]
+
+reveal_type(np.iscomplexobj(f8)) # E: bool
+reveal_type(np.isrealobj(f8)) # E: bool
+
+reveal_type(np.nan_to_num(f8)) # E: {float64}
+reveal_type(np.nan_to_num(f, copy=True)) # E: Any
+reveal_type(np.nan_to_num(AR_f8, nan=1.5)) # E: ndarray[Any, dtype[{float64}]]
+reveal_type(np.nan_to_num(AR_LIKE_f, posinf=9999)) # E: ndarray[Any, dtype[Any]]
+
+reveal_type(np.real_if_close(AR_f8)) # E: ndarray[Any, dtype[{float64}]]
+reveal_type(np.real_if_close(AR_c16)) # E: Union[ndarray[Any, dtype[{float64}]], ndarray[Any, dtype[{complex128}]]]
+reveal_type(np.real_if_close(AR_c8)) # E: Union[ndarray[Any, dtype[{float32}]], ndarray[Any, dtype[{complex64}]]]
+reveal_type(np.real_if_close(AR_LIKE_f)) # E: ndarray[Any, dtype[Any]]
+
+reveal_type(np.typename("h")) # E: Literal['short']
+reveal_type(np.typename("B")) # E: Literal['unsigned char']
+reveal_type(np.typename("V")) # E: Literal['void']
+reveal_type(np.typename("S1")) # E: Literal['character']
+
+reveal_type(np.common_type(AR_i4)) # E: Type[{float64}]
+reveal_type(np.common_type(AR_f2)) # E: Type[{float16}]
+reveal_type(np.common_type(AR_f2, AR_i4)) # E: Type[{float64}]
+reveal_type(np.common_type(AR_f16, AR_i4)) # E: Type[{float128}]
+reveal_type(np.common_type(AR_c8, AR_f2)) # E: Type[{complex64}]
+reveal_type(np.common_type(AR_f2, AR_c8, AR_i4)) # E: Type[{complex128}]
diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/ufunc_config.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/ufunc_config.pyi
new file mode 100644
index 0000000..2c6fadf
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/ufunc_config.pyi
@@ -0,0 +1,25 @@
+"""Typing tests for `core._ufunc_config`."""
+
+import numpy as np
+
+def func(a: str, b: int) -> None: ...
+
+class Write:
+    def write(self, value: str) -> None: ...
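+
+# func and Write above model the two error-handler shapes np.seterrcall
+# accepts: a callable taking (message: str, flag: int) and an object with a
+# write(str) method. The context-manager form is the usual entry point;
+# a runtime sketch (illustrative only):
+with np.errstate(divide="ignore"):
+    np.float64(1.0) / np.float64(0.0)  # -> inf, without a RuntimeWarning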
+ +reveal_type(np.seterr(all=None)) # E: TypedDict('core._ufunc_config._ErrDict' +reveal_type(np.seterr(divide="ignore")) # E: TypedDict('core._ufunc_config._ErrDict' +reveal_type(np.seterr(over="warn")) # E: TypedDict('core._ufunc_config._ErrDict' +reveal_type(np.seterr(under="call")) # E: TypedDict('core._ufunc_config._ErrDict' +reveal_type(np.seterr(invalid="raise")) # E: TypedDict('core._ufunc_config._ErrDict' +reveal_type(np.geterr()) # E: TypedDict('core._ufunc_config._ErrDict' + +reveal_type(np.setbufsize(4096)) # E: int +reveal_type(np.getbufsize()) # E: int + +reveal_type(np.seterrcall(func)) # E: Union[None, def (builtins.str, builtins.int) -> Any, _SupportsWrite[builtins.str]] +reveal_type(np.seterrcall(Write())) # E: Union[None, def (builtins.str, builtins.int) -> Any, _SupportsWrite[builtins.str]] +reveal_type(np.geterrcall()) # E: Union[None, def (builtins.str, builtins.int) -> Any, _SupportsWrite[builtins.str]] + +reveal_type(np.errstate(call=func, all="call")) # E: errstate[def (a: builtins.str, b: builtins.int)] +reveal_type(np.errstate(call=Write(), divide="log", over="log")) # E: errstate[ufunc_config.Write] diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/ufunclike.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/ufunclike.pyi new file mode 100644 index 0000000..2d67c92 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/ufunclike.pyi @@ -0,0 +1,29 @@ +from typing import List, Any +import numpy as np + +AR_LIKE_b: List[bool] +AR_LIKE_u: List[np.uint32] +AR_LIKE_i: List[int] +AR_LIKE_f: List[float] +AR_LIKE_O: List[np.object_] + +AR_U: np.ndarray[Any, np.dtype[np.str_]] + +reveal_type(np.fix(AR_LIKE_b)) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(np.fix(AR_LIKE_u)) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(np.fix(AR_LIKE_i)) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(np.fix(AR_LIKE_f)) # E: ndarray[Any, dtype[floating[Any]]] +reveal_type(np.fix(AR_LIKE_O)) # E: Any +reveal_type(np.fix(AR_LIKE_f, out=AR_U)) # E: ndarray[Any, dtype[str_]] + +reveal_type(np.isposinf(AR_LIKE_b)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.isposinf(AR_LIKE_u)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.isposinf(AR_LIKE_i)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.isposinf(AR_LIKE_f)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.isposinf(AR_LIKE_f, out=AR_U)) # E: ndarray[Any, dtype[str_]] + +reveal_type(np.isneginf(AR_LIKE_b)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.isneginf(AR_LIKE_u)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.isneginf(AR_LIKE_i)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.isneginf(AR_LIKE_f)) # E: ndarray[Any, dtype[bool_]] +reveal_type(np.isneginf(AR_LIKE_f, out=AR_U)) # E: ndarray[Any, dtype[str_]] diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/ufuncs.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/ufuncs.pyi new file mode 100644 index 0000000..3bf83c8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/ufuncs.pyi @@ -0,0 +1,68 @@ +import numpy as np +import numpy.typing as npt + +f8: np.float64 +AR_f8: npt.NDArray[np.float64] +AR_i8: npt.NDArray[np.int64] + +reveal_type(np.absolute.__doc__) # E: str +reveal_type(np.absolute.types) # E: builtins.list[builtins.str] + +reveal_type(np.absolute.__name__) # E: Literal['absolute'] +reveal_type(np.absolute.ntypes) # E: Literal[20] +reveal_type(np.absolute.identity) # E: None 
+reveal_type(np.absolute.nin) # E: Literal[1]
+reveal_type(np.absolute.nout) # E: Literal[1]
+reveal_type(np.absolute.nargs) # E: Literal[2]
+reveal_type(np.absolute.signature) # E: None
+reveal_type(np.absolute(f8)) # E: Any
+reveal_type(np.absolute(AR_f8)) # E: ndarray
+reveal_type(np.absolute.at(AR_f8, AR_i8)) # E: None
+
+reveal_type(np.add.__name__) # E: Literal['add']
+reveal_type(np.add.ntypes) # E: Literal[22]
+reveal_type(np.add.identity) # E: Literal[0]
+reveal_type(np.add.nin) # E: Literal[2]
+reveal_type(np.add.nout) # E: Literal[1]
+reveal_type(np.add.nargs) # E: Literal[3]
+reveal_type(np.add.signature) # E: None
+reveal_type(np.add(f8, f8)) # E: Any
+reveal_type(np.add(AR_f8, f8)) # E: ndarray
+reveal_type(np.add.at(AR_f8, AR_i8, f8)) # E: None
+reveal_type(np.add.reduce(AR_f8, axis=0)) # E: Any
+reveal_type(np.add.accumulate(AR_f8)) # E: ndarray
+reveal_type(np.add.reduceat(AR_f8, AR_i8)) # E: ndarray
+reveal_type(np.add.outer(f8, f8)) # E: Any
+reveal_type(np.add.outer(AR_f8, f8)) # E: ndarray
+
+reveal_type(np.frexp.__name__) # E: Literal['frexp']
+reveal_type(np.frexp.ntypes) # E: Literal[4]
+reveal_type(np.frexp.identity) # E: None
+reveal_type(np.frexp.nin) # E: Literal[1]
+reveal_type(np.frexp.nout) # E: Literal[2]
+reveal_type(np.frexp.nargs) # E: Literal[3]
+reveal_type(np.frexp.signature) # E: None
+reveal_type(np.frexp(f8)) # E: Tuple[Any, Any]
+reveal_type(np.frexp(AR_f8)) # E: Tuple[ndarray[Any, dtype[Any]], ndarray[Any, dtype[Any]]]
+
+reveal_type(np.divmod.__name__) # E: Literal['divmod']
+reveal_type(np.divmod.ntypes) # E: Literal[15]
+reveal_type(np.divmod.identity) # E: None
+reveal_type(np.divmod.nin) # E: Literal[2]
+reveal_type(np.divmod.nout) # E: Literal[2]
+reveal_type(np.divmod.nargs) # E: Literal[4]
+reveal_type(np.divmod.signature) # E: None
+reveal_type(np.divmod(f8, f8)) # E: Tuple[Any, Any]
+reveal_type(np.divmod(AR_f8, f8)) # E: Tuple[ndarray[Any, dtype[Any]], ndarray[Any, dtype[Any]]]
+
+reveal_type(np.matmul.__name__) # E: Literal['matmul']
+reveal_type(np.matmul.ntypes) # E: Literal[19]
+reveal_type(np.matmul.identity) # E: None
+reveal_type(np.matmul.nin) # E: Literal[2]
+reveal_type(np.matmul.nout) # E: Literal[1]
+reveal_type(np.matmul.nargs) # E: Literal[3]
+reveal_type(np.matmul.signature) # E: Literal['(n?,k),(k,m?)->(n?,m?)']
+reveal_type(np.matmul(AR_f8, AR_f8)) # E: Any
+reveal_type(np.matmul(AR_f8, AR_f8, axes=[(0, 1), (0, 1), (0, 1)])) # E: Any
diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/version.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/version.pyi
new file mode 100644
index 0000000..e538376
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/version.pyi
@@ -0,0 +1,8 @@
+import numpy.version
+
+reveal_type(numpy.version.version) # E: str
+reveal_type(numpy.version.__version__) # E: str
+reveal_type(numpy.version.full_version) # E: str
+reveal_type(numpy.version.git_revision) # E: str
+reveal_type(numpy.version.release) # E: bool
+reveal_type(numpy.version.short_version) # E: str
diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/warnings_and_errors.pyi b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/warnings_and_errors.pyi
new file mode 100644
index 0000000..d5c5044
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/data/reveal/warnings_and_errors.pyi
@@ -0,0 +1,11 @@
+from
typing import Type + +import numpy as np + +reveal_type(np.ModuleDeprecationWarning()) # E: ModuleDeprecationWarning +reveal_type(np.VisibleDeprecationWarning()) # E: VisibleDeprecationWarning +reveal_type(np.ComplexWarning()) # E: ComplexWarning +reveal_type(np.RankWarning()) # E: RankWarning +reveal_type(np.TooHardError()) # E: TooHardError +reveal_type(np.AxisError("test")) # E: AxisError +reveal_type(np.AxisError(5, 1)) # E: AxisError diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/test_generic_alias.py b/.local/lib/python3.8/site-packages/numpy/typing/tests/test_generic_alias.py new file mode 100644 index 0000000..3934342 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/test_generic_alias.py @@ -0,0 +1,144 @@ +from __future__ import annotations + +import sys +import copy +import types +import pickle +import weakref +from typing import TypeVar, Any, Callable, Tuple, Type, Union + +import pytest +import numpy as np +from numpy.typing._generic_alias import _GenericAlias + +ScalarType = TypeVar("ScalarType", bound=np.generic, covariant=True) +T1 = TypeVar("T1") +T2 = TypeVar("T2") +DType = _GenericAlias(np.dtype, (ScalarType,)) +NDArray = _GenericAlias(np.ndarray, (Any, DType)) + +if sys.version_info >= (3, 9): + DType_ref = types.GenericAlias(np.dtype, (ScalarType,)) + NDArray_ref = types.GenericAlias(np.ndarray, (Any, DType_ref)) + FuncType = Callable[[Union[_GenericAlias, types.GenericAlias]], Any] +else: + DType_ref = Any + NDArray_ref = Any + FuncType = Callable[[_GenericAlias], Any] + +GETATTR_NAMES = sorted(set(dir(np.ndarray)) - _GenericAlias._ATTR_EXCEPTIONS) + +BUFFER = np.array([1], dtype=np.int64) +BUFFER.setflags(write=False) + +def _get_subclass_mro(base: type) -> Tuple[type, ...]: + class Subclass(base): # type: ignore[misc,valid-type] + pass + return Subclass.__mro__[1:] + + +class TestGenericAlias: + """Tests for `numpy.typing._generic_alias._GenericAlias`.""" + + @pytest.mark.parametrize("name,func", [ + ("__init__", lambda n: n), + ("__init__", lambda n: _GenericAlias(np.ndarray, Any)), + ("__init__", lambda n: _GenericAlias(np.ndarray, (Any,))), + ("__init__", lambda n: _GenericAlias(np.ndarray, (Any, Any))), + ("__init__", lambda n: _GenericAlias(np.ndarray, T1)), + ("__init__", lambda n: _GenericAlias(np.ndarray, (T1,))), + ("__init__", lambda n: _GenericAlias(np.ndarray, (T1, T2))), + ("__origin__", lambda n: n.__origin__), + ("__args__", lambda n: n.__args__), + ("__parameters__", lambda n: n.__parameters__), + ("__reduce__", lambda n: n.__reduce__()[1:]), + ("__reduce_ex__", lambda n: n.__reduce_ex__(1)[1:]), + ("__mro_entries__", lambda n: n.__mro_entries__([object])), + ("__hash__", lambda n: hash(n)), + ("__repr__", lambda n: repr(n)), + ("__getitem__", lambda n: n[np.float64]), + ("__getitem__", lambda n: n[ScalarType][np.float64]), + ("__getitem__", lambda n: n[Union[np.int64, ScalarType]][np.float64]), + ("__getitem__", lambda n: n[Union[T1, T2]][np.float32, np.float64]), + ("__eq__", lambda n: n == n), + ("__ne__", lambda n: n != np.ndarray), + ("__dir__", lambda n: dir(n)), + ("__call__", lambda n: n((1,), np.int64, BUFFER)), + ("__call__", lambda n: n(shape=(1,), dtype=np.int64, buffer=BUFFER)), + ("subclassing", lambda n: _get_subclass_mro(n)), + ("pickle", lambda n: n == pickle.loads(pickle.dumps(n))), + ]) + def test_pass(self, name: str, func: FuncType) -> None: + """Compare `types.GenericAlias` with its numpy-based backport. 
+
+        Check whether ``func`` runs as intended and that both `GenericAlias`
+        and `_GenericAlias` return the same result.
+
+        """
+        value = func(NDArray)
+
+        if sys.version_info >= (3, 9):
+            value_ref = func(NDArray_ref)
+            assert value == value_ref
+
+    @pytest.mark.parametrize("name,func", [
+        ("__copy__", lambda n: n == copy.copy(n)),
+        ("__deepcopy__", lambda n: n == copy.deepcopy(n)),
+    ])
+    def test_copy(self, name: str, func: FuncType) -> None:
+        value = func(NDArray)
+
+        # xref bpo-45167
+        GE_398 = (
+            sys.version_info[:2] == (3, 9) and sys.version_info >= (3, 9, 8)
+        )
+        if GE_398 or sys.version_info >= (3, 10, 1):
+            value_ref = func(NDArray_ref)
+            assert value == value_ref
+
+    def test_weakref(self) -> None:
+        """Test ``__weakref__``."""
+        value = weakref.ref(NDArray)()
+
+        if sys.version_info >= (3, 9, 1):  # xref bpo-42332
+            value_ref = weakref.ref(NDArray_ref)()
+            assert value == value_ref
+
+    @pytest.mark.parametrize("name", GETATTR_NAMES)
+    def test_getattr(self, name: str) -> None:
+        """Test that `getattr` wraps around the underlying type,
+        aka ``__origin__``.
+
+        """
+        value = getattr(NDArray, name)
+        value_ref1 = getattr(np.ndarray, name)
+
+        if sys.version_info >= (3, 9):
+            value_ref2 = getattr(NDArray_ref, name)
+            assert value == value_ref1 == value_ref2
+        else:
+            assert value == value_ref1
+
+    @pytest.mark.parametrize("name,exc_type,func", [
+        ("__getitem__", TypeError, lambda n: n[()]),
+        ("__getitem__", TypeError, lambda n: n[Any, Any]),
+        ("__getitem__", TypeError, lambda n: n[Any][Any]),
+        ("isinstance", TypeError, lambda n: isinstance(np.array(1), n)),
+        ("issubclass", TypeError, lambda n: issubclass(np.ndarray, n)),
+        ("setattr", AttributeError, lambda n: setattr(n, "__origin__", int)),
+        ("setattr", AttributeError, lambda n: setattr(n, "test", int)),
+        ("getattr", AttributeError, lambda n: getattr(n, "test")),
+    ])
+    def test_raise(
+        self,
+        name: str,
+        exc_type: Type[BaseException],
+        func: FuncType,
+    ) -> None:
+        """Test operations that are supposed to raise."""
+        with pytest.raises(exc_type):
+            func(NDArray)
+
+        if sys.version_info >= (3, 9):
+            with pytest.raises(exc_type):
+                func(NDArray_ref) diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/test_isfile.py b/.local/lib/python3.8/site-packages/numpy/typing/tests/test_isfile.py new file mode 100644 index 0000000..a898b3e --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/test_isfile.py @@ -0,0 +1,30 @@
+import os
+from pathlib import Path
+
+import numpy as np
+from numpy.testing import assert_
+
+ROOT = Path(np.__file__).parents[0]
+FILES = [
+    ROOT / "py.typed",
+    ROOT / "__init__.pyi",
+    ROOT / "ctypeslib.pyi",
+    ROOT / "core" / "__init__.pyi",
+    ROOT / "distutils" / "__init__.pyi",
+    ROOT / "f2py" / "__init__.pyi",
+    ROOT / "fft" / "__init__.pyi",
+    ROOT / "lib" / "__init__.pyi",
+    ROOT / "linalg" / "__init__.pyi",
+    ROOT / "ma" / "__init__.pyi",
+    ROOT / "matrixlib" / "__init__.pyi",
+    ROOT / "polynomial" / "__init__.pyi",
+    ROOT / "random" / "__init__.pyi",
+    ROOT / "testing" / "__init__.pyi",
+]
+
+
+class TestIsFile:
+    def test_isfile(self):
+        """Test if all ``.pyi`` files are properly installed."""
+        for file in FILES:
+            assert_(os.path.isfile(file)) diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/test_runtime.py b/.local/lib/python3.8/site-packages/numpy/typing/tests/test_runtime.py new file mode 100644 index 0000000..5b5df49 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/test_runtime.py @@ -0,0 +1,82 @@ +"""Test the
runtime usage of `numpy.typing`.""" + +from __future__ import annotations + +import sys +from typing import get_type_hints, Union, NamedTuple, get_args, get_origin + +import pytest +import numpy as np +import numpy.typing as npt + + +class TypeTup(NamedTuple): + typ: type + args: tuple[type, ...] + origin: None | type + + +if sys.version_info >= (3, 9): + NDArrayTup = TypeTup(npt.NDArray, npt.NDArray.__args__, np.ndarray) +else: + NDArrayTup = TypeTup(npt.NDArray, (), None) + +TYPES = { + "ArrayLike": TypeTup(npt.ArrayLike, npt.ArrayLike.__args__, Union), + "DTypeLike": TypeTup(npt.DTypeLike, npt.DTypeLike.__args__, Union), + "NBitBase": TypeTup(npt.NBitBase, (), None), + "NDArray": NDArrayTup, +} + + +@pytest.mark.parametrize("name,tup", TYPES.items(), ids=TYPES.keys()) +def test_get_args(name: type, tup: TypeTup) -> None: + """Test `typing.get_args`.""" + typ, ref = tup.typ, tup.args + out = get_args(typ) + assert out == ref + + +@pytest.mark.parametrize("name,tup", TYPES.items(), ids=TYPES.keys()) +def test_get_origin(name: type, tup: TypeTup) -> None: + """Test `typing.get_origin`.""" + typ, ref = tup.typ, tup.origin + out = get_origin(typ) + assert out == ref + + +@pytest.mark.parametrize("name,tup", TYPES.items(), ids=TYPES.keys()) +def test_get_type_hints(name: type, tup: TypeTup) -> None: + """Test `typing.get_type_hints`.""" + typ = tup.typ + + # Explicitly set `__annotations__` in order to circumvent the + # stringification performed by `from __future__ import annotations` + def func(a): pass + func.__annotations__ = {"a": typ, "return": None} + + out = get_type_hints(func) + ref = {"a": typ, "return": type(None)} + assert out == ref + + +@pytest.mark.parametrize("name,tup", TYPES.items(), ids=TYPES.keys()) +def test_get_type_hints_str(name: type, tup: TypeTup) -> None: + """Test `typing.get_type_hints` with string-representation of types.""" + typ_str, typ = f"npt.{name}", tup.typ + + # Explicitly set `__annotations__` in order to circumvent the + # stringification performed by `from __future__ import annotations` + def func(a): pass + func.__annotations__ = {"a": typ_str, "return": None} + + out = get_type_hints(func) + ref = {"a": typ, "return": type(None)} + assert out == ref + + +def test_keys() -> None: + """Test that ``TYPES.keys()`` and ``numpy.typing.__all__`` are synced.""" + keys = TYPES.keys() + ref = set(npt.__all__) + assert keys == ref diff --git a/.local/lib/python3.8/site-packages/numpy/typing/tests/test_typing.py b/.local/lib/python3.8/site-packages/numpy/typing/tests/test_typing.py new file mode 100644 index 0000000..fe58a8f --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/typing/tests/test_typing.py @@ -0,0 +1,455 @@ +from __future__ import annotations + +import importlib.util +import itertools +import os +import re +import shutil +from collections import defaultdict +from collections.abc import Iterator +from typing import IO, TYPE_CHECKING + +import pytest +import numpy as np +import numpy.typing as npt +from numpy.typing.mypy_plugin import ( + _PRECISION_DICT, + _EXTENDED_PRECISION_LIST, + _C_INTP, +) + +try: + from mypy import api +except ImportError: + NO_MYPY = True +else: + NO_MYPY = False + +if TYPE_CHECKING: + # We need this as annotation, but it's located in a private namespace. 
+    # As a compromise, do *not* import it during runtime
+    from _pytest.mark.structures import ParameterSet
+
+DATA_DIR = os.path.join(os.path.dirname(__file__), "data")
+PASS_DIR = os.path.join(DATA_DIR, "pass")
+FAIL_DIR = os.path.join(DATA_DIR, "fail")
+REVEAL_DIR = os.path.join(DATA_DIR, "reveal")
+MISC_DIR = os.path.join(DATA_DIR, "misc")
+MYPY_INI = os.path.join(DATA_DIR, "mypy.ini")
+CACHE_DIR = os.path.join(DATA_DIR, ".mypy_cache")
+
+#: A dictionary with file names as keys and lists of the mypy stdout as values.
+#: To-be populated by `run_mypy`.
+OUTPUT_MYPY: dict[str, list[str]] = {}
+
+
+def _key_func(key: str) -> str:
+    """Split at the first occurrence of the ``:`` character.
+
+    Windows drive-letters (*e.g.* ``C:``) are ignored herein.
+    """
+    drive, tail = os.path.splitdrive(key)
+    return os.path.join(drive, tail.split(":", 1)[0])
+
+
+def _strip_filename(msg: str) -> str:
+    """Strip the filename from a mypy message."""
+    _, tail = os.path.splitdrive(msg)
+    return tail.split(":", 1)[-1]
+
+
+def strip_func(match: re.Match[str]) -> str:
+    """`re.sub` helper function for stripping module names."""
+    return match.groups()[1]
+
+
+@pytest.mark.slow
+@pytest.mark.skipif(NO_MYPY, reason="Mypy is not installed")
+@pytest.fixture(scope="module", autouse=True)
+def run_mypy() -> None:
+    """Clear the cache and run mypy before running any of the typing tests.
+
+    The mypy results are cached in `OUTPUT_MYPY` for further use.
+
+    The cache refresh can be skipped using
+
+    NUMPY_TYPING_TEST_CLEAR_CACHE=0 pytest numpy/typing/tests
+    """
+    if (
+        os.path.isdir(CACHE_DIR)
+        and bool(os.environ.get("NUMPY_TYPING_TEST_CLEAR_CACHE", True))
+    ):
+        shutil.rmtree(CACHE_DIR)
+
+    for directory in (PASS_DIR, REVEAL_DIR, FAIL_DIR, MISC_DIR):
+        # Run mypy
+        stdout, stderr, exit_code = api.run([
+            "--config-file",
+            MYPY_INI,
+            "--cache-dir",
+            CACHE_DIR,
+            directory,
+        ])
+        if stderr:
+            pytest.fail(f"Unexpected mypy standard error\n\n{stderr}")
+        elif exit_code not in {0, 1}:
+            pytest.fail(f"Unexpected mypy exit code: {exit_code}\n\n{stdout}")
+        stdout = stdout.replace('*', '')
+
+        # Parse the output
+        iterator = itertools.groupby(stdout.split("\n"), key=_key_func)
+        OUTPUT_MYPY.update((k, list(v)) for k, v in iterator if k)
+
+
+def get_test_cases(directory: str) -> Iterator[ParameterSet]:
+    for root, _, files in os.walk(directory):
+        for fname in files:
+            short_fname, ext = os.path.splitext(fname)
+            if ext in (".pyi", ".py"):
+                fullpath = os.path.join(root, fname)
+                yield pytest.param(fullpath, id=short_fname)
+
+
+@pytest.mark.slow
+@pytest.mark.skipif(NO_MYPY, reason="Mypy is not installed")
+@pytest.mark.parametrize("path", get_test_cases(PASS_DIR))
+def test_success(path) -> None:
+    # Alias `OUTPUT_MYPY` so that it appears in the local namespace
+    output_mypy = OUTPUT_MYPY
+    if path in output_mypy:
+        msg = "Unexpected mypy output\n\n"
+        msg += "\n".join(_strip_filename(v) for v in output_mypy[path])
+        raise AssertionError(msg)
+
+
+@pytest.mark.slow
+@pytest.mark.skipif(NO_MYPY, reason="Mypy is not installed")
+@pytest.mark.parametrize("path", get_test_cases(FAIL_DIR))
+def test_fail(path: str) -> None:
+    __tracebackhide__ = True
+
+    with open(path) as fin:
+        lines = fin.readlines()
+
+    errors = defaultdict(lambda: "")
+
+    output_mypy = OUTPUT_MYPY
+    assert path in output_mypy
+    for error_line in output_mypy[path]:
+        error_line = _strip_filename(error_line)
+        match = re.match(
+            r"(?P<lineno>\d+): (error|note): .+$",
+            error_line,
+        )
+        if match is None:
+            raise ValueError(f"Unexpected error line format: 
{error_line}") + lineno = int(match.group('lineno')) + errors[lineno] += f'{error_line}\n' + + for i, line in enumerate(lines): + lineno = i + 1 + if ( + line.startswith('#') + or (" E:" not in line and lineno not in errors) + ): + continue + + target_line = lines[lineno - 1] + if "# E:" in target_line: + expression, _, marker = target_line.partition(" # E: ") + expected_error = errors[lineno].strip() + marker = marker.strip() + _test_fail(path, expression, marker, expected_error, lineno) + else: + pytest.fail( + f"Unexpected mypy output at line {lineno}\n\n{errors[lineno]}" + ) + + +_FAIL_MSG1 = """Extra error at line {} + +Expression: {} +Extra error: {!r} +""" + +_FAIL_MSG2 = """Error mismatch at line {} + +Expression: {} +Expected error: {!r} +Observed error: {!r} +""" + + +def _test_fail( + path: str, + expression: str, + error: str, + expected_error: None | str, + lineno: int, +) -> None: + if expected_error is None: + raise AssertionError(_FAIL_MSG1.format(lineno, expression, error)) + elif error not in expected_error: + raise AssertionError(_FAIL_MSG2.format( + lineno, expression, expected_error, error + )) + + +def _construct_ctypes_dict() -> dict[str, str]: + dct = { + "ubyte": "c_ubyte", + "ushort": "c_ushort", + "uintc": "c_uint", + "uint": "c_ulong", + "ulonglong": "c_ulonglong", + "byte": "c_byte", + "short": "c_short", + "intc": "c_int", + "int_": "c_long", + "longlong": "c_longlong", + "single": "c_float", + "double": "c_double", + "longdouble": "c_longdouble", + } + + # Match `ctypes` names to the first ctypes type with a given kind and + # precision, e.g. {"c_double": "c_double", "c_longdouble": "c_double"} + # if both types represent 64-bit floats. + # In this context "first" is defined by the order of `dct` + ret = {} + visited: dict[tuple[str, int], str] = {} + for np_name, ct_name in dct.items(): + np_scalar = getattr(np, np_name)() + + # Find the first `ctypes` type for a given `kind`/`itemsize` combo + key = (np_scalar.dtype.kind, np_scalar.dtype.itemsize) + ret[ct_name] = visited.setdefault(key, f"ctypes.{ct_name}") + return ret + + +def _construct_format_dict() -> dict[str, str]: + dct = {k.split(".")[-1]: v.replace("numpy", "numpy.typing") for + k, v in _PRECISION_DICT.items()} + + return { + "uint8": "numpy.unsignedinteger[numpy.typing._8Bit]", + "uint16": "numpy.unsignedinteger[numpy.typing._16Bit]", + "uint32": "numpy.unsignedinteger[numpy.typing._32Bit]", + "uint64": "numpy.unsignedinteger[numpy.typing._64Bit]", + "uint128": "numpy.unsignedinteger[numpy.typing._128Bit]", + "uint256": "numpy.unsignedinteger[numpy.typing._256Bit]", + "int8": "numpy.signedinteger[numpy.typing._8Bit]", + "int16": "numpy.signedinteger[numpy.typing._16Bit]", + "int32": "numpy.signedinteger[numpy.typing._32Bit]", + "int64": "numpy.signedinteger[numpy.typing._64Bit]", + "int128": "numpy.signedinteger[numpy.typing._128Bit]", + "int256": "numpy.signedinteger[numpy.typing._256Bit]", + "float16": "numpy.floating[numpy.typing._16Bit]", + "float32": "numpy.floating[numpy.typing._32Bit]", + "float64": "numpy.floating[numpy.typing._64Bit]", + "float80": "numpy.floating[numpy.typing._80Bit]", + "float96": "numpy.floating[numpy.typing._96Bit]", + "float128": "numpy.floating[numpy.typing._128Bit]", + "float256": "numpy.floating[numpy.typing._256Bit]", + "complex64": ("numpy.complexfloating" + "[numpy.typing._32Bit, numpy.typing._32Bit]"), + "complex128": ("numpy.complexfloating" + "[numpy.typing._64Bit, numpy.typing._64Bit]"), + "complex160": ("numpy.complexfloating" + "[numpy.typing._80Bit, 
numpy.typing._80Bit]"),
+        "complex192": ("numpy.complexfloating"
+                       "[numpy.typing._96Bit, numpy.typing._96Bit]"),
+        "complex256": ("numpy.complexfloating"
+                       "[numpy.typing._128Bit, numpy.typing._128Bit]"),
+        "complex512": ("numpy.complexfloating"
+                       "[numpy.typing._256Bit, numpy.typing._256Bit]"),
+
+        "ubyte": f"numpy.unsignedinteger[{dct['_NBitByte']}]",
+        "ushort": f"numpy.unsignedinteger[{dct['_NBitShort']}]",
+        "uintc": f"numpy.unsignedinteger[{dct['_NBitIntC']}]",
+        "uintp": f"numpy.unsignedinteger[{dct['_NBitIntP']}]",
+        "uint": f"numpy.unsignedinteger[{dct['_NBitInt']}]",
+        "ulonglong": f"numpy.unsignedinteger[{dct['_NBitLongLong']}]",
+        "byte": f"numpy.signedinteger[{dct['_NBitByte']}]",
+        "short": f"numpy.signedinteger[{dct['_NBitShort']}]",
+        "intc": f"numpy.signedinteger[{dct['_NBitIntC']}]",
+        "intp": f"numpy.signedinteger[{dct['_NBitIntP']}]",
+        "int_": f"numpy.signedinteger[{dct['_NBitInt']}]",
+        "longlong": f"numpy.signedinteger[{dct['_NBitLongLong']}]",
+
+        "half": f"numpy.floating[{dct['_NBitHalf']}]",
+        "single": f"numpy.floating[{dct['_NBitSingle']}]",
+        "double": f"numpy.floating[{dct['_NBitDouble']}]",
+        "longdouble": f"numpy.floating[{dct['_NBitLongDouble']}]",
+        "csingle": ("numpy.complexfloating"
+                    f"[{dct['_NBitSingle']}, {dct['_NBitSingle']}]"),
+        "cdouble": ("numpy.complexfloating"
+                    f"[{dct['_NBitDouble']}, {dct['_NBitDouble']}]"),
+        "clongdouble": (
+            "numpy.complexfloating"
+            f"[{dct['_NBitLongDouble']}, {dct['_NBitLongDouble']}]"
+        ),
+
+        # numpy.typing
+        "_NBitInt": dct['_NBitInt'],
+
+        # numpy.ctypeslib
+        "c_intp": f"ctypes.{_C_INTP}"
+    }
+
+
+#: A dictionary with all supported format keys (as keys)
+#: and matching values
+FORMAT_DICT: dict[str, str] = _construct_format_dict()
+FORMAT_DICT.update(_construct_ctypes_dict())
+
+
+def _parse_reveals(file: IO[str]) -> tuple[npt.NDArray[np.str_], list[str]]:
+    """Extract and parse all ``" # E: "`` comments from the passed
+    file-like object.
+
+    All format keys will be substituted for their respective value
+    from `FORMAT_DICT`, *e.g.* ``"{float64}"`` becomes
+    ``"numpy.floating[numpy.typing._64Bit]"``.
+    """
+    string = file.read().replace("*", "")
+
+    # Grab all `# E:`-based comments and matching expressions
+    expression_array, _, comments_array = np.char.partition(
+        string.split("\n"), sep=" # E: "
+    ).T
+    comments = "/n".join(comments_array)
+
+    # Only search for the `{*}` pattern within comments, otherwise
+    # there is the risk of accidentally grabbing dictionaries and sets
+    key_set = set(re.findall(r"\{(.*?)\}", comments))
+    kwargs = {
+        k: FORMAT_DICT.get(k, f"<UNRECOGNIZED FORMAT KEY {k!r}>") for
+        k in key_set
+    }
+    fmt_str = comments.format(**kwargs)
+
+    return expression_array, fmt_str.split("/n")
+
+
+@pytest.mark.slow
+@pytest.mark.skipif(NO_MYPY, reason="Mypy is not installed")
+@pytest.mark.parametrize("path", get_test_cases(REVEAL_DIR))
+def test_reveal(path: str) -> None:
+    """Validate that mypy correctly infers the return-types of
+    the expressions in `path`.
+    """
+    __tracebackhide__ = True
+
+    with open(path) as fin:
+        expression_array, reveal_list = _parse_reveals(fin)
+
+    output_mypy = OUTPUT_MYPY
+    assert path in output_mypy
+    for error_line in output_mypy[path]:
+        error_line = _strip_filename(error_line)
+        match = re.match(
+            r"(?P<lineno>\d+): note: .+$",
+            error_line,
+        )
+        if match is None:
+            raise ValueError(f"Unexpected reveal line format: {error_line}")
+        lineno = int(match.group('lineno')) - 1
+        assert "Revealed type is" in error_line
+
+        marker = reveal_list[lineno]
+        expression = expression_array[lineno]
+        _test_reveal(path, expression, marker, error_line, 1 + lineno)
+
+
+_REVEAL_MSG = """Reveal mismatch at line {}
+
+Expression: {}
+Expected reveal: {!r}
+Observed reveal: {!r}
+"""
+
+
+def _test_reveal(
+    path: str,
+    expression: str,
+    reveal: str,
+    expected_reveal: str,
+    lineno: int,
+) -> None:
+    """Error-reporting helper function for `test_reveal`."""
+    strip_pattern = re.compile(r"(\w+\.)+(\w+)")
+    stripped_reveal = strip_pattern.sub(strip_func, reveal)
+    stripped_expected_reveal = strip_pattern.sub(strip_func, expected_reveal)
+    if stripped_reveal not in stripped_expected_reveal:
+        raise AssertionError(
+            _REVEAL_MSG.format(lineno,
+                               expression,
+                               stripped_expected_reveal,
+                               stripped_reveal)
+        )
+
+
+@pytest.mark.slow
+@pytest.mark.skipif(NO_MYPY, reason="Mypy is not installed")
+@pytest.mark.parametrize("path", get_test_cases(PASS_DIR))
+def test_code_runs(path: str) -> None:
+    """Validate that the code in `path` runs properly during runtime."""
+    path_without_extension, _ = os.path.splitext(path)
+    dirname, filename = path.split(os.sep)[-2:]
+
+    spec = importlib.util.spec_from_file_location(
+        f"{dirname}.{filename}", path
+    )
+    assert spec is not None
+    assert spec.loader is not None
+
+    test_module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(test_module)
+
+
+LINENO_MAPPING = {
+    3: "uint128",
+    4: "uint256",
+    6: "int128",
+    7: "int256",
+    9: "float80",
+    10: "float96",
+    11: "float128",
+    12: "float256",
+    14: "complex160",
+    15: "complex192",
+    16: "complex256",
+    17: "complex512",
+}
+
+
+@pytest.mark.slow
+@pytest.mark.skipif(NO_MYPY, reason="Mypy is not installed")
+def test_extended_precision() -> None:
+    path = os.path.join(MISC_DIR, "extended_precision.pyi")
+    output_mypy = OUTPUT_MYPY
+    assert path in output_mypy
+
+    with open(path, "r") as f:
+        expression_list = f.readlines()
+
+    for _msg in output_mypy[path]:
+        *_, _lineno, msg_typ, msg = _msg.split(":")
+
+        msg = _strip_filename(msg)
+        lineno = int(_lineno)
+        expression = expression_list[lineno - 1].rstrip("\n")
+        msg_typ = msg_typ.strip()
+        assert msg_typ in {"error", "note"}
+
+        if LINENO_MAPPING[lineno] in _EXTENDED_PRECISION_LIST:
+            if msg_typ == "error":
+                raise ValueError(f"Unexpected reveal line format: {lineno}")
+            else:
+                marker = FORMAT_DICT[LINENO_MAPPING[lineno]]
+                _test_reveal(path, expression, marker, msg, lineno)
+        else:
+            if msg_typ == "error":
+                marker = "Module has no attribute"
+                _test_fail(path, expression, marker, msg, lineno) diff --git a/.local/lib/python3.8/site-packages/numpy/version.py b/.local/lib/python3.8/site-packages/numpy/version.py new file mode 100644 index 0000000..d5657d0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/numpy/version.py @@ -0,0 +1,15 @@
+from __future__ import annotations
+
+from ._version import get_versions
+
+__ALL__ = ['version', '__version__', 'full_version', 'git_revision', 'release']
+
+vinfo: dict[str, str] = get_versions()
+version = vinfo["version"]
+__version__ = 
vinfo.get("closest-tag", vinfo["version"]) +full_version = vinfo['version'] +git_revision = vinfo['full-revisionid'] +release = 'dev0' not in version and '+' not in version +short_version = vinfo['version'].split("+")[0] + +del get_versions, vinfo diff --git a/.local/lib/python3.8/site-packages/pandas-1.4.1.dist-info/INSTALLER b/.local/lib/python3.8/site-packages/pandas-1.4.1.dist-info/INSTALLER new file mode 100644 index 0000000..a1b589e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas-1.4.1.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/.local/lib/python3.8/site-packages/pandas-1.4.1.dist-info/LICENSE b/.local/lib/python3.8/site-packages/pandas-1.4.1.dist-info/LICENSE new file mode 100644 index 0000000..a0cc369 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas-1.4.1.dist-info/LICENSE @@ -0,0 +1,31 @@ +BSD 3-Clause License + +Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team +All rights reserved. + +Copyright (c) 2011-2021, Open source contributors. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
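The reveal-style `.pyi` files earlier in this diff and the `test_typing.py` harness above communicate through bare `# E:` comments. A minimal, self-contained sketch of that matching convention (the sample line and mypy note are hypothetical, shaped after `test_reveal` and `_parse_reveals` above):

```python
import re

# A line in the style of the reveal test files above (hypothetical sample).
test_line = 'reveal_type(np.add.nin)  # E: Literal[2]'

# What mypy would emit for it, in the "<lineno>: note: ..." shape that
# test_reveal parses (hypothetical output).
mypy_note = '42: note: Revealed type is "Literal[2]"'

# Split the expression from its expected type, as _parse_reveals does.
expression, _, expected = test_line.partition(" # E: ")

# The test passes when the expected substring occurs in mypy's note.
match = re.match(r"(?P<lineno>\d+): note: (?P<msg>.+)$", mypy_note)
assert match is not None and expected in match.group("msg")
print(f"line {match.group('lineno')}: {expression.strip()} -> {expected}")
```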
diff --git a/.local/lib/python3.8/site-packages/pandas-1.4.1.dist-info/METADATA b/.local/lib/python3.8/site-packages/pandas-1.4.1.dist-info/METADATA new file mode 100644 index 0000000..94ba7bc --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas-1.4.1.dist-info/METADATA @@ -0,0 +1,214 @@ +Metadata-Version: 2.1 +Name: pandas +Version: 1.4.1 +Summary: Powerful data structures for data analysis, time series, and statistics +Home-page: https://pandas.pydata.org +Author: The Pandas Development Team +Author-email: pandas-dev@python.org +License: BSD-3-Clause +Project-URL: Bug Tracker, https://github.com/pandas-dev/pandas/issues +Project-URL: Documentation, https://pandas.pydata.org/pandas-docs/stable +Project-URL: Source Code, https://github.com/pandas-dev/pandas +Platform: any +Classifier: Development Status :: 5 - Production/Stable +Classifier: Environment :: Console +Classifier: Intended Audience :: Science/Research +Classifier: License :: OSI Approved :: BSD License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Cython +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3 :: Only +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Topic :: Scientific/Engineering +Requires-Python: >=3.8 +Description-Content-Type: text/markdown +License-File: LICENSE +Requires-Dist: python-dateutil (>=2.8.1) +Requires-Dist: pytz (>=2020.1) +Requires-Dist: numpy (>=1.18.5) ; platform_machine != "aarch64" and platform_machine != "arm64" and python_version < "3.10" +Requires-Dist: numpy (>=1.19.2) ; platform_machine == "aarch64" and python_version < "3.10" +Requires-Dist: numpy (>=1.20.0) ; platform_machine == "arm64" and python_version < "3.10" +Requires-Dist: numpy (>=1.21.0) ; python_version >= "3.10" +Provides-Extra: test +Requires-Dist: hypothesis (>=5.5.3) ; extra == 'test' +Requires-Dist: pytest (>=6.0) ; extra == 'test' +Requires-Dist: pytest-xdist (>=1.31) ; extra == 'test' + +
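The conditional `Requires-Dist` lines above gate the numpy pin on PEP 508 environment markers. A small sketch of evaluating one of those markers with the third-party `packaging` library (an assumed extra tool here, not a dependency this metadata declares):

```python
from packaging.markers import Marker  # pip install packaging

# Marker copied from the numpy (>=1.18.5) requirement above.
marker = Marker(
    'platform_machine != "aarch64" and platform_machine != "arm64" '
    'and python_version < "3.10"'
)

# True on e.g. x86-64 CPython 3.8, so that numpy pin would apply there.
print(marker.evaluate())
```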
+ +----------------- + +# pandas: powerful Python data analysis toolkit +[![PyPI Latest Release](https://img.shields.io/pypi/v/pandas.svg)](https://pypi.org/project/pandas/) +[![Conda Latest Release](https://anaconda.org/conda-forge/pandas/badges/version.svg)](https://anaconda.org/anaconda/pandas/) +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3509134.svg)](https://doi.org/10.5281/zenodo.3509134) +[![Package Status](https://img.shields.io/pypi/status/pandas.svg)](https://pypi.org/project/pandas/) +[![License](https://img.shields.io/pypi/l/pandas.svg)](https://github.com/pandas-dev/pandas/blob/main/LICENSE) +[![Azure Build Status](https://dev.azure.com/pandas-dev/pandas/_apis/build/status/pandas-dev.pandas?branch=main)](https://dev.azure.com/pandas-dev/pandas/_build/latest?definitionId=1&branch=main) +[![Coverage](https://codecov.io/github/pandas-dev/pandas/coverage.svg?branch=main)](https://codecov.io/gh/pandas-dev/pandas) +[![Downloads](https://static.pepy.tech/personalized-badge/pandas?period=month&units=international_system&left_color=black&right_color=orange&left_text=PyPI%20downloads%20per%20month)](https://pepy.tech/project/pandas) +[![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/pydata/pandas) +[![Powered by NumFOCUS](https://img.shields.io/badge/powered%20by-NumFOCUS-orange.svg?style=flat&colorA=E1523D&colorB=007D8A)](https://numfocus.org) +[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) +[![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/) + +## What is it? + +**pandas** is a Python package that provides fast, flexible, and expressive data +structures designed to make working with "relational" or "labeled" data both +easy and intuitive. It aims to be the fundamental high-level building block for +doing practical, **real world** data analysis in Python. Additionally, it has +the broader goal of becoming **the most powerful and flexible open source data +analysis / manipulation tool available in any language**. It is already well on +its way towards this goal. + +## Main Features +Here are just a few of the things that pandas does well: + + - Easy handling of [**missing data**][missing-data] (represented as + `NaN`, `NA`, or `NaT`) in floating point as well as non-floating point data + - Size mutability: columns can be [**inserted and + deleted**][insertion-deletion] from DataFrame and higher dimensional + objects + - Automatic and explicit [**data alignment**][alignment]: objects can + be explicitly aligned to a set of labels, or the user can simply + ignore the labels and let `Series`, `DataFrame`, etc. 
automatically + align the data for you in computations + - Powerful, flexible [**group by**][groupby] functionality to perform + split-apply-combine operations on data sets, for both aggregating + and transforming data + - Make it [**easy to convert**][conversion] ragged, + differently-indexed data in other Python and NumPy data structures + into DataFrame objects + - Intelligent label-based [**slicing**][slicing], [**fancy + indexing**][fancy-indexing], and [**subsetting**][subsetting] of + large data sets + - Intuitive [**merging**][merging] and [**joining**][joining] data + sets + - Flexible [**reshaping**][reshape] and [**pivoting**][pivot-table] of + data sets + - [**Hierarchical**][mi] labeling of axes (possible to have multiple + labels per tick) + - Robust IO tools for loading data from [**flat files**][flat-files] + (CSV and delimited), [**Excel files**][excel], [**databases**][db], + and saving/loading data from the ultrafast [**HDF5 format**][hdfstore] + - [**Time series**][timeseries]-specific functionality: date range + generation and frequency conversion, moving window statistics, + date shifting and lagging + + + [missing-data]: https://pandas.pydata.org/pandas-docs/stable/user_guide/missing_data.html + [insertion-deletion]: https://pandas.pydata.org/pandas-docs/stable/user_guide/dsintro.html#column-selection-addition-deletion + [alignment]: https://pandas.pydata.org/pandas-docs/stable/user_guide/dsintro.html?highlight=alignment#intro-to-data-structures + [groupby]: https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#group-by-split-apply-combine + [conversion]: https://pandas.pydata.org/pandas-docs/stable/user_guide/dsintro.html#dataframe + [slicing]: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#slicing-ranges + [fancy-indexing]: https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html#advanced + [subsetting]: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#boolean-indexing + [merging]: https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html#database-style-dataframe-or-named-series-joining-merging + [joining]: https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html#joining-on-index + [reshape]: https://pandas.pydata.org/pandas-docs/stable/user_guide/reshaping.html + [pivot-table]: https://pandas.pydata.org/pandas-docs/stable/user_guide/reshaping.html + [mi]: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#hierarchical-indexing-multiindex + [flat-files]: https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#csv-text-files + [excel]: https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#excel-files + [db]: https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#sql-queries + [hdfstore]: https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#hdf5-pytables + [timeseries]: https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#time-series-date-functionality + +## Where to get it +The source code is currently hosted on GitHub at: +https://github.com/pandas-dev/pandas + +Binary installers for the latest released version are available at the [Python +Package Index (PyPI)](https://pypi.org/project/pandas) and on [Conda](https://docs.conda.io/en/latest/). 
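Before the install commands below, the data-alignment and group-by items from the feature list above, condensed into a runnable sketch (toy data, not from the README):

```python
import pandas as pd

# Automatic data alignment: arithmetic matches on labels, not position.
a = pd.Series([1, 2, 3], index=["x", "y", "z"])
b = pd.Series([10, 20], index=["z", "x"])
print(a + b)  # x=21.0, y=NaN (no matching label), z=13.0

# Split-apply-combine: group rows on a key and aggregate each group.
df = pd.DataFrame({"key": ["a", "b", "a"], "val": [1, 2, 3]})
print(df.groupby("key")["val"].sum())  # a -> 4, b -> 2
```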
+ +```sh +# conda +conda install pandas +``` + +```sh +# or PyPI +pip install pandas +``` + +## Dependencies +- [NumPy - Adds support for large, multi-dimensional arrays, matrices and high-level mathematical functions to operate on these arrays](https://www.numpy.org) +- [python-dateutil - Provides powerful extensions to the standard datetime module](https://dateutil.readthedocs.io/en/stable/index.html) +- [pytz - Brings the Olson tz database into Python which allows accurate and cross platform timezone calculations](https://github.com/stub42/pytz) + +See the [full installation instructions](https://pandas.pydata.org/pandas-docs/stable/install.html#dependencies) for minimum supported versions of required, recommended and optional dependencies. + +## Installation from sources +To install pandas from source you need [Cython](https://cython.org/) in addition to the normal +dependencies above. Cython can be installed from PyPI: + +```sh +pip install cython +``` + +In the `pandas` directory (same one where you found this file after +cloning the git repo), execute: + +```sh +python setup.py install +``` + +or for installing in [development mode](https://pip.pypa.io/en/latest/cli/pip_install/#install-editable): + + +```sh +python -m pip install -e . --no-build-isolation --no-use-pep517 +``` + +If you have `make`, you can also use `make develop` to run the same command. + +or alternatively + +```sh +python setup.py develop +``` + +See the full instructions for [installing from source](https://pandas.pydata.org/pandas-docs/stable/install.html#installing-from-source). + +## License +[BSD 3](LICENSE) + +## Documentation +The official documentation is hosted on PyData.org: https://pandas.pydata.org/pandas-docs/stable + +## Background +Work on ``pandas`` started at [AQR](https://www.aqr.com/) (a quantitative hedge fund) in 2008 and +has been under active development since then. + +## Getting Help + +For usage questions, the best place to go to is [StackOverflow](https://stackoverflow.com/questions/tagged/pandas). +Further, general questions and discussions can also take place on the [pydata mailing list](https://groups.google.com/forum/?fromgroups#!forum/pydata). + +## Discussion and Development +Most development discussions take place on GitHub in this repo. Further, the [pandas-dev mailing list](https://mail.python.org/mailman/listinfo/pandas-dev) can also be used for specialized discussions or design issues, and a [Gitter channel](https://gitter.im/pydata/pandas) is available for quick development related questions. + +## Contributing to pandas [![Open Source Helpers](https://www.codetriage.com/pandas-dev/pandas/badges/users.svg)](https://www.codetriage.com/pandas-dev/pandas) + +All contributions, bug reports, bug fixes, documentation improvements, enhancements, and ideas are welcome. + +A detailed overview on how to contribute can be found in the **[contributing guide](https://pandas.pydata.org/docs/dev/development/contributing.html)**. + +If you are simply looking to start working with the pandas codebase, navigate to the [GitHub "issues" tab](https://github.com/pandas-dev/pandas/issues) and start looking through interesting issues. There are a number of issues listed under [Docs](https://github.com/pandas-dev/pandas/issues?labels=Docs&sort=updated&state=open) and [good first issue](https://github.com/pandas-dev/pandas/issues?labels=good+first+issue&sort=updated&state=open) where you could start out. 
+ +You can also triage issues which may include reproducing bug reports, or asking for vital information such as version numbers or reproduction instructions. If you would like to start triaging issues, one easy way to get started is to [subscribe to pandas on CodeTriage](https://www.codetriage.com/pandas-dev/pandas). + +Or maybe through using pandas you have an idea of your own or are looking for something in the documentation and thinking ‘this can be improved’...you can do something about it! + +Feel free to ask questions on the [mailing list](https://groups.google.com/forum/?fromgroups#!forum/pydata) or on [Gitter](https://gitter.im/pydata/pandas). + +As contributors and maintainers to this project, you are expected to abide by pandas' code of conduct. More information can be found at: [Contributor Code of Conduct](https://github.com/pandas-dev/pandas/blob/main/.github/CODE_OF_CONDUCT.md) + + diff --git a/.local/lib/python3.8/site-packages/pandas-1.4.1.dist-info/RECORD b/.local/lib/python3.8/site-packages/pandas-1.4.1.dist-info/RECORD new file mode 100644 index 0000000..dab752c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas-1.4.1.dist-info/RECORD @@ -0,0 +1,2815 @@ +pandas-1.4.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +pandas-1.4.1.dist-info/LICENSE,sha256=mJWCvkT10ibfXZv4_jlKIzOATDa6lhB6or3pLQCz7p8,1634 +pandas-1.4.1.dist-info/METADATA,sha256=SfbaFc3JSXa1nvAMRgmtdvQ9-iIC08qI1LkIggnzLV8,12238 +pandas-1.4.1.dist-info/RECORD,, +pandas-1.4.1.dist-info/WHEEL,sha256=-ijGDuALlPxm3HbhKntps0QzHsi-DPlXqgerYTTJkFE,148 +pandas-1.4.1.dist-info/entry_points.txt,sha256=OVLKNEPs-Q7IWypWBL6fxv56_zt4sRnEI7zawo6y_0w,69 +pandas-1.4.1.dist-info/top_level.txt,sha256=_W-EYOwsRjyO7fqakAIX0J3vvvCqzSWZ8z5RtnXISDw,7 +pandas/__init__.py,sha256=C3_29u0iWypQ6FFYqQJzbFKlcYW06jKTN7TmIO5c96M,10222 +pandas/__pycache__/__init__.cpython-38.pyc,, +pandas/__pycache__/_typing.cpython-38.pyc,, +pandas/__pycache__/_version.cpython-38.pyc,, +pandas/__pycache__/conftest.cpython-38.pyc,, +pandas/__pycache__/testing.cpython-38.pyc,, +pandas/_config/__init__.py,sha256=jzEp5Rpr9WGcaLUuXUKeHQcmWNh3OGBmRTFKTGVMq4s,669 +pandas/_config/__pycache__/__init__.cpython-38.pyc,, +pandas/_config/__pycache__/config.cpython-38.pyc,, +pandas/_config/__pycache__/dates.cpython-38.pyc,, +pandas/_config/__pycache__/display.cpython-38.pyc,, +pandas/_config/__pycache__/localization.cpython-38.pyc,, +pandas/_config/config.py,sha256=JTfOY6eFxmtPUiPc0OEN9XS3BzPxtLu_AXnmSeeTc-A,23848 +pandas/_config/dates.py,sha256=u2rl3gkqVBRFadm4GcgijNpZjtxFm88EDH3sopO-3fs,632 +pandas/_config/display.py,sha256=xv_TetWUhFlVpog23QzyhMYsScops_OOsWIAGnmKdJ8,1804 +pandas/_config/localization.py,sha256=36CzFQSh42zga5q90jrE-Z-5p0z_uPDEoCnUIfOGjXk,5255 +pandas/_libs/__init__.py,sha256=EadclD1M9bLo-Ae6EHyb_k80U8meOTYbT7sdJv4tFjc,323 +pandas/_libs/__pycache__/__init__.cpython-38.pyc,, +pandas/_libs/algos.cpython-38-x86_64-linux-gnu.so,sha256=3l3bIDtCnWGaXH1muM0rQ7kTDkpkluGniz6hEzYzi5E,1707880 +pandas/_libs/algos.pxd,sha256=JtgGxJJ3axewXcRNn98q3eKgFYAE2NHUQCCE9EUy92Q,278 +pandas/_libs/algos.pyi,sha256=FODqyBMym0HzVBGmZmorxEos6Xzk-VIkq0vHO8hW_M4,15874 +pandas/_libs/algos.pyx,sha256=fEkJphLoXYgSt_OObyTlbIQYaQAtO4XbIQlJxPZrdvU,49104 +pandas/_libs/algos_common_helper.pxi.in,sha256=lAh933g2uPP71ubkpe9Dc5Mb2bfOZiEX9SQnDPS1Vso,2183 +pandas/_libs/algos_take_helper.pxi.in,sha256=iTugXONKwWMZWK2yyx1uvfPHt9l4PDPZ7OZAiTJ5AtU,6147 +pandas/_libs/arrays.cpython-38-x86_64-linux-gnu.so,sha256=e8Xs3YI58LUgPph3uS-5JUl45_Sa0P6QJmIBlzO--oo,101728 
+pandas/_libs/arrays.pxd,sha256=2qzLtfBeiV6KcS7J-d5nsW-3hd63tZR5JXgwIa6dKG8,233 +pandas/_libs/arrays.pyi,sha256=id4DKvhEUQNj2Y2osAXL5Lz2lIfF_Ayaiz5oCLUizeY,940 +pandas/_libs/arrays.pyx,sha256=Bk5iUH_n9vbeFkXQ5aoS-u__0EiaPGOn4b8Ja4PDSMo,5848 +pandas/_libs/dtypes.pxd,sha256=zL_H8T9a2mFrdVnIT7u0T9Q1E5lz904EYY_127fnBjY,736 +pandas/_libs/groupby.cpython-38-x86_64-linux-gnu.so,sha256=iv-P804surzftHO3PoQHZdQDnz6z_AiL0XIOkVSe_nE,1362696 +pandas/_libs/groupby.pyi,sha256=Zh-xIwOATszpckD_pTSDvngv3Z9Ry-7Bv5jKGfe8M_Q,5246 +pandas/_libs/groupby.pyx,sha256=XfNuiFPu7IMfkmabQoNZGgMyenb9F2dvrsw4eLqyQgg,50578 +pandas/_libs/hashing.cpython-38-x86_64-linux-gnu.so,sha256=hz2n1KAnT9ZX8tG8bAkkNAm_RcVtzDer0cu1bMZngXo,196712 +pandas/_libs/hashing.pyi,sha256=cdNwppEilaMnVN77ABt3TBadjUawMtMFgSQb1PCqwQk,181 +pandas/_libs/hashing.pyx,sha256=979cxclgLijnciAHlBZ2g6a-E_zWylkNj5ca9RWdIJ8,4731 +pandas/_libs/hashtable.cpython-38-x86_64-linux-gnu.so,sha256=wnb4kLlB2jgq-Nx850rxHmCK6TiAqM__XmATY3iOLj4,1503272 +pandas/_libs/hashtable.pxd,sha256=5S5nknCvXEgyH8Py2trKQ4KvlR2io2xs-qX523Io2ok,3213 +pandas/_libs/hashtable.pyi,sha256=fi9L_0v9oX9P-f3WOwhjZYf1t32k_bhbymgvNLWTnU0,6404 +pandas/_libs/hashtable.pyx,sha256=LTs_gUOlbuJeikpM1-Z1Z-TyIovhsSeSPpr8XI3k2hc,4586 +pandas/_libs/hashtable_class_helper.pxi.in,sha256=QNhoN0EZPGfocqDDKVj4K6Jum-a3cv5_vCoK1KS2Cdo,44355 +pandas/_libs/hashtable_func_helper.pxi.in,sha256=4W881vTVinTv8zSVHPCMlxuUhnpPxZb3NyrYDKnUUQ4,15039 +pandas/_libs/index.cpython-38-x86_64-linux-gnu.so,sha256=UDEBJ_ZpZSujiY0YoVnk2J7MJJpoB7SDjogA-pwMX-0,520776 +pandas/_libs/index.pyi,sha256=J2mmeivfPYkPH52M5FAZ-_67R4g-9oz4BRwR1kJyxzY,2218 +pandas/_libs/index.pyx,sha256=i6ukM5MhFKCltmpQZSMV8SQq9o-sWEb9KOWodDvMI2I,25989 +pandas/_libs/index_class_helper.pxi.in,sha256=vpXvnbVToEqUKZM5CfXBIdtzBRnKI7GU8VLsaJXbxio,1376 +pandas/_libs/indexing.cpython-38-x86_64-linux-gnu.so,sha256=6OvHSidbFYB0XkY5wNYFlx6TrIwfQC46Slfbtnen8H0,49952 +pandas/_libs/indexing.pyx,sha256=v2iXnfI3lqJVvtzAcSBpWcUbGrQSC1ZET2snGCZliqM,753 +pandas/_libs/internals.cpython-38-x86_64-linux-gnu.so,sha256=21niXmDQGaY7-KN5IckO_gU9x95NCyKqrJATp-NUAa8,351016 +pandas/_libs/internals.pyi,sha256=pOPFLF3D44xt-UPgPoYQ5M6sV3BJ0qPE-hfKNZ7QqrY,2428 +pandas/_libs/internals.pyx,sha256=2l3iu2bkwh-LbMNtHTAqpdVClszg8aRHC62WUecUlWU,24000 +pandas/_libs/interval.cpython-38-x86_64-linux-gnu.so,sha256=nxk7DEkV1IYRXNKk05rcj9BgikQRTcImuCjUXERbU-A,1338472 +pandas/_libs/interval.pyx,sha256=F4psS0SZePCM-xWyojmdSgIGcM2YQr1fL1e9z476e2c,16608 +pandas/_libs/intervaltree.pxi.in,sha256=_nj3K4pxiGBgmz59XsWFKwB-PaHMncYcLkeHB6j9BLw,14829 +pandas/_libs/join.cpython-38-x86_64-linux-gnu.so,sha256=mVlfn2141XPfW28NUpKdmAi-QkEgOkMuk8YP2nbj2K0,3087272 +pandas/_libs/join.pyi,sha256=XL6oZ0nDa70GCqN9GhxiOIGmmst_BxvXVEYWwNvjCFQ,3350 +pandas/_libs/join.pyx,sha256=CcM0ls4WqJ1ibWhU6VABwFWMsuC6ZA5iBUGg5bf5vPg,31899 +pandas/_libs/json.cpython-38-x86_64-linux-gnu.so,sha256=R7vcc5kCzcKnDeAUfX78I37rLqTmv4p_Sq4-d6uGfEo,93504 +pandas/_libs/khash.pxd,sha256=NABSfaCqzYwxNjdysjM3w2VOEPagQ5InM25wCH-QAMM,3800 +pandas/_libs/khash_for_primitive_helper.pxi.in,sha256=xlSmXxv0rgboILfh4jqWNem1oqIK6aL-FBO2yeYTWTk,1421 +pandas/_libs/lib.cpython-38-x86_64-linux-gnu.so,sha256=0NpRoKYajibyFIgUu1DZiL6MoEUi3KrKN4ledeEJZtU,701544 +pandas/_libs/lib.pxd,sha256=nq2dcqCT0V_j4uIC8X5-7h32MpITWsGSnU14revqL3Q,139 +pandas/_libs/lib.pyi,sha256=OMs8gspMT44aTimRh2cpRZXSjqYH-0CO0rPC2hoMX_0,7578 +pandas/_libs/lib.pyx,sha256=9Ha_9NG-PavEfjR0fzRCd8ClvPH-gLJBh6s_3Uc8JEQ,89097 
+pandas/_libs/missing.cpython-38-x86_64-linux-gnu.so,sha256=DeZQRHGrOuNNcPVDMtxhNFY7vZfcOQKIrFjmuTc0n5I,197480 +pandas/_libs/missing.pxd,sha256=gavVcysAdcd3WizCXx5xPja4OmEL6pxGlM5kuoTHhbA,408 +pandas/_libs/missing.pyi,sha256=IQdDsi7b-mP1swDLCMnHsW0C0Gk4yBB5TaZzrFdkIps,636 +pandas/_libs/missing.pyx,sha256=J0xkMwiSMDv6RO9YSWAAW39Mv-ioMALjAfId1rm03H0,13911 +pandas/_libs/ops.cpython-38-x86_64-linux-gnu.so,sha256=5TY5NUZs-xmLixr5rb-ZC4Ce1i9epJdZNvZFR1peizU,244040 +pandas/_libs/ops.pyi,sha256=uUf24IkC9NlHkbGtvYpPWQy6NxmblCyom963SPXL8vI,1265 +pandas/_libs/ops.pyx,sha256=P8NZVT40iaTm9GXLtny2RPDvxhfYemt_49Zheak23pM,7749 +pandas/_libs/ops_dispatch.cpython-38-x86_64-linux-gnu.so,sha256=UORiOlWoiUy_HumlG45I3pWwlS_tjUwKO_R7RxrbeDg,61088 +pandas/_libs/ops_dispatch.pyi,sha256=Yxq3SUJ-qoMZ8ErL7wfHfCsTTcETOuu0FuoCOyhmGl0,124 +pandas/_libs/ops_dispatch.pyx,sha256=HgrjG1FNtMmq6vhIupHFz-7eqHM169MSkuwkKbw3WPs,2570 +pandas/_libs/parsers.cpython-38-x86_64-linux-gnu.so,sha256=47-L_3pYKcUDjSv4zP4DNM751Hdo7aI33KteNh18Nrg,543144 +pandas/_libs/parsers.pyi,sha256=dRzS76XxCMTZommWT2u4bStlemXXODd-z8iWac3muz0,2259 +pandas/_libs/parsers.pyx,sha256=KzoYeUECeU7_jVbiqv5YIW0jkds-6CVhxkTdft5Movo,68845 +pandas/_libs/properties.cpython-38-x86_64-linux-gnu.so,sha256=7Z4UxeRQrOjDJ9mp8aA70_DuTJNRDTMUeYy6WYAhdXo,63424 +pandas/_libs/properties.pyi,sha256=rGOc7G06a8tO-r35jyKqNdmEfzgQFhIhBjFIHnGrBPo,330 +pandas/_libs/properties.pyx,sha256=HBdIdK1hFG1p_HuTsEGLofJhko3UAvBsUFlwsyfvHaU,1633 +pandas/_libs/reduction.cpython-38-x86_64-linux-gnu.so,sha256=ocznqfGfbpo4v469EsZzoAjCybLmPopskB3p5vMTiFk,36800 +pandas/_libs/reduction.pyx,sha256=CHI5CD5bxokhdvJfUQ4KgCcIHe4ygWZcvwLUoOy3c8I,1090 +pandas/_libs/reshape.cpython-38-x86_64-linux-gnu.so,sha256=jEEVRv841qzx6n7nFthyqwOIpvBTaU_87eTbY8Vn_us,271880 +pandas/_libs/reshape.pyi,sha256=xaU-NNnRhXVT9AVrksVXrbKfAC7Ny9p-Vwp6srRoGns,419 +pandas/_libs/reshape.pyx,sha256=nW11irTfHnqbv6SaKkb_p5aD_tnJL7KiNKikHgiaf7s,3391 +pandas/_libs/sparse.cpython-38-x86_64-linux-gnu.so,sha256=rgL1Af1LGhrkeuzsFEE8AZddvRdBo5FfpP1RBvYt_hg,850088 +pandas/_libs/sparse.pyi,sha256=RWBeGss3-mxZP63Q38JNf-_zJLODhzJH_rpT5rPUT4Q,1401 +pandas/_libs/sparse.pyx,sha256=1qvN36fx5-uYtDFbzYuN2AbpS_dgaGOusFv6hq4B2TE,21115 +pandas/_libs/sparse_op_helper.pxi.in,sha256=WJSDvOGhQP_0vzRnDaxwBUnOvuh2DBsu8nmF2Zfqfzw,9362 +pandas/_libs/src/headers/cmath,sha256=aXynHHw6iV4S_VzT_i4yjHyVMrq7jEznpcBcMcl_khA,1337 +pandas/_libs/src/headers/ms_inttypes.h,sha256=vBU5Im3pYvCjB2m68U-R0J5OCQh3jAtNyB2l0CnZ6ic,7998 +pandas/_libs/src/headers/ms_stdint.h,sha256=-OBR-4LBD4GYLtPfT7dXIkURJWfHSajktkiD9c3fxFo,7719 +pandas/_libs/src/headers/portable.h,sha256=fXWrWONUB0J7C8U_pe4-LNJ6t26vsWOu_XppeEEDhW8,602 +pandas/_libs/src/headers/stdint.h,sha256=dKCZWNSqOaLwlYFhYLOR-C_sTO0Z2pZ3H0QOoVULQJY,160 +pandas/_libs/src/inline_helper.h,sha256=zIlo0jkPp1UszszqWKBVjk2fqgri81uDnBiULzA3jDg,781 +pandas/_libs/src/klib/khash.h,sha256=zuW95gghHsOrRnwTc1BVAClRASeC-G5xUXOtDeQeSnI,23872 +pandas/_libs/src/klib/khash_python.h,sha256=I2qRzE1bKeVkcQuwAhMW71SMAUVbbFrbQEOPcom5ezo,13495 +pandas/_libs/src/parse_helper.h,sha256=8vfv-4T4lP37LVX76f8dX-Xt7OvNpfQVEe7qmtsORng,2780 +pandas/_libs/src/parser/io.c,sha256=qfaISggoFYKmtbggRAjXGihpVLgURnqeaQyJebLktAo,2163 +pandas/_libs/src/parser/io.h,sha256=_cjQs1pFsqkLoRRS2K1OBDFnnm1Cqsh_uV-EzuW8_eI,789 +pandas/_libs/src/parser/tokenizer.c,sha256=fgQKEEfla0Y5G2Pfb5_a0sk2gdRVAbZBwahBIFZ1nZQ,65354 +pandas/_libs/src/parser/tokenizer.h,sha256=A2vqbbBVceXS3jg6MLMwPWykqHQNvi1On9tL8h3y6ec,6354 
+pandas/_libs/src/skiplist.h,sha256=tSLS1YmJaOkrPQcMzdXZDvGnwiKlCD2WVjkulj1g6Ik,7755 +pandas/_libs/src/ujson/lib/ultrajson.h,sha256=TiYdRYZ65MZ1MRh9E4RRm1RarnOZDjk763X0WLvFJVI,10272 +pandas/_libs/src/ujson/lib/ultrajsondec.c,sha256=C4peLpHDFzhm6qooA6sC-IdvtjBt_HnHeH3riCJis7s,31005 +pandas/_libs/src/ujson/lib/ultrajsonenc.c,sha256=7eY_lzv7SYIV3cORtqokQzYv2E7ID2ABa60LtwFWoXI,30424 +pandas/_libs/src/ujson/python/JSONtoObj.c,sha256=zDN8ZOcXKzO8I_gTkLOpC8Zk9dvrFRafkG4nG0yqm1o,18679 +pandas/_libs/src/ujson/python/date_conversions.c,sha256=1Rm6tXGXOn5AHBrpWpENiYsYW4U9vJt0fHjB3pFzsnY,4465 +pandas/_libs/src/ujson/python/date_conversions.h,sha256=IqBOr__Cf22xF-DVBhrXf8lCXAtRSZ-SM9WY0aXm62E,1622 +pandas/_libs/src/ujson/python/objToJSON.c,sha256=ud_Q8uSm5kDpY3W4lrv5yiQLX33IMEzfD5h0X3HfZ3Q,65342 +pandas/_libs/src/ujson/python/ujson.c,sha256=pYxCJhL2Bmbbt1vZq6v84gh3UZIkA_z2z4rsCBnV4ug,3673 +pandas/_libs/src/ujson/python/version.h,sha256=XpgmEU6oUHozUxPs225ei_1DDilQViEXidOCArBslX0,2192 +pandas/_libs/testing.cpython-38-x86_64-linux-gnu.so,sha256=_Hv-8MbsgLlzBP2vCPqDlc3J4o-I9wy-gm4r8Bk3T24,97216 +pandas/_libs/testing.pyi,sha256=_fpEWiBmlWGR_3QUj1RU42WCTtW2Ug-EXHpM-kP6vB0,243 +pandas/_libs/testing.pyx,sha256=twy45NExG7f2yCx10j__sQAIa56a8_CG67q3uclsTxQ,5790 +pandas/_libs/tslib.cpython-38-x86_64-linux-gnu.so,sha256=rZbm0BJGoZ_lwmiXJwcjApJYgPSOLrf2_L5kpSqvNfg,192456 +pandas/_libs/tslib.pyi,sha256=Ic11pg23Lhcpsg_8DVhmsj8ugyeeVolsVf1NYb4w5n0,725 +pandas/_libs/tslib.pyx,sha256=-QPNw-JmHQ98nyWJ8z3NnpM310kTCkHegjQDFkxmK0o,25374 +pandas/_libs/tslibs/__init__.py,sha256=5xR80Bk-cUENHL1ahDWs0VfiSU86WeQDxqWF8UXfLSM,1534 +pandas/_libs/tslibs/__pycache__/__init__.cpython-38.pyc,, +pandas/_libs/tslibs/base.cpython-38-x86_64-linux-gnu.so,sha256=wvk1J17giRZBHHPEXqoSbHnHGJwy5gS3IYcMHBEs5uk,45408 +pandas/_libs/tslibs/base.pxd,sha256=5dct5J7hkV2NFWk-c-yibbKNdFUBuYMENrJV4POJgG8,85 +pandas/_libs/tslibs/base.pyx,sha256=KVdmIMg5nMNIiUJgq-VZg03NCFdUfC1vvXSRD4T09fk,293 +pandas/_libs/tslibs/ccalendar.cpython-38-x86_64-linux-gnu.so,sha256=5fBDKIlZgGuLZ4dSzqHe7ml0-_NQq54NfKKsTM_RDEk,73248 +pandas/_libs/tslibs/ccalendar.pxd,sha256=7O4jEQvS3nSvBdzefsOn702WyND8LZvW4Fxh__4pmf8,698 +pandas/_libs/tslibs/ccalendar.pyi,sha256=dizWWmYtxWa5Lc4Hv69iRaJoazRhegJaDGWYgWtJu-U,502 +pandas/_libs/tslibs/ccalendar.pyx,sha256=Qa78GkZ4iTKMCk4Www3xXeaLzHIlk44K8btj8rGXLiQ,7062 +pandas/_libs/tslibs/conversion.cpython-38-x86_64-linux-gnu.so,sha256=lhqa8AGFWiY3YqlW6ftnZTJlMJo5qLGYFLTF05v0fTY,309096 +pandas/_libs/tslibs/conversion.pxd,sha256=SjaksBa4zpF8f7KSfzTbV1Oj2vHNf7toq1niS_JsCQs,941 +pandas/_libs/tslibs/conversion.pyi,sha256=H13myJMHX1zamGbWeFItKfaGxOyHuRODl_b2ym42kOs,788 +pandas/_libs/tslibs/conversion.pyx,sha256=y3YlLR6iVksEnTntREaliTAf_jdthHkFZGrs7wEOcDA,24709 +pandas/_libs/tslibs/dtypes.cpython-38-x86_64-linux-gnu.so,sha256=YSrr6j6FBbXbDOSpipzQj1VgF5w0ObgKlYoJjNEdm38,127648 +pandas/_libs/tslibs/dtypes.pxd,sha256=k1sUm_2GEYencESdTW85SsLbEhjWWeDZxcXq9lrfCCM,2540 +pandas/_libs/tslibs/dtypes.pyi,sha256=NCozu1qYJmWLew9_5o41fzjk_C2hi8SNiU6mE5Uhf7c,1374 +pandas/_libs/tslibs/dtypes.pyx,sha256=iYEqzDUS0QCmbOhCUQrciJy3Ifl1hxwbzLWD68SiQ1M,8866 +pandas/_libs/tslibs/fields.cpython-38-x86_64-linux-gnu.so,sha256=DCAgqQkVdhDsyjhhB28xryobF1NiYRfybn8aYIqcu3A,324232 +pandas/_libs/tslibs/fields.pyi,sha256=NlT-M3iswvnSltXyUxtvjwTWequ8npc34L39EBv8JXQ,1481 +pandas/_libs/tslibs/fields.pyx,sha256=aC1T8YKH5TIOZdpmed3LyxhHvzO5liAYI-rrhH0vCdM,20468 +pandas/_libs/tslibs/nattype.cpython-38-x86_64-linux-gnu.so,sha256=XKinVli7vsCjzjPIJRjN7G8SKyLH3eEdHgAEKciXBjA,202976 
+pandas/_libs/tslibs/nattype.pxd,sha256=7-AMaV-g_6xAWacHgOeQ369ZUJtXO_PVCxqzkfYIqD8,339 +pandas/_libs/tslibs/nattype.pyi,sha256=5ZQ_USwmUiqX46n5qBX6Sv5ylLb6nTGUTC43nee2Kfs,5821 +pandas/_libs/tslibs/nattype.pyx,sha256=AbFn_FNL1HGotahKvzUQ5fQBsw-Pae73VPc9lCYVGR4,36931 +pandas/_libs/tslibs/np_datetime.cpython-38-x86_64-linux-gnu.so,sha256=vkZOlT0-xWSDeMei-aQuietUlsEIxrMJ5T408Bfa3y0,49104 +pandas/_libs/tslibs/np_datetime.pxd,sha256=izwl-XnI9gXdnBl22VyPEhX4B-Sp_ogE4AIY3PpB3uE,2398 +pandas/_libs/tslibs/np_datetime.pyi,sha256=Kdu5pctwQ29Bzo1sCPI-vtNLPmL3PzKfVVv1zGBKwls,43 +pandas/_libs/tslibs/np_datetime.pyx,sha256=_YK4dLB_z-ukIuzvTM70XnFq0H62zbMvDjG0eyLHZps,6085 +pandas/_libs/tslibs/offsets.cpython-38-x86_64-linux-gnu.so,sha256=Lj4q5GF0aEgeGWuQNzEiRMpnEZfQApacP-LCEEn8KnI,1084520 +pandas/_libs/tslibs/offsets.pxd,sha256=nSUGf1ed8DCzo0I_GsRbc7dFZ7QsMotFOUV2yraLM7g,237 +pandas/_libs/tslibs/offsets.pyx,sha256=Sa7AezF5R0yw5pxArBH8_5LlAt5pslebhLKHJDMxN58,127830 +pandas/_libs/tslibs/parsing.cpython-38-x86_64-linux-gnu.so,sha256=PwyuO0lasHKS7EdqGjK9Ti3VAgqJt48KXBRqaV3gHS0,465480 +pandas/_libs/tslibs/parsing.pxd,sha256=wimerC2hsbVoJcKGxoeKkGuNzzckio43BobIsUHaY2Q,94 +pandas/_libs/tslibs/parsing.pyi,sha256=2owuEFGeQfPO9XM9pqSuLtZgDeMuSToXK5eBVgfQiNk,1985 +pandas/_libs/tslibs/parsing.pyx,sha256=J5MjRFYc7MufywaJx0CTfQTEijdHX35YBTBMsvPR_d8,36726 +pandas/_libs/tslibs/period.cpython-38-x86_64-linux-gnu.so,sha256=ebtv2OQ7sYRB9CoKY3-LnQ7iUgSiJs74Fzf3vqq7qKw,453768 +pandas/_libs/tslibs/period.pxd,sha256=y_P7m6lBfMv8ssir-KMhFs3lFeMjmYUrwdG7_gSL2Ao,187 +pandas/_libs/tslibs/period.pyi,sha256=p9o3oWX_qdS6OSKLfubYX2PdMK3Q9_rD--2OaHR7Tr4,3508 +pandas/_libs/tslibs/period.pyx,sha256=IutZoNNEc5CGkjLkIN0Zf1BsnDsIZonWUATpgFZj_X8,79256 +pandas/_libs/tslibs/src/datetime/np_datetime.c,sha256=8Ll78Twr4MnQaOZCp-L3rmyLVnrblxEPdUENKupve80,23080 +pandas/_libs/tslibs/src/datetime/np_datetime.h,sha256=FDZHE7Io5gEZAu7ETn8-hsjVkndy8V2E9Shb-6MjQDU,2300 +pandas/_libs/tslibs/src/datetime/np_datetime_strings.c,sha256=DNSbkW-X5ty01pEu2dXzGXqIjaEYIsVylTwx5UFcM78,24772 +pandas/_libs/tslibs/src/datetime/np_datetime_strings.h,sha256=nnfdfe6mkqd67bxomHFAQGq2qIExyRkDCLqZw6O7NL0,3285 +pandas/_libs/tslibs/strptime.cpython-38-x86_64-linux-gnu.so,sha256=MzbScsCnVljty4L_tYTLMu2zrnb4fY8lISiyVRk8Svg,418024 +pandas/_libs/tslibs/strptime.pyi,sha256=8dtsJ-65Oo6vsAqR4WH5B5MqSh_E2qy1Hrtcymnesj8,286 +pandas/_libs/tslibs/strptime.pyx,sha256=wluiFbznmb9N32U8BZEnCTVhYswZa2tFi_h7FpAVOo8,29116 +pandas/_libs/tslibs/timedeltas.cpython-38-x86_64-linux-gnu.so,sha256=LokTOXrGYIDiB-ntKThtRO_ZC2hAQ5_keE6Vj2F9jZI,495688 +pandas/_libs/tslibs/timedeltas.pxd,sha256=esai98Rc1UbR3Ri-ShGl5qAystS-CtXrrszesg023PI,591 +pandas/_libs/tslibs/timedeltas.pyi,sha256=gapIFUKw8-6Km6vz_-8247Ur9opcfa97QCOmiejDtnw,2817 +pandas/_libs/tslibs/timedeltas.pyx,sha256=Z047UqQ1iY2f4c1HsA3TR_CHxWTeUi_c_dFCrCHnFEk,48263 +pandas/_libs/tslibs/timestamps.cpython-38-x86_64-linux-gnu.so,sha256=EG23ADcN1QLi73Oh-1VDp_K4q-bBOSJ1ewidkZo5NgE,538152 +pandas/_libs/tslibs/timestamps.pxd,sha256=boZWEDSUnFMiO1Tari1yXdHoSBxv2SRERgCwVpVfQUI,1059 +pandas/_libs/tslibs/timestamps.pyi,sha256=lp4fBY8RBrMOTNBnKBC0H_1rT9KaMlQKxs1NwTzSkdM,7021 +pandas/_libs/tslibs/timestamps.pyx,sha256=pJ-UZZeDP5L28skLZQ2tISKHeAdRngcbxUKBxaVZ8T0,67833 +pandas/_libs/tslibs/timezones.cpython-38-x86_64-linux-gnu.so,sha256=P8z_r72fmU3Z-p5BwXNh5G1btkRWDIDPGyTfBbdA1Sc,258184 +pandas/_libs/tslibs/timezones.pxd,sha256=ulfRcfFW3QFN0POI-QmMRTxiscgmPJiQADEKiainPag,451 
+pandas/_libs/tslibs/timezones.pyi,sha256=LczmcN-eXc7OnFbmLu3w60AXAA6NkE82BNQ6nXgSo6c,688 +pandas/_libs/tslibs/timezones.pyx,sha256=yX0FhnPeUWtQtOsN5gC2mMQiOHXxEX14fFWVYJjxng4,13006 +pandas/_libs/tslibs/tzconversion.cpython-38-x86_64-linux-gnu.so,sha256=PChxitQ7LP8YIfINO60LkIlKAiW5GCT2fkUeXwgrNGE,333000 +pandas/_libs/tslibs/tzconversion.pxd,sha256=9T7mffMbv2IDFP2Y-X825vJATQm7hh7F0MFEELdE1eQ,351 +pandas/_libs/tslibs/tzconversion.pyi,sha256=R7MEvmpUUx5JJFvyhLj5EWXz6GP63xc4_IrYplkGt-I,567 +pandas/_libs/tslibs/tzconversion.pyx,sha256=Jt6N0dtplkGwqelEohKPA9cLss9d4wA9cNQgl45I16I,18840 +pandas/_libs/tslibs/util.pxd,sha256=tWzNfMP5xFDXcF7_KKARRFopB4CMdncYxtvZsVmiAus,5225 +pandas/_libs/tslibs/vectorized.cpython-38-x86_64-linux-gnu.so,sha256=ovsU963NHKaamlVwjJlsCV2Gh0tgBE3rLA-ei2wImDA,248808 +pandas/_libs/tslibs/vectorized.pyi,sha256=PbtxmdYAvaFc3-pzaO0VCUtZ7YzAtDSash8d_Pfy-Us,1140 +pandas/_libs/tslibs/vectorized.pyx,sha256=79vVIjiIiD-pi_FS6IdPaiEcFRHjXOPuao_3RA1_kgU,12300 +pandas/_libs/util.pxd,sha256=IkOkiaUnl424vgx6do0aKkea0nawqPMxYJJ8CFE97lU,346 +pandas/_libs/window/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/_libs/window/__pycache__/__init__.cpython-38.pyc,, +pandas/_libs/window/aggregations.cpython-38-x86_64-linux-gnu.so,sha256=zpUQVTNwLK6ClIIyb09wzzcmiQXzEeqTtkFuXj7nzrU,391384 +pandas/_libs/window/aggregations.pyi,sha256=VAaBpwBcpLwKVjydF0Q3VJF0Cl3HEteYs6Ym4EGYnNo,4042 +pandas/_libs/window/aggregations.pyx,sha256=gexWNuDzv0_xtRmOCOI3B6RuFVsRWZ9-Sa7-u6Ci6aY,59190 +pandas/_libs/window/indexers.cpython-38-x86_64-linux-gnu.so,sha256=_Q0WZtkc47umLhibsdLtzPKOhpGcoA1cgRPcbihHf6g,188520 +pandas/_libs/window/indexers.pyi,sha256=53aBxew7jBcAc9sbSoOlvpQHhiLDSWPXFcVbCeJDbQA,319 +pandas/_libs/window/indexers.pyx,sha256=dr8FcPQNGQiV2k2dQB-LoGabkb_68pT0ECHTDl7ZZUA,4282 +pandas/_libs/writers.cpython-38-x86_64-linux-gnu.so,sha256=p-vKvCyWWTGfhmvGERMLAtZefdJBahzD9HCIJlGT7GM,229352 +pandas/_libs/writers.pyi,sha256=tZZC_C1ueQh8iz3UH7VgLDReUI9NRxaUGgX2yfBsm1A,580 +pandas/_libs/writers.pyx,sha256=s4faOWIYDC-8Kq0uPA3rWcSGtRiUJHHTZMqUBjjAWyo,4490 +pandas/_testing/__init__.py,sha256=KuQSOlvGTt9g5yTzU9BNWnRCGY3DO08L4TbbbDoq-u8,31188 +pandas/_testing/__pycache__/__init__.cpython-38.pyc,, +pandas/_testing/__pycache__/_hypothesis.cpython-38.pyc,, +pandas/_testing/__pycache__/_io.cpython-38.pyc,, +pandas/_testing/__pycache__/_random.cpython-38.pyc,, +pandas/_testing/__pycache__/_warnings.cpython-38.pyc,, +pandas/_testing/__pycache__/asserters.cpython-38.pyc,, +pandas/_testing/__pycache__/compat.cpython-38.pyc,, +pandas/_testing/__pycache__/contexts.cpython-38.pyc,, +pandas/_testing/_hypothesis.py,sha256=jkn3plK-9OeSH8m9l-sJnUwRh0ax24qpdV6YUV_6a1s,2310 +pandas/_testing/_io.py,sha256=FezSSZNIcityaFtMDIkD-kwqLqHFWPypXg2PDmohmo4,11812 +pandas/_testing/_random.py,sha256=__e3x_eE7rcKx7QH4HaICxtZNYPp8C0ZsYzJk4z3D7g,1088 +pandas/_testing/_warnings.py,sha256=AL12-z6mEqyhHtsE6TJnFMziwRy3e-9Uqi-LqNQJBro,7171 +pandas/_testing/asserters.py,sha256=h9yAtdtCGJ-ndJVHeOl9qQlhUqT2a9vHpiUF4cf_QyY,49283 +pandas/_testing/compat.py,sha256=w-JdZn9GAm_3cFPFndGd0sNixlnWayPNaNVcPeIr2xo,518 +pandas/_testing/contexts.py,sha256=U-rVRCnC9uWlapXuFk0KsN1TBUrDwWbJMWmlrV45Eeo,5542 +pandas/_typing.py,sha256=MwmMNlUKvo7onZh-YJwUFpzGDVbxNVsQSFmCqER1_Vs,8515 +pandas/_version.py,sha256=FW-Pv7Cg28ZFae0cUGX_mMq9XcnG3WC_YFex_EFe9_I,497 +pandas/api/__init__.py,sha256=BQPkOPl-x8gXAKXfMl9jDfU6WxI58T0nRlswfQliUFE,108 +pandas/api/__pycache__/__init__.cpython-38.pyc,, 
+pandas/api/extensions/__init__.py,sha256=O7tmzpvIT0uv9H5K-yMTKcwZpml9cEaB5CLVMiUkRCk,685 +pandas/api/extensions/__pycache__/__init__.cpython-38.pyc,, +pandas/api/indexers/__init__.py,sha256=kNbZv9nja9iLVmGZU2D6w2dqB2ndsbqTfcsZsGz_Yo0,357 +pandas/api/indexers/__pycache__/__init__.cpython-38.pyc,, +pandas/api/types/__init__.py,sha256=d6jVFKCNtSuNLsI2vR-INIeutY4jUskjPD80WK2DVh4,453 +pandas/api/types/__pycache__/__init__.cpython-38.pyc,, +pandas/arrays/__init__.py,sha256=vaUkZ850nptJMo3ZXGyqxl5KRathXRsWydTJfDmpQlo,602 +pandas/arrays/__pycache__/__init__.cpython-38.pyc,, +pandas/compat/__init__.py,sha256=pn5ZtLVV818COQw1uUYKatQiLndW7roN6MjEl2iZ7EU,3121 +pandas/compat/__pycache__/__init__.cpython-38.pyc,, +pandas/compat/__pycache__/_optional.cpython-38.pyc,, +pandas/compat/__pycache__/chainmap.cpython-38.pyc,, +pandas/compat/__pycache__/pickle_compat.cpython-38.pyc,, +pandas/compat/__pycache__/pyarrow.cpython-38.pyc,, +pandas/compat/_optional.py,sha256=KLrS0-EoDufvy7BikkmnsVzgks9RMP1rYcP5NU6bOy0,4645 +pandas/compat/chainmap.py,sha256=1r3mscfyHtmSIlprLqmZYUwBwpzKgK0yWY7eldCdZJA,791 +pandas/compat/numpy/__init__.py,sha256=6ETfwNRuvugEBPuk8gcgHkvh4e9umvE9JPYlZJoDhpE,912 +pandas/compat/numpy/__pycache__/__init__.cpython-38.pyc,, +pandas/compat/numpy/__pycache__/function.cpython-38.pyc,, +pandas/compat/numpy/function.py,sha256=k_l9rq6DtgxxwTCATkZuVKsSX2FnIDI4I7e5acNIMgI,13003 +pandas/compat/pickle_compat.py,sha256=KeXt16NQE5J8vkth4FuVoh1XwphE5K9NW7Z69SSlBiY,8653 +pandas/compat/pyarrow.py,sha256=AhQ4rWc_AaFDxVZbrtEeiJFAkMmrEnEMC9HZP9-jrO4,707 +pandas/conftest.py,sha256=bY479LlQYUTVQbP1ECzazm1L4Afn1j7jp1AKHYXKAsk,44403 +pandas/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/core/__pycache__/__init__.cpython-38.pyc,, +pandas/core/__pycache__/accessor.cpython-38.pyc,, +pandas/core/__pycache__/algorithms.cpython-38.pyc,, +pandas/core/__pycache__/api.cpython-38.pyc,, +pandas/core/__pycache__/apply.cpython-38.pyc,, +pandas/core/__pycache__/arraylike.cpython-38.pyc,, +pandas/core/__pycache__/base.cpython-38.pyc,, +pandas/core/__pycache__/common.cpython-38.pyc,, +pandas/core/__pycache__/config_init.cpython-38.pyc,, +pandas/core/__pycache__/construction.cpython-38.pyc,, +pandas/core/__pycache__/describe.cpython-38.pyc,, +pandas/core/__pycache__/flags.cpython-38.pyc,, +pandas/core/__pycache__/frame.cpython-38.pyc,, +pandas/core/__pycache__/generic.cpython-38.pyc,, +pandas/core/__pycache__/index.cpython-38.pyc,, +pandas/core/__pycache__/indexing.cpython-38.pyc,, +pandas/core/__pycache__/missing.cpython-38.pyc,, +pandas/core/__pycache__/nanops.cpython-38.pyc,, +pandas/core/__pycache__/resample.cpython-38.pyc,, +pandas/core/__pycache__/roperator.cpython-38.pyc,, +pandas/core/__pycache__/sample.cpython-38.pyc,, +pandas/core/__pycache__/series.cpython-38.pyc,, +pandas/core/__pycache__/shared_docs.cpython-38.pyc,, +pandas/core/__pycache__/sorting.cpython-38.pyc,, +pandas/core/_numba/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/core/_numba/__pycache__/__init__.cpython-38.pyc,, +pandas/core/_numba/__pycache__/executor.cpython-38.pyc,, +pandas/core/_numba/executor.py,sha256=JIyFXAaZPVVom3l88iG5wQbafIG-34iq4L6oQPzHqys,1726 +pandas/core/_numba/kernels/__init__.py,sha256=NlF-IV0W9qeOsExEYGQHBwf-IVcv7lm627eFl5WvAL0,311 +pandas/core/_numba/kernels/__pycache__/__init__.cpython-38.pyc,, +pandas/core/_numba/kernels/__pycache__/mean_.cpython-38.pyc,, +pandas/core/_numba/kernels/__pycache__/min_max_.cpython-38.pyc,, 
+pandas/core/_numba/kernels/__pycache__/shared.cpython-38.pyc,, +pandas/core/_numba/kernels/__pycache__/sum_.cpython-38.pyc,, +pandas/core/_numba/kernels/__pycache__/var_.cpython-38.pyc,, +pandas/core/_numba/kernels/mean_.py,sha256=j3sP0zEGbT5mBp7kxWzxn0cp6vKl7hT88Auc94YqaR0,2868 +pandas/core/_numba/kernels/min_max_.py,sha256=t4lnCvKq_IuTFi5TgTf482087hEvbknM1rUbSNMCRVA,1857 +pandas/core/_numba/kernels/shared.py,sha256=nRmVuJnQ9tlAX7ph2ZVmPHh3bcgAdqMpQr3NOxwD7BQ,518 +pandas/core/_numba/kernels/sum_.py,sha256=uhWsuppRhHsCxkkzFDs8urTxjG8vw16boY78G2K1QmY,2527 +pandas/core/_numba/kernels/var_.py,sha256=_hBGpjtZvMF7l6aHtgK9k4SiiVonOswbrpfIaKKd84I,3233 +pandas/core/accessor.py,sha256=Oc7lWwPcobbkzzDPcFPbfujbjKUgGRBw4mRrKX1AJuc,8692 +pandas/core/algorithms.py,sha256=VlZXpVmEvjhgWSGkhHFfEtn75XQcOlnQZFcWrFnNFzw,56783 +pandas/core/api.py,sha256=m1oucD1js3UvZqpBUxPVWkxLCGoZETh-UENSzVRRD_k,1964 +pandas/core/apply.py,sha256=8DHQRXR2xwPT5jJzgvAyaybCuPoc2e6dEPT9U4Bz9XI,49425 +pandas/core/array_algos/__init__.py,sha256=8YLlO6TysEPxltfbNKdG9MlVXeDLfTIGNo2nUR-Zwl0,408 +pandas/core/array_algos/__pycache__/__init__.cpython-38.pyc,, +pandas/core/array_algos/__pycache__/masked_reductions.cpython-38.pyc,, +pandas/core/array_algos/__pycache__/putmask.cpython-38.pyc,, +pandas/core/array_algos/__pycache__/quantile.cpython-38.pyc,, +pandas/core/array_algos/__pycache__/replace.cpython-38.pyc,, +pandas/core/array_algos/__pycache__/take.cpython-38.pyc,, +pandas/core/array_algos/__pycache__/transforms.cpython-38.pyc,, +pandas/core/array_algos/masked_reductions.py,sha256=v_MaQZ3rD5XkEZ1jXact2qmFutk8Qr4GfIiRfcgKbYo,3721 +pandas/core/array_algos/putmask.py,sha256=lLzROIvHtihyFW7-me6WaLySBuuINIZ5e3YMkqgyk54,6156 +pandas/core/array_algos/quantile.py,sha256=ySPnAS30EWO9r6Anw_gpZtp74HMfo8jeDs62BtMFumk,5030 +pandas/core/array_algos/replace.py,sha256=MXTm4Tt59yqbwLq5qx3TCwdRUIBcumBpN_s1KExTkb8,4186 +pandas/core/array_algos/take.py,sha256=skfSOTcrRzGiWLpDVL-MMyI6apNFJgcxH4IYz9q-xJo,20513 +pandas/core/array_algos/transforms.py,sha256=gHmRl_uxsxAyVlP2WJ12qB7la6KwJz3TlqGwaIs7pOQ,925 +pandas/core/arraylike.py,sha256=megM7hfCxqrl5ytNjk4veTcuY_NE_UZx_c92i1X4Pr0,18629 +pandas/core/arrays/__init__.py,sha256=4hJ3rTz_tEkrqWo0_vb7zTpa-h9fzfRuTUnQj5EsbIo,1214 +pandas/core/arrays/__pycache__/__init__.cpython-38.pyc,, +pandas/core/arrays/__pycache__/_arrow_utils.cpython-38.pyc,, +pandas/core/arrays/__pycache__/_mixins.cpython-38.pyc,, +pandas/core/arrays/__pycache__/_ranges.cpython-38.pyc,, +pandas/core/arrays/__pycache__/base.cpython-38.pyc,, +pandas/core/arrays/__pycache__/boolean.cpython-38.pyc,, +pandas/core/arrays/__pycache__/categorical.cpython-38.pyc,, +pandas/core/arrays/__pycache__/datetimelike.cpython-38.pyc,, +pandas/core/arrays/__pycache__/datetimes.cpython-38.pyc,, +pandas/core/arrays/__pycache__/floating.cpython-38.pyc,, +pandas/core/arrays/__pycache__/integer.cpython-38.pyc,, +pandas/core/arrays/__pycache__/interval.cpython-38.pyc,, +pandas/core/arrays/__pycache__/masked.cpython-38.pyc,, +pandas/core/arrays/__pycache__/numeric.cpython-38.pyc,, +pandas/core/arrays/__pycache__/numpy_.cpython-38.pyc,, +pandas/core/arrays/__pycache__/period.cpython-38.pyc,, +pandas/core/arrays/__pycache__/string_.cpython-38.pyc,, +pandas/core/arrays/__pycache__/string_arrow.cpython-38.pyc,, +pandas/core/arrays/__pycache__/timedeltas.cpython-38.pyc,, +pandas/core/arrays/_arrow_utils.py,sha256=u6d493l2JNvorPCZvKWVI7d4qwhzrN8f6hWqHGf5ANE,4333 +pandas/core/arrays/_mixins.py,sha256=B783q6PhyA35WIsV_PKt9Y38OiLQtanQiophZevikxM,15940 
+pandas/core/arrays/_ranges.py,sha256=9HPOyQY0GLROqkfdKx7ruM-GmVqA5CcMUmIp5IJHx7w,6759 +pandas/core/arrays/base.py,sha256=Ox1WPbmZ51G0-bf1UXEyXxqxuohBQf4BiPy0N8jpOyc,59775 +pandas/core/arrays/boolean.py,sha256=JcIRs0WQtd3xM86lwEa3QGbn7NbGnAdQ8hBZmYfoh6c,16148 +pandas/core/arrays/categorical.py,sha256=npKJTTT51VF_SCIJ5-wreEDE7K9TVYJ4m-ownQIoHl8,94502 +pandas/core/arrays/datetimelike.py,sha256=VD1Dpx6lDTA8Mo_w4f8wpv02nwXtoAEm4Qk8uYAADMY,65862 +pandas/core/arrays/datetimes.py,sha256=42dLhcESwcPSAqGj1nfPydkPAG4-sd0Hy_KEzJxV7I8,84603 +pandas/core/arrays/floating.py,sha256=EUWNzmAk3BsakIr-T55OuE0KEDao9PJKBREYRXRiUmA,10612 +pandas/core/arrays/integer.py,sha256=MjMvIekglZODxvkmqLbVMTolkbIFDgmIAufbowScXgE,13617 +pandas/core/arrays/interval.py,sha256=fzwY396WqfdVf-c2fBY28w7jEenuzXnb9r0O_r5kkK0,56785 +pandas/core/arrays/masked.py,sha256=_h5Rl2LG6Ehoy6i8omP1_gFy5jbSWP2DzriYrRIy8ns,33663 +pandas/core/arrays/numeric.py,sha256=rSklUXF6DNu-qBXyVW75Mr48rh--cT8_LcOgHiakubA,6547 +pandas/core/arrays/numpy_.py,sha256=YoNoFVFgt4cXBIXwBShGNbHY_JBec3erUM8PRVp7_g8,13949 +pandas/core/arrays/period.py,sha256=tlmsq5JXzafgWu6cGJEzngJfq2KB6vg7J_6J3kffb5Y,38056 +pandas/core/arrays/sparse/__init__.py,sha256=dvT633vOcYweqbTMKnFanijIeOS1nZKbj3BJP8T86Gs,292 +pandas/core/arrays/sparse/__pycache__/__init__.cpython-38.pyc,, +pandas/core/arrays/sparse/__pycache__/accessor.cpython-38.pyc,, +pandas/core/arrays/sparse/__pycache__/array.cpython-38.pyc,, +pandas/core/arrays/sparse/__pycache__/dtype.cpython-38.pyc,, +pandas/core/arrays/sparse/__pycache__/scipy_sparse.cpython-38.pyc,, +pandas/core/arrays/sparse/accessor.py,sha256=to-3RtAFhk-CxhcpYfIYPZ06lOfjRB24B1eZHUq41qs,11720 +pandas/core/arrays/sparse/array.py,sha256=2X3F-Pv2QQFTPCjxzlF5Cf7yycBcVc69VhxEAg-Um7I,61465 +pandas/core/arrays/sparse/dtype.py,sha256=7oFVNV6L8Zc90gFYzNCK9RlmWlFbuu63KQXD2VmPfic,12889 +pandas/core/arrays/sparse/scipy_sparse.py,sha256=mfWVORV4gZmIpqKbcD1A98XAnmJJNp8TdDOOBRCqHdo,6508 +pandas/core/arrays/string_.py,sha256=ydGf-O5EkOnIjg_T1TBgK4MzHhS2VQVWx5Z5LyrEAJQ,17856 +pandas/core/arrays/string_arrow.py,sha256=rNIPmwSTUa9ucdQMZ_5ADxPI0Xq7eDa0Tpf31wm-Fz0,28415 +pandas/core/arrays/timedeltas.py,sha256=AQ-6-NhADh15OXUd-u2PWiOi7A1RHTi2NsmSFyyy_fg,37454 +pandas/core/base.py,sha256=0j4TxU6xqZdzFflzKnecZpc60XNMB6AWVlLnAvgSLX0,39398 +pandas/core/common.py,sha256=j2ok7aUWw_mNn8L-I0UDnP6aSJ9rHA3WSUMqkt8LSBw,17150 +pandas/core/computation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/core/computation/__pycache__/__init__.cpython-38.pyc,, +pandas/core/computation/__pycache__/align.cpython-38.pyc,, +pandas/core/computation/__pycache__/api.cpython-38.pyc,, +pandas/core/computation/__pycache__/check.cpython-38.pyc,, +pandas/core/computation/__pycache__/common.cpython-38.pyc,, +pandas/core/computation/__pycache__/engines.cpython-38.pyc,, +pandas/core/computation/__pycache__/eval.cpython-38.pyc,, +pandas/core/computation/__pycache__/expr.cpython-38.pyc,, +pandas/core/computation/__pycache__/expressions.cpython-38.pyc,, +pandas/core/computation/__pycache__/ops.cpython-38.pyc,, +pandas/core/computation/__pycache__/parsing.cpython-38.pyc,, +pandas/core/computation/__pycache__/pytables.cpython-38.pyc,, +pandas/core/computation/__pycache__/scope.cpython-38.pyc,, +pandas/core/computation/align.py,sha256=hOJDqA40keI7PNBQPDo76pj0_71dhcDFPBue_tzF2ik,6086 +pandas/core/computation/api.py,sha256=CQ2AF0hwydcgTHycMCFiyZIAU57RcZT-TVid17SIsV4,65 +pandas/core/computation/check.py,sha256=e_mYOuo7iB_i-TOwrWpWfoUKoSqwGC1dDacu9nROTLQ,301 
+pandas/core/computation/common.py,sha256=GNcuMW5apCe9ZJARp8NmALg-2p6ZX7CovyuZt5sHMTo,632 +pandas/core/computation/engines.py,sha256=VzBQd7XX7XgW0OAtaLNs48K6DexGqbzqDp83wch3zvs,3272 +pandas/core/computation/eval.py,sha256=apiH9naaEvgteaN1jC0axs_Y_i2nECtbPzmYnRcAm1g,13544 +pandas/core/computation/expr.py,sha256=sd9yrprjERy1Sxk7lU4xgZ5rxW8Q4VlYZsSDqUJXwhE,24828 +pandas/core/computation/expressions.py,sha256=qV51VHuLXIeeZdLWA8BbnwUBWTRzuV9PCjzf3PDYerg,7328 +pandas/core/computation/ops.py,sha256=IcPzpjaeu8vL-_6Z47kCKjuJugtWAkIdIsN-CGW8Lpg,16140 +pandas/core/computation/parsing.py,sha256=6nbgwVLYfhkpF4n1NdzeK-h2oftMSTDVSeHYlJ0DUQk,6322 +pandas/core/computation/pytables.py,sha256=7V2fhTW1QEPEnu3UgBXVu3RlSYS41opdBYCqQAzisiU,19641 +pandas/core/computation/scope.py,sha256=joCYCny0ntFLIfkN4ILQpEtGIf1YylzI2H6XIdYIyxE,9705 +pandas/core/config_init.py,sha256=lbTTzxrHtC7HLMKWgw0wMHbL01UsMiXMbA7u_6ictCs,27384 +pandas/core/construction.py,sha256=wrnYVmk2mG7U2sSs-n1jEUsMNCx9OCbMgHgjxr1BbcM,28329 +pandas/core/describe.py,sha256=l6b1yClfkA5MI6qaI6cAR4mZtYReRk8PSxBK3cXSYwo,12980 +pandas/core/dtypes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/core/dtypes/__pycache__/__init__.cpython-38.pyc,, +pandas/core/dtypes/__pycache__/api.cpython-38.pyc,, +pandas/core/dtypes/__pycache__/base.cpython-38.pyc,, +pandas/core/dtypes/__pycache__/cast.cpython-38.pyc,, +pandas/core/dtypes/__pycache__/common.cpython-38.pyc,, +pandas/core/dtypes/__pycache__/concat.cpython-38.pyc,, +pandas/core/dtypes/__pycache__/dtypes.cpython-38.pyc,, +pandas/core/dtypes/__pycache__/generic.cpython-38.pyc,, +pandas/core/dtypes/__pycache__/inference.cpython-38.pyc,, +pandas/core/dtypes/__pycache__/missing.cpython-38.pyc,, +pandas/core/dtypes/api.py,sha256=YRpg81j_19sFPZLnGT3r5B-AWBJmeeNVHM9tD1RmkWo,916 +pandas/core/dtypes/base.py,sha256=KLEqz1qiTVahUGxW1YAoLZtAwsMqunIqPslhMPApZDA,14602 +pandas/core/dtypes/cast.py,sha256=dhEjOjR-MKHjhpEWBJ-SsaDNnhc5uCSiVdmOZ9NOSdw,75790 +pandas/core/dtypes/common.py,sha256=4mH8MM_MZZgJ5_PJT3pdYNKmM9vlI1ibLAl25u4YOus,47733 +pandas/core/dtypes/concat.py,sha256=cwOK199i9Su3IsJqnBOK0GKDPnY3rR9PH_PA6ts59oA,12862 +pandas/core/dtypes/dtypes.py,sha256=lv57p0RpxKeKw0RmV2n-8zYYcTd9sLmPbdEwU-mNnd8,43421 +pandas/core/dtypes/generic.py,sha256=r5TgZUpMetX5eDp3JoHDRNJoOxDiT-8dHIJa-BDYdp4,4186 +pandas/core/dtypes/inference.py,sha256=1vFLznsv454ue9yxyZjv2sMqilZ9hpqDScM22rYBILI,9190 +pandas/core/dtypes/missing.py,sha256=L1f4OkBXYuHXIiZW_HUHt8qYGDbvki9MHxkHH12qUFg,19713 +pandas/core/flags.py,sha256=z9Q43vmmPhpMTsQ56uVTc2xHpyG_hzpS7SmQZ4GaWzU,3579 +pandas/core/frame.py,sha256=7jM4bQk4xGTgLqavVSJOb2byYg41meM9b5SwgUkNg04,376629 +pandas/core/generic.py,sha256=RKXghOo2yT5CkfiyPt47EFWvlcoekCDpE7UkCJzyCps,404473 +pandas/core/groupby/__init__.py,sha256=KamY9WI5B4cMap_3wZ5ycMdXM_rOxGSL7RtoKKPfjAo,301 +pandas/core/groupby/__pycache__/__init__.cpython-38.pyc,, +pandas/core/groupby/__pycache__/base.cpython-38.pyc,, +pandas/core/groupby/__pycache__/categorical.cpython-38.pyc,, +pandas/core/groupby/__pycache__/generic.cpython-38.pyc,, +pandas/core/groupby/__pycache__/groupby.cpython-38.pyc,, +pandas/core/groupby/__pycache__/grouper.cpython-38.pyc,, +pandas/core/groupby/__pycache__/indexing.cpython-38.pyc,, +pandas/core/groupby/__pycache__/numba_.cpython-38.pyc,, +pandas/core/groupby/__pycache__/ops.cpython-38.pyc,, +pandas/core/groupby/base.py,sha256=lFEXxdD_sox-NG-HD1pJ2T9DkeWhVVvvgOOIuvBNbec,3746 +pandas/core/groupby/categorical.py,sha256=JuSbuBhB3P2SaDOeptROFEI-B-TlvZMsWUp8rj8t9a0,3820 
+pandas/core/groupby/generic.py,sha256=GzgUoH-Oqs4AxYvwaGcpr3YljuOCYhTYllNGOA1zAnE,61171 +pandas/core/groupby/groupby.py,sha256=_IMS2_OJ4Y2sxR3ytT3kBpFLmFrApAwPsVIEk6KkcrA,126940 +pandas/core/groupby/grouper.py,sha256=EiJ2Y2eyyYDg7xx7NsntNCcYTNnhoMQ7gxwQ1uQ7wdQ,33670 +pandas/core/groupby/indexing.py,sha256=TUJ0lhH6eB1trZrfT7DBiNSEWn6te55uG6-xgPhV-OU,9473 +pandas/core/groupby/numba_.py,sha256=irjuZ5c45-dgfd6cQCwUW0aMYiPy61AMJdWGlrjr_Y0,5174 +pandas/core/groupby/ops.py,sha256=sJGzv-GJNLjGOnB_z7sPAoTYKRKT42lxkmQtHN8rot0,39852 +pandas/core/index.py,sha256=Wf41GK5Skx6zS86yrtSUVI8KMF38nfePahWgOhyNaZo,735 +pandas/core/indexers/__init__.py,sha256=OfW6q1pkgzt3qhCbxo-9SJ1QLDXT3ClgBTHEsiLd4sw,792 +pandas/core/indexers/__pycache__/__init__.cpython-38.pyc,, +pandas/core/indexers/__pycache__/objects.cpython-38.pyc,, +pandas/core/indexers/__pycache__/utils.cpython-38.pyc,, +pandas/core/indexers/objects.py,sha256=jWS-KDHMbGrLzNS_eM57cX_7RN9YgPr4TvbZzxpEMyQ,12056 +pandas/core/indexers/utils.py,sha256=kwp-p5vJH9KIZvZtKG1gv7vWvhPvchTK3HsWcDdxO9w,17103 +pandas/core/indexes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/core/indexes/__pycache__/__init__.cpython-38.pyc,, +pandas/core/indexes/__pycache__/accessors.cpython-38.pyc,, +pandas/core/indexes/__pycache__/api.cpython-38.pyc,, +pandas/core/indexes/__pycache__/base.cpython-38.pyc,, +pandas/core/indexes/__pycache__/category.cpython-38.pyc,, +pandas/core/indexes/__pycache__/datetimelike.cpython-38.pyc,, +pandas/core/indexes/__pycache__/datetimes.cpython-38.pyc,, +pandas/core/indexes/__pycache__/extension.cpython-38.pyc,, +pandas/core/indexes/__pycache__/frozen.cpython-38.pyc,, +pandas/core/indexes/__pycache__/interval.cpython-38.pyc,, +pandas/core/indexes/__pycache__/multi.cpython-38.pyc,, +pandas/core/indexes/__pycache__/numeric.cpython-38.pyc,, +pandas/core/indexes/__pycache__/period.cpython-38.pyc,, +pandas/core/indexes/__pycache__/range.cpython-38.pyc,, +pandas/core/indexes/__pycache__/timedeltas.cpython-38.pyc,, +pandas/core/indexes/accessors.py,sha256=7Jh8tjzpatTQUz4_lua1G2-npq7-C_aCR5bxAfi5KUA,14709 +pandas/core/indexes/api.py,sha256=HyDFOQUD4WMw78dKLV-qG9bzlet_uANVD_w5ohL4yI8,8848 +pandas/core/indexes/base.py,sha256=HtQGCm0EJqKQ3zU9x5sd4FzG-4Ku6eryU_aw_227ZsM,241731 +pandas/core/indexes/category.py,sha256=OHXnneTHZnjtXomXcB2p-Pwb1biurfrXmBJEs9BleM0,19283 +pandas/core/indexes/datetimelike.py,sha256=vSw55bt5wnSRtFtn3KTR7-Zf4LkYfs7_yH7EjjSNCoE,23912 +pandas/core/indexes/datetimes.py,sha256=Iig60VZYJitDAJQINYaTtZUvBJbCfdGmhbyL5-N-AlA,40925 +pandas/core/indexes/extension.py,sha256=lnOzj2wkqK2HSnjyWNnReJSbUBTXW8g_DeKBqW51FL4,5796 +pandas/core/indexes/frozen.py,sha256=m-GsIXpu7RAJLxS6EHmhY3nPYAVRf_GvJWBKa42l9oE,3053 +pandas/core/indexes/interval.py,sha256=8bQAacqRhazbKHeT_9Ly7me8eZh4W3PIfljzsuv0sLA,37503 +pandas/core/indexes/multi.py,sha256=DDLRaQwwpuJt31E1qd_KMRTIpq-idedFgJrS3rTdtyY,137185 +pandas/core/indexes/numeric.py,sha256=TvtjJp_UMuUmkiv8WGu0Zm51FL4pg3uKyqKwU6X5COk,16175 +pandas/core/indexes/period.py,sha256=E6CfrlWEHM9h0J1frXLyRTi8k-zg5v7T50ag2iseNy8,18467 +pandas/core/indexes/range.py,sha256=60h5p_LKQ26I8cjMD2sSR_-4zPNC88zr-tfPeiRS50A,35224 +pandas/core/indexes/timedeltas.py,sha256=8V1DsotlLITu_ZGApiUDh1WKKGlN9lTLpzQl4yEyEPY,8477 +pandas/core/indexing.py,sha256=wqiiCrDob3EmpIWOF6zvO5xNsHv5AvSzaU5hDiz2B0w,84140 +pandas/core/internals/__init__.py,sha256=sqs0gkhwwSNnjIhVncHbB5BQrnp3YfoylSGVxGfbSc0,1569 +pandas/core/internals/__pycache__/__init__.cpython-38.pyc,, +pandas/core/internals/__pycache__/api.cpython-38.pyc,, 
+pandas/core/internals/__pycache__/array_manager.cpython-38.pyc,, +pandas/core/internals/__pycache__/base.cpython-38.pyc,, +pandas/core/internals/__pycache__/blocks.cpython-38.pyc,, +pandas/core/internals/__pycache__/concat.cpython-38.pyc,, +pandas/core/internals/__pycache__/construction.cpython-38.pyc,, +pandas/core/internals/__pycache__/managers.cpython-38.pyc,, +pandas/core/internals/__pycache__/ops.cpython-38.pyc,, +pandas/core/internals/api.py,sha256=eITfDoAMqHEV8W-UAhQ6lYJ-IDe9yet9F45RT3oqtFM,2958 +pandas/core/internals/array_manager.py,sha256=wf9WN-bcEkGSqKR_0UeN1ol1opiib1uP1FEil1RyHvc,44726 +pandas/core/internals/base.py,sha256=r07UDNk2xj9WXwKw00gPqiqKDPsYFLE0BbzsFzo11-s,5752 +pandas/core/internals/blocks.py,sha256=ayD3XTP-iUmXSoh9ZaynV10bIkoHxSnYA9IoFKbkjJs,72969 +pandas/core/internals/concat.py,sha256=9_xHpOdwtXyHWLCvmBnx1qHAEIwIaurEYyqdqCUlFjU,20934 +pandas/core/internals/construction.py,sha256=xaaMlbA3i0jMIxTgAIXkl0hQqvrGI1bM25Fi9YcwDr0,34078 +pandas/core/internals/managers.py,sha256=IAvFGxA_7HF38mMzrEBodbBl5iMWqRWGnKu9vDgEC58,72172 +pandas/core/internals/ops.py,sha256=lUW0kpC4jeYHic5-YG806PgI6sHSb8_jZ-WPW1THGfg,4941 +pandas/core/missing.py,sha256=tfadpCvDMVB13hSXtHQw49-qXsg99903MkAh9qseUfY,29921 +pandas/core/nanops.py,sha256=OS5xVI4c9VUhWlTihEHkQrCs5vG4Z_e1qMr3KeOrXUM,50002 +pandas/core/ops/__init__.py,sha256=8XwzsmR0Wnsd7HHsGmpuSSKSldPuEC5KDQFNU5YzaYc,14232 +pandas/core/ops/__pycache__/__init__.cpython-38.pyc,, +pandas/core/ops/__pycache__/array_ops.cpython-38.pyc,, +pandas/core/ops/__pycache__/common.cpython-38.pyc,, +pandas/core/ops/__pycache__/dispatch.cpython-38.pyc,, +pandas/core/ops/__pycache__/docstrings.cpython-38.pyc,, +pandas/core/ops/__pycache__/invalid.cpython-38.pyc,, +pandas/core/ops/__pycache__/mask_ops.cpython-38.pyc,, +pandas/core/ops/__pycache__/methods.cpython-38.pyc,, +pandas/core/ops/__pycache__/missing.cpython-38.pyc,, +pandas/core/ops/array_ops.py,sha256=PI70iGkc_y7Gjvm6_8YzowfGaail7jIiYGfewWLeLZE,16496 +pandas/core/ops/common.py,sha256=JiuE7dwwHt7F_9NHxyUOvwbST9FwJtHivXGxLGSpBEA,3319 +pandas/core/ops/dispatch.py,sha256=G2SEvrbKI5iIg0sPBCOxk4s3c-kVBZ7jDlAyL-jHEFI,549 +pandas/core/ops/docstrings.py,sha256=35toXs8AgU-PxEwav_ygdbJw-3aqvEFDlKYtYNOYzBM,17704 +pandas/core/ops/invalid.py,sha256=sUm_2E_QJhW9kiDWlvcDQjM2U8OdUgPEj2Dv2iQE8aM,1285 +pandas/core/ops/mask_ops.py,sha256=V_LETrfzNlMnjohMyM46h0yUKAbeeJNFS-UeN9CPIRA,5401 +pandas/core/ops/methods.py,sha256=SUG-alhoA2fLOPPluM4kJZYdNPUf13SBceLSf5M6rG4,3687 +pandas/core/ops/missing.py,sha256=0FfkoXC1YnGlyOBo2lRQVSdN1IHjl7b6hvdgxgT8KR0,5068 +pandas/core/resample.py,sha256=lN--_fBD93dBCoHICk0jVjMV2bNNL9w08BdWvTKSKjg,68577 +pandas/core/reshape/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/core/reshape/__pycache__/__init__.cpython-38.pyc,, +pandas/core/reshape/__pycache__/api.cpython-38.pyc,, +pandas/core/reshape/__pycache__/concat.cpython-38.pyc,, +pandas/core/reshape/__pycache__/melt.cpython-38.pyc,, +pandas/core/reshape/__pycache__/merge.cpython-38.pyc,, +pandas/core/reshape/__pycache__/pivot.cpython-38.pyc,, +pandas/core/reshape/__pycache__/reshape.cpython-38.pyc,, +pandas/core/reshape/__pycache__/tile.cpython-38.pyc,, +pandas/core/reshape/__pycache__/util.cpython-38.pyc,, +pandas/core/reshape/api.py,sha256=FYgxOCxrq5VaPqNU6sfqRr2dT0J251b3Qg7yWm4iNfo,434 +pandas/core/reshape/concat.py,sha256=hkmLbK2xB-nMCrXpOB4S2YpLzmrbe8xybu4JZVEf0sw,24463 +pandas/core/reshape/melt.py,sha256=hQnc7SkJHg_uxnHkO0me_AMCLOz_itZfpT5bcEW44CI,18587 
+pandas/core/reshape/merge.py,sha256=ZEL_mGj1tQt_8U2Lmc1pZIkiNdeOR9-0WodbvRLVWgg,83947 +pandas/core/reshape/pivot.py,sha256=-OwcUOiJef_S-yhbgsQp40SqI3y-QzVyhDcXNRWyqO8,26741 +pandas/core/reshape/reshape.py,sha256=saJd_2g-rKeuClSWGJGKm_g696cawXHS8BfRo93M7rY,37977 +pandas/core/reshape/tile.py,sha256=zeRhXp-EM0M7SkvEGFVt5F_DTsaBKUN_BduILiziFrI,21347 +pandas/core/reshape/util.py,sha256=Gik84iHEE2Gz7G0hWcbBFCCu466NpFMO-Brcsx6Ilyk,1662 +pandas/core/roperator.py,sha256=F8ULcax62DJD-0JEax2vBsIGf5vBLvuKhDjQR244AUc,1080 +pandas/core/sample.py,sha256=-SPkkFx3OYZhkni5YPefgqcFTELIxIDppxwqZ36NR1s,4582 +pandas/core/series.py,sha256=ag-00Olivc9lZu4VvnFUeNWNCAE4w_JRZbUpYaL352Y,173431 +pandas/core/shared_docs.py,sha256=yC1m_nnF5RA5gyZSvxrlPskJuCkfeho_tjLHTxl32lY,22432 +pandas/core/sorting.py,sha256=Fwto5XYke_q8OFVVBzMhGC9THd8hGSbuoHkGEQ-Cuh0,21750 +pandas/core/sparse/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/core/sparse/__pycache__/__init__.cpython-38.pyc,, +pandas/core/sparse/__pycache__/api.cpython-38.pyc,, +pandas/core/sparse/api.py,sha256=BvV1Hnk7kvp34mDdElwwq_nuyAHNGvkMrVqwtpjDDTM,118 +pandas/core/strings/__init__.py,sha256=MRrMW5YeR-1DlROOhDSvR0Y9V59-LO3IdFUe000RyMA,1250 +pandas/core/strings/__pycache__/__init__.cpython-38.pyc,, +pandas/core/strings/__pycache__/accessor.cpython-38.pyc,, +pandas/core/strings/__pycache__/base.cpython-38.pyc,, +pandas/core/strings/__pycache__/object_array.cpython-38.pyc,, +pandas/core/strings/accessor.py,sha256=VI9LvdJDzYwyGoasTz6eJeCRP-cfzi3OVMt9hOVz3sk,104868 +pandas/core/strings/base.py,sha256=au74R2e--W7ZE4e1TVoguNU8nCj2z5fVxb5MRK7Yvps,5225 +pandas/core/strings/object_array.py,sha256=rFxOR4T8ItVWAXTXc8vSukyz_QSgGdIEhEN6oIxXpag,15136 +pandas/core/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/core/tools/__pycache__/__init__.cpython-38.pyc,, +pandas/core/tools/__pycache__/datetimes.cpython-38.pyc,, +pandas/core/tools/__pycache__/numeric.cpython-38.pyc,, +pandas/core/tools/__pycache__/timedeltas.cpython-38.pyc,, +pandas/core/tools/__pycache__/times.cpython-38.pyc,, +pandas/core/tools/datetimes.py,sha256=a2ORCF7iK1uW3mUocju6jKc-vEGNG3_BJIIzmusdZLs,42538 +pandas/core/tools/numeric.py,sha256=rXA58Ql5lpg5skUwmi6Skd9HCn6oWttjyeQQ2ZpuOK4,8044 +pandas/core/tools/timedeltas.py,sha256=qy8NFB3iURnDfYtMmHyoqKbvIm7H2hRcD2h1LtNLvd0,6620 +pandas/core/tools/times.py,sha256=T_oo50MxO5uQHzaFNssduSo8uTmlQpvZ48CqPdT24Tc,4626 +pandas/core/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/core/util/__pycache__/__init__.cpython-38.pyc,, +pandas/core/util/__pycache__/hashing.cpython-38.pyc,, +pandas/core/util/__pycache__/numba_.cpython-38.pyc,, +pandas/core/util/hashing.py,sha256=WnOFGnAh6lSB3p6b5XQJ9pEB0iU96upbtzbrNk9oAOU,10004 +pandas/core/util/numba_.py,sha256=hwNJuLyu0M-8RzqK1Rs00ZLQKNwUVInowE6MujXeeIE,2952 +pandas/core/window/__init__.py,sha256=EyuCrcV6mULDy0fer5q3GqV9kCnyz2NHGIUlYv_TOb8,313 +pandas/core/window/__pycache__/__init__.cpython-38.pyc,, +pandas/core/window/__pycache__/common.cpython-38.pyc,, +pandas/core/window/__pycache__/doc.cpython-38.pyc,, +pandas/core/window/__pycache__/ewm.cpython-38.pyc,, +pandas/core/window/__pycache__/expanding.cpython-38.pyc,, +pandas/core/window/__pycache__/numba_.cpython-38.pyc,, +pandas/core/window/__pycache__/online.cpython-38.pyc,, +pandas/core/window/__pycache__/rolling.cpython-38.pyc,, +pandas/core/window/common.py,sha256=I_dhvPg2lRnAHXPLvQmsGNkDlm9rAX6G_spRfsNhycs,6578 +pandas/core/window/doc.py,sha256=gSrskssIHANeKQOYcbgpf9GN_-iEJuUrkiJVtcc7G20,4347 
+pandas/core/window/ewm.py,sha256=mTocIscSs3sjQk-eKGavr-Ky6gkUe11a-CST2ftACsE,33793 +pandas/core/window/expanding.py,sha256=xPO-9NKMGFT02yNLaSHIACnYOqZJQFIKayFHevZeJ4g,23648 +pandas/core/window/numba_.py,sha256=yjBmUkAuUXYYyC6an9pZz4_LFExOHl19dUaWHRsxVvU,11444 +pandas/core/window/online.py,sha256=VuXuxCFkvUFS3ELLex-VmDWeCNvashzCdnI7XuyJko4,3865 +pandas/core/window/rolling.py,sha256=56HfxABBcHdGfVa_0BSJJ6_PSoHakiv0VnyzAV6s86k,85820 +pandas/errors/__init__.py,sha256=cAQ6GhSwaFD8k1g1rp4kQ73QJreUZvuTHIUCKhTcPLk,6844 +pandas/errors/__pycache__/__init__.cpython-38.pyc,, +pandas/io/__init__.py,sha256=58NuRnRkk46Sf1NVp_uCTeoXswytQcbiumfP_F1em6o,276 +pandas/io/__pycache__/__init__.cpython-38.pyc,, +pandas/io/__pycache__/api.cpython-38.pyc,, +pandas/io/__pycache__/clipboards.cpython-38.pyc,, +pandas/io/__pycache__/common.cpython-38.pyc,, +pandas/io/__pycache__/date_converters.cpython-38.pyc,, +pandas/io/__pycache__/feather_format.cpython-38.pyc,, +pandas/io/__pycache__/gbq.cpython-38.pyc,, +pandas/io/__pycache__/html.cpython-38.pyc,, +pandas/io/__pycache__/orc.cpython-38.pyc,, +pandas/io/__pycache__/parquet.cpython-38.pyc,, +pandas/io/__pycache__/pickle.cpython-38.pyc,, +pandas/io/__pycache__/pytables.cpython-38.pyc,, +pandas/io/__pycache__/spss.cpython-38.pyc,, +pandas/io/__pycache__/sql.cpython-38.pyc,, +pandas/io/__pycache__/stata.cpython-38.pyc,, +pandas/io/__pycache__/xml.cpython-38.pyc,, +pandas/io/api.py,sha256=Bz4VM2-dl5tdpdumMrRG7LZzZLm6pfH4k9mWLBDO1bg,838 +pandas/io/clipboard/__init__.py,sha256=1ag82yNHjAL-nsWGrYfSv1Bfk3leEhjdqU88gNt4AFE,21671 +pandas/io/clipboard/__pycache__/__init__.cpython-38.pyc,, +pandas/io/clipboards.py,sha256=I16OJXlcMn4NVjC8TcGd3nbOD7mruTGmkeifLgsjeK8,4705 +pandas/io/common.py,sha256=5tU2pIB0pVbDEQw3GtwrDtuhCbcG-0Q2KrXydEbtubQ,37806 +pandas/io/date_converters.py,sha256=RuPGWSnpKlgFjViz5z-cJycZZhbiH_mJMFeHBHFia2g,3714 +pandas/io/excel/__init__.py,sha256=tR4_jcNCr-nWEgX7gqfk-p9FDAlnssikwPWPwHZC64Q,578 +pandas/io/excel/__pycache__/__init__.cpython-38.pyc,, +pandas/io/excel/__pycache__/_base.cpython-38.pyc,, +pandas/io/excel/__pycache__/_odfreader.cpython-38.pyc,, +pandas/io/excel/__pycache__/_odswriter.cpython-38.pyc,, +pandas/io/excel/__pycache__/_openpyxl.cpython-38.pyc,, +pandas/io/excel/__pycache__/_pyxlsb.cpython-38.pyc,, +pandas/io/excel/__pycache__/_util.cpython-38.pyc,, +pandas/io/excel/__pycache__/_xlrd.cpython-38.pyc,, +pandas/io/excel/__pycache__/_xlsxwriter.cpython-38.pyc,, +pandas/io/excel/__pycache__/_xlwt.cpython-38.pyc,, +pandas/io/excel/_base.py,sha256=GijXFkwkzwtFCZ68T1Mugg1NlF9zhCciLhn7KR4qWzM,53441 +pandas/io/excel/_odfreader.py,sha256=ejtHIyUv7b_woAUCU51rsfAPb_PLy2sgbH3Xbde2l4w,7609 +pandas/io/excel/_odswriter.py,sha256=ZdCFFqZFHaQp1qQjZSk2JsY-K1dAWxEDPpJJdXCweXw,9743 +pandas/io/excel/_openpyxl.py,sha256=HaNHBQUIbVfINPD_PAR8NMcQlN3-i2wqSpzG_ivHu7o,18554 +pandas/io/excel/_pyxlsb.py,sha256=HsLjLDQOOYsr5cwpfZhc7mnV1baqpKsIJmhDG7Js8ys,3777 +pandas/io/excel/_util.py,sha256=wvTANKqPA9PQvJVZpldiBjMI6I3_7t-8jk_sZgGR7lI,8150 +pandas/io/excel/_xlrd.py,sha256=sipf2POf3jw8OhhyTzhIXRcMdvzg_EcjT0pj32urE_k,3715 +pandas/io/excel/_xlsxwriter.py,sha256=_MFADTkc5Xk2BrKfF0YG0KRgkpsD2l-h3hI2V6PQK-k,8327 +pandas/io/excel/_xlwt.py,sha256=Qb-K_mL8zVQQovaPYoMoVbbY2F2bj3EACBczlagyINw,5146 +pandas/io/feather_format.py,sha256=3jJV93_L39C1cxwq55t9h_b_uEl_FkWZbltzAkW1Qdo,3781 +pandas/io/formats/__init__.py,sha256=mZhXDEIgsvq9e1MyIAyQJymEW530ej1WGh40Emc6g98,217 +pandas/io/formats/__pycache__/__init__.cpython-38.pyc,, +pandas/io/formats/__pycache__/_color_data.cpython-38.pyc,, 
+pandas/io/formats/__pycache__/console.cpython-38.pyc,, +pandas/io/formats/__pycache__/css.cpython-38.pyc,, +pandas/io/formats/__pycache__/csvs.cpython-38.pyc,, +pandas/io/formats/__pycache__/excel.cpython-38.pyc,, +pandas/io/formats/__pycache__/format.cpython-38.pyc,, +pandas/io/formats/__pycache__/html.cpython-38.pyc,, +pandas/io/formats/__pycache__/info.cpython-38.pyc,, +pandas/io/formats/__pycache__/latex.cpython-38.pyc,, +pandas/io/formats/__pycache__/printing.cpython-38.pyc,, +pandas/io/formats/__pycache__/string.cpython-38.pyc,, +pandas/io/formats/__pycache__/style.cpython-38.pyc,, +pandas/io/formats/__pycache__/style_render.cpython-38.pyc,, +pandas/io/formats/__pycache__/xml.cpython-38.pyc,, +pandas/io/formats/_color_data.py,sha256=nZOC4hv88N33FbfZBUfd_-Chd59Fxxw7xkPujdZFOu0,4296 +pandas/io/formats/console.py,sha256=QLAW-EoHY27tn7md7_mGbq-POOfIhMVSww5Gw2WHIUU,2666 +pandas/io/formats/css.py,sha256=1IiWdIZD031JWKWH6-gxh1TVZRvCDx9UIqG9VrdMsX8,8805 +pandas/io/formats/csvs.py,sha256=zZM6mHu79VGvlVoonkAy_lPNja-kntPoe0_c5EYt62s,10269 +pandas/io/formats/excel.py,sha256=emnDB9KuIiezfEbPac2ZZMnLP3tE_05ujh6eP-MvGJs,30988 +pandas/io/formats/format.py,sha256=M4omogmK7JjaWYcahcpLG9g6ABH6lWQbvZbWDDoxcJc,68756 +pandas/io/formats/html.py,sha256=IbUEK7xZWwkp9t3F_CjrDo5kGkGruaO9cC44vpvR5RQ,23252 +pandas/io/formats/info.py,sha256=GyHS9qPOnZB0zQXHZO_m2gJWU8zjWO0GfTNmO_JqEbs,32834 +pandas/io/formats/latex.py,sha256=Ibd9t_BShP8e3HoyC5DH7-ixaqcvv7Nz2mDwLfzuZEk,25068 +pandas/io/formats/printing.py,sha256=7ErAORHgcqX_UTmnfoN_G0NdXtfjjKnQYZdvI0YgANg,15952 +pandas/io/formats/string.py,sha256=HJ_bzrM_pEEdOSDd06GXsvYUVX7Jv-v_Jm7k4TDeQz8,6800 +pandas/io/formats/style.py,sha256=LOl5G8tOI4OH61qVCC3WmShfX2jOdTQd6aSKFjDBjnw,144744 +pandas/io/formats/style_render.py,sha256=zqCddMOooENUIlfg3VMS_q4ADyDo1OLD005jFk97pxs,74489 +pandas/io/formats/templates/html.tpl,sha256=KA-w_npfnHM_1c5trtJtkd3OD9j8hqtoQAY4GCC5UgI,412 +pandas/io/formats/templates/html_style.tpl,sha256=_gCqktLyUGAo5TzL3I-UCp1Njj8KyeLCWunHz4nYHsE,694 +pandas/io/formats/templates/html_table.tpl,sha256=MJxwJFwOa4KNli-ix7vYAGjRzw59FLAmYKHMy9nC32k,1811 +pandas/io/formats/templates/latex.tpl,sha256=m-YMxqKVJ52kLd61CA9V2MiC_Dtwwa-apvU8YtH8TYU,127 +pandas/io/formats/templates/latex_longtable.tpl,sha256=Rbe50DRsF5ZZMHbmL6oA0RsXwJPhHurzUdi_Lw5x2w0,2883 +pandas/io/formats/templates/latex_table.tpl,sha256=YNvnvjtwYXrWFVXndQZdJqKFIXYTUj8f1YOUdMmxXmQ,2221 +pandas/io/formats/xml.py,sha256=dvsfS1JJIRy3siOOFnNukk8Nk5u9NPW28tSuvXmu8Xo,16395 +pandas/io/gbq.py,sha256=yvkbrpU63fGHVSmh-Knf-L2V6cZVaeySyleipAJI3VQ,8087 +pandas/io/html.py,sha256=OnVPg_9xCgOVbwmlNUtTC2pf2O_WkazX_YLHn6jTV48,35340 +pandas/io/json/__init__.py,sha256=v1Lbz22eRjS0cqR79fFd245X_IS1IR86qISUfpigiBY,374 +pandas/io/json/__pycache__/__init__.cpython-38.pyc,, +pandas/io/json/__pycache__/_json.cpython-38.pyc,, +pandas/io/json/__pycache__/_normalize.cpython-38.pyc,, +pandas/io/json/__pycache__/_table_schema.cpython-38.pyc,, +pandas/io/json/_json.py,sha256=fQI22Tz2kmI0FHqmBQ0MDiUh5NwQaSZZb5qzPGEqS0A,38362 +pandas/io/json/_normalize.py,sha256=ArqvQ--qW8uZ2XhQiusIpdByHhzB36Sxp5aFnSSOrrg,17452 +pandas/io/json/_table_schema.py,sha256=REm7ovwpiSRZbwqYEQCE1cVwC2JtgYxhNZuRBCZpPR4,10496 +pandas/io/orc.py,sha256=OT7jX6ow5Yst7D8JCi7kHeA9Jl4Cbb-hx-fwQGYsyZg,1630 +pandas/io/parquet.py,sha256=6tmMxfQQSV3zTmRPoJqXGsxNtL9pG3DdAKNWvArE-vs,17254 +pandas/io/parsers/__init__.py,sha256=7BLx4kn9y5ipgfZUWZ4y_MLEUNgX6MQ5DyDwshhJxVM,204 +pandas/io/parsers/__pycache__/__init__.cpython-38.pyc,, 
+pandas/io/parsers/__pycache__/arrow_parser_wrapper.cpython-38.pyc,, +pandas/io/parsers/__pycache__/base_parser.cpython-38.pyc,, +pandas/io/parsers/__pycache__/c_parser_wrapper.cpython-38.pyc,, +pandas/io/parsers/__pycache__/python_parser.cpython-38.pyc,, +pandas/io/parsers/__pycache__/readers.cpython-38.pyc,, +pandas/io/parsers/arrow_parser_wrapper.py,sha256=xFCYztQZFIGee6buUGMiinUclqjXdSUF2mkHJKD4b-g,5780 +pandas/io/parsers/base_parser.py,sha256=CXfICjPu0FaUiwfPrGk6ofgq5m7XoxQctV-VBg8P1GU,45213 +pandas/io/parsers/c_parser_wrapper.py,sha256=g0d_5ArglHdgEUkoZqKFA3Ls3GaDUJl1QgOdn8j61ic,14833 +pandas/io/parsers/python_parser.py,sha256=C6GL1xf33refDZiORJ-Frb-2CzqqYXSbghx1MRGbAac,45990 +pandas/io/parsers/readers.py,sha256=K-Eq-b4dE8LINcqEG5N053QrrjsxhfiB0IiG0tLpQJk,62297 +pandas/io/pickle.py,sha256=h5W8H1mZB1nr3qjpXTTq8KQQoUScILxp4-xbwF-4G-Y,6925 +pandas/io/pytables.py,sha256=bXRP7o3S3pFoc6KbIBVX4dhDaoW19g_-vzk2_5BOOf8,169622 +pandas/io/sas/__init__.py,sha256=xyAIUVLKsy4cFMtXjMmg2mjI9pkaB41ZLH1gsg3x8e0,58 +pandas/io/sas/__pycache__/__init__.cpython-38.pyc,, +pandas/io/sas/__pycache__/sas7bdat.cpython-38.pyc,, +pandas/io/sas/__pycache__/sas_constants.cpython-38.pyc,, +pandas/io/sas/__pycache__/sas_xport.cpython-38.pyc,, +pandas/io/sas/__pycache__/sasreader.cpython-38.pyc,, +pandas/io/sas/_sas.cpython-38-x86_64-linux-gnu.so,sha256=E6QvOJIeeXJY40jG-KQ24C3AdvKU_w2NodyH2hx_XJc,230856 +pandas/io/sas/sas.pyx,sha256=rXCu2Y6uwHOMvNm5CmD0_LvYQLXywYeL9bph7WvdXv4,15358 +pandas/io/sas/sas7bdat.py,sha256=xBF3IDG8YitCHGhfX3DiZV4L0wqPXGREOT94fEXjOMw,30255 +pandas/io/sas/sas_constants.py,sha256=1osy4oIK4siNYqILPpHOmPqrDFhpZL8c06ywvGFEtmk,6731 +pandas/io/sas/sas_xport.py,sha256=ehc4w9sCi9vgh8iK8oaiRvCi__GGUv3cZBuCrpbGh1I,14621 +pandas/io/sas/sasreader.py,sha256=dFIjZoieb-SAIU6X_vI7njH1d2S6wBvvhVhGz7Gu-78,4265 +pandas/io/spss.py,sha256=wsp5zkThM0Ay2NIZFEhUO2Z76lqGZTDJJX2kQtckSFI,1265 +pandas/io/sql.py,sha256=QkenOB_sR-WKPru7qzyOYBKX5z-WuoB9uI8-zpVq5MM,75039 +pandas/io/stata.py,sha256=gVG9RzpToktKEx1wkceJgYdWkjN_R4x94ejvnsK0MvM,130709 +pandas/io/xml.py,sha256=S4xu6r2XAQj8byHjrWelhmIbwoiSnFiE830ETuKcqQA,30108 +pandas/plotting/__init__.py,sha256=W_2wP9v02mNCK4lV5ekG1iJHYSF8dD1NbByJiNq3g8I,2826 +pandas/plotting/__pycache__/__init__.cpython-38.pyc,, +pandas/plotting/__pycache__/_core.cpython-38.pyc,, +pandas/plotting/__pycache__/_misc.cpython-38.pyc,, +pandas/plotting/_core.py,sha256=c4i7KxwaYeSZ6XgqvFmKKDVFfHlc74-sKzDJ4Bxm7_M,63101 +pandas/plotting/_matplotlib/__init__.py,sha256=jGq_ouunQTV3zzX_crl9kCVX2ztk1p62McqD2WVRnAk,2044 +pandas/plotting/_matplotlib/__pycache__/__init__.cpython-38.pyc,, +pandas/plotting/_matplotlib/__pycache__/boxplot.cpython-38.pyc,, +pandas/plotting/_matplotlib/__pycache__/compat.cpython-38.pyc,, +pandas/plotting/_matplotlib/__pycache__/converter.cpython-38.pyc,, +pandas/plotting/_matplotlib/__pycache__/core.cpython-38.pyc,, +pandas/plotting/_matplotlib/__pycache__/groupby.cpython-38.pyc,, +pandas/plotting/_matplotlib/__pycache__/hist.cpython-38.pyc,, +pandas/plotting/_matplotlib/__pycache__/misc.cpython-38.pyc,, +pandas/plotting/_matplotlib/__pycache__/style.cpython-38.pyc,, +pandas/plotting/_matplotlib/__pycache__/timeseries.cpython-38.pyc,, +pandas/plotting/_matplotlib/__pycache__/tools.cpython-38.pyc,, +pandas/plotting/_matplotlib/boxplot.py,sha256=l9RT5Mf49LSIOBq-liz-b94bDq7uZXO47V4aPvkaETA,15884 +pandas/plotting/_matplotlib/compat.py,sha256=BS7h8xETbzxKugJAeVQWfDk2l-aYUqkGbiqqB7tXZ54,739 +pandas/plotting/_matplotlib/converter.py,sha256=5hMRTg1cezmeEoIVhQfEIjhdcqLozCZLvuwMzdQgiLo,35779 
+pandas/plotting/_matplotlib/core.py,sha256=KFHbUAamBPnPBRGrVSqu4kj9AChwcOp5mFSJA2cDxZg,57168 +pandas/plotting/_matplotlib/groupby.py,sha256=CWmElzyGgHnVMYlsevuyA1DjK1sc0WP8W8Wlf15b-CE,3954 +pandas/plotting/_matplotlib/hist.py,sha256=kSjGUHS-816VbvYFtnYcZwhiqUmOqqKXtiaCviuvnwQ,13509 +pandas/plotting/_matplotlib/misc.py,sha256=5wug6BVy4kIqOBcyXV6fwNpDFZsVlILfEDJ_Ll2oVlk,13107 +pandas/plotting/_matplotlib/style.py,sha256=ai0qcXw-mY8E8w1jOQlhIUU3kS_1xtk-BWgSj6yXPOI,8065 +pandas/plotting/_matplotlib/timeseries.py,sha256=JZ83oawbBkp8lA7XoVqLTd_kuAItxrT8l2bR3IDDg6E,10094 +pandas/plotting/_matplotlib/tools.py,sha256=OXpqvfc7ypPVGqrmS-sk9GBQRCVTwQKCEzHCeviqIDE,15319 +pandas/plotting/_misc.py,sha256=Dz2FO6BzX4-nqUsvUszHluwkpRBbNRuWZStaE4qPg8c,17111 +pandas/testing.py,sha256=3XTHuY440lezW7rxw4LW9gfxzDEa7s0l16cdnkRYwwM,313 +pandas/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/__pycache__/test_aggregation.cpython-38.pyc,, +pandas/tests/__pycache__/test_algos.cpython-38.pyc,, +pandas/tests/__pycache__/test_common.cpython-38.pyc,, +pandas/tests/__pycache__/test_downstream.cpython-38.pyc,, +pandas/tests/__pycache__/test_errors.cpython-38.pyc,, +pandas/tests/__pycache__/test_expressions.cpython-38.pyc,, +pandas/tests/__pycache__/test_flags.cpython-38.pyc,, +pandas/tests/__pycache__/test_multilevel.cpython-38.pyc,, +pandas/tests/__pycache__/test_nanops.cpython-38.pyc,, +pandas/tests/__pycache__/test_optional_dependency.cpython-38.pyc,, +pandas/tests/__pycache__/test_register_accessor.cpython-38.pyc,, +pandas/tests/__pycache__/test_sorting.cpython-38.pyc,, +pandas/tests/__pycache__/test_take.cpython-38.pyc,, +pandas/tests/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/api/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/api/__pycache__/test_api.cpython-38.pyc,, +pandas/tests/api/__pycache__/test_types.cpython-38.pyc,, +pandas/tests/api/test_api.py,sha256=YkrFymslWmTAYeuBo0rOnZUwZ5Sn9xKXqwlyITui6JA,8237 +pandas/tests/api/test_types.py,sha256=wrPqAJh903NUIoSKVRnIdpGsNXVz72KdoVnKnlY9yhY,1675 +pandas/tests/apply/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/apply/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/apply/__pycache__/common.cpython-38.pyc,, +pandas/tests/apply/__pycache__/conftest.cpython-38.pyc,, +pandas/tests/apply/__pycache__/test_frame_apply.cpython-38.pyc,, +pandas/tests/apply/__pycache__/test_frame_apply_relabeling.cpython-38.pyc,, +pandas/tests/apply/__pycache__/test_frame_transform.cpython-38.pyc,, +pandas/tests/apply/__pycache__/test_invalid_arg.cpython-38.pyc,, +pandas/tests/apply/__pycache__/test_series_apply.cpython-38.pyc,, +pandas/tests/apply/__pycache__/test_series_apply_relabeling.cpython-38.pyc,, +pandas/tests/apply/__pycache__/test_series_transform.cpython-38.pyc,, +pandas/tests/apply/__pycache__/test_str.cpython-38.pyc,, +pandas/tests/apply/common.py,sha256=0vnX_I6cf49trqQxaoj9iqt33Z9kCG-Lg4uBnquvPL4,388 +pandas/tests/apply/conftest.py,sha256=mwVPfC41ZkqEOH6yccseMVDSniNWPsG1ePeO8VYlzsw,399 +pandas/tests/apply/test_frame_apply.py,sha256=6q4VuGVWF88tidsvZMgH5keuc0ceoXRAI4tZfIzvlTY,47731 +pandas/tests/apply/test_frame_apply_relabeling.py,sha256=CqaU6fuDa3zsYI64QlGTlXIHMdliulVo9tT9xy6l0Xw,3095 +pandas/tests/apply/test_frame_transform.py,sha256=e0jNM6zPL-K3zYCWRsKErG-nd7L6KBVCEPJmqkuJEgA,8405 +pandas/tests/apply/test_invalid_arg.py,sha256=89279nZ6bbINlyEhHXEs98Fve3ovCMV9HZBF4FmK_S4,10717 
+pandas/tests/apply/test_series_apply.py,sha256=Unu5YQn5XvpP4DKbYDp_F2jjV-09r0xlgSuylA5rjZM,27906 +pandas/tests/apply/test_series_apply_relabeling.py,sha256=AMKpxNA0r-YvExVkO7KxkfQX9OhF8Orz2LARZzAcqBc,1202 +pandas/tests/apply/test_series_transform.py,sha256=S0sS72OrRoK5rfJVzNZerznc2pc-k3DTM0xhgJwlq4w,1474 +pandas/tests/apply/test_str.py,sha256=lUzAsfioAdCArgJAxpKnc0_MT3h30vjhoF0VcdBDkF8,9843 +pandas/tests/arithmetic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/arithmetic/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/arithmetic/__pycache__/common.cpython-38.pyc,, +pandas/tests/arithmetic/__pycache__/conftest.cpython-38.pyc,, +pandas/tests/arithmetic/__pycache__/test_array_ops.cpython-38.pyc,, +pandas/tests/arithmetic/__pycache__/test_categorical.cpython-38.pyc,, +pandas/tests/arithmetic/__pycache__/test_datetime64.cpython-38.pyc,, +pandas/tests/arithmetic/__pycache__/test_interval.cpython-38.pyc,, +pandas/tests/arithmetic/__pycache__/test_numeric.cpython-38.pyc,, +pandas/tests/arithmetic/__pycache__/test_object.cpython-38.pyc,, +pandas/tests/arithmetic/__pycache__/test_period.cpython-38.pyc,, +pandas/tests/arithmetic/__pycache__/test_timedelta64.cpython-38.pyc,, +pandas/tests/arithmetic/common.py,sha256=xe1bvztTFFpxZ0on4xXgtNh71524CqXVo513kKpIHSc,4338 +pandas/tests/arithmetic/conftest.py,sha256=klMFfEHg8l0d3x42yQyck33m8KvErhhOCP6t-FRx-x4,5857 +pandas/tests/arithmetic/test_array_ops.py,sha256=4lmZRZAlbJEnphzzwfcvsO4kEv1LG9l3uCmaF_8kcAA,1064 +pandas/tests/arithmetic/test_categorical.py,sha256=lK5fXv4cRIu69ocvOHfKL5bjeK0jDdW3psvrrssjDoA,742 +pandas/tests/arithmetic/test_datetime64.py,sha256=bIO1H9aTvmxtNMMvE5xxEje-XFowO8UsNhAWd1BnjSU,87523 +pandas/tests/arithmetic/test_interval.py,sha256=al3PonzMi1PofM--95FOoJ3B2ouIM7A0e76pAYMBnE4,11261 +pandas/tests/arithmetic/test_numeric.py,sha256=c5bMaqqhQzUtJNf-15ltwpp3VjNjva4c50XiYud926c,49184 +pandas/tests/arithmetic/test_object.py,sha256=47vE1ImJ5jm9-acKCpQUopLWnKm4NvVdm2FsQVS1-OM,12150 +pandas/tests/arithmetic/test_period.py,sha256=8v0eCHYCOsyr2L4Qm2fTc_Ycx13rr5tNbTka_Ui_mDc,55539 +pandas/tests/arithmetic/test_timedelta64.py,sha256=Qxdt36osEB8sak2-1-As3gT2dJpaUA_iGZZKLg2vY7A,75728 +pandas/tests/arrays/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/arrays/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/arrays/__pycache__/masked_shared.cpython-38.pyc,, +pandas/tests/arrays/__pycache__/test_array.cpython-38.pyc,, +pandas/tests/arrays/__pycache__/test_datetimelike.cpython-38.pyc,, +pandas/tests/arrays/__pycache__/test_datetimes.cpython-38.pyc,, +pandas/tests/arrays/__pycache__/test_ndarray_backed.cpython-38.pyc,, +pandas/tests/arrays/__pycache__/test_numpy.cpython-38.pyc,, +pandas/tests/arrays/__pycache__/test_period.cpython-38.pyc,, +pandas/tests/arrays/__pycache__/test_timedeltas.cpython-38.pyc,, +pandas/tests/arrays/boolean/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/arrays/boolean/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/arrays/boolean/__pycache__/test_arithmetic.cpython-38.pyc,, +pandas/tests/arrays/boolean/__pycache__/test_astype.cpython-38.pyc,, +pandas/tests/arrays/boolean/__pycache__/test_comparison.cpython-38.pyc,, +pandas/tests/arrays/boolean/__pycache__/test_construction.cpython-38.pyc,, +pandas/tests/arrays/boolean/__pycache__/test_function.cpython-38.pyc,, +pandas/tests/arrays/boolean/__pycache__/test_indexing.cpython-38.pyc,, +pandas/tests/arrays/boolean/__pycache__/test_logical.cpython-38.pyc,, 
+pandas/tests/arrays/boolean/__pycache__/test_ops.cpython-38.pyc,, +pandas/tests/arrays/boolean/__pycache__/test_reduction.cpython-38.pyc,, +pandas/tests/arrays/boolean/__pycache__/test_repr.cpython-38.pyc,, +pandas/tests/arrays/boolean/test_arithmetic.py,sha256=jm3qzJb_fqZq-IKAutEmUc37jM96z-Nj0rkImVQ029Q,3586 +pandas/tests/arrays/boolean/test_astype.py,sha256=jeAO8vaP3wl-ztAhmctM90RIfXuP_XJc76dF7Su6aNQ,1603 +pandas/tests/arrays/boolean/test_comparison.py,sha256=2ne4LJheq4YjNhszCw1JmOW3iBRIOtu5Z6DC4laggd0,1865 +pandas/tests/arrays/boolean/test_construction.py,sha256=QMOK6Apb3EbljEjsle_UXRvsdQkPi0VK88qJp8H3QnY,12295 +pandas/tests/arrays/boolean/test_function.py,sha256=h0Z7PP2kiDneYAd6k_jFDTRPumFWlzD80UR7nh2OB8k,4013 +pandas/tests/arrays/boolean/test_indexing.py,sha256=BorrK8_ZJbN5HWcIX9fCP-BbTCaJsgAGUiza5IwhYr4,361 +pandas/tests/arrays/boolean/test_logical.py,sha256=5WEXDvl7gWT3mPCtorW1zhnzgo3U22muxxUPXioPUXo,9346 +pandas/tests/arrays/boolean/test_ops.py,sha256=iM_FRYMtvvdEpMtLUSuBd_Ww5nHr284v2fRxHaydvIM,975 +pandas/tests/arrays/boolean/test_reduction.py,sha256=KJx4QD5sy94U0TrGuugFzBnp4XtjFahHsSQZwMY4KUo,2017 +pandas/tests/arrays/boolean/test_repr.py,sha256=RRljPIDi6jDNhUdbjKMc75Mst-wm92l-H6b5Y-lCCJA,437 +pandas/tests/arrays/categorical/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/arrays/categorical/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/arrays/categorical/__pycache__/common.cpython-38.pyc,, +pandas/tests/arrays/categorical/__pycache__/conftest.cpython-38.pyc,, +pandas/tests/arrays/categorical/__pycache__/test_algos.cpython-38.pyc,, +pandas/tests/arrays/categorical/__pycache__/test_analytics.cpython-38.pyc,, +pandas/tests/arrays/categorical/__pycache__/test_api.cpython-38.pyc,, +pandas/tests/arrays/categorical/__pycache__/test_astype.cpython-38.pyc,, +pandas/tests/arrays/categorical/__pycache__/test_constructors.cpython-38.pyc,, +pandas/tests/arrays/categorical/__pycache__/test_dtypes.cpython-38.pyc,, +pandas/tests/arrays/categorical/__pycache__/test_indexing.cpython-38.pyc,, +pandas/tests/arrays/categorical/__pycache__/test_missing.cpython-38.pyc,, +pandas/tests/arrays/categorical/__pycache__/test_operators.cpython-38.pyc,, +pandas/tests/arrays/categorical/__pycache__/test_replace.cpython-38.pyc,, +pandas/tests/arrays/categorical/__pycache__/test_repr.cpython-38.pyc,, +pandas/tests/arrays/categorical/__pycache__/test_sorting.cpython-38.pyc,, +pandas/tests/arrays/categorical/__pycache__/test_subclass.cpython-38.pyc,, +pandas/tests/arrays/categorical/__pycache__/test_take.cpython-38.pyc,, +pandas/tests/arrays/categorical/__pycache__/test_warnings.cpython-38.pyc,, +pandas/tests/arrays/categorical/common.py,sha256=lQ8b1Pb6EWPTcKE-PQ9cymIcuDWYzH1JDyxmU6PaOi4,204 +pandas/tests/arrays/categorical/conftest.py,sha256=AmIOXNVnHCRF2-kO0cLEU6YDahNTFvUrHAbYSv3HxrA,166 +pandas/tests/arrays/categorical/test_algos.py,sha256=0bNc6nJ4oaD-RJAa6D4RX4TJpHjd9-PZ9LF8DIK8pG4,2589 +pandas/tests/arrays/categorical/test_analytics.py,sha256=DyDw0MIQpSI_Et_jZs7gxcOB2PDGMVbWYZF1tmsmnOk,14142 +pandas/tests/arrays/categorical/test_api.py,sha256=0866kT2xLPHkWJyjPztpa2X7-HiglKEuvc0HII0bLtM,21897 +pandas/tests/arrays/categorical/test_astype.py,sha256=tfqm3mi5PYsMRrQ1oiLumgCuv8riG0xw8kbRYtds9Nc,3170 +pandas/tests/arrays/categorical/test_constructors.py,sha256=rLhJvvq67fFIm3S1ZoE9SQHwJfvfy-6y1C-6qD3oll0,29503 +pandas/tests/arrays/categorical/test_dtypes.py,sha256=oCYgn69zkeKpVYllflhKZGNg968Mz8SVyXa1Jbs5axY,5358 
+pandas/tests/arrays/categorical/test_indexing.py,sha256=T4fEZB4a-7rtgw9GXXn2bz9aUmI06C01hXBS3yyU6oc,12721 +pandas/tests/arrays/categorical/test_missing.py,sha256=EvNp2EN7Q3MS6a6F7ONJky32Pd3SZ-z2i8NQFi_L0_g,7502 +pandas/tests/arrays/categorical/test_operators.py,sha256=wIAsQHw9wttMwDfSmfoNirv0aDcYMZkSDF9D8Kftq1c,15474 +pandas/tests/arrays/categorical/test_replace.py,sha256=oP_vkFO8uLfG2K0wKa5HDDFEUu5vGdcFuRndE3hPX_Q,2578 +pandas/tests/arrays/categorical/test_repr.py,sha256=K9gzSdyrBnXj0ekveoPr0SzyJoya68yXRBqvEl6sTeA,26450 +pandas/tests/arrays/categorical/test_sorting.py,sha256=uOH_8gepRUYWd9OzQxF-7UVDFZLtQXdK9YCfwdyaZJk,5053 +pandas/tests/arrays/categorical/test_subclass.py,sha256=v-VtvFFLSUt9zXoLOf7YLbda4q0NQVqRWck9x2qtXys,852 +pandas/tests/arrays/categorical/test_take.py,sha256=LHSSBfg8TbkTcZahf-8Qk0l721AOkV8M5tRsmQdHTgU,3652 +pandas/tests/arrays/categorical/test_warnings.py,sha256=7PNlvBCC0olHzx7ziQ42M_w5YEQaxPtqT0zfGj7IrP4,731 +pandas/tests/arrays/datetimes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/arrays/datetimes/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/arrays/datetimes/__pycache__/test_constructors.cpython-38.pyc,, +pandas/tests/arrays/datetimes/__pycache__/test_reductions.cpython-38.pyc,, +pandas/tests/arrays/datetimes/test_constructors.py,sha256=ZJ3Wu_2FWb_j41YfG7XxYnx998oNrTUZRKSClNrBCd4,5546 +pandas/tests/arrays/datetimes/test_reductions.py,sha256=RcNGzUMWQVms-sXuDwB-qSLqeb6ow9lWQdAL02Gen0c,5431 +pandas/tests/arrays/floating/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/arrays/floating/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/arrays/floating/__pycache__/conftest.cpython-38.pyc,, +pandas/tests/arrays/floating/__pycache__/test_arithmetic.cpython-38.pyc,, +pandas/tests/arrays/floating/__pycache__/test_astype.cpython-38.pyc,, +pandas/tests/arrays/floating/__pycache__/test_comparison.cpython-38.pyc,, +pandas/tests/arrays/floating/__pycache__/test_concat.cpython-38.pyc,, +pandas/tests/arrays/floating/__pycache__/test_construction.cpython-38.pyc,, +pandas/tests/arrays/floating/__pycache__/test_function.cpython-38.pyc,, +pandas/tests/arrays/floating/__pycache__/test_repr.cpython-38.pyc,, +pandas/tests/arrays/floating/__pycache__/test_to_numpy.cpython-38.pyc,, +pandas/tests/arrays/floating/conftest.py,sha256=0vIonTsibHqfOGpkEbyZ8WnySHC2GDJggDUHNDgK-VE,827 +pandas/tests/arrays/floating/test_arithmetic.py,sha256=iDazLwxIpjlgdh9oWkkfvXvA12Tc8OE84pBxGw7daaw,6578 +pandas/tests/arrays/floating/test_astype.py,sha256=5J3PbtjuiOg7IRXuTo1msRUSZdC30saTAtwVqDHcEUE,3779 +pandas/tests/arrays/floating/test_comparison.py,sha256=C-rwNTv5FtUvo3oWB8XNquCOa_XQHf6R9JRYX6JVAG0,2071 +pandas/tests/arrays/floating/test_concat.py,sha256=jTRivQyq4rJaWPioRDt-UL136Wap5rdpNP73vwY6HQQ,574 +pandas/tests/arrays/floating/test_construction.py,sha256=d2G7iPYWdjIsHDKe3Ilp5FyUt1UOr2NrPpxqFq5Wgec,6095 +pandas/tests/arrays/floating/test_function.py,sha256=E2TZV6qpgcTCZbCfBUakdJTzMtMm0lDuS7-ZhQd512A,6267 +pandas/tests/arrays/floating/test_repr.py,sha256=hCB-8kQMEuqMWlfH6dKIYwVkqfNnc4_-o1wT0dgX4tg,1158 +pandas/tests/arrays/floating/test_to_numpy.py,sha256=lPUL0Z47k3F0HVi58JIaEOOL7SMybuss0KOofD2E1hQ,4976 +pandas/tests/arrays/integer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/arrays/integer/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/arrays/integer/__pycache__/conftest.cpython-38.pyc,, +pandas/tests/arrays/integer/__pycache__/test_arithmetic.cpython-38.pyc,, 
+pandas/tests/arrays/integer/__pycache__/test_comparison.cpython-38.pyc,, +pandas/tests/arrays/integer/__pycache__/test_concat.cpython-38.pyc,, +pandas/tests/arrays/integer/__pycache__/test_construction.cpython-38.pyc,, +pandas/tests/arrays/integer/__pycache__/test_dtypes.cpython-38.pyc,, +pandas/tests/arrays/integer/__pycache__/test_function.cpython-38.pyc,, +pandas/tests/arrays/integer/__pycache__/test_indexing.cpython-38.pyc,, +pandas/tests/arrays/integer/__pycache__/test_repr.cpython-38.pyc,, +pandas/tests/arrays/integer/conftest.py,sha256=ZBGQ2ntL3FngqK6E3TqD64Dl1FeB4pXQTgoWQ__YxFI,1008 +pandas/tests/arrays/integer/test_arithmetic.py,sha256=aDGKfRkIuCeTHZItrI7K088bijcgqtsP3tVuE5EMtZ8,9481 +pandas/tests/arrays/integer/test_comparison.py,sha256=lnMPzaqTd5yF67n4_e5k9TcBINn4PaofK2AeHJaHzn4,1185 +pandas/tests/arrays/integer/test_concat.py,sha256=LUlEoVb7pCmZq8weLGrkEq7ez_BvChHQx6zQFc0GpBk,2131 +pandas/tests/arrays/integer/test_construction.py,sha256=7pbLuTvQEed7YRrk8UH40M88bb2mKYVV4eVbtV4hVeQ,7252 +pandas/tests/arrays/integer/test_dtypes.py,sha256=Hdxi6Rw1X4veZdgjHIhEbWF9Dh1U05cuSMfhVO67dkU,8874 +pandas/tests/arrays/integer/test_function.py,sha256=J21LsXup16bHzoPdXZDkvluVlAa_kr37MXeKg7PbUwk,6491 +pandas/tests/arrays/integer/test_indexing.py,sha256=rgwcafGbwJztl_N4CalvAnW6FKfKVNzJcE-RjcXMpR8,498 +pandas/tests/arrays/integer/test_repr.py,sha256=IYHuuU5lGBaeWg21vtwvH9Le9VRc-q2H8g53bP4rYWA,1653 +pandas/tests/arrays/interval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/arrays/interval/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/arrays/interval/__pycache__/test_astype.cpython-38.pyc,, +pandas/tests/arrays/interval/__pycache__/test_interval.cpython-38.pyc,, +pandas/tests/arrays/interval/__pycache__/test_ops.cpython-38.pyc,, +pandas/tests/arrays/interval/test_astype.py,sha256=8rb7rssqvIoSztzCfFb5pY4oIH_GjDStKrXkC6bnUZk,776 +pandas/tests/arrays/interval/test_interval.py,sha256=DYwaD0uEJH6zCtYYIXAXKc838Wp_hhqb3oo1K0tuOMM,11831 +pandas/tests/arrays/interval/test_ops.py,sha256=4QNJBVY5Fb150Rf3lS5a6p_ScHy8U-sAuWTWetbCmVc,3279 +pandas/tests/arrays/masked/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/arrays/masked/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/arrays/masked/__pycache__/test_arithmetic.cpython-38.pyc,, +pandas/tests/arrays/masked/__pycache__/test_arrow_compat.cpython-38.pyc,, +pandas/tests/arrays/masked/__pycache__/test_function.cpython-38.pyc,, +pandas/tests/arrays/masked/test_arithmetic.py,sha256=U7eMO1RxM91mtLDxewBMx2GOPicM16hGd3To0apnits,5699 +pandas/tests/arrays/masked/test_arrow_compat.py,sha256=VSX_gopuMKs8qeEH1ZtyspiXTji5xsYO-YXZJDVHK3Y,6617 +pandas/tests/arrays/masked/test_function.py,sha256=Bl8FUg_ST0JtPZgNX5lt_DYuKkWKTy0fI5irCu_kVIc,1182 +pandas/tests/arrays/masked_shared.py,sha256=31JHuB_H0lNd_gBJioMTzC_k6_7wI7KnB9Kvla1ZIXA,4471 +pandas/tests/arrays/period/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/arrays/period/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/arrays/period/__pycache__/test_arrow_compat.cpython-38.pyc,, +pandas/tests/arrays/period/__pycache__/test_astype.cpython-38.pyc,, +pandas/tests/arrays/period/__pycache__/test_constructors.cpython-38.pyc,, +pandas/tests/arrays/period/__pycache__/test_reductions.cpython-38.pyc,, +pandas/tests/arrays/period/test_arrow_compat.py,sha256=XYYibESYFbLAJwxCd17HOqgIg6bPjzybyf07-h827iE,3399 +pandas/tests/arrays/period/test_astype.py,sha256=-Dhz41IjtcWQggn_KdOg4MiQdZ1JOR4OtFgPySPYJuI,2339 
+pandas/tests/arrays/period/test_constructors.py,sha256=UT-bXHKR5Rv9VtTQUxozc_Ag9A8KnMX-T-z2CS07LHY,3923 +pandas/tests/arrays/period/test_reductions.py,sha256=gYiheQK3Z0Bwdo-0UaHIyfXGpmL1_UvoMP9FVIpztlM,1050 +pandas/tests/arrays/sparse/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/arrays/sparse/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/arrays/sparse/__pycache__/test_accessor.cpython-38.pyc,, +pandas/tests/arrays/sparse/__pycache__/test_arithmetics.cpython-38.pyc,, +pandas/tests/arrays/sparse/__pycache__/test_array.cpython-38.pyc,, +pandas/tests/arrays/sparse/__pycache__/test_combine_concat.cpython-38.pyc,, +pandas/tests/arrays/sparse/__pycache__/test_dtype.cpython-38.pyc,, +pandas/tests/arrays/sparse/__pycache__/test_libsparse.cpython-38.pyc,, +pandas/tests/arrays/sparse/test_accessor.py,sha256=nh1i6HmlobxkKqiEnfGT4hTyC5u5MWepmBHeIt8jKjw,5646 +pandas/tests/arrays/sparse/test_arithmetics.py,sha256=o8KOV9DaEjcUQBo_AyTifGRq_M2uh0jpR9HbL9vvUi0,20644 +pandas/tests/arrays/sparse/test_array.py,sha256=6bZhSJ4HG18xG2LkNlicEFaIeMNt30XA3tU05tmmmU0,53598 +pandas/tests/arrays/sparse/test_combine_concat.py,sha256=3NMQXaRQc7Bxn5HhSHffcUE24GZi_VYflnFLnixOgbs,2651 +pandas/tests/arrays/sparse/test_dtype.py,sha256=FXAscZs0zwPvJgb4ISUsl2eJFkdJOcxiTEmstg4yjP4,5699 +pandas/tests/arrays/sparse/test_libsparse.py,sha256=-cfog6TPSrYKSLUMYnfyQiwQs1CR5lBIRTJiokbDErY,20912 +pandas/tests/arrays/string_/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/arrays/string_/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/arrays/string_/__pycache__/test_string.cpython-38.pyc,, +pandas/tests/arrays/string_/__pycache__/test_string_arrow.cpython-38.pyc,, +pandas/tests/arrays/string_/test_string.py,sha256=YmNYwEui9QEQ0z9OERkZbn_TXuEXoVk6ce8zn69JYao,18145 +pandas/tests/arrays/string_/test_string_arrow.py,sha256=XEfhgnAqvKOzjZdR9nMpZx-xSB8zMPz3zalLRBTtwEg,4567 +pandas/tests/arrays/test_array.py,sha256=lFlGphdoqbyVb4Nwo68DWKwdg8CSQqxGDsjZjHtMbmY,14545 +pandas/tests/arrays/test_datetimelike.py,sha256=xpwZNowPbwWmvZH8_o_mE77SF_RfhNalEzjyOxCIHYs,48959 +pandas/tests/arrays/test_datetimes.py,sha256=vSGkb1FgiqEdhF3Zj1tVCr-clo52SNgF72Yste1urpo,14396 +pandas/tests/arrays/test_ndarray_backed.py,sha256=vJutd6Hl9FIIVu5AgzsN4Y3K2fX6F8QR7kPLFkMKEcY,2299 +pandas/tests/arrays/test_numpy.py,sha256=xFY08NEvn7aunUS_bgZ8MNKgHR8BoytAh_1QvDAOMU8,7558 +pandas/tests/arrays/test_period.py,sha256=saVJsVEWWeQpkfkobBhtLf2E0kOkm-p130nTKgdg1V0,4703 +pandas/tests/arrays/test_timedeltas.py,sha256=FLoZUX5S-dgW2YbRPCFBAyFAD51CwbqDpkBclNHjZ60,4091 +pandas/tests/arrays/timedeltas/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/arrays/timedeltas/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/arrays/timedeltas/__pycache__/test_constructors.cpython-38.pyc,, +pandas/tests/arrays/timedeltas/__pycache__/test_reductions.cpython-38.pyc,, +pandas/tests/arrays/timedeltas/test_constructors.py,sha256=s-93lf6klPbmoGr6jwxYQTHjX5iQS2Sc6sei3g3_Uko,2346 +pandas/tests/arrays/timedeltas/test_reductions.py,sha256=EfSgGzlOdC-UHaoNobvonemNxGvHTKv8ulAOcbm4nL4,6375 +pandas/tests/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/base/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/base/__pycache__/common.cpython-38.pyc,, +pandas/tests/base/__pycache__/test_constructors.cpython-38.pyc,, +pandas/tests/base/__pycache__/test_conversion.cpython-38.pyc,, +pandas/tests/base/__pycache__/test_fillna.cpython-38.pyc,, 
+pandas/tests/base/__pycache__/test_misc.cpython-38.pyc,, +pandas/tests/base/__pycache__/test_transpose.cpython-38.pyc,, +pandas/tests/base/__pycache__/test_unique.cpython-38.pyc,, +pandas/tests/base/__pycache__/test_value_counts.cpython-38.pyc,, +pandas/tests/base/common.py,sha256=khOj_7jBGUvPKXyNDfTpkfJ56w38fTgtB0Sk9ueJcJw,252 +pandas/tests/base/test_constructors.py,sha256=TWcAU0xfnGBBBsF4WcZQ11AYOb1QELZzjJJD3HUs7RY,5078 +pandas/tests/base/test_conversion.py,sha256=dUC72PUO4ljAbLn4cswnLj6MdPfNiyM578Uep-s1UQs,15768 +pandas/tests/base/test_fillna.py,sha256=q9LZhUp2HXaVQw4wSxK0VU4Z9z62WI12r9ivsZu0gOg,1522 +pandas/tests/base/test_misc.py,sha256=p1hrSllVv13SvbXZZ4SOUO78GLs0tCMV8cVeSFEviBU,5736 +pandas/tests/base/test_transpose.py,sha256=138_O_JwwdCmfmyjp47PSVa-4Sr7SOuLprr0PzRm6BQ,1694 +pandas/tests/base/test_unique.py,sha256=iq-TZcKgn_VveOYELvYOLiHNwo7swxtqUb9vgI2i-zM,4681 +pandas/tests/base/test_value_counts.py,sha256=MUdHts_T855cEtG3twElVy1tq7EcL_MoVWb-qa-pNSU,9549 +pandas/tests/computation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/computation/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/computation/__pycache__/test_compat.cpython-38.pyc,, +pandas/tests/computation/__pycache__/test_eval.cpython-38.pyc,, +pandas/tests/computation/test_compat.py,sha256=DnL7koCXquyJjhH4WMklEoA5V2Csl1DJ3qaATjYDg0Q,874 +pandas/tests/computation/test_eval.py,sha256=qIZx5pinJEpxBPqALtqeRXm-bFIwqILjMpC7siFq8_o,69019 +pandas/tests/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/config/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/config/__pycache__/test_config.cpython-38.pyc,, +pandas/tests/config/__pycache__/test_localization.cpython-38.pyc,, +pandas/tests/config/test_config.py,sha256=NRdEpcYJM7BOkWSl9mYyMwgW8MvnSb-Ij755rqnL0v8,18259 +pandas/tests/config/test_localization.py,sha256=luk0jd10gce4xfln87o9KHrzil_4dnHkanWcztZuk9I,2878 +pandas/tests/construction/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/construction/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/construction/__pycache__/test_extract_array.cpython-38.pyc,, +pandas/tests/construction/test_extract_array.py,sha256=L3fEjATPsAy3a6zrdQJaXXaQ7FvR2LOeiPJMjGNkwKQ,637 +pandas/tests/dtypes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/dtypes/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/dtypes/__pycache__/test_common.cpython-38.pyc,, +pandas/tests/dtypes/__pycache__/test_concat.cpython-38.pyc,, +pandas/tests/dtypes/__pycache__/test_dtypes.cpython-38.pyc,, +pandas/tests/dtypes/__pycache__/test_generic.cpython-38.pyc,, +pandas/tests/dtypes/__pycache__/test_inference.cpython-38.pyc,, +pandas/tests/dtypes/__pycache__/test_missing.cpython-38.pyc,, +pandas/tests/dtypes/cast/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/dtypes/cast/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/dtypes/cast/__pycache__/test_can_hold_element.cpython-38.pyc,, +pandas/tests/dtypes/cast/__pycache__/test_construct_from_scalar.cpython-38.pyc,, +pandas/tests/dtypes/cast/__pycache__/test_construct_ndarray.cpython-38.pyc,, +pandas/tests/dtypes/cast/__pycache__/test_construct_object_arr.cpython-38.pyc,, +pandas/tests/dtypes/cast/__pycache__/test_dict_compat.cpython-38.pyc,, +pandas/tests/dtypes/cast/__pycache__/test_downcast.cpython-38.pyc,, +pandas/tests/dtypes/cast/__pycache__/test_find_common_type.cpython-38.pyc,, +pandas/tests/dtypes/cast/__pycache__/test_infer_datetimelike.cpython-38.pyc,, 
+pandas/tests/dtypes/cast/__pycache__/test_infer_dtype.cpython-38.pyc,, +pandas/tests/dtypes/cast/__pycache__/test_maybe_box_native.cpython-38.pyc,, +pandas/tests/dtypes/cast/__pycache__/test_promote.cpython-38.pyc,, +pandas/tests/dtypes/cast/test_can_hold_element.py,sha256=YMNVyNxXuZ4e3QJpUVUwgpa0qM5VIIx4BI7kNTPuqz8,2013 +pandas/tests/dtypes/cast/test_construct_from_scalar.py,sha256=INdOiQ7MowXLr6ZReCiq0JykUeFvRWocxk3f-ilk9v0,1780 +pandas/tests/dtypes/cast/test_construct_ndarray.py,sha256=YXylbW1pq_tt4lgp33H5_rTicbxc5z6bkkVH2RC5dgc,1101 +pandas/tests/dtypes/cast/test_construct_object_arr.py,sha256=eOmUu4q0ihGTbYpCleoCnYtvwh1TBCEZQQjLeJaUMNA,717 +pandas/tests/dtypes/cast/test_dict_compat.py,sha256=qyn7kP5b14MywtqOUL5C-NOvjf2qK4PsXGpCvqmo-4E,476 +pandas/tests/dtypes/cast/test_downcast.py,sha256=2VUwon3py24T7MObXR__N0t8ZWFZlCxsRO3Ohy-T1qE,2479 +pandas/tests/dtypes/cast/test_find_common_type.py,sha256=0AXBxplU1sAoHhGEPAAfhTNEvb5adSTlXEEPe9DQPNM,5114 +pandas/tests/dtypes/cast/test_infer_datetimelike.py,sha256=6vor_eqEbMKcBLEkfayXzVzwwf5BZcCvQhFZuqhvyKU,603 +pandas/tests/dtypes/cast/test_infer_dtype.py,sha256=sQ7TlTEgn1RgM7U78A0KG_URR3hufkyXeIj-yR8WUX0,6216 +pandas/tests/dtypes/cast/test_maybe_box_native.py,sha256=uEkoLnSVi4kR8-c5FMhpEba7luZum3PeRIrxIdeGeM4,996 +pandas/tests/dtypes/cast/test_promote.py,sha256=qPsTwVbxmwXCHGkUYWjpL8t1DvMG3Pj41RxUZsWs11E,22037 +pandas/tests/dtypes/test_common.py,sha256=KsulRrUst0B3YpBRVSN1Iqo8PpWSB5i9gMFPJybXw34,26035 +pandas/tests/dtypes/test_concat.py,sha256=L0adD9rk1kZdz8vq_hPMAXp_A1quqKzWRRrZEiqECT0,1566 +pandas/tests/dtypes/test_dtypes.py,sha256=fHsofF5K3qwj0jN5opNhlsH0y7XuoLo3n5iQKKzytrA,38940 +pandas/tests/dtypes/test_generic.py,sha256=YN1TMDcGljwX-Ph05jmVn1RU0_DE8jhu1-b9x2kp1P8,4401 +pandas/tests/dtypes/test_inference.py,sha256=7oN7FZfJ0gsWc1R0Rt-TqSVxksalksH7PxNzWmFNyYA,67029 +pandas/tests/dtypes/test_missing.py,sha256=4QGNg89cYUdXE1xYx09kuw8qXPtYQfSbRuqILfH2z38,23060 +pandas/tests/extension/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/extension/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/extension/__pycache__/conftest.cpython-38.pyc,, +pandas/tests/extension/__pycache__/test_boolean.cpython-38.pyc,, +pandas/tests/extension/__pycache__/test_categorical.cpython-38.pyc,, +pandas/tests/extension/__pycache__/test_common.cpython-38.pyc,, +pandas/tests/extension/__pycache__/test_datetime.cpython-38.pyc,, +pandas/tests/extension/__pycache__/test_extension.cpython-38.pyc,, +pandas/tests/extension/__pycache__/test_external_block.cpython-38.pyc,, +pandas/tests/extension/__pycache__/test_floating.cpython-38.pyc,, +pandas/tests/extension/__pycache__/test_integer.cpython-38.pyc,, +pandas/tests/extension/__pycache__/test_interval.cpython-38.pyc,, +pandas/tests/extension/__pycache__/test_numpy.cpython-38.pyc,, +pandas/tests/extension/__pycache__/test_period.cpython-38.pyc,, +pandas/tests/extension/__pycache__/test_sparse.cpython-38.pyc,, +pandas/tests/extension/__pycache__/test_string.cpython-38.pyc,, +pandas/tests/extension/arrow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/extension/arrow/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/extension/arrow/__pycache__/arrays.cpython-38.pyc,, +pandas/tests/extension/arrow/__pycache__/test_bool.cpython-38.pyc,, +pandas/tests/extension/arrow/__pycache__/test_string.cpython-38.pyc,, +pandas/tests/extension/arrow/__pycache__/test_timestamp.cpython-38.pyc,, 
+pandas/tests/extension/arrow/arrays.py,sha256=e4E6nGBoMHSN2zEli3jJ2ViSADyKfg-yWkgsSwSAm_Y,6308 +pandas/tests/extension/arrow/test_bool.py,sha256=JIHUZ9ssufAEhly0SwFzBnmfGnF0opZeIVQsLl-wgC0,3033 +pandas/tests/extension/arrow/test_string.py,sha256=6RRz94K29vIIw2kU0agjou6RgKxD-58ArOood3wsDEo,306 +pandas/tests/extension/arrow/test_timestamp.py,sha256=kSJTLacW1cfWSYvDS9KR_BffaFHGuS1TTFgi8G5eQlI,1336 +pandas/tests/extension/base/__init__.py,sha256=ctmO-zEnYGNk7bdEJI4lRNMjCoLot5vkvv4l9z7VCQ4,2713 +pandas/tests/extension/base/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/extension/base/__pycache__/base.cpython-38.pyc,, +pandas/tests/extension/base/__pycache__/casting.cpython-38.pyc,, +pandas/tests/extension/base/__pycache__/constructors.cpython-38.pyc,, +pandas/tests/extension/base/__pycache__/dim2.cpython-38.pyc,, +pandas/tests/extension/base/__pycache__/dtype.cpython-38.pyc,, +pandas/tests/extension/base/__pycache__/getitem.cpython-38.pyc,, +pandas/tests/extension/base/__pycache__/groupby.cpython-38.pyc,, +pandas/tests/extension/base/__pycache__/index.cpython-38.pyc,, +pandas/tests/extension/base/__pycache__/interface.cpython-38.pyc,, +pandas/tests/extension/base/__pycache__/io.cpython-38.pyc,, +pandas/tests/extension/base/__pycache__/methods.cpython-38.pyc,, +pandas/tests/extension/base/__pycache__/missing.cpython-38.pyc,, +pandas/tests/extension/base/__pycache__/ops.cpython-38.pyc,, +pandas/tests/extension/base/__pycache__/printing.cpython-38.pyc,, +pandas/tests/extension/base/__pycache__/reduce.cpython-38.pyc,, +pandas/tests/extension/base/__pycache__/reshaping.cpython-38.pyc,, +pandas/tests/extension/base/__pycache__/setitem.cpython-38.pyc,, +pandas/tests/extension/base/base.py,sha256=5SPn-G89qste22G5I6n0FARZNpiwJbMLkFbdfxEEFlc,742 +pandas/tests/extension/base/casting.py,sha256=5sxytyXAiHRQiVXnJPi-0ucDJ1nI5qiLahvK8aFSdJk,3174 +pandas/tests/extension/base/constructors.py,sha256=95HytuJQ4xlYf3QCl2vao_RfB8rxD7QvL2G6OdwA3do,5666 +pandas/tests/extension/base/dim2.py,sha256=VjRvGWeh4UbWcJoHOBqFfrLAVIgTTg5JeWxktYnWUaM,10363 +pandas/tests/extension/base/dtype.py,sha256=Y7N55evgnudYzS3qR1MlpA9EYxkO3xrh8d7NmPVhsbM,4755 +pandas/tests/extension/base/getitem.py,sha256=jAQLchFTMl7d3K74dT6rzE5A34t5DtkD90NzzmfrghQ,16420 +pandas/tests/extension/base/groupby.py,sha256=oQny4N6GWA6uNbSZ0Jek3gNoZQJDSVaOG8_GCz9qBbE,4151 +pandas/tests/extension/base/index.py,sha256=vUw3rCBO1I-a-335xPVxPHlk7wj2sM2NT5oG5aDk0Gg,601 +pandas/tests/extension/base/interface.py,sha256=juJDZhx9QNASJ8ceotfrhDDMlnzEbp9RSB-U8dohMZ8,4284 +pandas/tests/extension/base/io.py,sha256=eS6Xcw4ww271ronWBKARxuafNOvmGdunhR5NCx3tf-0,628 +pandas/tests/extension/base/methods.py,sha256=ZivBxZKgSlYcF9jkQbCHHskWo58V_ikogu2jkD13AOo,21557 +pandas/tests/extension/base/missing.py,sha256=OahRfGWXpQgHYlVh5nuwY2_vmsBeiKN1DVTGzGodidA,5345 +pandas/tests/extension/base/ops.py,sha256=v73kC7HrLW6TD7DinuqZQVaH0JQ12Xa9L8EueDqI30w,7662 +pandas/tests/extension/base/printing.py,sha256=DDbHOCY8hj0To3ZNsg9IBWCMHYQumD5mcf_LZo21CIA,1193 +pandas/tests/extension/base/reduce.py,sha256=2RehFKhMDvGX6RvBgOlxV2H2fFwoOGxJst5BZ9U8-dc,2238 +pandas/tests/extension/base/reshaping.py,sha256=G97UjVkCoDJrNDw6MiknVm618gmLixdutoY3z6CUCJo,13699 +pandas/tests/extension/base/setitem.py,sha256=l9ZnlJ3KBjrGvxjo1mBkYDSIBS-Dp6BrGvsHFoJ9mU4,13638 +pandas/tests/extension/conftest.py,sha256=WHr4_931peURo-68EcmpEj-iU-yo0iU-yfF3UH-m0-g,4090 +pandas/tests/extension/date/__init__.py,sha256=-pIaBe_vmgnM_ok6T_-t-wVHetXtNw30SOMWVWNDqLI,118 +pandas/tests/extension/date/__pycache__/__init__.cpython-38.pyc,, 
+pandas/tests/extension/date/__pycache__/array.cpython-38.pyc,, +pandas/tests/extension/date/array.py,sha256=SU6zWawbGnuWUP-6ciGuwaGHHWNrfut1EMY04Cogrqk,5745 +pandas/tests/extension/decimal/__init__.py,sha256=wgvjyfS3v3AHfh3sEfb5C8rSuOyo2satof8ESijM7bw,191 +pandas/tests/extension/decimal/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/extension/decimal/__pycache__/array.cpython-38.pyc,, +pandas/tests/extension/decimal/__pycache__/test_decimal.cpython-38.pyc,, +pandas/tests/extension/decimal/array.py,sha256=fJ9C2I86llhm1vcoveyJ_eoJbwo3lfYYqm3jTeldYPM,8080 +pandas/tests/extension/decimal/test_decimal.py,sha256=lnDXDols4k_oCuC4zCjaMHCZ-1ZwMQipwdC2_QpKH8Q,14710 +pandas/tests/extension/json/__init__.py,sha256=JvjCnVMfzIUSoHKL-umrkT9H5T8J3Alt8-QoKXMSB4I,146 +pandas/tests/extension/json/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/extension/json/__pycache__/array.cpython-38.pyc,, +pandas/tests/extension/json/__pycache__/test_json.cpython-38.pyc,, +pandas/tests/extension/json/array.py,sha256=usWby_bbN-TvtdppGc6ov1HeV9HFTNxjR2ebbVX0WRI,7590 +pandas/tests/extension/json/test_json.py,sha256=QOVc2542j4xkaOUSKMaRfiBp7qIME9NDkHFFK1Jn-C0,11201 +pandas/tests/extension/list/__init__.py,sha256=FlpTrgdAMl_5puN2zDjvdmosw8aTvaCD-Hi2GtIK-k0,146 +pandas/tests/extension/list/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/extension/list/__pycache__/array.cpython-38.pyc,, +pandas/tests/extension/list/__pycache__/test_list.cpython-38.pyc,, +pandas/tests/extension/list/array.py,sha256=CbaXaaRf1qpmS3LhXH895mF6OjEqJQgsBVkwjLgzJko,3818 +pandas/tests/extension/list/test_list.py,sha256=XyGJ1tWEgjIZVtZ3gP0x6sAgK_8w87Kfu91I1PbVCy8,668 +pandas/tests/extension/test_boolean.py,sha256=dBqSYbS8koX5dSEJmGEgPgwFMzQb0t6Hx56-wr5eBlY,12860 +pandas/tests/extension/test_categorical.py,sha256=rZFxYuv26ggXj9Qh-l8px_K81HMoPct1ODr9b8-RY9c,9969 +pandas/tests/extension/test_common.py,sha256=X2dv8Vbo1vmkh8uKpKyAOaLMDZqTOgFLtz1N3EQttZk,2091 +pandas/tests/extension/test_datetime.py,sha256=8yqQBHV2cLrS3DStiU43k9xbDI87Q3AfTVsZwfhuJiE,5504 +pandas/tests/extension/test_extension.py,sha256=1X1RAqbnoiQ9TObfzNgfxbl4dZjOpmgTRBL1VJZZ2KM,551 +pandas/tests/extension/test_external_block.py,sha256=4ZOPTNe7fG4bgvz6NEOOR-Hw-9LinE9EljPpJ2B7ws4,1083 +pandas/tests/extension/test_floating.py,sha256=cU6NLPdYCzGQFiP8F4wzbvJfZpHfz2JdeKuerzectCQ,6060 +pandas/tests/extension/test_integer.py,sha256=fPIGvLi8gc4jCo5nuyLdK5Xf2Zg6Mu7FCXccIDuiXho,6671 +pandas/tests/extension/test_interval.py,sha256=wMLqgbFqQmlAjKfZpkhgpBYwzqWcFZmXR1UOpbc9w5g,4807 +pandas/tests/extension/test_numpy.py,sha256=ZxW16dsRfxVMDM-Gu2I5GvAkAA9KhSISlyVW4UHZqw8,15729 +pandas/tests/extension/test_period.py,sha256=KY2ltlN4epIMPuC5caJhnP894ojow2u4Jmj22bypQgk,5139 +pandas/tests/extension/test_sparse.py,sha256=iUXJvPsPi0GBDZUG9lHlDPSn5Q4c0Ma4pP0zIFvtKuI,18245 +pandas/tests/extension/test_string.py,sha256=ArB8DRmqiMRefUOFJG_lxgce8-4uTJozDlEp1BFnBtY,6186 +pandas/tests/frame/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/frame/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/frame/__pycache__/common.cpython-38.pyc,, +pandas/tests/frame/__pycache__/conftest.cpython-38.pyc,, +pandas/tests/frame/__pycache__/test_alter_axes.cpython-38.pyc,, +pandas/tests/frame/__pycache__/test_api.cpython-38.pyc,, +pandas/tests/frame/__pycache__/test_arithmetic.cpython-38.pyc,, +pandas/tests/frame/__pycache__/test_block_internals.cpython-38.pyc,, +pandas/tests/frame/__pycache__/test_constructors.cpython-38.pyc,, +pandas/tests/frame/__pycache__/test_cumulative.cpython-38.pyc,, 
+pandas/tests/frame/__pycache__/test_iteration.cpython-38.pyc,, +pandas/tests/frame/__pycache__/test_logical_ops.cpython-38.pyc,, +pandas/tests/frame/__pycache__/test_nonunique_indexes.cpython-38.pyc,, +pandas/tests/frame/__pycache__/test_npfuncs.cpython-38.pyc,, +pandas/tests/frame/__pycache__/test_query_eval.cpython-38.pyc,, +pandas/tests/frame/__pycache__/test_reductions.cpython-38.pyc,, +pandas/tests/frame/__pycache__/test_repr_info.cpython-38.pyc,, +pandas/tests/frame/__pycache__/test_stack_unstack.cpython-38.pyc,, +pandas/tests/frame/__pycache__/test_subclass.cpython-38.pyc,, +pandas/tests/frame/__pycache__/test_ufunc.cpython-38.pyc,, +pandas/tests/frame/__pycache__/test_unary.cpython-38.pyc,, +pandas/tests/frame/__pycache__/test_validate.cpython-38.pyc,, +pandas/tests/frame/common.py,sha256=-oYwpLow5EVsERWQ5e7A_PeORPqqiZx2lpTzp4hI-PQ,1777 +pandas/tests/frame/conftest.py,sha256=ybrAbi5f9hWr4_Urqr0qfpVOcCW0tvtG1DsoriqF4ww,8761 +pandas/tests/frame/constructors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/frame/constructors/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/frame/constructors/__pycache__/test_from_dict.cpython-38.pyc,, +pandas/tests/frame/constructors/__pycache__/test_from_records.cpython-38.pyc,, +pandas/tests/frame/constructors/test_from_dict.py,sha256=Fw2CoCo8X_rUtTvn7DQt1FNyYCjy3bRpvAKsZ5DDYrQ,7018 +pandas/tests/frame/constructors/test_from_records.py,sha256=KE4Qp6QEX3MXawcCivoIpNEC7lOU_tFJfzLVjEh1mPs,16926 +pandas/tests/frame/indexing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/frame/indexing/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/frame/indexing/__pycache__/test_delitem.cpython-38.pyc,, +pandas/tests/frame/indexing/__pycache__/test_get.cpython-38.pyc,, +pandas/tests/frame/indexing/__pycache__/test_get_value.cpython-38.pyc,, +pandas/tests/frame/indexing/__pycache__/test_getitem.cpython-38.pyc,, +pandas/tests/frame/indexing/__pycache__/test_indexing.cpython-38.pyc,, +pandas/tests/frame/indexing/__pycache__/test_insert.cpython-38.pyc,, +pandas/tests/frame/indexing/__pycache__/test_lookup.cpython-38.pyc,, +pandas/tests/frame/indexing/__pycache__/test_mask.cpython-38.pyc,, +pandas/tests/frame/indexing/__pycache__/test_set_value.cpython-38.pyc,, +pandas/tests/frame/indexing/__pycache__/test_setitem.cpython-38.pyc,, +pandas/tests/frame/indexing/__pycache__/test_take.cpython-38.pyc,, +pandas/tests/frame/indexing/__pycache__/test_where.cpython-38.pyc,, +pandas/tests/frame/indexing/__pycache__/test_xs.cpython-38.pyc,, +pandas/tests/frame/indexing/test_delitem.py,sha256=3AquK9yLK3gFcJeTLqCz16f7vZMN-eEys_UcMWxGfRk,1778 +pandas/tests/frame/indexing/test_get.py,sha256=N00_igU25_HjYuvAqDQKqBpqbz6HjB97o9Exvbo9BzM,662 +pandas/tests/frame/indexing/test_get_value.py,sha256=A-GbCHlbDfVPGB10dNGnGg4DtrKrlRbRspYfuDTUmPM,679 +pandas/tests/frame/indexing/test_getitem.py,sha256=6hyL0jKTiDKEWqVW2tW9N-sfMPU_1RwnFZvVw5977YM,12481 +pandas/tests/frame/indexing/test_indexing.py,sha256=LxHGQVLMTo4BBpKpuU544lcPH1pxqNBGQ63Bmt-nsAo,53853 +pandas/tests/frame/indexing/test_insert.py,sha256=7OOa8OW6HqGFRthQqzrwtCLuK_2k1KWktaS6NTnka0g,3521 +pandas/tests/frame/indexing/test_lookup.py,sha256=rHftMyz0qii47X80NSLFjTjLa7xSRcfdX1joj1sTGtI,3385 +pandas/tests/frame/indexing/test_mask.py,sha256=vKvxJMb6LVWmu9L4BzYSFwLCAkVQ98BLUzTvOHctNvs,5151 +pandas/tests/frame/indexing/test_set_value.py,sha256=3EV3cAAziAoLidqhEiCgXiUR3_VaP9YYqLrIWQQKM3U,2275 
+pandas/tests/frame/indexing/test_setitem.py,sha256=7mx0rahbOAJuKPcnoxqdtrgpkTX1JEBhDSL8XUVMGTs,41412 +pandas/tests/frame/indexing/test_take.py,sha256=DjYzYfwSpl7Mn6NcLSHHFoADjMpS8jZCKIPY7YhxQ6I,2927 +pandas/tests/frame/indexing/test_where.py,sha256=onrjFDYE0IFnEUY02FomJVpNkbDSPdtn2DDcBv3cSJw,31495 +pandas/tests/frame/indexing/test_xs.py,sha256=sckf9x1DOyPSO90oGCaXna5j6hQZXsF13Q-BdQVe3wU,14338 +pandas/tests/frame/methods/__init__.py,sha256=M6dCS5d750Fzf9GX7xyNka-SZ2wJFCL66y5j-moHhwo,229 +pandas/tests/frame/methods/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_add_prefix_suffix.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_align.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_append.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_asfreq.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_asof.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_assign.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_astype.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_at_time.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_between_time.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_clip.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_combine.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_combine_first.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_compare.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_convert.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_convert_dtypes.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_copy.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_count.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_count_with_level_deprecated.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_cov_corr.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_describe.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_diff.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_dot.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_drop.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_drop_duplicates.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_droplevel.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_dropna.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_dtypes.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_duplicated.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_equals.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_explode.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_fillna.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_filter.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_first_and_last.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_first_valid_index.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_get_numeric_data.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_head_tail.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_infer_objects.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_interpolate.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_is_homogeneous_dtype.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_isin.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_join.cpython-38.pyc,, 
+pandas/tests/frame/methods/__pycache__/test_matmul.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_nlargest.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_pct_change.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_pipe.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_pop.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_quantile.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_rank.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_reindex.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_reindex_like.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_rename.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_rename_axis.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_reorder_levels.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_replace.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_reset_index.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_round.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_sample.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_select_dtypes.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_set_axis.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_set_index.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_shift.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_sort_index.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_sort_values.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_swapaxes.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_swaplevel.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_to_csv.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_to_dict.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_to_dict_of_blocks.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_to_numpy.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_to_period.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_to_records.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_to_timestamp.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_transpose.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_truncate.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_tz_convert.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_tz_localize.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_update.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_value_counts.cpython-38.pyc,, +pandas/tests/frame/methods/__pycache__/test_values.cpython-38.pyc,, +pandas/tests/frame/methods/test_add_prefix_suffix.py,sha256=jX_f1l0z7txhO14k8_UaHJYG7WIY3FXRlDxeOVv7OPU,784 +pandas/tests/frame/methods/test_align.py,sha256=KUITMeXIqDc-HXsVp3mE3RIUvg0mgY4uWUcr-d5CmAE,11087 +pandas/tests/frame/methods/test_append.py,sha256=lluqNle14UKuyuMTwAq5-fADaV1gXBydlJosO7cEeNo,9974 +pandas/tests/frame/methods/test_asfreq.py,sha256=R7e8BS-GnWMsq_iy2cJbqZ1ILLQwGI70FC_i8lz1ipk,6968 +pandas/tests/frame/methods/test_asof.py,sha256=ofv0SVedBu2npPOul6fZerv8AaaAcSX5Hhh7D6aU-ik,5883 +pandas/tests/frame/methods/test_assign.py,sha256=xFGREzLhP1wj3MowBimeYbMWBNiII0280DiOXI6WDB0,2982 +pandas/tests/frame/methods/test_astype.py,sha256=KVXpBWPz5P0hMyZyQfwvEB1cwEubpBaCtlEYLKGcYS8,27848 +pandas/tests/frame/methods/test_at_time.py,sha256=GFS1rEhRbuwB9V8J9T2fZI--nPTVJSqeaIRynP1lkDQ,4462 
+pandas/tests/frame/methods/test_between_time.py,sha256=laqCNouLWRxEk46kgPTjDbksqqMb2idDWzkTNhTqtyg,10811 +pandas/tests/frame/methods/test_clip.py,sha256=xGXmpM7l4mwxqGlAvximq__Yd2kbnV1yVPLeHchJoPs,6963 +pandas/tests/frame/methods/test_combine.py,sha256=wNaQqokqHsJmrZ9NQIao58ZT0hSkkTH14I7_Oq8tADs,1359 +pandas/tests/frame/methods/test_combine_first.py,sha256=WAaUac6JPakdSjHILXaATBeWGFixmAe39w9ZRKCveW0,18291 +pandas/tests/frame/methods/test_compare.py,sha256=8MkvFaOb8Dve1M2BMRMyO7e43xd5IBFChSkpcQi0Nf0,6158 +pandas/tests/frame/methods/test_convert.py,sha256=SS_wGp0w3-Xl-Ticb8SBVSfYmuZg9yhk0oVHCKn7x1Y,2214 +pandas/tests/frame/methods/test_convert_dtypes.py,sha256=PQ2JjRe1bAbxTskfc9nKoPlmej6S3vUI2kZi0HWd0r8,1537 +pandas/tests/frame/methods/test_copy.py,sha256=dbmhfVQzyeq_xb_oCMlEKKH0yiYvhJdr5xqNBDP13EU,1788 +pandas/tests/frame/methods/test_count.py,sha256=1DdJC7yNqq_5QWuKQYH0dsewNG4SWY8zhOEjTOb_YL4,1081 +pandas/tests/frame/methods/test_count_with_level_deprecated.py,sha256=pzhLcOCeQXs00g_UqiUR0isYi1JBHj1lrxByQZiGLqg,4340 +pandas/tests/frame/methods/test_cov_corr.py,sha256=fR09byi3h3runxeuCTjlgHK3E4chPxHhBXQfd-SMPtc,13210 +pandas/tests/frame/methods/test_describe.py,sha256=D4HejW8Z8qeiRKPIdkOSTMUp-2frUMsYwAyFpXCi4lo,13700 +pandas/tests/frame/methods/test_diff.py,sha256=9rpLuzcllMgiD564cGx01ci8A0RN-r2vwcxOXjgZEhQ,9848 +pandas/tests/frame/methods/test_dot.py,sha256=p2yYQtp82qE8yDC3yncfL6_0uIMApPdwuUlBXJFbmUk,3899 +pandas/tests/frame/methods/test_drop.py,sha256=kt9vR9_4tkvYBtI4uYXcpoipX3oolVwYAREVVxXIV3Q,19808 +pandas/tests/frame/methods/test_drop_duplicates.py,sha256=ib1mbZNxIHP96We10JMii8NZVA2GGhxMDn_41MYCj9Q,15126 +pandas/tests/frame/methods/test_droplevel.py,sha256=L1gAMjYYPB6eYmSppXfbwPVKa3HCNofqPVUZ3gxLldA,1253 +pandas/tests/frame/methods/test_dropna.py,sha256=1afQLpF6IDLup1XBi9dw0i8NtqypavD29PHfnTBxxqs,9904 +pandas/tests/frame/methods/test_dtypes.py,sha256=eH3FCOVdoAzbiNOOJv3LDlsawVOtkwYgrpUWCMJFMP8,4250 +pandas/tests/frame/methods/test_duplicated.py,sha256=Pw-XNYGnjBbkdaLIfhyxELjEuvn2H5-NX0Pggy-v5N8,3208 +pandas/tests/frame/methods/test_equals.py,sha256=CJFQbs1iaBH-pJhrjvL-fsVNdYTiF3b5uKKSr2YSR3U,2796 +pandas/tests/frame/methods/test_explode.py,sha256=VCtOmaxL9mQwwSDSQOZaWFUw1-IDVmnOGRSlPY97nmo,8161 +pandas/tests/frame/methods/test_fillna.py,sha256=8BOKAlhJBZJpK_FIH_q714kQS5jpAoQjeLwOk2XTLQo,22450 +pandas/tests/frame/methods/test_filter.py,sha256=qpHVkTo7lpmvAPOprKg3pKJRK5c6wD2Pz8o5QJwYWfY,4930 +pandas/tests/frame/methods/test_first_and_last.py,sha256=aq6jIXT_wBYP9jR-tyPaqIGkxmNZAK3aHJLKnJu55-w,2819 +pandas/tests/frame/methods/test_first_valid_index.py,sha256=xdPdloFoOgZsGBinRa0XjY1ZyWSeMMLlEnmwy2zfauo,3404 +pandas/tests/frame/methods/test_get_numeric_data.py,sha256=PLQ7Jm8neTLOBvYva55oo0O-qgMfd8YhMKN5ZW67dms,3198 +pandas/tests/frame/methods/test_head_tail.py,sha256=nZIygQxyyPQK2G4hgw6zkeF6H_KWoTOZ6rp_zZNQMxY,1911 +pandas/tests/frame/methods/test_infer_objects.py,sha256=LNOf2VJsV17FDT9ogEDba6la414yUmm5z_7B97nLN24,1241 +pandas/tests/frame/methods/test_interpolate.py,sha256=xfVvuK-_maemzwrkCfeEc6eHHXJy7tl1dG-EmbssjWU,13261 +pandas/tests/frame/methods/test_is_homogeneous_dtype.py,sha256=NNyf83FGWwcQyaysOSPyRSUR-okaNUY2L0n8Bils9ac,1422 +pandas/tests/frame/methods/test_isin.py,sha256=UESDv240y76igDcysHnRG1KNuwsdHBtmebkO1eXiblU,7323 +pandas/tests/frame/methods/test_join.py,sha256=xRv4Vti8GUUwnhj9SZuZIVY9P7IlEE-ogrl08rSzUv0,11405 +pandas/tests/frame/methods/test_matmul.py,sha256=f6DaX_lJ6LB_dsDZLe2eKEI-JB-wHqN3OOjhqN8CMqk,2847 
+pandas/tests/frame/methods/test_nlargest.py,sha256=XVK02dOuHPZR1Me8HBh6rsXaW2Xt7TYP0_QSMR_WBn0,6985 +pandas/tests/frame/methods/test_pct_change.py,sha256=3GX0NDa0w9xtYgQ6E9NijoYUo28goaGxurdbqXGZ3AU,4541 +pandas/tests/frame/methods/test_pipe.py,sha256=O02KyRDhl49kmZ9L0r6pyr8bjA8-Y6XTxQYhMfJta3A,1025 +pandas/tests/frame/methods/test_pop.py,sha256=lRpy3Khytq13pv8Zva9PPWC6YmLbbQx7k2cGaMpm5pc,2116 +pandas/tests/frame/methods/test_quantile.py,sha256=thf8HqxYCx4uzq81JkK3w4F70mQVUYPgr5AjsMwMrZc,26037 +pandas/tests/frame/methods/test_rank.py,sha256=zFRCL740kT4vEoYKLsaI2WeuUs7Uc47CeUD4J9-Wx5o,17061 +pandas/tests/frame/methods/test_reindex.py,sha256=PrwhXlW2ebDZ8P736s2naEH81MeIAnAEFxYOE64kQbI,44165 +pandas/tests/frame/methods/test_reindex_like.py,sha256=2qgqaHDSEKYO1hwE9MaPTFJhl4m7rejHyuOcrmvqaBg,1187 +pandas/tests/frame/methods/test_rename.py,sha256=28gkiQBGHZk2m_r6wTPPa9PLwjNjo7tMRnRqojgcLeY,15382 +pandas/tests/frame/methods/test_rename_axis.py,sha256=90QFtDi0p-8bxEdFfLs75EtJQtJEOTmCdXoiS7h9F-Y,4091 +pandas/tests/frame/methods/test_reorder_levels.py,sha256=iJraDCEnBNTSn_1L8SLGUpq2FKQFkUiaIZ9-htEGwyc,2730 +pandas/tests/frame/methods/test_replace.py,sha256=o9rH_sUlN3EbCYI4H5-mKc9PngbTed5PbxxG6_HJLn4,55893 +pandas/tests/frame/methods/test_reset_index.py,sha256=xMX8cc4FGm8k5bzGe-e1ONmQBHuBkHy67qjznS2NX6M,25127 +pandas/tests/frame/methods/test_round.py,sha256=AFCtQx_PpZwhVpby-glnlsmIthJgKmWklZ7JW6kOkGg,7755 +pandas/tests/frame/methods/test_sample.py,sha256=WyS9LYkGD8oUUx-ZD3pC4Nkqqg67QpKvElvkPFgxADI,13213 +pandas/tests/frame/methods/test_select_dtypes.py,sha256=d2VyM9YTN4NxpK0HzXutuKxQhm_7k3P5WN-KaF4Lh0s,15566 +pandas/tests/frame/methods/test_set_axis.py,sha256=JPKl-K4NstKEdIntl6UKVt4z6XNDipggenXG8l_s6v0,3928 +pandas/tests/frame/methods/test_set_index.py,sha256=-qLMy3RdtChemCNUGWsqxfsFTlqlcmShAOO0xeLftkQ,25979 +pandas/tests/frame/methods/test_shift.py,sha256=dJPDR8fTqoJWR2e33u-LFpCFXHM2oWVEvlc_JOHLkGU,24191 +pandas/tests/frame/methods/test_sort_index.py,sha256=FApsOr6aSvrxRCcF_y7_VEsbaXnLlnntzAymFHcxegQ,31393 +pandas/tests/frame/methods/test_sort_values.py,sha256=PeftyLUqYcgHG7BbD1Cqv29_I39jaZfpBVLxwbJBkZQ,31072 +pandas/tests/frame/methods/test_swapaxes.py,sha256=2UK5z7SGHTMFWqIGmwZeCwu1xD_3EV7bb8y36yN8lAA,664 +pandas/tests/frame/methods/test_swaplevel.py,sha256=Y8npUpIQM0lSdIwY7auGcLJaF21JOb-KlVU3cvSLsOg,1277 +pandas/tests/frame/methods/test_to_csv.py,sha256=zY8ufx6rJ2pEo4geuk7NxpG3g82UkjqStXQhgZfYe4U,47068 +pandas/tests/frame/methods/test_to_dict.py,sha256=NeX6PcID9islmlH0k4A-Px0-5_RzErPlf6XFlhphRLY,11953 +pandas/tests/frame/methods/test_to_dict_of_blocks.py,sha256=Scpa1R0YGSEM0ZCQK6Sq7XksVzic0xAlbzuVKRGxmgA,2281 +pandas/tests/frame/methods/test_to_numpy.py,sha256=YNmAJxCgQqOlb0imJ5dA-Tg0U8kWdT9T8xDXVQElBoI,1256 +pandas/tests/frame/methods/test_to_period.py,sha256=OG8QFxjUwnmr468Gj2qgkSa8JckhY2r6MUfLbS75n-U,2708 +pandas/tests/frame/methods/test_to_records.py,sha256=vNBF8GnEjKvsYUz04FWxFJlBM0x_XpWT_IIrzru17tQ,14696 +pandas/tests/frame/methods/test_to_timestamp.py,sha256=hhK7-89SXyI5zPyARvOB4f0sZvk_PkFHNyx0ATEhqrI,5750 +pandas/tests/frame/methods/test_transpose.py,sha256=yQfShuapnTEBLpUk72aBcen9gc5-WHfp_hoefKv8FbA,3800 +pandas/tests/frame/methods/test_truncate.py,sha256=d4Zo83343thpN6vrhI8bjGsMGQ0akXHZaeMRKORixZ0,5245 +pandas/tests/frame/methods/test_tz_convert.py,sha256=wXeFxbVBM2DY7J-G8up5j5J-Mu1Dbbn8ZkywFDIFuFs,4738 +pandas/tests/frame/methods/test_tz_localize.py,sha256=LlJMkj15fW-sLZq4kjGWckOPkBEHIj7YHeCfvcTxJPk,2050 
+pandas/tests/frame/methods/test_update.py,sha256=763go-lnpxOhGyIeXTIPVzuqPOGP-ydR8URqg_jjn7o,4604 +pandas/tests/frame/methods/test_value_counts.py,sha256=cTnehyupmQqauD0Rxqd50CcvRHb0Z86cjRlNFg2xsOQ,3871 +pandas/tests/frame/methods/test_values.py,sha256=mKA95a2n-PKmGYWRUQITuWsfurz8cF-MnS5FjpeJZug,8977 +pandas/tests/frame/test_alter_axes.py,sha256=yHyCho1zs84UETsGGtw-gf3eTIyPj9zYUUA7wHTdRVk,873 +pandas/tests/frame/test_api.py,sha256=jY27oKsbVIc5NgD11Sey5Uc5lt54Qb0WQ9QV_QxP78Q,11427 +pandas/tests/frame/test_arithmetic.py,sha256=MdbeP58ws8BV8K9p-crhd4X2nmHzpw0uKcXpT60n5aM,66108 +pandas/tests/frame/test_block_internals.py,sha256=kXnhZOjvRuwMt0dktAp1-NNDaGRV0HcNGQxp07GAtV4,14937 +pandas/tests/frame/test_constructors.py,sha256=hxmt6BAVMM5134weVF-SwsIOFuO6sHY44YjIt_a1CoA,112898 +pandas/tests/frame/test_cumulative.py,sha256=Ku20LYWW1hrycH8gslF8oNwXMv88RmaJC7x0a5GPbYw,2389 +pandas/tests/frame/test_iteration.py,sha256=tTbsHbO08DBCfUiPFLrssQAc5MBFE7kfQXSSx0PiuUo,5145 +pandas/tests/frame/test_logical_ops.py,sha256=7az8x1vOZCWZyWEhF6uprJ_fCwHXF5B5uT5xNwVqUIg,6172 +pandas/tests/frame/test_nonunique_indexes.py,sha256=JjBaMIDTzebHOdQ63MZAWLgrtUoDn2dyCJXH2SzIOkM,11374 +pandas/tests/frame/test_npfuncs.py,sha256=-nRmfp2Eo_HOqAicYpT3as4OQpBlwcl2TokhtmI7enw,853 +pandas/tests/frame/test_query_eval.py,sha256=W9c3h_lCm7qIRpqBqxZY7yTr0fO-MD2Cm_nAYrgPWF0,48423 +pandas/tests/frame/test_reductions.py,sha256=3L6rr1rSaUXwuO6SVU25HwF3DW_BjzkmT9Hyr8W-Hgc,64079 +pandas/tests/frame/test_repr_info.py,sha256=Ptqhp83xe7pC3EVJ13ad4cmiYW_gAfdu5fIQyboMSTc,10734 +pandas/tests/frame/test_stack_unstack.py,sha256=bqVNeuDn2fUYgQ92q0bwqju7SyT1KMyyRf6vQAfGvBg,76055 +pandas/tests/frame/test_subclass.py,sha256=Pbx_eTfCBgg2M5y0UwKylZzvHDsl_hWj4SJFVia8HlY,24353 +pandas/tests/frame/test_ufunc.py,sha256=DOgQtTus7poUO4SurDLqNCLoxnbqROt5vepb09ir7fo,10676 +pandas/tests/frame/test_unary.py,sha256=XRhJGua48AgnoZWS9k92DQ7xRAzgWXD71KHARJglPQ4,5489 +pandas/tests/frame/test_validate.py,sha256=hSQAfdZOKBe2MnbTBgWULmtA459zctixj7Qjy6bRg20,1094 +pandas/tests/generic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/generic/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/generic/__pycache__/test_duplicate_labels.cpython-38.pyc,, +pandas/tests/generic/__pycache__/test_finalize.cpython-38.pyc,, +pandas/tests/generic/__pycache__/test_frame.cpython-38.pyc,, +pandas/tests/generic/__pycache__/test_generic.cpython-38.pyc,, +pandas/tests/generic/__pycache__/test_label_or_level_utils.cpython-38.pyc,, +pandas/tests/generic/__pycache__/test_series.cpython-38.pyc,, +pandas/tests/generic/__pycache__/test_to_xarray.cpython-38.pyc,, +pandas/tests/generic/test_duplicate_labels.py,sha256=is0Gv82hMt0cvpHFgZWS0jbpCH4RBBthwMI-D1ZDb7M,16041 +pandas/tests/generic/test_finalize.py,sha256=FXes9u3qp5nRwfu5HU_cCvz_B4JMNnefIzJBYCLvMP8,27375 +pandas/tests/generic/test_frame.py,sha256=UQUqlSu5oFCtIMe4fmFgFMr8IzHwOxHJyJA6NL-OAII,6818 +pandas/tests/generic/test_generic.py,sha256=nL80qpZxGJr0Mt5d8FedjGMrzVsaxrwcpoXpDg4upNQ,16043 +pandas/tests/generic/test_label_or_level_utils.py,sha256=soIfBe2_-11y66WUkQ8YjpR4_sNAJwymJiRgcEZgiqc,10257 +pandas/tests/generic/test_series.py,sha256=G1CB9IgsgK7Hak2uR3Ut5N_7-o6Gu_yRSrh9A-sCjtw,4674 +pandas/tests/generic/test_to_xarray.py,sha256=Hp511AhRkEx_LgZ8f-kUBReX0HEadJzMz_DOhQYegnM,4120 +pandas/tests/groupby/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/groupby/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/groupby/__pycache__/conftest.cpython-38.pyc,, 
+pandas/tests/groupby/__pycache__/test_allowlist.cpython-38.pyc,, +pandas/tests/groupby/__pycache__/test_any_all.cpython-38.pyc,, +pandas/tests/groupby/__pycache__/test_apply.cpython-38.pyc,, +pandas/tests/groupby/__pycache__/test_apply_mutate.cpython-38.pyc,, +pandas/tests/groupby/__pycache__/test_bin_groupby.cpython-38.pyc,, +pandas/tests/groupby/__pycache__/test_categorical.cpython-38.pyc,, +pandas/tests/groupby/__pycache__/test_counting.cpython-38.pyc,, +pandas/tests/groupby/__pycache__/test_filters.cpython-38.pyc,, +pandas/tests/groupby/__pycache__/test_frame_value_counts.cpython-38.pyc,, +pandas/tests/groupby/__pycache__/test_function.cpython-38.pyc,, +pandas/tests/groupby/__pycache__/test_groupby.cpython-38.pyc,, +pandas/tests/groupby/__pycache__/test_groupby_dropna.cpython-38.pyc,, +pandas/tests/groupby/__pycache__/test_groupby_shift_diff.cpython-38.pyc,, +pandas/tests/groupby/__pycache__/test_groupby_subclass.cpython-38.pyc,, +pandas/tests/groupby/__pycache__/test_grouping.cpython-38.pyc,, +pandas/tests/groupby/__pycache__/test_index_as_string.cpython-38.pyc,, +pandas/tests/groupby/__pycache__/test_indexing.cpython-38.pyc,, +pandas/tests/groupby/__pycache__/test_libgroupby.cpython-38.pyc,, +pandas/tests/groupby/__pycache__/test_min_max.cpython-38.pyc,, +pandas/tests/groupby/__pycache__/test_missing.cpython-38.pyc,, +pandas/tests/groupby/__pycache__/test_nth.cpython-38.pyc,, +pandas/tests/groupby/__pycache__/test_numba.cpython-38.pyc,, +pandas/tests/groupby/__pycache__/test_nunique.cpython-38.pyc,, +pandas/tests/groupby/__pycache__/test_pipe.cpython-38.pyc,, +pandas/tests/groupby/__pycache__/test_quantile.cpython-38.pyc,, +pandas/tests/groupby/__pycache__/test_rank.cpython-38.pyc,, +pandas/tests/groupby/__pycache__/test_sample.cpython-38.pyc,, +pandas/tests/groupby/__pycache__/test_size.cpython-38.pyc,, +pandas/tests/groupby/__pycache__/test_timegrouper.cpython-38.pyc,, +pandas/tests/groupby/__pycache__/test_value_counts.cpython-38.pyc,, +pandas/tests/groupby/aggregate/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/groupby/aggregate/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/groupby/aggregate/__pycache__/test_aggregate.cpython-38.pyc,, +pandas/tests/groupby/aggregate/__pycache__/test_cython.cpython-38.pyc,, +pandas/tests/groupby/aggregate/__pycache__/test_numba.cpython-38.pyc,, +pandas/tests/groupby/aggregate/__pycache__/test_other.cpython-38.pyc,, +pandas/tests/groupby/aggregate/test_aggregate.py,sha256=Yf0IVcAHP1eoVZK5djD9-7z5rwAUX9iUebyEo-rqN5Q,46130 +pandas/tests/groupby/aggregate/test_cython.py,sha256=UTkKYoKMiB6_CTgIBiOjocr2XSgVBV8k4a9A3sEv9UQ,11123 +pandas/tests/groupby/aggregate/test_numba.py,sha256=e61SR0_55K6tRpGMp94Gvx6RahvAqTWqBFe_GHrqwEg,6242 +pandas/tests/groupby/aggregate/test_other.py,sha256=y4cjSdB-wmxSIi1SQDHTlZJHsHXcaks-CG23jYtGa9k,20199 +pandas/tests/groupby/conftest.py,sha256=-J7pe_ghpIrE_InE8Tdotc743lCBKxV8XXhrETj7YsQ,4332 +pandas/tests/groupby/test_allowlist.py,sha256=YRca2rdQv3Qmgfw3ZMTS88r-Muvlup_1CE-ayFxrDE8,10469 +pandas/tests/groupby/test_any_all.py,sha256=7bZ2lunM3nM77u-8hXUDi8EXHJJQ5LKXKXdReg5NOUU,5803 +pandas/tests/groupby/test_apply.py,sha256=yqyC4WdFXpvaNkXU3Hvd2tLA2qOt0Hjxr6Sts-zjgug,38127 +pandas/tests/groupby/test_apply_mutate.py,sha256=tkBu37FDnl52qUU4mZe3K1XQ7MJ4h000udkcsLDIcB4,3496 +pandas/tests/groupby/test_bin_groupby.py,sha256=Rz7vS9dcG2yOBy5f0hzXpkUUUB56a74S_-CVlbBgmw4,1799 +pandas/tests/groupby/test_categorical.py,sha256=K5JniwTMgTpXT03aYZa4mY1JlJog7BARKbEmPl_2TXA,59535 
+pandas/tests/groupby/test_counting.py,sha256=_ZyOs0IOhBJXLayVppR_ktmJqLVxUii7D5n7q7Wpgzw,12833 +pandas/tests/groupby/test_filters.py,sha256=aB5MEvILm1inUIm3D8GwjHnSwmlBmXV3lqSdqq5OzvI,20777 +pandas/tests/groupby/test_frame_value_counts.py,sha256=rljYAljcZUbeiM3Nh5C2-5MmOpFgXcw_Jj6upteDmnE,14525 +pandas/tests/groupby/test_function.py,sha256=1GVkVyHUhYHayqW20d0q5KopTJRs6MOzxuuS4mC_Q9A,38882 +pandas/tests/groupby/test_groupby.py,sha256=gwg0g0e-g2h-DwbZoLLOSsp-dHSMsyWxTKxM-Yy6688,82134 +pandas/tests/groupby/test_groupby_dropna.py,sha256=cb91HQs4UYa94iLjHzEufmlYQPZKOsfvd5fUsNAqBo4,11719 +pandas/tests/groupby/test_groupby_shift_diff.py,sha256=pY2ZV1bNNvVynQe_arN9x0v82H_-Jp8-z1zHPmMGQOg,4008 +pandas/tests/groupby/test_groupby_subclass.py,sha256=HfC_thR2MIfmS_WmUKNxGf1fxmoVvOcdIeMMp-O8Ycw,3693 +pandas/tests/groupby/test_grouping.py,sha256=ooHREBYcyJL7fWQccNmN07vinXEEcjTmTKjgx9Trjec,36531 +pandas/tests/groupby/test_index_as_string.py,sha256=KQOXdEoOc0Y1ebWV79oZYzUH8soAb0SaISo2qBCiPNk,2069 +pandas/tests/groupby/test_indexing.py,sha256=o7zOuV1ixYlN2-rckWDw4RHLa9UWhrnf42QMxMGD_iE,9064 +pandas/tests/groupby/test_libgroupby.py,sha256=iLChbCSej0DYd3HD60ccyl_bZ9rk40PfSGDJSwEIKWk,8997 +pandas/tests/groupby/test_min_max.py,sha256=eHLWyJt79vx-gIb2K15Ta9bdQZNcmS5_mAMxfMMcXa0,7136 +pandas/tests/groupby/test_missing.py,sha256=XSAz12qEgbkRzEAXmH79uya4nZq6UVH0trplfkIO8AY,4864 +pandas/tests/groupby/test_nth.py,sha256=FOwkzh5963J1hgUHWOzERcPiAHpcd46gY4SSmfbE5xg,26339 +pandas/tests/groupby/test_numba.py,sha256=wNn3-4BPvbfVdhaKatUwrvbWuCY4uKPEbAzQIbzSPPE,3134 +pandas/tests/groupby/test_nunique.py,sha256=E0gGe0xWAdGDacjKppQjwXb9ELkTzHo-K2w6nVEnnz0,5799 +pandas/tests/groupby/test_pipe.py,sha256=pbt3y7KjEuV7spNLaTNUV6ssiLmNteho7Is3_4PVoHs,2118 +pandas/tests/groupby/test_quantile.py,sha256=Nd3ItPCXeLcQLBTl3vftv-YWTm57JDE1WpRnBiSXHd4,11071 +pandas/tests/groupby/test_rank.py,sha256=OSmgwbBRhnxZLe-ezysnsZkVkzHUTnHEWLmLlK4hk7Q,21926 +pandas/tests/groupby/test_sample.py,sha256=6vw-nQ-hKwdoM2nc4MoDNrAgsBgIbY0Wo1a5bxl1Niw,4926 +pandas/tests/groupby/test_size.py,sha256=jnxZOOdtIuDXIMfZzfKCs26-n8UmT9zskkMCUrrxQIk,2169 +pandas/tests/groupby/test_timegrouper.py,sha256=9i9Ib6IzXdTY3wFjuYat5h3gT1eP079jXFaz5WgcmlU,32719 +pandas/tests/groupby/test_value_counts.py,sha256=fnvfF-13pvacmV4CKwnZvG7SMOjRuhIVY2vTA2qfE1E,5011 +pandas/tests/groupby/transform/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/groupby/transform/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/groupby/transform/__pycache__/test_numba.cpython-38.pyc,, +pandas/tests/groupby/transform/__pycache__/test_transform.cpython-38.pyc,, +pandas/tests/groupby/transform/test_numba.py,sha256=s_hTbx8WX-eLKCzUgaG4btCxGO41t2PCuH-3L3M0KKg,6015 +pandas/tests/groupby/transform/test_transform.py,sha256=B-WWDWWGnczRIQQ1OPd2pt-Pl5eDfuX3zFCVcIk_jyA,42661 +pandas/tests/indexes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/indexes/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/indexes/__pycache__/common.cpython-38.pyc,, +pandas/tests/indexes/__pycache__/conftest.cpython-38.pyc,, +pandas/tests/indexes/__pycache__/datetimelike.cpython-38.pyc,, +pandas/tests/indexes/__pycache__/test_any_index.cpython-38.pyc,, +pandas/tests/indexes/__pycache__/test_base.cpython-38.pyc,, +pandas/tests/indexes/__pycache__/test_common.cpython-38.pyc,, +pandas/tests/indexes/__pycache__/test_engines.cpython-38.pyc,, +pandas/tests/indexes/__pycache__/test_frozen.cpython-38.pyc,, +pandas/tests/indexes/__pycache__/test_index_new.cpython-38.pyc,, 
+pandas/tests/indexes/__pycache__/test_indexing.cpython-38.pyc,, +pandas/tests/indexes/__pycache__/test_numpy_compat.cpython-38.pyc,, +pandas/tests/indexes/__pycache__/test_setops.cpython-38.pyc,, +pandas/tests/indexes/base_class/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/indexes/base_class/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/indexes/base_class/__pycache__/test_constructors.cpython-38.pyc,, +pandas/tests/indexes/base_class/__pycache__/test_formats.cpython-38.pyc,, +pandas/tests/indexes/base_class/__pycache__/test_indexing.cpython-38.pyc,, +pandas/tests/indexes/base_class/__pycache__/test_pickle.cpython-38.pyc,, +pandas/tests/indexes/base_class/__pycache__/test_reshape.cpython-38.pyc,, +pandas/tests/indexes/base_class/__pycache__/test_setops.cpython-38.pyc,, +pandas/tests/indexes/base_class/__pycache__/test_where.cpython-38.pyc,, +pandas/tests/indexes/base_class/test_constructors.py,sha256=G3J3aqqIKEkk1I4MuPQaMojFJKIO3yyLaKWpyu5PP_8,1424 +pandas/tests/indexes/base_class/test_formats.py,sha256=xNzgJAG9JtdV7oMSuBWymTC3SXI2vOL07JkKV7EV6Gs,5611 +pandas/tests/indexes/base_class/test_indexing.py,sha256=9NcvgMbryALEkzM6kUhgDubrPnNDpINf251K0kU48O8,2854 +pandas/tests/indexes/base_class/test_pickle.py,sha256=ANKn2SirZRA2AHaZoCDHCB1AjLEuUTgXU2mXI6n3Tvw,309 +pandas/tests/indexes/base_class/test_reshape.py,sha256=Nh5S8jgnZ8iV86sdYnAHs2IHVAcitx0J66qWpDRN2pY,2716 +pandas/tests/indexes/base_class/test_setops.py,sha256=xAGUMcjexuCjntIDCIyRWJDrImZ9X8y6X4EL-OwZzoY,9063 +pandas/tests/indexes/base_class/test_where.py,sha256=uq7oB-lk7rsgYQer8qeUsqD5aSECtRPSEUfKzn91BiE,341 +pandas/tests/indexes/categorical/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/indexes/categorical/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/indexes/categorical/__pycache__/test_append.cpython-38.pyc,, +pandas/tests/indexes/categorical/__pycache__/test_astype.cpython-38.pyc,, +pandas/tests/indexes/categorical/__pycache__/test_category.cpython-38.pyc,, +pandas/tests/indexes/categorical/__pycache__/test_constructors.cpython-38.pyc,, +pandas/tests/indexes/categorical/__pycache__/test_equals.cpython-38.pyc,, +pandas/tests/indexes/categorical/__pycache__/test_fillna.cpython-38.pyc,, +pandas/tests/indexes/categorical/__pycache__/test_formats.cpython-38.pyc,, +pandas/tests/indexes/categorical/__pycache__/test_indexing.cpython-38.pyc,, +pandas/tests/indexes/categorical/__pycache__/test_map.cpython-38.pyc,, +pandas/tests/indexes/categorical/__pycache__/test_reindex.cpython-38.pyc,, +pandas/tests/indexes/categorical/test_append.py,sha256=LjLMq8GkNrsIVNfTrujLv_TlKo79oA_XbpNUFs-pqVQ,2191 +pandas/tests/indexes/categorical/test_astype.py,sha256=1xSjRgrW3uwCVIQAEJLFfp4cv3IY6FEN4oucfsmAyRE,2746 +pandas/tests/indexes/categorical/test_category.py,sha256=nQoXxwyqv4osSsJ0o-dMp0B-o9bus_D6GCcGEjvNvqs,14064 +pandas/tests/indexes/categorical/test_constructors.py,sha256=b-7DNFa8vfGNeBR1nrg4IgT8FZRe7Z75zxnsXr379II,6229 +pandas/tests/indexes/categorical/test_equals.py,sha256=QJ6N7HfB6IG-1C0CtChYGm4EkoimjNWio_1P_1XVMJ4,3331 +pandas/tests/indexes/categorical/test_fillna.py,sha256=sH68aWCabI2qy5dbgxQCXeTfvn1NQgDfM1OT4ojFmaU,1850 +pandas/tests/indexes/categorical/test_formats.py,sha256=3poo3bcz4p5yCzvLOYZWj_f4KYAQ3-TUcPTSr6kd-gg,5944 +pandas/tests/indexes/categorical/test_indexing.py,sha256=Gs14eomkyPJiUZFWSk0fB_FiC7srC6vz43hMR7koylc,14510 +pandas/tests/indexes/categorical/test_map.py,sha256=adVrNr5LIFJ_l9A2eQkVeDQUwIekXh78ZNs8aZoAb1s,4093 
+pandas/tests/indexes/categorical/test_reindex.py,sha256=5nbuYb16CyudQ6jXNJxvDBE_VMW6CdtVvliCNvbzqEY,3591 +pandas/tests/indexes/common.py,sha256=gtDjWYgmsAafo5Be9zStufKqtVyCGsDh8igHbvB-7Vo,31990 +pandas/tests/indexes/conftest.py,sha256=oDiXsICtGhGR9WTOSlYybAZgYqTs6yDgKKiouXUuYmI,983 +pandas/tests/indexes/datetimelike.py,sha256=6ynLhYJdG74mzNLVlLwFFIbyLnlkSDPzp6fSoCmRHOk,4346 +pandas/tests/indexes/datetimelike_/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/indexes/datetimelike_/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/indexes/datetimelike_/__pycache__/test_drop_duplicates.cpython-38.pyc,, +pandas/tests/indexes/datetimelike_/__pycache__/test_equals.cpython-38.pyc,, +pandas/tests/indexes/datetimelike_/__pycache__/test_indexing.cpython-38.pyc,, +pandas/tests/indexes/datetimelike_/__pycache__/test_is_monotonic.cpython-38.pyc,, +pandas/tests/indexes/datetimelike_/__pycache__/test_nat.cpython-38.pyc,, +pandas/tests/indexes/datetimelike_/__pycache__/test_sort_values.cpython-38.pyc,, +pandas/tests/indexes/datetimelike_/__pycache__/test_value_counts.cpython-38.pyc,, +pandas/tests/indexes/datetimelike_/test_drop_duplicates.py,sha256=YaSJls-kbciPJI9-uCrcjlNEFWLnfOb-3s3ByJLJnrw,2394 +pandas/tests/indexes/datetimelike_/test_equals.py,sha256=6A64-PNVmrIqCYDkRJDmA5z5meRiPjyXj76KZh2NF0A,6303 +pandas/tests/indexes/datetimelike_/test_indexing.py,sha256=yKVl5vWHldODLEJGoRe3qgcV_Hi9wFBizoy_n3IhNM0,1297 +pandas/tests/indexes/datetimelike_/test_is_monotonic.py,sha256=LyK-3qh5kbQBPTmQHaKz5_8JST_7AARY7bCoAM2iI0E,1478 +pandas/tests/indexes/datetimelike_/test_nat.py,sha256=6-Yr-n4JskfsjbaEPFgaRPKX4S7R-LhQOEQSC7cBybw,1335 +pandas/tests/indexes/datetimelike_/test_sort_values.py,sha256=dJBlR2FkIpT9R5e6L12Q2GqUlGNaBiYJwnP7i1lyen0,11421 +pandas/tests/indexes/datetimelike_/test_value_counts.py,sha256=DqURabH7nRmJBxNij4np7Y5KSdMme1uh2RAlGbUQkfs,3108 +pandas/tests/indexes/datetimes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/indexes/datetimes/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/indexes/datetimes/__pycache__/test_asof.cpython-38.pyc,, +pandas/tests/indexes/datetimes/__pycache__/test_constructors.cpython-38.pyc,, +pandas/tests/indexes/datetimes/__pycache__/test_date_range.cpython-38.pyc,, +pandas/tests/indexes/datetimes/__pycache__/test_datetime.cpython-38.pyc,, +pandas/tests/indexes/datetimes/__pycache__/test_datetimelike.cpython-38.pyc,, +pandas/tests/indexes/datetimes/__pycache__/test_delete.cpython-38.pyc,, +pandas/tests/indexes/datetimes/__pycache__/test_formats.cpython-38.pyc,, +pandas/tests/indexes/datetimes/__pycache__/test_freq_attr.cpython-38.pyc,, +pandas/tests/indexes/datetimes/__pycache__/test_indexing.cpython-38.pyc,, +pandas/tests/indexes/datetimes/__pycache__/test_join.cpython-38.pyc,, +pandas/tests/indexes/datetimes/__pycache__/test_map.cpython-38.pyc,, +pandas/tests/indexes/datetimes/__pycache__/test_misc.cpython-38.pyc,, +pandas/tests/indexes/datetimes/__pycache__/test_npfuncs.cpython-38.pyc,, +pandas/tests/indexes/datetimes/__pycache__/test_ops.cpython-38.pyc,, +pandas/tests/indexes/datetimes/__pycache__/test_partial_slicing.cpython-38.pyc,, +pandas/tests/indexes/datetimes/__pycache__/test_pickle.cpython-38.pyc,, +pandas/tests/indexes/datetimes/__pycache__/test_reindex.cpython-38.pyc,, +pandas/tests/indexes/datetimes/__pycache__/test_scalar_compat.cpython-38.pyc,, +pandas/tests/indexes/datetimes/__pycache__/test_setops.cpython-38.pyc,, +pandas/tests/indexes/datetimes/__pycache__/test_timezones.cpython-38.pyc,, 
+pandas/tests/indexes/datetimes/__pycache__/test_unique.cpython-38.pyc,, +pandas/tests/indexes/datetimes/methods/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/indexes/datetimes/methods/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/indexes/datetimes/methods/__pycache__/test_astype.cpython-38.pyc,, +pandas/tests/indexes/datetimes/methods/__pycache__/test_factorize.cpython-38.pyc,, +pandas/tests/indexes/datetimes/methods/__pycache__/test_fillna.cpython-38.pyc,, +pandas/tests/indexes/datetimes/methods/__pycache__/test_insert.cpython-38.pyc,, +pandas/tests/indexes/datetimes/methods/__pycache__/test_isocalendar.cpython-38.pyc,, +pandas/tests/indexes/datetimes/methods/__pycache__/test_repeat.cpython-38.pyc,, +pandas/tests/indexes/datetimes/methods/__pycache__/test_shift.cpython-38.pyc,, +pandas/tests/indexes/datetimes/methods/__pycache__/test_snap.cpython-38.pyc,, +pandas/tests/indexes/datetimes/methods/__pycache__/test_to_frame.cpython-38.pyc,, +pandas/tests/indexes/datetimes/methods/__pycache__/test_to_period.cpython-38.pyc,, +pandas/tests/indexes/datetimes/methods/__pycache__/test_to_series.cpython-38.pyc,, +pandas/tests/indexes/datetimes/methods/test_astype.py,sha256=2z5AWj4xg8SmHzAbuWit_-2YvzmpyHftcjN9ktnRPF4,11877 +pandas/tests/indexes/datetimes/methods/test_factorize.py,sha256=zifOBKB5_vVrBRcEKwxzjQHlh1Pqf6Djdy6E4UNAjqA,3650 +pandas/tests/indexes/datetimes/methods/test_fillna.py,sha256=eESnVTQ8J3iBL24bWKt7TmHxC5FJiLZMpKjw1V376qY,2004 +pandas/tests/indexes/datetimes/methods/test_insert.py,sha256=Xc4TJvuUxO872w62OBXxa-07N1InMub6IYUwyA-Gif4,9474 +pandas/tests/indexes/datetimes/methods/test_isocalendar.py,sha256=Fn46hJpQtTNew2ThFpjzQm_lQ1MacKq_TmMEvCGgaZg,674 +pandas/tests/indexes/datetimes/methods/test_repeat.py,sha256=oYwWHoEg8sPH5wn8WtIaxrZKr5vBttp4pBboN9Dm0tk,2397 +pandas/tests/indexes/datetimes/methods/test_shift.py,sha256=fv2MuUfGDZ3dr9nPUl9rd6bTLJXbA3BhXl-FRaspplY,5476 +pandas/tests/indexes/datetimes/methods/test_snap.py,sha256=EKbI_zP-AZsJI80hmuArjD7o-HorHIIOpvLCZpC3b6w,1195 +pandas/tests/indexes/datetimes/methods/test_to_frame.py,sha256=HTPLfWBXDgutMFm8zL1JA9quz_J2or-_QnKyJRIdx3Y,1150 +pandas/tests/indexes/datetimes/methods/test_to_period.py,sha256=mw7eVEnTwHU0ctTsDmXz-ZSk1-serTVMEsK4S4PGf_Q,6751 +pandas/tests/indexes/datetimes/methods/test_to_series.py,sha256=FzOEACb50yqSkabiMVEADrSn9FQktYVoYl9w0WQSxqA,1275 +pandas/tests/indexes/datetimes/test_asof.py,sha256=-fxHseqPYK14ugv7nc3x_WBHx8eY_UrhLosw4XIPRHM,751 +pandas/tests/indexes/datetimes/test_constructors.py,sha256=miHYkqSZdB4-iXNaoL3xXZiUjiKBqo3QPNnBXQUANUk,41705 +pandas/tests/indexes/datetimes/test_date_range.py,sha256=51skIMWX-uqgrs7sYDb64l1TJ41ZcWYk7YuW-HYGngM,39865 +pandas/tests/indexes/datetimes/test_datetime.py,sha256=LKB3X7iAoMi2f3JhV-lXxRqBhxzN04V9ZYjF1t-znxw,5703 +pandas/tests/indexes/datetimes/test_datetimelike.py,sha256=dojYWFsorAGyfnZcDYxaWm75h7Pk5N_iXgQuTCp64yQ,991 +pandas/tests/indexes/datetimes/test_delete.py,sha256=O1cbea-LEF4l8yHLLrNaLBI67KXrfKUvYlYzQ_4DGfo,4594 +pandas/tests/indexes/datetimes/test_formats.py,sha256=PJ8SAOqjWhuyMClebBjeYcm1-ey-ZVI08Rwbv-e5j8w,9408 +pandas/tests/indexes/datetimes/test_freq_attr.py,sha256=GbLRYe-E-Jraq7UVQrdnuwwUEHyy9vA_DQLK1cDvUz0,1732 +pandas/tests/indexes/datetimes/test_indexing.py,sha256=ZyRhnqlhUm2d_jAjtFfcFX8IMV80HvEtsGk4EEq3Bow,28271 +pandas/tests/indexes/datetimes/test_join.py,sha256=4vubcA78AELHUl4XM-vUo12yyF1dNwftTRzZUYY0eIA,4806 +pandas/tests/indexes/datetimes/test_map.py,sha256=JILLZ1zcVd7jXKYWrgek7CtymjbTaEQajLMfVwZBr4A,1370 
+pandas/tests/indexes/datetimes/test_misc.py,sha256=D1UaUZII0eMkd_zLvtX7gyKvinj2iPLNxNOS9IX2YGs,12200 +pandas/tests/indexes/datetimes/test_npfuncs.py,sha256=cjjuxeekM2IUf-nx3WKVonrwNAuhZnVgQHNAXdhglog,384 +pandas/tests/indexes/datetimes/test_ops.py,sha256=ADlYknXyZpYXXnSe5LRlHjk3hGIVkfhZWgLeUUWdK4Y,2175 +pandas/tests/indexes/datetimes/test_partial_slicing.py,sha256=OFu76yaekO1AGl1bLGnPskJO8DTPX1pOHBw1nZ1KSjo,16423 +pandas/tests/indexes/datetimes/test_pickle.py,sha256=cpuQl8fsaqJhP4qroLU0LUQjqFQ0uaX3sHql2UYOSg4,1358 +pandas/tests/indexes/datetimes/test_reindex.py,sha256=s1pt3OlK_JdWcaHsxlsvSh34mqFsR4wrONAwFBo5yVw,2145 +pandas/tests/indexes/datetimes/test_scalar_compat.py,sha256=yEn2j4qufAelCcllVAp7D6XtUTOcUGOZubuGsA9XVk8,12675 +pandas/tests/indexes/datetimes/test_setops.py,sha256=2HmaXQgynYeCIEVvCp44B0zM-x7b3LK83U505B_T2nI,20640 +pandas/tests/indexes/datetimes/test_timezones.py,sha256=s8846zR9HY3ALAC7IdQu9lUndikx7xx7SndDX7bR7Kw,45061 +pandas/tests/indexes/datetimes/test_unique.py,sha256=drRXIZFi4WYjNzrJvqaD4nuoJdufsHpj3Tk-2StIruE,2065 +pandas/tests/indexes/interval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/indexes/interval/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/indexes/interval/__pycache__/test_astype.cpython-38.pyc,, +pandas/tests/indexes/interval/__pycache__/test_base.cpython-38.pyc,, +pandas/tests/indexes/interval/__pycache__/test_constructors.cpython-38.pyc,, +pandas/tests/indexes/interval/__pycache__/test_equals.cpython-38.pyc,, +pandas/tests/indexes/interval/__pycache__/test_formats.cpython-38.pyc,, +pandas/tests/indexes/interval/__pycache__/test_indexing.cpython-38.pyc,, +pandas/tests/indexes/interval/__pycache__/test_interval.cpython-38.pyc,, +pandas/tests/indexes/interval/__pycache__/test_interval_range.cpython-38.pyc,, +pandas/tests/indexes/interval/__pycache__/test_interval_tree.cpython-38.pyc,, +pandas/tests/indexes/interval/__pycache__/test_join.cpython-38.pyc,, +pandas/tests/indexes/interval/__pycache__/test_pickle.cpython-38.pyc,, +pandas/tests/indexes/interval/__pycache__/test_setops.cpython-38.pyc,, +pandas/tests/indexes/interval/test_astype.py,sha256=P6mpe5dmQJLHsMo0KsMaZ8ZBO-o1YOESyf_zahpZQyo,8571 +pandas/tests/indexes/interval/test_base.py,sha256=ziXak6DrsBFNOhjwkCcOK8-Rkymd8oAazJhBY07F5s4,2372 +pandas/tests/indexes/interval/test_constructors.py,sha256=4ZQURyTiX5TRy2rNIC-eSD7qeii9RprGysbVUoESebc,17207 +pandas/tests/indexes/interval/test_equals.py,sha256=a7GA_whLbOiS4WxUdtDrqKOUhsfqq3TL0nkhqPccuss,1226 +pandas/tests/indexes/interval/test_formats.py,sha256=jqe5OgfjEaQkgm1Vu69Lp0V4hDipxDZbUhGmFDfXrPM,3288 +pandas/tests/indexes/interval/test_indexing.py,sha256=4bJ0lNQyWNOL3xIysNjh9jp5KnM-tbI_Sb0WU5fB7N0,21860 +pandas/tests/indexes/interval/test_interval.py,sha256=pv4ocwhhzzpVdrl5yyodSd53pfQSLdWGdv3WsPlZ4pc,34296 +pandas/tests/indexes/interval/test_interval_range.py,sha256=xUVurv9UbC_boXzljUVkmjNdxVCeRYS33z_JyEhDB74,13249 +pandas/tests/indexes/interval/test_interval_tree.py,sha256=UaLTsyEBVdXkBQJEkD_4R2S5P3YEYye5HfnWblRvky8,7082 +pandas/tests/indexes/interval/test_join.py,sha256=HQJQLS9-RT7de6nBHsw50lBo4arBmXEVZhVMt4iuHyg,1148 +pandas/tests/indexes/interval/test_pickle.py,sha256=Jsmm_p3_qQpfJ9OqCpD3uLMzBkpsxufj1w6iUorYqmk,435 +pandas/tests/indexes/interval/test_setops.py,sha256=Bxr__XGHJyfpOZFZeXkcT95Bw-5qk_pNB6aq8vfUU6M,8118 +pandas/tests/indexes/multi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/indexes/multi/__pycache__/__init__.cpython-38.pyc,, 
+pandas/tests/indexes/multi/__pycache__/conftest.cpython-38.pyc,, +pandas/tests/indexes/multi/__pycache__/test_analytics.cpython-38.pyc,, +pandas/tests/indexes/multi/__pycache__/test_astype.cpython-38.pyc,, +pandas/tests/indexes/multi/__pycache__/test_compat.cpython-38.pyc,, +pandas/tests/indexes/multi/__pycache__/test_constructors.cpython-38.pyc,, +pandas/tests/indexes/multi/__pycache__/test_conversion.cpython-38.pyc,, +pandas/tests/indexes/multi/__pycache__/test_copy.cpython-38.pyc,, +pandas/tests/indexes/multi/__pycache__/test_drop.cpython-38.pyc,, +pandas/tests/indexes/multi/__pycache__/test_duplicates.cpython-38.pyc,, +pandas/tests/indexes/multi/__pycache__/test_equivalence.cpython-38.pyc,, +pandas/tests/indexes/multi/__pycache__/test_formats.cpython-38.pyc,, +pandas/tests/indexes/multi/__pycache__/test_get_level_values.cpython-38.pyc,, +pandas/tests/indexes/multi/__pycache__/test_get_set.cpython-38.pyc,, +pandas/tests/indexes/multi/__pycache__/test_indexing.cpython-38.pyc,, +pandas/tests/indexes/multi/__pycache__/test_integrity.cpython-38.pyc,, +pandas/tests/indexes/multi/__pycache__/test_isin.cpython-38.pyc,, +pandas/tests/indexes/multi/__pycache__/test_join.cpython-38.pyc,, +pandas/tests/indexes/multi/__pycache__/test_lexsort.cpython-38.pyc,, +pandas/tests/indexes/multi/__pycache__/test_missing.cpython-38.pyc,, +pandas/tests/indexes/multi/__pycache__/test_monotonic.cpython-38.pyc,, +pandas/tests/indexes/multi/__pycache__/test_names.cpython-38.pyc,, +pandas/tests/indexes/multi/__pycache__/test_partial_indexing.cpython-38.pyc,, +pandas/tests/indexes/multi/__pycache__/test_pickle.cpython-38.pyc,, +pandas/tests/indexes/multi/__pycache__/test_reindex.cpython-38.pyc,, +pandas/tests/indexes/multi/__pycache__/test_reshape.cpython-38.pyc,, +pandas/tests/indexes/multi/__pycache__/test_setops.cpython-38.pyc,, +pandas/tests/indexes/multi/__pycache__/test_sorting.cpython-38.pyc,, +pandas/tests/indexes/multi/__pycache__/test_take.cpython-38.pyc,, +pandas/tests/indexes/multi/conftest.py,sha256=k0CvyqV7NkDjdB5nLqTzBFRyRH7QmORFSWB1dS0Lwo8,2156 +pandas/tests/indexes/multi/test_analytics.py,sha256=HBhD9wSeJ9ZSmQQHrzSlihOpp1Tr74r_T_WpsubTbp0,6609 +pandas/tests/indexes/multi/test_astype.py,sha256=YmTnPF6qXwvYY82wZfQ8XFwVwOYYsIls3LSrdADDW-4,924 +pandas/tests/indexes/multi/test_compat.py,sha256=7oz0tTWCVkzibvxCvvGV9KMwdt_3pkFkrlu6oCof7Ts,3023 +pandas/tests/indexes/multi/test_constructors.py,sha256=xlMZhwro65DZdmuFysNb5VYcGw8jSVbYWgIAXGKlguY,25751 +pandas/tests/indexes/multi/test_conversion.py,sha256=3ehsK3snfKBLLq0qhEpB1kVdAQj2BS21GVAQeCc5yk0,4193 +pandas/tests/indexes/multi/test_copy.py,sha256=RhYrwYiIE53I-3P-1bvns9yus5oNg76qTKbLTfsQk3g,2798 +pandas/tests/indexes/multi/test_drop.py,sha256=GXKvsJ8uCDLA21ELlPsSkfdvDSeEfAQmxBFG59b2T8s,6092 +pandas/tests/indexes/multi/test_duplicates.py,sha256=Ze8SFwbLtFJaqHk_30jtHsMBuRjUg15NBOVZ5oA3qk8,11048 +pandas/tests/indexes/multi/test_equivalence.py,sha256=app6woQFUZOHBQWDsu-8tig9VvNJALg_UJHuQtYOZys,8881 +pandas/tests/indexes/multi/test_formats.py,sha256=Y63dQuzcurOm92lA52HrJaMRy8ELLv7ldfXi9qbf5b0,8480 +pandas/tests/indexes/multi/test_get_level_values.py,sha256=5hs44GQLjBHDltW1Sywr66w2MN7aLa-O8fF-5EZ6_Sk,3596 +pandas/tests/indexes/multi/test_get_set.py,sha256=ZxBdNG7qsB-nuER2H98TnaVpMohRm0ONSsnWMmHMN3A,16974 +pandas/tests/indexes/multi/test_indexing.py,sha256=t_-mbQMIuC2PYskSJyVpPVQl8LpcKsYuIdTw7Xrw-UA,32003 +pandas/tests/indexes/multi/test_integrity.py,sha256=uaEWwIWVVlLWtKjOFBYY0WkZ-ceJRdemyFFZnbZk4eU,8554 
+pandas/tests/indexes/multi/test_isin.py,sha256=OQJsb-GOjjohnrPMTT79W84SgI5dyUgeX31M3gJZ1YI,2726 +pandas/tests/indexes/multi/test_join.py,sha256=OIBpdEIckK7zGw8-kj3Fn-KsmB7EQG2u9rf8x4MDgn4,4948 +pandas/tests/indexes/multi/test_lexsort.py,sha256=BoLZ9d58ec6SDAZN2lemMBrhobFWM22LhmhlTgXjTCI,1775 +pandas/tests/indexes/multi/test_missing.py,sha256=oxtBiwN7askAsRHYjT2Z3L1pde5sWk_t7fgJ54BBOw0,3349 +pandas/tests/indexes/multi/test_monotonic.py,sha256=WAKZYcYq-F1hLChr3T1Jdl8aBD0fyqOqcfSNOWI18tk,6842 +pandas/tests/indexes/multi/test_names.py,sha256=-49hYtcg98EzL0hPjnw_Ckz54-Qh6SG-SXmI2h9lxwE,6770 +pandas/tests/indexes/multi/test_partial_indexing.py,sha256=5nR6tybKW-LJ7oBePNyFJdoWYjGw5BW2rixkYKA1cy0,4768 +pandas/tests/indexes/multi/test_pickle.py,sha256=ZJVZo0DcXDtV6BAUuPAKbwMV8aGfazJLU7Lw6lRmBcw,259 +pandas/tests/indexes/multi/test_reindex.py,sha256=Fk4LcXdbSUnZF0mI1TVqCgxMwG7MhLqA9h9YWmLWr7A,5575 +pandas/tests/indexes/multi/test_reshape.py,sha256=WK0BgD1u6-I-yd9zXl5uq6bOMt3m5seOGOHR514C7zA,5069 +pandas/tests/indexes/multi/test_setops.py,sha256=UAlupnTAEqmrREPpXrpAPgFZSQal9bFuHGOCvQOXPGc,17007 +pandas/tests/indexes/multi/test_sorting.py,sha256=47WKGgwGhG49--9TazoSPzXbSQVfrKfsc0rchmSay-U,8570 +pandas/tests/indexes/multi/test_take.py,sha256=gr4gf2ceh3hf8bcRVVwY2Gz_apjyAtpXa4UM8TFq0TE,2501 +pandas/tests/indexes/numeric/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/indexes/numeric/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/indexes/numeric/__pycache__/test_astype.cpython-38.pyc,, +pandas/tests/indexes/numeric/__pycache__/test_indexing.cpython-38.pyc,, +pandas/tests/indexes/numeric/__pycache__/test_join.cpython-38.pyc,, +pandas/tests/indexes/numeric/__pycache__/test_numeric.cpython-38.pyc,, +pandas/tests/indexes/numeric/__pycache__/test_setops.cpython-38.pyc,, +pandas/tests/indexes/numeric/test_astype.py,sha256=tPSEuDMw-BI97J_WDXWpo-hUUJuQChAoU5cjaweRoko,2991 +pandas/tests/indexes/numeric/test_indexing.py,sha256=GsQfG7LqO7iWFf2uPi9pnukzvIA3Ku7zExzRfWukFUs,22521 +pandas/tests/indexes/numeric/test_join.py,sha256=IuIEF7A2m66RZ6zBeerTUUQ8GiC-5k-uto1VpYUxHgc,14793 +pandas/tests/indexes/numeric/test_numeric.py,sha256=W7yfBzreWsGKAEen0ZQgj99xIu8hzkSb3w_EMhmOlDk,22557 +pandas/tests/indexes/numeric/test_setops.py,sha256=SfkNroqvfqVNqSIW73SG8tgzyN4D-uVtBn32UISWQKo,5673 +pandas/tests/indexes/object/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/indexes/object/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/indexes/object/__pycache__/test_astype.cpython-38.pyc,, +pandas/tests/indexes/object/__pycache__/test_indexing.cpython-38.pyc,, +pandas/tests/indexes/object/test_astype.py,sha256=yk1A1xAIG7wPIq0-6321wndAZils0odbXmvu4TLFLFc,317 +pandas/tests/indexes/object/test_indexing.py,sha256=GlwzPzhb2TgC5ILA6NxQYVYFDhKZovVKkZK6ATfdWE0,8097 +pandas/tests/indexes/period/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/indexes/period/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/indexes/period/__pycache__/test_constructors.cpython-38.pyc,, +pandas/tests/indexes/period/__pycache__/test_formats.cpython-38.pyc,, +pandas/tests/indexes/period/__pycache__/test_freq_attr.cpython-38.pyc,, +pandas/tests/indexes/period/__pycache__/test_indexing.cpython-38.pyc,, +pandas/tests/indexes/period/__pycache__/test_join.cpython-38.pyc,, +pandas/tests/indexes/period/__pycache__/test_monotonic.cpython-38.pyc,, +pandas/tests/indexes/period/__pycache__/test_partial_slicing.cpython-38.pyc,, 
+pandas/tests/indexes/period/__pycache__/test_period.cpython-38.pyc,, +pandas/tests/indexes/period/__pycache__/test_period_range.cpython-38.pyc,, +pandas/tests/indexes/period/__pycache__/test_pickle.cpython-38.pyc,, +pandas/tests/indexes/period/__pycache__/test_resolution.cpython-38.pyc,, +pandas/tests/indexes/period/__pycache__/test_scalar_compat.cpython-38.pyc,, +pandas/tests/indexes/period/__pycache__/test_searchsorted.cpython-38.pyc,, +pandas/tests/indexes/period/__pycache__/test_setops.cpython-38.pyc,, +pandas/tests/indexes/period/__pycache__/test_tools.cpython-38.pyc,, +pandas/tests/indexes/period/methods/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/indexes/period/methods/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/indexes/period/methods/__pycache__/test_asfreq.cpython-38.pyc,, +pandas/tests/indexes/period/methods/__pycache__/test_astype.cpython-38.pyc,, +pandas/tests/indexes/period/methods/__pycache__/test_factorize.cpython-38.pyc,, +pandas/tests/indexes/period/methods/__pycache__/test_fillna.cpython-38.pyc,, +pandas/tests/indexes/period/methods/__pycache__/test_insert.cpython-38.pyc,, +pandas/tests/indexes/period/methods/__pycache__/test_is_full.cpython-38.pyc,, +pandas/tests/indexes/period/methods/__pycache__/test_repeat.cpython-38.pyc,, +pandas/tests/indexes/period/methods/__pycache__/test_shift.cpython-38.pyc,, +pandas/tests/indexes/period/methods/__pycache__/test_to_timestamp.cpython-38.pyc,, +pandas/tests/indexes/period/methods/test_asfreq.py,sha256=14T9oZNDGVLX6n8DXkDCHdaScbSnpOYAvRQk8QRymdQ,5445 +pandas/tests/indexes/period/methods/test_astype.py,sha256=CKRvIVmz0gCoM1XFFWYRMizrWHD2wcc4lOo_7AF9Oiw,6560 +pandas/tests/indexes/period/methods/test_factorize.py,sha256=wqktTsRc04u0hgUxTzN5TCxoaVKMzd6xk5A6q1fw-DQ,1746 +pandas/tests/indexes/period/methods/test_fillna.py,sha256=BsNanStMuVV5T8S4tPNC7BJSExKOY2zmTws45qTkBGE,1125 +pandas/tests/indexes/period/methods/test_insert.py,sha256=JT9lBhbF90m2zRgIwarhPqPtVbrvkLiihZxO-4WHvTU,482 +pandas/tests/indexes/period/methods/test_is_full.py,sha256=hQgnnd22PyTFp68XVlsfcARnC-wzrkYJ3ejjdTGRQM4,570 +pandas/tests/indexes/period/methods/test_repeat.py,sha256=1Nwn-ePYBEXWY4N9pFdHaqcZoKhWuinKdFJ-EjZtFlY,772 +pandas/tests/indexes/period/methods/test_shift.py,sha256=E71NmVZzq5ydpiFRsWKoxth9A9KOH-nULX-53N6shOs,4411 +pandas/tests/indexes/period/methods/test_to_timestamp.py,sha256=ptaAkDJ3dDMzUk32Y5rk7z6PvJHxghcxuHwiDBOwRfw,4667 +pandas/tests/indexes/period/test_constructors.py,sha256=knsD32MAsB-iLvtJfsR_LrPLsNhkl_9NTX0gnfpbzIQ,20479 +pandas/tests/indexes/period/test_formats.py,sha256=noe6JwN9tpC63DJFN2Yv2sn5xPRjLVkwkegsrmhi0XQ,6587 +pandas/tests/indexes/period/test_freq_attr.py,sha256=osBKUYOZIGhdFY80DXsEWPsT1dKhyCuxPmYsQBQMMvE,477 +pandas/tests/indexes/period/test_indexing.py,sha256=gHLr2nNeKyqea0In9M7Xk8JZSehxaT0SfxL8FXDv1ts,32453 +pandas/tests/indexes/period/test_join.py,sha256=h4iFrQ5ifIB6WU0u4PXXopNdOcCOd3TdDuyXoXkDU14,1820 +pandas/tests/indexes/period/test_monotonic.py,sha256=9Sb4WOykj99hn3MQOfm_MqYRxO5kADZt6OuakhSukp4,1258 +pandas/tests/indexes/period/test_partial_slicing.py,sha256=KS6n8lWiQ3ZB33ctc_4XGwAMT4z7capoKmjn178rkzQ,7384 +pandas/tests/indexes/period/test_period.py,sha256=dFLgi8DrAGEm7oIQCE_Q_0ja9jRme0gBDIeUOgKGLpY,11677 +pandas/tests/indexes/period/test_period_range.py,sha256=6t7JYdgPAAEY6Q3VaEne4eEVagVRSkF2u4gbyzv7frM,4259 +pandas/tests/indexes/period/test_pickle.py,sha256=YD8TfXhgHGxihSshKGs2PghFdtKL7igL05b7ZlHISbU,692 
+pandas/tests/indexes/period/test_resolution.py,sha256=bTh8yDI26eG73SVQH1exf9TA5Vt4XiWu70f3fb8i2L4,567 +pandas/tests/indexes/period/test_scalar_compat.py,sha256=KFrPgFM24rpvHocDoAnZjEu8pZdS8RDHWlmv6jFZZ8w,1140 +pandas/tests/indexes/period/test_searchsorted.py,sha256=AF1ruU22wQWnDiDEjKD_lg6en9cJRQbky9Z6z-3QZCM,2604 +pandas/tests/indexes/period/test_setops.py,sha256=7qOhfoOLPfavnvMoeguJhu7H4f8LYY5Xog5ie5lr0SM,12359 +pandas/tests/indexes/period/test_tools.py,sha256=cHIp_pefWsfbdBRW3mBjL9pZ5EhaF95kdvT-IWia1NY,1023 +pandas/tests/indexes/ranges/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/indexes/ranges/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/indexes/ranges/__pycache__/test_constructors.cpython-38.pyc,, +pandas/tests/indexes/ranges/__pycache__/test_indexing.cpython-38.pyc,, +pandas/tests/indexes/ranges/__pycache__/test_join.cpython-38.pyc,, +pandas/tests/indexes/ranges/__pycache__/test_range.cpython-38.pyc,, +pandas/tests/indexes/ranges/__pycache__/test_setops.cpython-38.pyc,, +pandas/tests/indexes/ranges/test_constructors.py,sha256=5hSb0AVntQGgJkik8FTd2ZaoqDyl4uetyBTZB1OWHSE,5414 +pandas/tests/indexes/ranges/test_indexing.py,sha256=pKLrd9zPnXdTw8dw6A0TAgZ8Da6SufTaTTqfTO6U1OU,3446 +pandas/tests/indexes/ranges/test_join.py,sha256=EqOd_q38_IiDe8CbXE9eVbtWocEBUY5HxjJVR4l8O20,6161 +pandas/tests/indexes/ranges/test_range.py,sha256=nMAIW5xj0ecnLBYkpsWNzN99fLZT1er5tgjZ6rjZ9A4,20120 +pandas/tests/indexes/ranges/test_setops.py,sha256=CQXmanxA251oitr_L0etWTgBq6Zi-O2m2-3rdhh1ag0,17431 +pandas/tests/indexes/test_any_index.py,sha256=HWwfEqnc-BCe_8AtOHOjF2GZBKLTr0wN5OWu7ftL_DM,5563 +pandas/tests/indexes/test_base.py,sha256=DaF63Kz8d0NnmdeWeg198gHMei0oW4AC0GE-NBEFnqg,55053 +pandas/tests/indexes/test_common.py,sha256=7vxZ1XGnIOMU8It7D80WBOU50_H2eU2sq0vIu5c3syU,16735 +pandas/tests/indexes/test_engines.py,sha256=jRr76XUGfXZlaFjRC5IM6h_QbnojsNOegMBfg_COamg,6698 +pandas/tests/indexes/test_frozen.py,sha256=dKwa03hmwCwtwCR7UeI42OQTVX2bkdRzNr6fqaDW4-A,3069 +pandas/tests/indexes/test_index_new.py,sha256=EVPIqBdaPceJifPaVF3ZJDpCUpfqOfvBeJ3Y6T496Gs,12559 +pandas/tests/indexes/test_indexing.py,sha256=VRd1J6wCcKTSAZDwzBrIya0Id-bHSipdT4uK6Ka-y5Y,11226 +pandas/tests/indexes/test_numpy_compat.py,sha256=UBAZmHFB5sCIS_GnpTCJQrl6kpvt1k8gFuM1bAqeI-g,4289 +pandas/tests/indexes/test_setops.py,sha256=3ukpZTeQedA3NIpgx3YRKdn5GmyYCWQdS5-DKjbkzfw,27400 +pandas/tests/indexes/timedeltas/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/indexes/timedeltas/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/indexes/timedeltas/__pycache__/test_constructors.cpython-38.pyc,, +pandas/tests/indexes/timedeltas/__pycache__/test_delete.cpython-38.pyc,, +pandas/tests/indexes/timedeltas/__pycache__/test_formats.cpython-38.pyc,, +pandas/tests/indexes/timedeltas/__pycache__/test_freq_attr.cpython-38.pyc,, +pandas/tests/indexes/timedeltas/__pycache__/test_indexing.cpython-38.pyc,, +pandas/tests/indexes/timedeltas/__pycache__/test_join.cpython-38.pyc,, +pandas/tests/indexes/timedeltas/__pycache__/test_ops.cpython-38.pyc,, +pandas/tests/indexes/timedeltas/__pycache__/test_pickle.cpython-38.pyc,, +pandas/tests/indexes/timedeltas/__pycache__/test_scalar_compat.cpython-38.pyc,, +pandas/tests/indexes/timedeltas/__pycache__/test_searchsorted.cpython-38.pyc,, +pandas/tests/indexes/timedeltas/__pycache__/test_setops.cpython-38.pyc,, +pandas/tests/indexes/timedeltas/__pycache__/test_timedelta.cpython-38.pyc,, 
+pandas/tests/indexes/timedeltas/__pycache__/test_timedelta_range.cpython-38.pyc,, +pandas/tests/indexes/timedeltas/methods/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/indexes/timedeltas/methods/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/indexes/timedeltas/methods/__pycache__/test_astype.cpython-38.pyc,, +pandas/tests/indexes/timedeltas/methods/__pycache__/test_factorize.cpython-38.pyc,, +pandas/tests/indexes/timedeltas/methods/__pycache__/test_fillna.cpython-38.pyc,, +pandas/tests/indexes/timedeltas/methods/__pycache__/test_insert.cpython-38.pyc,, +pandas/tests/indexes/timedeltas/methods/__pycache__/test_repeat.cpython-38.pyc,, +pandas/tests/indexes/timedeltas/methods/__pycache__/test_shift.cpython-38.pyc,, +pandas/tests/indexes/timedeltas/methods/test_astype.py,sha256=wcI9qCAefF3FatvrYddAYH5jqHhpWkgrOYoBnRgkf5Q,4155 +pandas/tests/indexes/timedeltas/methods/test_factorize.py,sha256=aqhhwRKZvfGxa3v09X5vZ7uBup8n5OjaUadfJpV6FoI,1292 +pandas/tests/indexes/timedeltas/methods/test_fillna.py,sha256=F7fBoEG-mnu16ypWYmK5wbIovQJKL0h86C1MzGkhPoE,597 +pandas/tests/indexes/timedeltas/methods/test_insert.py,sha256=otM2JzsPVcYd6pKHZf11DoP9hpG_fYBPmkIhabkNsnY,4714 +pandas/tests/indexes/timedeltas/methods/test_repeat.py,sha256=vPcNBkY4H2RxsykW1bjTg-FSlTlQ2H1yLb-ZsYffsEg,926 +pandas/tests/indexes/timedeltas/methods/test_shift.py,sha256=CmuWUFPcF58DWcCC6FhkxOAMyWjAjUCmKVeGMNk_A6Q,2751 +pandas/tests/indexes/timedeltas/test_constructors.py,sha256=Tft7PkVXe2omZtYjj8XhAXvzTRgEJjbXvlKb0X0WZts,9519 +pandas/tests/indexes/timedeltas/test_delete.py,sha256=-5uYhDUCD55zv5I3Z8aVFEBzdChSWtbPNSP05nqUEiA,2398 +pandas/tests/indexes/timedeltas/test_formats.py,sha256=3U2kMrD4Jmhrj8u5pkxM6yRlptxenAZ3vBBPD9GQFL4,3293 +pandas/tests/indexes/timedeltas/test_freq_attr.py,sha256=RvQK4DnQry93RvsGKq-2EqMOe-5uzuxaH-wERgoqCPM,1824 +pandas/tests/indexes/timedeltas/test_indexing.py,sha256=_08MI6XCMnHAEKA4V_TkM4t8URDuGkopDS4OrogVNTc,12375 +pandas/tests/indexes/timedeltas/test_join.py,sha256=GKTVWVqmG4P_0WbS1YA3FHlwrZLM97tjMwwPy6uDUa0,1514 +pandas/tests/indexes/timedeltas/test_ops.py,sha256=nfGyNJvNy7_jmWebKjevLKhyAMNvI5jytkZTNlpEC-g,393 +pandas/tests/indexes/timedeltas/test_pickle.py,sha256=QesBThE22Ba17eUdG21lWNqPRvBhyupLnPsXueLazHw,302 +pandas/tests/indexes/timedeltas/test_scalar_compat.py,sha256=Zi3lgXWRGy_6UYQjEcNdkt1NDW4quvkBl7IP_O2NQ_I,4512 +pandas/tests/indexes/timedeltas/test_searchsorted.py,sha256=kCE0PkuPk1CxkZHODe3aZ54V-Hc1AiHkyNNVjN5REIM,967 +pandas/tests/indexes/timedeltas/test_setops.py,sha256=AXLfBLdlIg1vKPoOYhO27BnrAV5TFvVSnZRBNtUiuUQ,9521 +pandas/tests/indexes/timedeltas/test_timedelta.py,sha256=nQD50m85tkfyUYZiSADOiqmiqxF3Z84vVpVKpI1jfDI,4519 +pandas/tests/indexes/timedeltas/test_timedelta_range.py,sha256=BVg1H4EpYv1ZUHb2luJDU5HXwNJqrBNBFHGDFio1kM4,3288 +pandas/tests/indexing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/indexing/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/indexing/__pycache__/common.cpython-38.pyc,, +pandas/tests/indexing/__pycache__/test_at.cpython-38.pyc,, +pandas/tests/indexing/__pycache__/test_categorical.cpython-38.pyc,, +pandas/tests/indexing/__pycache__/test_chaining_and_caching.cpython-38.pyc,, +pandas/tests/indexing/__pycache__/test_check_indexer.cpython-38.pyc,, +pandas/tests/indexing/__pycache__/test_coercion.cpython-38.pyc,, +pandas/tests/indexing/__pycache__/test_datetime.cpython-38.pyc,, +pandas/tests/indexing/__pycache__/test_floats.cpython-38.pyc,, 
+pandas/tests/indexing/__pycache__/test_iat.cpython-38.pyc,, +pandas/tests/indexing/__pycache__/test_iloc.cpython-38.pyc,, +pandas/tests/indexing/__pycache__/test_indexers.cpython-38.pyc,, +pandas/tests/indexing/__pycache__/test_indexing.cpython-38.pyc,, +pandas/tests/indexing/__pycache__/test_loc.cpython-38.pyc,, +pandas/tests/indexing/__pycache__/test_na_indexing.cpython-38.pyc,, +pandas/tests/indexing/__pycache__/test_partial.cpython-38.pyc,, +pandas/tests/indexing/__pycache__/test_scalar.cpython-38.pyc,, +pandas/tests/indexing/common.py,sha256=oYeeK6GLvxLBH80c4bGuNdNNTc9PPUzdDISgb6X4yDM,5298 +pandas/tests/indexing/interval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/indexing/interval/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/indexing/interval/__pycache__/test_interval.cpython-38.pyc,, +pandas/tests/indexing/interval/__pycache__/test_interval_new.cpython-38.pyc,, +pandas/tests/indexing/interval/test_interval.py,sha256=ascX0OUwi24O8X6xKVsh8JSu9zYp9KTQoS7GJ8I4540,5941 +pandas/tests/indexing/interval/test_interval_new.py,sha256=MMb1By7LEN0q8lWGSJEoYdInLKOW2o6Ww9QRsdyTvkU,7296 +pandas/tests/indexing/multiindex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/indexing/multiindex/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/indexing/multiindex/__pycache__/test_chaining_and_caching.cpython-38.pyc,, +pandas/tests/indexing/multiindex/__pycache__/test_datetime.cpython-38.pyc,, +pandas/tests/indexing/multiindex/__pycache__/test_getitem.cpython-38.pyc,, +pandas/tests/indexing/multiindex/__pycache__/test_iloc.cpython-38.pyc,, +pandas/tests/indexing/multiindex/__pycache__/test_indexing_slow.cpython-38.pyc,, +pandas/tests/indexing/multiindex/__pycache__/test_loc.cpython-38.pyc,, +pandas/tests/indexing/multiindex/__pycache__/test_multiindex.cpython-38.pyc,, +pandas/tests/indexing/multiindex/__pycache__/test_partial.cpython-38.pyc,, +pandas/tests/indexing/multiindex/__pycache__/test_setitem.cpython-38.pyc,, +pandas/tests/indexing/multiindex/__pycache__/test_slice.cpython-38.pyc,, +pandas/tests/indexing/multiindex/__pycache__/test_sorted.cpython-38.pyc,, +pandas/tests/indexing/multiindex/test_chaining_and_caching.py,sha256=KdOBxoMhp030-Kcz7hgBoSN9qoLafSlDJO_5xDDGthA,2138 +pandas/tests/indexing/multiindex/test_datetime.py,sha256=cditJuDyVUhpC6mCnjTfOWYoo9tbsXsG-LlTo_ngSGU,1209 +pandas/tests/indexing/multiindex/test_getitem.py,sha256=di1r6rv0RNb3g_F8iqOm-g8rM81x-cNk42YsHYp3bIU,12609 +pandas/tests/indexing/multiindex/test_iloc.py,sha256=saCNMV4Y5od81B7_hHVSEqMZ3gSsg16z4CSr8p81Sh8,4837 +pandas/tests/indexing/multiindex/test_indexing_slow.py,sha256=oPW7i8xpohgox8NSK97vEhqp-kyAW5Zj2GfUaFXMfxA,2864 +pandas/tests/indexing/multiindex/test_loc.py,sha256=e5Vl48wdT1GfEWJqU-r_KaJG6OEyO-m_2XLaI1-AQFw,31007 +pandas/tests/indexing/multiindex/test_multiindex.py,sha256=3GYwNz4GhYYGZfYkEBqAuV91BEGcpV4N2ku4r_X_d-s,4734 +pandas/tests/indexing/multiindex/test_partial.py,sha256=UCWMulrPZAZAObh8ZPWLtjk0Vd-DQdZc8oan4wJM-L0,8469 +pandas/tests/indexing/multiindex/test_setitem.py,sha256=t0vL7II0hMtxbYviap6brl8iDyBaSUwimwkJvlv2qhs,16919 +pandas/tests/indexing/multiindex/test_slice.py,sha256=JlM11V4CnYrklcERdNfcKi6Ql6GmoIM-KWq7yL4iA5U,25749 +pandas/tests/indexing/multiindex/test_sorted.py,sha256=ET64-9Ys0442mmarRCy5dztKL14x2MsTnICcTjpwEXs,4461 +pandas/tests/indexing/test_at.py,sha256=xzvdvI3JhPFxbn98y3N4tO5hbV1EP03CTJ6mmSh73uo,5376 +pandas/tests/indexing/test_categorical.py,sha256=i3BneFs9zAxXEws2XOaUWsfhcGyrMjYp4C73jHaPrw4,19185 
+pandas/tests/indexing/test_chaining_and_caching.py,sha256=QHcv-HwhRmMaEfeHuDszDyNWZK23KlGUmbjYppkOU_M,17013 +pandas/tests/indexing/test_check_indexer.py,sha256=tfr2a1h6uokN2MJDE7TKiZ0iRaHvfSWPPC-86RqaaDU,3159 +pandas/tests/indexing/test_coercion.py,sha256=jdLihFyer1Ym8hkDRBx9TNoOV82LrBmmncWfXHG18nw,41485 +pandas/tests/indexing/test_datetime.py,sha256=ewW0cmFv4qII-kzLCT6HOKvTxXc0tQWgldpWuIv1Ki8,4578 +pandas/tests/indexing/test_floats.py,sha256=T43phBG69Oczx6r4h5wg4iOonE32zXgxmuzkaTNJAU8,19854 +pandas/tests/indexing/test_iat.py,sha256=rH9rYbbm-dnz3Xxna28FmyHFfoXCoSDIFVMKXFYuIVc,1244 +pandas/tests/indexing/test_iloc.py,sha256=lpwitvbcH993D2GOINpi9SqiPvpjPGKeos523nU1vhQ,48257 +pandas/tests/indexing/test_indexers.py,sha256=agN_MCo403fOvqapKi_WYQli9AkDFAk4TDB5XpbJ8js,1661 +pandas/tests/indexing/test_indexing.py,sha256=ccs2wz1LDTWFJJEnZ2JcaFcTWJF4XNvgF4ZVEm4BZYE,33248 +pandas/tests/indexing/test_loc.py,sha256=r250sSbTlqHm2ZM0HeVc51vzs-ATA_76nh1SMPB536Q,103721 +pandas/tests/indexing/test_na_indexing.py,sha256=OEUcIBMeKZq2cwvSkk-m6meKpOHnKgrS3pPUYw0R4rQ,2309 +pandas/tests/indexing/test_partial.py,sha256=Ksj0PqvNL5rfOOKwu5_0JZSNK9-kpifJIVRPunLrOBA,23569 +pandas/tests/indexing/test_scalar.py,sha256=HNNxrZMPsvBpsL3AdvRBrMwGxUBDFRBeepAh8iZqVh0,10370 +pandas/tests/internals/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/internals/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/internals/__pycache__/test_api.cpython-38.pyc,, +pandas/tests/internals/__pycache__/test_internals.cpython-38.pyc,, +pandas/tests/internals/__pycache__/test_managers.cpython-38.pyc,, +pandas/tests/internals/test_api.py,sha256=F483y53JhDojBuTkeq0BkQFnzc-k_tVIdza4AhoDSoA,1231 +pandas/tests/internals/test_internals.py,sha256=thY2Ya90qadGKm5pqm3E3IY94BgqxkL6xEAgW3kDCsA,48653 +pandas/tests/internals/test_managers.py,sha256=qb2taX-5YUvM7r7_pseSpIwAJ3g1BJaLwAG1fhGFbz8,2527 +pandas/tests/io/__init__.py,sha256=Q7gL-8SOTRq5t-qKAOtlCiI5f1jYKWA9zA-7gC112Gc,852 +pandas/tests/io/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/io/__pycache__/conftest.cpython-38.pyc,, +pandas/tests/io/__pycache__/generate_legacy_storage_files.cpython-38.pyc,, +pandas/tests/io/__pycache__/test_clipboard.cpython-38.pyc,, +pandas/tests/io/__pycache__/test_common.cpython-38.pyc,, +pandas/tests/io/__pycache__/test_compression.cpython-38.pyc,, +pandas/tests/io/__pycache__/test_date_converters.cpython-38.pyc,, +pandas/tests/io/__pycache__/test_feather.cpython-38.pyc,, +pandas/tests/io/__pycache__/test_fsspec.cpython-38.pyc,, +pandas/tests/io/__pycache__/test_gcs.cpython-38.pyc,, +pandas/tests/io/__pycache__/test_html.cpython-38.pyc,, +pandas/tests/io/__pycache__/test_orc.cpython-38.pyc,, +pandas/tests/io/__pycache__/test_parquet.cpython-38.pyc,, +pandas/tests/io/__pycache__/test_pickle.cpython-38.pyc,, +pandas/tests/io/__pycache__/test_s3.cpython-38.pyc,, +pandas/tests/io/__pycache__/test_spss.cpython-38.pyc,, +pandas/tests/io/__pycache__/test_sql.cpython-38.pyc,, +pandas/tests/io/__pycache__/test_stata.cpython-38.pyc,, +pandas/tests/io/__pycache__/test_user_agent.cpython-38.pyc,, +pandas/tests/io/conftest.py,sha256=CdJVQS-RofaQAC5sf8-Zvk2CuHuVOYsNoDpLy4JhsLE,6221 +pandas/tests/io/data/fixed_width/fixed_width_format.txt,sha256=KRJHKQdxeXV4qyPuepdgcsB1A9ZFRLxrXzqMG07-EqA,30 +pandas/tests/io/data/gbq_fake_job.txt,sha256=kyaUKMqaSigZ2_8RqqoMaVodYWXafyzni8Z9TSiUADQ,904 +pandas/tests/io/data/legacy_pickle/1.2.4/empty_frame_v1_2_4-GH#42345.pkl,sha256=9VK5EuxJqzAoBgxXtuHCsIcX9ZcQ_sw2PlBsthlfprI,501 
+pandas/tests/io/data/parquet/simple.parquet,sha256=_jxeNalGZ6ttpgdvptsLV86ZEEcQjQAFGNSoJR-eX3k,2157 +pandas/tests/io/data/pickle/test_mi_py27.pkl,sha256=KkWb_MQ667aei_mn4Yo6ThrZJstzuM6dumi1PcgRCb0,1395 +pandas/tests/io/data/pickle/test_py27.pkl,sha256=Ok1FYmLF48aHtc8fZlbNctCETzsNvo8ApjxICEcYWEs,943 +pandas/tests/io/data/xml/baby_names.xml,sha256=flfZ-HcZ5QO6DvaaFA2e7XJB1desEHryQFOBmurC4S0,1108 +pandas/tests/io/data/xml/books.xml,sha256=0N4zdS5Ia80hq5gI9OsuwAdM1p7QccVD6JzlcHD_ix8,554 +pandas/tests/io/data/xml/cta_rail_lines.kml,sha256=uEyVTvx-pjTpVruEbwRcnrU4x3bJK0rprBUtgLIVc_w,12034 +pandas/tests/io/data/xml/flatten_doc.xsl,sha256=0tJpGr-l7NVt8YkTaKxxSp3GSCFZSsEUNGscmngaPv8,651 +pandas/tests/io/data/xml/row_field_output.xsl,sha256=rReuvAygnO43bkyw7nUDw7xuxG4eXYaZ8q3D_BJ1IGQ,545 +pandas/tests/io/excel/__init__.py,sha256=iC_AlOgQPIJGWT1QHgfZWoUkJhNynrQbpb-9Xtady68,923 +pandas/tests/io/excel/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/io/excel/__pycache__/conftest.cpython-38.pyc,, +pandas/tests/io/excel/__pycache__/test_odf.cpython-38.pyc,, +pandas/tests/io/excel/__pycache__/test_odswriter.cpython-38.pyc,, +pandas/tests/io/excel/__pycache__/test_openpyxl.cpython-38.pyc,, +pandas/tests/io/excel/__pycache__/test_readers.cpython-38.pyc,, +pandas/tests/io/excel/__pycache__/test_style.cpython-38.pyc,, +pandas/tests/io/excel/__pycache__/test_writers.cpython-38.pyc,, +pandas/tests/io/excel/__pycache__/test_xlrd.cpython-38.pyc,, +pandas/tests/io/excel/__pycache__/test_xlsxwriter.cpython-38.pyc,, +pandas/tests/io/excel/__pycache__/test_xlwt.cpython-38.pyc,, +pandas/tests/io/excel/conftest.py,sha256=l83aZZj5ivLoF2obhREvXAILOqednHJL4B46WjefZqM,1471 +pandas/tests/io/excel/test_odf.py,sha256=rCjIvxjTwoAg840U6r2sniIN5QtKCuYIfGXpKHj4SNA,1102 +pandas/tests/io/excel/test_odswriter.py,sha256=6QLhlR5jz9KLZXF2dPXDgvNtfEyJyL63yB31NxD4CdE,1750 +pandas/tests/io/excel/test_openpyxl.py,sha256=k_XX9XQDccmkbu94mTZQjICV4eMZBu38JHIovqfjiVI,13206 +pandas/tests/io/excel/test_readers.py,sha256=R33zy7kiZZwI_PwDjuAfc4xUd3HSVFxOXfDfQnseMrc,57143 +pandas/tests/io/excel/test_style.py,sha256=6aVhCFZsMPPzSzPre-bZEnFjXd9LQggi62076wl9SKw,6359 +pandas/tests/io/excel/test_writers.py,sha256=HSooe_q-hiEu7_O7OpDh8a4yXZyqbS6WGkQoPLEowYo,48296 +pandas/tests/io/excel/test_xlrd.py,sha256=606iRkjlvmjI0iXCcQP9rurSHh9tKiqEBQnUFoGSkFA,3276 +pandas/tests/io/excel/test_xlsxwriter.py,sha256=OiRbsD93ydROr_3xZ1zS0DitauEz-ZtfYCA6tzkmy4M,2887 +pandas/tests/io/excel/test_xlwt.py,sha256=1XleqSB34rzJVLGWRlvlnKnDuBcZ8aKGLZIyvIn8OxI,4131 +pandas/tests/io/formats/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/io/formats/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/io/formats/__pycache__/test_console.cpython-38.pyc,, +pandas/tests/io/formats/__pycache__/test_css.cpython-38.pyc,, +pandas/tests/io/formats/__pycache__/test_eng_formatting.cpython-38.pyc,, +pandas/tests/io/formats/__pycache__/test_format.cpython-38.pyc,, +pandas/tests/io/formats/__pycache__/test_info.cpython-38.pyc,, +pandas/tests/io/formats/__pycache__/test_printing.cpython-38.pyc,, +pandas/tests/io/formats/__pycache__/test_series_info.cpython-38.pyc,, +pandas/tests/io/formats/__pycache__/test_to_csv.cpython-38.pyc,, +pandas/tests/io/formats/__pycache__/test_to_excel.cpython-38.pyc,, +pandas/tests/io/formats/__pycache__/test_to_html.cpython-38.pyc,, +pandas/tests/io/formats/__pycache__/test_to_latex.cpython-38.pyc,, +pandas/tests/io/formats/__pycache__/test_to_markdown.cpython-38.pyc,, 
+pandas/tests/io/formats/__pycache__/test_to_string.cpython-38.pyc,, +pandas/tests/io/formats/style/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/io/formats/style/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/io/formats/style/__pycache__/test_bar.cpython-38.pyc,, +pandas/tests/io/formats/style/__pycache__/test_deprecated.cpython-38.pyc,, +pandas/tests/io/formats/style/__pycache__/test_format.cpython-38.pyc,, +pandas/tests/io/formats/style/__pycache__/test_highlight.cpython-38.pyc,, +pandas/tests/io/formats/style/__pycache__/test_html.cpython-38.pyc,, +pandas/tests/io/formats/style/__pycache__/test_matplotlib.cpython-38.pyc,, +pandas/tests/io/formats/style/__pycache__/test_non_unique.cpython-38.pyc,, +pandas/tests/io/formats/style/__pycache__/test_style.cpython-38.pyc,, +pandas/tests/io/formats/style/__pycache__/test_to_latex.cpython-38.pyc,, +pandas/tests/io/formats/style/__pycache__/test_tooltip.cpython-38.pyc,, +pandas/tests/io/formats/style/test_bar.py,sha256=5aq5hdiEJ2_I48VhXaMP5g6BoEsg80iCs01J_-b8o7g,10281 +pandas/tests/io/formats/style/test_deprecated.py,sha256=QCCFKToLMphwz6foex-mtIjz0IdfWuaKbvCxooe3its,4300 +pandas/tests/io/formats/style/test_format.py,sha256=jq-OH0z7LNXle0xZkaSTUiNimW1iNtIF0yZeO4VP3II,16669 +pandas/tests/io/formats/style/test_highlight.py,sha256=Zbgtv47UYNUCXNYmrsj3UYSwmPM9KiDTKaFuqKMwQn8,7102 +pandas/tests/io/formats/style/test_html.py,sha256=EhiyXtQXTdbTXI8TSvITCb1wSfq3L0sH3-wxDq5-Bmw,26595 +pandas/tests/io/formats/style/test_matplotlib.py,sha256=rfLi3ivt3M7vlXlmEIjEGyG2223JkjeeaMDKt08kY9o,10159 +pandas/tests/io/formats/style/test_non_unique.py,sha256=0uSH82nnKGIowLrMya__0Dqd9rLgQrNCjDSUCbwOdJY,4381 +pandas/tests/io/formats/style/test_style.py,sha256=4W_SX9MzpuiYT-MAF5IJy79PCO_YBntaqE8hUgHTIIQ,57593 +pandas/tests/io/formats/style/test_to_latex.py,sha256=d5ITo7qi5YjTsxKIyImVGnwQ5B-_Q62Dgz821ggwJt0,30397 +pandas/tests/io/formats/style/test_tooltip.py,sha256=GMqwXrXi9Ppp0khfZHEwgeRqahwju5U2iIhZan3ndZE,2899 +pandas/tests/io/formats/test_console.py,sha256=vUzGFCUKvGw8bbjoChajelipFHXnZebET_usGXw1TdY,2427 +pandas/tests/io/formats/test_css.py,sha256=soqIB2Pv34lSPUL4g5SjiM1HdMc6yTp5JQI1-GHiTmw,6703 +pandas/tests/io/formats/test_eng_formatting.py,sha256=42f-v-IVN6Toj21CQGre-pOrDk4KZeCmj0qqa9htJeY,8341 +pandas/tests/io/formats/test_format.py,sha256=2DkQaPE6254xpAQsP5iwUnBE2nSKcNlLWBAfMf_U0LE,119201 +pandas/tests/io/formats/test_info.py,sha256=cajG0eYoIZHEoQhNjCk0QPkniUEZS9f9vsboHsjWOco,14648 +pandas/tests/io/formats/test_printing.py,sha256=3_VNse1MNpQZspyPD33JsgjNCGNNpdsSzAVwYyLnQoc,6806 +pandas/tests/io/formats/test_series_info.py,sha256=qAoLro_8wS4rMTDBTmbOvWv2w89S6EeyGGr7diafwWg,4803 +pandas/tests/io/formats/test_to_csv.py,sha256=AxkofbotGcUeiewwe-wi51UCBx-vWdZwisTtPN5UlY4,25853 +pandas/tests/io/formats/test_to_excel.py,sha256=CMTPW-qkV0yuk8Rw6rrGkW1BscvnR0dI3WMmMAEKOIU,12536 +pandas/tests/io/formats/test_to_html.py,sha256=UjRTjOoxse9UE3zRIcVkjzxOvmbQu5moULAgGFfIlaY,28608 +pandas/tests/io/formats/test_to_latex.py,sha256=p1kU6kPVTRoFJaz5rvU43zMuWHkroTy7jwXequ8r0sc,45996 +pandas/tests/io/formats/test_to_markdown.py,sha256=EzGalRLKAt5ddIKY31TJnr-GDV_cCpeEfHGYxp0T2G4,2724 +pandas/tests/io/formats/test_to_string.py,sha256=uCuO53lafS1HnXfmacfR44fj-xa9JeVF8TayglWY8hY,9264 +pandas/tests/io/generate_legacy_storage_files.py,sha256=faxVoiJfNcrwN9r1RV2JfVlAx3DkRmY1A1_gZVmx_Gw,9840 +pandas/tests/io/json/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/io/json/__pycache__/__init__.cpython-38.pyc,, 
+pandas/tests/io/json/__pycache__/conftest.cpython-38.pyc,, +pandas/tests/io/json/__pycache__/test_compression.cpython-38.pyc,, +pandas/tests/io/json/__pycache__/test_deprecated_kwargs.cpython-38.pyc,, +pandas/tests/io/json/__pycache__/test_json_table_schema.cpython-38.pyc,, +pandas/tests/io/json/__pycache__/test_json_table_schema_ext_dtype.cpython-38.pyc,, +pandas/tests/io/json/__pycache__/test_normalize.cpython-38.pyc,, +pandas/tests/io/json/__pycache__/test_pandas.cpython-38.pyc,, +pandas/tests/io/json/__pycache__/test_readlines.cpython-38.pyc,, +pandas/tests/io/json/__pycache__/test_ujson.cpython-38.pyc,, +pandas/tests/io/json/conftest.py,sha256=Zp83o90PvZ56MbhNRr1NZEPTpho7jRHcLYiEA9R_BZw,205 +pandas/tests/io/json/test_compression.py,sha256=aRfzbHYFXAu_ydO67lISMNU_Rt79ZziCJSbJio-Vr9Y,4156 +pandas/tests/io/json/test_deprecated_kwargs.py,sha256=-hz91joqDj5yjDDF0WCUEr8RqhqxTxEryCHZkb9H1hQ,1144 +pandas/tests/io/json/test_json_table_schema.py,sha256=7vy98LfWqeqLXSOlnuxGEa4Tjv3TjE_KoEiC5Abxy2k,28614 +pandas/tests/io/json/test_json_table_schema_ext_dtype.py,sha256=td-Kfhzt10aIN5LNLHp6iT6yztZhrqN8ItenhOQXlTk,8181 +pandas/tests/io/json/test_normalize.py,sha256=1yoCQQgg2sxJVESxswuhKVny2fWnST_30RKwU1m1HbU,30283 +pandas/tests/io/json/test_pandas.py,sha256=2EyAket_0qkOBmvxQ-y6ATBOadScO256lQ2sIopptFo,65848 +pandas/tests/io/json/test_readlines.py,sha256=CwT1nioWrvxlCVG1-UDE3yJ-PXeMnpeSRzzZSjloDWA,9756 +pandas/tests/io/json/test_ujson.py,sha256=Sw_pZQN9FlG4HFk6dxP576KUOUcAh6LwFt6uXu41WxI,41309 +pandas/tests/io/parser/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/io/parser/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/io/parser/__pycache__/conftest.cpython-38.pyc,, +pandas/tests/io/parser/__pycache__/test_c_parser_only.cpython-38.pyc,, +pandas/tests/io/parser/__pycache__/test_comment.cpython-38.pyc,, +pandas/tests/io/parser/__pycache__/test_compression.cpython-38.pyc,, +pandas/tests/io/parser/__pycache__/test_converters.cpython-38.pyc,, +pandas/tests/io/parser/__pycache__/test_dialect.cpython-38.pyc,, +pandas/tests/io/parser/__pycache__/test_encoding.cpython-38.pyc,, +pandas/tests/io/parser/__pycache__/test_header.cpython-38.pyc,, +pandas/tests/io/parser/__pycache__/test_index_col.cpython-38.pyc,, +pandas/tests/io/parser/__pycache__/test_mangle_dupes.cpython-38.pyc,, +pandas/tests/io/parser/__pycache__/test_multi_thread.cpython-38.pyc,, +pandas/tests/io/parser/__pycache__/test_na_values.cpython-38.pyc,, +pandas/tests/io/parser/__pycache__/test_network.cpython-38.pyc,, +pandas/tests/io/parser/__pycache__/test_parse_dates.cpython-38.pyc,, +pandas/tests/io/parser/__pycache__/test_python_parser_only.cpython-38.pyc,, +pandas/tests/io/parser/__pycache__/test_quoting.cpython-38.pyc,, +pandas/tests/io/parser/__pycache__/test_read_fwf.cpython-38.pyc,, +pandas/tests/io/parser/__pycache__/test_skiprows.cpython-38.pyc,, +pandas/tests/io/parser/__pycache__/test_textreader.cpython-38.pyc,, +pandas/tests/io/parser/__pycache__/test_unsupported.cpython-38.pyc,, +pandas/tests/io/parser/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/io/parser/common/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/io/parser/common/__pycache__/test_chunksize.cpython-38.pyc,, +pandas/tests/io/parser/common/__pycache__/test_common_basic.cpython-38.pyc,, +pandas/tests/io/parser/common/__pycache__/test_data_list.cpython-38.pyc,, +pandas/tests/io/parser/common/__pycache__/test_decimal.cpython-38.pyc,, 
+pandas/tests/io/parser/common/__pycache__/test_file_buffer_url.cpython-38.pyc,, +pandas/tests/io/parser/common/__pycache__/test_float.cpython-38.pyc,, +pandas/tests/io/parser/common/__pycache__/test_index.cpython-38.pyc,, +pandas/tests/io/parser/common/__pycache__/test_inf.cpython-38.pyc,, +pandas/tests/io/parser/common/__pycache__/test_ints.cpython-38.pyc,, +pandas/tests/io/parser/common/__pycache__/test_iterator.cpython-38.pyc,, +pandas/tests/io/parser/common/__pycache__/test_read_errors.cpython-38.pyc,, +pandas/tests/io/parser/common/__pycache__/test_verbose.cpython-38.pyc,, +pandas/tests/io/parser/common/test_chunksize.py,sha256=_MCFggUMsv1Tr6siMpuk81-WBwpmNNzfQ7MI4ZVKQeo,7385 +pandas/tests/io/parser/common/test_common_basic.py,sha256=ZrHP4WKE5nsPQGcjCFN-5_tna1vnzqsNDCYF3srNYz4,28043 +pandas/tests/io/parser/common/test_data_list.py,sha256=SJTVxZsQzJUmId6ZqXP7bm82NLWMUBKYrPQxsWEcHxY,2116 +pandas/tests/io/parser/common/test_decimal.py,sha256=EQEd6iON0Zr0eHBoxIDNoUyY4enGEQsjIbJjHd_HcT8,1569 +pandas/tests/io/parser/common/test_file_buffer_url.py,sha256=cXUsIA3UhHaYqfOG0mRGejbHYw_ITjw_TSZFeCnNRM0,11449 +pandas/tests/io/parser/common/test_float.py,sha256=7P8p1G0gmVFXrVlkUvFPKgaLjlaWBOpIyztuTy2tzdk,2152 +pandas/tests/io/parser/common/test_index.py,sha256=HsBCRshT8aAamkvwpfGkSY2AWarC9z7YpiyAwi0CHg4,8030 +pandas/tests/io/parser/common/test_inf.py,sha256=tvDkwOUNw6mYcGvj7k332l2HMqKuKa1MtxUTK9YavWk,1659 +pandas/tests/io/parser/common/test_ints.py,sha256=b8Lw83KsKTED0L4k6BYtmocQ6Ftv_0uOQ16gA5qA_js,6506 +pandas/tests/io/parser/common/test_iterator.py,sha256=wry18UiB9RI3tw_aGeLn6YjZlJnHHmr2iWvLN7npYIE,2706 +pandas/tests/io/parser/common/test_read_errors.py,sha256=td4sJi7ZHkHpNBQyAah7IT_SJI_8oJDO14tfQ6YlJrw,7984 +pandas/tests/io/parser/common/test_verbose.py,sha256=nIdnxbNR0Nirx51p0Y62ykQirpVI7BXPF1NInKG-HLc,1317 +pandas/tests/io/parser/conftest.py,sha256=7NuO4F9W6NvEOiQJng6ia8IZBp9cLxQAEjZENhznWzs,7455 +pandas/tests/io/parser/dtypes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/io/parser/dtypes/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/io/parser/dtypes/__pycache__/test_categorical.cpython-38.pyc,, +pandas/tests/io/parser/dtypes/__pycache__/test_dtypes_basic.cpython-38.pyc,, +pandas/tests/io/parser/dtypes/__pycache__/test_empty.cpython-38.pyc,, +pandas/tests/io/parser/dtypes/test_categorical.py,sha256=ZkOIvX-HZGwMHzhBTr5KJfygtF-yCbYwlotEZ-0RBdw,8580 +pandas/tests/io/parser/dtypes/test_dtypes_basic.py,sha256=G5BAbJyG83ANI-AwRy_U_kjwZuiruP-jws_v2E4onLs,8889 +pandas/tests/io/parser/dtypes/test_empty.py,sha256=lM-L3T9qusxbj3DrETUw-PJJ402efenTBeMJ8jItJRM,5156 +pandas/tests/io/parser/test_c_parser_only.py,sha256=nseDBZNlIVBYoY9iEk9Vfdd0GHNVMX0e0MHjijLooQ8,21631 +pandas/tests/io/parser/test_comment.py,sha256=TpFoZQjs8a88s9SEgPxtE32_vmYHu44oTWC65bDrp5U,4824 +pandas/tests/io/parser/test_compression.py,sha256=23FGeTDEVq9NIWjk--B9cafPSn9cdjNB22lzGZgDDnw,5324 +pandas/tests/io/parser/test_converters.py,sha256=IIG8CX85yl5OsM6MPuGe_TR71tnaDqIM8jsHsFHbpfw,4617 +pandas/tests/io/parser/test_dialect.py,sha256=VRZsqySzq0zG0V3t9WFc-2DrnNptdMHDO2pRi5JWzvo,4158 +pandas/tests/io/parser/test_encoding.py,sha256=FC45tzxm489Iwlu8uax67RLmCFph7x0Oee3jEWBYHDA,9534 +pandas/tests/io/parser/test_header.py,sha256=A-UAksE8NNeJ54mraIDx7iNAdQBBGlzKzKbDko2DD44,18399 +pandas/tests/io/parser/test_index_col.py,sha256=IacvHZ_ZXi_8WfeLU8XgXQMrCWJqGvvgsgrBEXvoTSk,10227 +pandas/tests/io/parser/test_mangle_dupes.py,sha256=Ae_SRU2GdHMJzq3_DxesaqTLqFCUqhLv7g4Rlf69Vq4,4824 
+pandas/tests/io/parser/test_multi_thread.py,sha256=O6_mLZsmZCJbDKTs5m0oZh_ka4eHxJK8NmYeGuWHnGM,3776 +pandas/tests/io/parser/test_na_values.py,sha256=GEZ-3OUMwqwLlgm1GVimFmbbhhb3raBr59tymtQIsyk,16717 +pandas/tests/io/parser/test_network.py,sha256=dChx01kZ3LpkmsMWSUQy0Yx4b6xLzUGNyVz_zqdPJJU,11256 +pandas/tests/io/parser/test_parse_dates.py,sha256=tcR99D5OVE-fwyO-OV_xNko6kyGpLAg36eZ1hw67Xy8,59051 +pandas/tests/io/parser/test_python_parser_only.py,sha256=lkAeUPVOK8cJ0MMd00wBd5SnZnjGK-MqfwQs3DCw4xY,13243 +pandas/tests/io/parser/test_quoting.py,sha256=N5T30JNYg9LiMHDrI_4cpc-CbJyZ7JMdIDRm4tNfpWA,5149 +pandas/tests/io/parser/test_read_fwf.py,sha256=bWWb1YbolOO0aGBZemomup-pFuiuA0qkmWsrRAZBA-g,26428 +pandas/tests/io/parser/test_skiprows.py,sha256=8v9YIqjS95yR6XrYpz9Q0Ot6DtY1vuFnN-FlTBbxtrw,7536 +pandas/tests/io/parser/test_textreader.py,sha256=idTsQjxFy1w6VgiDl3TC7FQUNTPZbTv1QJlK9kRCqrA,10812 +pandas/tests/io/parser/test_unsupported.py,sha256=AwJ-ibGzL0l5df9u6r432pJQSqPeiWuriF0yHxHJLPI,6806 +pandas/tests/io/parser/usecols/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/io/parser/usecols/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/io/parser/usecols/__pycache__/test_parse_dates.cpython-38.pyc,, +pandas/tests/io/parser/usecols/__pycache__/test_strings.cpython-38.pyc,, +pandas/tests/io/parser/usecols/__pycache__/test_usecols_basic.cpython-38.pyc,, +pandas/tests/io/parser/usecols/test_parse_dates.py,sha256=K5Ug1ZR1UNKy1sqXmTeLOfYdhLYowGNylMaLqJh0IlM,4011 +pandas/tests/io/parser/usecols/test_strings.py,sha256=_I7EkUdPM0IDdx9cgbMXxVoUBNJERJnWenbOoGl5uuM,2564 +pandas/tests/io/parser/usecols/test_usecols_basic.py,sha256=ssK4yK7gcYQ5tOz2KRN0xqyKedVDt2pAeELyZ4b68bY,12155 +pandas/tests/io/pytables/__init__.py,sha256=OSIRg64VsNMVajwxn2_Rci2ihDrhjzWvicO0H8iNV7M,508 +pandas/tests/io/pytables/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/io/pytables/__pycache__/common.cpython-38.pyc,, +pandas/tests/io/pytables/__pycache__/conftest.cpython-38.pyc,, +pandas/tests/io/pytables/__pycache__/test_append.cpython-38.pyc,, +pandas/tests/io/pytables/__pycache__/test_categorical.cpython-38.pyc,, +pandas/tests/io/pytables/__pycache__/test_compat.cpython-38.pyc,, +pandas/tests/io/pytables/__pycache__/test_complex.cpython-38.pyc,, +pandas/tests/io/pytables/__pycache__/test_errors.cpython-38.pyc,, +pandas/tests/io/pytables/__pycache__/test_file_handling.cpython-38.pyc,, +pandas/tests/io/pytables/__pycache__/test_keys.cpython-38.pyc,, +pandas/tests/io/pytables/__pycache__/test_put.cpython-38.pyc,, +pandas/tests/io/pytables/__pycache__/test_pytables_missing.cpython-38.pyc,, +pandas/tests/io/pytables/__pycache__/test_read.cpython-38.pyc,, +pandas/tests/io/pytables/__pycache__/test_retain_attributes.cpython-38.pyc,, +pandas/tests/io/pytables/__pycache__/test_round_trip.cpython-38.pyc,, +pandas/tests/io/pytables/__pycache__/test_select.cpython-38.pyc,, +pandas/tests/io/pytables/__pycache__/test_store.cpython-38.pyc,, +pandas/tests/io/pytables/__pycache__/test_subclass.cpython-38.pyc,, +pandas/tests/io/pytables/__pycache__/test_time_series.cpython-38.pyc,, +pandas/tests/io/pytables/__pycache__/test_timezones.cpython-38.pyc,, +pandas/tests/io/pytables/common.py,sha256=eR_6gAjmrsiEiDgstDu2_JbI4GCqLupEvGgJBZoGM8E,2068 +pandas/tests/io/pytables/conftest.py,sha256=dk8CIjSK0Rp8Z8OdXrXVzn0Tqm0skXufuD58uTCnS2Y,320 +pandas/tests/io/pytables/test_append.py,sha256=fQxCdUX52cGgIeWXds-YrD9FKXdYjX3CgMhA0liSOj4,33542 
+pandas/tests/io/pytables/test_categorical.py,sha256=mJRm9OraIiE7q4OAC1ie6Xk6X6HnL_t8I_si9ByTz54,7248 +pandas/tests/io/pytables/test_compat.py,sha256=7wo6Xd-MYxKA5xVQZ3TU9i81IHOxl7PzL0UcWCzBBZ4,2633 +pandas/tests/io/pytables/test_complex.py,sha256=j_b6q-hygqa0JEuX97FPi9xUx6kmiRNpv1yPP4oEqCo,6245 +pandas/tests/io/pytables/test_errors.py,sha256=jEgqIZgIAoJW0UoVsTWsUQiUU_cES_ydBYbcTIwgOhU,7766 +pandas/tests/io/pytables/test_file_handling.py,sha256=-Irq8UG63W-lLzUaUReZtEfbtuN1z5FSnjb1eVr1QCM,13374 +pandas/tests/io/pytables/test_keys.py,sha256=-JXT-byTLss-xjDTbNUUVG3PeYuwGHsTjWh3uiDYLRc,2376 +pandas/tests/io/pytables/test_put.py,sha256=UBP8OG0pGQpgkC6KT2nHRAWOU19Hc2zPJjKVKmC6OAs,11466 +pandas/tests/io/pytables/test_pytables_missing.py,sha256=mS4LkjqTPsAovK9V_aKLLMPlEi055_sp-5zykczITRA,341 +pandas/tests/io/pytables/test_read.py,sha256=21WthqygyCBOejkDGGCjfV_V2nxwrnzkt9tGWfj8Qos,11428 +pandas/tests/io/pytables/test_retain_attributes.py,sha256=wVGn6Nc2mV1vfOytIOVgvdXM7sDiqt1SN_JkioVMEqk,3375 +pandas/tests/io/pytables/test_round_trip.py,sha256=8WqF06A9l6PZbOd06dBBQHmNUohKQb8piZ9QNSZL5Ec,17615 +pandas/tests/io/pytables/test_select.py,sha256=XkZ4xhJ_toZaPXW1_Wjx-kK_oBbzWIxhj2UuvLdme_s,33503 +pandas/tests/io/pytables/test_store.py,sha256=wZgxkBw8XZ_qj2xZn8cBu4CV_FOkoqXzcjmazUlC5nQ,32062 +pandas/tests/io/pytables/test_subclass.py,sha256=wkpVyzeV_KfL08VgcU9aE_6hezWfcPBwuglODKJ6SiA,1474 +pandas/tests/io/pytables/test_time_series.py,sha256=QPyQEAT9-qo5j58nqIiljuFaTVszQq6lEXtt-QPAeRk,1948 +pandas/tests/io/pytables/test_timezones.py,sha256=Jx7C4l6VuRF_N7Pm8KxRguuYU3DGJSQj7jVZbc2X9Fs,11378 +pandas/tests/io/sas/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/io/sas/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/io/sas/__pycache__/test_sas.cpython-38.pyc,, +pandas/tests/io/sas/__pycache__/test_sas7bdat.cpython-38.pyc,, +pandas/tests/io/sas/__pycache__/test_xport.cpython-38.pyc,, +pandas/tests/io/sas/test_sas.py,sha256=C2mvGTXUvmtoroqtSKBPW1SDI9A-Dho1WjN9Wz_5Cf8,695 +pandas/tests/io/sas/test_sas7bdat.py,sha256=UGWnyUQs9ZFTOCnhLHOL4n-CoYAMDNKJIrXbmDQzdPM,12576 +pandas/tests/io/sas/test_xport.py,sha256=qarYpGaueE4b4gVRjikZxPo1QA6SMfYb-xt9KZn3rqM,5862 +pandas/tests/io/test_clipboard.py,sha256=IAeFfjfJsCc5y5qP11DCsniIykNzINIhitONxwsjmIQ,9795 +pandas/tests/io/test_common.py,sha256=efY3clln1DzjESd5ABoEJqiSCsq7hzsmR6XiD4R8v1s,21829 +pandas/tests/io/test_compression.py,sha256=rtlJm2XqnU2kYFjw2RDg7-s8XyL0uMPBU7CNlIyjNio,8456 +pandas/tests/io/test_date_converters.py,sha256=m4ig8gp7LzZ802Wt0zU-ugzzRWnV9iozWH-Wzr5tQ2A,1368 +pandas/tests/io/test_feather.py,sha256=BRl6Q4iyI2mQ8-US9I4dke8ELfVDXaBJU7I9u-jK7HE,6469 +pandas/tests/io/test_fsspec.py,sha256=6HUAFltRpdUp6u58vlOrlPxclFNgTVKwOXQCDVWAuIw,9940 +pandas/tests/io/test_gcs.py,sha256=zeE_E2bD8YO9P5YXU-YQqd72RF31Odr6LXZjXtVYujY,5626 +pandas/tests/io/test_html.py,sha256=CpJFvFuaKIIzBw9FhcTyt5unRx2fZ1eujvS2hvzxtRY,40935 +pandas/tests/io/test_orc.py,sha256=oydjHMQkcBphEvRoIOTQ3YcpODQsz7hEu55RK7IzbMg,6430 +pandas/tests/io/test_parquet.py,sha256=DAO1U-l4gB6lzv6fsfCwGf4BMURpZmWXwiZGsXtzgQE,38095 +pandas/tests/io/test_pickle.py,sha256=SbNeHFYPXGHxBeHwMI2jUJzEAE8azeDGdc6px1zsKZ4,18695 +pandas/tests/io/test_s3.py,sha256=6XAwm4O-CnVoS6J2HH0CQM29ijEG4tpcc1lnW_FUQp8,1577 +pandas/tests/io/test_spss.py,sha256=rJa2WvARqpkdPAwAGxj5jOc1fJiz9LgWLVH_5vBNdDY,2745 +pandas/tests/io/test_sql.py,sha256=ujRvbu_UspYZ5dcZTGbKbOUdZucIdRxIpGf3WmJBKFg,102145 +pandas/tests/io/test_stata.py,sha256=Rb9j3oqoZiWZ_jweGPrYhvRiZrGhLay5PBI4ZaJsxJ0,85475 
+pandas/tests/io/test_user_agent.py,sha256=HlEBeCt_YkfI8y6gPqM8lSPsIZLkhOO_ft9tIyJgQEo,11836 +pandas/tests/io/xml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/io/xml/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/io/xml/__pycache__/test_to_xml.cpython-38.pyc,, +pandas/tests/io/xml/__pycache__/test_xml.cpython-38.pyc,, +pandas/tests/io/xml/test_to_xml.py,sha256=eTx27n8Hi8EUHj-6fYo9wUtm3nFpPU9MxiL55mwHxnw,34499 +pandas/tests/io/xml/test_xml.py,sha256=9NG2LJei_2K1ctXzTAKDGWsPJuKOpamdxoljrgQb2qo,34722 +pandas/tests/libs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/libs/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/libs/__pycache__/test_hashtable.cpython-38.pyc,, +pandas/tests/libs/__pycache__/test_join.cpython-38.pyc,, +pandas/tests/libs/__pycache__/test_lib.cpython-38.pyc,, +pandas/tests/libs/test_hashtable.py,sha256=EB1ei0NS2RyVRApfSN5JrUNBilfpFiEKxUN1TOf7M00,17581 +pandas/tests/libs/test_join.py,sha256=z5JeLRMmF_vu4wwOpi3cG6k-p6lkhjAKPad6ShMqS30,10811 +pandas/tests/libs/test_lib.py,sha256=J50BlC9B_mxF21BNZrK1_2Ybtn1DVokMwOuJ3w4HSX0,7842 +pandas/tests/plotting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/plotting/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/plotting/__pycache__/common.cpython-38.pyc,, +pandas/tests/plotting/__pycache__/test_backend.cpython-38.pyc,, +pandas/tests/plotting/__pycache__/test_boxplot_method.cpython-38.pyc,, +pandas/tests/plotting/__pycache__/test_common.cpython-38.pyc,, +pandas/tests/plotting/__pycache__/test_converter.cpython-38.pyc,, +pandas/tests/plotting/__pycache__/test_datetimelike.cpython-38.pyc,, +pandas/tests/plotting/__pycache__/test_groupby.cpython-38.pyc,, +pandas/tests/plotting/__pycache__/test_hist_method.cpython-38.pyc,, +pandas/tests/plotting/__pycache__/test_misc.cpython-38.pyc,, +pandas/tests/plotting/__pycache__/test_series.cpython-38.pyc,, +pandas/tests/plotting/__pycache__/test_style.cpython-38.pyc,, +pandas/tests/plotting/common.py,sha256=dIjUpmpK8ZvpdK0Z1jXiKKygoskuWxVNnMjICMsLMGs,21811 +pandas/tests/plotting/frame/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/plotting/frame/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/plotting/frame/__pycache__/test_frame.cpython-38.pyc,, +pandas/tests/plotting/frame/__pycache__/test_frame_color.cpython-38.pyc,, +pandas/tests/plotting/frame/__pycache__/test_frame_groupby.cpython-38.pyc,, +pandas/tests/plotting/frame/__pycache__/test_frame_legend.cpython-38.pyc,, +pandas/tests/plotting/frame/__pycache__/test_frame_subplots.cpython-38.pyc,, +pandas/tests/plotting/frame/__pycache__/test_hist_box_by.cpython-38.pyc,, +pandas/tests/plotting/frame/test_frame.py,sha256=sZojK1g3qZkLxjgCf4tMn-vMM9mHas7GBuahgUhR-AM,79413 +pandas/tests/plotting/frame/test_frame_color.py,sha256=6s41qLYjLG-sO9BTiCdSUWi3jYM6EBkcO8dH78fX_UU,25221 +pandas/tests/plotting/frame/test_frame_groupby.py,sha256=bnvev7X63rzyRjRyP2jIBNH-JqrOAMrINENs0N6aZYw,3099 +pandas/tests/plotting/frame/test_frame_legend.py,sha256=Rl1813NiwqkYSIeAszNEFNKOkMLvB5gBNXG85jukMkA,8166 +pandas/tests/plotting/frame/test_frame_subplots.py,sha256=o18O2AYRHGQOebAdBWwy8J4M6YCJucFQKZ-nWMspaWs,27178 +pandas/tests/plotting/frame/test_hist_box_by.py,sha256=r6jiq83ao7OWaxtNN6unxKcXMEuCA4MkCsH9Cc2k69w,12471 +pandas/tests/plotting/test_backend.py,sha256=-9qHPPRInaoD0m7TulPh4gNaiY6XhIeeKeKD_R9hMWQ,3667 +pandas/tests/plotting/test_boxplot_method.py,sha256=F3GqV0KB2j9EPUO9jTXyOMC2mZui8BO9rOf7HjsaOKs,21773 
+pandas/tests/plotting/test_common.py,sha256=cirNypiGWwuvTbnsXafqcSdQEWkncFgFzogqar__GNs,1547 +pandas/tests/plotting/test_converter.py,sha256=Q3c7pV39AD9Dtagwf1oAsKQ4qpdu8-z0KljbIUS4EuM,12455 +pandas/tests/plotting/test_datetimelike.py,sha256=zdPaovpPKNazwVJmRpU1vmsKrDXlZMQyFo5bjIg3fTw,55163 +pandas/tests/plotting/test_groupby.py,sha256=wAGbHyN7smcvc7gZ11AWkd1C9mmcfu28keTd5K7PRGw,4514 +pandas/tests/plotting/test_hist_method.py,sha256=93l32R_Xb0h8PTas8LTJneFbeCEjB4GCP4i6tZ5al5I,28104 +pandas/tests/plotting/test_misc.py,sha256=kVcg0Sf8cAhjbKtBq-j78jGh65z7KJiSFK9twXx3I5Q,20186 +pandas/tests/plotting/test_series.py,sha256=fP54JUmlSbOH-8CWsXzXYm8fKvMsih0jbWxEtj_L7w0,29893 +pandas/tests/plotting/test_style.py,sha256=Wj3UsUKUxwfsg2QdK7LIv8nvoSTxZ_4i-O2Y6hMC_Sw,5203 +pandas/tests/reductions/__init__.py,sha256=vflo8yMcocx2X1Rdw9vt8NpiZ4ZFq9xZRC3PW6Gp-Cs,125 +pandas/tests/reductions/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/reductions/__pycache__/test_reductions.cpython-38.pyc,, +pandas/tests/reductions/__pycache__/test_stat_reductions.cpython-38.pyc,, +pandas/tests/reductions/test_reductions.py,sha256=bYNyz35dmx5WvP1xlEYohg8IFO_MeGn_0JdHMlaBG_I,50791 +pandas/tests/reductions/test_stat_reductions.py,sha256=N3gdXNpkNmpPCalnD8kMFHdnWTawXm2r9VBc8C_OEIw,9567 +pandas/tests/resample/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/resample/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/resample/__pycache__/conftest.cpython-38.pyc,, +pandas/tests/resample/__pycache__/test_base.cpython-38.pyc,, +pandas/tests/resample/__pycache__/test_datetime_index.cpython-38.pyc,, +pandas/tests/resample/__pycache__/test_deprecated.cpython-38.pyc,, +pandas/tests/resample/__pycache__/test_period_index.cpython-38.pyc,, +pandas/tests/resample/__pycache__/test_resample_api.cpython-38.pyc,, +pandas/tests/resample/__pycache__/test_resampler_grouper.cpython-38.pyc,, +pandas/tests/resample/__pycache__/test_time_grouper.cpython-38.pyc,, +pandas/tests/resample/__pycache__/test_timedelta.cpython-38.pyc,, +pandas/tests/resample/conftest.py,sha256=rygV0DcxwZD423Z2TwNcRCDQgyJnsv_EqtSIkUYPwWs,4162 +pandas/tests/resample/test_base.py,sha256=MmA0b_ugE92Ccek7KKHSf2ejoum4At7Rq-i4LINabZ8,8137 +pandas/tests/resample/test_datetime_index.py,sha256=OjxWuMN9r8YSdxBUYFuM3Af9gHdwls5THwhkRRYYcGk,61030 +pandas/tests/resample/test_deprecated.py,sha256=UZoLO6BJl1ukiGfnerVDYhnLvRavHwBisLoEjVpqN3I,11516 +pandas/tests/resample/test_period_index.py,sha256=IvivrWofXG7-IFNKSkxLCjAC4CSjVzbpD_rv742hO1I,33758 +pandas/tests/resample/test_resample_api.py,sha256=CxMDSLBcn_N4FgMSwpvUs3WIOuVOvAUd248vP2c1KNo,22305 +pandas/tests/resample/test_resampler_grouper.py,sha256=3_PR3pRNV5DMdmXd88CAR-iuO5gLHRkquJjhZcGKJBw,14444 +pandas/tests/resample/test_time_grouper.py,sha256=Pqm7T2dJfo5vuThx7D_a9FWUMFpJYD4TQkxSgHPD34g,11127 +pandas/tests/resample/test_timedelta.py,sha256=9a4suKXwXYRwnrbO-GpNi0bJxPRG3LHxqL8xLYHv0aE,6370 +pandas/tests/reshape/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/reshape/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/reshape/__pycache__/test_crosstab.cpython-38.pyc,, +pandas/tests/reshape/__pycache__/test_cut.cpython-38.pyc,, +pandas/tests/reshape/__pycache__/test_get_dummies.cpython-38.pyc,, +pandas/tests/reshape/__pycache__/test_melt.cpython-38.pyc,, +pandas/tests/reshape/__pycache__/test_pivot.cpython-38.pyc,, +pandas/tests/reshape/__pycache__/test_pivot_multilevel.cpython-38.pyc,, +pandas/tests/reshape/__pycache__/test_qcut.cpython-38.pyc,, 
+pandas/tests/reshape/__pycache__/test_union_categoricals.cpython-38.pyc,, +pandas/tests/reshape/__pycache__/test_util.cpython-38.pyc,, +pandas/tests/reshape/concat/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/reshape/concat/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/reshape/concat/__pycache__/conftest.cpython-38.pyc,, +pandas/tests/reshape/concat/__pycache__/test_append.cpython-38.pyc,, +pandas/tests/reshape/concat/__pycache__/test_append_common.cpython-38.pyc,, +pandas/tests/reshape/concat/__pycache__/test_categorical.cpython-38.pyc,, +pandas/tests/reshape/concat/__pycache__/test_concat.cpython-38.pyc,, +pandas/tests/reshape/concat/__pycache__/test_dataframe.cpython-38.pyc,, +pandas/tests/reshape/concat/__pycache__/test_datetimes.cpython-38.pyc,, +pandas/tests/reshape/concat/__pycache__/test_empty.cpython-38.pyc,, +pandas/tests/reshape/concat/__pycache__/test_index.cpython-38.pyc,, +pandas/tests/reshape/concat/__pycache__/test_invalid.cpython-38.pyc,, +pandas/tests/reshape/concat/__pycache__/test_series.cpython-38.pyc,, +pandas/tests/reshape/concat/__pycache__/test_sort.cpython-38.pyc,, +pandas/tests/reshape/concat/conftest.py,sha256=s94n_rOGHsQKdP2KbCAQEfZeQpesYmhH_d-RNNTkvYc,162 +pandas/tests/reshape/concat/test_append.py,sha256=cmcdM_RO30znQSeuo9g58Uzp41Rmo3UwIS_YyWDXttI,13562 +pandas/tests/reshape/concat/test_append_common.py,sha256=lfxi6Eo7x17ykWIJ7O8NBiGg7cUy2_KOlcHfVVzOZHY,27943 +pandas/tests/reshape/concat/test_categorical.py,sha256=XZznBpUg82u3sgn779qSQDlzXq01YHNQjnDXOpU-7nY,8258 +pandas/tests/reshape/concat/test_concat.py,sha256=4eVdPgLXOst2lsGCAJ-25A2vWsYHuvWXkBRWB4sgd1k,25182 +pandas/tests/reshape/concat/test_dataframe.py,sha256=FnifjaeFdyvCRKr63SZ_uLDn15uHAD7b6aQeWUTY5pU,7822 +pandas/tests/reshape/concat/test_datetimes.py,sha256=DMrCjrcPLQtvc44_GBUr-8jV7jMivJflf1DXcYwdcC8,18962 +pandas/tests/reshape/concat/test_empty.py,sha256=mPyL7EMR0d5u7en4hP5g3XiMn-CKHXbV-maD7pls-tU,9499 +pandas/tests/reshape/concat/test_index.py,sha256=NaXXvlaIns8WIfcIlaQlwt6Ktnz2MaL7Zy6D2cEVC7Q,10890 +pandas/tests/reshape/concat/test_invalid.py,sha256=sIPSonuwF-HNex5RhvPIAZtLH-2LohJ7jTzd8AzupMQ,1613 +pandas/tests/reshape/concat/test_series.py,sha256=LHHo8m-LA7PlYACwLTVMcwlC_BxyRkk7wP4elAtt7dM,5183 +pandas/tests/reshape/concat/test_sort.py,sha256=iP1bluA2LPWmp_XUxKOoP4XXMlxSSEAmIg6gAOkJoHk,3685 +pandas/tests/reshape/merge/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/reshape/merge/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/reshape/merge/__pycache__/test_join.cpython-38.pyc,, +pandas/tests/reshape/merge/__pycache__/test_merge.cpython-38.pyc,, +pandas/tests/reshape/merge/__pycache__/test_merge_asof.cpython-38.pyc,, +pandas/tests/reshape/merge/__pycache__/test_merge_cross.cpython-38.pyc,, +pandas/tests/reshape/merge/__pycache__/test_merge_index_as_string.cpython-38.pyc,, +pandas/tests/reshape/merge/__pycache__/test_merge_ordered.cpython-38.pyc,, +pandas/tests/reshape/merge/__pycache__/test_multi.cpython-38.pyc,, +pandas/tests/reshape/merge/test_join.py,sha256=yxTkWH5OuQQC_X6STOsY0u0HArdu1rzqNSXpNlLrY7U,31071 +pandas/tests/reshape/merge/test_merge.py,sha256=6Nh3Yinz3DuJQiECSB__X0YPrHURaE2kZ3H8nZjMCm0,91382 +pandas/tests/reshape/merge/test_merge_asof.py,sha256=862ZddV3imTzOhrddFjeW5Hr16fdx5wZh8iI9ColVrc,50963 +pandas/tests/reshape/merge/test_merge_cross.py,sha256=v4-EVkqj6ENBl-RpTzgm9fxW_yI9OBKu2bZ0EFbfQ4s,2807 +pandas/tests/reshape/merge/test_merge_index_as_string.py,sha256=tKZu1pxnELCuqfdzhHJv-_J-KOteajhrLTxXXCrfPtc,5360 
+pandas/tests/reshape/merge/test_merge_ordered.py,sha256=P772BDPdKR9upXWFUaFXO4jBr-3BXqbQCGauWqHBea4,6383 +pandas/tests/reshape/merge/test_multi.py,sha256=8XvCNJ9nuwbuxWwYFUkZp2-kZJc9-swVePi_WWoY8Zw,29632 +pandas/tests/reshape/test_crosstab.py,sha256=6KbohIztJ7_v4j10zQTDdD3DiKC9DQ4xkiHpuBhKbEw,30051 +pandas/tests/reshape/test_cut.py,sha256=URbCxVV9Pbek18ulPcM6kPuKuTdDwhI7ocBW8jhWr4Y,22135 +pandas/tests/reshape/test_get_dummies.py,sha256=6e9D3LPV-fL0xUvp_mnjhlFCTGKmv67mrpUQcD0uW8Y,23722 +pandas/tests/reshape/test_melt.py,sha256=Y8iHI2A203nbwHuCbjCgEGFug-MFJ7p2PyaE13688DY,37432 +pandas/tests/reshape/test_pivot.py,sha256=FhmDc4qNTAHrITy6ol4CioX8sLAp7IWQyE8UxtCYX0Y,78817 +pandas/tests/reshape/test_pivot_multilevel.py,sha256=X4JXdLWUYSsgjUdWmib4W3jjsezpG7f0AopUsxupGQo,7511 +pandas/tests/reshape/test_qcut.py,sha256=v6d3wMZYnLafa7zBakUUO1SY5CTjBuc6BR9bbplhswc,8178 +pandas/tests/reshape/test_union_categoricals.py,sha256=DW1LjQIgVJeOGjz2WnU-HX41cxQJ6ja0DfDWPQO8Pow,14457 +pandas/tests/reshape/test_util.py,sha256=kaLagIH-U5-sPJ-EG9iXZGdYoZCc3qSjlfyjpmrO6Gk,2859 +pandas/tests/scalar/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/scalar/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/scalar/__pycache__/test_na_scalar.cpython-38.pyc,, +pandas/tests/scalar/__pycache__/test_nat.cpython-38.pyc,, +pandas/tests/scalar/interval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/scalar/interval/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/scalar/interval/__pycache__/test_arithmetic.cpython-38.pyc,, +pandas/tests/scalar/interval/__pycache__/test_interval.cpython-38.pyc,, +pandas/tests/scalar/interval/__pycache__/test_ops.cpython-38.pyc,, +pandas/tests/scalar/interval/test_arithmetic.py,sha256=Lhvs4zwnOXQzllavYDQYoxOLJgDwot4gCRAHXepogJo,1836 +pandas/tests/scalar/interval/test_interval.py,sha256=yZG2JcRcF33AeMSBk0Q4F0L602-PPkLNLw26Gd8TMOI,8861 +pandas/tests/scalar/interval/test_ops.py,sha256=re8lP7Ssee40f40k1Wfyh44ssb09tKQCiWPZ9rkH_v0,2353 +pandas/tests/scalar/period/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/scalar/period/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/scalar/period/__pycache__/test_asfreq.cpython-38.pyc,, +pandas/tests/scalar/period/__pycache__/test_period.cpython-38.pyc,, +pandas/tests/scalar/period/test_asfreq.py,sha256=_5nKUPaOzcz2h-cHk_tWp9J4midnwFj2XKnACM5eEes,36313 +pandas/tests/scalar/period/test_period.py,sha256=CbtdY2hW2nczLkk1Otpjcvca1BmIHKO8FJThfTTGgZI,53742 +pandas/tests/scalar/test_na_scalar.py,sha256=OF4KOq2pQSJ0k7X8qRXXSitY4Hd2AoW9qLoTa__m1J4,7221 +pandas/tests/scalar/test_nat.py,sha256=rjMveH0C0IBEMup6f4-xgiEyPFzGHHlJzxqH1QIDX7c,20500 +pandas/tests/scalar/timedelta/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/scalar/timedelta/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/scalar/timedelta/__pycache__/test_arithmetic.cpython-38.pyc,, +pandas/tests/scalar/timedelta/__pycache__/test_constructors.cpython-38.pyc,, +pandas/tests/scalar/timedelta/__pycache__/test_formats.cpython-38.pyc,, +pandas/tests/scalar/timedelta/__pycache__/test_timedelta.cpython-38.pyc,, +pandas/tests/scalar/timedelta/test_arithmetic.py,sha256=FBjmy7cnuQyyDKDWvX7R9vGFBZRMFvkZW_VMQMkNwIE,34309 +pandas/tests/scalar/timedelta/test_constructors.py,sha256=2Pvfbcf3n9c80aUdgkvg0unyvMFX6IiYyYUR6o1P6e0,12203 +pandas/tests/scalar/timedelta/test_formats.py,sha256=afiVjnkmjtnprcbtxg0v70VqMVnolTWyFJBXMlWaIY8,1261 
+pandas/tests/scalar/timedelta/test_timedelta.py,sha256=Ungkt1JVtPkQLaCWTuSQcvWtiMFf-9Fa1KQmltRP2bI,22234 +pandas/tests/scalar/timestamp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/scalar/timestamp/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/scalar/timestamp/__pycache__/test_arithmetic.cpython-38.pyc,, +pandas/tests/scalar/timestamp/__pycache__/test_comparisons.cpython-38.pyc,, +pandas/tests/scalar/timestamp/__pycache__/test_constructors.cpython-38.pyc,, +pandas/tests/scalar/timestamp/__pycache__/test_formats.cpython-38.pyc,, +pandas/tests/scalar/timestamp/__pycache__/test_rendering.cpython-38.pyc,, +pandas/tests/scalar/timestamp/__pycache__/test_timestamp.cpython-38.pyc,, +pandas/tests/scalar/timestamp/__pycache__/test_timezones.cpython-38.pyc,, +pandas/tests/scalar/timestamp/__pycache__/test_unary_ops.cpython-38.pyc,, +pandas/tests/scalar/timestamp/test_arithmetic.py,sha256=Kw8eT4KQTa4pFfFkKmofnLqYpYigHasN0vXp2b6-V6c,11073 +pandas/tests/scalar/timestamp/test_comparisons.py,sha256=xHM1ya9wvscTlzQWnlVYgfpTdj785wbm9B_jY8YJOcs,10447 +pandas/tests/scalar/timestamp/test_constructors.py,sha256=QPMadM-A_aIOFlShZc3c8VPefHFyyYjh9AsuFOAL-oM,23031 +pandas/tests/scalar/timestamp/test_formats.py,sha256=f7s69pbe4Wj787E3oDKCnxLyti1jVisYNpccSgDTCw8,1895 +pandas/tests/scalar/timestamp/test_rendering.py,sha256=LjxKvIjYlYrF_0dqJRW4ke5SnMJVZRJtUh59lRcyTzw,4194 +pandas/tests/scalar/timestamp/test_timestamp.py,sha256=BEz5aLt-v8A85GD7XDv1i0tTBp6Eb92tS4JWMEzrvyw,24567 +pandas/tests/scalar/timestamp/test_timezones.py,sha256=_6nNRpFRTtRejS4oVUpIgohW4JPukt3wBJqWNcnbHys,16211 +pandas/tests/scalar/timestamp/test_unary_ops.py,sha256=qrOXf9nlvqAu0R76X78XpLWU6LY4I77MzEpiM-KfT7U,18111 +pandas/tests/series/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/series/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/series/__pycache__/test_api.cpython-38.pyc,, +pandas/tests/series/__pycache__/test_arithmetic.cpython-38.pyc,, +pandas/tests/series/__pycache__/test_constructors.cpython-38.pyc,, +pandas/tests/series/__pycache__/test_cumulative.cpython-38.pyc,, +pandas/tests/series/__pycache__/test_iteration.cpython-38.pyc,, +pandas/tests/series/__pycache__/test_logical_ops.cpython-38.pyc,, +pandas/tests/series/__pycache__/test_missing.cpython-38.pyc,, +pandas/tests/series/__pycache__/test_npfuncs.cpython-38.pyc,, +pandas/tests/series/__pycache__/test_reductions.cpython-38.pyc,, +pandas/tests/series/__pycache__/test_repr.cpython-38.pyc,, +pandas/tests/series/__pycache__/test_subclass.cpython-38.pyc,, +pandas/tests/series/__pycache__/test_ufunc.cpython-38.pyc,, +pandas/tests/series/__pycache__/test_unary.cpython-38.pyc,, +pandas/tests/series/__pycache__/test_validate.cpython-38.pyc,, +pandas/tests/series/accessors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/series/accessors/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/series/accessors/__pycache__/test_cat_accessor.cpython-38.pyc,, +pandas/tests/series/accessors/__pycache__/test_dt_accessor.cpython-38.pyc,, +pandas/tests/series/accessors/__pycache__/test_sparse_accessor.cpython-38.pyc,, +pandas/tests/series/accessors/__pycache__/test_str_accessor.cpython-38.pyc,, +pandas/tests/series/accessors/test_cat_accessor.py,sha256=qhl_HYd8bjFeETeZaEOBbTxvC2wBKwmhGDNmhY5MtRc,10565 +pandas/tests/series/accessors/test_dt_accessor.py,sha256=thiQvPv30-PMCWGEJaXlBObkzhLMoSS4hOaiK-WoAW4,28039 
+pandas/tests/series/accessors/test_sparse_accessor.py,sha256=yPxK1Re7RDPLi5v2r9etrgsUfSL9NN45CAvuR3tYVwA,296 +pandas/tests/series/accessors/test_str_accessor.py,sha256=M29X62c2ekvH1FTv56yye2TLcXyYUCM5AegAQVWLFc8,853 +pandas/tests/series/indexing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/series/indexing/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/series/indexing/__pycache__/test_datetime.cpython-38.pyc,, +pandas/tests/series/indexing/__pycache__/test_delitem.cpython-38.pyc,, +pandas/tests/series/indexing/__pycache__/test_get.cpython-38.pyc,, +pandas/tests/series/indexing/__pycache__/test_getitem.cpython-38.pyc,, +pandas/tests/series/indexing/__pycache__/test_indexing.cpython-38.pyc,, +pandas/tests/series/indexing/__pycache__/test_mask.cpython-38.pyc,, +pandas/tests/series/indexing/__pycache__/test_set_value.cpython-38.pyc,, +pandas/tests/series/indexing/__pycache__/test_setitem.cpython-38.pyc,, +pandas/tests/series/indexing/__pycache__/test_take.cpython-38.pyc,, +pandas/tests/series/indexing/__pycache__/test_where.cpython-38.pyc,, +pandas/tests/series/indexing/__pycache__/test_xs.cpython-38.pyc,, +pandas/tests/series/indexing/test_datetime.py,sha256=Hq78WT639CFvSLwUDNdmztd58rNftWzqK7t2-KLK9qA,13888 +pandas/tests/series/indexing/test_delitem.py,sha256=bQwJNiGqH3GQQUkq7linphR9PL2oXOQSeAitqupiRRQ,1979 +pandas/tests/series/indexing/test_get.py,sha256=i7eACQW-KC7Zg0_LeyHFXykN6MTh7gIr4dBA-0aDlWg,4877 +pandas/tests/series/indexing/test_getitem.py,sha256=zCtwcCz8YPPIoKgUM5l2iw2S1cqfx3GF9l46oGH2PWc,23073 +pandas/tests/series/indexing/test_indexing.py,sha256=o_Okri_q82eT2_IvBlovkQUawj71_LikECT3Nzj-RFo,10759 +pandas/tests/series/indexing/test_mask.py,sha256=bjoNDyGpyA_jmhOr3R6d7x_2RsPjjLMvlO-KyfnRw68,1661 +pandas/tests/series/indexing/test_set_value.py,sha256=UwVNpW3Fh3PKhNiFzZiVK07W871CmFM2fGtC6CTW5z0,991 +pandas/tests/series/indexing/test_setitem.py,sha256=Nzccv_qZbGqu8hwfyL_c5BrXH9epWish7Ho1j3v9U4U,48254 +pandas/tests/series/indexing/test_take.py,sha256=2B79IuWBesI849qvFO4hELdNiVsT2A90yq8wor_aRYk,963 +pandas/tests/series/indexing/test_where.py,sha256=P8wJ2M-oq80AaHTXuqqnSNPQ1EGnlS6oUWI9Ks6ymw0,12601 +pandas/tests/series/indexing/test_xs.py,sha256=Zh-NSIY9EQB1YJKUEJEZcm-0sCPd9_jAe1KIrTjYSzE,2701 +pandas/tests/series/methods/__init__.py,sha256=zVXqGxDIQ-ebxxcetI9KcJ9ZEHeIC4086CoDvyc8CNM,225 +pandas/tests/series/methods/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_align.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_append.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_argsort.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_asof.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_astype.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_autocorr.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_between.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_clip.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_combine.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_combine_first.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_compare.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_convert.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_convert_dtypes.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_copy.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_count.cpython-38.pyc,, 
+pandas/tests/series/methods/__pycache__/test_cov_corr.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_describe.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_diff.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_drop.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_drop_duplicates.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_dropna.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_dtypes.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_duplicated.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_equals.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_explode.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_fillna.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_get_numeric_data.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_head_tail.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_infer_objects.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_interpolate.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_is_monotonic.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_is_unique.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_isin.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_isna.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_item.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_matmul.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_nlargest.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_nunique.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_pct_change.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_pop.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_quantile.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_rank.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_reindex.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_reindex_like.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_rename.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_rename_axis.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_repeat.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_replace.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_reset_index.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_round.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_searchsorted.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_set_name.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_sort_index.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_sort_values.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_to_csv.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_to_dict.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_to_frame.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_truncate.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_tz_localize.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_unique.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_unstack.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_update.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_value_counts.cpython-38.pyc,, +pandas/tests/series/methods/__pycache__/test_values.cpython-38.pyc,, 
+pandas/tests/series/methods/__pycache__/test_view.cpython-38.pyc,, +pandas/tests/series/methods/test_align.py,sha256=nxSYZxwQO_KhOQTlzPSE_MNCIOBfxo6e_mLCNEt6udY,5358 +pandas/tests/series/methods/test_append.py,sha256=sZH2eDU6U4sodIyLmG4kvL0vu4ExwQ4AdYpOhp6rSog,9987 +pandas/tests/series/methods/test_argsort.py,sha256=mdgCjQM5ILs-PRtiM4fo0aU069RhtpHPTT7VgkzCJ8Y,2265 +pandas/tests/series/methods/test_asof.py,sha256=UCOEvAA0CKDFej97k11aOrwgXBCUmPt0awZYDtHSHiQ,6038 +pandas/tests/series/methods/test_astype.py,sha256=hOC9qmRmbnNkwWYFICMHuMebQ0fCsPfmejX6TG14orM,21515 +pandas/tests/series/methods/test_autocorr.py,sha256=pI9MsjcDr00_4uPYg_Re22IsmVPTDbOjU83P4NOh8ck,999 +pandas/tests/series/methods/test_between.py,sha256=jNhkqiVlGSUWNQ2wXIkmTy2vp63jhlVnXMdn1r4g8cc,3115 +pandas/tests/series/methods/test_clip.py,sha256=UvUdbBbCErTWImtvAaz1vcYTbidru-so6LW3mp7N4oI,5231 +pandas/tests/series/methods/test_combine.py,sha256=ye8pwpjolpG_kUKSFTC8ZoRdj3ze8qtJXvDUZ5gpap4,627 +pandas/tests/series/methods/test_combine_first.py,sha256=8BhTzkfBApPRp-8xI6e1Yx9avsDl9VRB2kfFIa_XBo0,3556 +pandas/tests/series/methods/test_compare.py,sha256=pb1C1B7bQoswLmvZKpnjJ6ErWktr7yIzR7hsaLNosvg,3734 +pandas/tests/series/methods/test_convert.py,sha256=7_ZRSP0HU139OuzrhUb_AUomSZm-UgMMnapthpwYeEo,5059 +pandas/tests/series/methods/test_convert_dtypes.py,sha256=i3ZgU9mp_2a-KbDtU_92-LT9KyqJsXMJPx_BlkGvGBU,6828 +pandas/tests/series/methods/test_copy.py,sha256=xfiJCvhG3VoEhC2dhGoIyvrnKtR3GPXyRtkhBJx_p5I,2179 +pandas/tests/series/methods/test_count.py,sha256=xmLM3SLv_PRZ9NpX-tAYiqKlQGEm0ZZ0jlfe81KRESQ,3245 +pandas/tests/series/methods/test_cov_corr.py,sha256=e-rGOXTUk36586mH4dhMlL_FcpvJgwGhV6a-DW-Nwek,5222 +pandas/tests/series/methods/test_describe.py,sha256=VElUKUjNfBl-ruJORMNK2jdJ4HxdXDTgkVnFbAMvU0c,4855 +pandas/tests/series/methods/test_diff.py,sha256=ctmz7_gFctiDK-C7YqqeeRnF3FPOcFIcG7ln7E4P-N4,2425 +pandas/tests/series/methods/test_drop.py,sha256=D8AQ0Afvg2KQM1XYnk6caT244nIUEZALDphGQGIocHc,3420 +pandas/tests/series/methods/test_drop_duplicates.py,sha256=CVNWDGWAPTUSF-uFrcvg-PM8EQ1l6ZIcy0cYkLzfWpk,8763 +pandas/tests/series/methods/test_dropna.py,sha256=mjHBNX71WRiMnMn5l-FWYNdjFg64bcZNj7OMmVLuoAI,3488 +pandas/tests/series/methods/test_dtypes.py,sha256=lHRfyG6cfNxdpbmGnvCx1Znz0XCd3MGi7uYs4i5aIIQ,210 +pandas/tests/series/methods/test_duplicated.py,sha256=aq_S_byaiVpwxDOemCc9Zjctlnj5Z3Xwro0RA1tEXd0,1401 +pandas/tests/series/methods/test_equals.py,sha256=KRmg_CxLaadHY3jtRbq30ih3E2y4DqH4kQuuXR2nC0U,3942 +pandas/tests/series/methods/test_explode.py,sha256=xWWFC9BkzIQEn6S6Dtxw1wSQf2tv2YKhbcxpsUqAREk,4090 +pandas/tests/series/methods/test_fillna.py,sha256=Z-EQC0pwDqUSear22V9n7dr020lRdYm6k7oh_FbQcRo,33019 +pandas/tests/series/methods/test_get_numeric_data.py,sha256=fMQ5knB5dZCERhV-r_GUMwN94DyW8a_vh8tPn2yWRs8,874 +pandas/tests/series/methods/test_head_tail.py,sha256=1EWojjTzcLvYH34VvyvEHxczDy7zL3dMTyayFHsVSzY,343 +pandas/tests/series/methods/test_infer_objects.py,sha256=SioL1jaiK8W07ZbpSROpB2gBuVQHXnN-PjieVShP1J0,787 +pandas/tests/series/methods/test_interpolate.py,sha256=8Xqp_EJ07r5JuawaKTD_l5hRvvhmy1_AXwKY_Tw8tR0,31841 +pandas/tests/series/methods/test_is_monotonic.py,sha256=jtM_L4BiK8d5H64QxH3urlREw3TttE3BNfoHJ0Rf04s,780 +pandas/tests/series/methods/test_is_unique.py,sha256=0nKqkTV72HRVAAJAoTrB0CLbqMNlZc-9kzq3EI5dJYo,1050 +pandas/tests/series/methods/test_isin.py,sha256=gmkjTaHi3IPXgfUtiaLe2zkXvSLClC7boyzsD1VJPio,6814 +pandas/tests/series/methods/test_isna.py,sha256=TzNID2_dMG6ChWSwOMIqlF9AWcc1UjtjCHLNmT0vlBE,940 
+pandas/tests/series/methods/test_item.py,sha256=fA4qXJ7f5aFDzrboCoHdFBgR08zjW5rjJ7r6M4kdYQk,1622 +pandas/tests/series/methods/test_matmul.py,sha256=X1FquNdcBmOj50KAWmvnJP3wu3-CPEkN1l2HSVbHX7o,2668 +pandas/tests/series/methods/test_nlargest.py,sha256=OmGD2Eo809ocsq-JdQ9h6gWTySEN0tjCw7TZbiVu484,7762 +pandas/tests/series/methods/test_nunique.py,sha256=QS-vUWamlQr2qXrXV-R7CAAOStB3Fajn3JCjPc5POOQ,456 +pandas/tests/series/methods/test_pct_change.py,sha256=8tlMFm_99KBr4gQ4ETbHTo9GxCzVceVXc2ZchA3CIRM,2981 +pandas/tests/series/methods/test_pop.py,sha256=xr9ZuFCI7O2gTW8a3WBr-ooQcOhBzoUK4N1x0K5G380,295 +pandas/tests/series/methods/test_quantile.py,sha256=SrHcnkWm1vJShC6GvpU018VIQJXVKebxBcaSUfirQPc,7062 +pandas/tests/series/methods/test_rank.py,sha256=mJ8QKvoN3oGn6hj_iGKv0tFBKXZhSZSg9LUhTCBjEEY,17012 +pandas/tests/series/methods/test_reindex.py,sha256=sadHfyKda0TEJzs3EazRWZk8SD1Myb6qLB9jX5x0u5c,13239 +pandas/tests/series/methods/test_reindex_like.py,sha256=e_nuGo4QLgsdpnZrC49xDVfcz_prTGAOXGyjEEbkKM4,1245 +pandas/tests/series/methods/test_rename.py,sha256=Dwb_lmJWJiP-wPOyP---8WTpv4MrGPsoohx__DZ-hpY,4450 +pandas/tests/series/methods/test_rename_axis.py,sha256=TqGeZdhB3Ektvj48JfbX2Jr_qsCovtoWimpfX_ViJyg,1520 +pandas/tests/series/methods/test_repeat.py,sha256=0TaZACAEmsWYWv9ge1Yg7ErIH2n79AIBA-qwugpYxWY,1249 +pandas/tests/series/methods/test_replace.py,sha256=-Te9E5VuQX6KFFDcc9EPZzMQyZzyzBaZ0q9FuVZTtss,24700 +pandas/tests/series/methods/test_reset_index.py,sha256=FBcEXq_z5_W3WilNe1JD7Q1QjnhOM1LsGr2_ORfkuIM,6654 +pandas/tests/series/methods/test_round.py,sha256=DgFQ4IJTE9XSunMKKLi5CxvrAHjjb5Az_nT-O_vQFa8,2273 +pandas/tests/series/methods/test_searchsorted.py,sha256=1FAW9hJVacvCpEB-sVtxwNVAFSWYR6G_NKkSMKDiFAI,2138 +pandas/tests/series/methods/test_set_name.py,sha256=rt1BK8BnWMd8D8vrO7yQNN4o-Fnapq5bRmlHyrYpxk4,595 +pandas/tests/series/methods/test_sort_index.py,sha256=-7-LW66dqJMr2fNI2WH9eIiHVyWKwjArE051L4P2wmk,12500 +pandas/tests/series/methods/test_sort_values.py,sha256=wQ3IQdXpRwcMhL3vbERLrzsHvHWsY1xREXCEoJLR9Qk,9648 +pandas/tests/series/methods/test_to_csv.py,sha256=SswJNFJXPwH40tbjw0MeZfSAg-2lzZYsbdPGztVZmNs,6222 +pandas/tests/series/methods/test_to_dict.py,sha256=dIzABUIwzHmhh7po9mYnx3dYF6qvmft7phy1aABCydo,1168 +pandas/tests/series/methods/test_to_frame.py,sha256=Y1q0FkiAAUebNMIZUmcOdBG6b3qVfa8vL8hWB7T9zLw,2015 +pandas/tests/series/methods/test_truncate.py,sha256=HTFv_k0XbCe8RjmwJZ1OxbGyZ2uZ-Fl0PNAoWrgE4wE,2282 +pandas/tests/series/methods/test_tz_localize.py,sha256=Ax2cUpZ8FXwwl3kOQcu5HrFy3Jhr71_oo1xntLe9Ml8,4000 +pandas/tests/series/methods/test_unique.py,sha256=cI9OLNdRxqg7T7CsX-oYsz_yBQKRqWE0FO9OnnNAKoQ,1432 +pandas/tests/series/methods/test_unstack.py,sha256=dI4q6OLd4VRlMFQrPySlIsKCta-BVeL1CiPFs10V4Xo,4428 +pandas/tests/series/methods/test_update.py,sha256=YVUrlzC6ScCWQncHlcWsoH8cxRDTKlzsLWjlzU9ov5Y,4619 +pandas/tests/series/methods/test_value_counts.py,sha256=bPWDoMFoeEmDoHm2CizMgeaOovZ4Ss_cNUQDzcquASE,8736 +pandas/tests/series/methods/test_values.py,sha256=R_UAKfUwCkPa_hlOTus5-wYgdgpqYQq7FIQynNOQixQ,741 +pandas/tests/series/methods/test_view.py,sha256=1ZXDQORIEEYwIT3txFkKDLvRkKhTrYf9mmDj478zlIg,1700 +pandas/tests/series/test_api.py,sha256=3BqXJp2PjyIWp4kzT6o-aNA_0myt-tJvjO21I4LtSbk,6916 +pandas/tests/series/test_arithmetic.py,sha256=3bvYL4_2Q2Abe8xMwvB8DAyoDmJA7pM4zyqk-zQCMto,30112 +pandas/tests/series/test_constructors.py,sha256=IH8PbzSVJ1RvMIRw9FpNJBxpBVXtOK4_98jlQdUjNyw,69683 +pandas/tests/series/test_cumulative.py,sha256=zAK8Fqmugopq3FekA6nGOGckAMLFon5qmVkQ4nfxy3o,4085 
+pandas/tests/series/test_iteration.py,sha256=3aPgneBFgn-jsKUyUTBuvPMEnhYMZLHWzFdqmY56A-I,1280 +pandas/tests/series/test_logical_ops.py,sha256=97LJxI3qfURIeDssoTleJSKmip-a_n7sJld1KWQPHDA,17677 +pandas/tests/series/test_missing.py,sha256=tBtnLVruKcA_vbgKP72XxQ0l0q0G5baCnam7MkwOPFw,3558 +pandas/tests/series/test_npfuncs.py,sha256=GLAdmqQ5lSRrj1NZfr2jRgSlVrEH-3Ayiee37ub5W2Y,382 +pandas/tests/series/test_reductions.py,sha256=C2lj2dmekOo9NfAac5K9Vfd4tUCypFYHp2oT_49Vqss,3586 +pandas/tests/series/test_repr.py,sha256=i_8OrdkdVMmUWib_Menw4Mh8SUCOzkTVeDlZzr23W8o,15846 +pandas/tests/series/test_subclass.py,sha256=bVgAwys6yirRv_ht-JQ4xpDvh5vJay7FDD9YFt2wF4E,2061 +pandas/tests/series/test_ufunc.py,sha256=T36YHrsKFn6lDIlj-fF7fPI2ahEmFxXCWdMnctQNNt0,15002 +pandas/tests/series/test_unary.py,sha256=Sbe_6gjcgMNCfy5dx1QRDxlLvHjNdDdWL3cBrz4x9x0,1622 +pandas/tests/series/test_validate.py,sha256=ziCmKi_jYuGyxcnsVaJpVgwSCjBgpHDJ0dbzWLa1-kA,668 +pandas/tests/strings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/strings/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/strings/__pycache__/conftest.cpython-38.pyc,, +pandas/tests/strings/__pycache__/test_api.cpython-38.pyc,, +pandas/tests/strings/__pycache__/test_case_justify.cpython-38.pyc,, +pandas/tests/strings/__pycache__/test_cat.cpython-38.pyc,, +pandas/tests/strings/__pycache__/test_extract.cpython-38.pyc,, +pandas/tests/strings/__pycache__/test_find_replace.cpython-38.pyc,, +pandas/tests/strings/__pycache__/test_get_dummies.cpython-38.pyc,, +pandas/tests/strings/__pycache__/test_split_partition.cpython-38.pyc,, +pandas/tests/strings/__pycache__/test_string_array.cpython-38.pyc,, +pandas/tests/strings/__pycache__/test_strings.cpython-38.pyc,, +pandas/tests/strings/conftest.py,sha256=cwP9W0iBJ--8FcxKylK3VjxWrdV2G38laD6-PEo_uEA,5218 +pandas/tests/strings/test_api.py,sha256=ua_vMlIEFJ7sPEEjAy3DaORpJ6JxlLFD2kObmN9al9Q,5004 +pandas/tests/strings/test_case_justify.py,sha256=LBxGKy-QxE5R8kVi4Sivli2uTCTThVMQxW3_2pB7uJY,13095 +pandas/tests/strings/test_cat.py,sha256=78TLW9mTuZ2eUX7fnQSgzKIYUMpNNDP781kbL4dNnFU,12142 +pandas/tests/strings/test_extract.py,sha256=EVSnMhQRCgi94IE1F85Hgso-sHqO8Kxe7n0ZygIE-eQ,25833 +pandas/tests/strings/test_find_replace.py,sha256=CCnrjCJtyTGKFTlueYzjRUuQb9fJGciZbqmUepPIYuI,32366 +pandas/tests/strings/test_get_dummies.py,sha256=LyWHwMrb5pgX69t4b9ouHflXKp4gBXadTCkaZSk_HB4,1608 +pandas/tests/strings/test_split_partition.py,sha256=uyYNV-gQfKaPN81Zm9dcdv0ed9EBzgmUMsJgSi8_0DM,22670 +pandas/tests/strings/test_string_array.py,sha256=tGDpNAOX_iVJUgXDfgjBOLDJHRr6hsq8CIN3gIdD9Sk,3187 +pandas/tests/strings/test_strings.py,sha256=pV2S6brg6jSt98dCneEkqmKXwx4SZayaipmdyG9xbUk,24999 +pandas/tests/test_aggregation.py,sha256=-9GlIUg7qPr3Ppj_TNbBF85oKjSIMAv056hfcYZvhWw,2779 +pandas/tests/test_algos.py,sha256=ke99EvIOl3HwAAyPAU7g9C1Ih2TURziFt-uEu6rXQbE,86065 +pandas/tests/test_common.py,sha256=SzoMRa3HECdqSfmRPCe5Z7sVnt28_Hnzu4vHjSkt1q4,6017 +pandas/tests/test_downstream.py,sha256=7cjZ_r0MI1ihACm-3MR6mmOh8TYllUoraBGVpoEAQSE,7819 +pandas/tests/test_errors.py,sha256=0AJX0jzXB-KP8NrSA-EluL6O8INGVNJy6_koOQCEweg,1670 +pandas/tests/test_expressions.py,sha256=o5ImAIRa_dNrESrQpWP6jGafwOCHKjSh6jnU9qyBGjo,13164 +pandas/tests/test_flags.py,sha256=Dsu6pvQ5A6Manyt1VlQLK8pRpZtr-S2T3ubJvRQaRlA,1550 +pandas/tests/test_multilevel.py,sha256=TSTi7dXCOunptATeEoeMR6mvAgT2K64fdDt658oUzsw,14538 +pandas/tests/test_nanops.py,sha256=h4olKg-ALsA72IJLm2dzs6CwXqrS9SeoKj4cRaj839Y,38757 
+pandas/tests/test_optional_dependency.py,sha256=tT5SDWQaIRBCxX8-USqnMA68FVSOdUJfUA7TapBtsK0,2684 +pandas/tests/test_register_accessor.py,sha256=wf9JnlWOm2gNlHFLVa8m3jo7uSwPrV6bfbee6X5NHsQ,2663 +pandas/tests/test_sorting.py,sha256=SYxrvbboKga842onUitrvSssTt7FS88abO66Il8kJhM,17616 +pandas/tests/test_take.py,sha256=d-9ukBaR_7Hdypm5mTqkxdhDMNFZBAnLytFLrdmsMV0,11995 +pandas/tests/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/tools/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/tools/__pycache__/test_to_datetime.cpython-38.pyc,, +pandas/tests/tools/__pycache__/test_to_numeric.cpython-38.pyc,, +pandas/tests/tools/__pycache__/test_to_time.cpython-38.pyc,, +pandas/tests/tools/__pycache__/test_to_timedelta.cpython-38.pyc,, +pandas/tests/tools/test_to_datetime.py,sha256=XzqyQoeArjZzoKzZouyfoUdsd9DSdQjbZxqixWdOE0I,100819 +pandas/tests/tools/test_to_numeric.py,sha256=6UuyAvtBs4Hx4KG03Vg-BOjpbpbhLjf4YTHgg8YF9Xc,22808 +pandas/tests/tools/test_to_time.py,sha256=ejg-yb-fLt3WeFM5fEdZm77NQXKs33ZEfnxuvIfMwFA,2351 +pandas/tests/tools/test_to_timedelta.py,sha256=49hJc-6qMUJiYuRAtMY85GMTBIg5lNHRphS4Ywv1xBk,9905 +pandas/tests/tseries/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/tseries/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/tseries/frequencies/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/tseries/frequencies/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/tseries/frequencies/__pycache__/test_freq_code.cpython-38.pyc,, +pandas/tests/tseries/frequencies/__pycache__/test_frequencies.cpython-38.pyc,, +pandas/tests/tseries/frequencies/__pycache__/test_inference.cpython-38.pyc,, +pandas/tests/tseries/frequencies/test_freq_code.py,sha256=-6FTMn3ro8NrkUqhklvlQbSVL2YRDM6bj2_6TDT8XAo,2008 +pandas/tests/tseries/frequencies/test_frequencies.py,sha256=tyI9e6ve7sEXdALy9GYjMV3mAQHmQF2IqW-xFzPdgjY,821 +pandas/tests/tseries/frequencies/test_inference.py,sha256=J3myi_3np1ZHb7DILXLbW2t4UEIsEF4Yty2cYhI74Dc,13554 +pandas/tests/tseries/holiday/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/tseries/holiday/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/tseries/holiday/__pycache__/test_calendar.cpython-38.pyc,, +pandas/tests/tseries/holiday/__pycache__/test_federal.cpython-38.pyc,, +pandas/tests/tseries/holiday/__pycache__/test_holiday.cpython-38.pyc,, +pandas/tests/tseries/holiday/__pycache__/test_observance.cpython-38.pyc,, +pandas/tests/tseries/holiday/test_calendar.py,sha256=MnUx0LNjTq3KFFXohmgqqbJzMrIuvO7Jaf4tk7FxkQE,3535 +pandas/tests/tseries/holiday/test_federal.py,sha256=TPMPlc2skaMCNbeJ8gDYS7JwsAZFQ16EjdJpN4pQysY,1157 +pandas/tests/tseries/holiday/test_holiday.py,sha256=zxwMWRXcjhczrsO2DJNx1fXIVorwydEcxLZAmOb2W5k,8846 +pandas/tests/tseries/holiday/test_observance.py,sha256=GJBqIF4W6QG4k3Yzz6_13WMOR4nHSVzPbixHxO8Tukw,2723 +pandas/tests/tseries/offsets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/tseries/offsets/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/tseries/offsets/__pycache__/common.cpython-38.pyc,, +pandas/tests/tseries/offsets/__pycache__/conftest.cpython-38.pyc,, +pandas/tests/tseries/offsets/__pycache__/test_business_day.cpython-38.pyc,, +pandas/tests/tseries/offsets/__pycache__/test_business_hour.cpython-38.pyc,, +pandas/tests/tseries/offsets/__pycache__/test_business_month.cpython-38.pyc,, +pandas/tests/tseries/offsets/__pycache__/test_business_quarter.cpython-38.pyc,, 
+pandas/tests/tseries/offsets/__pycache__/test_business_year.cpython-38.pyc,, +pandas/tests/tseries/offsets/__pycache__/test_custom_business_day.cpython-38.pyc,, +pandas/tests/tseries/offsets/__pycache__/test_custom_business_hour.cpython-38.pyc,, +pandas/tests/tseries/offsets/__pycache__/test_custom_business_month.cpython-38.pyc,, +pandas/tests/tseries/offsets/__pycache__/test_dst.cpython-38.pyc,, +pandas/tests/tseries/offsets/__pycache__/test_easter.cpython-38.pyc,, +pandas/tests/tseries/offsets/__pycache__/test_fiscal.cpython-38.pyc,, +pandas/tests/tseries/offsets/__pycache__/test_index.cpython-38.pyc,, +pandas/tests/tseries/offsets/__pycache__/test_month.cpython-38.pyc,, +pandas/tests/tseries/offsets/__pycache__/test_offsets.cpython-38.pyc,, +pandas/tests/tseries/offsets/__pycache__/test_offsets_properties.cpython-38.pyc,, +pandas/tests/tseries/offsets/__pycache__/test_quarter.cpython-38.pyc,, +pandas/tests/tseries/offsets/__pycache__/test_ticks.cpython-38.pyc,, +pandas/tests/tseries/offsets/__pycache__/test_week.cpython-38.pyc,, +pandas/tests/tseries/offsets/__pycache__/test_year.cpython-38.pyc,, +pandas/tests/tseries/offsets/common.py,sha256=GwLdJGXo--TEPozKFrF3MWeI_kp-owDrTLjALYjrLec,6506 +pandas/tests/tseries/offsets/conftest.py,sha256=EToa7cYjNUnSlQY2dMJbI6SrFJkB5hQNOhgUaq13BjE,643 +pandas/tests/tseries/offsets/test_business_day.py,sha256=H0wxCKk7kYr-drPy2C73dXcF6QZdvgCGpxpKX8EUaEg,7033 +pandas/tests/tseries/offsets/test_business_hour.py,sha256=mp0xmKYL3IBql_6Xz1mlHR0cihcoHBQTucMGqtBXUDs,58313 +pandas/tests/tseries/offsets/test_business_month.py,sha256=7tq0Ql5NwY2uSkyDC1ReVhAi6c48WdF8svohB6F2AIs,6753 +pandas/tests/tseries/offsets/test_business_quarter.py,sha256=uc-a_-nrXZLzKPDIzrLAoyF_EKjTxFZAMOh88aMyfC0,12332 +pandas/tests/tseries/offsets/test_business_year.py,sha256=XOLwy34XlnTWE0XAfBN1iEQUuPNU8msODabaFVEQBr8,6502 +pandas/tests/tseries/offsets/test_custom_business_day.py,sha256=l9O-BCOZEAcxPui92uItKjJrXcOa3OBwwHTi2jymNXA,3186 +pandas/tests/tseries/offsets/test_custom_business_hour.py,sha256=ZovXfSf1vWYopIfBueMRFgxFJGgPG_-mJSCSgsJRrWM,12769 +pandas/tests/tseries/offsets/test_custom_business_month.py,sha256=bI-xHsSvsMRZz8UYrm0OVOw7YjwdZyyFGhk6xRa73Z0,14024 +pandas/tests/tseries/offsets/test_dst.py,sha256=fm5pN6y4vVYmhMo8GCSAPMI7lPcjensNz0OJtTECPZ4,7735 +pandas/tests/tseries/offsets/test_easter.py,sha256=CTZPZCwlsz9nlL5LEAcMQEFO2mPWAZB5F2ofk_oaj0c,1175 +pandas/tests/tseries/offsets/test_fiscal.py,sha256=RxbrPZD63MbgHkjPRHe573wRAOeLDyNdWgMk4SUI8is,28044 +pandas/tests/tseries/offsets/test_index.py,sha256=2e-wN5uf_y7SzO11Z7Jo6EjDC5fFPTVZLtx7G7H6ZWA,1145 +pandas/tests/tseries/offsets/test_month.py,sha256=hmGgjPx74TF7kYTFOvxdpnJaoEEXTk0fjUo9_Vt288w,24078 +pandas/tests/tseries/offsets/test_offsets.py,sha256=mHdPKIY4W3FVYqWGLDfMWo8weX3q6i92Y4_hqqR1yFc,29087 +pandas/tests/tseries/offsets/test_offsets_properties.py,sha256=P_16zBX7ocaGN-br0pEQBGTlewfiDpJsnf5R1ei83JQ,1971 +pandas/tests/tseries/offsets/test_quarter.py,sha256=yI6bqWJ1ah1uJh4vIZd4w6EcTkva1GNiZ8DF3zT_6nY,11558 +pandas/tests/tseries/offsets/test_ticks.py,sha256=eijbAfJw0oS7YO0SSv6pICFOX9zRZrJfxeR_oo-yrfg,10951 +pandas/tests/tseries/offsets/test_week.py,sha256=oOqFvTmlY7Ds0nm1ci6Hth84rtBztz_Zf7MnJ0xu-0o,12503 +pandas/tests/tseries/offsets/test_year.py,sha256=K_bg2DhXe77kYlXYLGOSmXXZbqhXlG5ysXhDdWGyPrU,9865 +pandas/tests/tslibs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/tslibs/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/tslibs/__pycache__/test_api.cpython-38.pyc,, 
+pandas/tests/tslibs/__pycache__/test_array_to_datetime.cpython-38.pyc,, +pandas/tests/tslibs/__pycache__/test_ccalendar.cpython-38.pyc,, +pandas/tests/tslibs/__pycache__/test_conversion.cpython-38.pyc,, +pandas/tests/tslibs/__pycache__/test_fields.cpython-38.pyc,, +pandas/tests/tslibs/__pycache__/test_libfrequencies.cpython-38.pyc,, +pandas/tests/tslibs/__pycache__/test_liboffsets.cpython-38.pyc,, +pandas/tests/tslibs/__pycache__/test_parse_iso8601.cpython-38.pyc,, +pandas/tests/tslibs/__pycache__/test_parsing.cpython-38.pyc,, +pandas/tests/tslibs/__pycache__/test_period_asfreq.cpython-38.pyc,, +pandas/tests/tslibs/__pycache__/test_timedeltas.cpython-38.pyc,, +pandas/tests/tslibs/__pycache__/test_timezones.cpython-38.pyc,, +pandas/tests/tslibs/__pycache__/test_to_offset.cpython-38.pyc,, +pandas/tests/tslibs/test_api.py,sha256=EUh_2N0fpn9t4jx6ICVFpOH3kI__gcDgZW2k0HJqh44,1231 +pandas/tests/tslibs/test_array_to_datetime.py,sha256=59csG_mVkDhCNUSoRixuwmXspIG0PI5pIwytt5MD8xI,5919 +pandas/tests/tslibs/test_ccalendar.py,sha256=8dmstj7tYOhTPe66bkpV-RQJp03zpBYMpmb-7DaJe_4,1887 +pandas/tests/tslibs/test_conversion.py,sha256=aVN0oo1DAv_sOH_89OHDE6z3v1wwaorJilesYim8xgM,3986 +pandas/tests/tslibs/test_fields.py,sha256=SwQoun0itGlI79s5Lv1w1RWJKmEwlMgawq3ybziA3gw,1351 +pandas/tests/tslibs/test_libfrequencies.py,sha256=1aQnyjAA2F2-xfTlTa081uVE3dTBb2CdkYv8Cry5Gn0,769 +pandas/tests/tslibs/test_liboffsets.py,sha256=958cVv4vva5nawrYcmSinfu62NIL7lYOXOHN7yU-gAE,5108 +pandas/tests/tslibs/test_parse_iso8601.py,sha256=XIidGrTdVtTyauCww9brdCCIcnbXxXuWYqREVorC66E,2069 +pandas/tests/tslibs/test_parsing.py,sha256=acjU7AtzfPCr1R6kraskHtOz5rKnDsATrWU-1TrEdJc,8867 +pandas/tests/tslibs/test_period_asfreq.py,sha256=H0mYk-sTDHG5K98qSHY84lpTKdWI9UUCC2dEk7gdgbg,2326 +pandas/tests/tslibs/test_timedeltas.py,sha256=WBORVlGnsu_hIi6zSgynGD5P5UZYAb3rx1s0OfOuxKk,1780 +pandas/tests/tslibs/test_timezones.py,sha256=tnHX-TDiLJCDYYma8Jkrdena54K-LNEai90u21VVpTg,4653 +pandas/tests/tslibs/test_to_offset.py,sha256=V5Xv79KEnCgxNpM-lyftRXzbzdx959uMWzLcDpu1htI,4786 +pandas/tests/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/util/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/util/__pycache__/conftest.cpython-38.pyc,, +pandas/tests/util/__pycache__/test_assert_almost_equal.cpython-38.pyc,, +pandas/tests/util/__pycache__/test_assert_attr_equal.cpython-38.pyc,, +pandas/tests/util/__pycache__/test_assert_categorical_equal.cpython-38.pyc,, +pandas/tests/util/__pycache__/test_assert_extension_array_equal.cpython-38.pyc,, +pandas/tests/util/__pycache__/test_assert_frame_equal.cpython-38.pyc,, +pandas/tests/util/__pycache__/test_assert_index_equal.cpython-38.pyc,, +pandas/tests/util/__pycache__/test_assert_interval_array_equal.cpython-38.pyc,, +pandas/tests/util/__pycache__/test_assert_numpy_array_equal.cpython-38.pyc,, +pandas/tests/util/__pycache__/test_assert_produces_warning.cpython-38.pyc,, +pandas/tests/util/__pycache__/test_assert_series_equal.cpython-38.pyc,, +pandas/tests/util/__pycache__/test_deprecate.cpython-38.pyc,, +pandas/tests/util/__pycache__/test_deprecate_kwarg.cpython-38.pyc,, +pandas/tests/util/__pycache__/test_deprecate_nonkeyword_arguments.cpython-38.pyc,, +pandas/tests/util/__pycache__/test_doc.cpython-38.pyc,, +pandas/tests/util/__pycache__/test_hashing.cpython-38.pyc,, +pandas/tests/util/__pycache__/test_numba.cpython-38.pyc,, +pandas/tests/util/__pycache__/test_safe_import.cpython-38.pyc,, +pandas/tests/util/__pycache__/test_shares_memory.cpython-38.pyc,, 
+pandas/tests/util/__pycache__/test_show_versions.cpython-38.pyc,, +pandas/tests/util/__pycache__/test_util.cpython-38.pyc,, +pandas/tests/util/__pycache__/test_validate_args.cpython-38.pyc,, +pandas/tests/util/__pycache__/test_validate_args_and_kwargs.cpython-38.pyc,, +pandas/tests/util/__pycache__/test_validate_inclusive.cpython-38.pyc,, +pandas/tests/util/__pycache__/test_validate_kwargs.cpython-38.pyc,, +pandas/tests/util/conftest.py,sha256=loEbQsEtHtv-T4Umeq_UeV6R7s8SO01GHbW6gn8lvlo,476 +pandas/tests/util/test_assert_almost_equal.py,sha256=_e6vI2ODhDg92FJQV5iVorCWoHdWBhp4a4XysKOEoBU,12557 +pandas/tests/util/test_assert_attr_equal.py,sha256=dkMCbZi_l9AltabL7kefYsytgxjoxc4g70nHlkHA3dM,1073 +pandas/tests/util/test_assert_categorical_equal.py,sha256=l0eBVe0b0Vs0-Av22MqkSqHklSwFKnlNNezsQPZWvOE,2748 +pandas/tests/util/test_assert_extension_array_equal.py,sha256=6HhnzmEM3Z3JzIsmnatR-C8Z4eaPV3hngfRZd_k1ATU,3464 +pandas/tests/util/test_assert_frame_equal.py,sha256=JP7dklS_-_PYcyGrBMN5YBEa92kq9NJC1Z6G-hIoZ5o,11565 +pandas/tests/util/test_assert_index_equal.py,sha256=IiSMdoQWng4nRojR10ziShMz7SNNiw_ooax3GurywLo,7702 +pandas/tests/util/test_assert_interval_array_equal.py,sha256=ITqL0Z8AAy5D1knACPOHodI64AHxmNzxiG-i9FeU0b8,2158 +pandas/tests/util/test_assert_numpy_array_equal.py,sha256=fgb8GdUwX4EYiR3PWbjJULNfAJz4DfJ8RJXchssygO4,6624 +pandas/tests/util/test_assert_produces_warning.py,sha256=W4TQT1vAJY7SZ0CzfqJsr329N2l_riYLWcWgUFyBSq0,7044 +pandas/tests/util/test_assert_series_equal.py,sha256=MDL_hgh1pz7n4geiFJFaUnVt5zY1xTfn1c41Cj-O5sI,10398 +pandas/tests/util/test_deprecate.py,sha256=oZXuNwUnS_hAcMWPgl9ErjGCZSs4beoaivnsOTQzIys,1626 +pandas/tests/util/test_deprecate_kwarg.py,sha256=7T2QkCxXUoJHhCxUjAH_5_hM-BHC6nPWG635LFY35lo,2043 +pandas/tests/util/test_deprecate_nonkeyword_arguments.py,sha256=1oq6mN8mBu7PPrhKFDcmtzVS54RHm_yYTU6JcM_D-W0,3128 +pandas/tests/util/test_doc.py,sha256=u0fxCg4zZWhB4SkJYc2huQ0xv7sKKAt0OlpWldmhh_M,1492 +pandas/tests/util/test_hashing.py,sha256=fhYtjHz_w3ALtKGCLZlVhzudJr5vZOAYrvszkLOs24Q,12723 +pandas/tests/util/test_numba.py,sha256=6eOVcokESth7h6yyeehVizx61FtwDdVbF8wV8j3t-Ic,308 +pandas/tests/util/test_safe_import.py,sha256=UxH90Ju9wyQ7Rs7SduRj3dkxroyehIwaWbBEz3ZzvEw,1020 +pandas/tests/util/test_shares_memory.py,sha256=pohzczmtzQtM9wOa-dUkJVCOYPb5VFTxrjlUJL9xmlA,345 +pandas/tests/util/test_show_versions.py,sha256=BrRmjh1dYOYLJDBBSUXKaSr3kqHkOfBG_Hre5LyPQIE,2545 +pandas/tests/util/test_util.py,sha256=P3fQqMjLt1sL0jOKYj_nYIyeiP2PwDXIy4BPUrf_c6k,1982 +pandas/tests/util/test_validate_args.py,sha256=ygRn_KXeO86BTvXnNDGWmnWsWygqoItKk7Ob1I2-9Wo,1842 +pandas/tests/util/test_validate_args_and_kwargs.py,sha256=OFROeLC6jsezEgz3SypUMUl5VHPQGgj49eZYsMOQ9-s,2391 +pandas/tests/util/test_validate_inclusive.py,sha256=w2twetJgIedm6KGQ4WmdmGC_6-RShFjXBMBVxR0gcME,896 +pandas/tests/util/test_validate_kwargs.py,sha256=gSJQe9UVtyNQ8Dw2k-MR2TRPp8GbPbrCwM5yDM-dg9E,1755 +pandas/tests/window/__init__.py,sha256=1jg-FfAkeNqq2AoynrB6nQ1UCUATOF8cVYIaIwPfdTs,196 +pandas/tests/window/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/window/__pycache__/conftest.cpython-38.pyc,, +pandas/tests/window/__pycache__/test_api.cpython-38.pyc,, +pandas/tests/window/__pycache__/test_apply.cpython-38.pyc,, +pandas/tests/window/__pycache__/test_base_indexer.cpython-38.pyc,, +pandas/tests/window/__pycache__/test_cython_aggregations.cpython-38.pyc,, +pandas/tests/window/__pycache__/test_dtypes.cpython-38.pyc,, +pandas/tests/window/__pycache__/test_ewm.cpython-38.pyc,, 
+pandas/tests/window/__pycache__/test_expanding.cpython-38.pyc,, +pandas/tests/window/__pycache__/test_groupby.cpython-38.pyc,, +pandas/tests/window/__pycache__/test_numba.cpython-38.pyc,, +pandas/tests/window/__pycache__/test_online.cpython-38.pyc,, +pandas/tests/window/__pycache__/test_pairwise.cpython-38.pyc,, +pandas/tests/window/__pycache__/test_rolling.cpython-38.pyc,, +pandas/tests/window/__pycache__/test_rolling_functions.cpython-38.pyc,, +pandas/tests/window/__pycache__/test_rolling_quantile.cpython-38.pyc,, +pandas/tests/window/__pycache__/test_rolling_skew_kurt.cpython-38.pyc,, +pandas/tests/window/__pycache__/test_timeseries_window.cpython-38.pyc,, +pandas/tests/window/__pycache__/test_win_type.cpython-38.pyc,, +pandas/tests/window/conftest.py,sha256=WAPhrLczGHRP-tYs-GJCC-bbevQYcXgeonjxZffNMeo,5994 +pandas/tests/window/moments/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pandas/tests/window/moments/__pycache__/__init__.cpython-38.pyc,, +pandas/tests/window/moments/__pycache__/conftest.cpython-38.pyc,, +pandas/tests/window/moments/__pycache__/test_moments_consistency_ewm.cpython-38.pyc,, +pandas/tests/window/moments/__pycache__/test_moments_consistency_expanding.cpython-38.pyc,, +pandas/tests/window/moments/__pycache__/test_moments_consistency_rolling.cpython-38.pyc,, +pandas/tests/window/moments/conftest.py,sha256=XgoAwGq5XDMKwMlIOYvFgHO0ZHJPfSDPKC32SLCtJDA,1753 +pandas/tests/window/moments/test_moments_consistency_ewm.py,sha256=4CgbSYilGuKXXpREqkiYAY6s8IsjLL7QFhCHVERTG6s,8206 +pandas/tests/window/moments/test_moments_consistency_expanding.py,sha256=eUa5UFG7UAqmG56XsYmihGvesbDNrj0DPV7eJgpxksY,5541 +pandas/tests/window/moments/test_moments_consistency_rolling.py,sha256=OKXUciD3Uc9X9kxwKQ4a7cSUrIbysMqekRm88UtRXI4,7687 +pandas/tests/window/test_api.py,sha256=uoKPUYAgovMtJRKq1cFXA78EorFhTsEhdX2yre55GL0,11977 +pandas/tests/window/test_apply.py,sha256=X73I124v1HcJefDuiaAMvr0mPZDQfMcPmV5_WhCqxnA,9458 +pandas/tests/window/test_base_indexer.py,sha256=EnOSEDrDe0_Ebfxn4bWVThqCFNTgybPnoYMu-NYMwvo,15292 +pandas/tests/window/test_cython_aggregations.py,sha256=wPAk76yfrG9D1-IzI0kDklpiTVqgp4xsEGjONe9lCY4,3967 +pandas/tests/window/test_dtypes.py,sha256=G_t4WiUnZFBP3wSL0-8hYyFt9sad8VLEemM_UpmeE78,5283 +pandas/tests/window/test_ewm.py,sha256=fvmxDOLY20EJBzpkZ-8qacqw9SnGlBMvcII3_xhz2zI,20574 +pandas/tests/window/test_expanding.py,sha256=0FiESCwAl1UHNed8MNjdk4_j8UkOqhmfbD-n43OSUEI,20786 +pandas/tests/window/test_groupby.py,sha256=po00w02ayOerwHCMhamvv0gtcuOwyfUx5LF_4Bcrlgo,40829 +pandas/tests/window/test_numba.py,sha256=igr7snkRhWNUDdS-fxxVgOiNrF4ePw_dcvrBPHYIBO0,14953 +pandas/tests/window/test_online.py,sha256=ZZxl1Fg5OVDlQEOn2kRakwZRLN3PrLS42_hErmuWMOc,3236 +pandas/tests/window/test_pairwise.py,sha256=OCHU0tdkCL8Oq_K0E0v0e_Kd3mCzbBvJjMpfd9W1fB4,14439 +pandas/tests/window/test_rolling.py,sha256=Oo6BgVAfmLRpv2C8RRtzruOTEIt1-xWZaRd8dtafsnc,52354 +pandas/tests/window/test_rolling_functions.py,sha256=18juMvPpwE81KNEtQz5vtG1-gg8f35kp_zwaNSUF2PY,17536 +pandas/tests/window/test_rolling_quantile.py,sha256=taGHULWaxuWlWyTWGvqEpbO_UMsU3lOpRhiXgDkrbXw,5062 +pandas/tests/window/test_rolling_skew_kurt.py,sha256=fmkUjE3Pv6sb6ziT8mEhH90dcPirAjTQEQhjurch67o,7094 +pandas/tests/window/test_timeseries_window.py,sha256=Qh8E8wCe6cczo54WoJwLr_BY1PS1LanTVuv6g-AcxwQ,25054 +pandas/tests/window/test_win_type.py,sha256=dKSYEEC0UumIwsOodk3_GFVM0XqsVWZvR4NNaJsLaXM,16165 +pandas/tseries/__init__.py,sha256=cpq1wgIioh5SP0heBvHwxzlu_yu2CMFpGZu3WgevxFE,272 +pandas/tseries/__pycache__/__init__.cpython-38.pyc,, 
+pandas/tseries/__pycache__/api.cpython-38.pyc,, +pandas/tseries/__pycache__/frequencies.cpython-38.pyc,, +pandas/tseries/__pycache__/holiday.cpython-38.pyc,, +pandas/tseries/__pycache__/offsets.cpython-38.pyc,, +pandas/tseries/api.py,sha256=sx-g3_AEzTe_rI3BpaXWrzX6DGN66-RhABFe84_TIhQ,136 +pandas/tseries/frequencies.py,sha256=DkH4VhHXTCj_Vi4gAhC0Cp-CSP-xz9kRqqIxnzjOLGg,17466 +pandas/tseries/holiday.py,sha256=Y-Pt3Vxm4jGdD7cfEUW72zT0bYjajFPxF_hH-8HYOW0,17039 +pandas/tseries/offsets.py,sha256=r6k_TpTSCMVeLZ5ILWkMvwl9bg3gjK_sjuGmdJyEQUE,1366 +pandas/util/__init__.py,sha256=MUVPneZnJdJYwbVcqPu04ETT0z3ZQHLBqp9cdYxfWa0,412 +pandas/util/__pycache__/__init__.cpython-38.pyc,, +pandas/util/__pycache__/_decorators.cpython-38.pyc,, +pandas/util/__pycache__/_doctools.cpython-38.pyc,, +pandas/util/__pycache__/_exceptions.cpython-38.pyc,, +pandas/util/__pycache__/_print_versions.cpython-38.pyc,, +pandas/util/__pycache__/_test_decorators.cpython-38.pyc,, +pandas/util/__pycache__/_tester.cpython-38.pyc,, +pandas/util/__pycache__/_validators.cpython-38.pyc,, +pandas/util/__pycache__/testing.cpython-38.pyc,, +pandas/util/_decorators.py,sha256=YS6K2hVDfj44IjuGvQthxjm3L1OveWa-q2bx82oFk2o,17161 +pandas/util/_doctools.py,sha256=X7y_QmN_0Gq2yHm08w-_Bybco0YurZYuZSubNsKHWsg,6657 +pandas/util/_exceptions.py,sha256=OY3GuMPW4gFHKjqSXxNNpbjSwqt33jxyu0XLV1WBBDU,1027 +pandas/util/_print_versions.py,sha256=kxAoCghSTouYQD8ixQ9-Uuz1ghWzNO-Rt9njN8LFSCc,4290 +pandas/util/_test_decorators.py,sha256=YJkyWBAAN_scc5PUVXxgUdnZVcr1XzBiYon7G-Qn_lM,8889 +pandas/util/_tester.py,sha256=j5JCuOatm3LWmLjQmlI8TTIUO6y555gHTMXOucYjoTs,824 +pandas/util/_validators.py,sha256=X-zgCWEUUqUaoNoSl482d6Z6ePeBbdBsBs9AVLEjyUs,16935 +pandas/util/testing.py,sha256=I3TS4ilRqUqKpI9UAcqkCvr2f6k6drTMjz5OLjhlj54,330 +pandas/util/version/__init__.py,sha256=UlyeVEsBXNh4s8ecrheLLuLJZdb8onHZXZ1mbYri_sI,16226 +pandas/util/version/__pycache__/__init__.cpython-38.pyc,, diff --git a/.local/lib/python3.8/site-packages/pandas-1.4.1.dist-info/WHEEL b/.local/lib/python3.8/site-packages/pandas-1.4.1.dist-info/WHEEL new file mode 100644 index 0000000..3a48d34 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas-1.4.1.dist-info/WHEEL @@ -0,0 +1,6 @@ +Wheel-Version: 1.0 +Generator: bdist_wheel (0.37.1) +Root-Is-Purelib: false +Tag: cp38-cp38-manylinux_2_17_x86_64 +Tag: cp38-cp38-manylinux2014_x86_64 + diff --git a/.local/lib/python3.8/site-packages/pandas-1.4.1.dist-info/entry_points.txt b/.local/lib/python3.8/site-packages/pandas-1.4.1.dist-info/entry_points.txt new file mode 100644 index 0000000..3c1b523 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas-1.4.1.dist-info/entry_points.txt @@ -0,0 +1,3 @@ +[pandas_plotting_backends] +matplotlib = pandas:plotting._matplotlib + diff --git a/.local/lib/python3.8/site-packages/pandas-1.4.1.dist-info/top_level.txt b/.local/lib/python3.8/site-packages/pandas-1.4.1.dist-info/top_level.txt new file mode 100644 index 0000000..fb6c7ed --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas-1.4.1.dist-info/top_level.txt @@ -0,0 +1 @@ +pandas diff --git a/.local/lib/python3.8/site-packages/pandas/__init__.py b/.local/lib/python3.8/site-packages/pandas/__init__.py new file mode 100644 index 0000000..1b18af0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/__init__.py @@ -0,0 +1,421 @@ +# flake8: noqa + +__docformat__ = "restructuredtext" + +# Let users know if they're missing any of our hard dependencies +hard_dependencies = ("numpy", "pytz", "dateutil") +missing_dependencies = [] + +for dependency 
in hard_dependencies: + try: + __import__(dependency) + except ImportError as e: + missing_dependencies.append(f"{dependency}: {e}") + +if missing_dependencies: + raise ImportError( + "Unable to import required dependencies:\n" + "\n".join(missing_dependencies) + ) +del hard_dependencies, dependency, missing_dependencies + +# numpy compat +from pandas.compat import is_numpy_dev as _is_numpy_dev + +try: + from pandas._libs import hashtable as _hashtable, lib as _lib, tslib as _tslib +except ImportError as err: # pragma: no cover + module = err.name + raise ImportError( + f"C extension: {module} not built. If you want to import " + "pandas from the source directory, you may need to run " + "'python setup.py build_ext --force' to build the C extensions first." + ) from err +else: + del _tslib, _lib, _hashtable + +from pandas._config import ( + get_option, + set_option, + reset_option, + describe_option, + option_context, + options, +) + +# let init-time option registration happen +import pandas.core.config_init + +from pandas.core.api import ( + # dtype + Int8Dtype, + Int16Dtype, + Int32Dtype, + Int64Dtype, + UInt8Dtype, + UInt16Dtype, + UInt32Dtype, + UInt64Dtype, + Float32Dtype, + Float64Dtype, + CategoricalDtype, + PeriodDtype, + IntervalDtype, + DatetimeTZDtype, + StringDtype, + BooleanDtype, + # missing + NA, + isna, + isnull, + notna, + notnull, + # indexes + Index, + CategoricalIndex, + RangeIndex, + MultiIndex, + IntervalIndex, + TimedeltaIndex, + DatetimeIndex, + PeriodIndex, + IndexSlice, + # tseries + NaT, + Period, + period_range, + Timedelta, + timedelta_range, + Timestamp, + date_range, + bdate_range, + Interval, + interval_range, + DateOffset, + # conversion + to_numeric, + to_datetime, + to_timedelta, + # misc + Flags, + Grouper, + factorize, + unique, + value_counts, + NamedAgg, + array, + Categorical, + set_eng_float_format, + Series, + DataFrame, +) + +from pandas.core.arrays.sparse import SparseDtype + +from pandas.tseries.api import infer_freq +from pandas.tseries import offsets + +from pandas.core.computation.api import eval + +from pandas.core.reshape.api import ( + concat, + lreshape, + melt, + wide_to_long, + merge, + merge_asof, + merge_ordered, + crosstab, + pivot, + pivot_table, + get_dummies, + cut, + qcut, +) + +from pandas import api, arrays, errors, io, plotting, testing, tseries +from pandas.util._print_versions import show_versions + +from pandas.io.api import ( + # excel + ExcelFile, + ExcelWriter, + read_excel, + # parsers + read_csv, + read_fwf, + read_table, + # pickle + read_pickle, + to_pickle, + # pytables + HDFStore, + read_hdf, + # sql + read_sql, + read_sql_query, + read_sql_table, + # misc + read_clipboard, + read_parquet, + read_orc, + read_feather, + read_gbq, + read_html, + read_xml, + read_json, + read_stata, + read_sas, + read_spss, +) + +from pandas.io.json import _json_normalize as json_normalize + +from pandas.util._tester import test + +# use the closest tagged version if possible +from pandas._version import get_versions + +v = get_versions() +__version__ = v.get("closest-tag", v["version"]) +__git_version__ = v.get("full-revisionid") +del get_versions, v + +# GH 27101 +__deprecated_num_index_names = ["Float64Index", "Int64Index", "UInt64Index"] + + +def __dir__(): + # GH43028 + # Int64Index etc. are deprecated, but we still want them to be available in the dir. + # Remove in Pandas 2.0, when we remove Int64Index etc. from the code base. 
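+ # (dir(pandas) therefore keeps advertising Int64Index etc., which are
+ # only created lazily by the module-level __getattr__ below)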
+ return list(globals().keys()) + __deprecated_num_index_names + + +def __getattr__(name): + import warnings + + if name in __deprecated_num_index_names: + warnings.warn( + f"pandas.{name} is deprecated " + "and will be removed from pandas in a future version. " + "Use pandas.Index with the appropriate dtype instead.", + FutureWarning, + stacklevel=2, + ) + from pandas.core.api import Float64Index, Int64Index, UInt64Index + + return { + "Float64Index": Float64Index, + "Int64Index": Int64Index, + "UInt64Index": UInt64Index, + }[name] + elif name == "datetime": + warnings.warn( + "The pandas.datetime class is deprecated " + "and will be removed from pandas in a future version. " + "Import from datetime module instead.", + FutureWarning, + stacklevel=2, + ) + + from datetime import datetime as dt + + return dt + + elif name == "np": + + warnings.warn( + "The pandas.np module is deprecated " + "and will be removed from pandas in a future version. " + "Import numpy directly instead.", + FutureWarning, + stacklevel=2, + ) + import numpy as np + + return np + + elif name in {"SparseSeries", "SparseDataFrame"}: + warnings.warn( + f"The {name} class is removed from pandas. Accessing it from " + "the top-level namespace will also be removed in the next version.", + FutureWarning, + stacklevel=2, + ) + + return type(name, (), {}) + + elif name == "SparseArray": + + warnings.warn( + "The pandas.SparseArray class is deprecated " + "and will be removed from pandas in a future version. " + "Use pandas.arrays.SparseArray instead.", + FutureWarning, + stacklevel=2, + ) + from pandas.core.arrays.sparse import SparseArray as _SparseArray + + return _SparseArray + + raise AttributeError(f"module 'pandas' has no attribute '{name}'") + + +# module level doc-string +__doc__ = """ +pandas - a powerful data analysis and manipulation library for Python +===================================================================== + +**pandas** is a Python package providing fast, flexible, and expressive data +structures designed to make working with "relational" or "labeled" data both +easy and intuitive. It aims to be the fundamental high-level building block for +doing practical, **real world** data analysis in Python. Additionally, it has +the broader goal of becoming **the most powerful and flexible open source data +analysis / manipulation tool available in any language**. It is already well on +its way toward this goal. + +Main Features +------------- +Here are just a few of the things that pandas does well: + + - Easy handling of missing data in floating point as well as non-floating + point data. + - Size mutability: columns can be inserted and deleted from DataFrame and + higher dimensional objects + - Automatic and explicit data alignment: objects can be explicitly aligned + to a set of labels, or the user can simply ignore the labels and let + `Series`, `DataFrame`, etc. automatically align the data for you in + computations. + - Powerful, flexible group by functionality to perform split-apply-combine + operations on data sets, for both aggregating and transforming data. + - Make it easy to convert ragged, differently-indexed data in other Python + and NumPy data structures into DataFrame objects. + - Intelligent label-based slicing, fancy indexing, and subsetting of large + data sets. + - Intuitive merging and joining data sets. + - Flexible reshaping and pivoting of data sets. + - Hierarchical labeling of axes (possible to have multiple labels per tick). 
+ - Robust IO tools for loading data from flat files (CSV and delimited), + Excel files, databases, and saving/loading data from the ultrafast HDF5 + format. + - Time series-specific functionality: date range generation and frequency + conversion, moving window statistics, date shifting and lagging. +""" + +# Use __all__ to let type checkers know what is part of the public API. +# Pandas is not (yet) a py.typed library: the public API is determined +# based on the documentation. +__all__ = [ + "BooleanDtype", + "Categorical", + "CategoricalDtype", + "CategoricalIndex", + "DataFrame", + "DateOffset", + "DatetimeIndex", + "DatetimeTZDtype", + "ExcelFile", + "ExcelWriter", + "Flags", + "Float32Dtype", + "Float64Dtype", + "Grouper", + "HDFStore", + "Index", + "IndexSlice", + "Int16Dtype", + "Int32Dtype", + "Int64Dtype", + "Int8Dtype", + "Interval", + "IntervalDtype", + "IntervalIndex", + "MultiIndex", + "NA", + "NaT", + "NamedAgg", + "Period", + "PeriodDtype", + "PeriodIndex", + "RangeIndex", + "Series", + "SparseDtype", + "StringDtype", + "Timedelta", + "TimedeltaIndex", + "Timestamp", + "UInt16Dtype", + "UInt32Dtype", + "UInt64Dtype", + "UInt8Dtype", + "api", + "array", + "arrays", + "bdate_range", + "concat", + "crosstab", + "cut", + "date_range", + "describe_option", + "errors", + "eval", + "factorize", + "get_dummies", + "get_option", + "infer_freq", + "interval_range", + "io", + "isna", + "isnull", + "json_normalize", + "lreshape", + "melt", + "merge", + "merge_asof", + "merge_ordered", + "notna", + "notnull", + "offsets", + "option_context", + "options", + "period_range", + "pivot", + "pivot_table", + "plotting", + "qcut", + "read_clipboard", + "read_csv", + "read_excel", + "read_feather", + "read_fwf", + "read_gbq", + "read_hdf", + "read_html", + "read_json", + "read_orc", + "read_parquet", + "read_pickle", + "read_sas", + "read_spss", + "read_sql", + "read_sql_query", + "read_sql_table", + "read_stata", + "read_table", + "read_xml", + "reset_option", + "set_eng_float_format", + "set_option", + "show_versions", + "test", + "testing", + "timedelta_range", + "to_datetime", + "to_numeric", + "to_pickle", + "to_timedelta", + "tseries", + "unique", + "value_counts", + "wide_to_long", +] diff --git a/.local/lib/python3.8/site-packages/pandas/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..1350f1b Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/__pycache__/_typing.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/__pycache__/_typing.cpython-38.pyc new file mode 100644 index 0000000..5a22cd4 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/__pycache__/_typing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/__pycache__/_version.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/__pycache__/_version.cpython-38.pyc new file mode 100644 index 0000000..011ba60 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/__pycache__/_version.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/__pycache__/conftest.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/__pycache__/conftest.cpython-38.pyc new file mode 100644 index 0000000..8a99bd0 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/__pycache__/conftest.cpython-38.pyc differ diff --git 
a/.local/lib/python3.8/site-packages/pandas/__pycache__/testing.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/__pycache__/testing.cpython-38.pyc new file mode 100644 index 0000000..2502c2b Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/__pycache__/testing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/_config/__init__.py b/.local/lib/python3.8/site-packages/pandas/_config/__init__.py new file mode 100644 index 0000000..65936a9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_config/__init__.py @@ -0,0 +1,28 @@
+"""
+pandas._config is considered explicitly upstream of everything else in pandas,
+and should have no intra-pandas dependencies.
+
+Importing `dates` and `display` ensures that keys needed by _libs
+are initialized.
+"""
+__all__ = [
+ "config",
+ "detect_console_encoding",
+ "get_option",
+ "set_option",
+ "reset_option",
+ "describe_option",
+ "option_context",
+ "options",
+]
+from pandas._config import config
+from pandas._config import dates # noqa:F401
+from pandas._config.config import (
+ describe_option,
+ get_option,
+ option_context,
+ options,
+ reset_option,
+ set_option,
+)
+from pandas._config.display import detect_console_encoding diff --git a/.local/lib/python3.8/site-packages/pandas/_config/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/_config/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..99cd493 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_config/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/_config/__pycache__/config.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/_config/__pycache__/config.cpython-38.pyc new file mode 100644 index 0000000..6d420f5 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_config/__pycache__/config.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/_config/__pycache__/dates.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/_config/__pycache__/dates.cpython-38.pyc new file mode 100644 index 0000000..0a78461 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_config/__pycache__/dates.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/_config/__pycache__/display.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/_config/__pycache__/display.cpython-38.pyc new file mode 100644 index 0000000..0b51c69 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_config/__pycache__/display.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/_config/__pycache__/localization.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/_config/__pycache__/localization.cpython-38.pyc new file mode 100644 index 0000000..055e448 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_config/__pycache__/localization.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/_config/config.py b/.local/lib/python3.8/site-packages/pandas/_config/config.py new file mode 100644 index 0000000..5a0f582 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_config/config.py @@ -0,0 +1,892 @@
+"""
+The config module holds package-wide configurables and provides
+a uniform API for working with them.
+
+Overview
+========
+
+This module supports the following requirements:
+- options are referenced using keys in dot.notation, e.g. "x.y.option - z".
+- keys are case-insensitive.
+- functions should accept partial/regex keys, when unambiguous.
+- options can be registered by modules at import time.
+- options can be registered at init-time (via core.config_init)
+- options have a default value, and (optionally) a description and
+ validation function associated with them.
+- options can be deprecated, in which case referencing them
+ should produce a warning.
+- deprecated options can optionally be rerouted to a replacement
+ so that accessing a deprecated option reroutes to a differently
+ named option.
+- options can be reset to their default value.
+- all options can be reset to their default value at once.
+- all options in a certain sub-namespace can be reset at once.
+- the user can set / get / reset or ask for the description of an option.
+- a developer can register and mark an option as deprecated.
+- you can register a callback to be invoked when the option value
+ is set or reset. Changing the stored value is considered misuse, but
+ is not verboten.
+
+Implementation
+==============
+
+- Data is stored using nested dictionaries, and should be accessed
+ through the provided API.
+
+- "Registered options" and "Deprecated options" have metadata associated
+ with them, which are stored in auxiliary dictionaries keyed on the
+ fully-qualified key, e.g. "x.y.z.option".
+
+- the config_init module is imported by the package's __init__.py file.
+ placing any register_option() calls there will ensure those options
+ are available as soon as pandas is loaded. If you use register_option
+ in a module, it will only be available after that module is imported,
+ which you should be aware of.
+
+- `config_prefix` is a context_manager (for use with the `with` keyword)
+ which can save developers some typing, see the docstring.
+
+"""
+
+from __future__ import annotations
+
+from contextlib import (
+ ContextDecorator,
+ contextmanager,
+)
+import re
+from typing import (
+ Any,
+ Callable,
+ Iterable,
+ NamedTuple,
+ cast,
+)
+import warnings
+
+from pandas._typing import F
+
+
+class DeprecatedOption(NamedTuple):
+ key: str
+ msg: str | None
+ rkey: str | None
+ removal_ver: str | None
+
+
+class RegisteredOption(NamedTuple):
+ key: str
+ defval: object
+ doc: str
+ validator: Callable[[object], Any] | None
+ cb: Callable[[str], Any] | None
+
+
+# holds deprecated option metadata
+_deprecated_options: dict[str, DeprecatedOption] = {}
+
+# holds registered option metadata
+_registered_options: dict[str, RegisteredOption] = {}
+
+# holds the current values for registered options
+_global_config: dict[str, Any] = {}
+
+# keys which have a special meaning
+_reserved_keys: list[str] = ["all"]
+
+
+class OptionError(AttributeError, KeyError):
+ """
+ Exception for pandas.options, backwards compatible with KeyError
+ checks.
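+
+ For example (a minimal sketch, assuming the accessors defined below and
+ a hypothetical key), ``except KeyError`` also catches a failed option
+ lookup, because OptionError subclasses KeyError:
+
+ try:
+ get_option("no.such.option")
+ except KeyError:
+ pass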
+ """
+
+
+#
+# User API
+
+
+def _get_single_key(pat: str, silent: bool) -> str:
+ keys = _select_options(pat)
+ if len(keys) == 0:
+ if not silent:
+ _warn_if_deprecated(pat)
+ raise OptionError(f"No such key(s): {repr(pat)}")
+ if len(keys) > 1:
+ raise OptionError("Pattern matched multiple keys")
+ key = keys[0]
+
+ if not silent:
+ _warn_if_deprecated(key)
+
+ key = _translate_key(key)
+
+ return key
+
+
+def _get_option(pat: str, silent: bool = False):
+ key = _get_single_key(pat, silent)
+
+ # walk the nested dict
+ root, k = _get_root(key)
+ return root[k]
+
+
+def _set_option(*args, **kwargs) -> None:
+ # must have at least 1 arg; deal with constraints later
+ nargs = len(args)
+ if not nargs or nargs % 2 != 0:
+ raise ValueError("Must provide an even number of non-keyword arguments")
+
+ # default to false
+ silent = kwargs.pop("silent", False)
+
+ if kwargs:
+ kwarg = list(kwargs.keys())[0]
+ raise TypeError(f'_set_option() got an unexpected keyword argument "{kwarg}"')
+
+ for k, v in zip(args[::2], args[1::2]):
+ key = _get_single_key(k, silent)
+
+ o = _get_registered_option(key)
+ if o and o.validator:
+ o.validator(v)
+
+ # walk the nested dict
+ root, k = _get_root(key)
+ root[k] = v
+
+ if o.cb:
+ if silent:
+ with warnings.catch_warnings(record=True):
+ o.cb(key)
+ else:
+ o.cb(key)
+
+
+def _describe_option(pat: str = "", _print_desc: bool = True):
+
+ keys = _select_options(pat)
+ if len(keys) == 0:
+ raise OptionError("No such key(s)")
+
+ s = "\n".join([_build_option_description(k) for k in keys])
+
+ if _print_desc:
+ print(s)
+ else:
+ return s
+
+
+def _reset_option(pat: str, silent: bool = False) -> None:
+
+ keys = _select_options(pat)
+
+ if len(keys) == 0:
+ raise OptionError("No such key(s)")
+
+ if len(keys) > 1 and len(pat) < 4 and pat != "all":
+ raise ValueError(
+ "You must specify at least 4 characters when "
+ "resetting multiple keys, use the special keyword "
+ '"all" to reset all the options to their default value'
+ )
+
+ for k in keys:
+ _set_option(k, _registered_options[k].defval, silent=silent)
+
+
+def get_default_val(pat: str):
+ key = _get_single_key(pat, silent=True)
+ return _get_registered_option(key).defval
+
+
+class DictWrapper:
+ """provide attribute-style access to a nested dict"""
+
+ def __init__(self, d: dict[str, Any], prefix: str = ""):
+ object.__setattr__(self, "d", d)
+ object.__setattr__(self, "prefix", prefix)
+
+ def __setattr__(self, key: str, val: Any) -> None:
+ prefix = object.__getattribute__(self, "prefix")
+ if prefix:
+ prefix += "."
+ prefix += key
+ # you can't set new keys
+ # and you can't overwrite subtrees
+ if key in self.d and not isinstance(self.d[key], dict):
+ _set_option(prefix, val)
+ else:
+ raise OptionError("You can only set the value of existing options")
+
+ def __getattr__(self, key: str):
+ prefix = object.__getattribute__(self, "prefix")
+ if prefix:
+ prefix += "."
+ prefix += key
+ try:
+ v = object.__getattribute__(self, "d")[key]
+ except KeyError as err:
+ raise OptionError("No such option") from err
+ if isinstance(v, dict):
+ return DictWrapper(v, prefix)
+ else:
+ return _get_option(prefix)
+
+ def __dir__(self) -> Iterable[str]:
+ return list(self.d.keys())
+
+
+# For user convenience, we'd like to have the available options described
+# in the docstring. For dev convenience we'd like to generate the docstrings
+# dynamically instead of maintaining them by hand. To this end, we use the
+# class below which wraps functions inside a callable, and converts
+# __doc__ into a property function.
The docstrings below are templates
+# using the py2.6+ advanced formatting syntax to plug in a concise list
+# of options, and option descriptions.
+
+
+class CallableDynamicDoc:
+ def __init__(self, func, doc_tmpl):
+ self.__doc_tmpl__ = doc_tmpl
+ self.__func__ = func
+
+ def __call__(self, *args, **kwds):
+ return self.__func__(*args, **kwds)
+
+ @property
+ def __doc__(self):
+ opts_desc = _describe_option("all", _print_desc=False)
+ opts_list = pp_options_list(list(_registered_options.keys()))
+ return self.__doc_tmpl__.format(opts_desc=opts_desc, opts_list=opts_list)
+
+
+_get_option_tmpl = """
+get_option(pat)
+
+Retrieves the value of the specified option.
+
+Available options:
+
+{opts_list}
+
+Parameters
+----------
+pat : str
+ Regexp which should match a single option.
+ Note: partial matches are supported for convenience, but unless you use the
+ full option name (e.g. x.y.z.option_name), your code may break in future
+ versions if new options with similar names are introduced.
+
+Returns
+-------
+result : the value of the option
+
+Raises
+------
+OptionError : if no such option exists
+
+Notes
+-----
+The available options with their descriptions:
+
+{opts_desc}
+"""
+
+_set_option_tmpl = """
+set_option(pat, value)
+
+Sets the value of the specified option.
+
+Available options:
+
+{opts_list}
+
+Parameters
+----------
+pat : str
+ Regexp which should match a single option.
+ Note: partial matches are supported for convenience, but unless you use the
+ full option name (e.g. x.y.z.option_name), your code may break in future
+ versions if new options with similar names are introduced.
+value : object
+ New value of option.
+
+Returns
+-------
+None
+
+Raises
+------
+OptionError if no such option exists
+
+Notes
+-----
+The available options with their descriptions:
+
+{opts_desc}
+"""
+
+_describe_option_tmpl = """
+describe_option(pat, _print_desc=True)
+
+Prints the description for one or more registered options.
+
+Call with no arguments to get a listing for all registered options.
+
+Available options:
+
+{opts_list}
+
+Parameters
+----------
+pat : str
+ Regexp pattern. All matching keys will have their description displayed.
+_print_desc : bool, default True
+ If True (default) the description(s) will be printed to stdout.
+ Otherwise, the description(s) will be returned as a unicode string
+ (for testing).
+
+Returns
+-------
+None by default, the description(s) as a unicode string if _print_desc
+is False
+
+Notes
+-----
+The available options with their descriptions:
+
+{opts_desc}
+"""
+
+_reset_option_tmpl = """
+reset_option(pat)
+
+Reset one or more options to their default value.
+
+Pass "all" as argument to reset all options.
+
+Available options:
+
+{opts_list}
+
+Parameters
+----------
+pat : str/regex
+ If specified only options matching `prefix*` will be reset.
+ Note: partial matches are supported for convenience, but unless you
+ use the full option name (e.g. x.y.z.option_name), your code may break
+ in future versions if new options with similar names are introduced.
+ +Returns +------- +None + +Notes +----- +The available options with their descriptions: + +{opts_desc} +""" + +# bind the functions with their docstrings into a Callable +# and use that as the functions exposed in pd.api +get_option = CallableDynamicDoc(_get_option, _get_option_tmpl) +set_option = CallableDynamicDoc(_set_option, _set_option_tmpl) +reset_option = CallableDynamicDoc(_reset_option, _reset_option_tmpl) +describe_option = CallableDynamicDoc(_describe_option, _describe_option_tmpl) +options = DictWrapper(_global_config) + +# +# Functions for use by pandas developers, in addition to the user API + + +class option_context(ContextDecorator): + """ + Context manager to temporarily set options in the `with` statement context. + + You need to invoke as ``option_context(pat, val, [(pat, val), ...])``. + + Examples + -------- + >>> with option_context('display.max_rows', 10, 'display.max_columns', 5): + ... pass + """ + + def __init__(self, *args): + if len(args) % 2 != 0 or len(args) < 2: + raise ValueError( + "Need to invoke as option_context(pat, val, [(pat, val), ...])." + ) + + self.ops = list(zip(args[::2], args[1::2])) + + def __enter__(self): + self.undo = [(pat, _get_option(pat, silent=True)) for pat, val in self.ops] + + for pat, val in self.ops: + _set_option(pat, val, silent=True) + + def __exit__(self, *args): + if self.undo: + for pat, val in self.undo: + _set_option(pat, val, silent=True) + + +def register_option( + key: str, + defval: object, + doc: str = "", + validator: Callable[[object], Any] | None = None, + cb: Callable[[str], Any] | None = None, +) -> None: + """ + Register an option in the package-wide pandas config object + + Parameters + ---------- + key : str + Fully-qualified key, e.g. "x.y.option-z". + defval : object + Default value of the option. + doc : str + Description of the option. + validator : Callable, optional + Function of a single argument, should raise `ValueError` if + called with a value which is not a legal value for the option. + cb : Callable, optional + A function of a single argument "key", which is called + immediately after an option value is set/reset. key is + the full name of the option. + + Raises + ------ + ValueError if `validator` is specified and `defval` is not a valid value.
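+ + Examples + -------- + A minimal, illustrative sketch (the option name used here is hypothetical): + + >>> register_option("display.my_flag", True, doc="illustrative flag", + ... validator=is_bool) + >>> get_option("display.my_flag") + True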
+ + """ + import keyword + import tokenize + + key = key.lower() + + if key in _registered_options: + raise OptionError(f"Option '{key}' has already been registered") + if key in _reserved_keys: + raise OptionError(f"Option '{key}' is a reserved key") + + # the default value should be legal + if validator: + validator(defval) + + # walk the nested dict, creating dicts as needed along the path + path = key.split(".") + + for k in path: + if not re.match("^" + tokenize.Name + "$", k): + raise ValueError(f"{k} is not a valid identifier") + if keyword.iskeyword(k): + raise ValueError(f"{k} is a python keyword") + + cursor = _global_config + msg = "Path prefix to option '{option}' is already an option" + + for i, p in enumerate(path[:-1]): + if not isinstance(cursor, dict): + raise OptionError(msg.format(option=".".join(path[:i]))) + if p not in cursor: + cursor[p] = {} + cursor = cursor[p] + + if not isinstance(cursor, dict): + raise OptionError(msg.format(option=".".join(path[:-1]))) + + cursor[path[-1]] = defval # initialize + + # save the option metadata + _registered_options[key] = RegisteredOption( + key=key, defval=defval, doc=doc, validator=validator, cb=cb + ) + + +def deprecate_option( + key: str, + msg: str | None = None, + rkey: str | None = None, + removal_ver: str | None = None, +) -> None: + """ + Mark option `key` as deprecated, if code attempts to access this option, + a warning will be produced, using `msg` if given, or a default message + if not. + if `rkey` is given, any access to the key will be re-routed to `rkey`. + + Neither the existence of `key` nor that if `rkey` is checked. If they + do not exist, any subsequence access will fail as usual, after the + deprecation warning is given. + + Parameters + ---------- + key : str + Name of the option to be deprecated. + must be a fully-qualified option name (e.g "x.y.z.rkey"). + msg : str, optional + Warning message to output when the key is referenced. + if no message is given a default message will be emitted. + rkey : str, optional + Name of an option to reroute access to. + If specified, any referenced `key` will be + re-routed to `rkey` including set/get/reset. + rkey must be a fully-qualified option name (e.g "x.y.z.rkey"). + used by the default message if no `msg` is specified. + removal_ver : str, optional + Specifies the version in which this option will + be removed. used by the default message if no `msg` is specified. + + Raises + ------ + OptionError + If the specified key has already been deprecated. 
+ """ + key = key.lower() + + if key in _deprecated_options: + raise OptionError(f"Option '{key}' has already been defined as deprecated.") + + _deprecated_options[key] = DeprecatedOption(key, msg, rkey, removal_ver) + + +# +# functions internal to the module + + +def _select_options(pat: str) -> list[str]: + """ + returns a list of keys matching `pat` + + if pat=="all", returns all registered options + """ + # short-circuit for exact key + if pat in _registered_options: + return [pat] + + # else look through all of them + keys = sorted(_registered_options.keys()) + if pat == "all": # reserved key + return keys + + return [k for k in keys if re.search(pat, k, re.I)] + + +def _get_root(key: str) -> tuple[dict[str, Any], str]: + path = key.split(".") + cursor = _global_config + for p in path[:-1]: + cursor = cursor[p] + return cursor, path[-1] + + +def _is_deprecated(key: str) -> bool: + """Returns True if the given option has been deprecated""" + key = key.lower() + return key in _deprecated_options + + +def _get_deprecated_option(key: str): + """ + Retrieves the metadata for a deprecated option, if `key` is deprecated. + + Returns + ------- + DeprecatedOption (namedtuple) if key is deprecated, None otherwise + """ + try: + d = _deprecated_options[key] + except KeyError: + return None + else: + return d + + +def _get_registered_option(key: str): + """ + Retrieves the option metadata if `key` is a registered option. + + Returns + ------- + RegisteredOption (namedtuple) if key is deprecated, None otherwise + """ + return _registered_options.get(key) + + +def _translate_key(key: str) -> str: + """ + if key id deprecated and a replacement key defined, will return the + replacement key, otherwise returns `key` as - is + """ + d = _get_deprecated_option(key) + if d: + return d.rkey or key + else: + return key + + +def _warn_if_deprecated(key: str) -> bool: + """ + Checks if `key` is a deprecated option and if so, prints a warning. + + Returns + ------- + bool - True if `key` is deprecated, False otherwise. + """ + d = _get_deprecated_option(key) + if d: + if d.msg: + warnings.warn(d.msg, FutureWarning) + else: + msg = f"'{key}' is deprecated" + if d.removal_ver: + msg += f" and will be removed in {d.removal_ver}" + if d.rkey: + msg += f", please use '{d.rkey}' instead." + else: + msg += ", please refrain from using it." + + warnings.warn(msg, FutureWarning) + return True + return False + + +def _build_option_description(k: str) -> str: + """Builds a formatted description of a registered option and prints it""" + o = _get_registered_option(k) + d = _get_deprecated_option(k) + + s = f"{k} " + + if o.doc: + s += "\n".join(o.doc.strip().split("\n")) + else: + s += "No description available." + + if o: + s += f"\n [default: {o.defval}] [currently: {_get_option(k, True)}]" + + if d: + rkey = d.rkey or "" + s += "\n (Deprecated" + s += f", use `{rkey}` instead." 
+ s += ")" + + return s + + +def pp_options_list(keys: Iterable[str], width=80, _print: bool = False): + """Builds a concise listing of available options, grouped by prefix""" + from itertools import groupby + from textwrap import wrap + + def pp(name: str, ks: Iterable[str]) -> list[str]: + pfx = "- " + name + ".[" if name else "" + ls = wrap( + ", ".join(ks), + width, + initial_indent=pfx, + subsequent_indent=" ", + break_long_words=False, + ) + if ls and ls[-1] and name: + ls[-1] = ls[-1] + "]" + return ls + + ls: list[str] = [] + singles = [x for x in sorted(keys) if x.find(".") < 0] + if singles: + ls += pp("", singles) + keys = [x for x in keys if x.find(".") >= 0] + + for k, g in groupby(sorted(keys), lambda x: x[: x.rfind(".")]): + ks = [x[len(k) + 1 :] for x in list(g)] + ls += pp(k, ks) + s = "\n".join(ls) + if _print: + print(s) + else: + return s + + +# +# helpers + + +@contextmanager +def config_prefix(prefix): + """ + contextmanager for multiple invocations of API with a common prefix + + supported API functions: (register / get / set )__option + + Warning: This is not thread - safe, and won't work properly if you import + the API functions into your module using the "from x import y" construct. + + Example + ------- + import pandas._config.config as cf + with cf.config_prefix("display.font"): + cf.register_option("color", "red") + cf.register_option("size", " 5 pt") + cf.set_option(size, " 6 pt") + cf.get_option(size) + ... + + etc' + + will register options "display.font.color", "display.font.size", set the + value of "display.font.size"... and so on. + """ + # Note: reset_option relies on set_option, and on key directly + # it does not fit in to this monkey-patching scheme + + global register_option, get_option, set_option, reset_option + + def wrap(func: F) -> F: + def inner(key: str, *args, **kwds): + pkey = f"{prefix}.{key}" + return func(pkey, *args, **kwds) + + return cast(F, inner) + + _register_option = register_option + _get_option = get_option + _set_option = set_option + set_option = wrap(set_option) + get_option = wrap(get_option) + register_option = wrap(register_option) + try: + yield + finally: + set_option = _set_option + get_option = _get_option + register_option = _register_option + + +# These factories and methods are handy for use as the validator +# arg in register_option + + +def is_type_factory(_type: type[Any]) -> Callable[[Any], None]: + """ + + Parameters + ---------- + `_type` - a type to be compared against (e.g. 
type(x) == `_type`) + + Returns + ------- + validator - a function of a single argument x, which raises + ValueError if type(x) is not equal to `_type` + + """ + + def inner(x) -> None: + if type(x) != _type: + raise ValueError(f"Value must have type '{_type}'") + + return inner + + +def is_instance_factory(_type) -> Callable[[Any], None]: + """ + + Parameters + ---------- + `_type` - the type to be checked against + + Returns + ------- + validator - a function of a single argument x, which raises + ValueError if x is not an instance of `_type` + + """ + if isinstance(_type, (tuple, list)): + _type = tuple(_type) + type_repr = "|".join(map(str, _type)) + else: + type_repr = f"'{_type}'" + + def inner(x) -> None: + if not isinstance(x, _type): + raise ValueError(f"Value must be an instance of {type_repr}") + + return inner + + +def is_one_of_factory(legal_values) -> Callable[[Any], None]: + + callables = [c for c in legal_values if callable(c)] + legal_values = [c for c in legal_values if not callable(c)] + + def inner(x) -> None: + if x not in legal_values: + + if not any(c(x) for c in callables): + uvals = [str(lval) for lval in legal_values] + pp_values = "|".join(uvals) + msg = f"Value must be one of {pp_values}" + if len(callables): + msg += " or a callable" + raise ValueError(msg) + + return inner + + +def is_nonnegative_int(value: object) -> None: + """ + Verify that value is None or a nonnegative int. + + Parameters + ---------- + value : None or int + The `value` to be checked. + + Raises + ------ + ValueError + When the value is neither None nor a nonnegative integer + """ + if value is None: + return + + elif isinstance(value, int): + if value >= 0: + return + + msg = "Value must be a nonnegative integer or None" + raise ValueError(msg) + + +# common type validators, for convenience +# usage: register_option(... , validator = is_int) +is_int = is_type_factory(int) +is_bool = is_type_factory(bool) +is_float = is_type_factory(float) +is_str = is_type_factory(str) +is_text = is_instance_factory((str, bytes)) + + +def is_callable(obj) -> bool: + """ + + Parameters + ---------- + `obj` - the object to be checked + + Returns + ------- + validator - returns True if object is callable + raises ValueError otherwise.
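+ + A quick illustration: + + >>> is_callable(lambda x: x) + True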
+ + """ + if not callable(obj): + raise ValueError("Value must be a callable") + return True diff --git a/.local/lib/python3.8/site-packages/pandas/_config/dates.py b/.local/lib/python3.8/site-packages/pandas/_config/dates.py new file mode 100644 index 0000000..5bf2b49 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_config/dates.py @@ -0,0 +1,23 @@ +""" +config for datetime formatting +""" +from pandas._config import config as cf + +pc_date_dayfirst_doc = """ +: boolean + When True, prints and parses dates with the day first, eg 20/01/2005 +""" + +pc_date_yearfirst_doc = """ +: boolean + When True, prints and parses dates with the year first, eg 2005/01/20 +""" + +with cf.config_prefix("display"): + # Needed upstream of `_libs` because these are used in tslibs.parsing + cf.register_option( + "date_dayfirst", False, pc_date_dayfirst_doc, validator=cf.is_bool + ) + cf.register_option( + "date_yearfirst", False, pc_date_yearfirst_doc, validator=cf.is_bool + ) diff --git a/.local/lib/python3.8/site-packages/pandas/_config/display.py b/.local/lib/python3.8/site-packages/pandas/_config/display.py new file mode 100644 index 0000000..df2c3ad --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_config/display.py @@ -0,0 +1,62 @@ +""" +Unopinionated display configuration. +""" + +from __future__ import annotations + +import locale +import sys + +from pandas._config import config as cf + +# ----------------------------------------------------------------------------- +# Global formatting options +_initial_defencoding: str | None = None + + +def detect_console_encoding() -> str: + """ + Try to find the most capable encoding supported by the console. + slightly modified from the way IPython handles the same issue. + """ + global _initial_defencoding + + encoding = None + try: + encoding = sys.stdout.encoding or sys.stdin.encoding + except (AttributeError, OSError): + pass + + # try again for something better + if not encoding or "ascii" in encoding.lower(): + try: + encoding = locale.getpreferredencoding() + except locale.Error: + # can be raised by locale.setlocale(), which is + # called by getpreferredencoding + # (on some systems, see stdlib locale docs) + pass + + # when all else fails. this will usually be "ascii" + if not encoding or "ascii" in encoding.lower(): + encoding = sys.getdefaultencoding() + + # GH#3360, save the reported defencoding at import time + # MPL backends may change it. Make available for debugging. + if not _initial_defencoding: + _initial_defencoding = sys.getdefaultencoding() + + return encoding + + +pc_encoding_doc = """ +: str/unicode + Defaults to the detected encoding of the console. + Specifies the encoding to be used for strings returned by to_string, + these are generally strings meant to be displayed on the console. +""" + +with cf.config_prefix("display"): + cf.register_option( + "encoding", detect_console_encoding(), pc_encoding_doc, validator=cf.is_text + ) diff --git a/.local/lib/python3.8/site-packages/pandas/_config/localization.py b/.local/lib/python3.8/site-packages/pandas/_config/localization.py new file mode 100644 index 0000000..2a487fa --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_config/localization.py @@ -0,0 +1,178 @@ +""" +Helpers for configuring locale settings. + +Name `localization` is chosen to avoid overlap with builtin `locale` module. 
+""" +from __future__ import annotations + +from contextlib import contextmanager +import locale +import re +import subprocess +from typing import ( + Callable, + Iterator, +) + +from pandas._config.config import options + + +@contextmanager +def set_locale( + new_locale: str | tuple[str, str], lc_var: int = locale.LC_ALL +) -> Iterator[str | tuple[str, str]]: + """ + Context manager for temporarily setting a locale. + + Parameters + ---------- + new_locale : str or tuple + A string of the form .. For example to set + the current locale to US English with a UTF8 encoding, you would pass + "en_US.UTF-8". + lc_var : int, default `locale.LC_ALL` + The category of the locale being set. + + Notes + ----- + This is useful when you want to run a particular block of code under a + particular locale, without globally setting the locale. This probably isn't + thread-safe. + """ + current_locale = locale.getlocale() + + try: + locale.setlocale(lc_var, new_locale) + normalized_locale = locale.getlocale() + if all(x is not None for x in normalized_locale): + yield ".".join(normalized_locale) + else: + yield new_locale + finally: + locale.setlocale(lc_var, current_locale) + + +def can_set_locale(lc: str, lc_var: int = locale.LC_ALL) -> bool: + """ + Check to see if we can set a locale, and subsequently get the locale, + without raising an Exception. + + Parameters + ---------- + lc : str + The locale to attempt to set. + lc_var : int, default `locale.LC_ALL` + The category of the locale being set. + + Returns + ------- + bool + Whether the passed locale can be set + """ + try: + with set_locale(lc, lc_var=lc_var): + pass + except (ValueError, locale.Error): + # horrible name for a Exception subclass + return False + else: + return True + + +def _valid_locales(locales: list[str] | str, normalize: bool) -> list[str]: + """ + Return a list of normalized locales that do not throw an ``Exception`` + when set. + + Parameters + ---------- + locales : str + A string where each locale is separated by a newline. + normalize : bool + Whether to call ``locale.normalize`` on each locale. + + Returns + ------- + valid_locales : list + A list of valid locales. + """ + return [ + loc + for loc in ( + locale.normalize(loc.strip()) if normalize else loc.strip() + for loc in locales + ) + if can_set_locale(loc) + ] + + +def _default_locale_getter() -> bytes: + return subprocess.check_output(["locale -a"], shell=True) + + +def get_locales( + prefix: str | None = None, + normalize: bool = True, + locale_getter: Callable[[], bytes] = _default_locale_getter, +) -> list[str] | None: + """ + Get all the locales that are available on the system. + + Parameters + ---------- + prefix : str + If not ``None`` then return only those locales with the prefix + provided. For example to get all English language locales (those that + start with ``"en"``), pass ``prefix="en"``. + normalize : bool + Call ``locale.normalize`` on the resulting list of available locales. + If ``True``, only locales that can be set without throwing an + ``Exception`` are returned. + locale_getter : callable + The function to use to retrieve the current locales. This should return + a string with each locale separated by a newline character. + + Returns + ------- + locales : list of strings + A list of locale strings that can be set with ``locale.setlocale()``. + For example:: + + locale.setlocale(locale.LC_ALL, locale_string) + + On error will return None (no locale available, e.g. 
Windows) + + """ + try: + raw_locales = locale_getter() + except subprocess.CalledProcessError: + # Raised on (some? all?) Windows platforms because "locale -a" + # is not defined + return None + + try: + # raw_locales is a "\n"-separated list of locales + # it may contain non-decodable parts, so split + # extract what we can and then rejoin. + split_raw_locales = raw_locales.split(b"\n") + out_locales = [] + for x in split_raw_locales: + try: + out_locales.append(str(x, encoding=options.display.encoding)) + except UnicodeError: + # 'locale -a' is used to populate 'raw_locales' and on + # Redhat 7 Linux (and maybe others) prints locale names + # using windows-1252 encoding. Bug only triggered by + # a few special characters and when there is an + # extensive list of installed locales. + out_locales.append(str(x, encoding="windows-1252")) + + except TypeError: + pass + + if prefix is None: + return _valid_locales(out_locales, normalize) + + pattern = re.compile(f"{prefix}.*") + found = pattern.findall("\n".join(out_locales)) + return _valid_locales(found, normalize) diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/__init__.py b/.local/lib/python3.8/site-packages/pandas/_libs/__init__.py new file mode 100644 index 0000000..f119e28 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/__init__.py @@ -0,0 +1,22 @@ +__all__ = [ + "NaT", + "NaTType", + "OutOfBoundsDatetime", + "Period", + "Timedelta", + "Timestamp", + "iNaT", + "Interval", +] + + +from pandas._libs.interval import Interval +from pandas._libs.tslibs import ( + NaT, + NaTType, + OutOfBoundsDatetime, + Period, + Timedelta, + Timestamp, + iNaT, +) diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/_libs/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..6da265b Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/algos.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/_libs/algos.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..145fa70 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/algos.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/algos.pxd b/.local/lib/python3.8/site-packages/pandas/_libs/algos.pxd new file mode 100644 index 0000000..fdeff2e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/algos.pxd @@ -0,0 +1,12 @@ +from pandas._libs.dtypes cimport numeric_t + + +cdef numeric_t kth_smallest_c(numeric_t* arr, Py_ssize_t k, Py_ssize_t n) nogil + +cdef enum TiebreakEnumType: + TIEBREAK_AVERAGE + TIEBREAK_MIN + TIEBREAK_MAX + TIEBREAK_FIRST + TIEBREAK_FIRST_DESCENDING + TIEBREAK_DENSE diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/algos.pyi b/.local/lib/python3.8/site-packages/pandas/_libs/algos.pyi new file mode 100644 index 0000000..df8ac3f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/algos.pyi @@ -0,0 +1,446 @@ +from __future__ import annotations + +from typing import Any + +import numpy as np + +from pandas._typing import npt + +class Infinity: + """ + Provide a positive Infinity comparison method for ranking. + """ + + def __eq__(self, other) -> bool: ... + def __ne__(self, other) -> bool: ... + def __lt__(self, other) -> bool: ... + def __le__(self, other) -> bool: ...
+ def __gt__(self, other) -> bool: ... + def __ge__(self, other) -> bool: ... + +class NegInfinity: + """ + Provide a negative Infinity comparison method for ranking. + """ + + def __eq__(self, other) -> bool: ... + def __ne__(self, other) -> bool: ... + def __lt__(self, other) -> bool: ... + def __le__(self, other) -> bool: ... + def __gt__(self, other) -> bool: ... + def __ge__(self, other) -> bool: ... + +def unique_deltas( + arr: np.ndarray, # const int64_t[:] +) -> np.ndarray: ... # np.ndarray[np.int64, ndim=1] +def is_lexsorted(list_of_arrays: list[npt.NDArray[np.int64]]) -> bool: ... +def groupsort_indexer( + index: np.ndarray, # const int64_t[:] + ngroups: int, +) -> tuple[ + np.ndarray, # ndarray[int64_t, ndim=1] + np.ndarray, # ndarray[int64_t, ndim=1] +]: ... +def kth_smallest( + a: np.ndarray, # numeric[:] + k: int, +) -> Any: ... # numeric + +# ---------------------------------------------------------------------- +# Pairwise correlation/covariance + +def nancorr( + mat: npt.NDArray[np.float64], # const float64_t[:, :] + cov: bool = ..., + minp: int | None = ..., +) -> npt.NDArray[np.float64]: ... # ndarray[float64_t, ndim=2] +def nancorr_spearman( + mat: npt.NDArray[np.float64], # ndarray[float64_t, ndim=2] + minp: int = ..., +) -> npt.NDArray[np.float64]: ... # ndarray[float64_t, ndim=2] + +# ---------------------------------------------------------------------- + +# ctypedef fused algos_t: +# float64_t +# float32_t +# object +# int64_t +# int32_t +# int16_t +# int8_t +# uint64_t +# uint32_t +# uint16_t +# uint8_t + +def validate_limit(nobs: int | None, limit=...) -> int: ... +def pad( + old: np.ndarray, # ndarray[algos_t] + new: np.ndarray, # ndarray[algos_t] + limit=..., +) -> npt.NDArray[np.intp]: ... # np.ndarray[np.intp, ndim=1] +def pad_inplace( + values: np.ndarray, # algos_t[:] + mask: np.ndarray, # uint8_t[:] + limit=..., +) -> None: ... +def pad_2d_inplace( + values: np.ndarray, # algos_t[:, :] + mask: np.ndarray, # const uint8_t[:, :] + limit=..., +) -> None: ... +def backfill( + old: np.ndarray, # ndarray[algos_t] + new: np.ndarray, # ndarray[algos_t] + limit=..., +) -> npt.NDArray[np.intp]: ... # np.ndarray[np.intp, ndim=1] +def backfill_inplace( + values: np.ndarray, # algos_t[:] + mask: np.ndarray, # uint8_t[:] + limit=..., +) -> None: ... +def backfill_2d_inplace( + values: np.ndarray, # algos_t[:, :] + mask: np.ndarray, # const uint8_t[:, :] + limit=..., +) -> None: ... +def is_monotonic( + arr: np.ndarray, # ndarray[algos_t, ndim=1] + timelike: bool, +) -> tuple[bool, bool, bool]: ... + +# ---------------------------------------------------------------------- +# rank_1d, rank_2d +# ---------------------------------------------------------------------- + +# ctypedef fused rank_t: +# object +# float64_t +# uint64_t +# int64_t + +def rank_1d( + values: np.ndarray, # ndarray[rank_t, ndim=1] + labels: np.ndarray | None = ..., # const int64_t[:]=None + is_datetimelike: bool = ..., + ties_method=..., + ascending: bool = ..., + pct: bool = ..., + na_option=..., +) -> np.ndarray: ... # np.ndarray[float64_t, ndim=1] +def rank_2d( + in_arr: np.ndarray, # ndarray[rank_t, ndim=2] + axis: int = ..., + is_datetimelike: bool = ..., + ties_method=..., + ascending: bool = ..., + na_option=..., + pct: bool = ..., +) -> np.ndarray: ... # np.ndarray[float64_t, ndim=1] +def diff_2d( + arr: np.ndarray, # ndarray[diff_t, ndim=2] + out: np.ndarray, # ndarray[out_t, ndim=2] + periods: int, + axis: int, + datetimelike: bool = ..., +) -> None: ... 
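+# Each ensure_<dtype> stub below coerces an array-like to the named dtype: +# roughly, the input is returned as-is when it already has the target dtype +# and converted (via astype) otherwise.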
+def ensure_platform_int(arr: object) -> npt.NDArray[np.intp]: ... +def ensure_object(arr: object) -> npt.NDArray[np.object_]: ... +def ensure_complex64(arr: object, copy=...) -> npt.NDArray[np.complex64]: ... +def ensure_complex128(arr: object, copy=...) -> npt.NDArray[np.complex128]: ... +def ensure_float64(arr: object, copy=...) -> npt.NDArray[np.float64]: ... +def ensure_float32(arr: object, copy=...) -> npt.NDArray[np.float32]: ... +def ensure_int8(arr: object, copy=...) -> npt.NDArray[np.int8]: ... +def ensure_int16(arr: object, copy=...) -> npt.NDArray[np.int16]: ... +def ensure_int32(arr: object, copy=...) -> npt.NDArray[np.int32]: ... +def ensure_int64(arr: object, copy=...) -> npt.NDArray[np.int64]: ... +def ensure_uint8(arr: object, copy=...) -> npt.NDArray[np.uint8]: ... +def ensure_uint16(arr: object, copy=...) -> npt.NDArray[np.uint16]: ... +def ensure_uint32(arr: object, copy=...) -> npt.NDArray[np.uint32]: ... +def ensure_uint64(arr: object, copy=...) -> npt.NDArray[np.uint64]: ... +def take_1d_int8_int8( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_1d_int8_int32( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_1d_int8_int64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_1d_int8_float64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_1d_int16_int16( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_1d_int16_int32( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_1d_int16_int64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_1d_int16_float64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_1d_int32_int32( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_1d_int32_int64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_1d_int32_float64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_1d_int64_int64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_1d_int64_float64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_1d_float32_float32( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_1d_float32_float64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_1d_float64_float64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_1d_object_object( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_1d_bool_bool( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_1d_bool_object( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... 
+def take_2d_axis0_int8_int8( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis0_int8_int32( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis0_int8_int64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis0_int8_float64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis0_int16_int16( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis0_int16_int32( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis0_int16_int64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis0_int16_float64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis0_int32_int32( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis0_int32_int64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis0_int32_float64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis0_int64_int64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis0_int64_float64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis0_float32_float32( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis0_float32_float64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis0_float64_float64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis0_object_object( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis0_bool_bool( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis0_bool_object( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis1_int8_int8( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis1_int8_int32( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis1_int8_int64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis1_int8_float64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis1_int16_int16( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis1_int16_int32( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis1_int16_int64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... 
+def take_2d_axis1_int16_float64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis1_int32_int32( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis1_int32_int64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis1_int32_float64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis1_int64_int64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis1_int64_float64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis1_float32_float32( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis1_float32_float64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis1_float64_float64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis1_object_object( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis1_bool_bool( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis1_bool_object( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_multi_int8_int8( + values: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + out: np.ndarray, + fill_value=..., +) -> None: ... +def take_2d_multi_int8_int32( + values: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + out: np.ndarray, + fill_value=..., +) -> None: ... +def take_2d_multi_int8_int64( + values: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + out: np.ndarray, + fill_value=..., +) -> None: ... +def take_2d_multi_int8_float64( + values: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + out: np.ndarray, + fill_value=..., +) -> None: ... +def take_2d_multi_int16_int16( + values: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + out: np.ndarray, + fill_value=..., +) -> None: ... +def take_2d_multi_int16_int32( + values: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + out: np.ndarray, + fill_value=..., +) -> None: ... +def take_2d_multi_int16_int64( + values: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + out: np.ndarray, + fill_value=..., +) -> None: ... +def take_2d_multi_int16_float64( + values: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + out: np.ndarray, + fill_value=..., +) -> None: ... +def take_2d_multi_int32_int32( + values: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + out: np.ndarray, + fill_value=..., +) -> None: ... +def take_2d_multi_int32_int64( + values: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + out: np.ndarray, + fill_value=..., +) -> None: ... +def take_2d_multi_int32_float64( + values: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + out: np.ndarray, + fill_value=..., +) -> None: ... 
+def take_2d_multi_int64_float64( + values: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + out: np.ndarray, + fill_value=..., +) -> None: ... +def take_2d_multi_float32_float32( + values: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + out: np.ndarray, + fill_value=..., +) -> None: ... +def take_2d_multi_float32_float64( + values: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + out: np.ndarray, + fill_value=..., +) -> None: ... +def take_2d_multi_float64_float64( + values: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + out: np.ndarray, + fill_value=..., +) -> None: ... +def take_2d_multi_object_object( + values: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + out: np.ndarray, + fill_value=..., +) -> None: ... +def take_2d_multi_bool_bool( + values: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + out: np.ndarray, + fill_value=..., +) -> None: ... +def take_2d_multi_bool_object( + values: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + out: np.ndarray, + fill_value=..., +) -> None: ... +def take_2d_multi_int64_int64( + values: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + out: np.ndarray, + fill_value=..., +) -> None: ... diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/algos.pyx b/.local/lib/python3.8/site-packages/pandas/_libs/algos.pyx new file mode 100644 index 0000000..1b33b90 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/algos.pyx @@ -0,0 +1,1478 @@ +import cython +from cython import Py_ssize_t + +from libc.math cimport ( + fabs, + sqrt, +) +from libc.stdlib cimport ( + free, + malloc, +) +from libc.string cimport memmove + +import numpy as np + +cimport numpy as cnp +from numpy cimport ( + NPY_COMPLEX64, + NPY_COMPLEX128, + NPY_FLOAT32, + NPY_FLOAT64, + NPY_INT8, + NPY_INT16, + NPY_INT32, + NPY_INT64, + NPY_OBJECT, + NPY_UINT8, + NPY_UINT16, + NPY_UINT32, + NPY_UINT64, + float32_t, + float64_t, + int8_t, + int16_t, + int32_t, + int64_t, + intp_t, + ndarray, + uint8_t, + uint16_t, + uint32_t, + uint64_t, +) + +cnp.import_array() + +cimport pandas._libs.util as util +from pandas._libs.dtypes cimport ( + iu_64_floating_obj_t, + numeric_object_t, + numeric_t, +) +from pandas._libs.khash cimport ( + kh_destroy_int64, + kh_get_int64, + kh_init_int64, + kh_int64_t, + kh_put_int64, + kh_resize_int64, + khiter_t, +) +from pandas._libs.util cimport get_nat + +import pandas._libs.missing as missing + +cdef: + float64_t FP_ERR = 1e-13 + float64_t NaN = np.NaN + int64_t NPY_NAT = get_nat() + + +tiebreakers = { + "average": TIEBREAK_AVERAGE, + "min": TIEBREAK_MIN, + "max": TIEBREAK_MAX, + "first": TIEBREAK_FIRST, + "dense": TIEBREAK_DENSE, +} + + +cdef inline bint are_diff(object left, object right): + try: + return fabs(left - right) > FP_ERR + except TypeError: + return left != right + + +class Infinity: + """ + Provide a positive Infinity comparison method for ranking. + """ + __lt__ = lambda self, other: False + __le__ = lambda self, other: isinstance(other, Infinity) + __eq__ = lambda self, other: isinstance(other, Infinity) + __ne__ = lambda self, other: not isinstance(other, Infinity) + __gt__ = lambda self, other: (not isinstance(other, Infinity) and + not missing.checknull(other)) + __ge__ = lambda self, other: not missing.checknull(other) + + +class NegInfinity: + """ + Provide a negative Infinity comparison method for ranking. 
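+ + For example, ``NegInfinity() < 1`` evaluates to True, while the ordering + comparisons (<, <=, >, >=) against missing values all return False; see + the lambdas below.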
+ """ + __lt__ = lambda self, other: (not isinstance(other, NegInfinity) and + not missing.checknull(other)) + __le__ = lambda self, other: not missing.checknull(other) + __eq__ = lambda self, other: isinstance(other, NegInfinity) + __ne__ = lambda self, other: not isinstance(other, NegInfinity) + __gt__ = lambda self, other: False + __ge__ = lambda self, other: isinstance(other, NegInfinity) + + +@cython.wraparound(False) +@cython.boundscheck(False) +cpdef ndarray[int64_t, ndim=1] unique_deltas(const int64_t[:] arr): + """ + Efficiently find the unique first-differences of the given array. + + Parameters + ---------- + arr : ndarray[int64_t] + + Returns + ------- + ndarray[int64_t] + An ordered ndarray[int64_t] + """ + cdef: + Py_ssize_t i, n = len(arr) + int64_t val + khiter_t k + kh_int64_t *table + int ret = 0 + list uniques = [] + ndarray[int64_t, ndim=1] result + + table = kh_init_int64() + kh_resize_int64(table, 10) + for i in range(n - 1): + val = arr[i + 1] - arr[i] + k = kh_get_int64(table, val) + if k == table.n_buckets: + kh_put_int64(table, val, &ret) + uniques.append(val) + kh_destroy_int64(table) + + result = np.array(uniques, dtype=np.int64) + result.sort() + return result + + +@cython.wraparound(False) +@cython.boundscheck(False) +def is_lexsorted(list_of_arrays: list) -> bint: + cdef: + Py_ssize_t i + Py_ssize_t n, nlevels + int64_t k, cur, pre + ndarray arr + bint result = True + + nlevels = len(list_of_arrays) + n = len(list_of_arrays[0]) + + cdef int64_t **vecs = malloc(nlevels * sizeof(int64_t*)) + for i in range(nlevels): + arr = list_of_arrays[i] + assert arr.dtype.name == 'int64' + vecs[i] = cnp.PyArray_DATA(arr) + + # Assume uniqueness?? + with nogil: + for i in range(1, n): + for k in range(nlevels): + cur = vecs[k][i] + pre = vecs[k][i -1] + if cur == pre: + continue + elif cur > pre: + break + else: + result = False + break + free(vecs) + return result + + +@cython.boundscheck(False) +@cython.wraparound(False) +def groupsort_indexer(const intp_t[:] index, Py_ssize_t ngroups): + """ + Compute a 1-d indexer. + + The indexer is an ordering of the passed index, + ordered by the groups. + + Parameters + ---------- + index: np.ndarray[np.intp] + Mappings from group -> position. + ngroups: int64 + Number of groups. + + Returns + ------- + ndarray[intp_t, ndim=1] + Indexer + ndarray[intp_t, ndim=1] + Group Counts + + Notes + ----- + This is a reverse of the label factorization process. + """ + cdef: + Py_ssize_t i, label, n + intp_t[::1] indexer, where, counts + + counts = np.zeros(ngroups + 1, dtype=np.intp) + n = len(index) + indexer = np.zeros(n, dtype=np.intp) + where = np.zeros(ngroups + 1, dtype=np.intp) + + with nogil: + + # count group sizes, location 0 for NA + for i in range(n): + counts[index[i] + 1] += 1 + + # mark the start of each contiguous group of like-indexed data + for i in range(1, ngroups + 1): + where[i] = where[i - 1] + counts[i - 1] + + # this is our indexer + for i in range(n): + label = index[i] + 1 + indexer[where[label]] = i + where[label] += 1 + + return indexer.base, counts.base + + +cdef inline Py_ssize_t swap(numeric_t *a, numeric_t *b) nogil: + cdef: + numeric_t t + + # cython doesn't allow pointer dereference so use array syntax + t = a[0] + a[0] = b[0] + b[0] = t + return 0 + + +cdef inline numeric_t kth_smallest_c(numeric_t* arr, Py_ssize_t k, Py_ssize_t n) nogil: + """ + See kth_smallest.__doc__. 
The additional parameter n specifies the maximum + number of elements considered in arr, needed for compatibility with usage + in groupby.pyx + """ + cdef: + Py_ssize_t i, j, left, m + numeric_t x + + left = 0 + m = n - 1 + + while left < m: + x = arr[k] + i = left + j = m + + while 1: + while arr[i] < x: + i += 1 + while x < arr[j]: + j -= 1 + if i <= j: + swap(&arr[i], &arr[j]) + i += 1 + j -= 1 + + if i > j: + break + + if j < k: + left = i + if k < i: + m = j + return arr[k] + + +@cython.boundscheck(False) +@cython.wraparound(False) +def kth_smallest(numeric_t[::1] arr, Py_ssize_t k) -> numeric_t: + """ + Compute the kth smallest value in arr. Note that the input + array will be modified. + + Parameters + ---------- + arr : numeric[::1] + Array to compute the kth smallest value for, must be + contiguous + k : Py_ssize_t + + Returns + ------- + numeric + The kth smallest value in arr + """ + cdef: + numeric_t result + + with nogil: + result = kth_smallest_c(&arr[0], k, arr.shape[0]) + + return result + + +# ---------------------------------------------------------------------- +# Pairwise correlation/covariance + + +@cython.boundscheck(False) +@cython.wraparound(False) +def nancorr(const float64_t[:, :] mat, bint cov=False, minp=None): + cdef: + Py_ssize_t i, j, xi, yi, N, K + bint minpv + float64_t[:, ::1] result + ndarray[uint8_t, ndim=2] mask + int64_t nobs = 0 + float64_t vx, vy, dx, dy, meanx, meany, divisor, ssqdmx, ssqdmy, covxy + + N, K = (<object>mat).shape + + if minp is None: + minpv = 1 + else: + minpv = minp + + result = np.empty((K, K), dtype=np.float64) + mask = np.isfinite(mat).view(np.uint8) + + with nogil: + for xi in range(K): + for yi in range(xi + 1): + # Welford's method for the variance-calculation + # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + nobs = ssqdmx = ssqdmy = covxy = meanx = meany = 0 + for i in range(N): + if mask[i, xi] and mask[i, yi]: + vx = mat[i, xi] + vy = mat[i, yi] + nobs += 1 + dx = vx - meanx + dy = vy - meany + meanx += 1 / nobs * dx + meany += 1 / nobs * dy + ssqdmx += (vx - meanx) * dx + ssqdmy += (vy - meany) * dy + covxy += (vx - meanx) * dy + + if nobs < minpv: + result[xi, yi] = result[yi, xi] = NaN + else: + divisor = (nobs - 1.0) if cov else sqrt(ssqdmx * ssqdmy) + + if divisor != 0: + result[xi, yi] = result[yi, xi] = covxy / divisor + else: + result[xi, yi] = result[yi, xi] = NaN + + return result.base + +# ---------------------------------------------------------------------- +# Pairwise Spearman correlation + + +@cython.boundscheck(False) +@cython.wraparound(False) +def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1) -> ndarray: + cdef: + Py_ssize_t i, j, xi, yi, N, K + ndarray[float64_t, ndim=2] result + ndarray[float64_t, ndim=2] ranked_mat + ndarray[float64_t, ndim=1] rankedx, rankedy + float64_t[::1] maskedx, maskedy + ndarray[uint8_t, ndim=2] mask + int64_t nobs = 0 + bint no_nans + float64_t vx, vy, sumx, sumxx, sumyy, mean, divisor + + N, K = (<object>mat).shape + + # Handle the edge case where we know all results will be nan + # to keep conditional logic inside loop simpler + if N < minp: + result = np.full((K, K), np.nan, dtype=np.float64) + return result + + result = np.empty((K, K), dtype=np.float64) + mask = np.isfinite(mat).view(np.uint8) + no_nans = mask.all() + + ranked_mat = np.empty((N, K), dtype=np.float64) + + # Note: we index into maskedx, maskedy in loops up to nobs, but using N is safe + # here since N >= nobs and values are stored contiguously + maskedx = np.empty(N,
dtype=np.float64) + maskedy = np.empty(N, dtype=np.float64) + for i in range(K): + ranked_mat[:, i] = rank_1d(mat[:, i]) + + with nogil: + for xi in range(K): + for yi in range(xi + 1): + sumx = sumxx = sumyy = 0 + + # Fastpath for data with no nans/infs, allows avoiding mask checks + # and array reassignments + if no_nans: + mean = (N + 1) / 2. + + # now the cov numerator + for i in range(N): + vx = ranked_mat[i, xi] - mean + vy = ranked_mat[i, yi] - mean + + sumx += vx * vy + sumxx += vx * vx + sumyy += vy * vy + else: + nobs = 0 + # Keep track of whether we need to recompute ranks + all_ranks = True + for i in range(N): + all_ranks &= not (mask[i, xi] ^ mask[i, yi]) + if mask[i, xi] and mask[i, yi]: + maskedx[nobs] = ranked_mat[i, xi] + maskedy[nobs] = ranked_mat[i, yi] + nobs += 1 + + if nobs < minp: + result[xi, yi] = result[yi, xi] = NaN + continue + else: + if not all_ranks: + with gil: + # We need to slice back to nobs because rank_1d will + # require arrays of nobs length + rankedx = rank_1d(np.asarray(maskedx)[:nobs]) + rankedy = rank_1d(np.asarray(maskedy)[:nobs]) + for i in range(nobs): + maskedx[i] = rankedx[i] + maskedy[i] = rankedy[i] + + mean = (nobs + 1) / 2. + + # now the cov numerator + for i in range(nobs): + vx = maskedx[i] - mean + vy = maskedy[i] - mean + + sumx += vx * vy + sumxx += vx * vx + sumyy += vy * vy + + divisor = sqrt(sumxx * sumyy) + + if divisor != 0: + result[xi, yi] = result[yi, xi] = sumx / divisor + else: + result[xi, yi] = result[yi, xi] = NaN + + return result + + +# ---------------------------------------------------------------------- + +def validate_limit(nobs: int | None, limit=None) -> int: + """ + Check that the `limit` argument is a positive integer. + + Parameters + ---------- + nobs : int + limit : object + + Returns + ------- + int + The limit. 
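+ + Examples + -------- + A quick illustration: + + >>> validate_limit(10) + 10 + >>> validate_limit(10, limit=3) + 3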
+ """ + if limit is None: + lim = nobs + else: + if not util.is_integer_object(limit): + raise ValueError('Limit must be an integer') + if limit < 1: + raise ValueError('Limit must be greater than 0') + lim = limit + + return lim + + +@cython.boundscheck(False) +@cython.wraparound(False) +def pad( + ndarray[numeric_object_t] old, + ndarray[numeric_object_t] new, + limit=None +) -> ndarray: + # -> ndarray[intp_t, ndim=1] + cdef: + Py_ssize_t i, j, nleft, nright + ndarray[intp_t, ndim=1] indexer + numeric_object_t cur, next_val + int lim, fill_count = 0 + + nleft = len(old) + nright = len(new) + indexer = np.empty(nright, dtype=np.intp) + indexer[:] = -1 + + lim = validate_limit(nright, limit) + + if nleft == 0 or nright == 0 or new[nright - 1] < old[0]: + return indexer + + i = j = 0 + + cur = old[0] + + while j <= nright - 1 and new[j] < cur: + j += 1 + + while True: + if j == nright: + break + + if i == nleft - 1: + while j < nright: + if new[j] == cur: + indexer[j] = i + elif new[j] > cur and fill_count < lim: + indexer[j] = i + fill_count += 1 + j += 1 + break + + next_val = old[i + 1] + + while j < nright and cur <= new[j] < next_val: + if new[j] == cur: + indexer[j] = i + elif fill_count < lim: + indexer[j] = i + fill_count += 1 + j += 1 + + fill_count = 0 + i += 1 + cur = next_val + + return indexer + + +@cython.boundscheck(False) +@cython.wraparound(False) +def pad_inplace(numeric_object_t[:] values, uint8_t[:] mask, limit=None): + cdef: + Py_ssize_t i, N + numeric_object_t val + uint8_t prev_mask + int lim, fill_count = 0 + + N = len(values) + + # GH#2778 + if N == 0: + return + + lim = validate_limit(N, limit) + + val = values[0] + prev_mask = mask[0] + for i in range(N): + if mask[i]: + if fill_count >= lim: + continue + fill_count += 1 + values[i] = val + mask[i] = prev_mask + else: + fill_count = 0 + val = values[i] + prev_mask = mask[i] + + +@cython.boundscheck(False) +@cython.wraparound(False) +def pad_2d_inplace(numeric_object_t[:, :] values, uint8_t[:, :] mask, limit=None): + cdef: + Py_ssize_t i, j, N, K + numeric_object_t val + int lim, fill_count = 0 + + K, N = (values).shape + + # GH#2778 + if N == 0: + return + + lim = validate_limit(N, limit) + + for j in range(K): + fill_count = 0 + val = values[j, 0] + for i in range(N): + if mask[j, i]: + if fill_count >= lim or i == 0: + continue + fill_count += 1 + values[j, i] = val + mask[j, i] = False + else: + fill_count = 0 + val = values[j, i] + + +""" +Backfilling logic for generating fill vector + +Diagram of what's going on + +Old New Fill vector Mask + . 0 1 + . 0 1 + . 0 1 +A A 0 1 + . 1 1 + . 1 1 + . 1 1 + . 1 1 + . 1 1 +B B 1 1 + . 2 1 + . 2 1 + . 2 1 +C C 2 1 + . 0 + . 
0 +D +""" + + +@cython.boundscheck(False) +@cython.wraparound(False) +def backfill( + ndarray[numeric_object_t] old, + ndarray[numeric_object_t] new, + limit=None +) -> ndarray: + # -> ndarray[intp_t, ndim=1] + cdef: + Py_ssize_t i, j, nleft, nright + ndarray[intp_t, ndim=1] indexer + numeric_object_t cur, prev + int lim, fill_count = 0 + + nleft = len(old) + nright = len(new) + indexer = np.empty(nright, dtype=np.intp) + indexer[:] = -1 + + lim = validate_limit(nright, limit) + + if nleft == 0 or nright == 0 or new[0] > old[nleft - 1]: + return indexer + + i = nleft - 1 + j = nright - 1 + + cur = old[nleft - 1] + + while j >= 0 and new[j] > cur: + j -= 1 + + while True: + if j < 0: + break + + if i == 0: + while j >= 0: + if new[j] == cur: + indexer[j] = i + elif new[j] < cur and fill_count < lim: + indexer[j] = i + fill_count += 1 + j -= 1 + break + + prev = old[i - 1] + + while j >= 0 and prev < new[j] <= cur: + if new[j] == cur: + indexer[j] = i + elif new[j] < cur and fill_count < lim: + indexer[j] = i + fill_count += 1 + j -= 1 + + fill_count = 0 + i -= 1 + cur = prev + + return indexer + + +def backfill_inplace(numeric_object_t[:] values, uint8_t[:] mask, limit=None): + pad_inplace(values[::-1], mask[::-1], limit=limit) + + +def backfill_2d_inplace(numeric_object_t[:, :] values, + uint8_t[:, :] mask, + limit=None): + pad_2d_inplace(values[:, ::-1], mask[:, ::-1], limit) + + +@cython.boundscheck(False) +@cython.wraparound(False) +def is_monotonic(ndarray[numeric_object_t, ndim=1] arr, bint timelike): + """ + Returns + ------- + tuple + is_monotonic_inc : bool + is_monotonic_dec : bool + is_unique : bool + """ + cdef: + Py_ssize_t i, n + numeric_object_t prev, cur + bint is_monotonic_inc = 1 + bint is_monotonic_dec = 1 + bint is_unique = 1 + bint is_strict_monotonic = 1 + + n = len(arr) + + if n == 1: + if arr[0] != arr[0] or (timelike and arr[0] == NPY_NAT): + # single value is NaN + return False, False, True + else: + return True, True, True + elif n < 2: + return True, True, True + + if timelike and arr[0] == NPY_NAT: + return False, False, True + + if numeric_object_t is not object: + with nogil: + prev = arr[0] + for i in range(1, n): + cur = arr[i] + if timelike and cur == NPY_NAT: + is_monotonic_inc = 0 + is_monotonic_dec = 0 + break + if cur < prev: + is_monotonic_inc = 0 + elif cur > prev: + is_monotonic_dec = 0 + elif cur == prev: + is_unique = 0 + else: + # cur or prev is NaN + is_monotonic_inc = 0 + is_monotonic_dec = 0 + break + if not is_monotonic_inc and not is_monotonic_dec: + is_monotonic_inc = 0 + is_monotonic_dec = 0 + break + prev = cur + else: + # object-dtype, identical to above except we cannot use `with nogil` + prev = arr[0] + for i in range(1, n): + cur = arr[i] + if timelike and cur == NPY_NAT: + is_monotonic_inc = 0 + is_monotonic_dec = 0 + break + if cur < prev: + is_monotonic_inc = 0 + elif cur > prev: + is_monotonic_dec = 0 + elif cur == prev: + is_unique = 0 + else: + # cur or prev is NaN + is_monotonic_inc = 0 + is_monotonic_dec = 0 + break + if not is_monotonic_inc and not is_monotonic_dec: + is_monotonic_inc = 0 + is_monotonic_dec = 0 + break + prev = cur + + is_strict_monotonic = is_unique and (is_monotonic_inc or is_monotonic_dec) + return is_monotonic_inc, is_monotonic_dec, is_strict_monotonic + + +# ---------------------------------------------------------------------- +# rank_1d, rank_2d +# ---------------------------------------------------------------------- + +cdef iu_64_floating_obj_t get_rank_nan_fill_val( + bint rank_nans_highest, + 
iu_64_floating_obj_t[:] _=None +): + """ + Return the value we'll use to represent missing values when sorting, + depending on whether we'd like missing values to end up at the top/bottom. + (The second parameter is unused, but needed for fused type specialization) + """ + if rank_nans_highest: + if iu_64_floating_obj_t is object: + return Infinity() + elif iu_64_floating_obj_t is int64_t: + return util.INT64_MAX + elif iu_64_floating_obj_t is uint64_t: + return util.UINT64_MAX + else: + return np.inf + else: + if iu_64_floating_obj_t is object: + return NegInfinity() + elif iu_64_floating_obj_t is int64_t: + return NPY_NAT + elif iu_64_floating_obj_t is uint64_t: + return 0 + else: + return -np.inf + + +@cython.wraparound(False) +@cython.boundscheck(False) +def rank_1d( + ndarray[iu_64_floating_obj_t, ndim=1] values, + const intp_t[:] labels=None, + bint is_datetimelike=False, + ties_method="average", + bint ascending=True, + bint pct=False, + na_option="keep", +): + """ + Fast NaN-friendly version of ``scipy.stats.rankdata``. + + Parameters + ---------- + values : array of iu_64_floating_obj_t values to be ranked + labels : np.ndarray[np.intp] or None + Array containing unique label for each group, with its ordering + matching up to the corresponding record in `values`. If not called + from a groupby operation, will be None. + is_datetimelike : bool, default False + True if `values` contains datetime-like entries. + ties_method : {'average', 'min', 'max', 'first', 'dense'}, default + 'average' + * average: average rank of group + * min: lowest rank in group + * max: highest rank in group + * first: ranks assigned in order they appear in the array + * dense: like 'min', but rank always increases by 1 between groups + ascending : bool, default True + False for ranks by high (1) to low (N) + pct : bool, default False + Compute percentage rank of data within each group + na_option : {'keep', 'top', 'bottom'}, default 'keep' + * keep: leave NA values where they are + * top: smallest rank if ascending + * bottom: smallest rank if descending + """ + cdef: + TiebreakEnumType tiebreak + Py_ssize_t N + int64_t[::1] grp_sizes + intp_t[:] lexsort_indexer + float64_t[::1] out + ndarray[iu_64_floating_obj_t, ndim=1] masked_vals + iu_64_floating_obj_t[:] masked_vals_memview + uint8_t[:] mask + bint keep_na, nans_rank_highest, check_labels, check_mask + iu_64_floating_obj_t nan_fill_val + + tiebreak = tiebreakers[ties_method] + if tiebreak == TIEBREAK_FIRST: + if not ascending: + tiebreak = TIEBREAK_FIRST_DESCENDING + + keep_na = na_option == 'keep' + + N = len(values) + if labels is not None: + # TODO(cython3): cast won't be necessary (#2992) + assert <Py_ssize_t>len(labels) == N + out = np.empty(N) + grp_sizes = np.ones(N, dtype=np.int64) + + # If we don't care about labels, can short-circuit later label + # comparisons + check_labels = labels is not None + + # For cases where a mask is not possible, we can avoid mask checks + check_mask = not (iu_64_floating_obj_t is uint64_t or + (iu_64_floating_obj_t is int64_t and not is_datetimelike)) + + # Copy values into new array in order to fill missing data + # with mask, without obfuscating location of missing data + # in values array + if iu_64_floating_obj_t is object and values.dtype != np.object_: + masked_vals = values.astype('O') + else: + masked_vals = values.copy() + + if iu_64_floating_obj_t is object: + mask = missing.isnaobj(masked_vals) + elif iu_64_floating_obj_t is int64_t and is_datetimelike: + mask =
(masked_vals == NPY_NAT).astype(np.uint8) + elif iu_64_floating_obj_t is float64_t: + mask = np.isnan(masked_vals).astype(np.uint8) + else: + mask = np.zeros(shape=len(masked_vals), dtype=np.uint8) + + # If `na_option == 'top'`, we want to assign the lowest rank + # to NaN regardless of ascending/descending. So if ascending, + # fill with lowest value of type to end up with lowest rank. + # If descending, fill with highest value since descending + # will flip the ordering to still end up with lowest rank. + # Symmetric logic applies to `na_option == 'bottom'` + nans_rank_highest = ascending ^ (na_option == 'top') + nan_fill_val = get_rank_nan_fill_val[iu_64_floating_obj_t](nans_rank_highest) + if nans_rank_highest: + order = [masked_vals, mask] + else: + order = [masked_vals, ~(np.asarray(mask))] + + if check_labels: + order.append(labels) + + np.putmask(masked_vals, mask, nan_fill_val) + # putmask doesn't accept a memoryview, so we assign as a separate step + masked_vals_memview = masked_vals + + # lexsort using labels, then mask, then actual values + # each label corresponds to a different group value, + # the mask helps you differentiate missing values before + # performing sort on the actual values + lexsort_indexer = np.lexsort(order).astype(np.intp, copy=False) + + if not ascending: + lexsort_indexer = lexsort_indexer[::-1] + + with nogil: + rank_sorted_1d( + out, + grp_sizes, + lexsort_indexer, + masked_vals_memview, + mask, + check_mask=check_mask, + N=N, + tiebreak=tiebreak, + keep_na=keep_na, + pct=pct, + labels=labels, + ) + + return np.asarray(out) + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef void rank_sorted_1d( + float64_t[::1] out, + int64_t[::1] grp_sizes, + const intp_t[:] sort_indexer, + # Can make const with cython3 (https://github.com/cython/cython/issues/3222) + iu_64_floating_obj_t[:] masked_vals, + const uint8_t[:] mask, + bint check_mask, + Py_ssize_t N, + TiebreakEnumType tiebreak=TIEBREAK_AVERAGE, + bint keep_na=True, + bint pct=False, + # https://github.com/cython/cython/issues/1630, only trailing arguments can + # currently be omitted for cdef functions, which is why we keep this at the end + const intp_t[:] labels=None, +) nogil: + """ + See rank_1d.__doc__. Handles only actual ranking, so sorting and masking should + be handled in the caller. Note that `out` and `grp_sizes` are modified inplace. + + Parameters + ---------- + out : float64_t[::1] + Array to store computed ranks + grp_sizes : int64_t[::1] + Array to store group counts, only used if pct=True. Should only be None + if labels is None. + sort_indexer : intp_t[:] + Array of indices which sorts masked_vals + masked_vals : iu_64_floating_obj_t[:] + The values input to rank_1d, with missing values replaced by fill values + mask : uint8_t[:] + Array where entries are True if the value is missing, False otherwise. + check_mask : bool + If False, assumes the mask is all False to skip mask indexing + N : Py_ssize_t + The number of elements to rank. Note: it is not always true that + N == len(out) or N == len(masked_vals) (see `nancorr_spearman` usage for why) + tiebreak : TiebreakEnumType, default TIEBREAK_AVERAGE + See rank_1d.__doc__ for the different modes + keep_na : bool, default True + Whether or not to keep nulls + pct : bool, default False + Compute percentage rank of data within each group + labels : See rank_1d.__doc__, default None. None implies all labels are the same. 
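+
+    Examples
+    --------
+    With masked_vals = [3, 1, 2], sort_indexer = [1, 2, 0], an all-False mask
+    (check_mask=False), labels=None, and tiebreak=TIEBREAK_AVERAGE, ``out`` is
+    filled with [3.0, 1.0, 2.0]: each value's 1-based rank within the single
+    group.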
+ """ + + cdef: + Py_ssize_t i, j, dups=0, sum_ranks=0, + Py_ssize_t grp_start=0, grp_vals_seen=1, grp_na_count=0 + bint at_end, next_val_diff, group_changed, check_labels + int64_t grp_size + + check_labels = labels is not None + + # Loop over the length of the value array + # each incremental i value can be looked up in the lexsort_indexer + # array that we sorted previously, which gives us the location of + # that sorted value for retrieval back from the original + # values / masked_vals arrays + # TODO(cython3): de-duplicate once cython supports conditional nogil + if iu_64_floating_obj_t is object: + with gil: + for i in range(N): + at_end = i == N - 1 + + # dups and sum_ranks will be incremented each loop where + # the value / group remains the same, and should be reset + # when either of those change. Used to calculate tiebreakers + dups += 1 + sum_ranks += i - grp_start + 1 + + next_val_diff = at_end or are_diff(masked_vals[sort_indexer[i]], + masked_vals[sort_indexer[i+1]]) + + # We'll need this check later anyway to determine group size, so just + # compute it here since shortcircuiting won't help + group_changed = at_end or (check_labels and + (labels[sort_indexer[i]] + != labels[sort_indexer[i+1]])) + + # Update out only when there is a transition of values or labels. + # When a new value or group is encountered, go back #dups steps( + # the number of occurrence of current value) and assign the ranks + # based on the starting index of the current group (grp_start) + # and the current index + if (next_val_diff or group_changed or (check_mask and + (mask[sort_indexer[i]] + ^ mask[sort_indexer[i+1]]))): + + # If keep_na, check for missing values and assign back + # to the result where appropriate + if keep_na and check_mask and mask[sort_indexer[i]]: + grp_na_count = dups + for j in range(i - dups + 1, i + 1): + out[sort_indexer[j]] = NaN + elif tiebreak == TIEBREAK_AVERAGE: + for j in range(i - dups + 1, i + 1): + out[sort_indexer[j]] = sum_ranks / dups + elif tiebreak == TIEBREAK_MIN: + for j in range(i - dups + 1, i + 1): + out[sort_indexer[j]] = i - grp_start - dups + 2 + elif tiebreak == TIEBREAK_MAX: + for j in range(i - dups + 1, i + 1): + out[sort_indexer[j]] = i - grp_start + 1 + + # With n as the previous rank in the group and m as the number + # of duplicates in this stretch, if TIEBREAK_FIRST and ascending, + # then rankings should be n + 1, n + 2 ... n + m + elif tiebreak == TIEBREAK_FIRST: + for j in range(i - dups + 1, i + 1): + out[sort_indexer[j]] = j + 1 - grp_start + + # If TIEBREAK_FIRST and descending, the ranking should be + # n + m, n + (m - 1) ... n + 1. This is equivalent to + # (i - dups + 1) + (i - j + 1) - grp_start + elif tiebreak == TIEBREAK_FIRST_DESCENDING: + for j in range(i - dups + 1, i + 1): + out[sort_indexer[j]] = 2 * i - j - dups + 2 - grp_start + elif tiebreak == TIEBREAK_DENSE: + for j in range(i - dups + 1, i + 1): + out[sort_indexer[j]] = grp_vals_seen + + # Look forward to the next value (using the sorting in + # lexsort_indexer). If the value does not equal the current + # value then we need to reset the dups and sum_ranks, knowing + # that a new value is coming up. The conditional also needs + # to handle nan equality and the end of iteration. 
If group + # changes we do not record seeing a new value in the group + if not group_changed and (next_val_diff or (check_mask and + (mask[sort_indexer[i]] + ^ mask[sort_indexer[i+1]]))): + dups = sum_ranks = 0 + grp_vals_seen += 1 + + # Similar to the previous conditional, check now if we are + # moving to a new group. If so, keep track of the index where + # the new group occurs, so the tiebreaker calculations can + # decrement that from their position. Fill in the size of each + # group encountered (used by pct calculations later). Also be + # sure to reset any of the items helping to calculate dups + if group_changed: + + # If not dense tiebreak, group size used to compute + # percentile will be # of non-null elements in group + if tiebreak != TIEBREAK_DENSE: + grp_size = i - grp_start + 1 - grp_na_count + + # Otherwise, it will be the number of distinct values + # in the group, subtracting 1 if NaNs are present + # since that is a distinct value we shouldn't count + else: + grp_size = grp_vals_seen - (grp_na_count > 0) + + for j in range(grp_start, i + 1): + grp_sizes[sort_indexer[j]] = grp_size + + dups = sum_ranks = 0 + grp_na_count = 0 + grp_start = i + 1 + grp_vals_seen = 1 + else: + for i in range(N): + at_end = i == N - 1 + + # dups and sum_ranks will be incremented each loop where + # the value / group remains the same, and should be reset + # when either of those change. Used to calculate tiebreakers + dups += 1 + sum_ranks += i - grp_start + 1 + + next_val_diff = at_end or (masked_vals[sort_indexer[i]] + != masked_vals[sort_indexer[i+1]]) + + # We'll need this check later anyway to determine group size, so just + # compute it here since shortcircuiting won't help + group_changed = at_end or (check_labels and + (labels[sort_indexer[i]] + != labels[sort_indexer[i+1]])) + + # Update out only when there is a transition of values or labels. + # When a new value or group is encountered, go back #dups steps( + # the number of occurrence of current value) and assign the ranks + # based on the starting index of the current group (grp_start) + # and the current index + if (next_val_diff or group_changed + or (check_mask and + (mask[sort_indexer[i]] ^ mask[sort_indexer[i+1]]))): + + # If keep_na, check for missing values and assign back + # to the result where appropriate + if keep_na and check_mask and mask[sort_indexer[i]]: + grp_na_count = dups + for j in range(i - dups + 1, i + 1): + out[sort_indexer[j]] = NaN + elif tiebreak == TIEBREAK_AVERAGE: + for j in range(i - dups + 1, i + 1): + out[sort_indexer[j]] = sum_ranks / dups + elif tiebreak == TIEBREAK_MIN: + for j in range(i - dups + 1, i + 1): + out[sort_indexer[j]] = i - grp_start - dups + 2 + elif tiebreak == TIEBREAK_MAX: + for j in range(i - dups + 1, i + 1): + out[sort_indexer[j]] = i - grp_start + 1 + + # With n as the previous rank in the group and m as the number + # of duplicates in this stretch, if TIEBREAK_FIRST and ascending, + # then rankings should be n + 1, n + 2 ... n + m + elif tiebreak == TIEBREAK_FIRST: + for j in range(i - dups + 1, i + 1): + out[sort_indexer[j]] = j + 1 - grp_start + + # If TIEBREAK_FIRST and descending, the ranking should be + # n + m, n + (m - 1) ... n + 1. 
This is equivalent to + # (i - dups + 1) + (i - j + 1) - grp_start + elif tiebreak == TIEBREAK_FIRST_DESCENDING: + for j in range(i - dups + 1, i + 1): + out[sort_indexer[j]] = 2 * i - j - dups + 2 - grp_start + elif tiebreak == TIEBREAK_DENSE: + for j in range(i - dups + 1, i + 1): + out[sort_indexer[j]] = grp_vals_seen + + # Look forward to the next value (using the sorting in + # lexsort_indexer). If the value does not equal the current + # value then we need to reset the dups and sum_ranks, knowing + # that a new value is coming up. The conditional also needs + # to handle nan equality and the end of iteration. If group + # changes we do not record seeing a new value in the group + if not group_changed and (next_val_diff + or (check_mask and + (mask[sort_indexer[i]] + ^ mask[sort_indexer[i+1]]))): + dups = sum_ranks = 0 + grp_vals_seen += 1 + + # Similar to the previous conditional, check now if we are + # moving to a new group. If so, keep track of the index where + # the new group occurs, so the tiebreaker calculations can + # decrement that from their position. Fill in the size of each + # group encountered (used by pct calculations later). Also be + # sure to reset any of the items helping to calculate dups + if group_changed: + + # If not dense tiebreak, group size used to compute + # percentile will be # of non-null elements in group + if tiebreak != TIEBREAK_DENSE: + grp_size = i - grp_start + 1 - grp_na_count + + # Otherwise, it will be the number of distinct values + # in the group, subtracting 1 if NaNs are present + # since that is a distinct value we shouldn't count + else: + grp_size = grp_vals_seen - (grp_na_count > 0) + + for j in range(grp_start, i + 1): + grp_sizes[sort_indexer[j]] = grp_size + + dups = sum_ranks = 0 + grp_na_count = 0 + grp_start = i + 1 + grp_vals_seen = 1 + + if pct: + for i in range(N): + if grp_sizes[i] != 0: + out[i] = out[i] / grp_sizes[i] + + +def rank_2d( + ndarray[iu_64_floating_obj_t, ndim=2] in_arr, + int axis=0, + bint is_datetimelike=False, + ties_method="average", + bint ascending=True, + na_option="keep", + bint pct=False, +): + """ + Fast NaN-friendly version of ``scipy.stats.rankdata``. 
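+
+    Ranks each column of `in_arr` independently (each row when ``axis=1``);
+    the remaining parameters have the same meaning as in rank_1d.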
+ """ + cdef: + Py_ssize_t k, n, col + float64_t[::1, :] out # Column-major so columns are contiguous + int64_t[::1] grp_sizes + ndarray[iu_64_floating_obj_t, ndim=2] values + iu_64_floating_obj_t[:, :] masked_vals + intp_t[:, :] sort_indexer + uint8_t[:, :] mask + TiebreakEnumType tiebreak + bint check_mask, keep_na, nans_rank_highest + iu_64_floating_obj_t nan_fill_val + + tiebreak = tiebreakers[ties_method] + if tiebreak == TIEBREAK_FIRST: + if not ascending: + tiebreak = TIEBREAK_FIRST_DESCENDING + + keep_na = na_option == 'keep' + + # For cases where a mask is not possible, we can avoid mask checks + check_mask = not (iu_64_floating_obj_t is uint64_t or + (iu_64_floating_obj_t is int64_t and not is_datetimelike)) + + if axis == 1: + values = np.asarray(in_arr).T.copy() + else: + values = np.asarray(in_arr).copy() + + if iu_64_floating_obj_t is object: + if values.dtype != np.object_: + values = values.astype('O') + + nans_rank_highest = ascending ^ (na_option == 'top') + if check_mask: + nan_fill_val = get_rank_nan_fill_val[iu_64_floating_obj_t](nans_rank_highest) + + if iu_64_floating_obj_t is object: + mask = missing.isnaobj2d(values).view(np.uint8) + elif iu_64_floating_obj_t is float64_t: + mask = np.isnan(values).view(np.uint8) + + # int64 and datetimelike + else: + mask = (values == NPY_NAT).view(np.uint8) + np.putmask(values, mask, nan_fill_val) + else: + mask = np.zeros_like(values, dtype=np.uint8) + + if nans_rank_highest: + order = (values, mask) + else: + order = (values, ~np.asarray(mask)) + + n, k = (values).shape + out = np.empty((n, k), dtype='f8', order='F') + grp_sizes = np.ones(n, dtype=np.int64) + + # lexsort is slower, so only use if we need to worry about the mask + if check_mask: + sort_indexer = np.lexsort(order, axis=0).astype(np.intp, copy=False) + else: + kind = "stable" if ties_method == "first" else None + sort_indexer = values.argsort(axis=0, kind=kind).astype(np.intp, copy=False) + + if not ascending: + sort_indexer = sort_indexer[::-1, :] + + # putmask doesn't accept a memoryview, so we assign in a separate step + masked_vals = values + with nogil: + for col in range(k): + rank_sorted_1d( + out[:, col], + grp_sizes, + sort_indexer[:, col], + masked_vals[:, col], + mask[:, col], + check_mask=check_mask, + N=n, + tiebreak=tiebreak, + keep_na=keep_na, + pct=pct, + ) + + if axis == 1: + return np.asarray(out.T) + else: + return np.asarray(out) + + +ctypedef fused diff_t: + float64_t + float32_t + int8_t + int16_t + int32_t + int64_t + +ctypedef fused out_t: + float32_t + float64_t + int64_t + + +@cython.boundscheck(False) +@cython.wraparound(False) +def diff_2d( + ndarray[diff_t, ndim=2] arr, # TODO(cython3) update to "const diff_t[:, :] arr" + ndarray[out_t, ndim=2] out, + Py_ssize_t periods, + int axis, + bint datetimelike=False, +): + cdef: + Py_ssize_t i, j, sx, sy, start, stop + bint f_contig = arr.flags.f_contiguous + # bint f_contig = arr.is_f_contig() # TODO(cython3) + diff_t left, right + + # Disable for unsupported dtype combinations, + # see https://github.com/cython/cython/issues/2646 + if (out_t is float32_t + and not (diff_t is float32_t or diff_t is int8_t or diff_t is int16_t)): + raise NotImplementedError # pragma: no cover + elif (out_t is float64_t + and (diff_t is float32_t or diff_t is int8_t or diff_t is int16_t)): + raise NotImplementedError # pragma: no cover + elif out_t is int64_t and diff_t is not int64_t: + # We only have out_t of int64_t if we have datetimelike + raise NotImplementedError # pragma: no cover + else: + # We put 
this inside an indented else block to avoid cython build + # warnings about unreachable code + sx, sy = (arr).shape + with nogil: + if f_contig: + if axis == 0: + if periods >= 0: + start, stop = periods, sx + else: + start, stop = 0, sx + periods + for j in range(sy): + for i in range(start, stop): + left = arr[i, j] + right = arr[i - periods, j] + if out_t is int64_t and datetimelike: + if left == NPY_NAT or right == NPY_NAT: + out[i, j] = NPY_NAT + else: + out[i, j] = left - right + else: + out[i, j] = left - right + else: + if periods >= 0: + start, stop = periods, sy + else: + start, stop = 0, sy + periods + for j in range(start, stop): + for i in range(sx): + left = arr[i, j] + right = arr[i, j - periods] + if out_t is int64_t and datetimelike: + if left == NPY_NAT or right == NPY_NAT: + out[i, j] = NPY_NAT + else: + out[i, j] = left - right + else: + out[i, j] = left - right + else: + if axis == 0: + if periods >= 0: + start, stop = periods, sx + else: + start, stop = 0, sx + periods + for i in range(start, stop): + for j in range(sy): + left = arr[i, j] + right = arr[i - periods, j] + if out_t is int64_t and datetimelike: + if left == NPY_NAT or right == NPY_NAT: + out[i, j] = NPY_NAT + else: + out[i, j] = left - right + else: + out[i, j] = left - right + else: + if periods >= 0: + start, stop = periods, sy + else: + start, stop = 0, sy + periods + for i in range(sx): + for j in range(start, stop): + left = arr[i, j] + right = arr[i, j - periods] + if out_t is int64_t and datetimelike: + if left == NPY_NAT or right == NPY_NAT: + out[i, j] = NPY_NAT + else: + out[i, j] = left - right + else: + out[i, j] = left - right + + +# generated from template +include "algos_common_helper.pxi" +include "algos_take_helper.pxi" diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/algos_common_helper.pxi.in b/.local/lib/python3.8/site-packages/pandas/_libs/algos_common_helper.pxi.in new file mode 100644 index 0000000..c633821 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/algos_common_helper.pxi.in @@ -0,0 +1,72 @@ +""" +Template for each `dtype` helper function using 1-d template + +WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in +""" + +# ---------------------------------------------------------------------- +# ensure_dtype +# ---------------------------------------------------------------------- + + +def ensure_platform_int(object arr): + # GH3033, GH1392 + # platform int is the size of the int pointer, e.g. 
np.intp + if util.is_array(arr): + if (arr).descr.type_num == cnp.NPY_INTP: + return arr + else: + # equiv: arr.astype(np.intp) + return cnp.PyArray_Cast(arr, cnp.NPY_INTP) + else: + return np.array(arr, dtype=np.intp) + + +def ensure_object(object arr): + if util.is_array(arr): + if (arr).descr.type_num == NPY_OBJECT: + return arr + else: + # equiv: arr.astype(object) + return cnp.PyArray_Cast(arr, NPY_OBJECT) + else: + return np.array(arr, dtype=np.object_) + +{{py: + +# name, c_type, dtype +dtypes = [('float64', 'FLOAT64', 'float64'), + # ('float32', 'FLOAT32', 'float32'), # disabling bc unused + ('int8', 'INT8', 'int8'), + ('int16', 'INT16', 'int16'), + ('int32', 'INT32', 'int32'), + ('int64', 'INT64', 'int64'), + # Disabling uint and complex dtypes because we do not use them + # (and compiling them increases wheel size) + # ('uint8', 'UINT8', 'uint8'), + # ('uint16', 'UINT16', 'uint16'), + # ('uint32', 'UINT32', 'uint32'), + # ('uint64', 'UINT64', 'uint64'), + # ('complex64', 'COMPLEX64', 'complex64'), + # ('complex128', 'COMPLEX128', 'complex128') +] + +def get_dispatch(dtypes): + + for name, c_type, dtype in dtypes: + yield name, c_type, dtype +}} + +{{for name, c_type, dtype in get_dispatch(dtypes)}} + + +def ensure_{{name}}(object arr, copy=True): + if util.is_array(arr): + if (arr).descr.type_num == NPY_{{c_type}}: + return arr + else: + return arr.astype(np.{{dtype}}, copy=copy) + else: + return np.array(arr, dtype=np.{{dtype}}) + +{{endfor}} diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/algos_take_helper.pxi.in b/.local/lib/python3.8/site-packages/pandas/_libs/algos_take_helper.pxi.in new file mode 100644 index 0000000..2a38586 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/algos_take_helper.pxi.in @@ -0,0 +1,222 @@ +""" +Template for each `dtype` helper function for take + +WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in +""" + +# ---------------------------------------------------------------------- +# take_1d, take_2d +# ---------------------------------------------------------------------- + + +{{py: + +# c_type_in, c_type_out +dtypes = [ + ('uint8_t', 'uint8_t'), + ('uint8_t', 'object'), + ('int8_t', 'int8_t'), + ('int8_t', 'int32_t'), + ('int8_t', 'int64_t'), + ('int8_t', 'float64_t'), + ('int16_t', 'int16_t'), + ('int16_t', 'int32_t'), + ('int16_t', 'int64_t'), + ('int16_t', 'float64_t'), + ('int32_t', 'int32_t'), + ('int32_t', 'int64_t'), + ('int32_t', 'float64_t'), + ('int64_t', 'int64_t'), + ('int64_t', 'float64_t'), + ('float32_t', 'float32_t'), + ('float32_t', 'float64_t'), + ('float64_t', 'float64_t'), + ('object', 'object'), +] + + +def get_dispatch(dtypes): + + for (c_type_in, c_type_out) in dtypes: + + def get_name(dtype_name): + if dtype_name == "object": + return "object" + if dtype_name == "uint8_t": + return "bool" + return dtype_name[:-2] + + name = get_name(c_type_in) + dest = get_name(c_type_out) + + args = dict(name=name, dest=dest, c_type_in=c_type_in, + c_type_out=c_type_out) + + yield (name, dest, c_type_in, c_type_out) + +}} + + +{{for name, dest, c_type_in, c_type_out in get_dispatch(dtypes)}} + + +@cython.wraparound(False) +@cython.boundscheck(False) +{{if c_type_in != "object"}} +def take_1d_{{name}}_{{dest}}(const {{c_type_in}}[:] values, +{{else}} +def take_1d_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=1] values, +{{endif}} + const intp_t[:] indexer, + {{c_type_out}}[:] out, + fill_value=np.nan): + + cdef: + Py_ssize_t i, n, idx + {{c_type_out}} fv + + n = indexer.shape[0] + + fv = 
fill_value + + {{if c_type_out != "object"}} + with nogil: + {{else}} + if True: + {{endif}} + for i in range(n): + idx = indexer[i] + if idx == -1: + out[i] = fv + else: + {{if c_type_in == "uint8_t" and c_type_out == "object"}} + out[i] = True if values[idx] > 0 else False + {{else}} + out[i] = values[idx] + {{endif}} + + +@cython.wraparound(False) +@cython.boundscheck(False) +{{if c_type_in != "object"}} +def take_2d_axis0_{{name}}_{{dest}}(const {{c_type_in}}[:, :] values, +{{else}} +def take_2d_axis0_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values, +{{endif}} + ndarray[intp_t, ndim=1] indexer, + {{c_type_out}}[:, :] out, + fill_value=np.nan): + cdef: + Py_ssize_t i, j, k, n, idx + {{c_type_out}} fv + {{if c_type_in == c_type_out != "object"}} + const {{c_type_out}} *v + {{c_type_out}} *o + {{endif}} + + n = len(indexer) + k = values.shape[1] + + fv = fill_value + + {{if c_type_in == c_type_out != "object"}} + # GH#3130 + if (values.strides[1] == out.strides[1] and + values.strides[1] == sizeof({{c_type_out}}) and + sizeof({{c_type_out}}) * n >= 256): + + for i in range(n): + idx = indexer[i] + if idx == -1: + for j in range(k): + out[i, j] = fv + else: + v = &values[idx, 0] + o = &out[i, 0] + memmove(o, v, (sizeof({{c_type_out}}) * k)) + return + {{endif}} + + for i in range(n): + idx = indexer[i] + if idx == -1: + for j in range(k): + out[i, j] = fv + else: + for j in range(k): + {{if c_type_in == "uint8_t" and c_type_out == "object"}} + out[i, j] = True if values[idx, j] > 0 else False + {{else}} + out[i, j] = values[idx, j] + {{endif}} + + +@cython.wraparound(False) +@cython.boundscheck(False) +{{if c_type_in != "object"}} +def take_2d_axis1_{{name}}_{{dest}}(const {{c_type_in}}[:, :] values, +{{else}} +def take_2d_axis1_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values, +{{endif}} + ndarray[intp_t, ndim=1] indexer, + {{c_type_out}}[:, :] out, + fill_value=np.nan): + + cdef: + Py_ssize_t i, j, k, n, idx + {{c_type_out}} fv + + n = len(values) + k = len(indexer) + + if n == 0 or k == 0: + return + + fv = fill_value + + for i in range(n): + for j in range(k): + idx = indexer[j] + if idx == -1: + out[i, j] = fv + else: + {{if c_type_in == "uint8_t" and c_type_out == "object"}} + out[i, j] = True if values[i, idx] > 0 else False + {{else}} + out[i, j] = values[i, idx] + {{endif}} + + +@cython.wraparound(False) +@cython.boundscheck(False) +def take_2d_multi_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values, + indexer, + ndarray[{{c_type_out}}, ndim=2] out, + fill_value=np.nan): + cdef: + Py_ssize_t i, j, k, n, idx + ndarray[intp_t, ndim=1] idx0 = indexer[0] + ndarray[intp_t, ndim=1] idx1 = indexer[1] + {{c_type_out}} fv + + n = len(idx0) + k = len(idx1) + + fv = fill_value + for i in range(n): + idx = idx0[i] + if idx == -1: + for j in range(k): + out[i, j] = fv + else: + for j in range(k): + if idx1[j] == -1: + out[i, j] = fv + else: + {{if c_type_in == "uint8_t" and c_type_out == "object"}} + out[i, j] = True if values[idx, idx1[j]] > 0 else False + {{else}} + out[i, j] = values[idx, idx1[j]] + {{endif}} + +{{endfor}} diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/arrays.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/_libs/arrays.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..242a621 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/arrays.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/arrays.pxd 
b/.local/lib/python3.8/site-packages/pandas/_libs/arrays.pxd new file mode 100644 index 0000000..737da29 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/arrays.pxd @@ -0,0 +1,11 @@ + +from numpy cimport ndarray + + +cdef class NDArrayBacked: + cdef: + readonly ndarray _ndarray + readonly object _dtype + + cpdef NDArrayBacked _from_backing_data(self, ndarray values) + cpdef __setstate__(self, state) diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/arrays.pyi b/.local/lib/python3.8/site-packages/pandas/_libs/arrays.pyi new file mode 100644 index 0000000..76e4321 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/arrays.pyi @@ -0,0 +1,34 @@ +from typing import Sequence + +import numpy as np + +from pandas._typing import ( + DtypeObj, + Shape, +) + +class NDArrayBacked: + _dtype: DtypeObj + _ndarray: np.ndarray + def __init__(self, values: np.ndarray, dtype: DtypeObj): ... + @classmethod + def _simple_new(cls, values: np.ndarray, dtype: DtypeObj): ... + def _from_backing_data(self, values: np.ndarray): ... + def __setstate__(self, state): ... + def __len__(self) -> int: ... + @property + def shape(self) -> Shape: ... + @property + def ndim(self) -> int: ... + @property + def size(self) -> int: ... + @property + def nbytes(self) -> int: ... + def copy(self): ... + def delete(self, loc, axis=...): ... + def swapaxes(self, axis1, axis2): ... + def repeat(self, repeats: int | Sequence[int], axis: int | None = ...): ... + def reshape(self, *args, **kwargs): ... + def ravel(self, order=...): ... + @property + def T(self): ... diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/arrays.pyx b/.local/lib/python3.8/site-packages/pandas/_libs/arrays.pyx new file mode 100644 index 0000000..8895a2b --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/arrays.pyx @@ -0,0 +1,183 @@ +""" +Cython implementations for internal ExtensionArrays. +""" +cimport cython + +import numpy as np + +cimport numpy as cnp +from cpython cimport PyErr_Clear +from numpy cimport ndarray + +cnp.import_array() + + +@cython.freelist(16) +cdef class NDArrayBacked: + """ + Implementing these methods in cython improves performance quite a bit. + + import pandas as pd + + from pandas._libs.arrays import NDArrayBacked as cls + + dti = pd.date_range("2016-01-01", periods=3) + dta = dti._data + arr = dta._ndarray + + obj = cls._simple_new(arr, arr.dtype) + + # for foo in [arr, dta, obj]: ... 
+ + %timeit foo.copy() + 299 ns ± 30 ns per loop # <-- arr underlying ndarray (for reference) + 530 ns ± 9.24 ns per loop # <-- dta with cython NDArrayBacked + 1.66 µs ± 46.3 ns per loop # <-- dta without cython NDArrayBacked + 328 ns ± 5.29 ns per loop # <-- obj with NDArrayBacked.__cinit__ + 371 ns ± 6.97 ns per loop # <-- obj with NDArrayBacked._simple_new + + %timeit foo.T + 125 ns ± 6.27 ns per loop # <-- arr underlying ndarray (for reference) + 226 ns ± 7.66 ns per loop # <-- dta with cython NDArrayBacked + 911 ns ± 16.6 ns per loop # <-- dta without cython NDArrayBacked + 215 ns ± 4.54 ns per loop # <-- obj with NDArrayBacked._simple_new + + """ + # TODO: implement take in terms of cnp.PyArray_TakeFrom + # TODO: implement concat_same_type in terms of cnp.PyArray_Concatenate + + # cdef: + # readonly ndarray _ndarray + # readonly object _dtype + + def __init__(self, ndarray values, object dtype): + self._ndarray = values + self._dtype = dtype + + @classmethod + def _simple_new(cls, ndarray values, object dtype): + cdef: + NDArrayBacked obj + obj = NDArrayBacked.__new__(cls) + obj._ndarray = values + obj._dtype = dtype + return obj + + cpdef NDArrayBacked _from_backing_data(self, ndarray values): + """ + Construct a new ExtensionArray `new_array` with `arr` as its _ndarray. + + This should round-trip: + self == self._from_backing_data(self._ndarray) + """ + # TODO: re-reuse simple_new if/when it can be cpdef + cdef: + NDArrayBacked obj + obj = NDArrayBacked.__new__(type(self)) + obj._ndarray = values + obj._dtype = self._dtype + return obj + + cpdef __setstate__(self, state): + if isinstance(state, dict): + if "_data" in state: + data = state.pop("_data") + elif "_ndarray" in state: + data = state.pop("_ndarray") + else: + raise ValueError # pragma: no cover + self._ndarray = data + self._dtype = state.pop("_dtype") + + for key, val in state.items(): + setattr(self, key, val) + elif isinstance(state, tuple): + if len(state) != 3: + if len(state) == 1 and isinstance(state[0], dict): + self.__setstate__(state[0]) + return + raise NotImplementedError(state) # pragma: no cover + + data, dtype = state[:2] + if isinstance(dtype, np.ndarray): + dtype, data = data, dtype + self._ndarray = data + self._dtype = dtype + + if isinstance(state[2], dict): + for key, val in state[2].items(): + setattr(self, key, val) + else: + raise NotImplementedError(state) # pragma: no cover + else: + raise NotImplementedError(state) # pragma: no cover + + def __len__(self) -> int: + return len(self._ndarray) + + @property + def shape(self): + # object cast bc _ndarray.shape is npy_intp* + return ((self._ndarray)).shape + + @property + def ndim(self) -> int: + return self._ndarray.ndim + + @property + def size(self) -> int: + return self._ndarray.size + + @property + def nbytes(self) -> int: + return self._ndarray.nbytes + + def copy(self, order="C"): + cdef: + cnp.NPY_ORDER order_code + int success + + success = cnp.PyArray_OrderConverter(order, &order_code) + if not success: + # clear exception so that we don't get a SystemError + PyErr_Clear() + # same message used by numpy + msg = f"order must be one of 'C', 'F', 'A', or 'K' (got '{order}')" + raise ValueError(msg) + + res_values = cnp.PyArray_NewCopy(self._ndarray, order_code) + return self._from_backing_data(res_values) + + def delete(self, loc, axis=0): + res_values = np.delete(self._ndarray, loc, axis=axis) + return self._from_backing_data(res_values) + + def swapaxes(self, axis1, axis2): + res_values = cnp.PyArray_SwapAxes(self._ndarray, axis1, axis2) + 
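+        # PyArray_SwapAxes is the C-API equivalent of ndarray.swapaxes and
+        # returns a view on the same buffer, so no data is copied here.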
return self._from_backing_data(res_values) + + # TODO: pass NPY_MAXDIMS equiv to axis=None? + def repeat(self, repeats, axis: int = 0): + if axis is None: + axis = 0 + res_values = cnp.PyArray_Repeat(self._ndarray, repeats, axis) + return self._from_backing_data(res_values) + + def reshape(self, *args, **kwargs): + res_values = self._ndarray.reshape(*args, **kwargs) + return self._from_backing_data(res_values) + + def ravel(self, order="C"): + # cnp.PyArray_OrderConverter(PyObject* obj, NPY_ORDER* order) + # res_values = cnp.PyArray_Ravel(self._ndarray, order) + res_values = self._ndarray.ravel(order) + return self._from_backing_data(res_values) + + @property + def T(self): + res_values = self._ndarray.T + return self._from_backing_data(res_values) + + def transpose(self, *axes): + res_values = self._ndarray.transpose(*axes) + return self._from_backing_data(res_values) diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/dtypes.pxd b/.local/lib/python3.8/site-packages/pandas/_libs/dtypes.pxd new file mode 100644 index 0000000..f87a152 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/dtypes.pxd @@ -0,0 +1,48 @@ +""" +Common location for shared fused types +""" + +from numpy cimport ( + float32_t, + float64_t, + int8_t, + int16_t, + int32_t, + int64_t, + uint8_t, + uint16_t, + uint32_t, + uint64_t, +) + +# All numeric types except complex +ctypedef fused numeric_t: + int8_t + int16_t + int32_t + int64_t + + uint8_t + uint16_t + uint32_t + uint64_t + + float32_t + float64_t + +# All numeric types + object, doesn't include complex +ctypedef fused numeric_object_t: + numeric_t + object + +# i64 + u64 + all float types +ctypedef fused iu_64_floating_t: + float64_t + float32_t + int64_t + uint64_t + +# i64 + u64 + all float types + object +ctypedef fused iu_64_floating_obj_t: + iu_64_floating_t + object diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/groupby.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/_libs/groupby.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..1955c6d Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/groupby.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/groupby.pyi b/.local/lib/python3.8/site-packages/pandas/_libs/groupby.pyi new file mode 100644 index 0000000..8eccd0e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/groupby.pyi @@ -0,0 +1,159 @@ +from typing import Literal + +import numpy as np + +from pandas._typing import npt + +def group_median_float64( + out: np.ndarray, # ndarray[float64_t, ndim=2] + counts: npt.NDArray[np.int64], + values: np.ndarray, # ndarray[float64_t, ndim=2] + labels: npt.NDArray[np.int64], + min_count: int = ..., # Py_ssize_t +) -> None: ... +def group_cumprod_float64( + out: np.ndarray, # float64_t[:, ::1] + values: np.ndarray, # const float64_t[:, :] + labels: np.ndarray, # const int64_t[:] + ngroups: int, + is_datetimelike: bool, + skipna: bool = ..., +) -> None: ... +def group_cumsum( + out: np.ndarray, # numeric[:, ::1] + values: np.ndarray, # ndarray[numeric, ndim=2] + labels: np.ndarray, # const int64_t[:] + ngroups: int, + is_datetimelike: bool, + skipna: bool = ..., +) -> None: ... +def group_shift_indexer( + out: np.ndarray, # int64_t[::1] + labels: np.ndarray, # const int64_t[:] + ngroups: int, + periods: int, +) -> None: ... 
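+# Sketch of the indexer semantics: with labels=[0, 0, 1], mask=[0, 1, 0],
+# direction="ffill", limit=-1 and dropna=False, group_fillna_indexer fills
+# out=[0, 0, 2]; position 1 takes its value from position 0, and the fill
+# index resets at the group boundary.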
+def group_fillna_indexer( + out: np.ndarray, # ndarray[intp_t] + labels: np.ndarray, # ndarray[int64_t] + sorted_labels: npt.NDArray[np.intp], + mask: npt.NDArray[np.uint8], + direction: Literal["ffill", "bfill"], + limit: int, # int64_t + dropna: bool, +) -> None: ... +def group_any_all( + out: np.ndarray, # uint8_t[::1] + values: np.ndarray, # const uint8_t[::1] + labels: np.ndarray, # const int64_t[:] + mask: np.ndarray, # const uint8_t[::1] + val_test: Literal["any", "all"], + skipna: bool, +) -> None: ... +def group_add( + out: np.ndarray, # complexfloating_t[:, ::1] + counts: np.ndarray, # int64_t[::1] + values: np.ndarray, # ndarray[complexfloating_t, ndim=2] + labels: np.ndarray, # const intp_t[:] + min_count: int = ..., + datetimelike: bool = ..., +) -> None: ... +def group_prod( + out: np.ndarray, # floating[:, ::1] + counts: np.ndarray, # int64_t[::1] + values: np.ndarray, # ndarray[floating, ndim=2] + labels: np.ndarray, # const intp_t[:] + min_count: int = ..., +) -> None: ... +def group_var( + out: np.ndarray, # floating[:, ::1] + counts: np.ndarray, # int64_t[::1] + values: np.ndarray, # ndarray[floating, ndim=2] + labels: np.ndarray, # const intp_t[:] + min_count: int = ..., # Py_ssize_t + ddof: int = ..., # int64_t +) -> None: ... +def group_mean( + out: np.ndarray, # floating[:, ::1] + counts: np.ndarray, # int64_t[::1] + values: np.ndarray, # ndarray[floating, ndim=2] + labels: np.ndarray, # const intp_t[:] + min_count: int = ..., # Py_ssize_t + is_datetimelike: bool = ..., # bint + mask: np.ndarray | None = ..., + result_mask: np.ndarray | None = ..., +) -> None: ... +def group_ohlc( + out: np.ndarray, # floating[:, ::1] + counts: np.ndarray, # int64_t[::1] + values: np.ndarray, # ndarray[floating, ndim=2] + labels: np.ndarray, # const intp_t[:] + min_count: int = ..., +) -> None: ... +def group_quantile( + out: npt.NDArray[np.float64], + values: np.ndarray, # ndarray[numeric, ndim=1] + labels: npt.NDArray[np.intp], + mask: npt.NDArray[np.uint8], + sort_indexer: npt.NDArray[np.intp], # const + qs: npt.NDArray[np.float64], # const + interpolation: Literal["linear", "lower", "higher", "nearest", "midpoint"], +) -> None: ... +def group_last( + out: np.ndarray, # rank_t[:, ::1] + counts: np.ndarray, # int64_t[::1] + values: np.ndarray, # ndarray[rank_t, ndim=2] + labels: np.ndarray, # const int64_t[:] + min_count: int = ..., # Py_ssize_t +) -> None: ... +def group_nth( + out: np.ndarray, # rank_t[:, ::1] + counts: np.ndarray, # int64_t[::1] + values: np.ndarray, # ndarray[rank_t, ndim=2] + labels: np.ndarray, # const int64_t[:] + min_count: int = ..., # int64_t + rank: int = ..., # int64_t +) -> None: ... +def group_rank( + out: np.ndarray, # float64_t[:, ::1] + values: np.ndarray, # ndarray[rank_t, ndim=2] + labels: np.ndarray, # const int64_t[:] + ngroups: int, + is_datetimelike: bool, + ties_method: Literal["aveage", "min", "max", "first", "dense"] = ..., + ascending: bool = ..., + pct: bool = ..., + na_option: Literal["keep", "top", "bottom"] = ..., +) -> None: ... +def group_max( + out: np.ndarray, # groupby_t[:, ::1] + counts: np.ndarray, # int64_t[::1] + values: np.ndarray, # ndarray[groupby_t, ndim=2] + labels: np.ndarray, # const int64_t[:] + min_count: int = ..., + mask: np.ndarray | None = ..., + result_mask: np.ndarray | None = ..., +) -> None: ... 
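+# group_min mirrors group_max above; for both, the optional mask/result_mask
+# buffers are used by the nullable (masked) array kernels.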
+def group_min( + out: np.ndarray, # groupby_t[:, ::1] + counts: np.ndarray, # int64_t[::1] + values: np.ndarray, # ndarray[groupby_t, ndim=2] + labels: np.ndarray, # const int64_t[:] + min_count: int = ..., + mask: np.ndarray | None = ..., + result_mask: np.ndarray | None = ..., +) -> None: ... +def group_cummin( + out: np.ndarray, # groupby_t[:, ::1] + values: np.ndarray, # ndarray[groupby_t, ndim=2] + labels: np.ndarray, # const int64_t[:] + ngroups: int, + is_datetimelike: bool, +) -> None: ... +def group_cummax( + out: np.ndarray, # groupby_t[:, ::1] + values: np.ndarray, # ndarray[groupby_t, ndim=2] + labels: np.ndarray, # const int64_t[:] + ngroups: int, + is_datetimelike: bool, +) -> None: ... diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/groupby.pyx b/.local/lib/python3.8/site-packages/pandas/_libs/groupby.pyx new file mode 100644 index 0000000..c0d1405 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/groupby.pyx @@ -0,0 +1,1580 @@ +import cython +from cython import Py_ssize_t + +from cython cimport floating +from libc.stdlib cimport ( + free, + malloc, +) + +import numpy as np + +cimport numpy as cnp +from numpy cimport ( + complex64_t, + complex128_t, + float32_t, + float64_t, + int8_t, + int16_t, + int32_t, + int64_t, + intp_t, + ndarray, + uint8_t, + uint16_t, + uint32_t, + uint64_t, +) +from numpy.math cimport NAN + +cnp.import_array() + +from pandas._libs.algos cimport kth_smallest_c +from pandas._libs.util cimport get_nat + +from pandas._libs.algos import ( + ensure_platform_int, + groupsort_indexer, + rank_1d, + take_2d_axis1_float64_float64, +) + +from pandas._libs.dtypes cimport ( + iu_64_floating_obj_t, + iu_64_floating_t, + numeric_t, +) +from pandas._libs.missing cimport checknull + + +cdef int64_t NPY_NAT = get_nat() +_int64_max = np.iinfo(np.int64).max + +cdef float64_t NaN = np.NaN + +cdef enum InterpolationEnumType: + INTERPOLATION_LINEAR, + INTERPOLATION_LOWER, + INTERPOLATION_HIGHER, + INTERPOLATION_NEAREST, + INTERPOLATION_MIDPOINT + + +cdef inline float64_t median_linear(float64_t* a, int n) nogil: + cdef: + int i, j, na_count = 0 + float64_t result + float64_t* tmp + + if n == 0: + return NaN + + # count NAs + for i in range(n): + if a[i] != a[i]: + na_count += 1 + + if na_count: + if na_count == n: + return NaN + + tmp = malloc((n - na_count) * sizeof(float64_t)) + + j = 0 + for i in range(n): + if a[i] == a[i]: + tmp[j] = a[i] + j += 1 + + a = tmp + n -= na_count + + if n % 2: + result = kth_smallest_c(a, n // 2, n) + else: + result = (kth_smallest_c(a, n // 2, n) + + kth_smallest_c(a, n // 2 - 1, n)) / 2 + + if na_count: + free(a) + + return result + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_median_float64(ndarray[float64_t, ndim=2] out, + ndarray[int64_t] counts, + ndarray[float64_t, ndim=2] values, + ndarray[intp_t] labels, + Py_ssize_t min_count=-1) -> None: + """ + Only aggregates on axis=0 + """ + cdef: + Py_ssize_t i, j, N, K, ngroups, size + ndarray[intp_t] _counts + ndarray[float64_t, ndim=2] data + ndarray[intp_t] indexer + float64_t* ptr + + assert min_count == -1, "'min_count' only used in add and prod" + + ngroups = len(counts) + N, K = (values).shape + + indexer, _counts = groupsort_indexer(labels, ngroups) + counts[:] = _counts[1:] + + data = np.empty((K, N), dtype=np.float64) + ptr = cnp.PyArray_DATA(data) + + take_2d_axis1_float64_float64(values.T, indexer, out=data) + + with nogil: + + for i in range(K): + # exclude NA group + ptr += _counts[0] + for j in range(ngroups): + size = 
_counts[j + 1] + out[j, i] = median_linear(ptr, size) + ptr += size + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_cumprod_float64(float64_t[:, ::1] out, + const float64_t[:, :] values, + const intp_t[::1] labels, + int ngroups, + bint is_datetimelike, + bint skipna=True) -> None: + """ + Cumulative product of columns of `values`, in row groups `labels`. + + Parameters + ---------- + out : np.ndarray[np.float64, ndim=2] + Array to store cumprod in. + values : np.ndarray[np.float64, ndim=2] + Values to take cumprod of. + labels : np.ndarray[np.intp] + Labels to group by. + ngroups : int + Number of groups, larger than all entries of `labels`. + is_datetimelike : bool + Always false, `values` is never datetime-like. + skipna : bool + If true, ignore nans in `values`. + + Notes + ----- + This method modifies the `out` parameter, rather than returning an object. + """ + cdef: + Py_ssize_t i, j, N, K, size + float64_t val + float64_t[:, ::1] accum + intp_t lab + + N, K = (values).shape + accum = np.ones((ngroups, K), dtype=np.float64) + + with nogil: + for i in range(N): + lab = labels[i] + + if lab < 0: + continue + for j in range(K): + val = values[i, j] + if val == val: + accum[lab, j] *= val + out[i, j] = accum[lab, j] + else: + out[i, j] = NaN + if not skipna: + accum[lab, j] = NaN + break + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_cumsum(numeric_t[:, ::1] out, + ndarray[numeric_t, ndim=2] values, + const intp_t[::1] labels, + int ngroups, + is_datetimelike, + bint skipna=True) -> None: + """ + Cumulative sum of columns of `values`, in row groups `labels`. + + Parameters + ---------- + out : np.ndarray[ndim=2] + Array to store cumsum in. + values : np.ndarray[ndim=2] + Values to take cumsum of. + labels : np.ndarray[np.intp] + Labels to group by. + ngroups : int + Number of groups, larger than all entries of `labels`. + is_datetimelike : bool + True if `values` contains datetime-like entries. + skipna : bool + If true, ignore nans in `values`. + + Notes + ----- + This method modifies the `out` parameter, rather than returning an object. 
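+    For float dtypes the running sums are Kahan-compensated: each step computes
+    y = val - compensation; t = accum + y; compensation = (t - accum) - y, so
+    the low-order bits a naive ``accum += val`` would drop are carried forward.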
+ """ + cdef: + Py_ssize_t i, j, N, K, size + numeric_t val, y, t + numeric_t[:, ::1] accum, compensation + intp_t lab + + N, K = (values).shape + accum = np.zeros((ngroups, K), dtype=np.asarray(values).dtype) + compensation = np.zeros((ngroups, K), dtype=np.asarray(values).dtype) + + with nogil: + for i in range(N): + lab = labels[i] + + if lab < 0: + continue + for j in range(K): + val = values[i, j] + + # For floats, use Kahan summation to reduce floating-point + # error (https://en.wikipedia.org/wiki/Kahan_summation_algorithm) + if numeric_t == float32_t or numeric_t == float64_t: + if val == val: + y = val - compensation[lab, j] + t = accum[lab, j] + y + compensation[lab, j] = t - accum[lab, j] - y + accum[lab, j] = t + out[i, j] = t + else: + out[i, j] = NaN + if not skipna: + accum[lab, j] = NaN + break + else: + t = val + accum[lab, j] + accum[lab, j] = t + out[i, j] = t + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_shift_indexer(int64_t[::1] out, const intp_t[::1] labels, + int ngroups, int periods) -> None: + cdef: + Py_ssize_t N, i, j, ii, lab + int offset = 0, sign + int64_t idxer, idxer_slot + int64_t[::1] label_seen = np.zeros(ngroups, dtype=np.int64) + int64_t[:, ::1] label_indexer + + N, = (labels).shape + + if periods < 0: + periods = -periods + offset = N - 1 + sign = -1 + elif periods > 0: + offset = 0 + sign = 1 + + if periods == 0: + with nogil: + for i in range(N): + out[i] = i + else: + # array of each previous indexer seen + label_indexer = np.zeros((ngroups, periods), dtype=np.int64) + with nogil: + for i in range(N): + # reverse iterator if shifting backwards + ii = offset + sign * i + lab = labels[ii] + + # Skip null keys + if lab == -1: + out[ii] = -1 + continue + + label_seen[lab] += 1 + + idxer_slot = label_seen[lab] % periods + idxer = label_indexer[lab, idxer_slot] + + if label_seen[lab] > periods: + out[ii] = idxer + else: + out[ii] = -1 + + label_indexer[lab, idxer_slot] = ii + + +@cython.wraparound(False) +@cython.boundscheck(False) +def group_fillna_indexer(ndarray[intp_t] out, ndarray[intp_t] labels, + ndarray[intp_t] sorted_labels, + ndarray[uint8_t] mask, str direction, + int64_t limit, bint dropna) -> None: + """ + Indexes how to fill values forwards or backwards within a group. + + Parameters + ---------- + out : np.ndarray[np.intp] + Values into which this method will write its results. + labels : np.ndarray[np.intp] + Array containing unique label for each group, with its ordering + matching up to the corresponding record in `values`. + sorted_labels : np.ndarray[np.intp] + obtained by `np.argsort(labels, kind="mergesort")`; reversed if + direction == "bfill" + values : np.ndarray[np.uint8] + Containing the truth value of each element. + mask : np.ndarray[np.uint8] + Indicating whether a value is na or not. 
+ direction : {'ffill', 'bfill'} + Direction for fill to be applied (forwards or backwards, respectively) + limit : Consecutive values to fill before stopping, or -1 for no limit + dropna : Flag to indicate if NaN groups should return all NaN values + + Notes + ----- + This method modifies the `out` parameter rather than returning an object + """ + cdef: + Py_ssize_t i, N, idx + intp_t curr_fill_idx=-1 + int64_t filled_vals = 0 + + N = len(out) + + # Make sure all arrays are the same size + assert N == len(labels) == len(mask) + + with nogil: + for i in range(N): + idx = sorted_labels[i] + if dropna and labels[idx] == -1: # nan-group gets nan-values + curr_fill_idx = -1 + elif mask[idx] == 1: # is missing + # Stop filling once we've hit the limit + if filled_vals >= limit and limit != -1: + curr_fill_idx = -1 + filled_vals += 1 + else: # reset items when not missing + filled_vals = 0 + curr_fill_idx = idx + + out[idx] = curr_fill_idx + + # If we move to the next group, reset + # the fill_idx and counter + if i == N - 1 or labels[idx] != labels[sorted_labels[i + 1]]: + curr_fill_idx = -1 + filled_vals = 0 + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_any_all(int8_t[:, ::1] out, + const int8_t[:, :] values, + const intp_t[::1] labels, + const uint8_t[:, :] mask, + str val_test, + bint skipna, + bint nullable) -> None: + """ + Aggregated boolean values to show truthfulness of group elements. If the + input is a nullable type (nullable=True), the result will be computed + using Kleene logic. + + Parameters + ---------- + out : np.ndarray[np.int8] + Values into which this method will write its results. + labels : np.ndarray[np.intp] + Array containing unique label for each group, with its + ordering matching up to the corresponding record in `values` + values : np.ndarray[np.int8] + Containing the truth value of each element. + mask : np.ndarray[np.uint8] + Indicating whether a value is na or not. + val_test : {'any', 'all'} + String object dictating whether to use any or all truth testing + skipna : bool + Flag to ignore nan values during truth testing + nullable : bool + Whether or not the input is a nullable type. If True, the + result will be computed using Kleene logic + + Notes + ----- + This method modifies the `out` parameter rather than returning an object. + The returned values will either be 0, 1 (False or True, respectively), or + -1 to signify a masked position in the case of a nullable input. 
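+    Under Kleene logic a masked (unknown) entry leaves the result unknown only
+    when the known entries cannot already decide it: for ``val_test='any'``, a
+    group of [False, NA] yields -1, while [True, NA] yields 1.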
+ """ + cdef: + Py_ssize_t i, j, N = len(labels), K = out.shape[1] + intp_t lab + int8_t flag_val, val + + if val_test == 'all': + # Because the 'all' value of an empty iterable in Python is True we can + # start with an array full of ones and set to zero when a False value + # is encountered + flag_val = 0 + elif val_test == 'any': + # Because the 'any' value of an empty iterable in Python is False we + # can start with an array full of zeros and set to one only if any + # value encountered is True + flag_val = 1 + else: + raise ValueError("'bool_func' must be either 'any' or 'all'!") + + out[:] = 1 - flag_val + + with nogil: + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + for j in range(K): + if skipna and mask[i, j]: + continue + + if nullable and mask[i, j]: + # Set the position as masked if `out[lab] != flag_val`, which + # would indicate True/False has not yet been seen for any/all, + # so by Kleene logic the result is currently unknown + if out[lab, j] != flag_val: + out[lab, j] = -1 + continue + + val = values[i, j] + + # If True and 'any' or False and 'all', the result is + # already determined + if val == flag_val: + out[lab, j] = flag_val + + +# ---------------------------------------------------------------------- +# group_add, group_prod, group_var, group_mean, group_ohlc +# ---------------------------------------------------------------------- + +ctypedef fused add_t: + float64_t + float32_t + complex64_t + complex128_t + object + +ctypedef fused mean_t: + float64_t + float32_t + complex64_t + complex128_t + + +@cython.wraparound(False) +@cython.boundscheck(False) +def group_add(add_t[:, ::1] out, + int64_t[::1] counts, + ndarray[add_t, ndim=2] values, + const intp_t[::1] labels, + Py_ssize_t min_count=0, + bint datetimelike=False) -> None: + """ + Only aggregates on axis=0 using Kahan summation + """ + cdef: + Py_ssize_t i, j, N, K, lab, ncounts = len(counts) + add_t val, t, y + add_t[:, ::1] sumx, compensation + int64_t[:, ::1] nobs + Py_ssize_t len_values = len(values), len_labels = len(labels) + + if len_values != len_labels: + raise ValueError("len(index) != len(labels)") + + nobs = np.zeros((out).shape, dtype=np.int64) + # the below is equivalent to `np.zeros_like(out)` but faster + sumx = np.zeros((out).shape, dtype=(out).base.dtype) + compensation = np.zeros((out).shape, dtype=(out).base.dtype) + + N, K = (values).shape + + if add_t is object: + # NB: this does not use 'compensation' like the non-object track does. + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + # not nan + if not checknull(val): + nobs[lab, j] += 1 + + if nobs[lab, j] == 1: + # i.e. we haven't added anything yet; avoid TypeError + # if e.g. 
val is a str and sumx[lab, j] is 0 + t = val + else: + t = sumx[lab, j] + val + sumx[lab, j] = t + + for i in range(ncounts): + for j in range(K): + if nobs[i, j] < min_count: + out[i, j] = NAN + else: + out[i, j] = sumx[i, j] + else: + with nogil: + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + # not nan + # With dt64/td64 values, values have been cast to float64 + # instead if int64 for group_add, but the logic + # is otherwise the same as in _treat_as_na + if val == val and not ( + add_t is float64_t + and datetimelike + and val == NPY_NAT + ): + nobs[lab, j] += 1 + y = val - compensation[lab, j] + t = sumx[lab, j] + y + compensation[lab, j] = t - sumx[lab, j] - y + sumx[lab, j] = t + + for i in range(ncounts): + for j in range(K): + if nobs[i, j] < min_count: + out[i, j] = NAN + else: + out[i, j] = sumx[i, j] + + +@cython.wraparound(False) +@cython.boundscheck(False) +def group_prod(floating[:, ::1] out, + int64_t[::1] counts, + ndarray[floating, ndim=2] values, + const intp_t[::1] labels, + Py_ssize_t min_count=0) -> None: + """ + Only aggregates on axis=0 + """ + cdef: + Py_ssize_t i, j, N, K, lab, ncounts = len(counts) + floating val, count + floating[:, ::1] prodx + int64_t[:, ::1] nobs + Py_ssize_t len_values = len(values), len_labels = len(labels) + + if len_values != len_labels: + raise ValueError("len(index) != len(labels)") + + nobs = np.zeros((out).shape, dtype=np.int64) + prodx = np.ones((out).shape, dtype=(out).base.dtype) + + N, K = (values).shape + + with nogil: + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + # not nan + if val == val: + nobs[lab, j] += 1 + prodx[lab, j] *= val + + for i in range(ncounts): + for j in range(K): + if nobs[i, j] < min_count: + out[i, j] = NAN + else: + out[i, j] = prodx[i, j] + + +@cython.wraparound(False) +@cython.boundscheck(False) +@cython.cdivision(True) +def group_var(floating[:, ::1] out, + int64_t[::1] counts, + ndarray[floating, ndim=2] values, + const intp_t[::1] labels, + Py_ssize_t min_count=-1, + int64_t ddof=1) -> None: + cdef: + Py_ssize_t i, j, N, K, lab, ncounts = len(counts) + floating val, ct, oldmean + floating[:, ::1] mean + int64_t[:, ::1] nobs + Py_ssize_t len_values = len(values), len_labels = len(labels) + + assert min_count == -1, "'min_count' only used in add and prod" + + if len_values != len_labels: + raise ValueError("len(index) != len(labels)") + + nobs = np.zeros((out).shape, dtype=np.int64) + mean = np.zeros((out).shape, dtype=(out).base.dtype) + + N, K = (values).shape + + out[:, :] = 0.0 + + with nogil: + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + + for j in range(K): + val = values[i, j] + + # not nan + if val == val: + nobs[lab, j] += 1 + oldmean = mean[lab, j] + mean[lab, j] += (val - oldmean) / nobs[lab, j] + out[lab, j] += (val - mean[lab, j]) * (val - oldmean) + + for i in range(ncounts): + for j in range(K): + ct = nobs[i, j] + if ct <= ddof: + out[i, j] = NAN + else: + out[i, j] /= (ct - ddof) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def group_mean(mean_t[:, ::1] out, + int64_t[::1] counts, + ndarray[mean_t, ndim=2] values, + const intp_t[::1] labels, + Py_ssize_t min_count=-1, + bint is_datetimelike=False, + const uint8_t[:, ::1] mask=None, + uint8_t[:, ::1] result_mask=None + ) -> None: + """ + Compute the mean per label given a label assignment for each value. 
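+    Sums are accumulated with Kahan compensation, as in group_add.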
+ NaN values are ignored. + + Parameters + ---------- + out : np.ndarray[floating] + Values into which this method will write its results. + counts : np.ndarray[int64] + A zeroed array of the same shape as labels, + populated by group sizes during algorithm. + values : np.ndarray[floating] + 2-d array of the values to find the mean of. + labels : np.ndarray[np.intp] + Array containing unique label for each group, with its + ordering matching up to the corresponding record in `values`. + min_count : Py_ssize_t + Only used in add and prod. Always -1. + is_datetimelike : bool + True if `values` contains datetime-like entries. + mask : ndarray[bool, ndim=2], optional + Not used. + result_mask : ndarray[bool, ndim=2], optional + Not used. + + Notes + ----- + This method modifies the `out` parameter rather than returning an object. + `counts` is modified to hold group sizes + """ + + cdef: + Py_ssize_t i, j, N, K, lab, ncounts = len(counts) + mean_t val, count, y, t, nan_val + mean_t[:, ::1] sumx, compensation + int64_t[:, ::1] nobs + Py_ssize_t len_values = len(values), len_labels = len(labels) + + assert min_count == -1, "'min_count' only used in add and prod" + + if len_values != len_labels: + raise ValueError("len(index) != len(labels)") + + # the below is equivalent to `np.zeros_like(out)` but faster + nobs = np.zeros((out).shape, dtype=np.int64) + sumx = np.zeros((out).shape, dtype=(out).base.dtype) + compensation = np.zeros((out).shape, dtype=(out).base.dtype) + + N, K = (values).shape + nan_val = NPY_NAT if is_datetimelike else NAN + + with nogil: + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + # not nan + if val == val and not (is_datetimelike and val == NPY_NAT): + nobs[lab, j] += 1 + y = val - compensation[lab, j] + t = sumx[lab, j] + y + compensation[lab, j] = t - sumx[lab, j] - y + sumx[lab, j] = t + + for i in range(ncounts): + for j in range(K): + count = nobs[i, j] + if nobs[i, j] == 0: + out[i, j] = nan_val + else: + out[i, j] = sumx[i, j] / count + + +@cython.wraparound(False) +@cython.boundscheck(False) +def group_ohlc(floating[:, ::1] out, + int64_t[::1] counts, + ndarray[floating, ndim=2] values, + const intp_t[::1] labels, + Py_ssize_t min_count=-1) -> None: + """ + Only aggregates on axis=0 + """ + cdef: + Py_ssize_t i, j, N, K, lab + floating val + + assert min_count == -1, "'min_count' only used in add and prod" + + if len(labels) == 0: + return + + N, K = (values).shape + + if out.shape[1] != 4: + raise ValueError('Output array must have 4 columns') + + if K > 1: + raise NotImplementedError("Argument 'values' must have only one dimension") + out[:] = np.nan + + with nogil: + for i in range(N): + lab = labels[i] + if lab == -1: + continue + + counts[lab] += 1 + val = values[i, 0] + if val != val: + continue + + if out[lab, 0] != out[lab, 0]: + out[lab, 0] = out[lab, 1] = out[lab, 2] = out[lab, 3] = val + else: + out[lab, 1] = max(out[lab, 1], val) + out[lab, 2] = min(out[lab, 2], val) + out[lab, 3] = val + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_quantile(ndarray[float64_t, ndim=2] out, + ndarray[numeric_t, ndim=1] values, + ndarray[intp_t] labels, + ndarray[uint8_t] mask, + const intp_t[:] sort_indexer, + const float64_t[:] qs, + str interpolation) -> None: + """ + Calculate the quantile per group. + + Parameters + ---------- + out : np.ndarray[np.float64, ndim=2] + Array of aggregated values that will be written to. 
+ values : np.ndarray + Array containing the values to apply the function against. + labels : ndarray[np.intp] + Array containing the group label for each value. + mask : ndarray[np.uint8] + 1-d mask marking missing (NA) entries in `values`. + sort_indexer : ndarray[np.intp] + Indices describing sort order by values and labels. + qs : ndarray[float64_t] + The quantile values to search for. + interpolation : {'linear', 'lower', 'higher', 'nearest', 'midpoint'} + + Notes + ----- + Rather than explicitly returning a value, this function modifies the + provided `out` parameter. + """ + cdef: + Py_ssize_t i, N=len(labels), ngroups, grp_sz, non_na_sz, k, nqs + Py_ssize_t grp_start=0, idx=0 + intp_t lab + InterpolationEnumType interp + float64_t q_val, q_idx, frac, val, next_val + int64_t[::1] counts, non_na_counts + + assert values.shape[0] == N + + if any(not (0 <= q <= 1) for q in qs): + wrong = [x for x in qs if not (0 <= x <= 1)][0] + raise ValueError( + f"Each 'q' must be between 0 and 1. Got '{wrong}' instead" + ) + + inter_methods = { + 'linear': INTERPOLATION_LINEAR, + 'lower': INTERPOLATION_LOWER, + 'higher': INTERPOLATION_HIGHER, + 'nearest': INTERPOLATION_NEAREST, + 'midpoint': INTERPOLATION_MIDPOINT, + } + interp = inter_methods[interpolation] + + nqs = len(qs) + ngroups = len(out) + counts = np.zeros(ngroups, dtype=np.int64) + non_na_counts = np.zeros(ngroups, dtype=np.int64) + + # First figure out the size of every group + with nogil: + for i in range(N): + lab = labels[i] + if lab == -1: # NA group label + continue + + counts[lab] += 1 + if not mask[i]: + non_na_counts[lab] += 1 + + with nogil: + for i in range(ngroups): + # Figure out how many group elements there are + grp_sz = counts[i] + non_na_sz = non_na_counts[i] + + if non_na_sz == 0: + for k in range(nqs): + out[i, k] = NaN + else: + for k in range(nqs): + q_val = qs[k] + + # Calculate where to retrieve the desired value + # Casting to int will intentionally truncate result + idx = grp_start + <int64_t>(q_val * <float64_t>(non_na_sz - 1)) + + val = values[sort_indexer[idx]] + # If requested quantile falls evenly on a particular index + # then write that index's value out. Otherwise interpolate + q_idx = q_val * (non_na_sz - 1) + frac = q_idx % 1 + + if frac == 0.0 or interp == INTERPOLATION_LOWER: + out[i, k] = val + else: + next_val = values[sort_indexer[idx + 1]] + if interp == INTERPOLATION_LINEAR: + out[i, k] = val + (next_val - val) * frac + elif interp == INTERPOLATION_HIGHER: + out[i, k] = next_val + elif interp == INTERPOLATION_MIDPOINT: + out[i, k] = (val + next_val) / 2.0 + elif interp == INTERPOLATION_NEAREST: + if frac > .5 or (frac == .5 and q_val > .5): # Always OK? + out[i, k] = next_val + else: + out[i, k] = val + + # Increment the index reference in sort_indexer for the next group + grp_start += grp_sz + + +# ---------------------------------------------------------------------- +# group_nth, group_last, group_rank +# ---------------------------------------------------------------------- + +cdef inline bint _treat_as_na(iu_64_floating_obj_t val, bint is_datetimelike) nogil: + if iu_64_floating_obj_t is object: + # Should never be used, but we need to avoid the `val != val` below + # or else cython will raise about gil acquisition.
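+ # (an object comparison can invoke arbitrary Python __eq__ code, which requires the GIL)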
+ raise NotImplementedError + + elif iu_64_floating_obj_t is int64_t: + return is_datetimelike and val == NPY_NAT + elif iu_64_floating_obj_t is uint64_t: + # There is no NA value for uint64 + return False + else: + return val != val + + +# GH#31710 use memoryviews once cython 0.30 is released so we can +# use `const iu_64_floating_obj_t[:, :] values` +@cython.wraparound(False) +@cython.boundscheck(False) +def group_last(iu_64_floating_obj_t[:, ::1] out, + int64_t[::1] counts, + ndarray[iu_64_floating_obj_t, ndim=2] values, + const intp_t[::1] labels, + Py_ssize_t min_count=-1) -> None: + """ + Only aggregates on axis=0 + """ + cdef: + Py_ssize_t i, j, N, K, lab, ncounts = len(counts) + iu_64_floating_obj_t val + ndarray[iu_64_floating_obj_t, ndim=2] resx + ndarray[int64_t, ndim=2] nobs + bint runtime_error = False + + # TODO(cython3): + # Instead of `labels.shape[0]` use `len(labels)` + if not len(values) == labels.shape[0]: + raise AssertionError("len(index) != len(labels)") + + min_count = max(min_count, 1) + nobs = np.zeros((<object>out).shape, dtype=np.int64) + if iu_64_floating_obj_t is object: + resx = np.empty((<object>out).shape, dtype=object) + else: + resx = np.empty_like(out) + + N, K = (<object>values).shape + + if iu_64_floating_obj_t is object: + # TODO(cython3): De-duplicate once conditional-nogil is available + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + if not checknull(val): + # NB: use _treat_as_na here once + # conditional-nogil is available. + nobs[lab, j] += 1 + resx[lab, j] = val + + for i in range(ncounts): + for j in range(K): + if nobs[i, j] < min_count: + out[i, j] = None + else: + out[i, j] = resx[i, j] + else: + with nogil: + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + if not _treat_as_na(val, True): + # TODO: Sure we always want is_datetimelike=True? + nobs[lab, j] += 1 + resx[lab, j] = val + + for i in range(ncounts): + for j in range(K): + if nobs[i, j] < min_count: + if iu_64_floating_obj_t is int64_t: + out[i, j] = NPY_NAT + elif iu_64_floating_obj_t is uint64_t: + runtime_error = True + break + else: + out[i, j] = NAN + + else: + out[i, j] = resx[i, j] + + if runtime_error: + # We cannot raise directly above because that is within a nogil + # block.
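+ # Instead the loop sets `runtime_error` and we raise here, once the GIL is held again.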
+ raise RuntimeError("empty group with uint64_t") + + +# GH#31710 use memoryviews once cython 0.30 is released so we can +# use `const iu_64_floating_obj_t[:, :] values` +@cython.wraparound(False) +@cython.boundscheck(False) +def group_nth(iu_64_floating_obj_t[:, ::1] out, + int64_t[::1] counts, + ndarray[iu_64_floating_obj_t, ndim=2] values, + const intp_t[::1] labels, + int64_t min_count=-1, + int64_t rank=1, + ) -> None: + """ + Only aggregates on axis=0 + """ + cdef: + Py_ssize_t i, j, N, K, lab, ncounts = len(counts) + iu_64_floating_obj_t val + ndarray[iu_64_floating_obj_t, ndim=2] resx + ndarray[int64_t, ndim=2] nobs + bint runtime_error = False + + # TODO(cython3): + # Instead of `labels.shape[0]` use `len(labels)` + if not len(values) == labels.shape[0]: + raise AssertionError("len(index) != len(labels)") + + min_count = max(min_count, 1) + nobs = np.zeros((<object>out).shape, dtype=np.int64) + if iu_64_floating_obj_t is object: + resx = np.empty((<object>out).shape, dtype=object) + else: + resx = np.empty_like(out) + + N, K = (<object>values).shape + + if iu_64_floating_obj_t is object: + # TODO(cython3): De-duplicate once conditional-nogil is available + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + if not checknull(val): + # NB: use _treat_as_na here once + # conditional-nogil is available. + nobs[lab, j] += 1 + if nobs[lab, j] == rank: + resx[lab, j] = val + + for i in range(ncounts): + for j in range(K): + if nobs[i, j] < min_count: + out[i, j] = None + else: + out[i, j] = resx[i, j] + + else: + with nogil: + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + if not _treat_as_na(val, True): + # TODO: Sure we always want is_datetimelike=True? + nobs[lab, j] += 1 + if nobs[lab, j] == rank: + resx[lab, j] = val + + for i in range(ncounts): + for j in range(K): + if nobs[i, j] < min_count: + if iu_64_floating_obj_t is int64_t: + out[i, j] = NPY_NAT + elif iu_64_floating_obj_t is uint64_t: + runtime_error = True + break + else: + out[i, j] = NAN + else: + out[i, j] = resx[i, j] + + if runtime_error: + # We cannot raise directly above because that is within a nogil + # block. + raise RuntimeError("empty group with uint64_t") + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_rank(float64_t[:, ::1] out, + ndarray[iu_64_floating_obj_t, ndim=2] values, + const intp_t[::1] labels, + int ngroups, + bint is_datetimelike, str ties_method="average", + bint ascending=True, bint pct=False, str na_option="keep") -> None: + """ + Provides the rank of values within each group. + + Parameters + ---------- + out : np.ndarray[np.float64, ndim=2] + Values to which this method will write its results. + values : np.ndarray of iu_64_floating_obj_t values to be ranked + labels : np.ndarray[np.intp] + Array containing unique label for each group, with its ordering + matching up to the corresponding record in `values` + ngroups : int + This parameter is not used; it is needed to match the signatures of + other groupby functions. + is_datetimelike : bool + True if `values` contains datetime-like entries.
+ ties_method : {'average', 'min', 'max', 'first', 'dense'}, default 'average' + * average: average rank of group + * min: lowest rank in group + * max: highest rank in group + * first: ranks assigned in order they appear in the array + * dense: like 'min', but rank always increases by 1 between groups + ascending : bool, default True + False for ranks by high (1) to low (N) + pct : bool, default False + Compute percentage rank of data within each group + na_option : {'keep', 'top', 'bottom'}, default 'keep' + * keep: leave NA values where they are + * top: smallest rank if ascending + * bottom: smallest rank if descending + + Notes + ----- + This method modifies the `out` parameter rather than returning an object. + """ + cdef: + Py_ssize_t i, k, N + ndarray[float64_t, ndim=1] result + + N = values.shape[1] + + for k in range(N): + result = rank_1d( + values=values[:, k], + labels=labels, + is_datetimelike=is_datetimelike, + ties_method=ties_method, + ascending=ascending, + pct=pct, + na_option=na_option + ) + for i in range(len(result)): + # TODO: why can't we do out[:, k] = result? + out[i, k] = result[i] + + +# ---------------------------------------------------------------------- +# group_min, group_max +# ---------------------------------------------------------------------- + +# TODO: consider implementing for more dtypes + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef group_min_max(iu_64_floating_t[:, ::1] out, + int64_t[::1] counts, + ndarray[iu_64_floating_t, ndim=2] values, + const intp_t[::1] labels, + Py_ssize_t min_count=-1, + bint is_datetimelike=False, + bint compute_max=True, + const uint8_t[:, ::1] mask=None, + uint8_t[:, ::1] result_mask=None): + """ + Compute minimum/maximum of columns of `values`, in row groups `labels`. + + Parameters + ---------- + out : np.ndarray[iu_64_floating_t, ndim=2] + Array to store result in. + counts : np.ndarray[int64] + Input as a zeroed array, populated by group sizes during algorithm + values : array + Values to find column-wise min/max of. + labels : np.ndarray[np.intp] + Labels to group by. + min_count : Py_ssize_t, default -1 + The minimum number of non-NA group elements, NA result if threshold + is not met + is_datetimelike : bool + True if `values` contains datetime-like entries. + compute_max : bint, default True + True to compute group-wise max, False to compute min + mask : ndarray[bool, ndim=2], optional + If not None, indices represent missing values, + otherwise the mask will not be used + result_mask : ndarray[bool, ndim=2], optional + If not None, these specify locations in the output that are NA. + Modified in-place. + + Notes + ----- + This method modifies the `out` parameter, rather than returning an object.
+ `counts` is modified to hold group sizes + """ + cdef: + Py_ssize_t i, j, N, K, lab, ngroups = len(counts) + iu_64_floating_t val, nan_val + ndarray[iu_64_floating_t, ndim=2] group_min_or_max + bint runtime_error = False + int64_t[:, ::1] nobs + bint uses_mask = mask is not None + bint isna_entry + + # TODO(cython3): + # Instead of `labels.shape[0]` use `len(labels)` + if not len(values) == labels.shape[0]: + raise AssertionError("len(index) != len(labels)") + + min_count = max(min_count, 1) + nobs = np.zeros((out).shape, dtype=np.int64) + + group_min_or_max = np.empty_like(out) + if iu_64_floating_t is int64_t: + group_min_or_max[:] = -_int64_max if compute_max else _int64_max + nan_val = NPY_NAT + elif iu_64_floating_t is uint64_t: + # NB: We do not define nan_val because there is no such thing + # for uint64_t. We carefully avoid having to reference it in this + # case. + group_min_or_max[:] = 0 if compute_max else np.iinfo(np.uint64).max + else: + group_min_or_max[:] = -np.inf if compute_max else np.inf + nan_val = NAN + + N, K = (values).shape + + with nogil: + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + if uses_mask: + isna_entry = mask[i, j] + else: + isna_entry = _treat_as_na(val, is_datetimelike) + + if not isna_entry: + nobs[lab, j] += 1 + if compute_max: + if val > group_min_or_max[lab, j]: + group_min_or_max[lab, j] = val + else: + if val < group_min_or_max[lab, j]: + group_min_or_max[lab, j] = val + + for i in range(ngroups): + for j in range(K): + if nobs[i, j] < min_count: + if iu_64_floating_t is uint64_t: + runtime_error = True + break + else: + if uses_mask: + result_mask[i, j] = True + else: + out[i, j] = nan_val + else: + out[i, j] = group_min_or_max[i, j] + + if runtime_error: + # We cannot raise directly above because that is within a nogil + # block. + raise RuntimeError("empty group with uint64_t") + + +@cython.wraparound(False) +@cython.boundscheck(False) +def group_max(iu_64_floating_t[:, ::1] out, + int64_t[::1] counts, + ndarray[iu_64_floating_t, ndim=2] values, + const intp_t[::1] labels, + Py_ssize_t min_count=-1, + bint is_datetimelike=False, + const uint8_t[:, ::1] mask=None, + uint8_t[:, ::1] result_mask=None) -> None: + """See group_min_max.__doc__""" + group_min_max( + out, + counts, + values, + labels, + min_count=min_count, + is_datetimelike=is_datetimelike, + compute_max=True, + mask=mask, + result_mask=result_mask, + ) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def group_min(iu_64_floating_t[:, ::1] out, + int64_t[::1] counts, + ndarray[iu_64_floating_t, ndim=2] values, + const intp_t[::1] labels, + Py_ssize_t min_count=-1, + bint is_datetimelike=False, + const uint8_t[:, ::1] mask=None, + uint8_t[:, ::1] result_mask=None) -> None: + """See group_min_max.__doc__""" + group_min_max( + out, + counts, + values, + labels, + min_count=min_count, + is_datetimelike=is_datetimelike, + compute_max=False, + mask=mask, + result_mask=result_mask, + ) + + +@cython.boundscheck(False) +@cython.wraparound(False) +cdef group_cummin_max(iu_64_floating_t[:, ::1] out, + ndarray[iu_64_floating_t, ndim=2] values, + uint8_t[:, ::1] mask, + const intp_t[::1] labels, + int ngroups, + bint is_datetimelike, + bint skipna, + bint compute_max): + """ + Cumulative minimum/maximum of columns of `values`, in row groups `labels`. + + Parameters + ---------- + out : np.ndarray[iu_64_floating_t, ndim=2] + Array to store cummin/max in. 
+ values : np.ndarray[iu_64_floating_t, ndim=2] + Values to take cummin/max of. + mask : np.ndarray[bool] or None + If not None, indices represent missing values, + otherwise the mask will not be used + labels : np.ndarray[np.intp] + Labels to group by. + ngroups : int + Number of groups, larger than all entries of `labels`. + is_datetimelike : bool + True if `values` contains datetime-like entries. + skipna : bool + If True, ignore nans in `values`. + compute_max : bool + True if cumulative maximum should be computed, False + if cumulative minimum should be computed + + Notes + ----- + This method modifies the `out` parameter, rather than returning an object. + """ + cdef: + iu_64_floating_t[:, ::1] accum + + accum = np.empty((ngroups, (values).shape[1]), dtype=values.dtype) + if iu_64_floating_t is int64_t: + accum[:] = -_int64_max if compute_max else _int64_max + elif iu_64_floating_t is uint64_t: + accum[:] = 0 if compute_max else np.iinfo(np.uint64).max + else: + accum[:] = -np.inf if compute_max else np.inf + + if mask is not None: + masked_cummin_max(out, values, mask, labels, accum, skipna, compute_max) + else: + cummin_max(out, values, labels, accum, skipna, is_datetimelike, compute_max) + + +@cython.boundscheck(False) +@cython.wraparound(False) +cdef cummin_max(iu_64_floating_t[:, ::1] out, + ndarray[iu_64_floating_t, ndim=2] values, + const intp_t[::1] labels, + iu_64_floating_t[:, ::1] accum, + bint skipna, + bint is_datetimelike, + bint compute_max): + """ + Compute the cumulative minimum/maximum of columns of `values`, in row groups + `labels`. + """ + cdef: + Py_ssize_t i, j, N, K + iu_64_floating_t val, mval, na_val + uint8_t[:, ::1] seen_na + intp_t lab + bint na_possible + + if iu_64_floating_t is float64_t or iu_64_floating_t is float32_t: + na_val = NaN + na_possible = True + elif is_datetimelike: + na_val = NPY_NAT + na_possible = True + # Will never be used, just to avoid uninitialized warning + else: + na_val = 0 + na_possible = False + + if na_possible: + seen_na = np.zeros((accum).shape, dtype=np.uint8) + + N, K = (values).shape + with nogil: + for i in range(N): + lab = labels[i] + if lab < 0: + continue + for j in range(K): + if not skipna and na_possible and seen_na[lab, j]: + out[i, j] = na_val + else: + val = values[i, j] + if not _treat_as_na(val, is_datetimelike): + mval = accum[lab, j] + if compute_max: + if val > mval: + accum[lab, j] = mval = val + else: + if val < mval: + accum[lab, j] = mval = val + out[i, j] = mval + else: + seen_na[lab, j] = 1 + out[i, j] = val + + +@cython.boundscheck(False) +@cython.wraparound(False) +cdef masked_cummin_max(iu_64_floating_t[:, ::1] out, + ndarray[iu_64_floating_t, ndim=2] values, + uint8_t[:, ::1] mask, + const intp_t[::1] labels, + iu_64_floating_t[:, ::1] accum, + bint skipna, + bint compute_max): + """ + Compute the cumulative minimum/maximum of columns of `values`, in row groups + `labels` with a masked algorithm. 
+ """ + cdef: + Py_ssize_t i, j, N, K + iu_64_floating_t val, mval + uint8_t[:, ::1] seen_na + intp_t lab + + N, K = (values).shape + seen_na = np.zeros((accum).shape, dtype=np.uint8) + with nogil: + for i in range(N): + lab = labels[i] + if lab < 0: + continue + for j in range(K): + if not skipna and seen_na[lab, j]: + mask[i, j] = 1 + else: + if not mask[i, j]: + val = values[i, j] + mval = accum[lab, j] + if compute_max: + if val > mval: + accum[lab, j] = mval = val + else: + if val < mval: + accum[lab, j] = mval = val + out[i, j] = mval + else: + seen_na[lab, j] = 1 + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_cummin(iu_64_floating_t[:, ::1] out, + ndarray[iu_64_floating_t, ndim=2] values, + const intp_t[::1] labels, + int ngroups, + bint is_datetimelike, + uint8_t[:, ::1] mask=None, + bint skipna=True) -> None: + """See group_cummin_max.__doc__""" + group_cummin_max( + out, + values, + mask, + labels, + ngroups, + is_datetimelike, + skipna, + compute_max=False + ) + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_cummax(iu_64_floating_t[:, ::1] out, + ndarray[iu_64_floating_t, ndim=2] values, + const intp_t[::1] labels, + int ngroups, + bint is_datetimelike, + uint8_t[:, ::1] mask=None, + bint skipna=True) -> None: + """See group_cummin_max.__doc__""" + group_cummin_max( + out, + values, + mask, + labels, + ngroups, + is_datetimelike, + skipna, + compute_max=True + ) diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/hashing.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/_libs/hashing.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..ffe1c6b Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/hashing.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/hashing.pyi b/.local/lib/python3.8/site-packages/pandas/_libs/hashing.pyi new file mode 100644 index 0000000..8361026 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/hashing.pyi @@ -0,0 +1,9 @@ +import numpy as np + +from pandas._typing import npt + +def hash_object_array( + arr: npt.NDArray[np.object_], + key: str, + encoding: str = ..., +) -> npt.NDArray[np.uint64]: ... diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/hashing.pyx b/.local/lib/python3.8/site-packages/pandas/_libs/hashing.pyx new file mode 100644 index 0000000..39caf04 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/hashing.pyx @@ -0,0 +1,198 @@ +# Translated from the reference implementation +# at https://github.com/veorq/SipHash + +import cython + +from libc.stdlib cimport ( + free, + malloc, +) + +import numpy as np + +from numpy cimport ( + import_array, + ndarray, + uint8_t, + uint32_t, + uint64_t, +) + +import_array() + +from pandas._libs.util cimport is_nan + +DEF cROUNDS = 2 +DEF dROUNDS = 4 + + +@cython.boundscheck(False) +def hash_object_array( + ndarray[object] arr, str key, str encoding="utf8" +) -> np.ndarray[np.uint64]: + """ + Parameters + ---------- + arr : 1-d object ndarray of objects + key : hash key, must be 16 byte len encoded + encoding : encoding for key & arr, default to 'utf8' + + Returns + ------- + 1-d uint64 ndarray of hashes. + + Raises + ------ + TypeError + If the array contains mixed types. + + Notes + ----- + Allowed values must be strings, or nulls + mixed array types will raise TypeError. 
+ """ + cdef: + Py_ssize_t i, n + uint64_t[:] result + bytes data, k + uint8_t *kb + uint64_t *lens + char **vecs + char *cdata + object val + list datas = [] + + k = key.encode(encoding) + kb = k + if len(k) != 16: + raise ValueError( + f"key should be a 16-byte string encoded, got {k} (len {len(k)})" + ) + + n = len(arr) + + # create an array of bytes + vecs = malloc(n * sizeof(char *)) + lens = malloc(n * sizeof(uint64_t)) + + for i in range(n): + val = arr[i] + if isinstance(val, bytes): + data = val + elif isinstance(val, str): + data = val.encode(encoding) + elif val is None or is_nan(val): + # null, stringify and encode + data = str(val).encode(encoding) + + elif isinstance(val, tuple): + # GH#28969 we could have a tuple, but need to ensure that + # the tuple entries are themselves hashable before converting + # to str + hash(val) + data = str(val).encode(encoding) + else: + raise TypeError( + f"{val} of type {type(val)} is not a valid type for hashing, " + "must be string or null" + ) + + lens[i] = len(data) + cdata = data + + # keep the references alive through the end of the + # function + datas.append(data) + vecs[i] = cdata + + result = np.empty(n, dtype=np.uint64) + with nogil: + for i in range(n): + result[i] = low_level_siphash(vecs[i], lens[i], kb) + + free(vecs) + free(lens) + return result.base # .base to retrieve underlying np.ndarray + + +cdef inline uint64_t _rotl(uint64_t x, uint64_t b) nogil: + return (x << b) | (x >> (64 - b)) + + +cdef inline uint64_t u8to64_le(uint8_t* p) nogil: + return (p[0] | + p[1] << 8 | + p[2] << 16 | + p[3] << 24 | + p[4] << 32 | + p[5] << 40 | + p[6] << 48 | + p[7] << 56) + + +cdef inline void _sipround(uint64_t* v0, uint64_t* v1, + uint64_t* v2, uint64_t* v3) nogil: + v0[0] += v1[0] + v1[0] = _rotl(v1[0], 13) + v1[0] ^= v0[0] + v0[0] = _rotl(v0[0], 32) + v2[0] += v3[0] + v3[0] = _rotl(v3[0], 16) + v3[0] ^= v2[0] + v0[0] += v3[0] + v3[0] = _rotl(v3[0], 21) + v3[0] ^= v0[0] + v2[0] += v1[0] + v1[0] = _rotl(v1[0], 17) + v1[0] ^= v2[0] + v2[0] = _rotl(v2[0], 32) + + +@cython.cdivision(True) +cdef uint64_t low_level_siphash(uint8_t* data, size_t datalen, + uint8_t* key) nogil: + cdef uint64_t v0 = 0x736f6d6570736575ULL + cdef uint64_t v1 = 0x646f72616e646f6dULL + cdef uint64_t v2 = 0x6c7967656e657261ULL + cdef uint64_t v3 = 0x7465646279746573ULL + cdef uint64_t b + cdef uint64_t k0 = u8to64_le(key) + cdef uint64_t k1 = u8to64_le(key + 8) + cdef uint64_t m + cdef int i + cdef uint8_t* end = data + datalen - (datalen % sizeof(uint64_t)) + cdef int left = datalen & 7 + cdef int left_byte + + b = (datalen) << 56 + v3 ^= k1 + v2 ^= k0 + v1 ^= k1 + v0 ^= k0 + + while (data != end): + m = u8to64_le(data) + v3 ^= m + for i in range(cROUNDS): + _sipround(&v0, &v1, &v2, &v3) + v0 ^= m + + data += sizeof(uint64_t) + + for i in range(left-1, -1, -1): + b |= (data[i]) << (i * 8) + + v3 ^= b + + for i in range(cROUNDS): + _sipround(&v0, &v1, &v2, &v3) + + v0 ^= b + v2 ^= 0xff + + for i in range(dROUNDS): + _sipround(&v0, &v1, &v2, &v3) + + b = v0 ^ v1 ^ v2 ^ v3 + + return b diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/hashtable.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/_libs/hashtable.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..fd246ee Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/hashtable.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/hashtable.pxd 
b/.local/lib/python3.8/site-packages/pandas/_libs/hashtable.pxd new file mode 100644 index 0000000..80d7ab5 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/hashtable.pxd @@ -0,0 +1,141 @@ +from numpy cimport ( + intp_t, + ndarray, +) + +from pandas._libs.khash cimport ( + complex64_t, + complex128_t, + float32_t, + float64_t, + int8_t, + int16_t, + int32_t, + int64_t, + kh_complex64_t, + kh_complex128_t, + kh_float32_t, + kh_float64_t, + kh_int8_t, + kh_int16_t, + kh_int32_t, + kh_int64_t, + kh_pymap_t, + kh_str_t, + kh_uint8_t, + kh_uint16_t, + kh_uint32_t, + kh_uint64_t, + khcomplex64_t, + khcomplex128_t, + uint8_t, + uint16_t, + uint32_t, + uint64_t, +) + +# prototypes for sharing + +cdef class HashTable: + pass + +cdef class UInt64HashTable(HashTable): + cdef kh_uint64_t *table + + cpdef get_item(self, uint64_t val) + cpdef set_item(self, uint64_t key, Py_ssize_t val) + +cdef class Int64HashTable(HashTable): + cdef kh_int64_t *table + + cpdef get_item(self, int64_t val) + cpdef set_item(self, int64_t key, Py_ssize_t val) + +cdef class UInt32HashTable(HashTable): + cdef kh_uint32_t *table + + cpdef get_item(self, uint32_t val) + cpdef set_item(self, uint32_t key, Py_ssize_t val) + +cdef class Int32HashTable(HashTable): + cdef kh_int32_t *table + + cpdef get_item(self, int32_t val) + cpdef set_item(self, int32_t key, Py_ssize_t val) + +cdef class UInt16HashTable(HashTable): + cdef kh_uint16_t *table + + cpdef get_item(self, uint16_t val) + cpdef set_item(self, uint16_t key, Py_ssize_t val) + +cdef class Int16HashTable(HashTable): + cdef kh_int16_t *table + + cpdef get_item(self, int16_t val) + cpdef set_item(self, int16_t key, Py_ssize_t val) + +cdef class UInt8HashTable(HashTable): + cdef kh_uint8_t *table + + cpdef get_item(self, uint8_t val) + cpdef set_item(self, uint8_t key, Py_ssize_t val) + +cdef class Int8HashTable(HashTable): + cdef kh_int8_t *table + + cpdef get_item(self, int8_t val) + cpdef set_item(self, int8_t key, Py_ssize_t val) + +cdef class Float64HashTable(HashTable): + cdef kh_float64_t *table + + cpdef get_item(self, float64_t val) + cpdef set_item(self, float64_t key, Py_ssize_t val) + +cdef class Float32HashTable(HashTable): + cdef kh_float32_t *table + + cpdef get_item(self, float32_t val) + cpdef set_item(self, float32_t key, Py_ssize_t val) + +cdef class Complex64HashTable(HashTable): + cdef kh_complex64_t *table + + cpdef get_item(self, complex64_t val) + cpdef set_item(self, complex64_t key, Py_ssize_t val) + +cdef class Complex128HashTable(HashTable): + cdef kh_complex128_t *table + + cpdef get_item(self, complex128_t val) + cpdef set_item(self, complex128_t key, Py_ssize_t val) + +cdef class PyObjectHashTable(HashTable): + cdef kh_pymap_t *table + + cpdef get_item(self, object val) + cpdef set_item(self, object key, Py_ssize_t val) + + +cdef class StringHashTable(HashTable): + cdef kh_str_t *table + + cpdef get_item(self, str val) + cpdef set_item(self, str key, Py_ssize_t val) + +cdef struct Int64VectorData: + int64_t *data + Py_ssize_t n, m + +cdef class Vector: + cdef bint external_view_exists + +cdef class Int64Vector(Vector): + cdef Int64VectorData *data + cdef ndarray ao + + cdef resize(self) + cpdef ndarray to_array(self) + cdef inline void append(self, int64_t x) + cdef extend(self, int64_t[:] x) diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/hashtable.pyi b/.local/lib/python3.8/site-packages/pandas/_libs/hashtable.pyi new file mode 100644 index 0000000..f4b9064 --- /dev/null +++ 
b/.local/lib/python3.8/site-packages/pandas/_libs/hashtable.pyi @@ -0,0 +1,213 @@ +from typing import ( + Hashable, + Literal, +) + +import numpy as np + +from pandas._typing import npt + +def unique_label_indices( + labels: np.ndarray, # const int64_t[:] +) -> np.ndarray: ... + +class Factorizer: + count: int + def __init__(self, size_hint: int): ... + def get_count(self) -> int: ... + +class ObjectFactorizer(Factorizer): + table: PyObjectHashTable + uniques: ObjectVector + def factorize( + self, + values: npt.NDArray[np.object_], + sort: bool = ..., + na_sentinel=..., + na_value=..., + ) -> npt.NDArray[np.intp]: ... + +class Int64Factorizer(Factorizer): + table: Int64HashTable + uniques: Int64Vector + def factorize( + self, + values: np.ndarray, # const int64_t[:] + sort: bool = ..., + na_sentinel=..., + na_value=..., + ) -> npt.NDArray[np.intp]: ... + +class Int64Vector: + def __init__(self): ... + def __len__(self) -> int: ... + def to_array(self) -> npt.NDArray[np.int64]: ... + +class Int32Vector: + def __init__(self): ... + def __len__(self) -> int: ... + def to_array(self) -> npt.NDArray[np.int32]: ... + +class Int16Vector: + def __init__(self): ... + def __len__(self) -> int: ... + def to_array(self) -> npt.NDArray[np.int16]: ... + +class Int8Vector: + def __init__(self): ... + def __len__(self) -> int: ... + def to_array(self) -> npt.NDArray[np.int8]: ... + +class UInt64Vector: + def __init__(self): ... + def __len__(self) -> int: ... + def to_array(self) -> npt.NDArray[np.uint64]: ... + +class UInt32Vector: + def __init__(self): ... + def __len__(self) -> int: ... + def to_array(self) -> npt.NDArray[np.uint32]: ... + +class UInt16Vector: + def __init__(self): ... + def __len__(self) -> int: ... + def to_array(self) -> npt.NDArray[np.uint16]: ... + +class UInt8Vector: + def __init__(self): ... + def __len__(self) -> int: ... + def to_array(self) -> npt.NDArray[np.uint8]: ... + +class Float64Vector: + def __init__(self): ... + def __len__(self) -> int: ... + def to_array(self) -> npt.NDArray[np.float64]: ... + +class Float32Vector: + def __init__(self): ... + def __len__(self) -> int: ... + def to_array(self) -> npt.NDArray[np.float32]: ... + +class Complex128Vector: + def __init__(self): ... + def __len__(self) -> int: ... + def to_array(self) -> npt.NDArray[np.complex128]: ... + +class Complex64Vector: + def __init__(self): ... + def __len__(self) -> int: ... + def to_array(self) -> npt.NDArray[np.complex64]: ... + +class StringVector: + def __init__(self): ... + def __len__(self) -> int: ... + def to_array(self) -> npt.NDArray[np.object_]: ... + +class ObjectVector: + def __init__(self): ... + def __len__(self) -> int: ... + def to_array(self) -> npt.NDArray[np.object_]: ... + +class HashTable: + # NB: The base HashTable class does _not_ actually have these methods; + # we are putting them here for the sake of mypy to avoid + # reproducing them in each subclass below. + def __init__(self, size_hint: int = ...): ... + def __len__(self) -> int: ... + def __contains__(self, key: Hashable) -> bool: ... + def sizeof(self, deep: bool = ...) -> int: ... + def get_state(self) -> dict[str, int]: ... + # TODO: `item` type is subclass-specific + def get_item(self, item): ... # TODO: return type? + def set_item(self, item) -> None: ... + # FIXME: we don't actually have this for StringHashTable or ObjectHashTable? + def map( + self, + keys: np.ndarray, # np.ndarray[subclass-specific] + values: np.ndarray, # const int64_t[:] + ) -> None: ...
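+ # map_locations maps each value to its own position in the input array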
+ def map_locations( + self, + values: np.ndarray, # np.ndarray[subclass-specific] + ) -> None: ... + def lookup( + self, + values: np.ndarray, # np.ndarray[subclass-specific] + ) -> npt.NDArray[np.intp]: ... + def get_labels( + self, + values: np.ndarray, # np.ndarray[subclass-specific] + uniques, # SubclassTypeVector + count_prior: int = ..., + na_sentinel: int = ..., + na_value: object = ..., + ) -> npt.NDArray[np.intp]: ... + def unique( + self, + values: np.ndarray, # np.ndarray[subclass-specific] + return_inverse: bool = ..., + ) -> tuple[ + np.ndarray, # np.ndarray[subclass-specific] + npt.NDArray[np.intp], + ] | np.ndarray: ... # np.ndarray[subclass-specific] + def _unique( + self, + values: np.ndarray, # np.ndarray[subclass-specific] + uniques, # FooVector + count_prior: int = ..., + na_sentinel: int = ..., + na_value: object = ..., + ignore_na: bool = ..., + return_inverse: bool = ..., + ) -> tuple[ + np.ndarray, # np.ndarray[subclass-specific] + npt.NDArray[np.intp], + ] | np.ndarray: ... # np.ndarray[subclass-specific] + def factorize( + self, + values: np.ndarray, # np.ndarray[subclass-specific] + na_sentinel: int = ..., + na_value: object = ..., + mask=..., + ) -> tuple[np.ndarray, npt.NDArray[np.intp],]: ... # np.ndarray[subclass-specific] + +class Complex128HashTable(HashTable): ... +class Complex64HashTable(HashTable): ... +class Float64HashTable(HashTable): ... +class Float32HashTable(HashTable): ... + +class Int64HashTable(HashTable): + # Only Int64HashTable has get_labels_groupby + def get_labels_groupby( + self, + values: np.ndarray, # const int64_t[:] + ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.int64],]: ... + +class Int32HashTable(HashTable): ... +class Int16HashTable(HashTable): ... +class Int8HashTable(HashTable): ... +class UInt64HashTable(HashTable): ... +class UInt32HashTable(HashTable): ... +class UInt16HashTable(HashTable): ... +class UInt8HashTable(HashTable): ... +class StringHashTable(HashTable): ... +class PyObjectHashTable(HashTable): ... +class IntpHashTable(HashTable): ... + +def duplicated( + values: np.ndarray, + keep: Literal["last", "first", False] = ..., +) -> npt.NDArray[np.bool_]: ... +def mode(values: np.ndarray, dropna: bool) -> np.ndarray: ... +def value_count( + values: np.ndarray, + dropna: bool, +) -> tuple[np.ndarray, npt.NDArray[np.int64],]: ... # np.ndarray[same-as-values] + +# arr and values should have same dtype +def ismember( + arr: np.ndarray, + values: np.ndarray, +) -> npt.NDArray[np.bool_]: ... +def object_hash(obj) -> int: ... +def objects_are_equal(a, b) -> bool: ... 
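The stub file above types the hashtable layer that backs pandas' factorization, uniqueness, and hashing machinery. As a quick orientation before the implementation below, here is a minimal sketch (an editor's illustration, not part of the diff) of how this machinery is reached through the public pandas API; the printed outputs are examples:

import numpy as np
import pandas as pd
from pandas.util import hash_array

values = np.array(["a", "b", "a", None], dtype=object)

# Object dtype is factorized via PyObjectHashTable: each distinct value gets
# an intp code, and missing values get the na_sentinel (-1 by default).
codes, uniques = pd.factorize(values)
print(codes)    # [ 0  1  0 -1]
print(uniques)  # ['a' 'b']

# hash_array returns one uint64 per element; for object dtype it ultimately
# relies on hash_object_array, the SipHash-2-4 routine (cROUNDS = 2,
# dROUNDS = 4) defined in hashing.pyx earlier in this diff.
print(hash_array(values).dtype)  # uint64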
diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/hashtable.pyx b/.local/lib/python3.8/site-packages/pandas/_libs/hashtable.pyx new file mode 100644 index 0000000..6e97c13 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/hashtable.pyx @@ -0,0 +1,182 @@ +cimport cython +from cpython.mem cimport ( + PyMem_Free, + PyMem_Malloc, +) +from cpython.ref cimport ( + Py_INCREF, + PyObject, +) +from libc.stdlib cimport ( + free, + malloc, +) + +import numpy as np + +cimport numpy as cnp +from numpy cimport ( + float64_t, + ndarray, + uint8_t, + uint32_t, +) +from numpy.math cimport NAN + +cnp.import_array() + + +from pandas._libs cimport util +from pandas._libs.khash cimport ( + KHASH_TRACE_DOMAIN, + are_equivalent_float32_t, + are_equivalent_float64_t, + are_equivalent_khcomplex64_t, + are_equivalent_khcomplex128_t, + kh_needed_n_buckets, + kh_python_hash_equal, + kh_python_hash_func, + kh_str_t, + khcomplex64_t, + khcomplex128_t, + khiter_t, +) +from pandas._libs.missing cimport checknull + + +def get_hashtable_trace_domain(): + return KHASH_TRACE_DOMAIN + + +def object_hash(obj): + return kh_python_hash_func(obj) + + +def objects_are_equal(a, b): + return kh_python_hash_equal(a, b) + + +cdef int64_t NPY_NAT = util.get_nat() +SIZE_HINT_LIMIT = (1 << 20) + 7 + + +cdef Py_ssize_t _INIT_VEC_CAP = 128 + +include "hashtable_class_helper.pxi" +include "hashtable_func_helper.pxi" + + +# map derived hash-map types onto basic hash-map types: +if np.dtype(np.intp) == np.dtype(np.int64): + IntpHashTable = Int64HashTable + unique_label_indices = _unique_label_indices_int64 +elif np.dtype(np.intp) == np.dtype(np.int32): + IntpHashTable = Int32HashTable + unique_label_indices = _unique_label_indices_int32 +else: + raise ValueError(np.dtype(np.intp)) + + +cdef class Factorizer: + cdef readonly: + Py_ssize_t count + + def __cinit__(self, size_hint: int): + self.count = 0 + + def get_count(self) -> int: + return self.count + + +cdef class ObjectFactorizer(Factorizer): + cdef public: + PyObjectHashTable table + ObjectVector uniques + + def __cinit__(self, size_hint: int): + self.table = PyObjectHashTable(size_hint) + self.uniques = ObjectVector() + + def factorize( + self, ndarray[object] values, sort=False, na_sentinel=-1, na_value=None + ) -> np.ndarray: + """ + + Returns + ------- + np.ndarray[np.intp] + + Examples + -------- + Factorize values with nans replaced by na_sentinel + + >>> fac = ObjectFactorizer(3) + >>> fac.factorize(np.array([1,2,np.nan], dtype='O'), na_sentinel=20) + array([ 0, 1, 20]) + """ + cdef: + ndarray[intp_t] labels + + if self.uniques.external_view_exists: + uniques = ObjectVector() + uniques.extend(self.uniques.to_array()) + self.uniques = uniques + labels = self.table.get_labels(values, self.uniques, + self.count, na_sentinel, na_value) + mask = (labels == na_sentinel) + # sort on + if sort: + sorter = self.uniques.to_array().argsort() + reverse_indexer = np.empty(len(sorter), dtype=np.intp) + reverse_indexer.put(sorter, np.arange(len(sorter))) + labels = reverse_indexer.take(labels, mode='clip') + labels[mask] = na_sentinel + self.count = len(self.uniques) + return labels + + +cdef class Int64Factorizer(Factorizer): + cdef public: + Int64HashTable table + Int64Vector uniques + + def __cinit__(self, size_hint: int): + self.table = Int64HashTable(size_hint) + self.uniques = Int64Vector() + + def factorize(self, const int64_t[:] values, sort=False, + na_sentinel=-1, na_value=None) -> np.ndarray: + """ + Returns + ------- + ndarray[intp_t] + + Examples + 
-------- + Factorize values with nans replaced by na_sentinel + + >>> fac = Int64Factorizer(3) + >>> fac.factorize(np.array([1,2,3]), na_sentinel=20) + array([0, 1, 2]) + """ + cdef: + ndarray[intp_t] labels + + if self.uniques.external_view_exists: + uniques = Int64Vector() + uniques.extend(self.uniques.to_array()) + self.uniques = uniques + labels = self.table.get_labels(values, self.uniques, + self.count, na_sentinel, + na_value=na_value) + + # sort on + if sort: + sorter = self.uniques.to_array().argsort() + reverse_indexer = np.empty(len(sorter), dtype=np.intp) + reverse_indexer.put(sorter, np.arange(len(sorter))) + + labels = reverse_indexer.take(labels) + + self.count = len(self.uniques) + return labels diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/hashtable_class_helper.pxi.in b/.local/lib/python3.8/site-packages/pandas/_libs/hashtable_class_helper.pxi.in new file mode 100644 index 0000000..0446b67 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/hashtable_class_helper.pxi.in @@ -0,0 +1,1323 @@ +""" +Template for each `dtype` helper function for hashtable + +WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in +""" + + +{{py: + +# name +complex_types = ['complex64', + 'complex128'] +}} + +{{for name in complex_types}} +cdef kh{{name}}_t to_kh{{name}}_t({{name}}_t val) nogil: + cdef kh{{name}}_t res + res.real = val.real + res.imag = val.imag + return res + +{{endfor}} + + +{{py: + + +# name +c_types = ['khcomplex128_t', + 'khcomplex64_t', + 'float64_t', + 'float32_t', + 'int64_t', + 'int32_t', + 'int16_t', + 'int8_t', + 'uint64_t', + 'uint32_t', + 'uint16_t', + 'uint8_t'] +}} + +{{for c_type in c_types}} + +cdef bint is_nan_{{c_type}}({{c_type}} val) nogil: + {{if c_type in {'khcomplex128_t', 'khcomplex64_t'} }} + return val.real != val.real or val.imag != val.imag + {{elif c_type in {'float64_t', 'float32_t'} }} + return val != val + {{else}} + return False + {{endif}} + + +{{if c_type in {'khcomplex128_t', 'khcomplex64_t', 'float64_t', 'float32_t'} }} +# are_equivalent_{{c_type}} is cimported via khash.pxd +{{else}} +cdef bint are_equivalent_{{c_type}}({{c_type}} val1, {{c_type}} val2) nogil: + return val1 == val2 +{{endif}} + +{{endfor}} + + +{{py: + +# name +cimported_types = ['complex64', + 'complex128', + 'float32', + 'float64', + 'int8', + 'int16', + 'int32', + 'int64', + 'pymap', + 'str', + 'strbox', + 'uint8', + 'uint16', + 'uint32', + 'uint64'] +}} + +{{for name in cimported_types}} +from pandas._libs.khash cimport ( + kh_destroy_{{name}}, + kh_exist_{{name}}, + kh_get_{{name}}, + kh_init_{{name}}, + kh_put_{{name}}, + kh_resize_{{name}}, +) + +{{endfor}} + +# ---------------------------------------------------------------------- +# VectorData +# ---------------------------------------------------------------------- + +from pandas._libs.tslibs.util cimport get_c_string +from pandas._libs.missing cimport C_NA + +{{py: + +# name, dtype, c_type +# the generated StringVector is not actually used +# but is included for completeness (rather ObjectVector is used +# for uniques in hashtables) + +dtypes = [('Complex128', 'complex128', 'khcomplex128_t'), + ('Complex64', 'complex64', 'khcomplex64_t'), + ('Float64', 'float64', 'float64_t'), + ('Float32', 'float32', 'float32_t'), + ('Int64', 'int64', 'int64_t'), + ('Int32', 'int32', 'int32_t'), + ('Int16', 'int16', 'int16_t'), + ('Int8', 'int8', 'int8_t'), + ('String', 'string', 'char *'), + ('UInt64', 'uint64', 'uint64_t'), + ('UInt32', 'uint32', 'uint32_t'), + ('UInt16', 'uint16', 
'uint16_t'), + ('UInt8', 'uint8', 'uint8_t')] +}} + +{{for name, dtype, c_type in dtypes}} + + +{{if dtype != 'int64'}} +# Int64VectorData is defined in the .pxd file because it is needed (indirectly) +# by IntervalTree + +ctypedef struct {{name}}VectorData: + {{c_type}} *data + Py_ssize_t n, m + +{{endif}} + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline void append_data_{{dtype}}({{name}}VectorData *data, + {{c_type}} x) nogil: + + data.data[data.n] = x + data.n += 1 + +{{endfor}} + +ctypedef fused vector_data: + Int64VectorData + Int32VectorData + Int16VectorData + Int8VectorData + UInt64VectorData + UInt32VectorData + UInt16VectorData + UInt8VectorData + Float64VectorData + Float32VectorData + Complex128VectorData + Complex64VectorData + StringVectorData + +cdef inline bint needs_resize(vector_data *data) nogil: + return data.n == data.m + +# ---------------------------------------------------------------------- +# Vector +# ---------------------------------------------------------------------- + +cdef class Vector: + # cdef readonly: + # bint external_view_exists + + def __cinit__(self): + self.external_view_exists = False + + +{{py: + +# name, dtype, c_type +dtypes = [('Complex128', 'complex128', 'khcomplex128_t'), + ('Complex64', 'complex64', 'khcomplex64_t'), + ('Float64', 'float64', 'float64_t'), + ('UInt64', 'uint64', 'uint64_t'), + ('Int64', 'int64', 'int64_t'), + ('Float32', 'float32', 'float32_t'), + ('UInt32', 'uint32', 'uint32_t'), + ('Int32', 'int32', 'int32_t'), + ('UInt16', 'uint16', 'uint16_t'), + ('Int16', 'int16', 'int16_t'), + ('UInt8', 'uint8', 'uint8_t'), + ('Int8', 'int8', 'int8_t')] + +}} + +{{for name, dtype, c_type in dtypes}} + +cdef class {{name}}Vector(Vector): + + # For int64 we have to put this declaration in the .pxd file; + # Int64Vector is the only one we need exposed for other cython files. 
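+ # (see the Int64VectorData struct and Int64Vector class in hashtable.pxd, earlier in this diff)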
+ {{if dtype != 'int64'}} + cdef: + {{name}}VectorData *data + ndarray ao + {{endif}} + + def __cinit__(self): + self.data = <{{name}}VectorData *>PyMem_Malloc( + sizeof({{name}}VectorData)) + if not self.data: + raise MemoryError() + self.data.n = 0 + self.data.m = _INIT_VEC_CAP + self.ao = np.empty(self.data.m, dtype=np.{{dtype}}) + self.data.data = <{{c_type}}*>self.ao.data + + cdef resize(self): + self.data.m = max(self.data.m * 4, _INIT_VEC_CAP) + self.ao.resize(self.data.m, refcheck=False) + self.data.data = <{{c_type}}*>self.ao.data + + def __dealloc__(self): + if self.data is not NULL: + PyMem_Free(self.data) + self.data = NULL + + def __len__(self) -> int: + return self.data.n + + cpdef ndarray to_array(self): + if self.data.m != self.data.n: + if self.external_view_exists: + # should never happen + raise ValueError("should have raised on append()") + self.ao.resize(self.data.n, refcheck=False) + self.data.m = self.data.n + self.external_view_exists = True + return self.ao + + cdef inline void append(self, {{c_type}} x): + + if needs_resize(self.data): + if self.external_view_exists: + raise ValueError("external reference but " + "Vector.resize() needed") + self.resize() + + append_data_{{dtype}}(self.data, x) + + cdef extend(self, const {{c_type}}[:] x): + for i in range(len(x)): + self.append(x[i]) + +{{endfor}} + +cdef class StringVector(Vector): + + cdef: + StringVectorData *data + + def __cinit__(self): + self.data = PyMem_Malloc(sizeof(StringVectorData)) + if not self.data: + raise MemoryError() + self.data.n = 0 + self.data.m = _INIT_VEC_CAP + self.data.data = malloc(self.data.m * sizeof(char *)) + if not self.data.data: + raise MemoryError() + + cdef resize(self): + cdef: + char **orig_data + Py_ssize_t i, m + + m = self.data.m + self.data.m = max(self.data.m * 4, _INIT_VEC_CAP) + + orig_data = self.data.data + self.data.data = malloc(self.data.m * sizeof(char *)) + if not self.data.data: + raise MemoryError() + for i in range(m): + self.data.data[i] = orig_data[i] + + def __dealloc__(self): + if self.data is not NULL: + if self.data.data is not NULL: + free(self.data.data) + PyMem_Free(self.data) + self.data = NULL + + def __len__(self) -> int: + return self.data.n + + cpdef ndarray[object, ndim=1] to_array(self): + cdef: + ndarray ao + Py_ssize_t n + object val + + ao = np.empty(self.data.n, dtype=object) + for i in range(self.data.n): + val = self.data.data[i] + ao[i] = val + self.external_view_exists = True + self.data.m = self.data.n + return ao + + cdef inline void append(self, char *x): + + if needs_resize(self.data): + self.resize() + + append_data_string(self.data, x) + + cdef extend(self, ndarray[object] x): + for i in range(len(x)): + self.append(x[i]) + + +cdef class ObjectVector(Vector): + + cdef: + PyObject **data + Py_ssize_t n, m + ndarray ao + + def __cinit__(self): + self.n = 0 + self.m = _INIT_VEC_CAP + self.ao = np.empty(_INIT_VEC_CAP, dtype=object) + self.data = self.ao.data + + def __len__(self) -> int: + return self.n + + cdef inline append(self, object obj): + if self.n == self.m: + if self.external_view_exists: + raise ValueError("external reference but " + "Vector.resize() needed") + self.m = max(self.m * 2, _INIT_VEC_CAP) + self.ao.resize(self.m, refcheck=False) + self.data = self.ao.data + + Py_INCREF(obj) + self.data[self.n] = obj + self.n += 1 + + cpdef ndarray[object, ndim=1] to_array(self): + if self.m != self.n: + if self.external_view_exists: + raise ValueError("should have raised on append()") + self.ao.resize(self.n, refcheck=False) + 
self.m = self.n + self.external_view_exists = True + return self.ao + + cdef extend(self, ndarray[object] x): + for i in range(len(x)): + self.append(x[i]) + +# ---------------------------------------------------------------------- +# HashTable +# ---------------------------------------------------------------------- + + +cdef class HashTable: + + pass + +{{py: + +# name, dtype, c_type, to_c_type +dtypes = [('Complex128', 'complex128', 'khcomplex128_t', 'to_khcomplex128_t'), + ('Float64', 'float64', 'float64_t', ''), + ('UInt64', 'uint64', 'uint64_t', ''), + ('Int64', 'int64', 'int64_t', ''), + ('Complex64', 'complex64', 'khcomplex64_t', 'to_khcomplex64_t'), + ('Float32', 'float32', 'float32_t', ''), + ('UInt32', 'uint32', 'uint32_t', ''), + ('Int32', 'int32', 'int32_t', ''), + ('UInt16', 'uint16', 'uint16_t', ''), + ('Int16', 'int16', 'int16_t', ''), + ('UInt8', 'uint8', 'uint8_t', ''), + ('Int8', 'int8', 'int8_t', '')] + +}} + + +{{for name, dtype, c_type, to_c_type in dtypes}} + +cdef class {{name}}HashTable(HashTable): + + def __cinit__(self, int64_t size_hint=1): + self.table = kh_init_{{dtype}}() + size_hint = min(kh_needed_n_buckets(size_hint), SIZE_HINT_LIMIT) + kh_resize_{{dtype}}(self.table, size_hint) + + def __len__(self) -> int: + return self.table.size + + def __dealloc__(self): + if self.table is not NULL: + kh_destroy_{{dtype}}(self.table) + self.table = NULL + + def __contains__(self, object key) -> bool: + cdef: + khiter_t k + {{c_type}} ckey + ckey = {{to_c_type}}(key) + k = kh_get_{{dtype}}(self.table, ckey) + return k != self.table.n_buckets + + def sizeof(self, deep: bool = False) -> int: + """ return the size of my table in bytes """ + overhead = 4 * sizeof(uint32_t) + 3 * sizeof(uint32_t*) + for_flags = max(1, self.table.n_buckets >> 5) * sizeof(uint32_t) + for_pairs = self.table.n_buckets * (sizeof({{dtype}}_t) + # keys + sizeof(Py_ssize_t)) # vals + return overhead + for_flags + for_pairs + + def get_state(self) -> dict[str, int]: + """ returns infos about the state of the hashtable""" + return { + 'n_buckets' : self.table.n_buckets, + 'size' : self.table.size, + 'n_occupied' : self.table.n_occupied, + 'upper_bound' : self.table.upper_bound, + } + + cpdef get_item(self, {{dtype}}_t val): + cdef: + khiter_t k + {{c_type}} cval + cval = {{to_c_type}}(val) + k = kh_get_{{dtype}}(self.table, cval) + if k != self.table.n_buckets: + return self.table.vals[k] + else: + raise KeyError(val) + + cpdef set_item(self, {{dtype}}_t key, Py_ssize_t val): + cdef: + khiter_t k + int ret = 0 + {{c_type}} ckey + ckey = {{to_c_type}}(key) + k = kh_put_{{dtype}}(self.table, ckey, &ret) + if kh_exist_{{dtype}}(self.table, k): + self.table.vals[k] = val + else: + raise KeyError(key) + + @cython.boundscheck(False) + def map(self, const {{dtype}}_t[:] keys, const int64_t[:] values) -> None: + cdef: + Py_ssize_t i, n = len(values) + int ret = 0 + {{c_type}} key + khiter_t k + + with nogil: + for i in range(n): + key = {{to_c_type}}(keys[i]) + k = kh_put_{{dtype}}(self.table, key, &ret) + self.table.vals[k] = values[i] + + @cython.boundscheck(False) + def map_locations(self, const {{dtype}}_t[:] values) -> None: + cdef: + Py_ssize_t i, n = len(values) + int ret = 0 + {{c_type}} val + khiter_t k + + with nogil: + for i in range(n): + val= {{to_c_type}}(values[i]) + k = kh_put_{{dtype}}(self.table, val, &ret) + self.table.vals[k] = i + + @cython.boundscheck(False) + def lookup(self, const {{dtype}}_t[:] values) -> ndarray: + # -> np.ndarray[np.intp] + cdef: + Py_ssize_t i, n = len(values) + 
int ret = 0 + {{c_type}} val + khiter_t k + intp_t[:] locs = np.empty(n, dtype=np.intp) + + with nogil: + for i in range(n): + val = {{to_c_type}}(values[i]) + k = kh_get_{{dtype}}(self.table, val) + if k != self.table.n_buckets: + locs[i] = self.table.vals[k] + else: + locs[i] = -1 + + return np.asarray(locs) + + @cython.boundscheck(False) + @cython.wraparound(False) + def _unique(self, const {{dtype}}_t[:] values, {{name}}Vector uniques, + Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1, + object na_value=None, bint ignore_na=False, + object mask=None, bint return_inverse=False): + """ + Calculate unique values and labels (no sorting!) + + Parameters + ---------- + values : ndarray[{{dtype}}] + Array of values of which unique will be calculated + uniques : {{name}}Vector + Vector into which uniques will be written + count_prior : Py_ssize_t, default 0 + Number of existing entries in uniques + na_sentinel : Py_ssize_t, default -1 + Sentinel value used for all NA-values in inverse + na_value : object, default None + Value to identify as missing. If na_value is None, then + any value "val" satisfying val != val is considered missing. + If na_value is not None, then _additionally_, any value "val" + satisfying val == na_value is considered missing. + ignore_na : bool, default False + Whether NA-values should be ignored for calculating the uniques. If + True, the labels corresponding to missing values will be set to + na_sentinel. + mask : ndarray[bool], optional + If not None, the mask is used as indicator for missing values + (True = missing, False = valid) instead of `na_value` or + condition "val != val". + return_inverse : bool, default False + Whether the mapping of the original array values to their location + in the vector of uniques should be returned. + + Returns + ------- + uniques : ndarray[{{dtype}}] + Unique values of input, not sorted + labels : ndarray[intp_t] (if return_inverse=True) + The labels from values to uniques + """ + cdef: + Py_ssize_t i, idx, count = count_prior, n = len(values) + intp_t[:] labels + int ret = 0 + {{c_type}} val, na_value2 + khiter_t k + {{name}}VectorData *ud + bint use_na_value, use_mask + uint8_t[:] mask_values + + if return_inverse: + labels = np.empty(n, dtype=np.intp) + ud = uniques.data + use_na_value = na_value is not None + use_mask = mask is not None + + if use_mask: + mask_values = mask.view("uint8") + + if use_na_value: + # We need this na_value2 because we want to allow users + # to *optionally* specify an NA sentinel *of the correct* type. + # We use None, to make it optional, which requires `object` type + # for the parameter. To please the compiler, we use na_value2, + # which is only used if it's *specified*. + na_value2 = {{to_c_type}}(na_value) + else: + na_value2 = {{to_c_type}}(0) + + with nogil: + for i in range(n): + val = {{to_c_type}}(values[i]) + + if ignore_na and use_mask: + if mask_values[i]: + labels[i] = na_sentinel + continue + elif ignore_na and ( + is_nan_{{c_type}}(val) or + (use_na_value and are_equivalent_{{c_type}}(val, na_value2)) + ): + # if missing values do not count as unique values (i.e. 
if + # ignore_na is True), skip the hashtable entry for them, + # and replace the corresponding label with na_sentinel + labels[i] = na_sentinel + continue + + k = kh_get_{{dtype}}(self.table, val) + + if k == self.table.n_buckets: + # k hasn't been seen yet + k = kh_put_{{dtype}}(self.table, val, &ret) + + if needs_resize(ud): + with gil: + if uniques.external_view_exists: + raise ValueError("external reference to " + "uniques held, but " + "Vector.resize() needed") + uniques.resize() + append_data_{{dtype}}(ud, val) + if return_inverse: + self.table.vals[k] = count + labels[i] = count + count += 1 + elif return_inverse: + # k falls into a previous bucket + # only relevant in case we need to construct the inverse + idx = self.table.vals[k] + labels[i] = idx + + if return_inverse: + return uniques.to_array(), labels.base # .base -> underlying ndarray + return uniques.to_array() + + def unique(self, const {{dtype}}_t[:] values, bint return_inverse=False): + """ + Calculate unique values and labels (no sorting!) + + Parameters + ---------- + values : ndarray[{{dtype}}] + Array of values of which unique will be calculated + return_inverse : bool, default False + Whether the mapping of the original array values to their location + in the vector of uniques should be returned. + + Returns + ------- + uniques : ndarray[{{dtype}}] + Unique values of input, not sorted + labels : ndarray[intp_t] (if return_inverse) + The labels from values to uniques + """ + uniques = {{name}}Vector() + return self._unique(values, uniques, ignore_na=False, + return_inverse=return_inverse) + + def factorize(self, const {{dtype}}_t[:] values, Py_ssize_t na_sentinel=-1, + object na_value=None, object mask=None): + """ + Calculate unique values and labels (no sorting!) + + Missing values are not included in the "uniques" for this method. + The labels for any missing values will be set to "na_sentinel" + + Parameters + ---------- + values : ndarray[{{dtype}}] + Array of values of which unique will be calculated + na_sentinel : Py_ssize_t, default -1 + Sentinel value used for all NA-values in inverse + na_value : object, default None + Value to identify as missing. If na_value is None, then + any value "val" satisfying val != val is considered missing. + If na_value is not None, then _additionally_, any value "val" + satisfying val == na_value is considered missing. + mask : ndarray[bool], optional + If not None, the mask is used as indicator for missing values + (True = missing, False = valid) instead of `na_value` or + condition "val != val". 
+ + Returns + ------- + uniques : ndarray[{{dtype}}] + Unique values of input, not sorted + labels : ndarray[intp_t] + The labels from values to uniques + """ + uniques_vector = {{name}}Vector() + return self._unique(values, uniques_vector, na_sentinel=na_sentinel, + na_value=na_value, ignore_na=True, mask=mask, + return_inverse=True) + + def get_labels(self, const {{dtype}}_t[:] values, {{name}}Vector uniques, + Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1, + object na_value=None): + # -> np.ndarray[np.intp] + _, labels = self._unique(values, uniques, count_prior=count_prior, + na_sentinel=na_sentinel, na_value=na_value, + ignore_na=True, return_inverse=True) + return labels + + {{if dtype == 'int64'}} + @cython.boundscheck(False) + def get_labels_groupby( + self, const {{dtype}}_t[:] values + ) -> tuple[ndarray, ndarray]: + # tuple[np.ndarray[np.intp], np.ndarray[{{dtype}}]] + cdef: + Py_ssize_t i, n = len(values) + intp_t[:] labels + Py_ssize_t idx, count = 0 + int ret = 0 + {{c_type}} val + khiter_t k + {{name}}Vector uniques = {{name}}Vector() + {{name}}VectorData *ud + + labels = np.empty(n, dtype=np.intp) + ud = uniques.data + + with nogil: + for i in range(n): + val = {{to_c_type}}(values[i]) + + # specific for groupby + if val < 0: + labels[i] = -1 + continue + + k = kh_get_{{dtype}}(self.table, val) + if k != self.table.n_buckets: + idx = self.table.vals[k] + labels[i] = idx + else: + k = kh_put_{{dtype}}(self.table, val, &ret) + self.table.vals[k] = count + + if needs_resize(ud): + with gil: + uniques.resize() + append_data_{{dtype}}(ud, val) + labels[i] = count + count += 1 + + arr_uniques = uniques.to_array() + + return np.asarray(labels), arr_uniques + {{endif}} + +{{endfor}} + + +cdef class StringHashTable(HashTable): + # these by-definition *must* be strings + # or a sentinel np.nan / None missing value + na_string_sentinel = '__nan__' + + def __init__(self, int64_t size_hint=1): + self.table = kh_init_str() + size_hint = min(kh_needed_n_buckets(size_hint), SIZE_HINT_LIMIT) + kh_resize_str(self.table, size_hint) + + def __dealloc__(self): + if self.table is not NULL: + kh_destroy_str(self.table) + self.table = NULL + + def sizeof(self, deep: bool = False) -> int: + overhead = 4 * sizeof(uint32_t) + 3 * sizeof(uint32_t*) + for_flags = max(1, self.table.n_buckets >> 5) * sizeof(uint32_t) + for_pairs = self.table.n_buckets * (sizeof(char *) + # keys + sizeof(Py_ssize_t)) # vals + return overhead + for_flags + for_pairs + + def get_state(self) -> dict[str, int]: + """ returns infos about the state of the hashtable""" + return { + 'n_buckets' : self.table.n_buckets, + 'size' : self.table.size, + 'n_occupied' : self.table.n_occupied, + 'upper_bound' : self.table.upper_bound, + } + + cpdef get_item(self, str val): + cdef: + khiter_t k + const char *v + v = get_c_string(val) + + k = kh_get_str(self.table, v) + if k != self.table.n_buckets: + return self.table.vals[k] + else: + raise KeyError(val) + + cpdef set_item(self, str key, Py_ssize_t val): + cdef: + khiter_t k + int ret = 0 + const char *v + + v = get_c_string(key) + + k = kh_put_str(self.table, v, &ret) + if kh_exist_str(self.table, k): + self.table.vals[k] = val + else: + raise KeyError(key) + + @cython.boundscheck(False) + def get_indexer(self, ndarray[object] values) -> ndarray: + # -> np.ndarray[np.intp] + cdef: + Py_ssize_t i, n = len(values) + ndarray[intp_t] labels = np.empty(n, dtype=np.intp) + intp_t *resbuf = labels.data + khiter_t k + kh_str_t *table = self.table + const char *v + const char **vecs + + 
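+ # allocate a scratch array of C string pointers, one per input value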
vecs = malloc(n * sizeof(char *)) + for i in range(n): + val = values[i] + v = get_c_string(val) + vecs[i] = v + + with nogil: + for i in range(n): + k = kh_get_str(table, vecs[i]) + if k != table.n_buckets: + resbuf[i] = table.vals[k] + else: + resbuf[i] = -1 + + free(vecs) + return labels + + @cython.boundscheck(False) + def lookup(self, ndarray[object] values) -> ndarray: + # -> np.ndarray[np.intp] + cdef: + Py_ssize_t i, n = len(values) + int ret = 0 + object val + const char *v + khiter_t k + intp_t[:] locs = np.empty(n, dtype=np.intp) + + # these by-definition *must* be strings + vecs = malloc(n * sizeof(char *)) + for i in range(n): + val = values[i] + + if isinstance(val, str): + # GH#31499 if we have a np.str_ get_c_string won't recognize + # it as a str, even though isinstance does. + v = get_c_string(val) + else: + v = get_c_string(self.na_string_sentinel) + vecs[i] = v + + with nogil: + for i in range(n): + v = vecs[i] + k = kh_get_str(self.table, v) + if k != self.table.n_buckets: + locs[i] = self.table.vals[k] + else: + locs[i] = -1 + + free(vecs) + return np.asarray(locs) + + @cython.boundscheck(False) + def map_locations(self, ndarray[object] values) -> None: + cdef: + Py_ssize_t i, n = len(values) + int ret = 0 + object val + const char *v + const char **vecs + khiter_t k + + # these by-definition *must* be strings + vecs = malloc(n * sizeof(char *)) + for i in range(n): + val = values[i] + + if isinstance(val, str): + # GH#31499 if we have a np.str_ get_c_string won't recognize + # it as a str, even though isinstance does. + v = get_c_string(val) + else: + v = get_c_string(self.na_string_sentinel) + vecs[i] = v + + with nogil: + for i in range(n): + v = vecs[i] + k = kh_put_str(self.table, v, &ret) + self.table.vals[k] = i + free(vecs) + + @cython.boundscheck(False) + @cython.wraparound(False) + def _unique(self, ndarray[object] values, ObjectVector uniques, + Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1, + object na_value=None, bint ignore_na=False, + bint return_inverse=False): + """ + Calculate unique values and labels (no sorting!) + + Parameters + ---------- + values : ndarray[object] + Array of values of which unique will be calculated + uniques : ObjectVector + Vector into which uniques will be written + count_prior : Py_ssize_t, default 0 + Number of existing entries in uniques + na_sentinel : Py_ssize_t, default -1 + Sentinel value used for all NA-values in inverse + na_value : object, default None + Value to identify as missing. If na_value is None, then any value + that is not a string is considered missing. If na_value is + not None, then _additionally_ any value "val" satisfying + val == na_value is considered missing. + ignore_na : bool, default False + Whether NA-values should be ignored for calculating the uniques. If + True, the labels corresponding to missing values will be set to + na_sentinel. + return_inverse : bool, default False + Whether the mapping of the original array values to their location + in the vector of uniques should be returned. 
+ + Returns + ------- + uniques : ndarray[object] + Unique values of input, not sorted + labels : ndarray[intp_t] (if return_inverse=True) + The labels from values to uniques + """ + cdef: + Py_ssize_t i, idx, count = count_prior, n = len(values) + intp_t[:] labels + int64_t[:] uindexer + int ret = 0 + object val + const char *v + const char **vecs + khiter_t k + bint use_na_value + + if return_inverse: + labels = np.zeros(n, dtype=np.intp) + uindexer = np.empty(n, dtype=np.int64) + use_na_value = na_value is not None + + # assign pointers and pre-filter out missing (if ignore_na) + vecs = malloc(n * sizeof(char *)) + for i in range(n): + val = values[i] + + if (ignore_na + and (not isinstance(val, str) + or (use_na_value and val == na_value))): + # if missing values do not count as unique values (i.e. if + # ignore_na is True), we can skip the actual value, and + # replace the label with na_sentinel directly + labels[i] = na_sentinel + else: + # if ignore_na is False, we also stringify NaN/None/etc. + try: + v = get_c_string(val) + except UnicodeEncodeError: + v = get_c_string(repr(val)) + vecs[i] = v + + # compute + with nogil: + for i in range(n): + if ignore_na and labels[i] == na_sentinel: + # skip entries for ignored missing values (see above) + continue + + v = vecs[i] + k = kh_get_str(self.table, v) + if k == self.table.n_buckets: + # k hasn't been seen yet + k = kh_put_str(self.table, v, &ret) + uindexer[count] = i + if return_inverse: + self.table.vals[k] = count + labels[i] = count + count += 1 + elif return_inverse: + # k falls into a previous bucket + # only relevant in case we need to construct the inverse + idx = self.table.vals[k] + labels[i] = idx + + free(vecs) + + # uniques + for i in range(count): + uniques.append(values[uindexer[i]]) + + if return_inverse: + return uniques.to_array(), labels.base # .base -> underlying ndarray + return uniques.to_array() + + def unique(self, ndarray[object] values, bint return_inverse=False): + """ + Calculate unique values and labels (no sorting!) + + Parameters + ---------- + values : ndarray[object] + Array of values of which unique will be calculated + return_inverse : bool, default False + Whether the mapping of the original array values to their location + in the vector of uniques should be returned. + + Returns + ------- + uniques : ndarray[object] + Unique values of input, not sorted + labels : ndarray[intp_t] (if return_inverse) + The labels from values to uniques + """ + uniques = ObjectVector() + return self._unique(values, uniques, ignore_na=False, + return_inverse=return_inverse) + + def factorize(self, ndarray[object] values, Py_ssize_t na_sentinel=-1, + object na_value=None, object mask=None): + """ + Calculate unique values and labels (no sorting!) + + Missing values are not included in the "uniques" for this method. + The labels for any missing values will be set to "na_sentinel" + + Parameters + ---------- + values : ndarray[object] + Array of values of which unique will be calculated + na_sentinel : Py_ssize_t, default -1 + Sentinel value used for all NA-values in inverse + na_value : object, default None + Value to identify as missing. If na_value is None, then any value + that is not a string is considered missing. If na_value is + not None, then _additionally_ any value "val" satisfying + val == na_value is considered missing. + mask : ndarray[bool], optional + Not yet implemented for StringHashTable. 
+ + Returns + ------- + uniques : ndarray[object] + Unique values of input, not sorted + labels : ndarray[intp] + The labels from values to uniques + """ + uniques_vector = ObjectVector() + return self._unique(values, uniques_vector, na_sentinel=na_sentinel, + na_value=na_value, ignore_na=True, + return_inverse=True) + + def get_labels(self, ndarray[object] values, ObjectVector uniques, + Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1, + object na_value=None): + # -> np.ndarray[np.intp] + _, labels = self._unique(values, uniques, count_prior=count_prior, + na_sentinel=na_sentinel, na_value=na_value, + ignore_na=True, return_inverse=True) + return labels + + +cdef class PyObjectHashTable(HashTable): + + def __init__(self, int64_t size_hint=1): + self.table = kh_init_pymap() + size_hint = min(kh_needed_n_buckets(size_hint), SIZE_HINT_LIMIT) + kh_resize_pymap(self.table, size_hint) + + def __dealloc__(self): + if self.table is not NULL: + kh_destroy_pymap(self.table) + self.table = NULL + + def __len__(self) -> int: + return self.table.size + + def __contains__(self, object key) -> bool: + cdef: + khiter_t k + hash(key) + + k = kh_get_pymap(self.table, key) + return k != self.table.n_buckets + + def sizeof(self, deep: bool = False) -> int: + """ return the size of my table in bytes """ + overhead = 4 * sizeof(uint32_t) + 3 * sizeof(uint32_t*) + for_flags = max(1, self.table.n_buckets >> 5) * sizeof(uint32_t) + for_pairs = self.table.n_buckets * (sizeof(PyObject *) + # keys + sizeof(Py_ssize_t)) # vals + return overhead + for_flags + for_pairs + + def get_state(self) -> dict[str, int]: + """ + returns infos about the current state of the hashtable like size, + number of buckets and so on. + """ + return { + 'n_buckets' : self.table.n_buckets, + 'size' : self.table.size, + 'n_occupied' : self.table.n_occupied, + 'upper_bound' : self.table.upper_bound, + } + + cpdef get_item(self, object val): + cdef: + khiter_t k + + k = kh_get_pymap(self.table, val) + if k != self.table.n_buckets: + return self.table.vals[k] + else: + raise KeyError(val) + + cpdef set_item(self, object key, Py_ssize_t val): + cdef: + khiter_t k + int ret = 0 + char* buf + + hash(key) + + k = kh_put_pymap(self.table, key, &ret) + if kh_exist_pymap(self.table, k): + self.table.vals[k] = val + else: + raise KeyError(key) + + def map_locations(self, ndarray[object] values) -> None: + cdef: + Py_ssize_t i, n = len(values) + int ret = 0 + object val + khiter_t k + + for i in range(n): + val = values[i] + hash(val) + + k = kh_put_pymap(self.table, val, &ret) + self.table.vals[k] = i + + def lookup(self, ndarray[object] values) -> ndarray: + # -> np.ndarray[np.intp] + cdef: + Py_ssize_t i, n = len(values) + int ret = 0 + object val + khiter_t k + intp_t[:] locs = np.empty(n, dtype=np.intp) + + for i in range(n): + val = values[i] + hash(val) + + k = kh_get_pymap(self.table, val) + if k != self.table.n_buckets: + locs[i] = self.table.vals[k] + else: + locs[i] = -1 + + return np.asarray(locs) + + @cython.boundscheck(False) + @cython.wraparound(False) + def _unique(self, ndarray[object] values, ObjectVector uniques, + Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1, + object na_value=None, bint ignore_na=False, + bint return_inverse=False): + """ + Calculate unique values and labels (no sorting!) 
+ + Parameters + ---------- + values : ndarray[object] + Array of values of which unique will be calculated + uniques : ObjectVector + Vector into which uniques will be written + count_prior : Py_ssize_t, default 0 + Number of existing entries in uniques + na_sentinel : Py_ssize_t, default -1 + Sentinel value used for all NA-values in inverse + na_value : object, default None + Value to identify as missing. If na_value is None, then None _plus_ + any value "val" satisfying val != val is considered missing. + If na_value is not None, then _additionally_, any value "val" + satisfying val == na_value is considered missing. + ignore_na : bool, default False + Whether NA-values should be ignored for calculating the uniques. If + True, the labels corresponding to missing values will be set to + na_sentinel. + return_inverse : bool, default False + Whether the mapping of the original array values to their location + in the vector of uniques should be returned. + + Returns + ------- + uniques : ndarray[object] + Unique values of input, not sorted + labels : ndarray[intp_t] (if return_inverse=True) + The labels from values to uniques + """ + cdef: + Py_ssize_t i, idx, count = count_prior, n = len(values) + intp_t[:] labels + int ret = 0 + object val + khiter_t k + bint use_na_value + + if return_inverse: + labels = np.empty(n, dtype=np.intp) + use_na_value = na_value is not None + + for i in range(n): + val = values[i] + hash(val) + + if ignore_na and ( + checknull(val) + or (use_na_value and val == na_value) + ): + # if missing values do not count as unique values (i.e. if + # ignore_na is True), skip the hashtable entry for them, and + # replace the corresponding label with na_sentinel + labels[i] = na_sentinel + continue + + k = kh_get_pymap(self.table, val) + if k == self.table.n_buckets: + # k hasn't been seen yet + k = kh_put_pymap(self.table, val, &ret) + uniques.append(val) + if return_inverse: + self.table.vals[k] = count + labels[i] = count + count += 1 + elif return_inverse: + # k falls into a previous bucket + # only relevant in case we need to construct the inverse + idx = self.table.vals[k] + labels[i] = idx + + if return_inverse: + return uniques.to_array(), labels.base # .base -> underlying ndarray + return uniques.to_array() + + def unique(self, ndarray[object] values, bint return_inverse=False): + """ + Calculate unique values and labels (no sorting!) + + Parameters + ---------- + values : ndarray[object] + Array of values of which unique will be calculated + return_inverse : bool, default False + Whether the mapping of the original array values to their location + in the vector of uniques should be returned. + + Returns + ------- + uniques : ndarray[object] + Unique values of input, not sorted + labels : ndarray[intp_t] (if return_inverse) + The labels from values to uniques + """ + uniques = ObjectVector() + return self._unique(values, uniques, ignore_na=False, + return_inverse=return_inverse) + + def factorize(self, ndarray[object] values, Py_ssize_t na_sentinel=-1, + object na_value=None, object mask=None): + """ + Calculate unique values and labels (no sorting!) + + Missing values are not included in the "uniques" for this method. + The labels for any missing values will be set to "na_sentinel" + + Parameters + ---------- + values : ndarray[object] + Array of values of which unique will be calculated + na_sentinel : Py_ssize_t, default -1 + Sentinel value used for all NA-values in inverse + na_value : object, default None + Value to identify as missing. 
If na_value is None, then None _plus_ + any value "val" satisfying val != val is considered missing. + If na_value is not None, then _additionally_, any value "val" + satisfying val == na_value is considered missing. + mask : ndarray[bool], optional + Not yet implemented for PyObjectHashTable. + + Returns + ------- + uniques : ndarray[object] + Unique values of input, not sorted + labels : ndarray[intp_t] + The labels from values to uniques + """ + uniques_vector = ObjectVector() + return self._unique(values, uniques_vector, na_sentinel=na_sentinel, + na_value=na_value, ignore_na=True, + return_inverse=True) + + def get_labels(self, ndarray[object] values, ObjectVector uniques, + Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1, + object na_value=None): + # -> np.ndarray[np.intp] + _, labels = self._unique(values, uniques, count_prior=count_prior, + na_sentinel=na_sentinel, na_value=na_value, + ignore_na=True, return_inverse=True) + return labels diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/hashtable_func_helper.pxi.in b/.local/lib/python3.8/site-packages/pandas/_libs/hashtable_func_helper.pxi.in new file mode 100644 index 0000000..e5e64f8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/hashtable_func_helper.pxi.in @@ -0,0 +1,515 @@ +""" +Template for each `dtype` helper function for hashtable + +WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in +""" + +{{py: + +# name, dtype, ttype, c_type, to_c_type +dtypes = [('Complex128', 'complex128', 'complex128', + 'khcomplex128_t', 'to_khcomplex128_t'), + ('Complex64', 'complex64', 'complex64', + 'khcomplex64_t', 'to_khcomplex64_t'), + ('Float64', 'float64', 'float64', 'float64_t', ''), + ('Float32', 'float32', 'float32', 'float32_t', ''), + ('UInt64', 'uint64', 'uint64', 'uint64_t', ''), + ('UInt32', 'uint32', 'uint32', 'uint32_t', ''), + ('UInt16', 'uint16', 'uint16', 'uint16_t', ''), + ('UInt8', 'uint8', 'uint8', 'uint8_t', ''), + ('Object', 'object', 'pymap', 'object', ''), + ('Int64', 'int64', 'int64', 'int64_t', ''), + ('Int32', 'int32', 'int32', 'int32_t', ''), + ('Int16', 'int16', 'int16', 'int16_t', ''), + ('Int8', 'int8', 'int8', 'int8_t', '')] + +}} + +{{for name, dtype, ttype, c_type, to_c_type in dtypes}} + + +@cython.wraparound(False) +@cython.boundscheck(False) +{{if dtype == 'object'}} +cdef value_count_{{dtype}}(ndarray[{{dtype}}] values, bint dropna): +{{else}} +cdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna): +{{endif}} + cdef: + Py_ssize_t i = 0 + Py_ssize_t n = len(values) + kh_{{ttype}}_t *table + + # Don't use Py_ssize_t, since table.n_buckets is unsigned + khiter_t k + + {{c_type}} val + + int ret = 0 + + # we track the order in which keys are first seen (GH39009), + # khash-map isn't insertion-ordered, thus: + # table maps keys to counts + # result_keys remembers the original order of keys + + result_keys = {{name}}Vector() + table = kh_init_{{ttype}}() + + {{if dtype == 'object'}} + kh_resize_{{ttype}}(table, n // 10) + + for i in range(n): + val = values[i] + if not dropna or not checknull(val): + k = kh_get_{{ttype}}(table, val) + if k != table.n_buckets: + table.vals[k] += 1 + else: + k = kh_put_{{ttype}}(table, val, &ret) + table.vals[k] = 1 + result_keys.append(val) + {{else}} + kh_resize_{{ttype}}(table, n) + + for i in range(n): + val = {{to_c_type}}(values[i]) + + if not is_nan_{{c_type}}(val) or not dropna: + k = kh_get_{{ttype}}(table, val) + if k != table.n_buckets: + table.vals[k] += 1 + else: + k = kh_put_{{ttype}}(table, val, 
&ret) + table.vals[k] = 1 + result_keys.append(val) + {{endif}} + + # collect counts in the order corresponding to result_keys: + cdef int64_t[:] result_counts = np.empty(table.size, dtype=np.int64) + for i in range(table.size): + {{if dtype == 'object'}} + k = kh_get_{{ttype}}(table, result_keys.data[i]) + {{else}} + k = kh_get_{{ttype}}(table, result_keys.data.data[i]) + {{endif}} + result_counts[i] = table.vals[k] + + kh_destroy_{{ttype}}(table) + + return result_keys.to_array(), result_counts.base + + +@cython.wraparound(False) +@cython.boundscheck(False) +{{if dtype == 'object'}} +cdef duplicated_{{dtype}}(ndarray[{{dtype}}] values, object keep='first'): +{{else}} +cdef duplicated_{{dtype}}(const {{dtype}}_t[:] values, object keep='first'): +{{endif}} + cdef: + int ret = 0 + {{if dtype != 'object'}} + {{c_type}} value + {{endif}} + Py_ssize_t i, n = len(values) + khiter_t k + kh_{{ttype}}_t *table = kh_init_{{ttype}}() + ndarray[uint8_t, ndim=1, cast=True] out = np.empty(n, dtype='bool') + + kh_resize_{{ttype}}(table, min(kh_needed_n_buckets(n), SIZE_HINT_LIMIT)) + + if keep not in ('last', 'first', False): + raise ValueError('keep must be either "first", "last" or False') + + if keep == 'last': + {{if dtype == 'object'}} + for i in range(n - 1, -1, -1): + # equivalent: range(n)[::-1], which cython doesn't like in nogil + kh_put_{{ttype}}(table, values[i], &ret) + out[i] = ret == 0 + {{else}} + with nogil: + for i in range(n - 1, -1, -1): + # equivalent: range(n)[::-1], which cython doesn't like in nogil + value = {{to_c_type}}(values[i]) + kh_put_{{ttype}}(table, value, &ret) + out[i] = ret == 0 + {{endif}} + elif keep == 'first': + {{if dtype == 'object'}} + for i in range(n): + kh_put_{{ttype}}(table, values[i], &ret) + out[i] = ret == 0 + {{else}} + with nogil: + for i in range(n): + value = {{to_c_type}}(values[i]) + kh_put_{{ttype}}(table, value, &ret) + out[i] = ret == 0 + {{endif}} + else: + {{if dtype == 'object'}} + for i in range(n): + value = values[i] + k = kh_get_{{ttype}}(table, value) + if k != table.n_buckets: + out[table.vals[k]] = 1 + out[i] = 1 + else: + k = kh_put_{{ttype}}(table, value, &ret) + table.vals[k] = i + out[i] = 0 + {{else}} + with nogil: + for i in range(n): + value = {{to_c_type}}(values[i]) + k = kh_get_{{ttype}}(table, value) + if k != table.n_buckets: + out[table.vals[k]] = 1 + out[i] = 1 + else: + k = kh_put_{{ttype}}(table, value, &ret) + table.vals[k] = i + out[i] = 0 + {{endif}} + kh_destroy_{{ttype}}(table) + return out + + +# ---------------------------------------------------------------------- +# Membership +# ---------------------------------------------------------------------- + + +@cython.wraparound(False) +@cython.boundscheck(False) +{{if dtype == 'object'}} +cdef ismember_{{dtype}}(ndarray[{{c_type}}] arr, ndarray[{{c_type}}] values): +{{else}} +cdef ismember_{{dtype}}(const {{dtype}}_t[:] arr, const {{dtype}}_t[:] values): +{{endif}} + """ + Return boolean of values in arr on an + element by-element basis + + Parameters + ---------- + arr : {{dtype}} ndarray + values : {{dtype}} ndarray + + Returns + ------- + boolean ndarry len of (arr) + """ + cdef: + Py_ssize_t i, n + khiter_t k + int ret = 0 + ndarray[uint8_t] result + {{c_type}} val + kh_{{ttype}}_t *table = kh_init_{{ttype}}() + + # construct the table + n = len(values) + kh_resize_{{ttype}}(table, n) + + {{if dtype == 'object'}} + for i in range(n): + kh_put_{{ttype}}(table, values[i], &ret) + {{else}} + with nogil: + for i in range(n): + val = {{to_c_type}}(values[i]) + 
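+                # kh_put is a no-op for a key that is already present, so
+                # duplicates in `values` are harmless when building the table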
kh_put_{{ttype}}(table, val, &ret) + {{endif}} + + # test membership + n = len(arr) + result = np.empty(n, dtype=np.uint8) + + {{if dtype == 'object'}} + for i in range(n): + val = arr[i] + k = kh_get_{{ttype}}(table, val) + result[i] = (k != table.n_buckets) + {{else}} + with nogil: + for i in range(n): + val = {{to_c_type}}(arr[i]) + k = kh_get_{{ttype}}(table, val) + result[i] = (k != table.n_buckets) + {{endif}} + + kh_destroy_{{ttype}}(table) + return result.view(np.bool_) + +# ---------------------------------------------------------------------- +# Mode Computations +# ---------------------------------------------------------------------- + + +@cython.wraparound(False) +@cython.boundscheck(False) +{{if dtype == 'object'}} +cdef mode_{{dtype}}(ndarray[{{dtype}}] values, bint dropna): +{{else}} +cdef mode_{{dtype}}(const {{dtype}}_t[:] values, bint dropna): +{{endif}} + cdef: + {{if dtype == 'object'}} + ndarray[{{dtype}}] keys + ndarray[{{dtype}}] modes + {{else}} + {{dtype}}_t[:] keys + ndarray[{{dtype}}_t] modes + {{endif}} + int64_t[:] counts + int64_t count, max_count = -1 + Py_ssize_t k, j = 0 + + keys, counts = value_count_{{dtype}}(values, dropna) + + {{if dtype == 'object'}} + modes = np.empty(len(keys), dtype=np.object_) + {{else}} + modes = np.empty(len(keys), dtype=np.{{dtype}}) + {{endif}} + + {{if dtype != 'object'}} + with nogil: + for k in range(len(keys)): + count = counts[k] + if count == max_count: + j += 1 + elif count > max_count: + max_count = count + j = 0 + else: + continue + + modes[j] = keys[k] + {{else}} + for k in range(len(keys)): + count = counts[k] + if count == max_count: + j += 1 + elif count > max_count: + max_count = count + j = 0 + else: + continue + + modes[j] = keys[k] + {{endif}} + + return modes[:j + 1] + +{{endfor}} + + +ctypedef fused htfunc_t: + complex128_t + complex64_t + float64_t + float32_t + uint64_t + uint32_t + uint16_t + uint8_t + int64_t + int32_t + int16_t + int8_t + object + + +cpdef value_count(ndarray[htfunc_t] values, bint dropna): + if htfunc_t is object: + return value_count_object(values, dropna) + + elif htfunc_t is int8_t: + return value_count_int8(values, dropna) + elif htfunc_t is int16_t: + return value_count_int16(values, dropna) + elif htfunc_t is int32_t: + return value_count_int32(values, dropna) + elif htfunc_t is int64_t: + return value_count_int64(values, dropna) + + elif htfunc_t is uint8_t: + return value_count_uint8(values, dropna) + elif htfunc_t is uint16_t: + return value_count_uint16(values, dropna) + elif htfunc_t is uint32_t: + return value_count_uint32(values, dropna) + elif htfunc_t is uint64_t: + return value_count_uint64(values, dropna) + + elif htfunc_t is float64_t: + return value_count_float64(values, dropna) + elif htfunc_t is float32_t: + return value_count_float32(values, dropna) + + elif htfunc_t is complex128_t: + return value_count_complex128(values, dropna) + elif htfunc_t is complex64_t: + return value_count_complex64(values, dropna) + + else: + raise TypeError(values.dtype) + + +cpdef duplicated(ndarray[htfunc_t] values, object keep="first"): + if htfunc_t is object: + return duplicated_object(values, keep) + + elif htfunc_t is int8_t: + return duplicated_int8(values, keep) + elif htfunc_t is int16_t: + return duplicated_int16(values, keep) + elif htfunc_t is int32_t: + return duplicated_int32(values, keep) + elif htfunc_t is int64_t: + return duplicated_int64(values, keep) + + elif htfunc_t is uint8_t: + return duplicated_uint8(values, keep) + elif htfunc_t is uint16_t: + return 
duplicated_uint16(values, keep) + elif htfunc_t is uint32_t: + return duplicated_uint32(values, keep) + elif htfunc_t is uint64_t: + return duplicated_uint64(values, keep) + + elif htfunc_t is float64_t: + return duplicated_float64(values, keep) + elif htfunc_t is float32_t: + return duplicated_float32(values, keep) + + elif htfunc_t is complex128_t: + return duplicated_complex128(values, keep) + elif htfunc_t is complex64_t: + return duplicated_complex64(values, keep) + + else: + raise TypeError(values.dtype) + + +cpdef ismember(ndarray[htfunc_t] arr, ndarray[htfunc_t] values): + if htfunc_t is object: + return ismember_object(arr, values) + + elif htfunc_t is int8_t: + return ismember_int8(arr, values) + elif htfunc_t is int16_t: + return ismember_int16(arr, values) + elif htfunc_t is int32_t: + return ismember_int32(arr, values) + elif htfunc_t is int64_t: + return ismember_int64(arr, values) + + elif htfunc_t is uint8_t: + return ismember_uint8(arr, values) + elif htfunc_t is uint16_t: + return ismember_uint16(arr, values) + elif htfunc_t is uint32_t: + return ismember_uint32(arr, values) + elif htfunc_t is uint64_t: + return ismember_uint64(arr, values) + + elif htfunc_t is float64_t: + return ismember_float64(arr, values) + elif htfunc_t is float32_t: + return ismember_float32(arr, values) + + elif htfunc_t is complex128_t: + return ismember_complex128(arr, values) + elif htfunc_t is complex64_t: + return ismember_complex64(arr, values) + + else: + raise TypeError(values.dtype) + + +cpdef mode(ndarray[htfunc_t] values, bint dropna): + if htfunc_t is object: + return mode_object(values, dropna) + + elif htfunc_t is int8_t: + return mode_int8(values, dropna) + elif htfunc_t is int16_t: + return mode_int16(values, dropna) + elif htfunc_t is int32_t: + return mode_int32(values, dropna) + elif htfunc_t is int64_t: + return mode_int64(values, dropna) + + elif htfunc_t is uint8_t: + return mode_uint8(values, dropna) + elif htfunc_t is uint16_t: + return mode_uint16(values, dropna) + elif htfunc_t is uint32_t: + return mode_uint32(values, dropna) + elif htfunc_t is uint64_t: + return mode_uint64(values, dropna) + + elif htfunc_t is float64_t: + return mode_float64(values, dropna) + elif htfunc_t is float32_t: + return mode_float32(values, dropna) + + elif htfunc_t is complex128_t: + return mode_complex128(values, dropna) + elif htfunc_t is complex64_t: + return mode_complex64(values, dropna) + + else: + raise TypeError(values.dtype) + + +{{py: + +# name, dtype, ttype, c_type +dtypes = [('Int64', 'int64', 'int64', 'int64_t'), + ('Int32', 'int32', 'int32', 'int32_t'), ] + +}} + +{{for name, dtype, ttype, c_type in dtypes}} + + +@cython.wraparound(False) +@cython.boundscheck(False) +def _unique_label_indices_{{dtype}}(const {{c_type}}[:] labels) -> ndarray: + """ + Indices of the first occurrences of the unique labels + *excluding* -1. 
equivalent to: + np.unique(labels, return_index=True)[1] + """ + cdef: + int ret = 0 + Py_ssize_t i, n = len(labels) + kh_{{ttype}}_t *table = kh_init_{{ttype}}() + {{name}}Vector idx = {{name}}Vector() + ndarray[{{c_type}}, ndim=1] arr + {{name}}VectorData *ud = idx.data + + kh_resize_{{ttype}}(table, min(kh_needed_n_buckets(n), SIZE_HINT_LIMIT)) + + with nogil: + for i in range(n): + kh_put_{{ttype}}(table, labels[i], &ret) + if ret != 0: + if needs_resize(ud): + with gil: + idx.resize() + append_data_{{ttype}}(ud, i) + + kh_destroy_{{ttype}}(table) + + arr = idx.to_array() + arr = arr[np.asarray(labels)[arr].argsort()] + + return arr[1:] if arr.size != 0 and labels[arr[0]] == -1 else arr + +{{endfor}} diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/index.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/_libs/index.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..8dae6d3 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/index.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/index.pyi b/.local/lib/python3.8/site-packages/pandas/_libs/index.pyi new file mode 100644 index 0000000..446a980 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/index.pyi @@ -0,0 +1,65 @@ +import numpy as np + +from pandas._typing import npt + +from pandas import MultiIndex + +class IndexEngine: + over_size_threshold: bool + def __init__(self, values: np.ndarray): ... + def __contains__(self, val: object) -> bool: ... + # -> int | slice | np.ndarray[bool] + def get_loc(self, val: object) -> int | slice | np.ndarray: ... + def sizeof(self, deep: bool = ...) -> int: ... + def __sizeof__(self) -> int: ... + @property + def is_unique(self) -> bool: ... + @property + def is_monotonic_increasing(self) -> bool: ... + @property + def is_monotonic_decreasing(self) -> bool: ... + @property + def is_mapping_populated(self) -> bool: ... + def clear_mapping(self): ... + def get_indexer(self, values: np.ndarray) -> npt.NDArray[np.intp]: ... + def get_indexer_non_unique( + self, + targets: np.ndarray, + ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ... + +class Float64Engine(IndexEngine): ... +class Float32Engine(IndexEngine): ... +class Int64Engine(IndexEngine): ... +class Int32Engine(IndexEngine): ... +class Int16Engine(IndexEngine): ... +class Int8Engine(IndexEngine): ... +class UInt64Engine(IndexEngine): ... +class UInt32Engine(IndexEngine): ... +class UInt16Engine(IndexEngine): ... +class UInt8Engine(IndexEngine): ... +class ObjectEngine(IndexEngine): ... +class DatetimeEngine(Int64Engine): ... +class TimedeltaEngine(DatetimeEngine): ... +class PeriodEngine(Int64Engine): ... + +class BaseMultiIndexCodesEngine: + levels: list[np.ndarray] + offsets: np.ndarray # ndarray[uint64_t, ndim=1] + def __init__( + self, + levels: list[np.ndarray], # all entries hashable + labels: list[np.ndarray], # all entries integer-dtyped + offsets: np.ndarray, # np.ndarray[np.uint64, ndim=1] + ): ... + def get_indexer( + self, + target: npt.NDArray[np.object_], + ) -> npt.NDArray[np.intp]: ... + def _extract_level_codes(self, target: MultiIndex) -> np.ndarray: ... + def get_indexer_with_fill( + self, + target: np.ndarray, # np.ndarray[object] of tuples + values: np.ndarray, # np.ndarray[object] of tuples + method: str, + limit: int | None, + ) -> npt.NDArray[np.intp]: ... 
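The stub above declares IndexEngine.get_loc as returning int | slice | np.ndarray, and the implementation in index.pyx below picks the shape based on uniqueness and monotonicity (see _get_loc_duplicates and _unpack_bool_indexer). A minimal sketch of that contract through the public Index API (illustrative values only):

    import pandas as pd

    pd.Index(["a", "b", "c"]).get_loc("b")  # 1 -> unique label, integer position
    pd.Index(["a", "a", "b"]).get_loc("a")  # slice(0, 2, None) -> duplicated, monotonic
    pd.Index(["a", "b", "a"]).get_loc("a")  # array([ True, False,  True]) -> duplicated, non-monotonic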
diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/index.pyx b/.local/lib/python3.8/site-packages/pandas/_libs/index.pyx new file mode 100644 index 0000000..c3b8616 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/index.pyx @@ -0,0 +1,799 @@ +cimport cython + +import numpy as np + +cimport numpy as cnp +from numpy cimport ( + float32_t, + float64_t, + int8_t, + int16_t, + int32_t, + int64_t, + intp_t, + ndarray, + uint8_t, + uint16_t, + uint32_t, + uint64_t, +) + +cnp.import_array() + + +from pandas._libs cimport util +from pandas._libs.hashtable cimport HashTable +from pandas._libs.tslibs.nattype cimport c_NaT as NaT +from pandas._libs.tslibs.period cimport is_period_object +from pandas._libs.tslibs.timedeltas cimport _Timedelta +from pandas._libs.tslibs.timestamps cimport _Timestamp + +from pandas._libs import ( + algos, + hashtable as _hash, +) + +from pandas._libs.lib cimport eq_NA_compat +from pandas._libs.missing cimport ( + C_NA as NA, + checknull, + is_matching_na, +) + + +cdef inline bint is_definitely_invalid_key(object val): + try: + hash(val) + except TypeError: + return True + return False + + +cdef ndarray _get_bool_indexer(ndarray values, object val): + """ + Return a ndarray[bool] of locations where val matches self.values. + + If val is not NA, this is equivalent to `self.values == val` + """ + # Caller is responsible for ensuring _check_type has already been called + cdef: + ndarray[uint8_t, ndim=1, cast=True] indexer + Py_ssize_t i + object item + + if values.descr.type_num == cnp.NPY_OBJECT: + # i.e. values.dtype == object + if not checknull(val): + indexer = eq_NA_compat(values, val) + + else: + # We need to check for _matching_ NA values + indexer = np.empty(len(values), dtype=np.uint8) + + for i in range(len(values)): + item = values[i] + indexer[i] = is_matching_na(item, val) + + else: + if util.is_nan(val): + indexer = np.isnan(values) + else: + indexer = values == val + + return indexer.view(bool) + + +# Don't populate hash tables in monotonic indexes larger than this +_SIZE_CUTOFF = 1_000_000 + + +cdef _unpack_bool_indexer(ndarray[uint8_t, ndim=1, cast=True] indexer, object val): + """ + Possibly unpack a boolean mask to a single indexer. 
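+
+    Returns an int if exactly one location matches and the boolean mask
+    itself if several do; raises KeyError(val) if there is no match.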
+ """ + # Returns ndarray[bool] or int + cdef: + ndarray[intp_t, ndim=1] found + int count + + found = np.where(indexer)[0] + count = len(found) + + if count > 1: + return indexer + if count == 1: + return int(found[0]) + + raise KeyError(val) + + +@cython.freelist(32) +cdef class IndexEngine: + + cdef readonly: + ndarray values + HashTable mapping + bint over_size_threshold + + cdef: + bint unique, monotonic_inc, monotonic_dec + bint need_monotonic_check, need_unique_check + object _np_type + + def __init__(self, ndarray values): + self.values = values + + self.over_size_threshold = len(values) >= _SIZE_CUTOFF + self.clear_mapping() + self._np_type = values.dtype.type + + def __contains__(self, val: object) -> bool: + # We assume before we get here: + # - val is hashable + self._ensure_mapping_populated() + return val in self.mapping + + cpdef get_loc(self, object val): + # -> Py_ssize_t | slice | ndarray[bool] + cdef: + Py_ssize_t loc + + if is_definitely_invalid_key(val): + raise TypeError(f"'{val}' is an invalid key") + + self._check_type(val) + + if self.over_size_threshold and self.is_monotonic_increasing: + if not self.is_unique: + return self._get_loc_duplicates(val) + values = self.values + + loc = self._searchsorted_left(val) + if loc >= len(values): + raise KeyError(val) + if values[loc] != val: + raise KeyError(val) + return loc + + self._ensure_mapping_populated() + if not self.unique: + return self._get_loc_duplicates(val) + + try: + return self.mapping.get_item(val) + except OverflowError as err: + # GH#41775 OverflowError e.g. if we are uint64 and val is -1 + # or if we are int64 and value is np.iinfo(np.int64).max+1 + # (the uint64 with -1 case should actually be excluded by _check_type) + raise KeyError(val) from err + + cdef Py_ssize_t _searchsorted_left(self, val) except? -1: + """ + See ObjectEngine._searchsorted_left.__doc__. + """ + # Caller is responsible for ensuring _check_type has already been called + loc = self.values.searchsorted(self._np_type(val), side="left") + return loc + + cdef inline _get_loc_duplicates(self, object val): + # -> Py_ssize_t | slice | ndarray[bool] + cdef: + Py_ssize_t diff, left, right + + if self.is_monotonic_increasing: + values = self.values + try: + left = values.searchsorted(val, side='left') + right = values.searchsorted(val, side='right') + except TypeError: + # e.g. 
GH#29189 get_loc(None) with a Float64Index + # 2021-09-29 Now only reached for object-dtype + raise KeyError(val) + + diff = right - left + if diff == 0: + raise KeyError(val) + elif diff == 1: + return left + else: + return slice(left, right) + + return self._maybe_get_bool_indexer(val) + + cdef _maybe_get_bool_indexer(self, object val): + # Returns ndarray[bool] or int + cdef: + ndarray[uint8_t, ndim=1, cast=True] indexer + + indexer = _get_bool_indexer(self.values, val) + return _unpack_bool_indexer(indexer, val) + + def sizeof(self, deep: bool = False) -> int: + """ return the sizeof our mapping """ + if not self.is_mapping_populated: + return 0 + return self.mapping.sizeof(deep=deep) + + def __sizeof__(self) -> int: + return self.sizeof() + + @property + def is_unique(self) -> bool: + if self.need_unique_check: + self._do_unique_check() + + return self.unique == 1 + + cdef inline _do_unique_check(self): + + # this de-facto the same + self._ensure_mapping_populated() + + @property + def is_monotonic_increasing(self) -> bool: + if self.need_monotonic_check: + self._do_monotonic_check() + + return self.monotonic_inc == 1 + + @property + def is_monotonic_decreasing(self) -> bool: + if self.need_monotonic_check: + self._do_monotonic_check() + + return self.monotonic_dec == 1 + + cdef inline _do_monotonic_check(self): + cdef: + bint is_unique + try: + values = self.values + self.monotonic_inc, self.monotonic_dec, is_unique = \ + self._call_monotonic(values) + except TypeError: + self.monotonic_inc = 0 + self.monotonic_dec = 0 + is_unique = 0 + + self.need_monotonic_check = 0 + + # we can only be sure of uniqueness if is_unique=1 + if is_unique: + self.unique = 1 + self.need_unique_check = 0 + + cdef _call_monotonic(self, values): + return algos.is_monotonic(values, timelike=False) + + cdef _make_hash_table(self, Py_ssize_t n): + raise NotImplementedError # pragma: no cover + + cdef _check_type(self, object val): + hash(val) + + @property + def is_mapping_populated(self) -> bool: + return self.mapping is not None + + cdef inline _ensure_mapping_populated(self): + # this populates the mapping + # if its not already populated + # also satisfies the need_unique_check + + if not self.is_mapping_populated: + + values = self.values + self.mapping = self._make_hash_table(len(values)) + self.mapping.map_locations(values) + + if len(self.mapping) == len(values): + self.unique = 1 + + self.need_unique_check = 0 + + def clear_mapping(self): + self.mapping = None + self.need_monotonic_check = 1 + self.need_unique_check = 1 + + self.unique = 0 + self.monotonic_inc = 0 + self.monotonic_dec = 0 + + def get_indexer(self, ndarray values) -> np.ndarray: + self._ensure_mapping_populated() + return self.mapping.lookup(values) + + def get_indexer_non_unique(self, ndarray targets): + """ + Return an indexer suitable for taking from a non unique index + return the labels in the same order as the target + and a missing indexer into the targets (which correspond + to the -1 indices in the results + + Returns + ------- + indexer : np.ndarray[np.intp] + missing : np.ndarray[np.intp] + """ + cdef: + ndarray values + ndarray[intp_t] result, missing + set stargets, remaining_stargets, found_nas + dict d = {} + object val + Py_ssize_t count = 0, count_missing = 0 + Py_ssize_t i, j, n, n_t, n_alloc, start, end + bint check_na_values = False + + values = self.values + stargets = set(targets) + + n = len(values) + n_t = len(targets) + if n > 10_000: + n_alloc = 10_000 + else: + n_alloc = n + + result = np.empty(n_alloc, 
dtype=np.intp) + missing = np.empty(n_t, dtype=np.intp) + + # map each starget to its position in the index + if ( + stargets and + len(stargets) < 5 and + not any([checknull(t) for t in stargets]) and + self.is_monotonic_increasing + ): + # if there are few enough stargets and the index is monotonically + # increasing, then use binary search for each starget + remaining_stargets = set() + for starget in stargets: + try: + start = values.searchsorted(starget, side='left') + end = values.searchsorted(starget, side='right') + except TypeError: # e.g. if we tried to search for string in int array + remaining_stargets.add(starget) + else: + if start != end: + d[starget] = list(range(start, end)) + + stargets = remaining_stargets + + if stargets: + # otherwise, map by iterating through all items in the index + + # short-circuit na check + if values.dtype == object: + check_na_values = True + # keep track of nas in values + found_nas = set() + + for i in range(n): + val = values[i] + + # GH#43870 + # handle lookup for nas + # (ie. np.nan, float("NaN"), Decimal("NaN"), dt64nat, td64nat) + if check_na_values and checknull(val): + match = [na for na in found_nas if is_matching_na(val, na)] + + # matching na not found + if not len(match): + found_nas.add(val) + + # add na to stargets to utilize `in` for stargets/d lookup + match_stargets = [ + x for x in stargets if is_matching_na(val, x) + ] + + if len(match_stargets): + # add our 'standardized' na + stargets.add(val) + + # matching na found + else: + assert len(match) == 1 + val = match[0] + + if val in stargets: + if val not in d: + d[val] = [] + d[val].append(i) + + for i in range(n_t): + val = targets[i] + + # ensure there are nas in values before looking for a matching na + if check_na_values and checknull(val): + match = [na for na in found_nas if is_matching_na(val, na)] + if len(match): + assert len(match) == 1 + val = match[0] + + # found + if val in d: + key = val + + for j in d[key]: + + # realloc if needed + if count >= n_alloc: + n_alloc += 10_000 + result = np.resize(result, n_alloc) + + result[count] = j + count += 1 + + # value not found + else: + + if count >= n_alloc: + n_alloc += 10_000 + result = np.resize(result, n_alloc) + result[count] = -1 + count += 1 + missing[count_missing] = i + count_missing += 1 + + return result[0:count], missing[0:count_missing] + + +cdef Py_ssize_t _bin_search(ndarray values, object val) except -1: + # GH#1757 ndarray.searchsorted is not safe to use with array of tuples + # (treats a tuple `val` as a sequence of keys instead of a single key), + # so we implement something similar. + # This is equivalent to the stdlib's bisect.bisect_left + + cdef: + Py_ssize_t mid = 0, lo = 0, hi = len(values) - 1 + object pval + + if hi == 0 or (hi > 0 and val > values[hi]): + return len(values) + + while lo < hi: + mid = (lo + hi) // 2 + pval = values[mid] + if val < pval: + hi = mid + elif val > pval: + lo = mid + 1 + else: + while mid > 0 and val == values[mid - 1]: + mid -= 1 + return mid + + if val <= values[mid]: + return mid + else: + return mid + 1 + + +cdef class ObjectEngine(IndexEngine): + """ + Index Engine for use with object-dtype Index, namely the base class Index. + """ + cdef _make_hash_table(self, Py_ssize_t n): + return _hash.PyObjectHashTable(n) + + cdef Py_ssize_t _searchsorted_left(self, val) except? 
-1: + # using values.searchsorted here would treat a tuple `val` as a sequence + # instead of a single key, so we use a different implementation + try: + loc = _bin_search(self.values, val) + except TypeError as err: + raise KeyError(val) from err + return loc + + +cdef class DatetimeEngine(Int64Engine): + + cdef int64_t _unbox_scalar(self, scalar) except? -1: + # NB: caller is responsible for ensuring tzawareness compat + # before we get here + if not (isinstance(scalar, _Timestamp) or scalar is NaT): + raise TypeError(scalar) + return scalar.value + + def __contains__(self, val: object) -> bool: + # We assume before we get here: + # - val is hashable + self._unbox_scalar(val) + try: + self.get_loc(val) + return True + except KeyError: + return False + + cdef _call_monotonic(self, values): + return algos.is_monotonic(values, timelike=True) + + cpdef get_loc(self, object val): + # NB: the caller is responsible for ensuring that we are called + # with either a Timestamp or NaT (Timedelta or NaT for TimedeltaEngine) + + cdef: + Py_ssize_t loc + + if is_definitely_invalid_key(val): + raise TypeError(f"'{val}' is an invalid key") + + try: + conv = self._unbox_scalar(val) + except TypeError: + raise KeyError(val) + + # Welcome to the spaghetti factory + if self.over_size_threshold and self.is_monotonic_increasing: + if not self.is_unique: + return self._get_loc_duplicates(conv) + values = self.values + + loc = values.searchsorted(conv, side='left') + + if loc == len(values) or values[loc] != conv: + raise KeyError(val) + return loc + + self._ensure_mapping_populated() + if not self.unique: + return self._get_loc_duplicates(conv) + + try: + return self.mapping.get_item(conv) + except KeyError: + raise KeyError(val) + + +cdef class TimedeltaEngine(DatetimeEngine): + + cdef int64_t _unbox_scalar(self, scalar) except? -1: + if not (isinstance(scalar, _Timedelta) or scalar is NaT): + raise TypeError(scalar) + return scalar.value + + +cdef class PeriodEngine(Int64Engine): + + cdef int64_t _unbox_scalar(self, scalar) except? -1: + if scalar is NaT: + return scalar.value + if is_period_object(scalar): + # NB: we assume that we have the correct freq here. + return scalar.ordinal + raise TypeError(scalar) + + cpdef get_loc(self, object val): + # NB: the caller is responsible for ensuring that we are called + # with either a Period or NaT + cdef: + int64_t conv + + try: + conv = self._unbox_scalar(val) + except TypeError: + raise KeyError(val) + + return Int64Engine.get_loc(self, conv) + + cdef _call_monotonic(self, values): + return algos.is_monotonic(values, timelike=True) + + +cdef class BaseMultiIndexCodesEngine: + """ + Base class for MultiIndexUIntEngine and MultiIndexPyIntEngine, which + represent each label in a MultiIndex as an integer, by juxtaposing the bits + encoding each level, with appropriate offsets. + + For instance: if 3 levels have respectively 3, 6 and 1 possible values, + then their labels can be represented using respectively 2, 3 and 1 bits, + as follows: + _ _ _ _____ _ __ __ __ + |0|0|0| ... |0| 0|a1|a0| -> offset 0 (first level) + — — — ————— — —— —— —— + |0|0|0| ... |0|b2|b1|b0| -> offset 2 (bits required for first level) + — — — ————— — —— —— —— + |0|0|0| ... |0| 0| 0|c0| -> offset 5 (bits required for first two levels) + ‾ ‾ ‾ ‾‾‾‾‾ ‾ ‾‾ ‾‾ ‾‾ + and the resulting unsigned integer representation will be: + _ _ _ _____ _ __ __ __ __ __ __ + |0|0|0| ... 
|0|c0|b2|b1|b0|a1|a0| + ‾ ‾ ‾ ‾‾‾‾‾ ‾ ‾‾ ‾‾ ‾‾ ‾‾ ‾‾ ‾‾ + + Offsets are calculated at initialization, labels are transformed by method + _codes_to_ints. + + Keys are located by first locating each component against the respective + level, then locating (the integer representation of) codes. + """ + def __init__(self, object levels, object labels, + ndarray[uint64_t, ndim=1] offsets): + """ + Parameters + ---------- + levels : list-like of numpy arrays + Levels of the MultiIndex. + labels : list-like of numpy arrays of integer dtype + Labels of the MultiIndex. + offsets : numpy array of uint64 dtype + Pre-calculated offsets, one for each level of the index. + """ + self.levels = levels + self.offsets = offsets + + # Transform labels in a single array, and add 1 so that we are working + # with positive integers (-1 for NaN becomes 0): + codes = (np.array(labels, dtype='int64').T + 1).astype('uint64', + copy=False) + + # Map each codes combination in the index to an integer unambiguously + # (no collisions possible), based on the "offsets", which describe the + # number of bits to switch labels for each level: + lab_ints = self._codes_to_ints(codes) + + # Initialize underlying index (e.g. libindex.UInt64Engine) with + # integers representing labels: we will use its get_loc and get_indexer + self._base.__init__(self, lab_ints) + + def _codes_to_ints(self, ndarray[uint64_t] codes) -> np.ndarray: + raise NotImplementedError("Implemented by subclass") # pragma: no cover + + def _extract_level_codes(self, target) -> np.ndarray: + """ + Map the requested list of (tuple) keys to their integer representations + for searching in the underlying integer index. + + Parameters + ---------- + target : MultiIndex + + Returns + ------ + int_keys : 1-dimensional array of dtype uint64 or object + Integers representing one combination each + """ + zt = [target._get_level_values(i) for i in range(target.nlevels)] + level_codes = [lev.get_indexer_for(codes) + 1 for lev, codes + in zip(self.levels, zt)] + return self._codes_to_ints(np.array(level_codes, dtype='uint64').T) + + def get_indexer(self, target: np.ndarray) -> np.ndarray: + """ + Returns an array giving the positions of each value of `target` in + `self.values`, where -1 represents a value in `target` which does not + appear in `self.values` + + Parameters + ---------- + target : np.ndarray + + Returns + ------- + np.ndarray[intp_t, ndim=1] of the indexer of `target` into + `self.values` + """ + return self._base.get_indexer(self, target) + + def get_indexer_with_fill(self, ndarray target, ndarray values, + str method, object limit) -> np.ndarray: + """ + Returns an array giving the positions of each value of `target` in + `values`, where -1 represents a value in `target` which does not + appear in `values` + + If `method` is "backfill" then the position for a value in `target` + which does not appear in `values` is that of the next greater value + in `values` (if one exists), and -1 if there is no such value. + + Similarly, if the method is "pad" then the position for a value in + `target` which does not appear in `values` is that of the next smaller + value in `values` (if one exists), and -1 if there is no such value. + + Parameters + ---------- + target: ndarray[object] of tuples + need not be sorted, but all must have the same length, which must be + the same as the length of all tuples in `values` + values : ndarray[object] of tuples + must be sorted and all have the same length. Should be the set of + the MultiIndex's values. 
+ method: string + "backfill" or "pad" + limit: int or None + if provided, limit the number of fills to this value + + Returns + ------- + np.ndarray[intp_t, ndim=1] of the indexer of `target` into `values`, + filled with the `method` (and optionally `limit`) specified + """ + assert method in ("backfill", "pad") + cdef: + int64_t i, j, next_code + int64_t num_values, num_target_values + ndarray[int64_t, ndim=1] target_order + ndarray[object, ndim=1] target_values + ndarray[int64_t, ndim=1] new_codes, new_target_codes + ndarray[intp_t, ndim=1] sorted_indexer + + target_order = np.argsort(target).astype('int64') + target_values = target[target_order] + num_values, num_target_values = len(values), len(target_values) + new_codes, new_target_codes = ( + np.empty((num_values,)).astype('int64'), + np.empty((num_target_values,)).astype('int64'), + ) + + # `values` and `target_values` are both sorted, so we walk through them + # and memoize the (ordered) set of indices in the (implicit) merged-and + # sorted list of the two which belong to each of them + # the effect of this is to create a factorization for the (sorted) + # merger of the index values, where `new_codes` and `new_target_codes` + # are the subset of the factors which appear in `values` and `target`, + # respectively + i, j, next_code = 0, 0, 0 + while i < num_values and j < num_target_values: + val, target_val = values[i], target_values[j] + if val <= target_val: + new_codes[i] = next_code + i += 1 + if target_val <= val: + new_target_codes[j] = next_code + j += 1 + next_code += 1 + + # at this point, at least one should have reached the end + # the remaining values of the other should be added to the end + assert i == num_values or j == num_target_values + while i < num_values: + new_codes[i] = next_code + i += 1 + next_code += 1 + while j < num_target_values: + new_target_codes[j] = next_code + j += 1 + next_code += 1 + + # get the indexer, and undo the sorting of `target.values` + algo = algos.backfill if method == "backfill" else algos.pad + sorted_indexer = algo(new_codes, new_target_codes, limit=limit) + return sorted_indexer[np.argsort(target_order)] + + def get_loc(self, object key): + if is_definitely_invalid_key(key): + raise TypeError(f"'{key}' is an invalid key") + if not isinstance(key, tuple): + raise KeyError(key) + try: + indices = [0 if checknull(v) else lev.get_loc(v) + 1 + for lev, v in zip(self.levels, key)] + except KeyError: + raise KeyError(key) + + # Transform indices into single integer: + lab_int = self._codes_to_ints(np.array(indices, dtype='uint64')) + + return self._base.get_loc(self, lab_int) + + def get_indexer_non_unique(self, target: np.ndarray) -> np.ndarray: + indexer = self._base.get_indexer_non_unique(self, target) + + return indexer + + def __contains__(self, val: object) -> bool: + # We assume before we get here: + # - val is hashable + # Default __contains__ looks in the underlying mapping, which in this + # case only contains integer representations. + try: + self.get_loc(val) + return True + except (KeyError, TypeError, ValueError): + return False + + +# Generated from template. +include "index_class_helper.pxi" diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/index_class_helper.pxi.in b/.local/lib/python3.8/site-packages/pandas/_libs/index_class_helper.pxi.in new file mode 100644 index 0000000..7a2bbec --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/index_class_helper.pxi.in @@ -0,0 +1,51 @@ +""" +Template for functions of IndexEngine subclasses. 
+ +WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in +""" + +# ---------------------------------------------------------------------- +# IndexEngine Subclass Methods +# ---------------------------------------------------------------------- + +{{py: + +# name, dtype +dtypes = [('Float64', 'float64'), + ('Float32', 'float32'), + ('Int64', 'int64'), + ('Int32', 'int32'), + ('Int16', 'int16'), + ('Int8', 'int8'), + ('UInt64', 'uint64'), + ('UInt32', 'uint32'), + ('UInt16', 'uint16'), + ('UInt8', 'uint8'), + ] +}} + +{{for name, dtype in dtypes}} + + +cdef class {{name}}Engine(IndexEngine): + + cdef _make_hash_table(self, Py_ssize_t n): + return _hash.{{name}}HashTable(n) + + cdef _check_type(self, object val): + {{if name not in {'Float64', 'Float32'} }} + if not util.is_integer_object(val): + raise KeyError(val) + {{if name.startswith("U")}} + if val < 0: + # cannot have negative values with unsigned int dtype + raise KeyError(val) + {{endif}} + {{else}} + if not util.is_integer_object(val) and not util.is_float_object(val): + # in particular catch bool and avoid casting True -> 1.0 + raise KeyError(val) + {{endif}} + + +{{endfor}} diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/indexing.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/_libs/indexing.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..08b3467 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/indexing.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/indexing.pyx b/.local/lib/python3.8/site-packages/pandas/_libs/indexing.pyx new file mode 100644 index 0000000..181de17 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/indexing.pyx @@ -0,0 +1,25 @@ +cdef class NDFrameIndexerBase: + """ + A base class for _NDFrameIndexer for fast instantiation and attribute access. + """ + cdef public: + str name + object obj, _ndim + + def __init__(self, name: str, obj): + self.obj = obj + self.name = name + self._ndim = None + + @property + def ndim(self) -> int: + # Delay `ndim` instantiation until required as reading it + # from `obj` isn't entirely cheap. + ndim = self._ndim + if ndim is None: + ndim = self._ndim = self.obj.ndim + if ndim > 2: + raise ValueError( # pragma: no cover + "NDFrameIndexer does not support NDFrame objects with ndim > 2" + ) + return ndim diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/internals.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/_libs/internals.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..a447ac9 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/internals.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/internals.pyi b/.local/lib/python3.8/site-packages/pandas/_libs/internals.pyi new file mode 100644 index 0000000..6a90fbc --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/internals.pyi @@ -0,0 +1,85 @@ +from typing import ( + Iterator, + Sequence, + final, + overload, +) + +import numpy as np + +from pandas._typing import ( + ArrayLike, + T, + npt, +) + +from pandas import Index +from pandas.core.arrays._mixins import NDArrayBackedExtensionArray +from pandas.core.internals.blocks import Block as B + +def slice_len(slc: slice, objlen: int = ...) -> int: ... 
+def get_blkno_indexers( + blknos: np.ndarray, # int64_t[:] + group: bool = ..., +) -> list[tuple[int, slice | np.ndarray]]: ... +def get_blkno_placements( + blknos: np.ndarray, + group: bool = ..., +) -> Iterator[tuple[int, BlockPlacement]]: ... +def update_blklocs_and_blknos( + blklocs: npt.NDArray[np.intp], + blknos: npt.NDArray[np.intp], + loc: int, + nblocks: int, +) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ... + +class BlockPlacement: + def __init__(self, val: int | slice | np.ndarray): ... + @property + def indexer(self) -> np.ndarray | slice: ... + @property + def as_array(self) -> np.ndarray: ... + @property + def as_slice(self) -> slice: ... + @property + def is_slice_like(self) -> bool: ... + @overload + def __getitem__(self, loc: slice | Sequence[int]) -> BlockPlacement: ... + @overload + def __getitem__(self, loc: int) -> int: ... + def __iter__(self) -> Iterator[int]: ... + def __len__(self) -> int: ... + def delete(self, loc) -> BlockPlacement: ... + def append(self, others: list[BlockPlacement]) -> BlockPlacement: ... + def tile_for_unstack(self, factor: int) -> npt.NDArray[np.intp]: ... + +class SharedBlock: + _mgr_locs: BlockPlacement + ndim: int + values: ArrayLike + def __init__(self, values: ArrayLike, placement: BlockPlacement, ndim: int): ... + +class NumpyBlock(SharedBlock): + values: np.ndarray + @final + def getitem_block_index(self: T, slicer: slice) -> T: ... + +class NDArrayBackedBlock(SharedBlock): + values: NDArrayBackedExtensionArray + @final + def getitem_block_index(self: T, slicer: slice) -> T: ... + +class Block(SharedBlock): ... + +class BlockManager: + blocks: tuple[B, ...] + axes: list[Index] + _known_consolidated: bool + _is_consolidated: bool + _blknos: np.ndarray + _blklocs: np.ndarray + def __init__( + self, blocks: tuple[B, ...], axes: list[Index], verify_integrity=... + ): ... + def get_slice(self: T, slobj: slice, axis: int = ...) -> T: ... + def _rebuild_blknos_and_blklocs(self) -> None: ... 
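BlockPlacement, declared in the stub above and implemented in internals.pyx below, normalizes an integer, a slice, or an intp array into one object and only materializes the array form on demand. A short sketch of the declared surface (assuming the compiled pandas._libs.internals from this tree; values illustrative):

    import numpy as np
    from pandas._libs.internals import BlockPlacement

    bp = BlockPlacement(slice(0, 6, 2))   # canonicalized and kept as a slice
    len(bp)            # 3
    bp.as_array        # array([0, 2, 4]) -- materialized lazily, dtype=intp
    bp.is_slice_like   # True

    # an indexer that cannot be expressed as a slice stays an array
    bp2 = BlockPlacement(np.array([3, 1, 2], dtype=np.intp))
    bp2.is_slice_like  # False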
diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/internals.pyx b/.local/lib/python3.8/site-packages/pandas/_libs/internals.pyx new file mode 100644 index 0000000..ac423ef --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/internals.pyx @@ -0,0 +1,824 @@ +from collections import defaultdict + +import cython +from cython import Py_ssize_t + +from cpython.slice cimport PySlice_GetIndicesEx + + +cdef extern from "Python.h": + Py_ssize_t PY_SSIZE_T_MAX + +import numpy as np + +cimport numpy as cnp +from numpy cimport ( + NPY_INTP, + int64_t, + intp_t, + ndarray, +) + +cnp.import_array() + +from pandas._libs.algos import ensure_int64 + +from pandas._libs.arrays cimport NDArrayBacked +from pandas._libs.util cimport ( + is_array, + is_integer_object, +) + + +@cython.final +@cython.freelist(32) +cdef class BlockPlacement: + # __slots__ = '_as_slice', '_as_array', '_len' + cdef: + slice _as_slice + ndarray _as_array # Note: this still allows `None`; will be intp_t + bint _has_slice, _has_array, _is_known_slice_like + + def __cinit__(self, val): + cdef: + slice slc + + self._as_slice = None + self._as_array = None + self._has_slice = False + self._has_array = False + + if is_integer_object(val): + slc = slice(val, val + 1, 1) + self._as_slice = slc + self._has_slice = True + elif isinstance(val, slice): + slc = slice_canonize(val) + + if slc.start != slc.stop: + self._as_slice = slc + self._has_slice = True + else: + arr = np.empty(0, dtype=np.intp) + self._as_array = arr + self._has_array = True + else: + # Cython memoryview interface requires ndarray to be writeable. + if ( + not is_array(val) + or not cnp.PyArray_ISWRITEABLE(val) + or (<ndarray>val).descr.type_num != cnp.NPY_INTP + ): + arr = np.require(val, dtype=np.intp, requirements='W') + else: + arr = val + # Caller is responsible for ensuring arr.ndim == 1 + self._as_array = arr + self._has_array = True + + def __str__(self) -> str: + cdef: + slice s = self._ensure_has_slice() + + if s is not None: + v = self._as_slice + else: + v = self._as_array + + return f"{type(self).__name__}({v})" + + def __repr__(self) -> str: + return str(self) + + def __len__(self) -> int: + cdef: + slice s = self._ensure_has_slice() + + if s is not None: + return slice_len(s) + else: + return len(self._as_array) + + def __iter__(self): + cdef: + slice s = self._ensure_has_slice() + Py_ssize_t start, stop, step, _ + + if s is not None: + start, stop, step, _ = slice_get_indices_ex(s) + return iter(range(start, stop, step)) + else: + return iter(self._as_array) + + @property + def as_slice(self) -> slice: + cdef: + slice s = self._ensure_has_slice() + + if s is not None: + return s + else: + raise TypeError("Not slice-like") + + @property + def indexer(self): + cdef: + slice s = self._ensure_has_slice() + + if s is not None: + return s + else: + return self._as_array + + @property + def as_array(self) -> np.ndarray: + cdef: + Py_ssize_t start, stop, end, _ + + if not self._has_array: + start, stop, step, _ = slice_get_indices_ex(self._as_slice) + # NOTE: this is the C-optimized equivalent of + # `np.arange(start, stop, step, dtype=np.intp)` + self._as_array = cnp.PyArray_Arange(start, stop, step, NPY_INTP) + self._has_array = True + + return self._as_array + + @property + def is_slice_like(self) -> bool: + cdef: + slice s = self._ensure_has_slice() + + return s is not None + + def __getitem__(self, loc): + cdef: + slice s = self._ensure_has_slice() + + if s is not None: + val = slice_getitem(s, loc) + else: + val = self._as_array[loc] + + if not
isinstance(val, slice) and val.ndim == 0: + return val + + return BlockPlacement(val) + + def delete(self, loc) -> BlockPlacement: + return BlockPlacement(np.delete(self.as_array, loc, axis=0)) + + def append(self, others) -> BlockPlacement: + if not len(others): + return self + + return BlockPlacement( + np.concatenate([self.as_array] + [o.as_array for o in others]) + ) + + cdef BlockPlacement iadd(self, other): + cdef: + slice s = self._ensure_has_slice() + Py_ssize_t other_int, start, stop, step + + if is_integer_object(other) and s is not None: + other_int = other + + if other_int == 0: + # BlockPlacement is treated as immutable + return self + + start, stop, step, _ = slice_get_indices_ex(s) + start += other_int + stop += other_int + + if (step > 0 and start < 0) or (step < 0 and stop < step): + raise ValueError("iadd causes length change") + + if stop < 0: + val = slice(start, None, step) + else: + val = slice(start, stop, step) + + return BlockPlacement(val) + else: + newarr = self.as_array + other + if (newarr < 0).any(): + raise ValueError("iadd causes length change") + + val = newarr + return BlockPlacement(val) + + def add(self, other) -> BlockPlacement: + # We can get here with int or ndarray + return self.iadd(other) + + cdef slice _ensure_has_slice(self): + if not self._has_slice: + self._as_slice = indexer_as_slice(self._as_array) + self._has_slice = True + + return self._as_slice + + cpdef BlockPlacement increment_above(self, Py_ssize_t loc): + """ + Increment any entries of 'loc' or above by one. + """ + cdef: + slice nv, s = self._ensure_has_slice() + Py_ssize_t other_int, start, stop, step + ndarray[intp_t, ndim=1] newarr + + if s is not None: + # see if we are either all-above or all-below, each of which + # have fastpaths available. + + start, stop, step, _ = slice_get_indices_ex(s) + + if start < loc and stop <= loc: + # We are entirely below, nothing to increment + return self + + if start >= loc and stop >= loc: + # We are entirely above, we can efficiently increment out slice + nv = slice(start + 1, stop + 1, step) + return BlockPlacement(nv) + + if loc == 0: + # fastpath where we know everything is >= 0 + newarr = self.as_array + 1 + return BlockPlacement(newarr) + + newarr = self.as_array.copy() + newarr[newarr >= loc] += 1 + return BlockPlacement(newarr) + + def tile_for_unstack(self, factor: int) -> np.ndarray: + """ + Find the new mgr_locs for the un-stacked version of a Block. + """ + cdef: + slice slc = self._ensure_has_slice() + slice new_slice + ndarray[intp_t, ndim=1] new_placement + + if slc is not None and slc.step == 1: + new_slc = slice(slc.start * factor, slc.stop * factor, 1) + # equiv: np.arange(new_slc.start, new_slc.stop, dtype=np.intp) + new_placement = cnp.PyArray_Arange(new_slc.start, new_slc.stop, 1, NPY_INTP) + else: + # Note: test_pivot_table_empty_aggfunc gets here with `slc is not None` + mapped = [ + # equiv: np.arange(x * factor, (x + 1) * factor, dtype=np.intp) + cnp.PyArray_Arange(x * factor, (x + 1) * factor, 1, NPY_INTP) + for x in self + ] + new_placement = np.concatenate(mapped) + return new_placement + + +cdef slice slice_canonize(slice s): + """ + Convert slice to canonical bounded form. 
+ """ + cdef: + Py_ssize_t start = 0, stop = 0, step = 1 + + if s.step is None: + step = 1 + else: + step = s.step + if step == 0: + raise ValueError("slice step cannot be zero") + + if step > 0: + if s.stop is None: + raise ValueError("unbounded slice") + + stop = s.stop + if s.start is None: + start = 0 + else: + start = s.start + if start > stop: + start = stop + elif step < 0: + if s.start is None: + raise ValueError("unbounded slice") + + start = s.start + if s.stop is None: + stop = -1 + else: + stop = s.stop + if stop > start: + stop = start + + if start < 0 or (stop < 0 and s.stop is not None and step > 0): + raise ValueError("unbounded slice") + + if stop < 0: + return slice(start, None, step) + else: + return slice(start, stop, step) + + +cpdef Py_ssize_t slice_len(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX) except -1: + """ + Get length of a bounded slice. + + The slice must not have any "open" bounds that would create dependency on + container size, i.e.: + - if ``s.step is None or s.step > 0``, ``s.stop`` is not ``None`` + - if ``s.step < 0``, ``s.start`` is not ``None`` + + Otherwise, the result is unreliable. + """ + cdef: + Py_ssize_t start, stop, step, length + + if slc is None: + raise TypeError("slc must be slice") # pragma: no cover + + PySlice_GetIndicesEx(slc, objlen, &start, &stop, &step, &length) + + return length + + +cdef (Py_ssize_t, Py_ssize_t, Py_ssize_t, Py_ssize_t) slice_get_indices_ex( + slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX +): + """ + Get (start, stop, step, length) tuple for a slice. + + If `objlen` is not specified, slice must be bounded, otherwise the result + will be wrong. + """ + cdef: + Py_ssize_t start, stop, step, length + + if slc is None: + raise TypeError("slc should be a slice") # pragma: no cover + + PySlice_GetIndicesEx(slc, objlen, &start, &stop, &step, &length) + + return start, stop, step, length + + +cdef slice_getitem(slice slc, ind): + cdef: + Py_ssize_t s_start, s_stop, s_step, s_len + Py_ssize_t ind_start, ind_stop, ind_step, ind_len + + s_start, s_stop, s_step, s_len = slice_get_indices_ex(slc) + + if isinstance(ind, slice): + ind_start, ind_stop, ind_step, ind_len = slice_get_indices_ex(ind, s_len) + + if ind_step > 0 and ind_len == s_len: + # short-cut for no-op slice + if ind_len == s_len: + return slc + + if ind_step < 0: + s_start = s_stop - s_step + ind_step = -ind_step + + s_step *= ind_step + s_stop = s_start + ind_stop * s_step + s_start = s_start + ind_start * s_step + + if s_step < 0 and s_stop < 0: + return slice(s_start, None, s_step) + else: + return slice(s_start, s_stop, s_step) + + else: + # NOTE: + # this is the C-optimized equivalent of + # `np.arange(s_start, s_stop, s_step, dtype=np.intp)[ind]` + return cnp.PyArray_Arange(s_start, s_stop, s_step, NPY_INTP)[ind] + + +@cython.boundscheck(False) +@cython.wraparound(False) +cdef slice indexer_as_slice(intp_t[:] vals): + cdef: + Py_ssize_t i, n, start, stop + int64_t d + + if vals is None: + raise TypeError("vals must be ndarray") # pragma: no cover + + n = vals.shape[0] + + if n == 0 or vals[0] < 0: + return None + + if n == 1: + return slice(vals[0], vals[0] + 1, 1) + + if vals[1] < 0: + return None + + # n > 2 + d = vals[1] - vals[0] + + if d == 0: + return None + + for i in range(2, n): + if vals[i] < 0 or vals[i] - vals[i - 1] != d: + return None + + start = vals[0] + stop = start + n * d + if stop < 0 and d < 0: + return slice(start, None, d) + else: + return slice(start, stop, d) + + +@cython.boundscheck(False) +@cython.wraparound(False) +def 
get_blkno_indexers( + int64_t[:] blknos, bint group=True +) -> list[tuple[int, slice | np.ndarray]]: + """ + Enumerate contiguous runs of integers in ndarray. + + Iterate over elements of `blknos` yielding ``(blkno, slice(start, stop))`` + pairs for each contiguous run found. + + If `group` is True and there is more than one run for a certain blkno, + ``(blkno, array)`` with an array containing positions of all elements equal + to blkno. + + Returns + ------- + list[tuple[int, slice | np.ndarray]] + """ + # There's blkno in this function's name because it's used in block & + # blockno handling. + cdef: + int64_t cur_blkno + Py_ssize_t i, start, stop, n, diff + cnp.npy_intp tot_len + int64_t blkno + object group_dict = defaultdict(list) + ndarray[int64_t, ndim=1] arr + + n = blknos.shape[0] + result = list() + start = 0 + cur_blkno = blknos[start] + + if n == 0: + pass + elif group is False: + for i in range(1, n): + if blknos[i] != cur_blkno: + result.append((cur_blkno, slice(start, i))) + + start = i + cur_blkno = blknos[i] + + result.append((cur_blkno, slice(start, n))) + else: + for i in range(1, n): + if blknos[i] != cur_blkno: + group_dict[cur_blkno].append((start, i)) + + start = i + cur_blkno = blknos[i] + + group_dict[cur_blkno].append((start, n)) + + for blkno, slices in group_dict.items(): + if len(slices) == 1: + result.append((blkno, slice(slices[0][0], slices[0][1]))) + else: + tot_len = sum(stop - start for start, stop in slices) + # equiv np.empty(tot_len, dtype=np.int64) + arr = cnp.PyArray_EMPTY(1, &tot_len, cnp.NPY_INT64, 0) + + i = 0 + for start, stop in slices: + for diff in range(start, stop): + arr[i] = diff + i += 1 + + result.append((blkno, arr)) + + return result + + +def get_blkno_placements(blknos, group: bool = True): + """ + Parameters + ---------- + blknos : np.ndarray[int64] + group : bool, default True + + Returns + ------- + iterator + yield (blkno, BlockPlacement) + """ + blknos = ensure_int64(blknos) + + for blkno, indexer in get_blkno_indexers(blknos, group): + yield blkno, BlockPlacement(indexer) + + +@cython.boundscheck(False) +@cython.wraparound(False) +cpdef update_blklocs_and_blknos( + ndarray[intp_t, ndim=1] blklocs, + ndarray[intp_t, ndim=1] blknos, + Py_ssize_t loc, + intp_t nblocks, +): + """ + Update blklocs and blknos when a new column is inserted at 'loc'. + """ + cdef: + Py_ssize_t i + cnp.npy_intp length = len(blklocs) + 1 + ndarray[intp_t, ndim=1] new_blklocs, new_blknos + + # equiv: new_blklocs = np.empty(length, dtype=np.intp) + new_blklocs = cnp.PyArray_EMPTY(1, &length, cnp.NPY_INTP, 0) + new_blknos = cnp.PyArray_EMPTY(1, &length, cnp.NPY_INTP, 0) + + for i in range(loc): + new_blklocs[i] = blklocs[i] + new_blknos[i] = blknos[i] + + new_blklocs[loc] = 0 + new_blknos[loc] = nblocks + + for i in range(loc, length - 1): + new_blklocs[i + 1] = blklocs[i] + new_blknos[i + 1] = blknos[i] + + return new_blklocs, new_blknos + + +def _unpickle_block(values, placement, ndim): + # We have to do some gymnastics b/c "ndim" is keyword-only + + from pandas.core.internals.blocks import new_block + + return new_block(values, placement, ndim=ndim) + + +@cython.freelist(64) +cdef class SharedBlock: + """ + Defining __init__ in a cython class significantly improves performance. + """ + cdef: + public BlockPlacement _mgr_locs + readonly int ndim + + def __cinit__(self, values, placement: BlockPlacement, ndim: int): + """ + Parameters + ---------- + values : np.ndarray or ExtensionArray + We assume maybe_coerce_values has already been called. 
+ placement : BlockPlacement + ndim : int + 1 for SingleBlockManager/Series, 2 for BlockManager/DataFrame + """ + self._mgr_locs = placement + self.ndim = ndim + + cpdef __reduce__(self): + args = (self.values, self.mgr_locs.indexer, self.ndim) + return _unpickle_block, args + + cpdef __setstate__(self, state): + from pandas.core.construction import extract_array + + self.mgr_locs = BlockPlacement(state[0]) + self.values = extract_array(state[1], extract_numpy=True) + if len(state) > 2: + # we stored ndim + self.ndim = state[2] + else: + # older pickle + from pandas.core.internals.api import maybe_infer_ndim + + ndim = maybe_infer_ndim(self.values, self.mgr_locs) + self.ndim = ndim + + +cdef class NumpyBlock(SharedBlock): + cdef: + public ndarray values + + def __cinit__(self, ndarray values, BlockPlacement placement, int ndim): + # set values here the (implicit) call to SharedBlock.__cinit__ will + # set placement and ndim + self.values = values + + cpdef NumpyBlock getitem_block_index(self, slice slicer): + """ + Perform __getitem__-like specialized to slicing along index. + + Assumes self.ndim == 2 + """ + new_values = self.values[..., slicer] + return type(self)(new_values, self._mgr_locs, ndim=self.ndim) + + +cdef class NDArrayBackedBlock(SharedBlock): + """ + Block backed by NDArrayBackedExtensionArray + """ + cdef public: + NDArrayBacked values + + def __cinit__(self, NDArrayBacked values, BlockPlacement placement, int ndim): + # set values here the (implicit) call to SharedBlock.__cinit__ will + # set placement and ndim + self.values = values + + cpdef NDArrayBackedBlock getitem_block_index(self, slice slicer): + """ + Perform __getitem__-like specialized to slicing along index. + + Assumes self.ndim == 2 + """ + new_values = self.values[..., slicer] + return type(self)(new_values, self._mgr_locs, ndim=self.ndim) + + +cdef class Block(SharedBlock): + cdef: + public object values + + def __cinit__(self, object values, BlockPlacement placement, int ndim): + # set values here the (implicit) call to SharedBlock.__cinit__ will + # set placement and ndim + self.values = values + + +@cython.freelist(64) +cdef class BlockManager: + cdef: + public tuple blocks + public list axes + public bint _known_consolidated, _is_consolidated + public ndarray _blknos, _blklocs + + def __cinit__(self, blocks=None, axes=None, verify_integrity=True): + # None as defaults for unpickling GH#42345 + if blocks is None: + # This adds 1-2 microseconds to DataFrame(np.array([])) + return + + if isinstance(blocks, list): + # Backward compat for e.g. pyarrow + blocks = tuple(blocks) + + self.blocks = blocks + self.axes = axes.copy() # copy to make sure we are not remotely-mutable + + # Populate known_consolidate, blknos, and blklocs lazily + self._known_consolidated = False + self._is_consolidated = False + self._blknos = None + self._blklocs = None + + # ------------------------------------------------------------------- + # Block Placement + + def _rebuild_blknos_and_blklocs(self) -> None: + """ + Update mgr._blknos / mgr._blklocs. 
+ """ + cdef: + intp_t blkno, i, j + cnp.npy_intp length = self.shape[0] + SharedBlock blk + BlockPlacement bp + ndarray[intp_t, ndim=1] new_blknos, new_blklocs + + # equiv: np.empty(length, dtype=np.intp) + new_blknos = cnp.PyArray_EMPTY(1, &length, cnp.NPY_INTP, 0) + new_blklocs = cnp.PyArray_EMPTY(1, &length, cnp.NPY_INTP, 0) + # equiv: new_blknos.fill(-1) + cnp.PyArray_FILLWBYTE(new_blknos, -1) + cnp.PyArray_FILLWBYTE(new_blklocs, -1) + + for blkno, blk in enumerate(self.blocks): + bp = blk._mgr_locs + # Iterating over `bp` is a faster equivalent to + # new_blknos[bp.indexer] = blkno + # new_blklocs[bp.indexer] = np.arange(len(bp)) + for i, j in enumerate(bp): + new_blknos[j] = blkno + new_blklocs[j] = i + + for i in range(length): + # faster than `for blkno in new_blknos` + # https://github.com/cython/cython/issues/4393 + blkno = new_blknos[i] + + # If there are any -1s remaining, this indicates that our mgr_locs + # are invalid. + if blkno == -1: + raise AssertionError("Gaps in blk ref_locs") + + self._blknos = new_blknos + self._blklocs = new_blklocs + + # ------------------------------------------------------------------- + # Pickle + + cpdef __reduce__(self): + if len(self.axes) == 1: + # SingleBlockManager, __init__ expects Block, axis + args = (self.blocks[0], self.axes[0]) + else: + args = (self.blocks, self.axes) + return type(self), args + + cpdef __setstate__(self, state): + from pandas.core.construction import extract_array + from pandas.core.internals.blocks import ( + ensure_block_shape, + new_block, + ) + from pandas.core.internals.managers import ensure_index + + if isinstance(state, tuple) and len(state) >= 4 and "0.14.1" in state[3]: + state = state[3]["0.14.1"] + axes = [ensure_index(ax) for ax in state["axes"]] + ndim = len(axes) + + for blk in state["blocks"]: + vals = blk["values"] + # older versions may hold e.g. 
DatetimeIndex instead of DTA + vals = extract_array(vals, extract_numpy=True) + blk["values"] = ensure_block_shape(vals, ndim=ndim) + + nbs = [ + new_block(blk["values"], blk["mgr_locs"], ndim=ndim) + for blk in state["blocks"] + ] + blocks = tuple(nbs) + self.blocks = blocks + self.axes = axes + + else: # pragma: no cover + raise NotImplementedError("pre-0.14.1 pickles are no longer supported") + + self._post_setstate() + + def _post_setstate(self) -> None: + self._is_consolidated = False + self._known_consolidated = False + self._rebuild_blknos_and_blklocs() + + # ------------------------------------------------------------------- + # Indexing + + cdef BlockManager _get_index_slice(self, slobj): + cdef: + SharedBlock blk, nb + BlockManager mgr + ndarray blknos, blklocs + + nbs = [] + for blk in self.blocks: + nb = blk.getitem_block_index(slobj) + nbs.append(nb) + + new_axes = [self.axes[0], self.axes[1]._getitem_slice(slobj)] + mgr = type(self)(tuple(nbs), new_axes, verify_integrity=False) + + # We can avoid having to rebuild blklocs/blknos + blklocs = self._blklocs + blknos = self._blknos + if blknos is not None: + mgr._blknos = blknos.copy() + mgr._blklocs = blklocs.copy() + return mgr + + def get_slice(self, slobj: slice, axis: int = 0) -> BlockManager: + + if axis == 0: + new_blocks = self._slice_take_blocks_ax0(slobj) + elif axis == 1: + return self._get_index_slice(slobj) + else: + raise IndexError("Requested axis not found in manager") + + new_axes = list(self.axes) + new_axes[axis] = new_axes[axis]._getitem_slice(slobj) + + return type(self)(tuple(new_blocks), new_axes, verify_integrity=False) diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/interval.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/_libs/interval.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..e2b5a5b Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/interval.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/interval.pyx b/.local/lib/python3.8/site-packages/pandas/_libs/interval.pyx new file mode 100644 index 0000000..aba635e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/interval.pyx @@ -0,0 +1,557 @@ +import numbers +from operator import ( + le, + lt, +) + +from cpython.datetime cimport ( + PyDateTime_IMPORT, + PyDelta_Check, +) + +PyDateTime_IMPORT + +from cpython.object cimport ( + Py_EQ, + Py_GE, + Py_GT, + Py_LE, + Py_LT, + Py_NE, + PyObject_RichCompare, +) + +import cython +from cython import Py_ssize_t +import numpy as np + +cimport numpy as cnp +from numpy cimport ( + NPY_QUICKSORT, + PyArray_ArgSort, + PyArray_Take, + float32_t, + float64_t, + int32_t, + int64_t, + ndarray, + uint64_t, +) + +cnp.import_array() + + +from pandas._libs cimport util +from pandas._libs.hashtable cimport Int64Vector +from pandas._libs.tslibs.timedeltas cimport _Timedelta +from pandas._libs.tslibs.timestamps cimport _Timestamp +from pandas._libs.tslibs.timezones cimport tz_compare +from pandas._libs.tslibs.util cimport ( + is_float_object, + is_integer_object, + is_timedelta64_object, +) + +VALID_CLOSED = frozenset(['left', 'right', 'both', 'neither']) + + +cdef class IntervalMixin: + + @property + def closed_left(self): + """ + Check if the interval is closed on the left side. + + For the meaning of `closed` and `open` see :class:`~pandas.Interval`. + + Returns + ------- + bool + True if the Interval is closed on the left-side. 
+ """ + return self.closed in ('left', 'both') + + @property + def closed_right(self): + """ + Check if the interval is closed on the right side. + + For the meaning of `closed` and `open` see :class:`~pandas.Interval`. + + Returns + ------- + bool + True if the Interval is closed on the left-side. + """ + return self.closed in ('right', 'both') + + @property + def open_left(self): + """ + Check if the interval is open on the left side. + + For the meaning of `closed` and `open` see :class:`~pandas.Interval`. + + Returns + ------- + bool + True if the Interval is closed on the left-side. + """ + return not self.closed_left + + @property + def open_right(self): + """ + Check if the interval is open on the right side. + + For the meaning of `closed` and `open` see :class:`~pandas.Interval`. + + Returns + ------- + bool + True if the Interval is closed on the left-side. + """ + return not self.closed_right + + @property + def mid(self): + """ + Return the midpoint of the Interval. + """ + try: + return 0.5 * (self.left + self.right) + except TypeError: + # datetime safe version + return self.left + 0.5 * self.length + + @property + def length(self): + """ + Return the length of the Interval. + """ + return self.right - self.left + + @property + def is_empty(self): + """ + Indicates if an interval is empty, meaning it contains no points. + + .. versionadded:: 0.25.0 + + Returns + ------- + bool or ndarray + A boolean indicating if a scalar :class:`Interval` is empty, or a + boolean ``ndarray`` positionally indicating if an ``Interval`` in + an :class:`~arrays.IntervalArray` or :class:`IntervalIndex` is + empty. + + Examples + -------- + An :class:`Interval` that contains points is not empty: + + >>> pd.Interval(0, 1, closed='right').is_empty + False + + An ``Interval`` that does not contain any points is empty: + + >>> pd.Interval(0, 0, closed='right').is_empty + True + >>> pd.Interval(0, 0, closed='left').is_empty + True + >>> pd.Interval(0, 0, closed='neither').is_empty + True + + An ``Interval`` that contains a single point is not empty: + + >>> pd.Interval(0, 0, closed='both').is_empty + False + + An :class:`~arrays.IntervalArray` or :class:`IntervalIndex` returns a + boolean ``ndarray`` positionally indicating if an ``Interval`` is + empty: + + >>> ivs = [pd.Interval(0, 0, closed='neither'), + ... pd.Interval(1, 2, closed='neither')] + >>> pd.arrays.IntervalArray(ivs).is_empty + array([ True, False]) + + Missing values are not considered empty: + + >>> ivs = [pd.Interval(0, 0, closed='neither'), np.nan] + >>> pd.IntervalIndex(ivs).is_empty + array([ True, False]) + """ + return (self.right == self.left) & (self.closed != 'both') + + def _check_closed_matches(self, other, name='other'): + """ + Check if the closed attribute of `other` matches. + + Note that 'left' and 'right' are considered different from 'both'. + + Parameters + ---------- + other : Interval, IntervalIndex, IntervalArray + name : str + Name to use for 'other' in the error message. + + Raises + ------ + ValueError + When `other` is not closed exactly the same as self. + """ + if self.closed != other.closed: + raise ValueError(f"'{name}.closed' is {repr(other.closed)}, " + f"expected {repr(self.closed)}.") + + +cdef bint _interval_like(other): + return (hasattr(other, 'left') + and hasattr(other, 'right') + and hasattr(other, 'closed')) + + +cdef class Interval(IntervalMixin): + """ + Immutable object implementing an Interval, a bounded slice-like interval. 
+ + Parameters + ---------- + left : orderable scalar + Left bound for the interval. + right : orderable scalar + Right bound for the interval. + closed : {'right', 'left', 'both', 'neither'}, default 'right' + Whether the interval is closed on the left-side, right-side, both or + neither. See the Notes for more detailed explanation. + + See Also + -------- + IntervalIndex : An Index of Interval objects that are all closed on the + same side. + cut : Convert continuous data into discrete bins (Categorical + of Interval objects). + qcut : Convert continuous data into bins (Categorical of Interval objects) + based on quantiles. + Period : Represents a period of time. + + Notes + ----- + The parameters `left` and `right` must be from the same type, you must be + able to compare them and they must satisfy ``left <= right``. + + A closed interval (in mathematics denoted by square brackets) contains + its endpoints, i.e. the closed interval ``[0, 5]`` is characterized by the + conditions ``0 <= x <= 5``. This is what ``closed='both'`` stands for. + An open interval (in mathematics denoted by parentheses) does not contain + its endpoints, i.e. the open interval ``(0, 5)`` is characterized by the + conditions ``0 < x < 5``. This is what ``closed='neither'`` stands for. + Intervals can also be half-open or half-closed, i.e. ``[0, 5)`` is + described by ``0 <= x < 5`` (``closed='left'``) and ``(0, 5]`` is + described by ``0 < x <= 5`` (``closed='right'``). + + Examples + -------- + It is possible to build Intervals of different types, like numeric ones: + + >>> iv = pd.Interval(left=0, right=5) + >>> iv + Interval(0, 5, closed='right') + + You can check if an element belongs to it + + >>> 2.5 in iv + True + + You can test the bounds (``closed='right'``, so ``0 < x <= 5``): + + >>> 0 in iv + False + >>> 5 in iv + True + >>> 0.0001 in iv + True + + Calculate its length + + >>> iv.length + 5 + + You can operate with `+` and `*` over an Interval and the operation + is applied to each of its bounds, so the result depends on the type + of the bound elements + + >>> shifted_iv = iv + 3 + >>> shifted_iv + Interval(3, 8, closed='right') + >>> extended_iv = iv * 10.0 + >>> extended_iv + Interval(0.0, 50.0, closed='right') + + To create a time interval you can use Timestamps as the bounds + + >>> year_2017 = pd.Interval(pd.Timestamp('2017-01-01 00:00:00'), + ... pd.Timestamp('2018-01-01 00:00:00'), + ... closed='left') + >>> pd.Timestamp('2017-01-01 00:00') in year_2017 + True + >>> year_2017.length + Timedelta('365 days 00:00:00') + """ + _typ = "interval" + __array_priority__ = 1000 + + cdef readonly object left + """ + Left bound for the interval. + """ + + cdef readonly object right + """ + Right bound for the interval. + """ + + cdef readonly str closed + """ + Whether the interval is closed on the left-side, right-side, both or + neither. 
+ """ + + def __init__(self, left, right, str closed='right'): + # note: it is faster to just do these checks than to use a special + # constructor (__cinit__/__new__) to avoid them + + self._validate_endpoint(left) + self._validate_endpoint(right) + + if closed not in VALID_CLOSED: + raise ValueError(f"invalid option for 'closed': {closed}") + if not left <= right: + raise ValueError("left side of interval must be <= right side") + if (isinstance(left, _Timestamp) and + not tz_compare(left.tzinfo, right.tzinfo)): + # GH 18538 + raise ValueError("left and right must have the same time zone, got " + f"{repr(left.tzinfo)}' and {repr(right.tzinfo)}") + self.left = left + self.right = right + self.closed = closed + + def _validate_endpoint(self, endpoint): + # GH 23013 + if not (is_integer_object(endpoint) or is_float_object(endpoint) or + isinstance(endpoint, (_Timestamp, _Timedelta))): + raise ValueError("Only numeric, Timestamp and Timedelta endpoints " + "are allowed when constructing an Interval.") + + def __hash__(self): + return hash((self.left, self.right, self.closed)) + + def __contains__(self, key) -> bool: + if _interval_like(key): + raise TypeError("__contains__ not defined for two intervals") + return ((self.left < key if self.open_left else self.left <= key) and + (key < self.right if self.open_right else key <= self.right)) + + def __richcmp__(self, other, op: int): + if isinstance(other, Interval): + self_tuple = (self.left, self.right, self.closed) + other_tuple = (other.left, other.right, other.closed) + return PyObject_RichCompare(self_tuple, other_tuple, op) + elif util.is_array(other): + return np.array( + [PyObject_RichCompare(self, x, op) for x in other], + dtype=bool, + ) + + return NotImplemented + + def __reduce__(self): + args = (self.left, self.right, self.closed) + return (type(self), args) + + def _repr_base(self): + left = self.left + right = self.right + + # TODO: need more general formatting methodology here + if isinstance(left, _Timestamp) and isinstance(right, _Timestamp): + left = left._short_repr + right = right._short_repr + + return left, right + + def __repr__(self) -> str: + + left, right = self._repr_base() + name = type(self).__name__ + repr_str = f'{name}({repr(left)}, {repr(right)}, closed={repr(self.closed)})' + return repr_str + + def __str__(self) -> str: + + left, right = self._repr_base() + start_symbol = '[' if self.closed_left else '(' + end_symbol = ']' if self.closed_right else ')' + return f'{start_symbol}{left}, {right}{end_symbol}' + + def __add__(self, y): + if ( + isinstance(y, numbers.Number) + or PyDelta_Check(y) + or is_timedelta64_object(y) + ): + return Interval(self.left + y, self.right + y, closed=self.closed) + elif ( + isinstance(y, Interval) + and ( + isinstance(self, numbers.Number) + or PyDelta_Check(self) + or is_timedelta64_object(self) + ) + ): + return Interval(y.left + self, y.right + self, closed=y.closed) + return NotImplemented + + def __sub__(self, y): + if ( + isinstance(y, numbers.Number) + or PyDelta_Check(y) + or is_timedelta64_object(y) + ): + return Interval(self.left - y, self.right - y, closed=self.closed) + return NotImplemented + + def __mul__(self, y): + if isinstance(y, numbers.Number): + return Interval(self.left * y, self.right * y, closed=self.closed) + elif isinstance(y, Interval) and isinstance(self, numbers.Number): + return Interval(y.left * self, y.right * self, closed=y.closed) + return NotImplemented + + def __truediv__(self, y): + if isinstance(y, numbers.Number): + return 
Interval(self.left / y, self.right / y, closed=self.closed) + return NotImplemented + + def __floordiv__(self, y): + if isinstance(y, numbers.Number): + return Interval( + self.left // y, self.right // y, closed=self.closed) + return NotImplemented + + def overlaps(self, other): + """ + Check whether two Interval objects overlap. + + Two intervals overlap if they share a common point, including closed + endpoints. Intervals that only have an open endpoint in common do not + overlap. + + Parameters + ---------- + other : Interval + Interval to check against for an overlap. + + Returns + ------- + bool + True if the two intervals overlap. + + See Also + -------- + IntervalArray.overlaps : The corresponding method for IntervalArray. + IntervalIndex.overlaps : The corresponding method for IntervalIndex. + + Examples + -------- + >>> i1 = pd.Interval(0, 2) + >>> i2 = pd.Interval(1, 3) + >>> i1.overlaps(i2) + True + >>> i3 = pd.Interval(4, 5) + >>> i1.overlaps(i3) + False + + Intervals that share closed endpoints overlap: + + >>> i4 = pd.Interval(0, 1, closed='both') + >>> i5 = pd.Interval(1, 2, closed='both') + >>> i4.overlaps(i5) + True + + Intervals that only have an open endpoint in common do not overlap: + + >>> i6 = pd.Interval(1, 2, closed='neither') + >>> i4.overlaps(i6) + False + """ + if not isinstance(other, Interval): + raise TypeError("`other` must be an Interval, " + f"got {type(other).__name__}") + + # equality is okay if both endpoints are closed (overlap at a point) + op1 = le if (self.closed_left and other.closed_right) else lt + op2 = le if (other.closed_left and self.closed_right) else lt + + # overlaps is equivalent negation of two interval being disjoint: + # disjoint = (A.left > B.right) or (B.left > A.right) + # (simplifying the negation allows this to be done in less operations) + return op1(self.left, other.right) and op2(other.left, self.right) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def intervals_to_interval_bounds(ndarray intervals, bint validate_closed=True): + """ + Parameters + ---------- + intervals : ndarray + Object array of Intervals / nulls. + + validate_closed: bool, default True + Boolean indicating if all intervals must be closed on the same side. + Mismatching closed will raise if True, else return None for closed. 
+ + Returns + ------- + tuple of + left : ndarray + right : ndarray + closed: str + """ + cdef: + object closed = None, interval + Py_ssize_t i, n = len(intervals) + ndarray left, right + bint seen_closed = False + + left = np.empty(n, dtype=intervals.dtype) + right = np.empty(n, dtype=intervals.dtype) + + for i in range(n): + interval = intervals[i] + if interval is None or util.is_nan(interval): + left[i] = np.nan + right[i] = np.nan + continue + + if not isinstance(interval, Interval): + raise TypeError(f"type {type(interval)} with value " + f"{interval} is not an interval") + + left[i] = interval.left + right[i] = interval.right + if not seen_closed: + seen_closed = True + closed = interval.closed + elif closed != interval.closed: + closed = None + if validate_closed: + raise ValueError("intervals must all be closed on the same side") + + return left, right, closed + + +include "intervaltree.pxi" diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/intervaltree.pxi.in b/.local/lib/python3.8/site-packages/pandas/_libs/intervaltree.pxi.in new file mode 100644 index 0000000..547fcc0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/intervaltree.pxi.in @@ -0,0 +1,427 @@ +""" +Template for intervaltree + +WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in +""" + +from pandas._libs.algos import is_monotonic + +ctypedef fused int_scalar_t: + int64_t + float64_t + +ctypedef fused uint_scalar_t: + uint64_t + float64_t + +ctypedef fused scalar_t: + int_scalar_t + uint_scalar_t + +# ---------------------------------------------------------------------- +# IntervalTree +# ---------------------------------------------------------------------- + +cdef class IntervalTree(IntervalMixin): + """A centered interval tree + + Based off the algorithm described on Wikipedia: + https://en.wikipedia.org/wiki/Interval_tree + + we are emulating the IndexEngine interface + """ + cdef readonly: + ndarray left, right + IntervalNode root + object dtype + str closed + object _is_overlapping, _left_sorter, _right_sorter + Py_ssize_t _na_count + + def __init__(self, left, right, closed='right', leaf_size=100): + """ + Parameters + ---------- + left, right : np.ndarray[ndim=1] + Left and right bounds for each interval. Assumed to contain no + NaNs. + closed : {'left', 'right', 'both', 'neither'}, optional + Whether the intervals are closed on the left-side, right-side, both + or neither. Defaults to 'right'. + leaf_size : int, optional + Parameter that controls when the tree switches from creating nodes + to brute-force search. Tune this parameter to optimize query + performance. 
+ """ + if closed not in ['left', 'right', 'both', 'neither']: + raise ValueError("invalid option for 'closed': %s" % closed) + + left = np.asarray(left) + right = np.asarray(right) + self.dtype = np.result_type(left, right) + self.left = np.asarray(left, dtype=self.dtype) + self.right = np.asarray(right, dtype=self.dtype) + + indices = np.arange(len(left), dtype='int64') + + self.closed = closed + + # GH 23352: ensure no nan in nodes + mask = ~np.isnan(self.left) + self._na_count = len(mask) - mask.sum() + self.left = self.left[mask] + self.right = self.right[mask] + indices = indices[mask] + + node_cls = NODE_CLASSES[str(self.dtype), closed] + self.root = node_cls(self.left, self.right, indices, leaf_size) + + @property + def left_sorter(self) -> np.ndarray: + """How to sort the left labels; this is used for binary search + """ + if self._left_sorter is None: + self._left_sorter = np.argsort(self.left) + return self._left_sorter + + @property + def right_sorter(self) -> np.ndarray: + """How to sort the right labels + """ + if self._right_sorter is None: + self._right_sorter = np.argsort(self.right) + return self._right_sorter + + @property + def is_overlapping(self) -> bool: + """ + Determine if the IntervalTree contains overlapping intervals. + Cached as self._is_overlapping. + """ + if self._is_overlapping is not None: + return self._is_overlapping + + # <= when both sides closed since endpoints can overlap + op = le if self.closed == 'both' else lt + + # overlap if start of current interval < end of previous interval + # (current and previous in terms of sorted order by left/start side) + current = self.left[self.left_sorter[1:]] + previous = self.right[self.left_sorter[:-1]] + self._is_overlapping = bool(op(current, previous).any()) + + return self._is_overlapping + + @property + def is_monotonic_increasing(self) -> bool: + """ + Return True if the IntervalTree is monotonic increasing (only equal or + increasing values), else False + """ + if self._na_count > 0: + return False + values = [self.right, self.left] + + sort_order = np.lexsort(values) + return is_monotonic(sort_order, False)[0] + + def get_indexer(self, scalar_t[:] target) -> np.ndarray: + """Return the positions corresponding to unique intervals that overlap + with the given array of scalar targets. + """ + + # TODO: write get_indexer_intervals + cdef: + Py_ssize_t old_len + Py_ssize_t i + Int64Vector result + + result = Int64Vector() + old_len = 0 + for i in range(len(target)): + try: + self.root.query(result, target[i]) + except OverflowError: + # overflow -> no match, which is already handled below + pass + + if result.data.n == old_len: + result.append(-1) + elif result.data.n > old_len + 1: + raise KeyError( + 'indexer does not intersect a unique set of intervals') + old_len = result.data.n + return result.to_array().astype('intp') + + def get_indexer_non_unique(self, scalar_t[:] target): + """Return the positions corresponding to intervals that overlap with + the given array of scalar targets. Non-unique positions are repeated. 
+ """ + cdef: + Py_ssize_t old_len + Py_ssize_t i + Int64Vector result, missing + + result = Int64Vector() + missing = Int64Vector() + old_len = 0 + for i in range(len(target)): + try: + self.root.query(result, target[i]) + except OverflowError: + # overflow -> no match, which is already handled below + pass + + if result.data.n == old_len: + result.append(-1) + missing.append(i) + old_len = result.data.n + return (result.to_array().astype('intp'), + missing.to_array().astype('intp')) + + def __repr__(self) -> str: + return (''.format( + dtype=self.dtype, closed=self.closed, + n_elements=self.root.n_elements)) + + # compat with IndexEngine interface + def clear_mapping(self) -> None: + pass + + +cdef take(ndarray source, ndarray indices): + """Take the given positions from a 1D ndarray + """ + return PyArray_Take(source, indices, 0) + + +cdef sort_values_and_indices(all_values, all_indices, subset): + indices = take(all_indices, subset) + values = take(all_values, subset) + sorter = PyArray_ArgSort(values, 0, NPY_QUICKSORT) + sorted_values = take(values, sorter) + sorted_indices = take(indices, sorter) + return sorted_values, sorted_indices + + +# ---------------------------------------------------------------------- +# Nodes +# ---------------------------------------------------------------------- + +@cython.internal +cdef class IntervalNode: + cdef readonly: + int64_t n_elements, n_center, leaf_size + bint is_leaf_node + + def __repr__(self) -> str: + if self.is_leaf_node: + return ( + f"<{type(self).__name__}: {self.n_elements} elements (terminal)>" + ) + else: + n_left = self.left_node.n_elements + n_right = self.right_node.n_elements + n_center = self.n_elements - n_left - n_right + return ( + f"<{type(self).__name__}: " + f"pivot {self.pivot}, {self.n_elements} elements " + f"({n_left} left, {n_right} right, {n_center} overlapping)>" + ) + + def counts(self): + """ + Inspect counts on this node + useful for debugging purposes + """ + if self.is_leaf_node: + return self.n_elements + else: + m = len(self.center_left_values) + l = self.left_node.counts() + r = self.right_node.counts() + return (m, (l, r)) + + +# we need specialized nodes and leaves to optimize for different dtype and +# closed values + +{{py: + +nodes = [] +for dtype in ['float64', 'int64', 'uint64']: + for closed, cmp_left, cmp_right in [ + ('left', '<=', '<'), + ('right', '<', '<='), + ('both', '<=', '<='), + ('neither', '<', '<')]: + cmp_left_converse = '<' if cmp_left == '<=' else '<=' + cmp_right_converse = '<' if cmp_right == '<=' else '<=' + if dtype.startswith('int'): + fused_prefix = 'int_' + elif dtype.startswith('uint'): + fused_prefix = 'uint_' + elif dtype.startswith('float'): + fused_prefix = '' + nodes.append((dtype, dtype.title(), + closed, closed.title(), + cmp_left, + cmp_right, + cmp_left_converse, + cmp_right_converse, + fused_prefix)) + +}} + +NODE_CLASSES = {} + +{{for dtype, dtype_title, closed, closed_title, cmp_left, cmp_right, + cmp_left_converse, cmp_right_converse, fused_prefix in nodes}} + + +@cython.internal +cdef class {{dtype_title}}Closed{{closed_title}}IntervalNode(IntervalNode): + """Non-terminal node for an IntervalTree + + Categorizes intervals by those that fall to the left, those that fall to + the right, and those that overlap with the pivot. 
+ """ + cdef readonly: + {{dtype_title}}Closed{{closed_title}}IntervalNode left_node, right_node + {{dtype}}_t[:] center_left_values, center_right_values, left, right + int64_t[:] center_left_indices, center_right_indices, indices + {{dtype}}_t min_left, max_right + {{dtype}}_t pivot + + def __init__(self, + ndarray[{{dtype}}_t, ndim=1] left, + ndarray[{{dtype}}_t, ndim=1] right, + ndarray[int64_t, ndim=1] indices, + int64_t leaf_size): + + self.n_elements = len(left) + self.leaf_size = leaf_size + + # min_left and min_right are used to speed-up query by skipping + # query on sub-nodes. If this node has size 0, query is cheap, + # so these values don't matter. + if left.size > 0: + self.min_left = left.min() + self.max_right = right.max() + else: + self.min_left = 0 + self.max_right = 0 + + if self.n_elements <= leaf_size: + # make this a terminal (leaf) node + self.is_leaf_node = True + self.left = left + self.right = right + self.indices = indices + self.n_center = 0 + else: + # calculate a pivot so we can create child nodes + self.is_leaf_node = False + self.pivot = np.median(left / 2 + right / 2) + left_set, right_set, center_set = self.classify_intervals( + left, right) + + self.left_node = self.new_child_node(left, right, + indices, left_set) + self.right_node = self.new_child_node(left, right, + indices, right_set) + + self.center_left_values, self.center_left_indices = \ + sort_values_and_indices(left, indices, center_set) + self.center_right_values, self.center_right_indices = \ + sort_values_and_indices(right, indices, center_set) + self.n_center = len(self.center_left_indices) + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef classify_intervals(self, {{dtype}}_t[:] left, {{dtype}}_t[:] right): + """Classify the given intervals based upon whether they fall to the + left, right, or overlap with this node's pivot. + """ + cdef: + Int64Vector left_ind, right_ind, overlapping_ind + Py_ssize_t i + + left_ind = Int64Vector() + right_ind = Int64Vector() + overlapping_ind = Int64Vector() + + for i in range(self.n_elements): + if right[i] {{cmp_right_converse}} self.pivot: + left_ind.append(i) + elif self.pivot {{cmp_left_converse}} left[i]: + right_ind.append(i) + else: + overlapping_ind.append(i) + + return (left_ind.to_array(), + right_ind.to_array(), + overlapping_ind.to_array()) + + cdef new_child_node(self, + ndarray[{{dtype}}_t, ndim=1] left, + ndarray[{{dtype}}_t, ndim=1] right, + ndarray[int64_t, ndim=1] indices, + ndarray[int64_t, ndim=1] subset): + """Create a new child node. + """ + left = take(left, subset) + right = take(right, subset) + indices = take(indices, subset) + return {{dtype_title}}Closed{{closed_title}}IntervalNode( + left, right, indices, self.leaf_size) + + @cython.wraparound(False) + @cython.boundscheck(False) + @cython.initializedcheck(False) + cpdef query(self, Int64Vector result, {{fused_prefix}}scalar_t point): + """Recursively query this node and its sub-nodes for intervals that + overlap with the query point. + """ + cdef: + int64_t[:] indices + {{dtype}}_t[:] values + Py_ssize_t i + + if self.is_leaf_node: + # Once we get down to a certain size, it doesn't make sense to + # continue the binary tree structure. Instead, we use linear + # search. + for i in range(self.n_elements): + if self.left[i] {{cmp_left}} point {{cmp_right}} self.right[i]: + result.append(self.indices[i]) + else: + # There are child nodes. Based on comparing our query to the pivot, + # look at the center values, then go to the relevant child. 
+ if point < self.pivot: + values = self.center_left_values + indices = self.center_left_indices + for i in range(self.n_center): + if not values[i] {{cmp_left}} point: + break + result.append(indices[i]) + if point {{cmp_right}} self.left_node.max_right: + self.left_node.query(result, point) + elif point > self.pivot: + values = self.center_right_values + indices = self.center_right_indices + for i in range(self.n_center - 1, -1, -1): + if not point {{cmp_right}} values[i]: + break + result.append(indices[i]) + if self.right_node.min_left {{cmp_left}} point: + self.right_node.query(result, point) + else: + result.extend(self.center_left_indices) + + +NODE_CLASSES['{{dtype}}', + '{{closed}}'] = {{dtype_title}}Closed{{closed_title}}IntervalNode + +{{endfor}} diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/join.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/_libs/join.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..b6c1c4c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/join.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/join.pyi b/.local/lib/python3.8/site-packages/pandas/_libs/join.pyi new file mode 100644 index 0000000..a5e91e2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/join.pyi @@ -0,0 +1,93 @@ +import numpy as np + +from pandas._typing import npt + +def inner_join( + left: np.ndarray, # const intp_t[:] + right: np.ndarray, # const intp_t[:] + max_groups: int, +) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ... +def left_outer_join( + left: np.ndarray, # const intp_t[:] + right: np.ndarray, # const intp_t[:] + max_groups: int, + sort: bool = ..., +) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ... +def full_outer_join( + left: np.ndarray, # const intp_t[:] + right: np.ndarray, # const intp_t[:] + max_groups: int, +) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ... +def ffill_indexer( + indexer: np.ndarray, # const intp_t[:] +) -> npt.NDArray[np.intp]: ... +def left_join_indexer_unique( + left: np.ndarray, # ndarray[join_t] + right: np.ndarray, # ndarray[join_t] +) -> npt.NDArray[np.intp]: ... +def left_join_indexer( + left: np.ndarray, # ndarray[join_t] + right: np.ndarray, # ndarray[join_t] +) -> tuple[ + np.ndarray, # np.ndarray[join_t] + npt.NDArray[np.intp], + npt.NDArray[np.intp], +]: ... +def inner_join_indexer( + left: np.ndarray, # ndarray[join_t] + right: np.ndarray, # ndarray[join_t] +) -> tuple[ + np.ndarray, # np.ndarray[join_t] + npt.NDArray[np.intp], + npt.NDArray[np.intp], +]: ... +def outer_join_indexer( + left: np.ndarray, # ndarray[join_t] + right: np.ndarray, # ndarray[join_t] +) -> tuple[ + np.ndarray, # np.ndarray[join_t] + npt.NDArray[np.intp], + npt.NDArray[np.intp], +]: ... +def asof_join_backward_on_X_by_Y( + left_values: np.ndarray, # asof_t[:] + right_values: np.ndarray, # asof_t[:] + left_by_values: np.ndarray, # by_t[:] + right_by_values: np.ndarray, # by_t[:] + allow_exact_matches: bool = ..., + tolerance: np.number | int | float | None = ..., +) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ... +def asof_join_forward_on_X_by_Y( + left_values: np.ndarray, # asof_t[:] + right_values: np.ndarray, # asof_t[:] + left_by_values: np.ndarray, # by_t[:] + right_by_values: np.ndarray, # by_t[:] + allow_exact_matches: bool = ..., + tolerance: np.number | int | float | None = ..., +) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ... 
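# Annotation (illustration only, not part of the stub file): the asof join
# semantics sketched in pure Python for the backward direction, ignoring the
# `tolerance` and by-group variants declared above. Both inputs are assumed
# sorted ascending; -1 marks "no match", as in the Cython implementations.
import bisect

def asof_backward_py(left_values, right_values, allow_exact_matches=True):
    locate = bisect.bisect_right if allow_exact_matches else bisect.bisect_left
    # for each left value, the index of the last right value <= it
    # (strictly < when exact matches are disallowed)
    return [locate(right_values, lv) - 1 for lv in left_values]

asof_backward_py([1, 5, 10], [2, 3, 7])  # [-1, 1, 2]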
+def asof_join_nearest_on_X_by_Y( + left_values: np.ndarray, # asof_t[:] + right_values: np.ndarray, # asof_t[:] + left_by_values: np.ndarray, # by_t[:] + right_by_values: np.ndarray, # by_t[:] + allow_exact_matches: bool = ..., + tolerance: np.number | int | float | None = ..., +) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ... +def asof_join_backward( + left_values: np.ndarray, # asof_t[:] + right_values: np.ndarray, # asof_t[:] + allow_exact_matches: bool = ..., + tolerance: np.number | int | float | None = ..., +) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ... +def asof_join_forward( + left_values: np.ndarray, # asof_t[:] + right_values: np.ndarray, # asof_t[:] + allow_exact_matches: bool = ..., + tolerance: np.number | int | float | None = ..., +) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ... +def asof_join_nearest( + left_values: np.ndarray, # asof_t[:] + right_values: np.ndarray, # asof_t[:] + allow_exact_matches: bool = ..., + tolerance: np.number | int | float | None = ..., +) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ... diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/join.pyx b/.local/lib/python3.8/site-packages/pandas/_libs/join.pyx new file mode 100644 index 0000000..b908fa2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/join.pyx @@ -0,0 +1,1006 @@ +import cython +from cython import Py_ssize_t +import numpy as np + +cimport numpy as cnp +from numpy cimport ( + int64_t, + intp_t, + ndarray, + uint64_t, +) + +cnp.import_array() + +from pandas._libs.algos import groupsort_indexer + +from pandas._libs.dtypes cimport ( + numeric_object_t, + numeric_t, +) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def inner_join(const intp_t[:] left, const intp_t[:] right, + Py_ssize_t max_groups): + cdef: + Py_ssize_t i, j, k, count = 0 + intp_t[::1] left_sorter, right_sorter + intp_t[::1] left_count, right_count + intp_t[::1] left_indexer, right_indexer + intp_t lc, rc + Py_ssize_t left_pos = 0, right_pos = 0, position = 0 + Py_ssize_t offset + + left_sorter, left_count = groupsort_indexer(left, max_groups) + right_sorter, right_count = groupsort_indexer(right, max_groups) + + with nogil: + # First pass, determine size of result set, do not use the NA group + for i in range(1, max_groups + 1): + lc = left_count[i] + rc = right_count[i] + + if rc > 0 and lc > 0: + count += lc * rc + + left_indexer = np.empty(count, dtype=np.intp) + right_indexer = np.empty(count, dtype=np.intp) + + with nogil: + # exclude the NA group + left_pos = left_count[0] + right_pos = right_count[0] + for i in range(1, max_groups + 1): + lc = left_count[i] + rc = right_count[i] + + if rc > 0 and lc > 0: + for j in range(lc): + offset = position + j * rc + for k in range(rc): + left_indexer[offset + k] = left_pos + j + right_indexer[offset + k] = right_pos + k + position += lc * rc + left_pos += lc + right_pos += rc + + # Will overwrite left/right indexer with the result + _get_result_indexer(left_sorter, left_indexer) + _get_result_indexer(right_sorter, right_indexer) + + return np.asarray(left_indexer), np.asarray(right_indexer) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def left_outer_join(const intp_t[:] left, const intp_t[:] right, + Py_ssize_t max_groups, bint sort=True): + cdef: + Py_ssize_t i, j, k, count = 0 + ndarray[intp_t] rev + intp_t[::1] left_count, right_count + intp_t[::1] left_sorter, right_sorter + intp_t[::1] left_indexer, right_indexer + intp_t lc, rc + Py_ssize_t left_pos = 0, right_pos = 0, 
position = 0 + Py_ssize_t offset + + left_sorter, left_count = groupsort_indexer(left, max_groups) + right_sorter, right_count = groupsort_indexer(right, max_groups) + + with nogil: + # First pass, determine size of result set, do not use the NA group + for i in range(1, max_groups + 1): + if right_count[i] > 0: + count += left_count[i] * right_count[i] + else: + count += left_count[i] + + left_indexer = np.empty(count, dtype=np.intp) + right_indexer = np.empty(count, dtype=np.intp) + + with nogil: + # exclude the NA group + left_pos = left_count[0] + right_pos = right_count[0] + for i in range(1, max_groups + 1): + lc = left_count[i] + rc = right_count[i] + + if rc == 0: + for j in range(lc): + left_indexer[position + j] = left_pos + j + right_indexer[position + j] = -1 + position += lc + else: + for j in range(lc): + offset = position + j * rc + for k in range(rc): + left_indexer[offset + k] = left_pos + j + right_indexer[offset + k] = right_pos + k + position += lc * rc + left_pos += lc + right_pos += rc + + # Will overwrite left/right indexer with the result + _get_result_indexer(left_sorter, left_indexer) + _get_result_indexer(right_sorter, right_indexer) + + if not sort: # if not asked to sort, revert to original order + if len(left) == len(left_indexer): + # no multiple matches for any row on the left + # this is a short-cut to avoid groupsort_indexer + # otherwise, the `else` path also works in this case + rev = np.empty(len(left), dtype=np.intp) + rev.put(np.asarray(left_sorter), np.arange(len(left))) + else: + rev, _ = groupsort_indexer(left_indexer, len(left)) + + return np.asarray(left_indexer).take(rev), np.asarray(right_indexer).take(rev) + else: + return np.asarray(left_indexer), np.asarray(right_indexer) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def full_outer_join(const intp_t[:] left, const intp_t[:] right, + Py_ssize_t max_groups): + cdef: + Py_ssize_t i, j, k, count = 0 + intp_t[::1] left_sorter, right_sorter + intp_t[::1] left_count, right_count + intp_t[::1] left_indexer, right_indexer + intp_t lc, rc + intp_t left_pos = 0, right_pos = 0 + Py_ssize_t offset, position = 0 + + left_sorter, left_count = groupsort_indexer(left, max_groups) + right_sorter, right_count = groupsort_indexer(right, max_groups) + + with nogil: + # First pass, determine size of result set, do not use the NA group + for i in range(1, max_groups + 1): + lc = left_count[i] + rc = right_count[i] + + if rc > 0 and lc > 0: + count += lc * rc + else: + count += lc + rc + + left_indexer = np.empty(count, dtype=np.intp) + right_indexer = np.empty(count, dtype=np.intp) + + with nogil: + # exclude the NA group + left_pos = left_count[0] + right_pos = right_count[0] + for i in range(1, max_groups + 1): + lc = left_count[i] + rc = right_count[i] + + if rc == 0: + for j in range(lc): + left_indexer[position + j] = left_pos + j + right_indexer[position + j] = -1 + position += lc + elif lc == 0: + for j in range(rc): + left_indexer[position + j] = -1 + right_indexer[position + j] = right_pos + j + position += rc + else: + for j in range(lc): + offset = position + j * rc + for k in range(rc): + left_indexer[offset + k] = left_pos + j + right_indexer[offset + k] = right_pos + k + position += lc * rc + left_pos += lc + right_pos += rc + + # Will overwrite left/right indexer with the result + _get_result_indexer(left_sorter, left_indexer) + _get_result_indexer(right_sorter, right_indexer) + + return np.asarray(left_indexer), np.asarray(right_indexer) + + +@cython.wraparound(False) 
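# Annotation (editor's note, not upstream code): the helper below composes two
# index maps in place -- indexer[i] becomes sorter[indexer[i]], with -1 (no
# match) passed through -- translating group-sorted positions back to original
# row order without allocating a third array.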
+@cython.boundscheck(False) +cdef void _get_result_indexer(intp_t[::1] sorter, intp_t[::1] indexer) nogil: + """NOTE: overwrites indexer with the result to avoid allocating another array""" + cdef: + Py_ssize_t i, n, idx + + if len(sorter) > 0: + # cython-only equivalent to + # `res = algos.take_nd(sorter, indexer, fill_value=-1)` + n = indexer.shape[0] + for i in range(n): + idx = indexer[i] + if idx == -1: + indexer[i] = -1 + else: + indexer[i] = sorter[idx] + else: + # length-0 case + indexer[:] = -1 + + +def ffill_indexer(const intp_t[:] indexer) -> np.ndarray: + cdef: + Py_ssize_t i, n = len(indexer) + ndarray[intp_t] result + intp_t val, last_obs + + result = np.empty(n, dtype=np.intp) + last_obs = -1 + + for i in range(n): + val = indexer[i] + if val == -1: + result[i] = last_obs + else: + result[i] = val + last_obs = val + + return result + + +# ---------------------------------------------------------------------- +# left_join_indexer, inner_join_indexer, outer_join_indexer +# ---------------------------------------------------------------------- + +# Joins on ordered, unique indices + +# right might contain non-unique values + +@cython.wraparound(False) +@cython.boundscheck(False) +def left_join_indexer_unique( + ndarray[numeric_object_t] left, + ndarray[numeric_object_t] right +): + """ + Both left and right are strictly monotonic increasing. + """ + cdef: + Py_ssize_t i, j, nleft, nright + ndarray[intp_t] indexer + numeric_object_t lval, rval + + i = 0 + j = 0 + nleft = len(left) + nright = len(right) + + indexer = np.empty(nleft, dtype=np.intp) + while True: + if i == nleft: + break + + if j == nright: + indexer[i] = -1 + i += 1 + continue + + rval = right[j] + + while i < nleft - 1 and left[i] == rval: + indexer[i] = j + i += 1 + + if left[i] == right[j]: + indexer[i] = j + i += 1 + while i < nleft - 1 and left[i] == rval: + indexer[i] = j + i += 1 + j += 1 + elif left[i] > rval: + indexer[i] = -1 + j += 1 + else: + indexer[i] = -1 + i += 1 + return indexer + + +@cython.wraparound(False) +@cython.boundscheck(False) +def left_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t] right): + """ + Two-pass algorithm for monotonic indexes. Handles many-to-one merges. + + Both left and right are monotonic increasing, but at least one of them + is non-unique (if both were unique we'd use left_join_indexer_unique). + """ + cdef: + Py_ssize_t i, j, k, nright, nleft, count + numeric_object_t lval, rval + ndarray[intp_t] lindexer, rindexer + ndarray[numeric_object_t] result + + nleft = len(left) + nright = len(right) + + # First pass is to find the size 'count' of our output indexers. 
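+    # (Note: this first pass only counts; the second pass below allocates
+    # lindexer/rindexer/result at exactly that size and replays the same
+    # pointer walk to fill them.)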
+ i = 0 + j = 0 + count = 0 + if nleft > 0: + while i < nleft: + if j == nright: + count += nleft - i + break + + lval = left[i] + rval = right[j] + + if lval == rval: + # This block is identical across + # left_join_indexer, inner_join_indexer, outer_join_indexer + count += 1 + if i < nleft - 1: + if j < nright - 1 and right[j + 1] == rval: + j += 1 + else: + i += 1 + if left[i] != rval: + j += 1 + elif j < nright - 1: + j += 1 + if lval != right[j]: + i += 1 + else: + # end of the road + break + elif lval < rval: + count += 1 + i += 1 + else: + j += 1 + + # do it again now that result size is known + + lindexer = np.empty(count, dtype=np.intp) + rindexer = np.empty(count, dtype=np.intp) + result = np.empty(count, dtype=left.dtype) + + i = 0 + j = 0 + count = 0 + if nleft > 0: + while i < nleft: + if j == nright: + while i < nleft: + lindexer[count] = i + rindexer[count] = -1 + result[count] = left[i] + i += 1 + count += 1 + break + + lval = left[i] + rval = right[j] + + if lval == rval: + lindexer[count] = i + rindexer[count] = j + result[count] = lval + count += 1 + if i < nleft - 1: + if j < nright - 1 and right[j + 1] == rval: + j += 1 + else: + i += 1 + if left[i] != rval: + j += 1 + elif j < nright - 1: + j += 1 + if lval != right[j]: + i += 1 + else: + # end of the road + break + elif lval < rval: + # i.e. lval not in right; we keep for left_join_indexer + lindexer[count] = i + rindexer[count] = -1 + result[count] = lval + count += 1 + i += 1 + else: + # i.e. rval not in left; we discard for left_join_indexer + j += 1 + + return result, lindexer, rindexer + + +@cython.wraparound(False) +@cython.boundscheck(False) +def inner_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t] right): + """ + Two-pass algorithm for monotonic indexes. Handles many-to-one merges. + + Both left and right are monotonic increasing but not necessarily unique. + """ + cdef: + Py_ssize_t i, j, k, nright, nleft, count + numeric_object_t lval, rval + ndarray[intp_t] lindexer, rindexer + ndarray[numeric_object_t] result + + nleft = len(left) + nright = len(right) + + # First pass is to find the size 'count' of our output indexers. + i = 0 + j = 0 + count = 0 + if nleft > 0 and nright > 0: + while True: + if i == nleft: + break + if j == nright: + break + + lval = left[i] + rval = right[j] + if lval == rval: + count += 1 + if i < nleft - 1: + if j < nright - 1 and right[j + 1] == rval: + j += 1 + else: + i += 1 + if left[i] != rval: + j += 1 + elif j < nright - 1: + j += 1 + if lval != right[j]: + i += 1 + else: + # end of the road + break + elif lval < rval: + # i.e. lval not in right; we discard for inner_indexer + i += 1 + else: + # i.e. rval not in left; we discard for inner_indexer + j += 1 + + # do it again now that result size is known + + lindexer = np.empty(count, dtype=np.intp) + rindexer = np.empty(count, dtype=np.intp) + result = np.empty(count, dtype=left.dtype) + + i = 0 + j = 0 + count = 0 + if nleft > 0 and nright > 0: + while True: + if i == nleft: + break + if j == nright: + break + + lval = left[i] + rval = right[j] + if lval == rval: + lindexer[count] = i + rindexer[count] = j + result[count] = lval + count += 1 + if i < nleft - 1: + if j < nright - 1 and right[j + 1] == rval: + j += 1 + else: + i += 1 + if left[i] != rval: + j += 1 + elif j < nright - 1: + j += 1 + if lval != right[j]: + i += 1 + else: + # end of the road + break + elif lval < rval: + # i.e. lval not in right; we discard for inner_indexer + i += 1 + else: + # i.e. 
rval not in left; we discard for inner_indexer + j += 1 + + return result, lindexer, rindexer + + +@cython.wraparound(False) +@cython.boundscheck(False) +def outer_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t] right): + """ + Both left and right are monotonic increasing but not necessarily unique. + """ + cdef: + Py_ssize_t i, j, nright, nleft, count + numeric_object_t lval, rval + ndarray[intp_t] lindexer, rindexer + ndarray[numeric_object_t] result + + nleft = len(left) + nright = len(right) + + # First pass is to find the size 'count' of our output indexers. + # count will be length of left plus the number of elements of right not in + # left (counting duplicates) + i = 0 + j = 0 + count = 0 + if nleft == 0: + count = nright + elif nright == 0: + count = nleft + else: + while True: + if i == nleft: + count += nright - j + break + if j == nright: + count += nleft - i + break + + lval = left[i] + rval = right[j] + if lval == rval: + count += 1 + if i < nleft - 1: + if j < nright - 1 and right[j + 1] == rval: + j += 1 + else: + i += 1 + if left[i] != rval: + j += 1 + elif j < nright - 1: + j += 1 + if lval != right[j]: + i += 1 + else: + # end of the road + break + elif lval < rval: + count += 1 + i += 1 + else: + count += 1 + j += 1 + + lindexer = np.empty(count, dtype=np.intp) + rindexer = np.empty(count, dtype=np.intp) + result = np.empty(count, dtype=left.dtype) + + # do it again, but populate the indexers / result + + i = 0 + j = 0 + count = 0 + if nleft == 0: + for j in range(nright): + lindexer[j] = -1 + rindexer[j] = j + result[j] = right[j] + elif nright == 0: + for i in range(nleft): + lindexer[i] = i + rindexer[i] = -1 + result[i] = left[i] + else: + while True: + if i == nleft: + while j < nright: + lindexer[count] = -1 + rindexer[count] = j + result[count] = right[j] + count += 1 + j += 1 + break + if j == nright: + while i < nleft: + lindexer[count] = i + rindexer[count] = -1 + result[count] = left[i] + count += 1 + i += 1 + break + + lval = left[i] + rval = right[j] + + if lval == rval: + lindexer[count] = i + rindexer[count] = j + result[count] = lval + count += 1 + if i < nleft - 1: + if j < nright - 1 and right[j + 1] == rval: + j += 1 + else: + i += 1 + if left[i] != rval: + j += 1 + elif j < nright - 1: + j += 1 + if lval != right[j]: + i += 1 + else: + # end of the road + break + elif lval < rval: + # i.e. lval not in right; we keep for outer_join_indexer + lindexer[count] = i + rindexer[count] = -1 + result[count] = lval + count += 1 + i += 1 + else: + # i.e. 
rval not in left; we keep for outer_join_indexer + lindexer[count] = -1 + rindexer[count] = j + result[count] = rval + count += 1 + j += 1 + + return result, lindexer, rindexer + + +# ---------------------------------------------------------------------- +# asof_join_by +# ---------------------------------------------------------------------- + +from pandas._libs.hashtable cimport ( + HashTable, + Int64HashTable, + PyObjectHashTable, + UInt64HashTable, +) + +ctypedef fused by_t: + object + int64_t + uint64_t + + +def asof_join_backward_on_X_by_Y(numeric_t[:] left_values, + numeric_t[:] right_values, + by_t[:] left_by_values, + by_t[:] right_by_values, + bint allow_exact_matches=True, + tolerance=None): + + cdef: + Py_ssize_t left_pos, right_pos, left_size, right_size, found_right_pos + ndarray[intp_t] left_indexer, right_indexer + bint has_tolerance = False + numeric_t tolerance_ = 0 + numeric_t diff = 0 + HashTable hash_table + by_t by_value + + # if we are using tolerance, set our objects + if tolerance is not None: + has_tolerance = True + tolerance_ = tolerance + + left_size = len(left_values) + right_size = len(right_values) + + left_indexer = np.empty(left_size, dtype=np.intp) + right_indexer = np.empty(left_size, dtype=np.intp) + + if by_t is object: + hash_table = PyObjectHashTable(right_size) + elif by_t is int64_t: + hash_table = Int64HashTable(right_size) + elif by_t is uint64_t: + hash_table = UInt64HashTable(right_size) + + right_pos = 0 + for left_pos in range(left_size): + # restart right_pos if it went negative in a previous iteration + if right_pos < 0: + right_pos = 0 + + # find last position in right whose value is less than left's + if allow_exact_matches: + while (right_pos < right_size and + right_values[right_pos] <= left_values[left_pos]): + hash_table.set_item(right_by_values[right_pos], right_pos) + right_pos += 1 + else: + while (right_pos < right_size and + right_values[right_pos] < left_values[left_pos]): + hash_table.set_item(right_by_values[right_pos], right_pos) + right_pos += 1 + right_pos -= 1 + + # save positions as the desired index + by_value = left_by_values[left_pos] + found_right_pos = (hash_table.get_item(by_value) + if by_value in hash_table else -1) + left_indexer[left_pos] = left_pos + right_indexer[left_pos] = found_right_pos + + # if needed, verify that tolerance is met + if has_tolerance and found_right_pos != -1: + diff = left_values[left_pos] - right_values[found_right_pos] + if diff > tolerance_: + right_indexer[left_pos] = -1 + + return left_indexer, right_indexer + + +def asof_join_forward_on_X_by_Y(numeric_t[:] left_values, + numeric_t[:] right_values, + by_t[:] left_by_values, + by_t[:] right_by_values, + bint allow_exact_matches=1, + tolerance=None): + + cdef: + Py_ssize_t left_pos, right_pos, left_size, right_size, found_right_pos + ndarray[intp_t] left_indexer, right_indexer + bint has_tolerance = False + numeric_t tolerance_ = 0 + numeric_t diff = 0 + HashTable hash_table + by_t by_value + + # if we are using tolerance, set our objects + if tolerance is not None: + has_tolerance = True + tolerance_ = tolerance + + left_size = len(left_values) + right_size = len(right_values) + + left_indexer = np.empty(left_size, dtype=np.intp) + right_indexer = np.empty(left_size, dtype=np.intp) + + if by_t is object: + hash_table = PyObjectHashTable(right_size) + elif by_t is int64_t: + hash_table = Int64HashTable(right_size) + elif by_t is uint64_t: + hash_table = UInt64HashTable(right_size) + + right_pos = right_size - 1 + for left_pos in 
range(left_size - 1, -1, -1): + # restart right_pos if it went over in a previous iteration + if right_pos == right_size: + right_pos = right_size - 1 + + # find first position in right whose value is greater than left's + if allow_exact_matches: + while (right_pos >= 0 and + right_values[right_pos] >= left_values[left_pos]): + hash_table.set_item(right_by_values[right_pos], right_pos) + right_pos -= 1 + else: + while (right_pos >= 0 and + right_values[right_pos] > left_values[left_pos]): + hash_table.set_item(right_by_values[right_pos], right_pos) + right_pos -= 1 + right_pos += 1 + + # save positions as the desired index + by_value = left_by_values[left_pos] + found_right_pos = (hash_table.get_item(by_value) + if by_value in hash_table else -1) + left_indexer[left_pos] = left_pos + right_indexer[left_pos] = found_right_pos + + # if needed, verify that tolerance is met + if has_tolerance and found_right_pos != -1: + diff = right_values[found_right_pos] - left_values[left_pos] + if diff > tolerance_: + right_indexer[left_pos] = -1 + + return left_indexer, right_indexer + + +def asof_join_nearest_on_X_by_Y(numeric_t[:] left_values, + numeric_t[:] right_values, + by_t[:] left_by_values, + by_t[:] right_by_values, + bint allow_exact_matches=True, + tolerance=None): + + cdef: + Py_ssize_t left_size, right_size, i + ndarray[intp_t] left_indexer, right_indexer, bli, bri, fli, fri + numeric_t bdiff, fdiff + + left_size = len(left_values) + right_size = len(right_values) + + left_indexer = np.empty(left_size, dtype=np.intp) + right_indexer = np.empty(left_size, dtype=np.intp) + + # search both forward and backward + bli, bri = asof_join_backward_on_X_by_Y( + left_values, + right_values, + left_by_values, + right_by_values, + allow_exact_matches, + tolerance, + ) + fli, fri = asof_join_forward_on_X_by_Y( + left_values, + right_values, + left_by_values, + right_by_values, + allow_exact_matches, + tolerance, + ) + + for i in range(len(bri)): + # choose timestamp from right with smaller difference + if bri[i] != -1 and fri[i] != -1: + bdiff = left_values[bli[i]] - right_values[bri[i]] + fdiff = right_values[fri[i]] - left_values[fli[i]] + right_indexer[i] = bri[i] if bdiff <= fdiff else fri[i] + else: + right_indexer[i] = bri[i] if bri[i] != -1 else fri[i] + left_indexer[i] = bli[i] + + return left_indexer, right_indexer + + +# ---------------------------------------------------------------------- +# asof_join +# ---------------------------------------------------------------------- + +def asof_join_backward(numeric_t[:] left_values, + numeric_t[:] right_values, + bint allow_exact_matches=True, + tolerance=None): + + cdef: + Py_ssize_t left_pos, right_pos, left_size, right_size + ndarray[intp_t] left_indexer, right_indexer + bint has_tolerance = False + numeric_t tolerance_ = 0 + numeric_t diff = 0 + + # if we are using tolerance, set our objects + if tolerance is not None: + has_tolerance = True + tolerance_ = tolerance + + left_size = len(left_values) + right_size = len(right_values) + + left_indexer = np.empty(left_size, dtype=np.intp) + right_indexer = np.empty(left_size, dtype=np.intp) + + right_pos = 0 + for left_pos in range(left_size): + # restart right_pos if it went negative in a previous iteration + if right_pos < 0: + right_pos = 0 + + # find last position in right whose value is less than left's + if allow_exact_matches: + while (right_pos < right_size and + right_values[right_pos] <= left_values[left_pos]): + right_pos += 1 + else: + while (right_pos < right_size and + 
right_values[right_pos] < left_values[left_pos]): + right_pos += 1 + right_pos -= 1 + + # save positions as the desired index + left_indexer[left_pos] = left_pos + right_indexer[left_pos] = right_pos + + # if needed, verify that tolerance is met + if has_tolerance and right_pos != -1: + diff = left_values[left_pos] - right_values[right_pos] + if diff > tolerance_: + right_indexer[left_pos] = -1 + + return left_indexer, right_indexer + + +def asof_join_forward(numeric_t[:] left_values, + numeric_t[:] right_values, + bint allow_exact_matches=True, + tolerance=None): + + cdef: + Py_ssize_t left_pos, right_pos, left_size, right_size + ndarray[intp_t] left_indexer, right_indexer + bint has_tolerance = False + numeric_t tolerance_ = 0 + numeric_t diff = 0 + + # if we are using tolerance, set our objects + if tolerance is not None: + has_tolerance = True + tolerance_ = tolerance + + left_size = len(left_values) + right_size = len(right_values) + + left_indexer = np.empty(left_size, dtype=np.intp) + right_indexer = np.empty(left_size, dtype=np.intp) + + right_pos = right_size - 1 + for left_pos in range(left_size - 1, -1, -1): + # restart right_pos if it went over in a previous iteration + if right_pos == right_size: + right_pos = right_size - 1 + + # find first position in right whose value is greater than left's + if allow_exact_matches: + while (right_pos >= 0 and + right_values[right_pos] >= left_values[left_pos]): + right_pos -= 1 + else: + while (right_pos >= 0 and + right_values[right_pos] > left_values[left_pos]): + right_pos -= 1 + right_pos += 1 + + # save positions as the desired index + left_indexer[left_pos] = left_pos + right_indexer[left_pos] = (right_pos + if right_pos != right_size else -1) + + # if needed, verify that tolerance is met + if has_tolerance and right_pos != right_size: + diff = right_values[right_pos] - left_values[left_pos] + if diff > tolerance_: + right_indexer[left_pos] = -1 + + return left_indexer, right_indexer + + +def asof_join_nearest(numeric_t[:] left_values, + numeric_t[:] right_values, + bint allow_exact_matches=True, + tolerance=None): + + cdef: + Py_ssize_t left_size, i + ndarray[intp_t] left_indexer, right_indexer, bli, bri, fli, fri + numeric_t bdiff, fdiff + + left_size = len(left_values) + + left_indexer = np.empty(left_size, dtype=np.intp) + right_indexer = np.empty(left_size, dtype=np.intp) + + # search both forward and backward + bli, bri = asof_join_backward(left_values, right_values, + allow_exact_matches, tolerance) + fli, fri = asof_join_forward(left_values, right_values, + allow_exact_matches, tolerance) + + for i in range(len(bri)): + # choose timestamp from right with smaller difference + if bri[i] != -1 and fri[i] != -1: + bdiff = left_values[bli[i]] - right_values[bri[i]] + fdiff = right_values[fri[i]] - left_values[fli[i]] + right_indexer[i] = bri[i] if bdiff <= fdiff else fri[i] + else: + right_indexer[i] = bri[i] if bri[i] != -1 else fri[i] + left_indexer[i] = bli[i] + + return left_indexer, right_indexer diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/json.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/_libs/json.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..6fe7b15 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/json.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/khash.pxd b/.local/lib/python3.8/site-packages/pandas/_libs/khash.pxd new file mode 100644 index 0000000..a9f819e --- /dev/null +++ 
b/.local/lib/python3.8/site-packages/pandas/_libs/khash.pxd @@ -0,0 +1,129 @@ +from cpython.object cimport PyObject +from numpy cimport ( + complex64_t, + complex128_t, + float32_t, + float64_t, + int8_t, + int16_t, + int32_t, + int64_t, + uint8_t, + uint16_t, + uint32_t, + uint64_t, +) + + +cdef extern from "khash_python.h": + const int KHASH_TRACE_DOMAIN + + ctypedef uint32_t khuint_t + ctypedef khuint_t khiter_t + + ctypedef struct khcomplex128_t: + double real + double imag + + bint are_equivalent_khcomplex128_t \ + "kh_complex_hash_equal" (khcomplex128_t a, khcomplex128_t b) nogil + + ctypedef struct khcomplex64_t: + float real + float imag + + bint are_equivalent_khcomplex64_t \ + "kh_complex_hash_equal" (khcomplex64_t a, khcomplex64_t b) nogil + + bint are_equivalent_float64_t \ + "kh_floats_hash_equal" (float64_t a, float64_t b) nogil + + bint are_equivalent_float32_t \ + "kh_floats_hash_equal" (float32_t a, float32_t b) nogil + + uint32_t kh_python_hash_func(object key) + bint kh_python_hash_equal(object a, object b) + + ctypedef struct kh_pymap_t: + khuint_t n_buckets, size, n_occupied, upper_bound + uint32_t *flags + PyObject **keys + size_t *vals + + kh_pymap_t* kh_init_pymap() + void kh_destroy_pymap(kh_pymap_t*) + void kh_clear_pymap(kh_pymap_t*) + khuint_t kh_get_pymap(kh_pymap_t*, PyObject*) + void kh_resize_pymap(kh_pymap_t*, khuint_t) + khuint_t kh_put_pymap(kh_pymap_t*, PyObject*, int*) + void kh_del_pymap(kh_pymap_t*, khuint_t) + + bint kh_exist_pymap(kh_pymap_t*, khiter_t) + + ctypedef struct kh_pyset_t: + khuint_t n_buckets, size, n_occupied, upper_bound + uint32_t *flags + PyObject **keys + size_t *vals + + kh_pyset_t* kh_init_pyset() + void kh_destroy_pyset(kh_pyset_t*) + void kh_clear_pyset(kh_pyset_t*) + khuint_t kh_get_pyset(kh_pyset_t*, PyObject*) + void kh_resize_pyset(kh_pyset_t*, khuint_t) + khuint_t kh_put_pyset(kh_pyset_t*, PyObject*, int*) + void kh_del_pyset(kh_pyset_t*, khuint_t) + + bint kh_exist_pyset(kh_pyset_t*, khiter_t) + + ctypedef char* kh_cstr_t + + ctypedef struct kh_str_t: + khuint_t n_buckets, size, n_occupied, upper_bound + uint32_t *flags + kh_cstr_t *keys + size_t *vals + + kh_str_t* kh_init_str() nogil + void kh_destroy_str(kh_str_t*) nogil + void kh_clear_str(kh_str_t*) nogil + khuint_t kh_get_str(kh_str_t*, kh_cstr_t) nogil + void kh_resize_str(kh_str_t*, khuint_t) nogil + khuint_t kh_put_str(kh_str_t*, kh_cstr_t, int*) nogil + void kh_del_str(kh_str_t*, khuint_t) nogil + + bint kh_exist_str(kh_str_t*, khiter_t) nogil + + ctypedef struct kh_str_starts_t: + kh_str_t *table + int starts[256] + + kh_str_starts_t* kh_init_str_starts() nogil + khuint_t kh_put_str_starts_item(kh_str_starts_t* table, char* key, + int* ret) nogil + khuint_t kh_get_str_starts_item(kh_str_starts_t* table, char* key) nogil + void kh_destroy_str_starts(kh_str_starts_t*) nogil + void kh_resize_str_starts(kh_str_starts_t*, khuint_t) nogil + + # sweep factorize + + ctypedef struct kh_strbox_t: + khuint_t n_buckets, size, n_occupied, upper_bound + uint32_t *flags + kh_cstr_t *keys + PyObject **vals + + kh_strbox_t* kh_init_strbox() nogil + void kh_destroy_strbox(kh_strbox_t*) nogil + void kh_clear_strbox(kh_strbox_t*) nogil + khuint_t kh_get_strbox(kh_strbox_t*, kh_cstr_t) nogil + void kh_resize_strbox(kh_strbox_t*, khuint_t) nogil + khuint_t kh_put_strbox(kh_strbox_t*, kh_cstr_t, int*) nogil + void kh_del_strbox(kh_strbox_t*, khuint_t) nogil + + bint kh_exist_strbox(kh_strbox_t*, khiter_t) nogil + + khuint_t kh_needed_n_buckets(khuint_t element_n) nogil + + +include 
"khash_for_primitive_helper.pxi" diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/khash_for_primitive_helper.pxi.in b/.local/lib/python3.8/site-packages/pandas/_libs/khash_for_primitive_helper.pxi.in new file mode 100644 index 0000000..d0934b3 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/khash_for_primitive_helper.pxi.in @@ -0,0 +1,44 @@ +""" +Template for wrapping khash-tables for each primitive `dtype` + +WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in +""" + +{{py: + +# name, c_type +primitive_types = [('int64', 'int64_t'), + ('uint64', 'uint64_t'), + ('float64', 'float64_t'), + ('int32', 'int32_t'), + ('uint32', 'uint32_t'), + ('float32', 'float32_t'), + ('int16', 'int16_t'), + ('uint16', 'uint16_t'), + ('int8', 'int8_t'), + ('uint8', 'uint8_t'), + ('complex64', 'khcomplex64_t'), + ('complex128', 'khcomplex128_t'), + ] +}} + +{{for name, c_type in primitive_types}} + +cdef extern from "khash_python.h": + ctypedef struct kh_{{name}}_t: + khuint_t n_buckets, size, n_occupied, upper_bound + uint32_t *flags + {{c_type}} *keys + size_t *vals + + kh_{{name}}_t* kh_init_{{name}}() nogil + void kh_destroy_{{name}}(kh_{{name}}_t*) nogil + void kh_clear_{{name}}(kh_{{name}}_t*) nogil + khuint_t kh_get_{{name}}(kh_{{name}}_t*, {{c_type}}) nogil + void kh_resize_{{name}}(kh_{{name}}_t*, khuint_t) nogil + khuint_t kh_put_{{name}}(kh_{{name}}_t*, {{c_type}}, int*) nogil + void kh_del_{{name}}(kh_{{name}}_t*, khuint_t) nogil + + bint kh_exist_{{name}}(kh_{{name}}_t*, khiter_t) nogil + +{{endfor}} diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/lib.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/_libs/lib.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..edd119c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/lib.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/lib.pxd b/.local/lib/python3.8/site-packages/pandas/_libs/lib.pxd new file mode 100644 index 0000000..46a339f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/lib.pxd @@ -0,0 +1,6 @@ +from numpy cimport ndarray + + +cdef bint c_is_list_like(object, bint) except -1 + +cpdef ndarray eq_NA_compat(ndarray[object] arr, object key) diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/lib.pyi b/.local/lib/python3.8/site-packages/pandas/_libs/lib.pyi new file mode 100644 index 0000000..a7ebd9d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/lib.pyi @@ -0,0 +1,231 @@ +# TODO(npdtypes): Many types specified here can be made more specific/accurate; +# the more specific versions are specified in comments + +from typing import ( + Any, + Callable, + Generator, + Hashable, + Literal, + overload, +) + +import numpy as np + +from pandas._typing import ( + ArrayLike, + DtypeObj, + npt, +) + +# placeholder until we can specify np.ndarray[object, ndim=2] +ndarray_obj_2d = np.ndarray + +from enum import Enum + +class NoDefault(Enum): ... + +no_default: NoDefault + +i8max: int +u8max: int + +def item_from_zerodim(val: object) -> object: ... +def infer_dtype(value: object, skipna: bool = ...) -> str: ... +def is_iterator(obj: object) -> bool: ... +def is_scalar(val: object) -> bool: ... +def is_list_like(obj: object, allow_sets: bool = ...) -> bool: ... +def is_period(val: object) -> bool: ... +def is_interval(val: object) -> bool: ... +def is_decimal(val: object) -> bool: ... +def is_complex(val: object) -> bool: ... 
+def is_bool(val: object) -> bool: ... +def is_integer(val: object) -> bool: ... +def is_float(val: object) -> bool: ... +def is_interval_array(values: np.ndarray) -> bool: ... +def is_datetime64_array(values: np.ndarray) -> bool: ... +def is_timedelta_or_timedelta64_array(values: np.ndarray) -> bool: ... +def is_datetime_with_singletz_array(values: np.ndarray) -> bool: ... +def is_time_array(values: np.ndarray, skipna: bool = ...): ... +def is_date_array(values: np.ndarray, skipna: bool = ...): ... +def is_datetime_array(values: np.ndarray, skipna: bool = ...): ... +def is_string_array(values: np.ndarray, skipna: bool = ...): ... +def is_float_array(values: np.ndarray, skipna: bool = ...): ... +def is_integer_array(values: np.ndarray, skipna: bool = ...): ... +def is_bool_array(values: np.ndarray, skipna: bool = ...): ... +def fast_multiget(mapping: dict, keys: np.ndarray, default=...) -> np.ndarray: ... +def fast_unique_multiple_list_gen(gen: Generator, sort: bool = ...) -> list: ... +def fast_unique_multiple_list(lists: list, sort: bool | None = ...) -> list: ... +def fast_unique_multiple(arrays: list, sort: bool = ...) -> list: ... +def map_infer( + arr: np.ndarray, + f: Callable[[Any], Any], + convert: bool = ..., + ignore_na: bool = ..., +) -> np.ndarray: ... +@overload # both convert_datetime and convert_to_nullable_integer False -> np.ndarray +def maybe_convert_objects( + objects: npt.NDArray[np.object_], + *, + try_float: bool = ..., + safe: bool = ..., + convert_datetime: Literal[False] = ..., + convert_timedelta: bool = ..., + convert_period: Literal[False] = ..., + convert_interval: Literal[False] = ..., + convert_to_nullable_integer: Literal[False] = ..., + dtype_if_all_nat: DtypeObj | None = ..., +) -> np.ndarray: ... +@overload +def maybe_convert_objects( + objects: npt.NDArray[np.object_], + *, + try_float: bool = ..., + safe: bool = ..., + convert_datetime: bool = ..., + convert_timedelta: bool = ..., + convert_period: bool = ..., + convert_interval: bool = ..., + convert_to_nullable_integer: Literal[True] = ..., + dtype_if_all_nat: DtypeObj | None = ..., +) -> ArrayLike: ... +@overload +def maybe_convert_objects( + objects: npt.NDArray[np.object_], + *, + try_float: bool = ..., + safe: bool = ..., + convert_datetime: Literal[True] = ..., + convert_timedelta: bool = ..., + convert_period: bool = ..., + convert_interval: bool = ..., + convert_to_nullable_integer: bool = ..., + dtype_if_all_nat: DtypeObj | None = ..., +) -> ArrayLike: ... +@overload +def maybe_convert_objects( + objects: npt.NDArray[np.object_], + *, + try_float: bool = ..., + safe: bool = ..., + convert_datetime: bool = ..., + convert_timedelta: bool = ..., + convert_period: Literal[True] = ..., + convert_interval: bool = ..., + convert_to_nullable_integer: bool = ..., + dtype_if_all_nat: DtypeObj | None = ..., +) -> ArrayLike: ... +@overload +def maybe_convert_objects( + objects: npt.NDArray[np.object_], + *, + try_float: bool = ..., + safe: bool = ..., + convert_datetime: bool = ..., + convert_timedelta: bool = ..., + convert_period: bool = ..., + convert_interval: bool = ..., + convert_to_nullable_integer: bool = ..., + dtype_if_all_nat: DtypeObj | None = ..., +) -> ArrayLike: ... +@overload +def maybe_convert_numeric( + values: npt.NDArray[np.object_], + na_values: set, + convert_empty: bool = ..., + coerce_numeric: bool = ..., + convert_to_masked_nullable: Literal[False] = ..., +) -> tuple[np.ndarray, None]: ... 
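+# The overload below covers convert_to_masked_nullable=True, where a second
+# ndarray (a mask of the positions parsed as NA) is returned in place of None.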
+@overload +def maybe_convert_numeric( + values: npt.NDArray[np.object_], + na_values: set, + convert_empty: bool = ..., + coerce_numeric: bool = ..., + *, + convert_to_masked_nullable: Literal[True], +) -> tuple[np.ndarray, np.ndarray]: ... + +# TODO: restrict `arr`? +def ensure_string_array( + arr, + na_value: object = ..., + convert_na_value: bool = ..., + copy: bool = ..., + skipna: bool = ..., +) -> npt.NDArray[np.object_]: ... +def infer_datetimelike_array( + arr: npt.NDArray[np.object_], +) -> tuple[str, bool]: ... +def astype_intsafe( + arr: npt.NDArray[np.object_], + new_dtype: np.dtype, +) -> np.ndarray: ... +def fast_zip(ndarrays: list) -> npt.NDArray[np.object_]: ... + +# TODO: can we be more specific about rows? +def to_object_array_tuples(rows: object) -> ndarray_obj_2d: ... +def tuples_to_object_array( + tuples: npt.NDArray[np.object_], +) -> ndarray_obj_2d: ... + +# TODO: can we be more specific about rows? +def to_object_array(rows: object, min_width: int = ...) -> ndarray_obj_2d: ... +def dicts_to_array(dicts: list, columns: list) -> ndarray_obj_2d: ... +def maybe_booleans_to_slice( + mask: npt.NDArray[np.uint8], +) -> slice | npt.NDArray[np.uint8]: ... +def maybe_indices_to_slice( + indices: npt.NDArray[np.intp], + max_len: int, +) -> slice | npt.NDArray[np.intp]: ... +def is_all_arraylike(obj: list) -> bool: ... + +# ----------------------------------------------------------------- +# Functions which in reality take memoryviews + +def memory_usage_of_objects(arr: np.ndarray) -> int: ... # object[:] # np.int64 +def map_infer_mask( + arr: np.ndarray, + f: Callable[[Any], Any], + mask: np.ndarray, # const uint8_t[:] + convert: bool = ..., + na_value: Any = ..., + dtype: np.dtype = ..., +) -> np.ndarray: ... +def indices_fast( + index: npt.NDArray[np.intp], + labels: np.ndarray, # const int64_t[:] + keys: list, + sorted_labels: list[npt.NDArray[np.int64]], +) -> dict[Hashable, npt.NDArray[np.intp]]: ... +def generate_slices( + labels: np.ndarray, ngroups: int # const intp_t[:] +) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.int64]]: ... +def count_level_2d( + mask: np.ndarray, # ndarray[uint8_t, ndim=2, cast=True], + labels: np.ndarray, # const intp_t[:] + max_bin: int, + axis: int, +) -> np.ndarray: ... # np.ndarray[np.int64, ndim=2] +def get_level_sorter( + label: np.ndarray, # const int64_t[:] + starts: np.ndarray, # const intp_t[:] +) -> np.ndarray: ... # np.ndarray[np.intp, ndim=1] +def generate_bins_dt64( + values: npt.NDArray[np.int64], + binner: np.ndarray, # const int64_t[:] + closed: object = ..., + hasnans: bool = ..., +) -> np.ndarray: ... # np.ndarray[np.int64, ndim=1] +def array_equivalent_object( + left: np.ndarray, # object[:] + right: np.ndarray, # object[:] +) -> bool: ... +def has_infs(arr: np.ndarray) -> bool: ... # const floating[:] +def get_reverse_indexer( + indexer: np.ndarray, # const intp_t[:] + length: int, +) -> npt.NDArray[np.intp]: ... +def is_bool_list(obj: list) -> bool: ... +def dtypes_all_equal(types: list[DtypeObj]) -> bool: ... 
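A minimal usage sketch of a few of the functions stubbed above (an assumption-laden illustration, not part of the vendored diff: it presumes a pandas build matching these 1.x-era internals, and pandas._libs.lib is private API, so the public pandas.api.types wrappers are preferable where they exist):

    import numpy as np
    from pandas._libs import lib

    lib.infer_dtype(np.array(["a", "b"], dtype=object))   # 'string'
    lib.is_list_like((1, 2, 3))                           # True
    lib.is_list_like("abc")                               # False: str is not list-like
    # consecutive indices collapse to a slice
    lib.maybe_indices_to_slice(np.array([2, 3, 4], dtype=np.intp), 10)  # slice(2, 5, 1)
    # non-null elements are stringified; NA-likes are kept by default
    lib.ensure_string_array(np.array([1, 2.5], dtype=object))  # array(['1', '2.5'], dtype=object)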
diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/lib.pyx b/.local/lib/python3.8/site-packages/pandas/_libs/lib.pyx new file mode 100644 index 0000000..ee31781 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/lib.pyx @@ -0,0 +1,3093 @@ +from collections import abc +from decimal import Decimal +from enum import Enum +import warnings + +import cython +from cython import Py_ssize_t + +from cpython.datetime cimport ( + PyDate_Check, + PyDateTime_Check, + PyDateTime_IMPORT, + PyDelta_Check, + PyTime_Check, +) +from cpython.iterator cimport PyIter_Check +from cpython.number cimport PyNumber_Check +from cpython.object cimport ( + Py_EQ, + PyObject_RichCompareBool, +) +from cpython.ref cimport Py_INCREF +from cpython.sequence cimport PySequence_Check +from cpython.tuple cimport ( + PyTuple_New, + PyTuple_SET_ITEM, +) +from cython cimport floating + +PyDateTime_IMPORT + +import numpy as np + +cimport numpy as cnp +from numpy cimport ( + NPY_OBJECT, + PyArray_Check, + PyArray_GETITEM, + PyArray_ITER_DATA, + PyArray_ITER_NEXT, + PyArray_IterNew, + complex128_t, + flatiter, + float32_t, + float64_t, + int64_t, + intp_t, + ndarray, + uint8_t, + uint64_t, +) + +cnp.import_array() + +cdef extern from "numpy/arrayobject.h": + # cython's numpy.dtype specification is incorrect, which leads to + # errors in issubclass(self.dtype.type, np.bool_), so we directly + # include the correct version + # https://github.com/cython/cython/issues/2022 + + ctypedef class numpy.dtype [object PyArray_Descr]: + # Use PyDataType_* macros when possible, however there are no macros + # for accessing some of the fields, so some are defined. Please + # ask on cython-dev if you need more. + cdef: + int type_num + int itemsize "elsize" + char byteorder + object fields + tuple names + +cdef extern from "numpy/ndarrayobject.h": + bint PyArray_CheckScalar(obj) nogil + + +cdef extern from "src/parse_helper.h": + int floatify(object, float64_t *result, int *maybe_int) except -1 + +from pandas._libs cimport util +from pandas._libs.util cimport ( + INT64_MAX, + INT64_MIN, + UINT64_MAX, + is_nan, +) + +from pandas._libs.tslib import array_to_datetime +from pandas._libs.tslibs import ( + OutOfBoundsDatetime, + OutOfBoundsTimedelta, +) +from pandas._libs.tslibs.period import Period + +from pandas._libs.missing cimport ( + C_NA, + checknull, + is_matching_na, + is_null_datetime64, + is_null_timedelta64, + isnaobj, +) +from pandas._libs.tslibs.conversion cimport convert_to_tsobject +from pandas._libs.tslibs.nattype cimport ( + NPY_NAT, + c_NaT as NaT, + checknull_with_nat, +) +from pandas._libs.tslibs.offsets cimport is_offset_object +from pandas._libs.tslibs.period cimport is_period_object +from pandas._libs.tslibs.timedeltas cimport convert_to_timedelta64 +from pandas._libs.tslibs.timezones cimport tz_compare + +# constants that will be compared to potentially arbitrarily large +# python int +cdef: + object oINT64_MAX = INT64_MAX + object oINT64_MIN = INT64_MIN + object oUINT64_MAX = UINT64_MAX + + float64_t NaN = np.NaN + +# python-visible +i8max = INT64_MAX +u8max = UINT64_MAX + + +@cython.wraparound(False) +@cython.boundscheck(False) +def memory_usage_of_objects(arr: object[:]) -> int64_t: + """ + Return the memory usage of an object array in bytes. 
+ + Does not include the actual bytes of the pointers + """ + i: Py_ssize_t + n: Py_ssize_t + size: int64_t + + size = 0 + n = len(arr) + for i in range(n): + size += arr[i].__sizeof__() + return size + + +# ---------------------------------------------------------------------- + + +def is_scalar(val: object) -> bool: + """ + Return True if given object is scalar. + + Parameters + ---------- + val : object + This includes: + + - numpy array scalar (e.g. np.int64) + - Python builtin numerics + - Python builtin byte arrays and strings + - None + - datetime.datetime + - datetime.timedelta + - Period + - decimal.Decimal + - Interval + - DateOffset + - Fraction + - Number. + + Returns + ------- + bool + Return True if given object is scalar. + + Examples + -------- + >>> import datetime + >>> dt = datetime.datetime(2018, 10, 3) + >>> pd.api.types.is_scalar(dt) + True + + >>> pd.api.types.is_scalar([2, 3]) + False + + >>> pd.api.types.is_scalar({0: 1, 2: 3}) + False + + >>> pd.api.types.is_scalar((0, 2)) + False + + pandas supports PEP 3141 numbers: + + >>> from fractions import Fraction + >>> pd.api.types.is_scalar(Fraction(3, 5)) + True + """ + + # Start with C-optimized checks + if (cnp.PyArray_IsAnyScalar(val) + # PyArray_IsAnyScalar is always False for bytearrays on Py3 + or PyDate_Check(val) + or PyDelta_Check(val) + or PyTime_Check(val) + # We differ from numpy, which claims that None is not scalar; + # see np.isscalar + or val is C_NA + or val is None): + return True + + # Next use C-optimized checks to exclude common non-scalars before falling + # back to non-optimized checks. + if PySequence_Check(val): + # e.g. list, tuple + # includes np.ndarray, Series which PyNumber_Check can return True for + return False + + # Note: PyNumber_Check check includes Decimal, Fraction, numbers.Number + return (PyNumber_Check(val) + or is_period_object(val) + or is_interval(val) + or is_offset_object(val)) + + +cdef inline int64_t get_itemsize(object val): + """ + Get the itemsize of a NumPy scalar, -1 if not a NumPy scalar. + + Parameters + ---------- + val : object + + Returns + ------- + is_ndarray : bool + """ + if PyArray_CheckScalar(val): + return cnp.PyArray_DescrFromScalar(val).itemsize + else: + return -1 + + +def is_iterator(obj: object) -> bool: + """ + Check if the object is an iterator. + + This is intended for generators, not list-like objects. + + Parameters + ---------- + obj : The object to check + + Returns + ------- + is_iter : bool + Whether `obj` is an iterator. + + Examples + -------- + >>> import datetime + >>> is_iterator((x for x in [])) + True + >>> is_iterator([1, 2, 3]) + False + >>> is_iterator(datetime.datetime(2017, 1, 1)) + False + >>> is_iterator("foo") + False + >>> is_iterator(1) + False + """ + return PyIter_Check(obj) + + +def item_from_zerodim(val: object) -> object: + """ + If the value is a zerodim array, return the item it contains. + + Parameters + ---------- + val : object + + Returns + ------- + object + + Examples + -------- + >>> item_from_zerodim(1) + 1 + >>> item_from_zerodim('foobar') + 'foobar' + >>> item_from_zerodim(np.array(1)) + 1 + >>> item_from_zerodim(np.array([1])) + array([1]) + """ + if cnp.PyArray_IsZeroDim(val): + return cnp.PyArray_ToScalar(cnp.PyArray_DATA(val), val) + return val + + +@cython.wraparound(False) +@cython.boundscheck(False) +def fast_unique_multiple(list arrays, sort: bool = True): + """ + Generate a list of unique values from a list of arrays. + + Parameters + ---------- + list : array-like + List of array-like objects. 
+ sort : bool + Whether or not to sort the resulting unique list. + + Returns + ------- + list of unique values + """ + cdef: + ndarray[object] buf + Py_ssize_t k = len(arrays) + Py_ssize_t i, j, n + list uniques = [] + dict table = {} + object val, stub = 0 + + for i in range(k): + buf = arrays[i] + n = len(buf) + for j in range(n): + val = buf[j] + if val not in table: + table[val] = stub + uniques.append(val) + + if sort is None: + try: + uniques.sort() + except TypeError: + warnings.warn( + "The values in the array are unorderable. " + "Pass `sort=False` to suppress this warning.", + RuntimeWarning, + ) + pass + + return uniques + + +@cython.wraparound(False) +@cython.boundscheck(False) +def fast_unique_multiple_list(lists: list, sort: bool | None = True) -> list: + cdef: + list buf + Py_ssize_t k = len(lists) + Py_ssize_t i, j, n + list uniques = [] + dict table = {} + object val, stub = 0 + + for i in range(k): + buf = lists[i] + n = len(buf) + for j in range(n): + val = buf[j] + if val not in table: + table[val] = stub + uniques.append(val) + if sort: + try: + uniques.sort() + except TypeError: + pass + + return uniques + + +@cython.wraparound(False) +@cython.boundscheck(False) +def fast_unique_multiple_list_gen(object gen, bint sort=True) -> list: + """ + Generate a list of unique values from a generator of lists. + + Parameters + ---------- + gen : generator object + Generator of lists from which the unique list is created. + sort : bool + Whether or not to sort the resulting unique list. + + Returns + ------- + list of unique values + """ + cdef: + list buf + Py_ssize_t j, n + list uniques = [] + dict table = {} + object val, stub = 0 + + for buf in gen: + n = len(buf) + for j in range(n): + val = buf[j] + if val not in table: + table[val] = stub + uniques.append(val) + if sort: + try: + uniques.sort() + except TypeError: + pass + + return uniques + + +@cython.wraparound(False) +@cython.boundscheck(False) +def dicts_to_array(dicts: list, columns: list): + cdef: + Py_ssize_t i, j, k, n + ndarray[object, ndim=2] result + dict row + object col, onan = np.nan + + k = len(columns) + n = len(dicts) + + result = np.empty((n, k), dtype='O') + + for i in range(n): + row = dicts[i] + for j in range(k): + col = columns[j] + if col in row: + result[i, j] = row[col] + else: + result[i, j] = onan + + return result + + +def fast_zip(list ndarrays) -> ndarray[object]: + """ + For zipping multiple ndarrays into an ndarray of tuples. + """ + cdef: + Py_ssize_t i, j, k, n + ndarray[object, ndim=1] result + flatiter it + object val, tup + + k = len(ndarrays) + n = len(ndarrays[0]) + + result = np.empty(n, dtype=object) + + # initialize tuples on first pass + arr = ndarrays[0] + it = PyArray_IterNew(arr) + for i in range(n): + val = PyArray_GETITEM(arr, PyArray_ITER_DATA(it)) + tup = PyTuple_New(k) + + PyTuple_SET_ITEM(tup, 0, val) + Py_INCREF(val) + result[i] = tup + PyArray_ITER_NEXT(it) + + for j in range(1, k): + arr = ndarrays[j] + it = PyArray_IterNew(arr) + if len(arr) != n: + raise ValueError("all arrays must be same length") + + for i in range(n): + val = PyArray_GETITEM(arr, PyArray_ITER_DATA(it)) + PyTuple_SET_ITEM(result[i], j, val) + Py_INCREF(val) + PyArray_ITER_NEXT(it) + + return result + + +def get_reverse_indexer(const intp_t[:] indexer, Py_ssize_t length) -> ndarray: + """ + Reverse indexing operation. 
+ + Given `indexer`, make `indexer_inv` of it, such that:: + + indexer_inv[indexer[x]] = x + + Parameters + ---------- + indexer : np.ndarray[np.intp] + length : int + + Returns + ------- + np.ndarray[np.intp] + + Notes + ----- + If indexer is not unique, only first occurrence is accounted. + """ + cdef: + Py_ssize_t i, n = len(indexer) + ndarray[intp_t, ndim=1] rev_indexer + intp_t idx + + rev_indexer = np.empty(length, dtype=np.intp) + rev_indexer[:] = -1 + for i in range(n): + idx = indexer[i] + if idx != -1: + rev_indexer[idx] = i + + return rev_indexer + + +@cython.wraparound(False) +@cython.boundscheck(False) +# Can add const once https://github.com/cython/cython/issues/1772 resolved +def has_infs(floating[:] arr) -> bool: + cdef: + Py_ssize_t i, n = len(arr) + floating inf, neginf, val + bint ret = False + + inf = np.inf + neginf = -inf + with nogil: + for i in range(n): + val = arr[i] + if val == inf or val == neginf: + ret = True + break + return ret + + +def maybe_indices_to_slice(ndarray[intp_t, ndim=1] indices, int max_len): + cdef: + Py_ssize_t i, n = len(indices) + intp_t k, vstart, vlast, v + + if n == 0: + return slice(0, 0) + + vstart = indices[0] + if vstart < 0 or max_len <= vstart: + return indices + + if n == 1: + return slice(vstart, (vstart + 1)) + + vlast = indices[n - 1] + if vlast < 0 or max_len <= vlast: + return indices + + k = indices[1] - indices[0] + if k == 0: + return indices + else: + for i in range(2, n): + v = indices[i] + if v - indices[i - 1] != k: + return indices + + if k > 0: + return slice(vstart, (vlast + 1), k) + else: + if vlast == 0: + return slice(vstart, None, k) + else: + return slice(vstart, (vlast - 1), k) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def maybe_booleans_to_slice(ndarray[uint8_t, ndim=1] mask): + cdef: + Py_ssize_t i, n = len(mask) + Py_ssize_t start = 0, end = 0 + bint started = False, finished = False + + for i in range(n): + if mask[i]: + if finished: + return mask.view(np.bool_) + if not started: + started = True + start = i + else: + if finished: + continue + + if started: + end = i + finished = True + + if not started: + return slice(0, 0) + if not finished: + return slice(start, None) + else: + return slice(start, end) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def array_equivalent_object(left: object[:], right: object[:]) -> bool: + """ + Perform an element by element comparison on 1-d object arrays + taking into account nan positions. 
+ """ + cdef: + Py_ssize_t i, n = left.shape[0] + object x, y + + for i in range(n): + x = left[i] + y = right[i] + + # we are either not equal or both nan + # I think None == None will be true here + try: + if PyArray_Check(x) and PyArray_Check(y): + if not array_equivalent_object(x, y): + return False + elif (x is C_NA) ^ (y is C_NA): + return False + elif not ( + PyObject_RichCompareBool(x, y, Py_EQ) + or is_matching_na(x, y, nan_matches_none=True) + ): + return False + except ValueError: + # Avoid raising ValueError when comparing Numpy arrays to other types + if cnp.PyArray_IsAnyScalar(x) != cnp.PyArray_IsAnyScalar(y): + # Only compare scalars to scalars and non-scalars to non-scalars + return False + elif (not (cnp.PyArray_IsPythonScalar(x) or cnp.PyArray_IsPythonScalar(y)) + and not (isinstance(x, type(y)) or isinstance(y, type(x)))): + # Check if non-scalars have the same type + return False + raise + return True + + +@cython.wraparound(False) +@cython.boundscheck(False) +def astype_intsafe(ndarray[object] arr, cnp.dtype new_dtype) -> ndarray: + cdef: + Py_ssize_t i, n = len(arr) + object val + bint is_datelike + ndarray result + + is_datelike = new_dtype == 'm8[ns]' + result = np.empty(n, dtype=new_dtype) + for i in range(n): + val = arr[i] + if is_datelike and checknull(val): + result[i] = NPY_NAT + else: + result[i] = val + + return result + + +@cython.wraparound(False) +@cython.boundscheck(False) +cpdef ndarray[object] ensure_string_array( + arr, + object na_value=np.nan, + bint convert_na_value=True, + bint copy=True, + bint skipna=True, +): + """ + Returns a new numpy array with object dtype and only strings and na values. + + Parameters + ---------- + arr : array-like + The values to be converted to str, if needed. + na_value : Any, default np.nan + The value to use for na. For example, np.nan or pd.NA. + convert_na_value : bool, default True + If False, existing na values will be used unchanged in the new array. + copy : bool, default True + Whether to ensure that a new array is returned. + skipna : bool, default True + Whether or not to coerce nulls to their stringified form + (e.g. if False, NaN becomes 'nan'). + + Returns + ------- + np.ndarray[object] + An array with the input array's elements casted to str or nan-like. + """ + cdef: + Py_ssize_t i = 0, n = len(arr) + + if hasattr(arr, "to_numpy"): + + if hasattr(arr, "dtype") and arr.dtype.kind in ["m", "M"]: + # dtype check to exclude DataFrame + # GH#41409 TODO: not a great place for this + out = arr.astype(str).astype(object) + out[arr.isna()] = na_value + return out + + arr = arr.to_numpy() + elif not util.is_array(arr): + arr = np.array(arr, dtype="object") + + result = np.asarray(arr, dtype="object") + + if copy and result is arr: + result = result.copy() + + for i in range(n): + val = arr[i] + + if isinstance(val, str): + continue + + if not checknull(val): + if not util.is_float_object(val): + # f"{val}" is faster than str(val) + result[i] = f"{val}" + else: + # f"{val}" is not always equivalent to str(val) for floats + result[i] = str(val) + else: + if convert_na_value: + val = na_value + if skipna: + result[i] = val + else: + result[i] = f"{val}" + + return result + + +def is_all_arraylike(obj: list) -> bool: + """ + Should we treat these as levels of a MultiIndex, as opposed to Index items? + """ + cdef: + Py_ssize_t i, n = len(obj) + object val + bint all_arrays = True + + for i in range(n): + val = obj[i] + if not (isinstance(val, list) or + util.is_array(val) or hasattr(val, '_data')): + # TODO: EA? 
+ # exclude tuples, frozensets as they may be contained in an Index + all_arrays = False + break + + return all_arrays + + +# ------------------------------------------------------------------------------ +# Groupby-related functions + +# TODO: could do even better if we know something about the data. eg, index has +# 1-min data, binner has 5-min data, then bins are just strides in index. This +# is a general, O(max(len(values), len(binner))) method. +@cython.boundscheck(False) +@cython.wraparound(False) +def generate_bins_dt64(ndarray[int64_t, ndim=1] values, const int64_t[:] binner, + object closed='left', bint hasnans=False): + """ + Int64 (datetime64) version of generic python version in ``groupby.py``. + """ + cdef: + Py_ssize_t lenidx, lenbin, i, j, bc, vc + ndarray[int64_t, ndim=1] bins + int64_t l_bin, r_bin, nat_count + bint right_closed = closed == 'right' + + nat_count = 0 + if hasnans: + mask = values == NPY_NAT + nat_count = np.sum(mask) + values = values[~mask] + + lenidx = len(values) + lenbin = len(binner) + + if lenidx <= 0 or lenbin <= 0: + raise ValueError("Invalid length for values or for binner") + + # check binner fits data + if values[0] < binner[0]: + raise ValueError("Values falls before first bin") + + if values[lenidx - 1] > binner[lenbin - 1]: + raise ValueError("Values falls after last bin") + + bins = np.empty(lenbin - 1, dtype=np.int64) + + j = 0 # index into values + bc = 0 # bin count + + # linear scan + if right_closed: + for i in range(0, lenbin - 1): + r_bin = binner[i + 1] + # count values in current bin, advance to next bin + while j < lenidx and values[j] <= r_bin: + j += 1 + bins[bc] = j + bc += 1 + else: + for i in range(0, lenbin - 1): + r_bin = binner[i + 1] + # count values in current bin, advance to next bin + while j < lenidx and values[j] < r_bin: + j += 1 + bins[bc] = j + bc += 1 + + if nat_count > 0: + # shift bins by the number of NaT + bins = bins + nat_count + bins = np.insert(bins, 0, nat_count) + + return bins + + +@cython.boundscheck(False) +@cython.wraparound(False) +def get_level_sorter( + ndarray[int64_t, ndim=1] codes, const intp_t[:] starts +) -> ndarray: + """ + Argsort for a single level of a multi-index, keeping the order of higher + levels unchanged. 
`starts` points to starts of same-key indices w.r.t + to leading levels; equivalent to: + np.hstack([codes[starts[i]:starts[i+1]].argsort(kind='mergesort') + + starts[i] for i in range(len(starts) - 1)]) + + Parameters + ---------- + codes : np.ndarray[int64_t, ndim=1] + starts : np.ndarray[intp, ndim=1] + + Returns + ------- + np.ndarray[np.int, ndim=1] + """ + cdef: + Py_ssize_t i, l, r + ndarray[intp_t, ndim=1] out = np.empty(len(codes), dtype=np.intp) + + for i in range(len(starts) - 1): + l, r = starts[i], starts[i + 1] + out[l:r] = l + codes[l:r].argsort(kind='mergesort') + + return out + + +@cython.boundscheck(False) +@cython.wraparound(False) +def count_level_2d(ndarray[uint8_t, ndim=2, cast=True] mask, + const intp_t[:] labels, + Py_ssize_t max_bin, + int axis): + cdef: + Py_ssize_t i, j, k, n + ndarray[int64_t, ndim=2] counts + + assert (axis == 0 or axis == 1) + n, k = (mask).shape + + if axis == 0: + counts = np.zeros((max_bin, k), dtype='i8') + with nogil: + for i in range(n): + for j in range(k): + if mask[i, j]: + counts[labels[i], j] += 1 + + else: # axis == 1 + counts = np.zeros((n, max_bin), dtype='i8') + with nogil: + for i in range(n): + for j in range(k): + if mask[i, j]: + counts[i, labels[j]] += 1 + + return counts + + +@cython.wraparound(False) +@cython.boundscheck(False) +def generate_slices(const intp_t[:] labels, Py_ssize_t ngroups): + cdef: + Py_ssize_t i, group_size, n, start + intp_t lab + int64_t[::1] starts, ends + + n = len(labels) + + starts = np.zeros(ngroups, dtype=np.int64) + ends = np.zeros(ngroups, dtype=np.int64) + + start = 0 + group_size = 0 + with nogil: + for i in range(n): + lab = labels[i] + if lab < 0: + start += 1 + else: + group_size += 1 + if i == n - 1 or lab != labels[i + 1]: + starts[lab] = start + ends[lab] = start + group_size + start += group_size + group_size = 0 + + return np.asarray(starts), np.asarray(ends) + + +def indices_fast(ndarray[intp_t, ndim=1] index, const int64_t[:] labels, list keys, + list sorted_labels) -> dict: + """ + Parameters + ---------- + index : ndarray[intp] + labels : ndarray[int64] + keys : list + sorted_labels : list[ndarray[int64]] + """ + cdef: + Py_ssize_t i, j, k, lab, cur, start, n = len(labels) + dict result = {} + object tup + + k = len(keys) + + # Start at the first non-null entry + j = 0 + for j in range(0, n): + if labels[j] != -1: + break + else: + return result + cur = labels[j] + start = j + + for i in range(j+1, n): + lab = labels[i] + + if lab != cur: + if lab != -1: + if k == 1: + # When k = 1 we do not want to return a tuple as key + tup = keys[0][sorted_labels[0][i - 1]] + else: + tup = PyTuple_New(k) + for j in range(k): + val = keys[j][sorted_labels[j][i - 1]] + PyTuple_SET_ITEM(tup, j, val) + Py_INCREF(val) + result[tup] = index[start:i] + start = i + cur = lab + + if k == 1: + # When k = 1 we do not want to return a tuple as key + tup = keys[0][sorted_labels[0][n - 1]] + else: + tup = PyTuple_New(k) + for j in range(k): + val = keys[j][sorted_labels[j][n - 1]] + PyTuple_SET_ITEM(tup, j, val) + Py_INCREF(val) + result[tup] = index[start:] + + return result + + +# core.common import for fast inference checks + +def is_float(obj: object) -> bool: + """ + Return True if given object is float. + + Returns + ------- + bool + """ + return util.is_float_object(obj) + + +def is_integer(obj: object) -> bool: + """ + Return True if given object is integer. 
+ + Returns + ------- + bool + """ + return util.is_integer_object(obj) + + +def is_bool(obj: object) -> bool: + """ + Return True if given object is boolean. + + Returns + ------- + bool + """ + return util.is_bool_object(obj) + + +def is_complex(obj: object) -> bool: + """ + Return True if given object is complex. + + Returns + ------- + bool + """ + return util.is_complex_object(obj) + + +cpdef bint is_decimal(object obj): + return isinstance(obj, Decimal) + + +cpdef bint is_interval(object obj): + return getattr(obj, '_typ', '_typ') == 'interval' + + +def is_period(val: object) -> bool: + """ + Return True if given object is Period. + + Returns + ------- + bool + """ + return is_period_object(val) + + +def is_list_like(obj: object, allow_sets: bool = True) -> bool: + """ + Check if the object is list-like. + + Objects that are considered list-like are for example Python + lists, tuples, sets, NumPy arrays, and Pandas Series. + + Strings and datetime objects, however, are not considered list-like. + + Parameters + ---------- + obj : object + Object to check. + allow_sets : bool, default True + If this parameter is False, sets will not be considered list-like. + + Returns + ------- + bool + Whether `obj` has list-like properties. + + Examples + -------- + >>> import datetime + >>> is_list_like([1, 2, 3]) + True + >>> is_list_like({1, 2, 3}) + True + >>> is_list_like(datetime.datetime(2017, 1, 1)) + False + >>> is_list_like("foo") + False + >>> is_list_like(1) + False + >>> is_list_like(np.array([2])) + True + >>> is_list_like(np.array(2)) + False + """ + return c_is_list_like(obj, allow_sets) + + +cdef inline bint c_is_list_like(object obj, bint allow_sets) except -1: + # first, performance short-cuts for the most common cases + if util.is_array(obj): + # exclude zero-dimensional numpy arrays, effectively scalars + return not cnp.PyArray_IsZeroDim(obj) + elif isinstance(obj, list): + return True + # then the generic implementation + return ( + # equiv: `isinstance(obj, abc.Iterable)` + getattr(obj, "__iter__", None) is not None and not isinstance(obj, type) + # we do not count strings/unicode/bytes as list-like + and not isinstance(obj, (str, bytes)) + # exclude zero-dimensional duck-arrays, effectively scalars + and not (hasattr(obj, "ndim") and obj.ndim == 0) + # exclude sets if allow_sets is False + and not (allow_sets is False and isinstance(obj, abc.Set)) + ) + + +_TYPE_MAP = { + "categorical": "categorical", + "category": "categorical", + "int8": "integer", + "int16": "integer", + "int32": "integer", + "int64": "integer", + "i": "integer", + "uint8": "integer", + "uint16": "integer", + "uint32": "integer", + "uint64": "integer", + "u": "integer", + "float32": "floating", + "float64": "floating", + "f": "floating", + "complex64": "complex", + "complex128": "complex", + "c": "complex", + "string": "string", + str: "string", + "S": "bytes", + "U": "string", + "bool": "boolean", + "b": "boolean", + "datetime64[ns]": "datetime64", + "M": "datetime64", + "timedelta64[ns]": "timedelta64", + "m": "timedelta64", + "interval": "interval", + Period: "period", +} + +# types only exist on certain platform +try: + np.float128 + _TYPE_MAP['float128'] = 'floating' +except AttributeError: + pass +try: + np.complex256 + _TYPE_MAP['complex256'] = 'complex' +except AttributeError: + pass +try: + np.float16 + _TYPE_MAP['float16'] = 'floating' +except AttributeError: + pass + + +@cython.internal +cdef class Seen: + """ + Class for keeping track of the types of elements + encountered when trying to 
perform type conversions. + """ + + cdef: + bint int_ # seen_int + bint nat_ # seen nat + bint bool_ # seen_bool + bint null_ # seen_null + bint nan_ # seen_np.nan + bint uint_ # seen_uint (unsigned integer) + bint sint_ # seen_sint (signed integer) + bint float_ # seen_float + bint object_ # seen_object + bint complex_ # seen_complex + bint datetime_ # seen_datetime + bint coerce_numeric # coerce data to numeric + bint timedelta_ # seen_timedelta + bint datetimetz_ # seen_datetimetz + bint period_ # seen_period + bint interval_ # seen_interval + + def __cinit__(self, bint coerce_numeric=False): + """ + Initialize a Seen instance. + + Parameters + ---------- + coerce_numeric : bool, default False + Whether or not to force conversion to a numeric data type if + initial methods to convert to numeric fail. + """ + self.int_ = False + self.nat_ = False + self.bool_ = False + self.null_ = False + self.nan_ = False + self.uint_ = False + self.sint_ = False + self.float_ = False + self.object_ = False + self.complex_ = False + self.datetime_ = False + self.timedelta_ = False + self.datetimetz_ = False + self.period_ = False + self.interval_ = False + self.coerce_numeric = coerce_numeric + + cdef inline bint check_uint64_conflict(self) except -1: + """ + Check whether we can safely convert a uint64 array to a numeric dtype. + + There are two cases when conversion to numeric dtype with a uint64 + array is not safe (and will therefore not be performed) + + 1) A NaN element is encountered. + + uint64 cannot be safely cast to float64 due to truncation issues + at the extreme ends of the range. + + 2) A negative number is encountered. + + There is no numerical dtype that can hold both negative numbers + and numbers greater than INT64_MAX. Hence, at least one number + will be improperly cast if we convert to a numeric dtype. + + Returns + ------- + bool + Whether or not we should return the original input array to avoid + data truncation. + + Raises + ------ + ValueError + uint64 elements were detected, and at least one of the + two conflict cases was also detected. However, we are + trying to force conversion to a numeric dtype. + """ + return (self.uint_ and (self.null_ or self.sint_) + and not self.coerce_numeric) + + cdef inline saw_null(self): + """ + Set flags indicating that a null value was encountered. + """ + self.null_ = True + self.float_ = True + + cdef saw_int(self, object val): + """ + Set flags indicating that an integer value was encountered. + + In addition to setting a flag that an integer was seen, we + also set two flags depending on the type of integer seen: + + 1) sint_ : a negative (signed) number in the + range of [-2**63, 0) was encountered + 2) uint_ : a positive number in the range of + [2**63, 2**64) was encountered + + Parameters + ---------- + val : Python int + Value with which to set the flags. + """ + self.int_ = True + self.sint_ = self.sint_ or (oINT64_MIN <= val < 0) + self.uint_ = self.uint_ or (oINT64_MAX < val <= oUINT64_MAX) + + @property + def numeric_(self): + return self.complex_ or self.float_ or self.int_ + + @property + def is_bool(self): + return not (self.datetime_ or self.numeric_ or self.timedelta_ + or self.nat_) + + @property + def is_float_or_complex(self): + return not (self.bool_ or self.datetime_ or self.timedelta_ + or self.nat_) + + +cdef object _try_infer_map(object dtype): + """ + If its in our map, just return the dtype. 
+ """ + cdef: + object val + str attr + for attr in ["name", "kind", "base", "type"]: + val = getattr(dtype, attr, None) + if val in _TYPE_MAP: + return _TYPE_MAP[val] + return None + + +def infer_dtype(value: object, skipna: bool = True) -> str: + """ + Efficiently infer the type of a passed val, or list-like + array of values. Return a string describing the type. + + Parameters + ---------- + value : scalar, list, ndarray, or pandas type + skipna : bool, default True + Ignore NaN values when inferring the type. + + Returns + ------- + str + Describing the common type of the input data. + Results can include: + + - string + - bytes + - floating + - integer + - mixed-integer + - mixed-integer-float + - decimal + - complex + - categorical + - boolean + - datetime64 + - datetime + - date + - timedelta64 + - timedelta + - time + - period + - mixed + - unknown-array + + Raises + ------ + TypeError + If ndarray-like but cannot infer the dtype + + Notes + ----- + - 'mixed' is the catchall for anything that is not otherwise + specialized + - 'mixed-integer-float' are floats and integers + - 'mixed-integer' are integers mixed with non-integers + - 'unknown-array' is the catchall for something that *is* an array (has + a dtype attribute), but has a dtype unknown to pandas (e.g. external + extension array) + + Examples + -------- + >>> import datetime + >>> infer_dtype(['foo', 'bar']) + 'string' + + >>> infer_dtype(['a', np.nan, 'b'], skipna=True) + 'string' + + >>> infer_dtype(['a', np.nan, 'b'], skipna=False) + 'mixed' + + >>> infer_dtype([b'foo', b'bar']) + 'bytes' + + >>> infer_dtype([1, 2, 3]) + 'integer' + + >>> infer_dtype([1, 2, 3.5]) + 'mixed-integer-float' + + >>> infer_dtype([1.0, 2.0, 3.5]) + 'floating' + + >>> infer_dtype(['a', 1]) + 'mixed-integer' + + >>> infer_dtype([Decimal(1), Decimal(2.0)]) + 'decimal' + + >>> infer_dtype([True, False]) + 'boolean' + + >>> infer_dtype([True, False, np.nan]) + 'boolean' + + >>> infer_dtype([pd.Timestamp('20130101')]) + 'datetime' + + >>> infer_dtype([datetime.date(2013, 1, 1)]) + 'date' + + >>> infer_dtype([np.datetime64('2013-01-01')]) + 'datetime64' + + >>> infer_dtype([datetime.timedelta(0, 1, 1)]) + 'timedelta' + + >>> infer_dtype(pd.Series(list('aabc')).astype('category')) + 'categorical' + """ + cdef: + Py_ssize_t i, n + object val + ndarray values + bint seen_pdnat = False + bint seen_val = False + + if util.is_array(value): + values = value + elif hasattr(value, "inferred_type") and skipna is False: + # Index, use the cached attribute if possible, populate the cache otherwise + return value.inferred_type + elif hasattr(value, "dtype"): + # this will handle ndarray-like + # e.g. categoricals + dtype = value.dtype + if not cnp.PyArray_DescrCheck(dtype): + # i.e. not isinstance(dtype, np.dtype) + inferred = _try_infer_map(value.dtype) + if inferred is not None: + return inferred + return "unknown-array" + + # Unwrap Series/Index + values = np.asarray(value) + + else: + if not isinstance(value, list): + value = list(value) + + from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike + values = construct_1d_object_array_from_listlike(value) + + val = _try_infer_map(values.dtype) + if val is not None: + # Anything other than object-dtype should return here. + return val + + if values.descr.type_num != NPY_OBJECT: + # i.e. 
values.dtype != np.object + # This should not be reached + values = values.astype(object) + + # for f-contiguous array 1000 x 1000, passing order="K" gives 5000x speedup + values = values.ravel(order="K") + + if skipna: + values = values[~isnaobj(values)] + + n = cnp.PyArray_SIZE(values) + if n == 0: + return "empty" + + # Iterate until we find our first valid value. We will use this + # value to decide which of the is_foo_array functions to call. + for i in range(n): + val = values[i] + + # do not use checknull to keep + # np.datetime64('nat') and np.timedelta64('nat') + if val is None or util.is_nan(val): + pass + elif val is NaT: + seen_pdnat = True + else: + seen_val = True + break + + # if all values are nan/NaT + if seen_val is False and seen_pdnat is True: + return "datetime" + # float/object nan is handled in latter logic + + if util.is_datetime64_object(val): + if is_datetime64_array(values): + return "datetime64" + + elif is_timedelta(val): + if is_timedelta_or_timedelta64_array(values): + return "timedelta" + + elif util.is_integer_object(val): + # ordering matters here; this check must come after the is_timedelta + # check otherwise numpy timedelta64 objects would come through here + + if is_integer_array(values): + return "integer" + elif is_integer_float_array(values): + if is_integer_na_array(values): + return "integer-na" + else: + return "mixed-integer-float" + return "mixed-integer" + + elif PyDateTime_Check(val): + if is_datetime_array(values, skipna=skipna): + return "datetime" + elif is_date_array(values, skipna=skipna): + return "date" + + elif PyDate_Check(val): + if is_date_array(values, skipna=skipna): + return "date" + + elif PyTime_Check(val): + if is_time_array(values, skipna=skipna): + return "time" + + elif is_decimal(val): + if is_decimal_array(values): + return "decimal" + + elif util.is_complex_object(val): + if is_complex_array(values): + return "complex" + + elif util.is_float_object(val): + if is_float_array(values): + return "floating" + elif is_integer_float_array(values): + if is_integer_na_array(values): + return "integer-na" + else: + return "mixed-integer-float" + + elif util.is_bool_object(val): + if is_bool_array(values, skipna=skipna): + return "boolean" + + elif isinstance(val, str): + if is_string_array(values, skipna=skipna): + return "string" + + elif isinstance(val, bytes): + if is_bytes_array(values, skipna=skipna): + return "bytes" + + elif is_period_object(val): + if is_period_array(values): + return "period" + + elif is_interval(val): + if is_interval_array(values): + return "interval" + + for i in range(n): + val = values[i] + if util.is_integer_object(val): + return "mixed-integer" + + return "mixed" + + +def infer_datetimelike_array(arr: ndarray[object]) -> tuple[str, bool]: + """ + Infer if we have a datetime or timedelta array. 
+ - date: we have *only* date and maybe strings, nulls + - datetime: we have *only* datetimes and maybe strings, nulls + - timedelta: we have *only* timedeltas and maybe strings, nulls + - nat: we do not have *any* date, datetimes or timedeltas, but do have + at least a NaT + - mixed: other objects (strings, a mix of tz-aware and tz-naive, or + actual objects) + + Parameters + ---------- + arr : ndarray[object] + + Returns + ------- + str: {datetime, timedelta, date, nat, mixed} + bool + """ + cdef: + Py_ssize_t i, n = len(arr) + bint seen_timedelta = False, seen_date = False, seen_datetime = False + bint seen_tz_aware = False, seen_tz_naive = False + bint seen_nat = False, seen_str = False + bint seen_period = False, seen_interval = False + list objs = [] + object v + + for i in range(n): + v = arr[i] + if isinstance(v, str): + objs.append(v) + seen_str = True + + if len(objs) == 3: + break + + elif v is None or util.is_nan(v): + # nan or None + pass + elif v is NaT: + seen_nat = True + elif PyDateTime_Check(v): + # datetime + seen_datetime = True + + # disambiguate between tz-naive and tz-aware + if v.tzinfo is None: + seen_tz_naive = True + else: + seen_tz_aware = True + + if seen_tz_naive and seen_tz_aware: + return "mixed", seen_str + elif util.is_datetime64_object(v): + # np.datetime64 + seen_datetime = True + elif PyDate_Check(v): + seen_date = True + elif is_timedelta(v): + # timedelta, or timedelta64 + seen_timedelta = True + elif is_period_object(v): + seen_period = True + break + elif is_interval(v): + seen_interval = True + break + else: + return "mixed", seen_str + + if seen_period: + if is_period_array(arr): + return "period", seen_str + return "mixed", seen_str + + if seen_interval: + if is_interval_array(arr): + return "interval", seen_str + return "mixed", seen_str + + if seen_date and not (seen_datetime or seen_timedelta): + return "date", seen_str + elif seen_datetime and not seen_timedelta: + return "datetime", seen_str + elif seen_timedelta and not seen_datetime: + return "timedelta", seen_str + elif seen_nat: + return "nat", seen_str + + # short-circuit by trying to + # actually convert these strings + # this is for performance as we don't need to try + # convert *every* string array + if len(objs): + try: + # require_iso8601 as in maybe_infer_to_datetimelike + array_to_datetime(objs, errors="raise", require_iso8601=True) + return "datetime", seen_str + except (ValueError, TypeError): + pass + + # we are *not* going to infer from strings + # for timedelta as too much ambiguity + + return "mixed", seen_str + + +cdef inline bint is_timedelta(object o): + return PyDelta_Check(o) or util.is_timedelta64_object(o) + + +@cython.internal +cdef class Validator: + + cdef: + Py_ssize_t n + dtype dtype + bint skipna + + def __cinit__(self, Py_ssize_t n, dtype dtype=np.dtype(np.object_), + bint skipna=False): + self.n = n + self.dtype = dtype + self.skipna = skipna + + cdef bint validate(self, ndarray values) except -1: + if not self.n: + return False + + if self.is_array_typed(): + # i.e. 
this ndarray is already of the desired dtype + return True + elif self.dtype.type_num == NPY_OBJECT: + if self.skipna: + return self._validate_skipna(values) + else: + return self._validate(values) + else: + return False + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef bint _validate(self, ndarray values) except -1: + cdef: + Py_ssize_t i + Py_ssize_t n = values.size + flatiter it = PyArray_IterNew(values) + + for i in range(n): + # The PyArray_GETITEM and PyArray_ITER_NEXT are faster + # equivalents to `val = values[i]` + val = PyArray_GETITEM(values, PyArray_ITER_DATA(it)) + PyArray_ITER_NEXT(it) + if not self.is_valid(val): + return False + + return True + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef bint _validate_skipna(self, ndarray values) except -1: + cdef: + Py_ssize_t i + Py_ssize_t n = values.size + flatiter it = PyArray_IterNew(values) + + for i in range(n): + # The PyArray_GETITEM and PyArray_ITER_NEXT are faster + # equivalents to `val = values[i]` + val = PyArray_GETITEM(values, PyArray_ITER_DATA(it)) + PyArray_ITER_NEXT(it) + if not self.is_valid_skipna(val): + return False + + return True + + cdef bint is_valid(self, object value) except -1: + return self.is_value_typed(value) + + cdef bint is_valid_skipna(self, object value) except -1: + return self.is_valid(value) or self.is_valid_null(value) + + cdef bint is_value_typed(self, object value) except -1: + raise NotImplementedError(f"{type(self).__name__} child class " + "must define is_value_typed") + + cdef bint is_valid_null(self, object value) except -1: + return value is None or value is C_NA or util.is_nan(value) + + cdef bint is_array_typed(self) except -1: + return False + + +@cython.internal +cdef class BoolValidator(Validator): + cdef inline bint is_value_typed(self, object value) except -1: + return util.is_bool_object(value) + + cdef inline bint is_array_typed(self) except -1: + return issubclass(self.dtype.type, np.bool_) + + +cpdef bint is_bool_array(ndarray values, bint skipna=False): + cdef: + BoolValidator validator = BoolValidator(len(values), + values.dtype, + skipna=skipna) + return validator.validate(values) + + +@cython.internal +cdef class IntegerValidator(Validator): + cdef inline bint is_value_typed(self, object value) except -1: + return util.is_integer_object(value) + + cdef inline bint is_array_typed(self) except -1: + return issubclass(self.dtype.type, np.integer) + + +# Note: only python-exposed for tests +cpdef bint is_integer_array(ndarray values): + cdef: + IntegerValidator validator = IntegerValidator(len(values), + values.dtype) + return validator.validate(values) + + +@cython.internal +cdef class IntegerNaValidator(Validator): + cdef inline bint is_value_typed(self, object value) except -1: + return (util.is_integer_object(value) + or (util.is_nan(value) and util.is_float_object(value))) + + +cdef bint is_integer_na_array(ndarray values): + cdef: + IntegerNaValidator validator = IntegerNaValidator(len(values), + values.dtype) + return validator.validate(values) + + +@cython.internal +cdef class IntegerFloatValidator(Validator): + cdef inline bint is_value_typed(self, object value) except -1: + return util.is_integer_object(value) or util.is_float_object(value) + + cdef inline bint is_array_typed(self) except -1: + return issubclass(self.dtype.type, np.integer) + + +cdef bint is_integer_float_array(ndarray values): + cdef: + IntegerFloatValidator validator = IntegerFloatValidator(len(values), + values.dtype) + return validator.validate(values) + + 
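+# Illustrative note (not part of pandas): every Validator subclass in this
+# section only overrides is_value_typed (the per-element check) and,
+# optionally, is_array_typed (a dtype-level fast path). A hypothetical
+# validator for fractions.Fraction values, assuming `from fractions import
+# Fraction`, would follow the same pattern:
+#
+#     @cython.internal
+#     cdef class FractionValidator(Validator):
+#         cdef inline bint is_value_typed(self, object value) except -1:
+#             return isinstance(value, Fraction)
+#
+#     cdef bint is_fraction_array(ndarray values):
+#         cdef:
+#             FractionValidator validator = FractionValidator(len(values),
+#                                                             values.dtype)
+#         return validator.validate(values)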
+@cython.internal +cdef class FloatValidator(Validator): + cdef inline bint is_value_typed(self, object value) except -1: + return util.is_float_object(value) + + cdef inline bint is_array_typed(self) except -1: + return issubclass(self.dtype.type, np.floating) + + +# Note: only python-exposed for tests +cpdef bint is_float_array(ndarray values): + cdef: + FloatValidator validator = FloatValidator(len(values), values.dtype) + return validator.validate(values) + + +@cython.internal +cdef class ComplexValidator(Validator): + cdef inline bint is_value_typed(self, object value) except -1: + return ( + util.is_complex_object(value) + or (util.is_float_object(value) and is_nan(value)) + ) + + cdef inline bint is_array_typed(self) except -1: + return issubclass(self.dtype.type, np.complexfloating) + + +cdef bint is_complex_array(ndarray values): + cdef: + ComplexValidator validator = ComplexValidator(len(values), values.dtype) + return validator.validate(values) + + +@cython.internal +cdef class DecimalValidator(Validator): + cdef inline bint is_value_typed(self, object value) except -1: + return is_decimal(value) + + +cdef bint is_decimal_array(ndarray values): + cdef: + DecimalValidator validator = DecimalValidator(len(values), values.dtype) + return validator.validate(values) + + +@cython.internal +cdef class StringValidator(Validator): + cdef inline bint is_value_typed(self, object value) except -1: + return isinstance(value, str) + + cdef inline bint is_array_typed(self) except -1: + return issubclass(self.dtype.type, np.str_) + + cdef bint is_valid_null(self, object value) except -1: + # We deliberately exclude None / NaN here since StringArray uses NA + return value is C_NA + + +cpdef bint is_string_array(ndarray values, bint skipna=False): + cdef: + StringValidator validator = StringValidator(len(values), + values.dtype, + skipna=skipna) + return validator.validate(values) + + +@cython.internal +cdef class BytesValidator(Validator): + cdef inline bint is_value_typed(self, object value) except -1: + return isinstance(value, bytes) + + cdef inline bint is_array_typed(self) except -1: + return issubclass(self.dtype.type, np.bytes_) + + +cdef bint is_bytes_array(ndarray values, bint skipna=False): + cdef: + BytesValidator validator = BytesValidator(len(values), values.dtype, + skipna=skipna) + return validator.validate(values) + + +@cython.internal +cdef class TemporalValidator(Validator): + cdef: + bint all_generic_na + + def __cinit__(self, Py_ssize_t n, dtype dtype=np.dtype(np.object_), + bint skipna=False): + self.n = n + self.dtype = dtype + self.skipna = skipna + self.all_generic_na = True + + cdef inline bint is_valid(self, object value) except -1: + return self.is_value_typed(value) or self.is_valid_null(value) + + cdef bint is_valid_null(self, object value) except -1: + raise NotImplementedError(f"{type(self).__name__} child class " + "must define is_valid_null") + + cdef inline bint is_valid_skipna(self, object value) except -1: + cdef: + bint is_typed_null = self.is_valid_null(value) + bint is_generic_null = value is None or util.is_nan(value) + if not is_generic_null: + self.all_generic_na = False + return self.is_value_typed(value) or is_typed_null or is_generic_null + + cdef bint _validate_skipna(self, ndarray values) except -1: + """ + If we _only_ saw non-dtype-specific NA values, even if they are valid + for this dtype, we do not infer this dtype. 
+ """ + return Validator._validate_skipna(self, values) and not self.all_generic_na + + +@cython.internal +cdef class DatetimeValidator(TemporalValidator): + cdef bint is_value_typed(self, object value) except -1: + return PyDateTime_Check(value) + + cdef inline bint is_valid_null(self, object value) except -1: + return is_null_datetime64(value) + + +cpdef bint is_datetime_array(ndarray values, bint skipna=True): + cdef: + DatetimeValidator validator = DatetimeValidator(len(values), + skipna=skipna) + return validator.validate(values) + + +@cython.internal +cdef class Datetime64Validator(DatetimeValidator): + cdef inline bint is_value_typed(self, object value) except -1: + return util.is_datetime64_object(value) + + +# Note: only python-exposed for tests +cpdef bint is_datetime64_array(ndarray values): + cdef: + Datetime64Validator validator = Datetime64Validator(len(values), + skipna=True) + return validator.validate(values) + + +@cython.internal +cdef class AnyDatetimeValidator(DatetimeValidator): + cdef inline bint is_value_typed(self, object value) except -1: + return util.is_datetime64_object(value) or ( + PyDateTime_Check(value) and value.tzinfo is None + ) + + +cdef bint is_datetime_or_datetime64_array(ndarray values): + cdef: + AnyDatetimeValidator validator = AnyDatetimeValidator(len(values), + skipna=True) + return validator.validate(values) + + +# Note: only python-exposed for tests +def is_datetime_with_singletz_array(values: ndarray) -> bool: + """ + Check values have the same tzinfo attribute. + Doesn't check values are datetime-like types. + """ + cdef: + Py_ssize_t i = 0, j, n = len(values) + object base_val, base_tz, val, tz + + if n == 0: + return False + + # Get a reference timezone to compare with the rest of the tzs in the array + for i in range(n): + base_val = values[i] + if base_val is not NaT and base_val is not None and not util.is_nan(base_val): + base_tz = getattr(base_val, 'tzinfo', None) + break + + for j in range(i, n): + # Compare val's timezone with the reference timezone + # NaT can coexist with tz-aware datetimes, so skip if encountered + val = values[j] + if val is not NaT and val is not None and not util.is_nan(val): + tz = getattr(val, 'tzinfo', None) + if not tz_compare(base_tz, tz): + return False + + # Note: we should only be called if a tzaware datetime has been seen, + # so base_tz should always be set at this point. + return True + + +@cython.internal +cdef class TimedeltaValidator(TemporalValidator): + cdef bint is_value_typed(self, object value) except -1: + return PyDelta_Check(value) + + cdef inline bint is_valid_null(self, object value) except -1: + return is_null_timedelta64(value) + + +@cython.internal +cdef class AnyTimedeltaValidator(TimedeltaValidator): + cdef inline bint is_value_typed(self, object value) except -1: + return is_timedelta(value) + + +# Note: only python-exposed for tests +cpdef bint is_timedelta_or_timedelta64_array(ndarray values): + """ + Infer with timedeltas and/or nat/none. 
+ """ + cdef: + AnyTimedeltaValidator validator = AnyTimedeltaValidator(len(values), + skipna=True) + return validator.validate(values) + + +@cython.internal +cdef class DateValidator(Validator): + cdef inline bint is_value_typed(self, object value) except -1: + return PyDate_Check(value) + + +# Note: only python-exposed for tests +cpdef bint is_date_array(ndarray values, bint skipna=False): + cdef: + DateValidator validator = DateValidator(len(values), skipna=skipna) + return validator.validate(values) + + +@cython.internal +cdef class TimeValidator(Validator): + cdef inline bint is_value_typed(self, object value) except -1: + return PyTime_Check(value) + + +# Note: only python-exposed for tests +cpdef bint is_time_array(ndarray values, bint skipna=False): + cdef: + TimeValidator validator = TimeValidator(len(values), skipna=skipna) + return validator.validate(values) + + +cdef bint is_period_array(ndarray[object] values): + """ + Is this an ndarray of Period objects (or NaT) with a single `freq`? + """ + cdef: + Py_ssize_t i, n = len(values) + int dtype_code = -10000 # i.e. c_FreqGroup.FR_UND + object val + + if len(values) == 0: + return False + + for i in range(n): + val = values[i] + + if is_period_object(val): + if dtype_code == -10000: + dtype_code = val._dtype._dtype_code + elif dtype_code != val._dtype._dtype_code: + # mismatched freqs + return False + elif checknull_with_nat(val): + pass + else: + # Not a Period or NaT-like + return False + + if dtype_code == -10000: + # we saw all-NaTs, no actual Periods + return False + return True + + +# Note: only python-exposed for tests +cpdef bint is_interval_array(ndarray values): + """ + Is this an ndarray of Interval (or np.nan) with a single dtype? + """ + cdef: + Py_ssize_t i, n = len(values) + str closed = None + bint numeric = False + bint dt64 = False + bint td64 = False + object val + + if len(values) == 0: + return False + + for i in range(n): + val = values[i] + + if is_interval(val): + if closed is None: + closed = val.closed + numeric = ( + util.is_float_object(val.left) + or util.is_integer_object(val.left) + ) + td64 = is_timedelta(val.left) + dt64 = PyDateTime_Check(val.left) + elif val.closed != closed: + # mismatched closedness + return False + elif numeric: + if not ( + util.is_float_object(val.left) + or util.is_integer_object(val.left) + ): + # i.e. datetime64 or timedelta64 + return False + elif td64: + if not is_timedelta(val.left): + return False + elif dt64: + if not PyDateTime_Check(val.left): + return False + else: + raise ValueError(val) + elif util.is_nan(val) or val is None: + pass + else: + return False + + if closed is None: + # we saw all-NAs, no actual Intervals + return False + return True + + +@cython.boundscheck(False) +@cython.wraparound(False) +def maybe_convert_numeric( + ndarray[object, ndim=1] values, + set na_values, + bint convert_empty=True, + bint coerce_numeric=False, + bint convert_to_masked_nullable=False, +) -> tuple[np.ndarray, np.ndarray | None]: + """ + Convert object array to a numeric array if possible. + + Parameters + ---------- + values : ndarray[object] + Array of object elements to convert. + na_values : set + Set of values that should be interpreted as NaN. + convert_empty : bool, default True + If an empty array-like object is encountered, whether to interpret + that element as NaN or not. If set to False, a ValueError will be + raised if such an element is encountered and 'coerce_numeric' is False. 
+ coerce_numeric : bool, default False + If initial attempts to convert to numeric have failed, whether to + force conversion to numeric via alternative methods or by setting the + element to NaN. Otherwise, an Exception will be raised when such an + element is encountered. + + This boolean also has an impact on how conversion behaves when a + numeric array has no suitable numerical dtype to return (i.e. uint64, + int32, uint8). If set to False, the original object array will be + returned. Otherwise, a ValueError will be raised. + convert_to_masked_nullable : bool, default False + Whether to return a mask for the converted values. This also disables + upcasting for ints with nulls to float64. + Returns + ------- + np.ndarray + Array of converted object values to numerical ones. + + Optional[np.ndarray] + If convert_to_masked_nullable is True, + returns a boolean mask for the converted values, otherwise returns None. + """ + if len(values) == 0: + return (np.array([], dtype='i8'), None) + + # fastpath for ints - try to convert all based on first value + cdef: + object val = values[0] + + if util.is_integer_object(val): + try: + maybe_ints = values.astype('i8') + if (maybe_ints == values).all(): + return (maybe_ints, None) + except (ValueError, OverflowError, TypeError): + pass + + # Otherwise, iterate and do full inference. + cdef: + int status, maybe_int + Py_ssize_t i, n = values.size + Seen seen = Seen(coerce_numeric) + ndarray[float64_t, ndim=1] floats = np.empty(n, dtype='f8') + ndarray[complex128_t, ndim=1] complexes = np.empty(n, dtype='c16') + ndarray[int64_t, ndim=1] ints = np.empty(n, dtype='i8') + ndarray[uint64_t, ndim=1] uints = np.empty(n, dtype='u8') + ndarray[uint8_t, ndim=1] bools = np.empty(n, dtype='u1') + ndarray[uint8_t, ndim=1] mask = np.zeros(n, dtype="u1") + float64_t fval + bint allow_null_in_int = convert_to_masked_nullable + + for i in range(n): + val = values[i] + # We only want to disable NaNs showing as float if + # a) convert_to_masked_nullable = True + # b) no floats have been seen ( assuming an int shows up later ) + # However, if no ints present (all null array), we need to return floats + allow_null_in_int = convert_to_masked_nullable and not seen.float_ + + if val.__hash__ is not None and val in na_values: + if allow_null_in_int: + seen.null_ = True + mask[i] = 1 + else: + if convert_to_masked_nullable: + mask[i] = 1 + seen.saw_null() + floats[i] = complexes[i] = NaN + elif util.is_float_object(val): + fval = val + if fval != fval: + seen.null_ = True + if allow_null_in_int: + mask[i] = 1 + else: + if convert_to_masked_nullable: + mask[i] = 1 + seen.float_ = True + else: + seen.float_ = True + floats[i] = complexes[i] = fval + elif util.is_integer_object(val): + floats[i] = complexes[i] = val + + val = int(val) + seen.saw_int(val) + + if val >= 0: + if val <= oUINT64_MAX: + uints[i] = val + else: + seen.float_ = True + + if oINT64_MIN <= val <= oINT64_MAX: + ints[i] = val + + if val < oINT64_MIN or (seen.sint_ and seen.uint_): + seen.float_ = True + + elif util.is_bool_object(val): + floats[i] = uints[i] = ints[i] = bools[i] = val + seen.bool_ = True + elif val is None or val is C_NA: + if allow_null_in_int: + seen.null_ = True + mask[i] = 1 + else: + if convert_to_masked_nullable: + mask[i] = 1 + seen.saw_null() + floats[i] = complexes[i] = NaN + elif hasattr(val, '__len__') and len(val) == 0: + if convert_empty or seen.coerce_numeric: + seen.saw_null() + floats[i] = complexes[i] = NaN + else: + raise ValueError("Empty string encountered") + elif 
util.is_complex_object(val): + complexes[i] = val + seen.complex_ = True + elif is_decimal(val): + floats[i] = complexes[i] = val + seen.float_ = True + else: + try: + status = floatify(val, &fval, &maybe_int) + + if fval in na_values: + seen.saw_null() + floats[i] = complexes[i] = NaN + mask[i] = 1 + else: + if fval != fval: + seen.null_ = True + mask[i] = 1 + + floats[i] = fval + + if maybe_int: + as_int = int(val) + + if as_int in na_values: + mask[i] = 1 + seen.null_ = True + if not allow_null_in_int: + seen.float_ = True + else: + seen.saw_int(as_int) + + if as_int not in na_values: + if as_int < oINT64_MIN or as_int > oUINT64_MAX: + if seen.coerce_numeric: + seen.float_ = True + else: + raise ValueError("Integer out of range.") + else: + if as_int >= 0: + uints[i] = as_int + + if as_int <= oINT64_MAX: + ints[i] = as_int + + seen.float_ = seen.float_ or (seen.uint_ and seen.sint_) + else: + seen.float_ = True + except (TypeError, ValueError) as err: + if not seen.coerce_numeric: + raise type(err)(f"{err} at position {i}") + + seen.saw_null() + floats[i] = NaN + + if seen.check_uint64_conflict(): + return (values, None) + + # This occurs since we disabled float nulls showing as null in anticipation + # of seeing ints that were never seen. So then, we return float + if allow_null_in_int and seen.null_ and not seen.int_: + seen.float_ = True + + if seen.complex_: + return (complexes, None) + elif seen.float_: + if seen.null_ and convert_to_masked_nullable: + return (floats, mask.view(np.bool_)) + return (floats, None) + elif seen.int_: + if seen.null_ and convert_to_masked_nullable: + if seen.uint_: + return (uints, mask.view(np.bool_)) + else: + return (ints, mask.view(np.bool_)) + if seen.uint_: + return (uints, None) + else: + return (ints, None) + elif seen.bool_: + return (bools.view(np.bool_), None) + elif seen.uint_: + return (uints, None) + return (ints, None) + + +@cython.boundscheck(False) +@cython.wraparound(False) +def maybe_convert_objects(ndarray[object] objects, + *, + bint try_float=False, + bint safe=False, + bint convert_datetime=False, + bint convert_timedelta=False, + bint convert_period=False, + bint convert_interval=False, + bint convert_to_nullable_integer=False, + object dtype_if_all_nat=None) -> "ArrayLike": + """ + Type inference function-- convert object array to proper dtype + + Parameters + ---------- + objects : ndarray[object] + Array of object elements to convert. + try_float : bool, default False + If an array-like object contains only float or NaN values is + encountered, whether to convert and return an array of float dtype. + safe : bool, default False + Whether to upcast numeric type (e.g. int cast to float). If set to + True, no upcasting will be performed. + convert_datetime : bool, default False + If an array-like object contains only datetime values or NaT is + encountered, whether to convert and return an array of M8[ns] dtype. + convert_timedelta : bool, default False + If an array-like object contains only timedelta values or NaT is + encountered, whether to convert and return an array of m8[ns] dtype. + convert_period : bool, default False + If an array-like object contains only (homogeneous-freq) Period values + or NaT, whether to convert and return a PeriodArray. + convert_interval : bool, default False + If an array-like object contains only Interval objects (with matching + dtypes and closedness) or NaN, whether to convert to IntervalArray. 
+ convert_to_nullable_integer : bool, default False + If an array-like object contains only integer values (and NaN) is + encountered, whether to convert and return an IntegerArray. + dtype_if_all_nat : np.dtype, ExtensionDtype, or None, default None + Dtype to cast to if we have all-NaT. + + Returns + ------- + np.ndarray or ExtensionArray + Array of converted object values to more specific dtypes if applicable. + """ + cdef: + Py_ssize_t i, n, itemsize_max = 0 + ndarray[float64_t] floats + ndarray[complex128_t] complexes + ndarray[int64_t] ints + ndarray[uint64_t] uints + ndarray[uint8_t] bools + int64_t[:] idatetimes + int64_t[:] itimedeltas + Seen seen = Seen() + object val + float64_t fval, fnan = np.nan + + n = len(objects) + + floats = np.empty(n, dtype='f8') + complexes = np.empty(n, dtype='c16') + ints = np.empty(n, dtype='i8') + uints = np.empty(n, dtype='u8') + bools = np.empty(n, dtype=np.uint8) + mask = np.full(n, False) + + if convert_datetime: + datetimes = np.empty(n, dtype='M8[ns]') + idatetimes = datetimes.view(np.int64) + + if convert_timedelta: + timedeltas = np.empty(n, dtype='m8[ns]') + itimedeltas = timedeltas.view(np.int64) + + for i in range(n): + val = objects[i] + if itemsize_max != -1: + itemsize = get_itemsize(val) + if itemsize > itemsize_max or itemsize == -1: + itemsize_max = itemsize + + if val is None: + seen.null_ = True + floats[i] = complexes[i] = fnan + mask[i] = True + elif val is NaT: + seen.nat_ = True + if convert_datetime: + idatetimes[i] = NPY_NAT + if convert_timedelta: + itimedeltas[i] = NPY_NAT + if not (convert_datetime or convert_timedelta or convert_period): + seen.object_ = True + break + elif val is np.nan: + seen.nan_ = True + mask[i] = True + floats[i] = complexes[i] = val + elif util.is_bool_object(val): + seen.bool_ = True + bools[i] = val + elif util.is_float_object(val): + floats[i] = complexes[i] = val + seen.float_ = True + elif is_timedelta(val): + if convert_timedelta: + seen.timedelta_ = True + try: + itimedeltas[i] = convert_to_timedelta64(val, "ns").view("i8") + except OutOfBoundsTimedelta: + seen.object_ = True + break + break + else: + seen.object_ = True + break + elif util.is_integer_object(val): + seen.int_ = True + floats[i] = val + complexes[i] = val + if not seen.null_: + val = int(val) + seen.saw_int(val) + + if ((seen.uint_ and seen.sint_) or + val > oUINT64_MAX or val < oINT64_MIN): + seen.object_ = True + break + + if seen.uint_: + uints[i] = val + elif seen.sint_: + ints[i] = val + else: + uints[i] = val + ints[i] = val + + elif util.is_complex_object(val): + complexes[i] = val + seen.complex_ = True + elif PyDateTime_Check(val) or util.is_datetime64_object(val): + + # if we have an tz's attached then return the objects + if convert_datetime: + if getattr(val, 'tzinfo', None) is not None: + seen.datetimetz_ = True + break + else: + seen.datetime_ = True + try: + idatetimes[i] = convert_to_tsobject( + val, None, None, 0, 0).value + except OutOfBoundsDatetime: + seen.object_ = True + break + else: + seen.object_ = True + break + elif is_period_object(val): + if convert_period: + seen.period_ = True + break + else: + seen.object_ = True + break + elif try_float and not isinstance(val, str): + # this will convert Decimal objects + try: + floats[i] = float(val) + complexes[i] = complex(val) + seen.float_ = True + except (ValueError, TypeError): + seen.object_ = True + break + elif is_interval(val): + if convert_interval: + seen.interval_ = True + break + else: + seen.object_ = True + break + else: + seen.object_ = 
True + break + + # we try to coerce datetime w/tz but must all have the same tz + if seen.datetimetz_: + if is_datetime_with_singletz_array(objects): + from pandas import DatetimeIndex + dti = DatetimeIndex(objects) + + # unbox to DatetimeArray + return dti._data + seen.object_ = True + + elif seen.datetime_: + if is_datetime_or_datetime64_array(objects): + from pandas import DatetimeIndex + + try: + dti = DatetimeIndex(objects) + except OutOfBoundsDatetime: + pass + else: + # unbox to ndarray[datetime64[ns]] + return dti._data._ndarray + seen.object_ = True + + elif seen.timedelta_: + if is_timedelta_or_timedelta64_array(objects): + from pandas import TimedeltaIndex + + try: + tdi = TimedeltaIndex(objects) + except OutOfBoundsTimedelta: + pass + else: + # unbox to ndarray[timedelta64[ns]] + return tdi._data._ndarray + seen.object_ = True + + if seen.period_: + if is_period_array(objects): + from pandas import PeriodIndex + pi = PeriodIndex(objects) + + # unbox to PeriodArray + return pi._data + seen.object_ = True + + if seen.interval_: + if is_interval_array(objects): + from pandas import IntervalIndex + ii = IntervalIndex(objects) + + # unbox to IntervalArray + return ii._data + + seen.object_ = True + + if not seen.object_: + result = None + if not safe: + if seen.null_ or seen.nan_: + if seen.is_float_or_complex: + if seen.complex_: + result = complexes + elif seen.float_: + result = floats + elif seen.int_: + if convert_to_nullable_integer: + from pandas.core.arrays import IntegerArray + result = IntegerArray(ints, mask) + else: + result = floats + elif seen.nan_: + result = floats + else: + if not seen.bool_: + if seen.datetime_: + if not seen.numeric_ and not seen.timedelta_: + result = datetimes + elif seen.timedelta_: + if not seen.numeric_: + result = timedeltas + elif seen.nat_: + if not seen.numeric_: + if convert_datetime and convert_timedelta: + dtype = dtype_if_all_nat + if dtype is not None: + # otherwise we keep object dtype + result = _infer_all_nats( + dtype, datetimes, timedeltas + ) + + elif convert_datetime: + result = datetimes + elif convert_timedelta: + result = timedeltas + else: + if seen.complex_: + result = complexes + elif seen.float_: + result = floats + elif seen.int_: + if seen.uint_: + result = uints + else: + result = ints + elif seen.is_bool: + result = bools.view(np.bool_) + + else: + # don't cast int to float, etc. 
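+            # Illustrative example of the `safe` semantics (inferred from the
+            # branches below, not part of the original source): with safe=True,
+            # an object array holding [1, 2.5] is returned unchanged, because
+            # casting it to float64 would upcast the int; the non-safe branch
+            # above returns the same input as float64.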
+
+            if seen.null_:
+                if seen.is_float_or_complex:
+                    if seen.complex_:
+                        if not seen.int_:
+                            result = complexes
+                    elif seen.float_ or seen.nan_:
+                        if not seen.int_:
+                            result = floats
+            else:
+                if not seen.bool_:
+                    if seen.datetime_:
+                        if not seen.numeric_ and not seen.timedelta_:
+                            result = datetimes
+                    elif seen.timedelta_:
+                        if not seen.numeric_:
+                            result = timedeltas
+                    elif seen.nat_:
+                        if not seen.numeric_:
+                            if convert_datetime and convert_timedelta:
+                                dtype = dtype_if_all_nat
+                                if dtype is not None:
+                                    # otherwise we keep object dtype
+                                    result = _infer_all_nats(
+                                        dtype, datetimes, timedeltas
+                                    )
+
+                            elif convert_datetime:
+                                result = datetimes
+                            elif convert_timedelta:
+                                result = timedeltas
+                    else:
+                        if seen.complex_:
+                            if not seen.int_:
+                                result = complexes
+                        elif seen.float_ or seen.nan_:
+                            if not seen.int_:
+                                result = floats
+                        elif seen.int_:
+                            if seen.uint_:
+                                result = uints
+                            else:
+                                result = ints
+                elif seen.is_bool and not seen.nan_:
+                    result = bools.view(np.bool_)
+
+    if result is uints or result is ints or result is floats or result is complexes:
+        # cast to the largest itemsize when all values are NumPy scalars
+        if itemsize_max > 0 and itemsize_max != result.dtype.itemsize:
+            result = result.astype(result.dtype.kind + str(itemsize_max))
+        return result
+    elif result is not None:
+        return result
+
+    return objects
+
+
+cdef _infer_all_nats(dtype, ndarray datetimes, ndarray timedeltas):
+    """
+    If we have all-NaT values, cast these to the given dtype.
+    """
+    if cnp.PyArray_DescrCheck(dtype):
+        # i.e. isinstance(dtype, np.dtype):
+        if dtype == "M8[ns]":
+            result = datetimes
+        elif dtype == "m8[ns]":
+            result = timedeltas
+        else:
+            raise ValueError(dtype)
+    else:
+        # ExtensionDtype
+        cls = dtype.construct_array_type()
+        i8vals = np.empty(len(datetimes), dtype="i8")
+        i8vals.fill(NPY_NAT)
+        result = cls(i8vals, dtype=dtype)
+    return result
+
+
+class NoDefault(Enum):
+    # We make this an Enum
+    # 1) because it round-trips through pickle correctly (see GH#40397)
+    # 2) because mypy does not understand singletons
+    no_default = "NO_DEFAULT"
+
+    def __repr__(self) -> str:
+        return "<no_default>"
+
+
+# Note: no_default is exported to the public API in pandas.api.extensions
+no_default = NoDefault.no_default  # Sentinel indicating the default value.
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def map_infer_mask(ndarray arr, object f, const uint8_t[:] mask, bint convert=True,
+                   object na_value=no_default, cnp.dtype dtype=np.dtype(object)
+                   ) -> np.ndarray:
+    """
+    Substitute for np.vectorize with pandas-friendly dtype inference.
+
+    Parameters
+    ----------
+    arr : ndarray
+    f : function
+    mask : ndarray
+        uint8 dtype ndarray indicating values not to apply `f` to.
+    convert : bool, default True
+        Whether to call `maybe_convert_objects` on the resulting ndarray
+    na_value : Any, optional
+        The result value to use for masked values. By default, the
+        input value is used
+    dtype : numpy.dtype
+        The numpy dtype to use for the result ndarray.
+ + Returns + ------- + np.ndarray + """ + cdef: + Py_ssize_t i, n + ndarray result + object val + + n = len(arr) + result = np.empty(n, dtype=dtype) + for i in range(n): + if mask[i]: + if na_value is no_default: + val = arr[i] + else: + val = na_value + else: + val = f(arr[i]) + + if cnp.PyArray_IsZeroDim(val): + # unbox 0-dim arrays, GH#690 + val = val.item() + + result[i] = val + + if convert: + return maybe_convert_objects(result, + try_float=False, + convert_datetime=False, + convert_timedelta=False) + + return result + + +@cython.boundscheck(False) +@cython.wraparound(False) +def map_infer( + ndarray arr, object f, bint convert=True, bint ignore_na=False +) -> np.ndarray: + """ + Substitute for np.vectorize with pandas-friendly dtype inference. + + Parameters + ---------- + arr : ndarray + f : function + convert : bint + ignore_na : bint + If True, NA values will not have f applied + + Returns + ------- + np.ndarray + """ + cdef: + Py_ssize_t i, n + ndarray[object] result + object val + + n = len(arr) + result = np.empty(n, dtype=object) + for i in range(n): + if ignore_na and checknull(arr[i]): + result[i] = arr[i] + continue + val = f(arr[i]) + + if cnp.PyArray_IsZeroDim(val): + # unbox 0-dim arrays, GH#690 + val = val.item() + + result[i] = val + + if convert: + return maybe_convert_objects(result, + try_float=False, + convert_datetime=False, + convert_timedelta=False) + + return result + + +def to_object_array(rows: object, min_width: int = 0) -> ndarray: + """ + Convert a list of lists into an object array. + + Parameters + ---------- + rows : 2-d array (N, K) + List of lists to be converted into an array. + min_width : int + Minimum width of the object array. If a list + in `rows` contains fewer than `width` elements, + the remaining elements in the corresponding row + will all be `NaN`. + + Returns + ------- + np.ndarray[object, ndim=2] + """ + cdef: + Py_ssize_t i, j, n, k, tmp + ndarray[object, ndim=2] result + list row + + rows = list(rows) + n = len(rows) + + k = min_width + for i in range(n): + tmp = len(rows[i]) + if tmp > k: + k = tmp + + result = np.empty((n, k), dtype=object) + + for i in range(n): + row = list(rows[i]) + + for j in range(len(row)): + result[i, j] = row[j] + + return result + + +def tuples_to_object_array(ndarray[object] tuples): + cdef: + Py_ssize_t i, j, n, k, tmp + ndarray[object, ndim=2] result + tuple tup + + n = len(tuples) + k = len(tuples[0]) + result = np.empty((n, k), dtype=object) + for i in range(n): + tup = tuples[i] + for j in range(k): + result[i, j] = tup[j] + + return result + + +def to_object_array_tuples(rows: object) -> np.ndarray: + """ + Convert a list of tuples into an object array. Any subclass of + tuple in `rows` will be casted to tuple. + + Parameters + ---------- + rows : 2-d array (N, K) + List of tuples to be converted into an array. + + Returns + ------- + np.ndarray[object, ndim=2] + """ + cdef: + Py_ssize_t i, j, n, k, tmp + ndarray[object, ndim=2] result + tuple row + + rows = list(rows) + n = len(rows) + + k = 0 + for i in range(n): + tmp = 1 if checknull(rows[i]) else len(rows[i]) + if tmp > k: + k = tmp + + result = np.empty((n, k), dtype=object) + + try: + for i in range(n): + row = rows[i] + for j in range(len(row)): + result[i, j] = row[j] + except TypeError: + # e.g. 
"Expected tuple, got list" + # upcast any subclasses to tuple + for i in range(n): + row = (rows[i],) if checknull(rows[i]) else tuple(rows[i]) + for j in range(len(row)): + result[i, j] = row[j] + + return result + + +@cython.wraparound(False) +@cython.boundscheck(False) +def fast_multiget(dict mapping, ndarray keys, default=np.nan) -> np.ndarray: + cdef: + Py_ssize_t i, n = len(keys) + object val + ndarray[object] output = np.empty(n, dtype='O') + + if n == 0: + # kludge, for Series + return np.empty(0, dtype='f8') + + for i in range(n): + val = keys[i] + if val in mapping: + output[i] = mapping[val] + else: + output[i] = default + + return maybe_convert_objects(output) + + +def is_bool_list(obj: list) -> bool: + """ + Check if this list contains only bool or np.bool_ objects. + + This is appreciably faster than checking `np.array(obj).dtype == bool` + + obj1 = [True, False] * 100 + obj2 = obj1 * 100 + obj3 = obj2 * 100 + obj4 = [True, None] + obj1 + + for obj in [obj1, obj2, obj3, obj4]: + %timeit is_bool_list(obj) + %timeit np.array(obj).dtype.kind == "b" + + 340 ns ± 8.22 ns + 8.78 µs ± 253 ns + + 28.8 µs ± 704 ns + 813 µs ± 17.8 µs + + 3.4 ms ± 168 µs + 78.4 ms ± 1.05 ms + + 48.1 ns ± 1.26 ns + 8.1 µs ± 198 ns + """ + cdef: + object item + + for item in obj: + if not util.is_bool_object(item): + return False + + # Note: we return True for empty list + return True + + +cpdef ndarray eq_NA_compat(ndarray[object] arr, object key): + """ + Check for `arr == key`, treating all values as not-equal to pd.NA. + + key is assumed to have `not isna(key)` + """ + cdef: + ndarray[uint8_t, cast=True] result = np.empty(len(arr), dtype=bool) + Py_ssize_t i + object item + + for i in range(len(arr)): + item = arr[i] + if item is C_NA: + result[i] = False + else: + result[i] = item == key + + return result + + +def dtypes_all_equal(list types not None) -> bool: + """ + Faster version for: + + first = types[0] + all(is_dtype_equal(first, t) for t in types[1:]) + + And assuming all elements in the list are np.dtype/ExtensionDtype objects + + See timings at https://github.com/pandas-dev/pandas/pull/44594 + """ + first = types[0] + for t in types[1:]: + try: + if not t == first: + return False + except (TypeError, AttributeError): + return False + else: + return True diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/missing.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/_libs/missing.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..9358918 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/missing.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/missing.pxd b/.local/lib/python3.8/site-packages/pandas/_libs/missing.pxd new file mode 100644 index 0000000..854dcf2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/missing.pxd @@ -0,0 +1,19 @@ +from numpy cimport ( + ndarray, + uint8_t, +) + + +cpdef bint is_matching_na(object left, object right, bint nan_matches_none=*) + +cpdef bint checknull(object val, bint inf_as_na=*) +cpdef ndarray[uint8_t] isnaobj(ndarray arr, bint inf_as_na=*) + +cdef bint is_null_datetime64(v) +cdef bint is_null_timedelta64(v) +cdef bint checknull_with_nat_and_na(object obj) + +cdef class C_NAType: + pass + +cdef C_NAType C_NA diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/missing.pyi b/.local/lib/python3.8/site-packages/pandas/_libs/missing.pyi new file mode 100644 index 0000000..3a4cc9d --- /dev/null +++ 
b/.local/lib/python3.8/site-packages/pandas/_libs/missing.pyi @@ -0,0 +1,17 @@ +import numpy as np +from numpy import typing as npt + +class NAType: ... + +NA: NAType + +def is_matching_na( + left: object, right: object, nan_matches_none: bool = ... +) -> bool: ... +def isposinf_scalar(val: object) -> bool: ... +def isneginf_scalar(val: object) -> bool: ... +def checknull(val: object, inf_as_na: bool = ...) -> bool: ... +def isnaobj(arr: np.ndarray, inf_as_na: bool = ...) -> npt.NDArray[np.bool_]: ... +def isnaobj2d(arr: np.ndarray, inf_as_na: bool = ...) -> npt.NDArray[np.bool_]: ... +def is_numeric_na(values: np.ndarray) -> npt.NDArray[np.bool_]: ... +def is_float_nan(values: np.ndarray) -> npt.NDArray[np.bool_]: ... diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/missing.pyx b/.local/lib/python3.8/site-packages/pandas/_libs/missing.pyx new file mode 100644 index 0000000..62977f0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/missing.pyx @@ -0,0 +1,507 @@ +from decimal import Decimal +import numbers +from sys import maxsize + +import cython +from cython import Py_ssize_t +import numpy as np + +cimport numpy as cnp +from numpy cimport ( + float64_t, + int64_t, + ndarray, + uint8_t, +) + +cnp.import_array() + +from pandas._libs cimport util +from pandas._libs.tslibs.nattype cimport ( + c_NaT as NaT, + checknull_with_nat, + is_dt64nat, + is_td64nat, +) +from pandas._libs.tslibs.np_datetime cimport ( + get_datetime64_unit, + get_datetime64_value, + get_timedelta64_value, +) + +from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op + +cdef: + float64_t INF = np.inf + float64_t NEGINF = -INF + + int64_t NPY_NAT = util.get_nat() + + bint is_32bit = maxsize <= 2 ** 32 + + type cDecimal = Decimal # for faster isinstance checks + + +cpdef bint is_matching_na(object left, object right, bint nan_matches_none=False): + """ + Check if two scalars are both NA of matching types. + + Parameters + ---------- + left : Any + right : Any + nan_matches_none : bool, default False + For backwards compatibility, consider NaN as matching None. 
+ + Returns + ------- + bool + """ + if left is None: + if nan_matches_none and util.is_nan(right): + return True + return right is None + elif left is C_NA: + return right is C_NA + elif left is NaT: + return right is NaT + elif util.is_float_object(left): + if nan_matches_none and right is None and util.is_nan(left): + return True + return ( + util.is_nan(left) + and util.is_float_object(right) + and util.is_nan(right) + ) + elif util.is_complex_object(left): + return ( + util.is_nan(left) + and util.is_complex_object(right) + and util.is_nan(right) + ) + elif util.is_datetime64_object(left): + return ( + get_datetime64_value(left) == NPY_NAT + and util.is_datetime64_object(right) + and get_datetime64_value(right) == NPY_NAT + and get_datetime64_unit(left) == get_datetime64_unit(right) + ) + elif util.is_timedelta64_object(left): + return ( + get_timedelta64_value(left) == NPY_NAT + and util.is_timedelta64_object(right) + and get_timedelta64_value(right) == NPY_NAT + and get_datetime64_unit(left) == get_datetime64_unit(right) + ) + elif is_decimal_na(left): + return is_decimal_na(right) + return False + + +cpdef bint checknull(object val, bint inf_as_na=False): + """ + Return boolean describing of the input is NA-like, defined here as any + of: + - None + - nan + - NaT + - np.datetime64 representation of NaT + - np.timedelta64 representation of NaT + - NA + - Decimal("NaN") + + Parameters + ---------- + val : object + inf_as_na : bool, default False + Whether to treat INF and -INF as NA values. + + Returns + ------- + bool + """ + if val is None or val is NaT or val is C_NA: + return True + elif util.is_float_object(val) or util.is_complex_object(val): + if val != val: + return True + elif inf_as_na: + return val == INF or val == NEGINF + return False + elif util.is_timedelta64_object(val): + return get_timedelta64_value(val) == NPY_NAT + elif util.is_datetime64_object(val): + return get_datetime64_value(val) == NPY_NAT + else: + return is_decimal_na(val) + + +cdef inline bint is_decimal_na(object val): + """ + Is this a decimal.Decimal object Decimal("NAN"). + """ + return isinstance(val, cDecimal) and val != val + + +@cython.wraparound(False) +@cython.boundscheck(False) +cpdef ndarray[uint8_t] isnaobj(ndarray arr, bint inf_as_na=False): + """ + Return boolean mask denoting which elements of a 1-D array are na-like, + according to the criteria defined in `checknull`: + - None + - nan + - NaT + - np.datetime64 representation of NaT + - np.timedelta64 representation of NaT + - NA + - Decimal("NaN") + + Parameters + ---------- + arr : ndarray + + Returns + ------- + result : ndarray (dtype=np.bool_) + """ + cdef: + Py_ssize_t i, n + object val + ndarray[uint8_t] result + + assert arr.ndim == 1, "'arr' must be 1-D." 
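+    # Illustrative example (not a doctest from the source), following the
+    # criteria listed in the docstring:
+    #   isnaobj(np.array([1.0, None, np.nan, NaT], dtype=object))
+    #   -> array([False,  True,  True,  True])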
+ + n = len(arr) + result = np.empty(n, dtype=np.uint8) + for i in range(n): + val = arr[i] + result[i] = checknull(val, inf_as_na=inf_as_na) + return result.view(np.bool_) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def isnaobj2d(arr: ndarray, inf_as_na: bool = False) -> ndarray: + """ + Return boolean mask denoting which elements of a 2-D array are na-like, + according to the criteria defined in `checknull`: + - None + - nan + - NaT + - np.datetime64 representation of NaT + - np.timedelta64 representation of NaT + - NA + - Decimal("NaN") + + Parameters + ---------- + arr : ndarray + + Returns + ------- + result : ndarray (dtype=np.bool_) + """ + cdef: + Py_ssize_t i, j, n, m + object val + ndarray[uint8_t, ndim=2] result + + assert arr.ndim == 2, "'arr' must be 2-D." + + n, m = (arr).shape + result = np.zeros((n, m), dtype=np.uint8) + for i in range(n): + for j in range(m): + val = arr[i, j] + if checknull(val, inf_as_na=inf_as_na): + result[i, j] = 1 + return result.view(np.bool_) + + +def isposinf_scalar(val: object) -> bool: + return util.is_float_object(val) and val == INF + + +def isneginf_scalar(val: object) -> bool: + return util.is_float_object(val) and val == NEGINF + + +cdef inline bint is_null_datetime64(v): + # determine if we have a null for a datetime (or integer versions), + # excluding np.timedelta64('nat') + if checknull_with_nat(v) or is_dt64nat(v): + return True + return False + + +cdef inline bint is_null_timedelta64(v): + # determine if we have a null for a timedelta (or integer versions), + # excluding np.datetime64('nat') + if checknull_with_nat(v) or is_td64nat(v): + return True + return False + + +cdef bint checknull_with_nat_and_na(object obj): + # See GH#32214 + return checknull_with_nat(obj) or obj is C_NA + + +@cython.wraparound(False) +@cython.boundscheck(False) +def is_float_nan(values: ndarray) -> ndarray: + """ + True for elements which correspond to a float nan + + Returns + ------- + ndarray[bool] + """ + cdef: + ndarray[uint8_t] result + Py_ssize_t i, N + object val + + N = len(values) + result = np.zeros(N, dtype=np.uint8) + + for i in range(N): + val = values[i] + if util.is_nan(val): + result[i] = True + return result.view(bool) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def is_numeric_na(values: ndarray) -> ndarray: + """ + Check for NA values consistent with IntegerArray/FloatingArray. + + Similar to a vectorized is_valid_na_for_dtype restricted to numeric dtypes. + + Returns + ------- + ndarray[bool] + """ + cdef: + ndarray[uint8_t] result + Py_ssize_t i, N + object val + + N = len(values) + result = np.zeros(N, dtype=np.uint8) + + for i in range(N): + val = values[i] + if checknull(val): + if val is None or val is C_NA or util.is_nan(val) or is_decimal_na(val): + result[i] = True + else: + raise TypeError(f"'values' contains non-numeric NA {val}") + return result.view(bool) + + +# ----------------------------------------------------------------------------- +# Implementation of NA singleton + + +def _create_binary_propagating_op(name, is_divmod=False): + + def method(self, other): + if (other is C_NA or isinstance(other, str) + or isinstance(other, (numbers.Number, np.bool_)) + or isinstance(other, np.ndarray) and not other.shape): + # Need the other.shape clause to handle NumPy scalars, + # since we do a setitem on `out` below, which + # won't work for NumPy scalars. 
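+            # Illustrative behaviour (inferred from this factory, not from
+            # the source comments): NA + 1 -> NA, NA == NA -> NA, and
+            # divmod(NA, 2) -> (NA, NA), since the is_divmod variants must
+            # return a pair.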
+ if is_divmod: + return NA, NA + else: + return NA + + elif isinstance(other, np.ndarray): + out = np.empty(other.shape, dtype=object) + out[:] = NA + + if is_divmod: + return out, out.copy() + else: + return out + + return NotImplemented + + method.__name__ = name + return method + + +def _create_unary_propagating_op(name: str): + def method(self): + return NA + + method.__name__ = name + return method + + +cdef class C_NAType: + pass + + +class NAType(C_NAType): + """ + NA ("not available") missing value indicator. + + .. warning:: + + Experimental: the behaviour of NA can still change without warning. + + .. versionadded:: 1.0.0 + + The NA singleton is a missing value indicator defined by pandas. It is + used in certain new extension dtypes (currently the "string" dtype). + """ + + _instance = None + + def __new__(cls, *args, **kwargs): + if NAType._instance is None: + NAType._instance = C_NAType.__new__(cls, *args, **kwargs) + return NAType._instance + + def __repr__(self) -> str: + return "" + + def __format__(self, format_spec) -> str: + try: + return self.__repr__().__format__(format_spec) + except ValueError: + return self.__repr__() + + def __bool__(self): + raise TypeError("boolean value of NA is ambiguous") + + def __hash__(self): + # GH 30013: Ensure hash is large enough to avoid hash collisions with integers + exponent = 31 if is_32bit else 61 + return 2 ** exponent - 1 + + def __reduce__(self): + return "NA" + + # Binary arithmetic and comparison ops -> propagate + + __add__ = _create_binary_propagating_op("__add__") + __radd__ = _create_binary_propagating_op("__radd__") + __sub__ = _create_binary_propagating_op("__sub__") + __rsub__ = _create_binary_propagating_op("__rsub__") + __mul__ = _create_binary_propagating_op("__mul__") + __rmul__ = _create_binary_propagating_op("__rmul__") + __matmul__ = _create_binary_propagating_op("__matmul__") + __rmatmul__ = _create_binary_propagating_op("__rmatmul__") + __truediv__ = _create_binary_propagating_op("__truediv__") + __rtruediv__ = _create_binary_propagating_op("__rtruediv__") + __floordiv__ = _create_binary_propagating_op("__floordiv__") + __rfloordiv__ = _create_binary_propagating_op("__rfloordiv__") + __mod__ = _create_binary_propagating_op("__mod__") + __rmod__ = _create_binary_propagating_op("__rmod__") + __divmod__ = _create_binary_propagating_op("__divmod__", is_divmod=True) + __rdivmod__ = _create_binary_propagating_op("__rdivmod__", is_divmod=True) + # __lshift__ and __rshift__ are not implemented + + __eq__ = _create_binary_propagating_op("__eq__") + __ne__ = _create_binary_propagating_op("__ne__") + __le__ = _create_binary_propagating_op("__le__") + __lt__ = _create_binary_propagating_op("__lt__") + __gt__ = _create_binary_propagating_op("__gt__") + __ge__ = _create_binary_propagating_op("__ge__") + + # Unary ops + + __neg__ = _create_unary_propagating_op("__neg__") + __pos__ = _create_unary_propagating_op("__pos__") + __abs__ = _create_unary_propagating_op("__abs__") + __invert__ = _create_unary_propagating_op("__invert__") + + # pow has special + def __pow__(self, other): + if other is C_NA: + return NA + elif isinstance(other, (numbers.Number, np.bool_)): + if other == 0: + # returning positive is correct for +/- 0. 
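+                # x ** 0 == 1 for every x, so NA ** 0 is 1 rather than NA:
+                # the result does not depend on the missing value.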
+ return type(other)(1) + else: + return NA + elif isinstance(other, np.ndarray): + return np.where(other == 0, other.dtype.type(1), NA) + + return NotImplemented + + def __rpow__(self, other): + if other is C_NA: + return NA + elif isinstance(other, (numbers.Number, np.bool_)): + if other == 1: + return other + else: + return NA + elif isinstance(other, np.ndarray): + return np.where(other == 1, other, NA) + return NotImplemented + + # Logical ops using Kleene logic + + def __and__(self, other): + if other is False: + return False + elif other is True or other is C_NA: + return NA + return NotImplemented + + __rand__ = __and__ + + def __or__(self, other): + if other is True: + return True + elif other is False or other is C_NA: + return NA + return NotImplemented + + __ror__ = __or__ + + def __xor__(self, other): + if other is False or other is True or other is C_NA: + return NA + return NotImplemented + + __rxor__ = __xor__ + + __array_priority__ = 1000 + _HANDLED_TYPES = (np.ndarray, numbers.Number, str, np.bool_) + + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + types = self._HANDLED_TYPES + (NAType,) + for x in inputs: + if not isinstance(x, types): + return NotImplemented + + if method != "__call__": + raise ValueError(f"ufunc method '{method}' not supported for NA") + result = maybe_dispatch_ufunc_to_dunder_op( + self, ufunc, method, *inputs, **kwargs + ) + if result is NotImplemented: + # For a NumPy ufunc that's not a binop, like np.logaddexp + index = [i for i, x in enumerate(inputs) if x is NA][0] + result = np.broadcast_arrays(*inputs)[index] + if result.ndim == 0: + result = result.item() + if ufunc.nout > 1: + result = (NA,) * ufunc.nout + + return result + + +C_NA = NAType() # C-visible +NA = C_NA # Python-visible diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/ops.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/_libs/ops.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..f850d71 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/ops.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/ops.pyi b/.local/lib/python3.8/site-packages/pandas/_libs/ops.pyi new file mode 100644 index 0000000..74a6ad8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/ops.pyi @@ -0,0 +1,50 @@ +from typing import ( + Any, + Callable, + Iterable, + Literal, + overload, +) + +import numpy as np + +from pandas._typing import npt + +_BinOp = Callable[[Any, Any], Any] +_BoolOp = Callable[[Any, Any], bool] + +def scalar_compare( + values: np.ndarray, # object[:] + val: object, + op: _BoolOp, # {operator.eq, operator.ne, ...} +) -> npt.NDArray[np.bool_]: ... +def vec_compare( + left: npt.NDArray[np.object_], + right: npt.NDArray[np.object_], + op: _BoolOp, # {operator.eq, operator.ne, ...} +) -> npt.NDArray[np.bool_]: ... +def scalar_binop( + values: np.ndarray, # object[:] + val: object, + op: _BinOp, # binary operator +) -> np.ndarray: ... +def vec_binop( + left: np.ndarray, # object[:] + right: np.ndarray, # object[:] + op: _BinOp, # binary operator +) -> np.ndarray: ... +@overload +def maybe_convert_bool( + arr: npt.NDArray[np.object_], + true_values: Iterable = ..., + false_values: Iterable = ..., + convert_to_masked_nullable: Literal[False] = ..., +) -> tuple[np.ndarray, None]: ... 
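+# The overload pair encodes the mask contract: the second element of the
+# returned tuple is None unless convert_to_masked_nullable=True.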
+@overload +def maybe_convert_bool( + arr: npt.NDArray[np.object_], + true_values: Iterable = ..., + false_values: Iterable = ..., + *, + convert_to_masked_nullable: Literal[True], +) -> tuple[np.ndarray, np.ndarray]: ... diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/ops.pyx b/.local/lib/python3.8/site-packages/pandas/_libs/ops.pyx new file mode 100644 index 0000000..ac8a7f2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/ops.pyx @@ -0,0 +1,310 @@ +import operator + +from cpython.object cimport ( + Py_EQ, + Py_GE, + Py_GT, + Py_LE, + Py_LT, + Py_NE, + PyObject_RichCompareBool, +) + +import cython +from cython import Py_ssize_t +import numpy as np + +from numpy cimport ( + import_array, + ndarray, + uint8_t, +) + +import_array() + + +from pandas._libs.missing cimport checknull +from pandas._libs.util cimport is_nan + + +@cython.wraparound(False) +@cython.boundscheck(False) +def scalar_compare(object[:] values, object val, object op) -> ndarray: + """ + Compare each element of `values` array with the scalar `val`, with + the comparison operation described by `op`. + + Parameters + ---------- + values : ndarray[object] + val : object + op : {operator.eq, operator.ne, + operator.le, operator.lt, + operator.ge, operator.gt} + + Returns + ------- + result : ndarray[bool] + """ + cdef: + Py_ssize_t i, n = len(values) + ndarray[uint8_t, cast=True] result + bint isnull_val + int flag + object x + + if op is operator.lt: + flag = Py_LT + elif op is operator.le: + flag = Py_LE + elif op is operator.gt: + flag = Py_GT + elif op is operator.ge: + flag = Py_GE + elif op is operator.eq: + flag = Py_EQ + elif op is operator.ne: + flag = Py_NE + else: + raise ValueError('Unrecognized operator') + + result = np.empty(n, dtype=bool).view(np.uint8) + isnull_val = checknull(val) + + if flag == Py_NE: + for i in range(n): + x = values[i] + if checknull(x): + result[i] = True + elif isnull_val: + result[i] = True + else: + try: + result[i] = PyObject_RichCompareBool(x, val, flag) + except TypeError: + result[i] = True + elif flag == Py_EQ: + for i in range(n): + x = values[i] + if checknull(x): + result[i] = False + elif isnull_val: + result[i] = False + else: + try: + result[i] = PyObject_RichCompareBool(x, val, flag) + except TypeError: + result[i] = False + + else: + for i in range(n): + x = values[i] + if checknull(x): + result[i] = False + elif isnull_val: + result[i] = False + else: + result[i] = PyObject_RichCompareBool(x, val, flag) + + return result.view(bool) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def vec_compare(ndarray[object] left, ndarray[object] right, object op) -> ndarray: + """ + Compare the elements of `left` with the elements of `right` pointwise, + with the comparison operation described by `op`. 
+ + Parameters + ---------- + left : ndarray[object] + right : ndarray[object] + op : {operator.eq, operator.ne, + operator.le, operator.lt, + operator.ge, operator.gt} + + Returns + ------- + result : ndarray[bool] + """ + cdef: + Py_ssize_t i, n = len(left) + ndarray[uint8_t, cast=True] result + int flag + + if n != len(right): + raise ValueError(f'Arrays were different lengths: {n} vs {len(right)}') + + if op is operator.lt: + flag = Py_LT + elif op is operator.le: + flag = Py_LE + elif op is operator.gt: + flag = Py_GT + elif op is operator.ge: + flag = Py_GE + elif op is operator.eq: + flag = Py_EQ + elif op is operator.ne: + flag = Py_NE + else: + raise ValueError('Unrecognized operator') + + result = np.empty(n, dtype=bool).view(np.uint8) + + if flag == Py_NE: + for i in range(n): + x = left[i] + y = right[i] + + if checknull(x) or checknull(y): + result[i] = True + else: + result[i] = PyObject_RichCompareBool(x, y, flag) + else: + for i in range(n): + x = left[i] + y = right[i] + + if checknull(x) or checknull(y): + result[i] = False + else: + result[i] = PyObject_RichCompareBool(x, y, flag) + + return result.view(bool) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def scalar_binop(object[:] values, object val, object op) -> ndarray: + """ + Apply the given binary operator `op` between each element of the array + `values` and the scalar `val`. + + Parameters + ---------- + values : ndarray[object] + val : object + op : binary operator + + Returns + ------- + result : ndarray[object] + """ + cdef: + Py_ssize_t i, n = len(values) + object[:] result + object x + + result = np.empty(n, dtype=object) + if val is None or is_nan(val): + result[:] = val + return result.base # `.base` to access underlying np.ndarray + + for i in range(n): + x = values[i] + if x is None or is_nan(x): + result[i] = x + else: + result[i] = op(x, val) + + return maybe_convert_bool(result.base)[0] + + +@cython.wraparound(False) +@cython.boundscheck(False) +def vec_binop(object[:] left, object[:] right, object op) -> ndarray: + """ + Apply the given binary operator `op` pointwise to the elements of + arrays `left` and `right`. 
+ + Parameters + ---------- + left : ndarray[object] + right : ndarray[object] + op : binary operator + + Returns + ------- + result : ndarray[object] + """ + cdef: + Py_ssize_t i, n = len(left) + object[:] result + + if n != len(right): + raise ValueError(f'Arrays were different lengths: {n} vs {len(right)}') + + result = np.empty(n, dtype=object) + + for i in range(n): + x = left[i] + y = right[i] + try: + result[i] = op(x, y) + except TypeError: + if x is None or is_nan(x): + result[i] = x + elif y is None or is_nan(y): + result[i] = y + else: + raise + + return maybe_convert_bool(result.base)[0] # `.base` to access np.ndarray + + +def maybe_convert_bool(ndarray[object] arr, + true_values=None, + false_values=None, + convert_to_masked_nullable=False + ) -> tuple[np.ndarray, np.ndarray | None]: + cdef: + Py_ssize_t i, n + ndarray[uint8_t] result + ndarray[uint8_t] mask + object val + set true_vals, false_vals + bint has_na = False + + n = len(arr) + result = np.empty(n, dtype=np.uint8) + mask = np.zeros(n, dtype=np.uint8) + # the defaults + true_vals = {'True', 'TRUE', 'true'} + false_vals = {'False', 'FALSE', 'false'} + + if true_values is not None: + true_vals = true_vals | set(true_values) + + if false_values is not None: + false_vals = false_vals | set(false_values) + + for i in range(n): + val = arr[i] + + if isinstance(val, bool): + if val is True: + result[i] = 1 + else: + result[i] = 0 + elif val in true_vals: + result[i] = 1 + elif val in false_vals: + result[i] = 0 + elif is_nan(val): + mask[i] = 1 + result[i] = 0 # Value here doesn't matter, will be replaced w/ nan + has_na = True + else: + return (arr, None) + + if has_na: + if convert_to_masked_nullable: + return (result.view(np.bool_), mask.view(np.bool_)) + else: + arr = result.view(np.bool_).astype(object) + np.putmask(arr, mask, np.nan) + return (arr, None) + else: + return (result.view(np.bool_), None) diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/ops_dispatch.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/_libs/ops_dispatch.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..5c51d7c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/ops_dispatch.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/ops_dispatch.pyi b/.local/lib/python3.8/site-packages/pandas/_libs/ops_dispatch.pyi new file mode 100644 index 0000000..91b5a4d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/ops_dispatch.pyi @@ -0,0 +1,5 @@ +import numpy as np + +def maybe_dispatch_ufunc_to_dunder_op( + self, ufunc: np.ufunc, method: str, *inputs, **kwargs +): ... 
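A quick illustration of the boolean-conversion contract implemented above. This is a sketch against private API (pandas._libs.ops is internal and may change between versions), and the array contents are invented for the example:

    import numpy as np
    from pandas._libs import ops

    arr = np.array(["True", "false", "TRUE", np.nan], dtype=object)

    # Default path: NA slots come back as np.nan inside an object array.
    converted, mask = ops.maybe_convert_bool(arr)
    # converted -> array([True, False, True, nan], dtype=object), mask -> None

    # Masked path: a parallel boolean mask marks the NA positions instead.
    values, mask = ops.maybe_convert_bool(arr, convert_to_masked_nullable=True)
    # values[3] is a placeholder False; mask -> array([False, False, False, True])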
diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/ops_dispatch.pyx b/.local/lib/python3.8/site-packages/pandas/_libs/ops_dispatch.pyx new file mode 100644 index 0000000..2b2a411 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/ops_dispatch.pyx @@ -0,0 +1,121 @@ +DISPATCHED_UFUNCS = { + "add", + "sub", + "mul", + "pow", + "mod", + "floordiv", + "truediv", + "divmod", + "eq", + "ne", + "lt", + "gt", + "le", + "ge", + "remainder", + "matmul", + "or", + "xor", + "and", + "neg", + "pos", + "abs", +} +UNARY_UFUNCS = { + "neg", + "pos", + "abs", +} +UFUNC_ALIASES = { + "subtract": "sub", + "multiply": "mul", + "floor_divide": "floordiv", + "true_divide": "truediv", + "power": "pow", + "remainder": "mod", + "divide": "truediv", + "equal": "eq", + "not_equal": "ne", + "less": "lt", + "less_equal": "le", + "greater": "gt", + "greater_equal": "ge", + "bitwise_or": "or", + "bitwise_and": "and", + "bitwise_xor": "xor", + "negative": "neg", + "absolute": "abs", + "positive": "pos", +} + +# For op(., Array) -> Array.__r{op}__ +REVERSED_NAMES = { + "lt": "__gt__", + "le": "__ge__", + "gt": "__lt__", + "ge": "__le__", + "eq": "__eq__", + "ne": "__ne__", +} + + +def maybe_dispatch_ufunc_to_dunder_op( + object self, object ufunc, str method, *inputs, **kwargs +): + """ + Dispatch a ufunc to the equivalent dunder method. + + Parameters + ---------- + self : ArrayLike + The array whose dunder method we dispatch to + ufunc : Callable + A NumPy ufunc + method : {'reduce', 'accumulate', 'reduceat', 'outer', 'at', '__call__'} + inputs : ArrayLike + The input arrays. + kwargs : Any + The additional keyword arguments, e.g. ``out``. + + Returns + ------- + result : Any + The result of applying the ufunc + """ + # special has the ufuncs we dispatch to the dunder op on + + op_name = ufunc.__name__ + op_name = UFUNC_ALIASES.get(op_name, op_name) + + def not_implemented(*args, **kwargs): + return NotImplemented + + if kwargs or ufunc.nin > 2: + return NotImplemented + + if method == "__call__" and op_name in DISPATCHED_UFUNCS: + + if inputs[0] is self: + name = f"__{op_name}__" + meth = getattr(self, name, not_implemented) + + if op_name in UNARY_UFUNCS: + assert len(inputs) == 1 + return meth() + + return meth(inputs[1]) + + elif inputs[1] is self: + name = REVERSED_NAMES.get(op_name, f"__r{op_name}__") + + meth = getattr(self, name, not_implemented) + result = meth(inputs[0]) + return result + + else: + # should not be reached, but covering our bases + return NotImplemented + + else: + return NotImplemented diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/parsers.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/_libs/parsers.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..7405830 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/parsers.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/parsers.pyi b/.local/lib/python3.8/site-packages/pandas/_libs/parsers.pyi new file mode 100644 index 0000000..01f5d58 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/parsers.pyi @@ -0,0 +1,71 @@ +from typing import ( + Hashable, + Literal, +) + +import numpy as np + +from pandas._typing import ( + ArrayLike, + Dtype, + npt, +) + +STR_NA_VALUES: set[str] + +def sanitize_objects( + values: npt.NDArray[np.object_], + na_values: set, +) -> int: ... 
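+# TextReader is the C parser engine that backs read_csv(..., engine="c"); it
+# is driven through CParserWrapper rather than used directly. The stub below
+# spells out the constructor arguments, with inline comments noting C integer
+# widths (int64_t/uint64_t) that the annotations cannot express.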
+ +class TextReader: + unnamed_cols: set[str] + table_width: int # int64_t + leading_cols: int # int64_t + header: list[list[int]] # non-negative integers + def __init__( + self, + source, + delimiter: bytes | str = ..., # single-character only + header=..., + header_start: int = ..., # int64_t + header_end: int = ..., # uint64_t + index_col=..., + names=..., + tokenize_chunksize: int = ..., # int64_t + delim_whitespace: bool = ..., + converters=..., + skipinitialspace: bool = ..., + escapechar: bytes | str | None = ..., # single-character only + doublequote: bool = ..., + quotechar: str | bytes | None = ..., # at most 1 character + quoting: int = ..., + lineterminator: bytes | str | None = ..., # at most 1 character + comment=..., + decimal: bytes | str = ..., # single-character only + thousands: bytes | str | None = ..., # single-character only + dtype: Dtype | dict[Hashable, Dtype] = ..., + usecols=..., + error_bad_lines: bool = ..., + warn_bad_lines: bool = ..., + na_filter: bool = ..., + na_values=..., + na_fvalues=..., + keep_default_na: bool = ..., + true_values=..., + false_values=..., + allow_leading_cols: bool = ..., + skiprows=..., + skipfooter: int = ..., # int64_t + verbose: bool = ..., + mangle_dupe_cols: bool = ..., + float_precision: Literal["round_trip", "legacy", "high"] | None = ..., + skip_blank_lines: bool = ..., + encoding_errors: bytes | str = ..., + ): ... + def set_error_bad_lines(self, status: int) -> None: ... + def set_noconvert(self, i: int) -> None: ... + def remove_noconvert(self, i: int) -> None: ... + def close(self) -> None: ... + def read(self, rows: int | None = ...) -> dict[int, ArrayLike]: ... + def read_low_memory(self, rows: int | None) -> list[dict[int, ArrayLike]]: ... diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/parsers.pyx b/.local/lib/python3.8/site-packages/pandas/_libs/parsers.pyx new file mode 100644 index 0000000..08c885f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/parsers.pyx @@ -0,0 +1,2057 @@ +# Copyright (c) 2012, Lambda Foundry, Inc. 
+# See LICENSE for the license +from csv import ( + QUOTE_MINIMAL, + QUOTE_NONE, + QUOTE_NONNUMERIC, +) +from errno import ENOENT +import sys +import time +import warnings + +from libc.stdlib cimport free +from libc.string cimport ( + strcasecmp, + strlen, + strncpy, +) + +import cython +from cython import Py_ssize_t + +from cpython.bytes cimport ( + PyBytes_AsString, + PyBytes_FromString, +) +from cpython.exc cimport ( + PyErr_Fetch, + PyErr_Occurred, +) +from cpython.object cimport PyObject +from cpython.ref cimport ( + Py_INCREF, + Py_XDECREF, +) +from cpython.unicode cimport ( + PyUnicode_AsUTF8String, + PyUnicode_Decode, + PyUnicode_DecodeUTF8, +) + + +cdef extern from "Python.h": + object PyUnicode_FromString(char *v) + + +import numpy as np + +cimport numpy as cnp +from numpy cimport ( + float64_t, + int64_t, + ndarray, + uint8_t, + uint64_t, +) + +cnp.import_array() + +from pandas._libs cimport util +from pandas._libs.util cimport ( + INT64_MAX, + INT64_MIN, + UINT64_MAX, +) + +import pandas._libs.lib as lib + +from pandas._libs.khash cimport ( + kh_destroy_float64, + kh_destroy_str, + kh_destroy_str_starts, + kh_destroy_strbox, + kh_exist_str, + kh_float64_t, + kh_get_float64, + kh_get_str, + kh_get_str_starts_item, + kh_get_strbox, + kh_init_float64, + kh_init_str, + kh_init_str_starts, + kh_init_strbox, + kh_put_float64, + kh_put_str, + kh_put_str_starts_item, + kh_put_strbox, + kh_resize_float64, + kh_resize_str_starts, + kh_str_starts_t, + kh_str_t, + kh_strbox_t, + khiter_t, +) + +from pandas.errors import ( + EmptyDataError, + ParserError, + ParserWarning, +) + +from pandas.core.dtypes.common import ( + is_bool_dtype, + is_datetime64_dtype, + is_extension_array_dtype, + is_float_dtype, + is_integer_dtype, + is_object_dtype, +) +from pandas.core.dtypes.dtypes import CategoricalDtype +from pandas.core.dtypes.inference import is_dict_like + +cdef: + float64_t INF = np.inf + float64_t NEGINF = -INF + int64_t DEFAULT_CHUNKSIZE = 256 * 1024 + + +cdef extern from "headers/portable.h": + # I *think* this is here so that strcasecmp is defined on Windows + # so we don't get + # `parsers.obj : error LNK2001: unresolved external symbol strcasecmp` + # in Appveyor. + # In a sane world, the `from libc.string cimport` above would fail + # loudly. 
+ pass + + +cdef extern from "parser/tokenizer.h": + + ctypedef enum ParserState: + START_RECORD + START_FIELD + ESCAPED_CHAR + IN_FIELD + IN_QUOTED_FIELD + ESCAPE_IN_QUOTED_FIELD + QUOTE_IN_QUOTED_FIELD + EAT_CRNL + EAT_CRNL_NOP + EAT_WHITESPACE + EAT_COMMENT + EAT_LINE_COMMENT + WHITESPACE_LINE + SKIP_LINE + FINISHED + + enum: ERROR_OVERFLOW + + ctypedef enum BadLineHandleMethod: + ERROR, + WARN, + SKIP + + ctypedef void* (*io_callback)(void *src, size_t nbytes, size_t *bytes_read, + int *status, const char *encoding_errors) + ctypedef int (*io_cleanup)(void *src) + + ctypedef struct parser_t: + void *source + io_callback cb_io + io_cleanup cb_cleanup + + int64_t chunksize # Number of bytes to prepare for each chunk + char *data # pointer to data to be processed + int64_t datalen # amount of data available + int64_t datapos + + # where to write out tokenized data + char *stream + uint64_t stream_len + uint64_t stream_cap + + # Store words in (potentially ragged) matrix for now, hmm + char **words + int64_t *word_starts # where we are in the stream + uint64_t words_len + uint64_t words_cap + uint64_t max_words_cap # maximum word cap encountered + + char *pword_start # pointer to stream start of current field + int64_t word_start # position start of current field + + int64_t *line_start # position in words for start of line + int64_t *line_fields # Number of fields in each line + uint64_t lines # Number of lines observed + uint64_t file_lines # Number of lines observed (with bad/skipped) + uint64_t lines_cap # Vector capacity + + # Tokenizing stuff + ParserState state + int doublequote # is " represented by ""? */ + char delimiter # field separator */ + int delim_whitespace # consume tabs / spaces instead + char quotechar # quote character */ + char escapechar # escape character */ + char lineterminator + int skipinitialspace # ignore spaces following delimiter? 
*/ + int quoting # style of quoting to write */ + + char commentchar + int allow_embedded_newline + + int usecols + + Py_ssize_t expected_fields + BadLineHandleMethod on_bad_lines + + # floating point options + char decimal + char sci + + # thousands separator (comma, period) + char thousands + + int header # Boolean: 1: has header, 0: no header + int64_t header_start # header row start + uint64_t header_end # header row end + + void *skipset + PyObject *skipfunc + int64_t skip_first_N_rows + int64_t skipfooter + # pick one, depending on whether the converter requires GIL + float64_t (*double_converter)(const char *, char **, + char, char, char, + int, int *, int *) nogil + + # error handling + char *warn_msg + char *error_msg + + int64_t skip_empty_lines + + ctypedef struct coliter_t: + char **words + int64_t *line_start + int64_t col + + ctypedef struct uint_state: + int seen_sint + int seen_uint + int seen_null + + void uint_state_init(uint_state *self) + int uint64_conflict(uint_state *self) + + void coliter_setup(coliter_t *it, parser_t *parser, + int64_t i, int64_t start) nogil + void COLITER_NEXT(coliter_t, const char *) nogil + + parser_t* parser_new() + + int parser_init(parser_t *self) nogil + void parser_free(parser_t *self) nogil + void parser_del(parser_t *self) nogil + int parser_add_skiprow(parser_t *self, int64_t row) + + int parser_set_skipfirstnrows(parser_t *self, int64_t nrows) + + void parser_set_default_options(parser_t *self) + + int parser_consume_rows(parser_t *self, size_t nrows) + + int parser_trim_buffers(parser_t *self) + + int tokenize_all_rows(parser_t *self, const char *encoding_errors) nogil + int tokenize_nrows(parser_t *self, size_t nrows, const char *encoding_errors) nogil + + int64_t str_to_int64(char *p_item, int64_t int_min, + int64_t int_max, int *error, char tsep) nogil + uint64_t str_to_uint64(uint_state *state, char *p_item, int64_t int_max, + uint64_t uint_max, int *error, char tsep) nogil + + float64_t xstrtod(const char *p, char **q, char decimal, + char sci, char tsep, int skip_trailing, + int *error, int *maybe_int) nogil + float64_t precise_xstrtod(const char *p, char **q, char decimal, + char sci, char tsep, int skip_trailing, + int *error, int *maybe_int) nogil + float64_t round_trip(const char *p, char **q, char decimal, + char sci, char tsep, int skip_trailing, + int *error, int *maybe_int) nogil + + int to_boolean(const char *item, uint8_t *val) nogil + + +cdef extern from "parser/io.h": + void *new_rd_source(object obj) except NULL + + int del_rd_source(void *src) + + void* buffer_rd_bytes(void *source, size_t nbytes, + size_t *bytes_read, int *status, const char *encoding_errors) + + +cdef class TextReader: + """ + + # source: StringIO or file object + + ..versionchange:: 1.2.0 + removed 'compression', 'memory_map', and 'encoding' argument. + These arguments are outsourced to CParserWrapper. + 'source' has to be a file handle. 
+ """ + + cdef: + parser_t *parser + object na_fvalues + object true_values, false_values + object handle + object orig_header + bint na_filter, keep_default_na, verbose, has_usecols, has_mi_columns + bint mangle_dupe_cols, allow_leading_cols + uint64_t parser_start # this is modified after __init__ + list clocks + const char *encoding_errors + kh_str_starts_t *false_set + kh_str_starts_t *true_set + int64_t buffer_lines, skipfooter + list dtype_cast_order # list[np.dtype] + list names # can be None + set noconvert # set[int] + + cdef public: + int64_t leading_cols, table_width + object delimiter # bytes or str + object converters + object na_values + list header # list[list[non-negative integers]] + object index_col + object skiprows + object dtype + object usecols + set unnamed_cols # set[str] + + def __cinit__(self, source, + delimiter=b',', # bytes | str + header=0, + int64_t header_start=0, + uint64_t header_end=0, + index_col=None, + names=None, + tokenize_chunksize=DEFAULT_CHUNKSIZE, + bint delim_whitespace=False, + converters=None, + bint skipinitialspace=False, + escapechar=None, # bytes | str + bint doublequote=True, + quotechar=b'"', + quoting=0, # int + lineterminator=None, # bytes | str + comment=None, + decimal=b'.', # bytes | str + thousands=None, # bytes | str + dtype=None, + usecols=None, + on_bad_lines=ERROR, + bint na_filter=True, + na_values=None, + na_fvalues=None, + bint keep_default_na=True, + true_values=None, + false_values=None, + bint allow_leading_cols=True, + skiprows=None, + skipfooter=0, # int64_t + bint verbose=False, + bint mangle_dupe_cols=True, + float_precision=None, + bint skip_blank_lines=True, + encoding_errors=b"strict"): + + # set encoding for native Python and C library + if isinstance(encoding_errors, str): + encoding_errors = encoding_errors.encode("utf-8") + elif encoding_errors is None: + encoding_errors = b"strict" + Py_INCREF(encoding_errors) + self.encoding_errors = PyBytes_AsString(encoding_errors) + + self.parser = parser_new() + self.parser.chunksize = tokenize_chunksize + + self.mangle_dupe_cols = mangle_dupe_cols + + # For timekeeping + self.clocks = [] + + self.parser.usecols = (usecols is not None) + + self._setup_parser_source(source) + parser_set_default_options(self.parser) + + parser_init(self.parser) + + if delim_whitespace: + self.parser.delim_whitespace = delim_whitespace + else: + if len(delimiter) > 1: + raise ValueError('only length-1 separators excluded right now') + self.parser.delimiter = ord(delimiter) + + # ---------------------------------------- + # parser options + + self.parser.doublequote = doublequote + self.parser.skipinitialspace = skipinitialspace + self.parser.skip_empty_lines = skip_blank_lines + + if lineterminator is not None: + if len(lineterminator) != 1: + raise ValueError('Only length-1 line terminators supported') + self.parser.lineterminator = ord(lineterminator) + + if len(decimal) != 1: + raise ValueError('Only length-1 decimal markers supported') + self.parser.decimal = ord(decimal) + + if thousands is not None: + if len(thousands) != 1: + raise ValueError('Only length-1 thousands markers supported') + self.parser.thousands = ord(thousands) + + if escapechar is not None: + if len(escapechar) != 1: + raise ValueError('Only length-1 escapes supported') + self.parser.escapechar = ord(escapechar) + + self._set_quoting(quotechar, quoting) + + dtype_order = ['int64', 'float64', 'bool', 'object'] + if quoting == QUOTE_NONNUMERIC: + # consistent with csv module semantics, cast all to float + dtype_order = 
dtype_order[1:] + self.dtype_cast_order = [np.dtype(x) for x in dtype_order] + + if comment is not None: + if len(comment) > 1: + raise ValueError('Only length-1 comment characters supported') + self.parser.commentchar = ord(comment) + + self.parser.on_bad_lines = on_bad_lines + + self.skiprows = skiprows + if skiprows is not None: + self._make_skiprow_set() + + self.skipfooter = skipfooter + + # suboptimal + if usecols is not None: + self.has_usecols = 1 + # GH-20558, validate usecols at higher level and only pass clean + # usecols into TextReader. + self.usecols = usecols + + # TODO: XXX? + if skipfooter > 0: + self.parser.on_bad_lines = SKIP + + self.delimiter = delimiter + + self.na_values = na_values + if na_fvalues is None: + na_fvalues = set() + self.na_fvalues = na_fvalues + + self.true_values = _maybe_encode(true_values) + _true_values + self.false_values = _maybe_encode(false_values) + _false_values + + self.true_set = kset_from_list(self.true_values) + self.false_set = kset_from_list(self.false_values) + + self.keep_default_na = keep_default_na + self.converters = converters + self.na_filter = na_filter + + self.verbose = verbose + + if float_precision == "round_trip": + # see gh-15140 + self.parser.double_converter = round_trip + elif float_precision == "legacy": + self.parser.double_converter = xstrtod + elif float_precision == "high" or float_precision is None: + self.parser.double_converter = precise_xstrtod + else: + raise ValueError(f'Unrecognized float_precision option: ' + f'{float_precision}') + + # Caller is responsible for ensuring we have one of + # - None + # - DtypeObj + # - dict[Any, DtypeObj] + self.dtype = dtype + + # XXX + self.noconvert = set() + + self.index_col = index_col + + # ---------------------------------------- + # header stuff + + self.allow_leading_cols = allow_leading_cols + self.leading_cols = 0 # updated in _get_header + + # TODO: no header vs. header is not the first row + self.has_mi_columns = 0 + self.orig_header = header + if header is None: + # sentinel value + self.parser.header_start = -1 + self.parser.header_end = -1 + self.parser.header = -1 + self.parser_start = 0 + prelim_header = [] + else: + if isinstance(header, list): + if len(header) > 1: + # need to artificially skip the final line + # which is still a header line + header = list(header) + header.append(header[-1] + 1) + self.parser.header_end = header[-1] + self.has_mi_columns = 1 + else: + self.parser.header_end = header[0] + + self.parser_start = header[-1] + 1 + self.parser.header_start = header[0] + self.parser.header = header[0] + prelim_header = header + else: + self.parser.header_start = header + self.parser.header_end = header + self.parser_start = header + 1 + self.parser.header = header + prelim_header = [header] + + self.names = names + header, table_width, unnamed_cols = self._get_header(prelim_header) + # header, table_width, and unnamed_cols are set here, never changed + self.header = header + self.table_width = table_width + self.unnamed_cols = unnamed_cols + + if not self.table_width: + raise EmptyDataError("No columns to parse from file") + + # Compute buffer_lines as function of table width. 
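+        # buffer_lines becomes the largest power of two below the heuristic,
+        # so a single tokenized chunk holds on the order of 2**20 fields
+        # (buffer_lines lines of table_width fields each).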
+ heuristic = 2**20 // self.table_width + self.buffer_lines = 1 + while self.buffer_lines * 2 < heuristic: + self.buffer_lines *= 2 + + def __init__(self, *args, **kwargs): + pass + + def __dealloc__(self): + _close(self) + parser_del(self.parser) + + def close(self): + _close(self) + + def _set_quoting(self, quote_char: str | bytes | None, quoting: int): + if not isinstance(quoting, int): + raise TypeError('"quoting" must be an integer') + + if not QUOTE_MINIMAL <= quoting <= QUOTE_NONE: + raise TypeError('bad "quoting" value') + + if not isinstance(quote_char, (str, bytes)) and quote_char is not None: + dtype = type(quote_char).__name__ + raise TypeError(f'"quotechar" must be string, not {dtype}') + + if quote_char is None or quote_char == '': + if quoting != QUOTE_NONE: + raise TypeError("quotechar must be set if quoting enabled") + self.parser.quoting = quoting + self.parser.quotechar = -1 + elif len(quote_char) > 1: # 0-len case handled earlier + raise TypeError('"quotechar" must be a 1-character string') + else: + self.parser.quoting = quoting + self.parser.quotechar = ord(quote_char) + + cdef _make_skiprow_set(self): + if util.is_integer_object(self.skiprows): + parser_set_skipfirstnrows(self.parser, self.skiprows) + elif not callable(self.skiprows): + for i in self.skiprows: + parser_add_skiprow(self.parser, i) + else: + self.parser.skipfunc = self.skiprows + + cdef _setup_parser_source(self, source): + cdef: + void *ptr + + ptr = new_rd_source(source) + self.parser.source = ptr + self.parser.cb_io = &buffer_rd_bytes + self.parser.cb_cleanup = &del_rd_source + + cdef _get_header(self, list prelim_header): + # header is now a list of lists, so field_count should use header[0] + # + # modifies: + # self.parser attributes + # self.parser_start + # self.leading_cols + + cdef: + Py_ssize_t i, start, field_count, passed_count, unnamed_count, level + char *word + str name, old_name + uint64_t hr, data_line = 0 + list header = [] + set unnamed_cols = set() + + if self.parser.header_start >= 0: + + # Header is in the file + for level, hr in enumerate(prelim_header): + + this_header = [] + + if self.parser.lines < hr + 1: + self._tokenize_rows(hr + 2) + + if self.parser.lines == 0: + field_count = 0 + start = self.parser.line_start[0] + + # e.g., if header=3 and file only has 2 lines + elif (self.parser.lines < hr + 1 + and not isinstance(self.orig_header, list)) or ( + self.parser.lines < hr): + msg = self.orig_header + if isinstance(msg, list): + joined = ','.join(str(m) for m in msg) + msg = f"[{joined}], len of {len(msg)}," + raise ParserError( + f'Passed header={msg} but only ' + f'{self.parser.lines} lines in file') + + else: + field_count = self.parser.line_fields[hr] + start = self.parser.line_start[hr] + + unnamed_count = 0 + unnamed_col_indices = [] + + for i in range(field_count): + word = self.parser.words[start + i] + + name = PyUnicode_DecodeUTF8(word, strlen(word), + self.encoding_errors) + + if name == '': + if self.has_mi_columns: + name = f'Unnamed: {i}_level_{level}' + else: + name = f'Unnamed: {i}' + + unnamed_count += 1 + unnamed_col_indices.append(i) + + this_header.append(name) + + if not self.has_mi_columns and self.mangle_dupe_cols: + # Ensure that regular columns are used before unnamed ones + # to keep given names and mangle unnamed columns + col_loop_order = [i for i in range(len(this_header)) + if i not in unnamed_col_indices + ] + unnamed_col_indices + counts = {} + + for i in col_loop_order: + col = this_header[i] + old_col = col + cur_count = counts.get(col, 
0) + + if cur_count > 0: + while cur_count > 0: + counts[old_col] = cur_count + 1 + col = f'{old_col}.{cur_count}' + if col in this_header: + cur_count += 1 + else: + cur_count = counts.get(col, 0) + + if ( + self.dtype is not None + and is_dict_like(self.dtype) + and self.dtype.get(old_col) is not None + and self.dtype.get(col) is None + ): + self.dtype.update({col: self.dtype.get(old_col)}) + + this_header[i] = col + counts[col] = cur_count + 1 + + if self.has_mi_columns: + + # If we have grabbed an extra line, but it's not in our + # format, save in the buffer, and create an blank extra + # line for the rest of the parsing code. + if hr == prelim_header[-1]: + lc = len(this_header) + ic = (len(self.index_col) if self.index_col + is not None else 0) + + # if wrong number of blanks or no index, not our format + if (lc != unnamed_count and lc - ic > unnamed_count) or ic == 0: + hr -= 1 + self.parser_start -= 1 + this_header = [None] * lc + + data_line = hr + 1 + header.append(this_header) + unnamed_cols.update({this_header[i] for i in unnamed_col_indices}) + + if self.names is not None: + header = [self.names] + + elif self.names is not None: + # Names passed + if self.parser.lines < 1: + self._tokenize_rows(1) + + header = [self.names] + + if self.parser.lines < 1: + field_count = len(header[0]) + else: + field_count = self.parser.line_fields[data_line] + + # Enforce this unless usecols + if not self.has_usecols: + self.parser.expected_fields = max(field_count, len(self.names)) + else: + # No header passed nor to be found in the file + if self.parser.lines < 1: + self._tokenize_rows(1) + + return None, self.parser.line_fields[0], unnamed_cols + + # Corner case, not enough lines in the file + if self.parser.lines < data_line + 1: + field_count = len(header[0]) + else: # not self.has_usecols: + + field_count = self.parser.line_fields[data_line] + + # #2981 + if self.names is not None: + field_count = max(field_count, len(self.names)) + + passed_count = len(header[0]) + + if (self.has_usecols and self.allow_leading_cols and + not callable(self.usecols)): + nuse = len(self.usecols) + if nuse == passed_count: + self.leading_cols = 0 + elif self.names is None and nuse < passed_count: + self.leading_cols = field_count - passed_count + elif passed_count != field_count: + raise ValueError('Number of passed names did not match number of ' + 'header fields in the file') + # oh boy, #2442, #2981 + elif self.allow_leading_cols and passed_count < field_count: + self.leading_cols = field_count - passed_count + + return header, field_count, unnamed_cols + + def read(self, rows: int | None = None) -> dict[int, "ArrayLike"]: + """ + rows=None --> read all rows + """ + # Don't care about memory usage + columns = self._read_rows(rows, 1) + + return columns + + def read_low_memory(self, rows: int | None)-> list[dict[int, "ArrayLike"]]: + """ + rows=None --> read all rows + """ + # Conserve intermediate space + # Caller is responsible for concatenating chunks, + # see c_parser_wrapper._concatenate_chunks + cdef: + size_t rows_read = 0 + list chunks = [] + + if rows is None: + while True: + try: + chunk = self._read_rows(self.buffer_lines, 0) + if len(chunk) == 0: + break + except StopIteration: + break + else: + chunks.append(chunk) + else: + while rows_read < rows: + try: + crows = min(self.buffer_lines, rows - rows_read) + + chunk = self._read_rows(crows, 0) + if len(chunk) == 0: + break + + rows_read += len(list(chunk.values())[0]) + except StopIteration: + break + else: + chunks.append(chunk) + + 
parser_trim_buffers(self.parser) + + if len(chunks) == 0: + raise StopIteration + + return chunks + + cdef _tokenize_rows(self, size_t nrows): + cdef: + int status + + with nogil: + status = tokenize_nrows(self.parser, nrows, self.encoding_errors) + + if self.parser.warn_msg != NULL: + print(self.parser.warn_msg, file=sys.stderr) + free(self.parser.warn_msg) + self.parser.warn_msg = NULL + + if status < 0: + raise_parser_error('Error tokenizing data', self.parser) + + # -> dict[int, "ArrayLike"] + cdef _read_rows(self, rows, bint trim): + cdef: + int64_t buffered_lines + int64_t irows + + self._start_clock() + + if rows is not None: + irows = rows + buffered_lines = self.parser.lines - self.parser_start + if buffered_lines < irows: + self._tokenize_rows(irows - buffered_lines) + + if self.skipfooter > 0: + raise ValueError('skipfooter can only be used to read ' + 'the whole file') + else: + with nogil: + status = tokenize_all_rows(self.parser, self.encoding_errors) + + if self.parser.warn_msg != NULL: + print(self.parser.warn_msg, file=sys.stderr) + free(self.parser.warn_msg) + self.parser.warn_msg = NULL + + if status < 0: + raise_parser_error('Error tokenizing data', self.parser) + + if self.parser_start >= self.parser.lines: + raise StopIteration + self._end_clock('Tokenization') + + self._start_clock() + columns = self._convert_column_data(rows) + self._end_clock('Type conversion') + self._start_clock() + if len(columns) > 0: + rows_read = len(list(columns.values())[0]) + # trim + parser_consume_rows(self.parser, rows_read) + if trim: + parser_trim_buffers(self.parser) + self.parser_start -= rows_read + + self._end_clock('Parser memory cleanup') + + return columns + + cdef _start_clock(self): + self.clocks.append(time.time()) + + cdef _end_clock(self, str what): + if self.verbose: + elapsed = time.time() - self.clocks.pop(-1) + print(f'{what} took: {elapsed * 1000:.2f} ms') + + def set_noconvert(self, i: int) -> None: + self.noconvert.add(i) + + def remove_noconvert(self, i: int) -> None: + self.noconvert.remove(i) + + def _convert_column_data(self, rows: int | None) -> dict[int, "ArrayLike"]: + cdef: + int64_t i + int nused + kh_str_starts_t *na_hashset = NULL + int64_t start, end + object name, na_flist, col_dtype = None + bint na_filter = 0 + int64_t num_cols + dict result + + start = self.parser_start + + if rows is None: + end = self.parser.lines + else: + end = min(start + rows, self.parser.lines) + + num_cols = -1 + # Py_ssize_t cast prevents build warning + for i in range(self.parser.lines): + num_cols = (num_cols < self.parser.line_fields[i]) * \ + self.parser.line_fields[i] + \ + (num_cols >= self.parser.line_fields[i]) * num_cols + + usecols_not_callable_and_exists = not callable(self.usecols) and self.usecols + names_larger_num_cols = (self.names and + len(self.names) - self.leading_cols > num_cols) + + if self.table_width - self.leading_cols > num_cols: + if (usecols_not_callable_and_exists + and self.table_width - self.leading_cols < len(self.usecols) + or names_larger_num_cols): + raise ParserError(f"Too many columns specified: expected " + f"{self.table_width - self.leading_cols} " + f"and found {num_cols}") + + if (usecols_not_callable_and_exists and + all(isinstance(u, int) for u in self.usecols)): + missing_usecols = [col for col in self.usecols if col >= num_cols] + if missing_usecols: + warnings.warn( + "Defining usecols with out of bounds indices is deprecated " + "and will raise a ParserError in a future version.", + FutureWarning, + stacklevel=6, + ) + + results 
= {} + nused = 0 + for i in range(self.table_width): + if i < self.leading_cols: + # Pass through leading columns always + name = i + elif (self.usecols and not callable(self.usecols) and + nused == len(self.usecols)): + # Once we've gathered all requested columns, stop. GH5766 + break + else: + name = self._get_column_name(i, nused) + usecols = set() + if callable(self.usecols): + if self.usecols(name): + usecols = {i} + else: + usecols = self.usecols + if self.has_usecols and not (i in usecols or + name in usecols): + continue + nused += 1 + + conv = self._get_converter(i, name) + + col_dtype = None + if self.dtype is not None: + if isinstance(self.dtype, dict): + if name in self.dtype: + col_dtype = self.dtype[name] + elif i in self.dtype: + col_dtype = self.dtype[i] + else: + if self.dtype.names: + # structured array + col_dtype = np.dtype(self.dtype.descr[i][1]) + else: + col_dtype = self.dtype + + if conv: + if col_dtype is not None: + warnings.warn((f"Both a converter and dtype were specified " + f"for column {name} - only the converter will " + f"be used."), ParserWarning, + stacklevel=5) + results[i] = _apply_converter(conv, self.parser, i, start, end) + continue + + # Collect the list of NaN values associated with the column. + # If we aren't supposed to do that, or none are collected, + # we set `na_filter` to `0` (`1` otherwise). + na_flist = set() + + if self.na_filter: + na_list, na_flist = self._get_na_list(i, name) + if na_list is None: + na_filter = 0 + else: + na_filter = 1 + na_hashset = kset_from_list(na_list) + else: + na_filter = 0 + + # Attempt to parse tokens and infer dtype of the column. + # Should return as the desired dtype (inferred or specified). + try: + col_res, na_count = self._convert_tokens( + i, start, end, name, na_filter, na_hashset, + na_flist, col_dtype) + finally: + # gh-21353 + # + # Cleanup the NaN hash that we generated + # to avoid memory leaks. + if na_filter: + self._free_na_set(na_hashset) + + # don't try to upcast EAs + if na_count > 0 and not is_extension_array_dtype(col_dtype): + col_res = _maybe_upcast(col_res) + + if col_res is None: + raise ParserError(f'Unable to parse column {i}') + + results[i] = col_res + + self.parser_start += end - start + + return results + + # -> tuple["ArrayLike", int]: + cdef inline _convert_tokens(self, Py_ssize_t i, int64_t start, + int64_t end, object name, bint na_filter, + kh_str_starts_t *na_hashset, + object na_flist, object col_dtype): + + if col_dtype is not None: + col_res, na_count = self._convert_with_dtype( + col_dtype, i, start, end, na_filter, + 1, na_hashset, na_flist) + + # Fallback on the parse (e.g. we requested int dtype, + # but its actually a float). + if col_res is not None: + return col_res, na_count + + if i in self.noconvert: + return self._string_convert(i, start, end, na_filter, na_hashset) + else: + col_res = None + for dt in self.dtype_cast_order: + try: + col_res, na_count = self._convert_with_dtype( + dt, i, start, end, na_filter, 0, na_hashset, na_flist) + except ValueError: + # This error is raised from trying to convert to uint64, + # and we discover that we cannot convert to any numerical + # dtype successfully. As a result, we leave the data + # column AS IS with object dtype. 
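+                    # na_filter and user_dtype are both passed as 0 here: the
+                    # object fallback keeps every token as-is.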
+ col_res, na_count = self._convert_with_dtype( + np.dtype('object'), i, start, end, 0, + 0, na_hashset, na_flist) + except OverflowError: + col_res, na_count = self._convert_with_dtype( + np.dtype('object'), i, start, end, na_filter, + 0, na_hashset, na_flist) + + if col_res is not None: + break + + # we had a fallback parse on the dtype, so now try to cast + if col_res is not None and col_dtype is not None: + # If col_res is bool, it might actually be a bool array mixed with NaNs + # (see _try_bool_flex()). Usually this would be taken care of using + # _maybe_upcast(), but if col_dtype is a floating type we should just + # take care of that cast here. + if col_res.dtype == np.bool_ and is_float_dtype(col_dtype): + mask = col_res.view(np.uint8) == na_values[np.uint8] + col_res = col_res.astype(col_dtype) + np.putmask(col_res, mask, np.nan) + return col_res, na_count + + # NaNs are already cast to True here, so can not use astype + if col_res.dtype == np.bool_ and is_integer_dtype(col_dtype): + if na_count > 0: + raise ValueError( + f"cannot safely convert passed user dtype of " + f"{col_dtype} for {np.bool_} dtyped data in " + f"column {i} due to NA values" + ) + + # only allow safe casts, eg. with a nan you cannot safely cast to int + try: + col_res = col_res.astype(col_dtype, casting='safe') + except TypeError: + + # float -> int conversions can fail the above + # even with no nans + col_res_orig = col_res + col_res = col_res.astype(col_dtype) + if (col_res != col_res_orig).any(): + raise ValueError( + f"cannot safely convert passed user dtype of " + f"{col_dtype} for {col_res_orig.dtype.name} dtyped data in " + f"column {i}") + + return col_res, na_count + + cdef _convert_with_dtype(self, object dtype, Py_ssize_t i, + int64_t start, int64_t end, + bint na_filter, + bint user_dtype, + kh_str_starts_t *na_hashset, + object na_flist): + if isinstance(dtype, CategoricalDtype): + # TODO: I suspect that _categorical_convert could be + # optimized when dtype is an instance of CategoricalDtype + codes, cats, na_count = _categorical_convert( + self.parser, i, start, end, na_filter, na_hashset) + + # Method accepts list of strings, not encoded ones. 
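+            # self.true_values holds UTF-8-encoded bytes, so decode back to
+            # str before passing them to the categorical constructor.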
+ true_values = [x.decode() for x in self.true_values] + array_type = dtype.construct_array_type() + cat = array_type._from_inferred_categories( + cats, codes, dtype, true_values=true_values) + return cat, na_count + + elif is_extension_array_dtype(dtype): + result, na_count = self._string_convert(i, start, end, na_filter, + na_hashset) + + array_type = dtype.construct_array_type() + try: + # use _from_sequence_of_strings if the class defines it + if is_bool_dtype(dtype): + true_values = [x.decode() for x in self.true_values] + false_values = [x.decode() for x in self.false_values] + result = array_type._from_sequence_of_strings( + result, dtype=dtype, true_values=true_values, + false_values=false_values) + else: + result = array_type._from_sequence_of_strings(result, dtype=dtype) + except NotImplementedError: + raise NotImplementedError( + f"Extension Array: {array_type} must implement " + f"_from_sequence_of_strings in order " + f"to be used in parser methods") + + return result, na_count + + elif is_integer_dtype(dtype): + try: + result, na_count = _try_int64(self.parser, i, start, + end, na_filter, na_hashset) + if user_dtype and na_count is not None: + if na_count > 0: + raise ValueError(f"Integer column has NA values in column {i}") + except OverflowError: + result = _try_uint64(self.parser, i, start, end, + na_filter, na_hashset) + na_count = 0 + + if result is not None and dtype != 'int64': + result = result.astype(dtype) + + return result, na_count + + elif is_float_dtype(dtype): + result, na_count = _try_double(self.parser, i, start, end, + na_filter, na_hashset, na_flist) + + if result is not None and dtype != 'float64': + result = result.astype(dtype) + return result, na_count + elif is_bool_dtype(dtype): + result, na_count = _try_bool_flex(self.parser, i, start, end, + na_filter, na_hashset, + self.true_set, self.false_set) + if user_dtype and na_count is not None: + if na_count > 0: + raise ValueError(f"Bool column has NA values in column {i}") + return result, na_count + + elif dtype.kind == 'S': + # TODO: na handling + width = dtype.itemsize + if width > 0: + result = _to_fw_string(self.parser, i, start, end, width) + return result, 0 + + # treat as a regular string parsing + return self._string_convert(i, start, end, na_filter, + na_hashset) + elif dtype.kind == 'U': + width = dtype.itemsize + if width > 0: + raise TypeError(f"the dtype {dtype} is not supported for parsing") + + # unicode variable width + return self._string_convert(i, start, end, na_filter, + na_hashset) + elif is_object_dtype(dtype): + return self._string_convert(i, start, end, na_filter, + na_hashset) + elif is_datetime64_dtype(dtype): + raise TypeError(f"the dtype {dtype} is not supported " + f"for parsing, pass this column " + f"using parse_dates instead") + else: + raise TypeError(f"the dtype {dtype} is not supported for parsing") + + # -> tuple[ndarray[object], int] + cdef _string_convert(self, Py_ssize_t i, int64_t start, int64_t end, + bint na_filter, kh_str_starts_t *na_hashset): + + return _string_box_utf8(self.parser, i, start, end, na_filter, + na_hashset, self.encoding_errors) + + def _get_converter(self, i: int, name): + if self.converters is None: + return None + + if name is not None and name in self.converters: + return self.converters[name] + + # Converter for position, if any + return self.converters.get(i) + + cdef _get_na_list(self, Py_ssize_t i, name): + # Note: updates self.na_values, self.na_fvalues + if self.na_values is None: + return None, set() + + if isinstance(self.na_values, 
dict): + key = None + values = None + + if name is not None and name in self.na_values: + key = name + elif i in self.na_values: + key = i + else: # No na_values provided for this column. + if self.keep_default_na: + return _NA_VALUES, set() + + return list(), set() + + values = self.na_values[key] + if values is not None and not isinstance(values, list): + values = list(values) + + fvalues = self.na_fvalues[key] + if fvalues is not None and not isinstance(fvalues, set): + fvalues = set(fvalues) + + return _ensure_encoded(values), fvalues + else: + if not isinstance(self.na_values, list): + self.na_values = list(self.na_values) + if not isinstance(self.na_fvalues, set): + self.na_fvalues = set(self.na_fvalues) + + return _ensure_encoded(self.na_values), self.na_fvalues + + cdef _free_na_set(self, kh_str_starts_t *table): + kh_destroy_str_starts(table) + + cdef _get_column_name(self, Py_ssize_t i, Py_ssize_t nused): + cdef int64_t j + if self.has_usecols and self.names is not None: + if (not callable(self.usecols) and + len(self.names) == len(self.usecols)): + return self.names[nused] + else: + return self.names[i - self.leading_cols] + else: + if self.header is not None: + j = i - self.leading_cols + # generate extra (bogus) headers if there are more columns than headers + if j >= len(self.header[0]): + return j + elif self.has_mi_columns: + return tuple(header_row[j] for header_row in self.header) + else: + return self.header[0][j] + else: + return None + + +# Factor out code common to TextReader.__dealloc__ and TextReader.close +# It cannot be a class method, since calling self.close() in __dealloc__ +# which causes a class attribute lookup and violates best parctices +# https://cython.readthedocs.io/en/latest/src/userguide/special_methods.html#finalization-method-dealloc +cdef _close(TextReader reader): + # also preemptively free all allocated memory + parser_free(reader.parser) + if reader.true_set: + kh_destroy_str_starts(reader.true_set) + reader.true_set = NULL + if reader.false_set: + kh_destroy_str_starts(reader.false_set) + reader.false_set = NULL + + +cdef: + object _true_values = [b'True', b'TRUE', b'true'] + object _false_values = [b'False', b'FALSE', b'false'] + + +def _ensure_encoded(list lst): + cdef: + list result = [] + for x in lst: + if isinstance(x, str): + x = PyUnicode_AsUTF8String(x) + elif not isinstance(x, bytes): + x = str(x).encode('utf-8') + + result.append(x) + return result + + +# common NA values +# no longer excluding inf representations +# '1.#INF','-1.#INF', '1.#INF000000', +STR_NA_VALUES = { + "-1.#IND", + "1.#QNAN", + "1.#IND", + "-1.#QNAN", + "#N/A N/A", + "#N/A", + "N/A", + "n/a", + "NA", + "", + "#NA", + "NULL", + "null", + "NaN", + "-NaN", + "nan", + "-nan", + "", +} +_NA_VALUES = _ensure_encoded(list(STR_NA_VALUES)) + + +def _maybe_upcast(arr): + """ + + """ + if issubclass(arr.dtype.type, np.integer): + na_value = na_values[arr.dtype] + arr = arr.astype(float) + np.putmask(arr, arr == na_value, np.nan) + elif arr.dtype == np.bool_: + mask = arr.view(np.uint8) == na_values[np.uint8] + arr = arr.astype(object) + np.putmask(arr, mask, np.nan) + + return arr + + +# ---------------------------------------------------------------------- +# Type conversions / inference support code + + +# -> tuple[ndarray[object], int] +cdef _string_box_utf8(parser_t *parser, int64_t col, + int64_t line_start, int64_t line_end, + bint na_filter, kh_str_starts_t *na_hashset, + const char *encoding_errors): + cdef: + int error, na_count = 0 + Py_ssize_t i, lines + 
coliter_t it + const char *word = NULL + ndarray[object] result + + int ret = 0 + kh_strbox_t *table + + object pyval + + object NA = na_values[np.object_] + khiter_t k + + table = kh_init_strbox() + lines = line_end - line_start + result = np.empty(lines, dtype=np.object_) + coliter_setup(&it, parser, col, line_start) + + for i in range(lines): + COLITER_NEXT(it, word) + + if na_filter: + if kh_get_str_starts_item(na_hashset, word): + # in the hash table + na_count += 1 + result[i] = NA + continue + + k = kh_get_strbox(table, word) + + # in the hash table + if k != table.n_buckets: + # this increments the refcount, but need to test + pyval = table.vals[k] + else: + # box it. new ref? + pyval = PyUnicode_Decode(word, strlen(word), "utf-8", encoding_errors) + + k = kh_put_strbox(table, word, &ret) + table.vals[k] = pyval + + result[i] = pyval + + kh_destroy_strbox(table) + + return result, na_count + + +@cython.boundscheck(False) +cdef _categorical_convert(parser_t *parser, int64_t col, + int64_t line_start, int64_t line_end, + bint na_filter, kh_str_starts_t *na_hashset): + "Convert column data into codes, categories" + cdef: + int na_count = 0 + Py_ssize_t i, size, lines + coliter_t it + const char *word = NULL + + int64_t NA = -1 + int64_t[:] codes + int64_t current_category = 0 + + char *errors = "strict" + + int ret = 0 + kh_str_t *table + khiter_t k + + lines = line_end - line_start + codes = np.empty(lines, dtype=np.int64) + + # factorize parsed values, creating a hash table + # bytes -> category code + with nogil: + table = kh_init_str() + coliter_setup(&it, parser, col, line_start) + + for i in range(lines): + COLITER_NEXT(it, word) + + if na_filter: + if kh_get_str_starts_item(na_hashset, word): + # is in NA values + na_count += 1 + codes[i] = NA + continue + + k = kh_get_str(table, word) + # not in the hash table + if k == table.n_buckets: + k = kh_put_str(table, word, &ret) + table.vals[k] = current_category + current_category += 1 + + codes[i] = table.vals[k] + + # parse and box categories to python strings + result = np.empty(table.n_occupied, dtype=np.object_) + for k in range(table.n_buckets): + if kh_exist_str(table, k): + result[table.vals[k]] = PyUnicode_FromString(table.keys[k]) + + kh_destroy_str(table) + return np.asarray(codes), result, na_count + + +# -> ndarray[f'|S{width}'] +cdef _to_fw_string(parser_t *parser, int64_t col, int64_t line_start, + int64_t line_end, int64_t width): + cdef: + char *data + ndarray result + + result = np.empty(line_end - line_start, dtype=f'|S{width}') + data = result.data + + with nogil: + _to_fw_string_nogil(parser, col, line_start, line_end, width, data) + + return result + + +cdef inline void _to_fw_string_nogil(parser_t *parser, int64_t col, + int64_t line_start, int64_t line_end, + size_t width, char *data) nogil: + cdef: + int64_t i + coliter_t it + const char *word = NULL + + coliter_setup(&it, parser, col, line_start) + + for i in range(line_end - line_start): + COLITER_NEXT(it, word) + strncpy(data, word, width) + data += width + + +cdef: + char* cinf = b'inf' + char* cposinf = b'+inf' + char* cneginf = b'-inf' + + char* cinfty = b'Infinity' + char* cposinfty = b'+Infinity' + char* cneginfty = b'-Infinity' + + +# -> tuple[ndarray[float64_t], int] | tuple[None, None] +cdef _try_double(parser_t *parser, int64_t col, + int64_t line_start, int64_t line_end, + bint na_filter, kh_str_starts_t *na_hashset, object na_flist): + cdef: + int error, na_count = 0 + Py_ssize_t lines + float64_t *data + float64_t NA = na_values[np.float64] + 
kh_float64_t *na_fset + ndarray[float64_t] result + bint use_na_flist = len(na_flist) > 0 + + lines = line_end - line_start + result = np.empty(lines, dtype=np.float64) + data = result.data + na_fset = kset_float64_from_list(na_flist) + with nogil: + error = _try_double_nogil(parser, parser.double_converter, + col, line_start, line_end, + na_filter, na_hashset, use_na_flist, + na_fset, NA, data, &na_count) + + kh_destroy_float64(na_fset) + if error != 0: + return None, None + return result, na_count + + +cdef inline int _try_double_nogil(parser_t *parser, + float64_t (*double_converter)( + const char *, char **, char, + char, char, int, int *, int *) nogil, + int64_t col, int64_t line_start, int64_t line_end, + bint na_filter, kh_str_starts_t *na_hashset, + bint use_na_flist, + const kh_float64_t *na_flist, + float64_t NA, float64_t *data, + int *na_count) nogil: + cdef: + int error = 0, + Py_ssize_t i, lines = line_end - line_start + coliter_t it + const char *word = NULL + char *p_end + khiter_t k64 + + na_count[0] = 0 + coliter_setup(&it, parser, col, line_start) + + if na_filter: + for i in range(lines): + COLITER_NEXT(it, word) + + if kh_get_str_starts_item(na_hashset, word): + # in the hash table + na_count[0] += 1 + data[0] = NA + else: + data[0] = double_converter(word, &p_end, parser.decimal, + parser.sci, parser.thousands, + 1, &error, NULL) + if error != 0 or p_end == word or p_end[0]: + error = 0 + if (strcasecmp(word, cinf) == 0 or + strcasecmp(word, cposinf) == 0 or + strcasecmp(word, cinfty) == 0 or + strcasecmp(word, cposinfty) == 0): + data[0] = INF + elif (strcasecmp(word, cneginf) == 0 or + strcasecmp(word, cneginfty) == 0): + data[0] = NEGINF + else: + return 1 + if use_na_flist: + k64 = kh_get_float64(na_flist, data[0]) + if k64 != na_flist.n_buckets: + na_count[0] += 1 + data[0] = NA + data += 1 + else: + for i in range(lines): + COLITER_NEXT(it, word) + data[0] = double_converter(word, &p_end, parser.decimal, + parser.sci, parser.thousands, + 1, &error, NULL) + if error != 0 or p_end == word or p_end[0]: + error = 0 + if (strcasecmp(word, cinf) == 0 or + strcasecmp(word, cposinf) == 0 or + strcasecmp(word, cinfty) == 0 or + strcasecmp(word, cposinfty) == 0): + data[0] = INF + elif (strcasecmp(word, cneginf) == 0 or + strcasecmp(word, cneginfty) == 0): + data[0] = NEGINF + else: + return 1 + data += 1 + + return 0 + + +cdef _try_uint64(parser_t *parser, int64_t col, + int64_t line_start, int64_t line_end, + bint na_filter, kh_str_starts_t *na_hashset): + cdef: + int error + Py_ssize_t lines + coliter_t it + uint64_t *data + ndarray result + uint_state state + + lines = line_end - line_start + result = np.empty(lines, dtype=np.uint64) + data = result.data + + uint_state_init(&state) + coliter_setup(&it, parser, col, line_start) + with nogil: + error = _try_uint64_nogil(parser, col, line_start, line_end, + na_filter, na_hashset, data, &state) + if error != 0: + if error == ERROR_OVERFLOW: + # Can't get the word variable + raise OverflowError('Overflow') + return None + + if uint64_conflict(&state): + raise ValueError('Cannot convert to numerical dtype') + + if state.seen_sint: + raise OverflowError('Overflow') + + return result + + +cdef inline int _try_uint64_nogil(parser_t *parser, int64_t col, + int64_t line_start, + int64_t line_end, bint na_filter, + const kh_str_starts_t *na_hashset, + uint64_t *data, uint_state *state) nogil: + cdef: + int error + Py_ssize_t i, lines = line_end - line_start + coliter_t it + const char *word = NULL + + coliter_setup(&it, parser, 
col, line_start) + + if na_filter: + for i in range(lines): + COLITER_NEXT(it, word) + if kh_get_str_starts_item(na_hashset, word): + # in the hash table + state.seen_null = 1 + data[i] = 0 + continue + + data[i] = str_to_uint64(state, word, INT64_MAX, UINT64_MAX, + &error, parser.thousands) + if error != 0: + return error + else: + for i in range(lines): + COLITER_NEXT(it, word) + data[i] = str_to_uint64(state, word, INT64_MAX, UINT64_MAX, + &error, parser.thousands) + if error != 0: + return error + + return 0 + + +cdef _try_int64(parser_t *parser, int64_t col, + int64_t line_start, int64_t line_end, + bint na_filter, kh_str_starts_t *na_hashset): + cdef: + int error, na_count = 0 + Py_ssize_t lines + coliter_t it + int64_t *data + ndarray result + int64_t NA = na_values[np.int64] + + lines = line_end - line_start + result = np.empty(lines, dtype=np.int64) + data = result.data + coliter_setup(&it, parser, col, line_start) + with nogil: + error = _try_int64_nogil(parser, col, line_start, line_end, + na_filter, na_hashset, NA, data, &na_count) + if error != 0: + if error == ERROR_OVERFLOW: + # Can't get the word variable + raise OverflowError('Overflow') + return None, None + + return result, na_count + + +cdef inline int _try_int64_nogil(parser_t *parser, int64_t col, + int64_t line_start, + int64_t line_end, bint na_filter, + const kh_str_starts_t *na_hashset, int64_t NA, + int64_t *data, int *na_count) nogil: + cdef: + int error + Py_ssize_t i, lines = line_end - line_start + coliter_t it + const char *word = NULL + + na_count[0] = 0 + coliter_setup(&it, parser, col, line_start) + + if na_filter: + for i in range(lines): + COLITER_NEXT(it, word) + if kh_get_str_starts_item(na_hashset, word): + # in the hash table + na_count[0] += 1 + data[i] = NA + continue + + data[i] = str_to_int64(word, INT64_MIN, INT64_MAX, + &error, parser.thousands) + if error != 0: + return error + else: + for i in range(lines): + COLITER_NEXT(it, word) + data[i] = str_to_int64(word, INT64_MIN, INT64_MAX, + &error, parser.thousands) + if error != 0: + return error + + return 0 + + +# -> tuple[ndarray[bool], int] +cdef _try_bool_flex(parser_t *parser, int64_t col, + int64_t line_start, int64_t line_end, + bint na_filter, const kh_str_starts_t *na_hashset, + const kh_str_starts_t *true_hashset, + const kh_str_starts_t *false_hashset): + cdef: + int error, na_count = 0 + Py_ssize_t lines + uint8_t *data + ndarray result + uint8_t NA = na_values[np.bool_] + + lines = line_end - line_start + result = np.empty(lines, dtype=np.uint8) + data = result.data + with nogil: + error = _try_bool_flex_nogil(parser, col, line_start, line_end, + na_filter, na_hashset, true_hashset, + false_hashset, NA, data, &na_count) + if error != 0: + return None, None + return result.view(np.bool_), na_count + + +cdef inline int _try_bool_flex_nogil(parser_t *parser, int64_t col, + int64_t line_start, + int64_t line_end, bint na_filter, + const kh_str_starts_t *na_hashset, + const kh_str_starts_t *true_hashset, + const kh_str_starts_t *false_hashset, + uint8_t NA, uint8_t *data, + int *na_count) nogil: + cdef: + int error = 0 + Py_ssize_t i, lines = line_end - line_start + coliter_t it + const char *word = NULL + + na_count[0] = 0 + coliter_setup(&it, parser, col, line_start) + + if na_filter: + for i in range(lines): + COLITER_NEXT(it, word) + + if kh_get_str_starts_item(na_hashset, word): + # in the hash table + na_count[0] += 1 + data[0] = NA + data += 1 + continue + + if kh_get_str_starts_item(true_hashset, word): + data[0] = 1 + data += 1 
+ continue + if kh_get_str_starts_item(false_hashset, word): + data[0] = 0 + data += 1 + continue + + error = to_boolean(word, data) + if error != 0: + return error + data += 1 + else: + for i in range(lines): + COLITER_NEXT(it, word) + + if kh_get_str_starts_item(true_hashset, word): + data[0] = 1 + data += 1 + continue + + if kh_get_str_starts_item(false_hashset, word): + data[0] = 0 + data += 1 + continue + + error = to_boolean(word, data) + if error != 0: + return error + data += 1 + + return 0 + + +cdef kh_str_starts_t* kset_from_list(list values) except NULL: + # caller takes responsibility for freeing the hash table + cdef: + Py_ssize_t i + kh_str_starts_t *table + int ret = 0 + object val + + table = kh_init_str_starts() + + for i in range(len(values)): + val = values[i] + + # None creeps in sometimes, which isn't possible here + if not isinstance(val, bytes): + kh_destroy_str_starts(table) + raise ValueError('Must be all encoded bytes') + + kh_put_str_starts_item(table, PyBytes_AsString(val), &ret) + + if table.table.n_buckets <= 128: + # Resize the hash table to make it almost empty, this + # reduces amount of hash collisions on lookup thus + # "key not in table" case is faster. + # Note that this trades table memory footprint for lookup speed. + kh_resize_str_starts(table, table.table.n_buckets * 8) + + return table + + +cdef kh_float64_t* kset_float64_from_list(values) except NULL: + # caller takes responsibility for freeing the hash table + cdef: + khiter_t k + kh_float64_t *table + int ret = 0 + float64_t val + object value + + table = kh_init_float64() + + for value in values: + val = float(value) + + k = kh_put_float64(table, val, &ret) + + if table.n_buckets <= 128: + # See reasoning in kset_from_list + kh_resize_float64(table, table.n_buckets * 8) + return table + + +cdef raise_parser_error(object base, parser_t *parser): + cdef: + object old_exc + object exc_type + PyObject *type + PyObject *value + PyObject *traceback + + if PyErr_Occurred(): + PyErr_Fetch(&type, &value, &traceback) + Py_XDECREF(traceback) + + if value != NULL: + old_exc = value + Py_XDECREF(value) + + # PyErr_Fetch only returned the error message in *value, + # so the Exception class must be extracted from *type. + if isinstance(old_exc, str): + if type != NULL: + exc_type = type + else: + exc_type = ParserError + + Py_XDECREF(type) + raise exc_type(old_exc) + else: + Py_XDECREF(type) + raise old_exc + + message = f'{base}. 
C error: ' + if parser.error_msg != NULL: + message += parser.error_msg.decode('utf-8') + else: + message += 'no error message set' + + raise ParserError(message) + + +# ---------------------------------------------------------------------- +# NA values +def _compute_na_values(): + int64info = np.iinfo(np.int64) + int32info = np.iinfo(np.int32) + int16info = np.iinfo(np.int16) + int8info = np.iinfo(np.int8) + uint64info = np.iinfo(np.uint64) + uint32info = np.iinfo(np.uint32) + uint16info = np.iinfo(np.uint16) + uint8info = np.iinfo(np.uint8) + na_values = { + np.float64: np.nan, + np.int64: int64info.min, + np.int32: int32info.min, + np.int16: int16info.min, + np.int8: int8info.min, + np.uint64: uint64info.max, + np.uint32: uint32info.max, + np.uint16: uint16info.max, + np.uint8: uint8info.max, + np.bool_: uint8info.max, + np.object_: np.nan # oof + } + return na_values + + +na_values = _compute_na_values() + +for k in list(na_values): + na_values[np.dtype(k)] = na_values[k] + + +# -> ArrayLike +cdef _apply_converter(object f, parser_t *parser, int64_t col, + int64_t line_start, int64_t line_end): + cdef: + Py_ssize_t i, lines + coliter_t it + const char *word = NULL + ndarray[object] result + object val + + lines = line_end - line_start + result = np.empty(lines, dtype=np.object_) + + coliter_setup(&it, parser, col, line_start) + + for i in range(lines): + COLITER_NEXT(it, word) + val = PyUnicode_FromString(word) + result[i] = f(val) + + return lib.maybe_convert_objects(result) + + +cdef list _maybe_encode(list values): + if values is None: + return [] + return [x.encode('utf-8') if isinstance(x, str) else x for x in values] + + +def sanitize_objects(ndarray[object] values, set na_values) -> int: + """ + Convert specified values, including the given set na_values to np.nan. + + Parameters + ---------- + values : ndarray[object] + na_values : set + + Returns + ------- + na_count : int + """ + cdef: + Py_ssize_t i, n + object val, onan + Py_ssize_t na_count = 0 + dict memo = {} + + n = len(values) + onan = np.nan + + for i in range(n): + val = values[i] + if val in na_values: + values[i] = onan + na_count += 1 + elif val in memo: + values[i] = memo[val] + else: + memo[val] = val + + return na_count diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/properties.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/_libs/properties.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..72fdd04 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/properties.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/properties.pyi b/.local/lib/python3.8/site-packages/pandas/_libs/properties.pyi new file mode 100644 index 0000000..b2ba55a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/properties.pyi @@ -0,0 +1,9 @@ +# pyright: reportIncompleteStub = false +from typing import Any + +# note: this is a lie to make type checkers happy (they special +# case property). cache_readonly uses attribute names similar to +# property (fget) but it does not provide fset and fdel. +cache_readonly = property + +def __getattr__(name: str) -> Any: ... 
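# [editor's note] The "lie" described above: cache_readonly is typed as
# property so type checkers accept it, but at runtime it is the CachedProperty
# descriptor defined in the properties.pyx diff below: compute once on first
# access, then serve from the instance's _cache dict. A rough pure-Python
# equivalent (an illustrative sketch only, assuming instances can grow a
# _cache attribute):
#
#     class _CacheReadonly:
#         def __init__(self, fget):
#             self.fget = fget
#             self.name = fget.__name__
#
#         def __get__(self, obj, typ=None):
#             if obj is None:
#                 return self                 # accessed on the class
#             cache = getattr(obj, "_cache", None)
#             if cache is None:
#                 cache = obj._cache = {}
#             if self.name not in cache:
#                 cache[self.name] = self.fget(obj)
#             return cache[self.name]
#
#         def __set__(self, obj, value):
#             raise AttributeError("Can't set attribute")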
# incomplete diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/properties.pyx b/.local/lib/python3.8/site-packages/pandas/_libs/properties.pyx new file mode 100644 index 0000000..0ae29a6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/properties.pyx @@ -0,0 +1,70 @@ +from cython import Py_ssize_t + +from cpython.dict cimport ( + PyDict_Contains, + PyDict_GetItem, + PyDict_SetItem, +) + + +cdef class CachedProperty: + + cdef readonly: + object fget, name, __doc__ + + def __init__(self, fget): + self.fget = fget + self.name = fget.__name__ + self.__doc__ = getattr(fget, '__doc__', None) + + def __get__(self, obj, typ): + if obj is None: + # accessed on the class, not the instance + return self + + # Get the cache or set a default one if needed + cache = getattr(obj, '_cache', None) + if cache is None: + try: + cache = obj._cache = {} + except (AttributeError): + return self + + if PyDict_Contains(cache, self.name): + # not necessary to Py_INCREF + val = PyDict_GetItem(cache, self.name) + else: + val = self.fget(obj) + PyDict_SetItem(cache, self.name, val) + return val + + def __set__(self, obj, value): + raise AttributeError("Can't set attribute") + + +cache_readonly = CachedProperty + + +cdef class AxisProperty: + + cdef readonly: + Py_ssize_t axis + object __doc__ + + def __init__(self, axis=0, doc=""): + self.axis = axis + self.__doc__ = doc + + def __get__(self, obj, type): + cdef: + list axes + + if obj is None: + # Only instances have _mgr, not classes + return self + else: + axes = obj._mgr.axes + return axes[self.axis] + + def __set__(self, obj, value): + obj._set_axis(self.axis, value) diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/reduction.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/_libs/reduction.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..f47a789 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/reduction.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/reduction.pyx b/.local/lib/python3.8/site-packages/pandas/_libs/reduction.pyx new file mode 100644 index 0000000..7ff0842 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/reduction.pyx @@ -0,0 +1,33 @@ +import numpy as np + +cimport numpy as cnp + +cnp.import_array() + +from pandas._libs.util cimport is_array + + +cdef cnp.dtype _dtype_obj = np.dtype("object") + + +cpdef check_result_array(object obj, object dtype): + # Our operation is supposed to be an aggregation/reduction. If + # it returns an ndarray, this likely means an invalid operation has + # been passed. See test_apply_without_aggregation, test_agg_must_agg + if is_array(obj): + if dtype != _dtype_obj: + # If it is object dtype, the function can be a reduction/aggregation + # and still return an ndarray e.g. 
test_agg_over_numpy_arrays + raise ValueError("Must produce aggregated value") + + +cpdef inline extract_result(object res): + """ extract the result object, it might be a 0-dim ndarray + or a len-1 0-dim, or a scalar """ + if hasattr(res, "_values"): + # Preserve EA + res = res._values + if res.ndim == 1 and len(res) == 1: + # see test_agg_lambda_with_timezone, test_resampler_grouper.py::test_apply + res = res[0] + return res diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/reshape.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/_libs/reshape.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..9c6bbb0 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/reshape.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/reshape.pyi b/.local/lib/python3.8/site-packages/pandas/_libs/reshape.pyi new file mode 100644 index 0000000..110687f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/reshape.pyi @@ -0,0 +1,16 @@ +import numpy as np + +from pandas._typing import npt + +def unstack( + values: np.ndarray, # reshape_t[:, :] + mask: np.ndarray, # const uint8_t[:] + stride: int, + length: int, + width: int, + new_values: np.ndarray, # reshape_t[:, :] + new_mask: np.ndarray, # uint8_t[:, :] +) -> None: ... +def explode( + values: npt.NDArray[np.object_], +) -> tuple[npt.NDArray[np.object_], npt.NDArray[np.int64]]: ... diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/reshape.pyx b/.local/lib/python3.8/site-packages/pandas/_libs/reshape.pyx new file mode 100644 index 0000000..924a7ad --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/reshape.pyx @@ -0,0 +1,139 @@ +import cython +from cython import Py_ssize_t + +from numpy cimport ( + int64_t, + ndarray, + uint8_t, +) + +import numpy as np + +cimport numpy as cnp + +cnp.import_array() + +from pandas._libs.dtypes cimport numeric_object_t +from pandas._libs.lib cimport c_is_list_like + + +@cython.wraparound(False) +@cython.boundscheck(False) +def unstack(numeric_object_t[:, :] values, const uint8_t[:] mask, + Py_ssize_t stride, Py_ssize_t length, Py_ssize_t width, + numeric_object_t[:, :] new_values, uint8_t[:, :] new_mask) -> None: + """ + Transform long values to wide new_values. 
+ + Parameters + ---------- + values : typed ndarray + mask : np.ndarray[bool] + stride : int + length : int + width : int + new_values : np.ndarray[bool] + result array + new_mask : np.ndarray[bool] + result mask + """ + cdef: + Py_ssize_t i, j, w, nulls, s, offset + + if numeric_object_t is not object: + # evaluated at compile-time + with nogil: + for i in range(stride): + + nulls = 0 + for j in range(length): + + for w in range(width): + + offset = j * width + w + + if mask[offset]: + s = i * width + w + new_values[j, s] = values[offset - nulls, i] + new_mask[j, s] = 1 + else: + nulls += 1 + + else: + # object-dtype, identical to above but we cannot use nogil + for i in range(stride): + + nulls = 0 + for j in range(length): + + for w in range(width): + + offset = j * width + w + + if mask[offset]: + s = i * width + w + new_values[j, s] = values[offset - nulls, i] + new_mask[j, s] = 1 + else: + nulls += 1 + + +@cython.wraparound(False) +@cython.boundscheck(False) +def explode(ndarray[object] values): + """ + transform array list-likes to long form + preserve non-list entries + + Parameters + ---------- + values : ndarray[object] + + Returns + ------- + ndarray[object] + result + ndarray[int64_t] + counts + """ + cdef: + Py_ssize_t i, j, count, n + object v + ndarray[object] result + ndarray[int64_t] counts + + # find the resulting len + n = len(values) + counts = np.zeros(n, dtype='int64') + for i in range(n): + v = values[i] + + if c_is_list_like(v, True): + if len(v): + counts[i] += len(v) + else: + # empty list-like, use a nan marker + counts[i] += 1 + else: + counts[i] += 1 + + result = np.empty(counts.sum(), dtype='object') + count = 0 + for i in range(n): + v = values[i] + + if c_is_list_like(v, True): + if len(v): + v = list(v) + for j in range(len(v)): + result[count] = v[j] + count += 1 + else: + # empty list-like, use a nan marker + result[count] = np.nan + count += 1 + else: + # replace with the existing scalar + result[count] = v + count += 1 + return result, counts diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/sparse.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/_libs/sparse.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..19ce7ad Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/sparse.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/sparse.pyi b/.local/lib/python3.8/site-packages/pandas/_libs/sparse.pyi new file mode 100644 index 0000000..aa53880 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/sparse.pyi @@ -0,0 +1,47 @@ +from typing import ( + Sequence, + TypeVar, +) + +import numpy as np + +from pandas._typing import npt + +SparseIndexT = TypeVar("SparseIndexT", bound="SparseIndex") + +class SparseIndex: + length: int + npoints: int + def __init__(self): ... + @property + def ngaps(self) -> int: ... + @property + def nbytes(self) -> int: ... + @property + def indices(self) -> npt.NDArray[np.int32]: ... + def equals(self, other) -> bool: ... + def lookup(self, index: int) -> np.int32: ... + def lookup_array(self, indexer: npt.NDArray[np.int32]) -> npt.NDArray[np.int32]: ... + def to_int_index(self) -> IntIndex: ... + def to_block_index(self) -> BlockIndex: ... + def intersect(self: SparseIndexT, y_: SparseIndex) -> SparseIndexT: ... + def make_union(self: SparseIndexT, y_: SparseIndex) -> SparseIndexT: ... 
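# [editor's note] A quick illustration of how these index types behave at the
# REPL (the classes are the real pandas internals implemented in the sparse.pyx
# diff below; the sample values are made up):
#
#     >>> from pandas._libs.sparse import IntIndex
#     >>> ii = IntIndex(10, [2, 3, 4, 7])   # 4 stored points out of length 10
#     >>> ii.to_block_index()               # blocks at [2, 7], lengths [3, 1]
#     >>> ii.lookup(3)                      # internal location of point 3
#     1
#     >>> ii.lookup(5)                      # 5 is not a stored point
#     -1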
+ +class IntIndex(SparseIndex): + indices: npt.NDArray[np.int32] + def __init__( + self, length: int, indices: Sequence[int], check_integrity: bool = ... + ): ... + +class BlockIndex(SparseIndex): + nblocks: int + blocs: np.ndarray + blengths: np.ndarray + def __init__(self, length: int, blocs: np.ndarray, blengths: np.ndarray): ... + +def make_mask_object_ndarray( + arr: npt.NDArray[np.object_], fill_value +) -> npt.NDArray[np.bool_]: ... +def get_blocks( + indices: npt.NDArray[np.int32], +) -> tuple[npt.NDArray[np.int32], npt.NDArray[np.int32]]: ... diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/sparse.pyx b/.local/lib/python3.8/site-packages/pandas/_libs/sparse.pyx new file mode 100644 index 0000000..9b5ad01 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/sparse.pyx @@ -0,0 +1,738 @@ +import cython +import numpy as np + +cimport numpy as cnp +from numpy cimport ( + float32_t, + float64_t, + int8_t, + int16_t, + int32_t, + int64_t, + ndarray, + uint8_t, +) + +cnp.import_array() + + +# ----------------------------------------------------------------------------- +# Preamble stuff + +cdef float64_t NaN = np.NaN +cdef float64_t INF = np.inf + +# ----------------------------------------------------------------------------- + + +cdef class SparseIndex: + """ + Abstract superclass for sparse index types. + """ + + def __init__(self): + raise NotImplementedError + + +cdef class IntIndex(SparseIndex): + """ + Object for holding exact integer sparse indexing information + + Parameters + ---------- + length : integer + indices : array-like + Contains integers corresponding to the indices. + check_integrity : bool, default=True + Check integrity of the input. + """ + + cdef readonly: + Py_ssize_t length, npoints + ndarray indices + + def __init__(self, Py_ssize_t length, indices, bint check_integrity=True): + self.length = length + self.indices = np.ascontiguousarray(indices, dtype=np.int32) + self.npoints = len(self.indices) + + if check_integrity: + self.check_integrity() + + def __reduce__(self): + args = (self.length, self.indices) + return IntIndex, args + + def __repr__(self) -> str: + output = 'IntIndex\n' + output += f'Indices: {repr(self.indices)}\n' + return output + + @property + def nbytes(self) -> int: + return self.indices.nbytes + + cdef check_integrity(self): + """ + Checks the following: + + - Indices are strictly ascending + - Number of indices is at most self.length + - Indices are at least 0 and at most the total length less one + + A ValueError is raised if any of these conditions is violated. + """ + + if self.npoints > self.length: + raise ValueError( + f"Too many indices. Expected {self.length} but found {self.npoints}" + ) + + # Indices are vacuously ordered and non-negative + # if the sequence of indices is empty. 
+ if self.npoints == 0: + return + + if self.indices.min() < 0: + raise ValueError("No index can be less than zero") + + if self.indices.max() >= self.length: + raise ValueError("All indices must be less than the length") + + monotonic = np.all(self.indices[:-1] < self.indices[1:]) + if not monotonic: + raise ValueError("Indices must be strictly increasing") + + def equals(self, other: object) -> bool: + if not isinstance(other, IntIndex): + return False + + if self is other: + return True + + same_length = self.length == other.length + same_indices = np.array_equal(self.indices, other.indices) + return same_length and same_indices + + @property + def ngaps(self) -> int: + return self.length - self.npoints + + cpdef to_int_index(self): + return self + + def to_block_index(self): + locs, lens = get_blocks(self.indices) + return BlockIndex(self.length, locs, lens) + + cpdef IntIndex intersect(self, SparseIndex y_): + cdef: + Py_ssize_t out_length, xi, yi = 0, result_indexer = 0 + int32_t xind + ndarray[int32_t, ndim=1] xindices, yindices, new_indices + IntIndex y + + # if is one already, returns self + y = y_.to_int_index() + + if self.length != y.length: + raise Exception('Indices must reference same underlying length') + + xindices = self.indices + yindices = y.indices + new_indices = np.empty(min( + len(xindices), len(yindices)), dtype=np.int32) + + for xi in range(self.npoints): + xind = xindices[xi] + + while yi < y.npoints and yindices[yi] < xind: + yi += 1 + + if yi >= y.npoints: + break + + # TODO: would a two-pass algorithm be faster? + if yindices[yi] == xind: + new_indices[result_indexer] = xind + result_indexer += 1 + + new_indices = new_indices[:result_indexer] + return IntIndex(self.length, new_indices) + + cpdef IntIndex make_union(self, SparseIndex y_): + + cdef: + ndarray[int32_t, ndim=1] new_indices + IntIndex y + + # if is one already, returns self + y = y_.to_int_index() + + if self.length != y.length: + raise ValueError('Indices must reference same underlying length') + + new_indices = np.union1d(self.indices, y.indices) + return IntIndex(self.length, new_indices) + + @cython.wraparound(False) + cpdef int32_t lookup(self, Py_ssize_t index): + """ + Return the internal location if value exists on given index. + Return -1 otherwise. 
+ """ + cdef: + int32_t res + ndarray[int32_t, ndim=1] inds + + inds = self.indices + if self.npoints == 0: + return -1 + elif index < 0 or self.length <= index: + return -1 + + res = inds.searchsorted(index) + if res == self.npoints: + return -1 + elif inds[res] == index: + return res + else: + return -1 + + @cython.wraparound(False) + cpdef ndarray[int32_t] lookup_array(self, ndarray[int32_t, ndim=1] indexer): + """ + Vectorized lookup, returns ndarray[int32_t] + """ + cdef: + Py_ssize_t n, i, ind_val + ndarray[int32_t, ndim=1] inds + ndarray[uint8_t, ndim=1, cast=True] mask + ndarray[int32_t, ndim=1] masked + ndarray[int32_t, ndim=1] res + ndarray[int32_t, ndim=1] results + + n = len(indexer) + results = np.empty(n, dtype=np.int32) + results[:] = -1 + + if self.npoints == 0: + return results + + inds = self.indices + mask = (inds[0] <= indexer) & (indexer <= inds[len(inds) - 1]) + + masked = indexer[mask] + res = inds.searchsorted(masked).astype(np.int32) + + res[inds[res] != masked] = -1 + results[mask] = res + return results + + +cpdef get_blocks(ndarray[int32_t, ndim=1] indices): + cdef: + Py_ssize_t init_len, i, npoints, result_indexer = 0 + int32_t block, length = 1, cur, prev + ndarray[int32_t, ndim=1] locs, lens + + npoints = len(indices) + + # just handle the special empty case separately + if npoints == 0: + return np.array([], dtype=np.int32), np.array([], dtype=np.int32) + + # block size can't be longer than npoints + locs = np.empty(npoints, dtype=np.int32) + lens = np.empty(npoints, dtype=np.int32) + + # TODO: two-pass algorithm faster? + prev = block = indices[0] + for i in range(1, npoints): + cur = indices[i] + if cur - prev > 1: + # new block + locs[result_indexer] = block + lens[result_indexer] = length + block = cur + length = 1 + result_indexer += 1 + else: + # same block, increment length + length += 1 + + prev = cur + + locs[result_indexer] = block + lens[result_indexer] = length + result_indexer += 1 + locs = locs[:result_indexer] + lens = lens[:result_indexer] + return locs, lens + + +# ----------------------------------------------------------------------------- +# BlockIndex + +cdef class BlockIndex(SparseIndex): + """ + Object for holding block-based sparse indexing information + + Parameters + ---------- + """ + cdef readonly: + int32_t nblocks, npoints, length + ndarray blocs, blengths + + cdef: + object __weakref__ # need to be picklable + int32_t *locbuf + int32_t *lenbuf + + def __init__(self, length, blocs, blengths): + + self.blocs = np.ascontiguousarray(blocs, dtype=np.int32) + self.blengths = np.ascontiguousarray(blengths, dtype=np.int32) + + # in case we need + self.locbuf = self.blocs.data + self.lenbuf = self.blengths.data + + self.length = length + self.nblocks = np.int32(len(self.blocs)) + self.npoints = self.blengths.sum() + + # self.block_start = blocs + # self.block_end = blocs + blengths + + self.check_integrity() + + def __reduce__(self): + args = (self.length, self.blocs, self.blengths) + return BlockIndex, args + + def __repr__(self) -> str: + output = 'BlockIndex\n' + output += f'Block locations: {repr(self.blocs)}\n' + output += f'Block lengths: {repr(self.blengths)}' + + return output + + @property + def nbytes(self) -> int: + return self.blocs.nbytes + self.blengths.nbytes + + @property + def ngaps(self) -> int: + return self.length - self.npoints + + cdef check_integrity(self): + """ + Check: + - Locations are in ascending order + - No overlapping blocks + - Blocks to not start after end of index, nor extend beyond end + """ + cdef: + 
Py_ssize_t i + ndarray[int32_t, ndim=1] blocs, blengths + + blocs = self.blocs + blengths = self.blengths + + if len(blocs) != len(blengths): + raise ValueError('block bound arrays must be same length') + + for i in range(self.nblocks): + if i > 0: + if blocs[i] <= blocs[i - 1]: + raise ValueError('Locations not in ascending order') + + if i < self.nblocks - 1: + if blocs[i] + blengths[i] > blocs[i + 1]: + raise ValueError(f'Block {i} overlaps') + else: + if blocs[i] + blengths[i] > self.length: + raise ValueError(f'Block {i} extends beyond end') + + # no zero-length blocks + if blengths[i] == 0: + raise ValueError(f'Zero-length block {i}') + + def equals(self, other: object) -> bool: + if not isinstance(other, BlockIndex): + return False + + if self is other: + return True + + same_length = self.length == other.length + same_blocks = (np.array_equal(self.blocs, other.blocs) and + np.array_equal(self.blengths, other.blengths)) + return same_length and same_blocks + + def to_block_index(self): + return self + + cpdef to_int_index(self): + cdef: + int32_t i = 0, j, b + int32_t offset + ndarray[int32_t, ndim=1] indices + + indices = np.empty(self.npoints, dtype=np.int32) + + for b in range(self.nblocks): + offset = self.locbuf[b] + + for j in range(self.lenbuf[b]): + indices[i] = offset + j + i += 1 + + return IntIndex(self.length, indices) + + @property + def indices(self): + return self.to_int_index().indices + + cpdef BlockIndex intersect(self, SparseIndex other): + """ + Intersect two BlockIndex objects + + Returns + ------- + BlockIndex + """ + cdef: + BlockIndex y + ndarray[int32_t, ndim=1] xloc, xlen, yloc, ylen, out_bloc, out_blen + Py_ssize_t xi = 0, yi = 0, max_len, result_indexer = 0 + int32_t cur_loc, cur_length, diff + + y = other.to_block_index() + + if self.length != y.length: + raise Exception('Indices must reference same underlying length') + + xloc = self.blocs + xlen = self.blengths + yloc = y.blocs + ylen = y.blengths + + # block may be split, but can't exceed original len / 2 + 1 + max_len = min(self.length, y.length) // 2 + 1 + out_bloc = np.empty(max_len, dtype=np.int32) + out_blen = np.empty(max_len, dtype=np.int32) + + while True: + # we are done (or possibly never began) + if xi >= self.nblocks or yi >= y.nblocks: + break + + # completely symmetric...would like to avoid code dup but oh well + if xloc[xi] >= yloc[yi]: + cur_loc = xloc[xi] + diff = xloc[xi] - yloc[yi] + + if ylen[yi] <= diff: + # have to skip this block + yi += 1 + continue + + if ylen[yi] - diff < xlen[xi]: + # take end of y block, move onward + cur_length = ylen[yi] - diff + yi += 1 + else: + # take end of x block + cur_length = xlen[xi] + xi += 1 + + else: # xloc[xi] < yloc[yi] + cur_loc = yloc[yi] + diff = yloc[yi] - xloc[xi] + + if xlen[xi] <= diff: + # have to skip this block + xi += 1 + continue + + if xlen[xi] - diff < ylen[yi]: + # take end of x block, move onward + cur_length = xlen[xi] - diff + xi += 1 + else: + # take end of y block + cur_length = ylen[yi] + yi += 1 + + out_bloc[result_indexer] = cur_loc + out_blen[result_indexer] = cur_length + result_indexer += 1 + + out_bloc = out_bloc[:result_indexer] + out_blen = out_blen[:result_indexer] + + return BlockIndex(self.length, out_bloc, out_blen) + + cpdef BlockIndex make_union(self, SparseIndex y): + """ + Combine together two BlockIndex objects, accepting indices if contained + in one or the other + + Parameters + ---------- + other : SparseIndex + + Notes + ----- + union is a protected keyword in Cython, hence make_union + + Returns + 
------- + BlockIndex + """ + return BlockUnion(self, y.to_block_index()).result + + cpdef Py_ssize_t lookup(self, Py_ssize_t index): + """ + Return the internal location if value exists on given index. + Return -1 otherwise. + """ + cdef: + Py_ssize_t i, cum_len + ndarray[int32_t, ndim=1] locs, lens + + locs = self.blocs + lens = self.blengths + + if self.nblocks == 0: + return -1 + elif index < locs[0]: + return -1 + + cum_len = 0 + for i in range(self.nblocks): + if index >= locs[i] and index < locs[i] + lens[i]: + return cum_len + index - locs[i] + cum_len += lens[i] + + return -1 + + @cython.wraparound(False) + cpdef ndarray[int32_t] lookup_array(self, ndarray[int32_t, ndim=1] indexer): + """ + Vectorized lookup, returns ndarray[int32_t] + """ + cdef: + Py_ssize_t n, i, j, ind_val + ndarray[int32_t, ndim=1] locs, lens + ndarray[int32_t, ndim=1] results + + locs = self.blocs + lens = self.blengths + + n = len(indexer) + results = np.empty(n, dtype=np.int32) + results[:] = -1 + + if self.npoints == 0: + return results + + for i in range(n): + ind_val = indexer[i] + if not (ind_val < 0 or self.length <= ind_val): + cum_len = 0 + for j in range(self.nblocks): + if ind_val >= locs[j] and ind_val < locs[j] + lens[j]: + results[i] = cum_len + ind_val - locs[j] + cum_len += lens[j] + return results + + +@cython.internal +cdef class BlockMerge: + """ + Object-oriented approach makes sharing state between recursive functions a + lot easier and reduces code duplication + """ + cdef: + BlockIndex x, y, result + ndarray xstart, xlen, xend, ystart, ylen, yend + int32_t xi, yi # block indices + + def __init__(self, BlockIndex x, BlockIndex y): + self.x = x + self.y = y + + if x.length != y.length: + raise Exception('Indices must reference same underlying length') + + self.xstart = self.x.blocs + self.ystart = self.y.blocs + + self.xend = self.x.blocs + self.x.blengths + self.yend = self.y.blocs + self.y.blengths + + # self.xlen = self.x.blengths + # self.ylen = self.y.blengths + + self.xi = 0 + self.yi = 0 + + self.result = self._make_merged_blocks() + + cdef _make_merged_blocks(self): + raise NotImplementedError + + cdef _set_current_indices(self, int32_t xi, int32_t yi, bint mode): + if mode == 0: + self.xi = xi + self.yi = yi + else: + self.xi = yi + self.yi = xi + + +@cython.internal +cdef class BlockUnion(BlockMerge): + """ + Object-oriented approach makes sharing state between recursive functions a + lot easier and reduces code duplication + """ + + cdef _make_merged_blocks(self): + cdef: + ndarray[int32_t, ndim=1] xstart, xend, ystart + ndarray[int32_t, ndim=1] yend, out_bloc, out_blen + int32_t nstart, nend, diff + Py_ssize_t max_len, result_indexer = 0 + + xstart = self.xstart + xend = self.xend + ystart = self.ystart + yend = self.yend + + max_len = min(self.x.length, self.y.length) // 2 + 1 + out_bloc = np.empty(max_len, dtype=np.int32) + out_blen = np.empty(max_len, dtype=np.int32) + + while True: + # we are done (or possibly never began) + if self.xi >= self.x.nblocks and self.yi >= self.y.nblocks: + break + elif self.yi >= self.y.nblocks: + # through with y, just pass through x blocks + nstart = xstart[self.xi] + nend = xend[self.xi] + self.xi += 1 + elif self.xi >= self.x.nblocks: + # through with x, just pass through y blocks + nstart = ystart[self.yi] + nend = yend[self.yi] + self.yi += 1 + else: + # find end of new block + if xstart[self.xi] < ystart[self.yi]: + nstart = xstart[self.xi] + nend = self._find_next_block_end(0) + else: + nstart = ystart[self.yi] + nend = 
self._find_next_block_end(1) + + out_bloc[result_indexer] = nstart + out_blen[result_indexer] = nend - nstart + result_indexer += 1 + + out_bloc = out_bloc[:result_indexer] + out_blen = out_blen[:result_indexer] + + return BlockIndex(self.x.length, out_bloc, out_blen) + + cdef int32_t _find_next_block_end(self, bint mode) except -1: + """ + Wow, this got complicated in a hurry + + mode 0: block started in index x + mode 1: block started in index y + """ + cdef: + ndarray[int32_t, ndim=1] xstart, xend, ystart, yend + int32_t xi, yi, xnblocks, ynblocks, nend + + if mode != 0 and mode != 1: + raise Exception('Mode must be 0 or 1') + + # so symmetric code will work + if mode == 0: + xstart = self.xstart + xend = self.xend + xi = self.xi + + ystart = self.ystart + yend = self.yend + yi = self.yi + ynblocks = self.y.nblocks + else: + xstart = self.ystart + xend = self.yend + xi = self.yi + + ystart = self.xstart + yend = self.xend + yi = self.xi + ynblocks = self.x.nblocks + + nend = xend[xi] + + # done with y? + if yi == ynblocks: + self._set_current_indices(xi + 1, yi, mode) + return nend + elif nend < ystart[yi]: + # block ends before y block + self._set_current_indices(xi + 1, yi, mode) + return nend + else: + while yi < ynblocks and nend > yend[yi]: + yi += 1 + + self._set_current_indices(xi + 1, yi, mode) + + if yi == ynblocks: + return nend + + if nend < ystart[yi]: + # we're done, return the block end + return nend + else: + # merge blocks, continue searching + # this also catches the case where blocks + return self._find_next_block_end(1 - mode) + + +# ----------------------------------------------------------------------------- +# Sparse arithmetic + +include "sparse_op_helper.pxi" + + +# ----------------------------------------------------------------------------- +# SparseArray mask create operations + +def make_mask_object_ndarray(ndarray[object, ndim=1] arr, object fill_value): + cdef: + object value + Py_ssize_t i + Py_ssize_t new_length = len(arr) + ndarray[int8_t, ndim=1] mask + + mask = np.ones(new_length, dtype=np.int8) + + for i in range(new_length): + value = arr[i] + if value == fill_value and type(value) == type(fill_value): + mask[i] = 0 + + return mask.view(dtype=bool) diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/sparse_op_helper.pxi.in b/.local/lib/python3.8/site-packages/pandas/_libs/sparse_op_helper.pxi.in new file mode 100644 index 0000000..e6a2c7b --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/sparse_op_helper.pxi.in @@ -0,0 +1,309 @@ +""" +Template for each `dtype` helper function for sparse ops + +WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in +""" + +# ---------------------------------------------------------------------- +# Sparse op +# ---------------------------------------------------------------------- + +ctypedef fused sparse_t: + float64_t + int64_t + + +cdef inline float64_t __div__(sparse_t a, sparse_t b): + if b == 0: + if a > 0: + return INF + elif a < 0: + return -INF + else: + return NaN + else: + return float(a) / b + + +cdef inline float64_t __truediv__(sparse_t a, sparse_t b): + return __div__(a, b) + + +cdef inline sparse_t __mod__(sparse_t a, sparse_t b): + if b == 0: + if sparse_t is float64_t: + return NaN + else: + return 0 + else: + return a % b + + +cdef inline sparse_t __floordiv__(sparse_t a, sparse_t b): + if b == 0: + if sparse_t is float64_t: + # Match non-sparse Series behavior implemented in mask_zero_div_zero + if a > 0: + return INF + elif a < 0: + return -INF + return 
NaN + else: + return 0 + else: + return a // b + + +# ---------------------------------------------------------------------- +# sparse array op +# ---------------------------------------------------------------------- + +{{py: + +# dtype, arith_comp_group, logical_group +dtypes = [('float64', True, False), + ('int64', True, True), + ('uint8', False, True)] +# do not generate arithmetic / comparison template for uint8, +# it should be done in fused types + +def get_op(tup): + assert isinstance(tup, tuple) + assert len(tup) == 4 + + opname, lval, rval, dtype = tup + + ops_dict = {'add': '{0} + {1}', + 'sub': '{0} - {1}', + 'mul': '{0} * {1}', + 'div': '__div__({0}, {1})', + 'mod': '__mod__({0}, {1})', + 'truediv': '__truediv__({0}, {1})', + 'floordiv': '__floordiv__({0}, {1})', + 'pow': '{0} ** {1}', + 'eq': '{0} == {1}', + 'ne': '{0} != {1}', + 'lt': '{0} < {1}', + 'gt': '{0} > {1}', + 'le': '{0} <= {1}', + 'ge': '{0} >= {1}', + + 'and': '{0} & {1}', # logical op + 'or': '{0} | {1}', + 'xor': '{0} ^ {1}'} + + return ops_dict[opname].format(lval, rval) + + +def get_dispatch(dtypes): + + ops_list = ['add', 'sub', 'mul', 'div', 'mod', 'truediv', + 'floordiv', 'pow', + 'eq', 'ne', 'lt', 'gt', 'le', 'ge', + 'and', 'or', 'xor'] + + for opname in ops_list: + for dtype, arith_comp_group, logical_group in dtypes: + + if opname in ('div', 'truediv'): + rdtype = 'float64' + elif opname in ('eq', 'ne', 'lt', 'gt', 'le', 'ge'): + # comparison op + rdtype = 'uint8' + elif opname in ('and', 'or', 'xor'): + # logical op + rdtype = 'uint8' + else: + rdtype = dtype + + if opname in ('and', 'or', 'xor'): + if logical_group: + yield opname, dtype, rdtype + else: + if arith_comp_group: + yield opname, dtype, rdtype + +}} + + +{{for opname, dtype, rdtype in get_dispatch(dtypes)}} + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple block_op_{{opname}}_{{dtype}}({{dtype}}_t[:] x_, + BlockIndex xindex, + {{dtype}}_t xfill, + {{dtype}}_t[:] y_, + BlockIndex yindex, + {{dtype}}_t yfill): + ''' + Binary operator on BlockIndex objects with fill values + ''' + + cdef: + BlockIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xbp = 0, ybp = 0 # block positions + int32_t xloc, yloc + Py_ssize_t xblock = 0, yblock = 0 # block numbers + + {{dtype}}_t[:] x, y + ndarray[{{rdtype}}_t, ndim=1] out + + # to suppress Cython warning + x = x_ + y = y_ + + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.{{rdtype}}) + + # Wow, what a hack job. Need to do something about this + + # walk the two SparseVectors, adding matched locations... 
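# [editor's note] The loop below is a two-pointer merge over the two
# block-compressed indices: each output slot is either present in both inputs
# (apply the op to x[xi] and y[yi]) or present in only one (apply the op
# against the other side's fill value), while xbp/ybp track the position
# inside the current block and xblock/yblock the current block number on each
# side.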
+ for out_i in range(out_index.npoints): + if yblock == yindex.nblocks: + # use y fill value + out[out_i] = {{(opname, 'x[xi]', 'yfill', dtype) | get_op}} + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + continue + + if xblock == xindex.nblocks: + # use x fill value + out[out_i] = {{(opname, 'xfill', 'y[yi]', dtype) | get_op}} + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + continue + + yloc = yindex.locbuf[yblock] + ybp + xloc = xindex.locbuf[xblock] + xbp + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = {{(opname, 'x[xi]', 'y[yi]', dtype) | get_op}} + xi += 1 + yi += 1 + + # advance both locations + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + elif xloc < yloc: + # use y fill value + out[out_i] = {{(opname, 'x[xi]', 'yfill', dtype) | get_op}} + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + else: + # use x fill value + out[out_i] = {{(opname, 'xfill', 'y[yi]', dtype) | get_op}} + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + return out, out_index, {{(opname, 'xfill', 'yfill', dtype) | get_op}} + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple int_op_{{opname}}_{{dtype}}({{dtype}}_t[:] x_, + IntIndex xindex, + {{dtype}}_t xfill, + {{dtype}}_t[:] y_, + IntIndex yindex, + {{dtype}}_t yfill): + cdef: + IntIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xloc, yloc + int32_t[:] xindices, yindices, out_indices + {{dtype}}_t[:] x, y + ndarray[{{rdtype}}_t, ndim=1] out + + # suppress Cython compiler warnings due to inlining + x = x_ + y = y_ + + # need to do this first to know size of result array + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.{{rdtype}}) + + xindices = xindex.indices + yindices = yindex.indices + out_indices = out_index.indices + + # walk the two SparseVectors, adding matched locations... 
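# [editor's note] The IntIndex variant below is the same merge without the
# block bookkeeping. A pure-Python sketch of the walk (illustrative names;
# `op` is the scalar operator, xfill/yfill the fill values):
#
#     def sparse_op_py(op, x, xind, xfill, y, yind, yfill):
#         out_ind = sorted(set(xind) | set(yind))
#         out, xi, yi = [], 0, 0
#         for loc in out_ind:
#             if xi < len(xind) and xind[xi] == loc:
#                 xv, xi = x[xi], xi + 1
#             else:
#                 xv = xfill
#             if yi < len(yind) and yind[yi] == loc:
#                 yv, yi = y[yi], yi + 1
#             else:
#                 yv = yfill
#             out.append(op(xv, yv))
#         return out, out_ind, op(xfill, yfill)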
+ for out_i in range(out_index.npoints): + if xi == xindex.npoints: + # use x fill value + out[out_i] = {{(opname, 'xfill', 'y[yi]', dtype) | get_op}} + yi += 1 + continue + + if yi == yindex.npoints: + # use y fill value + out[out_i] = {{(opname, 'x[xi]', 'yfill', dtype) | get_op}} + xi += 1 + continue + + xloc = xindices[xi] + yloc = yindices[yi] + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = {{(opname, 'x[xi]', 'y[yi]', dtype) | get_op}} + xi += 1 + yi += 1 + elif xloc < yloc: + # use y fill value + out[out_i] = {{(opname, 'x[xi]', 'yfill', dtype) | get_op}} + xi += 1 + else: + # use x fill value + out[out_i] = {{(opname, 'xfill', 'y[yi]', dtype) | get_op}} + yi += 1 + + return out, out_index, {{(opname, 'xfill', 'yfill', dtype) | get_op}} + + +cpdef sparse_{{opname}}_{{dtype}}({{dtype}}_t[:] x, + SparseIndex xindex, {{dtype}}_t xfill, + {{dtype}}_t[:] y, + SparseIndex yindex, {{dtype}}_t yfill): + + if isinstance(xindex, BlockIndex): + return block_op_{{opname}}_{{dtype}}(x, xindex.to_block_index(), xfill, + y, yindex.to_block_index(), yfill) + elif isinstance(xindex, IntIndex): + return int_op_{{opname}}_{{dtype}}(x, xindex.to_int_index(), xfill, + y, yindex.to_int_index(), yfill) + else: + raise NotImplementedError + +{{endfor}} diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/src/headers/cmath b/.local/lib/python3.8/site-packages/pandas/_libs/src/headers/cmath new file mode 100644 index 0000000..9e7540c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/src/headers/cmath @@ -0,0 +1,48 @@ +#ifndef _PANDAS_MATH_H_ +#define _PANDAS_MATH_H_ + +// MSVC 2017 has a bug where `x == x` can be true for NaNs. +// MSC_VER from https://stackoverflow.com/a/70630/1889400 +// Place upper bound on this check once a fixed MSVC is released. 
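// [editor's note] Each branch below shims the pieces the named compilers
// lacked: std::isnan/std::signbit via MSVC's _isnan/_copysign, plus a
// std::notnan helper. Only on conforming compilers does notnan fall back to
// the self-comparison x == x, which is false exactly for NaN.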
+#if defined(_MSC_VER) && (_MSC_VER < 1800) +#include +// In older versions of Visual Studio there wasn't a std::signbit defined +// This defines it using _copysign +namespace std { + __inline int isnan(double x) { return _isnan(x); } + __inline int signbit(double num) { return _copysign(1.0, num) < 0; } + __inline int notnan(double x) { return !isnan(x); } +} +#elif defined(_MSC_VER) && (_MSC_VER >= 1900) +#include +namespace std { + __inline int isnan(double x) { return _isnan(x); } + __inline int notnan(double x) { return !isnan(x); } +} +#elif defined(_MSC_VER) +#include +namespace std { + __inline int isnan(double x) { return _isnan(x); } + __inline int notnan(double x) { return x == x; } +} +#elif defined(__MVS__) +#include + +#define _signbit signbit +#undef signbit +#undef isnan + +namespace std { + __inline int notnan(double x) { return x == x; } + __inline int signbit(double num) { return _signbit(num); } + __inline int isnan(double x) { return isnan(x); } +} +#else +#include + +namespace std { + __inline int notnan(double x) { return x == x; } +} + +#endif +#endif diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/src/headers/ms_inttypes.h b/.local/lib/python3.8/site-packages/pandas/_libs/src/headers/ms_inttypes.h new file mode 100644 index 0000000..1be3803 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/src/headers/ms_inttypes.h @@ -0,0 +1,305 @@ +// ISO C9x compliant inttypes.h for Microsoft Visual Studio +// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 +// +// Copyright (c) 2006 Alexander Chemeris +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. The name of the author may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef _MSC_VER // [ +#error "Use this header only with Microsoft Visual C++ compilers!" 
+#endif // _MSC_VER ] + +#ifndef _MSC_INTTYPES_H_ // [ +#define _MSC_INTTYPES_H_ + +#if _MSC_VER > 1000 +#pragma once +#endif + +#include "ms_stdint.h" + +// 7.8 Format conversion of integer types + +typedef struct { + intmax_t quot; + intmax_t rem; +} imaxdiv_t; + +// 7.8.1 Macros for format specifiers + +#if !defined(__cplusplus) || defined(__STDC_FORMAT_MACROS) // [ See footnote 185 at page 198 + +// The fprintf macros for signed integers are: +#define PRId8 "d" +#define PRIi8 "i" +#define PRIdLEAST8 "d" +#define PRIiLEAST8 "i" +#define PRIdFAST8 "d" +#define PRIiFAST8 "i" + +#define PRId16 "hd" +#define PRIi16 "hi" +#define PRIdLEAST16 "hd" +#define PRIiLEAST16 "hi" +#define PRIdFAST16 "hd" +#define PRIiFAST16 "hi" + +#define PRId32 "I32d" +#define PRIi32 "I32i" +#define PRIdLEAST32 "I32d" +#define PRIiLEAST32 "I32i" +#define PRIdFAST32 "I32d" +#define PRIiFAST32 "I32i" + +#define PRId64 "I64d" +#define PRIi64 "I64i" +#define PRIdLEAST64 "I64d" +#define PRIiLEAST64 "I64i" +#define PRIdFAST64 "I64d" +#define PRIiFAST64 "I64i" + +#define PRIdMAX "I64d" +#define PRIiMAX "I64i" + +#define PRIdPTR "Id" +#define PRIiPTR "Ii" + +// The fprintf macros for unsigned integers are: +#define PRIo8 "o" +#define PRIu8 "u" +#define PRIx8 "x" +#define PRIX8 "X" +#define PRIoLEAST8 "o" +#define PRIuLEAST8 "u" +#define PRIxLEAST8 "x" +#define PRIXLEAST8 "X" +#define PRIoFAST8 "o" +#define PRIuFAST8 "u" +#define PRIxFAST8 "x" +#define PRIXFAST8 "X" + +#define PRIo16 "ho" +#define PRIu16 "hu" +#define PRIx16 "hx" +#define PRIX16 "hX" +#define PRIoLEAST16 "ho" +#define PRIuLEAST16 "hu" +#define PRIxLEAST16 "hx" +#define PRIXLEAST16 "hX" +#define PRIoFAST16 "ho" +#define PRIuFAST16 "hu" +#define PRIxFAST16 "hx" +#define PRIXFAST16 "hX" + +#define PRIo32 "I32o" +#define PRIu32 "I32u" +#define PRIx32 "I32x" +#define PRIX32 "I32X" +#define PRIoLEAST32 "I32o" +#define PRIuLEAST32 "I32u" +#define PRIxLEAST32 "I32x" +#define PRIXLEAST32 "I32X" +#define PRIoFAST32 "I32o" +#define PRIuFAST32 "I32u" +#define PRIxFAST32 "I32x" +#define PRIXFAST32 "I32X" + +#define PRIo64 "I64o" +#define PRIu64 "I64u" +#define PRIx64 "I64x" +#define PRIX64 "I64X" +#define PRIoLEAST64 "I64o" +#define PRIuLEAST64 "I64u" +#define PRIxLEAST64 "I64x" +#define PRIXLEAST64 "I64X" +#define PRIoFAST64 "I64o" +#define PRIuFAST64 "I64u" +#define PRIxFAST64 "I64x" +#define PRIXFAST64 "I64X" + +#define PRIoMAX "I64o" +#define PRIuMAX "I64u" +#define PRIxMAX "I64x" +#define PRIXMAX "I64X" + +#define PRIoPTR "Io" +#define PRIuPTR "Iu" +#define PRIxPTR "Ix" +#define PRIXPTR "IX" + +// The fscanf macros for signed integers are: +#define SCNd8 "d" +#define SCNi8 "i" +#define SCNdLEAST8 "d" +#define SCNiLEAST8 "i" +#define SCNdFAST8 "d" +#define SCNiFAST8 "i" + +#define SCNd16 "hd" +#define SCNi16 "hi" +#define SCNdLEAST16 "hd" +#define SCNiLEAST16 "hi" +#define SCNdFAST16 "hd" +#define SCNiFAST16 "hi" + +#define SCNd32 "ld" +#define SCNi32 "li" +#define SCNdLEAST32 "ld" +#define SCNiLEAST32 "li" +#define SCNdFAST32 "ld" +#define SCNiFAST32 "li" + +#define SCNd64 "I64d" +#define SCNi64 "I64i" +#define SCNdLEAST64 "I64d" +#define SCNiLEAST64 "I64i" +#define SCNdFAST64 "I64d" +#define SCNiFAST64 "I64i" + +#define SCNdMAX "I64d" +#define SCNiMAX "I64i" + +#ifdef _WIN64 // [ +# define SCNdPTR "I64d" +# define SCNiPTR "I64i" +#else // _WIN64 ][ +# define SCNdPTR "ld" +# define SCNiPTR "li" +#endif // _WIN64 ] + +// The fscanf macros for unsigned integers are: +#define SCNo8 "o" +#define SCNu8 "u" +#define SCNx8 "x" +#define SCNX8 "X" +#define SCNoLEAST8 "o" 
+#define SCNuLEAST8 "u" +#define SCNxLEAST8 "x" +#define SCNXLEAST8 "X" +#define SCNoFAST8 "o" +#define SCNuFAST8 "u" +#define SCNxFAST8 "x" +#define SCNXFAST8 "X" + +#define SCNo16 "ho" +#define SCNu16 "hu" +#define SCNx16 "hx" +#define SCNX16 "hX" +#define SCNoLEAST16 "ho" +#define SCNuLEAST16 "hu" +#define SCNxLEAST16 "hx" +#define SCNXLEAST16 "hX" +#define SCNoFAST16 "ho" +#define SCNuFAST16 "hu" +#define SCNxFAST16 "hx" +#define SCNXFAST16 "hX" + +#define SCNo32 "lo" +#define SCNu32 "lu" +#define SCNx32 "lx" +#define SCNX32 "lX" +#define SCNoLEAST32 "lo" +#define SCNuLEAST32 "lu" +#define SCNxLEAST32 "lx" +#define SCNXLEAST32 "lX" +#define SCNoFAST32 "lo" +#define SCNuFAST32 "lu" +#define SCNxFAST32 "lx" +#define SCNXFAST32 "lX" + +#define SCNo64 "I64o" +#define SCNu64 "I64u" +#define SCNx64 "I64x" +#define SCNX64 "I64X" +#define SCNoLEAST64 "I64o" +#define SCNuLEAST64 "I64u" +#define SCNxLEAST64 "I64x" +#define SCNXLEAST64 "I64X" +#define SCNoFAST64 "I64o" +#define SCNuFAST64 "I64u" +#define SCNxFAST64 "I64x" +#define SCNXFAST64 "I64X" + +#define SCNoMAX "I64o" +#define SCNuMAX "I64u" +#define SCNxMAX "I64x" +#define SCNXMAX "I64X" + +#ifdef _WIN64 // [ +# define SCNoPTR "I64o" +# define SCNuPTR "I64u" +# define SCNxPTR "I64x" +# define SCNXPTR "I64X" +#else // _WIN64 ][ +# define SCNoPTR "lo" +# define SCNuPTR "lu" +# define SCNxPTR "lx" +# define SCNXPTR "lX" +#endif // _WIN64 ] + +#endif // __STDC_FORMAT_MACROS ] + +// 7.8.2 Functions for greatest-width integer types + +// 7.8.2.1 The imaxabs function +#define imaxabs _abs64 + +// 7.8.2.2 The imaxdiv function + +// This is modified version of div() function from Microsoft's div.c found +// in %MSVC.NET%\crt\src\div.c +#ifdef STATIC_IMAXDIV // [ +static +#else // STATIC_IMAXDIV ][ +_inline +#endif // STATIC_IMAXDIV ] +imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom) +{ + imaxdiv_t result; + + result.quot = numer / denom; + result.rem = numer % denom; + + if (numer < 0 && result.rem > 0) { + // did division wrong; must fix up + ++result.quot; + result.rem -= denom; + } + + return result; +} + +// 7.8.2.3 The strtoimax and strtoumax functions +#define strtoimax _strtoi64 +#define strtoumax _strtoui64 + +// 7.8.2.4 The wcstoimax and wcstoumax functions +#define wcstoimax _wcstoi64 +#define wcstoumax _wcstoui64 + + +#endif // _MSC_INTTYPES_H_ ] diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/src/headers/ms_stdint.h b/.local/lib/python3.8/site-packages/pandas/_libs/src/headers/ms_stdint.h new file mode 100644 index 0000000..c66fbb8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/src/headers/ms_stdint.h @@ -0,0 +1,247 @@ +// ISO C9x compliant stdint.h for Microsoft Visual Studio +// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 +// +// Copyright (c) 2006-2008 Alexander Chemeris +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. The name of the author may be used to endorse or promote products +// derived from this software without specific prior written permission. 
+//
+// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef _MSC_VER // [
+#error "Use this header only with Microsoft Visual C++ compilers!"
+#endif // _MSC_VER ]
+
+#ifndef _MSC_STDINT_H_ // [
+#define _MSC_STDINT_H_
+
+#if _MSC_VER > 1000
+#pragma once
+#endif
+
+#include <limits.h>
+
+// For Visual Studio 6 in C++ mode and for many Visual Studio versions when
+// compiling for ARM we should wrap <wchar.h> include with 'extern "C++" {}'
+// or compiler give many errors like this:
+//   error C2733: second C linkage of overloaded function 'wmemchr' not allowed
+#ifdef __cplusplus
+extern "C" {
+#endif
+#  include <wchar.h>
+#ifdef __cplusplus
+}
+#endif
+
+// Define _W64 macros to mark types changing their size, like intptr_t.
+#ifndef _W64
+#  if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300
+#     define _W64 __w64
+#  else
+#     define _W64
+#  endif
+#endif
+
+
+// 7.18.1 Integer types
+
+// 7.18.1.1 Exact-width integer types
+
+// Visual Studio 6 and Embedded Visual C++ 4 doesn't
+// realize that, e.g. char has the same size as __int8
+// so we give up on __intX for them.
+#if (_MSC_VER < 1300) + typedef signed char int8_t; + typedef signed short int16_t; + typedef signed int int32_t; + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; +#else + typedef signed __int8 int8_t; + typedef signed __int16 int16_t; + typedef signed __int32 int32_t; + typedef unsigned __int8 uint8_t; + typedef unsigned __int16 uint16_t; + typedef unsigned __int32 uint32_t; +#endif +typedef signed __int64 int64_t; +typedef unsigned __int64 uint64_t; + + +// 7.18.1.2 Minimum-width integer types +typedef int8_t int_least8_t; +typedef int16_t int_least16_t; +typedef int32_t int_least32_t; +typedef int64_t int_least64_t; +typedef uint8_t uint_least8_t; +typedef uint16_t uint_least16_t; +typedef uint32_t uint_least32_t; +typedef uint64_t uint_least64_t; + +// 7.18.1.3 Fastest minimum-width integer types +typedef int8_t int_fast8_t; +typedef int16_t int_fast16_t; +typedef int32_t int_fast32_t; +typedef int64_t int_fast64_t; +typedef uint8_t uint_fast8_t; +typedef uint16_t uint_fast16_t; +typedef uint32_t uint_fast32_t; +typedef uint64_t uint_fast64_t; + +// 7.18.1.4 Integer types capable of holding object pointers +#ifdef _WIN64 // [ + typedef signed __int64 intptr_t; + typedef unsigned __int64 uintptr_t; +#else // _WIN64 ][ + typedef _W64 signed int intptr_t; + typedef _W64 unsigned int uintptr_t; +#endif // _WIN64 ] + +// 7.18.1.5 Greatest-width integer types +typedef int64_t intmax_t; +typedef uint64_t uintmax_t; + + +// 7.18.2 Limits of specified-width integer types + +#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259 + +// 7.18.2.1 Limits of exact-width integer types +#define INT8_MIN ((int8_t)_I8_MIN) +#define INT8_MAX _I8_MAX +#define INT16_MIN ((int16_t)_I16_MIN) +#define INT16_MAX _I16_MAX +#define INT32_MIN ((int32_t)_I32_MIN) +#define INT32_MAX _I32_MAX +#define INT64_MIN ((int64_t)_I64_MIN) +#define INT64_MAX _I64_MAX +#define UINT8_MAX _UI8_MAX +#define UINT16_MAX _UI16_MAX +#define UINT32_MAX _UI32_MAX +#define UINT64_MAX _UI64_MAX + +// 7.18.2.2 Limits of minimum-width integer types +#define INT_LEAST8_MIN INT8_MIN +#define INT_LEAST8_MAX INT8_MAX +#define INT_LEAST16_MIN INT16_MIN +#define INT_LEAST16_MAX INT16_MAX +#define INT_LEAST32_MIN INT32_MIN +#define INT_LEAST32_MAX INT32_MAX +#define INT_LEAST64_MIN INT64_MIN +#define INT_LEAST64_MAX INT64_MAX +#define UINT_LEAST8_MAX UINT8_MAX +#define UINT_LEAST16_MAX UINT16_MAX +#define UINT_LEAST32_MAX UINT32_MAX +#define UINT_LEAST64_MAX UINT64_MAX + +// 7.18.2.3 Limits of fastest minimum-width integer types +#define INT_FAST8_MIN INT8_MIN +#define INT_FAST8_MAX INT8_MAX +#define INT_FAST16_MIN INT16_MIN +#define INT_FAST16_MAX INT16_MAX +#define INT_FAST32_MIN INT32_MIN +#define INT_FAST32_MAX INT32_MAX +#define INT_FAST64_MIN INT64_MIN +#define INT_FAST64_MAX INT64_MAX +#define UINT_FAST8_MAX UINT8_MAX +#define UINT_FAST16_MAX UINT16_MAX +#define UINT_FAST32_MAX UINT32_MAX +#define UINT_FAST64_MAX UINT64_MAX + +// 7.18.2.4 Limits of integer types capable of holding object pointers +#ifdef _WIN64 // [ +# define INTPTR_MIN INT64_MIN +# define INTPTR_MAX INT64_MAX +# define UINTPTR_MAX UINT64_MAX +#else // _WIN64 ][ +# define INTPTR_MIN INT32_MIN +# define INTPTR_MAX INT32_MAX +# define UINTPTR_MAX UINT32_MAX +#endif // _WIN64 ] + +// 7.18.2.5 Limits of greatest-width integer types +#define INTMAX_MIN INT64_MIN +#define INTMAX_MAX INT64_MAX +#define UINTMAX_MAX UINT64_MAX + +// 7.18.3 Limits of other integer 
types
+
+#ifdef _WIN64 // [
+#  define PTRDIFF_MIN  _I64_MIN
+#  define PTRDIFF_MAX  _I64_MAX
+#else  // _WIN64 ][
+#  define PTRDIFF_MIN  _I32_MIN
+#  define PTRDIFF_MAX  _I32_MAX
+#endif  // _WIN64 ]
+
+#define SIG_ATOMIC_MIN  INT_MIN
+#define SIG_ATOMIC_MAX  INT_MAX
+
+#ifndef SIZE_MAX // [
+#  ifdef _WIN64 // [
+#     define SIZE_MAX  _UI64_MAX
+#  else // _WIN64 ][
+#     define SIZE_MAX  _UI32_MAX
+#  endif // _WIN64 ]
+#endif // SIZE_MAX ]
+
+// WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h>
+#ifndef WCHAR_MIN // [
+#  define WCHAR_MIN  0
+#endif  // WCHAR_MIN ]
+#ifndef WCHAR_MAX // [
+#  define WCHAR_MAX  _UI16_MAX
+#endif  // WCHAR_MAX ]
+
+#define WINT_MIN  0
+#define WINT_MAX  _UI16_MAX
+
+#endif // __STDC_LIMIT_MACROS ]
+
+
+// 7.18.4 Limits of other integer types
+
+#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [   See footnote 224 at page 260
+
+// 7.18.4.1 Macros for minimum-width integer constants
+
+#define INT8_C(val)  val##i8
+#define INT16_C(val) val##i16
+#define INT32_C(val) val##i32
+#define INT64_C(val) val##i64
+
+#define UINT8_C(val)  val##ui8
+#define UINT16_C(val) val##ui16
+#define UINT32_C(val) val##ui32
+#define UINT64_C(val) val##ui64
+
+// 7.18.4.2 Macros for greatest-width integer constants
+#define INTMAX_C   INT64_C
+#define UINTMAX_C  UINT64_C
+
+#endif // __STDC_CONSTANT_MACROS ]
+
+
+#endif // _MSC_STDINT_H_ ]
diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/src/headers/portable.h b/.local/lib/python3.8/site-packages/pandas/_libs/src/headers/portable.h
new file mode 100644
index 0000000..cb8e5ba
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/_libs/src/headers/portable.h
@@ -0,0 +1,16 @@
+#ifndef _PANDAS_PORTABLE_H_
+#define _PANDAS_PORTABLE_H_
+
+#if defined(_MSC_VER)
+#define strcasecmp( s1, s2 ) _stricmp( s1, s2 )
+#endif
+
+// GH-23516 - works around locale perf issues
+// from MUSL libc, MIT Licensed - see LICENSES
+#define isdigit_ascii(c) (((unsigned)(c) - '0') < 10u)
+#define getdigit_ascii(c, default) (isdigit_ascii(c) ? ((int)((c) - '0')) : default)
+#define isspace_ascii(c) (((c) == ' ') || (((unsigned)(c) - '\t') < 5))
+#define toupper_ascii(c) ((((unsigned)(c) - 'a') < 26) ? ((c) & 0x5f) : (c))
+#define tolower_ascii(c) ((((unsigned)(c) - 'A') < 26) ? ((c) | 0x20) : (c))
+
+#endif
diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/src/headers/stdint.h b/.local/lib/python3.8/site-packages/pandas/_libs/src/headers/stdint.h
new file mode 100644
index 0000000..8746bf1
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/_libs/src/headers/stdint.h
@@ -0,0 +1,10 @@
+#ifndef _PANDAS_STDINT_H_
+#define _PANDAS_STDINT_H_
+
+#if defined(_MSC_VER) && (_MSC_VER < 1900)
+#include "ms_stdint.h"
+#else
+#include <stdint.h>
+#endif
+
+#endif
diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/src/inline_helper.h b/.local/lib/python3.8/site-packages/pandas/_libs/src/inline_helper.h
new file mode 100644
index 0000000..40fd457
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/_libs/src/inline_helper.h
@@ -0,0 +1,27 @@
+/*
+Copyright (c) 2016, PyData Development Team
+All rights reserved.
+
+Distributed under the terms of the BSD Simplified License.
+
+The full license is in the LICENSE file, distributed with this software.
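The *_ascii macros in portable.h above sidestep <ctype.h>'s locale machinery by reducing each classification to one unsigned subtraction and compare (the GH-23516 hot-path fix). A small standalone sketch of how a tokenizer-style loop uses them; the macro definitions are repeated so the snippet compiles on its own, and parse_uint_prefix is a made-up helper, not a pandas API:

#include <stdio.h>

/* same definitions as portable.h above */
#define isdigit_ascii(c) (((unsigned)(c) - '0') < 10u)
#define getdigit_ascii(c, default) (isdigit_ascii(c) ? ((int)((c) - '0')) : default)
#define tolower_ascii(c) ((((unsigned)(c) - 'A') < 26) ? ((c) | 0x20) : (c))

/* hypothetical helper: parse an unsigned decimal prefix, "123abc" -> 123 */
static long parse_uint_prefix(const char *s) {
    long v = 0;
    while (isdigit_ascii(*s)) {
        v = v * 10 + getdigit_ascii(*s, 0);
        ++s;
    }
    return v;
}

int main(void) {
    printf("%ld\n", parse_uint_prefix("123abc"));  /* 123 */
    printf("%c\n", tolower_ascii('G'));            /* g */
    printf("%d\n", isdigit_ascii(';'));            /* 0: ';' is just past '9' */
    return 0;
}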
+*/ + +#ifndef PANDAS__LIBS_SRC_INLINE_HELPER_H_ +#define PANDAS__LIBS_SRC_INLINE_HELPER_H_ + +#ifndef PANDAS_INLINE + #if defined(__clang__) + #define PANDAS_INLINE static __inline__ __attribute__ ((__unused__)) + #elif defined(__GNUC__) + #define PANDAS_INLINE static __inline__ + #elif defined(_MSC_VER) + #define PANDAS_INLINE static __inline + #elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + #define PANDAS_INLINE static inline + #else + #define PANDAS_INLINE + #endif // __GNUC__ +#endif // PANDAS_INLINE + +#endif // PANDAS__LIBS_SRC_INLINE_HELPER_H_ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/src/klib/khash.h b/.local/lib/python3.8/site-packages/pandas/_libs/src/klib/khash.h new file mode 100644 index 0000000..e17d82d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/src/klib/khash.h @@ -0,0 +1,719 @@ +/* The MIT License + + Copyright (c) 2008, 2009, 2011 by Attractive Chaos + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +/* + An example: + +#include "khash.h" +KHASH_MAP_INIT_INT(32, char) +int main() { + int ret, is_missing; + khiter_t k; + khash_t(32) *h = kh_init(32); + k = kh_put(32, h, 5, &ret); + if (!ret) kh_del(32, h, k); + kh_value(h, k) = 10; + k = kh_get(32, h, 10); + is_missing = (k == kh_end(h)); + k = kh_get(32, h, 5); + kh_del(32, h, k); + for (k = kh_begin(h); k != kh_end(h); ++k) + if (kh_exist(h, k)) kh_value(h, k) = 1; + kh_destroy(32, h); + return 0; +} +*/ + +/* + 2011-09-16 (0.2.6): + + * The capacity is a power of 2. This seems to dramatically improve the + speed for simple keys. Thank Zilong Tan for the suggestion. Reference: + + - https://github.com/stefanocasazza/ULib + - https://nothings.org/computer/judy/ + + * Allow to optionally use linear probing which usually has better + performance for random input. Double hashing is still the default as it + is more robust to certain non-random input. + + * Added Wang's integer hash function (not used by default). This hash + function is more robust to certain non-random input. + + 2011-02-14 (0.2.5): + + * Allow to declare global functions. + + 2009-09-26 (0.2.4): + + * Improve portability + + 2008-09-19 (0.2.3): + + * Corrected the example + * Improved interfaces + + 2008-09-11 (0.2.2): + + * Improved speed a little in kh_put() + + 2008-09-10 (0.2.1): + + * Added kh_clear() + * Fixed a compiling error + + 2008-09-02 (0.2.0): + + * Changed to token concatenation which increases flexibility. 
+
+  2008-08-31 (0.1.2):
+
+	* Fixed a bug in kh_get(), which has not been tested previously.
+
+  2008-08-31 (0.1.1):
+
+	* Added destructor
+*/
+
+
+#ifndef __AC_KHASH_H
+#define __AC_KHASH_H
+
+/*!
+  @header
+
+  Generic hash table library.
+ */
+
+#define AC_VERSION_KHASH_H "0.2.6"
+
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include "../inline_helper.h"
+
+
+// hooks for memory allocator, C-runtime allocator used per default
+#ifndef KHASH_MALLOC
+#define KHASH_MALLOC malloc
+#endif
+
+#ifndef KHASH_REALLOC
+#define KHASH_REALLOC realloc
+#endif
+
+#ifndef KHASH_CALLOC
+#define KHASH_CALLOC calloc
+#endif
+
+#ifndef KHASH_FREE
+#define KHASH_FREE free
+#endif
+
+
+#if UINT_MAX == 0xffffffffu
+typedef unsigned int khuint32_t;
+typedef signed int khint32_t;
+#elif ULONG_MAX == 0xffffffffu
+typedef unsigned long khuint32_t;
+typedef signed long khint32_t;
+#endif
+
+#if ULONG_MAX == ULLONG_MAX
+typedef unsigned long khuint64_t;
+typedef signed long khint64_t;
+#else
+typedef unsigned long long khuint64_t;
+typedef signed long long khint64_t;
+#endif
+
+#if UINT_MAX == 0xffffu
+typedef unsigned int khuint16_t;
+typedef signed int khint16_t;
+#elif USHRT_MAX == 0xffffu
+typedef unsigned short khuint16_t;
+typedef signed short khint16_t;
+#endif
+
+#if UCHAR_MAX == 0xffu
+typedef unsigned char khuint8_t;
+typedef signed char khint8_t;
+#endif
+
+typedef double khfloat64_t;
+typedef float khfloat32_t;
+
+typedef khuint32_t khuint_t;
+typedef khuint_t khiter_t;
+
+#define __ac_isempty(flag, i) ((flag[i>>5]>>(i&0x1fU))&1)
+#define __ac_isdel(flag, i) (0)
+#define __ac_iseither(flag, i) __ac_isempty(flag, i)
+#define __ac_set_isdel_false(flag, i) (0)
+#define __ac_set_isempty_false(flag, i) (flag[i>>5]&=~(1ul<<(i&0x1fU)))
+#define __ac_set_isempty_true(flag, i) (flag[i>>5]|=(1ul<<(i&0x1fU)))
+#define __ac_set_isboth_false(flag, i) __ac_set_isempty_false(flag, i)
+#define __ac_set_isdel_true(flag, i) ((void)0)
+
+
+// specializations of https://github.com/aappleby/smhasher/blob/master/src/MurmurHash2.cpp
+khuint32_t PANDAS_INLINE murmur2_32to32(khuint32_t k){
+  const khuint32_t SEED = 0xc70f6907UL;
+  // 'm' and 'r' are mixing constants generated offline.
+  // They're not really 'magic', they just happen to work well.
+  const khuint32_t M_32 = 0x5bd1e995;
+  const int R_32 = 24;
+
+  // Initialize the hash to a 'random' value
+  khuint32_t h = SEED ^ 4;
+
+  //handle 4 bytes:
+  k *= M_32;
+  k ^= k >> R_32;
+  k *= M_32;
+
+  h *= M_32;
+  h ^= k;
+
+  // Do a few final mixes of the hash to ensure the "last few
+  // bytes" are well-incorporated. (Really needed here?)
+  h ^= h >> 13;
+  h *= M_32;
+  h ^= h >> 15;
+  return h;
+}
+
+// it is possible to have a special x64-version, which would need less operations, but
+// using 32bit version always has also some benefits:
+//    - one code for 32bit and 64bit builds
+//    - the same case for 32bit and 64bit builds
+//    - no performance difference could be measured compared to a possible x64-version
+
+khuint32_t PANDAS_INLINE murmur2_32_32to32(khuint32_t k1, khuint32_t k2){
+  const khuint32_t SEED = 0xc70f6907UL;
+  // 'm' and 'r' are mixing constants generated offline.
+  // They're not really 'magic', they just happen to work well.
+ const khuint32_t M_32 = 0x5bd1e995; + const int R_32 = 24; + + // Initialize the hash to a 'random' value + khuint32_t h = SEED ^ 4; + + //handle first 4 bytes: + k1 *= M_32; + k1 ^= k1 >> R_32; + k1 *= M_32; + + h *= M_32; + h ^= k1; + + //handle second 4 bytes: + k2 *= M_32; + k2 ^= k2 >> R_32; + k2 *= M_32; + + h *= M_32; + h ^= k2; + + // Do a few final mixes of the hash to ensure the "last few + // bytes" are well-incorporated. + h ^= h >> 13; + h *= M_32; + h ^= h >> 15; + return h; +} + +khuint32_t PANDAS_INLINE murmur2_64to32(khuint64_t k){ + khuint32_t k1 = (khuint32_t)k; + khuint32_t k2 = (khuint32_t)(k >> 32); + + return murmur2_32_32to32(k1, k2); +} + + +#ifdef KHASH_LINEAR +#define __ac_inc(k, m) 1 +#else +#define __ac_inc(k, m) (murmur2_32to32(k) | 1) & (m) +#endif + +#define __ac_fsize(m) ((m) < 32? 1 : (m)>>5) + +#ifndef kroundup32 +#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) +#endif + +static const double __ac_HASH_UPPER = 0.77; + +#define KHASH_DECLARE(name, khkey_t, khval_t) \ + typedef struct { \ + khuint_t n_buckets, size, n_occupied, upper_bound; \ + khuint32_t *flags; \ + khkey_t *keys; \ + khval_t *vals; \ + } kh_##name##_t; \ + extern kh_##name##_t *kh_init_##name(); \ + extern void kh_destroy_##name(kh_##name##_t *h); \ + extern void kh_clear_##name(kh_##name##_t *h); \ + extern khuint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \ + extern void kh_resize_##name(kh_##name##_t *h, khuint_t new_n_buckets); \ + extern khuint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \ + extern void kh_del_##name(kh_##name##_t *h, khuint_t x); + +#define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ + typedef struct { \ + khuint_t n_buckets, size, n_occupied, upper_bound; \ + khuint32_t *flags; \ + khkey_t *keys; \ + khval_t *vals; \ + } kh_##name##_t; \ + SCOPE kh_##name##_t *kh_init_##name(void) { \ + return (kh_##name##_t*)KHASH_CALLOC(1, sizeof(kh_##name##_t)); \ + } \ + SCOPE void kh_destroy_##name(kh_##name##_t *h) \ + { \ + if (h) { \ + KHASH_FREE(h->keys); KHASH_FREE(h->flags); \ + KHASH_FREE(h->vals); \ + KHASH_FREE(h); \ + } \ + } \ + SCOPE void kh_clear_##name(kh_##name##_t *h) \ + { \ + if (h && h->flags) { \ + memset(h->flags, 0xaa, __ac_fsize(h->n_buckets) * sizeof(khuint32_t)); \ + h->size = h->n_occupied = 0; \ + } \ + } \ + SCOPE khuint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \ + { \ + if (h->n_buckets) { \ + khuint_t inc, k, i, last, mask; \ + mask = h->n_buckets - 1; \ + k = __hash_func(key); i = k & mask; \ + inc = __ac_inc(k, mask); last = i; /* inc==1 for linear probing */ \ + while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ + i = (i + inc) & mask; \ + if (i == last) return h->n_buckets; \ + } \ + return __ac_iseither(h->flags, i)? h->n_buckets : i; \ + } else return 0; \ + } \ + SCOPE void kh_resize_##name(kh_##name##_t *h, khuint_t new_n_buckets) \ + { /* This function uses 0.25*n_bucktes bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. 
*/ \ + khuint32_t *new_flags = 0; \ + khuint_t j = 1; \ + { \ + kroundup32(new_n_buckets); \ + if (new_n_buckets < 4) new_n_buckets = 4; \ + if (h->size >= (khuint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; /* requested size is too small */ \ + else { /* hash table size to be changed (shrink or expand); rehash */ \ + new_flags = (khuint32_t*)KHASH_MALLOC(__ac_fsize(new_n_buckets) * sizeof(khuint32_t)); \ + memset(new_flags, 0xff, __ac_fsize(new_n_buckets) * sizeof(khuint32_t)); \ + if (h->n_buckets < new_n_buckets) { /* expand */ \ + h->keys = (khkey_t*)KHASH_REALLOC(h->keys, new_n_buckets * sizeof(khkey_t)); \ + if (kh_is_map) h->vals = (khval_t*)KHASH_REALLOC(h->vals, new_n_buckets * sizeof(khval_t)); \ + } /* otherwise shrink */ \ + } \ + } \ + if (j) { /* rehashing is needed */ \ + for (j = 0; j != h->n_buckets; ++j) { \ + if (__ac_iseither(h->flags, j) == 0) { \ + khkey_t key = h->keys[j]; \ + khval_t val; \ + khuint_t new_mask; \ + new_mask = new_n_buckets - 1; \ + if (kh_is_map) val = h->vals[j]; \ + __ac_set_isempty_true(h->flags, j); \ + while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \ + khuint_t inc, k, i; \ + k = __hash_func(key); \ + i = k & new_mask; \ + inc = __ac_inc(k, new_mask); \ + while (!__ac_isempty(new_flags, i)) i = (i + inc) & new_mask; \ + __ac_set_isempty_false(new_flags, i); \ + if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the existing element */ \ + { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \ + if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \ + __ac_set_isempty_true(h->flags, i); /* mark it as deleted in the old hash table */ \ + } else { /* write the element and jump out of the loop */ \ + h->keys[i] = key; \ + if (kh_is_map) h->vals[i] = val; \ + break; \ + } \ + } \ + } \ + } \ + if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \ + h->keys = (khkey_t*)KHASH_REALLOC(h->keys, new_n_buckets * sizeof(khkey_t)); \ + if (kh_is_map) h->vals = (khval_t*)KHASH_REALLOC(h->vals, new_n_buckets * sizeof(khval_t)); \ + } \ + KHASH_FREE(h->flags); /* free the working space */ \ + h->flags = new_flags; \ + h->n_buckets = new_n_buckets; \ + h->n_occupied = h->size; \ + h->upper_bound = (khuint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \ + } \ + } \ + SCOPE khuint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \ + { \ + khuint_t x; \ + if (h->n_occupied >= h->upper_bound) { /* update the hash table */ \ + if (h->n_buckets > (h->size<<1)) kh_resize_##name(h, h->n_buckets - 1); /* clear "deleted" elements */ \ + else kh_resize_##name(h, h->n_buckets + 1); /* expand the hash table */ \ + } /* TODO: to implement automatically shrinking; resize() already support shrinking */ \ + { \ + khuint_t inc, k, i, site, last, mask = h->n_buckets - 1; \ + x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \ + if (__ac_isempty(h->flags, i)) x = i; /* for speed up */ \ + else { \ + inc = __ac_inc(k, mask); last = i; \ + while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ + if (__ac_isdel(h->flags, i)) site = i; \ + i = (i + inc) & mask; \ + if (i == last) { x = site; break; } \ + } \ + if (x == h->n_buckets) { \ + if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \ + else x = i; \ + } \ + } \ + } \ + if (__ac_isempty(h->flags, x)) { /* not present at all */ \ + h->keys[x] = key; \ + __ac_set_isboth_false(h->flags, x); \ + ++h->size; ++h->n_occupied; \ + *ret = 1; \ + } else if (__ac_isdel(h->flags, 
x)) { /* deleted */ \ + h->keys[x] = key; \ + __ac_set_isboth_false(h->flags, x); \ + ++h->size; \ + *ret = 2; \ + } else *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \ + return x; \ + } \ + SCOPE void kh_del_##name(kh_##name##_t *h, khuint_t x) \ + { \ + if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \ + __ac_set_isdel_true(h->flags, x); \ + --h->size; \ + } \ + } + +#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ + KHASH_INIT2(name, PANDAS_INLINE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) + +/* --- BEGIN OF HASH FUNCTIONS --- */ + +/*! @function + @abstract Integer hash function + @param key The integer [khuint32_t] + @return The hash value [khuint_t] + */ +#define kh_int_hash_func(key) (khuint32_t)(key) +/*! @function + @abstract Integer comparison function + */ +#define kh_int_hash_equal(a, b) ((a) == (b)) +/*! @function + @abstract 64-bit integer hash function + @param key The integer [khuint64_t] + @return The hash value [khuint_t] + */ +PANDAS_INLINE khuint_t kh_int64_hash_func(khuint64_t key) +{ + return (khuint_t)((key)>>33^(key)^(key)<<11); +} +/*! @function + @abstract 64-bit integer comparison function + */ +#define kh_int64_hash_equal(a, b) ((a) == (b)) + +/*! @function + @abstract const char* hash function + @param s Pointer to a null terminated string + @return The hash value + */ +PANDAS_INLINE khuint_t __ac_X31_hash_string(const char *s) +{ + khuint_t h = *s; + if (h) for (++s ; *s; ++s) h = (h << 5) - h + *s; + return h; +} +/*! @function + @abstract Another interface to const char* hash function + @param key Pointer to a null terminated string [const char*] + @return The hash value [khuint_t] + */ +#define kh_str_hash_func(key) __ac_X31_hash_string(key) +/*! @function + @abstract Const char* comparison function + */ +#define kh_str_hash_equal(a, b) (strcmp(a, b) == 0) + +PANDAS_INLINE khuint_t __ac_Wang_hash(khuint_t key) +{ + key += ~(key << 15); + key ^= (key >> 10); + key += (key << 3); + key ^= (key >> 6); + key += ~(key << 11); + key ^= (key >> 16); + return key; +} +#define kh_int_hash_func2(k) __ac_Wang_hash((khuint_t)key) + +/* --- END OF HASH FUNCTIONS --- */ + +/* Other convenient macros... */ + +/*! + @abstract Type of the hash table. + @param name Name of the hash table [symbol] + */ +#define khash_t(name) kh_##name##_t + +/*! @function + @abstract Initiate a hash table. + @param name Name of the hash table [symbol] + @return Pointer to the hash table [khash_t(name)*] + */ +#define kh_init(name) kh_init_##name(void) + +/*! @function + @abstract Destroy a hash table. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + */ +#define kh_destroy(name, h) kh_destroy_##name(h) + +/*! @function + @abstract Reset a hash table without deallocating memory. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + */ +#define kh_clear(name, h) kh_clear_##name(h) + +/*! @function + @abstract Resize a hash table. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + @param s New size [khuint_t] + */ +#define kh_resize(name, h, s) kh_resize_##name(h, s) + +/*! @function + @abstract Insert a key to the hash table. 
+  @param  name  Name of the hash table [symbol]
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  k     Key [type of keys]
+  @param  r     Extra return code: 0 if the key is present in the hash table;
+                1 if the bucket is empty (never used); 2 if the element in
+                the bucket has been deleted [int*]
+  @return       Iterator to the inserted element [khuint_t]
+ */
+#define kh_put(name, h, k, r) kh_put_##name(h, k, r)
+
+/*! @function
+  @abstract     Retrieve a key from the hash table.
+  @param  name  Name of the hash table [symbol]
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  k     Key [type of keys]
+  @return       Iterator to the found element, or kh_end(h) if the element is absent [khuint_t]
+ */
+#define kh_get(name, h, k) kh_get_##name(h, k)
+
+/*! @function
+  @abstract     Remove a key from the hash table.
+  @param  name  Name of the hash table [symbol]
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  k     Iterator to the element to be deleted [khuint_t]
+ */
+#define kh_del(name, h, k) kh_del_##name(h, k)
+
+/*! @function
+  @abstract     Test whether a bucket contains data.
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  x     Iterator to the bucket [khuint_t]
+  @return       1 if containing data; 0 otherwise [int]
+ */
+#define kh_exist(h, x) (!__ac_iseither((h)->flags, (x)))
+
+/*! @function
+  @abstract     Get key given an iterator
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  x     Iterator to the bucket [khuint_t]
+  @return       Key [type of keys]
+ */
+#define kh_key(h, x) ((h)->keys[x])
+
+/*! @function
+  @abstract     Get value given an iterator
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  x     Iterator to the bucket [khuint_t]
+  @return       Value [type of values]
+  @discussion   For hash sets, calling this results in segfault.
+ */
+#define kh_val(h, x) ((h)->vals[x])
+
+/*! @function
+  @abstract     Alias of kh_val()
+ */
+#define kh_value(h, x) ((h)->vals[x])
+
+/*! @function
+  @abstract     Get the start iterator
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @return       The start iterator [khuint_t]
+ */
+#define kh_begin(h) (khuint_t)(0)
+
+/*! @function
+  @abstract     Get the end iterator
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @return       The end iterator [khuint_t]
+ */
+#define kh_end(h) ((h)->n_buckets)
+
+/*! @function
+  @abstract     Get the number of elements in the hash table
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @return       Number of elements in the hash table [khuint_t]
+ */
+#define kh_size(h) ((h)->size)
+
+/*! @function
+  @abstract     Get the number of buckets in the hash table
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @return       Number of buckets in the hash table [khuint_t]
+ */
+#define kh_n_buckets(h) ((h)->n_buckets)
+
+/* More convenient interfaces */
+
+/*! @function
+  @abstract     Instantiate a hash set containing integer keys
+  @param  name  Name of the hash table [symbol]
+ */
+#define KHASH_SET_INIT_INT(name) \
+	KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal)
+
+/*! @function
+  @abstract     Instantiate a hash map containing integer keys
+  @param  name  Name of the hash table [symbol]
+  @param  khval_t  Type of values [type]
+ */
+#define KHASH_MAP_INIT_INT(name, khval_t) \
+	KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)
+
+#define KHASH_MAP_INIT_UINT(name, khval_t) \
+	KHASH_INIT(name, khuint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)
+
+/*!
@function + @abstract Instantiate a hash map containing 64-bit integer keys + @param name Name of the hash table [symbol] + */ +#define KHASH_SET_INIT_UINT64(name) \ + KHASH_INIT(name, khuint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal) + +#define KHASH_SET_INIT_INT64(name) \ + KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal) + +/*! @function + @abstract Instantiate a hash map containing 64-bit integer keys + @param name Name of the hash table [symbol] + @param khval_t Type of values [type] + */ +#define KHASH_MAP_INIT_UINT64(name, khval_t) \ + KHASH_INIT(name, khuint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal) + +#define KHASH_MAP_INIT_INT64(name, khval_t) \ + KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal) + +/*! @function + @abstract Instantiate a hash map containing 16bit-integer keys + @param name Name of the hash table [symbol] + @param khval_t Type of values [type] + */ +#define KHASH_MAP_INIT_INT16(name, khval_t) \ + KHASH_INIT(name, khint16_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal) + +#define KHASH_MAP_INIT_UINT16(name, khval_t) \ + KHASH_INIT(name, khuint16_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal) + +/*! @function + @abstract Instantiate a hash map containing 8bit-integer keys + @param name Name of the hash table [symbol] + @param khval_t Type of values [type] + */ +#define KHASH_MAP_INIT_INT8(name, khval_t) \ + KHASH_INIT(name, khint8_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal) + +#define KHASH_MAP_INIT_UINT8(name, khval_t) \ + KHASH_INIT(name, khuint8_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal) + + + +typedef const char *kh_cstr_t; +/*! @function + @abstract Instantiate a hash map containing const char* keys + @param name Name of the hash table [symbol] + */ +#define KHASH_SET_INIT_STR(name) \ + KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal) + +/*! 
@function
+  @abstract     Instantiate a hash map containing const char* keys
+  @param  name  Name of the hash table [symbol]
+  @param  khval_t  Type of values [type]
+ */
+#define KHASH_MAP_INIT_STR(name, khval_t) \
+	KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)
+
+
+#define kh_exist_str(h, k) (kh_exist(h, k))
+#define kh_exist_float64(h, k) (kh_exist(h, k))
+#define kh_exist_uint64(h, k) (kh_exist(h, k))
+#define kh_exist_int64(h, k) (kh_exist(h, k))
+#define kh_exist_float32(h, k) (kh_exist(h, k))
+#define kh_exist_int32(h, k) (kh_exist(h, k))
+#define kh_exist_uint32(h, k) (kh_exist(h, k))
+#define kh_exist_int16(h, k) (kh_exist(h, k))
+#define kh_exist_uint16(h, k) (kh_exist(h, k))
+#define kh_exist_int8(h, k) (kh_exist(h, k))
+#define kh_exist_uint8(h, k) (kh_exist(h, k))
+
+KHASH_MAP_INIT_STR(str, size_t)
+KHASH_MAP_INIT_INT(int32, size_t)
+KHASH_MAP_INIT_UINT(uint32, size_t)
+KHASH_MAP_INIT_INT64(int64, size_t)
+KHASH_MAP_INIT_UINT64(uint64, size_t)
+KHASH_MAP_INIT_INT16(int16, size_t)
+KHASH_MAP_INIT_UINT16(uint16, size_t)
+KHASH_MAP_INIT_INT8(int8, size_t)
+KHASH_MAP_INIT_UINT8(uint8, size_t)
+
+
+#endif /* __AC_KHASH_H */
diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/src/klib/khash_python.h b/.local/lib/python3.8/site-packages/pandas/_libs/src/klib/khash_python.h
new file mode 100644
index 0000000..56afea0
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/_libs/src/klib/khash_python.h
@@ -0,0 +1,446 @@
+#include <string.h>
+#include <Python.h>
+
+
+// use numpy's definitions for complex
+#include <numpy/npy_common.h>
+typedef npy_complex64 khcomplex64_t;
+typedef npy_complex128 khcomplex128_t;
+
+
+
+// khash should report usage to tracemalloc
+#if PY_VERSION_HEX >= 0x03060000
+#include <pymem.h>
+#if PY_VERSION_HEX < 0x03070000
+#define PyTraceMalloc_Track _PyTraceMalloc_Track
+#define PyTraceMalloc_Untrack _PyTraceMalloc_Untrack
+#endif
+#else
+#define PyTraceMalloc_Track(...)
+#define PyTraceMalloc_Untrack(...)
+#endif
+
+
+static const int KHASH_TRACE_DOMAIN = 424242;
+void *traced_malloc(size_t size){
+    void * ptr = malloc(size);
+    if(ptr!=NULL){
+        PyTraceMalloc_Track(KHASH_TRACE_DOMAIN, (uintptr_t)ptr, size);
+    }
+    return ptr;
+}
+
+void *traced_calloc(size_t num, size_t size){
+    void * ptr = calloc(num, size);
+    if(ptr!=NULL){
+        PyTraceMalloc_Track(KHASH_TRACE_DOMAIN, (uintptr_t)ptr, num*size);
+    }
+    return ptr;
+}
+
+void *traced_realloc(void* old_ptr, size_t size){
+    void * ptr = realloc(old_ptr, size);
+    if(ptr!=NULL){
+        if(old_ptr != ptr){
+            PyTraceMalloc_Untrack(KHASH_TRACE_DOMAIN, (uintptr_t)old_ptr);
+        }
+        PyTraceMalloc_Track(KHASH_TRACE_DOMAIN, (uintptr_t)ptr, size);
+    }
+    return ptr;
+}
+
+void traced_free(void* ptr){
+    if(ptr!=NULL){
+        PyTraceMalloc_Untrack(KHASH_TRACE_DOMAIN, (uintptr_t)ptr);
+    }
+    free(ptr);
+}
+
+
+#define KHASH_MALLOC traced_malloc
+#define KHASH_REALLOC traced_realloc
+#define KHASH_CALLOC traced_calloc
+#define KHASH_FREE traced_free
+#include "khash.h"
+
+// Previously we were using the built in cpython hash function for doubles
+// python 2.7 https://github.com/python/cpython/blob/2.7/Objects/object.c#L1021
+// python 3.5 https://github.com/python/cpython/blob/3.5/Python/pyhash.c#L85
+
+// The python 3 hash function has the invariant hash(x) == hash(int(x)) == hash(decimal(x))
+// and the size of hash may be different by platform / version (long in py2, Py_ssize_t in py3).
+// We don't need those invariants because types will be cast before hashing, and if Py_ssize_t
+// is 64 bits the truncation causes collision issues.
Given all that, we use our own +// simple hash, viewing the double bytes as an int64 and using khash's default +// hash for 64 bit integers. +// GH 13436 showed that _Py_HashDouble doesn't work well with khash +// GH 28303 showed, that the simple xoring-version isn't good enough +// See GH 36729 for evaluation of the currently used murmur2-hash version +// An interesting alternative to expensive murmur2-hash would be to change +// the probing strategy and use e.g. the probing strategy from CPython's +// implementation of dicts, which shines for smaller sizes but is more +// predisposed to superlinear running times (see GH 36729 for comparison) + + +khuint64_t PANDAS_INLINE asuint64(double key) { + khuint64_t val; + memcpy(&val, &key, sizeof(double)); + return val; +} + +khuint32_t PANDAS_INLINE asuint32(float key) { + khuint32_t val; + memcpy(&val, &key, sizeof(float)); + return val; +} + +#define ZERO_HASH 0 +#define NAN_HASH 0 + +khuint32_t PANDAS_INLINE kh_float64_hash_func(double val){ + // 0.0 and -0.0 should have the same hash: + if (val == 0.0){ + return ZERO_HASH; + } + // all nans should have the same hash: + if ( val!=val ){ + return NAN_HASH; + } + khuint64_t as_int = asuint64(val); + return murmur2_64to32(as_int); +} + +khuint32_t PANDAS_INLINE kh_float32_hash_func(float val){ + // 0.0 and -0.0 should have the same hash: + if (val == 0.0f){ + return ZERO_HASH; + } + // all nans should have the same hash: + if ( val!=val ){ + return NAN_HASH; + } + khuint32_t as_int = asuint32(val); + return murmur2_32to32(as_int); +} + +#define kh_floats_hash_equal(a, b) ((a) == (b) || ((b) != (b) && (a) != (a))) + +#define KHASH_MAP_INIT_FLOAT64(name, khval_t) \ + KHASH_INIT(name, khfloat64_t, khval_t, 1, kh_float64_hash_func, kh_floats_hash_equal) + +KHASH_MAP_INIT_FLOAT64(float64, size_t) + +#define KHASH_MAP_INIT_FLOAT32(name, khval_t) \ + KHASH_INIT(name, khfloat32_t, khval_t, 1, kh_float32_hash_func, kh_floats_hash_equal) + +KHASH_MAP_INIT_FLOAT32(float32, size_t) + +khint32_t PANDAS_INLINE kh_complex128_hash_func(khcomplex128_t val){ + return kh_float64_hash_func(val.real)^kh_float64_hash_func(val.imag); +} +khint32_t PANDAS_INLINE kh_complex64_hash_func(khcomplex64_t val){ + return kh_float32_hash_func(val.real)^kh_float32_hash_func(val.imag); +} + +#define kh_complex_hash_equal(a, b) \ + (kh_floats_hash_equal(a.real, b.real) && kh_floats_hash_equal(a.imag, b.imag)) + + +#define KHASH_MAP_INIT_COMPLEX64(name, khval_t) \ + KHASH_INIT(name, khcomplex64_t, khval_t, 1, kh_complex64_hash_func, kh_complex_hash_equal) + +KHASH_MAP_INIT_COMPLEX64(complex64, size_t) + + +#define KHASH_MAP_INIT_COMPLEX128(name, khval_t) \ + KHASH_INIT(name, khcomplex128_t, khval_t, 1, kh_complex128_hash_func, kh_complex_hash_equal) + +KHASH_MAP_INIT_COMPLEX128(complex128, size_t) + + +#define kh_exist_complex64(h, k) (kh_exist(h, k)) +#define kh_exist_complex128(h, k) (kh_exist(h, k)) + + +// NaN-floats should be in the same equivalency class, see GH 22119 +int PANDAS_INLINE floatobject_cmp(PyFloatObject* a, PyFloatObject* b){ + return ( + Py_IS_NAN(PyFloat_AS_DOUBLE(a)) && + Py_IS_NAN(PyFloat_AS_DOUBLE(b)) + ) + || + ( PyFloat_AS_DOUBLE(a) == PyFloat_AS_DOUBLE(b) ); +} + + +// NaNs should be in the same equivalency class, see GH 41836 +// PyObject_RichCompareBool for complexobjects has a different behavior +// needs to be replaced +int PANDAS_INLINE complexobject_cmp(PyComplexObject* a, PyComplexObject* b){ + return ( + Py_IS_NAN(a->cval.real) && + Py_IS_NAN(b->cval.real) && + Py_IS_NAN(a->cval.imag) && + 
Py_IS_NAN(b->cval.imag) + ) + || + ( + Py_IS_NAN(a->cval.real) && + Py_IS_NAN(b->cval.real) && + a->cval.imag == b->cval.imag + ) + || + ( + a->cval.real == b->cval.real && + Py_IS_NAN(a->cval.imag) && + Py_IS_NAN(b->cval.imag) + ) + || + ( + a->cval.real == b->cval.real && + a->cval.imag == b->cval.imag + ); +} + +int PANDAS_INLINE pyobject_cmp(PyObject* a, PyObject* b); + + +// replacing PyObject_RichCompareBool (NaN!=NaN) with pyobject_cmp (NaN==NaN), +// which treats NaNs as equivalent +// see GH 41836 +int PANDAS_INLINE tupleobject_cmp(PyTupleObject* a, PyTupleObject* b){ + Py_ssize_t i; + + if (Py_SIZE(a) != Py_SIZE(b)) { + return 0; + } + + for (i = 0; i < Py_SIZE(a); ++i) { + if (!pyobject_cmp(PyTuple_GET_ITEM(a, i), PyTuple_GET_ITEM(b, i))) { + return 0; + } + } + return 1; +} + + +int PANDAS_INLINE pyobject_cmp(PyObject* a, PyObject* b) { + if (a == b) { + return 1; + } + if (Py_TYPE(a) == Py_TYPE(b)) { + // special handling for some built-in types which could have NaNs + // as we would like to have them equivalent, but the usual + // PyObject_RichCompareBool would return False + if (PyFloat_CheckExact(a)) { + return floatobject_cmp((PyFloatObject*)a, (PyFloatObject*)b); + } + if (PyComplex_CheckExact(a)) { + return complexobject_cmp((PyComplexObject*)a, (PyComplexObject*)b); + } + if (PyTuple_CheckExact(a)) { + return tupleobject_cmp((PyTupleObject*)a, (PyTupleObject*)b); + } + // frozenset isn't yet supported + } + + int result = PyObject_RichCompareBool(a, b, Py_EQ); + if (result < 0) { + PyErr_Clear(); + return 0; + } + return result; +} + + +Py_hash_t PANDAS_INLINE _Pandas_HashDouble(double val) { + //Since Python3.10, nan is no longer has hash 0 + if (Py_IS_NAN(val)) { + return 0; + } +#if PY_VERSION_HEX < 0x030A0000 + return _Py_HashDouble(val); +#else + return _Py_HashDouble(NULL, val); +#endif +} + + +Py_hash_t PANDAS_INLINE floatobject_hash(PyFloatObject* key) { + return _Pandas_HashDouble(PyFloat_AS_DOUBLE(key)); +} + + +#define _PandasHASH_IMAG 1000003UL + +// replaces _Py_HashDouble with _Pandas_HashDouble +Py_hash_t PANDAS_INLINE complexobject_hash(PyComplexObject* key) { + Py_uhash_t realhash = (Py_uhash_t)_Pandas_HashDouble(key->cval.real); + Py_uhash_t imaghash = (Py_uhash_t)_Pandas_HashDouble(key->cval.imag); + if (realhash == (Py_uhash_t)-1 || imaghash == (Py_uhash_t)-1) { + return -1; + } + Py_uhash_t combined = realhash + _PandasHASH_IMAG * imaghash; + if (combined == (Py_uhash_t)-1) { + return -2; + } + return (Py_hash_t)combined; +} + + +khuint32_t PANDAS_INLINE kh_python_hash_func(PyObject* key); + +//we could use any hashing algorithm, this is the original CPython's for tuples + +#if SIZEOF_PY_UHASH_T > 4 +#define _PandasHASH_XXPRIME_1 ((Py_uhash_t)11400714785074694791ULL) +#define _PandasHASH_XXPRIME_2 ((Py_uhash_t)14029467366897019727ULL) +#define _PandasHASH_XXPRIME_5 ((Py_uhash_t)2870177450012600261ULL) +#define _PandasHASH_XXROTATE(x) ((x << 31) | (x >> 33)) /* Rotate left 31 bits */ +#else +#define _PandasHASH_XXPRIME_1 ((Py_uhash_t)2654435761UL) +#define _PandasHASH_XXPRIME_2 ((Py_uhash_t)2246822519UL) +#define _PandasHASH_XXPRIME_5 ((Py_uhash_t)374761393UL) +#define _PandasHASH_XXROTATE(x) ((x << 13) | (x >> 19)) /* Rotate left 13 bits */ +#endif + +Py_hash_t PANDAS_INLINE tupleobject_hash(PyTupleObject* key) { + Py_ssize_t i, len = Py_SIZE(key); + PyObject **item = key->ob_item; + + Py_uhash_t acc = _PandasHASH_XXPRIME_5; + for (i = 0; i < len; i++) { + Py_uhash_t lane = kh_python_hash_func(item[i]); + if (lane == (Py_uhash_t)-1) { + return -1; + 
} + acc += lane * _PandasHASH_XXPRIME_2; + acc = _PandasHASH_XXROTATE(acc); + acc *= _PandasHASH_XXPRIME_1; + } + + /* Add input length, mangled to keep the historical value of hash(()). */ + acc += len ^ (_PandasHASH_XXPRIME_5 ^ 3527539UL); + + if (acc == (Py_uhash_t)-1) { + return 1546275796; + } + return acc; +} + + +khuint32_t PANDAS_INLINE kh_python_hash_func(PyObject* key) { + Py_hash_t hash; + // For PyObject_Hash holds: + // hash(0.0) == 0 == hash(-0.0) + // yet for different nan-objects different hash-values + // are possible + if (PyFloat_CheckExact(key)) { + // we cannot use kh_float64_hash_func + // because float(k) == k holds for any int-object k + // and kh_float64_hash_func doesn't respect it + hash = floatobject_hash((PyFloatObject*)key); + } + else if (PyComplex_CheckExact(key)) { + // we cannot use kh_complex128_hash_func + // because complex(k,0) == k holds for any int-object k + // and kh_complex128_hash_func doesn't respect it + hash = complexobject_hash((PyComplexObject*)key); + } + else if (PyTuple_CheckExact(key)) { + hash = tupleobject_hash((PyTupleObject*)key); + } + else { + hash = PyObject_Hash(key); + } + + if (hash == -1) { + PyErr_Clear(); + return 0; + } + #if SIZEOF_PY_HASH_T == 4 + // it is already 32bit value + return hash; + #else + // for 64bit builds, + // we need information of the upper 32bits as well + // see GH 37615 + khuint64_t as_uint = (khuint64_t) hash; + // uints avoid undefined behavior of signed ints + return (as_uint>>32)^as_uint; + #endif +} + + +#define kh_python_hash_equal(a, b) (pyobject_cmp(a, b)) + + +// Python object + +typedef PyObject* kh_pyobject_t; + +#define KHASH_MAP_INIT_PYOBJECT(name, khval_t) \ + KHASH_INIT(name, kh_pyobject_t, khval_t, 1, \ + kh_python_hash_func, kh_python_hash_equal) + +KHASH_MAP_INIT_PYOBJECT(pymap, Py_ssize_t) + +#define KHASH_SET_INIT_PYOBJECT(name) \ + KHASH_INIT(name, kh_pyobject_t, char, 0, \ + kh_python_hash_func, kh_python_hash_equal) + +KHASH_SET_INIT_PYOBJECT(pyset) + +#define kh_exist_pymap(h, k) (kh_exist(h, k)) +#define kh_exist_pyset(h, k) (kh_exist(h, k)) + +KHASH_MAP_INIT_STR(strbox, kh_pyobject_t) + +typedef struct { + kh_str_t *table; + int starts[256]; +} kh_str_starts_t; + +typedef kh_str_starts_t* p_kh_str_starts_t; + +p_kh_str_starts_t PANDAS_INLINE kh_init_str_starts(void) { + kh_str_starts_t *result = (kh_str_starts_t*)KHASH_CALLOC(1, sizeof(kh_str_starts_t)); + result->table = kh_init_str(); + return result; +} + +khuint_t PANDAS_INLINE kh_put_str_starts_item(kh_str_starts_t* table, char* key, int* ret) { + khuint_t result = kh_put_str(table->table, key, ret); + if (*ret != 0) { + table->starts[(unsigned char)key[0]] = 1; + } + return result; +} + +khuint_t PANDAS_INLINE kh_get_str_starts_item(const kh_str_starts_t* table, const char* key) { + unsigned char ch = *key; + if (table->starts[ch]) { + if (ch == '\0' || kh_get_str(table->table, key) != table->table->n_buckets) return 1; + } + return 0; +} + +void PANDAS_INLINE kh_destroy_str_starts(kh_str_starts_t* table) { + kh_destroy_str(table->table); + KHASH_FREE(table); +} + +void PANDAS_INLINE kh_resize_str_starts(kh_str_starts_t* table, khuint_t val) { + kh_resize_str(table->table, val); +} + +// utility function: given the number of elements +// returns number of necessary buckets +khuint_t PANDAS_INLINE kh_needed_n_buckets(khuint_t n_elements){ + khuint_t candidate = n_elements; + kroundup32(candidate); + khuint_t upper_bound = (khuint_t)(candidate * __ac_HASH_UPPER + 0.5); + return (upper_bound < n_elements) ? 
2*candidate : candidate;
+
+}
diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/src/parse_helper.h b/.local/lib/python3.8/site-packages/pandas/_libs/src/parse_helper.h
new file mode 100644
index 0000000..d161c4e
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/_libs/src/parse_helper.h
@@ -0,0 +1,100 @@
+/*
+Copyright (c) 2016, PyData Development Team
+All rights reserved.
+
+Distributed under the terms of the BSD Simplified License.
+
+The full license is in the LICENSE file, distributed with this software.
+*/
+
+#ifndef PANDAS__LIBS_SRC_PARSE_HELPER_H_
+#define PANDAS__LIBS_SRC_PARSE_HELPER_H_
+
+#include <float.h>
+#include "parser/tokenizer.h"
+
+int to_double(char *item, double *p_value, char sci, char decimal,
+              int *maybe_int) {
+    char *p_end = NULL;
+    int error = 0;
+
+    /* Switch to precise xstrtod GH 31364 */
+    *p_value = precise_xstrtod(item, &p_end, decimal, sci, '\0', 1,
+                               &error, maybe_int);
+
+    return (error == 0) && (!*p_end);
+}
+
+int floatify(PyObject *str, double *result, int *maybe_int) {
+    int status;
+    char *data;
+    PyObject *tmp = NULL;
+    const char sci = 'E';
+    const char dec = '.';
+
+    if (PyBytes_Check(str)) {
+        data = PyBytes_AS_STRING(str);
+    } else if (PyUnicode_Check(str)) {
+        tmp = PyUnicode_AsUTF8String(str);
+        if (tmp == NULL) {
+            return -1;
+        }
+        data = PyBytes_AS_STRING(tmp);
+    } else {
+        PyErr_SetString(PyExc_TypeError, "Invalid object type");
+        return -1;
+    }
+
+    status = to_double(data, result, sci, dec, maybe_int);
+
+    if (!status) {
+        /* handle inf/-inf infinity/-infinity */
+        if (strlen(data) == 3) {
+            if (0 == strcasecmp(data, "inf")) {
+                *result = HUGE_VAL;
+                *maybe_int = 0;
+            } else {
+                goto parsingerror;
+            }
+        } else if (strlen(data) == 4) {
+            if (0 == strcasecmp(data, "-inf")) {
+                *result = -HUGE_VAL;
+                *maybe_int = 0;
+            } else if (0 == strcasecmp(data, "+inf")) {
+                *result = HUGE_VAL;
+                *maybe_int = 0;
+            } else {
+                goto parsingerror;
+            }
+        } else if (strlen(data) == 8) {
+            if (0 == strcasecmp(data, "infinity")) {
+                *result = HUGE_VAL;
+                *maybe_int = 0;
+            } else {
+                goto parsingerror;
+            }
+        } else if (strlen(data) == 9) {
+            if (0 == strcasecmp(data, "-infinity")) {
+                *result = -HUGE_VAL;
+                *maybe_int = 0;
+            } else if (0 == strcasecmp(data, "+infinity")) {
+                *result = HUGE_VAL;
+                *maybe_int = 0;
+            } else {
+                goto parsingerror;
+            }
+        } else {
+            goto parsingerror;
+        }
+    }
+
+    Py_XDECREF(tmp);
+    return 0;
+
+parsingerror:
+    PyErr_Format(PyExc_ValueError, "Unable to parse string \"%s\"", data);
+    Py_XDECREF(tmp);
+    return -1;
+}
+
+#endif  // PANDAS__LIBS_SRC_PARSE_HELPER_H_
diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/src/parser/io.c b/.local/lib/python3.8/site-packages/pandas/_libs/src/parser/io.c
new file mode 100644
index 0000000..2ed0cef
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/_libs/src/parser/io.c
@@ -0,0 +1,107 @@
+/*
+Copyright (c) 2016, PyData Development Team
+All rights reserved.
+
+Distributed under the terms of the BSD Simplified License.
+
+The full license is in the LICENSE file, distributed with this software.
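Note how to_double() above rejects trailing garbage ((error == 0) && (!*p_end)), and floatify() then falls back to fixed spellings of infinity, dispatched by string length before the case-insensitive compare. A standalone sketch of that fallback logic, with the C library's strtod standing in for pandas' precise_xstrtod, and floatify_sketch being a made-up name:

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>   /* strcasecmp; portable.h maps it to _stricmp on MSVC */

static int floatify_sketch(const char *data, double *result) {
    char *end = NULL;
    *result = strtod(data, &end);
    if (end != data && *end == '\0') return 0;      /* plain number */

    /* same length-then-compare dispatch as floatify() above */
    size_t n = strlen(data);
    if ((n == 3 && strcasecmp(data, "inf") == 0) ||
        (n == 4 && strcasecmp(data, "+inf") == 0) ||
        (n == 8 && strcasecmp(data, "infinity") == 0) ||
        (n == 9 && strcasecmp(data, "+infinity") == 0)) {
        *result = HUGE_VAL;
        return 0;
    }
    if ((n == 4 && strcasecmp(data, "-inf") == 0) ||
        (n == 9 && strcasecmp(data, "-infinity") == 0)) {
        *result = -HUGE_VAL;
        return 0;
    }
    return -1;                                      /* parsing error */
}

int main(void) {
    double v;
    const char *samples[] = {"1.5e3", "-Infinity", "nope"};
    for (int i = 0; i < 3; ++i)
        printf("%-10s -> %s %g\n", samples[i],
               floatify_sketch(samples[i], &v) == 0 ? "ok" : "err", v);
    return 0;
}

(C99 strtod already accepts "inf"/"infinity" itself, so the fallback mostly matters for parsers like precise_xstrtod that only accept digits.)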
+*/
+
+#include "io.h"
+
+/*
+  On-disk FILE, uncompressed
+*/
+
+void *new_rd_source(PyObject *obj) {
+    rd_source *rds = (rd_source *)malloc(sizeof(rd_source));
+
+    if (rds == NULL) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+    /* hold on to this object */
+    Py_INCREF(obj);
+    rds->obj = obj;
+    rds->buffer = NULL;
+    rds->position = 0;
+
+    return (void *)rds;
+}
+
+/*
+
+  Cleanup callbacks
+
+ */
+
+int del_rd_source(void *rds) {
+    Py_XDECREF(RDS(rds)->obj);
+    Py_XDECREF(RDS(rds)->buffer);
+    free(rds);
+
+    return 0;
+}
+
+/*
+
+  IO callbacks
+
+ */
+
+void *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read,
+                      int *status, const char *encoding_errors) {
+    PyGILState_STATE state;
+    PyObject *result, *func, *args, *tmp;
+
+    void *retval;
+
+    size_t length;
+    rd_source *src = RDS(source);
+    state = PyGILState_Ensure();
+
+    /* delete old object */
+    Py_XDECREF(src->buffer);
+    src->buffer = NULL;
+    args = Py_BuildValue("(i)", nbytes);
+
+    func = PyObject_GetAttrString(src->obj, "read");
+
+    /* TODO: does this release the GIL? */
+    result = PyObject_CallObject(func, args);
+    Py_XDECREF(args);
+    Py_XDECREF(func);
+
+    if (result == NULL) {
+        PyGILState_Release(state);
+        *bytes_read = 0;
+        *status = CALLING_READ_FAILED;
+        return NULL;
+    } else if (!PyBytes_Check(result)) {
+        tmp = PyUnicode_AsEncodedString(result, "utf-8", encoding_errors);
+        Py_DECREF(result);
+        if (tmp == NULL) {
+            PyGILState_Release(state);
+            return NULL;
+        }
+        result = tmp;
+    }
+
+    length = PySequence_Length(result);
+
+    if (length == 0)
+        *status = REACHED_EOF;
+    else
+        *status = 0;
+
+    /* hang on to the Python object */
+    src->buffer = result;
+    retval = (void *)PyBytes_AsString(result);
+
+    PyGILState_Release(state);
+
+    /* TODO: more error handling */
+    *bytes_read = length;
+
+    return retval;
+}
diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/src/parser/io.h b/.local/lib/python3.8/site-packages/pandas/_libs/src/parser/io.h
new file mode 100644
index 0000000..f0e8b01
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/_libs/src/parser/io.h
@@ -0,0 +1,34 @@
+/*
+Copyright (c) 2016, PyData Development Team
+All rights reserved.
+
+Distributed under the terms of the BSD Simplified License.
+
+The full license is in the LICENSE file, distributed with this software.
+*/
+
+#ifndef PANDAS__LIBS_SRC_PARSER_IO_H_
+#define PANDAS__LIBS_SRC_PARSER_IO_H_
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+#include "tokenizer.h"
+
+#define FS(source) ((file_source *)source)
+
+typedef struct _rd_source {
+    PyObject *obj;
+    PyObject *buffer;
+    size_t position;
+} rd_source;
+
+#define RDS(source) ((rd_source *)source)
+
+void *new_rd_source(PyObject *obj);
+
+int del_rd_source(void *src);
+
+void *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read,
+                      int *status, const char *encoding_errors);
+
+#endif  // PANDAS__LIBS_SRC_PARSER_IO_H_
diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/src/parser/tokenizer.c b/.local/lib/python3.8/site-packages/pandas/_libs/src/parser/tokenizer.c
new file mode 100644
index 0000000..ade4d4a
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/_libs/src/parser/tokenizer.c
@@ -0,0 +1,2085 @@
+/*
+
+Copyright (c) 2012, Lambda Foundry, Inc., except where noted
+
+Incorporates components of WarrenWeckesser/textreader, licensed under 3-clause
+BSD
+
+See LICENSE for the license
+
+*/
+
+/*
+
+Low-level ascii-file processing for pandas. Combines some elements from
+Python's built-in csv module and Warren Weckesser's textreader project on
+GitHub.  See Python Software Foundation License and BSD licenses for these.
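buffer_rd_bytes() above is the read callback the tokenizer drives: it calls .read(nbytes) on the wrapped Python object, keeps the returned bytes object alive in src->buffer so the raw pointer stays valid, and reports EOF through *status. A hypothetical embedding sketch of that call sequence (compile together with io.c against the Python headers; error handling pared down for brevity):

#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <stdio.h>
#include "io.h"

int main(void) {
    Py_Initialize();

    /* any object with a .read(n) method works; io.BytesIO here */
    PyObject *io_mod = PyImport_ImportModule("io");
    PyObject *obj = io_mod ?
        PyObject_CallMethod(io_mod, "BytesIO", "y", "a,b,c\n1,2,3\n") : NULL;
    if (!obj) { PyErr_Print(); return 1; }

    void *src = new_rd_source(obj);        /* INCREFs obj and wraps it */
    size_t nread = 0;
    int status = 0;
    void *buf = buffer_rd_bytes(src, 64, &nread, &status, "strict");
    if (buf != NULL)
        printf("read %zu bytes, status=%d: %.*s",
               nread, status, (int)nread, (char *)buf);

    del_rd_source(src);                    /* drops obj and the held buffer */
    Py_DECREF(obj);
    Py_DECREF(io_mod);
    Py_FinalizeEx();
    return 0;
}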
+
+*/
+
+#include "tokenizer.h"
+
+#include <ctype.h>
+#include <math.h>
+#include <float.h>
+
+#include "../headers/portable.h"
+
+void coliter_setup(coliter_t *self, parser_t *parser, int64_t i,
+                   int64_t start) {
+    // column i, starting at 0
+    self->words = parser->words;
+    self->col = i;
+    self->line_start = parser->line_start + start;
+}
+
+static void free_if_not_null(void **ptr) {
+    TRACE(("free_if_not_null %p\n", *ptr))
+    if (*ptr != NULL) {
+        free(*ptr);
+        *ptr = NULL;
+    }
+}
+
+/*
+
+  Parser / tokenizer
+
+*/
+
+static void *grow_buffer(void *buffer, uint64_t length, uint64_t *capacity,
+                         int64_t space, int64_t elsize, int *error) {
+    uint64_t cap = *capacity;
+    void *newbuffer = buffer;
+
+    // Can we fit potentially nbytes tokens (+ null terminators) in the stream?
+    while ((length + space >= cap) && (newbuffer != NULL)) {
+        cap = cap ? cap << 1 : 2;
+        buffer = newbuffer;
+        newbuffer = realloc(newbuffer, elsize * cap);
+    }
+
+    if (newbuffer == NULL) {
+        // realloc failed so don't change *capacity, set *error to errno
+        // and return the last good realloc'd buffer so it can be freed
+        *error = errno;
+        newbuffer = buffer;
+    } else {
+        // realloc worked, update *capacity and set *error to 0
+        // sigh, multiple return values
+        *capacity = cap;
+        *error = 0;
+    }
+    return newbuffer;
+}
+
+void parser_set_default_options(parser_t *self) {
+    self->decimal = '.';
+    self->sci = 'E';
+
+    // For tokenization
+    self->state = START_RECORD;
+
+    self->delimiter = ',';  // XXX
+    self->delim_whitespace = 0;
+
+    self->doublequote = 0;
+    self->quotechar = '"';
+    self->escapechar = 0;
+
+    self->lineterminator = '\0'; /* NUL->standard logic */
+
+    self->skipinitialspace = 0;
+    self->quoting = QUOTE_MINIMAL;
+    self->allow_embedded_newline = 1;
+
+    self->expected_fields = -1;
+    self->on_bad_lines = ERROR;
+
+    self->commentchar = '#';
+    self->thousands = '\0';
+
+    self->skipset = NULL;
+    self->skipfunc = NULL;
+    self->skip_first_N_rows = -1;
+    self->skip_footer = 0;
+}
+
+parser_t *parser_new() { return (parser_t *)calloc(1, sizeof(parser_t)); }
+
+int parser_clear_data_buffers(parser_t *self) {
+    free_if_not_null((void *)&self->stream);
+    free_if_not_null((void *)&self->words);
+    free_if_not_null((void *)&self->word_starts);
+    free_if_not_null((void *)&self->line_start);
+    free_if_not_null((void *)&self->line_fields);
+    return 0;
+}
+
+int parser_cleanup(parser_t *self) {
+    int status = 0;
+
+    // XXX where to put this
+    free_if_not_null((void *)&self->error_msg);
+    free_if_not_null((void *)&self->warn_msg);
+
+    if (self->skipset != NULL) {
+        kh_destroy_int64((kh_int64_t *)self->skipset);
+        self->skipset = NULL;
+    }
+
+    if (parser_clear_data_buffers(self) < 0) {
+        status = -1;
+    }
+
+    if (self->cb_cleanup != NULL) {
+        if (self->cb_cleanup(self->source) < 0) {
+            status = -1;
+        }
+        self->cb_cleanup = NULL;
+    }
+
+    return status;
+}
+
+int parser_init(parser_t *self) {
+    int64_t sz;
+
+    /*
+      Initialize data buffers
+    */
+
+    self->stream = NULL;
+    self->words = NULL;
+    self->word_starts = NULL;
+    self->line_start = NULL;
+    self->line_fields = NULL;
+    self->error_msg = NULL;
+    self->warn_msg = NULL;
+
+    // token stream
+    self->stream = malloc(STREAM_INIT_SIZE * sizeof(char));
+    if (self->stream == NULL) {
+        parser_cleanup(self);
+        return PARSER_OUT_OF_MEMORY;
+    }
+    self->stream_cap = STREAM_INIT_SIZE;
+    self->stream_len = 0;
+
+    // word pointers and metadata
+    sz = STREAM_INIT_SIZE / 10;
+    sz = sz ?
sz : 1; + self->words = malloc(sz * sizeof(char *)); + self->word_starts = malloc(sz * sizeof(int64_t)); + self->max_words_cap = sz; + self->words_cap = sz; + self->words_len = 0; + + // line pointers and metadata + self->line_start = malloc(sz * sizeof(int64_t)); + + self->line_fields = malloc(sz * sizeof(int64_t)); + + self->lines_cap = sz; + self->lines = 0; + self->file_lines = 0; + + if (self->stream == NULL || self->words == NULL || + self->word_starts == NULL || self->line_start == NULL || + self->line_fields == NULL) { + parser_cleanup(self); + + return PARSER_OUT_OF_MEMORY; + } + + /* amount of bytes buffered */ + self->datalen = 0; + self->datapos = 0; + + self->line_start[0] = 0; + self->line_fields[0] = 0; + + self->pword_start = self->stream; + self->word_start = 0; + + self->state = START_RECORD; + + self->error_msg = NULL; + self->warn_msg = NULL; + + self->commentchar = '\0'; + + return 0; +} + +void parser_free(parser_t *self) { + // opposite of parser_init + parser_cleanup(self); +} + +void parser_del(parser_t *self) { + free(self); +} + +static int make_stream_space(parser_t *self, size_t nbytes) { + uint64_t i, cap, length; + int status; + void *orig_ptr, *newptr; + + // Can we fit potentially nbytes tokens (+ null terminators) in the stream? + + /* + TOKEN STREAM + */ + + orig_ptr = (void *)self->stream; + TRACE( + ("\n\nmake_stream_space: nbytes = %zu. grow_buffer(self->stream...)\n", + nbytes)) + self->stream = (char *)grow_buffer((void *)self->stream, self->stream_len, + &self->stream_cap, nbytes * 2, + sizeof(char), &status); + TRACE( + ("make_stream_space: self->stream=%p, self->stream_len = %zu, " + "self->stream_cap=%zu, status=%zu\n", + self->stream, self->stream_len, self->stream_cap, status)) + + if (status != 0) { + return PARSER_OUT_OF_MEMORY; + } + + // realloc sets errno when moving buffer? + if (self->stream != orig_ptr) { + self->pword_start = self->stream + self->word_start; + + for (i = 0; i < self->words_len; ++i) { + self->words[i] = self->stream + self->word_starts[i]; + } + } + + /* + WORD VECTORS + */ + + cap = self->words_cap; + + /** + * If we are reading in chunks, we need to be aware of the maximum number + * of words we have seen in previous chunks (self->max_words_cap), so + * that way, we can properly allocate when reading subsequent ones. + * + * Otherwise, we risk a buffer overflow if we mistakenly under-allocate + * just because a recent chunk did not have as many words. 
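grow_buffer() above keeps doubling the capacity until length + space fits, and on a failed realloc it reports errno while handing back the last good buffer so the caller can still free it. A standalone sketch of the same discipline with concrete numbers (grow_sketch is a local copy for illustration, not the pandas symbol):

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static void *grow_sketch(void *buf, uint64_t length, uint64_t *capacity,
                         int64_t space, int64_t elsize, int *error) {
    uint64_t cap = *capacity;
    void *newbuf = buf;

    /* double until length + space fits: 0 -> 2 -> 4 -> 8 ... */
    while ((length + space >= cap) && (newbuf != NULL)) {
        cap = cap ? cap << 1 : 2;
        buf = newbuf;
        newbuf = realloc(newbuf, elsize * cap);
    }
    if (newbuf == NULL) {
        *error = errno;   /* capacity unchanged; caller may free old buffer */
        newbuf = buf;
    } else {
        *capacity = cap;
        *error = 0;
    }
    return newbuf;
}

int main(void) {
    uint64_t cap = 4;
    int err = 0;
    char *stream = malloc(cap);
    /* 3 bytes held, room for 10 more needed: capacity jumps 4 -> 8 -> 16 */
    stream = grow_sketch(stream, 3, &cap, 10, sizeof(char), &err);
    printf("cap=%llu err=%d\n", (unsigned long long)cap, err);
    free(stream);
    return 0;
}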
+ */ + if (self->words_len + nbytes < self->max_words_cap) { + length = self->max_words_cap - nbytes - 1; + } else { + length = self->words_len; + } + + self->words = + (char **)grow_buffer((void *)self->words, length, + &self->words_cap, nbytes, + sizeof(char *), &status); + TRACE( + ("make_stream_space: grow_buffer(self->self->words, %zu, %zu, %zu, " + "%d)\n", + self->words_len, self->words_cap, nbytes, status)) + if (status != 0) { + return PARSER_OUT_OF_MEMORY; + } + + // realloc took place + if (cap != self->words_cap) { + TRACE( + ("make_stream_space: cap != self->words_cap, nbytes = %d, " + "self->words_cap=%d\n", + nbytes, self->words_cap)) + newptr = realloc((void *)self->word_starts, + sizeof(int64_t) * self->words_cap); + if (newptr == NULL) { + return PARSER_OUT_OF_MEMORY; + } else { + self->word_starts = (int64_t *)newptr; + } + } + + /* + LINE VECTORS + */ + cap = self->lines_cap; + self->line_start = + (int64_t *)grow_buffer((void *)self->line_start, self->lines + 1, + &self->lines_cap, nbytes, + sizeof(int64_t), &status); + TRACE(( + "make_stream_space: grow_buffer(self->line_start, %zu, %zu, %zu, %d)\n", + self->lines + 1, self->lines_cap, nbytes, status)) + if (status != 0) { + return PARSER_OUT_OF_MEMORY; + } + + // realloc took place + if (cap != self->lines_cap) { + TRACE(("make_stream_space: cap != self->lines_cap, nbytes = %d\n", + nbytes)) + newptr = realloc((void *)self->line_fields, + sizeof(int64_t) * self->lines_cap); + if (newptr == NULL) { + return PARSER_OUT_OF_MEMORY; + } else { + self->line_fields = (int64_t *)newptr; + } + } + + return 0; +} + +static int push_char(parser_t *self, char c) { + TRACE(("push_char: self->stream[%zu] = %x, stream_cap=%zu\n", + self->stream_len + 1, c, self->stream_cap)) + if (self->stream_len >= self->stream_cap) { + TRACE( + ("push_char: ERROR!!! self->stream_len(%d) >= " + "self->stream_cap(%d)\n", + self->stream_len, self->stream_cap)) + int64_t bufsize = 100; + self->error_msg = malloc(bufsize); + snprintf(self->error_msg, bufsize, + "Buffer overflow caught - possible malformed input file.\n"); + return PARSER_OUT_OF_MEMORY; + } + self->stream[self->stream_len++] = c; + return 0; +} + +int PANDAS_INLINE end_field(parser_t *self) { + // XXX cruft + if (self->words_len >= self->words_cap) { + TRACE( + ("end_field: ERROR!!! self->words_len(%zu) >= " + "self->words_cap(%zu)\n", + self->words_len, self->words_cap)) + int64_t bufsize = 100; + self->error_msg = malloc(bufsize); + snprintf(self->error_msg, bufsize, + "Buffer overflow caught - possible malformed input file.\n"); + return PARSER_OUT_OF_MEMORY; + } + + // null terminate token + push_char(self, '\0'); + + // set pointer and metadata + self->words[self->words_len] = self->pword_start; + + TRACE(("end_field: Char diff: %d\n", self->pword_start - self->words[0])); + + TRACE(("end_field: Saw word %s at: %d. 
Total: %d\n", self->pword_start, + self->word_start, self->words_len + 1)) + + self->word_starts[self->words_len] = self->word_start; + self->words_len++; + + // increment line field count + self->line_fields[self->lines]++; + + // New field begin in stream + self->pword_start = self->stream + self->stream_len; + self->word_start = self->stream_len; + + return 0; +} + +static void append_warning(parser_t *self, const char *msg) { + int64_t ex_length; + int64_t length = strlen(msg); + void *newptr; + + if (self->warn_msg == NULL) { + self->warn_msg = malloc(length + 1); + snprintf(self->warn_msg, length + 1, "%s", msg); + } else { + ex_length = strlen(self->warn_msg); + newptr = realloc(self->warn_msg, ex_length + length + 1); + if (newptr != NULL) { + self->warn_msg = (char *)newptr; + snprintf(self->warn_msg + ex_length, length + 1, "%s", msg); + } + } +} + +static int end_line(parser_t *self) { + char *msg; + int64_t fields; + int64_t ex_fields = self->expected_fields; + int64_t bufsize = 100; // for error or warning messages + + fields = self->line_fields[self->lines]; + + TRACE(("end_line: Line end, nfields: %d\n", fields)); + + TRACE(("end_line: lines: %d\n", self->lines)); + if (self->lines > 0) { + if (self->expected_fields >= 0) { + ex_fields = self->expected_fields; + } else { + ex_fields = self->line_fields[self->lines - 1]; + } + } + TRACE(("end_line: ex_fields: %d\n", ex_fields)); + + if (self->state == START_FIELD_IN_SKIP_LINE || + self->state == IN_FIELD_IN_SKIP_LINE || + self->state == IN_QUOTED_FIELD_IN_SKIP_LINE || + self->state == QUOTE_IN_QUOTED_FIELD_IN_SKIP_LINE) { + TRACE(("end_line: Skipping row %d\n", self->file_lines)); + // increment file line count + self->file_lines++; + + // skip the tokens from this bad line + self->line_start[self->lines] += fields; + + // reset field count + self->line_fields[self->lines] = 0; + return 0; + } + + if (!(self->lines <= self->header_end + 1) && + (fields > ex_fields) && !(self->usecols)) { + // increment file line count + self->file_lines++; + + // skip the tokens from this bad line + self->line_start[self->lines] += fields; + + // reset field count + self->line_fields[self->lines] = 0; + + // file_lines is now the actual file line number (starting at 1) + if (self->on_bad_lines == ERROR) { + self->error_msg = malloc(bufsize); + snprintf(self->error_msg, bufsize, + "Expected %" PRId64 " fields in line %" PRIu64 ", saw %" + PRId64 "\n", ex_fields, self->file_lines, fields); + + TRACE(("Error at line %d, %d fields\n", self->file_lines, fields)); + + return -1; + } else { + // simply skip bad lines + if (self->on_bad_lines == WARN) { + // pass up error message + msg = malloc(bufsize); + snprintf(msg, bufsize, + "Skipping line %" PRIu64 ": expected %" PRId64 + " fields, saw %" PRId64 "\n", + self->file_lines, ex_fields, fields); + append_warning(self, msg); + free(msg); + } + } + } else { + // missing trailing delimiters + if ((self->lines >= self->header_end + 1) && + fields < ex_fields) { + // might overrun the buffer when closing fields + if (make_stream_space(self, ex_fields - fields) < 0) { + int64_t bufsize = 100; + self->error_msg = malloc(bufsize); + snprintf(self->error_msg, bufsize, "out of memory"); + return -1; + } + + while (fields < ex_fields) { + end_field(self); + fields++; + } + } + + // increment both line counts + self->file_lines++; + self->lines++; + + // good line, set new start point + if (self->lines >= self->lines_cap) { + TRACE(( + "end_line: ERROR!!! 
self->lines(%zu) >= self->lines_cap(%zu)\n", + self->lines, self->lines_cap)) + int64_t bufsize = 100; + self->error_msg = malloc(bufsize); + snprintf(self->error_msg, bufsize, + "Buffer overflow caught - " + "possible malformed input file.\n"); + return PARSER_OUT_OF_MEMORY; + } + self->line_start[self->lines] = + (self->line_start[self->lines - 1] + fields); + + TRACE( + ("end_line: new line start: %d\n", self->line_start[self->lines])); + + // new line start with 0 fields + self->line_fields[self->lines] = 0; + } + + TRACE(("end_line: Finished line, at %d\n", self->lines)); + + return 0; +} + +int parser_add_skiprow(parser_t *self, int64_t row) { + khiter_t k; + kh_int64_t *set; + int ret = 0; + + if (self->skipset == NULL) { + self->skipset = (void *)kh_init_int64(); + } + + set = (kh_int64_t *)self->skipset; + + k = kh_put_int64(set, row, &ret); + set->keys[k] = row; + + return 0; +} + +int parser_set_skipfirstnrows(parser_t *self, int64_t nrows) { + // self->file_lines is zero based so subtract 1 from nrows + if (nrows > 0) { + self->skip_first_N_rows = nrows - 1; + } + + return 0; +} + +static int parser_buffer_bytes(parser_t *self, size_t nbytes, + const char *encoding_errors) { + int status; + size_t bytes_read; + + status = 0; + self->datapos = 0; + self->data = self->cb_io(self->source, nbytes, &bytes_read, &status, + encoding_errors); + TRACE(( + "parser_buffer_bytes self->cb_io: nbytes=%zu, datalen: %d, status=%d\n", + nbytes, bytes_read, status)); + self->datalen = bytes_read; + + if (status != REACHED_EOF && self->data == NULL) { + int64_t bufsize = 200; + self->error_msg = malloc(bufsize); + + if (status == CALLING_READ_FAILED) { + snprintf(self->error_msg, bufsize, + "Calling read(nbytes) on source failed. " + "Try engine='python'."); + } else { + snprintf(self->error_msg, bufsize, "Unknown error in IO callback"); + } + return -1; + } + + TRACE(("datalen: %d\n", self->datalen)); + + return status; +} + +/* + + Tokenization macros and state machine code + +*/ + +#define PUSH_CHAR(c) \ + TRACE( \ + ("PUSH_CHAR: Pushing %c, slen= %d, stream_cap=%zu, stream_len=%zu\n", \ + c, slen, self->stream_cap, self->stream_len)) \ + if (slen >= self->stream_cap) { \ + TRACE(("PUSH_CHAR: ERROR!!! 
slen(%d) >= stream_cap(%d)\n", slen, \ + self->stream_cap)) \ + int64_t bufsize = 100; \ + self->error_msg = malloc(bufsize); \ + snprintf(self->error_msg, bufsize, \ + "Buffer overflow caught - possible malformed input file.\n");\ + return PARSER_OUT_OF_MEMORY; \ + } \ + *stream++ = c; \ + slen++; + +// This is a little bit of a hack but works for now + +#define END_FIELD() \ + self->stream_len = slen; \ + if (end_field(self) < 0) { \ + goto parsingerror; \ + } \ + stream = self->stream + self->stream_len; \ + slen = self->stream_len; + +#define END_LINE_STATE(STATE) \ + self->stream_len = slen; \ + if (end_line(self) < 0) { \ + goto parsingerror; \ + } \ + stream = self->stream + self->stream_len; \ + slen = self->stream_len; \ + self->state = STATE; \ + if (line_limit > 0 && self->lines == start_lines + line_limit) { \ + goto linelimit; \ + } + +#define END_LINE_AND_FIELD_STATE(STATE) \ + self->stream_len = slen; \ + if (end_line(self) < 0) { \ + goto parsingerror; \ + } \ + if (end_field(self) < 0) { \ + goto parsingerror; \ + } \ + stream = self->stream + self->stream_len; \ + slen = self->stream_len; \ + self->state = STATE; \ + if (line_limit > 0 && self->lines == start_lines + line_limit) { \ + goto linelimit; \ + } + +#define END_LINE() END_LINE_STATE(START_RECORD) + +#define IS_TERMINATOR(c) \ + (c == line_terminator) + +#define IS_QUOTE(c) ((c == self->quotechar && self->quoting != QUOTE_NONE)) + +// don't parse '\r' with a custom line terminator +#define IS_CARRIAGE(c) (c == carriage_symbol) + +#define IS_COMMENT_CHAR(c) (c == comment_symbol) + +#define IS_ESCAPE_CHAR(c) (c == escape_symbol) + +#define IS_SKIPPABLE_SPACE(c) \ + ((!self->delim_whitespace && c == ' ' && self->skipinitialspace)) + +// applied when in a field +#define IS_DELIMITER(c) \ + ((!self->delim_whitespace && c == self->delimiter) || \ + (self->delim_whitespace && isblank(c))) + +#define _TOKEN_CLEANUP() \ + self->stream_len = slen; \ + self->datapos = i; \ + TRACE(("_TOKEN_CLEANUP: datapos: %d, datalen: %d\n", self->datapos, \ + self->datalen)); + +#define CHECK_FOR_BOM() \ + if (*buf == '\xef' && *(buf + 1) == '\xbb' && *(buf + 2) == '\xbf') { \ + buf += 3; \ + self->datapos += 3; \ + } + +int skip_this_line(parser_t *self, int64_t rownum) { + int should_skip; + PyObject *result; + PyGILState_STATE state; + + if (self->skipfunc != NULL) { + state = PyGILState_Ensure(); + result = PyObject_CallFunction(self->skipfunc, "i", rownum); + + // Error occurred. It will be processed + // and caught at the Cython level. + if (result == NULL) { + should_skip = -1; + } else { + should_skip = PyObject_IsTrue(result); + } + + Py_XDECREF(result); + PyGILState_Release(state); + + return should_skip; + } else if (self->skipset != NULL) { + return (kh_get_int64((kh_int64_t *)self->skipset, self->file_lines) != + ((kh_int64_t *)self->skipset)->n_buckets); + } else { + return (rownum <= self->skip_first_N_rows); + } +} + +int tokenize_bytes(parser_t *self, + size_t line_limit, uint64_t start_lines) { + int64_t i; + uint64_t slen; + int should_skip; + char c; + char *stream; + char *buf = self->data + self->datapos; + + const char line_terminator = (self->lineterminator == '\0') ? + '\n' : self->lineterminator; + + // 1000 is something that couldn't fit in "char" + // thus comparing a char to it would always be "false" + const int carriage_symbol = (self->lineterminator == '\0') ? '\r' : 1000; + const int comment_symbol = (self->commentchar != '\0') ? 
+ self->commentchar : 1000; + const int escape_symbol = (self->escapechar != '\0') ? + self->escapechar : 1000; + + if (make_stream_space(self, self->datalen - self->datapos) < 0) { + int64_t bufsize = 100; + self->error_msg = malloc(bufsize); + snprintf(self->error_msg, bufsize, "out of memory"); + return -1; + } + + stream = self->stream + self->stream_len; + slen = self->stream_len; + + TRACE(("%s\n", buf)); + + if (self->file_lines == 0) { + CHECK_FOR_BOM(); + } + + for (i = self->datapos; i < self->datalen; ++i) { + // next character in file + c = *buf++; + + TRACE( + ("tokenize_bytes - Iter: %d Char: 0x%x Line %d field_count %d, " + "state %d\n", + i, c, self->file_lines + 1, self->line_fields[self->lines], + self->state)); + + switch (self->state) { + case START_FIELD_IN_SKIP_LINE: + if (IS_TERMINATOR(c)) { + END_LINE(); + } else if (IS_CARRIAGE(c)) { + self->file_lines++; + self->state = EAT_CRNL_NOP; + } else if (IS_QUOTE(c)) { + self->state = IN_QUOTED_FIELD_IN_SKIP_LINE; + } else if (IS_DELIMITER(c)) { + // Do nothing, we're starting a new field again. + } else { + self->state = IN_FIELD_IN_SKIP_LINE; + } + break; + + case IN_FIELD_IN_SKIP_LINE: + if (IS_TERMINATOR(c)) { + END_LINE(); + } else if (IS_CARRIAGE(c)) { + self->file_lines++; + self->state = EAT_CRNL_NOP; + } else if (IS_DELIMITER(c)) { + self->state = START_FIELD_IN_SKIP_LINE; + } + break; + + case IN_QUOTED_FIELD_IN_SKIP_LINE: + if (IS_QUOTE(c)) { + if (self->doublequote) { + self->state = QUOTE_IN_QUOTED_FIELD_IN_SKIP_LINE; + } else { + self->state = IN_FIELD_IN_SKIP_LINE; + } + } + break; + + case QUOTE_IN_QUOTED_FIELD_IN_SKIP_LINE: + if (IS_QUOTE(c)) { + self->state = IN_QUOTED_FIELD_IN_SKIP_LINE; + } else if (IS_TERMINATOR(c)) { + END_LINE(); + } else if (IS_CARRIAGE(c)) { + self->file_lines++; + self->state = EAT_CRNL_NOP; + } else if (IS_DELIMITER(c)) { + self->state = START_FIELD_IN_SKIP_LINE; + } else { + self->state = IN_FIELD_IN_SKIP_LINE; + } + break; + + case WHITESPACE_LINE: + if (IS_TERMINATOR(c)) { + self->file_lines++; + self->state = START_RECORD; + break; + } else if (IS_CARRIAGE(c)) { + self->file_lines++; + self->state = EAT_CRNL_NOP; + break; + } else if (!self->delim_whitespace) { + if (isblank(c) && c != self->delimiter) { + } else { // backtrack + // use i + 1 because buf has been incremented but not i + do { + --buf; + --i; + } while (i + 1 > self->datapos && !IS_TERMINATOR(*buf)); + + // reached a newline rather than the beginning + if (IS_TERMINATOR(*buf)) { + ++buf; // move pointer to first char after newline + ++i; + } + self->state = START_FIELD; + } + break; + } + // fall through + + case EAT_WHITESPACE: + if (IS_TERMINATOR(c)) { + END_LINE(); + self->state = START_RECORD; + break; + } else if (IS_CARRIAGE(c)) { + self->state = EAT_CRNL; + break; + } else if (IS_COMMENT_CHAR(c)) { + self->state = EAT_COMMENT; + break; + } else if (!isblank(c)) { + self->state = START_FIELD; + // fall through to subsequent state + } else { + // if whitespace char, keep slurping + break; + } + + case START_RECORD: + // start of record + should_skip = skip_this_line(self, self->file_lines); + + if (should_skip == -1) { + goto parsingerror; + } else if (should_skip) { + if (IS_QUOTE(c)) { + self->state = IN_QUOTED_FIELD_IN_SKIP_LINE; + } else { + self->state = IN_FIELD_IN_SKIP_LINE; + + if (IS_TERMINATOR(c)) { + END_LINE(); + } + } + break; + } else if (IS_TERMINATOR(c)) { + // \n\r possible? 
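+                // Editor's note (illustration, not in the original
+                // source): with skip_empty_lines set, input such as
+                // "a,b\n\nc,d\n" yields exactly two records; the bare
+                // terminator seen here only bumps file_lines below and
+                // never emits an empty record.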
+ if (self->skip_empty_lines) { + self->file_lines++; + } else { + END_LINE(); + } + break; + } else if (IS_CARRIAGE(c)) { + if (self->skip_empty_lines) { + self->file_lines++; + self->state = EAT_CRNL_NOP; + } else { + self->state = EAT_CRNL; + } + break; + } else if (IS_COMMENT_CHAR(c)) { + self->state = EAT_LINE_COMMENT; + break; + } else if (isblank(c)) { + if (self->delim_whitespace) { + if (self->skip_empty_lines) { + self->state = WHITESPACE_LINE; + } else { + self->state = EAT_WHITESPACE; + } + break; + } else if (c != self->delimiter && self->skip_empty_lines) { + self->state = WHITESPACE_LINE; + break; + } + // fall through + } + + // normal character - fall through + // to handle as START_FIELD + self->state = START_FIELD; + + case START_FIELD: + // expecting field + if (IS_TERMINATOR(c)) { + END_FIELD(); + END_LINE(); + } else if (IS_CARRIAGE(c)) { + END_FIELD(); + self->state = EAT_CRNL; + } else if (IS_QUOTE(c)) { + // start quoted field + self->state = IN_QUOTED_FIELD; + } else if (IS_ESCAPE_CHAR(c)) { + // possible escaped character + self->state = ESCAPED_CHAR; + } else if (IS_SKIPPABLE_SPACE(c)) { + // ignore space at start of field + } else if (IS_DELIMITER(c)) { + if (self->delim_whitespace) { + self->state = EAT_WHITESPACE; + } else { + // save empty field + END_FIELD(); + } + } else if (IS_COMMENT_CHAR(c)) { + END_FIELD(); + self->state = EAT_COMMENT; + } else { + // begin new unquoted field + PUSH_CHAR(c); + self->state = IN_FIELD; + } + break; + + case ESCAPED_CHAR: + PUSH_CHAR(c); + self->state = IN_FIELD; + break; + + case EAT_LINE_COMMENT: + if (IS_TERMINATOR(c)) { + self->file_lines++; + self->state = START_RECORD; + } else if (IS_CARRIAGE(c)) { + self->file_lines++; + self->state = EAT_CRNL_NOP; + } + break; + + case IN_FIELD: + // in unquoted field + if (IS_TERMINATOR(c)) { + END_FIELD(); + END_LINE(); + } else if (IS_CARRIAGE(c)) { + END_FIELD(); + self->state = EAT_CRNL; + } else if (IS_ESCAPE_CHAR(c)) { + // possible escaped character + self->state = ESCAPED_CHAR; + } else if (IS_DELIMITER(c)) { + // end of field - end of line not reached yet + END_FIELD(); + + if (self->delim_whitespace) { + self->state = EAT_WHITESPACE; + } else { + self->state = START_FIELD; + } + } else if (IS_COMMENT_CHAR(c)) { + END_FIELD(); + self->state = EAT_COMMENT; + } else { + // normal character - save in field + PUSH_CHAR(c); + } + break; + + case IN_QUOTED_FIELD: + // in quoted field + if (IS_ESCAPE_CHAR(c)) { + // possible escape character + self->state = ESCAPE_IN_QUOTED_FIELD; + } else if (IS_QUOTE(c)) { + if (self->doublequote) { + // double quote - " represented by "" + self->state = QUOTE_IN_QUOTED_FIELD; + } else { + // end of quote part of field + self->state = IN_FIELD; + } + } else { + // normal character - save in field + PUSH_CHAR(c); + } + break; + + case ESCAPE_IN_QUOTED_FIELD: + PUSH_CHAR(c); + self->state = IN_QUOTED_FIELD; + break; + + case QUOTE_IN_QUOTED_FIELD: + // double quote - seen a quote in an quoted field + if (IS_QUOTE(c)) { + // save "" as " + + PUSH_CHAR(c); + self->state = IN_QUOTED_FIELD; + } else if (IS_DELIMITER(c)) { + // end of field - end of line not reached yet + END_FIELD(); + + if (self->delim_whitespace) { + self->state = EAT_WHITESPACE; + } else { + self->state = START_FIELD; + } + } else if (IS_TERMINATOR(c)) { + END_FIELD(); + END_LINE(); + } else if (IS_CARRIAGE(c)) { + END_FIELD(); + self->state = EAT_CRNL; + } else { + PUSH_CHAR(c); + self->state = IN_FIELD; + } + break; + + case EAT_COMMENT: + if (IS_TERMINATOR(c)) { + 
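+                // Editor's note (illustration, not in the original
+                // source): a trailing comment such as "a,b#junk\n"
+                // reaches EAT_COMMENT only after END_FIELD() stored "b",
+                // so the comment text is dropped and END_LINE() below
+                // closes the record at the terminator.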
END_LINE(); + } else if (IS_CARRIAGE(c)) { + self->state = EAT_CRNL; + } + break; + + // only occurs with non-custom line terminator, + // which is why we directly check for '\n' + case EAT_CRNL: + if (c == '\n') { + END_LINE(); + } else if (IS_DELIMITER(c)) { + if (self->delim_whitespace) { + END_LINE_STATE(EAT_WHITESPACE); + } else { + // Handle \r-delimited files + END_LINE_AND_FIELD_STATE(START_FIELD); + } + } else { + if (self->delim_whitespace) { + /* XXX + * first character of a new record--need to back up and + * reread + * to handle properly... + */ + i--; + buf--; // back up one character (HACK!) + END_LINE_STATE(START_RECORD); + } else { + // \r line terminator + // UGH. we don't actually want + // to consume the token. fix this later + self->stream_len = slen; + if (end_line(self) < 0) { + goto parsingerror; + } + + stream = self->stream + self->stream_len; + slen = self->stream_len; + self->state = START_RECORD; + + --i; + buf--; // let's try this character again (HACK!) + if (line_limit > 0 && + self->lines == start_lines + line_limit) { + goto linelimit; + } + } + } + break; + + // only occurs with non-custom line terminator, + // which is why we directly check for '\n' + case EAT_CRNL_NOP: // inside an ignored comment line + self->state = START_RECORD; + // \r line terminator -- parse this character again + if (c != '\n' && !IS_DELIMITER(c)) { + --i; + --buf; + } + break; + default: + break; + } + } + + _TOKEN_CLEANUP(); + + TRACE(("Finished tokenizing input\n")) + + return 0; + +parsingerror: + i++; + _TOKEN_CLEANUP(); + + return -1; + +linelimit: + i++; + _TOKEN_CLEANUP(); + + return 0; +} + +static int parser_handle_eof(parser_t *self) { + int64_t bufsize = 100; + + TRACE( + ("handling eof, datalen: %d, pstate: %d\n", self->datalen, self->state)) + + if (self->datalen != 0) return -1; + + switch (self->state) { + case START_RECORD: + case WHITESPACE_LINE: + case EAT_CRNL_NOP: + case EAT_LINE_COMMENT: + return 0; + + case ESCAPE_IN_QUOTED_FIELD: + case IN_QUOTED_FIELD: + self->error_msg = (char *)malloc(bufsize); + snprintf(self->error_msg, bufsize, + "EOF inside string starting at row %" PRIu64, + self->file_lines); + return -1; + + case ESCAPED_CHAR: + self->error_msg = (char *)malloc(bufsize); + snprintf(self->error_msg, bufsize, + "EOF following escape character"); + return -1; + + case IN_FIELD: + case START_FIELD: + case QUOTE_IN_QUOTED_FIELD: + if (end_field(self) < 0) return -1; + break; + + default: + break; + } + + if (end_line(self) < 0) + return -1; + else + return 0; +} + +int parser_consume_rows(parser_t *self, size_t nrows) { + int64_t offset, word_deletions; + uint64_t char_count, i; + + if (nrows > self->lines) { + nrows = self->lines; + } + + /* do nothing */ + if (nrows == 0) return 0; + + /* cannot guarantee that nrows + 1 has been observed */ + word_deletions = self->line_start[nrows - 1] + self->line_fields[nrows - 1]; + if (word_deletions >= 1) { + char_count = (self->word_starts[word_deletions - 1] + + strlen(self->words[word_deletions - 1]) + 1); + } else { + /* if word_deletions == 0 (i.e. 
this case) then char_count must + * be 0 too, as no data needs to be skipped */ + char_count = 0; + } + + TRACE(("parser_consume_rows: Deleting %d words, %d chars\n", word_deletions, + char_count)); + + /* move stream, only if something to move */ + if (char_count < self->stream_len) { + memmove(self->stream, (self->stream + char_count), + self->stream_len - char_count); + } + /* buffer counts */ + self->stream_len -= char_count; + + /* move token metadata */ + // Note: We should always have words_len < word_deletions, so this + // subtraction will remain appropriately-typed. + for (i = 0; i < self->words_len - word_deletions; ++i) { + offset = i + word_deletions; + + self->words[i] = self->words[offset] - char_count; + self->word_starts[i] = self->word_starts[offset] - char_count; + } + self->words_len -= word_deletions; + + /* move current word pointer to stream */ + self->pword_start -= char_count; + self->word_start -= char_count; + + /* move line metadata */ + // Note: We should always have self->lines - nrows + 1 >= 0, so this + // subtraction will remain appropriately-typed. + for (i = 0; i < self->lines - nrows + 1; ++i) { + offset = i + nrows; + self->line_start[i] = self->line_start[offset] - word_deletions; + self->line_fields[i] = self->line_fields[offset]; + } + self->lines -= nrows; + + return 0; +} + +static size_t _next_pow2(size_t sz) { + size_t result = 1; + while (result < sz) result *= 2; + return result; +} + +int parser_trim_buffers(parser_t *self) { + /* + Free memory + */ + size_t new_cap; + void *newptr; + + uint64_t i; + + /** + * Before we free up space and trim, we should + * save how many words we saw when parsing, if + * it exceeds the maximum number we saw before. + * + * This is important for when we read in chunks, + * so that we can inform subsequent chunk parsing + * as to how many words we could possibly see. + */ + if (self->words_cap > self->max_words_cap) { + self->max_words_cap = self->words_cap; + } + + /* trim words, word_starts */ + new_cap = _next_pow2(self->words_len) + 1; + if (new_cap < self->words_cap) { + TRACE(("parser_trim_buffers: new_cap < self->words_cap\n")); + self->words = realloc(self->words, new_cap * sizeof(char *)); + if (self->words == NULL) { + return PARSER_OUT_OF_MEMORY; + } + self->word_starts = realloc(self->word_starts, + new_cap * sizeof(int64_t)); + if (self->word_starts == NULL) { + return PARSER_OUT_OF_MEMORY; + } + self->words_cap = new_cap; + } + + /* trim stream */ + new_cap = _next_pow2(self->stream_len) + 1; + TRACE( + ("parser_trim_buffers: new_cap = %zu, stream_cap = %zu, lines_cap = " + "%zu\n", + new_cap, self->stream_cap, self->lines_cap)); + if (new_cap < self->stream_cap) { + TRACE( + ("parser_trim_buffers: new_cap < self->stream_cap, calling " + "realloc\n")); + newptr = realloc(self->stream, new_cap); + if (newptr == NULL) { + return PARSER_OUT_OF_MEMORY; + } else { + // Update the pointers in the self->words array (char **) if + // `realloc` + // moved the `self->stream` buffer. This block mirrors a similar + // block in + // `make_stream_space`. 
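+        // Editor's sketch of the invariant preserved here (restating the
+        // code below, not part of the original source): every cached
+        // word pointer must satisfy words[i] == stream + word_starts[i],
+        // so when realloc() returns a moved block each pointer is rebased
+        // against the new base address:
+        //     words[i] = (char *)newptr + word_starts[i];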
+ if (self->stream != newptr) { + self->pword_start = (char *)newptr + self->word_start; + + for (i = 0; i < self->words_len; ++i) { + self->words[i] = (char *)newptr + self->word_starts[i]; + } + } + + self->stream = newptr; + self->stream_cap = new_cap; + } + } + + /* trim line_start, line_fields */ + new_cap = _next_pow2(self->lines) + 1; + if (new_cap < self->lines_cap) { + TRACE(("parser_trim_buffers: new_cap < self->lines_cap\n")); + newptr = realloc(self->line_start, + new_cap * sizeof(int64_t)); + if (newptr == NULL) { + return PARSER_OUT_OF_MEMORY; + } else { + self->line_start = newptr; + } + newptr = realloc(self->line_fields, + new_cap * sizeof(int64_t)); + if (newptr == NULL) { + return PARSER_OUT_OF_MEMORY; + } else { + self->line_fields = newptr; + self->lines_cap = new_cap; + } + } + + return 0; +} + +/* + nrows : number of rows to tokenize (or until reach EOF) + all : tokenize all the data vs. certain number of rows + */ + +int _tokenize_helper(parser_t *self, size_t nrows, int all, + const char *encoding_errors) { + int status = 0; + uint64_t start_lines = self->lines; + + if (self->state == FINISHED) { + return 0; + } + + TRACE(( + "_tokenize_helper: Asked to tokenize %d rows, datapos=%d, datalen=%d\n", + nrows, self->datapos, self->datalen)); + + while (1) { + if (!all && self->lines - start_lines >= nrows) break; + + if (self->datapos == self->datalen) { + status = parser_buffer_bytes(self, self->chunksize, + encoding_errors); + + if (status == REACHED_EOF) { + // close out last line + status = parser_handle_eof(self); + self->state = FINISHED; + break; + } else if (status != 0) { + return status; + } + } + + TRACE( + ("_tokenize_helper: Trying to process %d bytes, datalen=%d, " + "datapos= %d\n", + self->datalen - self->datapos, self->datalen, self->datapos)); + + status = tokenize_bytes(self, nrows, start_lines); + + if (status < 0) { + // XXX + TRACE( + ("_tokenize_helper: Status %d returned from tokenize_bytes, " + "breaking\n", + status)); + status = -1; + break; + } + } + TRACE(("leaving tokenize_helper\n")); + return status; +} + +int tokenize_nrows(parser_t *self, size_t nrows, const char *encoding_errors) { + int status = _tokenize_helper(self, nrows, 0, encoding_errors); + return status; +} + +int tokenize_all_rows(parser_t *self, const char *encoding_errors) { + int status = _tokenize_helper(self, -1, 1, encoding_errors); + return status; +} + +/* + * Function: to_boolean + * -------------------- + * + * Validate if item should be recognized as a boolean field. + * + * item: const char* representing parsed text + * val : pointer to a uint8_t of boolean representation + * + * If item is determined to be boolean, this method will set + * the appropriate value of val and return 0. A non-zero exit + * status means that item was not inferred to be boolean, and + * leaves the value of *val unmodified. + */ +int to_boolean(const char *item, uint8_t *val) { + if (strcasecmp(item, "TRUE") == 0) { + *val = 1; + return 0; + } else if (strcasecmp(item, "FALSE") == 0) { + *val = 0; + return 0; + } + + return -1; +} + +// --------------------------------------------------------------------------- +// Implementation of xstrtod + +// +// strtod.c +// +// Convert string to double +// +// Copyright (C) 2002 Michael Ringgaard. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. Neither the name of the project nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +// SUCH DAMAGE. +// +// ----------------------------------------------------------------------- +// Modifications by Warren Weckesser, March 2011: +// * Rename strtod() to xstrtod(). +// * Added decimal and sci arguments. +// * Skip trailing spaces. +// * Commented out the other functions. +// Modifications by Richard T Guy, August 2013: +// * Add tsep argument for thousands separator +// + +// pessimistic but quick assessment, +// assuming that each decimal digit requires 4 bits to store +const int max_int_decimal_digits = (sizeof(unsigned int) * 8) / 4; + +double xstrtod(const char *str, char **endptr, char decimal, char sci, + char tsep, int skip_trailing, int *error, int *maybe_int) { + double number; + unsigned int i_number = 0; + int exponent; + int negative; + char *p = (char *)str; + double p10; + int n; + int num_digits; + int num_decimals; + + if (maybe_int != NULL) *maybe_int = 1; + // Skip leading whitespace. + while (isspace_ascii(*p)) p++; + + // Handle optional sign. + negative = 0; + switch (*p) { + case '-': + negative = 1; // Fall through to increment position. + case '+': + p++; + } + + exponent = 0; + num_digits = 0; + num_decimals = 0; + + // Process string of digits. + while (isdigit_ascii(*p) && num_digits <= max_int_decimal_digits) { + i_number = i_number * 10 + (*p - '0'); + p++; + num_digits++; + + p += (tsep != '\0' && *p == tsep); + } + number = i_number; + + if (num_digits > max_int_decimal_digits) { + // process what's left as double + while (isdigit_ascii(*p)) { + number = number * 10. + (*p - '0'); + p++; + num_digits++; + + p += (tsep != '\0' && *p == tsep); + } + } + + // Process decimal part. + if (*p == decimal) { + if (maybe_int != NULL) *maybe_int = 0; + p++; + + while (isdigit_ascii(*p)) { + number = number * 10. + (*p - '0'); + p++; + num_digits++; + num_decimals++; + } + + exponent -= num_decimals; + } + + if (num_digits == 0) { + *error = ERANGE; + return 0.0; + } + + // Correct for sign. + if (negative) number = -number; + + // Process an exponent string. + if (toupper_ascii(*p) == toupper_ascii(sci)) { + if (maybe_int != NULL) *maybe_int = 0; + + // Handle optional sign. 
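+    // Editor's note (illustration, not in the original source): the
+    // switch below relies on case fall-through -- '-' sets the flag and
+    // then falls into the '+' case, so p advances exactly once for
+    // either sign; e.g. "1e-5" and "1e+5" both leave p on the '5'.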
+ negative = 0; + switch (*++p) { + case '-': + negative = 1; // Fall through to increment pos. + case '+': + p++; + } + + // Process string of digits. + num_digits = 0; + n = 0; + while (isdigit_ascii(*p)) { + n = n * 10 + (*p - '0'); + num_digits++; + p++; + } + + if (negative) + exponent -= n; + else + exponent += n; + + // If no digits, after the 'e'/'E', un-consume it + if (num_digits == 0) p--; + } + + if (exponent < DBL_MIN_EXP || exponent > DBL_MAX_EXP) { + *error = ERANGE; + return HUGE_VAL; + } + + // Scale the result. + p10 = 10.; + n = exponent; + if (n < 0) n = -n; + while (n) { + if (n & 1) { + if (exponent < 0) + number /= p10; + else + number *= p10; + } + n >>= 1; + p10 *= p10; + } + + if (number == HUGE_VAL) { + *error = ERANGE; + } + + if (skip_trailing) { + // Skip trailing whitespace. + while (isspace_ascii(*p)) p++; + } + + if (endptr) *endptr = p; + return number; +} + +double precise_xstrtod(const char *str, char **endptr, char decimal, + char sci, char tsep, int skip_trailing, + int *error, int *maybe_int) { + double number; + int exponent; + int negative; + char *p = (char *)str; + int num_digits; + int num_decimals; + int max_digits = 17; + int n; + + if (maybe_int != NULL) *maybe_int = 1; + // Cache powers of 10 in memory. + static double e[] = { + 1., 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, + 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, + 1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29, + 1e30, 1e31, 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39, + 1e40, 1e41, 1e42, 1e43, 1e44, 1e45, 1e46, 1e47, 1e48, 1e49, + 1e50, 1e51, 1e52, 1e53, 1e54, 1e55, 1e56, 1e57, 1e58, 1e59, + 1e60, 1e61, 1e62, 1e63, 1e64, 1e65, 1e66, 1e67, 1e68, 1e69, + 1e70, 1e71, 1e72, 1e73, 1e74, 1e75, 1e76, 1e77, 1e78, 1e79, + 1e80, 1e81, 1e82, 1e83, 1e84, 1e85, 1e86, 1e87, 1e88, 1e89, + 1e90, 1e91, 1e92, 1e93, 1e94, 1e95, 1e96, 1e97, 1e98, 1e99, + 1e100, 1e101, 1e102, 1e103, 1e104, 1e105, 1e106, 1e107, 1e108, 1e109, + 1e110, 1e111, 1e112, 1e113, 1e114, 1e115, 1e116, 1e117, 1e118, 1e119, + 1e120, 1e121, 1e122, 1e123, 1e124, 1e125, 1e126, 1e127, 1e128, 1e129, + 1e130, 1e131, 1e132, 1e133, 1e134, 1e135, 1e136, 1e137, 1e138, 1e139, + 1e140, 1e141, 1e142, 1e143, 1e144, 1e145, 1e146, 1e147, 1e148, 1e149, + 1e150, 1e151, 1e152, 1e153, 1e154, 1e155, 1e156, 1e157, 1e158, 1e159, + 1e160, 1e161, 1e162, 1e163, 1e164, 1e165, 1e166, 1e167, 1e168, 1e169, + 1e170, 1e171, 1e172, 1e173, 1e174, 1e175, 1e176, 1e177, 1e178, 1e179, + 1e180, 1e181, 1e182, 1e183, 1e184, 1e185, 1e186, 1e187, 1e188, 1e189, + 1e190, 1e191, 1e192, 1e193, 1e194, 1e195, 1e196, 1e197, 1e198, 1e199, + 1e200, 1e201, 1e202, 1e203, 1e204, 1e205, 1e206, 1e207, 1e208, 1e209, + 1e210, 1e211, 1e212, 1e213, 1e214, 1e215, 1e216, 1e217, 1e218, 1e219, + 1e220, 1e221, 1e222, 1e223, 1e224, 1e225, 1e226, 1e227, 1e228, 1e229, + 1e230, 1e231, 1e232, 1e233, 1e234, 1e235, 1e236, 1e237, 1e238, 1e239, + 1e240, 1e241, 1e242, 1e243, 1e244, 1e245, 1e246, 1e247, 1e248, 1e249, + 1e250, 1e251, 1e252, 1e253, 1e254, 1e255, 1e256, 1e257, 1e258, 1e259, + 1e260, 1e261, 1e262, 1e263, 1e264, 1e265, 1e266, 1e267, 1e268, 1e269, + 1e270, 1e271, 1e272, 1e273, 1e274, 1e275, 1e276, 1e277, 1e278, 1e279, + 1e280, 1e281, 1e282, 1e283, 1e284, 1e285, 1e286, 1e287, 1e288, 1e289, + 1e290, 1e291, 1e292, 1e293, 1e294, 1e295, 1e296, 1e297, 1e298, 1e299, + 1e300, 1e301, 1e302, 1e303, 1e304, 1e305, 1e306, 1e307, 1e308}; + + // Skip leading whitespace. + while (isspace_ascii(*p)) p++; + + // Handle optional sign. 
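+    // Editor's design note (illustration, not in the original source):
+    // where xstrtod() scales its result by repeated squaring of 10,
+    // precise_xstrtod() finishes with a single lookup in the static e[]
+    // table above (e.g. e[5] == 1e5), spending roughly 2.4 KiB of
+    // read-only data to avoid the extra intermediate roundings.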
+ negative = 0; + switch (*p) { + case '-': + negative = 1; // Fall through to increment position. + case '+': + p++; + } + + number = 0.; + exponent = 0; + num_digits = 0; + num_decimals = 0; + + // Process string of digits. + while (isdigit_ascii(*p)) { + if (num_digits < max_digits) { + number = number * 10. + (*p - '0'); + num_digits++; + } else { + ++exponent; + } + + p++; + p += (tsep != '\0' && *p == tsep); + } + + // Process decimal part + if (*p == decimal) { + if (maybe_int != NULL) *maybe_int = 0; + p++; + + while (num_digits < max_digits && isdigit_ascii(*p)) { + number = number * 10. + (*p - '0'); + p++; + num_digits++; + num_decimals++; + } + + if (num_digits >= max_digits) // Consume extra decimal digits. + while (isdigit_ascii(*p)) ++p; + + exponent -= num_decimals; + } + + if (num_digits == 0) { + *error = ERANGE; + return 0.0; + } + + // Correct for sign. + if (negative) number = -number; + + // Process an exponent string. + if (toupper_ascii(*p) == toupper_ascii(sci)) { + if (maybe_int != NULL) *maybe_int = 0; + + // Handle optional sign + negative = 0; + switch (*++p) { + case '-': + negative = 1; // Fall through to increment pos. + case '+': + p++; + } + + // Process string of digits. + num_digits = 0; + n = 0; + while (num_digits < max_digits && isdigit_ascii(*p)) { + n = n * 10 + (*p - '0'); + num_digits++; + p++; + } + + if (negative) + exponent -= n; + else + exponent += n; + + // If no digits after the 'e'/'E', un-consume it. + if (num_digits == 0) p--; + } + + if (exponent > 308) { + *error = ERANGE; + return HUGE_VAL; + } else if (exponent > 0) { + number *= e[exponent]; + } else if (exponent < -308) { // Subnormal + if (exponent < -616) { // Prevent invalid array access. + number = 0.; + } else { + number /= e[-308 - exponent]; + number /= e[308]; + } + + } else { + number /= e[-exponent]; + } + + if (number == HUGE_VAL || number == -HUGE_VAL) *error = ERANGE; + + if (skip_trailing) { + // Skip trailing whitespace. + while (isspace_ascii(*p)) p++; + } + + if (endptr) *endptr = p; + return number; +} + +/* copy a decimal number string with `decimal`, `tsep` as decimal point + and thousands separator to an equivalent c-locale decimal string (striping + `tsep`, replacing `decimal` with '.'). The returned memory should be free-d + with a call to `free`. +*/ + +char* _str_copy_decimal_str_c(const char *s, char **endpos, char decimal, + char tsep) { + const char *p = s; + size_t length = strlen(s); + char *s_copy = malloc(length + 1); + char *dst = s_copy; + // Skip leading whitespace. + while (isspace_ascii(*p)) p++; + // Copy Leading sign + if (*p == '+' || *p == '-') { + *dst++ = *p++; + } + // Copy integer part dropping `tsep` + while (isdigit_ascii(*p)) { + *dst++ = *p++; + p += (tsep != '\0' && *p == tsep); + } + // Replace `decimal` with '.' + if (*p == decimal) { + *dst++ = '.'; + p++; + } + // Copy fractional part after decimal (if any) + while (isdigit_ascii(*p)) { + *dst++ = *p++; + } + // Copy exponent if any + if (toupper_ascii(*p) == toupper_ascii('E')) { + *dst++ = *p++; + // Copy leading exponent sign (if any) + if (*p == '+' || *p == '-') { + *dst++ = *p++; + } + // Copy exponent digits + while (isdigit_ascii(*p)) { + *dst++ = *p++; + } + } + *dst++ = '\0'; // terminate + if (endpos != NULL) + *endpos = (char *)p; + return s_copy; +} + + +double round_trip(const char *p, char **q, char decimal, char sci, char tsep, + int skip_trailing, int *error, int *maybe_int) { + // 'normalize' representation to C-locale; replace decimal with '.' 
and + // remove t(housand)sep. + char *endptr; + char *pc = _str_copy_decimal_str_c(p, &endptr, decimal, tsep); + // This is called from a nogil block in parsers.pyx + // so need to explicitly get GIL before Python calls + PyGILState_STATE gstate; + gstate = PyGILState_Ensure(); + char *endpc; + double r = PyOS_string_to_double(pc, &endpc, 0); + // PyOS_string_to_double needs to consume the whole string + if (endpc == pc + strlen(pc)) { + if (q != NULL) { + // report endptr from source string (p) + *q = endptr; + } + } else { + *error = -1; + if (q != NULL) { + // p and pc are different len due to tsep removal. Can't report + // how much it has consumed of p. Just rewind to beginning. + *q = (char *)p; // TODO(willayd): this could be undefined behavior + } + } + if (maybe_int != NULL) *maybe_int = 0; + if (PyErr_Occurred() != NULL) *error = -1; + else if (r == Py_HUGE_VAL) *error = (int)Py_HUGE_VAL; + PyErr_Clear(); + + PyGILState_Release(gstate); + free(pc); + if (skip_trailing && q != NULL && *q != p) { + while (isspace_ascii(**q)) { + (*q)++; + } + } + return r; +} + +// End of xstrtod code +// --------------------------------------------------------------------------- + +void uint_state_init(uint_state *self) { + self->seen_sint = 0; + self->seen_uint = 0; + self->seen_null = 0; +} + +int uint64_conflict(uint_state *self) { + return self->seen_uint && (self->seen_sint || self->seen_null); +} + +int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max, + int *error, char tsep) { + const char *p = p_item; + int isneg = 0; + int64_t number = 0; + int d; + + // Skip leading spaces. + while (isspace_ascii(*p)) { + ++p; + } + + // Handle sign. + if (*p == '-') { + isneg = 1; + ++p; + } else if (*p == '+') { + p++; + } + + // Check that there is a first digit. + if (!isdigit_ascii(*p)) { + // Error... + *error = ERROR_NO_DIGITS; + return 0; + } + + if (isneg) { + // If number is greater than pre_min, at least one more digit + // can be processed without overflowing. + int dig_pre_min = -(int_min % 10); + int64_t pre_min = int_min / 10; + + // Process the digits. + d = *p; + if (tsep != '\0') { + while (1) { + if (d == tsep) { + d = *++p; + continue; + } else if (!isdigit_ascii(d)) { + break; + } + if ((number > pre_min) || + ((number == pre_min) && (d - '0' <= dig_pre_min))) { + number = number * 10 - (d - '0'); + d = *++p; + } else { + *error = ERROR_OVERFLOW; + return 0; + } + } + } else { + while (isdigit_ascii(d)) { + if ((number > pre_min) || + ((number == pre_min) && (d - '0' <= dig_pre_min))) { + number = number * 10 - (d - '0'); + d = *++p; + } else { + *error = ERROR_OVERFLOW; + return 0; + } + } + } + } else { + // If number is less than pre_max, at least one more digit + // can be processed without overflowing. + int64_t pre_max = int_max / 10; + int dig_pre_max = int_max % 10; + + // Process the digits. + d = *p; + if (tsep != '\0') { + while (1) { + if (d == tsep) { + d = *++p; + continue; + } else if (!isdigit_ascii(d)) { + break; + } + if ((number < pre_max) || + ((number == pre_max) && (d - '0' <= dig_pre_max))) { + number = number * 10 + (d - '0'); + d = *++p; + + } else { + *error = ERROR_OVERFLOW; + return 0; + } + } + } else { + while (isdigit_ascii(d)) { + if ((number < pre_max) || + ((number == pre_max) && (d - '0' <= dig_pre_max))) { + number = number * 10 + (d - '0'); + d = *++p; + + } else { + *error = ERROR_OVERFLOW; + return 0; + } + } + } + } + + // Skip trailing spaces. 
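+    // Editor's worked example (not in the original source): with
+    // int_max == INT64_MAX == 9223372036854775807, pre_max is
+    // 922337203685477580 and dig_pre_max is 7, so the digit loops above
+    // accept "9223372036854775807" but report ERROR_OVERFLOW on the
+    // final digit of "9223372036854775808".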
+    while (isspace_ascii(*p)) {
+        ++p;
+    }
+
+    // Did we use up all the characters?
+    if (*p) {
+        *error = ERROR_INVALID_CHARS;
+        return 0;
+    }
+
+    *error = 0;
+    return number;
+}
+
+uint64_t str_to_uint64(uint_state *state, const char *p_item, int64_t int_max,
+                       uint64_t uint_max, int *error, char tsep) {
+    const char *p = p_item;
+    uint64_t pre_max = uint_max / 10;
+    int dig_pre_max = uint_max % 10;
+    uint64_t number = 0;
+    int d;
+
+    // Skip leading spaces.
+    while (isspace_ascii(*p)) {
+        ++p;
+    }
+
+    // Handle sign.
+    if (*p == '-') {
+        state->seen_sint = 1;
+        *error = 0;
+        return 0;
+    } else if (*p == '+') {
+        p++;
+    }
+
+    // Check that there is a first digit.
+    if (!isdigit_ascii(*p)) {
+        // Error...
+        *error = ERROR_NO_DIGITS;
+        return 0;
+    }
+
+    // If number is less than pre_max, at least one more digit
+    // can be processed without overflowing.
+    //
+    // Process the digits.
+    d = *p;
+    if (tsep != '\0') {
+        while (1) {
+            if (d == tsep) {
+                d = *++p;
+                continue;
+            } else if (!isdigit_ascii(d)) {
+                break;
+            }
+            if ((number < pre_max) ||
+                ((number == pre_max) && (d - '0' <= dig_pre_max))) {
+                number = number * 10 + (d - '0');
+                d = *++p;
+
+            } else {
+                *error = ERROR_OVERFLOW;
+                return 0;
+            }
+        }
+    } else {
+        while (isdigit_ascii(d)) {
+            if ((number < pre_max) ||
+                ((number == pre_max) && (d - '0' <= dig_pre_max))) {
+                number = number * 10 + (d - '0');
+                d = *++p;
+
+            } else {
+                *error = ERROR_OVERFLOW;
+                return 0;
+            }
+        }
+    }
+
+    // Skip trailing spaces.
+    while (isspace_ascii(*p)) {
+        ++p;
+    }
+
+    // Did we use up all the characters?
+    if (*p) {
+        *error = ERROR_INVALID_CHARS;
+        return 0;
+    }
+
+    if (number > (uint64_t)int_max) {
+        state->seen_uint = 1;
+    }
+
+    *error = 0;
+    return number;
+}
diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/src/parser/tokenizer.h b/.local/lib/python3.8/site-packages/pandas/_libs/src/parser/tokenizer.h
new file mode 100644
index 0000000..d403435
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/_libs/src/parser/tokenizer.h
@@ -0,0 +1,236 @@
+/*
+
+Copyright (c) 2012, Lambda Foundry, Inc., except where noted
+
+Incorporates components of WarrenWeckesser/textreader, licensed under 3-clause
+BSD
+
+See LICENSE for the license
+
+*/
+
+#ifndef PANDAS__LIBS_SRC_PARSER_TOKENIZER_H_
+#define PANDAS__LIBS_SRC_PARSER_TOKENIZER_H_
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+
+#define ERROR_NO_DIGITS 1
+#define ERROR_OVERFLOW 2
+#define ERROR_INVALID_CHARS 3
+
+#include "../headers/stdint.h"
+#include "../inline_helper.h"
+#include "../headers/portable.h"
+
+#include "khash.h"
+
+#define STREAM_INIT_SIZE 32
+
+#define REACHED_EOF 1
+#define CALLING_READ_FAILED 2
+
+
+/*
+
+  C flat file parsing low level code for pandas / NumPy
+
+ */
+
+/*
+ *  Common set of error types for the read_rows() and tokenize()
+ *  functions.
+ */
+
+// #define VERBOSE
+#if defined(VERBOSE)
+#define TRACE(X) printf X;
+#else
+#define TRACE(X)
+#endif  // VERBOSE
+
+#define PARSER_OUT_OF_MEMORY -1
+
+/*
+ *  TODO: Might want to couple count_rows() with read_rows() to avoid
+ *  duplication of some file I/O.
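+ *
+ *  Editor's usage sketch (hedged illustration, not part of the original
+ *  header; my_source, my_read and my_close stand for a caller-supplied
+ *  source object and io_callback/io_cleanup pair, and "strict" for an
+ *  encoding-errors mode):
+ *
+ *      parser_t *p = parser_new();
+ *      parser_set_default_options(p);
+ *      p->source = my_source;
+ *      p->cb_io = my_read;
+ *      p->cb_cleanup = my_close;
+ *      p->chunksize = 256 * 1024;
+ *      if (parser_init(p) == 0 &&
+ *          tokenize_all_rows(p, "strict") == 0) {
+ *          // p->words / p->line_start / p->line_fields now describe
+ *          // the tokenized stream
+ *      }
+ *      parser_free(p);
+ *      parser_del(p);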
+ */ + +typedef enum { + START_RECORD, + START_FIELD, + ESCAPED_CHAR, + IN_FIELD, + IN_QUOTED_FIELD, + ESCAPE_IN_QUOTED_FIELD, + QUOTE_IN_QUOTED_FIELD, + EAT_CRNL, + EAT_CRNL_NOP, + EAT_WHITESPACE, + EAT_COMMENT, + EAT_LINE_COMMENT, + WHITESPACE_LINE, + START_FIELD_IN_SKIP_LINE, + IN_FIELD_IN_SKIP_LINE, + IN_QUOTED_FIELD_IN_SKIP_LINE, + QUOTE_IN_QUOTED_FIELD_IN_SKIP_LINE, + FINISHED +} ParserState; + +typedef enum { + QUOTE_MINIMAL, + QUOTE_ALL, + QUOTE_NONNUMERIC, + QUOTE_NONE +} QuoteStyle; + +typedef enum { + ERROR, + WARN, + SKIP +} BadLineHandleMethod; + +typedef void *(*io_callback)(void *src, size_t nbytes, size_t *bytes_read, + int *status, const char *encoding_errors); +typedef int (*io_cleanup)(void *src); + +typedef struct parser_t { + void *source; + io_callback cb_io; + io_cleanup cb_cleanup; + + int64_t chunksize; // Number of bytes to prepare for each chunk + char *data; // pointer to data to be processed + int64_t datalen; // amount of data available + int64_t datapos; + + // where to write out tokenized data + char *stream; + uint64_t stream_len; + uint64_t stream_cap; + + // Store words in (potentially ragged) matrix for now, hmm + char **words; + int64_t *word_starts; // where we are in the stream + uint64_t words_len; + uint64_t words_cap; + uint64_t max_words_cap; // maximum word cap encountered + + char *pword_start; // pointer to stream start of current field + int64_t word_start; // position start of current field + + int64_t *line_start; // position in words for start of line + int64_t *line_fields; // Number of fields in each line + uint64_t lines; // Number of (good) lines observed + uint64_t file_lines; // Number of lines (including bad or skipped) + uint64_t lines_cap; // Vector capacity + + // Tokenizing stuff + ParserState state; + int doublequote; /* is " represented by ""? */ + char delimiter; /* field separator */ + int delim_whitespace; /* delimit by consuming space/tabs instead */ + char quotechar; /* quote character */ + char escapechar; /* escape character */ + char lineterminator; + int skipinitialspace; /* ignore spaces following delimiter? */ + int quoting; /* style of quoting to write */ + + char commentchar; + int allow_embedded_newline; + + int usecols; // Boolean: 1: usecols provided, 0: none provided + + Py_ssize_t expected_fields; + BadLineHandleMethod on_bad_lines; + + // floating point options + char decimal; + char sci; + + // thousands separator (comma, period) + char thousands; + + int header; // Boolean: 1: has header, 0: no header + int64_t header_start; // header row start + uint64_t header_end; // header row end + + void *skipset; + PyObject *skipfunc; + int64_t skip_first_N_rows; + int64_t skip_footer; + double (*double_converter)(const char *, char **, + char, char, char, int, int *, int *); + + // error handling + char *warn_msg; + char *error_msg; + + int skip_empty_lines; +} parser_t; + +typedef struct coliter_t { + char **words; + int64_t *line_start; + int64_t col; +} coliter_t; + +void coliter_setup(coliter_t *self, parser_t *parser, int64_t i, int64_t start); + +#define COLITER_NEXT(iter, word) \ + do { \ + const int64_t i = *iter.line_start++ + iter.col; \ + word = i >= *iter.line_start ? 
"" : iter.words[i]; \ + } while (0) + +parser_t *parser_new(void); + +int parser_init(parser_t *self); + +int parser_consume_rows(parser_t *self, size_t nrows); + +int parser_trim_buffers(parser_t *self); + +int parser_add_skiprow(parser_t *self, int64_t row); + +int parser_set_skipfirstnrows(parser_t *self, int64_t nrows); + +void parser_free(parser_t *self); + +void parser_del(parser_t *self); + +void parser_set_default_options(parser_t *self); + +int tokenize_nrows(parser_t *self, size_t nrows, const char *encoding_errors); + +int tokenize_all_rows(parser_t *self, const char *encoding_errors); + +// Have parsed / type-converted a chunk of data +// and want to free memory from the token stream + +typedef struct uint_state { + int seen_sint; + int seen_uint; + int seen_null; +} uint_state; + +void uint_state_init(uint_state *self); + +int uint64_conflict(uint_state *self); + +uint64_t str_to_uint64(uint_state *state, const char *p_item, int64_t int_max, + uint64_t uint_max, int *error, char tsep); +int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max, + int *error, char tsep); +double xstrtod(const char *p, char **q, char decimal, char sci, char tsep, + int skip_trailing, int *error, int *maybe_int); +double precise_xstrtod(const char *p, char **q, char decimal, + char sci, char tsep, int skip_trailing, + int *error, int *maybe_int); + +// GH-15140 - round_trip requires and acquires the GIL on its own +double round_trip(const char *p, char **q, char decimal, char sci, char tsep, + int skip_trailing, int *error, int *maybe_int); +int to_boolean(const char *item, uint8_t *val); + +#endif // PANDAS__LIBS_SRC_PARSER_TOKENIZER_H_ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/src/skiplist.h b/.local/lib/python3.8/site-packages/pandas/_libs/src/skiplist.h new file mode 100644 index 0000000..5d0b144 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/src/skiplist.h @@ -0,0 +1,300 @@ +/* +Copyright (c) 2016, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. + +Flexibly-sized, index-able skiplist data structure for maintaining a sorted +list of values + +Port of Wes McKinney's Cython version of Raymond Hettinger's original pure +Python recipe (https://rhettinger.wordpress.com/2010/02/06/lost-knowledge/) +*/ + +#ifndef PANDAS__LIBS_SRC_SKIPLIST_H_ +#define PANDAS__LIBS_SRC_SKIPLIST_H_ + +#include +#include +#include +#include +#include "inline_helper.h" + +PANDAS_INLINE float __skiplist_nanf(void) { + const union { + int __i; + float __f; + } __bint = {0x7fc00000UL}; + return __bint.__f; +} +#define PANDAS_NAN ((double)__skiplist_nanf()) + +PANDAS_INLINE double Log2(double val) { return log(val) / log(2.); } + +typedef struct node_t node_t; + +struct node_t { + node_t **next; + int *width; + double value; + int is_nil; + int levels; + int ref_count; +}; + +typedef struct { + node_t *head; + node_t **tmp_chain; + int *tmp_steps; + int size; + int maxlevels; +} skiplist_t; + +PANDAS_INLINE double urand(void) { + return ((double)rand() + 1) / ((double)RAND_MAX + 2); +} + +PANDAS_INLINE int int_min(int a, int b) { return a < b ? 
a : b; } + +PANDAS_INLINE node_t *node_init(double value, int levels) { + node_t *result; + result = (node_t *)malloc(sizeof(node_t)); + if (result) { + result->value = value; + result->levels = levels; + result->is_nil = 0; + result->ref_count = 0; + result->next = (node_t **)malloc(levels * sizeof(node_t *)); + result->width = (int *)malloc(levels * sizeof(int)); + if (!(result->next && result->width) && (levels != 0)) { + free(result->next); + free(result->width); + free(result); + return NULL; + } + } + return result; +} + +// do this ourselves +PANDAS_INLINE void node_incref(node_t *node) { ++(node->ref_count); } + +PANDAS_INLINE void node_decref(node_t *node) { --(node->ref_count); } + +static void node_destroy(node_t *node) { + int i; + if (node) { + if (node->ref_count <= 1) { + for (i = 0; i < node->levels; ++i) { + node_destroy(node->next[i]); + } + free(node->next); + free(node->width); + // printf("Reference count was 1, freeing\n"); + free(node); + } else { + node_decref(node); + } + // pretty sure that freeing the struct above will be enough + } +} + +PANDAS_INLINE void skiplist_destroy(skiplist_t *skp) { + if (skp) { + node_destroy(skp->head); + free(skp->tmp_steps); + free(skp->tmp_chain); + free(skp); + } +} + +PANDAS_INLINE skiplist_t *skiplist_init(int expected_size) { + skiplist_t *result; + node_t *NIL, *head; + int maxlevels, i; + + maxlevels = 1 + Log2((double)expected_size); + result = (skiplist_t *)malloc(sizeof(skiplist_t)); + if (!result) { + return NULL; + } + result->tmp_chain = (node_t **)malloc(maxlevels * sizeof(node_t *)); + result->tmp_steps = (int *)malloc(maxlevels * sizeof(int)); + result->maxlevels = maxlevels; + result->size = 0; + + head = result->head = node_init(PANDAS_NAN, maxlevels); + NIL = node_init(0.0, 0); + + if (!(result->tmp_chain && result->tmp_steps && result->head && NIL)) { + skiplist_destroy(result); + node_destroy(NIL); + return NULL; + } + + node_incref(head); + + NIL->is_nil = 1; + + for (i = 0; i < maxlevels; ++i) { + head->next[i] = NIL; + head->width[i] = 1; + node_incref(NIL); + } + + return result; +} + +// 1 if left < right, 0 if left == right, -1 if left > right +PANDAS_INLINE int _node_cmp(node_t *node, double value) { + if (node->is_nil || node->value > value) { + return -1; + } else if (node->value < value) { + return 1; + } else { + return 0; + } +} + +PANDAS_INLINE double skiplist_get(skiplist_t *skp, int i, int *ret) { + node_t *node; + int level; + + if (i < 0 || i >= skp->size) { + *ret = 0; + return 0; + } + + node = skp->head; + ++i; + for (level = skp->maxlevels - 1; level >= 0; --level) { + while (node->width[level] <= i) { + i -= node->width[level]; + node = node->next[level]; + } + } + + *ret = 1; + return node->value; +} + +// Returns the lowest rank of all elements with value `value`, as opposed to the +// highest rank returned by `skiplist_insert`. +PANDAS_INLINE int skiplist_min_rank(skiplist_t *skp, double value) { + node_t *node; + int level, rank = 0; + + node = skp->head; + for (level = skp->maxlevels - 1; level >= 0; --level) { + while (_node_cmp(node->next[level], value) > 0) { + rank += node->width[level]; + node = node->next[level]; + } + } + + return rank + 1; +} + +// Returns the rank of the inserted element. When there are duplicates, +// `rank` is the highest of the group, i.e. 
the 'max' method of +// https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.rank.html +PANDAS_INLINE int skiplist_insert(skiplist_t *skp, double value) { + node_t *node, *prevnode, *newnode, *next_at_level; + int *steps_at_level; + int size, steps, level, rank = 0; + node_t **chain; + + chain = skp->tmp_chain; + + steps_at_level = skp->tmp_steps; + memset(steps_at_level, 0, skp->maxlevels * sizeof(int)); + + node = skp->head; + + for (level = skp->maxlevels - 1; level >= 0; --level) { + next_at_level = node->next[level]; + while (_node_cmp(next_at_level, value) >= 0) { + steps_at_level[level] += node->width[level]; + rank += node->width[level]; + node = next_at_level; + next_at_level = node->next[level]; + } + chain[level] = node; + } + + size = int_min(skp->maxlevels, 1 - ((int)Log2(urand()))); + + newnode = node_init(value, size); + if (!newnode) { + return -1; + } + steps = 0; + + for (level = 0; level < size; ++level) { + prevnode = chain[level]; + newnode->next[level] = prevnode->next[level]; + + prevnode->next[level] = newnode; + node_incref(newnode); // increment the reference count + + newnode->width[level] = prevnode->width[level] - steps; + prevnode->width[level] = steps + 1; + + steps += steps_at_level[level]; + } + + for (level = size; level < skp->maxlevels; ++level) { + chain[level]->width[level] += 1; + } + + ++(skp->size); + + return rank + 1; +} + +PANDAS_INLINE int skiplist_remove(skiplist_t *skp, double value) { + int level, size; + node_t *node, *prevnode, *tmpnode, *next_at_level; + node_t **chain; + + chain = skp->tmp_chain; + node = skp->head; + + for (level = skp->maxlevels - 1; level >= 0; --level) { + next_at_level = node->next[level]; + while (_node_cmp(next_at_level, value) > 0) { + node = next_at_level; + next_at_level = node->next[level]; + } + chain[level] = node; + } + + if (value != chain[0]->next[0]->value) { + return 0; + } + + size = chain[0]->next[0]->levels; + + for (level = 0; level < size; ++level) { + prevnode = chain[level]; + + tmpnode = prevnode->next[level]; + + prevnode->width[level] += tmpnode->width[level] - 1; + prevnode->next[level] = tmpnode->next[level]; + + tmpnode->next[level] = NULL; + node_destroy(tmpnode); // decrement refcount or free + } + + for (level = size; level < skp->maxlevels; ++level) { + --(chain[level]->width[level]); + } + + --(skp->size); + return 1; +} + +#endif // PANDAS__LIBS_SRC_SKIPLIST_H_ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/src/ujson/lib/ultrajson.h b/.local/lib/python3.8/site-packages/pandas/_libs/src/ujson/lib/ultrajson.h new file mode 100644 index 0000000..5b5995a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/src/ujson/lib/ultrajson.h @@ -0,0 +1,316 @@ +/* +Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the ESN Social Software AB nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) +https://github.com/client9/stringencoders +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. + +Numeric decoder derived from TCL library +https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms + * Copyright (c) 1988-1993 The Regents of the University of California. + * Copyright (c) 1994 Sun Microsystems, Inc. +*/ + +/* +Ultra fast JSON encoder and decoder +Developed by Jonas Tarnstrom (jonas@esn.me). + +Encoder notes: +------------------ + +:: Cyclic references :: +Cyclically referenced objects are not detected. +Set JSONObjectEncoder.recursionMax to a suitable value or make sure the input object +tree doesn't have cyclic references. + +*/ + +#ifndef PANDAS__LIBS_SRC_UJSON_LIB_ULTRAJSON_H_ +#define PANDAS__LIBS_SRC_UJSON_LIB_ULTRAJSON_H_ + +#include <stdio.h> +#include <wchar.h> + +// Don't output any extra whitespace when encoding +#define JSON_NO_EXTRA_WHITESPACE + +// Max decimals to encode double floating point numbers with +#ifndef JSON_DOUBLE_MAX_DECIMALS +#define JSON_DOUBLE_MAX_DECIMALS 15 +#endif + +// Max recursion depth, default for encoder +#ifndef JSON_MAX_RECURSION_DEPTH +#define JSON_MAX_RECURSION_DEPTH 1024 +#endif + +// Max recursion depth, default for decoder +#ifndef JSON_MAX_OBJECT_DEPTH +#define JSON_MAX_OBJECT_DEPTH 1024 +#endif + +/* +Dictates and limits how much stack space for buffers UltraJSON will use before resorting to provided heap functions */ +#ifndef JSON_MAX_STACK_BUFFER_SIZE +#define JSON_MAX_STACK_BUFFER_SIZE 131072 +#endif + +#ifdef _WIN32 + +typedef __int64 JSINT64; +typedef unsigned __int64 JSUINT64; + +typedef __int32 JSINT32; +typedef unsigned __int32 JSUINT32; +typedef unsigned __int8 JSUINT8; +typedef unsigned __int16 JSUTF16; +typedef unsigned __int32 JSUTF32; +typedef __int64 JSLONG; + +#define EXPORTFUNCTION __declspec(dllexport) + +#define FASTCALL_MSVC __fastcall + +#define INLINE_PREFIX static __inline + +#else + +#include <stdint.h> +typedef int64_t JSINT64; +typedef uint64_t JSUINT64; + +typedef int32_t JSINT32; +typedef uint32_t JSUINT32; + +#define FASTCALL_MSVC + +#define INLINE_PREFIX static inline + +typedef uint8_t JSUINT8; +typedef uint16_t JSUTF16; +typedef uint32_t JSUTF32; + +typedef int64_t JSLONG; + +#define EXPORTFUNCTION +#endif + +#if !(defined(__LITTLE_ENDIAN__) || defined(__BIG_ENDIAN__)) + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define __LITTLE_ENDIAN__ +#else + +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define __BIG_ENDIAN__ +#endif + +#endif + +#endif + +#if !defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__) +#error "Endianness not supported" +#endif + +enum JSTYPES { + JT_NULL, // NULL + JT_TRUE, // boolean true + JT_FALSE, // boolean false +
JT_INT, // (JSINT32 (signed 32-bit)) + JT_LONG, // (JSINT64 (signed 64-bit)) + JT_DOUBLE, // (double) + JT_BIGNUM, // integer larger than sys.maxsize + JT_UTF8, // (char 8-bit) + JT_ARRAY, // Array structure + JT_OBJECT, // Key/Value structure + JT_INVALID, // Internal, do not return nor expect + JT_POS_INF, // Positive infinity + JT_NEG_INF, // Negative infinity +}; + +typedef void * JSOBJ; +typedef void * JSITER; + +typedef struct __JSONTypeContext { + int type; + void *encoder; + void *prv; +} JSONTypeContext; + +/* +Function pointer declarations, suitable for implementing UltraJSON */ +typedef void (*JSPFN_ITERBEGIN)(JSOBJ obj, JSONTypeContext *tc); +typedef int (*JSPFN_ITERNEXT)(JSOBJ obj, JSONTypeContext *tc); +typedef void (*JSPFN_ITEREND)(JSOBJ obj, JSONTypeContext *tc); +typedef JSOBJ (*JSPFN_ITERGETVALUE)(JSOBJ obj, JSONTypeContext *tc); +typedef char *(*JSPFN_ITERGETNAME)(JSOBJ obj, JSONTypeContext *tc, + size_t *outLen); +typedef void *(*JSPFN_MALLOC)(size_t size); +typedef void (*JSPFN_FREE)(void *pptr); +typedef void *(*JSPFN_REALLOC)(void *base, size_t size); + +typedef struct __JSONObjectEncoder { + void (*beginTypeContext)(JSOBJ obj, JSONTypeContext *tc); + void (*endTypeContext)(JSOBJ obj, JSONTypeContext *tc); + const char *(*getStringValue)(JSOBJ obj, JSONTypeContext *tc, + size_t *_outLen); + JSINT64 (*getLongValue)(JSOBJ obj, JSONTypeContext *tc); + JSINT32 (*getIntValue)(JSOBJ obj, JSONTypeContext *tc); + double (*getDoubleValue)(JSOBJ obj, JSONTypeContext *tc); + const char *(*getBigNumStringValue)(JSOBJ obj, JSONTypeContext *tc, + size_t *_outLen); + + /* + Begin iteration of an iterable object (JT_ARRAY or JT_OBJECT) + The implementor should set up iteration state in ti->prv + */ + JSPFN_ITERBEGIN iterBegin; + + /* + Retrieve the next object in an iteration. Should return 0 to indicate the iteration has reached its end or 1 if there are more items. + The implementor is responsible for keeping state of the iteration. Use ti->prv fields for this + */ + JSPFN_ITERNEXT iterNext; + + /* + Ends the iteration of an iterable object. + Any iteration state stored in ti->prv can be freed here + */ + JSPFN_ITEREND iterEnd; + + /* + Returns a reference to the value object of an iterator + The implementor is responsible for the life-cycle of the returned string. Use iterNext/iterEnd and ti->prv to keep track of the current object + */ + JSPFN_ITERGETVALUE iterGetValue; + + /* + Return the name of the iterator. + The implementor is responsible for the life-cycle of the returned string. Use iterNext/iterEnd and ti->prv to keep track of the current object + */ + JSPFN_ITERGETNAME iterGetName; + + /* + Release a value as indicated by setting ti->release = 1 in the previous getValue call. + The ti->prv array should contain the necessary context to release the value + */ + void (*releaseObject)(JSOBJ obj); + + /* Library functions + Set to NULL to use STDLIB malloc, realloc, free */ + JSPFN_MALLOC malloc; + JSPFN_REALLOC realloc; + JSPFN_FREE free; + + /* + Configuration for max recursion, set to 0 to use the default (see JSON_MAX_RECURSION_DEPTH) */ + int recursionMax; + + /* + Configuration for max decimals of double floating point numbers to encode (0-9) */ + int doublePrecision; + + /* + If true, output will be ASCII with all characters above 127 encoded as \uXXXX. If false, output will be UTF-8 or whatever charset strings are brought in as */ + int forceASCII; + + /* + If true, '<', '>', and '&' characters will be encoded as \u003c, \u003e, and \u0026, respectively. If false, no special encoding will be used.
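For example (illustrative): with encodeHTMLChars enabled, the input <b>x&y</b> is emitted as \u003cb\u003ex\u0026y\u003c/b\u003e, so the resulting JSON can be embedded directly inside HTML without terminating a surrounding <script> element.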
*/ + int encodeHTMLChars; + + /* + Configuration for spaces of indent */ + int indent; + + /* + Set to an error message if error occurred */ + const char *errorMsg; + JSOBJ errorObj; + + /* Buffer stuff */ + char *start; + char *offset; + char *end; + int heap; + int level; +} JSONObjectEncoder; + +/* +Encode an object structure into JSON. + +Arguments: +obj - An anonymous type representing the object +enc - Function definitions for querying JSOBJ type +buffer - Preallocated buffer to store result in. If NULL function allocates own buffer +cbBuffer - Length of buffer (ignored if buffer is NULL) + +Returns: +Encoded JSON object as a null terminated char string. + +NOTE: +If the supplied buffer wasn't enough to hold the result the function will allocate a new buffer. +Life cycle of the provided buffer must still be handled by caller. + +If the return value doesn't equal the specified buffer caller must release the memory using +JSONObjectEncoder.free or free() as specified when calling this function. +*/ +EXPORTFUNCTION char *JSON_EncodeObject(JSOBJ obj, JSONObjectEncoder *enc, + char *buffer, size_t cbBuffer); + +typedef struct __JSONObjectDecoder { + JSOBJ (*newString)(void *prv, wchar_t *start, wchar_t *end); + int (*objectAddKey)(void *prv, JSOBJ obj, JSOBJ name, JSOBJ value); + int (*arrayAddItem)(void *prv, JSOBJ obj, JSOBJ value); + JSOBJ (*newTrue)(void *prv); + JSOBJ (*newFalse)(void *prv); + JSOBJ (*newNull)(void *prv); + JSOBJ (*newPosInf)(void *prv); + JSOBJ (*newNegInf)(void *prv); + JSOBJ (*newObject)(void *prv, void *decoder); + JSOBJ (*endObject)(void *prv, JSOBJ obj); + JSOBJ (*newArray)(void *prv, void *decoder); + JSOBJ (*endArray)(void *prv, JSOBJ obj); + JSOBJ (*newInt)(void *prv, JSINT32 value); + JSOBJ (*newLong)(void *prv, JSINT64 value); + JSOBJ (*newUnsignedLong)(void *prv, JSUINT64 value); + JSOBJ (*newDouble)(void *prv, double value); + void (*releaseObject)(void *prv, JSOBJ obj, void *decoder); + JSPFN_MALLOC malloc; + JSPFN_FREE free; + JSPFN_REALLOC realloc; + char *errorStr; + char *errorOffset; + int preciseFloat; + void *prv; +} JSONObjectDecoder; + +EXPORTFUNCTION JSOBJ JSON_DecodeObject(JSONObjectDecoder *dec, + const char *buffer, size_t cbBuffer); +EXPORTFUNCTION void encode(JSOBJ, JSONObjectEncoder *, const char *, size_t); + +#endif // PANDAS__LIBS_SRC_UJSON_LIB_ULTRAJSON_H_ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/src/ujson/lib/ultrajsondec.c b/.local/lib/python3.8/site-packages/pandas/_libs/src/ujson/lib/ultrajsondec.c new file mode 100644 index 0000000..fee5526 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/src/ujson/lib/ultrajsondec.c @@ -0,0 +1,1202 @@ +/* +Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +* Neither the name of the ESN Social Software AB nor the +names of its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. 
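A hedged sketch of a caller honoring the JSON_EncodeObject buffer contract documented in the header above. The encoder callbacks are host-specific and assumed to be wired up already (as pandas does for Python objects in its objToJSON.c); encode_with_stack_buffer and its buffer size are hypothetical:

#include <stdio.h>
#include "ultrajson.h"

// Try a stack buffer first; free the result only if the encoder
// had to fall back to the heap (out != stack_buf).
static void encode_with_stack_buffer(JSOBJ obj, JSONObjectEncoder *enc) {
    char stack_buf[4096];
    char *out = JSON_EncodeObject(obj, enc, stack_buf, sizeof(stack_buf));
    if (!out) {
        fprintf(stderr, "encode failed: %s\n", enc->errorMsg);
        return;
    }
    puts(out);               // NUL-terminated JSON string
    if (out != stack_buf) {
        enc->free(out);      // JSON_EncodeObject defaults enc->free to free()
    }
}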
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE +LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) +https://github.com/client9/stringencoders +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights +reserved. + +Numeric decoder derived from TCL library +https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms +* Copyright (c) 1988-1993 The Regents of the University of California. +* Copyright (c) 1994 Sun Microsystems, Inc. +*/ + +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <locale.h> +#include <errno.h> +#include <limits.h> +#include <wchar.h> +#include <string.h> +#include "ultrajson.h" + +#ifndef TRUE +#define TRUE 1 +#define FALSE 0 +#endif +#ifndef NULL +#define NULL 0 +#endif + +struct DecoderState { + char *start; + char *end; + wchar_t *escStart; + wchar_t *escEnd; + int escHeap; + int lastType; + JSUINT32 objDepth; + void *prv; + JSONObjectDecoder *dec; +}; + +JSOBJ FASTCALL_MSVC decode_any(struct DecoderState *ds); +typedef JSOBJ (*PFN_DECODER)(struct DecoderState *ds); + +static JSOBJ SetError(struct DecoderState *ds, int offset, + const char *message) { + ds->dec->errorOffset = ds->start + offset; + ds->dec->errorStr = (char *)message; + return NULL; +} + +double createDouble(double intNeg, double intValue, double frcValue, + int frcDecimalCount) { + static const double g_pow10[] = {1.0, + 0.1, + 0.01, + 0.001, + 0.0001, + 0.00001, + 0.000001, + 0.0000001, + 0.00000001, + 0.000000001, + 0.0000000001, + 0.00000000001, + 0.000000000001, + 0.0000000000001, + 0.00000000000001, + 0.000000000000001}; + return (intValue + (frcValue * g_pow10[frcDecimalCount])) * intNeg; +} + +JSOBJ FASTCALL_MSVC decodePreciseFloat(struct DecoderState *ds) { + char *end; + double value; + errno = 0; + + value = strtod(ds->start, &end); + + if (errno == ERANGE) { + return SetError(ds, -1, "Range error when decoding numeric as double"); + } + + ds->start = end; + return ds->dec->newDouble(ds->prv, value); +} + +JSOBJ FASTCALL_MSVC decode_numeric(struct DecoderState *ds) { + int intNeg = 1; + JSUINT64 intValue; + JSUINT64 prevIntValue; + int chr; + int decimalCount = 0; + double frcValue = 0.0; + double expNeg; + double expValue; + char *offset = ds->start; + + JSUINT64 overflowLimit = LLONG_MAX; + + if (*(offset) == 'I') { + goto DECODE_INF; + } else if (*(offset) == 'N') { + goto DECODE_NAN; + } else if (*(offset) == '-') { + offset++; + intNeg = -1; + overflowLimit = LLONG_MIN; + if (*(offset) == 'I') { + goto DECODE_INF; + } + } + + // Scan integer part + intValue = 0; + + while (1) { + chr = (int)(unsigned char)*(offset); + + switch (chr) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + // PERF: Don't do 64-bit arithmetic here unless we have to + prevIntValue = intValue; + intValue = intValue * 10ULL +
(JSLONG) (chr - 48); + + if (intNeg == 1 && prevIntValue > intValue) { + return SetError(ds, -1, "Value is too big!"); + } else if (intNeg == -1 && intValue > overflowLimit) { + return SetError(ds, -1, overflowLimit == LLONG_MAX ? + "Value is too big!" : "Value is too small"); + } + + offset++; + break; + } + case '.': { + offset++; + goto DECODE_FRACTION; + break; + } + case 'e': + case 'E': { + offset++; + goto DECODE_EXPONENT; + break; + } + + default: { + goto BREAK_INT_LOOP; + break; + } + } + } + +BREAK_INT_LOOP: + + ds->lastType = JT_INT; + ds->start = offset; + + if (intNeg == 1 && (intValue & 0x8000000000000000ULL) != 0) + return ds->dec->newUnsignedLong(ds->prv, intValue); + else if ((intValue >> 31)) + return ds->dec->newLong(ds->prv, (JSINT64)(intValue * (JSINT64)intNeg)); + else + return ds->dec->newInt(ds->prv, (JSINT32)(intValue * intNeg)); + +DECODE_FRACTION: + + if (ds->dec->preciseFloat) { + return decodePreciseFloat(ds); + } + + // Scan fraction part + frcValue = 0.0; + for (;;) { + chr = (int)(unsigned char)*(offset); + + switch (chr) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + if (decimalCount < JSON_DOUBLE_MAX_DECIMALS) { + frcValue = frcValue * 10.0 + (double)(chr - 48); + decimalCount++; + } + offset++; + break; + } + case 'e': + case 'E': { + offset++; + goto DECODE_EXPONENT; + break; + } + default: { goto BREAK_FRC_LOOP; } + } + } + +BREAK_FRC_LOOP: + // FIXME: Check for arithmetic overflow here + ds->lastType = JT_DOUBLE; + ds->start = offset; + return ds->dec->newDouble( + ds->prv, + createDouble((double)intNeg, (double)intValue, frcValue, decimalCount)); + +DECODE_EXPONENT: + if (ds->dec->preciseFloat) { + return decodePreciseFloat(ds); + } + + expNeg = 1.0; + + if (*(offset) == '-') { + expNeg = -1.0; + offset++; + } else if (*(offset) == '+') { + expNeg = +1.0; + offset++; + } + + expValue = 0.0; + + for (;;) { + chr = (int)(unsigned char)*(offset); + + switch (chr) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + expValue = expValue * 10.0 + (double)(chr - 48); + offset++; + break; + } + default: { goto BREAK_EXP_LOOP; } + } + } + +DECODE_NAN: + offset++; + if (*(offset++) != 'a') goto SET_NAN_ERROR; + if (*(offset++) != 'N') goto SET_NAN_ERROR; + + ds->lastType = JT_NULL; + ds->start = offset; + return ds->dec->newNull(ds->prv); + +SET_NAN_ERROR: + return SetError(ds, -1, "Unexpected character found when decoding 'NaN'"); + +DECODE_INF: + offset++; + if (*(offset++) != 'n') goto SET_INF_ERROR; + if (*(offset++) != 'f') goto SET_INF_ERROR; + if (*(offset++) != 'i') goto SET_INF_ERROR; + if (*(offset++) != 'n') goto SET_INF_ERROR; + if (*(offset++) != 'i') goto SET_INF_ERROR; + if (*(offset++) != 't') goto SET_INF_ERROR; + if (*(offset++) != 'y') goto SET_INF_ERROR; + + ds->start = offset; + + if (intNeg == 1) { + ds->lastType = JT_POS_INF; + return ds->dec->newPosInf(ds->prv); + } else { + ds->lastType = JT_NEG_INF; + return ds->dec->newNegInf(ds->prv); + } + +SET_INF_ERROR: + if (intNeg == 1) { + const char *msg = "Unexpected character found when decoding 'Infinity'"; + return SetError(ds, -1, msg); + } else { + const char *msg = "Unexpected character found when decoding '-Infinity'"; + return SetError(ds, -1, msg); + } + + +BREAK_EXP_LOOP: + // FIXME: Check for arithmetic overflow here + ds->lastType = JT_DOUBLE; + ds->start = offset; + return ds->dec->newDouble( + ds->prv, + 
createDouble((double)intNeg, (double)intValue, frcValue, decimalCount) * + pow(10.0, expValue * expNeg)); +} + +JSOBJ FASTCALL_MSVC decode_true(struct DecoderState *ds) { + char *offset = ds->start; + offset++; + + if (*(offset++) != 'r') goto SETERROR; + if (*(offset++) != 'u') goto SETERROR; + if (*(offset++) != 'e') goto SETERROR; + + ds->lastType = JT_TRUE; + ds->start = offset; + return ds->dec->newTrue(ds->prv); + +SETERROR: + return SetError(ds, -1, "Unexpected character found when decoding 'true'"); +} + +JSOBJ FASTCALL_MSVC decode_false(struct DecoderState *ds) { + char *offset = ds->start; + offset++; + + if (*(offset++) != 'a') goto SETERROR; + if (*(offset++) != 'l') goto SETERROR; + if (*(offset++) != 's') goto SETERROR; + if (*(offset++) != 'e') goto SETERROR; + + ds->lastType = JT_FALSE; + ds->start = offset; + return ds->dec->newFalse(ds->prv); + +SETERROR: + return SetError(ds, -1, "Unexpected character found when decoding 'false'"); +} + +JSOBJ FASTCALL_MSVC decode_null(struct DecoderState *ds) { + char *offset = ds->start; + offset++; + + if (*(offset++) != 'u') goto SETERROR; + if (*(offset++) != 'l') goto SETERROR; + if (*(offset++) != 'l') goto SETERROR; + + ds->lastType = JT_NULL; + ds->start = offset; + return ds->dec->newNull(ds->prv); + +SETERROR: + return SetError(ds, -1, "Unexpected character found when decoding 'null'"); +} + +void FASTCALL_MSVC SkipWhitespace(struct DecoderState *ds) { + char *offset; + + for (offset = ds->start; (ds->end - offset) > 0; offset++) { + switch (*offset) { + case ' ': + case '\t': + case '\r': + case '\n': + break; + + default: + ds->start = offset; + return; + } + } + + if (offset == ds->end) { + ds->start = ds->end; + } +} + +enum DECODESTRINGSTATE { + DS_ISNULL = 0x32, + DS_ISQUOTE, + DS_ISESCAPE, + DS_UTFLENERROR, +}; + +static const JSUINT8 g_decoderLookup[256] = { + /* 0x00 */ DS_ISNULL, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x10 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x20 */ 1, + 1, + DS_ISQUOTE, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x30 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x40 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x50 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + DS_ISESCAPE, + 1, + 1, + 1, + /* 0x60 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x70 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x80 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x90 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0xa0 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0xb0 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0xc0 */ 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + /* 0xd0 */ 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + /* 0xe0 */ 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + /* 0xf0 */ 4, + 4, + 4, + 4, + 4, + 4, + 4, + 4, + DS_UTFLENERROR, + DS_UTFLENERROR, + DS_UTFLENERROR, + DS_UTFLENERROR, + DS_UTFLENERROR, + DS_UTFLENERROR, + DS_UTFLENERROR, + DS_UTFLENERROR, +}; + +JSOBJ FASTCALL_MSVC decode_string(struct DecoderState *ds) { + JSUTF16 sur[2] = {0}; + int iSur = 
0; + int index; + wchar_t *escOffset; + wchar_t *escStart; + size_t escLen = (ds->escEnd - ds->escStart); + JSUINT8 *inputOffset; + JSUINT8 oct; + JSUTF32 ucs; + ds->lastType = JT_INVALID; + ds->start++; + + if ((size_t)(ds->end - ds->start) > escLen) { + size_t newSize = (ds->end - ds->start); + + if (ds->escHeap) { + if (newSize > (SIZE_MAX / sizeof(wchar_t))) { + return SetError(ds, -1, "Could not reserve memory block"); + } + escStart = (wchar_t *)ds->dec->realloc(ds->escStart, + newSize * sizeof(wchar_t)); + if (!escStart) { + ds->dec->free(ds->escStart); + return SetError(ds, -1, "Could not reserve memory block"); + } + ds->escStart = escStart; + } else { + wchar_t *oldStart = ds->escStart; + if (newSize > (SIZE_MAX / sizeof(wchar_t))) { + return SetError(ds, -1, "Could not reserve memory block"); + } + ds->escStart = + (wchar_t *)ds->dec->malloc(newSize * sizeof(wchar_t)); + if (!ds->escStart) { + return SetError(ds, -1, "Could not reserve memory block"); + } + ds->escHeap = 1; + memcpy(ds->escStart, oldStart, escLen * sizeof(wchar_t)); + } + + ds->escEnd = ds->escStart + newSize; + } + + escOffset = ds->escStart; + inputOffset = (JSUINT8 *)ds->start; + + for (;;) { + switch (g_decoderLookup[(JSUINT8)(*inputOffset)]) { + case DS_ISNULL: { + return SetError(ds, -1, + "Unmatched '\"' when decoding 'string'"); + } + case DS_ISQUOTE: { + ds->lastType = JT_UTF8; + inputOffset++; + ds->start += ((char *)inputOffset - (ds->start)); + return ds->dec->newString(ds->prv, ds->escStart, escOffset); + } + case DS_UTFLENERROR: { + return SetError( + ds, -1, + "Invalid UTF-8 sequence length when decoding 'string'"); + } + case DS_ISESCAPE: + inputOffset++; + switch (*inputOffset) { + case '\\': + *(escOffset++) = L'\\'; + inputOffset++; + continue; + case '\"': + *(escOffset++) = L'\"'; + inputOffset++; + continue; + case '/': + *(escOffset++) = L'/'; + inputOffset++; + continue; + case 'b': + *(escOffset++) = L'\b'; + inputOffset++; + continue; + case 'f': + *(escOffset++) = L'\f'; + inputOffset++; + continue; + case 'n': + *(escOffset++) = L'\n'; + inputOffset++; + continue; + case 'r': + *(escOffset++) = L'\r'; + inputOffset++; + continue; + case 't': + *(escOffset++) = L'\t'; + inputOffset++; + continue; + + case 'u': { + int index; + inputOffset++; + + for (index = 0; index < 4; index++) { + switch (*inputOffset) { + case '\0': + return SetError(ds, -1, + "Unterminated unicode " + "escape sequence when " + "decoding 'string'"); + default: + return SetError(ds, -1, + "Unexpected character in " + "unicode escape sequence " + "when decoding 'string'"); + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + sur[iSur] = (sur[iSur] << 4) + + (JSUTF16)(*inputOffset - '0'); + break; + + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + sur[iSur] = (sur[iSur] << 4) + 10 + + (JSUTF16)(*inputOffset - 'a'); + break; + + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + sur[iSur] = (sur[iSur] << 4) + 10 + + (JSUTF16)(*inputOffset - 'A'); + break; + } + + inputOffset++; + } + + if (iSur == 0) { + if ((sur[iSur] & 0xfc00) == 0xd800) { + // First of a surrogate pair, continue parsing + iSur++; + break; + } + (*escOffset++) = (wchar_t)sur[iSur]; + iSur = 0; + } else { + // Decode pair + if ((sur[1] & 0xfc00) != 0xdc00) { + return SetError(ds, -1, + "Unpaired high surrogate when " + "decoding 'string'"); + } +#if WCHAR_MAX == 0xffff + (*escOffset++) = (wchar_t)sur[0]; + (*escOffset++) =
(wchar_t)sur[1]; +#else + (*escOffset++) = + (wchar_t)0x10000 + + (((sur[0] - 0xd800) << 10) | (sur[1] - 0xdc00)); +#endif + iSur = 0; + } + break; + } + + case '\0': + return SetError(ds, -1, + "Unterminated escape sequence when " + "decoding 'string'"); + default: + return SetError(ds, -1, + "Unrecognized escape sequence when " + "decoding 'string'"); + } + break; + + case 1: { + *(escOffset++) = (wchar_t)(*inputOffset++); + break; + } + + case 2: { + ucs = (*inputOffset++) & 0x1f; + ucs <<= 6; + if (((*inputOffset) & 0x80) != 0x80) { + return SetError(ds, -1, + "Invalid octet in UTF-8 sequence when " + "decoding 'string'"); + } + ucs |= (*inputOffset++) & 0x3f; + if (ucs < 0x80) + return SetError(ds, -1, + "Overlong 2 byte UTF-8 sequence detected " + "when decoding 'string'"); + *(escOffset++) = (wchar_t)ucs; + break; + } + + case 3: { + JSUTF32 ucs = 0; + ucs |= (*inputOffset++) & 0x0f; + + for (index = 0; index < 2; index++) { + ucs <<= 6; + oct = (*inputOffset++); + + if ((oct & 0x80) != 0x80) { + return SetError(ds, -1, + "Invalid octet in UTF-8 sequence when " + "decoding 'string'"); + } + + ucs |= oct & 0x3f; + } + + if (ucs < 0x800) + return SetError(ds, -1, + "Overlong 3 byte UTF-8 sequence detected " + "when encoding string"); + *(escOffset++) = (wchar_t)ucs; + break; + } + + case 4: { + JSUTF32 ucs = 0; + ucs |= (*inputOffset++) & 0x07; + + for (index = 0; index < 3; index++) { + ucs <<= 6; + oct = (*inputOffset++); + + if ((oct & 0x80) != 0x80) { + return SetError(ds, -1, + "Invalid octet in UTF-8 sequence when " + "decoding 'string'"); + } + + ucs |= oct & 0x3f; + } + + if (ucs < 0x10000) + return SetError(ds, -1, + "Overlong 4 byte UTF-8 sequence detected " + "when decoding 'string'"); + +#if WCHAR_MAX == 0xffff + if (ucs >= 0x10000) { + ucs -= 0x10000; + *(escOffset++) = (wchar_t)(ucs >> 10) + 0xd800; + *(escOffset++) = (wchar_t)(ucs & 0x3ff) + 0xdc00; + } else { + *(escOffset++) = (wchar_t)ucs; + } +#else + *(escOffset++) = (wchar_t)ucs; +#endif + break; + } + } + } +} + +JSOBJ FASTCALL_MSVC decode_array(struct DecoderState *ds) { + JSOBJ itemValue; + JSOBJ newObj; + int len; + ds->objDepth++; + if (ds->objDepth > JSON_MAX_OBJECT_DEPTH) { + return SetError(ds, -1, "Reached object decoding depth limit"); + } + + newObj = ds->dec->newArray(ds->prv, ds->dec); + len = 0; + + ds->lastType = JT_INVALID; + ds->start++; + + for (;;) { + SkipWhitespace(ds); + + if ((*ds->start) == ']') { + ds->objDepth--; + if (len == 0) { + ds->start++; + return ds->dec->endArray(ds->prv, newObj); + } + + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + return SetError( + ds, -1, + "Unexpected character found when decoding array value (1)"); + } + + itemValue = decode_any(ds); + + if (itemValue == NULL) { + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + return NULL; + } + + if (!ds->dec->arrayAddItem(ds->prv, newObj, itemValue)) { + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + return NULL; + } + + SkipWhitespace(ds); + + switch (*(ds->start++)) { + case ']': { + ds->objDepth--; + return ds->dec->endArray(ds->prv, newObj); + } + case ',': + break; + + default: + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + return SetError( + ds, -1, + "Unexpected character found when decoding array value (2)"); + } + + len++; + } +} + +JSOBJ FASTCALL_MSVC decode_object(struct DecoderState *ds) { + JSOBJ itemName; + JSOBJ itemValue; + JSOBJ newObj; + + ds->objDepth++; + if (ds->objDepth > JSON_MAX_OBJECT_DEPTH) { + return SetError(ds, -1, "Reached object decoding depth limit"); + } + + newObj 
= ds->dec->newObject(ds->prv, ds->dec); + + ds->start++; + + for (;;) { + SkipWhitespace(ds); + + if ((*ds->start) == '}') { + ds->objDepth--; + ds->start++; + return ds->dec->endObject(ds->prv, newObj); + } + + ds->lastType = JT_INVALID; + itemName = decode_any(ds); + + if (itemName == NULL) { + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + return NULL; + } + + if (ds->lastType != JT_UTF8) { + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + ds->dec->releaseObject(ds->prv, itemName, ds->dec); + return SetError( + ds, -1, + "Key name of object must be 'string' when decoding 'object'"); + } + + SkipWhitespace(ds); + + if (*(ds->start++) != ':') { + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + ds->dec->releaseObject(ds->prv, itemName, ds->dec); + return SetError(ds, -1, "No ':' found when decoding object value"); + } + + SkipWhitespace(ds); + + itemValue = decode_any(ds); + + if (itemValue == NULL) { + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + ds->dec->releaseObject(ds->prv, itemName, ds->dec); + return NULL; + } + + if (!ds->dec->objectAddKey(ds->prv, newObj, itemName, itemValue)) { + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + ds->dec->releaseObject(ds->prv, itemName, ds->dec); + ds->dec->releaseObject(ds->prv, itemValue, ds->dec); + return NULL; + } + + SkipWhitespace(ds); + + switch (*(ds->start++)) { + case '}': { + ds->objDepth--; + return ds->dec->endObject(ds->prv, newObj); + } + case ',': + break; + + default: + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + return SetError( + ds, -1, + "Unexpected character found when decoding object value"); + } + } +} + +JSOBJ FASTCALL_MSVC decode_any(struct DecoderState *ds) { + for (;;) { + switch (*ds->start) { + case '\"': + return decode_string(ds); + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'I': + case 'N': + case '-': + return decode_numeric(ds); + + case '[': + return decode_array(ds); + case '{': + return decode_object(ds); + case 't': + return decode_true(ds); + case 'f': + return decode_false(ds); + case 'n': + return decode_null(ds); + + case ' ': + case '\t': + case '\r': + case '\n': + // White space + ds->start++; + break; + + default: + return SetError(ds, -1, "Expected object or value"); + } + } +} + +JSOBJ JSON_DecodeObject(JSONObjectDecoder *dec, const char *buffer, + size_t cbBuffer) { + /* + FIXME: Base the size of escBuffer of that of cbBuffer so that the unicode + escaping doesn't run into the wall each time */ + char *locale; + struct DecoderState ds; + wchar_t escBuffer[(JSON_MAX_STACK_BUFFER_SIZE / sizeof(wchar_t))]; + JSOBJ ret; + + ds.start = (char *)buffer; + ds.end = ds.start + cbBuffer; + + ds.escStart = escBuffer; + ds.escEnd = ds.escStart + (JSON_MAX_STACK_BUFFER_SIZE / sizeof(wchar_t)); + ds.escHeap = 0; + ds.prv = dec->prv; + ds.dec = dec; + ds.dec->errorStr = NULL; + ds.dec->errorOffset = NULL; + ds.objDepth = 0; + + ds.dec = dec; + + locale = setlocale(LC_NUMERIC, NULL); + if (strcmp(locale, "C")) { + locale = strdup(locale); + if (!locale) { + return SetError(&ds, -1, "Could not reserve memory block"); + } + setlocale(LC_NUMERIC, "C"); + ret = decode_any(&ds); + setlocale(LC_NUMERIC, locale); + free(locale); + } else { + ret = decode_any(&ds); + } + + if (ds.escHeap) { + dec->free(ds.escStart); + } + + SkipWhitespace(&ds); + + if (ds.start != ds.end && ret) { + dec->releaseObject(ds.prv, ret, ds.dec); + return SetError(&ds, -1, "Trailing data"); + } + + return ret; +} diff --git 
a/.local/lib/python3.8/site-packages/pandas/_libs/src/ujson/lib/ultrajsonenc.c b/.local/lib/python3.8/site-packages/pandas/_libs/src/ujson/lib/ultrajsonenc.c new file mode 100644 index 0000000..4469631 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/src/ujson/lib/ultrajsonenc.c @@ -0,0 +1,1200 @@ +/* +Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the ESN Social Software AB nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE +LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) +https://github.com/client9/stringencoders +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights +reserved. + +Numeric decoder derived from TCL library +https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms + * Copyright (c) 1988-1993 The Regents of the University of California. + * Copyright (c) 1994 Sun Microsystems, Inc. +*/ + +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <locale.h> +#include <float.h> +#include <wchar.h> +#include "ultrajson.h" + +#ifndef TRUE +#define TRUE 1 +#endif +#ifndef FALSE +#define FALSE 0 +#endif + +/* +Worst cases being: + +Control characters (ASCII < 32) +0x00 (1 byte) input => \u0000 output (6 bytes) +1 * 6 => 6 (6 bytes required) + +or UTF-16 surrogate pairs +4 bytes input in UTF-8 => \uXXXX\uYYYY (12 bytes). + +4 * 6 => 24 bytes (12 bytes required) + +The extra 2 bytes are for the quotes around the string + +*/ +#define RESERVE_STRING(_len) (2 + ((_len)*6)) + +static const double g_pow10[] = {1, + 10, + 100, + 1000, + 10000, + 100000, + 1000000, + 10000000, + 100000000, + 1000000000, + 10000000000, + 100000000000, + 1000000000000, + 10000000000000, + 100000000000000, + 1000000000000000}; +static const char g_hexChars[] = "0123456789abcdef"; +static const char g_escapeChars[] = "0123456789\\b\\t\\n\\f\\r\\\"\\\\\\/"; + +/* +FIXME: While this is fine and dandy and working it's a magic value mess which +probably only the author understands.
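One reading of the magic values, matching the switch in Buffer_EscapeStringValidated further down: 2-6 are the byte length of the UTF-8 sequence introduced by that lead byte; 1 means copy the byte through unchanged; 0 (index 0x00) marks NUL; 30 requests a \u00XX escape (control characters); 29 marks the HTML-sensitive bytes '&', '<', '>', escaped only when encodeHTMLChars is set; and the even values 10-24 index two-character escapes in g_escapeChars above -- e.g. newline (0x0a) maps to 14, and g_escapeChars[14..15] is the pair backslash, 'n'. This is also where RESERVE_STRING's bound above comes from: the worst single input byte (a control character) expands to six output bytes, plus two for the surrounding quotes.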
+Needs a cleanup and more documentation */ + +/* +Table for pure ascii output escaping all characters above 127 to \uXXXX */ +static const JSUINT8 g_asciiOutputTable[256] = { + /* 0x00 */ 0, + 30, + 30, + 30, + 30, + 30, + 30, + 30, + 10, + 12, + 14, + 30, + 16, + 18, + 30, + 30, + /* 0x10 */ 30, + 30, + 30, + 30, + 30, + 30, + 30, + 30, + 30, + 30, + 30, + 30, + 30, + 30, + 30, + 30, + /* 0x20 */ 1, + 1, + 20, + 1, + 1, + 1, + 29, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 24, + /* 0x30 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 29, + 1, + 29, + 1, + /* 0x40 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x50 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 22, + 1, + 1, + 1, + /* 0x60 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x70 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x80 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x90 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0xa0 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0xb0 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0xc0 */ 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + /* 0xd0 */ 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + /* 0xe0 */ 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + /* 0xf0 */ 4, + 4, + 4, + 4, + 4, + 4, + 4, + 4, + 5, + 5, + 5, + 5, + 6, + 6, + 1, + 1}; + +static void SetError(JSOBJ obj, JSONObjectEncoder *enc, const char *message) { + enc->errorMsg = message; + enc->errorObj = obj; +} + +/* +FIXME: Keep track of how big these get across several encoder calls and try to +make an estimate +That way we won't run our head into the wall each call */ +void Buffer_Realloc(JSONObjectEncoder *enc, size_t cbNeeded) { + size_t curSize = enc->end - enc->start; + size_t newSize = curSize * 2; + size_t offset = enc->offset - enc->start; + + while (newSize < curSize + cbNeeded) { + newSize *= 2; + } + + if (enc->heap) { + enc->start = (char *)enc->realloc(enc->start, newSize); + if (!enc->start) { + SetError(NULL, enc, "Could not reserve memory block"); + return; + } + } else { + char *oldStart = enc->start; + enc->heap = 1; + enc->start = (char *)enc->malloc(newSize); + if (!enc->start) { + SetError(NULL, enc, "Could not reserve memory block"); + return; + } + memcpy(enc->start, oldStart, offset); + } + enc->offset = enc->start + offset; + enc->end = enc->start + newSize; +} + +INLINE_PREFIX void FASTCALL_MSVC +Buffer_AppendShortHexUnchecked(char *outputOffset, unsigned short value) { + *(outputOffset++) = g_hexChars[(value & 0xf000) >> 12]; + *(outputOffset++) = g_hexChars[(value & 0x0f00) >> 8]; + *(outputOffset++) = g_hexChars[(value & 0x00f0) >> 4]; + *(outputOffset++) = g_hexChars[(value & 0x000f) >> 0]; +} + +int Buffer_EscapeStringUnvalidated(JSONObjectEncoder *enc, const char *io, + const char *end) { + char *of = (char *)enc->offset; + + for (;;) { + switch (*io) { + case 0x00: { + if (io < end) { + *(of++) = '\\'; + *(of++) = 'u'; + *(of++) = '0'; + *(of++) = '0'; + *(of++) = '0'; + *(of++) = '0'; + break; + } else { + enc->offset += (of - enc->offset); + return TRUE; + } + } + case '\"': + (*of++) = '\\'; + (*of++) = '\"'; + break; + case '\\': + (*of++) = '\\'; + (*of++) = '\\'; + break; + case '/': + 
(*of++) = '\\'; + (*of++) = '/'; + break; + case '\b': + (*of++) = '\\'; + (*of++) = 'b'; + break; + case '\f': + (*of++) = '\\'; + (*of++) = 'f'; + break; + case '\n': + (*of++) = '\\'; + (*of++) = 'n'; + break; + case '\r': + (*of++) = '\\'; + (*of++) = 'r'; + break; + case '\t': + (*of++) = '\\'; + (*of++) = 't'; + break; + + case 0x26: // '/' + case 0x3c: // '<' + case 0x3e: // '>' + { + if (enc->encodeHTMLChars) { + // Fall through to \u00XX case below. + } else { + // Same as default case below. + (*of++) = (*io); + break; + } + } + case 0x01: + case 0x02: + case 0x03: + case 0x04: + case 0x05: + case 0x06: + case 0x07: + case 0x0b: + case 0x0e: + case 0x0f: + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + case 0x18: + case 0x19: + case 0x1a: + case 0x1b: + case 0x1c: + case 0x1d: + case 0x1e: + case 0x1f: { + *(of++) = '\\'; + *(of++) = 'u'; + *(of++) = '0'; + *(of++) = '0'; + *(of++) = g_hexChars[(unsigned char)(((*io) & 0xf0) >> 4)]; + *(of++) = g_hexChars[(unsigned char)((*io) & 0x0f)]; + break; + } + default: + (*of++) = (*io); + break; + } + io++; + } +} + +int Buffer_EscapeStringValidated(JSOBJ obj, JSONObjectEncoder *enc, + const char *io, const char *end) { + JSUTF32 ucs; + char *of = (char *)enc->offset; + + for (;;) { + JSUINT8 utflen = g_asciiOutputTable[(unsigned char)*io]; + + switch (utflen) { + case 0: { + if (io < end) { + *(of++) = '\\'; + *(of++) = 'u'; + *(of++) = '0'; + *(of++) = '0'; + *(of++) = '0'; + *(of++) = '0'; + io++; + continue; + } else { + enc->offset += (of - enc->offset); + return TRUE; + } + } + + case 1: { + *(of++) = (*io++); + continue; + } + + case 2: { + JSUTF32 in; + JSUTF16 in16; + + if (end - io < 1) { + enc->offset += (of - enc->offset); + SetError( + obj, enc, + "Unterminated UTF-8 sequence when encoding string"); + return FALSE; + } + + memcpy(&in16, io, sizeof(JSUTF16)); + in = (JSUTF32)in16; + +#ifdef __LITTLE_ENDIAN__ + ucs = ((in & 0x1f) << 6) | ((in >> 8) & 0x3f); +#else + ucs = ((in & 0x1f00) >> 2) | (in & 0x3f); +#endif + + if (ucs < 0x80) { + enc->offset += (of - enc->offset); + SetError(obj, enc, + "Overlong 2 byte UTF-8 sequence detected when " + "encoding string"); + return FALSE; + } + + io += 2; + break; + } + + case 3: { + JSUTF32 in; + JSUTF16 in16; + JSUINT8 in8; + + if (end - io < 2) { + enc->offset += (of - enc->offset); + SetError( + obj, enc, + "Unterminated UTF-8 sequence when encoding string"); + return FALSE; + } + + memcpy(&in16, io, sizeof(JSUTF16)); + memcpy(&in8, io + 2, sizeof(JSUINT8)); +#ifdef __LITTLE_ENDIAN__ + in = (JSUTF32)in16; + in |= in8 << 16; + ucs = ((in & 0x0f) << 12) | ((in & 0x3f00) >> 2) | + ((in & 0x3f0000) >> 16); +#else + in = in16 << 8; + in |= in8; + ucs = + ((in & 0x0f0000) >> 4) | ((in & 0x3f00) >> 2) | (in & 0x3f); +#endif + + if (ucs < 0x800) { + enc->offset += (of - enc->offset); + SetError(obj, enc, + "Overlong 3 byte UTF-8 sequence detected when " + "encoding string"); + return FALSE; + } + + io += 3; + break; + } + case 4: { + JSUTF32 in; + + if (end - io < 3) { + enc->offset += (of - enc->offset); + SetError( + obj, enc, + "Unterminated UTF-8 sequence when encoding string"); + return FALSE; + } + + memcpy(&in, io, sizeof(JSUTF32)); +#ifdef __LITTLE_ENDIAN__ + ucs = ((in & 0x07) << 18) | ((in & 0x3f00) << 4) | + ((in & 0x3f0000) >> 10) | ((in & 0x3f000000) >> 24); +#else + ucs = ((in & 0x07000000) >> 6) | ((in & 0x3f0000) >> 4) | + ((in & 0x3f00) >> 2) | (in & 0x3f); +#endif + if (ucs < 0x10000) { + enc->offset += (of - 
enc->offset); + SetError(obj, enc, + "Overlong 4 byte UTF-8 sequence detected when " + "encoding string"); + return FALSE; + } + + io += 4; + break; + } + + case 5: + case 6: { + enc->offset += (of - enc->offset); + SetError( + obj, enc, + "Unsupported UTF-8 sequence length when encoding string"); + return FALSE; + } + + case 29: { + if (enc->encodeHTMLChars) { + // Fall through to \u00XX case 30 below. + } else { + // Same as case 1 above. + *(of++) = (*io++); + continue; + } + } + + case 30: { + // \uXXXX encode + *(of++) = '\\'; + *(of++) = 'u'; + *(of++) = '0'; + *(of++) = '0'; + *(of++) = g_hexChars[(unsigned char)(((*io) & 0xf0) >> 4)]; + *(of++) = g_hexChars[(unsigned char)((*io) & 0x0f)]; + io++; + continue; + } + case 10: + case 12: + case 14: + case 16: + case 18: + case 20: + case 22: + case 24: { + *(of++) = *((char *)(g_escapeChars + utflen + 0)); + *(of++) = *((char *)(g_escapeChars + utflen + 1)); + io++; + continue; + } + // This can never happen, it's here to make L4 VC++ happy + default: { + ucs = 0; + break; + } + } + + /* + If the character is a UTF8 sequence of length > 1 we end up here */ + if (ucs >= 0x10000) { + ucs -= 0x10000; + *(of++) = '\\'; + *(of++) = 'u'; + Buffer_AppendShortHexUnchecked( + of, (unsigned short)(ucs >> 10) + 0xd800); + of += 4; + + *(of++) = '\\'; + *(of++) = 'u'; + Buffer_AppendShortHexUnchecked( + of, (unsigned short)(ucs & 0x3ff) + 0xdc00); + of += 4; + } else { + *(of++) = '\\'; + *(of++) = 'u'; + Buffer_AppendShortHexUnchecked(of, (unsigned short)ucs); + of += 4; + } + } +} + +#define Buffer_Reserve(__enc, __len) \ + if ( (size_t) ((__enc)->end - (__enc)->offset) < (size_t) (__len)) \ + { \ + Buffer_Realloc((__enc), (__len));\ + } \ + +#define Buffer_AppendCharUnchecked(__enc, __chr) *((__enc)->offset++) = __chr; + +INLINE_PREFIX void FASTCALL_MSVC strreverse(char *begin, + char *end) { + char aux; + while (end > begin) aux = *end, *end-- = *begin, *begin++ = aux; +} + +void Buffer_AppendIndentNewlineUnchecked(JSONObjectEncoder *enc) { + if (enc->indent > 0) Buffer_AppendCharUnchecked(enc, '\n'); +} + +// This function could be refactored to only accept enc as an argument, +// but this is a straight vendor from ujson source +void Buffer_AppendIndentUnchecked(JSONObjectEncoder *enc, JSINT32 value) { + int i; + if (enc->indent > 0) { + while (value-- > 0) + for (i = 0; i < enc->indent; i++) + Buffer_AppendCharUnchecked(enc, ' '); + } +} + +void Buffer_AppendIntUnchecked(JSONObjectEncoder *enc, JSINT32 value) { + char *wstr; + JSUINT32 uvalue = (value < 0) ? -value : value; + wstr = enc->offset; + + // Conversion. Number is reversed. + do { + *wstr++ = (char)(48 + (uvalue % 10)); + } while (uvalue /= 10); + if (value < 0) *wstr++ = '-'; + + // Reverse string + strreverse(enc->offset, wstr - 1); + enc->offset += (wstr - (enc->offset)); +} + +void Buffer_AppendLongUnchecked(JSONObjectEncoder *enc, JSINT64 value) { + char *wstr; + JSUINT64 uvalue = (value < 0) ? -value : value; + + wstr = enc->offset; + // Conversion. Number is reversed. 
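// Worked example of the reverse-then-flip trick used here and in
// Buffer_AppendIntUnchecked above (illustrative values): value = -345
// gives uvalue = 345; the loop emits '5', '4', '3' ("543"), the sign
// test appends '-' ("543-"), and strreverse turns that into "-345",
// after which enc->offset advances past the four bytes written.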
+ + do { + *wstr++ = (char)(48 + (uvalue % 10ULL)); + } while (uvalue /= 10ULL); + if (value < 0) *wstr++ = '-'; + + // Reverse string + strreverse(enc->offset, wstr - 1); + enc->offset += (wstr - (enc->offset)); +} + +int Buffer_AppendDoubleUnchecked(JSOBJ obj, JSONObjectEncoder *enc, + double value) { + /* if input is beyond the thresholds, revert to exponential */ + const double thres_max = (double)1e16 - 1; + const double thres_min = (double)1e-15; + char precision_str[20]; + int count; + double diff = 0.0; + char *str = enc->offset; + char *wstr = str; + unsigned long long whole; + double tmp; + unsigned long long frac; + int neg; + double pow10; + + if (value == HUGE_VAL || value == -HUGE_VAL) { + SetError(obj, enc, "Invalid Inf value when encoding double"); + return FALSE; + } + + if (!(value == value)) { + SetError(obj, enc, "Invalid Nan value when encoding double"); + return FALSE; + } + + /* we'll work in positive values and deal with the + negative sign issue later */ + neg = 0; + if (value < 0) { + neg = 1; + value = -value; + } + + /* + for very large or small numbers switch back to native sprintf for + exponentials. anyone want to write code to replace this? */ + if (value > thres_max || (value != 0.0 && fabs(value) < thres_min)) { + precision_str[0] = '%'; + precision_str[1] = '.'; +#if defined(_WIN32) && defined(_MSC_VER) + sprintf_s(precision_str + 2, sizeof(precision_str) - 2, "%ug", + enc->doublePrecision); + enc->offset += sprintf_s(str, enc->end - enc->offset, precision_str, + neg ? -value : value); +#else + snprintf(precision_str + 2, sizeof(precision_str) - 2, "%ug", + enc->doublePrecision); + enc->offset += snprintf(str, enc->end - enc->offset, precision_str, + neg ? -value : value); +#endif + return TRUE; + } + + pow10 = g_pow10[enc->doublePrecision]; + + whole = (unsigned long long)value; + tmp = (value - whole) * pow10; + frac = (unsigned long long)(tmp); + diff = tmp - frac; + + if (diff > 0.5) { + ++frac; + } else if (diff == 0.5 && ((frac == 0) || (frac & 1))) { + /* if halfway, round up if odd, OR + if last digit is 0. That last part is strange */ + ++frac; + } + + // handle rollover, e.g. + // case 0.99 with prec 1 is 1.0 and case 0.95 with prec is 1.0 as well + if (frac >= pow10) { + frac = 0; + ++whole; + } + + if (enc->doublePrecision == 0) { + diff = value - whole; + + if (diff > 0.5) { + /* greater than 0.5, round up, e.g. 1.6 -> 2 */ + ++whole; + } else if (diff == 0.5 && (whole & 1)) { + /* exactly 0.5 and ODD, then round up */ + /* 1.5 -> 2, but 2.5 -> 2 */ + ++whole; + } + + // vvvvvvvvvvvvvvvvvvv Diff from modp_dto2 + } else if (frac) { + count = enc->doublePrecision; + // now do fractional part, as an unsigned number + // we know it is not 0 but we can have leading zeros, these + // should be removed + while (!(frac % 10)) { + --count; + frac /= 10; + } + //^^^^^^^^^^^^^^^^^^^ Diff from modp_dto2 + + // now do fractional part, as an unsigned number + do { + --count; + *wstr++ = (char)(48 + (frac % 10)); + } while (frac /= 10); + // add extra 0s + while (count-- > 0) { + *wstr++ = '0'; + } + // add decimal + *wstr++ = '.'; + } else { + *wstr++ = '0'; + *wstr++ = '.'; + } + + // Do whole part. Take care of sign + // conversion. Number is reversed. 
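// Worked example of the rounding above (illustrative, treating the
// doubles as exact): value = 0.95 with doublePrecision = 1 gives
// pow10 = 10, whole = 0, tmp = 9.5, frac = 9, diff = 0.5; frac is odd,
// so the halfway case rounds up to frac = 10, which is >= pow10 and
// triggers the rollover branch: frac = 0, whole = 1, and the digits
// emitted below come out as "1.0" -- the "case 0.95" mentioned in the
// rollover comment.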
+ do { + *wstr++ = (char)(48 + (whole % 10)); + } while (whole /= 10); + + if (neg) { + *wstr++ = '-'; + } + strreverse(str, wstr - 1); + enc->offset += (wstr - (enc->offset)); + + return TRUE; +} + +/* +FIXME: +Handle integration functions returning NULL here */ + +/* +FIXME: +Perhaps implement recursion detection */ + +void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, + size_t cbName) { + const char *value; + char *objName; + int count; + JSOBJ iterObj; + size_t szlen; + JSONTypeContext tc; + tc.encoder = enc; + + if (enc->level > enc->recursionMax) { + SetError(obj, enc, "Maximum recursion level reached"); + return; + } + + /* + This reservation must hold + + length of _name as encoded worst case + + maxLength of double to string OR maxLength of JSLONG to string + */ + + Buffer_Reserve(enc, 256 + RESERVE_STRING(cbName)); + if (enc->errorMsg) { + return; + } + + if (name) { + Buffer_AppendCharUnchecked(enc, '\"'); + + if (enc->forceASCII) { + if (!Buffer_EscapeStringValidated(obj, enc, name, name + cbName)) { + return; + } + } else { + if (!Buffer_EscapeStringUnvalidated(enc, name, name + cbName)) { + return; + } + } + + Buffer_AppendCharUnchecked(enc, '\"'); + + Buffer_AppendCharUnchecked(enc, ':'); +#ifndef JSON_NO_EXTRA_WHITESPACE + Buffer_AppendCharUnchecked(enc, ' '); +#endif + } + + enc->beginTypeContext(obj, &tc); + + switch (tc.type) { + case JT_INVALID: { + return; + } + + case JT_ARRAY: { + count = 0; + enc->iterBegin(obj, &tc); + + Buffer_AppendCharUnchecked(enc, '['); + Buffer_AppendIndentNewlineUnchecked(enc); + + while (enc->iterNext(obj, &tc)) { + if (count > 0) { + Buffer_AppendCharUnchecked(enc, ','); +#ifndef JSON_NO_EXTRA_WHITESPACE + Buffer_AppendCharUnchecked(buffer, ' '); +#endif + Buffer_AppendIndentNewlineUnchecked(enc); + } + + iterObj = enc->iterGetValue(obj, &tc); + + enc->level++; + Buffer_AppendIndentUnchecked(enc, enc->level); + encode(iterObj, enc, NULL, 0); + count++; + } + + enc->iterEnd(obj, &tc); + Buffer_AppendIndentNewlineUnchecked(enc); + Buffer_AppendIndentUnchecked(enc, enc->level); + Buffer_AppendCharUnchecked(enc, ']'); + break; + } + + case JT_OBJECT: { + count = 0; + enc->iterBegin(obj, &tc); + + Buffer_AppendCharUnchecked(enc, '{'); + Buffer_AppendIndentNewlineUnchecked(enc); + + while (enc->iterNext(obj, &tc)) { + if (count > 0) { + Buffer_AppendCharUnchecked(enc, ','); +#ifndef JSON_NO_EXTRA_WHITESPACE + Buffer_AppendCharUnchecked(enc, ' '); +#endif + Buffer_AppendIndentNewlineUnchecked(enc); + } + + iterObj = enc->iterGetValue(obj, &tc); + objName = enc->iterGetName(obj, &tc, &szlen); + + enc->level++; + Buffer_AppendIndentUnchecked(enc, enc->level); + encode(iterObj, enc, objName, szlen); + count++; + } + + enc->iterEnd(obj, &tc); + Buffer_AppendIndentNewlineUnchecked(enc); + Buffer_AppendIndentUnchecked(enc, enc->level); + Buffer_AppendCharUnchecked(enc, '}'); + break; + } + + case JT_LONG: { + Buffer_AppendLongUnchecked(enc, enc->getLongValue(obj, &tc)); + break; + } + + case JT_INT: { + Buffer_AppendIntUnchecked(enc, enc->getIntValue(obj, &tc)); + break; + } + + case JT_TRUE: { + Buffer_AppendCharUnchecked(enc, 't'); + Buffer_AppendCharUnchecked(enc, 'r'); + Buffer_AppendCharUnchecked(enc, 'u'); + Buffer_AppendCharUnchecked(enc, 'e'); + break; + } + + case JT_FALSE: { + Buffer_AppendCharUnchecked(enc, 'f'); + Buffer_AppendCharUnchecked(enc, 'a'); + Buffer_AppendCharUnchecked(enc, 'l'); + Buffer_AppendCharUnchecked(enc, 's'); + Buffer_AppendCharUnchecked(enc, 'e'); + break; + } + + case JT_NULL: { + 
Buffer_AppendCharUnchecked(enc, 'n'); + Buffer_AppendCharUnchecked(enc, 'u'); + Buffer_AppendCharUnchecked(enc, 'l'); + Buffer_AppendCharUnchecked(enc, 'l'); + break; + } + + case JT_DOUBLE: { + if (!Buffer_AppendDoubleUnchecked(obj, enc, + enc->getDoubleValue(obj, &tc))) { + enc->endTypeContext(obj, &tc); + enc->level--; + return; + } + break; + } + + case JT_UTF8: { + value = enc->getStringValue(obj, &tc, &szlen); + Buffer_Reserve(enc, RESERVE_STRING(szlen)); + if (enc->errorMsg) { + enc->endTypeContext(obj, &tc); + return; + } + Buffer_AppendCharUnchecked(enc, '\"'); + + if (enc->forceASCII) { + if (!Buffer_EscapeStringValidated(obj, enc, value, + value + szlen)) { + enc->endTypeContext(obj, &tc); + enc->level--; + return; + } + } else { + if (!Buffer_EscapeStringUnvalidated(enc, value, + value + szlen)) { + enc->endTypeContext(obj, &tc); + enc->level--; + return; + } + } + + Buffer_AppendCharUnchecked(enc, '\"'); + break; + } + + case JT_BIGNUM: { + value = enc->getBigNumStringValue(obj, &tc, &szlen); + + Buffer_Reserve(enc, RESERVE_STRING(szlen)); + if (enc->errorMsg) { + enc->endTypeContext(obj, &tc); + return; + } + + if (enc->forceASCII) { + if (!Buffer_EscapeStringValidated(obj, enc, value, + value + szlen)) { + enc->endTypeContext(obj, &tc); + enc->level--; + return; + } + } else { + if (!Buffer_EscapeStringUnvalidated(enc, value, + value + szlen)) { + enc->endTypeContext(obj, &tc); + enc->level--; + return; + } + } + + break; + } + } + + enc->endTypeContext(obj, &tc); + enc->level--; +} + +char *JSON_EncodeObject(JSOBJ obj, JSONObjectEncoder *enc, char *_buffer, + size_t _cbBuffer) { + char *locale; + enc->malloc = enc->malloc ? enc->malloc : malloc; + enc->free = enc->free ? enc->free : free; + enc->realloc = enc->realloc ? enc->realloc : realloc; + enc->errorMsg = NULL; + enc->errorObj = NULL; + enc->level = 0; + + if (enc->recursionMax < 1) { + enc->recursionMax = JSON_MAX_RECURSION_DEPTH; + } + + if (enc->doublePrecision < 0 || + enc->doublePrecision > JSON_DOUBLE_MAX_DECIMALS) { + enc->doublePrecision = JSON_DOUBLE_MAX_DECIMALS; + } + + if (_buffer == NULL) { + _cbBuffer = 32768; + enc->start = (char *)enc->malloc(_cbBuffer); + if (!enc->start) { + SetError(obj, enc, "Could not reserve memory block"); + return NULL; + } + enc->heap = 1; + } else { + enc->start = _buffer; + enc->heap = 0; + } + + enc->end = enc->start + _cbBuffer; + enc->offset = enc->start; + + locale = setlocale(LC_NUMERIC, NULL); + if (strcmp(locale, "C")) { + locale = strdup(locale); + if (!locale) { + SetError(NULL, enc, "Could not reserve memory block"); + return NULL; + } + setlocale(LC_NUMERIC, "C"); + encode(obj, enc, NULL, 0); + setlocale(LC_NUMERIC, locale); + free(locale); + } else { + encode(obj, enc, NULL, 0); + } + + Buffer_Reserve(enc, 1); + if (enc->errorMsg) { + return NULL; + } + Buffer_AppendCharUnchecked(enc, '\0'); + + return enc->start; +} diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/src/ujson/python/JSONtoObj.c b/.local/lib/python3.8/site-packages/pandas/_libs/src/ujson/python/JSONtoObj.c new file mode 100644 index 0000000..14683f4 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/src/ujson/python/JSONtoObj.c @@ -0,0 +1,601 @@ +/* +Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom +All rights reserved. 
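The LC_NUMERIC save/switch/restore in JSON_EncodeObject above (mirroring JSON_DecodeObject earlier) exists because printf-family formatting is locale-sensitive; a minimal illustration, assuming a comma-decimal locale such as de_DE.UTF-8 is installed on the system:

#include <locale.h>
#include <stdio.h>

int main(void) {
    // Hypothetical locale; setlocale returns NULL if it is not installed.
    if (setlocale(LC_NUMERIC, "de_DE.UTF-8")) {
        printf("%g\n", 1.5);  // prints "1,5" -- not valid inside JSON
    }
    setlocale(LC_NUMERIC, "C");
    printf("%g\n", 1.5);      // prints "1.5"
    return 0;
}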
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the ESN Social Software AB nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) +https://github.com/client9/stringencoders +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. + +Numeric decoder derived from TCL library +https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms + * Copyright (c) 1988-1993 The Regents of the University of California. + * Copyright (c) 1994 Sun Microsystems, Inc. +*/ + +#define PY_ARRAY_UNIQUE_SYMBOL UJSON_NUMPY +#define NO_IMPORT_ARRAY +#define PY_SSIZE_T_CLEAN +#include <Python.h> +#include <numpy/arrayobject.h> +#include <ultrajson.h> + +#define PRINTMARK() + +typedef struct __PyObjectDecoder { + JSONObjectDecoder dec; + + void *npyarr; // Numpy context buffer + void *npyarr_addr; // Ref to npyarr ptr to track DECREF calls + npy_intp curdim; // Current array dimension + + PyArray_Descr *dtype; +} PyObjectDecoder; + +typedef struct __NpyArrContext { + PyObject *ret; + PyObject *labels[2]; + PyArray_Dims shape; + + PyObjectDecoder *dec; + + npy_intp i; + npy_intp elsize; + npy_intp elcount; +} NpyArrContext; + +// Numpy handling based on numpy internal code, specifically the function +// PyArray_FromIter. + +// numpy related functions are inter-dependent so declare them all here, +// to ensure the compiler catches any errors + +// standard numpy array handling +JSOBJ Object_npyNewArray(void *prv, void *decoder); +JSOBJ Object_npyEndArray(void *prv, JSOBJ obj); +int Object_npyArrayAddItem(void *prv, JSOBJ obj, JSOBJ value); + +// for more complex dtypes (object and string) fill a standard Python list +// and convert to a numpy array when done.
+JSOBJ Object_npyNewArrayList(void *prv, void *decoder); +JSOBJ Object_npyEndArrayList(void *prv, JSOBJ obj); +int Object_npyArrayListAddItem(void *prv, JSOBJ obj, JSOBJ value); + +// labelled support, encode keys and values of JS object into separate numpy +// arrays +JSOBJ Object_npyNewObject(void *prv, void *decoder); +JSOBJ Object_npyEndObject(void *prv, JSOBJ obj); +int Object_npyObjectAddKey(void *prv, JSOBJ obj, JSOBJ name, JSOBJ value); + +// free the numpy context buffer +void Npy_releaseContext(NpyArrContext *npyarr) { + PRINTMARK(); + if (npyarr) { + if (npyarr->shape.ptr) { + PyObject_Free(npyarr->shape.ptr); + } + if (npyarr->dec) { + npyarr->dec->npyarr = NULL; + npyarr->dec->curdim = 0; + } + Py_XDECREF(npyarr->labels[0]); + Py_XDECREF(npyarr->labels[1]); + Py_XDECREF(npyarr->ret); + PyObject_Free(npyarr); + } +} + +JSOBJ Object_npyNewArray(void *prv, void *_decoder) { + NpyArrContext *npyarr; + PyObjectDecoder *decoder = (PyObjectDecoder *)_decoder; + PRINTMARK(); + if (decoder->curdim <= 0) { + // start of array - initialise the context buffer + npyarr = decoder->npyarr = PyObject_Malloc(sizeof(NpyArrContext)); + decoder->npyarr_addr = npyarr; + + if (!npyarr) { + PyErr_NoMemory(); + return NULL; + } + + npyarr->dec = decoder; + npyarr->labels[0] = npyarr->labels[1] = NULL; + + npyarr->shape.ptr = PyObject_Malloc(sizeof(npy_intp) * NPY_MAXDIMS); + npyarr->shape.len = 1; + npyarr->ret = NULL; + + npyarr->elsize = 0; + npyarr->elcount = 4; + npyarr->i = 0; + } else { + // starting a new dimension continue the current array (and reshape + // after) + npyarr = (NpyArrContext *)decoder->npyarr; + if (decoder->curdim >= npyarr->shape.len) { + npyarr->shape.len++; + } + } + + npyarr->shape.ptr[decoder->curdim] = 0; + decoder->curdim++; + return npyarr; +} + +PyObject *Npy_returnLabelled(NpyArrContext *npyarr) { + PyObject *ret = npyarr->ret; + npy_intp i; + + if (npyarr->labels[0] || npyarr->labels[1]) { + // finished decoding, build tuple with values and labels + ret = PyTuple_New(npyarr->shape.len + 1); + for (i = 0; i < npyarr->shape.len; i++) { + if (npyarr->labels[i]) { + PyTuple_SET_ITEM(ret, i + 1, npyarr->labels[i]); + npyarr->labels[i] = NULL; + } else { + Py_INCREF(Py_None); + PyTuple_SET_ITEM(ret, i + 1, Py_None); + } + } + PyTuple_SET_ITEM(ret, 0, npyarr->ret); + } + + return ret; +} + +JSOBJ Object_npyEndArray(void *prv, JSOBJ obj) { + PyObject *ret; + char *new_data; + NpyArrContext *npyarr = (NpyArrContext *)obj; + int emptyType = NPY_DEFAULT_TYPE; + npy_intp i; + PRINTMARK(); + if (!npyarr) { + return NULL; + } + + ret = npyarr->ret; + i = npyarr->i; + + npyarr->dec->curdim--; + + if (i == 0 || !npyarr->ret) { + // empty array would not have been initialised so do it now. 
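+ // (e.g. decoding "[]" reaches this branch: PyArray_EMPTY below builds a + // zero-length array with the requested dtype, or NPY_DEFAULT_TYPE if none + // was supplied -- illustrative editorial comment)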
+ if (npyarr->dec->dtype) { + emptyType = npyarr->dec->dtype->type_num; + } + npyarr->ret = ret = + PyArray_EMPTY(npyarr->shape.len, npyarr->shape.ptr, emptyType, 0); + } else if (npyarr->dec->curdim <= 0) { + // realloc to final size + new_data = PyDataMem_RENEW(PyArray_DATA(ret), i * npyarr->elsize); + if (new_data == NULL) { + PyErr_NoMemory(); + Npy_releaseContext(npyarr); + return NULL; + } + ((PyArrayObject *)ret)->data = (void *)new_data; + // PyArray_BYTES(ret) = new_data; + } + + if (npyarr->dec->curdim <= 0) { + // finished decoding array, reshape if necessary + if (npyarr->shape.len > 1) { + npyarr->ret = PyArray_Newshape((PyArrayObject *)ret, &npyarr->shape, + NPY_ANYORDER); + Py_DECREF(ret); + } + + ret = Npy_returnLabelled(npyarr); + + npyarr->ret = NULL; + Npy_releaseContext(npyarr); + } + + return ret; +} + +int Object_npyArrayAddItem(void *prv, JSOBJ obj, JSOBJ value) { + PyObject *type; + PyArray_Descr *dtype; + npy_intp i; + char *new_data, *item; + NpyArrContext *npyarr = (NpyArrContext *)obj; + PRINTMARK(); + if (!npyarr) { + return 0; + } + + i = npyarr->i; + + npyarr->shape.ptr[npyarr->dec->curdim - 1]++; + + if (PyArray_Check((PyObject *)value)) { + // multidimensional array, keep decoding values. + return 1; + } + + if (!npyarr->ret) { + // Array not initialised yet. + // We do it here so we can 'sniff' the data type if none was provided + if (!npyarr->dec->dtype) { + type = PyObject_Type(value); + if (!PyArray_DescrConverter(type, &dtype)) { + Py_DECREF(type); + goto fail; + } + Py_INCREF(dtype); + Py_DECREF(type); + } else { + dtype = PyArray_DescrNew(npyarr->dec->dtype); + } + + // If it's an object or string then fill a Python list and subsequently + // convert. Otherwise we would need to somehow mess about with + // reference counts when renewing memory. + npyarr->elsize = dtype->elsize; + if (PyDataType_REFCHK(dtype) || npyarr->elsize == 0) { + Py_XDECREF(dtype); + + if (npyarr->dec->curdim > 1) { + PyErr_SetString(PyExc_ValueError, + "Cannot decode multidimensional arrays with " + "variable length elements to numpy"); + goto fail; + } + npyarr->elcount = 0; + npyarr->ret = PyList_New(0); + if (!npyarr->ret) { + goto fail; + } + ((JSONObjectDecoder *)npyarr->dec)->newArray = + Object_npyNewArrayList; + ((JSONObjectDecoder *)npyarr->dec)->arrayAddItem = + Object_npyArrayListAddItem; + ((JSONObjectDecoder *)npyarr->dec)->endArray = + Object_npyEndArrayList; + return Object_npyArrayListAddItem(prv, obj, value); + } + + npyarr->ret = PyArray_NewFromDescr( + &PyArray_Type, dtype, 1, &npyarr->elcount, NULL, NULL, 0, NULL); + + if (!npyarr->ret) { + goto fail; + } + } + + if (i >= npyarr->elcount) { + // Grow PyArray_DATA(ret): + // this is similar for the strategy for PyListObject, but we use + // 50% overallocation => 0, 4, 8, 14, 23, 36, 56, 86 ... + if (npyarr->elsize == 0) { + PyErr_SetString(PyExc_ValueError, + "Cannot decode multidimensional arrays with " + "variable length elements to numpy"); + goto fail; + } + + npyarr->elcount = (i >> 1) + (i < 4 ? 
4 : 2) + i; + if (npyarr->elcount <= NPY_MAX_INTP / npyarr->elsize) { + new_data = PyDataMem_RENEW(PyArray_DATA(npyarr->ret), + npyarr->elcount * npyarr->elsize); + } else { + PyErr_NoMemory(); + goto fail; + } + ((PyArrayObject *)npyarr->ret)->data = (void *)new_data; + + // PyArray_BYTES(npyarr->ret) = new_data; + } + + PyArray_DIMS(npyarr->ret)[0] = i + 1; + + if ((item = PyArray_GETPTR1(npyarr->ret, i)) == NULL || + PyArray_SETITEM(npyarr->ret, item, value) == -1) { + goto fail; + } + + Py_DECREF((PyObject *)value); + npyarr->i++; + return 1; + +fail: + + Npy_releaseContext(npyarr); + return 0; +} + +JSOBJ Object_npyNewArrayList(void *prv, void *_decoder) { + PyObjectDecoder *decoder = (PyObjectDecoder *)_decoder; + PRINTMARK(); + PyErr_SetString( + PyExc_ValueError, + "nesting not supported for object or variable length dtypes"); + Npy_releaseContext(decoder->npyarr); + return NULL; +} + +JSOBJ Object_npyEndArrayList(void *prv, JSOBJ obj) { + PyObject *list, *ret; + NpyArrContext *npyarr = (NpyArrContext *)obj; + PRINTMARK(); + if (!npyarr) { + return NULL; + } + + // convert decoded list to numpy array + list = (PyObject *)npyarr->ret; + npyarr->ret = PyArray_FROM_O(list); + + ret = Npy_returnLabelled(npyarr); + npyarr->ret = list; + + ((JSONObjectDecoder *)npyarr->dec)->newArray = Object_npyNewArray; + ((JSONObjectDecoder *)npyarr->dec)->arrayAddItem = Object_npyArrayAddItem; + ((JSONObjectDecoder *)npyarr->dec)->endArray = Object_npyEndArray; + Npy_releaseContext(npyarr); + return ret; +} + +int Object_npyArrayListAddItem(void *prv, JSOBJ obj, JSOBJ value) { + NpyArrContext *npyarr = (NpyArrContext *)obj; + PRINTMARK(); + if (!npyarr) { + return 0; + } + PyList_Append((PyObject *)npyarr->ret, value); + Py_DECREF((PyObject *)value); + npyarr->elcount++; + return 1; +} + +JSOBJ Object_npyNewObject(void *prv, void *_decoder) { + PyObjectDecoder *decoder = (PyObjectDecoder *)_decoder; + PRINTMARK(); + if (decoder->curdim > 1) { + PyErr_SetString(PyExc_ValueError, + "labels only supported up to 2 dimensions"); + return NULL; + } + + return ((JSONObjectDecoder *)decoder)->newArray(prv, decoder); +} + +JSOBJ Object_npyEndObject(void *prv, JSOBJ obj) { + PyObject *list; + npy_intp labelidx; + NpyArrContext *npyarr = (NpyArrContext *)obj; + PRINTMARK(); + if (!npyarr) { + return NULL; + } + + labelidx = npyarr->dec->curdim - 1; + + list = npyarr->labels[labelidx]; + if (list) { + npyarr->labels[labelidx] = PyArray_FROM_O(list); + Py_DECREF(list); + } + + return (PyObject *)((JSONObjectDecoder *)npyarr->dec)->endArray(prv, obj); +} + +int Object_npyObjectAddKey(void *prv, JSOBJ obj, JSOBJ name, JSOBJ value) { + PyObject *label, *labels; + npy_intp labelidx; + // add key to label array, value to values array + NpyArrContext *npyarr = (NpyArrContext *)obj; + PRINTMARK(); + if (!npyarr) { + return 0; + } + + label = (PyObject *)name; + labelidx = npyarr->dec->curdim - 1; + + if (!npyarr->labels[labelidx]) { + npyarr->labels[labelidx] = PyList_New(0); + } + labels = npyarr->labels[labelidx]; + // only fill label array once, assumes all column labels are the same + // for 2-dimensional arrays. 
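+ // (illustrative editorial note, not in the upstream file: decoding + // '{"r0": {"c0": 1}, "r1": {"c0": 3}}' labelled collects row labels + // ["r0", "r1"] at the outer depth and column labels ["c0"] once at the + // inner depth)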
+ if (PyList_Check(labels) && PyList_GET_SIZE(labels) <= npyarr->elcount) { + PyList_Append(labels, label); + } + + if (((JSONObjectDecoder *)npyarr->dec)->arrayAddItem(prv, obj, value)) { + Py_DECREF(label); + return 1; + } + return 0; +} + +int Object_objectAddKey(void *prv, JSOBJ obj, JSOBJ name, JSOBJ value) { + int ret = PyDict_SetItem(obj, name, value); + Py_DECREF((PyObject *)name); + Py_DECREF((PyObject *)value); + return ret == 0 ? 1 : 0; +} + +int Object_arrayAddItem(void *prv, JSOBJ obj, JSOBJ value) { + int ret = PyList_Append(obj, value); + Py_DECREF((PyObject *)value); + return ret == 0 ? 1 : 0; +} + +JSOBJ Object_newString(void *prv, wchar_t *start, wchar_t *end) { + return PyUnicode_FromWideChar(start, (end - start)); +} + +JSOBJ Object_newTrue(void *prv) { Py_RETURN_TRUE; } + +JSOBJ Object_newFalse(void *prv) { Py_RETURN_FALSE; } + +JSOBJ Object_newNull(void *prv) { Py_RETURN_NONE; } + +JSOBJ Object_newPosInf(void *prv) { return PyFloat_FromDouble(Py_HUGE_VAL); } + +JSOBJ Object_newNegInf(void *prv) { return PyFloat_FromDouble(-Py_HUGE_VAL); } + +JSOBJ Object_newObject(void *prv, void *decoder) { return PyDict_New(); } + +JSOBJ Object_endObject(void *prv, JSOBJ obj) { return obj; } + +JSOBJ Object_newArray(void *prv, void *decoder) { return PyList_New(0); } + +JSOBJ Object_endArray(void *prv, JSOBJ obj) { return obj; } + +JSOBJ Object_newInteger(void *prv, JSINT32 value) { + return PyLong_FromLong((long)value); +} + +JSOBJ Object_newLong(void *prv, JSINT64 value) { + return PyLong_FromLongLong(value); +} + +JSOBJ Object_newUnsignedLong(void *prv, JSUINT64 value) { + return PyLong_FromUnsignedLongLong(value); +} + +JSOBJ Object_newDouble(void *prv, double value) { + return PyFloat_FromDouble(value); +} + +static void Object_releaseObject(void *prv, JSOBJ obj, void *_decoder) { + PyObjectDecoder *decoder = (PyObjectDecoder *)_decoder; + if (obj != decoder->npyarr_addr) { + Py_XDECREF(((PyObject *)obj)); + } +} + +static char *g_kwlist[] = {"obj", "precise_float", "numpy", + "labelled", "dtype", NULL}; + +PyObject *JSONToObj(PyObject *self, PyObject *args, PyObject *kwargs) { + PyObject *ret; + PyObject *sarg; + PyObject *arg; + PyObject *opreciseFloat = NULL; + JSONObjectDecoder *decoder; + PyObjectDecoder pyDecoder; + PyArray_Descr *dtype = NULL; + int numpy = 0, labelled = 0; + + JSONObjectDecoder dec = { + Object_newString, Object_objectAddKey, Object_arrayAddItem, + Object_newTrue, Object_newFalse, Object_newNull, + Object_newPosInf, Object_newNegInf, Object_newObject, + Object_endObject, Object_newArray, Object_endArray, + Object_newInteger, Object_newLong, Object_newUnsignedLong, + Object_newDouble, + Object_releaseObject, PyObject_Malloc, PyObject_Free, + PyObject_Realloc}; + + dec.preciseFloat = 0; + dec.prv = NULL; + + pyDecoder.dec = dec; + pyDecoder.curdim = 0; + pyDecoder.npyarr = NULL; + pyDecoder.npyarr_addr = NULL; + + decoder = (JSONObjectDecoder *)&pyDecoder; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OiiO&", g_kwlist, &arg, + &opreciseFloat, &numpy, &labelled, + PyArray_DescrConverter2, &dtype)) { + Npy_releaseContext(pyDecoder.npyarr); + return NULL; + } + + if (opreciseFloat && PyObject_IsTrue(opreciseFloat)) { + decoder->preciseFloat = 1; + } + + if (PyBytes_Check(arg)) { + sarg = arg; + } else if (PyUnicode_Check(arg)) { + sarg = PyUnicode_AsUTF8String(arg); + if (sarg == NULL) { + // Exception raised above us by codec according to docs + return NULL; + } + } else { + PyErr_Format(PyExc_TypeError, "Expected 'str' or 'bytes'"); + return NULL; 
+ } + + decoder->errorStr = NULL; + decoder->errorOffset = NULL; + + if (numpy) { + pyDecoder.dtype = dtype; + decoder->newArray = Object_npyNewArray; + decoder->endArray = Object_npyEndArray; + decoder->arrayAddItem = Object_npyArrayAddItem; + + if (labelled) { + decoder->newObject = Object_npyNewObject; + decoder->endObject = Object_npyEndObject; + decoder->objectAddKey = Object_npyObjectAddKey; + } + } + + ret = JSON_DecodeObject(decoder, PyBytes_AS_STRING(sarg), + PyBytes_GET_SIZE(sarg)); + + if (sarg != arg) { + Py_DECREF(sarg); + } + + if (PyErr_Occurred()) { + if (ret) { + Py_DECREF((PyObject *)ret); + } + Npy_releaseContext(pyDecoder.npyarr); + return NULL; + } + + if (decoder->errorStr) { + /* + FIXME: It's possible to give a much nicer error message here with actual + failing element in input etc*/ + + PyErr_Format(PyExc_ValueError, "%s", decoder->errorStr); + + if (ret) { + Py_DECREF((PyObject *)ret); + } + Npy_releaseContext(pyDecoder.npyarr); + + return NULL; + } + + return ret; +} diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/src/ujson/python/date_conversions.c b/.local/lib/python3.8/site-packages/pandas/_libs/src/ujson/python/date_conversions.c new file mode 100644 index 0000000..0744c6a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/src/ujson/python/date_conversions.c @@ -0,0 +1,151 @@ +/* +Copyright (c) 2020, PyData Development Team +All rights reserved. +Distributed under the terms of the BSD Simplified License. +The full license is in the LICENSE file, distributed with this software. +*/ + +// Conversion routines that are useful for serialization, +// but which don't interact with JSON objects directly + +#include "date_conversions.h" +#include <../../../tslibs/src/datetime/np_datetime.h> +#include <../../../tslibs/src/datetime/np_datetime_strings.h> + +/* + * Function: scaleNanosecToUnit + * ----------------------------- + * + * Scales an integer value representing time in nanoseconds to provided unit. + * + * Mutates the provided value directly. Returns 0 on success, non-zero on error. + */ +int scaleNanosecToUnit(npy_int64 *value, NPY_DATETIMEUNIT unit) { + switch (unit) { + case NPY_FR_ns: + break; + case NPY_FR_us: + *value /= 1000LL; + break; + case NPY_FR_ms: + *value /= 1000000LL; + break; + case NPY_FR_s: + *value /= 1000000000LL; + break; + default: + return -1; + } + + return 0; +} + +/* Converts the int64_t representation of a datetime to ISO; mutates len */ +char *int64ToIso(int64_t value, NPY_DATETIMEUNIT base, size_t *len) { + npy_datetimestruct dts; + int ret_code; + + pandas_datetime_to_datetimestruct(value, NPY_FR_ns, &dts); + + *len = (size_t)get_datetime_iso_8601_strlen(0, base); + char *result = PyObject_Malloc(*len); + + if (result == NULL) { + PyErr_NoMemory(); + return NULL; + } + + ret_code = make_iso_8601_datetime(&dts, result, *len, base); + if (ret_code != 0) { + PyErr_SetString(PyExc_ValueError, + "Could not convert datetime value to string"); + PyObject_Free(result); + } + + // Note that get_datetime_iso_8601_strlen just gives a generic size + // for ISO string conversion, not the actual size used + *len = strlen(result); + return result; +} + +npy_datetime NpyDateTimeToEpoch(npy_datetime dt, NPY_DATETIMEUNIT base) { + scaleNanosecToUnit(&dt, base); + return dt; +} + +/* Convert PyDatetime To ISO C-string. 
mutates len */ +char *PyDateTimeToIso(PyObject *obj, NPY_DATETIMEUNIT base, + size_t *len) { + npy_datetimestruct dts; + int ret; + + ret = convert_pydatetime_to_datetimestruct(obj, &dts); + if (ret != 0) { + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, + "Could not convert PyDateTime to numpy datetime"); + } + return NULL; + } + + *len = (size_t)get_datetime_iso_8601_strlen(0, base); + char *result = PyObject_Malloc(*len); + ret = make_iso_8601_datetime(&dts, result, *len, base); + + if (ret != 0) { + PyErr_SetString(PyExc_ValueError, + "Could not convert datetime value to string"); + PyObject_Free(result); + return NULL; + } + + // Note that get_datetime_iso_8601_strlen just gives a generic size + // for ISO string conversion, not the actual size used + *len = strlen(result); + return result; +} + +npy_datetime PyDateTimeToEpoch(PyObject *dt, NPY_DATETIMEUNIT base) { + npy_datetimestruct dts; + int ret; + + ret = convert_pydatetime_to_datetimestruct(dt, &dts); + if (ret != 0) { + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, + "Could not convert PyDateTime to numpy datetime"); + } + // TODO(username): is setting errMsg required? + // ((JSONObjectEncoder *)tc->encoder)->errorMsg = ""; + // return NULL; + } + + npy_datetime npy_dt = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts); + return NpyDateTimeToEpoch(npy_dt, base); +} + +/* Converts the int64_t representation of a duration to ISO; mutates len */ +char *int64ToIsoDuration(int64_t value, size_t *len) { + pandas_timedeltastruct tds; + int ret_code; + + pandas_timedelta_to_timedeltastruct(value, NPY_FR_ns, &tds); + + // Max theoretical length of ISO Duration with 64 bit day + // as the largest unit is 70 characters + 1 for a null terminator + char *result = PyObject_Malloc(71); + if (result == NULL) { + PyErr_NoMemory(); + return NULL; + } + + ret_code = make_iso_8601_timedelta(&tds, result, len); + if (ret_code == -1) { + PyErr_SetString(PyExc_ValueError, + "Could not convert timedelta value to string"); + PyObject_Free(result); + return NULL; + } + + return result; +} diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/src/ujson/python/date_conversions.h b/.local/lib/python3.8/site-packages/pandas/_libs/src/ujson/python/date_conversions.h new file mode 100644 index 0000000..efd707f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/src/ujson/python/date_conversions.h @@ -0,0 +1,39 @@ +/* +Copyright (c) 2020, PyData Development Team +All rights reserved. +Distributed under the terms of the BSD Simplified License. +The full license is in the LICENSE file, distributed with this software. +*/ + +#ifndef PANDAS__LIBS_SRC_UJSON_PYTHON_DATE_CONVERSIONS_H_ +#define PANDAS__LIBS_SRC_UJSON_PYTHON_DATE_CONVERSIONS_H_ + +#define PY_SSIZE_T_CLEAN +#include <Python.h> +#include <numpy/ndarraytypes.h> + +// Scales value inplace from nanosecond resolution to unit resolution +int scaleNanosecToUnit(npy_int64 *value, NPY_DATETIMEUNIT unit); + +// Converts an int64 object representing a date to ISO format +// up to precision `base` e.g.
base="s" yields 2020-01-03T00:00:00Z +// while base="ns" yields "2020-01-01T00:00:00.000000000Z" +// len is mutated to save the length of the returned string +char *int64ToIso(int64_t value, NPY_DATETIMEUNIT base, size_t *len); + +// TODO(username): this function doesn't do a lot; should augment or +// replace with scaleNanosecToUnit +npy_datetime NpyDateTimeToEpoch(npy_datetime dt, NPY_DATETIMEUNIT base); + +// Converts a Python object representing a Date / Datetime to ISO format +// up to precision `base` e.g. base="s" yields 2020-01-03T00:00:00Z +// while base="ns" yields "2020-01-01T00:00:00.000000000Z" +// len is mutated to save the length of the returned string +char *PyDateTimeToIso(PyObject *obj, NPY_DATETIMEUNIT base, size_t *len); + +// Convert a Python Date/Datetime to Unix epoch with resolution base +npy_datetime PyDateTimeToEpoch(PyObject *dt, NPY_DATETIMEUNIT base); + +char *int64ToIsoDuration(int64_t value, size_t *len); + +#endif // PANDAS__LIBS_SRC_UJSON_PYTHON_DATE_CONVERSIONS_H_ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/src/ujson/python/objToJSON.c b/.local/lib/python3.8/site-packages/pandas/_libs/src/ujson/python/objToJSON.c new file mode 100644 index 0000000..c460999 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/src/ujson/python/objToJSON.c @@ -0,0 +1,2110 @@ +/* +Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +* Neither the name of the ESN Social Software AB nor the +names of its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF +THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) +https://github.com/client9/stringencoders +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights +reserved. + +Numeric decoder derived from from TCL library +https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms +* Copyright (c) 1988-1993 The Regents of the University of California. +* Copyright (c) 1994 Sun Microsystems, Inc. 
+*/ + +#define PY_SSIZE_T_CLEAN +#include <Python.h> +#include <math.h> + +#define NO_IMPORT_ARRAY +#define PY_ARRAY_UNIQUE_SYMBOL UJSON_NUMPY +#include <numpy/arrayobject.h> +#include <numpy/arrayscalars.h> +#include <numpy/ndarraytypes.h> +#include <numpy/npy_math.h> +#include <ultrajson.h> +#include "date_conversions.h" +#include "datetime.h" + +static PyTypeObject *type_decimal; +static PyTypeObject *cls_dataframe; +static PyTypeObject *cls_series; +static PyTypeObject *cls_index; +static PyTypeObject *cls_nat; +static PyTypeObject *cls_na; +PyObject *cls_timedelta; + +npy_int64 get_nat(void) { return NPY_MIN_INT64; } + +typedef char *(*PFN_PyTypeToUTF8)(JSOBJ obj, JSONTypeContext *ti, + size_t *_outLen); + +typedef struct __NpyArrContext { + PyObject *array; + char *dataptr; + int curdim; // current dimension in array's order + int stridedim; // dimension we are striding over + int inc; // stride dimension increment (+/- 1) + npy_intp dim; + npy_intp stride; + npy_intp ndim; + npy_intp index[NPY_MAXDIMS]; + int type_num; + PyArray_GetItemFunc *getitem; + + char **rowLabels; + char **columnLabels; +} NpyArrContext; + +typedef struct __PdBlockContext { + int colIdx; + int ncols; + int transpose; + + NpyArrContext **npyCtxts; // NpyArrContext for each column +} PdBlockContext; + +typedef struct __TypeContext { + JSPFN_ITERBEGIN iterBegin; + JSPFN_ITEREND iterEnd; + JSPFN_ITERNEXT iterNext; + JSPFN_ITERGETNAME iterGetName; + JSPFN_ITERGETVALUE iterGetValue; + PFN_PyTypeToUTF8 PyTypeToUTF8; + PyObject *newObj; + PyObject *dictObj; + Py_ssize_t index; + Py_ssize_t size; + PyObject *itemValue; + PyObject *itemName; + PyObject *attrList; + PyObject *iterator; + + double doubleValue; + JSINT64 longValue; + + char *cStr; + NpyArrContext *npyarr; + PdBlockContext *pdblock; + int transpose; + char **rowLabels; + char **columnLabels; + npy_intp rowLabelsLen; + npy_intp columnLabelsLen; +} TypeContext; + +typedef struct __PyObjectEncoder { + JSONObjectEncoder enc; + + // pass through the NpyArrContext when encoding multi-dimensional arrays + NpyArrContext *npyCtxtPassthru; + + // pass through the PdBlockContext when encoding blocks + PdBlockContext *blkCtxtPassthru; + + // pass-through to encode numpy data directly + int npyType; + void *npyValue; + + int datetimeIso; + NPY_DATETIMEUNIT datetimeUnit; + + // output format style for pandas data types + int outputFormat; + int originalOutputFormat; + + PyObject *defaultHandler; +} PyObjectEncoder; + +#define GET_TC(__ptrtc) ((TypeContext *)((__ptrtc)->prv)) + +enum PANDAS_FORMAT { SPLIT, RECORDS, INDEX, COLUMNS, VALUES }; + +int PdBlock_iterNext(JSOBJ, JSONTypeContext *); + +void *initObjToJSON(void) { + PyObject *mod_pandas; + PyObject *mod_nattype; + PyObject *mod_natype; + PyObject *mod_decimal = PyImport_ImportModule("decimal"); + type_decimal = + (PyTypeObject *)PyObject_GetAttrString(mod_decimal, "Decimal"); + Py_DECREF(mod_decimal); + + PyDateTime_IMPORT; + + mod_pandas = PyImport_ImportModule("pandas"); + if (mod_pandas) { + cls_dataframe = + (PyTypeObject *)PyObject_GetAttrString(mod_pandas, "DataFrame"); + cls_index = (PyTypeObject *)PyObject_GetAttrString(mod_pandas, "Index"); + cls_series = + (PyTypeObject *)PyObject_GetAttrString(mod_pandas, "Series"); + Py_DECREF(mod_pandas); + } + + mod_nattype = PyImport_ImportModule("pandas._libs.tslibs.nattype"); + if (mod_nattype) { + cls_nat = + (PyTypeObject *)PyObject_GetAttrString(mod_nattype, "NaTType"); + Py_DECREF(mod_nattype); + } + + mod_natype = PyImport_ImportModule("pandas._libs.missing"); + if (mod_natype) { + cls_na = (PyTypeObject *)PyObject_GetAttrString(mod_natype, "NAType"); + Py_DECREF(mod_natype);
+ } + + // GH 31463 + return NULL; +} + +static TypeContext *createTypeContext(void) { + TypeContext *pc; + + pc = PyObject_Malloc(sizeof(TypeContext)); + if (!pc) { + PyErr_NoMemory(); + return NULL; + } + pc->newObj = NULL; + pc->dictObj = NULL; + pc->itemValue = NULL; + pc->itemName = NULL; + pc->attrList = NULL; + pc->index = 0; + pc->size = 0; + pc->longValue = 0; + pc->doubleValue = 0.0; + pc->cStr = NULL; + pc->npyarr = NULL; + pc->pdblock = NULL; + pc->rowLabels = NULL; + pc->columnLabels = NULL; + pc->transpose = 0; + pc->rowLabelsLen = 0; + pc->columnLabelsLen = 0; + + return pc; +} + +static PyObject *get_values(PyObject *obj) { + PyObject *values = NULL; + + if (PyObject_TypeCheck(obj, cls_index) || + PyObject_TypeCheck(obj, cls_series)) { + // The special cases to worry about are dt64tz and category[dt64tz]. + // In both cases we want the UTC-localized datetime64 ndarray, + // without going through and object array of Timestamps. + values = PyObject_GetAttrString(obj, "values"); + + if (values == NULL) { + // Clear so we can subsequently try another method + PyErr_Clear(); + } else if (PyObject_HasAttrString(values, "__array__")) { + // We may have gotten a Categorical or Sparse array so call np.array + Py_DECREF(values); + values = PyObject_CallMethod(values, "__array__", NULL); + } else if (!PyArray_CheckExact(values)) { + // Didn't get a numpy array, so keep trying + Py_DECREF(values); + values = NULL; + } + } + + if (values == NULL) { + PyObject *typeRepr = PyObject_Repr((PyObject *)Py_TYPE(obj)); + PyObject *repr; + if (PyObject_HasAttrString(obj, "dtype")) { + PyObject *dtype = PyObject_GetAttrString(obj, "dtype"); + repr = PyObject_Repr(dtype); + Py_DECREF(dtype); + } else { + repr = PyUnicode_FromString(""); + } + + PyErr_Format(PyExc_ValueError, "%R or %R are not JSON serializable yet", + repr, typeRepr); + Py_DECREF(repr); + Py_DECREF(typeRepr); + + return NULL; + } + + return values; +} + +static PyObject *get_sub_attr(PyObject *obj, char *attr, char *subAttr) { + PyObject *tmp = PyObject_GetAttrString(obj, attr); + PyObject *ret; + + if (tmp == 0) { + return 0; + } + ret = PyObject_GetAttrString(tmp, subAttr); + Py_DECREF(tmp); + + return ret; +} + +static Py_ssize_t get_attr_length(PyObject *obj, char *attr) { + PyObject *tmp = PyObject_GetAttrString(obj, attr); + Py_ssize_t ret; + + if (tmp == 0) { + return 0; + } + ret = PyObject_Length(tmp); + Py_DECREF(tmp); + + if (ret == -1) { + return 0; + } + + return ret; +} + +static int is_simple_frame(PyObject *obj) { + PyObject *mgr = PyObject_GetAttrString(obj, "_mgr"); + if (!mgr) { + return 0; + } + int ret; + if (PyObject_HasAttrString(mgr, "blocks")) { + ret = (get_attr_length(mgr, "blocks") <= 1); + } else { + ret = 0; + } + + Py_DECREF(mgr); + return ret; +} + +static npy_int64 get_long_attr(PyObject *o, const char *attr) { + npy_int64 long_val; + PyObject *value = PyObject_GetAttrString(o, attr); + long_val = + (PyLong_Check(value) ? 
PyLong_AsLongLong(value) : PyLong_AsLong(value)); + Py_DECREF(value); + return long_val; +} + +static npy_float64 total_seconds(PyObject *td) { + npy_float64 double_val; + PyObject *value = PyObject_CallMethod(td, "total_seconds", NULL); + double_val = PyFloat_AS_DOUBLE(value); + Py_DECREF(value); + return double_val; +} + +static char *PyBytesToUTF8(JSOBJ _obj, JSONTypeContext *Py_UNUSED(tc), + size_t *_outLen) { + PyObject *obj = (PyObject *)_obj; + *_outLen = PyBytes_GET_SIZE(obj); + return PyBytes_AS_STRING(obj); +} + +static char *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *Py_UNUSED(tc), + size_t *_outLen) { + return (char *)PyUnicode_AsUTF8AndSize(_obj, (Py_ssize_t *)_outLen); +} + +/* JSON callback. returns a char* and mutates the pointer to *len */ +static char *NpyDateTimeToIsoCallback(JSOBJ Py_UNUSED(unused), + JSONTypeContext *tc, size_t *len) { + NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit; + return int64ToIso(GET_TC(tc)->longValue, base, len); +} + +/* JSON callback. returns a char* and mutates the pointer to *len */ +static char *NpyTimeDeltaToIsoCallback(JSOBJ Py_UNUSED(unused), + JSONTypeContext *tc, size_t *len) { + return int64ToIsoDuration(GET_TC(tc)->longValue, len); +} + +/* JSON callback */ +static char *PyDateTimeToIsoCallback(JSOBJ obj, JSONTypeContext *tc, + size_t *len) { + if (!PyDate_Check(obj)) { + PyErr_SetString(PyExc_TypeError, "Expected date object"); + return NULL; + } + + NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit; + return PyDateTimeToIso(obj, base, len); +} + +static char *PyTimeToJSON(JSOBJ _obj, JSONTypeContext *tc, size_t *outLen) { + PyObject *obj = (PyObject *)_obj; + PyObject *str; + PyObject *tmp; + + str = PyObject_CallMethod(obj, "isoformat", NULL); + if (str == NULL) { + *outLen = 0; + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, "Failed to convert time"); + } + ((JSONObjectEncoder *)tc->encoder)->errorMsg = ""; + return NULL; + } + if (PyUnicode_Check(str)) { + tmp = str; + str = PyUnicode_AsUTF8String(str); + Py_DECREF(tmp); + } + + GET_TC(tc)->newObj = str; + + *outLen = PyBytes_GET_SIZE(str); + char *outValue = PyBytes_AS_STRING(str); + return outValue; +} + +//============================================================================= +// Numpy array iteration functions +//============================================================================= + +static void NpyArr_freeItemValue(JSOBJ Py_UNUSED(_obj), JSONTypeContext *tc) { + if (GET_TC(tc)->npyarr && + GET_TC(tc)->itemValue != GET_TC(tc)->npyarr->array) { + Py_XDECREF(GET_TC(tc)->itemValue); + GET_TC(tc)->itemValue = NULL; + } +} + +int NpyArr_iterNextNone(JSOBJ Py_UNUSED(_obj), JSONTypeContext *Py_UNUSED(tc)) { + return 0; +} + +void NpyArr_iterBegin(JSOBJ _obj, JSONTypeContext *tc) { + PyArrayObject *obj; + NpyArrContext *npyarr; + + if (GET_TC(tc)->newObj) { + obj = (PyArrayObject *)GET_TC(tc)->newObj; + } else { + obj = (PyArrayObject *)_obj; + } + + npyarr = PyObject_Malloc(sizeof(NpyArrContext)); + GET_TC(tc)->npyarr = npyarr; + + if (!npyarr) { + PyErr_NoMemory(); + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + return; + } + + npyarr->array = (PyObject *)obj; + npyarr->getitem = (PyArray_GetItemFunc *)PyArray_DESCR(obj)->f->getitem; + npyarr->dataptr = PyArray_DATA(obj); + npyarr->ndim = PyArray_NDIM(obj) - 1; + npyarr->curdim = 0; + npyarr->type_num = PyArray_DESCR(obj)->type_num; + + if (GET_TC(tc)->transpose) { + npyarr->dim = PyArray_DIM(obj, npyarr->ndim); + npyarr->stride = PyArray_STRIDE(obj, 
npyarr->ndim); + npyarr->stridedim = npyarr->ndim; + npyarr->index[npyarr->ndim] = 0; + npyarr->inc = -1; + } else { + npyarr->dim = PyArray_DIM(obj, 0); + npyarr->stride = PyArray_STRIDE(obj, 0); + npyarr->stridedim = 0; + npyarr->index[0] = 0; + npyarr->inc = 1; + } + + npyarr->columnLabels = GET_TC(tc)->columnLabels; + npyarr->rowLabels = GET_TC(tc)->rowLabels; +} + +void NpyArr_iterEnd(JSOBJ obj, JSONTypeContext *tc) { + NpyArrContext *npyarr = GET_TC(tc)->npyarr; + + if (npyarr) { + NpyArr_freeItemValue(obj, tc); + PyObject_Free(npyarr); + } +} + +void NpyArrPassThru_iterBegin(JSOBJ Py_UNUSED(obj), + JSONTypeContext *Py_UNUSED(tc)) {} + +void NpyArrPassThru_iterEnd(JSOBJ obj, JSONTypeContext *tc) { + NpyArrContext *npyarr = GET_TC(tc)->npyarr; + // finished this dimension, reset the data pointer + npyarr->curdim--; + npyarr->dataptr -= npyarr->stride * npyarr->index[npyarr->stridedim]; + npyarr->stridedim -= npyarr->inc; + npyarr->dim = PyArray_DIM(npyarr->array, npyarr->stridedim); + npyarr->stride = PyArray_STRIDE(npyarr->array, npyarr->stridedim); + npyarr->dataptr += npyarr->stride; + + NpyArr_freeItemValue(obj, tc); +} + +int NpyArr_iterNextItem(JSOBJ obj, JSONTypeContext *tc) { + NpyArrContext *npyarr = GET_TC(tc)->npyarr; + + if (PyErr_Occurred()) { + return 0; + } + + if (npyarr->index[npyarr->stridedim] >= npyarr->dim) { + return 0; + } + + NpyArr_freeItemValue(obj, tc); + + if (PyArray_ISDATETIME(npyarr->array)) { + GET_TC(tc)->itemValue = obj; + Py_INCREF(obj); + ((PyObjectEncoder *)tc->encoder)->npyType = PyArray_TYPE(npyarr->array); + ((PyObjectEncoder *)tc->encoder)->npyValue = npyarr->dataptr; + ((PyObjectEncoder *)tc->encoder)->npyCtxtPassthru = npyarr; + } else { + GET_TC(tc)->itemValue = npyarr->getitem(npyarr->dataptr, npyarr->array); + } + + npyarr->dataptr += npyarr->stride; + npyarr->index[npyarr->stridedim]++; + return 1; +} + +int NpyArr_iterNext(JSOBJ _obj, JSONTypeContext *tc) { + NpyArrContext *npyarr = GET_TC(tc)->npyarr; + + if (PyErr_Occurred()) { + return 0; + } + + if (npyarr->curdim >= npyarr->ndim || + npyarr->index[npyarr->stridedim] >= npyarr->dim) { + // innermost dimension, start retrieving item values + GET_TC(tc)->iterNext = NpyArr_iterNextItem; + return NpyArr_iterNextItem(_obj, tc); + } + + // dig a dimension deeper + npyarr->index[npyarr->stridedim]++; + + npyarr->curdim++; + npyarr->stridedim += npyarr->inc; + npyarr->dim = PyArray_DIM(npyarr->array, npyarr->stridedim); + npyarr->stride = PyArray_STRIDE(npyarr->array, npyarr->stridedim); + npyarr->index[npyarr->stridedim] = 0; + + ((PyObjectEncoder *)tc->encoder)->npyCtxtPassthru = npyarr; + GET_TC(tc)->itemValue = npyarr->array; + return 1; +} + +JSOBJ NpyArr_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->itemValue; +} + +char *NpyArr_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, + size_t *outLen) { + NpyArrContext *npyarr = GET_TC(tc)->npyarr; + npy_intp idx; + char *cStr; + + if (GET_TC(tc)->iterNext == NpyArr_iterNextItem) { + idx = npyarr->index[npyarr->stridedim] - 1; + cStr = npyarr->columnLabels[idx]; + } else { + idx = npyarr->index[npyarr->stridedim - npyarr->inc] - 1; + cStr = npyarr->rowLabels[idx]; + } + + *outLen = strlen(cStr); + + return cStr; +} + +//============================================================================= +// Pandas block iteration functions +// +// Serialises a DataFrame column by column to avoid unnecessary data copies and +// more representative serialisation when dealing with mixed dtypes. 
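+// (e.g. a frame with one int64 column and one object column keeps each +// column's own dtype instead of serialising a single upcast 2-D object +// array -- editorial note, not in the upstream file)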
+// +// Uses a dedicated NpyArrContext for each column. +//============================================================================= + +void PdBlockPassThru_iterEnd(JSOBJ obj, JSONTypeContext *tc) { + PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; + + if (blkCtxt->transpose) { + blkCtxt->colIdx++; + } else { + blkCtxt->colIdx = 0; + } + + NpyArr_freeItemValue(obj, tc); +} + +int PdBlock_iterNextItem(JSOBJ obj, JSONTypeContext *tc) { + PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; + + if (blkCtxt->colIdx >= blkCtxt->ncols) { + return 0; + } + + GET_TC(tc)->npyarr = blkCtxt->npyCtxts[blkCtxt->colIdx]; + blkCtxt->colIdx++; + return NpyArr_iterNextItem(obj, tc); +} + +char *PdBlock_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, + size_t *outLen) { + PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; + NpyArrContext *npyarr = blkCtxt->npyCtxts[0]; + npy_intp idx; + char *cStr; + + if (GET_TC(tc)->iterNext == PdBlock_iterNextItem) { + idx = blkCtxt->colIdx - 1; + cStr = npyarr->columnLabels[idx]; + } else { + idx = GET_TC(tc)->iterNext != PdBlock_iterNext + ? npyarr->index[npyarr->stridedim - npyarr->inc] - 1 + : npyarr->index[npyarr->stridedim]; + + cStr = npyarr->rowLabels[idx]; + } + + *outLen = strlen(cStr); + return cStr; +} + +char *PdBlock_iterGetName_Transpose(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, + size_t *outLen) { + PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; + NpyArrContext *npyarr = blkCtxt->npyCtxts[blkCtxt->colIdx]; + npy_intp idx; + char *cStr; + + if (GET_TC(tc)->iterNext == NpyArr_iterNextItem) { + idx = npyarr->index[npyarr->stridedim] - 1; + cStr = npyarr->columnLabels[idx]; + } else { + idx = blkCtxt->colIdx; + cStr = npyarr->rowLabels[idx]; + } + + *outLen = strlen(cStr); + return cStr; +} + +int PdBlock_iterNext(JSOBJ obj, JSONTypeContext *tc) { + PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; + NpyArrContext *npyarr; + + if (PyErr_Occurred() || ((JSONObjectEncoder *)tc->encoder)->errorMsg) { + return 0; + } + + if (blkCtxt->transpose) { + if (blkCtxt->colIdx >= blkCtxt->ncols) { + return 0; + } + } else { + npyarr = blkCtxt->npyCtxts[0]; + if (npyarr->index[npyarr->stridedim] >= npyarr->dim) { + return 0; + } + } + + ((PyObjectEncoder *)tc->encoder)->blkCtxtPassthru = blkCtxt; + GET_TC(tc)->itemValue = obj; + + return 1; +} + +void PdBlockPassThru_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; + + if (blkCtxt->transpose) { + // if transposed we exhaust each column before moving to the next + GET_TC(tc)->iterNext = NpyArr_iterNextItem; + GET_TC(tc)->iterGetName = PdBlock_iterGetName_Transpose; + GET_TC(tc)->npyarr = blkCtxt->npyCtxts[blkCtxt->colIdx]; + } +} + +void PdBlock_iterBegin(JSOBJ _obj, JSONTypeContext *tc) { + PyObject *obj, *values, *arrays, *array; + PdBlockContext *blkCtxt; + NpyArrContext *npyarr; + Py_ssize_t i; + + obj = (PyObject *)_obj; + + GET_TC(tc)->iterGetName = GET_TC(tc)->transpose + ? 
PdBlock_iterGetName_Transpose + : PdBlock_iterGetName; + + blkCtxt = PyObject_Malloc(sizeof(PdBlockContext)); + if (!blkCtxt) { + PyErr_NoMemory(); + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + return; + } + GET_TC(tc)->pdblock = blkCtxt; + + blkCtxt->colIdx = 0; + blkCtxt->transpose = GET_TC(tc)->transpose; + blkCtxt->ncols = get_attr_length(obj, "columns"); + + if (blkCtxt->ncols == 0) { + blkCtxt->npyCtxts = NULL; + + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + return; + } + + blkCtxt->npyCtxts = + PyObject_Malloc(sizeof(NpyArrContext *) * blkCtxt->ncols); + if (!blkCtxt->npyCtxts) { + PyErr_NoMemory(); + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + return; + } + + arrays = get_sub_attr(obj, "_mgr", "column_arrays"); + if (!arrays) { + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + return; + } + + for (i = 0; i < PyObject_Length(arrays); i++) { + array = PyList_GET_ITEM(arrays, i); + if (!array) { + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + goto ARR_RET; + } + + // ensure we have a numpy array (i.e. np.asarray) + values = PyObject_CallMethod(array, "__array__", NULL); + if ((!values) || (!PyArray_CheckExact(values))) { + // Didn't get a numpy array + ((JSONObjectEncoder *)tc->encoder)->errorMsg = ""; + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + goto ARR_RET; + } + + GET_TC(tc)->newObj = values; + + // init a dedicated context for this column + NpyArr_iterBegin(obj, tc); + npyarr = GET_TC(tc)->npyarr; + + GET_TC(tc)->itemValue = NULL; + ((PyObjectEncoder *)tc->encoder)->npyCtxtPassthru = NULL; + + blkCtxt->npyCtxts[i] = npyarr; + GET_TC(tc)->newObj = NULL; + } + GET_TC(tc)->npyarr = blkCtxt->npyCtxts[0]; + goto ARR_RET; + +ARR_RET: + Py_DECREF(arrays); +} + +void PdBlock_iterEnd(JSOBJ obj, JSONTypeContext *tc) { + PdBlockContext *blkCtxt; + NpyArrContext *npyarr; + int i; + + GET_TC(tc)->itemValue = NULL; + npyarr = GET_TC(tc)->npyarr; + + blkCtxt = GET_TC(tc)->pdblock; + + if (blkCtxt) { + for (i = 0; i < blkCtxt->ncols; i++) { + npyarr = blkCtxt->npyCtxts[i]; + if (npyarr) { + if (npyarr->array) { + Py_DECREF(npyarr->array); + npyarr->array = NULL; + } + + GET_TC(tc)->npyarr = npyarr; + NpyArr_iterEnd(obj, tc); + + blkCtxt->npyCtxts[i] = NULL; + } + } + + if (blkCtxt->npyCtxts) { + PyObject_Free(blkCtxt->npyCtxts); + } + PyObject_Free(blkCtxt); + } +} + +//============================================================================= +// Tuple iteration functions +// itemValue is borrowed reference, no ref counting +//============================================================================= +void Tuple_iterBegin(JSOBJ obj, JSONTypeContext *tc) { + GET_TC(tc)->index = 0; + GET_TC(tc)->size = PyTuple_GET_SIZE((PyObject *)obj); + GET_TC(tc)->itemValue = NULL; +} + +int Tuple_iterNext(JSOBJ obj, JSONTypeContext *tc) { + PyObject *item; + + if (GET_TC(tc)->index >= GET_TC(tc)->size) { + return 0; + } + + item = PyTuple_GET_ITEM(obj, GET_TC(tc)->index); + + GET_TC(tc)->itemValue = item; + GET_TC(tc)->index++; + return 1; +} + +void Tuple_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *Py_UNUSED(tc)) {} + +JSOBJ Tuple_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->itemValue; +} + +char *Tuple_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *Py_UNUSED(tc), + size_t *Py_UNUSED(outLen)) { + return NULL; +} + +//============================================================================= +// Set iteration functions +// itemValue is borrowed reference, no ref counting +//============================================================================= 
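+// (Sets carry no key names -- Set_iterGetName below returns NULL -- so they +// are serialised as JSON arrays in whatever order PyIter_Next yields; +// editorial note, not in the upstream file)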
+void Set_iterBegin(JSOBJ obj, JSONTypeContext *tc) { + GET_TC(tc)->itemValue = NULL; + GET_TC(tc)->iterator = PyObject_GetIter(obj); +} + +int Set_iterNext(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + PyObject *item; + + if (GET_TC(tc)->itemValue) { + Py_DECREF(GET_TC(tc)->itemValue); + GET_TC(tc)->itemValue = NULL; + } + + item = PyIter_Next(GET_TC(tc)->iterator); + + if (item == NULL) { + return 0; + } + + GET_TC(tc)->itemValue = item; + return 1; +} + +void Set_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + if (GET_TC(tc)->itemValue) { + Py_DECREF(GET_TC(tc)->itemValue); + GET_TC(tc)->itemValue = NULL; + } + + if (GET_TC(tc)->iterator) { + Py_DECREF(GET_TC(tc)->iterator); + GET_TC(tc)->iterator = NULL; + } +} + +JSOBJ Set_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->itemValue; +} + +char *Set_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *Py_UNUSED(tc), + size_t *Py_UNUSED(outLen)) { + return NULL; +} + +//============================================================================= +// Dir iteration functions +// itemName ref is borrowed from PyObject_Dir (attrList). No refcount +// itemValue ref is from PyObject_GetAttr. Ref counted +//============================================================================= +void Dir_iterBegin(JSOBJ obj, JSONTypeContext *tc) { + GET_TC(tc)->attrList = PyObject_Dir(obj); + GET_TC(tc)->index = 0; + GET_TC(tc)->size = PyList_GET_SIZE(GET_TC(tc)->attrList); +} + +void Dir_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + if (GET_TC(tc)->itemValue) { + Py_DECREF(GET_TC(tc)->itemValue); + GET_TC(tc)->itemValue = NULL; + } + + if (GET_TC(tc)->itemName) { + Py_DECREF(GET_TC(tc)->itemName); + GET_TC(tc)->itemName = NULL; + } + + Py_DECREF((PyObject *)GET_TC(tc)->attrList); +} + +int Dir_iterNext(JSOBJ _obj, JSONTypeContext *tc) { + PyObject *obj = (PyObject *)_obj; + PyObject *itemValue = GET_TC(tc)->itemValue; + PyObject *itemName = GET_TC(tc)->itemName; + PyObject *attr; + PyObject *attrName; + char *attrStr; + + if (PyErr_Occurred() || ((JSONObjectEncoder *)tc->encoder)->errorMsg) { + return 0; + } + + if (itemValue) { + Py_DECREF(GET_TC(tc)->itemValue); + GET_TC(tc)->itemValue = itemValue = NULL; + } + + if (itemName) { + Py_DECREF(GET_TC(tc)->itemName); + GET_TC(tc)->itemName = itemName = NULL; + } + + for (; GET_TC(tc)->index < GET_TC(tc)->size; GET_TC(tc)->index++) { + attrName = PyList_GET_ITEM(GET_TC(tc)->attrList, GET_TC(tc)->index); + attr = PyUnicode_AsUTF8String(attrName); + attrStr = PyBytes_AS_STRING(attr); + + if (attrStr[0] == '_') { + Py_DECREF(attr); + continue; + } + + itemValue = PyObject_GetAttr(obj, attrName); + if (itemValue == NULL) { + PyErr_Clear(); + Py_DECREF(attr); + continue; + } + + if (PyCallable_Check(itemValue)) { + Py_DECREF(itemValue); + Py_DECREF(attr); + continue; + } + + GET_TC(tc)->itemName = itemName; + GET_TC(tc)->itemValue = itemValue; + + itemName = attr; + break; + } + + if (itemName == NULL) { + GET_TC(tc)->index = GET_TC(tc)->size; + GET_TC(tc)->itemValue = NULL; + return 0; + } + + GET_TC(tc)->itemName = itemName; + GET_TC(tc)->itemValue = itemValue; + GET_TC(tc)->index++; + + return 1; +} + +JSOBJ Dir_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->itemValue; +} + +char *Dir_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, + size_t *outLen) { + *outLen = PyBytes_GET_SIZE(GET_TC(tc)->itemName); + return PyBytes_AS_STRING(GET_TC(tc)->itemName); +} + 
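+// --------------------------------------------------------------------------- +// Editorial sketch, not part of the upstream file: a minimal illustration of +// how the encoder core in ultrajsonenc.c drives the iterBegin/iterNext/ +// iterGetValue/iterGetName/iterEnd callbacks registered by the sections in +// this file. Guarded by #if 0 so it is never compiled; the dispatch detail +// is an assumption based on this file's TypeContext. +// --------------------------------------------------------------------------- +#if 0 +static void encode_container_sketch(JSOBJ obj, JSONTypeContext *tc) { + size_t len; + GET_TC(tc)->iterBegin(obj, tc); // e.g. List_iterBegin below + while (GET_TC(tc)->iterNext(obj, tc)) { // returns 0 when exhausted + JSOBJ item = GET_TC(tc)->iterGetValue(obj, tc); + char *name = GET_TC(tc)->iterGetName(obj, tc, &len); // NULL for arrays + // ...recursively encode `item`, emitting `name` first inside objects... + } + GET_TC(tc)->iterEnd(obj, tc); // release any borrowed iteration state +} +#endif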
+//============================================================================= +// List iteration functions +// itemValue is borrowed from object (which is list). No refcounting +//============================================================================= +void List_iterBegin(JSOBJ obj, JSONTypeContext *tc) { + GET_TC(tc)->index = 0; + GET_TC(tc)->size = PyList_GET_SIZE((PyObject *)obj); +} + +int List_iterNext(JSOBJ obj, JSONTypeContext *tc) { + if (GET_TC(tc)->index >= GET_TC(tc)->size) { + return 0; + } + + GET_TC(tc)->itemValue = PyList_GET_ITEM(obj, GET_TC(tc)->index); + GET_TC(tc)->index++; + return 1; +} + +void List_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *Py_UNUSED(tc)) {} + +JSOBJ List_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->itemValue; +} + +char *List_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *Py_UNUSED(tc), + size_t *Py_UNUSED(outLen)) { + return NULL; +} + +//============================================================================= +// pandas Index iteration functions +//============================================================================= +void Index_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + GET_TC(tc)->index = 0; + GET_TC(tc)->cStr = PyObject_Malloc(20 * sizeof(char)); + if (!GET_TC(tc)->cStr) { + PyErr_NoMemory(); + } +} + +int Index_iterNext(JSOBJ obj, JSONTypeContext *tc) { + Py_ssize_t index; + if (!GET_TC(tc)->cStr) { + return 0; + } + + index = GET_TC(tc)->index; + Py_XDECREF(GET_TC(tc)->itemValue); + if (index == 0) { + memcpy(GET_TC(tc)->cStr, "name", sizeof(char) * 5); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name"); + } else if (index == 1) { + memcpy(GET_TC(tc)->cStr, "data", sizeof(char) * 5); + GET_TC(tc)->itemValue = get_values(obj); + if (!GET_TC(tc)->itemValue) { + return 0; + } + } else { + return 0; + } + + GET_TC(tc)->index++; + return 1; +} + +void Index_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *Py_UNUSED(tc)) {} + +JSOBJ Index_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->itemValue; +} + +char *Index_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, + size_t *outLen) { + *outLen = strlen(GET_TC(tc)->cStr); + return GET_TC(tc)->cStr; +} + +//============================================================================= +// pandas Series iteration functions +//============================================================================= +void Series_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder; + GET_TC(tc)->index = 0; + GET_TC(tc)->cStr = PyObject_Malloc(20 * sizeof(char)); + enc->outputFormat = VALUES; // for contained series + if (!GET_TC(tc)->cStr) { + PyErr_NoMemory(); + } +} + +int Series_iterNext(JSOBJ obj, JSONTypeContext *tc) { + Py_ssize_t index; + if (!GET_TC(tc)->cStr) { + return 0; + } + + index = GET_TC(tc)->index; + Py_XDECREF(GET_TC(tc)->itemValue); + if (index == 0) { + memcpy(GET_TC(tc)->cStr, "name", sizeof(char) * 5); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name"); + } else if (index == 1) { + memcpy(GET_TC(tc)->cStr, "index", sizeof(char) * 6); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index"); + } else if (index == 2) { + memcpy(GET_TC(tc)->cStr, "data", sizeof(char) * 5); + GET_TC(tc)->itemValue = get_values(obj); + if (!GET_TC(tc)->itemValue) { + return 0; + } + } else { + return 0; + } + + GET_TC(tc)->index++; + return 1; +} + +void Series_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + 
PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder; + enc->outputFormat = enc->originalOutputFormat; +} + +JSOBJ Series_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->itemValue; +} + +char *Series_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, + size_t *outLen) { + *outLen = strlen(GET_TC(tc)->cStr); + return GET_TC(tc)->cStr; +} + +//============================================================================= +// pandas DataFrame iteration functions +//============================================================================= +void DataFrame_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder; + GET_TC(tc)->index = 0; + GET_TC(tc)->cStr = PyObject_Malloc(20 * sizeof(char)); + enc->outputFormat = VALUES; // for contained series & index + if (!GET_TC(tc)->cStr) { + PyErr_NoMemory(); + } +} + +int DataFrame_iterNext(JSOBJ obj, JSONTypeContext *tc) { + Py_ssize_t index; + if (!GET_TC(tc)->cStr) { + return 0; + } + + index = GET_TC(tc)->index; + Py_XDECREF(GET_TC(tc)->itemValue); + if (index == 0) { + memcpy(GET_TC(tc)->cStr, "columns", sizeof(char) * 8); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "columns"); + } else if (index == 1) { + memcpy(GET_TC(tc)->cStr, "index", sizeof(char) * 6); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index"); + } else if (index == 2) { + memcpy(GET_TC(tc)->cStr, "data", sizeof(char) * 5); + if (is_simple_frame(obj)) { + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "values"); + if (!GET_TC(tc)->itemValue) { + return 0; + } + } else { + Py_INCREF(obj); + GET_TC(tc)->itemValue = obj; + } + } else { + return 0; + } + + GET_TC(tc)->index++; + return 1; +} + +void DataFrame_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder; + enc->outputFormat = enc->originalOutputFormat; +} + +JSOBJ DataFrame_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->itemValue; +} + +char *DataFrame_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, + size_t *outLen) { + *outLen = strlen(GET_TC(tc)->cStr); + return GET_TC(tc)->cStr; +} + +//============================================================================= +// Dict iteration functions +// itemName might converted to string (Python_Str). Do refCounting +// itemValue is borrowed from object (which is dict). 
No refCounting +//============================================================================= +void Dict_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + GET_TC(tc)->index = 0; +} + +int Dict_iterNext(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + PyObject *itemNameTmp; + + if (GET_TC(tc)->itemName) { + Py_DECREF(GET_TC(tc)->itemName); + GET_TC(tc)->itemName = NULL; + } + + if (!PyDict_Next((PyObject *)GET_TC(tc)->dictObj, &GET_TC(tc)->index, + &GET_TC(tc)->itemName, &GET_TC(tc)->itemValue)) { + return 0; + } + + if (PyUnicode_Check(GET_TC(tc)->itemName)) { + GET_TC(tc)->itemName = PyUnicode_AsUTF8String(GET_TC(tc)->itemName); + } else if (!PyBytes_Check(GET_TC(tc)->itemName)) { + GET_TC(tc)->itemName = PyObject_Str(GET_TC(tc)->itemName); + itemNameTmp = GET_TC(tc)->itemName; + GET_TC(tc)->itemName = PyUnicode_AsUTF8String(GET_TC(tc)->itemName); + Py_DECREF(itemNameTmp); + } else { + Py_INCREF(GET_TC(tc)->itemName); + } + return 1; +} + +void Dict_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + if (GET_TC(tc)->itemName) { + Py_DECREF(GET_TC(tc)->itemName); + GET_TC(tc)->itemName = NULL; + } + Py_DECREF(GET_TC(tc)->dictObj); +} + +JSOBJ Dict_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->itemValue; +} + +char *Dict_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, + size_t *outLen) { + *outLen = PyBytes_GET_SIZE(GET_TC(tc)->itemName); + return PyBytes_AS_STRING(GET_TC(tc)->itemName); +} + +void NpyArr_freeLabels(char **labels, npy_intp len) { + npy_intp i; + + if (labels) { + for (i = 0; i < len; i++) { + PyObject_Free(labels[i]); + } + PyObject_Free(labels); + } +} + +/* + * Function: NpyArr_encodeLabels + * ----------------------------- + * + * Builds an array of "encoded" labels. + * + * labels: PyArrayObject pointer for labels to be "encoded" + * num : number of labels + * + * "encode" is quoted above because we aren't really doing encoding + * For historical reasons this function would actually encode the entire + * array into a separate buffer with a separate call to JSON_Encode + * and would leave it to complex pointer manipulation from there to + * unpack values as needed. To make things simpler and more idiomatic + * this has instead just stringified any input save for datetime values, + * which may need to be represented in various formats. + */ +char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, + npy_intp num) { + // NOTE this function steals a reference to labels. 
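+ // (illustrative editorial note: a datetime64 label becomes an ISO-8601 + // string when enc->datetimeIso is set and an epoch integer otherwise; + // non-datetimelike labels are stringified via PyObject_Str below)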
+ PyObject *item = NULL; + size_t len; + npy_intp i, stride; + char **ret; + char *dataptr, *cLabel; + int type_num; + NPY_DATETIMEUNIT base = enc->datetimeUnit; + + if (!labels) { + return 0; + } + + if (PyArray_SIZE(labels) < num) { + PyErr_SetString( + PyExc_ValueError, + "Label array sizes do not match corresponding data shape"); + Py_DECREF(labels); + return 0; + } + + ret = PyObject_Malloc(sizeof(char *) * num); + if (!ret) { + PyErr_NoMemory(); + Py_DECREF(labels); + return 0; + } + + for (i = 0; i < num; i++) { + ret[i] = NULL; + } + + stride = PyArray_STRIDE(labels, 0); + dataptr = PyArray_DATA(labels); + type_num = PyArray_TYPE(labels); + + for (i = 0; i < num; i++) { + item = PyArray_GETITEM(labels, dataptr); + if (!item) { + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + + int is_datetimelike = 0; + npy_int64 nanosecVal; + if (PyTypeNum_ISDATETIME(type_num)) { + is_datetimelike = 1; + PyArray_VectorUnaryFunc *castfunc = + PyArray_GetCastFunc(PyArray_DescrFromType(type_num), NPY_INT64); + if (!castfunc) { + PyErr_Format(PyExc_ValueError, + "Cannot cast numpy dtype %d to long", + enc->npyType); + } + castfunc(dataptr, &nanosecVal, 1, NULL, NULL); + } else if (PyDate_Check(item) || PyDelta_Check(item)) { + is_datetimelike = 1; + if (PyObject_HasAttrString(item, "value")) { + nanosecVal = get_long_attr(item, "value"); + } else { + if (PyDelta_Check(item)) { + nanosecVal = total_seconds(item) * + 1000000000LL; // nanoseconds per second + } else { + // datetime.* objects don't follow above rules + nanosecVal = PyDateTimeToEpoch(item, NPY_FR_ns); + } + } + } + + if (is_datetimelike) { + if (nanosecVal == get_nat()) { + len = 4; + cLabel = PyObject_Malloc(len + 1); + strncpy(cLabel, "null", len + 1); + } else { + if (enc->datetimeIso) { + if ((type_num == NPY_TIMEDELTA) || (PyDelta_Check(item))) { + cLabel = int64ToIsoDuration(nanosecVal, &len); + } else { + if (type_num == NPY_DATETIME) { + cLabel = int64ToIso(nanosecVal, base, &len); + } else { + cLabel = PyDateTimeToIso(item, base, &len); + } + } + if (cLabel == NULL) { + Py_DECREF(item); + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + } else { + int size_of_cLabel = 21; // 21 chars for int 64 + cLabel = PyObject_Malloc(size_of_cLabel); + snprintf(cLabel, size_of_cLabel, "%" NPY_DATETIME_FMT, + NpyDateTimeToEpoch(nanosecVal, base)); + len = strlen(cLabel); + } + } + } else { // Fallback to string representation + // Replace item with the string to keep it alive. 
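+ // (Py_SETREF stores the new reference before releasing the old one, + // so `item` is never left dangling -- editorial note)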
+ Py_SETREF(item, PyObject_Str(item)); + if (item == NULL) { + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + + cLabel = (char *)PyUnicode_AsUTF8(item); + len = strlen(cLabel); + } + + // Add 1 to include NULL terminator + ret[i] = PyObject_Malloc(len + 1); + memcpy(ret[i], cLabel, len + 1); + Py_DECREF(item); + + if (is_datetimelike) { + PyObject_Free(cLabel); + } + + if (PyErr_Occurred()) { + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + + if (!ret[i]) { + PyErr_NoMemory(); + ret = 0; + break; + } + + dataptr += stride; + } + + Py_DECREF(labels); + return ret; +} + +void Object_invokeDefaultHandler(PyObject *obj, PyObjectEncoder *enc) { + PyObject *tmpObj = NULL; + tmpObj = PyObject_CallFunctionObjArgs(enc->defaultHandler, obj, NULL); + if (!PyErr_Occurred()) { + if (tmpObj == NULL) { + PyErr_SetString(PyExc_TypeError, + "Failed to execute default handler"); + } else { + encode(tmpObj, (JSONObjectEncoder *)enc, NULL, 0); + } + } + Py_XDECREF(tmpObj); + return; +} + +void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { + PyObject *obj, *exc, *toDictFunc, *tmpObj, *values; + TypeContext *pc; + PyObjectEncoder *enc; + double val; + npy_int64 value; + int unit; + + tc->prv = NULL; + + if (!_obj) { + tc->type = JT_INVALID; + return; + } + + obj = (PyObject *)_obj; + enc = (PyObjectEncoder *)tc->encoder; + + if (PyBool_Check(obj)) { + tc->type = (obj == Py_True) ? JT_TRUE : JT_FALSE; + return; + } else if (obj == Py_None) { + tc->type = JT_NULL; + return; + } + + pc = createTypeContext(); + if (!pc) { + tc->type = JT_INVALID; + return; + } + tc->prv = pc; + + if (PyTypeNum_ISDATETIME(enc->npyType)) { + int64_t longVal; + PyArray_VectorUnaryFunc *castfunc = + PyArray_GetCastFunc(PyArray_DescrFromType(enc->npyType), NPY_INT64); + if (!castfunc) { + PyErr_Format(PyExc_ValueError, "Cannot cast numpy dtype %d to long", + enc->npyType); + } + castfunc(enc->npyValue, &longVal, 1, NULL, NULL); + if (longVal == get_nat()) { + tc->type = JT_NULL; + } else { + if (enc->datetimeIso) { + if (enc->npyType == NPY_TIMEDELTA) { + pc->PyTypeToUTF8 = NpyTimeDeltaToIsoCallback; + } else { + pc->PyTypeToUTF8 = NpyDateTimeToIsoCallback; + } + // Currently no way to pass longVal to iso function, so use + // state management + GET_TC(tc)->longValue = longVal; + tc->type = JT_UTF8; + } else { + NPY_DATETIMEUNIT base = + ((PyObjectEncoder *)tc->encoder)->datetimeUnit; + GET_TC(tc)->longValue = NpyDateTimeToEpoch(longVal, base); + tc->type = JT_LONG; + } + } + + // TODO(username): this prevents infinite loop with + // mixed-type DataFrames; + // refactor + enc->npyCtxtPassthru = NULL; + enc->npyType = -1; + return; + } + + if (PyIter_Check(obj) || + (PyArray_Check(obj) && !PyArray_CheckScalar(obj))) { + goto ISITERABLE; + } + + if (PyLong_Check(obj)) { + tc->type = JT_LONG; + int overflow = 0; + GET_TC(tc)->longValue = PyLong_AsLongLongAndOverflow(obj, &overflow); + int err; + err = (GET_TC(tc)->longValue == -1) && PyErr_Occurred(); + + if (overflow) { + tc->type = JT_BIGNUM; + } else if (err) { + goto INVALID; + } + + return; + } else if (PyFloat_Check(obj)) { + val = PyFloat_AS_DOUBLE(obj); + if (npy_isnan(val) || npy_isinf(val)) { + tc->type = JT_NULL; + } else { + GET_TC(tc)->doubleValue = val; + tc->type = JT_DOUBLE; + } + return; + } else if (PyBytes_Check(obj)) { + pc->PyTypeToUTF8 = PyBytesToUTF8; + tc->type = JT_UTF8; + return; + } else if (PyUnicode_Check(obj)) { + pc->PyTypeToUTF8 = PyUnicodeToUTF8; + tc->type = JT_UTF8; + return; + } else if (PyObject_TypeCheck(obj, type_decimal)) 
{ + GET_TC(tc)->doubleValue = PyFloat_AsDouble(obj); + tc->type = JT_DOUBLE; + return; + } else if (PyDateTime_Check(obj) || PyDate_Check(obj)) { + if (PyObject_TypeCheck(obj, cls_nat)) { + tc->type = JT_NULL; + return; + } + + if (enc->datetimeIso) { + pc->PyTypeToUTF8 = PyDateTimeToIsoCallback; + tc->type = JT_UTF8; + } else { + NPY_DATETIMEUNIT base = + ((PyObjectEncoder *)tc->encoder)->datetimeUnit; + GET_TC(tc)->longValue = PyDateTimeToEpoch(obj, base); + tc->type = JT_LONG; + } + return; + } else if (PyTime_Check(obj)) { + pc->PyTypeToUTF8 = PyTimeToJSON; + tc->type = JT_UTF8; + return; + } else if (PyArray_IsScalar(obj, Datetime)) { + if (((PyDatetimeScalarObject *)obj)->obval == get_nat()) { + tc->type = JT_NULL; + return; + } + + if (enc->datetimeIso) { + pc->PyTypeToUTF8 = PyDateTimeToIsoCallback; + tc->type = JT_UTF8; + } else { + NPY_DATETIMEUNIT base = + ((PyObjectEncoder *)tc->encoder)->datetimeUnit; + GET_TC(tc)->longValue = PyDateTimeToEpoch(obj, base); + tc->type = JT_LONG; + } + return; + } else if (PyDelta_Check(obj)) { + if (PyObject_HasAttrString(obj, "value")) { + value = get_long_attr(obj, "value"); + } else { + value = total_seconds(obj) * 1000000000LL; // nanoseconds per sec + } + + if (value == get_nat()) { + tc->type = JT_NULL; + return; + } else if (enc->datetimeIso) { + pc->PyTypeToUTF8 = NpyTimeDeltaToIsoCallback; + tc->type = JT_UTF8; + } else { + unit = ((PyObjectEncoder *)tc->encoder)->datetimeUnit; + if (scaleNanosecToUnit(&value, unit) != 0) { + // TODO(username): Add some kind of error handling here + } + + exc = PyErr_Occurred(); + + if (exc && PyErr_ExceptionMatches(PyExc_OverflowError)) { + goto INVALID; + } + + tc->type = JT_LONG; + } + GET_TC(tc)->longValue = value; + return; + } else if (PyArray_IsScalar(obj, Integer)) { + tc->type = JT_LONG; + PyArray_CastScalarToCtype(obj, &(GET_TC(tc)->longValue), + PyArray_DescrFromType(NPY_INT64)); + + exc = PyErr_Occurred(); + + if (exc && PyErr_ExceptionMatches(PyExc_OverflowError)) { + goto INVALID; + } + + return; + } else if (PyArray_IsScalar(obj, Bool)) { + PyArray_CastScalarToCtype(obj, &(GET_TC(tc)->longValue), + PyArray_DescrFromType(NPY_BOOL)); + tc->type = (GET_TC(tc)->longValue) ? 
JT_TRUE : JT_FALSE; + return; + } else if (PyArray_IsScalar(obj, Float) || PyArray_IsScalar(obj, Double)) { + PyArray_CastScalarToCtype(obj, &(GET_TC(tc)->doubleValue), + PyArray_DescrFromType(NPY_DOUBLE)); + tc->type = JT_DOUBLE; + return; + } else if (PyArray_Check(obj) && PyArray_CheckScalar(obj)) { + PyErr_Format(PyExc_TypeError, + "%R (0d array) is not JSON serializable at the moment", + obj); + goto INVALID; + } else if (PyObject_TypeCheck(obj, cls_na)) { + tc->type = JT_NULL; + return; + } + +ISITERABLE: + + if (PyObject_TypeCheck(obj, cls_index)) { + if (enc->outputFormat == SPLIT) { + tc->type = JT_OBJECT; + pc->iterBegin = Index_iterBegin; + pc->iterEnd = Index_iterEnd; + pc->iterNext = Index_iterNext; + pc->iterGetValue = Index_iterGetValue; + pc->iterGetName = Index_iterGetName; + return; + } + + pc->newObj = get_values(obj); + if (pc->newObj) { + tc->type = JT_ARRAY; + pc->iterBegin = NpyArr_iterBegin; + pc->iterEnd = NpyArr_iterEnd; + pc->iterNext = NpyArr_iterNext; + pc->iterGetValue = NpyArr_iterGetValue; + pc->iterGetName = NpyArr_iterGetName; + } else { + goto INVALID; + } + + return; + } else if (PyObject_TypeCheck(obj, cls_series)) { + if (enc->outputFormat == SPLIT) { + tc->type = JT_OBJECT; + pc->iterBegin = Series_iterBegin; + pc->iterEnd = Series_iterEnd; + pc->iterNext = Series_iterNext; + pc->iterGetValue = Series_iterGetValue; + pc->iterGetName = Series_iterGetName; + return; + } + + pc->newObj = get_values(obj); + if (!pc->newObj) { + goto INVALID; + } + + if (enc->outputFormat == INDEX || enc->outputFormat == COLUMNS) { + tc->type = JT_OBJECT; + tmpObj = PyObject_GetAttrString(obj, "index"); + if (!tmpObj) { + goto INVALID; + } + values = get_values(tmpObj); + Py_DECREF(tmpObj); + if (!values) { + goto INVALID; + } + pc->columnLabelsLen = PyArray_DIM(pc->newObj, 0); + pc->columnLabels = NpyArr_encodeLabels((PyArrayObject *)values, enc, + pc->columnLabelsLen); + if (!pc->columnLabels) { + goto INVALID; + } + } else { + tc->type = JT_ARRAY; + } + pc->iterBegin = NpyArr_iterBegin; + pc->iterEnd = NpyArr_iterEnd; + pc->iterNext = NpyArr_iterNext; + pc->iterGetValue = NpyArr_iterGetValue; + pc->iterGetName = NpyArr_iterGetName; + return; + } else if (PyArray_Check(obj)) { + if (enc->npyCtxtPassthru) { + pc->npyarr = enc->npyCtxtPassthru; + tc->type = (pc->npyarr->columnLabels ? JT_OBJECT : JT_ARRAY); + + pc->iterBegin = NpyArrPassThru_iterBegin; + pc->iterNext = NpyArr_iterNext; + pc->iterEnd = NpyArrPassThru_iterEnd; + pc->iterGetValue = NpyArr_iterGetValue; + pc->iterGetName = NpyArr_iterGetName; + + enc->npyCtxtPassthru = NULL; + return; + } + + tc->type = JT_ARRAY; + pc->iterBegin = NpyArr_iterBegin; + pc->iterEnd = NpyArr_iterEnd; + pc->iterNext = NpyArr_iterNext; + pc->iterGetValue = NpyArr_iterGetValue; + pc->iterGetName = NpyArr_iterGetName; + return; + } else if (PyObject_TypeCheck(obj, cls_dataframe)) { + if (enc->blkCtxtPassthru) { + pc->pdblock = enc->blkCtxtPassthru; + tc->type = + (pc->pdblock->npyCtxts[0]->columnLabels ? 
JT_OBJECT : JT_ARRAY); + + pc->iterBegin = PdBlockPassThru_iterBegin; + pc->iterEnd = PdBlockPassThru_iterEnd; + pc->iterNext = PdBlock_iterNextItem; + pc->iterGetName = PdBlock_iterGetName; + pc->iterGetValue = NpyArr_iterGetValue; + + enc->blkCtxtPassthru = NULL; + return; + } + + if (enc->outputFormat == SPLIT) { + tc->type = JT_OBJECT; + pc->iterBegin = DataFrame_iterBegin; + pc->iterEnd = DataFrame_iterEnd; + pc->iterNext = DataFrame_iterNext; + pc->iterGetValue = DataFrame_iterGetValue; + pc->iterGetName = DataFrame_iterGetName; + return; + } + + if (is_simple_frame(obj)) { + pc->iterBegin = NpyArr_iterBegin; + pc->iterEnd = NpyArr_iterEnd; + pc->iterNext = NpyArr_iterNext; + pc->iterGetName = NpyArr_iterGetName; + + pc->newObj = PyObject_GetAttrString(obj, "values"); + if (!pc->newObj) { + goto INVALID; + } + } else { + pc->iterBegin = PdBlock_iterBegin; + pc->iterEnd = PdBlock_iterEnd; + pc->iterNext = PdBlock_iterNext; + pc->iterGetName = PdBlock_iterGetName; + } + pc->iterGetValue = NpyArr_iterGetValue; + + if (enc->outputFormat == VALUES) { + tc->type = JT_ARRAY; + } else if (enc->outputFormat == RECORDS) { + tc->type = JT_ARRAY; + tmpObj = PyObject_GetAttrString(obj, "columns"); + if (!tmpObj) { + goto INVALID; + } + values = get_values(tmpObj); + if (!values) { + Py_DECREF(tmpObj); + goto INVALID; + } + pc->columnLabelsLen = PyObject_Size(tmpObj); + pc->columnLabels = NpyArr_encodeLabels((PyArrayObject *)values, enc, + pc->columnLabelsLen); + Py_DECREF(tmpObj); + if (!pc->columnLabels) { + goto INVALID; + } + } else if (enc->outputFormat == INDEX || enc->outputFormat == COLUMNS) { + tc->type = JT_OBJECT; + tmpObj = (enc->outputFormat == INDEX + ? PyObject_GetAttrString(obj, "index") + : PyObject_GetAttrString(obj, "columns")); + if (!tmpObj) { + goto INVALID; + } + values = get_values(tmpObj); + if (!values) { + Py_DECREF(tmpObj); + goto INVALID; + } + pc->rowLabelsLen = PyObject_Size(tmpObj); + pc->rowLabels = NpyArr_encodeLabels((PyArrayObject *)values, enc, + pc->rowLabelsLen); + Py_DECREF(tmpObj); + tmpObj = (enc->outputFormat == INDEX + ? 
PyObject_GetAttrString(obj, "columns") + : PyObject_GetAttrString(obj, "index")); + if (!tmpObj) { + NpyArr_freeLabels(pc->rowLabels, pc->rowLabelsLen); + pc->rowLabels = NULL; + goto INVALID; + } + values = get_values(tmpObj); + if (!values) { + Py_DECREF(tmpObj); + NpyArr_freeLabels(pc->rowLabels, pc->rowLabelsLen); + pc->rowLabels = NULL; + goto INVALID; + } + pc->columnLabelsLen = PyObject_Size(tmpObj); + pc->columnLabels = NpyArr_encodeLabels((PyArrayObject *)values, enc, + pc->columnLabelsLen); + Py_DECREF(tmpObj); + if (!pc->columnLabels) { + NpyArr_freeLabels(pc->rowLabels, pc->rowLabelsLen); + pc->rowLabels = NULL; + goto INVALID; + } + + if (enc->outputFormat == COLUMNS) { + pc->transpose = 1; + } + } else { + goto INVALID; + } + return; + } else if (PyDict_Check(obj)) { + tc->type = JT_OBJECT; + pc->iterBegin = Dict_iterBegin; + pc->iterEnd = Dict_iterEnd; + pc->iterNext = Dict_iterNext; + pc->iterGetValue = Dict_iterGetValue; + pc->iterGetName = Dict_iterGetName; + pc->dictObj = obj; + Py_INCREF(obj); + + return; + } else if (PyList_Check(obj)) { + tc->type = JT_ARRAY; + pc->iterBegin = List_iterBegin; + pc->iterEnd = List_iterEnd; + pc->iterNext = List_iterNext; + pc->iterGetValue = List_iterGetValue; + pc->iterGetName = List_iterGetName; + return; + } else if (PyTuple_Check(obj)) { + tc->type = JT_ARRAY; + pc->iterBegin = Tuple_iterBegin; + pc->iterEnd = Tuple_iterEnd; + pc->iterNext = Tuple_iterNext; + pc->iterGetValue = Tuple_iterGetValue; + pc->iterGetName = Tuple_iterGetName; + return; + } else if (PyAnySet_Check(obj)) { + tc->type = JT_ARRAY; + pc->iterBegin = Set_iterBegin; + pc->iterEnd = Set_iterEnd; + pc->iterNext = Set_iterNext; + pc->iterGetValue = Set_iterGetValue; + pc->iterGetName = Set_iterGetName; + return; + } + + toDictFunc = PyObject_GetAttrString(obj, "toDict"); + + if (toDictFunc) { + PyObject *tuple = PyTuple_New(0); + PyObject *toDictResult = PyObject_Call(toDictFunc, tuple, NULL); + Py_DECREF(tuple); + Py_DECREF(toDictFunc); + + if (toDictResult == NULL) { + PyErr_Clear(); + tc->type = JT_NULL; + return; + } + + if (!PyDict_Check(toDictResult)) { + Py_DECREF(toDictResult); + tc->type = JT_NULL; + return; + } + + tc->type = JT_OBJECT; + pc->iterBegin = Dict_iterBegin; + pc->iterEnd = Dict_iterEnd; + pc->iterNext = Dict_iterNext; + pc->iterGetValue = Dict_iterGetValue; + pc->iterGetName = Dict_iterGetName; + pc->dictObj = toDictResult; + return; + } + + PyErr_Clear(); + + if (enc->defaultHandler) { + Object_invokeDefaultHandler(obj, enc); + goto INVALID; + } + + tc->type = JT_OBJECT; + pc->iterBegin = Dir_iterBegin; + pc->iterEnd = Dir_iterEnd; + pc->iterNext = Dir_iterNext; + pc->iterGetValue = Dir_iterGetValue; + pc->iterGetName = Dir_iterGetName; + return; + +INVALID: + tc->type = JT_INVALID; + PyObject_Free(tc->prv); + tc->prv = NULL; + return; +} + +void Object_endTypeContext(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + if (tc->prv) { + Py_XDECREF(GET_TC(tc)->newObj); + GET_TC(tc)->newObj = NULL; + NpyArr_freeLabels(GET_TC(tc)->rowLabels, GET_TC(tc)->rowLabelsLen); + GET_TC(tc)->rowLabels = NULL; + NpyArr_freeLabels(GET_TC(tc)->columnLabels, + GET_TC(tc)->columnLabelsLen); + GET_TC(tc)->columnLabels = NULL; + PyObject_Free(GET_TC(tc)->cStr); + GET_TC(tc)->cStr = NULL; + PyObject_Free(tc->prv); + tc->prv = NULL; + } +} + +const char *Object_getStringValue(JSOBJ obj, JSONTypeContext *tc, + size_t *_outLen) { + return GET_TC(tc)->PyTypeToUTF8(obj, tc, _outLen); +} + +JSINT64 Object_getLongValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return 
GET_TC(tc)->longValue; +} + +double Object_getDoubleValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->doubleValue; +} + +const char *Object_getBigNumStringValue(JSOBJ obj, JSONTypeContext *tc, + size_t *_outLen) { + PyObject *repr = PyObject_Str(obj); + const char *str = PyUnicode_AsUTF8AndSize(repr, (Py_ssize_t *)_outLen); + char *bytes = PyObject_Malloc(*_outLen + 1); + memcpy(bytes, str, *_outLen + 1); + GET_TC(tc)->cStr = bytes; + + Py_DECREF(repr); + + return GET_TC(tc)->cStr; +} + +static void Object_releaseObject(JSOBJ _obj) { Py_DECREF((PyObject *)_obj); } + +void Object_iterBegin(JSOBJ obj, JSONTypeContext *tc) { + GET_TC(tc)->iterBegin(obj, tc); +} + +int Object_iterNext(JSOBJ obj, JSONTypeContext *tc) { + return GET_TC(tc)->iterNext(obj, tc); +} + +void Object_iterEnd(JSOBJ obj, JSONTypeContext *tc) { + GET_TC(tc)->iterEnd(obj, tc); +} + +JSOBJ Object_iterGetValue(JSOBJ obj, JSONTypeContext *tc) { + return GET_TC(tc)->iterGetValue(obj, tc); +} + +char *Object_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) { + return GET_TC(tc)->iterGetName(obj, tc, outLen); +} + +PyObject *objToJSON(PyObject *Py_UNUSED(self), PyObject *args, + PyObject *kwargs) { + static char *kwlist[] = {"obj", + "ensure_ascii", + "double_precision", + "encode_html_chars", + "orient", + "date_unit", + "iso_dates", + "default_handler", + "indent", + NULL}; + + char buffer[65536]; + char *ret; + PyObject *newobj; + PyObject *oinput = NULL; + PyObject *oensureAscii = NULL; + int idoublePrecision = 10; // default double precision setting + PyObject *oencodeHTMLChars = NULL; + char *sOrient = NULL; + char *sdateFormat = NULL; + PyObject *oisoDates = 0; + PyObject *odefHandler = 0; + int indent = 0; + + PyObjectEncoder pyEncoder = {{ + Object_beginTypeContext, + Object_endTypeContext, + Object_getStringValue, + Object_getLongValue, + NULL, // getIntValue is unused + Object_getDoubleValue, + Object_getBigNumStringValue, + Object_iterBegin, + Object_iterNext, + Object_iterEnd, + Object_iterGetValue, + Object_iterGetName, + Object_releaseObject, + PyObject_Malloc, + PyObject_Realloc, + PyObject_Free, + -1, // recursionMax + idoublePrecision, + 1, // forceAscii + 0, // encodeHTMLChars + 0, // indent + }}; + JSONObjectEncoder *encoder = (JSONObjectEncoder *)&pyEncoder; + + pyEncoder.npyCtxtPassthru = NULL; + pyEncoder.blkCtxtPassthru = NULL; + pyEncoder.npyType = -1; + pyEncoder.npyValue = NULL; + pyEncoder.datetimeIso = 0; + pyEncoder.datetimeUnit = NPY_FR_ms; + pyEncoder.outputFormat = COLUMNS; + pyEncoder.defaultHandler = 0; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OiOssOOi", kwlist, + &oinput, &oensureAscii, &idoublePrecision, + &oencodeHTMLChars, &sOrient, &sdateFormat, + &oisoDates, &odefHandler, &indent)) { + return NULL; + } + + if (oensureAscii != NULL && !PyObject_IsTrue(oensureAscii)) { + encoder->forceASCII = 0; + } + + if (oencodeHTMLChars != NULL && PyObject_IsTrue(oencodeHTMLChars)) { + encoder->encodeHTMLChars = 1; + } + + if (idoublePrecision > JSON_DOUBLE_MAX_DECIMALS || idoublePrecision < 0) { + PyErr_Format( + PyExc_ValueError, + "Invalid value '%d' for option 'double_precision', max is '%u'", + idoublePrecision, JSON_DOUBLE_MAX_DECIMALS); + return NULL; + } + encoder->doublePrecision = idoublePrecision; + + if (sOrient != NULL) { + if (strcmp(sOrient, "records") == 0) { + pyEncoder.outputFormat = RECORDS; + } else if (strcmp(sOrient, "index") == 0) { + pyEncoder.outputFormat = INDEX; + } else if (strcmp(sOrient, "split") == 0) { + pyEncoder.outputFormat 
= SPLIT; + } else if (strcmp(sOrient, "values") == 0) { + pyEncoder.outputFormat = VALUES; + } else if (strcmp(sOrient, "columns") != 0) { + PyErr_Format(PyExc_ValueError, + "Invalid value '%s' for option 'orient'", sOrient); + return NULL; + } + } + + if (sdateFormat != NULL) { + if (strcmp(sdateFormat, "s") == 0) { + pyEncoder.datetimeUnit = NPY_FR_s; + } else if (strcmp(sdateFormat, "ms") == 0) { + pyEncoder.datetimeUnit = NPY_FR_ms; + } else if (strcmp(sdateFormat, "us") == 0) { + pyEncoder.datetimeUnit = NPY_FR_us; + } else if (strcmp(sdateFormat, "ns") == 0) { + pyEncoder.datetimeUnit = NPY_FR_ns; + } else { + PyErr_Format(PyExc_ValueError, + "Invalid value '%s' for option 'date_unit'", + sdateFormat); + return NULL; + } + } + + if (oisoDates != NULL && PyObject_IsTrue(oisoDates)) { + pyEncoder.datetimeIso = 1; + } + + if (odefHandler != NULL && odefHandler != Py_None) { + if (!PyCallable_Check(odefHandler)) { + PyErr_SetString(PyExc_TypeError, "Default handler is not callable"); + return NULL; + } + pyEncoder.defaultHandler = odefHandler; + } + + encoder->indent = indent; + + pyEncoder.originalOutputFormat = pyEncoder.outputFormat; + ret = JSON_EncodeObject(oinput, encoder, buffer, sizeof(buffer)); + if (PyErr_Occurred()) { + return NULL; + } + + if (encoder->errorMsg) { + if (ret != buffer) { + encoder->free(ret); + } + PyErr_Format(PyExc_OverflowError, "%s", encoder->errorMsg); + return NULL; + } + + newobj = PyUnicode_FromString(ret); + + if (ret != buffer) { + encoder->free(ret); + } + + return newobj; +} diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/src/ujson/python/ujson.c b/.local/lib/python3.8/site-packages/pandas/_libs/src/ujson/python/ujson.c new file mode 100644 index 0000000..a8fdb4f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/src/ujson/python/ujson.c @@ -0,0 +1,81 @@ +/* +Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +* Neither the name of the ESN Social Software AB nor the +names of its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + +Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) +https://github.com/client9/stringencoders +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. + +Numeric decoder derived from from TCL library +https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms +* Copyright (c) 1988-1993 The Regents of the University of California. +* Copyright (c) 1994 Sun Microsystems, Inc. +*/ + +#include "version.h" +#define PY_SSIZE_T_CLEAN +#include <Python.h> +#define PY_ARRAY_UNIQUE_SYMBOL UJSON_NUMPY +#include "numpy/arrayobject.h" + +/* objToJSON */ +PyObject *objToJSON(PyObject *self, PyObject *args, PyObject *kwargs); +void initObjToJSON(void); + +/* JSONToObj */ +PyObject *JSONToObj(PyObject *self, PyObject *args, PyObject *kwargs); + +#define ENCODER_HELP_TEXT \ + "Use ensure_ascii=false to output UTF-8. Pass in double_precision to " \ + "alter the maximum digit precision of doubles. Set " \ + "encode_html_chars=True to encode < > & as unicode escape sequences." + +static PyMethodDef ujsonMethods[] = { + {"encode", (PyCFunction)objToJSON, METH_VARARGS | METH_KEYWORDS, + "Converts arbitrary object recursively into JSON. " ENCODER_HELP_TEXT}, + {"decode", (PyCFunction)JSONToObj, METH_VARARGS | METH_KEYWORDS, + "Converts JSON as string to dict object structure. Use precise_float=True " + "to use high precision float decoder."}, + {"dumps", (PyCFunction)objToJSON, METH_VARARGS | METH_KEYWORDS, + "Converts arbitrary object recursively into JSON. " ENCODER_HELP_TEXT}, + {"loads", (PyCFunction)JSONToObj, METH_VARARGS | METH_KEYWORDS, + "Converts JSON as string to dict object structure. Use precise_float=True " + "to use high precision float decoder."}, + {NULL, NULL, 0, NULL} /* Sentinel */ +}; + +static PyModuleDef moduledef = { + .m_base = PyModuleDef_HEAD_INIT, + .m_name = "_libjson", + .m_methods = ujsonMethods +}; + + +PyMODINIT_FUNC PyInit_json(void) { + import_array() + initObjToJSON(); // TODO(username): clean up, maybe via tp_free? + return PyModuleDef_Init(&moduledef); +} diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/src/ujson/python/version.h b/.local/lib/python3.8/site-packages/pandas/_libs/src/ujson/python/version.h new file mode 100644 index 0000000..3f38642 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/src/ujson/python/version.h @@ -0,0 +1,43 @@ +/* +Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the ESN Social Software AB nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) +https://github.com/client9/stringencoders +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. + +Numeric decoder derived from from TCL library +https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms + * Copyright (c) 1988-1993 The Regents of the University of California. + * Copyright (c) 1994 Sun Microsystems, Inc. +*/ + +#ifndef PANDAS__LIBS_SRC_UJSON_PYTHON_VERSION_H_ +#define PANDAS__LIBS_SRC_UJSON_PYTHON_VERSION_H_ + +#define UJSON_VERSION "1.33" + +#endif // PANDAS__LIBS_SRC_UJSON_PYTHON_VERSION_H_ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/testing.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/_libs/testing.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..f2b9895 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/testing.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/testing.pyi b/.local/lib/python3.8/site-packages/pandas/_libs/testing.pyi new file mode 100644 index 0000000..01da496 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/testing.pyi @@ -0,0 +1,12 @@ +def assert_dict_equal(a, b, compare_keys: bool = ...): ... +def assert_almost_equal( + a, + b, + rtol: float = ..., + atol: float = ..., + check_dtype: bool = ..., + obj=..., + lobj=..., + robj=..., + index_values=..., +): ... diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/testing.pyx b/.local/lib/python3.8/site-packages/pandas/_libs/testing.pyx new file mode 100644 index 0000000..cfe9f40 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/testing.pyx @@ -0,0 +1,208 @@ +import cmath +import math + +import numpy as np + +from numpy cimport import_array + +import_array() + +from pandas._libs.util cimport ( + is_array, + is_complex_object, + is_real_number_object, +) + +from pandas.core.dtypes.common import is_dtype_equal +from pandas.core.dtypes.missing import ( + array_equivalent, + isna, +) + + +cdef bint isiterable(obj): + return hasattr(obj, '__iter__') + + +cdef bint has_length(obj): + return hasattr(obj, '__len__') + + +cdef bint is_dictlike(obj): + return hasattr(obj, 'keys') and hasattr(obj, '__getitem__') + + +cpdef assert_dict_equal(a, b, bint compare_keys=True): + assert is_dictlike(a) and is_dictlike(b), ( + "Cannot compare dict objects, one or both is not dict-like" + ) + + a_keys = frozenset(a.keys()) + b_keys = frozenset(b.keys()) + + if compare_keys: + assert a_keys == b_keys + + for k in a_keys: + assert_almost_equal(a[k], b[k]) + + return True + + +cpdef assert_almost_equal(a, b, + rtol=1.e-5, atol=1.e-8, + bint check_dtype=True, + obj=None, lobj=None, robj=None, index_values=None): + """ + Check that left and right objects are almost equal. + + Parameters + ---------- + a : object + b : object + rtol : float, default 1e-5 + Relative tolerance. + + .. 
versionadded:: 1.1.0 + atol : float, default 1e-8 + Absolute tolerance. + + .. versionadded:: 1.1.0 + check_dtype: bool, default True + check dtype if both a and b are np.ndarray. + obj : str, default None + Specify object name being compared, internally used to show + appropriate assertion message. + lobj : str, default None + Specify left object name being compared, internally used to show + appropriate assertion message. + robj : str, default None + Specify right object name being compared, internally used to show + appropriate assertion message. + index_values : ndarray, default None + Specify shared index values of objects being compared, internally used + to show appropriate assertion message. + + .. versionadded:: 1.1.0 + + """ + cdef: + double diff = 0.0 + Py_ssize_t i, na, nb + double fa, fb + bint is_unequal = False, a_is_ndarray, b_is_ndarray + + if lobj is None: + lobj = a + if robj is None: + robj = b + + if isinstance(a, dict) or isinstance(b, dict): + return assert_dict_equal(a, b) + + if isinstance(a, str) or isinstance(b, str): + assert a == b, f"{a} != {b}" + return True + + a_is_ndarray = is_array(a) + b_is_ndarray = is_array(b) + + if obj is None: + if a_is_ndarray or b_is_ndarray: + obj = 'numpy array' + else: + obj = 'Iterable' + + if isiterable(a): + + if not isiterable(b): + from pandas._testing import assert_class_equal + + # classes can't be the same, to raise error + assert_class_equal(a, b, obj=obj) + + assert has_length(a) and has_length(b), ( + f"Can't compare objects without length, one or both is invalid: ({a}, {b})" + ) + + if a_is_ndarray and b_is_ndarray: + na, nb = a.size, b.size + if a.shape != b.shape: + from pandas._testing import raise_assert_detail + raise_assert_detail( + obj, f'{obj} shapes are different', a.shape, b.shape) + + if check_dtype and not is_dtype_equal(a.dtype, b.dtype): + from pandas._testing import assert_attr_equal + assert_attr_equal('dtype', a, b, obj=obj) + + if array_equivalent(a, b, strict_nan=True): + return True + + else: + na, nb = len(a), len(b) + + if na != nb: + from pandas._testing import raise_assert_detail + + # if we have a small diff set, print it + if abs(na - nb) < 10: + r = list(set(a) ^ set(b)) + else: + r = None + + raise_assert_detail(obj, f"{obj} length are different", na, nb, r) + + for i in range(len(a)): + try: + assert_almost_equal(a[i], b[i], rtol=rtol, atol=atol) + except AssertionError: + is_unequal = True + diff += 1 + + if is_unequal: + from pandas._testing import raise_assert_detail + msg = (f"{obj} values are different " + f"({np.round(diff * 100.0 / na, 5)} %)") + raise_assert_detail(obj, msg, lobj, robj, index_values=index_values) + + return True + + elif isiterable(b): + from pandas._testing import assert_class_equal + + # classes can't be the same, to raise error + assert_class_equal(a, b, obj=obj) + + if isna(a) and isna(b): + # TODO: Should require same-dtype NA? 
+ # nan / None comparison + return True + + if a == b: + # object comparison + return True + + if is_real_number_object(a) and is_real_number_object(b): + if array_equivalent(a, b, strict_nan=True): + # inf comparison + return True + + fa, fb = a, b + + if not math.isclose(fa, fb, rel_tol=rtol, abs_tol=atol): + assert False, (f"expected {fb:.5f} but got {fa:.5f}, " + f"with rtol={rtol}, atol={atol}") + return True + + if is_complex_object(a) and is_complex_object(b): + if array_equivalent(a, b, strict_nan=True): + # inf comparison + return True + + if not cmath.isclose(a, b, rel_tol=rtol, abs_tol=atol): + assert False, (f"expected {b:.5f} but got {a:.5f}, " + f"with rtol={rtol}, atol={atol}") + return True + + raise AssertionError(f"{a} != {b}") diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslib.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/_libs/tslib.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..9fa32d0 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/tslib.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslib.pyi b/.local/lib/python3.8/site-packages/pandas/_libs/tslib.pyi new file mode 100644 index 0000000..4b02235 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslib.pyi @@ -0,0 +1,28 @@ +from datetime import tzinfo + +import numpy as np + +from pandas._typing import npt + +def format_array_from_datetime( + values: npt.NDArray[np.int64], + tz: tzinfo | None = ..., + format: str | None = ..., + na_rep: object = ..., +) -> npt.NDArray[np.object_]: ... +def array_with_unit_to_datetime( + values: np.ndarray, + unit: str, + errors: str = ..., +) -> tuple[np.ndarray, tzinfo | None]: ... +def array_to_datetime( + values: npt.NDArray[np.object_], + errors: str = ..., + dayfirst: bool = ..., + yearfirst: bool = ..., + utc: bool = ..., + require_iso8601: bool = ..., + allow_mixed: bool = ..., +) -> tuple[np.ndarray, tzinfo | None]: ... 
+ +# returned ndarray may be object dtype or datetime64[ns] diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslib.pyx b/.local/lib/python3.8/site-packages/pandas/_libs/tslib.pyx new file mode 100644 index 0000000..2883c91 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslib.pyx @@ -0,0 +1,779 @@ +import warnings + +import cython + +from cpython.datetime cimport ( + PyDate_Check, + PyDateTime_Check, + PyDateTime_IMPORT, + datetime, + tzinfo, +) + +# import datetime C API +PyDateTime_IMPORT + + +cimport numpy as cnp +from numpy cimport ( + float64_t, + int64_t, + ndarray, +) + +import numpy as np + +cnp.import_array() + +import pytz + +from pandas._libs.tslibs.np_datetime cimport ( + _string_to_dts, + check_dts_bounds, + dt64_to_dtstruct, + dtstruct_to_dt64, + get_datetime64_value, + npy_datetimestruct, + pydate_to_dt64, + pydatetime_to_dt64, +) +from pandas._libs.util cimport ( + is_datetime64_object, + is_float_object, + is_integer_object, +) + +from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime +from pandas._libs.tslibs.parsing import parse_datetime_string + +from pandas._libs.tslibs.conversion cimport ( + _TSObject, + cast_from_unit, + convert_datetime_to_tsobject, + get_datetime64_nanos, + precision_from_unit, +) +from pandas._libs.tslibs.nattype cimport ( + NPY_NAT, + c_NaT as NaT, + c_nat_strings as nat_strings, +) +from pandas._libs.tslibs.timestamps cimport _Timestamp + +from pandas._libs.tslibs.timestamps import Timestamp + +# Note: this is the only non-tslibs intra-pandas dependency here + +from pandas._libs.missing cimport checknull_with_nat_and_na +from pandas._libs.tslibs.tzconversion cimport tz_localize_to_utc_single + + +def _test_parse_iso8601(ts: str): + """ + TESTING ONLY: Parse string into Timestamp using iso8601 parser. 
Used + only for testing, actual construction uses `convert_str_to_tsobject` + """ + cdef: + _TSObject obj + int out_local = 0, out_tzoffset = 0 + + obj = _TSObject() + + if ts == 'now': + return Timestamp.utcnow() + elif ts == 'today': + return Timestamp.now().normalize() + + _string_to_dts(ts, &obj.dts, &out_local, &out_tzoffset, True) + obj.value = dtstruct_to_dt64(&obj.dts) + check_dts_bounds(&obj.dts) + if out_local == 1: + obj.tzinfo = pytz.FixedOffset(out_tzoffset) + obj.value = tz_localize_to_utc_single(obj.value, obj.tzinfo) + return Timestamp(obj.value, tz=obj.tzinfo) + else: + return Timestamp(obj.value) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def format_array_from_datetime( + ndarray[int64_t] values, + tzinfo tz=None, + str format=None, + object na_rep=None +) -> np.ndarray: + """ + return a np object array of the string formatted values + + Parameters + ---------- + values : a 1-d i8 array + tz : tzinfo or None, default None + format : str or None, default None + a strftime capable string + na_rep : optional, default is None + a nat format + + Returns + ------- + np.ndarray[object] + """ + cdef: + int64_t val, ns, N = len(values) + ndarray[int64_t] consider_values + bint show_ms = False, show_us = False, show_ns = False + bint basic_format = False + ndarray[object] result = np.empty(N, dtype=object) + object ts, res + npy_datetimestruct dts + + if na_rep is None: + na_rep = 'NaT' + + # if we don't have a format nor tz, then choose + # a format based on precision + basic_format = format is None and tz is None + if basic_format: + consider_values = values[values != NPY_NAT] + show_ns = (consider_values % 1000).any() + + if not show_ns: + consider_values //= 1000 + show_us = (consider_values % 1000).any() + + if not show_us: + consider_values //= 1000 + show_ms = (consider_values % 1000).any() + + for i in range(N): + val = values[i] + + if val == NPY_NAT: + result[i] = na_rep + elif basic_format: + + dt64_to_dtstruct(val, &dts) + res = (f'{dts.year}-{dts.month:02d}-{dts.day:02d} ' + f'{dts.hour:02d}:{dts.min:02d}:{dts.sec:02d}') + + if show_ns: + ns = dts.ps // 1000 + res += f'.{ns + dts.us * 1000:09d}' + elif show_us: + res += f'.{dts.us:06d}' + elif show_ms: + res += f'.{dts.us // 1000:03d}' + + result[i] = res + + else: + + ts = Timestamp(val, tz=tz) + if format is None: + result[i] = str(ts) + else: + + # invalid format string + # requires dates > 1900 + try: + result[i] = ts.strftime(format) + except ValueError: + result[i] = str(ts) + + return result + + +def array_with_unit_to_datetime( + ndarray values, + str unit, + str errors="coerce" +): + """ + Convert the ndarray to datetime according to the time unit. + + This function converts an array of objects into a numpy array of + datetime64[ns]. It returns the converted array + and also returns the timezone offset + + if errors: + - raise: return converted values or raise OutOfBoundsDatetime + if out of range on the conversion or + ValueError for other conversions (e.g. a string) + - ignore: return non-convertible values as the same unit + - coerce: NaT for non-convertibles + + Parameters + ---------- + values : ndarray + Date-like objects to convert. + unit : str + Time unit to use during conversion. + errors : str, default 'coerce' + Error behavior when parsing. 
+ + Returns + ------- + result : ndarray of m8 values + tz : parsed timezone offset or None + """ + cdef: + Py_ssize_t i, j, n=len(values) + int64_t m + int prec = 0 + ndarray[float64_t] fvalues + bint is_ignore = errors=='ignore' + bint is_coerce = errors=='coerce' + bint is_raise = errors=='raise' + bint need_to_iterate = True + ndarray[int64_t] iresult + ndarray[object] oresult + ndarray mask + object tz = None + + assert is_ignore or is_coerce or is_raise + + if unit == "ns": + if issubclass(values.dtype.type, (np.integer, np.float_)): + result = values.astype("M8[ns]", copy=False) + else: + result, tz = array_to_datetime( + values.astype(object, copy=False), + errors=errors, + ) + return result, tz + + m, p = precision_from_unit(unit) + + if is_raise: + # try a quick conversion to i8/f8 + # if we have nulls that are not type-compat + # then need to iterate + + if values.dtype.kind in ["i", "f", "u"]: + iresult = values.astype("i8", copy=False) + # fill missing values by comparing to NPY_NAT + mask = iresult == NPY_NAT + iresult[mask] = 0 + fvalues = iresult.astype("f8") * m + need_to_iterate = False + + if not need_to_iterate: + # check the bounds + if (fvalues < Timestamp.min.value).any() or ( + (fvalues > Timestamp.max.value).any() + ): + raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'") + + if values.dtype.kind in ["i", "u"]: + result = (iresult * m).astype("M8[ns]") + + elif values.dtype.kind == "f": + fresult = (values * m).astype("f8") + fresult[mask] = 0 + if prec: + fresult = round(fresult, prec) + result = fresult.astype("M8[ns]", copy=False) + + iresult = result.view("i8") + iresult[mask] = NPY_NAT + + return result, tz + + result = np.empty(n, dtype='M8[ns]') + iresult = result.view('i8') + + try: + for i in range(n): + val = values[i] + + if checknull_with_nat_and_na(val): + iresult[i] = NPY_NAT + + elif is_integer_object(val) or is_float_object(val): + + if val != val or val == NPY_NAT: + iresult[i] = NPY_NAT + else: + try: + iresult[i] = cast_from_unit(val, unit) + except OverflowError: + if is_raise: + raise OutOfBoundsDatetime( + f"cannot convert input {val} with the unit '{unit}'" + ) + elif is_ignore: + raise AssertionError + iresult[i] = NPY_NAT + + elif isinstance(val, str): + if len(val) == 0 or val in nat_strings: + iresult[i] = NPY_NAT + + else: + try: + iresult[i] = cast_from_unit(float(val), unit) + except ValueError: + if is_raise: + raise ValueError( + f"non convertible value {val} with the unit '{unit}'" + ) + elif is_ignore: + raise AssertionError + iresult[i] = NPY_NAT + except OverflowError: + if is_raise: + raise OutOfBoundsDatetime( + f"cannot convert input {val} with the unit '{unit}'" + ) + elif is_ignore: + raise AssertionError + iresult[i] = NPY_NAT + + else: + + if is_raise: + raise ValueError( + f"unit='{unit}' not valid with non-numerical val='{val}'" + ) + if is_ignore: + raise AssertionError + + iresult[i] = NPY_NAT + + return result, tz + + except AssertionError: + pass + + # we have hit an exception + # and are in ignore mode + # redo as object + + oresult = np.empty(n, dtype=object) + for i in range(n): + val = values[i] + + if checknull_with_nat_and_na(val): + oresult[i] = NaT + elif is_integer_object(val) or is_float_object(val): + + if val != val or val == NPY_NAT: + oresult[i] = NaT + else: + try: + oresult[i] = Timestamp(cast_from_unit(val, unit)) + except OverflowError: + oresult[i] = val + + elif isinstance(val, str): + if len(val) == 0 or val in nat_strings: + oresult[i] = NaT + + else: + oresult[i] = val + + 
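# errors == "ignore": hand back an object array of best-effort conversions + 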
return oresult, tz + + +@cython.wraparound(False) +@cython.boundscheck(False) +cpdef array_to_datetime( + ndarray[object] values, + str errors='raise', + bint dayfirst=False, + bint yearfirst=False, + bint utc=False, + bint require_iso8601=False, + bint allow_mixed=False, +): + """ + Converts a 1D array of date-like values to a numpy array of either: + 1) datetime64[ns] data + 2) datetime.datetime objects, if OutOfBoundsDatetime or TypeError + is encountered + + Also returns a pytz.FixedOffset if an array of strings with the same + timezone offset is passed and utc=True is not passed. Otherwise, None + is returned + + Handles datetime.date, datetime.datetime, np.datetime64 objects, numeric, + strings + + Parameters + ---------- + values : ndarray of object + date-like objects to convert + errors : str, default 'raise' + error behavior when parsing + dayfirst : bool, default False + dayfirst parsing behavior when encountering datetime strings + yearfirst : bool, default False + yearfirst parsing behavior when encountering datetime strings + utc : bool, default False + indicator whether the dates should be UTC + require_iso8601 : bool, default False + indicator whether the datetime string should be iso8601 + allow_mixed : bool, default False + Whether to allow mixed datetimes and integers. + + Returns + ------- + np.ndarray + May be datetime64[ns] or object dtype + tzinfo or None + """ + cdef: + Py_ssize_t i, n = len(values) + object val, py_dt, tz, tz_out = None + ndarray[int64_t] iresult + ndarray[object] oresult + npy_datetimestruct dts + bint utc_convert = bool(utc) + bint seen_integer = False + bint seen_string = False + bint seen_datetime = False + bint seen_datetime_offset = False + bint is_raise = errors=='raise' + bint is_ignore = errors=='ignore' + bint is_coerce = errors=='coerce' + bint is_same_offsets + _TSObject _ts + int64_t value + int out_local = 0, out_tzoffset = 0 + float offset_seconds, tz_offset + set out_tzoffset_vals = set() + bint string_to_dts_failed + + # specify error conditions + assert is_raise or is_ignore or is_coerce + + result = np.empty(n, dtype='M8[ns]') + iresult = result.view('i8') + + try: + for i in range(n): + val = values[i] + + try: + if checknull_with_nat_and_na(val): + iresult[i] = NPY_NAT + + elif PyDateTime_Check(val): + seen_datetime = True + if val.tzinfo is not None: + if utc_convert: + _ts = convert_datetime_to_tsobject(val, None) + iresult[i] = _ts.value + else: + raise ValueError('Tz-aware datetime.datetime ' + 'cannot be converted to ' + 'datetime64 unless utc=True') + elif isinstance(val, _Timestamp): + iresult[i] = val.value + else: + iresult[i] = pydatetime_to_dt64(val, &dts) + check_dts_bounds(&dts) + + elif PyDate_Check(val): + seen_datetime = True + iresult[i] = pydate_to_dt64(val, &dts) + check_dts_bounds(&dts) + + elif is_datetime64_object(val): + seen_datetime = True + iresult[i] = get_datetime64_nanos(val) + + elif is_integer_object(val) or is_float_object(val): + # these must be ns unit by-definition + seen_integer = True + + if val != val or val == NPY_NAT: + iresult[i] = NPY_NAT + elif is_raise or is_ignore: + iresult[i] = val + else: + # coerce + # we now need to parse this as if unit='ns' + # we can ONLY accept integers at this point + # if we have previously (or in future accept + # datetimes/strings, then we must coerce) + try: + iresult[i] = cast_from_unit(val, 'ns') + except OverflowError: + iresult[i] = NPY_NAT + + elif isinstance(val, str): + # string + seen_string = True + + if len(val) == 0 or val in nat_strings: + 
iresult[i] = NPY_NAT + continue + + string_to_dts_failed = _string_to_dts( + val, &dts, &out_local, + &out_tzoffset, False + ) + if string_to_dts_failed: + # An error at this point is a _parsing_ error + # specifically _not_ OutOfBoundsDatetime + if _parse_today_now(val, &iresult[i], utc): + continue + elif require_iso8601: + # if requiring iso8601 strings, skip trying + # other formats + if is_coerce: + iresult[i] = NPY_NAT + continue + elif is_raise: + raise ValueError( + f"time data {val} doesn't match format specified" + ) + return values, tz_out + + try: + py_dt = parse_datetime_string(val, + dayfirst=dayfirst, + yearfirst=yearfirst) + # If the dateutil parser returned tzinfo, capture it + # to check if all arguments have the same tzinfo + tz = py_dt.utcoffset() + + except (ValueError, OverflowError): + if is_coerce: + iresult[i] = NPY_NAT + continue + raise TypeError("invalid string coercion to datetime") + + if tz is not None: + seen_datetime_offset = True + # dateutil timezone objects cannot be hashed, so + # store the UTC offsets in seconds instead + out_tzoffset_vals.add(tz.total_seconds()) + else: + # Add a marker for naive string, to track if we are + # parsing mixed naive and aware strings + out_tzoffset_vals.add('naive') + + _ts = convert_datetime_to_tsobject(py_dt, None) + iresult[i] = _ts.value + if not string_to_dts_failed: + # No error reported by string_to_dts, pick back up + # where we left off + value = dtstruct_to_dt64(&dts) + if out_local == 1: + seen_datetime_offset = True + # Store the out_tzoffset in seconds + # since we store the total_seconds of + # dateutil.tz.tzoffset objects + out_tzoffset_vals.add(out_tzoffset * 60.) + tz = pytz.FixedOffset(out_tzoffset) + value = tz_localize_to_utc_single(value, tz) + out_local = 0 + out_tzoffset = 0 + else: + # Add a marker for naive string, to track if we are + # parsing mixed naive and aware strings + out_tzoffset_vals.add('naive') + iresult[i] = value + check_dts_bounds(&dts) + + else: + if is_coerce: + iresult[i] = NPY_NAT + else: + raise TypeError(f"{type(val)} is not convertible to datetime") + + except OutOfBoundsDatetime: + if is_coerce: + iresult[i] = NPY_NAT + continue + elif require_iso8601 and isinstance(val, str): + # GH#19382 for just-barely-OutOfBounds falling back to + # dateutil parser will return incorrect result because + # it will ignore nanoseconds + if is_raise: + + # Still raise OutOfBoundsDatetime, + # as error message is informative. 
+ raise + + assert is_ignore + return values, tz_out + raise + + except OutOfBoundsDatetime: + if is_raise: + raise + + return ignore_errors_out_of_bounds_fallback(values), tz_out + + except TypeError: + return _array_to_datetime_object(values, errors, dayfirst, yearfirst) + + if seen_datetime and seen_integer: + # we have mixed datetimes & integers + + if is_coerce: + # coerce all of the integers/floats to NaT, preserve + # the datetimes and other convertibles + for i in range(n): + val = values[i] + if is_integer_object(val) or is_float_object(val): + result[i] = NPY_NAT + elif allow_mixed: + pass + elif is_raise: + raise ValueError("mixed datetimes and integers in passed array") + else: + return _array_to_datetime_object(values, errors, dayfirst, yearfirst) + + if seen_datetime_offset and not utc_convert: + # GH#17697 + # 1) If all the offsets are equal, return one offset for + # the parsed dates to (maybe) pass to DatetimeIndex + # 2) If the offsets are different, then force the parsing down the + # object path where an array of datetimes + # (with individual dateutil.tzoffsets) are returned + is_same_offsets = len(out_tzoffset_vals) == 1 + if not is_same_offsets: + return _array_to_datetime_object(values, errors, dayfirst, yearfirst) + else: + tz_offset = out_tzoffset_vals.pop() + tz_out = pytz.FixedOffset(tz_offset / 60.) + return result, tz_out + + +cdef ndarray[object] ignore_errors_out_of_bounds_fallback(ndarray[object] values): + """ + Fallback for array_to_datetime if an OutOfBoundsDatetime is raised + and errors == "ignore" + + Parameters + ---------- + values : ndarray[object] + + Returns + ------- + ndarray[object] + """ + cdef: + Py_ssize_t i, n = len(values) + object val + + oresult = np.empty(n, dtype=object) + + for i in range(n): + val = values[i] + + # set as nan except if its a NaT + if checknull_with_nat_and_na(val): + if isinstance(val, float): + oresult[i] = np.nan + else: + oresult[i] = NaT + elif is_datetime64_object(val): + if get_datetime64_value(val) == NPY_NAT: + oresult[i] = NaT + else: + oresult[i] = val.item() + else: + oresult[i] = val + return oresult + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef _array_to_datetime_object( + ndarray[object] values, + str errors, + bint dayfirst=False, + bint yearfirst=False, +): + """ + Fall back function for array_to_datetime + + Attempts to parse datetime strings with dateutil to return an array + of datetime objects + + Parameters + ---------- + values : ndarray[object] + date-like objects to convert + errors : str + error behavior when parsing + dayfirst : bool, default False + dayfirst parsing behavior when encountering datetime strings + yearfirst : bool, default False + yearfirst parsing behavior when encountering datetime strings + + Returns + ------- + np.ndarray[object] + Literal[None] + """ + cdef: + Py_ssize_t i, n = len(values) + object val + bint is_ignore = errors == 'ignore' + bint is_coerce = errors == 'coerce' + bint is_raise = errors == 'raise' + ndarray[object] oresult + npy_datetimestruct dts + + assert is_raise or is_ignore or is_coerce + + oresult = np.empty(n, dtype=object) + + # We return an object array and only attempt to parse: + # 1) NaT or NaT-like values + # 2) datetime strings, which we return as datetime.datetime + for i in range(n): + val = values[i] + if checknull_with_nat_and_na(val) or PyDateTime_Check(val): + # GH 25978. 
No need to parse NaT-like or datetime-like vals + oresult[i] = val + elif isinstance(val, str): + if len(val) == 0 or val in nat_strings: + oresult[i] = 'NaT' + continue + try: + oresult[i] = parse_datetime_string(val, dayfirst=dayfirst, + yearfirst=yearfirst) + pydatetime_to_dt64(oresult[i], &dts) + check_dts_bounds(&dts) + except (ValueError, OverflowError): + if is_coerce: + oresult[i] = NaT + continue + if is_raise: + raise + return values, None + else: + if is_raise: + raise + return values, None + return oresult, None + + +cdef inline bint _parse_today_now(str val, int64_t* iresult, bint utc): + # We delay this check for as long as possible + # because it catches relatively rare cases + if val == "now": + iresult[0] = Timestamp.utcnow().value + if not utc: + # GH#18705 make sure to_datetime("now") matches Timestamp("now") + warnings.warn( + "The parsing of 'now' in pd.to_datetime without `utc=True` is " + "deprecated. In a future version, this will match Timestamp('now') " + "and Timestamp.now()", + FutureWarning, + stacklevel=1, + ) + + return True + elif val == "today": + iresult[0] = Timestamp.today().value + return True + return False diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/__init__.py b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/__init__.py new file mode 100644 index 0000000..11de4e6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/__init__.py @@ -0,0 +1,65 @@ +__all__ = [ + "dtypes", + "localize_pydatetime", + "NaT", + "NaTType", + "iNaT", + "nat_strings", + "OutOfBoundsDatetime", + "OutOfBoundsTimedelta", + "IncompatibleFrequency", + "Period", + "Resolution", + "Timedelta", + "normalize_i8_timestamps", + "is_date_array_normalized", + "dt64arr_to_periodarr", + "delta_to_nanoseconds", + "ints_to_pydatetime", + "ints_to_pytimedelta", + "get_resolution", + "Timestamp", + "tz_convert_from_utc_single", + "to_offset", + "Tick", + "BaseOffset", + "tz_compare", +] + +from pandas._libs.tslibs import dtypes +from pandas._libs.tslibs.conversion import ( + OutOfBoundsTimedelta, + localize_pydatetime, +) +from pandas._libs.tslibs.dtypes import Resolution +from pandas._libs.tslibs.nattype import ( + NaT, + NaTType, + iNaT, + nat_strings, +) +from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime +from pandas._libs.tslibs.offsets import ( + BaseOffset, + Tick, + to_offset, +) +from pandas._libs.tslibs.period import ( + IncompatibleFrequency, + Period, +) +from pandas._libs.tslibs.timedeltas import ( + Timedelta, + delta_to_nanoseconds, + ints_to_pytimedelta, +) +from pandas._libs.tslibs.timestamps import Timestamp +from pandas._libs.tslibs.timezones import tz_compare +from pandas._libs.tslibs.tzconversion import tz_convert_from_utc_single +from pandas._libs.tslibs.vectorized import ( + dt64arr_to_periodarr, + get_resolution, + ints_to_pydatetime, + is_date_array_normalized, + normalize_i8_timestamps, +) diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..068b9b7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/base.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/base.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..40268b8 Binary files 
/dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/base.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/base.pxd b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/base.pxd new file mode 100644 index 0000000..3bffff7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/base.pxd @@ -0,0 +1,5 @@ +from cpython.datetime cimport datetime + + +cdef class ABCTimestamp(datetime): + pass diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/base.pyx b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/base.pyx new file mode 100644 index 0000000..1677a8b --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/base.pyx @@ -0,0 +1,12 @@ +""" +We define base classes that will be inherited by Timestamp, Timedelta, etc +in order to allow for fast isinstance checks without circular dependency issues. + +This is analogous to core.dtypes.generic. +""" + +from cpython.datetime cimport datetime + + +cdef class ABCTimestamp(datetime): + pass diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/ccalendar.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/ccalendar.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..02a7a5c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/ccalendar.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/ccalendar.pxd b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/ccalendar.pxd new file mode 100644 index 0000000..511c9f9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/ccalendar.pxd @@ -0,0 +1,22 @@ +from cython cimport Py_ssize_t +from numpy cimport ( + int32_t, + int64_t, +) + +ctypedef (int32_t, int32_t, int32_t) iso_calendar_t + +cdef int dayofweek(int y, int m, int d) nogil +cdef bint is_leapyear(int64_t year) nogil +cpdef int32_t get_days_in_month(int year, Py_ssize_t month) nogil +cpdef int32_t get_week_of_year(int year, int month, int day) nogil +cpdef iso_calendar_t get_iso_calendar(int year, int month, int day) nogil +cpdef int32_t get_day_of_year(int year, int month, int day) nogil +cpdef int get_lastbday(int year, int month) nogil +cpdef int get_firstbday(int year, int month) nogil + +cdef int64_t DAY_NANOS +cdef int64_t HOUR_NANOS +cdef dict c_MONTH_NUMBERS + +cdef int32_t* month_offset diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/ccalendar.pyi b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/ccalendar.pyi new file mode 100644 index 0000000..993f18a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/ccalendar.pyi @@ -0,0 +1,12 @@ +DAYS: list[str] +MONTH_ALIASES: dict[int, str] +MONTH_NUMBERS: dict[str, int] +MONTHS: list[str] +int_to_weekday: dict[int, str] + +def get_firstbday(year: int, month: int) -> int: ... +def get_lastbday(year: int, month: int) -> int: ... +def get_day_of_year(year: int, month: int, day: int) -> int: ... +def get_iso_calendar(year: int, month: int, day: int) -> tuple[int, int, int]: ... +def get_week_of_year(year: int, month: int, day: int) -> int: ... +def get_days_in_month(year: int, month: int) -> int: ... 
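A quick smoke test of the calendar helpers declared in this stub — a minimal sketch, assuming the compiled pandas._libs.tslibs.ccalendar extension from this tree is importable; the expected values follow from the 2020/2021 calendar:

# Minimal sketch: exercising the ccalendar stubs above (assumes a built pandas).
from pandas._libs.tslibs.ccalendar import (
    get_day_of_year,
    get_days_in_month,
    get_firstbday,
    get_iso_calendar,
    get_lastbday,
)

assert get_days_in_month(2020, 2) == 29                # 2020 is a leap year
assert get_day_of_year(2021, 3, 1) == 60               # 31 (Jan) + 28 (Feb) + 1
assert get_iso_calendar(2021, 1, 1) == (2020, 53, 5)   # Friday of ISO week 53 of 2020
assert get_firstbday(2021, 8) == 2                     # Aug 1, 2021 falls on a Sunday
assert get_lastbday(2021, 1) == 29                     # Jan 31, 2021 falls on a Sunday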
diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/ccalendar.pyx b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/ccalendar.pyx new file mode 100644 index 0000000..2aa0495 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/ccalendar.pyx @@ -0,0 +1,295 @@ +# cython: boundscheck=False +""" +Cython implementations of functions resembling the stdlib calendar module +""" + +import cython + +from numpy cimport ( + int32_t, + int64_t, +) + +# ---------------------------------------------------------------------- +# Constants + +# Slightly more performant cython lookups than a 2D table +# The first 12 entries correspond to month lengths for non-leap years. +# The remaining 12 entries give month lengths for leap years +cdef int32_t* days_per_month_array = [ + 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, + 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] + +cdef int* sakamoto_arr = [0, 3, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4] + +# The first 13 entries give the month days elapsed as of the first of month N +# (or the total number of days in the year for N=13) in non-leap years. +# The remaining 13 entries give the days elapsed in leap years. +cdef int32_t* month_offset = [ + 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365, + 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366] + +# Canonical location for other modules to find name constants +MONTHS = ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL', + 'AUG', 'SEP', 'OCT', 'NOV', 'DEC'] +# The first blank line is consistent with calendar.month_name in the calendar +# standard library +MONTHS_FULL = ['', 'January', 'February', 'March', 'April', 'May', 'June', + 'July', 'August', 'September', 'October', 'November', + 'December'] +MONTH_NUMBERS = {name: num for num, name in enumerate(MONTHS)} +cdef dict c_MONTH_NUMBERS = MONTH_NUMBERS +MONTH_ALIASES = {(num + 1): name for num, name in enumerate(MONTHS)} +MONTH_TO_CAL_NUM = {name: num + 1 for num, name in enumerate(MONTHS)} + +DAYS = ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN'] +DAYS_FULL = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', + 'Saturday', 'Sunday'] +int_to_weekday = {num: name for num, name in enumerate(DAYS)} +weekday_to_int = {int_to_weekday[key]: key for key in int_to_weekday} + +DAY_SECONDS = 86400 +HOUR_SECONDS = 3600 + +cdef int64_t DAY_NANOS = DAY_SECONDS * 1_000_000_000 +cdef int64_t HOUR_NANOS = HOUR_SECONDS * 1_000_000_000 + +# ---------------------------------------------------------------------- + + +@cython.wraparound(False) +@cython.boundscheck(False) +cpdef int32_t get_days_in_month(int year, Py_ssize_t month) nogil: + """ + Return the number of days in the given month of the given year. + + Parameters + ---------- + year : int + month : int + + Returns + ------- + days_in_month : int + + Notes + ----- + Assumes that the arguments are valid. Passing a month not between 1 and 12 + risks a segfault. + """ + return days_per_month_array[12 * is_leapyear(year) + month - 1] + + +@cython.wraparound(False) +@cython.boundscheck(False) +@cython.cdivision +cdef int dayofweek(int y, int m, int d) nogil: + """ + Find the day of week for the date described by the Y/M/D triple y, m, d + using Sakamoto's method, from wikipedia. + + 0 represents Monday. See [1]_. + + Parameters + ---------- + y : int + m : int + d : int + + Returns + ------- + weekday : int + + Notes + ----- + Assumes that y, m, d, represents a valid date. 
+ + See Also + -------- + [1] https://docs.python.org/3/library/calendar.html#calendar.weekday + + [2] https://en.wikipedia.org/wiki/\ + Determination_of_the_day_of_the_week#Sakamoto.27s_methods + """ + cdef: + int day + + y -= m < 3 + day = (y + y / 4 - y / 100 + y / 400 + sakamoto_arr[m - 1] + d) % 7 + # convert to python day + return (day + 6) % 7 + + +cdef bint is_leapyear(int64_t year) nogil: + """ + Returns 1 if the given year is a leap year, 0 otherwise. + + Parameters + ---------- + year : int + + Returns + ------- + is_leap : bool + """ + return ((year & 0x3) == 0 and # year % 4 == 0 + ((year % 100) != 0 or (year % 400) == 0)) + + +@cython.wraparound(False) +@cython.boundscheck(False) +cpdef int32_t get_week_of_year(int year, int month, int day) nogil: + """ + Return the ordinal week-of-year for the given day. + + Parameters + ---------- + year : int + month : int + day : int + + Returns + ------- + week_of_year : int32_t + + Notes + ----- + Assumes the inputs describe a valid date. + """ + return get_iso_calendar(year, month, day)[1] + + +@cython.wraparound(False) +@cython.boundscheck(False) +cpdef iso_calendar_t get_iso_calendar(int year, int month, int day) nogil: + """ + Return the year, week, and day of year corresponding to ISO 8601 + + Parameters + ---------- + year : int + month : int + day : int + + Returns + ------- + year : int32_t + week : int32_t + day : int32_t + + Notes + ----- + Assumes the inputs describe a valid date. + """ + cdef: + int32_t doy, dow + int32_t iso_year, iso_week + + doy = get_day_of_year(year, month, day) + dow = dayofweek(year, month, day) + + # estimate + iso_week = (doy - 1) - dow + 3 + if iso_week >= 0: + iso_week = iso_week // 7 + 1 + + # verify + if iso_week < 0: + if (iso_week > -2) or (iso_week == -2 and is_leapyear(year - 1)): + iso_week = 53 + else: + iso_week = 52 + elif iso_week == 53: + if 31 - day + dow < 3: + iso_week = 1 + + iso_year = year + if iso_week == 1 and month == 12: + iso_year += 1 + + elif iso_week >= 52 and month == 1: + iso_year -= 1 + + return iso_year, iso_week, dow + 1 + + +@cython.wraparound(False) +@cython.boundscheck(False) +cpdef int32_t get_day_of_year(int year, int month, int day) nogil: + """ + Return the ordinal day-of-year for the given day. + + Parameters + ---------- + year : int + month : int + day : int + + Returns + ------- + day_of_year : int32_t + + Notes + ----- + Assumes the inputs describe a valid date. + """ + cdef: + bint isleap + int32_t mo_off + int day_of_year + + isleap = is_leapyear(year) + + mo_off = month_offset[isleap * 13 + month - 1] + + day_of_year = mo_off + day + return day_of_year + + +# --------------------------------------------------------------------- +# Business Helpers + +cpdef int get_lastbday(int year, int month) nogil: + """ + Find the last day of the month that is a business day. + + Parameters + ---------- + year : int + month : int + + Returns + ------- + last_bday : int + """ + cdef: + int wkday, days_in_month + + wkday = dayofweek(year, month, 1) + days_in_month = get_days_in_month(year, month) + return days_in_month - max(((wkday + days_in_month - 1) % 7) - 4, 0) + + +cpdef int get_firstbday(int year, int month) nogil: + """ + Find the first day of the month that is a business day. 
+ + Parameters + ---------- + year : int + month : int + + Returns + ------- + first_bday : int + """ + cdef: + int first, wkday + + wkday = dayofweek(year, month, 1) + first = 1 + if wkday == 5: # on Saturday + first = 3 + elif wkday == 6: # on Sunday + first = 2 + return first diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/conversion.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/conversion.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..dad18c5 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/conversion.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/conversion.pxd b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/conversion.pxd new file mode 100644 index 0000000..5b80193 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/conversion.pxd @@ -0,0 +1,35 @@ +from cpython.datetime cimport ( + datetime, + tzinfo, +) +from numpy cimport ( + int32_t, + int64_t, + ndarray, +) + +from pandas._libs.tslibs.np_datetime cimport npy_datetimestruct + + +cdef class _TSObject: + cdef: + npy_datetimestruct dts # npy_datetimestruct + int64_t value # numpy dt64 + object tzinfo + bint fold + + +cdef convert_to_tsobject(object ts, tzinfo tz, str unit, + bint dayfirst, bint yearfirst, + int32_t nanos=*) + +cdef _TSObject convert_datetime_to_tsobject(datetime ts, tzinfo tz, + int32_t nanos=*) + +cdef int64_t get_datetime64_nanos(object val) except? -1 + +cpdef datetime localize_pydatetime(datetime dt, tzinfo tz) +cdef int64_t cast_from_unit(object ts, str unit) except? -1 +cpdef (int64_t, int) precision_from_unit(str unit) + +cdef int64_t normalize_i8_stamp(int64_t local_val) nogil diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/conversion.pyi b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/conversion.pyi new file mode 100644 index 0000000..3d02881 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/conversion.pyi @@ -0,0 +1,29 @@ +from datetime import ( + datetime, + tzinfo, +) + +import numpy as np + +from pandas._typing import npt + +DT64NS_DTYPE: np.dtype +TD64NS_DTYPE: np.dtype + +class OutOfBoundsTimedelta(ValueError): ... + +def precision_from_unit( + unit: str, +) -> tuple[int, int,]: ... # (int64_t, _) +def ensure_datetime64ns( + arr: np.ndarray, # np.ndarray[datetime64[ANY]] + copy: bool = ..., +) -> np.ndarray: ... # np.ndarray[datetime64ns] +def ensure_timedelta64ns( + arr: np.ndarray, # np.ndarray[timedelta64[ANY]] + copy: bool = ..., +) -> np.ndarray: ... # np.ndarray[timedelta64ns] +def datetime_to_datetime64( + values: npt.NDArray[np.object_], +) -> tuple[np.ndarray, tzinfo | None,]: ... # (np.ndarray[dt64ns], _) +def localize_pydatetime(dt: datetime, tz: tzinfo | None) -> datetime: ... 
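The ccalendar.pyx diff just above closes with the business-day helpers; its dayofweek uses Sakamoto's method with integer division, then shifts the result so 0 means Monday, matching calendar.weekday. A pure-Python sketch under those same assumptions (helper names illustrative, not pandas' public API):

    import calendar

    # Sakamoto's month offsets; January/February are treated as months
    # 13/14 of the previous year via the `y -= m < 3` adjustment.
    sakamoto = [0, 3, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4]

    def dayofweek(y: int, m: int, d: int) -> int:
        y -= m < 3
        day = (y + y // 4 - y // 100 + y // 400 + sakamoto[m - 1] + d) % 7
        return (day + 6) % 7  # rotate so 0 = Monday instead of Sunday

    def get_firstbday(y: int, m: int) -> int:
        # First weekday of the month: step past Saturday (5) or Sunday (6).
        wkday = dayofweek(y, m, 1)
        return 3 if wkday == 5 else 2 if wkday == 6 else 1

    assert dayofweek(2021, 5, 1) == calendar.weekday(2021, 5, 1) == 5
    assert get_firstbday(2021, 5) == 3  # May 1, 2021 was a Saturday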
diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/conversion.pyx b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/conversion.pyx new file mode 100644 index 0000000..a6dc8cc --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/conversion.pyx @@ -0,0 +1,858 @@ +import cython +import numpy as np + +cimport numpy as cnp +from numpy cimport ( + int32_t, + int64_t, + intp_t, + ndarray, +) + +cnp.import_array() + +import pytz + +# stdlib datetime imports + +from cpython.datetime cimport ( + PyDate_Check, + PyDateTime_Check, + PyDateTime_IMPORT, + datetime, + time, + tzinfo, +) + +PyDateTime_IMPORT + +from pandas._libs.tslibs.base cimport ABCTimestamp +from pandas._libs.tslibs.np_datetime cimport ( + NPY_DATETIMEUNIT, + NPY_FR_ns, + _string_to_dts, + check_dts_bounds, + dt64_to_dtstruct, + dtstruct_to_dt64, + get_datetime64_unit, + get_datetime64_value, + npy_datetime, + npy_datetimestruct, + pandas_datetime_to_datetimestruct, + pydatetime_to_dt64, +) + +from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime + +from pandas._libs.tslibs.timezones cimport ( + get_dst_info, + get_utcoffset, + is_fixed_offset, + is_tzlocal, + is_utc, + maybe_get_tz, + tz_compare, + utc_pytz as UTC, +) +from pandas._libs.tslibs.util cimport ( + is_datetime64_object, + is_float_object, + is_integer_object, +) + +from pandas._libs.tslibs.parsing import parse_datetime_string + +from pandas._libs.tslibs.nattype cimport ( + NPY_NAT, + c_NaT as NaT, + c_nat_strings as nat_strings, + checknull_with_nat, +) +from pandas._libs.tslibs.tzconversion cimport ( + tz_convert_utc_to_tzlocal, + tz_localize_to_utc_single, +) + +# ---------------------------------------------------------------------- +# Constants + +DT64NS_DTYPE = np.dtype('M8[ns]') +TD64NS_DTYPE = np.dtype('m8[ns]') + + +class OutOfBoundsTimedelta(ValueError): + """ + Raised when encountering a timedelta value that cannot be represented + as a timedelta64[ns]. + """ + # Timedelta analogue to OutOfBoundsDatetime + pass + + +# ---------------------------------------------------------------------- +# Unit Conversion Helpers + +cdef inline int64_t cast_from_unit(object ts, str unit) except? -1: + """ + Return a casting of the unit represented to nanoseconds + round the fractional part of a float to our precision, p. + + Parameters + ---------- + ts : int, float, or None + unit : str + + Returns + ------- + int64_t + """ + cdef: + int64_t m + int p + + m, p = precision_from_unit(unit) + + # just give me the unit back + if ts is None: + return m + + # cast the unit, multiply base/frace separately + # to avoid precision issues from float -> int + base = ts + frac = ts - base + if p: + frac = round(frac, p) + return (base * m) + (frac * m) + + +cpdef inline (int64_t, int) precision_from_unit(str unit): + """ + Return a casting of the unit represented to nanoseconds + the precision + to round the fractional part. + + Notes + ----- + The caller is responsible for ensuring that the default value of "ns" + takes the place of None. 
+ """ + cdef: + int64_t m + int p + + if unit == "Y": + m = 1_000_000_000 * 31556952 + p = 9 + elif unit == "M": + m = 1_000_000_000 * 2629746 + p = 9 + elif unit == "W": + m = 1_000_000_000 * 3600 * 24 * 7 + p = 9 + elif unit == "D" or unit == "d": + m = 1_000_000_000 * 3600 * 24 + p = 9 + elif unit == "h": + m = 1_000_000_000 * 3600 + p = 9 + elif unit == "m": + m = 1_000_000_000 * 60 + p = 9 + elif unit == "s": + m = 1_000_000_000 + p = 9 + elif unit == "ms": + m = 1_000_000 + p = 6 + elif unit == "us": + m = 1000 + p = 3 + elif unit == "ns" or unit is None: + m = 1 + p = 0 + else: + raise ValueError(f"cannot cast unit {unit}") + return m, p + + +cdef inline int64_t get_datetime64_nanos(object val) except? -1: + """ + Extract the value and unit from a np.datetime64 object, then convert the + value to nanoseconds if necessary. + """ + cdef: + npy_datetimestruct dts + NPY_DATETIMEUNIT unit + npy_datetime ival + + ival = get_datetime64_value(val) + if ival == NPY_NAT: + return NPY_NAT + + unit = get_datetime64_unit(val) + + if unit != NPY_FR_ns: + pandas_datetime_to_datetimestruct(ival, unit, &dts) + check_dts_bounds(&dts) + ival = dtstruct_to_dt64(&dts) + + return ival + + +@cython.boundscheck(False) +@cython.wraparound(False) +def ensure_datetime64ns(arr: ndarray, copy: bool = True): + """ + Ensure a np.datetime64 array has dtype specifically 'datetime64[ns]' + + Parameters + ---------- + arr : ndarray + copy : bool, default True + + Returns + ------- + ndarray with dtype datetime64[ns] + """ + cdef: + Py_ssize_t i, n = arr.size + const int64_t[:] ivalues + int64_t[:] iresult + NPY_DATETIMEUNIT unit + npy_datetimestruct dts + + shape = (arr).shape + + if (arr).dtype.byteorder == ">": + # GH#29684 we incorrectly get OutOfBoundsDatetime if we dont swap + dtype = arr.dtype + arr = arr.astype(dtype.newbyteorder("<")) + + if arr.size == 0: + result = arr.view(DT64NS_DTYPE) + if copy: + result = result.copy() + return result + + unit = get_datetime64_unit(arr.flat[0]) + if unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC: + # without raising explicitly here, we end up with a SystemError + # built-in function ensure_datetime64ns returned a result with an error + raise ValueError("datetime64/timedelta64 must have a unit specified") + + if unit == NPY_FR_ns: + # Check this before allocating result for perf, might save some memory + if copy: + return arr.copy() + return arr + + ivalues = arr.view(np.int64).ravel("K") + + result = np.empty_like(arr, dtype=DT64NS_DTYPE) + iresult = result.ravel("K").view(np.int64) + + for i in range(n): + if ivalues[i] != NPY_NAT: + pandas_datetime_to_datetimestruct(ivalues[i], unit, &dts) + iresult[i] = dtstruct_to_dt64(&dts) + check_dts_bounds(&dts) + else: + iresult[i] = NPY_NAT + + return result + + +def ensure_timedelta64ns(arr: ndarray, copy: bool = True): + """ + Ensure a np.timedelta64 array has dtype specifically 'timedelta64[ns]' + + Parameters + ---------- + arr : ndarray + copy : bool, default True + + Returns + ------- + ndarray[timedelta64[ns]] + """ + assert arr.dtype.kind == "m", arr.dtype + + if arr.dtype == TD64NS_DTYPE: + return arr.copy() if copy else arr + + # Re-use the datetime64 machinery to do an overflow-safe `astype` + dtype = arr.dtype.str.replace("m8", "M8") + dummy = arr.view(dtype) + try: + dt64_result = ensure_datetime64ns(dummy, copy) + except OutOfBoundsDatetime as err: + # Re-write the exception in terms of timedelta64 instead of dt64 + + # Find the value that we are going to report as causing an overflow + tdmin = arr.min() + tdmax = 
arr.max() + if np.abs(tdmin) >= np.abs(tdmax): + bad_val = tdmin + else: + bad_val = tdmax + + msg = f"Out of bounds for nanosecond {arr.dtype.name} {str(bad_val)}" + raise OutOfBoundsTimedelta(msg) + + return dt64_result.view(TD64NS_DTYPE) + + +# ---------------------------------------------------------------------- + + +@cython.boundscheck(False) +@cython.wraparound(False) +def datetime_to_datetime64(ndarray[object] values): + """ + Convert ndarray of datetime-like objects to int64 array representing + nanosecond timestamps. + + Parameters + ---------- + values : ndarray[object] + + Returns + ------- + result : ndarray[datetime64ns] + inferred_tz : tzinfo or None + """ + cdef: + Py_ssize_t i, n = len(values) + object val + int64_t[:] iresult + npy_datetimestruct dts + _TSObject _ts + bint found_naive = False + tzinfo inferred_tz = None + + result = np.empty(n, dtype='M8[ns]') + iresult = result.view('i8') + for i in range(n): + val = values[i] + if checknull_with_nat(val): + iresult[i] = NPY_NAT + elif PyDateTime_Check(val): + if val.tzinfo is not None: + if found_naive: + raise ValueError('Cannot mix tz-aware with ' + 'tz-naive values') + if inferred_tz is not None: + if not tz_compare(val.tzinfo, inferred_tz): + raise ValueError('Array must be all same time zone') + else: + inferred_tz = val.tzinfo + + _ts = convert_datetime_to_tsobject(val, None) + iresult[i] = _ts.value + check_dts_bounds(&_ts.dts) + else: + found_naive = True + if inferred_tz is not None: + raise ValueError('Cannot mix tz-aware with ' + 'tz-naive values') + iresult[i] = pydatetime_to_dt64(val, &dts) + check_dts_bounds(&dts) + else: + raise TypeError(f'Unrecognized value type: {type(val)}') + + return result, inferred_tz + + +# ---------------------------------------------------------------------- +# _TSObject Conversion + +# lightweight C object to hold datetime & int64 pair +cdef class _TSObject: + # cdef: + # npy_datetimestruct dts # npy_datetimestruct + # int64_t value # numpy dt64 + # object tzinfo + # bint fold + + def __cinit__(self): + # GH 25057. 
As per PEP 495, set fold to 0 by default + self.fold = 0 + + @property + def value(self): + # This is needed in order for `value` to be accessible in lib.pyx + return self.value + + +cdef convert_to_tsobject(object ts, tzinfo tz, str unit, + bint dayfirst, bint yearfirst, int32_t nanos=0): + """ + Extract datetime and int64 from any of: + - np.int64 (with unit providing a possible modifier) + - np.datetime64 + - a float (with unit providing a possible modifier) + - python int or long object (with unit providing a possible modifier) + - iso8601 string object + - python datetime object + - another timestamp object + + Raises + ------ + OutOfBoundsDatetime : ts cannot be converted within implementation bounds + """ + cdef: + _TSObject obj + + obj = _TSObject() + + if isinstance(ts, str): + return _convert_str_to_tsobject(ts, tz, unit, dayfirst, yearfirst) + + if ts is None or ts is NaT: + obj.value = NPY_NAT + elif is_datetime64_object(ts): + obj.value = get_datetime64_nanos(ts) + if obj.value != NPY_NAT: + dt64_to_dtstruct(obj.value, &obj.dts) + elif is_integer_object(ts): + try: + ts = ts + except OverflowError: + # GH#26651 re-raise as OutOfBoundsDatetime + raise OutOfBoundsDatetime(f"Out of bounds nanosecond timestamp {ts}") + if ts == NPY_NAT: + obj.value = NPY_NAT + else: + ts = ts * cast_from_unit(None, unit) + obj.value = ts + dt64_to_dtstruct(ts, &obj.dts) + elif is_float_object(ts): + if ts != ts or ts == NPY_NAT: + obj.value = NPY_NAT + else: + ts = cast_from_unit(ts, unit) + obj.value = ts + dt64_to_dtstruct(ts, &obj.dts) + elif PyDateTime_Check(ts): + return convert_datetime_to_tsobject(ts, tz, nanos) + elif PyDate_Check(ts): + # Keep the converter same as PyDateTime's + ts = datetime.combine(ts, time()) + return convert_datetime_to_tsobject(ts, tz) + else: + from .period import Period + if isinstance(ts, Period): + raise ValueError("Cannot convert Period to Timestamp " + "unambiguously. Use to_timestamp") + raise TypeError(f'Cannot convert input [{ts}] of type {type(ts)} to ' + f'Timestamp') + + if tz is not None: + _localize_tso(obj, tz) + + if obj.value != NPY_NAT: + # check_overflows needs to run after _localize_tso + check_dts_bounds(&obj.dts) + check_overflows(obj) + return obj + + +cdef _TSObject convert_datetime_to_tsobject(datetime ts, tzinfo tz, + int32_t nanos=0): + """ + Convert a datetime (or Timestamp) input `ts`, along with optional timezone + object `tz` to a _TSObject. + + The optional argument `nanos` allows for cases where datetime input + needs to be supplemented with higher-precision information. 
+ + Parameters + ---------- + ts : datetime or Timestamp + Value to be converted to _TSObject + tz : tzinfo or None + timezone for the timezone-aware output + nanos : int32_t, default is 0 + nanoseconds supplement the precision of the datetime input ts + + Returns + ------- + obj : _TSObject + """ + cdef: + _TSObject obj = _TSObject() + + obj.fold = ts.fold + if tz is not None: + tz = maybe_get_tz(tz) + + if ts.tzinfo is not None: + # Convert the current timezone to the passed timezone + ts = ts.astimezone(tz) + obj.value = pydatetime_to_dt64(ts, &obj.dts) + obj.tzinfo = ts.tzinfo + elif not is_utc(tz): + ts = _localize_pydatetime(ts, tz) + obj.value = pydatetime_to_dt64(ts, &obj.dts) + obj.tzinfo = ts.tzinfo + else: + # UTC + obj.value = pydatetime_to_dt64(ts, &obj.dts) + obj.tzinfo = tz + else: + obj.value = pydatetime_to_dt64(ts, &obj.dts) + obj.tzinfo = ts.tzinfo + + if obj.tzinfo is not None and not is_utc(obj.tzinfo): + offset = get_utcoffset(obj.tzinfo, ts) + obj.value -= int(offset.total_seconds() * 1e9) + + if isinstance(ts, ABCTimestamp): + obj.value += ts.nanosecond + obj.dts.ps = ts.nanosecond * 1000 + + if nanos: + obj.value += nanos + obj.dts.ps = nanos * 1000 + + check_dts_bounds(&obj.dts) + check_overflows(obj) + return obj + + +cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts, + int tzoffset, tzinfo tz=None): + """ + Convert a datetimestruct `dts`, along with initial timezone offset + `tzoffset` to a _TSObject (with timezone object `tz` - optional). + + Parameters + ---------- + dts: npy_datetimestruct + tzoffset: int + tz : tzinfo or None + timezone for the timezone-aware output. + + Returns + ------- + obj : _TSObject + """ + cdef: + _TSObject obj = _TSObject() + int64_t value # numpy dt64 + datetime dt + ndarray[int64_t] trans + int64_t[:] deltas + + value = dtstruct_to_dt64(&dts) + obj.dts = dts + obj.tzinfo = pytz.FixedOffset(tzoffset) + obj.value = tz_localize_to_utc_single(value, obj.tzinfo) + if tz is None: + check_overflows(obj) + return obj + + # Infer fold from offset-adjusted obj.value + # see PEP 495 https://www.python.org/dev/peps/pep-0495/#the-fold-attribute + if is_utc(tz): + pass + elif is_tzlocal(tz): + tz_convert_utc_to_tzlocal(obj.value, tz, &obj.fold) + else: + trans, deltas, typ = get_dst_info(tz) + + if typ == 'dateutil': + pos = trans.searchsorted(obj.value, side='right') - 1 + obj.fold = _infer_tsobject_fold(obj, trans, deltas, pos) + + # Keep the converter same as PyDateTime's + dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day, + obj.dts.hour, obj.dts.min, obj.dts.sec, + obj.dts.us, obj.tzinfo, fold=obj.fold) + obj = convert_datetime_to_tsobject( + dt, tz, nanos=obj.dts.ps // 1000) + return obj + + +cdef _TSObject _convert_str_to_tsobject(object ts, tzinfo tz, str unit, + bint dayfirst=False, + bint yearfirst=False): + """ + Convert a string input `ts`, along with optional timezone object`tz` + to a _TSObject. + + The optional arguments `dayfirst` and `yearfirst` are passed to the + dateutil parser. + + Parameters + ---------- + ts : str + Value to be converted to _TSObject + tz : tzinfo or None + timezone for the timezone-aware output + unit : str or None + dayfirst : bool, default False + When parsing an ambiguous date string, interpret e.g. "3/4/1975" as + April 3, as opposed to the standard US interpretation March 4. + yearfirst : bool, default False + When parsing an ambiguous date string, interpret e.g. 
"01/05/09" + as "May 9, 2001", as opposed to the default "Jan 5, 2009" + + Returns + ------- + obj : _TSObject + """ + cdef: + npy_datetimestruct dts + int out_local = 0, out_tzoffset = 0 + bint do_parse_datetime_string = False + + if len(ts) == 0 or ts in nat_strings: + ts = NaT + elif ts == 'now': + # Issue 9000, we short-circuit rather than going + # into np_datetime_strings which returns utc + ts = datetime.now(tz) + elif ts == 'today': + # Issue 9000, we short-circuit rather than going + # into np_datetime_strings which returns a normalized datetime + ts = datetime.now(tz) + # equiv: datetime.today().replace(tzinfo=tz) + else: + string_to_dts_failed = _string_to_dts( + ts, &dts, &out_local, + &out_tzoffset, False + ) + try: + if not string_to_dts_failed: + check_dts_bounds(&dts) + if out_local == 1: + return _create_tsobject_tz_using_offset(dts, + out_tzoffset, tz) + else: + ts = dtstruct_to_dt64(&dts) + if tz is not None: + # shift for _localize_tso + ts = tz_localize_to_utc_single(ts, tz, + ambiguous="raise") + + except OutOfBoundsDatetime: + # GH#19382 for just-barely-OutOfBounds falling back to dateutil + # parser will return incorrect result because it will ignore + # nanoseconds + raise + + except ValueError: + do_parse_datetime_string = True + + if string_to_dts_failed or do_parse_datetime_string: + try: + ts = parse_datetime_string(ts, dayfirst=dayfirst, + yearfirst=yearfirst) + except (ValueError, OverflowError): + raise ValueError("could not convert string to Timestamp") + + return convert_to_tsobject(ts, tz, unit, dayfirst, yearfirst) + + +cdef inline check_overflows(_TSObject obj): + """ + Check that we haven't silently overflowed in timezone conversion + + Parameters + ---------- + obj : _TSObject + + Returns + ------- + None + + Raises + ------ + OutOfBoundsDatetime + """ + # GH#12677 + if obj.dts.year == 1677: + if not (obj.value < 0): + from pandas._libs.tslibs.timestamps import Timestamp + fmt = (f"{obj.dts.year}-{obj.dts.month:02d}-{obj.dts.day:02d} " + f"{obj.dts.hour:02d}:{obj.dts.min:02d}:{obj.dts.sec:02d}") + raise OutOfBoundsDatetime( + f"Converting {fmt} underflows past {Timestamp.min}" + ) + elif obj.dts.year == 2262: + if not (obj.value > 0): + from pandas._libs.tslibs.timestamps import Timestamp + fmt = (f"{obj.dts.year}-{obj.dts.month:02d}-{obj.dts.day:02d} " + f"{obj.dts.hour:02d}:{obj.dts.min:02d}:{obj.dts.sec:02d}") + raise OutOfBoundsDatetime( + f"Converting {fmt} overflows past {Timestamp.max}" + ) + +# ---------------------------------------------------------------------- +# Localization + +cdef inline void _localize_tso(_TSObject obj, tzinfo tz): + """ + Given the UTC nanosecond timestamp in obj.value, find the wall-clock + representation of that timestamp in the given timezone. + + Parameters + ---------- + obj : _TSObject + tz : tzinfo + + Returns + ------- + None + + Notes + ----- + Sets obj.tzinfo inplace, alters obj.dts inplace. 
+ """ + cdef: + ndarray[int64_t] trans + int64_t[:] deltas + int64_t local_val + Py_ssize_t pos + str typ + + assert obj.tzinfo is None + + if is_utc(tz): + pass + elif obj.value == NPY_NAT: + pass + elif is_tzlocal(tz): + local_val = tz_convert_utc_to_tzlocal(obj.value, tz, &obj.fold) + dt64_to_dtstruct(local_val, &obj.dts) + else: + # Adjust datetime64 timestamp, recompute datetimestruct + trans, deltas, typ = get_dst_info(tz) + + if is_fixed_offset(tz): + # static/fixed tzinfo; in this case we know len(deltas) == 1 + # This can come back with `typ` of either "fixed" or None + dt64_to_dtstruct(obj.value + deltas[0], &obj.dts) + elif typ == 'pytz': + # i.e. treat_tz_as_pytz(tz) + pos = trans.searchsorted(obj.value, side='right') - 1 + tz = tz._tzinfos[tz._transition_info[pos]] + dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts) + elif typ == 'dateutil': + # i.e. treat_tz_as_dateutil(tz) + pos = trans.searchsorted(obj.value, side='right') - 1 + dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts) + # dateutil supports fold, so we infer fold from value + obj.fold = _infer_tsobject_fold(obj, trans, deltas, pos) + else: + # Note: as of 2018-07-17 all tzinfo objects that are _not_ + # either pytz or dateutil have is_fixed_offset(tz) == True, + # so this branch will never be reached. + pass + + obj.tzinfo = tz + + +cdef inline bint _infer_tsobject_fold( + _TSObject obj, + const int64_t[:] trans, + const int64_t[:] deltas, + int32_t pos, +): + """ + Infer _TSObject fold property from value by assuming 0 and then setting + to 1 if necessary. + + Parameters + ---------- + obj : _TSObject + trans : ndarray[int64_t] + ndarray of offset transition points in nanoseconds since epoch. + deltas : int64_t[:] + array of offsets corresponding to transition points in trans. + pos : int32_t + Position of the last transition point before taking fold into account. + + Returns + ------- + bint + Due to daylight saving time, one wall clock time can occur twice + when shifting from summer to winter time; fold describes whether the + datetime-like corresponds to the first (0) or the second time (1) + the wall clock hits the ambiguous time + + References + ---------- + .. [1] "PEP 495 - Local Time Disambiguation" + https://www.python.org/dev/peps/pep-0495/#the-fold-attribute + """ + cdef: + bint fold = 0 + + if pos > 0: + fold_delta = deltas[pos - 1] - deltas[pos] + if obj.value - fold_delta < trans[pos]: + fold = 1 + + return fold + +cdef inline datetime _localize_pydatetime(datetime dt, tzinfo tz): + """ + Take a datetime/Timestamp in UTC and localizes to timezone tz. + + NB: Unlike the public version, this treats datetime and Timestamp objects + identically, i.e. discards nanos from Timestamps. + It also assumes that the `tz` input is not None. + """ + try: + # datetime.replace with pytz may be incorrect result + return tz.localize(dt) + except AttributeError: + return dt.replace(tzinfo=tz) + + +cpdef inline datetime localize_pydatetime(datetime dt, tzinfo tz): + """ + Take a datetime/Timestamp in UTC and localizes to timezone tz. 
+ + Parameters + ---------- + dt : datetime or Timestamp + tz : tzinfo or None + + Returns + ------- + localized : datetime or Timestamp + """ + if tz is None: + return dt + elif isinstance(dt, ABCTimestamp): + return dt.tz_localize(tz) + elif is_utc(tz): + return _localize_pydatetime(dt, tz) + try: + # datetime.replace with pytz may be incorrect result + return tz.localize(dt) + except AttributeError: + return dt.replace(tzinfo=tz) + + +# ---------------------------------------------------------------------- +# Normalization + +@cython.cdivision(False) +cdef inline int64_t normalize_i8_stamp(int64_t local_val) nogil: + """ + Round the localized nanosecond timestamp down to the previous midnight. + + Parameters + ---------- + local_val : int64_t + + Returns + ------- + int64_t + """ + cdef: + int64_t day_nanos = 24 * 3600 * 1_000_000_000 + return local_val - (local_val % day_nanos) diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/dtypes.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/dtypes.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..3297117 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/dtypes.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/dtypes.pxd b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/dtypes.pxd new file mode 100644 index 0000000..71b4eea --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/dtypes.pxd @@ -0,0 +1,76 @@ +cdef dict attrname_to_abbrevs + +cdef enum c_FreqGroup: + # Mirrors FreqGroup in the .pxy file + FR_ANN = 1000 + FR_QTR = 2000 + FR_MTH = 3000 + FR_WK = 4000 + FR_BUS = 5000 + FR_DAY = 6000 + FR_HR = 7000 + FR_MIN = 8000 + FR_SEC = 9000 + FR_MS = 10000 + FR_US = 11000 + FR_NS = 12000 + FR_UND = -10000 # undefined + + +cdef enum PeriodDtypeCode: + # Annual freqs with various fiscal year ends. + # eg, 2005 for A_FEB runs Mar 1, 2004 to Feb 28, 2005 + A = 1000 # Default alias + A_DEC = 1000 # Annual - December year end + A_JAN = 1001 # Annual - January year end + A_FEB = 1002 # Annual - February year end + A_MAR = 1003 # Annual - March year end + A_APR = 1004 # Annual - April year end + A_MAY = 1005 # Annual - May year end + A_JUN = 1006 # Annual - June year end + A_JUL = 1007 # Annual - July year end + A_AUG = 1008 # Annual - August year end + A_SEP = 1009 # Annual - September year end + A_OCT = 1010 # Annual - October year end + A_NOV = 1011 # Annual - November year end + + # Quarterly frequencies with various fiscal year ends. 
+ # eg, Q42005 for Q_OCT runs Aug 1, 2005 to Oct 31, 2005 + Q_DEC = 2000 # Quarterly - December year end + Q_JAN = 2001 # Quarterly - January year end + Q_FEB = 2002 # Quarterly - February year end + Q_MAR = 2003 # Quarterly - March year end + Q_APR = 2004 # Quarterly - April year end + Q_MAY = 2005 # Quarterly - May year end + Q_JUN = 2006 # Quarterly - June year end + Q_JUL = 2007 # Quarterly - July year end + Q_AUG = 2008 # Quarterly - August year end + Q_SEP = 2009 # Quarterly - September year end + Q_OCT = 2010 # Quarterly - October year end + Q_NOV = 2011 # Quarterly - November year end + + M = 3000 # Monthly + + W_SUN = 4000 # Weekly - Sunday end of week + W_MON = 4001 # Weekly - Monday end of week + W_TUE = 4002 # Weekly - Tuesday end of week + W_WED = 4003 # Weekly - Wednesday end of week + W_THU = 4004 # Weekly - Thursday end of week + W_FRI = 4005 # Weekly - Friday end of week + W_SAT = 4006 # Weekly - Saturday end of week + + B = 5000 # Business days + D = 6000 # Daily + H = 7000 # Hourly + T = 8000 # Minutely + S = 9000 # Secondly + L = 10000 # Millisecondly + U = 11000 # Microsecondly + N = 12000 # Nanosecondly + + UNDEFINED = -10_000 + + +cdef class PeriodDtypeBase: + cdef readonly: + PeriodDtypeCode _dtype_code diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/dtypes.pyi b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/dtypes.pyi new file mode 100644 index 0000000..8e47993 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/dtypes.pyi @@ -0,0 +1,57 @@ +from enum import Enum + +from pandas._libs.tslibs.offsets import BaseOffset + +_attrname_to_abbrevs: dict[str, str] +_period_code_map: dict[str, int] + +class PeriodDtypeBase: + _dtype_code: int # PeriodDtypeCode + + # actually __cinit__ + def __new__(cls, code: int): ... + def freq_group_code(self) -> int: ... + def date_offset(self) -> BaseOffset: ... + @classmethod + def from_date_offset(cls, offset: BaseOffset) -> PeriodDtypeBase: ... + @property + def resolution(self) -> Resolution: ... + +class FreqGroup(Enum): + FR_ANN: int + FR_QTR: int + FR_MTH: int + FR_WK: int + FR_BUS: int + FR_DAY: int + FR_HR: int + FR_MIN: int + FR_SEC: int + FR_MS: int + FR_US: int + FR_NS: int + FR_UND: int + @staticmethod + def get_freq_group(code: int) -> FreqGroup: ... + +class Resolution(Enum): + RESO_NS: int + RESO_US: int + RESO_MS: int + RESO_SEC: int + RESO_MIN: int + RESO_HR: int + RESO_DAY: int + RESO_MTH: int + RESO_QTR: int + RESO_YR: int + def __lt__(self, other: Resolution) -> bool: ... + def __ge__(self, other: Resolution) -> bool: ... + @property + def freq_group(self) -> FreqGroup: ... + @property + def attrname(self) -> str: ... + @classmethod + def from_attrname(cls, attrname: str) -> Resolution: ... + @classmethod + def get_reso_from_freq(cls, freq: str) -> Resolution: ... diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/dtypes.pyx b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/dtypes.pyx new file mode 100644 index 0000000..ea54545 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/dtypes.pyx @@ -0,0 +1,289 @@ +# period frequency constants corresponding to scikits timeseries +# originals +from enum import Enum + + +cdef class PeriodDtypeBase: + """ + Similar to an actual dtype, this contains all of the information + describing a PeriodDtype in an integer code. 
+ """ + # cdef readonly: + # PeriodDtypeCode _dtype_code + + def __cinit__(self, PeriodDtypeCode code): + self._dtype_code = code + + def __eq__(self, other): + if not isinstance(other, PeriodDtypeBase): + return False + if not isinstance(self, PeriodDtypeBase): + # cython semantics, this is a reversed op + return False + return self._dtype_code == other._dtype_code + + @property + def freq_group_code(self) -> int: + # See also: libperiod.get_freq_group + return (self._dtype_code // 1000) * 1000 + + @property + def resolution(self) -> "Resolution": + fgc = self.freq_group_code + return Resolution.from_freq_group(FreqGroup(fgc)) + + @property + def date_offset(self): + """ + Corresponding DateOffset object. + + This mapping is mainly for backward-compatibility. + """ + from .offsets import to_offset + + freqstr = _reverse_period_code_map.get(self._dtype_code) + + return to_offset(freqstr) + + @classmethod + def from_date_offset(cls, offset): + code = offset._period_dtype_code + return cls(code) + + +_period_code_map = { + # Annual freqs with various fiscal year ends. + # eg, 2005 for A-FEB runs Mar 1, 2004 to Feb 28, 2005 + "A-DEC": 1000, # Annual - December year end + "A-JAN": 1001, # Annual - January year end + "A-FEB": 1002, # Annual - February year end + "A-MAR": 1003, # Annual - March year end + "A-APR": 1004, # Annual - April year end + "A-MAY": 1005, # Annual - May year end + "A-JUN": 1006, # Annual - June year end + "A-JUL": 1007, # Annual - July year end + "A-AUG": 1008, # Annual - August year end + "A-SEP": 1009, # Annual - September year end + "A-OCT": 1010, # Annual - October year end + "A-NOV": 1011, # Annual - November year end + + # Quarterly frequencies with various fiscal year ends. + # eg, Q42005 for Q-OCT runs Aug 1, 2005 to Oct 31, 2005 + "Q-DEC": 2000, # Quarterly - December year end + "Q-JAN": 2001, # Quarterly - January year end + "Q-FEB": 2002, # Quarterly - February year end + "Q-MAR": 2003, # Quarterly - March year end + "Q-APR": 2004, # Quarterly - April year end + "Q-MAY": 2005, # Quarterly - May year end + "Q-JUN": 2006, # Quarterly - June year end + "Q-JUL": 2007, # Quarterly - July year end + "Q-AUG": 2008, # Quarterly - August year end + "Q-SEP": 2009, # Quarterly - September year end + "Q-OCT": 2010, # Quarterly - October year end + "Q-NOV": 2011, # Quarterly - November year end + + "M": 3000, # Monthly + + "W-SUN": 4000, # Weekly - Sunday end of week + "W-MON": 4001, # Weekly - Monday end of week + "W-TUE": 4002, # Weekly - Tuesday end of week + "W-WED": 4003, # Weekly - Wednesday end of week + "W-THU": 4004, # Weekly - Thursday end of week + "W-FRI": 4005, # Weekly - Friday end of week + "W-SAT": 4006, # Weekly - Saturday end of week + + "B": 5000, # Business days + "D": 6000, # Daily + "H": 7000, # Hourly + "T": 8000, # Minutely + "S": 9000, # Secondly + "L": 10000, # Millisecondly + "U": 11000, # Microsecondly + "N": 12000, # Nanosecondly +} + +_reverse_period_code_map = { + _period_code_map[key]: key for key in _period_code_map} + +# Yearly aliases; careful not to put these in _reverse_period_code_map +_period_code_map.update({"Y" + key[1:]: _period_code_map[key] + for key in _period_code_map + if key.startswith("A-")}) + +_period_code_map.update({ + "Q": 2000, # Quarterly - December year end (default quarterly) + "A": 1000, # Annual + "W": 4000, # Weekly + "C": 5000, # Custom Business Day +}) + +cdef set _month_names = { + x.split("-")[-1] for x in _period_code_map.keys() if x.startswith("A-") +} + +# Map attribute-name resolutions to resolution 
abbreviations +_attrname_to_abbrevs = { + "year": "A", + "quarter": "Q", + "month": "M", + "day": "D", + "hour": "H", + "minute": "T", + "second": "S", + "millisecond": "L", + "microsecond": "U", + "nanosecond": "N", +} +cdef dict attrname_to_abbrevs = _attrname_to_abbrevs +cdef dict _abbrev_to_attrnames = {v: k for k, v in attrname_to_abbrevs.items()} + + +class FreqGroup(Enum): + # Mirrors c_FreqGroup in the .pxd file + FR_ANN = 1000 + FR_QTR = 2000 + FR_MTH = 3000 + FR_WK = 4000 + FR_BUS = 5000 + FR_DAY = 6000 + FR_HR = 7000 + FR_MIN = 8000 + FR_SEC = 9000 + FR_MS = 10000 + FR_US = 11000 + FR_NS = 12000 + FR_UND = -10000 # undefined + + @staticmethod + def get_freq_group(code: int) -> "FreqGroup": + # See also: PeriodDtypeBase.freq_group_code + code = (code // 1000) * 1000 + return FreqGroup(code) + + +class Resolution(Enum): + + # Note: cython won't allow us to reference the cdef versions at the + # module level + RESO_NS = 0 + RESO_US = 1 + RESO_MS = 2 + RESO_SEC = 3 + RESO_MIN = 4 + RESO_HR = 5 + RESO_DAY = 6 + RESO_MTH = 7 + RESO_QTR = 8 + RESO_YR = 9 + + def __lt__(self, other): + return self.value < other.value + + def __ge__(self, other): + return self.value >= other.value + + @property + def freq_group(self) -> FreqGroup: + if self == Resolution.RESO_NS: + return FreqGroup.FR_NS + elif self == Resolution.RESO_US: + return FreqGroup.FR_US + elif self == Resolution.RESO_MS: + return FreqGroup.FR_MS + elif self == Resolution.RESO_SEC: + return FreqGroup.FR_SEC + elif self == Resolution.RESO_MIN: + return FreqGroup.FR_MIN + elif self == Resolution.RESO_HR: + return FreqGroup.FR_HR + elif self == Resolution.RESO_DAY: + return FreqGroup.FR_DAY + elif self == Resolution.RESO_MTH: + return FreqGroup.FR_MTH + elif self == Resolution.RESO_QTR: + return FreqGroup.FR_QTR + elif self == Resolution.RESO_YR: + return FreqGroup.FR_ANN + else: + raise ValueError(self) # pragma: no cover + + @property + def attrname(self) -> str: + """ + Return datetime attribute name corresponding to this Resolution. + + Examples + -------- + >>> Resolution.RESO_SEC.attrname + 'second' + """ + return _reso_str_map[self.value] + + @classmethod + def from_attrname(cls, attrname: str) -> "Resolution": + """ + Return resolution str against resolution code. + + Examples + -------- + >>> Resolution.from_attrname('second') + + + >>> Resolution.from_attrname('second') == Resolution.RESO_SEC + True + """ + return cls(_str_reso_map[attrname]) + + @classmethod + def get_reso_from_freq(cls, freq: str) -> "Resolution": + """ + Return resolution code against frequency str. + + `freq` is given by the `offset.freqstr` for some DateOffset object. + + Examples + -------- + >>> Resolution.get_reso_from_freq('H') + + + >>> Resolution.get_reso_from_freq('H') == Resolution.RESO_HR + True + """ + try: + attr_name = _abbrev_to_attrnames[freq] + except KeyError: + # For quarterly and yearly resolutions, we need to chop off + # a month string. + split_freq = freq.split("-") + if len(split_freq) != 2: + raise + if split_freq[1] not in _month_names: + # i.e. we want e.g. 
"Q-DEC", not "Q-INVALID" + raise + attr_name = _abbrev_to_attrnames[split_freq[0]] + + return cls.from_attrname(attr_name) + + @classmethod + def from_freq_group(cls, freq_group: FreqGroup) -> "Resolution": + abbrev = _reverse_period_code_map[freq_group.value].split("-")[0] + if abbrev == "B": + return cls.RESO_DAY + attrname = _abbrev_to_attrnames[abbrev] + return cls.from_attrname(attrname) + + +cdef dict _reso_str_map = { + Resolution.RESO_NS.value: "nanosecond", + Resolution.RESO_US.value: "microsecond", + Resolution.RESO_MS.value: "millisecond", + Resolution.RESO_SEC.value: "second", + Resolution.RESO_MIN.value: "minute", + Resolution.RESO_HR.value: "hour", + Resolution.RESO_DAY.value: "day", + Resolution.RESO_MTH.value: "month", + Resolution.RESO_QTR.value: "quarter", + Resolution.RESO_YR.value: "year", +} + +cdef dict _str_reso_map = {v: k for k, v in _reso_str_map.items()} diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/fields.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/fields.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..e242b4c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/fields.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/fields.pyi b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/fields.pyi new file mode 100644 index 0000000..415b432 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/fields.pyi @@ -0,0 +1,52 @@ +import numpy as np + +from pandas._typing import npt + +def build_field_sarray( + dtindex: npt.NDArray[np.int64], # const int64_t[:] +) -> np.ndarray: ... +def month_position_check(fields, weekdays) -> str | None: ... +def get_date_name_field( + dtindex: npt.NDArray[np.int64], # const int64_t[:] + field: str, + locale: str | None = ..., +) -> npt.NDArray[np.object_]: ... +def get_start_end_field( + dtindex: npt.NDArray[np.int64], # const int64_t[:] + field: str, + freqstr: str | None = ..., + month_kw: int = ..., +) -> npt.NDArray[np.bool_]: ... +def get_date_field( + dtindex: npt.NDArray[np.int64], # const int64_t[:] + field: str, +) -> npt.NDArray[np.int32]: ... +def get_timedelta_field( + tdindex: np.ndarray, # const int64_t[:] + field: str, +) -> npt.NDArray[np.int32]: ... +def isleapyear_arr( + years: np.ndarray, +) -> npt.NDArray[np.bool_]: ... +def build_isocalendar_sarray( + dtindex: npt.NDArray[np.int64], # const int64_t[:] +) -> np.ndarray: ... +def get_locale_names(name_type: str, locale: str | None = ...): ... + +class RoundTo: + @property + def MINUS_INFTY(self) -> int: ... + @property + def PLUS_INFTY(self) -> int: ... + @property + def NEAREST_HALF_EVEN(self) -> int: ... + @property + def NEAREST_HALF_PLUS_INFTY(self) -> int: ... + @property + def NEAREST_HALF_MINUS_INFTY(self) -> int: ... + +def round_nsint64( + values: npt.NDArray[np.int64], + mode: RoundTo, + nanos: int, +) -> npt.NDArray[np.int64]: ... 
diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/fields.pyx b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/fields.pyx new file mode 100644 index 0000000..c1915e7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/fields.pyx @@ -0,0 +1,743 @@ +""" +Functions for accessing attributes of Timestamp/datetime64/datetime-like +objects and arrays +""" +from locale import LC_TIME + +import cython +from cython import Py_ssize_t +import numpy as np + +cimport numpy as cnp +from numpy cimport ( + int8_t, + int32_t, + int64_t, + ndarray, + uint32_t, +) + +cnp.import_array() + +from pandas._config.localization import set_locale + +from pandas._libs.tslibs.ccalendar import ( + DAYS_FULL, + MONTHS_FULL, +) + +from pandas._libs.tslibs.ccalendar cimport ( + dayofweek, + get_day_of_year, + get_days_in_month, + get_firstbday, + get_iso_calendar, + get_lastbday, + get_week_of_year, + is_leapyear, + iso_calendar_t, + month_offset, +) +from pandas._libs.tslibs.nattype cimport NPY_NAT +from pandas._libs.tslibs.np_datetime cimport ( + dt64_to_dtstruct, + npy_datetimestruct, + pandas_timedeltastruct, + td64_to_tdstruct, +) + +from pandas._libs.tslibs.strptime import LocaleTime + + +@cython.wraparound(False) +@cython.boundscheck(False) +def build_field_sarray(const int64_t[:] dtindex): + """ + Datetime as int64 representation to a structured array of fields + """ + cdef: + Py_ssize_t i, count = len(dtindex) + npy_datetimestruct dts + ndarray[int32_t] years, months, days, hours, minutes, seconds, mus + + sa_dtype = [ + ("Y", "i4"), # year + ("M", "i4"), # month + ("D", "i4"), # day + ("h", "i4"), # hour + ("m", "i4"), # min + ("s", "i4"), # second + ("u", "i4"), # microsecond + ] + + out = np.empty(count, dtype=sa_dtype) + + years = out['Y'] + months = out['M'] + days = out['D'] + hours = out['h'] + minutes = out['m'] + seconds = out['s'] + mus = out['u'] + + for i in range(count): + dt64_to_dtstruct(dtindex[i], &dts) + years[i] = dts.year + months[i] = dts.month + days[i] = dts.day + hours[i] = dts.hour + minutes[i] = dts.min + seconds[i] = dts.sec + mus[i] = dts.us + + return out + + +def month_position_check(fields, weekdays) -> str | None: + cdef: + int32_t daysinmonth, y, m, d + bint calendar_end = True + bint business_end = True + bint calendar_start = True + bint business_start = True + bint cal + int32_t[:] years = fields["Y"] + int32_t[:] months = fields["M"] + int32_t[:] days = fields["D"] + + for y, m, d, wd in zip(years, months, days, weekdays): + if calendar_start: + calendar_start &= d == 1 + if business_start: + business_start &= d == 1 or (d <= 3 and wd == 0) + + if calendar_end or business_end: + daysinmonth = get_days_in_month(y, m) + cal = d == daysinmonth + if calendar_end: + calendar_end &= cal + if business_end: + business_end &= cal or (daysinmonth - d < 3 and wd == 4) + elif not calendar_start and not business_start: + break + + if calendar_end: + return "ce" + elif business_end: + return "be" + elif calendar_start: + return "cs" + elif business_start: + return "bs" + else: + return None + + +@cython.wraparound(False) +@cython.boundscheck(False) +def get_date_name_field(const int64_t[:] dtindex, str field, object locale=None): + """ + Given a int64-based datetime index, return array of strings of date + name based on requested field (e.g. 
day_name) + """ + cdef: + Py_ssize_t i, count = len(dtindex) + ndarray[object] out, names + npy_datetimestruct dts + int dow + + out = np.empty(count, dtype=object) + + if field == 'day_name': + if locale is None: + names = np.array(DAYS_FULL, dtype=np.object_) + else: + names = np.array(get_locale_names('f_weekday', locale), + dtype=np.object_) + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = np.nan + continue + + dt64_to_dtstruct(dtindex[i], &dts) + dow = dayofweek(dts.year, dts.month, dts.day) + out[i] = names[dow].capitalize() + + elif field == 'month_name': + if locale is None: + names = np.array(MONTHS_FULL, dtype=np.object_) + else: + names = np.array(get_locale_names('f_month', locale), + dtype=np.object_) + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = np.nan + continue + + dt64_to_dtstruct(dtindex[i], &dts) + out[i] = names[dts.month].capitalize() + + else: + raise ValueError(f"Field {field} not supported") + + return out + + +cdef inline bint _is_on_month(int month, int compare_month, int modby) nogil: + """ + Analogous to DateOffset.is_on_offset checking for the month part of a date. + """ + if modby == 1: + return True + elif modby == 3: + return (month - compare_month) % 3 == 0 + else: + return month == compare_month + + +@cython.wraparound(False) +@cython.boundscheck(False) +def get_start_end_field(const int64_t[:] dtindex, str field, + str freqstr=None, int month_kw=12): + """ + Given an int64-based datetime index return array of indicators + of whether timestamps are at the start/end of the month/quarter/year + (defined by frequency). + """ + cdef: + Py_ssize_t i + int count = len(dtindex) + bint is_business = 0 + int end_month = 12 + int start_month = 1 + ndarray[int8_t] out + npy_datetimestruct dts + int compare_month, modby + + out = np.zeros(count, dtype='int8') + + if freqstr: + if freqstr == 'C': + raise ValueError(f"Custom business days is not supported by {field}") + is_business = freqstr[0] == 'B' + + # YearBegin(), BYearBegin() use month = starting month of year. + # QuarterBegin(), BQuarterBegin() use startingMonth = starting + # month of year. Other offsets use month, startingMonth as ending + # month of year. 
+ + if (freqstr[0:2] in ['MS', 'QS', 'AS']) or ( + freqstr[1:3] in ['MS', 'QS', 'AS']): + end_month = 12 if month_kw == 1 else month_kw - 1 + start_month = month_kw + else: + end_month = month_kw + start_month = (end_month % 12) + 1 + else: + end_month = 12 + start_month = 1 + + compare_month = start_month if "start" in field else end_month + if "month" in field: + modby = 1 + elif "quarter" in field: + modby = 3 + else: + modby = 12 + + if field in ["is_month_start", "is_quarter_start", "is_year_start"]: + if is_business: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = 0 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + + if _is_on_month(dts.month, compare_month, modby) and ( + dts.day == get_firstbday(dts.year, dts.month)): + out[i] = 1 + + else: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = 0 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + + if _is_on_month(dts.month, compare_month, modby) and dts.day == 1: + out[i] = 1 + + elif field in ["is_month_end", "is_quarter_end", "is_year_end"]: + if is_business: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = 0 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + + if _is_on_month(dts.month, compare_month, modby) and ( + dts.day == get_lastbday(dts.year, dts.month)): + out[i] = 1 + + else: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = 0 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + + if _is_on_month(dts.month, compare_month, modby) and ( + dts.day == get_days_in_month(dts.year, dts.month)): + out[i] = 1 + + else: + raise ValueError(f"Field {field} not supported") + + return out.view(bool) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def get_date_field(const int64_t[:] dtindex, str field): + """ + Given a int64-based datetime index, extract the year, month, etc., + field and return an array of these values. 
+ """ + cdef: + Py_ssize_t i, count = len(dtindex) + ndarray[int32_t] out + npy_datetimestruct dts + + out = np.empty(count, dtype='i4') + + if field == 'Y': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + out[i] = dts.year + return out + + elif field == 'M': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + out[i] = dts.month + return out + + elif field == 'D': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + out[i] = dts.day + return out + + elif field == 'h': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + out[i] = dts.hour + return out + + elif field == 'm': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + out[i] = dts.min + return out + + elif field == 's': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + out[i] = dts.sec + return out + + elif field == 'us': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + out[i] = dts.us + return out + + elif field == 'ns': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + out[i] = dts.ps // 1000 + return out + elif field == 'doy': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + out[i] = get_day_of_year(dts.year, dts.month, dts.day) + return out + + elif field == 'dow': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + out[i] = dayofweek(dts.year, dts.month, dts.day) + return out + + elif field == 'woy': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + out[i] = get_week_of_year(dts.year, dts.month, dts.day) + return out + + elif field == 'q': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + out[i] = dts.month + out[i] = ((out[i] - 1) // 3) + 1 + return out + + elif field == 'dim': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + out[i] = get_days_in_month(dts.year, dts.month) + return out + elif field == 'is_leap_year': + return isleapyear_arr(get_date_field(dtindex, 'Y')) + + raise ValueError(f"Field {field} not supported") + + +@cython.wraparound(False) +@cython.boundscheck(False) +def get_timedelta_field(const int64_t[:] tdindex, str field): + """ + Given a int64-based timedelta index, extract the days, hrs, sec., + field and return an array of these values. 
+ """ + cdef: + Py_ssize_t i, count = len(tdindex) + ndarray[int32_t] out + pandas_timedeltastruct tds + + out = np.empty(count, dtype='i4') + + if field == 'days': + with nogil: + for i in range(count): + if tdindex[i] == NPY_NAT: + out[i] = -1 + continue + + td64_to_tdstruct(tdindex[i], &tds) + out[i] = tds.days + return out + + elif field == 'seconds': + with nogil: + for i in range(count): + if tdindex[i] == NPY_NAT: + out[i] = -1 + continue + + td64_to_tdstruct(tdindex[i], &tds) + out[i] = tds.seconds + return out + + elif field == 'microseconds': + with nogil: + for i in range(count): + if tdindex[i] == NPY_NAT: + out[i] = -1 + continue + + td64_to_tdstruct(tdindex[i], &tds) + out[i] = tds.microseconds + return out + + elif field == 'nanoseconds': + with nogil: + for i in range(count): + if tdindex[i] == NPY_NAT: + out[i] = -1 + continue + + td64_to_tdstruct(tdindex[i], &tds) + out[i] = tds.nanoseconds + return out + + raise ValueError(f"Field {field} not supported") + + +cpdef isleapyear_arr(ndarray years): + """vectorized version of isleapyear; NaT evaluates as False""" + cdef: + ndarray[int8_t] out + + out = np.zeros(len(years), dtype='int8') + out[np.logical_or(years % 400 == 0, + np.logical_and(years % 4 == 0, + years % 100 > 0))] = 1 + return out.view(bool) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def build_isocalendar_sarray(const int64_t[:] dtindex): + """ + Given a int64-based datetime array, return the ISO 8601 year, week, and day + as a structured array. + """ + cdef: + Py_ssize_t i, count = len(dtindex) + npy_datetimestruct dts + ndarray[uint32_t] iso_years, iso_weeks, days + iso_calendar_t ret_val + + sa_dtype = [ + ("year", "u4"), + ("week", "u4"), + ("day", "u4"), + ] + + out = np.empty(count, dtype=sa_dtype) + + iso_years = out["year"] + iso_weeks = out["week"] + days = out["day"] + + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + ret_val = 0, 0, 0 + else: + dt64_to_dtstruct(dtindex[i], &dts) + ret_val = get_iso_calendar(dts.year, dts.month, dts.day) + + iso_years[i] = ret_val[0] + iso_weeks[i] = ret_val[1] + days[i] = ret_val[2] + return out + + +def get_locale_names(name_type: str, locale: object = None): + """ + Returns an array of localized day or month names. + + Parameters + ---------- + name_type : str + Attribute of LocaleTime() in which to return localized names. + locale : str + + Returns + ------- + list of locale names + """ + with set_locale(locale, LC_TIME): + return getattr(LocaleTime(), name_type) + + +# --------------------------------------------------------------------- +# Rounding + + +class RoundTo: + """ + enumeration defining the available rounding modes + + Attributes + ---------- + MINUS_INFTY + round towards -∞, or floor [2]_ + PLUS_INFTY + round towards +∞, or ceil [3]_ + NEAREST_HALF_EVEN + round to nearest, tie-break half to even [6]_ + NEAREST_HALF_MINUS_INFTY + round to nearest, tie-break half to -∞ [5]_ + NEAREST_HALF_PLUS_INFTY + round to nearest, tie-break half to +∞ [4]_ + + + References + ---------- + .. [1] "Rounding - Wikipedia" + https://en.wikipedia.org/wiki/Rounding + .. [2] "Rounding down" + https://en.wikipedia.org/wiki/Rounding#Rounding_down + .. [3] "Rounding up" + https://en.wikipedia.org/wiki/Rounding#Rounding_up + .. [4] "Round half up" + https://en.wikipedia.org/wiki/Rounding#Round_half_up + .. [5] "Round half down" + https://en.wikipedia.org/wiki/Rounding#Round_half_down + .. 
[6] "Round half to even" + https://en.wikipedia.org/wiki/Rounding#Round_half_to_even + """ + @property + def MINUS_INFTY(self) -> int: + return 0 + + @property + def PLUS_INFTY(self) -> int: + return 1 + + @property + def NEAREST_HALF_EVEN(self) -> int: + return 2 + + @property + def NEAREST_HALF_PLUS_INFTY(self) -> int: + return 3 + + @property + def NEAREST_HALF_MINUS_INFTY(self) -> int: + return 4 + + +cdef inline ndarray[int64_t] _floor_int64(int64_t[:] values, int64_t unit): + cdef: + Py_ssize_t i, n = len(values) + ndarray[int64_t] result = np.empty(n, dtype="i8") + int64_t res, value + + with cython.overflowcheck(True): + for i in range(n): + value = values[i] + if value == NPY_NAT: + res = NPY_NAT + else: + res = value - value % unit + result[i] = res + + return result + + +cdef inline ndarray[int64_t] _ceil_int64(int64_t[:] values, int64_t unit): + cdef: + Py_ssize_t i, n = len(values) + ndarray[int64_t] result = np.empty(n, dtype="i8") + int64_t res, value + + with cython.overflowcheck(True): + for i in range(n): + value = values[i] + + if value == NPY_NAT: + res = NPY_NAT + else: + remainder = value % unit + if remainder == 0: + res = value + else: + res = value + (unit - remainder) + + result[i] = res + + return result + + +cdef inline ndarray[int64_t] _rounddown_int64(values, int64_t unit): + return _ceil_int64(values - unit // 2, unit) + + +cdef inline ndarray[int64_t] _roundup_int64(values, int64_t unit): + return _floor_int64(values + unit // 2, unit) + + +def round_nsint64(values: np.ndarray, mode: RoundTo, nanos: int) -> np.ndarray: + """ + Applies rounding mode at given frequency + + Parameters + ---------- + values : np.ndarray[int64_t]` + mode : instance of `RoundTo` enumeration + nanos : np.int64 + Freq to round to, expressed in nanoseconds + + Returns + ------- + np.ndarray[int64_t] + """ + cdef: + int64_t unit = nanos + + if mode == RoundTo.MINUS_INFTY: + return _floor_int64(values, unit) + elif mode == RoundTo.PLUS_INFTY: + return _ceil_int64(values, unit) + elif mode == RoundTo.NEAREST_HALF_MINUS_INFTY: + return _rounddown_int64(values, unit) + elif mode == RoundTo.NEAREST_HALF_PLUS_INFTY: + return _roundup_int64(values, unit) + elif mode == RoundTo.NEAREST_HALF_EVEN: + # for odd unit there is no need of a tie break + if unit % 2: + return _rounddown_int64(values, unit) + quotient, remainder = np.divmod(values, unit) + mask = np.logical_or( + remainder > (unit // 2), + np.logical_and(remainder == (unit // 2), quotient % 2) + ) + quotient[mask] += 1 + return quotient * unit + + # if/elif above should catch all rounding modes defined in enum 'RoundTo': + # if flow of control arrives here, it is a bug + raise ValueError("round_nsint64 called with an unrecognized rounding mode") diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/nattype.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/nattype.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..40e8448 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/nattype.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/nattype.pxd b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/nattype.pxd new file mode 100644 index 0000000..5e5f422 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/nattype.pxd @@ -0,0 +1,19 @@ +from cpython.datetime cimport datetime +from numpy cimport int64_t + + +cdef int64_t NPY_NAT + +cdef bint _nat_scalar_rules[6] 
+cdef set c_nat_strings + +cdef class _NaT(datetime): + cdef readonly: + int64_t value + +cdef _NaT c_NaT + + +cdef bint checknull_with_nat(object val) +cdef bint is_dt64nat(object val) +cdef bint is_td64nat(object val) diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/nattype.pyi b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/nattype.pyi new file mode 100644 index 0000000..8b40993 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/nattype.pyi @@ -0,0 +1,155 @@ +from datetime import ( + datetime, + timedelta, + tzinfo as _tzinfo, +) +from typing import Any + +import numpy as np + +from pandas._libs.tslibs.period import Period + +NaT: NaTType +iNaT: int +nat_strings: set[str] + +def is_null_datetimelike(val: object, inat_is_null: bool = ...) -> bool: ... + +class NaTType(datetime): + value: np.int64 + def asm8(self) -> np.datetime64: ... + def to_datetime64(self) -> np.datetime64: ... + def to_numpy( + self, dtype: np.dtype | str | None = ..., copy: bool = ... + ) -> np.datetime64 | np.timedelta64: ... + @property + def is_leap_year(self) -> bool: ... + @property + def is_month_start(self) -> bool: ... + @property + def is_quarter_start(self) -> bool: ... + @property + def is_year_start(self) -> bool: ... + @property + def is_month_end(self) -> bool: ... + @property + def is_quarter_end(self) -> bool: ... + @property + def is_year_end(self) -> bool: ... + @property + def day_of_year(self) -> float: ... + @property + def dayofyear(self) -> float: ... + @property + def days_in_month(self) -> float: ... + @property + def daysinmonth(self) -> float: ... + @property + def day_of_week(self) -> float: ... + @property + def dayofweek(self) -> float: ... + @property + def week(self) -> float: ... + @property + def weekofyear(self) -> float: ... + def day_name(self) -> float: ... + def month_name(self) -> float: ... + # error: Return type "float" of "weekday" incompatible with return + # type "int" in supertype "date" + def weekday(self) -> float: ... # type: ignore[override] + # error: Return type "float" of "isoweekday" incompatible with return + # type "int" in supertype "date" + def isoweekday(self) -> float: ... # type: ignore[override] + def total_seconds(self) -> float: ... + # error: Signature of "today" incompatible with supertype "datetime" + def today(self, *args, **kwargs) -> NaTType: ... # type: ignore[override] + # error: Signature of "today" incompatible with supertype "datetime" + def now(self, *args, **kwargs) -> NaTType: ... # type: ignore[override] + def to_pydatetime(self) -> NaTType: ... + def date(self) -> NaTType: ... + def round(self) -> NaTType: ... + def floor(self) -> NaTType: ... + def ceil(self) -> NaTType: ... + def tz_convert(self) -> NaTType: ... + def tz_localize(self) -> NaTType: ... + # error: Signature of "replace" incompatible with supertype "datetime" + def replace( # type: ignore[override] + self, + year: int | None = ..., + month: int | None = ..., + day: int | None = ..., + hour: int | None = ..., + minute: int | None = ..., + second: int | None = ..., + microsecond: int | None = ..., + nanosecond: int | None = ..., + tzinfo: _tzinfo | None = ..., + fold: int | None = ..., + ) -> NaTType: ... + # error: Return type "float" of "year" incompatible with return + # type "int" in supertype "date" + @property + def year(self) -> float: ... # type: ignore[override] + @property + def quarter(self) -> float: ... 
+ # error: Return type "float" of "month" incompatible with return + # type "int" in supertype "date" + @property + def month(self) -> float: ... # type: ignore[override] + # error: Return type "float" of "day" incompatible with return + # type "int" in supertype "date" + @property + def day(self) -> float: ... # type: ignore[override] + # error: Return type "float" of "hour" incompatible with return + # type "int" in supertype "date" + @property + def hour(self) -> float: ... # type: ignore[override] + # error: Return type "float" of "minute" incompatible with return + # type "int" in supertype "date" + @property + def minute(self) -> float: ... # type: ignore[override] + # error: Return type "float" of "second" incompatible with return + # type "int" in supertype "date" + @property + def second(self) -> float: ... # type: ignore[override] + @property + def millisecond(self) -> float: ... + # error: Return type "float" of "microsecond" incompatible with return + # type "int" in supertype "date" + @property + def microsecond(self) -> float: ... # type: ignore[override] + @property + def nanosecond(self) -> float: ... + # inject Timedelta properties + @property + def days(self) -> float: ... + @property + def microseconds(self) -> float: ... + @property + def nanoseconds(self) -> float: ... + # inject Period properties + @property + def qyear(self) -> float: ... + def __eq__(self, other: Any) -> bool: ... + def __ne__(self, other: Any) -> bool: ... + # https://github.com/python/mypy/issues/9015 + # error: Argument 1 of "__lt__" is incompatible with supertype "date"; + # supertype defines the argument type as "date" + def __lt__( # type: ignore[override] + self, other: datetime | timedelta | Period | np.datetime64 | np.timedelta64 + ) -> bool: ... + # error: Argument 1 of "__le__" is incompatible with supertype "date"; + # supertype defines the argument type as "date" + def __le__( # type: ignore[override] + self, other: datetime | timedelta | Period | np.datetime64 | np.timedelta64 + ) -> bool: ... + # error: Argument 1 of "__gt__" is incompatible with supertype "date"; + # supertype defines the argument type as "date" + def __gt__( # type: ignore[override] + self, other: datetime | timedelta | Period | np.datetime64 | np.timedelta64 + ) -> bool: ... + # error: Argument 1 of "__ge__" is incompatible with supertype "date"; + # supertype defines the argument type as "date" + def __ge__( # type: ignore[override] + self, other: datetime | timedelta | Period | np.datetime64 | np.timedelta64 + ) -> bool: ... 
diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/nattype.pyx b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/nattype.pyx new file mode 100644 index 0000000..e6a7017 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/nattype.pyx @@ -0,0 +1,1225 @@ +import warnings + +from cpython.datetime cimport ( + PyDate_Check, + PyDateTime_Check, + PyDateTime_IMPORT, + PyDelta_Check, + datetime, + timedelta, +) +from cpython.object cimport ( + Py_EQ, + Py_GE, + Py_GT, + Py_LE, + Py_LT, + Py_NE, + PyObject_RichCompare, +) + +PyDateTime_IMPORT + +from cpython.version cimport PY_MINOR_VERSION + +import numpy as np + +cimport numpy as cnp +from numpy cimport int64_t + +cnp.import_array() + +cimport pandas._libs.tslibs.util as util +from pandas._libs.tslibs.np_datetime cimport ( + get_datetime64_value, + get_timedelta64_value, +) + +# ---------------------------------------------------------------------- +# Constants +nat_strings = {"NaT", "nat", "NAT", "nan", "NaN", "NAN"} +cdef set c_nat_strings = nat_strings + +cdef int64_t NPY_NAT = util.get_nat() +iNaT = NPY_NAT # python-visible constant + +cdef bint _nat_scalar_rules[6] +_nat_scalar_rules[Py_EQ] = False +_nat_scalar_rules[Py_NE] = True +_nat_scalar_rules[Py_LT] = False +_nat_scalar_rules[Py_LE] = False +_nat_scalar_rules[Py_GT] = False +_nat_scalar_rules[Py_GE] = False + +# ---------------------------------------------------------------------- + + +def _make_nan_func(func_name: str, doc: str): + def f(*args, **kwargs): + return np.nan + f.__name__ = func_name + f.__doc__ = doc + return f + + +def _make_nat_func(func_name: str, doc: str): + def f(*args, **kwargs): + return c_NaT + f.__name__ = func_name + f.__doc__ = doc + return f + + +def _make_error_func(func_name: str, cls): + def f(*args, **kwargs): + raise ValueError(f"NaTType does not support {func_name}") + + f.__name__ = func_name + if isinstance(cls, str): + # passed the literal docstring directly + f.__doc__ = cls + elif cls is not None: + f.__doc__ = getattr(cls, func_name).__doc__ + return f + + +cdef _nat_divide_op(self, other): + if PyDelta_Check(other) or util.is_timedelta64_object(other) or other is c_NaT: + return np.nan + if util.is_integer_object(other) or util.is_float_object(other): + return c_NaT + return NotImplemented + + +cdef _nat_rdivide_op(self, other): + if PyDelta_Check(other): + return np.nan + return NotImplemented + + +def __nat_unpickle(*args): + # return constant defined in the module + return c_NaT + +# ---------------------------------------------------------------------- + + +cdef class _NaT(datetime): + # cdef readonly: + # int64_t value + # object freq + + # higher than np.ndarray and np.matrix + __array_priority__ = 100 + + def __richcmp__(_NaT self, object other, int op): + if util.is_datetime64_object(other) or PyDateTime_Check(other): + # We treat NaT as datetime-like for this comparison + return _nat_scalar_rules[op] + + elif util.is_timedelta64_object(other) or PyDelta_Check(other): + # We treat NaT as timedelta-like for this comparison + return _nat_scalar_rules[op] + + elif util.is_array(other): + if other.dtype.kind in "mM": + result = np.empty(other.shape, dtype=np.bool_) + result.fill(_nat_scalar_rules[op]) + elif other.dtype.kind == "O": + result = np.array([PyObject_RichCompare(self, x, op) for x in other]) + elif op == Py_EQ: + result = np.zeros(other.shape, dtype=bool) + elif op == Py_NE: + result = np.ones(other.shape, dtype=bool) + else: + return NotImplemented + return result + + elif 
PyDate_Check(other): + # GH#39151 don't defer to datetime.date object + if op == Py_EQ: + return False + if op == Py_NE: + return True + warnings.warn( + "Comparison of NaT with datetime.date is deprecated in " + "order to match the standard library behavior. " + "In a future version these will be considered non-comparable.", + FutureWarning, + stacklevel=1, + ) + return False + + return NotImplemented + + def __add__(self, other): + if self is not c_NaT: + # cython __radd__ semantics + self, other = other, self + + if PyDateTime_Check(other): + return c_NaT + elif PyDelta_Check(other): + return c_NaT + elif util.is_datetime64_object(other) or util.is_timedelta64_object(other): + return c_NaT + + elif util.is_integer_object(other): + # For Period compat + return c_NaT + + elif util.is_array(other): + if other.dtype.kind in "mM": + # If we are adding to datetime64, we treat NaT as timedelta + # Either way, result dtype is datetime64 + result = np.empty(other.shape, dtype="datetime64[ns]") + result.fill("NaT") + return result + raise TypeError(f"Cannot add NaT to ndarray with dtype {other.dtype}") + + # Includes Period, DateOffset going through here + return NotImplemented + + def __sub__(self, other): + # Duplicate some logic from _Timestamp.__sub__ to avoid needing + # to subclass; allows us to @final(_Timestamp.__sub__) + cdef: + bint is_rsub = False + + if self is not c_NaT: + # cython __rsub__ semantics + self, other = other, self + is_rsub = True + + if PyDateTime_Check(other): + return c_NaT + elif PyDelta_Check(other): + return c_NaT + elif util.is_datetime64_object(other) or util.is_timedelta64_object(other): + return c_NaT + + elif util.is_integer_object(other): + # For Period compat + return c_NaT + + elif util.is_array(other): + if other.dtype.kind == "m": + if not is_rsub: + # NaT - timedelta64 we treat NaT as datetime64, so result + # is datetime64 + result = np.empty(other.shape, dtype="datetime64[ns]") + result.fill("NaT") + return result + + # timedelta64 - NaT we have to treat NaT as timedelta64 + # for this to be meaningful, and the result is timedelta64 + result = np.empty(other.shape, dtype="timedelta64[ns]") + result.fill("NaT") + return result + + elif other.dtype.kind == "M": + # We treat NaT as a datetime, so regardless of whether this is + # NaT - other or other - NaT, the result is timedelta64 + result = np.empty(other.shape, dtype="timedelta64[ns]") + result.fill("NaT") + return result + + raise TypeError( + f"Cannot subtract NaT from ndarray with dtype {other.dtype}" + ) + + # Includes Period, DateOffset going through here + return NotImplemented + + def __pos__(self): + return NaT + + def __neg__(self): + return NaT + + def __truediv__(self, other): + return _nat_divide_op(self, other) + + def __floordiv__(self, other): + return _nat_divide_op(self, other) + + def __mul__(self, other): + if util.is_integer_object(other) or util.is_float_object(other): + return NaT + return NotImplemented + + @property + def asm8(self) -> np.datetime64: + return np.datetime64(NPY_NAT, "ns") + + def to_datetime64(self) -> np.datetime64: + """ + Return a numpy.datetime64 object with 'ns' precision. + """ + return np.datetime64('NaT', "ns") + + def to_numpy(self, dtype=None, copy=False) -> np.datetime64 | np.timedelta64: + """ + Convert the Timestamp to a NumPy datetime64 or timedelta64. + + .. versionadded:: 0.25.0 + + With the default 'dtype', this is an alias method for `NaT.to_datetime64()`. + + The copy parameter is available here only for compatibility. 
Its value + will not affect the return value. + + Returns + ------- + numpy.datetime64 or numpy.timedelta64 + + See Also + -------- + DatetimeIndex.to_numpy : Similar method for DatetimeIndex. + + Examples + -------- + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651') + >>> ts.to_numpy() + numpy.datetime64('2020-03-14T15:32:52.192548651') + + Analogous for ``pd.NaT``: + + >>> pd.NaT.to_numpy() + numpy.datetime64('NaT') + + >>> pd.NaT.to_numpy("m8[ns]") + numpy.timedelta64('NaT','ns') + """ + if dtype is not None: + # GH#44460 + dtype = np.dtype(dtype) + if dtype.kind == "M": + return np.datetime64("NaT").astype(dtype) + elif dtype.kind == "m": + return np.timedelta64("NaT").astype(dtype) + else: + raise ValueError( + "NaT.to_numpy dtype must be a datetime64 dtype, timedelta64 " + "dtype, or None." + ) + return self.to_datetime64() + + def __repr__(self) -> str: + return "NaT" + + def __str__(self) -> str: + return "NaT" + + def isoformat(self, sep: str = "T", timespec: str = "auto") -> str: + # This allows Timestamp(ts.isoformat()) to always correctly roundtrip. + return "NaT" + + def __hash__(self) -> int: + return NPY_NAT + + @property + def is_leap_year(self) -> bool: + return False + + @property + def is_month_start(self) -> bool: + return False + + @property + def is_quarter_start(self) -> bool: + return False + + @property + def is_year_start(self) -> bool: + return False + + @property + def is_month_end(self) -> bool: + return False + + @property + def is_quarter_end(self) -> bool: + return False + + @property + def is_year_end(self) -> bool: + return False + + +class NaTType(_NaT): + """ + (N)ot-(A)-(T)ime, the time equivalent of NaN. + """ + + def __new__(cls): + cdef _NaT base + + base = _NaT.__new__(cls, 1, 1, 1) + base.value = NPY_NAT + + return base + + @property + def freq(self): + warnings.warn( + "NaT.freq is deprecated and will be removed in a future version.", + FutureWarning, + stacklevel=1, + ) + return None + + def __reduce_ex__(self, protocol): + # python 3.6 compat + # https://bugs.python.org/issue28730 + # now __reduce_ex__ is defined and higher priority than __reduce__ + return self.__reduce__() + + def __reduce__(self): + return (__nat_unpickle, (None, )) + + def __rtruediv__(self, other): + return _nat_rdivide_op(self, other) + + def __rfloordiv__(self, other): + return _nat_rdivide_op(self, other) + + def __rmul__(self, other): + if util.is_integer_object(other) or util.is_float_object(other): + return c_NaT + return NotImplemented + + # ---------------------------------------------------------------------- + # inject the Timestamp field properties + # these by definition return np.nan + + year = property(fget=lambda self: np.nan) + quarter = property(fget=lambda self: np.nan) + month = property(fget=lambda self: np.nan) + day = property(fget=lambda self: np.nan) + hour = property(fget=lambda self: np.nan) + minute = property(fget=lambda self: np.nan) + second = property(fget=lambda self: np.nan) + millisecond = property(fget=lambda self: np.nan) + microsecond = property(fget=lambda self: np.nan) + nanosecond = property(fget=lambda self: np.nan) + + week = property(fget=lambda self: np.nan) + dayofyear = property(fget=lambda self: np.nan) + day_of_year = property(fget=lambda self: np.nan) + weekofyear = property(fget=lambda self: np.nan) + days_in_month = property(fget=lambda self: np.nan) + daysinmonth = property(fget=lambda self: np.nan) + dayofweek = property(fget=lambda self: np.nan) + day_of_week = property(fget=lambda self: np.nan) + + # inject 
Timedelta properties + days = property(fget=lambda self: np.nan) + seconds = property(fget=lambda self: np.nan) + microseconds = property(fget=lambda self: np.nan) + nanoseconds = property(fget=lambda self: np.nan) + + # inject pd.Period properties + qyear = property(fget=lambda self: np.nan) + + # ---------------------------------------------------------------------- + # GH9513 NaT methods (except to_datetime64) to raise, return np.nan, or + # return NaT create functions that raise, for binding to NaTType + # These are the ones that can get their docstrings from datetime. + + # nan methods + weekday = _make_nan_func( + "weekday", + """ + Return the day of the week represented by the date. + Monday == 0 ... Sunday == 6. + """, + ) + isoweekday = _make_nan_func( + "isoweekday", + """ + Return the day of the week represented by the date. + Monday == 1 ... Sunday == 7. + """, + ) + total_seconds = _make_nan_func("total_seconds", timedelta.total_seconds.__doc__) + month_name = _make_nan_func( + "month_name", + """ + Return the month name of the Timestamp with specified locale. + + Parameters + ---------- + locale : str, default None (English locale) + Locale determining the language in which to return the month name. + + Returns + ------- + str + + Examples + -------- + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651') + >>> ts.month_name() + 'March' + + Analogous for ``pd.NaT``: + + >>> pd.NaT.month_name() + nan + """, + ) + day_name = _make_nan_func( + "day_name", + """ + Return the day name of the Timestamp with specified locale. + + Parameters + ---------- + locale : str, default None (English locale) + Locale determining the language in which to return the day name. + + Returns + ------- + str + + Examples + -------- + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651') + >>> ts.day_name() + 'Saturday' + + Analogous for ``pd.NaT``: + + >>> pd.NaT.day_name() + nan + """, + ) + # _nat_methods + date = _make_nat_func("date", datetime.date.__doc__) + + utctimetuple = _make_error_func("utctimetuple", datetime) + timetz = _make_error_func("timetz", datetime) + timetuple = _make_error_func("timetuple", datetime) + isocalendar = _make_error_func("isocalendar", datetime) + dst = _make_error_func("dst", datetime) + ctime = _make_error_func("ctime", datetime) + time = _make_error_func("time", datetime) + toordinal = _make_error_func("toordinal", datetime) + tzname = _make_error_func("tzname", datetime) + utcoffset = _make_error_func("utcoffset", datetime) + + # "fromisocalendar" was introduced in 3.8 + if PY_MINOR_VERSION >= 8: + fromisocalendar = _make_error_func("fromisocalendar", datetime) + + # ---------------------------------------------------------------------- + # The remaining methods have docstrings copy/pasted from the analogous + # Timestamp methods. + + strftime = _make_error_func( + "strftime", + """ + Timestamp.strftime(format) + + Return a string representing the given POSIX timestamp + controlled by an explicit format string. + + Parameters + ---------- + format : str + Format string to convert Timestamp to string. + See strftime documentation for more information on the format string: + https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior. + + Examples + -------- + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651') + >>> ts.strftime('%Y-%m-%d %X') + '2020-03-14 15:32:52' + """, + ) + + strptime = _make_error_func( + "strptime", + """ + Timestamp.strptime(string, format) + + Function is not implemented. Use pd.to_datetime(). 
+ """, + ) + + utcfromtimestamp = _make_error_func( + "utcfromtimestamp", + """ + Timestamp.utcfromtimestamp(ts) + + Construct a naive UTC datetime from a POSIX timestamp. + + Examples + -------- + >>> pd.Timestamp.utcfromtimestamp(1584199972) + Timestamp('2020-03-14 15:32:52') + """, + ) + fromtimestamp = _make_error_func( + "fromtimestamp", + """ + Timestamp.fromtimestamp(ts) + + Transform timestamp[, tz] to tz's local time from POSIX timestamp. + + Examples + -------- + >>> pd.Timestamp.fromtimestamp(1584199972) + Timestamp('2020-03-14 15:32:52') + + Note that the output may change depending on your local time. + """, + ) + combine = _make_error_func( + "combine", + """ + Timestamp.combine(date, time) + + Combine date, time into datetime with same date and time fields. + + Examples + -------- + >>> from datetime import date, time + >>> pd.Timestamp.combine(date(2020, 3, 14), time(15, 30, 15)) + Timestamp('2020-03-14 15:30:15') + """, + ) + utcnow = _make_error_func( + "utcnow", + """ + Timestamp.utcnow() + + Return a new Timestamp representing UTC day and time. + + Examples + -------- + >>> pd.Timestamp.utcnow() # doctest: +SKIP + Timestamp('2020-11-16 22:50:18.092888+0000', tz='UTC') + """, + ) + + timestamp = _make_error_func( + "timestamp", + """ + Return POSIX timestamp as float. + + Examples + -------- + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548') + >>> ts.timestamp() + 1584199972.192548 + """ + ) + + # GH9513 NaT methods (except to_datetime64) to raise, return np.nan, or + # return NaT create functions that raise, for binding to NaTType + astimezone = _make_error_func( + "astimezone", + """ + Convert timezone-aware Timestamp to another time zone. + + Parameters + ---------- + tz : str, pytz.timezone, dateutil.tz.tzfile or None + Time zone for time which Timestamp will be converted to. + None will remove timezone holding UTC time. + + Returns + ------- + converted : Timestamp + + Raises + ------ + TypeError + If Timestamp is tz-naive. + + Examples + -------- + Create a timestamp object with UTC timezone: + + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651', tz='UTC') + >>> ts + Timestamp('2020-03-14 15:32:52.192548651+0000', tz='UTC') + + Change to Tokyo timezone: + + >>> ts.tz_convert(tz='Asia/Tokyo') + Timestamp('2020-03-15 00:32:52.192548651+0900', tz='Asia/Tokyo') + + Can also use ``astimezone``: + + >>> ts.astimezone(tz='Asia/Tokyo') + Timestamp('2020-03-15 00:32:52.192548651+0900', tz='Asia/Tokyo') + + Analogous for ``pd.NaT``: + + >>> pd.NaT.tz_convert(tz='Asia/Tokyo') + NaT + """, + ) + fromordinal = _make_error_func( + "fromordinal", + """ + Timestamp.fromordinal(ordinal, freq=None, tz=None) + + Passed an ordinal, translate and convert to a ts. + Note: by definition there cannot be any tz info on the ordinal itself. + + Parameters + ---------- + ordinal : int + Date corresponding to a proleptic Gregorian ordinal. + freq : str, DateOffset + Offset to apply to the Timestamp. + tz : str, pytz.timezone, dateutil.tz.tzfile or None + Time zone for the Timestamp. + + Examples + -------- + >>> pd.Timestamp.fromordinal(737425) + Timestamp('2020-01-01 00:00:00') + """, + ) + + # _nat_methods + to_pydatetime = _make_nat_func( + "to_pydatetime", + """ + Convert a Timestamp object to a native Python datetime object. + + If warn=True, issue a warning if nanoseconds is nonzero. 
+ + Examples + -------- + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548') + >>> ts.to_pydatetime() + datetime.datetime(2020, 3, 14, 15, 32, 52, 192548) + + Analogous for ``pd.NaT``: + + >>> pd.NaT.to_pydatetime() + NaT + """, + ) + + now = _make_nat_func( + "now", + """ + Timestamp.now(tz=None) + + Return new Timestamp object representing current time local to + tz. + + Parameters + ---------- + tz : str or timezone object, default None + Timezone to localize to. + + Examples + -------- + >>> pd.Timestamp.now() # doctest: +SKIP + Timestamp('2020-11-16 22:06:16.378782') + + Analogous for ``pd.NaT``: + + >>> pd.NaT.now() + NaT + """, + ) + today = _make_nat_func( + "today", + """ + Timestamp.today(cls, tz=None) + + Return the current time in the local timezone. This differs + from datetime.today() in that it can be localized to a + passed timezone. + + Parameters + ---------- + tz : str or timezone object, default None + Timezone to localize to. + + Examples + -------- + >>> pd.Timestamp.today() # doctest: +SKIP + Timestamp('2020-11-16 22:37:39.969883') + + Analogous for ``pd.NaT``: + + >>> pd.NaT.today() + NaT + """, + ) + round = _make_nat_func( + "round", + """ + Round the Timestamp to the specified resolution. + + Parameters + ---------- + freq : str + Frequency string indicating the rounding resolution. + ambiguous : bool or {'raise', 'NaT'}, default 'raise' + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + + nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ +timedelta}, default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. + + Returns + ------- + a new Timestamp rounded to the given resolution of `freq` + + Raises + ------ + ValueError if the freq cannot be converted + + Notes + ----- + If the Timestamp has a timezone, rounding will take place relative to the + local ("wall") time and re-localized to the same timezone. When rounding + near daylight savings time, use ``nonexistent`` and ``ambiguous`` to + control the re-localization behavior. + + Examples + -------- + Create a timestamp object: + + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651') + + A timestamp can be rounded using multiple frequency units: + + >>> ts.round(freq='H') # hour + Timestamp('2020-03-14 16:00:00') + + >>> ts.round(freq='T') # minute + Timestamp('2020-03-14 15:33:00') + + >>> ts.round(freq='S') # seconds + Timestamp('2020-03-14 15:32:52') + + >>> ts.round(freq='L') # milliseconds + Timestamp('2020-03-14 15:32:52.193000') + + ``freq`` can also be a multiple of a single unit, like '5T' (i.e. 5 minutes): + + >>> ts.round(freq='5T') + Timestamp('2020-03-14 15:35:00') + + or a combination of multiple units, like '1H30T' (i.e. 
1 hour and 30 minutes): + + >>> ts.round(freq='1H30T') + Timestamp('2020-03-14 15:00:00') + + Analogous for ``pd.NaT``: + + >>> pd.NaT.round() + NaT + + When rounding near a daylight savings time transition, use ``ambiguous`` or + ``nonexistent`` to control how the timestamp should be re-localized. + + >>> ts_tz = pd.Timestamp("2021-10-31 01:30:00").tz_localize("Europe/Amsterdam") + + >>> ts_tz.round("H", ambiguous=False) + Timestamp('2021-10-31 02:00:00+0100', tz='Europe/Amsterdam') + + >>> ts_tz.round("H", ambiguous=True) + Timestamp('2021-10-31 02:00:00+0200', tz='Europe/Amsterdam') + """, + ) + floor = _make_nat_func( + "floor", + """ + Return a new Timestamp floored to this resolution. + + Parameters + ---------- + freq : str + Frequency string indicating the flooring resolution. + ambiguous : bool or {'raise', 'NaT'}, default 'raise' + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + + nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ +timedelta}, default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. + + Raises + ------ + ValueError if the freq cannot be converted. + + Notes + ----- + If the Timestamp has a timezone, flooring will take place relative to the + local ("wall") time and re-localized to the same timezone. When flooring + near daylight savings time, use ``nonexistent`` and ``ambiguous`` to + control the re-localization behavior. + + Examples + -------- + Create a timestamp object: + + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651') + + A timestamp can be floored using multiple frequency units: + + >>> ts.floor(freq='H') # hour + Timestamp('2020-03-14 15:00:00') + + >>> ts.floor(freq='T') # minute + Timestamp('2020-03-14 15:32:00') + + >>> ts.floor(freq='S') # seconds + Timestamp('2020-03-14 15:32:52') + + >>> ts.floor(freq='N') # nanoseconds + Timestamp('2020-03-14 15:32:52.192548651') + + ``freq`` can also be a multiple of a single unit, like '5T' (i.e. 5 minutes): + + >>> ts.floor(freq='5T') + Timestamp('2020-03-14 15:30:00') + + or a combination of multiple units, like '1H30T' (i.e. 1 hour and 30 minutes): + + >>> ts.floor(freq='1H30T') + Timestamp('2020-03-14 15:00:00') + + Analogous for ``pd.NaT``: + + >>> pd.NaT.floor() + NaT + + When rounding near a daylight savings time transition, use ``ambiguous`` or + ``nonexistent`` to control how the timestamp should be re-localized. + + >>> ts_tz = pd.Timestamp("2021-10-31 03:30:00").tz_localize("Europe/Amsterdam") + + >>> ts_tz.floor("2H", ambiguous=False) + Timestamp('2021-10-31 02:00:00+0100', tz='Europe/Amsterdam') + + >>> ts_tz.floor("2H", ambiguous=True) + Timestamp('2021-10-31 02:00:00+0200', tz='Europe/Amsterdam') + """, + ) + ceil = _make_nat_func( + "ceil", + """ + Return a new Timestamp ceiled to this resolution. + + Parameters + ---------- + freq : str + Frequency string indicating the ceiling resolution. 
+ ambiguous : bool or {'raise', 'NaT'}, default 'raise' + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + + nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ +timedelta}, default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. + + Raises + ------ + ValueError if the freq cannot be converted. + + Notes + ----- + If the Timestamp has a timezone, ceiling will take place relative to the + local ("wall") time and re-localized to the same timezone. When ceiling + near daylight savings time, use ``nonexistent`` and ``ambiguous`` to + control the re-localization behavior. + + Examples + -------- + Create a timestamp object: + + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651') + + A timestamp can be ceiled using multiple frequency units: + + >>> ts.ceil(freq='H') # hour + Timestamp('2020-03-14 16:00:00') + + >>> ts.ceil(freq='T') # minute + Timestamp('2020-03-14 15:33:00') + + >>> ts.ceil(freq='S') # seconds + Timestamp('2020-03-14 15:32:53') + + >>> ts.ceil(freq='U') # microseconds + Timestamp('2020-03-14 15:32:52.192549') + + ``freq`` can also be a multiple of a single unit, like '5T' (i.e. 5 minutes): + + >>> ts.ceil(freq='5T') + Timestamp('2020-03-14 15:35:00') + + or a combination of multiple units, like '1H30T' (i.e. 1 hour and 30 minutes): + + >>> ts.ceil(freq='1H30T') + Timestamp('2020-03-14 16:30:00') + + Analogous for ``pd.NaT``: + + >>> pd.NaT.ceil() + NaT + + When rounding near a daylight savings time transition, use ``ambiguous`` or + ``nonexistent`` to control how the timestamp should be re-localized. + + >>> ts_tz = pd.Timestamp("2021-10-31 01:30:00").tz_localize("Europe/Amsterdam") + + >>> ts_tz.ceil("H", ambiguous=False) + Timestamp('2021-10-31 02:00:00+0100', tz='Europe/Amsterdam') + + >>> ts_tz.ceil("H", ambiguous=True) + Timestamp('2021-10-31 02:00:00+0200', tz='Europe/Amsterdam') + """, + ) + + tz_convert = _make_nat_func( + "tz_convert", + """ + Convert timezone-aware Timestamp to another time zone. + + Parameters + ---------- + tz : str, pytz.timezone, dateutil.tz.tzfile or None + Time zone for time which Timestamp will be converted to. + None will remove timezone holding UTC time. + + Returns + ------- + converted : Timestamp + + Raises + ------ + TypeError + If Timestamp is tz-naive. 
+ + Examples + -------- + Create a timestamp object with UTC timezone: + + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651', tz='UTC') + >>> ts + Timestamp('2020-03-14 15:32:52.192548651+0000', tz='UTC') + + Change to Tokyo timezone: + + >>> ts.tz_convert(tz='Asia/Tokyo') + Timestamp('2020-03-15 00:32:52.192548651+0900', tz='Asia/Tokyo') + + Can also use ``astimezone``: + + >>> ts.astimezone(tz='Asia/Tokyo') + Timestamp('2020-03-15 00:32:52.192548651+0900', tz='Asia/Tokyo') + + Analogous for ``pd.NaT``: + + >>> pd.NaT.tz_convert(tz='Asia/Tokyo') + NaT + """, + ) + tz_localize = _make_nat_func( + "tz_localize", + """ + Convert naive Timestamp to local time zone, or remove + timezone from timezone-aware Timestamp. + + Parameters + ---------- + tz : str, pytz.timezone, dateutil.tz.tzfile or None + Time zone for time which Timestamp will be converted to. + None will remove timezone holding local time. + + ambiguous : bool, 'NaT', default 'raise' + When clocks moved backward due to DST, ambiguous times may arise. + For example in Central European Time (UTC+01), when going from + 03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at + 00:30:00 UTC and at 01:30:00 UTC. In such a situation, the + `ambiguous` parameter dictates how ambiguous times should be + handled. + + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + + nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \ +default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + The behavior is as follows: + + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. + + Returns + ------- + localized : Timestamp + + Raises + ------ + TypeError + If the Timestamp is tz-aware and tz is not None. + + Examples + -------- + Create a naive timestamp object: + + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651') + >>> ts + Timestamp('2020-03-14 15:32:52.192548651') + + Add 'Europe/Stockholm' as timezone: + + >>> ts.tz_localize(tz='Europe/Stockholm') + Timestamp('2020-03-14 15:32:52.192548651+0100', tz='Europe/Stockholm') + + Analogous for ``pd.NaT``: + + >>> pd.NaT.tz_localize() + NaT + """, + ) + replace = _make_nat_func( + "replace", + """ + Implements datetime.replace, handles nanoseconds. 
+ + Parameters + ---------- + year : int, optional + month : int, optional + day : int, optional + hour : int, optional + minute : int, optional + second : int, optional + microsecond : int, optional + nanosecond : int, optional + tzinfo : tz-convertible, optional + fold : int, optional + + Returns + ------- + Timestamp with fields replaced + + Examples + -------- + Create a timestamp object: + + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651', tz='UTC') + >>> ts + Timestamp('2020-03-14 15:32:52.192548651+0000', tz='UTC') + + Replace year and the hour: + + >>> ts.replace(year=1999, hour=10) + Timestamp('1999-03-14 10:32:52.192548651+0000', tz='UTC') + + Replace timezone (not a conversion): + + >>> import pytz + >>> ts.replace(tzinfo=pytz.timezone('US/Pacific')) + Timestamp('2020-03-14 15:32:52.192548651-0700', tz='US/Pacific') + + Analogous for ``pd.NaT``: + + >>> pd.NaT.replace(tzinfo=pytz.timezone('US/Pacific')) + NaT + """, + ) + + +c_NaT = NaTType() # C-visible +NaT = c_NaT # Python-visible + + +# ---------------------------------------------------------------------- + +cdef inline bint checknull_with_nat(object val): + """ + Utility to check if a value is a nat or not. + """ + return val is None or util.is_nan(val) or val is c_NaT + + +cdef inline bint is_dt64nat(object val): + """ + Is this a np.datetime64 object np.datetime64("NaT"). + """ + if util.is_datetime64_object(val): + return get_datetime64_value(val) == NPY_NAT + return False + + +cdef inline bint is_td64nat(object val): + """ + Is this a np.timedelta64 object np.timedelta64("NaT"). + """ + if util.is_timedelta64_object(val): + return get_timedelta64_value(val) == NPY_NAT + return False diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/np_datetime.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/np_datetime.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..9fc6453 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/np_datetime.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/np_datetime.pxd b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/np_datetime.pxd new file mode 100644 index 0000000..c2bbc4f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/np_datetime.pxd @@ -0,0 +1,81 @@ +from cpython.datetime cimport ( + date, + datetime, +) +from numpy cimport ( + int32_t, + int64_t, +) + + +cdef extern from "numpy/ndarrayobject.h": + ctypedef int64_t npy_timedelta + ctypedef int64_t npy_datetime + +cdef extern from "numpy/ndarraytypes.h": + ctypedef struct PyArray_DatetimeMetaData: + NPY_DATETIMEUNIT base + int64_t num + +cdef extern from "numpy/arrayscalars.h": + ctypedef struct PyDatetimeScalarObject: + # PyObject_HEAD + npy_datetime obval + PyArray_DatetimeMetaData obmeta + + ctypedef struct PyTimedeltaScalarObject: + # PyObject_HEAD + npy_timedelta obval + PyArray_DatetimeMetaData obmeta + +cdef extern from "numpy/ndarraytypes.h": + ctypedef struct npy_datetimestruct: + int64_t year + int32_t month, day, hour, min, sec, us, ps, as + + ctypedef enum NPY_DATETIMEUNIT: + NPY_FR_Y + NPY_FR_M + NPY_FR_W + NPY_FR_D + NPY_FR_B + NPY_FR_h + NPY_FR_m + NPY_FR_s + NPY_FR_ms + NPY_FR_us + NPY_FR_ns + NPY_FR_ps + NPY_FR_fs + NPY_FR_as + NPY_FR_GENERIC + +cdef extern from "src/datetime/np_datetime.h": + ctypedef struct pandas_timedeltastruct: + int64_t days + int32_t hrs, min, sec, ms, us, ns, seconds, microseconds, nanoseconds + + void 
pandas_datetime_to_datetimestruct(npy_datetime val, + NPY_DATETIMEUNIT fr, + npy_datetimestruct *result) nogil + + +cdef bint cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1 + +cdef check_dts_bounds(npy_datetimestruct *dts) + +cdef int64_t dtstruct_to_dt64(npy_datetimestruct* dts) nogil +cdef void dt64_to_dtstruct(int64_t dt64, npy_datetimestruct* out) nogil +cdef void td64_to_tdstruct(int64_t td64, pandas_timedeltastruct* out) nogil + +cdef int64_t pydatetime_to_dt64(datetime val, npy_datetimestruct *dts) +cdef int64_t pydate_to_dt64(date val, npy_datetimestruct *dts) +cdef void pydate_to_dtstruct(date val, npy_datetimestruct *dts) + +cdef npy_datetime get_datetime64_value(object obj) nogil +cdef npy_timedelta get_timedelta64_value(object obj) nogil +cdef NPY_DATETIMEUNIT get_datetime64_unit(object obj) nogil + +cdef int _string_to_dts(str val, npy_datetimestruct* dts, + int* out_local, int* out_tzoffset, + bint want_exc) except? -1 diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/np_datetime.pyi b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/np_datetime.pyi new file mode 100644 index 0000000..db0c277 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/np_datetime.pyi @@ -0,0 +1 @@ +class OutOfBoundsDatetime(ValueError): ... diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/np_datetime.pyx b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/np_datetime.pyx new file mode 100644 index 0000000..bc03a3e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/np_datetime.pyx @@ -0,0 +1,186 @@ +from cpython.datetime cimport ( + PyDateTime_DATE_GET_HOUR, + PyDateTime_DATE_GET_MICROSECOND, + PyDateTime_DATE_GET_MINUTE, + PyDateTime_DATE_GET_SECOND, + PyDateTime_GET_DAY, + PyDateTime_GET_MONTH, + PyDateTime_GET_YEAR, + PyDateTime_IMPORT, +) +from cpython.object cimport ( + Py_EQ, + Py_GE, + Py_GT, + Py_LE, + Py_LT, + Py_NE, +) + +PyDateTime_IMPORT + +from numpy cimport int64_t + +from pandas._libs.tslibs.util cimport get_c_string_buf_and_size + + +cdef extern from "src/datetime/np_datetime.h": + int cmp_npy_datetimestruct(npy_datetimestruct *a, + npy_datetimestruct *b) + + npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT fr, + npy_datetimestruct *d) nogil + + void pandas_datetime_to_datetimestruct(npy_datetime val, + NPY_DATETIMEUNIT fr, + npy_datetimestruct *result) nogil + + void pandas_timedelta_to_timedeltastruct(npy_timedelta val, + NPY_DATETIMEUNIT fr, + pandas_timedeltastruct *result + ) nogil + + npy_datetimestruct _NS_MIN_DTS, _NS_MAX_DTS + +cdef extern from "src/datetime/np_datetime_strings.h": + int parse_iso_8601_datetime(const char *str, int len, int want_exc, + npy_datetimestruct *out, + int *out_local, int *out_tzoffset) + + +# ---------------------------------------------------------------------- +# numpy object inspection + +cdef inline npy_datetime get_datetime64_value(object obj) nogil: + """ + returns the int64 value underlying scalar numpy datetime64 object + + Note that to interpret this as a datetime, the corresponding unit is + also needed. That can be found using `get_datetime64_unit`. + """ + return (obj).obval + + +cdef inline npy_timedelta get_timedelta64_value(object obj) nogil: + """ + returns the int64 value underlying scalar numpy timedelta64 object + """ + return (obj).obval + + +cdef inline NPY_DATETIMEUNIT get_datetime64_unit(object obj) nogil: + """ + returns the unit part of the dtype for a numpy datetime64 object. 
+ """ + return (obj).obmeta.base + +# ---------------------------------------------------------------------- +# Comparison + + +cdef inline bint cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1: + """ + cmp_scalar is a more performant version of PyObject_RichCompare + typed for int64_t arguments. + """ + if op == Py_EQ: + return lhs == rhs + elif op == Py_NE: + return lhs != rhs + elif op == Py_LT: + return lhs < rhs + elif op == Py_LE: + return lhs <= rhs + elif op == Py_GT: + return lhs > rhs + elif op == Py_GE: + return lhs >= rhs + + +class OutOfBoundsDatetime(ValueError): + pass + + +cdef inline check_dts_bounds(npy_datetimestruct *dts): + """Raises OutOfBoundsDatetime if the given date is outside the range that + can be represented by nanosecond-resolution 64-bit integers.""" + cdef: + bint error = False + + if (dts.year <= 1677 and + cmp_npy_datetimestruct(dts, &_NS_MIN_DTS) == -1): + error = True + elif (dts.year >= 2262 and + cmp_npy_datetimestruct(dts, &_NS_MAX_DTS) == 1): + error = True + + if error: + fmt = (f'{dts.year}-{dts.month:02d}-{dts.day:02d} ' + f'{dts.hour:02d}:{dts.min:02d}:{dts.sec:02d}') + raise OutOfBoundsDatetime(f'Out of bounds nanosecond timestamp: {fmt}') + + +# ---------------------------------------------------------------------- +# Conversion + +cdef inline int64_t dtstruct_to_dt64(npy_datetimestruct* dts) nogil: + """Convenience function to call npy_datetimestruct_to_datetime + with the by-far-most-common frequency NPY_FR_ns""" + return npy_datetimestruct_to_datetime(NPY_FR_ns, dts) + + +cdef inline void dt64_to_dtstruct(int64_t dt64, + npy_datetimestruct* out) nogil: + """Convenience function to call pandas_datetime_to_datetimestruct + with the by-far-most-common frequency NPY_FR_ns""" + pandas_datetime_to_datetimestruct(dt64, NPY_FR_ns, out) + return + + +cdef inline void td64_to_tdstruct(int64_t td64, + pandas_timedeltastruct* out) nogil: + """Convenience function to call pandas_timedelta_to_timedeltastruct + with the by-far-most-common frequency NPY_FR_ns""" + pandas_timedelta_to_timedeltastruct(td64, NPY_FR_ns, out) + return + + +cdef inline int64_t pydatetime_to_dt64(datetime val, + npy_datetimestruct *dts): + """ + Note we are assuming that the datetime object is timezone-naive. + """ + dts.year = PyDateTime_GET_YEAR(val) + dts.month = PyDateTime_GET_MONTH(val) + dts.day = PyDateTime_GET_DAY(val) + dts.hour = PyDateTime_DATE_GET_HOUR(val) + dts.min = PyDateTime_DATE_GET_MINUTE(val) + dts.sec = PyDateTime_DATE_GET_SECOND(val) + dts.us = PyDateTime_DATE_GET_MICROSECOND(val) + dts.ps = dts.as = 0 + return dtstruct_to_dt64(dts) + + +cdef inline void pydate_to_dtstruct(date val, npy_datetimestruct *dts): + dts.year = PyDateTime_GET_YEAR(val) + dts.month = PyDateTime_GET_MONTH(val) + dts.day = PyDateTime_GET_DAY(val) + dts.hour = dts.min = dts.sec = dts.us = 0 + dts.ps = dts.as = 0 + return + +cdef inline int64_t pydate_to_dt64(date val, npy_datetimestruct *dts): + pydate_to_dtstruct(val, dts) + return dtstruct_to_dt64(dts) + + +cdef inline int _string_to_dts(str val, npy_datetimestruct* dts, + int* out_local, int* out_tzoffset, + bint want_exc) except? 
-1: + cdef: + Py_ssize_t length + const char* buf + + buf = get_c_string_buf_and_size(val, &length) + return parse_iso_8601_datetime(buf, length, want_exc, + dts, out_local, out_tzoffset) diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/offsets.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/offsets.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..c3679e1 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/offsets.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/offsets.pxd b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/offsets.pxd new file mode 100644 index 0000000..215c3f8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/offsets.pxd @@ -0,0 +1,12 @@ +from numpy cimport int64_t + + +cpdef to_offset(object obj) +cdef bint is_offset_object(object obj) +cdef bint is_tick_object(object obj) + +cdef class BaseOffset: + cdef readonly: + int64_t n + bint normalize + dict _cache diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/offsets.pyx b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/offsets.pyx new file mode 100644 index 0000000..dc049e9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/offsets.pyx @@ -0,0 +1,4123 @@ +import operator +import re +import time +import warnings + +import cython + +from cpython.datetime cimport ( + PyDate_Check, + PyDateTime_Check, + PyDateTime_IMPORT, + PyDelta_Check, + date, + datetime, + time as dt_time, + timedelta, +) + +PyDateTime_IMPORT + +from dateutil.easter import easter +from dateutil.relativedelta import relativedelta +import numpy as np + +cimport numpy as cnp +from numpy cimport ( + int64_t, + ndarray, +) + +cnp.import_array() + +# TODO: formalize having _libs.properties "above" tslibs in the dependency structure + +from pandas._libs.properties import cache_readonly + +from pandas._libs.tslibs cimport util +from pandas._libs.tslibs.util cimport ( + is_datetime64_object, + is_float_object, + is_integer_object, +) + +from pandas._libs.tslibs.ccalendar import ( + MONTH_ALIASES, + MONTH_TO_CAL_NUM, + int_to_weekday, + weekday_to_int, +) + +from pandas._libs.tslibs.ccalendar cimport ( + DAY_NANOS, + dayofweek, + get_days_in_month, + get_firstbday, + get_lastbday, +) +from pandas._libs.tslibs.conversion cimport ( + convert_datetime_to_tsobject, + localize_pydatetime, +) +from pandas._libs.tslibs.nattype cimport ( + NPY_NAT, + c_NaT as NaT, +) +from pandas._libs.tslibs.np_datetime cimport ( + dt64_to_dtstruct, + dtstruct_to_dt64, + npy_datetimestruct, + pydate_to_dtstruct, +) +from pandas._libs.tslibs.tzconversion cimport tz_convert_from_utc_single + +from .dtypes cimport PeriodDtypeCode +from .timedeltas cimport ( + delta_to_nanoseconds, + is_any_td_scalar, +) + +from .timedeltas import Timedelta + +from .timestamps cimport _Timestamp + +from .timestamps import Timestamp + +# --------------------------------------------------------------------- +# Misc Helpers + +cdef bint is_offset_object(object obj): + return isinstance(obj, BaseOffset) + + +cdef bint is_tick_object(object obj): + return isinstance(obj, Tick) + + +cdef datetime _as_datetime(datetime obj): + if isinstance(obj, _Timestamp): + return obj.to_pydatetime() + return obj + + +cdef bint _is_normalized(datetime dt): + if dt.hour != 0 or dt.minute != 0 or dt.second != 0 or dt.microsecond != 0: + # Regardless of whether dt is datetime vs Timestamp + 
return False + if isinstance(dt, _Timestamp): + return dt.nanosecond == 0 + return True + + +def apply_wrapper_core(func, self, other) -> ndarray: + result = func(self, other) + result = np.asarray(result) + + if self.normalize: + # TODO: Avoid circular/runtime import + from .vectorized import normalize_i8_timestamps + result = normalize_i8_timestamps(result.view("i8"), None) + + return result + + +def apply_index_wraps(func): + # Note: normally we would use `@functools.wraps(func)`, but this does + # not play nicely with cython class methods + def wrapper(self, other): + # other is a DatetimeArray + result = apply_wrapper_core(func, self, other) + result = type(other)(result) + warnings.warn("'Offset.apply_index(other)' is deprecated. " + "Use 'offset + other' instead.", FutureWarning) + return result + + return wrapper + + +def apply_array_wraps(func): + # Note: normally we would use `@functools.wraps(func)`, but this does + # not play nicely with cython class methods + def wrapper(self, other) -> np.ndarray: + # other is a DatetimeArray + result = apply_wrapper_core(func, self, other) + return result + + # do @functools.wraps(func) manually since it doesn't work on cdef funcs + wrapper.__name__ = func.__name__ + wrapper.__doc__ = func.__doc__ + return wrapper + + +def apply_wraps(func): + # Note: normally we would use `@functools.wraps(func)`, but this does + # not play nicely with cython class methods + + def wrapper(self, other): + + if other is NaT: + return NaT + elif ( + isinstance(other, BaseOffset) + or PyDelta_Check(other) + or util.is_timedelta64_object(other) + ): + # timedelta path + return func(self, other) + elif is_datetime64_object(other) or PyDate_Check(other): + # PyDate_Check includes date, datetime + other = Timestamp(other) + else: + # This will end up returning NotImplemented back in __add__ + raise ApplyTypeError + + tz = other.tzinfo + nano = other.nanosecond + + if self._adjust_dst: + other = other.tz_localize(None) + + result = func(self, other) + + result = Timestamp(result) + if self._adjust_dst: + result = result.tz_localize(tz) + + if self.normalize: + result = result.normalize() + + # If the offset object does not have a nanoseconds component, + # the result's nanosecond component may be lost. + if not self.normalize and nano != 0 and not hasattr(self, "nanoseconds"): + if result.nanosecond != nano: + if result.tz is not None: + # convert to UTC + value = result.tz_localize(None).value + else: + value = result.value + result = Timestamp(value + nano) + + if tz is not None and result.tzinfo is None: + result = result.tz_localize(tz) + + return result + + # do @functools.wraps(func) manually since it doesn't work on cdef funcs + wrapper.__name__ = func.__name__ + wrapper.__doc__ = func.__doc__ + return wrapper + + +cdef _wrap_timedelta_result(result): + """ + Tick operations dispatch to their Timedelta counterparts. Wrap the result + of these operations in a Tick if possible. 
+ + Parameters + ---------- + result : object + + Returns + ------- + object + """ + if PyDelta_Check(result): + # convert Timedelta back to a Tick + return delta_to_tick(result) + + return result + +# --------------------------------------------------------------------- +# Business Helpers + + +cdef _get_calendar(weekmask, holidays, calendar): + """ + Generate busdaycalendar + """ + if isinstance(calendar, np.busdaycalendar): + if not holidays: + holidays = tuple(calendar.holidays) + elif not isinstance(holidays, tuple): + holidays = tuple(holidays) + else: + # trust that calendar.holidays and holidays are + # consistent + pass + return calendar, holidays + + if holidays is None: + holidays = [] + try: + holidays = holidays + calendar.holidays().tolist() + except AttributeError: + pass + holidays = [_to_dt64D(dt) for dt in holidays] + holidays = tuple(sorted(holidays)) + + kwargs = {'weekmask': weekmask} + if holidays: + kwargs['holidays'] = holidays + + busdaycalendar = np.busdaycalendar(**kwargs) + return busdaycalendar, holidays + + +cdef _to_dt64D(dt): + # Currently + # > np.datetime64(dt.datetime(2013,5,1),dtype='datetime64[D]') + # numpy.datetime64('2013-05-01T02:00:00.000000+0200') + # Thus astype is needed to cast datetime to datetime64[D] + if getattr(dt, 'tzinfo', None) is not None: + # Get the nanosecond timestamp, + # equiv `Timestamp(dt).value` or `dt.timestamp() * 10**9` + nanos = getattr(dt, "nanosecond", 0) + i8 = convert_datetime_to_tsobject(dt, tz=None, nanos=nanos).value + dt = tz_convert_from_utc_single(i8, dt.tzinfo) + dt = np.int64(dt).astype('datetime64[ns]') + else: + dt = np.datetime64(dt) + if dt.dtype.name != "datetime64[D]": + dt = dt.astype("datetime64[D]") + return dt + + +# --------------------------------------------------------------------- +# Validation + + +cdef _validate_business_time(t_input): + if isinstance(t_input, str): + try: + t = time.strptime(t_input, '%H:%M') + return dt_time(hour=t.tm_hour, minute=t.tm_min) + except ValueError: + raise ValueError("time data must match '%H:%M' format") + elif isinstance(t_input, dt_time): + if t_input.second != 0 or t_input.microsecond != 0: + raise ValueError( + "time data must be specified only with hour and minute") + return t_input + else: + raise ValueError("time data must be string or datetime.time") + + +# --------------------------------------------------------------------- +# Constructor Helpers + +_relativedelta_kwds = {"years", "months", "weeks", "days", "year", "month", + "day", "weekday", "hour", "minute", "second", + "microsecond", "nanosecond", "nanoseconds", "hours", + "minutes", "seconds", "microseconds"} + + +cdef _determine_offset(kwds): + # timedelta is used for sub-daily plural offsets and all singular + # offsets relativedelta is used for plural offsets of daily length or + # more nanosecond(s) are handled by apply_wraps + kwds_no_nanos = dict( + (k, v) for k, v in kwds.items() + if k not in ('nanosecond', 'nanoseconds') + ) + # TODO: Are nanosecond and nanoseconds allowed somewhere? 
+ + _kwds_use_relativedelta = ('years', 'months', 'weeks', 'days', + 'year', 'month', 'week', 'day', 'weekday', + 'hour', 'minute', 'second', 'microsecond') + + use_relativedelta = False + if len(kwds_no_nanos) > 0: + if any(k in _kwds_use_relativedelta for k in kwds_no_nanos): + offset = relativedelta(**kwds_no_nanos) + use_relativedelta = True + else: + # sub-daily offset - use timedelta (tz-aware) + offset = timedelta(**kwds_no_nanos) + elif any(nano in kwds for nano in ('nanosecond', 'nanoseconds')): + offset = timedelta(days=0) + else: + # GH 45643/45890: (historically) defaults to 1 day for non-nano + # since datetime.timedelta doesn't handle nanoseconds + offset = timedelta(days=1) + return offset, use_relativedelta + + +# --------------------------------------------------------------------- +# Mixins & Singletons + + +class ApplyTypeError(TypeError): + # sentinel class for catching the apply error to return NotImplemented + pass + + +# --------------------------------------------------------------------- +# Base Classes + +cdef class BaseOffset: + """ + Base class for DateOffset methods that are not overridden by subclasses. + """ + # ensure that reversed-ops with numpy scalars return NotImplemented + __array_priority__ = 1000 + + _day_opt = None + _attributes = tuple(["n", "normalize"]) + _use_relativedelta = False + _adjust_dst = True + _deprecations = frozenset(["isAnchored", "onOffset"]) + + # cdef readonly: + # int64_t n + # bint normalize + # dict _cache + + def __init__(self, n=1, normalize=False): + n = self._validate_n(n) + self.n = n + self.normalize = normalize + self._cache = {} + + def __eq__(self, other) -> bool: + if isinstance(other, str): + try: + # GH#23524 if to_offset fails, we are dealing with an + # incomparable type so == is False and != is True + other = to_offset(other) + except ValueError: + # e.g. "infer" + return False + try: + return self._params == other._params + except AttributeError: + # other is not a DateOffset object + return False + + def __ne__(self, other): + return not self == other + + def __hash__(self) -> int: + return hash(self._params) + + @cache_readonly + def _params(self): + """ + Returns a tuple containing all of the attributes needed to evaluate + equality between two DateOffset objects. + """ + d = getattr(self, "__dict__", {}) + all_paras = d.copy() + all_paras["n"] = self.n + all_paras["normalize"] = self.normalize + for attr in self._attributes: + if hasattr(self, attr) and attr not in d: + # cython attributes are not in __dict__ + all_paras[attr] = getattr(self, attr) + + if 'holidays' in all_paras and not all_paras['holidays']: + all_paras.pop('holidays') + exclude = ['kwds', 'name', 'calendar'] + attrs = [(k, v) for k, v in all_paras.items() + if (k not in exclude) and (k[0] != '_')] + attrs = sorted(set(attrs)) + params = tuple([str(type(self))] + attrs) + return params + + @property + def kwds(self) -> dict: + # for backwards-compatibility + kwds = {name: getattr(self, name, None) for name in self._attributes + if name not in ["n", "normalize"]} + return {name: kwds[name] for name in kwds if kwds[name] is not None} + + @property + def base(self): + """ + Returns a copy of the calling offset object with n=1 and all other + attributes equal. 
+ """ + return type(self)(n=1, normalize=self.normalize, **self.kwds) + + def __add__(self, other): + if not isinstance(self, BaseOffset): + # cython semantics; this is __radd__ + return other.__add__(self) + + elif util.is_array(other) and other.dtype == object: + return np.array([self + x for x in other]) + + try: + return self._apply(other) + except ApplyTypeError: + return NotImplemented + + def __sub__(self, other): + if PyDateTime_Check(other): + raise TypeError('Cannot subtract datetime from offset.') + elif type(other) == type(self): + return type(self)(self.n - other.n, normalize=self.normalize, + **self.kwds) + elif not isinstance(self, BaseOffset): + # cython semantics, this is __rsub__ + return (-other).__add__(self) + else: + # e.g. PeriodIndex + return NotImplemented + + def __call__(self, other): + warnings.warn( + "DateOffset.__call__ is deprecated and will be removed in a future " + "version. Use `offset + other` instead.", + FutureWarning, + stacklevel=1, + ) + return self._apply(other) + + def apply(self, other): + # GH#44522 + warnings.warn( + f"{type(self).__name__}.apply is deprecated and will be removed " + "in a future version. Use `offset + other` instead", + FutureWarning, + stacklevel=2, + ) + return self._apply(other) + + def __mul__(self, other): + if util.is_array(other): + return np.array([self * x for x in other]) + elif is_integer_object(other): + return type(self)(n=other * self.n, normalize=self.normalize, + **self.kwds) + elif not isinstance(self, BaseOffset): + # cython semantics, this is __rmul__ + return other.__mul__(self) + return NotImplemented + + def __neg__(self): + # Note: we are deferring directly to __mul__ instead of __rmul__, as + # that allows us to use methods that can go in a `cdef class` + return self * -1 + + def copy(self): + # Note: we are deferring directly to __mul__ instead of __rmul__, as + # that allows us to use methods that can go in a `cdef class` + return self * 1 + + # ------------------------------------------------------------------ + # Name and Rendering Methods + + def __repr__(self) -> str: + # _output_name used by B(Year|Quarter)(End|Begin) to + # expand "B" -> "Business" + class_name = getattr(self, "_output_name", type(self).__name__) + + if abs(self.n) != 1: + plural = "s" + else: + plural = "" + + n_str = "" + if self.n != 1: + n_str = f"{self.n} * " + + out = f"<{n_str}{class_name}{plural}{self._repr_attrs()}>" + return out + + def _repr_attrs(self) -> str: + exclude = {"n", "inc", "normalize"} + attrs = [] + for attr in sorted(self._attributes): + # _attributes instead of __dict__ because cython attrs are not in __dict__ + if attr.startswith("_") or attr == "kwds" or not hasattr(self, attr): + # DateOffset may not have some of these attributes + continue + elif attr not in exclude: + value = getattr(self, attr) + attrs.append(f"{attr}={value}") + + out = "" + if attrs: + out += ": " + ", ".join(attrs) + return out + + @property + def name(self) -> str: + return self.rule_code + + @property + def _prefix(self) -> str: + raise NotImplementedError("Prefix not defined") + + @property + def rule_code(self) -> str: + return self._prefix + + @cache_readonly + def freqstr(self) -> str: + try: + code = self.rule_code + except NotImplementedError: + return str(repr(self)) + + if self.n != 1: + fstr = f"{self.n}{code}" + else: + fstr = code + + try: + if self._offset: + fstr += self._offset_str() + except AttributeError: + # TODO: standardize `_offset` vs `offset` naming convention + pass + + return fstr + + def 
_offset_str(self) -> str:
+        return ""
+
+    # ------------------------------------------------------------------
+
+    @apply_index_wraps
+    def apply_index(self, dtindex):
+        """
+        Vectorized apply of DateOffset to DatetimeIndex,
+        raises NotImplementedError for offsets without a
+        vectorized implementation.
+
+        .. deprecated:: 1.1.0
+
+           Use ``offset + dtindex`` instead.
+
+        Parameters
+        ----------
+        dtindex : DatetimeIndex
+
+        Returns
+        -------
+        DatetimeIndex
+
+        Raises
+        ------
+        NotImplementedError
+            When the specific offset subclass does not have a vectorized
+            implementation.
+        """
+        raise NotImplementedError(  # pragma: no cover
+            f"DateOffset subclass {type(self).__name__} "
+            "does not have a vectorized implementation"
+        )
+
+    @apply_array_wraps
+    def _apply_array(self, dtarr):
+        raise NotImplementedError(
+            f"DateOffset subclass {type(self).__name__} "
+            "does not have a vectorized implementation"
+        )
+
+    def rollback(self, dt) -> datetime:
+        """
+        Roll provided date backward to next offset only if not on offset.
+
+        Returns
+        -------
+        Timestamp
+            Rolled timestamp if not on offset, otherwise unchanged timestamp.
+        """
+        dt = Timestamp(dt)
+        if not self.is_on_offset(dt):
+            dt = dt - type(self)(1, normalize=self.normalize, **self.kwds)
+        return dt
+
+    def rollforward(self, dt) -> datetime:
+        """
+        Roll provided date forward to next offset only if not on offset.
+
+        Returns
+        -------
+        Timestamp
+            Rolled timestamp if not on offset, otherwise unchanged timestamp.
+        """
+        dt = Timestamp(dt)
+        if not self.is_on_offset(dt):
+            dt = dt + type(self)(1, normalize=self.normalize, **self.kwds)
+        return dt
+
+    def _get_offset_day(self, other: datetime) -> int:
+        # subclass must implement `_day_opt`; calling from the base class
+        # will implicitly assume day_opt = "business_end", see get_day_of_month.
+        cdef:
+            npy_datetimestruct dts
+        pydate_to_dtstruct(other, &dts)
+        return get_day_of_month(&dts, self._day_opt)
+
+    def is_on_offset(self, dt: datetime) -> bool:
+        if self.normalize and not _is_normalized(dt):
+            return False
+
+        # Default (slow) method for determining if some date is a member of the
+        # date range generated by this offset. Subclasses may have this
+        # re-implemented in a nicer way.
+        a = dt
+        b = (dt + self) - self
+        return a == b
+
+    # ------------------------------------------------------------------
+
+    # Staticmethod so we can call from Tick.__init__, will be unnecessary
+    # once BaseOffset is a cdef class and is inherited by Tick
+    @staticmethod
+    def _validate_n(n) -> int:
+        """
+        Require that `n` be an integer.
+
+        Parameters
+        ----------
+        n : int
+
+        Returns
+        -------
+        nint : int
+
+        Raises
+        ------
+        TypeError if `int(n)` raises
+        ValueError if n != int(n)
+        """
+        if util.is_timedelta64_object(n):
+            raise TypeError(f'`n` argument must be an integer, got {type(n)}')
+        try:
+            nint = int(n)
+        except (ValueError, TypeError):
+            raise TypeError(f'`n` argument must be an integer, got {type(n)}')
+        if n != nint:
+            raise ValueError(f'`n` argument must be an integer, got {n}')
+        return nint
+
+    def __setstate__(self, state):
+        """
+        Reconstruct an instance from a pickled state
+        """
+        self.n = state.pop("n")
+        self.normalize = state.pop("normalize")
+        self._cache = state.pop("_cache", {})
+        # At this point we expect state to be empty
+
+    def __getstate__(self):
+        """
+        Return a pickleable state
+        """
+        state = {}
+        state["n"] = self.n
+        state["normalize"] = self.normalize
+
+        # we don't want to actually pickle the calendar object
+        # as it's an np.busdaycalendar; we recreate it on deserialization
+        state.pop("calendar", None)
+        if "kwds" in state:
+            state["kwds"].pop("calendar", None)
+
+        return state
+
+    @property
+    def nanos(self):
+        raise ValueError(f"{self} is a non-fixed frequency")
+
+    def onOffset(self, dt) -> bool:
+        warnings.warn(
+            "onOffset is deprecated, use is_on_offset instead.",
+            FutureWarning,
+            stacklevel=1,
+        )
+        return self.is_on_offset(dt)
+
+    def isAnchored(self) -> bool:
+        warnings.warn(
+            "isAnchored is deprecated, use is_anchored instead.",
+            FutureWarning,
+            stacklevel=1,
+        )
+        return self.is_anchored()
+
+    def is_anchored(self) -> bool:
+        # TODO: Does this make sense for the general case? It would help
+        #  if there were a canonical docstring for what is_anchored means.
+        return self.n == 1
+
+    # ------------------------------------------------------------------
+
+    def is_month_start(self, _Timestamp ts):
+        return ts._get_start_end_field("is_month_start", self)
+
+    def is_month_end(self, _Timestamp ts):
+        return ts._get_start_end_field("is_month_end", self)
+
+    def is_quarter_start(self, _Timestamp ts):
+        return ts._get_start_end_field("is_quarter_start", self)
+
+    def is_quarter_end(self, _Timestamp ts):
+        return ts._get_start_end_field("is_quarter_end", self)
+
+    def is_year_start(self, _Timestamp ts):
+        return ts._get_start_end_field("is_year_start", self)
+
+    def is_year_end(self, _Timestamp ts):
+        return ts._get_start_end_field("is_year_end", self)
+
+
+cdef class SingleConstructorOffset(BaseOffset):
+    @classmethod
+    def _from_name(cls, suffix=None):
+        # default _from_name calls cls with no args
+        if suffix:
+            raise ValueError(f"Bad freq suffix {suffix}")
+        return cls()
+
+    def __reduce__(self):
+        # This __reduce__ implementation is for all BaseOffset subclasses
+        #  except for RelativeDeltaOffset
+        # np.busdaycalendar objects do not pickle nicely, but we can reconstruct
+        #  from attributes that do get pickled.
+        tup = tuple(
+            getattr(self, attr) if attr != "calendar" else None
+            for attr in self._attributes
+        )
+        return type(self), tup
+
+
+# ---------------------------------------------------------------------
+# Tick Offsets
+
+cdef class Tick(SingleConstructorOffset):
+    _adjust_dst = False
+    _prefix = "undefined"
+    _attributes = tuple(["n", "normalize"])
+
+    def __init__(self, n=1, normalize=False):
+        n = self._validate_n(n)
+        self.n = n
+        self.normalize = False
+        self._cache = {}
+        if normalize:
+            # GH#21427
+            raise ValueError(
+                "Tick offsets with `normalize=True` are not allowed."
+ ) + + # Note: Without making this cpdef, we get AttributeError when calling + # from __mul__ + cpdef Tick _next_higher_resolution(Tick self): + if type(self) is Day: + return Hour(self.n * 24) + if type(self) is Hour: + return Minute(self.n * 60) + if type(self) is Minute: + return Second(self.n * 60) + if type(self) is Second: + return Milli(self.n * 1000) + if type(self) is Milli: + return Micro(self.n * 1000) + if type(self) is Micro: + return Nano(self.n * 1000) + raise ValueError("Could not convert to integer offset at any resolution") + + # -------------------------------------------------------------------- + + def _repr_attrs(self) -> str: + # Since cdef classes have no __dict__, we need to override + return "" + + @property + def delta(self): + return self.n * Timedelta(self._nanos_inc) + + @property + def nanos(self) -> int64_t: + return self.n * self._nanos_inc + + def is_on_offset(self, dt: datetime) -> bool: + return True + + def is_anchored(self) -> bool: + return False + + # This is identical to BaseOffset.__hash__, but has to be redefined here + # for Python 3, because we've redefined __eq__. + def __hash__(self) -> int: + return hash(self._params) + + # -------------------------------------------------------------------- + # Comparison and Arithmetic Methods + + def __eq__(self, other): + if isinstance(other, str): + try: + # GH#23524 if to_offset fails, we are dealing with an + # incomparable type so == is False and != is True + other = to_offset(other) + except ValueError: + # e.g. "infer" + return False + return self.delta == other + + def __ne__(self, other): + return not (self == other) + + def __le__(self, other): + return self.delta.__le__(other) + + def __lt__(self, other): + return self.delta.__lt__(other) + + def __ge__(self, other): + return self.delta.__ge__(other) + + def __gt__(self, other): + return self.delta.__gt__(other) + + def __mul__(self, other): + if not isinstance(self, Tick): + # cython semantics, this is __rmul__ + return other.__mul__(self) + if is_float_object(other): + n = other * self.n + # If the new `n` is an integer, we can represent it using the + # same Tick subclass as self, otherwise we need to move up + # to a higher-resolution subclass + if np.isclose(n % 1, 0): + return type(self)(int(n)) + new_self = self._next_higher_resolution() + return new_self * other + return BaseOffset.__mul__(self, other) + + def __truediv__(self, other): + if not isinstance(self, Tick): + # cython semantics mean the args are sometimes swapped + result = other.delta.__rtruediv__(self) + else: + result = self.delta.__truediv__(other) + return _wrap_timedelta_result(result) + + def __add__(self, other): + if not isinstance(self, Tick): + # cython semantics; this is __radd__ + return other.__add__(self) + + if isinstance(other, Tick): + if type(self) == type(other): + return type(self)(self.n + other.n) + else: + return delta_to_tick(self.delta + other.delta) + try: + return self._apply(other) + except ApplyTypeError: + # Includes pd.Period + return NotImplemented + except OverflowError as err: + raise OverflowError( + f"the add operation between {self} and {other} will overflow" + ) from err + + def _apply(self, other): + # Timestamp can handle tz and nano sec, thus no need to use apply_wraps + if isinstance(other, _Timestamp): + # GH#15126 + return other + self.delta + elif other is NaT: + return NaT + elif is_datetime64_object(other) or PyDate_Check(other): + # PyDate_Check includes date, datetime + return Timestamp(other) + self + + if 
util.is_timedelta64_object(other) or PyDelta_Check(other): + return other + self.delta + elif isinstance(other, type(self)): + # TODO(2.0): remove once apply deprecation is enforced. + # This is reached in tests that specifically call apply, + # but should not be reached "naturally" because __add__ should + # catch this case first. + return type(self)(self.n + other.n) + + raise ApplyTypeError(f"Unhandled type: {type(other).__name__}") + + # -------------------------------------------------------------------- + # Pickle Methods + + def __setstate__(self, state): + self.n = state["n"] + self.normalize = False + + +cdef class Day(Tick): + _nanos_inc = 24 * 3600 * 1_000_000_000 + _prefix = "D" + _period_dtype_code = PeriodDtypeCode.D + + +cdef class Hour(Tick): + _nanos_inc = 3600 * 1_000_000_000 + _prefix = "H" + _period_dtype_code = PeriodDtypeCode.H + + +cdef class Minute(Tick): + _nanos_inc = 60 * 1_000_000_000 + _prefix = "T" + _period_dtype_code = PeriodDtypeCode.T + + +cdef class Second(Tick): + _nanos_inc = 1_000_000_000 + _prefix = "S" + _period_dtype_code = PeriodDtypeCode.S + + +cdef class Milli(Tick): + _nanos_inc = 1_000_000 + _prefix = "L" + _period_dtype_code = PeriodDtypeCode.L + + +cdef class Micro(Tick): + _nanos_inc = 1000 + _prefix = "U" + _period_dtype_code = PeriodDtypeCode.U + + +cdef class Nano(Tick): + _nanos_inc = 1 + _prefix = "N" + _period_dtype_code = PeriodDtypeCode.N + + +def delta_to_tick(delta: timedelta) -> Tick: + if delta.microseconds == 0 and getattr(delta, "nanoseconds", 0) == 0: + # nanoseconds only for pd.Timedelta + if delta.seconds == 0: + return Day(delta.days) + else: + seconds = delta.days * 86400 + delta.seconds + if seconds % 3600 == 0: + return Hour(seconds / 3600) + elif seconds % 60 == 0: + return Minute(seconds / 60) + else: + return Second(seconds) + else: + nanos = delta_to_nanoseconds(delta) + if nanos % 1_000_000 == 0: + return Milli(nanos // 1_000_000) + elif nanos % 1000 == 0: + return Micro(nanos // 1000) + else: # pragma: no cover + return Nano(nanos) + + +# -------------------------------------------------------------------- + +cdef class RelativeDeltaOffset(BaseOffset): + """ + DateOffset subclass backed by a dateutil relativedelta object. 
+ """ + _attributes = tuple(["n", "normalize"] + list(_relativedelta_kwds)) + _adjust_dst = False + + def __init__(self, n=1, normalize=False, **kwds): + BaseOffset.__init__(self, n, normalize) + + off, use_rd = _determine_offset(kwds) + object.__setattr__(self, "_offset", off) + object.__setattr__(self, "_use_relativedelta", use_rd) + for key in kwds: + val = kwds[key] + object.__setattr__(self, key, val) + + def __getstate__(self): + """ + Return a pickleable state + """ + # RelativeDeltaOffset (technically DateOffset) is the only non-cdef + # class, so the only one with __dict__ + state = self.__dict__.copy() + state["n"] = self.n + state["normalize"] = self.normalize + return state + + def __setstate__(self, state): + """ + Reconstruct an instance from a pickled state + """ + + if "offset" in state: + # Older (<0.22.0) versions have offset attribute instead of _offset + if "_offset" in state: # pragma: no cover + raise AssertionError("Unexpected key `_offset`") + state["_offset"] = state.pop("offset") + state["kwds"]["offset"] = state["_offset"] + + self.n = state.pop("n") + self.normalize = state.pop("normalize") + self._cache = state.pop("_cache", {}) + + self.__dict__.update(state) + + @apply_wraps + def _apply(self, other: datetime) -> datetime: + if self._use_relativedelta: + other = _as_datetime(other) + + if len(self.kwds) > 0: + tzinfo = getattr(other, "tzinfo", None) + if tzinfo is not None and self._use_relativedelta: + # perform calculation in UTC + other = other.replace(tzinfo=None) + + if hasattr(self, "nanoseconds"): + td_nano = Timedelta(nanoseconds=self.nanoseconds) + else: + td_nano = Timedelta(0) + + if self.n > 0: + for i in range(self.n): + other = other + self._offset + td_nano + else: + for i in range(-self.n): + other = other - self._offset - td_nano + + if tzinfo is not None and self._use_relativedelta: + # bring tz back from UTC calculation + other = localize_pydatetime(other, tzinfo) + + return Timestamp(other) + else: + return other + timedelta(self.n) + + @apply_index_wraps + def apply_index(self, dtindex): + """ + Vectorized apply of DateOffset to DatetimeIndex, + raises NotImplementedError for offsets without a + vectorized implementation. 
+
+        Parameters
+        ----------
+        dtindex : DatetimeIndex
+
+        Returns
+        -------
+        ndarray[datetime64[ns]]
+        """
+        return self._apply_array(dtindex)
+
+    @apply_array_wraps
+    def _apply_array(self, dtarr):
+        dt64other = np.asarray(dtarr)
+        kwds = self.kwds
+        relativedelta_fast = {
+            "years",
+            "months",
+            "weeks",
+            "days",
+            "hours",
+            "minutes",
+            "seconds",
+            "microseconds",
+        }
+        # relativedelta/_offset path only valid for base DateOffset
+        if self._use_relativedelta and set(kwds).issubset(relativedelta_fast):
+
+            months = (kwds.get("years", 0) * 12 + kwds.get("months", 0)) * self.n
+            if months:
+                shifted = shift_months(dt64other.view("i8"), months)
+                dt64other = shifted.view("datetime64[ns]")
+
+            weeks = kwds.get("weeks", 0) * self.n
+            if weeks:
+                dt64other = dt64other + Timedelta(days=7 * weeks)
+
+            timedelta_kwds = {
+                k: v
+                for k, v in kwds.items()
+                if k in ["days", "hours", "minutes", "seconds", "microseconds"]
+            }
+            if timedelta_kwds:
+                delta = Timedelta(**timedelta_kwds)
+                dt64other = dt64other + (self.n * delta)
+            return dt64other
+        elif not self._use_relativedelta and hasattr(self, "_offset"):
+            # timedelta
+            return dt64other + Timedelta(self._offset * self.n)
+        else:
+            # relativedelta with other keywords
+            kwd = set(kwds) - relativedelta_fast
+            raise NotImplementedError(
+                "DateOffset with relativedelta "
+                f"keyword(s) {kwd} not able to be "
+                "applied vectorized"
+            )
+
+    def is_on_offset(self, dt: datetime) -> bool:
+        if self.normalize and not _is_normalized(dt):
+            return False
+        return True
+
+
+class OffsetMeta(type):
+    """
+    Metaclass that allows us to pretend that all BaseOffset subclasses
+    inherit from DateOffset (which is needed for backward-compatibility).
+    """
+
+    @classmethod
+    def __instancecheck__(cls, obj) -> bool:
+        return isinstance(obj, BaseOffset)
+
+    @classmethod
+    def __subclasscheck__(cls, obj) -> bool:
+        return issubclass(obj, BaseOffset)
+
+
+# TODO: figure out a way to use a metaclass with a cdef class
+class DateOffset(RelativeDeltaOffset, metaclass=OffsetMeta):
+    """
+    Standard kind of date increment used for a date range.
+
+    Works exactly like the keyword argument form of relativedelta.
+    Note that the positional argument form of relativedelta is not
+    supported. Use of the keyword n is discouraged; you would be better
+    off specifying n in the keywords you use, but regardless it is
+    there for you. n is needed for DateOffset subclasses.
+
+    DateOffset works as follows. Each offset specifies a set of dates
+    that conform to the DateOffset. For example, Bday defines this
+    set to be the set of dates that are weekdays (M-F). To test if a
+    date is in the set of a DateOffset dateOffset we can use the
+    is_on_offset method: dateOffset.is_on_offset(date).
+
+    If a date is not on a valid date, the rollback and rollforward
+    methods can be used to roll the date to the nearest valid date
+    before/after the date.
+
+    DateOffsets can be created to move dates forward a given number of
+    valid dates. For example, Bday(2) can be added to a date to move
+    it two business days forward. If the date does not start on a
+    valid date, first it is moved to a valid date. Thus pseudo code
+    is:
+
+    def __add__(date):
+        date = rollback(date)  # does nothing if date is valid
+        return date + <n number of periods>
+
+    When a date offset is created for a negative number of periods,
+    the date is first rolled forward. The pseudo code is:
+
+    def __add__(date):
+        date = rollforward(date)  # does nothing if date is valid
+        return date + <n number of periods>
+
+    Zero presents a problem. Should it roll forward or back? We
+    arbitrarily have it rollforward:
+
+    date + BDay(0) == BDay.rollforward(date)
+
+    Since 0 is a bit weird, we suggest avoiding its use.
+
+    Parameters
+    ----------
+    n : int, default 1
+        The number of time periods the offset represents.
+        If specified without a temporal pattern, defaults to n days.
+    normalize : bool, default False
+        Whether to round the result of a DateOffset addition down to the
+        previous midnight.
+    **kwds
+        Temporal parameters that add to or replace the offset value.
+
+        Parameters that **add** to the offset (like Timedelta):
+
+        - years
+        - months
+        - weeks
+        - days
+        - hours
+        - minutes
+        - seconds
+        - microseconds
+        - nanoseconds
+
+        Parameters that **replace** the offset value:
+
+        - year
+        - month
+        - day
+        - weekday
+        - hour
+        - minute
+        - second
+        - microsecond
+        - nanosecond
+
+    See Also
+    --------
+    dateutil.relativedelta.relativedelta : The relativedelta type is designed
+        to be applied to an existing datetime and can replace specific
+        components of that datetime, or represent an interval of time.
+
+    Examples
+    --------
+    >>> from pandas.tseries.offsets import DateOffset
+    >>> ts = pd.Timestamp('2017-01-01 09:10:11')
+    >>> ts + DateOffset(months=3)
+    Timestamp('2017-04-01 09:10:11')
+
+    >>> ts = pd.Timestamp('2017-01-01 09:10:11')
+    >>> ts + DateOffset(months=2)
+    Timestamp('2017-03-01 09:10:11')
+    """
+    def __setattr__(self, name, value):
+        raise AttributeError("DateOffset objects are immutable.")
+
+# --------------------------------------------------------------------
+
+
+cdef class BusinessMixin(SingleConstructorOffset):
+    """
+    Mixin for business offset types to provide related functions.
+    """
+
+    cdef readonly:
+        timedelta _offset
+        # Only Custom subclasses use weekmask, holidays, calendar
+        object weekmask, holidays, calendar
+
+    def __init__(self, n=1, normalize=False, offset=timedelta(0)):
+        BaseOffset.__init__(self, n, normalize)
+        self._offset = offset
+
+    cpdef _init_custom(self, weekmask, holidays, calendar):
+        """
+        Additional __init__ for Custom subclasses.
+        """
+        calendar, holidays = _get_calendar(
+            weekmask=weekmask, holidays=holidays, calendar=calendar
+        )
+        # Custom offset instances are identified by the
+        # following two attributes. See DateOffset._params()
+        # holidays, weekmask
+        self.weekmask = weekmask
+        self.holidays = holidays
+        self.calendar = calendar
+
+    @property
+    def offset(self):
+        """
+        Alias for self._offset.
+        """
+        # Alias for backward compat
+        return self._offset
+
+    def _repr_attrs(self) -> str:
+        if self.offset:
+            attrs = [f"offset={repr(self.offset)}"]
+        else:
+            attrs = []
+        out = ""
+        if attrs:
+            out += ": " + ", ".join(attrs)
+        return out
+
+    cpdef __setstate__(self, state):
+        # We need to use a cdef/cpdef method to set the readonly _offset attribute
+        if "_offset" in state:
+            self._offset = state.pop("_offset")
+        elif "offset" in state:
+            # Older (<0.22.0) versions have offset attribute instead of _offset
+            self._offset = state.pop("offset")
+
+        if self._prefix.startswith("C"):
+            # i.e. this is a Custom class
+            weekmask = state.pop("weekmask")
+            holidays = state.pop("holidays")
+            calendar, holidays = _get_calendar(weekmask=weekmask,
+                                               holidays=holidays,
+                                               calendar=None)
+            self.weekmask = weekmask
+            self.calendar = calendar
+            self.holidays = holidays
+
+        BaseOffset.__setstate__(self, state)
+
+
+cdef class BusinessDay(BusinessMixin):
+    """
+    DateOffset subclass representing possibly n business days.
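+
+    Examples
+    --------
+    An illustrative example; ``BDay`` is the public alias for this class:
+
+    >>> ts = pd.Timestamp('2020-05-24')  # a Sunday
+    >>> ts + pd.offsets.BDay(1)
+    Timestamp('2020-05-25 00:00:00')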
+    """
+    _period_dtype_code = PeriodDtypeCode.B
+    _prefix = "B"
+    _attributes = tuple(["n", "normalize", "offset"])
+
+    cpdef __setstate__(self, state):
+        self.n = state.pop("n")
+        self.normalize = state.pop("normalize")
+        if "_offset" in state:
+            self._offset = state.pop("_offset")
+        elif "offset" in state:
+            self._offset = state.pop("offset")
+        self._cache = state.pop("_cache", {})
+
+    def _offset_str(self) -> str:
+        def get_str(td):
+            off_str = ""
+            if td.days > 0:
+                off_str += str(td.days) + "D"
+            if td.seconds > 0:
+                s = td.seconds
+                hrs = int(s / 3600)
+                if hrs != 0:
+                    off_str += str(hrs) + "H"
+                    s -= hrs * 3600
+                mts = int(s / 60)
+                if mts != 0:
+                    off_str += str(mts) + "Min"
+                    s -= mts * 60
+                if s != 0:
+                    off_str += str(s) + "s"
+            if td.microseconds > 0:
+                off_str += str(td.microseconds) + "us"
+            return off_str
+
+        if PyDelta_Check(self.offset):
+            zero = timedelta(0, 0, 0)
+            if self.offset >= zero:
+                off_str = "+" + get_str(self.offset)
+            else:
+                off_str = "-" + get_str(-self.offset)
+            return off_str
+        else:
+            return "+" + repr(self.offset)
+
+    @apply_wraps
+    def _apply(self, other):
+        if PyDateTime_Check(other):
+            n = self.n
+            wday = other.weekday()
+
+            # avoid slowness below by operating on weeks first
+            weeks = n // 5
+            if n <= 0 and wday > 4:
+                # roll forward
+                n += 1
+
+            n -= 5 * weeks
+
+            # n is always >= 0 at this point
+            if n == 0 and wday > 4:
+                # roll back
+                days = 4 - wday
+            elif wday > 4:
+                # roll forward
+                days = (7 - wday) + (n - 1)
+            elif wday + n <= 4:
+                # shift by n days without leaving the current week
+                days = n
+            else:
+                # shift by n days plus 2 to get past the weekend
+                days = n + 2
+
+            result = other + timedelta(days=7 * weeks + days)
+            if self.offset:
+                result = result + self.offset
+            return result
+
+        elif is_any_td_scalar(other):
+            td = Timedelta(self.offset) + other
+            return BusinessDay(
+                self.n, offset=td.to_pytimedelta(), normalize=self.normalize
+            )
+        else:
+            raise ApplyTypeError(
+                "Only know how to combine business day with datetime or timedelta."
+            )
+
+    @apply_index_wraps
+    def apply_index(self, dtindex):
+        return self._apply_array(dtindex)
+
+    @apply_array_wraps
+    def _apply_array(self, dtarr):
+        i8other = dtarr.view("i8")
+        res = _shift_bdays(i8other, self.n)
+        if self.offset:
+            res = res.view("M8[ns]") + Timedelta(self.offset)
+            res = res.view("i8")
+        return res
+
+    def is_on_offset(self, dt: datetime) -> bool:
+        if self.normalize and not _is_normalized(dt):
+            return False
+        return dt.weekday() < 5
+
+
+cdef class BusinessHour(BusinessMixin):
+    """
+    DateOffset subclass representing possibly n business hours.
+
+    Parameters
+    ----------
+    n : int, default 1
+        The number of business hours represented.
+    normalize : bool, default False
+        Normalize start/end dates to midnight before generating date range.
+    weekmask : str, default 'Mon Tue Wed Thu Fri'
+        Weekmask of valid business days, passed to ``numpy.busdaycalendar``.
+    start : str, default "09:00"
+        Start time of your custom business hour in 24h format.
+    end : str, default "17:00"
+        End time of your custom business hour in 24h format.
+    """
+
+    _prefix = "BH"
+    _anchor = 0
+    _attributes = tuple(["n", "normalize", "start", "end", "offset"])
+    _adjust_dst = False
+
+    cdef readonly:
+        tuple start, end
+
+    def __init__(
+        self, n=1, normalize=False, start="09:00", end="17:00", offset=timedelta(0)
+    ):
+        BusinessMixin.__init__(self, n, normalize, offset)
+
+        # must be validated here to allow equality check
+        if np.ndim(start) == 0:
+            # i.e.
not is_list_like + start = [start] + if not len(start): + raise ValueError("Must include at least 1 start time") + + if np.ndim(end) == 0: + # i.e. not is_list_like + end = [end] + if not len(end): + raise ValueError("Must include at least 1 end time") + + start = np.array([_validate_business_time(x) for x in start]) + end = np.array([_validate_business_time(x) for x in end]) + + # Validation of input + if len(start) != len(end): + raise ValueError("number of starting time and ending time must be the same") + num_openings = len(start) + + # sort starting and ending time by starting time + index = np.argsort(start) + + # convert to tuple so that start and end are hashable + start = tuple(start[index]) + end = tuple(end[index]) + + total_secs = 0 + for i in range(num_openings): + total_secs += self._get_business_hours_by_sec(start[i], end[i]) + total_secs += self._get_business_hours_by_sec( + end[i], start[(i + 1) % num_openings] + ) + if total_secs != 24 * 60 * 60: + raise ValueError( + "invalid starting and ending time(s): " + "opening hours should not touch or overlap with " + "one another" + ) + + self.start = start + self.end = end + + cpdef __setstate__(self, state): + start = state.pop("start") + start = (start,) if np.ndim(start) == 0 else tuple(start) + end = state.pop("end") + end = (end,) if np.ndim(end) == 0 else tuple(end) + self.start = start + self.end = end + + state.pop("kwds", {}) + state.pop("next_bday", None) + BusinessMixin.__setstate__(self, state) + + def _repr_attrs(self) -> str: + out = super()._repr_attrs() + hours = ",".join( + f'{st.strftime("%H:%M")}-{en.strftime("%H:%M")}' + for st, en in zip(self.start, self.end) + ) + attrs = [f"{self._prefix}={hours}"] + out += ": " + ", ".join(attrs) + return out + + def _get_business_hours_by_sec(self, start, end): + """ + Return business hours in a day by seconds. + """ + # create dummy datetime to calculate business hours in a day + dtstart = datetime(2014, 4, 1, start.hour, start.minute) + day = 1 if start < end else 2 + until = datetime(2014, 4, day, end.hour, end.minute) + return int((until - dtstart).total_seconds()) + + def _get_closing_time(self, dt: datetime) -> datetime: + """ + Get the closing time of a business hour interval by its opening time. + + Parameters + ---------- + dt : datetime + Opening time of a business hour interval. + + Returns + ------- + result : datetime + Corresponding closing time. + """ + for i, st in enumerate(self.start): + if st.hour == dt.hour and st.minute == dt.minute: + return dt + timedelta( + seconds=self._get_business_hours_by_sec(st, self.end[i]) + ) + assert False + + @cache_readonly + def next_bday(self): + """ + Used for moving to next business day. + """ + if self.n >= 0: + nb_offset = 1 + else: + nb_offset = -1 + if self._prefix.startswith("C"): + # CustomBusinessHour + return CustomBusinessDay( + n=nb_offset, + weekmask=self.weekmask, + holidays=self.holidays, + calendar=self.calendar, + ) + else: + return BusinessDay(n=nb_offset) + + def _next_opening_time(self, other, sign=1): + """ + If self.n and sign have the same sign, return the earliest opening time + later than or equal to current time. + Otherwise the latest opening time earlier than or equal to current + time. + + Opening time always locates on BusinessDay. + However, closing time may not if business hour extends over midnight. + + Parameters + ---------- + other : datetime + Current time. + sign : int, default 1. + Either 1 or -1. Going forward in time if it has the same sign as + self.n. 
Going backward in time otherwise. + + Returns + ------- + result : datetime + Next opening time. + """ + earliest_start = self.start[0] + latest_start = self.start[-1] + + if not self.next_bday.is_on_offset(other): + # today is not business day + other = other + sign * self.next_bday + if self.n * sign >= 0: + hour, minute = earliest_start.hour, earliest_start.minute + else: + hour, minute = latest_start.hour, latest_start.minute + else: + if self.n * sign >= 0: + if latest_start < other.time(): + # current time is after latest starting time in today + other = other + sign * self.next_bday + hour, minute = earliest_start.hour, earliest_start.minute + else: + # find earliest starting time no earlier than current time + for st in self.start: + if other.time() <= st: + hour, minute = st.hour, st.minute + break + else: + if other.time() < earliest_start: + # current time is before earliest starting time in today + other = other + sign * self.next_bday + hour, minute = latest_start.hour, latest_start.minute + else: + # find latest starting time no later than current time + for st in reversed(self.start): + if other.time() >= st: + hour, minute = st.hour, st.minute + break + + return datetime(other.year, other.month, other.day, hour, minute) + + def _prev_opening_time(self, other: datetime) -> datetime: + """ + If n is positive, return the latest opening time earlier than or equal + to current time. + Otherwise the earliest opening time later than or equal to current + time. + + Parameters + ---------- + other : datetime + Current time. + + Returns + ------- + result : datetime + Previous opening time. + """ + return self._next_opening_time(other, sign=-1) + + @apply_wraps + def rollback(self, dt: datetime) -> datetime: + """ + Roll provided date backward to next offset only if not on offset. + """ + if not self.is_on_offset(dt): + if self.n >= 0: + dt = self._prev_opening_time(dt) + else: + dt = self._next_opening_time(dt) + return self._get_closing_time(dt) + return dt + + @apply_wraps + def rollforward(self, dt: datetime) -> datetime: + """ + Roll provided date forward to next offset only if not on offset. 
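+
+        An illustrative example (default 09:00-17:00 business hours;
+        2022-08-06 is a Saturday):
+
+        >>> pd.offsets.BusinessHour().rollforward(pd.Timestamp('2022-08-06 10:00'))
+        Timestamp('2022-08-08 09:00:00')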
+ """ + if not self.is_on_offset(dt): + if self.n >= 0: + return self._next_opening_time(dt) + else: + return self._prev_opening_time(dt) + return dt + + @apply_wraps + def _apply(self, other: datetime) -> datetime: + # used for detecting edge condition + nanosecond = getattr(other, "nanosecond", 0) + # reset timezone and nanosecond + # other may be a Timestamp, thus not use replace + other = datetime( + other.year, + other.month, + other.day, + other.hour, + other.minute, + other.second, + other.microsecond, + ) + n = self.n + + # adjust other to reduce number of cases to handle + if n >= 0: + if other.time() in self.end or not self._is_on_offset(other): + other = self._next_opening_time(other) + else: + if other.time() in self.start: + # adjustment to move to previous business day + other = other - timedelta(seconds=1) + if not self._is_on_offset(other): + other = self._next_opening_time(other) + other = self._get_closing_time(other) + + # get total business hours by sec in one business day + businesshours = sum( + self._get_business_hours_by_sec(st, en) + for st, en in zip(self.start, self.end) + ) + + bd, r = divmod(abs(n * 60), businesshours // 60) + if n < 0: + bd, r = -bd, -r + + # adjust by business days first + if bd != 0: + if self._prefix.startswith("C"): + # GH#30593 this is a Custom offset + skip_bd = CustomBusinessDay( + n=bd, + weekmask=self.weekmask, + holidays=self.holidays, + calendar=self.calendar, + ) + else: + skip_bd = BusinessDay(n=bd) + # midnight business hour may not on BusinessDay + if not self.next_bday.is_on_offset(other): + prev_open = self._prev_opening_time(other) + remain = other - prev_open + other = prev_open + skip_bd + remain + else: + other = other + skip_bd + + # remaining business hours to adjust + bhour_remain = timedelta(minutes=r) + + if n >= 0: + while bhour_remain != timedelta(0): + # business hour left in this business time interval + bhour = ( + self._get_closing_time(self._prev_opening_time(other)) - other + ) + if bhour_remain < bhour: + # finish adjusting if possible + other += bhour_remain + bhour_remain = timedelta(0) + else: + # go to next business time interval + bhour_remain -= bhour + other = self._next_opening_time(other + bhour) + else: + while bhour_remain != timedelta(0): + # business hour left in this business time interval + bhour = self._next_opening_time(other) - other + if ( + bhour_remain > bhour + or bhour_remain == bhour + and nanosecond != 0 + ): + # finish adjusting if possible + other += bhour_remain + bhour_remain = timedelta(0) + else: + # go to next business time interval + bhour_remain -= bhour + other = self._get_closing_time( + self._next_opening_time( + other + bhour - timedelta(seconds=1) + ) + ) + + return other + + def is_on_offset(self, dt: datetime) -> bool: + if self.normalize and not _is_normalized(dt): + return False + + if dt.tzinfo is not None: + dt = datetime( + dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second, dt.microsecond + ) + # Valid BH can be on the different BusinessDay during midnight + # Distinguish by the time spent from previous opening time + return self._is_on_offset(dt) + + def _is_on_offset(self, dt: datetime) -> bool: + """ + Slight speedups using calculated values. 
+ """ + # if self.normalize and not _is_normalized(dt): + # return False + # Valid BH can be on the different BusinessDay during midnight + # Distinguish by the time spent from previous opening time + if self.n >= 0: + op = self._prev_opening_time(dt) + else: + op = self._next_opening_time(dt) + span = (dt - op).total_seconds() + businesshours = 0 + for i, st in enumerate(self.start): + if op.hour == st.hour and op.minute == st.minute: + businesshours = self._get_business_hours_by_sec(st, self.end[i]) + if span <= businesshours: + return True + else: + return False + + +cdef class WeekOfMonthMixin(SingleConstructorOffset): + """ + Mixin for methods common to WeekOfMonth and LastWeekOfMonth. + """ + + cdef readonly: + int weekday, week + + def __init__(self, n=1, normalize=False, weekday=0): + BaseOffset.__init__(self, n, normalize) + self.weekday = weekday + + if weekday < 0 or weekday > 6: + raise ValueError(f"Day must be 0<=day<=6, got {weekday}") + + @apply_wraps + def _apply(self, other: datetime) -> datetime: + compare_day = self._get_offset_day(other) + + months = self.n + months = roll_convention(other.day, months, compare_day) + + shifted = shift_month(other, months, "start") + to_day = self._get_offset_day(shifted) + return shift_day(shifted, to_day - shifted.day) + + def is_on_offset(self, dt: datetime) -> bool: + if self.normalize and not _is_normalized(dt): + return False + return dt.day == self._get_offset_day(dt) + + @property + def rule_code(self) -> str: + weekday = int_to_weekday.get(self.weekday, "") + if self.week == -1: + # LastWeekOfMonth + return f"{self._prefix}-{weekday}" + return f"{self._prefix}-{self.week + 1}{weekday}" + + +# ---------------------------------------------------------------------- +# Year-Based Offset Classes + +cdef class YearOffset(SingleConstructorOffset): + """ + DateOffset that just needs a month. 
+ """ + _attributes = tuple(["n", "normalize", "month"]) + + # FIXME(cython#4446): python annotation here gives compile-time errors + # _default_month: int + + cdef readonly: + int month + + def __init__(self, n=1, normalize=False, month=None): + BaseOffset.__init__(self, n, normalize) + + month = month if month is not None else self._default_month + self.month = month + + if month < 1 or month > 12: + raise ValueError("Month must go from 1 to 12") + + cpdef __setstate__(self, state): + self.month = state.pop("month") + self.n = state.pop("n") + self.normalize = state.pop("normalize") + self._cache = {} + + @classmethod + def _from_name(cls, suffix=None): + kwargs = {} + if suffix: + kwargs["month"] = MONTH_TO_CAL_NUM[suffix] + return cls(**kwargs) + + @property + def rule_code(self) -> str: + month = MONTH_ALIASES[self.month] + return f"{self._prefix}-{month}" + + def is_on_offset(self, dt: datetime) -> bool: + if self.normalize and not _is_normalized(dt): + return False + return dt.month == self.month and dt.day == self._get_offset_day(dt) + + def _get_offset_day(self, other: datetime) -> int: + # override BaseOffset method to use self.month instead of other.month + cdef: + npy_datetimestruct dts + pydate_to_dtstruct(other, &dts) + dts.month = self.month + return get_day_of_month(&dts, self._day_opt) + + @apply_wraps + def _apply(self, other: datetime) -> datetime: + years = roll_qtrday(other, self.n, self.month, self._day_opt, modby=12) + months = years * 12 + (self.month - other.month) + return shift_month(other, months, self._day_opt) + + @apply_index_wraps + def apply_index(self, dtindex): + return self._apply_array(dtindex) + + @apply_array_wraps + def _apply_array(self, dtarr): + shifted = shift_quarters( + dtarr.view("i8"), self.n, self.month, self._day_opt, modby=12 + ) + return shifted + + +cdef class BYearEnd(YearOffset): + """ + DateOffset increments between the last business day of the year. + + Examples + -------- + >>> from pandas.tseries.offsets import BYearEnd + >>> ts = pd.Timestamp('2020-05-24 05:01:15') + >>> ts - BYearEnd() + Timestamp('2019-12-31 05:01:15') + >>> ts + BYearEnd() + Timestamp('2020-12-31 05:01:15') + >>> ts + BYearEnd(3) + Timestamp('2022-12-30 05:01:15') + >>> ts + BYearEnd(-3) + Timestamp('2017-12-29 05:01:15') + >>> ts + BYearEnd(month=11) + Timestamp('2020-11-30 05:01:15') + """ + + _outputName = "BusinessYearEnd" + _default_month = 12 + _prefix = "BA" + _day_opt = "business_end" + + +cdef class BYearBegin(YearOffset): + """ + DateOffset increments between the first business day of the year. + + Examples + -------- + >>> from pandas.tseries.offsets import BYearBegin + >>> ts = pd.Timestamp('2020-05-24 05:01:15') + >>> ts + BYearBegin() + Timestamp('2021-01-01 05:01:15') + >>> ts - BYearBegin() + Timestamp('2020-01-01 05:01:15') + >>> ts + BYearBegin(-1) + Timestamp('2020-01-01 05:01:15') + >>> ts + BYearBegin(2) + Timestamp('2022-01-03 05:01:15') + """ + + _outputName = "BusinessYearBegin" + _default_month = 1 + _prefix = "BAS" + _day_opt = "business_start" + + +cdef class YearEnd(YearOffset): + """ + DateOffset increments between calendar year ends. 
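+
+    An illustrative example:
+
+    >>> ts = pd.Timestamp('2022-06-15')
+    >>> ts + pd.offsets.YearEnd()
+    Timestamp('2022-12-31 00:00:00')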
+ """ + + _default_month = 12 + _prefix = "A" + _day_opt = "end" + + cdef readonly: + int _period_dtype_code + + def __init__(self, n=1, normalize=False, month=None): + # Because YearEnd can be the freq for a Period, define its + # _period_dtype_code at construction for performance + YearOffset.__init__(self, n, normalize, month) + self._period_dtype_code = PeriodDtypeCode.A + self.month % 12 + + +cdef class YearBegin(YearOffset): + """ + DateOffset increments between calendar year begin dates. + """ + + _default_month = 1 + _prefix = "AS" + _day_opt = "start" + + +# ---------------------------------------------------------------------- +# Quarter-Based Offset Classes + +cdef class QuarterOffset(SingleConstructorOffset): + _attributes = tuple(["n", "normalize", "startingMonth"]) + # TODO: Consider combining QuarterOffset and YearOffset __init__ at some + # point. Also apply_index, is_on_offset, rule_code if + # startingMonth vs month attr names are resolved + + # FIXME(cython#4446): python annotation here gives compile-time errors + # _default_starting_month: int + # _from_name_starting_month: int + + cdef readonly: + int startingMonth + + def __init__(self, n=1, normalize=False, startingMonth=None): + BaseOffset.__init__(self, n, normalize) + + if startingMonth is None: + startingMonth = self._default_starting_month + self.startingMonth = startingMonth + + cpdef __setstate__(self, state): + self.startingMonth = state.pop("startingMonth") + self.n = state.pop("n") + self.normalize = state.pop("normalize") + + @classmethod + def _from_name(cls, suffix=None): + kwargs = {} + if suffix: + kwargs["startingMonth"] = MONTH_TO_CAL_NUM[suffix] + else: + if cls._from_name_starting_month is not None: + kwargs["startingMonth"] = cls._from_name_starting_month + return cls(**kwargs) + + @property + def rule_code(self) -> str: + month = MONTH_ALIASES[self.startingMonth] + return f"{self._prefix}-{month}" + + def is_anchored(self) -> bool: + return self.n == 1 and self.startingMonth is not None + + def is_on_offset(self, dt: datetime) -> bool: + if self.normalize and not _is_normalized(dt): + return False + mod_month = (dt.month - self.startingMonth) % 3 + return mod_month == 0 and dt.day == self._get_offset_day(dt) + + @apply_wraps + def _apply(self, other: datetime) -> datetime: + # months_since: find the calendar quarter containing other.month, + # e.g. if other.month == 8, the calendar quarter is [Jul, Aug, Sep]. + # Then find the month in that quarter containing an is_on_offset date for + # self. `months_since` is the number of months to shift other.month + # to get to this on-offset month. + months_since = other.month % 3 - self.startingMonth % 3 + qtrs = roll_qtrday( + other, self.n, self.startingMonth, day_opt=self._day_opt, modby=3 + ) + months = qtrs * 3 - months_since + return shift_month(other, months, self._day_opt) + + @apply_index_wraps + def apply_index(self, dtindex): + return self._apply_array(dtindex) + + @apply_array_wraps + def _apply_array(self, dtarr): + shifted = shift_quarters( + dtarr.view("i8"), self.n, self.startingMonth, self._day_opt + ) + return shifted + + +cdef class BQuarterEnd(QuarterOffset): + """ + DateOffset increments between the last business day of each Quarter. + + startingMonth = 1 corresponds to dates like 1/31/2007, 4/30/2007, ... + startingMonth = 2 corresponds to dates like 2/28/2007, 5/31/2007, ... + startingMonth = 3 corresponds to dates like 3/30/2007, 6/29/2007, ... 
+ + Examples + -------- + >>> from pandas.tseries.offsets import BQuarterEnd + >>> ts = pd.Timestamp('2020-05-24 05:01:15') + >>> ts + BQuarterEnd() + Timestamp('2020-06-30 05:01:15') + >>> ts + BQuarterEnd(2) + Timestamp('2020-09-30 05:01:15') + >>> ts + BQuarterEnd(1, startingMonth=2) + Timestamp('2020-05-29 05:01:15') + >>> ts + BQuarterEnd(startingMonth=2) + Timestamp('2020-05-29 05:01:15') + """ + _output_name = "BusinessQuarterEnd" + _default_starting_month = 3 + _from_name_starting_month = 12 + _prefix = "BQ" + _day_opt = "business_end" + + +cdef class BQuarterBegin(QuarterOffset): + """ + DateOffset increments between the first business day of each Quarter. + + startingMonth = 1 corresponds to dates like 1/01/2007, 4/01/2007, ... + startingMonth = 2 corresponds to dates like 2/01/2007, 5/01/2007, ... + startingMonth = 3 corresponds to dates like 3/01/2007, 6/01/2007, ... + + Examples + -------- + >>> from pandas.tseries.offsets import BQuarterBegin + >>> ts = pd.Timestamp('2020-05-24 05:01:15') + >>> ts + BQuarterBegin() + Timestamp('2020-06-01 05:01:15') + >>> ts + BQuarterBegin(2) + Timestamp('2020-09-01 05:01:15') + >>> ts + BQuarterBegin(startingMonth=2) + Timestamp('2020-08-03 05:01:15') + >>> ts + BQuarterBegin(-1) + Timestamp('2020-03-02 05:01:15') + """ + _output_name = "BusinessQuarterBegin" + _default_starting_month = 3 + _from_name_starting_month = 1 + _prefix = "BQS" + _day_opt = "business_start" + + +cdef class QuarterEnd(QuarterOffset): + """ + DateOffset increments between Quarter end dates. + + startingMonth = 1 corresponds to dates like 1/31/2007, 4/30/2007, ... + startingMonth = 2 corresponds to dates like 2/28/2007, 5/31/2007, ... + startingMonth = 3 corresponds to dates like 3/31/2007, 6/30/2007, ... + """ + _default_starting_month = 3 + _prefix = "Q" + _day_opt = "end" + + cdef readonly: + int _period_dtype_code + + def __init__(self, n=1, normalize=False, startingMonth=None): + # Because QuarterEnd can be the freq for a Period, define its + # _period_dtype_code at construction for performance + QuarterOffset.__init__(self, n, normalize, startingMonth) + self._period_dtype_code = PeriodDtypeCode.Q_DEC + self.startingMonth % 12 + + +cdef class QuarterBegin(QuarterOffset): + """ + DateOffset increments between Quarter start dates. + + startingMonth = 1 corresponds to dates like 1/01/2007, 4/01/2007, ... + startingMonth = 2 corresponds to dates like 2/01/2007, 5/01/2007, ... + startingMonth = 3 corresponds to dates like 3/01/2007, 6/01/2007, ... 
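+
+    An illustrative example (default startingMonth=3):
+
+    >>> ts = pd.Timestamp('2020-05-24')
+    >>> ts + pd.offsets.QuarterBegin()
+    Timestamp('2020-06-01 00:00:00')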
+ """ + _default_starting_month = 3 + _from_name_starting_month = 1 + _prefix = "QS" + _day_opt = "start" + + +# ---------------------------------------------------------------------- +# Month-Based Offset Classes + +cdef class MonthOffset(SingleConstructorOffset): + def is_on_offset(self, dt: datetime) -> bool: + if self.normalize and not _is_normalized(dt): + return False + return dt.day == self._get_offset_day(dt) + + @apply_wraps + def _apply(self, other: datetime) -> datetime: + compare_day = self._get_offset_day(other) + n = roll_convention(other.day, self.n, compare_day) + return shift_month(other, n, self._day_opt) + + @apply_index_wraps + def apply_index(self, dtindex): + return self._apply_array(dtindex) + + @apply_array_wraps + def _apply_array(self, dtarr): + shifted = shift_months(dtarr.view("i8"), self.n, self._day_opt) + return shifted + + cpdef __setstate__(self, state): + state.pop("_use_relativedelta", False) + state.pop("offset", None) + state.pop("_offset", None) + state.pop("kwds", {}) + + BaseOffset.__setstate__(self, state) + + +cdef class MonthEnd(MonthOffset): + """ + DateOffset of one month end. + """ + _period_dtype_code = PeriodDtypeCode.M + _prefix = "M" + _day_opt = "end" + + +cdef class MonthBegin(MonthOffset): + """ + DateOffset of one month at beginning. + """ + _prefix = "MS" + _day_opt = "start" + + +cdef class BusinessMonthEnd(MonthOffset): + """ + DateOffset increments between the last business day of the month. + + Examples + -------- + >>> from pandas.tseries.offsets import BMonthEnd + >>> ts = pd.Timestamp('2020-05-24 05:01:15') + >>> ts + BMonthEnd() + Timestamp('2020-05-29 05:01:15') + >>> ts + BMonthEnd(2) + Timestamp('2020-06-30 05:01:15') + >>> ts + BMonthEnd(-2) + Timestamp('2020-03-31 05:01:15') + """ + _prefix = "BM" + _day_opt = "business_end" + + +cdef class BusinessMonthBegin(MonthOffset): + """ + DateOffset of one month at the first business day. 
+ + Examples + -------- + >>> from pandas.tseries.offsets import BMonthBegin + >>> ts=pd.Timestamp('2020-05-24 05:01:15') + >>> ts + BMonthBegin() + Timestamp('2020-06-01 05:01:15') + >>> ts + BMonthBegin(2) + Timestamp('2020-07-01 05:01:15') + >>> ts + BMonthBegin(-3) + Timestamp('2020-03-02 05:01:15') + """ + _prefix = "BMS" + _day_opt = "business_start" + + +# --------------------------------------------------------------------- +# Semi-Month Based Offsets + +cdef class SemiMonthOffset(SingleConstructorOffset): + _default_day_of_month = 15 + _min_day_of_month = 2 + _attributes = tuple(["n", "normalize", "day_of_month"]) + + cdef readonly: + int day_of_month + + def __init__(self, n=1, normalize=False, day_of_month=None): + BaseOffset.__init__(self, n, normalize) + + if day_of_month is None: + day_of_month = self._default_day_of_month + + self.day_of_month = int(day_of_month) + if not self._min_day_of_month <= self.day_of_month <= 27: + raise ValueError( + "day_of_month must be " + f"{self._min_day_of_month}<=day_of_month<=27, " + f"got {self.day_of_month}" + ) + + cpdef __setstate__(self, state): + self.n = state.pop("n") + self.normalize = state.pop("normalize") + self.day_of_month = state.pop("day_of_month") + + @classmethod + def _from_name(cls, suffix=None): + return cls(day_of_month=suffix) + + @property + def rule_code(self) -> str: + suffix = f"-{self.day_of_month}" + return self._prefix + suffix + + @apply_wraps + def _apply(self, other: datetime) -> datetime: + is_start = isinstance(self, SemiMonthBegin) + + # shift `other` to self.day_of_month, incrementing `n` if necessary + n = roll_convention(other.day, self.n, self.day_of_month) + + days_in_month = get_days_in_month(other.year, other.month) + # For SemiMonthBegin on other.day == 1 and + # SemiMonthEnd on other.day == days_in_month, + # shifting `other` to `self.day_of_month` _always_ requires + # incrementing/decrementing `n`, regardless of whether it is + # initially positive. + if is_start and (self.n <= 0 and other.day == 1): + n -= 1 + elif (not is_start) and (self.n > 0 and other.day == days_in_month): + n += 1 + + if is_start: + months = n // 2 + n % 2 + to_day = 1 if n % 2 else self.day_of_month + else: + months = n // 2 + to_day = 31 if n % 2 else self.day_of_month + + return shift_month(other, months, to_day) + + @apply_index_wraps + @cython.wraparound(False) + @cython.boundscheck(False) + def apply_index(self, dtindex): + return self._apply_array(dtindex) + + @apply_array_wraps + @cython.wraparound(False) + @cython.boundscheck(False) + def _apply_array(self, dtarr): + cdef: + int64_t[:] i8other = dtarr.view("i8") + Py_ssize_t i, count = len(i8other) + int64_t val + int64_t[:] out = np.empty(count, dtype="i8") + npy_datetimestruct dts + int months, to_day, nadj, n = self.n + int days_in_month, day, anchor_dom = self.day_of_month + bint is_start = isinstance(self, SemiMonthBegin) + + with nogil: + for i in range(count): + val = i8other[i] + if val == NPY_NAT: + out[i] = NPY_NAT + continue + + dt64_to_dtstruct(val, &dts) + day = dts.day + + # Adjust so that we are always looking at self.day_of_month, + # incrementing/decrementing n if necessary. + nadj = roll_convention(day, n, anchor_dom) + + days_in_month = get_days_in_month(dts.year, dts.month) + # For SemiMonthBegin on other.day == 1 and + # SemiMonthEnd on other.day == days_in_month, + # shifting `other` to `self.day_of_month` _always_ requires + # incrementing/decrementing `n`, regardless of whether it is + # initially positive. 
+                if is_start and (n <= 0 and day == 1):
+                    nadj -= 1
+                elif (not is_start) and (n > 0 and day == days_in_month):
+                    nadj += 1
+
+                if is_start:
+                    # See also: SemiMonthBegin._apply
+                    months = nadj // 2 + nadj % 2
+                    to_day = 1 if nadj % 2 else anchor_dom
+
+                else:
+                    # See also: SemiMonthEnd._apply
+                    months = nadj // 2
+                    to_day = 31 if nadj % 2 else anchor_dom
+
+                dts.year = year_add_months(dts, months)
+                dts.month = month_add_months(dts, months)
+                days_in_month = get_days_in_month(dts.year, dts.month)
+                dts.day = min(to_day, days_in_month)
+
+                out[i] = dtstruct_to_dt64(&dts)
+
+        return out.base
+
+
+cdef class SemiMonthEnd(SemiMonthOffset):
+    """
+    Two DateOffsets per month repeating on the last
+    day of the month and day_of_month.
+
+    Parameters
+    ----------
+    n : int
+    normalize : bool, default False
+    day_of_month : int, {1, 2, ..., 27}, default 15
+    """
+
+    _prefix = "SM"
+    _min_day_of_month = 1
+
+    def is_on_offset(self, dt: datetime) -> bool:
+        if self.normalize and not _is_normalized(dt):
+            return False
+        days_in_month = get_days_in_month(dt.year, dt.month)
+        return dt.day in (self.day_of_month, days_in_month)
+
+
+cdef class SemiMonthBegin(SemiMonthOffset):
+    """
+    Two DateOffsets per month repeating on the first
+    day of the month and day_of_month.
+
+    Parameters
+    ----------
+    n : int
+    normalize : bool, default False
+    day_of_month : int, {2, 3, ..., 27}, default 15
+    """
+
+    _prefix = "SMS"
+
+    def is_on_offset(self, dt: datetime) -> bool:
+        if self.normalize and not _is_normalized(dt):
+            return False
+        return dt.day in (1, self.day_of_month)
+
+
+# ---------------------------------------------------------------------
+# Week-Based Offset Classes
+
+
+cdef class Week(SingleConstructorOffset):
+    """
+    Weekly offset.
+
+    Parameters
+    ----------
+    weekday : int or None, default None
+        Always generate a specific day of the week. 0 for Monday.
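+
+    An illustrative example (weekday=4 anchors on Fridays):
+
+    >>> ts = pd.Timestamp('2022-08-01')  # a Monday
+    >>> ts + pd.offsets.Week(weekday=4)
+    Timestamp('2022-08-05 00:00:00')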
+ """ + + _inc = timedelta(weeks=1) + _prefix = "W" + _attributes = tuple(["n", "normalize", "weekday"]) + + cdef readonly: + object weekday # int or None + int _period_dtype_code + + def __init__(self, n=1, normalize=False, weekday=None): + BaseOffset.__init__(self, n, normalize) + self.weekday = weekday + + if self.weekday is not None: + if self.weekday < 0 or self.weekday > 6: + raise ValueError(f"Day must be 0<=day<=6, got {self.weekday}") + + self._period_dtype_code = PeriodDtypeCode.W_SUN + (weekday + 1) % 7 + + cpdef __setstate__(self, state): + self.n = state.pop("n") + self.normalize = state.pop("normalize") + self.weekday = state.pop("weekday") + self._cache = state.pop("_cache", {}) + + def is_anchored(self) -> bool: + return self.n == 1 and self.weekday is not None + + @apply_wraps + def _apply(self, other): + if self.weekday is None: + return other + self.n * self._inc + + if not PyDateTime_Check(other): + raise TypeError( + f"Cannot add {type(other).__name__} to {type(self).__name__}" + ) + + k = self.n + otherDay = other.weekday() + if otherDay != self.weekday: + other = other + timedelta((self.weekday - otherDay) % 7) + if k > 0: + k -= 1 + + return other + timedelta(weeks=k) + + @apply_index_wraps + def apply_index(self, dtindex): + return self._apply_array(dtindex) + + @apply_array_wraps + def _apply_array(self, dtarr): + if self.weekday is None: + td = timedelta(days=7 * self.n) + td64 = np.timedelta64(td, "ns") + return dtarr + td64 + else: + i8other = dtarr.view("i8") + return self._end_apply_index(i8other) + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef _end_apply_index(self, const int64_t[:] i8other): + """ + Add self to the given DatetimeIndex, specialized for case where + self.weekday is non-null. + + Parameters + ---------- + i8other : const int64_t[:] + + Returns + ------- + ndarray[int64_t] + """ + cdef: + Py_ssize_t i, count = len(i8other) + int64_t val + int64_t[:] out = np.empty(count, dtype="i8") + npy_datetimestruct dts + int wday, days, weeks, n = self.n + int anchor_weekday = self.weekday + + with nogil: + for i in range(count): + val = i8other[i] + if val == NPY_NAT: + out[i] = NPY_NAT + continue + + dt64_to_dtstruct(val, &dts) + wday = dayofweek(dts.year, dts.month, dts.day) + + days = 0 + weeks = n + if wday != anchor_weekday: + days = (anchor_weekday - wday) % 7 + if weeks > 0: + weeks -= 1 + + out[i] = val + (7 * weeks + days) * DAY_NANOS + + return out.base + + def is_on_offset(self, dt: datetime) -> bool: + if self.normalize and not _is_normalized(dt): + return False + elif self.weekday is None: + return True + return dt.weekday() == self.weekday + + @property + def rule_code(self) -> str: + suffix = "" + if self.weekday is not None: + weekday = int_to_weekday[self.weekday] + suffix = f"-{weekday}" + return self._prefix + suffix + + @classmethod + def _from_name(cls, suffix=None): + if not suffix: + weekday = None + else: + weekday = weekday_to_int[suffix] + return cls(weekday=weekday) + + +cdef class WeekOfMonth(WeekOfMonthMixin): + """ + Describes monthly dates like "the Tuesday of the 2nd week of each month". + + Parameters + ---------- + n : int + week : int {0, 1, 2, 3, ...}, default 0 + A specific integer for the week of the month. + e.g. 0 is 1st week of month, 1 is the 2nd week, etc. + weekday : int {0, 1, ..., 6}, default 0 + A specific integer for the day of the week. + + - 0 is Monday + - 1 is Tuesday + - 2 is Wednesday + - 3 is Thursday + - 4 is Friday + - 5 is Saturday + - 6 is Sunday. 
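+
+    An illustrative example (the second Wednesday of the month):
+
+    >>> ts = pd.Timestamp('2022-08-01')
+    >>> ts + pd.offsets.WeekOfMonth(week=1, weekday=2)
+    Timestamp('2022-08-10 00:00:00')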
+ """ + + _prefix = "WOM" + _attributes = tuple(["n", "normalize", "week", "weekday"]) + + def __init__(self, n=1, normalize=False, week=0, weekday=0): + WeekOfMonthMixin.__init__(self, n, normalize, weekday) + self.week = week + + if self.week < 0 or self.week > 3: + raise ValueError(f"Week must be 0<=week<=3, got {self.week}") + + cpdef __setstate__(self, state): + self.n = state.pop("n") + self.normalize = state.pop("normalize") + self.weekday = state.pop("weekday") + self.week = state.pop("week") + + def _get_offset_day(self, other: datetime) -> int: + """ + Find the day in the same month as other that has the same + weekday as self.weekday and is the self.week'th such day in the month. + + Parameters + ---------- + other : datetime + + Returns + ------- + day : int + """ + mstart = datetime(other.year, other.month, 1) + wday = mstart.weekday() + shift_days = (self.weekday - wday) % 7 + return 1 + shift_days + self.week * 7 + + @classmethod + def _from_name(cls, suffix=None): + if not suffix: + raise ValueError(f"Prefix {repr(cls._prefix)} requires a suffix.") + # only one digit weeks (1 --> week 0, 2 --> week 1, etc.) + week = int(suffix[0]) - 1 + weekday = weekday_to_int[suffix[1:]] + return cls(week=week, weekday=weekday) + + +cdef class LastWeekOfMonth(WeekOfMonthMixin): + """ + Describes monthly dates in last week of month like "the last Tuesday of + each month". + + Parameters + ---------- + n : int, default 1 + weekday : int {0, 1, ..., 6}, default 0 + A specific integer for the day of the week. + + - 0 is Monday + - 1 is Tuesday + - 2 is Wednesday + - 3 is Thursday + - 4 is Friday + - 5 is Saturday + - 6 is Sunday. + """ + + _prefix = "LWOM" + _attributes = tuple(["n", "normalize", "weekday"]) + + def __init__(self, n=1, normalize=False, weekday=0): + WeekOfMonthMixin.__init__(self, n, normalize, weekday) + self.week = -1 + + if self.n == 0: + raise ValueError("N cannot be 0") + + cpdef __setstate__(self, state): + self.n = state.pop("n") + self.normalize = state.pop("normalize") + self.weekday = state.pop("weekday") + self.week = -1 + + def _get_offset_day(self, other: datetime) -> int: + """ + Find the day in the same month as other that has the same + weekday as self.weekday and is the last such day in the month. 
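+
+        For example, the last Saturday (weekday=5) of November 2017 falls on
+        day 25: the month ends on Thursday the 30th, so (3 - 5) % 7 == 5 days
+        are stepped back from day 30 (a worked illustration, values assumed):
+
+        >>> from datetime import datetime
+        >>> LastWeekOfMonth(weekday=5)._get_offset_day(datetime(2017, 11, 14))
+        25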
+ + Parameters + ---------- + other: datetime + + Returns + ------- + day: int + """ + dim = get_days_in_month(other.year, other.month) + mend = datetime(other.year, other.month, dim) + wday = mend.weekday() + shift_days = (wday - self.weekday) % 7 + return dim - shift_days + + @classmethod + def _from_name(cls, suffix=None): + if not suffix: + raise ValueError(f"Prefix {repr(cls._prefix)} requires a suffix.") + weekday = weekday_to_int[suffix] + return cls(weekday=weekday) + + +# --------------------------------------------------------------------- +# Special Offset Classes + +cdef class FY5253Mixin(SingleConstructorOffset): + cdef readonly: + int startingMonth + int weekday + str variation + + def __init__( + self, n=1, normalize=False, weekday=0, startingMonth=1, variation="nearest" + ): + BaseOffset.__init__(self, n, normalize) + self.startingMonth = startingMonth + self.weekday = weekday + self.variation = variation + + if self.n == 0: + raise ValueError("N cannot be 0") + + if self.variation not in ["nearest", "last"]: + raise ValueError(f"{self.variation} is not a valid variation") + + cpdef __setstate__(self, state): + self.n = state.pop("n") + self.normalize = state.pop("normalize") + self.weekday = state.pop("weekday") + self.variation = state.pop("variation") + + def is_anchored(self) -> bool: + return ( + self.n == 1 and self.startingMonth is not None and self.weekday is not None + ) + + # -------------------------------------------------------------------- + # Name-related methods + + @property + def rule_code(self) -> str: + prefix = self._prefix + suffix = self.get_rule_code_suffix() + return f"{prefix}-{suffix}" + + def _get_suffix_prefix(self) -> str: + if self.variation == "nearest": + return "N" + else: + return "L" + + def get_rule_code_suffix(self) -> str: + prefix = self._get_suffix_prefix() + month = MONTH_ALIASES[self.startingMonth] + weekday = int_to_weekday[self.weekday] + return f"{prefix}-{month}-{weekday}" + + +cdef class FY5253(FY5253Mixin): + """ + Describes 52-53 week fiscal year. This is also known as a 4-4-5 calendar. + + It is used by companies that desire that their + fiscal year always end on the same day of the week. + + It is a method of managing accounting periods. + It is a common calendar structure for some industries, + such as retail, manufacturing and parking industry. + + For more information see: + https://en.wikipedia.org/wiki/4-4-5_calendar + + The year may either: + + - end on the last X day of the Y month. + - end on the last X day closest to the last day of the Y month. + + X is a specific day of the week. + Y is a certain month of the year + + Parameters + ---------- + n : int + weekday : int {0, 1, ..., 6}, default 0 + A specific integer for the day of the week. + + - 0 is Monday + - 1 is Tuesday + - 2 is Wednesday + - 3 is Thursday + - 4 is Friday + - 5 is Saturday + - 6 is Sunday. + + startingMonth : int {1, 2, ... 12}, default 1 + The month in which the fiscal year ends. + + variation : str, default "nearest" + Method of employing 4-4-5 calendar. + + There are two options: + + - "nearest" means year end is **weekday** closest to last day of month in year. + - "last" means year end is final **weekday** of the final month in fiscal year. 
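+
+    A usage sketch (dates chosen for illustration):
+
+    >>> import pandas as pd
+    >>> fy = pd.offsets.FY5253(weekday=5, startingMonth=12, variation="last")
+    >>> pd.Timestamp(2021, 1, 1) + fy   # last Saturday of December 2021
+    Timestamp('2021-12-25 00:00:00')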
+ """ + + _prefix = "RE" + _attributes = tuple(["n", "normalize", "weekday", "startingMonth", "variation"]) + + def is_on_offset(self, dt: datetime) -> bool: + if self.normalize and not _is_normalized(dt): + return False + dt = datetime(dt.year, dt.month, dt.day) + year_end = self.get_year_end(dt) + + if self.variation == "nearest": + # We have to check the year end of "this" cal year AND the previous + return year_end == dt or self.get_year_end(shift_month(dt, -1, None)) == dt + else: + return year_end == dt + + @apply_wraps + def _apply(self, other: datetime) -> datetime: + norm = Timestamp(other).normalize() + + n = self.n + prev_year = self.get_year_end(datetime(other.year - 1, self.startingMonth, 1)) + cur_year = self.get_year_end(datetime(other.year, self.startingMonth, 1)) + next_year = self.get_year_end(datetime(other.year + 1, self.startingMonth, 1)) + + prev_year = localize_pydatetime(prev_year, other.tzinfo) + cur_year = localize_pydatetime(cur_year, other.tzinfo) + next_year = localize_pydatetime(next_year, other.tzinfo) + + # Note: next_year.year == other.year + 1, so we will always + # have other < next_year + if norm == prev_year: + n -= 1 + elif norm == cur_year: + pass + elif n > 0: + if norm < prev_year: + n -= 2 + elif prev_year < norm < cur_year: + n -= 1 + elif cur_year < norm < next_year: + pass + else: + if cur_year < norm < next_year: + n += 1 + elif prev_year < norm < cur_year: + pass + elif ( + norm.year == prev_year.year + and norm < prev_year + and prev_year - norm <= timedelta(6) + ): + # GH#14774, error when next_year.year == cur_year.year + # e.g. prev_year == datetime(2004, 1, 3), + # other == datetime(2004, 1, 1) + n -= 1 + else: + assert False + + shifted = datetime(other.year + n, self.startingMonth, 1) + result = self.get_year_end(shifted) + result = datetime( + result.year, + result.month, + result.day, + other.hour, + other.minute, + other.second, + other.microsecond, + ) + return result + + def get_year_end(self, dt: datetime) -> datetime: + assert dt.tzinfo is None + + dim = get_days_in_month(dt.year, self.startingMonth) + target_date = datetime(dt.year, self.startingMonth, dim) + wkday_diff = self.weekday - target_date.weekday() + if wkday_diff == 0: + # year_end is the same for "last" and "nearest" cases + return target_date + + if self.variation == "last": + days_forward = (wkday_diff % 7) - 7 + + # days_forward is always negative, so we always end up + # in the same year as dt + return target_date + timedelta(days=days_forward) + else: + # variation == "nearest": + days_forward = wkday_diff % 7 + if days_forward <= 3: + # The upcoming self.weekday is closer than the previous one + return target_date + timedelta(days_forward) + else: + # The previous self.weekday is closer than the upcoming one + return target_date + timedelta(days_forward - 7) + + @classmethod + def _parse_suffix(cls, varion_code, startingMonth_code, weekday_code): + if varion_code == "N": + variation = "nearest" + elif varion_code == "L": + variation = "last" + else: + raise ValueError(f"Unable to parse varion_code: {varion_code}") + + startingMonth = MONTH_TO_CAL_NUM[startingMonth_code] + weekday = weekday_to_int[weekday_code] + + return { + "weekday": weekday, + "startingMonth": startingMonth, + "variation": variation, + } + + @classmethod + def _from_name(cls, *args): + return cls(**cls._parse_suffix(*args)) + + +cdef class FY5253Quarter(FY5253Mixin): + """ + DateOffset increments between business quarter dates + for 52-53 week fiscal year (also known as a 4-4-5 calendar). 
+ + It is used by companies that desire that their + fiscal year always end on the same day of the week. + + It is a method of managing accounting periods. + It is a common calendar structure for some industries, + such as retail, manufacturing and parking industry. + + For more information see: + https://en.wikipedia.org/wiki/4-4-5_calendar + + The year may either: + + - end on the last X day of the Y month. + - end on the last X day closest to the last day of the Y month. + + X is a specific day of the week. + Y is a certain month of the year + + startingMonth = 1 corresponds to dates like 1/31/2007, 4/30/2007, ... + startingMonth = 2 corresponds to dates like 2/28/2007, 5/31/2007, ... + startingMonth = 3 corresponds to dates like 3/30/2007, 6/29/2007, ... + + Parameters + ---------- + n : int + weekday : int {0, 1, ..., 6}, default 0 + A specific integer for the day of the week. + + - 0 is Monday + - 1 is Tuesday + - 2 is Wednesday + - 3 is Thursday + - 4 is Friday + - 5 is Saturday + - 6 is Sunday. + + startingMonth : int {1, 2, ..., 12}, default 1 + The month in which fiscal years end. + + qtr_with_extra_week : int {1, 2, 3, 4}, default 1 + The quarter number that has the leap or 14 week when needed. + + variation : str, default "nearest" + Method of employing 4-4-5 calendar. + + There are two options: + + - "nearest" means year end is **weekday** closest to last day of month in year. + - "last" means year end is final **weekday** of the final month in fiscal year. + """ + + _prefix = "REQ" + _attributes = tuple( + [ + "n", + "normalize", + "weekday", + "startingMonth", + "qtr_with_extra_week", + "variation", + ] + ) + + cdef readonly: + int qtr_with_extra_week + + def __init__( + self, + n=1, + normalize=False, + weekday=0, + startingMonth=1, + qtr_with_extra_week=1, + variation="nearest", + ): + FY5253Mixin.__init__( + self, n, normalize, weekday, startingMonth, variation + ) + self.qtr_with_extra_week = qtr_with_extra_week + + cpdef __setstate__(self, state): + FY5253Mixin.__setstate__(self, state) + self.qtr_with_extra_week = state.pop("qtr_with_extra_week") + + @cache_readonly + def _offset(self): + return FY5253( + startingMonth=self.startingMonth, + weekday=self.weekday, + variation=self.variation, + ) + + def _rollback_to_year(self, other: datetime): + """ + Roll `other` back to the most recent date that was on a fiscal year + end. + + Return the date of that year-end, the number of full quarters + elapsed between that year-end and other, and the remaining Timedelta + since the most recent quarter-end. + + Parameters + ---------- + other : datetime or Timestamp + + Returns + ------- + tuple of + prev_year_end : Timestamp giving most recent fiscal year end + num_qtrs : int + tdelta : Timedelta + """ + num_qtrs = 0 + + norm = Timestamp(other).tz_localize(None) + start = self._offset.rollback(norm) + # Note: start <= norm and self._offset.is_on_offset(start) + + if start < norm: + # roll adjustment + qtr_lens = self.get_weeks(norm) + + # check that qtr_lens is consistent with self._offset addition + end = shift_day(start, days=7 * sum(qtr_lens)) + assert self._offset.is_on_offset(end), (start, end, qtr_lens) + + tdelta = norm - start + for qlen in qtr_lens: + if qlen * 7 <= tdelta.days: + num_qtrs += 1 + tdelta -= Timedelta(days=qlen * 7) + else: + break + else: + tdelta = Timedelta(0) + + # Note: we always have tdelta.value >= 0 + return start, num_qtrs, tdelta + + @apply_wraps + def _apply(self, other: datetime) -> datetime: + # Note: self.n == 0 is not allowed. 
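+        # Strategy: roll back to the most recent fiscal year-end, fold
+        # whole years (groups of 4 quarters) into a single FY5253 shift,
+        # then walk forward quarter-by-quarter with get_weeks, which
+        # gives four 13-week quarters or one 14-week leap quarter.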
+ + n = self.n + + prev_year_end, num_qtrs, tdelta = self._rollback_to_year(other) + res = prev_year_end + n += num_qtrs + if self.n <= 0 and tdelta.value > 0: + n += 1 + + # Possible speedup by handling years first. + years = n // 4 + if years: + res += self._offset * years + n -= years * 4 + + # Add an extra day to make *sure* we are getting the quarter lengths + # for the upcoming year, not the previous year + qtr_lens = self.get_weeks(res + Timedelta(days=1)) + + # Note: we always have 0 <= n < 4 + weeks = sum(qtr_lens[:n]) + if weeks: + res = shift_day(res, days=weeks * 7) + + return res + + def get_weeks(self, dt: datetime): + ret = [13] * 4 + + year_has_extra_week = self.year_has_extra_week(dt) + + if year_has_extra_week: + ret[self.qtr_with_extra_week - 1] = 14 + + return ret + + def year_has_extra_week(self, dt: datetime) -> bool: + # Avoid round-down errors --> normalize to get + # e.g. '370D' instead of '360D23H' + norm = Timestamp(dt).normalize().tz_localize(None) + + next_year_end = self._offset.rollforward(norm) + prev_year_end = norm - self._offset + weeks_in_year = (next_year_end - prev_year_end).days / 7 + assert weeks_in_year in [52, 53], weeks_in_year + return weeks_in_year == 53 + + def is_on_offset(self, dt: datetime) -> bool: + if self.normalize and not _is_normalized(dt): + return False + if self._offset.is_on_offset(dt): + return True + + next_year_end = dt - self._offset + + qtr_lens = self.get_weeks(dt) + + current = next_year_end + for qtr_len in qtr_lens: + current = shift_day(current, days=qtr_len * 7) + if dt == current: + return True + return False + + @property + def rule_code(self) -> str: + suffix = FY5253Mixin.rule_code.__get__(self) + qtr = self.qtr_with_extra_week + return f"{suffix}-{qtr}" + + @classmethod + def _from_name(cls, *args): + return cls( + **dict(FY5253._parse_suffix(*args[:-1]), qtr_with_extra_week=int(args[-1])) + ) + + +cdef class Easter(SingleConstructorOffset): + """ + DateOffset for the Easter holiday using logic defined in dateutil. + + Right now uses the revised method which is valid in years 1583-4099. + """ + + cpdef __setstate__(self, state): + self.n = state.pop("n") + self.normalize = state.pop("normalize") + + @apply_wraps + def _apply(self, other: datetime) -> datetime: + current_easter = easter(other.year) + current_easter = datetime( + current_easter.year, current_easter.month, current_easter.day + ) + current_easter = localize_pydatetime(current_easter, other.tzinfo) + + n = self.n + if n >= 0 and other < current_easter: + n -= 1 + elif n < 0 and other > current_easter: + n += 1 + # TODO: Why does this handle the 0 case the opposite of others? + + # NOTE: easter returns a datetime.date so we have to convert to type of + # other + new = easter(other.year + n) + new = datetime( + new.year, + new.month, + new.day, + other.hour, + other.minute, + other.second, + other.microsecond, + ) + return new + + def is_on_offset(self, dt: datetime) -> bool: + if self.normalize and not _is_normalized(dt): + return False + return date(dt.year, dt.month, dt.day) == easter(dt.year) + + +# ---------------------------------------------------------------------- +# Custom Offset classes + + +cdef class CustomBusinessDay(BusinessDay): + """ + DateOffset subclass representing custom business days excluding holidays. + + Parameters + ---------- + n : int, default 1 + normalize : bool, default False + Normalize start/end dates to midnight before generating date range. 
+ weekmask : str, Default 'Mon Tue Wed Thu Fri' + Weekmask of valid business days, passed to ``numpy.busdaycalendar``. + holidays : list + List/array of dates to exclude from the set of valid business days, + passed to ``numpy.busdaycalendar``. + calendar : pd.HolidayCalendar or np.busdaycalendar + offset : timedelta, default timedelta(0) + """ + + _prefix = "C" + _attributes = tuple( + ["n", "normalize", "weekmask", "holidays", "calendar", "offset"] + ) + + def __init__( + self, + n=1, + normalize=False, + weekmask="Mon Tue Wed Thu Fri", + holidays=None, + calendar=None, + offset=timedelta(0), + ): + BusinessDay.__init__(self, n, normalize, offset) + self._init_custom(weekmask, holidays, calendar) + + cpdef __setstate__(self, state): + self.holidays = state.pop("holidays") + self.weekmask = state.pop("weekmask") + BusinessDay.__setstate__(self, state) + + @apply_wraps + def _apply(self, other): + if self.n <= 0: + roll = "forward" + else: + roll = "backward" + + if PyDateTime_Check(other): + date_in = other + np_dt = np.datetime64(date_in.date()) + + np_incr_dt = np.busday_offset( + np_dt, self.n, roll=roll, busdaycal=self.calendar + ) + + dt_date = np_incr_dt.astype(datetime) + result = datetime.combine(dt_date, date_in.time()) + + if self.offset: + result = result + self.offset + return result + + elif is_any_td_scalar(other): + td = Timedelta(self.offset) + other + return BDay(self.n, offset=td.to_pytimedelta(), normalize=self.normalize) + else: + raise ApplyTypeError( + "Only know how to combine trading day with " + "datetime, datetime64 or timedelta." + ) + + def apply_index(self, dtindex): + raise NotImplementedError + + def _apply_array(self, dtarr): + raise NotImplementedError + + def is_on_offset(self, dt: datetime) -> bool: + if self.normalize and not _is_normalized(dt): + return False + day64 = _to_dt64D(dt) + return np.is_busday(day64, busdaycal=self.calendar) + + +cdef class CustomBusinessHour(BusinessHour): + """ + DateOffset subclass representing possibly n custom business days. + + Parameters + ---------- + n : int, default 1 + The number of months represented. + normalize : bool, default False + Normalize start/end dates to midnight before generating date range. + weekmask : str, Default 'Mon Tue Wed Thu Fri' + Weekmask of valid business days, passed to ``numpy.busdaycalendar``. + start : str, default "09:00" + Start time of your custom business hour in 24h format. + end : str, default: "17:00" + End time of your custom business hour in 24h format. + """ + + _prefix = "CBH" + _anchor = 0 + _attributes = tuple( + ["n", "normalize", "weekmask", "holidays", "calendar", "start", "end", "offset"] + ) + + def __init__( + self, + n=1, + normalize=False, + weekmask="Mon Tue Wed Thu Fri", + holidays=None, + calendar=None, + start="09:00", + end="17:00", + offset=timedelta(0), + ): + BusinessHour.__init__(self, n, normalize, start=start, end=end, offset=offset) + self._init_custom(weekmask, holidays, calendar) + + +cdef class _CustomBusinessMonth(BusinessMixin): + """ + DateOffset subclass representing custom business month(s). + + Increments between beginning/end of month dates. + + Parameters + ---------- + n : int, default 1 + The number of months represented. + normalize : bool, default False + Normalize start/end dates to midnight before generating date range. + weekmask : str, Default 'Mon Tue Wed Thu Fri' + Weekmask of valid business days, passed to ``numpy.busdaycalendar``. 
+ holidays : list + List/array of dates to exclude from the set of valid business days, + passed to ``numpy.busdaycalendar``. + calendar : pd.HolidayCalendar or np.busdaycalendar + Calendar to integrate. + offset : timedelta, default timedelta(0) + Time offset to apply. + """ + + _attributes = tuple( + ["n", "normalize", "weekmask", "holidays", "calendar", "offset"] + ) + + def __init__( + self, + n=1, + normalize=False, + weekmask="Mon Tue Wed Thu Fri", + holidays=None, + calendar=None, + offset=timedelta(0), + ): + BusinessMixin.__init__(self, n, normalize, offset) + self._init_custom(weekmask, holidays, calendar) + + @cache_readonly + def cbday_roll(self): + """ + Define default roll function to be called in apply method. + """ + cbday_kwds = self.kwds.copy() + cbday_kwds['offset'] = timedelta(0) + + cbday = CustomBusinessDay(n=1, normalize=False, **cbday_kwds) + + if self._prefix.endswith("S"): + # MonthBegin + roll_func = cbday.rollforward + else: + # MonthEnd + roll_func = cbday.rollback + return roll_func + + @cache_readonly + def m_offset(self): + if self._prefix.endswith("S"): + # MonthBegin + moff = MonthBegin(n=1, normalize=False) + else: + # MonthEnd + moff = MonthEnd(n=1, normalize=False) + return moff + + @cache_readonly + def month_roll(self): + """ + Define default roll function to be called in apply method. + """ + if self._prefix.endswith("S"): + # MonthBegin + roll_func = self.m_offset.rollback + else: + # MonthEnd + roll_func = self.m_offset.rollforward + return roll_func + + @apply_wraps + def _apply(self, other: datetime) -> datetime: + # First move to month offset + cur_month_offset_date = self.month_roll(other) + + # Find this custom month offset + compare_date = self.cbday_roll(cur_month_offset_date) + n = roll_convention(other.day, self.n, compare_date.day) + + new = cur_month_offset_date + n * self.m_offset + result = self.cbday_roll(new) + + if self.offset: + result = result + self.offset + return result + + +cdef class CustomBusinessMonthEnd(_CustomBusinessMonth): + _prefix = "CBM" + + +cdef class CustomBusinessMonthBegin(_CustomBusinessMonth): + _prefix = "CBMS" + + +BDay = BusinessDay +BMonthEnd = BusinessMonthEnd +BMonthBegin = BusinessMonthBegin +CBMonthEnd = CustomBusinessMonthEnd +CBMonthBegin = CustomBusinessMonthBegin +CDay = CustomBusinessDay + +# ---------------------------------------------------------------------- +# to_offset helpers + +prefix_mapping = { + offset._prefix: offset + for offset in [ + YearBegin, # 'AS' + YearEnd, # 'A' + BYearBegin, # 'BAS' + BYearEnd, # 'BA' + BusinessDay, # 'B' + BusinessMonthBegin, # 'BMS' + BusinessMonthEnd, # 'BM' + BQuarterEnd, # 'BQ' + BQuarterBegin, # 'BQS' + BusinessHour, # 'BH' + CustomBusinessDay, # 'C' + CustomBusinessMonthEnd, # 'CBM' + CustomBusinessMonthBegin, # 'CBMS' + CustomBusinessHour, # 'CBH' + MonthEnd, # 'M' + MonthBegin, # 'MS' + Nano, # 'N' + SemiMonthEnd, # 'SM' + SemiMonthBegin, # 'SMS' + Week, # 'W' + Second, # 'S' + Minute, # 'T' + Micro, # 'U' + QuarterEnd, # 'Q' + QuarterBegin, # 'QS' + Milli, # 'L' + Hour, # 'H' + Day, # 'D' + WeekOfMonth, # 'WOM' + FY5253, + FY5253Quarter, + ] +} + +# hack to handle WOM-1MON +opattern = re.compile( + r"([+\-]?\d*|[+\-]?\d*\.\d*)\s*([A-Za-z]+([\-][\dA-Za-z\-]+)?)" +) + +_lite_rule_alias = { + "W": "W-SUN", + "Q": "Q-DEC", + + "A": "A-DEC", # YearEnd(month=12), + "Y": "A-DEC", + "AS": "AS-JAN", # YearBegin(month=1), + "YS": "AS-JAN", + "BA": "BA-DEC", # BYearEnd(month=12), + "BY": "BA-DEC", + "BAS": "BAS-JAN", # BYearBegin(month=1), + "BYS": "BAS-JAN", 
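+
+    # The sub-daily aliases below map onto the single-letter Tick codes
+    # registered in prefix_mapping above ("T"=Minute, "L"=Milli,
+    # "U"=Micro, "N"=Nano).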
+ + "Min": "T", + "min": "T", + "ms": "L", + "us": "U", + "ns": "N", +} + +_dont_uppercase = {"MS", "ms"} + +INVALID_FREQ_ERR_MSG = "Invalid frequency: {0}" + +# TODO: still needed? +# cache of previously seen offsets +_offset_map = {} + + +# TODO: better name? +def _get_offset(name: str) -> BaseOffset: + """ + Return DateOffset object associated with rule name. + + Examples + -------- + _get_offset('EOM') --> BMonthEnd(1) + """ + if name not in _dont_uppercase: + name = name.upper() + name = _lite_rule_alias.get(name, name) + name = _lite_rule_alias.get(name.lower(), name) + else: + name = _lite_rule_alias.get(name, name) + + if name not in _offset_map: + try: + split = name.split("-") + klass = prefix_mapping[split[0]] + # handles case where there's no suffix (and will TypeError if too + # many '-') + offset = klass._from_name(*split[1:]) + except (ValueError, TypeError, KeyError) as err: + # bad prefix or suffix + raise ValueError(INVALID_FREQ_ERR_MSG.format(name)) from err + # cache + _offset_map[name] = offset + + return _offset_map[name] + + +cpdef to_offset(freq): + """ + Return DateOffset object from string or tuple representation + or datetime.timedelta object. + + Parameters + ---------- + freq : str, datetime.timedelta, BaseOffset or None + + Returns + ------- + DateOffset or None + + Raises + ------ + ValueError + If freq is an invalid frequency + + See Also + -------- + BaseOffset : Standard kind of date increment used for a date range. + + Examples + -------- + >>> to_offset("5min") + <5 * Minutes> + + >>> to_offset("1D1H") + <25 * Hours> + + >>> to_offset("2W") + <2 * Weeks: weekday=6> + + >>> to_offset("2B") + <2 * BusinessDays> + + >>> to_offset(pd.Timedelta(days=1)) + + + >>> to_offset(Hour()) + + """ + if freq is None: + return None + + if isinstance(freq, BaseOffset): + return freq + + if isinstance(freq, tuple): + raise TypeError( + f"to_offset does not support tuples {freq}, pass as a string instead" + ) + + elif PyDelta_Check(freq): + return delta_to_tick(freq) + + elif isinstance(freq, str): + delta = None + stride_sign = None + + try: + split = opattern.split(freq) + if split[-1] != "" and not split[-1].isspace(): + # the last element must be blank + raise ValueError("last element must be blank") + + tups = zip(split[0::4], split[1::4], split[2::4]) + for n, (sep, stride, name) in enumerate(tups): + if sep != "" and not sep.isspace(): + raise ValueError("separator must be spaces") + prefix = _lite_rule_alias.get(name) or name + if stride_sign is None: + stride_sign = -1 if stride.startswith("-") else 1 + if not stride: + stride = 1 + + if prefix in {"D", "H", "T", "S", "L", "U", "N"}: + # For these prefixes, we have something like "3H" or + # "2.5T", so we can construct a Timedelta with the + # matching unit and get our offset from delta_to_tick + td = Timedelta(1, unit=prefix) + off = delta_to_tick(td) + offset = off * float(stride) + if n != 0: + # If n==0, then stride_sign is already incorporated + # into the offset + offset *= stride_sign + else: + stride = int(stride) + offset = _get_offset(name) + offset = offset * int(np.fabs(stride) * stride_sign) + + if delta is None: + delta = offset + else: + delta = delta + offset + except (ValueError, TypeError) as err: + raise ValueError(INVALID_FREQ_ERR_MSG.format(freq)) from err + else: + delta = None + + if delta is None: + raise ValueError(INVALID_FREQ_ERR_MSG.format(freq)) + + return delta + + +# ---------------------------------------------------------------------- +# RelativeDelta Arithmetic + +def 
shift_day(other: datetime, days: int) -> datetime: + """ + Increment the datetime `other` by the given number of days, retaining + the time-portion of the datetime. For tz-naive datetimes this is + equivalent to adding a timedelta. For tz-aware datetimes it is similar to + dateutil's relativedelta.__add__, but handles pytz tzinfo objects. + + Parameters + ---------- + other : datetime or Timestamp + days : int + + Returns + ------- + shifted: datetime or Timestamp + """ + if other.tzinfo is None: + return other + timedelta(days=days) + + tz = other.tzinfo + naive = other.replace(tzinfo=None) + shifted = naive + timedelta(days=days) + return localize_pydatetime(shifted, tz) + + +cdef inline int year_add_months(npy_datetimestruct dts, int months) nogil: + """ + New year number after shifting npy_datetimestruct number of months. + """ + return dts.year + (dts.month + months - 1) // 12 + + +cdef inline int month_add_months(npy_datetimestruct dts, int months) nogil: + """ + New month number after shifting npy_datetimestruct + number of months. + """ + cdef: + int new_month = (dts.month + months) % 12 + return 12 if new_month == 0 else new_month + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef shift_quarters( + const int64_t[:] dtindex, + int quarters, + int q1start_month, + object day_opt, + int modby=3, +): + """ + Given an int64 array representing nanosecond timestamps, shift all elements + by the specified number of quarters using DateOffset semantics. + + Parameters + ---------- + dtindex : int64_t[:] timestamps for input dates + quarters : int number of quarters to shift + q1start_month : int month in which Q1 begins by convention + day_opt : {'start', 'end', 'business_start', 'business_end'} + modby : int (3 for quarters, 12 for years) + + Returns + ------- + out : ndarray[int64_t] + """ + cdef: + Py_ssize_t count = len(dtindex) + int64_t[:] out = np.empty(count, dtype="int64") + + if day_opt not in ["start", "end", "business_start", "business_end"]: + raise ValueError("day must be None, 'start', 'end', " + "'business_start', or 'business_end'") + + _shift_quarters(dtindex, out, count, quarters, q1start_month, day_opt, modby) + return np.asarray(out) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def shift_months(const int64_t[:] dtindex, int months, object day_opt=None): + """ + Given an int64-based datetime index, shift all elements + specified number of months using DateOffset semantics + + day_opt: {None, 'start', 'end', 'business_start', 'business_end'} + * None: day of month + * 'start' 1st day of month + * 'end' last day of month + """ + cdef: + Py_ssize_t i + npy_datetimestruct dts + int count = len(dtindex) + int64_t[:] out = np.empty(count, dtype="int64") + + if day_opt is None: + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = NPY_NAT + continue + + dt64_to_dtstruct(dtindex[i], &dts) + dts.year = year_add_months(dts, months) + dts.month = month_add_months(dts, months) + + dts.day = min(dts.day, get_days_in_month(dts.year, dts.month)) + out[i] = dtstruct_to_dt64(&dts) + elif day_opt in ["start", "end", "business_start", "business_end"]: + _shift_months(dtindex, out, count, months, day_opt) + else: + raise ValueError("day must be None, 'start', 'end', " + "'business_start', or 'business_end'") + + return np.asarray(out) + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline void _shift_months(const int64_t[:] dtindex, + int64_t[:] out, + Py_ssize_t count, + int months, + str day_opt) nogil: + """ + See 
shift_months.__doc__ + """ + cdef: + Py_ssize_t i + int months_to_roll + npy_datetimestruct dts + + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = NPY_NAT + continue + + dt64_to_dtstruct(dtindex[i], &dts) + months_to_roll = months + + months_to_roll = _roll_qtrday(&dts, months_to_roll, 0, day_opt) + + dts.year = year_add_months(dts, months_to_roll) + dts.month = month_add_months(dts, months_to_roll) + dts.day = get_day_of_month(&dts, day_opt) + + out[i] = dtstruct_to_dt64(&dts) + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline void _shift_quarters(const int64_t[:] dtindex, + int64_t[:] out, + Py_ssize_t count, + int quarters, + int q1start_month, + str day_opt, + int modby) nogil: + """ + See shift_quarters.__doc__ + """ + cdef: + Py_ssize_t i + int months_since, n + npy_datetimestruct dts + + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = NPY_NAT + continue + + dt64_to_dtstruct(dtindex[i], &dts) + n = quarters + + months_since = (dts.month - q1start_month) % modby + n = _roll_qtrday(&dts, n, months_since, day_opt) + + dts.year = year_add_months(dts, modby * n - months_since) + dts.month = month_add_months(dts, modby * n - months_since) + dts.day = get_day_of_month(&dts, day_opt) + + out[i] = dtstruct_to_dt64(&dts) + + +cdef ndarray[int64_t] _shift_bdays(const int64_t[:] i8other, int periods): + """ + Implementation of BusinessDay.apply_offset. + + Parameters + ---------- + i8other : const int64_t[:] + periods : int + + Returns + ------- + ndarray[int64_t] + """ + cdef: + Py_ssize_t i, n = len(i8other) + int64_t[:] result = np.empty(n, dtype="i8") + int64_t val, res + int wday, nadj, days + npy_datetimestruct dts + + for i in range(n): + val = i8other[i] + if val == NPY_NAT: + result[i] = NPY_NAT + else: + # The rest of this is effectively a copy of BusinessDay.apply + nadj = periods + weeks = nadj // 5 + dt64_to_dtstruct(val, &dts) + wday = dayofweek(dts.year, dts.month, dts.day) + + if nadj <= 0 and wday > 4: + # roll forward + nadj += 1 + + nadj -= 5 * weeks + + # nadj is always >= 0 at this point + if nadj == 0 and wday > 4: + # roll back + days = 4 - wday + elif wday > 4: + # roll forward + days = (7 - wday) + (nadj - 1) + elif wday + nadj <= 4: + # shift by n days without leaving the current week + days = nadj + else: + # shift by nadj days plus 2 to get past the weekend + days = nadj + 2 + + res = val + (7 * weeks + days) * DAY_NANOS + result[i] = res + + return result.base + + +def shift_month(stamp: datetime, months: int, day_opt: object = None) -> datetime: + """ + Given a datetime (or Timestamp) `stamp`, an integer `months` and an + option `day_opt`, return a new datetimelike that many months later, + with day determined by `day_opt` using relativedelta semantics. 
+
+    Scalar analogue of shift_months
+
+    Parameters
+    ----------
+    stamp : datetime or Timestamp
+    months : int
+    day_opt : None, 'start', 'end', 'business_start', 'business_end', or int
+        None: returned datetimelike has the same day as the input, or the
+              last day of the month if the new month is too short
+        'start': returned datetimelike has day=1
+        'end': returned datetimelike has day on the last day of the month
+        'business_start': returned datetimelike has day on the first
+            business day of the month
+        'business_end': returned datetimelike has day on the last
+            business day of the month
+        int: returned datetimelike has day equal to day_opt
+
+    Returns
+    -------
+    shifted : datetime or Timestamp (same as input `stamp`)
+    """
+    cdef:
+        int year, month, day
+        int days_in_month, dy
+
+    dy = (stamp.month + months) // 12
+    month = (stamp.month + months) % 12
+
+    if month == 0:
+        month = 12
+        dy -= 1
+    year = stamp.year + dy
+
+    if day_opt is None:
+        days_in_month = get_days_in_month(year, month)
+        day = min(stamp.day, days_in_month)
+    elif day_opt == "start":
+        day = 1
+    elif day_opt == "end":
+        day = get_days_in_month(year, month)
+    elif day_opt == "business_start":
+        # first business day of month
+        day = get_firstbday(year, month)
+    elif day_opt == "business_end":
+        # last business day of month
+        day = get_lastbday(year, month)
+    elif is_integer_object(day_opt):
+        days_in_month = get_days_in_month(year, month)
+        day = min(day_opt, days_in_month)
+    else:
+        raise ValueError(day_opt)
+    return stamp.replace(year=year, month=month, day=day)
+
+
+cdef inline int get_day_of_month(npy_datetimestruct* dts, str day_opt) nogil:
+    """
+    Find the day in `other`'s month that satisfies a DateOffset's is_on_offset
+    policy, as described by the `day_opt` argument.
+
+    Parameters
+    ----------
+    dts : npy_datetimestruct*
+    day_opt : {'start', 'end', 'business_start', 'business_end'}
+        'start': returns 1
+        'end': returns last day of the month
+        'business_start': returns the first business day of the month
+        'business_end': returns the last business day of the month
+
+    Returns
+    -------
+    day_of_month : int
+
+    Examples
+    --------
+    >>> other = datetime(2017, 11, 14)
+    >>> get_day_of_month(other, 'start')
+    1
+    >>> get_day_of_month(other, 'end')
+    30
+
+    Notes
+    -----
+    Caller is responsible for ensuring one of the four accepted day_opt values
+    is passed.
+    """
+
+    if day_opt == "start":
+        return 1
+    elif day_opt == "end":
+        return get_days_in_month(dts.year, dts.month)
+    elif day_opt == "business_start":
+        # first business day of month
+        return get_firstbday(dts.year, dts.month)
+    else:
+        # i.e. day_opt == "business_end":
+        # last business day of month
+        return get_lastbday(dts.year, dts.month)
+
+
+cpdef int roll_convention(int other, int n, int compare) nogil:
+    """
+    Possibly increment or decrement the number of periods to shift
+    based on rollforward/rollbackward conventions.
+
+    Parameters
+    ----------
+    other : int, generally the day component of a datetime
+    n : number of periods to increment, before adjusting for rolling
+    compare : int, generally the day component of a datetime, in the same
+        month as the datetime from which `other` was taken.
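+
+    A worked illustration (values assumed):
+
+    >>> roll_convention(5, 1, 15)    # first anchor (day 15) still ahead
+    0
+    >>> roll_convention(20, -1, 15)  # anchor already passed going backward
+    0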
+ + Returns + ------- + n : int number of periods to increment + """ + if n > 0 and other < compare: + n -= 1 + elif n <= 0 and other > compare: + # as if rolled forward already + n += 1 + return n + + +def roll_qtrday(other: datetime, n: int, month: int, + day_opt: str, modby: int) -> int: + """ + Possibly increment or decrement the number of periods to shift + based on rollforward/rollbackward conventions. + + Parameters + ---------- + other : datetime or Timestamp + n : number of periods to increment, before adjusting for rolling + month : int reference month giving the first month of the year + day_opt : {'start', 'end', 'business_start', 'business_end'} + The convention to use in finding the day in a given month against + which to compare for rollforward/rollbackward decisions. + modby : int 3 for quarters, 12 for years + + Returns + ------- + n : int number of periods to increment + + See Also + -------- + get_day_of_month : Find the day in a month provided an offset. + """ + cdef: + int months_since + npy_datetimestruct dts + + if day_opt not in ["start", "end", "business_start", "business_end"]: + raise ValueError(day_opt) + + pydate_to_dtstruct(other, &dts) + + if modby == 12: + # We care about the month-of-year, not month-of-quarter, so skip mod + months_since = other.month - month + else: + months_since = other.month % modby - month % modby + + return _roll_qtrday(&dts, n, months_since, day_opt) + + +cdef inline int _roll_qtrday(npy_datetimestruct* dts, + int n, + int months_since, + str day_opt) nogil except? -1: + """ + See roll_qtrday.__doc__ + """ + + if n > 0: + if months_since < 0 or (months_since == 0 and + dts.day < get_day_of_month(dts, day_opt)): + # pretend to roll back if on same month but + # before compare_day + n -= 1 + else: + if months_since > 0 or (months_since == 0 and + dts.day > get_day_of_month(dts, day_opt)): + # make sure to roll forward, so negate + n += 1 + return n diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/parsing.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/parsing.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..1e8b722 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/parsing.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/parsing.pxd b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/parsing.pxd new file mode 100644 index 0000000..25667f0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/parsing.pxd @@ -0,0 +1,3 @@ + +cpdef str get_rule_month(str source) +cpdef quarter_to_myear(int year, int quarter, str freq) diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/parsing.pyi b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/parsing.pyi new file mode 100644 index 0000000..6a96b05 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/parsing.pyi @@ -0,0 +1,62 @@ +from datetime import datetime + +import numpy as np + +from pandas._libs.tslibs.offsets import BaseOffset +from pandas._typing import npt + +class DateParseError(ValueError): ... + +def parse_datetime_string( + date_string: str, + dayfirst: bool = ..., + yearfirst: bool = ..., + **kwargs, +) -> datetime: ... +def parse_time_string( + arg: str, + freq: BaseOffset | str | None = ..., + dayfirst: bool | None = ..., + yearfirst: bool | None = ..., +) -> tuple[datetime, str]: ... +def _does_string_look_like_datetime(py_string: str) -> bool: ... 
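+# The object-dtype ndarray annotations below mirror the ``object[:]``
+# memoryviews accepted by the corresponding Cython functions in parsing.pyx.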
+def quarter_to_myear(year: int, quarter: int, freq: str) -> tuple[int, int]: ... +def try_parse_dates( + values: npt.NDArray[np.object_], # object[:] + parser=..., + dayfirst: bool = ..., + default: datetime | None = ..., +) -> npt.NDArray[np.object_]: ... +def try_parse_date_and_time( + dates: npt.NDArray[np.object_], # object[:] + times: npt.NDArray[np.object_], # object[:] + date_parser=..., + time_parser=..., + dayfirst: bool = ..., + default: datetime | None = ..., +) -> npt.NDArray[np.object_]: ... +def try_parse_year_month_day( + years: npt.NDArray[np.object_], # object[:] + months: npt.NDArray[np.object_], # object[:] + days: npt.NDArray[np.object_], # object[:] +) -> npt.NDArray[np.object_]: ... +def try_parse_datetime_components( + years: npt.NDArray[np.object_], # object[:] + months: npt.NDArray[np.object_], # object[:] + days: npt.NDArray[np.object_], # object[:] + hours: npt.NDArray[np.object_], # object[:] + minutes: npt.NDArray[np.object_], # object[:] + seconds: npt.NDArray[np.object_], # object[:] +) -> npt.NDArray[np.object_]: ... +def format_is_iso(f: str) -> bool: ... +def guess_datetime_format( + dt_str, + dayfirst: bool = ..., + dt_str_parse=..., + dt_str_split=..., +) -> str | None: ... +def concat_date_cols( + date_cols: tuple, + keep_trivial_numbers: bool = ..., +) -> npt.NDArray[np.object_]: ... +def get_rule_month(source: str) -> str: ... diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/parsing.pyx b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/parsing.pyx new file mode 100644 index 0000000..f2b4806 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/parsing.pyx @@ -0,0 +1,1155 @@ +""" +Parsing functions for datetime and datetime-like strings. +""" +import re +import time +import warnings + +from libc.string cimport strchr + +import cython +from cython import Py_ssize_t + +from cpython.datetime cimport ( + datetime, + datetime_new, + import_datetime, + tzinfo, +) +from cpython.object cimport PyObject_Str +from cpython.version cimport PY_VERSION_HEX + +import_datetime() + +import numpy as np + +cimport numpy as cnp +from numpy cimport ( + PyArray_GETITEM, + PyArray_ITER_DATA, + PyArray_ITER_NEXT, + PyArray_IterNew, + flatiter, + float64_t, +) + +cnp.import_array() + +# dateutil compat + +from dateutil.parser import ( + DEFAULTPARSER, + parse as du_parse, +) +from dateutil.relativedelta import relativedelta +from dateutil.tz import ( + tzlocal as _dateutil_tzlocal, + tzoffset, + tzstr as _dateutil_tzstr, + tzutc as _dateutil_tzutc, +) + +from pandas._config import get_option + +from pandas._libs.tslibs.ccalendar cimport c_MONTH_NUMBERS +from pandas._libs.tslibs.nattype cimport ( + c_NaT as NaT, + c_nat_strings as nat_strings, +) +from pandas._libs.tslibs.offsets cimport is_offset_object +from pandas._libs.tslibs.util cimport ( + get_c_string_buf_and_size, + is_array, +) + + +cdef extern from "../src/headers/portable.h": + int getdigit_ascii(char c, int default) nogil + +cdef extern from "../src/parser/tokenizer.h": + double xstrtod(const char *p, char **q, char decimal, char sci, char tsep, + int skip_trailing, int *error, int *maybe_int) + + +# ---------------------------------------------------------------------- +# Constants + + +class DateParseError(ValueError): + pass + + +_DEFAULT_DATETIME = datetime(1, 1, 1).replace(hour=0, minute=0, + second=0, microsecond=0) + +PARSING_WARNING_MSG = ( + "Parsing '{date_string}' in {format} format. 
Provide format "
+    "or specify infer_datetime_format=True for consistent parsing."
+)

+cdef:
+    set _not_datelike_strings = {'a', 'A', 'm', 'M', 'p', 'P', 't', 'T'}
+
+# ----------------------------------------------------------------------
+cdef:
+    const char* delimiters = " /-."
+    int MAX_DAYS_IN_MONTH = 31, MAX_MONTH = 12
+
+
+cdef inline bint _is_not_delimiter(const char ch):
+    return strchr(delimiters, ch) == NULL
+
+
+cdef inline int _parse_2digit(const char* s):
+    cdef int result = 0
+    result += getdigit_ascii(s[0], -10) * 10
+    result += getdigit_ascii(s[1], -100) * 1
+    return result
+
+
+cdef inline int _parse_4digit(const char* s):
+    cdef int result = 0
+    result += getdigit_ascii(s[0], -10) * 1000
+    result += getdigit_ascii(s[1], -100) * 100
+    result += getdigit_ascii(s[2], -1000) * 10
+    result += getdigit_ascii(s[3], -10000) * 1
+    return result
+
+
+cdef inline object _parse_delimited_date(str date_string, bint dayfirst):
+    """
+    Parse special cases of dates: MM/DD/YYYY, DD/MM/YYYY, MM/YYYY.
+
+    With `dayfirst == False` (the default), the function first tries to
+    parse the date as MM/DD/YYYY; if month > 12, it retries as DD/MM/YYYY.
+    With `dayfirst == True`, it first tries DD/MM/YYYY and falls back to
+    MM/DD/YYYY if that attempt is invalid.
+
+    For MM/DD/YYYY, DD/MM/YYYY: delimiter can be a space or one of /-.
+    For MM/YYYY: delimiter can be a space or one of /-
+    If `date_string` cannot be converted to a date, the function returns
+    None, None.
+
+    Parameters
+    ----------
+    date_string : str
+    dayfirst : bool
+
+    Returns
+    -------
+    datetime or None
+    str or None
+        Describing resolution of the parsed string.
+    """
+    cdef:
+        const char* buf
+        Py_ssize_t length
+        int day = 1, month = 1, year
+        bint can_swap = 0
+
+    buf = get_c_string_buf_and_size(date_string, &length)
+    if length == 10:
+        # parsing MM?DD?YYYY and DD?MM?YYYY dates
+        if _is_not_delimiter(buf[2]) or _is_not_delimiter(buf[5]):
+            return None, None
+        month = _parse_2digit(buf)
+        day = _parse_2digit(buf + 3)
+        year = _parse_4digit(buf + 6)
+        reso = 'day'
+        can_swap = 1
+    elif length == 7:
+        # parsing MM?YYYY dates
+        if buf[2] == b'.' or _is_not_delimiter(buf[2]):
+            # we cannot reliably tell whether e.g.
10.2010 is a float + # or a date, thus we refuse to parse it here + return None, None + month = _parse_2digit(buf) + year = _parse_4digit(buf + 3) + reso = 'month' + else: + return None, None + + if month < 0 or day < 0 or year < 1000: + # some part is not an integer, so + # date_string can't be converted to date, above format + return None, None + + swapped_day_and_month = False + if 1 <= month <= MAX_DAYS_IN_MONTH and 1 <= day <= MAX_DAYS_IN_MONTH \ + and (month <= MAX_MONTH or day <= MAX_MONTH): + if (month > MAX_MONTH or (day <= MAX_MONTH and dayfirst)) and can_swap: + day, month = month, day + swapped_day_and_month = True + if dayfirst and not swapped_day_and_month: + warnings.warn( + PARSING_WARNING_MSG.format( + date_string=date_string, + format='MM/DD/YYYY' + ), + stacklevel=4, + ) + elif not dayfirst and swapped_day_and_month: + warnings.warn( + PARSING_WARNING_MSG.format( + date_string=date_string, + format='DD/MM/YYYY' + ), + stacklevel=4, + ) + if PY_VERSION_HEX >= 0x03060100: + # In Python <= 3.6.0 there is no range checking for invalid dates + # in C api, thus we call faster C version for 3.6.1 or newer + return datetime_new(year, month, day, 0, 0, 0, 0, None), reso + return datetime(year, month, day, 0, 0, 0, 0, None), reso + + raise DateParseError(f"Invalid date specified ({month}/{day})") + + +cdef inline bint does_string_look_like_time(str parse_string): + """ + Checks whether given string is a time: it has to start either from + H:MM or from HH:MM, and hour and minute values must be valid. + + Parameters + ---------- + parse_string : str + + Returns: + -------- + bool + Whether given string is potentially a time. + """ + cdef: + const char* buf + Py_ssize_t length + int hour = -1, minute = -1 + + buf = get_c_string_buf_and_size(parse_string, &length) + if length >= 4: + if buf[1] == b':': + # h:MM format + hour = getdigit_ascii(buf[0], -1) + minute = _parse_2digit(buf + 2) + elif buf[2] == b':': + # HH:MM format + hour = _parse_2digit(buf) + minute = _parse_2digit(buf + 3) + + return 0 <= hour <= 23 and 0 <= minute <= 59 + + +def parse_datetime_string( + str date_string, + bint dayfirst=False, + bint yearfirst=False, + **kwargs, +) -> datetime: + """ + Parse datetime string, only returns datetime. + Also cares special handling matching time patterns. + + Returns + ------- + datetime + """ + + cdef: + object dt + + if not _does_string_look_like_datetime(date_string): + raise ValueError('Given date string not likely a datetime.') + + if does_string_look_like_time(date_string): + # use current datetime as default, not pass _DEFAULT_DATETIME + dt = du_parse(date_string, dayfirst=dayfirst, + yearfirst=yearfirst, **kwargs) + return dt + + dt, _ = _parse_delimited_date(date_string, dayfirst) + if dt is not None: + return dt + + try: + dt, _ = _parse_dateabbr_string(date_string, _DEFAULT_DATETIME, freq=None) + return dt + except DateParseError: + raise + except ValueError: + pass + + try: + dt = du_parse(date_string, default=_DEFAULT_DATETIME, + dayfirst=dayfirst, yearfirst=yearfirst, **kwargs) + except TypeError: + # following may be raised from dateutil + # TypeError: 'NoneType' object is not iterable + raise ValueError('Given date string not likely a datetime.') + + return dt + + +def parse_time_string(arg: str, freq=None, dayfirst=None, yearfirst=None): + """ + Try hard to parse datetime string, leveraging dateutil plus some extra + goodies like quarter recognition. 
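+
+    For instance (illustrative; output assumes the default freq=None):
+
+    >>> parse_time_string("2021Q3")
+    (datetime.datetime(2021, 7, 1, 0, 0), 'quarter')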
+ + Parameters + ---------- + arg : str + freq : str or DateOffset, default None + Helps with interpreting time string if supplied + dayfirst : bool, default None + If None uses default from print_config + yearfirst : bool, default None + If None uses default from print_config + + Returns + ------- + datetime + str + Describing resolution of parsed string. + """ + if is_offset_object(freq): + freq = freq.rule_code + + if dayfirst is None or yearfirst is None: + if dayfirst is None: + dayfirst = get_option("display.date_dayfirst") + if yearfirst is None: + yearfirst = get_option("display.date_yearfirst") + + res = parse_datetime_string_with_reso(arg, freq=freq, + dayfirst=dayfirst, + yearfirst=yearfirst) + return res + + +cdef parse_datetime_string_with_reso( + str date_string, str freq=None, bint dayfirst=False, bint yearfirst=False, +): + """ + Parse datetime string and try to identify its resolution. + + Returns + ------- + datetime + str + Inferred resolution of the parsed string. + + Raises + ------ + ValueError : preliminary check suggests string is not datetime + DateParseError : error within dateutil + """ + cdef: + object parsed, reso + + if not _does_string_look_like_datetime(date_string): + raise ValueError('Given date string not likely a datetime.') + + parsed, reso = _parse_delimited_date(date_string, dayfirst) + if parsed is not None: + return parsed, reso + + try: + return _parse_dateabbr_string(date_string, _DEFAULT_DATETIME, freq) + except DateParseError: + raise + except ValueError: + pass + + try: + parsed, reso = dateutil_parse(date_string, _DEFAULT_DATETIME, + dayfirst=dayfirst, yearfirst=yearfirst, + ignoretz=False, tzinfos=None) + except (ValueError, OverflowError) as err: + # TODO: allow raise of errors within instead + raise DateParseError(err) + if parsed is None: + raise DateParseError(f"Could not parse {date_string}") + return parsed, reso + + +cpdef bint _does_string_look_like_datetime(str py_string): + """ + Checks whether given string is a datetime: it has to start with '0' or + be greater than 1000. + + Parameters + ---------- + py_string: str + + Returns + ------- + bool + Whether given string is potentially a datetime. + """ + cdef: + const char *buf + char *endptr = NULL + Py_ssize_t length = -1 + double converted_date + char first + int error = 0 + + buf = get_c_string_buf_and_size(py_string, &length) + if length >= 1: + first = buf[0] + if first == b'0': + # Strings starting with 0 are more consistent with a + # date-like string than a number + return True + elif py_string in _not_datelike_strings: + return False + else: + # xstrtod with such parameters copies behavior of python `float` + # cast; for example, " 35.e-1 " is valid string for this cast so, + # for correctly xstrtod call necessary to pass these params: + # b'.' - a dot is used as separator, b'e' - an exponential form of + # a float number can be used, b'\0' - not to use a thousand + # separator, 1 - skip extra spaces before and after, + converted_date = xstrtod(buf, &endptr, + b'.', b'e', b'\0', 1, &error, NULL) + # if there were no errors and the whole line was parsed, then ... 
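+            # ... the string parsed as a bare number, and it is treated as
+            # date-like only when the value is >= 1000, i.e. a plausible
+            # year such as "2001".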
+ if error == 0 and endptr == buf + length: + return converted_date >= 1000 + + return True + + +cdef inline object _parse_dateabbr_string(object date_string, datetime default, + str freq=None): + cdef: + object ret + # year initialized to prevent compiler warnings + int year = -1, quarter = -1, month, mnum + Py_ssize_t date_len + + # special handling for possibilities eg, 2Q2005, 2Q05, 2005Q1, 05Q1 + assert isinstance(date_string, str) + + if date_string in nat_strings: + return NaT, '' + + date_string = date_string.upper() + date_len = len(date_string) + + if date_len == 4: + # parse year only like 2000 + try: + ret = default.replace(year=int(date_string)) + return ret, 'year' + except ValueError: + pass + + try: + if 4 <= date_len <= 7: + i = date_string.index('Q', 1, 6) + if i == 1: + quarter = int(date_string[0]) + if date_len == 4 or (date_len == 5 + and date_string[i + 1] == '-'): + # r'(\d)Q-?(\d\d)') + year = 2000 + int(date_string[-2:]) + elif date_len == 6 or (date_len == 7 + and date_string[i + 1] == '-'): + # r'(\d)Q-?(\d\d\d\d)') + year = int(date_string[-4:]) + else: + raise ValueError + elif i == 2 or i == 3: + # r'(\d\d)-?Q(\d)' + if date_len == 4 or (date_len == 5 + and date_string[i - 1] == '-'): + quarter = int(date_string[-1]) + year = 2000 + int(date_string[:2]) + else: + raise ValueError + elif i == 4 or i == 5: + if date_len == 6 or (date_len == 7 + and date_string[i - 1] == '-'): + # r'(\d\d\d\d)-?Q(\d)' + quarter = int(date_string[-1]) + year = int(date_string[:4]) + else: + raise ValueError + + if not (1 <= quarter <= 4): + raise DateParseError(f'Incorrect quarterly string is given, ' + f'quarter must be ' + f'between 1 and 4: {date_string}') + + try: + # GH#1228 + year, month = quarter_to_myear(year, quarter, freq) + except KeyError: + raise DateParseError("Unable to retrieve month " + "information from given " + f"freq: {freq}") + + ret = default.replace(year=year, month=month) + return ret, 'quarter' + + except DateParseError: + raise + except ValueError: + pass + + if date_len == 6 and freq == 'M': + year = int(date_string[:4]) + month = int(date_string[4:6]) + try: + ret = default.replace(year=year, month=month) + return ret, 'month' + except ValueError: + pass + + for pat in ['%Y-%m', '%b %Y', '%b-%Y']: + try: + ret = datetime.strptime(date_string, pat) + return ret, 'month' + except ValueError: + pass + + raise ValueError(f'Unable to parse {date_string}') + + +cpdef quarter_to_myear(int year, int quarter, str freq): + """ + A quarterly frequency defines a "year" which may not coincide with + the calendar-year. Find the calendar-year and calendar-month associated + with the given year and quarter under the `freq`-derived calendar. 
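+
+    Illustrative mappings (values computed from this rule):
+
+    >>> quarter_to_myear(2021, 1, "Q-DEC")   # calendar quarters
+    (2021, 1)
+    >>> quarter_to_myear(2021, 1, "Q-JAN")   # fiscal year ending in January
+    (2020, 2)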
+ + Parameters + ---------- + year : int + quarter : int + freq : str or None + + Returns + ------- + year : int + month : int + + See Also + -------- + Period.qyear + """ + if quarter <= 0 or quarter > 4: + raise ValueError("Quarter must be 1 <= q <= 4") + + if freq is not None: + mnum = c_MONTH_NUMBERS[get_rule_month(freq)] + 1 + month = (mnum + (quarter - 1) * 3) % 12 + 1 + if month > mnum: + year -= 1 + else: + month = (quarter - 1) * 3 + 1 + + return year, month + + +cdef dateutil_parse( + str timestr, + object default, + bint ignoretz=False, + object tzinfos=None, + bint dayfirst=False, + bint yearfirst=False, +): + """ lifted from dateutil to get resolution""" + + cdef: + object res, attr, ret, tzdata + object reso = None + dict repl = {} + + res, _ = DEFAULTPARSER._parse(timestr, dayfirst=dayfirst, yearfirst=yearfirst) + + if res is None: + raise ValueError(f"Unknown datetime string format, unable to parse: {timestr}") + + for attr in ["year", "month", "day", "hour", + "minute", "second", "microsecond"]: + value = getattr(res, attr) + if value is not None: + repl[attr] = value + reso = attr + + if reso is None: + raise ValueError(f"Unable to parse datetime string: {timestr}") + + if reso == 'microsecond': + if repl['microsecond'] == 0: + reso = 'second' + elif repl['microsecond'] % 1000 == 0: + reso = 'millisecond' + + ret = default.replace(**repl) + if res.weekday is not None and not res.day: + ret = ret + relativedelta.relativedelta(weekday=res.weekday) + if not ignoretz: + if callable(tzinfos) or tzinfos and res.tzname in tzinfos: + # Note: as of 1.0 this is not reached because + # we never pass tzinfos, see GH#22234 + if callable(tzinfos): + tzdata = tzinfos(res.tzname, res.tzoffset) + else: + tzdata = tzinfos.get(res.tzname) + if isinstance(tzdata, tzinfo): + new_tzinfo = tzdata + elif isinstance(tzdata, str): + new_tzinfo = _dateutil_tzstr(tzdata) + elif isinstance(tzdata, int): + new_tzinfo = tzoffset(res.tzname, tzdata) + else: + raise ValueError("offset must be tzinfo subclass, " + "tz string, or int offset") + ret = ret.replace(tzinfo=new_tzinfo) + elif res.tzname and res.tzname in time.tzname: + ret = ret.replace(tzinfo=_dateutil_tzlocal()) + elif res.tzoffset == 0: + ret = ret.replace(tzinfo=_dateutil_tzutc()) + elif res.tzoffset: + ret = ret.replace(tzinfo=tzoffset(res.tzname, res.tzoffset)) + return ret, reso + + +# ---------------------------------------------------------------------- +# Parsing for type-inference + + +def try_parse_dates( + object[:] values, parser=None, bint dayfirst=False, default=None, +) -> np.ndarray: + cdef: + Py_ssize_t i, n + object[:] result + + n = len(values) + result = np.empty(n, dtype='O') + + if parser is None: + if default is None: # GH2618 + date = datetime.now() + default = datetime(date.year, date.month, 1) + + parse_date = lambda x: du_parse(x, dayfirst=dayfirst, default=default) + + # EAFP here + try: + for i in range(n): + if values[i] == '': + result[i] = np.nan + else: + result[i] = parse_date(values[i]) + except Exception: + # Since parser is user-defined, we can't guess what it might raise + return values + else: + parse_date = parser + + for i in range(n): + if values[i] == '': + result[i] = np.nan + else: + result[i] = parse_date(values[i]) + + return result.base # .base to access underlying ndarray + + +def try_parse_date_and_time( + object[:] dates, + object[:] times, + date_parser=None, + time_parser=None, + bint dayfirst=False, + default=None, +) -> np.ndarray: + cdef: + Py_ssize_t i, n + object[:] result + + n = 
len(dates) + # TODO(cython3): Use len instead of `shape[0]` + if times.shape[0] != n: + raise ValueError('Length of dates and times must be equal') + result = np.empty(n, dtype='O') + + if date_parser is None: + if default is None: # GH2618 + date = datetime.now() + default = datetime(date.year, date.month, 1) + + parse_date = lambda x: du_parse(x, dayfirst=dayfirst, default=default) + + else: + parse_date = date_parser + + if time_parser is None: + parse_time = lambda x: du_parse(x) + + else: + parse_time = time_parser + + for i in range(n): + d = parse_date(str(dates[i])) + t = parse_time(str(times[i])) + result[i] = datetime(d.year, d.month, d.day, + t.hour, t.minute, t.second) + + return result.base # .base to access underlying ndarray + + +def try_parse_year_month_day( + object[:] years, object[:] months, object[:] days +) -> np.ndarray: + cdef: + Py_ssize_t i, n + object[:] result + + n = len(years) + # TODO(cython3): Use len instead of `shape[0]` + if months.shape[0] != n or days.shape[0] != n: + raise ValueError('Length of years/months/days must all be equal') + result = np.empty(n, dtype='O') + + for i in range(n): + result[i] = datetime(int(years[i]), int(months[i]), int(days[i])) + + return result.base # .base to access underlying ndarray + + +def try_parse_datetime_components(object[:] years, + object[:] months, + object[:] days, + object[:] hours, + object[:] minutes, + object[:] seconds) -> np.ndarray: + + cdef: + Py_ssize_t i, n + object[:] result + int secs + double float_secs + double micros + + n = len(years) + # TODO(cython3): Use len instead of `shape[0]` + if ( + months.shape[0] != n + or days.shape[0] != n + or hours.shape[0] != n + or minutes.shape[0] != n + or seconds.shape[0] != n + ): + raise ValueError('Length of all datetime components must be equal') + result = np.empty(n, dtype='O') + + for i in range(n): + float_secs = float(seconds[i]) + secs = int(float_secs) + + micros = float_secs - secs + if micros > 0: + micros = micros * 1000000 + + result[i] = datetime(int(years[i]), int(months[i]), int(days[i]), + int(hours[i]), int(minutes[i]), secs, + int(micros)) + + return result.base # .base to access underlying ndarray + + +# ---------------------------------------------------------------------- +# Miscellaneous + + +# Class copied verbatim from https://github.com/dateutil/dateutil/pull/732 +# +# We use this class to parse and tokenize date strings. However, as it is +# a private class in the dateutil library, relying on backwards compatibility +# is not practical. In fact, using this class issues warnings (xref gh-21322). +# Thus, we port the class over so that both issues are resolved. +# +# Copyright (c) 2017 - dateutil contributors +class _timelex: + def __init__(self, instream): + if getattr(instream, 'decode', None) is not None: + instream = instream.decode() + + if isinstance(instream, str): + self.stream = instream + elif getattr(instream, 'read', None) is None: + raise TypeError( + 'Parser must be a string or character stream, not ' + f'{type(instream).__name__}') + else: + self.stream = instream.read() + + def get_tokens(self): + """ + This function breaks the time string into lexical units (tokens), which + can be parsed by the parser. Lexical units are demarcated by changes in + the character set, so any continuous string of letters is considered + one unit, any continuous string of numbers is considered one unit. + The main complication arises from the fact that dots ('.') can be used + both as separators (e.g. 
"Sep.20.2009") or decimal points (e.g. + "4:30:21.447"). As such, it is necessary to read the full context of + any dot-separated strings before breaking it into tokens; as such, this + function maintains a "token stack", for when the ambiguous context + demands that multiple tokens be parsed at once. + """ + cdef: + Py_ssize_t n + + stream = self.stream.replace('\x00', '') + + # TODO: Change \s --> \s+ (this doesn't match existing behavior) + # TODO: change the punctuation block to punc+ (does not match existing) + # TODO: can we merge the two digit patterns? + tokens = re.findall(r"\s|" + r"(? bint: + """ + Does format match the iso8601 set that can be handled by the C parser? + Generally of form YYYY-MM-DDTHH:MM:SS - date separator can be different + but must be consistent. Leading 0s in dates and times are optional. + """ + iso_template = '%Y{date_sep}%m{date_sep}%d{time_sep}%H:%M:%S{micro_or_tz}'.format + excluded_formats = ['%Y%m%d', '%Y%m', '%Y'] + + for date_sep in [' ', '/', '\\', '-', '.', '']: + for time_sep in [' ', 'T']: + for micro_or_tz in ['', '%z', '%Z', '.%f', '.%f%z', '.%f%Z']: + if (iso_template(date_sep=date_sep, + time_sep=time_sep, + micro_or_tz=micro_or_tz, + ).startswith(f) and f not in excluded_formats): + return True + return False + + +def guess_datetime_format( + dt_str, + bint dayfirst=False, + dt_str_parse=du_parse, + dt_str_split=_DATEUTIL_LEXER_SPLIT, +): + """ + Guess the datetime format of a given datetime string. + + Parameters + ---------- + dt_str : str + Datetime string to guess the format of. + dayfirst : bool, default False + If True parses dates with the day first, eg 20/01/2005 + Warning: dayfirst=True is not strict, but will prefer to parse + with day first (this is a known bug). + dt_str_parse : function, defaults to `dateutil.parser.parse` + This function should take in a datetime string and return + a `datetime.datetime` guess that the datetime string represents + dt_str_split : function, defaults to `_DATEUTIL_LEXER_SPLIT` (dateutil) + This function should take in a datetime string and return + a list of strings, the guess of the various specific parts + e.g. '2011/12/30' -> ['2011', '/', '12', '/', '30'] + + Returns + ------- + ret : datetime format string (for `strftime` or `strptime`) + """ + if dt_str_parse is None or dt_str_split is None: + return None + + if not isinstance(dt_str, str): + return None + + day_attribute_and_format = (('day',), '%d', 2) + + # attr name, format, padding (if any) + datetime_attrs_to_format = [ + (('year', 'month', 'day'), '%Y%m%d', 0), + (('year',), '%Y', 0), + (('month',), '%B', 0), + (('month',), '%b', 0), + (('month',), '%m', 2), + day_attribute_and_format, + (('hour',), '%H', 2), + (('minute',), '%M', 2), + (('second',), '%S', 2), + (('microsecond',), '%f', 6), + (('second', 'microsecond'), '%S.%f', 0), + (('tzinfo',), '%z', 0), + (('tzinfo',), '%Z', 0), + (('day_of_week',), '%a', 0), + (('day_of_week',), '%A', 0), + (('meridiem',), '%p', 0), + ] + + if dayfirst: + datetime_attrs_to_format.remove(day_attribute_and_format) + datetime_attrs_to_format.insert(0, day_attribute_and_format) + + try: + parsed_datetime = dt_str_parse(dt_str, dayfirst=dayfirst) + except (ValueError, OverflowError): + # In case the datetime can't be parsed, its format cannot be guessed + return None + + if parsed_datetime is None: + return None + + # the default dt_str_split from dateutil will never raise here; we assume + # that any user-provided function will not either. 
+    tokens = dt_str_split(dt_str)
+
+    # Normalize offset part of tokens.
+    # There are multiple formats for the timezone offset.
+    # To pass the comparison condition between the output of `strftime` and
+    # joined tokens, which is carried out at the final step of the function,
+    # the offset part of the tokens must match the '%z' format like '+0900'
+    # instead of '+09:00'.
+    if parsed_datetime.tzinfo is not None:
+        offset_index = None
+        if len(tokens) > 0 and tokens[-1] == 'Z':
+            # the last 'Z' means zero offset
+            offset_index = -1
+        elif len(tokens) > 1 and tokens[-2] in ('+', '-'):
+            # ex. [..., '+', '0900']
+            offset_index = -2
+        elif len(tokens) > 3 and tokens[-4] in ('+', '-'):
+            # ex. [..., '+', '09', ':', '00']
+            offset_index = -4
+
+        if offset_index is not None:
+            # If the input string has a timezone offset like '+0900',
+            # the offset is separated into two tokens, ex. ['+', '0900'].
+            # This separation will prevent subsequent processing
+            # from correctly parsing the time zone format.
+            # So in addition to the format normalization, we rejoin them here.
+            tokens[offset_index] = parsed_datetime.strftime("%z")
+            tokens = tokens[:offset_index + 1 or None]
+
+    format_guess = [None] * len(tokens)
+    found_attrs = set()
+
+    for attrs, attr_format, padding in datetime_attrs_to_format:
+        # If a given attribute has been placed in the format string, skip
+        # over other formats for that same underlying attribute (IE, month
+        # can be represented in multiple different ways)
+        if set(attrs) & found_attrs:
+            continue
+
+        if parsed_datetime.tzinfo is None and attr_format in ("%Z", "%z"):
+            continue
+
+        parsed_formatted = parsed_datetime.strftime(attr_format)
+        for i, token_format in enumerate(format_guess):
+            token_filled = tokens[i].zfill(padding)
+            if token_format is None and token_filled == parsed_formatted:
+                format_guess[i] = attr_format
+                tokens[i] = token_filled
+                found_attrs.update(attrs)
+                break
+
+    # Only consider it a valid guess if we have a year, month and day
+    if len({'year', 'month', 'day'} & found_attrs) != 3:
+        return None
+
+    output_format = []
+    for i, guess in enumerate(format_guess):
+        if guess is not None:
+            # Either fill in the format placeholder (like %Y)
+            output_format.append(guess)
+        else:
+            # Or just the token separator (IE, the dashes in "01-01-2013")
+            try:
+                # If the token is numeric, then we likely didn't parse it
+                # properly, so our guess is wrong
+                float(tokens[i])
+                return None
+            except ValueError:
+                pass
+
+            output_format.append(tokens[i])
+
+    guessed_format = ''.join(output_format)
+
+    # rebuild string, capturing any inferred padding
+    dt_str = ''.join(tokens)
+    if parsed_datetime.strftime(guessed_format) == dt_str:
+        return guessed_format
+    else:
+        return None
+
+
+@cython.wraparound(False)
+@cython.boundscheck(False)
+cdef inline object convert_to_unicode(object item, bint keep_trivial_numbers):
+    """
+    Convert `item` to str.
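+    Trivial numbers (an int equal to zero, or a float that is zero or NaN)
+    are returned unchanged when ``keep_trivial_numbers`` is True.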
+
+    Parameters
+    ----------
+    item : object
+    keep_trivial_numbers : bool
+        if True, then conversion (to string from integer/float zero)
+        is not performed
+
+    Returns
+    -------
+    str or int or float
+    """
+    cdef:
+        float64_t float_item
+
+    if keep_trivial_numbers:
+        if isinstance(item, int):
+            if item == 0:
+                return item
+        elif isinstance(item, float):
+            float_item = item
+            if float_item == 0.0 or float_item != float_item:
+                return item
+
+    if not isinstance(item, str):
+        item = PyObject_Str(item)
+
+    return item
+
+
+@cython.wraparound(False)
+@cython.boundscheck(False)
+def concat_date_cols(tuple date_cols, bint keep_trivial_numbers=True) -> np.ndarray:
+    """
+    Concatenates elements from numpy arrays in `date_cols` into strings.
+
+    Parameters
+    ----------
+    date_cols : tuple[ndarray]
+    keep_trivial_numbers : bool, default True
+        if True and len(date_cols) == 1, then
+        conversion (to string from integer/float zero) is not performed
+
+    Returns
+    -------
+    arr_of_rows : ndarray[object]
+
+    Examples
+    --------
+    >>> dates=np.array(['3/31/2019', '4/31/2019'], dtype=object)
+    >>> times=np.array(['11:20', '10:45'], dtype=object)
+    >>> result = concat_date_cols((dates, times))
+    >>> result
+    array(['3/31/2019 11:20', '4/31/2019 10:45'], dtype=object)
+    """
+    cdef:
+        Py_ssize_t rows_count = 0, col_count = len(date_cols)
+        Py_ssize_t col_idx, row_idx
+        list list_to_join
+        cnp.ndarray[object] iters
+        object[::1] iters_view
+        flatiter it
+        cnp.ndarray[object] result
+        object[:] result_view
+
+    if col_count == 0:
+        return np.zeros(0, dtype=object)
+
+    if not all(is_array(array) for array in date_cols):
+        raise ValueError("not all elements from date_cols are numpy arrays")
+
+    rows_count = min(len(array) for array in date_cols)
+    result = np.zeros(rows_count, dtype=object)
+    result_view = result
+
+    if col_count == 1:
+        array = date_cols[0]
+        it = PyArray_IterNew(array)
+        for row_idx in range(rows_count):
+            item = PyArray_GETITEM(array, PyArray_ITER_DATA(it))
+            result_view[row_idx] = convert_to_unicode(item,
+                                                      keep_trivial_numbers)
+            PyArray_ITER_NEXT(it)
+    else:
+        # create fixed size list - more efficient memory allocation
+        list_to_join = [None] * col_count
+        iters = np.zeros(col_count, dtype=object)
+
+        # create memoryview of iters ndarray, that will contain some
+        # flatiter's for each array in `date_cols` - more efficient indexing
+        iters_view = iters
+        for col_idx, array in enumerate(date_cols):
+            iters_view[col_idx] = PyArray_IterNew(array)
+
+        # array elements that are on the same line are converted to one string
+        for row_idx in range(rows_count):
+            for col_idx, array in enumerate(date_cols):
+                # this cast is needed, because we did not find a way
+                # to efficiently store `flatiter` type objects in ndarray
+                it = <flatiter>iters_view[col_idx]
+                item = PyArray_GETITEM(array, PyArray_ITER_DATA(it))
+                list_to_join[col_idx] = convert_to_unicode(item, False)
+                PyArray_ITER_NEXT(it)
+            result_view[row_idx] = " ".join(list_to_join)
+
+    return result
+
+
+cpdef str get_rule_month(str source):
+    """
+    Return starting month of given freq, default is December.
+
+    Parameters
+    ----------
+    source : str
+        Derived from `freq.rule_code` or `freq.freqstr`.
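+        For anchored frequencies this looks like 'A-JAN' or 'Q-DEC';
+        unanchored codes such as 'D' have no dash and default to 'DEC'.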
+ + Returns + ------- + rule_month: str + + Examples + -------- + >>> get_rule_month('D') + 'DEC' + + >>> get_rule_month('A-JAN') + 'JAN' + """ + source = source.upper() + if "-" not in source: + return "DEC" + else: + return source.split("-")[1] diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/period.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/period.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..4d63966 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/period.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/period.pxd b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/period.pxd new file mode 100644 index 0000000..46c6e52 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/period.pxd @@ -0,0 +1,7 @@ +from numpy cimport int64_t + +from .np_datetime cimport npy_datetimestruct + + +cdef bint is_period_object(object obj) +cdef int64_t get_period_ordinal(npy_datetimestruct *dts, int freq) nogil diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/period.pyi b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/period.pyi new file mode 100644 index 0000000..2f60df0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/period.pyi @@ -0,0 +1,125 @@ +from typing import Literal + +import numpy as np + +from pandas._libs.tslibs.nattype import NaTType +from pandas._libs.tslibs.offsets import BaseOffset +from pandas._libs.tslibs.timestamps import Timestamp +from pandas._typing import ( + Frequency, + Timezone, + npt, +) + +INVALID_FREQ_ERR_MSG: str +DIFFERENT_FREQ: str + +class IncompatibleFrequency(ValueError): ... + +def periodarr_to_dt64arr( + periodarr: npt.NDArray[np.int64], # const int64_t[:] + freq: int, +) -> npt.NDArray[np.int64]: ... +def period_asfreq_arr( + arr: npt.NDArray[np.int64], + freq1: int, + freq2: int, + end: bool, +) -> npt.NDArray[np.int64]: ... +def get_period_field_arr( + field: str, + arr: npt.NDArray[np.int64], # const int64_t[:] + freq: int, +) -> npt.NDArray[np.int64]: ... +def from_ordinals( + values: npt.NDArray[np.int64], # const int64_t[:] + freq: Frequency, +) -> npt.NDArray[np.int64]: ... +def extract_ordinals( + values: npt.NDArray[np.object_], + freq: Frequency | int, +) -> npt.NDArray[np.int64]: ... +def extract_freq( + values: npt.NDArray[np.object_], +) -> BaseOffset: ... + +# exposed for tests +def period_asfreq(ordinal: int, freq1: int, freq2: int, end: bool) -> int: ... +def period_ordinal( + y: int, m: int, d: int, h: int, min: int, s: int, us: int, ps: int, freq: int +) -> int: ... +def freq_to_dtype_code(freq: BaseOffset) -> int: ... +def validate_end_alias(how: str) -> Literal["E", "S"]: ... + +class Period: + ordinal: int # int64_t + freq: BaseOffset + + # error: "__new__" must return a class instance (got "Union[Period, NaTType]") + def __new__( # type: ignore[misc] + cls, + value=..., + freq: int | str | None = ..., + ordinal: int | None = ..., + year: int | None = ..., + month: int | None = ..., + quarter: int | None = ..., + day: int | None = ..., + hour: int | None = ..., + minute: int | None = ..., + second: int | None = ..., + ) -> Period | NaTType: ... + @classmethod + def _maybe_convert_freq(cls, freq) -> BaseOffset: ... + @classmethod + def _from_ordinal(cls, ordinal: int, freq) -> Period: ... + @classmethod + def now(cls, freq: BaseOffset = ...) -> Period: ... + def strftime(self, fmt: str) -> str: ... 
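+    # Note: in addition to the standard datetime directives, strftime
+    # accepts the period-specific %q (quarter), %f/%F (fiscal year
+    # without/with century) and %l/%u/%n (millisecond/microsecond/
+    # nanosecond); see _period_strftime in period.pyx.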
+ def to_timestamp( + self, + freq: str | BaseOffset | None = ..., + how: str = ..., + tz: Timezone | None = ..., + ) -> Timestamp: ... + def asfreq(self, freq: str, how: str = ...) -> Period: ... + @property + def freqstr(self) -> str: ... + @property + def is_leap_year(self) -> bool: ... + @property + def daysinmonth(self) -> int: ... + @property + def days_in_month(self) -> int: ... + @property + def qyear(self) -> int: ... + @property + def quarter(self) -> int: ... + @property + def day_of_year(self) -> int: ... + @property + def weekday(self) -> int: ... + @property + def day_of_week(self) -> int: ... + @property + def week(self) -> int: ... + @property + def weekofyear(self) -> int: ... + @property + def second(self) -> int: ... + @property + def minute(self) -> int: ... + @property + def hour(self) -> int: ... + @property + def day(self) -> int: ... + @property + def month(self) -> int: ... + @property + def year(self) -> int: ... + @property + def end_time(self) -> Timestamp: ... + @property + def start_time(self) -> Timestamp: ... + def __sub__(self, other) -> Period | BaseOffset: ... + def __add__(self, other) -> Period: ... diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/period.pyx b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/period.pyx new file mode 100644 index 0000000..b2ea2e7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/period.pyx @@ -0,0 +1,2627 @@ +import warnings + +cimport numpy as cnp +from cpython.object cimport ( + Py_EQ, + Py_NE, + PyObject_RichCompare, + PyObject_RichCompareBool, +) +from numpy cimport ( + int64_t, + ndarray, +) + +import numpy as np + +cnp.import_array() + +from libc.stdlib cimport ( + free, + malloc, +) +from libc.string cimport ( + memset, + strlen, +) +from libc.time cimport ( + strftime, + tm, +) + +import cython + +from cpython.datetime cimport ( + PyDate_Check, + PyDateTime_Check, + PyDateTime_IMPORT, + PyDelta_Check, + datetime, +) + +# import datetime C API +PyDateTime_IMPORT + +from pandas._libs.tslibs.np_datetime cimport ( + NPY_DATETIMEUNIT, + NPY_FR_D, + NPY_FR_us, + check_dts_bounds, + dt64_to_dtstruct, + dtstruct_to_dt64, + npy_datetimestruct, + pandas_datetime_to_datetimestruct, +) + + +cdef extern from "src/datetime/np_datetime.h": + int64_t npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT fr, + npy_datetimestruct *d) nogil + +cimport pandas._libs.tslibs.util as util + +from pandas._libs.tslibs.timedeltas import Timedelta +from pandas._libs.tslibs.timestamps import Timestamp + +from pandas._libs.tslibs.ccalendar cimport ( + c_MONTH_NUMBERS, + dayofweek, + get_day_of_year, + get_days_in_month, + get_week_of_year, + is_leapyear, +) +from pandas._libs.tslibs.timedeltas cimport ( + delta_to_nanoseconds, + is_any_td_scalar, +) + +from pandas._libs.tslibs.conversion import ensure_datetime64ns + +from pandas._libs.tslibs.dtypes cimport ( + FR_ANN, + FR_BUS, + FR_DAY, + FR_HR, + FR_MIN, + FR_MS, + FR_MTH, + FR_NS, + FR_QTR, + FR_SEC, + FR_UND, + FR_US, + FR_WK, + PeriodDtypeBase, + attrname_to_abbrevs, +) +from pandas._libs.tslibs.parsing cimport quarter_to_myear + +from pandas._libs.tslibs.parsing import parse_time_string + +from pandas._libs.tslibs.nattype cimport ( + NPY_NAT, + _nat_scalar_rules, + c_NaT as NaT, + c_nat_strings as nat_strings, + checknull_with_nat, +) +from pandas._libs.tslibs.offsets cimport ( + BaseOffset, + is_offset_object, + is_tick_object, + to_offset, +) + +from pandas._libs.tslibs.offsets import INVALID_FREQ_ERR_MSG + +cdef: + enum: + INT32_MIN = 
-2_147_483_648LL + + +ctypedef struct asfreq_info: + int64_t intraday_conversion_factor + int is_end + int to_end + int from_end + +ctypedef int64_t (*freq_conv_func)(int64_t, asfreq_info*) nogil + + +cdef extern from *: + """ + // must use npy typedef b/c int64_t is aliased in cython-generated c + // unclear why we need LL for that row. + // see https://github.com/pandas-dev/pandas/pull/34416/ + static npy_int64 daytime_conversion_factor_matrix[7][7] = { + {1, 24, 1440, 86400, 86400000, 86400000000, 86400000000000}, + {0LL, 1LL, 60LL, 3600LL, 3600000LL, 3600000000LL, 3600000000000LL}, + {0, 0, 1, 60, 60000, 60000000, 60000000000}, + {0, 0, 0, 1, 1000, 1000000, 1000000000}, + {0, 0, 0, 0, 1, 1000, 1000000}, + {0, 0, 0, 0, 0, 1, 1000}, + {0, 0, 0, 0, 0, 0, 1}}; + """ + int64_t daytime_conversion_factor_matrix[7][7] + + +cdef int max_value(int left, int right) nogil: + if left > right: + return left + return right + + +cdef int min_value(int left, int right) nogil: + if left < right: + return left + return right + + +cdef int64_t get_daytime_conversion_factor(int from_index, int to_index) nogil: + cdef: + int row = min_value(from_index, to_index) + int col = max_value(from_index, to_index) + # row or col < 6 means frequency strictly lower than Daily, which + # do not use daytime_conversion_factors + if row < 6: + return 0 + elif col < 6: + return 0 + return daytime_conversion_factor_matrix[row - 6][col - 6] + + +cdef int64_t nofunc(int64_t ordinal, asfreq_info *af_info) nogil: + return INT32_MIN + + +cdef int64_t no_op(int64_t ordinal, asfreq_info *af_info) nogil: + return ordinal + + +cdef freq_conv_func get_asfreq_func(int from_freq, int to_freq) nogil: + cdef: + int from_group = get_freq_group(from_freq) + int to_group = get_freq_group(to_freq) + + if from_group == FR_UND: + from_group = FR_DAY + + if from_group == FR_BUS: + if to_group == FR_ANN: + return asfreq_BtoA + elif to_group == FR_QTR: + return asfreq_BtoQ + elif to_group == FR_MTH: + return asfreq_BtoM + elif to_group == FR_WK: + return asfreq_BtoW + elif to_group == FR_BUS: + return no_op + elif to_group in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]: + return asfreq_BtoDT + else: + return nofunc + + elif to_group == FR_BUS: + if from_group == FR_ANN: + return asfreq_AtoB + elif from_group == FR_QTR: + return asfreq_QtoB + elif from_group == FR_MTH: + return asfreq_MtoB + elif from_group == FR_WK: + return asfreq_WtoB + elif from_group in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]: + return asfreq_DTtoB + else: + return nofunc + + elif from_group == FR_ANN: + if to_group == FR_ANN: + return asfreq_AtoA + elif to_group == FR_QTR: + return asfreq_AtoQ + elif to_group == FR_MTH: + return asfreq_AtoM + elif to_group == FR_WK: + return asfreq_AtoW + elif to_group in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]: + return asfreq_AtoDT + else: + return nofunc + + elif from_group == FR_QTR: + if to_group == FR_ANN: + return asfreq_QtoA + elif to_group == FR_QTR: + return asfreq_QtoQ + elif to_group == FR_MTH: + return asfreq_QtoM + elif to_group == FR_WK: + return asfreq_QtoW + elif to_group in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]: + return asfreq_QtoDT + else: + return nofunc + + elif from_group == FR_MTH: + if to_group == FR_ANN: + return asfreq_MtoA + elif to_group == FR_QTR: + return asfreq_MtoQ + elif to_group == FR_MTH: + return no_op + elif to_group == FR_WK: + return asfreq_MtoW + elif to_group in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]: + return asfreq_MtoDT + else: + 
return nofunc + + elif from_group == FR_WK: + if to_group == FR_ANN: + return asfreq_WtoA + elif to_group == FR_QTR: + return asfreq_WtoQ + elif to_group == FR_MTH: + return asfreq_WtoM + elif to_group == FR_WK: + return asfreq_WtoW + elif to_group in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]: + return asfreq_WtoDT + else: + return nofunc + + elif from_group in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]: + if to_group == FR_ANN: + return asfreq_DTtoA + elif to_group == FR_QTR: + return asfreq_DTtoQ + elif to_group == FR_MTH: + return asfreq_DTtoM + elif to_group == FR_WK: + return asfreq_DTtoW + elif to_group in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]: + if from_group > to_group: + return downsample_daytime + else: + return upsample_daytime + + else: + return nofunc + + else: + return nofunc + + +# -------------------------------------------------------------------- +# Frequency Conversion Helpers + +cdef int64_t DtoB_weekday(int64_t unix_date) nogil: + return ((unix_date + 4) // 7) * 5 + ((unix_date + 4) % 7) - 4 + + +cdef int64_t DtoB(npy_datetimestruct *dts, int roll_back, + int64_t unix_date) nogil: + # calculate the current week (counting from 1970-01-01) treating + # sunday as last day of a week + cdef: + int day_of_week = dayofweek(dts.year, dts.month, dts.day) + + if roll_back == 1: + if day_of_week > 4: + # change to friday before weekend + unix_date -= (day_of_week - 4) + else: + if day_of_week > 4: + # change to Monday after weekend + unix_date += (7 - day_of_week) + + return DtoB_weekday(unix_date) + + +cdef inline int64_t upsample_daytime(int64_t ordinal, asfreq_info *af_info) nogil: + if af_info.is_end: + return (ordinal + 1) * af_info.intraday_conversion_factor - 1 + else: + return ordinal * af_info.intraday_conversion_factor + + +cdef inline int64_t downsample_daytime(int64_t ordinal, asfreq_info *af_info) nogil: + return ordinal // af_info.intraday_conversion_factor + + +cdef inline int64_t transform_via_day(int64_t ordinal, + asfreq_info *af_info, + freq_conv_func first_func, + freq_conv_func second_func) nogil: + cdef: + int64_t result + + result = first_func(ordinal, af_info) + result = second_func(result, af_info) + return result + + +# -------------------------------------------------------------------- +# Conversion _to_ Daily Freq + +cdef int64_t asfreq_AtoDT(int64_t ordinal, asfreq_info *af_info) nogil: + cdef: + int64_t unix_date + npy_datetimestruct dts + + ordinal += af_info.is_end + + dts.year = ordinal + 1970 + dts.month = 1 + adjust_dts_for_month(&dts, af_info.from_end) + + unix_date = unix_date_from_ymd(dts.year, dts.month, 1) + unix_date -= af_info.is_end + return upsample_daytime(unix_date, af_info) + + +cdef int64_t asfreq_QtoDT(int64_t ordinal, asfreq_info *af_info) nogil: + cdef: + int64_t unix_date + npy_datetimestruct dts + + ordinal += af_info.is_end + + dts.year = ordinal // 4 + 1970 + dts.month = (ordinal % 4) * 3 + 1 + adjust_dts_for_month(&dts, af_info.from_end) + + unix_date = unix_date_from_ymd(dts.year, dts.month, 1) + unix_date -= af_info.is_end + return upsample_daytime(unix_date, af_info) + + +cdef int64_t asfreq_MtoDT(int64_t ordinal, asfreq_info *af_info) nogil: + cdef: + int64_t unix_date + int year, month + + ordinal += af_info.is_end + + year = ordinal // 12 + 1970 + month = ordinal % 12 + 1 + + unix_date = unix_date_from_ymd(year, month, 1) + unix_date -= af_info.is_end + return upsample_daytime(unix_date, af_info) + + +cdef int64_t asfreq_WtoDT(int64_t ordinal, asfreq_info *af_info) nogil: + ordinal 
= (ordinal * 7 + af_info.from_end - 4 + + (7 - 1) * (af_info.is_end - 1)) + return upsample_daytime(ordinal, af_info) + + +# -------------------------------------------------------------------- +# Conversion _to_ BusinessDay Freq + +cdef int64_t asfreq_AtoB(int64_t ordinal, asfreq_info *af_info) nogil: + cdef: + int roll_back + npy_datetimestruct dts + int64_t unix_date = asfreq_AtoDT(ordinal, af_info) + + pandas_datetime_to_datetimestruct(unix_date, NPY_FR_D, &dts) + roll_back = af_info.is_end + return DtoB(&dts, roll_back, unix_date) + + +cdef int64_t asfreq_QtoB(int64_t ordinal, asfreq_info *af_info) nogil: + cdef: + int roll_back + npy_datetimestruct dts + int64_t unix_date = asfreq_QtoDT(ordinal, af_info) + + pandas_datetime_to_datetimestruct(unix_date, NPY_FR_D, &dts) + roll_back = af_info.is_end + return DtoB(&dts, roll_back, unix_date) + + +cdef int64_t asfreq_MtoB(int64_t ordinal, asfreq_info *af_info) nogil: + cdef: + int roll_back + npy_datetimestruct dts + int64_t unix_date = asfreq_MtoDT(ordinal, af_info) + + pandas_datetime_to_datetimestruct(unix_date, NPY_FR_D, &dts) + roll_back = af_info.is_end + return DtoB(&dts, roll_back, unix_date) + + +cdef int64_t asfreq_WtoB(int64_t ordinal, asfreq_info *af_info) nogil: + cdef: + int roll_back + npy_datetimestruct dts + int64_t unix_date = asfreq_WtoDT(ordinal, af_info) + + pandas_datetime_to_datetimestruct(unix_date, NPY_FR_D, &dts) + roll_back = af_info.is_end + return DtoB(&dts, roll_back, unix_date) + + +cdef int64_t asfreq_DTtoB(int64_t ordinal, asfreq_info *af_info) nogil: + cdef: + int roll_back + npy_datetimestruct dts + int64_t unix_date = downsample_daytime(ordinal, af_info) + + pandas_datetime_to_datetimestruct(unix_date, NPY_FR_D, &dts) + # This usage defines roll_back the opposite way from the others + roll_back = 1 - af_info.is_end + return DtoB(&dts, roll_back, unix_date) + + +# ---------------------------------------------------------------------- +# Conversion _from_ Daily Freq + +cdef int64_t asfreq_DTtoA(int64_t ordinal, asfreq_info *af_info) nogil: + cdef: + npy_datetimestruct dts + + ordinal = downsample_daytime(ordinal, af_info) + pandas_datetime_to_datetimestruct(ordinal, NPY_FR_D, &dts) + return dts_to_year_ordinal(&dts, af_info.to_end) + + +cdef int DtoQ_yq(int64_t ordinal, asfreq_info *af_info, npy_datetimestruct* dts) nogil: + cdef: + int quarter + + pandas_datetime_to_datetimestruct(ordinal, NPY_FR_D, dts) + adjust_dts_for_qtr(dts, af_info.to_end) + + quarter = month_to_quarter(dts.month) + return quarter + + +cdef int64_t asfreq_DTtoQ(int64_t ordinal, asfreq_info *af_info) nogil: + cdef: + int quarter + npy_datetimestruct dts + + ordinal = downsample_daytime(ordinal, af_info) + + quarter = DtoQ_yq(ordinal, af_info, &dts) + return ((dts.year - 1970) * 4 + quarter - 1) + + +cdef int64_t asfreq_DTtoM(int64_t ordinal, asfreq_info *af_info) nogil: + cdef: + npy_datetimestruct dts + + ordinal = downsample_daytime(ordinal, af_info) + pandas_datetime_to_datetimestruct(ordinal, NPY_FR_D, &dts) + return dts_to_month_ordinal(&dts) + + +cdef int64_t asfreq_DTtoW(int64_t ordinal, asfreq_info *af_info) nogil: + ordinal = downsample_daytime(ordinal, af_info) + return unix_date_to_week(ordinal, af_info.to_end) + + +cdef int64_t unix_date_to_week(int64_t unix_date, int to_end) nogil: + return (unix_date + 3 - to_end) // 7 + 1 + + +# -------------------------------------------------------------------- +# Conversion _from_ BusinessDay Freq + +cdef int64_t asfreq_BtoDT(int64_t ordinal, asfreq_info *af_info) nogil: + ordinal 
= ((ordinal + 3) // 5) * 7 + (ordinal + 3) % 5 - 3 + return upsample_daytime(ordinal, af_info) + + +cdef int64_t asfreq_BtoA(int64_t ordinal, asfreq_info *af_info) nogil: + return transform_via_day(ordinal, af_info, + asfreq_BtoDT, + asfreq_DTtoA) + + +cdef int64_t asfreq_BtoQ(int64_t ordinal, asfreq_info *af_info) nogil: + return transform_via_day(ordinal, af_info, + asfreq_BtoDT, + asfreq_DTtoQ) + + +cdef int64_t asfreq_BtoM(int64_t ordinal, asfreq_info *af_info) nogil: + return transform_via_day(ordinal, af_info, + asfreq_BtoDT, + asfreq_DTtoM) + + +cdef int64_t asfreq_BtoW(int64_t ordinal, asfreq_info *af_info) nogil: + return transform_via_day(ordinal, af_info, + asfreq_BtoDT, + asfreq_DTtoW) + + +# ---------------------------------------------------------------------- +# Conversion _from_ Annual Freq + +cdef int64_t asfreq_AtoA(int64_t ordinal, asfreq_info *af_info) nogil: + return transform_via_day(ordinal, af_info, + asfreq_AtoDT, + asfreq_DTtoA) + + +cdef int64_t asfreq_AtoQ(int64_t ordinal, asfreq_info *af_info) nogil: + return transform_via_day(ordinal, af_info, + asfreq_AtoDT, + asfreq_DTtoQ) + + +cdef int64_t asfreq_AtoM(int64_t ordinal, asfreq_info *af_info) nogil: + return transform_via_day(ordinal, af_info, + asfreq_AtoDT, + asfreq_DTtoM) + + +cdef int64_t asfreq_AtoW(int64_t ordinal, asfreq_info *af_info) nogil: + return transform_via_day(ordinal, af_info, + asfreq_AtoDT, + asfreq_DTtoW) + + +# ---------------------------------------------------------------------- +# Conversion _from_ Quarterly Freq + +cdef int64_t asfreq_QtoQ(int64_t ordinal, asfreq_info *af_info) nogil: + return transform_via_day(ordinal, af_info, + asfreq_QtoDT, + asfreq_DTtoQ) + + +cdef int64_t asfreq_QtoA(int64_t ordinal, asfreq_info *af_info) nogil: + return transform_via_day(ordinal, af_info, + asfreq_QtoDT, + asfreq_DTtoA) + + +cdef int64_t asfreq_QtoM(int64_t ordinal, asfreq_info *af_info) nogil: + return transform_via_day(ordinal, af_info, + asfreq_QtoDT, + asfreq_DTtoM) + + +cdef int64_t asfreq_QtoW(int64_t ordinal, asfreq_info *af_info) nogil: + return transform_via_day(ordinal, af_info, + asfreq_QtoDT, + asfreq_DTtoW) + + +# ---------------------------------------------------------------------- +# Conversion _from_ Monthly Freq + +cdef int64_t asfreq_MtoA(int64_t ordinal, asfreq_info *af_info) nogil: + return transform_via_day(ordinal, af_info, + asfreq_MtoDT, + asfreq_DTtoA) + + +cdef int64_t asfreq_MtoQ(int64_t ordinal, asfreq_info *af_info) nogil: + return transform_via_day(ordinal, af_info, + asfreq_MtoDT, + asfreq_DTtoQ) + + +cdef int64_t asfreq_MtoW(int64_t ordinal, asfreq_info *af_info) nogil: + return transform_via_day(ordinal, af_info, + asfreq_MtoDT, + asfreq_DTtoW) + + +# ---------------------------------------------------------------------- +# Conversion _from_ Weekly Freq + +cdef int64_t asfreq_WtoA(int64_t ordinal, asfreq_info *af_info) nogil: + return transform_via_day(ordinal, af_info, + asfreq_WtoDT, + asfreq_DTtoA) + + +cdef int64_t asfreq_WtoQ(int64_t ordinal, asfreq_info *af_info) nogil: + return transform_via_day(ordinal, af_info, + asfreq_WtoDT, + asfreq_DTtoQ) + + +cdef int64_t asfreq_WtoM(int64_t ordinal, asfreq_info *af_info) nogil: + return transform_via_day(ordinal, af_info, + asfreq_WtoDT, + asfreq_DTtoM) + + +cdef int64_t asfreq_WtoW(int64_t ordinal, asfreq_info *af_info) nogil: + return transform_via_day(ordinal, af_info, + asfreq_WtoDT, + asfreq_DTtoW) + + +# ---------------------------------------------------------------------- + +@cython.cdivision +cdef char* 
c_strftime(npy_datetimestruct *dts, char *fmt):
+    """
+    Generate a nice string representation of the period
+    object, originally from DateObject_strftime
+
+    Parameters
+    ----------
+    dts : npy_datetimestruct*
+    fmt : char*
+
+    Returns
+    -------
+    result : char*
+    """
+    cdef:
+        tm c_date
+        char *result
+        int result_len = strlen(fmt) + 50
+
+    c_date.tm_sec = dts.sec
+    c_date.tm_min = dts.min
+    c_date.tm_hour = dts.hour
+    c_date.tm_mday = dts.day
+    c_date.tm_mon = dts.month - 1
+    c_date.tm_year = dts.year - 1900
+    c_date.tm_wday = (dayofweek(dts.year, dts.month, dts.day) + 1) % 7
+    c_date.tm_yday = get_day_of_year(dts.year, dts.month, dts.day) - 1
+    c_date.tm_isdst = -1
+
+    result = <char*>malloc(result_len * sizeof(char))
+
+    strftime(result, result_len, fmt, &c_date)
+
+    return result
+
+
+# ----------------------------------------------------------------------
+# Conversion between date_info and npy_datetimestruct
+
+cdef inline int get_freq_group(int freq) nogil:
+    # Note: this is equivalent to libfrequencies.get_freq_group, specialized
+    # to integer argument.
+    return (freq // 1000) * 1000
+
+
+cdef inline int get_freq_group_index(int freq) nogil:
+    return freq // 1000
+
+
+cdef void adjust_dts_for_month(npy_datetimestruct* dts, int from_end) nogil:
+    if from_end != 12:
+        dts.month += from_end
+        if dts.month > 12:
+            dts.month -= 12
+        else:
+            dts.year -= 1
+
+
+cdef void adjust_dts_for_qtr(npy_datetimestruct* dts, int to_end) nogil:
+    if to_end != 12:
+        dts.month -= to_end
+        if dts.month <= 0:
+            dts.month += 12
+        else:
+            dts.year += 1
+
+
+# Find the unix_date (days elapsed since datetime(1970, 1, 1))
+# for the given year/month/day.
+# Assumes GREGORIAN_CALENDAR
+cdef int64_t unix_date_from_ymd(int year, int month, int day) nogil:
+    # Calculate the absolute date
+    cdef:
+        npy_datetimestruct dts
+        int64_t unix_date
+
+    memset(&dts, 0, sizeof(npy_datetimestruct))
+    dts.year = year
+    dts.month = month
+    dts.day = day
+    unix_date = npy_datetimestruct_to_datetime(NPY_FR_D, &dts)
+    return unix_date
+
+
+cdef inline int64_t dts_to_month_ordinal(npy_datetimestruct* dts) nogil:
+    # AKA: use npy_datetimestruct_to_datetime(NPY_FR_M, &dts)
+    return ((dts.year - 1970) * 12 + dts.month - 1)
+
+
+cdef inline int64_t dts_to_year_ordinal(npy_datetimestruct *dts, int to_end) nogil:
+    cdef:
+        int64_t result
+
+    result = npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT.NPY_FR_Y, dts)
+    if dts.month > to_end:
+        return result + 1
+    else:
+        return result
+
+
+cdef inline int64_t dts_to_qtr_ordinal(npy_datetimestruct* dts, int to_end) nogil:
+    cdef:
+        int quarter
+
+    adjust_dts_for_qtr(dts, to_end)
+    quarter = month_to_quarter(dts.month)
+    return ((dts.year - 1970) * 4 + quarter - 1)
+
+
+cdef inline int get_anchor_month(int freq, int freq_group) nogil:
+    cdef:
+        int fmonth
+    fmonth = freq - freq_group
+    if fmonth == 0:
+        fmonth = 12
+    return fmonth
+
+
+# specifically _dont_ use cdivision or else ordinals near -1 are assigned to
+# incorrect dates GH#19643
+@cython.cdivision(False)
+cdef int64_t get_period_ordinal(npy_datetimestruct *dts, int freq) nogil:
+    """
+    Generate an ordinal in period space
+
+    Parameters
+    ----------
+    dts: npy_datetimestruct*
+    freq : int
+
+    Returns
+    -------
+    period_ordinal : int64_t
+    """
+    cdef:
+        int64_t unix_date
+        int freq_group, fmonth, mdiff
+        NPY_DATETIMEUNIT unit
+
+    freq_group = get_freq_group(freq)
+
+    if freq_group == FR_ANN:
+        fmonth = get_anchor_month(freq, freq_group)
+        return dts_to_year_ordinal(dts, fmonth)
+
+    elif freq_group == FR_QTR:
+        fmonth = 
get_anchor_month(freq, freq_group) + return dts_to_qtr_ordinal(dts, fmonth) + + elif freq_group == FR_WK: + unix_date = npy_datetimestruct_to_datetime(NPY_FR_D, dts) + return unix_date_to_week(unix_date, freq - FR_WK) + + elif freq == FR_BUS: + unix_date = npy_datetimestruct_to_datetime(NPY_FR_D, dts) + return DtoB(dts, 0, unix_date) + + unit = get_unit(freq) + return npy_datetimestruct_to_datetime(unit, dts) + + +cdef NPY_DATETIMEUNIT get_unit(int freq) nogil: + """ + Convert the freq to the corresponding NPY_DATETIMEUNIT to pass + to npy_datetimestruct_to_datetime. + """ + if freq == FR_MTH: + return NPY_DATETIMEUNIT.NPY_FR_M + elif freq == FR_DAY: + return NPY_DATETIMEUNIT.NPY_FR_D + elif freq == FR_HR: + return NPY_DATETIMEUNIT.NPY_FR_h + elif freq == FR_MIN: + return NPY_DATETIMEUNIT.NPY_FR_m + elif freq == FR_SEC: + return NPY_DATETIMEUNIT.NPY_FR_s + elif freq == FR_MS: + return NPY_DATETIMEUNIT.NPY_FR_ms + elif freq == FR_US: + return NPY_DATETIMEUNIT.NPY_FR_us + elif freq == FR_NS: + return NPY_DATETIMEUNIT.NPY_FR_ns + elif freq == FR_UND: + # Default to Day + return NPY_DATETIMEUNIT.NPY_FR_D + + +cdef void get_date_info(int64_t ordinal, int freq, npy_datetimestruct *dts) nogil: + cdef: + int64_t unix_date, nanos + npy_datetimestruct dts2 + + unix_date = get_unix_date(ordinal, freq) + nanos = get_time_nanos(freq, unix_date, ordinal) + + pandas_datetime_to_datetimestruct(unix_date, NPY_FR_D, dts) + + dt64_to_dtstruct(nanos, &dts2) + dts.hour = dts2.hour + dts.min = dts2.min + dts.sec = dts2.sec + dts.us = dts2.us + dts.ps = dts2.ps + + +cdef int64_t get_unix_date(int64_t period_ordinal, int freq) nogil: + """ + Returns the proleptic Gregorian ordinal of the date, as an integer. + This corresponds to the number of days since Jan., 1st, 1970 AD. + When the instance has a frequency less than daily, the proleptic date + is calculated for the last day of the period. + + Parameters + ---------- + period_ordinal : int64_t + freq : int + + Returns + ------- + unix_date : int64_t number of days since datetime(1970, 1, 1) + """ + cdef: + asfreq_info af_info + freq_conv_func toDaily = NULL + + if freq == FR_DAY: + return period_ordinal + + toDaily = get_asfreq_func(freq, FR_DAY) + get_asfreq_info(freq, FR_DAY, True, &af_info) + return toDaily(period_ordinal, &af_info) + + +@cython.cdivision +cdef int64_t get_time_nanos(int freq, int64_t unix_date, int64_t ordinal) nogil: + """ + Find the number of nanoseconds after midnight on the given unix_date + that the ordinal represents in the given frequency. + + Parameters + ---------- + freq : int + unix_date : int64_t + ordinal : int64_t + + Returns + ------- + int64_t + """ + cdef: + int64_t sub, factor + int64_t nanos_in_day = 24 * 3600 * 10**9 + + freq = get_freq_group(freq) + + if freq <= FR_DAY: + return 0 + + elif freq == FR_NS: + factor = 1 + + elif freq == FR_US: + factor = 10**3 + + elif freq == FR_MS: + factor = 10**6 + + elif freq == FR_SEC: + factor = 10 **9 + + elif freq == FR_MIN: + factor = 10**9 * 60 + + else: + # We must have freq == FR_HR + factor = 10**9 * 3600 + + sub = ordinal - unix_date * (nanos_in_day / factor) + return sub * factor + + +cdef int get_yq(int64_t ordinal, int freq, npy_datetimestruct* dts): + """ + Find the year and quarter of a Period with the given ordinal and frequency + + Parameters + ---------- + ordinal : int64_t + freq : int + dts : *npy_datetimestruct + + Returns + ------- + quarter : int + describes the implied quarterly frequency associated with `freq` + + Notes + ----- + Sets dts.year in-place. 
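+    The year written into `dts` is the quarterly ("fiscal") year, which can
+    differ from the calendar year for anchored frequencies (see
+    adjust_dts_for_qtr).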
+ """ + cdef: + asfreq_info af_info + int qtr_freq + int64_t unix_date + int quarter + + unix_date = get_unix_date(ordinal, freq) + + if get_freq_group(freq) == FR_QTR: + qtr_freq = freq + else: + qtr_freq = FR_QTR + + get_asfreq_info(FR_DAY, qtr_freq, True, &af_info) + + quarter = DtoQ_yq(unix_date, &af_info, dts) + return quarter + + +cdef inline int month_to_quarter(int month) nogil: + return (month - 1) // 3 + 1 + + +# ---------------------------------------------------------------------- +# Period logic + +@cython.wraparound(False) +@cython.boundscheck(False) +def periodarr_to_dt64arr(const int64_t[:] periodarr, int freq): + """ + Convert array to datetime64 values from a set of ordinals corresponding to + periods per period convention. + """ + cdef: + int64_t[:] out + Py_ssize_t i, N + + if freq < 6000: # i.e. FR_DAY, hard-code to avoid need to cast + N = len(periodarr) + out = np.empty(N, dtype="i8") + + # We get here with freqs that do not correspond to a datetime64 unit + for i in range(N): + out[i] = period_ordinal_to_dt64(periodarr[i], freq) + + return out.base # .base to access underlying np.ndarray + + else: + # Short-circuit for performance + if freq == FR_NS: + return periodarr.base + + if freq == FR_US: + dta = periodarr.base.view("M8[us]") + elif freq == FR_MS: + dta = periodarr.base.view("M8[ms]") + elif freq == FR_SEC: + dta = periodarr.base.view("M8[s]") + elif freq == FR_MIN: + dta = periodarr.base.view("M8[m]") + elif freq == FR_HR: + dta = periodarr.base.view("M8[h]") + elif freq == FR_DAY: + dta = periodarr.base.view("M8[D]") + return ensure_datetime64ns(dta) + + +cdef void get_asfreq_info(int from_freq, int to_freq, + bint is_end, asfreq_info *af_info) nogil: + """ + Construct the `asfreq_info` object used to convert an ordinal from + `from_freq` to `to_freq`. + + Parameters + ---------- + from_freq : int + to_freq int + is_end : bool + af_info : *asfreq_info + """ + cdef: + int from_group = get_freq_group(from_freq) + int to_group = get_freq_group(to_freq) + + af_info.is_end = is_end + + af_info.intraday_conversion_factor = get_daytime_conversion_factor( + get_freq_group_index(max_value(from_group, FR_DAY)), + get_freq_group_index(max_value(to_group, FR_DAY))) + + if from_group == FR_WK: + af_info.from_end = calc_week_end(from_freq, from_group) + elif from_group == FR_ANN: + af_info.from_end = calc_a_year_end(from_freq, from_group) + elif from_group == FR_QTR: + af_info.from_end = calc_a_year_end(from_freq, from_group) + + if to_group == FR_WK: + af_info.to_end = calc_week_end(to_freq, to_group) + elif to_group == FR_ANN: + af_info.to_end = calc_a_year_end(to_freq, to_group) + elif to_group == FR_QTR: + af_info.to_end = calc_a_year_end(to_freq, to_group) + + +@cython.cdivision +cdef int calc_a_year_end(int freq, int group) nogil: + cdef: + int result = (freq - group) % 12 + if result == 0: + return 12 + else: + return result + + +cdef inline int calc_week_end(int freq, int group) nogil: + return freq - group + + +cpdef int64_t period_asfreq(int64_t ordinal, int freq1, int freq2, bint end): + """ + Convert period ordinal from one frequency to another, and if upsampling, + choose to use start ('S') or end ('E') of period. 
+ """ + cdef: + int64_t retval + + _period_asfreq(&ordinal, &retval, 1, freq1, freq2, end) + return retval + + +@cython.wraparound(False) +@cython.boundscheck(False) +def period_asfreq_arr(ndarray[int64_t] arr, int freq1, int freq2, bint end): + """ + Convert int64-array of period ordinals from one frequency to another, and + if upsampling, choose to use start ('S') or end ('E') of period. + """ + cdef: + Py_ssize_t n = len(arr) + Py_ssize_t increment = arr.strides[0] // 8 + ndarray[int64_t] result = np.empty(n, dtype=np.int64) + + _period_asfreq( + cnp.PyArray_DATA(arr), + cnp.PyArray_DATA(result), + n, + freq1, + freq2, + end, + increment, + ) + return result + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef void _period_asfreq( + int64_t* ordinals, + int64_t* out, + Py_ssize_t length, + int freq1, + int freq2, + bint end, + Py_ssize_t increment=1, +): + """See period_asfreq.__doc__""" + cdef: + Py_ssize_t i + freq_conv_func func + asfreq_info af_info + int64_t val + + if length == 1 and ordinals[0] == NPY_NAT: + # fastpath avoid calling get_asfreq_func + out[0] = NPY_NAT + return + + func = get_asfreq_func(freq1, freq2) + get_asfreq_info(freq1, freq2, end, &af_info) + + for i in range(length): + val = ordinals[i * increment] + if val != NPY_NAT: + val = func(val, &af_info) + out[i] = val + + +cpdef int64_t period_ordinal(int y, int m, int d, int h, int min, + int s, int us, int ps, int freq): + """ + Find the ordinal representation of the given datetime components at the + frequency `freq`. + + Parameters + ---------- + y : int + m : int + d : int + h : int + min : int + s : int + us : int + ps : int + + Returns + ------- + ordinal : int64_t + """ + cdef: + npy_datetimestruct dts + dts.year = y + dts.month = m + dts.day = d + dts.hour = h + dts.min = min + dts.sec = s + dts.us = us + dts.ps = ps + return get_period_ordinal(&dts, freq) + + +cdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq) except? 
-1: + cdef: + npy_datetimestruct dts + + if ordinal == NPY_NAT: + return NPY_NAT + + get_date_info(ordinal, freq, &dts) + + check_dts_bounds(&dts) + return dtstruct_to_dt64(&dts) + + +cdef str period_format(int64_t value, int freq, object fmt=None): + cdef: + int freq_group + + if value == NPY_NAT: + return "NaT" + + if isinstance(fmt, str): + fmt = fmt.encode("utf-8") + + if fmt is None: + freq_group = get_freq_group(freq) + if freq_group == FR_ANN: + fmt = b'%Y' + elif freq_group == FR_QTR: + fmt = b'%FQ%q' + elif freq_group == FR_MTH: + fmt = b'%Y-%m' + elif freq_group == FR_WK: + left = period_asfreq(value, freq, FR_DAY, 0) + right = period_asfreq(value, freq, FR_DAY, 1) + return f"{period_format(left, FR_DAY)}/{period_format(right, FR_DAY)}" + elif freq_group == FR_BUS or freq_group == FR_DAY: + fmt = b'%Y-%m-%d' + elif freq_group == FR_HR: + fmt = b'%Y-%m-%d %H:00' + elif freq_group == FR_MIN: + fmt = b'%Y-%m-%d %H:%M' + elif freq_group == FR_SEC: + fmt = b'%Y-%m-%d %H:%M:%S' + elif freq_group == FR_MS: + fmt = b'%Y-%m-%d %H:%M:%S.%l' + elif freq_group == FR_US: + fmt = b'%Y-%m-%d %H:%M:%S.%u' + elif freq_group == FR_NS: + fmt = b'%Y-%m-%d %H:%M:%S.%n' + else: + raise ValueError(f"Unknown freq: {freq}") + + return _period_strftime(value, freq, fmt) + + +cdef list extra_fmts = [(b"%q", b"^`AB`^"), + (b"%f", b"^`CD`^"), + (b"%F", b"^`EF`^"), + (b"%l", b"^`GH`^"), + (b"%u", b"^`IJ`^"), + (b"%n", b"^`KL`^")] + +cdef list str_extra_fmts = ["^`AB`^", "^`CD`^", "^`EF`^", + "^`GH`^", "^`IJ`^", "^`KL`^"] + +cdef str _period_strftime(int64_t value, int freq, bytes fmt): + cdef: + Py_ssize_t i + npy_datetimestruct dts + char *formatted + bytes pat, brepl + list found_pat = [False] * len(extra_fmts) + int year, quarter + str result, repl + + get_date_info(value, freq, &dts) + for i in range(len(extra_fmts)): + pat = extra_fmts[i][0] + brepl = extra_fmts[i][1] + if pat in fmt: + fmt = fmt.replace(pat, brepl) + found_pat[i] = True + + formatted = c_strftime(&dts, fmt) + + result = util.char_to_string(formatted) + free(formatted) + + for i in range(len(extra_fmts)): + if found_pat[i]: + + quarter = get_yq(value, freq, &dts) + + if i == 0: + repl = str(quarter) + elif i == 1: # %f, 2-digit year + repl = f"{(dts.year % 100):02d}" + elif i == 2: + repl = str(dts.year) + elif i == 3: + repl = f"{(value % 1_000):03d}" + elif i == 4: + repl = f"{(value % 1_000_000):06d}" + elif i == 5: + repl = f"{(value % 1_000_000_000):09d}" + + result = result.replace(str_extra_fmts[i], repl) + + return result + + +# ---------------------------------------------------------------------- +# period accessors + +ctypedef int (*accessor)(int64_t ordinal, int freq) except INT32_MIN + + +cdef int pyear(int64_t ordinal, int freq): + cdef: + npy_datetimestruct dts + get_date_info(ordinal, freq, &dts) + return dts.year + + +cdef int pqyear(int64_t ordinal, int freq): + cdef: + npy_datetimestruct dts + + get_yq(ordinal, freq, &dts) + return dts.year + + +cdef int pquarter(int64_t ordinal, int freq): + cdef: + int quarter + npy_datetimestruct dts + quarter = get_yq(ordinal, freq, &dts) + return quarter + + +cdef int pmonth(int64_t ordinal, int freq): + cdef: + npy_datetimestruct dts + get_date_info(ordinal, freq, &dts) + return dts.month + + +cdef int pday(int64_t ordinal, int freq): + cdef: + npy_datetimestruct dts + get_date_info(ordinal, freq, &dts) + return dts.day + + +cdef int pweekday(int64_t ordinal, int freq): + cdef: + npy_datetimestruct dts + get_date_info(ordinal, freq, &dts) + return dayofweek(dts.year, dts.month, 
dts.day)
+
+
+cdef int pday_of_year(int64_t ordinal, int freq):
+    cdef:
+        npy_datetimestruct dts
+    get_date_info(ordinal, freq, &dts)
+    return get_day_of_year(dts.year, dts.month, dts.day)
+
+
+cdef int pweek(int64_t ordinal, int freq):
+    cdef:
+        npy_datetimestruct dts
+    get_date_info(ordinal, freq, &dts)
+    return get_week_of_year(dts.year, dts.month, dts.day)
+
+
+cdef int phour(int64_t ordinal, int freq):
+    cdef:
+        npy_datetimestruct dts
+    get_date_info(ordinal, freq, &dts)
+    return dts.hour
+
+
+cdef int pminute(int64_t ordinal, int freq):
+    cdef:
+        npy_datetimestruct dts
+    get_date_info(ordinal, freq, &dts)
+    return dts.min
+
+
+cdef int psecond(int64_t ordinal, int freq):
+    cdef:
+        npy_datetimestruct dts
+    get_date_info(ordinal, freq, &dts)
+    return dts.sec
+
+
+cdef int pdays_in_month(int64_t ordinal, int freq):
+    cdef:
+        npy_datetimestruct dts
+    get_date_info(ordinal, freq, &dts)
+    return get_days_in_month(dts.year, dts.month)
+
+
+@cython.wraparound(False)
+@cython.boundscheck(False)
+def get_period_field_arr(str field, const int64_t[:] arr, int freq):
+    cdef:
+        Py_ssize_t i, sz
+        int64_t[:] out
+        accessor func
+
+    func = _get_accessor_func(field)
+    if func is NULL:
+        raise ValueError(f"Unrecognized field name: {field}")
+
+    sz = len(arr)
+    out = np.empty(sz, dtype=np.int64)
+
+    for i in range(sz):
+        if arr[i] == NPY_NAT:
+            out[i] = -1
+            continue
+        out[i] = func(arr[i], freq)
+
+    return out.base  # .base to access underlying np.ndarray
+
+
+cdef accessor _get_accessor_func(str field):
+    if field == "year":
+        return <accessor>pyear
+    elif field == "qyear":
+        return <accessor>pqyear
+    elif field == "quarter":
+        return <accessor>pquarter
+    elif field == "month":
+        return <accessor>pmonth
+    elif field == "day":
+        return <accessor>pday
+    elif field == "hour":
+        return <accessor>phour
+    elif field == "minute":
+        return <accessor>pminute
+    elif field == "second":
+        return <accessor>psecond
+    elif field == "week":
+        return <accessor>pweek
+    elif field == "day_of_year":
+        return <accessor>pday_of_year
+    elif field == "weekday" or field == "day_of_week":
+        return <accessor>pweekday
+    elif field == "days_in_month":
+        return <accessor>pdays_in_month
+    return NULL
+
+
+@cython.wraparound(False)
+@cython.boundscheck(False)
+def from_ordinals(const int64_t[:] values, freq):
+    cdef:
+        Py_ssize_t i, n = len(values)
+        int64_t[:] result = np.empty(len(values), dtype="i8")
+        int64_t val
+
+    freq = to_offset(freq)
+    if not isinstance(freq, BaseOffset):
+        raise ValueError("freq not specified and cannot be inferred")
+
+    for i in range(n):
+        val = values[i]
+        if val == NPY_NAT:
+            result[i] = NPY_NAT
+        else:
+            result[i] = Period(val, freq=freq).ordinal
+
+    return result.base
+
+
+@cython.wraparound(False)
+@cython.boundscheck(False)
+def extract_ordinals(ndarray[object] values, freq) -> np.ndarray:
+    # TODO: Change type to const object[:] when Cython supports that.
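+    # Each element is either NaT-like (-> NPY_NAT), an integer equal to
+    # NPY_NAT, a Period whose freq must match `freq`, or a value that can
+    # be converted via Period(value, freq=freq), e.g. the string '2020-01'
+    # with freq='M'.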
+ + cdef: + Py_ssize_t i, n = len(values) + int64_t[:] ordinals = np.empty(n, dtype=np.int64) + object p + + freqstr = Period._maybe_convert_freq(freq).freqstr + + for i in range(n): + p = values[i] + + if checknull_with_nat(p): + ordinals[i] = NPY_NAT + elif util.is_integer_object(p): + if p == NPY_NAT: + ordinals[i] = NPY_NAT + else: + raise TypeError(p) + else: + try: + ordinals[i] = p.ordinal + + if p.freqstr != freqstr: + msg = DIFFERENT_FREQ.format(cls="PeriodIndex", + own_freq=freqstr, + other_freq=p.freqstr) + raise IncompatibleFrequency(msg) + + except AttributeError: + p = Period(p, freq=freq) + if p is NaT: + # input may contain NaT-like string + ordinals[i] = NPY_NAT + else: + ordinals[i] = p.ordinal + + return ordinals.base # .base to access underlying np.ndarray + + +def extract_freq(ndarray[object] values) -> BaseOffset: + # TODO: Change type to const object[:] when Cython supports that. + + cdef: + Py_ssize_t i, n = len(values) + object value + + for i in range(n): + value = values[i] + + if is_period_object(value): + return value.freq + + raise ValueError('freq not specified and cannot be inferred') + +# ----------------------------------------------------------------------- +# period helpers + + +DIFFERENT_FREQ = ("Input has different freq={other_freq} " + "from {cls}(freq={own_freq})") + + +class IncompatibleFrequency(ValueError): + pass + + +cdef class PeriodMixin: + # Methods shared between Period and PeriodArray + + cpdef int _get_to_timestamp_base(self): + """ + Return frequency code group used for base of to_timestamp against + frequency code. + + Return day freq code against longer freq than day. + Return second freq code against hour between second. + + Returns + ------- + int + """ + base = self._dtype._dtype_code + if base < FR_BUS: + return FR_DAY + elif FR_HR <= base <= FR_SEC: + return FR_SEC + return base + + @property + def start_time(self) -> Timestamp: + """ + Get the Timestamp for the start of the period. + + Returns + ------- + Timestamp + + See Also + -------- + Period.end_time : Return the end Timestamp. + Period.dayofyear : Return the day of year. + Period.daysinmonth : Return the days in that month. + Period.dayofweek : Return the day of the week. + + Examples + -------- + >>> period = pd.Period('2012-1-1', freq='D') + >>> period + Period('2012-01-01', 'D') + + >>> period.start_time + Timestamp('2012-01-01 00:00:00') + + >>> period.end_time + Timestamp('2012-01-01 23:59:59.999999999') + """ + return self.to_timestamp(how="start") + + @property + def end_time(self) -> Timestamp: + """ + Get the Timestamp for the end of the period. + + Returns + ------- + Timestamp + + See Also + -------- + Period.start_time : Return the start Timestamp. + Period.dayofyear : Return the day of year. + Period.daysinmonth : Return the days in that month. + Period.dayofweek : Return the day of the week. 
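+
+        Examples
+        --------
+        >>> pd.Period('2012-1-1', freq='D').end_time
+        Timestamp('2012-01-01 23:59:59.999999999')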
+ """ + return self.to_timestamp(how="end") + + def _require_matching_freq(self, other, base=False): + # See also arrays.period.raise_on_incompatible + if is_offset_object(other): + other_freq = other + else: + other_freq = other.freq + + if base: + condition = self.freq.base != other_freq.base + else: + condition = self.freq != other_freq + + if condition: + msg = DIFFERENT_FREQ.format( + cls=type(self).__name__, + own_freq=self.freqstr, + other_freq=other_freq.freqstr, + ) + raise IncompatibleFrequency(msg) + + +cdef class _Period(PeriodMixin): + + cdef readonly: + int64_t ordinal + PeriodDtypeBase _dtype + BaseOffset freq + + # higher than np.ndarray, np.matrix, np.timedelta64 + __array_priority__ = 100 + + dayofweek = _Period.day_of_week + dayofyear = _Period.day_of_year + + def __cinit__(self, int64_t ordinal, BaseOffset freq): + self.ordinal = ordinal + self.freq = freq + # Note: this is more performant than PeriodDtype.from_date_offset(freq) + # because from_date_offset cannot be made a cdef method (until cython + # supported cdef classmethods) + self._dtype = PeriodDtypeBase(freq._period_dtype_code) + + @classmethod + def _maybe_convert_freq(cls, object freq) -> BaseOffset: + """ + Internally we allow integer and tuple representations (for now) that + are not recognized by to_offset, so we convert them here. Also, a + Period's freq attribute must have `freq.n > 0`, which we check for here. + + Returns + ------- + DateOffset + """ + if isinstance(freq, int): + # We already have a dtype code + dtype = PeriodDtypeBase(freq) + freq = dtype.date_offset + + freq = to_offset(freq) + + if freq.n <= 0: + raise ValueError("Frequency must be positive, because it " + f"represents span: {freq.freqstr}") + + return freq + + @classmethod + def _from_ordinal(cls, ordinal: int, freq) -> "Period": + """ + Fast creation from an ordinal and freq that are already validated! 
+ """ + if ordinal == NPY_NAT: + return NaT + else: + freq = cls._maybe_convert_freq(freq) + self = _Period.__new__(cls, ordinal, freq) + return self + + def __richcmp__(self, other, op): + if is_period_object(other): + if other.freq != self.freq: + if op == Py_EQ: + return False + elif op == Py_NE: + return True + self._require_matching_freq(other) + return PyObject_RichCompareBool(self.ordinal, other.ordinal, op) + elif other is NaT: + return _nat_scalar_rules[op] + elif util.is_array(other): + # GH#44285 + if cnp.PyArray_IsZeroDim(other): + return PyObject_RichCompare(self, other.item(), op) + else: + # in particular ndarray[object]; see test_pi_cmp_period + return np.array([PyObject_RichCompare(self, x, op) for x in other]) + return NotImplemented + + def __hash__(self): + return hash((self.ordinal, self.freqstr)) + + def _add_timedeltalike_scalar(self, other) -> "Period": + cdef: + int64_t nanos, base_nanos + + if is_tick_object(self.freq): + nanos = delta_to_nanoseconds(other) + base_nanos = self.freq.base.nanos + if nanos % base_nanos == 0: + ordinal = self.ordinal + (nanos // base_nanos) + return Period(ordinal=ordinal, freq=self.freq) + raise IncompatibleFrequency("Input cannot be converted to " + f"Period(freq={self.freqstr})") + + def _add_offset(self, other) -> "Period": + # Non-Tick DateOffset other + cdef: + int64_t ordinal + + self._require_matching_freq(other, base=True) + + ordinal = self.ordinal + other.n + return Period(ordinal=ordinal, freq=self.freq) + + def __add__(self, other): + if not is_period_object(self): + # cython semantics; this is analogous to a call to __radd__ + if self is NaT: + return NaT + return other.__add__(self) + + if is_any_td_scalar(other): + return self._add_timedeltalike_scalar(other) + elif is_offset_object(other): + return self._add_offset(other) + elif other is NaT: + return NaT + elif util.is_integer_object(other): + ordinal = self.ordinal + other * self.freq.n + return Period(ordinal=ordinal, freq=self.freq) + elif (PyDateTime_Check(other) or + is_period_object(other) or util.is_datetime64_object(other)): + # can't add datetime-like + # GH#17983 + sname = type(self).__name__ + oname = type(other).__name__ + raise TypeError(f"unsupported operand type(s) for +: '{sname}' " + f"and '{oname}'") + + return NotImplemented + + def __sub__(self, other): + if not is_period_object(self): + # cython semantics; this is like a call to __rsub__ + if self is NaT: + return NaT + return NotImplemented + + elif is_any_td_scalar(other): + neg_other = -other + return self + neg_other + elif is_offset_object(other): + # Non-Tick DateOffset + neg_other = -other + return self + neg_other + elif util.is_integer_object(other): + ordinal = self.ordinal - other * self.freq.n + return Period(ordinal=ordinal, freq=self.freq) + elif is_period_object(other): + self._require_matching_freq(other) + # GH 23915 - mul by base freq since __add__ is agnostic of n + return (self.ordinal - other.ordinal) * self.freq.base + elif other is NaT: + return NaT + + return NotImplemented + + def asfreq(self, freq, how='E') -> "Period": + """ + Convert Period to desired frequency, at the start or end of the interval. + + Parameters + ---------- + freq : str + The desired frequency. + how : {'E', 'S', 'end', 'start'}, default 'end' + Start or end of the timespan. 
+ + Returns + ------- + resampled : Period + """ + freq = self._maybe_convert_freq(freq) + how = validate_end_alias(how) + base1 = self._dtype._dtype_code + base2 = freq_to_dtype_code(freq) + + # self.n can't be negative or 0 + end = how == 'E' + if end: + ordinal = self.ordinal + self.freq.n - 1 + else: + ordinal = self.ordinal + ordinal = period_asfreq(ordinal, base1, base2, end) + + return Period(ordinal=ordinal, freq=freq) + + def to_timestamp(self, freq=None, how='start', tz=None) -> Timestamp: + """ + Return the Timestamp representation of the Period. + + Uses the target frequency specified at the part of the period specified + by `how`, which is either `Start` or `Finish`. + + Parameters + ---------- + freq : str or DateOffset + Target frequency. Default is 'D' if self.freq is week or + longer and 'S' otherwise. + how : str, default 'S' (start) + One of 'S', 'E'. Can be aliased as case insensitive + 'Start', 'Finish', 'Begin', 'End'. + + Returns + ------- + Timestamp + """ + if tz is not None: + # GH#34522 + warnings.warn( + "Period.to_timestamp `tz` argument is deprecated and will " + "be removed in a future version. Use " + "`per.to_timestamp(...).tz_localize(tz)` instead.", + FutureWarning, + stacklevel=1, + ) + + how = validate_end_alias(how) + + end = how == 'E' + if end: + if freq == "B" or self.freq == "B": + # roll forward to ensure we land on B date + adjust = Timedelta(1, "D") - Timedelta(1, "ns") + return self.to_timestamp(how="start") + adjust + endpoint = (self + self.freq).to_timestamp(how='start') + return endpoint - Timedelta(1, 'ns') + + if freq is None: + freq = self._get_to_timestamp_base() + base = freq + else: + freq = self._maybe_convert_freq(freq) + base = freq._period_dtype_code + + val = self.asfreq(freq, how) + + dt64 = period_ordinal_to_dt64(val.ordinal, base) + return Timestamp(dt64, tz=tz) + + @property + def year(self) -> int: + """ + Return the year this Period falls on. + """ + base = self._dtype._dtype_code + return pyear(self.ordinal, base) + + @property + def month(self) -> int: + """ + Return the month this Period falls on. + """ + base = self._dtype._dtype_code + return pmonth(self.ordinal, base) + + @property + def day(self) -> int: + """ + Get day of the month that a Period falls on. + + Returns + ------- + int + + See Also + -------- + Period.dayofweek : Get the day of the week. + Period.dayofyear : Get the day of the year. + + Examples + -------- + >>> p = pd.Period("2018-03-11", freq='H') + >>> p.day + 11 + """ + base = self._dtype._dtype_code + return pday(self.ordinal, base) + + @property + def hour(self) -> int: + """ + Get the hour of the day component of the Period. + + Returns + ------- + int + The hour as an integer, between 0 and 23. + + See Also + -------- + Period.second : Get the second component of the Period. + Period.minute : Get the minute component of the Period. + + Examples + -------- + >>> p = pd.Period("2018-03-11 13:03:12.050000") + >>> p.hour + 13 + + Period longer than a day + + >>> p = pd.Period("2018-03-11", freq="M") + >>> p.hour + 0 + """ + base = self._dtype._dtype_code + return phour(self.ordinal, base) + + @property + def minute(self) -> int: + """ + Get minute of the hour component of the Period. + + Returns + ------- + int + The minute as an integer, between 0 and 59. + + See Also + -------- + Period.hour : Get the hour component of the Period. + Period.second : Get the second component of the Period. 
+ + Examples + -------- + >>> p = pd.Period("2018-03-11 13:03:12.050000") + >>> p.minute + 3 + """ + base = self._dtype._dtype_code + return pminute(self.ordinal, base) + + @property + def second(self) -> int: + """ + Get the second component of the Period. + + Returns + ------- + int + The second of the Period (ranges from 0 to 59). + + See Also + -------- + Period.hour : Get the hour component of the Period. + Period.minute : Get the minute component of the Period. + + Examples + -------- + >>> p = pd.Period("2018-03-11 13:03:12.050000") + >>> p.second + 12 + """ + base = self._dtype._dtype_code + return psecond(self.ordinal, base) + + @property + def weekofyear(self) -> int: + """ + Get the week of the year on the given Period. + + Returns + ------- + int + + See Also + -------- + Period.dayofweek : Get the day component of the Period. + Period.weekday : Get the day component of the Period. + + Examples + -------- + >>> p = pd.Period("2018-03-11", "H") + >>> p.weekofyear + 10 + + >>> p = pd.Period("2018-02-01", "D") + >>> p.weekofyear + 5 + + >>> p = pd.Period("2018-01-06", "D") + >>> p.weekofyear + 1 + """ + base = self._dtype._dtype_code + return pweek(self.ordinal, base) + + @property + def week(self) -> int: + """ + Get the week of the year on the given Period. + + Returns + ------- + int + + See Also + -------- + Period.dayofweek : Get the day component of the Period. + Period.weekday : Get the day component of the Period. + + Examples + -------- + >>> p = pd.Period("2018-03-11", "H") + >>> p.week + 10 + + >>> p = pd.Period("2018-02-01", "D") + >>> p.week + 5 + + >>> p = pd.Period("2018-01-06", "D") + >>> p.week + 1 + """ + return self.weekofyear + + @property + def day_of_week(self) -> int: + """ + Day of the week the period lies in, with Monday=0 and Sunday=6. + + If the period frequency is lower than daily (e.g. hourly), and the + period spans over multiple days, the day at the start of the period is + used. + + If the frequency is higher than daily (e.g. monthly), the last day + of the period is used. + + Returns + ------- + int + Day of the week. + + See Also + -------- + Period.day_of_week : Day of the week the period lies in. + Period.weekday : Alias of Period.day_of_week. + Period.day : Day of the month. + Period.dayofyear : Day of the year. + + Examples + -------- + >>> per = pd.Period('2017-12-31 22:00', 'H') + >>> per.day_of_week + 6 + + For periods that span over multiple days, the day at the beginning of + the period is returned. + + >>> per = pd.Period('2017-12-31 22:00', '4H') + >>> per.day_of_week + 6 + >>> per.start_time.day_of_week + 6 + + For periods with a frequency higher than days, the last day of the + period is returned. + + >>> per = pd.Period('2018-01', 'M') + >>> per.day_of_week + 2 + >>> per.end_time.day_of_week + 2 + """ + base = self._dtype._dtype_code + return pweekday(self.ordinal, base) + + @property + def weekday(self) -> int: + """ + Day of the week the period lies in, with Monday=0 and Sunday=6. + + If the period frequency is lower than daily (e.g. hourly), and the + period spans over multiple days, the day at the start of the period is + used. + + If the frequency is higher than daily (e.g. monthly), the last day + of the period is used. + + Returns + ------- + int + Day of the week. + + See Also + -------- + Period.dayofweek : Day of the week the period lies in. + Period.weekday : Alias of Period.dayofweek. + Period.day : Day of the month. + Period.dayofyear : Day of the year. 
+ + Examples + -------- + >>> per = pd.Period('2017-12-31 22:00', 'H') + >>> per.dayofweek + 6 + + For periods that span over multiple days, the day at the beginning of + the period is returned. + + >>> per = pd.Period('2017-12-31 22:00', '4H') + >>> per.dayofweek + 6 + >>> per.start_time.dayofweek + 6 + + For periods with a frequency higher than days, the last day of the + period is returned. + + >>> per = pd.Period('2018-01', 'M') + >>> per.dayofweek + 2 + >>> per.end_time.dayofweek + 2 + """ + # Docstring is a duplicate from dayofweek. Reusing docstrings with + # Appender doesn't work for properties in Cython files, and setting + # the __doc__ attribute is also not possible. + return self.dayofweek + + @property + def day_of_year(self) -> int: + """ + Return the day of the year. + + This attribute returns the day of the year on which the particular + date occurs. The return value ranges between 1 to 365 for regular + years and 1 to 366 for leap years. + + Returns + ------- + int + The day of year. + + See Also + -------- + Period.day : Return the day of the month. + Period.day_of_week : Return the day of week. + PeriodIndex.day_of_year : Return the day of year of all indexes. + + Examples + -------- + >>> period = pd.Period("2015-10-23", freq='H') + >>> period.day_of_year + 296 + >>> period = pd.Period("2012-12-31", freq='D') + >>> period.day_of_year + 366 + >>> period = pd.Period("2013-01-01", freq='D') + >>> period.day_of_year + 1 + """ + base = self._dtype._dtype_code + return pday_of_year(self.ordinal, base) + + @property + def quarter(self) -> int: + """ + Return the quarter this Period falls on. + """ + base = self._dtype._dtype_code + return pquarter(self.ordinal, base) + + @property + def qyear(self) -> int: + """ + Fiscal year the Period lies in according to its starting-quarter. + + The `year` and the `qyear` of the period will be the same if the fiscal + and calendar years are the same. When they are not, the fiscal year + can be different from the calendar year of the period. + + Returns + ------- + int + The fiscal year of the period. + + See Also + -------- + Period.year : Return the calendar year of the period. + + Examples + -------- + If the natural and fiscal year are the same, `qyear` and `year` will + be the same. + + >>> per = pd.Period('2018Q1', freq='Q') + >>> per.qyear + 2018 + >>> per.year + 2018 + + If the fiscal year starts in April (`Q-MAR`), the first quarter of + 2018 will start in April 2017. `year` will then be 2018, but `qyear` + will be the fiscal year, 2018. + + >>> per = pd.Period('2018Q1', freq='Q-MAR') + >>> per.start_time + Timestamp('2017-04-01 00:00:00') + >>> per.qyear + 2018 + >>> per.year + 2017 + """ + base = self._dtype._dtype_code + return pqyear(self.ordinal, base) + + @property + def days_in_month(self) -> int: + """ + Get the total number of days in the month that this period falls on. + + Returns + ------- + int + + See Also + -------- + Period.daysinmonth : Gets the number of days in the month. + DatetimeIndex.daysinmonth : Gets the number of days in the month. + calendar.monthrange : Returns a tuple containing weekday + (0-6 ~ Mon-Sun) and number of days (28-31). 
+ + Examples + -------- + >>> p = pd.Period('2018-2-17') + >>> p.days_in_month + 28 + + >>> pd.Period('2018-03-01').days_in_month + 31 + + Handles the leap year case as well: + + >>> p = pd.Period('2016-2-17') + >>> p.days_in_month + 29 + """ + base = self._dtype._dtype_code + return pdays_in_month(self.ordinal, base) + + @property + def daysinmonth(self) -> int: + """ + Get the total number of days of the month that the Period falls in. + + Returns + ------- + int + + See Also + -------- + Period.days_in_month : Return the days of the month. + Period.dayofyear : Return the day of the year. + + Examples + -------- + >>> p = pd.Period("2018-03-11", freq='H') + >>> p.daysinmonth + 31 + """ + return self.days_in_month + + @property + def is_leap_year(self) -> bool: + """ + Return True if the period's year is in a leap year. + """ + return bool(is_leapyear(self.year)) + + @classmethod + def now(cls, freq=None): + """ + Return the period of now's date. + """ + return Period(datetime.now(), freq=freq) + + @property + def freqstr(self) -> str: + """ + Return a string representation of the frequency. + """ + return self.freq.freqstr + + def __repr__(self) -> str: + base = self._dtype._dtype_code + formatted = period_format(self.ordinal, base) + return f"Period('{formatted}', '{self.freqstr}')" + + def __str__(self) -> str: + """ + Return a string representation for a particular DataFrame + """ + base = self._dtype._dtype_code + formatted = period_format(self.ordinal, base) + value = str(formatted) + return value + + def __setstate__(self, state): + self.freq = state[1] + self.ordinal = state[2] + + def __reduce__(self): + object_state = None, self.freq, self.ordinal + return (Period, object_state) + + def strftime(self, fmt: str) -> str: + r""" + Returns the string representation of the :class:`Period`, depending + on the selected ``fmt``. ``fmt`` must be a string + containing one or several directives. The method recognizes the same + directives as the :func:`time.strftime` function of the standard Python + distribution, as well as the specific additional directives ``%f``, + ``%F``, ``%q``. (formatting & docs originally from scikits.timeries). + + +-----------+--------------------------------+-------+ + | Directive | Meaning | Notes | + +===========+================================+=======+ + | ``%a`` | Locale's abbreviated weekday | | + | | name. | | + +-----------+--------------------------------+-------+ + | ``%A`` | Locale's full weekday name. | | + +-----------+--------------------------------+-------+ + | ``%b`` | Locale's abbreviated month | | + | | name. | | + +-----------+--------------------------------+-------+ + | ``%B`` | Locale's full month name. | | + +-----------+--------------------------------+-------+ + | ``%c`` | Locale's appropriate date and | | + | | time representation. | | + +-----------+--------------------------------+-------+ + | ``%d`` | Day of the month as a decimal | | + | | number [01,31]. | | + +-----------+--------------------------------+-------+ + | ``%f`` | 'Fiscal' year without a | \(1) | + | | century as a decimal number | | + | | [00,99] | | + +-----------+--------------------------------+-------+ + | ``%F`` | 'Fiscal' year with a century | \(2) | + | | as a decimal number | | + +-----------+--------------------------------+-------+ + | ``%H`` | Hour (24-hour clock) as a | | + | | decimal number [00,23]. | | + +-----------+--------------------------------+-------+ + | ``%I`` | Hour (12-hour clock) as a | | + | | decimal number [01,12]. 
| | + +-----------+--------------------------------+-------+ + | ``%j`` | Day of the year as a decimal | | + | | number [001,366]. | | + +-----------+--------------------------------+-------+ + | ``%m`` | Month as a decimal number | | + | | [01,12]. | | + +-----------+--------------------------------+-------+ + | ``%M`` | Minute as a decimal number | | + | | [00,59]. | | + +-----------+--------------------------------+-------+ + | ``%p`` | Locale's equivalent of either | \(3) | + | | AM or PM. | | + +-----------+--------------------------------+-------+ + | ``%q`` | Quarter as a decimal number | | + | | [01,04] | | + +-----------+--------------------------------+-------+ + | ``%S`` | Second as a decimal number | \(4) | + | | [00,61]. | | + +-----------+--------------------------------+-------+ + | ``%U`` | Week number of the year | \(5) | + | | (Sunday as the first day of | | + | | the week) as a decimal number | | + | | [00,53]. All days in a new | | + | | year preceding the first | | + | | Sunday are considered to be in | | + | | week 0. | | + +-----------+--------------------------------+-------+ + | ``%w`` | Weekday as a decimal number | | + | | [0(Sunday),6]. | | + +-----------+--------------------------------+-------+ + | ``%W`` | Week number of the year | \(5) | + | | (Monday as the first day of | | + | | the week) as a decimal number | | + | | [00,53]. All days in a new | | + | | year preceding the first | | + | | Monday are considered to be in | | + | | week 0. | | + +-----------+--------------------------------+-------+ + | ``%x`` | Locale's appropriate date | | + | | representation. | | + +-----------+--------------------------------+-------+ + | ``%X`` | Locale's appropriate time | | + | | representation. | | + +-----------+--------------------------------+-------+ + | ``%y`` | Year without century as a | | + | | decimal number [00,99]. | | + +-----------+--------------------------------+-------+ + | ``%Y`` | Year with century as a decimal | | + | | number. | | + +-----------+--------------------------------+-------+ + | ``%Z`` | Time zone name (no characters | | + | | if no time zone exists). | | + +-----------+--------------------------------+-------+ + | ``%%`` | A literal ``'%'`` character. | | + +-----------+--------------------------------+-------+ + + Notes + ----- + + (1) + The ``%f`` directive is the same as ``%y`` if the frequency is + not quarterly. + Otherwise, it corresponds to the 'fiscal' year, as defined by + the :attr:`qyear` attribute. + + (2) + The ``%F`` directive is the same as ``%Y`` if the frequency is + not quarterly. + Otherwise, it corresponds to the 'fiscal' year, as defined by + the :attr:`qyear` attribute. + + (3) + The ``%p`` directive only affects the output hour field + if the ``%I`` directive is used to parse the hour. + + (4) + The range really is ``0`` to ``61``; this accounts for leap + seconds and the (very rare) double leap seconds. + + (5) + The ``%U`` and ``%W`` directives are only used in calculations + when the day of the week and the year are specified. + + Examples + -------- + + >>> a = Period(freq='Q-JUL', year=2006, quarter=1) + >>> a.strftime('%F-Q%q') + '2006-Q1' + >>> # Output the last month in the quarter of this date + >>> a.strftime('%b-%Y') + 'Oct-2005' + >>> + >>> a = Period(freq='D', year=2001, month=1, day=1) + >>> a.strftime('%d-%b-%Y') + '01-Jan-2001' + >>> a.strftime('%b. %d, %Y was a %A') + 'Jan. 
01, 2001 was a Monday'
+        """
+        base = self._dtype._dtype_code
+        return period_format(self.ordinal, base, fmt)
+
+
+class Period(_Period):
+    """
+    Represents a period of time.
+
+    Parameters
+    ----------
+    value : Period or str, default None
+        The time period represented (e.g., '4Q2005').
+    freq : str, default None
+        One of pandas period strings or corresponding objects.
+    ordinal : int, default None
+        The period offset from the proleptic Gregorian epoch.
+    year : int, default None
+        Year value of the period.
+    month : int, default 1
+        Month value of the period.
+    quarter : int, default None
+        Quarter value of the period.
+    day : int, default 1
+        Day value of the period.
+    hour : int, default 0
+        Hour value of the period.
+    minute : int, default 0
+        Minute value of the period.
+    second : int, default 0
+        Second value of the period.
+    """
+
+    def __new__(cls, value=None, freq=None, ordinal=None,
+                year=None, month=None, quarter=None, day=None,
+                hour=None, minute=None, second=None):
+        # freq points to a tuple (base, mult); base is one of the defined
+        # periods such as A, Q, etc. Every five minutes would be, e.g.,
+        # ('T', 5) but may be passed in as a string like '5T'
+
+        # ordinal is the period offset from the gregorian proleptic epoch
+        cdef _Period self
+
+        if freq is not None:
+            freq = cls._maybe_convert_freq(freq)
+        nanosecond = 0
+
+        if ordinal is not None and value is not None:
+            raise ValueError("Only value or ordinal should be given, "
+                             "but not both")
+        elif ordinal is not None:
+            if not util.is_integer_object(ordinal):
+                raise ValueError("Ordinal must be an integer")
+            if freq is None:
+                raise ValueError('Must supply freq for ordinal value')
+
+        elif value is None:
+            if (year is None and month is None and
+                    quarter is None and day is None and
+                    hour is None and minute is None and second is None):
+                ordinal = NPY_NAT
+            else:
+                if freq is None:
+                    raise ValueError("If value is None, freq cannot be None")
+
+                # set defaults
+                month = 1 if month is None else month
+                day = 1 if day is None else day
+                hour = 0 if hour is None else hour
+                minute = 0 if minute is None else minute
+                second = 0 if second is None else second
+
+                ordinal = _ordinal_from_fields(year, month, quarter, day,
+                                               hour, minute, second, freq)
+
+        elif is_period_object(value):
+            other = value
+            if freq is None or freq._period_dtype_code == other.freq._period_dtype_code:
+                ordinal = other.ordinal
+                freq = other.freq
+            else:
+                converted = other.asfreq(freq)
+                ordinal = converted.ordinal
+
+        elif checknull_with_nat(value) or (isinstance(value, str) and
+                                           value in nat_strings):
+            # explicit str check is necessary to avoid raising incorrectly
+            # if we have a non-hashable value.
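+            # Reached for e.g. Period(None, freq="D"), Period(float("nan"),
+            # freq="D") or Period("NaT", freq="D"); each yields a NaT period.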
+ ordinal = NPY_NAT + + elif isinstance(value, str) or util.is_integer_object(value): + if util.is_integer_object(value): + if value == NPY_NAT: + value = "NaT" + + value = str(value) + value = value.upper() + dt, reso = parse_time_string(value, freq) + try: + ts = Timestamp(value) + except ValueError: + nanosecond = 0 + else: + nanosecond = ts.nanosecond + if nanosecond != 0: + reso = 'nanosecond' + if dt is NaT: + ordinal = NPY_NAT + + if freq is None: + try: + freq = attrname_to_abbrevs[reso] + except KeyError: + raise ValueError(f"Invalid frequency or could not " + f"infer: {reso}") + freq = to_offset(freq) + + elif PyDateTime_Check(value): + dt = value + if freq is None: + raise ValueError('Must supply freq for datetime value') + elif util.is_datetime64_object(value): + dt = Timestamp(value) + if freq is None: + raise ValueError('Must supply freq for datetime value') + elif PyDate_Check(value): + dt = datetime(year=value.year, month=value.month, day=value.day) + if freq is None: + raise ValueError('Must supply freq for datetime value') + else: + msg = "Value must be Period, string, integer, or datetime" + raise ValueError(msg) + + if ordinal is None: + base = freq_to_dtype_code(freq) + ordinal = period_ordinal(dt.year, dt.month, dt.day, + dt.hour, dt.minute, dt.second, + dt.microsecond, 1000*nanosecond, base) + + return cls._from_ordinal(ordinal, freq) + + +cdef bint is_period_object(object obj): + return isinstance(obj, _Period) + + +cpdef int freq_to_dtype_code(BaseOffset freq) except? -1: + try: + return freq._period_dtype_code + except AttributeError as err: + raise ValueError(INVALID_FREQ_ERR_MSG.format(freq)) from err + + +cdef int64_t _ordinal_from_fields(int year, int month, quarter, int day, + int hour, int minute, int second, + BaseOffset freq): + base = freq_to_dtype_code(freq) + if quarter is not None: + year, month = quarter_to_myear(year, quarter, freq.freqstr) + + return period_ordinal(year, month, day, hour, + minute, second, 0, 0, base) + + +def validate_end_alias(how: str) -> str: # Literal["E", "S"] + how_dict = {'S': 'S', 'E': 'E', + 'START': 'S', 'FINISH': 'E', + 'BEGIN': 'S', 'END': 'E'} + how = how_dict.get(str(how).upper()) + if how not in {'S', 'E'}: + raise ValueError('How must be one of S or E') + return how diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/src/datetime/np_datetime.c b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/src/datetime/np_datetime.c new file mode 100644 index 0000000..0efed21 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/src/datetime/np_datetime.c @@ -0,0 +1,774 @@ +/* + +Copyright (c) 2016, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. + +Copyright (c) 2005-2011, NumPy Developers +All rights reserved. + +This file is derived from NumPy 1.7. 
See NUMPY_LICENSE.txt
+
+*/
+
+#define NO_IMPORT
+
+#ifndef NPY_NO_DEPRECATED_API
+#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
+#endif  // NPY_NO_DEPRECATED_API
+
+#include <Python.h>
+
+#include <numpy/arrayobject.h>
+#include <numpy/arrayscalars.h>
+#include <numpy/ndarraytypes.h>
+#include "np_datetime.h"
+
+#if PY_MAJOR_VERSION >= 3
+#define PyInt_AsLong PyLong_AsLong
+#endif  // PyInt_AsLong
+
+const npy_datetimestruct _NS_MIN_DTS = {
+    1677, 9, 21, 0, 12, 43, 145224, 193000, 0};
+const npy_datetimestruct _NS_MAX_DTS = {
+    2262, 4, 11, 23, 47, 16, 854775, 807000, 0};
+
+
+const int days_per_month_table[2][12] = {
+    {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31},
+    {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}};
+
+/*
+ * Returns 1 if the given year is a leap year, 0 otherwise.
+ */
+int is_leapyear(npy_int64 year) {
+    return (year & 0x3) == 0 && /* year % 4 == 0 */
+           ((year % 100) != 0 || (year % 400) == 0);
+}
+
+/*
+ * Adjusts a datetimestruct based on a minutes offset. Assumes
+ * the current values are valid.
+ */
+void add_minutes_to_datetimestruct(npy_datetimestruct *dts, int minutes) {
+    int isleap;
+
+    /* MINUTES */
+    dts->min += minutes;
+    while (dts->min < 0) {
+        dts->min += 60;
+        dts->hour--;
+    }
+    while (dts->min >= 60) {
+        dts->min -= 60;
+        dts->hour++;
+    }
+
+    /* HOURS */
+    while (dts->hour < 0) {
+        dts->hour += 24;
+        dts->day--;
+    }
+    while (dts->hour >= 24) {
+        dts->hour -= 24;
+        dts->day++;
+    }
+
+    /* DAYS */
+    if (dts->day < 1) {
+        dts->month--;
+        if (dts->month < 1) {
+            dts->year--;
+            dts->month = 12;
+        }
+        isleap = is_leapyear(dts->year);
+        dts->day += days_per_month_table[isleap][dts->month - 1];
+    } else if (dts->day > 28) {
+        isleap = is_leapyear(dts->year);
+        if (dts->day > days_per_month_table[isleap][dts->month - 1]) {
+            dts->day -= days_per_month_table[isleap][dts->month - 1];
+            dts->month++;
+            if (dts->month > 12) {
+                dts->year++;
+                dts->month = 1;
+            }
+        }
+    }
+}
+
+/*
+ * Calculates the days offset from the 1970 epoch.
+ */
+npy_int64 get_datetimestruct_days(const npy_datetimestruct *dts) {
+    int i, month;
+    npy_int64 year, days = 0;
+    const int *month_lengths;
+
+    year = dts->year - 1970;
+    days = year * 365;
+
+    /* Adjust for leap years */
+    if (days >= 0) {
+        /*
+         * 1968 is the closest leap year before 1970.
+         * Exclude the current year, so add 1.
+         */
+        year += 1;
+        /* Add one day for each 4 years */
+        days += year / 4;
+        /* 1900 is the closest previous year divisible by 100 */
+        year += 68;
+        /* Subtract one day for each 100 years */
+        days -= year / 100;
+        /* 1600 is the closest previous year divisible by 400 */
+        year += 300;
+        /* Add one day for each 400 years */
+        days += year / 400;
+    } else {
+        /*
+         * 1972 is the closest later year after 1970.
+         * Include the current year, so subtract 2.
+         */
+        year -= 2;
+        /* Subtract one day for each 4 years */
+        days += year / 4;
+        /* 2000 is the closest later year divisible by 100 */
+        year -= 28;
+        /* Add one day for each 100 years */
+        days -= year / 100;
+        /* 2000 is also the closest later year divisible by 400 */
+        /* Subtract one day for each 400 years */
+        days += year / 400;
+    }
+
+    month_lengths = days_per_month_table[is_leapyear(dts->year)];
+    month = dts->month - 1;
+
+    /* Add the months */
+    for (i = 0; i < month; ++i) {
+        days += month_lengths[i];
+    }
+
+    /* Add the days */
+    days += dts->day - 1;
+
+    return days;
+}
+
+/*
+ * Modifies '*days_' to be the day offset within the year,
+ * and returns the year.
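+ * For example, an input of 365 (the day offset of 1971-01-01) leaves
+ * *days_ == 0 and returns 1971.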
+ */ +static npy_int64 days_to_yearsdays(npy_int64 *days_) { + const npy_int64 days_per_400years = (400 * 365 + 100 - 4 + 1); + /* Adjust so it's relative to the year 2000 (divisible by 400) */ + npy_int64 days = (*days_) - (365 * 30 + 7); + npy_int64 year; + + /* Break down the 400 year cycle to get the year and day within the year */ + if (days >= 0) { + year = 400 * (days / days_per_400years); + days = days % days_per_400years; + } else { + year = 400 * ((days - (days_per_400years - 1)) / days_per_400years); + days = days % days_per_400years; + if (days < 0) { + days += days_per_400years; + } + } + + /* Work out the year/day within the 400 year cycle */ + if (days >= 366) { + year += 100 * ((days - 1) / (100 * 365 + 25 - 1)); + days = (days - 1) % (100 * 365 + 25 - 1); + if (days >= 365) { + year += 4 * ((days + 1) / (4 * 365 + 1)); + days = (days + 1) % (4 * 365 + 1); + if (days >= 366) { + year += (days - 1) / 365; + days = (days - 1) % 365; + } + } + } + + *days_ = days; + return year + 2000; +} + +/* + * Adjusts a datetimestruct based on a seconds offset. Assumes + * the current values are valid. + */ +NPY_NO_EXPORT void add_seconds_to_datetimestruct(npy_datetimestruct *dts, + int seconds) { + int minutes; + + dts->sec += seconds; + if (dts->sec < 0) { + minutes = dts->sec / 60; + dts->sec = dts->sec % 60; + if (dts->sec < 0) { + --minutes; + dts->sec += 60; + } + add_minutes_to_datetimestruct(dts, minutes); + } else if (dts->sec >= 60) { + minutes = dts->sec / 60; + dts->sec = dts->sec % 60; + add_minutes_to_datetimestruct(dts, minutes); + } +} + +/* + * Fills in the year, month, day in 'dts' based on the days + * offset from 1970. + */ +static void set_datetimestruct_days(npy_int64 days, npy_datetimestruct *dts) { + const int *month_lengths; + int i; + + dts->year = days_to_yearsdays(&days); + month_lengths = days_per_month_table[is_leapyear(dts->year)]; + + for (i = 0; i < 12; ++i) { + if (days < month_lengths[i]) { + dts->month = i + 1; + dts->day = days + 1; + return; + } else { + days -= month_lengths[i]; + } + } +} + +/* + * Compares two npy_datetimestruct objects chronologically + */ +int cmp_npy_datetimestruct(const npy_datetimestruct *a, + const npy_datetimestruct *b) { + if (a->year > b->year) { + return 1; + } else if (a->year < b->year) { + return -1; + } + + if (a->month > b->month) { + return 1; + } else if (a->month < b->month) { + return -1; + } + + if (a->day > b->day) { + return 1; + } else if (a->day < b->day) { + return -1; + } + + if (a->hour > b->hour) { + return 1; + } else if (a->hour < b->hour) { + return -1; + } + + if (a->min > b->min) { + return 1; + } else if (a->min < b->min) { + return -1; + } + + if (a->sec > b->sec) { + return 1; + } else if (a->sec < b->sec) { + return -1; + } + + if (a->us > b->us) { + return 1; + } else if (a->us < b->us) { + return -1; + } + + if (a->ps > b->ps) { + return 1; + } else if (a->ps < b->ps) { + return -1; + } + + if (a->as > b->as) { + return 1; + } else if (a->as < b->as) { + return -1; + } + + return 0; +} + +/* + * + * Converts a Python datetime.datetime or datetime.date + * object into a NumPy npy_datetimestruct. Uses tzinfo (if present) + * to convert to UTC time. + * + * The following implementation just asks for attributes, and thus + * supports datetime duck typing. The tzinfo time zone conversion + * requires this style of access as well. + * + * Returns -1 on error, 0 on success, and 1 (with no error set) + * if obj doesn't have the needed date or datetime attributes. 
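+ *
+ * For example, a datetime.date(2021, 1, 1) fills in year, month and day,
+ * then returns 0 early because date objects lack an "hour" attribute,
+ * leaving the time fields at their zeroed defaults.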
+ */ +int convert_pydatetime_to_datetimestruct(PyObject *dtobj, + npy_datetimestruct *out) { + // Assumes that obj is a valid datetime object + PyObject *tmp; + PyObject *obj = (PyObject*)dtobj; + + /* Initialize the output to all zeros */ + memset(out, 0, sizeof(npy_datetimestruct)); + out->month = 1; + out->day = 1; + + out->year = PyInt_AsLong(PyObject_GetAttrString(obj, "year")); + out->month = PyInt_AsLong(PyObject_GetAttrString(obj, "month")); + out->day = PyInt_AsLong(PyObject_GetAttrString(obj, "day")); + + // TODO(anyone): If we can get PyDateTime_IMPORT to work, we could use + // PyDateTime_Check here, and less verbose attribute lookups. + + /* Check for time attributes (if not there, return success as a date) */ + if (!PyObject_HasAttrString(obj, "hour") || + !PyObject_HasAttrString(obj, "minute") || + !PyObject_HasAttrString(obj, "second") || + !PyObject_HasAttrString(obj, "microsecond")) { + return 0; + } + + out->hour = PyInt_AsLong(PyObject_GetAttrString(obj, "hour")); + out->min = PyInt_AsLong(PyObject_GetAttrString(obj, "minute")); + out->sec = PyInt_AsLong(PyObject_GetAttrString(obj, "second")); + out->us = PyInt_AsLong(PyObject_GetAttrString(obj, "microsecond")); + + /* Apply the time zone offset if datetime obj is tz-aware */ + if (PyObject_HasAttrString((PyObject*)obj, "tzinfo")) { + tmp = PyObject_GetAttrString(obj, "tzinfo"); + if (tmp == NULL) { + return -1; + } + if (tmp == Py_None) { + Py_DECREF(tmp); + } else { + PyObject *offset; + PyObject *tmp_int; + int seconds_offset, minutes_offset; + + /* The utcoffset function should return a timedelta */ + offset = PyObject_CallMethod(tmp, "utcoffset", "O", obj); + if (offset == NULL) { + Py_DECREF(tmp); + return -1; + } + Py_DECREF(tmp); + + /* + * The timedelta should have a function "total_seconds" + * which contains the value we want. + */ + tmp = PyObject_CallMethod(offset, "total_seconds", ""); + if (tmp == NULL) { + return -1; + } + tmp_int = PyNumber_Long(tmp); + if (tmp_int == NULL) { + Py_DECREF(tmp); + return -1; + } + seconds_offset = PyInt_AsLong(tmp_int); + if (seconds_offset == -1 && PyErr_Occurred()) { + Py_DECREF(tmp_int); + Py_DECREF(tmp); + return -1; + } + Py_DECREF(tmp_int); + Py_DECREF(tmp); + + /* Convert to a minutes offset and apply it */ + minutes_offset = seconds_offset / 60; + + add_minutes_to_datetimestruct(out, -minutes_offset); + } + } + + return 0; +} + + +/* + * Converts a datetime from a datetimestruct to a datetime based + * on a metadata unit. The date is assumed to be valid. 
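+ *
+ * For example, with base == NPY_FR_m, 1970-01-01 01:30 becomes
+ * (0 days * 24 + 1) * 60 + 30 == 90 minutes since the epoch.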
+ */ +npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base, + const npy_datetimestruct *dts) { + npy_datetime ret; + + if (base == NPY_FR_Y) { + /* Truncate to the year */ + ret = dts->year - 1970; + } else if (base == NPY_FR_M) { + /* Truncate to the month */ + ret = 12 * (dts->year - 1970) + (dts->month - 1); + } else { + /* Otherwise calculate the number of days to start */ + npy_int64 days = get_datetimestruct_days(dts); + + switch (base) { + case NPY_FR_W: + /* Truncate to weeks */ + if (days >= 0) { + ret = days / 7; + } else { + ret = (days - 6) / 7; + } + break; + case NPY_FR_D: + ret = days; + break; + case NPY_FR_h: + ret = days * 24 + dts->hour; + break; + case NPY_FR_m: + ret = (days * 24 + dts->hour) * 60 + dts->min; + break; + case NPY_FR_s: + ret = ((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec; + break; + case NPY_FR_ms: + ret = (((days * 24 + dts->hour) * 60 + dts->min) * 60 + + dts->sec) * + 1000 + + dts->us / 1000; + break; + case NPY_FR_us: + ret = (((days * 24 + dts->hour) * 60 + dts->min) * 60 + + dts->sec) * + 1000000 + + dts->us; + break; + case NPY_FR_ns: + ret = ((((days * 24 + dts->hour) * 60 + dts->min) * 60 + + dts->sec) * + 1000000 + + dts->us) * + 1000 + + dts->ps / 1000; + break; + case NPY_FR_ps: + ret = ((((days * 24 + dts->hour) * 60 + dts->min) * 60 + + dts->sec) * + 1000000 + + dts->us) * + 1000000 + + dts->ps; + break; + case NPY_FR_fs: + /* only 2.6 hours */ + ret = (((((days * 24 + dts->hour) * 60 + dts->min) * 60 + + dts->sec) * + 1000000 + + dts->us) * + 1000000 + + dts->ps) * + 1000 + + dts->as / 1000; + break; + case NPY_FR_as: + /* only 9.2 secs */ + ret = (((((days * 24 + dts->hour) * 60 + dts->min) * 60 + + dts->sec) * + 1000000 + + dts->us) * + 1000000 + + dts->ps) * + 1000000 + + dts->as; + break; + default: + /* Something got corrupted */ + PyErr_SetString( + PyExc_ValueError, + "NumPy datetime metadata with corrupt unit value"); + return -1; + } + } + return ret; +} + +/* + * Port numpy#13188 https://github.com/numpy/numpy/pull/13188/ + * + * Computes the python `ret, d = divmod(d, unit)`. + * + * Note that GCC is smart enough at -O2 to eliminate the `if(*d < 0)` branch + * for subsequent calls to this command - it is able to deduce that `*d >= 0`. + */ +npy_int64 extract_unit(npy_datetime *d, npy_datetime unit) { + assert(unit > 0); + npy_int64 div = *d / unit; + npy_int64 mod = *d % unit; + if (mod < 0) { + mod += unit; + div -= 1; + } + assert(mod >= 0); + *d = mod; + return div; +} + +/* + * Converts a datetime based on the given metadata into a datetimestruct + */ +void pandas_datetime_to_datetimestruct(npy_datetime dt, + NPY_DATETIMEUNIT base, + npy_datetimestruct *out) { + npy_int64 perday; + + /* Initialize the output to all zeros */ + memset(out, 0, sizeof(npy_datetimestruct)); + out->year = 1970; + out->month = 1; + out->day = 1; + + /* + * Note that care must be taken with the / and % operators + * for negative values. 
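+     * extract_unit above implements the needed Python-style divmod:
+     * for dt == -1 and unit == 24 it stores 23 back into dt and returns
+     * -1, so pre-epoch values borrow a day and keep remainders positive.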
+ */ + switch (base) { + case NPY_FR_Y: + out->year = 1970 + dt; + break; + + case NPY_FR_M: + out->year = 1970 + extract_unit(&dt, 12); + out->month = dt + 1; + break; + + case NPY_FR_W: + /* A week is 7 days */ + set_datetimestruct_days(dt * 7, out); + break; + + case NPY_FR_D: + set_datetimestruct_days(dt, out); + break; + + case NPY_FR_h: + perday = 24LL; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = dt; + break; + + case NPY_FR_m: + perday = 24LL * 60; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 60); + out->min = (int)dt; + break; + + case NPY_FR_s: + perday = 24LL * 60 * 60; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 60 * 60); + out->min = (int)extract_unit(&dt, 60); + out->sec = (int)dt; + break; + + case NPY_FR_ms: + perday = 24LL * 60 * 60 * 1000; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 1000LL * 60 * 60); + out->min = (int)extract_unit(&dt, 1000LL * 60); + out->sec = (int)extract_unit(&dt, 1000LL); + out->us = (int)(dt * 1000); + break; + + case NPY_FR_us: + perday = 24LL * 60LL * 60LL * 1000LL * 1000LL; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 60 * 60); + out->min = (int)extract_unit(&dt, 1000LL * 1000 * 60); + out->sec = (int)extract_unit(&dt, 1000LL * 1000); + out->us = (int)dt; + break; + + case NPY_FR_ns: + perday = 24LL * 60LL * 60LL * 1000LL * 1000LL * 1000LL; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60 * 60); + out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60); + out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000); + out->us = (int)extract_unit(&dt, 1000LL); + out->ps = (int)(dt * 1000); + break; + + case NPY_FR_ps: + perday = 24LL * 60 * 60 * 1000 * 1000 * 1000 * 1000; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60 * 60); + out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60); + out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000); + out->us = (int)extract_unit(&dt, 1000LL); + out->ps = (int)(dt * 1000); + break; + + case NPY_FR_fs: + /* entire range is only +- 2.6 hours */ + out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * + 1000 * 60 * 60); + if (out->hour < 0) { + out->year = 1969; + out->month = 12; + out->day = 31; + out->hour += 24; + assert(out->hour >= 0); + } + out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * + 1000 * 60); + out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * + 1000); + out->us = (int)extract_unit(&dt, 1000LL * 1000 * 1000); + out->ps = (int)extract_unit(&dt, 1000LL); + out->as = (int)(dt * 1000); + break; + + case NPY_FR_as: + /* entire range is only +- 9.2 seconds */ + out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * + 1000 * 1000); + if (out->sec < 0) { + out->year = 1969; + out->month = 12; + out->day = 31; + out->hour = 23; + out->min = 59; + out->sec += 60; + assert(out->sec >= 0); + } + out->us = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000); + out->ps = (int)extract_unit(&dt, 1000LL * 1000); + out->as = (int)dt; + break; + + default: + PyErr_SetString(PyExc_RuntimeError, + "NumPy datetime metadata is corrupted with invalid " + "base unit"); + } +} + +/* + * Converts a timedelta from a timedeltastruct to a timedelta based + * on a 
metadata unit. The timedelta is assumed to be valid. + * + * Returns 0 on success, -1 on failure. + */ +void pandas_timedelta_to_timedeltastruct(npy_timedelta td, + NPY_DATETIMEUNIT base, + pandas_timedeltastruct *out) { + npy_int64 frac; + npy_int64 sfrac; + npy_int64 ifrac; + int sign; + npy_int64 DAY_NS = 86400000000000LL; + + /* Initialize the output to all zeros */ + memset(out, 0, sizeof(pandas_timedeltastruct)); + + switch (base) { + case NPY_FR_ns: + + // put frac in seconds + if (td < 0 && td % (1000LL * 1000LL * 1000LL) != 0) + frac = td / (1000LL * 1000LL * 1000LL) - 1; + else + frac = td / (1000LL * 1000LL * 1000LL); + + if (frac < 0) { + sign = -1; + + // even fraction + if ((-frac % 86400LL) != 0) { + out->days = -frac / 86400LL + 1; + frac += 86400LL * out->days; + } else { + frac = -frac; + } + } else { + sign = 1; + out->days = 0; + } + + if (frac >= 86400) { + out->days += frac / 86400LL; + frac -= out->days * 86400LL; + } + + if (frac >= 3600) { + out->hrs = frac / 3600LL; + frac -= out->hrs * 3600LL; + } else { + out->hrs = 0; + } + + if (frac >= 60) { + out->min = frac / 60LL; + frac -= out->min * 60LL; + } else { + out->min = 0; + } + + if (frac >= 0) { + out->sec = frac; + frac -= out->sec; + } else { + out->sec = 0; + } + + sfrac = (out->hrs * 3600LL + out->min * 60LL + + out->sec) * (1000LL * 1000LL * 1000LL); + + if (sign < 0) + out->days = -out->days; + + ifrac = td - (out->days * DAY_NS + sfrac); + + if (ifrac != 0) { + out->ms = ifrac / (1000LL * 1000LL); + ifrac -= out->ms * 1000LL * 1000LL; + out->us = ifrac / 1000LL; + ifrac -= out->us * 1000LL; + out->ns = ifrac; + } else { + out->ms = 0; + out->us = 0; + out->ns = 0; + } + + out->seconds = out->hrs * 3600 + out->min * 60 + out->sec; + out->microseconds = out->ms * 1000 + out->us; + out->nanoseconds = out->ns; + break; + + default: + PyErr_SetString(PyExc_RuntimeError, + "NumPy timedelta metadata is corrupted with " + "invalid base unit"); + } +} diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/src/datetime/np_datetime.h b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/src/datetime/np_datetime.h new file mode 100644 index 0000000..0bbc24e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/src/datetime/np_datetime.h @@ -0,0 +1,79 @@ +/* + +Copyright (c) 2016, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. + +Copyright (c) 2005-2011, NumPy Developers +All rights reserved. + +This file is derived from NumPy 1.7. 
See NUMPY_LICENSE.txt
+
+*/
+
+#ifndef PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_H_
+#define PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_H_
+
+#ifndef NPY_NO_DEPRECATED_API
+#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
+#endif  // NPY_NO_DEPRECATED_API
+
+#include <numpy/ndarraytypes.h>
+
+typedef struct {
+    npy_int64 days;
+    npy_int32 hrs, min, sec, ms, us, ns, seconds, microseconds, nanoseconds;
+} pandas_timedeltastruct;
+
+extern const npy_datetimestruct _NS_MIN_DTS;
+extern const npy_datetimestruct _NS_MAX_DTS;
+
+// stuff pandas needs
+// ----------------------------------------------------------------------------
+
+int convert_pydatetime_to_datetimestruct(PyObject *dtobj,
+                                         npy_datetimestruct *out);
+
+npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base,
+                                            const npy_datetimestruct *dts);
+
+void pandas_datetime_to_datetimestruct(npy_datetime val, NPY_DATETIMEUNIT fr,
+                                       npy_datetimestruct *result);
+
+void pandas_timedelta_to_timedeltastruct(npy_timedelta val,
+                                         NPY_DATETIMEUNIT fr,
+                                         pandas_timedeltastruct *result);
+
+extern const int days_per_month_table[2][12];
+
+// stuff numpy-derived code needs in header
+// ----------------------------------------------------------------------------
+
+int is_leapyear(npy_int64 year);
+
+/*
+ * Calculates the days offset from the 1970 epoch.
+ */
+npy_int64
+get_datetimestruct_days(const npy_datetimestruct *dts);
+
+
+/*
+ * Compares two npy_datetimestruct objects chronologically
+ */
+int cmp_npy_datetimestruct(const npy_datetimestruct *a,
+                           const npy_datetimestruct *b);
+
+
+/*
+ * Adjusts a datetimestruct based on a minutes offset. Assumes
+ * the current values are valid.
+ */
+void
+add_minutes_to_datetimestruct(npy_datetimestruct *dts, int minutes);
+
+
+#endif  // PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_H_
diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c
new file mode 100644
index 0000000..847e84b
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c
@@ -0,0 +1,941 @@
+/*
+
+Copyright (c) 2016, PyData Development Team
+All rights reserved.
+
+Distributed under the terms of the BSD Simplified License.
+
+The full license is in the LICENSE file, distributed with this software.
+
+Written by Mark Wiebe (mwwiebe@gmail.com)
+Copyright (c) 2011 by Enthought, Inc.
+
+Copyright (c) 2005-2011, NumPy Developers
+All rights reserved.
+
+See NUMPY_LICENSE.txt for the license.
+
+This file implements string parsing and creation for NumPy datetime.
+
+*/
+
+#define PY_SSIZE_T_CLEAN
+#define NO_IMPORT
+
+#ifndef NPY_NO_DEPRECATED_API
+#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
+#endif  // NPY_NO_DEPRECATED_API
+
+#include <Python.h>
+
+#include <time.h>
+
+#include <numpy/arrayobject.h>
+#include <numpy/arrayscalars.h>
+#include <numpy/ndarraytypes.h>
+
+#include "np_datetime.h"
+#include "np_datetime_strings.h"
+
+
+/*
+ * Parses (almost) standard ISO 8601 date strings. The differences are:
+ *
+ * + Only seconds may have a decimal point, with up to 18 digits after it
+ *   (maximum attoseconds precision).
+ * + Either a 'T' as in ISO 8601 or a ' ' may be used to separate
+ *   the date and the time. Both are treated equivalently.
+ * + Doesn't (yet) handle the "YYYY-DDD" or "YYYY-Www" formats.
+ * + Doesn't handle leap seconds (seconds value has 60 in these cases).
+ * + Doesn't handle 24:00:00 as synonym for midnight (00:00:00) tomorrow + * + Accepts special values "NaT" (not a time), "Today", (current + * day according to local time) and "Now" (current time in UTC). + * + ':' separator between hours, minutes, and seconds is optional. When + * omitted, each component must be 2 digits if it appears. (GH-10041) + * + * 'str' must be a NULL-terminated string, and 'len' must be its length. + * + * 'out' gets filled with the parsed date-time. + * 'out_local' gets set to 1 if the parsed time contains timezone, + * to 0 otherwise. + * 'out_tzoffset' gets set to timezone offset by minutes + * if the parsed time was in local time, + * to 0 otherwise. The values 'now' and 'today' don't get counted + * as local, and neither do UTC +/-#### timezone offsets, because + * they aren't using the computer's local timezone offset. + * + * Returns 0 on success, -1 on failure. + */ +int parse_iso_8601_datetime(const char *str, int len, int want_exc, + npy_datetimestruct *out, + int *out_local, int *out_tzoffset) { + int year_leap = 0; + int i, numdigits; + const char *substr; + int sublen; + + /* If year-month-day are separated by a valid separator, + * months/days without leading zeroes will be parsed + * (though not iso8601). If the components aren't separated, + * 4 (YYYY) or 8 (YYYYMMDD) digits are expected. 6 digits are + * forbidden here (but parsed as YYMMDD elsewhere). + */ + int has_ymd_sep = 0; + char ymd_sep = '\0'; + char valid_ymd_sep[] = {'-', '.', '/', '\\', ' '}; + int valid_ymd_sep_len = sizeof(valid_ymd_sep); + + /* hour-minute-second may or may not separated by ':'. If not, then + * each component must be 2 digits. */ + int has_hms_sep = 0; + int hour_was_2_digits = 0; + + /* Initialize the output to all zeros */ + memset(out, 0, sizeof(npy_datetimestruct)); + out->month = 1; + out->day = 1; + + substr = str; + sublen = len; + + /* Skip leading whitespace */ + while (sublen > 0 && isspace(*substr)) { + ++substr; + --sublen; + } + + /* Leading '-' sign for negative year */ + if (*substr == '-') { + ++substr; + --sublen; + } + + if (sublen == 0) { + goto parse_error; + } + + /* PARSE THE YEAR (4 digits) */ + out->year = 0; + if (sublen >= 4 && isdigit(substr[0]) && isdigit(substr[1]) && + isdigit(substr[2]) && isdigit(substr[3])) { + out->year = 1000 * (substr[0] - '0') + 100 * (substr[1] - '0') + + 10 * (substr[2] - '0') + (substr[3] - '0'); + + substr += 4; + sublen -= 4; + } + + /* Negate the year if necessary */ + if (str[0] == '-') { + out->year = -out->year; + } + /* Check whether it's a leap-year */ + year_leap = is_leapyear(out->year); + + /* Next character must be a separator, start of month, or end of string */ + if (sublen == 0) { + if (out_local != NULL) { + *out_local = 0; + } + goto finish; + } + + if (!isdigit(*substr)) { + for (i = 0; i < valid_ymd_sep_len; ++i) { + if (*substr == valid_ymd_sep[i]) { + break; + } + } + if (i == valid_ymd_sep_len) { + goto parse_error; + } + has_ymd_sep = 1; + ymd_sep = valid_ymd_sep[i]; + ++substr; + --sublen; + /* Cannot have trailing separator */ + if (sublen == 0 || !isdigit(*substr)) { + goto parse_error; + } + } + + /* PARSE THE MONTH */ + /* First digit required */ + out->month = (*substr - '0'); + ++substr; + --sublen; + /* Second digit optional if there was a separator */ + if (isdigit(*substr)) { + out->month = 10 * out->month + (*substr - '0'); + ++substr; + --sublen; + } else if (!has_ymd_sep) { + goto parse_error; + } + if (out->month < 1 || out->month > 12) { + if (want_exc) { + 
PyErr_Format(PyExc_ValueError, + "Month out of range in datetime string \"%s\"", str); + } + goto error; + } + + /* Next character must be the separator, start of day, or end of string */ + if (sublen == 0) { + /* Forbid YYYYMM. Parsed instead as YYMMDD by someone else. */ + if (!has_ymd_sep) { + goto parse_error; + } + if (out_local != NULL) { + *out_local = 0; + } + goto finish; + } + + if (has_ymd_sep) { + /* Must have separator, but cannot be trailing */ + if (*substr != ymd_sep || sublen == 1) { + goto parse_error; + } + ++substr; + --sublen; + } + + /* PARSE THE DAY */ + /* First digit required */ + if (!isdigit(*substr)) { + goto parse_error; + } + out->day = (*substr - '0'); + ++substr; + --sublen; + /* Second digit optional if there was a separator */ + if (isdigit(*substr)) { + out->day = 10 * out->day + (*substr - '0'); + ++substr; + --sublen; + } else if (!has_ymd_sep) { + goto parse_error; + } + if (out->day < 1 || + out->day > days_per_month_table[year_leap][out->month - 1]) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Day out of range in datetime string \"%s\"", str); + } + goto error; + } + + /* Next character must be a 'T', ' ', or end of string */ + if (sublen == 0) { + if (out_local != NULL) { + *out_local = 0; + } + goto finish; + } + + if ((*substr != 'T' && *substr != ' ') || sublen == 1) { + goto parse_error; + } + ++substr; + --sublen; + + /* PARSE THE HOURS */ + /* First digit required */ + if (!isdigit(*substr)) { + goto parse_error; + } + out->hour = (*substr - '0'); + ++substr; + --sublen; + /* Second digit optional */ + if (isdigit(*substr)) { + hour_was_2_digits = 1; + out->hour = 10 * out->hour + (*substr - '0'); + ++substr; + --sublen; + if (out->hour >= 24) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Hours out of range in datetime string \"%s\"", + str); + } + goto error; + } + } + + /* Next character must be a ':' or the end of the string */ + if (sublen == 0) { + if (!hour_was_2_digits) { + goto parse_error; + } + goto finish; + } + + if (*substr == ':') { + has_hms_sep = 1; + ++substr; + --sublen; + /* Cannot have a trailing separator */ + if (sublen == 0 || !isdigit(*substr)) { + goto parse_error; + } + } else if (!isdigit(*substr)) { + if (!hour_was_2_digits) { + goto parse_error; + } + goto parse_timezone; + } + + /* PARSE THE MINUTES */ + /* First digit required */ + out->min = (*substr - '0'); + ++substr; + --sublen; + /* Second digit optional if there was a separator */ + if (isdigit(*substr)) { + out->min = 10 * out->min + (*substr - '0'); + ++substr; + --sublen; + if (out->min >= 60) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Minutes out of range in datetime string \"%s\"", + str); + } + goto error; + } + } else if (!has_hms_sep) { + goto parse_error; + } + + if (sublen == 0) { + goto finish; + } + + /* If we make it through this condition block, then the next + * character is a digit. 
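+     * (Either a ':' was consumed and verified to be followed by a digit,
+     * or there is no hh:mm:ss separator and the current character is
+     * itself a digit.)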
*/
+    if (has_hms_sep && *substr == ':') {
+        ++substr;
+        --sublen;
+        /* Cannot have a trailing ':' */
+        if (sublen == 0 || !isdigit(*substr)) {
+            goto parse_error;
+        }
+    } else if (!has_hms_sep && isdigit(*substr)) {
+    } else {
+        goto parse_timezone;
+    }
+
+    /* PARSE THE SECONDS */
+    /* First digit required */
+    out->sec = (*substr - '0');
+    ++substr;
+    --sublen;
+    /* Second digit optional if there was a separator */
+    if (isdigit(*substr)) {
+        out->sec = 10 * out->sec + (*substr - '0');
+        ++substr;
+        --sublen;
+        if (out->sec >= 60) {
+            if (want_exc) {
+                PyErr_Format(PyExc_ValueError,
+                             "Seconds out of range in datetime string \"%s\"",
+                             str);
+            }
+            goto error;
+        }
+    } else if (!has_hms_sep) {
+        goto parse_error;
+    }
+
+    /* Next character may be a '.' indicating fractional seconds */
+    if (sublen > 0 && *substr == '.') {
+        ++substr;
+        --sublen;
+    } else {
+        goto parse_timezone;
+    }
+
+    /* PARSE THE MICROSECONDS (0 to 6 digits) */
+    numdigits = 0;
+    for (i = 0; i < 6; ++i) {
+        out->us *= 10;
+        if (sublen > 0 && isdigit(*substr)) {
+            out->us += (*substr - '0');
+            ++substr;
+            --sublen;
+            ++numdigits;
+        }
+    }
+
+    if (sublen == 0 || !isdigit(*substr)) {
+        goto parse_timezone;
+    }
+
+    /* PARSE THE PICOSECONDS (0 to 6 digits) */
+    numdigits = 0;
+    for (i = 0; i < 6; ++i) {
+        out->ps *= 10;
+        if (sublen > 0 && isdigit(*substr)) {
+            out->ps += (*substr - '0');
+            ++substr;
+            --sublen;
+            ++numdigits;
+        }
+    }
+
+    if (sublen == 0 || !isdigit(*substr)) {
+        goto parse_timezone;
+    }
+
+    /* PARSE THE ATTOSECONDS (0 to 6 digits) */
+    numdigits = 0;
+    for (i = 0; i < 6; ++i) {
+        out->as *= 10;
+        if (sublen > 0 && isdigit(*substr)) {
+            out->as += (*substr - '0');
+            ++substr;
+            --sublen;
+            ++numdigits;
+        }
+    }
+
+parse_timezone:
+    /* trim any whitespace between time/timezone */
+    while (sublen > 0 && isspace(*substr)) {
+        ++substr;
+        --sublen;
+    }
+
+    if (sublen == 0) {
+        // Unlike NumPy, treating no time zone as naive
+        goto finish;
+    }
+
+    /* UTC specifier */
+    if (*substr == 'Z') {
+        /* "Z" should be equivalent to tz offset "+00:00" */
+        if (out_local != NULL) {
+            *out_local = 1;
+        }
+
+        if (out_tzoffset != NULL) {
+            *out_tzoffset = 0;
+        }
+
+        if (sublen == 1) {
+            goto finish;
+        } else {
+            ++substr;
+            --sublen;
+        }
+    } else if (*substr == '-' || *substr == '+') {
+        /* Time zone offset */
+        int offset_neg = 0, offset_hour = 0, offset_minute = 0;
+
+        /*
+         * Since "local" means local with respect to the current
+         * machine, we say this is non-local.
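+         * (A "+09:30" suffix, for instance, sets *out_local to 1 and
+         * *out_tzoffset to 570 when those out-pointers are supplied; the
+         * parsed date/time fields keep the values as written in the string.)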
+ */ + + if (*substr == '-') { + offset_neg = 1; + } + ++substr; + --sublen; + + /* The hours offset */ + if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) { + offset_hour = 10 * (substr[0] - '0') + (substr[1] - '0'); + substr += 2; + sublen -= 2; + if (offset_hour >= 24) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Timezone hours offset out of range " + "in datetime string \"%s\"", + str); + } + goto error; + } + } else if (sublen >= 1 && isdigit(substr[0])) { + offset_hour = substr[0] - '0'; + ++substr; + --sublen; + } else { + goto parse_error; + } + + /* The minutes offset is optional */ + if (sublen > 0) { + /* Optional ':' */ + if (*substr == ':') { + ++substr; + --sublen; + } + + /* The minutes offset (at the end of the string) */ + if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) { + offset_minute = 10 * (substr[0] - '0') + (substr[1] - '0'); + substr += 2; + sublen -= 2; + if (offset_minute >= 60) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Timezone minutes offset out of range " + "in datetime string \"%s\"", + str); + } + goto error; + } + } else if (sublen >= 1 && isdigit(substr[0])) { + offset_minute = substr[0] - '0'; + ++substr; + --sublen; + } else { + goto parse_error; + } + } + + /* Apply the time zone offset */ + if (offset_neg) { + offset_hour = -offset_hour; + offset_minute = -offset_minute; + } + if (out_local != NULL) { + *out_local = 1; + // Unlike NumPy, do not change internal value to local time + *out_tzoffset = 60 * offset_hour + offset_minute; + } + } + + /* Skip trailing whitespace */ + while (sublen > 0 && isspace(*substr)) { + ++substr; + --sublen; + } + + if (sublen != 0) { + goto parse_error; + } + +finish: + return 0; + +parse_error: + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Error parsing datetime string \"%s\" at position %d", str, + (int)(substr - str)); + } + return -1; + +error: + return -1; +} + +/* + * Provides a string length to use for converting datetime + * objects with the given local and unit settings. + */ +int get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base) { + int len = 0; + + switch (base) { + /* Generic units can only be used to represent NaT */ + /* return 4;*/ + case NPY_FR_as: + len += 3; /* "###" */ + case NPY_FR_fs: + len += 3; /* "###" */ + case NPY_FR_ps: + len += 3; /* "###" */ + case NPY_FR_ns: + len += 3; /* "###" */ + case NPY_FR_us: + len += 3; /* "###" */ + case NPY_FR_ms: + len += 4; /* ".###" */ + case NPY_FR_s: + len += 3; /* ":##" */ + case NPY_FR_m: + len += 3; /* ":##" */ + case NPY_FR_h: + len += 3; /* "T##" */ + case NPY_FR_D: + case NPY_FR_W: + len += 3; /* "-##" */ + case NPY_FR_M: + len += 3; /* "-##" */ + case NPY_FR_Y: + len += 21; /* 64-bit year */ + break; + default: + len += 3; /* handle the now defunct NPY_FR_B */ + break; + } + + if (base >= NPY_FR_h) { + if (local) { + len += 5; /* "+####" or "-####" */ + } else { + len += 1; /* "Z" */ + } + } + + len += 1; /* NULL terminator */ + + return len; +} + + +/* + * Converts an npy_datetimestruct to an (almost) ISO 8601 + * NULL-terminated string using timezone Z (UTC). If the string fits in + * the space exactly, it leaves out the NULL terminator and returns success. + * + * The differences from ISO 8601 are the 'NaT' string, and + * the number of year digits is >= 4 instead of strictly 4. + * + * 'base' restricts the output to that unit. Set 'base' to + * -1 to auto-detect a base after which all the values are zero. 
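+ * + * Illustrative note (not in the upstream source): with base == NPY_FR_m + * a struct holding 2014-03-01 09:30 renders as "2014-03-01T09:30Z", + * while base == NPY_FR_D stops at "2014-03-01" with no timezone suffix.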
+ * + * Returns 0 on success, -1 on failure (for example if the output + * string was too short). + */ +int make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, + NPY_DATETIMEUNIT base) { + char *substr = outstr; + int sublen = outlen; + int tmplen; + + /* + * Print weeks with the same precision as days. + * + * TODO: Could print weeks with YYYY-Www format if the week + * epoch is a Monday. + */ + if (base == NPY_FR_W) { + base = NPY_FR_D; + } + +/* YEAR */ +/* + * Can't use PyOS_snprintf, because it always produces a '\0' + * character at the end, and NumPy string types are permitted + * to have data all the way to the end of the buffer. + */ +#ifdef _WIN32 + tmplen = _snprintf(substr, sublen, "%04" NPY_INT64_FMT, dts->year); +#else + tmplen = snprintf(substr, sublen, "%04" NPY_INT64_FMT, dts->year); +#endif // _WIN32 + /* If it ran out of space or there isn't space for the NULL terminator */ + if (tmplen < 0 || tmplen > sublen) { + goto string_too_short; + } + substr += tmplen; + sublen -= tmplen; + + /* Stop if the unit is years */ + if (base == NPY_FR_Y) { + if (sublen > 0) { + *substr = '\0'; + } + return 0; + } + + /* MONTH */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = '-'; + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->month / 10) + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->month % 10) + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is months */ + if (base == NPY_FR_M) { + if (sublen > 0) { + *substr = '\0'; + } + return 0; + } + + /* DAY */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = '-'; + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->day / 10) + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->day % 10) + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is days */ + if (base == NPY_FR_D) { + if (sublen > 0) { + *substr = '\0'; + } + return 0; + } + + /* HOUR */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = 'T'; + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->hour / 10) + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->hour % 10) + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is hours */ + if (base == NPY_FR_h) { + goto add_time_zone; + } + + /* MINUTE */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = ':'; + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->min / 10) + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->min % 10) + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is minutes */ + if (base == NPY_FR_m) { + goto add_time_zone; + } + + /* SECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = ':'; + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->sec / 10) + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->sec % 10) + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is seconds */ + if (base == NPY_FR_s) { + goto add_time_zone; + } + + /* MILLISECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = '.'; + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->us / 100000) % 10 + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->us / 10000) % 10 + '0'); + if (sublen < 4) { + goto string_too_short; + } + substr[3] = (char)((dts->us / 
1000) % 10 + '0'); + substr += 4; + sublen -= 4; + + /* Stop if the unit is milliseconds */ + if (base == NPY_FR_ms) { + goto add_time_zone; + } + + /* MICROSECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = (char)((dts->us / 100) % 10 + '0'); + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->us / 10) % 10 + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)(dts->us % 10 + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is microseconds */ + if (base == NPY_FR_us) { + goto add_time_zone; + } + + /* NANOSECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = (char)((dts->ps / 100000) % 10 + '0'); + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->ps / 10000) % 10 + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->ps / 1000) % 10 + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is nanoseconds */ + if (base == NPY_FR_ns) { + goto add_time_zone; + } + + /* PICOSECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = (char)((dts->ps / 100) % 10 + '0'); + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->ps / 10) % 10 + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)(dts->ps % 10 + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is picoseconds */ + if (base == NPY_FR_ps) { + goto add_time_zone; + } + + /* FEMTOSECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = (char)((dts->as / 100000) % 10 + '0'); + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->as / 10000) % 10 + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->as / 1000) % 10 + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is femtoseconds */ + if (base == NPY_FR_fs) { + goto add_time_zone; + } + + /* ATTOSECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = (char)((dts->as / 100) % 10 + '0'); + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->as / 10) % 10 + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)(dts->as % 10 + '0'); + substr += 3; + sublen -= 3; + +add_time_zone: + /* UTC "Zulu" time */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = 'Z'; + substr += 1; + sublen -= 1; + + /* Add a NULL terminator, and return */ + if (sublen > 0) { + substr[0] = '\0'; + } + + return 0; + +string_too_short: + PyErr_Format(PyExc_RuntimeError, + "The string provided for NumPy ISO datetime formatting " + "was too short, with length %d", + outlen); + return -1; +} + + +int make_iso_8601_timedelta(pandas_timedeltastruct *tds, + char *outstr, size_t *outlen) { + *outlen = 0; + *outlen += snprintf(outstr, 60, // NOLINT + "P%" NPY_INT64_FMT + "DT%" NPY_INT32_FMT + "H%" NPY_INT32_FMT + "M%" NPY_INT32_FMT, + tds->days, tds->hrs, tds->min, tds->sec); + outstr += *outlen; + + if (tds->ns != 0) { + *outlen += snprintf(outstr, 12, // NOLINT + ".%03" NPY_INT32_FMT + "%03" NPY_INT32_FMT + "%03" NPY_INT32_FMT + "S", tds->ms, tds->us, tds->ns); + } else if (tds->us != 0) { + *outlen += snprintf(outstr, 9, // NOLINT + ".%03" NPY_INT32_FMT + "%03" NPY_INT32_FMT + "S", tds->ms, tds->us); + } else if (tds->ms != 0) { + *outlen += snprintf(outstr, 6, // NOLINT + ".%03" NPY_INT32_FMT "S", tds->ms); + } else { + *outlen += snprintf(outstr, 2, // NOLINT + "%s", "S"); + } + + return 0; +} diff --git 
a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h new file mode 100644 index 0000000..200a71f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h @@ -0,0 +1,92 @@ +/* + +Copyright (c) 2016, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. + +Written by Mark Wiebe (mwwiebe@gmail.com) +Copyright (c) 2011 by Enthought, Inc. + +Copyright (c) 2005-2011, NumPy Developers +All rights reserved. + +See NUMPY_LICENSE.txt for the license. + +This file implements string parsing and creation for NumPy datetime. + +*/ + +#ifndef PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_STRINGS_H_ +#define PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_STRINGS_H_ + +#ifndef NPY_NO_DEPRECATED_API +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#endif // NPY_NO_DEPRECATED_API + +/* + * Parses (almost) standard ISO 8601 date strings. The differences are: + * + * + The date "20100312" is parsed as the year 20100312, not as + * equivalent to "2010-03-12". The '-' separators in the date are not optional. + * + Only seconds may have a decimal point, with up to 18 digits after it + * (maximum attoseconds precision). + * + Either a 'T' as in ISO 8601 or a ' ' may be used to separate + * the date and the time. Both are treated equivalently. + * + Doesn't (yet) handle the "YYYY-DDD" or "YYYY-Www" formats. + * + Doesn't handle leap seconds (seconds value has 60 in these cases). + * + Doesn't handle 24:00:00 as a synonym for midnight (00:00:00) of the next day. + * + Accepts special values "NaT" (not a time), "Today" (current + * day according to local time) and "Now" (current time in UTC). + * + * 'str' must be a NULL-terminated string, and 'len' must be its length. + * + * 'out' gets filled with the parsed date-time. + * 'out_local' gets set to whether the returned value contains a timezone: + * 0 for UTC, 1 for local time. + * 'out_tzoffset' gets set to the timezone offset in minutes + * if the parsed time was in local time, + * to 0 otherwise. The values 'now' and 'today' don't get counted + * as local, and neither do UTC +/-#### timezone offsets, because + * they aren't using the computer's local timezone offset. + * + * Returns 0 on success, -1 on failure. + */ +int +parse_iso_8601_datetime(const char *str, int len, int want_exc, + npy_datetimestruct *out, + int *out_local, + int *out_tzoffset); + +/* + * Provides a string length to use for converting datetime + * objects with the given local and unit settings. + */ +int +get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base); + +/* + * Converts an npy_datetimestruct to an (almost) ISO 8601 + * NULL-terminated string using timezone Z (UTC). + * + * 'base' restricts the output to that unit. Set 'base' to + * -1 to auto-detect a base after which all the values are zero. + * + * Returns 0 on success, -1 on failure (for example if the output + * string was too short). + */ +int +make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, + NPY_DATETIMEUNIT base); + +/* + * Converts a pandas_timedeltastruct to an ISO 8601 string. + * + * Mutates outlen to provide the size of the (non-NULL-terminated) string.
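+ * + * Illustrative note (not in the upstream source): days=1, hrs=2, min=3, + * sec=4 with zero subsecond fields formats as "P1DT2H3M4S"; a 500 ms + * component instead yields "P1DT2H3M4.500S".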
+ * + * Currently has no error handling + */ +int make_iso_8601_timedelta(pandas_timedeltastruct *tds, char *outstr, + size_t *outlen); +#endif // PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_STRINGS_H_ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/strptime.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/strptime.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..1b83eae Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/strptime.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/strptime.pyi b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/strptime.pyi new file mode 100644 index 0000000..8e1acb2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/strptime.pyi @@ -0,0 +1,12 @@ +import numpy as np + +from pandas._typing import npt + +def array_strptime( + values: npt.NDArray[np.object_], + fmt: str | None, + exact: bool = ..., + errors: str = ..., +) -> tuple[np.ndarray, np.ndarray]: ... + +# first ndarray is M8[ns], second is object ndarray of tzinfo | None diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/strptime.pyx b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/strptime.pyx new file mode 100644 index 0000000..d214694 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/strptime.pyx @@ -0,0 +1,762 @@ +"""Strptime-related classes and functions. +""" +import calendar +import locale +import re +import time + +from cpython.datetime cimport ( + date, + tzinfo, +) + +from _thread import allocate_lock as _thread_allocate_lock + +import numpy as np +import pytz + +from numpy cimport ( + int64_t, + ndarray, +) + +from pandas._libs.missing cimport checknull_with_nat_and_na +from pandas._libs.tslibs.nattype cimport ( + NPY_NAT, + c_nat_strings as nat_strings, +) +from pandas._libs.tslibs.np_datetime cimport ( + check_dts_bounds, + dtstruct_to_dt64, + npy_datetimestruct, +) + + +cdef dict _parse_code_table = {'y': 0, + 'Y': 1, + 'm': 2, + 'B': 3, + 'b': 4, + 'd': 5, + 'H': 6, + 'I': 7, + 'M': 8, + 'S': 9, + 'f': 10, + 'A': 11, + 'a': 12, + 'w': 13, + 'j': 14, + 'U': 15, + 'W': 16, + 'Z': 17, + 'p': 18, # an additional key, only with I + 'z': 19, + 'G': 20, + 'V': 21, + 'u': 22} + + +def array_strptime(ndarray[object] values, object fmt, bint exact=True, errors='raise'): + """ + Calculates the datetime structs represented by the passed array of strings + + Parameters + ---------- + values : ndarray of string-like objects + fmt : string-like regex + exact : matches must be exact if True, search if False + errors : string specifying error handling, {'raise', 'ignore', 'coerce'} + """ + + cdef: + Py_ssize_t i, n = len(values) + npy_datetimestruct dts + int64_t[:] iresult + object[:] result_timezone + int year, month, day, minute, hour, second, weekday, julian + int week_of_year, week_of_year_start, parse_code, ordinal + int iso_week, iso_year + int64_t us, ns + object val, group_key, ampm, found, timezone + dict found_key + bint is_raise = errors=='raise' + bint is_ignore = errors=='ignore' + bint is_coerce = errors=='coerce' + + assert is_raise or is_ignore or is_coerce + + if fmt is not None: + if '%W' in fmt or '%U' in fmt: + if '%Y' not in fmt and '%y' not in fmt: + raise ValueError("Cannot use '%W' or '%U' without day and year") + if '%A' not in fmt and '%a' not in fmt and '%w' not in fmt: + raise ValueError("Cannot use '%W' or '%U' without day and year") 
+ elif '%Z' in fmt and '%z' in fmt: + raise ValueError("Cannot parse both %Z and %z") + + global _TimeRE_cache, _regex_cache + with _cache_lock: + if _getlang() != _TimeRE_cache.locale_time.lang: + _TimeRE_cache = TimeRE() + _regex_cache.clear() + if len(_regex_cache) > _CACHE_MAX_SIZE: + _regex_cache.clear() + locale_time = _TimeRE_cache.locale_time + format_regex = _regex_cache.get(fmt) + if not format_regex: + try: + format_regex = _TimeRE_cache.compile(fmt) + # KeyError raised when a bad format is found; can be specified as + # \\, in which case it was a stray % but with a space after it + except KeyError, err: + bad_directive = err.args[0] + if bad_directive == "\\": + bad_directive = "%" + del err + raise ValueError(f"'{bad_directive}' is a bad directive " + f"in format '{fmt}'") + # IndexError only occurs when the format string is "%" + except IndexError: + raise ValueError(f"stray % in format '{fmt}'") + _regex_cache[fmt] = format_regex + + result = np.empty(n, dtype='M8[ns]') + iresult = result.view('i8') + result_timezone = np.empty(n, dtype='object') + + dts.us = dts.ps = dts.as = 0 + + for i in range(n): + val = values[i] + if isinstance(val, str): + if val in nat_strings: + iresult[i] = NPY_NAT + continue + else: + if checknull_with_nat_and_na(val): + iresult[i] = NPY_NAT + continue + else: + val = str(val) + + # exact matching + if exact: + found = format_regex.match(val) + if not found: + if is_coerce: + iresult[i] = NPY_NAT + continue + raise ValueError(f"time data '{val}' does not match " + f"format '{fmt}' (match)") + if len(val) != found.end(): + if is_coerce: + iresult[i] = NPY_NAT + continue + raise ValueError(f"unconverted data remains: {val[found.end():]}") + + # search + else: + found = format_regex.search(val) + if not found: + if is_coerce: + iresult[i] = NPY_NAT + continue + raise ValueError(f"time data {repr(val)} does not match format " + f"{repr(fmt)} (search)") + + iso_year = -1 + year = 1900 + month = day = 1 + hour = minute = second = ns = us = 0 + timezone = None + # Default to -1 to signify that values not known; not critical to have, + # though + iso_week = week_of_year = -1 + week_of_year_start = -1 + # weekday and julian defaulted to -1 so as to signal need to calculate + # values + weekday = julian = -1 + found_dict = found.groupdict() + for group_key in found_dict.iterkeys(): + # Directives not explicitly handled below: + # c, x, X + # handled by making out of other directives + # U, W + # worthless without day of the week + parse_code = _parse_code_table[group_key] + + if parse_code == 0: + year = int(found_dict['y']) + # Open Group specification for strptime() states that a %y + # value in the range of [00, 68] is in the century 2000, while + # [69,99] is in the century 1900 + if year <= 68: + year += 2000 + else: + year += 1900 + elif parse_code == 1: + year = int(found_dict['Y']) + elif parse_code == 2: + month = int(found_dict['m']) + # elif group_key == 'B': + elif parse_code == 3: + month = locale_time.f_month.index(found_dict['B'].lower()) + # elif group_key == 'b': + elif parse_code == 4: + month = locale_time.a_month.index(found_dict['b'].lower()) + # elif group_key == 'd': + elif parse_code == 5: + day = int(found_dict['d']) + # elif group_key == 'H': + elif parse_code == 6: + hour = int(found_dict['H']) + elif parse_code == 7: + hour = int(found_dict['I']) + ampm = found_dict.get('p', '').lower() + # If there was no AM/PM indicator, we'll treat this like AM + if ampm in ('', locale_time.am_pm[0]): + # We're in AM so the hour is correct 
unless we're + # looking at 12 midnight. + # 12 midnight == 12 AM == hour 0 + if hour == 12: + hour = 0 + elif ampm == locale_time.am_pm[1]: + # We're in PM so we need to add 12 to the hour unless + # we're looking at 12 noon. + # 12 noon == 12 PM == hour 12 + if hour != 12: + hour += 12 + elif parse_code == 8: + minute = int(found_dict['M']) + elif parse_code == 9: + second = int(found_dict['S']) + elif parse_code == 10: + s = found_dict['f'] + # Pad to always return nanoseconds + s += "0" * (9 - len(s)) + us = long(s) + ns = us % 1000 + us = us // 1000 + elif parse_code == 11: + weekday = locale_time.f_weekday.index(found_dict['A'].lower()) + elif parse_code == 12: + weekday = locale_time.a_weekday.index(found_dict['a'].lower()) + elif parse_code == 13: + weekday = int(found_dict['w']) + if weekday == 0: + weekday = 6 + else: + weekday -= 1 + elif parse_code == 14: + julian = int(found_dict['j']) + elif parse_code == 15 or parse_code == 16: + week_of_year = int(found_dict[group_key]) + if group_key == 'U': + # U starts week on Sunday. + week_of_year_start = 6 + else: + # W starts week on Monday. + week_of_year_start = 0 + elif parse_code == 17: + timezone = pytz.timezone(found_dict['Z']) + elif parse_code == 19: + timezone = parse_timezone_directive(found_dict['z']) + elif parse_code == 20: + iso_year = int(found_dict['G']) + elif parse_code == 21: + iso_week = int(found_dict['V']) + elif parse_code == 22: + weekday = int(found_dict['u']) + weekday -= 1 + + # don't assume default values for ISO week/year + if iso_year != -1: + if iso_week == -1 or weekday == -1: + raise ValueError("ISO year directive '%G' must be used with " + "the ISO week directive '%V' and a weekday " + "directive '%A', '%a', '%w', or '%u'.") + if julian != -1: + raise ValueError("Day of the year directive '%j' is not " + "compatible with ISO year directive '%G'. " + "Use '%Y' instead.") + elif year != -1 and week_of_year == -1 and iso_week != -1: + if weekday == -1: + raise ValueError("ISO week directive '%V' must be used with " + "the ISO year directive '%G' and a weekday " + "directive '%A', '%a', '%w', or '%u'.") + else: + raise ValueError("ISO week directive '%V' is incompatible with " + "the year directive '%Y'. Use the ISO year " + "'%G' instead.") + + # If we know the wk of the year and what day of that wk, we can figure + # out the Julian day of the year. + if julian == -1 and weekday != -1: + if week_of_year != -1: + week_starts_Mon = week_of_year_start == 0 + julian = _calc_julian_from_U_or_W(year, week_of_year, weekday, + week_starts_Mon) + elif iso_year != -1 and iso_week != -1: + year, julian = _calc_julian_from_V(iso_year, iso_week, + weekday + 1) + # Cannot pre-calculate date() since can change in Julian + # calculation and thus could have different value for the day of the wk + # calculation. + try: + if julian == -1: + # Need to add 1 to result since first day of the year is 1, not + # 0. + ordinal = date(year, month, day).toordinal() + julian = ordinal - date(year, 1, 1).toordinal() + 1 + else: + # Assume that if they bothered to include Julian day it will + # be accurate. 
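+ # Illustrative note (not in the vendored source): julian=60 with + # year=2021 gives date.fromordinal(59 + date(2021, 1, 1).toordinal()) + # == date(2021, 3, 1), since 2021 is not a leap year.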
+ datetime_result = date.fromordinal( + (julian - 1) + date(year, 1, 1).toordinal()) + year = datetime_result.year + month = datetime_result.month + day = datetime_result.day + except ValueError: + if is_coerce: + iresult[i] = NPY_NAT + continue + raise + if weekday == -1: + weekday = date(year, month, day).weekday() + + dts.year = year + dts.month = month + dts.day = day + dts.hour = hour + dts.min = minute + dts.sec = second + dts.us = us + dts.ps = ns * 1000 + + iresult[i] = dtstruct_to_dt64(&dts) + try: + check_dts_bounds(&dts) + except ValueError: + if is_coerce: + iresult[i] = NPY_NAT + continue + raise + + result_timezone[i] = timezone + + return result, result_timezone.base + + +""" +_getlang, LocaleTime, TimeRE, _calc_julian_from_U_or_W are vendored +from the standard library, see +https://github.com/python/cpython/blob/master/Lib/_strptime.py +The original module-level docstring follows. + +Strptime-related classes and functions. +CLASSES: + LocaleTime -- Discovers and stores locale-specific time information + TimeRE -- Creates regexes for pattern matching a string of text containing + time information +FUNCTIONS: + _getlang -- Figure out what language is being used for the locale + strptime -- Calculates the time struct represented by the passed-in string +""" + + +def _getlang(): + """Figure out what language is being used for the locale""" + return locale.getlocale(locale.LC_TIME) + + +class LocaleTime: + """ + Stores and handles locale-specific information related to time. + + ATTRIBUTES: + f_weekday -- full weekday names (7-item list) + a_weekday -- abbreviated weekday names (7-item list) + f_month -- full month names (13-item list; dummy value in [0], which + is added by code) + a_month -- abbreviated month names (13-item list, dummy value in + [0], which is added by code) + am_pm -- AM/PM representation (2-item list) + LC_date_time -- format string for date/time representation (string) + LC_date -- format string for date representation (string) + LC_time -- format string for time representation (string) + timezone -- daylight- and non-daylight-savings timezone representation + (2-item list of sets) + lang -- Language used by instance (2-item tuple) + """ + + def __init__(self): + """ + Set all attributes. + + Order of methods called matters for dependency reasons. + + The locale language is set at the offset and then checked again before + exiting. This is to make sure that the attributes were not set with a + mix of information from more than one locale. This would most likely + happen when using threads where one thread calls a locale-dependent + function while another thread changes the locale while the function in + the other thread is still running. Proper coding would call for + locks to prevent changing the locale while locale-dependent code is + running. The check here is done in case someone does not think about + doing this. + + Only other possible issue is if someone changed the timezone and did + not call tz.tzset . That is an issue for the programmer, though, + since changing the timezone is worthless without that call. + """ + self.lang = _getlang() + self.__calc_weekday() + self.__calc_month() + self.__calc_am_pm() + self.__calc_timezone() + self.__calc_date_time() + if _getlang() != self.lang: + raise ValueError("locale changed during initialization") + + def __pad(self, seq, front): + # Add '' to seq to either the front (is True), else the back. 
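+ # Illustrative note (not in the vendored source): + # __pad(['a', 'b'], front=True) -> ['', 'a', 'b'] + # __pad(['a', 'b'], front=False) -> ['a', 'b', '']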
+ seq = list(seq) + if front: + seq.insert(0, '') + else: + seq.append('') + return seq + + def __calc_weekday(self): + # Set self.a_weekday and self.f_weekday using the calendar + # module. + a_weekday = [calendar.day_abbr[i].lower() for i in range(7)] + f_weekday = [calendar.day_name[i].lower() for i in range(7)] + self.a_weekday = a_weekday + self.f_weekday = f_weekday + + def __calc_month(self): + # Set self.f_month and self.a_month using the calendar module. + a_month = [calendar.month_abbr[i].lower() for i in range(13)] + f_month = [calendar.month_name[i].lower() for i in range(13)] + self.a_month = a_month + self.f_month = f_month + + def __calc_am_pm(self): + # Set self.am_pm by using time.strftime(). + + # The magic date (1999,3,17,hour,44,55,2,76,0) is not really that + # magical; just happened to have used it everywhere else where a + # static date was needed. + am_pm = [] + for hour in (1, 22): + time_tuple = time.struct_time( + (1999, 3, 17, hour, 44, 55, 2, 76, 0)) + am_pm.append(time.strftime("%p", time_tuple).lower()) + self.am_pm = am_pm + + def __calc_date_time(self): + # Set self.date_time, self.date, & self.time by using + # time.strftime(). + + # Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of + # overloaded numbers is minimized. The order in which it searches for + # values within the format string is very important; it eliminates + # possible ambiguity for what something represents. + time_tuple = time.struct_time((1999, 3, 17, 22, 44, 55, 2, 76, 0)) + date_time = [None, None, None] + date_time[0] = time.strftime("%c", time_tuple).lower() + date_time[1] = time.strftime("%x", time_tuple).lower() + date_time[2] = time.strftime("%X", time_tuple).lower() + replacement_pairs = [('%', '%%'), (self.f_weekday[2], '%A'), + (self.f_month[3], '%B'), + (self.a_weekday[2], '%a'), + (self.a_month[3], '%b'), (self.am_pm[1], '%p'), + ('1999', '%Y'), ('99', '%y'), ('22', '%H'), + ('44', '%M'), ('55', '%S'), ('76', '%j'), + ('17', '%d'), ('03', '%m'), ('3', '%m'), + # '3' needed for when no leading zero. + ('2', '%w'), ('10', '%I')] + replacement_pairs.extend([(tz, "%Z") for tz_values in self.timezone + for tz in tz_values]) + for offset, directive in ((0, '%c'), (1, '%x'), (2, '%X')): + current_format = date_time[offset] + for old, new in replacement_pairs: + # Must deal with possible lack of locale info + # manifesting itself as the empty string (e.g., Swedish's + # lack of AM/PM info) or a platform returning a tuple of empty + # strings (e.g., MacOS 9 having timezone as ('','')). + if old: + current_format = current_format.replace(old, new) + # If %W is used, then Sunday, 2005-01-03 will fall on week 0 since + # 2005-01-03 occurs before the first Monday of the year. Otherwise + # %U is used. + time_tuple = time.struct_time((1999, 1, 3, 1, 1, 1, 6, 3, 0)) + if '00' in time.strftime(directive, time_tuple): + U_W = '%W' + else: + U_W = '%U' + date_time[offset] = current_format.replace('11', U_W) + self.LC_date_time = date_time[0] + self.LC_date = date_time[1] + self.LC_time = date_time[2] + + def __calc_timezone(self): + # Set self.timezone by using time.tzname. + # Do not worry about possibility of time.tzname[0] == time.tzname[1] + # and time.daylight; handle that in strptime().
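+ # Illustrative note (assumption, not in the vendored source): with + # TZ=US/Eastern this produces no_saving == frozenset({'utc', 'gmt', 'est'}) + # and has_saving == frozenset({'edt'}).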
+ try: + time.tzset() + except AttributeError: + pass + no_saving = frozenset(["utc", "gmt", time.tzname[0].lower()]) + if time.daylight: + has_saving = frozenset([time.tzname[1].lower()]) + else: + has_saving = frozenset() + self.timezone = (no_saving, has_saving) + + +class TimeRE(dict): + """ + Handle conversion from format directives to regexes. + + Creates regexes for pattern matching a string of text containing + time information + """ + + def __init__(self, locale_time=None): + """ + Create keys/values. + + Order of execution is important for dependency reasons. + """ + if locale_time: + self.locale_time = locale_time + else: + self.locale_time = LocaleTime() + self._Z = None + base = super() + base.__init__({ + # The " \d" part of the regex is to make %c from ANSI C work + 'd': r"(?P<d>3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])", + 'f': r"(?P<f>[0-9]{1,9})", + 'G': r"(?P<G>\d\d\d\d)", + 'H': r"(?P<H>2[0-3]|[0-1]\d|\d)", + 'I': r"(?P<I>1[0-2]|0[1-9]|[1-9])", + 'j': (r"(?P<j>36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|" + r"[1-9]\d|0[1-9]|[1-9])"), + 'm': r"(?P<m>1[0-2]|0[1-9]|[1-9])", + 'M': r"(?P<M>[0-5]\d|\d)", + 'S': r"(?P<S>6[0-1]|[0-5]\d|\d)", + 'u': r"(?P<u>[1-7])", + 'U': r"(?P<U>5[0-3]|[0-4]\d|\d)", + 'V': r"(?P<V>5[0-3]|0[1-9]|[1-4]\d|\d)", + 'w': r"(?P<w>[0-6])", + # W is set below by using 'U' + 'y': r"(?P<y>\d\d)", + # TODO: Does 'Y' need to worry about having less or more than + # 4 digits? + 'Y': r"(?P<Y>\d\d\d\d)", + 'z': r"(?P<z>[+-]\d\d:?[0-5]\d(:?[0-5]\d(\.\d{1,6})?)?|Z)", + 'A': self.__seqToRE(self.locale_time.f_weekday, 'A'), + 'a': self.__seqToRE(self.locale_time.a_weekday, 'a'), + 'B': self.__seqToRE(self.locale_time.f_month[1:], 'B'), + 'b': self.__seqToRE(self.locale_time.a_month[1:], 'b'), + 'p': self.__seqToRE(self.locale_time.am_pm, 'p'), + # 'Z' key is generated lazily via __getitem__ + '%': '%'}) + base.__setitem__('W', base.__getitem__('U').replace('U', 'W')) + base.__setitem__('c', self.pattern(self.locale_time.LC_date_time)) + base.__setitem__('x', self.pattern(self.locale_time.LC_date)) + base.__setitem__('X', self.pattern(self.locale_time.LC_time)) + + def __getitem__(self, key): + if key == "Z": + # lazy computation + if self._Z is None: + self._Z = self.__seqToRE(pytz.all_timezones, 'Z') + return self._Z + return super().__getitem__(key) + + def __seqToRE(self, to_convert, directive): + """ + Convert a list to a regex string for matching a directive. + + Want possible matching values to be from longest to shortest. This + prevents the possibility of a match occurring for a value that is also + a substring of a larger value that should have matched (e.g., 'abc' + matching when 'abcdef' should have been the match). + """ + to_convert = sorted(to_convert, key=len, reverse=True) + for value in to_convert: + if value != '': + break + else: + return '' + regex = '|'.join(re.escape(stuff) for stuff in to_convert) + regex = f"(?P<{directive}>{regex})" + return regex + + def pattern(self, format): + """ + Return regex pattern for the format string. + + Need to make sure that any characters that might be interpreted as + regex syntax are escaped. + """ + processed_format = '' + # The sub() call escapes all characters that might be misconstrued + # as regex syntax. Cannot use re.escape since we have to deal with + # format directives (%m, etc.).
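+ # Illustrative note (not in the vendored source): pattern('%Y-%m-%d') + # expands to r"(?P<Y>\d\d\d\d)-(?P<m>1[0-2]|0[1-9]|[1-9])-" + # r"(?P<d>3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])".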
+ regex_chars = re.compile(r"([\\.^$*+?\(\){}\[\]|])") + format = regex_chars.sub(r"\\\1", format) + whitespace_replacement = re.compile(r'\s+') + format = whitespace_replacement.sub(r'\\s+', format) + while '%' in format: + directive_index = format.index('%') +1 + processed_format = (f"{processed_format}" + f"{format[:directive_index -1]}" + f"{self[format[directive_index]]}") + format = format[directive_index +1:] + return f"{processed_format}{format}" + + def compile(self, format): + """Return a compiled re object for the format string.""" + return re.compile(self.pattern(format), re.IGNORECASE) + + +_cache_lock = _thread_allocate_lock() +# DO NOT modify _TimeRE_cache or _regex_cache without acquiring the cache lock +# first! +_TimeRE_cache = TimeRE() +_CACHE_MAX_SIZE = 5 # Max number of regexes stored in _regex_cache +_regex_cache = {} + + +cdef int _calc_julian_from_U_or_W(int year, int week_of_year, + int day_of_week, int week_starts_Mon): + """ + Calculate the Julian day based on the year, week of the year, and day of + the week, with week_start_day representing whether the week of the year + assumes the week starts on Sunday or Monday (6 or 0). + + Parameters + ---------- + year : int + the year + week_of_year : int + week taken from format U or W + week_starts_Mon : int + represents whether the week of the year + assumes the week starts on Sunday or Monday (6 or 0) + + Returns + ------- + int + converted julian day + """ + + cdef: + int first_weekday, week_0_length, days_to_week + + first_weekday = date(year, 1, 1).weekday() + # If we are dealing with the %U directive (week starts on Sunday), it's + # easier to just shift the view to Sunday being the first day of the + # week. + if not week_starts_Mon: + first_weekday = (first_weekday + 1) % 7 + day_of_week = (day_of_week + 1) % 7 + + # Need to watch out for a week 0 (when the first day of the year is not + # the same as that specified by %U or %W). + week_0_length = (7 - first_weekday) % 7 + if week_of_year == 0: + return 1 + day_of_week - first_weekday + else: + days_to_week = week_0_length + (7 * (week_of_year - 1)) + return 1 + days_to_week + day_of_week + + +cdef (int, int) _calc_julian_from_V(int iso_year, int iso_week, int iso_weekday): + """ + Calculate the Julian day based on the ISO 8601 year, week, and weekday. + + ISO weeks start on Mondays, with week 01 being the week containing 4 Jan. + ISO week days range from 1 (Monday) to 7 (Sunday). 
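+ For example (illustrative): ISO year 2021, week 1, weekday 1 maps to + Monday 2021-01-04, i.e. ordinal day 4 of the Gregorian year 2021.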
+ + Parameters + ---------- + iso_year : int + the year taken from format %G + iso_week : int + the week taken from format %V + iso_weekday : int + weekday taken from format %u + + Returns + ------- + (int, int) + the iso year and the Gregorian ordinal date / julian date + """ + + cdef: + int correction, ordinal + + correction = date(iso_year, 1, 4).isoweekday() + 3 + ordinal = (iso_week * 7) + iso_weekday - correction + # ordinal may be negative or 0 now, which means the date is in the previous + # calendar year + if ordinal < 1: + ordinal += date(iso_year, 1, 1).toordinal() + iso_year -= 1 + ordinal -= date(iso_year, 1, 1).toordinal() + return iso_year, ordinal + + +cdef tzinfo parse_timezone_directive(str z): + """ + Parse the '%z' directive and return a pytz.FixedOffset + + Parameters + ---------- + z : string of the UTC offset + + Returns + ------- + pytz.FixedOffset + + Notes + ----- + This is essentially similar to the cpython implementation + https://github.com/python/cpython/blob/master/Lib/_strptime.py#L457-L479 + """ + + cdef: + int gmtoff_fraction, hours, minutes, seconds, pad_number, microseconds + int total_minutes + object gmtoff_remainder, gmtoff_remainder_padding + + if z == 'Z': + return pytz.FixedOffset(0) + if z[3] == ':': + z = z[:3] + z[4:] + if len(z) > 5: + if z[5] != ':': + raise ValueError(f"Inconsistent use of : in {z}") + z = z[:5] + z[6:] + hours = int(z[1:3]) + minutes = int(z[3:5]) + seconds = int(z[5:7] or 0) + + # Pad to always return microseconds. + gmtoff_remainder = z[8:] + pad_number = 6 - len(gmtoff_remainder) + gmtoff_remainder_padding = "0" * pad_number + microseconds = int(gmtoff_remainder + gmtoff_remainder_padding) + + total_minutes = ((hours * 60) + minutes + (seconds // 60) + + (microseconds // 60_000_000)) + total_minutes = -total_minutes if z.startswith("-") else total_minutes + return pytz.FixedOffset(total_minutes) diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/timedeltas.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/timedeltas.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..99454e1 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/timedeltas.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/timedeltas.pxd b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/timedeltas.pxd new file mode 100644 index 0000000..fed1f2d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/timedeltas.pxd @@ -0,0 +1,19 @@ +from cpython.datetime cimport timedelta +from numpy cimport int64_t + + +# Exposed for tslib, not intended for outside use. +cpdef int64_t delta_to_nanoseconds(delta) except? 
-1 +cdef convert_to_timedelta64(object ts, str unit) +cdef bint is_any_td_scalar(object obj) + + +cdef class _Timedelta(timedelta): + cdef readonly: + int64_t value # nanoseconds + object freq # frequency reference + bint is_populated # are my components populated + int64_t _d, _h, _m, _s, _ms, _us, _ns + + cpdef timedelta to_pytimedelta(_Timedelta self) + cpdef bint _has_ns(self) diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/timedeltas.pyi b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/timedeltas.pyi new file mode 100644 index 0000000..d8369f0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/timedeltas.pyi @@ -0,0 +1,84 @@ +from datetime import timedelta +from typing import ( + ClassVar, + Type, + TypeVar, + overload, +) + +import numpy as np + +from pandas._libs.tslibs import ( + NaTType, + Tick, +) +from pandas._typing import npt + +_S = TypeVar("_S", bound=timedelta) + +def ints_to_pytimedelta( + arr: npt.NDArray[np.int64], # const int64_t[:] + box: bool = ..., +) -> npt.NDArray[np.object_]: ... +def array_to_timedelta64( + values: npt.NDArray[np.object_], + unit: str | None = ..., + errors: str = ..., +) -> np.ndarray: ... # np.ndarray[m8ns] +def parse_timedelta_unit(unit: str | None) -> str: ... +def delta_to_nanoseconds(delta: Tick | np.timedelta64 | timedelta | int) -> int: ... + +class Timedelta(timedelta): + min: ClassVar[Timedelta] + max: ClassVar[Timedelta] + resolution: ClassVar[Timedelta] + value: int # np.int64 + + # error: "__new__" must return a class instance (got "Union[Timedelta, NaTType]") + def __new__( # type: ignore[misc] + cls: Type[_S], + value=..., + unit: str = ..., + **kwargs: int | float | np.integer | np.floating, + ) -> _S | NaTType: ... + @property + def days(self) -> int: ... + @property + def seconds(self) -> int: ... + @property + def microseconds(self) -> int: ... + def total_seconds(self) -> float: ... + def to_pytimedelta(self) -> timedelta: ... + def to_timedelta64(self) -> np.timedelta64: ... + @property + def asm8(self) -> np.timedelta64: ... + # TODO: round/floor/ceil could return NaT? + def round(self: _S, freq: str) -> _S: ... + def floor(self: _S, freq: str) -> _S: ... + def ceil(self: _S, freq: str) -> _S: ... + @property + def resolution_string(self) -> str: ... + def __add__(self, other: timedelta) -> timedelta: ... + def __radd__(self, other: timedelta) -> timedelta: ... + def __sub__(self, other: timedelta) -> timedelta: ... + def __rsub__(self, other: timedelta) -> timedelta: ... + def __neg__(self) -> timedelta: ... + def __pos__(self) -> timedelta: ... + def __abs__(self) -> timedelta: ... + def __mul__(self, other: float) -> timedelta: ... + def __rmul__(self, other: float) -> timedelta: ... + @overload + def __floordiv__(self, other: timedelta) -> int: ... + @overload + def __floordiv__(self, other: int) -> timedelta: ... + @overload + def __truediv__(self, other: timedelta) -> float: ... + @overload + def __truediv__(self, other: float) -> timedelta: ... + def __mod__(self, other: timedelta) -> timedelta: ... + def __divmod__(self, other: timedelta) -> tuple[int, timedelta]: ... + def __le__(self, other: timedelta) -> bool: ... + def __lt__(self, other: timedelta) -> bool: ... + def __ge__(self, other: timedelta) -> bool: ... + def __gt__(self, other: timedelta) -> bool: ... + def __hash__(self) -> int: ... 
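+ +# Illustrative note (not part of the upstream stub): given the overloads +# above, Timedelta("1h") / Timedelta("30min") evaluates to the float 2.0, +# while Timedelta("1h") / 2 is a Timedelta of 30 minutes.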
diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/timedeltas.pyx b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/timedeltas.pyx new file mode 100644 index 0000000..d327ca6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/timedeltas.pyx @@ -0,0 +1,1642 @@ +import collections +import warnings + +import cython + +from cpython.object cimport ( + Py_EQ, + Py_NE, + PyObject_RichCompare, +) + +import numpy as np + +cimport numpy as cnp +from numpy cimport ( + int64_t, + ndarray, +) + +cnp.import_array() + +from cpython.datetime cimport ( + PyDateTime_Check, + PyDateTime_IMPORT, + PyDelta_Check, + timedelta, +) + +PyDateTime_IMPORT + + +cimport pandas._libs.tslibs.util as util +from pandas._libs.tslibs.base cimport ABCTimestamp +from pandas._libs.tslibs.conversion cimport ( + cast_from_unit, + precision_from_unit, +) +from pandas._libs.tslibs.nattype cimport ( + NPY_NAT, + c_NaT as NaT, + c_nat_strings as nat_strings, + checknull_with_nat, +) +from pandas._libs.tslibs.np_datetime cimport ( + NPY_DATETIMEUNIT, + cmp_scalar, + get_datetime64_unit, + get_timedelta64_value, + pandas_timedeltastruct, + td64_to_tdstruct, +) +from pandas._libs.tslibs.offsets cimport is_tick_object +from pandas._libs.tslibs.util cimport ( + is_array, + is_datetime64_object, + is_float_object, + is_integer_object, + is_timedelta64_object, +) + +from pandas._libs.tslibs.fields import ( + RoundTo, + round_nsint64, +) + +# ---------------------------------------------------------------------- +# Constants + +# components named tuple +Components = collections.namedtuple( + "Components", + [ + "days", + "hours", + "minutes", + "seconds", + "milliseconds", + "microseconds", + "nanoseconds", + ], +) + +cdef dict timedelta_abbrevs = { + "Y": "Y", + "y": "Y", + "M": "M", + "W": "W", + "w": "W", + "D": "D", + "d": "D", + "days": "D", + "day": "D", + "hours": "h", + "hour": "h", + "hr": "h", + "h": "h", + "m": "m", + "minute": "m", + "min": "m", + "minutes": "m", + "t": "m", + "s": "s", + "seconds": "s", + "sec": "s", + "second": "s", + "ms": "ms", + "milliseconds": "ms", + "millisecond": "ms", + "milli": "ms", + "millis": "ms", + "l": "ms", + "us": "us", + "microseconds": "us", + "microsecond": "us", + "µs": "us", + "micro": "us", + "micros": "us", + "u": "us", + "ns": "ns", + "nanoseconds": "ns", + "nano": "ns", + "nanos": "ns", + "nanosecond": "ns", + "n": "ns", +} + +_no_input = object() + + +# ---------------------------------------------------------------------- +# API + +@cython.boundscheck(False) +@cython.wraparound(False) +def ints_to_pytimedelta(const int64_t[:] arr, box=False): + """ + convert an i8 repr to an ndarray of timedelta or Timedelta (if box == + True) + + Parameters + ---------- + arr : ndarray[int64_t] + box : bool, default False + + Returns + ------- + result : ndarray[object] + array of Timedelta or timedeltas objects + """ + cdef: + Py_ssize_t i, n = len(arr) + int64_t value + object[:] result = np.empty(n, dtype=object) + + for i in range(n): + + value = arr[i] + if value == NPY_NAT: + result[i] = NaT + else: + if box: + result[i] = Timedelta(value) + else: + result[i] = timedelta(microseconds=int(value) / 1000) + + return result.base # .base to access underlying np.ndarray + + +# ---------------------------------------------------------------------- + +cpdef int64_t delta_to_nanoseconds(delta) except? 
-1: + if is_tick_object(delta): + return delta.nanos + if isinstance(delta, _Timedelta): + delta = delta.value + if is_timedelta64_object(delta): + return get_timedelta64_value(ensure_td64ns(delta)) + if is_integer_object(delta): + return delta + if PyDelta_Check(delta): + try: + return ( + delta.days * 24 * 3600 * 1_000_000 + + delta.seconds * 1_000_000 + + delta.microseconds + ) * 1000 + except OverflowError as err: + from pandas._libs.tslibs.conversion import OutOfBoundsTimedelta + raise OutOfBoundsTimedelta(*err.args) from err + + raise TypeError(type(delta)) + + +cdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit): + if unit == NPY_DATETIMEUNIT.NPY_FR_ns or unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC: + # generic -> default to nanoseconds + return "ns" + elif unit == NPY_DATETIMEUNIT.NPY_FR_us: + return "us" + elif unit == NPY_DATETIMEUNIT.NPY_FR_ms: + return "ms" + elif unit == NPY_DATETIMEUNIT.NPY_FR_s: + return "s" + elif unit == NPY_DATETIMEUNIT.NPY_FR_m: + return "m" + elif unit == NPY_DATETIMEUNIT.NPY_FR_h: + return "h" + elif unit == NPY_DATETIMEUNIT.NPY_FR_D: + return "D" + elif unit == NPY_DATETIMEUNIT.NPY_FR_W: + return "W" + elif unit == NPY_DATETIMEUNIT.NPY_FR_M: + return "M" + elif unit == NPY_DATETIMEUNIT.NPY_FR_Y: + return "Y" + else: + raise NotImplementedError(unit) + + +@cython.overflowcheck(True) +cdef object ensure_td64ns(object ts): + """ + Overflow-safe implementation of td64.astype("m8[ns]") + + Parameters + ---------- + ts : np.timedelta64 + + Returns + ------- + np.timedelta64[ns] + """ + cdef: + NPY_DATETIMEUNIT td64_unit + int64_t td64_value, mult + str unitstr + + td64_unit = get_datetime64_unit(ts) + if ( + td64_unit != NPY_DATETIMEUNIT.NPY_FR_ns + and td64_unit != NPY_DATETIMEUNIT.NPY_FR_GENERIC + ): + unitstr = npy_unit_to_abbrev(td64_unit) + + td64_value = get_timedelta64_value(ts) + + mult = precision_from_unit(unitstr)[0] + try: + # NB: cython#1381 this cannot be *= + td64_value = td64_value * mult + except OverflowError as err: + from pandas._libs.tslibs.conversion import OutOfBoundsTimedelta + raise OutOfBoundsTimedelta(ts) from err + + return np.timedelta64(td64_value, "ns") + + return ts + + +cdef convert_to_timedelta64(object ts, str unit): + """ + Convert an incoming object to a timedelta64 if possible. 
+ Before calling, unit must be standardized to avoid repeated unit conversion + + Handle these types of objects: + - timedelta/Timedelta + - timedelta64 + - an offset + - np.int64 (with unit providing a possible modifier) + - None/NaT + + Return an ns based int64 + """ + if checknull_with_nat(ts): + return np.timedelta64(NPY_NAT, "ns") + elif isinstance(ts, _Timedelta): + # already in the proper format + ts = np.timedelta64(ts.value, "ns") + elif is_timedelta64_object(ts): + ts = ensure_td64ns(ts) + elif is_integer_object(ts): + if ts == NPY_NAT: + return np.timedelta64(NPY_NAT, "ns") + else: + if unit in ["Y", "M", "W"]: + ts = np.timedelta64(ts, unit) + else: + ts = cast_from_unit(ts, unit) + ts = np.timedelta64(ts, "ns") + elif is_float_object(ts): + if unit in ["Y", "M", "W"]: + ts = np.timedelta64(int(ts), unit) + else: + ts = cast_from_unit(ts, unit) + ts = np.timedelta64(ts, "ns") + elif isinstance(ts, str): + if (len(ts) > 0 and ts[0] == "P") or (len(ts) > 1 and ts[:2] == "-P"): + ts = parse_iso_format_string(ts) + else: + ts = parse_timedelta_string(ts) + ts = np.timedelta64(ts, "ns") + elif is_tick_object(ts): + ts = np.timedelta64(ts.nanos, "ns") + + if PyDelta_Check(ts): + ts = np.timedelta64(delta_to_nanoseconds(ts), "ns") + elif not is_timedelta64_object(ts): + raise ValueError(f"Invalid type for timedelta scalar: {type(ts)}") + return ts.astype("timedelta64[ns]") + + +@cython.boundscheck(False) +@cython.wraparound(False) +def array_to_timedelta64( + ndarray[object] values, str unit=None, str errors="raise" +) -> ndarray: + """ + Convert an ndarray to an array of timedeltas. If errors == 'coerce', + coerce non-convertible objects to NaT. Otherwise, raise. + + Returns + ------- + np.ndarray[timedelta64ns] + """ + + cdef: + Py_ssize_t i, n + int64_t[:] iresult + + if errors not in {'ignore', 'raise', 'coerce'}: + raise ValueError("errors must be one of {'ignore', 'raise', or 'coerce'}") + + n = values.shape[0] + result = np.empty(n, dtype='m8[ns]') + iresult = result.view('i8') + + if unit is not None: + for i in range(n): + if isinstance(values[i], str) and errors != "coerce": + raise ValueError( + "unit must not be specified if the input contains a str" + ) + + # Usually, we have all strings. If so, we hit the fast path. + # If this path fails, we try conversion a different way, and + # this is where all of the error handling will take place. + try: + for i in range(n): + if values[i] is NaT: + # we allow this check in the fast-path because NaT is a C-object + # so this is an inexpensive check + iresult[i] = NPY_NAT + else: + result[i] = parse_timedelta_string(values[i]) + except (TypeError, ValueError): + parsed_unit = parse_timedelta_unit(unit or 'ns') + for i in range(n): + try: + result[i] = convert_to_timedelta64(values[i], parsed_unit) + except ValueError as err: + if errors == 'coerce': + result[i] = NPY_NAT + elif "unit abbreviation w/o a number" in str(err): + # re-raise with more pertinent message + msg = f"Could not convert '{values[i]}' to NumPy timedelta" + raise ValueError(msg) from err + else: + raise + + return iresult.base # .base to access underlying np.ndarray + + +cdef inline int64_t parse_timedelta_string(str ts) except? -1: + """ + Parse a regular format timedelta string. Return an int64_t (in ns) + or raise a ValueError on an invalid parse. 
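+ + For example (illustrative): "1 days 2:30:45.5" parses to + 95445500000000 ns and "10s" parses to 10000000000 ns.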
+ """ + + cdef: + unicode c + bint neg = 0, have_dot = 0, have_value = 0, have_hhmmss = 0 + object current_unit = None + int64_t result = 0, m = 0, r + list number = [], frac = [], unit = [] + + # neg : tracks if we have a leading negative for the value + # have_dot : tracks if we are processing a dot (either post hhmmss or + # inside an expression) + # have_value : track if we have at least 1 leading unit + # have_hhmmss : tracks if we have a regular format hh:mm:ss + + if len(ts) == 0 or ts in nat_strings: + return NPY_NAT + + for c in ts: + + # skip whitespace / commas + if c == ' ' or c == ',': + pass + + # positive signs are ignored + elif c == '+': + pass + + # neg + elif c == '-': + + if neg or have_value or have_hhmmss: + raise ValueError("only leading negative signs are allowed") + + neg = 1 + + # number (ascii codes) + elif ord(c) >= 48 and ord(c) <= 57: + + if have_dot: + + # we found a dot, but now its just a fraction + if len(unit): + number.append(c) + have_dot = 0 + else: + frac.append(c) + + elif not len(unit): + number.append(c) + + else: + r = timedelta_from_spec(number, frac, unit) + unit, number, frac = [], [c], [] + + result += timedelta_as_neg(r, neg) + + # hh:mm:ss. + elif c == ':': + + # we flip this off if we have a leading value + if have_value: + neg = 0 + + # we are in the pattern hh:mm:ss pattern + if len(number): + if current_unit is None: + current_unit = 'h' + m = 1000000000 * 3600 + elif current_unit == 'h': + current_unit = 'm' + m = 1000000000 * 60 + elif current_unit == 'm': + current_unit = 's' + m = 1000000000 + r = int(''.join(number)) * m + result += timedelta_as_neg(r, neg) + have_hhmmss = 1 + else: + raise ValueError(f"expecting hh:mm:ss format, received: {ts}") + + unit, number = [], [] + + # after the decimal point + elif c == '.': + + if len(number) and current_unit is not None: + + # by definition we had something like + # so we need to evaluate the final field from a + # hh:mm:ss (so current_unit is 'm') + if current_unit != 'm': + raise ValueError("expected hh:mm:ss format before .") + m = 1000000000 + r = int(''.join(number)) * m + result += timedelta_as_neg(r, neg) + have_value = 1 + unit, number, frac = [], [], [] + + have_dot = 1 + + # unit + else: + unit.append(c) + have_value = 1 + have_dot = 0 + + # we had a dot, but we have a fractional + # value since we have an unit + if have_dot and len(unit): + r = timedelta_from_spec(number, frac, unit) + result += timedelta_as_neg(r, neg) + + # we have a dot as part of a regular format + # e.g. 
hh:mm:ss.fffffff + elif have_dot: + + if ((len(number) or len(frac)) and not len(unit) + and current_unit is None): + raise ValueError("no units specified") + + if len(frac) > 0 and len(frac) <= 3: + m = 10**(3 -len(frac)) * 1000 * 1000 + elif len(frac) > 3 and len(frac) <= 6: + m = 10**(6 -len(frac)) * 1000 + elif len(frac) > 6 and len(frac) <= 9: + m = 10**(9 -len(frac)) + else: + m = 1 + frac = frac[:9] + r = int(''.join(frac)) * m + result += timedelta_as_neg(r, neg) + + # we have a regular format + # we must have seconds at this point (hence the unit is still 'm') + elif current_unit is not None: + if current_unit != 'm': + raise ValueError("expected hh:mm:ss format") + m = 1000000000 + r = int(''.join(number)) * m + result += timedelta_as_neg(r, neg) + + # we have a last abbreviation + elif len(unit): + if len(number): + r = timedelta_from_spec(number, frac, unit) + result += timedelta_as_neg(r, neg) + else: + raise ValueError("unit abbreviation w/o a number") + + # we only have symbols and no numbers + elif len(number) == 0: + raise ValueError("symbols w/o a number") + + # treat as nanoseconds + # but only if we don't have anything else + else: + if have_value: + raise ValueError("have leftover units") + if len(number): + r = timedelta_from_spec(number, frac, 'ns') + result += timedelta_as_neg(r, neg) + + return result + + +cdef inline int64_t timedelta_as_neg(int64_t value, bint neg): + """ + + Parameters + ---------- + value : int64_t of the timedelta value + neg : bool if the a negative value + """ + if neg: + return -value + return value + + +cdef inline timedelta_from_spec(object number, object frac, object unit): + """ + + Parameters + ---------- + number : a list of number digits + frac : a list of frac digits + unit : a list of unit characters + """ + cdef: + str n + + try: + unit = ''.join(unit) + + if unit in ["M", "Y", "y"]: + warnings.warn( + "Units 'M', 'Y' and 'y' do not represent unambiguous " + "timedelta values and will be removed in a future version.", + FutureWarning, + stacklevel=2, + ) + + if unit == 'M': + # To parse ISO 8601 string, 'M' should be treated as minute, + # not month + unit = 'm' + unit = parse_timedelta_unit(unit) + except KeyError: + raise ValueError(f"invalid abbreviation: {unit}") + + n = ''.join(number) + '.' + ''.join(frac) + return cast_from_unit(float(n), unit) + + +cpdef inline str parse_timedelta_unit(str unit): + """ + Parameters + ---------- + unit : str or None + + Returns + ------- + str + Canonical unit string. 
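+ For example (illustrative), "milli", "millis", "l" and "ms" all + normalize to "ms".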
+ + Raises + ------ + ValueError : on non-parseable input + """ + if unit is None: + return "ns" + elif unit == "M": + return unit + try: + return timedelta_abbrevs[unit.lower()] + except (KeyError, AttributeError): + raise ValueError(f"invalid unit abbreviation: {unit}") + +# ---------------------------------------------------------------------- +# Timedelta ops utilities + +cdef bint _validate_ops_compat(other): + # return True if we are compat with operating + if checknull_with_nat(other): + return True + elif is_any_td_scalar(other): + return True + elif isinstance(other, str): + return True + return False + + +def _op_unary_method(func, name): + def f(self): + return Timedelta(func(self.value), unit='ns') + f.__name__ = name + return f + + +def _binary_op_method_timedeltalike(op, name): + # define a binary operation that only works if the other argument is + # timedelta like or an array of timedeltalike + def f(self, other): + if other is NaT: + return NaT + + elif is_datetime64_object(other) or ( + PyDateTime_Check(other) and not isinstance(other, ABCTimestamp) + ): + # this case is for a datetime object that is specifically + # *not* a Timestamp, as the Timestamp case will be + # handled after `_validate_ops_compat` returns False below + from pandas._libs.tslibs.timestamps import Timestamp + return op(self, Timestamp(other)) + # We are implicitly requiring the canonical behavior to be + # defined by Timestamp methods. + + elif is_array(other): + # nd-array like + if other.dtype.kind in ['m', 'M']: + return op(self.to_timedelta64(), other) + elif other.dtype.kind == 'O': + return np.array([op(self, x) for x in other]) + else: + return NotImplemented + + elif not _validate_ops_compat(other): + # Includes any of our non-cython classes + return NotImplemented + + try: + other = Timedelta(other) + except ValueError: + # failed to parse as timedelta + return NotImplemented + + if other is NaT: + # e.g. if original other was timedelta64('NaT') + return NaT + return Timedelta(op(self.value, other.value), unit='ns') + + f.__name__ = name + return f + + +# ---------------------------------------------------------------------- +# Timedelta Construction + +cdef inline int64_t parse_iso_format_string(str ts) except? 
-1: + """ + Extracts and cleanses the appropriate values from a match object with + groups for each component of an ISO 8601 duration + + Parameters + ---------- + ts: str + ISO 8601 Duration formatted string + + Returns + ------- + ns: int64_t + Precision in nanoseconds of matched ISO 8601 duration + + Raises + ------ + ValueError + If ``ts`` cannot be parsed + """ + + cdef: + unicode c + int64_t result = 0, r + int p = 0, sign = 1 + object dec_unit = 'ms', err_msg + bint have_dot = 0, have_value = 0, neg = 0 + list number = [], unit = [] + + err_msg = f"Invalid ISO 8601 Duration format - {ts}" + + if ts[0] == "-": + sign = -1 + ts = ts[1:] + + for c in ts: + # number (ascii codes) + if 48 <= ord(c) <= 57: + + have_value = 1 + if have_dot: + if p == 3 and dec_unit != 'ns': + unit.append(dec_unit) + if dec_unit == 'ms': + dec_unit = 'us' + elif dec_unit == 'us': + dec_unit = 'ns' + p = 0 + p += 1 + + if not len(unit): + number.append(c) + else: + r = timedelta_from_spec(number, '0', unit) + result += timedelta_as_neg(r, neg) + + neg = 0 + unit, number = [], [c] + else: + if c == 'P' or c == 'T': + pass # ignore marking characters P and T + elif c == '-': + if neg or have_value: + raise ValueError(err_msg) + else: + neg = 1 + elif c == "+": + pass + elif c in ['W', 'D', 'H', 'M']: + if c in ['H', 'M'] and len(number) > 2: + raise ValueError(err_msg) + if c == 'M': + c = 'min' + unit.append(c) + r = timedelta_from_spec(number, '0', unit) + result += timedelta_as_neg(r, neg) + + neg = 0 + unit, number = [], [] + elif c == '.': + # append any seconds + if len(number): + r = timedelta_from_spec(number, '0', 'S') + result += timedelta_as_neg(r, neg) + unit, number = [], [] + have_dot = 1 + elif c == 'S': + if have_dot: # ms, us, or ns + if not len(number) or p > 3: + raise ValueError(err_msg) + # pad to 3 digits as required + pad = 3 - p + while pad > 0: + number.append('0') + pad -= 1 + + r = timedelta_from_spec(number, '0', dec_unit) + result += timedelta_as_neg(r, neg) + else: # seconds + r = timedelta_from_spec(number, '0', 'S') + result += timedelta_as_neg(r, neg) + else: + raise ValueError(err_msg) + + if not have_value: + # Received string only - never parsed any values + raise ValueError(err_msg) + + return sign*result + + +cdef _to_py_int_float(v): + # Note: This used to be defined inside Timedelta.__new__ + # but cython will not allow `cdef` functions to be defined dynamically. + if is_integer_object(v): + return int(v) + elif is_float_object(v): + return float(v) + raise TypeError(f"Invalid type {type(v)}. Must be int or float.") + + +# Similar to Timestamp/datetime, this is a construction requirement for +# timedeltas that we need to do object instantiation in python. This will +# serve as a C extension type that shadows the Python class, where we do any +# heavy lifting. 
+cdef class _Timedelta(timedelta): + # cdef readonly: + # int64_t value # nanoseconds + # object freq # frequency reference + # bint is_populated # are my components populated + # int64_t _d, _h, _m, _s, _ms, _us, _ns + + # higher than np.ndarray and np.matrix + __array_priority__ = 100 + + def __hash__(_Timedelta self): + if self._has_ns(): + return hash(self.value) + else: + return timedelta.__hash__(self) + + def __richcmp__(_Timedelta self, object other, int op): + cdef: + _Timedelta ots + int ndim + + if isinstance(other, _Timedelta): + ots = other + elif is_any_td_scalar(other): + ots = Timedelta(other) + # TODO: watch out for overflows + + elif other is NaT: + return op == Py_NE + + elif util.is_array(other): + # TODO: watch out for zero-dim + if other.dtype.kind == "m": + return PyObject_RichCompare(self.asm8, other, op) + elif other.dtype.kind == "O": + # operate element-wise + return np.array( + [PyObject_RichCompare(self, x, op) for x in other], + dtype=bool, + ) + if op == Py_EQ: + return np.zeros(other.shape, dtype=bool) + elif op == Py_NE: + return np.ones(other.shape, dtype=bool) + return NotImplemented # let other raise TypeError + + else: + return NotImplemented + + return cmp_scalar(self.value, ots.value, op) + + cpdef bint _has_ns(self): + return self.value % 1000 != 0 + + def _ensure_components(_Timedelta self): + """ + compute the components + """ + if self.is_populated: + return + + cdef: + pandas_timedeltastruct tds + + td64_to_tdstruct(self.value, &tds) + self._d = tds.days + self._h = tds.hrs + self._m = tds.min + self._s = tds.sec + self._ms = tds.ms + self._us = tds.us + self._ns = tds.ns + self._seconds = tds.seconds + self._microseconds = tds.microseconds + + self.is_populated = 1 + + cpdef timedelta to_pytimedelta(_Timedelta self): + """ + Convert a pandas Timedelta object into a python ``datetime.timedelta`` object. + + Timedelta objects are internally saved as numpy timedelta64[ns] dtype. + Use to_pytimedelta() to convert to object dtype. + + Returns + ------- + datetime.timedelta or numpy.array of datetime.timedelta + + See Also + -------- + to_timedelta : Convert argument to Timedelta type. + + Notes + ----- + Any nanosecond resolution will be lost. + """ + return timedelta(microseconds=int(self.value) / 1000) + + def to_timedelta64(self) -> np.timedelta64: + """ + Return a numpy.timedelta64 object with 'ns' precision. + """ + return np.timedelta64(self.value, 'ns') + + def to_numpy(self, dtype=None, copy=False) -> np.timedelta64: + """ + Convert the Timedelta to a NumPy timedelta64. + + .. versionadded:: 0.25.0 + + This is an alias method for `Timedelta.to_timedelta64()`. The dtype and + copy parameters are available here only for compatibility. Their values + will not affect the return value. + + Returns + ------- + numpy.timedelta64 + + See Also + -------- + Series.to_numpy : Similar method for Series. + """ + if dtype is not None or copy is not False: + raise ValueError( + "Timedelta.to_numpy dtype and copy arguments are ignored" + ) + return self.to_timedelta64() + + def view(self, dtype): + """ + Array view compatibility. + """ + return np.timedelta64(self.value).view(dtype) + + @property + def components(self): + """ + Return a components namedtuple-like. + """ + self._ensure_components() + # return the named tuple + return Components(self._d, self._h, self._m, self._s, + self._ms, self._us, self._ns) + + @property + def delta(self): + """ + Return the timedelta in nanoseconds (ns), for internal compatibility.
+ + Returns + ------- + int + Timedelta in nanoseconds. + + Examples + -------- + >>> td = pd.Timedelta('1 days 42 ns') + >>> td.delta + 86400000000042 + + >>> td = pd.Timedelta('3 s') + >>> td.delta + 3000000000 + + >>> td = pd.Timedelta('3 ms 5 us') + >>> td.delta + 3005000 + + >>> td = pd.Timedelta(42, unit='ns') + >>> td.delta + 42 + """ + return self.value + + @property + def asm8(self) -> np.timedelta64: + """ + Return a numpy timedelta64 array scalar view. + + Provides access to the array scalar view (i.e. a combination of the + value and the units) associated with the numpy.timedelta64().view(), + including a 64-bit integer representation of the timedelta in + nanoseconds (Python int compatible). + + Returns + ------- + numpy timedelta64 array scalar view + Array scalar view of the timedelta in nanoseconds. + + Examples + -------- + >>> td = pd.Timedelta('1 days 2 min 3 us 42 ns') + >>> td.asm8 + numpy.timedelta64(86520000003042,'ns') + + >>> td = pd.Timedelta('2 min 3 s') + >>> td.asm8 + numpy.timedelta64(123000000000,'ns') + + >>> td = pd.Timedelta('3 ms 5 us') + >>> td.asm8 + numpy.timedelta64(3005000,'ns') + + >>> td = pd.Timedelta(42, unit='ns') + >>> td.asm8 + numpy.timedelta64(42,'ns') + """ + return np.int64(self.value).view('m8[ns]') + + @property + def resolution_string(self) -> str: + """ + Return a string representing the lowest timedelta resolution. + + Each timedelta has a defined resolution that represents the lowest OR + most granular level of precision. Each level of resolution is + represented by a short string as defined below: + + Resolution: Return value + + * Days: 'D' + * Hours: 'H' + * Minutes: 'T' + * Seconds: 'S' + * Milliseconds: 'L' + * Microseconds: 'U' + * Nanoseconds: 'N' + + Returns + ------- + str + Timedelta resolution. + + Examples + -------- + >>> td = pd.Timedelta('1 days 2 min 3 us 42 ns') + >>> td.resolution_string + 'N' + + >>> td = pd.Timedelta('1 days 2 min 3 us') + >>> td.resolution_string + 'U' + + >>> td = pd.Timedelta('2 min 3 s') + >>> td.resolution_string + 'S' + + >>> td = pd.Timedelta(36, unit='us') + >>> td.resolution_string + 'U' + """ + self._ensure_components() + if self._ns: + return "N" + elif self._us: + return "U" + elif self._ms: + return "L" + elif self._s: + return "S" + elif self._m: + return "T" + elif self._h: + return "H" + else: + return "D" + + @property + def nanoseconds(self): + """ + Return the number of nanoseconds (n), where 0 <= n < 1 microsecond. + + Returns + ------- + int + Number of nanoseconds. + + See Also + -------- + Timedelta.components : Return all attributes with assigned values + (i.e. days, hours, minutes, seconds, milliseconds, microseconds, + nanoseconds). + + Examples + -------- + **Using string input** + + >>> td = pd.Timedelta('1 days 2 min 3 us 42 ns') + + >>> td.nanoseconds + 42 + + **Using integer input** + + >>> td = pd.Timedelta(42, unit='ns') + >>> td.nanoseconds + 42 + """ + self._ensure_components() + return self._ns + + def _repr_base(self, format=None) -> str: + """ + + Parameters + ---------- + format : None|all|sub_day|long + + Returns + ------- + converted : string of a Timedelta + + """ + cdef object sign, seconds_pretty, subs, fmt, comp_dict + + self._ensure_components() + + if self._d < 0: + sign = " +" + else: + sign = " " + + if format == 'all': + fmt = ("{days} days{sign}{hours:02}:{minutes:02}:{seconds:02}." 
+ "{milliseconds:03}{microseconds:03}{nanoseconds:03}") + else: + # if we have a partial day + subs = (self._h or self._m or self._s or + self._ms or self._us or self._ns) + + if self._ms or self._us or self._ns: + seconds_fmt = "{seconds:02}.{milliseconds:03}{microseconds:03}" + if self._ns: + # GH#9309 + seconds_fmt += "{nanoseconds:03}" + else: + seconds_fmt = "{seconds:02}" + + if format == 'sub_day' and not self._d: + fmt = "{hours:02}:{minutes:02}:" + seconds_fmt + elif subs or format == 'long': + fmt = "{days} days{sign}{hours:02}:{minutes:02}:" + seconds_fmt + else: + fmt = "{days} days" + + comp_dict = self.components._asdict() + comp_dict['sign'] = sign + + return fmt.format(**comp_dict) + + def __repr__(self) -> str: + repr_based = self._repr_base(format='long') + return f"Timedelta('{repr_based}')" + + def __str__(self) -> str: + return self._repr_base(format='long') + + def __bool__(self) -> bool: + return self.value != 0 + + def isoformat(self) -> str: + """ + Format Timedelta as ISO 8601 Duration like + ``P[n]Y[n]M[n]DT[n]H[n]M[n]S``, where the ``[n]`` s are replaced by the + values. See https://en.wikipedia.org/wiki/ISO_8601#Durations. + + Returns + ------- + str + + See Also + -------- + Timestamp.isoformat : Function is used to convert the given + Timestamp object into the ISO format. + + Notes + ----- + The longest component is days, whose value may be larger than + 365. + Every component is always included, even if its value is 0. + Pandas uses nanosecond precision, so up to 9 decimal places may + be included in the seconds component. + Trailing 0's are removed from the seconds component after the decimal. + We do not 0 pad components, so it's `...T5H...`, not `...T05H...` + + Examples + -------- + >>> td = pd.Timedelta(days=6, minutes=50, seconds=3, + ... milliseconds=10, microseconds=10, nanoseconds=12) + + >>> td.isoformat() + 'P6DT0H50M3.010010012S' + >>> pd.Timedelta(hours=1, seconds=10).isoformat() + 'P0DT1H0M10S' + >>> pd.Timedelta(days=500.5).isoformat() + 'P500DT12H0M0S' + """ + components = self.components + seconds = (f'{components.seconds}.' + f'{components.milliseconds:0>3}' + f'{components.microseconds:0>3}' + f'{components.nanoseconds:0>3}') + # Trim unnecessary 0s, 1.000000000 -> 1 + seconds = seconds.rstrip('0').rstrip('.') + tpl = (f'P{components.days}DT{components.hours}' + f'H{components.minutes}M{seconds}S') + return tpl + + +# Python front end to C extension type _Timedelta +# This serves as the box for timedelta64 + +class Timedelta(_Timedelta): + """ + Represents a duration, the difference between two dates or times. + + Timedelta is the pandas equivalent of python's ``datetime.timedelta`` + and is interchangeable with it in most cases. + + Parameters + ---------- + value : Timedelta, timedelta, np.timedelta64, str, or int + unit : str, default 'ns' + Denote the unit of the input, if input is an integer. + + Possible values: + + * 'W', 'D', 'T', 'S', 'L', 'U', or 'N' + * 'days' or 'day' + * 'hours', 'hour', 'hr', or 'h' + * 'minutes', 'minute', 'min', or 'm' + * 'seconds', 'second', or 'sec' + * 'milliseconds', 'millisecond', 'millis', or 'milli' + * 'microseconds', 'microsecond', 'micros', or 'micro' + * 'nanoseconds', 'nanosecond', 'nanos', 'nano', or 'ns'. + + **kwargs + Available kwargs: {days, seconds, microseconds, + milliseconds, minutes, hours, weeks}. + Values for construction in compat with datetime.timedelta. + Numpy ints and floats will be coerced to python ints and floats. 
+ + Notes + ----- + The constructor may take in either a value with an optional unit, or + the descriptive keywords above; exactly one of these two forms must be + used during initialization. + + The ``.value`` attribute is always in ns. + + If the precision is higher than nanoseconds, the precision of the duration is + truncated to nanoseconds. + + Examples + -------- + Here we initialize the Timedelta object with both value and unit + + >>> td = pd.Timedelta(1, "d") + >>> td + Timedelta('1 days 00:00:00') + + Here we initialize the Timedelta object with kwargs + + >>> td2 = pd.Timedelta(days=1) + >>> td2 + Timedelta('1 days 00:00:00') + + We see that either way we get the same result + """ + + _req_any_kwargs_new = {"weeks", "days", "hours", "minutes", "seconds", + "milliseconds", "microseconds", "nanoseconds"} + + def __new__(cls, object value=_no_input, unit=None, **kwargs): + cdef _Timedelta td_base + + if value is _no_input: + if not len(kwargs): + raise ValueError("cannot construct a Timedelta without a " + "value/unit or descriptive keywords " + "(days,seconds....)") + + kwargs = {key: _to_py_int_float(kwargs[key]) for key in kwargs} + + unsupported_kwargs = set(kwargs) + unsupported_kwargs.difference_update(cls._req_any_kwargs_new) + if unsupported_kwargs or not cls._req_any_kwargs_new.intersection(kwargs): + raise ValueError( + "cannot construct a Timedelta from the passed arguments, " + "allowed keywords are " + "[weeks, days, hours, minutes, seconds, " + "milliseconds, microseconds, nanoseconds]" + ) + + # GH43764, convert any input to nanoseconds first and then + # create the timestamp. This ensures that any potential + # nanosecond contributions from kwargs parsed as floats + # are taken into consideration. + seconds = int(( + ( + (kwargs.get('days', 0) + kwargs.get('weeks', 0) * 7) * 24 + + kwargs.get('hours', 0) + ) * 3600 + + kwargs.get('minutes', 0) * 60 + + kwargs.get('seconds', 0) + ) * 1_000_000_000 + ) + + value = np.timedelta64( + int(kwargs.get('nanoseconds', 0)) + + int(kwargs.get('microseconds', 0) * 1_000) + + int(kwargs.get('milliseconds', 0) * 1_000_000) + + seconds + ) + + if unit in {'Y', 'y', 'M'}: + raise ValueError( + "Units 'M', 'Y', and 'y' are no longer supported, as they do not " + "represent unambiguous timedelta durations."
+ ) + + # GH 30543 if pd.Timedelta already passed, return it + # check that only value is passed + if isinstance(value, _Timedelta) and unit is None and len(kwargs) == 0: + return value + elif isinstance(value, _Timedelta): + value = value.value + elif isinstance(value, str): + if unit is not None: + raise ValueError("unit must not be specified if the value is a str") + if (len(value) > 0 and value[0] == 'P') or ( + len(value) > 1 and value[:2] == '-P' + ): + value = parse_iso_format_string(value) + else: + value = parse_timedelta_string(value) + value = np.timedelta64(value) + elif PyDelta_Check(value): + value = convert_to_timedelta64(value, 'ns') + elif is_timedelta64_object(value): + if unit is not None: + value = value.astype(f'timedelta64[{unit}]') + value = ensure_td64ns(value) + elif is_tick_object(value): + value = np.timedelta64(value.nanos, 'ns') + elif is_integer_object(value) or is_float_object(value): + # unit=None is de-facto 'ns' + unit = parse_timedelta_unit(unit) + value = convert_to_timedelta64(value, unit) + elif checknull_with_nat(value): + return NaT + else: + raise ValueError( + "Value must be Timedelta, string, integer, " + f"float, timedelta or convertible, not {type(value).__name__}" + ) + + if is_timedelta64_object(value): + value = value.view('i8') + + # nat + if value == NPY_NAT: + return NaT + + # make timedelta happy + td_base = _Timedelta.__new__(cls, microseconds=int(value) // 1000) + td_base.value = value + td_base.is_populated = 0 + return td_base + + def __setstate__(self, state): + (value) = state + self.value = value + + def __reduce__(self): + object_state = self.value, + return (Timedelta, object_state) + + @cython.cdivision(True) + def _round(self, freq, mode): + cdef: + int64_t result, unit, remainder + ndarray[int64_t] arr + + from pandas._libs.tslibs.offsets import to_offset + unit = to_offset(freq).nanos + + arr = np.array([self.value], dtype="i8") + result = round_nsint64(arr, mode, unit)[0] + return Timedelta(result, unit="ns") + + def round(self, freq): + """ + Round the Timedelta to the specified resolution. + + Parameters + ---------- + freq : str + Frequency string indicating the rounding resolution. + + Returns + ------- + a new Timedelta rounded to the given resolution of `freq` + + Raises + ------ + ValueError if the freq cannot be converted + """ + return self._round(freq, RoundTo.NEAREST_HALF_EVEN) + + def floor(self, freq): + """ + Return a new Timedelta floored to this resolution. + + Parameters + ---------- + freq : str + Frequency string indicating the flooring resolution. + """ + return self._round(freq, RoundTo.MINUS_INFTY) + + def ceil(self, freq): + """ + Return a new Timedelta ceiled to this resolution. + + Parameters + ---------- + freq : str + Frequency string indicating the ceiling resolution. + """ + return self._round(freq, RoundTo.PLUS_INFTY) + + # ---------------------------------------------------------------- + # Arithmetic Methods + # TODO: Can some of these be defined in the cython class? 
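+ # A minimal usage sketch, assuming only a standard pandas install imported as pd: + # the _op_unary_method / _binary_op_method_timedeltalike factories defined above + # generate the dunder methods assigned below, each operating on the nanosecond .value: + # + # >>> pd.Timedelta("1 hour") + pd.Timedelta("30 min") + # Timedelta('0 days 01:30:00') + # >>> -pd.Timedelta("1 day") + # Timedelta('-1 days +00:00:00')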
+ + __neg__ = _op_unary_method(lambda x: -x, '__neg__') + __pos__ = _op_unary_method(lambda x: x, '__pos__') + __abs__ = _op_unary_method(lambda x: abs(x), '__abs__') + + __add__ = _binary_op_method_timedeltalike(lambda x, y: x + y, '__add__') + __radd__ = _binary_op_method_timedeltalike(lambda x, y: x + y, '__radd__') + __sub__ = _binary_op_method_timedeltalike(lambda x, y: x - y, '__sub__') + __rsub__ = _binary_op_method_timedeltalike(lambda x, y: y - x, '__rsub__') + + def __mul__(self, other): + if is_integer_object(other) or is_float_object(other): + return Timedelta(other * self.value, unit='ns') + + elif is_array(other): + # ndarray-like + return other * self.to_timedelta64() + + return NotImplemented + + __rmul__ = __mul__ + + def __truediv__(self, other): + if _should_cast_to_timedelta(other): + # We interpret NaT as timedelta64("NaT") + other = Timedelta(other) + if other is NaT: + return np.nan + return self.value / float(other.value) + + elif is_integer_object(other) or is_float_object(other): + # integers or floats + return Timedelta(self.value / other, unit='ns') + + elif is_array(other): + return self.to_timedelta64() / other + + return NotImplemented + + def __rtruediv__(self, other): + if _should_cast_to_timedelta(other): + # We interpret NaT as timedelta64("NaT") + other = Timedelta(other) + if other is NaT: + return np.nan + return float(other.value) / self.value + + elif is_array(other): + if other.dtype.kind == "O": + # GH#31869 + return np.array([x / self for x in other]) + return other / self.to_timedelta64() + + return NotImplemented + + def __floordiv__(self, other): + # numpy does not implement floordiv for timedelta64 dtype, so we cannot + # just defer + if _should_cast_to_timedelta(other): + # We interpret NaT as timedelta64("NaT") + other = Timedelta(other) + if other is NaT: + return np.nan + return self.value // other.value + + elif is_integer_object(other) or is_float_object(other): + return Timedelta(self.value // other, unit='ns') + + elif is_array(other): + if other.dtype.kind == 'm': + # also timedelta-like + return _broadcast_floordiv_td64(self.value, other, _floordiv) + elif other.dtype.kind in ['i', 'u', 'f']: + if other.ndim == 0: + return Timedelta(self.value // other) + else: + return self.to_timedelta64() // other + + raise TypeError(f'Invalid dtype {other.dtype} for __floordiv__') + + return NotImplemented + + def __rfloordiv__(self, other): + # numpy does not implement floordiv for timedelta64 dtype, so we cannot + # just defer + if _should_cast_to_timedelta(other): + # We interpret NaT as timedelta64("NaT") + other = Timedelta(other) + if other is NaT: + return np.nan + return other.value // self.value + + elif is_array(other): + if other.dtype.kind == 'm': + # also timedelta-like + return _broadcast_floordiv_td64(self.value, other, _rfloordiv) + + # Includes integer array // Timedelta, disallowed in GH#19761 + raise TypeError(f'Invalid dtype {other.dtype} for __floordiv__') + + return NotImplemented + + def __mod__(self, other): + # Naive implementation, room for optimization + return self.__divmod__(other)[1] + + def __rmod__(self, other): + # Naive implementation, room for optimization + return self.__rdivmod__(other)[1] + + def __divmod__(self, other): + # Naive implementation, room for optimization + div = self // other + return div, self - div * other + + def __rdivmod__(self, other): + # Naive implementation, room for optimization + div = other // self + return div, other - div * self + + +cdef bint is_any_td_scalar(object obj): + """ + 
Cython equivalent for `isinstance(obj, (timedelta, np.timedelta64, Tick))` + + Parameters + ---------- + obj : object + + Returns + ------- + bool + """ + return ( + PyDelta_Check(obj) or is_timedelta64_object(obj) or is_tick_object(obj) + ) + + +cdef bint _should_cast_to_timedelta(object obj): + """ + Should we treat this object as a Timedelta for the purpose of a binary op + """ + return ( + is_any_td_scalar(obj) or obj is None or obj is NaT or isinstance(obj, str) + ) + + +cdef _floordiv(int64_t value, right): + return value // right + + +cdef _rfloordiv(int64_t value, right): + # analogous to referencing operator.div, but there is no operator.rfloordiv + return right // value + + +cdef _broadcast_floordiv_td64( + int64_t value, + ndarray other, + object (*operation)(int64_t value, object right) +): + """ + Boilerplate code shared by Timedelta.__floordiv__ and + Timedelta.__rfloordiv__ because np.timedelta64 does not implement these. + + Parameters + ---------- + value : int64_t; `self.value` from a Timedelta object + other : object + operation : function, either _floordiv or _rfloordiv + + Returns + ------- + result : varies based on `other` + """ + # assumes other.dtype.kind == 'm', i.e. other is timedelta-like + + # We need to watch out for np.timedelta64('NaT'). + mask = other.view('i8') == NPY_NAT + + if other.ndim == 0: + if mask: + return np.nan + + return operation(value, other.astype('m8[ns]').astype('i8')) + + else: + res = operation(value, other.astype('m8[ns]').astype('i8')) + + if mask.any(): + res = res.astype('f8') + res[mask] = np.nan + return res + + +# resolution in ns +Timedelta.min = Timedelta(np.iinfo(np.int64).min + 1) +Timedelta.max = Timedelta(np.iinfo(np.int64).max) +Timedelta.resolution = Timedelta(nanoseconds=1) diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/timestamps.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/timestamps.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..339c420 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/timestamps.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/timestamps.pxd b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/timestamps.pxd new file mode 100644 index 0000000..8833a61 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/timestamps.pxd @@ -0,0 +1,30 @@ +from cpython.datetime cimport ( + datetime, + tzinfo, +) +from numpy cimport int64_t + +from pandas._libs.tslibs.base cimport ABCTimestamp +from pandas._libs.tslibs.np_datetime cimport npy_datetimestruct + + +cdef object create_timestamp_from_ts(int64_t value, + npy_datetimestruct dts, + tzinfo tz, object freq, bint fold) + + +cdef class _Timestamp(ABCTimestamp): + cdef readonly: + int64_t value, nanosecond + object _freq + + cdef bint _get_start_end_field(self, str field, freq) + cdef _get_date_name_field(self, str field, object locale) + cdef int64_t _maybe_convert_value_to_local(self) + cdef bint _can_compare(self, datetime other) + cpdef to_datetime64(self) + cpdef datetime to_pydatetime(_Timestamp self, bint warn=*) + cdef bint _compare_outside_nanorange(_Timestamp self, datetime other, + int op) except -1 + cpdef void _set_freq(self, freq) + cdef _warn_on_field_deprecation(_Timestamp self, freq, str field) diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/timestamps.pyi b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/timestamps.pyi new file 
mode 100644 index 0000000..ecddd83 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/timestamps.pyi @@ -0,0 +1,207 @@ +from datetime import ( + date as _date, + datetime, + time as _time, + timedelta, + tzinfo as _tzinfo, +) +from time import struct_time +from typing import ( + ClassVar, + TypeVar, + overload, +) + +import numpy as np + +from pandas._libs.tslibs import ( + BaseOffset, + NaTType, + Period, + Timedelta, +) + +_DatetimeT = TypeVar("_DatetimeT", bound=datetime) + +def integer_op_not_supported(obj: object) -> TypeError: ... + +class Timestamp(datetime): + min: ClassVar[Timestamp] + max: ClassVar[Timestamp] + + resolution: ClassVar[Timedelta] + value: int # np.int64 + + # error: "__new__" must return a class instance (got "Union[Timestamp, NaTType]") + def __new__( # type: ignore[misc] + cls: type[_DatetimeT], + ts_input: int + | np.integer + | float + | str + | _date + | datetime + | np.datetime64 = ..., + freq: int | None | str | BaseOffset = ..., + tz: str | _tzinfo | None | int = ..., + unit: str | int | None = ..., + year: int | None = ..., + month: int | None = ..., + day: int | None = ..., + hour: int | None = ..., + minute: int | None = ..., + second: int | None = ..., + microsecond: int | None = ..., + nanosecond: int | None = ..., + tzinfo: _tzinfo | None = ..., + *, + fold: int | None = ..., + ) -> _DatetimeT | NaTType: ... + def _set_freq(self, freq: BaseOffset | None) -> None: ... + @property + def year(self) -> int: ... + @property + def month(self) -> int: ... + @property + def day(self) -> int: ... + @property + def hour(self) -> int: ... + @property + def minute(self) -> int: ... + @property + def second(self) -> int: ... + @property + def microsecond(self) -> int: ... + @property + def tzinfo(self) -> _tzinfo | None: ... + @property + def tz(self) -> _tzinfo | None: ... + @property + def fold(self) -> int: ... + @classmethod + def fromtimestamp( + cls: type[_DatetimeT], t: float, tz: _tzinfo | None = ... + ) -> _DatetimeT: ... + @classmethod + def utcfromtimestamp(cls: type[_DatetimeT], t: float) -> _DatetimeT: ... + @classmethod + def today(cls: type[_DatetimeT], tz: _tzinfo | str | None = ...) -> _DatetimeT: ... + @classmethod + def fromordinal( + cls: type[_DatetimeT], + ordinal: int, + freq: str | BaseOffset | None = ..., + tz: _tzinfo | str | None = ..., + ) -> _DatetimeT: ... + @classmethod + def now(cls: type[_DatetimeT], tz: _tzinfo | str | None = ...) -> _DatetimeT: ... + @classmethod + def utcnow(cls: type[_DatetimeT]) -> _DatetimeT: ... + # error: Signature of "combine" incompatible with supertype "datetime" + @classmethod + def combine(cls, date: _date, time: _time) -> datetime: ... # type: ignore[override] + @classmethod + def fromisoformat(cls: type[_DatetimeT], date_string: str) -> _DatetimeT: ... + def strftime(self, format: str) -> str: ... + def __format__(self, fmt: str) -> str: ... + def toordinal(self) -> int: ... + def timetuple(self) -> struct_time: ... + def timestamp(self) -> float: ... + def utctimetuple(self) -> struct_time: ... + def date(self) -> _date: ... + def time(self) -> _time: ... + def timetz(self) -> _time: ... + def replace( + self, + year: int = ..., + month: int = ..., + day: int = ..., + hour: int = ..., + minute: int = ..., + second: int = ..., + microsecond: int = ..., + tzinfo: _tzinfo | None = ..., + fold: int = ..., + ) -> datetime: ... + def astimezone(self: _DatetimeT, tz: _tzinfo | None = ...) -> _DatetimeT: ... + def ctime(self) -> str: ... 
+ def isoformat(self, sep: str = ..., timespec: str = ...) -> str: ... + @classmethod + def strptime(cls, date_string: str, format: str) -> datetime: ... + def utcoffset(self) -> timedelta | None: ... + def tzname(self) -> str | None: ... + def dst(self) -> timedelta | None: ... + def __le__(self, other: datetime) -> bool: ... # type: ignore + def __lt__(self, other: datetime) -> bool: ... # type: ignore + def __ge__(self, other: datetime) -> bool: ... # type: ignore + def __gt__(self, other: datetime) -> bool: ... # type: ignore + # error: Signature of "__add__" incompatible with supertype "date"/"datetime" + @overload # type: ignore[override] + def __add__(self, other: np.ndarray) -> np.ndarray: ... + @overload + # TODO: other can also be Tick (but it cannot be resolved) + def __add__(self: _DatetimeT, other: timedelta | np.timedelta64) -> _DatetimeT: ... + def __radd__(self: _DatetimeT, other: timedelta) -> _DatetimeT: ... + @overload # type: ignore + def __sub__(self, other: datetime) -> timedelta: ... + @overload + # TODO: other can also be Tick (but it cannot be resolved) + def __sub__(self, other: timedelta | np.timedelta64) -> datetime: ... + def __hash__(self) -> int: ... + def weekday(self) -> int: ... + def isoweekday(self) -> int: ... + def isocalendar(self) -> tuple[int, int, int]: ... + @property + def is_leap_year(self) -> bool: ... + @property + def is_month_start(self) -> bool: ... + @property + def is_quarter_start(self) -> bool: ... + @property + def is_year_start(self) -> bool: ... + @property + def is_month_end(self) -> bool: ... + @property + def is_quarter_end(self) -> bool: ... + @property + def is_year_end(self) -> bool: ... + def to_pydatetime(self, warn: bool = ...) -> datetime: ... + def to_datetime64(self) -> np.datetime64: ... + def to_period(self, freq: BaseOffset | str | None = ...) -> Period: ... + def to_julian_date(self) -> np.float64: ... + @property + def asm8(self) -> np.datetime64: ... + def tz_convert(self: _DatetimeT, tz: _tzinfo | str | None) -> _DatetimeT: ... + # TODO: could return NaT? + def tz_localize( + self: _DatetimeT, + tz: _tzinfo | str | None, + ambiguous: str = ..., + nonexistent: str = ..., + ) -> _DatetimeT: ... + def normalize(self: _DatetimeT) -> _DatetimeT: ... + # TODO: round/floor/ceil could return NaT? + def round( + self: _DatetimeT, freq: str, ambiguous: bool | str = ..., nonexistent: str = ... + ) -> _DatetimeT: ... + def floor( + self: _DatetimeT, freq: str, ambiguous: bool | str = ..., nonexistent: str = ... + ) -> _DatetimeT: ... + def ceil( + self: _DatetimeT, freq: str, ambiguous: bool | str = ..., nonexistent: str = ... + ) -> _DatetimeT: ... + def day_name(self, locale: str | None = ...) -> str: ... + def month_name(self, locale: str | None = ...) -> str: ... + @property + def day_of_week(self) -> int: ... + @property + def day_of_month(self) -> int: ... + @property + def day_of_year(self) -> int: ... + @property + def quarter(self) -> int: ... + @property + def week(self) -> int: ... + def to_numpy( + self, dtype: np.dtype | None = ..., copy: bool = ... + ) -> np.datetime64: ... diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/timestamps.pyx b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/timestamps.pyx new file mode 100644 index 0000000..92a00d6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/timestamps.pyx @@ -0,0 +1,2097 @@ +""" +_Timestamp is a c-defined subclass of datetime.datetime + +_Timestamp is PITA. 
Because we inherit from datetime, which has very specific +construction requirements, we need to do object instantiation in python +(see Timestamp class below). This will serve as a C extension type that +shadows the python class, where we do any heavy lifting. +""" +import warnings + +cimport cython + +import numpy as np + +cimport numpy as cnp +from numpy cimport ( + int8_t, + int64_t, + ndarray, + uint8_t, +) + +cnp.import_array() + +from cpython.datetime cimport ( # alias bc `tzinfo` is a kwarg below + PyDate_Check, + PyDateTime_Check, + PyDateTime_IMPORT, + PyDelta_Check, + PyTZInfo_Check, + datetime, + time, + tzinfo as tzinfo_type, +) +from cpython.object cimport ( + Py_EQ, + Py_GE, + Py_GT, + Py_LE, + Py_LT, + Py_NE, + PyObject_RichCompare, + PyObject_RichCompareBool, +) + +PyDateTime_IMPORT + +from pandas._libs.tslibs cimport ccalendar +from pandas._libs.tslibs.base cimport ABCTimestamp +from pandas._libs.tslibs.conversion cimport ( + _TSObject, + convert_datetime_to_tsobject, + convert_to_tsobject, + normalize_i8_stamp, +) +from pandas._libs.tslibs.util cimport ( + is_array, + is_datetime64_object, + is_float_object, + is_integer_object, + is_timedelta64_object, +) + +from pandas._libs.tslibs.fields import ( + RoundTo, + get_date_name_field, + get_start_end_field, + round_nsint64, +) + +from pandas._libs.tslibs.nattype cimport ( + NPY_NAT, + c_NaT as NaT, +) +from pandas._libs.tslibs.np_datetime cimport ( + check_dts_bounds, + cmp_scalar, + dt64_to_dtstruct, + npy_datetimestruct, + pydatetime_to_dt64, +) + +from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime + +from pandas._libs.tslibs.offsets cimport ( + is_offset_object, + to_offset, +) +from pandas._libs.tslibs.timedeltas cimport ( + delta_to_nanoseconds, + is_any_td_scalar, +) + +from pandas._libs.tslibs.timedeltas import Timedelta + +from pandas._libs.tslibs.timezones cimport ( + get_timezone, + is_utc, + maybe_get_tz, + treat_tz_as_pytz, + tz_compare, + utc_pytz as UTC, +) +from pandas._libs.tslibs.tzconversion cimport ( + tz_convert_from_utc_single, + tz_localize_to_utc_single, +) + +# ---------------------------------------------------------------------- +# Constants +_zero_time = time(0, 0) +_no_input = object() + +# ---------------------------------------------------------------------- + + +cdef inline object create_timestamp_from_ts(int64_t value, + npy_datetimestruct dts, + tzinfo tz, object freq, bint fold): + """ convenience routine to construct a Timestamp from its parts """ + cdef _Timestamp ts_base + ts_base = _Timestamp.__new__(Timestamp, dts.year, dts.month, + dts.day, dts.hour, dts.min, + dts.sec, dts.us, tz, fold=fold) + ts_base.value = value + ts_base._freq = freq + ts_base.nanosecond = dts.ps // 1000 + + return ts_base + + +def _unpickle_timestamp(value, freq, tz): + # GH#41949 dont warn on unpickle if we have a freq + ts = Timestamp(value, tz=tz) + ts._set_freq(freq) + return ts + + +# ---------------------------------------------------------------------- + +def integer_op_not_supported(obj): + # GH#22535 add/sub of integers and int-arrays is no longer allowed + # Note we return rather than raise the exception so we can raise in + # the caller; mypy finds this more palatable. + cls = type(obj).__name__ + + # GH#30886 using an fstring raises SystemError + int_addsub_msg = ( + f"Addition/subtraction of integers and integer-arrays with {cls} is " + "no longer supported. 
Instead of adding/subtracting `n`, " + "use `n * obj.freq`" + ) + return TypeError(int_addsub_msg) + + +# ---------------------------------------------------------------------- + +cdef class _Timestamp(ABCTimestamp): + + # higher than np.ndarray and np.matrix + __array_priority__ = 100 + dayofweek = _Timestamp.day_of_week + dayofyear = _Timestamp.day_of_year + + cpdef void _set_freq(self, freq): + # set the ._freq attribute without going through the constructor, + # which would issue a warning + # Caller is responsible for validation + self._freq = freq + + @property + def freq(self): + warnings.warn( + "Timestamp.freq is deprecated and will be removed in a future version.", + FutureWarning, + stacklevel=1, + ) + return self._freq + + def __hash__(_Timestamp self): + if self.nanosecond: + return hash(self.value) + if self.fold: + return datetime.__hash__(self.replace(fold=0)) + return datetime.__hash__(self) + + def __richcmp__(_Timestamp self, object other, int op): + cdef: + _Timestamp ots + int ndim + + if isinstance(other, _Timestamp): + ots = other + elif other is NaT: + return op == Py_NE + elif PyDateTime_Check(other) or is_datetime64_object(other): + if self.nanosecond == 0 and PyDateTime_Check(other): + val = self.to_pydatetime() + return PyObject_RichCompareBool(val, other, op) + + try: + ots = type(self)(other) + except ValueError: + if is_datetime64_object(other): + # cast non-nano dt64 to pydatetime + other = other.astype(object) + return self._compare_outside_nanorange(other, op) + + elif is_array(other): + # avoid recursion error GH#15183 + if other.dtype.kind == "M": + if self.tz is None: + return PyObject_RichCompare(self.asm8, other, op) + elif op == Py_NE: + return np.ones(other.shape, dtype=np.bool_) + elif op == Py_EQ: + return np.zeros(other.shape, dtype=np.bool_) + raise TypeError( + "Cannot compare tz-naive and tz-aware timestamps" + ) + elif other.dtype.kind == "O": + # Operate element-wise + return np.array( + [PyObject_RichCompare(self, x, op) for x in other], + dtype=bool, + ) + elif op == Py_NE: + return np.ones(other.shape, dtype=np.bool_) + elif op == Py_EQ: + return np.zeros(other.shape, dtype=np.bool_) + return NotImplemented + + elif PyDate_Check(other): + # returning NotImplemented defers to the `date` implementation + # which incorrectly drops tz and normalizes to midnight + # before comparing + # We follow the stdlib datetime behavior of never being equal + warnings.warn( + "Comparison of Timestamp with datetime.date is deprecated in " + "order to match the standard library behavior. " + "In a future version these will be considered non-comparable. 
" + "Use 'ts == pd.Timestamp(date)' or 'ts.date() == date' instead.", + FutureWarning, + stacklevel=1, + ) + return NotImplemented + else: + return NotImplemented + + if not self._can_compare(ots): + if op == Py_NE or op == Py_EQ: + return NotImplemented + raise TypeError( + "Cannot compare tz-naive and tz-aware timestamps" + ) + return cmp_scalar(self.value, ots.value, op) + + cdef bint _compare_outside_nanorange(_Timestamp self, datetime other, + int op) except -1: + cdef: + datetime dtval = self.to_pydatetime(warn=False) + + if not self._can_compare(other): + return NotImplemented + + if self.nanosecond == 0: + return PyObject_RichCompareBool(dtval, other, op) + + # otherwise we have dtval < self + if op == Py_NE: + return True + if op == Py_EQ: + return False + if op == Py_LE or op == Py_LT: + return self.year <= other.year + if op == Py_GE or op == Py_GT: + return self.year >= other.year + + cdef bint _can_compare(self, datetime other): + if self.tzinfo is not None: + return other.tzinfo is not None + return other.tzinfo is None + + def __add__(self, other): + cdef: + int64_t nanos = 0 + + if is_any_td_scalar(other): + nanos = delta_to_nanoseconds(other) + result = type(self)(self.value + nanos, tz=self.tzinfo) + if result is not NaT: + result._set_freq(self._freq) # avoid warning in constructor + return result + + elif is_integer_object(other): + raise integer_op_not_supported(self) + + elif is_array(other): + if other.dtype.kind in ['i', 'u']: + raise integer_op_not_supported(self) + if other.dtype.kind == "m": + if self.tz is None: + return self.asm8 + other + return np.asarray( + [self + other[n] for n in range(len(other))], + dtype=object, + ) + + elif not isinstance(self, _Timestamp): + # cython semantics, args have been switched and this is __radd__ + return other.__add__(self) + return NotImplemented + + def __sub__(self, other): + + if is_any_td_scalar(other) or is_integer_object(other): + neg_other = -other + return self + neg_other + + elif is_array(other): + if other.dtype.kind in ['i', 'u']: + raise integer_op_not_supported(self) + if other.dtype.kind == "m": + if self.tz is None: + return self.asm8 - other + return np.asarray( + [self - other[n] for n in range(len(other))], + dtype=object, + ) + return NotImplemented + + if other is NaT: + return NaT + + # coerce if necessary if we are a Timestamp-like + if (PyDateTime_Check(self) + and (PyDateTime_Check(other) or is_datetime64_object(other))): + # both_timestamps is to determine whether Timedelta(self - other) + # should raise the OOB error, or fall back returning a timedelta. + both_timestamps = (isinstance(other, _Timestamp) and + isinstance(self, _Timestamp)) + if isinstance(self, _Timestamp): + other = type(self)(other) + else: + self = type(other)(self) + + if (self.tzinfo is None) ^ (other.tzinfo is None): + raise TypeError( + "Cannot subtract tz-naive and tz-aware datetime-like objects." + ) + + # scalar Timestamp/datetime - Timestamp/datetime -> yields a + # Timedelta + try: + return Timedelta(self.value - other.value) + except (OverflowError, OutOfBoundsDatetime) as err: + if isinstance(other, _Timestamp): + if both_timestamps: + raise OutOfBoundsDatetime( + "Result is too large for pandas.Timedelta. Convert inputs " + "to datetime.datetime with 'Timestamp.to_pydatetime()' " + "before subtracting." 
+ ) from err + # We get here in stata tests, fall back to stdlib datetime + # method and return stdlib timedelta object + pass + elif is_datetime64_object(self): + # GH#28286 cython semantics for __rsub__, `other` is actually + # the Timestamp + return type(other)(self) - other + + return NotImplemented + + # ----------------------------------------------------------------- + + cdef int64_t _maybe_convert_value_to_local(self): + """Convert UTC i8 value to local i8 value if tz exists""" + cdef: + int64_t val + tzinfo own_tz = self.tzinfo + npy_datetimestruct dts + + if own_tz is not None and not is_utc(own_tz): + val = pydatetime_to_dt64(self, &dts) + self.nanosecond + else: + val = self.value + return val + + cdef bint _get_start_end_field(self, str field, freq): + cdef: + int64_t val + dict kwds + ndarray[uint8_t, cast=True] out + int month_kw + + if freq: + kwds = freq.kwds + month_kw = kwds.get('startingMonth', kwds.get('month', 12)) + freqstr = self._freqstr + else: + month_kw = 12 + freqstr = None + + val = self._maybe_convert_value_to_local() + out = get_start_end_field(np.array([val], dtype=np.int64), + field, freqstr, month_kw) + return out[0] + + cdef _warn_on_field_deprecation(self, freq, str field): + """ + Warn if the removal of .freq changes the value of start/end properties. + """ + cdef: + bint needs = False + + if freq is not None: + kwds = freq.kwds + month_kw = kwds.get("startingMonth", kwds.get("month", 12)) + freqstr = self._freqstr + if month_kw != 12: + needs = True + if freqstr.startswith("B"): + needs = True + + if needs: + warnings.warn( + "Timestamp.freq is deprecated and will be removed in a future " + "version. When you have a freq, use " + f"freq.{field}(timestamp) instead.", + FutureWarning, + stacklevel=1, + ) + + @property + def is_month_start(self) -> bool: + """ + Return True if date is first day of month. + + Examples + -------- + >>> ts = pd.Timestamp(2020, 3, 14) + >>> ts.is_month_start + False + + >>> ts = pd.Timestamp(2020, 1, 1) + >>> ts.is_month_start + True + """ + if self._freq is None: + # fast-path for non-business frequencies + return self.day == 1 + self._warn_on_field_deprecation(self._freq, "is_month_start") + return self._get_start_end_field("is_month_start", self._freq) + + @property + def is_month_end(self) -> bool: + """ + Return True if date is last day of month. + + Examples + -------- + >>> ts = pd.Timestamp(2020, 3, 14) + >>> ts.is_month_end + False + + >>> ts = pd.Timestamp(2020, 12, 31) + >>> ts.is_month_end + True + """ + if self._freq is None: + # fast-path for non-business frequencies + return self.day == self.days_in_month + self._warn_on_field_deprecation(self._freq, "is_month_end") + return self._get_start_end_field("is_month_end", self._freq) + + @property + def is_quarter_start(self) -> bool: + """ + Return True if date is first day of the quarter. + + Examples + -------- + >>> ts = pd.Timestamp(2020, 3, 14) + >>> ts.is_quarter_start + False + + >>> ts = pd.Timestamp(2020, 4, 1) + >>> ts.is_quarter_start + True + """ + if self._freq is None: + # fast-path for non-business frequencies + return self.day == 1 and self.month % 3 == 1 + self._warn_on_field_deprecation(self._freq, "is_quarter_start") + return self._get_start_end_field("is_quarter_start", self._freq) + + @property + def is_quarter_end(self) -> bool: + """ + Return True if date is last day of the quarter.
+ + Examples + -------- + >>> ts = pd.Timestamp(2020, 3, 14) + >>> ts.is_quarter_end + False + + >>> ts = pd.Timestamp(2020, 3, 31) + >>> ts.is_quarter_end + True + """ + if self._freq is None: + # fast-path for non-business frequencies + return (self.month % 3) == 0 and self.day == self.days_in_month + self._warn_on_field_deprecation(self._freq, "is_quarter_end") + return self._get_start_end_field("is_quarter_end", self._freq) + + @property + def is_year_start(self) -> bool: + """ + Return True if date is first day of the year. + + Examples + -------- + >>> ts = pd.Timestamp(2020, 3, 14) + >>> ts.is_year_start + False + + >>> ts = pd.Timestamp(2020, 1, 1) + >>> ts.is_year_start + True + """ + if self._freq is None: + # fast-path for non-business frequencies + return self.day == self.month == 1 + self._warn_on_field_deprecation(self._freq, "is_year_start") + return self._get_start_end_field("is_year_start", self._freq) + + @property + def is_year_end(self) -> bool: + """ + Return True if date is last day of the year. + + Examples + -------- + >>> ts = pd.Timestamp(2020, 3, 14) + >>> ts.is_year_end + False + + >>> ts = pd.Timestamp(2020, 12, 31) + >>> ts.is_year_end + True + """ + if self._freq is None: + # fast-path for non-business frequencies + return self.month == 12 and self.day == 31 + self._warn_on_field_deprecation(self._freq, "is_year_end") + return self._get_start_end_field("is_year_end", self._freq) + + cdef _get_date_name_field(self, str field, object locale): + cdef: + int64_t val + object[:] out + + val = self._maybe_convert_value_to_local() + out = get_date_name_field(np.array([val], dtype=np.int64), + field, locale=locale) + return out[0] + + def day_name(self, locale=None) -> str: + """ + Return the day name of the Timestamp with specified locale. + + Parameters + ---------- + locale : str, default None (English locale) + Locale determining the language in which to return the day name. + + Returns + ------- + str + + Examples + -------- + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651') + >>> ts.day_name() + 'Saturday' + + Analogous for ``pd.NaT``: + + >>> pd.NaT.day_name() + nan + """ + return self._get_date_name_field("day_name", locale) + + def month_name(self, locale=None) -> str: + """ + Return the month name of the Timestamp with specified locale. + + Parameters + ---------- + locale : str, default None (English locale) + Locale determining the language in which to return the month name. + + Returns + ------- + str + + Examples + -------- + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651') + >>> ts.month_name() + 'March' + + Analogous for ``pd.NaT``: + + >>> pd.NaT.month_name() + nan + """ + return self._get_date_name_field("month_name", locale) + + @property + def is_leap_year(self) -> bool: + """ + Return True if year is a leap year. + + Examples + -------- + >>> ts = pd.Timestamp(2020, 3, 14) + >>> ts.is_leap_year + True + """ + return bool(ccalendar.is_leapyear(self.year)) + + @property + def day_of_week(self) -> int: + """ + Return day of the week. + + Examples + -------- + >>> ts = pd.Timestamp(2020, 3, 14) + >>> ts.day_of_week + 5 + """ + return self.weekday() + + @property + def day_of_year(self) -> int: + """ + Return the day of the year. + + Examples + -------- + >>> ts = pd.Timestamp(2020, 3, 14) + >>> ts.day_of_year + 74 + """ + return ccalendar.get_day_of_year(self.year, self.month, self.day) + + @property + def quarter(self) -> int: + """ + Return the quarter of the year. 
+ + Examples + -------- + >>> ts = pd.Timestamp(2020, 3, 14) + >>> ts.quarter + 1 + """ + return ((self.month - 1) // 3) + 1 + + @property + def week(self) -> int: + """ + Return the week number of the year. + + Examples + -------- + >>> ts = pd.Timestamp(2020, 3, 14) + >>> ts.week + 11 + """ + return ccalendar.get_week_of_year(self.year, self.month, self.day) + + @property + def days_in_month(self) -> int: + """ + Return the number of days in the month. + + Examples + -------- + >>> ts = pd.Timestamp(2020, 3, 14) + >>> ts.days_in_month + 31 + """ + return ccalendar.get_days_in_month(self.year, self.month) + + # ----------------------------------------------------------------- + # Transformation Methods + + def normalize(self) -> "Timestamp": + """ + Normalize Timestamp to midnight, preserving tz information. + + Examples + -------- + >>> ts = pd.Timestamp(2020, 3, 14, 15, 30) + >>> ts.normalize() + Timestamp('2020-03-14 00:00:00') + """ + cdef: + local_val = self._maybe_convert_value_to_local() + int64_t normalized + + normalized = normalize_i8_stamp(local_val) + return Timestamp(normalized).tz_localize(self.tzinfo) + + # ----------------------------------------------------------------- + # Pickle Methods + + def __reduce_ex__(self, protocol): + # python 3.6 compat + # https://bugs.python.org/issue28730 + # now __reduce_ex__ is defined and higher priority than __reduce__ + return self.__reduce__() + + def __setstate__(self, state): + self.value = state[0] + self._freq = state[1] + self.tzinfo = state[2] + + def __reduce__(self): + object_state = self.value, self._freq, self.tzinfo + return (_unpickle_timestamp, object_state) + + # ----------------------------------------------------------------- + # Rendering Methods + + def isoformat(self, sep: str = "T", timespec: str = "auto") -> str: + """ + Return the time formatted according to ISO 8601. + + The full format looks like 'YYYY-MM-DD HH:MM:SS.mmmmmmnnn'. + By default, the fractional part is omitted if self.microsecond == 0 + and self.nanosecond == 0. + + If self.tzinfo is not None, the UTC offset is also attached, giving + a full format of 'YYYY-MM-DD HH:MM:SS.mmmmmmnnn+HH:MM'. + + Parameters + ---------- + sep : str, default 'T' + String used as the separator between the date and time. + + timespec : str, default 'auto' + Specifies the number of additional terms of the time to include. + The valid values are 'auto', 'hours', 'minutes', 'seconds', + 'milliseconds', 'microseconds', and 'nanoseconds'.
+ + Returns + ------- + str + + Examples + -------- + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651') + >>> ts.isoformat() + '2020-03-14T15:32:52.192548651' + >>> ts.isoformat(timespec='microseconds') + '2020-03-14T15:32:52.192548' + """ + base_ts = "microseconds" if timespec == "nanoseconds" else timespec + base = super(_Timestamp, self).isoformat(sep=sep, timespec=base_ts) + if self.nanosecond == 0 and timespec != "nanoseconds": + return base + + if self.tzinfo is not None: + base1, base2 = base[:-6], base[-6:] + else: + base1, base2 = base, "" + + if timespec == "nanoseconds" or (timespec == "auto" and self.nanosecond): + if self.microsecond: + base1 += f"{self.nanosecond:03d}" + else: + base1 += f".{self.nanosecond:09d}" + + return base1 + base2 + + def __repr__(self) -> str: + stamp = self._repr_base + zone = None + + try: + stamp += self.strftime('%z') + except ValueError: + year2000 = self.replace(year=2000) + stamp += year2000.strftime('%z') + + if self.tzinfo: + zone = get_timezone(self.tzinfo) + try: + stamp += zone.strftime(' %%Z') + except AttributeError: + # e.g. tzlocal has no `strftime` + pass + + tz = f", tz='{zone}'" if zone is not None else "" + freq = "" if self._freq is None else f", freq='{self._freqstr}'" + + return f"Timestamp('{stamp}'{tz}{freq})" + + @property + def _repr_base(self) -> str: + return f"{self._date_repr} {self._time_repr}" + + @property + def _date_repr(self) -> str: + # Ideal here would be self.strftime("%Y-%m-%d"), but + # the datetime strftime() methods require year >= 1900 + return f'{self.year}-{self.month:02d}-{self.day:02d}' + + @property + def _time_repr(self) -> str: + result = f'{self.hour:02d}:{self.minute:02d}:{self.second:02d}' + + if self.nanosecond != 0: + result += f'.{self.nanosecond + 1000 * self.microsecond:09d}' + elif self.microsecond != 0: + result += f'.{self.microsecond:06d}' + + return result + + @property + def _short_repr(self) -> str: + # format a Timestamp with only _date_repr if possible + # otherwise _repr_base + if (self.hour == 0 and + self.minute == 0 and + self.second == 0 and + self.microsecond == 0 and + self.nanosecond == 0): + return self._date_repr + return self._repr_base + + # ----------------------------------------------------------------- + # Conversion Methods + + @property + def asm8(self) -> np.datetime64: + """ + Return numpy datetime64 format in nanoseconds. + + Examples + -------- + >>> ts = pd.Timestamp(2020, 3, 14, 15) + >>> ts.asm8 + numpy.datetime64('2020-03-14T15:00:00.000000000') + """ + return np.datetime64(self.value, 'ns') + + def timestamp(self): + """ + Return POSIX timestamp as float. + + Examples + -------- + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548') + >>> ts.timestamp() + 1584199972.192548 + """ + # GH 17329 + # Note: Naive timestamps will not match datetime.stdlib + return round(self.value / 1e9, 6) + + cpdef datetime to_pydatetime(_Timestamp self, bint warn=True): + """ + Convert a Timestamp object to a native Python datetime object. + + If warn=True, issue a warning if nanoseconds is nonzero. 
+ + Examples + -------- + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548') + >>> ts.to_pydatetime() + datetime.datetime(2020, 3, 14, 15, 32, 52, 192548) + + Analogous for ``pd.NaT``: + + >>> pd.NaT.to_pydatetime() + NaT + """ + if self.nanosecond != 0 and warn: + warnings.warn("Discarding nonzero nanoseconds in conversion.", + UserWarning, stacklevel=2) + + return datetime(self.year, self.month, self.day, + self.hour, self.minute, self.second, + self.microsecond, self.tzinfo, fold=self.fold) + + cpdef to_datetime64(self): + """ + Return a numpy.datetime64 object with 'ns' precision. + """ + return np.datetime64(self.value, "ns") + + def to_numpy(self, dtype=None, copy=False) -> np.datetime64: + """ + Convert the Timestamp to a NumPy datetime64. + + .. versionadded:: 0.25.0 + + This is an alias method for `Timestamp.to_datetime64()`. The dtype and + copy parameters are available here only for compatibility. Their values + will not affect the return value. + + Returns + ------- + numpy.datetime64 + + See Also + -------- + DatetimeIndex.to_numpy : Similar method for DatetimeIndex. + + Examples + -------- + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651') + >>> ts.to_numpy() + numpy.datetime64('2020-03-14T15:32:52.192548651') + + Analogous for ``pd.NaT``: + + >>> pd.NaT.to_numpy() + numpy.datetime64('NaT') + """ + if dtype is not None or copy is not False: + raise ValueError( + "Timestamp.to_numpy dtype and copy arguments are ignored." + ) + return self.to_datetime64() + + def to_period(self, freq=None): + """ + Return a Period of which this timestamp is an observation. + + Examples + -------- + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651') + >>> # Year end frequency + >>> ts.to_period(freq='Y') + Period('2020', 'A-DEC') + + >>> # Month end frequency + >>> ts.to_period(freq='M') + Period('2020-03', 'M') + + >>> # Weekly frequency + >>> ts.to_period(freq='W') + Period('2020-03-09/2020-03-15', 'W-SUN') + + >>> # Quarter end frequency + >>> ts.to_period(freq='Q') + Period('2020Q1', 'Q-DEC') + """ + from pandas import Period + + if self.tz is not None: + # GH#21333 + warnings.warn( + "Converting to Period representation will drop timezone information.", + UserWarning, + ) + + if freq is None: + freq = self._freq + warnings.warn( + "In a future version, calling 'Timestamp.to_period()' without " + "passing a 'freq' will raise an exception.", + FutureWarning, + stacklevel=2, + ) + + return Period(self, freq=freq) + + +# ---------------------------------------------------------------------- + +# Python front end to C extension type _Timestamp +# This serves as the box for datetime64 + + +class Timestamp(_Timestamp): + """ + Pandas replacement for python datetime.datetime object. + + Timestamp is the pandas equivalent of python's datetime + and is interchangeable with it in most cases. It's the type used + for the entries that make up a DatetimeIndex, and other timeseries + oriented data structures in pandas. + + Parameters + ---------- + ts_input : datetime-like, str, int, float + Value to be converted to Timestamp. + freq : str, DateOffset + Offset which the Timestamp will have. + tz : str, pytz.timezone, dateutil.tz.tzfile or None + Time zone for the time which the Timestamp will have. + unit : str + Unit used for conversion if ts_input is of type int or float. The + valid values are 'D', 'h', 'm', 's', 'ms', 'us', and 'ns'. For + example, 's' means seconds and 'ms' means milliseconds.
+ year, month, day : int + hour, minute, second, microsecond : int, optional, default 0 + nanosecond : int, optional, default 0 + tzinfo : datetime.tzinfo, optional, default None + fold : {0, 1}, default None, keyword-only + Due to daylight saving time, one wall clock time can occur twice + when shifting from summer to winter time; fold describes whether the + datetime-like corresponds to the first (0) or the second time (1) + the wall clock hits the ambiguous time. + + .. versionadded:: 1.1.0 + + Notes + ----- + There are essentially three calling conventions for the constructor. The + primary form accepts four parameters. They can be passed by position or + keyword. + + The other two forms mimic the parameters from ``datetime.datetime``. They + can be passed by either position or keyword, but not both mixed together. + + Examples + -------- + Using the primary calling convention: + + This converts a datetime-like string + + >>> pd.Timestamp('2017-01-01T12') + Timestamp('2017-01-01 12:00:00') + + This converts a float representing a Unix epoch in units of seconds + + >>> pd.Timestamp(1513393355.5, unit='s') + Timestamp('2017-12-16 03:02:35.500000') + + This converts an int representing a Unix-epoch in units of seconds + and for a particular timezone + + >>> pd.Timestamp(1513393355, unit='s', tz='US/Pacific') + Timestamp('2017-12-15 19:02:35-0800', tz='US/Pacific') + + Using the other two forms that mimic the API for ``datetime.datetime``: + + >>> pd.Timestamp(2017, 1, 1, 12) + Timestamp('2017-01-01 12:00:00') + + >>> pd.Timestamp(year=2017, month=1, day=1, hour=12) + Timestamp('2017-01-01 12:00:00') + """ + + @classmethod + def fromordinal(cls, ordinal, freq=None, tz=None): + """ + Timestamp.fromordinal(ordinal, freq=None, tz=None) + + Passed an ordinal, translate and convert to a ts. + Note: by definition there cannot be any tz info on the ordinal itself. + + Parameters + ---------- + ordinal : int + Date corresponding to a proleptic Gregorian ordinal. + freq : str, DateOffset + Offset to apply to the Timestamp. + tz : str, pytz.timezone, dateutil.tz.tzfile or None + Time zone for the Timestamp. + + Examples + -------- + >>> pd.Timestamp.fromordinal(737425) + Timestamp('2020-01-01 00:00:00') + """ + return cls(datetime.fromordinal(ordinal), + freq=freq, tz=tz) + + @classmethod + def now(cls, tz=None): + """ + Timestamp.now(tz=None) + + Return new Timestamp object representing current time local to + tz. + + Parameters + ---------- + tz : str or timezone object, default None + Timezone to localize to. + + Examples + -------- + >>> pd.Timestamp.now() # doctest: +SKIP + Timestamp('2020-11-16 22:06:16.378782') + + Analogous for ``pd.NaT``: + + >>> pd.NaT.now() + NaT + """ + if isinstance(tz, str): + tz = maybe_get_tz(tz) + return cls(datetime.now(tz)) + + @classmethod + def today(cls, tz=None): + """ + Timestamp.today(cls, tz=None) + + Return the current time in the local timezone. This differs + from datetime.today() in that it can be localized to a + passed timezone. + + Parameters + ---------- + tz : str or timezone object, default None + Timezone to localize to. + + Examples + -------- + >>> pd.Timestamp.today() # doctest: +SKIP + Timestamp('2020-11-16 22:37:39.969883') + + Analogous for ``pd.NaT``: + + >>> pd.NaT.today() + NaT + """ + return cls.now(tz) + + @classmethod + def utcnow(cls): + """ + Timestamp.utcnow() + + Return a new Timestamp representing UTC day and time. 
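+
+        Effectively ``Timestamp.now(tz='UTC')``, so the result is always
+        timezone-aware:
+
+        >>> pd.Timestamp.utcnow().tzinfo is not None
+        True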
+ + Examples + -------- + >>> pd.Timestamp.utcnow() # doctest: +SKIP + Timestamp('2020-11-16 22:50:18.092888+0000', tz='UTC') + """ + return cls.now(UTC) + + @classmethod + def utcfromtimestamp(cls, ts): + """ + Timestamp.utcfromtimestamp(ts) + + Construct a naive UTC datetime from a POSIX timestamp. + + Examples + -------- + >>> pd.Timestamp.utcfromtimestamp(1584199972) + Timestamp('2020-03-14 15:32:52') + """ + # GH#22451 + warnings.warn( + "The behavior of Timestamp.utcfromtimestamp is deprecated, in a " + "future version will return a timezone-aware Timestamp with UTC " + "timezone. To keep the old behavior, use " + "Timestamp.utcfromtimestamp(ts).tz_localize(None). " + "To get the future behavior, use Timestamp.fromtimestamp(ts, 'UTC')", + FutureWarning, + stacklevel=1, + ) + return cls(datetime.utcfromtimestamp(ts)) + + @classmethod + def fromtimestamp(cls, ts, tz=None): + """ + Timestamp.fromtimestamp(ts) + + Transform timestamp[, tz] to tz's local time from POSIX timestamp. + + Examples + -------- + >>> pd.Timestamp.fromtimestamp(1584199972) + Timestamp('2020-03-14 15:32:52') + + Note that the output may change depending on your local time. + """ + tz = maybe_get_tz(tz) + return cls(datetime.fromtimestamp(ts, tz)) + + def strftime(self, format): + """ + Timestamp.strftime(format) + + Return a string representing the given POSIX timestamp + controlled by an explicit format string. + + Parameters + ---------- + format : str + Format string to convert Timestamp to string. + See strftime documentation for more information on the format string: + https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior. + + Examples + -------- + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651') + >>> ts.strftime('%Y-%m-%d %X') + '2020-03-14 15:32:52' + """ + return datetime.strftime(self, format) + + # Issue 25016. + @classmethod + def strptime(cls, date_string, format): + """ + Timestamp.strptime(string, format) + + Function is not implemented. Use pd.to_datetime(). + """ + raise NotImplementedError( + "Timestamp.strptime() is not implemented. " + "Use to_datetime() to parse date strings." + ) + + @classmethod + def combine(cls, date, time): + """ + Timestamp.combine(date, time) + + Combine date, time into datetime with same date and time fields. + + Examples + -------- + >>> from datetime import date, time + >>> pd.Timestamp.combine(date(2020, 3, 14), time(15, 30, 15)) + Timestamp('2020-03-14 15:30:15') + """ + return cls(datetime.combine(date, time)) + + def __new__( + cls, + object ts_input=_no_input, + object freq=None, + tz=None, + unit=None, + year=None, + month=None, + day=None, + hour=None, + minute=None, + second=None, + microsecond=None, + nanosecond=None, + tzinfo_type tzinfo=None, + *, + fold=None, + ): + # The parameter list folds together legacy parameter names (the first + # four) and positional and keyword parameter names from pydatetime. + # + # There are three calling forms: + # + # - In the legacy form, the first parameter, ts_input, is required + # and may be datetime-like, str, int, or float. The second + # parameter, offset, is optional and may be str or DateOffset. + # + # - ints in the first, second, and third arguments indicate + # pydatetime positional arguments. Only the first 8 arguments + # (standing in for year, month, day, hour, minute, second, + # microsecond, tzinfo) may be non-None. As a shortcut, we just + # check that the second argument is an int. 
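+    #   For example, Timestamp(2020, 3, 14) resolves to this path because
+    #   the second argument (month=3) is an integer.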
+ # + # - Nones for the first four (legacy) arguments indicate pydatetime + # keyword arguments. year, month, and day are required. As a + # shortcut, we just check that the first argument was not passed. + # + # Mixing pydatetime positional and keyword arguments is forbidden! + + cdef: + _TSObject ts + tzinfo_type tzobj + + _date_attributes = [year, month, day, hour, minute, second, + microsecond, nanosecond] + + if tzinfo is not None: + # GH#17690 tzinfo must be a datetime.tzinfo object, ensured + # by the cython annotation. + if tz is not None: + raise ValueError('Can provide at most one of tz, tzinfo') + + # User passed tzinfo instead of tz; avoid silently ignoring + tz, tzinfo = tzinfo, None + + # Allow fold only for unambiguous input + if fold is not None: + if fold not in [0, 1]: + raise ValueError( + "Valid values for the fold argument are None, 0, or 1." + ) + + if (ts_input is not _no_input and not ( + PyDateTime_Check(ts_input) and + getattr(ts_input, 'tzinfo', None) is None)): + raise ValueError( + "Cannot pass fold with possibly unambiguous input: int, " + "float, numpy.datetime64, str, or timezone-aware " + "datetime-like. Pass naive datetime-like or build " + "Timestamp from components." + ) + + if tz is not None and PyTZInfo_Check(tz) and treat_tz_as_pytz(tz): + raise ValueError( + "pytz timezones do not support fold. Please use dateutil " + "timezones." + ) + + if hasattr(ts_input, 'fold'): + ts_input = ts_input.replace(fold=fold) + + # GH 30543 if pd.Timestamp already passed, return it + # check that only ts_input is passed + # checking verbosely, because cython doesn't optimize + # list comprehensions (as of cython 0.29.x) + if (isinstance(ts_input, _Timestamp) and freq is None and + tz is None and unit is None and year is None and + month is None and day is None and hour is None and + minute is None and second is None and + microsecond is None and nanosecond is None and + tzinfo is None): + return ts_input + elif isinstance(ts_input, str): + # User passed a date string to parse. + # Check that the user didn't also pass a date attribute kwarg. + if any(arg is not None for arg in _date_attributes): + raise ValueError( + "Cannot pass a date attribute keyword " + "argument when passing a date string" + ) + + elif ts_input is _no_input: + # GH 31200 + # When year, month or day is not given, we call the datetime + # constructor to make sure we get the same error message + # since Timestamp inherits datetime + datetime_kwargs = { + "hour": hour or 0, + "minute": minute or 0, + "second": second or 0, + "microsecond": microsecond or 0, + "fold": fold or 0 + } + if year is not None: + datetime_kwargs["year"] = year + if month is not None: + datetime_kwargs["month"] = month + if day is not None: + datetime_kwargs["day"] = day + + ts_input = datetime(**datetime_kwargs) + + elif is_integer_object(freq): + # User passed positional arguments: + # Timestamp(year, month, day[, hour[, minute[, second[, + # microsecond[, nanosecond[, tzinfo]]]]]]) + ts_input = datetime(ts_input, freq, tz, unit or 0, + year or 0, month or 0, day or 0, fold=fold or 0) + nanosecond = hour + tz = minute + freq = None + unit = None + + if getattr(ts_input, 'tzinfo', None) is not None and tz is not None: + raise ValueError("Cannot pass a datetime or Timestamp with tzinfo with " + "the tz parameter. 
Use tz_convert instead.") + + tzobj = maybe_get_tz(tz) + if tzobj is not None and is_datetime64_object(ts_input): + # GH#24559, GH#42288 In the future we will treat datetime64 as + # wall-time (consistent with DatetimeIndex) + warnings.warn( + "In a future version, when passing a np.datetime64 object and " + "a timezone to Timestamp, the datetime64 will be interpreted " + "as a wall time, not a UTC time. To interpret as a UTC time, " + "use `Timestamp(dt64).tz_localize('UTC').tz_convert(tz)`", + FutureWarning, + stacklevel=1, + ) + # Once this deprecation is enforced, we can do + # return Timestamp(ts_input).tz_localize(tzobj) + ts = convert_to_tsobject(ts_input, tzobj, unit, 0, 0, nanosecond or 0) + + if ts.value == NPY_NAT: + return NaT + + if freq is None: + # GH 22311: Try to extract the frequency of a given Timestamp input + freq = getattr(ts_input, '_freq', None) + else: + warnings.warn( + "The 'freq' argument in Timestamp is deprecated and will be " + "removed in a future version.", + FutureWarning, + stacklevel=1, + ) + if not is_offset_object(freq): + freq = to_offset(freq) + + return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq, ts.fold) + + def _round(self, freq, mode, ambiguous='raise', nonexistent='raise'): + cdef: + int64_t nanos = to_offset(freq).nanos + + if self.tz is not None: + value = self.tz_localize(None).value + else: + value = self.value + + value = np.array([value], dtype=np.int64) + + # Will only ever contain 1 element for timestamp + r = round_nsint64(value, mode, nanos)[0] + result = Timestamp(r, unit='ns') + if self.tz is not None: + result = result.tz_localize( + self.tz, ambiguous=ambiguous, nonexistent=nonexistent + ) + return result + + def round(self, freq, ambiguous='raise', nonexistent='raise'): + """ + Round the Timestamp to the specified resolution. + + Parameters + ---------- + freq : str + Frequency string indicating the rounding resolution. + ambiguous : bool or {'raise', 'NaT'}, default 'raise' + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + + nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ +timedelta}, default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. + + Returns + ------- + a new Timestamp rounded to the given resolution of `freq` + + Raises + ------ + ValueError if the freq cannot be converted + + Notes + ----- + If the Timestamp has a timezone, rounding will take place relative to the + local ("wall") time and re-localized to the same timezone. When rounding + near daylight savings time, use ``nonexistent`` and ``ambiguous`` to + control the re-localization behavior. 
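+
+        Informally, for a tz-aware Timestamp this behaves like
+        ``ts.tz_localize(None).round(freq).tz_localize(ts.tz, ...)``, i.e.
+        round the wall time, then re-attach the original zone.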
+ + Examples + -------- + Create a timestamp object: + + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651') + + A timestamp can be rounded using multiple frequency units: + + >>> ts.round(freq='H') # hour + Timestamp('2020-03-14 16:00:00') + + >>> ts.round(freq='T') # minute + Timestamp('2020-03-14 15:33:00') + + >>> ts.round(freq='S') # seconds + Timestamp('2020-03-14 15:32:52') + + >>> ts.round(freq='L') # milliseconds + Timestamp('2020-03-14 15:32:52.193000') + + ``freq`` can also be a multiple of a single unit, like '5T' (i.e. 5 minutes): + + >>> ts.round(freq='5T') + Timestamp('2020-03-14 15:35:00') + + or a combination of multiple units, like '1H30T' (i.e. 1 hour and 30 minutes): + + >>> ts.round(freq='1H30T') + Timestamp('2020-03-14 15:00:00') + + Analogous for ``pd.NaT``: + + >>> pd.NaT.round() + NaT + + When rounding near a daylight savings time transition, use ``ambiguous`` or + ``nonexistent`` to control how the timestamp should be re-localized. + + >>> ts_tz = pd.Timestamp("2021-10-31 01:30:00").tz_localize("Europe/Amsterdam") + + >>> ts_tz.round("H", ambiguous=False) + Timestamp('2021-10-31 02:00:00+0100', tz='Europe/Amsterdam') + + >>> ts_tz.round("H", ambiguous=True) + Timestamp('2021-10-31 02:00:00+0200', tz='Europe/Amsterdam') + """ + return self._round( + freq, RoundTo.NEAREST_HALF_EVEN, ambiguous, nonexistent + ) + + def floor(self, freq, ambiguous='raise', nonexistent='raise'): + """ + Return a new Timestamp floored to this resolution. + + Parameters + ---------- + freq : str + Frequency string indicating the flooring resolution. + ambiguous : bool or {'raise', 'NaT'}, default 'raise' + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + + nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ +timedelta}, default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. + + Raises + ------ + ValueError if the freq cannot be converted. + + Notes + ----- + If the Timestamp has a timezone, flooring will take place relative to the + local ("wall") time and re-localized to the same timezone. When flooring + near daylight savings time, use ``nonexistent`` and ``ambiguous`` to + control the re-localization behavior. + + Examples + -------- + Create a timestamp object: + + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651') + + A timestamp can be floored using multiple frequency units: + + >>> ts.floor(freq='H') # hour + Timestamp('2020-03-14 15:00:00') + + >>> ts.floor(freq='T') # minute + Timestamp('2020-03-14 15:32:00') + + >>> ts.floor(freq='S') # seconds + Timestamp('2020-03-14 15:32:52') + + >>> ts.floor(freq='N') # nanoseconds + Timestamp('2020-03-14 15:32:52.192548651') + + ``freq`` can also be a multiple of a single unit, like '5T' (i.e. 5 minutes): + + >>> ts.floor(freq='5T') + Timestamp('2020-03-14 15:30:00') + + or a combination of multiple units, like '1H30T' (i.e. 
1 hour and 30 minutes): + + >>> ts.floor(freq='1H30T') + Timestamp('2020-03-14 15:00:00') + + Analogous for ``pd.NaT``: + + >>> pd.NaT.floor() + NaT + + When rounding near a daylight savings time transition, use ``ambiguous`` or + ``nonexistent`` to control how the timestamp should be re-localized. + + >>> ts_tz = pd.Timestamp("2021-10-31 03:30:00").tz_localize("Europe/Amsterdam") + + >>> ts_tz.floor("2H", ambiguous=False) + Timestamp('2021-10-31 02:00:00+0100', tz='Europe/Amsterdam') + + >>> ts_tz.floor("2H", ambiguous=True) + Timestamp('2021-10-31 02:00:00+0200', tz='Europe/Amsterdam') + """ + return self._round(freq, RoundTo.MINUS_INFTY, ambiguous, nonexistent) + + def ceil(self, freq, ambiguous='raise', nonexistent='raise'): + """ + Return a new Timestamp ceiled to this resolution. + + Parameters + ---------- + freq : str + Frequency string indicating the ceiling resolution. + ambiguous : bool or {'raise', 'NaT'}, default 'raise' + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + + nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ +timedelta}, default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. + + Raises + ------ + ValueError if the freq cannot be converted. + + Notes + ----- + If the Timestamp has a timezone, ceiling will take place relative to the + local ("wall") time and re-localized to the same timezone. When ceiling + near daylight savings time, use ``nonexistent`` and ``ambiguous`` to + control the re-localization behavior. + + Examples + -------- + Create a timestamp object: + + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651') + + A timestamp can be ceiled using multiple frequency units: + + >>> ts.ceil(freq='H') # hour + Timestamp('2020-03-14 16:00:00') + + >>> ts.ceil(freq='T') # minute + Timestamp('2020-03-14 15:33:00') + + >>> ts.ceil(freq='S') # seconds + Timestamp('2020-03-14 15:32:53') + + >>> ts.ceil(freq='U') # microseconds + Timestamp('2020-03-14 15:32:52.192549') + + ``freq`` can also be a multiple of a single unit, like '5T' (i.e. 5 minutes): + + >>> ts.ceil(freq='5T') + Timestamp('2020-03-14 15:35:00') + + or a combination of multiple units, like '1H30T' (i.e. 1 hour and 30 minutes): + + >>> ts.ceil(freq='1H30T') + Timestamp('2020-03-14 16:30:00') + + Analogous for ``pd.NaT``: + + >>> pd.NaT.ceil() + NaT + + When rounding near a daylight savings time transition, use ``ambiguous`` or + ``nonexistent`` to control how the timestamp should be re-localized. 
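+        (In the example below, clocks fall back on 2021-10-31 in
+        Europe/Amsterdam, so the ceiled wall time 02:00 occurs twice:
+        first at +0200, then an hour later at +0100.)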
+
+        >>> ts_tz = pd.Timestamp("2021-10-31 01:30:00").tz_localize("Europe/Amsterdam")
+
+        >>> ts_tz.ceil("H", ambiguous=False)
+        Timestamp('2021-10-31 02:00:00+0100', tz='Europe/Amsterdam')
+
+        >>> ts_tz.ceil("H", ambiguous=True)
+        Timestamp('2021-10-31 02:00:00+0200', tz='Europe/Amsterdam')
+        """
+        return self._round(freq, RoundTo.PLUS_INFTY, ambiguous, nonexistent)
+
+    @property
+    def tz(self):
+        """
+        Alias for tzinfo.
+
+        Examples
+        --------
+        >>> ts = pd.Timestamp(1584226800, unit='s', tz='Europe/Stockholm')
+        >>> ts.tz
+        <DstTzInfo 'Europe/Stockholm' CET+1:00:00 STD>
+        """
+        return self.tzinfo
+
+    @tz.setter
+    def tz(self, value):
+        # GH 3746: Prevent localizing or converting the index by setting tz
+        raise AttributeError(
+            "Cannot directly set timezone. "
+            "Use tz_localize() or tz_convert() as appropriate"
+        )
+
+    @property
+    def _freqstr(self):
+        return getattr(self._freq, "freqstr", self._freq)
+
+    @property
+    def freqstr(self):
+        """
+        Return the frequency string associated with the Timestamp, if any.
+        """
+        warnings.warn(
+            "Timestamp.freqstr is deprecated and will be removed in a future version.",
+            FutureWarning,
+            stacklevel=1,
+        )
+        return self._freqstr
+
+    def tz_localize(self, tz, ambiguous='raise', nonexistent='raise'):
+        """
+        Convert naive Timestamp to local time zone, or remove
+        timezone from timezone-aware Timestamp.
+
+        Parameters
+        ----------
+        tz : str, pytz.timezone, dateutil.tz.tzfile or None
+            Time zone for time which Timestamp will be converted to.
+            None will remove timezone holding local time.
+
+        ambiguous : bool, 'NaT', default 'raise'
+            When clocks moved backward due to DST, ambiguous times may arise.
+            For example in Central European Time (UTC+01), when going from
+            03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at
+            00:30:00 UTC and at 01:30:00 UTC. In such a situation, the
+            `ambiguous` parameter dictates how ambiguous times should be
+            handled.
+
+            The behavior is as follows:
+
+            * bool contains flags to determine if time is dst or not (note
+              that this flag is only applicable for ambiguous fall dst dates).
+            * 'NaT' will return NaT for an ambiguous time.
+            * 'raise' will raise an AmbiguousTimeError for an ambiguous time.
+
+        nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, \
+default 'raise'
+            A nonexistent time does not exist in a particular timezone
+            where clocks moved forward due to DST.
+
+            The behavior is as follows:
+
+            * 'shift_forward' will shift the nonexistent time forward to the
+              closest existing time.
+            * 'shift_backward' will shift the nonexistent time backward to the
+              closest existing time.
+            * 'NaT' will return NaT where there are nonexistent times.
+            * timedelta objects will shift nonexistent times by the timedelta.
+            * 'raise' will raise a NonExistentTimeError if there are
+              nonexistent times.
+
+        Returns
+        -------
+        localized : Timestamp
+
+        Raises
+        ------
+        TypeError
+            If the Timestamp is tz-aware and tz is not None.
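+
+        Notes
+        -----
+        ``tz_localize`` never changes the wall time, it only attaches or
+        removes the zone, so a localize/strip round trip is the identity:
+
+        >>> ts = pd.Timestamp('2020-03-14 15:32:52')
+        >>> ts.tz_localize('Europe/Stockholm').tz_localize(None) == ts
+        True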
+ + Examples + -------- + Create a naive timestamp object: + + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651') + >>> ts + Timestamp('2020-03-14 15:32:52.192548651') + + Add 'Europe/Stockholm' as timezone: + + >>> ts.tz_localize(tz='Europe/Stockholm') + Timestamp('2020-03-14 15:32:52.192548651+0100', tz='Europe/Stockholm') + + Analogous for ``pd.NaT``: + + >>> pd.NaT.tz_localize() + NaT + """ + if ambiguous == 'infer': + raise ValueError('Cannot infer offset with only one time.') + + nonexistent_options = ('raise', 'NaT', 'shift_forward', 'shift_backward') + if nonexistent not in nonexistent_options and not PyDelta_Check(nonexistent): + raise ValueError( + "The nonexistent argument must be one of 'raise', " + "'NaT', 'shift_forward', 'shift_backward' or a timedelta object" + ) + + if self.tzinfo is None: + # tz naive, localize + tz = maybe_get_tz(tz) + if not isinstance(ambiguous, str): + ambiguous = [ambiguous] + value = tz_localize_to_utc_single(self.value, tz, + ambiguous=ambiguous, + nonexistent=nonexistent) + out = Timestamp(value, tz=tz) + if out is not NaT: + out._set_freq(self._freq) # avoid warning in constructor + return out + else: + if tz is None: + # reset tz + value = tz_convert_from_utc_single(self.value, self.tz) + out = Timestamp(value, tz=tz) + if out is not NaT: + out._set_freq(self._freq) # avoid warning in constructor + return out + else: + raise TypeError( + "Cannot localize tz-aware Timestamp, use tz_convert for conversions" + ) + + def tz_convert(self, tz): + """ + Convert timezone-aware Timestamp to another time zone. + + Parameters + ---------- + tz : str, pytz.timezone, dateutil.tz.tzfile or None + Time zone for time which Timestamp will be converted to. + None will remove timezone holding UTC time. + + Returns + ------- + converted : Timestamp + + Raises + ------ + TypeError + If Timestamp is tz-naive. + + Examples + -------- + Create a timestamp object with UTC timezone: + + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651', tz='UTC') + >>> ts + Timestamp('2020-03-14 15:32:52.192548651+0000', tz='UTC') + + Change to Tokyo timezone: + + >>> ts.tz_convert(tz='Asia/Tokyo') + Timestamp('2020-03-15 00:32:52.192548651+0900', tz='Asia/Tokyo') + + Can also use ``astimezone``: + + >>> ts.astimezone(tz='Asia/Tokyo') + Timestamp('2020-03-15 00:32:52.192548651+0900', tz='Asia/Tokyo') + + Analogous for ``pd.NaT``: + + >>> pd.NaT.tz_convert(tz='Asia/Tokyo') + NaT + """ + if self.tzinfo is None: + # tz naive, use tz_localize + raise TypeError( + "Cannot convert tz-naive Timestamp, use tz_localize to localize" + ) + else: + # Same UTC timestamp, different time zone + out = Timestamp(self.value, tz=tz) + if out is not NaT: + out._set_freq(self._freq) # avoid warning in constructor + return out + + astimezone = tz_convert + + def replace( + self, + year=None, + month=None, + day=None, + hour=None, + minute=None, + second=None, + microsecond=None, + nanosecond=None, + tzinfo=object, + fold=None, + ): + """ + Implements datetime.replace, handles nanoseconds. 
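+
+        Unlike :meth:`datetime.datetime.replace`, this also accepts a
+        ``nanosecond`` field, e.g. ``ts.replace(nanosecond=0)`` truncates
+        a Timestamp to microsecond precision.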
+ + Parameters + ---------- + year : int, optional + month : int, optional + day : int, optional + hour : int, optional + minute : int, optional + second : int, optional + microsecond : int, optional + nanosecond : int, optional + tzinfo : tz-convertible, optional + fold : int, optional + + Returns + ------- + Timestamp with fields replaced + + Examples + -------- + Create a timestamp object: + + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651', tz='UTC') + >>> ts + Timestamp('2020-03-14 15:32:52.192548651+0000', tz='UTC') + + Replace year and the hour: + + >>> ts.replace(year=1999, hour=10) + Timestamp('1999-03-14 10:32:52.192548651+0000', tz='UTC') + + Replace timezone (not a conversion): + + >>> import pytz + >>> ts.replace(tzinfo=pytz.timezone('US/Pacific')) + Timestamp('2020-03-14 15:32:52.192548651-0700', tz='US/Pacific') + + Analogous for ``pd.NaT``: + + >>> pd.NaT.replace(tzinfo=pytz.timezone('US/Pacific')) + NaT + """ + + cdef: + npy_datetimestruct dts + int64_t value + object k, v + datetime ts_input + tzinfo_type tzobj + + # set to naive if needed + tzobj = self.tzinfo + value = self.value + + # GH 37610. Preserve fold when replacing. + if fold is None: + fold = self.fold + + if tzobj is not None: + value = tz_convert_from_utc_single(value, tzobj) + + # setup components + dt64_to_dtstruct(value, &dts) + dts.ps = self.nanosecond * 1000 + + # replace + def validate(k, v): + """ validate integers """ + if not is_integer_object(v): + raise ValueError( + f"value must be an integer, received {type(v)} for {k}" + ) + return v + + if year is not None: + dts.year = validate('year', year) + if month is not None: + dts.month = validate('month', month) + if day is not None: + dts.day = validate('day', day) + if hour is not None: + dts.hour = validate('hour', hour) + if minute is not None: + dts.min = validate('minute', minute) + if second is not None: + dts.sec = validate('second', second) + if microsecond is not None: + dts.us = validate('microsecond', microsecond) + if nanosecond is not None: + dts.ps = validate('nanosecond', nanosecond) * 1000 + if tzinfo is not object: + tzobj = tzinfo + + # reconstruct & check bounds + if tzobj is not None and treat_tz_as_pytz(tzobj): + # replacing across a DST boundary may induce a new tzinfo object + # see GH#18319 + ts_input = tzobj.localize(datetime(dts.year, dts.month, dts.day, + dts.hour, dts.min, dts.sec, + dts.us), + is_dst=not bool(fold)) + tzobj = ts_input.tzinfo + else: + kwargs = {'year': dts.year, 'month': dts.month, 'day': dts.day, + 'hour': dts.hour, 'minute': dts.min, 'second': dts.sec, + 'microsecond': dts.us, 'tzinfo': tzobj, + 'fold': fold} + ts_input = datetime(**kwargs) + + ts = convert_datetime_to_tsobject(ts_input, tzobj) + value = ts.value + (dts.ps // 1000) + if value != NPY_NAT: + check_dts_bounds(&dts) + + return create_timestamp_from_ts(value, dts, tzobj, self._freq, fold) + + def to_julian_date(self) -> np.float64: + """ + Convert TimeStamp to a Julian Date. + 0 Julian date is noon January 1, 4713 BC. 
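+
+        Because day 0 starts at noon, a timestamp at midnight lands on a
+        half-integer:
+
+        >>> pd.Timestamp('2020-03-14').to_julian_date()
+        2458922.5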
+ + Examples + -------- + >>> ts = pd.Timestamp('2020-03-14T15:32:52') + >>> ts.to_julian_date() + 2458923.147824074 + """ + year = self.year + month = self.month + day = self.day + if month <= 2: + year -= 1 + month += 12 + return (day + + np.fix((153 * month - 457) / 5) + + 365 * year + + np.floor(year / 4) - + np.floor(year / 100) + + np.floor(year / 400) + + 1721118.5 + + (self.hour + + self.minute / 60.0 + + self.second / 3600.0 + + self.microsecond / 3600.0 / 1e+6 + + self.nanosecond / 3600.0 / 1e+9 + ) / 24.0) + + def isoweekday(self): + """ + Return the day of the week represented by the date. + Monday == 1 ... Sunday == 7. + """ + return super().isoweekday() + + def weekday(self): + """ + Return the day of the week represented by the date. + Monday == 0 ... Sunday == 6. + """ + return super().weekday() + + +# Aliases +Timestamp.weekofyear = Timestamp.week +Timestamp.daysinmonth = Timestamp.days_in_month + +# Add the min and max fields at the class level +cdef int64_t _NS_UPPER_BOUND = np.iinfo(np.int64).max +cdef int64_t _NS_LOWER_BOUND = NPY_NAT + 1 + +# Resolution is in nanoseconds +Timestamp.min = Timestamp(_NS_LOWER_BOUND) +Timestamp.max = Timestamp(_NS_UPPER_BOUND) +Timestamp.resolution = Timedelta(nanoseconds=1) # GH#21336, GH#21365 diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/timezones.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/timezones.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..3efdd26 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/timezones.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/timezones.pxd b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/timezones.pxd new file mode 100644 index 0000000..13f196a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/timezones.pxd @@ -0,0 +1,22 @@ +from cpython.datetime cimport ( + datetime, + timedelta, + tzinfo, +) + + +cdef tzinfo utc_pytz + +cpdef bint is_utc(tzinfo tz) +cdef bint is_tzlocal(tzinfo tz) + +cdef bint treat_tz_as_pytz(tzinfo tz) + +cpdef bint tz_compare(tzinfo start, tzinfo end) +cpdef object get_timezone(tzinfo tz) +cpdef tzinfo maybe_get_tz(object tz) + +cdef timedelta get_utcoffset(tzinfo tz, datetime obj) +cdef bint is_fixed_offset(tzinfo tz) + +cdef object get_dst_info(tzinfo tz) diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/timezones.pyi b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/timezones.pyi new file mode 100644 index 0000000..20c403e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/timezones.pyi @@ -0,0 +1,25 @@ +from datetime import ( + datetime, + tzinfo, +) +from typing import Callable + +import numpy as np + +from pandas._typing import npt + +# imported from dateutil.tz +dateutil_gettz: Callable[[str], tzinfo] + +def tz_standardize(tz: tzinfo) -> tzinfo: ... +def tz_compare(start: tzinfo | None, end: tzinfo | None) -> bool: ... +def infer_tzinfo( + start: datetime | None, + end: datetime | None, +) -> tzinfo | None: ... +def get_dst_info( + tz: tzinfo, +) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.int64], str]: ... +def maybe_get_tz(tz: str | int | np.int64 | tzinfo | None) -> tzinfo | None: ... +def get_timezone(tz: tzinfo) -> tzinfo | str: ... +def is_utc(tz: tzinfo | None) -> bool: ... 
diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/timezones.pyx b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/timezones.pyx new file mode 100644 index 0000000..224c5be --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/timezones.pyx @@ -0,0 +1,408 @@ +from datetime import ( + timedelta, + timezone, +) + +from cpython.datetime cimport ( + datetime, + timedelta, + tzinfo, +) + +# dateutil compat + +from dateutil.tz import ( + gettz as dateutil_gettz, + tzfile as _dateutil_tzfile, + tzlocal as _dateutil_tzlocal, + tzutc as _dateutil_tzutc, +) +import pytz +from pytz.tzinfo import BaseTzInfo as _pytz_BaseTzInfo + +UTC = pytz.utc + + +import numpy as np + +cimport numpy as cnp +from numpy cimport int64_t + +cnp.import_array() + +# ---------------------------------------------------------------------- +from pandas._libs.tslibs.util cimport ( + get_nat, + is_integer_object, +) + + +cdef int64_t NPY_NAT = get_nat() +cdef tzinfo utc_stdlib = timezone.utc +cdef tzinfo utc_pytz = UTC +cdef tzinfo utc_dateutil_str = dateutil_gettz("UTC") # NB: *not* the same as tzutc() + + +# ---------------------------------------------------------------------- + +cpdef inline bint is_utc(tzinfo tz): + return ( + tz is utc_pytz + or tz is utc_stdlib + or isinstance(tz, _dateutil_tzutc) + or tz is utc_dateutil_str + ) + + +cdef inline bint is_tzlocal(tzinfo tz): + return isinstance(tz, _dateutil_tzlocal) + + +cdef inline bint treat_tz_as_pytz(tzinfo tz): + return (hasattr(tz, '_utc_transition_times') and + hasattr(tz, '_transition_info')) + + +cdef inline bint treat_tz_as_dateutil(tzinfo tz): + return hasattr(tz, '_trans_list') and hasattr(tz, '_trans_idx') + + +# Returns str or tzinfo object +cpdef inline object get_timezone(tzinfo tz): + """ + We need to do several things here: + 1) Distinguish between pytz and dateutil timezones + 2) Not be over-specific (e.g. US/Eastern with/without DST is same *zone* + but a different tz object) + 3) Provide something to serialize when we're storing a datetime object + in pytables. + + We return a string prefaced with dateutil if it's a dateutil tz, else just + the tz name. It needs to be a string so that we can serialize it with + UJSON/pytables. maybe_get_tz (below) is the inverse of this process. + """ + if tz is None: + raise TypeError("tz argument cannot be None") + if is_utc(tz): + return tz + else: + if treat_tz_as_dateutil(tz): + if '.tar.gz' in tz._filename: + raise ValueError( + 'Bad tz filename. Dateutil on python 3 on windows has a ' + 'bug which causes tzfile._filename to be the same for all ' + 'timezone files. Please construct dateutil timezones ' + 'implicitly by passing a string like "dateutil/Europe' + '/London" when you construct your pandas objects instead ' + 'of passing a timezone object. See ' + 'https://github.com/pandas-dev/pandas/pull/7362') + return 'dateutil/' + tz._filename + else: + # tz is a pytz timezone or unknown. + try: + zone = tz.zone + if zone is None: + return tz + return zone + except AttributeError: + return tz + + +cpdef inline tzinfo maybe_get_tz(object tz): + """ + (Maybe) Construct a timezone object from a string. If tz is a string, use + it to construct a timezone object. Otherwise, just return tz. + """ + if isinstance(tz, str): + if tz == 'tzlocal()': + tz = _dateutil_tzlocal() + elif tz.startswith('dateutil/'): + zone = tz[9:] + tz = dateutil_gettz(zone) + # On Python 3 on Windows, the filename is not always set correctly. 
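+            # (dateutil may report the zoneinfo tarball path instead), so
+            # patch the attribute back to the zone name requested here.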
+ if isinstance(tz, _dateutil_tzfile) and '.tar.gz' in tz._filename: + tz._filename = zone + elif tz[0] in {'-', '+'}: + hours = int(tz[0:3]) + minutes = int(tz[0] + tz[4:6]) + tz = timezone(timedelta(hours=hours, minutes=minutes)) + elif tz[0:4] in {'UTC-', 'UTC+'}: + hours = int(tz[3:6]) + minutes = int(tz[3] + tz[7:9]) + tz = timezone(timedelta(hours=hours, minutes=minutes)) + else: + tz = pytz.timezone(tz) + elif is_integer_object(tz): + tz = pytz.FixedOffset(tz / 60) + elif isinstance(tz, tzinfo): + pass + elif tz is None: + pass + else: + raise TypeError(type(tz)) + return tz + + +def _p_tz_cache_key(tz: tzinfo): + """ + Python interface for cache function to facilitate testing. + """ + return tz_cache_key(tz) + + +# Timezone data caches, key is the pytz string or dateutil file name. +dst_cache = {} + + +cdef inline object tz_cache_key(tzinfo tz): + """ + Return the key in the cache for the timezone info object or None + if unknown. + + The key is currently the tz string for pytz timezones, the filename for + dateutil timezones. + + Notes + ----- + This cannot just be the hash of a timezone object. Unfortunately, the + hashes of two dateutil tz objects which represent the same timezone are + not equal (even though the tz objects will compare equal and represent + the same tz file). Also, pytz objects are not always hashable so we use + str(tz) instead. + """ + if isinstance(tz, _pytz_BaseTzInfo): + return tz.zone + elif isinstance(tz, _dateutil_tzfile): + if '.tar.gz' in tz._filename: + raise ValueError('Bad tz filename. Dateutil on python 3 on ' + 'windows has a bug which causes tzfile._filename ' + 'to be the same for all timezone files. Please ' + 'construct dateutil timezones implicitly by ' + 'passing a string like "dateutil/Europe/London" ' + 'when you construct your pandas objects instead ' + 'of passing a timezone object. See ' + 'https://github.com/pandas-dev/pandas/pull/7362') + return 'dateutil' + tz._filename + else: + return None + + +# ---------------------------------------------------------------------- +# UTC Offsets + + +cdef timedelta get_utcoffset(tzinfo tz, datetime obj): + try: + return tz._utcoffset + except AttributeError: + return tz.utcoffset(obj) + + +cdef inline bint is_fixed_offset(tzinfo tz): + if treat_tz_as_dateutil(tz): + if len(tz._trans_idx) == 0 and len(tz._trans_list) == 0: + return 1 + else: + return 0 + elif treat_tz_as_pytz(tz): + if (len(tz._transition_info) == 0 + and len(tz._utc_transition_times) == 0): + return 1 + else: + return 0 + # This also implicitly accepts datetime.timezone objects which are + # considered fixed + return 1 + + +cdef object _get_utc_trans_times_from_dateutil_tz(tzinfo tz): + """ + Transition times in dateutil timezones are stored in local non-dst + time. This code converts them to UTC. It's the reverse of the code + in dateutil.tz.tzfile.__init__. 
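+    Each stored transition is shifted by the most recent standard (non-DST)
+    offset, i.e. ``new_trans[i] = trans[i] - last_std_offset``.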
+ """ + new_trans = list(tz._trans_list) + last_std_offset = 0 + for i, (trans, tti) in enumerate(zip(tz._trans_list, tz._trans_idx)): + if not tti.isdst: + last_std_offset = tti.offset + new_trans[i] = trans - last_std_offset + return new_trans + + +cdef int64_t[:] unbox_utcoffsets(object transinfo): + cdef: + Py_ssize_t i, sz + int64_t[:] arr + + sz = len(transinfo) + arr = np.empty(sz, dtype='i8') + + for i in range(sz): + arr[i] = int(transinfo[i][0].total_seconds()) * 1_000_000_000 + + return arr + + +# ---------------------------------------------------------------------- +# Daylight Savings + + +cdef object get_dst_info(tzinfo tz): + """ + Returns + ------- + ndarray[int64_t] + Nanosecond UTC times of DST transitions. + ndarray[int64_t] + Nanosecond UTC offsets corresponding to DST transitions. + str + Describing the type of tzinfo object. + """ + cache_key = tz_cache_key(tz) + if cache_key is None: + # e.g. pytz.FixedOffset, matplotlib.dates._UTC, + # psycopg2.tz.FixedOffsetTimezone + num = int(get_utcoffset(tz, None).total_seconds()) * 1_000_000_000 + return (np.array([NPY_NAT + 1], dtype=np.int64), + np.array([num], dtype=np.int64), + "unknown") + + if cache_key not in dst_cache: + if treat_tz_as_pytz(tz): + trans = np.array(tz._utc_transition_times, dtype='M8[ns]') + trans = trans.view('i8') + if tz._utc_transition_times[0].year == 1: + trans[0] = NPY_NAT + 1 + deltas = unbox_utcoffsets(tz._transition_info) + typ = 'pytz' + + elif treat_tz_as_dateutil(tz): + if len(tz._trans_list): + # get utc trans times + trans_list = _get_utc_trans_times_from_dateutil_tz(tz) + trans = np.hstack([ + np.array([0], dtype='M8[s]'), # place holder for 1st item + np.array(trans_list, dtype='M8[s]')]).astype( + 'M8[ns]') # all trans listed + trans = trans.view('i8') + trans[0] = NPY_NAT + 1 + + # deltas + deltas = np.array([v.offset for v in ( + tz._ttinfo_before,) + tz._trans_idx], dtype='i8') + deltas *= 1000000000 + typ = 'dateutil' + + elif is_fixed_offset(tz): + trans = np.array([NPY_NAT + 1], dtype=np.int64) + deltas = np.array([tz._ttinfo_std.offset], + dtype='i8') * 1000000000 + typ = 'fixed' + else: + # 2018-07-12 this is not reached in the tests, and this case + # is not handled in any of the functions that call + # get_dst_info. If this case _were_ hit the calling + # functions would then hit an IndexError because they assume + # `deltas` is non-empty. + # (under the just-deleted code that returned empty arrays) + raise AssertionError("dateutil tzinfo is not a FixedOffset " + "and has an empty `_trans_list`.", tz) + else: + # static tzinfo, we can get here with pytz.StaticTZInfo + # which are not caught by treat_tz_as_pytz + trans = np.array([NPY_NAT + 1], dtype=np.int64) + num = int(get_utcoffset(tz, None).total_seconds()) * 1_000_000_000 + deltas = np.array([num], dtype=np.int64) + typ = "static" + + dst_cache[cache_key] = (trans, deltas, typ) + + return dst_cache[cache_key] + + +def infer_tzinfo(datetime start, datetime end): + if start is not None and end is not None: + tz = start.tzinfo + if not tz_compare(tz, end.tzinfo): + raise AssertionError(f'Inputs must both have the same timezone, ' + f'{tz} != {end.tzinfo}') + elif start is not None: + tz = start.tzinfo + elif end is not None: + tz = end.tzinfo + else: + tz = None + return tz + + +cpdef bint tz_compare(tzinfo start, tzinfo end): + """ + Compare string representations of timezones + + The same timezone can be represented as different instances of + timezones. 
For example
+    `<DstTzInfo 'Europe/Paris' LMT+0:09:00 STD>` and
+    `<DstTzInfo 'Europe/Paris' CET+1:00:00 STD>` are essentially the same
+    timezone but aren't evaluated as such, although the string representation
+    for both of these is `'Europe/Paris'`.
+
+    This exists only to add a notion of equality to pytz-style zones
+    that is compatible with the notion of equality expected of tzinfo
+    subclasses.
+
+    Parameters
+    ----------
+    start : tzinfo
+    end : tzinfo
+
+    Returns
+    -------
+    bool
+    """
+    # GH 18523
+    if is_utc(start):
+        # GH#38851 consider pytz/dateutil/stdlib UTCs as equivalent
+        return is_utc(end)
+    elif is_utc(end):
+        # Ensure we don't treat tzlocal as equal to UTC when running in UTC
+        return False
+    elif start is None or end is None:
+        return start is None and end is None
+    return get_timezone(start) == get_timezone(end)
+
+
+def tz_standardize(tz: tzinfo) -> tzinfo:
+    """
+    If the passed tz is a pytz timezone object, "normalize" it to a
+    consistent version.
+
+    Parameters
+    ----------
+    tz : tzinfo
+
+    Returns
+    -------
+    tzinfo
+
+    Examples
+    --------
+    >>> from datetime import datetime
+    >>> import pytz
+    >>> from pytz import timezone
+    >>> tz = timezone('US/Pacific').normalize(
+    ...     datetime(2014, 1, 1, tzinfo=pytz.utc)
+    ... ).tzinfo
+    >>> tz
+    <DstTzInfo 'US/Pacific' PST-1 day, 16:00:00 STD>
+    >>> tz_standardize(tz)
+    <DstTzInfo 'US/Pacific' LMT-1 day, 16:07:00 STD>
+
+    >>> tz = timezone('US/Pacific')
+    >>> tz
+    <DstTzInfo 'US/Pacific' LMT-1 day, 16:07:00 STD>
+    >>> tz_standardize(tz)
+    <DstTzInfo 'US/Pacific' LMT-1 day, 16:07:00 STD>
+    """
+    if treat_tz_as_pytz(tz):
+        return pytz.timezone(str(tz))
+    return tz
diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/tzconversion.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/tzconversion.cpython-38-x86_64-linux-gnu.so
new file mode 100755
index 0000000..ae98e07
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/tzconversion.cpython-38-x86_64-linux-gnu.so differ
diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/tzconversion.pxd b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/tzconversion.pxd
new file mode 100644
index 0000000..3666d00
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/tzconversion.pxd
@@ -0,0 +1,11 @@
+from cpython.datetime cimport tzinfo
+from numpy cimport int64_t
+
+
+cdef int64_t tz_convert_utc_to_tzlocal(
+    int64_t utc_val, tzinfo tz, bint* fold=*
+) except? -1
+cpdef int64_t tz_convert_from_utc_single(int64_t val, tzinfo tz)
+cdef int64_t tz_localize_to_utc_single(
+    int64_t val, tzinfo tz, object ambiguous=*, object nonexistent=*
+) except? -1
diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/tzconversion.pyi b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/tzconversion.pyi
new file mode 100644
index 0000000..e1a0263
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/tzconversion.pyi
@@ -0,0 +1,21 @@
+from datetime import (
+    timedelta,
+    tzinfo,
+)
+from typing import Iterable
+
+import numpy as np
+
+from pandas._typing import npt
+
+def tz_convert_from_utc(
+    vals: npt.NDArray[np.int64],  # const int64_t[:]
+    tz: tzinfo,
+) -> npt.NDArray[np.int64]: ...
+def tz_convert_from_utc_single(val: np.int64, tz: tzinfo) -> np.int64: ...
+def tz_localize_to_utc(
+    vals: npt.NDArray[np.int64],
+    tz: tzinfo | None,
+    ambiguous: str | bool | Iterable[bool] | None = ...,
+    nonexistent: str | timedelta | np.timedelta64 | None = ...,
+) -> npt.NDArray[np.int64]: ...
diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/tzconversion.pyx b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/tzconversion.pyx new file mode 100644 index 0000000..d28b851 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/tzconversion.pyx @@ -0,0 +1,599 @@ +""" +timezone conversion +""" +import cython +from cython import Py_ssize_t + +from cpython.datetime cimport ( + PyDateTime_IMPORT, + PyDelta_Check, + datetime, + timedelta, + tzinfo, +) + +PyDateTime_IMPORT + +from dateutil.tz import tzutc +import numpy as np +import pytz + +cimport numpy as cnp +from numpy cimport ( + int64_t, + intp_t, + ndarray, + uint8_t, +) + +cnp.import_array() + +from pandas._libs.tslibs.ccalendar cimport ( + DAY_NANOS, + HOUR_NANOS, +) +from pandas._libs.tslibs.nattype cimport NPY_NAT +from pandas._libs.tslibs.np_datetime cimport ( + dt64_to_dtstruct, + npy_datetimestruct, +) +from pandas._libs.tslibs.timezones cimport ( + get_dst_info, + get_utcoffset, + is_fixed_offset, + is_tzlocal, + is_utc, +) + + +cdef int64_t tz_localize_to_utc_single( + int64_t val, tzinfo tz, object ambiguous=None, object nonexistent=None, +) except? -1: + """See tz_localize_to_utc.__doc__""" + cdef: + int64_t delta + int64_t[:] deltas + + if val == NPY_NAT: + return val + + elif is_utc(tz) or tz is None: + return val + + elif is_tzlocal(tz): + return _tz_convert_tzlocal_utc(val, tz, to_utc=True) + + elif is_fixed_offset(tz): + # TODO: in this case we should be able to use get_utcoffset, + # that returns None for e.g. 'dateutil//usr/share/zoneinfo/Etc/GMT-9' + _, deltas, _ = get_dst_info(tz) + delta = deltas[0] + return val - delta + + else: + return tz_localize_to_utc( + np.array([val], dtype="i8"), + tz, + ambiguous=ambiguous, + nonexistent=nonexistent, + )[0] + + +@cython.boundscheck(False) +@cython.wraparound(False) +def tz_localize_to_utc(ndarray[int64_t] vals, tzinfo tz, object ambiguous=None, + object nonexistent=None): + """ + Localize tzinfo-naive i8 to given time zone (using pytz). If + there are ambiguities in the values, raise AmbiguousTimeError. + + Parameters + ---------- + vals : ndarray[int64_t] + tz : tzinfo or None + ambiguous : str, bool, or arraylike + When clocks moved backward due to DST, ambiguous times may arise. + For example in Central European Time (UTC+01), when going from 03:00 + DST to 02:00 non-DST, 02:30:00 local time occurs both at 00:30:00 UTC + and at 01:30:00 UTC. In such a situation, the `ambiguous` parameter + dictates how ambiguous times should be handled. + + - 'infer' will attempt to infer fall dst-transition hours based on + order + - bool-ndarray where True signifies a DST time, False signifies a + non-DST time (note that this flag is only applicable for ambiguous + times, but the array must have the same length as vals) + - bool if True, treat all vals as DST. 
If False, treat them as non-DST + - 'NaT' will return NaT where there are ambiguous times + + nonexistent : {None, "NaT", "shift_forward", "shift_backward", "raise", \ +timedelta-like} + How to handle non-existent times when converting wall times to UTC + + Returns + ------- + localized : ndarray[int64_t] + """ + cdef: + int64_t[:] deltas, idx_shifted, idx_shifted_left, idx_shifted_right + ndarray[uint8_t, cast=True] ambiguous_array, both_nat, both_eq + Py_ssize_t i, idx, pos, ntrans, n = len(vals) + Py_ssize_t delta_idx_offset, delta_idx, pos_left, pos_right + int64_t *tdata + int64_t v, left, right, val, v_left, v_right, new_local, remaining_mins + int64_t first_delta + int64_t shift_delta = 0 + ndarray[int64_t] trans, result, result_a, result_b, dst_hours, delta + ndarray trans_idx, grp, a_idx, b_idx, one_diff + npy_datetimestruct dts + bint infer_dst = False, is_dst = False, fill = False + bint shift_forward = False, shift_backward = False + bint fill_nonexist = False + list trans_grp + str stamp + + # Vectorized version of DstTzInfo.localize + if is_utc(tz) or tz is None: + return vals + + result = np.empty(n, dtype=np.int64) + + if is_tzlocal(tz): + for i in range(n): + v = vals[i] + if v == NPY_NAT: + result[i] = NPY_NAT + else: + result[i] = _tz_convert_tzlocal_utc(v, tz, to_utc=True) + return result + + # silence false-positive compiler warning + ambiguous_array = np.empty(0, dtype=bool) + if isinstance(ambiguous, str): + if ambiguous == 'infer': + infer_dst = True + elif ambiguous == 'NaT': + fill = True + elif isinstance(ambiguous, bool): + is_dst = True + if ambiguous: + ambiguous_array = np.ones(len(vals), dtype=bool) + else: + ambiguous_array = np.zeros(len(vals), dtype=bool) + elif hasattr(ambiguous, '__iter__'): + is_dst = True + if len(ambiguous) != len(vals): + raise ValueError("Length of ambiguous bool-array must be " + "the same size as vals") + ambiguous_array = np.asarray(ambiguous, dtype=bool) + + if nonexistent == 'NaT': + fill_nonexist = True + elif nonexistent == 'shift_forward': + shift_forward = True + elif nonexistent == 'shift_backward': + shift_backward = True + elif PyDelta_Check(nonexistent): + from .timedeltas import delta_to_nanoseconds + shift_delta = delta_to_nanoseconds(nonexistent) + elif nonexistent not in ('raise', None): + msg = ("nonexistent must be one of {'NaT', 'raise', 'shift_forward', " + "shift_backwards} or a timedelta object") + raise ValueError(msg) + + trans, deltas, _ = get_dst_info(tz) + + tdata = cnp.PyArray_DATA(trans) + ntrans = len(trans) + + # Determine whether each date lies left of the DST transition (store in + # result_a) or right of the DST transition (store in result_b) + result_a = np.empty(n, dtype=np.int64) + result_b = np.empty(n, dtype=np.int64) + result_a[:] = NPY_NAT + result_b[:] = NPY_NAT + + idx_shifted_left = (np.maximum(0, trans.searchsorted( + vals - DAY_NANOS, side='right') - 1)).astype(np.int64) + + idx_shifted_right = (np.maximum(0, trans.searchsorted( + vals + DAY_NANOS, side='right') - 1)).astype(np.int64) + + for i in range(n): + val = vals[i] + v_left = val - deltas[idx_shifted_left[i]] + pos_left = bisect_right_i8(tdata, v_left, ntrans) - 1 + # timestamp falls to the left side of the DST transition + if v_left + deltas[pos_left] == val: + result_a[i] = v_left + + v_right = val - deltas[idx_shifted_right[i]] + pos_right = bisect_right_i8(tdata, v_right, ntrans) - 1 + # timestamp falls to the right side of the DST transition + if v_right + deltas[pos_right] == val: + result_b[i] = v_right + + # silence 
false-positive compiler warning + dst_hours = np.empty(0, dtype=np.int64) + if infer_dst: + dst_hours = np.empty(n, dtype=np.int64) + dst_hours[:] = NPY_NAT + + # Get the ambiguous hours (given the above, these are the hours + # where result_a != result_b and neither of them are NAT) + both_nat = np.logical_and(result_a != NPY_NAT, result_b != NPY_NAT) + both_eq = result_a == result_b + trans_idx = np.squeeze(np.nonzero(np.logical_and(both_nat, ~both_eq))) + if trans_idx.size == 1: + stamp = _render_tstamp(vals[trans_idx]) + raise pytz.AmbiguousTimeError( + f"Cannot infer dst time from {stamp} as there " + f"are no repeated times") + # Split the array into contiguous chunks (where the difference between + # indices is 1). These are effectively dst transitions in different + # years which is useful for checking that there is not an ambiguous + # transition in an individual year. + if trans_idx.size > 0: + one_diff = np.where(np.diff(trans_idx) != 1)[0] + 1 + trans_grp = np.array_split(trans_idx, one_diff) + + # Iterate through each day, if there are no hours where the + # delta is negative (indicates a repeat of hour) the switch + # cannot be inferred + for grp in trans_grp: + + delta = np.diff(result_a[grp]) + if grp.size == 1 or np.all(delta > 0): + stamp = _render_tstamp(vals[grp[0]]) + raise pytz.AmbiguousTimeError(stamp) + + # Find the index for the switch and pull from a for dst and b + # for standard + switch_idx = (delta <= 0).nonzero()[0] + if switch_idx.size > 1: + raise pytz.AmbiguousTimeError( + f"There are {switch_idx.size} dst switches when " + f"there should only be 1.") + switch_idx = switch_idx[0] + 1 + # Pull the only index and adjust + a_idx = grp[:switch_idx] + b_idx = grp[switch_idx:] + dst_hours[grp] = np.hstack((result_a[a_idx], result_b[b_idx])) + + for i in range(n): + val = vals[i] + left = result_a[i] + right = result_b[i] + if val == NPY_NAT: + result[i] = val + elif left != NPY_NAT and right != NPY_NAT: + if left == right: + result[i] = left + else: + if infer_dst and dst_hours[i] != NPY_NAT: + result[i] = dst_hours[i] + elif is_dst: + if ambiguous_array[i]: + result[i] = left + else: + result[i] = right + elif fill: + result[i] = NPY_NAT + else: + stamp = _render_tstamp(val) + raise pytz.AmbiguousTimeError( + f"Cannot infer dst time from {stamp}, try using the " + f"'ambiguous' argument") + elif left != NPY_NAT: + result[i] = left + elif right != NPY_NAT: + result[i] = right + else: + # Handle nonexistent times + if shift_forward or shift_backward or shift_delta != 0: + # Shift the nonexistent time to the closest existing time + remaining_mins = val % HOUR_NANOS + if shift_delta != 0: + # Validate that we don't relocalize on another nonexistent + # time + if -1 < shift_delta + remaining_mins < HOUR_NANOS: + raise ValueError( + f"The provided timedelta will relocalize on a " + f"nonexistent time: {nonexistent}" + ) + new_local = val + shift_delta + elif shift_forward: + new_local = val + (HOUR_NANOS - remaining_mins) + else: + # Subtract 1 since the beginning hour is _inclusive_ of + # nonexistent times + new_local = val - remaining_mins - 1 + delta_idx = trans.searchsorted(new_local, side='right') + # Shift the delta_idx by if the UTC offset of + # the target tz is greater than 0 and we're moving forward + # or vice versa + first_delta = deltas[0] + if (shift_forward or shift_delta > 0) and first_delta > 0: + delta_idx_offset = 1 + elif (shift_backward or shift_delta < 0) and first_delta < 0: + delta_idx_offset = 1 + else: + delta_idx_offset = 0 + delta_idx = 
delta_idx - delta_idx_offset + result[i] = new_local - deltas[delta_idx] + elif fill_nonexist: + result[i] = NPY_NAT + else: + stamp = _render_tstamp(val) + raise pytz.NonExistentTimeError(stamp) + + return result + + +cdef inline Py_ssize_t bisect_right_i8(int64_t *data, + int64_t val, Py_ssize_t n): + cdef: + Py_ssize_t pivot, left = 0, right = n + + assert n >= 1 + + # edge cases + if val > data[n - 1]: + return n + + if val < data[0]: + return 0 + + while left < right: + pivot = left + (right - left) // 2 + + if data[pivot] <= val: + left = pivot + 1 + else: + right = pivot + + return left + + +cdef inline str _render_tstamp(int64_t val): + """ Helper function to render exception messages""" + from pandas._libs.tslibs.timestamps import Timestamp + return str(Timestamp(val)) + + +# ---------------------------------------------------------------------- +# Timezone Conversion + +cdef int64_t tz_convert_utc_to_tzlocal( + int64_t utc_val, tzinfo tz, bint* fold=NULL +) except? -1: + """ + Parameters + ---------- + utc_val : int64_t + tz : tzinfo + fold : bint* + pointer to fold: whether datetime ends up in a fold or not + after adjustment + + Returns + ------- + local_val : int64_t + """ + return _tz_convert_tzlocal_utc(utc_val, tz, to_utc=False, fold=fold) + + +cpdef int64_t tz_convert_from_utc_single(int64_t val, tzinfo tz): + """ + Convert the val (in i8) from UTC to tz + + This is a single value version of tz_convert_from_utc. + + Parameters + ---------- + val : int64 + tz : tzinfo + + Returns + ------- + converted: int64 + """ + cdef: + int64_t delta + int64_t[:] deltas + ndarray[int64_t, ndim=1] trans + intp_t pos + + if val == NPY_NAT: + return val + + if is_utc(tz): + return val + elif is_tzlocal(tz): + return _tz_convert_tzlocal_utc(val, tz, to_utc=False) + elif is_fixed_offset(tz): + _, deltas, _ = get_dst_info(tz) + delta = deltas[0] + return val + delta + else: + trans, deltas, _ = get_dst_info(tz) + pos = trans.searchsorted(val, side="right") - 1 + return val + deltas[pos] + + +def tz_convert_from_utc(const int64_t[:] vals, tzinfo tz): + """ + Convert the values (in i8) from UTC to tz + + Parameters + ---------- + vals : int64 ndarray + tz : tzinfo + + Returns + ------- + int64 ndarray of converted + """ + cdef: + const int64_t[:] converted + + if len(vals) == 0: + return np.array([], dtype=np.int64) + + converted = _tz_convert_from_utc(vals, tz) + return np.array(converted, dtype=np.int64) + + +@cython.boundscheck(False) +@cython.wraparound(False) +cdef const int64_t[:] _tz_convert_from_utc(const int64_t[:] vals, tzinfo tz): + """ + Convert the given values (in i8) either to UTC or from UTC. 
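+
+    In practice only the UTC -> ``tz`` direction is used here: each value is
+    shifted by the zone's offset at that instant, ``val + deltas[pos]``,
+    mirroring ``tz_convert_from_utc_single``.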
+ + Parameters + ---------- + vals : int64 ndarray + tz : tzinfo + + Returns + ------- + converted : ndarray[int64_t] + """ + cdef: + int64_t[:] converted, deltas + Py_ssize_t i, n = len(vals) + int64_t val, delta + intp_t[:] pos + ndarray[int64_t] trans + str typ + + if is_utc(tz): + return vals + elif is_tzlocal(tz): + converted = np.empty(n, dtype=np.int64) + for i in range(n): + val = vals[i] + if val == NPY_NAT: + converted[i] = NPY_NAT + else: + converted[i] = _tz_convert_tzlocal_utc(val, tz, to_utc=False) + else: + converted = np.empty(n, dtype=np.int64) + + trans, deltas, typ = get_dst_info(tz) + + if typ not in ["pytz", "dateutil"]: + # FixedOffset, we know len(deltas) == 1 + delta = deltas[0] + + for i in range(n): + val = vals[i] + if val == NPY_NAT: + converted[i] = val + else: + converted[i] = val + delta + + else: + pos = trans.searchsorted(vals, side="right") - 1 + + for i in range(n): + val = vals[i] + if val == NPY_NAT: + converted[i] = val + else: + if pos[i] < 0: + # TODO: How is this reached? Should we be checking for + # it elsewhere? + raise ValueError("First time before start of DST info") + + converted[i] = val + deltas[pos[i]] + + return converted + + +# OSError may be thrown by tzlocal on windows at or close to 1970-01-01 +# see https://github.com/pandas-dev/pandas/pull/37591#issuecomment-720628241 +cdef inline int64_t _tzlocal_get_offset_components(int64_t val, tzinfo tz, + bint to_utc, + bint *fold=NULL) except? -1: + """ + Calculate offset in nanoseconds needed to convert the i8 representation of + a datetime from a tzlocal timezone to UTC, or vice-versa. + + Parameters + ---------- + val : int64_t + tz : tzinfo + to_utc : bint + True if converting tzlocal _to_ UTC, False if going the other direction + fold : bint*, default NULL + pointer to fold: whether datetime ends up in a fold or not + after adjustment + + Returns + ------- + delta : int64_t + + Notes + ----- + Sets fold by pointer + """ + cdef: + npy_datetimestruct dts + datetime dt + int64_t delta + timedelta td + + dt64_to_dtstruct(val, &dts) + dt = datetime(dts.year, dts.month, dts.day, dts.hour, + dts.min, dts.sec, dts.us) + # tz.utcoffset only makes sense if datetime + # is _wall time_, so if val is a UTC timestamp convert to wall time + if not to_utc: + dt = dt.replace(tzinfo=tzutc()) + dt = dt.astimezone(tz) + + if fold is not NULL: + fold[0] = dt.fold + + td = tz.utcoffset(dt) + return int(td.total_seconds() * 1_000_000_000) + + +# OSError may be thrown by tzlocal on windows at or close to 1970-01-01 +# see https://github.com/pandas-dev/pandas/pull/37591#issuecomment-720628241 +cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=True, + bint* fold=NULL) except? -1: + """ + Convert the i8 representation of a datetime from a tzlocal timezone to + UTC, or vice-versa. 
+ + Private, not intended for use outside of tslibs.conversion + + Parameters + ---------- + val : int64_t + tz : tzinfo + to_utc : bint + True if converting tzlocal _to_ UTC, False if going the other direction + fold : bint* + pointer to fold: whether datetime ends up in a fold or not + after adjustment + + Returns + ------- + result : int64_t + + Notes + ----- + Sets fold by pointer + """ + cdef: + int64_t delta + + delta = _tzlocal_get_offset_components(val, tz, to_utc, fold) + + if to_utc: + return val - delta + else: + return val + delta diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/util.pxd b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/util.pxd new file mode 100644 index 0000000..150516a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/util.pxd @@ -0,0 +1,222 @@ + +from cpython.object cimport PyTypeObject + + +cdef extern from *: + """ + PyObject* char_to_string(const char* data) { + return PyUnicode_FromString(data); + } + """ + object char_to_string(const char* data) + + +cdef extern from "Python.h": + # Note: importing extern-style allows us to declare these as nogil + # functions, whereas `from cpython cimport` does not. + bint PyUnicode_Check(object obj) nogil + bint PyBool_Check(object obj) nogil + bint PyFloat_Check(object obj) nogil + bint PyComplex_Check(object obj) nogil + bint PyObject_TypeCheck(object obj, PyTypeObject* type) nogil + + # Note that following functions can potentially raise an exception, + # thus they cannot be declared 'nogil'. Also PyUnicode_AsUTF8AndSize() can + # potentially allocate memory inside in unlikely case of when underlying + # unicode object was stored as non-utf8 and utf8 wasn't requested before. + const char* PyUnicode_AsUTF8AndSize(object obj, + Py_ssize_t* length) except NULL + +from numpy cimport ( + float64_t, + int64_t, +) + + +cdef extern from "numpy/arrayobject.h": + PyTypeObject PyFloatingArrType_Type + +cdef extern from "numpy/ndarrayobject.h": + PyTypeObject PyTimedeltaArrType_Type + PyTypeObject PyDatetimeArrType_Type + PyTypeObject PyComplexFloatingArrType_Type + PyTypeObject PyBoolArrType_Type + + bint PyArray_IsIntegerScalar(obj) nogil + bint PyArray_Check(obj) nogil + +cdef extern from "numpy/npy_common.h": + int64_t NPY_MIN_INT64 + + +cdef inline int64_t get_nat(): + return NPY_MIN_INT64 + + +# -------------------------------------------------------------------- +# Type Checking + +cdef inline bint is_integer_object(object obj) nogil: + """ + Cython equivalent of + + `isinstance(val, (int, long, np.integer)) and not isinstance(val, bool)` + + Parameters + ---------- + val : object + + Returns + ------- + is_integer : bool + + Notes + ----- + This counts np.timedelta64 objects as integers. 
+ """ + return (not PyBool_Check(obj) and PyArray_IsIntegerScalar(obj) + and not is_timedelta64_object(obj)) + + +cdef inline bint is_float_object(object obj) nogil: + """ + Cython equivalent of `isinstance(val, (float, np.complex_))` + + Parameters + ---------- + val : object + + Returns + ------- + is_float : bool + """ + return (PyFloat_Check(obj) or + (PyObject_TypeCheck(obj, &PyFloatingArrType_Type))) + + +cdef inline bint is_complex_object(object obj) nogil: + """ + Cython equivalent of `isinstance(val, (complex, np.complex_))` + + Parameters + ---------- + val : object + + Returns + ------- + is_complex : bool + """ + return (PyComplex_Check(obj) or + PyObject_TypeCheck(obj, &PyComplexFloatingArrType_Type)) + + +cdef inline bint is_bool_object(object obj) nogil: + """ + Cython equivalent of `isinstance(val, (bool, np.bool_))` + + Parameters + ---------- + val : object + + Returns + ------- + is_bool : bool + """ + return (PyBool_Check(obj) or + PyObject_TypeCheck(obj, &PyBoolArrType_Type)) + + +cdef inline bint is_real_number_object(object obj) nogil: + return is_bool_object(obj) or is_integer_object(obj) or is_float_object(obj) + + +cdef inline bint is_timedelta64_object(object obj) nogil: + """ + Cython equivalent of `isinstance(val, np.timedelta64)` + + Parameters + ---------- + val : object + + Returns + ------- + is_timedelta64 : bool + """ + return PyObject_TypeCheck(obj, &PyTimedeltaArrType_Type) + + +cdef inline bint is_datetime64_object(object obj) nogil: + """ + Cython equivalent of `isinstance(val, np.datetime64)` + + Parameters + ---------- + val : object + + Returns + ------- + is_datetime64 : bool + """ + return PyObject_TypeCheck(obj, &PyDatetimeArrType_Type) + + +cdef inline bint is_array(object val): + """ + Cython equivalent of `isinstance(val, np.ndarray)` + + Parameters + ---------- + val : object + + Returns + ------- + is_ndarray : bool + """ + return PyArray_Check(val) + + +cdef inline bint is_nan(object val): + """ + Check if val is a Not-A-Number float or complex, including + float('NaN') and np.nan. + + Parameters + ---------- + val : object + + Returns + ------- + is_nan : bool + """ + cdef float64_t fval + if is_float_object(val): + fval = val + return fval != fval + return is_complex_object(val) and val != val + + +cdef inline const char* get_c_string_buf_and_size(str py_string, + Py_ssize_t *length) except NULL: + """ + Extract internal char* buffer of unicode or bytes object `py_string` with + getting length of this internal buffer saved in `length`. + + Notes + ----- + Python object owns memory, thus returned char* must not be freed. + `length` can be NULL if getting buffer length is not needed. 
+ + Parameters + ---------- + py_string : str + length : Py_ssize_t* + + Returns + ------- + buf : const char* + """ + return PyUnicode_AsUTF8AndSize(py_string, length) + + +cdef inline const char* get_c_string(str py_string) except NULL: + return get_c_string_buf_and_size(py_string, NULL) diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/vectorized.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/vectorized.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..01d0696 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/vectorized.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/vectorized.pyi b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/vectorized.pyi new file mode 100644 index 0000000..e9a39a6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/vectorized.pyi @@ -0,0 +1,36 @@ +""" +For cython types that cannot be represented precisely, closest-available +python equivalents are used, and the precise types kept as adjacent comments. +""" +from datetime import tzinfo + +import numpy as np + +from pandas._libs.tslibs.dtypes import Resolution +from pandas._libs.tslibs.offsets import BaseOffset +from pandas._typing import npt + +def dt64arr_to_periodarr( + stamps: npt.NDArray[np.int64], # const int64_t[:] + freq: int, + tz: tzinfo | None, +) -> npt.NDArray[np.int64]: ... # np.ndarray[np.int64, ndim=1] +def is_date_array_normalized( + stamps: npt.NDArray[np.int64], # const int64_t[:] + tz: tzinfo | None = ..., +) -> bool: ... +def normalize_i8_timestamps( + stamps: npt.NDArray[np.int64], # const int64_t[:] + tz: tzinfo | None, +) -> npt.NDArray[np.int64]: ... +def get_resolution( + stamps: npt.NDArray[np.int64], # const int64_t[:] + tz: tzinfo | None = ..., +) -> Resolution: ... +def ints_to_pydatetime( + arr: npt.NDArray[np.int64], # const int64_t[:] + tz: tzinfo | None = ..., + freq: str | BaseOffset | None = ..., + fold: bool = ..., + box: str = ..., +) -> npt.NDArray[np.object_]: ... diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/vectorized.pyx b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/vectorized.pyx new file mode 100644 index 0000000..02bdae3 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/tslibs/vectorized.pyx @@ -0,0 +1,436 @@ +import cython + +from cpython.datetime cimport ( + date, + datetime, + time, + tzinfo, +) + +import numpy as np + +from numpy cimport ( + int64_t, + intp_t, + ndarray, +) + +from .conversion cimport normalize_i8_stamp + +from .dtypes import Resolution + +from .nattype cimport ( + NPY_NAT, + c_NaT as NaT, +) +from .np_datetime cimport ( + dt64_to_dtstruct, + npy_datetimestruct, +) +from .offsets cimport to_offset +from .period cimport get_period_ordinal +from .timestamps cimport create_timestamp_from_ts +from .timezones cimport ( + get_dst_info, + is_tzlocal, + is_utc, +) +from .tzconversion cimport tz_convert_utc_to_tzlocal + +# ------------------------------------------------------------------------- + +cdef inline object create_datetime_from_ts( + int64_t value, + npy_datetimestruct dts, + tzinfo tz, + object freq, + bint fold, +): + """ + Convenience routine to construct a datetime.datetime from its parts. 
+ """ + return datetime( + dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, + tz, fold=fold, + ) + + +cdef inline object create_date_from_ts( + int64_t value, + npy_datetimestruct dts, + tzinfo tz, + object freq, + bint fold +): + """ + Convenience routine to construct a datetime.date from its parts. + """ + # GH#25057 add fold argument to match other func_create signatures + return date(dts.year, dts.month, dts.day) + + +cdef inline object create_time_from_ts( + int64_t value, + npy_datetimestruct dts, + tzinfo tz, + object freq, + bint fold +): + """ + Convenience routine to construct a datetime.time from its parts. + """ + return time(dts.hour, dts.min, dts.sec, dts.us, tz, fold=fold) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def ints_to_pydatetime( + const int64_t[:] arr, + tzinfo tz=None, + object freq=None, + bint fold=False, + str box="datetime" +) -> np.ndarray: + """ + Convert an i8 repr to an ndarray of datetimes, date, time or Timestamp. + + Parameters + ---------- + arr : array of i8 + tz : str, optional + convert to this timezone + freq : str/Offset, optional + freq to convert + fold : bint, default is 0 + Due to daylight saving time, one wall clock time can occur twice + when shifting from summer to winter time; fold describes whether the + datetime-like corresponds to the first (0) or the second time (1) + the wall clock hits the ambiguous time + + .. versionadded:: 1.1.0 + box : {'datetime', 'timestamp', 'date', 'time'}, default 'datetime' + * If datetime, convert to datetime.datetime + * If date, convert to datetime.date + * If time, convert to datetime.time + * If Timestamp, convert to pandas.Timestamp + + Returns + ------- + ndarray[object] of type specified by box + """ + cdef: + Py_ssize_t i, n = len(arr) + ndarray[int64_t] trans + int64_t[:] deltas + intp_t[:] pos + npy_datetimestruct dts + object dt, new_tz + str typ + int64_t value, local_value, delta = NPY_NAT # dummy for delta + ndarray[object] result = np.empty(n, dtype=object) + object (*func_create)(int64_t, npy_datetimestruct, tzinfo, object, bint) + bint use_utc = False, use_tzlocal = False, use_fixed = False + bint use_pytz = False + + if box == "date": + assert (tz is None), "tz should be None when converting to date" + + func_create = create_date_from_ts + elif box == "timestamp": + func_create = create_timestamp_from_ts + + if isinstance(freq, str): + freq = to_offset(freq) + elif box == "time": + func_create = create_time_from_ts + elif box == "datetime": + func_create = create_datetime_from_ts + else: + raise ValueError( + "box must be one of 'datetime', 'date', 'time' or 'timestamp'" + ) + + if is_utc(tz) or tz is None: + use_utc = True + elif is_tzlocal(tz): + use_tzlocal = True + else: + trans, deltas, typ = get_dst_info(tz) + if typ not in ["pytz", "dateutil"]: + # static/fixed; in this case we know that len(delta) == 1 + use_fixed = True + delta = deltas[0] + else: + pos = trans.searchsorted(arr, side="right") - 1 + use_pytz = typ == "pytz" + + for i in range(n): + new_tz = tz + value = arr[i] + + if value == NPY_NAT: + result[i] = NaT + else: + if use_utc: + local_value = value + elif use_tzlocal: + local_value = tz_convert_utc_to_tzlocal(value, tz) + elif use_fixed: + local_value = value + delta + elif not use_pytz: + # i.e. dateutil + # no zone-name change for dateutil tzs - dst etc + # represented in single object. 
+ local_value = value + deltas[pos[i]] + else: + # pytz + # find right representation of dst etc in pytz timezone + new_tz = tz._tzinfos[tz._transition_info[pos[i]]] + local_value = value + deltas[pos[i]] + + dt64_to_dtstruct(local_value, &dts) + result[i] = func_create(value, dts, new_tz, freq, fold) + + return result + + +# ------------------------------------------------------------------------- + +cdef: + int RESO_NS = 0 + int RESO_US = 1 + int RESO_MS = 2 + int RESO_SEC = 3 + int RESO_MIN = 4 + int RESO_HR = 5 + int RESO_DAY = 6 + int RESO_MTH = 7 + int RESO_QTR = 8 + int RESO_YR = 9 + + +cdef inline int _reso_stamp(npy_datetimestruct *dts): + if dts.us != 0: + if dts.us % 1000 == 0: + return RESO_MS + return RESO_US + elif dts.sec != 0: + return RESO_SEC + elif dts.min != 0: + return RESO_MIN + elif dts.hour != 0: + return RESO_HR + return RESO_DAY + + +def get_resolution(const int64_t[:] stamps, tzinfo tz=None) -> Resolution: + cdef: + Py_ssize_t i, n = len(stamps) + npy_datetimestruct dts + int reso = RESO_DAY, curr_reso + ndarray[int64_t] trans + int64_t[:] deltas + intp_t[:] pos + int64_t local_val, delta = NPY_NAT + bint use_utc = False, use_tzlocal = False, use_fixed = False + + if is_utc(tz) or tz is None: + use_utc = True + elif is_tzlocal(tz): + use_tzlocal = True + else: + trans, deltas, typ = get_dst_info(tz) + if typ not in ["pytz", "dateutil"]: + # static/fixed; in this case we know that len(delta) == 1 + use_fixed = True + delta = deltas[0] + else: + pos = trans.searchsorted(stamps, side="right") - 1 + + for i in range(n): + if stamps[i] == NPY_NAT: + continue + + if use_utc: + local_val = stamps[i] + elif use_tzlocal: + local_val = tz_convert_utc_to_tzlocal(stamps[i], tz) + elif use_fixed: + local_val = stamps[i] + delta + else: + local_val = stamps[i] + deltas[pos[i]] + + dt64_to_dtstruct(local_val, &dts) + curr_reso = _reso_stamp(&dts) + if curr_reso < reso: + reso = curr_reso + + return Resolution(reso) + + +# ------------------------------------------------------------------------- + +@cython.wraparound(False) +@cython.boundscheck(False) +cpdef ndarray[int64_t] normalize_i8_timestamps(const int64_t[:] stamps, tzinfo tz): + """ + Normalize each of the (nanosecond) timezone aware timestamps in the given + array by rounding down to the beginning of the day (i.e. midnight). + This is midnight for the timezone `tz`. + + Parameters + ---------- + stamps : int64 ndarray + tz : tzinfo or None + + Returns + ------- + result : int64 ndarray of normalized nanosecond timestamps + """ + cdef: + Py_ssize_t i, n = len(stamps) + int64_t[:] result = np.empty(n, dtype=np.int64) + ndarray[int64_t] trans + int64_t[:] deltas + str typ + Py_ssize_t[:] pos + int64_t local_val, delta = NPY_NAT + bint use_utc = False, use_tzlocal = False, use_fixed = False + + if is_utc(tz) or tz is None: + use_utc = True + elif is_tzlocal(tz): + use_tzlocal = True + else: + trans, deltas, typ = get_dst_info(tz) + if typ not in ["pytz", "dateutil"]: + # static/fixed; in this case we know that len(delta) == 1 + use_fixed = True + delta = deltas[0] + else: + pos = trans.searchsorted(stamps, side="right") - 1 + + for i in range(n): + # TODO: reinstate nogil for use_utc case? 
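+ # (Illustrative sketch, not from the upstream pandas source: each pass of + # the loop below converts the UTC stamp to wall time, then floors it to + # wall-clock midnight. E.g. with a fixed +02:00 offset, a stamp for + # 2021-01-01 13:45 UTC has local_val 2021-01-01 15:45, and + # normalize_i8_stamp truncates that to 2021-01-01 00:00 in the local + # frame, conceptually local_val - local_val % day_nanos, where day_nanos + # is the number of nanoseconds per day.)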
+ if stamps[i] == NPY_NAT: + result[i] = NPY_NAT + continue + + if use_utc: + local_val = stamps[i] + elif use_tzlocal: + local_val = tz_convert_utc_to_tzlocal(stamps[i], tz) + elif use_fixed: + local_val = stamps[i] + delta + else: + local_val = stamps[i] + deltas[pos[i]] + + result[i] = normalize_i8_stamp(local_val) + + return result.base # `.base` to access underlying ndarray + + +@cython.wraparound(False) +@cython.boundscheck(False) +def is_date_array_normalized(const int64_t[:] stamps, tzinfo tz=None) -> bool: + """ + Check if all of the given (nanosecond) timestamps are normalized to + midnight, i.e. hour == minute == second == 0. If the optional timezone + `tz` is not None, then this is midnight for this timezone. + + Parameters + ---------- + stamps : int64 ndarray + tz : tzinfo or None + + Returns + ------- + is_normalized : bool True if all stamps are normalized + """ + cdef: + Py_ssize_t i, n = len(stamps) + ndarray[int64_t] trans + int64_t[:] deltas + intp_t[:] pos + int64_t local_val, delta = NPY_NAT + str typ + int64_t day_nanos = 24 * 3600 * 1_000_000_000 + bint use_utc = False, use_tzlocal = False, use_fixed = False + + if is_utc(tz) or tz is None: + use_utc = True + elif is_tzlocal(tz): + use_tzlocal = True + else: + trans, deltas, typ = get_dst_info(tz) + if typ not in ["pytz", "dateutil"]: + # static/fixed; in this case we know that len(delta) == 1 + use_fixed = True + delta = deltas[0] + else: + pos = trans.searchsorted(stamps, side="right") - 1 + + for i in range(n): + if use_utc: + local_val = stamps[i] + elif use_tzlocal: + local_val = tz_convert_utc_to_tzlocal(stamps[i], tz) + elif use_fixed: + local_val = stamps[i] + delta + else: + local_val = stamps[i] + deltas[pos[i]] + + if local_val % day_nanos != 0: + return False + + return True + + +# ------------------------------------------------------------------------- + + +@cython.wraparound(False) +@cython.boundscheck(False) +def dt64arr_to_periodarr(const int64_t[:] stamps, int freq, tzinfo tz): + cdef: + Py_ssize_t n = len(stamps) + int64_t[:] result = np.empty(n, dtype=np.int64) + ndarray[int64_t] trans + int64_t[:] deltas + Py_ssize_t[:] pos + npy_datetimestruct dts + int64_t local_val, delta = NPY_NAT + bint use_utc = False, use_tzlocal = False, use_fixed = False + + if is_utc(tz) or tz is None: + use_utc = True + elif is_tzlocal(tz): + use_tzlocal = True + else: + trans, deltas, typ = get_dst_info(tz) + if typ not in ["pytz", "dateutil"]: + # static/fixed; in this case we know that len(delta) == 1 + use_fixed = True + delta = deltas[0] + else: + pos = trans.searchsorted(stamps, side="right") - 1 + + for i in range(n): + # TODO: reinstate nogil for use_utc case? 
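+ # (Illustrative sketch, not from the upstream pandas source: after the + # wall-time conversion below, get_period_ordinal maps the datetime struct + # to an integer period ordinal for the target freq; for a monthly freq + # this is roughly (dts.year - 1970) * 12 + dts.month - 1, so a wall time + # in 2021-03 maps to 51 * 12 + 2 = 614.)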
+ if stamps[i] == NPY_NAT: + result[i] = NPY_NAT + continue + + if use_utc: + local_val = stamps[i] + elif use_tzlocal: + local_val = tz_convert_utc_to_tzlocal(stamps[i], tz) + elif use_fixed: + local_val = stamps[i] + delta + else: + local_val = stamps[i] + deltas[pos[i]] + + dt64_to_dtstruct(local_val, &dts) + result[i] = get_period_ordinal(&dts, freq) + + return result.base # .base to get underlying ndarray diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/util.pxd b/.local/lib/python3.8/site-packages/pandas/_libs/util.pxd new file mode 100644 index 0000000..df88c89 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/util.pxd @@ -0,0 +1,18 @@ +cimport numpy as cnp + +from pandas._libs.tslibs.util cimport * + + +cdef extern from "src/headers/stdint.h": + enum: UINT8_MAX + enum: UINT16_MAX + enum: UINT32_MAX + enum: UINT64_MAX + enum: INT8_MIN + enum: INT8_MAX + enum: INT16_MIN + enum: INT16_MAX + enum: INT32_MAX + enum: INT32_MIN + enum: INT64_MAX + enum: INT64_MIN diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/window/__init__.py b/.local/lib/python3.8/site-packages/pandas/_libs/window/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/window/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/_libs/window/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..7df83e4 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/window/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/window/aggregations.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/_libs/window/aggregations.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..c51585f Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/window/aggregations.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/window/aggregations.pyi b/.local/lib/python3.8/site-packages/pandas/_libs/window/aggregations.pyi new file mode 100644 index 0000000..b926a7c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/window/aggregations.pyi @@ -0,0 +1,127 @@ +from typing import ( + Any, + Callable, + Literal, +) + +import numpy as np + +from pandas._typing import ( + WindowingRankType, + npt, +) + +def roll_sum( + values: np.ndarray, # const float64_t[:] + start: np.ndarray, # np.ndarray[np.int64] + end: np.ndarray, # np.ndarray[np.int64] + minp: int, # int64_t +) -> np.ndarray: ... # np.ndarray[float] +def roll_mean( + values: np.ndarray, # const float64_t[:] + start: np.ndarray, # np.ndarray[np.int64] + end: np.ndarray, # np.ndarray[np.int64] + minp: int, # int64_t +) -> np.ndarray: ... # np.ndarray[float] +def roll_var( + values: np.ndarray, # const float64_t[:] + start: np.ndarray, # np.ndarray[np.int64] + end: np.ndarray, # np.ndarray[np.int64] + minp: int, # int64_t + ddof: int = ..., +) -> np.ndarray: ... # np.ndarray[float] +def roll_skew( + values: np.ndarray, # np.ndarray[np.float64] + start: np.ndarray, # np.ndarray[np.int64] + end: np.ndarray, # np.ndarray[np.int64] + minp: int, # int64_t +) -> np.ndarray: ... # np.ndarray[float] +def roll_kurt( + values: np.ndarray, # np.ndarray[np.float64] + start: np.ndarray, # np.ndarray[np.int64] + end: np.ndarray, # np.ndarray[np.int64] + minp: int, # int64_t +) -> np.ndarray: ... 
# np.ndarray[float] +def roll_median_c( + values: np.ndarray, # np.ndarray[np.float64] + start: np.ndarray, # np.ndarray[np.int64] + end: np.ndarray, # np.ndarray[np.int64] + minp: int, # int64_t +) -> np.ndarray: ... # np.ndarray[float] +def roll_max( + values: np.ndarray, # np.ndarray[np.float64] + start: np.ndarray, # np.ndarray[np.int64] + end: np.ndarray, # np.ndarray[np.int64] + minp: int, # int64_t +) -> np.ndarray: ... # np.ndarray[float] +def roll_min( + values: np.ndarray, # np.ndarray[np.float64] + start: np.ndarray, # np.ndarray[np.int64] + end: np.ndarray, # np.ndarray[np.int64] + minp: int, # int64_t +) -> np.ndarray: ... # np.ndarray[float] +def roll_quantile( + values: np.ndarray, # const float64_t[:] + start: np.ndarray, # np.ndarray[np.int64] + end: np.ndarray, # np.ndarray[np.int64] + minp: int, # int64_t + quantile: float, # float64_t + interpolation: Literal["linear", "lower", "higher", "nearest", "midpoint"], +) -> np.ndarray: ... # np.ndarray[float] +def roll_rank( + values: np.ndarray, + start: np.ndarray, + end: np.ndarray, + minp: int, + percentile: bool, + method: WindowingRankType, + ascending: bool, +) -> np.ndarray: ... # np.ndarray[float] +def roll_apply( + obj: object, + start: np.ndarray, # np.ndarray[np.int64] + end: np.ndarray, # np.ndarray[np.int64] + minp: int, # int64_t + function: Callable[..., Any], + raw: bool, + args: tuple[Any, ...], + kwargs: dict[str, Any], +) -> npt.NDArray[np.float64]: ... +def roll_weighted_sum( + values: np.ndarray, # const float64_t[:] + weights: np.ndarray, # const float64_t[:] + minp: int, +) -> np.ndarray: ... # np.ndarray[np.float64] +def roll_weighted_mean( + values: np.ndarray, # const float64_t[:] + weights: np.ndarray, # const float64_t[:] + minp: int, +) -> np.ndarray: ... # np.ndarray[np.float64] +def roll_weighted_var( + values: np.ndarray, # const float64_t[:] + weights: np.ndarray, # const float64_t[:] + minp: int, # int64_t + ddof: int, # unsigned int +) -> np.ndarray: ... # np.ndarray[np.float64] +def ewm( + vals: np.ndarray, # const float64_t[:] + start: np.ndarray, # const int64_t[:] + end: np.ndarray, # const int64_t[:] + minp: int, + com: float, # float64_t + adjust: bool, + ignore_na: bool, + deltas: np.ndarray, # const float64_t[:] + normalize: bool, +) -> np.ndarray: ... # np.ndarray[np.float64] +def ewmcov( + input_x: np.ndarray, # const float64_t[:] + start: np.ndarray, # const int64_t[:] + end: np.ndarray, # const int64_t[:] + minp: int, + input_y: np.ndarray, # const float64_t[:] + com: float, # float64_t + adjust: bool, + ignore_na: bool, + bias: bool, +) -> np.ndarray: ... 
# np.ndarray[np.float64] diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/window/aggregations.pyx b/.local/lib/python3.8/site-packages/pandas/_libs/window/aggregations.pyx new file mode 100644 index 0000000..ff53a57 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/window/aggregations.pyx @@ -0,0 +1,1845 @@ +# cython: boundscheck=False, wraparound=False, cdivision=True + +import cython + +from libc.math cimport round +from libcpp.deque cimport deque + +from pandas._libs.algos cimport TiebreakEnumType + +import numpy as np + +cimport numpy as cnp +from numpy cimport ( + float32_t, + float64_t, + int64_t, + ndarray, +) + +cnp.import_array() + + +cdef extern from "../src/headers/cmath" namespace "std": + bint isnan(float64_t) nogil + bint notnan(float64_t) nogil + int signbit(float64_t) nogil + float64_t sqrt(float64_t x) nogil + +from pandas._libs.algos import is_monotonic +from pandas._libs.dtypes cimport numeric_t + + +cdef extern from "../src/skiplist.h": + ctypedef struct node_t: + node_t **next + int *width + double value + int is_nil + int levels + int ref_count + + ctypedef struct skiplist_t: + node_t *head + node_t **tmp_chain + int *tmp_steps + int size + int maxlevels + + skiplist_t* skiplist_init(int) nogil + void skiplist_destroy(skiplist_t*) nogil + double skiplist_get(skiplist_t*, int, int*) nogil + int skiplist_insert(skiplist_t*, double) nogil + int skiplist_remove(skiplist_t*, double) nogil + int skiplist_rank(skiplist_t*, double) nogil + int skiplist_min_rank(skiplist_t*, double) nogil + +cdef: + float32_t MINfloat32 = np.NINF + float64_t MINfloat64 = np.NINF + + float32_t MAXfloat32 = np.inf + float64_t MAXfloat64 = np.inf + + float64_t NaN = np.NaN + +cdef bint is_monotonic_increasing_start_end_bounds( + ndarray[int64_t, ndim=1] start, ndarray[int64_t, ndim=1] end +): + return is_monotonic(start, False)[0] and is_monotonic(end, False)[0] + +# ---------------------------------------------------------------------- +# Rolling sum + + +cdef inline float64_t calc_sum(int64_t minp, int64_t nobs, float64_t sum_x) nogil: + cdef: + float64_t result + + if nobs == 0 == minp: + result = 0 + elif nobs >= minp: + result = sum_x + else: + result = NaN + + return result + + +cdef inline void add_sum(float64_t val, int64_t *nobs, float64_t *sum_x, + float64_t *compensation) nogil: + """ add a value from the sum calc using Kahan summation """ + + cdef: + float64_t y, t + + # Not NaN + if notnan(val): + nobs[0] = nobs[0] + 1 + y = val - compensation[0] + t = sum_x[0] + y + compensation[0] = t - sum_x[0] - y + sum_x[0] = t + + +cdef inline void remove_sum(float64_t val, int64_t *nobs, float64_t *sum_x, + float64_t *compensation) nogil: + """ remove a value from the sum calc using Kahan summation """ + + cdef: + float64_t y, t + + # Not NaN + if notnan(val): + nobs[0] = nobs[0] - 1 + y = - val - compensation[0] + t = sum_x[0] + y + compensation[0] = t - sum_x[0] - y + sum_x[0] = t + + +def roll_sum(const float64_t[:] values, ndarray[int64_t] start, + ndarray[int64_t] end, int64_t minp) -> np.ndarray: + cdef: + Py_ssize_t i, j + float64_t sum_x, compensation_add, compensation_remove + int64_t s, e + int64_t nobs = 0, N = len(start) + ndarray[float64_t] output + bint is_monotonic_increasing_bounds + + is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds( + start, end + ) + output = np.empty(N, dtype=np.float64) + + with nogil: + + for i in range(0, N): + s = start[i] + e = end[i] + + if i == 0 or not is_monotonic_increasing_bounds or s >= 
end[i - 1]: + + # setup + + sum_x = compensation_add = compensation_remove = 0 + nobs = 0 + for j in range(s, e): + add_sum(values[j], &nobs, &sum_x, &compensation_add) + + else: + + # calculate deletes + for j in range(start[i - 1], s): + remove_sum(values[j], &nobs, &sum_x, &compensation_remove) + + # calculate adds + for j in range(end[i - 1], e): + add_sum(values[j], &nobs, &sum_x, &compensation_add) + + output[i] = calc_sum(minp, nobs, sum_x) + + if not is_monotonic_increasing_bounds: + nobs = 0 + sum_x = 0.0 + compensation_remove = 0.0 + + return output + + +# ---------------------------------------------------------------------- +# Rolling mean + + +cdef inline float64_t calc_mean(int64_t minp, Py_ssize_t nobs, + Py_ssize_t neg_ct, float64_t sum_x) nogil: + cdef: + float64_t result + + if nobs >= minp and nobs > 0: + result = sum_x / nobs + if neg_ct == 0 and result < 0: + # all positive + result = 0 + elif neg_ct == nobs and result > 0: + # all negative + result = 0 + else: + pass + else: + result = NaN + return result + + +cdef inline void add_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x, + Py_ssize_t *neg_ct, float64_t *compensation) nogil: + """ add a value from the mean calc using Kahan summation """ + cdef: + float64_t y, t + + # Not NaN + if notnan(val): + nobs[0] = nobs[0] + 1 + y = val - compensation[0] + t = sum_x[0] + y + compensation[0] = t - sum_x[0] - y + sum_x[0] = t + if signbit(val): + neg_ct[0] = neg_ct[0] + 1 + + +cdef inline void remove_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x, + Py_ssize_t *neg_ct, float64_t *compensation) nogil: + """ remove a value from the mean calc using Kahan summation """ + cdef: + float64_t y, t + + if notnan(val): + nobs[0] = nobs[0] - 1 + y = - val - compensation[0] + t = sum_x[0] + y + compensation[0] = t - sum_x[0] - y + sum_x[0] = t + if signbit(val): + neg_ct[0] = neg_ct[0] - 1 + + +def roll_mean(const float64_t[:] values, ndarray[int64_t] start, + ndarray[int64_t] end, int64_t minp) -> np.ndarray: + cdef: + float64_t val, compensation_add, compensation_remove, sum_x + int64_t s, e + Py_ssize_t nobs, i, j, neg_ct, N = len(start) + ndarray[float64_t] output + bint is_monotonic_increasing_bounds + + is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds( + start, end + ) + output = np.empty(N, dtype=np.float64) + + with nogil: + + for i in range(0, N): + s = start[i] + e = end[i] + + if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]: + + compensation_add = compensation_remove = sum_x = 0 + nobs = neg_ct = 0 + # setup + for j in range(s, e): + val = values[j] + add_mean(val, &nobs, &sum_x, &neg_ct, &compensation_add) + + else: + + # calculate deletes + for j in range(start[i - 1], s): + val = values[j] + remove_mean(val, &nobs, &sum_x, &neg_ct, &compensation_remove) + + # calculate adds + for j in range(end[i - 1], e): + val = values[j] + add_mean(val, &nobs, &sum_x, &neg_ct, &compensation_add) + + output[i] = calc_mean(minp, nobs, neg_ct, sum_x) + + if not is_monotonic_increasing_bounds: + nobs = 0 + neg_ct = 0 + sum_x = 0.0 + compensation_remove = 0.0 + return output + +# ---------------------------------------------------------------------- +# Rolling variance + + +cdef inline float64_t calc_var(int64_t minp, int ddof, float64_t nobs, + float64_t ssqdm_x) nogil: + cdef: + float64_t result + + # Variance is unchanged if no observation is added or removed + if (nobs >= minp) and (nobs > ddof): + + # pathological case + if nobs == 1: + result = 0 + else: + result = ssqdm_x / 
(nobs - ddof) + else: + result = NaN + + return result + + +cdef inline void add_var(float64_t val, float64_t *nobs, float64_t *mean_x, + float64_t *ssqdm_x, float64_t *compensation) nogil: + """ add a value from the var calc """ + cdef: + float64_t delta, prev_mean, y, t + + # `isnan` instead of equality as fix for GH-21813, msvc 2017 bug + if isnan(val): + return + + nobs[0] = nobs[0] + 1 + # Welford's method for the online variance-calculation + # using Kahan summation + # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + prev_mean = mean_x[0] - compensation[0] + y = val - compensation[0] + t = y - mean_x[0] + compensation[0] = t + mean_x[0] - y + delta = t + if nobs[0]: + mean_x[0] = mean_x[0] + delta / nobs[0] + else: + mean_x[0] = 0 + ssqdm_x[0] = ssqdm_x[0] + (val - prev_mean) * (val - mean_x[0]) + + +cdef inline void remove_var(float64_t val, float64_t *nobs, float64_t *mean_x, + float64_t *ssqdm_x, float64_t *compensation) nogil: + """ remove a value from the var calc """ + cdef: + float64_t delta, prev_mean, y, t + if notnan(val): + nobs[0] = nobs[0] - 1 + if nobs[0]: + # Welford's method for the online variance-calculation + # using Kahan summation + # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + prev_mean = mean_x[0] - compensation[0] + y = val - compensation[0] + t = y - mean_x[0] + compensation[0] = t + mean_x[0] - y + delta = t + mean_x[0] = mean_x[0] - delta / nobs[0] + ssqdm_x[0] = ssqdm_x[0] - (val - prev_mean) * (val - mean_x[0]) + else: + mean_x[0] = 0 + ssqdm_x[0] = 0 + + +def roll_var(const float64_t[:] values, ndarray[int64_t] start, + ndarray[int64_t] end, int64_t minp, int ddof=1) -> np.ndarray: + """ + Numerically stable implementation using Welford's method. + """ + cdef: + float64_t mean_x, ssqdm_x, nobs, compensation_add, + float64_t compensation_remove, + float64_t val, prev, delta, mean_x_old + int64_t s, e + Py_ssize_t i, j, N = len(start) + ndarray[float64_t] output + bint is_monotonic_increasing_bounds + + minp = max(minp, 1) + is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds( + start, end + ) + output = np.empty(N, dtype=np.float64) + + with nogil: + + for i in range(0, N): + + s = start[i] + e = end[i] + + # Over the first window, observations can only be added + # never removed + if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]: + + mean_x = ssqdm_x = nobs = compensation_add = compensation_remove = 0 + for j in range(s, e): + add_var(values[j], &nobs, &mean_x, &ssqdm_x, &compensation_add) + + else: + + # After the first window, observations can both be added + # and removed + + # calculate deletes + for j in range(start[i - 1], s): + remove_var(values[j], &nobs, &mean_x, &ssqdm_x, + &compensation_remove) + + # calculate adds + for j in range(end[i - 1], e): + add_var(values[j], &nobs, &mean_x, &ssqdm_x, &compensation_add) + + output[i] = calc_var(minp, ddof, nobs, ssqdm_x) + + if not is_monotonic_increasing_bounds: + nobs = 0.0 + mean_x = 0.0 + ssqdm_x = 0.0 + compensation_remove = 0.0 + + return output + +# ---------------------------------------------------------------------- +# Rolling skewness + + +cdef inline float64_t calc_skew(int64_t minp, int64_t nobs, + float64_t x, float64_t xx, + float64_t xxx) nogil: + cdef: + float64_t result, dnobs + float64_t A, B, C, R + + if nobs >= minp: + dnobs = nobs + A = x / dnobs + B = xx / dnobs - A * A + C = xxx / dnobs - A * A * A - 3 * A * B + + # #18044: with uniform distribution, floating issue will + # cause B != 0. 
and the result can become a very + # large number. + # + # in core/nanops.py nanskew/nankurt call the function + # _zero_out_fperr(m2) to fix floating error. + # if the variance is less than 1e-14 it can be + # treated as zero; here we follow the original + # skew/kurt behaviour and check B <= 1e-14 + if B <= 1e-14 or nobs < 3: + result = NaN + else: + R = sqrt(B) + result = ((sqrt(dnobs * (dnobs - 1.)) * C) / + ((dnobs - 2) * R * R * R)) + else: + result = NaN + + return result + + +cdef inline void add_skew(float64_t val, int64_t *nobs, + float64_t *x, float64_t *xx, + float64_t *xxx, + float64_t *compensation_x, + float64_t *compensation_xx, + float64_t *compensation_xxx) nogil: + """ add a value from the skew calc """ + cdef: + float64_t y, t + + # Not NaN + if notnan(val): + nobs[0] = nobs[0] + 1 + + y = val - compensation_x[0] + t = x[0] + y + compensation_x[0] = t - x[0] - y + x[0] = t + y = val * val - compensation_xx[0] + t = xx[0] + y + compensation_xx[0] = t - xx[0] - y + xx[0] = t + y = val * val * val - compensation_xxx[0] + t = xxx[0] + y + compensation_xxx[0] = t - xxx[0] - y + xxx[0] = t + + +cdef inline void remove_skew(float64_t val, int64_t *nobs, + float64_t *x, float64_t *xx, + float64_t *xxx, + float64_t *compensation_x, + float64_t *compensation_xx, + float64_t *compensation_xxx) nogil: + """ remove a value from the skew calc """ + cdef: + float64_t y, t + + # Not NaN + if notnan(val): + nobs[0] = nobs[0] - 1 + + y = - val - compensation_x[0] + t = x[0] + y + compensation_x[0] = t - x[0] - y + x[0] = t + y = - val * val - compensation_xx[0] + t = xx[0] + y + compensation_xx[0] = t - xx[0] - y + xx[0] = t + y = - val * val * val - compensation_xxx[0] + t = xxx[0] + y + compensation_xxx[0] = t - xxx[0] - y + xxx[0] = t + + +def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start, + ndarray[int64_t] end, int64_t minp) -> np.ndarray: + cdef: + Py_ssize_t i, j + float64_t val, prev, min_val, mean_val, sum_val = 0 + float64_t compensation_xxx_add, compensation_xxx_remove + float64_t compensation_xx_add, compensation_xx_remove + float64_t compensation_x_add, compensation_x_remove + float64_t x, xx, xxx + int64_t nobs = 0, N = len(start), V = len(values), nobs_mean = 0 + int64_t s, e + ndarray[float64_t] output, mean_array, values_copy + bint is_monotonic_increasing_bounds + + minp = max(minp, 3) + is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds( + start, end + ) + output = np.empty(N, dtype=np.float64) + min_val = np.nanmin(values) + values_copy = np.copy(values) + + with nogil: + for i in range(0, V): + val = values_copy[i] + if notnan(val): + nobs_mean += 1 + sum_val += val + mean_val = sum_val / nobs_mean + # Other cases would lead to imprecision for smallest values + if min_val - mean_val > -1e5: + mean_val = round(mean_val) + for i in range(0, V): + values_copy[i] = values_copy[i] - mean_val + + for i in range(0, N): + + s = start[i] + e = end[i] + + # Over the first window, observations can only be added, + # never removed + if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]: + + compensation_xxx_add = compensation_xxx_remove = 0 + compensation_xx_add = compensation_xx_remove = 0 + compensation_x_add = compensation_x_remove = 0 + x = xx = xxx = 0 + nobs = 0 + for j in range(s, e): + val = values_copy[j] + add_skew(val, &nobs, &x, &xx, &xxx, &compensation_x_add, + &compensation_xx_add, &compensation_xxx_add) + + else: + + # After the first window, observations can both be added + # and removed + # calculate deletes + for j 
in range(start[i - 1], s): + val = values_copy[j] + remove_skew(val, &nobs, &x, &xx, &xxx, &compensation_x_remove, + &compensation_xx_remove, &compensation_xxx_remove) + + # calculate adds + for j in range(end[i - 1], e): + val = values_copy[j] + add_skew(val, &nobs, &x, &xx, &xxx, &compensation_x_add, + &compensation_xx_add, &compensation_xxx_add) + + output[i] = calc_skew(minp, nobs, x, xx, xxx) + + if not is_monotonic_increasing_bounds: + nobs = 0 + x = 0.0 + xx = 0.0 + xxx = 0.0 + + return output + +# ---------------------------------------------------------------------- +# Rolling kurtosis + + +cdef inline float64_t calc_kurt(int64_t minp, int64_t nobs, + float64_t x, float64_t xx, + float64_t xxx, float64_t xxxx) nogil: + cdef: + float64_t result, dnobs + float64_t A, B, C, D, R, K + + if nobs >= minp: + dnobs = nobs + A = x / dnobs + R = A * A + B = xx / dnobs - R + R = R * A + C = xxx / dnobs - R - 3 * A * B + R = R * A + D = xxxx / dnobs - R - 6 * B * A * A - 4 * C * A + + # #18044: with uniform distribution, floating issue will + # cause B != 0. and the result can become a very + # large number. + # + # in core/nanops.py nanskew/nankurt call the function + # _zero_out_fperr(m2) to fix floating error. + # if the variance is less than 1e-14 it can be + # treated as zero; here we follow the original + # skew/kurt behaviour and check B <= 1e-14 + if B <= 1e-14 or nobs < 4: + result = NaN + else: + K = (dnobs * dnobs - 1.) * D / (B * B) - 3 * ((dnobs - 1.) ** 2) + result = K / ((dnobs - 2.) * (dnobs - 3.)) + else: + result = NaN + + return result + + +cdef inline void add_kurt(float64_t val, int64_t *nobs, + float64_t *x, float64_t *xx, + float64_t *xxx, float64_t *xxxx, + float64_t *compensation_x, + float64_t *compensation_xx, + float64_t *compensation_xxx, + float64_t *compensation_xxxx) nogil: + """ add a value from the kurtosis calc """ + cdef: + float64_t y, t + + # Not NaN + if notnan(val): + nobs[0] = nobs[0] + 1 + + y = val - compensation_x[0] + t = x[0] + y + compensation_x[0] = t - x[0] - y + x[0] = t + y = val * val - compensation_xx[0] + t = xx[0] + y + compensation_xx[0] = t - xx[0] - y + xx[0] = t + y = val * val * val - compensation_xxx[0] + t = xxx[0] + y + compensation_xxx[0] = t - xxx[0] - y + xxx[0] = t + y = val * val * val * val - compensation_xxxx[0] + t = xxxx[0] + y + compensation_xxxx[0] = t - xxxx[0] - y + xxxx[0] = t + + +cdef inline void remove_kurt(float64_t val, int64_t *nobs, + float64_t *x, float64_t *xx, + float64_t *xxx, float64_t *xxxx, + float64_t *compensation_x, + float64_t *compensation_xx, + float64_t *compensation_xxx, + float64_t *compensation_xxxx) nogil: + """ remove a value from the kurtosis calc """ + cdef: + float64_t y, t + + # Not NaN + if notnan(val): + nobs[0] = nobs[0] - 1 + + y = - val - compensation_x[0] + t = x[0] + y + compensation_x[0] = t - x[0] - y + x[0] = t + y = - val * val - compensation_xx[0] + t = xx[0] + y + compensation_xx[0] = t - xx[0] - y + xx[0] = t + y = - val * val * val - compensation_xxx[0] + t = xxx[0] + y + compensation_xxx[0] = t - xxx[0] - y + xxx[0] = t + y = - val * val * val * val - compensation_xxxx[0] + t = xxxx[0] + y + compensation_xxxx[0] = t - xxxx[0] - y + xxxx[0] = t + + +def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start, + ndarray[int64_t] end, int64_t minp) -> np.ndarray: + cdef: + Py_ssize_t i, j + float64_t val, prev, mean_val, min_val, sum_val = 0 + float64_t compensation_xxxx_add, compensation_xxxx_remove + float64_t compensation_xxx_remove, compensation_xxx_add + float64_t 
compensation_xx_remove, compensation_xx_add + float64_t compensation_x_remove, compensation_x_add + float64_t x, xx, xxx, xxxx + int64_t nobs, s, e, N = len(start), V = len(values), nobs_mean = 0 + ndarray[float64_t] output, values_copy + bint is_monotonic_increasing_bounds + + minp = max(minp, 4) + is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds( + start, end + ) + output = np.empty(N, dtype=np.float64) + values_copy = np.copy(values) + min_val = np.nanmin(values) + + with nogil: + for i in range(0, V): + val = values_copy[i] + if notnan(val): + nobs_mean += 1 + sum_val += val + mean_val = sum_val / nobs_mean + # Other cases would lead to imprecision for smallest values + if min_val - mean_val > -1e4: + mean_val = round(mean_val) + for i in range(0, V): + values_copy[i] = values_copy[i] - mean_val + + for i in range(0, N): + + s = start[i] + e = end[i] + + # Over the first window, observations can only be added + # never removed + if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]: + + compensation_xxxx_add = compensation_xxxx_remove = 0 + compensation_xxx_remove = compensation_xxx_add = 0 + compensation_xx_remove = compensation_xx_add = 0 + compensation_x_remove = compensation_x_add = 0 + x = xx = xxx = xxxx = 0 + nobs = 0 + for j in range(s, e): + add_kurt(values_copy[j], &nobs, &x, &xx, &xxx, &xxxx, + &compensation_x_add, &compensation_xx_add, + &compensation_xxx_add, &compensation_xxxx_add) + + else: + + # After the first window, observations can both be added + # and removed + # calculate deletes + for j in range(start[i - 1], s): + remove_kurt(values_copy[j], &nobs, &x, &xx, &xxx, &xxxx, + &compensation_x_remove, &compensation_xx_remove, + &compensation_xxx_remove, &compensation_xxxx_remove) + + # calculate adds + for j in range(end[i - 1], e): + add_kurt(values_copy[j], &nobs, &x, &xx, &xxx, &xxxx, + &compensation_x_add, &compensation_xx_add, + &compensation_xxx_add, &compensation_xxxx_add) + + output[i] = calc_kurt(minp, nobs, x, xx, xxx, xxxx) + + if not is_monotonic_increasing_bounds: + nobs = 0 + x = 0.0 + xx = 0.0 + xxx = 0.0 + xxxx = 0.0 + + return output + + +# ---------------------------------------------------------------------- +# Rolling median, min, max + + +def roll_median_c(const float64_t[:] values, ndarray[int64_t] start, + ndarray[int64_t] end, int64_t minp) -> np.ndarray: + cdef: + Py_ssize_t i, j + bint err = False, is_monotonic_increasing_bounds + int midpoint, ret = 0 + int64_t nobs = 0, N = len(start), s, e, win + float64_t val, res, prev + skiplist_t *sl + ndarray[float64_t] output + + is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds( + start, end + ) + + # we use the Fixed/Variable Indexer here as the + # actual skiplist ops outweigh any window computation costs + output = np.empty(N, dtype=np.float64) + + if (end - start).max() == 0: + output[:] = NaN + return output + win = (end - start).max() + sl = skiplist_init(win) + if sl == NULL: + raise MemoryError("skiplist_init failed") + + with nogil: + + for i in range(0, N): + s = start[i] + e = end[i] + + if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]: + + if i != 0: + skiplist_destroy(sl) + sl = skiplist_init(win) + nobs = 0 + # setup + for j in range(s, e): + val = values[j] + if notnan(val): + nobs += 1 + err = skiplist_insert(sl, val) == -1 + if err: + break + + else: + + # calculate adds + for j in range(end[i - 1], e): + val = values[j] + if notnan(val): + nobs += 1 + err = skiplist_insert(sl, val) == -1 + if err: + 
break + + # calculate deletes + for j in range(start[i - 1], s): + val = values[j] + if notnan(val): + skiplist_remove(sl, val) + nobs -= 1 + if nobs >= minp: + midpoint = (nobs / 2) + if nobs % 2: + res = skiplist_get(sl, midpoint, &ret) + else: + res = (skiplist_get(sl, midpoint, &ret) + + skiplist_get(sl, (midpoint - 1), &ret)) / 2 + if ret == 0: + res = NaN + else: + res = NaN + + output[i] = res + + if not is_monotonic_increasing_bounds: + nobs = 0 + skiplist_destroy(sl) + sl = skiplist_init(win) + + skiplist_destroy(sl) + if err: + raise MemoryError("skiplist_insert failed") + return output + + +# ---------------------------------------------------------------------- + +# Moving maximum / minimum code taken from Bottleneck under the terms +# of its Simplified BSD license +# https://github.com/pydata/bottleneck + + +cdef inline numeric_t init_mm(numeric_t ai, Py_ssize_t *nobs, bint is_max) nogil: + + if numeric_t in cython.floating: + if ai == ai: + nobs[0] = nobs[0] + 1 + elif is_max: + if numeric_t == cython.float: + ai = MINfloat32 + else: + ai = MINfloat64 + else: + if numeric_t == cython.float: + ai = MAXfloat32 + else: + ai = MAXfloat64 + + else: + nobs[0] = nobs[0] + 1 + + return ai + + +cdef inline void remove_mm(numeric_t aold, Py_ssize_t *nobs) nogil: + """ remove a value from the mm calc """ + if numeric_t in cython.floating and aold == aold: + nobs[0] = nobs[0] - 1 + + +cdef inline numeric_t calc_mm(int64_t minp, Py_ssize_t nobs, + numeric_t value) nogil: + cdef: + numeric_t result + + if numeric_t in cython.floating: + if nobs >= minp: + result = value + else: + result = NaN + else: + result = value + + return result + + +def roll_max(ndarray[float64_t] values, ndarray[int64_t] start, + ndarray[int64_t] end, int64_t minp) -> np.ndarray: + """ + Moving max of 1d array of any numeric type along axis=0 ignoring NaNs. + + Parameters + ---------- + values : np.ndarray[np.float64] + start : np.ndarray[np.int64] + window start indices + end : np.ndarray[np.int64] + window end indices + minp : int64_t + if number of observations in a window + is below this, output a NaN + + Returns + ------- + np.ndarray[float] + """ + return _roll_min_max(values, start, end, minp, is_max=1) + + +def roll_min(ndarray[float64_t] values, ndarray[int64_t] start, + ndarray[int64_t] end, int64_t minp) -> np.ndarray: + """ + Moving min of 1d array of any numeric type along axis=0 ignoring NaNs. 
+ + Parameters + ---------- + values : np.ndarray[np.float64] + start : np.ndarray[np.int64] + window start indices + end : np.ndarray[np.int64] + window end indices + minp : int64_t + if number of observations in a window + is below this, output a NaN + + Returns + ------- + np.ndarray[float] + """ + return _roll_min_max(values, start, end, minp, is_max=0) + + +cdef _roll_min_max(ndarray[numeric_t] values, + ndarray[int64_t] starti, + ndarray[int64_t] endi, + int64_t minp, + bint is_max): + cdef: + numeric_t ai + int64_t curr_win_size, start + Py_ssize_t i, k, nobs = 0, N = len(starti) + deque Q[int64_t] # min/max always the front + deque W[int64_t] # track the whole window for nobs compute + ndarray[float64_t, ndim=1] output + + output = np.empty(N, dtype=np.float64) + Q = deque[int64_t]() + W = deque[int64_t]() + + with nogil: + + # This is using a modified version of the C++ code in this + # SO post: https://stackoverflow.com/a/12239580 + # The original impl didn't deal with variable window sizes, + # so the code was adapted for that + + # first window's size + curr_win_size = endi[0] - starti[0] + # GH 32865 + # Anchor output index to values index to provide custom + # BaseIndexer support + for i in range(N): + + curr_win_size = endi[i] - starti[i] + if i == 0: + start = starti[i] + else: + start = endi[i - 1] + + for k in range(start, endi[i]): + ai = init_mm(values[k], &nobs, is_max) + # Discard previous entries if we find new min or max + if is_max: + while not Q.empty() and ((ai >= values[Q.back()]) or + values[Q.back()] != values[Q.back()]): + Q.pop_back() + else: + while not Q.empty() and ((ai <= values[Q.back()]) or + values[Q.back()] != values[Q.back()]): + Q.pop_back() + Q.push_back(k) + W.push_back(k) + + # Discard entries outside and left of current window + while not Q.empty() and Q.front() <= starti[i] - 1: + Q.pop_front() + while not W.empty() and W.front() <= starti[i] - 1: + remove_mm(values[W.front()], &nobs) + W.pop_front() + + # Save output based on index in input value array + if not Q.empty() and curr_win_size > 0: + output[i] = calc_mm(minp, nobs, values[Q.front()]) + else: + output[i] = NaN + + return output + + +cdef enum InterpolationType: + LINEAR, + LOWER, + HIGHER, + NEAREST, + MIDPOINT + + +interpolation_types = { + 'linear': LINEAR, + 'lower': LOWER, + 'higher': HIGHER, + 'nearest': NEAREST, + 'midpoint': MIDPOINT, +} + + +def roll_quantile(const float64_t[:] values, ndarray[int64_t] start, + ndarray[int64_t] end, int64_t minp, + float64_t quantile, str interpolation) -> np.ndarray: + """ + O(N log(window)) implementation using skip list + """ + cdef: + Py_ssize_t i, j, s, e, N = len(start), idx + int ret = 0 + int64_t nobs = 0, win + float64_t val, prev, midpoint, idx_with_fraction + float64_t vlow, vhigh + skiplist_t *skiplist + InterpolationType interpolation_type + ndarray[float64_t] output + + if quantile <= 0.0 or quantile >= 1.0: + raise ValueError(f"quantile value {quantile} not in (0, 1)") + + try: + interpolation_type = interpolation_types[interpolation] + except KeyError: + raise ValueError(f"Interpolation '{interpolation}' is not supported") + + is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds( + start, end + ) + # we use the Fixed/Variable Indexer here as the + # actual skiplist ops outweigh any window computation costs + output = np.empty(N, dtype=np.float64) + + win = (end - start).max() + if win == 0: + output[:] = NaN + return output + skiplist = skiplist_init(win) + if skiplist == NULL: + raise MemoryError("skiplist_init failed") 
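+ # (Worked example, not from the upstream pandas source: the loop below + # keeps the window's non-NaN values in the skiplist and reads the + # quantile by rank. With nobs = 5 and quantile = 0.25, + # idx_with_fraction = 0.25 * (5 - 1) = 1.0, an exact rank, so the value + # at rank 1 is returned directly; with quantile = 0.3, + # idx_with_fraction = 1.2 and LINEAR interpolation returns + # vlow + 0.2 * (vhigh - vlow) from the values at ranks 1 and 2.)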
+ + with nogil: + for i in range(0, N): + s = start[i] + e = end[i] + + if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]: + if i != 0: + nobs = 0 + skiplist_destroy(skiplist) + skiplist = skiplist_init(win) + + # setup + for j in range(s, e): + val = values[j] + if notnan(val): + nobs += 1 + skiplist_insert(skiplist, val) + + else: + # calculate adds + for j in range(end[i - 1], e): + val = values[j] + if notnan(val): + nobs += 1 + skiplist_insert(skiplist, val) + + # calculate deletes + for j in range(start[i - 1], s): + val = values[j] + if notnan(val): + skiplist_remove(skiplist, val) + nobs -= 1 + if nobs >= minp: + if nobs == 1: + # Single value in skip list + output[i] = skiplist_get(skiplist, 0, &ret) + else: + idx_with_fraction = quantile * (nobs - 1) + idx = idx_with_fraction + + if idx_with_fraction == idx: + # no need to interpolate + output[i] = skiplist_get(skiplist, idx, &ret) + continue + + if interpolation_type == LINEAR: + vlow = skiplist_get(skiplist, idx, &ret) + vhigh = skiplist_get(skiplist, idx + 1, &ret) + output[i] = ((vlow + (vhigh - vlow) * + (idx_with_fraction - idx))) + elif interpolation_type == LOWER: + output[i] = skiplist_get(skiplist, idx, &ret) + elif interpolation_type == HIGHER: + output[i] = skiplist_get(skiplist, idx + 1, &ret) + elif interpolation_type == NEAREST: + # the same behaviour as round() + if idx_with_fraction - idx == 0.5: + if idx % 2 == 0: + output[i] = skiplist_get(skiplist, idx, &ret) + else: + output[i] = skiplist_get( + skiplist, idx + 1, &ret) + elif idx_with_fraction - idx < 0.5: + output[i] = skiplist_get(skiplist, idx, &ret) + else: + output[i] = skiplist_get(skiplist, idx + 1, &ret) + elif interpolation_type == MIDPOINT: + vlow = skiplist_get(skiplist, idx, &ret) + vhigh = skiplist_get(skiplist, idx + 1, &ret) + output[i] = (vlow + vhigh) / 2 + + if ret == 0: + output[i] = NaN + else: + output[i] = NaN + + skiplist_destroy(skiplist) + + return output + + +rolling_rank_tiebreakers = { + "average": TiebreakEnumType.TIEBREAK_AVERAGE, + "min": TiebreakEnumType.TIEBREAK_MIN, + "max": TiebreakEnumType.TIEBREAK_MAX, +} + + +def roll_rank(const float64_t[:] values, ndarray[int64_t] start, + ndarray[int64_t] end, int64_t minp, bint percentile, + str method, bint ascending) -> np.ndarray: + """ + O(N log(window)) implementation using skip list + + derived from roll_quantile + """ + cdef: + Py_ssize_t i, j, s, e, N = len(start), idx + float64_t rank_min = 0, rank = 0 + int64_t nobs = 0, win + float64_t val + skiplist_t *skiplist + float64_t[::1] output + TiebreakEnumType rank_type + + try: + rank_type = rolling_rank_tiebreakers[method] + except KeyError: + raise ValueError(f"Method '{method}' is not supported") + + is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds( + start, end + ) + # we use the Fixed/Variable Indexer here as the + # actual skiplist ops outweigh any window computation costs + output = np.empty(N, dtype=np.float64) + + win = (end - start).max() + if win == 0: + output[:] = NaN + return np.asarray(output) + skiplist = skiplist_init(win) + if skiplist == NULL: + raise MemoryError("skiplist_init failed") + + with nogil: + for i in range(N): + s = start[i] + e = end[i] + + if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]: + if i != 0: + nobs = 0 + skiplist_destroy(skiplist) + skiplist = skiplist_init(win) + + # setup + for j in range(s, e): + val = values[j] if ascending else -values[j] + if notnan(val): + nobs += 1 + rank = skiplist_insert(skiplist, val) + if rank == 
-1:
+                            raise MemoryError("skiplist_insert failed")
+                        if rank_type == TiebreakEnumType.TIEBREAK_AVERAGE:
+                            # The average rank of `val` is the sum of the ranks of all
+                            # instances of `val` in the skip list divided by the number
+                            # of instances. The sum of consecutive integers from 1 to N
+                            # is N * (N + 1) / 2.
+                            # The sum of the ranks is the sum of integers from the
+                            # lowest rank to the highest rank, which is the sum of
+                            # integers from 1 to the highest rank minus the sum of
+                            # integers from 1 to one less than the lowest rank.
+                            rank_min = skiplist_min_rank(skiplist, val)
+                            rank = (((rank * (rank + 1) / 2)
+                                    - ((rank_min - 1) * rank_min / 2))
+                                    / (rank - rank_min + 1))
+                        elif rank_type == TiebreakEnumType.TIEBREAK_MIN:
+                            rank = skiplist_min_rank(skiplist, val)
+                        else:
+                            rank = NaN
+
+            else:
+                # calculate deletes
+                for j in range(start[i - 1], s):
+                    val = values[j] if ascending else -values[j]
+                    if notnan(val):
+                        skiplist_remove(skiplist, val)
+                        nobs -= 1
+
+                # calculate adds
+                for j in range(end[i - 1], e):
+                    val = values[j] if ascending else -values[j]
+                    if notnan(val):
+                        nobs += 1
+                        rank = skiplist_insert(skiplist, val)
+                        if rank == -1:
+                            raise MemoryError("skiplist_insert failed")
+                        if rank_type == TiebreakEnumType.TIEBREAK_AVERAGE:
+                            rank_min = skiplist_min_rank(skiplist, val)
+                            rank = (((rank * (rank + 1) / 2)
+                                    - ((rank_min - 1) * rank_min / 2))
+                                    / (rank - rank_min + 1))
+                        elif rank_type == TiebreakEnumType.TIEBREAK_MIN:
+                            rank = skiplist_min_rank(skiplist, val)
+                        else:
+                            rank = NaN
+            if nobs >= minp:
+                output[i] = rank / nobs if percentile else rank
+            else:
+                output[i] = NaN
+
+    skiplist_destroy(skiplist)
+
+    return np.asarray(output)
+
+
+def roll_apply(object obj,
+               ndarray[int64_t] start, ndarray[int64_t] end,
+               int64_t minp,
+               object function, bint raw,
+               tuple args, dict kwargs) -> np.ndarray:
+    cdef:
+        ndarray[float64_t] output, counts
+        ndarray[float64_t, cast=True] arr
+        Py_ssize_t i, s, e, N = len(start), n = len(obj)
+
+    if n == 0:
+        return np.array([], dtype=np.float64)
+
+    arr = np.asarray(obj)
+
+    # ndarray input
+    if raw and not arr.flags.c_contiguous:
+        arr = arr.copy('C')
+
+    counts = roll_sum(np.isfinite(arr).astype(float), start, end, minp)
+
+    output = np.empty(N, dtype=np.float64)
+
+    for i in range(N):
+
+        s = start[i]
+        e = end[i]
+
+        if counts[i] >= minp:
+            if raw:
+                output[i] = function(arr[s:e], *args, **kwargs)
+            else:
+                output[i] = function(obj.iloc[s:e], *args, **kwargs)
+        else:
+            output[i] = NaN
+
+    return output
+
+
+# ----------------------------------------------------------------------
+# Rolling sum and mean for weighted window
+
+
+def roll_weighted_sum(
+    const float64_t[:] values, const float64_t[:] weights, int minp
+) -> np.ndarray:
+    return _roll_weighted_sum_mean(values, weights, minp, avg=0)
+
+
+def roll_weighted_mean(
+    const float64_t[:] values, const float64_t[:] weights, int minp
+) -> np.ndarray:
+    return _roll_weighted_sum_mean(values, weights, minp, avg=1)
+
+
+cdef float64_t[:] _roll_weighted_sum_mean(const float64_t[:] values,
+                                          const float64_t[:] weights,
+                                          int minp, bint avg):
+    """
+    Assume len(weights) << len(values)
+    """
+    cdef:
+        float64_t[:] output, tot_wgt, counts
+        Py_ssize_t in_i, win_i, win_n, in_n
+        float64_t val_in, val_win, c, w
+
+    in_n = len(values)
+    win_n = len(weights)
+
+    output = np.zeros(in_n, dtype=np.float64)
+    counts = np.zeros(in_n, dtype=np.float64)
+    if avg:
+        tot_wgt = np.zeros(in_n, dtype=np.float64)
+
+    elif minp > in_n:
+        minp = in_n + 1
+
+    minp = max(minp, 1)
+
+    with nogil:
+        if avg:
+            for 
win_i in range(win_n): + val_win = weights[win_i] + if val_win != val_win: + continue + + for in_i in range(in_n - (win_n - win_i) + 1): + val_in = values[in_i] + if val_in == val_in: + output[in_i + (win_n - win_i) - 1] += val_in * val_win + counts[in_i + (win_n - win_i) - 1] += 1 + tot_wgt[in_i + (win_n - win_i) - 1] += val_win + + for in_i in range(in_n): + c = counts[in_i] + if c < minp: + output[in_i] = NaN + else: + w = tot_wgt[in_i] + if w == 0: + output[in_i] = NaN + else: + output[in_i] /= tot_wgt[in_i] + + else: + for win_i in range(win_n): + val_win = weights[win_i] + if val_win != val_win: + continue + + for in_i in range(in_n - (win_n - win_i) + 1): + val_in = values[in_i] + + if val_in == val_in: + output[in_i + (win_n - win_i) - 1] += val_in * val_win + counts[in_i + (win_n - win_i) - 1] += 1 + + for in_i in range(in_n): + c = counts[in_i] + if c < minp: + output[in_i] = NaN + + return output + + +# ---------------------------------------------------------------------- +# Rolling var for weighted window + + +cdef inline float64_t calc_weighted_var(float64_t t, + float64_t sum_w, + Py_ssize_t win_n, + unsigned int ddof, + float64_t nobs, + int64_t minp) nogil: + """ + Calculate weighted variance for a window using West's method. + + Paper: https://dl.acm.org/citation.cfm?id=359153 + + Parameters + ---------- + t: float64_t + sum of weighted squared differences + sum_w: float64_t + sum of weights + win_n: Py_ssize_t + window size + ddof: unsigned int + delta degrees of freedom + nobs: float64_t + number of observations + minp: int64_t + minimum number of observations + + Returns + ------- + result : float64_t + weighted variance of the window + """ + + cdef: + float64_t result + + # Variance is unchanged if no observation is added or removed + if (nobs >= minp) and (nobs > ddof): + + # pathological case + if nobs == 1: + result = 0 + else: + result = t * win_n / ((win_n - ddof) * sum_w) + if result < 0: + result = 0 + else: + result = NaN + + return result + + +cdef inline void add_weighted_var(float64_t val, + float64_t w, + float64_t *t, + float64_t *sum_w, + float64_t *mean, + float64_t *nobs) nogil: + """ + Update weighted mean, sum of weights and sum of weighted squared + differences to include value and weight pair in weighted variance + calculation using West's method. + + Paper: https://dl.acm.org/citation.cfm?id=359153 + + Parameters + ---------- + val: float64_t + window values + w: float64_t + window weights + t: float64_t + sum of weighted squared differences + sum_w: float64_t + sum of weights + mean: float64_t + weighted mean + nobs: float64_t + number of observations + """ + + cdef: + float64_t temp, q, r + + if isnan(val): + return + + nobs[0] = nobs[0] + 1 + + q = val - mean[0] + temp = sum_w[0] + w + r = q * w / temp + + mean[0] = mean[0] + r + t[0] = t[0] + r * sum_w[0] * q + sum_w[0] = temp + + +cdef inline void remove_weighted_var(float64_t val, + float64_t w, + float64_t *t, + float64_t *sum_w, + float64_t *mean, + float64_t *nobs) nogil: + """ + Update weighted mean, sum of weights and sum of weighted squared + differences to remove value and weight pair from weighted variance + calculation using West's method. 
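+
+    Concretely, this is the inverse of the West update performed in
+    ``add_weighted_var``: with ``q = val - mean`` and ``temp = sum_w - w``,
+    the running statistics are rolled back as ``mean -= q * w / temp`` and
+    ``t -= (q * w / temp) * sum_w * q``, after which ``sum_w`` becomes
+    ``temp`` (a sketch mirroring the code below).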
+ + Paper: https://dl.acm.org/citation.cfm?id=359153 + + Parameters + ---------- + val: float64_t + window values + w: float64_t + window weights + t: float64_t + sum of weighted squared differences + sum_w: float64_t + sum of weights + mean: float64_t + weighted mean + nobs: float64_t + number of observations + """ + + cdef: + float64_t temp, q, r + + if notnan(val): + nobs[0] = nobs[0] - 1 + + if nobs[0]: + q = val - mean[0] + temp = sum_w[0] - w + r = q * w / temp + + mean[0] = mean[0] - r + t[0] = t[0] - r * sum_w[0] * q + sum_w[0] = temp + + else: + t[0] = 0 + sum_w[0] = 0 + mean[0] = 0 + + +def roll_weighted_var(const float64_t[:] values, const float64_t[:] weights, + int64_t minp, unsigned int ddof): + """ + Calculates weighted rolling variance using West's online algorithm. + + Paper: https://dl.acm.org/citation.cfm?id=359153 + + Parameters + ---------- + values: float64_t[:] + values to roll window over + weights: float64_t[:] + array of weights whose length is window size + minp: int64_t + minimum number of observations to calculate + variance of a window + ddof: unsigned int + the divisor used in variance calculations + is the window size - ddof + + Returns + ------- + output: float64_t[:] + weighted variances of windows + """ + + cdef: + float64_t t = 0, sum_w = 0, mean = 0, nobs = 0 + float64_t val, pre_val, w, pre_w + Py_ssize_t i, n, win_n + float64_t[:] output + + n = len(values) + win_n = len(weights) + output = np.empty(n, dtype=np.float64) + + with nogil: + + for i in range(win_n): + add_weighted_var(values[i], weights[i], &t, + &sum_w, &mean, &nobs) + + output[i] = calc_weighted_var(t, sum_w, win_n, + ddof, nobs, minp) + + for i in range(win_n, n): + val = values[i] + pre_val = values[i - win_n] + + w = weights[i % win_n] + pre_w = weights[(i - win_n) % win_n] + + if notnan(val): + if pre_val == pre_val: + remove_weighted_var(pre_val, pre_w, &t, + &sum_w, &mean, &nobs) + + add_weighted_var(val, w, &t, &sum_w, &mean, &nobs) + + elif pre_val == pre_val: + remove_weighted_var(pre_val, pre_w, &t, + &sum_w, &mean, &nobs) + + output[i] = calc_weighted_var(t, sum_w, win_n, + ddof, nobs, minp) + + return output + + +# ---------------------------------------------------------------------- +# Exponentially weighted moving + +def ewm(const float64_t[:] vals, const int64_t[:] start, const int64_t[:] end, + int minp, float64_t com, bint adjust, bint ignore_na, + const float64_t[:] deltas=None, bint normalize=True) -> np.ndarray: + """ + Compute exponentially-weighted moving average or sum using center-of-mass. + + Parameters + ---------- + vals : ndarray (float64 type) + start: ndarray (int64 type) + end: ndarray (int64 type) + minp : int + com : float64 + adjust : bool + ignore_na : bool + deltas : ndarray (float64 type), optional. If None, implicitly assumes equally + spaced points (used when `times` is not passed) + normalize : bool, optional. + If True, calculate the mean. If False, calculate the sum. + + Returns + ------- + np.ndarray[float64_t] + """ + + cdef: + Py_ssize_t i, j, s, e, nobs, win_size, N = len(vals), M = len(start) + const float64_t[:] sub_vals + const float64_t[:] sub_deltas=None + ndarray[float64_t] sub_output, output = np.empty(N, dtype=np.float64) + float64_t alpha, old_wt_factor, new_wt, weighted, old_wt, cur + bint is_observation, use_deltas + + if N == 0: + return output + + use_deltas = deltas is not None + + alpha = 1. / (1. + com) + old_wt_factor = 1. - alpha + new_wt = 1. 
if adjust else alpha + + for j in range(M): + s = start[j] + e = end[j] + sub_vals = vals[s:e] + # note that len(deltas) = len(vals) - 1 and deltas[i] is to be used in + # conjunction with vals[i+1] + if use_deltas: + sub_deltas = deltas[s:e - 1] + win_size = len(sub_vals) + sub_output = np.empty(win_size, dtype=np.float64) + + weighted = sub_vals[0] + is_observation = weighted == weighted + nobs = int(is_observation) + sub_output[0] = weighted if nobs >= minp else NaN + old_wt = 1. + + with nogil: + for i in range(1, win_size): + cur = sub_vals[i] + is_observation = cur == cur + nobs += is_observation + if weighted == weighted: + + if is_observation or not ignore_na: + if normalize: + if use_deltas: + old_wt *= old_wt_factor ** sub_deltas[i - 1] + else: + old_wt *= old_wt_factor + else: + weighted = old_wt_factor * weighted + if is_observation: + if normalize: + # avoid numerical errors on constant series + if weighted != cur: + weighted = old_wt * weighted + new_wt * cur + weighted /= (old_wt + new_wt) + if adjust: + old_wt += new_wt + else: + old_wt = 1. + else: + weighted += cur + elif is_observation: + weighted = cur + + sub_output[i] = weighted if nobs >= minp else NaN + + output[s:e] = sub_output + + return output + + +def ewmcov(const float64_t[:] input_x, const int64_t[:] start, const int64_t[:] end, + int minp, const float64_t[:] input_y, float64_t com, bint adjust, + bint ignore_na, bint bias) -> np.ndarray: + """ + Compute exponentially-weighted moving variance using center-of-mass. + + Parameters + ---------- + input_x : ndarray (float64 type) + start: ndarray (int64 type) + end: ndarray (int64 type) + minp : int + input_y : ndarray (float64 type) + com : float64 + adjust : bool + ignore_na : bool + bias : bool + + Returns + ------- + np.ndarray[float64_t] + """ + + cdef: + Py_ssize_t i, j, s, e, win_size, nobs + Py_ssize_t N = len(input_x), M = len(input_y), L = len(start) + float64_t alpha, old_wt_factor, new_wt, mean_x, mean_y, cov + float64_t sum_wt, sum_wt2, old_wt, cur_x, cur_y, old_mean_x, old_mean_y + float64_t numerator, denominator + const float64_t[:] sub_x_vals, sub_y_vals + ndarray[float64_t] sub_out, output = np.empty(N, dtype=np.float64) + bint is_observation + + if M != N: + raise ValueError(f"arrays are of different lengths ({N} and {M})") + + if N == 0: + return output + + alpha = 1. / (1. + com) + old_wt_factor = 1. - alpha + new_wt = 1. if adjust else alpha + + for j in range(L): + s = start[j] + e = end[j] + sub_x_vals = input_x[s:e] + sub_y_vals = input_y[s:e] + win_size = len(sub_x_vals) + sub_out = np.empty(win_size, dtype=np.float64) + + mean_x = sub_x_vals[0] + mean_y = sub_y_vals[0] + is_observation = (mean_x == mean_x) and (mean_y == mean_y) + nobs = int(is_observation) + if not is_observation: + mean_x = NaN + mean_y = NaN + sub_out[0] = (0. if bias else NaN) if nobs >= minp else NaN + cov = 0. + sum_wt = 1. + sum_wt2 = 1. + old_wt = 1. 
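+
+        # A sketch of the recurrences applied in the loop below: every prior
+        # weight decays by old_wt_factor = 1 - alpha while a new observation
+        # enters with weight new_wt, so each mean updates as
+        #     mean_x <- (old_wt * mean_x + new_wt * cur_x) / (old_wt + new_wt)
+        # sum_wt and sum_wt2 track the sum of weights and of squared weights,
+        # which yields the bias correction sum_wt**2 / (sum_wt**2 - sum_wt2)
+        # applied to ``cov`` when ``bias`` is False.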
+ + with nogil: + for i in range(1, win_size): + cur_x = sub_x_vals[i] + cur_y = sub_y_vals[i] + is_observation = (cur_x == cur_x) and (cur_y == cur_y) + nobs += is_observation + if mean_x == mean_x: + if is_observation or not ignore_na: + sum_wt *= old_wt_factor + sum_wt2 *= (old_wt_factor * old_wt_factor) + old_wt *= old_wt_factor + if is_observation: + old_mean_x = mean_x + old_mean_y = mean_y + + # avoid numerical errors on constant series + if mean_x != cur_x: + mean_x = ((old_wt * old_mean_x) + + (new_wt * cur_x)) / (old_wt + new_wt) + + # avoid numerical errors on constant series + if mean_y != cur_y: + mean_y = ((old_wt * old_mean_y) + + (new_wt * cur_y)) / (old_wt + new_wt) + cov = ((old_wt * (cov + ((old_mean_x - mean_x) * + (old_mean_y - mean_y)))) + + (new_wt * ((cur_x - mean_x) * + (cur_y - mean_y)))) / (old_wt + new_wt) + sum_wt += new_wt + sum_wt2 += (new_wt * new_wt) + old_wt += new_wt + if not adjust: + sum_wt /= old_wt + sum_wt2 /= (old_wt * old_wt) + old_wt = 1. + elif is_observation: + mean_x = cur_x + mean_y = cur_y + + if nobs >= minp: + if not bias: + numerator = sum_wt * sum_wt + denominator = numerator - sum_wt2 + if denominator > 0: + sub_out[i] = (numerator / denominator) * cov + else: + sub_out[i] = NaN + else: + sub_out[i] = cov + else: + sub_out[i] = NaN + + output[s:e] = sub_out + + return output diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/window/indexers.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/_libs/window/indexers.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..1af296a Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/window/indexers.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/window/indexers.pyi b/.local/lib/python3.8/site-packages/pandas/_libs/window/indexers.pyi new file mode 100644 index 0000000..c9bc64b --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/window/indexers.pyi @@ -0,0 +1,12 @@ +import numpy as np + +from pandas._typing import npt + +def calculate_variable_window_bounds( + num_values: int, # int64_t + window_size: int, # int64_t + min_periods, + center: bool, + closed: str | None, + index: np.ndarray, # const int64_t[:] +) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.int64]]: ... diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/window/indexers.pyx b/.local/lib/python3.8/site-packages/pandas/_libs/window/indexers.pyx new file mode 100644 index 0000000..4b3a858 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/window/indexers.pyx @@ -0,0 +1,146 @@ +# cython: boundscheck=False, wraparound=False, cdivision=True + +import numpy as np + +from numpy cimport ( + int64_t, + ndarray, +) + +# Cython routines for window indexers + + +def calculate_variable_window_bounds( + int64_t num_values, + int64_t window_size, + object min_periods, # unused but here to match get_window_bounds signature + bint center, + str closed, + const int64_t[:] index +): + """ + Calculate window boundaries for rolling windows from a time offset. 
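+
+    For example (an illustrative sketch, assuming a plain integer index
+    standing in for nanosecond timestamps)::
+
+        >>> import numpy as np
+        >>> calculate_variable_window_bounds(
+        ...     5, 2, None, False, "right", np.arange(5, dtype=np.int64)
+        ... )  # doctest: +SKIP
+        (array([0, 0, 1, 2, 3]), array([1, 2, 3, 4, 5]))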
+ + Parameters + ---------- + num_values : int64 + total number of values + + window_size : int64 + window size calculated from the offset + + min_periods : object + ignored, exists for compatibility + + center : bint + center the rolling window on the current observation + + closed : str + string of side of the window that should be closed + + index : ndarray[int64] + time series index to roll over + + Returns + ------- + (ndarray[int64], ndarray[int64]) + """ + cdef: + bint left_closed = False + bint right_closed = False + ndarray[int64_t, ndim=1] start, end + int64_t start_bound, end_bound, index_growth_sign = 1 + Py_ssize_t i, j + + # default is 'right' + if closed is None: + closed = 'right' + + if closed in ['right', 'both']: + right_closed = True + + if closed in ['left', 'both']: + left_closed = True + + # GH 43997: + # If the forward and the backward facing windows + # would result in a fraction of 1/2 a nanosecond + # we need to make both interval ends inclusive. + if center and window_size % 2 == 1: + right_closed = True + left_closed = True + + if index[num_values - 1] < index[0]: + index_growth_sign = -1 + + start = np.empty(num_values, dtype='int64') + start.fill(-1) + end = np.empty(num_values, dtype='int64') + end.fill(-1) + + start[0] = 0 + + # right endpoint is closed + if right_closed: + end[0] = 1 + # right endpoint is open + else: + end[0] = 0 + if center: + end_bound = index[0] + index_growth_sign * window_size / 2 + for j in range(0, num_values): + if (index[j] - end_bound) * index_growth_sign < 0: + end[0] = j + 1 + elif (index[j] - end_bound) * index_growth_sign == 0 and right_closed: + end[0] = j + 1 + elif (index[j] - end_bound) * index_growth_sign >= 0: + end[0] = j + break + + with nogil: + + # start is start of slice interval (including) + # end is end of slice interval (not including) + for i in range(1, num_values): + if center: + end_bound = index[i] + index_growth_sign * window_size / 2 + start_bound = index[i] - index_growth_sign * window_size / 2 + else: + end_bound = index[i] + start_bound = index[i] - index_growth_sign * window_size + + # left endpoint is closed + if left_closed: + start_bound -= 1 * index_growth_sign + + # advance the start bound until we are + # within the constraint + start[i] = i + for j in range(start[i - 1], i): + if (index[j] - start_bound) * index_growth_sign > 0: + start[i] = j + break + + # for centered window advance the end bound until we are + # outside the constraint + if center: + for j in range(end[i - 1], num_values + 1): + if j == num_values: + end[i] = j + elif ((index[j] - end_bound) * index_growth_sign == 0 and + right_closed): + end[i] = j + 1 + elif (index[j] - end_bound) * index_growth_sign >= 0: + end[i] = j + break + # end bound is previous end + # or current index + elif (index[end[i - 1]] - end_bound) * index_growth_sign <= 0: + end[i] = i + 1 + else: + end[i] = end[i - 1] + + # right endpoint is open + if not right_closed and not center: + end[i] -= 1 + return start, end diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/writers.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/_libs/writers.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..ec3f7da Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_libs/writers.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/writers.pyi b/.local/lib/python3.8/site-packages/pandas/_libs/writers.pyi new file mode 100644 index 0000000..930322f --- 
/dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/writers.pyi @@ -0,0 +1,23 @@ +from __future__ import annotations + +import numpy as np + +from pandas._typing import ArrayLike + +def write_csv_rows( + data: list[ArrayLike], + data_index: np.ndarray, + nlevels: int, + cols: np.ndarray, + writer: object, # _csv.writer +) -> None: ... +def convert_json_to_lines(arr: str) -> str: ... +def max_len_string_array( + arr: np.ndarray, # pandas_string[:] +) -> int: ... +def word_len(val: object) -> int: ... +def string_array_replace_from_nan_rep( + arr: np.ndarray, # np.ndarray[object, ndim=1] + nan_rep: object, + replace: object = ..., +) -> None: ... diff --git a/.local/lib/python3.8/site-packages/pandas/_libs/writers.pyx b/.local/lib/python3.8/site-packages/pandas/_libs/writers.pyx new file mode 100644 index 0000000..eac6ee4 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_libs/writers.pyx @@ -0,0 +1,175 @@ +import cython +import numpy as np + +from cpython cimport ( + PyBytes_GET_SIZE, + PyUnicode_GET_LENGTH, +) +from numpy cimport ( + ndarray, + uint8_t, +) + +ctypedef fused pandas_string: + str + bytes + + +@cython.boundscheck(False) +@cython.wraparound(False) +def write_csv_rows( + list data, + ndarray data_index, + Py_ssize_t nlevels, + ndarray cols, + object writer +) -> None: + """ + Write the given data to the writer object, pre-allocating where possible + for performance improvements. + + Parameters + ---------- + data : list[ArrayLike] + data_index : ndarray + nlevels : int + cols : ndarray + writer : _csv.writer + """ + # In crude testing, N>100 yields little marginal improvement + cdef: + Py_ssize_t i, j = 0, k = len(data_index), N = 100, ncols = len(cols) + list rows + + # pre-allocate rows + rows = [[None] * (nlevels + ncols) for _ in range(N)] + + if nlevels == 1: + for j in range(k): + row = rows[j % N] + row[0] = data_index[j] + for i in range(ncols): + row[1 + i] = data[i][j] + + if j >= N - 1 and j % N == N - 1: + writer.writerows(rows) + elif nlevels > 1: + for j in range(k): + row = rows[j % N] + row[:nlevels] = list(data_index[j]) + for i in range(ncols): + row[nlevels + i] = data[i][j] + + if j >= N - 1 and j % N == N - 1: + writer.writerows(rows) + else: + for j in range(k): + row = rows[j % N] + for i in range(ncols): + row[i] = data[i][j] + + if j >= N - 1 and j % N == N - 1: + writer.writerows(rows) + + if j >= 0 and (j < N - 1 or (j % N) != N - 1): + writer.writerows(rows[:((j + 1) % N)]) + + +@cython.boundscheck(False) +@cython.wraparound(False) +def convert_json_to_lines(arr: str) -> str: + """ + replace comma separated json with line feeds, paying special attention + to quotes & brackets + """ + cdef: + Py_ssize_t i = 0, num_open_brackets_seen = 0, length + bint in_quotes = False, is_escaping = False + ndarray[uint8_t, ndim=1] narr + unsigned char val, newline, comma, left_bracket, right_bracket, quote + unsigned char backslash + + newline = ord('\n') + comma = ord(',') + left_bracket = ord('{') + right_bracket = ord('}') + quote = ord('"') + backslash = ord('\\') + + narr = np.frombuffer(arr.encode('utf-8'), dtype='u1').copy() + length = narr.shape[0] + for i in range(length): + val = narr[i] + if val == quote and i > 0 and not is_escaping: + in_quotes = ~in_quotes + if val == backslash or is_escaping: + is_escaping = ~is_escaping + if val == comma: # commas that should be \n + if num_open_brackets_seen == 0 and not in_quotes: + narr[i] = newline + elif val == left_bracket: + if not in_quotes: + num_open_brackets_seen += 1 + elif val == 
right_bracket: + if not in_quotes: + num_open_brackets_seen -= 1 + + return narr.tobytes().decode('utf-8') + '\n' # GH:36888 + + +# stata, pytables +@cython.boundscheck(False) +@cython.wraparound(False) +def max_len_string_array(pandas_string[:] arr) -> Py_ssize_t: + """ + Return the maximum size of elements in a 1-dim string array. + """ + cdef: + Py_ssize_t i, m = 0, wlen = 0, length = arr.shape[0] + pandas_string val + + for i in range(length): + val = arr[i] + wlen = word_len(val) + + if wlen > m: + m = wlen + + return m + + +cpdef inline Py_ssize_t word_len(object val): + """ + Return the maximum length of a string or bytes value. + """ + cdef: + Py_ssize_t wlen = 0 + + if isinstance(val, str): + wlen = PyUnicode_GET_LENGTH(val) + elif isinstance(val, bytes): + wlen = PyBytes_GET_SIZE(val) + + return wlen + +# ------------------------------------------------------------------ +# PyTables Helpers + + +@cython.boundscheck(False) +@cython.wraparound(False) +def string_array_replace_from_nan_rep( + ndarray[object, ndim=1] arr, + object nan_rep, + object replace=np.nan +) -> None: + """ + Replace the values in the array with 'replacement' if + they are 'nan_rep'. Return the same array. + """ + cdef: + Py_ssize_t length = len(arr), i = 0 + + for i in range(length): + if arr[i] == nan_rep: + arr[i] = replace diff --git a/.local/lib/python3.8/site-packages/pandas/_testing/__init__.py b/.local/lib/python3.8/site-packages/pandas/_testing/__init__.py new file mode 100644 index 0000000..0dfe334 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_testing/__init__.py @@ -0,0 +1,1107 @@ +from __future__ import annotations + +import collections +from datetime import datetime +from decimal import Decimal +from functools import wraps +import operator +import os +import re +import string +from typing import ( + TYPE_CHECKING, + Callable, + ContextManager, + Counter, + Iterable, +) +import warnings + +import numpy as np + +from pandas._config.localization import ( # noqa:F401 + can_set_locale, + get_locales, + set_locale, +) + +from pandas._typing import Dtype + +from pandas.core.dtypes.common import ( + is_float_dtype, + is_integer_dtype, + is_sequence, + is_unsigned_integer_dtype, + pandas_dtype, +) + +import pandas as pd +from pandas import ( + Categorical, + CategoricalIndex, + DataFrame, + DatetimeIndex, + Index, + IntervalIndex, + MultiIndex, + RangeIndex, + Series, + bdate_range, +) +from pandas._testing._io import ( # noqa:F401 + close, + network, + round_trip_localpath, + round_trip_pathlib, + round_trip_pickle, + with_connectivity_check, + write_to_compressed, +) +from pandas._testing._random import ( # noqa:F401 + randbool, + rands, + rands_array, + randu_array, +) +from pandas._testing._warnings import assert_produces_warning # noqa:F401 +from pandas._testing.asserters import ( # noqa:F401 + assert_almost_equal, + assert_attr_equal, + assert_categorical_equal, + assert_class_equal, + assert_contains_all, + assert_copy, + assert_datetime_array_equal, + assert_dict_equal, + assert_equal, + assert_extension_array_equal, + assert_frame_equal, + assert_index_equal, + assert_indexing_slices_equivalent, + assert_interval_array_equal, + assert_is_sorted, + assert_is_valid_plot_return_object, + assert_metadata_equivalent, + assert_numpy_array_equal, + assert_period_array_equal, + assert_series_equal, + assert_sp_array_equal, + assert_timedelta_array_equal, + raise_assert_detail, +) +from pandas._testing.compat import ( # noqa:F401 + get_dtype, + get_obj, +) +from pandas._testing.contexts 
import ( # noqa:F401 + RNGContext, + decompress_file, + ensure_clean, + ensure_clean_dir, + ensure_safe_environment_variables, + set_timezone, + use_numexpr, + with_csv_dialect, +) +from pandas.core.api import ( + Float64Index, + Int64Index, + NumericIndex, + UInt64Index, +) +from pandas.core.arrays import ( + BaseMaskedArray, + ExtensionArray, + PandasArray, +) +from pandas.core.arrays._mixins import NDArrayBackedExtensionArray +from pandas.core.construction import extract_array + +if TYPE_CHECKING: + from pandas import ( + PeriodIndex, + TimedeltaIndex, + ) + +_N = 30 +_K = 4 + +UNSIGNED_INT_NUMPY_DTYPES: list[Dtype] = ["uint8", "uint16", "uint32", "uint64"] +UNSIGNED_INT_EA_DTYPES: list[Dtype] = ["UInt8", "UInt16", "UInt32", "UInt64"] +SIGNED_INT_NUMPY_DTYPES: list[Dtype] = [int, "int8", "int16", "int32", "int64"] +SIGNED_INT_EA_DTYPES: list[Dtype] = ["Int8", "Int16", "Int32", "Int64"] +ALL_INT_NUMPY_DTYPES = UNSIGNED_INT_NUMPY_DTYPES + SIGNED_INT_NUMPY_DTYPES +ALL_INT_EA_DTYPES = UNSIGNED_INT_EA_DTYPES + SIGNED_INT_EA_DTYPES + +FLOAT_NUMPY_DTYPES: list[Dtype] = [float, "float32", "float64"] +FLOAT_EA_DTYPES: list[Dtype] = ["Float32", "Float64"] +COMPLEX_DTYPES: list[Dtype] = [complex, "complex64", "complex128"] +STRING_DTYPES: list[Dtype] = [str, "str", "U"] + +DATETIME64_DTYPES: list[Dtype] = ["datetime64[ns]", "M8[ns]"] +TIMEDELTA64_DTYPES: list[Dtype] = ["timedelta64[ns]", "m8[ns]"] + +BOOL_DTYPES: list[Dtype] = [bool, "bool"] +BYTES_DTYPES: list[Dtype] = [bytes, "bytes"] +OBJECT_DTYPES: list[Dtype] = [object, "object"] + +ALL_REAL_NUMPY_DTYPES = FLOAT_NUMPY_DTYPES + ALL_INT_NUMPY_DTYPES +ALL_NUMPY_DTYPES = ( + ALL_REAL_NUMPY_DTYPES + + COMPLEX_DTYPES + + STRING_DTYPES + + DATETIME64_DTYPES + + TIMEDELTA64_DTYPES + + BOOL_DTYPES + + OBJECT_DTYPES + + BYTES_DTYPES +) + +NARROW_NP_DTYPES = [ + np.float16, + np.float32, + np.int8, + np.int16, + np.int32, + np.uint8, + np.uint16, + np.uint32, +] + +NULL_OBJECTS = [None, np.nan, pd.NaT, float("nan"), pd.NA, Decimal("NaN")] +NP_NAT_OBJECTS = [ + cls("NaT", unit) + for cls in [np.datetime64, np.timedelta64] + for unit in [ + "Y", + "M", + "W", + "D", + "h", + "m", + "s", + "ms", + "us", + "ns", + "ps", + "fs", + "as", + ] +] + +EMPTY_STRING_PATTERN = re.compile("^$") + +# set testing_mode +_testing_mode_warnings = (DeprecationWarning, ResourceWarning) + + +def set_testing_mode(): + # set the testing mode filters + testing_mode = os.environ.get("PANDAS_TESTING_MODE", "None") + if "deprecate" in testing_mode: + for category in _testing_mode_warnings: + warnings.simplefilter("always", category) + + +def reset_testing_mode(): + # reset the testing mode filters + testing_mode = os.environ.get("PANDAS_TESTING_MODE", "None") + if "deprecate" in testing_mode: + for category in _testing_mode_warnings: + warnings.simplefilter("ignore", category) + + +set_testing_mode() + + +def reset_display_options(): + """ + Reset the display options for printing and representing objects. + """ + pd.reset_option("^display.", silent=True) + + +# ----------------------------------------------------------------------------- +# Comparators + + +def equalContents(arr1, arr2) -> bool: + """ + Checks if the set of unique elements of arr1 and arr2 are equivalent. + """ + return frozenset(arr1) == frozenset(arr2) + + +def box_expected(expected, box_cls, transpose=True): + """ + Helper function to wrap the expected output of a test in a given box_class. 
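+
+    For example (illustrative)::
+
+        >>> box_expected([1, 2, 3], Index)  # doctest: +SKIP
+        Int64Index([1, 2, 3], dtype='int64')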
+ + Parameters + ---------- + expected : np.ndarray, Index, Series + box_cls : {Index, Series, DataFrame} + + Returns + ------- + subclass of box_cls + """ + if box_cls is pd.array: + if isinstance(expected, RangeIndex): + # pd.array would return an IntegerArray + expected = PandasArray(np.asarray(expected._values)) + else: + expected = pd.array(expected) + elif box_cls is Index: + expected = Index._with_infer(expected) + elif box_cls is Series: + expected = Series(expected) + elif box_cls is DataFrame: + expected = Series(expected).to_frame() + if transpose: + # for vector operations, we need a DataFrame to be a single-row, + # not a single-column, in order to operate against non-DataFrame + # vectors of the same length. But convert to two rows to avoid + # single-row special cases in datetime arithmetic + expected = expected.T + expected = pd.concat([expected] * 2, ignore_index=True) + elif box_cls is np.ndarray or box_cls is np.array: + expected = np.array(expected) + elif box_cls is to_array: + expected = to_array(expected) + else: + raise NotImplementedError(box_cls) + return expected + + +def to_array(obj): + """ + Similar to pd.array, but does not cast numpy dtypes to nullable dtypes. + """ + # temporary implementation until we get pd.array in place + dtype = getattr(obj, "dtype", None) + + if dtype is None: + return np.asarray(obj) + + return extract_array(obj, extract_numpy=True) + + +# ----------------------------------------------------------------------------- +# Others + + +def getCols(k): + return string.ascii_uppercase[:k] + + +# make index +def makeStringIndex(k=10, name=None): + return Index(rands_array(nchars=10, size=k), name=name) + + +def makeUnicodeIndex(k=10, name=None): + return Index(randu_array(nchars=10, size=k), name=name) + + +def makeCategoricalIndex(k=10, n=3, name=None, **kwargs): + """make a length k index or n categories""" + x = rands_array(nchars=4, size=n) + return CategoricalIndex( + Categorical.from_codes(np.arange(k) % n, categories=x), name=name, **kwargs + ) + + +def makeIntervalIndex(k=10, name=None, **kwargs): + """make a length k IntervalIndex""" + x = np.linspace(0, 100, num=(k + 1)) + return IntervalIndex.from_breaks(x, name=name, **kwargs) + + +def makeBoolIndex(k=10, name=None): + if k == 1: + return Index([True], name=name) + elif k == 2: + return Index([False, True], name=name) + return Index([False, True] + [False] * (k - 2), name=name) + + +def makeNumericIndex(k=10, name=None, *, dtype): + dtype = pandas_dtype(dtype) + assert isinstance(dtype, np.dtype) + + if is_integer_dtype(dtype): + values = np.arange(k, dtype=dtype) + if is_unsigned_integer_dtype(dtype): + values += 2 ** (dtype.itemsize * 8 - 1) + elif is_float_dtype(dtype): + values = np.random.random_sample(k) - np.random.random_sample(1) + values.sort() + values = values * (10 ** np.random.randint(0, 9)) + else: + raise NotImplementedError(f"wrong dtype {dtype}") + + return NumericIndex(values, dtype=dtype, name=name) + + +def makeIntIndex(k=10, name=None): + base_idx = makeNumericIndex(k, name=name, dtype="int64") + return Int64Index(base_idx) + + +def makeUIntIndex(k=10, name=None): + base_idx = makeNumericIndex(k, name=name, dtype="uint64") + return UInt64Index(base_idx) + + +def makeRangeIndex(k=10, name=None, **kwargs): + return RangeIndex(0, k, 1, name=name, **kwargs) + + +def makeFloatIndex(k=10, name=None): + base_idx = makeNumericIndex(k, name=name, dtype="float64") + return Float64Index(base_idx) + + +def makeDateIndex(k: int = 10, freq="B", name=None, **kwargs) -> 
DatetimeIndex: + dt = datetime(2000, 1, 1) + dr = bdate_range(dt, periods=k, freq=freq, name=name) + return DatetimeIndex(dr, name=name, **kwargs) + + +def makeTimedeltaIndex(k: int = 10, freq="D", name=None, **kwargs) -> TimedeltaIndex: + return pd.timedelta_range(start="1 day", periods=k, freq=freq, name=name, **kwargs) + + +def makePeriodIndex(k: int = 10, name=None, **kwargs) -> PeriodIndex: + dt = datetime(2000, 1, 1) + return pd.period_range(start=dt, periods=k, freq="B", name=name, **kwargs) + + +def makeMultiIndex(k=10, names=None, **kwargs): + return MultiIndex.from_product((("foo", "bar"), (1, 2)), names=names, **kwargs) + + +_names = [ + "Alice", + "Bob", + "Charlie", + "Dan", + "Edith", + "Frank", + "George", + "Hannah", + "Ingrid", + "Jerry", + "Kevin", + "Laura", + "Michael", + "Norbert", + "Oliver", + "Patricia", + "Quinn", + "Ray", + "Sarah", + "Tim", + "Ursula", + "Victor", + "Wendy", + "Xavier", + "Yvonne", + "Zelda", +] + + +def _make_timeseries(start="2000-01-01", end="2000-12-31", freq="1D", seed=None): + """ + Make a DataFrame with a DatetimeIndex + + Parameters + ---------- + start : str or Timestamp, default "2000-01-01" + The start of the index. Passed to date_range with `freq`. + end : str or Timestamp, default "2000-12-31" + The end of the index. Passed to date_range with `freq`. + freq : str or Freq + The frequency to use for the DatetimeIndex + seed : int, optional + The random state seed. + + * name : object dtype with string names + * id : int dtype with + * x, y : float dtype + + Examples + -------- + >>> _make_timeseries() # doctest: +SKIP + id name x y + timestamp + 2000-01-01 982 Frank 0.031261 0.986727 + 2000-01-02 1025 Edith -0.086358 -0.032920 + 2000-01-03 982 Edith 0.473177 0.298654 + 2000-01-04 1009 Sarah 0.534344 -0.750377 + 2000-01-05 963 Zelda -0.271573 0.054424 + ... ... ... ... ... + 2000-12-27 980 Ingrid -0.132333 -0.422195 + 2000-12-28 972 Frank -0.376007 -0.298687 + 2000-12-29 1009 Ursula -0.865047 -0.503133 + 2000-12-30 1000 Hannah -0.063757 -0.507336 + 2000-12-31 972 Tim -0.869120 0.531685 + """ + index = pd.date_range(start=start, end=end, freq=freq, name="timestamp") + n = len(index) + state = np.random.RandomState(seed) + columns = { + "name": state.choice(_names, size=n), + "id": state.poisson(1000, size=n), + "x": state.rand(n) * 2 - 1, + "y": state.rand(n) * 2 - 1, + } + df = DataFrame(columns, index=index, columns=sorted(columns)) + if df.index[-1] == end: + df = df.iloc[:-1] + return df + + +def index_subclass_makers_generator(): + make_index_funcs = [ + makeDateIndex, + makePeriodIndex, + makeTimedeltaIndex, + makeRangeIndex, + makeIntervalIndex, + makeCategoricalIndex, + makeMultiIndex, + ] + yield from make_index_funcs + + +def all_timeseries_index_generator(k: int = 10) -> Iterable[Index]: + """ + Generator which can be iterated over to get instances of all the classes + which represent time-series. 
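+
+    For example (illustrative)::
+
+        >>> [type(idx).__name__
+        ...  for idx in all_timeseries_index_generator(3)]  # doctest: +SKIP
+        ['DatetimeIndex', 'PeriodIndex', 'TimedeltaIndex']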
+
+    Parameters
+    ----------
+    k: length of each of the index instances
+    """
+    make_index_funcs: list[Callable[..., Index]] = [
+        makeDateIndex,
+        makePeriodIndex,
+        makeTimedeltaIndex,
+    ]
+    for make_index_func in make_index_funcs:
+        yield make_index_func(k=k)
+
+
+# make series
+def makeFloatSeries(name=None):
+    index = makeStringIndex(_N)
+    return Series(np.random.randn(_N), index=index, name=name)
+
+
+def makeStringSeries(name=None):
+    index = makeStringIndex(_N)
+    return Series(np.random.randn(_N), index=index, name=name)
+
+
+def makeObjectSeries(name=None):
+    data = makeStringIndex(_N)
+    data = Index(data, dtype=object)
+    index = makeStringIndex(_N)
+    return Series(data, index=index, name=name)
+
+
+def getSeriesData():
+    index = makeStringIndex(_N)
+    return {c: Series(np.random.randn(_N), index=index) for c in getCols(_K)}
+
+
+def makeTimeSeries(nper=None, freq="B", name=None):
+    if nper is None:
+        nper = _N
+    return Series(
+        np.random.randn(nper), index=makeDateIndex(nper, freq=freq), name=name
+    )
+
+
+def makePeriodSeries(nper=None, name=None):
+    if nper is None:
+        nper = _N
+    return Series(np.random.randn(nper), index=makePeriodIndex(nper), name=name)
+
+
+def getTimeSeriesData(nper=None, freq="B"):
+    return {c: makeTimeSeries(nper, freq) for c in getCols(_K)}
+
+
+def getPeriodData(nper=None):
+    return {c: makePeriodSeries(nper) for c in getCols(_K)}
+
+
+# make frame
+def makeTimeDataFrame(nper=None, freq="B"):
+    data = getTimeSeriesData(nper, freq)
+    return DataFrame(data)
+
+
+def makeDataFrame() -> DataFrame:
+    data = getSeriesData()
+    return DataFrame(data)
+
+
+def getMixedTypeDict():
+    index = Index(["a", "b", "c", "d", "e"])
+
+    data = {
+        "A": [0.0, 1.0, 2.0, 3.0, 4.0],
+        "B": [0.0, 1.0, 0.0, 1.0, 0.0],
+        "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
+        "D": bdate_range("1/1/2009", periods=5),
+    }
+
+    return index, data
+
+
+def makeMixedDataFrame():
+    return DataFrame(getMixedTypeDict()[1])
+
+
+def makePeriodFrame(nper=None):
+    data = getPeriodData(nper)
+    return DataFrame(data)
+
+
+def makeCustomIndex(
+    nentries, nlevels, prefix="#", names=False, ndupe_l=None, idx_type=None
+):
+    """
+    Create an index/multiindex with given dimensions, levels, names, etc.
+
+    nentries - number of entries in index
+    nlevels - number of levels (> 1 produces multiindex)
+    prefix - a string prefix for labels
+    names - (Optional), bool or list of strings. if True will use default
+        names, if False will use no names, if a list is given, the name of
+        each level in the index will be taken from the list.
+    ndupe_l - (Optional), list of ints, the number of rows for which the
+        label will be repeated at the corresponding level, you can specify
+        just the first few, the rest will use the default ndupe_l of 1.
+        len(ndupe_l) <= nlevels.
+    idx_type - "i"/"f"/"s"/"u"/"dt"/"p"/"td".
+        If idx_type is not None, `idx_nlevels` must be 1.
+        "i"/"f" creates an integer/float index,
+        "s"/"u" creates a string/unicode index
+        "dt" creates a datetime index.
+        "td" creates a timedelta index.
+
+        if unspecified, string labels will be generated.
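+
+    For example (illustrative; labels follow the ``{prefix}_l{level}_g{group}``
+    pattern used below)::
+
+        >>> makeCustomIndex(nentries=4, nlevels=2)  # doctest: +SKIP
+        MultiIndex([('#_l0_g0', '#_l1_g0'),
+                    ('#_l0_g1', '#_l1_g1'),
+                    ('#_l0_g2', '#_l1_g2'),
+                    ('#_l0_g3', '#_l1_g3')],
+                   )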
+ """ + if ndupe_l is None: + ndupe_l = [1] * nlevels + assert is_sequence(ndupe_l) and len(ndupe_l) <= nlevels + assert names is None or names is False or names is True or len(names) is nlevels + assert idx_type is None or ( + idx_type in ("i", "f", "s", "u", "dt", "p", "td") and nlevels == 1 + ) + + if names is True: + # build default names + names = [prefix + str(i) for i in range(nlevels)] + if names is False: + # pass None to index constructor for no name + names = None + + # make singleton case uniform + if isinstance(names, str) and nlevels == 1: + names = [names] + + # specific 1D index type requested? + idx_func_dict: dict[str, Callable[..., Index]] = { + "i": makeIntIndex, + "f": makeFloatIndex, + "s": makeStringIndex, + "u": makeUnicodeIndex, + "dt": makeDateIndex, + "td": makeTimedeltaIndex, + "p": makePeriodIndex, + } + idx_func = idx_func_dict.get(idx_type) + if idx_func: + idx = idx_func(nentries) + # but we need to fill in the name + if names: + idx.name = names[0] + return idx + elif idx_type is not None: + raise ValueError( + f"{repr(idx_type)} is not a legal value for `idx_type`, " + "use 'i'/'f'/'s'/'u'/'dt'/'p'/'td'." + ) + + if len(ndupe_l) < nlevels: + ndupe_l.extend([1] * (nlevels - len(ndupe_l))) + assert len(ndupe_l) == nlevels + + assert all(x > 0 for x in ndupe_l) + + list_of_lists = [] + for i in range(nlevels): + + def keyfunc(x): + import re + + numeric_tuple = re.sub(r"[^\d_]_?", "", x).split("_") + return [int(num) for num in numeric_tuple] + + # build a list of lists to create the index from + div_factor = nentries // ndupe_l[i] + 1 + + # Deprecated since version 3.9: collections.Counter now supports []. See PEP 585 + # and Generic Alias Type. + cnt: Counter[str] = collections.Counter() + for j in range(div_factor): + label = f"{prefix}_l{i}_g{j}" + cnt[label] = ndupe_l[i] + # cute Counter trick + result = sorted(cnt.elements(), key=keyfunc)[:nentries] + list_of_lists.append(result) + + tuples = list(zip(*list_of_lists)) + + # convert tuples to index + if nentries == 1: + # we have a single level of tuples, i.e. a regular Index + index = Index(tuples[0], name=names[0]) + elif nlevels == 1: + name = None if names is None else names[0] + index = Index((x[0] for x in tuples), name=name) + else: + index = MultiIndex.from_tuples(tuples, names=names) + return index + + +def makeCustomDataframe( + nrows, + ncols, + c_idx_names=True, + r_idx_names=True, + c_idx_nlevels=1, + r_idx_nlevels=1, + data_gen_f=None, + c_ndupe_l=None, + r_ndupe_l=None, + dtype=None, + c_idx_type=None, + r_idx_type=None, +): + """ + Create a DataFrame using supplied parameters. + + Parameters + ---------- + nrows, ncols - number of data rows/cols + c_idx_names, idx_names - False/True/list of strings, yields No names , + default names or uses the provided names for the levels of the + corresponding index. You can provide a single string when + c_idx_nlevels ==1. + c_idx_nlevels - number of levels in columns index. > 1 will yield MultiIndex + r_idx_nlevels - number of levels in rows index. > 1 will yield MultiIndex + data_gen_f - a function f(row,col) which return the data value + at that position, the default generator used yields values of the form + "RxCy" based on position. + c_ndupe_l, r_ndupe_l - list of integers, determines the number + of duplicates for each label at a given level of the corresponding + index. The default `None` value produces a multiplicity of 1 across + all levels, i.e. a unique index. 
Will accept a partial list of length + N < idx_nlevels, for just the first N levels. If ndupe doesn't divide + nrows/ncol, the last label might have lower multiplicity. + dtype - passed to the DataFrame constructor as is, in case you wish to + have more control in conjunction with a custom `data_gen_f` + r_idx_type, c_idx_type - "i"/"f"/"s"/"u"/"dt"/"td". + If idx_type is not None, `idx_nlevels` must be 1. + "i"/"f" creates an integer/float index, + "s"/"u" creates a string/unicode index + "dt" create a datetime index. + "td" create a timedelta index. + + if unspecified, string labels will be generated. + + Examples + -------- + # 5 row, 3 columns, default names on both, single index on both axis + >> makeCustomDataframe(5,3) + + # make the data a random int between 1 and 100 + >> mkdf(5,3,data_gen_f=lambda r,c:randint(1,100)) + + # 2-level multiindex on rows with each label duplicated + # twice on first level, default names on both axis, single + # index on both axis + >> a=makeCustomDataframe(5,3,r_idx_nlevels=2,r_ndupe_l=[2]) + + # DatetimeIndex on row, index with unicode labels on columns + # no names on either axis + >> a=makeCustomDataframe(5,3,c_idx_names=False,r_idx_names=False, + r_idx_type="dt",c_idx_type="u") + + # 4-level multindex on rows with names provided, 2-level multindex + # on columns with default labels and default names. + >> a=makeCustomDataframe(5,3,r_idx_nlevels=4, + r_idx_names=["FEE","FIH","FOH","FUM"], + c_idx_nlevels=2) + + >> a=mkdf(5,3,r_idx_nlevels=2,c_idx_nlevels=4) + """ + assert c_idx_nlevels > 0 + assert r_idx_nlevels > 0 + assert r_idx_type is None or ( + r_idx_type in ("i", "f", "s", "u", "dt", "p", "td") and r_idx_nlevels == 1 + ) + assert c_idx_type is None or ( + c_idx_type in ("i", "f", "s", "u", "dt", "p", "td") and c_idx_nlevels == 1 + ) + + columns = makeCustomIndex( + ncols, + nlevels=c_idx_nlevels, + prefix="C", + names=c_idx_names, + ndupe_l=c_ndupe_l, + idx_type=c_idx_type, + ) + index = makeCustomIndex( + nrows, + nlevels=r_idx_nlevels, + prefix="R", + names=r_idx_names, + ndupe_l=r_ndupe_l, + idx_type=r_idx_type, + ) + + # by default, generate data based on location + if data_gen_f is None: + data_gen_f = lambda r, c: f"R{r}C{c}" + + data = [[data_gen_f(r, c) for c in range(ncols)] for r in range(nrows)] + + return DataFrame(data, index, columns, dtype=dtype) + + +def _create_missing_idx(nrows, ncols, density, random_state=None): + if random_state is None: + random_state = np.random + else: + random_state = np.random.RandomState(random_state) + + # below is cribbed from scipy.sparse + size = round((1 - density) * nrows * ncols) + # generate a few more to ensure unique values + min_rows = 5 + fac = 1.02 + extra_size = min(size + min_rows, fac * size) + + def _gen_unique_rand(rng, _extra_size): + ind = rng.rand(int(_extra_size)) + return np.unique(np.floor(ind * nrows * ncols))[:size] + + ind = _gen_unique_rand(random_state, extra_size) + while ind.size < size: + extra_size *= 1.05 + ind = _gen_unique_rand(random_state, extra_size) + + j = np.floor(ind * 1.0 / nrows).astype(int) + i = (ind - j * nrows).astype(int) + return i.tolist(), j.tolist() + + +def makeMissingDataframe(density=0.9, random_state=None): + df = makeDataFrame() + i, j = _create_missing_idx(*df.shape, density=density, random_state=random_state) + df.values[i, j] = np.nan + return df + + +def test_parallel(num_threads=2, kwargs_list=None): + """ + Decorator to run the same function multiple times in parallel. 
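+
+    For example (illustrative)::
+
+        >>> results = []
+        >>> @test_parallel(num_threads=3)
+        ... def record():
+        ...     results.append(1)
+        >>> record()        # doctest: +SKIP
+        >>> len(results)    # doctest: +SKIP
+        3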
+ + Parameters + ---------- + num_threads : int, optional + The number of times the function is run in parallel. + kwargs_list : list of dicts, optional + The list of kwargs to update original + function kwargs on different threads. + + Notes + ----- + This decorator does not pass the return value of the decorated function. + + Original from scikit-image: + + https://github.com/scikit-image/scikit-image/pull/1519 + + """ + assert num_threads > 0 + has_kwargs_list = kwargs_list is not None + if has_kwargs_list: + assert len(kwargs_list) == num_threads + import threading + + def wrapper(func): + @wraps(func) + def inner(*args, **kwargs): + if has_kwargs_list: + update_kwargs = lambda i: dict(kwargs, **kwargs_list[i]) + else: + update_kwargs = lambda i: kwargs + threads = [] + for i in range(num_threads): + updated_kwargs = update_kwargs(i) + thread = threading.Thread(target=func, args=args, kwargs=updated_kwargs) + threads.append(thread) + for thread in threads: + thread.start() + for thread in threads: + thread.join() + + return inner + + return wrapper + + +class SubclassedSeries(Series): + _metadata = ["testattr", "name"] + + @property + def _constructor(self): + return SubclassedSeries + + @property + def _constructor_expanddim(self): + return SubclassedDataFrame + + +class SubclassedDataFrame(DataFrame): + _metadata = ["testattr"] + + @property + def _constructor(self): + return SubclassedDataFrame + + @property + def _constructor_sliced(self): + return SubclassedSeries + + +class SubclassedCategorical(Categorical): + @property + def _constructor(self): + return SubclassedCategorical + + +def _make_skipna_wrapper(alternative, skipna_alternative=None): + """ + Create a function for calling on an array. + + Parameters + ---------- + alternative : function + The function to be called on the array with no NaNs. + Only used when 'skipna_alternative' is None. + skipna_alternative : function + The function to be called on the original array + + Returns + ------- + function + """ + if skipna_alternative: + + def skipna_wrapper(x): + return skipna_alternative(x.values) + + else: + + def skipna_wrapper(x): + nona = x.dropna() + if len(nona) == 0: + return np.nan + return alternative(nona) + + return skipna_wrapper + + +def convert_rows_list_to_csv_str(rows_list: list[str]): + """ + Convert list of CSV rows to single CSV-formatted string for current OS. + + This method is used for creating expected value of to_csv() method. + + Parameters + ---------- + rows_list : List[str] + Each element represents the row of csv. + + Returns + ------- + str + Expected output of to_csv() in current OS. + """ + sep = os.linesep + return sep.join(rows_list) + sep + + +def external_error_raised(expected_exception: type[Exception]) -> ContextManager: + """ + Helper function to mark pytest.raises that have an external error message. + + Parameters + ---------- + expected_exception : Exception + Expected error to raise. + + Returns + ------- + Callable + Regular `pytest.raises` function with `match` equal to `None`. + """ + import pytest + + return pytest.raises(expected_exception, match=None) # noqa: PDF010 + + +cython_table = pd.core.common._cython_table.items() + + +def get_cython_table_params(ndframe, func_names_and_expected): + """ + Combine frame, functions from com._cython_table + keys and expected result. + + Parameters + ---------- + ndframe : DataFrame or Series + func_names_and_expected : Sequence of two items + The first item is a name of a NDFrame method ('sum', 'prod') etc. 
+ The second item is the expected return value. + + Returns + ------- + list + List of three items (DataFrame, function, expected result) + """ + results = [] + for func_name, expected in func_names_and_expected: + results.append((ndframe, func_name, expected)) + results += [ + (ndframe, func, expected) + for func, name in cython_table + if name == func_name + ] + return results + + +def get_op_from_name(op_name: str) -> Callable: + """ + The operator function for a given op name. + + Parameters + ---------- + op_name : str + The op name, in form of "add" or "__add__". + + Returns + ------- + function + A function performing the operation. + """ + short_opname = op_name.strip("_") + try: + op = getattr(operator, short_opname) + except AttributeError: + # Assume it is the reverse operator + rop = getattr(operator, short_opname[1:]) + op = lambda x, y: rop(y, x) + + return op + + +# ----------------------------------------------------------------------------- +# Indexing test helpers + + +def getitem(x): + return x + + +def setitem(x): + return x + + +def loc(x): + return x.loc + + +def iloc(x): + return x.iloc + + +def at(x): + return x.at + + +def iat(x): + return x.iat + + +# ----------------------------------------------------------------------------- + + +def shares_memory(left, right) -> bool: + """ + Pandas-compat for np.shares_memory. + """ + if isinstance(left, np.ndarray) and isinstance(right, np.ndarray): + return np.shares_memory(left, right) + elif isinstance(left, np.ndarray): + # Call with reversed args to get to unpacking logic below. + return shares_memory(right, left) + + if isinstance(left, RangeIndex): + return False + if isinstance(left, MultiIndex): + return shares_memory(left._codes, right) + if isinstance(left, (Index, Series)): + return shares_memory(left._values, right) + + if isinstance(left, NDArrayBackedExtensionArray): + return shares_memory(left._ndarray, right) + if isinstance(left, pd.core.arrays.SparseArray): + return shares_memory(left.sp_values, right) + if isinstance(left, pd.core.arrays.IntervalArray): + return shares_memory(left._left, right) or shares_memory(left._right, right) + + if isinstance(left, ExtensionArray) and left.dtype == "string[pyarrow]": + # https://github.com/pandas-dev/pandas/pull/43930#discussion_r736862669 + if isinstance(right, ExtensionArray) and right.dtype == "string[pyarrow]": + # error: "ExtensionArray" has no attribute "_data" + left_pa_data = left._data # type: ignore[attr-defined] + # error: "ExtensionArray" has no attribute "_data" + right_pa_data = right._data # type: ignore[attr-defined] + left_buf1 = left_pa_data.chunk(0).buffers()[1] + right_buf1 = right_pa_data.chunk(0).buffers()[1] + return left_buf1 == right_buf1 + + if isinstance(left, BaseMaskedArray) and isinstance(right, BaseMaskedArray): + # By convention, we'll say these share memory if they share *either* + # the _data or the _mask + return np.shares_memory(left._data, right._data) or np.shares_memory( + left._mask, right._mask + ) + + if isinstance(left, DataFrame) and len(left._mgr.arrays) == 1: + arr = left._mgr.arrays[0] + return shares_memory(arr, right) + + raise NotImplementedError(type(left), type(right)) diff --git a/.local/lib/python3.8/site-packages/pandas/_testing/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/_testing/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..b858bfb Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_testing/__pycache__/__init__.cpython-38.pyc 
differ diff --git a/.local/lib/python3.8/site-packages/pandas/_testing/__pycache__/_hypothesis.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/_testing/__pycache__/_hypothesis.cpython-38.pyc new file mode 100644 index 0000000..28c2cd2 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_testing/__pycache__/_hypothesis.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/_testing/__pycache__/_io.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/_testing/__pycache__/_io.cpython-38.pyc new file mode 100644 index 0000000..843b4d5 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_testing/__pycache__/_io.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/_testing/__pycache__/_random.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/_testing/__pycache__/_random.cpython-38.pyc new file mode 100644 index 0000000..0efb6fb Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_testing/__pycache__/_random.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/_testing/__pycache__/_warnings.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/_testing/__pycache__/_warnings.cpython-38.pyc new file mode 100644 index 0000000..38b550c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_testing/__pycache__/_warnings.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/_testing/__pycache__/asserters.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/_testing/__pycache__/asserters.cpython-38.pyc new file mode 100644 index 0000000..9385c59 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_testing/__pycache__/asserters.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/_testing/__pycache__/compat.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/_testing/__pycache__/compat.cpython-38.pyc new file mode 100644 index 0000000..f3039ae Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_testing/__pycache__/compat.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/_testing/__pycache__/contexts.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/_testing/__pycache__/contexts.cpython-38.pyc new file mode 100644 index 0000000..0ffa5cc Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/_testing/__pycache__/contexts.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/_testing/_hypothesis.py b/.local/lib/python3.8/site-packages/pandas/_testing/_hypothesis.py new file mode 100644 index 0000000..5256a30 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_testing/_hypothesis.py @@ -0,0 +1,89 @@ +""" +Hypothesis data generator helpers. 
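+
+These strategies are meant to be consumed with ``hypothesis.given``; a
+minimal sketch (illustrative only)::
+
+    from hypothesis import given
+
+    @given(INT_NEG_999_TO_POS_999)
+    def test_abs_is_nonnegative(x):
+        assert abs(x) >= 0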
+""" +from datetime import datetime + +from hypothesis import strategies as st +from hypothesis.extra.dateutil import timezones as dateutil_timezones +from hypothesis.extra.pytz import timezones as pytz_timezones + +from pandas.compat import is_platform_windows + +import pandas as pd + +from pandas.tseries.offsets import ( + BMonthBegin, + BMonthEnd, + BQuarterBegin, + BQuarterEnd, + BYearBegin, + BYearEnd, + MonthBegin, + MonthEnd, + QuarterBegin, + QuarterEnd, + YearBegin, + YearEnd, +) + +OPTIONAL_INTS = st.lists(st.one_of(st.integers(), st.none()), max_size=10, min_size=3) + +OPTIONAL_FLOATS = st.lists(st.one_of(st.floats(), st.none()), max_size=10, min_size=3) + +OPTIONAL_TEXT = st.lists(st.one_of(st.none(), st.text()), max_size=10, min_size=3) + +OPTIONAL_DICTS = st.lists( + st.one_of(st.none(), st.dictionaries(st.text(), st.integers())), + max_size=10, + min_size=3, +) + +OPTIONAL_LISTS = st.lists( + st.one_of(st.none(), st.lists(st.text(), max_size=10, min_size=3)), + max_size=10, + min_size=3, +) + +OPTIONAL_ONE_OF_ALL = st.one_of( + OPTIONAL_DICTS, OPTIONAL_FLOATS, OPTIONAL_INTS, OPTIONAL_LISTS, OPTIONAL_TEXT +) + +if is_platform_windows(): + DATETIME_NO_TZ = st.datetimes(min_value=datetime(1900, 1, 1)) +else: + DATETIME_NO_TZ = st.datetimes() + +DATETIME_JAN_1_1900_OPTIONAL_TZ = st.datetimes( + min_value=pd.Timestamp(1900, 1, 1).to_pydatetime(), + max_value=pd.Timestamp(1900, 1, 1).to_pydatetime(), + timezones=st.one_of(st.none(), dateutil_timezones(), pytz_timezones()), +) + +DATETIME_IN_PD_TIMESTAMP_RANGE_NO_TZ = st.datetimes( + min_value=pd.Timestamp.min.to_pydatetime(warn=False), + max_value=pd.Timestamp.max.to_pydatetime(warn=False), +) + +INT_NEG_999_TO_POS_999 = st.integers(-999, 999) + +# The strategy for each type is registered in conftest.py, as they don't carry +# enough runtime information (e.g. type hints) to infer how to build them. 
+YQM_OFFSET = st.one_of( + *map( + st.from_type, + [ + MonthBegin, + MonthEnd, + BMonthBegin, + BMonthEnd, + QuarterBegin, + QuarterEnd, + BQuarterBegin, + BQuarterEnd, + YearBegin, + YearEnd, + BYearBegin, + BYearEnd, + ], + ) +) diff --git a/.local/lib/python3.8/site-packages/pandas/_testing/_io.py b/.local/lib/python3.8/site-packages/pandas/_testing/_io.py new file mode 100644 index 0000000..f465458 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_testing/_io.py @@ -0,0 +1,421 @@ +from __future__ import annotations + +import bz2 +from functools import wraps +import gzip +from typing import ( + TYPE_CHECKING, + Any, + Callable, +) +import zipfile + +from pandas._typing import ( + FilePath, + ReadPickleBuffer, +) +from pandas.compat import get_lzma_file +from pandas.compat._optional import import_optional_dependency + +import pandas as pd +from pandas._testing._random import rands +from pandas._testing.contexts import ensure_clean + +from pandas.io.common import urlopen + +if TYPE_CHECKING: + from pandas import ( + DataFrame, + Series, + ) + +# skip tests on exceptions with these messages +_network_error_messages = ( + # 'urlopen error timed out', + # 'timeout: timed out', + # 'socket.timeout: timed out', + "timed out", + "Server Hangup", + "HTTP Error 503: Service Unavailable", + "502: Proxy Error", + "HTTP Error 502: internal error", + "HTTP Error 502", + "HTTP Error 503", + "HTTP Error 403", + "HTTP Error 400", + "Temporary failure in name resolution", + "Name or service not known", + "Connection refused", + "certificate verify", +) + +# or this e.errno/e.reason.errno +_network_errno_vals = ( + 101, # Network is unreachable + 111, # Connection refused + 110, # Connection timed out + 104, # Connection reset Error + 54, # Connection reset by peer + 60, # urllib.error.URLError: [Errno 60] Connection timed out +) + +# Both of the above shouldn't mask real issues such as 404's +# or refused connections (changed DNS). +# But some tests (test_data yahoo) contact incredibly flakey +# servers. + +# and conditionally raise on exception types in _get_default_network_errors + + +def _get_default_network_errors(): + # Lazy import for http.client & urllib.error + # because it imports many things from the stdlib + import http.client + import urllib.error + + return (OSError, http.client.HTTPException, TimeoutError, urllib.error.URLError) + + +def optional_args(decorator): + """ + allows a decorator to take optional positional and keyword arguments. + Assumes that taking a single, callable, positional argument means that + it is decorating a function, i.e. something like this:: + + @my_decorator + def function(): pass + + Calls decorator with decorator(f, *args, **kwargs) + """ + + @wraps(decorator) + def wrapper(*args, **kwargs): + def dec(f): + return decorator(f, *args, **kwargs) + + is_decorating = not kwargs and len(args) == 1 and callable(args[0]) + if is_decorating: + f = args[0] + args = () + return dec(f) + else: + return dec + + return wrapper + + +@optional_args +def network( + t, + url="https://www.google.com", + raise_on_error=False, + check_before_test=False, + error_classes=None, + skip_errnos=_network_errno_vals, + _skip_on_messages=_network_error_messages, +): + """ + Label a test as requiring network connection and, if an error is + encountered, only raise if it does not find a network connection. 
+
+    In comparison to ``network``, this assumes an added contract to your test:
+    you must assert that, under normal conditions, your test will ONLY fail if
+    it does not have network connectivity.
+
+    You can call this in 3 ways: as a standard decorator, with keyword
+    arguments, or with a positional argument that is the url to check.
+
+    Parameters
+    ----------
+    t : callable
+        The test requiring network connectivity.
+    url : path
+        The url to test via ``pandas.io.common.urlopen`` to check
+        for connectivity. Defaults to 'https://www.google.com'.
+    raise_on_error : bool
+        If True, never catches errors.
+    check_before_test : bool
+        If True, checks connectivity before running the test case.
+    error_classes : tuple or Exception
+        Error classes to ignore. If the raised error is not in
+        ``error_classes``, it is re-raised. Defaults to OSError. Be careful
+        about changing the error classes here.
+    skip_errnos : iterable of int
+        Any exception that has .errno or .reason.errno set to one
+        of these values will be skipped with an appropriate
+        message.
+    _skip_on_messages : iterable of string
+        Any exception ``e`` for which one of the strings is
+        a substring of str(e) will be skipped with an appropriate
+        message. Intended to suppress errors where an errno isn't available.
+
+    Notes
+    -----
+    * ``raise_on_error`` supersedes ``check_before_test``
+
+    Returns
+    -------
+    t : callable
+        The decorated test ``t``, with checks for connectivity errors.
+
+    Example
+    -------
+
+    Tests decorated with @network will fail if it's possible to make a network
+    connection to another URL (defaults to google.com)::
+
+      >>> from pandas import _testing as tm
+      >>> @tm.network
+      ... def test_network():
+      ...     with pd.io.common.urlopen("rabbit://bonanza.com"):
+      ...         pass
+      >>> test_network()  # doctest: +SKIP
+      Traceback
+         ...
+      URLError: <urlopen error unknown url type: rabbit>
+
+    You can specify alternative URLs::
+
+      >>> @tm.network("https://www.yahoo.com")
+      ... def test_something_with_yahoo():
+      ...     raise OSError("Failure Message")
+      >>> test_something_with_yahoo()  # doctest: +SKIP
+      Traceback (most recent call last):
+      ...
+      OSError: Failure Message
+
+    If you set check_before_test, it will check the url first and not run the
+    test on failure::
+
+      >>> @tm.network("failing://url.blaher", check_before_test=True)
+      ... def test_something():
+      ...     print("I ran!")
+      ...     raise ValueError("Failure")
+      >>> test_something()  # doctest: +SKIP
+      Traceback (most recent call last):
+      ...
+
+    Errors not related to networking will always be raised.
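+
+    Setting ``raise_on_error`` disables all of the catching above; a sketch
+    (hypothetical test name, shown with a skip)::
+
+      >>> @tm.network(raise_on_error=True)  # doctest: +SKIP
+      ... def test_strict():
+      ...     raise OSError("not swallowed")
+      >>> test_strict()  # doctest: +SKIP
+      Traceback (most recent call last):
+      ...
+      OSError: not swallowed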
+ """ + import pytest + + if error_classes is None: + error_classes = _get_default_network_errors() + + t.network = True + + @wraps(t) + def wrapper(*args, **kwargs): + if ( + check_before_test + and not raise_on_error + and not can_connect(url, error_classes) + ): + pytest.skip( + f"May not have network connectivity because cannot connect to {url}" + ) + try: + return t(*args, **kwargs) + except Exception as err: + errno = getattr(err, "errno", None) + if not errno and hasattr(errno, "reason"): + # error: "Exception" has no attribute "reason" + errno = getattr(err.reason, "errno", None) # type: ignore[attr-defined] + + if errno in skip_errnos: + pytest.skip(f"Skipping test due to known errno and error {err}") + + e_str = str(err) + + if any(m.lower() in e_str.lower() for m in _skip_on_messages): + pytest.skip( + f"Skipping test because exception message is known and error {err}" + ) + + if not isinstance(err, error_classes) or raise_on_error: + raise + else: + pytest.skip( + f"Skipping test due to lack of connectivity and error {err}" + ) + + return wrapper + + +with_connectivity_check = network + + +def can_connect(url, error_classes=None): + """ + Try to connect to the given url. True if succeeds, False if OSError + raised + + Parameters + ---------- + url : basestring + The URL to try to connect to + + Returns + ------- + connectable : bool + Return True if no OSError (unable to connect) or URLError (bad url) was + raised + """ + if error_classes is None: + error_classes = _get_default_network_errors() + + try: + with urlopen(url): + pass + except error_classes: + return False + else: + return True + + +# ------------------------------------------------------------------ +# File-IO + + +def round_trip_pickle( + obj: Any, path: FilePath | ReadPickleBuffer | None = None +) -> DataFrame | Series: + """ + Pickle an object and then read it again. + + Parameters + ---------- + obj : any object + The object to pickle and then re-read. + path : str, path object or file-like object, default None + The path where the pickled object is written and then read. + + Returns + ------- + pandas object + The original object that was pickled and then re-read. + """ + _path = path + if _path is None: + _path = f"__{rands(10)}__.pickle" + with ensure_clean(_path) as temp_path: + pd.to_pickle(obj, temp_path) + return pd.read_pickle(temp_path) + + +def round_trip_pathlib(writer, reader, path: str | None = None): + """ + Write an object to file specified by a pathlib.Path and read it back + + Parameters + ---------- + writer : callable bound to pandas object + IO writing function (e.g. DataFrame.to_csv ) + reader : callable + IO reading function (e.g. pd.read_csv ) + path : str, default None + The path where the object is written and then read. + + Returns + ------- + pandas object + The original object that was serialized and then re-read. + """ + import pytest + + Path = pytest.importorskip("pathlib").Path + if path is None: + path = "___pathlib___" + with ensure_clean(path) as path: + writer(Path(path)) + obj = reader(Path(path)) + return obj + + +def round_trip_localpath(writer, reader, path: str | None = None): + """ + Write an object to file specified by a py.path LocalPath and read it back. + + Parameters + ---------- + writer : callable bound to pandas object + IO writing function (e.g. DataFrame.to_csv ) + reader : callable + IO reading function (e.g. pd.read_csv ) + path : str, default None + The path where the object is written and then read. 
+ + Returns + ------- + pandas object + The original object that was serialized and then re-read. + """ + import pytest + + LocalPath = pytest.importorskip("py.path").local + if path is None: + path = "___localpath___" + with ensure_clean(path) as path: + writer(LocalPath(path)) + obj = reader(LocalPath(path)) + return obj + + +def write_to_compressed(compression, path, data, dest="test"): + """ + Write data to a compressed file. + + Parameters + ---------- + compression : {'gzip', 'bz2', 'zip', 'xz', 'zstd'} + The compression type to use. + path : str + The file path to write the data. + data : str + The data to write. + dest : str, default "test" + The destination file (for ZIP only) + + Raises + ------ + ValueError : An invalid compression value was passed in. + """ + args: tuple[Any, ...] = (data,) + mode = "wb" + method = "write" + compress_method: Callable + + if compression == "zip": + compress_method = zipfile.ZipFile + mode = "w" + args = (dest, data) + method = "writestr" + elif compression == "gzip": + compress_method = gzip.GzipFile + elif compression == "bz2": + compress_method = bz2.BZ2File + elif compression == "zstd": + compress_method = import_optional_dependency("zstandard").open + elif compression == "xz": + compress_method = get_lzma_file() + else: + raise ValueError(f"Unrecognized compression type: {compression}") + + with compress_method(path, mode=mode) as f: + getattr(f, method)(*args) + + +# ------------------------------------------------------------------ +# Plotting + + +def close(fignum=None): + from matplotlib.pyplot import ( + close as _close, + get_fignums, + ) + + if fignum is None: + for fignum in get_fignums(): + _close(fignum) + else: + _close(fignum) diff --git a/.local/lib/python3.8/site-packages/pandas/_testing/_random.py b/.local/lib/python3.8/site-packages/pandas/_testing/_random.py new file mode 100644 index 0000000..a646d76 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_testing/_random.py @@ -0,0 +1,48 @@ +import string + +import numpy as np + + +def randbool(size=(), p: float = 0.5): + return np.random.rand(*size) <= p + + +RANDS_CHARS = np.array(list(string.ascii_letters + string.digits), dtype=(np.str_, 1)) +RANDU_CHARS = np.array( + list("".join(map(chr, range(1488, 1488 + 26))) + string.digits), + dtype=(np.unicode_, 1), +) + + +def rands_array(nchars, size, dtype="O"): + """ + Generate an array of byte strings. + """ + retval = ( + np.random.choice(RANDS_CHARS, size=nchars * np.prod(size)) + .view((np.str_, nchars)) + .reshape(size) + ) + return retval.astype(dtype) + + +def randu_array(nchars, size, dtype="O"): + """ + Generate an array of unicode strings. + """ + retval = ( + np.random.choice(RANDU_CHARS, size=nchars * np.prod(size)) + .view((np.unicode_, nchars)) + .reshape(size) + ) + return retval.astype(dtype) + + +def rands(nchars): + """ + Generate one random byte string. + + See `rands_array` if you want to create an array of random strings. 
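+
+    Illustrative draw (output is random, hence the skip):
+
+    >>> rands(8)  # doctest: +SKIP
+    'aB3kQz9x'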
+ + """ + return "".join(np.random.choice(RANDS_CHARS, nchars)) diff --git a/.local/lib/python3.8/site-packages/pandas/_testing/_warnings.py b/.local/lib/python3.8/site-packages/pandas/_testing/_warnings.py new file mode 100644 index 0000000..f9443f8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_testing/_warnings.py @@ -0,0 +1,205 @@ +from __future__ import annotations + +from contextlib import contextmanager +import re +import sys +from typing import ( + Sequence, + Type, + cast, +) +import warnings + + +@contextmanager +def assert_produces_warning( + expected_warning: type[Warning] | bool | None = Warning, + filter_level="always", + check_stacklevel: bool = True, + raise_on_extra_warnings: bool = True, + match: str | None = None, +): + """ + Context manager for running code expected to either raise a specific + warning, or not raise any warnings. Verifies that the code raises the + expected warning, and that it does not raise any other unexpected + warnings. It is basically a wrapper around ``warnings.catch_warnings``. + + Parameters + ---------- + expected_warning : {Warning, False, None}, default Warning + The type of Exception raised. ``exception.Warning`` is the base + class for all warnings. To check that no warning is returned, + specify ``False`` or ``None``. + filter_level : str or None, default "always" + Specifies whether warnings are ignored, displayed, or turned + into errors. + Valid values are: + + * "error" - turns matching warnings into exceptions + * "ignore" - discard the warning + * "always" - always emit a warning + * "default" - print the warning the first time it is generated + from each location + * "module" - print the warning the first time it is generated + from each module + * "once" - print the warning the first time it is generated + + check_stacklevel : bool, default True + If True, displays the line that called the function containing + the warning to show were the function is called. Otherwise, the + line that implements the function is displayed. + raise_on_extra_warnings : bool, default True + Whether extra warnings not of the type `expected_warning` should + cause the test to fail. + match : str, optional + Match warning message. + + Examples + -------- + >>> import warnings + >>> with assert_produces_warning(): + ... warnings.warn(UserWarning()) + ... + >>> with assert_produces_warning(False): + ... warnings.warn(RuntimeWarning()) + ... + Traceback (most recent call last): + ... + AssertionError: Caused unexpected warning(s): ['RuntimeWarning']. + >>> with assert_produces_warning(UserWarning): + ... warnings.warn(RuntimeWarning()) + Traceback (most recent call last): + ... + AssertionError: Did not see expected warning of class 'UserWarning'. + + ..warn:: This is *not* thread-safe. 
+ """ + __tracebackhide__ = True + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter(filter_level) + yield w + + if expected_warning: + expected_warning = cast(Type[Warning], expected_warning) + _assert_caught_expected_warning( + caught_warnings=w, + expected_warning=expected_warning, + match=match, + check_stacklevel=check_stacklevel, + ) + + if raise_on_extra_warnings: + _assert_caught_no_extra_warnings( + caught_warnings=w, + expected_warning=expected_warning, + ) + + +def _assert_caught_expected_warning( + *, + caught_warnings: Sequence[warnings.WarningMessage], + expected_warning: type[Warning], + match: str | None, + check_stacklevel: bool, +) -> None: + """Assert that there was the expected warning among the caught warnings.""" + saw_warning = False + matched_message = False + unmatched_messages = [] + + for actual_warning in caught_warnings: + if issubclass(actual_warning.category, expected_warning): + saw_warning = True + + if check_stacklevel and issubclass( + actual_warning.category, (FutureWarning, DeprecationWarning) + ): + _assert_raised_with_correct_stacklevel(actual_warning) + + if match is not None: + if re.search(match, str(actual_warning.message)): + matched_message = True + else: + unmatched_messages.append(actual_warning.message) + + if not saw_warning: + raise AssertionError( + f"Did not see expected warning of class " + f"{repr(expected_warning.__name__)}" + ) + + if match and not matched_message: + raise AssertionError( + f"Did not see warning {repr(expected_warning.__name__)} " + f"matching '{match}'. The emitted warning messages are " + f"{unmatched_messages}" + ) + + +def _assert_caught_no_extra_warnings( + *, + caught_warnings: Sequence[warnings.WarningMessage], + expected_warning: type[Warning] | bool | None, +) -> None: + """Assert that no extra warnings apart from the expected ones are caught.""" + extra_warnings = [] + + for actual_warning in caught_warnings: + if _is_unexpected_warning(actual_warning, expected_warning): + # GH#38630 pytest.filterwarnings does not suppress these. + if actual_warning.category == ResourceWarning: + # GH 44732: Don't make the CI flaky by filtering SSL-related + # ResourceWarning from dependencies + unclosed_ssl = ( + "unclosed transport bool: + """Check if the actual warning issued is unexpected.""" + if actual_warning and not expected_warning: + return True + expected_warning = cast(Type[Warning], expected_warning) + return bool(not issubclass(actual_warning.category, expected_warning)) + + +def _assert_raised_with_correct_stacklevel( + actual_warning: warnings.WarningMessage, +) -> None: + from inspect import ( + getframeinfo, + stack, + ) + + caller = getframeinfo(stack()[4][0]) + msg = ( + "Warning not set with correct stacklevel. " + f"File where warning is raised: {actual_warning.filename} != " + f"{caller.filename}. 
Warning message: {actual_warning.message}" + ) + assert actual_warning.filename == caller.filename, msg diff --git a/.local/lib/python3.8/site-packages/pandas/_testing/asserters.py b/.local/lib/python3.8/site-packages/pandas/_testing/asserters.py new file mode 100644 index 0000000..77e4065 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_testing/asserters.py @@ -0,0 +1,1485 @@ +from __future__ import annotations + +from typing import cast +import warnings + +import numpy as np + +from pandas._libs.lib import ( + NoDefault, + no_default, +) +from pandas._libs.missing import is_matching_na +import pandas._libs.testing as _testing +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + is_bool, + is_categorical_dtype, + is_extension_array_dtype, + is_interval_dtype, + is_number, + is_numeric_dtype, + needs_i8_conversion, +) +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + PandasDtype, +) +from pandas.core.dtypes.missing import array_equivalent + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + DatetimeIndex, + Index, + IntervalIndex, + MultiIndex, + PeriodIndex, + RangeIndex, + Series, + TimedeltaIndex, +) +from pandas.core.algorithms import ( + safe_sort, + take_nd, +) +from pandas.core.arrays import ( + DatetimeArray, + ExtensionArray, + IntervalArray, + PeriodArray, + TimedeltaArray, +) +from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin +from pandas.core.arrays.string_ import StringDtype + +from pandas.io.formats.printing import pprint_thing + + +def assert_almost_equal( + left, + right, + check_dtype: bool | str = "equiv", + check_less_precise: bool | int | NoDefault = no_default, + rtol: float = 1.0e-5, + atol: float = 1.0e-8, + **kwargs, +): + """ + Check that the left and right objects are approximately equal. + + By approximately equal, we refer to objects that are numbers or that + contain numbers which may be equivalent to specific levels of precision. + + Parameters + ---------- + left : object + right : object + check_dtype : bool or {'equiv'}, default 'equiv' + Check dtype if both a and b are the same type. If 'equiv' is passed in, + then `RangeIndex` and `Int64Index` are also considered equivalent + when doing type checking. + check_less_precise : bool or int, default False + Specify comparison precision. 5 digits (False) or 3 digits (True) + after decimal points are compared. If int, then specify the number + of digits to compare. + + When comparing two numbers, if the first number has magnitude less + than 1e-5, we compare the two numbers directly and check whether + they are equivalent within the specified precision. Otherwise, we + compare the **ratio** of the second number to the first number and + check whether it is equivalent to 1 within the specified precision. + + .. deprecated:: 1.1.0 + Use `rtol` and `atol` instead to define relative/absolute + tolerance, respectively. Similar to :func:`math.isclose`. + rtol : float, default 1e-5 + Relative tolerance. + + .. versionadded:: 1.1.0 + atol : float, default 1e-8 + Absolute tolerance. + + .. versionadded:: 1.1.0 + """ + if check_less_precise is not no_default: + warnings.warn( + "The 'check_less_precise' keyword in testing.assert_*_equal " + "is deprecated and will be removed in a future version. 
" + "You can stop passing 'check_less_precise' to silence this warning.", + FutureWarning, + stacklevel=find_stack_level(), + ) + # https://github.com/python/mypy/issues/7642 + # error: Argument 1 to "_get_tol_from_less_precise" has incompatible + # type "Union[bool, int, NoDefault]"; expected "Union[bool, int]" + rtol = atol = _get_tol_from_less_precise( + check_less_precise # type: ignore[arg-type] + ) + + if isinstance(left, Index): + assert_index_equal( + left, + right, + check_exact=False, + exact=check_dtype, + rtol=rtol, + atol=atol, + **kwargs, + ) + + elif isinstance(left, Series): + assert_series_equal( + left, + right, + check_exact=False, + check_dtype=check_dtype, + rtol=rtol, + atol=atol, + **kwargs, + ) + + elif isinstance(left, DataFrame): + assert_frame_equal( + left, + right, + check_exact=False, + check_dtype=check_dtype, + rtol=rtol, + atol=atol, + **kwargs, + ) + + else: + # Other sequences. + if check_dtype: + if is_number(left) and is_number(right): + # Do not compare numeric classes, like np.float64 and float. + pass + elif is_bool(left) and is_bool(right): + # Do not compare bool classes, like np.bool_ and bool. + pass + else: + if isinstance(left, np.ndarray) or isinstance(right, np.ndarray): + obj = "numpy array" + else: + obj = "Input" + assert_class_equal(left, right, obj=obj) + + # if we have "equiv", this becomes True + check_dtype = bool(check_dtype) + _testing.assert_almost_equal( + left, right, check_dtype=check_dtype, rtol=rtol, atol=atol, **kwargs + ) + + +def _get_tol_from_less_precise(check_less_precise: bool | int) -> float: + """ + Return the tolerance equivalent to the deprecated `check_less_precise` + parameter. + + Parameters + ---------- + check_less_precise : bool or int + + Returns + ------- + float + Tolerance to be used as relative/absolute tolerance. + + Examples + -------- + >>> # Using check_less_precise as a bool: + >>> _get_tol_from_less_precise(False) + 5e-06 + >>> _get_tol_from_less_precise(True) + 0.0005 + >>> # Using check_less_precise as an int representing the decimal + >>> # tolerance intended: + >>> _get_tol_from_less_precise(2) + 0.005 + >>> _get_tol_from_less_precise(8) + 5e-09 + """ + if isinstance(check_less_precise, bool): + if check_less_precise: + # 3-digit tolerance + return 0.5e-3 + else: + # 5-digit tolerance + return 0.5e-5 + else: + # Equivalent to setting checking_less_precise= + return 0.5 * 10 ** -check_less_precise + + +def _check_isinstance(left, right, cls): + """ + Helper method for our assert_* methods that ensures that + the two objects being compared have the right type before + proceeding with the comparison. + + Parameters + ---------- + left : The first object being compared. + right : The second object being compared. + cls : The class type to check against. + + Raises + ------ + AssertionError : Either `left` or `right` is not an instance of `cls`. 
+ """ + cls_name = cls.__name__ + + if not isinstance(left, cls): + raise AssertionError( + f"{cls_name} Expected type {cls}, found {type(left)} instead" + ) + if not isinstance(right, cls): + raise AssertionError( + f"{cls_name} Expected type {cls}, found {type(right)} instead" + ) + + +def assert_dict_equal(left, right, compare_keys: bool = True): + + _check_isinstance(left, right, dict) + _testing.assert_dict_equal(left, right, compare_keys=compare_keys) + + +def assert_index_equal( + left: Index, + right: Index, + exact: bool | str = "equiv", + check_names: bool = True, + check_less_precise: bool | int | NoDefault = no_default, + check_exact: bool = True, + check_categorical: bool = True, + check_order: bool = True, + rtol: float = 1.0e-5, + atol: float = 1.0e-8, + obj: str = "Index", +) -> None: + """ + Check that left and right Index are equal. + + Parameters + ---------- + left : Index + right : Index + exact : bool or {'equiv'}, default 'equiv' + Whether to check the Index class, dtype and inferred_type + are identical. If 'equiv', then RangeIndex can be substituted for + Int64Index as well. + check_names : bool, default True + Whether to check the names attribute. + check_less_precise : bool or int, default False + Specify comparison precision. Only used when check_exact is False. + 5 digits (False) or 3 digits (True) after decimal points are compared. + If int, then specify the digits to compare. + + .. deprecated:: 1.1.0 + Use `rtol` and `atol` instead to define relative/absolute + tolerance, respectively. Similar to :func:`math.isclose`. + check_exact : bool, default True + Whether to compare number exactly. + check_categorical : bool, default True + Whether to compare internal Categorical exactly. + check_order : bool, default True + Whether to compare the order of index entries as well as their values. + If True, both indexes must contain the same elements, in the same order. + If False, both indexes must contain the same elements, but in any order. + + .. versionadded:: 1.2.0 + rtol : float, default 1e-5 + Relative tolerance. Only used when check_exact is False. + + .. versionadded:: 1.1.0 + atol : float, default 1e-8 + Absolute tolerance. Only used when check_exact is False. + + .. versionadded:: 1.1.0 + obj : str, default 'Index' + Specify object name being compared, internally used to show appropriate + assertion message. 
+ + Examples + -------- + >>> from pandas import testing as tm + >>> a = pd.Index([1, 2, 3]) + >>> b = pd.Index([1, 2, 3]) + >>> tm.assert_index_equal(a, b) + """ + __tracebackhide__ = True + + def _check_types(left, right, obj="Index") -> None: + if not exact: + return + + assert_class_equal(left, right, exact=exact, obj=obj) + assert_attr_equal("inferred_type", left, right, obj=obj) + + # Skip exact dtype checking when `check_categorical` is False + if is_categorical_dtype(left.dtype) and is_categorical_dtype(right.dtype): + if check_categorical: + assert_attr_equal("dtype", left, right, obj=obj) + assert_index_equal(left.categories, right.categories, exact=exact) + return + + assert_attr_equal("dtype", left, right, obj=obj) + + def _get_ilevel_values(index, level): + # accept level number only + unique = index.levels[level] + level_codes = index.codes[level] + filled = take_nd(unique._values, level_codes, fill_value=unique._na_value) + return unique._shallow_copy(filled, name=index.names[level]) + + if check_less_precise is not no_default: + warnings.warn( + "The 'check_less_precise' keyword in testing.assert_*_equal " + "is deprecated and will be removed in a future version. " + "You can stop passing 'check_less_precise' to silence this warning.", + FutureWarning, + stacklevel=find_stack_level(), + ) + # https://github.com/python/mypy/issues/7642 + # error: Argument 1 to "_get_tol_from_less_precise" has incompatible + # type "Union[bool, int, NoDefault]"; expected "Union[bool, int]" + rtol = atol = _get_tol_from_less_precise( + check_less_precise # type: ignore[arg-type] + ) + + # instance validation + _check_isinstance(left, right, Index) + + # class / dtype comparison + _check_types(left, right, obj=obj) + + # level comparison + if left.nlevels != right.nlevels: + msg1 = f"{obj} levels are different" + msg2 = f"{left.nlevels}, {left}" + msg3 = f"{right.nlevels}, {right}" + raise_assert_detail(obj, msg1, msg2, msg3) + + # length comparison + if len(left) != len(right): + msg1 = f"{obj} length are different" + msg2 = f"{len(left)}, {left}" + msg3 = f"{len(right)}, {right}" + raise_assert_detail(obj, msg1, msg2, msg3) + + # If order doesn't matter then sort the index entries + if not check_order: + left = Index(safe_sort(left)) + right = Index(safe_sort(right)) + + # MultiIndex special comparison for little-friendly error messages + if left.nlevels > 1: + left = cast(MultiIndex, left) + right = cast(MultiIndex, right) + + for level in range(left.nlevels): + # cannot use get_level_values here because it can change dtype + llevel = _get_ilevel_values(left, level) + rlevel = _get_ilevel_values(right, level) + + lobj = f"MultiIndex level [{level}]" + assert_index_equal( + llevel, + rlevel, + exact=exact, + check_names=check_names, + check_exact=check_exact, + rtol=rtol, + atol=atol, + obj=lobj, + ) + # get_level_values may change dtype + _check_types(left.levels[level], right.levels[level], obj=obj) + + # skip exact index checking when `check_categorical` is False + if check_exact and check_categorical: + if not left.equals(right): + mismatch = left._values != right._values + + diff = np.sum(mismatch.astype(int)) * 100.0 / len(left) + msg = f"{obj} values are different ({np.round(diff, 5)} %)" + raise_assert_detail(obj, msg, left, right) + else: + + # if we have "equiv", this becomes True + exact_bool = bool(exact) + _testing.assert_almost_equal( + left.values, + right.values, + rtol=rtol, + atol=atol, + check_dtype=exact_bool, + obj=obj, + lobj=left, + robj=right, + ) + + # metadata 
comparison + if check_names: + assert_attr_equal("names", left, right, obj=obj) + if isinstance(left, PeriodIndex) or isinstance(right, PeriodIndex): + assert_attr_equal("freq", left, right, obj=obj) + if isinstance(left, IntervalIndex) or isinstance(right, IntervalIndex): + assert_interval_array_equal(left._values, right._values) + + if check_categorical: + if is_categorical_dtype(left.dtype) or is_categorical_dtype(right.dtype): + assert_categorical_equal(left._values, right._values, obj=f"{obj} category") + + +def assert_class_equal(left, right, exact: bool | str = True, obj="Input"): + """ + Checks classes are equal. + """ + from pandas.core.indexes.numeric import NumericIndex + + __tracebackhide__ = True + + def repr_class(x): + if isinstance(x, Index): + # return Index as it is to include values in the error message + return x + + return type(x).__name__ + + if type(left) == type(right): + return + + if exact == "equiv": + # accept equivalence of NumericIndex (sub-)classes + if isinstance(left, NumericIndex) and isinstance(right, NumericIndex): + return + + msg = f"{obj} classes are different" + raise_assert_detail(obj, msg, repr_class(left), repr_class(right)) + + +def assert_attr_equal(attr: str, left, right, obj: str = "Attributes"): + """ + Check attributes are equal. Both objects must have attribute. + + Parameters + ---------- + attr : str + Attribute name being compared. + left : object + right : object + obj : str, default 'Attributes' + Specify object name being compared, internally used to show appropriate + assertion message + """ + __tracebackhide__ = True + + left_attr = getattr(left, attr) + right_attr = getattr(right, attr) + + if left_attr is right_attr: + return True + elif is_matching_na(left_attr, right_attr): + # e.g. both np.nan, both NaT, both pd.NA, ... + return True + + try: + result = left_attr == right_attr + except TypeError: + # datetimetz on rhs may raise TypeError + result = False + if (left_attr is pd.NA) ^ (right_attr is pd.NA): + result = False + elif not isinstance(result, bool): + result = result.all() + + if result: + return True + else: + msg = f'Attribute "{attr}" are different' + raise_assert_detail(obj, msg, left_attr, right_attr) + + +def assert_is_valid_plot_return_object(objs): + import matplotlib.pyplot as plt + + if isinstance(objs, (Series, np.ndarray)): + for el in objs.ravel(): + msg = ( + "one of 'objs' is not a matplotlib Axes instance, " + f"type encountered {repr(type(el).__name__)}" + ) + assert isinstance(el, (plt.Axes, dict)), msg + else: + msg = ( + "objs is neither an ndarray of Artist instances nor a single " + "ArtistArtist instance, tuple, or dict, 'objs' is a " + f"{repr(type(objs).__name__)}" + ) + assert isinstance(objs, (plt.Artist, tuple, dict)), msg + + +def assert_is_sorted(seq): + """Assert that the sequence is sorted.""" + if isinstance(seq, (Index, Series)): + seq = seq.values + # sorting does not change precisions + assert_numpy_array_equal(seq, np.sort(np.array(seq))) + + +def assert_categorical_equal( + left, right, check_dtype=True, check_category_order=True, obj="Categorical" +): + """ + Test that Categoricals are equivalent. + + Parameters + ---------- + left : Categorical + right : Categorical + check_dtype : bool, default True + Check that integer dtype of the codes are the same. + check_category_order : bool, default True + Whether the order of the categories should be compared, which + implies identical integer codes. If False, only the resulting + values are compared. 
The ordered attribute is + checked regardless. + obj : str, default 'Categorical' + Specify object name being compared, internally used to show appropriate + assertion message. + """ + _check_isinstance(left, right, Categorical) + + exact: bool | str + if isinstance(left.categories, RangeIndex) or isinstance( + right.categories, RangeIndex + ): + exact = "equiv" + else: + # We still want to require exact matches for NumericIndex + exact = True + + if check_category_order: + assert_index_equal( + left.categories, right.categories, obj=f"{obj}.categories", exact=exact + ) + assert_numpy_array_equal( + left.codes, right.codes, check_dtype=check_dtype, obj=f"{obj}.codes" + ) + else: + try: + lc = left.categories.sort_values() + rc = right.categories.sort_values() + except TypeError: + # e.g. '<' not supported between instances of 'int' and 'str' + lc, rc = left.categories, right.categories + assert_index_equal(lc, rc, obj=f"{obj}.categories", exact=exact) + assert_index_equal( + left.categories.take(left.codes), + right.categories.take(right.codes), + obj=f"{obj}.values", + exact=exact, + ) + + assert_attr_equal("ordered", left, right, obj=obj) + + +def assert_interval_array_equal(left, right, exact="equiv", obj="IntervalArray"): + """ + Test that two IntervalArrays are equivalent. + + Parameters + ---------- + left, right : IntervalArray + The IntervalArrays to compare. + exact : bool or {'equiv'}, default 'equiv' + Whether to check the Index class, dtype and inferred_type + are identical. If 'equiv', then RangeIndex can be substituted for + Int64Index as well. + obj : str, default 'IntervalArray' + Specify object name being compared, internally used to show appropriate + assertion message + """ + _check_isinstance(left, right, IntervalArray) + + kwargs = {} + if left._left.dtype.kind in ["m", "M"]: + # We have a DatetimeArray or TimedeltaArray + kwargs["check_freq"] = False + + assert_equal(left._left, right._left, obj=f"{obj}.left", **kwargs) + assert_equal(left._right, right._right, obj=f"{obj}.left", **kwargs) + + assert_attr_equal("closed", left, right, obj=obj) + + +def assert_period_array_equal(left, right, obj="PeriodArray"): + _check_isinstance(left, right, PeriodArray) + + assert_numpy_array_equal(left._data, right._data, obj=f"{obj}._data") + assert_attr_equal("freq", left, right, obj=obj) + + +def assert_datetime_array_equal(left, right, obj="DatetimeArray", check_freq=True): + __tracebackhide__ = True + _check_isinstance(left, right, DatetimeArray) + + assert_numpy_array_equal(left._data, right._data, obj=f"{obj}._data") + if check_freq: + assert_attr_equal("freq", left, right, obj=obj) + assert_attr_equal("tz", left, right, obj=obj) + + +def assert_timedelta_array_equal(left, right, obj="TimedeltaArray", check_freq=True): + __tracebackhide__ = True + _check_isinstance(left, right, TimedeltaArray) + assert_numpy_array_equal(left._data, right._data, obj=f"{obj}._data") + if check_freq: + assert_attr_equal("freq", left, right, obj=obj) + + +def raise_assert_detail(obj, message, left, right, diff=None, index_values=None): + __tracebackhide__ = True + + msg = f"""{obj} are different + +{message}""" + + if isinstance(index_values, np.ndarray): + msg += f"\n[index]: {pprint_thing(index_values)}" + + if isinstance(left, np.ndarray): + left = pprint_thing(left) + elif ( + isinstance(left, CategoricalDtype) + or isinstance(left, PandasDtype) + or isinstance(left, StringDtype) + ): + left = repr(left) + + if isinstance(right, np.ndarray): + right = pprint_thing(right) + elif ( + 
isinstance(right, CategoricalDtype) + or isinstance(right, PandasDtype) + or isinstance(right, StringDtype) + ): + right = repr(right) + + msg += f""" +[left]: {left} +[right]: {right}""" + + if diff is not None: + msg += f"\n[diff]: {diff}" + + raise AssertionError(msg) + + +def assert_numpy_array_equal( + left, + right, + strict_nan=False, + check_dtype=True, + err_msg=None, + check_same=None, + obj="numpy array", + index_values=None, +): + """ + Check that 'np.ndarray' is equivalent. + + Parameters + ---------- + left, right : numpy.ndarray or iterable + The two arrays to be compared. + strict_nan : bool, default False + If True, consider NaN and None to be different. + check_dtype : bool, default True + Check dtype if both a and b are np.ndarray. + err_msg : str, default None + If provided, used as assertion message. + check_same : None|'copy'|'same', default None + Ensure left and right refer/do not refer to the same memory area. + obj : str, default 'numpy array' + Specify object name being compared, internally used to show appropriate + assertion message. + index_values : numpy.ndarray, default None + optional index (shared by both left and right), used in output. + """ + __tracebackhide__ = True + + # instance validation + # Show a detailed error message when classes are different + assert_class_equal(left, right, obj=obj) + # both classes must be an np.ndarray + _check_isinstance(left, right, np.ndarray) + + def _get_base(obj): + return obj.base if getattr(obj, "base", None) is not None else obj + + left_base = _get_base(left) + right_base = _get_base(right) + + if check_same == "same": + if left_base is not right_base: + raise AssertionError(f"{repr(left_base)} is not {repr(right_base)}") + elif check_same == "copy": + if left_base is right_base: + raise AssertionError(f"{repr(left_base)} is {repr(right_base)}") + + def _raise(left, right, err_msg): + if err_msg is None: + if left.shape != right.shape: + raise_assert_detail( + obj, f"{obj} shapes are different", left.shape, right.shape + ) + + diff = 0 + for left_arr, right_arr in zip(left, right): + # count up differences + if not array_equivalent(left_arr, right_arr, strict_nan=strict_nan): + diff += 1 + + diff = diff * 100.0 / left.size + msg = f"{obj} values are different ({np.round(diff, 5)} %)" + raise_assert_detail(obj, msg, left, right, index_values=index_values) + + raise AssertionError(err_msg) + + # compare shape and values + if not array_equivalent(left, right, strict_nan=strict_nan): + _raise(left, right, err_msg) + + if check_dtype: + if isinstance(left, np.ndarray) and isinstance(right, np.ndarray): + assert_attr_equal("dtype", left, right, obj=obj) + + +def assert_extension_array_equal( + left, + right, + check_dtype=True, + index_values=None, + check_less_precise=no_default, + check_exact=False, + rtol: float = 1.0e-5, + atol: float = 1.0e-8, +): + """ + Check that left and right ExtensionArrays are equal. + + Parameters + ---------- + left, right : ExtensionArray + The two arrays to compare. + check_dtype : bool, default True + Whether to check if the ExtensionArray dtypes are identical. + index_values : numpy.ndarray, default None + Optional index (shared by both left and right), used in output. + check_less_precise : bool or int, default False + Specify comparison precision. Only used when check_exact is False. + 5 digits (False) or 3 digits (True) after decimal points are compared. + If int, then specify the digits to compare. + + .. 
deprecated:: 1.1.0 + Use `rtol` and `atol` instead to define relative/absolute + tolerance, respectively. Similar to :func:`math.isclose`. + check_exact : bool, default False + Whether to compare number exactly. + rtol : float, default 1e-5 + Relative tolerance. Only used when check_exact is False. + + .. versionadded:: 1.1.0 + atol : float, default 1e-8 + Absolute tolerance. Only used when check_exact is False. + + .. versionadded:: 1.1.0 + + Notes + ----- + Missing values are checked separately from valid values. + A mask of missing values is computed for each and checked to match. + The remaining all-valid values are cast to object dtype and checked. + + Examples + -------- + >>> from pandas import testing as tm + >>> a = pd.Series([1, 2, 3, 4]) + >>> b, c = a.array, a.array + >>> tm.assert_extension_array_equal(b, c) + """ + if check_less_precise is not no_default: + warnings.warn( + "The 'check_less_precise' keyword in testing.assert_*_equal " + "is deprecated and will be removed in a future version. " + "You can stop passing 'check_less_precise' to silence this warning.", + FutureWarning, + stacklevel=find_stack_level(), + ) + rtol = atol = _get_tol_from_less_precise(check_less_precise) + + assert isinstance(left, ExtensionArray), "left is not an ExtensionArray" + assert isinstance(right, ExtensionArray), "right is not an ExtensionArray" + if check_dtype: + assert_attr_equal("dtype", left, right, obj="ExtensionArray") + + if ( + isinstance(left, DatetimeLikeArrayMixin) + and isinstance(right, DatetimeLikeArrayMixin) + and type(right) == type(left) + ): + # Avoid slow object-dtype comparisons + # np.asarray for case where we have a np.MaskedArray + assert_numpy_array_equal( + np.asarray(left.asi8), np.asarray(right.asi8), index_values=index_values + ) + return + + left_na = np.asarray(left.isna()) + right_na = np.asarray(right.isna()) + assert_numpy_array_equal( + left_na, right_na, obj="ExtensionArray NA mask", index_values=index_values + ) + + left_valid = np.asarray(left[~left_na].astype(object)) + right_valid = np.asarray(right[~right_na].astype(object)) + if check_exact: + assert_numpy_array_equal( + left_valid, right_valid, obj="ExtensionArray", index_values=index_values + ) + else: + _testing.assert_almost_equal( + left_valid, + right_valid, + check_dtype=check_dtype, + rtol=rtol, + atol=atol, + obj="ExtensionArray", + index_values=index_values, + ) + + +# This could be refactored to use the NDFrame.equals method +def assert_series_equal( + left, + right, + check_dtype=True, + check_index_type="equiv", + check_series_type=True, + check_less_precise=no_default, + check_names=True, + check_exact=False, + check_datetimelike_compat=False, + check_categorical=True, + check_category_order=True, + check_freq=True, + check_flags=True, + rtol=1.0e-5, + atol=1.0e-8, + obj="Series", + *, + check_index=True, +): + """ + Check that left and right Series are equal. + + Parameters + ---------- + left : Series + right : Series + check_dtype : bool, default True + Whether to check the Series dtype is identical. + check_index_type : bool or {'equiv'}, default 'equiv' + Whether to check the Index class, dtype and inferred_type + are identical. + check_series_type : bool, default True + Whether to check the Series class is identical. + check_less_precise : bool or int, default False + Specify comparison precision. Only used when check_exact is False. + 5 digits (False) or 3 digits (True) after decimal points are compared. + If int, then specify the digits to compare. 
+ + When comparing two numbers, if the first number has magnitude less + than 1e-5, we compare the two numbers directly and check whether + they are equivalent within the specified precision. Otherwise, we + compare the **ratio** of the second number to the first number and + check whether it is equivalent to 1 within the specified precision. + + .. deprecated:: 1.1.0 + Use `rtol` and `atol` instead to define relative/absolute + tolerance, respectively. Similar to :func:`math.isclose`. + check_names : bool, default True + Whether to check the Series and Index names attribute. + check_exact : bool, default False + Whether to compare number exactly. + check_datetimelike_compat : bool, default False + Compare datetime-like which is comparable ignoring dtype. + check_categorical : bool, default True + Whether to compare internal Categorical exactly. + check_category_order : bool, default True + Whether to compare category order of internal Categoricals. + + .. versionadded:: 1.0.2 + check_freq : bool, default True + Whether to check the `freq` attribute on a DatetimeIndex or TimedeltaIndex. + + .. versionadded:: 1.1.0 + check_flags : bool, default True + Whether to check the `flags` attribute. + + .. versionadded:: 1.2.0 + + rtol : float, default 1e-5 + Relative tolerance. Only used when check_exact is False. + + .. versionadded:: 1.1.0 + atol : float, default 1e-8 + Absolute tolerance. Only used when check_exact is False. + + .. versionadded:: 1.1.0 + obj : str, default 'Series' + Specify object name being compared, internally used to show appropriate + assertion message. + check_index : bool, default True + Whether to check index equivalence. If False, then compare only values. + + .. versionadded:: 1.3.0 + + Examples + -------- + >>> from pandas import testing as tm + >>> a = pd.Series([1, 2, 3, 4]) + >>> b = pd.Series([1, 2, 3, 4]) + >>> tm.assert_series_equal(a, b) + """ + __tracebackhide__ = True + + if check_less_precise is not no_default: + warnings.warn( + "The 'check_less_precise' keyword in testing.assert_*_equal " + "is deprecated and will be removed in a future version. " + "You can stop passing 'check_less_precise' to silence this warning.", + FutureWarning, + stacklevel=find_stack_level(), + ) + rtol = atol = _get_tol_from_less_precise(check_less_precise) + + # instance validation + _check_isinstance(left, right, Series) + + if check_series_type: + assert_class_equal(left, right, obj=obj) + + # length comparison + if len(left) != len(right): + msg1 = f"{len(left)}, {left.index}" + msg2 = f"{len(right)}, {right.index}" + raise_assert_detail(obj, "Series length are different", msg1, msg2) + + if check_flags: + assert left.flags == right.flags, f"{repr(left.flags)} != {repr(right.flags)}" + + if check_index: + # GH #38183 + assert_index_equal( + left.index, + right.index, + exact=check_index_type, + check_names=check_names, + check_exact=check_exact, + check_categorical=check_categorical, + rtol=rtol, + atol=atol, + obj=f"{obj}.index", + ) + + if check_freq and isinstance(left.index, (DatetimeIndex, TimedeltaIndex)): + lidx = left.index + ridx = right.index + assert lidx.freq == ridx.freq, (lidx.freq, ridx.freq) + + if check_dtype: + # We want to skip exact dtype checking when `check_categorical` + # is False. 
We'll still raise if only one is a `Categorical`, + # regardless of `check_categorical` + if ( + isinstance(left.dtype, CategoricalDtype) + and isinstance(right.dtype, CategoricalDtype) + and not check_categorical + ): + pass + else: + assert_attr_equal("dtype", left, right, obj=f"Attributes of {obj}") + + if check_exact and is_numeric_dtype(left.dtype) and is_numeric_dtype(right.dtype): + left_values = left._values + right_values = right._values + # Only check exact if dtype is numeric + if isinstance(left_values, ExtensionArray) and isinstance( + right_values, ExtensionArray + ): + assert_extension_array_equal( + left_values, + right_values, + check_dtype=check_dtype, + index_values=np.asarray(left.index), + ) + else: + assert_numpy_array_equal( + left_values, + right_values, + check_dtype=check_dtype, + obj=str(obj), + index_values=np.asarray(left.index), + ) + elif check_datetimelike_compat and ( + needs_i8_conversion(left.dtype) or needs_i8_conversion(right.dtype) + ): + # we want to check only if we have compat dtypes + # e.g. integer and M|m are NOT compat, but we can simply check + # the values in that case + + # datetimelike may have different objects (e.g. datetime.datetime + # vs Timestamp) but will compare equal + if not Index(left._values).equals(Index(right._values)): + msg = ( + f"[datetimelike_compat=True] {left._values} " + f"is not equal to {right._values}." + ) + raise AssertionError(msg) + elif is_interval_dtype(left.dtype) and is_interval_dtype(right.dtype): + assert_interval_array_equal(left.array, right.array) + elif isinstance(left.dtype, CategoricalDtype) or isinstance( + right.dtype, CategoricalDtype + ): + _testing.assert_almost_equal( + left._values, + right._values, + rtol=rtol, + atol=atol, + check_dtype=check_dtype, + obj=str(obj), + index_values=np.asarray(left.index), + ) + elif is_extension_array_dtype(left.dtype) and is_extension_array_dtype(right.dtype): + assert_extension_array_equal( + left._values, + right._values, + rtol=rtol, + atol=atol, + check_dtype=check_dtype, + index_values=np.asarray(left.index), + ) + elif is_extension_array_dtype_and_needs_i8_conversion( + left.dtype, right.dtype + ) or is_extension_array_dtype_and_needs_i8_conversion(right.dtype, left.dtype): + assert_extension_array_equal( + left._values, + right._values, + check_dtype=check_dtype, + index_values=np.asarray(left.index), + ) + elif needs_i8_conversion(left.dtype) and needs_i8_conversion(right.dtype): + # DatetimeArray or TimedeltaArray + assert_extension_array_equal( + left._values, + right._values, + check_dtype=check_dtype, + index_values=np.asarray(left.index), + ) + else: + _testing.assert_almost_equal( + left._values, + right._values, + rtol=rtol, + atol=atol, + check_dtype=check_dtype, + obj=str(obj), + index_values=np.asarray(left.index), + ) + + # metadata comparison + if check_names: + assert_attr_equal("name", left, right, obj=obj) + + if check_categorical: + if isinstance(left.dtype, CategoricalDtype) or isinstance( + right.dtype, CategoricalDtype + ): + assert_categorical_equal( + left._values, + right._values, + obj=f"{obj} category", + check_category_order=check_category_order, + ) + + +# This could be refactored to use the NDFrame.equals method +def assert_frame_equal( + left, + right, + check_dtype=True, + check_index_type="equiv", + check_column_type="equiv", + check_frame_type=True, + check_less_precise=no_default, + check_names=True, + by_blocks=False, + check_exact=False, + check_datetimelike_compat=False, + check_categorical=True, + check_like=False, + 
check_freq=True, + check_flags=True, + rtol=1.0e-5, + atol=1.0e-8, + obj="DataFrame", +): + """ + Check that left and right DataFrame are equal. + + This function is intended to compare two DataFrames and output any + differences. Is is mostly intended for use in unit tests. + Additional parameters allow varying the strictness of the + equality checks performed. + + Parameters + ---------- + left : DataFrame + First DataFrame to compare. + right : DataFrame + Second DataFrame to compare. + check_dtype : bool, default True + Whether to check the DataFrame dtype is identical. + check_index_type : bool or {'equiv'}, default 'equiv' + Whether to check the Index class, dtype and inferred_type + are identical. + check_column_type : bool or {'equiv'}, default 'equiv' + Whether to check the columns class, dtype and inferred_type + are identical. Is passed as the ``exact`` argument of + :func:`assert_index_equal`. + check_frame_type : bool, default True + Whether to check the DataFrame class is identical. + check_less_precise : bool or int, default False + Specify comparison precision. Only used when check_exact is False. + 5 digits (False) or 3 digits (True) after decimal points are compared. + If int, then specify the digits to compare. + + When comparing two numbers, if the first number has magnitude less + than 1e-5, we compare the two numbers directly and check whether + they are equivalent within the specified precision. Otherwise, we + compare the **ratio** of the second number to the first number and + check whether it is equivalent to 1 within the specified precision. + + .. deprecated:: 1.1.0 + Use `rtol` and `atol` instead to define relative/absolute + tolerance, respectively. Similar to :func:`math.isclose`. + check_names : bool, default True + Whether to check that the `names` attribute for both the `index` + and `column` attributes of the DataFrame is identical. + by_blocks : bool, default False + Specify how to compare internal data. If False, compare by columns. + If True, compare by blocks. + check_exact : bool, default False + Whether to compare number exactly. + check_datetimelike_compat : bool, default False + Compare datetime-like which is comparable ignoring dtype. + check_categorical : bool, default True + Whether to compare internal Categorical exactly. + check_like : bool, default False + If True, ignore the order of index & columns. + Note: index labels must match their respective rows + (same as in columns) - same labels must be with the same data. + check_freq : bool, default True + Whether to check the `freq` attribute on a DatetimeIndex or TimedeltaIndex. + + .. versionadded:: 1.1.0 + check_flags : bool, default True + Whether to check the `flags` attribute. + rtol : float, default 1e-5 + Relative tolerance. Only used when check_exact is False. + + .. versionadded:: 1.1.0 + atol : float, default 1e-8 + Absolute tolerance. Only used when check_exact is False. + + .. versionadded:: 1.1.0 + obj : str, default 'DataFrame' + Specify object name being compared, internally used to show appropriate + assertion message. + + See Also + -------- + assert_series_equal : Equivalent method for asserting Series equality. + DataFrame.equals : Check DataFrame equality. + + Examples + -------- + This example shows comparing two DataFrames that are equal + but with columns of differing dtypes. + + >>> from pandas.testing import assert_frame_equal + >>> df1 = pd.DataFrame({'a': [1, 2], 'b': [3, 4]}) + >>> df2 = pd.DataFrame({'a': [1, 2], 'b': [3.0, 4.0]}) + + df1 equals itself. 
+ + >>> assert_frame_equal(df1, df1) + + df1 differs from df2 as column 'b' is of a different type. + + >>> assert_frame_equal(df1, df2) + Traceback (most recent call last): + ... + AssertionError: Attributes of DataFrame.iloc[:, 1] (column name="b") are different + + Attribute "dtype" are different + [left]: int64 + [right]: float64 + + Ignore differing dtypes in columns with check_dtype. + + >>> assert_frame_equal(df1, df2, check_dtype=False) + """ + __tracebackhide__ = True + + if check_less_precise is not no_default: + warnings.warn( + "The 'check_less_precise' keyword in testing.assert_*_equal " + "is deprecated and will be removed in a future version. " + "You can stop passing 'check_less_precise' to silence this warning.", + FutureWarning, + stacklevel=find_stack_level(), + ) + rtol = atol = _get_tol_from_less_precise(check_less_precise) + + # instance validation + _check_isinstance(left, right, DataFrame) + + if check_frame_type: + assert isinstance(left, type(right)) + # assert_class_equal(left, right, obj=obj) + + # shape comparison + if left.shape != right.shape: + raise_assert_detail( + obj, f"{obj} shape mismatch", f"{repr(left.shape)}", f"{repr(right.shape)}" + ) + + if check_flags: + assert left.flags == right.flags, f"{repr(left.flags)} != {repr(right.flags)}" + + # index comparison + assert_index_equal( + left.index, + right.index, + exact=check_index_type, + check_names=check_names, + check_exact=check_exact, + check_categorical=check_categorical, + check_order=not check_like, + rtol=rtol, + atol=atol, + obj=f"{obj}.index", + ) + + # column comparison + assert_index_equal( + left.columns, + right.columns, + exact=check_column_type, + check_names=check_names, + check_exact=check_exact, + check_categorical=check_categorical, + check_order=not check_like, + rtol=rtol, + atol=atol, + obj=f"{obj}.columns", + ) + + if check_like: + left, right = left.reindex_like(right), right + + # compare by blocks + if by_blocks: + rblocks = right._to_dict_of_blocks() + lblocks = left._to_dict_of_blocks() + for dtype in list(set(list(lblocks.keys()) + list(rblocks.keys()))): + assert dtype in lblocks + assert dtype in rblocks + assert_frame_equal( + lblocks[dtype], rblocks[dtype], check_dtype=check_dtype, obj=obj + ) + + # compare by columns + else: + for i, col in enumerate(left.columns): + # We have already checked that columns match, so we can do + # fast location-based lookups + lcol = left._ixs(i, axis=1) + rcol = right._ixs(i, axis=1) + + # GH #38183 + # use check_index=False, because we do not want to run + # assert_index_equal for each column, + # as we already checked it for the whole dataframe before. + assert_series_equal( + lcol, + rcol, + check_dtype=check_dtype, + check_index_type=check_index_type, + check_exact=check_exact, + check_names=check_names, + check_datetimelike_compat=check_datetimelike_compat, + check_categorical=check_categorical, + check_freq=check_freq, + obj=f'{obj}.iloc[:, {i}] (column name="{col}")', + rtol=rtol, + atol=atol, + check_index=False, + check_flags=False, + ) + + +def assert_equal(left, right, **kwargs): + """ + Wrapper for tm.assert_*_equal to dispatch to the appropriate test function. + + Parameters + ---------- + left, right : Index, Series, DataFrame, ExtensionArray, or np.ndarray + The two items to be compared. + **kwargs + All keyword arguments are passed through to the underlying assert method. 
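+
+    Examples
+    --------
+    A sketch of the dispatch; each call below routes to the matching
+    ``assert_*_equal`` helper and passes silently on equal inputs:
+
+    >>> import numpy as np
+    >>> import pandas as pd
+    >>> from pandas import _testing as tm
+    >>> tm.assert_equal(pd.Index([1, 2]), pd.Index([1, 2]))
+    >>> tm.assert_equal(np.array([1, 2]), np.array([1, 2]))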
+    """
+    __tracebackhide__ = True
+
+    if isinstance(left, Index):
+        assert_index_equal(left, right, **kwargs)
+        if isinstance(left, (DatetimeIndex, TimedeltaIndex)):
+            assert left.freq == right.freq, (left.freq, right.freq)
+    elif isinstance(left, Series):
+        assert_series_equal(left, right, **kwargs)
+    elif isinstance(left, DataFrame):
+        assert_frame_equal(left, right, **kwargs)
+    elif isinstance(left, IntervalArray):
+        assert_interval_array_equal(left, right, **kwargs)
+    elif isinstance(left, PeriodArray):
+        assert_period_array_equal(left, right, **kwargs)
+    elif isinstance(left, DatetimeArray):
+        assert_datetime_array_equal(left, right, **kwargs)
+    elif isinstance(left, TimedeltaArray):
+        assert_timedelta_array_equal(left, right, **kwargs)
+    elif isinstance(left, ExtensionArray):
+        assert_extension_array_equal(left, right, **kwargs)
+    elif isinstance(left, np.ndarray):
+        assert_numpy_array_equal(left, right, **kwargs)
+    elif isinstance(left, str):
+        assert kwargs == {}
+        assert left == right
+    else:
+        assert kwargs == {}
+        assert_almost_equal(left, right)
+
+
+def assert_sp_array_equal(left, right):
+    """
+    Check that the left and right SparseArray are equal.
+
+    Parameters
+    ----------
+    left : SparseArray
+    right : SparseArray
+    """
+    _check_isinstance(left, right, pd.arrays.SparseArray)
+
+    assert_numpy_array_equal(left.sp_values, right.sp_values)
+
+    # SparseIndex comparison
+    assert isinstance(left.sp_index, pd._libs.sparse.SparseIndex)
+    assert isinstance(right.sp_index, pd._libs.sparse.SparseIndex)
+
+    left_index = left.sp_index
+    right_index = right.sp_index
+
+    if not left_index.equals(right_index):
+        raise_assert_detail(
+            "SparseArray.index", "index are not equal", left_index, right_index
+        )
+    else:
+        # The indexes match; nothing further to check here.
+        pass
+
+    assert_attr_equal("fill_value", left, right)
+    assert_attr_equal("dtype", left, right)
+    assert_numpy_array_equal(left.to_dense(), right.to_dense())
+
+
+def assert_contains_all(iterable, dic):
+    for k in iterable:
+        assert k in dic, f"Did not contain item: {repr(k)}"
+
+
+def assert_copy(iter1, iter2, **eql_kwargs):
+    """
+    iter1, iter2: iterables that produce elements
+    comparable with assert_almost_equal
+
+    Checks that the elements are equal, but not
+    the same object. (Does not check that items
+    in sequences are also not the same object)
+    """
+    for elem1, elem2 in zip(iter1, iter2):
+        assert_almost_equal(elem1, elem2, **eql_kwargs)
+        msg = (
+            f"Expected object {repr(type(elem1))} and object {repr(type(elem2))} to be "
+            "different objects, but they were the same object."
+        )
+        assert elem1 is not elem2, msg
+
+
+def is_extension_array_dtype_and_needs_i8_conversion(left_dtype, right_dtype) -> bool:
+    """
+    Checks that we have the combination of an ExtensionArray dtype and
+    a dtype that should be converted to int64
+
+    Returns
+    -------
+    bool
+
+    Related to issue #37609
+    """
+    return is_extension_array_dtype(left_dtype) and needs_i8_conversion(right_dtype)
+
+
+def assert_indexing_slices_equivalent(ser: Series, l_slc: slice, i_slc: slice):
+    """
+    Check that ser.iloc[i_slc] matches ser.loc[l_slc] and, if applicable,
+    ser[l_slc].
+    """
+    expected = ser.iloc[i_slc]
+
+    assert_series_equal(ser.loc[l_slc], expected)
+
+    if not ser.index.is_integer():
+        # For integer indices, plain getitem slices by position while .loc
+        # slices by label, so the equivalence check is skipped there.
+        assert_series_equal(ser[l_slc], expected)
+
+
+def assert_metadata_equivalent(left, right):
+    """
+    Check that ._metadata attributes are equivalent.
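+
+    A hedged usage sketch (the ``Tagged`` subclass is illustrative only,
+    not part of pandas):
+
+    >>> class Tagged(pd.Series):
+    ...     _metadata = ["tag"]
+    >>> s1, s2 = Tagged([1]), Tagged([1])
+    >>> s1.tag = s2.tag = "demo"
+    >>> assert_metadata_equivalent(s1, s2)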
+    """
+    for attr in left._metadata:
+        val = getattr(left, attr, None)
+        if right is None:
+            assert val is None
+        else:
+            assert val == getattr(right, attr, None)
diff --git a/.local/lib/python3.8/site-packages/pandas/_testing/compat.py b/.local/lib/python3.8/site-packages/pandas/_testing/compat.py
new file mode 100644
index 0000000..e2ac8f7
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/_testing/compat.py
@@ -0,0 +1,23 @@
+"""
+Helpers for sharing tests between DataFrame/Series
+"""
+
+from pandas import DataFrame
+
+
+def get_dtype(obj):
+    if isinstance(obj, DataFrame):
+        # Note: we are assuming only one column
+        return obj.dtypes.iat[0]
+    else:
+        return obj.dtype
+
+
+def get_obj(df: DataFrame, klass):
+    """
+    For sharing tests using frame_or_series, either return the DataFrame
+    unchanged or return its first column as a Series.
+    """
+    if klass is DataFrame:
+        return df
+    return df._ixs(0, axis=1)
diff --git a/.local/lib/python3.8/site-packages/pandas/_testing/contexts.py b/.local/lib/python3.8/site-packages/pandas/_testing/contexts.py
new file mode 100644
index 0000000..5a77c06
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/_testing/contexts.py
@@ -0,0 +1,244 @@
+from __future__ import annotations
+
+from contextlib import contextmanager
+import os
+from pathlib import Path
+import random
+from shutil import rmtree
+import string
+import tempfile
+from typing import (
+    IO,
+    Any,
+)
+
+import numpy as np
+
+from pandas import set_option
+
+from pandas.io.common import get_handle
+
+
+@contextmanager
+def decompress_file(path, compression):
+    """
+    Open a compressed file and return a file object.
+
+    Parameters
+    ----------
+    path : str
+        The path where the file is read from.
+
+    compression : {'gzip', 'bz2', 'zip', 'xz', 'zstd', None}
+        Name of the decompression to use.
+
+    Returns
+    -------
+    file object
+    """
+    with get_handle(path, "rb", compression=compression, is_text=False) as handle:
+        yield handle.handle
+
+
+@contextmanager
+def set_timezone(tz: str):
+    """
+    Context manager for temporarily setting a timezone.
+
+    Parameters
+    ----------
+    tz : str
+        A string representing a valid timezone.
+
+    Examples
+    --------
+    >>> from datetime import datetime
+    >>> from dateutil.tz import tzlocal
+    >>> tzlocal().tzname(datetime(2021, 1, 1))  # doctest: +SKIP
+    'IST'
+
+    >>> with set_timezone('US/Eastern'):
+    ...     tzlocal().tzname(datetime(2021, 1, 1))
+    ...
+    'EST'
+    """
+    import os
+    import time
+
+    def setTZ(tz):
+        if tz is None:
+            try:
+                del os.environ["TZ"]
+            except KeyError:
+                pass
+        else:
+            os.environ["TZ"] = tz
+            time.tzset()
+
+    orig_tz = os.environ.get("TZ")
+    setTZ(tz)
+    try:
+        yield
+    finally:
+        setTZ(orig_tz)
+
+
+@contextmanager
+def ensure_clean(filename=None, return_filelike: bool = False, **kwargs: Any):
+    """
+    Gets a temporary path and ensures it is removed on close.
+
+    This implementation does not use tempfile.mkstemp to avoid having a file handle.
+    If the code using the returned path wants to delete the file itself, Windows
+    requires that no program has a file handle to it.
+
+    Parameters
+    ----------
+    filename : str (optional)
+        Suffix of the created file.
+    return_filelike : bool (default False)
+        If True, returns a file-like object that is *always* cleaned up.
+        Necessary for savefig and other functions which want to append
+        extensions.
+    **kwargs
+        Additional keywords are passed to open().
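+
+    Examples
+    --------
+    A hedged sketch of typical use (the suffix is illustrative):
+
+    >>> import pandas as pd
+    >>> with ensure_clean(".csv") as path:  # doctest: +SKIP
+    ...     pd.DataFrame({"a": [1]}).to_csv(path)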
+
+    """
+    folder = Path(tempfile.gettempdir())
+
+    if filename is None:
+        filename = ""
+    filename = (
+        "".join(random.choices(string.ascii_letters + string.digits, k=30)) + filename
+    )
+    path = folder / filename
+
+    path.touch()
+
+    handle_or_str: str | IO = str(path)
+    if return_filelike:
+        kwargs.setdefault("mode", "w+b")
+        handle_or_str = open(path, **kwargs)
+
+    try:
+        yield handle_or_str
+    finally:
+        if not isinstance(handle_or_str, str):
+            handle_or_str.close()
+        if path.is_file():
+            path.unlink()
+
+
+@contextmanager
+def ensure_clean_dir():
+    """
+    Get a temporary directory path and ensure it is removed on close.
+
+    Yields
+    ------
+    Temporary directory path
+    """
+    directory_name = tempfile.mkdtemp(suffix="")
+    try:
+        yield directory_name
+    finally:
+        try:
+            rmtree(directory_name)
+        except OSError:
+            pass
+
+
+@contextmanager
+def ensure_safe_environment_variables():
+    """
+    Get a context manager to safely set environment variables
+
+    All changes will be undone on close, hence environment variables set
+    within this contextmanager will neither persist nor change global state.
+    """
+    saved_environ = dict(os.environ)
+    try:
+        yield
+    finally:
+        os.environ.clear()
+        os.environ.update(saved_environ)
+
+
+@contextmanager
+def with_csv_dialect(name, **kwargs):
+    """
+    Context manager to temporarily register a CSV dialect for parsing CSV.
+
+    Parameters
+    ----------
+    name : str
+        The name of the dialect.
+    kwargs : mapping
+        The parameters for the dialect.
+
+    Raises
+    ------
+    ValueError : the name of the dialect conflicts with a builtin one.
+
+    See Also
+    --------
+    csv : Python's CSV library.
+    """
+    import csv
+
+    _BUILTIN_DIALECTS = {"excel", "excel-tab", "unix"}
+
+    if name in _BUILTIN_DIALECTS:
+        raise ValueError("Cannot override builtin dialect.")
+
+    csv.register_dialect(name, **kwargs)
+    try:
+        yield
+    finally:
+        csv.unregister_dialect(name)
+
+
+@contextmanager
+def use_numexpr(use, min_elements=None):
+    from pandas.core.computation import expressions as expr
+
+    if min_elements is None:
+        min_elements = expr._MIN_ELEMENTS
+
+    olduse = expr.USE_NUMEXPR
+    oldmin = expr._MIN_ELEMENTS
+    set_option("compute.use_numexpr", use)
+    expr._MIN_ELEMENTS = min_elements
+    try:
+        yield
+    finally:
+        expr._MIN_ELEMENTS = oldmin
+        set_option("compute.use_numexpr", olduse)
+
+
+class RNGContext:
+    """
+    Context manager to set the numpy random number generator seed. Restores
+    the original state upon exiting the context manager.
+ + Parameters + ---------- + seed : int + Seed for numpy.random.seed + + Examples + -------- + with RNGContext(42): + np.random.randn() + """ + + def __init__(self, seed): + self.seed = seed + + def __enter__(self): + + self.start_state = np.random.get_state() + np.random.seed(self.seed) + + def __exit__(self, exc_type, exc_value, traceback): + + np.random.set_state(self.start_state) diff --git a/.local/lib/python3.8/site-packages/pandas/_typing.py b/.local/lib/python3.8/site-packages/pandas/_typing.py new file mode 100644 index 0000000..fd099b3 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/_typing.py @@ -0,0 +1,297 @@ +from __future__ import annotations + +from datetime import ( + datetime, + timedelta, + tzinfo, +) +from os import PathLike +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Collection, + Dict, + Hashable, + Iterator, + List, + Literal, + Mapping, + Optional, + Protocol, + Sequence, + Tuple, + Type as type_t, + TypeVar, + Union, +) + +import numpy as np + +# To prevent import cycles place any internal imports in the branch below +# and use a string literal forward reference to it in subsequent types +# https://mypy.readthedocs.io/en/latest/common_issues.html#import-cycles +if TYPE_CHECKING: + import numpy.typing as npt + + from pandas._libs import ( + Period, + Timedelta, + Timestamp, + ) + + from pandas.core.dtypes.dtypes import ExtensionDtype + + from pandas import Interval + from pandas.core.arrays.base import ExtensionArray + from pandas.core.frame import DataFrame + from pandas.core.generic import NDFrame + from pandas.core.groupby.generic import ( + DataFrameGroupBy, + GroupBy, + SeriesGroupBy, + ) + from pandas.core.indexes.base import Index + from pandas.core.internals import ( + ArrayManager, + BlockManager, + SingleArrayManager, + SingleBlockManager, + ) + from pandas.core.resample import Resampler + from pandas.core.series import Series + from pandas.core.window.rolling import BaseWindow + + from pandas.io.formats.format import EngFormatter + from pandas.tseries.offsets import DateOffset + + # numpy compatible types + NumpyValueArrayLike = Union[npt._ScalarLike_co, npt.ArrayLike] + NumpySorter = Optional[npt._ArrayLikeInt_co] + +else: + npt: Any = None + + +# array-like + +ArrayLike = Union["ExtensionArray", np.ndarray] +AnyArrayLike = Union[ArrayLike, "Index", "Series"] + +# scalars + +PythonScalar = Union[str, int, float, bool] +DatetimeLikeScalar = Union["Period", "Timestamp", "Timedelta"] +PandasScalar = Union["Period", "Timestamp", "Timedelta", "Interval"] +Scalar = Union[PythonScalar, PandasScalar] +IntStrT = TypeVar("IntStrT", int, str) + + +# timestamp and timedelta convertible types + +TimestampConvertibleTypes = Union[ + "Timestamp", datetime, np.datetime64, int, np.int64, float, str +] +TimedeltaConvertibleTypes = Union[ + "Timedelta", timedelta, np.timedelta64, int, np.int64, float, str +] +Timezone = Union[str, tzinfo] + +# NDFrameT is stricter and ensures that the same subclass of NDFrame always is +# used. E.g. `def func(a: NDFrameT) -> NDFrameT: ...` means that if a +# Series is passed into a function, a Series is always returned and if a DataFrame is +# passed in, a DataFrame is always returned. +NDFrameT = TypeVar("NDFrameT", bound="NDFrame") + +Axis = Union[str, int] +IndexLabel = Union[Hashable, Sequence[Hashable]] +Level = Union[Hashable, int] +Shape = Tuple[int, ...] 
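+
+# Editor's sketch (hedged, illustrative only; not part of pandas): a function
+# annotated with NDFrameT preserves the caller's concrete subclass, e.g.
+#
+#     def first_row(obj: NDFrameT) -> NDFrameT:
+#         return obj.iloc[:1]
+#
+# so type checkers infer Series -> Series and DataFrame -> DataFrame.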
+Suffixes = Tuple[Optional[str], Optional[str]]
+Ordered = Optional[bool]
+JSONSerializable = Optional[Union[PythonScalar, List, Dict]]
+Frequency = Union[str, "DateOffset"]
+Axes = Collection[Any]
+
+RandomState = Union[
+    int,
+    ArrayLike,
+    np.random.Generator,
+    np.random.BitGenerator,
+    np.random.RandomState,
+]
+
+# dtypes
+NpDtype = Union[str, np.dtype, type_t[Union[str, float, int, complex, bool, object]]]
+Dtype = Union["ExtensionDtype", NpDtype]
+AstypeArg = Union["ExtensionDtype", "npt.DTypeLike"]
+# DtypeArg specifies all allowable dtypes in a function's dtype argument
+DtypeArg = Union[Dtype, Dict[Hashable, Dtype]]
+DtypeObj = Union[np.dtype, "ExtensionDtype"]
+
+# For functions like rename that convert one label to another
+Renamer = Union[Mapping[Hashable, Any], Callable[[Hashable], Hashable]]
+
+# to maintain type information across generic functions and parametrization
+T = TypeVar("T")
+
+# used in decorators to preserve the signature of the function it decorates
+# see https://mypy.readthedocs.io/en/stable/generics.html#declaring-decorators
+FuncType = Callable[..., Any]
+F = TypeVar("F", bound=FuncType)
+
+# types of vectorized key functions for DataFrame::sort_values and
+# DataFrame::sort_index, among others
+ValueKeyFunc = Optional[Callable[["Series"], Union["Series", AnyArrayLike]]]
+IndexKeyFunc = Optional[Callable[["Index"], Union["Index", AnyArrayLike]]]
+
+# types of `func` kwarg for DataFrame.aggregate and Series.aggregate
+AggFuncTypeBase = Union[Callable, str]
+AggFuncTypeDict = Dict[Hashable, Union[AggFuncTypeBase, List[AggFuncTypeBase]]]
+AggFuncType = Union[
+    AggFuncTypeBase,
+    List[AggFuncTypeBase],
+    AggFuncTypeDict,
+]
+AggObjType = Union[
+    "Series",
+    "DataFrame",
+    "GroupBy",
+    "SeriesGroupBy",
+    "DataFrameGroupBy",
+    "BaseWindow",
+    "Resampler",
+]
+
+PythonFuncType = Callable[[Any], Any]
+
+# filenames and file-like-objects
+AnyStr_cov = TypeVar("AnyStr_cov", str, bytes, covariant=True)
+AnyStr_con = TypeVar("AnyStr_con", str, bytes, contravariant=True)
+
+
+class BaseBuffer(Protocol):
+    @property
+    def mode(self) -> str:
+        # for _get_filepath_or_buffer
+        ...
+
+    def fileno(self) -> int:
+        # for _MMapWrapper
+        ...
+
+    def seek(self, __offset: int, __whence: int = ...) -> int:
+        # with one argument: gzip.GzipFile, bz2.BZ2File
+        # with two arguments: zip.ZipFile, read_sas
+        ...
+
+    def seekable(self) -> bool:
+        # for bz2.BZ2File
+        ...
+
+    def tell(self) -> int:
+        # for zip.ZipFile, read_stata, to_stata
+        ...
+
+
+class ReadBuffer(BaseBuffer, Protocol[AnyStr_cov]):
+    def read(self, __n: int | None = ...) -> AnyStr_cov:
+        # for BytesIOWrapper, gzip.GzipFile, bz2.BZ2File
+        ...
+
+
+class WriteBuffer(BaseBuffer, Protocol[AnyStr_con]):
+    def write(self, __b: AnyStr_con) -> Any:
+        # for gzip.GzipFile, bz2.BZ2File
+        ...
+
+    def flush(self) -> Any:
+        # for gzip.GzipFile, bz2.BZ2File
+        ...
+
+
+class ReadPickleBuffer(ReadBuffer[bytes], Protocol):
+    def readline(self) -> AnyStr_cov:
+        ...
+
+
+class WriteExcelBuffer(WriteBuffer[bytes], Protocol):
+    def truncate(self, size: int | None = ...) -> int:
+        ...
+
+
+class ReadCsvBuffer(ReadBuffer[AnyStr_cov], Protocol):
+    def __iter__(self) -> Iterator[AnyStr_cov]:
+        # for engine=python
+        ...
+
+    def readline(self) -> AnyStr_cov:
+        # for engine=python
+        ...
+
+    @property
+    def closed(self) -> bool:
+        # for engine=pyarrow
+        ...
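+
+# Editor's sketch (hedged, illustrative only): these Protocols are
+# structural, so a standard-library object with matching methods satisfies
+# them without inheriting from them, e.g. for a static type checker:
+#
+#     import io
+#     buf: WriteBuffer[str] = io.StringIO()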
+
+
+FilePath = Union[str, "PathLike[str]"]
+
+# for arbitrary kwargs passed during reading/writing files
+StorageOptions = Optional[Dict[str, Any]]
+
+
+# compression keywords and compression
+CompressionDict = Dict[str, Any]
+CompressionOptions = Optional[
+    Union[Literal["infer", "gzip", "bz2", "zip", "xz", "zstd"], CompressionDict]
+]
+XMLParsers = Literal["lxml", "etree"]
+
+
+# types in DataFrameFormatter
+FormattersType = Union[
+    List[Callable], Tuple[Callable, ...], Mapping[Union[str, int], Callable]
+]
+ColspaceType = Mapping[Hashable, Union[str, int]]
+FloatFormatType = Union[str, Callable, "EngFormatter"]
+ColspaceArgType = Union[
+    str, int, Sequence[Union[str, int]], Mapping[Hashable, Union[str, int]]
+]
+
+# Arguments for fillna()
+FillnaOptions = Literal["backfill", "bfill", "ffill", "pad"]
+
+# internals
+Manager = Union[
+    "ArrayManager", "SingleArrayManager", "BlockManager", "SingleBlockManager"
+]
+SingleManager = Union["SingleArrayManager", "SingleBlockManager"]
+Manager2D = Union["ArrayManager", "BlockManager"]
+
+# indexing
+# PositionalIndexer -> valid 1D positional indexer, e.g. can pass
+# to ndarray.__getitem__
+# ScalarIndexer is for a single value as the index
+# SequenceIndexer is for list like or slices (but not tuples)
+# PositionalIndexerTuple extends the PositionalIndexer for 2D arrays
+# These are used in various __getitem__ overloads
+# TODO(typing#684): add Ellipsis, see
+# https://github.com/python/typing/issues/684#issuecomment-548203158
+# https://bugs.python.org/issue41810
+# Using List[int] here rather than Sequence[int] to disallow tuples.
+ScalarIndexer = Union[int, np.integer]
+SequenceIndexer = Union[slice, List[int], np.ndarray]
+PositionalIndexer = Union[ScalarIndexer, SequenceIndexer]
+PositionalIndexerTuple = Tuple[PositionalIndexer, PositionalIndexer]
+PositionalIndexer2D = Union[PositionalIndexer, PositionalIndexerTuple]
+if TYPE_CHECKING:
+    TakeIndexer = Union[Sequence[int], Sequence[np.integer], npt.NDArray[np.integer]]
+else:
+    TakeIndexer = Any
+
+# Windowing rank methods
+WindowingRankType = Literal["average", "min", "max"]
+
+# read_csv engines
+CSVEngine = Literal["c", "python", "pyarrow", "python-fwf"]
diff --git a/.local/lib/python3.8/site-packages/pandas/_version.py b/.local/lib/python3.8/site-packages/pandas/_version.py
new file mode 100644
index 0000000..db56e70
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/_version.py
@@ -0,0 +1,21 @@
+
+# This file was generated by 'versioneer.py' (0.19) from
+# revision-control system data, or from the parent directory name of an
+# unpacked source archive. Distribution tarballs contain a pre-generated copy
+# of this file.
+ +import json + +version_json = ''' +{ + "date": "2022-02-12T09:04:11+0000", + "dirty": false, + "error": null, + "full-revisionid": "06d230151e6f18fdb8139d09abf539867a8cd481", + "version": "1.4.1" +} +''' # END VERSION_JSON + + +def get_versions(): + return json.loads(version_json) diff --git a/.local/lib/python3.8/site-packages/pandas/api/__init__.py b/.local/lib/python3.8/site-packages/pandas/api/__init__.py new file mode 100644 index 0000000..80202b3 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/api/__init__.py @@ -0,0 +1,6 @@ +""" public toolkit API """ +from pandas.api import ( # noqa:F401 + extensions, + indexers, + types, +) diff --git a/.local/lib/python3.8/site-packages/pandas/api/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/api/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..72ecc52 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/api/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/api/extensions/__init__.py b/.local/lib/python3.8/site-packages/pandas/api/extensions/__init__.py new file mode 100644 index 0000000..ea5f1ba --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/api/extensions/__init__.py @@ -0,0 +1,33 @@ +""" +Public API for extending pandas objects. +""" + +from pandas._libs.lib import no_default + +from pandas.core.dtypes.base import ( + ExtensionDtype, + register_extension_dtype, +) + +from pandas.core.accessor import ( + register_dataframe_accessor, + register_index_accessor, + register_series_accessor, +) +from pandas.core.algorithms import take +from pandas.core.arrays import ( + ExtensionArray, + ExtensionScalarOpsMixin, +) + +__all__ = [ + "no_default", + "ExtensionDtype", + "register_extension_dtype", + "register_dataframe_accessor", + "register_index_accessor", + "register_series_accessor", + "take", + "ExtensionArray", + "ExtensionScalarOpsMixin", +] diff --git a/.local/lib/python3.8/site-packages/pandas/api/extensions/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/api/extensions/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..646b9f2 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/api/extensions/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/api/indexers/__init__.py b/.local/lib/python3.8/site-packages/pandas/api/indexers/__init__.py new file mode 100644 index 0000000..78357f1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/api/indexers/__init__.py @@ -0,0 +1,17 @@ +""" +Public API for Rolling Window Indexers. 
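+
+A hedged usage sketch (the values are illustrative):
+
+>>> import pandas as pd
+>>> from pandas.api.indexers import FixedForwardWindowIndexer
+>>> s = pd.Series(range(5))
+>>> s.rolling(FixedForwardWindowIndexer(window_size=2)).sum()  # doctest: +SKIP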
+""" + +from pandas.core.indexers import check_array_indexer +from pandas.core.indexers.objects import ( + BaseIndexer, + FixedForwardWindowIndexer, + VariableOffsetWindowIndexer, +) + +__all__ = [ + "check_array_indexer", + "BaseIndexer", + "FixedForwardWindowIndexer", + "VariableOffsetWindowIndexer", +] diff --git a/.local/lib/python3.8/site-packages/pandas/api/indexers/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/api/indexers/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..c7a3c36 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/api/indexers/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/api/types/__init__.py b/.local/lib/python3.8/site-packages/pandas/api/types/__init__.py new file mode 100644 index 0000000..fb1abdd --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/api/types/__init__.py @@ -0,0 +1,23 @@ +""" +Public toolkit API. +""" + +from pandas._libs.lib import infer_dtype + +from pandas.core.dtypes.api import * # noqa: F401, F403 +from pandas.core.dtypes.concat import union_categoricals +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + DatetimeTZDtype, + IntervalDtype, + PeriodDtype, +) + +__all__ = [ + "infer_dtype", + "union_categoricals", + "CategoricalDtype", + "DatetimeTZDtype", + "IntervalDtype", + "PeriodDtype", +] diff --git a/.local/lib/python3.8/site-packages/pandas/api/types/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/api/types/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..319aa5d Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/api/types/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/arrays/__init__.py b/.local/lib/python3.8/site-packages/pandas/arrays/__init__.py new file mode 100644 index 0000000..89d362e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/arrays/__init__.py @@ -0,0 +1,34 @@ +""" +All of pandas' ExtensionArrays. + +See :ref:`extending.extension-types` for more. +""" +from pandas.core.arrays import ( + ArrowStringArray, + BooleanArray, + Categorical, + DatetimeArray, + FloatingArray, + IntegerArray, + IntervalArray, + PandasArray, + PeriodArray, + SparseArray, + StringArray, + TimedeltaArray, +) + +__all__ = [ + "ArrowStringArray", + "BooleanArray", + "Categorical", + "DatetimeArray", + "FloatingArray", + "IntegerArray", + "IntervalArray", + "PandasArray", + "PeriodArray", + "SparseArray", + "StringArray", + "TimedeltaArray", +] diff --git a/.local/lib/python3.8/site-packages/pandas/arrays/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/arrays/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..aca09fb Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/arrays/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/compat/__init__.py b/.local/lib/python3.8/site-packages/pandas/compat/__init__.py new file mode 100644 index 0000000..703e67b --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/compat/__init__.py @@ -0,0 +1,151 @@ +""" +compat +====== + +Cross-compatible functions for different versions of Python. 
+ +Other items: +* platform checker +""" +import os +import platform +import sys + +from pandas._typing import F +from pandas.compat.numpy import ( + is_numpy_dev, + np_version_under1p19, + np_version_under1p20, +) +from pandas.compat.pyarrow import ( + pa_version_under1p01, + pa_version_under2p0, + pa_version_under3p0, + pa_version_under4p0, +) + +PY39 = sys.version_info >= (3, 9) +PY310 = sys.version_info >= (3, 10) +PYPY = platform.python_implementation() == "PyPy" +IS64 = sys.maxsize > 2 ** 32 + + +def set_function_name(f: F, name: str, cls) -> F: + """ + Bind the name/qualname attributes of the function. + """ + f.__name__ = name + f.__qualname__ = f"{cls.__name__}.{name}" + f.__module__ = cls.__module__ + return f + + +def is_platform_little_endian() -> bool: + """ + Checking if the running platform is little endian. + + Returns + ------- + bool + True if the running platform is little endian. + """ + return sys.byteorder == "little" + + +def is_platform_windows() -> bool: + """ + Checking if the running platform is windows. + + Returns + ------- + bool + True if the running platform is windows. + """ + return sys.platform in ["win32", "cygwin"] + + +def is_platform_linux() -> bool: + """ + Checking if the running platform is linux. + + Returns + ------- + bool + True if the running platform is linux. + """ + return sys.platform == "linux" + + +def is_platform_mac() -> bool: + """ + Checking if the running platform is mac. + + Returns + ------- + bool + True if the running platform is mac. + """ + return sys.platform == "darwin" + + +def is_platform_arm() -> bool: + """ + Checking if the running platform use ARM architecture. + + Returns + ------- + bool + True if the running platform uses ARM architecture. + """ + return platform.machine() in ("arm64", "aarch64") or platform.machine().startswith( + "armv" + ) + + +def is_ci_environment() -> bool: + """ + Checking if running in a continuous integration environment by checking + the PANDAS_CI environment variable. + + Returns + ------- + bool + True if the running in a continuous integration environment. + """ + return os.environ.get("PANDAS_CI", "0") == "1" + + +def get_lzma_file(): + """ + Importing the `LZMAFile` class from the `lzma` module. + + Returns + ------- + class + The `LZMAFile` class from the `lzma` module. + + Raises + ------ + RuntimeError + If the `lzma` module was not imported correctly, or didn't exist. + """ + try: + import lzma + except ImportError: + raise RuntimeError( + "lzma module not available. " + "A Python re-install with the proper dependencies, " + "might be required to solve this issue." 
+ ) + return lzma.LZMAFile + + +__all__ = [ + "is_numpy_dev", + "np_version_under1p19", + "np_version_under1p20", + "pa_version_under1p01", + "pa_version_under2p0", + "pa_version_under3p0", + "pa_version_under4p0", +] diff --git a/.local/lib/python3.8/site-packages/pandas/compat/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/compat/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..e0bee3b Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/compat/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/compat/__pycache__/_optional.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/compat/__pycache__/_optional.cpython-38.pyc new file mode 100644 index 0000000..20179ae Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/compat/__pycache__/_optional.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/compat/__pycache__/chainmap.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/compat/__pycache__/chainmap.cpython-38.pyc new file mode 100644 index 0000000..f37cd85 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/compat/__pycache__/chainmap.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/compat/__pycache__/pickle_compat.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/compat/__pycache__/pickle_compat.cpython-38.pyc new file mode 100644 index 0000000..57ec7a9 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/compat/__pycache__/pickle_compat.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/compat/__pycache__/pyarrow.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/compat/__pycache__/pyarrow.cpython-38.pyc new file mode 100644 index 0000000..5f9cdda Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/compat/__pycache__/pyarrow.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/compat/_optional.py b/.local/lib/python3.8/site-packages/pandas/compat/_optional.py new file mode 100644 index 0000000..a26bc94 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/compat/_optional.py @@ -0,0 +1,154 @@ +from __future__ import annotations + +import importlib +import sys +import types +import warnings + +from pandas.util.version import Version + +# Update install.rst when updating versions! + +VERSIONS = { + "bs4": "4.8.2", + "blosc": "1.20.1", + "bottleneck": "1.3.1", + "fastparquet": "0.4.0", + "fsspec": "0.7.4", + "html5lib": "1.1", + "gcsfs": "0.6.0", + "jinja2": "2.11", + "lxml.etree": "4.5.0", + "matplotlib": "3.3.2", + "numba": "0.50.1", + "numexpr": "2.7.1", + "odfpy": "1.4.1", + "openpyxl": "3.0.3", + "pandas_gbq": "0.14.0", + "psycopg2": "2.8.4", # (dt dec pq3 ext lo64) + "pymysql": "0.10.1", + "pyarrow": "1.0.1", + "pyreadstat": "1.1.0", + "pytest": "6.0", + "pyxlsb": "1.0.6", + "s3fs": "0.4.0", + "scipy": "1.4.1", + "sqlalchemy": "1.4.0", + "tables": "3.6.1", + "tabulate": "0.8.7", + "xarray": "0.15.1", + "xlrd": "2.0.1", + "xlwt": "1.3.0", + "xlsxwriter": "1.2.2", + "zstandard": "0.15.2", +} + +# A mapping from import name to package name (on PyPI) for packages where +# these two names are different. 
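+# For example, ``bs4`` is the name imported at runtime, while the PyPI
+# package installed with pip/conda is ``beautifulsoup4``; this mapping is
+# what lets the error message below name the installable package.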
+
+INSTALL_MAPPING = {
+    "bs4": "beautifulsoup4",
+    "bottleneck": "Bottleneck",
+    "lxml.etree": "lxml",
+    "odf": "odfpy",
+    "pandas_gbq": "pandas-gbq",
+    "tables": "pytables",
+    "sqlalchemy": "SQLAlchemy",
+    "jinja2": "Jinja2",
+}
+
+
+def get_version(module: types.ModuleType) -> str:
+    version = getattr(module, "__version__", None)
+    if version is None:
+        # xlrd uses a capitalized attribute name
+        version = getattr(module, "__VERSION__", None)
+
+    if version is None:
+        raise ImportError(f"Can't determine version for {module.__name__}")
+    if module.__name__ == "psycopg2":
+        # psycopg2 appends " (dt dec pq3 ext lo64)" to its version
+        version = version.split()[0]
+    return version
+
+
+def import_optional_dependency(
+    name: str,
+    extra: str = "",
+    errors: str = "raise",
+    min_version: str | None = None,
+):
+    """
+    Import an optional dependency.
+
+    By default, if a dependency is missing, an ImportError with a nice
+    message will be raised. If a dependency is present, but too old,
+    we raise.
+
+    Parameters
+    ----------
+    name : str
+        The module name.
+    extra : str
+        Additional text to include in the ImportError message.
+    errors : str {'raise', 'warn', 'ignore'}
+        What to do when a dependency is not found or its version is too old.
+
+        * raise : Raise an ImportError
+        * warn : Only applicable when a module's version is too old.
+          Warns that the version is too old and returns None
+        * ignore: If the module is not installed, return None, otherwise,
+          return the module, even if the version is too old.
+          It's expected that users validate the version locally when
+          using ``errors="ignore"`` (see ``io/html.py``)
+    min_version : str, default None
+        Specify a minimum version that is different from the global pandas
+        minimum version required.
+
+    Returns
+    -------
+    maybe_module : Optional[ModuleType]
+        The imported module, when found and the version is correct.
+        None is returned when the package is not found and `errors`
+        is ``'ignore'``, or when the package's version is too old and
+        `errors` is ``'warn'``.
+    """
+
+    assert errors in {"warn", "raise", "ignore"}
+
+    package_name = INSTALL_MAPPING.get(name)
+    install_name = package_name if package_name is not None else name
+
+    msg = (
+        f"Missing optional dependency '{install_name}'. {extra} "
+        f"Use pip or conda to install {install_name}."
+    )
+    try:
+        module = importlib.import_module(name)
+    except ImportError:
+        if errors == "raise":
+            raise ImportError(msg)
+        else:
+            return None
+
+    # Handle submodules: if we have a submodule, grab the parent module from
+    # sys.modules
+    parent = name.split(".")[0]
+    if parent != name:
+        install_name = parent
+        module_to_get = sys.modules[install_name]
+    else:
+        module_to_get = module
+    minimum_version = min_version if min_version is not None else VERSIONS.get(parent)
+    if minimum_version:
+        version = get_version(module_to_get)
+        if Version(version) < Version(minimum_version):
+            msg = (
+                f"Pandas requires version '{minimum_version}' or newer of '{parent}' "
+                f"(version '{version}' currently installed)."
+ ) + if errors == "warn": + warnings.warn(msg, UserWarning) + return None + elif errors == "raise": + raise ImportError(msg) + + return module diff --git a/.local/lib/python3.8/site-packages/pandas/compat/chainmap.py b/.local/lib/python3.8/site-packages/pandas/compat/chainmap.py new file mode 100644 index 0000000..9af7962 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/compat/chainmap.py @@ -0,0 +1,35 @@ +from typing import ( + ChainMap, + TypeVar, +) + +_KT = TypeVar("_KT") +_VT = TypeVar("_VT") + + +class DeepChainMap(ChainMap[_KT, _VT]): + """ + Variant of ChainMap that allows direct updates to inner scopes. + + Only works when all passed mapping are mutable. + """ + + def __setitem__(self, key: _KT, value: _VT) -> None: + for mapping in self.maps: + if key in mapping: + mapping[key] = value + return + self.maps[0][key] = value + + def __delitem__(self, key: _KT) -> None: + """ + Raises + ------ + KeyError + If `key` doesn't exist. + """ + for mapping in self.maps: + if key in mapping: + del mapping[key] + return + raise KeyError(key) diff --git a/.local/lib/python3.8/site-packages/pandas/compat/numpy/__init__.py b/.local/lib/python3.8/site-packages/pandas/compat/numpy/__init__.py new file mode 100644 index 0000000..623c5fb --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/compat/numpy/__init__.py @@ -0,0 +1,34 @@ +""" support numpy compatibility across versions """ +import numpy as np + +from pandas.util.version import Version + +# numpy versioning +_np_version = np.__version__ +_nlv = Version(_np_version) +np_version_under1p19 = _nlv < Version("1.19") +np_version_under1p20 = _nlv < Version("1.20") +np_version_under1p22 = _nlv < Version("1.22") +np_version_gte1p22 = _nlv >= Version("1.22") +is_numpy_dev = _nlv.dev is not None +_min_numpy_ver = "1.18.5" + +if is_numpy_dev or not np_version_under1p22: + np_percentile_argname = "method" +else: + np_percentile_argname = "interpolation" + + +if _nlv < Version(_min_numpy_ver): + raise ImportError( + f"this version of pandas is incompatible with numpy < {_min_numpy_ver}\n" + f"your numpy version is {_np_version}.\n" + f"Please upgrade numpy to >= {_min_numpy_ver} to use this pandas version" + ) + + +__all__ = [ + "np", + "_np_version", + "is_numpy_dev", +] diff --git a/.local/lib/python3.8/site-packages/pandas/compat/numpy/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/compat/numpy/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..ef366e1 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/compat/numpy/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/compat/numpy/__pycache__/function.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/compat/numpy/__pycache__/function.cpython-38.pyc new file mode 100644 index 0000000..8420e08 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/compat/numpy/__pycache__/function.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/compat/numpy/function.py b/.local/lib/python3.8/site-packages/pandas/compat/numpy/function.py new file mode 100644 index 0000000..cea1b80 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/compat/numpy/function.py @@ -0,0 +1,409 @@ +""" +For compatibility with numpy libraries, pandas functions or methods have to +accept '*args' and '**kwargs' parameters to accommodate numpy arguments that +are not actually used or respected in the pandas implementation. 
+ +To ensure that users do not abuse these parameters, validation is performed in +'validators.py' to make sure that any extra parameters passed correspond ONLY +to those in the numpy signature. Part of that validation includes whether or +not the user attempted to pass in non-default values for these extraneous +parameters. As we want to discourage users from relying on these parameters +when calling the pandas implementation, we want them only to pass in the +default values for these parameters. + +This module provides a set of commonly used default arguments for functions and +methods that are spread throughout the codebase. This module will make it +easier to adjust to future upstream changes in the analogous numpy signatures. +""" +from __future__ import annotations + +from typing import Any + +from numpy import ndarray + +from pandas._libs.lib import ( + is_bool, + is_integer, +) +from pandas.errors import UnsupportedFunctionCall +from pandas.util._validators import ( + validate_args, + validate_args_and_kwargs, + validate_kwargs, +) + + +class CompatValidator: + def __init__( + self, + defaults, + fname=None, + method: str | None = None, + max_fname_arg_count=None, + ): + self.fname = fname + self.method = method + self.defaults = defaults + self.max_fname_arg_count = max_fname_arg_count + + def __call__( + self, + args, + kwargs, + fname=None, + max_fname_arg_count=None, + method: str | None = None, + ) -> None: + if args or kwargs: + fname = self.fname if fname is None else fname + max_fname_arg_count = ( + self.max_fname_arg_count + if max_fname_arg_count is None + else max_fname_arg_count + ) + method = self.method if method is None else method + + if method == "args": + validate_args(fname, args, max_fname_arg_count, self.defaults) + elif method == "kwargs": + validate_kwargs(fname, kwargs, self.defaults) + elif method == "both": + validate_args_and_kwargs( + fname, args, kwargs, max_fname_arg_count, self.defaults + ) + else: + raise ValueError(f"invalid validation method '{method}'") + + +ARGMINMAX_DEFAULTS = {"out": None} +validate_argmin = CompatValidator( + ARGMINMAX_DEFAULTS, fname="argmin", method="both", max_fname_arg_count=1 +) +validate_argmax = CompatValidator( + ARGMINMAX_DEFAULTS, fname="argmax", method="both", max_fname_arg_count=1 +) + + +def process_skipna(skipna, args): + if isinstance(skipna, ndarray) or skipna is None: + args = (skipna,) + args + skipna = True + + return skipna, args + + +def validate_argmin_with_skipna(skipna, args, kwargs): + """ + If 'Series.argmin' is called via the 'numpy' library, the third parameter + in its signature is 'out', which takes either an ndarray or 'None', so + check if the 'skipna' parameter is either an instance of ndarray or is + None, since 'skipna' itself should be a boolean + """ + skipna, args = process_skipna(skipna, args) + validate_argmin(args, kwargs) + return skipna + + +def validate_argmax_with_skipna(skipna, args, kwargs): + """ + If 'Series.argmax' is called via the 'numpy' library, the third parameter + in its signature is 'out', which takes either an ndarray or 'None', so + check if the 'skipna' parameter is either an instance of ndarray or is + None, since 'skipna' itself should be a boolean + """ + skipna, args = process_skipna(skipna, args) + validate_argmax(args, kwargs) + return skipna + + +ARGSORT_DEFAULTS: dict[str, int | str | None] = {} +ARGSORT_DEFAULTS["axis"] = -1 +ARGSORT_DEFAULTS["kind"] = "quicksort" +ARGSORT_DEFAULTS["order"] = None +ARGSORT_DEFAULTS["kind"] = None + + +validate_argsort = 
CompatValidator( + ARGSORT_DEFAULTS, fname="argsort", max_fname_arg_count=0, method="both" +) + +# two different signatures of argsort, this second validation for when the +# `kind` param is supported +ARGSORT_DEFAULTS_KIND: dict[str, int | None] = {} +ARGSORT_DEFAULTS_KIND["axis"] = -1 +ARGSORT_DEFAULTS_KIND["order"] = None +validate_argsort_kind = CompatValidator( + ARGSORT_DEFAULTS_KIND, fname="argsort", max_fname_arg_count=0, method="both" +) + + +def validate_argsort_with_ascending(ascending, args, kwargs): + """ + If 'Categorical.argsort' is called via the 'numpy' library, the first + parameter in its signature is 'axis', which takes either an integer or + 'None', so check if the 'ascending' parameter has either integer type or is + None, since 'ascending' itself should be a boolean + """ + if is_integer(ascending) or ascending is None: + args = (ascending,) + args + ascending = True + + validate_argsort_kind(args, kwargs, max_fname_arg_count=3) + return ascending + + +CLIP_DEFAULTS: dict[str, Any] = {"out": None} +validate_clip = CompatValidator( + CLIP_DEFAULTS, fname="clip", method="both", max_fname_arg_count=3 +) + + +def validate_clip_with_axis(axis, args, kwargs): + """ + If 'NDFrame.clip' is called via the numpy library, the third parameter in + its signature is 'out', which can takes an ndarray, so check if the 'axis' + parameter is an instance of ndarray, since 'axis' itself should either be + an integer or None + """ + if isinstance(axis, ndarray): + args = (axis,) + args + axis = None + + validate_clip(args, kwargs) + return axis + + +CUM_FUNC_DEFAULTS: dict[str, Any] = {} +CUM_FUNC_DEFAULTS["dtype"] = None +CUM_FUNC_DEFAULTS["out"] = None +validate_cum_func = CompatValidator( + CUM_FUNC_DEFAULTS, method="both", max_fname_arg_count=1 +) +validate_cumsum = CompatValidator( + CUM_FUNC_DEFAULTS, fname="cumsum", method="both", max_fname_arg_count=1 +) + + +def validate_cum_func_with_skipna(skipna, args, kwargs, name): + """ + If this function is called via the 'numpy' library, the third parameter in + its signature is 'dtype', which takes either a 'numpy' dtype or 'None', so + check if the 'skipna' parameter is a boolean or not + """ + if not is_bool(skipna): + args = (skipna,) + args + skipna = True + + validate_cum_func(args, kwargs, fname=name) + return skipna + + +ALLANY_DEFAULTS: dict[str, bool | None] = {} +ALLANY_DEFAULTS["dtype"] = None +ALLANY_DEFAULTS["out"] = None +ALLANY_DEFAULTS["keepdims"] = False +ALLANY_DEFAULTS["axis"] = None +validate_all = CompatValidator( + ALLANY_DEFAULTS, fname="all", method="both", max_fname_arg_count=1 +) +validate_any = CompatValidator( + ALLANY_DEFAULTS, fname="any", method="both", max_fname_arg_count=1 +) + +LOGICAL_FUNC_DEFAULTS = {"out": None, "keepdims": False} +validate_logical_func = CompatValidator(LOGICAL_FUNC_DEFAULTS, method="kwargs") + +MINMAX_DEFAULTS = {"axis": None, "out": None, "keepdims": False} +validate_min = CompatValidator( + MINMAX_DEFAULTS, fname="min", method="both", max_fname_arg_count=1 +) +validate_max = CompatValidator( + MINMAX_DEFAULTS, fname="max", method="both", max_fname_arg_count=1 +) + +RESHAPE_DEFAULTS: dict[str, str] = {"order": "C"} +validate_reshape = CompatValidator( + RESHAPE_DEFAULTS, fname="reshape", method="both", max_fname_arg_count=1 +) + +REPEAT_DEFAULTS: dict[str, Any] = {"axis": None} +validate_repeat = CompatValidator( + REPEAT_DEFAULTS, fname="repeat", method="both", max_fname_arg_count=1 +) + +ROUND_DEFAULTS: dict[str, Any] = {"out": None} +validate_round = CompatValidator( + 
ROUND_DEFAULTS, fname="round", method="both", max_fname_arg_count=1 +) + +SORT_DEFAULTS: dict[str, int | str | None] = {} +SORT_DEFAULTS["axis"] = -1 +SORT_DEFAULTS["kind"] = "quicksort" +SORT_DEFAULTS["order"] = None +validate_sort = CompatValidator(SORT_DEFAULTS, fname="sort", method="kwargs") + +STAT_FUNC_DEFAULTS: dict[str, Any | None] = {} +STAT_FUNC_DEFAULTS["dtype"] = None +STAT_FUNC_DEFAULTS["out"] = None + +SUM_DEFAULTS = STAT_FUNC_DEFAULTS.copy() +SUM_DEFAULTS["axis"] = None +SUM_DEFAULTS["keepdims"] = False +SUM_DEFAULTS["initial"] = None + +PROD_DEFAULTS = STAT_FUNC_DEFAULTS.copy() +PROD_DEFAULTS["axis"] = None +PROD_DEFAULTS["keepdims"] = False +PROD_DEFAULTS["initial"] = None + +MEDIAN_DEFAULTS = STAT_FUNC_DEFAULTS.copy() +MEDIAN_DEFAULTS["overwrite_input"] = False +MEDIAN_DEFAULTS["keepdims"] = False + +STAT_FUNC_DEFAULTS["keepdims"] = False + +validate_stat_func = CompatValidator(STAT_FUNC_DEFAULTS, method="kwargs") +validate_sum = CompatValidator( + SUM_DEFAULTS, fname="sum", method="both", max_fname_arg_count=1 +) +validate_prod = CompatValidator( + PROD_DEFAULTS, fname="prod", method="both", max_fname_arg_count=1 +) +validate_mean = CompatValidator( + STAT_FUNC_DEFAULTS, fname="mean", method="both", max_fname_arg_count=1 +) +validate_median = CompatValidator( + MEDIAN_DEFAULTS, fname="median", method="both", max_fname_arg_count=1 +) + +STAT_DDOF_FUNC_DEFAULTS: dict[str, bool | None] = {} +STAT_DDOF_FUNC_DEFAULTS["dtype"] = None +STAT_DDOF_FUNC_DEFAULTS["out"] = None +STAT_DDOF_FUNC_DEFAULTS["keepdims"] = False +validate_stat_ddof_func = CompatValidator(STAT_DDOF_FUNC_DEFAULTS, method="kwargs") + +TAKE_DEFAULTS: dict[str, str | None] = {} +TAKE_DEFAULTS["out"] = None +TAKE_DEFAULTS["mode"] = "raise" +validate_take = CompatValidator(TAKE_DEFAULTS, fname="take", method="kwargs") + + +def validate_take_with_convert(convert, args, kwargs): + """ + If this function is called via the 'numpy' library, the third parameter in + its signature is 'axis', which takes either an ndarray or 'None', so check + if the 'convert' parameter is either an instance of ndarray or is None + """ + if isinstance(convert, ndarray) or convert is None: + args = (convert,) + args + convert = True + + validate_take(args, kwargs, max_fname_arg_count=3, method="both") + return convert + + +TRANSPOSE_DEFAULTS = {"axes": None} +validate_transpose = CompatValidator( + TRANSPOSE_DEFAULTS, fname="transpose", method="both", max_fname_arg_count=0 +) + + +def validate_window_func(name, args, kwargs) -> None: + numpy_args = ("axis", "dtype", "out") + msg = ( + f"numpy operations are not valid with window objects. " + f"Use .{name}() directly instead " + ) + + if len(args) > 0: + raise UnsupportedFunctionCall(msg) + + for arg in numpy_args: + if arg in kwargs: + raise UnsupportedFunctionCall(msg) + + +def validate_rolling_func(name, args, kwargs) -> None: + numpy_args = ("axis", "dtype", "out") + msg = ( + f"numpy operations are not valid with window objects. " + f"Use .rolling(...).{name}() instead " + ) + + if len(args) > 0: + raise UnsupportedFunctionCall(msg) + + for arg in numpy_args: + if arg in kwargs: + raise UnsupportedFunctionCall(msg) + + +def validate_expanding_func(name, args, kwargs) -> None: + numpy_args = ("axis", "dtype", "out") + msg = ( + f"numpy operations are not valid with window objects. 
" + f"Use .expanding(...).{name}() instead " + ) + + if len(args) > 0: + raise UnsupportedFunctionCall(msg) + + for arg in numpy_args: + if arg in kwargs: + raise UnsupportedFunctionCall(msg) + + +def validate_groupby_func(name, args, kwargs, allowed=None) -> None: + """ + 'args' and 'kwargs' should be empty, except for allowed kwargs because all + of their necessary parameters are explicitly listed in the function + signature + """ + if allowed is None: + allowed = [] + + kwargs = set(kwargs) - set(allowed) + + if len(args) + len(kwargs) > 0: + raise UnsupportedFunctionCall( + "numpy operations are not valid with groupby. " + f"Use .groupby(...).{name}() instead" + ) + + +RESAMPLER_NUMPY_OPS = ("min", "max", "sum", "prod", "mean", "std", "var") + + +def validate_resampler_func(method: str, args, kwargs) -> None: + """ + 'args' and 'kwargs' should be empty because all of their necessary + parameters are explicitly listed in the function signature + """ + if len(args) + len(kwargs) > 0: + if method in RESAMPLER_NUMPY_OPS: + raise UnsupportedFunctionCall( + "numpy operations are not valid with resample. " + f"Use .resample(...).{method}() instead" + ) + else: + raise TypeError("too many arguments passed in") + + +def validate_minmax_axis(axis: int | None, ndim: int = 1) -> None: + """ + Ensure that the axis argument passed to min, max, argmin, or argmax is zero + or None, as otherwise it will be incorrectly ignored. + + Parameters + ---------- + axis : int or None + ndim : int, default 1 + + Raises + ------ + ValueError + """ + if axis is None: + return + if axis >= ndim or (axis < 0 and ndim + axis < 0): + raise ValueError(f"`axis` must be fewer than the number of dimensions ({ndim})") diff --git a/.local/lib/python3.8/site-packages/pandas/compat/pickle_compat.py b/.local/lib/python3.8/site-packages/pandas/compat/pickle_compat.py new file mode 100644 index 0000000..2333324 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/compat/pickle_compat.py @@ -0,0 +1,303 @@ +""" +Support pre-0.12 series pickle compatibility. +""" +from __future__ import annotations + +import contextlib +import copy +import io +import pickle as pkl +from typing import TYPE_CHECKING +import warnings + +import numpy as np + +from pandas._libs.arrays import NDArrayBacked +from pandas._libs.tslibs import BaseOffset + +from pandas import Index +from pandas.core.arrays import ( + DatetimeArray, + PeriodArray, + TimedeltaArray, +) +from pandas.core.internals import BlockManager + +if TYPE_CHECKING: + from pandas import ( + DataFrame, + Series, + ) + + +def load_reduce(self): + stack = self.stack + args = stack.pop() + func = stack[-1] + + try: + stack[-1] = func(*args) + return + except TypeError as err: + + # If we have a deprecated function, + # try to replace and try again. + + msg = "_reconstruct: First argument must be a sub-type of ndarray" + + if msg in str(err): + try: + cls = args[0] + stack[-1] = object.__new__(cls) + return + except TypeError: + pass + elif args and isinstance(args[0], type) and issubclass(args[0], BaseOffset): + # TypeError: object.__new__(Day) is not safe, use Day.__new__() + cls = args[0] + stack[-1] = cls.__new__(*args) + return + elif args and issubclass(args[0], PeriodArray): + cls = args[0] + stack[-1] = NDArrayBacked.__new__(*args) + return + + raise + + +_sparse_msg = """\ + +Loading a saved '{cls}' as a {new} with sparse values. +'{cls}' is now removed. You should re-save this dataset in its new format. 
+""" + + +class _LoadSparseSeries: + # To load a SparseSeries as a Series[Sparse] + + # https://github.com/python/mypy/issues/1020 + # error: Incompatible return type for "__new__" (returns "Series", but must return + # a subtype of "_LoadSparseSeries") + def __new__(cls) -> Series: # type: ignore[misc] + from pandas import Series + + warnings.warn( + _sparse_msg.format(cls="SparseSeries", new="Series"), + FutureWarning, + stacklevel=6, + ) + + return Series(dtype=object) + + +class _LoadSparseFrame: + # To load a SparseDataFrame as a DataFrame[Sparse] + + # https://github.com/python/mypy/issues/1020 + # error: Incompatible return type for "__new__" (returns "DataFrame", but must + # return a subtype of "_LoadSparseFrame") + def __new__(cls) -> DataFrame: # type: ignore[misc] + from pandas import DataFrame + + warnings.warn( + _sparse_msg.format(cls="SparseDataFrame", new="DataFrame"), + FutureWarning, + stacklevel=6, + ) + + return DataFrame() + + +# If classes are moved, provide compat here. +_class_locations_map = { + ("pandas.core.sparse.array", "SparseArray"): ("pandas.core.arrays", "SparseArray"), + # 15477 + ("pandas.core.base", "FrozenNDArray"): ("numpy", "ndarray"), + ("pandas.core.indexes.frozen", "FrozenNDArray"): ("numpy", "ndarray"), + ("pandas.core.base", "FrozenList"): ("pandas.core.indexes.frozen", "FrozenList"), + # 10890 + ("pandas.core.series", "TimeSeries"): ("pandas.core.series", "Series"), + ("pandas.sparse.series", "SparseTimeSeries"): ( + "pandas.core.sparse.series", + "SparseSeries", + ), + # 12588, extensions moving + ("pandas._sparse", "BlockIndex"): ("pandas._libs.sparse", "BlockIndex"), + ("pandas.tslib", "Timestamp"): ("pandas._libs.tslib", "Timestamp"), + # 18543 moving period + ("pandas._period", "Period"): ("pandas._libs.tslibs.period", "Period"), + ("pandas._libs.period", "Period"): ("pandas._libs.tslibs.period", "Period"), + # 18014 moved __nat_unpickle from _libs.tslib-->_libs.tslibs.nattype + ("pandas.tslib", "__nat_unpickle"): ( + "pandas._libs.tslibs.nattype", + "__nat_unpickle", + ), + ("pandas._libs.tslib", "__nat_unpickle"): ( + "pandas._libs.tslibs.nattype", + "__nat_unpickle", + ), + # 15998 top-level dirs moving + ("pandas.sparse.array", "SparseArray"): ( + "pandas.core.arrays.sparse", + "SparseArray", + ), + ("pandas.sparse.series", "SparseSeries"): ( + "pandas.compat.pickle_compat", + "_LoadSparseSeries", + ), + ("pandas.sparse.frame", "SparseDataFrame"): ( + "pandas.core.sparse.frame", + "_LoadSparseFrame", + ), + ("pandas.indexes.base", "_new_Index"): ("pandas.core.indexes.base", "_new_Index"), + ("pandas.indexes.base", "Index"): ("pandas.core.indexes.base", "Index"), + ("pandas.indexes.numeric", "Int64Index"): ( + "pandas.core.indexes.numeric", + "Int64Index", + ), + ("pandas.indexes.range", "RangeIndex"): ("pandas.core.indexes.range", "RangeIndex"), + ("pandas.indexes.multi", "MultiIndex"): ("pandas.core.indexes.multi", "MultiIndex"), + ("pandas.tseries.index", "_new_DatetimeIndex"): ( + "pandas.core.indexes.datetimes", + "_new_DatetimeIndex", + ), + ("pandas.tseries.index", "DatetimeIndex"): ( + "pandas.core.indexes.datetimes", + "DatetimeIndex", + ), + ("pandas.tseries.period", "PeriodIndex"): ( + "pandas.core.indexes.period", + "PeriodIndex", + ), + # 19269, arrays moving + ("pandas.core.categorical", "Categorical"): ("pandas.core.arrays", "Categorical"), + # 19939, add timedeltaindex, float64index compat from 15998 move + ("pandas.tseries.tdi", "TimedeltaIndex"): ( + "pandas.core.indexes.timedeltas", + "TimedeltaIndex", + ), + 
("pandas.indexes.numeric", "Float64Index"): ( + "pandas.core.indexes.numeric", + "Float64Index", + ), + ("pandas.core.sparse.series", "SparseSeries"): ( + "pandas.compat.pickle_compat", + "_LoadSparseSeries", + ), + ("pandas.core.sparse.frame", "SparseDataFrame"): ( + "pandas.compat.pickle_compat", + "_LoadSparseFrame", + ), +} + + +# our Unpickler sub-class to override methods and some dispatcher +# functions for compat and uses a non-public class of the pickle module. + + +class Unpickler(pkl._Unpickler): + def find_class(self, module, name): + # override superclass + key = (module, name) + module, name = _class_locations_map.get(key, key) + return super().find_class(module, name) + + +Unpickler.dispatch = copy.copy(Unpickler.dispatch) +Unpickler.dispatch[pkl.REDUCE[0]] = load_reduce + + +def load_newobj(self): + args = self.stack.pop() + cls = self.stack[-1] + + # compat + if issubclass(cls, Index): + obj = object.__new__(cls) + elif issubclass(cls, DatetimeArray) and not args: + arr = np.array([], dtype="M8[ns]") + obj = cls.__new__(cls, arr, arr.dtype) + elif issubclass(cls, TimedeltaArray) and not args: + arr = np.array([], dtype="m8[ns]") + obj = cls.__new__(cls, arr, arr.dtype) + elif cls is BlockManager and not args: + obj = cls.__new__(cls, (), [], False) + else: + obj = cls.__new__(cls, *args) + + self.stack[-1] = obj + + +Unpickler.dispatch[pkl.NEWOBJ[0]] = load_newobj + + +def load_newobj_ex(self): + kwargs = self.stack.pop() + args = self.stack.pop() + cls = self.stack.pop() + + # compat + if issubclass(cls, Index): + obj = object.__new__(cls) + else: + obj = cls.__new__(cls, *args, **kwargs) + self.append(obj) + + +try: + Unpickler.dispatch[pkl.NEWOBJ_EX[0]] = load_newobj_ex +except (AttributeError, KeyError): + pass + + +def load(fh, encoding: str | None = None, is_verbose: bool = False): + """ + Load a pickle, with a provided encoding, + + Parameters + ---------- + fh : a filelike object + encoding : an optional encoding + is_verbose : show exception output + """ + try: + fh.seek(0) + if encoding is not None: + up = Unpickler(fh, encoding=encoding) + else: + up = Unpickler(fh) + # "Unpickler" has no attribute "is_verbose" [attr-defined] + up.is_verbose = is_verbose # type: ignore[attr-defined] + + return up.load() + except (ValueError, TypeError): + raise + + +def loads( + bytes_object: bytes, + *, + fix_imports: bool = True, + encoding: str = "ASCII", + errors: str = "strict", +): + """ + Analogous to pickle._loads. + """ + fd = io.BytesIO(bytes_object) + return Unpickler( + fd, fix_imports=fix_imports, encoding=encoding, errors=errors + ).load() + + +@contextlib.contextmanager +def patch_pickle(): + """ + Temporarily patch pickle to use our unpickler. 
+ """ + orig_loads = pkl.loads + try: + setattr(pkl, "loads", loads) + yield + finally: + setattr(pkl, "loads", orig_loads) diff --git a/.local/lib/python3.8/site-packages/pandas/compat/pyarrow.py b/.local/lib/python3.8/site-packages/pandas/compat/pyarrow.py new file mode 100644 index 0000000..e6ac0c5 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/compat/pyarrow.py @@ -0,0 +1,22 @@ +""" support pyarrow compatibility across versions """ + +from pandas.util.version import Version + +try: + import pyarrow as pa + + _pa_version = pa.__version__ + _palv = Version(_pa_version) + pa_version_under1p01 = _palv < Version("1.0.1") + pa_version_under2p0 = _palv < Version("2.0.0") + pa_version_under3p0 = _palv < Version("3.0.0") + pa_version_under4p0 = _palv < Version("4.0.0") + pa_version_under5p0 = _palv < Version("5.0.0") + pa_version_under6p0 = _palv < Version("6.0.0") +except ImportError: + pa_version_under1p01 = True + pa_version_under2p0 = True + pa_version_under3p0 = True + pa_version_under4p0 = True + pa_version_under5p0 = True + pa_version_under6p0 = True diff --git a/.local/lib/python3.8/site-packages/pandas/conftest.py b/.local/lib/python3.8/site-packages/pandas/conftest.py new file mode 100644 index 0000000..958df72 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/conftest.py @@ -0,0 +1,1775 @@ +""" +This file is very long and growing, but it was decided to not split it yet, as +it's still manageable (2020-03-17, ~1.1k LoC). See gh-31989 + +Instead of splitting it was decided to define sections here: +- Configuration / Settings +- Autouse fixtures +- Common arguments +- Missing values & co. +- Classes +- Indices +- Series' +- DataFrames +- Operators & Operations +- Data sets/files +- Time zones +- Dtypes +- Misc +""" +# pyright: reportUntypedFunctionDecorator = false + +from collections import abc +from datetime import ( + date, + datetime, + time, + timedelta, + timezone, +) +from decimal import Decimal +import operator +import os + +from dateutil.tz import ( + tzlocal, + tzutc, +) +import hypothesis +from hypothesis import strategies as st +import numpy as np +import pytest +from pytz import ( + FixedOffset, + utc, +) + +import pandas.util._test_decorators as td + +from pandas.core.dtypes.dtypes import ( + DatetimeTZDtype, + IntervalDtype, +) + +import pandas as pd +from pandas import ( + DataFrame, + Interval, + Period, + Series, + Timedelta, + Timestamp, +) +import pandas._testing as tm +from pandas.core import ops +from pandas.core.indexes.api import ( + Index, + MultiIndex, +) + +try: + import pyarrow as pa +except ImportError: + has_pyarrow = False +else: + del pa + has_pyarrow = True + +# Until https://github.com/numpy/numpy/issues/19078 is sorted out, just suppress +suppress_npdev_promotion_warning = pytest.mark.filterwarnings( + "ignore:Promotion of numbers and bools:FutureWarning" +) + +# ---------------------------------------------------------------- +# Configuration / Settings +# ---------------------------------------------------------------- +# pytest + + +def pytest_addoption(parser): + parser.addoption("--skip-slow", action="store_true", help="skip slow tests") + parser.addoption("--skip-network", action="store_true", help="skip network tests") + parser.addoption("--skip-db", action="store_true", help="skip db tests") + parser.addoption( + "--run-high-memory", action="store_true", help="run high memory tests" + ) + parser.addoption("--only-slow", action="store_true", help="run only slow tests") + parser.addoption( + "--strict-data-files", + 
action="store_true", + help="Fail if a test is skipped for missing data file.", + ) + + +def pytest_collection_modifyitems(items, config): + skip_slow = config.getoption("--skip-slow") + only_slow = config.getoption("--only-slow") + skip_network = config.getoption("--skip-network") + skip_db = config.getoption("--skip-db") + run_high_memory = config.getoption("--run-high-memory") + + marks = [ + (pytest.mark.slow, "slow", skip_slow, "--skip-slow"), + (pytest.mark.network, "network", skip_network, "--network"), + (pytest.mark.db, "db", skip_db, "--skip-db"), + ] + + for item in items: + if config.getoption("--doctest-modules") or config.getoption( + "--doctest-cython", default=False + ): + # autouse=True for the add_doctest_imports can lead to expensive teardowns + # since doctest_namespace is a session fixture + item.add_marker(pytest.mark.usefixtures("add_doctest_imports")) + # mark all tests in the pandas/tests/frame directory with "arraymanager" + if "/frame/" in item.nodeid: + item.add_marker(pytest.mark.arraymanager) + item.add_marker(suppress_npdev_promotion_warning) + + for (mark, kwd, skip_if_found, arg_name) in marks: + if kwd in item.keywords: + # If we're skipping, no need to actually add the marker or look for + # other markers + if skip_if_found: + item.add_marker(pytest.mark.skip(f"skipping due to {arg_name}")) + break + + item.add_marker(mark) + + if only_slow and "slow" not in item.keywords: + item.add_marker(pytest.mark.skip("skipping due to --only-slow")) + + if "high_memory" in item.keywords and not run_high_memory: + item.add_marker( + pytest.mark.skip( + "skipping high memory test since --run-high-memory was not set" + ) + ) + + +# Hypothesis +hypothesis.settings.register_profile( + "ci", + # Hypothesis timing checks are tuned for scalars by default, so we bump + # them from 200ms to 500ms per test case as the global default. If this + # is too short for a specific test, (a) try to make it faster, and (b) + # if it really is slow add `@settings(deadline=...)` with a working value, + # or `deadline=None` to entirely disable timeouts for that test. + # 2022-02-09: Changed deadline from 500 -> None. Deadline leads to + # non-actionable, flaky CI failures (# GH 24641, 44969, 45118, 44969) + deadline=None, + suppress_health_check=(hypothesis.HealthCheck.too_slow,), +) +hypothesis.settings.load_profile("ci") + +# Registering these strategies makes them globally available via st.from_type, +# which is use for offsets in tests/tseries/offsets/test_offsets_properties.py +for name in "MonthBegin MonthEnd BMonthBegin BMonthEnd".split(): + cls = getattr(pd.tseries.offsets, name) + st.register_type_strategy( + cls, st.builds(cls, n=st.integers(-99, 99), normalize=st.booleans()) + ) + +for name in "YearBegin YearEnd BYearBegin BYearEnd".split(): + cls = getattr(pd.tseries.offsets, name) + st.register_type_strategy( + cls, + st.builds( + cls, + n=st.integers(-5, 5), + normalize=st.booleans(), + month=st.integers(min_value=1, max_value=12), + ), + ) + +for name in "QuarterBegin QuarterEnd BQuarterBegin BQuarterEnd".split(): + cls = getattr(pd.tseries.offsets, name) + st.register_type_strategy( + cls, + st.builds( + cls, + n=st.integers(-24, 24), + normalize=st.booleans(), + startingMonth=st.integers(min_value=1, max_value=12), + ), + ) + + +@pytest.fixture +def add_doctest_imports(doctest_namespace): + """ + Make `np` and `pd` names available for doctests. 
+ """ + doctest_namespace["np"] = np + doctest_namespace["pd"] = pd + + +# ---------------------------------------------------------------- +# Autouse fixtures +# ---------------------------------------------------------------- +@pytest.fixture(autouse=True) +def configure_tests(): + """ + Configure settings for all tests and test modules. + """ + pd.set_option("chained_assignment", "raise") + + +# ---------------------------------------------------------------- +# Common arguments +# ---------------------------------------------------------------- +@pytest.fixture(params=[0, 1, "index", "columns"], ids=lambda x: f"axis={repr(x)}") +def axis(request): + """ + Fixture for returning the axis numbers of a DataFrame. + """ + return request.param + + +axis_frame = axis + + +@pytest.fixture(params=[True, False, None]) +def observed(request): + """ + Pass in the observed keyword to groupby for [True, False] + This indicates whether categoricals should return values for + values which are not in the grouper [False / None], or only values which + appear in the grouper [True]. [None] is supported for future compatibility + if we decide to change the default (and would need to warn if this + parameter is not passed). + """ + return request.param + + +@pytest.fixture(params=[True, False, None]) +def ordered(request): + """ + Boolean 'ordered' parameter for Categorical. + """ + return request.param + + +@pytest.fixture(params=["first", "last", False]) +def keep(request): + """ + Valid values for the 'keep' parameter used in + .duplicated or .drop_duplicates + """ + return request.param + + +@pytest.fixture(params=["both", "neither", "left", "right"]) +def inclusive_endpoints_fixture(request): + """ + Fixture for trying all interval 'inclusive' parameters. + """ + return request.param + + +@pytest.fixture(params=["left", "right", "both", "neither"]) +def closed(request): + """ + Fixture for trying all interval closed parameters. + """ + return request.param + + +@pytest.fixture(params=["left", "right", "both", "neither"]) +def other_closed(request): + """ + Secondary closed fixture to allow parametrizing over all pairs of closed. + """ + return request.param + + +@pytest.fixture( + params=[ + None, + "gzip", + "bz2", + "zip", + "xz", + pytest.param("zstd", marks=td.skip_if_no("zstandard")), + ] +) +def compression(request): + """ + Fixture for trying common compression types in compression tests. + """ + return request.param + + +@pytest.fixture( + params=[ + "gzip", + "bz2", + "zip", + "xz", + pytest.param("zstd", marks=td.skip_if_no("zstandard")), + ] +) +def compression_only(request): + """ + Fixture for trying common compression types in compression tests excluding + uncompressed case. + """ + return request.param + + +@pytest.fixture(params=[True, False]) +def writable(request): + """ + Fixture that an array is writable. + """ + return request.param + + +@pytest.fixture(params=["inner", "outer", "left", "right"]) +def join_type(request): + """ + Fixture for trying all types of join operations. + """ + return request.param + + +@pytest.fixture(params=["nlargest", "nsmallest"]) +def nselect_method(request): + """ + Fixture for trying all nselect methods. + """ + return request.param + + +# ---------------------------------------------------------------- +# Missing values & co. +# ---------------------------------------------------------------- +@pytest.fixture(params=tm.NULL_OBJECTS, ids=lambda x: type(x).__name__) +def nulls_fixture(request): + """ + Fixture for each null type in pandas. 
+ """ + return request.param + + +nulls_fixture2 = nulls_fixture # Generate cartesian product of nulls_fixture + + +@pytest.fixture(params=[None, np.nan, pd.NaT]) +def unique_nulls_fixture(request): + """ + Fixture for each null type in pandas, each null type exactly once. + """ + return request.param + + +# Generate cartesian product of unique_nulls_fixture: +unique_nulls_fixture2 = unique_nulls_fixture + + +@pytest.fixture(params=tm.NP_NAT_OBJECTS, ids=lambda x: type(x).__name__) +def np_nat_fixture(request): + """ + Fixture for each NaT type in numpy. + """ + return request.param + + +# Generate cartesian product of np_nat_fixture: +np_nat_fixture2 = np_nat_fixture + + +# ---------------------------------------------------------------- +# Classes +# ---------------------------------------------------------------- + + +@pytest.fixture(params=[DataFrame, Series]) +def frame_or_series(request): + """ + Fixture to parametrize over DataFrame and Series. + """ + return request.param + + +# error: List item 0 has incompatible type "Type[Index]"; expected "Type[IndexOpsMixin]" +@pytest.fixture( + params=[Index, Series], ids=["index", "series"] # type: ignore[list-item] +) +def index_or_series(request): + """ + Fixture to parametrize over Index and Series, made necessary by a mypy + bug, giving an error: + + List item 0 has incompatible type "Type[Series]"; expected "Type[PandasObject]" + + See GH#29725 + """ + return request.param + + +# Generate cartesian product of index_or_series fixture: +index_or_series2 = index_or_series + + +@pytest.fixture(params=[Index, Series, pd.array], ids=["index", "series", "array"]) +def index_or_series_or_array(request): + """ + Fixture to parametrize over Index, Series, and ExtensionArray + """ + return request.param + + +@pytest.fixture(params=[Index, Series, DataFrame, pd.array], ids=lambda x: x.__name__) +def box_with_array(request): + """ + Fixture to test behavior for Index, Series, DataFrame, and pandas Array + classes + """ + return request.param + + +box_with_array2 = box_with_array + + +@pytest.fixture +def dict_subclass(): + """ + Fixture for a dictionary subclass. + """ + + class TestSubDict(dict): + def __init__(self, *args, **kwargs): + dict.__init__(self, *args, **kwargs) + + return TestSubDict + + +@pytest.fixture +def non_dict_mapping_subclass(): + """ + Fixture for a non-mapping dictionary subclass. + """ + + class TestNonDictMapping(abc.Mapping): + def __init__(self, underlying_dict): + self._data = underlying_dict + + def __getitem__(self, key): + return self._data.__getitem__(key) + + def __iter__(self): + return self._data.__iter__() + + def __len__(self): + return self._data.__len__() + + return TestNonDictMapping + + +# ---------------------------------------------------------------- +# Indices +# ---------------------------------------------------------------- +@pytest.fixture +def multiindex_year_month_day_dataframe_random_data(): + """ + DataFrame with 3 level MultiIndex (year, month, day) covering + first 100 business days from 2000-01-01 with random data + """ + tdf = tm.makeTimeDataFrame(100) + ymd = tdf.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]).sum() + # use Int64Index, to make sure things work + ymd.index = ymd.index.set_levels([lev.astype("i8") for lev in ymd.index.levels]) + ymd.index.set_names(["year", "month", "day"], inplace=True) + return ymd + + +@pytest.fixture +def lexsorted_two_level_string_multiindex(): + """ + 2-level MultiIndex, lexsorted, with string names. 
+ """ + return MultiIndex( + levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=["first", "second"], + ) + + +@pytest.fixture +def multiindex_dataframe_random_data(lexsorted_two_level_string_multiindex): + """DataFrame with 2 level MultiIndex with random data""" + index = lexsorted_two_level_string_multiindex + return DataFrame( + np.random.randn(10, 3), index=index, columns=Index(["A", "B", "C"], name="exp") + ) + + +def _create_multiindex(): + """ + MultiIndex used to test the general functionality of this object + """ + + # See Also: tests.multi.conftest.idx + major_axis = Index(["foo", "bar", "baz", "qux"]) + minor_axis = Index(["one", "two"]) + + major_codes = np.array([0, 0, 1, 2, 3, 3]) + minor_codes = np.array([0, 1, 0, 1, 0, 1]) + index_names = ["first", "second"] + return MultiIndex( + levels=[major_axis, minor_axis], + codes=[major_codes, minor_codes], + names=index_names, + verify_integrity=False, + ) + + +def _create_mi_with_dt64tz_level(): + """ + MultiIndex with a level that is a tzaware DatetimeIndex. + """ + # GH#8367 round trip with pickle + return MultiIndex.from_product( + [[1, 2], ["a", "b"], pd.date_range("20130101", periods=3, tz="US/Eastern")], + names=["one", "two", "three"], + ) + + +indices_dict = { + "unicode": tm.makeUnicodeIndex(100), + "string": tm.makeStringIndex(100), + "datetime": tm.makeDateIndex(100), + "datetime-tz": tm.makeDateIndex(100, tz="US/Pacific"), + "period": tm.makePeriodIndex(100), + "timedelta": tm.makeTimedeltaIndex(100), + "int": tm.makeIntIndex(100), + "uint": tm.makeUIntIndex(100), + "range": tm.makeRangeIndex(100), + "float": tm.makeFloatIndex(100), + "num_int64": tm.makeNumericIndex(100, dtype="int64"), + "num_int32": tm.makeNumericIndex(100, dtype="int32"), + "num_int16": tm.makeNumericIndex(100, dtype="int16"), + "num_int8": tm.makeNumericIndex(100, dtype="int8"), + "num_uint64": tm.makeNumericIndex(100, dtype="uint64"), + "num_uint32": tm.makeNumericIndex(100, dtype="uint32"), + "num_uint16": tm.makeNumericIndex(100, dtype="uint16"), + "num_uint8": tm.makeNumericIndex(100, dtype="uint8"), + "num_float64": tm.makeNumericIndex(100, dtype="float64"), + "num_float32": tm.makeNumericIndex(100, dtype="float32"), + "bool": tm.makeBoolIndex(10), + "categorical": tm.makeCategoricalIndex(100), + "interval": tm.makeIntervalIndex(100), + "empty": Index([]), + "tuples": MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3])), + "mi-with-dt64tz-level": _create_mi_with_dt64tz_level(), + "multi": _create_multiindex(), + "repeats": Index([0, 0, 1, 1, 2, 2]), + "nullable_int": Index(np.arange(100), dtype="Int64"), + "nullable_uint": Index(np.arange(100), dtype="UInt16"), + "nullable_float": Index(np.arange(100), dtype="Float32"), + "nullable_bool": Index(np.arange(100).astype(bool), dtype="boolean"), + "string-python": Index(pd.array(tm.makeStringIndex(100), dtype="string[python]")), +} +if has_pyarrow: + idx = Index(pd.array(tm.makeStringIndex(100), dtype="string[pyarrow]")) + indices_dict["string-pyarrow"] = idx + + +@pytest.fixture(params=indices_dict.keys()) +def index(request): + """ + Fixture for many "simple" kinds of indices. + + These indices are unlikely to cover corner cases, e.g. + - no names + - no NaTs/NaNs + - no values near implementation bounds + - ... + """ + # copy to avoid mutation, e.g. 
setting .name + return indices_dict[request.param].copy() + + +# Needed to generate cartesian product of indices +index_fixture2 = index + + +@pytest.fixture( + params=[ + key for key in indices_dict if not isinstance(indices_dict[key], MultiIndex) + ] +) +def index_flat(request): + """ + index fixture, but excluding MultiIndex cases. + """ + key = request.param + return indices_dict[key].copy() + + +# Alias so we can test with cartesian product of index_flat +index_flat2 = index_flat + + +@pytest.fixture( + params=[ + key + for key in indices_dict + if not isinstance(indices_dict[key], MultiIndex) and indices_dict[key].is_unique + ] +) +def index_flat_unique(request): + """ + index_flat with uniqueness requirement. + """ + key = request.param + return indices_dict[key].copy() + + +@pytest.fixture( + params=[ + key + for key in indices_dict + if not ( + key in ["int", "uint", "range", "empty", "repeats"] + or key.startswith("num_") + ) + and not isinstance(indices_dict[key], MultiIndex) + ] +) +def index_with_missing(request): + """ + Fixture for indices with missing values. + + Integer-dtype and empty cases are excluded because they cannot hold missing + values. + + MultiIndex is excluded because isna() is not defined for MultiIndex. + """ + + # GH 35538. Use deep copy to avoid illusive bug on np-dev + # Azure pipeline that writes into indices_dict despite copy + ind = indices_dict[request.param].copy(deep=True) + vals = ind.values + if request.param in ["tuples", "mi-with-dt64tz-level", "multi"]: + # For setting missing values in the top level of MultiIndex + vals = ind.tolist() + vals[0] = (None,) + vals[0][1:] + vals[-1] = (None,) + vals[-1][1:] + return MultiIndex.from_tuples(vals) + else: + vals[0] = None + vals[-1] = None + return type(ind)(vals) + + +# ---------------------------------------------------------------- +# Series' +# ---------------------------------------------------------------- +@pytest.fixture +def string_series(): + """ + Fixture for Series of floats with Index of unique strings + """ + s = tm.makeStringSeries() + s.name = "series" + return s + + +@pytest.fixture +def object_series(): + """ + Fixture for Series of dtype object with Index of unique strings + """ + s = tm.makeObjectSeries() + s.name = "objects" + return s + + +@pytest.fixture +def datetime_series(): + """ + Fixture for Series of floats with DatetimeIndex + """ + s = tm.makeTimeSeries() + s.name = "ts" + return s + + +def _create_series(index): + """Helper for the _series dict""" + size = len(index) + data = np.random.randn(size) + return Series(data, index=index, name="a") + + +_series = { + f"series-with-{index_id}-index": _create_series(index) + for index_id, index in indices_dict.items() +} + + +@pytest.fixture +def series_with_simple_index(index): + """ + Fixture for tests on series with changing types of indices. + """ + return _create_series(index) + + +@pytest.fixture +def series_with_multilevel_index(): + """ + Fixture with a Series with a 2-level MultiIndex. 
+ """ + arrays = [ + ["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + tuples = zip(*arrays) + index = MultiIndex.from_tuples(tuples) + data = np.random.randn(8) + ser = Series(data, index=index) + ser[3] = np.NaN + return ser + + +_narrow_series = { + f"{dtype.__name__}-series": tm.makeFloatSeries(name="a").astype(dtype) + for dtype in tm.NARROW_NP_DTYPES +} + + +_index_or_series_objs = {**indices_dict, **_series, **_narrow_series} + + +@pytest.fixture(params=_index_or_series_objs.keys()) +def index_or_series_obj(request): + """ + Fixture for tests on indexes, series and series with a narrow dtype + copy to avoid mutation, e.g. setting .name + """ + return _index_or_series_objs[request.param].copy(deep=True) + + +# ---------------------------------------------------------------- +# DataFrames +# ---------------------------------------------------------------- +@pytest.fixture +def int_frame(): + """ + Fixture for DataFrame of ints with index of unique strings + + Columns are ['A', 'B', 'C', 'D'] + + A B C D + vpBeWjM651 1 0 1 0 + 5JyxmrP1En -1 0 0 0 + qEDaoD49U2 -1 1 0 0 + m66TkTfsFe 0 0 0 0 + EHPaNzEUFm -1 0 -1 0 + fpRJCevQhi 2 0 0 0 + OlQvnmfi3Q 0 0 -2 0 + ... .. .. .. .. + uB1FPlz4uP 0 0 0 1 + EcSe6yNzCU 0 0 -1 0 + L50VudaiI8 -1 1 -2 0 + y3bpw4nwIp 0 -1 0 0 + H0RdLLwrCT 1 1 0 0 + rY82K0vMwm 0 0 0 0 + 1OPIUjnkjk 2 0 0 0 + + [30 rows x 4 columns] + """ + return DataFrame(tm.getSeriesData()).astype("int64") + + +@pytest.fixture +def datetime_frame(): + """ + Fixture for DataFrame of floats with DatetimeIndex + + Columns are ['A', 'B', 'C', 'D'] + + A B C D + 2000-01-03 -1.122153 0.468535 0.122226 1.693711 + 2000-01-04 0.189378 0.486100 0.007864 -1.216052 + 2000-01-05 0.041401 -0.835752 -0.035279 -0.414357 + 2000-01-06 0.430050 0.894352 0.090719 0.036939 + 2000-01-07 -0.620982 -0.668211 -0.706153 1.466335 + 2000-01-10 -0.752633 0.328434 -0.815325 0.699674 + 2000-01-11 -2.236969 0.615737 -0.829076 -1.196106 + ... ... ... ... ... + 2000-02-03 1.642618 -0.579288 0.046005 1.385249 + 2000-02-04 -0.544873 -1.160962 -0.284071 -1.418351 + 2000-02-07 -2.656149 -0.601387 1.410148 0.444150 + 2000-02-08 -1.201881 -1.289040 0.772992 -1.445300 + 2000-02-09 1.377373 0.398619 1.008453 -0.928207 + 2000-02-10 0.473194 -0.636677 0.984058 0.511519 + 2000-02-11 -0.965556 0.408313 -1.312844 -0.381948 + + [30 rows x 4 columns] + """ + return DataFrame(tm.getTimeSeriesData()) + + +@pytest.fixture +def float_frame(): + """ + Fixture for DataFrame of floats with index of unique strings + + Columns are ['A', 'B', 'C', 'D']. + + A B C D + P7GACiRnxd -0.465578 -0.361863 0.886172 -0.053465 + qZKh6afn8n -0.466693 -0.373773 0.266873 1.673901 + tkp0r6Qble 0.148691 -0.059051 0.174817 1.598433 + wP70WOCtv8 0.133045 -0.581994 -0.992240 0.261651 + M2AeYQMnCz -1.207959 -0.185775 0.588206 0.563938 + QEPzyGDYDo -0.381843 -0.758281 0.502575 -0.565053 + r78Jwns6dn -0.653707 0.883127 0.682199 0.206159 + ... ... ... ... ... 
+ IHEGx9NO0T -0.277360 0.113021 -1.018314 0.196316 + lPMj8K27FA -1.313667 -0.604776 -1.305618 -0.863999 + qa66YMWQa5 1.110525 0.475310 -0.747865 0.032121 + yOa0ATsmcE -0.431457 0.067094 0.096567 -0.264962 + 65znX3uRNG 1.528446 0.160416 -0.109635 -0.032987 + eCOBvKqf3e 0.235281 1.622222 0.781255 0.392871 + xSucinXxuV -1.263557 0.252799 -0.552247 0.400426 + + [30 rows x 4 columns] + """ + return DataFrame(tm.getSeriesData()) + + +@pytest.fixture +def mixed_type_frame(): + """ + Fixture for DataFrame of float/int/string columns with RangeIndex + Columns are ['a', 'b', 'c', 'float32', 'int32']. + """ + return DataFrame( + { + "a": 1.0, + "b": 2, + "c": "foo", + "float32": np.array([1.0] * 10, dtype="float32"), + "int32": np.array([1] * 10, dtype="int32"), + }, + index=np.arange(10), + ) + + +@pytest.fixture +def rand_series_with_duplicate_datetimeindex(): + """ + Fixture for Series with a DatetimeIndex that has duplicates. + """ + dates = [ + datetime(2000, 1, 2), + datetime(2000, 1, 2), + datetime(2000, 1, 2), + datetime(2000, 1, 3), + datetime(2000, 1, 3), + datetime(2000, 1, 3), + datetime(2000, 1, 4), + datetime(2000, 1, 4), + datetime(2000, 1, 4), + datetime(2000, 1, 5), + ] + + return Series(np.random.randn(len(dates)), index=dates) + + +# ---------------------------------------------------------------- +# Scalars +# ---------------------------------------------------------------- +@pytest.fixture( + params=[ + (Interval(left=0, right=5), IntervalDtype("int64", "right")), + (Interval(left=0.1, right=0.5), IntervalDtype("float64", "right")), + (Period("2012-01", freq="M"), "period[M]"), + (Period("2012-02-01", freq="D"), "period[D]"), + ( + Timestamp("2011-01-01", tz="US/Eastern"), + DatetimeTZDtype(tz="US/Eastern"), + ), + (Timedelta(seconds=500), "timedelta64[ns]"), + ] +) +def ea_scalar_and_dtype(request): + return request.param + + +# ---------------------------------------------------------------- +# Operators & Operations +# ---------------------------------------------------------------- +_all_arithmetic_operators = [ + "__add__", + "__radd__", + "__sub__", + "__rsub__", + "__mul__", + "__rmul__", + "__floordiv__", + "__rfloordiv__", + "__truediv__", + "__rtruediv__", + "__pow__", + "__rpow__", + "__mod__", + "__rmod__", +] + + +@pytest.fixture(params=_all_arithmetic_operators) +def all_arithmetic_operators(request): + """ + Fixture for dunder names for common arithmetic operations. + """ + return request.param + + +@pytest.fixture( + params=[ + operator.add, + ops.radd, + operator.sub, + ops.rsub, + operator.mul, + ops.rmul, + operator.truediv, + ops.rtruediv, + operator.floordiv, + ops.rfloordiv, + operator.mod, + ops.rmod, + operator.pow, + ops.rpow, + operator.eq, + operator.ne, + operator.lt, + operator.le, + operator.gt, + operator.ge, + operator.and_, + ops.rand_, + operator.xor, + ops.rxor, + operator.or_, + ops.ror_, + ] +) +def all_binary_operators(request): + """ + Fixture for operator and roperator arithmetic, comparison, and logical ops. + """ + return request.param + + +@pytest.fixture( + params=[ + operator.add, + ops.radd, + operator.sub, + ops.rsub, + operator.mul, + ops.rmul, + operator.truediv, + ops.rtruediv, + operator.floordiv, + ops.rfloordiv, + operator.mod, + ops.rmod, + operator.pow, + ops.rpow, + ] +) +def all_arithmetic_functions(request): + """ + Fixture for operator and roperator arithmetic functions. + + Notes + ----- + This includes divmod and rdivmod, whereas all_arithmetic_operators + does not. 
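+
+    For example (illustrative), each param is a plain binary callable:
+
+    >>> ops.radd(1, 2)  # evaluates 2 + 1
+    3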
+ """ + return request.param + + +_all_numeric_reductions = [ + "sum", + "max", + "min", + "mean", + "prod", + "std", + "var", + "median", + "kurt", + "skew", +] + + +@pytest.fixture(params=_all_numeric_reductions) +def all_numeric_reductions(request): + """ + Fixture for numeric reduction names. + """ + return request.param + + +_all_boolean_reductions = ["all", "any"] + + +@pytest.fixture(params=_all_boolean_reductions) +def all_boolean_reductions(request): + """ + Fixture for boolean reduction names. + """ + return request.param + + +_all_reductions = _all_numeric_reductions + _all_boolean_reductions + + +@pytest.fixture(params=_all_reductions) +def all_reductions(request): + """ + Fixture for all (boolean + numeric) reduction names. + """ + return request.param + + +@pytest.fixture( + params=[ + operator.eq, + operator.ne, + operator.gt, + operator.ge, + operator.lt, + operator.le, + ] +) +def comparison_op(request): + """ + Fixture for operator module comparison functions. + """ + return request.param + + +@pytest.fixture(params=["__le__", "__lt__", "__ge__", "__gt__"]) +def compare_operators_no_eq_ne(request): + """ + Fixture for dunder names for compare operations except == and != + + * >= + * > + * < + * <= + """ + return request.param + + +@pytest.fixture( + params=["__and__", "__rand__", "__or__", "__ror__", "__xor__", "__rxor__"] +) +def all_logical_operators(request): + """ + Fixture for dunder names for common logical operations + + * | + * & + * ^ + """ + return request.param + + +# ---------------------------------------------------------------- +# Data sets/files +# ---------------------------------------------------------------- +@pytest.fixture +def strict_data_files(pytestconfig): + """ + Returns the configuration for the test setting `--strict-data-files`. + """ + return pytestconfig.getoption("--strict-data-files") + + +@pytest.fixture +def datapath(strict_data_files): + """ + Get the path to a data file. + + Parameters + ---------- + path : str + Path to the file, relative to ``pandas/tests/`` + + Returns + ------- + path including ``pandas/tests``. + + Raises + ------ + ValueError + If the path doesn't exist and the --strict-data-files option is set. + """ + BASE_PATH = os.path.join(os.path.dirname(__file__), "tests") + + def deco(*args): + path = os.path.join(BASE_PATH, *args) + if not os.path.exists(path): + if strict_data_files: + raise ValueError( + f"Could not find file {path} and --strict-data-files is set." + ) + else: + pytest.skip(f"Could not find {path}.") + return path + + return deco + + +@pytest.fixture +def iris(datapath): + """ + The iris dataset as a DataFrame. 
+ """ + return pd.read_csv(datapath("io", "data", "csv", "iris.csv")) + + +# ---------------------------------------------------------------- +# Time zones +# ---------------------------------------------------------------- +TIMEZONES = [ + None, + "UTC", + "US/Eastern", + "Asia/Tokyo", + "dateutil/US/Pacific", + "dateutil/Asia/Singapore", + "+01:15", + "-02:15", + "UTC+01:15", + "UTC-02:15", + tzutc(), + tzlocal(), + FixedOffset(300), + FixedOffset(0), + FixedOffset(-300), + timezone.utc, + timezone(timedelta(hours=1)), + timezone(timedelta(hours=-1), name="foo"), +] +TIMEZONE_IDS = [repr(i) for i in TIMEZONES] + + +@td.parametrize_fixture_doc(str(TIMEZONE_IDS)) +@pytest.fixture(params=TIMEZONES, ids=TIMEZONE_IDS) +def tz_naive_fixture(request): + """ + Fixture for trying timezones including default (None): {0} + """ + return request.param + + +@td.parametrize_fixture_doc(str(TIMEZONE_IDS[1:])) +@pytest.fixture(params=TIMEZONES[1:], ids=TIMEZONE_IDS[1:]) +def tz_aware_fixture(request): + """ + Fixture for trying explicit timezones: {0} + """ + return request.param + + +# Generate cartesian product of tz_aware_fixture: +tz_aware_fixture2 = tz_aware_fixture + + +@pytest.fixture(params=["utc", "dateutil/UTC", utc, tzutc(), timezone.utc]) +def utc_fixture(request): + """ + Fixture to provide variants of UTC timezone strings and tzinfo objects. + """ + return request.param + + +utc_fixture2 = utc_fixture + + +# ---------------------------------------------------------------- +# Dtypes +# ---------------------------------------------------------------- +@pytest.fixture(params=tm.STRING_DTYPES) +def string_dtype(request): + """ + Parametrized fixture for string dtypes. + + * str + * 'str' + * 'U' + """ + return request.param + + +@pytest.fixture( + params=[ + "string[python]", + pytest.param( + "string[pyarrow]", marks=td.skip_if_no("pyarrow", min_version="1.0.0") + ), + ] +) +def nullable_string_dtype(request): + """ + Parametrized fixture for string dtypes. + + * 'string[python]' + * 'string[pyarrow]' + """ + return request.param + + +@pytest.fixture( + params=[ + "python", + pytest.param("pyarrow", marks=td.skip_if_no("pyarrow", min_version="1.0.0")), + ] +) +def string_storage(request): + """ + Parametrized fixture for pd.options.mode.string_storage. + + * 'python' + * 'pyarrow' + """ + return request.param + + +# Alias so we can test with cartesian product of string_storage +string_storage2 = string_storage + + +@pytest.fixture(params=tm.BYTES_DTYPES) +def bytes_dtype(request): + """ + Parametrized fixture for bytes dtypes. + + * bytes + * 'bytes' + """ + return request.param + + +@pytest.fixture(params=tm.OBJECT_DTYPES) +def object_dtype(request): + """ + Parametrized fixture for object dtypes. + + * object + * 'object' + """ + return request.param + + +@pytest.fixture( + params=[ + "object", + "string[python]", + pytest.param( + "string[pyarrow]", marks=td.skip_if_no("pyarrow", min_version="1.0.0") + ), + ] +) +def any_string_dtype(request): + """ + Parametrized fixture for string dtypes. + * 'object' + * 'string[python]' + * 'string[pyarrow]' + """ + return request.param + + +@pytest.fixture(params=tm.DATETIME64_DTYPES) +def datetime64_dtype(request): + """ + Parametrized fixture for datetime64 dtypes. + + * 'datetime64[ns]' + * 'M8[ns]' + """ + return request.param + + +@pytest.fixture(params=tm.TIMEDELTA64_DTYPES) +def timedelta64_dtype(request): + """ + Parametrized fixture for timedelta64 dtypes. 
+ + * 'timedelta64[ns]' + * 'm8[ns]' + """ + return request.param + + +@pytest.fixture +def fixed_now_ts(): + """ + Fixture emits fixed Timestamp.now() + """ + return Timestamp( + year=2021, month=1, day=1, hour=12, minute=4, second=13, microsecond=22 + ) + + +@pytest.fixture(params=tm.FLOAT_NUMPY_DTYPES) +def float_numpy_dtype(request): + """ + Parameterized fixture for float dtypes. + + * float + * 'float32' + * 'float64' + """ + return request.param + + +@pytest.fixture(params=tm.FLOAT_EA_DTYPES) +def float_ea_dtype(request): + """ + Parameterized fixture for float dtypes. + + * 'Float32' + * 'Float64' + """ + return request.param + + +@pytest.fixture(params=tm.FLOAT_NUMPY_DTYPES + tm.FLOAT_EA_DTYPES) +def any_float_dtype(request): + """ + Parameterized fixture for float dtypes. + + * float + * 'float32' + * 'float64' + * 'Float32' + * 'Float64' + """ + return request.param + + +@pytest.fixture(params=tm.COMPLEX_DTYPES) +def complex_dtype(request): + """ + Parameterized fixture for complex dtypes. + + * complex + * 'complex64' + * 'complex128' + """ + return request.param + + +@pytest.fixture(params=tm.SIGNED_INT_NUMPY_DTYPES) +def any_signed_int_numpy_dtype(request): + """ + Parameterized fixture for signed integer dtypes. + + * int + * 'int8' + * 'int16' + * 'int32' + * 'int64' + """ + return request.param + + +@pytest.fixture(params=tm.UNSIGNED_INT_NUMPY_DTYPES) +def any_unsigned_int_numpy_dtype(request): + """ + Parameterized fixture for unsigned integer dtypes. + + * 'uint8' + * 'uint16' + * 'uint32' + * 'uint64' + """ + return request.param + + +@pytest.fixture(params=tm.ALL_INT_NUMPY_DTYPES) +def any_int_numpy_dtype(request): + """ + Parameterized fixture for any integer dtype. + + * int + * 'int8' + * 'uint8' + * 'int16' + * 'uint16' + * 'int32' + * 'uint32' + * 'int64' + * 'uint64' + """ + return request.param + + +@pytest.fixture(params=tm.ALL_INT_EA_DTYPES) +def any_int_ea_dtype(request): + """ + Parameterized fixture for any nullable integer dtype. + + * 'UInt8' + * 'Int8' + * 'UInt16' + * 'Int16' + * 'UInt32' + * 'Int32' + * 'UInt64' + * 'Int64' + """ + return request.param + + +@pytest.fixture(params=tm.ALL_INT_NUMPY_DTYPES + tm.ALL_INT_EA_DTYPES) +def any_int_dtype(request): + """ + Parameterized fixture for any nullable integer dtype. + + * int + * 'int8' + * 'uint8' + * 'int16' + * 'uint16' + * 'int32' + * 'uint32' + * 'int64' + * 'uint64' + * 'UInt8' + * 'Int8' + * 'UInt16' + * 'Int16' + * 'UInt32' + * 'Int32' + * 'UInt64' + * 'Int64' + """ + return request.param + + +@pytest.fixture(params=tm.ALL_INT_EA_DTYPES + tm.FLOAT_EA_DTYPES) +def any_numeric_ea_dtype(request): + """ + Parameterized fixture for any nullable integer dtype and + any float ea dtypes. + + * 'UInt8' + * 'Int8' + * 'UInt16' + * 'Int16' + * 'UInt32' + * 'Int32' + * 'UInt64' + * 'Int64' + * 'Float32' + * 'Float64' + """ + return request.param + + +@pytest.fixture(params=tm.SIGNED_INT_EA_DTYPES) +def any_signed_int_ea_dtype(request): + """ + Parameterized fixture for any signed nullable integer dtype. + + * 'Int8' + * 'Int16' + * 'Int32' + * 'Int64' + """ + return request.param + + +@pytest.fixture(params=tm.ALL_REAL_NUMPY_DTYPES) +def any_real_numpy_dtype(request): + """ + Parameterized fixture for any (purely) real numeric dtype. 
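+    (covers numpy integer, unsigned integer and float dtypes; complex is
+    excluded)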
+ + * int + * 'int8' + * 'uint8' + * 'int16' + * 'uint16' + * 'int32' + * 'uint32' + * 'int64' + * 'uint64' + * float + * 'float32' + * 'float64' + """ + return request.param + + +@pytest.fixture(params=tm.ALL_NUMPY_DTYPES) +def any_numpy_dtype(request): + """ + Parameterized fixture for all numpy dtypes. + + * bool + * 'bool' + * int + * 'int8' + * 'uint8' + * 'int16' + * 'uint16' + * 'int32' + * 'uint32' + * 'int64' + * 'uint64' + * float + * 'float32' + * 'float64' + * complex + * 'complex64' + * 'complex128' + * str + * 'str' + * 'U' + * bytes + * 'bytes' + * 'datetime64[ns]' + * 'M8[ns]' + * 'timedelta64[ns]' + * 'm8[ns]' + * object + * 'object' + """ + return request.param + + +# categoricals are handled separately +_any_skipna_inferred_dtype = [ + ("string", ["a", np.nan, "c"]), + ("string", ["a", pd.NA, "c"]), + ("bytes", [b"a", np.nan, b"c"]), + ("empty", [np.nan, np.nan, np.nan]), + ("empty", []), + ("mixed-integer", ["a", np.nan, 2]), + ("mixed", ["a", np.nan, 2.0]), + ("floating", [1.0, np.nan, 2.0]), + ("integer", [1, np.nan, 2]), + ("mixed-integer-float", [1, np.nan, 2.0]), + ("decimal", [Decimal(1), np.nan, Decimal(2)]), + ("boolean", [True, np.nan, False]), + ("boolean", [True, pd.NA, False]), + ("datetime64", [np.datetime64("2013-01-01"), np.nan, np.datetime64("2018-01-01")]), + ("datetime", [Timestamp("20130101"), np.nan, Timestamp("20180101")]), + ("date", [date(2013, 1, 1), np.nan, date(2018, 1, 1)]), + # The following two dtypes are commented out due to GH 23554 + # ('complex', [1 + 1j, np.nan, 2 + 2j]), + # ('timedelta64', [np.timedelta64(1, 'D'), + # np.nan, np.timedelta64(2, 'D')]), + ("timedelta", [timedelta(1), np.nan, timedelta(2)]), + ("time", [time(1), np.nan, time(2)]), + ("period", [Period(2013), pd.NaT, Period(2018)]), + ("interval", [Interval(0, 1), np.nan, Interval(0, 2)]), +] +ids, _ = zip(*_any_skipna_inferred_dtype) # use inferred type as fixture-id + + +@pytest.fixture(params=_any_skipna_inferred_dtype, ids=ids) +def any_skipna_inferred_dtype(request): + """ + Fixture for all inferred dtypes from _libs.lib.infer_dtype + + The covered (inferred) types are: + * 'string' + * 'empty' + * 'bytes' + * 'mixed' + * 'mixed-integer' + * 'mixed-integer-float' + * 'floating' + * 'integer' + * 'decimal' + * 'boolean' + * 'datetime64' + * 'datetime' + * 'date' + * 'timedelta' + * 'time' + * 'period' + * 'interval' + + Returns + ------- + inferred_dtype : str + The string for the inferred dtype from _libs.lib.infer_dtype + values : np.ndarray + An array of object dtype that will be inferred to have + `inferred_dtype` + + Examples + -------- + >>> import pandas._libs.lib as lib + >>> + >>> def test_something(any_skipna_inferred_dtype): + ... inferred_dtype, values = any_skipna_inferred_dtype + ... # will pass + ... assert lib.infer_dtype(values, skipna=True) == inferred_dtype + """ + inferred_dtype, values = request.param + values = np.array(values, dtype=object) # object dtype to avoid casting + + # correctness of inference tested in tests/dtypes/test_inference.py + return inferred_dtype, values + + +# ---------------------------------------------------------------- +# Misc +# ---------------------------------------------------------------- +@pytest.fixture +def ip(): + """ + Get an instance of IPython.InteractiveShell. + + Will raise a skip if IPython is not installed. 
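+
+    Illustrative (hypothetical) use in a test:
+
+    >>> def test_ipython(ip):
+    ...     ip.run_cell("import pandas as pd")  # doctest: +SKIP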
+ """ + pytest.importorskip("IPython", minversion="6.0.0") + from IPython.core.interactiveshell import InteractiveShell + + # GH#35711 make sure sqlite history file handle is not leaked + from traitlets.config import Config # isort:skip + + c = Config() + c.HistoryManager.hist_file = ":memory:" + + return InteractiveShell(config=c) + + +@pytest.fixture(params=["bsr", "coo", "csc", "csr", "dia", "dok", "lil"]) +def spmatrix(request): + """ + Yields scipy sparse matrix classes. + """ + from scipy import sparse + + return getattr(sparse, request.param + "_matrix") + + +@pytest.fixture( + params=[ + getattr(pd.offsets, o) + for o in pd.offsets.__all__ + if issubclass(getattr(pd.offsets, o), pd.offsets.Tick) + ] +) +def tick_classes(request): + """ + Fixture for Tick based datetime offsets available for a time series. + """ + return request.param + + +@pytest.fixture(params=[None, lambda x: x]) +def sort_by_key(request): + """ + Simple fixture for testing keys in sorting methods. + Tests None (no key) and the identity key. + """ + return request.param + + +@pytest.fixture() +def fsspectest(): + pytest.importorskip("fsspec") + from fsspec import register_implementation + from fsspec.implementations.memory import MemoryFileSystem + from fsspec.registry import _registry as registry + + class TestMemoryFS(MemoryFileSystem): + protocol = "testmem" + test = [None] + + def __init__(self, **kwargs): + self.test[0] = kwargs.pop("test", None) + super().__init__(**kwargs) + + register_implementation("testmem", TestMemoryFS, clobber=True) + yield TestMemoryFS() + registry.pop("testmem", None) + TestMemoryFS.test[0] = None + TestMemoryFS.store.clear() + + +@pytest.fixture( + params=[ + ("foo", None, None), + ("Egon", "Venkman", None), + ("NCC1701D", "NCC1701D", "NCC1701D"), + # possibly-matching NAs + (np.nan, np.nan, np.nan), + (np.nan, pd.NaT, None), + (np.nan, pd.NA, None), + (pd.NA, pd.NA, pd.NA), + ] +) +def names(request): + """ + A 3-tuple of names, the first two for operands, the last for a result. + """ + return request.param + + +@pytest.fixture(params=[tm.setitem, tm.loc, tm.iloc]) +def indexer_sli(request): + """ + Parametrize over __setitem__, loc.__setitem__, iloc.__setitem__ + """ + return request.param + + +@pytest.fixture(params=[tm.setitem, tm.iloc]) +def indexer_si(request): + """ + Parametrize over __setitem__, iloc.__setitem__ + """ + return request.param + + +@pytest.fixture(params=[tm.setitem, tm.loc]) +def indexer_sl(request): + """ + Parametrize over __setitem__, loc.__setitem__ + """ + return request.param + + +@pytest.fixture(params=[tm.at, tm.loc]) +def indexer_al(request): + """ + Parametrize over at.__setitem__, loc.__setitem__ + """ + return request.param + + +@pytest.fixture(params=[tm.iat, tm.iloc]) +def indexer_ial(request): + """ + Parametrize over iat.__setitem__, iloc.__setitem__ + """ + return request.param + + +@pytest.fixture +def using_array_manager(request): + """ + Fixture to check if the array manager is being used. 
+ """ + return pd.options.mode.data_manager == "array" diff --git a/.local/lib/python3.8/site-packages/pandas/core/__init__.py b/.local/lib/python3.8/site-packages/pandas/core/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/core/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..f38544a Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/__pycache__/accessor.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/accessor.cpython-38.pyc new file mode 100644 index 0000000..a28da7a Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/accessor.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/__pycache__/algorithms.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/algorithms.cpython-38.pyc new file mode 100644 index 0000000..3b7aba7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/algorithms.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/__pycache__/api.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/api.cpython-38.pyc new file mode 100644 index 0000000..d7fc133 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/api.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/__pycache__/apply.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/apply.cpython-38.pyc new file mode 100644 index 0000000..e5bcb6d Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/apply.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/__pycache__/arraylike.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/arraylike.cpython-38.pyc new file mode 100644 index 0000000..e4a3854 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/arraylike.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/__pycache__/base.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/base.cpython-38.pyc new file mode 100644 index 0000000..1074eb7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/base.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/__pycache__/common.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/common.cpython-38.pyc new file mode 100644 index 0000000..8467b95 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/common.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/__pycache__/config_init.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/config_init.cpython-38.pyc new file mode 100644 index 0000000..f2dbcdd Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/config_init.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/__pycache__/construction.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/construction.cpython-38.pyc new file mode 100644 index 0000000..6214e92 Binary files 
/dev/null and b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/construction.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/__pycache__/describe.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/describe.cpython-38.pyc new file mode 100644 index 0000000..366c74c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/describe.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/__pycache__/flags.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/flags.cpython-38.pyc new file mode 100644 index 0000000..c8d0edc Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/flags.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/__pycache__/frame.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/frame.cpython-38.pyc new file mode 100644 index 0000000..34a4978 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/frame.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/__pycache__/generic.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/generic.cpython-38.pyc new file mode 100644 index 0000000..dbc6a50 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/generic.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/__pycache__/index.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/index.cpython-38.pyc new file mode 100644 index 0000000..690862f Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/index.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/__pycache__/indexing.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/indexing.cpython-38.pyc new file mode 100644 index 0000000..0ec8f80 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/indexing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/__pycache__/missing.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/missing.cpython-38.pyc new file mode 100644 index 0000000..2a25e85 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/missing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/__pycache__/nanops.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/nanops.cpython-38.pyc new file mode 100644 index 0000000..5571750 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/nanops.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/__pycache__/resample.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/resample.cpython-38.pyc new file mode 100644 index 0000000..8a91577 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/resample.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/__pycache__/roperator.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/roperator.cpython-38.pyc new file mode 100644 index 0000000..9a24c03 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/roperator.cpython-38.pyc differ diff --git 
a/.local/lib/python3.8/site-packages/pandas/core/__pycache__/sample.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/sample.cpython-38.pyc new file mode 100644 index 0000000..290fcce Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/sample.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/pandas/core/__pycache__/series.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/series.cpython-38.pyc new file mode 100644 index 0000000..4af94dc Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/series.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/pandas/core/__pycache__/shared_docs.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/shared_docs.cpython-38.pyc new file mode 100644 index 0000000..599f646 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/shared_docs.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/pandas/core/__pycache__/sorting.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/sorting.cpython-38.pyc new file mode 100644 index 0000000..bd45dd7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/__pycache__/sorting.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/pandas/core/_numba/__init__.py b/.local/lib/python3.8/site-packages/pandas/core/_numba/__init__.py new file mode 100644 index 0000000..e69de29
diff --git a/.local/lib/python3.8/site-packages/pandas/core/_numba/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/_numba/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..90caa46 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/_numba/__pycache__/__init__.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/pandas/core/_numba/__pycache__/executor.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/_numba/__pycache__/executor.cpython-38.pyc new file mode 100644 index 0000000..1e1fb01 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/_numba/__pycache__/executor.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/pandas/core/_numba/executor.py b/.local/lib/python3.8/site-packages/pandas/core/_numba/executor.py new file mode 100644 index 0000000..0b59d07 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/_numba/executor.py @@ -0,0 +1,66 @@
+from __future__ import annotations
+
+from typing import (
+    TYPE_CHECKING,
+    Callable,
+)
+
+import numpy as np
+
+from pandas._typing import Scalar
+from pandas.compat._optional import import_optional_dependency
+
+from pandas.core.util.numba_ import (
+    NUMBA_FUNC_CACHE,
+    get_jit_arguments,
+)
+
+
+def generate_shared_aggregator(
+    func: Callable[..., Scalar],
+    engine_kwargs: dict[str, bool] | None,
+    cache_key_str: str,
+):
+    """
+    Generate a Numba function that loops over the columns of a 2D object
+    and applies a 1D numba kernel over each column.
+
+    Parameters
+    ----------
+    func : function
+        aggregation function to be applied to each column
+    engine_kwargs : dict
+        dictionary of arguments to be passed into numba.jit
+    cache_key_str : str
+        string to access the compiled function of the form
+        <caller_type>_<aggregation_type>, e.g.
rolling_mean, groupby_mean + + Returns + ------- + Numba function + """ + nopython, nogil, parallel = get_jit_arguments(engine_kwargs, None) + + cache_key = (func, cache_key_str) + if cache_key in NUMBA_FUNC_CACHE: + return NUMBA_FUNC_CACHE[cache_key] + + if TYPE_CHECKING: + import numba + else: + numba = import_optional_dependency("numba") + + @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel) + def column_looper( + values: np.ndarray, + start: np.ndarray, + end: np.ndarray, + min_periods: int, + *args, + ): + result = np.empty((len(start), values.shape[1]), dtype=np.float64) + for i in numba.prange(values.shape[1]): + result[:, i] = func(values[:, i], start, end, min_periods, *args) + return result + + return column_looper diff --git a/.local/lib/python3.8/site-packages/pandas/core/_numba/kernels/__init__.py b/.local/lib/python3.8/site-packages/pandas/core/_numba/kernels/__init__.py new file mode 100644 index 0000000..219ff02 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/_numba/kernels/__init__.py @@ -0,0 +1,6 @@ +from pandas.core._numba.kernels.mean_ import sliding_mean +from pandas.core._numba.kernels.min_max_ import sliding_min_max +from pandas.core._numba.kernels.sum_ import sliding_sum +from pandas.core._numba.kernels.var_ import sliding_var + +__all__ = ["sliding_mean", "sliding_sum", "sliding_var", "sliding_min_max"] diff --git a/.local/lib/python3.8/site-packages/pandas/core/_numba/kernels/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/_numba/kernels/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..73c03f1 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/_numba/kernels/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/_numba/kernels/__pycache__/mean_.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/_numba/kernels/__pycache__/mean_.cpython-38.pyc new file mode 100644 index 0000000..e7effd2 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/_numba/kernels/__pycache__/mean_.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/_numba/kernels/__pycache__/min_max_.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/_numba/kernels/__pycache__/min_max_.cpython-38.pyc new file mode 100644 index 0000000..fc8d693 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/_numba/kernels/__pycache__/min_max_.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/_numba/kernels/__pycache__/shared.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/_numba/kernels/__pycache__/shared.cpython-38.pyc new file mode 100644 index 0000000..a5f2244 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/_numba/kernels/__pycache__/shared.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/_numba/kernels/__pycache__/sum_.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/_numba/kernels/__pycache__/sum_.cpython-38.pyc new file mode 100644 index 0000000..8a999f3 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/_numba/kernels/__pycache__/sum_.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/_numba/kernels/__pycache__/var_.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/_numba/kernels/__pycache__/var_.cpython-38.pyc new file mode 100644 index 0000000..dbb59c0 Binary 
files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/_numba/kernels/__pycache__/var_.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/_numba/kernels/mean_.py b/.local/lib/python3.8/site-packages/pandas/core/_numba/kernels/mean_.py new file mode 100644 index 0000000..8f67dd9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/_numba/kernels/mean_.py @@ -0,0 +1,106 @@ +""" +Numba 1D mean kernels that can be shared by +* Dataframe / Series +* groupby +* rolling / expanding + +Mirrors pandas/_libs/window/aggregation.pyx +""" +from __future__ import annotations + +import numba +import numpy as np + +from pandas.core._numba.kernels.shared import is_monotonic_increasing + + +@numba.jit(nopython=True, nogil=True, parallel=False) +def add_mean( + val: float, nobs: int, sum_x: float, neg_ct: int, compensation: float +) -> tuple[int, float, int, float]: + if not np.isnan(val): + nobs += 1 + y = val - compensation + t = sum_x + y + compensation = t - sum_x - y + sum_x = t + if val < 0: + neg_ct += 1 + return nobs, sum_x, neg_ct, compensation + + +@numba.jit(nopython=True, nogil=True, parallel=False) +def remove_mean( + val: float, nobs: int, sum_x: float, neg_ct: int, compensation: float +) -> tuple[int, float, int, float]: + if not np.isnan(val): + nobs -= 1 + y = -val - compensation + t = sum_x + y + compensation = t - sum_x - y + sum_x = t + if val < 0: + neg_ct -= 1 + return nobs, sum_x, neg_ct, compensation + + +@numba.jit(nopython=True, nogil=True, parallel=False) +def sliding_mean( + values: np.ndarray, + start: np.ndarray, + end: np.ndarray, + min_periods: int, +) -> np.ndarray: + N = len(start) + nobs = 0 + sum_x = 0.0 + neg_ct = 0 + compensation_add = 0.0 + compensation_remove = 0.0 + + is_monotonic_increasing_bounds = is_monotonic_increasing( + start + ) and is_monotonic_increasing(end) + + output = np.empty(N, dtype=np.float64) + + for i in range(N): + s = start[i] + e = end[i] + if i == 0 or not is_monotonic_increasing_bounds: + for j in range(s, e): + val = values[j] + nobs, sum_x, neg_ct, compensation_add = add_mean( + val, nobs, sum_x, neg_ct, compensation_add + ) + else: + for j in range(start[i - 1], s): + val = values[j] + nobs, sum_x, neg_ct, compensation_remove = remove_mean( + val, nobs, sum_x, neg_ct, compensation_remove + ) + + for j in range(end[i - 1], e): + val = values[j] + nobs, sum_x, neg_ct, compensation_add = add_mean( + val, nobs, sum_x, neg_ct, compensation_add + ) + + if nobs >= min_periods and nobs > 0: + result = sum_x / nobs + if neg_ct == 0 and result < 0: + result = 0 + elif neg_ct == nobs and result > 0: + result = 0 + else: + result = np.nan + + output[i] = result + + if not is_monotonic_increasing_bounds: + nobs = 0 + sum_x = 0.0 + neg_ct = 0 + compensation_remove = 0.0 + + return output diff --git a/.local/lib/python3.8/site-packages/pandas/core/_numba/kernels/min_max_.py b/.local/lib/python3.8/site-packages/pandas/core/_numba/kernels/min_max_.py new file mode 100644 index 0000000..4f237fc --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/_numba/kernels/min_max_.py @@ -0,0 +1,70 @@ +""" +Numba 1D min/max kernels that can be shared by +* Dataframe / Series +* groupby +* rolling / expanding + +Mirrors pandas/_libs/window/aggregation.pyx +""" +from __future__ import annotations + +import numba +import numpy as np + + +@numba.jit(nopython=True, nogil=True, parallel=False) +def sliding_min_max( + values: np.ndarray, + start: np.ndarray, + end: np.ndarray, + min_periods: int, + 
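+    # is_max=True computes a sliding maximum; is_max=False a sliding minimum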
is_max: bool, +) -> np.ndarray: + N = len(start) + nobs = 0 + output = np.empty(N, dtype=np.float64) + # Use deque once numba supports it + # https://github.com/numba/numba/issues/7417 + Q: list = [] + W: list = [] + for i in range(N): + + curr_win_size = end[i] - start[i] + if i == 0: + st = start[i] + else: + st = end[i - 1] + + for k in range(st, end[i]): + ai = values[k] + if not np.isnan(ai): + nobs += 1 + elif is_max: + ai = -np.inf + else: + ai = np.inf + # Discard previous entries if we find new min or max + if is_max: + while Q and ((ai >= values[Q[-1]]) or values[Q[-1]] != values[Q[-1]]): + Q.pop() + else: + while Q and ((ai <= values[Q[-1]]) or values[Q[-1]] != values[Q[-1]]): + Q.pop() + Q.append(k) + W.append(k) + + # Discard entries outside and left of current window + while Q and Q[0] <= start[i] - 1: + Q.pop(0) + while W and W[0] <= start[i] - 1: + if not np.isnan(values[W[0]]): + nobs -= 1 + W.pop(0) + + # Save output based on index in input value array + if Q and curr_win_size > 0 and nobs >= min_periods: + output[i] = values[Q[0]] + else: + output[i] = np.nan + + return output diff --git a/.local/lib/python3.8/site-packages/pandas/core/_numba/kernels/shared.py b/.local/lib/python3.8/site-packages/pandas/core/_numba/kernels/shared.py new file mode 100644 index 0000000..ec25e78 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/_numba/kernels/shared.py @@ -0,0 +1,23 @@ +import numba +import numpy as np + + +@numba.jit( + # error: Any? not callable + numba.boolean(numba.int64[:]), # type: ignore[misc] + nopython=True, + nogil=True, + parallel=False, +) +def is_monotonic_increasing(bounds: np.ndarray) -> bool: + """Check if int64 values are monotonically increasing.""" + n = len(bounds) + if n < 2: + return True + prev = bounds[0] + for i in range(1, n): + cur = bounds[i] + if cur < prev: + return False + prev = cur + return True diff --git a/.local/lib/python3.8/site-packages/pandas/core/_numba/kernels/sum_.py b/.local/lib/python3.8/site-packages/pandas/core/_numba/kernels/sum_.py new file mode 100644 index 0000000..c2e81b4 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/_numba/kernels/sum_.py @@ -0,0 +1,98 @@ +""" +Numba 1D sum kernels that can be shared by +* Dataframe / Series +* groupby +* rolling / expanding + +Mirrors pandas/_libs/window/aggregation.pyx +""" +from __future__ import annotations + +import numba +import numpy as np + +from pandas.core._numba.kernels.shared import is_monotonic_increasing + + +@numba.jit(nopython=True, nogil=True, parallel=False) +def add_sum( + val: float, nobs: int, sum_x: float, compensation: float +) -> tuple[int, float, float]: + if not np.isnan(val): + nobs += 1 + y = val - compensation + t = sum_x + y + compensation = t - sum_x - y + sum_x = t + return nobs, sum_x, compensation + + +@numba.jit(nopython=True, nogil=True, parallel=False) +def remove_sum( + val: float, nobs: int, sum_x: float, compensation: float +) -> tuple[int, float, float]: + if not np.isnan(val): + nobs -= 1 + y = -val - compensation + t = sum_x + y + compensation = t - sum_x - y + sum_x = t + return nobs, sum_x, compensation + + +@numba.jit(nopython=True, nogil=True, parallel=False) +def sliding_sum( + values: np.ndarray, + start: np.ndarray, + end: np.ndarray, + min_periods: int, +) -> np.ndarray: + N = len(start) + nobs = 0 + sum_x = 0.0 + compensation_add = 0.0 + compensation_remove = 0.0 + + is_monotonic_increasing_bounds = is_monotonic_increasing( + start + ) and is_monotonic_increasing(end) + + output = np.empty(N, 
dtype=np.float64)
+
+    for i in range(N):
+        s = start[i]
+        e = end[i]
+        if i == 0 or not is_monotonic_increasing_bounds:
+            for j in range(s, e):
+                val = values[j]
+                nobs, sum_x, compensation_add = add_sum(
+                    val, nobs, sum_x, compensation_add
+                )
+        else:
+            for j in range(start[i - 1], s):
+                val = values[j]
+                nobs, sum_x, compensation_remove = remove_sum(
+                    val, nobs, sum_x, compensation_remove
+                )
+
+            for j in range(end[i - 1], e):
+                val = values[j]
+                nobs, sum_x, compensation_add = add_sum(
+                    val, nobs, sum_x, compensation_add
+                )
+
+        # `nobs == 0 == nobs` was a tautology (just `nobs == 0`); the intended
+        # check is that the window is empty *and* min_periods is 0, otherwise
+        # an empty window must yield NaN rather than 0.0.
+        if nobs == 0 == min_periods:
+            result = 0.0
+        elif nobs >= min_periods:
+            result = sum_x
+        else:
+            result = np.nan
+
+        output[i] = result
+
+        if not is_monotonic_increasing_bounds:
+            nobs = 0
+            sum_x = 0.0
+            compensation_remove = 0.0
+
+    return output
diff --git a/.local/lib/python3.8/site-packages/pandas/core/_numba/kernels/var_.py b/.local/lib/python3.8/site-packages/pandas/core/_numba/kernels/var_.py
new file mode 100644
index 0000000..2e56606
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/core/_numba/kernels/var_.py
@@ -0,0 +1,116 @@
+"""
+Numba 1D var kernels that can be shared by
+* Dataframe / Series
+* groupby
+* rolling / expanding
+
+Mirrors pandas/_libs/window/aggregation.pyx
+"""
+from __future__ import annotations
+
+import numba
+import numpy as np
+
+from pandas.core._numba.kernels.shared import is_monotonic_increasing
+
+
+@numba.jit(nopython=True, nogil=True, parallel=False)
+def add_var(
+    val: float, nobs: int, mean_x: float, ssqdm_x: float, compensation: float
+) -> tuple[int, float, float, float]:
+    if not np.isnan(val):
+        nobs += 1
+        prev_mean = mean_x - compensation
+        y = val - compensation
+        t = y - mean_x
+        compensation = t + mean_x - y
+        delta = t
+        if nobs:
+            mean_x += delta / nobs
+        else:
+            mean_x = 0
+        ssqdm_x += (val - prev_mean) * (val - mean_x)
+    return nobs, mean_x, ssqdm_x, compensation
+
+
+@numba.jit(nopython=True, nogil=True, parallel=False)
+def remove_var(
+    val: float, nobs: int, mean_x: float, ssqdm_x: float, compensation: float
+) -> tuple[int, float, float, float]:
+    if not np.isnan(val):
+        nobs -= 1
+        if nobs:
+            prev_mean = mean_x - compensation
+            y = val - compensation
+            t = y - mean_x
+            compensation = t + mean_x - y
+            delta = t
+            mean_x -= delta / nobs
+            ssqdm_x -= (val - prev_mean) * (val - mean_x)
+        else:
+            mean_x = 0
+            ssqdm_x = 0
+    return nobs, mean_x, ssqdm_x, compensation
+
+
+@numba.jit(nopython=True, nogil=True, parallel=False)
+def sliding_var(
+    values: np.ndarray,
+    start: np.ndarray,
+    end: np.ndarray,
+    min_periods: int,
+    ddof: int = 1,
+) -> np.ndarray:
+    N = len(start)
+    nobs = 0
+    mean_x = 0.0
+    ssqdm_x = 0.0
+    compensation_add = 0.0
+    compensation_remove = 0.0
+
+    min_periods = max(min_periods, 1)
+    is_monotonic_increasing_bounds = is_monotonic_increasing(
+        start
+    ) and is_monotonic_increasing(end)
+
+    output = np.empty(N, dtype=np.float64)
+
+    for i in range(N):
+        s = start[i]
+        e = end[i]
+        if i == 0 or not is_monotonic_increasing_bounds:
+            for j in range(s, e):
+                val = values[j]
+                nobs, mean_x, ssqdm_x, compensation_add = add_var(
+                    val, nobs, mean_x, ssqdm_x, compensation_add
+                )
+        else:
+            for j in range(start[i - 1], s):
+                val = values[j]
+                nobs, mean_x, ssqdm_x, compensation_remove = remove_var(
+                    val, nobs, mean_x, ssqdm_x, compensation_remove
+                )
+
+            for j in range(end[i - 1], e):
+                val = values[j]
+                nobs, mean_x, ssqdm_x, compensation_add = add_var(
+                    val, nobs, mean_x, ssqdm_x, compensation_add
+                )
+
+        if nobs >= min_periods and nobs > ddof:
+            if nobs == 1:
+                result = 0.0
+
else: + result = ssqdm_x / (nobs - ddof) + else: + result = np.nan + + output[i] = result + + if not is_monotonic_increasing_bounds: + nobs = 0 + mean_x = 0.0 + ssqdm_x = 0.0 + compensation_remove = 0.0 + + return output diff --git a/.local/lib/python3.8/site-packages/pandas/core/accessor.py b/.local/lib/python3.8/site-packages/pandas/core/accessor.py new file mode 100644 index 0000000..07fa579 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/accessor.py @@ -0,0 +1,298 @@ +""" + +accessor.py contains base classes for implementing accessor properties +that can be mixed into or pinned onto other pandas classes. + +""" +from __future__ import annotations + +import warnings + +from pandas.util._decorators import doc +from pandas.util._exceptions import find_stack_level + + +class DirNamesMixin: + _accessors: set[str] = set() + _hidden_attrs: frozenset[str] = frozenset() + + def _dir_deletions(self) -> set[str]: + """ + Delete unwanted __dir__ for this object. + """ + return self._accessors | self._hidden_attrs + + def _dir_additions(self) -> set[str]: + """ + Add additional __dir__ for this object. + """ + return {accessor for accessor in self._accessors if hasattr(self, accessor)} + + def __dir__(self) -> list[str]: + """ + Provide method name lookup and completion. + + Notes + ----- + Only provide 'public' methods. + """ + rv = set(super().__dir__()) + rv = (rv - self._dir_deletions()) | self._dir_additions() + return sorted(rv) + + +class PandasDelegate: + """ + Abstract base class for delegating methods/properties. + """ + + def _delegate_property_get(self, name, *args, **kwargs): + raise TypeError(f"You cannot access the property {name}") + + def _delegate_property_set(self, name, value, *args, **kwargs): + raise TypeError(f"The property {name} cannot be set") + + def _delegate_method(self, name, *args, **kwargs): + raise TypeError(f"You cannot call method {name}") + + @classmethod + def _add_delegate_accessors( + cls, delegate, accessors, typ: str, overwrite: bool = False + ): + """ + Add accessors to cls from the delegate class. + + Parameters + ---------- + cls + Class to add the methods/properties to. + delegate + Class to get methods/properties and doc-strings. + accessors : list of str + List of accessors to add. + typ : {'property', 'method'} + overwrite : bool, default False + Overwrite the method/property in the target class if it exists. + """ + + def _create_delegator_property(name): + def _getter(self): + return self._delegate_property_get(name) + + def _setter(self, new_values): + return self._delegate_property_set(name, new_values) + + _getter.__name__ = name + _setter.__name__ = name + + return property( + fget=_getter, fset=_setter, doc=getattr(delegate, name).__doc__ + ) + + def _create_delegator_method(name): + def f(self, *args, **kwargs): + return self._delegate_method(name, *args, **kwargs) + + f.__name__ = name + f.__doc__ = getattr(delegate, name).__doc__ + + return f + + for name in accessors: + + if typ == "property": + f = _create_delegator_property(name) + else: + f = _create_delegator_method(name) + + # don't overwrite existing methods/properties + if overwrite or not hasattr(cls, name): + setattr(cls, name, f) + + +def delegate_names(delegate, accessors, typ: str, overwrite: bool = False): + """ + Add delegated names to a class using a class decorator. This provides + an alternative usage to directly calling `_add_delegate_accessors` + below a class definition. 
+ + Parameters + ---------- + delegate : object + The class to get methods/properties & doc-strings. + accessors : Sequence[str] + List of accessor to add. + typ : {'property', 'method'} + overwrite : bool, default False + Overwrite the method/property in the target class if it exists. + + Returns + ------- + callable + A class decorator. + + Examples + -------- + @delegate_names(Categorical, ["categories", "ordered"], "property") + class CategoricalAccessor(PandasDelegate): + [...] + """ + + def add_delegate_accessors(cls): + cls._add_delegate_accessors(delegate, accessors, typ, overwrite=overwrite) + return cls + + return add_delegate_accessors + + +# Ported with modifications from xarray +# https://github.com/pydata/xarray/blob/master/xarray/core/extensions.py +# 1. We don't need to catch and re-raise AttributeErrors as RuntimeErrors +# 2. We use a UserWarning instead of a custom Warning + + +class CachedAccessor: + """ + Custom property-like object. + + A descriptor for caching accessors. + + Parameters + ---------- + name : str + Namespace that will be accessed under, e.g. ``df.foo``. + accessor : cls + Class with the extension methods. + + Notes + ----- + For accessor, The class's __init__ method assumes that one of + ``Series``, ``DataFrame`` or ``Index`` as the + single argument ``data``. + """ + + def __init__(self, name: str, accessor) -> None: + self._name = name + self._accessor = accessor + + def __get__(self, obj, cls): + if obj is None: + # we're accessing the attribute of the class, i.e., Dataset.geo + return self._accessor + accessor_obj = self._accessor(obj) + # Replace the property with the accessor object. Inspired by: + # https://www.pydanny.com/cached-property.html + # We need to use object.__setattr__ because we overwrite __setattr__ on + # NDFrame + object.__setattr__(obj, self._name, accessor_obj) + return accessor_obj + + +@doc(klass="", others="") +def _register_accessor(name, cls): + """ + Register a custom accessor on {klass} objects. + + Parameters + ---------- + name : str + Name under which the accessor should be registered. A warning is issued + if this name conflicts with a preexisting attribute. + + Returns + ------- + callable + A class decorator. + + See Also + -------- + register_dataframe_accessor : Register a custom accessor on DataFrame objects. + register_series_accessor : Register a custom accessor on Series objects. + register_index_accessor : Register a custom accessor on Index objects. + + Notes + ----- + When accessed, your accessor will be initialized with the pandas object + the user is interacting with. So the signature must be + + .. code-block:: python + + def __init__(self, pandas_object): # noqa: E999 + ... + + For consistency with pandas methods, you should raise an ``AttributeError`` + if the data passed to your accessor has an incorrect dtype. + + >>> pd.Series(['a', 'b']).dt + Traceback (most recent call last): + ... + AttributeError: Can only use .dt accessor with datetimelike values + + Examples + -------- + In your library code:: + + import pandas as pd + + @pd.api.extensions.register_dataframe_accessor("geo") + class GeoAccessor: + def __init__(self, pandas_obj): + self._obj = pandas_obj + + @property + def center(self): + # return the geographic center point of this DataFrame + lat = self._obj.latitude + lon = self._obj.longitude + return (float(lon.mean()), float(lat.mean())) + + def plot(self): + # plot this array's data on a map, e.g., using Cartopy + pass + + Back in an interactive IPython session: + + .. 
code-block:: ipython + + In [1]: ds = pd.DataFrame({{"longitude": np.linspace(0, 10), + ...: "latitude": np.linspace(0, 20)}}) + In [2]: ds.geo.center + Out[2]: (5.0, 10.0) + In [3]: ds.geo.plot() # plots data on a map + """ + + def decorator(accessor): + if hasattr(cls, name): + warnings.warn( + f"registration of accessor {repr(accessor)} under name " + f"{repr(name)} for type {repr(cls)} is overriding a preexisting " + f"attribute with the same name.", + UserWarning, + stacklevel=find_stack_level(), + ) + setattr(cls, name, CachedAccessor(name, accessor)) + cls._accessors.add(name) + return accessor + + return decorator + + +@doc(_register_accessor, klass="DataFrame") +def register_dataframe_accessor(name): + from pandas import DataFrame + + return _register_accessor(name, DataFrame) + + +@doc(_register_accessor, klass="Series") +def register_series_accessor(name): + from pandas import Series + + return _register_accessor(name, Series) + + +@doc(_register_accessor, klass="Index") +def register_index_accessor(name): + from pandas import Index + + return _register_accessor(name, Index) diff --git a/.local/lib/python3.8/site-packages/pandas/core/algorithms.py b/.local/lib/python3.8/site-packages/pandas/core/algorithms.py new file mode 100644 index 0000000..36eabe9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/algorithms.py @@ -0,0 +1,1846 @@ +""" +Generic data algorithms. This module is experimental at the moment and not +intended for public consumption +""" +from __future__ import annotations + +import operator +from textwrap import dedent +from typing import ( + TYPE_CHECKING, + Hashable, + Literal, + Sequence, + Union, + cast, + final, +) +from warnings import warn + +import numpy as np + +from pandas._libs import ( + algos, + hashtable as htable, + iNaT, + lib, +) +from pandas._typing import ( + AnyArrayLike, + ArrayLike, + DtypeObj, + IndexLabel, + Scalar, + TakeIndexer, + npt, +) +from pandas.util._decorators import doc +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.cast import ( + construct_1d_object_array_from_listlike, + infer_dtype_from_array, + sanitize_to_nanoseconds, +) +from pandas.core.dtypes.common import ( + ensure_float64, + ensure_object, + ensure_platform_int, + is_array_like, + is_bool_dtype, + is_categorical_dtype, + is_complex_dtype, + is_datetime64_dtype, + is_extension_array_dtype, + is_float_dtype, + is_integer, + is_integer_dtype, + is_list_like, + is_numeric_dtype, + is_object_dtype, + is_scalar, + is_timedelta64_dtype, + needs_i8_conversion, +) +from pandas.core.dtypes.concat import concat_compat +from pandas.core.dtypes.dtypes import ( + ExtensionDtype, + PandasDtype, +) +from pandas.core.dtypes.generic import ( + ABCDatetimeArray, + ABCExtensionArray, + ABCIndex, + ABCMultiIndex, + ABCRangeIndex, + ABCSeries, + ABCTimedeltaArray, +) +from pandas.core.dtypes.missing import ( + isna, + na_value_for_dtype, +) + +from pandas.core.array_algos.take import take_nd +from pandas.core.construction import ( + array as pd_array, + ensure_wrapped_if_datetimelike, + extract_array, +) +from pandas.core.indexers import validate_indices + +if TYPE_CHECKING: + + from pandas._typing import ( + NumpySorter, + NumpyValueArrayLike, + ) + + from pandas import ( + Categorical, + DataFrame, + Index, + Series, + ) + from pandas.core.arrays import ( + DatetimeArray, + ExtensionArray, + TimedeltaArray, + ) + + +# --------------- # +# dtype access # +# --------------- # +def _ensure_data(values: ArrayLike) -> np.ndarray: + """ + 
routine to ensure that our data is of the correct + input dtype for lower-level routines + + This will coerce: + - ints -> int64 + - uint -> uint64 + - bool -> uint8 + - datetimelike -> i8 + - datetime64tz -> i8 (in local tz) + - categorical -> codes + + Parameters + ---------- + values : np.ndarray or ExtensionArray + + Returns + ------- + np.ndarray + """ + + if not isinstance(values, ABCMultiIndex): + # extract_array would raise + values = extract_array(values, extract_numpy=True) + + # we check some simple dtypes first + if is_object_dtype(values.dtype): + return ensure_object(np.asarray(values)) + + elif is_bool_dtype(values.dtype): + if isinstance(values, np.ndarray): + # i.e. actually dtype == np.dtype("bool") + return np.asarray(values).view("uint8") + else: + # i.e. all-bool Categorical, BooleanArray + try: + return np.asarray(values).astype("uint8", copy=False) + except (TypeError, ValueError): + # GH#42107 we have pd.NAs present + return np.asarray(values) + + elif is_integer_dtype(values.dtype): + return np.asarray(values) + + elif is_float_dtype(values.dtype): + # Note: checking `values.dtype == "float128"` raises on Windows and 32bit + # error: Item "ExtensionDtype" of "Union[Any, ExtensionDtype, dtype[Any]]" + # has no attribute "itemsize" + if values.dtype.itemsize in [2, 12, 16]: # type: ignore[union-attr] + # we dont (yet) have float128 hashtable support + return ensure_float64(values) + return np.asarray(values) + + elif is_complex_dtype(values.dtype): + # Incompatible return value type (got "Tuple[Union[Any, ExtensionArray, + # ndarray[Any, Any]], Union[Any, ExtensionDtype]]", expected + # "Tuple[ndarray[Any, Any], Union[dtype[Any], ExtensionDtype]]") + return values # type: ignore[return-value] + + # datetimelike + elif needs_i8_conversion(values.dtype): + if isinstance(values, np.ndarray): + values = sanitize_to_nanoseconds(values) + npvalues = values.view("i8") + npvalues = cast(np.ndarray, npvalues) + return npvalues + + elif is_categorical_dtype(values.dtype): + values = cast("Categorical", values) + values = values.codes + return values + + # we have failed, return object + values = np.asarray(values, dtype=object) + return ensure_object(values) + + +def _reconstruct_data( + values: ArrayLike, dtype: DtypeObj, original: AnyArrayLike +) -> ArrayLike: + """ + reverse of _ensure_data + + Parameters + ---------- + values : np.ndarray or ExtensionArray + dtype : np.dtype or ExtensionDtype + original : AnyArrayLike + + Returns + ------- + ExtensionArray or np.ndarray + """ + if isinstance(values, ABCExtensionArray) and values.dtype == dtype: + # Catch DatetimeArray/TimedeltaArray + return values + + if not isinstance(dtype, np.dtype): + # i.e. 
ExtensionDtype + cls = dtype.construct_array_type() + if isinstance(values, cls) and values.dtype == dtype: + return values + + values = cls._from_sequence(values, dtype=dtype) + elif is_bool_dtype(dtype): + values = values.astype(dtype, copy=False) + + # we only support object dtypes bool Index + if isinstance(original, ABCIndex): + values = values.astype(object, copy=False) + elif dtype is not None: + if is_datetime64_dtype(dtype): + dtype = np.dtype("datetime64[ns]") + elif is_timedelta64_dtype(dtype): + dtype = np.dtype("timedelta64[ns]") + + values = values.astype(dtype, copy=False) + + return values + + +def _ensure_arraylike(values) -> ArrayLike: + """ + ensure that we are arraylike if not already + """ + if not is_array_like(values): + inferred = lib.infer_dtype(values, skipna=False) + if inferred in ["mixed", "string", "mixed-integer"]: + # "mixed-integer" to ensure we do not cast ["ss", 42] to str GH#22160 + if isinstance(values, tuple): + values = list(values) + values = construct_1d_object_array_from_listlike(values) + else: + values = np.asarray(values) + return values + + +_hashtables = { + "complex128": htable.Complex128HashTable, + "complex64": htable.Complex64HashTable, + "float64": htable.Float64HashTable, + "float32": htable.Float32HashTable, + "uint64": htable.UInt64HashTable, + "uint32": htable.UInt32HashTable, + "uint16": htable.UInt16HashTable, + "uint8": htable.UInt8HashTable, + "int64": htable.Int64HashTable, + "int32": htable.Int32HashTable, + "int16": htable.Int16HashTable, + "int8": htable.Int8HashTable, + "string": htable.StringHashTable, + "object": htable.PyObjectHashTable, +} + + +def _get_hashtable_algo(values: np.ndarray): + """ + Parameters + ---------- + values : np.ndarray + + Returns + ------- + htable : HashTable subclass + values : ndarray + """ + values = _ensure_data(values) + + ndtype = _check_object_for_strings(values) + htable = _hashtables[ndtype] + return htable, values + + +def _get_values_for_rank(values: ArrayLike) -> np.ndarray: + + values = _ensure_data(values) + if values.dtype.kind in ["i", "u", "f"]: + # rank_t includes only object, int64, uint64, float64 + dtype = values.dtype.kind + "8" + values = values.astype(dtype, copy=False) + return values + + +def _get_data_algo(values: ArrayLike): + values = _get_values_for_rank(values) + + ndtype = _check_object_for_strings(values) + htable = _hashtables.get(ndtype, _hashtables["object"]) + + return htable, values + + +def _check_object_for_strings(values: np.ndarray) -> str: + """ + Check if we can use string hashtable instead of object hashtable. + + Parameters + ---------- + values : ndarray + + Returns + ------- + str + """ + ndtype = values.dtype.name + if ndtype == "object": + + # it's cheaper to use a String Hash Table than Object; we infer + # including nulls because that is the only difference between + # StringHashTable and ObjectHashtable + if lib.infer_dtype(values, skipna=False) in ["string"]: + ndtype = "string" + return ndtype + + +# --------------- # +# top-level algos # +# --------------- # + + +def unique(values): + """ + Return unique values based on a hash table. + + Uniques are returned in order of appearance. This does NOT sort. + + Significantly faster than numpy.unique for long enough sequences. + Includes NA values. 
+ + Parameters + ---------- + values : 1d array-like + + Returns + ------- + numpy.ndarray or ExtensionArray + + The return can be: + + * Index : when the input is an Index + * Categorical : when the input is a Categorical dtype + * ndarray : when the input is a Series/ndarray + + Return numpy.ndarray or ExtensionArray. + + See Also + -------- + Index.unique : Return unique values from an Index. + Series.unique : Return unique values of Series object. + + Examples + -------- + >>> pd.unique(pd.Series([2, 1, 3, 3])) + array([2, 1, 3]) + + >>> pd.unique(pd.Series([2] + [1] * 5)) + array([2, 1]) + + >>> pd.unique(pd.Series([pd.Timestamp("20160101"), pd.Timestamp("20160101")])) + array(['2016-01-01T00:00:00.000000000'], dtype='datetime64[ns]') + + >>> pd.unique( + ... pd.Series( + ... [ + ... pd.Timestamp("20160101", tz="US/Eastern"), + ... pd.Timestamp("20160101", tz="US/Eastern"), + ... ] + ... ) + ... ) + + ['2016-01-01 00:00:00-05:00'] + Length: 1, dtype: datetime64[ns, US/Eastern] + + >>> pd.unique( + ... pd.Index( + ... [ + ... pd.Timestamp("20160101", tz="US/Eastern"), + ... pd.Timestamp("20160101", tz="US/Eastern"), + ... ] + ... ) + ... ) + DatetimeIndex(['2016-01-01 00:00:00-05:00'], + dtype='datetime64[ns, US/Eastern]', + freq=None) + + >>> pd.unique(list("baabc")) + array(['b', 'a', 'c'], dtype=object) + + An unordered Categorical will return categories in the + order of appearance. + + >>> pd.unique(pd.Series(pd.Categorical(list("baabc")))) + ['b', 'a', 'c'] + Categories (3, object): ['a', 'b', 'c'] + + >>> pd.unique(pd.Series(pd.Categorical(list("baabc"), categories=list("abc")))) + ['b', 'a', 'c'] + Categories (3, object): ['a', 'b', 'c'] + + An ordered Categorical preserves the category ordering. + + >>> pd.unique( + ... pd.Series( + ... pd.Categorical(list("baabc"), categories=list("abc"), ordered=True) + ... ) + ... ) + ['b', 'a', 'c'] + Categories (3, object): ['a' < 'b' < 'c'] + + An array of tuples + + >>> pd.unique([("a", "b"), ("b", "a"), ("a", "c"), ("b", "a")]) + array([('a', 'b'), ('b', 'a'), ('a', 'c')], dtype=object) + """ + values = _ensure_arraylike(values) + + if is_extension_array_dtype(values.dtype): + # Dispatch to extension dtype's unique. + return values.unique() + + original = values + htable, values = _get_hashtable_algo(values) + + table = htable(len(values)) + uniques = table.unique(values) + uniques = _reconstruct_data(uniques, original.dtype, original) + return uniques + + +unique1d = unique + + +def isin(comps: AnyArrayLike, values: AnyArrayLike) -> npt.NDArray[np.bool_]: + """ + Compute the isin boolean array. + + Parameters + ---------- + comps : array-like + values : array-like + + Returns + ------- + ndarray[bool] + Same length as `comps`. + """ + if not is_list_like(comps): + raise TypeError( + "only list-like objects are allowed to be passed " + f"to isin(), you passed a [{type(comps).__name__}]" + ) + if not is_list_like(values): + raise TypeError( + "only list-like objects are allowed to be passed " + f"to isin(), you passed a [{type(values).__name__}]" + ) + + if not isinstance(values, (ABCIndex, ABCSeries, ABCExtensionArray, np.ndarray)): + values = _ensure_arraylike(list(values)) + elif isinstance(values, ABCMultiIndex): + # Avoid raising in extract_array + values = np.array(values) + else: + values = extract_array(values, extract_numpy=True, extract_range=True) + + comps = _ensure_arraylike(comps) + comps = extract_array(comps, extract_numpy=True) + if not isinstance(comps, np.ndarray): + # i.e. 
Extension Array + return comps.isin(values) + + elif needs_i8_conversion(comps.dtype): + # Dispatch to DatetimeLikeArrayMixin.isin + return pd_array(comps).isin(values) + elif needs_i8_conversion(values.dtype) and not is_object_dtype(comps.dtype): + # e.g. comps are integers and values are datetime64s + return np.zeros(comps.shape, dtype=bool) + # TODO: not quite right ... Sparse/Categorical + elif needs_i8_conversion(values.dtype): + return isin(comps, values.astype(object)) + + elif isinstance(values.dtype, ExtensionDtype): + return isin(np.asarray(comps), np.asarray(values)) + + # GH16012 + # Ensure np.in1d doesn't get object types or it *may* throw an exception + # Albeit hashmap has O(1) look-up (vs. O(logn) in sorted array), + # in1d is faster for small sizes + if len(comps) > 1_000_000 and len(values) <= 26 and not is_object_dtype(comps): + # If the values include nan we need to check for nan explicitly + # since np.nan it not equal to np.nan + if isna(values).any(): + + def f(c, v): + return np.logical_or(np.in1d(c, v), np.isnan(c)) + + else: + f = np.in1d + + else: + common = np.find_common_type([values.dtype, comps.dtype], []) + values = values.astype(common, copy=False) + comps = comps.astype(common, copy=False) + f = htable.ismember + + return f(comps, values) + + +def factorize_array( + values: np.ndarray, + na_sentinel: int = -1, + size_hint: int | None = None, + na_value=None, + mask: np.ndarray | None = None, +) -> tuple[npt.NDArray[np.intp], np.ndarray]: + """ + Factorize a numpy array to codes and uniques. + + This doesn't do any coercion of types or unboxing before factorization. + + Parameters + ---------- + values : ndarray + na_sentinel : int, default -1 + size_hint : int, optional + Passed through to the hashtable's 'get_labels' method + na_value : object, optional + A value in `values` to consider missing. Note: only use this + parameter when you know that you don't have any values pandas would + consider missing in the array (NaN for float data, iNaT for + datetimes, etc.). + mask : ndarray[bool], optional + If not None, the mask is used as indicator for missing values + (True = missing, False = valid) instead of `na_value` or + condition "val != val". + + Returns + ------- + codes : ndarray[np.intp] + uniques : ndarray + """ + hash_klass, values = _get_data_algo(values) + + table = hash_klass(size_hint or len(values)) + uniques, codes = table.factorize( + values, na_sentinel=na_sentinel, na_value=na_value, mask=mask + ) + + codes = ensure_platform_int(codes) + return codes, uniques + + +@doc( + values=dedent( + """\ + values : sequence + A 1-D sequence. Sequences that aren't pandas objects are + coerced to ndarrays before factorization. + """ + ), + sort=dedent( + """\ + sort : bool, default False + Sort `uniques` and shuffle `codes` to maintain the + relationship. + """ + ), + size_hint=dedent( + """\ + size_hint : int, optional + Hint to the hashtable sizer. + """ + ), +) +def factorize( + values, + sort: bool = False, + na_sentinel: int | None = -1, + size_hint: int | None = None, +) -> tuple[np.ndarray, np.ndarray | Index]: + """ + Encode the object as an enumerated type or categorical variable. + + This method is useful for obtaining a numeric representation of an + array when all that matters is identifying distinct values. `factorize` + is available as both a top-level function :func:`pandas.factorize`, + and as a method :meth:`Series.factorize` and :meth:`Index.factorize`. 
+ + Parameters + ---------- + {values}{sort} + na_sentinel : int or None, default -1 + Value to mark "not found". If None, will not drop the NaN + from the uniques of the values. + + .. versionchanged:: 1.1.2 + {size_hint}\ + + Returns + ------- + codes : ndarray + An integer ndarray that's an indexer into `uniques`. + ``uniques.take(codes)`` will have the same values as `values`. + uniques : ndarray, Index, or Categorical + The unique valid values. When `values` is Categorical, `uniques` + is a Categorical. When `values` is some other pandas object, an + `Index` is returned. Otherwise, a 1-D ndarray is returned. + + .. note:: + + Even if there's a missing value in `values`, `uniques` will + *not* contain an entry for it. + + See Also + -------- + cut : Discretize continuous-valued array. + unique : Find the unique value in an array. + + Notes + ----- + Reference :ref:`the user guide ` for more examples. + + Examples + -------- + These examples all show factorize as a top-level method like + ``pd.factorize(values)``. The results are identical for methods like + :meth:`Series.factorize`. + + >>> codes, uniques = pd.factorize(['b', 'b', 'a', 'c', 'b']) + >>> codes + array([0, 0, 1, 2, 0]...) + >>> uniques + array(['b', 'a', 'c'], dtype=object) + + With ``sort=True``, the `uniques` will be sorted, and `codes` will be + shuffled so that the relationship is the maintained. + + >>> codes, uniques = pd.factorize(['b', 'b', 'a', 'c', 'b'], sort=True) + >>> codes + array([1, 1, 0, 2, 1]...) + >>> uniques + array(['a', 'b', 'c'], dtype=object) + + Missing values are indicated in `codes` with `na_sentinel` + (``-1`` by default). Note that missing values are never + included in `uniques`. + + >>> codes, uniques = pd.factorize(['b', None, 'a', 'c', 'b']) + >>> codes + array([ 0, -1, 1, 2, 0]...) + >>> uniques + array(['b', 'a', 'c'], dtype=object) + + Thus far, we've only factorized lists (which are internally coerced to + NumPy arrays). When factorizing pandas objects, the type of `uniques` + will differ. For Categoricals, a `Categorical` is returned. + + >>> cat = pd.Categorical(['a', 'a', 'c'], categories=['a', 'b', 'c']) + >>> codes, uniques = pd.factorize(cat) + >>> codes + array([0, 0, 1]...) + >>> uniques + ['a', 'c'] + Categories (3, object): ['a', 'b', 'c'] + + Notice that ``'b'`` is in ``uniques.categories``, despite not being + present in ``cat.values``. + + For all other pandas objects, an Index of the appropriate type is + returned. + + >>> cat = pd.Series(['a', 'a', 'c']) + >>> codes, uniques = pd.factorize(cat) + >>> codes + array([0, 0, 1]...) + >>> uniques + Index(['a', 'c'], dtype='object') + + If NaN is in the values, and we want to include NaN in the uniques of the + values, it can be achieved by setting ``na_sentinel=None``. + + >>> values = np.array([1, 2, 1, np.nan]) + >>> codes, uniques = pd.factorize(values) # default: na_sentinel=-1 + >>> codes + array([ 0, 1, 0, -1]) + >>> uniques + array([1., 2.]) + + >>> codes, uniques = pd.factorize(values, na_sentinel=None) + >>> codes + array([0, 1, 0, 2]) + >>> uniques + array([ 1., 2., nan]) + """ + # Implementation notes: This method is responsible for 3 things + # 1.) coercing data to array-like (ndarray, Index, extension array) + # 2.) factorizing codes and uniques + # 3.) Maybe boxing the uniques in an Index + # + # Step 2 is dispatched to extension types (like Categorical). They are + # responsible only for factorization. All data coercion, sorting and boxing + # should happen here. 
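+    #
+    # Editor's note: a minimal doctest-style sketch of the contract described
+    # above, kept in comments so the function body itself is unchanged. Only
+    # public pandas/NumPy names are used; `uniques.take(codes)` reconstructs
+    # the input, as documented:
+    #
+    # >>> import numpy as np
+    # >>> import pandas as pd
+    # >>> vals = np.array(["b", "b", "a", "c"], dtype=object)
+    # >>> codes, uniques = pd.factorize(vals)
+    # >>> (uniques.take(codes) == vals).all()
+    # True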
+ + if isinstance(values, ABCRangeIndex): + return values.factorize(sort=sort) + + values = _ensure_arraylike(values) + original = values + if not isinstance(values, ABCMultiIndex): + values = extract_array(values, extract_numpy=True) + + # GH35667, if na_sentinel=None, we will not dropna NaNs from the uniques + # of values, assign na_sentinel=-1 to replace code value for NaN. + dropna = True + if na_sentinel is None: + na_sentinel = -1 + dropna = False + + if ( + isinstance(values, (ABCDatetimeArray, ABCTimedeltaArray)) + and values.freq is not None + ): + codes, uniques = values.factorize(sort=sort) + if isinstance(original, ABCIndex): + uniques = original._shallow_copy(uniques, name=None) + elif isinstance(original, ABCSeries): + from pandas import Index + + uniques = Index(uniques) + return codes, uniques + + if not isinstance(values.dtype, np.dtype): + # i.e. ExtensionDtype + codes, uniques = values.factorize(na_sentinel=na_sentinel) + dtype = original.dtype + else: + dtype = values.dtype + values = _ensure_data(values) + na_value: Scalar + + if original.dtype.kind in ["m", "M"]: + # Note: factorize_array will cast NaT bc it has a __int__ + # method, but will not cast the more-correct dtype.type("nat") + na_value = iNaT + else: + na_value = None + + codes, uniques = factorize_array( + values, na_sentinel=na_sentinel, size_hint=size_hint, na_value=na_value + ) + + if sort and len(uniques) > 0: + uniques, codes = safe_sort( + uniques, codes, na_sentinel=na_sentinel, assume_unique=True, verify=False + ) + + code_is_na = codes == na_sentinel + if not dropna and code_is_na.any(): + # na_value is set based on the dtype of uniques, and compat set to False is + # because we do not want na_value to be 0 for integers + na_value = na_value_for_dtype(uniques.dtype, compat=False) + # Argument 2 to "append" has incompatible type "List[Union[str, float, Period, + # Timestamp, Timedelta, Any]]"; expected "Union[_SupportsArray[dtype[Any]], + # _NestedSequence[_SupportsArray[dtype[Any]]] + # , bool, int, float, complex, str, bytes, _NestedSequence[Union[bool, int, + # float, complex, str, bytes]]]" [arg-type] + uniques = np.append(uniques, [na_value]) # type: ignore[arg-type] + codes = np.where(code_is_na, len(uniques) - 1, codes) + + uniques = _reconstruct_data(uniques, dtype, original) + + # return original tenor + if isinstance(original, ABCIndex): + if original.dtype.kind in ["m", "M"] and isinstance(uniques, np.ndarray): + original._data = cast( + "Union[DatetimeArray, TimedeltaArray]", original._data + ) + uniques = type(original._data)._simple_new(uniques, dtype=original.dtype) + uniques = original._shallow_copy(uniques, name=None) + elif isinstance(original, ABCSeries): + from pandas import Index + + uniques = Index(uniques) + + return codes, uniques + + +def value_counts( + values, + sort: bool = True, + ascending: bool = False, + normalize: bool = False, + bins=None, + dropna: bool = True, +) -> Series: + """ + Compute a histogram of the counts of non-null values. 
+ + Parameters + ---------- + values : ndarray (1-d) + sort : bool, default True + Sort by values + ascending : bool, default False + Sort in ascending order + normalize: bool, default False + If True then compute a relative histogram + bins : integer, optional + Rather than count values, group them into half-open bins, + convenience for pd.cut, only works with numeric data + dropna : bool, default True + Don't include counts of NaN + + Returns + ------- + Series + """ + from pandas.core.series import Series + + name = getattr(values, "name", None) + + if bins is not None: + from pandas.core.reshape.tile import cut + + values = Series(values) + try: + ii = cut(values, bins, include_lowest=True) + except TypeError as err: + raise TypeError("bins argument only works with numeric data.") from err + + # count, remove nulls (from the index), and but the bins + result = ii.value_counts(dropna=dropna) + result = result[result.index.notna()] + result.index = result.index.astype("interval") + result = result.sort_index() + + # if we are dropna and we have NO values + if dropna and (result._values == 0).all(): + result = result.iloc[0:0] + + # normalizing is by len of all (regardless of dropna) + counts = np.array([len(ii)]) + + else: + + if is_extension_array_dtype(values): + + # handle Categorical and sparse, + result = Series(values)._values.value_counts(dropna=dropna) + result.name = name + counts = result._values + + else: + keys, counts = value_counts_arraylike(values, dropna) + + result = Series(counts, index=keys, name=name) + + if sort: + result = result.sort_values(ascending=ascending) + + if normalize: + result = result / counts.sum() + + return result + + +# Called once from SparseArray, otherwise could be private +def value_counts_arraylike(values, dropna: bool): + """ + Parameters + ---------- + values : arraylike + dropna : bool + + Returns + ------- + uniques : np.ndarray or ExtensionArray + counts : np.ndarray + """ + values = _ensure_arraylike(values) + original = values + values = _ensure_data(values) + + keys, counts = htable.value_count(values, dropna) + + if needs_i8_conversion(original.dtype): + # datetime, timedelta, or period + + if dropna: + msk = keys != iNaT + keys, counts = keys[msk], counts[msk] + + res_keys = _reconstruct_data(keys, original.dtype, original) + return res_keys, counts + + +def duplicated( + values: ArrayLike, keep: Literal["first", "last", False] = "first" +) -> npt.NDArray[np.bool_]: + """ + Return boolean ndarray denoting duplicate values. + + Parameters + ---------- + values : nd.array, ExtensionArray or Series + Array over which to check for duplicate values. + keep : {'first', 'last', False}, default 'first' + - ``first`` : Mark duplicates as ``True`` except for the first + occurrence. + - ``last`` : Mark duplicates as ``True`` except for the last + occurrence. + - False : Mark all duplicates as ``True``. + + Returns + ------- + duplicated : ndarray[bool] + """ + values = _ensure_data(values) + return htable.duplicated(values, keep=keep) + + +def mode(values: ArrayLike, dropna: bool = True) -> ArrayLike: + """ + Returns the mode(s) of an array. + + Parameters + ---------- + values : array-like + Array over which to check for duplicate values. + dropna : bool, default True + Don't consider counts of NaN/NaT. + + Returns + ------- + np.ndarray or ExtensionArray + """ + values = _ensure_arraylike(values) + original = values + + if needs_i8_conversion(values.dtype): + # Got here with ndarray; dispatch to DatetimeArray/TimedeltaArray. 
+        values = ensure_wrapped_if_datetimelike(values)
+        # error: Item "ndarray[Any, Any]" of "Union[ExtensionArray,
+        # ndarray[Any, Any]]" has no attribute "_mode"
+        return values._mode(dropna=dropna)  # type: ignore[union-attr]
+
+    values = _ensure_data(values)
+
+    npresult = htable.mode(values, dropna=dropna)
+    try:
+        npresult = np.sort(npresult)
+    except TypeError as err:
+        warn(f"Unable to sort modes: {err}")
+
+    result = _reconstruct_data(npresult, original.dtype, original)
+    return result
+
+
+def rank(
+    values: ArrayLike,
+    axis: int = 0,
+    method: str = "average",
+    na_option: str = "keep",
+    ascending: bool = True,
+    pct: bool = False,
+) -> npt.NDArray[np.float64]:
+    """
+    Rank the values along a given axis.
+
+    Parameters
+    ----------
+    values : np.ndarray or ExtensionArray
+        Array whose values will be ranked. The number of dimensions in this
+        array must not exceed 2.
+    axis : int, default 0
+        Axis over which to perform rankings.
+    method : {'average', 'min', 'max', 'first', 'dense'}, default 'average'
+        The method by which tiebreaks are broken during the ranking.
+    na_option : {'keep', 'top'}, default 'keep'
+        The method by which NaNs are placed in the ranking.
+        - ``keep``: rank each NaN value with a NaN ranking
+        - ``top``: replace each NaN with either +/- inf so that they
+          are ranked at the top
+    ascending : bool, default True
+        Whether or not the elements should be ranked in ascending order.
+    pct : bool, default False
+        Whether or not to display the returned rankings in integer form
+        (e.g. 1, 2, 3) or in percentile form (e.g. 0.333..., 0.666..., 1).
+    """
+    is_datetimelike = needs_i8_conversion(values.dtype)
+    values = _get_values_for_rank(values)
+    if values.ndim == 1:
+        ranks = algos.rank_1d(
+            values,
+            is_datetimelike=is_datetimelike,
+            ties_method=method,
+            ascending=ascending,
+            na_option=na_option,
+            pct=pct,
+        )
+    elif values.ndim == 2:
+        ranks = algos.rank_2d(
+            values,
+            axis=axis,
+            is_datetimelike=is_datetimelike,
+            ties_method=method,
+            ascending=ascending,
+            na_option=na_option,
+            pct=pct,
+        )
+    else:
+        raise TypeError("Arrays with ndim > 2 are not supported.")
+
+    return ranks
+
+
+def checked_add_with_arr(
+    arr: np.ndarray,
+    b,
+    arr_mask: npt.NDArray[np.bool_] | None = None,
+    b_mask: npt.NDArray[np.bool_] | None = None,
+) -> np.ndarray:
+    """
+    Perform array addition that checks for underflow and overflow.
+
+    Performs the addition of an int64 array and an int64 integer (or array)
+    but checks that they do not result in overflow first. For elements that
+    are indicated to be NaN, whether or not there is overflow for that element
+    is automatically ignored.
+
+    Parameters
+    ----------
+    arr : array addend.
+    b : array or scalar addend.
+    arr_mask : np.ndarray[bool] or None, default None
+        array indicating which elements to exclude from checking
+    b_mask : np.ndarray[bool] or None, default None
+        array or scalar indicating which element(s) to exclude from checking
+
+    Returns
+    -------
+    sum : An array for elements x + b for each element x in arr if b is
+          a scalar or an array for elements x + y for each element pair
+          (x, y) in (arr, b).
+
+    Raises
+    ------
+    OverflowError if any x + y exceeds the maximum or minimum int64 value.
+    """
+    # For performance reasons, we broadcast 'b' to the new array 'b2'
+    # so that it has the same size as 'arr'.
+    b2 = np.broadcast_to(b, arr.shape)
+    if b_mask is not None:
+        # We do the same broadcasting for b_mask as well.
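+        # Editor's note: doctest-style sketch of the overflow guard this
+        # function implements (comments only; `checked_add_with_arr` is a
+        # private pandas helper, so this is illustrative rather than API doc):
+        # >>> import numpy as np
+        # >>> from pandas.core.algorithms import checked_add_with_arr
+        # >>> checked_add_with_arr(np.array([1, 2], dtype="i8"), 3)
+        # array([4, 5])
+        # >>> checked_add_with_arr(np.array([np.iinfo("i8").max], dtype="i8"), 1)
+        # Traceback (most recent call last):
+        # ...
+        # OverflowError: Overflow in int64 addition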
+ b2_mask = np.broadcast_to(b_mask, arr.shape) + else: + b2_mask = None + + # For elements that are NaN, regardless of their value, we should + # ignore whether they overflow or not when doing the checked add. + if arr_mask is not None and b2_mask is not None: + not_nan = np.logical_not(arr_mask | b2_mask) + elif arr_mask is not None: + not_nan = np.logical_not(arr_mask) + elif b_mask is not None: + # Argument 1 to "__call__" of "_UFunc_Nin1_Nout1" has incompatible type + # "Optional[ndarray[Any, dtype[bool_]]]"; expected + # "Union[_SupportsArray[dtype[Any]], _NestedSequence[_SupportsArray[dtype[An + # y]]], bool, int, float, complex, str, bytes, _NestedSequence[Union[bool, + # int, float, complex, str, bytes]]]" [arg-type] + not_nan = np.logical_not(b2_mask) # type: ignore[arg-type] + else: + not_nan = np.empty(arr.shape, dtype=bool) + not_nan.fill(True) + + # gh-14324: For each element in 'arr' and its corresponding element + # in 'b2', we check the sign of the element in 'b2'. If it is positive, + # we then check whether its sum with the element in 'arr' exceeds + # np.iinfo(np.int64).max. If so, we have an overflow error. If it + # it is negative, we then check whether its sum with the element in + # 'arr' exceeds np.iinfo(np.int64).min. If so, we have an overflow + # error as well. + i8max = lib.i8max + i8min = iNaT + + mask1 = b2 > 0 + mask2 = b2 < 0 + + if not mask1.any(): + to_raise = ((i8min - b2 > arr) & not_nan).any() + elif not mask2.any(): + to_raise = ((i8max - b2 < arr) & not_nan).any() + else: + to_raise = ((i8max - b2[mask1] < arr[mask1]) & not_nan[mask1]).any() or ( + (i8min - b2[mask2] > arr[mask2]) & not_nan[mask2] + ).any() + + if to_raise: + raise OverflowError("Overflow in int64 addition") + return arr + b + + +# --------------- # +# select n # +# --------------- # + + +class SelectN: + def __init__(self, obj, n: int, keep: str): + self.obj = obj + self.n = n + self.keep = keep + + if self.keep not in ("first", "last", "all"): + raise ValueError('keep must be either "first", "last" or "all"') + + def compute(self, method: str) -> DataFrame | Series: + raise NotImplementedError + + @final + def nlargest(self): + return self.compute("nlargest") + + @final + def nsmallest(self): + return self.compute("nsmallest") + + @final + @staticmethod + def is_valid_dtype_n_method(dtype: DtypeObj) -> bool: + """ + Helper function to determine if dtype is valid for + nsmallest/nlargest methods + """ + return ( + is_numeric_dtype(dtype) and not is_complex_dtype(dtype) + ) or needs_i8_conversion(dtype) + + +class SelectNSeries(SelectN): + """ + Implement n largest/smallest for Series + + Parameters + ---------- + obj : Series + n : int + keep : {'first', 'last'}, default 'first' + + Returns + ------- + nordered : Series + """ + + def compute(self, method: str) -> Series: + + from pandas.core.reshape.concat import concat + + n = self.n + dtype = self.obj.dtype + if not self.is_valid_dtype_n_method(dtype): + raise TypeError(f"Cannot use method '{method}' with dtype {dtype}") + + if n <= 0: + return self.obj[[]] + + dropped = self.obj.dropna() + nan_index = self.obj.drop(dropped.index) + + if is_extension_array_dtype(dropped.dtype): + # GH#41816 bc we have dropped NAs above, MaskedArrays can use the + # numpy logic. 
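+            # Editor's note: doctest-style illustration of the masked-array
+            # path described above, via the public nlargest (comments only):
+            # >>> import pandas as pd
+            # >>> pd.Series([3, 1, 2], dtype="Int64").nlargest(2)
+            # 0    3
+            # 2    2
+            # dtype: Int64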
+ from pandas.core.arrays import BaseMaskedArray + + arr = dropped._values + if isinstance(arr, BaseMaskedArray): + ser = type(dropped)(arr._data, index=dropped.index, name=dropped.name) + + result = type(self)(ser, n=self.n, keep=self.keep).compute(method) + return result.astype(arr.dtype) + + # slow method + if n >= len(self.obj): + ascending = method == "nsmallest" + return self.obj.sort_values(ascending=ascending).head(n) + + # fast method + new_dtype = dropped.dtype + arr = _ensure_data(dropped.values) + if method == "nlargest": + arr = -arr + if is_integer_dtype(new_dtype): + # GH 21426: ensure reverse ordering at boundaries + arr -= 1 + + elif is_bool_dtype(new_dtype): + # GH 26154: ensure False is smaller than True + arr = 1 - (-arr) + + if self.keep == "last": + arr = arr[::-1] + + nbase = n + findex = len(self.obj) + narr = len(arr) + n = min(n, narr) + + # arr passed into kth_smallest must be contiguous. We copy + # here because kth_smallest will modify its input + kth_val = algos.kth_smallest(arr.copy(order="C"), n - 1) + (ns,) = np.nonzero(arr <= kth_val) + inds = ns[arr[ns].argsort(kind="mergesort")] + + if self.keep != "all": + inds = inds[:n] + findex = nbase + + if self.keep == "last": + # reverse indices + inds = narr - 1 - inds + + return concat([dropped.iloc[inds], nan_index]).iloc[:findex] + + +class SelectNFrame(SelectN): + """ + Implement n largest/smallest for DataFrame + + Parameters + ---------- + obj : DataFrame + n : int + keep : {'first', 'last'}, default 'first' + columns : list or str + + Returns + ------- + nordered : DataFrame + """ + + def __init__(self, obj: DataFrame, n: int, keep: str, columns: IndexLabel): + super().__init__(obj, n, keep) + if not is_list_like(columns) or isinstance(columns, tuple): + columns = [columns] + + columns = cast(Sequence[Hashable], columns) + columns = list(columns) + self.columns = columns + + def compute(self, method: str) -> DataFrame: + + from pandas.core.api import Int64Index + + n = self.n + frame = self.obj + columns = self.columns + + for column in columns: + dtype = frame[column].dtype + if not self.is_valid_dtype_n_method(dtype): + raise TypeError( + f"Column {repr(column)} has dtype {dtype}, " + f"cannot use method {repr(method)} with this dtype" + ) + + def get_indexer(current_indexer, other_indexer): + """ + Helper function to concat `current_indexer` and `other_indexer` + depending on `method` + """ + if method == "nsmallest": + return current_indexer.append(other_indexer) + else: + return other_indexer.append(current_indexer) + + # Below we save and reset the index in case index contains duplicates + original_index = frame.index + cur_frame = frame = frame.reset_index(drop=True) + cur_n = n + indexer = Int64Index([]) + + for i, column in enumerate(columns): + # For each column we apply method to cur_frame[column]. + # If it's the last column or if we have the number of + # results desired we are done. + # Otherwise there are duplicates of the largest/smallest + # value and we need to look at the rest of the columns + # to determine which of the rows with the largest/smallest + # value in the column to keep. + series = cur_frame[column] + is_last_column = len(columns) - 1 == i + values = getattr(series, method)( + cur_n, keep=self.keep if is_last_column else "all" + ) + + if is_last_column or len(values) <= cur_n: + indexer = get_indexer(indexer, values.index) + break + + # Now find all values which are equal to + # the (nsmallest: largest)/(nlargest: smallest) + # from our series. 
+ border_value = values == values[values.index[-1]] + + # Some of these values are among the top-n + # some aren't. + unsafe_values = values[border_value] + + # These values are definitely among the top-n + safe_values = values[~border_value] + indexer = get_indexer(indexer, safe_values.index) + + # Go on and separate the unsafe_values on the remaining + # columns. + cur_frame = cur_frame.loc[unsafe_values.index] + cur_n = n - len(indexer) + + frame = frame.take(indexer) + + # Restore the index on frame + frame.index = original_index.take(indexer) + + # If there is only one column, the frame is already sorted. + if len(columns) == 1: + return frame + + ascending = method == "nsmallest" + + return frame.sort_values(columns, ascending=ascending, kind="mergesort") + + +# ---- # +# take # +# ---- # + + +def take( + arr, + indices: TakeIndexer, + axis: int = 0, + allow_fill: bool = False, + fill_value=None, +): + """ + Take elements from an array. + + Parameters + ---------- + arr : array-like or scalar value + Non array-likes (sequences/scalars without a dtype) are coerced + to an ndarray. + indices : sequence of int or one-dimensional np.ndarray of int + Indices to be taken. + axis : int, default 0 + The axis over which to select values. + allow_fill : bool, default False + How to handle negative values in `indices`. + + * False: negative values in `indices` indicate positional indices + from the right (the default). This is similar to :func:`numpy.take`. + + * True: negative values in `indices` indicate + missing values. These values are set to `fill_value`. Any other + negative values raise a ``ValueError``. + + fill_value : any, optional + Fill value to use for NA-indices when `allow_fill` is True. + This may be ``None``, in which case the default NA value for + the type (``self.dtype.na_value``) is used. + + For multi-dimensional `arr`, each *element* is filled with + `fill_value`. + + Returns + ------- + ndarray or ExtensionArray + Same type as the input. + + Raises + ------ + IndexError + When `indices` is out of bounds for the array. + ValueError + When the indexer contains negative values other than ``-1`` + and `allow_fill` is True. + + Notes + ----- + When `allow_fill` is False, `indices` may be whatever dimensionality + is accepted by NumPy for `arr`. + + When `allow_fill` is True, `indices` should be 1-D. + + See Also + -------- + numpy.take : Take elements from an array along an axis. + + Examples + -------- + >>> from pandas.api.extensions import take + + With the default ``allow_fill=False``, negative numbers indicate + positional indices from the right. + + >>> take(np.array([10, 20, 30]), [0, 0, -1]) + array([10, 10, 30]) + + Setting ``allow_fill=True`` will place `fill_value` in those positions. + + >>> take(np.array([10, 20, 30]), [0, 0, -1], allow_fill=True) + array([10., 10., nan]) + + >>> take(np.array([10, 20, 30]), [0, 0, -1], allow_fill=True, + ... 
fill_value=-10) + array([ 10, 10, -10]) + """ + if not is_array_like(arr): + arr = np.asarray(arr) + + indices = np.asarray(indices, dtype=np.intp) + + if allow_fill: + # Pandas style, -1 means NA + validate_indices(indices, arr.shape[axis]) + result = take_nd( + arr, indices, axis=axis, allow_fill=True, fill_value=fill_value + ) + else: + # NumPy style + result = arr.take(indices, axis=axis) + return result + + +# ------------ # +# searchsorted # +# ------------ # + + +def searchsorted( + arr: ArrayLike, + value: NumpyValueArrayLike | ExtensionArray, + side: Literal["left", "right"] = "left", + sorter: NumpySorter = None, +) -> npt.NDArray[np.intp] | np.intp: + """ + Find indices where elements should be inserted to maintain order. + + .. versionadded:: 0.25.0 + + Find the indices into a sorted array `arr` (a) such that, if the + corresponding elements in `value` were inserted before the indices, + the order of `arr` would be preserved. + + Assuming that `arr` is sorted: + + ====== ================================ + `side` returned index `i` satisfies + ====== ================================ + left ``arr[i-1] < value <= self[i]`` + right ``arr[i-1] <= value < self[i]`` + ====== ================================ + + Parameters + ---------- + arr: np.ndarray, ExtensionArray, Series + Input array. If `sorter` is None, then it must be sorted in + ascending order, otherwise `sorter` must be an array of indices + that sort it. + value : array-like or scalar + Values to insert into `arr`. + side : {'left', 'right'}, optional + If 'left', the index of the first suitable location found is given. + If 'right', return the last such index. If there is no suitable + index, return either 0 or N (where N is the length of `self`). + sorter : 1-D array-like, optional + Optional array of integer indices that sort array a into ascending + order. They are typically the result of argsort. + + Returns + ------- + array of ints or int + If value is array-like, array of insertion points. + If value is scalar, a single integer. + + See Also + -------- + numpy.searchsorted : Similar method from NumPy. + """ + if sorter is not None: + sorter = ensure_platform_int(sorter) + + if ( + isinstance(arr, np.ndarray) + and is_integer_dtype(arr.dtype) + and (is_integer(value) or is_integer_dtype(value)) + ): + # if `arr` and `value` have different dtypes, `arr` would be + # recast by numpy, causing a slow search. + # Before searching below, we therefore try to give `value` the + # same dtype as `arr`, while guarding against integer overflows. + iinfo = np.iinfo(arr.dtype.type) + value_arr = np.array([value]) if is_scalar(value) else np.array(value) + if (value_arr >= iinfo.min).all() and (value_arr <= iinfo.max).all(): + # value within bounds, so no overflow, so can convert value dtype + # to dtype of arr + dtype = arr.dtype + else: + dtype = value_arr.dtype + + if is_scalar(value): + # We know that value is int + value = cast(int, dtype.type(value)) + else: + value = pd_array(cast(ArrayLike, value), dtype=dtype) + else: + # E.g. 
+        # and `value` is a pd.Timestamp, we may need to convert value
+        arr = ensure_wrapped_if_datetimelike(arr)
+
+    # Argument 1 to "searchsorted" of "ndarray" has incompatible type
+    # "Union[NumpyValueArrayLike, ExtensionArray]"; expected "NumpyValueArrayLike"
+    return arr.searchsorted(value, side=side, sorter=sorter)  # type: ignore[arg-type]
+
+
+# ---- #
+# diff #
+# ---- #
+
+_diff_special = {"float64", "float32", "int64", "int32", "int16", "int8"}
+
+
+def diff(arr, n: int, axis: int = 0):
+    """
+    Compute the difference of ``n`` between the elements of `arr`,
+    analogous to ``s - s.shift(n)``.
+
+    Parameters
+    ----------
+    arr : ndarray or ExtensionArray
+    n : int
+        number of periods
+    axis : {0, 1}
+        axis to shift on
+
+    Returns
+    -------
+    shifted
+    """
+
+    n = int(n)
+    na = np.nan
+    dtype = arr.dtype
+
+    is_bool = is_bool_dtype(dtype)
+    if is_bool:
+        op = operator.xor
+    else:
+        op = operator.sub
+
+    if isinstance(dtype, PandasDtype):
+        # PandasArray cannot necessarily hold shifted versions of itself.
+        arr = arr.to_numpy()
+        dtype = arr.dtype
+
+    if not isinstance(dtype, np.dtype):
+        # i.e. ExtensionDtype
+        if hasattr(arr, f"__{op.__name__}__"):
+            if axis != 0:
+                raise ValueError(f"cannot diff {type(arr).__name__} on axis={axis}")
+            return op(arr, arr.shift(n))
+        else:
+            warn(
+                "dtype lost in 'diff()'. In the future this will raise a "
+                "TypeError. Convert to a suitable dtype prior to calling 'diff'.",
+                FutureWarning,
+                stacklevel=find_stack_level(),
+            )
+            arr = np.asarray(arr)
+            dtype = arr.dtype
+
+    is_timedelta = False
+    if needs_i8_conversion(arr.dtype):
+        dtype = np.int64
+        arr = arr.view("i8")
+        na = iNaT
+        is_timedelta = True
+
+    elif is_bool:
+        # We have to cast in order to be able to hold np.nan
+        dtype = np.object_
+
+    elif is_integer_dtype(dtype):
+        # We have to cast in order to be able to hold np.nan
+
+        # int8, int16 are incompatible with float64,
+        # see https://github.com/cython/cython/issues/2646
+        if arr.dtype.name in ["int8", "int16"]:
+            dtype = np.float32
+        else:
+            dtype = np.float64
+
+    orig_ndim = arr.ndim
+    if orig_ndim == 1:
+        # reshape so we can always use algos.diff_2d
+        arr = arr.reshape(-1, 1)
+        # TODO: require axis == 0
+
+    dtype = np.dtype(dtype)
+    out_arr = np.empty(arr.shape, dtype=dtype)
+
+    na_indexer = [slice(None)] * 2
+    na_indexer[axis] = slice(None, n) if n >= 0 else slice(n, None)
+    out_arr[tuple(na_indexer)] = na
+
+    if arr.dtype.name in _diff_special:
+        # TODO: can diff_2d dtype specialization troubles be fixed by defining
+        # out_arr inside diff_2d?
+        algos.diff_2d(arr, out_arr, n, axis, datetimelike=is_timedelta)
+    else:
+        # To keep mypy happy, _res_indexer is a list while res_indexer is
+        # a tuple, ditto for lag_indexer.
+        _res_indexer = [slice(None)] * 2
+        _res_indexer[axis] = slice(n, None) if n >= 0 else slice(None, n)
+        res_indexer = tuple(_res_indexer)
+
+        _lag_indexer = [slice(None)] * 2
+        _lag_indexer[axis] = slice(None, -n) if n > 0 else slice(-n, None)
+        lag_indexer = tuple(_lag_indexer)
+
+        out_arr[res_indexer] = op(arr[res_indexer], arr[lag_indexer])
+
+    if is_timedelta:
+        out_arr = out_arr.view("timedelta64[ns]")
+
+    if orig_ndim == 1:
+        out_arr = out_arr[:, 0]
+    return out_arr
+
+
+# --------------------------------------------------------------------
+# Helper functions
+
+# Note: safe_sort is in algorithms.py instead of sorting.py because it is
+#  low-dependency, is used in this module, and uses private methods from
+#  this module.
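+# A minimal usage sketch for safe_sort below (illustrative only; it assumes
+# importing from the internal module pandas.core.algorithms, which is not
+# public API):
+#
+#   >>> import numpy as np
+#   >>> from pandas.core.algorithms import safe_sort
+#   >>> safe_sort(np.array([3, 1, 2]), codes=np.array([2, 0]))
+#   (array([1, 2, 3]), array([1, 2]))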
+def safe_sort( + values, + codes=None, + na_sentinel: int = -1, + assume_unique: bool = False, + verify: bool = True, +) -> np.ndarray | tuple[np.ndarray, np.ndarray]: + """ + Sort ``values`` and reorder corresponding ``codes``. + + ``values`` should be unique if ``codes`` is not None. + Safe for use with mixed types (int, str), orders ints before strs. + + Parameters + ---------- + values : list-like + Sequence; must be unique if ``codes`` is not None. + codes : list_like, optional + Indices to ``values``. All out of bound indices are treated as + "not found" and will be masked with ``na_sentinel``. + na_sentinel : int, default -1 + Value in ``codes`` to mark "not found". + Ignored when ``codes`` is None. + assume_unique : bool, default False + When True, ``values`` are assumed to be unique, which can speed up + the calculation. Ignored when ``codes`` is None. + verify : bool, default True + Check if codes are out of bound for the values and put out of bound + codes equal to na_sentinel. If ``verify=False``, it is assumed there + are no out of bound codes. Ignored when ``codes`` is None. + + .. versionadded:: 0.25.0 + + Returns + ------- + ordered : ndarray + Sorted ``values`` + new_codes : ndarray + Reordered ``codes``; returned when ``codes`` is not None. + + Raises + ------ + TypeError + * If ``values`` is not list-like or if ``codes`` is neither None + nor list-like + * If ``values`` cannot be sorted + ValueError + * If ``codes`` is not None and ``values`` contain duplicates. + """ + if not is_list_like(values): + raise TypeError( + "Only list-like objects are allowed to be passed to safe_sort as values" + ) + + if not isinstance(values, (np.ndarray, ABCExtensionArray)): + # don't convert to string types + dtype, _ = infer_dtype_from_array(values) + # error: Argument "dtype" to "asarray" has incompatible type "Union[dtype[Any], + # ExtensionDtype]"; expected "Union[dtype[Any], None, type, _SupportsDType, str, + # Union[Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any], + # _DTypeDict, Tuple[Any, Any]]]" + values = np.asarray(values, dtype=dtype) # type: ignore[arg-type] + + sorter = None + + if ( + not is_extension_array_dtype(values) + and lib.infer_dtype(values, skipna=False) == "mixed-integer" + ): + ordered = _sort_mixed(values) + else: + try: + sorter = values.argsort() + ordered = values.take(sorter) + except TypeError: + # Previous sorters failed or were not applicable, try `_sort_mixed` + # which would work, but which fails for special case of 1d arrays + # with tuples. 
+            if values.size and isinstance(values[0], tuple):
+                ordered = _sort_tuples(values)
+            else:
+                ordered = _sort_mixed(values)
+
+    # codes:
+
+    if codes is None:
+        return ordered
+
+    if not is_list_like(codes):
+        raise TypeError(
+            "Only list-like objects or None are allowed to "
+            "be passed to safe_sort as codes"
+        )
+    codes = ensure_platform_int(np.asarray(codes))
+
+    if not assume_unique and not len(unique(values)) == len(values):
+        raise ValueError("values should be unique if codes is not None")
+
+    if sorter is None:
+        # mixed types
+        hash_klass, values = _get_data_algo(values)
+        t = hash_klass(len(values))
+        t.map_locations(values)
+        sorter = ensure_platform_int(t.lookup(ordered))
+
+    if na_sentinel == -1:
+        # take_nd is faster, but only works for na_sentinels of -1
+        order2 = sorter.argsort()
+        new_codes = take_nd(order2, codes, fill_value=-1)
+        if verify:
+            mask = (codes < -len(values)) | (codes >= len(values))
+        else:
+            mask = None
+    else:
+        reverse_indexer = np.empty(len(sorter), dtype=np.int_)
+        reverse_indexer.put(sorter, np.arange(len(sorter)))
+        # Out of bound indices will be masked with `na_sentinel` next, so we
+        # may deal with them here without performance loss using `mode='wrap'`
+        new_codes = reverse_indexer.take(codes, mode="wrap")
+
+        mask = codes == na_sentinel
+        if verify:
+            mask = mask | (codes < -len(values)) | (codes >= len(values))
+
+    if mask is not None:
+        np.putmask(new_codes, mask, na_sentinel)
+
+    return ordered, ensure_platform_int(new_codes)
+
+
+def _sort_mixed(values) -> np.ndarray:
+    """order ints before strings in 1d arrays, safe in py3"""
+    str_pos = np.array([isinstance(x, str) for x in values], dtype=bool)
+    nums = np.sort(values[~str_pos])
+    strs = np.sort(values[str_pos])
+    return np.concatenate([nums, np.asarray(strs, dtype=object)])
+
+
+def _sort_tuples(values: np.ndarray) -> np.ndarray:
+    """
+    Convert array of tuples (1d) to array of arrays (2d).
+    We need to keep the columns separately as they contain different types and
+    nans (can't use `np.sort` as it may fail when str and nan are mixed in a
+    column as types cannot be compared).
+    """
+    from pandas.core.internals.construction import to_arrays
+    from pandas.core.sorting import lexsort_indexer
+
+    arrays, _ = to_arrays(values, None)
+    indexer = lexsort_indexer(arrays, orders=True)
+    return values[indexer]
+
+
+def union_with_duplicates(lvals: ArrayLike, rvals: ArrayLike) -> ArrayLike:
+    """
+    Extracts the union from lvals and rvals with respect to duplicates and nans in
+    both arrays.
+
+    Parameters
+    ----------
+    lvals : np.ndarray or ExtensionArray
+        Left values, which are ordered in front.
+    rvals : np.ndarray or ExtensionArray
+        Right values, ordered after lvals.
+
+    Returns
+    -------
+    np.ndarray or ExtensionArray
+        Containing the unsorted union of both arrays.
+
+    Notes
+    -----
+    Caller is responsible for ensuring lvals.dtype == rvals.dtype.
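+
+    Examples
+    --------
+    A sketch of the expected behavior (illustrative, not an upstream
+    doctest): each unique value is repeated as many times as its larger
+    count across the two inputs.
+
+    >>> union_with_duplicates(np.array([1, 1, 2]), np.array([2, 2, 3]))
+    array([1, 1, 2, 2, 3])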
+ """ + indexer = [] + l_count = value_counts(lvals, dropna=False) + r_count = value_counts(rvals, dropna=False) + l_count, r_count = l_count.align(r_count, fill_value=0) + unique_array = unique(concat_compat([lvals, rvals])) + unique_array = ensure_wrapped_if_datetimelike(unique_array) + + for i, value in enumerate(unique_array): + indexer += [i] * int(max(l_count.at[value], r_count.at[value])) + return unique_array.take(indexer) diff --git a/.local/lib/python3.8/site-packages/pandas/core/api.py b/.local/lib/python3.8/site-packages/pandas/core/api.py new file mode 100644 index 0000000..cf082d2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/api.py @@ -0,0 +1,86 @@ +# flake8: noqa:F401 + +from pandas._libs import ( + NaT, + Period, + Timedelta, + Timestamp, +) +from pandas._libs.missing import NA + +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + DatetimeTZDtype, + IntervalDtype, + PeriodDtype, +) +from pandas.core.dtypes.missing import ( + isna, + isnull, + notna, + notnull, +) + +from pandas.core.algorithms import ( + factorize, + unique, + value_counts, +) +from pandas.core.arrays import Categorical +from pandas.core.arrays.boolean import BooleanDtype +from pandas.core.arrays.floating import ( + Float32Dtype, + Float64Dtype, +) +from pandas.core.arrays.integer import ( + Int8Dtype, + Int16Dtype, + Int32Dtype, + Int64Dtype, + UInt8Dtype, + UInt16Dtype, + UInt32Dtype, + UInt64Dtype, +) +from pandas.core.arrays.string_ import StringDtype +from pandas.core.construction import array +from pandas.core.flags import Flags +from pandas.core.groupby import ( + Grouper, + NamedAgg, +) +from pandas.core.indexes.api import ( + CategoricalIndex, + DatetimeIndex, + Float64Index, + Index, + Int64Index, + IntervalIndex, + MultiIndex, + NumericIndex, + PeriodIndex, + RangeIndex, + TimedeltaIndex, + UInt64Index, +) +from pandas.core.indexes.datetimes import ( + bdate_range, + date_range, +) +from pandas.core.indexes.interval import ( + Interval, + interval_range, +) +from pandas.core.indexes.period import period_range +from pandas.core.indexes.timedeltas import timedelta_range +from pandas.core.indexing import IndexSlice +from pandas.core.series import Series +from pandas.core.tools.datetimes import to_datetime +from pandas.core.tools.numeric import to_numeric +from pandas.core.tools.timedeltas import to_timedelta + +from pandas.io.formats.format import set_eng_float_format +from pandas.tseries.offsets import DateOffset + +# DataFrame needs to be imported after NamedAgg to avoid a circular import +from pandas.core.frame import DataFrame # isort:skip diff --git a/.local/lib/python3.8/site-packages/pandas/core/apply.py b/.local/lib/python3.8/site-packages/pandas/core/apply.py new file mode 100644 index 0000000..64ee843 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/apply.py @@ -0,0 +1,1566 @@ +from __future__ import annotations + +import abc +from collections import defaultdict +from functools import partial +import inspect +import re +from typing import ( + TYPE_CHECKING, + Any, + Callable, + DefaultDict, + Dict, + Hashable, + Iterable, + Iterator, + List, + Sequence, + cast, +) +import warnings + +import numpy as np + +from pandas._config import option_context + +from pandas._libs import lib +from pandas._typing import ( + AggFuncType, + AggFuncTypeBase, + AggFuncTypeDict, + AggObjType, + Axis, + NDFrameT, +) +from pandas.util._decorators import cache_readonly +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.cast import 
is_nested_object +from pandas.core.dtypes.common import ( + is_dict_like, + is_extension_array_dtype, + is_list_like, + is_sequence, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCNDFrame, + ABCSeries, +) + +from pandas.core.algorithms import safe_sort +from pandas.core.base import ( + DataError, + SelectionMixin, + SpecificationError, +) +import pandas.core.common as com +from pandas.core.construction import ( + create_series_with_explicit_dtype, + ensure_wrapped_if_datetimelike, +) + +if TYPE_CHECKING: + from pandas import ( + DataFrame, + Index, + Series, + ) + from pandas.core.groupby import GroupBy + from pandas.core.resample import Resampler + from pandas.core.window.rolling import BaseWindow + + +ResType = Dict[int, Any] + + +def frame_apply( + obj: DataFrame, + func: AggFuncType, + axis: Axis = 0, + raw: bool = False, + result_type: str | None = None, + args=None, + kwargs=None, +) -> FrameApply: + """construct and return a row or column based frame apply object""" + axis = obj._get_axis_number(axis) + klass: type[FrameApply] + if axis == 0: + klass = FrameRowApply + elif axis == 1: + klass = FrameColumnApply + + return klass( + obj, + func, + raw=raw, + result_type=result_type, + args=args, + kwargs=kwargs, + ) + + +class Apply(metaclass=abc.ABCMeta): + axis: int + + def __init__( + self, + obj: AggObjType, + func, + raw: bool, + result_type: str | None, + args, + kwargs, + ): + self.obj = obj + self.raw = raw + self.args = args or () + self.kwargs = kwargs or {} + + if result_type not in [None, "reduce", "broadcast", "expand"]: + raise ValueError( + "invalid value for result_type, must be one " + "of {None, 'reduce', 'broadcast', 'expand'}" + ) + + self.result_type = result_type + + # curry if needed + if ( + (kwargs or args) + and not isinstance(func, (np.ufunc, str)) + and not is_list_like(func) + ): + + def f(x): + return func(x, *args, **kwargs) + + else: + f = func + + self.orig_f: AggFuncType = func + self.f: AggFuncType = f + + @abc.abstractmethod + def apply(self) -> DataFrame | Series: + pass + + def agg(self) -> DataFrame | Series | None: + """ + Provide an implementation for the aggregators. + + Returns + ------- + Result of aggregation, or None if agg cannot be performed by + this method. + """ + obj = self.obj + arg = self.f + args = self.args + kwargs = self.kwargs + + if isinstance(arg, str): + return self.apply_str() + + if is_dict_like(arg): + return self.agg_dict_like() + elif is_list_like(arg): + # we require a list, but not a 'str' + return self.agg_list_like() + + if callable(arg): + f = com.get_cython_func(arg) + if f and not args and not kwargs: + return getattr(obj, f)() + + # caller can react + return None + + def transform(self) -> DataFrame | Series: + """ + Transform a DataFrame or Series. + + Returns + ------- + DataFrame or Series + Result of applying ``func`` along the given axis of the + Series or DataFrame. + + Raises + ------ + ValueError + If the transform function fails or does not transform. 
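+
+        Examples
+        --------
+        Illustrative sketch via the public entry point that dispatches here
+        (output assumed from ``DataFrame.transform`` semantics):
+
+        >>> import pandas as pd
+        >>> pd.DataFrame({"A": [1, 2]}).transform(lambda x: x + 1)
+           A
+        0  2
+        1  3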
+ """ + obj = self.obj + func = self.orig_f + axis = self.axis + args = self.args + kwargs = self.kwargs + + is_series = obj.ndim == 1 + + if obj._get_axis_number(axis) == 1: + assert not is_series + return obj.T.transform(func, 0, *args, **kwargs).T + + if is_list_like(func) and not is_dict_like(func): + func = cast(List[AggFuncTypeBase], func) + # Convert func equivalent dict + if is_series: + func = {com.get_callable_name(v) or v: v for v in func} + else: + func = {col: func for col in obj} + + if is_dict_like(func): + func = cast(AggFuncTypeDict, func) + return self.transform_dict_like(func) + + # func is either str or callable + func = cast(AggFuncTypeBase, func) + try: + result = self.transform_str_or_callable(func) + except TypeError: + raise + except Exception as err: + raise ValueError("Transform function failed") from err + + # Functions that transform may return empty Series/DataFrame + # when the dtype is not appropriate + if ( + isinstance(result, (ABCSeries, ABCDataFrame)) + and result.empty + and not obj.empty + ): + raise ValueError("Transform function failed") + if not isinstance(result, (ABCSeries, ABCDataFrame)) or not result.index.equals( + obj.index + ): + raise ValueError("Function did not transform") + + return result + + def transform_dict_like(self, func): + """ + Compute transform in the case of a dict-like func + """ + from pandas.core.reshape.concat import concat + + obj = self.obj + args = self.args + kwargs = self.kwargs + + # transform is currently only for Series/DataFrame + assert isinstance(obj, ABCNDFrame) + + if len(func) == 0: + raise ValueError("No transform functions were provided") + + func = self.normalize_dictlike_arg("transform", obj, func) + + results: dict[Hashable, DataFrame | Series] = {} + failed_names = [] + all_type_errors = True + for name, how in func.items(): + colg = obj._gotitem(name, ndim=1) + try: + results[name] = colg.transform(how, 0, *args, **kwargs) + except Exception as err: + if str(err) in { + "Function did not transform", + "No transform functions were provided", + }: + raise err + else: + if not isinstance(err, TypeError): + all_type_errors = False + failed_names.append(name) + # combine results + if not results: + klass = TypeError if all_type_errors else ValueError + raise klass("Transform function failed") + if len(failed_names) > 0: + warnings.warn( + f"{failed_names} did not transform successfully. If any error is " + f"raised, this will raise in a future version of pandas. " + f"Drop these columns/ops to avoid this warning.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return concat(results, axis=1) + + def transform_str_or_callable(self, func) -> DataFrame | Series: + """ + Compute transform in the case of a string or callable func + """ + obj = self.obj + args = self.args + kwargs = self.kwargs + + if isinstance(func, str): + return self._try_aggregate_string_function(obj, func, *args, **kwargs) + + if not args and not kwargs: + f = com.get_cython_func(func) + if f: + return getattr(obj, f)() + + # Two possible ways to use a UDF - apply or call directly + try: + return obj.apply(func, args=args, **kwargs) + except Exception: + return func(obj, *args, **kwargs) + + def agg_list_like(self) -> DataFrame | Series: + """ + Compute aggregation in the case of a list-like argument. + + Returns + ------- + Result of aggregation. + """ + from pandas.core.reshape.concat import concat + + obj = self.obj + arg = cast(List[AggFuncTypeBase], self.f) + + if not isinstance(obj, SelectionMixin): + # i.e. 
obj is Series or DataFrame + selected_obj = obj + elif obj._selected_obj.ndim == 1: + # For SeriesGroupBy this matches _obj_with_exclusions + selected_obj = obj._selected_obj + else: + selected_obj = obj._obj_with_exclusions + + results = [] + keys = [] + failed_names = [] + + depr_nuisance_columns_msg = ( + "{} did not aggregate successfully. If any error is " + "raised this will raise in a future version of pandas. " + "Drop these columns/ops to avoid this warning." + ) + + # degenerate case + if selected_obj.ndim == 1: + for a in arg: + colg = obj._gotitem(selected_obj.name, ndim=1, subset=selected_obj) + try: + new_res = colg.aggregate(a) + + except TypeError: + failed_names.append(com.get_callable_name(a) or a) + else: + results.append(new_res) + + # make sure we find a good name + name = com.get_callable_name(a) or a + keys.append(name) + + # multiples + else: + indices = [] + for index, col in enumerate(selected_obj): + colg = obj._gotitem(col, ndim=1, subset=selected_obj.iloc[:, index]) + try: + # Capture and suppress any warnings emitted by us in the call + # to agg below, but pass through any warnings that were + # generated otherwise. + # This is necessary because of https://bugs.python.org/issue29672 + # See GH #43741 for more details + with warnings.catch_warnings(record=True) as record: + new_res = colg.aggregate(arg) + if len(record) > 0: + match = re.compile(depr_nuisance_columns_msg.format(".*")) + for warning in record: + if re.match(match, str(warning.message)): + failed_names.append(col) + else: + warnings.warn_explicit( + message=warning.message, + category=warning.category, + filename=warning.filename, + lineno=warning.lineno, + ) + + except (TypeError, DataError): + failed_names.append(col) + except ValueError as err: + # cannot aggregate + if "Must produce aggregated value" in str(err): + # raised directly in _aggregate_named + failed_names.append(col) + elif "no results" in str(err): + # reached in test_frame_apply.test_nuiscance_columns + # where the colg.aggregate(arg) ends up going through + # the selected_obj.ndim == 1 branch above with arg == ["sum"] + # on a datetime64[ns] column + failed_names.append(col) + else: + raise + else: + results.append(new_res) + indices.append(index) + + keys = selected_obj.columns.take(indices) + + # if we are empty + if not len(results): + raise ValueError("no results") + + if len(failed_names) > 0: + warnings.warn( + depr_nuisance_columns_msg.format(failed_names), + FutureWarning, + stacklevel=find_stack_level(), + ) + + try: + concatenated = concat(results, keys=keys, axis=1, sort=False) + except TypeError as err: + # we are concatting non-NDFrame objects, + # e.g. a list of scalars + from pandas import Series + + result = Series(results, index=keys, name=obj.name) + if is_nested_object(result): + raise ValueError( + "cannot combine transform and aggregation operations" + ) from err + return result + else: + # Concat uses the first index to determine the final indexing order. + # The union of a shorter first index with the other indices causes + # the index sorting to be different from the order of the aggregating + # functions. Reindex if this is the case. + index_size = concatenated.index.size + full_ordered_index = next( + result.index for result in results if result.index.size == index_size + ) + return concatenated.reindex(full_ordered_index, copy=False) + + def agg_dict_like(self) -> DataFrame | Series: + """ + Compute aggregation in the case of a dict-like argument. + + Returns + ------- + Result of aggregation. 
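+
+        Examples
+        --------
+        Illustrative sketch via the public ``DataFrame.agg`` entry point (a
+        dict-like argument is what routes here; output assumed):
+
+        >>> import pandas as pd
+        >>> pd.DataFrame({"A": [1, 2], "B": [3, 4]}).agg({"A": "min", "B": "max"})
+        A    1
+        B    4
+        dtype: int64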
+ """ + from pandas import Index + from pandas.core.reshape.concat import concat + + obj = self.obj + arg = cast(AggFuncTypeDict, self.f) + + if not isinstance(obj, SelectionMixin): + # i.e. obj is Series or DataFrame + selected_obj = obj + selection = None + else: + selected_obj = obj._selected_obj + selection = obj._selection + + arg = self.normalize_dictlike_arg("agg", selected_obj, arg) + + if selected_obj.ndim == 1: + # key only used for output + colg = obj._gotitem(selection, ndim=1) + results = {key: colg.agg(how) for key, how in arg.items()} + else: + # key used for column selection and output + results = { + key: obj._gotitem(key, ndim=1).agg(how) for key, how in arg.items() + } + + # set the final keys + keys = list(arg.keys()) + + # Avoid making two isinstance calls in all and any below + is_ndframe = [isinstance(r, ABCNDFrame) for r in results.values()] + + # combine results + if all(is_ndframe): + keys_to_use: Iterable[Hashable] + keys_to_use = [k for k in keys if not results[k].empty] + # Have to check, if at least one DataFrame is not empty. + keys_to_use = keys_to_use if keys_to_use != [] else keys + if selected_obj.ndim == 2: + # keys are columns, so we can preserve names + ktu = Index(keys_to_use) + ktu._set_names(selected_obj.columns.names) + keys_to_use = ktu + + axis = 0 if isinstance(obj, ABCSeries) else 1 + result = concat( + {k: results[k] for k in keys_to_use}, axis=axis, keys=keys_to_use + ) + elif any(is_ndframe): + # There is a mix of NDFrames and scalars + raise ValueError( + "cannot perform both aggregation " + "and transformation operations " + "simultaneously" + ) + else: + from pandas import Series + + # we have a dict of scalars + # GH 36212 use name only if obj is a series + if obj.ndim == 1: + obj = cast("Series", obj) + name = obj.name + else: + name = None + + result = Series(results, name=name) + + return result + + def apply_str(self) -> DataFrame | Series: + """ + Compute apply in case of a string. + + Returns + ------- + result: Series or DataFrame + """ + # Caller is responsible for checking isinstance(self.f, str) + f = cast(str, self.f) + + obj = self.obj + + # Support for `frame.transform('method')` + # Some methods (shift, etc.) require the axis argument, others + # don't, so inspect and insert if necessary. + func = getattr(obj, f, None) + if callable(func): + sig = inspect.getfullargspec(func) + if "axis" in sig.args: + self.kwargs["axis"] = self.axis + elif self.axis != 0: + raise ValueError(f"Operation {f} does not support axis=1") + return self._try_aggregate_string_function(obj, f, *self.args, **self.kwargs) + + def apply_multiple(self) -> DataFrame | Series: + """ + Compute apply in case of a list-like or dict-like. + + Returns + ------- + result: Series, DataFrame, or None + Result when self.f is a list-like or dict-like, None otherwise. + """ + return self.obj.aggregate(self.f, self.axis, *self.args, **self.kwargs) + + def normalize_dictlike_arg( + self, how: str, obj: DataFrame | Series, func: AggFuncTypeDict + ) -> AggFuncTypeDict: + """ + Handler for dict-like argument. + + Ensures that necessary columns exist if obj is a DataFrame, and + that a nested renamer is not passed. Also normalizes to all lists + when values consists of a mix of list and non-lists. 
+ """ + assert how in ("apply", "agg", "transform") + + # Can't use func.values(); wouldn't work for a Series + if ( + how == "agg" + and isinstance(obj, ABCSeries) + and any(is_list_like(v) for _, v in func.items()) + ) or (any(is_dict_like(v) for _, v in func.items())): + # GH 15931 - deprecation of renaming keys + raise SpecificationError("nested renamer is not supported") + + if obj.ndim != 1: + # Check for missing columns on a frame + cols = set(func.keys()) - set(obj.columns) + if len(cols) > 0: + cols_sorted = list(safe_sort(list(cols))) + raise KeyError(f"Column(s) {cols_sorted} do not exist") + + is_aggregator = lambda x: isinstance(x, (list, tuple, dict)) + + # if we have a dict of any non-scalars + # eg. {'A' : ['mean']}, normalize all to + # be list-likes + # Cannot use func.values() because arg may be a Series + if any(is_aggregator(x) for _, x in func.items()): + new_func: AggFuncTypeDict = {} + for k, v in func.items(): + if not is_aggregator(v): + # mypy can't realize v is not a list here + new_func[k] = [v] # type:ignore[list-item] + else: + new_func[k] = v + func = new_func + return func + + def _try_aggregate_string_function(self, obj, arg: str, *args, **kwargs): + """ + if arg is a string, then try to operate on it: + - try to find a function (or attribute) on ourselves + - try to find a numpy function + - raise + """ + assert isinstance(arg, str) + + f = getattr(obj, arg, None) + if f is not None: + if callable(f): + return f(*args, **kwargs) + + # people may try to aggregate on a non-callable attribute + # but don't let them think they can pass args to it + assert len(args) == 0 + assert len([kwarg for kwarg in kwargs if kwarg not in ["axis"]]) == 0 + return f + + f = getattr(np, arg, None) + if f is not None and hasattr(obj, "__array__"): + # in particular exclude Window + return f(obj, *args, **kwargs) + + raise AttributeError( + f"'{arg}' is not a valid function for '{type(obj).__name__}' object" + ) + + +class NDFrameApply(Apply): + """ + Methods shared by FrameApply and SeriesApply but + not GroupByApply or ResamplerWindowApply + """ + + @property + def index(self) -> Index: + return self.obj.index + + @property + def agg_axis(self) -> Index: + return self.obj._get_agg_axis(self.axis) + + +class FrameApply(NDFrameApply): + obj: DataFrame + + # --------------------------------------------------------------- + # Abstract Methods + + @property + @abc.abstractmethod + def result_index(self) -> Index: + pass + + @property + @abc.abstractmethod + def result_columns(self) -> Index: + pass + + @property + @abc.abstractmethod + def series_generator(self) -> Iterator[Series]: + pass + + @abc.abstractmethod + def wrap_results_for_axis( + self, results: ResType, res_index: Index + ) -> DataFrame | Series: + pass + + # --------------------------------------------------------------- + + @property + def res_columns(self) -> Index: + return self.result_columns + + @property + def columns(self) -> Index: + return self.obj.columns + + @cache_readonly + def values(self): + return self.obj.values + + @cache_readonly + def dtypes(self) -> Series: + return self.obj.dtypes + + def apply(self) -> DataFrame | Series: + """compute the results""" + # dispatch to agg + if is_list_like(self.f): + return self.apply_multiple() + + # all empty + if len(self.columns) == 0 and len(self.index) == 0: + return self.apply_empty_result() + + # string dispatch + if isinstance(self.f, str): + return self.apply_str() + + # ufunc + elif isinstance(self.f, np.ufunc): + with np.errstate(all="ignore"): + 
results = self.obj._mgr.apply("apply", func=self.f) + # _constructor will retain self.index and self.columns + return self.obj._constructor(data=results) + + # broadcasting + if self.result_type == "broadcast": + return self.apply_broadcast(self.obj) + + # one axis empty + elif not all(self.obj.shape): + return self.apply_empty_result() + + # raw + elif self.raw: + return self.apply_raw() + + return self.apply_standard() + + def agg(self): + obj = self.obj + axis = self.axis + + # TODO: Avoid having to change state + self.obj = self.obj if self.axis == 0 else self.obj.T + self.axis = 0 + + result = None + try: + result = super().agg() + except TypeError as err: + exc = TypeError( + "DataFrame constructor called with " + f"incompatible data and dtype: {err}" + ) + raise exc from err + finally: + self.obj = obj + self.axis = axis + + if axis == 1: + result = result.T if result is not None else result + + if result is None: + result = self.obj.apply(self.orig_f, axis, args=self.args, **self.kwargs) + + return result + + def apply_empty_result(self): + """ + we have an empty result; at least 1 axis is 0 + + we will try to apply the function to an empty + series in order to see if this is a reduction function + """ + assert callable(self.f) + + # we are not asked to reduce or infer reduction + # so just return a copy of the existing object + if self.result_type not in ["reduce", None]: + return self.obj.copy() + + # we may need to infer + should_reduce = self.result_type == "reduce" + + from pandas import Series + + if not should_reduce: + try: + r = self.f(Series([], dtype=np.float64)) + except Exception: + pass + else: + should_reduce = not isinstance(r, Series) + + if should_reduce: + if len(self.agg_axis): + r = self.f(Series([], dtype=np.float64)) + else: + r = np.nan + + return self.obj._constructor_sliced(r, index=self.agg_axis) + else: + return self.obj.copy() + + def apply_raw(self): + """apply to the values as a numpy array""" + + def wrap_function(func): + """ + Wrap user supplied function to work around numpy issue. 
+ + see https://github.com/numpy/numpy/issues/8352 + """ + + def wrapper(*args, **kwargs): + result = func(*args, **kwargs) + if isinstance(result, str): + result = np.array(result, dtype=object) + return result + + return wrapper + + result = np.apply_along_axis(wrap_function(self.f), self.axis, self.values) + + # TODO: mixed type case + if result.ndim == 2: + return self.obj._constructor(result, index=self.index, columns=self.columns) + else: + return self.obj._constructor_sliced(result, index=self.agg_axis) + + def apply_broadcast(self, target: DataFrame) -> DataFrame: + assert callable(self.f) + + result_values = np.empty_like(target.values) + + # axis which we want to compare compliance + result_compare = target.shape[0] + + for i, col in enumerate(target.columns): + res = self.f(target[col]) + ares = np.asarray(res).ndim + + # must be a scalar or 1d + if ares > 1: + raise ValueError("too many dims to broadcast") + elif ares == 1: + + # must match return dim + if result_compare != len(res): + raise ValueError("cannot broadcast result") + + result_values[:, i] = res + + # we *always* preserve the original index / columns + result = self.obj._constructor( + result_values, index=target.index, columns=target.columns + ) + return result + + def apply_standard(self): + results, res_index = self.apply_series_generator() + + # wrap results + return self.wrap_results(results, res_index) + + def apply_series_generator(self) -> tuple[ResType, Index]: + assert callable(self.f) + + series_gen = self.series_generator + res_index = self.result_index + + results = {} + + with option_context("mode.chained_assignment", None): + for i, v in enumerate(series_gen): + # ignore SettingWithCopy here in case the user mutates + results[i] = self.f(v) + if isinstance(results[i], ABCSeries): + # If we have a view on v, we need to make a copy because + # series_generator will swap out the underlying data + results[i] = results[i].copy(deep=False) + + return results, res_index + + def wrap_results(self, results: ResType, res_index: Index) -> DataFrame | Series: + from pandas import Series + + # see if we can infer the results + if len(results) > 0 and 0 in results and is_sequence(results[0]): + return self.wrap_results_for_axis(results, res_index) + + # dict of scalars + + # the default dtype of an empty Series will be `object`, but this + # code can be hit by df.mean() where the result should have dtype + # float64 even if it's an empty Series. 
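+        # e.g. (illustrative) DataFrame(columns=["a"]).mean() reaches this
+        # path and should return an empty float64 Series, not object dtype.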
+ constructor_sliced = self.obj._constructor_sliced + if constructor_sliced is Series: + result = create_series_with_explicit_dtype( + results, dtype_if_empty=np.float64 + ) + else: + result = constructor_sliced(results) + result.index = res_index + + return result + + def apply_str(self) -> DataFrame | Series: + # Caller is responsible for checking isinstance(self.f, str) + # TODO: GH#39993 - Avoid special-casing by replacing with lambda + if self.f == "size": + # Special-cased because DataFrame.size returns a single scalar + obj = self.obj + value = obj.shape[self.axis] + return obj._constructor_sliced(value, index=self.agg_axis) + return super().apply_str() + + +class FrameRowApply(FrameApply): + axis = 0 + + def apply_broadcast(self, target: DataFrame) -> DataFrame: + return super().apply_broadcast(target) + + @property + def series_generator(self): + return (self.obj._ixs(i, axis=1) for i in range(len(self.columns))) + + @property + def result_index(self) -> Index: + return self.columns + + @property + def result_columns(self) -> Index: + return self.index + + def wrap_results_for_axis( + self, results: ResType, res_index: Index + ) -> DataFrame | Series: + """return the results for the rows""" + + if self.result_type == "reduce": + # e.g. test_apply_dict GH#8735 + res = self.obj._constructor_sliced(results) + res.index = res_index + return res + + elif self.result_type is None and all( + isinstance(x, dict) for x in results.values() + ): + # Our operation was a to_dict op e.g. + # test_apply_dict GH#8735, test_apply_reduce_to_dict GH#25196 #37544 + res = self.obj._constructor_sliced(results) + res.index = res_index + return res + + try: + result = self.obj._constructor(data=results) + except ValueError as err: + if "All arrays must be of the same length" in str(err): + # e.g. result = [[2, 3], [1.5], ['foo', 'bar']] + # see test_agg_listlike_result GH#29587 + res = self.obj._constructor_sliced(results) + res.index = res_index + return res + else: + raise + + if not isinstance(results[0], ABCSeries): + if len(result.index) == len(self.res_columns): + result.index = self.res_columns + + if len(result.columns) == len(res_index): + result.columns = res_index + + return result + + +class FrameColumnApply(FrameApply): + axis = 1 + + def apply_broadcast(self, target: DataFrame) -> DataFrame: + result = super().apply_broadcast(target.T) + return result.T + + @property + def series_generator(self): + values = self.values + values = ensure_wrapped_if_datetimelike(values) + assert len(values) > 0 + + # We create one Series object, and will swap out the data inside + # of it. Kids: don't do this at home. 
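+        # Reusing a single Series and swapping its underlying data avoids
+        # allocating a new Series per row; see the GH#35462 re-pinning below.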
+ ser = self.obj._ixs(0, axis=0) + mgr = ser._mgr + + if is_extension_array_dtype(ser.dtype): + # values will be incorrect for this block + # TODO(EA2D): special case would be unnecessary with 2D EAs + obj = self.obj + for i in range(len(obj)): + yield obj._ixs(i, axis=0) + + else: + for (arr, name) in zip(values, self.index): + # GH#35462 re-pin mgr in case setitem changed it + ser._mgr = mgr + mgr.set_values(arr) + ser.name = name + yield ser + + @property + def result_index(self) -> Index: + return self.index + + @property + def result_columns(self) -> Index: + return self.columns + + def wrap_results_for_axis( + self, results: ResType, res_index: Index + ) -> DataFrame | Series: + """return the results for the columns""" + result: DataFrame | Series + + # we have requested to expand + if self.result_type == "expand": + result = self.infer_to_same_shape(results, res_index) + + # we have a non-series and don't want inference + elif not isinstance(results[0], ABCSeries): + result = self.obj._constructor_sliced(results) + result.index = res_index + + # we may want to infer results + else: + result = self.infer_to_same_shape(results, res_index) + + return result + + def infer_to_same_shape(self, results: ResType, res_index: Index) -> DataFrame: + """infer the results to the same shape as the input object""" + result = self.obj._constructor(data=results) + result = result.T + + # set the index + result.index = res_index + + # infer dtypes + result = result.infer_objects() + + return result + + +class SeriesApply(NDFrameApply): + obj: Series + axis = 0 + + def __init__( + self, + obj: Series, + func: AggFuncType, + convert_dtype: bool, + args, + kwargs, + ): + self.convert_dtype = convert_dtype + + super().__init__( + obj, + func, + raw=False, + result_type=None, + args=args, + kwargs=kwargs, + ) + + def apply(self) -> DataFrame | Series: + obj = self.obj + + if len(obj) == 0: + return self.apply_empty_result() + + # dispatch to agg + if is_list_like(self.f): + return self.apply_multiple() + + if isinstance(self.f, str): + # if we are a string, try to dispatch + return self.apply_str() + + return self.apply_standard() + + def agg(self): + result = super().agg() + if result is None: + f = self.f + kwargs = self.kwargs + + # string, list-like, and dict-like are entirely handled in super + assert callable(f) + + # we can be called from an inner function which + # passes this meta-data + kwargs.pop("_level", None) + + # try a regular apply, this evaluates lambdas + # row-by-row; however if the lambda is expected a Series + # expression, e.g.: lambda x: x-x.quantile(0.25) + # this will fail, so we can try a vectorized evaluation + + # we cannot FIRST try the vectorized evaluation, because + # then .agg and .apply would have different semantics if the + # operation is actually defined on the Series, e.g. 
str + try: + result = self.obj.apply(f) + except (ValueError, AttributeError, TypeError): + result = f(self.obj) + + return result + + def apply_empty_result(self) -> Series: + obj = self.obj + return obj._constructor(dtype=obj.dtype, index=obj.index).__finalize__( + obj, method="apply" + ) + + def apply_standard(self) -> DataFrame | Series: + f = self.f + obj = self.obj + + with np.errstate(all="ignore"): + if isinstance(f, np.ufunc): + return f(obj) + + # row-wise access + if is_extension_array_dtype(obj.dtype) and hasattr(obj._values, "map"): + # GH#23179 some EAs do not have `map` + mapped = obj._values.map(f) + else: + values = obj.astype(object)._values + # error: Argument 2 to "map_infer" has incompatible type + # "Union[Callable[..., Any], str, List[Union[Callable[..., Any], str]], + # Dict[Hashable, Union[Union[Callable[..., Any], str], + # List[Union[Callable[..., Any], str]]]]]"; expected + # "Callable[[Any], Any]" + mapped = lib.map_infer( + values, + f, # type: ignore[arg-type] + convert=self.convert_dtype, + ) + + if len(mapped) and isinstance(mapped[0], ABCSeries): + # GH#43986 Need to do list(mapped) in order to get treated as nested + # See also GH#25959 regarding EA support + return obj._constructor_expanddim(list(mapped), index=obj.index) + else: + return obj._constructor(mapped, index=obj.index).__finalize__( + obj, method="apply" + ) + + +class GroupByApply(Apply): + def __init__( + self, + obj: GroupBy[NDFrameT], + func: AggFuncType, + args, + kwargs, + ): + kwargs = kwargs.copy() + self.axis = obj.obj._get_axis_number(kwargs.get("axis", 0)) + super().__init__( + obj, + func, + raw=False, + result_type=None, + args=args, + kwargs=kwargs, + ) + + def apply(self): + raise NotImplementedError + + def transform(self): + raise NotImplementedError + + +class ResamplerWindowApply(Apply): + axis = 0 + obj: Resampler | BaseWindow + + def __init__( + self, + obj: Resampler | BaseWindow, + func: AggFuncType, + args, + kwargs, + ): + super().__init__( + obj, + func, + raw=False, + result_type=None, + args=args, + kwargs=kwargs, + ) + + def apply(self): + raise NotImplementedError + + def transform(self): + raise NotImplementedError + + +def reconstruct_func( + func: AggFuncType | None, **kwargs +) -> tuple[bool, AggFuncType | None, list[str] | None, list[int] | None]: + """ + This is the internal function to reconstruct func given if there is relabeling + or not and also normalize the keyword to get new order of columns. + + If named aggregation is applied, `func` will be None, and kwargs contains the + column and aggregation function information to be parsed; + If named aggregation is not applied, `func` is either string (e.g. 'min') or + Callable, or list of them (e.g. ['min', np.max]), or the dictionary of column name + and str/Callable/list of them (e.g. {'A': 'min'}, or {'A': [np.min, lambda x: x]}) + + If relabeling is True, will return relabeling, reconstructed func, column + names, and the reconstructed order of columns. + If relabeling is False, the columns and order will be None. + + Parameters + ---------- + func: agg function (e.g. 'min' or Callable) or list of agg functions + (e.g. ['min', np.max]) or dictionary (e.g. {'A': ['min', np.max]}). 
+    **kwargs: dict, kwargs used in is_multi_agg_with_relabel and
+        normalize_keyword_aggregation function for relabelling
+
+    Returns
+    -------
+    relabelling: bool, if there is relabelling or not
+    func: normalized and mangled func
+    columns: list of column names
+    order: list of columns indices
+
+    Examples
+    --------
+    >>> reconstruct_func(None, **{"foo": ("col", "min")})
+    (True, defaultdict(<class 'list'>, {'col': ['min']}), ('foo',), array([0]))
+
+    >>> reconstruct_func("min")
+    (False, 'min', None, None)
+    """
+    relabeling = func is None and is_multi_agg_with_relabel(**kwargs)
+    columns: list[str] | None = None
+    order: list[int] | None = None
+
+    if not relabeling:
+        if isinstance(func, list) and len(func) > len(set(func)):
+
+            # GH 28426 will raise error if duplicated function names are used and
+            # there is no reassigned name
+            raise SpecificationError(
+                "Function names must be unique if there is no new column names "
+                "assigned"
+            )
+        elif func is None:
+            # nicer error message
+            raise TypeError("Must provide 'func' or tuples of '(column, aggfunc)'.")
+
+    if relabeling:
+        func, columns, order = normalize_keyword_aggregation(kwargs)
+
+    return relabeling, func, columns, order
+
+
+def is_multi_agg_with_relabel(**kwargs) -> bool:
+    """
+    Check whether kwargs passed to .agg look like multi-agg with relabeling.
+
+    Parameters
+    ----------
+    **kwargs : dict
+
+    Returns
+    -------
+    bool
+
+    Examples
+    --------
+    >>> is_multi_agg_with_relabel(a="max")
+    False
+    >>> is_multi_agg_with_relabel(a_max=("a", "max"), a_min=("a", "min"))
+    True
+    >>> is_multi_agg_with_relabel()
+    False
+    """
+    return all(isinstance(v, tuple) and len(v) == 2 for v in kwargs.values()) and (
+        len(kwargs) > 0
+    )
+
+
+def normalize_keyword_aggregation(kwargs: dict) -> tuple[dict, list[str], list[int]]:
+    """
+    Normalize user-provided "named aggregation" kwargs.
+    Transforms from the new ``Mapping[str, NamedAgg]`` style kwargs
+    to the old ``Dict[str, List[scalar]]``.
+
+    Parameters
+    ----------
+    kwargs : dict
+
+    Returns
+    -------
+    aggspec : dict
+        The transformed kwargs.
+    columns : List[str]
+        The user-provided keys.
+    col_idx_order : List[int]
+        List of columns indices.
+
+    Examples
+    --------
+    >>> normalize_keyword_aggregation({"output": ("input", "sum")})
+    (defaultdict(<class 'list'>, {'input': ['sum']}), ('output',), array([0]))
+    """
+    from pandas.core.indexes.base import Index
+
+    # Normalize the aggregation functions as Mapping[column, List[func]],
+    # process normally, then fixup the names.
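+    # e.g. (an illustrative walk-through) kwargs = {"my_min": ("a", "min"),
+    # "my_max": ("a", "max")} yields aggspec = {"a": ["min", "max"]},
+    # columns = ("my_min", "my_max") and order = [("a", "min"), ("a", "max")].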
+    # TODO: aggspec type: typing.Dict[str, List[AggScalar]]
+    # May be hitting https://github.com/python/mypy/issues/5958
+    # saying it doesn't have an attribute __name__
+    aggspec: DefaultDict = defaultdict(list)
+    order = []
+    columns, pairs = list(zip(*kwargs.items()))
+
+    for column, aggfunc in pairs:
+        aggspec[column].append(aggfunc)
+        order.append((column, com.get_callable_name(aggfunc) or aggfunc))
+
+    # uniquify aggfunc name if duplicated in order list
+    uniquified_order = _make_unique_kwarg_list(order)
+
+    # GH 25719, due to aggspec will change the order of assigned columns in aggregation
+    # uniquified_aggspec will store uniquified order list and will compare it with order
+    # based on index
+    aggspec_order = [
+        (column, com.get_callable_name(aggfunc) or aggfunc)
+        for column, aggfuncs in aggspec.items()
+        for aggfunc in aggfuncs
+    ]
+    uniquified_aggspec = _make_unique_kwarg_list(aggspec_order)
+
+    # get the new index of columns by comparison
+    col_idx_order = Index(uniquified_aggspec).get_indexer(uniquified_order)
+    # error: Incompatible return value type (got "Tuple[defaultdict[Any, Any],
+    # Any, ndarray]", expected "Tuple[Dict[Any, Any], List[str], List[int]]")
+    return aggspec, columns, col_idx_order  # type: ignore[return-value]
+
+
+def _make_unique_kwarg_list(
+    seq: Sequence[tuple[Any, Any]]
+) -> Sequence[tuple[Any, Any]]:
+    """
+    Uniquify aggfunc name of the pairs in the order list
+
+    Examples
+    --------
+    >>> kwarg_list = [('a', '<lambda>'), ('a', '<lambda>'), ('b', '<lambda>')]
+    >>> _make_unique_kwarg_list(kwarg_list)
+    [('a', '<lambda>_0'), ('a', '<lambda>_1'), ('b', '<lambda>')]
+    """
+    return [
+        (pair[0], "_".join([pair[1], str(seq[:i].count(pair))]))
+        if seq.count(pair) > 1
+        else pair
+        for i, pair in enumerate(seq)
+    ]
+
+
+def relabel_result(
+    result: DataFrame | Series,
+    func: dict[str, list[Callable | str]],
+    columns: Iterable[Hashable],
+    order: Iterable[int],
+) -> dict[Hashable, Series]:
+    """
+    Internal function to reorder result if relabelling is True for
+    dataframe.agg, and return the reordered result in dict.
+
+    Parameters
+    ----------
+    result: Result from aggregation
+    func: Dict of (column name, funcs)
+    columns: New columns name for relabelling
+    order: New order for relabelling
+
+    Examples
+    --------
+    >>> result = DataFrame({"A": [np.nan, 2, np.nan],
+    ...     "C": [6, np.nan, np.nan], "B": [np.nan, 4, 2.5]})  # doctest: +SKIP
+    >>> funcs = {"A": ["max"], "C": ["max"], "B": ["mean", "min"]}
+    >>> columns = ("foo", "aab", "bar", "dat")
+    >>> order = [0, 1, 2, 3]
+    >>> relabel_result(result, funcs, columns, order)  # doctest: +SKIP
+    dict(A=Series([2.0, NaN, NaN, NaN], index=["foo", "aab", "bar", "dat"]),
+         C=Series([NaN, 6.0, NaN, NaN], index=["foo", "aab", "bar", "dat"]),
+         B=Series([NaN, NaN, 2.5, 4.0], index=["foo", "aab", "bar", "dat"]))
+    """
+    from pandas.core.indexes.base import Index
+
+    reordered_indexes = [
+        pair[0] for pair in sorted(zip(columns, order), key=lambda t: t[1])
+    ]
+    reordered_result_in_dict: dict[Hashable, Series] = {}
+    idx = 0
+
+    reorder_mask = not isinstance(result, ABCSeries) and len(result.columns) > 1
+    for col, fun in func.items():
+        s = result[col].dropna()
+
+        # In the `_aggregate`, the callable names are obtained and used in
+        # `result`, and these names are ordered alphabetically. e.g.
+        #            C2   C1
+        # <lambda>    1   NaN
+        # amax       NaN  4.0
+        # max        NaN  4.0
+        # sum       18.0  6.0
+        # Therefore, the order of functions for each column could be shuffled
+        # accordingly so need to get the callable name if it is not parsed names, and
+        # reorder the aggregated result for each column.
+        # e.g. if df.agg(c1=("C2", sum), c2=("C2", lambda x: min(x))), correct order is
+        # [sum, <lambda>], but in `result`, it will be [<lambda>, sum], and we need to
+        # reorder so that aggregated values map to their functions regarding the order.
+
+        # However, if only one column is being used for aggregation, there is no
+        # need to reorder since the index is not sorted, and keep as is in `funcs`,
+        # e.g.
+        #         A
+        # min   1.0
+        # mean  1.5
+        # mean  1.5
+        if reorder_mask:
+            fun = [
+                com.get_callable_name(f) if not isinstance(f, str) else f for f in fun
+            ]
+            col_idx_order = Index(s.index).get_indexer(fun)
+            s = s[col_idx_order]
+
+        # assign the new user-provided "named aggregation" as index names, and reindex
+        # it based on the whole user-provided names.
+        s.index = reordered_indexes[idx : idx + len(fun)]
+        reordered_result_in_dict[col] = s.reindex(columns, copy=False)
+        idx = idx + len(fun)
+    return reordered_result_in_dict
+
+
+# TODO: Can't use, because mypy doesn't like us setting __name__
+#   error: "partial[Any]" has no attribute "__name__"
+# the type is:
+#   typing.Sequence[Callable[..., ScalarResult]]
+#     -> typing.Sequence[Callable[..., ScalarResult]]:
+
+
+def _managle_lambda_list(aggfuncs: Sequence[Any]) -> Sequence[Any]:
+    """
+    Possibly mangle a list of aggfuncs.
+
+    Parameters
+    ----------
+    aggfuncs : Sequence
+
+    Returns
+    -------
+    mangled : list-like
+        A new AggSpec sequence, where lambdas have been converted
+        to have unique names.
+
+    Notes
+    -----
+    If just one aggfunc is passed, the name will not be mangled.
+    """
+    if len(aggfuncs) <= 1:
+        # don't mangle for .agg([lambda x: .])
+        return aggfuncs
+    i = 0
+    mangled_aggfuncs = []
+    for aggfunc in aggfuncs:
+        if com.get_callable_name(aggfunc) == "<lambda>":
+            aggfunc = partial(aggfunc)
+            aggfunc.__name__ = f"<lambda_{i}>"
+            i += 1
+        mangled_aggfuncs.append(aggfunc)
+
+    return mangled_aggfuncs
+
+
+def maybe_mangle_lambdas(agg_spec: Any) -> Any:
+    """
+    Make new lambdas with unique names.
+
+    Parameters
+    ----------
+    agg_spec : Any
+        An argument to GroupBy.agg.
+        Non-dict-like `agg_spec` is passed through as is.
+        For dict-like `agg_spec` a new spec is returned
+        with name-mangled lambdas.
+
+    Returns
+    -------
+    mangled : Any
+        Same type as the input.
+
+    Examples
+    --------
+    >>> maybe_mangle_lambdas('sum')
+    'sum'
+    >>> maybe_mangle_lambdas([lambda: 1, lambda: 2])  # doctest: +SKIP
+    [<function __main__.<lambda_0>()>,
+     <function pandas...._make_lambda.<locals>.f(*args, **kwargs)>]
+    """
+    is_dict = is_dict_like(agg_spec)
+    if not (is_dict or is_list_like(agg_spec)):
+        return agg_spec
+    mangled_aggspec = type(agg_spec)()  # dict or OrderedDict
+
+    if is_dict:
+        for key, aggfuncs in agg_spec.items():
+            if is_list_like(aggfuncs) and not is_dict_like(aggfuncs):
+                mangled_aggfuncs = _managle_lambda_list(aggfuncs)
+            else:
+                mangled_aggfuncs = aggfuncs
+
+            mangled_aggspec[key] = mangled_aggfuncs
+    else:
+        mangled_aggspec = _managle_lambda_list(agg_spec)
+
+    return mangled_aggspec
+
+
+def validate_func_kwargs(
+    kwargs: dict,
+) -> tuple[list[str], list[str | Callable[..., Any]]]:
+    """
+    Validates types of user-provided "named aggregation" kwargs.
+    `TypeError` is raised if aggfunc is not `str` or callable.
+
+    Parameters
+    ----------
+    kwargs : dict
+
+    Returns
+    -------
+    columns : List[str]
+        List of user-provided keys.
+ func : List[Union[str, callable[...,Any]]] + List of user-provided aggfuncs + + Examples + -------- + >>> validate_func_kwargs({'one': 'min', 'two': 'max'}) + (['one', 'two'], ['min', 'max']) + """ + tuple_given_message = "func is expected but received {} in **kwargs." + columns = list(kwargs) + func = [] + for col_func in kwargs.values(): + if not (isinstance(col_func, str) or callable(col_func)): + raise TypeError(tuple_given_message.format(type(col_func).__name__)) + func.append(col_func) + if not columns: + no_arg_message = "Must provide 'func' or named aggregation **kwargs." + raise TypeError(no_arg_message) + return columns, func diff --git a/.local/lib/python3.8/site-packages/pandas/core/array_algos/__init__.py b/.local/lib/python3.8/site-packages/pandas/core/array_algos/__init__.py new file mode 100644 index 0000000..a7655a0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/array_algos/__init__.py @@ -0,0 +1,9 @@ +""" +core.array_algos is for algorithms that operate on ndarray and ExtensionArray. +These should: + +- Assume that any Index, Series, or DataFrame objects have already been unwrapped. +- Assume that any list arguments have already been cast to ndarray/EA. +- Not depend on Index, Series, or DataFrame, nor import any of these. +- May dispatch to ExtensionArray methods, but should not import from core.arrays. +""" diff --git a/.local/lib/python3.8/site-packages/pandas/core/array_algos/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/array_algos/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..bb5e6e7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/array_algos/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/array_algos/__pycache__/masked_reductions.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/array_algos/__pycache__/masked_reductions.cpython-38.pyc new file mode 100644 index 0000000..d89b972 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/array_algos/__pycache__/masked_reductions.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/array_algos/__pycache__/putmask.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/array_algos/__pycache__/putmask.cpython-38.pyc new file mode 100644 index 0000000..71df99f Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/array_algos/__pycache__/putmask.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/array_algos/__pycache__/quantile.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/array_algos/__pycache__/quantile.cpython-38.pyc new file mode 100644 index 0000000..eea144b Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/array_algos/__pycache__/quantile.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/array_algos/__pycache__/replace.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/array_algos/__pycache__/replace.cpython-38.pyc new file mode 100644 index 0000000..c520028 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/array_algos/__pycache__/replace.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/array_algos/__pycache__/take.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/array_algos/__pycache__/take.cpython-38.pyc new file mode 100644 index 0000000..ca7bda9 Binary files 
/dev/null and b/.local/lib/python3.8/site-packages/pandas/core/array_algos/__pycache__/take.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/pandas/core/array_algos/__pycache__/transforms.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/array_algos/__pycache__/transforms.cpython-38.pyc
new file mode 100644
index 0000000..94b5946
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/array_algos/__pycache__/transforms.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/pandas/core/array_algos/masked_reductions.py b/.local/lib/python3.8/site-packages/pandas/core/array_algos/masked_reductions.py
new file mode 100644
index 0000000..66a3152
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/core/array_algos/masked_reductions.py
@@ -0,0 +1,150 @@
+"""
+masked_reductions.py is for reduction algorithms using a mask-based approach
+for missing values.
+"""
+
+from typing import (
+    Callable,
+    Optional,
+)
+
+import numpy as np
+
+from pandas._libs import missing as libmissing
+
+from pandas.core.nanops import check_below_min_count
+
+
+def _sumprod(
+    func: Callable,
+    values: np.ndarray,
+    mask: np.ndarray,
+    *,
+    skipna: bool = True,
+    min_count: int = 0,
+    axis: Optional[int] = None,
+):
+    """
+    Sum or product for 1D masked array.
+
+    Parameters
+    ----------
+    func : np.sum or np.prod
+    values : np.ndarray
+        Numpy array with the values (can be of any dtype that supports the
+        operation).
+    mask : np.ndarray
+        Boolean numpy array (True values indicate missing values).
+    skipna : bool, default True
+        Whether to skip NA.
+    min_count : int, default 0
+        The required number of valid values to perform the operation. If fewer than
+        ``min_count`` non-NA values are present the result will be NA.
+    axis : int, optional, default None
+    """
+    if not skipna:
+        if mask.any(axis=axis) or check_below_min_count(values.shape, None, min_count):
+            return libmissing.NA
+        else:
+            return func(values, axis=axis)
+    else:
+        if check_below_min_count(values.shape, mask, min_count) and (
+            axis is None or values.ndim == 1
+        ):
+            return libmissing.NA
+
+        return func(values, where=~mask, axis=axis)
+
+
+def sum(
+    values: np.ndarray,
+    mask: np.ndarray,
+    *,
+    skipna: bool = True,
+    min_count: int = 0,
+    axis: Optional[int] = None,
+):
+    return _sumprod(
+        np.sum, values=values, mask=mask, skipna=skipna, min_count=min_count, axis=axis
+    )
+
+
+def prod(
+    values: np.ndarray,
+    mask: np.ndarray,
+    *,
+    skipna: bool = True,
+    min_count: int = 0,
+    axis: Optional[int] = None,
+):
+    return _sumprod(
+        np.prod, values=values, mask=mask, skipna=skipna, min_count=min_count, axis=axis
+    )
+
+
+def _minmax(
+    func: Callable,
+    values: np.ndarray,
+    mask: np.ndarray,
+    *,
+    skipna: bool = True,
+    axis: Optional[int] = None,
+):
+    """
+    Reduction for 1D masked array.
+
+    Parameters
+    ----------
+    func : np.min or np.max
+    values : np.ndarray
+        Numpy array with the values (can be of any dtype that supports the
+        operation).
+    mask : np.ndarray
+        Boolean numpy array (True values indicate missing values).
+    skipna : bool, default True
+        Whether to skip NA.
+    axis : int, optional, default None
+    """
+    if not skipna:
+        if mask.any() or not values.size:
+            # min/max with empty array raise in numpy, pandas returns NA
+            return libmissing.NA
+        else:
+            return func(values)
+    else:
+        subset = values[~mask]
+        if subset.size:
+            return func(subset)
+        else:
+            # min/max with empty array raise in numpy, pandas returns NA
+            return libmissing.NA
+
+
+def min(
+    values: np.ndarray,
+    mask: np.ndarray,
+    *,
+    skipna: bool = True,
+    axis: Optional[int] = None,
+):
+    return _minmax(np.min, values=values, mask=mask, skipna=skipna, axis=axis)
+
+
+def max(
+    values: np.ndarray,
+    mask: np.ndarray,
+    *,
+    skipna: bool = True,
+    axis: Optional[int] = None,
+):
+    return _minmax(np.max, values=values, mask=mask, skipna=skipna, axis=axis)
+
+
+# TODO: axis kwarg
+def mean(values: np.ndarray, mask: np.ndarray, skipna: bool = True):
+    if not values.size or mask.all():
+        return libmissing.NA
+    _sum = _sumprod(np.sum, values=values, mask=mask, skipna=skipna)
+    count = np.count_nonzero(~mask)
+    mean_value = _sum / count
+    return mean_value
diff --git a/.local/lib/python3.8/site-packages/pandas/core/array_algos/putmask.py b/.local/lib/python3.8/site-packages/pandas/core/array_algos/putmask.py
new file mode 100644
index 0000000..24a0f83
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/core/array_algos/putmask.py
@@ -0,0 +1,200 @@
+"""
+EA-compatible analogue to np.putmask
+"""
+from __future__ import annotations
+
+from typing import Any
+
+import numpy as np
+
+from pandas._libs import lib
+from pandas._typing import (
+    ArrayLike,
+    npt,
+)
+
+from pandas.core.dtypes.cast import (
+    can_hold_element,
+    convert_scalar_for_putitemlike,
+    find_common_type,
+    infer_dtype_from,
+)
+from pandas.core.dtypes.common import is_list_like
+
+from pandas.core.arrays import ExtensionArray
+
+
+def putmask_inplace(values: ArrayLike, mask: npt.NDArray[np.bool_], value: Any) -> None:
+    """
+    ExtensionArray-compatible implementation of np.putmask. The main
+    difference is we do not handle repeating or truncating like numpy.
+
+    Parameters
+    ----------
+    values: np.ndarray or ExtensionArray
+    mask : np.ndarray[bool]
+        We assume extract_bool_array has already been called.
+    value : Any
+    """
+
+    if lib.is_scalar(value) and isinstance(values, np.ndarray):
+        value = convert_scalar_for_putitemlike(value, values.dtype)
+
+    if (
+        not isinstance(values, np.ndarray)
+        or (values.dtype == object and not lib.is_scalar(value))
+        # GH#43424: np.putmask raises TypeError if we cannot cast between types with
+        # rule = "safe", a stricter guarantee we may not have here
+        or (
+            isinstance(value, np.ndarray) and not np.can_cast(value.dtype, values.dtype)
+        )
+    ):
+        # GH#19266 using np.putmask gives unexpected results with listlike value
+        # along with object dtype
+        if is_list_like(value) and len(value) == len(values):
+            values[mask] = value[mask]
+        else:
+            values[mask] = value
+    else:
+        # GH#37833 np.putmask is more performant than __setitem__
+        np.putmask(values, mask, value)
+
+
+def putmask_smart(values: np.ndarray, mask: npt.NDArray[np.bool_], new) -> np.ndarray:
+    """
+    Return a new ndarray, try to preserve dtype if possible.
+
+    Parameters
+    ----------
+    values : np.ndarray
+        `values`, updated in-place.
+    mask : np.ndarray[bool]
+        Applies to both sides (array like).
+    new : listlike `new values` aligned with `values`
+
+    Returns
+    -------
+    values : ndarray with updated values
+        this *may* be a copy of the original
+
+    See Also
+    --------
+    np.putmask
+    """
+    # we cannot use np.asarray() here as we cannot have conversions
+    # that numpy does when numeric are mixed with strings
+
+    # see if we are only masking values that, if put,
+    # will work in the current dtype
+    try:
+        nn = new[mask]
+    except TypeError:
+        # TypeError: only integer scalar arrays can be converted to a scalar index
+        pass
+    else:
+        # We only get to putmask_smart when we cannot hold 'new' in values.
+        # The "smart" part of putmask_smart is checking if we can hold new[mask]
+        # in values, in which case we can still avoid the need to cast.
+        if can_hold_element(values, nn):
+            values[mask] = nn
+            return values
+
+    new = np.asarray(new)
+
+    if values.dtype.kind == new.dtype.kind:
+        # preserves dtype if possible
+        np.putmask(values, mask, new)
+        return values
+
+    dtype = find_common_type([values.dtype, new.dtype])
+    values = values.astype(dtype)
+
+    np.putmask(values, mask, new)
+    return values
+
+
+def putmask_without_repeat(
+    values: np.ndarray, mask: npt.NDArray[np.bool_], new: Any
+) -> None:
+    """
+    np.putmask will truncate or repeat if `new` is a listlike with
+    len(new) != len(values). We require an exact match.
+
+    Parameters
+    ----------
+    values : np.ndarray
+    mask : np.ndarray[bool]
+    new : Any
+    """
+    if getattr(new, "ndim", 0) >= 1:
+        new = new.astype(values.dtype, copy=False)
+
+    # TODO: this prob needs some better checking for 2D cases
+    nlocs = mask.sum()
+    if nlocs > 0 and is_list_like(new) and getattr(new, "ndim", 1) == 1:
+        if nlocs == len(new):
+            # GH#30567
+            # If length of ``new`` is less than the length of ``values``,
+            # `np.putmask` would first repeat the ``new`` array and then
+            # assign the masked values, hence producing an incorrect result.
+            # `np.place` on the other hand uses the ``new`` values as-is
+            # to place in the masked locations of ``values``
+            np.place(values, mask, new)
+            # i.e. values[mask] = new
+        elif mask.shape[-1] == len(new) or len(new) == 1:
+            np.putmask(values, mask, new)
+        else:
+            raise ValueError("cannot assign mismatch length to masked array")
+    else:
+        np.putmask(values, mask, new)
+
+
+def validate_putmask(
+    values: ArrayLike, mask: np.ndarray
+) -> tuple[npt.NDArray[np.bool_], bool]:
+    """
+    Validate mask and check if this putmask operation is a no-op.
+    """
+    mask = extract_bool_array(mask)
+    if mask.shape != values.shape:
+        raise ValueError("putmask: mask and data must be the same size")
+
+    noop = not mask.any()
+    return mask, noop
+
+
+def extract_bool_array(mask: ArrayLike) -> npt.NDArray[np.bool_]:
+    """
+    If we have a SparseArray or BooleanArray, convert it to ndarray[bool].
+    """
+    if isinstance(mask, ExtensionArray):
+        # We could have BooleanArray, Sparse[bool], ...
+        # Except for BooleanArray, this is equivalent to just
+        # np.asarray(mask, dtype=bool)
+        mask = mask.to_numpy(dtype=bool, na_value=False)
+
+    mask = np.asarray(mask, dtype=bool)
+    return mask
+
+
+def setitem_datetimelike_compat(values: np.ndarray, num_set: int, other):
+    """
+    Parameters
+    ----------
+    values : np.ndarray
+    num_set : int
+        For putmask, this is mask.sum()
+    other : Any
+    """
+    if values.dtype == object:
+        dtype, _ = infer_dtype_from(other, pandas_dtype=True)
+
+        if isinstance(dtype, np.dtype) and dtype.kind in ["m", "M"]:
+            # https://github.com/numpy/numpy/issues/12550
+            # timedelta64 will incorrectly cast to int
+            if not is_list_like(other):
+                other = [other] * num_set
+            else:
+                other = list(other)
+
+    return other
diff --git a/.local/lib/python3.8/site-packages/pandas/core/array_algos/quantile.py b/.local/lib/python3.8/site-packages/pandas/core/array_algos/quantile.py
new file mode 100644
index 0000000..64cd43a
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/core/array_algos/quantile.py
@@ -0,0 +1,188 @@
+from __future__ import annotations
+
+import numpy as np
+
+from pandas._libs import lib
+from pandas._typing import (
+    ArrayLike,
+    Scalar,
+    npt,
+)
+from pandas.compat.numpy import np_percentile_argname
+
+from pandas.core.dtypes.missing import (
+    isna,
+    na_value_for_dtype,
+)
+
+
+def quantile_compat(
+    values: ArrayLike, qs: npt.NDArray[np.float64], interpolation: str
+) -> ArrayLike:
+    """
+    Compute the quantiles of the given values for each quantile in `qs`.
+
+    Parameters
+    ----------
+    values : np.ndarray or ExtensionArray
+    qs : np.ndarray[float64]
+    interpolation : str
+
+    Returns
+    -------
+    np.ndarray or ExtensionArray
+    """
+    if isinstance(values, np.ndarray):
+        fill_value = na_value_for_dtype(values.dtype, compat=False)
+        mask = isna(values)
+        return quantile_with_mask(values, mask, fill_value, qs, interpolation)
+    else:
+        return values._quantile(qs, interpolation)
+
+
+def quantile_with_mask(
+    values: np.ndarray,
+    mask: npt.NDArray[np.bool_],
+    fill_value,
+    qs: npt.NDArray[np.float64],
+    interpolation: str,
+) -> np.ndarray:
+    """
+    Compute the quantiles of the given values for each quantile in `qs`.
+
+    Parameters
+    ----------
+    values : np.ndarray
+        For ExtensionArray, this is _values_for_factorize()[0]
+    mask : np.ndarray[bool]
+        mask = isna(values)
+        For ExtensionArray, this is computed before calling _values_for_factorize
+    fill_value : Scalar
+        The value to fill NA entries with.
+        For ExtensionArray, this is _values_for_factorize()[1]
+    qs : np.ndarray[float64]
+    interpolation : str
+        Type of interpolation
+
+    Returns
+    -------
+    np.ndarray
+
+    Notes
+    -----
+    Assumes values is already 2D. For ExtensionArray this means np.atleast_2d
+    has been called on _values_for_factorize()[0]
+
+    Quantile is computed along axis=1.
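+
+    Examples
+    --------
+    Editor's illustrative sketch (not upstream pandas text), assuming a
+    2-D float array with one missing entry:
+
+    >>> values = np.array([[1.0, np.nan, 3.0]])
+    >>> mask = np.isnan(values)
+    >>> quantile_with_mask(values, mask, np.nan, np.array([0.5]), "linear")
+    array([[2.]])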
+ """ + assert values.ndim == 2 + + is_empty = values.shape[1] == 0 + + if is_empty: + # create the array of na_values + # 2d len(values) * len(qs) + flat = np.array([fill_value] * len(qs)) + result = np.repeat(flat, len(values)).reshape(len(values), len(qs)) + else: + result = _nanpercentile( + values, + qs * 100.0, + na_value=fill_value, + mask=mask, + interpolation=interpolation, + ) + + result = np.array(result, copy=False) + result = result.T + + return result + + +def _nanpercentile_1d( + values: np.ndarray, + mask: npt.NDArray[np.bool_], + qs: npt.NDArray[np.float64], + na_value: Scalar, + interpolation, +) -> Scalar | np.ndarray: + """ + Wrapper for np.percentile that skips missing values, specialized to + 1-dimensional case. + + Parameters + ---------- + values : array over which to find quantiles + mask : ndarray[bool] + locations in values that should be considered missing + qs : np.ndarray[float64] of quantile indices to find + na_value : scalar + value to return for empty or all-null values + interpolation : str + + Returns + ------- + quantiles : scalar or array + """ + # mask is Union[ExtensionArray, ndarray] + values = values[~mask] + + if len(values) == 0: + return np.array([na_value] * len(qs), dtype=values.dtype) + + return np.percentile(values, qs, **{np_percentile_argname: interpolation}) + + +def _nanpercentile( + values: np.ndarray, + qs: npt.NDArray[np.float64], + *, + na_value, + mask: npt.NDArray[np.bool_], + interpolation, +): + """ + Wrapper for np.percentile that skips missing values. + + Parameters + ---------- + values : np.ndarray[ndim=2] over which to find quantiles + qs : np.ndarray[float64] of quantile indices to find + na_value : scalar + value to return for empty or all-null values + mask : np.ndarray[bool] + locations in values that should be considered missing + interpolation : str + + Returns + ------- + quantiles : scalar or array + """ + + if values.dtype.kind in ["m", "M"]: + # need to cast to integer to avoid rounding errors in numpy + result = _nanpercentile( + values.view("i8"), + qs=qs, + na_value=na_value.view("i8"), + mask=mask, + interpolation=interpolation, + ) + + # Note: we have to do `astype` and not view because in general we + # have float result at this point, not i8 + return result.astype(values.dtype) + + if not lib.is_scalar(mask) and mask.any(): + # Caller is responsible for ensuring mask shape match + assert mask.shape == values.shape + result = [ + _nanpercentile_1d(val, m, qs, na_value, interpolation=interpolation) + for (val, m) in zip(list(values), list(mask)) + ] + result = np.array(result, dtype=values.dtype, copy=False).T + return result + else: + return np.percentile( + values, qs, axis=1, **{np_percentile_argname: interpolation} + ) diff --git a/.local/lib/python3.8/site-packages/pandas/core/array_algos/replace.py b/.local/lib/python3.8/site-packages/pandas/core/array_algos/replace.py new file mode 100644 index 0000000..e26bb9f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/array_algos/replace.py @@ -0,0 +1,158 @@ +""" +Methods used by Block.replace and related methods. 
+""" +from __future__ import annotations + +import operator +import re +from typing import ( + Any, + Pattern, +) + +import numpy as np + +from pandas._typing import ( + ArrayLike, + Scalar, +) + +from pandas.core.dtypes.common import ( + is_datetimelike_v_numeric, + is_numeric_v_string_like, + is_re, + is_re_compilable, + is_scalar, +) +from pandas.core.dtypes.missing import isna + + +def should_use_regex(regex: bool, to_replace: Any) -> bool: + """ + Decide whether to treat `to_replace` as a regular expression. + """ + if is_re(to_replace): + regex = True + + regex = regex and is_re_compilable(to_replace) + + # Don't use regex if the pattern is empty. + regex = regex and re.compile(to_replace).pattern != "" + return regex + + +def compare_or_regex_search( + a: ArrayLike, b: Scalar | Pattern, regex: bool, mask: np.ndarray +) -> ArrayLike | bool: + """ + Compare two array-like inputs of the same shape or two scalar values + + Calls operator.eq or re.search, depending on regex argument. If regex is + True, perform an element-wise regex matching. + + Parameters + ---------- + a : array-like + b : scalar or regex pattern + regex : bool + mask : np.ndarray[bool] + + Returns + ------- + mask : array-like of bool + """ + if isna(b): + return ~mask + + def _check_comparison_types( + result: ArrayLike | bool, a: ArrayLike, b: Scalar | Pattern + ): + """ + Raises an error if the two arrays (a,b) cannot be compared. + Otherwise, returns the comparison result as expected. + """ + if is_scalar(result) and isinstance(a, np.ndarray): + type_names = [type(a).__name__, type(b).__name__] + + type_names[0] = f"ndarray(dtype={a.dtype})" + + raise TypeError( + f"Cannot compare types {repr(type_names[0])} and {repr(type_names[1])}" + ) + + if not regex or not should_use_regex(regex, b): + # TODO: should use missing.mask_missing? + op = lambda x: operator.eq(x, b) + else: + op = np.vectorize( + lambda x: bool(re.search(b, x)) + if isinstance(x, str) and isinstance(b, (str, Pattern)) + else False + ) + + # GH#32621 use mask to avoid comparing to NAs + if isinstance(a, np.ndarray): + a = a[mask] + + if is_numeric_v_string_like(a, b): + # GH#29553 avoid deprecation warnings from numpy + return np.zeros(a.shape, dtype=bool) + + elif is_datetimelike_v_numeric(a, b): + # GH#29553 avoid deprecation warnings from numpy + _check_comparison_types(False, a, b) + return False + + result = op(a) + + if isinstance(result, np.ndarray) and mask is not None: + # The shape of the mask can differ to that of the result + # since we may compare only a subset of a's or b's elements + tmp = np.zeros(mask.shape, dtype=np.bool_) + np.place(tmp, mask, result) + result = tmp + + _check_comparison_types(result, a, b) + return result + + +def replace_regex(values: ArrayLike, rx: re.Pattern, value, mask: np.ndarray | None): + """ + Parameters + ---------- + values : ArrayLike + Object dtype. + rx : re.Pattern + value : Any + mask : np.ndarray[bool], optional + + Notes + ----- + Alters values in-place. 
+ """ + + # deal with replacing values with objects (strings) that match but + # whose replacement is not a string (numeric, nan, object) + if isna(value) or not isinstance(value, str): + + def re_replacer(s): + if is_re(rx) and isinstance(s, str): + return value if rx.search(s) is not None else s + else: + return s + + else: + # value is guaranteed to be a string here, s can be either a string + # or null if it's null it gets returned + def re_replacer(s): + if is_re(rx) and isinstance(s, str): + return rx.sub(value, s) + else: + return s + + f = np.vectorize(re_replacer, otypes=[np.object_]) + + if mask is None: + values[:] = f(values) + else: + values[mask] = f(values[mask]) diff --git a/.local/lib/python3.8/site-packages/pandas/core/array_algos/take.py b/.local/lib/python3.8/site-packages/pandas/core/array_algos/take.py new file mode 100644 index 0000000..188725f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/array_algos/take.py @@ -0,0 +1,584 @@ +from __future__ import annotations + +import functools +from typing import ( + TYPE_CHECKING, + cast, + overload, +) + +import numpy as np + +from pandas._libs import ( + algos as libalgos, + lib, +) +from pandas._typing import ( + ArrayLike, + npt, +) + +from pandas.core.dtypes.cast import maybe_promote +from pandas.core.dtypes.common import ( + ensure_platform_int, + is_1d_only_ea_obj, +) +from pandas.core.dtypes.missing import na_value_for_dtype + +from pandas.core.construction import ensure_wrapped_if_datetimelike + +if TYPE_CHECKING: + from pandas.core.arrays._mixins import NDArrayBackedExtensionArray + from pandas.core.arrays.base import ExtensionArray + + +@overload +def take_nd( + arr: np.ndarray, + indexer, + axis: int = ..., + fill_value=..., + allow_fill: bool = ..., +) -> np.ndarray: + ... + + +@overload +def take_nd( + arr: ExtensionArray, + indexer, + axis: int = ..., + fill_value=..., + allow_fill: bool = ..., +) -> ArrayLike: + ... + + +def take_nd( + arr: ArrayLike, + indexer, + axis: int = 0, + fill_value=lib.no_default, + allow_fill: bool = True, +) -> ArrayLike: + + """ + Specialized Cython take which sets NaN values in one pass + + This dispatches to ``take`` defined on ExtensionArrays. It does not + currently dispatch to ``SparseArray.take`` for sparse ``arr``. + + Note: this function assumes that the indexer is a valid(ated) indexer with + no out of bound indices. + + Parameters + ---------- + arr : np.ndarray or ExtensionArray + Input array. + indexer : ndarray + 1-D array of indices to take, subarrays corresponding to -1 value + indices are filed with fill_value + axis : int, default 0 + Axis to take from + fill_value : any, default np.nan + Fill value to replace -1 values with + allow_fill : bool, default True + If False, indexer is assumed to contain no -1 values so no filling + will be done. This short-circuits computation of a mask. Result is + undefined if allow_fill == False and -1 is present in indexer. + + Returns + ------- + subarray : np.ndarray or ExtensionArray + May be the same type as the input, or cast to an ndarray. + """ + if fill_value is lib.no_default: + fill_value = na_value_for_dtype(arr.dtype, compat=False) + elif isinstance(arr.dtype, np.dtype) and arr.dtype.kind in "mM": + dtype, fill_value = maybe_promote(arr.dtype, fill_value) + if arr.dtype != dtype: + # EA.take is strict about returning a new object of the same type + # so for that case cast upfront + arr = arr.astype(dtype) + + if not isinstance(arr, np.ndarray): + # i.e. 
ExtensionArray, + # includes for EA to catch DatetimeArray, TimedeltaArray + if not is_1d_only_ea_obj(arr): + # i.e. DatetimeArray, TimedeltaArray + arr = cast("NDArrayBackedExtensionArray", arr) + return arr.take( + indexer, fill_value=fill_value, allow_fill=allow_fill, axis=axis + ) + + return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill) + + arr = np.asarray(arr) + return _take_nd_ndarray(arr, indexer, axis, fill_value, allow_fill) + + +def _take_nd_ndarray( + arr: np.ndarray, + indexer: npt.NDArray[np.intp] | None, + axis: int, + fill_value, + allow_fill: bool, +) -> np.ndarray: + + if indexer is None: + indexer = np.arange(arr.shape[axis], dtype=np.intp) + dtype, fill_value = arr.dtype, arr.dtype.type() + else: + indexer = ensure_platform_int(indexer) + + dtype, fill_value, mask_info = _take_preprocess_indexer_and_fill_value( + arr, indexer, fill_value, allow_fill + ) + + flip_order = False + if arr.ndim == 2 and arr.flags.f_contiguous: + flip_order = True + + if flip_order: + arr = arr.T + axis = arr.ndim - axis - 1 + + # at this point, it's guaranteed that dtype can hold both the arr values + # and the fill_value + out_shape_ = list(arr.shape) + out_shape_[axis] = len(indexer) + out_shape = tuple(out_shape_) + if arr.flags.f_contiguous and axis == arr.ndim - 1: + # minor tweak that can make an order-of-magnitude difference + # for dataframes initialized directly from 2-d ndarrays + # (s.t. df.values is c-contiguous and df._mgr.blocks[0] is its + # f-contiguous transpose) + out = np.empty(out_shape, dtype=dtype, order="F") + else: + out = np.empty(out_shape, dtype=dtype) + + func = _get_take_nd_function( + arr.ndim, arr.dtype, out.dtype, axis=axis, mask_info=mask_info + ) + func(arr, indexer, out, fill_value) + + if flip_order: + out = out.T + return out + + +def take_1d( + arr: ArrayLike, + indexer: npt.NDArray[np.intp], + fill_value=None, + allow_fill: bool = True, + mask: npt.NDArray[np.bool_] | None = None, +) -> ArrayLike: + """ + Specialized version for 1D arrays. Differences compared to `take_nd`: + + - Assumes input array has already been converted to numpy array / EA + - Assumes indexer is already guaranteed to be intp dtype ndarray + - Only works for 1D arrays + + To ensure the lowest possible overhead. + + Note: similarly to `take_nd`, this function assumes that the indexer is + a valid(ated) indexer with no out of bound indices. + + Parameters + ---------- + arr : np.ndarray or ExtensionArray + Input array. + indexer : ndarray + 1-D array of indices to take (validated indices, intp dtype). + fill_value : any, default np.nan + Fill value to replace -1 values with + allow_fill : bool, default True + If False, indexer is assumed to contain no -1 values so no filling + will be done. This short-circuits computation of a mask. Result is + undefined if allow_fill == False and -1 is present in indexer. + mask : np.ndarray, optional, default None + If `allow_fill` is True, and the mask (where indexer == -1) is already + known, it can be passed to avoid recomputation. 
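+
+    Examples
+    --------
+    Editor's illustrative sketch (not upstream pandas text): a -1 entry
+    in the indexer is filled with NaN, promoting int64 to float64.
+
+    >>> arr = np.array([10, 20, 30], dtype=np.int64)
+    >>> take_1d(arr, np.array([0, 2, -1], dtype=np.intp), fill_value=np.nan)
+    array([10., 30., nan])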
+ """ + if not isinstance(arr, np.ndarray): + # ExtensionArray -> dispatch to their method + return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill) + + if not allow_fill: + return arr.take(indexer) + + dtype, fill_value, mask_info = _take_preprocess_indexer_and_fill_value( + arr, indexer, fill_value, True, mask + ) + + # at this point, it's guaranteed that dtype can hold both the arr values + # and the fill_value + out = np.empty(indexer.shape, dtype=dtype) + + func = _get_take_nd_function( + arr.ndim, arr.dtype, out.dtype, axis=0, mask_info=mask_info + ) + func(arr, indexer, out, fill_value) + + return out + + +def take_2d_multi( + arr: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + fill_value=np.nan, +) -> np.ndarray: + """ + Specialized Cython take which sets NaN values in one pass. + """ + # This is only called from one place in DataFrame._reindex_multi, + # so we know indexer is well-behaved. + assert indexer is not None + assert indexer[0] is not None + assert indexer[1] is not None + + row_idx, col_idx = indexer + + row_idx = ensure_platform_int(row_idx) + col_idx = ensure_platform_int(col_idx) + indexer = row_idx, col_idx + mask_info = None + + # check for promotion based on types only (do this first because + # it's faster than computing a mask) + dtype, fill_value = maybe_promote(arr.dtype, fill_value) + if dtype != arr.dtype: + # check if promotion is actually required based on indexer + row_mask = row_idx == -1 + col_mask = col_idx == -1 + row_needs = row_mask.any() + col_needs = col_mask.any() + mask_info = (row_mask, col_mask), (row_needs, col_needs) + + if not (row_needs or col_needs): + # if not, then depromote, set fill_value to dummy + # (it won't be used but we don't want the cython code + # to crash when trying to cast it to dtype) + dtype, fill_value = arr.dtype, arr.dtype.type() + + # at this point, it's guaranteed that dtype can hold both the arr values + # and the fill_value + out_shape = len(row_idx), len(col_idx) + out = np.empty(out_shape, dtype=dtype) + + func = _take_2d_multi_dict.get((arr.dtype.name, out.dtype.name), None) + if func is None and arr.dtype != out.dtype: + func = _take_2d_multi_dict.get((out.dtype.name, out.dtype.name), None) + if func is not None: + func = _convert_wrapper(func, out.dtype) + + if func is not None: + func(arr, indexer, out=out, fill_value=fill_value) + else: + # test_reindex_multi + _take_2d_multi_object( + arr, indexer, out, fill_value=fill_value, mask_info=mask_info + ) + + return out + + +@functools.lru_cache(maxsize=128) +def _get_take_nd_function_cached( + ndim: int, arr_dtype: np.dtype, out_dtype: np.dtype, axis: int +): + """ + Part of _get_take_nd_function below that doesn't need `mask_info` and thus + can be cached (mask_info potentially contains a numpy ndarray which is not + hashable and thus cannot be used as argument for cached function). + """ + tup = (arr_dtype.name, out_dtype.name) + if ndim == 1: + func = _take_1d_dict.get(tup, None) + elif ndim == 2: + if axis == 0: + func = _take_2d_axis0_dict.get(tup, None) + else: + func = _take_2d_axis1_dict.get(tup, None) + if func is not None: + return func + + # We get here with string, uint, float16, and complex dtypes that could + # potentially be handled in algos_take_helper. 
+ # Also a couple with (M8[ns], object) and (m8[ns], object) + tup = (out_dtype.name, out_dtype.name) + if ndim == 1: + func = _take_1d_dict.get(tup, None) + elif ndim == 2: + if axis == 0: + func = _take_2d_axis0_dict.get(tup, None) + else: + func = _take_2d_axis1_dict.get(tup, None) + if func is not None: + func = _convert_wrapper(func, out_dtype) + return func + + return None + + +def _get_take_nd_function( + ndim: int, arr_dtype: np.dtype, out_dtype: np.dtype, axis: int = 0, mask_info=None +): + """ + Get the appropriate "take" implementation for the given dimension, axis + and dtypes. + """ + func = None + if ndim <= 2: + # for this part we don't need `mask_info` -> use the cached algo lookup + func = _get_take_nd_function_cached(ndim, arr_dtype, out_dtype, axis) + + if func is None: + + def func(arr, indexer, out, fill_value=np.nan): + indexer = ensure_platform_int(indexer) + _take_nd_object( + arr, indexer, out, axis=axis, fill_value=fill_value, mask_info=mask_info + ) + + return func + + +def _view_wrapper(f, arr_dtype=None, out_dtype=None, fill_wrap=None): + def wrapper( + arr: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=np.nan + ): + if arr_dtype is not None: + arr = arr.view(arr_dtype) + if out_dtype is not None: + out = out.view(out_dtype) + if fill_wrap is not None: + fill_value = fill_wrap(fill_value) + f(arr, indexer, out, fill_value=fill_value) + + return wrapper + + +def _convert_wrapper(f, conv_dtype): + def wrapper( + arr: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=np.nan + ): + if conv_dtype == object: + # GH#39755 avoid casting dt64/td64 to integers + arr = ensure_wrapped_if_datetimelike(arr) + arr = arr.astype(conv_dtype) + f(arr, indexer, out, fill_value=fill_value) + + return wrapper + + +_take_1d_dict = { + ("int8", "int8"): libalgos.take_1d_int8_int8, + ("int8", "int32"): libalgos.take_1d_int8_int32, + ("int8", "int64"): libalgos.take_1d_int8_int64, + ("int8", "float64"): libalgos.take_1d_int8_float64, + ("int16", "int16"): libalgos.take_1d_int16_int16, + ("int16", "int32"): libalgos.take_1d_int16_int32, + ("int16", "int64"): libalgos.take_1d_int16_int64, + ("int16", "float64"): libalgos.take_1d_int16_float64, + ("int32", "int32"): libalgos.take_1d_int32_int32, + ("int32", "int64"): libalgos.take_1d_int32_int64, + ("int32", "float64"): libalgos.take_1d_int32_float64, + ("int64", "int64"): libalgos.take_1d_int64_int64, + ("int64", "float64"): libalgos.take_1d_int64_float64, + ("float32", "float32"): libalgos.take_1d_float32_float32, + ("float32", "float64"): libalgos.take_1d_float32_float64, + ("float64", "float64"): libalgos.take_1d_float64_float64, + ("object", "object"): libalgos.take_1d_object_object, + ("bool", "bool"): _view_wrapper(libalgos.take_1d_bool_bool, np.uint8, np.uint8), + ("bool", "object"): _view_wrapper(libalgos.take_1d_bool_object, np.uint8, None), + ("datetime64[ns]", "datetime64[ns]"): _view_wrapper( + libalgos.take_1d_int64_int64, np.int64, np.int64, np.int64 + ), + ("timedelta64[ns]", "timedelta64[ns]"): _view_wrapper( + libalgos.take_1d_int64_int64, np.int64, np.int64, np.int64 + ), +} + +_take_2d_axis0_dict = { + ("int8", "int8"): libalgos.take_2d_axis0_int8_int8, + ("int8", "int32"): libalgos.take_2d_axis0_int8_int32, + ("int8", "int64"): libalgos.take_2d_axis0_int8_int64, + ("int8", "float64"): libalgos.take_2d_axis0_int8_float64, + ("int16", "int16"): libalgos.take_2d_axis0_int16_int16, + ("int16", "int32"): libalgos.take_2d_axis0_int16_int32, + ("int16", "int64"): libalgos.take_2d_axis0_int16_int64, + 
("int16", "float64"): libalgos.take_2d_axis0_int16_float64, + ("int32", "int32"): libalgos.take_2d_axis0_int32_int32, + ("int32", "int64"): libalgos.take_2d_axis0_int32_int64, + ("int32", "float64"): libalgos.take_2d_axis0_int32_float64, + ("int64", "int64"): libalgos.take_2d_axis0_int64_int64, + ("int64", "float64"): libalgos.take_2d_axis0_int64_float64, + ("float32", "float32"): libalgos.take_2d_axis0_float32_float32, + ("float32", "float64"): libalgos.take_2d_axis0_float32_float64, + ("float64", "float64"): libalgos.take_2d_axis0_float64_float64, + ("object", "object"): libalgos.take_2d_axis0_object_object, + ("bool", "bool"): _view_wrapper( + libalgos.take_2d_axis0_bool_bool, np.uint8, np.uint8 + ), + ("bool", "object"): _view_wrapper( + libalgos.take_2d_axis0_bool_object, np.uint8, None + ), + ("datetime64[ns]", "datetime64[ns]"): _view_wrapper( + libalgos.take_2d_axis0_int64_int64, np.int64, np.int64, fill_wrap=np.int64 + ), + ("timedelta64[ns]", "timedelta64[ns]"): _view_wrapper( + libalgos.take_2d_axis0_int64_int64, np.int64, np.int64, fill_wrap=np.int64 + ), +} + +_take_2d_axis1_dict = { + ("int8", "int8"): libalgos.take_2d_axis1_int8_int8, + ("int8", "int32"): libalgos.take_2d_axis1_int8_int32, + ("int8", "int64"): libalgos.take_2d_axis1_int8_int64, + ("int8", "float64"): libalgos.take_2d_axis1_int8_float64, + ("int16", "int16"): libalgos.take_2d_axis1_int16_int16, + ("int16", "int32"): libalgos.take_2d_axis1_int16_int32, + ("int16", "int64"): libalgos.take_2d_axis1_int16_int64, + ("int16", "float64"): libalgos.take_2d_axis1_int16_float64, + ("int32", "int32"): libalgos.take_2d_axis1_int32_int32, + ("int32", "int64"): libalgos.take_2d_axis1_int32_int64, + ("int32", "float64"): libalgos.take_2d_axis1_int32_float64, + ("int64", "int64"): libalgos.take_2d_axis1_int64_int64, + ("int64", "float64"): libalgos.take_2d_axis1_int64_float64, + ("float32", "float32"): libalgos.take_2d_axis1_float32_float32, + ("float32", "float64"): libalgos.take_2d_axis1_float32_float64, + ("float64", "float64"): libalgos.take_2d_axis1_float64_float64, + ("object", "object"): libalgos.take_2d_axis1_object_object, + ("bool", "bool"): _view_wrapper( + libalgos.take_2d_axis1_bool_bool, np.uint8, np.uint8 + ), + ("bool", "object"): _view_wrapper( + libalgos.take_2d_axis1_bool_object, np.uint8, None + ), + ("datetime64[ns]", "datetime64[ns]"): _view_wrapper( + libalgos.take_2d_axis1_int64_int64, np.int64, np.int64, fill_wrap=np.int64 + ), + ("timedelta64[ns]", "timedelta64[ns]"): _view_wrapper( + libalgos.take_2d_axis1_int64_int64, np.int64, np.int64, fill_wrap=np.int64 + ), +} + +_take_2d_multi_dict = { + ("int8", "int8"): libalgos.take_2d_multi_int8_int8, + ("int8", "int32"): libalgos.take_2d_multi_int8_int32, + ("int8", "int64"): libalgos.take_2d_multi_int8_int64, + ("int8", "float64"): libalgos.take_2d_multi_int8_float64, + ("int16", "int16"): libalgos.take_2d_multi_int16_int16, + ("int16", "int32"): libalgos.take_2d_multi_int16_int32, + ("int16", "int64"): libalgos.take_2d_multi_int16_int64, + ("int16", "float64"): libalgos.take_2d_multi_int16_float64, + ("int32", "int32"): libalgos.take_2d_multi_int32_int32, + ("int32", "int64"): libalgos.take_2d_multi_int32_int64, + ("int32", "float64"): libalgos.take_2d_multi_int32_float64, + ("int64", "int64"): libalgos.take_2d_multi_int64_int64, + ("int64", "float64"): libalgos.take_2d_multi_int64_float64, + ("float32", "float32"): libalgos.take_2d_multi_float32_float32, + ("float32", "float64"): libalgos.take_2d_multi_float32_float64, + ("float64", "float64"): 
libalgos.take_2d_multi_float64_float64, + ("object", "object"): libalgos.take_2d_multi_object_object, + ("bool", "bool"): _view_wrapper( + libalgos.take_2d_multi_bool_bool, np.uint8, np.uint8 + ), + ("bool", "object"): _view_wrapper( + libalgos.take_2d_multi_bool_object, np.uint8, None + ), + ("datetime64[ns]", "datetime64[ns]"): _view_wrapper( + libalgos.take_2d_multi_int64_int64, np.int64, np.int64, fill_wrap=np.int64 + ), + ("timedelta64[ns]", "timedelta64[ns]"): _view_wrapper( + libalgos.take_2d_multi_int64_int64, np.int64, np.int64, fill_wrap=np.int64 + ), +} + + +def _take_nd_object( + arr: np.ndarray, + indexer: npt.NDArray[np.intp], + out: np.ndarray, + axis: int, + fill_value, + mask_info, +): + if mask_info is not None: + mask, needs_masking = mask_info + else: + mask = indexer == -1 + needs_masking = mask.any() + if arr.dtype != out.dtype: + arr = arr.astype(out.dtype) + if arr.shape[axis] > 0: + arr.take(indexer, axis=axis, out=out) + if needs_masking: + outindexer = [slice(None)] * arr.ndim + outindexer[axis] = mask + out[tuple(outindexer)] = fill_value + + +def _take_2d_multi_object( + arr: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + out: np.ndarray, + fill_value, + mask_info, +) -> None: + # this is not ideal, performance-wise, but it's better than raising + # an exception (best to optimize in Cython to avoid getting here) + row_idx, col_idx = indexer # both np.intp + if mask_info is not None: + (row_mask, col_mask), (row_needs, col_needs) = mask_info + else: + row_mask = row_idx == -1 + col_mask = col_idx == -1 + row_needs = row_mask.any() + col_needs = col_mask.any() + if fill_value is not None: + if row_needs: + out[row_mask, :] = fill_value + if col_needs: + out[:, col_mask] = fill_value + for i in range(len(row_idx)): + u_ = row_idx[i] + for j in range(len(col_idx)): + v = col_idx[j] + out[i, j] = arr[u_, v] + + +def _take_preprocess_indexer_and_fill_value( + arr: np.ndarray, + indexer: npt.NDArray[np.intp], + fill_value, + allow_fill: bool, + mask: npt.NDArray[np.bool_] | None = None, +): + mask_info: tuple[np.ndarray | None, bool] | None = None + + if not allow_fill: + dtype, fill_value = arr.dtype, arr.dtype.type() + mask_info = None, False + else: + # check for promotion based on types only (do this first because + # it's faster than computing a mask) + dtype, fill_value = maybe_promote(arr.dtype, fill_value) + if dtype != arr.dtype: + # check if promotion is actually required based on indexer + if mask is not None: + needs_masking = True + else: + mask = indexer == -1 + needs_masking = bool(mask.any()) + mask_info = mask, needs_masking + if not needs_masking: + # if not, then depromote, set fill_value to dummy + # (it won't be used but we don't want the cython code + # to crash when trying to cast it to dtype) + dtype, fill_value = arr.dtype, arr.dtype.type() + + return dtype, fill_value, mask_info diff --git a/.local/lib/python3.8/site-packages/pandas/core/array_algos/transforms.py b/.local/lib/python3.8/site-packages/pandas/core/array_algos/transforms.py new file mode 100644 index 0000000..27aebb9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/array_algos/transforms.py @@ -0,0 +1,38 @@ +""" +transforms.py is for shape-preserving functions. 
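+
+For example (editor's illustrative doctest, not upstream text), ``shift``
+rolls values along an axis and fills the vacated positions:
+
+>>> import numpy as np
+>>> shift(np.array([1.0, 2.0, 3.0]), periods=1, axis=0, fill_value=0.0)
+array([0., 1., 2.])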
+""" + +import numpy as np + + +def shift(values: np.ndarray, periods: int, axis: int, fill_value) -> np.ndarray: + new_values = values + + if periods == 0 or values.size == 0: + return new_values.copy() + + # make sure array sent to np.roll is c_contiguous + f_ordered = values.flags.f_contiguous + if f_ordered: + new_values = new_values.T + axis = new_values.ndim - axis - 1 + + if new_values.size: + new_values = np.roll( + new_values, + np.intp(periods), + axis=axis, + ) + + axis_indexer = [slice(None)] * values.ndim + if periods > 0: + axis_indexer[axis] = slice(None, periods) + else: + axis_indexer[axis] = slice(periods, None) + new_values[tuple(axis_indexer)] = fill_value + + # restore original order + if f_ordered: + new_values = new_values.T + + return new_values diff --git a/.local/lib/python3.8/site-packages/pandas/core/arraylike.py b/.local/lib/python3.8/site-packages/pandas/core/arraylike.py new file mode 100644 index 0000000..b6e9bf1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/arraylike.py @@ -0,0 +1,528 @@ +""" +Methods that can be shared by many array-like classes or subclasses: + Series + Index + ExtensionArray +""" +import operator +from typing import Any +import warnings + +import numpy as np + +from pandas._libs import lib +from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.generic import ABCNDFrame + +from pandas.core import roperator +from pandas.core.construction import extract_array +from pandas.core.ops.common import unpack_zerodim_and_defer + +REDUCTION_ALIASES = { + "maximum": "max", + "minimum": "min", + "add": "sum", + "multiply": "prod", +} + + +class OpsMixin: + # ------------------------------------------------------------- + # Comparisons + + def _cmp_method(self, other, op): + return NotImplemented + + @unpack_zerodim_and_defer("__eq__") + def __eq__(self, other): + return self._cmp_method(other, operator.eq) + + @unpack_zerodim_and_defer("__ne__") + def __ne__(self, other): + return self._cmp_method(other, operator.ne) + + @unpack_zerodim_and_defer("__lt__") + def __lt__(self, other): + return self._cmp_method(other, operator.lt) + + @unpack_zerodim_and_defer("__le__") + def __le__(self, other): + return self._cmp_method(other, operator.le) + + @unpack_zerodim_and_defer("__gt__") + def __gt__(self, other): + return self._cmp_method(other, operator.gt) + + @unpack_zerodim_and_defer("__ge__") + def __ge__(self, other): + return self._cmp_method(other, operator.ge) + + # ------------------------------------------------------------- + # Logical Methods + + def _logical_method(self, other, op): + return NotImplemented + + @unpack_zerodim_and_defer("__and__") + def __and__(self, other): + return self._logical_method(other, operator.and_) + + @unpack_zerodim_and_defer("__rand__") + def __rand__(self, other): + return self._logical_method(other, roperator.rand_) + + @unpack_zerodim_and_defer("__or__") + def __or__(self, other): + return self._logical_method(other, operator.or_) + + @unpack_zerodim_and_defer("__ror__") + def __ror__(self, other): + return self._logical_method(other, roperator.ror_) + + @unpack_zerodim_and_defer("__xor__") + def __xor__(self, other): + return self._logical_method(other, operator.xor) + + @unpack_zerodim_and_defer("__rxor__") + def __rxor__(self, other): + return self._logical_method(other, roperator.rxor) + + # ------------------------------------------------------------- + # Arithmetic Methods + + def 
_arith_method(self, other, op):
+        return NotImplemented
+
+    @unpack_zerodim_and_defer("__add__")
+    def __add__(self, other):
+        return self._arith_method(other, operator.add)
+
+    @unpack_zerodim_and_defer("__radd__")
+    def __radd__(self, other):
+        return self._arith_method(other, roperator.radd)
+
+    @unpack_zerodim_and_defer("__sub__")
+    def __sub__(self, other):
+        return self._arith_method(other, operator.sub)
+
+    @unpack_zerodim_and_defer("__rsub__")
+    def __rsub__(self, other):
+        return self._arith_method(other, roperator.rsub)
+
+    @unpack_zerodim_and_defer("__mul__")
+    def __mul__(self, other):
+        return self._arith_method(other, operator.mul)
+
+    @unpack_zerodim_and_defer("__rmul__")
+    def __rmul__(self, other):
+        return self._arith_method(other, roperator.rmul)
+
+    @unpack_zerodim_and_defer("__truediv__")
+    def __truediv__(self, other):
+        return self._arith_method(other, operator.truediv)
+
+    @unpack_zerodim_and_defer("__rtruediv__")
+    def __rtruediv__(self, other):
+        return self._arith_method(other, roperator.rtruediv)
+
+    @unpack_zerodim_and_defer("__floordiv__")
+    def __floordiv__(self, other):
+        return self._arith_method(other, operator.floordiv)
+
+    @unpack_zerodim_and_defer("__rfloordiv__")
+    def __rfloordiv__(self, other):
+        return self._arith_method(other, roperator.rfloordiv)
+
+    @unpack_zerodim_and_defer("__mod__")
+    def __mod__(self, other):
+        return self._arith_method(other, operator.mod)
+
+    @unpack_zerodim_and_defer("__rmod__")
+    def __rmod__(self, other):
+        return self._arith_method(other, roperator.rmod)
+
+    @unpack_zerodim_and_defer("__divmod__")
+    def __divmod__(self, other):
+        return self._arith_method(other, divmod)
+
+    @unpack_zerodim_and_defer("__rdivmod__")
+    def __rdivmod__(self, other):
+        return self._arith_method(other, roperator.rdivmod)
+
+    @unpack_zerodim_and_defer("__pow__")
+    def __pow__(self, other):
+        return self._arith_method(other, operator.pow)
+
+    @unpack_zerodim_and_defer("__rpow__")
+    def __rpow__(self, other):
+        return self._arith_method(other, roperator.rpow)
+
+
+# -----------------------------------------------------------------------------
+# Helpers to implement __array_ufunc__
+
+
+def _is_aligned(frame, other):
+    """
+    Helper to check if a DataFrame is aligned with another DataFrame or Series.
+    """
+    from pandas import DataFrame
+
+    if isinstance(other, DataFrame):
+        return frame._indexed_same(other)
+    else:
+        # Series -> match index
+        return frame.columns.equals(other.index)
+
+
+def _maybe_fallback(ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any):
+    """
+    In the future, DataFrame inputs to ufuncs will be aligned before applying
+    the ufunc, but for now we ignore the index and raise a warning if behaviour
+    would change in the future.
+    This helper detects the case where a warning is needed and then falls back
+    to applying the ufunc on arrays to avoid alignment.
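+    For instance (editor's illustrative note), ``np.logaddexp(df1, df2)`` on
+    frames with different indices currently ignores the labels of ``df2`` and
+    warns; calling ``df1.align(df2)`` first opts in to the future behaviour.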
+ + See https://github.com/pandas-dev/pandas/pull/39239 + """ + from pandas import DataFrame + from pandas.core.generic import NDFrame + + n_alignable = sum(isinstance(x, NDFrame) for x in inputs) + n_frames = sum(isinstance(x, DataFrame) for x in inputs) + + if n_alignable >= 2 and n_frames >= 1: + # if there are 2 alignable inputs (Series or DataFrame), of which at least 1 + # is a DataFrame -> we would have had no alignment before -> warn that this + # will align in the future + + # the first frame is what determines the output index/columns in pandas < 1.2 + first_frame = next(x for x in inputs if isinstance(x, DataFrame)) + + # check if the objects are aligned or not + non_aligned = sum( + not _is_aligned(first_frame, x) for x in inputs if isinstance(x, NDFrame) + ) + + # if at least one is not aligned -> warn and fallback to array behaviour + if non_aligned: + warnings.warn( + "Calling a ufunc on non-aligned DataFrames (or DataFrame/Series " + "combination). Currently, the indices are ignored and the result " + "takes the index/columns of the first DataFrame. In the future , " + "the DataFrames/Series will be aligned before applying the ufunc.\n" + "Convert one of the arguments to a NumPy array " + "(eg 'ufunc(df1, np.asarray(df2)') to keep the current behaviour, " + "or align manually (eg 'df1, df2 = df1.align(df2)') before passing to " + "the ufunc to obtain the future behaviour and silence this warning.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + # keep the first dataframe of the inputs, other DataFrame/Series is + # converted to array for fallback behaviour + new_inputs = [] + for x in inputs: + if x is first_frame: + new_inputs.append(x) + elif isinstance(x, NDFrame): + new_inputs.append(np.asarray(x)) + else: + new_inputs.append(x) + + # call the ufunc on those transformed inputs + return getattr(ufunc, method)(*new_inputs, **kwargs) + + # signal that we didn't fallback / execute the ufunc yet + return NotImplemented + + +def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any): + """ + Compatibility with numpy ufuncs. + + See also + -------- + numpy.org/doc/stable/reference/arrays.classes.html#numpy.class.__array_ufunc__ + """ + from pandas.core.generic import NDFrame + from pandas.core.internals import BlockManager + + cls = type(self) + + kwargs = _standardize_out_kwarg(**kwargs) + + # for backwards compatibility check and potentially fallback for non-aligned frames + result = _maybe_fallback(ufunc, method, *inputs, **kwargs) + if result is not NotImplemented: + return result + + # for binary ops, use our custom dunder methods + result = maybe_dispatch_ufunc_to_dunder_op(self, ufunc, method, *inputs, **kwargs) + if result is not NotImplemented: + return result + + # Determine if we should defer. + no_defer = (np.ndarray.__array_ufunc__, cls.__array_ufunc__) + + for item in inputs: + higher_priority = ( + hasattr(item, "__array_priority__") + and item.__array_priority__ > self.__array_priority__ + ) + has_array_ufunc = ( + hasattr(item, "__array_ufunc__") + and type(item).__array_ufunc__ not in no_defer + and not isinstance(item, self._HANDLED_TYPES) + ) + if higher_priority or has_array_ufunc: + return NotImplemented + + # align all the inputs. + types = tuple(type(x) for x in inputs) + alignable = [x for x, t in zip(inputs, types) if issubclass(t, NDFrame)] + + if len(alignable) > 1: + # This triggers alignment. 
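+        # (Editor's illustrative note, not upstream commentary: e.g.
+        # np.logaddexp(s1, s2) with indices ['a', 'b'] and ['b', 'c']
+        # reindexes both Series to the union ['a', 'b', 'c'], introducing
+        # NaNs, before the ufunc is applied.)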
+ # At the moment, there aren't any ufuncs with more than two inputs + # so this ends up just being x1.index | x2.index, but we write + # it to handle *args. + + if len(set(types)) > 1: + # We currently don't handle ufunc(DataFrame, Series) + # well. Previously this raised an internal ValueError. We might + # support it someday, so raise a NotImplementedError. + raise NotImplementedError( + "Cannot apply ufunc {} to mixed DataFrame and Series " + "inputs.".format(ufunc) + ) + axes = self.axes + for obj in alignable[1:]: + # this relies on the fact that we aren't handling mixed + # series / frame ufuncs. + for i, (ax1, ax2) in enumerate(zip(axes, obj.axes)): + axes[i] = ax1.union(ax2) + + reconstruct_axes = dict(zip(self._AXIS_ORDERS, axes)) + inputs = tuple( + x.reindex(**reconstruct_axes) if issubclass(t, NDFrame) else x + for x, t in zip(inputs, types) + ) + else: + reconstruct_axes = dict(zip(self._AXIS_ORDERS, self.axes)) + + if self.ndim == 1: + names = [getattr(x, "name") for x in inputs if hasattr(x, "name")] + name = names[0] if len(set(names)) == 1 else None + reconstruct_kwargs = {"name": name} + else: + reconstruct_kwargs = {} + + def reconstruct(result): + if ufunc.nout > 1: + # np.modf, np.frexp, np.divmod + return tuple(_reconstruct(x) for x in result) + + return _reconstruct(result) + + def _reconstruct(result): + if lib.is_scalar(result): + return result + + if result.ndim != self.ndim: + if method == "outer": + if self.ndim == 2: + # we already deprecated for Series + msg = ( + "outer method for ufunc {} is not implemented on " + "pandas objects. Returning an ndarray, but in the " + "future this will raise a 'NotImplementedError'. " + "Consider explicitly converting the DataFrame " + "to an array with '.to_numpy()' first." + ) + warnings.warn( + msg.format(ufunc), FutureWarning, stacklevel=find_stack_level() + ) + return result + raise NotImplementedError + return result + if isinstance(result, BlockManager): + # we went through BlockManager.apply e.g. np.sqrt + result = self._constructor(result, **reconstruct_kwargs, copy=False) + else: + # we converted an array, lost our axes + result = self._constructor( + result, **reconstruct_axes, **reconstruct_kwargs, copy=False + ) + # TODO: When we support multiple values in __finalize__, this + # should pass alignable to `__finalize__` instead of self. + # Then `np.add(a, b)` would consider attrs from both a and b + # when a and b are NDFrames. + if len(alignable) == 1: + result = result.__finalize__(self) + return result + + if "out" in kwargs: + # e.g. test_multiindex_get_loc + result = dispatch_ufunc_with_out(self, ufunc, method, *inputs, **kwargs) + return reconstruct(result) + + if method == "reduce": + # e.g. test.series.test_ufunc.test_reduce + result = dispatch_reduction_ufunc(self, ufunc, method, *inputs, **kwargs) + if result is not NotImplemented: + return result + + # We still get here with kwargs `axis` for e.g. np.maximum.accumulate + # and `dtype` and `keepdims` for np.ptp + + if self.ndim > 1 and (len(inputs) > 1 or ufunc.nout > 1): + # Just give up on preserving types in the complex case. + # In theory we could preserve them for them. + # * nout>1 is doable if BlockManager.apply took nout and + # returned a Tuple[BlockManager]. + # * len(inputs) > 1 is doable when we know that we have + # aligned blocks / dtypes. + + # e.g. 
my_ufunc, modf, logaddexp, heaviside, subtract, add
+        inputs = tuple(np.asarray(x) for x in inputs)
+        # Note: we can't use default_array_ufunc here bc reindexing means
+        # that `self` may not be among `inputs`
+        result = getattr(ufunc, method)(*inputs, **kwargs)
+    elif self.ndim == 1:
+        # ufunc(series, ...)
+        inputs = tuple(extract_array(x, extract_numpy=True) for x in inputs)
+        result = getattr(ufunc, method)(*inputs, **kwargs)
+    else:
+        # ufunc(dataframe)
+        if method == "__call__" and not kwargs:
+            # for np.<ufunc>(..) calls
+            # kwargs cannot necessarily be handled block-by-block, so only
+            # take this path if there are no kwargs
+            mgr = inputs[0]._mgr
+            result = mgr.apply(getattr(ufunc, method))
+        else:
+            # otherwise specific ufunc methods (eg np.<ufunc>.accumulate(..))
+            # Those can have an axis keyword and thus can't be called block-by-block
+            result = default_array_ufunc(inputs[0], ufunc, method, *inputs, **kwargs)
+            # e.g. np.negative (only one reached), with "where" and "out" in kwargs
+
+    result = reconstruct(result)
+    return result
+
+
+def _standardize_out_kwarg(**kwargs) -> dict:
+    """
+    If kwargs contain "out1" and "out2", replace that with a tuple "out"
+
+    np.divmod, np.modf, np.frexp can have either `out=(out1, out2)` or
+    `out1=out1, out2=out2`
+    """
+    if "out" not in kwargs and "out1" in kwargs and "out2" in kwargs:
+        out1 = kwargs.pop("out1")
+        out2 = kwargs.pop("out2")
+        out = (out1, out2)
+        kwargs["out"] = out
+    return kwargs
+
+
+def dispatch_ufunc_with_out(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
+    """
+    If we have an `out` keyword, then call the ufunc without `out` and then
+    set the result into the given `out`.
+    """
+
+    # Note: we assume _standardize_out_kwarg has already been called.
+    out = kwargs.pop("out")
+    where = kwargs.pop("where", None)
+
+    result = getattr(ufunc, method)(*inputs, **kwargs)
+
+    if result is NotImplemented:
+        return NotImplemented
+
+    if isinstance(result, tuple):
+        # i.e. np.divmod, np.modf, np.frexp
+        if not isinstance(out, tuple) or len(out) != len(result):
+            raise NotImplementedError
+
+        for arr, res in zip(out, result):
+            _assign_where(arr, res, where)
+
+        return out
+
+    if isinstance(out, tuple):
+        if len(out) == 1:
+            out = out[0]
+        else:
+            raise NotImplementedError
+
+    _assign_where(out, result, where)
+    return out
+
+
+def _assign_where(out, result, where) -> None:
+    """
+    Set a ufunc result into 'out', masking with a 'where' argument if necessary.
+    """
+    if where is None:
+        # no 'where' arg passed to ufunc
+        out[:] = result
+    else:
+        np.putmask(out, where, result)
+
+
+def default_array_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
+    """
+    Fallback to the behavior we would get if we did not define __array_ufunc__.
+
+    Notes
+    -----
+    We are assuming that `self` is among `inputs`.
+    """
+    if not any(x is self for x in inputs):
+        raise NotImplementedError
+
+    new_inputs = [x if x is not self else np.asarray(x) for x in inputs]
+
+    return getattr(ufunc, method)(*new_inputs, **kwargs)
+
+
+def dispatch_reduction_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
+    """
+    Dispatch ufunc reductions to self's reduction methods.
+    """
+    assert method == "reduce"
+
+    if len(inputs) != 1 or inputs[0] is not self:
+        return NotImplemented
+
+    if ufunc.__name__ not in REDUCTION_ALIASES:
+        return NotImplemented
+
+    method_name = REDUCTION_ALIASES[ufunc.__name__]
+
+    # NB: we are assuming that min/max represent minimum/maximum methods,
+    # which would not be accurate for e.g. Timestamp.min
+    if not hasattr(self, method_name):
+        return NotImplemented
+
+    if self.ndim > 1:
+        if isinstance(self, ABCNDFrame):
+            # TODO: test cases where this doesn't hold, i.e. 2D DTA/TDA
+            kwargs["numeric_only"] = False
+
+        if "axis" not in kwargs:
+            # For DataFrame reductions we don't want the default axis=0
+            # Note: np.min is not a ufunc, but uses array_function_dispatch,
+            # so calls DataFrame.min (without ever getting here) with the np.min
+            # default of axis=None, which DataFrame.min catches and changes to axis=0.
+            # np.minimum.reduce(df) gets here bc axis is not in kwargs,
+            # so we set axis=0 to match the behavior of np.minimum.reduce(df.values)
+            kwargs["axis"] = 0
+
+    # By default, numpy's reductions do not skip NaNs, so we have to
+    # pass skipna=False
+    return getattr(self, method_name)(skipna=False, **kwargs)
diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/__init__.py b/.local/lib/python3.8/site-packages/pandas/core/arrays/__init__.py
new file mode 100644
index 0000000..e301e82
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/core/arrays/__init__.py
@@ -0,0 +1,41 @@
+from pandas.core.arrays.base import (
+    ExtensionArray,
+    ExtensionOpsMixin,
+    ExtensionScalarOpsMixin,
+)
+from pandas.core.arrays.boolean import BooleanArray
+from pandas.core.arrays.categorical import Categorical
+from pandas.core.arrays.datetimes import DatetimeArray
+from pandas.core.arrays.floating import FloatingArray
+from pandas.core.arrays.integer import IntegerArray
+from pandas.core.arrays.interval import IntervalArray
+from pandas.core.arrays.masked import BaseMaskedArray
+from pandas.core.arrays.numpy_ import PandasArray
+from pandas.core.arrays.period import (
+    PeriodArray,
+    period_array,
+)
+from pandas.core.arrays.sparse import SparseArray
+from pandas.core.arrays.string_ import StringArray
+from pandas.core.arrays.string_arrow import ArrowStringArray
+from pandas.core.arrays.timedeltas import TimedeltaArray
+
+__all__ = [
+    "ExtensionArray",
+    "ExtensionOpsMixin",
+    "ExtensionScalarOpsMixin",
+    "ArrowStringArray",
+    "BaseMaskedArray",
+    "BooleanArray",
+    "Categorical",
+    "DatetimeArray",
+    "FloatingArray",
+    "IntegerArray",
+    "IntervalArray",
+    "PandasArray",
+    "PeriodArray",
+    "period_array",
+    "SparseArray",
+    "StringArray",
+    "TimedeltaArray",
+]
diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000..15efb7c
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/__init__.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/_arrow_utils.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/_arrow_utils.cpython-38.pyc
new file mode 100644
index 0000000..b825617
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/_arrow_utils.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/_mixins.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/_mixins.cpython-38.pyc
new file mode 100644
index 0000000..fd9524d
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/_mixins.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/_ranges.cpython-38.pyc
b/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/_ranges.cpython-38.pyc new file mode 100644 index 0000000..9611def Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/_ranges.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/base.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/base.cpython-38.pyc new file mode 100644 index 0000000..f14cc28 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/base.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/boolean.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/boolean.cpython-38.pyc new file mode 100644 index 0000000..17e7302 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/boolean.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/categorical.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/categorical.cpython-38.pyc new file mode 100644 index 0000000..0d41862 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/categorical.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/datetimelike.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/datetimelike.cpython-38.pyc new file mode 100644 index 0000000..e7311b4 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/datetimelike.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/datetimes.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/datetimes.cpython-38.pyc new file mode 100644 index 0000000..fcf49e1 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/datetimes.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/floating.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/floating.cpython-38.pyc new file mode 100644 index 0000000..c616610 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/floating.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/integer.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/integer.cpython-38.pyc new file mode 100644 index 0000000..cb1b466 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/integer.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/interval.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/interval.cpython-38.pyc new file mode 100644 index 0000000..75e64ef Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/interval.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/masked.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/masked.cpython-38.pyc new file mode 100644 index 0000000..bc29608 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/masked.cpython-38.pyc differ diff 
--git a/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/numeric.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/numeric.cpython-38.pyc new file mode 100644 index 0000000..3ebd0be Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/numeric.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/numpy_.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/numpy_.cpython-38.pyc new file mode 100644 index 0000000..c3f48da Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/numpy_.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/period.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/period.cpython-38.pyc new file mode 100644 index 0000000..69af6d5 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/period.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/string_.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/string_.cpython-38.pyc new file mode 100644 index 0000000..636816b Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/string_.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/string_arrow.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/string_arrow.cpython-38.pyc new file mode 100644 index 0000000..d1a861f Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/string_arrow.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/timedeltas.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/timedeltas.cpython-38.pyc new file mode 100644 index 0000000..2289f32 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/arrays/__pycache__/timedeltas.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/_arrow_utils.py b/.local/lib/python3.8/site-packages/pandas/core/arrays/_arrow_utils.py new file mode 100644 index 0000000..6214693 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/arrays/_arrow_utils.py @@ -0,0 +1,141 @@ +import json + +import numpy as np +import pyarrow + +from pandas.core.arrays.interval import VALID_CLOSED + + +def pyarrow_array_to_numpy_and_mask(arr, dtype): + """ + Convert a primitive pyarrow.Array to a numpy array and boolean mask based + on the buffers of the Array. + + At the moment pyarrow.BooleanArray is not supported. + + Parameters + ---------- + arr : pyarrow.Array + dtype : numpy.dtype + + Returns + ------- + (data, mask) + Tuple of two numpy arrays with the raw data (with specified dtype) and + a boolean mask (validity mask, so False means missing) + """ + dtype = np.dtype(dtype) + + buflist = arr.buffers() + # Since Arrow buffers might contain padding and the data might be offset, + # the buffer gets sliced here before handing it to numpy. 
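A minimal usage sketch of this helper, assuming pyarrow is installed; slicing the arrow array produces a nonzero offset, which exercises exactly the buffer slicing described above:

import numpy as np
import pyarrow

from pandas.core.arrays._arrow_utils import pyarrow_array_to_numpy_and_mask

# slice(1) yields an arrow array whose buffers start at a nonzero offset
arr = pyarrow.array([1, None, 3], type=pyarrow.int64()).slice(1)
data, mask = pyarrow_array_to_numpy_and_mask(arr, dtype=np.int64)
assert mask.tolist() == [False, True]   # False marks the missing slot
assert data[mask].tolist() == [3]       # only valid slots carry real data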
+ # See also https://github.com/pandas-dev/pandas/issues/40896 + offset = arr.offset * dtype.itemsize + length = len(arr) * dtype.itemsize + data_buf = buflist[1][offset : offset + length] + data = np.frombuffer(data_buf, dtype=dtype) + bitmask = buflist[0] + if bitmask is not None: + mask = pyarrow.BooleanArray.from_buffers( + pyarrow.bool_(), len(arr), [None, bitmask], offset=arr.offset + ) + mask = np.asarray(mask) + else: + mask = np.ones(len(arr), dtype=bool) + return data, mask + + +class ArrowPeriodType(pyarrow.ExtensionType): + def __init__(self, freq): + # attributes need to be set first before calling + # super init (as that calls serialize) + self._freq = freq + pyarrow.ExtensionType.__init__(self, pyarrow.int64(), "pandas.period") + + @property + def freq(self): + return self._freq + + def __arrow_ext_serialize__(self): + metadata = {"freq": self.freq} + return json.dumps(metadata).encode() + + @classmethod + def __arrow_ext_deserialize__(cls, storage_type, serialized): + metadata = json.loads(serialized.decode()) + return ArrowPeriodType(metadata["freq"]) + + def __eq__(self, other): + if isinstance(other, pyarrow.BaseExtensionType): + return type(self) == type(other) and self.freq == other.freq + else: + return NotImplemented + + def __hash__(self): + return hash((str(self), self.freq)) + + def to_pandas_dtype(self): + import pandas as pd + + return pd.PeriodDtype(freq=self.freq) + + +# register the type with a dummy instance +_period_type = ArrowPeriodType("D") +pyarrow.register_extension_type(_period_type) + + +class ArrowIntervalType(pyarrow.ExtensionType): + def __init__(self, subtype, closed): + # attributes need to be set first before calling + # super init (as that calls serialize) + assert closed in VALID_CLOSED + self._closed = closed + if not isinstance(subtype, pyarrow.DataType): + subtype = pyarrow.type_for_alias(str(subtype)) + self._subtype = subtype + + storage_type = pyarrow.struct([("left", subtype), ("right", subtype)]) + pyarrow.ExtensionType.__init__(self, storage_type, "pandas.interval") + + @property + def subtype(self): + return self._subtype + + @property + def closed(self): + return self._closed + + def __arrow_ext_serialize__(self): + metadata = {"subtype": str(self.subtype), "closed": self.closed} + return json.dumps(metadata).encode() + + @classmethod + def __arrow_ext_deserialize__(cls, storage_type, serialized): + metadata = json.loads(serialized.decode()) + subtype = pyarrow.type_for_alias(metadata["subtype"]) + closed = metadata["closed"] + return ArrowIntervalType(subtype, closed) + + def __eq__(self, other): + if isinstance(other, pyarrow.BaseExtensionType): + return ( + type(self) == type(other) + and self.subtype == other.subtype + and self.closed == other.closed + ) + else: + return NotImplemented + + def __hash__(self): + return hash((str(self), str(self.subtype), self.closed)) + + def to_pandas_dtype(self): + import pandas as pd + + return pd.IntervalDtype(self.subtype.to_pandas_dtype(), self.closed) + + +# register the type with a dummy instance +_interval_type = ArrowIntervalType(pyarrow.int64(), "left") +pyarrow.register_extension_type(_interval_type) diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/_mixins.py b/.local/lib/python3.8/site-packages/pandas/core/arrays/_mixins.py new file mode 100644 index 0000000..a40be5a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/arrays/_mixins.py @@ -0,0 +1,496 @@ +from __future__ import annotations + +from functools import wraps +from typing import ( + 
TYPE_CHECKING, + Any, + Literal, + Sequence, + TypeVar, + cast, + overload, +) + +import numpy as np + +from pandas._libs import lib +from pandas._libs.arrays import NDArrayBacked +from pandas._typing import ( + ArrayLike, + Dtype, + F, + PositionalIndexer2D, + PositionalIndexerTuple, + ScalarIndexer, + SequenceIndexer, + Shape, + TakeIndexer, + npt, + type_t, +) +from pandas.errors import AbstractMethodError +from pandas.util._decorators import doc +from pandas.util._validators import ( + validate_bool_kwarg, + validate_fillna_kwargs, + validate_insert_loc, +) + +from pandas.core.dtypes.common import ( + is_dtype_equal, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import ( + DatetimeTZDtype, + ExtensionDtype, + PeriodDtype, +) +from pandas.core.dtypes.missing import array_equivalent + +from pandas.core import missing +from pandas.core.algorithms import ( + take, + unique, + value_counts, +) +from pandas.core.array_algos.quantile import quantile_with_mask +from pandas.core.array_algos.transforms import shift +from pandas.core.arrays.base import ExtensionArray +from pandas.core.construction import extract_array +from pandas.core.indexers import check_array_indexer +from pandas.core.sorting import nargminmax + +NDArrayBackedExtensionArrayT = TypeVar( + "NDArrayBackedExtensionArrayT", bound="NDArrayBackedExtensionArray" +) + +if TYPE_CHECKING: + + from pandas._typing import ( + NumpySorter, + NumpyValueArrayLike, + ) + + +def ravel_compat(meth: F) -> F: + """ + Decorator to ravel a 2D array before passing it to a cython operation, + then reshape the result to our own shape. + """ + + @wraps(meth) + def method(self, *args, **kwargs): + if self.ndim == 1: + return meth(self, *args, **kwargs) + + flags = self._ndarray.flags + flat = self.ravel("K") + result = meth(flat, *args, **kwargs) + order = "F" if flags.f_contiguous else "C" + return result.reshape(self.shape, order=order) + + return cast(F, method) + + +class NDArrayBackedExtensionArray(NDArrayBacked, ExtensionArray): + """ + ExtensionArray that is backed by a single NumPy ndarray. + """ + + _ndarray: np.ndarray + + def _box_func(self, x): + """ + Wrap numpy type in our dtype.type if necessary. + """ + return x + + def _validate_scalar(self, value): + # used by NDArrayBackedExtensionIndex.insert + raise AbstractMethodError(self) + + # ------------------------------------------------------------------------ + + def view(self, dtype: Dtype | None = None) -> ArrayLike: + # We handle datetime64, datetime64tz, timedelta64, and period + # dtypes here. Everything else we pass through to the underlying + # ndarray. 
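The flatten-compute-reshape contract of ravel_compat can be sketched with a plain ndarray stand-in; this is illustrative only, not part of the module above:

import numpy as np

def ravel_then_reshape(arr, op):
    # mirror ravel_compat: 1D input passes straight through; 2D input is
    # flattened in "K" order, computed on, then restored to the original
    # shape with the matching memory order
    if arr.ndim == 1:
        return op(arr)
    order = "F" if arr.flags.f_contiguous else "C"
    return op(arr.ravel("K")).reshape(arr.shape, order=order)

out = ravel_then_reshape(np.arange(6).reshape(2, 3), np.negative)
assert out.tolist() == [[0, -1, -2], [-3, -4, -5]]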
+ if dtype is None or dtype is self.dtype: + return self._from_backing_data(self._ndarray) + + if isinstance(dtype, type): + # we sometimes pass non-dtype objects, e.g np.ndarray; + # pass those through to the underlying ndarray + return self._ndarray.view(dtype) + + dtype = pandas_dtype(dtype) + arr = self._ndarray + + if isinstance(dtype, (PeriodDtype, DatetimeTZDtype)): + cls = dtype.construct_array_type() + return cls(arr.view("i8"), dtype=dtype) + elif dtype == "M8[ns]": + from pandas.core.arrays import DatetimeArray + + return DatetimeArray(arr.view("i8"), dtype=dtype) + elif dtype == "m8[ns]": + from pandas.core.arrays import TimedeltaArray + + return TimedeltaArray(arr.view("i8"), dtype=dtype) + + # error: Argument "dtype" to "view" of "_ArrayOrScalarCommon" has incompatible + # type "Union[ExtensionDtype, dtype[Any]]"; expected "Union[dtype[Any], None, + # type, _SupportsDType, str, Union[Tuple[Any, int], Tuple[Any, Union[int, + # Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]]" + return arr.view(dtype=dtype) # type: ignore[arg-type] + + def take( + self: NDArrayBackedExtensionArrayT, + indices: TakeIndexer, + *, + allow_fill: bool = False, + fill_value: Any = None, + axis: int = 0, + ) -> NDArrayBackedExtensionArrayT: + if allow_fill: + fill_value = self._validate_scalar(fill_value) + + new_data = take( + self._ndarray, + indices, + allow_fill=allow_fill, + fill_value=fill_value, + axis=axis, + ) + return self._from_backing_data(new_data) + + # ------------------------------------------------------------------------ + + def equals(self, other) -> bool: + if type(self) is not type(other): + return False + if not is_dtype_equal(self.dtype, other.dtype): + return False + return bool(array_equivalent(self._ndarray, other._ndarray)) + + def _values_for_argsort(self) -> np.ndarray: + return self._ndarray + + # Signature of "argmin" incompatible with supertype "ExtensionArray" + def argmin(self, axis: int = 0, skipna: bool = True): # type:ignore[override] + # override base class by adding axis keyword + validate_bool_kwarg(skipna, "skipna") + if not skipna and self.isna().any(): + raise NotImplementedError + return nargminmax(self, "argmin", axis=axis) + + # Signature of "argmax" incompatible with supertype "ExtensionArray" + def argmax(self, axis: int = 0, skipna: bool = True): # type:ignore[override] + # override base class by adding axis keyword + validate_bool_kwarg(skipna, "skipna") + if not skipna and self.isna().any(): + raise NotImplementedError + return nargminmax(self, "argmax", axis=axis) + + def unique(self: NDArrayBackedExtensionArrayT) -> NDArrayBackedExtensionArrayT: + new_data = unique(self._ndarray) + return self._from_backing_data(new_data) + + @classmethod + @doc(ExtensionArray._concat_same_type) + def _concat_same_type( + cls: type[NDArrayBackedExtensionArrayT], + to_concat: Sequence[NDArrayBackedExtensionArrayT], + axis: int = 0, + ) -> NDArrayBackedExtensionArrayT: + dtypes = {str(x.dtype) for x in to_concat} + if len(dtypes) != 1: + raise ValueError("to_concat must have the same dtype (tz)", dtypes) + + new_values = [x._ndarray for x in to_concat] + new_values = np.concatenate(new_values, axis=axis) + # error: Argument 1 to "_from_backing_data" of "NDArrayBackedExtensionArray" has + # incompatible type "List[ndarray]"; expected "ndarray" + return to_concat[0]._from_backing_data(new_values) # type: ignore[arg-type] + + @doc(ExtensionArray.searchsorted) + def searchsorted( + self, + value: NumpyValueArrayLike | ExtensionArray, + side: Literal["left", 
"right"] = "left", + sorter: NumpySorter = None, + ) -> npt.NDArray[np.intp] | np.intp: + npvalue = self._validate_searchsorted_value(value) + return self._ndarray.searchsorted(npvalue, side=side, sorter=sorter) + + def _validate_searchsorted_value( + self, value: NumpyValueArrayLike | ExtensionArray + ) -> NumpyValueArrayLike: + if isinstance(value, ExtensionArray): + return value.to_numpy() + else: + return value + + @doc(ExtensionArray.shift) + def shift(self, periods=1, fill_value=None, axis=0): + + fill_value = self._validate_shift_value(fill_value) + new_values = shift(self._ndarray, periods, axis, fill_value) + + return self._from_backing_data(new_values) + + def _validate_shift_value(self, fill_value): + # TODO(2.0): after deprecation in datetimelikearraymixin is enforced, + # we can remove this and use validate_fill_value directly + return self._validate_scalar(fill_value) + + def __setitem__(self, key, value): + key = check_array_indexer(self, key) + value = self._validate_setitem_value(value) + self._ndarray[key] = value + + def _validate_setitem_value(self, value): + return value + + @overload + def __getitem__(self, key: ScalarIndexer) -> Any: + ... + + @overload + def __getitem__( + self: NDArrayBackedExtensionArrayT, + key: SequenceIndexer | PositionalIndexerTuple, + ) -> NDArrayBackedExtensionArrayT: + ... + + def __getitem__( + self: NDArrayBackedExtensionArrayT, + key: PositionalIndexer2D, + ) -> NDArrayBackedExtensionArrayT | Any: + if lib.is_integer(key): + # fast-path + result = self._ndarray[key] + if self.ndim == 1: + return self._box_func(result) + return self._from_backing_data(result) + + # error: Incompatible types in assignment (expression has type "ExtensionArray", + # variable has type "Union[int, slice, ndarray]") + key = extract_array(key, extract_numpy=True) # type: ignore[assignment] + key = check_array_indexer(self, key) + result = self._ndarray[key] + if lib.is_scalar(result): + return self._box_func(result) + + result = self._from_backing_data(result) + return result + + def _fill_mask_inplace( + self, method: str, limit, mask: npt.NDArray[np.bool_] + ) -> None: + # (for now) when self.ndim == 2, we assume axis=0 + func = missing.get_fill_func(method, ndim=self.ndim) + func(self._ndarray.T, limit=limit, mask=mask.T) + return + + @doc(ExtensionArray.fillna) + def fillna( + self: NDArrayBackedExtensionArrayT, value=None, method=None, limit=None + ) -> NDArrayBackedExtensionArrayT: + value, method = validate_fillna_kwargs( + value, method, validate_scalar_dict_value=False + ) + + mask = self.isna() + # error: Argument 2 to "check_value_size" has incompatible type + # "ExtensionArray"; expected "ndarray" + value = missing.check_value_size( + value, mask, len(self) # type: ignore[arg-type] + ) + + if mask.any(): + if method is not None: + # TODO: check value is None + # (for now) when self.ndim == 2, we assume axis=0 + func = missing.get_fill_func(method, ndim=self.ndim) + new_values, _ = func(self._ndarray.T.copy(), limit=limit, mask=mask.T) + new_values = new_values.T + + # TODO: PandasArray didn't used to copy, need tests for this + new_values = self._from_backing_data(new_values) + else: + # fill with value + new_values = self.copy() + new_values[mask] = value + else: + # We validate the fill_value even if there is nothing to fill + if value is not None: + self._validate_setitem_value(value) + + new_values = self.copy() + return new_values + + # ------------------------------------------------------------------------ + # Reductions + + def 
_wrap_reduction_result(self, axis: int | None, result): + if axis is None or self.ndim == 1: + return self._box_func(result) + return self._from_backing_data(result) + + # ------------------------------------------------------------------------ + # __array_function__ methods + + def _putmask(self, mask: npt.NDArray[np.bool_], value) -> None: + """ + Analogue to np.putmask(self, mask, value) + + Parameters + ---------- + mask : np.ndarray[bool] + value : scalar or listlike + + Raises + ------ + TypeError + If value cannot be cast to self.dtype. + """ + value = self._validate_setitem_value(value) + + np.putmask(self._ndarray, mask, value) + + def _where( + self: NDArrayBackedExtensionArrayT, mask: np.ndarray, value + ) -> NDArrayBackedExtensionArrayT: + """ + Analogue to np.where(mask, self, value) + + Parameters + ---------- + mask : np.ndarray[bool] + value : scalar or listlike + + Raises + ------ + TypeError + If value cannot be cast to self.dtype. + """ + value = self._validate_setitem_value(value) + + res_values = np.where(mask, self._ndarray, value) + return self._from_backing_data(res_values) + + # ------------------------------------------------------------------------ + # Index compat methods + + def insert( + self: NDArrayBackedExtensionArrayT, loc: int, item + ) -> NDArrayBackedExtensionArrayT: + """ + Make new ExtensionArray inserting new item at location. Follows + Python list.append semantics for negative values. + + Parameters + ---------- + loc : int + item : object + + Returns + ------- + type(self) + """ + loc = validate_insert_loc(loc, len(self)) + + code = self._validate_scalar(item) + + new_vals = np.concatenate( + ( + self._ndarray[:loc], + np.asarray([code], dtype=self._ndarray.dtype), + self._ndarray[loc:], + ) + ) + return self._from_backing_data(new_vals) + + # ------------------------------------------------------------------------ + # Additional array methods + # These are not part of the EA API, but we implement them because + # pandas assumes they're there. + + def value_counts(self, dropna: bool = True): + """ + Return a Series containing counts of unique values. + + Parameters + ---------- + dropna : bool, default True + Don't include counts of NA values. + + Returns + ------- + Series + """ + if self.ndim != 1: + raise NotImplementedError + + from pandas import ( + Index, + Series, + ) + + if dropna: + # error: Unsupported operand type for ~ ("ExtensionArray") + values = self[~self.isna()]._ndarray # type: ignore[operator] + else: + values = self._ndarray + + result = value_counts(values, sort=False, dropna=dropna) + + index_arr = self._from_backing_data(np.asarray(result.index._data)) + index = Index(index_arr, name=result.index.name) + return Series(result._values, index=index, name=result.name) + + def _quantile( + self: NDArrayBackedExtensionArrayT, + qs: npt.NDArray[np.float64], + interpolation: str, + ) -> NDArrayBackedExtensionArrayT: + # TODO: disable for Categorical if not ordered? + + # asarray needed for Sparse, see GH#24600 + mask = np.asarray(self.isna()) + mask = np.atleast_2d(mask) + + arr = np.atleast_2d(self._ndarray) + # TODO: something NDArrayBacked-specific instead of _values_for_factorize[1]? 
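Categorical is one concrete NDArrayBackedExtensionArray, so the insert machinery above is observable from public code; a small sketch, assuming a pandas version recent enough (1.3+) to ship ExtensionArray.insert:

import pandas as pd

cat = pd.Categorical(["a", "b", "a"])
# the inserted item is validated against the categories before the
# backing ndarray of codes is rebuilt around it
cat2 = cat.insert(1, "b")
assert list(cat2) == ["a", "b", "b", "a"]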
+ fill_value = self._values_for_factorize()[1] + + res_values = quantile_with_mask(arr, mask, fill_value, qs, interpolation) + + result = type(self)._from_factorized(res_values, self) + if self.ndim == 1: + assert result.shape == (1, len(qs)), result.shape + result = result[0] + + return result + + # ------------------------------------------------------------------------ + # numpy-like methods + + @classmethod + def _empty( + cls: type_t[NDArrayBackedExtensionArrayT], shape: Shape, dtype: ExtensionDtype + ) -> NDArrayBackedExtensionArrayT: + """ + Analogous to np.empty(shape, dtype=dtype) + + Parameters + ---------- + shape : tuple[int] + dtype : ExtensionDtype + """ + # The base implementation uses a naive approach to find the dtype + # for the backing ndarray + arr = cls._from_sequence([], dtype=dtype) + backing = np.empty(shape, dtype=arr._ndarray.dtype) + return arr._from_backing_data(backing) diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/_ranges.py b/.local/lib/python3.8/site-packages/pandas/core/arrays/_ranges.py new file mode 100644 index 0000000..3909875 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/arrays/_ranges.py @@ -0,0 +1,193 @@ +""" +Helper functions to generate range-like data for DatetimeArray +(and possibly TimedeltaArray/PeriodArray) +""" +from __future__ import annotations + +import numpy as np + +from pandas._libs.lib import i8max +from pandas._libs.tslibs import ( + BaseOffset, + OutOfBoundsDatetime, + Timedelta, + Timestamp, + iNaT, +) + + +def generate_regular_range( + start: Timestamp | Timedelta, + end: Timestamp | Timedelta, + periods: int, + freq: BaseOffset, +): + """ + Generate a range of dates or timestamps with the spans between dates + described by the given `freq` DateOffset. + + Parameters + ---------- + start : Timedelta, Timestamp or None + First point of produced date range. + end : Timedelta, Timestamp or None + Last point of produced date range. + periods : int + Number of periods in produced date range. + freq : Tick + Describes space between dates in produced date range. + + Returns + ------- + ndarray[np.int64] Representing nanoseconds. + """ + istart = start.value if start is not None else None + iend = end.value if end is not None else None + stride = freq.nanos + + if periods is None: + b = istart + # cannot just use e = Timestamp(end) + 1 because arange breaks when + # stride is too large, see GH10887 + e = b + (iend - b) // stride * stride + stride // 2 + 1 + elif istart is not None: + b = istart + e = _generate_range_overflow_safe(b, periods, stride, side="start") + elif iend is not None: + e = iend + stride + b = _generate_range_overflow_safe(e, periods, stride, side="end") + else: + raise ValueError( + "at least 'start' or 'end' should be specified if a 'period' is given." + ) + + with np.errstate(over="raise"): + # If the range is sufficiently large, np.arange may overflow + # and incorrectly return an empty array if not caught. + try: + values = np.arange(b, e, stride, dtype=np.int64) + except FloatingPointError: + xdr = [b] + while xdr[-1] != e: + xdr.append(xdr[-1] + stride) + values = np.array(xdr[:-1], dtype=np.int64) + return values + + +def _generate_range_overflow_safe( + endpoint: int, periods: int, stride: int, side: str = "start" +) -> int: + """ + Calculate the second endpoint for passing to np.arange, checking + to avoid an integer overflow. Catch OverflowError and re-raise + as OutOfBoundsDatetime. 
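A sketch of the happy path for generate_regular_range, using the private import path from this diff; helpers like this can change between pandas versions:

from pandas import Timestamp
from pandas.tseries.frequencies import to_offset
from pandas.core.arrays._ranges import generate_regular_range

# five daily points anchored at a start timestamp, returned as int64
# nanoseconds since the epoch
vals = generate_regular_range(
    Timestamp("2021-01-01"), None, periods=5, freq=to_offset("D")
)
assert len(vals) == 5
assert vals[1] - vals[0] == 86_400_000_000_000  # one day in nanoseconds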
+ + Parameters + ---------- + endpoint : int + nanosecond timestamp of the known endpoint of the desired range + periods : int + number of periods in the desired range + stride : int + nanoseconds between periods in the desired range + side : {'start', 'end'} + which end of the range `endpoint` refers to + + Returns + ------- + other_end : int + + Raises + ------ + OutOfBoundsDatetime + """ + # GH#14187 raise instead of incorrectly wrapping around + assert side in ["start", "end"] + + i64max = np.uint64(i8max) + msg = f"Cannot generate range with {side}={endpoint} and periods={periods}" + + with np.errstate(over="raise"): + # if periods * strides cannot be multiplied within the *uint64* bounds, + # we cannot salvage the operation by recursing, so raise + try: + addend = np.uint64(periods) * np.uint64(np.abs(stride)) + except FloatingPointError as err: + raise OutOfBoundsDatetime(msg) from err + + if np.abs(addend) <= i64max: + # relatively easy case without casting concerns + return _generate_range_overflow_safe_signed(endpoint, periods, stride, side) + + elif (endpoint > 0 and side == "start" and stride > 0) or ( + endpoint < 0 and side == "end" and stride > 0 + ): + # no chance of not-overflowing + raise OutOfBoundsDatetime(msg) + + elif side == "end" and endpoint > i64max and endpoint - stride <= i64max: + # in _generate_regular_range we added `stride` thereby overflowing + # the bounds. Adjust to fix this. + return _generate_range_overflow_safe( + endpoint - stride, periods - 1, stride, side + ) + + # split into smaller pieces + mid_periods = periods // 2 + remaining = periods - mid_periods + assert 0 < remaining < periods, (remaining, periods, endpoint, stride) + + midpoint = _generate_range_overflow_safe(endpoint, mid_periods, stride, side) + return _generate_range_overflow_safe(midpoint, remaining, stride, side) + + +def _generate_range_overflow_safe_signed( + endpoint: int, periods: int, stride: int, side: str +) -> int: + """ + A special case for _generate_range_overflow_safe where `periods * stride` + can be calculated without overflowing int64 bounds. + """ + assert side in ["start", "end"] + if side == "end": + stride *= -1 + + with np.errstate(over="raise"): + addend = np.int64(periods) * np.int64(stride) + try: + # easy case with no overflows + result = np.int64(endpoint) + addend + if result == iNaT: + # Putting this into a DatetimeArray/TimedeltaArray + # would incorrectly be interpreted as NaT + raise OverflowError + # error: Incompatible return value type (got "signedinteger[_64Bit]", + # expected "int") + return result # type: ignore[return-value] + except (FloatingPointError, OverflowError): + # with endpoint negative and addend positive we risk + # FloatingPointError; with reversed signed we risk OverflowError + pass + + # if stride and endpoint had opposite signs, then endpoint + addend + # should never overflow. 
so they must have the same signs + assert (stride > 0 and endpoint >= 0) or (stride < 0 and endpoint <= 0) + + if stride > 0: + # watch out for very special case in which we just slightly + # exceed implementation bounds, but when passing the result to + # np.arange will get a result slightly within the bounds + + # error: Incompatible types in assignment (expression has type + # "unsignedinteger[_64Bit]", variable has type "signedinteger[_64Bit]") + result = np.uint64(endpoint) + np.uint64(addend) # type: ignore[assignment] + i64max = np.uint64(i8max) + assert result > i64max + if result <= i64max + np.uint64(stride): + # error: Incompatible return value type (got "unsignedinteger", expected + # "int") + return result # type: ignore[return-value] + + raise OutOfBoundsDatetime( + f"Cannot generate range with {side}={endpoint} and periods={periods}" + ) diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/base.py b/.local/lib/python3.8/site-packages/pandas/core/arrays/base.py new file mode 100644 index 0000000..d955d8e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/arrays/base.py @@ -0,0 +1,1808 @@ +""" +An interface for extending pandas with custom arrays. + +.. warning:: + + This is an experimental API and subject to breaking changes + without warning. +""" +from __future__ import annotations + +import operator +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Iterator, + Literal, + Sequence, + TypeVar, + cast, + overload, +) + +import numpy as np + +from pandas._libs import lib +from pandas._typing import ( + ArrayLike, + AstypeArg, + Dtype, + FillnaOptions, + PositionalIndexer, + ScalarIndexer, + SequenceIndexer, + Shape, + TakeIndexer, + npt, +) +from pandas.compat import set_function_name +from pandas.compat.numpy import function as nv +from pandas.errors import AbstractMethodError +from pandas.util._decorators import ( + Appender, + Substitution, + cache_readonly, +) +from pandas.util._validators import ( + validate_bool_kwarg, + validate_fillna_kwargs, + validate_insert_loc, +) + +from pandas.core.dtypes.cast import maybe_cast_to_extension_array +from pandas.core.dtypes.common import ( + is_dtype_equal, + is_list_like, + is_scalar, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import ExtensionDtype +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCIndex, + ABCSeries, +) +from pandas.core.dtypes.missing import isna + +from pandas.core import ( + arraylike, + missing, + roperator, +) +from pandas.core.algorithms import ( + factorize_array, + isin, + mode, + rank, + unique, +) +from pandas.core.array_algos.quantile import quantile_with_mask +from pandas.core.sorting import ( + nargminmax, + nargsort, +) + +if TYPE_CHECKING: + + class ExtensionArraySupportsAnyAll("ExtensionArray"): + def any(self, *, skipna: bool = True) -> bool: + pass + + def all(self, *, skipna: bool = True) -> bool: + pass + + from pandas._typing import ( + NumpySorter, + NumpyValueArrayLike, + ) + + +_extension_array_shared_docs: dict[str, str] = {} + +ExtensionArrayT = TypeVar("ExtensionArrayT", bound="ExtensionArray") + + +class ExtensionArray: + """ + Abstract base class for custom 1-D array types. + + pandas will recognize instances of this class as proper arrays + with a custom type and will not attempt to coerce them to objects. They + may be stored directly inside a :class:`DataFrame` or :class:`Series`. 
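The point about direct storage can be seen with any shipped subclass; a quick sketch:

import pandas as pd
from pandas.api.extensions import ExtensionArray

arr = pd.array([1, None, 3], dtype="Int64")
assert isinstance(arr, ExtensionArray)
# stored directly in a Series, with no coercion to object dtype
s = pd.Series(arr)
assert s.dtype == "Int64"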
+ + Attributes + ---------- + dtype + nbytes + ndim + shape + + Methods + ------- + argsort + astype + copy + dropna + factorize + fillna + equals + insert + isin + isna + ravel + repeat + searchsorted + shift + take + tolist + unique + view + _concat_same_type + _formatter + _from_factorized + _from_sequence + _from_sequence_of_strings + _reduce + _values_for_argsort + _values_for_factorize + + Notes + ----- + The interface includes the following abstract methods that must be + implemented by subclasses: + + * _from_sequence + * _from_factorized + * __getitem__ + * __len__ + * __eq__ + * dtype + * nbytes + * isna + * take + * copy + * _concat_same_type + + A default repr displaying the type, (truncated) data, length, + and dtype is provided. It can be customized or replaced by + overriding: + + * __repr__ : A default repr for the ExtensionArray. + * _formatter : Print scalars inside a Series or DataFrame. + + Some methods require casting the ExtensionArray to an ndarray of Python + objects with ``self.astype(object)``, which may be expensive. When + performance is a concern, we highly recommend overriding the following + methods: + + * fillna + * dropna + * unique + * factorize / _values_for_factorize + * argsort / _values_for_argsort + * searchsorted + + The remaining methods implemented on this class should be performant, + as they only compose abstract methods. Still, a more efficient + implementation may be available, and these methods can be overridden. + + One can implement methods to handle array reductions. + + * _reduce + + One can implement methods to handle parsing from strings that will be used + in methods such as ``pandas.io.parsers.read_csv``. + + * _from_sequence_of_strings + + This class does not inherit from 'abc.ABCMeta' for performance reasons. + Methods and properties required by the interface raise + ``pandas.errors.AbstractMethodError`` and no ``register`` method is + provided for registering virtual subclasses. + + ExtensionArrays are limited to 1 dimension. + + They may be backed by none, one, or many NumPy arrays. For example, + ``pandas.Categorical`` is an extension array backed by two arrays, + one for codes and one for categories. An array of IPv6 addresses may + be backed by a NumPy structured array with two fields, one for the + lower 64 bits and one for the upper 64 bits. Or they may be backed + by some other storage type, like Python lists. Pandas makes no + assumptions on how the data are stored, just that it can be converted + to a NumPy array. + The ExtensionArray interface does not impose any rules on how this data + is stored. However, currently, the backing data cannot be stored in + attributes called ``.values`` or ``._values`` to ensure full compatibility + with pandas internals. But other names such as ``.data``, ``._data``, + ``._items``, ... can be freely used. + + If implementing NumPy's ``__array_ufunc__`` interface, pandas expects + that + + 1. You defer by returning ``NotImplemented`` when any Series are present + in `inputs`. Pandas will extract the arrays and call the ufunc again. + 2. You define a ``_HANDLED_TYPES`` tuple as an attribute on the class. + Pandas inspects this to determine whether the ufunc is valid for the + types present. + + See :ref:`extending.extension.ufunc` for more. + + By default, ExtensionArrays are not hashable. Immutable subclasses may + override this behavior. + """ + + # '_typ' is for pandas.core.dtypes.generic.ABCExtensionArray. + # Don't override this.
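To make the abstract-method list above concrete, here is a deliberately tiny float-backed subclass; it is a sketch that satisfies construction, not a production implementation, and every name in it is made up for illustration:

import numpy as np
import pandas as pd
from pandas.api.extensions import (
    ExtensionArray,
    ExtensionDtype,
    register_extension_dtype,
    take,
)

@register_extension_dtype
class TinyDtype(ExtensionDtype):
    name = "tiny"
    type = float
    na_value = np.nan

    @classmethod
    def construct_array_type(cls):
        return TinyArray

class TinyArray(ExtensionArray):
    def __init__(self, values):
        self._data = np.asarray(values, dtype="float64")

    @classmethod
    def _from_sequence(cls, scalars, *, dtype=None, copy=False):
        return cls(scalars)

    @classmethod
    def _from_factorized(cls, values, original):
        return cls(values)

    def __getitem__(self, item):
        result = self._data[item]
        if np.ndim(result) == 0:
            return float(result)  # scalar access returns the scalar type
        return type(self)(result)

    def __len__(self):
        return len(self._data)

    @property
    def dtype(self):
        return TinyDtype()

    @property
    def nbytes(self):
        return self._data.nbytes

    def isna(self):
        return np.isnan(self._data)

    def take(self, indices, *, allow_fill=False, fill_value=None):
        if allow_fill and fill_value is None:
            fill_value = self.dtype.na_value
        data = take(self._data, indices, allow_fill=allow_fill, fill_value=fill_value)
        return type(self)(data)

    def copy(self):
        return type(self)(self._data.copy())

    @classmethod
    def _concat_same_type(cls, to_concat):
        return cls(np.concatenate([x._data for x in to_concat]))

s = pd.Series(TinyArray([1.0, np.nan, 3.0]))
assert s.isna().tolist() == [False, True, False]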
+ _typ = "extension" + + # ------------------------------------------------------------------------ + # Constructors + # ------------------------------------------------------------------------ + + @classmethod + def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy=False): + """ + Construct a new ExtensionArray from a sequence of scalars. + + Parameters + ---------- + scalars : Sequence + Each element will be an instance of the scalar type for this + array, ``cls.dtype.type`` or be converted into this type in this method. + dtype : dtype, optional + Construct for this particular dtype. This should be a Dtype + compatible with the ExtensionArray. + copy : bool, default False + If True, copy the underlying data. + + Returns + ------- + ExtensionArray + """ + raise AbstractMethodError(cls) + + @classmethod + def _from_sequence_of_strings( + cls, strings, *, dtype: Dtype | None = None, copy=False + ): + """ + Construct a new ExtensionArray from a sequence of strings. + + Parameters + ---------- + strings : Sequence + Each element will be an instance of the scalar type for this + array, ``cls.dtype.type``. + dtype : dtype, optional + Construct for this particular dtype. This should be a Dtype + compatible with the ExtensionArray. + copy : bool, default False + If True, copy the underlying data. + + Returns + ------- + ExtensionArray + """ + raise AbstractMethodError(cls) + + @classmethod + def _from_factorized(cls, values, original): + """ + Reconstruct an ExtensionArray after factorization. + + Parameters + ---------- + values : ndarray + An integer ndarray with the factorized values. + original : ExtensionArray + The original ExtensionArray that factorize was called on. + + See Also + -------- + factorize : Top-level factorize method that dispatches here. + ExtensionArray.factorize : Encode the extension array as an enumerated type. + """ + raise AbstractMethodError(cls) + + # ------------------------------------------------------------------------ + # Must be a Sequence + # ------------------------------------------------------------------------ + @overload + def __getitem__(self, item: ScalarIndexer) -> Any: + ... + + @overload + def __getitem__(self: ExtensionArrayT, item: SequenceIndexer) -> ExtensionArrayT: + ... + + def __getitem__( + self: ExtensionArrayT, item: PositionalIndexer + ) -> ExtensionArrayT | Any: + """ + Select a subset of self. + + Parameters + ---------- + item : int, slice, or ndarray + * int: The position in 'self' to get. + + * slice: A slice object, where 'start', 'stop', and 'step' are + integers or None + + * ndarray: A 1-d boolean NumPy ndarray the same length as 'self' + + * list[int]: A list of int + + Returns + ------- + item : scalar or ExtensionArray + + Notes + ----- + For scalar ``item``, return a scalar value suitable for the array's + type. This should be an instance of ``self.dtype.type``. + + For slice ``key``, return an instance of ``ExtensionArray``, even + if the slice is length 0 or 1. + + For a boolean mask, return an instance of ``ExtensionArray``, filtered + to the values where ``item`` is True. + """ + raise AbstractMethodError(self) + + def __setitem__(self, key: int | slice | np.ndarray, value: Any) -> None: + """ + Set one or more values inplace. + + This method is not required to satisfy the pandas extension array + interface. + + Parameters + ---------- + key : int, ndarray, or slice + When called from, e.g. ``Series.__setitem__``, ``key`` will be + one of + + * scalar int + * ndarray of integers. 
+ * boolean ndarray + * slice object + + value : ExtensionDtype.type, Sequence[ExtensionDtype.type], or object + value or values to be set of ``key``. + + Returns + ------- + None + """ + # Some notes to the ExtensionArray implementor who may have ended up + # here. While this method is not required for the interface, if you + # *do* choose to implement __setitem__, then some semantics should be + # observed: + # + # * Setting multiple values : ExtensionArrays should support setting + # multiple values at once, 'key' will be a sequence of integers and + # 'value' will be a same-length sequence. + # + # * Broadcasting : For a sequence 'key' and a scalar 'value', + # each position in 'key' should be set to 'value'. + # + # * Coercion : Most users will expect basic coercion to work. For + # example, a string like '2018-01-01' is coerced to a datetime + # when setting on a datetime64ns array. In general, if the + # __init__ method coerces that value, then so should __setitem__ + # Note, also, that Series/DataFrame.where internally use __setitem__ + # on a copy of the data. + raise NotImplementedError(f"{type(self)} does not implement __setitem__.") + + def __len__(self) -> int: + """ + Length of this array + + Returns + ------- + length : int + """ + raise AbstractMethodError(self) + + def __iter__(self) -> Iterator[Any]: + """ + Iterate over elements of the array. + """ + # This needs to be implemented so that pandas recognizes extension + # arrays as list-like. The default implementation makes successive + # calls to ``__getitem__``, which may be slower than necessary. + for i in range(len(self)): + yield self[i] + + def __contains__(self, item: object) -> bool | np.bool_: + """ + Return for `item in self`. + """ + # GH37867 + # comparisons of any item to pd.NA always return pd.NA, so e.g. "a" in [pd.NA] + # would raise a TypeError. The implementation below works around that. + if is_scalar(item) and isna(item): + if not self._can_hold_na: + return False + elif item is self.dtype.na_value or isinstance(item, self.dtype.type): + return self._hasna + else: + return False + else: + # error: Item "ExtensionArray" of "Union[ExtensionArray, ndarray]" has no + # attribute "any" + return (item == self).any() # type: ignore[union-attr] + + # error: Signature of "__eq__" incompatible with supertype "object" + def __eq__(self, other: Any) -> ArrayLike: # type: ignore[override] + """ + Return for `self == other` (element-wise equality). + """ + # Implementer note: this should return a boolean numpy ndarray or + # a boolean ExtensionArray. + # When `other` is one of Series, Index, or DataFrame, this method should + # return NotImplemented (to ensure that those objects are responsible for + # first unpacking the arrays, and then dispatch the operation to the + # underlying arrays) + raise AbstractMethodError(self) + + # error: Signature of "__ne__" incompatible with supertype "object" + def __ne__(self, other: Any) -> ArrayLike: # type: ignore[override] + """ + Return for `self != other` (element-wise in-equality). + """ + return ~(self == other) + + def to_numpy( + self, + dtype: npt.DTypeLike | None = None, + copy: bool = False, + na_value=lib.no_default, + ) -> np.ndarray: + """ + Convert to a NumPy ndarray. + + .. versionadded:: 1.0.0 + + This is similar to :meth:`numpy.asarray`, but may provide additional control + over how the conversion is done. + + Parameters + ---------- + dtype : str or numpy.dtype, optional + The dtype to pass to :meth:`numpy.asarray`. 
copy : bool, default False + Whether to ensure that the returned value is not a view on + another array. Note that ``copy=False`` does not *ensure* that + ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensures that + a copy is made, even if not strictly necessary. + na_value : Any, optional + The value to use for missing values. The default value depends + on `dtype` and the type of the array. + + Returns + ------- + numpy.ndarray + """ + result = np.asarray(self, dtype=dtype) + if copy or na_value is not lib.no_default: + result = result.copy() + if na_value is not lib.no_default: + result[self.isna()] = na_value + return result + + # ------------------------------------------------------------------------ + # Required attributes + # ------------------------------------------------------------------------ + + @property + def dtype(self) -> ExtensionDtype: + """ + An instance of 'ExtensionDtype'. + """ + raise AbstractMethodError(self) + + @property + def shape(self) -> Shape: + """ + Return a tuple of the array dimensions. + """ + return (len(self),) + + @property + def size(self) -> int: + """ + The number of elements in the array. + """ + return np.prod(self.shape) + + @property + def ndim(self) -> int: + """ + Extension Arrays are only allowed to be 1-dimensional. + """ + return 1 + + @property + def nbytes(self) -> int: + """ + The number of bytes needed to store this object in memory. + """ + # If this is expensive to compute, return an approximate lower bound + # on the number of bytes needed. + raise AbstractMethodError(self) + + # ------------------------------------------------------------------------ + # Additional Methods + # ------------------------------------------------------------------------ + + @overload + def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray: + ... + + @overload + def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray: + ... + + @overload + def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike: + ... + + def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: + """ + Cast to a NumPy array or ExtensionArray with 'dtype'. + + Parameters + ---------- + dtype : str or dtype + Typecode or data-type to which the array is cast. + copy : bool, default True + Whether to copy the data, even if not necessary. If False, + a copy is made only if the old dtype does not match the + new dtype. + + Returns + ------- + array : np.ndarray or ExtensionArray + An ExtensionArray if dtype is ExtensionDtype, + otherwise a NumPy ndarray with 'dtype' for its dtype. + """ + + dtype = pandas_dtype(dtype) + if is_dtype_equal(dtype, self.dtype): + if not copy: + return self + else: + return self.copy() + + if isinstance(dtype, ExtensionDtype): + cls = dtype.construct_array_type() + return cls._from_sequence(self, dtype=dtype, copy=copy) + + return np.array(self, dtype=dtype, copy=copy) + + def isna(self) -> np.ndarray | ExtensionArraySupportsAnyAll: + """ + A 1-D array indicating if each value is missing. + + Returns + ------- + na_values : Union[np.ndarray, ExtensionArray] + In most cases, this should return a NumPy ndarray. For + exceptional cases like ``SparseArray``, where returning + an ndarray would be expensive, an ExtensionArray may be + returned.
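On a shipped nullable array, to_numpy's na_value decouples the missing-value marker from the target dtype; a small sketch:

import pandas as pd

arr = pd.array([1, None, 3], dtype="Int64")
# render as plain float64, with NaN standing in for the missing slot
out = arr.to_numpy(dtype="float64", na_value=float("nan"))
assert out.dtype == "float64"
assert out[0] == 1.0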
+ + Notes + ----- + If returning an ExtensionArray, then + + * ``na_values._is_boolean`` should be True + * `na_values` should implement :func:`ExtensionArray._reduce` + * ``na_values.any`` and ``na_values.all`` should be implemented + """ + raise AbstractMethodError(self) + + @property + def _hasna(self) -> bool: + # GH#22680 + """ + Equivalent to `self.isna().any()`. + + Some ExtensionArray subclasses may be able to optimize this check. + """ + return bool(self.isna().any()) + + def _values_for_argsort(self) -> np.ndarray: + """ + Return values for sorting. + + Returns + ------- + ndarray + The transformed values should maintain the ordering between values + within the array. + + See Also + -------- + ExtensionArray.argsort : Return the indices that would sort this array. + """ + # Note: this is used in `ExtensionArray.argsort`. + return np.array(self) + + def argsort( + self, + ascending: bool = True, + kind: str = "quicksort", + na_position: str = "last", + *args, + **kwargs, + ) -> np.ndarray: + """ + Return the indices that would sort this array. + + Parameters + ---------- + ascending : bool, default True + Whether the indices should result in an ascending + or descending sort. + kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, optional + Sorting algorithm. + *args, **kwargs: + Passed through to :func:`numpy.argsort`. + + Returns + ------- + np.ndarray[np.intp] + Array of indices that sort ``self``. If NaN values are contained, + NaN values are placed at the end. + + See Also + -------- + numpy.argsort : Sorting implementation used internally. + """ + # Implementor note: You have two places to override the behavior of + # argsort. + # 1. _values_for_argsort : construct the values passed to np.argsort + # 2. argsort : total control over sorting. + ascending = nv.validate_argsort_with_ascending(ascending, args, kwargs) + + values = self._values_for_argsort() + return nargsort( + values, + kind=kind, + ascending=ascending, + na_position=na_position, + mask=np.asarray(self.isna()), + ) + + def argmin(self, skipna: bool = True) -> int: + """ + Return the index of minimum value. + + In case of multiple occurrences of the minimum value, the index + corresponding to the first occurrence is returned. + + Parameters + ---------- + skipna : bool, default True + + Returns + ------- + int + + See Also + -------- + ExtensionArray.argmax + """ + validate_bool_kwarg(skipna, "skipna") + if not skipna and self._hasna: + raise NotImplementedError + return nargminmax(self, "argmin") + + def argmax(self, skipna: bool = True) -> int: + """ + Return the index of maximum value. + + In case of multiple occurrences of the maximum value, the index + corresponding to the first occurrence is returned. + + Parameters + ---------- + skipna : bool, default True + + Returns + ------- + int + + See Also + -------- + ExtensionArray.argmin + """ + validate_bool_kwarg(skipna, "skipna") + if not skipna and self._hasna: + raise NotImplementedError + return nargminmax(self, "argmax") + + def fillna( + self, + value: object | ArrayLike | None = None, + method: FillnaOptions | None = None, + limit: int | None = None, + ): + """ + Fill NA/NaN values using the specified method. + + Parameters + ---------- + value : scalar, array-like + If a scalar value is passed it is used to fill all missing values. + Alternatively, an array-like 'value' can be given. It's expected + that the array-like have the same length as 'self'. 
+ method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None + Method to use for filling holes in reindexed Series + pad / ffill: propagate last valid observation forward to next valid + backfill / bfill: use NEXT valid observation to fill gap. + limit : int, default None + If method is specified, this is the maximum number of consecutive + NaN values to forward/backward fill. In other words, if there is + a gap with more than this number of consecutive NaNs, it will only + be partially filled. If method is not specified, this is the + maximum number of entries along the entire axis where NaNs will be + filled. + + Returns + ------- + ExtensionArray + With NA/NaN filled. + """ + value, method = validate_fillna_kwargs(value, method) + + mask = self.isna() + # error: Argument 2 to "check_value_size" has incompatible type + # "ExtensionArray"; expected "ndarray" + value = missing.check_value_size( + value, mask, len(self) # type: ignore[arg-type] + ) + + if mask.any(): + if method is not None: + func = missing.get_fill_func(method) + new_values, _ = func(self.astype(object), limit=limit, mask=mask) + new_values = self._from_sequence(new_values, dtype=self.dtype) + else: + # fill with value + new_values = self.copy() + new_values[mask] = value + else: + new_values = self.copy() + return new_values + + def dropna(self: ExtensionArrayT) -> ExtensionArrayT: + """ + Return ExtensionArray without NA values. + + Returns + ------- + valid : ExtensionArray + """ + # error: Unsupported operand type for ~ ("ExtensionArray") + return self[~self.isna()] # type: ignore[operator] + + def shift(self, periods: int = 1, fill_value: object = None) -> ExtensionArray: + """ + Shift values by desired number. + + Newly introduced missing values are filled with + ``self.dtype.na_value``. + + Parameters + ---------- + periods : int, default 1 + The number of periods to shift. Negative values are allowed + for shifting backwards. + + fill_value : object, optional + The scalar value to use for newly introduced missing values. + The default is ``self.dtype.na_value``. + + Returns + ------- + ExtensionArray + Shifted. + + Notes + ----- + If ``self`` is empty or ``periods`` is 0, a copy of ``self`` is + returned. + + If ``periods > len(self)``, then an array of size + len(self) is returned, with all values filled with + ``self.dtype.na_value``. + """ + # Note: this implementation assumes that `self.dtype.na_value` can be + # stored in an instance of your ExtensionArray with `self.dtype`. + if not len(self) or periods == 0: + return self.copy() + + if isna(fill_value): + fill_value = self.dtype.na_value + + empty = self._from_sequence( + [fill_value] * min(abs(periods), len(self)), dtype=self.dtype + ) + if periods > 0: + a = empty + b = self[:-periods] + else: + a = self[abs(periods) :] + b = empty + return self._concat_same_type([a, b]) + + def unique(self: ExtensionArrayT) -> ExtensionArrayT: + """ + Compute the ExtensionArray of unique values. + + Returns + ------- + uniques : ExtensionArray + """ + uniques = unique(self.astype(object)) + return self._from_sequence(uniques, dtype=self.dtype) + + def searchsorted( + self, + value: NumpyValueArrayLike | ExtensionArray, + side: Literal["left", "right"] = "left", + sorter: NumpySorter = None, + ) -> npt.NDArray[np.intp] | np.intp: + """ + Find indices where elements should be inserted to maintain order. 
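The na_value-padding behaviour of shift, sketched on a shipped array type:

import pandas as pd

arr = pd.array([1, 2, 3], dtype="Int64")
# the vacated leading slot is filled with dtype.na_value, i.e. pd.NA
out = arr.shift(1)
assert out[0] is pd.NA
assert out.tolist()[1:] == [1, 2]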
+ + Find the indices into a sorted array `self` such that, if the + corresponding elements in `value` were inserted before the indices, + the order of `self` would be preserved. + + Assuming that `self` is sorted: + + ====== ================================ + `side` returned index `i` satisfies + ====== ================================ + left ``self[i-1] < value <= self[i]`` + right ``self[i-1] <= value < self[i]`` + ====== ================================ + + Parameters + ---------- + value : array-like, list or scalar + Value(s) to insert into `self`. + side : {'left', 'right'}, optional + If 'left', the index of the first suitable location found is given. + If 'right', return the last such index. If there is no suitable + index, return either 0 or N (where N is the length of `self`). + sorter : 1-D array-like, optional + Optional array of integer indices that sort `self` into ascending + order. They are typically the result of argsort. + + Returns + ------- + array of ints or int + If value is array-like, array of insertion points. + If value is scalar, a single integer. + + See Also + -------- + numpy.searchsorted : Similar method from NumPy. + """ + # Note: the base tests provided by pandas only test the basics. + # We do not test + # 1. Values outside the range of the `data_for_sorting` fixture + # 2. Values between the values in the `data_for_sorting` fixture + # 3. Missing values. + arr = self.astype(object) + if isinstance(value, ExtensionArray): + value = value.astype(object) + return arr.searchsorted(value, side=side, sorter=sorter) + + def equals(self, other: object) -> bool: + """ + Return whether another array is equivalent to this array. + + Equivalent means that both arrays have the same shape and dtype, and + all values compare equal. Missing values in the same location are + considered equal (in contrast with normal equality). + + Parameters + ---------- + other : ExtensionArray + Array to compare to this Array. + + Returns + ------- + boolean + Whether the arrays are equivalent. + """ + if type(self) != type(other): + return False + other = cast(ExtensionArray, other) + if not is_dtype_equal(self.dtype, other.dtype): + return False + elif len(self) != len(other): + return False + else: + equal_values = self == other + if isinstance(equal_values, ExtensionArray): + # boolean array with NA -> fill with False + equal_values = equal_values.fillna(False) + # error: Unsupported left operand type for & ("ExtensionArray") + equal_na = self.isna() & other.isna() # type: ignore[operator] + return bool((equal_values | equal_na).all()) + + def isin(self, values) -> np.ndarray: + """ + Pointwise comparison for set containment in the given values. + + Roughly equivalent to `np.array([x in values for x in self])` + + Parameters + ---------- + values : Sequence + + Returns + ------- + np.ndarray[bool] + """ + return isin(np.asarray(self), values) + + def _values_for_factorize(self) -> tuple[np.ndarray, Any]: + """ + Return an array and missing value suitable for factorization. + + Returns + ------- + values : ndarray + + An array suitable for factorization. This should maintain order + and be a supported dtype (Float64, Int64, UInt64, String, Object). + By default, the extension array is cast to object dtype. + na_value : object + The value in `values` to consider missing. This will be treated + as NA in the factorization routines, so it will be coded as + `na_sentinel` and not included in `uniques`. By default, + ``np.nan`` is used.
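The NA-aware contract of equals, as opposed to element-wise comparison, in a few lines:

import pandas as pd

a = pd.array(["x", None], dtype="string")
b = pd.array(["x", None], dtype="string")
# co-located missing values count as equal for equals(), while the
# element-wise comparison propagates pd.NA
assert a.equals(b)
assert (a == b)[1] is pd.NA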
+ + Notes + ----- + The values returned by this method are also used in + :func:`pandas.util.hash_pandas_object`. + """ + return self.astype(object), np.nan + + def factorize(self, na_sentinel: int = -1) -> tuple[np.ndarray, ExtensionArray]: + """ + Encode the extension array as an enumerated type. + + Parameters + ---------- + na_sentinel : int, default -1 + Value to use in the `codes` array to indicate missing values. + + Returns + ------- + codes : ndarray + An integer NumPy array that's an indexer into the original + ExtensionArray. + uniques : ExtensionArray + An ExtensionArray containing the unique values of `self`. + + .. note:: + + uniques will *not* contain an entry for the NA value of + the ExtensionArray if there are any missing values present + in `self`. + + See Also + -------- + factorize : Top-level factorize method that dispatches here. + + Notes + ----- + :meth:`pandas.factorize` offers a `sort` keyword as well. + """ + # Implementer note: There are two ways to override the behavior of + # pandas.factorize + # 1. _values_for_factorize and _from_factorize. + # Specify the values passed to pandas' internal factorization + # routines, and how to convert from those values back to the + # original ExtensionArray. + # 2. ExtensionArray.factorize. + # Complete control over factorization. + arr, na_value = self._values_for_factorize() + + codes, uniques = factorize_array( + arr, na_sentinel=na_sentinel, na_value=na_value + ) + + uniques_ea = self._from_factorized(uniques, self) + return codes, uniques_ea + + _extension_array_shared_docs[ + "repeat" + ] = """ + Repeat elements of a %(klass)s. + + Returns a new %(klass)s where each element of the current %(klass)s + is repeated consecutively a given number of times. + + Parameters + ---------- + repeats : int or array of ints + The number of repetitions for each element. This should be a + non-negative integer. Repeating 0 times will return an empty + %(klass)s. + axis : None + Must be ``None``. Has no effect but is accepted for compatibility + with numpy. + + Returns + ------- + repeated_array : %(klass)s + Newly created %(klass)s with repeated elements. + + See Also + -------- + Series.repeat : Equivalent function for Series. + Index.repeat : Equivalent function for Index. + numpy.repeat : Similar method for :class:`numpy.ndarray`. + ExtensionArray.take : Take arbitrary positions. + + Examples + -------- + >>> cat = pd.Categorical(['a', 'b', 'c']) + >>> cat + ['a', 'b', 'c'] + Categories (3, object): ['a', 'b', 'c'] + >>> cat.repeat(2) + ['a', 'a', 'b', 'b', 'c', 'c'] + Categories (3, object): ['a', 'b', 'c'] + >>> cat.repeat([1, 2, 3]) + ['a', 'b', 'b', 'c', 'c', 'c'] + Categories (3, object): ['a', 'b', 'c'] + """ + + @Substitution(klass="ExtensionArray") + @Appender(_extension_array_shared_docs["repeat"]) + def repeat(self, repeats: int | Sequence[int], axis: int | None = None): + nv.validate_repeat((), {"axis": axis}) + ind = np.arange(len(self)).repeat(repeats) + return self.take(ind) + + # ------------------------------------------------------------------------ + # Indexing methods + # ------------------------------------------------------------------------ + + def take( + self: ExtensionArrayT, + indices: TakeIndexer, + *, + allow_fill: bool = False, + fill_value: Any = None, + ) -> ExtensionArrayT: + """ + Take elements from an array. + + Parameters + ---------- + indices : sequence of int or one-dimensional np.ndarray of int + Indices to be taken. 
+ allow_fill : bool, default False + How to handle negative values in `indices`. + + * False: negative values in `indices` indicate positional indices + from the right (the default). This is similar to + :func:`numpy.take`. + + * True: negative values in `indices` indicate + missing values. These values are set to `fill_value`. Any other + other negative values raise a ``ValueError``. + + fill_value : any, optional + Fill value to use for NA-indices when `allow_fill` is True. + This may be ``None``, in which case the default NA value for + the type, ``self.dtype.na_value``, is used. + + For many ExtensionArrays, there will be two representations of + `fill_value`: a user-facing "boxed" scalar, and a low-level + physical NA value. `fill_value` should be the user-facing version, + and the implementation should handle translating that to the + physical version for processing the take if necessary. + + Returns + ------- + ExtensionArray + + Raises + ------ + IndexError + When the indices are out of bounds for the array. + ValueError + When `indices` contains negative values other than ``-1`` + and `allow_fill` is True. + + See Also + -------- + numpy.take : Take elements from an array along an axis. + api.extensions.take : Take elements from an array. + + Notes + ----- + ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``, + ``iloc``, when `indices` is a sequence of values. Additionally, + it's called by :meth:`Series.reindex`, or any other method + that causes realignment, with a `fill_value`. + + Examples + -------- + Here's an example implementation, which relies on casting the + extension array to object dtype. This uses the helper method + :func:`pandas.api.extensions.take`. + + .. code-block:: python + + def take(self, indices, allow_fill=False, fill_value=None): + from pandas.core.algorithms import take + + # If the ExtensionArray is backed by an ndarray, then + # just pass that here instead of coercing to object. + data = self.astype(object) + + if allow_fill and fill_value is None: + fill_value = self.dtype.na_value + + # fill value should always be translated from the scalar + # type for the array, to the physical storage type for + # the data, before passing to take. + + result = take(data, indices, fill_value=fill_value, + allow_fill=allow_fill) + return self._from_sequence(result, dtype=self.dtype) + """ + # Implementer note: The `fill_value` parameter should be a user-facing + # value, an instance of self.dtype.type. When passed `fill_value=None`, + # the default of `self.dtype.na_value` should be used. + # This may differ from the physical storage type your ExtensionArray + # uses. In this case, your implementation is responsible for casting + # the user-facing type to the storage type, before using + # pandas.api.extensions.take + raise AbstractMethodError(self) + + def copy(self: ExtensionArrayT) -> ExtensionArrayT: + """ + Return a copy of the array. + + Returns + ------- + ExtensionArray + """ + raise AbstractMethodError(self) + + def view(self, dtype: Dtype | None = None) -> ArrayLike: + """ + Return a view on the array. + + Parameters + ---------- + dtype : str, np.dtype, or ExtensionDtype, optional + Default None. + + Returns + ------- + ExtensionArray or np.ndarray + A view on the :class:`ExtensionArray`'s data. + """ + # NB: + # - This must return a *new* object referencing the same data, not self. + # - The only case that *must* be implemented is with dtype=None, + # giving a view with the same dtype as self. 
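+        # A rough sketch (not required by the interface): an ndarray-backed
+        # subclass could support dtype-changing views along the lines of
+        #
+        #     if dtype is not None:
+        #         return self._ndarray.view(dtype)  # hypothetical storage attr
+        #
+        # The base implementation below only guarantees the dtype=None case.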
+        if dtype is not None:
+            raise NotImplementedError(dtype)
+        return self[:]
+
+    # ------------------------------------------------------------------------
+    # Printing
+    # ------------------------------------------------------------------------
+
+    def __repr__(self) -> str:
+        if self.ndim > 1:
+            return self._repr_2d()
+
+        from pandas.io.formats.printing import format_object_summary
+
+        # the short repr has no trailing newline, while the truncated
+        # repr does. So we include a newline in our template, and strip
+        # any trailing newlines from format_object_summary
+        data = format_object_summary(
+            self, self._formatter(), indent_for_name=False
+        ).rstrip(", \n")
+        class_name = f"<{type(self).__name__}>\n"
+        return f"{class_name}{data}\nLength: {len(self)}, dtype: {self.dtype}"
+
+    def _repr_2d(self) -> str:
+        from pandas.io.formats.printing import format_object_summary
+
+        # the short repr has no trailing newline, while the truncated
+        # repr does. So we include a newline in our template, and strip
+        # any trailing newlines from format_object_summary
+        lines = [
+            format_object_summary(x, self._formatter(), indent_for_name=False).rstrip(
+                ", \n"
+            )
+            for x in self
+        ]
+        data = ",\n".join(lines)
+        class_name = f"<{type(self).__name__}>"
+        return f"{class_name}\n[\n{data}\n]\nShape: {self.shape}, dtype: {self.dtype}"
+
+    def _formatter(self, boxed: bool = False) -> Callable[[Any], str | None]:
+        """
+        Formatting function for scalar values.
+
+        This is used in the default '__repr__'. The returned formatting
+        function receives instances of your scalar type.
+
+        Parameters
+        ----------
+        boxed : bool, default False
+            An indicator of whether your array is being printed
+            within a Series, DataFrame, or Index (True), or just by
+            itself (False). This may be useful if you want scalar values
+            to appear differently within a Series versus on its own (e.g.
+            quoted or not).
+
+        Returns
+        -------
+        Callable[[Any], str]
+            A callable that gets instances of the scalar type and
+            returns a string. By default, :func:`repr` is used
+            when ``boxed=False`` and :func:`str` is used when
+            ``boxed=True``.
+        """
+        if boxed:
+            return str
+        return repr
+
+    # ------------------------------------------------------------------------
+    # Reshaping
+    # ------------------------------------------------------------------------
+
+    def transpose(self, *axes: int) -> ExtensionArray:
+        """
+        Return a transposed view on this array.
+
+        Because ExtensionArrays are always 1D, this is a no-op. It is included
+        for compatibility with np.ndarray.
+        """
+        return self[:]
+
+    @property
+    def T(self) -> ExtensionArray:
+        return self.transpose()
+
+    def ravel(self, order: Literal["C", "F", "A", "K"] | None = "C") -> ExtensionArray:
+        """
+        Return a flattened view on this array.
+
+        Parameters
+        ----------
+        order : {None, 'C', 'F', 'A', 'K'}, default 'C'
+
+        Returns
+        -------
+        ExtensionArray
+
+        Notes
+        -----
+        - Because ExtensionArrays are 1D-only, this is a no-op.
+        - The "order" argument is ignored; it exists only for compatibility
+          with NumPy.
+        """
+        return self
+
+    @classmethod
+    def _concat_same_type(
+        cls: type[ExtensionArrayT], to_concat: Sequence[ExtensionArrayT]
+    ) -> ExtensionArrayT:
+        """
+        Concatenate multiple arrays of this dtype.
+
+        Parameters
+        ----------
+        to_concat : sequence of this type
+
+        Returns
+        -------
+        ExtensionArray
+        """
+        # Implementer note: this method will only be called with a sequence of
+        # ExtensionArrays of this class and with the same dtype as self.
This + # should allow "easy" concatenation (no upcasting needed), and result + # in a new ExtensionArray of the same dtype. + # Note: this strict behaviour is only guaranteed starting with pandas 1.1 + raise AbstractMethodError(cls) + + # The _can_hold_na attribute is set to True so that pandas internals + # will use the ExtensionDtype.na_value as the NA value in operations + # such as take(), reindex(), shift(), etc. In addition, those results + # will then be of the ExtensionArray subclass rather than an array + # of objects + @cache_readonly + def _can_hold_na(self) -> bool: + return self.dtype._can_hold_na + + def _reduce(self, name: str, *, skipna: bool = True, **kwargs): + """ + Return a scalar result of performing the reduction operation. + + Parameters + ---------- + name : str + Name of the function, supported values are: + { any, all, min, max, sum, mean, median, prod, + std, var, sem, kurt, skew }. + skipna : bool, default True + If True, skip NaN values. + **kwargs + Additional keyword arguments passed to the reduction function. + Currently, `ddof` is the only supported kwarg. + + Returns + ------- + scalar + + Raises + ------ + TypeError : subclass does not define reductions + """ + meth = getattr(self, name, None) + if meth is None: + raise TypeError( + f"'{type(self).__name__}' with dtype {self.dtype} " + f"does not support reduction '{name}'" + ) + return meth(skipna=skipna, **kwargs) + + # https://github.com/python/typeshed/issues/2148#issuecomment-520783318 + # Incompatible types in assignment (expression has type "None", base class + # "object" defined the type as "Callable[[object], int]") + __hash__: None # type: ignore[assignment] + + # ------------------------------------------------------------------------ + # Non-Optimized Default Methods + + def tolist(self) -> list: + """ + Return a list of the values. + + These are each a scalar type, which is a Python scalar + (for str, int, float) or a pandas scalar + (for Timestamp/Timedelta/Interval/Period) + + Returns + ------- + list + """ + if self.ndim > 1: + return [x.tolist() for x in self] + return list(self) + + def delete(self: ExtensionArrayT, loc: PositionalIndexer) -> ExtensionArrayT: + indexer = np.delete(np.arange(len(self)), loc) + return self.take(indexer) + + def insert(self: ExtensionArrayT, loc: int, item) -> ExtensionArrayT: + """ + Insert an item at the given position. + + Parameters + ---------- + loc : int + item : scalar-like + + Returns + ------- + same type as self + + Notes + ----- + This method should be both type and dtype-preserving. If the item + cannot be held in an array of this type/dtype, either ValueError or + TypeError should be raised. + + The default implementation relies on _from_sequence to raise on invalid + items. + """ + loc = validate_insert_loc(loc, len(self)) + + item_arr = type(self)._from_sequence([item], dtype=self.dtype) + + return type(self)._concat_same_type([self[:loc], item_arr, self[loc:]]) + + def _putmask(self, mask: npt.NDArray[np.bool_], value) -> None: + """ + Analogue to np.putmask(self, mask, value) + + Parameters + ---------- + mask : np.ndarray[bool] + value : scalar or listlike + If listlike, must be arraylike with same length as self. + + Returns + ------- + None + + Notes + ----- + Unlike np.putmask, we do not repeat listlike values with mismatched length. + 'value' should either be a scalar or an arraylike with the same length + as self. 
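+
+        Examples
+        --------
+        A minimal sketch of the expected semantics (assuming a masked integer
+        array from :func:`pandas.array`; exact reprs may vary by subclass):
+
+        >>> arr = pd.array([1, 2, 3])
+        >>> arr._putmask(np.array([True, False, True]), pd.array([9, 8, 7]))
+        >>> arr
+        <IntegerArray>
+        [9, 2, 7]
+        Length: 3, dtype: Int64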
+ """ + if is_list_like(value): + val = value[mask] + else: + val = value + + self[mask] = val + + def _where( + self: ExtensionArrayT, mask: npt.NDArray[np.bool_], value + ) -> ExtensionArrayT: + """ + Analogue to np.where(mask, self, value) + + Parameters + ---------- + mask : np.ndarray[bool] + value : scalar or listlike + + Returns + ------- + same type as self + """ + result = self.copy() + + if is_list_like(value): + val = value[~mask] + else: + val = value + + result[~mask] = val + return result + + def _fill_mask_inplace( + self, method: str, limit, mask: npt.NDArray[np.bool_] + ) -> None: + """ + Replace values in locations specified by 'mask' using pad or backfill. + + See also + -------- + ExtensionArray.fillna + """ + func = missing.get_fill_func(method) + # NB: if we don't copy mask here, it may be altered inplace, which + # would mess up the `self[mask] = ...` below. + new_values, _ = func(self.astype(object), limit=limit, mask=mask.copy()) + new_values = self._from_sequence(new_values, dtype=self.dtype) + self[mask] = new_values[mask] + return + + def _rank( + self, + *, + axis: int = 0, + method: str = "average", + na_option: str = "keep", + ascending: bool = True, + pct: bool = False, + ): + """ + See Series.rank.__doc__. + """ + if axis != 0: + raise NotImplementedError + + # TODO: we only have tests that get here with dt64 and td64 + # TODO: all tests that get here use the defaults for all the kwds + return rank( + self, + axis=axis, + method=method, + na_option=na_option, + ascending=ascending, + pct=pct, + ) + + @classmethod + def _empty(cls, shape: Shape, dtype: ExtensionDtype): + """ + Create an ExtensionArray with the given shape and dtype. + + See also + -------- + ExtensionDtype.empty + ExtensionDtype.empty is the 'official' public version of this API. + """ + # Implementer note: while ExtensionDtype.empty is the public way to + # call this method, it is still required to implement this `_empty` + # method as well (it is called internally in pandas) + obj = cls._from_sequence([], dtype=dtype) + + taker = np.broadcast_to(np.intp(-1), shape) + result = obj.take(taker, allow_fill=True) + if not isinstance(result, cls) or dtype != result.dtype: + raise NotImplementedError( + f"Default 'empty' implementation is invalid for dtype='{dtype}'" + ) + return result + + def _quantile( + self: ExtensionArrayT, qs: npt.NDArray[np.float64], interpolation: str + ) -> ExtensionArrayT: + """ + Compute the quantiles of self for each quantile in `qs`. + + Parameters + ---------- + qs : np.ndarray[float64] + interpolation: str + + Returns + ------- + same type as self + """ + # asarray needed for Sparse, see GH#24600 + mask = np.asarray(self.isna()) + mask = np.atleast_2d(mask) + + arr = np.atleast_2d(np.asarray(self)) + fill_value = np.nan + + res_values = quantile_with_mask(arr, mask, fill_value, qs, interpolation) + + if self.ndim == 2: + # i.e. DatetimeArray + result = type(self)._from_sequence(res_values) + + else: + # shape[0] should be 1 as long as EAs are 1D + assert res_values.shape == (1, len(qs)), res_values.shape + result = type(self)._from_sequence(res_values[0]) + + return result + + def _mode(self: ExtensionArrayT, dropna: bool = True) -> ExtensionArrayT: + """ + Returns the mode(s) of the ExtensionArray. + + Always returns `ExtensionArray` even if only one value. + + Parameters + ---------- + dropna : bool, default True + Don't consider counts of NA values. + + Returns + ------- + same type as self + Sorted, if possible. 
+ """ + # error: Incompatible return value type (got "Union[ExtensionArray, + # ndarray[Any, Any]]", expected "ExtensionArrayT") + return mode(self, dropna=dropna) # type: ignore[return-value] + + def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): + if any( + isinstance(other, (ABCSeries, ABCIndex, ABCDataFrame)) for other in inputs + ): + return NotImplemented + + result = arraylike.maybe_dispatch_ufunc_to_dunder_op( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + return result + + if "out" in kwargs: + return arraylike.dispatch_ufunc_with_out( + self, ufunc, method, *inputs, **kwargs + ) + + if method == "reduce": + result = arraylike.dispatch_reduction_ufunc( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + return result + + return arraylike.default_array_ufunc(self, ufunc, method, *inputs, **kwargs) + + +class ExtensionOpsMixin: + """ + A base class for linking the operators to their dunder names. + + .. note:: + + You may want to set ``__array_priority__`` if you want your + implementation to be called when involved in binary operations + with NumPy arrays. + """ + + @classmethod + def _create_arithmetic_method(cls, op): + raise AbstractMethodError(cls) + + @classmethod + def _add_arithmetic_ops(cls): + setattr(cls, "__add__", cls._create_arithmetic_method(operator.add)) + setattr(cls, "__radd__", cls._create_arithmetic_method(roperator.radd)) + setattr(cls, "__sub__", cls._create_arithmetic_method(operator.sub)) + setattr(cls, "__rsub__", cls._create_arithmetic_method(roperator.rsub)) + setattr(cls, "__mul__", cls._create_arithmetic_method(operator.mul)) + setattr(cls, "__rmul__", cls._create_arithmetic_method(roperator.rmul)) + setattr(cls, "__pow__", cls._create_arithmetic_method(operator.pow)) + setattr(cls, "__rpow__", cls._create_arithmetic_method(roperator.rpow)) + setattr(cls, "__mod__", cls._create_arithmetic_method(operator.mod)) + setattr(cls, "__rmod__", cls._create_arithmetic_method(roperator.rmod)) + setattr(cls, "__floordiv__", cls._create_arithmetic_method(operator.floordiv)) + setattr( + cls, "__rfloordiv__", cls._create_arithmetic_method(roperator.rfloordiv) + ) + setattr(cls, "__truediv__", cls._create_arithmetic_method(operator.truediv)) + setattr(cls, "__rtruediv__", cls._create_arithmetic_method(roperator.rtruediv)) + setattr(cls, "__divmod__", cls._create_arithmetic_method(divmod)) + setattr(cls, "__rdivmod__", cls._create_arithmetic_method(roperator.rdivmod)) + + @classmethod + def _create_comparison_method(cls, op): + raise AbstractMethodError(cls) + + @classmethod + def _add_comparison_ops(cls): + setattr(cls, "__eq__", cls._create_comparison_method(operator.eq)) + setattr(cls, "__ne__", cls._create_comparison_method(operator.ne)) + setattr(cls, "__lt__", cls._create_comparison_method(operator.lt)) + setattr(cls, "__gt__", cls._create_comparison_method(operator.gt)) + setattr(cls, "__le__", cls._create_comparison_method(operator.le)) + setattr(cls, "__ge__", cls._create_comparison_method(operator.ge)) + + @classmethod + def _create_logical_method(cls, op): + raise AbstractMethodError(cls) + + @classmethod + def _add_logical_ops(cls): + setattr(cls, "__and__", cls._create_logical_method(operator.and_)) + setattr(cls, "__rand__", cls._create_logical_method(roperator.rand_)) + setattr(cls, "__or__", cls._create_logical_method(operator.or_)) + setattr(cls, "__ror__", cls._create_logical_method(roperator.ror_)) + setattr(cls, "__xor__", 
cls._create_logical_method(operator.xor)) + setattr(cls, "__rxor__", cls._create_logical_method(roperator.rxor)) + + +class ExtensionScalarOpsMixin(ExtensionOpsMixin): + """ + A mixin for defining ops on an ExtensionArray. + + It is assumed that the underlying scalar objects have the operators + already defined. + + Notes + ----- + If you have defined a subclass MyExtensionArray(ExtensionArray), then + use MyExtensionArray(ExtensionArray, ExtensionScalarOpsMixin) to + get the arithmetic operators. After the definition of MyExtensionArray, + insert the lines + + MyExtensionArray._add_arithmetic_ops() + MyExtensionArray._add_comparison_ops() + + to link the operators to your class. + + .. note:: + + You may want to set ``__array_priority__`` if you want your + implementation to be called when involved in binary operations + with NumPy arrays. + """ + + @classmethod + def _create_method(cls, op, coerce_to_dtype=True, result_dtype=None): + """ + A class method that returns a method that will correspond to an + operator for an ExtensionArray subclass, by dispatching to the + relevant operator defined on the individual elements of the + ExtensionArray. + + Parameters + ---------- + op : function + An operator that takes arguments op(a, b) + coerce_to_dtype : bool, default True + boolean indicating whether to attempt to convert + the result to the underlying ExtensionArray dtype. + If it's not possible to create a new ExtensionArray with the + values, an ndarray is returned instead. + + Returns + ------- + Callable[[Any, Any], Union[ndarray, ExtensionArray]] + A method that can be bound to a class. When used, the method + receives the two arguments, one of which is the instance of + this class, and should return an ExtensionArray or an ndarray. + + Returning an ndarray may be necessary when the result of the + `op` cannot be stored in the ExtensionArray. The dtype of the + ndarray uses NumPy's normal inference rules. + + Examples + -------- + Given an ExtensionArray subclass called MyExtensionArray, use + + __add__ = cls._create_method(operator.add) + + in the class definition of MyExtensionArray to create the operator + for addition, that will be based on the operator implementation + of the underlying elements of the ExtensionArray + """ + + def _binop(self, other): + def convert_values(param): + if isinstance(param, ExtensionArray) or is_list_like(param): + ovalues = param + else: # Assume its an object + ovalues = [param] * len(self) + return ovalues + + if isinstance(other, (ABCSeries, ABCIndex, ABCDataFrame)): + # rely on pandas to unbox and dispatch to us + return NotImplemented + + lvalues = self + rvalues = convert_values(other) + + # If the operator is not defined for the underlying objects, + # a TypeError should be raised + res = [op(a, b) for (a, b) in zip(lvalues, rvalues)] + + def _maybe_convert(arr): + if coerce_to_dtype: + # https://github.com/pandas-dev/pandas/issues/22850 + # We catch all regular exceptions here, and fall back + # to an ndarray. 
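+                    # For example, a hypothetical DecimalArray's arithmetic op
+                    # yields Decimal objects, which round-trip through
+                    # _from_sequence below; a result of an incompatible kind
+                    # would fail that cast and come back as a plain ndarray.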
+ res = maybe_cast_to_extension_array(type(self), arr) + if not isinstance(res, type(self)): + # exception raised in _from_sequence; ensure we have ndarray + res = np.asarray(arr) + else: + res = np.asarray(arr, dtype=result_dtype) + return res + + if op.__name__ in {"divmod", "rdivmod"}: + a, b = zip(*res) + return _maybe_convert(a), _maybe_convert(b) + + return _maybe_convert(res) + + op_name = f"__{op.__name__}__" + return set_function_name(_binop, op_name, cls) + + @classmethod + def _create_arithmetic_method(cls, op): + return cls._create_method(op) + + @classmethod + def _create_comparison_method(cls, op): + return cls._create_method(op, coerce_to_dtype=False, result_dtype=bool) diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/boolean.py b/.local/lib/python3.8/site-packages/pandas/core/arrays/boolean.py new file mode 100644 index 0000000..d501af6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/arrays/boolean.py @@ -0,0 +1,535 @@ +from __future__ import annotations + +import numbers +from typing import ( + TYPE_CHECKING, + overload, +) + +import numpy as np + +from pandas._libs import ( + lib, + missing as libmissing, +) +from pandas._typing import ( + ArrayLike, + AstypeArg, + Dtype, + DtypeObj, + npt, + type_t, +) + +from pandas.core.dtypes.common import ( + is_bool_dtype, + is_float_dtype, + is_integer_dtype, + is_list_like, + is_numeric_dtype, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import ( + ExtensionDtype, + register_extension_dtype, +) +from pandas.core.dtypes.missing import isna + +from pandas.core import ops +from pandas.core.arrays import ExtensionArray +from pandas.core.arrays.masked import ( + BaseMaskedArray, + BaseMaskedDtype, +) + +if TYPE_CHECKING: + import pyarrow + + +@register_extension_dtype +class BooleanDtype(BaseMaskedDtype): + """ + Extension dtype for boolean data. + + .. versionadded:: 1.0.0 + + .. warning:: + + BooleanDtype is considered experimental. The implementation and + parts of the API may change without warning. + + Attributes + ---------- + None + + Methods + ------- + None + + Examples + -------- + >>> pd.BooleanDtype() + BooleanDtype + """ + + name = "boolean" + + # https://github.com/python/mypy/issues/4125 + # error: Signature of "type" incompatible with supertype "BaseMaskedDtype" + @property + def type(self) -> type: # type: ignore[override] + return np.bool_ + + @property + def kind(self) -> str: + return "b" + + @property + def numpy_dtype(self) -> np.dtype: + return np.dtype("bool") + + @classmethod + def construct_array_type(cls) -> type_t[BooleanArray]: + """ + Return the array type associated with this dtype. + + Returns + ------- + type + """ + return BooleanArray + + def __repr__(self) -> str: + return "BooleanDtype" + + @property + def _is_boolean(self) -> bool: + return True + + @property + def _is_numeric(self) -> bool: + return True + + def __from_arrow__( + self, array: pyarrow.Array | pyarrow.ChunkedArray + ) -> BooleanArray: + """ + Construct BooleanArray from pyarrow Array/ChunkedArray. 
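+
+        Examples
+        --------
+        A usage sketch (assumes pyarrow is installed):
+
+        >>> import pyarrow as pa
+        >>> pd.BooleanDtype().__from_arrow__(pa.array([True, None, False]))
+        <BooleanArray>
+        [True, <NA>, False]
+        Length: 3, dtype: boolean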
+ """ + import pyarrow + + if array.type != pyarrow.bool_(): + raise TypeError(f"Expected array of boolean type, got {array.type} instead") + + if isinstance(array, pyarrow.Array): + chunks = [array] + else: + # pyarrow.ChunkedArray + chunks = array.chunks + + results = [] + for arr in chunks: + buflist = arr.buffers() + data = pyarrow.BooleanArray.from_buffers( + arr.type, len(arr), [None, buflist[1]], offset=arr.offset + ).to_numpy(zero_copy_only=False) + if arr.null_count != 0: + mask = pyarrow.BooleanArray.from_buffers( + arr.type, len(arr), [None, buflist[0]], offset=arr.offset + ).to_numpy(zero_copy_only=False) + mask = ~mask + else: + mask = np.zeros(len(arr), dtype=bool) + + bool_arr = BooleanArray(data, mask) + results.append(bool_arr) + + if not results: + return BooleanArray( + np.array([], dtype=np.bool_), np.array([], dtype=np.bool_) + ) + else: + return BooleanArray._concat_same_type(results) + + def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None: + # Handle only boolean + np.bool_ -> boolean, since other cases like + # Int64 + boolean -> Int64 will be handled by the other type + if all( + isinstance(t, BooleanDtype) + or (isinstance(t, np.dtype) and (np.issubdtype(t, np.bool_))) + for t in dtypes + ): + return BooleanDtype() + else: + return None + + +def coerce_to_array( + values, mask=None, copy: bool = False +) -> tuple[np.ndarray, np.ndarray]: + """ + Coerce the input values array to numpy arrays with a mask. + + Parameters + ---------- + values : 1D list-like + mask : bool 1D array, optional + copy : bool, default False + if True, copy the input + + Returns + ------- + tuple of (values, mask) + """ + if isinstance(values, BooleanArray): + if mask is not None: + raise ValueError("cannot pass mask for BooleanArray input") + values, mask = values._data, values._mask + if copy: + values = values.copy() + mask = mask.copy() + return values, mask + + mask_values = None + if isinstance(values, np.ndarray) and values.dtype == np.bool_: + if copy: + values = values.copy() + elif isinstance(values, np.ndarray) and is_numeric_dtype(values.dtype): + mask_values = isna(values) + + values_bool = np.zeros(len(values), dtype=bool) + values_bool[~mask_values] = values[~mask_values].astype(bool) + + if not np.all( + values_bool[~mask_values].astype(values.dtype) == values[~mask_values] + ): + raise TypeError("Need to pass bool-like values") + + values = values_bool + else: + values_object = np.asarray(values, dtype=object) + + inferred_dtype = lib.infer_dtype(values_object, skipna=True) + integer_like = ("floating", "integer", "mixed-integer-float") + if inferred_dtype not in ("boolean", "empty") + integer_like: + raise TypeError("Need to pass bool-like values") + + mask_values = isna(values_object) + values = np.zeros(len(values), dtype=bool) + values[~mask_values] = values_object[~mask_values].astype(bool) + + # if the values were integer-like, validate it were actually 0/1's + if (inferred_dtype in integer_like) and not ( + np.all( + values[~mask_values].astype(float) + == values_object[~mask_values].astype(float) + ) + ): + raise TypeError("Need to pass bool-like values") + + if mask is None and mask_values is None: + mask = np.zeros(len(values), dtype=bool) + elif mask is None: + mask = mask_values + else: + if isinstance(mask, np.ndarray) and mask.dtype == np.bool_: + if mask_values is not None: + mask = mask | mask_values + else: + if copy: + mask = mask.copy() + else: + mask = np.array(mask, dtype=bool) + if mask_values is not None: + mask = mask | mask_values 
+ + if values.shape != mask.shape: + raise ValueError("values.shape and mask.shape must match") + + return values, mask + + +class BooleanArray(BaseMaskedArray): + """ + Array of boolean (True/False) data with missing values. + + This is a pandas Extension array for boolean data, under the hood + represented by 2 numpy arrays: a boolean array with the data and + a boolean array with the mask (True indicating missing). + + BooleanArray implements Kleene logic (sometimes called three-value + logic) for logical operations. See :ref:`boolean.kleene` for more. + + To construct an BooleanArray from generic array-like input, use + :func:`pandas.array` specifying ``dtype="boolean"`` (see examples + below). + + .. versionadded:: 1.0.0 + + .. warning:: + + BooleanArray is considered experimental. The implementation and + parts of the API may change without warning. + + Parameters + ---------- + values : numpy.ndarray + A 1-d boolean-dtype array with the data. + mask : numpy.ndarray + A 1-d boolean-dtype array indicating missing values (True + indicates missing). + copy : bool, default False + Whether to copy the `values` and `mask` arrays. + + Attributes + ---------- + None + + Methods + ------- + None + + Returns + ------- + BooleanArray + + Examples + -------- + Create an BooleanArray with :func:`pandas.array`: + + >>> pd.array([True, False, None], dtype="boolean") + + [True, False, ] + Length: 3, dtype: boolean + """ + + # The value used to fill '_data' to avoid upcasting + _internal_fill_value = False + # Fill values used for any/all + _truthy_value = True + _falsey_value = False + _TRUE_VALUES = {"True", "TRUE", "true", "1", "1.0"} + _FALSE_VALUES = {"False", "FALSE", "false", "0", "0.0"} + + def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False): + if not (isinstance(values, np.ndarray) and values.dtype == np.bool_): + raise TypeError( + "values should be boolean numpy array. Use " + "the 'pd.array' function instead" + ) + self._dtype = BooleanDtype() + super().__init__(values, mask, copy=copy) + + @property + def dtype(self) -> BooleanDtype: + return self._dtype + + @classmethod + def _from_sequence( + cls, scalars, *, dtype: Dtype | None = None, copy: bool = False + ) -> BooleanArray: + if dtype: + assert dtype == "boolean" + values, mask = coerce_to_array(scalars, copy=copy) + return BooleanArray(values, mask) + + @classmethod + def _from_sequence_of_strings( + cls, + strings: list[str], + *, + dtype: Dtype | None = None, + copy: bool = False, + true_values: list[str] | None = None, + false_values: list[str] | None = None, + ) -> BooleanArray: + true_values_union = cls._TRUE_VALUES.union(true_values or []) + false_values_union = cls._FALSE_VALUES.union(false_values or []) + + def map_string(s): + if isna(s): + return s + elif s in true_values_union: + return True + elif s in false_values_union: + return False + else: + raise ValueError(f"{s} cannot be cast to bool") + + scalars = [map_string(x) for x in strings] + return cls._from_sequence(scalars, dtype=dtype, copy=copy) + + _HANDLED_TYPES = (np.ndarray, numbers.Number, bool, np.bool_) + + def _coerce_to_array(self, value) -> tuple[np.ndarray, np.ndarray]: + return coerce_to_array(value) + + @overload + def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray: + ... + + @overload + def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray: + ... + + @overload + def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike: + ... 
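+
+    # Note: the three @overload stubs above exist only for static typing;
+    # they narrow the return type per dtype argument and are erased at
+    # runtime, where only the implementation below is executed.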
+ + def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: + + """ + Cast to a NumPy array or ExtensionArray with 'dtype'. + + Parameters + ---------- + dtype : str or dtype + Typecode or data-type to which the array is cast. + copy : bool, default True + Whether to copy the data, even if not necessary. If False, + a copy is made only if the old dtype does not match the + new dtype. + + Returns + ------- + ndarray or ExtensionArray + NumPy ndarray, BooleanArray or IntegerArray with 'dtype' for its dtype. + + Raises + ------ + TypeError + if incompatible type with an BooleanDtype, equivalent of same_kind + casting + """ + dtype = pandas_dtype(dtype) + + if isinstance(dtype, ExtensionDtype): + return super().astype(dtype, copy) + + if is_bool_dtype(dtype): + # astype_nansafe converts np.nan to True + if self._hasna: + raise ValueError("cannot convert float NaN to bool") + else: + return self._data.astype(dtype, copy=copy) + + # for integer, error if there are missing values + if is_integer_dtype(dtype) and self._hasna: + raise ValueError("cannot convert NA to integer") + + # for float dtype, ensure we use np.nan before casting (numpy cannot + # deal with pd.NA) + na_value = self._na_value + if is_float_dtype(dtype): + na_value = np.nan + # coerce + return self.to_numpy(dtype=dtype, na_value=na_value, copy=False) + + def _values_for_argsort(self) -> np.ndarray: + """ + Return values for sorting. + + Returns + ------- + ndarray + The transformed values should maintain the ordering between values + within the array. + + See Also + -------- + ExtensionArray.argsort : Return the indices that would sort this array. + """ + data = self._data.copy() + data[self._mask] = -1 + return data + + def _logical_method(self, other, op): + + assert op.__name__ in {"or_", "ror_", "and_", "rand_", "xor", "rxor"} + other_is_booleanarray = isinstance(other, BooleanArray) + other_is_scalar = lib.is_scalar(other) + mask = None + + if other_is_booleanarray: + other, mask = other._data, other._mask + elif is_list_like(other): + other = np.asarray(other, dtype="bool") + if other.ndim > 1: + raise NotImplementedError("can only perform ops with 1-d structures") + other, mask = coerce_to_array(other, copy=False) + elif isinstance(other, np.bool_): + other = other.item() + + if other_is_scalar and other is not libmissing.NA and not lib.is_bool(other): + raise TypeError( + "'other' should be pandas.NA or a bool. " + f"Got {type(other).__name__} instead." 
+ ) + + if not other_is_scalar and len(self) != len(other): + raise ValueError("Lengths must match to compare") + + if op.__name__ in {"or_", "ror_"}: + result, mask = ops.kleene_or(self._data, other, self._mask, mask) + elif op.__name__ in {"and_", "rand_"}: + result, mask = ops.kleene_and(self._data, other, self._mask, mask) + elif op.__name__ in {"xor", "rxor"}: + result, mask = ops.kleene_xor(self._data, other, self._mask, mask) + + # error: Argument 2 to "BooleanArray" has incompatible type "Optional[Any]"; + # expected "ndarray" + return BooleanArray(result, mask) # type: ignore[arg-type] + + def _arith_method(self, other, op): + mask = None + op_name = op.__name__ + + if isinstance(other, BooleanArray): + other, mask = other._data, other._mask + + elif is_list_like(other): + other = np.asarray(other) + if other.ndim > 1: + raise NotImplementedError("can only perform ops with 1-d structures") + if len(self) != len(other): + raise ValueError("Lengths must match") + + # nans propagate + if mask is None: + mask = self._mask + if other is libmissing.NA: + mask |= True + else: + mask = self._mask | mask + + if other is libmissing.NA: + # if other is NA, the result will be all NA and we can't run the + # actual op, so we need to choose the resulting dtype manually + if op_name in {"floordiv", "rfloordiv", "mod", "rmod", "pow", "rpow"}: + dtype = "int8" + elif op_name in {"truediv", "rtruediv"}: + dtype = "float64" + else: + dtype = "bool" + result = np.zeros(len(self._data), dtype=dtype) + else: + if op_name in {"pow", "rpow"} and isinstance(other, np.bool_): + # Avoid DeprecationWarning: In future, it will be an error + # for 'np.bool_' scalars to be interpreted as an index + other = bool(other) + + with np.errstate(all="ignore"): + result = op(self._data, other) + + # divmod returns a tuple + if op_name == "divmod": + div, mod = result + return ( + self._maybe_mask_result(div, mask, other, "floordiv"), + self._maybe_mask_result(mod, mask, other, "mod"), + ) + + return self._maybe_mask_result(result, mask, other, op_name) + + def __abs__(self): + return self.copy() diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/categorical.py b/.local/lib/python3.8/site-packages/pandas/core/arrays/categorical.py new file mode 100644 index 0000000..0202efd --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/arrays/categorical.py @@ -0,0 +1,2882 @@ +from __future__ import annotations + +from csv import QUOTE_NONNUMERIC +from functools import partial +import operator +from shutil import get_terminal_size +from typing import ( + TYPE_CHECKING, + Hashable, + Sequence, + TypeVar, + Union, + cast, + overload, +) +from warnings import ( + catch_warnings, + simplefilter, + warn, +) + +import numpy as np + +from pandas._config import get_option + +from pandas._libs import ( + NaT, + algos as libalgos, + hashtable as htable, + lib, +) +from pandas._libs.arrays import NDArrayBacked +from pandas._libs.lib import no_default +from pandas._typing import ( + ArrayLike, + AstypeArg, + Dtype, + NpDtype, + Ordered, + Shape, + npt, + type_t, +) +from pandas.compat.numpy import function as nv +from pandas.util._decorators import ( + cache_readonly, + deprecate_kwarg, +) +from pandas.util._exceptions import find_stack_level +from pandas.util._validators import validate_bool_kwarg + +from pandas.core.dtypes.cast import ( + coerce_indexer_dtype, + maybe_cast_to_extension_array, +) +from pandas.core.dtypes.common import ( + ensure_int64, + ensure_platform_int, + is_categorical_dtype, + 
is_datetime64_dtype, + is_dict_like, + is_dtype_equal, + is_extension_array_dtype, + is_hashable, + is_integer_dtype, + is_list_like, + is_object_dtype, + is_scalar, + is_timedelta64_dtype, + needs_i8_conversion, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + ExtensionDtype, +) +from pandas.core.dtypes.generic import ( + ABCIndex, + ABCSeries, +) +from pandas.core.dtypes.missing import ( + is_valid_na_for_dtype, + isna, + notna, +) + +from pandas.core import ( + arraylike, + ops, +) +from pandas.core.accessor import ( + PandasDelegate, + delegate_names, +) +import pandas.core.algorithms as algorithms +from pandas.core.algorithms import ( + factorize, + take_nd, + unique1d, +) +from pandas.core.arrays._mixins import ( + NDArrayBackedExtensionArray, + ravel_compat, +) +from pandas.core.base import ( + ExtensionArray, + NoNewAttributesMixin, + PandasObject, +) +import pandas.core.common as com +from pandas.core.construction import ( + extract_array, + sanitize_array, +) +from pandas.core.ops.common import unpack_zerodim_and_defer +from pandas.core.sorting import nargsort +from pandas.core.strings.object_array import ObjectStringArrayMixin + +from pandas.io.formats import console + +if TYPE_CHECKING: + from pandas import Index + + +CategoricalT = TypeVar("CategoricalT", bound="Categorical") + + +def _cat_compare_op(op): + opname = f"__{op.__name__}__" + fill_value = True if op is operator.ne else False + + @unpack_zerodim_and_defer(opname) + def func(self, other): + hashable = is_hashable(other) + if is_list_like(other) and len(other) != len(self) and not hashable: + # in hashable case we may have a tuple that is itself a category + raise ValueError("Lengths must match.") + + if not self.ordered: + if opname in ["__lt__", "__gt__", "__le__", "__ge__"]: + raise TypeError( + "Unordered Categoricals can only compare equality or not" + ) + if isinstance(other, Categorical): + # Two Categoricals can only be compared if the categories are + # the same (maybe up to ordering, depending on ordered) + + msg = "Categoricals can only be compared if 'categories' are the same." + if not self._categories_match_up_to_permutation(other): + raise TypeError(msg) + + if not self.ordered and not self.categories.equals(other.categories): + # both unordered and different order + other_codes = recode_for_categories( + other.codes, other.categories, self.categories, copy=False + ) + else: + other_codes = other._codes + + ret = op(self._codes, other_codes) + mask = (self._codes == -1) | (other_codes == -1) + if mask.any(): + ret[mask] = fill_value + return ret + + if hashable: + if other in self.categories: + i = self._unbox_scalar(other) + ret = op(self._codes, i) + + if opname not in {"__eq__", "__ge__", "__gt__"}: + # GH#29820 performance trick; get_loc will always give i>=0, + # so in the cases (__ne__, __le__, __lt__) the setting + # here is a no-op, so can be skipped. + mask = self._codes == -1 + ret[mask] = fill_value + return ret + else: + return ops.invalid_comparison(self, other, op) + else: + # allow categorical vs object dtype array comparisons for equality + # these are only positional comparisons + if opname not in ["__eq__", "__ne__"]: + raise TypeError( + f"Cannot compare a Categorical for op {opname} with " + f"type {type(other)}.\nIf you want to compare values, " + "use 'np.asarray(cat) other'." 
+ ) + + if isinstance(other, ExtensionArray) and needs_i8_conversion(other.dtype): + # We would return NotImplemented here, but that messes up + # ExtensionIndex's wrapped methods + return op(other, self) + return getattr(np.array(self), opname)(np.array(other)) + + func.__name__ = opname + + return func + + +def contains(cat, key, container): + """ + Helper for membership check for ``key`` in ``cat``. + + This is a helper method for :method:`__contains__` + and :class:`CategoricalIndex.__contains__`. + + Returns True if ``key`` is in ``cat.categories`` and the + location of ``key`` in ``categories`` is in ``container``. + + Parameters + ---------- + cat : :class:`Categorical`or :class:`categoricalIndex` + key : a hashable object + The key to check membership for. + container : Container (e.g. list-like or mapping) + The container to check for membership in. + + Returns + ------- + is_in : bool + True if ``key`` is in ``self.categories`` and location of + ``key`` in ``categories`` is in ``container``, else False. + + Notes + ----- + This method does not check for NaN values. Do that separately + before calling this method. + """ + hash(key) + + # get location of key in categories. + # If a KeyError, the key isn't in categories, so logically + # can't be in container either. + try: + loc = cat.categories.get_loc(key) + except (KeyError, TypeError): + return False + + # loc is the location of key in categories, but also the *value* + # for key in container. So, `key` may be in categories, + # but still not in `container`. Example ('b' in categories, + # but not in values): + # 'b' in Categorical(['a'], categories=['a', 'b']) # False + if is_scalar(loc): + return loc in container + else: + # if categories is an IntervalIndex, loc is an array. + return any(loc_ in container for loc_ in loc) + + +class Categorical(NDArrayBackedExtensionArray, PandasObject, ObjectStringArrayMixin): + """ + Represent a categorical variable in classic R / S-plus fashion. + + `Categoricals` can only take on only a limited, and usually fixed, number + of possible values (`categories`). In contrast to statistical categorical + variables, a `Categorical` might have an order, but numerical operations + (additions, divisions, ...) are not possible. + + All values of the `Categorical` are either in `categories` or `np.nan`. + Assigning values outside of `categories` will raise a `ValueError`. Order + is defined by the order of the `categories`, not lexical order of the + values. + + Parameters + ---------- + values : list-like + The values of the categorical. If categories are given, values not in + categories will be replaced with NaN. + categories : Index-like (unique), optional + The unique categories for this categorical. If not given, the + categories are assumed to be the unique values of `values` (sorted, if + possible, otherwise in the order in which they appear). + ordered : bool, default False + Whether or not this categorical is treated as a ordered categorical. + If True, the resulting categorical will be ordered. + An ordered categorical respects, when sorted, the order of its + `categories` attribute (which in turn is the `categories` argument, if + provided). + dtype : CategoricalDtype + An instance of ``CategoricalDtype`` to use for this categorical. + + Attributes + ---------- + categories : Index + The categories of this categorical + codes : ndarray + The codes (integer positions, which point to the categories) of this + categorical, read only. 
+ ordered : bool + Whether or not this Categorical is ordered. + dtype : CategoricalDtype + The instance of ``CategoricalDtype`` storing the ``categories`` + and ``ordered``. + + Methods + ------- + from_codes + __array__ + + Raises + ------ + ValueError + If the categories do not validate. + TypeError + If an explicit ``ordered=True`` is given but no `categories` and the + `values` are not sortable. + + See Also + -------- + CategoricalDtype : Type for categorical data. + CategoricalIndex : An Index with an underlying ``Categorical``. + + Notes + ----- + See the `user guide + `__ + for more. + + Examples + -------- + >>> pd.Categorical([1, 2, 3, 1, 2, 3]) + [1, 2, 3, 1, 2, 3] + Categories (3, int64): [1, 2, 3] + + >>> pd.Categorical(['a', 'b', 'c', 'a', 'b', 'c']) + ['a', 'b', 'c', 'a', 'b', 'c'] + Categories (3, object): ['a', 'b', 'c'] + + Missing values are not included as a category. + + >>> c = pd.Categorical([1, 2, 3, 1, 2, 3, np.nan]) + >>> c + [1, 2, 3, 1, 2, 3, NaN] + Categories (3, int64): [1, 2, 3] + + However, their presence is indicated in the `codes` attribute + by code `-1`. + + >>> c.codes + array([ 0, 1, 2, 0, 1, 2, -1], dtype=int8) + + Ordered `Categoricals` can be sorted according to the custom order + of the categories and can have a min and max value. + + >>> c = pd.Categorical(['a', 'b', 'c', 'a', 'b', 'c'], ordered=True, + ... categories=['c', 'b', 'a']) + >>> c + ['a', 'b', 'c', 'a', 'b', 'c'] + Categories (3, object): ['c' < 'b' < 'a'] + >>> c.min() + 'c' + """ + + # For comparisons, so that numpy uses our implementation if the compare + # ops, which raise + __array_priority__ = 1000 + # tolist is not actually deprecated, just suppressed in the __dir__ + _hidden_attrs = PandasObject._hidden_attrs | frozenset(["tolist"]) + _typ = "categorical" + + _dtype: CategoricalDtype + + def __init__( + self, + values, + categories=None, + ordered=None, + dtype: Dtype | None = None, + fastpath: bool = False, + copy: bool = True, + ): + + dtype = CategoricalDtype._from_values_or_dtype( + values, categories, ordered, dtype + ) + # At this point, dtype is always a CategoricalDtype, but + # we may have dtype.categories be None, and we need to + # infer categories in a factorization step further below + + if fastpath: + codes = coerce_indexer_dtype(values, dtype.categories) + dtype = CategoricalDtype(ordered=False).update_dtype(dtype) + super().__init__(codes, dtype) + return + + if not is_list_like(values): + # GH#38433 + warn( + "Allowing scalars in the Categorical constructor is deprecated " + "and will raise in a future version. Use `[value]` instead", + FutureWarning, + stacklevel=find_stack_level(), + ) + values = [values] + + # null_mask indicates missing values we want to exclude from inference. + # This means: only missing values in list-likes (not arrays/ndframes). + null_mask = np.array(False) + + # sanitize input + if is_categorical_dtype(values): + if dtype.categories is None: + dtype = CategoricalDtype(values.categories, dtype.ordered) + elif not isinstance(values, (ABCIndex, ABCSeries, ExtensionArray)): + values = com.convert_to_list_like(values) + if isinstance(values, list) and len(values) == 0: + # By convention, empty lists result in object dtype: + values = np.array([], dtype=object) + elif isinstance(values, np.ndarray): + if values.ndim > 1: + # preempt sanitize_array from raising ValueError + raise NotImplementedError( + "> 1 ndim Categorical are not supported at this time" + ) + values = sanitize_array(values, None) + else: + # i.e. 
must be a list + arr = sanitize_array(values, None) + null_mask = isna(arr) + if null_mask.any(): + # We remove null values here, then below will re-insert + # them, grep "full_codes" + arr_list = [values[idx] for idx in np.where(~null_mask)[0]] + + # GH#44900 Do not cast to float if we have only missing values + if arr_list or arr.dtype == "object": + sanitize_dtype = None + else: + sanitize_dtype = arr.dtype + + arr = sanitize_array(arr_list, None, dtype=sanitize_dtype) + values = arr + + if dtype.categories is None: + try: + codes, categories = factorize(values, sort=True) + except TypeError as err: + codes, categories = factorize(values, sort=False) + if dtype.ordered: + # raise, as we don't have a sortable data structure and so + # the user should give us one by specifying categories + raise TypeError( + "'values' is not ordered, please " + "explicitly specify the categories order " + "by passing in a categories argument." + ) from err + + # we're inferring from values + dtype = CategoricalDtype(categories, dtype.ordered) + + elif is_categorical_dtype(values.dtype): + # error: Item "ExtensionArray" of "Union[Any, ExtensionArray]" has no + # attribute "_codes" + old_codes = extract_array(values)._codes # type: ignore[union-attr] + codes = recode_for_categories( + old_codes, values.dtype.categories, dtype.categories, copy=copy + ) + + else: + codes = _get_codes_for_values(values, dtype.categories) + + if null_mask.any(): + # Reinsert -1 placeholders for previously removed missing values + full_codes = -np.ones(null_mask.shape, dtype=codes.dtype) + full_codes[~null_mask] = codes + codes = full_codes + + dtype = CategoricalDtype(ordered=False).update_dtype(dtype) + arr = coerce_indexer_dtype(codes, dtype.categories) + # error: Argument 1 to "__init__" of "NDArrayBacked" has incompatible + # type "Union[ExtensionArray, ndarray]"; expected "ndarray" + super().__init__(arr, dtype) # type: ignore[arg-type] + + @property + def dtype(self) -> CategoricalDtype: + """ + The :class:`~pandas.api.types.CategoricalDtype` for this instance. + """ + return self._dtype + + @property + def _constructor(self) -> type[Categorical]: + return Categorical + + @classmethod + def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy=False): + return Categorical(scalars, dtype=dtype, copy=copy) + + @overload + def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray: + ... + + @overload + def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray: + ... + + @overload + def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike: + ... + + def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: + """ + Coerce this type to another dtype + + Parameters + ---------- + dtype : numpy dtype or pandas type + copy : bool, default True + By default, astype always returns a newly allocated object. + If copy is set to False and dtype is categorical, the original + object is returned. 
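+
+        Examples
+        --------
+        A minimal usage sketch:
+
+        >>> cat = pd.Categorical(['a', 'b', 'a'])
+        >>> cat.astype(object)
+        array(['a', 'b', 'a'], dtype=object)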
+ """ + dtype = pandas_dtype(dtype) + if self.dtype is dtype: + result = self.copy() if copy else self + + elif is_categorical_dtype(dtype): + dtype = cast("Union[str, CategoricalDtype]", dtype) + + # GH 10696/18593/18630 + dtype = self.dtype.update_dtype(dtype) + self = self.copy() if copy else self + result = self._set_dtype(dtype) + + elif isinstance(dtype, ExtensionDtype): + return super().astype(dtype, copy=copy) + + elif is_integer_dtype(dtype) and self.isna().any(): + raise ValueError("Cannot convert float NaN to integer") + + elif len(self.codes) == 0 or len(self.categories) == 0: + result = np.array( + self, + dtype=dtype, + copy=copy, + ) + + else: + # GH8628 (PERF): astype category codes instead of astyping array + new_cats = self.categories._values + + try: + new_cats = new_cats.astype(dtype=dtype, copy=copy) + fill_value = self.categories._na_value + if not is_valid_na_for_dtype(fill_value, dtype): + fill_value = lib.item_from_zerodim( + np.array(self.categories._na_value).astype(dtype) + ) + except ( + TypeError, # downstream error msg for CategoricalIndex is misleading + ValueError, + ): + msg = f"Cannot cast {self.categories.dtype} dtype to {dtype}" + raise ValueError(msg) + + result = take_nd( + new_cats, ensure_platform_int(self._codes), fill_value=fill_value + ) + + return result + + @cache_readonly + def itemsize(self) -> int: + """ + return the size of a single category + """ + return self.categories.itemsize + + def to_list(self): + """ + Alias for tolist. + """ + return self.tolist() + + @classmethod + def _from_inferred_categories( + cls, inferred_categories, inferred_codes, dtype, true_values=None + ): + """ + Construct a Categorical from inferred values. + + For inferred categories (`dtype` is None) the categories are sorted. + For explicit `dtype`, the `inferred_categories` are cast to the + appropriate type. + + Parameters + ---------- + inferred_categories : Index + inferred_codes : Index + dtype : CategoricalDtype or 'category' + true_values : list, optional + If none are provided, the default ones are + "True", "TRUE", and "true." + + Returns + ------- + Categorical + """ + from pandas import ( + Index, + to_datetime, + to_numeric, + to_timedelta, + ) + + cats = Index(inferred_categories) + known_categories = ( + isinstance(dtype, CategoricalDtype) and dtype.categories is not None + ) + + if known_categories: + # Convert to a specialized type with `dtype` if specified. + if dtype.categories.is_numeric(): + cats = to_numeric(inferred_categories, errors="coerce") + elif is_datetime64_dtype(dtype.categories): + cats = to_datetime(inferred_categories, errors="coerce") + elif is_timedelta64_dtype(dtype.categories): + cats = to_timedelta(inferred_categories, errors="coerce") + elif dtype.categories.is_boolean(): + if true_values is None: + true_values = ["True", "TRUE", "true"] + + # error: Incompatible types in assignment (expression has type + # "ndarray", variable has type "Index") + cats = cats.isin(true_values) # type: ignore[assignment] + + if known_categories: + # Recode from observation order to dtype.categories order. + categories = dtype.categories + codes = recode_for_categories(inferred_codes, cats, categories) + elif not cats.is_monotonic_increasing: + # Sort categories and recode for unknown categories. 
+ unsorted = cats.copy() + categories = cats.sort_values() + + codes = recode_for_categories(inferred_codes, unsorted, categories) + dtype = CategoricalDtype(categories, ordered=False) + else: + dtype = CategoricalDtype(cats, ordered=False) + codes = inferred_codes + + return cls(codes, dtype=dtype, fastpath=True) + + @classmethod + def from_codes( + cls, codes, categories=None, ordered=None, dtype: Dtype | None = None + ): + """ + Make a Categorical type from codes and categories or dtype. + + This constructor is useful if you already have codes and + categories/dtype and so do not need the (computation intensive) + factorization step, which is usually done on the constructor. + + If your data does not follow this convention, please use the normal + constructor. + + Parameters + ---------- + codes : array-like of int + An integer array, where each integer points to a category in + categories or dtype.categories, or else is -1 for NaN. + categories : index-like, optional + The categories for the categorical. Items need to be unique. + If the categories are not given here, then they must be provided + in `dtype`. + ordered : bool, optional + Whether or not this categorical is treated as an ordered + categorical. If not given here or in `dtype`, the resulting + categorical will be unordered. + dtype : CategoricalDtype or "category", optional + If :class:`CategoricalDtype`, cannot be used together with + `categories` or `ordered`. + + Returns + ------- + Categorical + + Examples + -------- + >>> dtype = pd.CategoricalDtype(['a', 'b'], ordered=True) + >>> pd.Categorical.from_codes(codes=[0, 1, 0, 1], dtype=dtype) + ['a', 'b', 'a', 'b'] + Categories (2, object): ['a' < 'b'] + """ + dtype = CategoricalDtype._from_values_or_dtype( + categories=categories, ordered=ordered, dtype=dtype + ) + if dtype.categories is None: + msg = ( + "The categories must be provided in 'categories' or " + "'dtype'. Both were None." + ) + raise ValueError(msg) + + if is_extension_array_dtype(codes) and is_integer_dtype(codes): + # Avoid the implicit conversion of Int to object + if isna(codes).any(): + raise ValueError("codes cannot contain NA values") + codes = codes.to_numpy(dtype=np.int64) + else: + codes = np.asarray(codes) + if len(codes) and not is_integer_dtype(codes): + raise ValueError("codes need to be array-like integers") + + if len(codes) and (codes.max() >= len(dtype.categories) or codes.min() < -1): + raise ValueError("codes need to be between -1 and len(categories)-1") + + return cls(codes, dtype=dtype, fastpath=True) + + # ------------------------------------------------------------------ + # Categories/Codes/Ordered + + @property + def categories(self): + """ + The categories of this categorical. + + Setting assigns new values to each category (effectively a rename of + each individual category). + + The assigned value has to be a list-like object. All items must be + unique and the number of items in the new categories must be the same + as the number of items in the old categories. + + Assigning to `categories` is a inplace operation! + + Raises + ------ + ValueError + If the new categories do not validate as categories or if the + number of new categories is unequal the number of old categories + + See Also + -------- + rename_categories : Rename categories. + reorder_categories : Reorder categories. + add_categories : Add new categories. + remove_categories : Remove the specified categories. + remove_unused_categories : Remove categories which are not used. 
+ set_categories : Set the categories to the specified ones. + """ + return self.dtype.categories + + @categories.setter + def categories(self, categories): + new_dtype = CategoricalDtype(categories, ordered=self.ordered) + if self.dtype.categories is not None and len(self.dtype.categories) != len( + new_dtype.categories + ): + raise ValueError( + "new categories need to have the same number of " + "items as the old categories!" + ) + super().__init__(self._ndarray, new_dtype) + + @property + def ordered(self) -> Ordered: + """ + Whether the categories have an ordered relationship. + """ + return self.dtype.ordered + + @property + def codes(self) -> np.ndarray: + """ + The category codes of this categorical. + + Codes are an array of integers which are the positions of the actual + values in the categories array. + + There is no setter, use the other categorical methods and the normal item + setter to change values in the categorical. + + Returns + ------- + ndarray[int] + A non-writable view of the `codes` array. + """ + v = self._codes.view() + v.flags.writeable = False + return v + + def _set_categories(self, categories, fastpath=False): + """ + Sets new categories inplace + + Parameters + ---------- + fastpath : bool, default False + Don't perform validation of the categories for uniqueness or nulls + + Examples + -------- + >>> c = pd.Categorical(['a', 'b']) + >>> c + ['a', 'b'] + Categories (2, object): ['a', 'b'] + + >>> c._set_categories(pd.Index(['a', 'c'])) + >>> c + ['a', 'c'] + Categories (2, object): ['a', 'c'] + """ + if fastpath: + new_dtype = CategoricalDtype._from_fastpath(categories, self.ordered) + else: + new_dtype = CategoricalDtype(categories, ordered=self.ordered) + if ( + not fastpath + and self.dtype.categories is not None + and len(new_dtype.categories) != len(self.dtype.categories) + ): + raise ValueError( + "new categories need to have the same number of " + "items than the old categories!" + ) + + super().__init__(self._ndarray, new_dtype) + + def _set_dtype(self, dtype: CategoricalDtype) -> Categorical: + """ + Internal method for directly updating the CategoricalDtype + + Parameters + ---------- + dtype : CategoricalDtype + + Notes + ----- + We don't do any validation here. It's assumed that the dtype is + a (valid) instance of `CategoricalDtype`. + """ + codes = recode_for_categories(self.codes, self.categories, dtype.categories) + return type(self)(codes, dtype=dtype, fastpath=True) + + def set_ordered(self, value, inplace=False): + """ + Set the ordered attribute to the boolean value. + + Parameters + ---------- + value : bool + Set whether this categorical is ordered (True) or not (False). + inplace : bool, default False + Whether or not to set the ordered attribute in-place or return + a copy of this categorical with ordered set to the value. + """ + inplace = validate_bool_kwarg(inplace, "inplace") + new_dtype = CategoricalDtype(self.categories, ordered=value) + cat = self if inplace else self.copy() + NDArrayBacked.__init__(cat, cat._ndarray, new_dtype) + if not inplace: + return cat + + def as_ordered(self, inplace=False): + """ + Set the Categorical to be ordered. + + Parameters + ---------- + inplace : bool, default False + Whether or not to set the ordered attribute in-place or return + a copy of this categorical with ordered set to True. + + Returns + ------- + Categorical or None + Ordered Categorical or None if ``inplace=True``. 
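+
+        Examples
+        --------
+        A minimal usage sketch:
+
+        >>> c = pd.Categorical(['a', 'b'])
+        >>> c.as_ordered()
+        ['a', 'b']
+        Categories (2, object): ['a' < 'b']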
+ """ + inplace = validate_bool_kwarg(inplace, "inplace") + return self.set_ordered(True, inplace=inplace) + + def as_unordered(self, inplace=False): + """ + Set the Categorical to be unordered. + + Parameters + ---------- + inplace : bool, default False + Whether or not to set the ordered attribute in-place or return + a copy of this categorical with ordered set to False. + + Returns + ------- + Categorical or None + Unordered Categorical or None if ``inplace=True``. + """ + inplace = validate_bool_kwarg(inplace, "inplace") + return self.set_ordered(False, inplace=inplace) + + def set_categories( + self, new_categories, ordered=None, rename=False, inplace=no_default + ): + """ + Set the categories to the specified new_categories. + + `new_categories` can include new categories (which will result in + unused categories) or remove old categories (which results in values + set to NaN). If `rename==True`, the categories will simple be renamed + (less or more items than in old categories will result in values set to + NaN or in unused categories respectively). + + This method can be used to perform more than one action of adding, + removing, and reordering simultaneously and is therefore faster than + performing the individual steps via the more specialised methods. + + On the other hand this methods does not do checks (e.g., whether the + old categories are included in the new categories on a reorder), which + can result in surprising changes, for example when using special string + dtypes, which does not considers a S1 string equal to a single char + python string. + + Parameters + ---------- + new_categories : Index-like + The categories in new order. + ordered : bool, default False + Whether or not the categorical is treated as a ordered categorical. + If not given, do not change the ordered information. + rename : bool, default False + Whether or not the new_categories should be considered as a rename + of the old categories or as reordered categories. + inplace : bool, default False + Whether or not to reorder the categories in-place or return a copy + of this categorical with reordered categories. + + .. deprecated:: 1.3.0 + + Returns + ------- + Categorical with reordered categories or None if inplace. + + Raises + ------ + ValueError + If new_categories does not validate as categories + + See Also + -------- + rename_categories : Rename categories. + reorder_categories : Reorder categories. + add_categories : Add new categories. + remove_categories : Remove the specified categories. + remove_unused_categories : Remove categories which are not used. + """ + if inplace is not no_default: + warn( + "The `inplace` parameter in pandas.Categorical." + "set_categories is deprecated and will be removed in " + "a future version. 
+                "a future version. Setting categories will always "
+                "return a new Categorical object.",
+                FutureWarning,
+                stacklevel=2,
+            )
+        else:
+            inplace = False
+
+        inplace = validate_bool_kwarg(inplace, "inplace")
+        if ordered is None:
+            ordered = self.dtype.ordered
+        new_dtype = CategoricalDtype(new_categories, ordered=ordered)
+
+        cat = self if inplace else self.copy()
+        if rename:
+            if cat.dtype.categories is not None and len(new_dtype.categories) < len(
+                cat.dtype.categories
+            ):
+                # remove all _codes which are larger and set to -1/NaN
+                cat._codes[cat._codes >= len(new_dtype.categories)] = -1
+            codes = cat._codes
+        else:
+            codes = recode_for_categories(
+                cat.codes, cat.categories, new_dtype.categories
+            )
+        NDArrayBacked.__init__(cat, codes, new_dtype)
+
+        if not inplace:
+            return cat
+
+    def rename_categories(self, new_categories, inplace=no_default):
+        """
+        Rename categories.
+
+        Parameters
+        ----------
+        new_categories : list-like, dict-like or callable
+
+            New categories which will replace old categories.
+
+            * list-like: all items must be unique and the number of items in
+              the new categories must match the existing number of categories.
+
+            * dict-like: specifies a mapping from
+              old categories to new. Categories not contained in the mapping
+              are passed through and extra categories in the mapping are
+              ignored.
+
+            * callable : a callable that is called on all items in the old
+              categories and whose return values comprise the new categories.
+
+        inplace : bool, default False
+            Whether or not to rename the categories inplace or return a copy of
+            this categorical with renamed categories.
+
+            .. deprecated:: 1.3.0
+
+        Returns
+        -------
+        cat : Categorical or None
+            Categorical with renamed categories or None if ``inplace=True``.
+
+        Raises
+        ------
+        ValueError
+            If new categories are list-like and do not have the same number of
+            items as the current categories or do not validate as categories
+
+        See Also
+        --------
+        reorder_categories : Reorder categories.
+        add_categories : Add new categories.
+        remove_categories : Remove the specified categories.
+        remove_unused_categories : Remove categories which are not used.
+        set_categories : Set the categories to the specified ones.
+
+        Examples
+        --------
+        >>> c = pd.Categorical(['a', 'a', 'b'])
+        >>> c.rename_categories([0, 1])
+        [0, 0, 1]
+        Categories (2, int64): [0, 1]
+
+        For dict-like ``new_categories``, extra keys are ignored and
+        categories not in the dictionary are passed through
+
+        >>> c.rename_categories({'a': 'A', 'c': 'C'})
+        ['A', 'A', 'b']
+        Categories (2, object): ['A', 'b']
+
+        You may also provide a callable to create the new categories
+
+        >>> c.rename_categories(lambda x: x.upper())
+        ['A', 'A', 'B']
+        Categories (2, object): ['A', 'B']
+        """
+        if inplace is not no_default:
+            warn(
+                "The `inplace` parameter in pandas.Categorical."
+                "rename_categories is deprecated and will be removed in "
+                "a future version. Renaming categories will always "
+                "return a new Categorical object.",
+                FutureWarning,
+                stacklevel=find_stack_level(),
+            )
+        else:
+            inplace = False
+
+        inplace = validate_bool_kwarg(inplace, "inplace")
+        cat = self if inplace else self.copy()
+
+        if is_dict_like(new_categories):
+            cat.categories = [new_categories.get(item, item) for item in cat.categories]
+        elif callable(new_categories):
+            cat.categories = [new_categories(item) for item in cat.categories]
+        else:
+            cat.categories = new_categories
+        if not inplace:
+            return cat
+
+    def reorder_categories(self, new_categories, ordered=None, inplace=no_default):
+        """
+        Reorder categories as specified in new_categories.
+
+        `new_categories` need to include all old categories and no new category
+        items.
+
+        Parameters
+        ----------
+        new_categories : Index-like
+            The categories in new order.
+        ordered : bool, optional
+            Whether or not the categorical is treated as an ordered categorical.
+            If not given, do not change the ordered information.
+        inplace : bool, default False
+            Whether or not to reorder the categories inplace or return a copy of
+            this categorical with reordered categories.
+
+            .. deprecated:: 1.3.0
+
+        Returns
+        -------
+        cat : Categorical or None
+            Categorical with reordered categories or None if ``inplace=True``.
+
+        Raises
+        ------
+        ValueError
+            If the new categories do not contain all old category items or if
+            they contain any new ones
+
+        See Also
+        --------
+        rename_categories : Rename categories.
+        add_categories : Add new categories.
+        remove_categories : Remove the specified categories.
+        remove_unused_categories : Remove categories which are not used.
+        set_categories : Set the categories to the specified ones.
+        """
+        if inplace is not no_default:
+            warn(
+                "The `inplace` parameter in pandas.Categorical."
+                "reorder_categories is deprecated and will be removed in "
+                "a future version. Reordering categories will always "
+                "return a new Categorical object.",
+                FutureWarning,
+                stacklevel=find_stack_level(),
+            )
+        else:
+            inplace = False
+
+        inplace = validate_bool_kwarg(inplace, "inplace")
+        if set(self.dtype.categories) != set(new_categories):
+            raise ValueError(
+                "items in new_categories are not the same as in old categories"
+            )
+
+        with catch_warnings():
+            simplefilter("ignore")
+            return self.set_categories(new_categories, ordered=ordered, inplace=inplace)
+
+    def add_categories(self, new_categories, inplace=no_default):
+        """
+        Add new categories.
+
+        `new_categories` will be included at the last/highest place in the
+        categories and will be unused directly after this call.
+
+        Parameters
+        ----------
+        new_categories : category or list-like of category
+            The new categories to be included.
+        inplace : bool, default False
+            Whether or not to add the categories inplace or return a copy of
+            this categorical with added categories.
+
+            .. deprecated:: 1.3.0
+
+        Returns
+        -------
+        cat : Categorical or None
+            Categorical with new categories added or None if ``inplace=True``.
+
+        Raises
+        ------
+        ValueError
+            If the new categories include old categories or do not validate as
+            categories
+
+        See Also
+        --------
+        rename_categories : Rename categories.
+        reorder_categories : Reorder categories.
+        remove_categories : Remove the specified categories.
+        remove_unused_categories : Remove categories which are not used.
+        set_categories : Set the categories to the specified ones.
+
+        Examples
+        --------
+        >>> c = pd.Categorical(['c', 'b', 'c'])
+        >>> c
+        ['c', 'b', 'c']
+        Categories (2, object): ['b', 'c']
+
+        >>> c.add_categories(['d', 'a'])
+        ['c', 'b', 'c']
+        Categories (4, object): ['b', 'c', 'd', 'a']
+        """
+        if inplace is not no_default:
+            warn(
+                "The `inplace` parameter in pandas.Categorical."
+                "add_categories is deprecated and will be removed in "
+                "a future version. Adding categories will always "
+                "return a new Categorical object.",
+                FutureWarning,
+                stacklevel=find_stack_level(),
+            )
+        else:
+            inplace = False
+
+        inplace = validate_bool_kwarg(inplace, "inplace")
+        if not is_list_like(new_categories):
+            new_categories = [new_categories]
+        already_included = set(new_categories) & set(self.dtype.categories)
+        if len(already_included) != 0:
+            raise ValueError(
+                f"new categories must not include old categories: {already_included}"
+            )
+        new_categories = list(self.dtype.categories) + list(new_categories)
+        new_dtype = CategoricalDtype(new_categories, self.ordered)
+
+        cat = self if inplace else self.copy()
+        codes = coerce_indexer_dtype(cat._ndarray, new_dtype.categories)
+        NDArrayBacked.__init__(cat, codes, new_dtype)
+        if not inplace:
+            return cat
+
+    def remove_categories(self, removals, inplace=no_default):
+        """
+        Remove the specified categories.
+
+        `removals` must be included in the old categories. Values which were in
+        the removed categories will be set to NaN.
+
+        Parameters
+        ----------
+        removals : category or list of categories
+            The categories which should be removed.
+        inplace : bool, default False
+            Whether or not to remove the categories inplace or return a copy of
+            this categorical with removed categories.
+
+            .. deprecated:: 1.3.0
+
+        Returns
+        -------
+        cat : Categorical or None
+            Categorical with removed categories or None if ``inplace=True``.
+
+        Raises
+        ------
+        ValueError
+            If the removals are not contained in the categories
+
+        See Also
+        --------
+        rename_categories : Rename categories.
+        reorder_categories : Reorder categories.
+        add_categories : Add new categories.
+        remove_unused_categories : Remove categories which are not used.
+        set_categories : Set the categories to the specified ones.
+
+        Examples
+        --------
+        >>> c = pd.Categorical(['a', 'c', 'b', 'c', 'd'])
+        >>> c
+        ['a', 'c', 'b', 'c', 'd']
+        Categories (4, object): ['a', 'b', 'c', 'd']
+
+        >>> c.remove_categories(['d', 'a'])
+        [NaN, 'c', 'b', 'c', NaN]
+        Categories (2, object): ['b', 'c']
+        """
+        if inplace is not no_default:
+            warn(
+                "The `inplace` parameter in pandas.Categorical."
+                "remove_categories is deprecated and will be removed in "
+                "a future version. Removing categories will always "
+                "return a new Categorical object.",
+                FutureWarning,
+                stacklevel=find_stack_level(),
+            )
+        else:
+            inplace = False
+
+        inplace = validate_bool_kwarg(inplace, "inplace")
+        if not is_list_like(removals):
+            removals = [removals]
+
+        removal_set = set(removals)
+        not_included = removal_set - set(self.dtype.categories)
+        new_categories = [c for c in self.dtype.categories if c not in removal_set]
+
+        # GH 10156
+        if any(isna(removals)):
+            not_included = {x for x in not_included if notna(x)}
+            new_categories = [x for x in new_categories if notna(x)]
+
+        if len(not_included) != 0:
+            raise ValueError(f"removals must all be in old categories: {not_included}")
+
+        with catch_warnings():
+            simplefilter("ignore")
+            return self.set_categories(
+                new_categories, ordered=self.ordered, rename=False, inplace=inplace
+            )
+
+    def remove_unused_categories(self, inplace=no_default):
+        """
+        Remove categories which are not used.
+
+        Parameters
+        ----------
+        inplace : bool, default False
+            Whether or not to drop unused categories inplace or return a copy of
+            this categorical with unused categories dropped.
+
+            .. deprecated:: 1.2.0
+
+        Returns
+        -------
+        cat : Categorical or None
+            Categorical with unused categories dropped or None if ``inplace=True``.
+
+        See Also
+        --------
+        rename_categories : Rename categories.
+        reorder_categories : Reorder categories.
+        add_categories : Add new categories.
+        remove_categories : Remove the specified categories.
+        set_categories : Set the categories to the specified ones.
+
+        Examples
+        --------
+        >>> c = pd.Categorical(['a', 'c', 'b', 'c', 'd'])
+        >>> c
+        ['a', 'c', 'b', 'c', 'd']
+        Categories (4, object): ['a', 'b', 'c', 'd']
+
+        >>> c[2] = 'a'
+        >>> c[4] = 'c'
+        >>> c
+        ['a', 'c', 'a', 'c', 'c']
+        Categories (4, object): ['a', 'b', 'c', 'd']
+
+        >>> c.remove_unused_categories()
+        ['a', 'c', 'a', 'c', 'c']
+        Categories (2, object): ['a', 'c']
+        """
+        if inplace is not no_default:
+            warn(
+                "The `inplace` parameter in pandas.Categorical."
+                "remove_unused_categories is deprecated and "
+                "will be removed in a future version.",
+                FutureWarning,
+                stacklevel=find_stack_level(),
+            )
+        else:
+            inplace = False
+
+        inplace = validate_bool_kwarg(inplace, "inplace")
+        cat = self if inplace else self.copy()
+        idx, inv = np.unique(cat._codes, return_inverse=True)
+
+        if idx.size != 0 and idx[0] == -1:  # na sentinel
+            idx, inv = idx[1:], inv - 1
+
+        new_categories = cat.dtype.categories.take(idx)
+        new_dtype = CategoricalDtype._from_fastpath(
+            new_categories, ordered=self.ordered
+        )
+        new_codes = coerce_indexer_dtype(inv, new_dtype.categories)
+        NDArrayBacked.__init__(cat, new_codes, new_dtype)
+        if not inplace:
+            return cat
+
+    # ------------------------------------------------------------------
+
+    def map(self, mapper):
+        """
+        Map categories using an input mapping or function.
+
+        Maps the categories to new categories. If the mapping correspondence is
+        one-to-one the result is a :class:`~pandas.Categorical` which has the
+        same order property as the original, otherwise a :class:`~pandas.Index`
+        is returned. NaN values are unaffected.
+
+        If a `dict` or :class:`~pandas.Series` is used any unmapped category is
+        mapped to `NaN`. Note that if this happens an :class:`~pandas.Index`
+        will be returned.
+
+        Parameters
+        ----------
+        mapper : function, dict, or Series
+            Mapping correspondence.
+
+        Returns
+        -------
+        pandas.Categorical or pandas.Index
+            Mapped categorical.
+ + See Also + -------- + CategoricalIndex.map : Apply a mapping correspondence on a + :class:`~pandas.CategoricalIndex`. + Index.map : Apply a mapping correspondence on an + :class:`~pandas.Index`. + Series.map : Apply a mapping correspondence on a + :class:`~pandas.Series`. + Series.apply : Apply more complex functions on a + :class:`~pandas.Series`. + + Examples + -------- + >>> cat = pd.Categorical(['a', 'b', 'c']) + >>> cat + ['a', 'b', 'c'] + Categories (3, object): ['a', 'b', 'c'] + >>> cat.map(lambda x: x.upper()) + ['A', 'B', 'C'] + Categories (3, object): ['A', 'B', 'C'] + >>> cat.map({'a': 'first', 'b': 'second', 'c': 'third'}) + ['first', 'second', 'third'] + Categories (3, object): ['first', 'second', 'third'] + + If the mapping is one-to-one the ordering of the categories is + preserved: + + >>> cat = pd.Categorical(['a', 'b', 'c'], ordered=True) + >>> cat + ['a', 'b', 'c'] + Categories (3, object): ['a' < 'b' < 'c'] + >>> cat.map({'a': 3, 'b': 2, 'c': 1}) + [3, 2, 1] + Categories (3, int64): [3 < 2 < 1] + + If the mapping is not one-to-one an :class:`~pandas.Index` is returned: + + >>> cat.map({'a': 'first', 'b': 'second', 'c': 'first'}) + Index(['first', 'second', 'first'], dtype='object') + + If a `dict` is used, all unmapped categories are mapped to `NaN` and + the result is an :class:`~pandas.Index`: + + >>> cat.map({'a': 'first', 'b': 'second'}) + Index(['first', 'second', nan], dtype='object') + """ + new_categories = self.categories.map(mapper) + try: + return self.from_codes( + self._codes.copy(), categories=new_categories, ordered=self.ordered + ) + except ValueError: + # NA values are represented in self._codes with -1 + # np.take causes NA values to take final element in new_categories + if np.any(self._codes == -1): + new_categories = new_categories.insert(len(new_categories), np.nan) + return np.take(new_categories, self._codes) + + __eq__ = _cat_compare_op(operator.eq) + __ne__ = _cat_compare_op(operator.ne) + __lt__ = _cat_compare_op(operator.lt) + __gt__ = _cat_compare_op(operator.gt) + __le__ = _cat_compare_op(operator.le) + __ge__ = _cat_compare_op(operator.ge) + + # ------------------------------------------------------------- + # Validators; ideally these can be de-duplicated + + def _validate_setitem_value(self, value): + if not is_hashable(value): + # wrap scalars and hashable-listlikes in list + return self._validate_listlike(value) + else: + return self._validate_scalar(value) + + _validate_searchsorted_value = _validate_setitem_value + + def _validate_scalar(self, fill_value): + """ + Convert a user-facing fill_value to a representation to use with our + underlying ndarray, raising TypeError if this is not possible. + + Parameters + ---------- + fill_value : object + + Returns + ------- + fill_value : int + + Raises + ------ + TypeError + """ + + if is_valid_na_for_dtype(fill_value, self.categories.dtype): + fill_value = -1 + elif fill_value in self.categories: + fill_value = self._unbox_scalar(fill_value) + else: + raise TypeError( + "Cannot setitem on a Categorical with a new " + f"category ({fill_value}), set the categories first" + ) + return fill_value + + # ------------------------------------------------------------- + + @ravel_compat + def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: + """ + The numpy array interface. + + Returns + ------- + numpy.array + A numpy array of either the specified dtype or, + if dtype==None (default), the same dtype as + categorical.categories.dtype. 
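+
+        Examples
+        --------
+        An illustrative conversion (the values here are arbitrary):
+
+        >>> cat = pd.Categorical(['a', 'b', 'a'])
+        >>> np.asarray(cat)
+        array(['a', 'b', 'a'], dtype=object)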
+ """ + ret = take_nd(self.categories._values, self._codes) + if dtype and not is_dtype_equal(dtype, self.categories.dtype): + return np.asarray(ret, dtype) + # When we're a Categorical[ExtensionArray], like Interval, + # we need to ensure __array__ gets all the way to an + # ndarray. + return np.asarray(ret) + + def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): + # for binary ops, use our custom dunder methods + result = ops.maybe_dispatch_ufunc_to_dunder_op( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + return result + + if method == "reduce": + # e.g. TestCategoricalAnalytics::test_min_max_ordered + result = arraylike.dispatch_reduction_ufunc( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + return result + + # for all other cases, raise for now (similarly as what happens in + # Series.__array_prepare__) + raise TypeError( + f"Object with dtype {self.dtype} cannot perform " + f"the numpy op {ufunc.__name__}" + ) + + def __setstate__(self, state): + """Necessary for making this object picklable""" + if not isinstance(state, dict): + return super().__setstate__(state) + + if "_dtype" not in state: + state["_dtype"] = CategoricalDtype(state["_categories"], state["_ordered"]) + + if "_codes" in state and "_ndarray" not in state: + # backward compat, changed what is property vs attribute + state["_ndarray"] = state.pop("_codes") + + super().__setstate__(state) + + @property + def nbytes(self) -> int: + return self._codes.nbytes + self.dtype.categories.values.nbytes + + def memory_usage(self, deep: bool = False) -> int: + """ + Memory usage of my values + + Parameters + ---------- + deep : bool + Introspect the data deeply, interrogate + `object` dtypes for system-level memory consumption + + Returns + ------- + bytes used + + Notes + ----- + Memory usage does not include memory consumed by elements that + are not components of the array if deep=False + + See Also + -------- + numpy.ndarray.nbytes + """ + return self._codes.nbytes + self.dtype.categories.memory_usage(deep=deep) + + def isna(self) -> np.ndarray: + """ + Detect missing values + + Missing values (-1 in .codes) are detected. + + Returns + ------- + np.ndarray[bool] of whether my values are null + + See Also + -------- + isna : Top-level isna. + isnull : Alias of isna. + Categorical.notna : Boolean inverse of Categorical.isna. + + """ + return self._codes == -1 + + isnull = isna + + def notna(self) -> np.ndarray: + """ + Inverse of isna + + Both missing values (-1 in .codes) and NA as a category are detected as + null. + + Returns + ------- + np.ndarray[bool] of whether my values are not null + + See Also + -------- + notna : Top-level notna. + notnull : Alias of notna. + Categorical.isna : Boolean inverse of Categorical.notna. + + """ + return ~self.isna() + + notnull = notna + + def value_counts(self, dropna: bool = True): + """ + Return a Series containing counts of each category. + + Every category will have an entry, even those with a count of 0. + + Parameters + ---------- + dropna : bool, default True + Don't include counts of NaN. 
+ + Returns + ------- + counts : Series + + See Also + -------- + Series.value_counts + """ + from pandas import ( + CategoricalIndex, + Series, + ) + + code, cat = self._codes, self.categories + ncat, mask = (len(cat), code >= 0) + ix, clean = np.arange(ncat), mask.all() + + if dropna or clean: + obs = code if clean else code[mask] + count = np.bincount(obs, minlength=ncat or 0) + else: + count = np.bincount(np.where(mask, code, ncat)) + ix = np.append(ix, -1) + + ix = coerce_indexer_dtype(ix, self.dtype.categories) + ix = self._from_backing_data(ix) + + return Series(count, index=CategoricalIndex(ix), dtype="int64") + + # error: Argument 2 of "_empty" is incompatible with supertype + # "NDArrayBackedExtensionArray"; supertype defines the argument type as + # "ExtensionDtype" + @classmethod + def _empty( # type: ignore[override] + cls: type_t[Categorical], shape: Shape, dtype: CategoricalDtype + ) -> Categorical: + """ + Analogous to np.empty(shape, dtype=dtype) + + Parameters + ---------- + shape : tuple[int] + dtype : CategoricalDtype + """ + arr = cls._from_sequence([], dtype=dtype) + + # We have to use np.zeros instead of np.empty otherwise the resulting + # ndarray may contain codes not supported by this dtype, in which + # case repr(result) could segfault. + backing = np.zeros(shape, dtype=arr._ndarray.dtype) + + return arr._from_backing_data(backing) + + def _internal_get_values(self): + """ + Return the values. + + For internal compatibility with pandas formatting. + + Returns + ------- + np.ndarray or Index + A numpy array of the same dtype as categorical.categories.dtype or + Index if datetime / periods. + """ + # if we are a datetime and period index, return Index to keep metadata + if needs_i8_conversion(self.categories.dtype): + return self.categories.take(self._codes, fill_value=NaT) + elif is_integer_dtype(self.categories) and -1 in self._codes: + return self.categories.astype("object").take(self._codes, fill_value=np.nan) + return np.array(self) + + def check_for_ordered(self, op): + """assert that we are ordered""" + if not self.ordered: + raise TypeError( + f"Categorical is not ordered for operation {op}\n" + "you can use .as_ordered() to change the " + "Categorical to an ordered one\n" + ) + + def argsort(self, ascending=True, kind="quicksort", **kwargs): + """ + Return the indices that would sort the Categorical. + + .. versionchanged:: 0.25.0 + + Changed to sort missing values at the end. + + Parameters + ---------- + ascending : bool, default True + Whether the indices should result in an ascending + or descending sort. + kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, optional + Sorting algorithm. + **kwargs: + passed through to :func:`numpy.argsort`. + + Returns + ------- + np.ndarray[np.intp] + + See Also + -------- + numpy.ndarray.argsort + + Notes + ----- + While an ordering is applied to the category values, arg-sorting + in this context refers more to organizing and grouping together + based on matching category values. Thus, this function can be + called on an unordered Categorical instance unlike the functions + 'Categorical.min' and 'Categorical.max'. + + Examples + -------- + >>> pd.Categorical(['b', 'b', 'a', 'c']).argsort() + array([2, 0, 1, 3]) + + >>> cat = pd.Categorical(['b', 'b', 'a', 'c'], + ... categories=['c', 'b', 'a'], + ... 
ordered=True) + >>> cat.argsort() + array([3, 0, 1, 2]) + + Missing values are placed at the end + + >>> cat = pd.Categorical([2, None, 1]) + >>> cat.argsort() + array([2, 0, 1]) + """ + return super().argsort(ascending=ascending, kind=kind, **kwargs) + + def sort_values( + self, inplace: bool = False, ascending: bool = True, na_position: str = "last" + ): + """ + Sort the Categorical by category value returning a new + Categorical by default. + + While an ordering is applied to the category values, sorting in this + context refers more to organizing and grouping together based on + matching category values. Thus, this function can be called on an + unordered Categorical instance unlike the functions 'Categorical.min' + and 'Categorical.max'. + + Parameters + ---------- + inplace : bool, default False + Do operation in place. + ascending : bool, default True + Order ascending. Passing False orders descending. The + ordering parameter provides the method by which the + category values are organized. + na_position : {'first', 'last'} (optional, default='last') + 'first' puts NaNs at the beginning + 'last' puts NaNs at the end + + Returns + ------- + Categorical or None + + See Also + -------- + Categorical.sort + Series.sort_values + + Examples + -------- + >>> c = pd.Categorical([1, 2, 2, 1, 5]) + >>> c + [1, 2, 2, 1, 5] + Categories (3, int64): [1, 2, 5] + >>> c.sort_values() + [1, 1, 2, 2, 5] + Categories (3, int64): [1, 2, 5] + >>> c.sort_values(ascending=False) + [5, 2, 2, 1, 1] + Categories (3, int64): [1, 2, 5] + + Inplace sorting can be done as well: + + >>> c.sort_values(inplace=True) + >>> c + [1, 1, 2, 2, 5] + Categories (3, int64): [1, 2, 5] + >>> + >>> c = pd.Categorical([1, 2, 2, 1, 5]) + + 'sort_values' behaviour with NaNs. Note that 'na_position' + is independent of the 'ascending' parameter: + + >>> c = pd.Categorical([np.nan, 2, 2, np.nan, 5]) + >>> c + [NaN, 2, 2, NaN, 5] + Categories (2, int64): [2, 5] + >>> c.sort_values() + [2, 2, 5, NaN, NaN] + Categories (2, int64): [2, 5] + >>> c.sort_values(ascending=False) + [5, 2, 2, NaN, NaN] + Categories (2, int64): [2, 5] + >>> c.sort_values(na_position='first') + [NaN, NaN, 2, 2, 5] + Categories (2, int64): [2, 5] + >>> c.sort_values(ascending=False, na_position='first') + [NaN, NaN, 5, 2, 2] + Categories (2, int64): [2, 5] + """ + inplace = validate_bool_kwarg(inplace, "inplace") + if na_position not in ["last", "first"]: + raise ValueError(f"invalid na_position: {repr(na_position)}") + + sorted_idx = nargsort(self, ascending=ascending, na_position=na_position) + + if inplace: + self._codes[:] = self._codes[sorted_idx] + else: + codes = self._codes[sorted_idx] + return self._from_backing_data(codes) + + def _rank( + self, + *, + axis: int = 0, + method: str = "average", + na_option: str = "keep", + ascending: bool = True, + pct: bool = False, + ): + """ + See Series.rank.__doc__. + """ + if axis != 0: + raise NotImplementedError + vff = self._values_for_rank() + return algorithms.rank( + vff, + axis=axis, + method=method, + na_option=na_option, + ascending=ascending, + pct=pct, + ) + + def _values_for_rank(self): + """ + For correctly ranking ordered categorical data. See GH#15420 + + Ordered categorical data should be ranked on the basis of + codes with -1 translated to NaN. 
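+
+        For unordered categoricals, numeric categories are ranked on the
+        values themselves, while non-numeric categories are first ranked
+        and the codes are then remapped onto those float ranks.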
+ + Returns + ------- + numpy.array + + """ + from pandas import Series + + if self.ordered: + values = self.codes + mask = values == -1 + if mask.any(): + values = values.astype("float64") + values[mask] = np.nan + elif self.categories.is_numeric(): + values = np.array(self) + else: + # reorder the categories (so rank can use the float codes) + # instead of passing an object array to rank + values = np.array( + self.rename_categories(Series(self.categories).rank().values) + ) + return values + + def view(self, dtype=None): + if dtype is not None: + raise NotImplementedError(dtype) + return self._from_backing_data(self._ndarray) + + def to_dense(self) -> np.ndarray: + """ + Return my 'dense' representation + + For internal compatibility with numpy arrays. + + Returns + ------- + dense : array + """ + warn( + "Categorical.to_dense is deprecated and will be removed in " + "a future version. Use np.asarray(cat) instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return np.asarray(self) + + # ------------------------------------------------------------------ + # NDArrayBackedExtensionArray compat + + @property + def _codes(self) -> np.ndarray: + return self._ndarray + + @_codes.setter + def _codes(self, value: np.ndarray): + warn( + "Setting the codes on a Categorical is deprecated and will raise in " + "a future version. Create a new Categorical object instead", + FutureWarning, + stacklevel=find_stack_level(), + ) # GH#40606 + NDArrayBacked.__init__(self, value, self.dtype) + + def _box_func(self, i: int): + if i == -1: + return np.NaN + return self.categories[i] + + def _unbox_scalar(self, key) -> int: + # searchsorted is very performance sensitive. By converting codes + # to same dtype as self.codes, we get much faster performance. + code = self.categories.get_loc(key) + code = self._ndarray.dtype.type(code) + return code + + # ------------------------------------------------------------------ + + def take_nd(self, indexer, allow_fill: bool = False, fill_value=None): + # GH#27745 deprecate alias that other EAs dont have + warn( + "Categorical.take_nd is deprecated, use Categorical.take instead", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self.take(indexer, allow_fill=allow_fill, fill_value=fill_value) + + def __iter__(self): + """ + Returns an Iterator over the values of this Categorical. + """ + if self.ndim == 1: + return iter(self._internal_get_values().tolist()) + else: + return (self[n] for n in range(len(self))) + + def __contains__(self, key) -> bool: + """ + Returns True if `key` is in this Categorical. + """ + # if key is a NaN, check if any NaN is in self. + if is_valid_na_for_dtype(key, self.categories.dtype): + return bool(self.isna().any()) + + return contains(self, key, container=self._codes) + + # ------------------------------------------------------------------ + # Rendering Methods + + def _formatter(self, boxed: bool = False): + # Defer to CategoricalFormatter's formatter. 
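+        # Returning None here makes the generic formatting machinery fall
+        # back to fmt.CategoricalFormatter (used by _get_repr below).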
+ return None + + def _tidy_repr(self, max_vals: int = 10, footer: bool = True) -> str: + """ + a short repr displaying only max_vals and an optional (but default + footer) + """ + num = max_vals // 2 + head = self[:num]._get_repr(length=False, footer=False) + tail = self[-(max_vals - num) :]._get_repr(length=False, footer=False) + + result = f"{head[:-1]}, ..., {tail[1:]}" + if footer: + result = f"{result}\n{self._repr_footer()}" + + return str(result) + + def _repr_categories(self) -> list[str]: + """ + return the base repr for the categories + """ + max_categories = ( + 10 + if get_option("display.max_categories") == 0 + else get_option("display.max_categories") + ) + from pandas.io.formats import format as fmt + + format_array = partial( + fmt.format_array, formatter=None, quoting=QUOTE_NONNUMERIC + ) + if len(self.categories) > max_categories: + num = max_categories // 2 + head = format_array(self.categories[:num]) + tail = format_array(self.categories[-num:]) + category_strs = head + ["..."] + tail + else: + category_strs = format_array(self.categories) + + # Strip all leading spaces, which format_array adds for columns... + category_strs = [x.strip() for x in category_strs] + return category_strs + + def _repr_categories_info(self) -> str: + """ + Returns a string representation of the footer. + """ + category_strs = self._repr_categories() + dtype = str(self.categories.dtype) + levheader = f"Categories ({len(self.categories)}, {dtype}): " + width, height = get_terminal_size() + max_width = get_option("display.width") or width + if console.in_ipython_frontend(): + # 0 = no breaks + max_width = 0 + levstring = "" + start = True + cur_col_len = len(levheader) # header + sep_len, sep = (3, " < ") if self.ordered else (2, ", ") + linesep = sep.rstrip() + "\n" # remove whitespace + for val in category_strs: + if max_width != 0 and cur_col_len + sep_len + len(val) > max_width: + levstring += linesep + (" " * (len(levheader) + 1)) + cur_col_len = len(levheader) + 1 # header + a whitespace + elif not start: + levstring += sep + cur_col_len += len(val) + levstring += val + start = False + # replace to simple save space by + return levheader + "[" + levstring.replace(" < ... < ", " ... ") + "]" + + def _repr_footer(self) -> str: + info = self._repr_categories_info() + return f"Length: {len(self)}\n{info}" + + def _get_repr(self, length: bool = True, na_rep="NaN", footer: bool = True) -> str: + from pandas.io.formats import format as fmt + + formatter = fmt.CategoricalFormatter( + self, length=length, na_rep=na_rep, footer=footer + ) + result = formatter.to_string() + return str(result) + + def __repr__(self) -> str: + """ + String representation. 
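+
+        Long categoricals are abbreviated: at most ten values are shown as
+        a head/tail display, followed by a footer with the length and the
+        categories.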
+ """ + _maxlen = 10 + if len(self._codes) > _maxlen: + result = self._tidy_repr(_maxlen) + elif len(self._codes) > 0: + result = self._get_repr(length=len(self) > _maxlen) + else: + msg = self._get_repr(length=False, footer=True).replace("\n", ", ") + result = f"[], {msg}" + + return result + + # ------------------------------------------------------------------ + + def _validate_listlike(self, value): + # NB: here we assume scalar-like tuples have already been excluded + value = extract_array(value, extract_numpy=True) + + # require identical categories set + if isinstance(value, Categorical): + if not is_dtype_equal(self.dtype, value.dtype): + raise TypeError( + "Cannot set a Categorical with another, " + "without identical categories" + ) + # is_dtype_equal implies categories_match_up_to_permutation + value = self._encode_with_my_categories(value) + return value._codes + + from pandas import Index + + # tupleize_cols=False for e.g. test_fillna_iterable_category GH#41914 + to_add = Index._with_infer(value, tupleize_cols=False).difference( + self.categories + ) + + # no assignments of values not in categories, but it's always ok to set + # something to np.nan + if len(to_add) and not isna(to_add).all(): + raise TypeError( + "Cannot setitem on a Categorical with a new " + "category, set the categories first" + ) + + codes = self.categories.get_indexer(value) + return codes.astype(self._ndarray.dtype, copy=False) + + def _reverse_indexer(self) -> dict[Hashable, npt.NDArray[np.intp]]: + """ + Compute the inverse of a categorical, returning + a dict of categories -> indexers. + + *This is an internal function* + + Returns + ------- + Dict[Hashable, np.ndarray[np.intp]] + dict of categories -> indexers + + Examples + -------- + >>> c = pd.Categorical(list('aabca')) + >>> c + ['a', 'a', 'b', 'c', 'a'] + Categories (3, object): ['a', 'b', 'c'] + >>> c.categories + Index(['a', 'b', 'c'], dtype='object') + >>> c.codes + array([0, 0, 1, 2, 0], dtype=int8) + >>> c._reverse_indexer() + {'a': array([0, 1, 4]), 'b': array([2]), 'c': array([3])} + + """ + categories = self.categories + r, counts = libalgos.groupsort_indexer( + ensure_platform_int(self.codes), categories.size + ) + counts = ensure_int64(counts).cumsum() + _result = (r[start:end] for start, end in zip(counts, counts[1:])) + return dict(zip(categories, _result)) + + # ------------------------------------------------------------------ + # Reductions + + @deprecate_kwarg(old_arg_name="numeric_only", new_arg_name="skipna") + def min(self, *, skipna=True, **kwargs): + """ + The minimum value of the object. + + Only ordered `Categoricals` have a minimum! + + .. versionchanged:: 1.0.0 + + Returns an NA value on empty arrays + + Raises + ------ + TypeError + If the `Categorical` is not `ordered`. + + Returns + ------- + min : the minimum of this `Categorical` + """ + nv.validate_minmax_axis(kwargs.get("axis", 0)) + nv.validate_min((), kwargs) + self.check_for_ordered("min") + + if not len(self._codes): + return self.dtype.na_value + + good = self._codes != -1 + if not good.all(): + if skipna and good.any(): + pointer = self._codes[good].min() + else: + return np.nan + else: + pointer = self._codes.min() + return self._wrap_reduction_result(None, pointer) + + @deprecate_kwarg(old_arg_name="numeric_only", new_arg_name="skipna") + def max(self, *, skipna=True, **kwargs): + """ + The maximum value of the object. + + Only ordered `Categoricals` have a maximum! + + .. 
versionchanged:: 1.0.0 + + Returns an NA value on empty arrays + + Raises + ------ + TypeError + If the `Categorical` is not `ordered`. + + Returns + ------- + max : the maximum of this `Categorical` + """ + nv.validate_minmax_axis(kwargs.get("axis", 0)) + nv.validate_max((), kwargs) + self.check_for_ordered("max") + + if not len(self._codes): + return self.dtype.na_value + + good = self._codes != -1 + if not good.all(): + if skipna and good.any(): + pointer = self._codes[good].max() + else: + return np.nan + else: + pointer = self._codes.max() + return self._wrap_reduction_result(None, pointer) + + def mode(self, dropna: bool = True) -> Categorical: + """ + Returns the mode(s) of the Categorical. + + Always returns `Categorical` even if only one value. + + Parameters + ---------- + dropna : bool, default True + Don't consider counts of NaN/NaT. + + Returns + ------- + modes : `Categorical` (sorted) + """ + warn( + "Categorical.mode is deprecated and will be removed in a future version. " + "Use Series.mode instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self._mode(dropna=dropna) + + def _mode(self, dropna: bool = True) -> Categorical: + codes = self._codes + if dropna: + good = self._codes != -1 + codes = self._codes[good] + + codes = htable.mode(codes, dropna) + codes.sort() + codes = coerce_indexer_dtype(codes, self.dtype.categories) + return self._from_backing_data(codes) + + # ------------------------------------------------------------------ + # ExtensionArray Interface + + def unique(self): + """ + Return the ``Categorical`` which ``categories`` and ``codes`` are + unique. + + .. versionchanged:: 1.3.0 + + Previously, unused categories were dropped from the new categories. + + Returns + ------- + Categorical + + See Also + -------- + pandas.unique + CategoricalIndex.unique + Series.unique : Return unique values of Series object. + + Examples + -------- + >>> pd.Categorical(list("baabc")).unique() + ['b', 'a', 'c'] + Categories (3, object): ['a', 'b', 'c'] + >>> pd.Categorical(list("baab"), categories=list("abc"), ordered=True).unique() + ['b', 'a'] + Categories (3, object): ['a' < 'b' < 'c'] + """ + unique_codes = unique1d(self.codes) + return self._from_backing_data(unique_codes) + + def _values_for_factorize(self): + return self._ndarray, -1 + + @classmethod + def _from_factorized(cls, uniques, original): + return original._constructor( + original.categories.take(uniques), dtype=original.dtype + ) + + def equals(self, other: object) -> bool: + """ + Returns True if categorical arrays are equal. 
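+
+        Two categoricals are considered equal when they have the same
+        categories (up to a permutation) and the same ordered flag, and,
+        after re-encoding ``other`` with this categorical's categories,
+        their codes match element-wise.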
+ + Parameters + ---------- + other : `Categorical` + + Returns + ------- + bool + """ + if not isinstance(other, Categorical): + return False + elif self._categories_match_up_to_permutation(other): + other = self._encode_with_my_categories(other) + return np.array_equal(self._codes, other._codes) + return False + + @classmethod + def _concat_same_type( + cls: type[CategoricalT], to_concat: Sequence[CategoricalT], axis: int = 0 + ) -> CategoricalT: + from pandas.core.dtypes.concat import union_categoricals + + first = to_concat[0] + if axis >= first.ndim: + raise ValueError( + f"axis {axis} is out of bounds for array of dimension {first.ndim}" + ) + + if axis == 1: + # Flatten, concatenate then reshape + if not all(x.ndim == 2 for x in to_concat): + raise ValueError + + # pass correctly-shaped to union_categoricals + tc_flat = [] + for obj in to_concat: + tc_flat.extend([obj[:, i] for i in range(obj.shape[1])]) + + res_flat = cls._concat_same_type(tc_flat, axis=0) + + result = res_flat.reshape(len(first), -1, order="F") + return result + + result = union_categoricals(to_concat) + return result + + # ------------------------------------------------------------------ + + def _encode_with_my_categories(self, other: Categorical) -> Categorical: + """ + Re-encode another categorical using this Categorical's categories. + + Notes + ----- + This assumes we have already checked + self._categories_match_up_to_permutation(other). + """ + # Indexing on codes is more efficient if categories are the same, + # so we can apply some optimizations based on the degree of + # dtype-matching. + codes = recode_for_categories( + other.codes, other.categories, self.categories, copy=False + ) + return self._from_backing_data(codes) + + def _categories_match_up_to_permutation(self, other: Categorical) -> bool: + """ + Returns True if categoricals are the same dtype + same categories, and same ordered + + Parameters + ---------- + other : Categorical + + Returns + ------- + bool + """ + return hash(self.dtype) == hash(other.dtype) + + def is_dtype_equal(self, other) -> bool: + warn( + "Categorical.is_dtype_equal is deprecated and will be removed " + "in a future version", + FutureWarning, + stacklevel=find_stack_level(), + ) + try: + return self._categories_match_up_to_permutation(other) + except (AttributeError, TypeError): + return False + + def describe(self): + """ + Describes this Categorical + + Returns + ------- + description: `DataFrame` + A dataframe with frequency and counts by category. + """ + counts = self.value_counts(dropna=False) + freqs = counts / counts.sum() + + from pandas import Index + from pandas.core.reshape.concat import concat + + result = concat([counts, freqs], axis=1) + result.columns = Index(["counts", "freqs"]) + result.index.name = "categories" + + return result + + def isin(self, values) -> npt.NDArray[np.bool_]: + """ + Check whether `values` are contained in Categorical. + + Return a boolean NumPy Array showing whether each element in + the Categorical matches an element in the passed sequence of + `values` exactly. + + Parameters + ---------- + values : set or list-like + The sequence of values to test. Passing in a single string will + raise a ``TypeError``. Instead, turn a single string into a + list of one element. + + Returns + ------- + np.ndarray[bool] + + Raises + ------ + TypeError + * If `values` is not a set or list-like + + See Also + -------- + pandas.Series.isin : Equivalent method on Series. 
+ + Examples + -------- + >>> s = pd.Categorical(['lama', 'cow', 'lama', 'beetle', 'lama', + ... 'hippo']) + >>> s.isin(['cow', 'lama']) + array([ True, True, True, False, True, False]) + + Passing a single string as ``s.isin('lama')`` will raise an error. Use + a list of one element instead: + + >>> s.isin(['lama']) + array([ True, False, True, False, True, False]) + """ + if not is_list_like(values): + values_type = type(values).__name__ + raise TypeError( + "only list-like objects are allowed to be passed " + f"to isin(), you passed a [{values_type}]" + ) + values = sanitize_array(values, None, None) + null_mask = np.asarray(isna(values)) + code_values = self.categories.get_indexer(values) + code_values = code_values[null_mask | (code_values >= 0)] + return algorithms.isin(self.codes, code_values) + + def replace(self, to_replace, value, inplace: bool = False): + """ + Replaces all instances of one value with another + + Parameters + ---------- + to_replace: object + The value to be replaced + + value: object + The value to replace it with + + inplace: bool + Whether the operation is done in-place + + Returns + ------- + None if inplace is True, otherwise the new Categorical after replacement + + + Examples + -------- + >>> s = pd.Categorical([1, 2, 1, 3]) + >>> s.replace(1, 3) + [3, 2, 3, 3] + Categories (2, int64): [2, 3] + """ + # GH#44929 deprecation + warn( + "Categorical.replace is deprecated and will be removed in a future " + "version. Use Series.replace directly instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self._replace(to_replace=to_replace, value=value, inplace=inplace) + + def _replace(self, *, to_replace, value, inplace: bool = False): + inplace = validate_bool_kwarg(inplace, "inplace") + cat = self if inplace else self.copy() + + # build a dict of (to replace -> value) pairs + if is_list_like(to_replace): + # if to_replace is list-like and value is scalar + replace_dict = {replace_value: value for replace_value in to_replace} + else: + # if both to_replace and value are scalar + replace_dict = {to_replace: value} + + # other cases, like if both to_replace and value are list-like or if + # to_replace is a dict, are handled separately in NDFrame + for replace_value, new_value in replace_dict.items(): + if new_value == replace_value: + continue + if replace_value in cat.categories: + if isna(new_value): + with catch_warnings(): + simplefilter("ignore") + cat.remove_categories(replace_value, inplace=True) + continue + + categories = cat.categories.tolist() + index = categories.index(replace_value) + + if new_value in cat.categories: + value_index = categories.index(new_value) + cat._codes[cat._codes == index] = value_index + with catch_warnings(): + simplefilter("ignore") + cat.remove_categories(replace_value, inplace=True) + else: + categories[index] = new_value + with catch_warnings(): + simplefilter("ignore") + cat.rename_categories(categories, inplace=True) + if not inplace: + return cat + + # ------------------------------------------------------------------------ + # String methods interface + def _str_map( + self, f, na_value=np.nan, dtype=np.dtype("object"), convert: bool = True + ): + # Optimization to apply the callable `f` to the categories once + # and rebuild the result by `take`ing from the result with the codes. + # Returns the same type as the object-dtype implementation though. 
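+        # For example, with a million values drawn from three categories,
+        # `f` runs only three times (once per category) and take_nd then
+        # re-expands the per-category results through the integer codes.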
+ from pandas.core.arrays import PandasArray + + categories = self.categories + codes = self.codes + result = PandasArray(categories.to_numpy())._str_map(f, na_value, dtype) + return take_nd(result, codes, fill_value=na_value) + + def _str_get_dummies(self, sep="|"): + # sep may not be in categories. Just bail on this. + from pandas.core.arrays import PandasArray + + return PandasArray(self.astype(str))._str_get_dummies(sep) + + +# The Series.cat accessor + + +@delegate_names( + delegate=Categorical, accessors=["categories", "ordered"], typ="property" +) +@delegate_names( + delegate=Categorical, + accessors=[ + "rename_categories", + "reorder_categories", + "add_categories", + "remove_categories", + "remove_unused_categories", + "set_categories", + "as_ordered", + "as_unordered", + ], + typ="method", +) +class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): + """ + Accessor object for categorical properties of the Series values. + + Be aware that assigning to `categories` is a inplace operation, while all + methods return new categorical data per default (but can be called with + `inplace=True`). + + Parameters + ---------- + data : Series or CategoricalIndex + + Examples + -------- + >>> s = pd.Series(list("abbccc")).astype("category") + >>> s + 0 a + 1 b + 2 b + 3 c + 4 c + 5 c + dtype: category + Categories (3, object): ['a', 'b', 'c'] + + >>> s.cat.categories + Index(['a', 'b', 'c'], dtype='object') + + >>> s.cat.rename_categories(list("cba")) + 0 c + 1 b + 2 b + 3 a + 4 a + 5 a + dtype: category + Categories (3, object): ['c', 'b', 'a'] + + >>> s.cat.reorder_categories(list("cba")) + 0 a + 1 b + 2 b + 3 c + 4 c + 5 c + dtype: category + Categories (3, object): ['c', 'b', 'a'] + + >>> s.cat.add_categories(["d", "e"]) + 0 a + 1 b + 2 b + 3 c + 4 c + 5 c + dtype: category + Categories (5, object): ['a', 'b', 'c', 'd', 'e'] + + >>> s.cat.remove_categories(["a", "c"]) + 0 NaN + 1 b + 2 b + 3 NaN + 4 NaN + 5 NaN + dtype: category + Categories (1, object): ['b'] + + >>> s1 = s.cat.add_categories(["d", "e"]) + >>> s1.cat.remove_unused_categories() + 0 a + 1 b + 2 b + 3 c + 4 c + 5 c + dtype: category + Categories (3, object): ['a', 'b', 'c'] + + >>> s.cat.set_categories(list("abcde")) + 0 a + 1 b + 2 b + 3 c + 4 c + 5 c + dtype: category + Categories (5, object): ['a', 'b', 'c', 'd', 'e'] + + >>> s.cat.as_ordered() + 0 a + 1 b + 2 b + 3 c + 4 c + 5 c + dtype: category + Categories (3, object): ['a' < 'b' < 'c'] + + >>> s.cat.as_unordered() + 0 a + 1 b + 2 b + 3 c + 4 c + 5 c + dtype: category + Categories (3, object): ['a', 'b', 'c'] + """ + + def __init__(self, data): + self._validate(data) + self._parent = data.values + self._index = data.index + self._name = data.name + self._freeze() + + @staticmethod + def _validate(data): + if not is_categorical_dtype(data.dtype): + raise AttributeError("Can only use .cat accessor with a 'category' dtype") + + def _delegate_property_get(self, name): + return getattr(self._parent, name) + + def _delegate_property_set(self, name, new_values): + return setattr(self._parent, name, new_values) + + @property + def codes(self): + """ + Return Series of codes as well as the index. 
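+
+        Examples
+        --------
+        An illustrative round-trip (the values here are arbitrary):
+
+        >>> s = pd.Series(list("abca")).astype("category")
+        >>> s.cat.codes
+        0    0
+        1    1
+        2    2
+        3    0
+        dtype: int8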
+ """ + from pandas import Series + + return Series(self._parent.codes, index=self._index) + + def _delegate_method(self, name, *args, **kwargs): + from pandas import Series + + method = getattr(self._parent, name) + res = method(*args, **kwargs) + if res is not None: + return Series(res, index=self._index, name=self._name) + + +# utility routines + + +def _get_codes_for_values(values, categories: Index) -> np.ndarray: + """ + utility routine to turn values into codes given the specified categories + + If `values` is known to be a Categorical, use recode_for_categories instead. + """ + if values.ndim > 1: + flat = values.ravel() + codes = _get_codes_for_values(flat, categories) + return codes.reshape(values.shape) + + if isinstance(categories.dtype, ExtensionDtype) and is_object_dtype(values): + # Support inferring the correct extension dtype from an array of + # scalar objects. e.g. + # Categorical(array[Period, Period], categories=PeriodIndex(...)) + cls = categories.dtype.construct_array_type() + values = maybe_cast_to_extension_array(cls, values) + + codes = categories.get_indexer_for(values) + return coerce_indexer_dtype(codes, categories) + + +def recode_for_categories( + codes: np.ndarray, old_categories, new_categories, copy: bool = True +) -> np.ndarray: + """ + Convert a set of codes for to a new set of categories + + Parameters + ---------- + codes : np.ndarray + old_categories, new_categories : Index + copy: bool, default True + Whether to copy if the codes are unchanged. + + Returns + ------- + new_codes : np.ndarray[np.int64] + + Examples + -------- + >>> old_cat = pd.Index(['b', 'a', 'c']) + >>> new_cat = pd.Index(['a', 'b']) + >>> codes = np.array([0, 1, 1, 2]) + >>> recode_for_categories(codes, old_cat, new_cat) + array([ 1, 0, 0, -1], dtype=int8) + """ + if len(old_categories) == 0: + # All null anyway, so just retain the nulls + if copy: + return codes.copy() + return codes + elif new_categories.equals(old_categories): + # Same categories, so no need to actually recode + if copy: + return codes.copy() + return codes + + indexer = coerce_indexer_dtype( + new_categories.get_indexer(old_categories), new_categories + ) + new_codes = take_nd(indexer, codes, fill_value=-1) + return new_codes + + +def factorize_from_iterable(values) -> tuple[np.ndarray, Index]: + """ + Factorize an input `values` into `categories` and `codes`. Preserves + categorical dtype in `categories`. + + Parameters + ---------- + values : list-like + + Returns + ------- + codes : ndarray + categories : Index + If `values` has a categorical dtype, then `categories` is + a CategoricalIndex keeping the categories and order of `values`. + """ + from pandas import CategoricalIndex + + if not is_list_like(values): + raise TypeError("Input must be list-like") + + if is_categorical_dtype(values): + values = extract_array(values) + # The Categorical we want to build has the same categories + # as values but its codes are by def [0, ..., len(n_categories) - 1] + cat_codes = np.arange(len(values.categories), dtype=values.codes.dtype) + cat = Categorical.from_codes(cat_codes, dtype=values.dtype) + + categories = CategoricalIndex(cat) + codes = values.codes + else: + # The value of ordered is irrelevant since we don't use cat as such, + # but only the resulting categories, the order of which is independent + # from ordered. Set ordered to False as default. 
See GH #15457 + cat = Categorical(values, ordered=False) + categories = cat.categories + codes = cat.codes + return codes, categories + + +def factorize_from_iterables(iterables) -> tuple[list[np.ndarray], list[Index]]: + """ + A higher-level wrapper over `factorize_from_iterable`. + + Parameters + ---------- + iterables : list-like of list-likes + + Returns + ------- + codes : list of ndarrays + categories : list of Indexes + + Notes + ----- + See `factorize_from_iterable` for more info. + """ + if len(iterables) == 0: + # For consistency, it should return two empty lists. + return [], [] + + codes, categories = zip(*(factorize_from_iterable(it) for it in iterables)) + return list(codes), list(categories) diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/datetimelike.py b/.local/lib/python3.8/site-packages/pandas/core/arrays/datetimelike.py new file mode 100644 index 0000000..c845d9c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/arrays/datetimelike.py @@ -0,0 +1,1936 @@ +from __future__ import annotations + +from datetime import ( + datetime, + timedelta, +) +import operator +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Literal, + Sequence, + TypeVar, + Union, + cast, + final, + overload, +) +import warnings + +import numpy as np + +from pandas._libs import ( + algos, + lib, +) +from pandas._libs.tslibs import ( + BaseOffset, + IncompatibleFrequency, + NaT, + NaTType, + Period, + Resolution, + Tick, + Timestamp, + delta_to_nanoseconds, + iNaT, + ints_to_pydatetime, + to_offset, +) +from pandas._libs.tslibs.fields import ( + RoundTo, + round_nsint64, +) +from pandas._libs.tslibs.timestamps import integer_op_not_supported +from pandas._typing import ( + ArrayLike, + DatetimeLikeScalar, + Dtype, + DtypeObj, + NpDtype, + PositionalIndexer2D, + PositionalIndexerTuple, + ScalarIndexer, + SequenceIndexer, + npt, +) +from pandas.compat.numpy import function as nv +from pandas.errors import ( + AbstractMethodError, + NullFrequencyError, + PerformanceWarning, +) +from pandas.util._decorators import ( + Appender, + Substitution, + cache_readonly, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + is_all_strings, + is_categorical_dtype, + is_datetime64_any_dtype, + is_datetime64_dtype, + is_datetime64tz_dtype, + is_datetime_or_timedelta_dtype, + is_dtype_equal, + is_float_dtype, + is_integer_dtype, + is_list_like, + is_object_dtype, + is_period_dtype, + is_string_dtype, + is_timedelta64_dtype, + is_unsigned_integer_dtype, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import ExtensionDtype +from pandas.core.dtypes.missing import ( + is_valid_na_for_dtype, + isna, +) + +from pandas.core import ( + nanops, + ops, +) +from pandas.core.algorithms import ( + checked_add_with_arr, + isin, + mode, + unique1d, +) +from pandas.core.arraylike import OpsMixin +from pandas.core.arrays._mixins import ( + NDArrayBackedExtensionArray, + ravel_compat, +) +import pandas.core.common as com +from pandas.core.construction import ( + array as pd_array, + extract_array, +) +from pandas.core.indexers import ( + check_array_indexer, + check_setitem_lengths, +) +from pandas.core.ops.common import unpack_zerodim_and_defer +from pandas.core.ops.invalid import ( + invalid_comparison, + make_invalid_op, +) + +from pandas.tseries import frequencies + +if TYPE_CHECKING: + + from pandas.core.arrays import ( + DatetimeArray, + TimedeltaArray, + ) + +DTScalarOrNaT = Union[DatetimeLikeScalar, NaTType] +DatetimeLikeArrayT = 
TypeVar("DatetimeLikeArrayT", bound="DatetimeLikeArrayMixin") + + +class InvalidComparison(Exception): + """ + Raised by _validate_comparison_value to indicate to caller it should + return invalid_comparison. + """ + + pass + + +class DatetimeLikeArrayMixin(OpsMixin, NDArrayBackedExtensionArray): + """ + Shared Base/Mixin class for DatetimeArray, TimedeltaArray, PeriodArray + + Assumes that __new__/__init__ defines: + _data + _freq + + and that the inheriting class has methods: + _generate_range + """ + + # _infer_matches -> which infer_dtype strings are close enough to our own + _infer_matches: tuple[str, ...] + _is_recognized_dtype: Callable[[DtypeObj], bool] + _recognized_scalars: tuple[type, ...] + _ndarray: np.ndarray + + @cache_readonly + def _can_hold_na(self) -> bool: + return True + + def __init__(self, data, dtype: Dtype | None = None, freq=None, copy=False): + raise AbstractMethodError(self) + + @property + def _scalar_type(self) -> type[DatetimeLikeScalar]: + """ + The scalar associated with this datelike + + * PeriodArray : Period + * DatetimeArray : Timestamp + * TimedeltaArray : Timedelta + """ + raise AbstractMethodError(self) + + def _scalar_from_string(self, value: str) -> DTScalarOrNaT: + """ + Construct a scalar type from a string. + + Parameters + ---------- + value : str + + Returns + ------- + Period, Timestamp, or Timedelta, or NaT + Whatever the type of ``self._scalar_type`` is. + + Notes + ----- + This should call ``self._check_compatible_with`` before + unboxing the result. + """ + raise AbstractMethodError(self) + + def _unbox_scalar( + self, value: DTScalarOrNaT, setitem: bool = False + ) -> np.int64 | np.datetime64 | np.timedelta64: + """ + Unbox the integer value of a scalar `value`. + + Parameters + ---------- + value : Period, Timestamp, Timedelta, or NaT + Depending on subclass. + setitem : bool, default False + Whether to check compatibility with setitem strictness. + + Returns + ------- + int + + Examples + -------- + >>> self._unbox_scalar(Timedelta("10s")) # doctest: +SKIP + 10000000000 + """ + raise AbstractMethodError(self) + + def _check_compatible_with( + self, other: DTScalarOrNaT, setitem: bool = False + ) -> None: + """ + Verify that `self` and `other` are compatible. + + * DatetimeArray verifies that the timezones (if any) match + * PeriodArray verifies that the freq matches + * Timedelta has no verification + + In each case, NaT is considered compatible. + + Parameters + ---------- + other + setitem : bool, default False + For __setitem__ we may have stricter compatibility restrictions than + for comparisons. + + Raises + ------ + Exception + """ + raise AbstractMethodError(self) + + # ------------------------------------------------------------------ + # NDArrayBackedExtensionArray compat + + @cache_readonly + def _data(self) -> np.ndarray: + return self._ndarray + + # ------------------------------------------------------------------ + + def _box_func(self, x): + """ + box function to get object from internal representation + """ + raise AbstractMethodError(self) + + def _box_values(self, values) -> np.ndarray: + """ + apply box func to passed values + """ + return lib.map_infer(values, self._box_func, convert=False) + + def __iter__(self): + if self.ndim > 1: + return (self[n] for n in range(len(self))) + else: + return (self._box_func(v) for v in self.asi8) + + @property + def asi8(self) -> npt.NDArray[np.int64]: + """ + Integer representation of the values. + + Returns + ------- + ndarray + An ndarray with int64 dtype. 
+ """ + # do not cache or you'll create a memory leak + return self._ndarray.view("i8") + + # ---------------------------------------------------------------- + # Rendering Methods + + def _format_native_types(self, *, na_rep="NaT", date_format=None): + """ + Helper method for astype when converting to strings. + + Returns + ------- + ndarray[str] + """ + raise AbstractMethodError(self) + + def _formatter(self, boxed: bool = False): + # TODO: Remove Datetime & DatetimeTZ formatters. + return "'{}'".format + + # ---------------------------------------------------------------- + # Array-Like / EA-Interface Methods + + def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: + # used for Timedelta/DatetimeArray, overwritten by PeriodArray + if is_object_dtype(dtype): + return np.array(list(self), dtype=object) + return self._ndarray + + @overload + def __getitem__(self, item: ScalarIndexer) -> DTScalarOrNaT: + ... + + @overload + def __getitem__( + self: DatetimeLikeArrayT, + item: SequenceIndexer | PositionalIndexerTuple, + ) -> DatetimeLikeArrayT: + ... + + def __getitem__( + self: DatetimeLikeArrayT, key: PositionalIndexer2D + ) -> DatetimeLikeArrayT | DTScalarOrNaT: + """ + This getitem defers to the underlying array, which by-definition can + only handle list-likes, slices, and integer scalars + """ + # Use cast as we know we will get back a DatetimeLikeArray or DTScalar, + # but skip evaluating the Union at runtime for performance + # (see https://github.com/pandas-dev/pandas/pull/44624) + result = cast( + "Union[DatetimeLikeArrayT, DTScalarOrNaT]", super().__getitem__(key) + ) + if lib.is_scalar(result): + return result + else: + # At this point we know the result is an array. + result = cast(DatetimeLikeArrayT, result) + result._freq = self._get_getitem_freq(key) + return result + + def _get_getitem_freq(self, key) -> BaseOffset | None: + """ + Find the `freq` attribute to assign to the result of a __getitem__ lookup. + """ + is_period = is_period_dtype(self.dtype) + if is_period: + freq = self.freq + elif self.ndim != 1: + freq = None + else: + key = check_array_indexer(self, key) # maybe ndarray[bool] -> slice + freq = None + if isinstance(key, slice): + if self.freq is not None and key.step is not None: + freq = key.step * self.freq + else: + freq = self.freq + elif key is Ellipsis: + # GH#21282 indexing with Ellipsis is similar to a full slice, + # should preserve `freq` attribute + freq = self.freq + elif com.is_bool_indexer(key): + new_key = lib.maybe_booleans_to_slice(key.view(np.uint8)) + if isinstance(new_key, slice): + return self._get_getitem_freq(new_key) + return freq + + # error: Argument 1 of "__setitem__" is incompatible with supertype + # "ExtensionArray"; supertype defines the argument type as "Union[int, + # ndarray]" + def __setitem__( # type: ignore[override] + self, + key: int | Sequence[int] | Sequence[bool] | slice, + value: NaTType | Any | Sequence[Any], + ) -> None: + # I'm fudging the types a bit here. "Any" above really depends + # on type(self). For PeriodArray, it's Period (or stuff coercible + # to a period in from_sequence). For DatetimeArray, it's Timestamp... + # I don't know if mypy can do that, possibly with Generics. 
+ # https://mypy.readthedocs.io/en/latest/generics.html + no_op = check_setitem_lengths(key, value, self) + if no_op: + return + + super().__setitem__(key, value) + self._maybe_clear_freq() + + def _maybe_clear_freq(self): + # inplace operations like __setitem__ may invalidate the freq of + # DatetimeArray and TimedeltaArray + pass + + def astype(self, dtype, copy: bool = True): + # Some notes on cases we don't have to handle here in the base class: + # 1. PeriodArray.astype handles period -> period + # 2. DatetimeArray.astype handles conversion between tz. + # 3. DatetimeArray.astype handles datetime -> period + dtype = pandas_dtype(dtype) + + if is_object_dtype(dtype): + if self.dtype.kind == "M": + # *much* faster than self._box_values + # for e.g. test_get_loc_tuple_monotonic_above_size_cutoff + i8data = self.asi8.ravel() + converted = ints_to_pydatetime( + i8data, + # error: "DatetimeLikeArrayMixin" has no attribute "tz" + tz=self.tz, # type: ignore[attr-defined] + freq=self.freq, + box="timestamp", + ) + return converted.reshape(self.shape) + + return self._box_values(self.asi8.ravel()).reshape(self.shape) + + elif isinstance(dtype, ExtensionDtype): + return super().astype(dtype, copy=copy) + elif is_string_dtype(dtype): + return self._format_native_types() + elif is_integer_dtype(dtype): + # we deliberately ignore int32 vs. int64 here. + # See https://github.com/pandas-dev/pandas/issues/24381 for more. + values = self.asi8 + + if is_unsigned_integer_dtype(dtype): + # Again, we ignore int32 vs. int64 + values = values.view("uint64") + + if copy: + values = values.copy() + return values + elif ( + is_datetime_or_timedelta_dtype(dtype) + and not is_dtype_equal(self.dtype, dtype) + ) or is_float_dtype(dtype): + # disallow conversion between datetime/timedelta, + # and conversions for any datetimelike to float + msg = f"Cannot cast {type(self).__name__} to dtype {dtype}" + raise TypeError(msg) + else: + return np.asarray(self, dtype=dtype) + + @overload + def view(self: DatetimeLikeArrayT) -> DatetimeLikeArrayT: + ... + + @overload + def view(self, dtype: Literal["M8[ns]"]) -> DatetimeArray: + ... + + @overload + def view(self, dtype: Literal["m8[ns]"]) -> TimedeltaArray: + ... + + @overload + def view(self, dtype: Dtype | None = ...) -> ArrayLike: + ... + + def view(self, dtype: Dtype | None = None) -> ArrayLike: + # we need to explicitly call super() method as long as the `@overload`s + # are present in this file. 
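+        # Usage sketch (doctest-style, assuming ``import pandas as pd``):
+        # unlike ``astype`` above, ``view`` reinterprets the stored data
+        # without copying, e.g. viewing the underlying int64 nanoseconds.
+        #
+        #   >>> arr = pd.date_range("2020-01-01", periods=2).array
+        #   >>> bool((arr.view("i8") == arr.asi8).all())
+        #   True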
+ return super().view(dtype) + + # ------------------------------------------------------------------ + # ExtensionArray Interface + + @classmethod + def _concat_same_type( + cls: type[DatetimeLikeArrayT], + to_concat: Sequence[DatetimeLikeArrayT], + axis: int = 0, + ) -> DatetimeLikeArrayT: + new_obj = super()._concat_same_type(to_concat, axis) + + obj = to_concat[0] + dtype = obj.dtype + + new_freq = None + if is_period_dtype(dtype): + new_freq = obj.freq + elif axis == 0: + # GH 3232: If the concat result is evenly spaced, we can retain the + # original frequency + to_concat = [x for x in to_concat if len(x)] + + if obj.freq is not None and all(x.freq == obj.freq for x in to_concat): + pairs = zip(to_concat[:-1], to_concat[1:]) + if all(pair[0][-1] + obj.freq == pair[1][0] for pair in pairs): + new_freq = obj.freq + + new_obj._freq = new_freq + return new_obj + + def copy(self: DatetimeLikeArrayT, order="C") -> DatetimeLikeArrayT: + # error: Unexpected keyword argument "order" for "copy" + new_obj = super().copy(order=order) # type: ignore[call-arg] + new_obj._freq = self.freq + return new_obj + + def _values_for_factorize(self): + # int64 instead of int ensures we have a "view" method + return self._ndarray, np.int64(iNaT) + + @classmethod + def _from_factorized( + cls: type[DatetimeLikeArrayT], values, original: DatetimeLikeArrayT + ) -> DatetimeLikeArrayT: + return cls(values, dtype=original.dtype) + + # ------------------------------------------------------------------ + # Validation Methods + # TODO: try to de-duplicate these, ensure identical behavior + + def _validate_comparison_value(self, other): + if isinstance(other, str): + try: + # GH#18435 strings get a pass from tzawareness compat + other = self._scalar_from_string(other) + except (ValueError, IncompatibleFrequency): + # failed to parse as Timestamp/Timedelta/Period + raise InvalidComparison(other) + + if isinstance(other, self._recognized_scalars) or other is NaT: + other = self._scalar_type(other) + try: + self._check_compatible_with(other) + except (TypeError, IncompatibleFrequency) as err: + # e.g. 
tzawareness mismatch + raise InvalidComparison(other) from err + + elif not is_list_like(other): + raise InvalidComparison(other) + + elif len(other) != len(self): + raise ValueError("Lengths must match") + + else: + try: + other = self._validate_listlike(other, allow_object=True) + self._check_compatible_with(other) + except (TypeError, IncompatibleFrequency) as err: + if is_object_dtype(getattr(other, "dtype", None)): + # We will have to operate element-wise + pass + else: + raise InvalidComparison(other) from err + + return other + + def _validate_shift_value(self, fill_value): + # TODO(2.0): once this deprecation is enforced, use _validate_scalar + if is_valid_na_for_dtype(fill_value, self.dtype): + fill_value = NaT + elif isinstance(fill_value, self._recognized_scalars): + fill_value = self._scalar_type(fill_value) + else: + new_fill: DatetimeLikeScalar + + # only warn if we're not going to raise + if self._scalar_type is Period and lib.is_integer(fill_value): + # kludge for #31971 since Period(integer) tries to cast to str + new_fill = Period._from_ordinal(fill_value, freq=self.freq) + else: + new_fill = self._scalar_type(fill_value) + + # stacklevel here is chosen to be correct when called from + # DataFrame.shift or Series.shift + warnings.warn( + f"Passing {type(fill_value)} to shift is deprecated and " + "will raise in a future version, pass " + f"{self._scalar_type.__name__} instead.", + FutureWarning, + # There is no way to hard-code the level since this might be + # reached directly or called from the Index or Block method + stacklevel=find_stack_level(), + ) + fill_value = new_fill + + return self._unbox(fill_value, setitem=True) + + def _validate_scalar( + self, + value, + *, + allow_listlike: bool = False, + setitem: bool = True, + unbox: bool = True, + ): + """ + Validate that the input value can be cast to our scalar_type. + + Parameters + ---------- + value : object + allow_listlike: bool, default False + When raising an exception, whether the message should say + listlike inputs are allowed. + setitem : bool, default True + Whether to check compatibility with setitem strictness. + unbox : bool, default True + Whether to unbox the result before returning. Note: unbox=False + skips the setitem compatibility check. + + Returns + ------- + self._scalar_type or NaT + """ + if isinstance(value, self._scalar_type): + pass + + elif isinstance(value, str): + # NB: Careful about tzawareness + try: + value = self._scalar_from_string(value) + except ValueError as err: + msg = self._validation_error_message(value, allow_listlike) + raise TypeError(msg) from err + + elif is_valid_na_for_dtype(value, self.dtype): + # GH#18295 + value = NaT + + elif isna(value): + # if we are dt64tz and value is dt64("NaT"), dont cast to NaT, + # or else we'll fail to raise in _unbox_scalar + msg = self._validation_error_message(value, allow_listlike) + raise TypeError(msg) + + elif isinstance(value, self._recognized_scalars): + value = self._scalar_type(value) + + else: + msg = self._validation_error_message(value, allow_listlike) + raise TypeError(msg) + + if not unbox: + # NB: In general NDArrayBackedExtensionArray will unbox here; + # this option exists to prevent a performance hit in + # TimedeltaIndex.get_loc + return value + return self._unbox_scalar(value, setitem=setitem) + + def _validation_error_message(self, value, allow_listlike: bool = False) -> str: + """ + Construct an exception message on validation error. 
+ + Some methods allow only scalar inputs, while others allow either scalar + or listlike. + + Parameters + ---------- + allow_listlike: bool, default False + + Returns + ------- + str + """ + if allow_listlike: + msg = ( + f"value should be a '{self._scalar_type.__name__}', 'NaT', " + f"or array of those. Got '{type(value).__name__}' instead." + ) + else: + msg = ( + f"value should be a '{self._scalar_type.__name__}' or 'NaT'. " + f"Got '{type(value).__name__}' instead." + ) + return msg + + def _validate_listlike(self, value, allow_object: bool = False): + if isinstance(value, type(self)): + return value + + if isinstance(value, list) and len(value) == 0: + # We treat empty list as our own dtype. + return type(self)._from_sequence([], dtype=self.dtype) + + if hasattr(value, "dtype") and value.dtype == object: + # `array` below won't do inference if value is an Index or Series. + # so do so here. in the Index case, inferred_type may be cached. + if lib.infer_dtype(value) in self._infer_matches: + try: + value = type(self)._from_sequence(value) + except (ValueError, TypeError): + if allow_object: + return value + msg = self._validation_error_message(value, True) + raise TypeError(msg) + + # Do type inference if necessary up front (after unpacking PandasArray) + # e.g. we passed PeriodIndex.values and got an ndarray of Periods + value = extract_array(value, extract_numpy=True) + value = pd_array(value) + value = extract_array(value, extract_numpy=True) + + if is_all_strings(value): + # We got a StringArray + try: + # TODO: Could use from_sequence_of_strings if implemented + # Note: passing dtype is necessary for PeriodArray tests + value = type(self)._from_sequence(value, dtype=self.dtype) + except ValueError: + pass + + if is_categorical_dtype(value.dtype): + # e.g. we have a Categorical holding self.dtype + if is_dtype_equal(value.categories.dtype, self.dtype): + # TODO: do we need equal dtype or just comparable? + value = value._internal_get_values() + value = extract_array(value, extract_numpy=True) + + if allow_object and is_object_dtype(value.dtype): + pass + + elif not type(self)._is_recognized_dtype(value.dtype): + msg = self._validation_error_message(value, True) + raise TypeError(msg) + + return value + + def _validate_searchsorted_value(self, value): + if not is_list_like(value): + return self._validate_scalar(value, allow_listlike=True, setitem=False) + else: + value = self._validate_listlike(value) + + return self._unbox(value) + + def _validate_setitem_value(self, value): + if is_list_like(value): + value = self._validate_listlike(value) + else: + return self._validate_scalar(value, allow_listlike=True) + + return self._unbox(value, setitem=True) + + def _unbox( + self, other, setitem: bool = False + ) -> np.int64 | np.datetime64 | np.timedelta64 | np.ndarray: + """ + Unbox either a scalar with _unbox_scalar or an instance of our own type. + """ + if lib.is_scalar(other): + other = self._unbox_scalar(other, setitem=setitem) + else: + # same type as self + self._check_compatible_with(other, setitem=setitem) + other = other._ndarray + return other + + # ------------------------------------------------------------------ + # Additional array methods + # These are not part of the EA API, but we implement them because + # pandas assumes they're there. + + @ravel_compat + def map(self, mapper): + # TODO(GH-23179): Add ExtensionArray.map + # Need to figure out if we want ExtensionArray.map first. 
+ # If so, then we can refactor IndexOpsMixin._map_values to + # a standalone function and call from here.. + # Else, just rewrite _map_infer_values to do the right thing. + from pandas import Index + + return Index(self).map(mapper).array + + def isin(self, values) -> npt.NDArray[np.bool_]: + """ + Compute boolean array of whether each value is found in the + passed set of values. + + Parameters + ---------- + values : set or sequence of values + + Returns + ------- + ndarray[bool] + """ + if not hasattr(values, "dtype"): + values = np.asarray(values) + + if values.dtype.kind in ["f", "i", "u", "c"]: + # TODO: de-duplicate with equals, validate_comparison_value + return np.zeros(self.shape, dtype=bool) + + if not isinstance(values, type(self)): + inferable = [ + "timedelta", + "timedelta64", + "datetime", + "datetime64", + "date", + "period", + ] + if values.dtype == object: + inferred = lib.infer_dtype(values, skipna=False) + if inferred not in inferable: + if inferred == "string": + pass + + elif "mixed" in inferred: + return isin(self.astype(object), values) + else: + return np.zeros(self.shape, dtype=bool) + + try: + values = type(self)._from_sequence(values) + except ValueError: + return isin(self.astype(object), values) + + try: + self._check_compatible_with(values) + except (TypeError, ValueError): + # Includes tzawareness mismatch and IncompatibleFrequencyError + return np.zeros(self.shape, dtype=bool) + + return isin(self.asi8, values.asi8) + + # ------------------------------------------------------------------ + # Null Handling + + def isna(self) -> npt.NDArray[np.bool_]: + return self._isnan + + @property # NB: override with cache_readonly in immutable subclasses + def _isnan(self) -> npt.NDArray[np.bool_]: + """ + return if each value is nan + """ + return self.asi8 == iNaT + + @property # NB: override with cache_readonly in immutable subclasses + def _hasna(self) -> bool: + """ + return if I have any nans; enables various perf speedups + """ + return bool(self._isnan.any()) + + def _maybe_mask_results( + self, result: np.ndarray, fill_value=iNaT, convert=None + ) -> np.ndarray: + """ + Parameters + ---------- + result : np.ndarray + fill_value : object, default iNaT + convert : str, dtype or None + + Returns + ------- + result : ndarray with values replace by the fill_value + + mask the result if needed, convert to the provided dtype if its not + None + + This is an internal routine. + """ + if self._hasna: + if convert: + result = result.astype(convert) + if fill_value is None: + fill_value = np.nan + np.putmask(result, self._isnan, fill_value) + return result + + # ------------------------------------------------------------------ + # Frequency Properties/Methods + + @property + def freq(self): + """ + Return the frequency object if it is set, otherwise None. + """ + return self._freq + + @freq.setter + def freq(self, value): + if value is not None: + value = to_offset(value) + self._validate_frequency(self, value) + + if self.ndim > 1: + raise ValueError("Cannot set freq with ndim > 1") + + self._freq = value + + @property + def freqstr(self) -> str | None: + """ + Return the frequency object as a string if its set, otherwise None. + """ + if self.freq is None: + return None + return self.freq.freqstr + + @property # NB: override with cache_readonly in immutable subclasses + def inferred_freq(self) -> str | None: + """ + Tries to return a string representing a frequency guess, + generated by infer_freq. Returns None if it can't autodetect the + frequency. 
+ """ + if self.ndim != 1: + return None + try: + return frequencies.infer_freq(self) + except ValueError: + return None + + @property # NB: override with cache_readonly in immutable subclasses + def _resolution_obj(self) -> Resolution | None: + freqstr = self.freqstr + if freqstr is None: + return None + try: + return Resolution.get_reso_from_freq(freqstr) + except KeyError: + return None + + @property # NB: override with cache_readonly in immutable subclasses + def resolution(self) -> str: + """ + Returns day, hour, minute, second, millisecond or microsecond + """ + # error: Item "None" of "Optional[Any]" has no attribute "attrname" + return self._resolution_obj.attrname # type: ignore[union-attr] + + @classmethod + def _validate_frequency(cls, index, freq, **kwargs): + """ + Validate that a frequency is compatible with the values of a given + Datetime Array/Index or Timedelta Array/Index + + Parameters + ---------- + index : DatetimeIndex or TimedeltaIndex + The index on which to determine if the given frequency is valid + freq : DateOffset + The frequency to validate + """ + # TODO: this is not applicable to PeriodArray, move to correct Mixin + inferred = index.inferred_freq + if index.size == 0 or inferred == freq.freqstr: + return None + + try: + on_freq = cls._generate_range( + start=index[0], end=None, periods=len(index), freq=freq, **kwargs + ) + if not np.array_equal(index.asi8, on_freq.asi8): + raise ValueError + except ValueError as e: + if "non-fixed" in str(e): + # non-fixed frequencies are not meaningful for timedelta64; + # we retain that error message + raise e + # GH#11587 the main way this is reached is if the `np.array_equal` + # check above is False. This can also be reached if index[0] + # is `NaT`, in which case the call to `cls._generate_range` will + # raise a ValueError, which we re-raise with a more targeted + # message. 
+ raise ValueError( + f"Inferred frequency {inferred} from passed values " + f"does not conform to passed frequency {freq.freqstr}" + ) from e + + @classmethod + def _generate_range( + cls: type[DatetimeLikeArrayT], start, end, periods, freq, *args, **kwargs + ) -> DatetimeLikeArrayT: + raise AbstractMethodError(cls) + + # monotonicity/uniqueness properties are called via frequencies.infer_freq, + # see GH#23789 + + @property + def _is_monotonic_increasing(self) -> bool: + return algos.is_monotonic(self.asi8, timelike=True)[0] + + @property + def _is_monotonic_decreasing(self) -> bool: + return algos.is_monotonic(self.asi8, timelike=True)[1] + + @property + def _is_unique(self) -> bool: + return len(unique1d(self.asi8.ravel("K"))) == self.size + + # ------------------------------------------------------------------ + # Arithmetic Methods + + def _cmp_method(self, other, op): + if self.ndim > 1 and getattr(other, "shape", None) == self.shape: + # TODO: handle 2D-like listlikes + return op(self.ravel(), other.ravel()).reshape(self.shape) + + try: + other = self._validate_comparison_value(other) + except InvalidComparison: + return invalid_comparison(self, other, op) + + dtype = getattr(other, "dtype", None) + if is_object_dtype(dtype): + # We have to use comp_method_OBJECT_ARRAY instead of numpy + # comparison otherwise it would fail to raise when + # comparing tz-aware and tz-naive + with np.errstate(all="ignore"): + result = ops.comp_method_OBJECT_ARRAY( + op, np.asarray(self.astype(object)), other + ) + return result + + other_vals = self._unbox(other) + # GH#37462 comparison on i8 values is almost 2x faster than M8/m8 + result = op(self._ndarray.view("i8"), other_vals.view("i8")) + + o_mask = isna(other) + mask = self._isnan | o_mask + if mask.any(): + nat_result = op is operator.ne + np.putmask(result, mask, nat_result) + + return result + + # pow is invalid for all three subclasses; TimedeltaArray will override + # the multiplication and division ops + __pow__ = make_invalid_op("__pow__") + __rpow__ = make_invalid_op("__rpow__") + __mul__ = make_invalid_op("__mul__") + __rmul__ = make_invalid_op("__rmul__") + __truediv__ = make_invalid_op("__truediv__") + __rtruediv__ = make_invalid_op("__rtruediv__") + __floordiv__ = make_invalid_op("__floordiv__") + __rfloordiv__ = make_invalid_op("__rfloordiv__") + __mod__ = make_invalid_op("__mod__") + __rmod__ = make_invalid_op("__rmod__") + __divmod__ = make_invalid_op("__divmod__") + __rdivmod__ = make_invalid_op("__rdivmod__") + + def _add_datetimelike_scalar(self, other): + # Overridden by TimedeltaArray + raise TypeError(f"cannot add {type(self).__name__} and {type(other).__name__}") + + _add_datetime_arraylike = _add_datetimelike_scalar + + def _sub_datetimelike_scalar(self, other): + # Overridden by DatetimeArray + assert other is not NaT + raise TypeError(f"cannot subtract a datelike from a {type(self).__name__}") + + _sub_datetime_arraylike = _sub_datetimelike_scalar + + def _sub_period(self, other): + # Overridden by PeriodArray + raise TypeError(f"cannot subtract Period from a {type(self).__name__}") + + def _add_period(self, other: Period): + # Overridden by TimedeltaArray + raise TypeError(f"cannot add Period to a {type(self).__name__}") + + def _add_offset(self, offset): + raise AbstractMethodError(self) + + def _add_timedeltalike_scalar(self, other): + """ + Add a delta of a timedeltalike + + Returns + ------- + Same type as self + """ + if isna(other): + # i.e np.timedelta64("NaT"), not recognized by delta_to_nanoseconds + 
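+            # e.g. (doctest-style sketch, assuming ``import pandas as pd`` and
+            # ``import numpy as np``) a missing timedelta blanks the result
+            # and drops the freq:
+            #
+            #   >>> dti = pd.date_range("2020-01-01", periods=2, freq="D")
+            #   >>> dti + np.timedelta64("NaT")
+            #   DatetimeIndex(['NaT', 'NaT'], dtype='datetime64[ns]', freq=None)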
new_values = np.empty(self.shape, dtype="i8") + new_values.fill(iNaT) + return type(self)(new_values, dtype=self.dtype) + + inc = delta_to_nanoseconds(other) + new_values = checked_add_with_arr(self.asi8, inc, arr_mask=self._isnan) + new_values = new_values.view("i8") + new_values = self._maybe_mask_results(new_values) + new_values = new_values.view(self._ndarray.dtype) + + new_freq = None + if isinstance(self.freq, Tick) or is_period_dtype(self.dtype): + # adding a scalar preserves freq + new_freq = self.freq + + # error: Unexpected keyword argument "freq" for "_simple_new" of "NDArrayBacked" + return type(self)._simple_new( # type: ignore[call-arg] + new_values, dtype=self.dtype, freq=new_freq + ) + + def _add_timedelta_arraylike(self, other): + """ + Add a delta of a TimedeltaIndex + + Returns + ------- + Same type as self + """ + # overridden by PeriodArray + + if len(self) != len(other): + raise ValueError("cannot add indices of unequal length") + + if isinstance(other, np.ndarray): + # ndarray[timedelta64]; wrap in TimedeltaIndex for op + from pandas.core.arrays import TimedeltaArray + + other = TimedeltaArray._from_sequence(other) + + self_i8 = self.asi8 + other_i8 = other.asi8 + new_values = checked_add_with_arr( + self_i8, other_i8, arr_mask=self._isnan, b_mask=other._isnan + ) + if self._hasna or other._hasna: + mask = self._isnan | other._isnan + np.putmask(new_values, mask, iNaT) + + return type(self)(new_values, dtype=self.dtype) + + @final + def _add_nat(self): + """ + Add pd.NaT to self + """ + if is_period_dtype(self.dtype): + raise TypeError( + f"Cannot add {type(self).__name__} and {type(NaT).__name__}" + ) + + # GH#19124 pd.NaT is treated like a timedelta for both timedelta + # and datetime dtypes + result = np.empty(self.shape, dtype=np.int64) + result.fill(iNaT) + return type(self)(result, dtype=self.dtype, freq=None) + + @final + def _sub_nat(self): + """ + Subtract pd.NaT from self + """ + # GH#19124 Timedelta - datetime is not in general well-defined. + # We make an exception for pd.NaT, which in this case quacks + # like a timedelta. + # For datetime64 dtypes by convention we treat NaT as a datetime, so + # this subtraction returns a timedelta64 dtype. + # For period dtype, timedelta64 is a close-enough return dtype. 
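+        # e.g. (doctest-style, assuming ``import pandas as pd``):
+        #
+        #   >>> dti = pd.date_range("2020-01-01", periods=2, freq="D")
+        #   >>> dti - pd.NaT
+        #   TimedeltaIndex(['NaT', 'NaT'], dtype='timedelta64[ns]', freq=None)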
+ result = np.empty(self.shape, dtype=np.int64) + result.fill(iNaT) + return result.view("timedelta64[ns]") + + def _sub_period_array(self, other): + # Overridden by PeriodArray + raise TypeError( + f"cannot subtract {other.dtype}-dtype from {type(self).__name__}" + ) + + def _addsub_object_array(self, other: np.ndarray, op): + """ + Add or subtract array-like of DateOffset objects + + Parameters + ---------- + other : np.ndarray[object] + op : {operator.add, operator.sub} + + Returns + ------- + result : same class as self + """ + assert op in [operator.add, operator.sub] + if len(other) == 1 and self.ndim == 1: + # If both 1D then broadcasting is unambiguous + return op(self, other[0]) + + warnings.warn( + "Adding/subtracting object-dtype array to " + f"{type(self).__name__} not vectorized.", + PerformanceWarning, + ) + + # Caller is responsible for broadcasting if necessary + assert self.shape == other.shape, (self.shape, other.shape) + + with warnings.catch_warnings(): + # filter out warnings about Timestamp.freq + warnings.filterwarnings("ignore", category=FutureWarning) + res_values = op(self.astype("O"), np.asarray(other)) + + result = pd_array(res_values.ravel()) + # error: Item "ExtensionArray" of "Union[Any, ExtensionArray]" has no attribute + # "reshape" + result = extract_array( + result, extract_numpy=True + ).reshape( # type: ignore[union-attr] + self.shape + ) + return result + + def _time_shift( + self: DatetimeLikeArrayT, periods: int, freq=None + ) -> DatetimeLikeArrayT: + """ + Shift each value by `periods`. + + Note this is different from ExtensionArray.shift, which + shifts the *position* of each element, padding the end with + missing values. + + Parameters + ---------- + periods : int + Number of periods to shift by. + freq : pandas.DateOffset, pandas.Timedelta, or str + Frequency increment to shift by. + """ + if freq is not None and freq != self.freq: + if isinstance(freq, str): + freq = to_offset(freq) + offset = periods * freq + return self + offset + + if periods == 0 or len(self) == 0: + # GH#14811 empty case + return self.copy() + + if self.freq is None: + raise NullFrequencyError("Cannot shift with no freq") + + start = self[0] + periods * self.freq + end = self[-1] + periods * self.freq + + # Note: in the DatetimeTZ case, _generate_range will infer the + # appropriate timezone from `start` and `end`, so tz does not need + # to be passed explicitly. + return self._generate_range(start=start, end=end, periods=None, freq=self.freq) + + @unpack_zerodim_and_defer("__add__") + def __add__(self, other): + other_dtype = getattr(other, "dtype", None) + + # scalar others + if other is NaT: + result = self._add_nat() + elif isinstance(other, (Tick, timedelta, np.timedelta64)): + result = self._add_timedeltalike_scalar(other) + elif isinstance(other, BaseOffset): + # specifically _not_ a Tick + result = self._add_offset(other) + elif isinstance(other, (datetime, np.datetime64)): + result = self._add_datetimelike_scalar(other) + elif isinstance(other, Period) and is_timedelta64_dtype(self.dtype): + result = self._add_period(other) + elif lib.is_integer(other): + # This check must come after the check for np.timedelta64 + # as is_integer returns True for these + if not is_period_dtype(self.dtype): + raise integer_op_not_supported(self) + result = self._time_shift(other) + + # array-like others + elif is_timedelta64_dtype(other_dtype): + # TimedeltaIndex, ndarray[timedelta64] + result = self._add_timedelta_arraylike(other) + elif is_object_dtype(other_dtype): + # e.g. 
Array/Index of DateOffset objects + result = self._addsub_object_array(other, operator.add) + elif is_datetime64_dtype(other_dtype) or is_datetime64tz_dtype(other_dtype): + # DatetimeIndex, ndarray[datetime64] + return self._add_datetime_arraylike(other) + elif is_integer_dtype(other_dtype): + if not is_period_dtype(self.dtype): + raise integer_op_not_supported(self) + result = self._addsub_int_array(other, operator.add) + else: + # Includes Categorical, other ExtensionArrays + # For PeriodDtype, if self is a TimedeltaArray and other is a + # PeriodArray with a timedelta-like (i.e. Tick) freq, this + # operation is valid. Defer to the PeriodArray implementation. + # In remaining cases, this will end up raising TypeError. + return NotImplemented + + if isinstance(result, np.ndarray) and is_timedelta64_dtype(result.dtype): + from pandas.core.arrays import TimedeltaArray + + return TimedeltaArray(result) + return result + + def __radd__(self, other): + # alias for __add__ + return self.__add__(other) + + @unpack_zerodim_and_defer("__sub__") + def __sub__(self, other): + + other_dtype = getattr(other, "dtype", None) + + # scalar others + if other is NaT: + result = self._sub_nat() + elif isinstance(other, (Tick, timedelta, np.timedelta64)): + result = self._add_timedeltalike_scalar(-other) + elif isinstance(other, BaseOffset): + # specifically _not_ a Tick + result = self._add_offset(-other) + elif isinstance(other, (datetime, np.datetime64)): + result = self._sub_datetimelike_scalar(other) + elif lib.is_integer(other): + # This check must come after the check for np.timedelta64 + # as is_integer returns True for these + if not is_period_dtype(self.dtype): + raise integer_op_not_supported(self) + result = self._time_shift(-other) + + elif isinstance(other, Period): + result = self._sub_period(other) + + # array-like others + elif is_timedelta64_dtype(other_dtype): + # TimedeltaIndex, ndarray[timedelta64] + result = self._add_timedelta_arraylike(-other) + elif is_object_dtype(other_dtype): + # e.g. Array/Index of DateOffset objects + result = self._addsub_object_array(other, operator.sub) + elif is_datetime64_dtype(other_dtype) or is_datetime64tz_dtype(other_dtype): + # DatetimeIndex, ndarray[datetime64] + result = self._sub_datetime_arraylike(other) + elif is_period_dtype(other_dtype): + # PeriodIndex + result = self._sub_period_array(other) + elif is_integer_dtype(other_dtype): + if not is_period_dtype(self.dtype): + raise integer_op_not_supported(self) + result = self._addsub_int_array(other, operator.sub) + else: + # Includes ExtensionArrays, float_dtype + return NotImplemented + + if isinstance(result, np.ndarray) and is_timedelta64_dtype(result.dtype): + from pandas.core.arrays import TimedeltaArray + + return TimedeltaArray(result) + return result + + def __rsub__(self, other): + other_dtype = getattr(other, "dtype", None) + + if is_datetime64_any_dtype(other_dtype) and is_timedelta64_dtype(self.dtype): + # ndarray[datetime64] cannot be subtracted from self, so + # we need to wrap in DatetimeArray/Index and flip the operation + if lib.is_scalar(other): + # i.e. 
np.datetime64 object + return Timestamp(other) - self + if not isinstance(other, DatetimeLikeArrayMixin): + # Avoid down-casting DatetimeIndex + from pandas.core.arrays import DatetimeArray + + other = DatetimeArray(other) + return other - self + elif ( + is_datetime64_any_dtype(self.dtype) + and hasattr(other, "dtype") + and not is_datetime64_any_dtype(other.dtype) + ): + # GH#19959 datetime - datetime is well-defined as timedelta, + # but any other type - datetime is not well-defined. + raise TypeError( + f"cannot subtract {type(self).__name__} from {type(other).__name__}" + ) + elif is_period_dtype(self.dtype) and is_timedelta64_dtype(other_dtype): + # TODO: Can we simplify/generalize these cases at all? + raise TypeError(f"cannot subtract {type(self).__name__} from {other.dtype}") + elif is_timedelta64_dtype(self.dtype): + self = cast("TimedeltaArray", self) + return (-self) + other + + # We get here with e.g. datetime objects + return -(self - other) + + def __iadd__(self, other): + result = self + other + self[:] = result[:] + + if not is_period_dtype(self.dtype): + # restore freq, which is invalidated by setitem + self._freq = result.freq + return self + + def __isub__(self, other): + result = self - other + self[:] = result[:] + + if not is_period_dtype(self.dtype): + # restore freq, which is invalidated by setitem + self._freq = result.freq + return self + + # -------------------------------------------------------------- + # Reductions + + def min(self, *, axis: int | None = None, skipna: bool = True, **kwargs): + """ + Return the minimum value of the Array or minimum along + an axis. + + See Also + -------- + numpy.ndarray.min + Index.min : Return the minimum value in an Index. + Series.min : Return the minimum value in a Series. + """ + nv.validate_min((), kwargs) + nv.validate_minmax_axis(axis, self.ndim) + + if is_period_dtype(self.dtype): + # pass datetime64 values to nanops to get correct NaT semantics + result = nanops.nanmin( + self._ndarray.view("M8[ns]"), axis=axis, skipna=skipna + ) + if result is NaT: + return NaT + result = result.view("i8") + if axis is None or self.ndim == 1: + return self._box_func(result) + return self._from_backing_data(result) + + result = nanops.nanmin(self._ndarray, axis=axis, skipna=skipna) + return self._wrap_reduction_result(axis, result) + + def max(self, *, axis: int | None = None, skipna: bool = True, **kwargs): + """ + Return the maximum value of the Array or maximum along + an axis. + + See Also + -------- + numpy.ndarray.max + Index.max : Return the maximum value in an Index. + Series.max : Return the maximum value in a Series. + """ + nv.validate_max((), kwargs) + nv.validate_minmax_axis(axis, self.ndim) + + if is_period_dtype(self.dtype): + # pass datetime64 values to nanops to get correct NaT semantics + result = nanops.nanmax( + self._ndarray.view("M8[ns]"), axis=axis, skipna=skipna + ) + if result is NaT: + return result + result = result.view("i8") + if axis is None or self.ndim == 1: + return self._box_func(result) + return self._from_backing_data(result) + + result = nanops.nanmax(self._ndarray, axis=axis, skipna=skipna) + return self._wrap_reduction_result(axis, result) + + def mean(self, *, skipna: bool = True, axis: int | None = 0): + """ + Return the mean value of the Array. + + .. versionadded:: 0.25.0 + + Parameters + ---------- + skipna : bool, default True + Whether to ignore any NaT elements. + axis : int, optional, default 0 + + Returns + ------- + scalar + Timestamp or Timedelta. 
+ + See Also + -------- + numpy.ndarray.mean : Returns the average of array elements along a given axis. + Series.mean : Return the mean value in a Series. + + Notes + ----- + mean is only defined for Datetime and Timedelta dtypes, not for Period. + """ + if is_period_dtype(self.dtype): + # See discussion in GH#24757 + raise TypeError( + f"mean is not implemented for {type(self).__name__} since the " + "meaning is ambiguous. An alternative is " + "obj.to_timestamp(how='start').mean()" + ) + + result = nanops.nanmean( + self._ndarray, axis=axis, skipna=skipna, mask=self.isna() + ) + return self._wrap_reduction_result(axis, result) + + def median(self, *, axis: int | None = None, skipna: bool = True, **kwargs): + nv.validate_median((), kwargs) + + if axis is not None and abs(axis) >= self.ndim: + raise ValueError("abs(axis) must be less than ndim") + + if is_period_dtype(self.dtype): + # pass datetime64 values to nanops to get correct NaT semantics + result = nanops.nanmedian( + self._ndarray.view("M8[ns]"), axis=axis, skipna=skipna + ) + result = result.view("i8") + if axis is None or self.ndim == 1: + return self._box_func(result) + return self._from_backing_data(result) + + result = nanops.nanmedian(self._ndarray, axis=axis, skipna=skipna) + return self._wrap_reduction_result(axis, result) + + def _mode(self, dropna: bool = True): + values = self + if dropna: + mask = values.isna() + values = values[~mask] + + i8modes = mode(values.view("i8")) + npmodes = i8modes.view(self._ndarray.dtype) + npmodes = cast(np.ndarray, npmodes) + return self._from_backing_data(npmodes) + + +class DatelikeOps(DatetimeLikeArrayMixin): + """ + Common ops for DatetimeIndex/PeriodIndex, but not TimedeltaIndex. + """ + + @Substitution( + URL="https://docs.python.org/3/library/datetime.html" + "#strftime-and-strptime-behavior" + ) + def strftime(self, date_format: str) -> npt.NDArray[np.object_]: + """ + Convert to Index using specified date_format. + + Return an Index of formatted strings specified by date_format, which + supports the same string format as the python standard library. Details + of the string format can be found in `python string format + doc <%(URL)s>`__. + + Parameters + ---------- + date_format : str + Date format string (e.g. "%%Y-%%m-%%d"). + + Returns + ------- + ndarray[object] + NumPy ndarray of formatted strings. + + See Also + -------- + to_datetime : Convert the given argument to datetime. + DatetimeIndex.normalize : Return DatetimeIndex with times to midnight. + DatetimeIndex.round : Round the DatetimeIndex to the specified freq. + DatetimeIndex.floor : Floor the DatetimeIndex to the specified freq. + + Examples + -------- + >>> rng = pd.date_range(pd.Timestamp("2018-03-10 09:00"), + ... periods=3, freq='s') + >>> rng.strftime('%%B %%d, %%Y, %%r') + Index(['March 10, 2018, 09:00:00 AM', 'March 10, 2018, 09:00:01 AM', + 'March 10, 2018, 09:00:02 AM'], + dtype='object') + """ + result = self._format_native_types(date_format=date_format, na_rep=np.nan) + return result.astype(object, copy=False) + + +_round_doc = """ + Perform {op} operation on the data to the specified `freq`. + + Parameters + ---------- + freq : str or Offset + The frequency level to {op} the index to. Must be a fixed + frequency like 'S' (second) not 'ME' (month end). See + :ref:`frequency aliases ` for + a list of possible `freq` values. 
+ ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise' + Only relevant for DatetimeIndex: + + - 'infer' will attempt to infer fall dst-transition hours based on + order + - bool-ndarray where True signifies a DST time, False designates + a non-DST time (note that this flag is only applicable for + ambiguous times) + - 'NaT' will return NaT where there are ambiguous times + - 'raise' will raise an AmbiguousTimeError if there are ambiguous + times. + + nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + - 'shift_forward' will shift the nonexistent time forward to the + closest existing time + - 'shift_backward' will shift the nonexistent time backward to the + closest existing time + - 'NaT' will return NaT where there are nonexistent times + - timedelta objects will shift nonexistent times by the timedelta + - 'raise' will raise an NonExistentTimeError if there are + nonexistent times. + + Returns + ------- + DatetimeIndex, TimedeltaIndex, or Series + Index of the same type for a DatetimeIndex or TimedeltaIndex, + or a Series with the same index for a Series. + + Raises + ------ + ValueError if the `freq` cannot be converted. + + Notes + ----- + If the timestamps have a timezone, {op}ing will take place relative to the + local ("wall") time and re-localized to the same timezone. When {op}ing + near daylight savings time, use ``nonexistent`` and ``ambiguous`` to + control the re-localization behavior. + + Examples + -------- + **DatetimeIndex** + + >>> rng = pd.date_range('1/1/2018 11:59:00', periods=3, freq='min') + >>> rng + DatetimeIndex(['2018-01-01 11:59:00', '2018-01-01 12:00:00', + '2018-01-01 12:01:00'], + dtype='datetime64[ns]', freq='T') + """ + +_round_example = """>>> rng.round('H') + DatetimeIndex(['2018-01-01 12:00:00', '2018-01-01 12:00:00', + '2018-01-01 12:00:00'], + dtype='datetime64[ns]', freq=None) + + **Series** + + >>> pd.Series(rng).dt.round("H") + 0 2018-01-01 12:00:00 + 1 2018-01-01 12:00:00 + 2 2018-01-01 12:00:00 + dtype: datetime64[ns] + + When rounding near a daylight savings time transition, use ``ambiguous`` or + ``nonexistent`` to control how the timestamp should be re-localized. + + >>> rng_tz = pd.DatetimeIndex(["2021-10-31 03:30:00"], tz="Europe/Amsterdam") + + >>> rng_tz.floor("2H", ambiguous=False) + DatetimeIndex(['2021-10-31 02:00:00+01:00'], + dtype='datetime64[ns, Europe/Amsterdam]', freq=None) + + >>> rng_tz.floor("2H", ambiguous=True) + DatetimeIndex(['2021-10-31 02:00:00+02:00'], + dtype='datetime64[ns, Europe/Amsterdam]', freq=None) + """ + +_floor_example = """>>> rng.floor('H') + DatetimeIndex(['2018-01-01 11:00:00', '2018-01-01 12:00:00', + '2018-01-01 12:00:00'], + dtype='datetime64[ns]', freq=None) + + **Series** + + >>> pd.Series(rng).dt.floor("H") + 0 2018-01-01 11:00:00 + 1 2018-01-01 12:00:00 + 2 2018-01-01 12:00:00 + dtype: datetime64[ns] + + When rounding near a daylight savings time transition, use ``ambiguous`` or + ``nonexistent`` to control how the timestamp should be re-localized. 
+ + >>> rng_tz = pd.DatetimeIndex(["2021-10-31 03:30:00"], tz="Europe/Amsterdam") + + >>> rng_tz.floor("2H", ambiguous=False) + DatetimeIndex(['2021-10-31 02:00:00+01:00'], + dtype='datetime64[ns, Europe/Amsterdam]', freq=None) + + >>> rng_tz.floor("2H", ambiguous=True) + DatetimeIndex(['2021-10-31 02:00:00+02:00'], + dtype='datetime64[ns, Europe/Amsterdam]', freq=None) + """ + +_ceil_example = """>>> rng.ceil('H') + DatetimeIndex(['2018-01-01 12:00:00', '2018-01-01 12:00:00', + '2018-01-01 13:00:00'], + dtype='datetime64[ns]', freq=None) + + **Series** + + >>> pd.Series(rng).dt.ceil("H") + 0 2018-01-01 12:00:00 + 1 2018-01-01 12:00:00 + 2 2018-01-01 13:00:00 + dtype: datetime64[ns] + + When rounding near a daylight savings time transition, use ``ambiguous`` or + ``nonexistent`` to control how the timestamp should be re-localized. + + >>> rng_tz = pd.DatetimeIndex(["2021-10-31 01:30:00"], tz="Europe/Amsterdam") + + >>> rng_tz.ceil("H", ambiguous=False) + DatetimeIndex(['2021-10-31 02:00:00+01:00'], + dtype='datetime64[ns, Europe/Amsterdam]', freq=None) + + >>> rng_tz.ceil("H", ambiguous=True) + DatetimeIndex(['2021-10-31 02:00:00+02:00'], + dtype='datetime64[ns, Europe/Amsterdam]', freq=None) + """ + + +TimelikeOpsT = TypeVar("TimelikeOpsT", bound="TimelikeOps") + + +class TimelikeOps(DatetimeLikeArrayMixin): + """ + Common ops for TimedeltaIndex/DatetimeIndex, but not PeriodIndex. + """ + + def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): + if ( + ufunc in [np.isnan, np.isinf, np.isfinite] + and len(inputs) == 1 + and inputs[0] is self + ): + # numpy 1.18 changed isinf and isnan to not raise on dt64/td64 + return getattr(ufunc, method)(self._ndarray, **kwargs) + + return super().__array_ufunc__(ufunc, method, *inputs, **kwargs) + + def _round(self, freq, mode, ambiguous, nonexistent): + # round the local times + if is_datetime64tz_dtype(self.dtype): + # operate on naive timestamps, then convert back to aware + self = cast("DatetimeArray", self) + naive = self.tz_localize(None) + result = naive._round(freq, mode, ambiguous, nonexistent) + return result.tz_localize( + self.tz, ambiguous=ambiguous, nonexistent=nonexistent + ) + + values = self.view("i8") + values = cast(np.ndarray, values) + nanos = to_offset(freq).nanos + result_i8 = round_nsint64(values, mode, nanos) + result = self._maybe_mask_results(result_i8, fill_value=iNaT) + result = result.view(self._ndarray.dtype) + return self._simple_new(result, dtype=self.dtype) + + @Appender((_round_doc + _round_example).format(op="round")) + def round(self, freq, ambiguous="raise", nonexistent="raise"): + return self._round(freq, RoundTo.NEAREST_HALF_EVEN, ambiguous, nonexistent) + + @Appender((_round_doc + _floor_example).format(op="floor")) + def floor(self, freq, ambiguous="raise", nonexistent="raise"): + return self._round(freq, RoundTo.MINUS_INFTY, ambiguous, nonexistent) + + @Appender((_round_doc + _ceil_example).format(op="ceil")) + def ceil(self, freq, ambiguous="raise", nonexistent="raise"): + return self._round(freq, RoundTo.PLUS_INFTY, ambiguous, nonexistent) + + # -------------------------------------------------------------- + # Reductions + + def any(self, *, axis: int | None = None, skipna: bool = True): + # GH#34479 discussion of desired behavior long-term + return nanops.nanany(self._ndarray, axis=axis, skipna=skipna, mask=self.isna()) + + def all(self, *, axis: int | None = None, skipna: bool = True): + # GH#34479 discussion of desired behavior long-term + return nanops.nanall(self._ndarray, 
axis=axis, skipna=skipna, mask=self.isna()) + + # -------------------------------------------------------------- + # Frequency Methods + + def _maybe_clear_freq(self) -> None: + self._freq = None + + def _with_freq(self, freq): + """ + Helper to get a view on the same data, with a new freq. + + Parameters + ---------- + freq : DateOffset, None, or "infer" + + Returns + ------- + Same type as self + """ + # GH#29843 + if freq is None: + # Always valid + pass + elif len(self) == 0 and isinstance(freq, BaseOffset): + # Always valid. In the TimedeltaArray case, we assume this + # is a Tick offset. + pass + else: + # As an internal method, we can ensure this assertion always holds + assert freq == "infer" + freq = to_offset(self.inferred_freq) + + arr = self.view() + arr._freq = freq + return arr + + # -------------------------------------------------------------- + + def factorize(self, na_sentinel=-1, sort: bool = False): + if self.freq is not None: + # We must be unique, so can short-circuit (and retain freq) + codes = np.arange(len(self), dtype=np.intp) + uniques = self.copy() # TODO: copy or view? + if sort and self.freq.n < 0: + codes = codes[::-1] + uniques = uniques[::-1] + return codes, uniques + # FIXME: shouldn't get here; we are ignoring sort + return super().factorize(na_sentinel=na_sentinel) + + +# ------------------------------------------------------------------- +# Shared Constructor Helpers + + +def validate_periods(periods): + """ + If a `periods` argument is passed to the Datetime/Timedelta Array/Index + constructor, cast it to an integer. + + Parameters + ---------- + periods : None, float, int + + Returns + ------- + periods : None or int + + Raises + ------ + TypeError + if periods is None, float, or int + """ + if periods is not None: + if lib.is_float(periods): + periods = int(periods) + elif not lib.is_integer(periods): + raise TypeError(f"periods must be a number, got {periods}") + return periods + + +def validate_inferred_freq(freq, inferred_freq, freq_infer): + """ + If the user passes a freq and another freq is inferred from passed data, + require that they match. + + Parameters + ---------- + freq : DateOffset or None + inferred_freq : DateOffset or None + freq_infer : bool + + Returns + ------- + freq : DateOffset or None + freq_infer : bool + + Notes + ----- + We assume at this point that `maybe_infer_freq` has been called, so + `freq` is either a DateOffset object or None. + """ + if inferred_freq is not None: + if freq is not None and freq != inferred_freq: + raise ValueError( + f"Inferred frequency {inferred_freq} from passed " + "values does not conform to passed frequency " + f"{freq.freqstr}" + ) + elif freq is None: + freq = inferred_freq + freq_infer = False + + return freq, freq_infer + + +def maybe_infer_freq(freq): + """ + Comparing a DateOffset to the string "infer" raises, so we need to + be careful about comparisons. Make a dummy variable `freq_infer` to + signify the case where the given freq is "infer" and set freq to None + to avoid comparison trouble later on. + + Parameters + ---------- + freq : {DateOffset, None, str} + + Returns + ------- + freq : {DateOffset, None} + freq_infer : bool + Whether we should inherit the freq of passed data. 
+ """ + freq_infer = False + if not isinstance(freq, BaseOffset): + # if a passed freq is None, don't infer automatically + if freq != "infer": + freq = to_offset(freq) + else: + freq_infer = True + freq = None + return freq, freq_infer diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/datetimes.py b/.local/lib/python3.8/site-packages/pandas/core/arrays/datetimes.py new file mode 100644 index 0000000..00d5f11 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/arrays/datetimes.py @@ -0,0 +1,2596 @@ +from __future__ import annotations + +from datetime import ( + datetime, + time, + timedelta, + tzinfo, +) +from typing import ( + TYPE_CHECKING, + Literal, +) +import warnings + +import numpy as np + +from pandas._libs import ( + lib, + tslib, +) +from pandas._libs.arrays import NDArrayBacked +from pandas._libs.tslibs import ( + BaseOffset, + NaT, + NaTType, + Resolution, + Timestamp, + conversion, + fields, + get_resolution, + iNaT, + ints_to_pydatetime, + is_date_array_normalized, + normalize_i8_timestamps, + timezones, + to_offset, + tzconversion, +) +from pandas._typing import npt +from pandas.errors import PerformanceWarning +from pandas.util._exceptions import find_stack_level +from pandas.util._validators import validate_inclusive + +from pandas.core.dtypes.cast import astype_dt64_to_dt64tz +from pandas.core.dtypes.common import ( + DT64NS_DTYPE, + INT64_DTYPE, + is_bool_dtype, + is_categorical_dtype, + is_datetime64_any_dtype, + is_datetime64_dtype, + is_datetime64_ns_dtype, + is_datetime64tz_dtype, + is_dtype_equal, + is_extension_array_dtype, + is_float_dtype, + is_object_dtype, + is_period_dtype, + is_sparse, + is_string_dtype, + is_timedelta64_dtype, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import DatetimeTZDtype +from pandas.core.dtypes.generic import ABCMultiIndex +from pandas.core.dtypes.missing import isna + +from pandas.core.algorithms import checked_add_with_arr +from pandas.core.arrays import ( + ExtensionArray, + datetimelike as dtl, +) +from pandas.core.arrays._ranges import generate_regular_range +from pandas.core.arrays.integer import IntegerArray +import pandas.core.common as com +from pandas.core.construction import extract_array + +from pandas.tseries.frequencies import get_period_alias +from pandas.tseries.offsets import ( + BDay, + Day, + Tick, +) + +if TYPE_CHECKING: + + from pandas import DataFrame + from pandas.core.arrays import ( + PeriodArray, + TimedeltaArray, + ) + +_midnight = time(0, 0) + + +def tz_to_dtype(tz): + """ + Return a datetime64[ns] dtype appropriate for the given timezone. 
+ + Parameters + ---------- + tz : tzinfo or None + + Returns + ------- + np.dtype or Datetime64TZDType + """ + if tz is None: + return DT64NS_DTYPE + else: + return DatetimeTZDtype(tz=tz) + + +def _field_accessor(name: str, field: str, docstring=None): + def f(self): + values = self._local_timestamps() + + if field in self._bool_ops: + result: np.ndarray + + if field.endswith(("start", "end")): + freq = self.freq + month_kw = 12 + if freq: + kwds = freq.kwds + month_kw = kwds.get("startingMonth", kwds.get("month", 12)) + + result = fields.get_start_end_field( + values, field, self.freqstr, month_kw + ) + else: + result = fields.get_date_field(values, field) + + # these return a boolean by-definition + return result + + if field in self._object_ops: + result = fields.get_date_name_field(values, field) + result = self._maybe_mask_results(result, fill_value=None) + + else: + result = fields.get_date_field(values, field) + result = self._maybe_mask_results( + result, fill_value=None, convert="float64" + ) + + return result + + f.__name__ = name + f.__doc__ = docstring + return property(f) + + +class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps): + """ + Pandas ExtensionArray for tz-naive or tz-aware datetime data. + + .. warning:: + + DatetimeArray is currently experimental, and its API may change + without warning. In particular, :attr:`DatetimeArray.dtype` is + expected to change to always be an instance of an ``ExtensionDtype`` + subclass. + + Parameters + ---------- + values : Series, Index, DatetimeArray, ndarray + The datetime data. + + For DatetimeArray `values` (or a Series or Index boxing one), + `dtype` and `freq` will be extracted from `values`. + + dtype : numpy.dtype or DatetimeTZDtype + Note that the only NumPy dtype allowed is 'datetime64[ns]'. + freq : str or Offset, optional + The frequency. + copy : bool, default False + Whether to copy the underlying array of values. 
+ + Attributes + ---------- + None + + Methods + ------- + None + """ + + _typ = "datetimearray" + _scalar_type = Timestamp + _recognized_scalars = (datetime, np.datetime64) + _is_recognized_dtype = is_datetime64_any_dtype + _infer_matches = ("datetime", "datetime64", "date") + + # define my properties & methods for delegation + _bool_ops: list[str] = [ + "is_month_start", + "is_month_end", + "is_quarter_start", + "is_quarter_end", + "is_year_start", + "is_year_end", + "is_leap_year", + ] + _object_ops: list[str] = ["freq", "tz"] + _field_ops: list[str] = [ + "year", + "month", + "day", + "hour", + "minute", + "second", + "weekofyear", + "week", + "weekday", + "dayofweek", + "day_of_week", + "dayofyear", + "day_of_year", + "quarter", + "days_in_month", + "daysinmonth", + "microsecond", + "nanosecond", + ] + _other_ops: list[str] = ["date", "time", "timetz"] + _datetimelike_ops: list[str] = _field_ops + _object_ops + _bool_ops + _other_ops + _datetimelike_methods: list[str] = [ + "to_period", + "tz_localize", + "tz_convert", + "normalize", + "strftime", + "round", + "floor", + "ceil", + "month_name", + "day_name", + ] + + # ndim is inherited from ExtensionArray, must exist to ensure + # Timestamp.__richcmp__(DateTimeArray) operates pointwise + + # ensure that operations with numpy arrays defer to our implementation + __array_priority__ = 1000 + + # ----------------------------------------------------------------- + # Constructors + + _dtype: np.dtype | DatetimeTZDtype + _freq = None + + def __init__(self, values, dtype=DT64NS_DTYPE, freq=None, copy: bool = False): + values = extract_array(values, extract_numpy=True) + if isinstance(values, IntegerArray): + values = values.to_numpy("int64", na_value=iNaT) + + inferred_freq = getattr(values, "_freq", None) + + if isinstance(values, type(self)): + # validation + dtz = getattr(dtype, "tz", None) + if dtz and values.tz is None: + dtype = DatetimeTZDtype(tz=dtype.tz) + elif dtz and values.tz: + if not timezones.tz_compare(dtz, values.tz): + msg = ( + "Timezone of the array and 'dtype' do not match. " + f"'{dtz}' != '{values.tz}'" + ) + raise TypeError(msg) + elif values.tz: + dtype = values.dtype + + if freq is None: + freq = values.freq + values = values._ndarray + + if not isinstance(values, np.ndarray): + raise ValueError( + f"Unexpected type '{type(values).__name__}'. 'values' must be " + "a DatetimeArray, ndarray, or Series or Index containing one of those." + ) + if values.ndim not in [1, 2]: + raise ValueError("Only 1-dimensional input arrays are supported.") + + if values.dtype == "i8": + # for compat with datetime/timedelta/period shared methods, + # we can sometimes get here with int64 values. These represent + # nanosecond UTC (or tz-naive) unix timestamps + values = values.view(DT64NS_DTYPE) + + if values.dtype != DT64NS_DTYPE: + raise ValueError( + "The dtype of 'values' is incorrect. Must be 'datetime64[ns]'. " + f"Got {values.dtype} instead." + ) + + dtype = _validate_dt64_dtype(dtype) + + if freq == "infer": + raise ValueError( + "Frequency inference not allowed in DatetimeArray.__init__. " + "Use 'pd.array()' instead." + ) + + if copy: + values = values.copy() + if freq: + freq = to_offset(freq) + if getattr(dtype, "tz", None): + # https://github.com/pandas-dev/pandas/issues/18595 + # Ensure that we have a standard timezone for pytz objects. + # Without this, things like adding an array of timedeltas and + # a tz-aware Timestamp (with a tz specific to its datetime) will + # be incorrect(ish?) 
for the array as a whole + dtype = DatetimeTZDtype(tz=timezones.tz_standardize(dtype.tz)) + + NDArrayBacked.__init__(self, values=values, dtype=dtype) + self._freq = freq + + if inferred_freq is None and freq is not None: + type(self)._validate_frequency(self, freq) + + # error: Signature of "_simple_new" incompatible with supertype "NDArrayBacked" + @classmethod + def _simple_new( # type: ignore[override] + cls, values: np.ndarray, freq: BaseOffset | None = None, dtype=DT64NS_DTYPE + ) -> DatetimeArray: + assert isinstance(values, np.ndarray) + assert values.dtype == DT64NS_DTYPE + + result = super()._simple_new(values, dtype) + result._freq = freq + return result + + @classmethod + def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False): + return cls._from_sequence_not_strict(scalars, dtype=dtype, copy=copy) + + @classmethod + def _from_sequence_not_strict( + cls, + data, + dtype=None, + copy: bool = False, + tz=None, + freq=lib.no_default, + dayfirst: bool = False, + yearfirst: bool = False, + ambiguous="raise", + ): + explicit_none = freq is None + freq = freq if freq is not lib.no_default else None + + freq, freq_infer = dtl.maybe_infer_freq(freq) + + subarr, tz, inferred_freq = _sequence_to_dt64ns( + data, + dtype=dtype, + copy=copy, + tz=tz, + dayfirst=dayfirst, + yearfirst=yearfirst, + ambiguous=ambiguous, + ) + + freq, freq_infer = dtl.validate_inferred_freq(freq, inferred_freq, freq_infer) + if explicit_none: + freq = None + + dtype = tz_to_dtype(tz) + result = cls._simple_new(subarr, freq=freq, dtype=dtype) + + if inferred_freq is None and freq is not None: + # this condition precludes `freq_infer` + cls._validate_frequency(result, freq, ambiguous=ambiguous) + + elif freq_infer: + # Set _freq directly to bypass duplicative _validate_frequency + # check. + result._freq = to_offset(result.inferred_freq) + + return result + + @classmethod + def _generate_range( + cls, + start, + end, + periods, + freq, + tz=None, + normalize=False, + ambiguous="raise", + nonexistent="raise", + inclusive="both", + ): + + periods = dtl.validate_periods(periods) + if freq is None and any(x is None for x in [periods, start, end]): + raise ValueError("Must provide freq argument if no data is supplied") + + if com.count_not_none(start, end, periods, freq) != 3: + raise ValueError( + "Of the four parameters: start, end, periods, " + "and freq, exactly three must be specified" + ) + freq = to_offset(freq) + + if start is not None: + start = Timestamp(start) + + if end is not None: + end = Timestamp(end) + + if start is NaT or end is NaT: + raise ValueError("Neither `start` nor `end` can be NaT") + + left_inclusive, right_inclusive = validate_inclusive(inclusive) + start, end, _normalized = _maybe_normalize_endpoints(start, end, normalize) + tz = _infer_tz_from_endpoints(start, end, tz) + + if tz is not None: + # Localize the start and end arguments + start_tz = None if start is None else start.tz + end_tz = None if end is None else end.tz + start = _maybe_localize_point( + start, start_tz, start, freq, tz, ambiguous, nonexistent + ) + end = _maybe_localize_point( + end, end_tz, end, freq, tz, ambiguous, nonexistent + ) + if freq is not None: + # We break Day arithmetic (fixed 24 hour) here and opt for + # Day to mean calendar day (23/24/25 hour). 
Therefore, strip + # tz info from start and day to avoid DST arithmetic + if isinstance(freq, Day): + if start is not None: + start = start.tz_localize(None) + if end is not None: + end = end.tz_localize(None) + + if isinstance(freq, Tick): + values = generate_regular_range(start, end, periods, freq) + else: + xdr = generate_range(start=start, end=end, periods=periods, offset=freq) + values = np.array([x.value for x in xdr], dtype=np.int64) + + _tz = start.tz if start is not None else end.tz + values = values.view("M8[ns]") + index = cls._simple_new(values, freq=freq, dtype=tz_to_dtype(_tz)) + + if tz is not None and index.tz is None: + arr = tzconversion.tz_localize_to_utc( + index.asi8, tz, ambiguous=ambiguous, nonexistent=nonexistent + ) + + index = cls(arr) + + # index is localized datetime64 array -> have to convert + # start/end as well to compare + if start is not None: + start = start.tz_localize(tz, ambiguous, nonexistent).asm8 + if end is not None: + end = end.tz_localize(tz, ambiguous, nonexistent).asm8 + else: + # Create a linearly spaced date_range in local time + # Nanosecond-granularity timestamps aren't always correctly + # representable with doubles, so we limit the range that we + # pass to np.linspace as much as possible + arr = ( + np.linspace(0, end.value - start.value, periods, dtype="int64") + + start.value + ) + dtype = tz_to_dtype(tz) + arr = arr.astype("M8[ns]", copy=False) + index = cls._simple_new(arr, freq=None, dtype=dtype) + + if start == end: + if not left_inclusive and not right_inclusive: + index = index[1:-1] + else: + if not left_inclusive or not right_inclusive: + if not left_inclusive and len(index) and index[0] == start: + index = index[1:] + if not right_inclusive and len(index) and index[-1] == end: + index = index[:-1] + + dtype = tz_to_dtype(tz) + return cls._simple_new(index._ndarray, freq=freq, dtype=dtype) + + # ----------------------------------------------------------------- + # DatetimeLike Interface + + def _unbox_scalar(self, value, setitem: bool = False) -> np.datetime64: + if not isinstance(value, self._scalar_type) and value is not NaT: + raise ValueError("'value' should be a Timestamp.") + self._check_compatible_with(value, setitem=setitem) + return value.asm8 + + def _scalar_from_string(self, value) -> Timestamp | NaTType: + return Timestamp(value, tz=self.tz) + + def _check_compatible_with(self, other, setitem: bool = False): + if other is NaT: + return + self._assert_tzawareness_compat(other) + if setitem: + # Stricter check for setitem vs comparison methods + if self.tz is not None and not timezones.tz_compare(self.tz, other.tz): + # TODO(2.0): remove this check. GH#37605 + warnings.warn( + "Setitem-like behavior with mismatched timezones is deprecated " + "and will change in a future version. Instead of raising " + "(or for Index, Series, and DataFrame methods, coercing to " + "object dtype), the value being set (or passed as a " + "fill_value, or inserted) will be cast to the existing " + "DatetimeArray/DatetimeIndex/Series/DataFrame column's " + "timezone. To retain the old behavior, explicitly cast to " + "object dtype before the operation.", + FutureWarning, + stacklevel=find_stack_level(), + ) + raise ValueError(f"Timezones don't match. 
'{self.tz}' != '{other.tz}'") + + # ----------------------------------------------------------------- + # Descriptive Properties + + def _box_func(self, x) -> Timestamp | NaTType: + if isinstance(x, np.datetime64): + # GH#42228 + # Argument 1 to "signedinteger" has incompatible type "datetime64"; + # expected "Union[SupportsInt, Union[str, bytes], SupportsIndex]" + x = np.int64(x) # type: ignore[arg-type] + ts = Timestamp(x, tz=self.tz) + # Non-overlapping identity check (left operand type: "Timestamp", + # right operand type: "NaTType") + if ts is not NaT: # type: ignore[comparison-overlap] + # GH#41586 + # do this instead of passing to the constructor to avoid FutureWarning + ts._set_freq(self.freq) + return ts + + @property + # error: Return type "Union[dtype, DatetimeTZDtype]" of "dtype" + # incompatible with return type "ExtensionDtype" in supertype + # "ExtensionArray" + def dtype(self) -> np.dtype | DatetimeTZDtype: # type: ignore[override] + """ + The dtype for the DatetimeArray. + + .. warning:: + + A future version of pandas will change dtype to never be a + ``numpy.dtype``. Instead, :attr:`DatetimeArray.dtype` will + always be an instance of an ``ExtensionDtype`` subclass. + + Returns + ------- + numpy.dtype or DatetimeTZDtype + If the values are tz-naive, then ``np.dtype('datetime64[ns]')`` + is returned. + + If the values are tz-aware, then the ``DatetimeTZDtype`` + is returned. + """ + return self._dtype + + @property + def tz(self) -> tzinfo | None: + """ + Return the timezone. + + Returns + ------- + datetime.tzinfo, pytz.tzinfo.BaseTZInfo, dateutil.tz.tz.tzfile, or None + Returns None when the array is tz-naive. + """ + # GH 18595 + return getattr(self.dtype, "tz", None) + + @tz.setter + def tz(self, value): + # GH 3746: Prevent localizing or converting the index by setting tz + raise AttributeError( + "Cannot directly set timezone. Use tz_localize() " + "or tz_convert() as appropriate" + ) + + @property + def tzinfo(self) -> tzinfo | None: + """ + Alias for tz attribute + """ + return self.tz + + @property # NB: override with cache_readonly in immutable subclasses + def is_normalized(self) -> bool: + """ + Returns True if all of the dates are at midnight ("no time") + """ + return is_date_array_normalized(self.asi8, self.tz) + + @property # NB: override with cache_readonly in immutable subclasses + def _resolution_obj(self) -> Resolution: + return get_resolution(self.asi8, self.tz) + + # ---------------------------------------------------------------- + # Array-Like / EA-Interface Methods + + def __array__(self, dtype=None) -> np.ndarray: + if dtype is None and self.tz: + # The default for tz-aware is object, to preserve tz info + dtype = object + + return super().__array__(dtype=dtype) + + def __iter__(self): + """ + Return an iterator over the boxed values + + Yields + ------ + tstamp : Timestamp + """ + if self.ndim > 1: + for i in range(len(self)): + yield self[i] + else: + # convert in chunks of 10k for efficiency + data = self.asi8 + length = len(self) + chunksize = 10000 + chunks = (length // chunksize) + 1 + + for i in range(chunks): + start_i = i * chunksize + end_i = min((i + 1) * chunksize, length) + converted = ints_to_pydatetime( + data[start_i:end_i], tz=self.tz, freq=self.freq, box="timestamp" + ) + yield from converted + + def astype(self, dtype, copy: bool = True): + # We handle + # --> datetime + # --> period + # DatetimeLikeArrayMixin Super handles the rest. 
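+ # For example (an illustrative sketch, not upstream code): casting to + # a period dtype is routed through to_period, e.g. + # pd.Series(pd.date_range('2020-01-01', periods=3)).astype('period[D]'), + # while casting to the array's own dtype returns a copy (or self when + # copy=False).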
+ dtype = pandas_dtype(dtype) + + if is_dtype_equal(dtype, self.dtype): + if copy: + return self.copy() + return self + + elif is_datetime64_ns_dtype(dtype): + return astype_dt64_to_dt64tz(self, dtype, copy, via_utc=False) + + elif self.tz is None and is_datetime64_dtype(dtype) and dtype != self.dtype: + # unit conversion e.g. datetime64[s] + return self._ndarray.astype(dtype) + + elif is_period_dtype(dtype): + return self.to_period(freq=dtype.freq) + return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy) + + # ----------------------------------------------------------------- + # Rendering Methods + + @dtl.ravel_compat + def _format_native_types( + self, *, na_rep="NaT", date_format=None, **kwargs + ) -> npt.NDArray[np.object_]: + from pandas.io.formats.format import get_format_datetime64_from_values + + fmt = get_format_datetime64_from_values(self, date_format) + + return tslib.format_array_from_datetime( + self.asi8, tz=self.tz, format=fmt, na_rep=na_rep + ) + + # ----------------------------------------------------------------- + # Comparison Methods + + def _has_same_tz(self, other) -> bool: + + # vzone shouldn't be None if value is non-datetime like + if isinstance(other, np.datetime64): + # convert to Timestamp as np.datetime64 doesn't have tz attr + other = Timestamp(other) + + if not hasattr(other, "tzinfo"): + return False + other_tz = other.tzinfo + return timezones.tz_compare(self.tzinfo, other_tz) + + def _assert_tzawareness_compat(self, other) -> None: + # adapted from _Timestamp._assert_tzawareness_compat + other_tz = getattr(other, "tzinfo", None) + other_dtype = getattr(other, "dtype", None) + + if is_datetime64tz_dtype(other_dtype): + # Get tzinfo from Series dtype + other_tz = other.dtype.tz + if other is NaT: + # pd.NaT quacks both aware and naive + pass + elif self.tz is None: + if other_tz is not None: + raise TypeError( + "Cannot compare tz-naive and tz-aware datetime-like objects." 
+ ) + elif other_tz is None: + raise TypeError( + "Cannot compare tz-naive and tz-aware datetime-like objects" + ) + + # ----------------------------------------------------------------- + # Arithmetic Methods + + def _sub_datetime_arraylike(self, other): + """subtract DatetimeArray/Index or ndarray[datetime64]""" + if len(self) != len(other): + raise ValueError("cannot add indices of unequal length") + + if isinstance(other, np.ndarray): + assert is_datetime64_dtype(other) + other = type(self)(other) + + try: + self._assert_tzawareness_compat(other) + except TypeError as error: + new_message = str(error).replace("compare", "subtract") + raise type(error)(new_message) from error + + self_i8 = self.asi8 + other_i8 = other.asi8 + arr_mask = self._isnan | other._isnan + new_values = checked_add_with_arr(self_i8, -other_i8, arr_mask=arr_mask) + if self._hasna or other._hasna: + np.putmask(new_values, arr_mask, iNaT) + return new_values.view("timedelta64[ns]") + + def _add_offset(self, offset) -> DatetimeArray: + if self.ndim == 2: + return self.ravel()._add_offset(offset).reshape(self.shape) + + assert not isinstance(offset, Tick) + try: + if self.tz is not None: + values = self.tz_localize(None) + else: + values = self + result = offset._apply_array(values).view("M8[ns]") + result = DatetimeArray._simple_new(result) + result = result.tz_localize(self.tz) + + except NotImplementedError: + warnings.warn( + "Non-vectorized DateOffset being applied to Series or DatetimeIndex.", + PerformanceWarning, + ) + result = self.astype("O") + offset + if not len(self): + # GH#30336 _from_sequence won't be able to infer self.tz + return type(self)._from_sequence(result).tz_localize(self.tz) + + return type(self)._from_sequence(result) + + def _sub_datetimelike_scalar(self, other): + # subtract a datetime from myself, yielding a ndarray[timedelta64[ns]] + assert isinstance(other, (datetime, np.datetime64)) + assert other is not NaT + other = Timestamp(other) + # error: Non-overlapping identity check (left operand type: "Timestamp", + # right operand type: "NaTType") + if other is NaT: # type: ignore[comparison-overlap] + return self - NaT + + try: + self._assert_tzawareness_compat(other) + except TypeError as error: + new_message = str(error).replace("compare", "subtract") + raise type(error)(new_message) from error + + i8 = self.asi8 + result = checked_add_with_arr(i8, -other.value, arr_mask=self._isnan) + result = self._maybe_mask_results(result) + return result.view("timedelta64[ns]") + + # ----------------------------------------------------------------- + # Timezone Conversion and Localization Methods + + def _local_timestamps(self) -> np.ndarray: + """ + Convert to an i8 (unix-like nanosecond timestamp) representation + while keeping the local timezone and not using UTC. + This is used to calculate time-of-day information as if the timestamps + were timezone-naive. + """ + if self.tz is None or timezones.is_utc(self.tz): + return self.asi8 + return tzconversion.tz_convert_from_utc(self.asi8, self.tz) + + def tz_convert(self, tz) -> DatetimeArray: + """ + Convert tz-aware Datetime Array/Index from one time zone to another. + + Parameters + ---------- + tz : str, pytz.timezone, dateutil.tz.tzfile or None + Time zone for time. Corresponding timestamps would be converted + to this time zone of the Datetime Array/Index. A `tz` of None will + convert to UTC and remove the timezone information. + + Returns + ------- + Array or Index + + Raises + ------ + TypeError + If Datetime Array/Index is tz-naive. 
+ + See Also + -------- + DatetimeIndex.tz : A timezone that has a variable offset from UTC. + DatetimeIndex.tz_localize : Localize tz-naive DatetimeIndex to a + given time zone, or remove timezone from a tz-aware DatetimeIndex. + + Examples + -------- + With the `tz` parameter, we can change the DatetimeIndex + to other time zones: + + >>> dti = pd.date_range(start='2014-08-01 09:00', + ... freq='H', periods=3, tz='Europe/Berlin') + + >>> dti + DatetimeIndex(['2014-08-01 09:00:00+02:00', + '2014-08-01 10:00:00+02:00', + '2014-08-01 11:00:00+02:00'], + dtype='datetime64[ns, Europe/Berlin]', freq='H') + + >>> dti.tz_convert('US/Central') + DatetimeIndex(['2014-08-01 02:00:00-05:00', + '2014-08-01 03:00:00-05:00', + '2014-08-01 04:00:00-05:00'], + dtype='datetime64[ns, US/Central]', freq='H') + + With ``tz=None``, we can remove the timezone (after converting + to UTC if necessary): + + >>> dti = pd.date_range(start='2014-08-01 09:00', freq='H', + ... periods=3, tz='Europe/Berlin') + + >>> dti + DatetimeIndex(['2014-08-01 09:00:00+02:00', + '2014-08-01 10:00:00+02:00', + '2014-08-01 11:00:00+02:00'], + dtype='datetime64[ns, Europe/Berlin]', freq='H') + + >>> dti.tz_convert(None) + DatetimeIndex(['2014-08-01 07:00:00', + '2014-08-01 08:00:00', + '2014-08-01 09:00:00'], + dtype='datetime64[ns]', freq='H') + """ + tz = timezones.maybe_get_tz(tz) + + if self.tz is None: + # tz naive, use tz_localize + raise TypeError( + "Cannot convert tz-naive timestamps, use tz_localize to localize" + ) + + # No conversion since timestamps are all UTC to begin with + dtype = tz_to_dtype(tz) + return self._simple_new(self._ndarray, dtype=dtype, freq=self.freq) + + @dtl.ravel_compat + def tz_localize(self, tz, ambiguous="raise", nonexistent="raise") -> DatetimeArray: + """ + Localize tz-naive Datetime Array/Index to tz-aware + Datetime Array/Index. + + This method takes a time zone (tz) naive Datetime Array/Index object + and makes this time zone aware. It does not move the time to another + time zone. + + This method can also be used to do the inverse -- to create a time + zone unaware object from an aware object. To that end, pass `tz=None`. + + Parameters + ---------- + tz : str, pytz.timezone, dateutil.tz.tzfile or None + Time zone to convert timestamps to. Passing ``None`` will + remove the time zone information preserving local time. + ambiguous : 'infer', 'NaT', bool array, default 'raise' + When clocks moved backward due to DST, ambiguous times may arise. + For example in Central European Time (UTC+01), when going from + 03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at + 00:30:00 UTC and at 01:30:00 UTC. In such a situation, the + `ambiguous` parameter dictates how ambiguous times should be + handled. + + - 'infer' will attempt to infer fall dst-transition hours based on + order + - bool-ndarray where True signifies a DST time, False signifies a + non-DST time (note that this flag is only applicable for + ambiguous times) + - 'NaT' will return NaT where there are ambiguous times + - 'raise' will raise an AmbiguousTimeError if there are ambiguous + times. + + nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, \ +default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST.
+ + - 'shift_forward' will shift the nonexistent time forward to the + closest existing time + - 'shift_backward' will shift the nonexistent time backward to the + closest existing time + - 'NaT' will return NaT where there are nonexistent times + - timedelta objects will shift nonexistent times by the timedelta + - 'raise' will raise a NonExistentTimeError if there are + nonexistent times. + + Returns + ------- + Same type as self + Array/Index converted to the specified time zone. + + Raises + ------ + TypeError + If the Datetime Array/Index is tz-aware and tz is not None. + + See Also + -------- + DatetimeIndex.tz_convert : Convert tz-aware DatetimeIndex from + one time zone to another. + + Examples + -------- + >>> tz_naive = pd.date_range('2018-03-01 09:00', periods=3) + >>> tz_naive + DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00', + '2018-03-03 09:00:00'], + dtype='datetime64[ns]', freq='D') + + Localize DatetimeIndex in US/Eastern time zone: + + >>> tz_aware = tz_naive.tz_localize(tz='US/Eastern') + >>> tz_aware + DatetimeIndex(['2018-03-01 09:00:00-05:00', + '2018-03-02 09:00:00-05:00', + '2018-03-03 09:00:00-05:00'], + dtype='datetime64[ns, US/Eastern]', freq=None) + + With ``tz=None``, we can remove the time zone information + while keeping the local time (not converted to UTC): + + >>> tz_aware.tz_localize(None) + DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00', + '2018-03-03 09:00:00'], + dtype='datetime64[ns]', freq=None) + + Be careful with DST changes. When there is sequential data, pandas can + infer the DST time: + + >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:30:00', + ... '2018-10-28 02:00:00', + ... '2018-10-28 02:30:00', + ... '2018-10-28 02:00:00', + ... '2018-10-28 02:30:00', + ... '2018-10-28 03:00:00', + ... '2018-10-28 03:30:00'])) + >>> s.dt.tz_localize('CET', ambiguous='infer') + 0 2018-10-28 01:30:00+02:00 + 1 2018-10-28 02:00:00+02:00 + 2 2018-10-28 02:30:00+02:00 + 3 2018-10-28 02:00:00+01:00 + 4 2018-10-28 02:30:00+01:00 + 5 2018-10-28 03:00:00+01:00 + 6 2018-10-28 03:30:00+01:00 + dtype: datetime64[ns, CET] + + In some cases, inferring the DST is impossible. In such cases, you can + pass an ndarray to the ambiguous parameter to set the DST explicitly: + + >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:20:00', + ... '2018-10-28 02:36:00', + ... '2018-10-28 03:46:00'])) + >>> s.dt.tz_localize('CET', ambiguous=np.array([True, True, False])) + 0 2018-10-28 01:20:00+02:00 + 1 2018-10-28 02:36:00+02:00 + 2 2018-10-28 03:46:00+01:00 + dtype: datetime64[ns, CET] + + If the DST transition causes nonexistent times, you can shift these + dates forward or backwards with a timedelta object or `'shift_forward'` + or `'shift_backward'`. + + >>> s = pd.to_datetime(pd.Series(['2015-03-29 02:30:00', + ...
'2015-03-29 03:30:00'])) + >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_forward') + 0 2015-03-29 03:00:00+02:00 + 1 2015-03-29 03:30:00+02:00 + dtype: datetime64[ns, Europe/Warsaw] + + >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_backward') + 0 2015-03-29 01:59:59.999999999+01:00 + 1 2015-03-29 03:30:00+02:00 + dtype: datetime64[ns, Europe/Warsaw] + + >>> s.dt.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta('1H')) + 0 2015-03-29 03:30:00+02:00 + 1 2015-03-29 03:30:00+02:00 + dtype: datetime64[ns, Europe/Warsaw] + """ + nonexistent_options = ("raise", "NaT", "shift_forward", "shift_backward") + if nonexistent not in nonexistent_options and not isinstance( + nonexistent, timedelta + ): + raise ValueError( + "The nonexistent argument must be one of 'raise', " + "'NaT', 'shift_forward', 'shift_backward' or " + "a timedelta object" + ) + + if self.tz is not None: + if tz is None: + new_dates = tzconversion.tz_convert_from_utc(self.asi8, self.tz) + else: + raise TypeError("Already tz-aware, use tz_convert to convert.") + else: + tz = timezones.maybe_get_tz(tz) + # Convert to UTC + + new_dates = tzconversion.tz_localize_to_utc( + self.asi8, tz, ambiguous=ambiguous, nonexistent=nonexistent + ) + new_dates = new_dates.view(DT64NS_DTYPE) + dtype = tz_to_dtype(tz) + + freq = None + if timezones.is_utc(tz) or (len(self) == 1 and not isna(new_dates[0])): + # we can preserve freq + # TODO: Also for fixed-offsets + freq = self.freq + elif tz is None and self.tz is None: + # no-op + freq = self.freq + return self._simple_new(new_dates, dtype=dtype, freq=freq) + + # ---------------------------------------------------------------- + # Conversion Methods - Vectorized analogues of Timestamp methods + + def to_pydatetime(self) -> npt.NDArray[np.object_]: + """ + Return Datetime Array/Index as object ndarray of datetime.datetime + objects. + + Returns + ------- + datetimes : ndarray[object] + """ + return ints_to_pydatetime(self.asi8, tz=self.tz) + + def normalize(self) -> DatetimeArray: + """ + Convert times to midnight. + + The time component of the date-time is converted to midnight i.e. + 00:00:00. This is useful in cases, when the time does not matter. + Length is unaltered. The timezones are unaffected. + + This method is available on Series with datetime values under + the ``.dt`` accessor, and directly on Datetime Array/Index. + + Returns + ------- + DatetimeArray, DatetimeIndex or Series + The same type as the original data. Series will have the same + name and index. DatetimeIndex will have the same name. + + See Also + -------- + floor : Floor the datetimes to the specified freq. + ceil : Ceil the datetimes to the specified freq. + round : Round the datetimes to the specified freq. + + Examples + -------- + >>> idx = pd.date_range(start='2014-08-01 10:00', freq='H', + ... periods=3, tz='Asia/Calcutta') + >>> idx + DatetimeIndex(['2014-08-01 10:00:00+05:30', + '2014-08-01 11:00:00+05:30', + '2014-08-01 12:00:00+05:30'], + dtype='datetime64[ns, Asia/Calcutta]', freq='H') + >>> idx.normalize() + DatetimeIndex(['2014-08-01 00:00:00+05:30', + '2014-08-01 00:00:00+05:30', + '2014-08-01 00:00:00+05:30'], + dtype='datetime64[ns, Asia/Calcutta]', freq=None) + """ + new_values = normalize_i8_timestamps(self.asi8, self.tz) + return type(self)(new_values)._with_freq("infer").tz_localize(self.tz) + + @dtl.ravel_compat + def to_period(self, freq=None) -> PeriodArray: + """ + Cast to PeriodArray/Index at a particular frequency. + + Converts DatetimeArray/Index to PeriodArray/Index. 
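+ + For tz-aware input the conversion is based on local wall times; the timezone itself is dropped, with a ``UserWarning``, as the implementation below shows.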
+ + Parameters + ---------- + freq : str or Offset, optional + One of pandas' :ref:`offset strings <timeseries.offset_aliases>` + or an Offset object. Will be inferred by default. + + Returns + ------- + PeriodArray/Index + + Raises + ------ + ValueError + When converting a DatetimeArray/Index with non-regular values, + so that a frequency cannot be inferred. + + See Also + -------- + PeriodIndex : Immutable ndarray holding ordinal values. + DatetimeIndex.to_pydatetime : Return DatetimeIndex as object. + + Examples + -------- + >>> df = pd.DataFrame({"y": [1, 2, 3]}, + ... index=pd.to_datetime(["2000-03-31 00:00:00", + ... "2000-05-31 00:00:00", + ... "2000-08-31 00:00:00"])) + >>> df.index.to_period("M") + PeriodIndex(['2000-03', '2000-05', '2000-08'], + dtype='period[M]') + + Infer the daily frequency + + >>> idx = pd.date_range("2017-01-01", periods=2) + >>> idx.to_period() + PeriodIndex(['2017-01-01', '2017-01-02'], + dtype='period[D]') + """ + from pandas.core.arrays import PeriodArray + + if self.tz is not None: + warnings.warn( + "Converting to PeriodArray/Index representation " + "will drop timezone information.", + UserWarning, + ) + + if freq is None: + freq = self.freqstr or self.inferred_freq + + if freq is None: + raise ValueError( + "You must pass a freq argument as current index has none." + ) + + res = get_period_alias(freq) + + # https://github.com/pandas-dev/pandas/issues/33358 + if res is None: + res = freq + + freq = res + + return PeriodArray._from_datetime64(self._ndarray, freq, tz=self.tz) + + def to_perioddelta(self, freq) -> TimedeltaArray: + """ + Calculate TimedeltaArray of difference between index + values and index converted to PeriodArray at specified + freq. Used for vectorized offsets. + + Parameters + ---------- + freq : Period frequency + + Returns + ------- + TimedeltaArray/Index + """ + # Deprecation GH#34853 + warnings.warn( + "to_perioddelta is deprecated and will be removed in a " + "future version. " + "Use `dtindex - dtindex.to_period(freq).to_timestamp()` instead.", + FutureWarning, + # stacklevel chosen to be correct for when called from DatetimeIndex + stacklevel=find_stack_level(), + ) + from pandas.core.arrays.timedeltas import TimedeltaArray + + i8delta = self.asi8 - self.to_period(freq).to_timestamp().asi8 + m8delta = i8delta.view("m8[ns]") + return TimedeltaArray(m8delta) + + # ----------------------------------------------------------------- + # Properties - Vectorized Timestamp Properties/Methods + + def month_name(self, locale=None): + """ + Return the month names of the DatetimeIndex with specified locale. + + Parameters + ---------- + locale : str, optional + Locale determining the language in which to return the month name. + Default is English locale. + + Returns + ------- + Index + Index of month names. + + Examples + -------- + >>> idx = pd.date_range(start='2018-01', freq='M', periods=3) + >>> idx + DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31'], + dtype='datetime64[ns]', freq='M') + >>> idx.month_name() + Index(['January', 'February', 'March'], dtype='object') + """ + values = self._local_timestamps() + + result = fields.get_date_name_field(values, "month_name", locale=locale) + result = self._maybe_mask_results(result, fill_value=None) + return result + + def day_name(self, locale=None): + """ + Return the day names of the DatetimeIndex with specified locale. + + Parameters + ---------- + locale : str, optional + Locale determining the language in which to return the day name. + Default is English locale.
+ + Returns + ------- + Index + Index of day names. + + Examples + -------- + >>> idx = pd.date_range(start='2018-01-01', freq='D', periods=3) + >>> idx + DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03'], + dtype='datetime64[ns]', freq='D') + >>> idx.day_name() + Index(['Monday', 'Tuesday', 'Wednesday'], dtype='object') + """ + values = self._local_timestamps() + + result = fields.get_date_name_field(values, "day_name", locale=locale) + result = self._maybe_mask_results(result, fill_value=None) + return result + + @property + def time(self) -> npt.NDArray[np.object_]: + """ + Returns numpy array of :class:`datetime.time` objects. + + The time part of the Timestamps. + """ + # If the Timestamps have a timezone that is not UTC, + # convert them into their i8 representation while + # keeping their timezone and not using UTC + timestamps = self._local_timestamps() + + return ints_to_pydatetime(timestamps, box="time") + + @property + def timetz(self) -> npt.NDArray[np.object_]: + """ + Returns numpy array of :class:`datetime.time` objects with timezone + information. + + The time part of the Timestamps. + """ + return ints_to_pydatetime(self.asi8, self.tz, box="time") + + @property + def date(self) -> npt.NDArray[np.object_]: + """ + Returns numpy array of python :class:`datetime.date` objects. + + Namely, the date part of Timestamps without time and + timezone information. + """ + # If the Timestamps have a timezone that is not UTC, + # convert them into their i8 representation while + # keeping their timezone and not using UTC + timestamps = self._local_timestamps() + + return ints_to_pydatetime(timestamps, box="date") + + def isocalendar(self) -> DataFrame: + """ + Returns a DataFrame with the year, week, and day calculated according to + the ISO 8601 standard. + + .. versionadded:: 1.1.0 + + Returns + ------- + DataFrame + with columns year, week and day + + See Also + -------- + Timestamp.isocalendar : Function return a 3-tuple containing ISO year, + week number, and weekday for the given Timestamp object. + datetime.date.isocalendar : Return a named tuple object with + three components: year, week and weekday. + + Examples + -------- + >>> idx = pd.date_range(start='2019-12-29', freq='D', periods=4) + >>> idx.isocalendar() + year week day + 2019-12-29 2019 52 7 + 2019-12-30 2020 1 1 + 2019-12-31 2020 1 2 + 2020-01-01 2020 1 3 + >>> idx.isocalendar().week + 2019-12-29 52 + 2019-12-30 1 + 2019-12-31 1 + 2020-01-01 1 + Freq: D, Name: week, dtype: UInt32 + """ + from pandas import DataFrame + + values = self._local_timestamps() + sarray = fields.build_isocalendar_sarray(values) + iso_calendar_df = DataFrame( + sarray, columns=["year", "week", "day"], dtype="UInt32" + ) + if self._hasna: + iso_calendar_df.iloc[self._isnan] = None + return iso_calendar_df + + @property + def weekofyear(self): + """ + The week ordinal of the year. + + .. deprecated:: 1.1.0 + + weekofyear and week have been deprecated. + Please use DatetimeIndex.isocalendar().week instead. + """ + warnings.warn( + "weekofyear and week have been deprecated, please use " + "DatetimeIndex.isocalendar().week instead, which returns " + "a Series. 
To exactly reproduce the behavior of week and " + "weekofyear and return an Index, you may call " + "pd.Int64Index(idx.isocalendar().week)", + FutureWarning, + stacklevel=find_stack_level(), + ) + week_series = self.isocalendar().week + if week_series.hasnans: + return week_series.to_numpy(dtype="float64", na_value=np.nan) + return week_series.to_numpy(dtype="int64") + + week = weekofyear + + year = _field_accessor( + "year", + "Y", + """ + The year of the datetime. + + Examples + -------- + >>> datetime_series = pd.Series( + ... pd.date_range("2000-01-01", periods=3, freq="Y") + ... ) + >>> datetime_series + 0 2000-12-31 + 1 2001-12-31 + 2 2002-12-31 + dtype: datetime64[ns] + >>> datetime_series.dt.year + 0 2000 + 1 2001 + 2 2002 + dtype: int64 + """, + ) + month = _field_accessor( + "month", + "M", + """ + The month as January=1, December=12. + + Examples + -------- + >>> datetime_series = pd.Series( + ... pd.date_range("2000-01-01", periods=3, freq="M") + ... ) + >>> datetime_series + 0 2000-01-31 + 1 2000-02-29 + 2 2000-03-31 + dtype: datetime64[ns] + >>> datetime_series.dt.month + 0 1 + 1 2 + 2 3 + dtype: int64 + """, + ) + day = _field_accessor( + "day", + "D", + """ + The day of the datetime. + + Examples + -------- + >>> datetime_series = pd.Series( + ... pd.date_range("2000-01-01", periods=3, freq="D") + ... ) + >>> datetime_series + 0 2000-01-01 + 1 2000-01-02 + 2 2000-01-03 + dtype: datetime64[ns] + >>> datetime_series.dt.day + 0 1 + 1 2 + 2 3 + dtype: int64 + """, + ) + hour = _field_accessor( + "hour", + "h", + """ + The hours of the datetime. + + Examples + -------- + >>> datetime_series = pd.Series( + ... pd.date_range("2000-01-01", periods=3, freq="h") + ... ) + >>> datetime_series + 0 2000-01-01 00:00:00 + 1 2000-01-01 01:00:00 + 2 2000-01-01 02:00:00 + dtype: datetime64[ns] + >>> datetime_series.dt.hour + 0 0 + 1 1 + 2 2 + dtype: int64 + """, + ) + minute = _field_accessor( + "minute", + "m", + """ + The minutes of the datetime. + + Examples + -------- + >>> datetime_series = pd.Series( + ... pd.date_range("2000-01-01", periods=3, freq="T") + ... ) + >>> datetime_series + 0 2000-01-01 00:00:00 + 1 2000-01-01 00:01:00 + 2 2000-01-01 00:02:00 + dtype: datetime64[ns] + >>> datetime_series.dt.minute + 0 0 + 1 1 + 2 2 + dtype: int64 + """, + ) + second = _field_accessor( + "second", + "s", + """ + The seconds of the datetime. + + Examples + -------- + >>> datetime_series = pd.Series( + ... pd.date_range("2000-01-01", periods=3, freq="s") + ... ) + >>> datetime_series + 0 2000-01-01 00:00:00 + 1 2000-01-01 00:00:01 + 2 2000-01-01 00:00:02 + dtype: datetime64[ns] + >>> datetime_series.dt.second + 0 0 + 1 1 + 2 2 + dtype: int64 + """, + ) + microsecond = _field_accessor( + "microsecond", + "us", + """ + The microseconds of the datetime. + + Examples + -------- + >>> datetime_series = pd.Series( + ... pd.date_range("2000-01-01", periods=3, freq="us") + ... ) + >>> datetime_series + 0 2000-01-01 00:00:00.000000 + 1 2000-01-01 00:00:00.000001 + 2 2000-01-01 00:00:00.000002 + dtype: datetime64[ns] + >>> datetime_series.dt.microsecond + 0 0 + 1 1 + 2 2 + dtype: int64 + """, + ) + nanosecond = _field_accessor( + "nanosecond", + "ns", + """ + The nanoseconds of the datetime. + + Examples + -------- + >>> datetime_series = pd.Series( + ... pd.date_range("2000-01-01", periods=3, freq="ns") + ... 
) + >>> datetime_series + 0 2000-01-01 00:00:00.000000000 + 1 2000-01-01 00:00:00.000000001 + 2 2000-01-01 00:00:00.000000002 + dtype: datetime64[ns] + >>> datetime_series.dt.nanosecond + 0 0 + 1 1 + 2 2 + dtype: int64 + """, + ) + _dayofweek_doc = """ + The day of the week with Monday=0, Sunday=6. + + Return the day of the week. It is assumed the week starts on + Monday, which is denoted by 0 and ends on Sunday which is denoted + by 6. This method is available on both Series with datetime + values (using the `dt` accessor) or DatetimeIndex. + + Returns + ------- + Series or Index + Containing integers indicating the day number. + + See Also + -------- + Series.dt.dayofweek : Alias. + Series.dt.weekday : Alias. + Series.dt.day_name : Returns the name of the day of the week. + + Examples + -------- + >>> s = pd.date_range('2016-12-31', '2017-01-08', freq='D').to_series() + >>> s.dt.dayofweek + 2016-12-31 5 + 2017-01-01 6 + 2017-01-02 0 + 2017-01-03 1 + 2017-01-04 2 + 2017-01-05 3 + 2017-01-06 4 + 2017-01-07 5 + 2017-01-08 6 + Freq: D, dtype: int64 + """ + day_of_week = _field_accessor("day_of_week", "dow", _dayofweek_doc) + dayofweek = day_of_week + weekday = day_of_week + + day_of_year = _field_accessor( + "dayofyear", + "doy", + """ + The ordinal day of the year. + """, + ) + dayofyear = day_of_year + quarter = _field_accessor( + "quarter", + "q", + """ + The quarter of the date. + """, + ) + days_in_month = _field_accessor( + "days_in_month", + "dim", + """ + The number of days in the month. + """, + ) + daysinmonth = days_in_month + _is_month_doc = """ + Indicates whether the date is the {first_or_last} day of the month. + + Returns + ------- + Series or array + For Series, returns a Series with boolean values. + For DatetimeIndex, returns a boolean array. + + See Also + -------- + is_month_start : Return a boolean indicating whether the date + is the first day of the month. + is_month_end : Return a boolean indicating whether the date + is the last day of the month. + + Examples + -------- + This method is available on Series with datetime values under + the ``.dt`` accessor, and directly on DatetimeIndex. + + >>> s = pd.Series(pd.date_range("2018-02-27", periods=3)) + >>> s + 0 2018-02-27 + 1 2018-02-28 + 2 2018-03-01 + dtype: datetime64[ns] + >>> s.dt.is_month_start + 0 False + 1 False + 2 True + dtype: bool + >>> s.dt.is_month_end + 0 False + 1 True + 2 False + dtype: bool + + >>> idx = pd.date_range("2018-02-27", periods=3) + >>> idx.is_month_start + array([False, False, True]) + >>> idx.is_month_end + array([False, True, False]) + """ + is_month_start = _field_accessor( + "is_month_start", "is_month_start", _is_month_doc.format(first_or_last="first") + ) + + is_month_end = _field_accessor( + "is_month_end", "is_month_end", _is_month_doc.format(first_or_last="last") + ) + + is_quarter_start = _field_accessor( + "is_quarter_start", + "is_quarter_start", + """ + Indicator for whether the date is the first day of a quarter. + + Returns + ------- + is_quarter_start : Series or DatetimeIndex + The same type as the original data with boolean values. Series will + have the same name and index. DatetimeIndex will have the same + name. + + See Also + -------- + quarter : Return the quarter of the date. + is_quarter_end : Similar property for indicating the quarter start. + + Examples + -------- + This method is available on Series with datetime values under + the ``.dt`` accessor, and directly on DatetimeIndex. + + >>> df = pd.DataFrame({'dates': pd.date_range("2017-03-30", + ... 
periods=4)}) + >>> df.assign(quarter=df.dates.dt.quarter, + ... is_quarter_start=df.dates.dt.is_quarter_start) + dates quarter is_quarter_start + 0 2017-03-30 1 False + 1 2017-03-31 1 False + 2 2017-04-01 2 True + 3 2017-04-02 2 False + + >>> idx = pd.date_range('2017-03-30', periods=4) + >>> idx + DatetimeIndex(['2017-03-30', '2017-03-31', '2017-04-01', '2017-04-02'], + dtype='datetime64[ns]', freq='D') + + >>> idx.is_quarter_start + array([False, False, True, False]) + """, + ) + is_quarter_end = _field_accessor( + "is_quarter_end", + "is_quarter_end", + """ + Indicator for whether the date is the last day of a quarter. + + Returns + ------- + is_quarter_end : Series or DatetimeIndex + The same type as the original data with boolean values. Series will + have the same name and index. DatetimeIndex will have the same + name. + + See Also + -------- + quarter : Return the quarter of the date. + is_quarter_start : Similar property indicating the quarter start. + + Examples + -------- + This method is available on Series with datetime values under + the ``.dt`` accessor, and directly on DatetimeIndex. + + >>> df = pd.DataFrame({'dates': pd.date_range("2017-03-30", + ... periods=4)}) + >>> df.assign(quarter=df.dates.dt.quarter, + ... is_quarter_end=df.dates.dt.is_quarter_end) + dates quarter is_quarter_end + 0 2017-03-30 1 False + 1 2017-03-31 1 True + 2 2017-04-01 2 False + 3 2017-04-02 2 False + + >>> idx = pd.date_range('2017-03-30', periods=4) + >>> idx + DatetimeIndex(['2017-03-30', '2017-03-31', '2017-04-01', '2017-04-02'], + dtype='datetime64[ns]', freq='D') + + >>> idx.is_quarter_end + array([False, True, False, False]) + """, + ) + is_year_start = _field_accessor( + "is_year_start", + "is_year_start", + """ + Indicate whether the date is the first day of a year. + + Returns + ------- + Series or DatetimeIndex + The same type as the original data with boolean values. Series will + have the same name and index. DatetimeIndex will have the same + name. + + See Also + -------- + is_year_end : Similar property indicating the last day of the year. + + Examples + -------- + This method is available on Series with datetime values under + the ``.dt`` accessor, and directly on DatetimeIndex. + + >>> dates = pd.Series(pd.date_range("2017-12-30", periods=3)) + >>> dates + 0 2017-12-30 + 1 2017-12-31 + 2 2018-01-01 + dtype: datetime64[ns] + + >>> dates.dt.is_year_start + 0 False + 1 False + 2 True + dtype: bool + + >>> idx = pd.date_range("2017-12-30", periods=3) + >>> idx + DatetimeIndex(['2017-12-30', '2017-12-31', '2018-01-01'], + dtype='datetime64[ns]', freq='D') + + >>> idx.is_year_start + array([False, False, True]) + """, + ) + is_year_end = _field_accessor( + "is_year_end", + "is_year_end", + """ + Indicate whether the date is the last day of the year. + + Returns + ------- + Series or DatetimeIndex + The same type as the original data with boolean values. Series will + have the same name and index. DatetimeIndex will have the same + name. + + See Also + -------- + is_year_start : Similar property indicating the start of the year. + + Examples + -------- + This method is available on Series with datetime values under + the ``.dt`` accessor, and directly on DatetimeIndex. 
+ + >>> dates = pd.Series(pd.date_range("2017-12-30", periods=3)) + >>> dates + 0 2017-12-30 + 1 2017-12-31 + 2 2018-01-01 + dtype: datetime64[ns] + + >>> dates.dt.is_year_end + 0 False + 1 True + 2 False + dtype: bool + + >>> idx = pd.date_range("2017-12-30", periods=3) + >>> idx + DatetimeIndex(['2017-12-30', '2017-12-31', '2018-01-01'], + dtype='datetime64[ns]', freq='D') + + >>> idx.is_year_end + array([False, True, False]) + """, + ) + is_leap_year = _field_accessor( + "is_leap_year", + "is_leap_year", + """ + Boolean indicator if the date belongs to a leap year. + + A leap year is a year, which has 366 days (instead of 365) including + 29th of February as an intercalary day. + Leap years are years which are multiples of four with the exception + of years divisible by 100 but not by 400. + + Returns + ------- + Series or ndarray + Booleans indicating if dates belong to a leap year. + + Examples + -------- + This method is available on Series with datetime values under + the ``.dt`` accessor, and directly on DatetimeIndex. + + >>> idx = pd.date_range("2012-01-01", "2015-01-01", freq="Y") + >>> idx + DatetimeIndex(['2012-12-31', '2013-12-31', '2014-12-31'], + dtype='datetime64[ns]', freq='A-DEC') + >>> idx.is_leap_year + array([ True, False, False]) + + >>> dates_series = pd.Series(idx) + >>> dates_series + 0 2012-12-31 + 1 2013-12-31 + 2 2014-12-31 + dtype: datetime64[ns] + >>> dates_series.dt.is_leap_year + 0 True + 1 False + 2 False + dtype: bool + """, + ) + + def to_julian_date(self) -> np.ndarray: + """ + Convert Datetime Array to float64 ndarray of Julian Dates. + 0 Julian date is noon January 1, 4713 BC. + https://en.wikipedia.org/wiki/Julian_day + """ + + # http://mysite.verizon.net/aesir_research/date/jdalg2.htm + year = np.asarray(self.year) + month = np.asarray(self.month) + day = np.asarray(self.day) + testarr = month < 3 + year[testarr] -= 1 + month[testarr] += 12 + return ( + day + + np.fix((153 * month - 457) / 5) + + 365 * year + + np.floor(year / 4) + - np.floor(year / 100) + + np.floor(year / 400) + + 1_721_118.5 + + ( + self.hour + + self.minute / 60 + + self.second / 3600 + + self.microsecond / 3600 / 10 ** 6 + + self.nanosecond / 3600 / 10 ** 9 + ) + / 24 + ) + + # ----------------------------------------------------------------- + # Reductions + + def std( + self, + axis=None, + dtype=None, + out=None, + ddof: int = 1, + keepdims: bool = False, + skipna: bool = True, + ): + """ + Return sample standard deviation over requested axis. + + Normalized by N-1 by default. This can be changed using the ddof argument + + Parameters + ---------- + axis : int optional, default None + Axis for the function to be applied on. + ddof : int, default 1 + Degrees of Freedom. The divisor used in calculations is N - ddof, + where N represents the number of elements. + skipna : bool, default True + Exclude NA/null values. If an entire row/column is NA, the result will be + NA. 
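+ + For illustration (a sketch assuming ``import pandas as pd``): ``pd.date_range('2001-01-01', periods=3, freq='D').array.std()`` gives ``Timedelta('1 days 00:00:00')``, since the three timestamps lie at -1, 0 and +1 days from their mean.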
+ + Returns + ------- + Timedelta + """ + # Because std is translation-invariant, we can get self.std + # by calculating (self - Timestamp(0)).std, and we can do it + # without creating a copy by using a view on self._ndarray + from pandas.core.arrays import TimedeltaArray + + tda = TimedeltaArray(self._ndarray.view("i8")) + return tda.std( + axis=axis, dtype=dtype, out=out, ddof=ddof, keepdims=keepdims, skipna=skipna + ) + + +# ------------------------------------------------------------------- +# Constructor Helpers + + +def sequence_to_datetimes(data, require_iso8601: bool = False) -> DatetimeArray: + """ + Parse/convert the passed data to either DatetimeArray or np.ndarray[object]. + """ + result, tz, freq = _sequence_to_dt64ns( + data, + allow_mixed=True, + require_iso8601=require_iso8601, + ) + + dtype = tz_to_dtype(tz) + dta = DatetimeArray._simple_new(result, freq=freq, dtype=dtype) + return dta + + +def _sequence_to_dt64ns( + data, + dtype=None, + copy=False, + tz=None, + dayfirst=False, + yearfirst=False, + ambiguous="raise", + *, + allow_mixed: bool = False, + require_iso8601: bool = False, +): + """ + Parameters + ---------- + data : list-like + dtype : dtype, str, or None, default None + copy : bool, default False + tz : tzinfo, str, or None, default None + dayfirst : bool, default False + yearfirst : bool, default False + ambiguous : str, bool, or arraylike, default 'raise' + See pandas._libs.tslibs.tzconversion.tz_localize_to_utc. + allow_mixed : bool, default False + Interpret integers as timestamps when datetime objects are also present. + require_iso8601 : bool, default False + Only consider ISO-8601 formats when parsing strings. + + Returns + ------- + result : numpy.ndarray + The sequence converted to a numpy array with dtype ``datetime64[ns]``. + tz : tzinfo or None + Either the user-provided tzinfo or one inferred from the data. + inferred_freq : Tick or None + The inferred frequency of the sequence. + + Raises + ------ + TypeError : PeriodDType data is passed + """ + + inferred_freq = None + + dtype = _validate_dt64_dtype(dtype) + tz = timezones.maybe_get_tz(tz) + + # if dtype has an embedded tz, capture it + tz = validate_tz_from_dtype(dtype, tz) + + if not hasattr(data, "dtype"): + # e.g. list, tuple + if np.ndim(data) == 0: + # i.e. generator + data = list(data) + data = np.asarray(data) + copy = False + elif isinstance(data, ABCMultiIndex): + raise TypeError("Cannot create a DatetimeArray from a MultiIndex.") + else: + data = extract_array(data, extract_numpy=True) + + if isinstance(data, IntegerArray): + data = data.to_numpy("int64", na_value=iNaT) + elif not isinstance(data, (np.ndarray, ExtensionArray)): + # GH#24539 e.g. 
xarray, dask object + data = np.asarray(data) + + if isinstance(data, DatetimeArray): + inferred_freq = data.freq + + # By this point we are assured to have either a numpy array or Index + data, copy = maybe_convert_dtype(data, copy) + data_dtype = getattr(data, "dtype", None) + + if ( + is_object_dtype(data_dtype) + or is_string_dtype(data_dtype) + or is_sparse(data_dtype) + ): + # TODO: We do not have tests specific to string-dtypes, + # also complex or categorical or other extension + copy = False + if lib.infer_dtype(data, skipna=False) == "integer": + data = data.astype(np.int64) + else: + # data comes back here as either i8 to denote UTC timestamps + # or M8[ns] to denote wall times + data, inferred_tz = objects_to_datetime64ns( + data, + dayfirst=dayfirst, + yearfirst=yearfirst, + allow_object=False, + allow_mixed=allow_mixed, + require_iso8601=require_iso8601, + ) + if tz and inferred_tz: + # two timezones: convert to intended from base UTC repr + if data.dtype == "i8": + # GH#42505 + # by convention, these are _already_ UTC, e.g + return data.view(DT64NS_DTYPE), tz, None + + utc_vals = tzconversion.tz_convert_from_utc(data.view("i8"), tz) + data = utc_vals.view(DT64NS_DTYPE) + elif inferred_tz: + tz = inferred_tz + + data_dtype = data.dtype + + # `data` may have originally been a Categorical[datetime64[ns, tz]], + # so we need to handle these types. + if is_datetime64tz_dtype(data_dtype): + # DatetimeArray -> ndarray + tz = _maybe_infer_tz(tz, data.tz) + result = data._ndarray + + elif is_datetime64_dtype(data_dtype): + # tz-naive DatetimeArray or ndarray[datetime64] + data = getattr(data, "_ndarray", data) + if data.dtype != DT64NS_DTYPE: + data = conversion.ensure_datetime64ns(data) + copy = False + + if tz is not None: + # Convert tz-naive to UTC + tz = timezones.maybe_get_tz(tz) + data = tzconversion.tz_localize_to_utc( + data.view("i8"), tz, ambiguous=ambiguous + ) + data = data.view(DT64NS_DTYPE) + + assert data.dtype == DT64NS_DTYPE, data.dtype + result = data + + else: + # must be integer dtype otherwise + # assume this data are epoch timestamps + if tz: + tz = timezones.maybe_get_tz(tz) + + if data.dtype != INT64_DTYPE: + data = data.astype(np.int64, copy=False) + result = data.view(DT64NS_DTYPE) + + if copy: + result = result.copy() + + assert isinstance(result, np.ndarray), type(result) + assert result.dtype == "M8[ns]", result.dtype + + # We have to call this again after possibly inferring a tz above + validate_tz_from_dtype(dtype, tz) + + return result, tz, inferred_freq + + +def objects_to_datetime64ns( + data: np.ndarray, + dayfirst, + yearfirst, + utc=False, + errors="raise", + require_iso8601: bool = False, + allow_object: bool = False, + allow_mixed: bool = False, +): + """ + Convert data to array of timestamps. + + Parameters + ---------- + data : np.ndarray[object] + dayfirst : bool + yearfirst : bool + utc : bool, default False + Whether to convert timezone-aware timestamps to UTC. + errors : {'raise', 'ignore', 'coerce'} + require_iso8601 : bool, default False + allow_object : bool + Whether to return an object-dtype ndarray instead of raising if the + data contains more than one timezone. + allow_mixed : bool, default False + Interpret integers as timestamps when datetime objects are also present. 
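+ + For illustration (a hedged sketch, not upstream documentation): an object array such as ``np.array(['2005-01-01', '2005-01-02'], dtype=object)``, passed with ``dayfirst=False, yearfirst=False``, comes back as a wall-time ``datetime64[ns]`` array with ``tz_parsed=None``.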
+ + Returns + ------- + result : ndarray + np.int64 dtype if returned values represent UTC timestamps + np.datetime64[ns] if returned values represent wall times + object if mixed timezones + inferred_tz : tzinfo or None + + Raises + ------ + ValueError : if data cannot be converted to datetimes + """ + assert errors in ["raise", "ignore", "coerce"] + + # if str-dtype, convert + data = np.array(data, copy=False, dtype=np.object_) + + flags = data.flags + order: Literal["F", "C"] = "F" if flags.f_contiguous else "C" + try: + result, tz_parsed = tslib.array_to_datetime( + data.ravel("K"), + errors=errors, + utc=utc, + dayfirst=dayfirst, + yearfirst=yearfirst, + require_iso8601=require_iso8601, + allow_mixed=allow_mixed, + ) + result = result.reshape(data.shape, order=order) + except ValueError as err: + try: + values, tz_parsed = conversion.datetime_to_datetime64(data.ravel("K")) + # If tzaware, these values represent unix timestamps, so we + # return them as i8 to distinguish from wall times + values = values.reshape(data.shape, order=order) + return values.view("i8"), tz_parsed + except (ValueError, TypeError): + raise err + + if tz_parsed is not None: + # We can take a shortcut since the datetime64 numpy array + # is in UTC + # Return i8 values to denote unix timestamps + return result.view("i8"), tz_parsed + elif is_datetime64_dtype(result): + # returning M8[ns] denotes wall-times; since tz is None + # the distinction is a thin one + return result, tz_parsed + elif is_object_dtype(result): + # GH#23675 when called via `pd.to_datetime`, returning an object-dtype + # array is allowed. When called via `pd.DatetimeIndex`, we can + # only accept datetime64 dtype, so raise TypeError if object-dtype + # is returned, as that indicates the values can be recognized as + # datetimes but they have conflicting timezones/awareness + if allow_object: + return result, tz_parsed + raise TypeError(result) + else: # pragma: no cover + # GH#23675 this TypeError should never be hit, whereas the TypeError + # in the object-dtype branch above is reachable. + raise TypeError(result) + + +def maybe_convert_dtype(data, copy: bool): + """ + Convert data based on dtype conventions, issuing deprecation warnings + or errors where appropriate. + + Parameters + ---------- + data : np.ndarray or pd.Index + copy : bool + + Returns + ------- + data : np.ndarray or pd.Index + copy : bool + + Raises + ------ + TypeError : PeriodDType data is passed + """ + if not hasattr(data, "dtype"): + # e.g. collections.deque + return data, copy + + if is_float_dtype(data.dtype): + # Note: we must cast to datetime64[ns] here in order to treat these + # as wall-times instead of UTC timestamps. + data = data.astype(DT64NS_DTYPE) + copy = False + # TODO: deprecate this behavior to instead treat symmetrically + # with integer dtypes. See discussion in GH#23675 + + elif is_timedelta64_dtype(data.dtype) or is_bool_dtype(data.dtype): + # GH#29794 enforcing deprecation introduced in GH#23539 + raise TypeError(f"dtype {data.dtype} cannot be converted to datetime64[ns]") + elif is_period_dtype(data.dtype): + # Note: without explicitly raising here, PeriodIndex + # test_setops.test_join_does_not_recur fails + raise TypeError( + "Passing PeriodDtype data is invalid. Use `data.to_timestamp()` instead" + ) + + elif is_categorical_dtype(data.dtype): + # GH#18664 preserve tz in going DTI->Categorical->DTI + # TODO: cases where we need to do another pass through this func, + # e.g. 
the categories are timedelta64s + data = data.categories.take(data.codes, fill_value=NaT)._values + copy = False + + elif is_extension_array_dtype(data.dtype) and not is_datetime64tz_dtype(data.dtype): + # Includes categorical + # TODO: We have no tests for these + data = np.array(data, dtype=np.object_) + copy = False + + return data, copy + + +# ------------------------------------------------------------------- +# Validation and Inference + + +def _maybe_infer_tz(tz: tzinfo | None, inferred_tz: tzinfo | None) -> tzinfo | None: + """ + If a timezone is inferred from data, check that it is compatible with + the user-provided timezone, if any. + + Parameters + ---------- + tz : tzinfo or None + inferred_tz : tzinfo or None + + Returns + ------- + tz : tzinfo or None + + Raises + ------ + TypeError : if both timezones are present but do not match + """ + if tz is None: + tz = inferred_tz + elif inferred_tz is None: + pass + elif not timezones.tz_compare(tz, inferred_tz): + raise TypeError( + f"data is already tz-aware {inferred_tz}, unable to " + f"set specified tz: {tz}" + ) + return tz + + +def _validate_dt64_dtype(dtype): + """ + Check that a dtype, if passed, represents either a numpy datetime64[ns] + dtype or a pandas DatetimeTZDtype. + + Parameters + ---------- + dtype : object + + Returns + ------- + dtype : None, numpy.dtype, or DatetimeTZDtype + + Raises + ------ + ValueError : invalid dtype + + Notes + ----- + Unlike validate_tz_from_dtype, this does _not_ allow non-existent + tz errors to go through + """ + if dtype is not None: + dtype = pandas_dtype(dtype) + if is_dtype_equal(dtype, np.dtype("M8")): + # no precision, disallowed GH#24806 + msg = ( + "Passing in 'datetime64' dtype with no precision is not allowed. " + "Please pass in 'datetime64[ns]' instead." + ) + raise ValueError(msg) + + if (isinstance(dtype, np.dtype) and dtype != DT64NS_DTYPE) or not isinstance( + dtype, (np.dtype, DatetimeTZDtype) + ): + raise ValueError( + f"Unexpected value for 'dtype': '{dtype}'. " + "Must be 'datetime64[ns]' or DatetimeTZDtype'." + ) + return dtype + + +def validate_tz_from_dtype(dtype, tz: tzinfo | None) -> tzinfo | None: + """ + If the given dtype is a DatetimeTZDtype, extract the implied + tzinfo object from it and check that it does not conflict with the given + tz. + + Parameters + ---------- + dtype : dtype, str + tz : None, tzinfo + + Returns + ------- + tz : consensus tzinfo + + Raises + ------ + ValueError : on tzinfo mismatch + """ + if dtype is not None: + if isinstance(dtype, str): + try: + dtype = DatetimeTZDtype.construct_from_string(dtype) + except TypeError: + # Things like `datetime64[ns]`, which is OK for the + # constructors, but also nonsense, which should be validated + # but not by us. We *do* allow non-existent tz errors to + # go through + pass + dtz = getattr(dtype, "tz", None) + if dtz is not None: + if tz is not None and not timezones.tz_compare(tz, dtz): + raise ValueError("cannot supply both a tz and a dtype with a tz") + tz = dtz + + if tz is not None and is_datetime64_dtype(dtype): + # We also need to check for the case where the user passed a + # tz-naive dtype (i.e. datetime64[ns]) + if tz is not None and not timezones.tz_compare(tz, dtz): + raise ValueError( + "cannot supply both a tz and a " + "timezone-naive dtype (i.e. 
datetime64[ns])" + ) + + return tz + + +def _infer_tz_from_endpoints( + start: Timestamp, end: Timestamp, tz: tzinfo | None +) -> tzinfo | None: + """ + If a timezone is not explicitly given via `tz`, see if one can + be inferred from the `start` and `end` endpoints. If more than one + of these inputs provides a timezone, require that they all agree. + + Parameters + ---------- + start : Timestamp + end : Timestamp + tz : tzinfo or None + + Returns + ------- + tz : tzinfo or None + + Raises + ------ + TypeError : if start and end timezones do not agree + """ + try: + inferred_tz = timezones.infer_tzinfo(start, end) + except AssertionError as err: + # infer_tzinfo raises AssertionError if passed mismatched timezones + raise TypeError( + "Start and end cannot both be tz-aware with different timezones" + ) from err + + inferred_tz = timezones.maybe_get_tz(inferred_tz) + tz = timezones.maybe_get_tz(tz) + + if tz is not None and inferred_tz is not None: + if not timezones.tz_compare(inferred_tz, tz): + raise AssertionError("Inferred time zone not equal to passed time zone") + + elif inferred_tz is not None: + tz = inferred_tz + + return tz + + +def _maybe_normalize_endpoints( + start: Timestamp | None, end: Timestamp | None, normalize: bool +): + _normalized = True + + if start is not None: + if normalize: + start = start.normalize() + _normalized = True + else: + _normalized = _normalized and start.time() == _midnight + + if end is not None: + if normalize: + end = end.normalize() + _normalized = True + else: + _normalized = _normalized and end.time() == _midnight + + return start, end, _normalized + + +def _maybe_localize_point(ts, is_none, is_not_none, freq, tz, ambiguous, nonexistent): + """ + Localize a start or end Timestamp to the timezone of the corresponding + start or end Timestamp + + Parameters + ---------- + ts : start or end Timestamp to potentially localize + is_none : argument that should be None + is_not_none : argument that should not be None + freq : Tick, DateOffset, or None + tz : str, timezone object or None + ambiguous: str, localization behavior for ambiguous times + nonexistent: str, localization behavior for nonexistent times + + Returns + ------- + ts : Timestamp + """ + # Make sure start and end are timezone localized if: + # 1) freq = a Timedelta-like frequency (Tick) + # 2) freq = None i.e. generating a linspaced range + if is_none is None and is_not_none is not None: + # Note: We can't ambiguous='infer' a singular ambiguous time; however, + # we have historically defaulted ambiguous=False + ambiguous = ambiguous if ambiguous != "infer" else False + localize_args = {"ambiguous": ambiguous, "nonexistent": nonexistent, "tz": None} + if isinstance(freq, Tick) or freq is None: + localize_args["tz"] = tz + ts = ts.tz_localize(**localize_args) + return ts + + +def generate_range(start=None, end=None, periods=None, offset=BDay()): + """ + Generates a sequence of dates corresponding to the specified time + offset. Similar to dateutil.rrule except uses pandas DateOffset + objects to represent time increments. + + Parameters + ---------- + start : datetime, (default None) + end : datetime, (default None) + periods : int, (default None) + offset : DateOffset, (default BDay()) + + Notes + ----- + * This method is faster for generating weekdays than dateutil.rrule + * At least two of (start, end, periods) must be specified. + * If both start and end are specified, the returned dates will + satisfy start <= date <= end. 
+ + Returns + ------- + dates : generator object + """ + offset = to_offset(offset) + + start = Timestamp(start) + start = start if start is not NaT else None + end = Timestamp(end) + end = end if end is not NaT else None + + if start and not offset.is_on_offset(start): + start = offset.rollforward(start) + + elif end and not offset.is_on_offset(end): + end = offset.rollback(end) + + if periods is None and end < start and offset.n >= 0: + end = None + periods = 0 + + if end is None: + end = start + (periods - 1) * offset + + if start is None: + start = end - (periods - 1) * offset + + cur = start + if offset.n >= 0: + while cur <= end: + yield cur + + if cur == end: + # GH#24252 avoid overflows by not performing the addition + # in offset.apply unless we have to + break + + # faster than cur + offset + next_date = offset._apply(cur) + if next_date <= cur: + raise ValueError(f"Offset {offset} did not increment date") + cur = next_date + else: + while cur >= end: + yield cur + + if cur == end: + # GH#24252 avoid overflows by not performing the addition + # in offset.apply unless we have to + break + + # faster than cur + offset + next_date = offset._apply(cur) + if next_date >= cur: + raise ValueError(f"Offset {offset} did not decrement date") + cur = next_date diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/floating.py b/.local/lib/python3.8/site-packages/pandas/core/arrays/floating.py new file mode 100644 index 0000000..e1f80c5 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/arrays/floating.py @@ -0,0 +1,375 @@ +from __future__ import annotations + +from typing import overload + +import numpy as np + +from pandas._libs import ( + lib, + missing as libmissing, +) +from pandas._typing import ( + ArrayLike, + AstypeArg, + DtypeObj, + npt, +) +from pandas.util._decorators import cache_readonly + +from pandas.core.dtypes.cast import astype_nansafe +from pandas.core.dtypes.common import ( + is_bool_dtype, + is_datetime64_dtype, + is_float_dtype, + is_integer_dtype, + is_object_dtype, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import ( + ExtensionDtype, + register_extension_dtype, +) + +from pandas.core.arrays import ExtensionArray +from pandas.core.arrays.numeric import ( + NumericArray, + NumericDtype, +) +from pandas.core.tools.numeric import to_numeric + + +class FloatingDtype(NumericDtype): + """ + An ExtensionDtype to hold a single size of floating dtype. + + These specific implementations are subclasses of the non-public + FloatingDtype. For example we have Float32Dtype to represent float32. + + The attributes name & type are set when these subclasses are created. + """ + + def __repr__(self) -> str: + return f"{self.name}Dtype()" + + @property + def _is_numeric(self) -> bool: + return True + + @classmethod + def construct_array_type(cls) -> type[FloatingArray]: + """ + Return the array type associated with this dtype. 
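+ + For example (illustrative): + + >>> Float64Dtype().construct_array_type() # doctest: +SKIP + <class 'pandas.core.arrays.floating.FloatingArray'>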
+ + Returns + ------- + type + """ + return FloatingArray + + def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None: + # for now only handle other floating types + if not all(isinstance(t, FloatingDtype) for t in dtypes): + return None + np_dtype = np.find_common_type( + # error: Item "ExtensionDtype" of "Union[Any, ExtensionDtype]" has no + # attribute "numpy_dtype" + [t.numpy_dtype for t in dtypes], # type: ignore[union-attr] + [], + ) + if np.issubdtype(np_dtype, np.floating): + return FLOAT_STR_TO_DTYPE[str(np_dtype)] + return None + + +def coerce_to_array( + values, dtype=None, mask=None, copy: bool = False +) -> tuple[np.ndarray, np.ndarray]: + """ + Coerce the input values array to numpy arrays with a mask. + + Parameters + ---------- + values : 1D list-like + dtype : float dtype + mask : bool 1D array, optional + copy : bool, default False + if True, copy the input + + Returns + ------- + tuple of (values, mask) + """ + # if values is floating numpy array, preserve its dtype + if dtype is None and hasattr(values, "dtype"): + if is_float_dtype(values.dtype): + dtype = values.dtype + + if dtype is not None: + if isinstance(dtype, str) and dtype.startswith("Float"): + # Avoid DeprecationWarning from NumPy about np.dtype("Float64") + # https://github.com/numpy/numpy/pull/7476 + dtype = dtype.lower() + + if not issubclass(type(dtype), FloatingDtype): + try: + dtype = FLOAT_STR_TO_DTYPE[str(np.dtype(dtype))] + except KeyError as err: + raise ValueError(f"invalid dtype specified {dtype}") from err + + if isinstance(values, FloatingArray): + values, mask = values._data, values._mask + if dtype is not None: + values = values.astype(dtype.numpy_dtype, copy=False) + + if copy: + values = values.copy() + mask = mask.copy() + return values, mask + + values = np.array(values, copy=copy) + if is_object_dtype(values.dtype): + inferred_type = lib.infer_dtype(values, skipna=True) + if inferred_type == "empty": + pass + elif inferred_type not in [ + "floating", + "integer", + "mixed-integer", + "integer-na", + "mixed-integer-float", + ]: + raise TypeError(f"{values.dtype} cannot be converted to a FloatingDtype") + + elif is_bool_dtype(values) and is_float_dtype(dtype): + values = np.array(values, dtype=float, copy=copy) + + elif not (is_integer_dtype(values) or is_float_dtype(values)): + raise TypeError(f"{values.dtype} cannot be converted to a FloatingDtype") + + if values.ndim != 1: + raise TypeError("values must be a 1D list-like") + + if mask is None: + mask = libmissing.is_numeric_na(values) + + else: + assert len(mask) == len(values) + + if not mask.ndim == 1: + raise TypeError("mask must be a 1D list-like") + + # infer dtype if needed + if dtype is None: + dtype = np.dtype("float64") + else: + dtype = dtype.type + + # if we are float, let's make sure that we can + # safely cast + + # we copy as need to coerce here + # TODO should this be a safe cast? + if mask.any(): + values = values.copy() + values[mask] = np.nan + values = values.astype(dtype, copy=False) # , casting="safe") + + return values, mask + + +class FloatingArray(NumericArray): + """ + Array of floating (optional missing) values. + + .. versionadded:: 1.2.0 + + .. warning:: + + FloatingArray is currently experimental, and its API or internal + implementation may change without warning. Especially the behaviour + regarding NaN (distinct from NA missing values) is subject to change. 
+ + We represent a FloatingArray with 2 numpy arrays: + + - data: contains a numpy float array of the appropriate dtype + - mask: a boolean array holding a mask on the data, True is missing + + To construct a FloatingArray from generic array-like input, use + :func:`pandas.array` with one of the float dtypes (see examples). + + See :ref:`integer_na` for more. + + Parameters + ---------- + values : numpy.ndarray + A 1-d float-dtype array. + mask : numpy.ndarray + A 1-d boolean-dtype array indicating missing values. + copy : bool, default False + Whether to copy the `values` and `mask`. + + Attributes + ---------- + None + + Methods + ------- + None + + Returns + ------- + FloatingArray + + Examples + -------- + Create a FloatingArray with :func:`pandas.array`: + + >>> pd.array([0.1, None, 0.3], dtype=pd.Float32Dtype()) + <FloatingArray> + [0.1, <NA>, 0.3] + Length: 3, dtype: Float32 + + String aliases for the dtypes are also available. They are capitalized. + + >>> pd.array([0.1, None, 0.3], dtype="Float32") + <FloatingArray> + [0.1, <NA>, 0.3] + Length: 3, dtype: Float32 + """ + + # The value used to fill '_data' to avoid upcasting + _internal_fill_value = 0.0 + # Fill values used for any/all + _truthy_value = 1.0 + _falsey_value = 0.0 + + @cache_readonly + def dtype(self) -> FloatingDtype: + return FLOAT_STR_TO_DTYPE[str(self._data.dtype)] + + def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False): + if not (isinstance(values, np.ndarray) and values.dtype.kind == "f"): + raise TypeError( + "values should be floating numpy array. Use " + "the 'pd.array' function instead" + ) + if values.dtype == np.float16: + # If we don't raise here, then accessing self.dtype would raise + raise TypeError("FloatingArray does not support np.float16 dtype.") + + super().__init__(values, mask, copy=copy) + + @classmethod + def _from_sequence( + cls, scalars, *, dtype=None, copy: bool = False + ) -> FloatingArray: + values, mask = coerce_to_array(scalars, dtype=dtype, copy=copy) + return FloatingArray(values, mask) + + @classmethod + def _from_sequence_of_strings( + cls, strings, *, dtype=None, copy: bool = False + ) -> FloatingArray: + scalars = to_numeric(strings, errors="raise") + return cls._from_sequence(scalars, dtype=dtype, copy=copy) + + def _coerce_to_array(self, value) -> tuple[np.ndarray, np.ndarray]: + return coerce_to_array(value, dtype=self.dtype) + + @overload + def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray: + ... + + @overload + def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray: + ... + + @overload + def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike: + ... + + def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: + """ + Cast to a NumPy array or ExtensionArray with 'dtype'. + + Parameters + ---------- + dtype : str or dtype + Typecode or data-type to which the array is cast. + copy : bool, default True + Whether to copy the data, even if not necessary. If False, + a copy is made only if the old dtype does not match the + new dtype. + + Returns + ------- + ndarray or ExtensionArray + NumPy ndarray, or BooleanArray, IntegerArray or FloatingArray with + 'dtype' for its dtype.
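+ + For example, casting to a NumPy float dtype fills missing values with ``np.nan`` (a sketch; output illustrative): + + >>> pd.array([0.1, None], dtype="Float64").astype("float64") # doctest: +SKIP + array([0.1, nan])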
+ + Raises + ------ + TypeError + if incompatible type with an FloatingDtype, equivalent of same_kind + casting + """ + dtype = pandas_dtype(dtype) + + if isinstance(dtype, ExtensionDtype): + return super().astype(dtype, copy=copy) + + # coerce + if is_float_dtype(dtype): + # In astype, we consider dtype=float to also mean na_value=np.nan + kwargs = {"na_value": np.nan} + elif is_datetime64_dtype(dtype): + # error: Dict entry 0 has incompatible type "str": "datetime64"; expected + # "str": "float" + kwargs = {"na_value": np.datetime64("NaT")} # type: ignore[dict-item] + else: + kwargs = {} + + # error: Argument 2 to "to_numpy" of "BaseMaskedArray" has incompatible + # type "**Dict[str, float]"; expected "bool" + data = self.to_numpy(dtype=dtype, **kwargs) # type: ignore[arg-type] + return astype_nansafe(data, dtype, copy=False) + + def _values_for_argsort(self) -> np.ndarray: + return self._data + + +_dtype_docstring = """ +An ExtensionDtype for {dtype} data. + +This dtype uses ``pd.NA`` as missing value indicator. + +Attributes +---------- +None + +Methods +------- +None +""" + +# create the Dtype + + +@register_extension_dtype +class Float32Dtype(FloatingDtype): + type = np.float32 + name = "Float32" + __doc__ = _dtype_docstring.format(dtype="float32") + + +@register_extension_dtype +class Float64Dtype(FloatingDtype): + type = np.float64 + name = "Float64" + __doc__ = _dtype_docstring.format(dtype="float64") + + +FLOAT_STR_TO_DTYPE = { + "float32": Float32Dtype(), + "float64": Float64Dtype(), +} diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/integer.py b/.local/lib/python3.8/site-packages/pandas/core/arrays/integer.py new file mode 100644 index 0000000..443dfa4 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/arrays/integer.py @@ -0,0 +1,497 @@ +from __future__ import annotations + +from typing import overload + +import numpy as np + +from pandas._libs import ( + lib, + missing as libmissing, +) +from pandas._typing import ( + ArrayLike, + AstypeArg, + Dtype, + DtypeObj, + npt, +) +from pandas.util._decorators import cache_readonly + +from pandas.core.dtypes.base import ( + ExtensionDtype, + register_extension_dtype, +) +from pandas.core.dtypes.common import ( + is_bool_dtype, + is_datetime64_dtype, + is_float_dtype, + is_integer_dtype, + is_object_dtype, + is_string_dtype, + pandas_dtype, +) + +from pandas.core.arrays import ExtensionArray +from pandas.core.arrays.masked import BaseMaskedDtype +from pandas.core.arrays.numeric import ( + NumericArray, + NumericDtype, +) +from pandas.core.tools.numeric import to_numeric + + +class _IntegerDtype(NumericDtype): + """ + An ExtensionDtype to hold a single size & kind of integer dtype. + + These specific implementations are subclasses of the non-public + _IntegerDtype. For example we have Int8Dtype to represent signed int 8s. + + The attributes name & type are set when these subclasses are created. + """ + + def __repr__(self) -> str: + sign = "U" if self.is_unsigned_integer else "" + return f"{sign}Int{8 * self.itemsize}Dtype()" + + @cache_readonly + def is_signed_integer(self) -> bool: + return self.kind == "i" + + @cache_readonly + def is_unsigned_integer(self) -> bool: + return self.kind == "u" + + @property + def _is_numeric(self) -> bool: + return True + + @classmethod + def construct_array_type(cls) -> type[IntegerArray]: + """ + Return the array type associated with this dtype. 
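+ + For example (illustrative): + + >>> Int64Dtype().construct_array_type() # doctest: +SKIP + <class 'pandas.core.arrays.integer.IntegerArray'>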
+ + Returns + ------- + type + """ + return IntegerArray + + def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None: + # we only handle nullable EA dtypes and numeric numpy dtypes + if not all( + isinstance(t, BaseMaskedDtype) + or ( + isinstance(t, np.dtype) + and (np.issubdtype(t, np.number) or np.issubdtype(t, np.bool_)) + ) + for t in dtypes + ): + return None + np_dtype = np.find_common_type( + # error: List comprehension has incompatible type List[Union[Any, + # dtype, ExtensionDtype]]; expected List[Union[dtype, None, type, + # _SupportsDtype, str, Tuple[Any, Union[int, Sequence[int]]], + # List[Any], _DtypeDict, Tuple[Any, Any]]] + [ + t.numpy_dtype # type: ignore[misc] + if isinstance(t, BaseMaskedDtype) + else t + for t in dtypes + ], + [], + ) + if np.issubdtype(np_dtype, np.integer): + return INT_STR_TO_DTYPE[str(np_dtype)] + elif np.issubdtype(np_dtype, np.floating): + from pandas.core.arrays.floating import FLOAT_STR_TO_DTYPE + + return FLOAT_STR_TO_DTYPE[str(np_dtype)] + return None + + +def safe_cast(values, dtype, copy: bool): + """ + Safely cast the values to the dtype if they + are equivalent, meaning floats must be equivalent to the + ints. + """ + try: + return values.astype(dtype, casting="safe", copy=copy) + except TypeError as err: + casted = values.astype(dtype, copy=copy) + if (casted == values).all(): + return casted + + raise TypeError( + f"cannot safely cast non-equivalent {values.dtype} to {np.dtype(dtype)}" + ) from err + + +def coerce_to_array( + values, dtype, mask=None, copy: bool = False +) -> tuple[np.ndarray, np.ndarray]: + """ + Coerce the input values array to numpy arrays with a mask. + + Parameters + ---------- + values : 1D list-like + dtype : integer dtype + mask : bool 1D array, optional + copy : bool, default False + if True, copy the input + + Returns + ------- + tuple of (values, mask) + """ + # if values is integer numpy array, preserve its dtype + if dtype is None and hasattr(values, "dtype"): + if is_integer_dtype(values.dtype): + dtype = values.dtype + + if dtype is not None: + if isinstance(dtype, str) and ( + dtype.startswith("Int") or dtype.startswith("UInt") + ): + # Avoid DeprecationWarning from NumPy about np.dtype("Int64") + # https://github.com/numpy/numpy/pull/7476 + dtype = dtype.lower() + + if not issubclass(type(dtype), _IntegerDtype): + try: + dtype = INT_STR_TO_DTYPE[str(np.dtype(dtype))] + except KeyError as err: + raise ValueError(f"invalid dtype specified {dtype}") from err + + if isinstance(values, IntegerArray): + values, mask = values._data, values._mask + if dtype is not None: + values = values.astype(dtype.numpy_dtype, copy=False) + + if copy: + values = values.copy() + mask = mask.copy() + return values, mask + + values = np.array(values, copy=copy) + inferred_type = None + if is_object_dtype(values.dtype) or is_string_dtype(values.dtype): + inferred_type = lib.infer_dtype(values, skipna=True) + if inferred_type == "empty": + pass + elif inferred_type not in [ + "floating", + "integer", + "mixed-integer", + "integer-na", + "mixed-integer-float", + "string", + "unicode", + ]: + raise TypeError(f"{values.dtype} cannot be converted to an IntegerDtype") + + elif is_bool_dtype(values) and is_integer_dtype(dtype): + values = np.array(values, dtype=int, copy=copy) + + elif not (is_integer_dtype(values) or is_float_dtype(values)): + raise TypeError(f"{values.dtype} cannot be converted to an IntegerDtype") + + if values.ndim != 1: + raise TypeError("values must be a 1D list-like") + + if mask is None: + mask = 
libmissing.is_numeric_na(values) + else: + assert len(mask) == len(values) + + if mask.ndim != 1: + raise TypeError("mask must be a 1D list-like") + + # infer dtype if needed + if dtype is None: + dtype = np.dtype("int64") + else: + dtype = dtype.type + + # if we are float, let's make sure that we can + # safely cast + + # we copy as need to coerce here + if mask.any(): + values = values.copy() + values[mask] = 1 + if inferred_type in ("string", "unicode"): + # casts from str are always safe since they raise + # a ValueError if the str cannot be parsed into an int + values = values.astype(dtype, copy=copy) + else: + values = safe_cast(values, dtype, copy=False) + + return values, mask + + +class IntegerArray(NumericArray): + """ + Array of integer (optional missing) values. + + .. versionchanged:: 1.0.0 + + Now uses :attr:`pandas.NA` as the missing value rather + than :attr:`numpy.nan`. + + .. warning:: + + IntegerArray is currently experimental, and its API or internal + implementation may change without warning. + + We represent an IntegerArray with 2 numpy arrays: + + - data: contains a numpy integer array of the appropriate dtype + - mask: a boolean array holding a mask on the data, True is missing + + To construct an IntegerArray from generic array-like input, use + :func:`pandas.array` with one of the integer dtypes (see examples). + + See :ref:`integer_na` for more. + + Parameters + ---------- + values : numpy.ndarray + A 1-d integer-dtype array. + mask : numpy.ndarray + A 1-d boolean-dtype array indicating missing values. + copy : bool, default False + Whether to copy the `values` and `mask`. + + Attributes + ---------- + None + + Methods + ------- + None + + Returns + ------- + IntegerArray + + Examples + -------- + Create an IntegerArray with :func:`pandas.array`. + + >>> int_array = pd.array([1, None, 3], dtype=pd.Int32Dtype()) + >>> int_array + <IntegerArray> + [1, <NA>, 3] + Length: 3, dtype: Int32 + + String aliases for the dtypes are also available. They are capitalized. + + >>> pd.array([1, None, 3], dtype='Int32') + <IntegerArray> + [1, <NA>, 3] + Length: 3, dtype: Int32 + + >>> pd.array([1, None, 3], dtype='UInt16') + <IntegerArray> + [1, <NA>, 3] + Length: 3, dtype: UInt16 + """ + + # The value used to fill '_data' to avoid upcasting + _internal_fill_value = 1 + # Fill values used for any/all + _truthy_value = 1 + _falsey_value = 0 + + @cache_readonly + def dtype(self) -> _IntegerDtype: + return INT_STR_TO_DTYPE[str(self._data.dtype)] + + def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False): + if not (isinstance(values, np.ndarray) and values.dtype.kind in ["i", "u"]): + raise TypeError( + "values should be integer numpy array. Use " + "the 'pd.array' function instead" + ) + super().__init__(values, mask, copy=copy) + + @classmethod + def _from_sequence( + cls, scalars, *, dtype: Dtype | None = None, copy: bool = False + ) -> IntegerArray: + values, mask = coerce_to_array(scalars, dtype=dtype, copy=copy) + return IntegerArray(values, mask) + + @classmethod + def _from_sequence_of_strings( + cls, strings, *, dtype: Dtype | None = None, copy: bool = False + ) -> IntegerArray: + scalars = to_numeric(strings, errors="raise") + return cls._from_sequence(scalars, dtype=dtype, copy=copy) + + def _coerce_to_array(self, value) -> tuple[np.ndarray, np.ndarray]: + return coerce_to_array(value, dtype=self.dtype) + + @overload + def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray: + ... + + @overload + def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray: + ...
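+ + # Note: the @overload stubs above and below only narrow astype's return + # type for static type checkers; the single runtime implementation of + # astype follows them.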
+ + @overload + def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike: + ... + + def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: + """ + Cast to a NumPy array or ExtensionArray with 'dtype'. + + Parameters + ---------- + dtype : str or dtype + Typecode or data-type to which the array is cast. + copy : bool, default True + Whether to copy the data, even if not necessary. If False, + a copy is made only if the old dtype does not match the + new dtype. + + Returns + ------- + ndarray or ExtensionArray + NumPy ndarray, BooleanArray or IntegerArray with 'dtype' for its dtype. + + Raises + ------ + TypeError + if incompatible type with an IntegerDtype, equivalent of same_kind + casting + """ + dtype = pandas_dtype(dtype) + + if isinstance(dtype, ExtensionDtype): + return super().astype(dtype, copy=copy) + + na_value: float | np.datetime64 | lib.NoDefault + + # coerce + if is_float_dtype(dtype): + # In astype, we consider dtype=float to also mean na_value=np.nan + na_value = np.nan + elif is_datetime64_dtype(dtype): + na_value = np.datetime64("NaT") + else: + na_value = lib.no_default + + return self.to_numpy(dtype=dtype, na_value=na_value, copy=False) + + def _values_for_argsort(self) -> np.ndarray: + """ + Return values for sorting. + + Returns + ------- + ndarray + The transformed values should maintain the ordering between values + within the array. + + See Also + -------- + ExtensionArray.argsort : Return the indices that would sort this array. + """ + data = self._data.copy() + if self._mask.any(): + data[self._mask] = data.min() - 1 + return data + + +_dtype_docstring = """ +An ExtensionDtype for {dtype} integer data. + +.. versionchanged:: 1.0.0 + + Now uses :attr:`pandas.NA` as its missing value, + rather than :attr:`numpy.nan`. 
+ +Attributes +---------- +None + +Methods +------- +None +""" + +# create the Dtype + + +@register_extension_dtype +class Int8Dtype(_IntegerDtype): + type = np.int8 + name = "Int8" + __doc__ = _dtype_docstring.format(dtype="int8") + + +@register_extension_dtype +class Int16Dtype(_IntegerDtype): + type = np.int16 + name = "Int16" + __doc__ = _dtype_docstring.format(dtype="int16") + + +@register_extension_dtype +class Int32Dtype(_IntegerDtype): + type = np.int32 + name = "Int32" + __doc__ = _dtype_docstring.format(dtype="int32") + + +@register_extension_dtype +class Int64Dtype(_IntegerDtype): + type = np.int64 + name = "Int64" + __doc__ = _dtype_docstring.format(dtype="int64") + + +@register_extension_dtype +class UInt8Dtype(_IntegerDtype): + type = np.uint8 + name = "UInt8" + __doc__ = _dtype_docstring.format(dtype="uint8") + + +@register_extension_dtype +class UInt16Dtype(_IntegerDtype): + type = np.uint16 + name = "UInt16" + __doc__ = _dtype_docstring.format(dtype="uint16") + + +@register_extension_dtype +class UInt32Dtype(_IntegerDtype): + type = np.uint32 + name = "UInt32" + __doc__ = _dtype_docstring.format(dtype="uint32") + + +@register_extension_dtype +class UInt64Dtype(_IntegerDtype): + type = np.uint64 + name = "UInt64" + __doc__ = _dtype_docstring.format(dtype="uint64") + + +INT_STR_TO_DTYPE: dict[str, _IntegerDtype] = { + "int8": Int8Dtype(), + "int16": Int16Dtype(), + "int32": Int32Dtype(), + "int64": Int64Dtype(), + "uint8": UInt8Dtype(), + "uint16": UInt16Dtype(), + "uint32": UInt32Dtype(), + "uint64": UInt64Dtype(), +} diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/interval.py b/.local/lib/python3.8/site-packages/pandas/core/arrays/interval.py new file mode 100644 index 0000000..9a1435c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/arrays/interval.py @@ -0,0 +1,1727 @@ +from __future__ import annotations + +import operator +from operator import ( + le, + lt, +) +import textwrap +from typing import ( + Sequence, + TypeVar, + Union, + cast, + overload, +) + +import numpy as np + +from pandas._config import get_option + +from pandas._libs import ( + NaT, + lib, +) +from pandas._libs.interval import ( + VALID_CLOSED, + Interval, + IntervalMixin, + intervals_to_interval_bounds, +) +from pandas._libs.missing import NA +from pandas._typing import ( + ArrayLike, + Dtype, + NpDtype, + PositionalIndexer, + ScalarIndexer, + SequenceIndexer, + npt, +) +from pandas.compat.numpy import function as nv +from pandas.util._decorators import Appender + +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_datetime64_dtype, + is_datetime64tz_dtype, + is_dtype_equal, + is_float_dtype, + is_integer_dtype, + is_interval_dtype, + is_list_like, + is_object_dtype, + is_scalar, + is_string_dtype, + is_timedelta64_dtype, + needs_i8_conversion, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import IntervalDtype +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCDatetimeIndex, + ABCIntervalIndex, + ABCPeriodIndex, +) +from pandas.core.dtypes.missing import ( + is_valid_na_for_dtype, + isna, + notna, +) + +from pandas.core.algorithms import ( + isin, + take, + unique, + value_counts, +) +from pandas.core.arrays.base import ( + ExtensionArray, + _extension_array_shared_docs, +) +import pandas.core.common as com +from pandas.core.construction import ( + array as pd_array, + ensure_wrapped_if_datetimelike, + extract_array, +) +from pandas.core.indexers import check_array_indexer +from pandas.core.indexes.base import ensure_index +from 
pandas.core.ops import ( + invalid_comparison, + unpack_zerodim_and_defer, +) + +IntervalArrayT = TypeVar("IntervalArrayT", bound="IntervalArray") +IntervalOrNA = Union[Interval, float] + +_interval_shared_docs: dict[str, str] = {} + +_shared_docs_kwargs = { + "klass": "IntervalArray", + "qualname": "arrays.IntervalArray", + "name": "", +} + + +_interval_shared_docs[ + "class" +] = """ +%(summary)s + +.. versionadded:: %(versionadded)s + +Parameters +---------- +data : array-like (1-dimensional) + Array-like containing Interval objects from which to build the + %(klass)s. +closed : {'left', 'right', 'both', 'neither'}, default 'right' + Whether the intervals are closed on the left-side, right-side, both or + neither. +dtype : dtype or None, default None + If None, dtype will be inferred. +copy : bool, default False + Copy the input data. +%(name)s\ +verify_integrity : bool, default True + Verify that the %(klass)s is valid. + +Attributes +---------- +left +right +closed +mid +length +is_empty +is_non_overlapping_monotonic +%(extra_attributes)s\ + +Methods +------- +from_arrays +from_tuples +from_breaks +contains +overlaps +set_closed +to_tuples +%(extra_methods)s\ + +See Also +-------- +Index : The base pandas Index type. +Interval : A bounded slice-like interval; the elements of an %(klass)s. +interval_range : Function to create a fixed frequency IntervalIndex. +cut : Bin values into discrete Intervals. +qcut : Bin values into equal-sized Intervals based on rank or sample quantiles. + +Notes +----- +See the `user guide +<https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html#intervalindex>`__ +for more. + +%(examples)s\ +""" + + +@Appender( + _interval_shared_docs["class"] + % { + "klass": "IntervalArray", + "summary": "Pandas array for interval data that are closed on the same side.", + "versionadded": "0.24.0", + "name": "", + "extra_attributes": "", + "extra_methods": "", + "examples": textwrap.dedent( + """\ + Examples + -------- + A new ``IntervalArray`` can be constructed directly from an array-like of + ``Interval`` objects: + + >>> pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)]) + <IntervalArray> + [(0, 1], (1, 5]] + Length: 2, dtype: interval[int64, right] + + It may also be constructed using one of the constructor + methods: :meth:`IntervalArray.from_arrays`, + :meth:`IntervalArray.from_breaks`, and :meth:`IntervalArray.from_tuples`. + """ + ), + } +) +class IntervalArray(IntervalMixin, ExtensionArray): + ndim = 1 + can_hold_na = True + _na_value = _fill_value = np.nan + + # --------------------------------------------------------------------- + # Constructors + + def __new__( + cls: type[IntervalArrayT], + data, + closed=None, + dtype: Dtype | None = None, + copy: bool = False, + verify_integrity: bool = True, + ): + + data = extract_array(data, extract_numpy=True) + + if isinstance(data, cls): + left = data._left + right = data._right + closed = closed or data.closed + else: + + # don't allow scalars + if is_scalar(data): + msg = ( + f"{cls.__name__}(...) 
must be called with a collection " + f"of some kind, {data} was passed" + ) + raise TypeError(msg) + + # might need to convert empty or purely na data + data = _maybe_convert_platform_interval(data) + left, right, infer_closed = intervals_to_interval_bounds( + data, validate_closed=closed is None + ) + if left.dtype == object: + left = lib.maybe_convert_objects(left) + right = lib.maybe_convert_objects(right) + closed = closed or infer_closed + + return cls._simple_new( + left, + right, + closed, + copy=copy, + dtype=dtype, + verify_integrity=verify_integrity, + ) + + @classmethod + def _simple_new( + cls: type[IntervalArrayT], + left, + right, + closed=None, + copy: bool = False, + dtype: Dtype | None = None, + verify_integrity: bool = True, + ) -> IntervalArrayT: + result = IntervalMixin.__new__(cls) + + if closed is None and isinstance(dtype, IntervalDtype): + closed = dtype.closed + + closed = closed or "right" + left = ensure_index(left, copy=copy) + right = ensure_index(right, copy=copy) + + if dtype is not None: + # GH 19262: dtype must be an IntervalDtype to override inferred + dtype = pandas_dtype(dtype) + if is_interval_dtype(dtype): + dtype = cast(IntervalDtype, dtype) + if dtype.subtype is not None: + left = left.astype(dtype.subtype) + right = right.astype(dtype.subtype) + else: + msg = f"dtype must be an IntervalDtype, got {dtype}" + raise TypeError(msg) + + if dtype.closed is None: + # possibly loading an old pickle + dtype = IntervalDtype(dtype.subtype, closed) + elif closed != dtype.closed: + raise ValueError("closed keyword does not match dtype.closed") + + # coerce dtypes to match if needed + if is_float_dtype(left) and is_integer_dtype(right): + right = right.astype(left.dtype) + elif is_float_dtype(right) and is_integer_dtype(left): + left = left.astype(right.dtype) + + if type(left) != type(right): + msg = ( + f"must not have differing left [{type(left).__name__}] and " + f"right [{type(right).__name__}] types" + ) + raise ValueError(msg) + elif is_categorical_dtype(left.dtype) or is_string_dtype(left.dtype): + # GH 19016 + msg = ( + "category, object, and string subtypes are not supported " + "for IntervalArray" + ) + raise TypeError(msg) + elif isinstance(left, ABCPeriodIndex): + msg = "Period dtypes are not supported, use a PeriodIndex instead" + raise ValueError(msg) + elif isinstance(left, ABCDatetimeIndex) and str(left.tz) != str(right.tz): + msg = ( + "left and right must have the same time zone, got " + f"'{left.tz}' and '{right.tz}'" + ) + raise ValueError(msg) + + # For dt64/td64 we want DatetimeArray/TimedeltaArray instead of ndarray + left = ensure_wrapped_if_datetimelike(left) + left = extract_array(left, extract_numpy=True) + right = ensure_wrapped_if_datetimelike(right) + right = extract_array(right, extract_numpy=True) + + lbase = getattr(left, "_ndarray", left).base + rbase = getattr(right, "_ndarray", right).base + if lbase is not None and lbase is rbase: + # If these share data, then setitem could corrupt our IA + right = right.copy() + + dtype = IntervalDtype(left.dtype, closed=closed) + result._dtype = dtype + + result._left = left + result._right = right + if verify_integrity: + result._validate() + return result + + @classmethod + def _from_sequence( + cls: type[IntervalArrayT], + scalars, + *, + dtype: Dtype | None = None, + copy: bool = False, + ) -> IntervalArrayT: + return cls(scalars, dtype=dtype, copy=copy) + + @classmethod + def _from_factorized( + cls: type[IntervalArrayT], values: np.ndarray, original: IntervalArrayT + ) -> 
IntervalArrayT: + if len(values) == 0: + # An empty array returns object-dtype here. We can't create + # a new IA from an (empty) object-dtype array, so turn it into the + # correct dtype. + values = values.astype(original.dtype.subtype) + return cls(values, closed=original.closed) + + _interval_shared_docs["from_breaks"] = textwrap.dedent( + """ + Construct an %(klass)s from an array of splits. + + Parameters + ---------- + breaks : array-like (1-dimensional) + Left and right bounds for each interval. + closed : {'left', 'right', 'both', 'neither'}, default 'right' + Whether the intervals are closed on the left-side, right-side, both + or neither. + copy : bool, default False + Copy the data. + dtype : dtype or None, default None + If None, dtype will be inferred. + + Returns + ------- + %(klass)s + + See Also + -------- + interval_range : Function to create a fixed frequency IntervalIndex. + %(klass)s.from_arrays : Construct from a left and right array. + %(klass)s.from_tuples : Construct from a sequence of tuples. + + %(examples)s\ + """ + ) + + @classmethod + @Appender( + _interval_shared_docs["from_breaks"] + % { + "klass": "IntervalArray", + "examples": textwrap.dedent( + """\ + Examples + -------- + >>> pd.arrays.IntervalArray.from_breaks([0, 1, 2, 3]) + <IntervalArray> + [(0, 1], (1, 2], (2, 3]] + Length: 3, dtype: interval[int64, right] + """ + ), + } + ) + def from_breaks( + cls: type[IntervalArrayT], + breaks, + closed="right", + copy: bool = False, + dtype: Dtype | None = None, + ) -> IntervalArrayT: + breaks = _maybe_convert_platform_interval(breaks) + + return cls.from_arrays(breaks[:-1], breaks[1:], closed, copy=copy, dtype=dtype) + + _interval_shared_docs["from_arrays"] = textwrap.dedent( + """ + Construct from two arrays defining the left and right bounds. + + Parameters + ---------- + left : array-like (1-dimensional) + Left bounds for each interval. + right : array-like (1-dimensional) + Right bounds for each interval. + closed : {'left', 'right', 'both', 'neither'}, default 'right' + Whether the intervals are closed on the left-side, right-side, both + or neither. + copy : bool, default False + Copy the data. + dtype : dtype, optional + If None, dtype will be inferred. + + Returns + ------- + %(klass)s + + Raises + ------ + ValueError + When a value is missing in only one of `left` or `right`. + When a value in `left` is greater than the corresponding value + in `right`. + + See Also + -------- + interval_range : Function to create a fixed frequency IntervalIndex. + %(klass)s.from_breaks : Construct an %(klass)s from an array of + splits. + %(klass)s.from_tuples : Construct an %(klass)s from an + array-like of tuples. + + Notes + ----- + Each element of `left` must be less than or equal to the `right` + element at the same position. If an element is missing, it must be + missing in both `left` and `right`. A TypeError is raised when + using an unsupported type for `left` or `right`. At the moment, + 'category', 'object', and 'string' subtypes are not supported.
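+ + For instance (illustrative), ``from_arrays([0, np.nan], [1, np.nan])`` is + valid because the missing value appears in both `left` and `right`, whereas + ``from_arrays([0, np.nan], [1, 2])`` raises a ``ValueError``.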
+ + %(examples)s\ + """ + ) + + @classmethod + @Appender( + _interval_shared_docs["from_arrays"] + % { + "klass": "IntervalArray", + "examples": textwrap.dedent( + """\ + >>> pd.arrays.IntervalArray.from_arrays([0, 1, 2], [1, 2, 3]) + <IntervalArray> + [(0, 1], (1, 2], (2, 3]] + Length: 3, dtype: interval[int64, right] + """ + ), + } + ) + def from_arrays( + cls: type[IntervalArrayT], + left, + right, + closed="right", + copy: bool = False, + dtype: Dtype | None = None, + ) -> IntervalArrayT: + left = _maybe_convert_platform_interval(left) + right = _maybe_convert_platform_interval(right) + + return cls._simple_new( + left, right, closed, copy=copy, dtype=dtype, verify_integrity=True + ) + + _interval_shared_docs["from_tuples"] = textwrap.dedent( + """ + Construct an %(klass)s from an array-like of tuples. + + Parameters + ---------- + data : array-like (1-dimensional) + Array of tuples. + closed : {'left', 'right', 'both', 'neither'}, default 'right' + Whether the intervals are closed on the left-side, right-side, both + or neither. + copy : bool, default False + By-default copy the data, this is compat only and ignored. + dtype : dtype or None, default None + If None, dtype will be inferred. + + Returns + ------- + %(klass)s + + See Also + -------- + interval_range : Function to create a fixed frequency IntervalIndex. + %(klass)s.from_arrays : Construct an %(klass)s from a left and + right array. + %(klass)s.from_breaks : Construct an %(klass)s from an array of + splits. + + %(examples)s\ + """ + ) + + @classmethod + @Appender( + _interval_shared_docs["from_tuples"] + % { + "klass": "IntervalArray", + "examples": textwrap.dedent( + """\ + Examples + -------- + >>> pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2)]) + <IntervalArray> + [(0, 1], (1, 2]] + Length: 2, dtype: interval[int64, right] + """ + ), + } + ) + def from_tuples( + cls: type[IntervalArrayT], + data, + closed="right", + copy: bool = False, + dtype: Dtype | None = None, + ) -> IntervalArrayT: + if len(data): + left, right = [], [] + else: + # ensure that empty data keeps input dtype + left = right = data + + for d in data: + if isna(d): + lhs = rhs = np.nan + else: + name = cls.__name__ + try: + # need list of length 2 tuples, e.g. [(0, 1), (1, 2), ...] + lhs, rhs = d + except ValueError as err: + msg = f"{name}.from_tuples requires tuples of length 2, got {d}" + raise ValueError(msg) from err + except TypeError as err: + msg = f"{name}.from_tuples received an invalid item, {d}" + raise TypeError(msg) from err + left.append(lhs) + right.append(rhs) + + return cls.from_arrays(left, right, closed, copy=False, dtype=dtype) + + def _validate(self): + """ + Verify that the IntervalArray is valid.
+ + Checks that + + * closed is valid + * left and right match lengths + * left and right have the same missing values + * left is always below right + """ + if self.closed not in VALID_CLOSED: + msg = f"invalid option for 'closed': {self.closed}" + raise ValueError(msg) + if len(self._left) != len(self._right): + msg = "left and right must have the same length" + raise ValueError(msg) + left_mask = notna(self._left) + right_mask = notna(self._right) + if not (left_mask == right_mask).all(): + msg = ( + "missing values must be missing in the same " + "location both left and right sides" + ) + raise ValueError(msg) + if not (self._left[left_mask] <= self._right[left_mask]).all(): + msg = "left side of interval must be <= right side" + raise ValueError(msg) + + def _shallow_copy(self: IntervalArrayT, left, right) -> IntervalArrayT: + """ + Return a new IntervalArray with the replacement attributes + + Parameters + ---------- + left : Index + Values to be used for the left-side of the intervals. + right : Index + Values to be used for the right-side of the intervals. + """ + return self._simple_new(left, right, closed=self.closed, verify_integrity=False) + + # --------------------------------------------------------------------- + # Descriptive + + @property + def dtype(self) -> IntervalDtype: + return self._dtype + + @property + def nbytes(self) -> int: + return self.left.nbytes + self.right.nbytes + + @property + def size(self) -> int: + # Avoid materializing self.values + return self.left.size + + # --------------------------------------------------------------------- + # EA Interface + + def __iter__(self): + return iter(np.asarray(self)) + + def __len__(self) -> int: + return len(self._left) + + @overload + def __getitem__(self, key: ScalarIndexer) -> IntervalOrNA: + ... + + @overload + def __getitem__(self: IntervalArrayT, key: SequenceIndexer) -> IntervalArrayT: + ... 
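+ + # Illustrative behavior (a sketch): for ``arr = IntervalArray.from_breaks([0, 1, 2])``, + # a scalar key like ``arr[0]`` returns ``Interval(0, 1, closed='right')`` (or NA + # for a missing entry), while a list-like key like ``arr[[0]]`` returns a new + # IntervalArray.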
+ + def __getitem__( + self: IntervalArrayT, key: PositionalIndexer + ) -> IntervalArrayT | IntervalOrNA: + key = check_array_indexer(self, key) + left = self._left[key] + right = self._right[key] + + if not isinstance(left, (np.ndarray, ExtensionArray)): + # scalar + if is_scalar(left) and isna(left): + return self._fill_value + return Interval(left, right, self.closed) + # error: Argument 1 to "ndim" has incompatible type "Union[ndarray, + # ExtensionArray]"; expected "Union[Union[int, float, complex, str, bytes, + # generic], Sequence[Union[int, float, complex, str, bytes, generic]], + # Sequence[Sequence[Any]], _SupportsArray]" + if np.ndim(left) > 1:  # type: ignore[arg-type] + # GH#30588 multi-dimensional indexer disallowed + raise ValueError("multi-dimensional indexing not allowed") + return self._shallow_copy(left, right) + + def __setitem__(self, key, value): + value_left, value_right = self._validate_setitem_value(value) + key = check_array_indexer(self, key) + + self._left[key] = value_left + self._right[key] = value_right + + def _cmp_method(self, other, op): + # ensure pandas array for list-like and eliminate non-interval scalars + if is_list_like(other): + if len(self) != len(other): + raise ValueError("Lengths must match to compare") + other = pd_array(other) + elif not isinstance(other, Interval): + # non-interval scalar -> no matches + if other is NA: + # GH#31882 + from pandas.core.arrays import BooleanArray + + arr = np.empty(self.shape, dtype=bool) + mask = np.ones(self.shape, dtype=bool) + return BooleanArray(arr, mask) + return invalid_comparison(self, other, op) + + # determine the dtype of the elements we want to compare + if isinstance(other, Interval): + other_dtype = pandas_dtype("interval") + elif not is_categorical_dtype(other.dtype): + other_dtype = other.dtype + else: + # for categorical defer to categories for dtype + other_dtype = other.categories.dtype + + # extract intervals if we have interval categories with matching closed + if is_interval_dtype(other_dtype): + if self.closed != other.categories.closed: + return invalid_comparison(self, other, op) + + other = other.categories.take( + other.codes, allow_fill=True, fill_value=other.categories._na_value + ) + + # interval-like -> need same closed and matching endpoints + if is_interval_dtype(other_dtype): + if self.closed != other.closed: + return invalid_comparison(self, other, op) + elif not isinstance(other, Interval): + other = type(self)(other) + + if op is operator.eq: + return (self._left == other.left) & (self._right == other.right) + elif op is operator.ne: + return (self._left != other.left) | (self._right != other.right) + elif op is operator.gt: + return (self._left > other.left) | ( + (self._left == other.left) & (self._right > other.right) + ) + elif op is operator.ge: + return (self == other) | (self > other) + elif op is operator.lt: + return (self._left < other.left) | ( + (self._left == other.left) & (self._right < other.right) + ) + else: + # operator.le + return (self == other) | (self < other) + + # non-interval/non-object dtype -> no matches + if not is_object_dtype(other_dtype): + return invalid_comparison(self, other, op) + + # object dtype -> iteratively check for intervals + result = np.zeros(len(self), dtype=bool) + for i, obj in enumerate(other): + try: + result[i] = op(self[i], obj) + except TypeError: + if obj is NA: + # comparison with np.nan returns NA + # github.com/pandas-dev/pandas/pull/37124#discussion_r509095092 + result = result.astype(object) + result[i] = NA + else: 
+ raise + return result + + @unpack_zerodim_and_defer("__eq__") + def __eq__(self, other): + return self._cmp_method(other, operator.eq) + + @unpack_zerodim_and_defer("__ne__") + def __ne__(self, other): + return self._cmp_method(other, operator.ne) + + @unpack_zerodim_and_defer("__gt__") + def __gt__(self, other): + return self._cmp_method(other, operator.gt) + + @unpack_zerodim_and_defer("__ge__") + def __ge__(self, other): + return self._cmp_method(other, operator.ge) + + @unpack_zerodim_and_defer("__lt__") + def __lt__(self, other): + return self._cmp_method(other, operator.lt) + + @unpack_zerodim_and_defer("__le__") + def __le__(self, other): + return self._cmp_method(other, operator.le) + + def argsort( + self, + ascending: bool = True, + kind: str = "quicksort", + na_position: str = "last", + *args, + **kwargs, + ) -> np.ndarray: + ascending = nv.validate_argsort_with_ascending(ascending, args, kwargs) + + if ascending and kind == "quicksort" and na_position == "last": + return np.lexsort((self.right, self.left)) + + # TODO: other cases we can use lexsort for? much more performant. + return super().argsort( + ascending=ascending, kind=kind, na_position=na_position, **kwargs + ) + + def min(self, *, axis: int | None = None, skipna: bool = True): + nv.validate_minmax_axis(axis, self.ndim) + + if not len(self): + return self._na_value + + mask = self.isna() + if mask.any(): + if not skipna: + return self._na_value + obj = self[~mask] + else: + obj = self + + indexer = obj.argsort()[0] + return obj[indexer] + + def max(self, *, axis: int | None = None, skipna: bool = True): + nv.validate_minmax_axis(axis, self.ndim) + + if not len(self): + return self._na_value + + mask = self.isna() + if mask.any(): + if not skipna: + return self._na_value + obj = self[~mask] + else: + obj = self + + indexer = obj.argsort()[-1] + return obj[indexer] + + def fillna( + self: IntervalArrayT, value=None, method=None, limit=None + ) -> IntervalArrayT: + """ + Fill NA/NaN values using the specified method. + + Parameters + ---------- + value : scalar, dict, Series + If a scalar value is passed it is used to fill all missing values. + Alternatively, a Series or dict can be used to fill in different + values for each index. The value should not be a list. The + value(s) passed should be either Interval objects or NA/NaN. + method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None + (Not implemented yet for IntervalArray) + Method to use for filling holes in reindexed Series + limit : int, default None + (Not implemented yet for IntervalArray) + If method is specified, this is the maximum number of consecutive + NaN values to forward/backward fill. In other words, if there is + a gap with more than this number of consecutive NaNs, it will only + be partially filled. If method is not specified, this is the + maximum number of entries along the entire axis where NaNs will be + filled. + + Returns + ------- + filled : IntervalArray with NA/NaN filled + """ + if method is not None: + raise TypeError("Filling by method is not supported for IntervalArray.") + if limit is not None: + raise TypeError("limit is not supported for IntervalArray.") + + value_left, value_right = self._validate_scalar(value) + + left = self.left.fillna(value=value_left) + right = self.right.fillna(value=value_right) + return self._shallow_copy(left, right) + + def astype(self, dtype, copy: bool = True): + """ + Cast to an ExtensionArray or NumPy array with dtype 'dtype'. 
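+ + For example, integer-backed intervals can be cast to a float subtype (a sketch; the dtype string and output shown are illustrative): + + >>> arr = pd.arrays.IntervalArray.from_breaks([0, 1, 2]) + >>> arr.astype("interval[float64, right]") # doctest: +SKIP + <IntervalArray> + [(0.0, 1.0], (1.0, 2.0]] + Length: 2, dtype: interval[float64, right]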
+ + Parameters + ---------- + dtype : str or dtype + Typecode or data-type to which the array is cast. + + copy : bool, default True + Whether to copy the data, even if not necessary. If False, + a copy is made only if the old dtype does not match the + new dtype. + + Returns + ------- + array : ExtensionArray or ndarray + ExtensionArray or NumPy ndarray with 'dtype' for its dtype. + """ + from pandas import Index + + if dtype is not None: + dtype = pandas_dtype(dtype) + + if is_interval_dtype(dtype): + if dtype == self.dtype: + return self.copy() if copy else self + + # need to cast to different subtype + try: + # We need to use Index rules for astype to prevent casting + # np.nan entries to int subtypes + new_left = Index(self._left, copy=False).astype(dtype.subtype) + new_right = Index(self._right, copy=False).astype(dtype.subtype) + except TypeError as err: + msg = ( + f"Cannot convert {self.dtype} to {dtype}; subtypes are incompatible" + ) + raise TypeError(msg) from err + return self._shallow_copy(new_left, new_right) + else: + try: + return super().astype(dtype, copy=copy) + except (TypeError, ValueError) as err: + msg = f"Cannot cast {type(self).__name__} to dtype {dtype}" + raise TypeError(msg) from err + + def equals(self, other) -> bool: + if type(self) != type(other): + return False + + return bool( + self.closed == other.closed + and self.left.equals(other.left) + and self.right.equals(other.right) + ) + + @classmethod + def _concat_same_type( + cls: type[IntervalArrayT], to_concat: Sequence[IntervalArrayT] + ) -> IntervalArrayT: + """ + Concatenate multiple IntervalArray + + Parameters + ---------- + to_concat : sequence of IntervalArray + + Returns + ------- + IntervalArray + """ + closed = {interval.closed for interval in to_concat} + if len(closed) != 1: + raise ValueError("Intervals must all be closed on the same side.") + closed = closed.pop() + + left = np.concatenate([interval.left for interval in to_concat]) + right = np.concatenate([interval.right for interval in to_concat]) + return cls._simple_new(left, right, closed=closed, copy=False) + + def copy(self: IntervalArrayT) -> IntervalArrayT: + """ + Return a copy of the array. + + Returns + ------- + IntervalArray + """ + left = self._left.copy() + right = self._right.copy() + closed = self.closed + # TODO: Could skip verify_integrity here. + return type(self).from_arrays(left, right, closed=closed) + + def isna(self) -> np.ndarray: + return isna(self._left) + + def shift(self, periods: int = 1, fill_value: object = None) -> IntervalArray: + if not len(self) or periods == 0: + return self.copy() + + if isna(fill_value): + fill_value = self.dtype.na_value + + # ExtensionArray.shift doesn't work for two reasons + # 1. IntervalArray.dtype.na_value may not be correct for the dtype. + # 2. IntervalArray._from_sequence only accepts NaN for missing values, + # not other values like NaT + + empty_len = min(abs(periods), len(self)) + if isna(fill_value): + from pandas import Index + + fill_value = Index(self._left, copy=False)._na_value + empty = IntervalArray.from_breaks([fill_value] * (empty_len + 1)) + else: + empty = self._from_sequence([fill_value] * empty_len) + + if periods > 0: + a = empty + b = self[:-periods] + else: + a = self[abs(periods) :] + b = empty + return self._concat_same_type([a, b]) + + def take( + self: IntervalArrayT, + indices, + *, + allow_fill: bool = False, + fill_value=None, + axis=None, + **kwargs, + ) -> IntervalArrayT: + """ + Take elements from the IntervalArray. 
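+ + For example (a sketch; output illustrative): + + >>> arr = pd.arrays.IntervalArray.from_breaks([0, 1, 2]) + >>> arr.take([1, 0]) # doctest: +SKIP + <IntervalArray> + [(1, 2], (0, 1]] + Length: 2, dtype: interval[int64, right]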
+ + Parameters + ---------- + indices : sequence of integers + Indices to be taken. + + allow_fill : bool, default False + How to handle negative values in `indices`. + + * False: negative values in `indices` indicate positional indices + from the right (the default). This is similar to + :func:`numpy.take`. + + * True: negative values in `indices` indicate + missing values. These values are set to `fill_value`. Any other + other negative values raise a ``ValueError``. + + fill_value : Interval or NA, optional + Fill value to use for NA-indices when `allow_fill` is True. + This may be ``None``, in which case the default NA value for + the type, ``self.dtype.na_value``, is used. + + For many ExtensionArrays, there will be two representations of + `fill_value`: a user-facing "boxed" scalar, and a low-level + physical NA value. `fill_value` should be the user-facing version, + and the implementation should handle translating that to the + physical version for processing the take if necessary. + + axis : any, default None + Present for compat with IntervalIndex; does nothing. + + Returns + ------- + IntervalArray + + Raises + ------ + IndexError + When the indices are out of bounds for the array. + ValueError + When `indices` contains negative values other than ``-1`` + and `allow_fill` is True. + """ + nv.validate_take((), kwargs) + + fill_left = fill_right = fill_value + if allow_fill: + fill_left, fill_right = self._validate_scalar(fill_value) + + left_take = take( + self._left, indices, allow_fill=allow_fill, fill_value=fill_left + ) + right_take = take( + self._right, indices, allow_fill=allow_fill, fill_value=fill_right + ) + + return self._shallow_copy(left_take, right_take) + + def _validate_listlike(self, value): + # list-like of intervals + try: + array = IntervalArray(value) + self._check_closed_matches(array, name="value") + value_left, value_right = array.left, array.right + except TypeError as err: + # wrong type: not interval or NA + msg = f"'value' should be an interval type, got {type(value)} instead." + raise TypeError(msg) from err + + try: + self.left._validate_fill_value(value_left) + except (ValueError, TypeError) as err: + msg = ( + "'value' should be a compatible interval type, " + f"got {type(value)} instead." + ) + raise TypeError(msg) from err + + return value_left, value_right + + def _validate_scalar(self, value): + if isinstance(value, Interval): + self._check_closed_matches(value, name="value") + left, right = value.left, value.right + # TODO: check subdtype match like _validate_setitem_value? 
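+            # Illustrative (hedged) example: for
+            #   arr = pd.arrays.IntervalArray.from_breaks([0, 1])
+            # arr._validate_scalar(pd.Interval(3, 4)) returns (3, 4), while an
+            # NA-like value such as None falls through to the branch below.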
+ elif is_valid_na_for_dtype(value, self.left.dtype): + # GH#18295 + left = right = value + else: + raise TypeError( + "can only insert Interval objects and NA into an IntervalArray" + ) + return left, right + + def _validate_setitem_value(self, value): + needs_float_conversion = False + + if is_valid_na_for_dtype(value, self.left.dtype): + # na value: need special casing to set directly on numpy arrays + if is_integer_dtype(self.dtype.subtype): + # can't set NaN on a numpy integer array + needs_float_conversion = True + elif is_datetime64_dtype(self.dtype.subtype): + # need proper NaT to set directly on the numpy array + value = np.datetime64("NaT") + elif is_datetime64tz_dtype(self.dtype.subtype): + # need proper NaT to set directly on the DatetimeArray array + value = NaT + elif is_timedelta64_dtype(self.dtype.subtype): + # need proper NaT to set directly on the numpy array + value = np.timedelta64("NaT") + value_left, value_right = value, value + + elif isinstance(value, Interval): + # scalar interval + self._check_closed_matches(value, name="value") + value_left, value_right = value.left, value.right + self.left._validate_fill_value(value_left) + self.left._validate_fill_value(value_right) + + else: + return self._validate_listlike(value) + + if needs_float_conversion: + raise ValueError("Cannot set float NaN to integer-backed IntervalArray") + return value_left, value_right + + def value_counts(self, dropna: bool = True): + """ + Returns a Series containing counts of each interval. + + Parameters + ---------- + dropna : bool, default True + Don't include counts of NaN. + + Returns + ------- + counts : Series + + See Also + -------- + Series.value_counts + """ + # TODO: implement this is a non-naive way! + return value_counts(np.asarray(self), dropna=dropna) + + # --------------------------------------------------------------------- + # Rendering Methods + + def _format_data(self) -> str: + + # TODO: integrate with categorical and make generic + # name argument is unused here; just for compat with base / categorical + n = len(self) + max_seq_items = min((get_option("display.max_seq_items") or n) // 10, 10) + + formatter = str + + if n == 0: + summary = "[]" + elif n == 1: + first = formatter(self[0]) + summary = f"[{first}]" + elif n == 2: + first = formatter(self[0]) + last = formatter(self[-1]) + summary = f"[{first}, {last}]" + else: + + if n > max_seq_items: + n = min(max_seq_items // 2, 10) + head = [formatter(x) for x in self[:n]] + tail = [formatter(x) for x in self[-n:]] + head_str = ", ".join(head) + tail_str = ", ".join(tail) + summary = f"[{head_str} ... {tail_str}]" + else: + tail = [formatter(x) for x in self] + tail_str = ", ".join(tail) + summary = f"[{tail_str}]" + + return summary + + def __repr__(self) -> str: + # the short repr has no trailing newline, while the truncated + # repr does. So we include a newline in our template, and strip + # any trailing newlines from format_object_summary + data = self._format_data() + class_name = f"<{type(self).__name__}>\n" + + template = f"{class_name}{data}\nLength: {len(self)}, dtype: {self.dtype}" + return template + + def _format_space(self) -> str: + space = " " * (len(type(self).__name__) + 1) + return f"\n{space}" + + # --------------------------------------------------------------------- + # Vectorized Interval Properties/Attributes + + @property + def left(self): + """ + Return the left endpoints of each Interval in the IntervalArray as + an Index. 
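+
+        For example (illustrative; integer Index repr as of this pandas
+        version):
+
+        >>> pd.arrays.IntervalArray.from_breaks([0, 1, 2]).left
+        Int64Index([0, 1], dtype='int64')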
+ """ + from pandas import Index + + return Index(self._left, copy=False) + + @property + def right(self): + """ + Return the right endpoints of each Interval in the IntervalArray as + an Index. + """ + from pandas import Index + + return Index(self._right, copy=False) + + @property + def length(self): + """ + Return an Index with entries denoting the length of each Interval in + the IntervalArray. + """ + return self.right - self.left + + @property + def mid(self): + """ + Return the midpoint of each Interval in the IntervalArray as an Index. + """ + try: + return 0.5 * (self.left + self.right) + except TypeError: + # datetime safe version + return self.left + 0.5 * self.length + + _interval_shared_docs["overlaps"] = textwrap.dedent( + """ + Check elementwise if an Interval overlaps the values in the %(klass)s. + + Two intervals overlap if they share a common point, including closed + endpoints. Intervals that only have an open endpoint in common do not + overlap. + + Parameters + ---------- + other : %(klass)s + Interval to check against for an overlap. + + Returns + ------- + ndarray + Boolean array positionally indicating where an overlap occurs. + + See Also + -------- + Interval.overlaps : Check whether two Interval objects overlap. + + Examples + -------- + %(examples)s + >>> intervals.overlaps(pd.Interval(0.5, 1.5)) + array([ True, True, False]) + + Intervals that share closed endpoints overlap: + + >>> intervals.overlaps(pd.Interval(1, 3, closed='left')) + array([ True, True, True]) + + Intervals that only have an open endpoint in common do not overlap: + + >>> intervals.overlaps(pd.Interval(1, 2, closed='right')) + array([False, True, False]) + """ + ) + + @Appender( + _interval_shared_docs["overlaps"] + % { + "klass": "IntervalArray", + "examples": textwrap.dedent( + """\ + >>> data = [(0, 1), (1, 3), (2, 4)] + >>> intervals = pd.arrays.IntervalArray.from_tuples(data) + >>> intervals + + [(0, 1], (1, 3], (2, 4]] + Length: 3, dtype: interval[int64, right] + """ + ), + } + ) + def overlaps(self, other): + if isinstance(other, (IntervalArray, ABCIntervalIndex)): + raise NotImplementedError + elif not isinstance(other, Interval): + msg = f"`other` must be Interval-like, got {type(other).__name__}" + raise TypeError(msg) + + # equality is okay if both endpoints are closed (overlap at a point) + op1 = le if (self.closed_left and other.closed_right) else lt + op2 = le if (other.closed_left and self.closed_right) else lt + + # overlaps is equivalent negation of two interval being disjoint: + # disjoint = (A.left > B.right) or (B.left > A.right) + # (simplifying the negation allows this to be done in less operations) + return op1(self.left, other.right) & op2(other.left, self.right) + + # --------------------------------------------------------------------- + + @property + def closed(self): + """ + Whether the intervals are closed on the left-side, right-side, both or + neither. + """ + return self.dtype.closed + + _interval_shared_docs["set_closed"] = textwrap.dedent( + """ + Return an %(klass)s identical to the current one, but closed on the + specified side. + + Parameters + ---------- + closed : {'left', 'right', 'both', 'neither'} + Whether the intervals are closed on the left-side, right-side, both + or neither. 
+ + Returns + ------- + new_index : %(klass)s + + %(examples)s\ + """ + ) + + @Appender( + _interval_shared_docs["set_closed"] + % { + "klass": "IntervalArray", + "examples": textwrap.dedent( + """\ + Examples + -------- + >>> index = pd.arrays.IntervalArray.from_breaks(range(4)) + >>> index + + [(0, 1], (1, 2], (2, 3]] + Length: 3, dtype: interval[int64, right] + >>> index.set_closed('both') + + [[0, 1], [1, 2], [2, 3]] + Length: 3, dtype: interval[int64, both] + """ + ), + } + ) + def set_closed(self: IntervalArrayT, closed) -> IntervalArrayT: + if closed not in VALID_CLOSED: + msg = f"invalid option for 'closed': {closed}" + raise ValueError(msg) + + return type(self)._simple_new( + left=self._left, right=self._right, closed=closed, verify_integrity=False + ) + + _interval_shared_docs[ + "is_non_overlapping_monotonic" + ] = """ + Return True if the %(klass)s is non-overlapping (no Intervals share + points) and is either monotonic increasing or monotonic decreasing, + else False. + """ + + # https://github.com/python/mypy/issues/1362 + # Mypy does not support decorated properties + @property # type: ignore[misc] + @Appender( + _interval_shared_docs["is_non_overlapping_monotonic"] % _shared_docs_kwargs + ) + def is_non_overlapping_monotonic(self) -> bool: + # must be increasing (e.g., [0, 1), [1, 2), [2, 3), ... ) + # or decreasing (e.g., [-1, 0), [-2, -1), [-3, -2), ...) + # we already require left <= right + + # strict inequality for closed == 'both'; equality implies overlapping + # at a point when both sides of intervals are included + if self.closed == "both": + return bool( + (self._right[:-1] < self._left[1:]).all() + or (self._left[:-1] > self._right[1:]).all() + ) + + # non-strict inequality when closed != 'both'; at least one side is + # not included in the intervals, so equality does not imply overlapping + return bool( + (self._right[:-1] <= self._left[1:]).all() + or (self._left[:-1] >= self._right[1:]).all() + ) + + # --------------------------------------------------------------------- + # Conversion + + def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: + """ + Return the IntervalArray's data as a numpy array of Interval + objects (with dtype='object') + """ + left = self._left + right = self._right + mask = self.isna() + closed = self.closed + + result = np.empty(len(left), dtype=object) + for i in range(len(left)): + if mask[i]: + result[i] = np.nan + else: + result[i] = Interval(left[i], right[i], closed) + return result + + def __arrow_array__(self, type=None): + """ + Convert myself into a pyarrow Array. 
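+
+        A minimal sketch, assuming pyarrow is installed (output elided):
+
+        >>> import pyarrow as pa
+        >>> arr = pd.arrays.IntervalArray.from_breaks([0, 1, 2])
+        >>> pa.array(arr)  # dispatches to __arrow_array__  # doctest: +SKIP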
+ """ + import pyarrow + + from pandas.core.arrays._arrow_utils import ArrowIntervalType + + try: + subtype = pyarrow.from_numpy_dtype(self.dtype.subtype) + except TypeError as err: + raise TypeError( + f"Conversion to arrow with subtype '{self.dtype.subtype}' " + "is not supported" + ) from err + interval_type = ArrowIntervalType(subtype, self.closed) + storage_array = pyarrow.StructArray.from_arrays( + [ + pyarrow.array(self._left, type=subtype, from_pandas=True), + pyarrow.array(self._right, type=subtype, from_pandas=True), + ], + names=["left", "right"], + ) + mask = self.isna() + if mask.any(): + # if there are missing values, set validity bitmap also on the array level + null_bitmap = pyarrow.array(~mask).buffers()[1] + storage_array = pyarrow.StructArray.from_buffers( + storage_array.type, + len(storage_array), + [null_bitmap], + children=[storage_array.field(0), storage_array.field(1)], + ) + + if type is not None: + if type.equals(interval_type.storage_type): + return storage_array + elif isinstance(type, ArrowIntervalType): + # ensure we have the same subtype and closed attributes + if not type.equals(interval_type): + raise TypeError( + "Not supported to convert IntervalArray to type with " + f"different 'subtype' ({self.dtype.subtype} vs {type.subtype}) " + f"and 'closed' ({self.closed} vs {type.closed}) attributes" + ) + else: + raise TypeError( + f"Not supported to convert IntervalArray to '{type}' type" + ) + + return pyarrow.ExtensionArray.from_storage(interval_type, storage_array) + + _interval_shared_docs[ + "to_tuples" + ] = """ + Return an %(return_type)s of tuples of the form (left, right). + + Parameters + ---------- + na_tuple : bool, default True + Returns NA as a tuple if True, ``(nan, nan)``, or just as the NA + value itself if False, ``nan``. + + Returns + ------- + tuples: %(return_type)s + %(examples)s\ + """ + + @Appender( + _interval_shared_docs["to_tuples"] % {"return_type": "ndarray", "examples": ""} + ) + def to_tuples(self, na_tuple=True) -> np.ndarray: + tuples = com.asarray_tuplesafe(zip(self._left, self._right)) + if not na_tuple: + # GH 18756 + tuples = np.where(~self.isna(), tuples, np.nan) + return tuples + + # --------------------------------------------------------------------- + + def _putmask(self, mask: npt.NDArray[np.bool_], value) -> None: + value_left, value_right = self._validate_setitem_value(value) + + if isinstance(self._left, np.ndarray): + np.putmask(self._left, mask, value_left) + np.putmask(self._right, mask, value_right) + else: + self._left._putmask(mask, value_left) + self._right._putmask(mask, value_right) + + def insert(self: IntervalArrayT, loc: int, item: Interval) -> IntervalArrayT: + """ + Return a new IntervalArray inserting new item at location. Follows + Python numpy.insert semantics for negative values. 
Only Interval
+        objects and NA can be inserted into an IntervalIndex
+
+        Parameters
+        ----------
+        loc : int
+        item : Interval
+
+        Returns
+        -------
+        IntervalArray
+        """
+        left_insert, right_insert = self._validate_scalar(item)
+
+        new_left = self.left.insert(loc, left_insert)
+        new_right = self.right.insert(loc, right_insert)
+
+        return self._shallow_copy(new_left, new_right)
+
+    def delete(self: IntervalArrayT, loc) -> IntervalArrayT:
+        if isinstance(self._left, np.ndarray):
+            new_left = np.delete(self._left, loc)
+            new_right = np.delete(self._right, loc)
+        else:
+            new_left = self._left.delete(loc)
+            new_right = self._right.delete(loc)
+        return self._shallow_copy(left=new_left, right=new_right)
+
+    @Appender(_extension_array_shared_docs["repeat"] % _shared_docs_kwargs)
+    def repeat(
+        self: IntervalArrayT,
+        repeats: int | Sequence[int],
+        axis: int | None = None,
+    ) -> IntervalArrayT:
+        nv.validate_repeat((), {"axis": axis})
+        left_repeat = self.left.repeat(repeats)
+        right_repeat = self.right.repeat(repeats)
+        return self._shallow_copy(left=left_repeat, right=right_repeat)
+
+    _interval_shared_docs["contains"] = textwrap.dedent(
+        """
+        Check elementwise if the Intervals contain the value.
+
+        Return a boolean mask whether the value is contained in the Intervals
+        of the %(klass)s.
+
+        .. versionadded:: 0.25.0
+
+        Parameters
+        ----------
+        other : scalar
+            The value to check whether it is contained in the Intervals.
+
+        Returns
+        -------
+        boolean array
+
+        See Also
+        --------
+        Interval.contains : Check whether Interval object contains value.
+        %(klass)s.overlaps : Check if an Interval overlaps the values in the
+            %(klass)s.
+
+        Examples
+        --------
+        %(examples)s
+        >>> intervals.contains(0.5)
+        array([ True, False, False])
+        """
+    )
+
+    @Appender(
+        _interval_shared_docs["contains"]
+        % {
+            "klass": "IntervalArray",
+            "examples": textwrap.dedent(
+                """\
+        >>> intervals = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 3), (2, 4)])
+        >>> intervals
+        <IntervalArray>
+        [(0, 1], (1, 3], (2, 4]]
+        Length: 3, dtype: interval[int64, right]
+        """
+            ),
+        }
+    )
+    def contains(self, other):
+        if isinstance(other, Interval):
+            raise NotImplementedError("contains not implemented for two intervals")
+
+        return (self._left < other if self.open_left else self._left <= other) & (
+            other < self._right if self.open_right else other <= self._right
+        )
+
+    def isin(self, values) -> np.ndarray:
+        if not hasattr(values, "dtype"):
+            values = np.array(values)
+        values = extract_array(values, extract_numpy=True)
+
+        if is_interval_dtype(values.dtype):
+            if self.closed != values.closed:
+                # not comparable -> no overlap
+                return np.zeros(self.shape, dtype=bool)
+
+            if is_dtype_equal(self.dtype, values.dtype):
+                # GH#38353 instead of casting to object, operating on a
+                # complex128 ndarray is much more performant.
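+                # Sketch of the trick: _combined stacks left/right into an
+                # (n, 2) array of 8-byte values, and .view("complex128")
+                # reinterprets each row's 16 bytes as a single complex scalar,
+                # so interval membership reduces to a 1-D np.in1d over scalars.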
+ left = self._combined.view("complex128") + right = values._combined.view("complex128") + # Argument 1 to "in1d" has incompatible type "Union[ExtensionArray, + # ndarray[Any, Any], ndarray[Any, dtype[Any]]]"; expected + # "Union[_SupportsArray[dtype[Any]], _NestedSequence[_SupportsArray[ + # dtype[Any]]], bool, int, float, complex, str, bytes, + # _NestedSequence[Union[bool, int, float, complex, str, bytes]]]" + # [arg-type] + return np.in1d(left, right) # type: ignore[arg-type] + + elif needs_i8_conversion(self.left.dtype) ^ needs_i8_conversion( + values.left.dtype + ): + # not comparable -> no overlap + return np.zeros(self.shape, dtype=bool) + + return isin(self.astype(object), values.astype(object)) + + @property + def _combined(self) -> ArrayLike: + left = self.left._values.reshape(-1, 1) + right = self.right._values.reshape(-1, 1) + if needs_i8_conversion(left.dtype): + comb = left._concat_same_type([left, right], axis=1) + else: + comb = np.concatenate([left, right], axis=1) + return comb + + def _from_combined(self, combined: np.ndarray) -> IntervalArray: + """ + Create a new IntervalArray with our dtype from a 1D complex128 ndarray. + """ + nc = combined.view("i8").reshape(-1, 2) + + dtype = self._left.dtype + if needs_i8_conversion(dtype): + new_left = type(self._left)._from_sequence(nc[:, 0], dtype=dtype) + new_right = type(self._right)._from_sequence(nc[:, 1], dtype=dtype) + else: + new_left = nc[:, 0].view(dtype) + new_right = nc[:, 1].view(dtype) + return self._shallow_copy(left=new_left, right=new_right) + + def unique(self) -> IntervalArray: + # No overload variant of "__getitem__" of "ExtensionArray" matches argument + # type "Tuple[slice, int]" + nc = unique( + self._combined.view("complex128")[:, 0] # type: ignore[call-overload] + ) + nc = nc[:, None] + return self._from_combined(nc) + + +def _maybe_convert_platform_interval(values) -> ArrayLike: + """ + Try to do platform conversion, with special casing for IntervalArray. + Wrapper around maybe_convert_platform that alters the default return + dtype in certain cases to be compatible with IntervalArray. For example, + empty lists return with integer dtype instead of object dtype, which is + prohibited for IntervalArray. + + Parameters + ---------- + values : array-like + + Returns + ------- + array + """ + if isinstance(values, (list, tuple)) and len(values) == 0: + # GH 19016 + # empty lists/tuples get object dtype by default, but this is + # prohibited for IntervalArray, so coerce to integer instead + return np.array([], dtype=np.int64) + elif not is_list_like(values) or isinstance(values, ABCDataFrame): + # This will raise later, but we avoid passing to maybe_convert_platform + return values + elif is_categorical_dtype(values): + values = np.asarray(values) + elif not hasattr(values, "dtype") and not isinstance(values, (list, tuple, range)): + # TODO: should we just cast these to list? 
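+        # e.g. generators or other list-likes without a dtype reach this
+        # branch and are returned unchanged for the caller to handle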
+        return values
+    else:
+        values = extract_array(values, extract_numpy=True)
+
+    if not hasattr(values, "dtype"):
+        return np.asarray(values)
+    return values
diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/masked.py b/.local/lib/python3.8/site-packages/pandas/core/arrays/masked.py
new file mode 100644
index 0000000..7d23265
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/core/arrays/masked.py
@@ -0,0 +1,1039 @@
+from __future__ import annotations
+
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Sequence,
+    TypeVar,
+    overload,
+)
+import warnings
+
+import numpy as np
+
+from pandas._libs import (
+    iNaT,
+    lib,
+    missing as libmissing,
+)
+from pandas._typing import (
+    ArrayLike,
+    AstypeArg,
+    NpDtype,
+    PositionalIndexer,
+    Scalar,
+    ScalarIndexer,
+    SequenceIndexer,
+    Shape,
+    npt,
+    type_t,
+)
+from pandas.errors import AbstractMethodError
+from pandas.util._decorators import (
+    cache_readonly,
+    doc,
+)
+from pandas.util._validators import validate_fillna_kwargs
+
+from pandas.core.dtypes.base import ExtensionDtype
+from pandas.core.dtypes.common import (
+    is_bool,
+    is_bool_dtype,
+    is_dtype_equal,
+    is_float,
+    is_float_dtype,
+    is_integer_dtype,
+    is_list_like,
+    is_numeric_dtype,
+    is_object_dtype,
+    is_scalar,
+    is_string_dtype,
+    pandas_dtype,
+)
+from pandas.core.dtypes.inference import is_array_like
+from pandas.core.dtypes.missing import (
+    array_equivalent,
+    isna,
+    notna,
+)
+
+from pandas.core import (
+    arraylike,
+    missing,
+    nanops,
+    ops,
+)
+from pandas.core.algorithms import (
+    factorize_array,
+    isin,
+    take,
+)
+from pandas.core.array_algos import masked_reductions
+from pandas.core.array_algos.quantile import quantile_with_mask
+from pandas.core.arraylike import OpsMixin
+from pandas.core.arrays import ExtensionArray
+from pandas.core.indexers import check_array_indexer
+from pandas.core.ops import invalid_comparison
+
+if TYPE_CHECKING:
+    from pandas import Series
+    from pandas.core.arrays import BooleanArray
+
+from pandas.compat.numpy import function as nv
+
+BaseMaskedArrayT = TypeVar("BaseMaskedArrayT", bound="BaseMaskedArray")
+
+
+class BaseMaskedDtype(ExtensionDtype):
+    """
+    Base class for dtypes for BaseMaskedArray subclasses.
+    """
+
+    name: str
+    base = None
+    type: type
+
+    na_value = libmissing.NA
+
+    @cache_readonly
+    def numpy_dtype(self) -> np.dtype:
+        """Return an instance of our numpy dtype"""
+        return np.dtype(self.type)
+
+    @cache_readonly
+    def kind(self) -> str:
+        return self.numpy_dtype.kind
+
+    @cache_readonly
+    def itemsize(self) -> int:
+        """Return the number of bytes in this dtype"""
+        return self.numpy_dtype.itemsize
+
+    @classmethod
+    def construct_array_type(cls) -> type_t[BaseMaskedArray]:
+        """
+        Return the array type associated with this dtype.
+
+        Returns
+        -------
+        type
+        """
+        raise NotImplementedError
+
+
+class BaseMaskedArray(OpsMixin, ExtensionArray):
+    """
+    Base class for masked arrays (which use _data and _mask to store the data).
+ + numpy based + """ + + # The value used to fill '_data' to avoid upcasting + _internal_fill_value: Scalar + # our underlying data and mask are each ndarrays + _data: np.ndarray + _mask: np.ndarray + + # Fill values used for any/all + _truthy_value = Scalar # bool(_truthy_value) = True + _falsey_value = Scalar # bool(_falsey_value) = False + + def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False): + # values is supposed to already be validated in the subclass + if not (isinstance(mask, np.ndarray) and mask.dtype == np.bool_): + raise TypeError( + "mask should be boolean numpy array. Use " + "the 'pd.array' function instead" + ) + if values.shape != mask.shape: + raise ValueError("values.shape must match mask.shape") + + if copy: + values = values.copy() + mask = mask.copy() + + self._data = values + self._mask = mask + + @property + def dtype(self) -> BaseMaskedDtype: + raise AbstractMethodError(self) + + @overload + def __getitem__(self, item: ScalarIndexer) -> Any: + ... + + @overload + def __getitem__(self: BaseMaskedArrayT, item: SequenceIndexer) -> BaseMaskedArrayT: + ... + + def __getitem__( + self: BaseMaskedArrayT, item: PositionalIndexer + ) -> BaseMaskedArrayT | Any: + item = check_array_indexer(self, item) + + newmask = self._mask[item] + if is_bool(newmask): + # This is a scalar indexing + if newmask: + return self.dtype.na_value + return self._data[item] + + return type(self)(self._data[item], newmask) + + @doc(ExtensionArray.fillna) + def fillna( + self: BaseMaskedArrayT, value=None, method=None, limit=None + ) -> BaseMaskedArrayT: + value, method = validate_fillna_kwargs(value, method) + + mask = self._mask + + if is_array_like(value): + if len(value) != len(self): + raise ValueError( + f"Length of 'value' does not match. 
Got ({len(value)}) " + f" expected {len(self)}" + ) + value = value[mask] + + if mask.any(): + if method is not None: + func = missing.get_fill_func(method, ndim=self.ndim) + new_values, new_mask = func( + self._data.copy().T, + limit=limit, + mask=mask.copy().T, + ) + return type(self)(new_values.T, new_mask.view(np.bool_).T) + else: + # fill with value + new_values = self.copy() + new_values[mask] = value + else: + new_values = self.copy() + return new_values + + def _coerce_to_array(self, values) -> tuple[np.ndarray, np.ndarray]: + raise AbstractMethodError(self) + + def __setitem__(self, key, value) -> None: + _is_scalar = is_scalar(value) + if _is_scalar: + value = [value] + value, mask = self._coerce_to_array(value) + + if _is_scalar: + value = value[0] + mask = mask[0] + + key = check_array_indexer(self, key) + self._data[key] = value + self._mask[key] = mask + + def __iter__(self): + if self.ndim == 1: + for i in range(len(self)): + if self._mask[i]: + yield self.dtype.na_value + else: + yield self._data[i] + else: + for i in range(len(self)): + yield self[i] + + def __len__(self) -> int: + return len(self._data) + + @property + def shape(self) -> Shape: + return self._data.shape + + @property + def ndim(self) -> int: + return self._data.ndim + + def swapaxes(self: BaseMaskedArrayT, axis1, axis2) -> BaseMaskedArrayT: + data = self._data.swapaxes(axis1, axis2) + mask = self._mask.swapaxes(axis1, axis2) + return type(self)(data, mask) + + def delete(self: BaseMaskedArrayT, loc, axis: int = 0) -> BaseMaskedArrayT: + data = np.delete(self._data, loc, axis=axis) + mask = np.delete(self._mask, loc, axis=axis) + return type(self)(data, mask) + + def reshape(self: BaseMaskedArrayT, *args, **kwargs) -> BaseMaskedArrayT: + data = self._data.reshape(*args, **kwargs) + mask = self._mask.reshape(*args, **kwargs) + return type(self)(data, mask) + + def ravel(self: BaseMaskedArrayT, *args, **kwargs) -> BaseMaskedArrayT: + # TODO: need to make sure we have the same order for data/mask + data = self._data.ravel(*args, **kwargs) + mask = self._mask.ravel(*args, **kwargs) + return type(self)(data, mask) + + @property + def T(self: BaseMaskedArrayT) -> BaseMaskedArrayT: + return type(self)(self._data.T, self._mask.T) + + def __invert__(self: BaseMaskedArrayT) -> BaseMaskedArrayT: + return type(self)(~self._data, self._mask.copy()) + + def to_numpy( + self, + dtype: npt.DTypeLike | None = None, + copy: bool = False, + na_value: Scalar = lib.no_default, + ) -> np.ndarray: + """ + Convert to a NumPy Array. + + By default converts to an object-dtype NumPy array. Specify the `dtype` and + `na_value` keywords to customize the conversion. + + Parameters + ---------- + dtype : dtype, default object + The numpy dtype to convert to. + copy : bool, default False + Whether to ensure that the returned value is a not a view on + the array. Note that ``copy=False`` does not *ensure* that + ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensure that + a copy is made, even if not strictly necessary. This is typically + only possible when no missing values are present and `dtype` + is the equivalent numpy dtype. + na_value : scalar, optional + Scalar missing value indicator to use in numpy array. Defaults + to the native missing value indicator of this array (pd.NA). 
+
+        Returns
+        -------
+        numpy.ndarray
+
+        Examples
+        --------
+        An object-dtype is the default result
+
+        >>> a = pd.array([True, False, pd.NA], dtype="boolean")
+        >>> a.to_numpy()
+        array([True, False, <NA>], dtype=object)
+
+        When no missing values are present, an equivalent dtype can be used.
+
+        >>> pd.array([True, False], dtype="boolean").to_numpy(dtype="bool")
+        array([ True, False])
+        >>> pd.array([1, 2], dtype="Int64").to_numpy("int64")
+        array([1, 2])
+
+        However, requesting such dtype will raise a ValueError if
+        missing values are present and the default missing value :attr:`NA`
+        is used.
+
+        >>> a = pd.array([True, False, pd.NA], dtype="boolean")
+        >>> a
+        <BooleanArray>
+        [True, False, <NA>]
+        Length: 3, dtype: boolean
+
+        >>> a.to_numpy(dtype="bool")
+        Traceback (most recent call last):
+        ...
+        ValueError: cannot convert to bool numpy array in presence of missing values
+
+        Specify a valid `na_value` instead
+
+        >>> a.to_numpy(dtype="bool", na_value=False)
+        array([ True, False, False])
+        """
+        if na_value is lib.no_default:
+            na_value = libmissing.NA
+        if dtype is None:
+            dtype = object
+        if self._hasna:
+            if (
+                not is_object_dtype(dtype)
+                and not is_string_dtype(dtype)
+                and na_value is libmissing.NA
+            ):
+                raise ValueError(
+                    f"cannot convert to '{dtype}'-dtype NumPy array "
+                    "with missing values. Specify an appropriate 'na_value' "
+                    "for this dtype."
+                )
+            # don't pass copy to astype -> always need a copy since we are mutating
+            data = self._data.astype(dtype)
+            data[self._mask] = na_value
+        else:
+            data = self._data.astype(dtype, copy=copy)
+        return data
+
+    @overload
+    def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray:
+        ...
+
+    @overload
+    def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray:
+        ...
+
+    @overload
+    def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike:
+        ...
+
+    def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
+        dtype = pandas_dtype(dtype)
+
+        if is_dtype_equal(dtype, self.dtype):
+            if copy:
+                return self.copy()
+            return self
+
+        # if we are astyping to another nullable masked dtype, we can fastpath
+        if isinstance(dtype, BaseMaskedDtype):
+            # TODO deal with NaNs for FloatingArray case
+            data = self._data.astype(dtype.numpy_dtype, copy=copy)
+            # mask is copied depending on whether the data was copied, and
+            # not directly depending on the `copy` keyword
+            mask = self._mask if data is self._data else self._mask.copy()
+            cls = dtype.construct_array_type()
+            return cls(data, mask, copy=False)
+
+        if isinstance(dtype, ExtensionDtype):
+            eacls = dtype.construct_array_type()
+            return eacls._from_sequence(self, dtype=dtype, copy=copy)
+
+        raise NotImplementedError("subclass must implement astype to np.dtype")
+
+    __array_priority__ = 1000  # higher than ndarray so ops dispatch to us
+
+    def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
+        """
+        the array interface, return my values
+        We return an object array here to preserve our scalar values
+        """
+        return self.to_numpy(dtype=dtype)
+
+    _HANDLED_TYPES: tuple[type, ...]
+
+    def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
+        # For MaskedArray inputs, we apply the ufunc to ._data
+        # and mask the result.
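+        # Illustrative example: np.sqrt(pd.array([1, 4, pd.NA], dtype="Int64"))
+        # applies sqrt to the int64 data and reattaches the mask, yielding a
+        # FloatingArray of [1.0, 2.0, <NA>].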
+ + out = kwargs.get("out", ()) + + for x in inputs + out: + if not isinstance(x, self._HANDLED_TYPES + (BaseMaskedArray,)): + return NotImplemented + + # for binary ops, use our custom dunder methods + result = ops.maybe_dispatch_ufunc_to_dunder_op( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + return result + + if "out" in kwargs: + # e.g. test_ufunc_with_out + return arraylike.dispatch_ufunc_with_out( + self, ufunc, method, *inputs, **kwargs + ) + + if method == "reduce": + result = arraylike.dispatch_reduction_ufunc( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + return result + + mask = np.zeros(len(self), dtype=bool) + inputs2 = [] + for x in inputs: + if isinstance(x, BaseMaskedArray): + mask |= x._mask + inputs2.append(x._data) + else: + inputs2.append(x) + + def reconstruct(x): + # we don't worry about scalar `x` here, since we + # raise for reduce up above. + from pandas.core.arrays import ( + BooleanArray, + FloatingArray, + IntegerArray, + ) + + if is_bool_dtype(x.dtype): + m = mask.copy() + return BooleanArray(x, m) + elif is_integer_dtype(x.dtype): + m = mask.copy() + return IntegerArray(x, m) + elif is_float_dtype(x.dtype): + m = mask.copy() + if x.dtype == np.float16: + # reached in e.g. np.sqrt on BooleanArray + # we don't support float16 + x = x.astype(np.float32) + return FloatingArray(x, m) + else: + x[mask] = np.nan + return x + + result = getattr(ufunc, method)(*inputs2, **kwargs) + if ufunc.nout > 1: + # e.g. np.divmod + return tuple(reconstruct(x) for x in result) + elif method == "reduce": + # e.g. np.add.reduce; test_ufunc_reduce_raises + if self._mask.any(): + return self._na_value + return result + else: + return reconstruct(result) + + def __arrow_array__(self, type=None): + """ + Convert myself into a pyarrow Array. + """ + import pyarrow as pa + + return pa.array(self._data, mask=self._mask, type=type) + + @property + def _hasna(self) -> bool: + # Note: this is expensive right now! The hope is that we can + # make this faster by having an optional mask, but not have to change + # source code using it.. + + # error: Incompatible return value type (got "bool_", expected "bool") + return self._mask.any() # type: ignore[return-value] + + def _cmp_method(self, other, op) -> BooleanArray: + from pandas.core.arrays import BooleanArray + + mask = None + + if isinstance(other, BaseMaskedArray): + other, mask = other._data, other._mask + + elif is_list_like(other): + other = np.asarray(other) + if other.ndim > 1: + raise NotImplementedError("can only perform ops with 1-d structures") + if len(self) != len(other): + raise ValueError("Lengths must match to compare") + + if other is libmissing.NA: + # numpy does not handle pd.NA well as "other" scalar (it returns + # a scalar False instead of an array) + # This may be fixed by NA.__array_ufunc__. Revisit this check + # once that's implemented. + result = np.zeros(self._data.shape, dtype="bool") + mask = np.ones(self._data.shape, dtype="bool") + else: + with warnings.catch_warnings(): + # numpy may show a FutureWarning: + # elementwise comparison failed; returning scalar instead, + # but in the future will perform elementwise comparison + # before returning NotImplemented. We fall back to the correct + # behavior today, so that should be fine to ignore. 
+ warnings.filterwarnings("ignore", "elementwise", FutureWarning) + with np.errstate(all="ignore"): + method = getattr(self._data, f"__{op.__name__}__") + result = method(other) + + if result is NotImplemented: + result = invalid_comparison(self._data, other, op) + + # nans propagate + if mask is None: + mask = self._mask.copy() + else: + mask = self._mask | mask + + return BooleanArray(result, mask, copy=False) + + def _maybe_mask_result(self, result, mask, other, op_name: str): + """ + Parameters + ---------- + result : array-like + mask : array-like bool + other : scalar or array-like + op_name : str + """ + # if we have a float operand we are by-definition + # a float result + # or our op is a divide + if ( + (is_float_dtype(other) or is_float(other)) + or (op_name in ["rtruediv", "truediv"]) + or (is_float_dtype(self.dtype) and is_numeric_dtype(result.dtype)) + ): + from pandas.core.arrays import FloatingArray + + return FloatingArray(result, mask, copy=False) + + elif is_bool_dtype(result): + from pandas.core.arrays import BooleanArray + + return BooleanArray(result, mask, copy=False) + + elif result.dtype == "timedelta64[ns]": + # e.g. test_numeric_arr_mul_tdscalar_numexpr_path + from pandas.core.arrays import TimedeltaArray + + result[mask] = iNaT + return TimedeltaArray._simple_new(result) + + elif is_integer_dtype(result): + from pandas.core.arrays import IntegerArray + + return IntegerArray(result, mask, copy=False) + + else: + result[mask] = np.nan + return result + + def isna(self) -> np.ndarray: + return self._mask.copy() + + @property + def _na_value(self): + return self.dtype.na_value + + @property + def nbytes(self) -> int: + return self._data.nbytes + self._mask.nbytes + + @classmethod + def _concat_same_type( + cls: type[BaseMaskedArrayT], + to_concat: Sequence[BaseMaskedArrayT], + axis: int = 0, + ) -> BaseMaskedArrayT: + data = np.concatenate([x._data for x in to_concat], axis=axis) + mask = np.concatenate([x._mask for x in to_concat], axis=axis) + return cls(data, mask) + + def take( + self: BaseMaskedArrayT, + indexer, + *, + allow_fill: bool = False, + fill_value: Scalar | None = None, + axis: int = 0, + ) -> BaseMaskedArrayT: + # we always fill with 1 internally + # to avoid upcasting + data_fill_value = self._internal_fill_value if isna(fill_value) else fill_value + result = take( + self._data, + indexer, + fill_value=data_fill_value, + allow_fill=allow_fill, + axis=axis, + ) + + mask = take( + self._mask, indexer, fill_value=True, allow_fill=allow_fill, axis=axis + ) + + # if we are filling + # we only fill where the indexer is null + # not existing missing values + # TODO(jreback) what if we have a non-na float as a fill value? 
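+        # Illustrative: take([0, -1], allow_fill=True, fill_value=7) writes 7
+        # only where the indexer is -1; NAs taken from real positions keep
+        # their mask bits.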
+ if allow_fill and notna(fill_value): + fill_mask = np.asarray(indexer) == -1 + result[fill_mask] = fill_value + mask = mask ^ fill_mask + + return type(self)(result, mask, copy=False) + + # error: Return type "BooleanArray" of "isin" incompatible with return type + # "ndarray" in supertype "ExtensionArray" + def isin(self, values) -> BooleanArray: # type: ignore[override] + + from pandas.core.arrays import BooleanArray + + # algorithms.isin will eventually convert values to an ndarray, so no extra + # cost to doing it here first + values_arr = np.asarray(values) + result = isin(self._data, values_arr) + + if self._hasna: + values_have_NA = is_object_dtype(values_arr.dtype) and any( + val is self.dtype.na_value for val in values_arr + ) + + # For now, NA does not propagate so set result according to presence of NA, + # see https://github.com/pandas-dev/pandas/pull/38379 for some discussion + result[self._mask] = values_have_NA + + mask = np.zeros(self._data.shape, dtype=bool) + return BooleanArray(result, mask, copy=False) + + def copy(self: BaseMaskedArrayT) -> BaseMaskedArrayT: + data, mask = self._data, self._mask + data = data.copy() + mask = mask.copy() + return type(self)(data, mask, copy=False) + + @doc(ExtensionArray.factorize) + def factorize(self, na_sentinel: int = -1) -> tuple[np.ndarray, ExtensionArray]: + arr = self._data + mask = self._mask + + codes, uniques = factorize_array(arr, na_sentinel=na_sentinel, mask=mask) + + # the hashtables don't handle all different types of bits + uniques = uniques.astype(self.dtype.numpy_dtype, copy=False) + uniques_ea = type(self)(uniques, np.zeros(len(uniques), dtype=bool)) + return codes, uniques_ea + + def value_counts(self, dropna: bool = True) -> Series: + """ + Returns a Series containing counts of each unique value. + + Parameters + ---------- + dropna : bool, default True + Don't include counts of missing values. + + Returns + ------- + counts : Series + + See Also + -------- + Series.value_counts + """ + from pandas import ( + Index, + Series, + ) + from pandas.arrays import IntegerArray + + # compute counts on the data with no nans + data = self._data[~self._mask] + value_counts = Index(data).value_counts() + + index = value_counts.index + + # if we want nans, count the mask + if dropna: + counts = value_counts._values + else: + counts = np.empty(len(value_counts) + 1, dtype="int64") + counts[:-1] = value_counts + counts[-1] = self._mask.sum() + + index = index.insert(len(index), self.dtype.na_value) + + index = index.astype(self.dtype) + + mask = np.zeros(len(counts), dtype="bool") + counts = IntegerArray(counts, mask) + + return Series(counts, index=index) + + @doc(ExtensionArray.equals) + def equals(self, other) -> bool: + if type(self) != type(other): + return False + if other.dtype != self.dtype: + return False + + # GH#44382 if e.g. self[1] is np.nan and other[1] is pd.NA, we are NOT + # equal. + if not np.array_equal(self._mask, other._mask): + return False + + left = self._data[~self._mask] + right = other._data[~other._mask] + return array_equivalent(left, right, dtype_equal=True) + + def _quantile( + self: BaseMaskedArrayT, qs: npt.NDArray[np.float64], interpolation: str + ) -> BaseMaskedArrayT: + """ + Dispatch to quantile_with_mask, needed because we do not have + _from_factorized. + + Notes + ----- + We assume that all impacted cases are 1D-only. 
+ """ + mask = np.atleast_2d(np.asarray(self.isna())) + npvalues: np.ndarray = np.atleast_2d(np.asarray(self)) + + res = quantile_with_mask( + npvalues, + mask=mask, + fill_value=self.dtype.na_value, + qs=qs, + interpolation=interpolation, + ) + assert res.ndim == 2 + assert res.shape[0] == 1 + res = res[0] + try: + out = type(self)._from_sequence(res, dtype=self.dtype) + except TypeError: + # GH#42626: not able to safely cast Int64 + # for floating point output + out = np.asarray(res, dtype=np.float64) + return out + + def _reduce(self, name: str, *, skipna: bool = True, **kwargs): + if name in {"any", "all", "min", "max", "sum", "prod"}: + return getattr(self, name)(skipna=skipna, **kwargs) + + data = self._data + mask = self._mask + + if name in {"mean"}: + op = getattr(masked_reductions, name) + result = op(data, mask, skipna=skipna, **kwargs) + return result + + # coerce to a nan-aware float if needed + # (we explicitly use NaN within reductions) + if self._hasna: + data = self.to_numpy("float64", na_value=np.nan) + + # median, var, std, skew, kurt, idxmin, idxmax + op = getattr(nanops, "nan" + name) + result = op(data, axis=0, skipna=skipna, mask=mask, **kwargs) + + if np.isnan(result): + return libmissing.NA + + return result + + def _wrap_reduction_result(self, name: str, result, skipna, **kwargs): + if isinstance(result, np.ndarray): + axis = kwargs["axis"] + if skipna: + # we only retain mask for all-NA rows/columns + mask = self._mask.all(axis=axis) + else: + mask = self._mask.any(axis=axis) + + return self._maybe_mask_result(result, mask, other=None, op_name=name) + return result + + def sum(self, *, skipna=True, min_count=0, axis: int | None = 0, **kwargs): + nv.validate_sum((), kwargs) + + # TODO: do this in validate_sum? + if "out" in kwargs: + # np.sum; test_floating_array_numpy_sum + if kwargs["out"] is not None: + raise NotImplementedError + kwargs.pop("out") + + result = masked_reductions.sum( + self._data, + self._mask, + skipna=skipna, + min_count=min_count, + axis=axis, + ) + return self._wrap_reduction_result( + "sum", result, skipna=skipna, axis=axis, **kwargs + ) + + def prod(self, *, skipna=True, min_count=0, axis: int | None = 0, **kwargs): + nv.validate_prod((), kwargs) + result = masked_reductions.prod( + self._data, + self._mask, + skipna=skipna, + min_count=min_count, + axis=axis, + ) + return self._wrap_reduction_result( + "prod", result, skipna=skipna, axis=axis, **kwargs + ) + + def min(self, *, skipna=True, axis: int | None = 0, **kwargs): + nv.validate_min((), kwargs) + return masked_reductions.min( + self._data, + self._mask, + skipna=skipna, + axis=axis, + ) + + def max(self, *, skipna=True, axis: int | None = 0, **kwargs): + nv.validate_max((), kwargs) + return masked_reductions.max( + self._data, + self._mask, + skipna=skipna, + axis=axis, + ) + + def any(self, *, skipna: bool = True, **kwargs): + """ + Return whether any element is truthy. + + Returns False unless there is at least one element that is truthy. + By default, NAs are skipped. If ``skipna=False`` is specified and + missing values are present, similar :ref:`Kleene logic ` + is used as for logical operations. + + .. versionchanged:: 1.4.0 + + Parameters + ---------- + skipna : bool, default True + Exclude NA values. If the entire array is NA and `skipna` is + True, then the result will be False, as for an empty array. + If `skipna` is False, the result will still be True if there is + at least one element that is truthy, otherwise NA will be returned + if there are NA's present. 
+ **kwargs : any, default None + Additional keywords have no effect but might be accepted for + compatibility with NumPy. + + Returns + ------- + bool or :attr:`pandas.NA` + + See Also + -------- + numpy.any : Numpy version of this method. + BaseMaskedArray.all : Return whether all elements are truthy. + + Examples + -------- + The result indicates whether any element is truthy (and by default + skips NAs): + + >>> pd.array([True, False, True]).any() + True + >>> pd.array([True, False, pd.NA]).any() + True + >>> pd.array([False, False, pd.NA]).any() + False + >>> pd.array([], dtype="boolean").any() + False + >>> pd.array([pd.NA], dtype="boolean").any() + False + >>> pd.array([pd.NA], dtype="Float64").any() + False + + With ``skipna=False``, the result can be NA if this is logically + required (whether ``pd.NA`` is True or False influences the result): + + >>> pd.array([True, False, pd.NA]).any(skipna=False) + True + >>> pd.array([1, 0, pd.NA]).any(skipna=False) + True + >>> pd.array([False, False, pd.NA]).any(skipna=False) + + >>> pd.array([0, 0, pd.NA]).any(skipna=False) + + """ + kwargs.pop("axis", None) + nv.validate_any((), kwargs) + + values = self._data.copy() + # Argument 3 to "putmask" has incompatible type "object"; expected + # "Union[_SupportsArray[dtype[Any]], _NestedSequence[ + # _SupportsArray[dtype[Any]]], bool, int, float, complex, str, bytes, _Nested + # Sequence[Union[bool, int, float, complex, str, bytes]]]" [arg-type] + np.putmask(values, self._mask, self._falsey_value) # type: ignore[arg-type] + result = values.any() + if skipna: + return result + else: + if result or len(self) == 0 or not self._mask.any(): + return result + else: + return self.dtype.na_value + + def all(self, *, skipna: bool = True, **kwargs): + """ + Return whether all elements are truthy. + + Returns True unless there is at least one element that is falsey. + By default, NAs are skipped. If ``skipna=False`` is specified and + missing values are present, similar :ref:`Kleene logic ` + is used as for logical operations. + + .. versionchanged:: 1.4.0 + + Parameters + ---------- + skipna : bool, default True + Exclude NA values. If the entire array is NA and `skipna` is + True, then the result will be True, as for an empty array. + If `skipna` is False, the result will still be False if there is + at least one element that is falsey, otherwise NA will be returned + if there are NA's present. + **kwargs : any, default None + Additional keywords have no effect but might be accepted for + compatibility with NumPy. + + Returns + ------- + bool or :attr:`pandas.NA` + + See Also + -------- + numpy.all : Numpy version of this method. + BooleanArray.any : Return whether any element is truthy. 
+ + Examples + -------- + The result indicates whether all elements are truthy (and by default + skips NAs): + + >>> pd.array([True, True, pd.NA]).all() + True + >>> pd.array([1, 1, pd.NA]).all() + True + >>> pd.array([True, False, pd.NA]).all() + False + >>> pd.array([], dtype="boolean").all() + True + >>> pd.array([pd.NA], dtype="boolean").all() + True + >>> pd.array([pd.NA], dtype="Float64").all() + True + + With ``skipna=False``, the result can be NA if this is logically + required (whether ``pd.NA`` is True or False influences the result): + + >>> pd.array([True, True, pd.NA]).all(skipna=False) + + >>> pd.array([1, 1, pd.NA]).all(skipna=False) + + >>> pd.array([True, False, pd.NA]).all(skipna=False) + False + >>> pd.array([1, 0, pd.NA]).all(skipna=False) + False + """ + kwargs.pop("axis", None) + nv.validate_all((), kwargs) + + values = self._data.copy() + # Argument 3 to "putmask" has incompatible type "object"; expected + # "Union[_SupportsArray[dtype[Any]], _NestedSequence[ + # _SupportsArray[dtype[Any]]], bool, int, float, complex, str, bytes, _Neste + # dSequence[Union[bool, int, float, complex, str, bytes]]]" [arg-type] + np.putmask(values, self._mask, self._truthy_value) # type: ignore[arg-type] + result = values.all() + + if skipna: + return result + else: + if not result or len(self) == 0 or not self._mask.any(): + return result + else: + return self.dtype.na_value diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/numeric.py b/.local/lib/python3.8/site-packages/pandas/core/arrays/numeric.py new file mode 100644 index 0000000..2ab95b4 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/arrays/numeric.py @@ -0,0 +1,202 @@ +from __future__ import annotations + +import datetime +import numbers +from typing import ( + TYPE_CHECKING, + TypeVar, +) + +import numpy as np + +from pandas._libs import ( + Timedelta, + missing as libmissing, +) +from pandas.compat.numpy import function as nv + +from pandas.core.dtypes.common import ( + is_float, + is_float_dtype, + is_integer, + is_integer_dtype, + is_list_like, + pandas_dtype, +) + +from pandas.core.arrays.masked import ( + BaseMaskedArray, + BaseMaskedDtype, +) + +if TYPE_CHECKING: + import pyarrow + +T = TypeVar("T", bound="NumericArray") + + +class NumericDtype(BaseMaskedDtype): + def __from_arrow__( + self, array: pyarrow.Array | pyarrow.ChunkedArray + ) -> BaseMaskedArray: + """ + Construct IntegerArray/FloatingArray from pyarrow Array/ChunkedArray. 
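+
+        A minimal sketch, assuming pyarrow is installed:
+
+        >>> import pyarrow as pa
+        >>> pd.Int64Dtype().__from_arrow__(pa.array([1, None]))
+        <IntegerArray>
+        [1, <NA>]
+        Length: 2, dtype: Int64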
+ """ + import pyarrow + + from pandas.core.arrays._arrow_utils import pyarrow_array_to_numpy_and_mask + + array_class = self.construct_array_type() + + pyarrow_type = pyarrow.from_numpy_dtype(self.type) + if not array.type.equals(pyarrow_type): + # test_from_arrow_type_error raise for string, but allow + # through itemsize conversion GH#31896 + rt_dtype = pandas_dtype(array.type.to_pandas_dtype()) + if rt_dtype.kind not in ["i", "u", "f"]: + # Could allow "c" or potentially disallow float<->int conversion, + # but at the moment we specifically test that uint<->int works + raise TypeError( + f"Expected array of {self} type, got {array.type} instead" + ) + + array = array.cast(pyarrow_type) + + if isinstance(array, pyarrow.Array): + chunks = [array] + else: + # pyarrow.ChunkedArray + chunks = array.chunks + + results = [] + for arr in chunks: + data, mask = pyarrow_array_to_numpy_and_mask(arr, dtype=self.type) + num_arr = array_class(data.copy(), ~mask, copy=False) + results.append(num_arr) + + if not results: + return array_class( + np.array([], dtype=self.numpy_dtype), np.array([], dtype=np.bool_) + ) + elif len(results) == 1: + # avoid additional copy in _concat_same_type + return results[0] + else: + return array_class._concat_same_type(results) + + +class NumericArray(BaseMaskedArray): + """ + Base class for IntegerArray and FloatingArray. + """ + + def _arith_method(self, other, op): + op_name = op.__name__ + omask = None + + if getattr(other, "ndim", 0) > 1: + raise NotImplementedError("can only perform ops with 1-d structures") + + if isinstance(other, NumericArray): + other, omask = other._data, other._mask + + elif is_list_like(other): + other = np.asarray(other) + if other.ndim > 1: + raise NotImplementedError("can only perform ops with 1-d structures") + if len(self) != len(other): + raise ValueError("Lengths must match") + if not (is_float_dtype(other) or is_integer_dtype(other)): + raise TypeError("can only perform ops with numeric values") + + elif isinstance(other, (datetime.timedelta, np.timedelta64)): + other = Timedelta(other) + + else: + if not (is_float(other) or is_integer(other) or other is libmissing.NA): + raise TypeError("can only perform ops with numeric values") + + if omask is None: + mask = self._mask.copy() + if other is libmissing.NA: + mask |= True + else: + mask = self._mask | omask + + if op_name == "pow": + # 1 ** x is 1. + mask = np.where((self._data == 1) & ~self._mask, False, mask) + # x ** 0 is 1. + if omask is not None: + mask = np.where((other == 0) & ~omask, False, mask) + elif other is not libmissing.NA: + mask = np.where(other == 0, False, mask) + + elif op_name == "rpow": + # 1 ** x is 1. + if omask is not None: + mask = np.where((other == 1) & ~omask, False, mask) + elif other is not libmissing.NA: + mask = np.where(other == 1, False, mask) + # x ** 0 is 1. + mask = np.where((self._data == 0) & ~self._mask, False, mask) + + if other is libmissing.NA: + result = np.ones_like(self._data) + if "truediv" in op_name and self.dtype.kind != "f": + # The actual data here doesn't matter since the mask + # will be all-True, but since this is division, we want + # to end up with floating dtype. 
+ result = result.astype(np.float64) + else: + with np.errstate(all="ignore"): + result = op(self._data, other) + + # divmod returns a tuple + if op_name == "divmod": + div, mod = result + return ( + self._maybe_mask_result(div, mask, other, "floordiv"), + self._maybe_mask_result(mod, mask, other, "mod"), + ) + + return self._maybe_mask_result(result, mask, other, op_name) + + _HANDLED_TYPES = (np.ndarray, numbers.Number) + + def __neg__(self): + return type(self)(-self._data, self._mask.copy()) + + def __pos__(self): + return self.copy() + + def __abs__(self): + return type(self)(abs(self._data), self._mask.copy()) + + def round(self: T, decimals: int = 0, *args, **kwargs) -> T: + """ + Round each value in the array a to the given number of decimals. + + Parameters + ---------- + decimals : int, default 0 + Number of decimal places to round to. If decimals is negative, + it specifies the number of positions to the left of the decimal point. + *args, **kwargs + Additional arguments and keywords have no effect but might be + accepted for compatibility with NumPy. + + Returns + ------- + NumericArray + Rounded values of the NumericArray. + + See Also + -------- + numpy.around : Round values of an np.array. + DataFrame.round : Round values of a DataFrame. + Series.round : Round values of a Series. + """ + nv.validate_round(args, kwargs) + values = np.round(self._data, decimals=decimals, **kwargs) + return type(self)(values, self._mask.copy()) diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/numpy_.py b/.local/lib/python3.8/site-packages/pandas/core/arrays/numpy_.py new file mode 100644 index 0000000..ddb36f5 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/arrays/numpy_.py @@ -0,0 +1,428 @@ +from __future__ import annotations + +import numpy as np + +from pandas._libs import lib +from pandas._typing import ( + Dtype, + NpDtype, + Scalar, + npt, +) +from pandas.compat.numpy import function as nv + +from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike +from pandas.core.dtypes.dtypes import PandasDtype +from pandas.core.dtypes.missing import isna + +from pandas.core import ( + arraylike, + nanops, + ops, +) +from pandas.core.arraylike import OpsMixin +from pandas.core.arrays._mixins import NDArrayBackedExtensionArray +from pandas.core.construction import ensure_wrapped_if_datetimelike +from pandas.core.strings.object_array import ObjectStringArrayMixin + + +class PandasArray( + OpsMixin, + NDArrayBackedExtensionArray, + ObjectStringArrayMixin, +): + """ + A pandas ExtensionArray for NumPy data. + + This is mostly for internal compatibility, and is not especially + useful on its own. + + Parameters + ---------- + values : ndarray + The NumPy ndarray to wrap. Must be 1-dimensional. + copy : bool, default False + Whether to copy `values`. + + Attributes + ---------- + None + + Methods + ------- + None + """ + + # If you're wondering why pd.Series(cls) doesn't put the array in an + # ExtensionBlock, search for `ABCPandasArray`. We check for + # that _typ to ensure that users don't unnecessarily use EAs inside + # pandas internals, which turns off things like block consolidation. 
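+    # Illustrative: pd.arrays.PandasArray(np.array([1, 2, 3])) wraps the
+    # ndarray in the ExtensionArray interface without copying by default.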
+ _typ = "npy_extension" + __array_priority__ = 1000 + _ndarray: np.ndarray + _dtype: PandasDtype + + # ------------------------------------------------------------------------ + # Constructors + + def __init__(self, values: np.ndarray | PandasArray, copy: bool = False): + if isinstance(values, type(self)): + values = values._ndarray + if not isinstance(values, np.ndarray): + raise ValueError( + f"'values' must be a NumPy array, not {type(values).__name__}" + ) + + if values.ndim == 0: + # Technically we support 2, but do not advertise that fact. + raise ValueError("PandasArray must be 1-dimensional.") + + if copy: + values = values.copy() + + dtype = PandasDtype(values.dtype) + super().__init__(values, dtype) + + @classmethod + def _from_sequence( + cls, scalars, *, dtype: Dtype | None = None, copy: bool = False + ) -> PandasArray: + if isinstance(dtype, PandasDtype): + dtype = dtype._dtype + + # error: Argument "dtype" to "asarray" has incompatible type + # "Union[ExtensionDtype, str, dtype[Any], dtype[floating[_64Bit]], Type[object], + # None]"; expected "Union[dtype[Any], None, type, _SupportsDType, str, + # Union[Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any], + # _DTypeDict, Tuple[Any, Any]]]" + result = np.asarray(scalars, dtype=dtype) # type: ignore[arg-type] + if ( + result.ndim > 1 + and not hasattr(scalars, "dtype") + and (dtype is None or dtype == object) + ): + # e.g. list-of-tuples + result = construct_1d_object_array_from_listlike(scalars) + + if copy and result is scalars: + result = result.copy() + return cls(result) + + @classmethod + def _from_factorized(cls, values, original) -> PandasArray: + return cls(values) + + def _from_backing_data(self, arr: np.ndarray) -> PandasArray: + return type(self)(arr) + + # ------------------------------------------------------------------------ + # Data + + @property + def dtype(self) -> PandasDtype: + return self._dtype + + # ------------------------------------------------------------------------ + # NumPy Array Interface + + def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: + return np.asarray(self._ndarray, dtype=dtype) + + def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): + # Lightly modified version of + # https://numpy.org/doc/stable/reference/generated/numpy.lib.mixins.NDArrayOperatorsMixin.html + # The primary modification is not boxing scalar return values + # in PandasArray, since pandas' ExtensionArrays are 1-d. + out = kwargs.get("out", ()) + + result = ops.maybe_dispatch_ufunc_to_dunder_op( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + return result + + if method == "reduce": + result = arraylike.dispatch_reduction_ufunc( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + # e.g. tests.series.test_ufunc.TestNumpyReductions + return result + + # Defer to the implementation of the ufunc on unwrapped values. + inputs = tuple(x._ndarray if isinstance(x, PandasArray) else x for x in inputs) + if out: + kwargs["out"] = tuple( + x._ndarray if isinstance(x, PandasArray) else x for x in out + ) + result = getattr(ufunc, method)(*inputs, **kwargs) + + if ufunc.nout > 1: + # multiple return values; re-box array-like results + return tuple(type(self)(x) for x in result) + elif method == "at": + # no return value + return None + elif method == "reduce": + if isinstance(result, np.ndarray): + # e.g. test_np_reduce_2d + return type(self)(result) + + # e.g. 
test_np_max_nested_tuples + return result + else: + # one return value; re-box array-like results + return type(self)(result) + + # ------------------------------------------------------------------------ + # Pandas ExtensionArray Interface + + def isna(self) -> np.ndarray: + return isna(self._ndarray) + + def _validate_scalar(self, fill_value): + if fill_value is None: + # Primarily for subclasses + fill_value = self.dtype.na_value + return fill_value + + def _values_for_factorize(self) -> tuple[np.ndarray, int]: + return self._ndarray, -1 + + # ------------------------------------------------------------------------ + # Reductions + + def any( + self, + *, + axis: int | None = None, + out=None, + keepdims: bool = False, + skipna: bool = True, + ): + nv.validate_any((), {"out": out, "keepdims": keepdims}) + result = nanops.nanany(self._ndarray, axis=axis, skipna=skipna) + return self._wrap_reduction_result(axis, result) + + def all( + self, + *, + axis: int | None = None, + out=None, + keepdims: bool = False, + skipna: bool = True, + ): + nv.validate_all((), {"out": out, "keepdims": keepdims}) + result = nanops.nanall(self._ndarray, axis=axis, skipna=skipna) + return self._wrap_reduction_result(axis, result) + + def min(self, *, axis: int | None = None, skipna: bool = True, **kwargs) -> Scalar: + nv.validate_min((), kwargs) + result = nanops.nanmin( + values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna + ) + return self._wrap_reduction_result(axis, result) + + def max(self, *, axis: int | None = None, skipna: bool = True, **kwargs) -> Scalar: + nv.validate_max((), kwargs) + result = nanops.nanmax( + values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna + ) + return self._wrap_reduction_result(axis, result) + + def sum( + self, *, axis: int | None = None, skipna: bool = True, min_count=0, **kwargs + ) -> Scalar: + nv.validate_sum((), kwargs) + result = nanops.nansum( + self._ndarray, axis=axis, skipna=skipna, min_count=min_count + ) + return self._wrap_reduction_result(axis, result) + + def prod( + self, *, axis: int | None = None, skipna: bool = True, min_count=0, **kwargs + ) -> Scalar: + nv.validate_prod((), kwargs) + result = nanops.nanprod( + self._ndarray, axis=axis, skipna=skipna, min_count=min_count + ) + return self._wrap_reduction_result(axis, result) + + def mean( + self, + *, + axis: int | None = None, + dtype: NpDtype | None = None, + out=None, + keepdims: bool = False, + skipna: bool = True, + ): + nv.validate_mean((), {"dtype": dtype, "out": out, "keepdims": keepdims}) + result = nanops.nanmean(self._ndarray, axis=axis, skipna=skipna) + return self._wrap_reduction_result(axis, result) + + def median( + self, + *, + axis: int | None = None, + out=None, + overwrite_input: bool = False, + keepdims: bool = False, + skipna: bool = True, + ): + nv.validate_median( + (), {"out": out, "overwrite_input": overwrite_input, "keepdims": keepdims} + ) + result = nanops.nanmedian(self._ndarray, axis=axis, skipna=skipna) + return self._wrap_reduction_result(axis, result) + + def std( + self, + *, + axis: int | None = None, + dtype: NpDtype | None = None, + out=None, + ddof=1, + keepdims: bool = False, + skipna: bool = True, + ): + nv.validate_stat_ddof_func( + (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="std" + ) + result = nanops.nanstd(self._ndarray, axis=axis, skipna=skipna, ddof=ddof) + return self._wrap_reduction_result(axis, result) + + def var( + self, + *, + axis: int | None = None, + dtype: NpDtype | None = None, + out=None, + ddof=1, + 
keepdims: bool = False, + skipna: bool = True, + ): + nv.validate_stat_ddof_func( + (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="var" + ) + result = nanops.nanvar(self._ndarray, axis=axis, skipna=skipna, ddof=ddof) + return self._wrap_reduction_result(axis, result) + + def sem( + self, + *, + axis: int | None = None, + dtype: NpDtype | None = None, + out=None, + ddof=1, + keepdims: bool = False, + skipna: bool = True, + ): + nv.validate_stat_ddof_func( + (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="sem" + ) + result = nanops.nansem(self._ndarray, axis=axis, skipna=skipna, ddof=ddof) + return self._wrap_reduction_result(axis, result) + + def kurt( + self, + *, + axis: int | None = None, + dtype: NpDtype | None = None, + out=None, + keepdims: bool = False, + skipna: bool = True, + ): + nv.validate_stat_ddof_func( + (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="kurt" + ) + result = nanops.nankurt(self._ndarray, axis=axis, skipna=skipna) + return self._wrap_reduction_result(axis, result) + + def skew( + self, + *, + axis: int | None = None, + dtype: NpDtype | None = None, + out=None, + keepdims: bool = False, + skipna: bool = True, + ): + nv.validate_stat_ddof_func( + (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="skew" + ) + result = nanops.nanskew(self._ndarray, axis=axis, skipna=skipna) + return self._wrap_reduction_result(axis, result) + + # ------------------------------------------------------------------------ + # Additional Methods + + def to_numpy( + self, + dtype: npt.DTypeLike | None = None, + copy: bool = False, + na_value=lib.no_default, + ) -> np.ndarray: + result = np.asarray(self._ndarray, dtype=dtype) + + if (copy or na_value is not lib.no_default) and result is self._ndarray: + result = result.copy() + + if na_value is not lib.no_default: + result[self.isna()] = na_value + + return result + + # ------------------------------------------------------------------------ + # Ops + + def __invert__(self) -> PandasArray: + return type(self)(~self._ndarray) + + def __neg__(self) -> PandasArray: + return type(self)(-self._ndarray) + + def __pos__(self) -> PandasArray: + return type(self)(+self._ndarray) + + def __abs__(self) -> PandasArray: + return type(self)(abs(self._ndarray)) + + def _cmp_method(self, other, op): + if isinstance(other, PandasArray): + other = other._ndarray + + other = ops.maybe_prepare_scalar_for_op(other, (len(self),)) + pd_op = ops.get_array_op(op) + other = ensure_wrapped_if_datetimelike(other) + with np.errstate(all="ignore"): + result = pd_op(self._ndarray, other) + + if op is divmod or op is ops.rdivmod: + a, b = result + if isinstance(a, np.ndarray): + # for e.g. op vs TimedeltaArray, we may already + # have an ExtensionArray, in which case we do not wrap + return self._wrap_ndarray_result(a), self._wrap_ndarray_result(b) + return a, b + + if isinstance(result, np.ndarray): + # for e.g. 
multiplication vs TimedeltaArray, we may already + # have an ExtensionArray, in which case we do not wrap + return self._wrap_ndarray_result(result) + return result + + _arith_method = _cmp_method + + def _wrap_ndarray_result(self, result: np.ndarray): + # If we have timedelta64[ns] result, return a TimedeltaArray instead + # of a PandasArray + if result.dtype == "timedelta64[ns]": + from pandas.core.arrays import TimedeltaArray + + return TimedeltaArray._simple_new(result) + return type(self)(result) + + # ------------------------------------------------------------------------ + # String methods interface + _str_na_value = np.nan diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/period.py b/.local/lib/python3.8/site-packages/pandas/core/arrays/period.py new file mode 100644 index 0000000..04cc70b --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/arrays/period.py @@ -0,0 +1,1235 @@ +from __future__ import annotations + +from datetime import timedelta +import operator +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Literal, + Sequence, +) + +import numpy as np + +from pandas._libs import algos as libalgos +from pandas._libs.arrays import NDArrayBacked +from pandas._libs.tslibs import ( + BaseOffset, + NaT, + NaTType, + Timedelta, + delta_to_nanoseconds, + dt64arr_to_periodarr as c_dt64arr_to_periodarr, + iNaT, + parsing, + period as libperiod, + to_offset, +) +from pandas._libs.tslibs.dtypes import FreqGroup +from pandas._libs.tslibs.fields import isleapyear_arr +from pandas._libs.tslibs.offsets import ( + Tick, + delta_to_tick, +) +from pandas._libs.tslibs.period import ( + DIFFERENT_FREQ, + IncompatibleFrequency, + Period, + get_period_field_arr, + period_asfreq_arr, +) +from pandas._typing import ( + AnyArrayLike, + Dtype, + NpDtype, + npt, +) +from pandas.util._decorators import ( + cache_readonly, + doc, +) + +from pandas.core.dtypes.common import ( + TD64NS_DTYPE, + ensure_object, + is_datetime64_any_dtype, + is_datetime64_dtype, + is_dtype_equal, + is_float_dtype, + is_integer_dtype, + is_period_dtype, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import PeriodDtype +from pandas.core.dtypes.generic import ( + ABCIndex, + ABCPeriodIndex, + ABCSeries, + ABCTimedeltaArray, +) +from pandas.core.dtypes.missing import ( + isna, + notna, +) + +import pandas.core.algorithms as algos +from pandas.core.arrays import datetimelike as dtl +from pandas.core.arrays.base import ExtensionArray +import pandas.core.common as com + +if TYPE_CHECKING: + + from pandas._typing import ( + NumpySorter, + NumpyValueArrayLike, + ) + + from pandas.core.arrays import DatetimeArray + + +_shared_doc_kwargs = { + "klass": "PeriodArray", +} + + +def _field_accessor(name: str, docstring=None): + def f(self): + base = self.freq._period_dtype_code + result = get_period_field_arr(name, self.asi8, base) + return result + + f.__name__ = name + f.__doc__ = docstring + return property(f) + + +class PeriodArray(dtl.DatelikeOps): + """ + Pandas ExtensionArray for storing Period data. + + Users should use :func:`~pandas.period_array` to create new instances. + Alternatively, :func:`~pandas.array` can be used to create new instances + from a sequence of Period scalars. + + Parameters + ---------- + values : Union[PeriodArray, Series[period], ndarray[int], PeriodIndex] + The data to store. These should be arrays that can be directly + converted to ordinals without inference or copy (PeriodArray, + ndarray[int64]), or a box around such an array (Series[period], + PeriodIndex). 
+ dtype : PeriodDtype, optional + A PeriodDtype instance from which to extract a `freq`. If both + `freq` and `dtype` are specified, then the frequencies must match. + freq : str or DateOffset + The `freq` to use for the array. Mostly applicable when `values` + is an ndarray of integers, when `freq` is required. When `values` + is a PeriodArray (or box around), it's checked that ``values.freq`` + matches `freq`. + copy : bool, default False + Whether to copy the ordinals before storing. + + Attributes + ---------- + None + + Methods + ------- + None + + See Also + -------- + Period: Represents a period of time. + PeriodIndex : Immutable Index for period data. + period_range: Create a fixed-frequency PeriodArray. + array: Construct a pandas array. + + Notes + ----- + There are two components to a PeriodArray + + - ordinals : integer ndarray + - freq : pd.tseries.offsets.Offset + + The values are physically stored as a 1-D ndarray of integers. These are + called "ordinals" and represent some kind of offset from a base. + + The `freq` indicates the span covered by each element of the array. + All elements in the PeriodArray have the same `freq`. + """ + + # array priority higher than numpy scalars + __array_priority__ = 1000 + _typ = "periodarray" # ABCPeriodArray + _scalar_type = Period + _recognized_scalars = (Period,) + _is_recognized_dtype = is_period_dtype + _infer_matches = ("period",) + + # Names others delegate to us + _other_ops: list[str] = [] + _bool_ops: list[str] = ["is_leap_year"] + _object_ops: list[str] = ["start_time", "end_time", "freq"] + _field_ops: list[str] = [ + "year", + "month", + "day", + "hour", + "minute", + "second", + "weekofyear", + "weekday", + "week", + "dayofweek", + "day_of_week", + "dayofyear", + "day_of_year", + "quarter", + "qyear", + "days_in_month", + "daysinmonth", + ] + _datetimelike_ops: list[str] = _field_ops + _object_ops + _bool_ops + _datetimelike_methods: list[str] = ["strftime", "to_timestamp", "asfreq"] + + _dtype: PeriodDtype + + # -------------------------------------------------------------------- + # Constructors + + def __init__( + self, values, dtype: Dtype | None = None, freq=None, copy: bool = False + ): + freq = validate_dtype_freq(dtype, freq) + + if freq is not None: + freq = Period._maybe_convert_freq(freq) + + if isinstance(values, ABCSeries): + values = values._values + if not isinstance(values, type(self)): + raise TypeError("Incorrect dtype") + + elif isinstance(values, ABCPeriodIndex): + values = values._values + + if isinstance(values, type(self)): + if freq is not None and freq != values.freq: + raise raise_on_incompatible(values, freq) + values, freq = values._ndarray, values.freq + + values = np.array(values, dtype="int64", copy=copy) + if freq is None: + raise ValueError("freq is not specified and cannot be inferred") + NDArrayBacked.__init__(self, values, PeriodDtype(freq)) + + # error: Signature of "_simple_new" incompatible with supertype "NDArrayBacked" + @classmethod + def _simple_new( # type: ignore[override] + cls, + values: np.ndarray, + freq: BaseOffset | None = None, + dtype: Dtype | None = None, + ) -> PeriodArray: + # alias for PeriodArray.__init__ + assertion_msg = "Should be numpy array of type i8" + assert isinstance(values, np.ndarray) and values.dtype == "i8", assertion_msg + return cls(values, freq=freq, dtype=dtype) + + @classmethod + def _from_sequence( + cls: type[PeriodArray], + scalars: Sequence[Period | None] | AnyArrayLike, + *, + dtype: Dtype | None = None, + copy: bool = False, + ) -> 
PeriodArray: + if dtype and isinstance(dtype, PeriodDtype): + freq = dtype.freq + else: + freq = None + + if isinstance(scalars, cls): + validate_dtype_freq(scalars.dtype, freq) + if copy: + scalars = scalars.copy() + return scalars + + periods = np.asarray(scalars, dtype=object) + + freq = freq or libperiod.extract_freq(periods) + ordinals = libperiod.extract_ordinals(periods, freq) + return cls(ordinals, freq=freq) + + @classmethod + def _from_sequence_of_strings( + cls, strings, *, dtype: Dtype | None = None, copy: bool = False + ) -> PeriodArray: + return cls._from_sequence(strings, dtype=dtype, copy=copy) + + @classmethod + def _from_datetime64(cls, data, freq, tz=None) -> PeriodArray: + """ + Construct a PeriodArray from a datetime64 array + + Parameters + ---------- + data : ndarray[datetime64[ns], datetime64[ns, tz]] + freq : str or Tick + tz : tzinfo, optional + + Returns + ------- + PeriodArray[freq] + """ + data, freq = dt64arr_to_periodarr(data, freq, tz) + return cls(data, freq=freq) + + @classmethod + def _generate_range(cls, start, end, periods, freq, fields): + periods = dtl.validate_periods(periods) + + if freq is not None: + freq = Period._maybe_convert_freq(freq) + + field_count = len(fields) + if start is not None or end is not None: + if field_count > 0: + raise ValueError( + "Can either instantiate from fields or endpoints, but not both" + ) + subarr, freq = _get_ordinal_range(start, end, periods, freq) + elif field_count > 0: + subarr, freq = _range_from_fields(freq=freq, **fields) + else: + raise ValueError("Not enough parameters to construct Period range") + + return subarr, freq + + # ----------------------------------------------------------------- + # DatetimeLike Interface + + # error: Argument 1 of "_unbox_scalar" is incompatible with supertype + # "DatetimeLikeArrayMixin"; supertype defines the argument type as + # "Union[Union[Period, Any, Timedelta], NaTType]" + def _unbox_scalar( # type: ignore[override] + self, + value: Period | NaTType, + setitem: bool = False, + ) -> np.int64: + if value is NaT: + # error: Item "Period" of "Union[Period, NaTType]" has no attribute "value" + return np.int64(value.value) # type: ignore[union-attr] + elif isinstance(value, self._scalar_type): + self._check_compatible_with(value, setitem=setitem) + return np.int64(value.ordinal) + else: + raise ValueError(f"'value' should be a Period. Got '{value}' instead.") + + def _scalar_from_string(self, value: str) -> Period: + return Period(value, freq=self.freq) + + def _check_compatible_with(self, other, setitem: bool = False): + if other is NaT: + return + self._require_matching_freq(other) + + # -------------------------------------------------------------------- + # Data / Attributes + + @cache_readonly + def dtype(self) -> PeriodDtype: + return self._dtype + + # error: Read-only property cannot override read-write property + @property # type: ignore[misc] + def freq(self) -> BaseOffset: + """ + Return the frequency object for this PeriodArray. + """ + return self.dtype.freq + + def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: + if dtype == "i8": + return self.asi8 + elif dtype == bool: + return ~self._isnan + + # This will raise TypeError for non-object dtypes + return np.array(list(self), dtype=object) + + def __arrow_array__(self, type=None): + """ + Convert myself into a pyarrow Array. 
+ """ + import pyarrow + + from pandas.core.arrays._arrow_utils import ArrowPeriodType + + if type is not None: + if pyarrow.types.is_integer(type): + return pyarrow.array(self._ndarray, mask=self.isna(), type=type) + elif isinstance(type, ArrowPeriodType): + # ensure we have the same freq + if self.freqstr != type.freq: + raise TypeError( + "Not supported to convert PeriodArray to array with different " + f"'freq' ({self.freqstr} vs {type.freq})" + ) + else: + raise TypeError( + f"Not supported to convert PeriodArray to '{type}' type" + ) + + period_type = ArrowPeriodType(self.freqstr) + storage_array = pyarrow.array(self._ndarray, mask=self.isna(), type="int64") + return pyarrow.ExtensionArray.from_storage(period_type, storage_array) + + # -------------------------------------------------------------------- + # Vectorized analogues of Period properties + + year = _field_accessor( + "year", + """ + The year of the period. + """, + ) + month = _field_accessor( + "month", + """ + The month as January=1, December=12. + """, + ) + day = _field_accessor( + "day", + """ + The days of the period. + """, + ) + hour = _field_accessor( + "hour", + """ + The hour of the period. + """, + ) + minute = _field_accessor( + "minute", + """ + The minute of the period. + """, + ) + second = _field_accessor( + "second", + """ + The second of the period. + """, + ) + weekofyear = _field_accessor( + "week", + """ + The week ordinal of the year. + """, + ) + week = weekofyear + day_of_week = _field_accessor( + "day_of_week", + """ + The day of the week with Monday=0, Sunday=6. + """, + ) + dayofweek = day_of_week + weekday = dayofweek + dayofyear = day_of_year = _field_accessor( + "day_of_year", + """ + The ordinal day of the year. + """, + ) + quarter = _field_accessor( + "quarter", + """ + The quarter of the date. + """, + ) + qyear = _field_accessor("qyear") + days_in_month = _field_accessor( + "days_in_month", + """ + The number of days in the month. + """, + ) + daysinmonth = days_in_month + + @property + def is_leap_year(self) -> np.ndarray: + """ + Logical indicating if the date belongs to a leap year. + """ + return isleapyear_arr(np.asarray(self.year)) + + def to_timestamp(self, freq=None, how: str = "start") -> DatetimeArray: + """ + Cast to DatetimeArray/Index. + + Parameters + ---------- + freq : str or DateOffset, optional + Target frequency. The default is 'D' for week or longer, + 'S' otherwise. + how : {'s', 'e', 'start', 'end'} + Whether to use the start or end of the time period being converted. 
+ + Returns + ------- + DatetimeArray/Index + """ + from pandas.core.arrays import DatetimeArray + + how = libperiod.validate_end_alias(how) + + end = how == "E" + if end: + if freq == "B" or self.freq == "B": + # roll forward to ensure we land on B date + adjust = Timedelta(1, "D") - Timedelta(1, "ns") + return self.to_timestamp(how="start") + adjust + else: + adjust = Timedelta(1, "ns") + return (self + self.freq).to_timestamp(how="start") - adjust + + if freq is None: + freq = self._get_to_timestamp_base() + base = freq + else: + freq = Period._maybe_convert_freq(freq) + base = freq._period_dtype_code + + new_parr = self.asfreq(freq, how=how) + + new_data = libperiod.periodarr_to_dt64arr(new_parr.asi8, base) + dta = DatetimeArray(new_data) + + if self.freq.name == "B": + # See if we can retain BDay instead of Day in cases where + # len(self) is too small for infer_freq to distinguish between them + diffs = libalgos.unique_deltas(self.asi8) + if len(diffs) == 1: + diff = diffs[0] + if diff == self.freq.n: + dta._freq = self.freq + elif diff == 1: + dta._freq = self.freq.base + # TODO: other cases? + return dta + else: + return dta._with_freq("infer") + + # -------------------------------------------------------------------- + + def _time_shift(self, periods: int, freq=None) -> PeriodArray: + """ + Shift each value by `periods`. + + Note this is different from ExtensionArray.shift, which + shifts the *position* of each element, padding the end with + missing values. + + Parameters + ---------- + periods : int + Number of periods to shift by. + freq : pandas.DateOffset, pandas.Timedelta, or str + Frequency increment to shift by. + """ + if freq is not None: + raise TypeError( + "`freq` argument is not supported for " + f"{type(self).__name__}._time_shift" + ) + values = self.asi8 + periods * self.freq.n + if self._hasna: + values[self._isnan] = iNaT + return type(self)(values, freq=self.freq) + + def _box_func(self, x) -> Period | NaTType: + return Period._from_ordinal(ordinal=x, freq=self.freq) + + @doc(**_shared_doc_kwargs, other="PeriodIndex", other_name="PeriodIndex") + def asfreq(self, freq=None, how: str = "E") -> PeriodArray: + """ + Convert the {klass} to the specified frequency `freq`. + + Equivalent to applying :meth:`pandas.Period.asfreq` with the given arguments + to each :class:`~pandas.Period` in this {klass}. + + Parameters + ---------- + freq : str + A frequency. + how : str {{'E', 'S'}}, default 'E' + Whether the elements should be aligned to the end + or start within the period. + + * 'E', 'END', or 'FINISH' for end, + * 'S', 'START', or 'BEGIN' for start. + + January 31st ('END') vs. January 1st ('START') for example. + + Returns + ------- + {klass} + The transformed {klass} with the new frequency. + + See Also + -------- + {other}.asfreq: Convert each Period in a {other_name} to the given frequency. + Period.asfreq : Convert a :class:`~pandas.Period` object to the given frequency.
+ + Examples + -------- + >>> pidx = pd.period_range('2010-01-01', '2015-01-01', freq='A') + >>> pidx + PeriodIndex(['2010', '2011', '2012', '2013', '2014', '2015'], + dtype='period[A-DEC]') + + >>> pidx.asfreq('M') + PeriodIndex(['2010-12', '2011-12', '2012-12', '2013-12', '2014-12', + '2015-12'], dtype='period[M]') + + >>> pidx.asfreq('M', how='S') + PeriodIndex(['2010-01', '2011-01', '2012-01', '2013-01', '2014-01', + '2015-01'], dtype='period[M]') + """ + how = libperiod.validate_end_alias(how) + + freq = Period._maybe_convert_freq(freq) + + base1 = self.freq._period_dtype_code + base2 = freq._period_dtype_code + + asi8 = self.asi8 + # self.freq.n can't be negative or 0 + end = how == "E" + if end: + ordinal = asi8 + self.freq.n - 1 + else: + ordinal = asi8 + + new_data = period_asfreq_arr(ordinal, base1, base2, end) + + if self._hasna: + new_data[self._isnan] = iNaT + + return type(self)(new_data, freq=freq) + + # ------------------------------------------------------------------ + # Rendering Methods + + def _formatter(self, boxed: bool = False): + if boxed: + return str + return "'{}'".format + + @dtl.ravel_compat + def _format_native_types( + self, *, na_rep="NaT", date_format=None, **kwargs + ) -> np.ndarray: + """ + actually format my specific types + """ + values = self.astype(object) + + if date_format: + formatter = lambda dt: dt.strftime(date_format) + else: + formatter = lambda dt: str(dt) + + if self._hasna: + mask = self._isnan + values[mask] = na_rep + imask = ~mask + values[imask] = np.array([formatter(dt) for dt in values[imask]]) + else: + values = np.array([formatter(dt) for dt in values]) + return values + + # ------------------------------------------------------------------ + + def astype(self, dtype, copy: bool = True): + # We handle Period[T] -> Period[U] + # Our parent handles everything else. + dtype = pandas_dtype(dtype) + if is_dtype_equal(dtype, self._dtype): + if not copy: + return self + else: + return self.copy() + if is_period_dtype(dtype): + return self.asfreq(dtype.freq) + + if is_datetime64_any_dtype(dtype): + # GH#45038 match PeriodIndex behavior. + tz = getattr(dtype, "tz", None) + return self.to_timestamp().tz_localize(tz) + + return super().astype(dtype, copy=copy) + + def searchsorted( + self, + value: NumpyValueArrayLike | ExtensionArray, + side: Literal["left", "right"] = "left", + sorter: NumpySorter = None, + ) -> npt.NDArray[np.intp] | np.intp: + npvalue = self._validate_searchsorted_value(value).view("M8[ns]") + + # Cast to M8 to get datetime-like NaT placement + m8arr = self._ndarray.view("M8[ns]") + return m8arr.searchsorted(npvalue, side=side, sorter=sorter) + + def fillna(self, value=None, method=None, limit=None) -> PeriodArray: + if method is not None: + # view as dt64 so we get treated as timelike in core.missing + dta = self.view("M8[ns]") + result = dta.fillna(value=value, method=method, limit=limit) + # error: Incompatible return value type (got "Union[ExtensionArray, + # ndarray[Any, Any]]", expected "PeriodArray") + return result.view(self.dtype) # type: ignore[return-value] + return super().fillna(value=value, method=method, limit=limit) + + # ------------------------------------------------------------------ + # Arithmetic Methods + + def _sub_datelike(self, other): + assert other is not NaT + return NotImplemented + + def _sub_period(self, other): + # If the operation is well-defined, we return an object-Index + # of DateOffsets. 
Null entries are filled with pd.NaT + self._check_compatible_with(other) + asi8 = self.asi8 + new_data = asi8 - other.ordinal + new_data = np.array([self.freq * x for x in new_data]) + + if self._hasna: + new_data[self._isnan] = NaT + + return new_data + + def _sub_period_array(self, other): + """ + Subtract a Period Array/Index from self. This is only valid if self + is itself a Period Array/Index, raises otherwise. Both objects must + have the same frequency. + + Parameters + ---------- + other : PeriodIndex or PeriodArray + + Returns + ------- + result : np.ndarray[object] + Array of DateOffset objects; nulls represented by NaT. + """ + self._require_matching_freq(other) + + new_values = algos.checked_add_with_arr( + self.asi8, -other.asi8, arr_mask=self._isnan, b_mask=other._isnan + ) + + new_values = np.array([self.freq.base * x for x in new_values]) + if self._hasna or other._hasna: + mask = self._isnan | other._isnan + new_values[mask] = NaT + return new_values + + def _addsub_int_array( + self, other: np.ndarray, op: Callable[[Any, Any], Any] + ) -> PeriodArray: + """ + Add or subtract array of integers; equivalent to applying + `_time_shift` pointwise. + + Parameters + ---------- + other : np.ndarray[integer-dtype] + op : {operator.add, operator.sub} + + Returns + ------- + result : PeriodArray + """ + assert op in [operator.add, operator.sub] + if op is operator.sub: + other = -other + res_values = algos.checked_add_with_arr(self.asi8, other, arr_mask=self._isnan) + res_values = res_values.view("i8") + np.putmask(res_values, self._isnan, iNaT) + return type(self)(res_values, freq=self.freq) + + def _add_offset(self, other: BaseOffset): + assert not isinstance(other, Tick) + + self._require_matching_freq(other, base=True) + + # Note: when calling parent class's _add_timedeltalike_scalar, + # it will call delta_to_nanoseconds(delta). Because delta here + # is an integer, delta_to_nanoseconds will return it unchanged. + result = super()._add_timedeltalike_scalar(other.n) + return type(self)(result, freq=self.freq) + + def _add_timedeltalike_scalar(self, other): + """ + Parameters + ---------- + other : timedelta, Tick, np.timedelta64 + + Returns + ------- + PeriodArray + """ + if not isinstance(self.freq, Tick): + # We cannot add timedelta-like to non-tick PeriodArray + raise raise_on_incompatible(self, other) + + if notna(other): + # special handling for np.timedelta64("NaT"), avoid calling + # _check_timedeltalike_freq_compat as that would raise TypeError + other = self._check_timedeltalike_freq_compat(other) + + # Note: when calling parent class's _add_timedeltalike_scalar, + # it will call delta_to_nanoseconds(delta). Because delta here + # is an integer, delta_to_nanoseconds will return it unchanged. 
+ return super()._add_timedeltalike_scalar(other) + + def _add_timedelta_arraylike(self, other): + """ + Parameters + ---------- + other : TimedeltaArray or ndarray[timedelta64] + + Returns + ------- + result : ndarray[int64] + """ + if not isinstance(self.freq, Tick): + # We cannot add timedelta-like to non-tick PeriodArray + raise TypeError( + f"Cannot add or subtract timedelta64[ns] dtype from {self.dtype}" + ) + + if not np.all(isna(other)): + delta = self._check_timedeltalike_freq_compat(other) + else: + # all-NaT TimedeltaIndex is equivalent to a single scalar td64 NaT + return self + np.timedelta64("NaT") + + ordinals = self._addsub_int_array(delta, operator.add).asi8 + return type(self)(ordinals, dtype=self.dtype) + + def _check_timedeltalike_freq_compat(self, other): + """ + Arithmetic operations with timedelta-like scalars or array `other` + are only valid if `other` is an integer multiple of `self.freq`. + If the operation is valid, find that integer multiple. Otherwise, + raise because the operation is invalid. + + Parameters + ---------- + other : timedelta, np.timedelta64, Tick, + ndarray[timedelta64], TimedeltaArray, TimedeltaIndex + + Returns + ------- + multiple : int or ndarray[int64] + + Raises + ------ + IncompatibleFrequency + """ + assert isinstance(self.freq, Tick) # checked by calling function + base_nanos = self.freq.base.nanos + + if isinstance(other, (timedelta, np.timedelta64, Tick)): + nanos = delta_to_nanoseconds(other) + + elif isinstance(other, np.ndarray): + # numpy timedelta64 array; all entries must be compatible + assert other.dtype.kind == "m" + if other.dtype != TD64NS_DTYPE: + # i.e. non-nano unit + # TODO: disallow unit-less timedelta64 + other = other.astype(TD64NS_DTYPE) + nanos = other.view("i8") + else: + # TimedeltaArray/Index + nanos = other.asi8 + + if np.all(nanos % base_nanos == 0): + # the nanos being added are an integer multiple of the + # base frequency underlying self.freq + delta = nanos // base_nanos + # delta is the integer (or integer-array) number of periods + # by which self will be shifted. + return delta + + raise raise_on_incompatible(self, other) + + # ------------------------------------------------------------------ + # TODO: See if we can re-share this with Period + + def _get_to_timestamp_base(self) -> int: + """ + Return the frequency code group to use as the to_timestamp base for + this array's frequency code. + + For frequencies coarser than daily, return the day freq code; for + frequencies from hourly down to second, return the second freq code.
+ + Returns + ------- + int + """ + base = self._dtype._dtype_code + if base < FreqGroup.FR_BUS.value: + return FreqGroup.FR_DAY.value + elif FreqGroup.FR_HR.value <= base <= FreqGroup.FR_SEC.value: + return FreqGroup.FR_SEC.value + return base + + @property + def start_time(self) -> DatetimeArray: + return self.to_timestamp(how="start") + + @property + def end_time(self) -> DatetimeArray: + return self.to_timestamp(how="end") + + def _require_matching_freq(self, other, base: bool = False) -> None: + # See also arrays.period.raise_on_incompatible + if isinstance(other, BaseOffset): + other_freq = other + else: + other_freq = other.freq + + if base: + condition = self.freq.base != other_freq.base + else: + condition = self.freq != other_freq + + if condition: + msg = DIFFERENT_FREQ.format( + cls=type(self).__name__, + own_freq=self.freqstr, + other_freq=other_freq.freqstr, + ) + raise IncompatibleFrequency(msg) + + +def raise_on_incompatible(left, right): + """ + Helper function to render a consistent error message when raising + IncompatibleFrequency. + + Parameters + ---------- + left : PeriodArray + right : None, DateOffset, Period, ndarray, or timedelta-like + + Returns + ------- + IncompatibleFrequency + Exception to be raised by the caller. + """ + # GH#24283 error message format depends on whether right is scalar + if isinstance(right, (np.ndarray, ABCTimedeltaArray)) or right is None: + other_freq = None + elif isinstance(right, (ABCPeriodIndex, PeriodArray, Period, BaseOffset)): + other_freq = right.freqstr + else: + other_freq = delta_to_tick(Timedelta(right)).freqstr + + msg = DIFFERENT_FREQ.format( + cls=type(left).__name__, own_freq=left.freqstr, other_freq=other_freq + ) + return IncompatibleFrequency(msg) + + +# ------------------------------------------------------------------- +# Constructor Helpers + + +def period_array( + data: Sequence[Period | str | None] | AnyArrayLike, + freq: str | Tick | None = None, + copy: bool = False, +) -> PeriodArray: + """ + Construct a new PeriodArray from a sequence of Period scalars. + + Parameters + ---------- + data : Sequence of Period objects + A sequence of Period objects. These are required to all have + the same ``freq``. Missing values can be indicated by ``None`` + or ``pandas.NaT``. + freq : str, Tick, or Offset + The frequency of every element of the array. This can be specified + to avoid inferring the `freq` from `data`. + copy : bool, default False + Whether to ensure a copy of the data is made. + + Returns + ------- + PeriodArray + + See Also + -------- + PeriodArray + pandas.PeriodIndex + + Examples + -------- + >>> period_array([pd.Period('2017', freq='A'), + ... pd.Period('2018', freq='A')]) + <PeriodArray> + ['2017', '2018'] + Length: 2, dtype: period[A-DEC] + + >>> period_array([pd.Period('2017', freq='A'), + ... pd.Period('2018', freq='A'), + ...
pd.NaT]) + <PeriodArray> + ['2017', '2018', 'NaT'] + Length: 3, dtype: period[A-DEC] + + Integers that look like years are handled + + >>> period_array([2000, 2001, 2002], freq='D') + <PeriodArray> + ['2000-01-01', '2001-01-01', '2002-01-01'] + Length: 3, dtype: period[D] + + Datetime-like strings may also be passed + + >>> period_array(['2000-Q1', '2000-Q2', '2000-Q3', '2000-Q4'], freq='Q') + <PeriodArray> + ['2000Q1', '2000Q2', '2000Q3', '2000Q4'] + Length: 4, dtype: period[Q-DEC] + """ + data_dtype = getattr(data, "dtype", None) + + if is_datetime64_dtype(data_dtype): + return PeriodArray._from_datetime64(data, freq) + if is_period_dtype(data_dtype): + return PeriodArray(data, freq=freq) + + # other iterable of some kind + if not isinstance(data, (np.ndarray, list, tuple, ABCSeries)): + data = list(data) + + arrdata = np.asarray(data) + + dtype: PeriodDtype | None + if freq: + dtype = PeriodDtype(freq) + else: + dtype = None + + if is_float_dtype(arrdata) and len(arrdata) > 0: + raise TypeError("PeriodIndex does not allow floating point in construction") + + if is_integer_dtype(arrdata.dtype): + arr = arrdata.astype(np.int64, copy=False) + ordinals = libperiod.from_ordinals(arr, freq) + return PeriodArray(ordinals, dtype=dtype) + + data = ensure_object(arrdata) + + return PeriodArray._from_sequence(data, dtype=dtype) + + +def validate_dtype_freq(dtype, freq): + """ + If both a dtype and a freq are available, ensure they match. If only + dtype is available, extract the implied freq. + + Parameters + ---------- + dtype : dtype + freq : DateOffset or None + + Returns + ------- + freq : DateOffset + + Raises + ------ + ValueError : non-period dtype + IncompatibleFrequency : mismatch between dtype and freq + """ + if freq is not None: + freq = to_offset(freq) + + if dtype is not None: + dtype = pandas_dtype(dtype) + if not is_period_dtype(dtype): + raise ValueError("dtype must be PeriodDtype") + if freq is None: + freq = dtype.freq + elif freq != dtype.freq: + raise IncompatibleFrequency("specified freq and dtype are different") + return freq + + +def dt64arr_to_periodarr(data, freq, tz=None): + """ + Convert a datetime-like array to Period ordinals. + + Parameters + ---------- + data : Union[Series[datetime64[ns]], DatetimeIndex, ndarray[datetime64ns]] + freq : Optional[Union[str, Tick]] + Must match the `freq` on the `data` if `data` is a DatetimeIndex + or Series. + tz : Optional[tzinfo] + + Returns + ------- + ordinals : ndarray[int64] + freq : Tick + The frequency extracted from the Series or DatetimeIndex if that's + used.
+ + """ + if data.dtype != np.dtype("M8[ns]"): + raise ValueError(f"Wrong dtype: {data.dtype}") + + if freq is None: + if isinstance(data, ABCIndex): + data, freq = data._values, data.freq + elif isinstance(data, ABCSeries): + data, freq = data._values, data.dt.freq + + elif isinstance(data, (ABCIndex, ABCSeries)): + data = data._values + + freq = Period._maybe_convert_freq(freq) + base = freq._period_dtype_code + return c_dt64arr_to_periodarr(data.view("i8"), base, tz), freq + + +def _get_ordinal_range(start, end, periods, freq, mult=1): + if com.count_not_none(start, end, periods) != 2: + raise ValueError( + "Of the three parameters: start, end, and periods, " + "exactly two must be specified" + ) + + if freq is not None: + freq = to_offset(freq) + mult = freq.n + + if start is not None: + start = Period(start, freq) + if end is not None: + end = Period(end, freq) + + is_start_per = isinstance(start, Period) + is_end_per = isinstance(end, Period) + + if is_start_per and is_end_per and start.freq != end.freq: + raise ValueError("start and end must have same freq") + if start is NaT or end is NaT: + raise ValueError("start and end must not be NaT") + + if freq is None: + if is_start_per: + freq = start.freq + elif is_end_per: + freq = end.freq + else: # pragma: no cover + raise ValueError("Could not infer freq from start/end") + + if periods is not None: + periods = periods * mult + if start is None: + data = np.arange( + end.ordinal - periods + mult, end.ordinal + 1, mult, dtype=np.int64 + ) + else: + data = np.arange( + start.ordinal, start.ordinal + periods, mult, dtype=np.int64 + ) + else: + data = np.arange(start.ordinal, end.ordinal + 1, mult, dtype=np.int64) + + return data, freq + + +def _range_from_fields( + year=None, + month=None, + quarter=None, + day=None, + hour=None, + minute=None, + second=None, + freq=None, +) -> tuple[np.ndarray, BaseOffset]: + if hour is None: + hour = 0 + if minute is None: + minute = 0 + if second is None: + second = 0 + if day is None: + day = 1 + + ordinals = [] + + if quarter is not None: + if freq is None: + freq = to_offset("Q") + base = FreqGroup.FR_QTR.value + else: + freq = to_offset(freq) + base = libperiod.freq_to_dtype_code(freq) + if base != FreqGroup.FR_QTR.value: + raise AssertionError("base must equal FR_QTR") + + freqstr = freq.freqstr + year, quarter = _make_field_arrays(year, quarter) + for y, q in zip(year, quarter): + y, m = parsing.quarter_to_myear(y, q, freqstr) + val = libperiod.period_ordinal(y, m, 1, 1, 1, 1, 0, 0, base) + ordinals.append(val) + else: + freq = to_offset(freq) + base = libperiod.freq_to_dtype_code(freq) + arrays = _make_field_arrays(year, month, day, hour, minute, second) + for y, mth, d, h, mn, s in zip(*arrays): + ordinals.append(libperiod.period_ordinal(y, mth, d, h, mn, s, 0, 0, base)) + + return np.array(ordinals, dtype=np.int64), freq + + +def _make_field_arrays(*fields) -> list[np.ndarray]: + length = None + for x in fields: + if isinstance(x, (list, np.ndarray, ABCSeries)): + if length is not None and len(x) != length: + raise ValueError("Mismatched Period array lengths") + elif length is None: + length = len(x) + + # error: Argument 2 to "repeat" has incompatible type "Optional[int]"; expected + # "Union[Union[int, integer[Any]], Union[bool, bool_], ndarray, Sequence[Union[int, + # integer[Any]]], Sequence[Union[bool, bool_]], Sequence[Sequence[Any]]]" + return [ + np.asarray(x) + if isinstance(x, (np.ndarray, list, ABCSeries)) + else np.repeat(x, length) # type: ignore[arg-type] + for x in fields + ] 
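A short, hedged sketch of how the constructor helpers above fit together; it assumes only public pandas entry points (``period_array`` is re-exported from ``pandas.core.arrays``), and the reprs in the comments are abbreviated:

import pandas as pd
from pandas.core.arrays import period_array

# freq is inferred from the Period scalars; NaT marks missing entries.
parr = period_array([pd.Period("2017", freq="A"), pd.Period("2018", freq="A"), pd.NaT])

# asfreq() maps the stored ordinals to another frequency code;
# how="S" aligns to the start of each period, how="E" to the end.
monthly = parr.asfreq("M", how="S")   # ['2017-01', '2018-01', 'NaT']

# to_timestamp() converts the ordinals back to datetime64[ns] values.
stamps = parr.to_timestamp(how="start")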
diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/sparse/__init__.py b/.local/lib/python3.8/site-packages/pandas/core/arrays/sparse/__init__.py new file mode 100644 index 0000000..18294ea --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/arrays/sparse/__init__.py @@ -0,0 +1,13 @@ +# flake8: noqa: F401 + +from pandas.core.arrays.sparse.accessor import ( + SparseAccessor, + SparseFrameAccessor, +) +from pandas.core.arrays.sparse.array import ( + BlockIndex, + IntIndex, + SparseArray, + make_sparse_index, +) +from pandas.core.arrays.sparse.dtype import SparseDtype diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/sparse/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/arrays/sparse/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..0c66f81 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/arrays/sparse/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/sparse/__pycache__/accessor.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/arrays/sparse/__pycache__/accessor.cpython-38.pyc new file mode 100644 index 0000000..2d7337f Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/arrays/sparse/__pycache__/accessor.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/sparse/__pycache__/array.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/arrays/sparse/__pycache__/array.cpython-38.pyc new file mode 100644 index 0000000..d2faee0 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/arrays/sparse/__pycache__/array.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/sparse/__pycache__/dtype.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/arrays/sparse/__pycache__/dtype.cpython-38.pyc new file mode 100644 index 0000000..f3c8ccf Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/arrays/sparse/__pycache__/dtype.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/sparse/__pycache__/scipy_sparse.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/arrays/sparse/__pycache__/scipy_sparse.cpython-38.pyc new file mode 100644 index 0000000..6ad3b5c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/arrays/sparse/__pycache__/scipy_sparse.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/sparse/accessor.py b/.local/lib/python3.8/site-packages/pandas/core/arrays/sparse/accessor.py new file mode 100644 index 0000000..8e61460 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/arrays/sparse/accessor.py @@ -0,0 +1,386 @@ +"""Sparse accessor""" + +import numpy as np + +from pandas.compat._optional import import_optional_dependency + +from pandas.core.dtypes.cast import find_common_type + +from pandas.core.accessor import ( + PandasDelegate, + delegate_names, +) +from pandas.core.arrays.sparse.array import SparseArray +from pandas.core.arrays.sparse.dtype import SparseDtype + + +class BaseAccessor: + _validation_msg = "Can only use the '.sparse' accessor with Sparse data." 
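The accessor classes defined below wire ``Series.sparse`` up to the scipy converters; as a rough round-trip sketch (assuming SciPy is installed, with values mirroring the doctests that follow):

import pandas as pd
from scipy import sparse

# from_coo builds a sparse Series indexed by (row, col) coordinates ...
A = sparse.coo_matrix(([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), shape=(3, 4))
ss = pd.Series.sparse.from_coo(A)

# ... and to_coo goes back, choosing which index levels become rows/columns.
B, rows, cols = ss.sparse.to_coo(row_levels=(0,), column_levels=(1,))
assert (A.toarray() == B.toarray()).all()  # same dense contents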
+ + def __init__(self, data=None): + self._parent = data + self._validate(data) + + def _validate(self, data): + raise NotImplementedError + + +@delegate_names( + SparseArray, ["npoints", "density", "fill_value", "sp_values"], typ="property" +) +class SparseAccessor(BaseAccessor, PandasDelegate): + """ + Accessor for sparse data from other sparse matrix data types. + """ + + def _validate(self, data): + if not isinstance(data.dtype, SparseDtype): + raise AttributeError(self._validation_msg) + + def _delegate_property_get(self, name, *args, **kwargs): + return getattr(self._parent.array, name) + + def _delegate_method(self, name, *args, **kwargs): + if name == "from_coo": + return self.from_coo(*args, **kwargs) + elif name == "to_coo": + return self.to_coo(*args, **kwargs) + else: + raise ValueError + + @classmethod + def from_coo(cls, A, dense_index=False): + """ + Create a Series with sparse values from a scipy.sparse.coo_matrix. + + Parameters + ---------- + A : scipy.sparse.coo_matrix + dense_index : bool, default False + If False (default), the SparseSeries index consists of only the + coords of the non-null entries of the original coo_matrix. + If True, the SparseSeries index consists of the full sorted + (row, col) coordinates of the coo_matrix. + + Returns + ------- + s : Series + A Series with sparse values. + + Examples + -------- + >>> from scipy import sparse + + >>> A = sparse.coo_matrix( + ... ([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), shape=(3, 4) + ... ) + >>> A + <3x4 sparse matrix of type '<class 'numpy.float64'>' + with 3 stored elements in COOrdinate format> + + >>> A.todense() + matrix([[0., 0., 1., 2.], + [3., 0., 0., 0.], + [0., 0., 0., 0.]]) + + >>> ss = pd.Series.sparse.from_coo(A) + >>> ss + 0 2 1.0 + 3 2.0 + 1 0 3.0 + dtype: Sparse[float64, nan] + """ + from pandas import Series + from pandas.core.arrays.sparse.scipy_sparse import coo_to_sparse_series + + result = coo_to_sparse_series(A, dense_index=dense_index) + result = Series(result.array, index=result.index, copy=False) + + return result + + def to_coo(self, row_levels=(0,), column_levels=(1,), sort_labels=False): + """ + Create a scipy.sparse.coo_matrix from a Series with MultiIndex. + + Use row_levels and column_levels to determine the row and column + coordinates respectively. row_levels and column_levels are the names + (labels) or numbers of the levels. {row_levels, column_levels} must be + a partition of the MultiIndex level names (or numbers). + + Parameters + ---------- + row_levels : tuple/list + column_levels : tuple/list + sort_labels : bool, default False + Sort the row and column labels before forming the sparse matrix. + When `row_levels` and/or `column_levels` refer to a single level, + set to `True` for a faster execution. + + Returns + ------- + y : scipy.sparse.coo_matrix + rows : list (row labels) + columns : list (column labels) + + Examples + -------- + >>> s = pd.Series([3.0, np.nan, 1.0, 3.0, np.nan, np.nan]) + >>> s.index = pd.MultiIndex.from_tuples( + ... [ + ... (1, 2, "a", 0), + ... (1, 2, "a", 1), + ... (1, 1, "b", 0), + ... (1, 1, "b", 1), + ... (2, 1, "b", 0), + ... (2, 1, "b", 1) + ... ], + ... names=["A", "B", "C", "D"], + ... ) + >>> s + A B C D + 1 2 a 0 3.0 + 1 NaN + 1 b 0 1.0 + 1 3.0 + 2 1 b 0 NaN + 1 NaN + dtype: float64 + + >>> ss = s.astype("Sparse") + >>> ss + A B C D + 1 2 a 0 3.0 + 1 NaN + 1 b 0 1.0 + 1 3.0 + 2 1 b 0 NaN + 1 NaN + dtype: Sparse[float64, nan] + + >>> A, rows, columns = ss.sparse.to_coo( + ... row_levels=["A", "B"], column_levels=["C", "D"], sort_labels=True + ...
) + >>> A + <3x4 sparse matrix of type '<class 'numpy.float64'>' + with 3 stored elements in COOrdinate format> + >>> A.todense() + matrix([[0., 0., 1., 3.], + [3., 0., 0., 0.], + [0., 0., 0., 0.]]) + + >>> rows + [(1, 1), (1, 2), (2, 1)] + >>> columns + [('a', 0), ('a', 1), ('b', 0), ('b', 1)] + """ + from pandas.core.arrays.sparse.scipy_sparse import sparse_series_to_coo + + A, rows, columns = sparse_series_to_coo( + self._parent, row_levels, column_levels, sort_labels=sort_labels + ) + return A, rows, columns + + def to_dense(self): + """ + Convert a Series from sparse values to dense. + + .. versionadded:: 0.25.0 + + Returns + ------- + Series: + A Series with the same values, stored as a dense array. + + Examples + -------- + >>> series = pd.Series(pd.arrays.SparseArray([0, 1, 0])) + >>> series + 0 0 + 1 1 + 2 0 + dtype: Sparse[int64, 0] + + >>> series.sparse.to_dense() + 0 0 + 1 1 + 2 0 + dtype: int64 + """ + from pandas import Series + + return Series( + self._parent.array.to_dense(), + index=self._parent.index, + name=self._parent.name, + ) + + +class SparseFrameAccessor(BaseAccessor, PandasDelegate): + """ + DataFrame accessor for sparse data. + + .. versionadded:: 0.25.0 + """ + + def _validate(self, data): + dtypes = data.dtypes + if not all(isinstance(t, SparseDtype) for t in dtypes): + raise AttributeError(self._validation_msg) + + @classmethod + def from_spmatrix(cls, data, index=None, columns=None): + """ + Create a new DataFrame from a scipy sparse matrix. + + .. versionadded:: 0.25.0 + + Parameters + ---------- + data : scipy.sparse.spmatrix + Must be convertible to csc format. + index, columns : Index, optional + Row and column labels to use for the resulting DataFrame. + Defaults to a RangeIndex. + + Returns + ------- + DataFrame + Each column of the DataFrame is stored as a + :class:`arrays.SparseArray`. + + Examples + -------- + >>> import scipy.sparse + >>> mat = scipy.sparse.eye(3) + >>> pd.DataFrame.sparse.from_spmatrix(mat) + 0 1 2 + 0 1.0 0.0 0.0 + 1 0.0 1.0 0.0 + 2 0.0 0.0 1.0 + """ + from pandas._libs.sparse import IntIndex + + from pandas import DataFrame + + data = data.tocsc() + index, columns = cls._prep_index(data, index, columns) + n_rows, n_columns = data.shape + # We need to make sure indices are sorted, as we create + # IntIndex with no input validation (i.e. check_integrity=False ). + # Indices may already be sorted in scipy in which case this adds + # a small overhead. + data.sort_indices() + indices = data.indices + indptr = data.indptr + array_data = data.data + dtype = SparseDtype(array_data.dtype, 0) + arrays = [] + for i in range(n_columns): + sl = slice(indptr[i], indptr[i + 1]) + idx = IntIndex(n_rows, indices[sl], check_integrity=False) + arr = SparseArray._simple_new(array_data[sl], idx, dtype) + arrays.append(arr) + return DataFrame._from_arrays( + arrays, columns=columns, index=index, verify_integrity=False + ) + + def to_dense(self): + """ + Convert a DataFrame with sparse values to dense. + + .. versionadded:: 0.25.0 + + Returns + ------- + DataFrame + A DataFrame with the same values stored as dense arrays. + + Examples + -------- + >>> df = pd.DataFrame({"A": pd.arrays.SparseArray([0, 1, 0])}) + >>> df.sparse.to_dense() + A + 0 0 + 1 1 + 2 0 + """ + from pandas import DataFrame + + data = {k: v.array.to_dense() for k, v in self._parent.items()} + return DataFrame(data, index=self._parent.index, columns=self._parent.columns) + + def to_coo(self): + """ + Return the contents of the frame as a sparse SciPy COO matrix. + + ..
versionadded:: 0.25.0 + + Returns + ------- + coo_matrix : scipy.sparse.spmatrix + If the caller is heterogeneous and contains booleans or objects, + the result will be of dtype=object. See Notes. + + Notes + ----- + The dtype will be the lowest-common-denominator type (implicit + upcasting); that is to say if the dtypes (even of numeric types) + are mixed, the one that accommodates all will be chosen. + + e.g. If the dtypes are float16 and float32, dtype will be upcast to + float32. By numpy.find_common_type convention, mixing int64 + and uint64 will result in a float64 dtype. + """ + import_optional_dependency("scipy") + from scipy.sparse import coo_matrix + + dtype = find_common_type(self._parent.dtypes.to_list()) + if isinstance(dtype, SparseDtype): + dtype = dtype.subtype + + cols, rows, data = [], [], [] + for col, (_, ser) in enumerate(self._parent.iteritems()): + sp_arr = ser.array + if sp_arr.fill_value != 0: + raise ValueError("fill value must be 0 when converting to COO matrix") + + row = sp_arr.sp_index.indices + cols.append(np.repeat(col, len(row))) + rows.append(row) + data.append(sp_arr.sp_values.astype(dtype, copy=False)) + + cols = np.concatenate(cols) + rows = np.concatenate(rows) + data = np.concatenate(data) + return coo_matrix((data, (rows, cols)), shape=self._parent.shape) + + @property + def density(self) -> float: + """ + Ratio of non-sparse points to total (dense) data points. + """ + tmp = np.mean([column.array.density for _, column in self._parent.items()]) + return tmp + + @staticmethod + def _prep_index(data, index, columns): + from pandas.core.indexes.api import ( + default_index, + ensure_index, + ) + + N, K = data.shape + if index is None: + index = default_index(N) + else: + index = ensure_index(index) + if columns is None: + columns = default_index(K) + else: + columns = ensure_index(columns) + + if len(columns) != K: + raise ValueError(f"Column length mismatch: {len(columns)} vs. {K}") + if len(index) != N: + raise ValueError(f"Index length mismatch: {len(index)} vs.
{N}") + return index, columns diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/sparse/array.py b/.local/lib/python3.8/site-packages/pandas/core/arrays/sparse/array.py new file mode 100644 index 0000000..ebfa769 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/arrays/sparse/array.py @@ -0,0 +1,1853 @@ +""" +SparseArray data structure +""" +from __future__ import annotations + +from collections import abc +import numbers +import operator +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Literal, + Sequence, + TypeVar, + cast, + overload, +) +import warnings + +import numpy as np + +from pandas._libs import lib +import pandas._libs.sparse as splib +from pandas._libs.sparse import ( + BlockIndex, + IntIndex, + SparseIndex, +) +from pandas._libs.tslibs import NaT +from pandas._typing import ( + ArrayLike, + AstypeArg, + Dtype, + NpDtype, + PositionalIndexer, + Scalar, + ScalarIndexer, + SequenceIndexer, + npt, +) +from pandas.compat.numpy import function as nv +from pandas.errors import PerformanceWarning +from pandas.util._exceptions import find_stack_level +from pandas.util._validators import validate_insert_loc + +from pandas.core.dtypes.cast import ( + astype_nansafe, + construct_1d_arraylike_from_scalar, + find_common_type, + maybe_box_datetimelike, +) +from pandas.core.dtypes.common import ( + is_array_like, + is_bool_dtype, + is_datetime64_any_dtype, + is_datetime64tz_dtype, + is_dtype_equal, + is_integer, + is_list_like, + is_object_dtype, + is_scalar, + is_string_dtype, + pandas_dtype, +) +from pandas.core.dtypes.generic import ( + ABCIndex, + ABCSeries, +) +from pandas.core.dtypes.missing import ( + isna, + na_value_for_dtype, + notna, +) + +from pandas.core import arraylike +import pandas.core.algorithms as algos +from pandas.core.arraylike import OpsMixin +from pandas.core.arrays import ExtensionArray +from pandas.core.arrays.sparse.dtype import SparseDtype +from pandas.core.base import PandasObject +import pandas.core.common as com +from pandas.core.construction import ( + extract_array, + sanitize_array, +) +from pandas.core.indexers import ( + check_array_indexer, + unpack_tuple_and_ellipses, +) +from pandas.core.missing import interpolate_2d +from pandas.core.nanops import check_below_min_count +import pandas.core.ops as ops + +import pandas.io.formats.printing as printing + +# See https://github.com/python/typing/issues/684 +if TYPE_CHECKING: + from enum import Enum + + class ellipsis(Enum): + Ellipsis = "..." + + Ellipsis = ellipsis.Ellipsis + + from scipy.sparse import spmatrix + + from pandas._typing import ( + FillnaOptions, + NumpySorter, + ) + + SparseIndexKind = Literal["integer", "block"] + + from pandas import Series + +else: + ellipsis = type(Ellipsis) + + +# ---------------------------------------------------------------------------- +# Array + +SparseArrayT = TypeVar("SparseArrayT", bound="SparseArray") + +_sparray_doc_kwargs = {"klass": "SparseArray"} + + +def _get_fill(arr: SparseArray) -> np.ndarray: + """ + Create a 0-dim ndarray containing the fill value + + Parameters + ---------- + arr : SparseArray + + Returns + ------- + fill_value : ndarray + 0-dim ndarray with just the fill value. 
+ + Notes + ----- + coerce fill_value to arr dtype if possible + int64 SparseArray can have NaN as fill_value if there is no missing + """ + try: + return np.asarray(arr.fill_value, dtype=arr.dtype.subtype) + except ValueError: + return np.asarray(arr.fill_value) + + +def _sparse_array_op( + left: SparseArray, right: SparseArray, op: Callable, name: str +) -> SparseArray: + """ + Perform a binary operation between two arrays. + + Parameters + ---------- + left : Union[SparseArray, ndarray] + right : Union[SparseArray, ndarray] + op : Callable + The binary operation to perform + name : str + Name of the callable. + + Returns + ------- + SparseArray + """ + if name.startswith("__"): + # For lookups in _libs.sparse we need non-dunder op name + name = name[2:-2] + + # dtype used to find corresponding sparse method + ltype = left.dtype.subtype + rtype = right.dtype.subtype + + if not is_dtype_equal(ltype, rtype): + subtype = find_common_type([ltype, rtype]) + ltype = SparseDtype(subtype, left.fill_value) + rtype = SparseDtype(subtype, right.fill_value) + + left = left.astype(ltype, copy=False) + right = right.astype(rtype, copy=False) + dtype = ltype.subtype + else: + dtype = ltype + + # dtype the result must have + result_dtype = None + + if left.sp_index.ngaps == 0 or right.sp_index.ngaps == 0: + with np.errstate(all="ignore"): + result = op(left.to_dense(), right.to_dense()) + fill = op(_get_fill(left), _get_fill(right)) + + if left.sp_index.ngaps == 0: + index = left.sp_index + else: + index = right.sp_index + elif left.sp_index.equals(right.sp_index): + with np.errstate(all="ignore"): + result = op(left.sp_values, right.sp_values) + fill = op(_get_fill(left), _get_fill(right)) + index = left.sp_index + else: + if name[0] == "r": + left, right = right, left + name = name[1:] + + if name in ("and", "or", "xor") and dtype == "bool": + opname = f"sparse_{name}_uint8" + # to make template simple, cast here + left_sp_values = left.sp_values.view(np.uint8) + right_sp_values = right.sp_values.view(np.uint8) + result_dtype = bool + else: + opname = f"sparse_{name}_{dtype}" + left_sp_values = left.sp_values + right_sp_values = right.sp_values + + if ( + name in ["floordiv", "mod"] + and (right == 0).any() + and left.dtype.kind in ["i", "u"] + ): + # Match the non-Sparse Series behavior + opname = f"sparse_{name}_float64" + left_sp_values = left_sp_values.astype("float64") + right_sp_values = right_sp_values.astype("float64") + + sparse_op = getattr(splib, opname) + + with np.errstate(all="ignore"): + result, index, fill = sparse_op( + left_sp_values, + left.sp_index, + left.fill_value, + right_sp_values, + right.sp_index, + right.fill_value, + ) + + if name == "divmod": + # result is a 2-tuple + # error: Incompatible return value type (got "Tuple[SparseArray, + # SparseArray]", expected "SparseArray") + return ( # type: ignore[return-value] + _wrap_result(name, result[0], index, fill[0], dtype=result_dtype), + _wrap_result(name, result[1], index, fill[1], dtype=result_dtype), + ) + + if result_dtype is None: + result_dtype = result.dtype + + return _wrap_result(name, result, index, fill, dtype=result_dtype) + + +def _wrap_result( + name: str, data, sparse_index, fill_value, dtype: Dtype | None = None +) -> SparseArray: + """ + wrap op result to have correct dtype + """ + if name.startswith("__"): + # e.g.
__eq__ --> eq + name = name[2:-2] + + if name in ("eq", "ne", "lt", "gt", "le", "ge"): + dtype = bool + + fill_value = lib.item_from_zerodim(fill_value) + + if is_bool_dtype(dtype): + # fill_value may be np.bool_ + fill_value = bool(fill_value) + return SparseArray( + data, sparse_index=sparse_index, fill_value=fill_value, dtype=dtype + ) + + +class SparseArray(OpsMixin, PandasObject, ExtensionArray): + """ + An ExtensionArray for storing sparse data. + + Parameters + ---------- + data : array-like or scalar + A dense array of values to store in the SparseArray. This may contain + `fill_value`. + sparse_index : SparseIndex, optional + index : Index + + .. deprecated:: 1.4.0 + Use a function like `np.full` to construct an array with the desired + repeats of the scalar value instead. + + fill_value : scalar, optional + Elements in data that are ``fill_value`` are not stored in the + SparseArray. For memory savings, this should be the most common value + in `data`. By default, `fill_value` depends on the dtype of `data`: + + =========== ========== + data.dtype na_value + =========== ========== + float ``np.nan`` + int ``0`` + bool False + datetime64 ``pd.NaT`` + timedelta64 ``pd.NaT`` + =========== ========== + + The fill value is potentially specified in three ways. In order of + precedence, these are + + 1. The `fill_value` argument + 2. ``dtype.fill_value`` if `fill_value` is None and `dtype` is + a ``SparseDtype`` + 3. ``data.dtype.fill_value`` if `fill_value` is None and `dtype` + is not a ``SparseDtype`` and `data` is a ``SparseArray``. + + kind : str + Can be 'integer' or 'block', default is 'integer'. + The type of storage for sparse locations. + + * 'block': Stores a `block` and `block_length` for each + contiguous *span* of sparse values. This is best when + sparse data tends to be clumped together, with large + regions of ``fill-value`` values between sparse values. + * 'integer': uses an integer to store the location of + each sparse value. + + dtype : np.dtype or SparseDtype, optional + The dtype to use for the SparseArray. For numpy dtypes, this + determines the dtype of ``self.sp_values``. For SparseDtype, + this determines ``self.sp_values`` and ``self.fill_value``. + copy : bool, default False + Whether to explicitly copy the incoming `data` array. + + Attributes + ---------- + None + + Methods + ------- + None + + Examples + -------- + >>> from pandas.arrays import SparseArray + >>> arr = SparseArray([0, 0, 1, 2]) + >>> arr + [0, 0, 1, 2] + Fill: 0 + IntIndex + Indices: array([2, 3], dtype=int32) + """ + + _subtyp = "sparse_array" # register ABCSparseArray + _hidden_attrs = PandasObject._hidden_attrs | frozenset(["get_values"]) + _sparse_index: SparseIndex + + def __init__( + self, + data, + sparse_index=None, + index=None, + fill_value=None, + kind: SparseIndexKind = "integer", + dtype: Dtype | None = None, + copy: bool = False, + ): + + if fill_value is None and isinstance(dtype, SparseDtype): + fill_value = dtype.fill_value + + if isinstance(data, type(self)): + # disable normal inference on dtype, sparse_index, & fill_value + if sparse_index is None: + sparse_index = data.sp_index + if fill_value is None: + fill_value = data.fill_value + if dtype is None: + dtype = data.dtype + # TODO: make kind=None, and use data.kind? 
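+            # Illustrative sketch of the precedence documented above (assumed
+            # values, not a doctest):
+            #   base = SparseArray([0, 0, 1], fill_value=0)
+            #   SparseArray(base).fill_value                 # 0, taken from data
+            #   SparseArray(base, fill_value=-1).fill_value  # -1, argument wins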
+ data = data.sp_values + + # Handle use-provided dtype + if isinstance(dtype, str): + # Two options: dtype='int', regular numpy dtype + # or dtype='Sparse[int]', a sparse dtype + try: + dtype = SparseDtype.construct_from_string(dtype) + except TypeError: + dtype = pandas_dtype(dtype) + + if isinstance(dtype, SparseDtype): + if fill_value is None: + fill_value = dtype.fill_value + dtype = dtype.subtype + + if index is not None: + warnings.warn( + "The index argument has been deprecated and will be " + "removed in a future version. Use a function like np.full " + "to construct an array with the desired repeats of the " + "scalar value instead.\n\n", + FutureWarning, + stacklevel=find_stack_level(), + ) + + if index is not None and not is_scalar(data): + raise Exception("must only pass scalars with an index") + + if is_scalar(data): + if index is not None and data is None: + data = np.nan + + if index is not None: + npoints = len(index) + elif sparse_index is None: + npoints = 1 + else: + npoints = sparse_index.length + + data = construct_1d_arraylike_from_scalar(data, npoints, dtype=None) + dtype = data.dtype + + if dtype is not None: + dtype = pandas_dtype(dtype) + + # TODO: disentangle the fill_value dtype inference from + # dtype inference + if data is None: + # TODO: What should the empty dtype be? Object or float? + + # error: Argument "dtype" to "array" has incompatible type + # "Union[ExtensionDtype, dtype[Any], None]"; expected "Union[dtype[Any], + # None, type, _SupportsDType, str, Union[Tuple[Any, int], Tuple[Any, + # Union[int, Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]]" + data = np.array([], dtype=dtype) # type: ignore[arg-type] + + if not is_array_like(data): + try: + # probably shared code in sanitize_series + + data = sanitize_array(data, index=None) + except ValueError: + # NumPy may raise a ValueError on data like [1, []] + # we retry with object dtype here. + if dtype is None: + dtype = object + data = np.atleast_1d(np.asarray(data, dtype=dtype)) + else: + raise + + if copy: + # TODO: avoid double copy when dtype forces cast. + data = data.copy() + + if fill_value is None: + fill_value_dtype = data.dtype if dtype is None else dtype + if fill_value_dtype is None: + fill_value = np.nan + else: + fill_value = na_value_for_dtype(fill_value_dtype) + + if isinstance(data, type(self)) and sparse_index is None: + sparse_index = data._sparse_index + # error: Argument "dtype" to "asarray" has incompatible type + # "Union[ExtensionDtype, dtype[Any], Type[object], None]"; expected + # "Union[dtype[Any], None, type, _SupportsDType, str, Union[Tuple[Any, int], + # Tuple[Any, Union[int, Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, + # Any]]]" + sparse_values = np.asarray( + data.sp_values, dtype=dtype # type: ignore[arg-type] + ) + elif sparse_index is None: + data = extract_array(data, extract_numpy=True) + if not isinstance(data, np.ndarray): + # EA + if is_datetime64tz_dtype(data.dtype): + warnings.warn( + f"Creating SparseArray from {data.dtype} data " + "loses timezone information. 
Cast to object before " + "sparse to retain timezone information.", + UserWarning, + stacklevel=find_stack_level(), + ) + data = np.asarray(data, dtype="datetime64[ns]") + if fill_value is NaT: + fill_value = np.datetime64("NaT", "ns") + data = np.asarray(data) + sparse_values, sparse_index, fill_value = make_sparse( + # error: Argument "dtype" to "make_sparse" has incompatible type + # "Union[ExtensionDtype, dtype[Any], Type[object], None]"; expected + # "Union[str, dtype[Any], None]" + data, + kind=kind, + fill_value=fill_value, + dtype=dtype, # type: ignore[arg-type] + ) + else: + # error: Argument "dtype" to "asarray" has incompatible type + # "Union[ExtensionDtype, dtype[Any], Type[object], None]"; expected + # "Union[dtype[Any], None, type, _SupportsDType, str, Union[Tuple[Any, int], + # Tuple[Any, Union[int, Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, + # Any]]]" + sparse_values = np.asarray(data, dtype=dtype) # type: ignore[arg-type] + if len(sparse_values) != sparse_index.npoints: + raise AssertionError( + f"Non array-like type {type(sparse_values)} must " + "have the same length as the index" + ) + self._sparse_index = sparse_index + self._sparse_values = sparse_values + self._dtype = SparseDtype(sparse_values.dtype, fill_value) + + @classmethod + def _simple_new( + cls: type[SparseArrayT], + sparse_array: np.ndarray, + sparse_index: SparseIndex, + dtype: SparseDtype, + ) -> SparseArrayT: + new = object.__new__(cls) + new._sparse_index = sparse_index + new._sparse_values = sparse_array + new._dtype = dtype + return new + + @classmethod + def from_spmatrix(cls: type[SparseArrayT], data: spmatrix) -> SparseArrayT: + """ + Create a SparseArray from a scipy.sparse matrix. + + .. versionadded:: 0.25.0 + + Parameters + ---------- + data : scipy.sparse.sp_matrix + This should be a SciPy sparse matrix where the size + of the second dimension is 1. In other words, a + sparse matrix with a single column. + + Returns + ------- + SparseArray + + Examples + -------- + >>> import scipy.sparse + >>> mat = scipy.sparse.coo_matrix((4, 1)) + >>> pd.arrays.SparseArray.from_spmatrix(mat) + [0.0, 0.0, 0.0, 0.0] + Fill: 0.0 + IntIndex + Indices: array([], dtype=int32) + """ + length, ncol = data.shape + + if ncol != 1: + raise ValueError(f"'data' must have a single column, not '{ncol}'") + + # our sparse index classes require that the positions be strictly + # increasing. So we need to sort loc, and arr accordingly. + data = data.tocsc() + data.sort_indices() + arr = data.data + idx = data.indices + + zero = np.array(0, dtype=arr.dtype).item() + dtype = SparseDtype(arr.dtype, zero) + index = IntIndex(length, idx) + + return cls._simple_new(arr, index, dtype) + + def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: + fill_value = self.fill_value + + if self.sp_index.ngaps == 0: + # Compat for na dtype and int values. + return self.sp_values + if dtype is None: + # Can NumPy represent this type? + # If not, `np.result_type` will raise. We catch that + # and return object. + if is_datetime64_any_dtype(self.sp_values.dtype): + # However, we *do* special-case the common case of + # a datetime64 with pandas NaT. 
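+                # Illustrative sketch (assumed values, not a doctest):
+                #   arr = SparseArray(pd.to_datetime(["2020-01-01", None]))
+                #   np.asarray(arr)  # densifies to datetime64[ns] with NaT
+                #                    # rather than falling back to object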
+ if fill_value is NaT: + # Can't put pd.NaT in a datetime64[ns] + fill_value = np.datetime64("NaT") + try: + dtype = np.result_type(self.sp_values.dtype, type(fill_value)) + except TypeError: + dtype = object + + out = np.full(self.shape, fill_value, dtype=dtype) + out[self.sp_index.indices] = self.sp_values + return out + + def __setitem__(self, key, value): + # I suppose we could allow setting of non-fill_value elements. + # TODO(SparseArray.__setitem__): remove special cases in + # ExtensionBlock.where + msg = "SparseArray does not support item assignment via setitem" + raise TypeError(msg) + + @classmethod + def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = False): + return cls(scalars, dtype=dtype) + + @classmethod + def _from_factorized(cls, values, original): + return cls(values, dtype=original.dtype) + + # ------------------------------------------------------------------------ + # Data + # ------------------------------------------------------------------------ + @property + def sp_index(self) -> SparseIndex: + """ + The SparseIndex containing the location of non- ``fill_value`` points. + """ + return self._sparse_index + + @property + def sp_values(self) -> np.ndarray: + """ + An ndarray containing the non- ``fill_value`` values. + + Examples + -------- + >>> s = SparseArray([0, 0, 1, 0, 2], fill_value=0) + >>> s.sp_values + array([1, 2]) + """ + return self._sparse_values + + @property + def dtype(self) -> SparseDtype: + return self._dtype + + @property + def fill_value(self): + """ + Elements in `data` that are `fill_value` are not stored. + + For memory savings, this should be the most common value in the array. + """ + return self.dtype.fill_value + + @fill_value.setter + def fill_value(self, value): + self._dtype = SparseDtype(self.dtype.subtype, value) + + @property + def kind(self) -> SparseIndexKind: + """ + The kind of sparse index for this array. One of {'integer', 'block'}. + """ + if isinstance(self.sp_index, IntIndex): + return "integer" + else: + return "block" + + @property + def _valid_sp_values(self) -> np.ndarray: + sp_vals = self.sp_values + mask = notna(sp_vals) + return sp_vals[mask] + + def __len__(self) -> int: + return self.sp_index.length + + @property + def _null_fill_value(self) -> bool: + return self._dtype._is_na_fill_value + + def _fill_value_matches(self, fill_value) -> bool: + if self._null_fill_value: + return isna(fill_value) + else: + return self.fill_value == fill_value + + @property + def nbytes(self) -> int: + return self.sp_values.nbytes + self.sp_index.nbytes + + @property + def density(self) -> float: + """ + The percent of non- ``fill_value`` points, as decimal. + + Examples + -------- + >>> s = SparseArray([0, 0, 1, 1, 1], fill_value=0) + >>> s.density + 0.6 + """ + return self.sp_index.npoints / self.sp_index.length + + @property + def npoints(self) -> int: + """ + The number of non- ``fill_value`` points. + + Examples + -------- + >>> s = SparseArray([0, 0, 1, 1, 1], fill_value=0) + >>> s.npoints + 3 + """ + return self.sp_index.npoints + + def isna(self): + # If null fill value, we want SparseDtype[bool, true] + # to preserve the same memory usage. 
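+        # Illustrative sketch of the resulting dtypes (assumed values):
+        #   SparseArray([1.0, np.nan]).isna().dtype  # Sparse[bool, True]
+        #   SparseArray([0, 0, 1]).isna().dtype      # Sparse[bool, False]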
+ dtype = SparseDtype(bool, self._null_fill_value) + if self._null_fill_value: + return type(self)._simple_new(isna(self.sp_values), self.sp_index, dtype) + mask = np.full(len(self), False, dtype=np.bool8) + mask[self.sp_index.indices] = isna(self.sp_values) + return type(self)(mask, fill_value=False, dtype=dtype) + + def fillna( + self: SparseArrayT, + value=None, + method: FillnaOptions | None = None, + limit: int | None = None, + ) -> SparseArrayT: + """ + Fill missing values with `value`. + + Parameters + ---------- + value : scalar, optional + method : str, optional + + .. warning:: + + Using 'method' will result in high memory use, + as all `fill_value` methods will be converted to + an in-memory ndarray + + limit : int, optional + + Returns + ------- + SparseArray + + Notes + ----- + When `value` is specified, the result's ``fill_value`` depends on + ``self.fill_value``. The goal is to maintain low-memory use. + + If ``self.fill_value`` is NA, the result dtype will be + ``SparseDtype(self.dtype, fill_value=value)``. This will preserve + amount of memory used before and after filling. + + When ``self.fill_value`` is not NA, the result dtype will be + ``self.dtype``. Again, this preserves the amount of memory used. + """ + if (method is None and value is None) or ( + method is not None and value is not None + ): + raise ValueError("Must specify one of 'method' or 'value'.") + + elif method is not None: + msg = "fillna with 'method' requires high memory usage." + warnings.warn(msg, PerformanceWarning) + # Need type annotation for "new_values" [var-annotated] + new_values = np.asarray(self) # type: ignore[var-annotated] + # interpolate_2d modifies new_values inplace + interpolate_2d(new_values, method=method, limit=limit) + return type(self)(new_values, fill_value=self.fill_value) + + else: + new_values = np.where(isna(self.sp_values), value, self.sp_values) + + if self._null_fill_value: + # This is essentially just updating the dtype. + new_dtype = SparseDtype(self.dtype.subtype, fill_value=value) + else: + new_dtype = self.dtype + + return self._simple_new(new_values, self._sparse_index, new_dtype) + + def shift(self: SparseArrayT, periods: int = 1, fill_value=None) -> SparseArrayT: + + if not len(self) or periods == 0: + return self.copy() + + if isna(fill_value): + fill_value = self.dtype.na_value + + subtype = np.result_type(fill_value, self.dtype.subtype) + + if subtype != self.dtype.subtype: + # just coerce up front + arr = self.astype(SparseDtype(subtype, self.fill_value)) + else: + arr = self + + empty = self._from_sequence( + [fill_value] * min(abs(periods), len(self)), dtype=arr.dtype + ) + + if periods > 0: + a = empty + b = arr[:-periods] + else: + a = arr[abs(periods) :] + b = empty + return arr._concat_same_type([a, b]) + + def _first_fill_value_loc(self): + """ + Get the location of the first missing value. 
+ + Returns + ------- + int + """ + if len(self) == 0 or self.sp_index.npoints == len(self): + return -1 + + indices = self.sp_index.indices + if not len(indices) or indices[0] > 0: + return 0 + + diff = indices[1:] - indices[:-1] + return np.searchsorted(diff, 2) + 1 + + def unique(self: SparseArrayT) -> SparseArrayT: + uniques = list(algos.unique(self.sp_values)) + fill_loc = self._first_fill_value_loc() + if fill_loc >= 0: + uniques.insert(fill_loc, self.fill_value) + return type(self)._from_sequence(uniques, dtype=self.dtype) + + def _values_for_factorize(self): + # Still override this for hash_pandas_object + return np.asarray(self), self.fill_value + + def factorize(self, na_sentinel: int = -1) -> tuple[np.ndarray, SparseArray]: + # Currently, ExtensionArray.factorize -> Tuple[ndarray, EA] + # The sparsity on this is backwards from what Sparse would want. Want + # ExtensionArray.factorize -> Tuple[EA, EA] + # Given that we have to return a dense array of codes, why bother + # implementing an efficient factorize? + codes, uniques = algos.factorize(np.asarray(self), na_sentinel=na_sentinel) + uniques_sp = SparseArray(uniques, dtype=self.dtype) + return codes, uniques_sp + + def value_counts(self, dropna: bool = True) -> Series: + """ + Returns a Series containing counts of unique values. + + Parameters + ---------- + dropna : bool, default True + Don't include counts of NaN, even if NaN is in sp_values. + + Returns + ------- + counts : Series + """ + from pandas import ( + Index, + Series, + ) + + keys, counts = algos.value_counts_arraylike(self.sp_values, dropna=dropna) + fcounts = self.sp_index.ngaps + if fcounts > 0 and (not self._null_fill_value or not dropna): + mask = isna(keys) if self._null_fill_value else keys == self.fill_value + if mask.any(): + counts[mask] += fcounts + else: + keys = np.insert(keys, 0, self.fill_value) + counts = np.insert(counts, 0, fcounts) + + if not isinstance(keys, ABCIndex): + keys = Index(keys) + return Series(counts, index=keys) + + def _quantile(self, qs: npt.NDArray[np.float64], interpolation: str): + # Special case: the returned array isn't _really_ sparse, so we don't + # wrap it in a SparseArray + result = super()._quantile(qs, interpolation) + return np.asarray(result) + + # -------- + # Indexing + # -------- + @overload + def __getitem__(self, key: ScalarIndexer) -> Any: + ... + + @overload + def __getitem__( + self: SparseArrayT, + key: SequenceIndexer | tuple[int | ellipsis, ...], + ) -> SparseArrayT: + ... 
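+    # Illustrative sketch of the indexing implemented below (assumed values,
+    # not a doctest):
+    #   arr = SparseArray([0, 0, 1, 2])
+    #   arr[2]   # 1, a scalar lookup via _get_val_at
+    #   arr[1:]  # SparseArray([0, 1, 2]); contiguous slices are taken from
+    #            # the sparse representation without densifying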
+ + def __getitem__( + self: SparseArrayT, + key: PositionalIndexer | tuple[int | ellipsis, ...], + ) -> SparseArrayT | Any: + + if isinstance(key, tuple): + key = unpack_tuple_and_ellipses(key) + # Non-overlapping identity check (left operand type: + # "Union[Union[Union[int, integer[Any]], Union[slice, List[int], + # ndarray[Any, Any]]], Tuple[Union[int, ellipsis], ...]]", + # right operand type: "ellipsis") + if key is Ellipsis: # type: ignore[comparison-overlap] + raise ValueError("Cannot slice with Ellipsis") + + if is_integer(key): + return self._get_val_at(key) + elif isinstance(key, tuple): + # Invalid index type "Tuple[Union[int, ellipsis], ...]" for + # "ndarray[Any, Any]"; expected type "Union[SupportsIndex, + # _SupportsArray[dtype[Union[bool_, integer[Any]]]], _NestedSequence[_Su + # pportsArray[dtype[Union[bool_, integer[Any]]]]], + # _NestedSequence[Union[bool, int]], Tuple[Union[SupportsIndex, + # _SupportsArray[dtype[Union[bool_, integer[Any]]]], + # _NestedSequence[_SupportsArray[dtype[Union[bool_, integer[Any]]]]], _N + # estedSequence[Union[bool, int]]], ...]]" [index] + data_slice = self.to_dense()[key] # type: ignore[index] + elif isinstance(key, slice): + + # Avoid densifying when handling contiguous slices + if key.step is None or key.step == 1: + start = 0 if key.start is None else key.start + if start < 0: + start += len(self) + + end = len(self) if key.stop is None else key.stop + if end < 0: + end += len(self) + + indices = self.sp_index.indices + keep_inds = np.flatnonzero((indices >= start) & (indices < end)) + sp_vals = self.sp_values[keep_inds] + + sp_index = indices[keep_inds].copy() + + # If we've sliced to not include the start of the array, all our indices + # should be shifted. NB: here we are careful to also not shift by a + # negative value for a case like [0, 1][-100:] where the start index + # should be treated like 0 + if start > 0: + sp_index -= start + + # Length of our result should match applying this slice to a range + # of the length of our original array + new_len = len(range(len(self))[key]) + new_sp_index = make_sparse_index(new_len, sp_index, self.kind) + return type(self)._simple_new(sp_vals, new_sp_index, self.dtype) + else: + indices = np.arange(len(self), dtype=np.int32)[key] + return self.take(indices) + + elif not is_list_like(key): + # e.g. 
"foo" or 2.5 + # exception message copied from numpy + raise IndexError( + r"only integers, slices (`:`), ellipsis (`...`), numpy.newaxis " + r"(`None`) and integer or boolean arrays are valid indices" + ) + + else: + if isinstance(key, SparseArray): + # NOTE: If we guarantee that SparseDType(bool) + # has only fill_value - true, false or nan + # (see GH PR 44955) + # we can apply mask very fast: + if is_bool_dtype(key): + if isna(key.fill_value): + return self.take(key.sp_index.indices[key.sp_values]) + if not key.fill_value: + return self.take(key.sp_index.indices) + n = len(self) + mask = np.full(n, True, dtype=np.bool8) + mask[key.sp_index.indices] = False + return self.take(np.arange(n)[mask]) + else: + key = np.asarray(key) + + key = check_array_indexer(self, key) + + if com.is_bool_indexer(key): + # mypy doesn't know we have an array here + key = cast(np.ndarray, key) + return self.take(np.arange(len(key), dtype=np.int32)[key]) + elif hasattr(key, "__len__"): + return self.take(key) + else: + raise ValueError(f"Cannot slice with '{key}'") + + return type(self)(data_slice, kind=self.kind) + + def _get_val_at(self, loc): + loc = validate_insert_loc(loc, len(self)) + + sp_loc = self.sp_index.lookup(loc) + if sp_loc == -1: + return self.fill_value + else: + val = self.sp_values[sp_loc] + val = maybe_box_datetimelike(val, self.sp_values.dtype) + return val + + def take( + self: SparseArrayT, indices, *, allow_fill: bool = False, fill_value=None + ) -> SparseArrayT: + if is_scalar(indices): + raise ValueError(f"'indices' must be an array, not a scalar '{indices}'.") + indices = np.asarray(indices, dtype=np.int32) + + dtype = None + if indices.size == 0: + result = np.array([], dtype="object") + dtype = self.dtype + elif allow_fill: + result = self._take_with_fill(indices, fill_value=fill_value) + else: + return self._take_without_fill(indices) + + return type(self)( + result, fill_value=self.fill_value, kind=self.kind, dtype=dtype + ) + + def _take_with_fill(self, indices, fill_value=None) -> np.ndarray: + if fill_value is None: + fill_value = self.dtype.na_value + + if indices.min() < -1: + raise ValueError( + "Invalid value in 'indices'. Must be between -1 " + "and the length of the array." + ) + + if indices.max() >= len(self): + raise IndexError("out of bounds value in 'indices'.") + + if len(self) == 0: + # Empty... Allow taking only if all empty + if (indices == -1).all(): + dtype = np.result_type(self.sp_values, type(fill_value)) + taken = np.empty_like(indices, dtype=dtype) + taken.fill(fill_value) + return taken + else: + raise IndexError("cannot do a non-empty take from an empty axes.") + + # sp_indexer may be -1 for two reasons + # 1.) we took for an index of -1 (new) + # 2.) we took a value that was self.fill_value (old) + sp_indexer = self.sp_index.lookup_array(indices) + new_fill_indices = indices == -1 + old_fill_indices = (sp_indexer == -1) & ~new_fill_indices + + if self.sp_index.npoints == 0 and old_fill_indices.all(): + # We've looked up all valid points on an all-sparse array. + taken = np.full( + sp_indexer.shape, fill_value=self.fill_value, dtype=self.dtype.subtype + ) + + elif self.sp_index.npoints == 0: + # Avoid taking from the empty self.sp_values + _dtype = np.result_type(self.dtype.subtype, type(fill_value)) + taken = np.full(sp_indexer.shape, fill_value=fill_value, dtype=_dtype) + else: + taken = self.sp_values.take(sp_indexer) + + # Fill in two steps. + # Old fill values + # New fill values + # potentially coercing to a new dtype at each stage. 
+ + m0 = sp_indexer[old_fill_indices] < 0 + m1 = sp_indexer[new_fill_indices] < 0 + + result_type = taken.dtype + + if m0.any(): + result_type = np.result_type(result_type, type(self.fill_value)) + taken = taken.astype(result_type) + taken[old_fill_indices] = self.fill_value + + if m1.any(): + result_type = np.result_type(result_type, type(fill_value)) + taken = taken.astype(result_type) + taken[new_fill_indices] = fill_value + + return taken + + def _take_without_fill(self: SparseArrayT, indices) -> SparseArrayT: + to_shift = indices < 0 + + n = len(self) + + if (indices.max() >= n) or (indices.min() < -n): + if n == 0: + raise IndexError("cannot do a non-empty take from an empty axes.") + else: + raise IndexError("out of bounds value in 'indices'.") + + if to_shift.any(): + indices = indices.copy() + indices[to_shift] += n + + sp_indexer = self.sp_index.lookup_array(indices) + value_mask = sp_indexer != -1 + new_sp_values = self.sp_values[sp_indexer[value_mask]] + + value_indices = np.flatnonzero(value_mask).astype(np.int32, copy=False) + + new_sp_index = make_sparse_index(len(indices), value_indices, kind=self.kind) + return type(self)._simple_new(new_sp_values, new_sp_index, dtype=self.dtype) + + def searchsorted( + self, + v: ArrayLike | object, + side: Literal["left", "right"] = "left", + sorter: NumpySorter = None, + ) -> npt.NDArray[np.intp] | np.intp: + + msg = "searchsorted requires high memory usage." + warnings.warn(msg, PerformanceWarning, stacklevel=find_stack_level()) + if not is_scalar(v): + v = np.asarray(v) + v = np.asarray(v) + return np.asarray(self, dtype=self.dtype.subtype).searchsorted(v, side, sorter) + + def copy(self: SparseArrayT) -> SparseArrayT: + values = self.sp_values.copy() + return self._simple_new(values, self.sp_index, self.dtype) + + @classmethod + def _concat_same_type( + cls: type[SparseArrayT], to_concat: Sequence[SparseArrayT] + ) -> SparseArrayT: + fill_value = to_concat[0].fill_value + + values = [] + length = 0 + + if to_concat: + sp_kind = to_concat[0].kind + else: + sp_kind = "integer" + + sp_index: SparseIndex + if sp_kind == "integer": + indices = [] + + for arr in to_concat: + int_idx = arr.sp_index.indices.copy() + int_idx += length # TODO: wraparound + length += arr.sp_index.length + + values.append(arr.sp_values) + indices.append(int_idx) + + data = np.concatenate(values) + indices_arr = np.concatenate(indices) + # Argument 2 to "IntIndex" has incompatible type "ndarray[Any, + # dtype[signedinteger[_32Bit]]]"; expected "Sequence[int]" + sp_index = IntIndex(length, indices_arr) # type: ignore[arg-type] + + else: + # when concatenating block indices, we don't claim that you'll + # get an identical index as concatenating the values and then + # creating a new index. We don't want to spend the time trying + # to merge blocks across arrays in `to_concat`, so the resulting + # BlockIndex may have more blocks. + blengths = [] + blocs = [] + + for arr in to_concat: + block_idx = arr.sp_index.to_block_index() + + values.append(arr.sp_values) + blocs.append(block_idx.blocs.copy() + length) + blengths.append(block_idx.blengths) + length += arr.sp_index.length + + data = np.concatenate(values) + blocs_arr = np.concatenate(blocs) + blengths_arr = np.concatenate(blengths) + + sp_index = BlockIndex(length, blocs_arr, blengths_arr) + + return cls(data, sparse_index=sp_index, fill_value=fill_value) + + def astype(self, dtype: AstypeArg | None = None, copy: bool = True): + """ + Change the dtype of a SparseArray. 
+ + The output will always be a SparseArray. To convert to a dense + ndarray with a certain dtype, use :meth:`numpy.asarray`. + + Parameters + ---------- + dtype : np.dtype or ExtensionDtype + For SparseDtype, this changes the dtype of + ``self.sp_values`` and the ``self.fill_value``. + + For other dtypes, this only changes the dtype of + ``self.sp_values``. + + copy : bool, default True + Whether to ensure a copy is made, even if not necessary. + + Returns + ------- + SparseArray + + Examples + -------- + >>> arr = pd.arrays.SparseArray([0, 0, 1, 2]) + >>> arr + [0, 0, 1, 2] + Fill: 0 + IntIndex + Indices: array([2, 3], dtype=int32) + + >>> arr.astype(np.dtype('int32')) + [0, 0, 1, 2] + Fill: 0 + IntIndex + Indices: array([2, 3], dtype=int32) + + Using a NumPy dtype with a different kind (e.g. float) will coerce + just ``self.sp_values``. + + >>> arr.astype(np.dtype('float64')) + ... # doctest: +NORMALIZE_WHITESPACE + [0.0, 0.0, 1.0, 2.0] + Fill: 0.0 + IntIndex + Indices: array([2, 3], dtype=int32) + + Use a SparseDtype if you wish to be change the fill value as well. + + >>> arr.astype(SparseDtype("float64", fill_value=np.nan)) + ... # doctest: +NORMALIZE_WHITESPACE + [nan, nan, 1.0, 2.0] + Fill: nan + IntIndex + Indices: array([2, 3], dtype=int32) + """ + if is_dtype_equal(dtype, self._dtype): + if not copy: + return self + else: + return self.copy() + dtype = self.dtype.update_dtype(dtype) + subtype = pandas_dtype(dtype._subtype_with_str) + sp_values = astype_nansafe(self.sp_values, subtype, copy=copy) + + # error: Argument 1 to "_simple_new" of "SparseArray" has incompatible type + # "ExtensionArray"; expected "ndarray" + return self._simple_new( + sp_values, self.sp_index, dtype # type: ignore[arg-type] + ) + + def map(self: SparseArrayT, mapper) -> SparseArrayT: + """ + Map categories using an input mapping or function. + + Parameters + ---------- + mapper : dict, Series, callable + The correspondence from old values to new. + + Returns + ------- + SparseArray + The output array will have the same density as the input. + The output fill value will be the result of applying the + mapping to ``self.fill_value`` + + Examples + -------- + >>> arr = pd.arrays.SparseArray([0, 1, 2]) + >>> arr.map(lambda x: x + 10) + [10, 11, 12] + Fill: 10 + IntIndex + Indices: array([1, 2], dtype=int32) + + >>> arr.map({0: 10, 1: 11, 2: 12}) + [10, 11, 12] + Fill: 10 + IntIndex + Indices: array([1, 2], dtype=int32) + + >>> arr.map(pd.Series([10, 11, 12], index=[0, 1, 2])) + [10, 11, 12] + Fill: 10 + IntIndex + Indices: array([1, 2], dtype=int32) + """ + # this is used in apply. + # We get hit since we're an "is_extension_type" but regular extension + # types are not hit. This may be worth adding to the interface. + if isinstance(mapper, ABCSeries): + mapper = mapper.to_dict() + + if isinstance(mapper, abc.Mapping): + fill_value = mapper.get(self.fill_value, self.fill_value) + sp_values = [mapper.get(x, None) for x in self.sp_values] + else: + fill_value = mapper(self.fill_value) + sp_values = [mapper(x) for x in self.sp_values] + + return type(self)(sp_values, sparse_index=self.sp_index, fill_value=fill_value) + + def to_dense(self) -> np.ndarray: + """ + Convert SparseArray to a NumPy array. + + Returns + ------- + arr : NumPy array + """ + return np.asarray(self, dtype=self.sp_values.dtype) + + _internal_get_values = to_dense + + def _where(self, mask, value): + # NB: may not preserve dtype, e.g. 
result may be Sparse[float64] + # while self is Sparse[int64] + naive_implementation = np.where(mask, self, value) + result = type(self)._from_sequence(naive_implementation) + return result + + # ------------------------------------------------------------------------ + # IO + # ------------------------------------------------------------------------ + def __setstate__(self, state): + """Necessary for making this object picklable""" + if isinstance(state, tuple): + # Compat for pandas < 0.24.0 + nd_state, (fill_value, sp_index) = state + # Need type annotation for "sparse_values" [var-annotated] + sparse_values = np.array([]) # type: ignore[var-annotated] + sparse_values.__setstate__(nd_state) + + self._sparse_values = sparse_values + self._sparse_index = sp_index + self._dtype = SparseDtype(sparse_values.dtype, fill_value) + else: + self.__dict__.update(state) + + def nonzero(self): + if self.fill_value == 0: + return (self.sp_index.indices,) + else: + return (self.sp_index.indices[self.sp_values != 0],) + + # ------------------------------------------------------------------------ + # Reductions + # ------------------------------------------------------------------------ + + def _reduce(self, name: str, *, skipna: bool = True, **kwargs): + method = getattr(self, name, None) + + if method is None: + raise TypeError(f"cannot perform {name} with type {self.dtype}") + + if skipna: + arr = self + else: + arr = self.dropna() + + return getattr(arr, name)(**kwargs) + + def all(self, axis=None, *args, **kwargs): + """ + Tests whether all elements evaluate True + + Returns + ------- + all : bool + + See Also + -------- + numpy.all + """ + nv.validate_all(args, kwargs) + + values = self.sp_values + + if len(values) != len(self) and not np.all(self.fill_value): + return False + + return values.all() + + def any(self, axis=0, *args, **kwargs): + """ + Tests whether at least one of elements evaluate True + + Returns + ------- + any : bool + + See Also + -------- + numpy.any + """ + nv.validate_any(args, kwargs) + + values = self.sp_values + + if len(values) != len(self) and np.any(self.fill_value): + return True + + return values.any().item() + + def sum( + self, axis: int = 0, min_count: int = 0, skipna: bool = True, *args, **kwargs + ) -> Scalar: + """ + Sum of non-NA/null values + + Parameters + ---------- + axis : int, default 0 + Not Used. NumPy compatibility. + min_count : int, default 0 + The required number of valid values to perform the summation. If fewer + than ``min_count`` valid values are present, the result will be the missing + value indicator for subarray type. + *args, **kwargs + Not Used. NumPy compatibility. + + Returns + ------- + scalar + """ + nv.validate_sum(args, kwargs) + valid_vals = self._valid_sp_values + sp_sum = valid_vals.sum() + has_na = self.sp_index.ngaps > 0 and not self._null_fill_value + + if has_na and not skipna: + return na_value_for_dtype(self.dtype.subtype, compat=False) + + if self._null_fill_value: + if check_below_min_count(valid_vals.shape, None, min_count): + return na_value_for_dtype(self.dtype.subtype, compat=False) + return sp_sum + else: + nsparse = self.sp_index.ngaps + if check_below_min_count(valid_vals.shape, None, min_count - nsparse): + return na_value_for_dtype(self.dtype.subtype, compat=False) + return sp_sum + self.fill_value * nsparse + + def cumsum(self, axis: int = 0, *args, **kwargs) -> SparseArray: + """ + Cumulative sum of non-NA/null values. + + When performing the cumulative summation, any non-NA/null values will + be skipped. 
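+        (More precisely, it is the NA/null entries that are skipped from the
+        accumulation; non-NA values are summed as usual.)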
The resulting SparseArray will preserve the locations of + NaN values, but the fill value will be `np.nan` regardless. + + Parameters + ---------- + axis : int or None + Axis over which to perform the cumulative summation. If None, + perform cumulative summation over flattened array. + + Returns + ------- + cumsum : SparseArray + """ + nv.validate_cumsum(args, kwargs) + + if axis is not None and axis >= self.ndim: # Mimic ndarray behaviour. + raise ValueError(f"axis(={axis}) out of bounds") + + if not self._null_fill_value: + return SparseArray(self.to_dense()).cumsum() + + return SparseArray( + self.sp_values.cumsum(), + sparse_index=self.sp_index, + fill_value=self.fill_value, + ) + + def mean(self, axis=0, *args, **kwargs): + """ + Mean of non-NA/null values + + Returns + ------- + mean : float + """ + nv.validate_mean(args, kwargs) + valid_vals = self._valid_sp_values + sp_sum = valid_vals.sum() + ct = len(valid_vals) + + if self._null_fill_value: + return sp_sum / ct + else: + nsparse = self.sp_index.ngaps + return (sp_sum + self.fill_value * nsparse) / (ct + nsparse) + + def max(self, *, axis: int | None = None, skipna: bool = True): + """ + Max of array values, ignoring NA values if specified. + + Parameters + ---------- + axis : int, default 0 + Not Used. NumPy compatibility. + skipna : bool, default True + Whether to ignore NA values. + + Returns + ------- + scalar + """ + nv.validate_minmax_axis(axis, self.ndim) + return self._min_max("max", skipna=skipna) + + def min(self, *, axis: int | None = None, skipna: bool = True): + """ + Min of array values, ignoring NA values if specified. + + Parameters + ---------- + axis : int, default 0 + Not Used. NumPy compatibility. + skipna : bool, default True + Whether to ignore NA values. + + Returns + ------- + scalar + """ + nv.validate_minmax_axis(axis, self.ndim) + return self._min_max("min", skipna=skipna) + + def _min_max(self, kind: Literal["min", "max"], skipna: bool) -> Scalar: + """ + Min/max of non-NA/null values + + Parameters + ---------- + kind : {"min", "max"} + skipna : bool + + Returns + ------- + scalar + """ + valid_vals = self._valid_sp_values + has_nonnull_fill_vals = not self._null_fill_value and self.sp_index.ngaps > 0 + + if len(valid_vals) > 0: + sp_min_max = getattr(valid_vals, kind)() + + # If a non-null fill value is currently present, it might be the min/max + if has_nonnull_fill_vals: + func = max if kind == "max" else min + return func(sp_min_max, self.fill_value) + elif skipna: + return sp_min_max + elif self.sp_index.ngaps == 0: + # No NAs present + return sp_min_max + else: + return na_value_for_dtype(self.dtype.subtype, compat=False) + elif has_nonnull_fill_vals: + return self.fill_value + else: + return na_value_for_dtype(self.dtype.subtype, compat=False) + + # ------------------------------------------------------------------------ + # Ufuncs + # ------------------------------------------------------------------------ + + _HANDLED_TYPES = (np.ndarray, numbers.Number) + + def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): + out = kwargs.get("out", ()) + + for x in inputs + out: + if not isinstance(x, self._HANDLED_TYPES + (SparseArray,)): + return NotImplemented + + # for binary ops, use our custom dunder methods + result = ops.maybe_dispatch_ufunc_to_dunder_op( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + return result + + if "out" in kwargs: + # e.g. 
tests.arrays.sparse.test_arithmetics.test_ndarray_inplace + res = arraylike.dispatch_ufunc_with_out( + self, ufunc, method, *inputs, **kwargs + ) + return res + + if method == "reduce": + result = arraylike.dispatch_reduction_ufunc( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + # e.g. tests.series.test_ufunc.TestNumpyReductions + return result + + if len(inputs) == 1: + # No alignment necessary. + sp_values = getattr(ufunc, method)(self.sp_values, **kwargs) + fill_value = getattr(ufunc, method)(self.fill_value, **kwargs) + + if ufunc.nout > 1: + # multiple outputs. e.g. modf + arrays = tuple( + self._simple_new( + sp_value, self.sp_index, SparseDtype(sp_value.dtype, fv) + ) + for sp_value, fv in zip(sp_values, fill_value) + ) + return arrays + elif method == "reduce": + # e.g. reductions + return sp_values + + return self._simple_new( + sp_values, self.sp_index, SparseDtype(sp_values.dtype, fill_value) + ) + + new_inputs = tuple(np.asarray(x) for x in inputs) + result = getattr(ufunc, method)(*new_inputs, **kwargs) + if out: + if len(out) == 1: + out = out[0] + return out + + if ufunc.nout > 1: + return tuple(type(self)(x) for x in result) + elif method == "at": + # no return value + return None + else: + return type(self)(result) + + # ------------------------------------------------------------------------ + # Ops + # ------------------------------------------------------------------------ + + def _arith_method(self, other, op): + op_name = op.__name__ + + if isinstance(other, SparseArray): + return _sparse_array_op(self, other, op, op_name) + + elif is_scalar(other): + with np.errstate(all="ignore"): + fill = op(_get_fill(self), np.asarray(other)) + result = op(self.sp_values, other) + + if op_name == "divmod": + left, right = result + lfill, rfill = fill + return ( + _wrap_result(op_name, left, self.sp_index, lfill), + _wrap_result(op_name, right, self.sp_index, rfill), + ) + + return _wrap_result(op_name, result, self.sp_index, fill) + + else: + other = np.asarray(other) + with np.errstate(all="ignore"): + if len(self) != len(other): + raise AssertionError( + f"length mismatch: {len(self)} vs. {len(other)}" + ) + if not isinstance(other, SparseArray): + dtype = getattr(other, "dtype", None) + other = SparseArray(other, fill_value=self.fill_value, dtype=dtype) + return _sparse_array_op(self, other, op, op_name) + + def _cmp_method(self, other, op) -> SparseArray: + if not is_scalar(other) and not isinstance(other, type(self)): + # convert list-like to ndarray + other = np.asarray(other) + + if isinstance(other, np.ndarray): + # TODO: make this more flexible than just ndarray... 
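+            # Illustrative sketch (assumed values, not a doctest): the ndarray
+            # operand is wrapped so both sides share a sparse layout, e.g.
+            #   SparseArray([0, 0, 1]) == np.array([0, 1, 1])
+            #   # -> [True, False, True], with the fill computed from the op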
+ other = SparseArray(other, fill_value=self.fill_value) + + if isinstance(other, SparseArray): + if len(self) != len(other): + raise ValueError( + f"operands have mismatched length {len(self)} and {len(other)}" + ) + + op_name = op.__name__.strip("_") + return _sparse_array_op(self, other, op, op_name) + else: + # scalar + with np.errstate(all="ignore"): + fill_value = op(self.fill_value, other) + result = np.full(len(self), fill_value, dtype=np.bool_) + result[self.sp_index.indices] = op(self.sp_values, other) + + return type(self)( + result, + fill_value=fill_value, + dtype=np.bool_, + ) + + _logical_method = _cmp_method + + def _unary_method(self, op) -> SparseArray: + fill_value = op(np.array(self.fill_value)).item() + dtype = SparseDtype(self.dtype.subtype, fill_value) + # NOTE: if fill_value doesn't change + # we just have to apply op to sp_values + if isna(self.fill_value) or fill_value == self.fill_value: + values = op(self.sp_values) + return type(self)._simple_new(values, self.sp_index, self.dtype) + # In the other case we have to recalc indexes + return type(self)(op(self.to_dense()), dtype=dtype) + + def __pos__(self) -> SparseArray: + return self._unary_method(operator.pos) + + def __neg__(self) -> SparseArray: + return self._unary_method(operator.neg) + + def __invert__(self) -> SparseArray: + return self._unary_method(operator.invert) + + def __abs__(self) -> SparseArray: + return self._unary_method(operator.abs) + + # ---------- + # Formatting + # ----------- + def __repr__(self) -> str: + pp_str = printing.pprint_thing(self) + pp_fill = printing.pprint_thing(self.fill_value) + pp_index = printing.pprint_thing(self.sp_index) + return f"{pp_str}\nFill: {pp_fill}\n{pp_index}" + + def _formatter(self, boxed=False): + # Defer to the formatter from the GenericArrayFormatter calling us. + # This will infer the correct formatter from the dtype of the values. + return None + + +def make_sparse( + arr: np.ndarray, + kind: SparseIndexKind = "block", + fill_value=None, + dtype: NpDtype | None = None, +): + """ + Convert ndarray to sparse format + + Parameters + ---------- + arr : ndarray + kind : {'block', 'integer'} + fill_value : NaN or another value + dtype : np.dtype, optional + copy : bool, default False + + Returns + ------- + (sparse_values, index, fill_value) : (ndarray, SparseIndex, Scalar) + """ + assert isinstance(arr, np.ndarray) + + if arr.ndim > 1: + raise TypeError("expected dimension <= 1 data") + + if fill_value is None: + fill_value = na_value_for_dtype(arr.dtype) + + if isna(fill_value): + mask = notna(arr) + else: + # cast to object comparison to be safe + if is_string_dtype(arr.dtype): + arr = arr.astype(object) + + if is_object_dtype(arr.dtype): + # element-wise equality check method in numpy doesn't treat + # each element type, eg. 0, 0.0, and False are treated as + # same. So we have to check the both of its type and value. 
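+            # Illustrative sketch of the conflation avoided here (plain ``==``
+            # on an object ndarray is assumed):
+            #   np.array([0, False, 0.0], dtype=object) == 0
+            #   # -> array([True, True, True]); all three would otherwise be
+            #   # treated as the fill value despite their different types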
+ mask = splib.make_mask_object_ndarray(arr, fill_value) + else: + mask = arr != fill_value + + length = len(arr) + if length != len(mask): + # the arr is a SparseArray + indices = mask.sp_index.indices + else: + indices = mask.nonzero()[0].astype(np.int32) + + index = make_sparse_index(length, indices, kind) + sparsified_values = arr[mask] + if dtype is not None: + # error: Argument "dtype" to "astype_nansafe" has incompatible type "Union[str, + # dtype[Any]]"; expected "Union[dtype[Any], ExtensionDtype]" + sparsified_values = astype_nansafe( + sparsified_values, dtype=dtype # type: ignore[arg-type] + ) + # TODO: copy + return sparsified_values, index, fill_value + + +@overload +def make_sparse_index(length: int, indices, kind: Literal["block"]) -> BlockIndex: + ... + + +@overload +def make_sparse_index(length: int, indices, kind: Literal["integer"]) -> IntIndex: + ... + + +def make_sparse_index(length: int, indices, kind: SparseIndexKind) -> SparseIndex: + index: SparseIndex + if kind == "block": + locs, lens = splib.get_blocks(indices) + index = BlockIndex(length, locs, lens) + elif kind == "integer": + index = IntIndex(length, indices) + else: # pragma: no cover + raise ValueError("must be block or integer type") + return index diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/sparse/dtype.py b/.local/lib/python3.8/site-packages/pandas/core/arrays/sparse/dtype.py new file mode 100644 index 0000000..f1da242 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/arrays/sparse/dtype.py @@ -0,0 +1,414 @@ +"""Sparse Dtype""" +from __future__ import annotations + +import re +from typing import ( + TYPE_CHECKING, + Any, +) +import warnings + +import numpy as np + +from pandas._typing import ( + Dtype, + DtypeObj, + type_t, +) +from pandas.errors import PerformanceWarning +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.base import ( + ExtensionDtype, + register_extension_dtype, +) +from pandas.core.dtypes.cast import astype_nansafe +from pandas.core.dtypes.common import ( + is_bool_dtype, + is_object_dtype, + is_scalar, + is_string_dtype, + pandas_dtype, +) +from pandas.core.dtypes.missing import ( + isna, + na_value_for_dtype, +) + +if TYPE_CHECKING: + from pandas.core.arrays.sparse.array import SparseArray + + +@register_extension_dtype +class SparseDtype(ExtensionDtype): + """ + Dtype for data stored in :class:`SparseArray`. + + This dtype implements the pandas ExtensionDtype interface. + + Parameters + ---------- + dtype : str, ExtensionDtype, numpy.dtype, type, default numpy.float64 + The dtype of the underlying array storing the non-fill value values. + fill_value : scalar, optional + The scalar value not stored in the SparseArray. By default, this + depends on `dtype`. + + =========== ========== + dtype na_value + =========== ========== + float ``np.nan`` + int ``0`` + bool ``False`` + datetime64 ``pd.NaT`` + timedelta64 ``pd.NaT`` + =========== ========== + + The default value may be overridden by specifying a `fill_value`. + + Attributes + ---------- + None + + Methods + ------- + None + """ + + # We include `_is_na_fill_value` in the metadata to avoid hash collisions + # between SparseDtype(float, 0.0) and SparseDtype(float, nan). + # Without is_na_fill_value in the comparison, those would be equal since + # hash(nan) is (sometimes?) 0. 
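+    # Illustrative sketch (CPython < 3.10 behaviour assumed):
+    #   hash(float("nan")) == hash(0.0)  # True, so fill_value alone could
+    #   # not distinguish the two dtypes above in hashed containers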
+ _metadata = ("_dtype", "_fill_value", "_is_na_fill_value") + + def __init__(self, dtype: Dtype = np.float64, fill_value: Any = None): + + if isinstance(dtype, type(self)): + if fill_value is None: + fill_value = dtype.fill_value + dtype = dtype.subtype + + dtype = pandas_dtype(dtype) + if is_string_dtype(dtype): + dtype = np.dtype("object") + + if fill_value is None: + fill_value = na_value_for_dtype(dtype) + + self._dtype = dtype + self._fill_value = fill_value + self._check_fill_value() + + def __hash__(self): + # Python3 doesn't inherit __hash__ when a base class overrides + # __eq__, so we explicitly do it here. + return super().__hash__() + + def __eq__(self, other: Any) -> bool: + # We have to override __eq__ to handle NA values in _metadata. + # The base class does simple == checks, which fail for NA. + if isinstance(other, str): + try: + other = self.construct_from_string(other) + except TypeError: + return False + + if isinstance(other, type(self)): + subtype = self.subtype == other.subtype + if self._is_na_fill_value: + # this case is complicated by two things: + # SparseDtype(float, float(nan)) == SparseDtype(float, np.nan) + # SparseDtype(float, np.nan) != SparseDtype(float, pd.NaT) + # i.e. we want to treat any floating-point NaN as equal, but + # not a floating-point NaN and a datetime NaT. + fill_value = ( + other._is_na_fill_value + and isinstance(self.fill_value, type(other.fill_value)) + or isinstance(other.fill_value, type(self.fill_value)) + ) + else: + fill_value = self.fill_value == other.fill_value + + return subtype and fill_value + return False + + @property + def fill_value(self): + """ + The fill value of the array. + + Converting the SparseArray to a dense ndarray will fill the + array with this value. + + .. warning:: + + It's possible to end up with a SparseArray that has ``fill_value`` + values in ``sp_values``. This can occur, for example, when setting + ``SparseArray.fill_value`` directly. + """ + return self._fill_value + + def _check_fill_value(self): + if not is_scalar(self._fill_value): + raise ValueError( + f"fill_value must be a scalar. Got {self._fill_value} instead" + ) + # TODO: Right now we can use Sparse boolean array + # with any fill_value. Here was an attempt + # to allow only 3 value: True, False or nan + # but plenty test has failed. + # see pull 44955 + # if self._is_boolean and not ( + # is_bool(self._fill_value) or isna(self._fill_value) + # ): + # raise ValueError( + # "fill_value must be True, False or nan " + # f"for boolean type. Got {self._fill_value} instead" + # ) + + @property + def _is_na_fill_value(self) -> bool: + return isna(self.fill_value) + + @property + def _is_numeric(self) -> bool: + return not is_object_dtype(self.subtype) + + @property + def _is_boolean(self) -> bool: + return is_bool_dtype(self.subtype) + + @property + def kind(self): + """ + The sparse kind. Either 'integer', or 'block'. + """ + return self.subtype.kind + + @property + def type(self): + return self.subtype.type + + @property + def subtype(self): + return self._dtype + + @property + def name(self): + return f"Sparse[{self.subtype.name}, {repr(self.fill_value)}]" + + def __repr__(self) -> str: + return self.name + + @classmethod + def construct_array_type(cls) -> type_t[SparseArray]: + """ + Return the array type associated with this dtype. 
+ + Returns + ------- + type + """ + from pandas.core.arrays.sparse.array import SparseArray + + return SparseArray + + @classmethod + def construct_from_string(cls, string: str) -> SparseDtype: + """ + Construct a SparseDtype from a string form. + + Parameters + ---------- + string : str + Can take the following forms. + + string dtype + ================ ============================ + 'int' SparseDtype[np.int64, 0] + 'Sparse' SparseDtype[np.float64, nan] + 'Sparse[int]' SparseDtype[np.int64, 0] + 'Sparse[int, 0]' SparseDtype[np.int64, 0] + ================ ============================ + + It is not possible to specify non-default fill values + with a string. An argument like ``'Sparse[int, 1]'`` + will raise a ``TypeError`` because the default fill value + for integers is 0. + + Returns + ------- + SparseDtype + """ + if not isinstance(string, str): + raise TypeError( + f"'construct_from_string' expects a string, got {type(string)}" + ) + msg = f"Cannot construct a 'SparseDtype' from '{string}'" + if string.startswith("Sparse"): + try: + sub_type, has_fill_value = cls._parse_subtype(string) + except ValueError as err: + raise TypeError(msg) from err + else: + result = SparseDtype(sub_type) + msg = ( + f"Cannot construct a 'SparseDtype' from '{string}'.\n\nIt " + "looks like the fill_value in the string is not " + "the default for the dtype. Non-default fill_values " + "are not supported. Use the 'SparseDtype()' " + "constructor instead." + ) + if has_fill_value and str(result) != string: + raise TypeError(msg) + return result + else: + raise TypeError(msg) + + @staticmethod + def _parse_subtype(dtype: str) -> tuple[str, bool]: + """ + Parse a string to get the subtype + + Parameters + ---------- + dtype : str + A string like + + * Sparse[subtype] + * Sparse[subtype, fill_value] + + Returns + ------- + subtype : str + + Raises + ------ + ValueError + When the subtype cannot be extracted. + """ + xpr = re.compile(r"Sparse\[(?P[^,]*)(, )?(?P.*?)?\]$") + m = xpr.match(dtype) + has_fill_value = False + if m: + subtype = m.groupdict()["subtype"] + has_fill_value = bool(m.groupdict()["fill_value"]) + elif dtype == "Sparse": + subtype = "float64" + else: + raise ValueError(f"Cannot parse {dtype}") + return subtype, has_fill_value + + @classmethod + def is_dtype(cls, dtype: object) -> bool: + dtype = getattr(dtype, "dtype", dtype) + if isinstance(dtype, str) and dtype.startswith("Sparse"): + sub_type, _ = cls._parse_subtype(dtype) + dtype = np.dtype(sub_type) + elif isinstance(dtype, cls): + return True + return isinstance(dtype, np.dtype) or dtype == "Sparse" + + def update_dtype(self, dtype) -> SparseDtype: + """ + Convert the SparseDtype to a new dtype. + + This takes care of converting the ``fill_value``. + + Parameters + ---------- + dtype : Union[str, numpy.dtype, SparseDtype] + The new dtype to use. + + * For a SparseDtype, it is simply returned + * For a NumPy dtype (or str), the current fill value + is converted to the new dtype, and a SparseDtype + with `dtype` and the new fill value is returned. + + Returns + ------- + SparseDtype + A new SparseDtype with the correct `dtype` and fill value + for that `dtype`. + + Raises + ------ + ValueError + When the current fill value cannot be converted to the + new `dtype` (e.g. trying to convert ``np.nan`` to an + integer dtype). 
+ + + Examples + -------- + >>> SparseDtype(int, 0).update_dtype(float) + Sparse[float64, 0.0] + + >>> SparseDtype(int, 1).update_dtype(SparseDtype(float, np.nan)) + Sparse[float64, nan] + """ + cls = type(self) + dtype = pandas_dtype(dtype) + + if not isinstance(dtype, cls): + if not isinstance(dtype, np.dtype): + raise TypeError("sparse arrays of extension dtypes not supported") + + fill_value = astype_nansafe(np.array(self.fill_value), dtype).item() + dtype = cls(dtype, fill_value=fill_value) + + return dtype + + @property + def _subtype_with_str(self): + """ + Whether the SparseDtype's subtype should be considered ``str``. + + Typically, pandas will store string data in an object-dtype array. + When converting values to a dtype, e.g. in ``.astype``, we need to + be more specific, we need the actual underlying type. + + Returns + ------- + >>> SparseDtype(int, 1)._subtype_with_str + dtype('int64') + + >>> SparseDtype(object, 1)._subtype_with_str + dtype('O') + + >>> dtype = SparseDtype(str, '') + >>> dtype.subtype + dtype('O') + + >>> dtype._subtype_with_str + + """ + if isinstance(self.fill_value, str): + return type(self.fill_value) + return self.subtype + + def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None: + # TODO for now only handle SparseDtypes and numpy dtypes => extend + # with other compatible extension dtypes + if any( + isinstance(x, ExtensionDtype) and not isinstance(x, SparseDtype) + for x in dtypes + ): + return None + + fill_values = [x.fill_value for x in dtypes if isinstance(x, SparseDtype)] + fill_value = fill_values[0] + + # np.nan isn't a singleton, so we may end up with multiple + # NaNs here, so we ignore the all NA case too. + if not (len(set(fill_values)) == 1 or isna(fill_values).all()): + warnings.warn( + "Concatenating sparse arrays with multiple fill " + f"values: '{fill_values}'. Picking the first and " + "converting the rest.", + PerformanceWarning, + stacklevel=find_stack_level(), + ) + + np_dtypes = [x.subtype if isinstance(x, SparseDtype) else x for x in dtypes] + return SparseDtype(np.find_common_type(np_dtypes, []), fill_value=fill_value) diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/sparse/scipy_sparse.py b/.local/lib/python3.8/site-packages/pandas/core/arrays/sparse/scipy_sparse.py new file mode 100644 index 0000000..88e1778 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/arrays/sparse/scipy_sparse.py @@ -0,0 +1,211 @@ +""" +Interaction with scipy.sparse matrices. + +Currently only includes to_coo helpers. 
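+
+A hedged usage sketch (variable names are illustrative only):
+
+    >>> mi = pd.MultiIndex.from_tuples([(0, 0), (0, 1), (1, 0)])
+    >>> ss = pd.Series([3.0, np.nan, 1.0], index=mi, dtype="Sparse[float]")
+    >>> A, rows, cols = ss.sparse.to_coo(row_levels=[0], column_levels=[1])
+    >>> A.todense()
+    matrix([[3., 0.],
+            [1., 0.]])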
+""" +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Iterable, +) + +import numpy as np + +from pandas._libs import lib +from pandas._typing import ( + IndexLabel, + npt, +) + +from pandas.core.dtypes.missing import notna + +from pandas.core.algorithms import factorize +from pandas.core.indexes.api import MultiIndex +from pandas.core.series import Series + +if TYPE_CHECKING: + import scipy.sparse + + +def _check_is_partition(parts: Iterable, whole: Iterable): + whole = set(whole) + parts = [set(x) for x in parts] + if set.intersection(*parts) != set(): + raise ValueError("Is not a partition because intersection is not null.") + if set.union(*parts) != whole: + raise ValueError("Is not a partition because union is not the whole.") + + +def _levels_to_axis( + ss, + levels: tuple[int] | list[int], + valid_ilocs: npt.NDArray[np.intp], + sort_labels: bool = False, +) -> tuple[npt.NDArray[np.intp], list[IndexLabel]]: + """ + For a MultiIndexed sparse Series `ss`, return `ax_coords` and `ax_labels`, + where `ax_coords` are the coordinates along one of the two axes of the + destination sparse matrix, and `ax_labels` are the labels from `ss`' Index + which correspond to these coordinates. + + Parameters + ---------- + ss : Series + levels : tuple/list + valid_ilocs : numpy.ndarray + Array of integer positions of valid values for the sparse matrix in ss. + sort_labels : bool, default False + Sort the axis labels before forming the sparse matrix. When `levels` + refers to a single level, set to True for a faster execution. + + Returns + ------- + ax_coords : numpy.ndarray (axis coordinates) + ax_labels : list (axis labels) + """ + # Since the labels are sorted in `Index.levels`, when we wish to sort and + # there is only one level of the MultiIndex for this axis, the desired + # output can be obtained in the following simpler, more efficient way. + if sort_labels and len(levels) == 1: + ax_coords = ss.index.codes[levels[0]][valid_ilocs] + ax_labels = ss.index.levels[levels[0]] + + else: + levels_values = lib.fast_zip( + [ss.index.get_level_values(lvl).values for lvl in levels] + ) + codes, ax_labels = factorize(levels_values, sort=sort_labels) + ax_coords = codes[valid_ilocs] + + ax_labels = ax_labels.tolist() + return ax_coords, ax_labels + + +def _to_ijv( + ss, + row_levels: tuple[int] | list[int] = (0,), + column_levels: tuple[int] | list[int] = (1,), + sort_labels: bool = False, +) -> tuple[ + np.ndarray, + npt.NDArray[np.intp], + npt.NDArray[np.intp], + list[IndexLabel], + list[IndexLabel], +]: + """ + For an arbitrary MultiIndexed sparse Series return (v, i, j, ilabels, + jlabels) where (v, (i, j)) is suitable for passing to scipy.sparse.coo + constructor, and ilabels and jlabels are the row and column labels + respectively. + + Parameters + ---------- + ss : Series + row_levels : tuple/list + column_levels : tuple/list + sort_labels : bool, default False + Sort the row and column labels before forming the sparse matrix. + When `row_levels` and/or `column_levels` refer to a single level, + set to `True` for a faster execution. + + Returns + ------- + values : numpy.ndarray + Valid values to populate a sparse matrix, extracted from + ss. 
+ i_coords : numpy.ndarray (row coordinates of the values) + j_coords : numpy.ndarray (column coordinates of the values) + i_labels : list (row labels) + j_labels : list (column labels) + """ + # index and column levels must be a partition of the index + _check_is_partition([row_levels, column_levels], range(ss.index.nlevels)) + # From the sparse Series, get the integer indices and data for valid sparse + # entries. + sp_vals = ss.array.sp_values + na_mask = notna(sp_vals) + values = sp_vals[na_mask] + valid_ilocs = ss.array.sp_index.indices[na_mask] + + i_coords, i_labels = _levels_to_axis( + ss, row_levels, valid_ilocs, sort_labels=sort_labels + ) + + j_coords, j_labels = _levels_to_axis( + ss, column_levels, valid_ilocs, sort_labels=sort_labels + ) + + return values, i_coords, j_coords, i_labels, j_labels + + +def sparse_series_to_coo( + ss: Series, + row_levels: Iterable[int] = (0,), + column_levels: Iterable[int] = (1,), + sort_labels: bool = False, +) -> tuple[scipy.sparse.coo_matrix, list[IndexLabel], list[IndexLabel]]: + """ + Convert a sparse Series to a scipy.sparse.coo_matrix using index + levels row_levels, column_levels as the row and column + labels respectively. Returns the sparse_matrix, row and column labels. + """ + import scipy.sparse + + if ss.index.nlevels < 2: + raise ValueError("to_coo requires MultiIndex with nlevels >= 2.") + if not ss.index.is_unique: + raise ValueError( + "Duplicate index entries are not allowed in to_coo transformation." + ) + + # to keep things simple, only rely on integer indexing (not labels) + row_levels = [ss.index._get_level_number(x) for x in row_levels] + column_levels = [ss.index._get_level_number(x) for x in column_levels] + + v, i, j, rows, columns = _to_ijv( + ss, row_levels=row_levels, column_levels=column_levels, sort_labels=sort_labels + ) + sparse_matrix = scipy.sparse.coo_matrix( + (v, (i, j)), shape=(len(rows), len(columns)) + ) + return sparse_matrix, rows, columns + + +def coo_to_sparse_series( + A: scipy.sparse.coo_matrix, dense_index: bool = False +) -> Series: + """ + Convert a scipy.sparse.coo_matrix to a SparseSeries. + + Parameters + ---------- + A : scipy.sparse.coo_matrix + dense_index : bool, default False + + Returns + ------- + Series + + Raises + ------ + TypeError if A is not a coo_matrix + """ + from pandas import SparseDtype + + try: + ser = Series(A.data, MultiIndex.from_arrays((A.row, A.col))) + except AttributeError as err: + raise TypeError( + f"Expected coo_matrix. Got {type(A).__name__} instead." + ) from err + ser = ser.sort_index() + ser = ser.astype(SparseDtype(ser.dtype)) + if dense_index: + # is there a better constructor method to use here? 
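
A minimal round trip through the public accessors Series.sparse.to_coo / Series.sparse.from_coo (which call the helpers above) would look roughly like this; the data and level names are illustrative:

    import numpy as np
    import pandas as pd

    s = pd.Series([3.0, np.nan, 1.0, 3.0], dtype="Sparse[float]")
    s.index = pd.MultiIndex.from_tuples(
        [(1, "a"), (1, "b"), (2, "a"), (2, "b")], names=["row", "col"]
    )
    A, rows, cols = s.sparse.to_coo(
        row_levels=["row"], column_levels=["col"], sort_labels=True
    )
    # A is a 2x2 scipy.sparse.coo_matrix; rows == [1, 2], cols == ['a', 'b']
    back = pd.Series.sparse.from_coo(A)  # sparse Series indexed by (row, col)
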
+ i = range(A.shape[0]) + j = range(A.shape[1]) + ind = MultiIndex.from_product([i, j]) + ser = ser.reindex(ind) + return ser diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/string_.py b/.local/lib/python3.8/site-packages/pandas/core/arrays/string_.py new file mode 100644 index 0000000..919b882 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/arrays/string_.py @@ -0,0 +1,572 @@ +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Any, +) + +import numpy as np + +from pandas._config import get_option + +from pandas._libs import ( + lib, + missing as libmissing, +) +from pandas._libs.arrays import NDArrayBacked +from pandas._typing import ( + Dtype, + Scalar, + type_t, +) +from pandas.compat import pa_version_under1p01 +from pandas.compat.numpy import function as nv + +from pandas.core.dtypes.base import ( + ExtensionDtype, + register_extension_dtype, +) +from pandas.core.dtypes.common import ( + is_array_like, + is_bool_dtype, + is_dtype_equal, + is_integer_dtype, + is_object_dtype, + is_string_dtype, + pandas_dtype, +) + +from pandas.core import ops +from pandas.core.array_algos import masked_reductions +from pandas.core.arrays import ( + FloatingArray, + IntegerArray, + PandasArray, +) +from pandas.core.arrays.base import ExtensionArray +from pandas.core.arrays.floating import FloatingDtype +from pandas.core.arrays.integer import _IntegerDtype +from pandas.core.construction import extract_array +from pandas.core.indexers import check_array_indexer +from pandas.core.missing import isna + +if TYPE_CHECKING: + import pyarrow + + +@register_extension_dtype +class StringDtype(ExtensionDtype): + """ + Extension dtype for string data. + + .. versionadded:: 1.0.0 + + .. warning:: + + StringDtype is considered experimental. The implementation and + parts of the API may change without warning. + + In particular, StringDtype.na_value may change to no longer be + ``numpy.nan``. + + Parameters + ---------- + storage : {"python", "pyarrow"}, optional + If not given, the value of ``pd.options.mode.string_storage``. + + Attributes + ---------- + None + + Methods + ------- + None + + Examples + -------- + >>> pd.StringDtype() + string[python] + + >>> pd.StringDtype(storage="pyarrow") + string[pyarrow] + """ + + name = "string" + + #: StringDtype.na_value uses pandas.NA + na_value = libmissing.NA + _metadata = ("storage",) + + def __init__(self, storage=None): + if storage is None: + storage = get_option("mode.string_storage") + if storage not in {"python", "pyarrow"}: + raise ValueError( + f"Storage must be 'python' or 'pyarrow'. Got {storage} instead." + ) + if storage == "pyarrow" and pa_version_under1p01: + raise ImportError( + "pyarrow>=1.0.0 is required for PyArrow backed StringArray." + ) + self.storage = storage + + @property + def type(self) -> type[str]: + return str + + @classmethod + def construct_from_string(cls, string): + """ + Construct a StringDtype from a string. + + Parameters + ---------- + string : str + The type of the name. The storage type will be taking from `string`. 
+ Valid options and their storage types are + + ========================== ============================================== + string result storage + ========================== ============================================== + ``'string'`` pd.options.mode.string_storage, default python + ``'string[python]'`` python + ``'string[pyarrow]'`` pyarrow + ========================== ============================================== + + Returns + ------- + StringDtype + + Raise + ----- + TypeError + If the string is not a valid option. + + """ + if not isinstance(string, str): + raise TypeError( + f"'construct_from_string' expects a string, got {type(string)}" + ) + if string == "string": + return cls() + elif string == "string[python]": + return cls(storage="python") + elif string == "string[pyarrow]": + return cls(storage="pyarrow") + else: + raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'") + + def __eq__(self, other: Any) -> bool: + if isinstance(other, str) and other == "string": + return True + return super().__eq__(other) + + def __hash__(self) -> int: + # custom __eq__ so have to override __hash__ + return super().__hash__() + + # https://github.com/pandas-dev/pandas/issues/36126 + # error: Signature of "construct_array_type" incompatible with supertype + # "ExtensionDtype" + def construct_array_type( # type: ignore[override] + self, + ) -> type_t[BaseStringArray]: + """ + Return the array type associated with this dtype. + + Returns + ------- + type + """ + from pandas.core.arrays.string_arrow import ArrowStringArray + + if self.storage == "python": + return StringArray + else: + return ArrowStringArray + + def __repr__(self): + return f"string[{self.storage}]" + + def __str__(self): + return self.name + + def __from_arrow__( + self, array: pyarrow.Array | pyarrow.ChunkedArray + ) -> BaseStringArray: + """ + Construct StringArray from pyarrow Array/ChunkedArray. + """ + if self.storage == "pyarrow": + from pandas.core.arrays.string_arrow import ArrowStringArray + + return ArrowStringArray(array) + else: + + import pyarrow + + if isinstance(array, pyarrow.Array): + chunks = [array] + else: + # pyarrow.ChunkedArray + chunks = array.chunks + + results = [] + for arr in chunks: + # using _from_sequence to ensure None is converted to NA + str_arr = StringArray._from_sequence(np.array(arr)) + results.append(str_arr) + + if results: + return StringArray._concat_same_type(results) + else: + return StringArray(np.array([], dtype="object")) + + +class BaseStringArray(ExtensionArray): + pass + + +class StringArray(BaseStringArray, PandasArray): + """ + Extension array for string data. + + .. versionadded:: 1.0.0 + + .. warning:: + + StringArray is considered experimental. The implementation and + parts of the API may change without warning. + + Parameters + ---------- + values : array-like + The array of data. + + .. warning:: + + Currently, this expects an object-dtype ndarray + where the elements are Python strings or :attr:`pandas.NA`. + This may change without warning in the future. Use + :meth:`pandas.array` with ``dtype="string"`` for a stable way of + creating a `StringArray` from any sequence. + + copy : bool, default False + Whether to copy the array of data. + + Attributes + ---------- + None + + Methods + ------- + None + + See Also + -------- + array + The recommended function for creating a StringArray. + Series.str + The string methods are available on Series backed by + a StringArray. + + Notes + ----- + StringArray returns a BooleanArray for comparison methods. 
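
The string-to-storage mapping in the table above can be exercised directly; a small sketch (the pyarrow line assumes pyarrow>=1.0.0 is installed):

    import pandas as pd

    pd.array(["a", None], dtype="string")           # StringArray (python storage)
    pd.array(["a", None], dtype="string[pyarrow]")  # ArrowStringArray
    pd.StringDtype() == "string"                    # True, via the custom __eq__
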
+ + Examples + -------- + >>> pd.array(['This is', 'some text', None, 'data.'], dtype="string") + + ['This is', 'some text', , 'data.'] + Length: 4, dtype: string + + Unlike arrays instantiated with ``dtype="object"``, ``StringArray`` + will convert the values to strings. + + >>> pd.array(['1', 1], dtype="object") + + ['1', 1] + Length: 2, dtype: object + >>> pd.array(['1', 1], dtype="string") + + ['1', '1'] + Length: 2, dtype: string + + However, instantiating StringArrays directly with non-strings will raise an error. + + For comparison methods, `StringArray` returns a :class:`pandas.BooleanArray`: + + >>> pd.array(["a", None, "c"], dtype="string") == "a" + + [True, , False] + Length: 3, dtype: boolean + """ + + # undo the PandasArray hack + _typ = "extension" + + def __init__(self, values, copy=False): + values = extract_array(values) + + super().__init__(values, copy=copy) + # error: Incompatible types in assignment (expression has type "StringDtype", + # variable has type "PandasDtype") + NDArrayBacked.__init__(self, self._ndarray, StringDtype(storage="python")) + if not isinstance(values, type(self)): + self._validate() + + def _validate(self): + """Validate that we only store NA or strings.""" + if len(self._ndarray) and not lib.is_string_array(self._ndarray, skipna=True): + raise ValueError("StringArray requires a sequence of strings or pandas.NA") + if self._ndarray.dtype != "object": + raise ValueError( + "StringArray requires a sequence of strings or pandas.NA. Got " + f"'{self._ndarray.dtype}' dtype instead." + ) + + @classmethod + def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy=False): + if dtype and not (isinstance(dtype, str) and dtype == "string"): + dtype = pandas_dtype(dtype) + assert isinstance(dtype, StringDtype) and dtype.storage == "python" + + from pandas.core.arrays.masked import BaseMaskedArray + + if isinstance(scalars, BaseMaskedArray): + # avoid costly conversion to object dtype + na_values = scalars._mask + result = scalars._data + result = lib.ensure_string_array(result, copy=copy, convert_na_value=False) + result[na_values] = StringDtype.na_value + + else: + # convert non-na-likes to str, and nan-likes to StringDtype.na_value + result = lib.ensure_string_array( + scalars, na_value=StringDtype.na_value, copy=copy + ) + + # Manually creating new array avoids the validation step in the __init__, so is + # faster. Refactor need for validation? + new_string_array = cls.__new__(cls) + NDArrayBacked.__init__(new_string_array, result, StringDtype(storage="python")) + + return new_string_array + + @classmethod + def _from_sequence_of_strings( + cls, strings, *, dtype: Dtype | None = None, copy=False + ): + return cls._from_sequence(strings, dtype=dtype, copy=copy) + + @classmethod + def _empty(cls, shape, dtype) -> StringArray: + values = np.empty(shape, dtype=object) + values[:] = libmissing.NA + return cls(values).astype(dtype, copy=False) + + def __arrow_array__(self, type=None): + """ + Convert myself into a pyarrow Array. 
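
Per _validate above, the raw constructor is strict about its input, while pd.array is the forgiving path; a sketch of the difference (error message abbreviated):

    import numpy as np
    import pandas as pd

    pd.arrays.StringArray(np.array(["a", "b"], dtype=object))  # OK: object ndarray
    try:
        pd.arrays.StringArray(np.array(["a", "b"]))  # '<U1' dtype, not object
    except ValueError:
        pass  # "StringArray requires a sequence of strings or pandas.NA. ..."
    pd.array(["a", 1], dtype="string")  # coerces to ['a', '1'] as documented above
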
+ """ + import pyarrow as pa + + if type is None: + type = pa.string() + + values = self._ndarray.copy() + values[self.isna()] = None + return pa.array(values, type=type, from_pandas=True) + + def _values_for_factorize(self): + arr = self._ndarray.copy() + mask = self.isna() + arr[mask] = -1 + return arr, -1 + + def __setitem__(self, key, value): + value = extract_array(value, extract_numpy=True) + if isinstance(value, type(self)): + # extract_array doesn't extract PandasArray subclasses + value = value._ndarray + + key = check_array_indexer(self, key) + scalar_key = lib.is_scalar(key) + scalar_value = lib.is_scalar(value) + if scalar_key and not scalar_value: + raise ValueError("setting an array element with a sequence.") + + # validate new items + if scalar_value: + if isna(value): + value = StringDtype.na_value + elif not isinstance(value, str): + raise ValueError( + f"Cannot set non-string value '{value}' into a StringArray." + ) + else: + if not is_array_like(value): + value = np.asarray(value, dtype=object) + if len(value) and not lib.is_string_array(value, skipna=True): + raise ValueError("Must provide strings.") + + super().__setitem__(key, value) + + def astype(self, dtype, copy: bool = True): + dtype = pandas_dtype(dtype) + + if is_dtype_equal(dtype, self.dtype): + if copy: + return self.copy() + return self + + elif isinstance(dtype, _IntegerDtype): + arr = self._ndarray.copy() + mask = self.isna() + arr[mask] = 0 + values = arr.astype(dtype.numpy_dtype) + return IntegerArray(values, mask, copy=False) + elif isinstance(dtype, FloatingDtype): + arr = self.copy() + mask = self.isna() + arr[mask] = "0" + values = arr.astype(dtype.numpy_dtype) + return FloatingArray(values, mask, copy=False) + elif isinstance(dtype, ExtensionDtype): + return super().astype(dtype, copy=copy) + elif np.issubdtype(dtype, np.floating): + arr = self._ndarray.copy() + mask = self.isna() + arr[mask] = 0 + values = arr.astype(dtype) + values[mask] = np.nan + return values + + return super().astype(dtype, copy) + + def _reduce( + self, name: str, *, skipna: bool = True, axis: int | None = 0, **kwargs + ): + if name in ["min", "max"]: + return getattr(self, name)(skipna=skipna, axis=axis) + + raise TypeError(f"Cannot perform reduction '{name}' with string dtype") + + def min(self, axis=None, skipna: bool = True, **kwargs) -> Scalar: + nv.validate_min((), kwargs) + result = masked_reductions.min( + values=self.to_numpy(), mask=self.isna(), skipna=skipna + ) + return self._wrap_reduction_result(axis, result) + + def max(self, axis=None, skipna: bool = True, **kwargs) -> Scalar: + nv.validate_max((), kwargs) + result = masked_reductions.max( + values=self.to_numpy(), mask=self.isna(), skipna=skipna + ) + return self._wrap_reduction_result(axis, result) + + def value_counts(self, dropna: bool = True): + from pandas import value_counts + + result = value_counts(self._ndarray, dropna=dropna).astype("Int64") + result.index = result.index.astype(self.dtype) + return result + + def memory_usage(self, deep: bool = False) -> int: + result = self._ndarray.nbytes + if deep: + return result + lib.memory_usage_of_objects(self._ndarray) + return result + + def _cmp_method(self, other, op): + from pandas.arrays import BooleanArray + + if isinstance(other, StringArray): + other = other._ndarray + + mask = isna(self) | isna(other) + valid = ~mask + + if not lib.is_scalar(other): + if len(other) != len(self): + # prevent improper broadcasting when other is 2D + raise ValueError( + f"Lengths of operands do not match: {len(self)} 
!= {len(other)}" + ) + + other = np.asarray(other) + other = other[valid] + + if op.__name__ in ops.ARITHMETIC_BINOPS: + result = np.empty_like(self._ndarray, dtype="object") + result[mask] = StringDtype.na_value + result[valid] = op(self._ndarray[valid], other) + return StringArray(result) + else: + # logical + result = np.zeros(len(self._ndarray), dtype="bool") + result[valid] = op(self._ndarray[valid], other) + return BooleanArray(result, mask) + + _arith_method = _cmp_method + + # ------------------------------------------------------------------------ + # String methods interface + # error: Incompatible types in assignment (expression has type "NAType", + # base class "PandasArray" defined the type as "float") + _str_na_value = StringDtype.na_value # type: ignore[assignment] + + def _str_map( + self, f, na_value=None, dtype: Dtype | None = None, convert: bool = True + ): + from pandas.arrays import BooleanArray + + if dtype is None: + dtype = StringDtype(storage="python") + if na_value is None: + na_value = self.dtype.na_value + + mask = isna(self) + arr = np.asarray(self) + + if is_integer_dtype(dtype) or is_bool_dtype(dtype): + constructor: type[IntegerArray] | type[BooleanArray] + if is_integer_dtype(dtype): + constructor = IntegerArray + else: + constructor = BooleanArray + + na_value_is_na = isna(na_value) + if na_value_is_na: + na_value = 1 + result = lib.map_infer_mask( + arr, + f, + mask.view("uint8"), + convert=False, + na_value=na_value, + # error: Argument 1 to "dtype" has incompatible type + # "Union[ExtensionDtype, str, dtype[Any], Type[object]]"; expected + # "Type[object]" + dtype=np.dtype(dtype), # type: ignore[arg-type] + ) + + if not na_value_is_na: + mask[:] = False + + return constructor(result, mask) + + elif is_string_dtype(dtype) and not is_object_dtype(dtype): + # i.e. StringDtype + result = lib.map_infer_mask( + arr, f, mask.view("uint8"), convert=False, na_value=na_value + ) + return StringArray(result) + else: + # This is when the result type is object. We reach this when + # -> We know the result type is truly object (e.g. .encode returns bytes + # or .findall returns a list). + # -> We don't know the result type. E.g. `.get` can return anything. 
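
_str_map above is what routes .str accessor results into the matching nullable array types; on this dtype one would expect roughly:

    import pandas as pd

    s = pd.Series(["a", "bb", None], dtype="string")
    s.str.len()      # Int64 result (IntegerArray), <NA> kept via the mask
    s.str.isalpha()  # boolean result (BooleanArray)
    s.str.upper()    # string result (StringArray)
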
+ return lib.map_infer_mask(arr, f, mask.view("uint8")) diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/string_arrow.py b/.local/lib/python3.8/site-packages/pandas/core/arrays/string_arrow.py new file mode 100644 index 0000000..3503b54 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/arrays/string_arrow.py @@ -0,0 +1,862 @@ +from __future__ import annotations + +from collections.abc import Callable # noqa: PDF001 +import re +from typing import ( + TYPE_CHECKING, + Any, + Union, + cast, + overload, +) + +import numpy as np + +from pandas._libs import ( + lib, + missing as libmissing, +) +from pandas._typing import ( + Dtype, + NpDtype, + PositionalIndexer, + Scalar, + ScalarIndexer, + SequenceIndexer, + TakeIndexer, + npt, +) +from pandas.compat import ( + pa_version_under1p01, + pa_version_under2p0, + pa_version_under3p0, + pa_version_under4p0, +) +from pandas.util._decorators import doc + +from pandas.core.dtypes.common import ( + is_array_like, + is_bool_dtype, + is_dtype_equal, + is_integer, + is_integer_dtype, + is_object_dtype, + is_scalar, + is_string_dtype, + pandas_dtype, +) +from pandas.core.dtypes.missing import isna + +from pandas.core.arraylike import OpsMixin +from pandas.core.arrays.base import ExtensionArray +from pandas.core.arrays.boolean import BooleanDtype +from pandas.core.arrays.integer import Int64Dtype +from pandas.core.arrays.numeric import NumericDtype +from pandas.core.arrays.string_ import ( + BaseStringArray, + StringDtype, +) +from pandas.core.indexers import ( + check_array_indexer, + unpack_tuple_and_ellipses, + validate_indices, +) +from pandas.core.strings.object_array import ObjectStringArrayMixin + +if not pa_version_under1p01: + import pyarrow as pa + import pyarrow.compute as pc + + ARROW_CMP_FUNCS = { + "eq": pc.equal, + "ne": pc.not_equal, + "lt": pc.less, + "gt": pc.greater, + "le": pc.less_equal, + "ge": pc.greater_equal, + } + + +if TYPE_CHECKING: + from pandas import Series + +ArrowStringScalarOrNAT = Union[str, libmissing.NAType] + + +def _chk_pyarrow_available() -> None: + if pa_version_under1p01: + msg = "pyarrow>=1.0.0 is required for PyArrow backed StringArray." + raise ImportError(msg) + + +# TODO: Inherit directly from BaseStringArrayMethods. Currently we inherit from +# ObjectStringArrayMixin because we want to have the object-dtype based methods as +# fallback for the ones that pyarrow doesn't yet support + + +class ArrowStringArray(OpsMixin, BaseStringArray, ObjectStringArrayMixin): + """ + Extension array for string data in a ``pyarrow.ChunkedArray``. + + .. versionadded:: 1.2.0 + + .. warning:: + + ArrowStringArray is considered experimental. The implementation and + parts of the API may change without warning. + + Parameters + ---------- + values : pyarrow.Array or pyarrow.ChunkedArray + The array of data. + + Attributes + ---------- + None + + Methods + ------- + None + + See Also + -------- + array + The recommended function for creating a ArrowStringArray. + Series.str + The string methods are available on Series backed by + a ArrowStringArray. + + Notes + ----- + ArrowStringArray returns a BooleanArray for comparison methods. 
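
Since every constructor funnels through _chk_pyarrow_available, callers without pyarrow get an ImportError rather than a half-built array; a defensive sketch:

    import pandas as pd

    try:
        arr = pd.array(["This is", "some text", None], dtype="string[pyarrow]")
    except ImportError:
        # pyarrow>=1.0.0 not installed; fall back to the object-backed storage
        arr = pd.array(["This is", "some text", None], dtype="string")
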
+ + Examples + -------- + >>> pd.array(['This is', 'some text', None, 'data.'], dtype="string[pyarrow]") + + ['This is', 'some text', , 'data.'] + Length: 4, dtype: string + """ + + def __init__(self, values): + self._dtype = StringDtype(storage="pyarrow") + if isinstance(values, pa.Array): + self._data = pa.chunked_array([values]) + elif isinstance(values, pa.ChunkedArray): + self._data = values + else: + raise ValueError(f"Unsupported type '{type(values)}' for ArrowStringArray") + + if not pa.types.is_string(self._data.type): + raise ValueError( + "ArrowStringArray requires a PyArrow (chunked) array of string type" + ) + + @classmethod + def _from_sequence(cls, scalars, dtype: Dtype | None = None, copy: bool = False): + from pandas.core.arrays.masked import BaseMaskedArray + + _chk_pyarrow_available() + + if dtype and not (isinstance(dtype, str) and dtype == "string"): + dtype = pandas_dtype(dtype) + assert isinstance(dtype, StringDtype) and dtype.storage == "pyarrow" + + if isinstance(scalars, BaseMaskedArray): + # avoid costly conversion to object dtype in ensure_string_array and + # numerical issues with Float32Dtype + na_values = scalars._mask + result = scalars._data + result = lib.ensure_string_array(result, copy=copy, convert_na_value=False) + return cls(pa.array(result, mask=na_values, type=pa.string())) + + # convert non-na-likes to str + result = lib.ensure_string_array(scalars, copy=copy) + return cls(pa.array(result, type=pa.string(), from_pandas=True)) + + @classmethod + def _from_sequence_of_strings( + cls, strings, dtype: Dtype | None = None, copy: bool = False + ): + return cls._from_sequence(strings, dtype=dtype, copy=copy) + + @property + def dtype(self) -> StringDtype: + """ + An instance of 'string[pyarrow]'. + """ + return self._dtype + + def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: + """Correctly construct numpy arrays when passed to `np.asarray()`.""" + return self.to_numpy(dtype=dtype) + + def __arrow_array__(self, type=None): + """Convert myself to a pyarrow Array or ChunkedArray.""" + return self._data + + def to_numpy( + self, + dtype: npt.DTypeLike | None = None, + copy: bool = False, + na_value=lib.no_default, + ) -> np.ndarray: + """ + Convert to a NumPy ndarray. + """ + # TODO: copy argument is ignored + + result = np.array(self._data, dtype=dtype) + if self._data.null_count > 0: + if na_value is lib.no_default: + if dtype and np.issubdtype(dtype, np.floating): + return result + na_value = self._dtype.na_value + mask = self.isna() + result[mask] = na_value + return result + + def __len__(self) -> int: + """ + Length of this array. + + Returns + ------- + length : int + """ + return len(self._data) + + @doc(ExtensionArray.factorize) + def factorize(self, na_sentinel: int = -1) -> tuple[np.ndarray, ExtensionArray]: + encoded = self._data.dictionary_encode() + indices = pa.chunked_array( + [c.indices for c in encoded.chunks], type=encoded.type.index_type + ).to_pandas() + if indices.dtype.kind == "f": + indices[np.isnan(indices)] = na_sentinel + indices = indices.astype(np.int64, copy=False) + + if encoded.num_chunks: + uniques = type(self)(encoded.chunk(0).dictionary) + else: + uniques = type(self)(pa.array([], type=encoded.type.value_type)) + + return indices.values, uniques + + @classmethod + def _concat_same_type(cls, to_concat) -> ArrowStringArray: + """ + Concatenate multiple ArrowStringArray. 
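
factorize above leans on pyarrow's dictionary_encode, so uniques come back in order of first appearance and nulls map to the sentinel; roughly:

    import pandas as pd

    arr = pd.array(["b", "b", None, "a"], dtype="string[pyarrow]")
    codes, uniques = arr.factorize()
    # codes   -> array([ 0,  0, -1,  1])  (-1 is the na_sentinel)
    # uniques -> ['b', 'a'] as an ArrowStringArray
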
+ + Parameters + ---------- + to_concat : sequence of ArrowStringArray + + Returns + ------- + ArrowStringArray + """ + return cls( + pa.chunked_array( + [array for ea in to_concat for array in ea._data.iterchunks()] + ) + ) + + @overload + def __getitem__(self, item: ScalarIndexer) -> ArrowStringScalarOrNAT: + ... + + @overload + def __getitem__(self: ArrowStringArray, item: SequenceIndexer) -> ArrowStringArray: + ... + + def __getitem__( + self: ArrowStringArray, item: PositionalIndexer + ) -> ArrowStringArray | ArrowStringScalarOrNAT: + """Select a subset of self. + + Parameters + ---------- + item : int, slice, or ndarray + * int: The position in 'self' to get. + * slice: A slice object, where 'start', 'stop', and 'step' are + integers or None + * ndarray: A 1-d boolean NumPy ndarray the same length as 'self' + + Returns + ------- + item : scalar or ExtensionArray + + Notes + ----- + For scalar ``item``, return a scalar value suitable for the array's + type. This should be an instance of ``self.dtype.type``. + For slice ``key``, return an instance of ``ExtensionArray``, even + if the slice is length 0 or 1. + For a boolean mask, return an instance of ``ExtensionArray``, filtered + to the values where ``item`` is True. + """ + item = check_array_indexer(self, item) + + if isinstance(item, np.ndarray): + if not len(item): + return type(self)(pa.chunked_array([], type=pa.string())) + elif is_integer_dtype(item.dtype): + return self.take(item) + elif is_bool_dtype(item.dtype): + return type(self)(self._data.filter(item)) + else: + raise IndexError( + "Only integers, slices and integer or " + "boolean arrays are valid indices." + ) + elif isinstance(item, tuple): + item = unpack_tuple_and_ellipses(item) + + # error: Non-overlapping identity check (left operand type: + # "Union[Union[int, integer[Any]], Union[slice, List[int], + # ndarray[Any, Any]]]", right operand type: "ellipsis") + if item is Ellipsis: # type: ignore[comparison-overlap] + # TODO: should be handled by pyarrow? + item = slice(None) + + if is_scalar(item) and not is_integer(item): + # e.g. "foo" or 2.5 + # exception message copied from numpy + raise IndexError( + r"only integers, slices (`:`), ellipsis (`...`), numpy.newaxis " + r"(`None`) and integer or boolean arrays are valid indices" + ) + # We are not an array indexer, so maybe e.g. a slice or integer + # indexer. We dispatch to pyarrow. + value = self._data[item] + if isinstance(value, pa.ChunkedArray): + return type(self)(value) + else: + return self._as_pandas_scalar(value) + + def _as_pandas_scalar(self, arrow_scalar: pa.Scalar): + scalar = arrow_scalar.as_py() + if scalar is None: + return self._dtype.na_value + else: + return scalar + + @property + def nbytes(self) -> int: + """ + The number of bytes needed to store this object in memory. + """ + return self._data.nbytes + + def isna(self) -> np.ndarray: + """ + Boolean NumPy array indicating if each value is missing. + + This should return a 1-D array the same length as 'self'. + """ + # TODO: Implement .to_numpy for ChunkedArray + return self._data.is_null().to_pandas().values + + def copy(self) -> ArrowStringArray: + """ + Return a shallow copy of the array. + + Underlying ChunkedArray is immutable, so a deep copy is unnecessary. 
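
The __getitem__ overloads above implement the usual ExtensionArray indexing contract; a quick sketch:

    import pandas as pd

    arr = pd.array(["a", "b", "c"], dtype="string[pyarrow]")
    arr[0]                    # scalar: 'a'
    arr[1:]                   # length-2 ArrowStringArray
    arr[[True, False, True]]  # boolean mask -> filtered ArrowStringArray
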
+ + Returns + ------- + ArrowStringArray + """ + return type(self)(self._data) + + def _cmp_method(self, other, op): + from pandas.arrays import BooleanArray + + pc_func = ARROW_CMP_FUNCS[op.__name__] + if isinstance(other, ArrowStringArray): + result = pc_func(self._data, other._data) + elif isinstance(other, (np.ndarray, list)): + result = pc_func(self._data, other) + elif is_scalar(other): + try: + result = pc_func(self._data, pa.scalar(other)) + except (pa.lib.ArrowNotImplementedError, pa.lib.ArrowInvalid): + mask = isna(self) | isna(other) + valid = ~mask + result = np.zeros(len(self), dtype="bool") + result[valid] = op(np.array(self)[valid], other) + return BooleanArray(result, mask) + else: + return NotImplemented + + # TODO(ARROW-9429): Add a .to_numpy() to ChunkedArray + return BooleanArray._from_sequence(result.to_pandas().values) + + def insert(self, loc: int, item): + if not isinstance(item, str) and item is not libmissing.NA: + raise TypeError("Scalar must be NA or str") + return super().insert(loc, item) + + def __setitem__(self, key: int | slice | np.ndarray, value: Any) -> None: + """Set one or more values inplace. + + Parameters + ---------- + key : int, ndarray, or slice + When called from, e.g. ``Series.__setitem__``, ``key`` will be + one of + + * scalar int + * ndarray of integers. + * boolean ndarray + * slice object + + value : ExtensionDtype.type, Sequence[ExtensionDtype.type], or object + value or values to be set of ``key``. + + Returns + ------- + None + """ + key = check_array_indexer(self, key) + + if is_integer(key): + key = cast(int, key) + + if not is_scalar(value): + raise ValueError("Must pass scalars with scalar indexer") + elif isna(value): + value = None + elif not isinstance(value, str): + raise ValueError("Scalar must be NA or str") + + # Slice data and insert in-between + new_data = [ + *self._data[0:key].chunks, + pa.array([value], type=pa.string()), + *self._data[(key + 1) :].chunks, + ] + self._data = pa.chunked_array(new_data) + else: + # Convert to integer indices and iteratively assign. + # TODO: Make a faster variant of this in Arrow upstream. + # This is probably extremely slow. + + # Convert all possible input key types to an array of integers + if isinstance(key, slice): + key_array = np.array(range(len(self))[key]) + elif is_bool_dtype(key): + # TODO(ARROW-9430): Directly support setitem(booleans) + key_array = np.argwhere(key).flatten() + else: + # TODO(ARROW-9431): Directly support setitem(integers) + key_array = np.asanyarray(key) + + if is_scalar(value): + value = np.broadcast_to(value, len(key_array)) + else: + value = np.asarray(value) + + if len(key_array) != len(value): + raise ValueError("Length of indexer and values mismatch") + + for k, v in zip(key_array, value): + self[k] = v + + def take( + self, + indices: TakeIndexer, + allow_fill: bool = False, + fill_value: Any = None, + ): + """ + Take elements from an array. + + Parameters + ---------- + indices : sequence of int or one-dimensional np.ndarray of int + Indices to be taken. + allow_fill : bool, default False + How to handle negative values in `indices`. + + * False: negative values in `indices` indicate positional indices + from the right (the default). This is similar to + :func:`numpy.take`. + + * True: negative values in `indices` indicate + missing values. These values are set to `fill_value`. Any other + other negative values raise a ``ValueError``. + + fill_value : any, optional + Fill value to use for NA-indices when `allow_fill` is True. 
+ This may be ``None``, in which case the default NA value for + the type, ``self.dtype.na_value``, is used. + + For many ExtensionArrays, there will be two representations of + `fill_value`: a user-facing "boxed" scalar, and a low-level + physical NA value. `fill_value` should be the user-facing version, + and the implementation should handle translating that to the + physical version for processing the take if necessary. + + Returns + ------- + ExtensionArray + + Raises + ------ + IndexError + When the indices are out of bounds for the array. + ValueError + When `indices` contains negative values other than ``-1`` + and `allow_fill` is True. + + See Also + -------- + numpy.take + api.extensions.take + + Notes + ----- + ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``, + ``iloc``, when `indices` is a sequence of values. Additionally, + it's called by :meth:`Series.reindex`, or any other method + that causes realignment, with a `fill_value`. + """ + # TODO: Remove once we got rid of the (indices < 0) check + if not is_array_like(indices): + indices_array = np.asanyarray(indices) + else: + # error: Incompatible types in assignment (expression has type + # "Sequence[int]", variable has type "ndarray") + indices_array = indices # type: ignore[assignment] + + if len(self._data) == 0 and (indices_array >= 0).any(): + raise IndexError("cannot do a non-empty take") + if indices_array.size > 0 and indices_array.max() >= len(self._data): + raise IndexError("out of bounds value in 'indices'.") + + if allow_fill: + fill_mask = indices_array < 0 + if fill_mask.any(): + validate_indices(indices_array, len(self._data)) + # TODO(ARROW-9433): Treat negative indices as NULL + indices_array = pa.array(indices_array, mask=fill_mask) + result = self._data.take(indices_array) + if isna(fill_value): + return type(self)(result) + # TODO: ArrowNotImplementedError: Function fill_null has no + # kernel matching input types (array[string], scalar[string]) + result = type(self)(result) + result[fill_mask] = fill_value + return result + # return type(self)(pc.fill_null(result, pa.scalar(fill_value))) + else: + # Nothing to fill + return type(self)(self._data.take(indices)) + else: # allow_fill=False + # TODO(ARROW-9432): Treat negative indices as indices from the right. + if (indices_array < 0).any(): + # Don't modify in-place + indices_array = np.copy(indices_array) + indices_array[indices_array < 0] += len(self._data) + return type(self)(self._data.take(indices_array)) + + def isin(self, values): + if pa_version_under2p0: + return super().isin(values) + + value_set = [ + pa_scalar.as_py() + for pa_scalar in [pa.scalar(value, from_pandas=True) for value in values] + if pa_scalar.type in (pa.string(), pa.null()) + ] + + # for an empty value_set pyarrow 3.0.0 segfaults and pyarrow 2.0.0 returns True + # for null values, so we short-circuit to return all False array. + if not len(value_set): + return np.zeros(len(self), dtype=bool) + + kwargs = {} + if pa_version_under3p0: + # in pyarrow 2.0.0 skip_null is ignored but is a required keyword and raises + # with unexpected keyword argument in pyarrow 3.0.0+ + kwargs["skip_null"] = True + + result = pc.is_in(self._data, value_set=pa.array(value_set), **kwargs) + # pyarrow 2.0.0 returned nulls, so we explicily specify dtype to convert nulls + # to False + return np.array(result, dtype=np.bool_) + + def value_counts(self, dropna: bool = True) -> Series: + """ + Return a Series containing counts of each unique value. 
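
Putting take and isin from above together on a small array (values illustrative; -1 only means "missing" when allow_fill=True):

    import pandas as pd

    arr = pd.array(["a", "b", "c"], dtype="string[pyarrow]")
    arr.take([0, -1])                   # ['a', 'c']: -1 counts from the right
    arr.take([0, -1], allow_fill=True)  # ['a', <NA>]: -1 marks a missing slot
    arr.isin(["a", "z"])                # array([ True, False, False])
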
+ + Parameters + ---------- + dropna : bool, default True + Don't include counts of missing values. + + Returns + ------- + counts : Series + + See Also + -------- + Series.value_counts + """ + from pandas import ( + Index, + Series, + ) + + vc = self._data.value_counts() + + values = vc.field(0) + counts = vc.field(1) + if dropna and self._data.null_count > 0: + mask = values.is_valid() + values = values.filter(mask) + counts = counts.filter(mask) + + # No missing values so we can adhere to the interface and return a numpy array. + counts = np.array(counts) + + index = Index(type(self)(values)) + + return Series(counts, index=index).astype("Int64") + + def astype(self, dtype, copy: bool = True): + dtype = pandas_dtype(dtype) + + if is_dtype_equal(dtype, self.dtype): + if copy: + return self.copy() + return self + + elif isinstance(dtype, NumericDtype): + data = self._data.cast(pa.from_numpy_dtype(dtype.numpy_dtype)) + return dtype.__from_arrow__(data) + + return super().astype(dtype, copy=copy) + + # ------------------------------------------------------------------------ + # String methods interface + + # error: Cannot determine type of 'na_value' + _str_na_value = StringDtype.na_value # type: ignore[has-type] + + def _str_map( + self, f, na_value=None, dtype: Dtype | None = None, convert: bool = True + ): + # TODO: de-duplicate with StringArray method. This method is moreless copy and + # paste. + + from pandas.arrays import ( + BooleanArray, + IntegerArray, + ) + + if dtype is None: + dtype = self.dtype + if na_value is None: + na_value = self.dtype.na_value + + mask = isna(self) + arr = np.asarray(self) + + if is_integer_dtype(dtype) or is_bool_dtype(dtype): + constructor: type[IntegerArray] | type[BooleanArray] + if is_integer_dtype(dtype): + constructor = IntegerArray + else: + constructor = BooleanArray + + na_value_is_na = isna(na_value) + if na_value_is_na: + na_value = 1 + result = lib.map_infer_mask( + arr, + f, + mask.view("uint8"), + convert=False, + na_value=na_value, + # error: Argument 1 to "dtype" has incompatible type + # "Union[ExtensionDtype, str, dtype[Any], Type[object]]"; expected + # "Type[object]" + dtype=np.dtype(dtype), # type: ignore[arg-type] + ) + + if not na_value_is_na: + mask[:] = False + + return constructor(result, mask) + + elif is_string_dtype(dtype) and not is_object_dtype(dtype): + # i.e. StringDtype + result = lib.map_infer_mask( + arr, f, mask.view("uint8"), convert=False, na_value=na_value + ) + result = pa.array(result, mask=mask, type=pa.string(), from_pandas=True) + return type(self)(result) + else: + # This is when the result type is object. We reach this when + # -> We know the result type is truly object (e.g. .encode returns bytes + # or .findall returns a list). + # -> We don't know the result type. E.g. `.get` can return anything. 
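
The astype and value_counts paths above keep results in nullable dtypes; sketched usage (numeric strings are cast through pyarrow):

    import pandas as pd

    s = pd.Series(["a", "a", None], dtype="string[pyarrow]")
    s.value_counts(dropna=False)  # Int64 counts, including a row for <NA>
    pd.array(["1", "2"], dtype="string[pyarrow]").astype("Int64")  # IntegerArray
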
+ return lib.map_infer_mask(arr, f, mask.view("uint8")) + + def _str_contains(self, pat, case=True, flags=0, na=np.nan, regex: bool = True): + if flags: + return super()._str_contains(pat, case, flags, na, regex) + + if regex: + if pa_version_under4p0 or case is False: + return super()._str_contains(pat, case, flags, na, regex) + else: + result = pc.match_substring_regex(self._data, pat) + else: + if case: + result = pc.match_substring(self._data, pat) + else: + result = pc.match_substring(pc.utf8_upper(self._data), pat.upper()) + result = BooleanDtype().__from_arrow__(result) + if not isna(na): + result[isna(result)] = bool(na) + return result + + def _str_startswith(self, pat: str, na=None): + if pa_version_under4p0: + return super()._str_startswith(pat, na) + + pat = "^" + re.escape(pat) + return self._str_contains(pat, na=na, regex=True) + + def _str_endswith(self, pat: str, na=None): + if pa_version_under4p0: + return super()._str_endswith(pat, na) + + pat = re.escape(pat) + "$" + return self._str_contains(pat, na=na, regex=True) + + def _str_replace( + self, + pat: str | re.Pattern, + repl: str | Callable, + n: int = -1, + case: bool = True, + flags: int = 0, + regex: bool = True, + ): + if ( + pa_version_under4p0 + or isinstance(pat, re.Pattern) + or callable(repl) + or not case + or flags + ): + return super()._str_replace(pat, repl, n, case, flags, regex) + + func = pc.replace_substring_regex if regex else pc.replace_substring + result = func(self._data, pattern=pat, replacement=repl, max_replacements=n) + return type(self)(result) + + def _str_match( + self, pat: str, case: bool = True, flags: int = 0, na: Scalar = None + ): + if pa_version_under4p0: + return super()._str_match(pat, case, flags, na) + + if not pat.startswith("^"): + pat = "^" + pat + return self._str_contains(pat, case, flags, na, regex=True) + + def _str_fullmatch(self, pat, case: bool = True, flags: int = 0, na: Scalar = None): + if pa_version_under4p0: + return super()._str_fullmatch(pat, case, flags, na) + + if not pat.endswith("$") or pat.endswith("//$"): + pat = pat + "$" + return self._str_match(pat, case, flags, na) + + def _str_isalnum(self): + result = pc.utf8_is_alnum(self._data) + return BooleanDtype().__from_arrow__(result) + + def _str_isalpha(self): + result = pc.utf8_is_alpha(self._data) + return BooleanDtype().__from_arrow__(result) + + def _str_isdecimal(self): + result = pc.utf8_is_decimal(self._data) + return BooleanDtype().__from_arrow__(result) + + def _str_isdigit(self): + result = pc.utf8_is_digit(self._data) + return BooleanDtype().__from_arrow__(result) + + def _str_islower(self): + result = pc.utf8_is_lower(self._data) + return BooleanDtype().__from_arrow__(result) + + def _str_isnumeric(self): + result = pc.utf8_is_numeric(self._data) + return BooleanDtype().__from_arrow__(result) + + def _str_isspace(self): + if pa_version_under2p0: + return super()._str_isspace() + + result = pc.utf8_is_space(self._data) + return BooleanDtype().__from_arrow__(result) + + def _str_istitle(self): + result = pc.utf8_is_title(self._data) + return BooleanDtype().__from_arrow__(result) + + def _str_isupper(self): + result = pc.utf8_is_upper(self._data) + return BooleanDtype().__from_arrow__(result) + + def _str_len(self): + if pa_version_under4p0: + return super()._str_len() + + result = pc.utf8_length(self._data) + return Int64Dtype().__from_arrow__(result) + + def _str_lower(self): + return type(self)(pc.utf8_lower(self._data)) + + def _str_upper(self): + return type(self)(pc.utf8_upper(self._data)) + + 
def _str_strip(self, to_strip=None): + if pa_version_under4p0: + return super()._str_strip(to_strip) + + if to_strip is None: + result = pc.utf8_trim_whitespace(self._data) + else: + result = pc.utf8_trim(self._data, characters=to_strip) + return type(self)(result) + + def _str_lstrip(self, to_strip=None): + if pa_version_under4p0: + return super()._str_lstrip(to_strip) + + if to_strip is None: + result = pc.utf8_ltrim_whitespace(self._data) + else: + result = pc.utf8_ltrim(self._data, characters=to_strip) + return type(self)(result) + + def _str_rstrip(self, to_strip=None): + if pa_version_under4p0: + return super()._str_rstrip(to_strip) + + if to_strip is None: + result = pc.utf8_rtrim_whitespace(self._data) + else: + result = pc.utf8_rtrim(self._data, characters=to_strip) + return type(self)(result) diff --git a/.local/lib/python3.8/site-packages/pandas/core/arrays/timedeltas.py b/.local/lib/python3.8/site-packages/pandas/core/arrays/timedeltas.py new file mode 100644 index 0000000..6ea2435 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/arrays/timedeltas.py @@ -0,0 +1,1127 @@ +from __future__ import annotations + +from datetime import timedelta +from typing import TYPE_CHECKING + +import numpy as np + +from pandas._libs import ( + lib, + tslibs, +) +from pandas._libs.arrays import NDArrayBacked +from pandas._libs.tslibs import ( + BaseOffset, + NaT, + NaTType, + Period, + Tick, + Timedelta, + Timestamp, + iNaT, + to_offset, +) +from pandas._libs.tslibs.conversion import ( + ensure_timedelta64ns, + precision_from_unit, +) +from pandas._libs.tslibs.fields import get_timedelta_field +from pandas._libs.tslibs.timedeltas import ( + array_to_timedelta64, + ints_to_pytimedelta, + parse_timedelta_unit, +) +from pandas._typing import ( + DtypeObj, + NpDtype, +) +from pandas.compat.numpy import function as nv +from pandas.util._validators import validate_endpoints + +from pandas.core.dtypes.cast import astype_td64_unit_conversion +from pandas.core.dtypes.common import ( + DT64NS_DTYPE, + TD64NS_DTYPE, + is_dtype_equal, + is_float_dtype, + is_integer_dtype, + is_object_dtype, + is_scalar, + is_string_dtype, + is_timedelta64_dtype, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import DatetimeTZDtype +from pandas.core.dtypes.generic import ( + ABCCategorical, + ABCMultiIndex, +) +from pandas.core.dtypes.missing import isna + +from pandas.core import nanops +from pandas.core.algorithms import checked_add_with_arr +from pandas.core.arrays import ( + ExtensionArray, + IntegerArray, + datetimelike as dtl, +) +from pandas.core.arrays._ranges import generate_regular_range +import pandas.core.common as com +from pandas.core.construction import extract_array +from pandas.core.ops.common import unpack_zerodim_and_defer + +if TYPE_CHECKING: + from pandas import DataFrame + from pandas.core.arrays import ( + DatetimeArray, + PeriodArray, + ) + + +def _field_accessor(name: str, alias: str, docstring: str): + def f(self) -> np.ndarray: + values = self.asi8 + result = get_timedelta_field(values, alias) + if self._hasna: + result = self._maybe_mask_results( + result, fill_value=None, convert="float64" + ) + + return result + + f.__name__ = name + f.__doc__ = f"\n{docstring}\n" + return property(f) + + +class TimedeltaArray(dtl.TimelikeOps): + """ + Pandas ExtensionArray for timedelta data. + + .. warning:: + + TimedeltaArray is currently experimental, and its API may change + without warning. 
In particular, :attr:`TimedeltaArray.dtype` is + expected to change to be an instance of an ``ExtensionDtype`` + subclass. + + Parameters + ---------- + values : array-like + The timedelta data. + + dtype : numpy.dtype + Currently, only ``numpy.dtype("timedelta64[ns]")`` is accepted. + freq : Offset, optional + copy : bool, default False + Whether to copy the underlying array of data. + + Attributes + ---------- + None + + Methods + ------- + None + """ + + _typ = "timedeltaarray" + _scalar_type = Timedelta + _recognized_scalars = (timedelta, np.timedelta64, Tick) + _is_recognized_dtype = is_timedelta64_dtype + _infer_matches = ("timedelta", "timedelta64") + + __array_priority__ = 1000 + # define my properties & methods for delegation + _other_ops: list[str] = [] + _bool_ops: list[str] = [] + _object_ops: list[str] = ["freq"] + _field_ops: list[str] = ["days", "seconds", "microseconds", "nanoseconds"] + _datetimelike_ops: list[str] = _field_ops + _object_ops + _bool_ops + _datetimelike_methods: list[str] = [ + "to_pytimedelta", + "total_seconds", + "round", + "floor", + "ceil", + ] + + # Note: ndim must be defined to ensure NaT.__richcmp__(TimedeltaArray) + # operates pointwise. + + def _box_func(self, x) -> Timedelta | NaTType: + return Timedelta(x, unit="ns") + + @property + # error: Return type "dtype" of "dtype" incompatible with return type + # "ExtensionDtype" in supertype "ExtensionArray" + def dtype(self) -> np.dtype: # type: ignore[override] + """ + The dtype for the TimedeltaArray. + + .. warning:: + + A future version of pandas will change dtype to be an instance + of a :class:`pandas.api.extensions.ExtensionDtype` subclass, + not a ``numpy.dtype``. + + Returns + ------- + numpy.dtype + """ + return TD64NS_DTYPE + + # ---------------------------------------------------------------- + # Constructors + + _freq = None + + def __init__( + self, values, dtype=TD64NS_DTYPE, freq=lib.no_default, copy: bool = False + ): + values = extract_array(values, extract_numpy=True) + if isinstance(values, IntegerArray): + values = values.to_numpy("int64", na_value=tslibs.iNaT) + + inferred_freq = getattr(values, "_freq", None) + explicit_none = freq is None + freq = freq if freq is not lib.no_default else None + + if isinstance(values, type(self)): + if explicit_none: + # dont inherit from values + pass + elif freq is None: + freq = values.freq + elif freq and values.freq: + freq = to_offset(freq) + freq, _ = dtl.validate_inferred_freq(freq, values.freq, False) + values = values._ndarray + + if not isinstance(values, np.ndarray): + msg = ( + f"Unexpected type '{type(values).__name__}'. 'values' must be a " + "TimedeltaArray, ndarray, or Series or Index containing one of those." + ) + raise ValueError(msg) + if values.ndim not in [1, 2]: + raise ValueError("Only 1-dimensional input arrays are supported.") + + if values.dtype == "i8": + # for compat with datetime/timedelta/period shared methods, + # we can sometimes get here with int64 values. These represent + # nanosecond UTC (or tz-naive) unix timestamps + values = values.view(TD64NS_DTYPE) + + _validate_td64_dtype(values.dtype) + dtype = _validate_td64_dtype(dtype) + + if freq == "infer": + msg = ( + "Frequency inference not allowed in TimedeltaArray.__init__. " + "Use 'pd.array()' instead." 
+ ) + raise ValueError(msg) + + if copy: + values = values.copy() + if freq: + freq = to_offset(freq) + + NDArrayBacked.__init__(self, values=values, dtype=dtype) + self._freq = freq + + if inferred_freq is None and freq is not None: + type(self)._validate_frequency(self, freq) + + # error: Signature of "_simple_new" incompatible with supertype "NDArrayBacked" + @classmethod + def _simple_new( # type: ignore[override] + cls, values: np.ndarray, freq: BaseOffset | None = None, dtype=TD64NS_DTYPE + ) -> TimedeltaArray: + assert dtype == TD64NS_DTYPE, dtype + assert isinstance(values, np.ndarray), type(values) + assert values.dtype == TD64NS_DTYPE + + result = super()._simple_new(values=values, dtype=TD64NS_DTYPE) + result._freq = freq + return result + + @classmethod + def _from_sequence( + cls, data, *, dtype=TD64NS_DTYPE, copy: bool = False + ) -> TimedeltaArray: + if dtype: + _validate_td64_dtype(dtype) + + data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=None) + freq, _ = dtl.validate_inferred_freq(None, inferred_freq, False) + + return cls._simple_new(data, freq=freq) + + @classmethod + def _from_sequence_not_strict( + cls, + data, + dtype=TD64NS_DTYPE, + copy: bool = False, + freq=lib.no_default, + unit=None, + ) -> TimedeltaArray: + if dtype: + _validate_td64_dtype(dtype) + + explicit_none = freq is None + freq = freq if freq is not lib.no_default else None + + freq, freq_infer = dtl.maybe_infer_freq(freq) + + data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=unit) + freq, freq_infer = dtl.validate_inferred_freq(freq, inferred_freq, freq_infer) + if explicit_none: + freq = None + + result = cls._simple_new(data, freq=freq) + + if inferred_freq is None and freq is not None: + # this condition precludes `freq_infer` + cls._validate_frequency(result, freq) + + elif freq_infer: + # Set _freq directly to bypass duplicative _validate_frequency + # check. + result._freq = to_offset(result.inferred_freq) + + return result + + @classmethod + def _generate_range(cls, start, end, periods, freq, closed=None): + + periods = dtl.validate_periods(periods) + if freq is None and any(x is None for x in [periods, start, end]): + raise ValueError("Must provide freq argument if no data is supplied") + + if com.count_not_none(start, end, periods, freq) != 3: + raise ValueError( + "Of the four parameters: start, end, periods, " + "and freq, exactly three must be specified" + ) + + if start is not None: + start = Timedelta(start) + + if end is not None: + end = Timedelta(end) + + left_closed, right_closed = validate_endpoints(closed) + + if freq is not None: + index = generate_regular_range(start, end, periods, freq) + else: + index = np.linspace(start.value, end.value, periods).astype("i8") + + if not left_closed: + index = index[1:] + if not right_closed: + index = index[:-1] + + return cls._simple_new(index.view("m8[ns]"), freq=freq) + + # ---------------------------------------------------------------- + # DatetimeLike Interface + + def _unbox_scalar(self, value, setitem: bool = False) -> np.timedelta64: + if not isinstance(value, self._scalar_type) and value is not NaT: + raise ValueError("'value' should be a Timedelta.") + self._check_compatible_with(value, setitem=setitem) + return np.timedelta64(value.value, "ns") + + def _scalar_from_string(self, value) -> Timedelta | NaTType: + return Timedelta(value) + + def _check_compatible_with(self, other, setitem: bool = False) -> None: + # we don't have anything to validate. 
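
_generate_range above is what pd.timedelta_range reaches, and the array itself normally comes from public constructors; sketches of both (per the check above, exactly three of start/end/periods/freq may be given):

    import numpy as np
    import pandas as pd

    pd.array(np.array([1, 2], dtype="timedelta64[ns]"))  # a TimedeltaArray
    pd.timedelta_range(start="1 day", periods=4, freq="6H")
    pd.timedelta_range(start="1 day", end="2 days", periods=5)  # linspace path
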
+ pass + + # ---------------------------------------------------------------- + # Array-Like / EA-Interface Methods + + def astype(self, dtype, copy: bool = True): + # We handle + # --> timedelta64[ns] + # --> timedelta64 + # DatetimeLikeArrayMixin super call handles other cases + dtype = pandas_dtype(dtype) + + if dtype.kind == "m": + return astype_td64_unit_conversion(self._ndarray, dtype, copy=copy) + + return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy=copy) + + def __iter__(self): + if self.ndim > 1: + for i in range(len(self)): + yield self[i] + else: + # convert in chunks of 10k for efficiency + data = self.asi8 + length = len(self) + chunksize = 10000 + chunks = (length // chunksize) + 1 + for i in range(chunks): + start_i = i * chunksize + end_i = min((i + 1) * chunksize, length) + converted = ints_to_pytimedelta(data[start_i:end_i], box=True) + yield from converted + + # ---------------------------------------------------------------- + # Reductions + + def sum( + self, + *, + axis: int | None = None, + dtype: NpDtype | None = None, + out=None, + keepdims: bool = False, + initial=None, + skipna: bool = True, + min_count: int = 0, + ): + nv.validate_sum( + (), {"dtype": dtype, "out": out, "keepdims": keepdims, "initial": initial} + ) + + result = nanops.nansum( + self._ndarray, axis=axis, skipna=skipna, min_count=min_count + ) + return self._wrap_reduction_result(axis, result) + + def std( + self, + *, + axis: int | None = None, + dtype: NpDtype | None = None, + out=None, + ddof: int = 1, + keepdims: bool = False, + skipna: bool = True, + ): + nv.validate_stat_ddof_func( + (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="std" + ) + + result = nanops.nanstd(self._ndarray, axis=axis, skipna=skipna, ddof=ddof) + if axis is None or self.ndim == 1: + return self._box_func(result) + return self._from_backing_data(result) + + # ---------------------------------------------------------------- + # Rendering Methods + + def _formatter(self, boxed: bool = False): + from pandas.io.formats.format import get_format_timedelta64 + + return get_format_timedelta64(self, box=True) + + @dtl.ravel_compat + def _format_native_types( + self, *, na_rep="NaT", date_format=None, **kwargs + ) -> np.ndarray: + from pandas.io.formats.format import get_format_timedelta64 + + formatter = get_format_timedelta64(self._ndarray, na_rep) + return np.array([formatter(x) for x in self._ndarray]) + + # ---------------------------------------------------------------- + # Arithmetic Methods + + def _add_offset(self, other): + assert not isinstance(other, Tick) + raise TypeError( + f"cannot add the type {type(other).__name__} to a {type(self).__name__}" + ) + + def _add_period(self, other: Period) -> PeriodArray: + """ + Add a Period object. + """ + # We will wrap in a PeriodArray and defer to the reversed operation + from pandas.core.arrays.period import PeriodArray + + i8vals = np.broadcast_to(other.ordinal, self.shape) + oth = PeriodArray(i8vals, freq=other.freq) + return oth + self + + def _add_datetime_arraylike(self, other): + """ + Add DatetimeArray/Index or ndarray[datetime64] to TimedeltaArray. 
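
The arithmetic hooks in this area give the familiar timedelta/datetime semantics; on the Index wrapper one would expect roughly:

    import pandas as pd

    tdi = pd.to_timedelta(["1 days", "2 days"])
    tdi + pd.Timestamp("2021-01-01")  # DatetimeIndex ['2021-01-02', '2021-01-03']
    tdi * 2                           # TimedeltaIndex ['2 days', '4 days']
    tdi / pd.Timedelta("12 hours")    # float results [2.0, 4.0]
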
+ """ + if isinstance(other, np.ndarray): + # At this point we have already checked that dtype is datetime64 + from pandas.core.arrays import DatetimeArray + + other = DatetimeArray(other) + + # defer to implementation in DatetimeArray + return other + self + + def _add_datetimelike_scalar(self, other) -> DatetimeArray: + # adding a timedeltaindex to a datetimelike + from pandas.core.arrays import DatetimeArray + + assert other is not NaT + other = Timestamp(other) + if other is NaT: + # In this case we specifically interpret NaT as a datetime, not + # the timedelta interpretation we would get by returning self + NaT + result = self.asi8.view("m8[ms]") + NaT.to_datetime64() + return DatetimeArray(result) + + i8 = self.asi8 + result = checked_add_with_arr(i8, other.value, arr_mask=self._isnan) + result = self._maybe_mask_results(result) + dtype = DatetimeTZDtype(tz=other.tz) if other.tz else DT64NS_DTYPE + return DatetimeArray(result, dtype=dtype, freq=self.freq) + + def _addsub_object_array(self, other, op): + # Add or subtract Array-like of objects + try: + # TimedeltaIndex can only operate with a subset of DateOffset + # subclasses. Incompatible classes will raise AttributeError, + # which we re-raise as TypeError + return super()._addsub_object_array(other, op) + except AttributeError as err: + raise TypeError( + f"Cannot add/subtract non-tick DateOffset to {type(self).__name__}" + ) from err + + @unpack_zerodim_and_defer("__mul__") + def __mul__(self, other) -> TimedeltaArray: + if is_scalar(other): + # numpy will accept float and int, raise TypeError for others + result = self._ndarray * other + freq = None + if self.freq is not None and not isna(other): + freq = self.freq * other + return type(self)(result, freq=freq) + + if not hasattr(other, "dtype"): + # list, tuple + other = np.array(other) + if len(other) != len(self) and not is_timedelta64_dtype(other.dtype): + # Exclude timedelta64 here so we correctly raise TypeError + # for that instead of ValueError + raise ValueError("Cannot multiply with unequal lengths") + + if is_object_dtype(other.dtype): + # this multiplication will succeed only if all elements of other + # are int or float scalars, so we will end up with + # timedelta64[ns]-dtyped result + result = [self[n] * other[n] for n in range(len(self))] + result = np.array(result) + return type(self)(result) + + # numpy will accept float or int dtype, raise TypeError for others + result = self._ndarray * other + return type(self)(result) + + __rmul__ = __mul__ + + @unpack_zerodim_and_defer("__truediv__") + def __truediv__(self, other): + # timedelta / X is well-defined for timedelta-like or numeric X + + if isinstance(other, self._recognized_scalars): + other = Timedelta(other) + if other is NaT: + # specifically timedelta64-NaT + result = np.empty(self.shape, dtype=np.float64) + result.fill(np.nan) + return result + + # otherwise, dispatch to Timedelta implementation + return self._ndarray / other + + elif lib.is_scalar(other): + # assume it is numeric + result = self._ndarray / other + freq = None + if self.freq is not None: + # Tick division is not implemented, so operate on Timedelta + freq = self.freq.delta / other + return type(self)(result, freq=freq) + + if not hasattr(other, "dtype"): + # e.g. 
list, tuple + other = np.array(other) + + if len(other) != len(self): + raise ValueError("Cannot divide vectors with unequal lengths") + + elif is_timedelta64_dtype(other.dtype): + # let numpy handle it + return self._ndarray / other + + elif is_object_dtype(other.dtype): + # We operate on raveled arrays to avoid problems in inference + # on NaT + srav = self.ravel() + orav = other.ravel() + result = [srav[n] / orav[n] for n in range(len(srav))] + result = np.array(result).reshape(self.shape) + + # We need to do dtype inference in order to keep DataFrame ops + # behavior consistent with Series behavior + inferred = lib.infer_dtype(result, skipna=False) + if inferred == "timedelta": + flat = result.ravel() + result = type(self)._from_sequence(flat).reshape(result.shape) + elif inferred == "floating": + result = result.astype(float) + elif inferred == "datetime": + # GH#39750 this occurs when result is all-NaT, in which case + # we want to interpret these NaTs as td64. + # We construct an all-td64NaT result. + result = self * np.nan + + return result + + else: + result = self._ndarray / other + return type(self)(result) + + @unpack_zerodim_and_defer("__rtruediv__") + def __rtruediv__(self, other): + # X / timedelta is defined only for timedelta-like X + if isinstance(other, self._recognized_scalars): + other = Timedelta(other) + if other is NaT: + # specifically timedelta64-NaT + result = np.empty(self.shape, dtype=np.float64) + result.fill(np.nan) + return result + + # otherwise, dispatch to Timedelta implementation + return other / self._ndarray + + elif lib.is_scalar(other): + raise TypeError( + f"Cannot divide {type(other).__name__} by {type(self).__name__}" + ) + + if not hasattr(other, "dtype"): + # e.g. list, tuple + other = np.array(other) + + if len(other) != len(self): + raise ValueError("Cannot divide vectors with unequal lengths") + + elif is_timedelta64_dtype(other.dtype): + # let numpy handle it + return other / self._ndarray + + elif is_object_dtype(other.dtype): + # Note: unlike in __truediv__, we do not _need_ to do type + # inference on the result. It does not raise, a numeric array + # is returned. GH#23829 + result = [other[n] / self[n] for n in range(len(self))] + return np.array(result) + + else: + raise TypeError( + f"Cannot divide {other.dtype} data by {type(self).__name__}" + ) + + @unpack_zerodim_and_defer("__floordiv__") + def __floordiv__(self, other): + + if is_scalar(other): + if isinstance(other, self._recognized_scalars): + other = Timedelta(other) + if other is NaT: + # treat this specifically as timedelta-NaT + result = np.empty(self.shape, dtype=np.float64) + result.fill(np.nan) + return result + + # dispatch to Timedelta implementation + result = other.__rfloordiv__(self._ndarray) + return result + + # at this point we should only have numeric scalars; anything + # else will raise + result = self._ndarray // other + freq = None + if self.freq is not None: + # Note: freq gets division, not floor-division + freq = self.freq / other + if freq.nanos == 0 and self.freq.nanos != 0: + # e.g. 
if self.freq is Nano(1) then dividing by 2 + # rounds down to zero + freq = None + return type(self)(result, freq=freq) + + if not hasattr(other, "dtype"): + # list, tuple + other = np.array(other) + if len(other) != len(self): + raise ValueError("Cannot divide with unequal lengths") + + elif is_timedelta64_dtype(other.dtype): + other = type(self)(other) + + # numpy timedelta64 does not natively support floordiv, so operate + # on the i8 values + result = self.asi8 // other.asi8 + mask = self._isnan | other._isnan + if mask.any(): + result = result.astype(np.float64) + np.putmask(result, mask, np.nan) + return result + + elif is_object_dtype(other.dtype): + # error: Incompatible types in assignment (expression has type + # "List[Any]", variable has type "ndarray") + srav = self.ravel() + orav = other.ravel() + res_list = [srav[n] // orav[n] for n in range(len(srav))] + result_flat = np.asarray(res_list) + inferred = lib.infer_dtype(result_flat, skipna=False) + + result = result_flat.reshape(self.shape) + + if inferred == "timedelta": + result, _ = sequence_to_td64ns(result) + return type(self)(result) + if inferred == "datetime": + # GH#39750 occurs when result is all-NaT, which in this + # case should be interpreted as td64nat. This can only + # occur when self is all-td64nat + return self * np.nan + return result + + elif is_integer_dtype(other.dtype) or is_float_dtype(other.dtype): + result = self._ndarray // other + return type(self)(result) + + else: + dtype = getattr(other, "dtype", type(other).__name__) + raise TypeError(f"Cannot divide {dtype} by {type(self).__name__}") + + @unpack_zerodim_and_defer("__rfloordiv__") + def __rfloordiv__(self, other): + + if is_scalar(other): + if isinstance(other, self._recognized_scalars): + other = Timedelta(other) + if other is NaT: + # treat this specifically as timedelta-NaT + result = np.empty(self.shape, dtype=np.float64) + result.fill(np.nan) + return result + + # dispatch to Timedelta implementation + result = other.__floordiv__(self._ndarray) + return result + + raise TypeError( + f"Cannot divide {type(other).__name__} by {type(self).__name__}" + ) + + if not hasattr(other, "dtype"): + # list, tuple + other = np.array(other) + + if len(other) != len(self): + raise ValueError("Cannot divide with unequal lengths") + + elif is_timedelta64_dtype(other.dtype): + other = type(self)(other) + # numpy timedelta64 does not natively support floordiv, so operate + # on the i8 values + result = other.asi8 // self.asi8 + mask = self._isnan | other._isnan + if mask.any(): + result = result.astype(np.float64) + np.putmask(result, mask, np.nan) + return result + + elif is_object_dtype(other.dtype): + result_list = [other[n] // self[n] for n in range(len(self))] + result = np.array(result_list) + return result + + else: + dtype = getattr(other, "dtype", type(other).__name__) + raise TypeError(f"Cannot divide {dtype} by {type(self).__name__}") + + @unpack_zerodim_and_defer("__mod__") + def __mod__(self, other): + # Note: This is a naive implementation, can likely be optimized + if isinstance(other, self._recognized_scalars): + other = Timedelta(other) + return self - (self // other) * other + + @unpack_zerodim_and_defer("__rmod__") + def __rmod__(self, other): + # Note: This is a naive implementation, can likely be optimized + if isinstance(other, self._recognized_scalars): + other = Timedelta(other) + return other - (other // self) * self + + @unpack_zerodim_and_defer("__divmod__") + def __divmod__(self, other): + # Note: This is a naive implementation, can 
likely be optimized + if isinstance(other, self._recognized_scalars): + other = Timedelta(other) + + res1 = self // other + res2 = self - res1 * other + return res1, res2 + + @unpack_zerodim_and_defer("__rdivmod__") + def __rdivmod__(self, other): + # Note: This is a naive implementation, can likely be optimized + if isinstance(other, self._recognized_scalars): + other = Timedelta(other) + + res1 = other // self + res2 = other - res1 * self + return res1, res2 + + def __neg__(self) -> TimedeltaArray: + if self.freq is not None: + return type(self)(-self._ndarray, freq=-self.freq) + return type(self)(-self._ndarray) + + def __pos__(self) -> TimedeltaArray: + return type(self)(self._ndarray.copy(), freq=self.freq) + + def __abs__(self) -> TimedeltaArray: + # Note: freq is not preserved + return type(self)(np.abs(self._ndarray)) + + # ---------------------------------------------------------------- + # Conversion Methods - Vectorized analogues of Timedelta methods + + def total_seconds(self) -> np.ndarray: + """ + Return total duration of each element expressed in seconds. + + This method is available directly on TimedeltaArray, TimedeltaIndex + and on Series containing timedelta values under the ``.dt`` namespace. + + Returns + ------- + seconds : [ndarray, Float64Index, Series] + When the calling object is a TimedeltaArray, the return type + is ndarray. When the calling object is a TimedeltaIndex, + the return type is a Float64Index. When the calling object + is a Series, the return type is Series of type `float64` whose + index is the same as the original. + + See Also + -------- + datetime.timedelta.total_seconds : Standard library version + of this method. + TimedeltaIndex.components : Return a DataFrame with components of + each Timedelta. + + Examples + -------- + **Series** + + >>> s = pd.Series(pd.to_timedelta(np.arange(5), unit='d')) + >>> s + 0 0 days + 1 1 days + 2 2 days + 3 3 days + 4 4 days + dtype: timedelta64[ns] + + >>> s.dt.total_seconds() + 0 0.0 + 1 86400.0 + 2 172800.0 + 3 259200.0 + 4 345600.0 + dtype: float64 + + **TimedeltaIndex** + + >>> idx = pd.to_timedelta(np.arange(5), unit='d') + >>> idx + TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'], + dtype='timedelta64[ns]', freq=None) + + >>> idx.total_seconds() + Float64Index([0.0, 86400.0, 172800.0, 259200.00000000003, 345600.0], + dtype='float64') + """ + return self._maybe_mask_results(1e-9 * self.asi8, fill_value=None) + + def to_pytimedelta(self) -> np.ndarray: + """ + Return Timedelta Array/Index as object ndarray of datetime.timedelta + objects. + + Returns + ------- + timedeltas : ndarray[object] + """ + return tslibs.ints_to_pytimedelta(self.asi8) + + days = _field_accessor("days", "days", "Number of days for each element.") + seconds = _field_accessor( + "seconds", + "seconds", + "Number of seconds (>= 0 and less than 1 day) for each element.", + ) + microseconds = _field_accessor( + "microseconds", + "microseconds", + "Number of microseconds (>= 0 and less than 1 second) for each element.", + ) + nanoseconds = _field_accessor( + "nanoseconds", + "nanoseconds", + "Number of nanoseconds (>= 0 and less than 1 microsecond) for each element.", + ) + + @property + def components(self) -> DataFrame: + """ + Return a dataframe of the components (days, hours, minutes, + seconds, milliseconds, microseconds, nanoseconds) of the Timedeltas. 
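+
+        Each row corresponds to one element. If missing values are present,
+        their rows are all-``NaN`` and the result has float columns;
+        otherwise the columns are ``int64``.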
+
+        Returns
+        -------
+        DataFrame
+        """
+        from pandas import DataFrame
+
+        columns = [
+            "days",
+            "hours",
+            "minutes",
+            "seconds",
+            "milliseconds",
+            "microseconds",
+            "nanoseconds",
+        ]
+        hasnans = self._hasna
+        if hasnans:
+
+            def f(x):
+                if isna(x):
+                    return [np.nan] * len(columns)
+                return x.components
+
+        else:
+
+            def f(x):
+                return x.components
+
+        result = DataFrame([f(x) for x in self], columns=columns)
+        if not hasnans:
+            result = result.astype("int64")
+        return result
+
+
+# ---------------------------------------------------------------------
+# Constructor Helpers
+
+
+def sequence_to_td64ns(
+    data, copy: bool = False, unit=None, errors="raise"
+) -> tuple[np.ndarray, Tick | None]:
+    """
+    Convert a list-like to a timedelta64[ns]-dtype ndarray, inferring
+    frequency if possible.
+
+    Parameters
+    ----------
+    data : list-like
+    copy : bool, default False
+    unit : str, optional
+        The timedelta unit to treat integers as multiples of. For numeric
+        data this defaults to ``'ns'``.
+        Must be unspecified if the data contains a str and ``errors=="raise"``.
+    errors : {"raise", "coerce", "ignore"}, default "raise"
+        How to handle elements that cannot be converted to timedelta64[ns].
+        See ``pandas.to_timedelta`` for details.
+
+    Returns
+    -------
+    converted : numpy.ndarray
+        The sequence converted to a numpy array with dtype ``timedelta64[ns]``.
+    inferred_freq : Tick or None
+        The inferred frequency of the sequence.
+
+    Raises
+    ------
+    ValueError : Data cannot be converted to timedelta64[ns].
+
+    Notes
+    -----
+    Unlike `pandas.to_timedelta`, setting ``errors="ignore"`` will not cause
+    errors to be ignored; they are caught and subsequently ignored at a
+    higher level.
+    """
+    inferred_freq = None
+    if unit is not None:
+        unit = parse_timedelta_unit(unit)
+
+    # Unwrap whatever we have into a np.ndarray
+    if not hasattr(data, "dtype"):
+        # e.g. list, tuple
+        if np.ndim(data) == 0:
+            # i.e. generator
+            data = list(data)
+        data = np.array(data, copy=False)
+    elif isinstance(data, ABCMultiIndex):
+        raise TypeError("Cannot create a TimedeltaArray from a MultiIndex.")
+    else:
+        data = extract_array(data, extract_numpy=True)
+
+    if isinstance(data, IntegerArray):
+        data = data.to_numpy("int64", na_value=iNaT)
+    elif not isinstance(data, (np.ndarray, ExtensionArray)):
+        # GH#24539 e.g. xarray, dask object
+        data = np.asarray(data)
+    elif isinstance(data, ABCCategorical):
+        data = data.categories.take(data.codes, fill_value=NaT)._values
+        copy = False
+
+    if isinstance(data, TimedeltaArray):
+        inferred_freq = data.freq
+
+    # Convert whatever we have into timedelta64[ns] dtype
+    if is_object_dtype(data.dtype) or is_string_dtype(data.dtype):
+        # no need to make a copy, need to convert if string-dtyped
+        data = objects_to_td64ns(data, unit=unit, errors=errors)
+        copy = False
+
+    elif is_integer_dtype(data.dtype):
+        # treat as multiples of the given unit
+        data, copy_made = ints_to_td64ns(data, unit=unit)
+        copy = copy and not copy_made
+
+    elif is_float_dtype(data.dtype):
+        # cast the unit, multiply base/frac separately
+        # to avoid precision issues from float -> int
+        mask = np.isnan(data)
+        m, p = precision_from_unit(unit or "ns")
+        base = data.astype(np.int64)
+        frac = data - base
+        if p:
+            frac = np.round(frac, p)
+        data = (base * m + (frac * m).astype(np.int64)).view("timedelta64[ns]")
+        data[mask] = iNaT
+        copy = False
+
+    elif is_timedelta64_dtype(data.dtype):
+        if data.dtype != TD64NS_DTYPE:
+            # non-nano unit
+            data = ensure_timedelta64ns(data)
+            copy = False
+
+    else:
+        # This includes datetime64-dtype, see GH#23539, GH#29794
+        raise TypeError(f"dtype {data.dtype} cannot be converted to timedelta64[ns]")
+
+    data = np.array(data, copy=copy)
+
+    assert data.dtype == "m8[ns]", data
+    return data, inferred_freq
+
+
+def ints_to_td64ns(data, unit="ns"):
+    """
+    Convert an ndarray with integer-dtype to timedelta64[ns] dtype, treating
+    the integers as multiples of the given timedelta unit.
+
+    Parameters
+    ----------
+    data : numpy.ndarray with integer-dtype
+    unit : str, default "ns"
+        The timedelta unit to treat integers as multiples of.
+
+    Returns
+    -------
+    numpy.ndarray : timedelta64[ns] array converted from data
+    bool : whether a copy was made
+    """
+    copy_made = False
+    unit = unit if unit is not None else "ns"
+
+    if data.dtype != np.int64:
+        # converting to int64 makes a copy, so we can avoid
+        # re-copying later
+        data = data.astype(np.int64)
+        copy_made = True
+
+    if unit != "ns":
+        dtype_str = f"timedelta64[{unit}]"
+        data = data.view(dtype_str)
+
+        data = ensure_timedelta64ns(data)
+
+        # the astype conversion makes a copy, so we can avoid re-copying later
+        copy_made = True
+
+    else:
+        data = data.view("timedelta64[ns]")
+
+    return data, copy_made
+
+
+def objects_to_td64ns(data, unit=None, errors="raise"):
+    """
+    Convert an object-dtype or string-dtype array into a
+    timedelta64[ns]-dtype array.
+
+    Parameters
+    ----------
+    data : ndarray or Index
+    unit : str, optional
+        The timedelta unit to treat integers as multiples of;
+        defaults to ``"ns"``.
+        Must not be specified if the data contains a str.
+    errors : {"raise", "coerce", "ignore"}, default "raise"
+        How to handle elements that cannot be converted to timedelta64[ns].
+        See ``pandas.to_timedelta`` for details.
+
+    Returns
+    -------
+    numpy.ndarray : timedelta64[ns] array converted from data
+
+    Raises
+    ------
+    ValueError : Data cannot be converted to timedelta64[ns].
+
+    Notes
+    -----
+    Unlike `pandas.to_timedelta`, setting ``errors="ignore"`` will not cause
+    errors to be ignored; they are caught and subsequently ignored at a
+    higher level.
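+
+    Examples
+    --------
+    A minimal sketch via the public ``pandas.to_timedelta`` entry point,
+    which routes object-dtype input through this converter (values are
+    illustrative):
+
+    >>> pd.to_timedelta(np.array(["1 days", "2 days"], dtype=object))
+    TimedeltaIndex(['1 days', '2 days'], dtype='timedelta64[ns]', freq=None)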
+ """ + # coerce Index to np.ndarray, converting string-dtype if necessary + values = np.array(data, dtype=np.object_, copy=False) + + result = array_to_timedelta64(values, unit=unit, errors=errors) + return result.view("timedelta64[ns]") + + +def _validate_td64_dtype(dtype) -> DtypeObj: + dtype = pandas_dtype(dtype) + if is_dtype_equal(dtype, np.dtype("timedelta64")): + # no precision disallowed GH#24806 + msg = ( + "Passing in 'timedelta' dtype with no precision is not allowed. " + "Please pass in 'timedelta64[ns]' instead." + ) + raise ValueError(msg) + + if not is_dtype_equal(dtype, TD64NS_DTYPE): + raise ValueError(f"dtype {dtype} cannot be converted to timedelta64[ns]") + + return dtype diff --git a/.local/lib/python3.8/site-packages/pandas/core/base.py b/.local/lib/python3.8/site-packages/pandas/core/base.py new file mode 100644 index 0000000..504b6d5 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/base.py @@ -0,0 +1,1304 @@ +""" +Base and utility classes for pandas objects. +""" + +from __future__ import annotations + +import textwrap +from typing import ( + TYPE_CHECKING, + Any, + Generic, + Hashable, + Literal, + TypeVar, + cast, + final, + overload, +) + +import numpy as np + +import pandas._libs.lib as lib +from pandas._typing import ( + ArrayLike, + DtypeObj, + IndexLabel, + NDFrameT, + Shape, + npt, +) +from pandas.compat import PYPY +from pandas.compat.numpy import function as nv +from pandas.errors import AbstractMethodError +from pandas.util._decorators import ( + cache_readonly, + doc, +) + +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_dict_like, + is_extension_array_dtype, + is_object_dtype, + is_scalar, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCIndex, + ABCSeries, +) +from pandas.core.dtypes.missing import ( + isna, + remove_na_arraylike, +) + +from pandas.core import ( + algorithms, + nanops, + ops, +) +from pandas.core.accessor import DirNamesMixin +from pandas.core.algorithms import ( + duplicated, + unique1d, + value_counts, +) +from pandas.core.arraylike import OpsMixin +from pandas.core.arrays import ExtensionArray +from pandas.core.construction import ( + create_series_with_explicit_dtype, + ensure_wrapped_if_datetimelike, + extract_array, +) + +if TYPE_CHECKING: + + from pandas._typing import ( + NumpySorter, + NumpyValueArrayLike, + ) + + from pandas import Categorical + + +_shared_docs: dict[str, str] = {} +_indexops_doc_kwargs = { + "klass": "IndexOpsMixin", + "inplace": "", + "unique": "IndexOpsMixin", + "duplicated": "IndexOpsMixin", +} + +_T = TypeVar("_T", bound="IndexOpsMixin") + + +class PandasObject(DirNamesMixin): + """ + Baseclass for various pandas objects. + """ + + # results from calls to methods decorated with cache_readonly get added to _cache + _cache: dict[str, Any] + + @property + def _constructor(self): + """ + Class constructor (for this class it's just `__class__`. + """ + return type(self) + + def __repr__(self) -> str: + """ + Return a string representation for a particular object. + """ + # Should be overwritten by base classes + return object.__repr__(self) + + def _reset_cache(self, key: str | None = None) -> None: + """ + Reset cached properties. If ``key`` is passed, only clears that key. 
+ """ + if not hasattr(self, "_cache"): + return + if key is None: + self._cache.clear() + else: + self._cache.pop(key, None) + + def __sizeof__(self) -> int: + """ + Generates the total memory usage for an object that returns + either a value or Series of values + """ + memory_usage = getattr(self, "memory_usage", None) + if memory_usage: + mem = memory_usage(deep=True) + return int(mem if is_scalar(mem) else mem.sum()) + + # no memory_usage attribute, so fall back to object's 'sizeof' + return super().__sizeof__() + + +class NoNewAttributesMixin: + """ + Mixin which prevents adding new attributes. + + Prevents additional attributes via xxx.attribute = "something" after a + call to `self.__freeze()`. Mainly used to prevent the user from using + wrong attributes on an accessor (`Series.cat/.str/.dt`). + + If you really want to add a new attribute at a later time, you need to use + `object.__setattr__(self, key, value)`. + """ + + def _freeze(self): + """ + Prevents setting additional attributes. + """ + object.__setattr__(self, "__frozen", True) + + # prevent adding any attribute via s.xxx.new_attribute = ... + def __setattr__(self, key: str, value): + # _cache is used by a decorator + # We need to check both 1.) cls.__dict__ and 2.) getattr(self, key) + # because + # 1.) getattr is false for attributes that raise errors + # 2.) cls.__dict__ doesn't traverse into base classes + if getattr(self, "__frozen", False) and not ( + key == "_cache" + or key in type(self).__dict__ + or getattr(self, key, None) is not None + ): + raise AttributeError(f"You cannot add any new attribute '{key}'") + object.__setattr__(self, key, value) + + +class DataError(Exception): + pass + + +class SpecificationError(Exception): + pass + + +class SelectionMixin(Generic[NDFrameT]): + """ + mixin implementing the selection & aggregation interface on a group-like + object sub-classes need to define: obj, exclusions + """ + + obj: NDFrameT + _selection: IndexLabel | None = None + exclusions: frozenset[Hashable] + _internal_names = ["_cache", "__setstate__"] + _internal_names_set = set(_internal_names) + + @final + @property + def _selection_list(self): + if not isinstance( + self._selection, (list, tuple, ABCSeries, ABCIndex, np.ndarray) + ): + return [self._selection] + return self._selection + + @cache_readonly + def _selected_obj(self): + if self._selection is None or isinstance(self.obj, ABCSeries): + return self.obj + else: + return self.obj[self._selection] + + @final + @cache_readonly + def ndim(self) -> int: + return self._selected_obj.ndim + + @final + @cache_readonly + def _obj_with_exclusions(self): + if self._selection is not None and isinstance(self.obj, ABCDataFrame): + return self.obj[self._selection_list] + + if len(self.exclusions) > 0: + # equivalent to `self.obj.drop(self.exclusions, axis=1) + # but this avoids consolidating and making a copy + return self.obj._drop_axis( + self.exclusions, axis=1, consolidate=False, only_slice=True + ) + else: + return self.obj + + def __getitem__(self, key): + if self._selection is not None: + raise IndexError(f"Column(s) {self._selection} already selected") + + if isinstance(key, (list, tuple, ABCSeries, ABCIndex, np.ndarray)): + if len(self.obj.columns.intersection(key)) != len(set(key)): + bad_keys = list(set(key).difference(self.obj.columns)) + raise KeyError(f"Columns not found: {str(bad_keys)[1:-1]}") + return self._gotitem(list(key), ndim=2) + + elif not getattr(self, "as_index", False): + if key not in self.obj.columns: + raise KeyError(f"Column not found: 
{key}") + return self._gotitem(key, ndim=2) + + else: + if key not in self.obj: + raise KeyError(f"Column not found: {key}") + subset = self.obj[key] + ndim = subset.ndim + return self._gotitem(key, ndim=ndim, subset=subset) + + def _gotitem(self, key, ndim: int, subset=None): + """ + sub-classes to define + return a sliced object + + Parameters + ---------- + key : str / list of selections + ndim : {1, 2} + requested ndim of result + subset : object, default None + subset to act on + """ + raise AbstractMethodError(self) + + def aggregate(self, func, *args, **kwargs): + raise AbstractMethodError(self) + + agg = aggregate + + +class IndexOpsMixin(OpsMixin): + """ + Common ops mixin to support a unified interface / docs for Series / Index + """ + + # ndarray compatibility + __array_priority__ = 1000 + _hidden_attrs: frozenset[str] = frozenset( + ["tolist"] # tolist is not deprecated, just suppressed in the __dir__ + ) + + @property + def dtype(self) -> DtypeObj: + # must be defined here as a property for mypy + raise AbstractMethodError(self) + + @property + def _values(self) -> ExtensionArray | np.ndarray: + # must be defined here as a property for mypy + raise AbstractMethodError(self) + + def transpose(self: _T, *args, **kwargs) -> _T: + """ + Return the transpose, which is by definition self. + + Returns + ------- + %(klass)s + """ + nv.validate_transpose(args, kwargs) + return self + + T = property( + transpose, + doc=""" + Return the transpose, which is by definition self. + """, + ) + + @property + def shape(self) -> Shape: + """ + Return a tuple of the shape of the underlying data. + """ + return self._values.shape + + def __len__(self) -> int: + # We need this defined here for mypy + raise AbstractMethodError(self) + + @property + def ndim(self) -> int: + """ + Number of dimensions of the underlying data, by definition 1. + """ + return 1 + + def item(self): + """ + Return the first element of the underlying data as a Python scalar. + + Returns + ------- + scalar + The first element of %(klass)s. + + Raises + ------ + ValueError + If the data is not length-1. + """ + if len(self) == 1: + return next(iter(self)) + raise ValueError("can only convert an array of size 1 to a Python scalar") + + @property + def nbytes(self) -> int: + """ + Return the number of bytes in the underlying data. + """ + return self._values.nbytes + + @property + def size(self) -> int: + """ + Return the number of elements in the underlying data. + """ + return len(self._values) + + @property + def array(self) -> ExtensionArray: + """ + The ExtensionArray of the data backing this Series or Index. + + Returns + ------- + ExtensionArray + An ExtensionArray of the values stored within. For extension + types, this is the actual array. For NumPy native types, this + is a thin (no copy) wrapper around :class:`numpy.ndarray`. + + ``.array`` differs ``.values`` which may require converting the + data to a different form. + + See Also + -------- + Index.to_numpy : Similar method that always returns a NumPy array. + Series.to_numpy : Similar method that always returns a NumPy array. + + Notes + ----- + This table lays out the different array types for each extension + dtype within pandas. 
+
+        ================== =============================
+        dtype              array type
+        ================== =============================
+        category           Categorical
+        period             PeriodArray
+        interval           IntervalArray
+        IntegerNA          IntegerArray
+        string             StringArray
+        boolean            BooleanArray
+        datetime64[ns, tz] DatetimeArray
+        ================== =============================
+
+        For any 3rd-party extension types, the array type will be an
+        ExtensionArray.
+
+        For all remaining dtypes ``.array`` will be a
+        :class:`arrays.PandasArray` wrapping the actual ndarray
+        stored within. If you absolutely need a NumPy array (possibly with
+        copying / coercing data), then use :meth:`Series.to_numpy` instead.
+
+        Examples
+        --------
+        For regular NumPy types like int and float, a PandasArray
+        is returned.
+
+        >>> pd.Series([1, 2, 3]).array
+        <PandasArray>
+        [1, 2, 3]
+        Length: 3, dtype: int64
+
+        For extension types, like Categorical, the actual ExtensionArray
+        is returned.
+
+        >>> ser = pd.Series(pd.Categorical(['a', 'b', 'a']))
+        >>> ser.array
+        ['a', 'b', 'a']
+        Categories (2, object): ['a', 'b']
+        """
+        raise AbstractMethodError(self)
+
+    def to_numpy(
+        self,
+        dtype: npt.DTypeLike | None = None,
+        copy: bool = False,
+        na_value=lib.no_default,
+        **kwargs,
+    ) -> np.ndarray:
+        """
+        A NumPy ndarray representing the values in this Series or Index.
+
+        Parameters
+        ----------
+        dtype : str or numpy.dtype, optional
+            The dtype to pass to :meth:`numpy.asarray`.
+        copy : bool, default False
+            Whether to ensure that the returned value is not a view on
+            another array. Note that ``copy=False`` does not *ensure* that
+            ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensures that
+            a copy is made, even if not strictly necessary.
+        na_value : Any, optional
+            The value to use for missing values. The default value depends
+            on `dtype` and the type of the array.
+
+            .. versionadded:: 1.0.0
+
+        **kwargs
+            Additional keywords passed through to the ``to_numpy`` method
+            of the underlying array (for extension arrays).
+
+            .. versionadded:: 1.0.0
+
+        Returns
+        -------
+        numpy.ndarray
+
+        See Also
+        --------
+        Series.array : Get the actual data stored within.
+        Index.array : Get the actual data stored within.
+        DataFrame.to_numpy : Similar method for DataFrame.
+
+        Notes
+        -----
+        The returned array will be the same up to equality (values equal
+        in `self` will be equal in the returned array; likewise for values
+        that are not equal). When `self` contains an ExtensionArray, the
+        dtype may be different. For example, for a category-dtype Series,
+        ``to_numpy()`` will return a NumPy array and the categorical dtype
+        will be lost.
+
+        For NumPy dtypes, this will be a reference to the actual data stored
+        in this Series or Index (assuming ``copy=False``). Modifying the result
+        in place will modify the data stored in the Series or Index (not that
+        we recommend doing that).
+
+        For extension types, ``to_numpy()`` *may* require copying data and
+        coercing the result to a NumPy type (possibly object), which may be
+        expensive. When you need a no-copy reference to the underlying data,
+        :attr:`Series.array` should be used instead.
+
+        This table lays out the different dtypes and default return types of
+        ``to_numpy()`` for various dtypes within pandas.
+ + ================== ================================ + dtype array type + ================== ================================ + category[T] ndarray[T] (same dtype as input) + period ndarray[object] (Periods) + interval ndarray[object] (Intervals) + IntegerNA ndarray[object] + datetime64[ns] datetime64[ns] + datetime64[ns, tz] ndarray[object] (Timestamps) + ================== ================================ + + Examples + -------- + >>> ser = pd.Series(pd.Categorical(['a', 'b', 'a'])) + >>> ser.to_numpy() + array(['a', 'b', 'a'], dtype=object) + + Specify the `dtype` to control how datetime-aware data is represented. + Use ``dtype=object`` to return an ndarray of pandas :class:`Timestamp` + objects, each with the correct ``tz``. + + >>> ser = pd.Series(pd.date_range('2000', periods=2, tz="CET")) + >>> ser.to_numpy(dtype=object) + array([Timestamp('2000-01-01 00:00:00+0100', tz='CET'), + Timestamp('2000-01-02 00:00:00+0100', tz='CET')], + dtype=object) + + Or ``dtype='datetime64[ns]'`` to return an ndarray of native + datetime64 values. The values are converted to UTC and the timezone + info is dropped. + + >>> ser.to_numpy(dtype="datetime64[ns]") + ... # doctest: +ELLIPSIS + array(['1999-12-31T23:00:00.000000000', '2000-01-01T23:00:00...'], + dtype='datetime64[ns]') + """ + if is_extension_array_dtype(self.dtype): + return self.array.to_numpy(dtype, copy=copy, na_value=na_value, **kwargs) + elif kwargs: + bad_keys = list(kwargs.keys())[0] + raise TypeError( + f"to_numpy() got an unexpected keyword argument '{bad_keys}'" + ) + + result = np.asarray(self._values, dtype=dtype) + # TODO(GH-24345): Avoid potential double copy + if copy or na_value is not lib.no_default: + result = result.copy() + if na_value is not lib.no_default: + result[self.isna()] = na_value + return result + + @property + def empty(self) -> bool: + return not self.size + + def max(self, axis=None, skipna: bool = True, *args, **kwargs): + """ + Return the maximum value of the Index. + + Parameters + ---------- + axis : int, optional + For compatibility with NumPy. Only 0 or None are allowed. + skipna : bool, default True + Exclude NA/null values when showing the result. + *args, **kwargs + Additional arguments and keywords for compatibility with NumPy. + + Returns + ------- + scalar + Maximum value. + + See Also + -------- + Index.min : Return the minimum value in an Index. + Series.max : Return the maximum value in a Series. + DataFrame.max : Return the maximum values in a DataFrame. + + Examples + -------- + >>> idx = pd.Index([3, 2, 1]) + >>> idx.max() + 3 + + >>> idx = pd.Index(['c', 'b', 'a']) + >>> idx.max() + 'c' + + For a MultiIndex, the maximum is determined lexicographically. + + >>> idx = pd.MultiIndex.from_product([('a', 'b'), (2, 1)]) + >>> idx.max() + ('b', 2) + """ + nv.validate_minmax_axis(axis) + nv.validate_max(args, kwargs) + return nanops.nanmax(self._values, skipna=skipna) + + @doc(op="max", oppose="min", value="largest") + def argmax(self, axis=None, skipna: bool = True, *args, **kwargs) -> int: + """ + Return int position of the {value} value in the Series. + + If the {op}imum is achieved in multiple locations, + the first row position is returned. + + Parameters + ---------- + axis : {{None}} + Dummy argument for consistency with Series. + skipna : bool, default True + Exclude NA/null values when showing the result. + *args, **kwargs + Additional arguments and keywords for compatibility with NumPy. + + Returns + ------- + int + Row position of the {op}imum value. 
+ + See Also + -------- + Series.arg{op} : Return position of the {op}imum value. + Series.arg{oppose} : Return position of the {oppose}imum value. + numpy.ndarray.arg{op} : Equivalent method for numpy arrays. + Series.idxmax : Return index label of the maximum values. + Series.idxmin : Return index label of the minimum values. + + Examples + -------- + Consider dataset containing cereal calories + + >>> s = pd.Series({{'Corn Flakes': 100.0, 'Almond Delight': 110.0, + ... 'Cinnamon Toast Crunch': 120.0, 'Cocoa Puff': 110.0}}) + >>> s + Corn Flakes 100.0 + Almond Delight 110.0 + Cinnamon Toast Crunch 120.0 + Cocoa Puff 110.0 + dtype: float64 + + >>> s.argmax() + 2 + >>> s.argmin() + 0 + + The maximum cereal calories is the third element and + the minimum cereal calories is the first element, + since series is zero-indexed. + """ + delegate = self._values + nv.validate_minmax_axis(axis) + skipna = nv.validate_argmax_with_skipna(skipna, args, kwargs) + + if isinstance(delegate, ExtensionArray): + if not skipna and delegate.isna().any(): + return -1 + else: + return delegate.argmax() + else: + # error: Incompatible return value type (got "Union[int, ndarray]", expected + # "int") + return nanops.nanargmax( # type: ignore[return-value] + delegate, skipna=skipna + ) + + def min(self, axis=None, skipna: bool = True, *args, **kwargs): + """ + Return the minimum value of the Index. + + Parameters + ---------- + axis : {None} + Dummy argument for consistency with Series. + skipna : bool, default True + Exclude NA/null values when showing the result. + *args, **kwargs + Additional arguments and keywords for compatibility with NumPy. + + Returns + ------- + scalar + Minimum value. + + See Also + -------- + Index.max : Return the maximum value of the object. + Series.min : Return the minimum value in a Series. + DataFrame.min : Return the minimum values in a DataFrame. + + Examples + -------- + >>> idx = pd.Index([3, 2, 1]) + >>> idx.min() + 1 + + >>> idx = pd.Index(['c', 'b', 'a']) + >>> idx.min() + 'a' + + For a MultiIndex, the minimum is determined lexicographically. + + >>> idx = pd.MultiIndex.from_product([('a', 'b'), (2, 1)]) + >>> idx.min() + ('a', 1) + """ + nv.validate_minmax_axis(axis) + nv.validate_min(args, kwargs) + return nanops.nanmin(self._values, skipna=skipna) + + @doc(argmax, op="min", oppose="max", value="smallest") + def argmin(self, axis=None, skipna=True, *args, **kwargs) -> int: + delegate = self._values + nv.validate_minmax_axis(axis) + skipna = nv.validate_argmin_with_skipna(skipna, args, kwargs) + + if isinstance(delegate, ExtensionArray): + if not skipna and delegate.isna().any(): + return -1 + else: + return delegate.argmin() + else: + # error: Incompatible return value type (got "Union[int, ndarray]", expected + # "int") + return nanops.nanargmin( # type: ignore[return-value] + delegate, skipna=skipna + ) + + def tolist(self): + """ + Return a list of the values. + + These are each a scalar type, which is a Python scalar + (for str, int, float) or a pandas scalar + (for Timestamp/Timedelta/Interval/Period) + + Returns + ------- + list + + See Also + -------- + numpy.ndarray.tolist : Return the array as an a.ndim-levels deep + nested list of Python scalars. + """ + return self._values.tolist() + + to_list = tolist + + def __iter__(self): + """ + Return an iterator of the values. 
+ + These are each a scalar type, which is a Python scalar + (for str, int, float) or a pandas scalar + (for Timestamp/Timedelta/Interval/Period) + + Returns + ------- + iterator + """ + # We are explicitly making element iterators. + if not isinstance(self._values, np.ndarray): + # Check type instead of dtype to catch DTA/TDA + return iter(self._values) + else: + return map(self._values.item, range(self._values.size)) + + @cache_readonly + def hasnans(self) -> bool: + """ + Return True if there are any NaNs. + + Enables various performance speedups. + """ + return bool(isna(self).any()) + + def isna(self): + return isna(self._values) + + def _reduce( + self, + op, + name: str, + *, + axis=0, + skipna=True, + numeric_only=None, + filter_type=None, + **kwds, + ): + """ + Perform the reduction type operation if we can. + """ + func = getattr(self, name, None) + if func is None: + raise TypeError( + f"{type(self).__name__} cannot perform the operation {name}" + ) + return func(skipna=skipna, **kwds) + + @final + def _map_values(self, mapper, na_action=None): + """ + An internal function that maps values using the input + correspondence (which can be a dict, Series, or function). + + Parameters + ---------- + mapper : function, dict, or Series + The input correspondence object + na_action : {None, 'ignore'} + If 'ignore', propagate NA values, without passing them to the + mapping function + + Returns + ------- + Union[Index, MultiIndex], inferred + The output of the mapping function applied to the index. + If the function returns a tuple with more than one element + a MultiIndex will be returned. + """ + # we can fastpath dict/Series to an efficient map + # as we know that we are not going to have to yield + # python types + if is_dict_like(mapper): + if isinstance(mapper, dict) and hasattr(mapper, "__missing__"): + # If a dictionary subclass defines a default value method, + # convert mapper to a lookup function (GH #15999). + dict_with_default = mapper + mapper = lambda x: dict_with_default[x] + else: + # Dictionary does not have a default. Thus it's safe to + # convert to an Series for efficiency. + # we specify the keys here to handle the + # possibility that they are tuples + + # The return value of mapping with an empty mapper is + # expected to be pd.Series(np.nan, ...). 
As np.nan is + # of dtype float64 the return value of this method should + # be float64 as well + mapper = create_series_with_explicit_dtype( + mapper, dtype_if_empty=np.float64 + ) + + if isinstance(mapper, ABCSeries): + # Since values were input this means we came from either + # a dict or a series and mapper should be an index + if is_categorical_dtype(self.dtype): + # use the built in categorical series mapper which saves + # time by mapping the categories instead of all values + + cat = cast("Categorical", self._values) + return cat.map(mapper) + + values = self._values + + indexer = mapper.index.get_indexer(values) + new_values = algorithms.take_nd(mapper._values, indexer) + + return new_values + + # we must convert to python types + if is_extension_array_dtype(self.dtype) and hasattr(self._values, "map"): + # GH#23179 some EAs do not have `map` + values = self._values + if na_action is not None: + raise NotImplementedError + map_f = lambda values, f: values.map(f) + else: + values = self._values.astype(object) + if na_action == "ignore": + map_f = lambda values, f: lib.map_infer_mask( + values, f, isna(values).view(np.uint8) + ) + elif na_action is None: + map_f = lib.map_infer + else: + msg = ( + "na_action must either be 'ignore' or None, " + f"{na_action} was passed" + ) + raise ValueError(msg) + + # mapper is a function + new_values = map_f(values, mapper) + + return new_values + + def value_counts( + self, + normalize: bool = False, + sort: bool = True, + ascending: bool = False, + bins=None, + dropna: bool = True, + ): + """ + Return a Series containing counts of unique values. + + The resulting object will be in descending order so that the + first element is the most frequently-occurring element. + Excludes NA values by default. + + Parameters + ---------- + normalize : bool, default False + If True then the object returned will contain the relative + frequencies of the unique values. + sort : bool, default True + Sort by frequencies. + ascending : bool, default False + Sort in ascending order. + bins : int, optional + Rather than count values, group them into half-open bins, + a convenience for ``pd.cut``, only works with numeric data. + dropna : bool, default True + Don't include counts of NaN. + + Returns + ------- + Series + + See Also + -------- + Series.count: Number of non-NA elements in a Series. + DataFrame.count: Number of non-NA elements in a DataFrame. + DataFrame.value_counts: Equivalent method on DataFrames. + + Examples + -------- + >>> index = pd.Index([3, 1, 2, 3, 4, np.nan]) + >>> index.value_counts() + 3.0 2 + 1.0 1 + 2.0 1 + 4.0 1 + dtype: int64 + + With `normalize` set to `True`, returns the relative frequency by + dividing all values by the sum of values. + + >>> s = pd.Series([3, 1, 2, 3, 4, np.nan]) + >>> s.value_counts(normalize=True) + 3.0 0.4 + 1.0 0.2 + 2.0 0.2 + 4.0 0.2 + dtype: float64 + + **bins** + + Bins can be useful for going from a continuous variable to a + categorical variable; instead of counting unique + apparitions of values, divide the index in the specified + number of half-open bins. + + >>> s.value_counts(bins=3) + (0.996, 2.0] 2 + (2.0, 3.0] 2 + (3.0, 4.0] 1 + dtype: int64 + + **dropna** + + With `dropna` set to `False` we can also see NaN index values. 
+ + >>> s.value_counts(dropna=False) + 3.0 2 + 1.0 1 + 2.0 1 + 4.0 1 + NaN 1 + dtype: int64 + """ + return value_counts( + self, + sort=sort, + ascending=ascending, + normalize=normalize, + bins=bins, + dropna=dropna, + ) + + def unique(self): + values = self._values + + if not isinstance(values, np.ndarray): + result: ArrayLike = values.unique() + if self.dtype.kind in ["m", "M"] and isinstance(self, ABCSeries): + # GH#31182 Series._values returns EA, unpack for backward-compat + if getattr(self.dtype, "tz", None) is None: + result = np.asarray(result) + else: + result = unique1d(values) + + return result + + def nunique(self, dropna: bool = True) -> int: + """ + Return number of unique elements in the object. + + Excludes NA values by default. + + Parameters + ---------- + dropna : bool, default True + Don't include NaN in the count. + + Returns + ------- + int + + See Also + -------- + DataFrame.nunique: Method nunique for DataFrame. + Series.count: Count non-NA/null observations in the Series. + + Examples + -------- + >>> s = pd.Series([1, 3, 5, 7, 7]) + >>> s + 0 1 + 1 3 + 2 5 + 3 7 + 4 7 + dtype: int64 + + >>> s.nunique() + 4 + """ + uniqs = self.unique() + if dropna: + uniqs = remove_na_arraylike(uniqs) + return len(uniqs) + + @property + def is_unique(self) -> bool: + """ + Return boolean if values in the object are unique. + + Returns + ------- + bool + """ + return self.nunique(dropna=False) == len(self) + + @property + def is_monotonic(self) -> bool: + """ + Return boolean if values in the object are + monotonic_increasing. + + Returns + ------- + bool + """ + from pandas import Index + + return Index(self).is_monotonic + + @property + def is_monotonic_increasing(self) -> bool: + """ + Alias for is_monotonic. + """ + # mypy complains if we alias directly + return self.is_monotonic + + @property + def is_monotonic_decreasing(self) -> bool: + """ + Return boolean if values in the object are + monotonic_decreasing. + + Returns + ------- + bool + """ + from pandas import Index + + return Index(self).is_monotonic_decreasing + + def _memory_usage(self, deep: bool = False) -> int: + """ + Memory usage of the values. + + Parameters + ---------- + deep : bool, default False + Introspect the data deeply, interrogate + `object` dtypes for system-level memory consumption. + + Returns + ------- + bytes used + + See Also + -------- + numpy.ndarray.nbytes : Total bytes consumed by the elements of the + array. + + Notes + ----- + Memory usage does not include memory consumed by elements that + are not components of the array if deep=False or if used on PyPy + """ + if hasattr(self.array, "memory_usage"): + # https://github.com/python/mypy/issues/1424 + # error: "ExtensionArray" has no attribute "memory_usage" + return self.array.memory_usage(deep=deep) # type: ignore[attr-defined] + + v = self.array.nbytes + if deep and is_object_dtype(self) and not PYPY: + values = cast(np.ndarray, self._values) + v += lib.memory_usage_of_objects(values) + return v + + @doc( + algorithms.factorize, + values="", + order="", + size_hint="", + sort=textwrap.dedent( + """\ + sort : bool, default False + Sort `uniques` and shuffle `codes` to maintain the + relationship. + """ + ), + ) + def factorize(self, sort: bool = False, na_sentinel: int | None = -1): + return algorithms.factorize(self, sort=sort, na_sentinel=na_sentinel) + + _shared_docs[ + "searchsorted" + ] = """ + Find indices where elements should be inserted to maintain order. 
+ + Find the indices into a sorted {klass} `self` such that, if the + corresponding elements in `value` were inserted before the indices, + the order of `self` would be preserved. + + .. note:: + + The {klass} *must* be monotonically sorted, otherwise + wrong locations will likely be returned. Pandas does *not* + check this for you. + + Parameters + ---------- + value : array-like or scalar + Values to insert into `self`. + side : {{'left', 'right'}}, optional + If 'left', the index of the first suitable location found is given. + If 'right', return the last such index. If there is no suitable + index, return either 0 or N (where N is the length of `self`). + sorter : 1-D array-like, optional + Optional array of integer indices that sort `self` into ascending + order. They are typically the result of ``np.argsort``. + + Returns + ------- + int or array of int + A scalar or array of insertion points with the + same shape as `value`. + + See Also + -------- + sort_values : Sort by the values along either axis. + numpy.searchsorted : Similar method from NumPy. + + Notes + ----- + Binary search is used to find the required insertion points. + + Examples + -------- + >>> ser = pd.Series([1, 2, 3]) + >>> ser + 0 1 + 1 2 + 2 3 + dtype: int64 + + >>> ser.searchsorted(4) + 3 + + >>> ser.searchsorted([0, 4]) + array([0, 3]) + + >>> ser.searchsorted([1, 3], side='left') + array([0, 2]) + + >>> ser.searchsorted([1, 3], side='right') + array([1, 3]) + + >>> ser = pd.Series(pd.to_datetime(['3/11/2000', '3/12/2000', '3/13/2000'])) + >>> ser + 0 2000-03-11 + 1 2000-03-12 + 2 2000-03-13 + dtype: datetime64[ns] + + >>> ser.searchsorted('3/14/2000') + 3 + + >>> ser = pd.Categorical( + ... ['apple', 'bread', 'bread', 'cheese', 'milk'], ordered=True + ... ) + >>> ser + ['apple', 'bread', 'bread', 'cheese', 'milk'] + Categories (4, object): ['apple' < 'bread' < 'cheese' < 'milk'] + + >>> ser.searchsorted('bread') + 1 + + >>> ser.searchsorted(['bread'], side='right') + array([3]) + + If the values are not monotonically sorted, wrong locations + may be returned: + + >>> ser = pd.Series([2, 1, 3]) + >>> ser + 0 2 + 1 1 + 2 3 + dtype: int64 + + >>> ser.searchsorted(1) # doctest: +SKIP + 0 # wrong result, correct would be 1 + """ + + # This overload is needed so that the call to searchsorted in + # pandas.core.resample.TimeGrouper._get_period_bins picks the correct result + + @overload + # The following ignore is also present in numpy/__init__.pyi + # Possibly a mypy bug?? + # error: Overloaded function signatures 1 and 2 overlap with incompatible + # return types [misc] + def searchsorted( # type: ignore[misc] + self, + value: npt._ScalarLike_co, + side: Literal["left", "right"] = ..., + sorter: NumpySorter = ..., + ) -> np.intp: + ... + + @overload + def searchsorted( + self, + value: npt.ArrayLike | ExtensionArray, + side: Literal["left", "right"] = ..., + sorter: NumpySorter = ..., + ) -> npt.NDArray[np.intp]: + ... 
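+
+    # Illustrative overload resolution (a sketch; values are hypothetical):
+    # a scalar ``value`` resolves to the first overload and yields a single
+    # np.intp, while array-like input resolves to the second and yields an
+    # integer ndarray.
+    #
+    #   >>> pd.Index([1, 2, 3]).searchsorted(2)
+    #   1
+    #   >>> pd.Index([1, 2, 3]).searchsorted([0, 4])
+    #   array([0, 3])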
+ + @doc(_shared_docs["searchsorted"], klass="Index") + def searchsorted( + self, + value: NumpyValueArrayLike | ExtensionArray, + side: Literal["left", "right"] = "left", + sorter: NumpySorter = None, + ) -> npt.NDArray[np.intp] | np.intp: + + values = self._values + if not isinstance(values, np.ndarray): + # Going through EA.searchsorted directly improves performance GH#38083 + return values.searchsorted(value, side=side, sorter=sorter) + + return algorithms.searchsorted( + values, + value, + side=side, + sorter=sorter, + ) + + def drop_duplicates(self, keep="first"): + duplicated = self._duplicated(keep=keep) + # error: Value of type "IndexOpsMixin" is not indexable + return self[~duplicated] # type: ignore[index] + + @final + def _duplicated( + self, keep: Literal["first", "last", False] = "first" + ) -> npt.NDArray[np.bool_]: + return duplicated(self._values, keep=keep) + + def _arith_method(self, other, op): + res_name = ops.get_op_result_name(self, other) + + lvalues = self._values + rvalues = extract_array(other, extract_numpy=True, extract_range=True) + rvalues = ops.maybe_prepare_scalar_for_op(rvalues, lvalues.shape) + rvalues = ensure_wrapped_if_datetimelike(rvalues) + + with np.errstate(all="ignore"): + result = ops.arithmetic_op(lvalues, rvalues, op) + + return self._construct_result(result, name=res_name) + + def _construct_result(self, result, name): + """ + Construct an appropriately-wrapped result from the ArrayLike result + of an arithmetic-like operation. + """ + raise AbstractMethodError(self) diff --git a/.local/lib/python3.8/site-packages/pandas/core/common.py b/.local/lib/python3.8/site-packages/pandas/core/common.py new file mode 100644 index 0000000..e398923 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/common.py @@ -0,0 +1,634 @@ +""" +Misc tools for implementing data structures + +Note: pandas.core.common is *not* part of the public API. +""" +from __future__ import annotations + +import builtins +from collections import ( + abc, + defaultdict, +) +import contextlib +from functools import partial +import inspect +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Collection, + Hashable, + Iterable, + Iterator, + Sequence, + cast, + overload, +) +import warnings + +import numpy as np + +from pandas._libs import lib +from pandas._typing import ( + AnyArrayLike, + ArrayLike, + NpDtype, + RandomState, + Scalar, + T, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike +from pandas.core.dtypes.common import ( + is_array_like, + is_bool_dtype, + is_extension_array_dtype, + is_integer, +) +from pandas.core.dtypes.generic import ( + ABCExtensionArray, + ABCIndex, + ABCSeries, +) +from pandas.core.dtypes.inference import iterable_not_string +from pandas.core.dtypes.missing import isna + +if TYPE_CHECKING: + from pandas import Index + + +class SettingWithCopyError(ValueError): + pass + + +class SettingWithCopyWarning(Warning): + pass + + +def flatten(line): + """ + Flatten an arbitrarily nested sequence. + + Parameters + ---------- + line : sequence + The non string sequence to flatten + + Notes + ----- + This doesn't consider strings sequences. 
+ + Returns + ------- + flattened : generator + """ + for element in line: + if iterable_not_string(element): + yield from flatten(element) + else: + yield element + + +def consensus_name_attr(objs): + name = objs[0].name + for obj in objs[1:]: + try: + if obj.name != name: + name = None + except ValueError: + name = None + return name + + +def is_bool_indexer(key: Any) -> bool: + """ + Check whether `key` is a valid boolean indexer. + + Parameters + ---------- + key : Any + Only list-likes may be considered boolean indexers. + All other types are not considered a boolean indexer. + For array-like input, boolean ndarrays or ExtensionArrays + with ``_is_boolean`` set are considered boolean indexers. + + Returns + ------- + bool + Whether `key` is a valid boolean indexer. + + Raises + ------ + ValueError + When the array is an object-dtype ndarray or ExtensionArray + and contains missing values. + + See Also + -------- + check_array_indexer : Check that `key` is a valid array to index, + and convert to an ndarray. + """ + if isinstance(key, (ABCSeries, np.ndarray, ABCIndex)) or ( + is_array_like(key) and is_extension_array_dtype(key.dtype) + ): + if key.dtype == np.object_: + key = np.asarray(key) + + if not lib.is_bool_array(key): + na_msg = "Cannot mask with non-boolean array containing NA / NaN values" + if lib.infer_dtype(key) == "boolean" and isna(key).any(): + # Don't raise on e.g. ["A", "B", np.nan], see + # test_loc_getitem_list_of_labels_categoricalindex_with_na + raise ValueError(na_msg) + return False + return True + elif is_bool_dtype(key.dtype): + return True + elif isinstance(key, list): + # check if np.array(key).dtype would be bool + if len(key) > 0: + if type(key) is not list: + # GH#42461 cython will raise TypeError if we pass a subclass + key = list(key) + return lib.is_bool_list(key) + + return False + + +def cast_scalar_indexer(val, warn_float: bool = False): + """ + To avoid numpy DeprecationWarnings, cast float to integer where valid. + + Parameters + ---------- + val : scalar + warn_float : bool, default False + If True, issue deprecation warning for a float indexer. + + Returns + ------- + outval : scalar + """ + # assumes lib.is_scalar(val) + if lib.is_float(val) and val.is_integer(): + if warn_float: + warnings.warn( + "Indexing with a float is deprecated, and will raise an IndexError " + "in pandas 2.0. You can manually convert to an integer key instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return int(val) + return val + + +def not_none(*args): + """ + Returns a generator consisting of the arguments that are not None. + """ + return (arg for arg in args if arg is not None) + + +def any_none(*args) -> bool: + """ + Returns a boolean indicating if any argument is None. + """ + return any(arg is None for arg in args) + + +def all_none(*args) -> bool: + """ + Returns a boolean indicating if all arguments are None. + """ + return all(arg is None for arg in args) + + +def any_not_none(*args) -> bool: + """ + Returns a boolean indicating if any argument is not None. + """ + return any(arg is not None for arg in args) + + +def all_not_none(*args) -> bool: + """ + Returns a boolean indicating if all arguments are not None. + """ + return all(arg is not None for arg in args) + + +def count_not_none(*args) -> int: + """ + Returns the count of arguments that are not None. 
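+
+    Examples
+    --------
+    A small illustration with arbitrary arguments:
+
+    >>> count_not_none(1, None, "a", None)
+    2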
+ """ + return sum(x is not None for x in args) + + +def asarray_tuplesafe(values, dtype: NpDtype | None = None) -> np.ndarray: + + if not (isinstance(values, (list, tuple)) or hasattr(values, "__array__")): + values = list(values) + elif isinstance(values, ABCIndex): + # error: Incompatible return value type (got "Union[ExtensionArray, ndarray]", + # expected "ndarray") + return values._values # type: ignore[return-value] + + if isinstance(values, list) and dtype in [np.object_, object]: + return construct_1d_object_array_from_listlike(values) + + result = np.asarray(values, dtype=dtype) + + if issubclass(result.dtype.type, str): + result = np.asarray(values, dtype=object) + + if result.ndim == 2: + # Avoid building an array of arrays: + values = [tuple(x) for x in values] + result = construct_1d_object_array_from_listlike(values) + + return result + + +def index_labels_to_array(labels, dtype: NpDtype | None = None) -> np.ndarray: + """ + Transform label or iterable of labels to array, for use in Index. + + Parameters + ---------- + dtype : dtype + If specified, use as dtype of the resulting array, otherwise infer. + + Returns + ------- + array + """ + if isinstance(labels, (str, tuple)): + labels = [labels] + + if not isinstance(labels, (list, np.ndarray)): + try: + labels = list(labels) + except TypeError: # non-iterable + labels = [labels] + + labels = asarray_tuplesafe(labels, dtype=dtype) + + return labels + + +def maybe_make_list(obj): + if obj is not None and not isinstance(obj, (tuple, list)): + return [obj] + return obj + + +def maybe_iterable_to_list(obj: Iterable[T] | T) -> Collection[T] | T: + """ + If obj is Iterable but not list-like, consume into list. + """ + if isinstance(obj, abc.Iterable) and not isinstance(obj, abc.Sized): + return list(obj) + obj = cast(Collection, obj) + return obj + + +def is_null_slice(obj) -> bool: + """ + We have a null slice. + """ + return ( + isinstance(obj, slice) + and obj.start is None + and obj.stop is None + and obj.step is None + ) + + +def is_true_slices(line) -> list[bool]: + """ + Find non-trivial slices in "line": return a list of booleans with same length. + """ + return [isinstance(k, slice) and not is_null_slice(k) for k in line] + + +# TODO: used only once in indexing; belongs elsewhere? +def is_full_slice(obj, line: int) -> bool: + """ + We have a full length slice. + """ + return ( + isinstance(obj, slice) + and obj.start == 0 + and obj.stop == line + and obj.step is None + ) + + +def get_callable_name(obj): + # typical case has name + if hasattr(obj, "__name__"): + return getattr(obj, "__name__") + # some objects don't; could recurse + if isinstance(obj, partial): + return get_callable_name(obj.func) + # fall back to class name + if callable(obj): + return type(obj).__name__ + # everything failed (probably because the argument + # wasn't actually callable); we return None + # instead of the empty string in this case to allow + # distinguishing between no name and a name of '' + return None + + +def apply_if_callable(maybe_callable, obj, **kwargs): + """ + Evaluate possibly callable input using obj and kwargs if it is callable, + otherwise return as it is. + + Parameters + ---------- + maybe_callable : possibly a callable + obj : NDFrame + **kwargs + """ + if callable(maybe_callable): + return maybe_callable(obj, **kwargs) + + return maybe_callable + + +def standardize_mapping(into): + """ + Helper function to standardize a supplied mapping. 
+ + Parameters + ---------- + into : instance or subclass of collections.abc.Mapping + Must be a class, an initialized collections.defaultdict, + or an instance of a collections.abc.Mapping subclass. + + Returns + ------- + mapping : a collections.abc.Mapping subclass or other constructor + a callable object that can accept an iterator to create + the desired Mapping. + + See Also + -------- + DataFrame.to_dict + Series.to_dict + """ + if not inspect.isclass(into): + if isinstance(into, defaultdict): + return partial(defaultdict, into.default_factory) + into = type(into) + if not issubclass(into, abc.Mapping): + raise TypeError(f"unsupported type: {into}") + elif into == defaultdict: + raise TypeError("to_dict() only accepts initialized defaultdicts") + return into + + +@overload +def random_state(state: np.random.Generator) -> np.random.Generator: + ... + + +@overload +def random_state( + state: int | ArrayLike | np.random.BitGenerator | np.random.RandomState | None, +) -> np.random.RandomState: + ... + + +def random_state(state: RandomState | None = None): + """ + Helper function for processing random_state arguments. + + Parameters + ---------- + state : int, array-like, BitGenerator, Generator, np.random.RandomState, None. + If receives an int, array-like, or BitGenerator, passes to + np.random.RandomState() as seed. + If receives an np.random RandomState or Generator, just returns that unchanged. + If receives `None`, returns np.random. + If receives anything else, raises an informative ValueError. + + .. versionchanged:: 1.1.0 + + array-like and BitGenerator object now passed to np.random.RandomState() + as seed + + Default None. + + Returns + ------- + np.random.RandomState or np.random.Generator. If state is None, returns np.random + + """ + if ( + is_integer(state) + or is_array_like(state) + or isinstance(state, np.random.BitGenerator) + ): + # error: Argument 1 to "RandomState" has incompatible type "Optional[Union[int, + # Union[ExtensionArray, ndarray[Any, Any]], Generator, RandomState]]"; expected + # "Union[None, Union[Union[_SupportsArray[dtype[Union[bool_, integer[Any]]]], + # Sequence[_SupportsArray[dtype[Union[bool_, integer[Any]]]]], + # Sequence[Sequence[_SupportsArray[dtype[Union[bool_, integer[Any]]]]]], + # Sequence[Sequence[Sequence[_SupportsArray[dtype[Union[bool_, + # integer[Any]]]]]]], + # Sequence[Sequence[Sequence[Sequence[_SupportsArray[dtype[Union[bool_, + # integer[Any]]]]]]]]], Union[bool, int, Sequence[Union[bool, int]], + # Sequence[Sequence[Union[bool, int]]], Sequence[Sequence[Sequence[Union[bool, + # int]]]], Sequence[Sequence[Sequence[Sequence[Union[bool, int]]]]]]], + # BitGenerator]" + return np.random.RandomState(state) # type: ignore[arg-type] + elif isinstance(state, np.random.RandomState): + return state + elif isinstance(state, np.random.Generator): + return state + elif state is None: + return np.random + else: + raise ValueError( + "random_state must be an integer, array-like, a BitGenerator, Generator, " + "a numpy RandomState, or None" + ) + + +def pipe( + obj, func: Callable[..., T] | tuple[Callable[..., T], str], *args, **kwargs +) -> T: + """ + Apply a function ``func`` to object ``obj`` either by passing obj as the + first argument to the function or, in the case that the func is a tuple, + interpret the first element of the tuple as a function and pass the obj to + that function as a keyword argument whose key is the value of the second + element of the tuple. 
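+
+    For example, ``pipe(df, f, 0, x=1)`` calls ``f(df, 0, x=1)``, while
+    ``pipe(df, (f, "data"), 0, x=1)`` calls ``f(0, x=1, data=df)``.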
+ + Parameters + ---------- + func : callable or tuple of (callable, str) + Function to apply to this object or, alternatively, a + ``(callable, data_keyword)`` tuple where ``data_keyword`` is a + string indicating the keyword of `callable`` that expects the + object. + *args : iterable, optional + Positional arguments passed into ``func``. + **kwargs : dict, optional + A dictionary of keyword arguments passed into ``func``. + + Returns + ------- + object : the return type of ``func``. + """ + if isinstance(func, tuple): + func, target = func + if target in kwargs: + msg = f"{target} is both the pipe target and a keyword argument" + raise ValueError(msg) + kwargs[target] = obj + return func(*args, **kwargs) + else: + return func(obj, *args, **kwargs) + + +def get_rename_function(mapper): + """ + Returns a function that will map names/labels, dependent if mapper + is a dict, Series or just a function. + """ + if isinstance(mapper, (abc.Mapping, ABCSeries)): + + def f(x): + if x in mapper: + return mapper[x] + else: + return x + + else: + f = mapper + + return f + + +def convert_to_list_like( + values: Scalar | Iterable | AnyArrayLike, +) -> list | AnyArrayLike: + """ + Convert list-like or scalar input to list-like. List, numpy and pandas array-like + inputs are returned unmodified whereas others are converted to list. + """ + if isinstance(values, (list, np.ndarray, ABCIndex, ABCSeries, ABCExtensionArray)): + return values + elif isinstance(values, abc.Iterable) and not isinstance(values, str): + return list(values) + + return [values] + + +@contextlib.contextmanager +def temp_setattr(obj, attr: str, value) -> Iterator[None]: + """Temporarily set attribute on an object. + + Args: + obj: Object whose attribute will be modified. + attr: Attribute to modify. + value: Value to temporarily set attribute to. + + Yields: + obj with modified attribute. + """ + old_value = getattr(obj, attr) + setattr(obj, attr, value) + yield obj + setattr(obj, attr, old_value) + + +def require_length_match(data, index: Index): + """ + Check the length of data matches the length of the index. + """ + if len(data) != len(index): + raise ValueError( + "Length of values " + f"({len(data)}) " + "does not match length of index " + f"({len(index)})" + ) + + +# the ufuncs np.maximum.reduce and np.minimum.reduce default to axis=0, +# whereas np.min and np.max (which directly call obj.min and obj.max) +# default to axis=None. 
+_builtin_table = {
+    builtins.sum: np.sum,
+    builtins.max: np.maximum.reduce,
+    builtins.min: np.minimum.reduce,
+}
+
+_cython_table = {
+    builtins.sum: "sum",
+    builtins.max: "max",
+    builtins.min: "min",
+    np.all: "all",
+    np.any: "any",
+    np.sum: "sum",
+    np.nansum: "sum",
+    np.mean: "mean",
+    np.nanmean: "mean",
+    np.prod: "prod",
+    np.nanprod: "prod",
+    np.std: "std",
+    np.nanstd: "std",
+    np.var: "var",
+    np.nanvar: "var",
+    np.median: "median",
+    np.nanmedian: "median",
+    np.max: "max",
+    np.nanmax: "max",
+    np.min: "min",
+    np.nanmin: "min",
+    np.cumprod: "cumprod",
+    np.nancumprod: "cumprod",
+    np.cumsum: "cumsum",
+    np.nancumsum: "cumsum",
+}
+
+
+def get_cython_func(arg: Callable) -> str | None:
+    """
+    If we define an internal (Cython) function for this argument, return its name.
+    """
+    return _cython_table.get(arg)
+
+
+def is_builtin_func(arg):
+    """
+    If we define a NumPy equivalent for this builtin, return it,
+    otherwise return the arg unchanged.
+    """
+    return _builtin_table.get(arg, arg)
+
+
+def fill_missing_names(names: Sequence[Hashable | None]) -> list[Hashable]:
+    """
+    If a name is missing, replace it with ``level_n``, where ``n`` is its
+    position in ``names``.
+
+    .. versionadded:: 1.4.0
+
+    Parameters
+    ----------
+    names : list-like
+        list of column names or None values.
+
+    Returns
+    -------
+    list
+        list of column names with the None values replaced.
+    """
+    return [f"level_{i}" if name is None else name for i, name in enumerate(names)]
diff --git a/.local/lib/python3.8/site-packages/pandas/core/computation/__init__.py b/.local/lib/python3.8/site-packages/pandas/core/computation/__init__.py
new file mode 100644
index 0000000..e69de29
[Binary diffs for the compiled pandas/core/computation/__pycache__/*.cpython-38.pyc files omitted.]
diff --git a/.local/lib/python3.8/site-packages/pandas/core/computation/align.py b/.local/lib/python3.8/site-packages/pandas/core/computation/align.py
new file mode 100644
index 0000000..f148822
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/core/computation/align.py
@@ -0,0 +1,211 @@
+"""
+Core eval alignment algorithms.
+""" +from __future__ import annotations + +from functools import ( + partial, + wraps, +) +from typing import ( + TYPE_CHECKING, + Sequence, +) +import warnings + +import numpy as np + +from pandas.errors import PerformanceWarning +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) + +from pandas.core.base import PandasObject +import pandas.core.common as com +from pandas.core.computation.common import result_type_many + +if TYPE_CHECKING: + from pandas.core.generic import NDFrame + from pandas.core.indexes.api import Index + + +def _align_core_single_unary_op( + term, +) -> tuple[partial | type[NDFrame], dict[str, Index] | None]: + + typ: partial | type[NDFrame] + axes: dict[str, Index] | None = None + + if isinstance(term.value, np.ndarray): + typ = partial(np.asanyarray, dtype=term.value.dtype) + else: + typ = type(term.value) + if hasattr(term.value, "axes"): + axes = _zip_axes_from_type(typ, term.value.axes) + + return typ, axes + + +def _zip_axes_from_type( + typ: type[NDFrame], new_axes: Sequence[Index] +) -> dict[str, Index]: + return {name: new_axes[i] for i, name in enumerate(typ._AXIS_ORDERS)} + + +def _any_pandas_objects(terms) -> bool: + """ + Check a sequence of terms for instances of PandasObject. + """ + return any(isinstance(term.value, PandasObject) for term in terms) + + +def _filter_special_cases(f): + @wraps(f) + def wrapper(terms): + # single unary operand + if len(terms) == 1: + return _align_core_single_unary_op(terms[0]) + + term_values = (term.value for term in terms) + + # we don't have any pandas objects + if not _any_pandas_objects(terms): + return result_type_many(*term_values), None + + return f(terms) + + return wrapper + + +@_filter_special_cases +def _align_core(terms): + term_index = [i for i, term in enumerate(terms) if hasattr(term.value, "axes")] + term_dims = [terms[i].value.ndim for i in term_index] + + from pandas import Series + + ndims = Series(dict(zip(term_index, term_dims))) + + # initial axes are the axes of the largest-axis'd term + biggest = terms[ndims.idxmax()].value + typ = biggest._constructor + axes = biggest.axes + naxes = len(axes) + gt_than_one_axis = naxes > 1 + + for value in (terms[i].value for i in term_index): + is_series = isinstance(value, ABCSeries) + is_series_and_gt_one_axis = is_series and gt_than_one_axis + + for axis, items in enumerate(value.axes): + if is_series_and_gt_one_axis: + ax, itm = naxes - 1, value.index + else: + ax, itm = axis, items + + if not axes[ax].is_(itm): + axes[ax] = axes[ax].join(itm, how="outer") + + for i, ndim in ndims.items(): + for axis, items in zip(range(ndim), axes): + ti = terms[i].value + + if hasattr(ti, "reindex"): + transpose = isinstance(ti, ABCSeries) and naxes > 1 + reindexer = axes[naxes - 1] if transpose else items + + term_axis_size = len(ti.axes[axis]) + reindexer_size = len(reindexer) + + ordm = np.log10(max(1, abs(reindexer_size - term_axis_size))) + if ordm >= 1 and reindexer_size >= 10000: + w = ( + f"Alignment difference on axis {axis} is larger " + f"than an order of magnitude on term {repr(terms[i].name)}, " + f"by more than {ordm:.4g}; performance may suffer." + ) + warnings.warn( + w, category=PerformanceWarning, stacklevel=find_stack_level() + ) + + f = partial(ti.reindex, reindexer, axis=axis, copy=False) + + terms[i].update(f()) + + terms[i].update(terms[i].value.values) + + return typ, _zip_axes_from_type(typ, axes) + + +def align_terms(terms): + """ + Align a set of terms. 
+ """ + try: + # flatten the parse tree (a nested list, really) + terms = list(com.flatten(terms)) + except TypeError: + # can't iterate so it must just be a constant or single variable + if isinstance(terms.value, (ABCSeries, ABCDataFrame)): + typ = type(terms.value) + return typ, _zip_axes_from_type(typ, terms.value.axes) + return np.result_type(terms.type), None + + # if all resolved variables are numeric scalars + if all(term.is_scalar for term in terms): + return result_type_many(*(term.value for term in terms)).type, None + + # perform the main alignment + typ, axes = _align_core(terms) + return typ, axes + + +def reconstruct_object(typ, obj, axes, dtype): + """ + Reconstruct an object given its type, raw value, and possibly empty + (None) axes. + + Parameters + ---------- + typ : object + A type + obj : object + The value to use in the type constructor + axes : dict + The axes to use to construct the resulting pandas object + + Returns + ------- + ret : typ + An object of type ``typ`` with the value `obj` and possible axes + `axes`. + """ + try: + typ = typ.type + except AttributeError: + pass + + res_t = np.result_type(obj.dtype, dtype) + + if not isinstance(typ, partial) and issubclass(typ, PandasObject): + return typ(obj, dtype=res_t, **axes) + + # special case for pathological things like ~True/~False + if hasattr(res_t, "type") and typ == np.bool_ and res_t != np.bool_: + ret_value = res_t.type(obj) + else: + ret_value = typ(obj).astype(res_t) + # The condition is to distinguish 0-dim array (returned in case of + # scalar) and 1 element array + # e.g. np.array(0) and np.array([0]) + if ( + len(obj.shape) == 1 + and len(obj) == 1 + and not isinstance(ret_value, np.ndarray) + ): + ret_value = np.array([ret_value]).astype(res_t) + + return ret_value diff --git a/.local/lib/python3.8/site-packages/pandas/core/computation/api.py b/.local/lib/python3.8/site-packages/pandas/core/computation/api.py new file mode 100644 index 0000000..bd3be5b --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/computation/api.py @@ -0,0 +1,2 @@ +__all__ = ["eval"] +from pandas.core.computation.eval import eval diff --git a/.local/lib/python3.8/site-packages/pandas/core/computation/check.py b/.local/lib/python3.8/site-packages/pandas/core/computation/check.py new file mode 100644 index 0000000..7be617d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/computation/check.py @@ -0,0 +1,10 @@ +from pandas.compat._optional import import_optional_dependency + +ne = import_optional_dependency("numexpr", errors="warn") +NUMEXPR_INSTALLED = ne is not None +if NUMEXPR_INSTALLED: + NUMEXPR_VERSION = ne.__version__ +else: + NUMEXPR_VERSION = None + +__all__ = ["NUMEXPR_INSTALLED", "NUMEXPR_VERSION"] diff --git a/.local/lib/python3.8/site-packages/pandas/core/computation/common.py b/.local/lib/python3.8/site-packages/pandas/core/computation/common.py new file mode 100644 index 0000000..8a9583c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/computation/common.py @@ -0,0 +1,26 @@ +from functools import reduce + +import numpy as np + +from pandas._config import get_option + + +def ensure_decoded(s): + """ + If we have bytes, decode them to unicode. + """ + if isinstance(s, (np.bytes_, bytes)): + s = s.decode(get_option("display.encoding")) + return s + + +def result_type_many(*arrays_and_dtypes): + """ + Wrapper around numpy.result_type which overcomes the NPY_MAXARGS (32) + argument limit. 
+ """ + try: + return np.result_type(*arrays_and_dtypes) + except ValueError: + # we have > NPY_MAXARGS terms in our expression + return reduce(np.result_type, arrays_and_dtypes) diff --git a/.local/lib/python3.8/site-packages/pandas/core/computation/engines.py b/.local/lib/python3.8/site-packages/pandas/core/computation/engines.py new file mode 100644 index 0000000..ec3548c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/computation/engines.py @@ -0,0 +1,143 @@ +""" +Engine classes for :func:`~pandas.eval` +""" +from __future__ import annotations + +import abc + +from pandas.core.computation.align import ( + align_terms, + reconstruct_object, +) +from pandas.core.computation.expr import Expr +from pandas.core.computation.ops import ( + MATHOPS, + REDUCTIONS, +) + +import pandas.io.formats.printing as printing + +_ne_builtins = frozenset(MATHOPS + REDUCTIONS) + + +class NumExprClobberingError(NameError): + pass + + +def _check_ne_builtin_clash(expr: Expr) -> None: + """ + Attempt to prevent foot-shooting in a helpful way. + + Parameters + ---------- + expr : Expr + Terms can contain + """ + names = expr.names + overlap = names & _ne_builtins + + if overlap: + s = ", ".join([repr(x) for x in overlap]) + raise NumExprClobberingError( + f'Variables in expression "{expr}" overlap with builtins: ({s})' + ) + + +class AbstractEngine(metaclass=abc.ABCMeta): + """Object serving as a base class for all engines.""" + + has_neg_frac = False + + def __init__(self, expr): + self.expr = expr + self.aligned_axes = None + self.result_type = None + + def convert(self) -> str: + """ + Convert an expression for evaluation. + + Defaults to return the expression as a string. + """ + return printing.pprint_thing(self.expr) + + def evaluate(self) -> object: + """ + Run the engine on the expression. + + This method performs alignment which is necessary no matter what engine + is being used, thus its implementation is in the base class. + + Returns + ------- + object + The result of the passed expression. + """ + if not self._is_aligned: + self.result_type, self.aligned_axes = align_terms(self.expr.terms) + + # make sure no names in resolvers and locals/globals clash + res = self._evaluate() + return reconstruct_object( + self.result_type, res, self.aligned_axes, self.expr.terms.return_type + ) + + @property + def _is_aligned(self) -> bool: + return self.aligned_axes is not None and self.result_type is not None + + @abc.abstractmethod + def _evaluate(self): + """ + Return an evaluated expression. + + Parameters + ---------- + env : Scope + The local and global environment in which to evaluate an + expression. + + Notes + ----- + Must be implemented by subclasses. + """ + pass + + +class NumExprEngine(AbstractEngine): + """NumExpr engine class""" + + has_neg_frac = True + + def _evaluate(self): + import numexpr as ne + + # convert the expression to a valid numexpr expression + s = self.convert() + + env = self.expr.env + scope = env.full_scope + _check_ne_builtin_clash(self.expr) + return ne.evaluate(s, local_dict=scope) + + +class PythonEngine(AbstractEngine): + """ + Evaluate an expression in Python space. + + Mostly for testing purposes. 
+ """ + + has_neg_frac = False + + def evaluate(self): + return self.expr() + + def _evaluate(self) -> None: + pass + + +ENGINES: dict[str, type[AbstractEngine]] = { + "numexpr": NumExprEngine, + "python": PythonEngine, +} diff --git a/.local/lib/python3.8/site-packages/pandas/core/computation/eval.py b/.local/lib/python3.8/site-packages/pandas/core/computation/eval.py new file mode 100644 index 0000000..d82cc37 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/computation/eval.py @@ -0,0 +1,408 @@ +""" +Top level ``eval`` module. +""" +from __future__ import annotations + +import tokenize +import warnings + +from pandas._libs.lib import no_default +from pandas.util._exceptions import find_stack_level +from pandas.util._validators import validate_bool_kwarg + +from pandas.core.computation.engines import ENGINES +from pandas.core.computation.expr import ( + PARSERS, + Expr, +) +from pandas.core.computation.ops import BinOp +from pandas.core.computation.parsing import tokenize_string +from pandas.core.computation.scope import ensure_scope + +from pandas.io.formats.printing import pprint_thing + + +def _check_engine(engine: str | None) -> str: + """ + Make sure a valid engine is passed. + + Parameters + ---------- + engine : str + String to validate. + + Raises + ------ + KeyError + * If an invalid engine is passed. + ImportError + * If numexpr was requested but doesn't exist. + + Returns + ------- + str + Engine name. + """ + from pandas.core.computation.check import NUMEXPR_INSTALLED + from pandas.core.computation.expressions import USE_NUMEXPR + + if engine is None: + engine = "numexpr" if USE_NUMEXPR else "python" + + if engine not in ENGINES: + valid_engines = list(ENGINES.keys()) + raise KeyError( + f"Invalid engine '{engine}' passed, valid engines are {valid_engines}" + ) + + # TODO: validate this in a more general way (thinking of future engines + # that won't necessarily be import-able) + # Could potentially be done on engine instantiation + if engine == "numexpr" and not NUMEXPR_INSTALLED: + raise ImportError( + "'numexpr' is not installed or an unsupported version. Cannot use " + "engine='numexpr' for query/eval if 'numexpr' is not installed" + ) + + return engine + + +def _check_parser(parser: str): + """ + Make sure a valid parser is passed. + + Parameters + ---------- + parser : str + + Raises + ------ + KeyError + * If an invalid parser is passed + """ + if parser not in PARSERS: + raise KeyError( + f"Invalid parser '{parser}' passed, valid parsers are {PARSERS.keys()}" + ) + + +def _check_resolvers(resolvers): + if resolvers is not None: + for resolver in resolvers: + if not hasattr(resolver, "__getitem__"): + name = type(resolver).__name__ + raise TypeError( + f"Resolver of type '{name}' does not " + "implement the __getitem__ method" + ) + + +def _check_expression(expr): + """ + Make sure an expression is not an empty string + + Parameters + ---------- + expr : object + An object that can be converted to a string + + Raises + ------ + ValueError + * If expr is an empty string + """ + if not expr: + raise ValueError("expr cannot be an empty string") + + +def _convert_expression(expr) -> str: + """ + Convert an object to an expression. + + This function converts an object to an expression (a unicode string) and + checks to make sure it isn't empty after conversion. This is used to + convert operators to their string representation for recursive calls to + :func:`~pandas.eval`. 
+ + Parameters + ---------- + expr : object + The object to be converted to a string. + + Returns + ------- + str + The string representation of an object. + + Raises + ------ + ValueError + * If the expression is empty. + """ + s = pprint_thing(expr) + _check_expression(s) + return s + + +def _check_for_locals(expr: str, stack_level: int, parser: str): + + at_top_of_stack = stack_level == 0 + not_pandas_parser = parser != "pandas" + + if not_pandas_parser: + msg = "The '@' prefix is only supported by the pandas parser" + elif at_top_of_stack: + msg = ( + "The '@' prefix is not allowed in top-level eval calls.\n" + "please refer to your variables by name without the '@' prefix." + ) + + if at_top_of_stack or not_pandas_parser: + for toknum, tokval in tokenize_string(expr): + if toknum == tokenize.OP and tokval == "@": + raise SyntaxError(msg) + + +def eval( + expr: str | BinOp, # we leave BinOp out of the docstr bc it isn't for users + parser: str = "pandas", + engine: str | None = None, + truediv=no_default, + local_dict=None, + global_dict=None, + resolvers=(), + level=0, + target=None, + inplace=False, +): + """ + Evaluate a Python expression as a string using various backends. + + The following arithmetic operations are supported: ``+``, ``-``, ``*``, + ``/``, ``**``, ``%``, ``//`` (python engine only) along with the following + boolean operations: ``|`` (or), ``&`` (and), and ``~`` (not). + Additionally, the ``'pandas'`` parser allows the use of :keyword:`and`, + :keyword:`or`, and :keyword:`not` with the same semantics as the + corresponding bitwise operators. :class:`~pandas.Series` and + :class:`~pandas.DataFrame` objects are supported and behave as they would + with plain ol' Python evaluation. + + Parameters + ---------- + expr : str + The expression to evaluate. This string cannot contain any Python + `statements + `__, + only Python `expressions + `__. + parser : {'pandas', 'python'}, default 'pandas' + The parser to use to construct the syntax tree from the expression. The + default of ``'pandas'`` parses code slightly different than standard + Python. Alternatively, you can parse an expression using the + ``'python'`` parser to retain strict Python semantics. See the + :ref:`enhancing performance ` documentation for + more details. + engine : {'python', 'numexpr'}, default 'numexpr' + + The engine used to evaluate the expression. Supported engines are + + - None : tries to use ``numexpr``, falls back to ``python`` + - ``'numexpr'``: This default engine evaluates pandas objects using + numexpr for large speed ups in complex expressions + with large frames. + - ``'python'``: Performs operations as if you had ``eval``'d in top + level python. This engine is generally not that useful. + + More backends may be available in the future. + + truediv : bool, optional + Whether to use true division, like in Python >= 3. + + .. deprecated:: 1.0.0 + + local_dict : dict or None, optional + A dictionary of local variables, taken from locals() by default. + global_dict : dict or None, optional + A dictionary of global variables, taken from globals() by default. + resolvers : list of dict-like or None, optional + A list of objects implementing the ``__getitem__`` special method that + you can use to inject an additional collection of namespaces to use for + variable lookup. For example, this is used in the + :meth:`~DataFrame.query` method to inject the + ``DataFrame.index`` and ``DataFrame.columns`` + variables that refer to their respective :class:`~pandas.DataFrame` + instance attributes. 
+    level : int, optional
+        The number of prior stack frames to traverse and add to the current
+        scope. Most users will **not** need to change this parameter.
+    target : object, optional, default None
+        This is the target object for assignment. It is used when there is
+        variable assignment in the expression. If so, then `target` must
+        support item assignment with string keys, and if a copy is being
+        returned, it must also support `.copy()`.
+    inplace : bool, default False
+        If `target` is provided, and the expression mutates `target`, whether
+        to modify `target` inplace. Otherwise, return a copy of `target` with
+        the mutation.
+
+    Returns
+    -------
+    ndarray, numeric scalar, DataFrame, Series, or None
+        The completion value of evaluating the given code or None if ``inplace=True``.
+
+    Raises
+    ------
+    ValueError
+        There are many instances where such an error can be raised:
+
+        - `target=None`, but the expression is multiline.
+        - The expression is multiline, but not all of them have item assignment.
+          An example of such an arrangement is this:
+
+          a = b + 1
+          a + 2
+
+          Here, there are expressions on different lines, making it multiline,
+          but the last line has no variable assigned to the output of `a + 2`.
+        - `inplace=True`, but the expression is missing item assignment.
+        - Item assignment is provided, but the `target` does not support
+          string item assignment.
+        - Item assignment is provided and `inplace=False`, but the `target`
+          does not support the `.copy()` method.
+
+    See Also
+    --------
+    DataFrame.query : Evaluates a boolean expression to query the columns
+        of a frame.
+    DataFrame.eval : Evaluate a string describing operations on
+        DataFrame columns.
+
+    Notes
+    -----
+    The ``dtype`` of any objects involved in an arithmetic ``%`` operation is
+    recursively cast to ``float64``.
+
+    See the :ref:`enhancing performance <enhancingperf.eval>` documentation for
+    more details.
+
+    Examples
+    --------
+    >>> df = pd.DataFrame({"animal": ["dog", "pig"], "age": [10, 20]})
+    >>> df
+      animal  age
+    0    dog   10
+    1    pig   20
+
+    We can add a new column using ``pd.eval``:
+
+    >>> pd.eval("double_age = df.age * 2", target=df)
+      animal  age  double_age
+    0    dog   10          20
+    1    pig   20          40
+    """
+    inplace = validate_bool_kwarg(inplace, "inplace")
+
+    if truediv is not no_default:
+        warnings.warn(
+            (
+                "The `truediv` parameter in pd.eval is deprecated and "
+                "will be removed in a future version."
+ ), + FutureWarning, + stacklevel=find_stack_level(), + ) + + exprs: list[str | BinOp] + if isinstance(expr, str): + _check_expression(expr) + exprs = [e.strip() for e in expr.splitlines() if e.strip() != ""] + else: + # ops.BinOp; for internal compat, not intended to be passed by users + exprs = [expr] + multi_line = len(exprs) > 1 + + if multi_line and target is None: + raise ValueError( + "multi-line expressions are only valid in the " + "context of data, use DataFrame.eval" + ) + engine = _check_engine(engine) + _check_parser(parser) + _check_resolvers(resolvers) + + ret = None + first_expr = True + target_modified = False + + for expr in exprs: + expr = _convert_expression(expr) + _check_for_locals(expr, level, parser) + + # get our (possibly passed-in) scope + env = ensure_scope( + level + 1, + global_dict=global_dict, + local_dict=local_dict, + resolvers=resolvers, + target=target, + ) + + parsed_expr = Expr(expr, engine=engine, parser=parser, env=env) + + # construct the engine and evaluate the parsed expression + eng = ENGINES[engine] + eng_inst = eng(parsed_expr) + ret = eng_inst.evaluate() + + if parsed_expr.assigner is None: + if multi_line: + raise ValueError( + "Multi-line expressions are only valid " + "if all expressions contain an assignment" + ) + elif inplace: + raise ValueError("Cannot operate inplace if there is no assignment") + + # assign if needed + assigner = parsed_expr.assigner + if env.target is not None and assigner is not None: + target_modified = True + + # if returning a copy, copy only on the first assignment + if not inplace and first_expr: + try: + target = env.target.copy() + except AttributeError as err: + raise ValueError("Cannot return a copy of the target") from err + else: + target = env.target + + # TypeError is most commonly raised (e.g. int, list), but you + # get IndexError if you try to do this assignment on np.ndarray. + # we will ignore numpy warnings here; e.g. if trying + # to use a non-numeric indexer + try: + with warnings.catch_warnings(record=True): + # TODO: Filter the warnings we actually care about here. + target[assigner] = ret + except (TypeError, IndexError) as err: + raise ValueError("Cannot assign expression output to target") from err + + if not resolvers: + resolvers = ({assigner: ret},) + else: + # existing resolver needs updated to handle + # case of mutating existing column in copy + for resolver in resolvers: + if assigner in resolver: + resolver[assigner] = ret + break + else: + resolvers += ({assigner: ret},) + + ret = None + first_expr = False + + # We want to exclude `inplace=None` as being False. + if inplace is False: + return target if target_modified else ret diff --git a/.local/lib/python3.8/site-packages/pandas/core/computation/expr.py b/.local/lib/python3.8/site-packages/pandas/core/computation/expr.py new file mode 100644 index 0000000..f8716ca --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/computation/expr.py @@ -0,0 +1,842 @@ +""" +:func:`~pandas.eval` parsers. 
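+
+Expression strings are preprocessed at the token level (rewriting
+``@local`` references, ``&``/``|`` into boolean ops, and backtick-quoted
+names) and then parsed with :mod:`ast`; the visitor classes below translate
+the tree into ``Term``/``Op`` objects for the engines to evaluate.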
+""" +from __future__ import annotations + +import ast +from functools import ( + partial, + reduce, +) +from keyword import iskeyword +import tokenize +from typing import ( + Callable, + TypeVar, +) + +import numpy as np + +from pandas.compat import PY39 + +import pandas.core.common as com +from pandas.core.computation.ops import ( + ARITH_OPS_SYMS, + BOOL_OPS_SYMS, + CMP_OPS_SYMS, + LOCAL_TAG, + MATHOPS, + REDUCTIONS, + UNARY_OPS_SYMS, + BinOp, + Constant, + Div, + FuncNode, + Op, + Term, + UnaryOp, + UndefinedVariableError, + is_term, +) +from pandas.core.computation.parsing import ( + clean_backtick_quoted_toks, + tokenize_string, +) +from pandas.core.computation.scope import Scope + +import pandas.io.formats.printing as printing + + +def _rewrite_assign(tok: tuple[int, str]) -> tuple[int, str]: + """ + Rewrite the assignment operator for PyTables expressions that use ``=`` + as a substitute for ``==``. + + Parameters + ---------- + tok : tuple of int, str + ints correspond to the all caps constants in the tokenize module + + Returns + ------- + tuple of int, str + Either the input or token or the replacement values + """ + toknum, tokval = tok + return toknum, "==" if tokval == "=" else tokval + + +def _replace_booleans(tok: tuple[int, str]) -> tuple[int, str]: + """ + Replace ``&`` with ``and`` and ``|`` with ``or`` so that bitwise + precedence is changed to boolean precedence. + + Parameters + ---------- + tok : tuple of int, str + ints correspond to the all caps constants in the tokenize module + + Returns + ------- + tuple of int, str + Either the input or token or the replacement values + """ + toknum, tokval = tok + if toknum == tokenize.OP: + if tokval == "&": + return tokenize.NAME, "and" + elif tokval == "|": + return tokenize.NAME, "or" + return toknum, tokval + return toknum, tokval + + +def _replace_locals(tok: tuple[int, str]) -> tuple[int, str]: + """ + Replace local variables with a syntactically valid name. + + Parameters + ---------- + tok : tuple of int, str + ints correspond to the all caps constants in the tokenize module + + Returns + ------- + tuple of int, str + Either the input or token or the replacement values + + Notes + ----- + This is somewhat of a hack in that we rewrite a string such as ``'@a'`` as + ``'__pd_eval_local_a'`` by telling the tokenizer that ``__pd_eval_local_`` + is a ``tokenize.OP`` and to replace the ``'@'`` symbol with it. + """ + toknum, tokval = tok + if toknum == tokenize.OP and tokval == "@": + return tokenize.OP, LOCAL_TAG + return toknum, tokval + + +def _compose2(f, g): + """ + Compose 2 callables. + """ + return lambda *args, **kwargs: f(g(*args, **kwargs)) + + +def _compose(*funcs): + """ + Compose 2 or more callables. + """ + assert len(funcs) > 1, "At least 2 callables must be passed to compose" + return reduce(_compose2, funcs) + + +def _preparse( + source: str, + f=_compose( + _replace_locals, _replace_booleans, _rewrite_assign, clean_backtick_quoted_toks + ), +) -> str: + """ + Compose a collection of tokenization functions. + + Parameters + ---------- + source : str + A Python source code string + f : callable + This takes a tuple of (toknum, tokval) as its argument and returns a + tuple with the same structure but possibly different elements. Defaults + to the composition of ``_rewrite_assign``, ``_replace_booleans``, and + ``_replace_locals``. 
+
+    Returns
+    -------
+    str
+        Valid Python source code
+
+    Notes
+    -----
+    The `f` parameter can be any callable that takes *and* returns input of the
+    form ``(toknum, tokval)``, where ``toknum`` is one of the constants from
+    the ``tokenize`` module and ``tokval`` is a string.
+    """
+    assert callable(f), "f must be callable"
+    return tokenize.untokenize(f(x) for x in tokenize_string(source))
+
+
+def _is_type(t):
+    """
+    Factory for a type checking function of type ``t`` or tuple of types.
+    """
+    return lambda x: isinstance(x.value, t)
+
+
+_is_list = _is_type(list)
+_is_str = _is_type(str)
+
+
+# partition all AST nodes
+_all_nodes = frozenset(
+    node
+    for node in (getattr(ast, name) for name in dir(ast))
+    if isinstance(node, type) and issubclass(node, ast.AST)
+)
+
+
+def _filter_nodes(superclass, all_nodes=_all_nodes):
+    """
+    Return the names of the AST nodes that are subclasses of ``superclass``.
+    """
+    node_names = (node.__name__ for node in all_nodes if issubclass(node, superclass))
+    return frozenset(node_names)
+
+
+_all_node_names = frozenset(map(lambda x: x.__name__, _all_nodes))
+_mod_nodes = _filter_nodes(ast.mod)
+_stmt_nodes = _filter_nodes(ast.stmt)
+_expr_nodes = _filter_nodes(ast.expr)
+_expr_context_nodes = _filter_nodes(ast.expr_context)
+_boolop_nodes = _filter_nodes(ast.boolop)
+_operator_nodes = _filter_nodes(ast.operator)
+_unary_op_nodes = _filter_nodes(ast.unaryop)
+_cmp_op_nodes = _filter_nodes(ast.cmpop)
+_comprehension_nodes = _filter_nodes(ast.comprehension)
+_handler_nodes = _filter_nodes(ast.excepthandler)
+_arguments_nodes = _filter_nodes(ast.arguments)
+_keyword_nodes = _filter_nodes(ast.keyword)
+_alias_nodes = _filter_nodes(ast.alias)
+
+if not PY39:
+    _slice_nodes = _filter_nodes(ast.slice)
+
+
+# nodes that we don't support directly but are needed for parsing
+_hacked_nodes = frozenset(["Assign", "Module", "Expr"])
+
+
+_unsupported_expr_nodes = frozenset(
+    [
+        "Yield",
+        "GeneratorExp",
+        "IfExp",
+        "DictComp",
+        "SetComp",
+        "Repr",
+        "Lambda",
+        "Set",
+        "AST",
+        "Is",
+        "IsNot",
+    ]
+)
+
+# these nodes are low priority or won't ever be supported (e.g., AST)
+_unsupported_nodes = (
+    _stmt_nodes
+    | _mod_nodes
+    | _handler_nodes
+    | _arguments_nodes
+    | _keyword_nodes
+    | _alias_nodes
+    | _expr_context_nodes
+    | _unsupported_expr_nodes
+) - _hacked_nodes
+
+# we rewrite certain assignments to equality comparisons, and we don't want
+# `stmt` and friends in there, so keep only the classes whose names are
+# capitalized
+_base_supported_nodes = (_all_node_names - _unsupported_nodes) | _hacked_nodes
+intersection = _unsupported_nodes & _base_supported_nodes
+_msg = f"cannot both support and not support {intersection}"
+assert not intersection, _msg
+
+
+def _node_not_implemented(node_name: str) -> Callable[..., None]:
+    """
+    Return a function that raises a NotImplementedError with a passed node name.
+    """
+
+    def f(self, *args, **kwargs):
+        raise NotImplementedError(f"'{node_name}' nodes are not implemented")
+
+    return f
+
+
+# should be bound by BaseExprVisitor but that creates a circular dependency:
+# _T is used in disallow, but disallow is used to define BaseExprVisitor
+# https://github.com/microsoft/pyright/issues/2315
+_T = TypeVar("_T")
+
+
+def disallow(nodes: set[str]) -> Callable[[type[_T]], type[_T]]:
+    """
+    Decorator to disallow certain nodes from parsing. Raises a
+    NotImplementedError instead.
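+
+    For example, ``@disallow({"Yield"})`` attaches a ``visit_Yield`` method
+    that raises ``NotImplementedError("'Yield' nodes are not implemented")``
+    and records ``"visit_Yield"`` in ``cls.unsupported_nodes``.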
+ + Returns + ------- + callable + """ + + def disallowed(cls: type[_T]) -> type[_T]: + # error: "Type[_T]" has no attribute "unsupported_nodes" + cls.unsupported_nodes = () # type: ignore[attr-defined] + for node in nodes: + new_method = _node_not_implemented(node) + name = f"visit_{node}" + # error: "Type[_T]" has no attribute "unsupported_nodes" + cls.unsupported_nodes += (name,) # type: ignore[attr-defined] + setattr(cls, name, new_method) + return cls + + return disallowed + + +def _op_maker(op_class, op_symbol): + """ + Return a function to create an op class with its symbol already passed. + + Returns + ------- + callable + """ + + def f(self, node, *args, **kwargs): + """ + Return a partial function with an Op subclass with an operator already passed. + + Returns + ------- + callable + """ + return partial(op_class, op_symbol, *args, **kwargs) + + return f + + +_op_classes = {"binary": BinOp, "unary": UnaryOp} + + +def add_ops(op_classes): + """ + Decorator to add default implementation of ops. + """ + + def f(cls): + for op_attr_name, op_class in op_classes.items(): + ops = getattr(cls, f"{op_attr_name}_ops") + ops_map = getattr(cls, f"{op_attr_name}_op_nodes_map") + for op in ops: + op_node = ops_map[op] + if op_node is not None: + made_op = _op_maker(op_class, op) + setattr(cls, f"visit_{op_node}", made_op) + return cls + + return f + + +@disallow(_unsupported_nodes) +@add_ops(_op_classes) +class BaseExprVisitor(ast.NodeVisitor): + """ + Custom ast walker. Parsers of other engines should subclass this class + if necessary. + + Parameters + ---------- + env : Scope + engine : str + parser : str + preparser : callable + """ + + const_type: type[Term] = Constant + term_type = Term + + binary_ops = CMP_OPS_SYMS + BOOL_OPS_SYMS + ARITH_OPS_SYMS + binary_op_nodes = ( + "Gt", + "Lt", + "GtE", + "LtE", + "Eq", + "NotEq", + "In", + "NotIn", + "BitAnd", + "BitOr", + "And", + "Or", + "Add", + "Sub", + "Mult", + None, + "Pow", + "FloorDiv", + "Mod", + ) + binary_op_nodes_map = dict(zip(binary_ops, binary_op_nodes)) + + unary_ops = UNARY_OPS_SYMS + unary_op_nodes = "UAdd", "USub", "Invert", "Not" + unary_op_nodes_map = {k: v for k, v in zip(unary_ops, unary_op_nodes)} + + rewrite_map = { + ast.Eq: ast.In, + ast.NotEq: ast.NotIn, + ast.In: ast.In, + ast.NotIn: ast.NotIn, + } + + unsupported_nodes: tuple[str, ...] 
+ + def __init__(self, env, engine, parser, preparser=_preparse): + self.env = env + self.engine = engine + self.parser = parser + self.preparser = preparser + self.assigner = None + + def visit(self, node, **kwargs): + if isinstance(node, str): + clean = self.preparser(node) + try: + node = ast.fix_missing_locations(ast.parse(clean)) + except SyntaxError as e: + if any(iskeyword(x) for x in clean.split()): + e.msg = "Python keyword not valid identifier in numexpr query" + raise e + + method = "visit_" + type(node).__name__ + visitor = getattr(self, method) + return visitor(node, **kwargs) + + def visit_Module(self, node, **kwargs): + if len(node.body) != 1: + raise SyntaxError("only a single expression is allowed") + expr = node.body[0] + return self.visit(expr, **kwargs) + + def visit_Expr(self, node, **kwargs): + return self.visit(node.value, **kwargs) + + def _rewrite_membership_op(self, node, left, right): + # the kind of the operator (is actually an instance) + op_instance = node.op + op_type = type(op_instance) + + # must be two terms and the comparison operator must be ==/!=/in/not in + if is_term(left) and is_term(right) and op_type in self.rewrite_map: + + left_list, right_list = map(_is_list, (left, right)) + left_str, right_str = map(_is_str, (left, right)) + + # if there are any strings or lists in the expression + if left_list or right_list or left_str or right_str: + op_instance = self.rewrite_map[op_type]() + + # pop the string variable out of locals and replace it with a list + # of one string, kind of a hack + if right_str: + name = self.env.add_tmp([right.value]) + right = self.term_type(name, self.env) + + if left_str: + name = self.env.add_tmp([left.value]) + left = self.term_type(name, self.env) + + op = self.visit(op_instance) + return op, op_instance, left, right + + def _maybe_transform_eq_ne(self, node, left=None, right=None): + if left is None: + left = self.visit(node.left, side="left") + if right is None: + right = self.visit(node.right, side="right") + op, op_class, left, right = self._rewrite_membership_op(node, left, right) + return op, op_class, left, right + + def _maybe_downcast_constants(self, left, right): + f32 = np.dtype(np.float32) + if ( + left.is_scalar + and hasattr(left, "value") + and not right.is_scalar + and right.return_type == f32 + ): + # right is a float32 array, left is a scalar + name = self.env.add_tmp(np.float32(left.value)) + left = self.term_type(name, self.env) + if ( + right.is_scalar + and hasattr(right, "value") + and not left.is_scalar + and left.return_type == f32 + ): + # left is a float32 array, right is a scalar + name = self.env.add_tmp(np.float32(right.value)) + right = self.term_type(name, self.env) + + return left, right + + def _maybe_eval(self, binop, eval_in_python): + # eval `in` and `not in` (for now) in "partial" python space + # things that can be evaluated in "eval" space will be turned into + # temporary variables. 
for example, + # [1,2] in a + 2 * b + # in that case a + 2 * b will be evaluated using numexpr, and the "in" + # call will be evaluated using isin (in python space) + return binop.evaluate( + self.env, self.engine, self.parser, self.term_type, eval_in_python + ) + + def _maybe_evaluate_binop( + self, + op, + op_class, + lhs, + rhs, + eval_in_python=("in", "not in"), + maybe_eval_in_python=("==", "!=", "<", ">", "<=", ">="), + ): + res = op(lhs, rhs) + + if res.has_invalid_return_type: + raise TypeError( + f"unsupported operand type(s) for {res.op}: " + f"'{lhs.type}' and '{rhs.type}'" + ) + + if self.engine != "pytables" and ( + res.op in CMP_OPS_SYMS + and getattr(lhs, "is_datetime", False) + or getattr(rhs, "is_datetime", False) + ): + # all date ops must be done in python bc numexpr doesn't work + # well with NaT + return self._maybe_eval(res, self.binary_ops) + + if res.op in eval_in_python: + # "in"/"not in" ops are always evaluated in python + return self._maybe_eval(res, eval_in_python) + elif self.engine != "pytables": + if ( + getattr(lhs, "return_type", None) == object + or getattr(rhs, "return_type", None) == object + ): + # evaluate "==" and "!=" in python if either of our operands + # has an object return type + return self._maybe_eval(res, eval_in_python + maybe_eval_in_python) + return res + + def visit_BinOp(self, node, **kwargs): + op, op_class, left, right = self._maybe_transform_eq_ne(node) + left, right = self._maybe_downcast_constants(left, right) + return self._maybe_evaluate_binop(op, op_class, left, right) + + def visit_Div(self, node, **kwargs): + return lambda lhs, rhs: Div(lhs, rhs) + + def visit_UnaryOp(self, node, **kwargs): + op = self.visit(node.op) + operand = self.visit(node.operand) + return op(operand) + + def visit_Name(self, node, **kwargs): + return self.term_type(node.id, self.env, **kwargs) + + def visit_NameConstant(self, node, **kwargs): + return self.const_type(node.value, self.env) + + def visit_Num(self, node, **kwargs): + return self.const_type(node.n, self.env) + + def visit_Constant(self, node, **kwargs): + return self.const_type(node.n, self.env) + + def visit_Str(self, node, **kwargs): + name = self.env.add_tmp(node.s) + return self.term_type(name, self.env) + + def visit_List(self, node, **kwargs): + name = self.env.add_tmp([self.visit(e)(self.env) for e in node.elts]) + return self.term_type(name, self.env) + + visit_Tuple = visit_List + + def visit_Index(self, node, **kwargs): + """df.index[4]""" + return self.visit(node.value) + + def visit_Subscript(self, node, **kwargs): + from pandas import eval as pd_eval + + value = self.visit(node.value) + slobj = self.visit(node.slice) + result = pd_eval( + slobj, local_dict=self.env, engine=self.engine, parser=self.parser + ) + try: + # a Term instance + v = value.value[result] + except AttributeError: + # an Op instance + lhs = pd_eval( + value, local_dict=self.env, engine=self.engine, parser=self.parser + ) + v = lhs[result] + name = self.env.add_tmp(v) + return self.term_type(name, env=self.env) + + def visit_Slice(self, node, **kwargs): + """df.index[slice(4,6)]""" + lower = node.lower + if lower is not None: + lower = self.visit(lower).value + upper = node.upper + if upper is not None: + upper = self.visit(upper).value + step = node.step + if step is not None: + step = self.visit(step).value + + return slice(lower, upper, step) + + def visit_Assign(self, node, **kwargs): + """ + support a single assignment node, like + + c = a + b + + set the assigner at the top level, must be a Name node 
which + might or might not exist in the resolvers + + """ + if len(node.targets) != 1: + raise SyntaxError("can only assign a single expression") + if not isinstance(node.targets[0], ast.Name): + raise SyntaxError("left hand side of an assignment must be a single name") + if self.env.target is None: + raise ValueError("cannot assign without a target object") + + try: + assigner = self.visit(node.targets[0], **kwargs) + except UndefinedVariableError: + assigner = node.targets[0].id + + self.assigner = getattr(assigner, "name", assigner) + if self.assigner is None: + raise SyntaxError( + "left hand side of an assignment must be a single resolvable name" + ) + + return self.visit(node.value, **kwargs) + + def visit_Attribute(self, node, **kwargs): + attr = node.attr + value = node.value + + ctx = node.ctx + if isinstance(ctx, ast.Load): + # resolve the value + resolved = self.visit(value).value + try: + v = getattr(resolved, attr) + name = self.env.add_tmp(v) + return self.term_type(name, self.env) + except AttributeError: + # something like datetime.datetime where scope is overridden + if isinstance(value, ast.Name) and value.id == attr: + return resolved + raise + + raise ValueError(f"Invalid Attribute context {type(ctx).__name__}") + + def visit_Call(self, node, side=None, **kwargs): + + if isinstance(node.func, ast.Attribute) and node.func.attr != "__call__": + res = self.visit_Attribute(node.func) + elif not isinstance(node.func, ast.Name): + raise TypeError("Only named functions are supported") + else: + try: + res = self.visit(node.func) + except UndefinedVariableError: + # Check if this is a supported function name + try: + res = FuncNode(node.func.id) + except ValueError: + # Raise original error + raise + + if res is None: + # error: "expr" has no attribute "id" + raise ValueError( + f"Invalid function call {node.func.id}" # type: ignore[attr-defined] + ) + if hasattr(res, "value"): + res = res.value + + if isinstance(res, FuncNode): + + new_args = [self.visit(arg) for arg in node.args] + + if node.keywords: + raise TypeError( + f'Function "{res.name}" does not support keyword arguments' + ) + + return res(*new_args) + + else: + + new_args = [self.visit(arg).value for arg in node.args] + + for key in node.keywords: + if not isinstance(key, ast.keyword): + # error: "expr" has no attribute "id" + raise ValueError( + "keyword error in function call " # type: ignore[attr-defined] + f"'{node.func.id}'" + ) + + if key.arg: + kwargs[key.arg] = self.visit(key.value).value + + name = self.env.add_tmp(res(*new_args, **kwargs)) + return self.term_type(name=name, env=self.env) + + def translate_In(self, op): + return op + + def visit_Compare(self, node, **kwargs): + ops = node.ops + comps = node.comparators + + # base case: we have something like a CMP b + if len(comps) == 1: + op = self.translate_In(ops[0]) + binop = ast.BinOp(op=op, left=node.left, right=comps[0]) + return self.visit(binop) + + # recursive case: we have a chained comparison, a CMP b CMP c, etc. 
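+        # e.g. ``a < b < c`` is rebuilt as ``(a < b) & (b < c)``: each
+        # adjacent (left, op, comparator) pair is visited as its own
+        # Compare node and the partial results are folded into one BoolOp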
+ left = node.left + values = [] + for op, comp in zip(ops, comps): + new_node = self.visit( + ast.Compare(comparators=[comp], left=left, ops=[self.translate_In(op)]) + ) + left = comp + values.append(new_node) + return self.visit(ast.BoolOp(op=ast.And(), values=values)) + + def _try_visit_binop(self, bop): + if isinstance(bop, (Op, Term)): + return bop + return self.visit(bop) + + def visit_BoolOp(self, node, **kwargs): + def visitor(x, y): + lhs = self._try_visit_binop(x) + rhs = self._try_visit_binop(y) + + op, op_class, lhs, rhs = self._maybe_transform_eq_ne(node, lhs, rhs) + return self._maybe_evaluate_binop(op, node.op, lhs, rhs) + + operands = node.values + return reduce(visitor, operands) + + +_python_not_supported = frozenset(["Dict", "BoolOp", "In", "NotIn"]) +_numexpr_supported_calls = frozenset(REDUCTIONS + MATHOPS) + + +@disallow( + (_unsupported_nodes | _python_not_supported) + - (_boolop_nodes | frozenset(["BoolOp", "Attribute", "In", "NotIn", "Tuple"])) +) +class PandasExprVisitor(BaseExprVisitor): + def __init__( + self, + env, + engine, + parser, + preparser=partial( + _preparse, + f=_compose(_replace_locals, _replace_booleans, clean_backtick_quoted_toks), + ), + ): + super().__init__(env, engine, parser, preparser) + + +@disallow(_unsupported_nodes | _python_not_supported | frozenset(["Not"])) +class PythonExprVisitor(BaseExprVisitor): + def __init__(self, env, engine, parser, preparser=lambda x: x): + super().__init__(env, engine, parser, preparser=preparser) + + +class Expr: + """ + Object encapsulating an expression. + + Parameters + ---------- + expr : str + engine : str, optional, default 'numexpr' + parser : str, optional, default 'pandas' + env : Scope, optional, default None + level : int, optional, default 2 + """ + + env: Scope + engine: str + parser: str + + def __init__( + self, + expr, + engine: str = "numexpr", + parser: str = "pandas", + env: Scope | None = None, + level: int = 0, + ): + self.expr = expr + self.env = env or Scope(level=level + 1) + self.engine = engine + self.parser = parser + self._visitor = PARSERS[parser](self.env, self.engine, self.parser) + self.terms = self.parse() + + @property + def assigner(self): + return getattr(self._visitor, "assigner", None) + + def __call__(self): + return self.terms(self.env) + + def __repr__(self) -> str: + return printing.pprint_thing(self.terms) + + def __len__(self) -> int: + return len(self.expr) + + def parse(self): + """ + Parse an expression. + """ + return self._visitor.visit(self.expr) + + @property + def names(self): + """ + Get the names in an expression. 
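+
+        For example, with ``a`` and ``b`` resolvable in the calling scope,
+        ``Expr("a + b").names`` evaluates to ``frozenset({'a', 'b'})``.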
+ """ + if is_term(self.terms): + return frozenset([self.terms.name]) + return frozenset(term.name for term in com.flatten(self.terms)) + + +PARSERS = {"python": PythonExprVisitor, "pandas": PandasExprVisitor} diff --git a/.local/lib/python3.8/site-packages/pandas/core/computation/expressions.py b/.local/lib/python3.8/site-packages/pandas/core/computation/expressions.py new file mode 100644 index 0000000..9e180f1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/computation/expressions.py @@ -0,0 +1,284 @@ +""" +Expressions +----------- + +Offer fast expression evaluation through numexpr + +""" +from __future__ import annotations + +import operator +import warnings + +import numpy as np + +from pandas._config import get_option + +from pandas._typing import FuncType + +from pandas.core.computation.check import NUMEXPR_INSTALLED +from pandas.core.ops import roperator + +if NUMEXPR_INSTALLED: + import numexpr as ne + +_TEST_MODE: bool | None = None +_TEST_RESULT: list[bool] = [] +USE_NUMEXPR = NUMEXPR_INSTALLED +_evaluate: FuncType | None = None +_where: FuncType | None = None + +# the set of dtypes that we will allow pass to numexpr +_ALLOWED_DTYPES = { + "evaluate": {"int64", "int32", "float64", "float32", "bool"}, + "where": {"int64", "float64", "bool"}, +} + +# the minimum prod shape that we will use numexpr +_MIN_ELEMENTS = 1_000_000 + + +def set_use_numexpr(v=True): + # set/unset to use numexpr + global USE_NUMEXPR + if NUMEXPR_INSTALLED: + USE_NUMEXPR = v + + # choose what we are going to do + global _evaluate, _where + + _evaluate = _evaluate_numexpr if USE_NUMEXPR else _evaluate_standard + _where = _where_numexpr if USE_NUMEXPR else _where_standard + + +def set_numexpr_threads(n=None): + # if we are using numexpr, set the threads to n + # otherwise reset + if NUMEXPR_INSTALLED and USE_NUMEXPR: + if n is None: + n = ne.detect_number_of_cores() + ne.set_num_threads(n) + + +def _evaluate_standard(op, op_str, a, b): + """ + Standard evaluation. 
+ """ + if _TEST_MODE: + _store_test_result(False) + return op(a, b) + + +def _can_use_numexpr(op, op_str, a, b, dtype_check): + """return a boolean if we WILL be using numexpr""" + if op_str is not None: + + # required min elements (otherwise we are adding overhead) + if a.size > _MIN_ELEMENTS: + # check for dtype compatibility + dtypes: set[str] = set() + for o in [a, b]: + # ndarray and Series Case + if hasattr(o, "dtype"): + dtypes |= {o.dtype.name} + + # allowed are a superset + if not len(dtypes) or _ALLOWED_DTYPES[dtype_check] >= dtypes: + return True + + return False + + +def _evaluate_numexpr(op, op_str, a, b): + result = None + + if _can_use_numexpr(op, op_str, a, b, "evaluate"): + is_reversed = op.__name__.strip("_").startswith("r") + if is_reversed: + # we were originally called by a reversed op method + a, b = b, a + + a_value = a + b_value = b + + try: + result = ne.evaluate( + f"a_value {op_str} b_value", + local_dict={"a_value": a_value, "b_value": b_value}, + casting="safe", + ) + except TypeError: + # numexpr raises eg for array ** array with integers + # (https://github.com/pydata/numexpr/issues/379) + pass + except NotImplementedError: + if _bool_arith_fallback(op_str, a, b): + pass + else: + raise + + if is_reversed: + # reverse order to original for fallback + a, b = b, a + + if _TEST_MODE: + _store_test_result(result is not None) + + if result is None: + result = _evaluate_standard(op, op_str, a, b) + + return result + + +_op_str_mapping = { + operator.add: "+", + roperator.radd: "+", + operator.mul: "*", + roperator.rmul: "*", + operator.sub: "-", + roperator.rsub: "-", + operator.truediv: "/", + roperator.rtruediv: "/", + # floordiv not supported by numexpr 2.x + operator.floordiv: None, + roperator.rfloordiv: None, + # we require Python semantics for mod of negative for backwards compatibility + # see https://github.com/pydata/numexpr/issues/365 + # so sticking with unaccelerated for now GH#36552 + operator.mod: None, + roperator.rmod: None, + operator.pow: "**", + roperator.rpow: "**", + operator.eq: "==", + operator.ne: "!=", + operator.le: "<=", + operator.lt: "<", + operator.ge: ">=", + operator.gt: ">", + operator.and_: "&", + roperator.rand_: "&", + operator.or_: "|", + roperator.ror_: "|", + operator.xor: "^", + roperator.rxor: "^", + divmod: None, + roperator.rdivmod: None, +} + + +def _where_standard(cond, a, b): + # Caller is responsible for extracting ndarray if necessary + return np.where(cond, a, b) + + +def _where_numexpr(cond, a, b): + # Caller is responsible for extracting ndarray if necessary + result = None + + if _can_use_numexpr(None, "where", a, b, "where"): + + result = ne.evaluate( + "where(cond_value, a_value, b_value)", + local_dict={"cond_value": cond, "a_value": a, "b_value": b}, + casting="safe", + ) + + if result is None: + result = _where_standard(cond, a, b) + + return result + + +# turn myself on +set_use_numexpr(get_option("compute.use_numexpr")) + + +def _has_bool_dtype(x): + try: + return x.dtype == bool + except AttributeError: + return isinstance(x, (bool, np.bool_)) + + +_BOOL_OP_UNSUPPORTED = {"+": "|", "*": "&", "-": "^"} + + +def _bool_arith_fallback(op_str, a, b): + """ + Check if we should fallback to the python `_evaluate_standard` in case + of an unsupported operation by numexpr, which is the case for some + boolean ops. 
+ """ + if _has_bool_dtype(a) and _has_bool_dtype(b): + if op_str in _BOOL_OP_UNSUPPORTED: + warnings.warn( + f"evaluating in Python space because the {repr(op_str)} " + "operator is not supported by numexpr for the bool dtype, " + f"use {repr(_BOOL_OP_UNSUPPORTED[op_str])} instead." + ) + return True + return False + + +def evaluate(op, a, b, use_numexpr: bool = True): + """ + Evaluate and return the expression of the op on a and b. + + Parameters + ---------- + op : the actual operand + a : left operand + b : right operand + use_numexpr : bool, default True + Whether to try to use numexpr. + """ + op_str = _op_str_mapping[op] + if op_str is not None: + if use_numexpr: + # error: "None" not callable + return _evaluate(op, op_str, a, b) # type: ignore[misc] + return _evaluate_standard(op, op_str, a, b) + + +def where(cond, a, b, use_numexpr=True): + """ + Evaluate the where condition cond on a and b. + + Parameters + ---------- + cond : np.ndarray[bool] + a : return if cond is True + b : return if cond is False + use_numexpr : bool, default True + Whether to try to use numexpr. + """ + assert _where is not None + return _where(cond, a, b) if use_numexpr else _where_standard(cond, a, b) + + +def set_test_mode(v: bool = True) -> None: + """ + Keeps track of whether numexpr was used. + + Stores an additional ``True`` for every successful use of evaluate with + numexpr since the last ``get_test_result``. + """ + global _TEST_MODE, _TEST_RESULT + _TEST_MODE = v + _TEST_RESULT = [] + + +def _store_test_result(used_numexpr: bool) -> None: + global _TEST_RESULT + if used_numexpr: + _TEST_RESULT.append(used_numexpr) + + +def get_test_result() -> list[bool]: + """ + Get test result and reset test_results. + """ + global _TEST_RESULT + res = _TEST_RESULT + _TEST_RESULT = [] + return res diff --git a/.local/lib/python3.8/site-packages/pandas/core/computation/ops.py b/.local/lib/python3.8/site-packages/pandas/core/computation/ops.py new file mode 100644 index 0000000..8758565 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/computation/ops.py @@ -0,0 +1,624 @@ +""" +Operator classes for eval. +""" + +from __future__ import annotations + +from datetime import datetime +from functools import partial +import operator +from typing import ( + Callable, + Iterable, +) + +import numpy as np + +from pandas._libs.tslibs import Timestamp + +from pandas.core.dtypes.common import ( + is_list_like, + is_scalar, +) + +import pandas.core.common as com +from pandas.core.computation.common import ( + ensure_decoded, + result_type_many, +) +from pandas.core.computation.scope import DEFAULT_GLOBALS + +from pandas.io.formats.printing import ( + pprint_thing, + pprint_thing_encoded, +) + +REDUCTIONS = ("sum", "prod") + +_unary_math_ops = ( + "sin", + "cos", + "exp", + "log", + "expm1", + "log1p", + "sqrt", + "sinh", + "cosh", + "tanh", + "arcsin", + "arccos", + "arctan", + "arccosh", + "arcsinh", + "arctanh", + "abs", + "log10", + "floor", + "ceil", +) +_binary_math_ops = ("arctan2",) + +MATHOPS = _unary_math_ops + _binary_math_ops + + +LOCAL_TAG = "__pd_eval_local_" + + +class UndefinedVariableError(NameError): + """ + NameError subclass for local variables. 
+ """ + + def __init__(self, name: str, is_local: bool | None = None): + base_msg = f"{repr(name)} is not defined" + if is_local: + msg = f"local variable {base_msg}" + else: + msg = f"name {base_msg}" + super().__init__(msg) + + +class Term: + def __new__(cls, name, env, side=None, encoding=None): + klass = Constant if not isinstance(name, str) else cls + # error: Argument 2 for "super" not an instance of argument 1 + supr_new = super(Term, klass).__new__ # type: ignore[misc] + return supr_new(klass) + + is_local: bool + + def __init__(self, name, env, side=None, encoding=None): + # name is a str for Term, but may be something else for subclasses + self._name = name + self.env = env + self.side = side + tname = str(name) + self.is_local = tname.startswith(LOCAL_TAG) or tname in DEFAULT_GLOBALS + self._value = self._resolve_name() + self.encoding = encoding + + @property + def local_name(self) -> str: + return self.name.replace(LOCAL_TAG, "") + + def __repr__(self) -> str: + return pprint_thing(self.name) + + def __call__(self, *args, **kwargs): + return self.value + + def evaluate(self, *args, **kwargs): + return self + + def _resolve_name(self): + res = self.env.resolve(self.local_name, is_local=self.is_local) + self.update(res) + + if hasattr(res, "ndim") and res.ndim > 2: + raise NotImplementedError( + "N-dimensional objects, where N > 2, are not supported with eval" + ) + return res + + def update(self, value): + """ + search order for local (i.e., @variable) variables: + + scope, key_variable + [('locals', 'local_name'), + ('globals', 'local_name'), + ('locals', 'key'), + ('globals', 'key')] + """ + key = self.name + + # if it's a variable name (otherwise a constant) + if isinstance(key, str): + self.env.swapkey(self.local_name, key, new_value=value) + + self.value = value + + @property + def is_scalar(self) -> bool: + return is_scalar(self._value) + + @property + def type(self): + try: + # potentially very slow for large, mixed dtype frames + return self._value.values.dtype + except AttributeError: + try: + # ndarray + return self._value.dtype + except AttributeError: + # scalar + return type(self._value) + + return_type = type + + @property + def raw(self) -> str: + return f"{type(self).__name__}(name={repr(self.name)}, type={self.type})" + + @property + def is_datetime(self) -> bool: + try: + t = self.type.type + except AttributeError: + t = self.type + + return issubclass(t, (datetime, np.datetime64)) + + @property + def value(self): + return self._value + + @value.setter + def value(self, new_value): + self._value = new_value + + @property + def name(self): + return self._name + + @property + def ndim(self) -> int: + return self._value.ndim + + +class Constant(Term): + def __init__(self, value, env, side=None, encoding=None): + super().__init__(value, env, side=side, encoding=encoding) + + def _resolve_name(self): + return self._name + + @property + def name(self): + return self.value + + def __repr__(self) -> str: + # in python 2 str() of float + # can truncate shorter than repr() + return repr(self.name) + + +_bool_op_map = {"not": "~", "and": "&", "or": "|"} + + +class Op: + """ + Hold an operator of arbitrary arity. + """ + + op: str + + def __init__(self, op: str, operands: Iterable[Term | Op], encoding=None): + self.op = _bool_op_map.get(op, op) + self.operands = operands + self.encoding = encoding + + def __iter__(self): + return iter(self.operands) + + def __repr__(self) -> str: + """ + Print a generic n-ary operator and its operands using infix notation. 
+ """ + # recurse over the operands + parened = (f"({pprint_thing(opr)})" for opr in self.operands) + return pprint_thing(f" {self.op} ".join(parened)) + + @property + def return_type(self): + # clobber types to bool if the op is a boolean operator + if self.op in (CMP_OPS_SYMS + BOOL_OPS_SYMS): + return np.bool_ + return result_type_many(*(term.type for term in com.flatten(self))) + + @property + def has_invalid_return_type(self) -> bool: + types = self.operand_types + obj_dtype_set = frozenset([np.dtype("object")]) + return self.return_type == object and types - obj_dtype_set + + @property + def operand_types(self): + return frozenset(term.type for term in com.flatten(self)) + + @property + def is_scalar(self) -> bool: + return all(operand.is_scalar for operand in self.operands) + + @property + def is_datetime(self) -> bool: + try: + t = self.return_type.type + except AttributeError: + t = self.return_type + + return issubclass(t, (datetime, np.datetime64)) + + +def _in(x, y): + """ + Compute the vectorized membership of ``x in y`` if possible, otherwise + use Python. + """ + try: + return x.isin(y) + except AttributeError: + if is_list_like(x): + try: + return y.isin(x) + except AttributeError: + pass + return x in y + + +def _not_in(x, y): + """ + Compute the vectorized membership of ``x not in y`` if possible, + otherwise use Python. + """ + try: + return ~x.isin(y) + except AttributeError: + if is_list_like(x): + try: + return ~y.isin(x) + except AttributeError: + pass + return x not in y + + +CMP_OPS_SYMS = (">", "<", ">=", "<=", "==", "!=", "in", "not in") +_cmp_ops_funcs = ( + operator.gt, + operator.lt, + operator.ge, + operator.le, + operator.eq, + operator.ne, + _in, + _not_in, +) +_cmp_ops_dict = dict(zip(CMP_OPS_SYMS, _cmp_ops_funcs)) + +BOOL_OPS_SYMS = ("&", "|", "and", "or") +_bool_ops_funcs = (operator.and_, operator.or_, operator.and_, operator.or_) +_bool_ops_dict = dict(zip(BOOL_OPS_SYMS, _bool_ops_funcs)) + +ARITH_OPS_SYMS = ("+", "-", "*", "/", "**", "//", "%") +_arith_ops_funcs = ( + operator.add, + operator.sub, + operator.mul, + operator.truediv, + operator.pow, + operator.floordiv, + operator.mod, +) +_arith_ops_dict = dict(zip(ARITH_OPS_SYMS, _arith_ops_funcs)) + +SPECIAL_CASE_ARITH_OPS_SYMS = ("**", "//", "%") +_special_case_arith_ops_funcs = (operator.pow, operator.floordiv, operator.mod) +_special_case_arith_ops_dict = dict( + zip(SPECIAL_CASE_ARITH_OPS_SYMS, _special_case_arith_ops_funcs) +) + +_binary_ops_dict = {} + +for d in (_cmp_ops_dict, _bool_ops_dict, _arith_ops_dict): + _binary_ops_dict.update(d) + + +def _cast_inplace(terms, acceptable_dtypes, dtype): + """ + Cast an expression inplace. + + Parameters + ---------- + terms : Op + The expression that should cast. + acceptable_dtypes : list of acceptable numpy.dtype + Will not cast if term's dtype in this list. + dtype : str or numpy.dtype + The dtype to cast to. + """ + dt = np.dtype(dtype) + for term in terms: + if term.type in acceptable_dtypes: + continue + + try: + new_value = term.value.astype(dt) + except AttributeError: + new_value = dt.type(term.value) + term.update(new_value) + + +def is_term(obj) -> bool: + return isinstance(obj, Term) + + +class BinOp(Op): + """ + Hold a binary operator and its operands. 
+ + Parameters + ---------- + op : str + lhs : Term or Op + rhs : Term or Op + """ + + def __init__(self, op: str, lhs, rhs): + super().__init__(op, (lhs, rhs)) + self.lhs = lhs + self.rhs = rhs + + self._disallow_scalar_only_bool_ops() + + self.convert_values() + + try: + self.func = _binary_ops_dict[op] + except KeyError as err: + # has to be made a list for python3 + keys = list(_binary_ops_dict.keys()) + raise ValueError( + f"Invalid binary operator {repr(op)}, valid operators are {keys}" + ) from err + + def __call__(self, env): + """ + Recursively evaluate an expression in Python space. + + Parameters + ---------- + env : Scope + + Returns + ------- + object + The result of an evaluated expression. + """ + # recurse over the left/right nodes + left = self.lhs(env) + right = self.rhs(env) + + return self.func(left, right) + + def evaluate(self, env, engine: str, parser, term_type, eval_in_python): + """ + Evaluate a binary operation *before* being passed to the engine. + + Parameters + ---------- + env : Scope + engine : str + parser : str + term_type : type + eval_in_python : list + + Returns + ------- + term_type + The "pre-evaluated" expression as an instance of ``term_type`` + """ + if engine == "python": + res = self(env) + else: + # recurse over the left/right nodes + + left = self.lhs.evaluate( + env, + engine=engine, + parser=parser, + term_type=term_type, + eval_in_python=eval_in_python, + ) + + right = self.rhs.evaluate( + env, + engine=engine, + parser=parser, + term_type=term_type, + eval_in_python=eval_in_python, + ) + + # base cases + if self.op in eval_in_python: + res = self.func(left.value, right.value) + else: + from pandas.core.computation.eval import eval + + res = eval(self, local_dict=env, engine=engine, parser=parser) + + name = env.add_tmp(res) + return term_type(name, env=env) + + def convert_values(self): + """ + Convert datetimes to a comparable value in an expression. + """ + + def stringify(value): + encoder: Callable + if self.encoding is not None: + encoder = partial(pprint_thing_encoded, encoding=self.encoding) + else: + encoder = pprint_thing + return encoder(value) + + lhs, rhs = self.lhs, self.rhs + + if is_term(lhs) and lhs.is_datetime and is_term(rhs) and rhs.is_scalar: + v = rhs.value + if isinstance(v, (int, float)): + v = stringify(v) + v = Timestamp(ensure_decoded(v)) + if v.tz is not None: + v = v.tz_convert("UTC") + self.rhs.update(v) + + if is_term(rhs) and rhs.is_datetime and is_term(lhs) and lhs.is_scalar: + v = lhs.value + if isinstance(v, (int, float)): + v = stringify(v) + v = Timestamp(ensure_decoded(v)) + if v.tz is not None: + v = v.tz_convert("UTC") + self.lhs.update(v) + + def _disallow_scalar_only_bool_ops(self): + rhs = self.rhs + lhs = self.lhs + + # GH#24883 unwrap dtype if necessary to ensure we have a type object + rhs_rt = rhs.return_type + rhs_rt = getattr(rhs_rt, "type", rhs_rt) + lhs_rt = lhs.return_type + lhs_rt = getattr(lhs_rt, "type", lhs_rt) + if ( + (lhs.is_scalar or rhs.is_scalar) + and self.op in _bool_ops_dict + and ( + not ( + issubclass(rhs_rt, (bool, np.bool_)) + and issubclass(lhs_rt, (bool, np.bool_)) + ) + ) + ): + raise NotImplementedError("cannot evaluate scalar only bool ops") + + +def isnumeric(dtype) -> bool: + return issubclass(np.dtype(dtype).type, np.number) + + +class Div(BinOp): + """ + Div operator to special case casting. + + Parameters + ---------- + lhs, rhs : Term or Op + The Terms or Ops in the ``/`` expression. 
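convert_values above is why a bare date string can sit on one side of a comparison with a datetime column; a sketch:

import pandas as pd

df = pd.DataFrame({"ts": pd.date_range("2021-01-01", periods=4)})
print(df.query("ts >= '2021-01-03'"))  # the rhs string is promoted to a Timestamp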
+ """ + + def __init__(self, lhs, rhs): + super().__init__("/", lhs, rhs) + + if not isnumeric(lhs.return_type) or not isnumeric(rhs.return_type): + raise TypeError( + f"unsupported operand type(s) for {self.op}: " + f"'{lhs.return_type}' and '{rhs.return_type}'" + ) + + # do not upcast float32s to float64 un-necessarily + acceptable_dtypes = [np.float32, np.float_] + _cast_inplace(com.flatten(self), acceptable_dtypes, np.float_) + + +UNARY_OPS_SYMS = ("+", "-", "~", "not") +_unary_ops_funcs = (operator.pos, operator.neg, operator.invert, operator.invert) +_unary_ops_dict = dict(zip(UNARY_OPS_SYMS, _unary_ops_funcs)) + + +class UnaryOp(Op): + """ + Hold a unary operator and its operands. + + Parameters + ---------- + op : str + The token used to represent the operator. + operand : Term or Op + The Term or Op operand to the operator. + + Raises + ------ + ValueError + * If no function associated with the passed operator token is found. + """ + + def __init__(self, op: str, operand): + super().__init__(op, (operand,)) + self.operand = operand + + try: + self.func = _unary_ops_dict[op] + except KeyError as err: + raise ValueError( + f"Invalid unary operator {repr(op)}, " + f"valid operators are {UNARY_OPS_SYMS}" + ) from err + + def __call__(self, env): + operand = self.operand(env) + return self.func(operand) + + def __repr__(self) -> str: + return pprint_thing(f"{self.op}({self.operand})") + + @property + def return_type(self) -> np.dtype: + operand = self.operand + if operand.return_type == np.dtype("bool"): + return np.dtype("bool") + if isinstance(operand, Op) and ( + operand.op in _cmp_ops_dict or operand.op in _bool_ops_dict + ): + return np.dtype("bool") + return np.dtype("int") + + +class MathCall(Op): + def __init__(self, func, args): + super().__init__(func.name, args) + self.func = func + + def __call__(self, env): + # error: "Op" not callable + operands = [op(env) for op in self.operands] # type: ignore[operator] + with np.errstate(all="ignore"): + return self.func.func(*operands) + + def __repr__(self) -> str: + operands = map(str, self.operands) + return pprint_thing(f"{self.op}({','.join(operands)})") + + +class FuncNode: + def __init__(self, name: str): + if name not in MATHOPS: + raise ValueError(f'"{name}" is not a supported function') + self.name = name + self.func = getattr(np, name) + + def __call__(self, *args): + return MathCall(self, args) diff --git a/.local/lib/python3.8/site-packages/pandas/core/computation/parsing.py b/.local/lib/python3.8/site-packages/pandas/core/computation/parsing.py new file mode 100644 index 0000000..89d1f21 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/computation/parsing.py @@ -0,0 +1,195 @@ +""" +:func:`~pandas.eval` source string parsing functions +""" +from __future__ import annotations + +from io import StringIO +from keyword import iskeyword +import token +import tokenize +from typing import ( + Hashable, + Iterator, +) + +# A token value Python's tokenizer probably will never use. +BACKTICK_QUOTED_STRING = 100 + + +def create_valid_python_identifier(name: str) -> str: + """ + Create valid Python identifiers from any string. + + Check if name contains any special characters. If it contains any + special characters, the special characters will be replaced by + a special string and a prefix is added. + + Raises + ------ + SyntaxError + If the returned name is not a Python valid identifier, raise an exception. 
+        This can happen if there is a hashtag in the name, as the tokenizer will
+        then terminate and not find the backtick.
+        It can also happen for characters that fall outside the range (U+0001..U+007F).
+    """
+    if name.isidentifier() and not iskeyword(name):
+        return name
+
+    # Create a dict with the special characters and their replacement string.
+    # EXACT_TOKEN_TYPES contains these special characters
+    # token.tok_name contains a readable description of the replacement string.
+    special_characters_replacements = {
+        char: f"_{token.tok_name[tokval]}_"
+        for char, tokval in (tokenize.EXACT_TOKEN_TYPES.items())
+    }
+    special_characters_replacements.update(
+        {
+            " ": "_",
+            "?": "_QUESTIONMARK_",
+            "!": "_EXCLAMATIONMARK_",
+            "$": "_DOLLARSIGN_",
+            "€": "_EUROSIGN_",
+            "°": "_DEGREESIGN_",
+            # Including quotes works, but there are exceptions.
+            "'": "_SINGLEQUOTE_",
+            '"': "_DOUBLEQUOTE_",
+            # Currently not possible. Terminates parser and won't find backtick.
+            # "#": "_HASH_",
+        }
+    )
+
+    name = "".join([special_characters_replacements.get(char, char) for char in name])
+    name = "BACKTICK_QUOTED_STRING_" + name
+
+    if not name.isidentifier():
+        raise SyntaxError(f"Could not convert '{name}' to a valid Python identifier.")
+
+    return name
+
+
+def clean_backtick_quoted_toks(tok: tuple[int, str]) -> tuple[int, str]:
+    """
+    Clean up a column name if surrounded by backticks.
+
+    Backtick quoted strings are indicated by a certain tokval value. If a string
+    is a backtick quoted token it will be processed by
+    :func:`create_valid_python_identifier` so that the parser can find this
+    string when the query is executed.
+    In this case the tok will get the NAME tokval.
+
+    Parameters
+    ----------
+    tok : tuple of int, str
+        ints correspond to the all caps constants in the tokenize module
+
+    Returns
+    -------
+    tok : Tuple[int, str]
+        Either the input token or the replacement values
+    """
+    toknum, tokval = tok
+    if toknum == BACKTICK_QUOTED_STRING:
+        return tokenize.NAME, create_valid_python_identifier(tokval)
+    return toknum, tokval
+
+
+def clean_column_name(name: Hashable) -> Hashable:
+    """
+    Function to emulate the cleaning of a backtick quoted name.
+
+    The purpose of this function is to see what happens to the name of an
+    identifier if it goes through the process of being parsed as Python code
+    inside a backtick quoted string and then being cleaned
+    (removed of any special characters).
+
+    Parameters
+    ----------
+    name : hashable
+        Name to be cleaned.
+
+    Returns
+    -------
+    name : hashable
+        Returns the name after tokenizing and cleaning.
+
+    Notes
+    -----
+    For some cases, a name cannot be converted to a valid Python identifier.
+    In that case :func:`tokenize_string` raises a SyntaxError, and we just
+    return the name unmodified.
+
+    If this name was used in the query string (this makes the query call impossible)
+    an error will be raised by :func:`tokenize_backtick_quoted_string` instead,
+    which is not caught and propagates to the user level.
+    """
+    try:
+        tokenized = tokenize_string(f"`{name}`")
+        tokval = next(tokenized)[1]
+        return create_valid_python_identifier(tokval)
+    except SyntaxError:
+        return name
+
+
+def tokenize_backtick_quoted_string(
+    token_generator: Iterator[tokenize.TokenInfo], source: str, string_start: int
+) -> tuple[int, str]:
+    """
+    Creates a token from a backtick quoted string.
+
+    Moves the token_generator forward until right after the next backtick.
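This parsing module is what lets DataFrame.query address columns that are not valid Python identifiers; a sketch:

import pandas as pd

df = pd.DataFrame({"total sales": [10, 25, 40]})
# "total sales" is rewritten internally to BACKTICK_QUOTED_STRING_total_sales
print(df.query("`total sales` > 20"))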
+ + Parameters + ---------- + token_generator : Iterator[tokenize.TokenInfo] + The generator that yields the tokens of the source string (Tuple[int, str]). + The generator is at the first token after the backtick (`) + + source : str + The Python source code string. + + string_start : int + This is the start of backtick quoted string inside the source string. + + Returns + ------- + tok: Tuple[int, str] + The token that represents the backtick quoted string. + The integer is equal to BACKTICK_QUOTED_STRING (100). + """ + for _, tokval, start, _, _ in token_generator: + if tokval == "`": + string_end = start[1] + break + + return BACKTICK_QUOTED_STRING, source[string_start:string_end] + + +def tokenize_string(source: str) -> Iterator[tuple[int, str]]: + """ + Tokenize a Python source code string. + + Parameters + ---------- + source : str + The Python source code string. + + Returns + ------- + tok_generator : Iterator[Tuple[int, str]] + An iterator yielding all tokens with only toknum and tokval (Tuple[ing, str]). + """ + line_reader = StringIO(source).readline + token_generator = tokenize.generate_tokens(line_reader) + + # Loop over all tokens till a backtick (`) is found. + # Then, take all tokens till the next backtick to form a backtick quoted string + for toknum, tokval, start, _, _ in token_generator: + if tokval == "`": + try: + yield tokenize_backtick_quoted_string( + token_generator, source, string_start=start[1] + 1 + ) + except Exception as err: + raise SyntaxError(f"Failed to parse backticks in '{source}'.") from err + else: + yield toknum, tokval diff --git a/.local/lib/python3.8/site-packages/pandas/core/computation/pytables.py b/.local/lib/python3.8/site-packages/pandas/core/computation/pytables.py new file mode 100644 index 0000000..3e041c0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/computation/pytables.py @@ -0,0 +1,653 @@ +""" manage PyTables query interface via Expressions """ +from __future__ import annotations + +import ast +from functools import partial +from typing import Any + +import numpy as np + +from pandas._libs.tslibs import ( + Timedelta, + Timestamp, +) +from pandas._typing import npt +from pandas.compat.chainmap import DeepChainMap + +from pandas.core.dtypes.common import is_list_like + +import pandas.core.common as com +from pandas.core.computation import ( + expr, + ops, + scope as _scope, +) +from pandas.core.computation.common import ensure_decoded +from pandas.core.computation.expr import BaseExprVisitor +from pandas.core.computation.ops import ( + UndefinedVariableError, + is_term, +) +from pandas.core.construction import extract_array +from pandas.core.indexes.base import Index + +from pandas.io.formats.printing import ( + pprint_thing, + pprint_thing_encoded, +) + + +class PyTablesScope(_scope.Scope): + __slots__ = ("queryables",) + + queryables: dict[str, Any] + + def __init__( + self, + level: int, + global_dict=None, + local_dict=None, + queryables: dict[str, Any] | None = None, + ): + super().__init__(level + 1, global_dict=global_dict, local_dict=local_dict) + self.queryables = queryables or {} + + +class Term(ops.Term): + env: PyTablesScope + + def __new__(cls, name, env, side=None, encoding=None): + if isinstance(name, str): + klass = cls + else: + klass = Constant + return object.__new__(klass) + + def __init__(self, name, env: PyTablesScope, side=None, encoding=None): + super().__init__(name, env, side=side, encoding=encoding) + + def _resolve_name(self): + # must be a queryables + if self.side == "left": + # Note: 
The behavior of __new__ ensures that self.name is a str here + if self.name not in self.env.queryables: + raise NameError(f"name {repr(self.name)} is not defined") + return self.name + + # resolve the rhs (and allow it to be None) + try: + return self.env.resolve(self.name, is_local=False) + except UndefinedVariableError: + return self.name + + # read-only property overwriting read/write property + @property # type: ignore[misc] + def value(self): + return self._value + + +class Constant(Term): + def __init__(self, value, env: PyTablesScope, side=None, encoding=None): + assert isinstance(env, PyTablesScope), type(env) + super().__init__(value, env, side=side, encoding=encoding) + + def _resolve_name(self): + return self._name + + +class BinOp(ops.BinOp): + + _max_selectors = 31 + + op: str + queryables: dict[str, Any] + condition: str | None + + def __init__(self, op: str, lhs, rhs, queryables: dict[str, Any], encoding): + super().__init__(op, lhs, rhs) + self.queryables = queryables + self.encoding = encoding + self.condition = None + + def _disallow_scalar_only_bool_ops(self): + pass + + def prune(self, klass): + def pr(left, right): + """create and return a new specialized BinOp from myself""" + if left is None: + return right + elif right is None: + return left + + k = klass + if isinstance(left, ConditionBinOp): + if isinstance(right, ConditionBinOp): + k = JointConditionBinOp + elif isinstance(left, k): + return left + elif isinstance(right, k): + return right + + elif isinstance(left, FilterBinOp): + if isinstance(right, FilterBinOp): + k = JointFilterBinOp + elif isinstance(left, k): + return left + elif isinstance(right, k): + return right + + return k( + self.op, left, right, queryables=self.queryables, encoding=self.encoding + ).evaluate() + + left, right = self.lhs, self.rhs + + if is_term(left) and is_term(right): + res = pr(left.value, right.value) + elif not is_term(left) and is_term(right): + res = pr(left.prune(klass), right.value) + elif is_term(left) and not is_term(right): + res = pr(left.value, right.prune(klass)) + elif not (is_term(left) or is_term(right)): + res = pr(left.prune(klass), right.prune(klass)) + + return res + + def conform(self, rhs): + """inplace conform rhs""" + if not is_list_like(rhs): + rhs = [rhs] + if isinstance(rhs, np.ndarray): + rhs = rhs.ravel() + return rhs + + @property + def is_valid(self) -> bool: + """return True if this is a valid field""" + return self.lhs in self.queryables + + @property + def is_in_table(self) -> bool: + """ + return True if this is a valid column name for generation (e.g. 
an + actual column in the table) + """ + return self.queryables.get(self.lhs) is not None + + @property + def kind(self): + """the kind of my field""" + return getattr(self.queryables.get(self.lhs), "kind", None) + + @property + def meta(self): + """the meta of my field""" + return getattr(self.queryables.get(self.lhs), "meta", None) + + @property + def metadata(self): + """the metadata of my field""" + return getattr(self.queryables.get(self.lhs), "metadata", None) + + def generate(self, v) -> str: + """create and return the op string for this TermValue""" + val = v.tostring(self.encoding) + return f"({self.lhs} {self.op} {val})" + + def convert_value(self, v) -> TermValue: + """ + convert the expression that is in the term to something that is + accepted by pytables + """ + + def stringify(value): + if self.encoding is not None: + return pprint_thing_encoded(value, encoding=self.encoding) + return pprint_thing(value) + + kind = ensure_decoded(self.kind) + meta = ensure_decoded(self.meta) + if kind == "datetime64" or kind == "datetime": + if isinstance(v, (int, float)): + v = stringify(v) + v = ensure_decoded(v) + v = Timestamp(v) + if v.tz is not None: + v = v.tz_convert("UTC") + return TermValue(v, v.value, kind) + elif kind == "timedelta64" or kind == "timedelta": + if isinstance(v, str): + v = Timedelta(v).value + else: + v = Timedelta(v, unit="s").value + return TermValue(int(v), v, kind) + elif meta == "category": + metadata = extract_array(self.metadata, extract_numpy=True) + result: npt.NDArray[np.intp] | np.intp | int + if v not in metadata: + result = -1 + else: + result = metadata.searchsorted(v, side="left") + return TermValue(result, result, "integer") + elif kind == "integer": + v = int(float(v)) + return TermValue(v, v, kind) + elif kind == "float": + v = float(v) + return TermValue(v, v, kind) + elif kind == "bool": + if isinstance(v, str): + v = not v.strip().lower() in [ + "false", + "f", + "no", + "n", + "none", + "0", + "[]", + "{}", + "", + ] + else: + v = bool(v) + return TermValue(v, v, kind) + elif isinstance(v, str): + # string quoting + return TermValue(v, stringify(v), "string") + else: + raise TypeError(f"Cannot compare {v} of type {type(v)} to {kind} column") + + def convert_values(self): + pass + + +class FilterBinOp(BinOp): + filter: tuple[Any, Any, Index] | None = None + + def __repr__(self) -> str: + if self.filter is None: + return "Filter: Not Initialized" + return pprint_thing(f"[Filter : [{self.filter[0]}] -> [{self.filter[1]}]") + + def invert(self): + """invert the filter""" + if self.filter is not None: + self.filter = ( + self.filter[0], + self.generate_filter_op(invert=True), + self.filter[2], + ) + return self + + def format(self): + """return the actual filter format""" + return [self.filter] + + def evaluate(self): + + if not self.is_valid: + raise ValueError(f"query term is not valid [{self}]") + + rhs = self.conform(self.rhs) + values = list(rhs) + + if self.is_in_table: + + # if too many values to create the expression, use a filter instead + if self.op in ["==", "!="] and len(values) > self._max_selectors: + + filter_op = self.generate_filter_op() + self.filter = (self.lhs, filter_op, Index(values)) + + return self + return None + + # equality conditions + if self.op in ["==", "!="]: + + filter_op = self.generate_filter_op() + self.filter = (self.lhs, filter_op, Index(values)) + + else: + raise TypeError( + f"passing a filterable condition to a non-table indexer [{self}]" + ) + + return self + + def generate_filter_op(self, invert: bool = 
False): + if (self.op == "!=" and not invert) or (self.op == "==" and invert): + return lambda axis, vals: ~axis.isin(vals) + else: + return lambda axis, vals: axis.isin(vals) + + +class JointFilterBinOp(FilterBinOp): + def format(self): + raise NotImplementedError("unable to collapse Joint Filters") + + def evaluate(self): + return self + + +class ConditionBinOp(BinOp): + def __repr__(self) -> str: + return pprint_thing(f"[Condition : [{self.condition}]]") + + def invert(self): + """invert the condition""" + # if self.condition is not None: + # self.condition = "~(%s)" % self.condition + # return self + raise NotImplementedError( + "cannot use an invert condition when passing to numexpr" + ) + + def format(self): + """return the actual ne format""" + return self.condition + + def evaluate(self): + + if not self.is_valid: + raise ValueError(f"query term is not valid [{self}]") + + # convert values if we are in the table + if not self.is_in_table: + return None + + rhs = self.conform(self.rhs) + values = [self.convert_value(v) for v in rhs] + + # equality conditions + if self.op in ["==", "!="]: + + # too many values to create the expression? + if len(values) <= self._max_selectors: + vs = [self.generate(v) for v in values] + self.condition = f"({' | '.join(vs)})" + + # use a filter after reading + else: + return None + else: + self.condition = self.generate(values[0]) + + return self + + +class JointConditionBinOp(ConditionBinOp): + def evaluate(self): + self.condition = f"({self.lhs.condition} {self.op} {self.rhs.condition})" + return self + + +class UnaryOp(ops.UnaryOp): + def prune(self, klass): + + if self.op != "~": + raise NotImplementedError("UnaryOp only support invert type ops") + + operand = self.operand + operand = operand.prune(klass) + + if operand is not None and ( + issubclass(klass, ConditionBinOp) + and operand.condition is not None + or not issubclass(klass, ConditionBinOp) + and issubclass(klass, FilterBinOp) + and operand.filter is not None + ): + return operand.invert() + return None + + +class PyTablesExprVisitor(BaseExprVisitor): + const_type = Constant + term_type = Term + + def __init__(self, env, engine, parser, **kwargs): + super().__init__(env, engine, parser) + for bin_op in self.binary_ops: + bin_node = self.binary_op_nodes_map[bin_op] + setattr( + self, + f"visit_{bin_node}", + lambda node, bin_op=bin_op: partial(BinOp, bin_op, **kwargs), + ) + + def visit_UnaryOp(self, node, **kwargs): + if isinstance(node.op, (ast.Not, ast.Invert)): + return UnaryOp("~", self.visit(node.operand)) + elif isinstance(node.op, ast.USub): + return self.const_type(-self.visit(node.operand).value, self.env) + elif isinstance(node.op, ast.UAdd): + raise NotImplementedError("Unary addition not supported") + + def visit_Index(self, node, **kwargs): + return self.visit(node.value).value + + def visit_Assign(self, node, **kwargs): + cmpr = ast.Compare( + ops=[ast.Eq()], left=node.targets[0], comparators=[node.value] + ) + return self.visit(cmpr) + + def visit_Subscript(self, node, **kwargs): + # only allow simple subscripts + + value = self.visit(node.value) + slobj = self.visit(node.slice) + try: + value = value.value + except AttributeError: + pass + + if isinstance(slobj, Term): + # In py39 np.ndarray lookups with Term containing int raise + slobj = slobj.value + + try: + return self.const_type(value[slobj], self.env) + except TypeError as err: + raise ValueError( + f"cannot subscript {repr(value)} with {repr(slobj)}" + ) from err + + def visit_Attribute(self, node, **kwargs): + 
attr = node.attr + value = node.value + + ctx = type(node.ctx) + if ctx == ast.Load: + # resolve the value + resolved = self.visit(value) + + # try to get the value to see if we are another expression + try: + resolved = resolved.value + except (AttributeError): + pass + + try: + return self.term_type(getattr(resolved, attr), self.env) + except AttributeError: + + # something like datetime.datetime where scope is overridden + if isinstance(value, ast.Name) and value.id == attr: + return resolved + + raise ValueError(f"Invalid Attribute context {ctx.__name__}") + + def translate_In(self, op): + return ast.Eq() if isinstance(op, ast.In) else op + + def _rewrite_membership_op(self, node, left, right): + return self.visit(node.op), node.op, left, right + + +def _validate_where(w): + """ + Validate that the where statement is of the right type. + + The type may either be String, Expr, or list-like of Exprs. + + Parameters + ---------- + w : String term expression, Expr, or list-like of Exprs. + + Returns + ------- + where : The original where clause if the check was successful. + + Raises + ------ + TypeError : An invalid data type was passed in for w (e.g. dict). + """ + if not (isinstance(w, (PyTablesExpr, str)) or is_list_like(w)): + raise TypeError( + "where must be passed as a string, PyTablesExpr, " + "or list-like of PyTablesExpr" + ) + + return w + + +class PyTablesExpr(expr.Expr): + """ + Hold a pytables-like expression, comprised of possibly multiple 'terms'. + + Parameters + ---------- + where : string term expression, PyTablesExpr, or list-like of PyTablesExprs + queryables : a "kinds" map (dict of column name -> kind), or None if column + is non-indexable + encoding : an encoding that will encode the query terms + + Returns + ------- + a PyTablesExpr object + + Examples + -------- + 'index>=date' + "columns=['A', 'D']" + 'columns=A' + 'columns==A' + "~(columns=['A','B'])" + 'index>df.index[3] & string="bar"' + '(index>df.index[3] & index<=df.index[6]) | string="bar"' + "ts>=Timestamp('2012-02-01')" + "major_axis>=20130101" + """ + + _visitor: PyTablesExprVisitor | None + env: PyTablesScope + expr: str + + def __init__( + self, + where, + queryables: dict[str, Any] | None = None, + encoding=None, + scope_level: int = 0, + ): + + where = _validate_where(where) + + self.encoding = encoding + self.condition = None + self.filter = None + self.terms = None + self._visitor = None + + # capture the environment if needed + local_dict: DeepChainMap[Any, Any] = DeepChainMap() + + if isinstance(where, PyTablesExpr): + local_dict = where.env.scope + _where = where.expr + + elif is_list_like(where): + where = list(where) + for idx, w in enumerate(where): + if isinstance(w, PyTablesExpr): + local_dict = w.env.scope + else: + w = _validate_where(w) + where[idx] = w + _where = " & ".join([f"({w})" for w in com.flatten(where)]) + else: + # _validate_where ensures we otherwise have a string + _where = where + + self.expr = _where + self.env = PyTablesScope(scope_level + 1, local_dict=local_dict) + + if queryables is not None and isinstance(self.expr, str): + self.env.queryables.update(queryables) + self._visitor = PyTablesExprVisitor( + self.env, + queryables=queryables, + parser="pytables", + engine="pytables", + encoding=encoding, + ) + self.terms = self.parse() + + def __repr__(self) -> str: + if self.terms is not None: + return pprint_thing(self.terms) + return pprint_thing(self.expr) + + def evaluate(self): + """create and return the numexpr condition and filter""" + try: + self.condition = 
self.terms.prune(ConditionBinOp) + except AttributeError as err: + raise ValueError( + f"cannot process expression [{self.expr}], [{self}] " + "is not a valid condition" + ) from err + try: + self.filter = self.terms.prune(FilterBinOp) + except AttributeError as err: + raise ValueError( + f"cannot process expression [{self.expr}], [{self}] " + "is not a valid filter" + ) from err + + return self.condition, self.filter + + +class TermValue: + """hold a term value the we use to construct a condition/filter""" + + def __init__(self, value, converted, kind: str): + assert isinstance(kind, str), kind + self.value = value + self.converted = converted + self.kind = kind + + def tostring(self, encoding) -> str: + """quote the string if not encoded else encode and return""" + if self.kind == "string": + if encoding is not None: + return str(self.converted) + return f'"{self.converted}"' + elif self.kind == "float": + # python 2 str(float) is not always + # round-trippable so use repr() + return repr(self.converted) + return str(self.converted) + + +def maybe_expression(s) -> bool: + """loose checking if s is a pytables-acceptable expression""" + if not isinstance(s, str): + return False + ops = PyTablesExprVisitor.binary_ops + PyTablesExprVisitor.unary_ops + ("=",) + + # make sure we have an op at least + return any(op in s for op in ops) diff --git a/.local/lib/python3.8/site-packages/pandas/core/computation/scope.py b/.local/lib/python3.8/site-packages/pandas/core/computation/scope.py new file mode 100644 index 0000000..a561824 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/computation/scope.py @@ -0,0 +1,330 @@ +""" +Module for scope operations +""" +from __future__ import annotations + +import datetime +import inspect +from io import StringIO +import itertools +import pprint +import struct +import sys + +import numpy as np + +from pandas._libs.tslibs import Timestamp +from pandas.compat.chainmap import DeepChainMap + + +def ensure_scope( + level: int, global_dict=None, local_dict=None, resolvers=(), target=None, **kwargs +) -> Scope: + """Ensure that we are grabbing the correct scope.""" + return Scope( + level + 1, + global_dict=global_dict, + local_dict=local_dict, + resolvers=resolvers, + target=target, + ) + + +def _replacer(x) -> str: + """ + Replace a number with its hexadecimal representation. Used to tag + temporary variables with their calling scope's id. + """ + # get the hex repr of the binary char and remove 0x and pad by pad_size + # zeros + try: + hexin = ord(x) + except TypeError: + # bytes literals masquerade as ints when iterating in py3 + hexin = x + + return hex(hexin) + + +def _raw_hex_id(obj) -> str: + """Return the padded hexadecimal id of ``obj``.""" + # interpret as a pointer since that's what really what id returns + packed = struct.pack("@P", id(obj)) + return "".join([_replacer(x) for x in packed]) + + +DEFAULT_GLOBALS = { + "Timestamp": Timestamp, + "datetime": datetime.datetime, + "True": True, + "False": False, + "list": list, + "tuple": tuple, + "inf": np.inf, + "Inf": np.inf, +} + + +def _get_pretty_string(obj) -> str: + """ + Return a prettier version of obj. + + Parameters + ---------- + obj : object + Object to pretty print + + Returns + ------- + str + Pretty print object repr + """ + sio = StringIO() + pprint.pprint(obj, stream=sio) + return sio.getvalue() + + +class Scope: + """ + Object to hold scope, with a few bells to deal with some custom syntax + and contexts added by pandas. 
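The condition/filter split implemented above is visible from the public HDFStore API; a sketch, assuming PyTables is installed (demo.h5 is a throwaway path):

import pandas as pd

df = pd.DataFrame({"A": range(10)}, index=pd.date_range("2021-01-01", periods=10))
with pd.HDFStore("demo.h5", mode="w") as store:
    store.put("df", df, format="table", data_columns=True)  # 'table' format supports where=
    # "index >= ..." is pruned into a numexpr condition evaluated inside PyTables
    out = store.select("df", where="index >= '2021-01-05' & A < 8")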
+ + Parameters + ---------- + level : int + global_dict : dict or None, optional, default None + local_dict : dict or Scope or None, optional, default None + resolvers : list-like or None, optional, default None + target : object + + Attributes + ---------- + level : int + scope : DeepChainMap + target : object + temps : dict + """ + + __slots__ = ["level", "scope", "target", "resolvers", "temps"] + level: int + scope: DeepChainMap + resolvers: DeepChainMap + temps: dict + + def __init__( + self, level: int, global_dict=None, local_dict=None, resolvers=(), target=None + ): + self.level = level + 1 + + # shallow copy because we don't want to keep filling this up with what + # was there before if there are multiple calls to Scope/_ensure_scope + self.scope = DeepChainMap(DEFAULT_GLOBALS.copy()) + self.target = target + + if isinstance(local_dict, Scope): + self.scope.update(local_dict.scope) + if local_dict.target is not None: + self.target = local_dict.target + self._update(local_dict.level) + + frame = sys._getframe(self.level) + + try: + # shallow copy here because we don't want to replace what's in + # scope when we align terms (alignment accesses the underlying + # numpy array of pandas objects) + scope_global = self.scope.new_child((global_dict or frame.f_globals).copy()) + self.scope = DeepChainMap(scope_global) + if not isinstance(local_dict, Scope): + scope_local = self.scope.new_child( + (local_dict or frame.f_locals).copy() + ) + self.scope = DeepChainMap(scope_local) + finally: + del frame + + # assumes that resolvers are going from outermost scope to inner + if isinstance(local_dict, Scope): + resolvers += tuple(local_dict.resolvers.maps) + self.resolvers = DeepChainMap(*resolvers) + self.temps = {} + + def __repr__(self) -> str: + scope_keys = _get_pretty_string(list(self.scope.keys())) + res_keys = _get_pretty_string(list(self.resolvers.keys())) + return f"{type(self).__name__}(scope={scope_keys}, resolvers={res_keys})" + + @property + def has_resolvers(self) -> bool: + """ + Return whether we have any extra scope. + + For example, DataFrames pass Their columns as resolvers during calls to + ``DataFrame.eval()`` and ``DataFrame.query()``. + + Returns + ------- + hr : bool + """ + return bool(len(self.resolvers)) + + def resolve(self, key: str, is_local: bool): + """ + Resolve a variable name in a possibly local context. + + Parameters + ---------- + key : str + A variable name + is_local : bool + Flag indicating whether the variable is local or not (prefixed with + the '@' symbol) + + Returns + ------- + value : object + The value of a particular variable + """ + try: + # only look for locals in outer scope + if is_local: + return self.scope[key] + + # not a local variable so check in resolvers if we have them + if self.has_resolvers: + return self.resolvers[key] + + # if we're here that means that we have no locals and we also have + # no resolvers + assert not is_local and not self.has_resolvers + return self.scope[key] + except KeyError: + try: + # last ditch effort we look in temporaries + # these are created when parsing indexing expressions + # e.g., df[df > 0] + return self.temps[key] + except KeyError as err: + # runtime import because ops imports from scope + from pandas.core.computation.ops import UndefinedVariableError + + raise UndefinedVariableError(key, is_local) from err + + def swapkey(self, old_key: str, new_key: str, new_value=None) -> None: + """ + Replace a variable name, with a potentially new value. 
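Scope.resolve above is the path that '@'-prefixed names in query strings travel; a sketch:

import pandas as pd

df = pd.DataFrame({"a": [1, 5, 10]})
threshold = 4
# '@threshold' is preparsed into a LOCAL_TAG-mangled name and resolved as a local
print(df.query("a > @threshold"))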
+ + Parameters + ---------- + old_key : str + Current variable name to replace + new_key : str + New variable name to replace `old_key` with + new_value : object + Value to be replaced along with the possible renaming + """ + if self.has_resolvers: + maps = self.resolvers.maps + self.scope.maps + else: + maps = self.scope.maps + + maps.append(self.temps) + + for mapping in maps: + if old_key in mapping: + mapping[new_key] = new_value + return + + def _get_vars(self, stack, scopes: list[str]) -> None: + """ + Get specifically scoped variables from a list of stack frames. + + Parameters + ---------- + stack : list + A list of stack frames as returned by ``inspect.stack()`` + scopes : sequence of strings + A sequence containing valid stack frame attribute names that + evaluate to a dictionary. For example, ('locals', 'globals') + """ + variables = itertools.product(scopes, stack) + for scope, (frame, _, _, _, _, _) in variables: + try: + d = getattr(frame, "f_" + scope) + self.scope = DeepChainMap(self.scope.new_child(d)) + finally: + # won't remove it, but DECREF it + # in Py3 this probably isn't necessary since frame won't be + # scope after the loop + del frame + + def _update(self, level: int) -> None: + """ + Update the current scope by going back `level` levels. + + Parameters + ---------- + level : int + """ + sl = level + 1 + + # add sl frames to the scope starting with the + # most distant and overwriting with more current + # makes sure that we can capture variable scope + stack = inspect.stack() + + try: + self._get_vars(stack[:sl], scopes=["locals"]) + finally: + del stack[:], stack + + def add_tmp(self, value) -> str: + """ + Add a temporary variable to the scope. + + Parameters + ---------- + value : object + An arbitrary object to be assigned to a temporary variable. + + Returns + ------- + str + The name of the temporary variable created. + """ + name = f"{type(value).__name__}_{self.ntemps}_{_raw_hex_id(self)}" + + # add to inner most scope + assert name not in self.temps + self.temps[name] = value + assert name in self.temps + + # only increment if the variable gets put in the scope + return name + + @property + def ntemps(self) -> int: + """The number of temporary variables in this scope""" + return len(self.temps) + + @property + def full_scope(self) -> DeepChainMap: + """ + Return the full scope for use with passing to engines transparently + as a mapping. + + Returns + ------- + vars : DeepChainMap + All variables in this scope. + """ + # error: Unsupported operand types for + ("List[Dict[Any, Any]]" and + # "List[Mapping[Any, Any]]") + # error: Unsupported operand types for + ("List[Dict[Any, Any]]" and + # "List[Mapping[str, Any]]") + maps = ( + [self.temps] + + self.resolvers.maps # type: ignore[operator] + + self.scope.maps # type: ignore[operator] + ) + return DeepChainMap(*maps) diff --git a/.local/lib/python3.8/site-packages/pandas/core/config_init.py b/.local/lib/python3.8/site-packages/pandas/core/config_init.py new file mode 100644 index 0000000..bf2d770 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/config_init.py @@ -0,0 +1,959 @@ +""" +This module is imported from the pandas package __init__.py file +in order to ensure that the core.config options registered here will +be available as soon as the user loads the package. if register_option +is invoked inside specific modules, they will not be registered until that +module is imported, which may or may not be a problem. 
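A sketch of the temporary-variable bookkeeping above (the exact suffix of the generated name varies per run, so the value shown is only illustrative):

from pandas.core.computation.scope import Scope

env = Scope(level=0)
name = env.add_tmp([1, 2, 3])
print(name)                               # e.g. 'list_0_0x...' -- type, counter, hex id
print(env.resolve(name, is_local=False))  # [1, 2, 3], found in env.temps as a last resort
print(env.ntemps)                         # 1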
+ +If you need to make sure options are available even before a certain +module is imported, register them here rather than in the module. + +""" +import os +from typing import Callable +import warnings + +import pandas._config.config as cf +from pandas._config.config import ( + is_bool, + is_callable, + is_instance_factory, + is_int, + is_nonnegative_int, + is_one_of_factory, + is_str, + is_text, +) + +from pandas.util._exceptions import find_stack_level + +# compute + +use_bottleneck_doc = """ +: bool + Use the bottleneck library to accelerate if it is installed, + the default is True + Valid values: False,True +""" + + +def use_bottleneck_cb(key): + from pandas.core import nanops + + nanops.set_use_bottleneck(cf.get_option(key)) + + +use_numexpr_doc = """ +: bool + Use the numexpr library to accelerate computation if it is installed, + the default is True + Valid values: False,True +""" + + +def use_numexpr_cb(key): + from pandas.core.computation import expressions + + expressions.set_use_numexpr(cf.get_option(key)) + + +use_numba_doc = """ +: bool + Use the numba engine option for select operations if it is installed, + the default is False + Valid values: False,True +""" + + +def use_numba_cb(key): + from pandas.core.util import numba_ + + numba_.set_use_numba(cf.get_option(key)) + + +with cf.config_prefix("compute"): + cf.register_option( + "use_bottleneck", + True, + use_bottleneck_doc, + validator=is_bool, + cb=use_bottleneck_cb, + ) + cf.register_option( + "use_numexpr", True, use_numexpr_doc, validator=is_bool, cb=use_numexpr_cb + ) + cf.register_option( + "use_numba", False, use_numba_doc, validator=is_bool, cb=use_numba_cb + ) +# +# options from the "display" namespace + +pc_precision_doc = """ +: int + Floating point output precision in terms of number of places after the + decimal, for regular formatting as well as scientific notation. Similar + to ``precision`` in :meth:`numpy.set_printoptions`. +""" + +pc_colspace_doc = """ +: int + Default space for DataFrame columns. +""" + +pc_max_rows_doc = """ +: int + If max_rows is exceeded, switch to truncate view. Depending on + `large_repr`, objects are either centrally truncated or printed as + a summary view. 'None' value means unlimited. + + In case python/IPython is running in a terminal and `large_repr` + equals 'truncate' this can be set to 0 and pandas will auto-detect + the height of the terminal and print a truncated object which fits + the screen height. The IPython notebook, IPython qtconsole, or + IDLE do not run in a terminal and hence it is not possible to do + correct auto-detection. +""" + +pc_min_rows_doc = """ +: int + The numbers of rows to show in a truncated view (when `max_rows` is + exceeded). Ignored when `max_rows` is set to None or 0. When set to + None, follows the value of `max_rows`. +""" + +pc_max_cols_doc = """ +: int + If max_cols is exceeded, switch to truncate view. Depending on + `large_repr`, objects are either centrally truncated or printed as + a summary view. 'None' value means unlimited. + + In case python/IPython is running in a terminal and `large_repr` + equals 'truncate' this can be set to 0 and pandas will auto-detect + the width of the terminal and print a truncated object which fits + the screen width. The IPython notebook, IPython qtconsole, or IDLE + do not run in a terminal and hence it is not possible to do + correct auto-detection. 
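A minimal sketch of the register_option pattern used throughout this module; "demo.flag" is a hypothetical option invented for illustration, not a real pandas option:

import pandas._config.config as cf
from pandas._config.config import is_bool

with cf.config_prefix("demo"):
    # inside the prefix block, the key "flag" registers as "demo.flag"
    cf.register_option("flag", True, ": bool\n    hypothetical example option", validator=is_bool)

print(cf.get_option("demo.flag"))  # True
cf.set_option("demo.flag", False)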
+""" + +pc_max_categories_doc = """ +: int + This sets the maximum number of categories pandas should output when + printing out a `Categorical` or a Series of dtype "category". +""" + +pc_max_info_cols_doc = """ +: int + max_info_columns is used in DataFrame.info method to decide if + per column information will be printed. +""" + +pc_nb_repr_h_doc = """ +: boolean + When True, IPython notebook will use html representation for + pandas objects (if it is available). +""" + +pc_pprint_nest_depth = """ +: int + Controls the number of nested levels to process when pretty-printing +""" + +pc_multi_sparse_doc = """ +: boolean + "sparsify" MultiIndex display (don't display repeated + elements in outer levels within groups) +""" + +float_format_doc = """ +: callable + The callable should accept a floating point number and return + a string with the desired format of the number. This is used + in some places like SeriesFormatter. + See formats.format.EngFormatter for an example. +""" + +max_colwidth_doc = """ +: int or None + The maximum width in characters of a column in the repr of + a pandas data structure. When the column overflows, a "..." + placeholder is embedded in the output. A 'None' value means unlimited. +""" + +colheader_justify_doc = """ +: 'left'/'right' + Controls the justification of column headers. used by DataFrameFormatter. +""" + +pc_expand_repr_doc = """ +: boolean + Whether to print out the full DataFrame repr for wide DataFrames across + multiple lines, `max_columns` is still respected, but the output will + wrap-around across multiple "pages" if its width exceeds `display.width`. +""" + +pc_show_dimensions_doc = """ +: boolean or 'truncate' + Whether to print out dimensions at the end of DataFrame repr. + If 'truncate' is specified, only print out the dimensions if the + frame is truncated (e.g. not display all rows and/or columns) +""" + +pc_east_asian_width_doc = """ +: boolean + Whether to use the Unicode East Asian Width to calculate the display text + width. + Enabling this may affect to the performance (default: False) +""" + +pc_ambiguous_as_wide_doc = """ +: boolean + Whether to handle Unicode characters belong to Ambiguous as Wide (width=2) + (default: False) +""" + +pc_latex_repr_doc = """ +: boolean + Whether to produce a latex DataFrame representation for jupyter + environments that support it. + (default: False) +""" + +pc_table_schema_doc = """ +: boolean + Whether to publish a Table Schema representation for frontends + that support it. + (default: False) +""" + +pc_html_border_doc = """ +: int + A ``border=value`` attribute is inserted in the ```` tag + for the DataFrame HTML repr. +""" + +pc_html_use_mathjax_doc = """\ +: boolean + When True, Jupyter notebook will process table contents using MathJax, + rendering mathematical expressions enclosed by the dollar symbol. + (default: True) +""" + +pc_max_dir_items = """\ +: int + The number of items that will be added to `dir(...)`. 'None' value means + unlimited. Because dir is cached, changing this option will not immediately + affect already existing dataframes until a column is deleted or added. + + This is for instance used to suggest columns from a dataframe to tab + completion. +""" + +pc_width_doc = """ +: int + Width of the display in characters. In case python/IPython is running in + a terminal this can be set to None and pandas will correctly auto-detect + the width. 
+ Note that the IPython notebook, IPython qtconsole, or IDLE do not run in a + terminal and hence it is not possible to correctly detect the width. +""" + +pc_chop_threshold_doc = """ +: float or None + if set to a float value, all float values smaller then the given threshold + will be displayed as exactly 0 by repr and friends. +""" + +pc_max_seq_items = """ +: int or None + When pretty-printing a long sequence, no more then `max_seq_items` + will be printed. If items are omitted, they will be denoted by the + addition of "..." to the resulting string. + + If set to None, the number of items to be printed is unlimited. +""" + +pc_max_info_rows_doc = """ +: int or None + df.info() will usually show null-counts for each column. + For large frames this can be quite slow. max_info_rows and max_info_cols + limit this null check only to frames with smaller dimensions than + specified. +""" + +pc_large_repr_doc = """ +: 'truncate'/'info' + For DataFrames exceeding max_rows/max_cols, the repr (and HTML repr) can + show a truncated table (the default from 0.13), or switch to the view from + df.info() (the behaviour in earlier versions of pandas). +""" + +pc_memory_usage_doc = """ +: bool, string or None + This specifies if the memory usage of a DataFrame should be displayed when + df.info() is called. Valid values True,False,'deep' +""" + +pc_latex_escape = """ +: bool + This specifies if the to_latex method of a Dataframe uses escapes special + characters. + Valid values: False,True +""" + +pc_latex_longtable = """ +:bool + This specifies if the to_latex method of a Dataframe uses the longtable + format. + Valid values: False,True +""" + +pc_latex_multicolumn = """ +: bool + This specifies if the to_latex method of a Dataframe uses multicolumns + to pretty-print MultiIndex columns. + Valid values: False,True +""" + +pc_latex_multicolumn_format = """ +: string + This specifies the format for multicolumn headers. + Can be surrounded with '|'. + Valid values: 'l', 'c', 'r', 'p{}' +""" + +pc_latex_multirow = """ +: bool + This specifies if the to_latex method of a Dataframe uses multirows + to pretty-print MultiIndex rows. + Valid values: False,True +""" + + +def table_schema_cb(key): + from pandas.io.formats.printing import enable_data_resource_formatter + + enable_data_resource_formatter(cf.get_option(key)) + + +def is_terminal() -> bool: + """ + Detect if Python is running in a terminal. + + Returns True if Python is running in a terminal or False if not. 
+ """ + try: + # error: Name 'get_ipython' is not defined + ip = get_ipython() # type: ignore[name-defined] + except NameError: # assume standard Python interpreter in a terminal + return True + else: + if hasattr(ip, "kernel"): # IPython as a Jupyter kernel + return False + else: # IPython in a terminal + return True + + +with cf.config_prefix("display"): + cf.register_option("precision", 6, pc_precision_doc, validator=is_nonnegative_int) + cf.register_option( + "float_format", + None, + float_format_doc, + validator=is_one_of_factory([None, is_callable]), + ) + cf.register_option("column_space", 12, validator=is_int) + cf.register_option( + "max_info_rows", + 1690785, + pc_max_info_rows_doc, + validator=is_instance_factory((int, type(None))), + ) + cf.register_option("max_rows", 60, pc_max_rows_doc, validator=is_nonnegative_int) + cf.register_option( + "min_rows", + 10, + pc_min_rows_doc, + validator=is_instance_factory([type(None), int]), + ) + cf.register_option("max_categories", 8, pc_max_categories_doc, validator=is_int) + + def _deprecate_negative_int_max_colwidth(key): + value = cf.get_option(key) + if value is not None and value < 0: + warnings.warn( + "Passing a negative integer is deprecated in version 1.0 and " + "will not be supported in future version. Instead, use None " + "to not limit the column width.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + cf.register_option( + # TODO(2.0): change `validator=is_nonnegative_int` see GH#31569 + "max_colwidth", + 50, + max_colwidth_doc, + validator=is_instance_factory([type(None), int]), + cb=_deprecate_negative_int_max_colwidth, + ) + if is_terminal(): + max_cols = 0 # automatically determine optimal number of columns + else: + max_cols = 20 # cannot determine optimal number of columns + cf.register_option( + "max_columns", max_cols, pc_max_cols_doc, validator=is_nonnegative_int + ) + cf.register_option( + "large_repr", + "truncate", + pc_large_repr_doc, + validator=is_one_of_factory(["truncate", "info"]), + ) + cf.register_option("max_info_columns", 100, pc_max_info_cols_doc, validator=is_int) + cf.register_option( + "colheader_justify", "right", colheader_justify_doc, validator=is_text + ) + cf.register_option("notebook_repr_html", True, pc_nb_repr_h_doc, validator=is_bool) + cf.register_option("pprint_nest_depth", 3, pc_pprint_nest_depth, validator=is_int) + cf.register_option("multi_sparse", True, pc_multi_sparse_doc, validator=is_bool) + cf.register_option("expand_frame_repr", True, pc_expand_repr_doc) + cf.register_option( + "show_dimensions", + "truncate", + pc_show_dimensions_doc, + validator=is_one_of_factory([True, False, "truncate"]), + ) + cf.register_option("chop_threshold", None, pc_chop_threshold_doc) + cf.register_option("max_seq_items", 100, pc_max_seq_items) + cf.register_option( + "width", 80, pc_width_doc, validator=is_instance_factory([type(None), int]) + ) + cf.register_option( + "memory_usage", + True, + pc_memory_usage_doc, + validator=is_one_of_factory([None, True, False, "deep"]), + ) + cf.register_option( + "unicode.east_asian_width", False, pc_east_asian_width_doc, validator=is_bool + ) + cf.register_option( + "unicode.ambiguous_as_wide", False, pc_east_asian_width_doc, validator=is_bool + ) + cf.register_option("latex.repr", False, pc_latex_repr_doc, validator=is_bool) + cf.register_option("latex.escape", True, pc_latex_escape, validator=is_bool) + cf.register_option("latex.longtable", False, pc_latex_longtable, validator=is_bool) + cf.register_option( + "latex.multicolumn", True, 
pc_latex_multicolumn, validator=is_bool + ) + cf.register_option( + "latex.multicolumn_format", "l", pc_latex_multicolumn, validator=is_text + ) + cf.register_option("latex.multirow", False, pc_latex_multirow, validator=is_bool) + cf.register_option( + "html.table_schema", + False, + pc_table_schema_doc, + validator=is_bool, + cb=table_schema_cb, + ) + cf.register_option("html.border", 1, pc_html_border_doc, validator=is_int) + cf.register_option( + "html.use_mathjax", True, pc_html_use_mathjax_doc, validator=is_bool + ) + cf.register_option( + "max_dir_items", 100, pc_max_dir_items, validator=is_nonnegative_int + ) + +tc_sim_interactive_doc = """ +: boolean + Whether to simulate interactive mode for purposes of testing +""" + +with cf.config_prefix("mode"): + cf.register_option("sim_interactive", False, tc_sim_interactive_doc) + +use_inf_as_null_doc = """ +: boolean + use_inf_as_null had been deprecated and will be removed in a future + version. Use `use_inf_as_na` instead. +""" + +use_inf_as_na_doc = """ +: boolean + True means treat None, NaN, INF, -INF as NA (old way), + False means None and NaN are null, but INF, -INF are not NA + (new way). +""" + +# We don't want to start importing everything at the global context level +# or we'll hit circular deps. + + +def use_inf_as_na_cb(key): + from pandas.core.dtypes.missing import _use_inf_as_na + + _use_inf_as_na(key) + + +with cf.config_prefix("mode"): + cf.register_option("use_inf_as_na", False, use_inf_as_na_doc, cb=use_inf_as_na_cb) + cf.register_option( + "use_inf_as_null", False, use_inf_as_null_doc, cb=use_inf_as_na_cb + ) + + +cf.deprecate_option( + "mode.use_inf_as_null", msg=use_inf_as_null_doc, rkey="mode.use_inf_as_na" +) + + +data_manager_doc = """ +: string + Internal data manager type; can be "block" or "array". Defaults to "block", + unless overridden by the 'PANDAS_DATA_MANAGER' environment variable (needs + to be set before pandas is imported). +""" + + +with cf.config_prefix("mode"): + cf.register_option( + "data_manager", + # Get the default from an environment variable, if set, otherwise defaults + # to "block". This environment variable can be set for testing. + os.environ.get("PANDAS_DATA_MANAGER", "block"), + data_manager_doc, + validator=is_one_of_factory(["block", "array"]), + ) + + +# user warnings +chained_assignment = """ +: string + Raise an exception, warn, or no action if trying to use chained assignment, + The default is warn +""" + +with cf.config_prefix("mode"): + cf.register_option( + "chained_assignment", + "warn", + chained_assignment, + validator=is_one_of_factory([None, "warn", "raise"]), + ) + + +string_storage_doc = """ +: string + The default storage for StringDtype. +""" + +with cf.config_prefix("mode"): + cf.register_option( + "string_storage", + "python", + string_storage_doc, + validator=is_one_of_factory(["python", "pyarrow"]), + ) + +# Set up the io.excel specific reader configuration. +reader_engine_doc = """ +: string + The default Excel reader engine for '{ext}' files. Available options: + auto, {others}. 
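+
+    Illustrative usage (an editorial addition to this copy of the file, not
+    upstream pandas text; curly braces are avoided here because this
+    docstring is later passed through str.format):
+
+    >>> import pandas as pd  # doctest: +SKIP
+    >>> pd.set_option("io.excel.xlsx.reader", "openpyxl")  # doctest: +SKIP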
+""" + +_xls_options = ["xlrd"] +_xlsm_options = ["xlrd", "openpyxl"] +_xlsx_options = ["xlrd", "openpyxl"] +_ods_options = ["odf"] +_xlsb_options = ["pyxlsb"] + + +with cf.config_prefix("io.excel.xls"): + cf.register_option( + "reader", + "auto", + reader_engine_doc.format(ext="xls", others=", ".join(_xls_options)), + validator=is_one_of_factory(_xls_options + ["auto"]), + ) + +with cf.config_prefix("io.excel.xlsm"): + cf.register_option( + "reader", + "auto", + reader_engine_doc.format(ext="xlsm", others=", ".join(_xlsm_options)), + validator=is_one_of_factory(_xlsm_options + ["auto"]), + ) + + +with cf.config_prefix("io.excel.xlsx"): + cf.register_option( + "reader", + "auto", + reader_engine_doc.format(ext="xlsx", others=", ".join(_xlsx_options)), + validator=is_one_of_factory(_xlsx_options + ["auto"]), + ) + + +with cf.config_prefix("io.excel.ods"): + cf.register_option( + "reader", + "auto", + reader_engine_doc.format(ext="ods", others=", ".join(_ods_options)), + validator=is_one_of_factory(_ods_options + ["auto"]), + ) + +with cf.config_prefix("io.excel.xlsb"): + cf.register_option( + "reader", + "auto", + reader_engine_doc.format(ext="xlsb", others=", ".join(_xlsb_options)), + validator=is_one_of_factory(_xlsb_options + ["auto"]), + ) + +# Set up the io.excel specific writer configuration. +writer_engine_doc = """ +: string + The default Excel writer engine for '{ext}' files. Available options: + auto, {others}. +""" + +_xls_options = ["xlwt"] +_xlsm_options = ["openpyxl"] +_xlsx_options = ["openpyxl", "xlsxwriter"] +_ods_options = ["odf"] + + +with cf.config_prefix("io.excel.xls"): + cf.register_option( + "writer", + "auto", + writer_engine_doc.format(ext="xls", others=", ".join(_xls_options)), + validator=str, + ) +cf.deprecate_option( + "io.excel.xls.writer", + msg="As the xlwt package is no longer maintained, the xlwt engine will be " + "removed in a future version of pandas. This is the only engine in pandas that " + "supports writing in the xls format. Install openpyxl and write to an " + "xlsx file instead.", +) + +with cf.config_prefix("io.excel.xlsm"): + cf.register_option( + "writer", + "auto", + writer_engine_doc.format(ext="xlsm", others=", ".join(_xlsm_options)), + validator=str, + ) + + +with cf.config_prefix("io.excel.xlsx"): + cf.register_option( + "writer", + "auto", + writer_engine_doc.format(ext="xlsx", others=", ".join(_xlsx_options)), + validator=str, + ) + + +with cf.config_prefix("io.excel.ods"): + cf.register_option( + "writer", + "auto", + writer_engine_doc.format(ext="ods", others=", ".join(_ods_options)), + validator=str, + ) + + +# Set up the io.parquet specific configuration. +parquet_engine_doc = """ +: string + The default parquet reader/writer engine. Available options: + 'auto', 'pyarrow', 'fastparquet', the default is 'auto' +""" + +with cf.config_prefix("io.parquet"): + cf.register_option( + "engine", + "auto", + parquet_engine_doc, + validator=is_one_of_factory(["auto", "pyarrow", "fastparquet"]), + ) + + +# Set up the io.sql specific configuration. +sql_engine_doc = """ +: string + The default sql reader/writer engine. Available options: + 'auto', 'sqlalchemy', the default is 'auto' +""" + +with cf.config_prefix("io.sql"): + cf.register_option( + "engine", + "auto", + sql_engine_doc, + validator=is_one_of_factory(["auto", "sqlalchemy"]), + ) + +# -------- +# Plotting +# --------- + +plotting_backend_doc = """ +: str + The plotting backend to use. The default value is "matplotlib", the + backend provided with pandas. 
Other backends can be specified by
+    providing the name of the module that implements the backend.
+"""
+
+
+def register_plotting_backend_cb(key):
+    if key == "matplotlib":
+        # We defer matplotlib validation, since it's the default
+        return
+    from pandas.plotting._core import _get_plot_backend
+
+    _get_plot_backend(key)
+
+
+with cf.config_prefix("plotting"):
+    cf.register_option(
+        "backend",
+        defval="matplotlib",
+        doc=plotting_backend_doc,
+        validator=register_plotting_backend_cb,
+    )
+
+
+register_converter_doc = """
+: bool or 'auto'.
+    Whether to register converters with matplotlib's units registry for
+    dates, times, datetimes, and Periods. Toggling to False will remove
+    the converters, restoring any converters that pandas overwrote.
+"""
+
+
+def register_converter_cb(key):
+    from pandas.plotting import (
+        deregister_matplotlib_converters,
+        register_matplotlib_converters,
+    )
+
+    if cf.get_option(key):
+        register_matplotlib_converters()
+    else:
+        deregister_matplotlib_converters()
+
+
+with cf.config_prefix("plotting.matplotlib"):
+    cf.register_option(
+        "register_converters",
+        "auto",
+        register_converter_doc,
+        validator=is_one_of_factory(["auto", True, False]),
+        cb=register_converter_cb,
+    )
+
+# ------
+# Styler
+# ------
+
+styler_sparse_index_doc = """
+: bool
+    Whether to sparsify the display of a hierarchical index. Setting to False will
+    display each explicit level element in a hierarchical key for each row.
+"""
+
+styler_sparse_columns_doc = """
+: bool
+    Whether to sparsify the display of hierarchical columns. Setting to False will
+    display each explicit level element in a hierarchical key for each column.
+"""
+
+styler_render_repr = """
+: str
+    Determine which output to use in Jupyter Notebook in {"html", "latex"}.
+"""
+
+styler_max_elements = """
+: int
+    The maximum number of data-cell (<td>) elements that will be rendered before
+    trimming will occur over columns, rows or both if needed.
+"""
diff --git a/.local/lib/python3.8/site-packages/pandas/io/formats/html.py b/.local/lib/python3.8/site-packages/pandas/io/formats/html.py
+    def _write_header(self, indent: int) -> None:
+        self.write("<thead>", indent)
+
+        if self.fmt.header:
+            self._write_col_header(indent + self.indent_delta)
+
+        if self.show_row_idx_names:
+            self._write_row_header(indent + self.indent_delta)
+
+        self.write("</thead>", indent)
+
+    def _get_formatted_values(self) -> dict[int, list[str]]:
+        with option_context("display.max_colwidth", None):
+            fmt_values = {i: self.fmt.format_col(i) for i in range(self.ncols)}
+        return fmt_values
+
+    def _write_body(self, indent: int) -> None:
+        self.write("<tbody>", indent)
+        fmt_values = self._get_formatted_values()
+
+        # write values
+        if self.fmt.index and isinstance(self.frame.index, MultiIndex):
+            self._write_hierarchical_rows(fmt_values, indent + self.indent_delta)
+        else:
+            self._write_regular_rows(fmt_values, indent + self.indent_delta)
+
+        self.write("</tbody>", indent)
+
+    def _write_regular_rows(
+        self, fmt_values: Mapping[int, list[str]], indent: int
+    ) -> None:
+        is_truncated_horizontally = self.fmt.is_truncated_horizontally
+        is_truncated_vertically = self.fmt.is_truncated_vertically
+
+        nrows = len(self.fmt.tr_frame)
+
+        if self.fmt.index:
+            fmt = self.fmt._get_formatter("__index__")
+            if fmt is not None:
+                index_values = self.fmt.tr_frame.index.map(fmt)
+            else:
+                index_values = self.fmt.tr_frame.index.format()
+
+        row: list[str] = []
+        for i in range(nrows):
+
+            if is_truncated_vertically and i == (self.fmt.tr_row_num):
+                str_sep_row = ["..."] * len(row)
+                self.write_tr(
+                    str_sep_row,
+                    indent,
+                    self.indent_delta,
+                    tags=None,
+                    nindex_levels=self.row_levels,
+                )
+
+            row = []
+            if self.fmt.index:
+                row.append(index_values[i])
+            # see gh-22579
+            # Column misalignment also occurs for
+            # a standard index when the columns index is named.
+ # Add blank cell before data cells. + elif self.show_col_idx_names: + row.append("") + row.extend(fmt_values[j][i] for j in range(self.ncols)) + + if is_truncated_horizontally: + dot_col_ix = self.fmt.tr_col_num + self.row_levels + row.insert(dot_col_ix, "...") + self.write_tr( + row, indent, self.indent_delta, tags=None, nindex_levels=self.row_levels + ) + + def _write_hierarchical_rows( + self, fmt_values: Mapping[int, list[str]], indent: int + ) -> None: + template = 'rowspan="{span}" valign="top"' + + is_truncated_horizontally = self.fmt.is_truncated_horizontally + is_truncated_vertically = self.fmt.is_truncated_vertically + frame = self.fmt.tr_frame + nrows = len(frame) + + assert isinstance(frame.index, MultiIndex) + idx_values = frame.index.format(sparsify=False, adjoin=False, names=False) + idx_values = list(zip(*idx_values)) + + if self.fmt.sparsify: + # GH3547 + sentinel = lib.no_default + levels = frame.index.format(sparsify=sentinel, adjoin=False, names=False) + + level_lengths = get_level_lengths(levels, sentinel) + inner_lvl = len(level_lengths) - 1 + if is_truncated_vertically: + # Insert ... row and adjust idx_values and + # level_lengths to take this into account. + ins_row = self.fmt.tr_row_num + inserted = False + for lnum, records in enumerate(level_lengths): + rec_new = {} + for tag, span in list(records.items()): + if tag >= ins_row: + rec_new[tag + 1] = span + elif tag + span > ins_row: + rec_new[tag] = span + 1 + + # GH 14882 - Make sure insertion done once + if not inserted: + dot_row = list(idx_values[ins_row - 1]) + dot_row[-1] = "..." + idx_values.insert(ins_row, tuple(dot_row)) + inserted = True + else: + dot_row = list(idx_values[ins_row]) + dot_row[inner_lvl - lnum] = "..." + idx_values[ins_row] = tuple(dot_row) + else: + rec_new[tag] = span + # If ins_row lies between tags, all cols idx cols + # receive ... + if tag + span == ins_row: + rec_new[ins_row] = 1 + if lnum == 0: + idx_values.insert( + ins_row, tuple(["..."] * len(level_lengths)) + ) + + # GH 14882 - Place ... in correct level + elif inserted: + dot_row = list(idx_values[ins_row]) + dot_row[inner_lvl - lnum] = "..." + idx_values[ins_row] = tuple(dot_row) + level_lengths[lnum] = rec_new + + level_lengths[inner_lvl][ins_row] = 1 + for ix_col in range(len(fmt_values)): + fmt_values[ix_col].insert(ins_row, "...") + nrows += 1 + + for i in range(nrows): + row = [] + tags = {} + + sparse_offset = 0 + j = 0 + for records, v in zip(level_lengths, idx_values[i]): + if i in records: + if records[i] > 1: + tags[j] = template.format(span=records[i]) + else: + sparse_offset += 1 + continue + + j += 1 + row.append(v) + + row.extend(fmt_values[j][i] for j in range(self.ncols)) + if is_truncated_horizontally: + row.insert( + self.row_levels - sparse_offset + self.fmt.tr_col_num, "..." 
+                    )
+                self.write_tr(
+                    row,
+                    indent,
+                    self.indent_delta,
+                    tags=tags,
+                    nindex_levels=len(levels) - sparse_offset,
+                )
+        else:
+            row = []
+            for i in range(len(frame)):
+                if is_truncated_vertically and i == (self.fmt.tr_row_num):
+                    str_sep_row = ["..."] * len(row)
+                    self.write_tr(
+                        str_sep_row,
+                        indent,
+                        self.indent_delta,
+                        tags=None,
+                        nindex_levels=self.row_levels,
+                    )
+
+                idx_values = list(
+                    zip(*frame.index.format(sparsify=False, adjoin=False, names=False))
+                )
+                row = []
+                row.extend(idx_values[i])
+                row.extend(fmt_values[j][i] for j in range(self.ncols))
+                if is_truncated_horizontally:
+                    row.insert(self.row_levels + self.fmt.tr_col_num, "...")
+                self.write_tr(
+                    row,
+                    indent,
+                    self.indent_delta,
+                    tags=None,
+                    nindex_levels=frame.index.nlevels,
+                )
+
+
+class NotebookFormatter(HTMLFormatter):
+    """
+    Internal class for formatting output data in html for display in Jupyter
+    Notebooks. This class is intended for functionality specific to
+    DataFrame._repr_html_() and DataFrame.to_html(notebook=True)
+    """
+
+    def _get_formatted_values(self) -> dict[int, list[str]]:
+        return {i: self.fmt.format_col(i) for i in range(self.ncols)}
+
+    def _get_columns_formatted_values(self) -> list[str]:
+        return self.columns.format()
+
+    def write_style(self) -> None:
+        # We use the "scoped" attribute here so that the desired
+        # style properties for the data frame are not then applied
+        # throughout the entire notebook.
+        template_first = """\
+            <style scoped>"""
+        template_last = """\
+            </style>"""
+        template_select = """\
+            .dataframe %s {
+                %s: %s;
+            }"""
+        element_props = [
+            ("tbody tr th:only-of-type", "vertical-align", "middle"),
+            ("tbody tr th", "vertical-align", "top"),
+        ]
+        if isinstance(self.columns, MultiIndex):
+            element_props.append(("thead tr th", "text-align", "left"))
+            if self.show_row_idx_names:
+                element_props.append(
+                    ("thead tr:last-of-type th", "text-align", "right")
+                )
+        else:
+            element_props.append(("thead th", "text-align", "right"))
+        template_mid = "\n\n".join(map(lambda t: template_select % t, element_props))
+        template = dedent("\n".join((template_first, template_mid, template_last)))
+        self.write(template)
+
+    def render(self) -> list[str]:
+        self.write("<div>")
+        self.write_style()
+        super().render()
+        self.write("</div>")
") + return self.elements diff --git a/.local/lib/python3.8/site-packages/pandas/io/formats/info.py b/.local/lib/python3.8/site-packages/pandas/io/formats/info.py new file mode 100644 index 0000000..4a9310c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/formats/info.py @@ -0,0 +1,1117 @@ +from __future__ import annotations + +from abc import ( + ABC, + abstractmethod, +) +import sys +from textwrap import dedent +from typing import ( + TYPE_CHECKING, + Iterable, + Iterator, + Mapping, + Sequence, +) + +from pandas._config import get_option + +from pandas._typing import ( + Dtype, + WriteBuffer, +) + +from pandas.core.indexes.api import Index + +from pandas.io.formats import format as fmt +from pandas.io.formats.printing import pprint_thing + +if TYPE_CHECKING: + from pandas.core.frame import ( + DataFrame, + Series, + ) + + +frame_max_cols_sub = dedent( + """\ + max_cols : int, optional + When to switch from the verbose to the truncated output. If the + DataFrame has more than `max_cols` columns, the truncated output + is used. By default, the setting in + ``pandas.options.display.max_info_columns`` is used.""" +) + + +show_counts_sub = dedent( + """\ + show_counts : bool, optional + Whether to show the non-null counts. By default, this is shown + only if the DataFrame is smaller than + ``pandas.options.display.max_info_rows`` and + ``pandas.options.display.max_info_columns``. A value of True always + shows the counts, and False never shows the counts.""" +) + +null_counts_sub = dedent( + """ + null_counts : bool, optional + .. deprecated:: 1.2.0 + Use show_counts instead.""" +) + + +frame_examples_sub = dedent( + """\ + >>> int_values = [1, 2, 3, 4, 5] + >>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon'] + >>> float_values = [0.0, 0.25, 0.5, 0.75, 1.0] + >>> df = pd.DataFrame({"int_col": int_values, "text_col": text_values, + ... "float_col": float_values}) + >>> df + int_col text_col float_col + 0 1 alpha 0.00 + 1 2 beta 0.25 + 2 3 gamma 0.50 + 3 4 delta 0.75 + 4 5 epsilon 1.00 + + Prints information of all columns: + + >>> df.info(verbose=True) + + RangeIndex: 5 entries, 0 to 4 + Data columns (total 3 columns): + # Column Non-Null Count Dtype + --- ------ -------------- ----- + 0 int_col 5 non-null int64 + 1 text_col 5 non-null object + 2 float_col 5 non-null float64 + dtypes: float64(1), int64(1), object(1) + memory usage: 248.0+ bytes + + Prints a summary of columns count and its dtypes but not per column + information: + + >>> df.info(verbose=False) + + RangeIndex: 5 entries, 0 to 4 + Columns: 3 entries, int_col to float_col + dtypes: float64(1), int64(1), object(1) + memory usage: 248.0+ bytes + + Pipe output of DataFrame.info to buffer instead of sys.stdout, get + buffer content and writes to a text file: + + >>> import io + >>> buffer = io.StringIO() + >>> df.info(buf=buffer) + >>> s = buffer.getvalue() + >>> with open("df_info.txt", "w", + ... encoding="utf-8") as f: # doctest: +SKIP + ... f.write(s) + 260 + + The `memory_usage` parameter allows deep introspection mode, specially + useful for big DataFrames and fine-tune memory optimization: + + >>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6) + >>> df = pd.DataFrame({ + ... 'column_1': np.random.choice(['a', 'b', 'c'], 10 ** 6), + ... 'column_2': np.random.choice(['a', 'b', 'c'], 10 ** 6), + ... 'column_3': np.random.choice(['a', 'b', 'c'], 10 ** 6) + ... 
}) + >>> df.info() + + RangeIndex: 1000000 entries, 0 to 999999 + Data columns (total 3 columns): + # Column Non-Null Count Dtype + --- ------ -------------- ----- + 0 column_1 1000000 non-null object + 1 column_2 1000000 non-null object + 2 column_3 1000000 non-null object + dtypes: object(3) + memory usage: 22.9+ MB + + >>> df.info(memory_usage='deep') + + RangeIndex: 1000000 entries, 0 to 999999 + Data columns (total 3 columns): + # Column Non-Null Count Dtype + --- ------ -------------- ----- + 0 column_1 1000000 non-null object + 1 column_2 1000000 non-null object + 2 column_3 1000000 non-null object + dtypes: object(3) + memory usage: 165.9 MB""" +) + + +frame_see_also_sub = dedent( + """\ + DataFrame.describe: Generate descriptive statistics of DataFrame + columns. + DataFrame.memory_usage: Memory usage of DataFrame columns.""" +) + + +frame_sub_kwargs = { + "klass": "DataFrame", + "type_sub": " and columns", + "max_cols_sub": frame_max_cols_sub, + "show_counts_sub": show_counts_sub, + "null_counts_sub": null_counts_sub, + "examples_sub": frame_examples_sub, + "see_also_sub": frame_see_also_sub, + "version_added_sub": "", +} + + +series_examples_sub = dedent( + """\ + >>> int_values = [1, 2, 3, 4, 5] + >>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon'] + >>> s = pd.Series(text_values, index=int_values) + >>> s.info() + + Int64Index: 5 entries, 1 to 5 + Series name: None + Non-Null Count Dtype + -------------- ----- + 5 non-null object + dtypes: object(1) + memory usage: 80.0+ bytes + + Prints a summary excluding information about its values: + + >>> s.info(verbose=False) + + Int64Index: 5 entries, 1 to 5 + dtypes: object(1) + memory usage: 80.0+ bytes + + Pipe output of Series.info to buffer instead of sys.stdout, get + buffer content and writes to a text file: + + >>> import io + >>> buffer = io.StringIO() + >>> s.info(buf=buffer) + >>> s = buffer.getvalue() + >>> with open("df_info.txt", "w", + ... encoding="utf-8") as f: # doctest: +SKIP + ... f.write(s) + 260 + + The `memory_usage` parameter allows deep introspection mode, specially + useful for big Series and fine-tune memory optimization: + + >>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6) + >>> s = pd.Series(np.random.choice(['a', 'b', 'c'], 10 ** 6)) + >>> s.info() + + RangeIndex: 1000000 entries, 0 to 999999 + Series name: None + Non-Null Count Dtype + -------------- ----- + 1000000 non-null object + dtypes: object(1) + memory usage: 7.6+ MB + + >>> s.info(memory_usage='deep') + + RangeIndex: 1000000 entries, 0 to 999999 + Series name: None + Non-Null Count Dtype + -------------- ----- + 1000000 non-null object + dtypes: object(1) + memory usage: 55.3 MB""" +) + + +series_see_also_sub = dedent( + """\ + Series.describe: Generate descriptive statistics of Series. + Series.memory_usage: Memory usage of Series.""" +) + + +series_sub_kwargs = { + "klass": "Series", + "type_sub": "", + "max_cols_sub": "", + "show_counts_sub": show_counts_sub, + "null_counts_sub": "", + "examples_sub": series_examples_sub, + "see_also_sub": series_see_also_sub, + "version_added_sub": "\n.. versionadded:: 1.4.0\n", +} + + +INFO_DOCSTRING = dedent( + """ + Print a concise summary of a {klass}. + + This method prints information about a {klass} including + the index dtype{type_sub}, non-null values and memory usage. + {version_added_sub}\ + + Parameters + ---------- + data : {klass} + {klass} to print information about. + verbose : bool, optional + Whether to print the full summary. 
By default, the setting in + ``pandas.options.display.max_info_columns`` is followed. + buf : writable buffer, defaults to sys.stdout + Where to send the output. By default, the output is printed to + sys.stdout. Pass a writable buffer if you need to further process + the output.\ + {max_cols_sub} + memory_usage : bool, str, optional + Specifies whether total memory usage of the {klass} + elements (including the index) should be displayed. By default, + this follows the ``pandas.options.display.memory_usage`` setting. + + True always show memory usage. False never shows memory usage. + A value of 'deep' is equivalent to "True with deep introspection". + Memory usage is shown in human-readable units (base-2 + representation). Without deep introspection a memory estimation is + made based in column dtype and number of rows assuming values + consume the same memory amount for corresponding dtypes. With deep + memory introspection, a real memory usage calculation is performed + at the cost of computational resources. + {show_counts_sub}{null_counts_sub} + + Returns + ------- + None + This method prints a summary of a {klass} and returns None. + + See Also + -------- + {see_also_sub} + + Examples + -------- + {examples_sub} + """ +) + + +def _put_str(s: str | Dtype, space: int) -> str: + """ + Make string of specified length, padding to the right if necessary. + + Parameters + ---------- + s : Union[str, Dtype] + String to be formatted. + space : int + Length to force string to be of. + + Returns + ------- + str + String coerced to given length. + + Examples + -------- + >>> pd.io.formats.info._put_str("panda", 6) + 'panda ' + >>> pd.io.formats.info._put_str("panda", 4) + 'pand' + """ + return str(s)[:space].ljust(space) + + +def _sizeof_fmt(num: int | float, size_qualifier: str) -> str: + """ + Return size in human readable format. + + Parameters + ---------- + num : int + Size in bytes. + size_qualifier : str + Either empty, or '+' (if lower bound). + + Returns + ------- + str + Size in human readable format. + + Examples + -------- + >>> _sizeof_fmt(23028, '') + '22.5 KB' + + >>> _sizeof_fmt(23028, '+') + '22.5+ KB' + """ + for x in ["bytes", "KB", "MB", "GB", "TB"]: + if num < 1024.0: + return f"{num:3.1f}{size_qualifier} {x}" + num /= 1024.0 + return f"{num:3.1f}{size_qualifier} PB" + + +def _initialize_memory_usage( + memory_usage: bool | str | None = None, +) -> bool | str: + """Get memory usage based on inputs and display options.""" + if memory_usage is None: + memory_usage = get_option("display.memory_usage") + return memory_usage + + +class BaseInfo(ABC): + """ + Base class for DataFrameInfo and SeriesInfo. + + Parameters + ---------- + data : DataFrame or Series + Either dataframe or series. + memory_usage : bool or str, optional + If "deep", introspect the data deeply by interrogating object dtypes + for system-level memory consumption, and include it in the returned + values. + """ + + data: DataFrame | Series + memory_usage: bool | str + + @property + @abstractmethod + def dtypes(self) -> Iterable[Dtype]: + """ + Dtypes. + + Returns + ------- + dtypes : sequence + Dtype of each of the DataFrame's columns (or one series column). 
+ """ + + @property + @abstractmethod + def dtype_counts(self) -> Mapping[str, int]: + """Mapping dtype - number of counts.""" + + @property + @abstractmethod + def non_null_counts(self) -> Sequence[int]: + """Sequence of non-null counts for all columns or column (if series).""" + + @property + @abstractmethod + def memory_usage_bytes(self) -> int: + """ + Memory usage in bytes. + + Returns + ------- + memory_usage_bytes : int + Object's total memory usage in bytes. + """ + + @property + def memory_usage_string(self) -> str: + """Memory usage in a form of human readable string.""" + return f"{_sizeof_fmt(self.memory_usage_bytes, self.size_qualifier)}\n" + + @property + def size_qualifier(self) -> str: + size_qualifier = "" + if self.memory_usage: + if self.memory_usage != "deep": + # size_qualifier is just a best effort; not guaranteed to catch + # all cases (e.g., it misses categorical data even with object + # categories) + if ( + "object" in self.dtype_counts + or self.data.index._is_memory_usage_qualified() + ): + size_qualifier = "+" + return size_qualifier + + @abstractmethod + def render( + self, + *, + buf: WriteBuffer[str] | None, + max_cols: int | None, + verbose: bool | None, + show_counts: bool | None, + ) -> None: + pass + + +class DataFrameInfo(BaseInfo): + """ + Class storing dataframe-specific info. + """ + + def __init__( + self, + data: DataFrame, + memory_usage: bool | str | None = None, + ): + self.data: DataFrame = data + self.memory_usage = _initialize_memory_usage(memory_usage) + + @property + def dtype_counts(self) -> Mapping[str, int]: + return _get_dataframe_dtype_counts(self.data) + + @property + def dtypes(self) -> Iterable[Dtype]: + """ + Dtypes. + + Returns + ------- + dtypes + Dtype of each of the DataFrame's columns. + """ + return self.data.dtypes + + @property + def ids(self) -> Index: + """ + Column names. + + Returns + ------- + ids : Index + DataFrame's column names. + """ + return self.data.columns + + @property + def col_count(self) -> int: + """Number of columns to be summarized.""" + return len(self.ids) + + @property + def non_null_counts(self) -> Sequence[int]: + """Sequence of non-null counts for all columns or column (if series).""" + return self.data.count() + + @property + def memory_usage_bytes(self) -> int: + if self.memory_usage == "deep": + deep = True + else: + deep = False + return self.data.memory_usage(index=True, deep=deep).sum() + + def render( + self, + *, + buf: WriteBuffer[str] | None, + max_cols: int | None, + verbose: bool | None, + show_counts: bool | None, + ) -> None: + printer = DataFrameInfoPrinter( + info=self, + max_cols=max_cols, + verbose=verbose, + show_counts=show_counts, + ) + printer.to_buffer(buf) + + +class SeriesInfo(BaseInfo): + """ + Class storing series-specific info. 
+ """ + + def __init__( + self, + data: Series, + memory_usage: bool | str | None = None, + ): + self.data: Series = data + self.memory_usage = _initialize_memory_usage(memory_usage) + + def render( + self, + *, + buf: WriteBuffer[str] | None = None, + max_cols: int | None = None, + verbose: bool | None = None, + show_counts: bool | None = None, + ) -> None: + if max_cols is not None: + raise ValueError( + "Argument `max_cols` can only be passed " + "in DataFrame.info, not Series.info" + ) + printer = SeriesInfoPrinter( + info=self, + verbose=verbose, + show_counts=show_counts, + ) + printer.to_buffer(buf) + + @property + def non_null_counts(self) -> Sequence[int]: + return [self.data.count()] + + @property + def dtypes(self) -> Iterable[Dtype]: + return [self.data.dtypes] + + @property + def dtype_counts(self): + from pandas.core.frame import DataFrame + + return _get_dataframe_dtype_counts(DataFrame(self.data)) + + @property + def memory_usage_bytes(self) -> int: + """Memory usage in bytes. + + Returns + ------- + memory_usage_bytes : int + Object's total memory usage in bytes. + """ + if self.memory_usage == "deep": + deep = True + else: + deep = False + return self.data.memory_usage(index=True, deep=deep) + + +class InfoPrinterAbstract: + """ + Class for printing dataframe or series info. + """ + + def to_buffer(self, buf: WriteBuffer[str] | None = None) -> None: + """Save dataframe info into buffer.""" + table_builder = self._create_table_builder() + lines = table_builder.get_lines() + if buf is None: # pragma: no cover + buf = sys.stdout + fmt.buffer_put_lines(buf, lines) + + @abstractmethod + def _create_table_builder(self) -> TableBuilderAbstract: + """Create instance of table builder.""" + + +class DataFrameInfoPrinter(InfoPrinterAbstract): + """ + Class for printing dataframe info. + + Parameters + ---------- + info : DataFrameInfo + Instance of DataFrameInfo. + max_cols : int, optional + When to switch from the verbose to the truncated output. + verbose : bool, optional + Whether to print the full summary. + show_counts : bool, optional + Whether to show the non-null counts. 
+ """ + + def __init__( + self, + info: DataFrameInfo, + max_cols: int | None = None, + verbose: bool | None = None, + show_counts: bool | None = None, + ): + self.info = info + self.data = info.data + self.verbose = verbose + self.max_cols = self._initialize_max_cols(max_cols) + self.show_counts = self._initialize_show_counts(show_counts) + + @property + def max_rows(self) -> int: + """Maximum info rows to be displayed.""" + return get_option("display.max_info_rows", len(self.data) + 1) + + @property + def exceeds_info_cols(self) -> bool: + """Check if number of columns to be summarized does not exceed maximum.""" + return bool(self.col_count > self.max_cols) + + @property + def exceeds_info_rows(self) -> bool: + """Check if number of rows to be summarized does not exceed maximum.""" + return bool(len(self.data) > self.max_rows) + + @property + def col_count(self) -> int: + """Number of columns to be summarized.""" + return self.info.col_count + + def _initialize_max_cols(self, max_cols: int | None) -> int: + if max_cols is None: + return get_option("display.max_info_columns", self.col_count + 1) + return max_cols + + def _initialize_show_counts(self, show_counts: bool | None) -> bool: + if show_counts is None: + return bool(not self.exceeds_info_cols and not self.exceeds_info_rows) + else: + return show_counts + + def _create_table_builder(self) -> DataFrameTableBuilder: + """ + Create instance of table builder based on verbosity and display settings. + """ + if self.verbose: + return DataFrameTableBuilderVerbose( + info=self.info, + with_counts=self.show_counts, + ) + elif self.verbose is False: # specifically set to False, not necessarily None + return DataFrameTableBuilderNonVerbose(info=self.info) + else: + if self.exceeds_info_cols: + return DataFrameTableBuilderNonVerbose(info=self.info) + else: + return DataFrameTableBuilderVerbose( + info=self.info, + with_counts=self.show_counts, + ) + + +class SeriesInfoPrinter(InfoPrinterAbstract): + """Class for printing series info. + + Parameters + ---------- + info : SeriesInfo + Instance of SeriesInfo. + verbose : bool, optional + Whether to print the full summary. + show_counts : bool, optional + Whether to show the non-null counts. + """ + + def __init__( + self, + info: SeriesInfo, + verbose: bool | None = None, + show_counts: bool | None = None, + ): + self.info = info + self.data = info.data + self.verbose = verbose + self.show_counts = self._initialize_show_counts(show_counts) + + def _create_table_builder(self) -> SeriesTableBuilder: + """ + Create instance of table builder based on verbosity. + """ + if self.verbose or self.verbose is None: + return SeriesTableBuilderVerbose( + info=self.info, + with_counts=self.show_counts, + ) + else: + return SeriesTableBuilderNonVerbose(info=self.info) + + def _initialize_show_counts(self, show_counts: bool | None) -> bool: + if show_counts is None: + return True + else: + return show_counts + + +class TableBuilderAbstract(ABC): + """ + Abstract builder for info table. 
+ """ + + _lines: list[str] + info: BaseInfo + + @abstractmethod + def get_lines(self) -> list[str]: + """Product in a form of list of lines (strings).""" + + @property + def data(self) -> DataFrame | Series: + return self.info.data + + @property + def dtypes(self) -> Iterable[Dtype]: + """Dtypes of each of the DataFrame's columns.""" + return self.info.dtypes + + @property + def dtype_counts(self) -> Mapping[str, int]: + """Mapping dtype - number of counts.""" + return self.info.dtype_counts + + @property + def display_memory_usage(self) -> bool: + """Whether to display memory usage.""" + return bool(self.info.memory_usage) + + @property + def memory_usage_string(self) -> str: + """Memory usage string with proper size qualifier.""" + return self.info.memory_usage_string + + @property + def non_null_counts(self) -> Sequence[int]: + return self.info.non_null_counts + + def add_object_type_line(self) -> None: + """Add line with string representation of dataframe to the table.""" + self._lines.append(str(type(self.data))) + + def add_index_range_line(self) -> None: + """Add line with range of indices to the table.""" + self._lines.append(self.data.index._summary()) + + def add_dtypes_line(self) -> None: + """Add summary line with dtypes present in dataframe.""" + collected_dtypes = [ + f"{key}({val:d})" for key, val in sorted(self.dtype_counts.items()) + ] + self._lines.append(f"dtypes: {', '.join(collected_dtypes)}") + + +class DataFrameTableBuilder(TableBuilderAbstract): + """ + Abstract builder for dataframe info table. + + Parameters + ---------- + info : DataFrameInfo. + Instance of DataFrameInfo. + """ + + def __init__(self, *, info: DataFrameInfo): + self.info: DataFrameInfo = info + + def get_lines(self) -> list[str]: + self._lines = [] + if self.col_count == 0: + self._fill_empty_info() + else: + self._fill_non_empty_info() + return self._lines + + def _fill_empty_info(self) -> None: + """Add lines to the info table, pertaining to empty dataframe.""" + self.add_object_type_line() + self.add_index_range_line() + self._lines.append(f"Empty {type(self.data).__name__}") + + @abstractmethod + def _fill_non_empty_info(self) -> None: + """Add lines to the info table, pertaining to non-empty dataframe.""" + + @property + def data(self) -> DataFrame: + """DataFrame.""" + return self.info.data + + @property + def ids(self) -> Index: + """Dataframe columns.""" + return self.info.ids + + @property + def col_count(self) -> int: + """Number of dataframe columns to be summarized.""" + return self.info.col_count + + def add_memory_usage_line(self) -> None: + """Add line containing memory usage.""" + self._lines.append(f"memory usage: {self.memory_usage_string}") + + +class DataFrameTableBuilderNonVerbose(DataFrameTableBuilder): + """ + Dataframe info table builder for non-verbose output. + """ + + def _fill_non_empty_info(self) -> None: + """Add lines to the info table, pertaining to non-empty dataframe.""" + self.add_object_type_line() + self.add_index_range_line() + self.add_columns_summary_line() + self.add_dtypes_line() + if self.display_memory_usage: + self.add_memory_usage_line() + + def add_columns_summary_line(self) -> None: + self._lines.append(self.ids._summary(name="Columns")) + + +class TableBuilderVerboseMixin(TableBuilderAbstract): + """ + Mixin for verbose info output. 
+ """ + + SPACING: str = " " * 2 + strrows: Sequence[Sequence[str]] + gross_column_widths: Sequence[int] + with_counts: bool + + @property + @abstractmethod + def headers(self) -> Sequence[str]: + """Headers names of the columns in verbose table.""" + + @property + def header_column_widths(self) -> Sequence[int]: + """Widths of header columns (only titles).""" + return [len(col) for col in self.headers] + + def _get_gross_column_widths(self) -> Sequence[int]: + """Get widths of columns containing both headers and actual content.""" + body_column_widths = self._get_body_column_widths() + return [ + max(*widths) + for widths in zip(self.header_column_widths, body_column_widths) + ] + + def _get_body_column_widths(self) -> Sequence[int]: + """Get widths of table content columns.""" + strcols: Sequence[Sequence[str]] = list(zip(*self.strrows)) + return [max(len(x) for x in col) for col in strcols] + + def _gen_rows(self) -> Iterator[Sequence[str]]: + """ + Generator function yielding rows content. + + Each element represents a row comprising a sequence of strings. + """ + if self.with_counts: + return self._gen_rows_with_counts() + else: + return self._gen_rows_without_counts() + + @abstractmethod + def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]: + """Iterator with string representation of body data with counts.""" + + @abstractmethod + def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]: + """Iterator with string representation of body data without counts.""" + + def add_header_line(self) -> None: + header_line = self.SPACING.join( + [ + _put_str(header, col_width) + for header, col_width in zip(self.headers, self.gross_column_widths) + ] + ) + self._lines.append(header_line) + + def add_separator_line(self) -> None: + separator_line = self.SPACING.join( + [ + _put_str("-" * header_colwidth, gross_colwidth) + for header_colwidth, gross_colwidth in zip( + self.header_column_widths, self.gross_column_widths + ) + ] + ) + self._lines.append(separator_line) + + def add_body_lines(self) -> None: + for row in self.strrows: + body_line = self.SPACING.join( + [ + _put_str(col, gross_colwidth) + for col, gross_colwidth in zip(row, self.gross_column_widths) + ] + ) + self._lines.append(body_line) + + def _gen_non_null_counts(self) -> Iterator[str]: + """Iterator with string representation of non-null counts.""" + for count in self.non_null_counts: + yield f"{count} non-null" + + def _gen_dtypes(self) -> Iterator[str]: + """Iterator with string representation of column dtypes.""" + for dtype in self.dtypes: + yield pprint_thing(dtype) + + +class DataFrameTableBuilderVerbose(DataFrameTableBuilder, TableBuilderVerboseMixin): + """ + Dataframe info table builder for verbose output. 
+ """ + + def __init__( + self, + *, + info: DataFrameInfo, + with_counts: bool, + ): + self.info = info + self.with_counts = with_counts + self.strrows: Sequence[Sequence[str]] = list(self._gen_rows()) + self.gross_column_widths: Sequence[int] = self._get_gross_column_widths() + + def _fill_non_empty_info(self) -> None: + """Add lines to the info table, pertaining to non-empty dataframe.""" + self.add_object_type_line() + self.add_index_range_line() + self.add_columns_summary_line() + self.add_header_line() + self.add_separator_line() + self.add_body_lines() + self.add_dtypes_line() + if self.display_memory_usage: + self.add_memory_usage_line() + + @property + def headers(self) -> Sequence[str]: + """Headers names of the columns in verbose table.""" + if self.with_counts: + return [" # ", "Column", "Non-Null Count", "Dtype"] + return [" # ", "Column", "Dtype"] + + def add_columns_summary_line(self) -> None: + self._lines.append(f"Data columns (total {self.col_count} columns):") + + def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]: + """Iterator with string representation of body data without counts.""" + yield from zip( + self._gen_line_numbers(), + self._gen_columns(), + self._gen_dtypes(), + ) + + def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]: + """Iterator with string representation of body data with counts.""" + yield from zip( + self._gen_line_numbers(), + self._gen_columns(), + self._gen_non_null_counts(), + self._gen_dtypes(), + ) + + def _gen_line_numbers(self) -> Iterator[str]: + """Iterator with string representation of column numbers.""" + for i, _ in enumerate(self.ids): + yield f" {i}" + + def _gen_columns(self) -> Iterator[str]: + """Iterator with string representation of column names.""" + for col in self.ids: + yield pprint_thing(col) + + +class SeriesTableBuilder(TableBuilderAbstract): + """ + Abstract builder for series info table. + + Parameters + ---------- + info : SeriesInfo. + Instance of SeriesInfo. + """ + + def __init__(self, *, info: SeriesInfo): + self.info: SeriesInfo = info + + def get_lines(self) -> list[str]: + self._lines = [] + self._fill_non_empty_info() + return self._lines + + @property + def data(self) -> Series: + """Series.""" + return self.info.data + + def add_memory_usage_line(self) -> None: + """Add line containing memory usage.""" + self._lines.append(f"memory usage: {self.memory_usage_string}") + + @abstractmethod + def _fill_non_empty_info(self) -> None: + """Add lines to the info table, pertaining to non-empty series.""" + + +class SeriesTableBuilderNonVerbose(SeriesTableBuilder): + """ + Series info table builder for non-verbose output. + """ + + def _fill_non_empty_info(self) -> None: + """Add lines to the info table, pertaining to non-empty series.""" + self.add_object_type_line() + self.add_index_range_line() + self.add_dtypes_line() + if self.display_memory_usage: + self.add_memory_usage_line() + + +class SeriesTableBuilderVerbose(SeriesTableBuilder, TableBuilderVerboseMixin): + """ + Series info table builder for verbose output. 
+ """ + + def __init__( + self, + *, + info: SeriesInfo, + with_counts: bool, + ): + self.info = info + self.with_counts = with_counts + self.strrows: Sequence[Sequence[str]] = list(self._gen_rows()) + self.gross_column_widths: Sequence[int] = self._get_gross_column_widths() + + def _fill_non_empty_info(self) -> None: + """Add lines to the info table, pertaining to non-empty series.""" + self.add_object_type_line() + self.add_index_range_line() + self.add_series_name_line() + self.add_header_line() + self.add_separator_line() + self.add_body_lines() + self.add_dtypes_line() + if self.display_memory_usage: + self.add_memory_usage_line() + + def add_series_name_line(self): + self._lines.append(f"Series name: {self.data.name}") + + @property + def headers(self) -> Sequence[str]: + """Headers names of the columns in verbose table.""" + if self.with_counts: + return ["Non-Null Count", "Dtype"] + return ["Dtype"] + + def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]: + """Iterator with string representation of body data without counts.""" + yield from self._gen_dtypes() + + def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]: + """Iterator with string representation of body data with counts.""" + yield from zip( + self._gen_non_null_counts(), + self._gen_dtypes(), + ) + + +def _get_dataframe_dtype_counts(df: DataFrame) -> Mapping[str, int]: + """ + Create mapping between datatypes and their number of occurrences. + """ + # groupby dtype.name to collect e.g. Categorical columns + return df.dtypes.value_counts().groupby(lambda x: x.name).sum() diff --git a/.local/lib/python3.8/site-packages/pandas/io/formats/latex.py b/.local/lib/python3.8/site-packages/pandas/io/formats/latex.py new file mode 100644 index 0000000..3ffb60a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/formats/latex.py @@ -0,0 +1,830 @@ +""" +Module for formatting output data in Latex. +""" +from __future__ import annotations + +from abc import ( + ABC, + abstractmethod, +) +from typing import ( + Iterator, + Sequence, +) + +import numpy as np + +from pandas.core.dtypes.generic import ABCMultiIndex + +from pandas.io.formats.format import DataFrameFormatter + + +def _split_into_full_short_caption( + caption: str | tuple[str, str] | None +) -> tuple[str, str]: + """Extract full and short captions from caption string/tuple. + + Parameters + ---------- + caption : str or tuple, optional + Either table caption string or tuple (full_caption, short_caption). + If string is provided, then it is treated as table full caption, + while short_caption is considered an empty string. + + Returns + ------- + full_caption, short_caption : tuple + Tuple of full_caption, short_caption strings. + """ + if caption: + if isinstance(caption, str): + full_caption = caption + short_caption = "" + else: + try: + full_caption, short_caption = caption + except ValueError as err: + msg = "caption must be either a string or a tuple of two strings" + raise ValueError(msg) from err + else: + full_caption = "" + short_caption = "" + return full_caption, short_caption + + +class RowStringConverter(ABC): + r"""Converter for dataframe rows into LaTeX strings. + + Parameters + ---------- + formatter : `DataFrameFormatter` + Instance of `DataFrameFormatter`. + multicolumn: bool, optional + Whether to use \multicolumn macro. + multicolumn_format: str, optional + Multicolumn format. + multirow: bool, optional + Whether to use \multirow macro. 
+ + """ + + def __init__( + self, + formatter: DataFrameFormatter, + multicolumn: bool = False, + multicolumn_format: str | None = None, + multirow: bool = False, + ): + self.fmt = formatter + self.frame = self.fmt.frame + self.multicolumn = multicolumn + self.multicolumn_format = multicolumn_format + self.multirow = multirow + self.clinebuf: list[list[int]] = [] + self.strcols = self._get_strcols() + self.strrows = list(zip(*self.strcols)) + + def get_strrow(self, row_num: int) -> str: + """Get string representation of the row.""" + row = self.strrows[row_num] + + is_multicol = ( + row_num < self.column_levels and self.fmt.header and self.multicolumn + ) + + is_multirow = ( + row_num >= self.header_levels + and self.fmt.index + and self.multirow + and self.index_levels > 1 + ) + + is_cline_maybe_required = is_multirow and row_num < len(self.strrows) - 1 + + crow = self._preprocess_row(row) + + if is_multicol: + crow = self._format_multicolumn(crow) + if is_multirow: + crow = self._format_multirow(crow, row_num) + + lst = [] + lst.append(" & ".join(crow)) + lst.append(" \\\\") + if is_cline_maybe_required: + cline = self._compose_cline(row_num, len(self.strcols)) + lst.append(cline) + return "".join(lst) + + @property + def _header_row_num(self) -> int: + """Number of rows in header.""" + return self.header_levels if self.fmt.header else 0 + + @property + def index_levels(self) -> int: + """Integer number of levels in index.""" + return self.frame.index.nlevels + + @property + def column_levels(self) -> int: + return self.frame.columns.nlevels + + @property + def header_levels(self) -> int: + nlevels = self.column_levels + if self.fmt.has_index_names and self.fmt.show_index_names: + nlevels += 1 + return nlevels + + def _get_strcols(self) -> list[list[str]]: + """String representation of the columns.""" + if self.fmt.frame.empty: + strcols = [[self._empty_info_line]] + else: + strcols = self.fmt.get_strcols() + + # reestablish the MultiIndex that has been joined by get_strcols() + if self.fmt.index and isinstance(self.frame.index, ABCMultiIndex): + out = self.frame.index.format( + adjoin=False, + sparsify=self.fmt.sparsify, + names=self.fmt.has_index_names, + na_rep=self.fmt.na_rep, + ) + + # index.format will sparsify repeated entries with empty strings + # so pad these with some empty space + def pad_empties(x): + for pad in reversed(x): + if pad: + break + return [x[0]] + [i if i else " " * len(pad) for i in x[1:]] + + gen = (pad_empties(i) for i in out) + + # Add empty spaces for each column level + clevels = self.frame.columns.nlevels + out = [[" " * len(i[-1])] * clevels + i for i in gen] + + # Add the column names to the last index column + cnames = self.frame.columns.names + if any(cnames): + new_names = [i if i else "{}" for i in cnames] + out[self.frame.index.nlevels - 1][:clevels] = new_names + + # Get rid of old multiindex column and add new ones + strcols = out + strcols[1:] + return strcols + + @property + def _empty_info_line(self): + return ( + f"Empty {type(self.frame).__name__}\n" + f"Columns: {self.frame.columns}\n" + f"Index: {self.frame.index}" + ) + + def _preprocess_row(self, row: Sequence[str]) -> list[str]: + """Preprocess elements of the row.""" + if self.fmt.escape: + crow = _escape_symbols(row) + else: + crow = [x if x else "{}" for x in row] + if self.fmt.bold_rows and self.fmt.index: + crow = _convert_to_bold(crow, self.index_levels) + return crow + + def _format_multicolumn(self, row: list[str]) -> list[str]: + r""" + Combine columns belonging to a group to a 
single multicolumn entry
+        according to self.multicolumn_format
+
+        e.g.:
+        a &  &  & b & c &
+        will become
+        \multicolumn{3}{l}{a} & b & \multicolumn{2}{l}{c}
+        """
+        row2 = row[: self.index_levels]
+        ncol = 1
+        coltext = ""
+
+        def append_col():
+            # write multicolumn if needed
+            if ncol > 1:
+                row2.append(
+                    f"\\multicolumn{{{ncol:d}}}{{{self.multicolumn_format}}}"
+                    f"{{{coltext.strip()}}}"
+                )
+            # don't modify where not needed
+            else:
+                row2.append(coltext)
+
+        for c in row[self.index_levels :]:
+            # if next col has text, write the previous
+            if c.strip():
+                if coltext:
+                    append_col()
+                coltext = c
+                ncol = 1
+            # if not, add it to the previous multicolumn
+            else:
+                ncol += 1
+        # write last column name
+        if coltext:
+            append_col()
+        return row2
+
+    def _format_multirow(self, row: list[str], i: int) -> list[str]:
+        r"""
+        Check the following rows to determine whether this row should be a multirow
+
+        e.g.:     becomes:
+        a & 0 &   \multirow{2}{*}{a} & 0 &
+          & 1 &   & 1 &
+        b & 0 &   \cline{1-2}
+                  b & 0 &
+        """
+        for j in range(self.index_levels):
+            if row[j].strip():
+                nrow = 1
+                for r in self.strrows[i + 1 :]:
+                    if not r[j].strip():
+                        nrow += 1
+                    else:
+                        break
+                if nrow > 1:
+                    # overwrite non-multirow entry
+                    row[j] = f"\\multirow{{{nrow:d}}}{{*}}{{{row[j].strip()}}}"
+                    # save when to end the current block with \cline
+                    self.clinebuf.append([i + nrow - 1, j + 1])
+        return row
+
+    def _compose_cline(self, i: int, icol: int) -> str:
+        """
+        Create clines after multirow-blocks are finished.
+        """
+        lst = []
+        for cl in self.clinebuf:
+            if cl[0] == i:
+                lst.append(f"\n\\cline{{{cl[1]:d}-{icol:d}}}")
+                # remove entries that have been written to buffer
+                self.clinebuf = [x for x in self.clinebuf if x[0] != i]
+        return "".join(lst)
+
+
+class RowStringIterator(RowStringConverter):
+    """Iterator over rows of the header or the body of the table."""
+
+    @abstractmethod
+    def __iter__(self) -> Iterator[str]:
+        """Iterate over LaTeX string representations of rows."""
+
+
+class RowHeaderIterator(RowStringIterator):
+    """Iterator for the table header rows."""
+
+    def __iter__(self) -> Iterator[str]:
+        for row_num in range(len(self.strrows)):
+            if row_num < self._header_row_num:
+                yield self.get_strrow(row_num)
+
+
+class RowBodyIterator(RowStringIterator):
+    """Iterator for the table body rows."""
+
+    def __iter__(self) -> Iterator[str]:
+        for row_num in range(len(self.strrows)):
+            if row_num >= self._header_row_num:
+                yield self.get_strrow(row_num)
+
+
+class TableBuilderAbstract(ABC):
+    """
+    Abstract table builder producing string representation of LaTeX table.
+
+    Parameters
+    ----------
+    formatter : `DataFrameFormatter`
+        Instance of `DataFrameFormatter`.
+    column_format: str, optional
+        Column format, for example, 'rcl' for three columns.
+    multicolumn: bool, optional
+        Use multicolumn to enhance MultiIndex columns.
+    multicolumn_format: str, optional
+        The alignment for multicolumns, similar to column_format.
+    multirow: bool, optional
+        Use multirow to enhance MultiIndex rows.
+    caption: str, optional
+        Table caption.
+    short_caption: str, optional
+        Table short caption.
+    label: str, optional
+        LaTeX label.
+    position: str, optional
+        Float placement specifier, for example, 'htb'.
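+
+    Illustrative usage via the public ``DataFrame.to_latex`` keywords that
+    feed these parameters (an editorial addition to this copy of the file,
+    not upstream pandas text):
+
+    >>> import pandas as pd  # doctest: +SKIP
+    >>> df = pd.DataFrame({"a": [1, 2]})  # doctest: +SKIP
+    >>> print(df.to_latex(caption="demo table", label="tab:demo",
+    ...                   position="htb"))  # doctest: +SKIP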
+ """ + + def __init__( + self, + formatter: DataFrameFormatter, + column_format: str | None = None, + multicolumn: bool = False, + multicolumn_format: str | None = None, + multirow: bool = False, + caption: str | None = None, + short_caption: str | None = None, + label: str | None = None, + position: str | None = None, + ): + self.fmt = formatter + self.column_format = column_format + self.multicolumn = multicolumn + self.multicolumn_format = multicolumn_format + self.multirow = multirow + self.caption = caption + self.short_caption = short_caption + self.label = label + self.position = position + + def get_result(self) -> str: + """String representation of LaTeX table.""" + elements = [ + self.env_begin, + self.top_separator, + self.header, + self.middle_separator, + self.env_body, + self.bottom_separator, + self.env_end, + ] + result = "\n".join([item for item in elements if item]) + trailing_newline = "\n" + result += trailing_newline + return result + + @property + @abstractmethod + def env_begin(self) -> str: + """Beginning of the environment.""" + + @property + @abstractmethod + def top_separator(self) -> str: + """Top level separator.""" + + @property + @abstractmethod + def header(self) -> str: + """Header lines.""" + + @property + @abstractmethod + def middle_separator(self) -> str: + """Middle level separator.""" + + @property + @abstractmethod + def env_body(self) -> str: + """Environment body.""" + + @property + @abstractmethod + def bottom_separator(self) -> str: + """Bottom level separator.""" + + @property + @abstractmethod + def env_end(self) -> str: + """End of the environment.""" + + +class GenericTableBuilder(TableBuilderAbstract): + """Table builder producing string representation of LaTeX table.""" + + @property + def header(self) -> str: + iterator = self._create_row_iterator(over="header") + return "\n".join(list(iterator)) + + @property + def top_separator(self) -> str: + return "\\toprule" + + @property + def middle_separator(self) -> str: + return "\\midrule" if self._is_separator_required() else "" + + @property + def env_body(self) -> str: + iterator = self._create_row_iterator(over="body") + return "\n".join(list(iterator)) + + def _is_separator_required(self) -> bool: + return bool(self.header and self.env_body) + + @property + def _position_macro(self) -> str: + r"""Position macro, extracted from self.position, like [h].""" + return f"[{self.position}]" if self.position else "" + + @property + def _caption_macro(self) -> str: + r"""Caption macro, extracted from self.caption. + + With short caption: + \caption[short_caption]{caption_string}. + + Without short caption: + \caption{caption_string}. + """ + if self.caption: + return "".join( + [ + r"\caption", + f"[{self.short_caption}]" if self.short_caption else "", + f"{{{self.caption}}}", + ] + ) + return "" + + @property + def _label_macro(self) -> str: + r"""Label macro, extracted from self.label, like \label{ref}.""" + return f"\\label{{{self.label}}}" if self.label else "" + + def _create_row_iterator(self, over: str) -> RowStringIterator: + """Create iterator over header or body of the table. + + Parameters + ---------- + over : {'body', 'header'} + Over what to iterate. + + Returns + ------- + RowStringIterator + Iterator over body or header. 
+ """ + iterator_kind = self._select_iterator(over) + return iterator_kind( + formatter=self.fmt, + multicolumn=self.multicolumn, + multicolumn_format=self.multicolumn_format, + multirow=self.multirow, + ) + + def _select_iterator(self, over: str) -> type[RowStringIterator]: + """Select proper iterator over table rows.""" + if over == "header": + return RowHeaderIterator + elif over == "body": + return RowBodyIterator + else: + msg = f"'over' must be either 'header' or 'body', but {over} was provided" + raise ValueError(msg) + + +class LongTableBuilder(GenericTableBuilder): + """Concrete table builder for longtable. + + >>> from pandas.io.formats import format as fmt + >>> df = pd.DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + >>> formatter = fmt.DataFrameFormatter(df) + >>> builder = LongTableBuilder(formatter, caption='a long table', + ... label='tab:long', column_format='lrl') + >>> table = builder.get_result() + >>> print(table) + \\begin{longtable}{lrl} + \\caption{a long table} + \\label{tab:long}\\\\ + \\toprule + {} & a & b \\\\ + \\midrule + \\endfirsthead + \\caption[]{a long table} \\\\ + \\toprule + {} & a & b \\\\ + \\midrule + \\endhead + \\midrule + \\multicolumn{3}{r}{{Continued on next page}} \\\\ + \\midrule + \\endfoot + + \\bottomrule + \\endlastfoot + 0 & 1 & b1 \\\\ + 1 & 2 & b2 \\\\ + \\end{longtable} + + """ + + @property + def env_begin(self) -> str: + first_row = ( + f"\\begin{{longtable}}{self._position_macro}{{{self.column_format}}}" + ) + elements = [first_row, f"{self._caption_and_label()}"] + return "\n".join([item for item in elements if item]) + + def _caption_and_label(self) -> str: + if self.caption or self.label: + double_backslash = "\\\\" + elements = [f"{self._caption_macro}", f"{self._label_macro}"] + caption_and_label = "\n".join([item for item in elements if item]) + caption_and_label += double_backslash + return caption_and_label + else: + return "" + + @property + def middle_separator(self) -> str: + iterator = self._create_row_iterator(over="header") + + # the content between \endfirsthead and \endhead commands + # mitigates repeated List of Tables entries in the final LaTeX + # document when dealing with longtable environments; GH #34360 + elements = [ + "\\midrule", + "\\endfirsthead", + f"\\caption[]{{{self.caption}}} \\\\" if self.caption else "", + self.top_separator, + self.header, + "\\midrule", + "\\endhead", + "\\midrule", + f"\\multicolumn{{{len(iterator.strcols)}}}{{r}}" + "{{Continued on next page}} \\\\", + "\\midrule", + "\\endfoot\n", + "\\bottomrule", + "\\endlastfoot", + ] + if self._is_separator_required(): + return "\n".join(elements) + return "" + + @property + def bottom_separator(self) -> str: + return "" + + @property + def env_end(self) -> str: + return "\\end{longtable}" + + +class RegularTableBuilder(GenericTableBuilder): + """Concrete table builder for regular table. + + >>> from pandas.io.formats import format as fmt + >>> df = pd.DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + >>> formatter = fmt.DataFrameFormatter(df) + >>> builder = RegularTableBuilder(formatter, caption='caption', label='lab', + ... 
column_format='lrc') + >>> table = builder.get_result() + >>> print(table) + \\begin{table} + \\centering + \\caption{caption} + \\label{lab} + \\begin{tabular}{lrc} + \\toprule + {} & a & b \\\\ + \\midrule + 0 & 1 & b1 \\\\ + 1 & 2 & b2 \\\\ + \\bottomrule + \\end{tabular} + \\end{table} + + """ + + @property + def env_begin(self) -> str: + elements = [ + f"\\begin{{table}}{self._position_macro}", + "\\centering", + f"{self._caption_macro}", + f"{self._label_macro}", + f"\\begin{{tabular}}{{{self.column_format}}}", + ] + return "\n".join([item for item in elements if item]) + + @property + def bottom_separator(self) -> str: + return "\\bottomrule" + + @property + def env_end(self) -> str: + return "\n".join(["\\end{tabular}", "\\end{table}"]) + + +class TabularBuilder(GenericTableBuilder): + """Concrete table builder for tabular environment. + + >>> from pandas.io.formats import format as fmt + >>> df = pd.DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + >>> formatter = fmt.DataFrameFormatter(df) + >>> builder = TabularBuilder(formatter, column_format='lrc') + >>> table = builder.get_result() + >>> print(table) + \\begin{tabular}{lrc} + \\toprule + {} & a & b \\\\ + \\midrule + 0 & 1 & b1 \\\\ + 1 & 2 & b2 \\\\ + \\bottomrule + \\end{tabular} + + """ + + @property + def env_begin(self) -> str: + return f"\\begin{{tabular}}{{{self.column_format}}}" + + @property + def bottom_separator(self) -> str: + return "\\bottomrule" + + @property + def env_end(self) -> str: + return "\\end{tabular}" + + +class LatexFormatter: + r""" + Used to render a DataFrame to a LaTeX tabular/longtable environment output. + + Parameters + ---------- + formatter : `DataFrameFormatter` + longtable : bool, default False + Use longtable environment. + column_format : str, default None + The columns format as specified in `LaTeX table format + `__ e.g 'rcl' for 3 columns + multicolumn : bool, default False + Use \multicolumn to enhance MultiIndex columns. + multicolumn_format : str, default 'l' + The alignment for multicolumns, similar to `column_format` + multirow : bool, default False + Use \multirow to enhance MultiIndex rows. + caption : str or tuple, optional + Tuple (full_caption, short_caption), + which results in \caption[short_caption]{full_caption}; + if a single string is passed, no short caption will be set. + label : str, optional + The LaTeX label to be placed inside ``\label{}`` in the output. + position : str, optional + The LaTeX positional argument for tables, to be placed after + ``\begin{}`` in the output. + + See Also + -------- + HTMLFormatter + """ + + def __init__( + self, + formatter: DataFrameFormatter, + longtable: bool = False, + column_format: str | None = None, + multicolumn: bool = False, + multicolumn_format: str | None = None, + multirow: bool = False, + caption: str | tuple[str, str] | None = None, + label: str | None = None, + position: str | None = None, + ): + self.fmt = formatter + self.frame = self.fmt.frame + self.longtable = longtable + self.column_format = column_format + self.multicolumn = multicolumn + self.multicolumn_format = multicolumn_format + self.multirow = multirow + self.caption, self.short_caption = _split_into_full_short_caption(caption) + self.label = label + self.position = position + + def to_string(self) -> str: + """ + Render a DataFrame to a LaTeX tabular, longtable, or table/tabular + environment output. + """ + return self.builder.get_result() + + @property + def builder(self) -> TableBuilderAbstract: + """Concrete table builder. 
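
In terms of the public API, the three builders correspond to DataFrame.to_latex arguments; the selection logic in _select_builder (below) dispatches accordingly. A quick illustration:

import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": ["b1", "b2"]})

print(df.to_latex())                # bare tabular     -> TabularBuilder
print(df.to_latex(caption="demo",
                  label="tab:demo",
                  position="htb"))  # table + tabular  -> RegularTableBuilder
print(df.to_latex(longtable=True))  # longtable        -> LongTableBuilder
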
+ + Returns + ------- + TableBuilder + """ + builder = self._select_builder() + return builder( + formatter=self.fmt, + column_format=self.column_format, + multicolumn=self.multicolumn, + multicolumn_format=self.multicolumn_format, + multirow=self.multirow, + caption=self.caption, + short_caption=self.short_caption, + label=self.label, + position=self.position, + ) + + def _select_builder(self) -> type[TableBuilderAbstract]: + """Select proper table builder.""" + if self.longtable: + return LongTableBuilder + if any([self.caption, self.label, self.position]): + return RegularTableBuilder + return TabularBuilder + + @property + def column_format(self) -> str | None: + """Column format.""" + return self._column_format + + @column_format.setter + def column_format(self, input_column_format: str | None) -> None: + """Setter for column format.""" + if input_column_format is None: + self._column_format = ( + self._get_index_format() + self._get_column_format_based_on_dtypes() + ) + elif not isinstance(input_column_format, str): + raise ValueError( + f"column_format must be str or unicode, " + f"not {type(input_column_format)}" + ) + else: + self._column_format = input_column_format + + def _get_column_format_based_on_dtypes(self) -> str: + """Get column format based on data type. + + Right alignment for numbers and left - for strings. + """ + + def get_col_type(dtype): + if issubclass(dtype.type, np.number): + return "r" + return "l" + + dtypes = self.frame.dtypes._values + return "".join(map(get_col_type, dtypes)) + + def _get_index_format(self) -> str: + """Get index column format.""" + return "l" * self.frame.index.nlevels if self.fmt.index else "" + + +def _escape_symbols(row: Sequence[str]) -> list[str]: + """Carry out string replacements for special symbols. + + Parameters + ---------- + row : list + List of string, that may contain special symbols. + + Returns + ------- + list + list of strings with the special symbols replaced. + """ + return [ + ( + x.replace("\\", "\\textbackslash ") + .replace("_", "\\_") + .replace("%", "\\%") + .replace("$", "\\$") + .replace("#", "\\#") + .replace("{", "\\{") + .replace("}", "\\}") + .replace("~", "\\textasciitilde ") + .replace("^", "\\textasciicircum ") + .replace("&", "\\&") + if (x and x != "{}") + else "{}" + ) + for x in row + ] + + +def _convert_to_bold(crow: Sequence[str], ilevels: int) -> list[str]: + """Convert elements in ``crow`` to bold.""" + return [ + f"\\textbf{{{x}}}" if j < ilevels and x.strip() not in ["", "{}"] else x + for j, x in enumerate(crow) + ] + + +if __name__ == "__main__": + import doctest + + doctest.testmod() diff --git a/.local/lib/python3.8/site-packages/pandas/io/formats/printing.py b/.local/lib/python3.8/site-packages/pandas/io/formats/printing.py new file mode 100644 index 0000000..7743153 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/formats/printing.py @@ -0,0 +1,511 @@ +""" +Printing tools. +""" +from __future__ import annotations + +import sys +from typing import ( + Any, + Callable, + Dict, + Iterable, + Mapping, + Sequence, + TypeVar, + Union, +) + +from pandas._config import get_option + +from pandas.core.dtypes.inference import is_sequence + +EscapeChars = Union[Mapping[str, str], Iterable[str]] +_KT = TypeVar("_KT") +_VT = TypeVar("_VT") + + +def adjoin(space: int, *lists: list[str], **kwargs) -> str: + """ + Glues together two sets of strings using the amount of space requested. + The idea is to prettify. 
+
+    Parameters
+    ----------
+    space : int
+        number of spaces for padding
+    lists : list of str
+        the lists of strings being joined
+    strlen : callable
+        function used to calculate the length of each str. Needed for unicode
+        handling.
+    justfunc : callable
+        function used to justify str. Needed for unicode handling.
+    """
+    strlen = kwargs.pop("strlen", len)
+    justfunc = kwargs.pop("justfunc", justify)
+
+    out_lines = []
+    newLists = []
+    # pad every column except the last one
+    lengths = [max(map(strlen, x)) + space for x in lists[:-1]]
+    lengths.append(max(map(len, lists[-1])))
+    maxLen = max(map(len, lists))
+    for i, lst in enumerate(lists):
+        nl = justfunc(lst, lengths[i], mode="left")
+        nl.extend([" " * lengths[i]] * (maxLen - len(lst)))
+        newLists.append(nl)
+    toJoin = zip(*newLists)
+    for lines in toJoin:
+        out_lines.append("".join(lines))
+    return "\n".join(out_lines)
+
+
+def justify(texts: Iterable[str], max_len: int, mode: str = "right") -> list[str]:
+    """
+    Perform ljust, center, rjust against string or list-like
+    """
+    if mode == "left":
+        return [x.ljust(max_len) for x in texts]
+    elif mode == "center":
+        return [x.center(max_len) for x in texts]
+    else:
+        return [x.rjust(max_len) for x in texts]
+
+
+# Unicode consolidation
+# ---------------------
+#
+# pprinting utility functions for generating Unicode text or
+# bytes(3.x)/str(2.x) representations of objects.
+# Try to use these as much as possible rather than rolling your own.
+#
+# When to use
+# -----------
+#
+# 1) If you're writing code internal to pandas (no I/O directly involved),
+#    use pprint_thing().
+#
+#    It will always return unicode text which can be handled by other
+#    parts of the package without breakage.
+#
+# 2) If you need to write something out to file, use
+#    pprint_thing_encoded(encoding).
+#
+#    If no encoding is specified, it defaults to utf-8. Since encoding pure
+#    ascii with utf-8 is a no-op you can safely use the default utf-8 if you're
+#    working with straight ascii.
+
+
+def _pprint_seq(
+    seq: Sequence, _nest_lvl: int = 0, max_seq_items: int | None = None, **kwds
+) -> str:
+    """
+    internal. pprinter for iterables. you should probably use pprint_thing()
+    rather than calling this directly.
+
+    bounds length of printed sequence, depending on options
+    """
+    if isinstance(seq, set):
+        fmt = "{{{body}}}"
+    else:
+        fmt = "[{body}]" if hasattr(seq, "__setitem__") else "({body})"
+
+    if max_seq_items is False:
+        nitems = len(seq)
+    else:
+        nitems = max_seq_items or get_option("max_seq_items") or len(seq)
+
+    s = iter(seq)
+    # handle sets, no slicing
+    r = [
+        pprint_thing(next(s), _nest_lvl + 1, max_seq_items=max_seq_items, **kwds)
+        for i in range(min(nitems, len(seq)))
+    ]
+    body = ", ".join(r)
+
+    if nitems < len(seq):
+        body += ", ..."
+    elif isinstance(seq, tuple) and len(seq) == 1:
+        body += ","
+
+    return fmt.format(body=body)
+
+
+def _pprint_dict(
+    seq: Mapping, _nest_lvl: int = 0, max_seq_items: int | None = None, **kwds
+) -> str:
+    """
+    internal. pprinter for dict-likes. you should probably use pprint_thing()
+    rather than calling this directly.
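
A tiny usage sketch of adjoin/justify as defined above:

cols = [["a", "bb"], ["ccc", "d"]]
print(adjoin(2, *cols))
# a   ccc
# bb  d
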
+ """ + fmt = "{{{things}}}" + pairs = [] + + pfmt = "{key}: {val}" + + if max_seq_items is False: + nitems = len(seq) + else: + nitems = max_seq_items or get_option("max_seq_items") or len(seq) + + for k, v in list(seq.items())[:nitems]: + pairs.append( + pfmt.format( + key=pprint_thing(k, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds), + val=pprint_thing(v, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds), + ) + ) + + if nitems < len(seq): + return fmt.format(things=", ".join(pairs) + ", ...") + else: + return fmt.format(things=", ".join(pairs)) + + +def pprint_thing( + thing: Any, + _nest_lvl: int = 0, + escape_chars: EscapeChars | None = None, + default_escapes: bool = False, + quote_strings: bool = False, + max_seq_items: int | None = None, +) -> str: + """ + This function is the sanctioned way of converting objects + to a string representation and properly handles nested sequences. + + Parameters + ---------- + thing : anything to be formatted + _nest_lvl : internal use only. pprint_thing() is mutually-recursive + with pprint_sequence, this argument is used to keep track of the + current nesting level, and limit it. + escape_chars : list or dict, optional + Characters to escape. If a dict is passed the values are the + replacements + default_escapes : bool, default False + Whether the input escape characters replaces or adds to the defaults + max_seq_items : int or None, default None + Pass through to other pretty printers to limit sequence printing + + Returns + ------- + str + """ + + def as_escaped_string( + thing: Any, escape_chars: EscapeChars | None = escape_chars + ) -> str: + translate = {"\t": r"\t", "\n": r"\n", "\r": r"\r"} + if isinstance(escape_chars, dict): + if default_escapes: + translate.update(escape_chars) + else: + translate = escape_chars + escape_chars = list(escape_chars.keys()) + else: + escape_chars = escape_chars or () + + result = str(thing) + for c in escape_chars: + result = result.replace(c, translate[c]) + return result + + if hasattr(thing, "__next__"): + return str(thing) + elif isinstance(thing, dict) and _nest_lvl < get_option( + "display.pprint_nest_depth" + ): + result = _pprint_dict( + thing, _nest_lvl, quote_strings=True, max_seq_items=max_seq_items + ) + elif is_sequence(thing) and _nest_lvl < get_option("display.pprint_nest_depth"): + result = _pprint_seq( + thing, + _nest_lvl, + escape_chars=escape_chars, + quote_strings=quote_strings, + max_seq_items=max_seq_items, + ) + elif isinstance(thing, str) and quote_strings: + result = f"'{as_escaped_string(thing)}'" + else: + result = as_escaped_string(thing) + + return result + + +def pprint_thing_encoded( + object, encoding: str = "utf-8", errors: str = "replace" +) -> bytes: + value = pprint_thing(object) # get unicode representation of object + return value.encode(encoding, errors) + + +def enable_data_resource_formatter(enable: bool) -> None: + if "IPython" not in sys.modules: + # definitely not in IPython + return + from IPython import get_ipython + + ip = get_ipython() + if ip is None: + # still not in IPython + return + + formatters = ip.display_formatter.formatters + mimetype = "application/vnd.dataresource+json" + + if enable: + if mimetype not in formatters: + # define tableschema formatter + from IPython.core.formatters import BaseFormatter + + class TableSchemaFormatter(BaseFormatter): + print_method = "_repr_data_resource_" + _return_type = (dict,) + + # register it: + formatters[mimetype] = TableSchemaFormatter() + # enable it if it's been disabled: + 
formatters[mimetype].enabled = True + else: + # unregister tableschema mime-type + if mimetype in formatters: + formatters[mimetype].enabled = False + + +def default_pprint(thing: Any, max_seq_items: int | None = None) -> str: + return pprint_thing( + thing, + escape_chars=("\t", "\r", "\n"), + quote_strings=True, + max_seq_items=max_seq_items, + ) + + +def format_object_summary( + obj, + formatter: Callable, + is_justify: bool = True, + name: str | None = None, + indent_for_name: bool = True, + line_break_each_value: bool = False, +) -> str: + """ + Return the formatted obj as a unicode string + + Parameters + ---------- + obj : object + must be iterable and support __getitem__ + formatter : callable + string formatter for an element + is_justify : bool + should justify the display + name : name, optional + defaults to the class name of the obj + indent_for_name : bool, default True + Whether subsequent lines should be indented to + align with the name. + line_break_each_value : bool, default False + If True, inserts a line break for each value of ``obj``. + If False, only break lines when the a line of values gets wider + than the display width. + + .. versionadded:: 0.25.0 + + Returns + ------- + summary string + """ + from pandas.io.formats.console import get_console_size + from pandas.io.formats.format import get_adjustment + + display_width, _ = get_console_size() + if display_width is None: + display_width = get_option("display.width") or 80 + if name is None: + name = type(obj).__name__ + + if indent_for_name: + name_len = len(name) + space1 = f'\n{(" " * (name_len + 1))}' + space2 = f'\n{(" " * (name_len + 2))}' + else: + space1 = "\n" + space2 = "\n " # space for the opening '[' + + n = len(obj) + if line_break_each_value: + # If we want to vertically align on each value of obj, we need to + # separate values by a line break and indent the values + sep = ",\n " + " " * len(name) + else: + sep = "," + max_seq_items = get_option("display.max_seq_items") or n + + # are we a truncated display + is_truncated = n > max_seq_items + + # adj can optionally handle unicode eastern asian width + adj = get_adjustment() + + def _extend_line( + s: str, line: str, value: str, display_width: int, next_line_prefix: str + ) -> tuple[str, str]: + + if adj.len(line.rstrip()) + adj.len(value.rstrip()) >= display_width: + s += line.rstrip() + line = next_line_prefix + line += value + return s, line + + def best_len(values: list[str]) -> int: + if values: + return max(adj.len(x) for x in values) + else: + return 0 + + close = ", " + + if n == 0: + summary = f"[]{close}" + elif n == 1 and not line_break_each_value: + first = formatter(obj[0]) + summary = f"[{first}]{close}" + elif n == 2 and not line_break_each_value: + first = formatter(obj[0]) + last = formatter(obj[-1]) + summary = f"[{first}, {last}]{close}" + else: + + if max_seq_items == 1: + # If max_seq_items=1 show only last element + head = [] + tail = [formatter(x) for x in obj[-1:]] + elif n > max_seq_items: + n = min(max_seq_items // 2, 10) + head = [formatter(x) for x in obj[:n]] + tail = [formatter(x) for x in obj[-n:]] + else: + head = [] + tail = [formatter(x) for x in obj] + + # adjust all values to max length if needed + if is_justify: + if line_break_each_value: + # Justify each string in the values of head and tail, so the + # strings will right align when head and tail are stacked + # vertically. 
+ head, tail = _justify(head, tail) + elif is_truncated or not ( + len(", ".join(head)) < display_width + and len(", ".join(tail)) < display_width + ): + # Each string in head and tail should align with each other + max_length = max(best_len(head), best_len(tail)) + head = [x.rjust(max_length) for x in head] + tail = [x.rjust(max_length) for x in tail] + # If we are not truncated and we are only a single + # line, then don't justify + + if line_break_each_value: + # Now head and tail are of type List[Tuple[str]]. Below we + # convert them into List[str], so there will be one string per + # value. Also truncate items horizontally if wider than + # max_space + max_space = display_width - len(space2) + value = tail[0] + for max_items in reversed(range(1, len(value) + 1)): + pprinted_seq = _pprint_seq(value, max_seq_items=max_items) + if len(pprinted_seq) < max_space: + break + head = [_pprint_seq(x, max_seq_items=max_items) for x in head] + tail = [_pprint_seq(x, max_seq_items=max_items) for x in tail] + + summary = "" + line = space2 + + for max_items in range(len(head)): + word = head[max_items] + sep + " " + summary, line = _extend_line(summary, line, word, display_width, space2) + + if is_truncated: + # remove trailing space of last line + summary += line.rstrip() + space2 + "..." + line = space2 + + for max_items in range(len(tail) - 1): + word = tail[max_items] + sep + " " + summary, line = _extend_line(summary, line, word, display_width, space2) + + # last value: no sep added + 1 space of width used for trailing ',' + summary, line = _extend_line(summary, line, tail[-1], display_width - 2, space2) + summary += line + + # right now close is either '' or ', ' + # Now we want to include the ']', but not the maybe space. + close = "]" + close.rstrip(" ") + summary += close + + if len(summary) > (display_width) or line_break_each_value: + summary += space1 + else: # one row + summary += " " + + # remove initial space + summary = "[" + summary[len(space2) :] + + return summary + + +def _justify( + head: list[Sequence[str]], tail: list[Sequence[str]] +) -> tuple[list[tuple[str, ...]], list[tuple[str, ...]]]: + """ + Justify items in head and tail, so they are right-aligned when stacked. + + Parameters + ---------- + head : list-like of list-likes of strings + tail : list-like of list-likes of strings + + Returns + ------- + tuple of list of tuples of strings + Same as head and tail, but items are right aligned when stacked + vertically. + + Examples + -------- + >>> _justify([['a', 'b']], [['abc', 'abcd']]) + ([(' a', ' b')], [('abc', 'abcd')]) + """ + combined = head + tail + + # For each position for the sequences in ``combined``, + # find the length of the largest string. 
+ max_length = [0] * len(combined[0]) + for inner_seq in combined: + length = [len(item) for item in inner_seq] + max_length = [max(x, y) for x, y in zip(max_length, length)] + + # justify each item in each list-like in head and tail using max_length + head = [ + tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) for seq in head + ] + tail = [ + tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) for seq in tail + ] + # https://github.com/python/mypy/issues/4975 + # error: Incompatible return value type (got "Tuple[List[Sequence[str]], + # List[Sequence[str]]]", expected "Tuple[List[Tuple[str, ...]], + # List[Tuple[str, ...]]]") + return head, tail # type: ignore[return-value] + + +class PrettyDict(Dict[_KT, _VT]): + """Dict extension to support abbreviated __repr__""" + + def __repr__(self) -> str: + return pprint_thing(self) diff --git a/.local/lib/python3.8/site-packages/pandas/io/formats/string.py b/.local/lib/python3.8/site-packages/pandas/io/formats/string.py new file mode 100644 index 0000000..90a4800 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/formats/string.py @@ -0,0 +1,207 @@ +""" +Module for formatting output data in console (to string). +""" +from __future__ import annotations + +from shutil import get_terminal_size +from typing import Iterable + +import numpy as np + +from pandas.io.formats.format import DataFrameFormatter +from pandas.io.formats.printing import pprint_thing + + +class StringFormatter: + """Formatter for string representation of a dataframe.""" + + def __init__(self, fmt: DataFrameFormatter, line_width: int | None = None): + self.fmt = fmt + self.adj = fmt.adj + self.frame = fmt.frame + self.line_width = line_width + + def to_string(self) -> str: + text = self._get_string_representation() + if self.fmt.should_show_dimensions: + text = "".join([text, self.fmt.dimensions_info]) + return text + + def _get_strcols(self) -> list[list[str]]: + strcols = self.fmt.get_strcols() + if self.fmt.is_truncated: + strcols = self._insert_dot_separators(strcols) + return strcols + + def _get_string_representation(self) -> str: + if self.fmt.frame.empty: + return self._empty_info_line + + strcols = self._get_strcols() + + if self.line_width is None: + # no need to wrap around just print the whole frame + return self.adj.adjoin(1, *strcols) + + if self._need_to_wrap_around: + return self._join_multiline(strcols) + + return self._fit_strcols_to_terminal_width(strcols) + + @property + def _empty_info_line(self) -> str: + return ( + f"Empty {type(self.frame).__name__}\n" + f"Columns: {pprint_thing(self.frame.columns)}\n" + f"Index: {pprint_thing(self.frame.index)}" + ) + + @property + def _need_to_wrap_around(self) -> bool: + return bool(self.fmt.max_cols is None or self.fmt.max_cols > 0) + + def _insert_dot_separators(self, strcols: list[list[str]]) -> list[list[str]]: + str_index = self.fmt._get_formatted_index(self.fmt.tr_frame) + index_length = len(str_index) + + if self.fmt.is_truncated_horizontally: + strcols = self._insert_dot_separator_horizontal(strcols, index_length) + + if self.fmt.is_truncated_vertically: + strcols = self._insert_dot_separator_vertical(strcols, index_length) + + return strcols + + @property + def _adjusted_tr_col_num(self) -> int: + return self.fmt.tr_col_num + 1 if self.fmt.index else self.fmt.tr_col_num + + def _insert_dot_separator_horizontal( + self, strcols: list[list[str]], index_length: int + ) -> list[list[str]]: + strcols.insert(self._adjusted_tr_col_num, [" ..."] * index_length) + return strcols + + 
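
The truncation and wrapping paths above are reachable through ordinary display options; a small illustration (exact layout depends on the configured width):

import pandas as pd

df = pd.DataFrame({f"c{i}": range(3) for i in range(12)})

# Horizontal truncation splices in the " ..." separator column built above.
with pd.option_context("display.max_columns", 6):
    print(df)

# line_width triggers _join_multiline: column blocks wrap, each continued
# row marked with a trailing backslash.
print(df.to_string(line_width=40))
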
def _insert_dot_separator_vertical( + self, strcols: list[list[str]], index_length: int + ) -> list[list[str]]: + n_header_rows = index_length - len(self.fmt.tr_frame) + row_num = self.fmt.tr_row_num + for ix, col in enumerate(strcols): + cwidth = self.adj.len(col[row_num]) + + if self.fmt.is_truncated_horizontally: + is_dot_col = ix == self._adjusted_tr_col_num + else: + is_dot_col = False + + if cwidth > 3 or is_dot_col: + dots = "..." + else: + dots = ".." + + if ix == 0 and self.fmt.index: + dot_mode = "left" + elif is_dot_col: + cwidth = 4 + dot_mode = "right" + else: + dot_mode = "right" + + dot_str = self.adj.justify([dots], cwidth, mode=dot_mode)[0] + col.insert(row_num + n_header_rows, dot_str) + return strcols + + def _join_multiline(self, strcols_input: Iterable[list[str]]) -> str: + lwidth = self.line_width + adjoin_width = 1 + strcols = list(strcols_input) + + if self.fmt.index: + idx = strcols.pop(0) + lwidth -= np.array([self.adj.len(x) for x in idx]).max() + adjoin_width + + col_widths = [ + np.array([self.adj.len(x) for x in col]).max() if len(col) > 0 else 0 + for col in strcols + ] + + assert lwidth is not None + col_bins = _binify(col_widths, lwidth) + nbins = len(col_bins) + + if self.fmt.is_truncated_vertically: + assert self.fmt.max_rows_fitted is not None + nrows = self.fmt.max_rows_fitted + 1 + else: + nrows = len(self.frame) + + str_lst = [] + start = 0 + for i, end in enumerate(col_bins): + row = strcols[start:end] + if self.fmt.index: + row.insert(0, idx) + if nbins > 1: + if end <= len(strcols) and i < nbins - 1: + row.append([" \\"] + [" "] * (nrows - 1)) + else: + row.append([" "] * nrows) + str_lst.append(self.adj.adjoin(adjoin_width, *row)) + start = end + return "\n\n".join(str_lst) + + def _fit_strcols_to_terminal_width(self, strcols: list[list[str]]) -> str: + from pandas import Series + + lines = self.adj.adjoin(1, *strcols).split("\n") + max_len = Series(lines).str.len().max() + # plus truncate dot col + width, _ = get_terminal_size() + dif = max_len - width + # '+ 1' to avoid too wide repr (GH PR #17023) + adj_dif = dif + 1 + col_lens = Series([Series(ele).apply(len).max() for ele in strcols]) + n_cols = len(col_lens) + counter = 0 + while adj_dif > 0 and n_cols > 1: + counter += 1 + mid = round(n_cols / 2) + mid_ix = col_lens.index[mid] + col_len = col_lens[mid_ix] + # adjoin adds one + adj_dif -= col_len + 1 + col_lens = col_lens.drop(mid_ix) + n_cols = len(col_lens) + + # subtract index column + max_cols_fitted = n_cols - self.fmt.index + # GH-21180. Ensure that we print at least two. 
+ max_cols_fitted = max(max_cols_fitted, 2) + self.fmt.max_cols_fitted = max_cols_fitted + + # Call again _truncate to cut frame appropriately + # and then generate string representation + self.fmt.truncate() + strcols = self._get_strcols() + return self.adj.adjoin(1, *strcols) + + +def _binify(cols: list[int], line_width: int) -> list[int]: + adjoin_width = 1 + bins = [] + curr_width = 0 + i_last_column = len(cols) - 1 + for i, w in enumerate(cols): + w_adjoined = w + adjoin_width + curr_width += w_adjoined + if i_last_column == i: + wrap = curr_width + 1 > line_width and i > 0 + else: + wrap = curr_width + 2 > line_width and i > 0 + if wrap: + bins.append(i) + curr_width = w_adjoined + + bins.append(len(cols)) + return bins diff --git a/.local/lib/python3.8/site-packages/pandas/io/formats/style.py b/.local/lib/python3.8/site-packages/pandas/io/formats/style.py new file mode 100644 index 0000000..10cf854 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/formats/style.py @@ -0,0 +1,3869 @@ +""" +Module for applying conditional formatting to DataFrames and Series. +""" +from __future__ import annotations + +from contextlib import contextmanager +import copy +from functools import partial +import operator +from typing import ( + Any, + Callable, + Hashable, + Sequence, +) +import warnings + +import numpy as np + +from pandas._config import get_option + +from pandas._typing import ( + Axis, + FilePath, + IndexLabel, + Level, + Scalar, + WriteBuffer, +) +from pandas.compat._optional import import_optional_dependency +from pandas.util._decorators import ( + Substitution, + doc, +) +from pandas.util._exceptions import find_stack_level + +import pandas as pd +from pandas import ( + IndexSlice, + RangeIndex, +) +import pandas.core.common as com +from pandas.core.frame import ( + DataFrame, + Series, +) +from pandas.core.generic import NDFrame +from pandas.core.shared_docs import _shared_docs + +from pandas.io.formats.format import save_to_buffer + +jinja2 = import_optional_dependency("jinja2", extra="DataFrame.style requires jinja2.") + +from pandas.io.formats.style_render import ( + CSSProperties, + CSSStyles, + ExtFormatter, + StylerRenderer, + Subset, + Tooltips, + format_table_styles, + maybe_convert_css_to_tuples, + non_reducing_slice, + refactor_levels, +) + +try: + import matplotlib as mpl + import matplotlib.pyplot as plt + + has_mpl = True +except ImportError: + has_mpl = False + no_mpl_message = "{0} requires matplotlib." + + +@contextmanager +def _mpl(func: Callable): + if has_mpl: + yield plt, mpl + else: + raise ImportError(no_mpl_message.format(func.__name__)) + + +#### +# Shared Doc Strings + +subset = """ + subset : label, array-like, IndexSlice, optional + A valid 2d input to `DataFrame.loc[]`, or, in the case of a 1d input + or single key, to `DataFrame.loc[:, ]` where the columns are + prioritised, to limit ``data`` to *before* applying the function. +""" + +props = """ + props : str, default None + CSS properties to use for highlighting. If ``props`` is given, ``color`` + is not used. +""" + +# +### + + +class Styler(StylerRenderer): + r""" + Helps style a DataFrame or Series according to the data with HTML and CSS. + + Parameters + ---------- + data : Series or DataFrame + Data to be styled - either a Series or DataFrame. + precision : int, optional + Precision to round floats to. If not given defaults to + ``pandas.options.styler.format.precision``. + + .. 
versionchanged:: 1.4.0 + table_styles : list-like, default None + List of {selector: (attr, value)} dicts; see Notes. + uuid : str, default None + A unique identifier to avoid CSS collisions; generated automatically. + caption : str, tuple, default None + String caption to attach to the table. Tuple only used for LaTeX dual captions. + table_attributes : str, default None + Items that show up in the opening ``
) elements that will be rendered before
+    trimming will occur over columns, rows or both if needed.
+"""
+
+styler_max_rows = """
+: int, optional
+    The maximum number of rows that will be rendered. May still be reduced to
+    satisfy ``max_elements``, which takes precedence.
+"""
+
+styler_max_columns = """
+: int, optional
+    The maximum number of columns that will be rendered. May still be reduced to
+    satisfy ``max_elements``, which takes precedence.
+"""
+
+styler_precision = """
+: int
+    The precision for floats and complex numbers.
+"""
+
+styler_decimal = """
+: str
+    The character representation for the decimal separator for floats and complex.
+"""
+
+styler_thousands = """
+: str, optional
+    The character representation for thousands separator for floats, int and complex.
+"""
+
+styler_na_rep = """
+: str, optional
+    The string representation for values identified as missing.
+"""
+
+styler_escape = """
+: str, optional
+    Whether to escape certain characters according to the given context; html or latex.
+"""
+
+styler_formatter = """
+: str, callable, dict, optional
+    A formatter object to be used as default within ``Styler.format``.
+"""
+
+styler_multirow_align = """
+: {"c", "t", "b"}
+    The specifier for vertical alignment of sparsified LaTeX multirows.
+"""
+
+styler_multicol_align = r"""
+: {"r", "c", "l", "naive-l", "naive-r"}
+    The specifier for horizontal alignment of sparsified LaTeX multicolumns. Pipe
+    decorators can also be added to non-naive values to draw vertical
+    rules, e.g. "\|r" will draw a rule on the left side of right aligned merged cells.
+"""
+
+styler_hrules = """
+: bool
+    Whether to add horizontal rules on top and bottom and below the headers.
+"""
+
+styler_environment = """
+: str
+    The environment to replace ``\\begin{table}``. If "longtable" is used, results
+    in a specific longtable environment format.
+"""
+
+styler_encoding = """
+: str
+    The encoding used for output HTML and LaTeX files.
+"""
+
+styler_mathjax = """
+: bool
+    If False, will render special CSS classes to table attributes that indicate Mathjax
+    will not be used in Jupyter Notebook.
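
Once the registrations below run, these settings are reachable under the ``styler.`` prefix through the normal options machinery; for example (rendering itself requires jinja2):

import pandas as pd

df = pd.DataFrame({"a": [1.23456789, 2.3456789]})

with pd.option_context("styler.format.precision", 2):
    html = df.style.to_html()  # floats rendered with 2 decimal places
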
+""" + +with cf.config_prefix("styler"): + cf.register_option("sparse.index", True, styler_sparse_index_doc, validator=is_bool) + + cf.register_option( + "sparse.columns", True, styler_sparse_columns_doc, validator=is_bool + ) + + cf.register_option( + "render.repr", + "html", + styler_render_repr, + validator=is_one_of_factory(["html", "latex"]), + ) + + cf.register_option( + "render.max_elements", + 2 ** 18, + styler_max_elements, + validator=is_nonnegative_int, + ) + + cf.register_option( + "render.max_rows", + None, + styler_max_rows, + validator=is_nonnegative_int, + ) + + cf.register_option( + "render.max_columns", + None, + styler_max_columns, + validator=is_nonnegative_int, + ) + + cf.register_option("render.encoding", "utf-8", styler_encoding, validator=is_str) + + cf.register_option("format.decimal", ".", styler_decimal, validator=is_str) + + cf.register_option( + "format.precision", 6, styler_precision, validator=is_nonnegative_int + ) + + cf.register_option( + "format.thousands", + None, + styler_thousands, + validator=is_instance_factory([type(None), str]), + ) + + cf.register_option( + "format.na_rep", + None, + styler_na_rep, + validator=is_instance_factory([type(None), str]), + ) + + cf.register_option( + "format.escape", + None, + styler_escape, + validator=is_one_of_factory([None, "html", "latex"]), + ) + + cf.register_option( + "format.formatter", + None, + styler_formatter, + validator=is_instance_factory([type(None), dict, Callable, str]), + ) + + cf.register_option("html.mathjax", True, styler_mathjax, validator=is_bool) + + cf.register_option( + "latex.multirow_align", + "c", + styler_multirow_align, + validator=is_one_of_factory(["c", "t", "b", "naive"]), + ) + + val_mca = ["r", "|r|", "|r", "r|", "c", "|c|", "|c", "c|", "l", "|l|", "|l", "l|"] + val_mca += ["naive-l", "naive-r"] + cf.register_option( + "latex.multicol_align", + "r", + styler_multicol_align, + validator=is_one_of_factory(val_mca), + ) + + cf.register_option("latex.hrules", False, styler_hrules, validator=is_bool) + + cf.register_option( + "latex.environment", + None, + styler_environment, + validator=is_instance_factory([type(None), str]), + ) diff --git a/.local/lib/python3.8/site-packages/pandas/core/construction.py b/.local/lib/python3.8/site-packages/pandas/core/construction.py new file mode 100644 index 0000000..e496125 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/construction.py @@ -0,0 +1,858 @@ +""" +Constructor functions intended to be shared by pd.array, Series.__init__, +and Index.__new__. + +These should not depend on core.internals. 
+""" +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Any, + Sequence, + cast, +) +import warnings + +import numpy as np +import numpy.ma as ma + +from pandas._libs import lib +from pandas._typing import ( + AnyArrayLike, + ArrayLike, + Dtype, + DtypeObj, +) +from pandas.errors import IntCastingNaNError +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.base import ( + ExtensionDtype, + _registry as registry, +) +from pandas.core.dtypes.cast import ( + construct_1d_arraylike_from_scalar, + construct_1d_object_array_from_listlike, + maybe_cast_to_datetime, + maybe_cast_to_integer_array, + maybe_convert_platform, + maybe_infer_to_datetimelike, + maybe_upcast, + sanitize_to_nanoseconds, +) +from pandas.core.dtypes.common import ( + is_datetime64_ns_dtype, + is_extension_array_dtype, + is_float_dtype, + is_integer_dtype, + is_list_like, + is_object_dtype, + is_timedelta64_ns_dtype, +) +from pandas.core.dtypes.dtypes import ( + DatetimeTZDtype, + PandasDtype, +) +from pandas.core.dtypes.generic import ( + ABCExtensionArray, + ABCIndex, + ABCPandasArray, + ABCRangeIndex, + ABCSeries, +) +from pandas.core.dtypes.missing import isna + +import pandas.core.common as com + +if TYPE_CHECKING: + from pandas import ( + ExtensionArray, + Index, + Series, + ) + + +def array( + data: Sequence[object] | AnyArrayLike, + dtype: Dtype | None = None, + copy: bool = True, +) -> ExtensionArray: + """ + Create an array. + + Parameters + ---------- + data : Sequence of objects + The scalars inside `data` should be instances of the + scalar type for `dtype`. It's expected that `data` + represents a 1-dimensional array of data. + + When `data` is an Index or Series, the underlying array + will be extracted from `data`. + + dtype : str, np.dtype, or ExtensionDtype, optional + The dtype to use for the array. This may be a NumPy + dtype or an extension type registered with pandas using + :meth:`pandas.api.extensions.register_extension_dtype`. + + If not specified, there are two possibilities: + + 1. When `data` is a :class:`Series`, :class:`Index`, or + :class:`ExtensionArray`, the `dtype` will be taken + from the data. + 2. Otherwise, pandas will attempt to infer the `dtype` + from the data. + + Note that when `data` is a NumPy array, ``data.dtype`` is + *not* used for inferring the array type. This is because + NumPy cannot represent all the types of data that can be + held in extension arrays. + + Currently, pandas will infer an extension dtype for sequences of + + ============================== ======================================= + Scalar Type Array Type + ============================== ======================================= + :class:`pandas.Interval` :class:`pandas.arrays.IntervalArray` + :class:`pandas.Period` :class:`pandas.arrays.PeriodArray` + :class:`datetime.datetime` :class:`pandas.arrays.DatetimeArray` + :class:`datetime.timedelta` :class:`pandas.arrays.TimedeltaArray` + :class:`int` :class:`pandas.arrays.IntegerArray` + :class:`float` :class:`pandas.arrays.FloatingArray` + :class:`str` :class:`pandas.arrays.StringArray` or + :class:`pandas.arrays.ArrowStringArray` + :class:`bool` :class:`pandas.arrays.BooleanArray` + ============================== ======================================= + + The ExtensionArray created when the scalar type is :class:`str` is determined by + ``pd.options.mode.string_storage`` if the dtype is not explicitly given. + + For all other cases, NumPy's usual inference rules will be used. + + .. 
versionchanged:: 1.0.0 + + Pandas infers nullable-integer dtype for integer data, + string dtype for string data, and nullable-boolean dtype + for boolean data. + + .. versionchanged:: 1.2.0 + + Pandas now also infers nullable-floating dtype for float-like + input data + + copy : bool, default True + Whether to copy the data, even if not necessary. Depending + on the type of `data`, creating the new array may require + copying data, even if ``copy=False``. + + Returns + ------- + ExtensionArray + The newly created array. + + Raises + ------ + ValueError + When `data` is not 1-dimensional. + + See Also + -------- + numpy.array : Construct a NumPy array. + Series : Construct a pandas Series. + Index : Construct a pandas Index. + arrays.PandasArray : ExtensionArray wrapping a NumPy array. + Series.array : Extract the array stored within a Series. + + Notes + ----- + Omitting the `dtype` argument means pandas will attempt to infer the + best array type from the values in the data. As new array types are + added by pandas and 3rd party libraries, the "best" array type may + change. We recommend specifying `dtype` to ensure that + + 1. the correct array type for the data is returned + 2. the returned array type doesn't change as new extension types + are added by pandas and third-party libraries + + Additionally, if the underlying memory representation of the returned + array matters, we recommend specifying the `dtype` as a concrete object + rather than a string alias or allowing it to be inferred. For example, + a future version of pandas or a 3rd-party library may include a + dedicated ExtensionArray for string data. In this event, the following + would no longer return a :class:`arrays.PandasArray` backed by a NumPy + array. + + >>> pd.array(['a', 'b'], dtype=str) + + ['a', 'b'] + Length: 2, dtype: str32 + + This would instead return the new ExtensionArray dedicated for string + data. If you really need the new array to be backed by a NumPy array, + specify that in the dtype. + + >>> pd.array(['a', 'b'], dtype=np.dtype(" + ['a', 'b'] + Length: 2, dtype: str32 + + Finally, Pandas has arrays that mostly overlap with NumPy + + * :class:`arrays.DatetimeArray` + * :class:`arrays.TimedeltaArray` + + When data with a ``datetime64[ns]`` or ``timedelta64[ns]`` dtype is + passed, pandas will always return a ``DatetimeArray`` or ``TimedeltaArray`` + rather than a ``PandasArray``. This is for symmetry with the case of + timezone-aware data, which NumPy does not natively support. + + >>> pd.array(['2015', '2016'], dtype='datetime64[ns]') + + ['2015-01-01 00:00:00', '2016-01-01 00:00:00'] + Length: 2, dtype: datetime64[ns] + + >>> pd.array(["1H", "2H"], dtype='timedelta64[ns]') + + ['0 days 01:00:00', '0 days 02:00:00'] + Length: 2, dtype: timedelta64[ns] + + Examples + -------- + If a dtype is not specified, pandas will infer the best dtype from the values. + See the description of `dtype` for the types pandas infers for. + + >>> pd.array([1, 2]) + + [1, 2] + Length: 2, dtype: Int64 + + >>> pd.array([1, 2, np.nan]) + + [1, 2, ] + Length: 3, dtype: Int64 + + >>> pd.array([1.1, 2.2]) + + [1.1, 2.2] + Length: 2, dtype: Float64 + + >>> pd.array(["a", None, "c"]) + + ['a', , 'c'] + Length: 3, dtype: string + + >>> with pd.option_context("string_storage", "pyarrow"): + ... arr = pd.array(["a", None, "c"]) + ... 
+ >>> arr + + ['a', , 'c'] + Length: 3, dtype: string + + >>> pd.array([pd.Period('2000', freq="D"), pd.Period("2000", freq="D")]) + + ['2000-01-01', '2000-01-01'] + Length: 2, dtype: period[D] + + You can use the string alias for `dtype` + + >>> pd.array(['a', 'b', 'a'], dtype='category') + ['a', 'b', 'a'] + Categories (2, object): ['a', 'b'] + + Or specify the actual dtype + + >>> pd.array(['a', 'b', 'a'], + ... dtype=pd.CategoricalDtype(['a', 'b', 'c'], ordered=True)) + ['a', 'b', 'a'] + Categories (3, object): ['a' < 'b' < 'c'] + + If pandas does not infer a dedicated extension type a + :class:`arrays.PandasArray` is returned. + + >>> pd.array([1 + 1j, 3 + 2j]) + + [(1+1j), (3+2j)] + Length: 2, dtype: complex128 + + As mentioned in the "Notes" section, new extension types may be added + in the future (by pandas or 3rd party libraries), causing the return + value to no longer be a :class:`arrays.PandasArray`. Specify the `dtype` + as a NumPy dtype if you need to ensure there's no future change in + behavior. + + >>> pd.array([1, 2], dtype=np.dtype("int32")) + + [1, 2] + Length: 2, dtype: int32 + + `data` must be 1-dimensional. A ValueError is raised when the input + has the wrong dimensionality. + + >>> pd.array(1) + Traceback (most recent call last): + ... + ValueError: Cannot pass scalar '1' to 'pandas.array'. + """ + from pandas.core.arrays import ( + BooleanArray, + DatetimeArray, + ExtensionArray, + FloatingArray, + IntegerArray, + IntervalArray, + PandasArray, + PeriodArray, + TimedeltaArray, + ) + from pandas.core.arrays.string_ import StringDtype + + if lib.is_scalar(data): + msg = f"Cannot pass scalar '{data}' to 'pandas.array'." + raise ValueError(msg) + + if dtype is None and isinstance(data, (ABCSeries, ABCIndex, ExtensionArray)): + # Note: we exclude np.ndarray here, will do type inference on it + dtype = data.dtype + + data = extract_array(data, extract_numpy=True) + + # this returns None for not-found dtypes. + if isinstance(dtype, str): + dtype = registry.find(dtype) or dtype + + if is_extension_array_dtype(dtype): + cls = cast(ExtensionDtype, dtype).construct_array_type() + return cls._from_sequence(data, dtype=dtype, copy=copy) + + if dtype is None: + inferred_dtype = lib.infer_dtype(data, skipna=True) + if inferred_dtype == "period": + return PeriodArray._from_sequence(data, copy=copy) + + elif inferred_dtype == "interval": + return IntervalArray(data, copy=copy) + + elif inferred_dtype.startswith("datetime"): + # datetime, datetime64 + try: + return DatetimeArray._from_sequence(data, copy=copy) + except ValueError: + # Mixture of timezones, fall back to PandasArray + pass + + elif inferred_dtype.startswith("timedelta"): + # timedelta, timedelta64 + return TimedeltaArray._from_sequence(data, copy=copy) + + elif inferred_dtype == "string": + # StringArray/ArrowStringArray depending on pd.options.mode.string_storage + return StringDtype().construct_array_type()._from_sequence(data, copy=copy) + + elif inferred_dtype == "integer": + return IntegerArray._from_sequence(data, copy=copy) + + elif ( + inferred_dtype in ("floating", "mixed-integer-float") + and getattr(data, "dtype", None) != np.float16 + ): + # GH#44715 Exclude np.float16 bc FloatingArray does not support it; + # we will fall back to PandasArray. + return FloatingArray._from_sequence(data, copy=copy) + + elif inferred_dtype == "boolean": + return BooleanArray._from_sequence(data, copy=copy) + + # Pandas overrides NumPy for + # 1. datetime64[ns] + # 2. timedelta64[ns] + # so that a DatetimeArray is returned. 
+ if is_datetime64_ns_dtype(dtype): + return DatetimeArray._from_sequence(data, dtype=dtype, copy=copy) + elif is_timedelta64_ns_dtype(dtype): + return TimedeltaArray._from_sequence(data, dtype=dtype, copy=copy) + + return PandasArray._from_sequence(data, dtype=dtype, copy=copy) + + +def extract_array( + obj: object, extract_numpy: bool = False, extract_range: bool = False +) -> Any | ArrayLike: + """ + Extract the ndarray or ExtensionArray from a Series or Index. + + For all other types, `obj` is just returned as is. + + Parameters + ---------- + obj : object + For Series / Index, the underlying ExtensionArray is unboxed. + + extract_numpy : bool, default False + Whether to extract the ndarray from a PandasArray. + + extract_range : bool, default False + If we have a RangeIndex, return range._values if True + (which is a materialized integer ndarray), otherwise return unchanged. + + Returns + ------- + arr : object + + Examples + -------- + >>> extract_array(pd.Series(['a', 'b', 'c'], dtype='category')) + ['a', 'b', 'c'] + Categories (3, object): ['a', 'b', 'c'] + + Other objects like lists, arrays, and DataFrames are just passed through. + + >>> extract_array([1, 2, 3]) + [1, 2, 3] + + For an ndarray-backed Series / Index the ndarray is returned. + + >>> extract_array(pd.Series([1, 2, 3])) + array([1, 2, 3]) + + To extract all the way down to the ndarray, pass ``extract_numpy=True``. + + >>> extract_array(pd.Series([1, 2, 3]), extract_numpy=True) + array([1, 2, 3]) + """ + if isinstance(obj, (ABCIndex, ABCSeries)): + if isinstance(obj, ABCRangeIndex): + if extract_range: + return obj._values + return obj + + obj = obj._values + + elif extract_numpy and isinstance(obj, ABCPandasArray): + obj = obj.to_numpy() + + return obj + + +def ensure_wrapped_if_datetimelike(arr): + """ + Wrap datetime64 and timedelta64 ndarrays in DatetimeArray/TimedeltaArray. + """ + if isinstance(arr, np.ndarray): + if arr.dtype.kind == "M": + from pandas.core.arrays import DatetimeArray + + return DatetimeArray._from_sequence(arr) + + elif arr.dtype.kind == "m": + from pandas.core.arrays import TimedeltaArray + + return TimedeltaArray._from_sequence(arr) + + return arr + + +def sanitize_masked_array(data: ma.MaskedArray) -> np.ndarray: + """ + Convert numpy MaskedArray to ensure mask is softened. + """ + mask = ma.getmaskarray(data) + if mask.any(): + data, fill_value = maybe_upcast(data, copy=True) + data.soften_mask() # set hardmask False if it was True + data[mask] = fill_value + else: + data = data.copy() + return data + + +def sanitize_array( + data, + index: Index | None, + dtype: DtypeObj | None = None, + copy: bool = False, + raise_cast_failure: bool = True, + *, + allow_2d: bool = False, +) -> ArrayLike: + """ + Sanitize input data to an ndarray or ExtensionArray, copy if specified, + coerce to the dtype if specified. + + Parameters + ---------- + data : Any + index : Index or None, default None + dtype : np.dtype, ExtensionDtype, or None, default None + copy : bool, default False + raise_cast_failure : bool, default True + allow_2d : bool, default False + If False, raise if we have a 2D Arraylike. + + Returns + ------- + np.ndarray or ExtensionArray + + Notes + ----- + raise_cast_failure=False is only intended to be True when called from the + DataFrame constructor, as the dtype keyword there may be interpreted as only + applying to a subset of columns, see GH#24435. 
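
For illustration, the masked-array path handled by sanitize_masked_array above is what the Series constructor hits for numpy.ma input:

import numpy.ma as ma
import pandas as pd

marr = ma.masked_array([1, 2, 3], mask=[False, True, False])
print(pd.Series(marr))  # masked slot upcast and filled: 1.0, NaN, 3.0 (float64)
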
+ """ + if isinstance(data, ma.MaskedArray): + data = sanitize_masked_array(data) + + if isinstance(dtype, PandasDtype): + # Avoid ending up with a PandasArray + dtype = dtype.numpy_dtype + + # extract ndarray or ExtensionArray, ensure we have no PandasArray + data = extract_array(data, extract_numpy=True) + + if isinstance(data, np.ndarray) and data.ndim == 0: + if dtype is None: + dtype = data.dtype + data = lib.item_from_zerodim(data) + elif isinstance(data, range): + # GH#16804 + data = range_to_ndarray(data) + copy = False + + if not is_list_like(data): + if index is None: + raise ValueError("index must be specified when data is not list-like") + data = construct_1d_arraylike_from_scalar(data, len(index), dtype) + return data + + # GH#846 + if isinstance(data, np.ndarray): + if isinstance(data, np.matrix): + data = data.A + + if dtype is not None and is_float_dtype(data.dtype) and is_integer_dtype(dtype): + # possibility of nan -> garbage + try: + subarr = _try_cast(data, dtype, copy, True) + except IntCastingNaNError: + warnings.warn( + "In a future version, passing float-dtype values containing NaN " + "and an integer dtype will raise IntCastingNaNError " + "(subclass of ValueError) instead of silently ignoring the " + "passed dtype. To retain the old behavior, call Series(arr) or " + "DataFrame(arr) without passing a dtype.", + FutureWarning, + stacklevel=find_stack_level(), + ) + subarr = np.array(data, copy=copy) + except ValueError: + if not raise_cast_failure: + # i.e. called via DataFrame constructor + warnings.warn( + "In a future version, passing float-dtype values and an " + "integer dtype to DataFrame will retain floating dtype " + "if they cannot be cast losslessly (matching Series behavior). " + "To retain the old behavior, use DataFrame(data).astype(dtype)", + FutureWarning, + stacklevel=find_stack_level(), + ) + # GH#40110 until the deprecation is enforced, we _dont_ + # ignore the dtype for DataFrame, and _do_ cast even though + # it is lossy. + dtype = cast(np.dtype, dtype) + return np.array(data, dtype=dtype, copy=copy) + subarr = np.array(data, copy=copy) + else: + # we will try to copy by-definition here + subarr = _try_cast(data, dtype, copy, raise_cast_failure) + + elif isinstance(data, ABCExtensionArray): + # it is already ensured above this is not a PandasArray + subarr = data + + if dtype is not None: + subarr = subarr.astype(dtype, copy=copy) + elif copy: + subarr = subarr.copy() + + else: + if isinstance(data, (set, frozenset)): + # Raise only for unordered sets, e.g., not for dict_keys + raise TypeError(f"'{type(data).__name__}' type is unordered") + + # materialize e.g. generators, convert e.g. tuples, abc.ValueView + if hasattr(data, "__array__"): + # e.g. dask array GH#38645 + data = np.asarray(data) + else: + data = list(data) + + if dtype is not None or len(data) == 0: + subarr = _try_cast(data, dtype, copy, raise_cast_failure) + else: + subarr = maybe_convert_platform(data) + if subarr.dtype == object: + subarr = cast(np.ndarray, subarr) + subarr = maybe_infer_to_datetimelike(subarr) + + subarr = _sanitize_ndim(subarr, data, dtype, index, allow_2d=allow_2d) + + if isinstance(subarr, np.ndarray): + # at this point we should have dtype be None or subarr.dtype == dtype + dtype = cast(np.dtype, dtype) + subarr = _sanitize_str_dtypes(subarr, data, dtype, copy) + + return subarr + + +def range_to_ndarray(rng: range) -> np.ndarray: + """ + Cast a range object to ndarray. 
+ """ + # GH#30171 perf avoid realizing range as a list in np.array + try: + arr = np.arange(rng.start, rng.stop, rng.step, dtype="int64") + except OverflowError: + # GH#30173 handling for ranges that overflow int64 + if (rng.start >= 0 and rng.step > 0) or (rng.stop >= 0 and rng.step < 0): + try: + arr = np.arange(rng.start, rng.stop, rng.step, dtype="uint64") + except OverflowError: + arr = construct_1d_object_array_from_listlike(list(rng)) + else: + arr = construct_1d_object_array_from_listlike(list(rng)) + return arr + + +def _sanitize_ndim( + result: ArrayLike, + data, + dtype: DtypeObj | None, + index: Index | None, + *, + allow_2d: bool = False, +) -> ArrayLike: + """ + Ensure we have a 1-dimensional result array. + """ + if getattr(result, "ndim", 0) == 0: + raise ValueError("result should be arraylike with ndim > 0") + + elif result.ndim == 1: + # the result that we want + result = _maybe_repeat(result, index) + + elif result.ndim > 1: + if isinstance(data, np.ndarray): + if allow_2d: + return result + raise ValueError("Data must be 1-dimensional") + if is_object_dtype(dtype) and isinstance(dtype, ExtensionDtype): + # i.e. PandasDtype("O") + + result = com.asarray_tuplesafe(data, dtype=np.dtype("object")) + cls = dtype.construct_array_type() + result = cls._from_sequence(result, dtype=dtype) + else: + # error: Argument "dtype" to "asarray_tuplesafe" has incompatible type + # "Union[dtype[Any], ExtensionDtype, None]"; expected "Union[str, + # dtype[Any], None]" + result = com.asarray_tuplesafe(data, dtype=dtype) # type: ignore[arg-type] + return result + + +def _sanitize_str_dtypes( + result: np.ndarray, data, dtype: np.dtype | None, copy: bool +) -> np.ndarray: + """ + Ensure we have a dtype that is supported by pandas. + """ + + # This is to prevent mixed-type Series getting all casted to + # NumPy string type, e.g. NaN --> '-1#IND'. + if issubclass(result.dtype.type, str): + # GH#16605 + # If not empty convert the data to dtype + # GH#19853: If data is a scalar, result has already the result + if not lib.is_scalar(data): + if not np.all(isna(data)): + data = np.array(data, dtype=dtype, copy=False) + result = np.array(data, dtype=object, copy=copy) + return result + + +def _maybe_repeat(arr: ArrayLike, index: Index | None) -> ArrayLike: + """ + If we have a length-1 array and an index describing how long we expect + the result to be, repeat the array. + """ + if index is not None: + if 1 == len(arr) != len(index): + arr = arr.repeat(len(index)) + return arr + + +def _try_cast( + arr: list | np.ndarray, + dtype: DtypeObj | None, + copy: bool, + raise_cast_failure: bool, +) -> ArrayLike: + """ + Convert input to numpy ndarray and optionally cast to a given dtype. + + Parameters + ---------- + arr : ndarray or list + Excludes: ExtensionArray, Series, Index. + dtype : np.dtype, ExtensionDtype or None + copy : bool + If False, don't copy the data if not needed. + raise_cast_failure : bool + If True, and if a dtype is specified, raise errors during casting. + Otherwise an object array is returned. + + Returns + ------- + np.ndarray or ExtensionArray + """ + is_ndarray = isinstance(arr, np.ndarray) + + if dtype is None: + # perf shortcut as this is the most common case + if is_ndarray: + arr = cast(np.ndarray, arr) + if arr.dtype != object: + return sanitize_to_nanoseconds(arr, copy=copy) + + out = maybe_infer_to_datetimelike(arr) + if out is arr and copy: + out = out.copy() + return out + + else: + # i.e. 
list + varr = np.array(arr, copy=False) + # filter out cases that we _dont_ want to go through + # maybe_infer_to_datetimelike + if varr.dtype != object or varr.size == 0: + return varr + return maybe_infer_to_datetimelike(varr) + + elif isinstance(dtype, ExtensionDtype): + # create an extension array from its dtype + if isinstance(dtype, DatetimeTZDtype): + # We can't go through _from_sequence because it handles dt64naive + # data differently; _from_sequence treats naive as wall times, + # while maybe_cast_to_datetime treats it as UTC + # see test_maybe_promote_any_numpy_dtype_with_datetimetz + + return maybe_cast_to_datetime(arr, dtype) + # TODO: copy? + + array_type = dtype.construct_array_type()._from_sequence + subarr = array_type(arr, dtype=dtype, copy=copy) + return subarr + + elif is_object_dtype(dtype): + if not is_ndarray: + subarr = construct_1d_object_array_from_listlike(arr) + return subarr + return ensure_wrapped_if_datetimelike(arr).astype(dtype, copy=copy) + + elif dtype.kind == "U": + # TODO: test cases with arr.dtype.kind in ["m", "M"] + return lib.ensure_string_array(arr, convert_na_value=False, copy=copy) + + elif dtype.kind in ["m", "M"]: + return maybe_cast_to_datetime(arr, dtype) + + try: + # GH#15832: Check if we are requesting a numeric dtype and + # that we can convert the data to the requested dtype. + if is_integer_dtype(dtype): + # this will raise if we have e.g. floats + + subarr = maybe_cast_to_integer_array(arr, dtype) + else: + # 4 tests fail if we move this to a try/except/else; see + # test_constructor_compound_dtypes, test_constructor_cast_failure + # test_constructor_dict_cast2, test_loc_setitem_dtype + subarr = np.array(arr, dtype=dtype, copy=copy) + + except (ValueError, TypeError): + if raise_cast_failure: + raise + else: + # we only get here with raise_cast_failure False, which means + # called via the DataFrame constructor + # GH#24435 + warnings.warn( + f"Could not cast to {dtype}, falling back to object. This " + "behavior is deprecated. In a future version, when a dtype is " + "passed to 'DataFrame', either all columns will be cast to that " + "dtype, or a TypeError will be raised.", + FutureWarning, + stacklevel=find_stack_level(), + ) + subarr = np.array(arr, dtype=object, copy=copy) + return subarr + + +def is_empty_data(data: Any) -> bool: + """ + Utility to check if a Series is instantiated with empty data, + which does not contain dtype information. + + Parameters + ---------- + data : array-like, Iterable, dict, or scalar value + Contains data stored in Series. + + Returns + ------- + bool + """ + is_none = data is None + is_list_like_without_dtype = is_list_like(data) and not hasattr(data, "dtype") + is_simple_empty = is_list_like_without_dtype and not data + return is_none or is_simple_empty + + +def create_series_with_explicit_dtype( + data: Any = None, + index: ArrayLike | Index | None = None, + dtype: Dtype | None = None, + name: str | None = None, + copy: bool = False, + fastpath: bool = False, + dtype_if_empty: Dtype = object, +) -> Series: + """ + Helper to pass an explicit dtype when instantiating an empty Series. + + This silences a DeprecationWarning described in GitHub-17261. 
+ + Parameters + ---------- + data : Mirrored from Series.__init__ + index : Mirrored from Series.__init__ + dtype : Mirrored from Series.__init__ + name : Mirrored from Series.__init__ + copy : Mirrored from Series.__init__ + fastpath : Mirrored from Series.__init__ + dtype_if_empty : str, numpy.dtype, or ExtensionDtype + This dtype will be passed explicitly if an empty Series will + be instantiated. + + Returns + ------- + Series + """ + from pandas.core.series import Series + + if is_empty_data(data) and dtype is None: + dtype = dtype_if_empty + return Series( + data=data, index=index, dtype=dtype, name=name, copy=copy, fastpath=fastpath + ) diff --git a/.local/lib/python3.8/site-packages/pandas/core/describe.py b/.local/lib/python3.8/site-packages/pandas/core/describe.py new file mode 100644 index 0000000..8d88ce2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/describe.py @@ -0,0 +1,422 @@ +""" +Module responsible for execution of NDFrame.describe() method. + +Method NDFrame.describe() delegates actual execution to function describe_ndframe(). +""" +from __future__ import annotations + +from abc import ( + ABC, + abstractmethod, +) +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Hashable, + Sequence, + cast, +) +import warnings + +import numpy as np + +from pandas._libs.tslibs import Timestamp +from pandas._typing import NDFrameT +from pandas.util._exceptions import find_stack_level +from pandas.util._validators import validate_percentile + +from pandas.core.dtypes.common import ( + is_bool_dtype, + is_datetime64_any_dtype, + is_numeric_dtype, + is_timedelta64_dtype, +) + +from pandas.core.reshape.concat import concat + +from pandas.io.formats.format import format_percentiles + +if TYPE_CHECKING: + from pandas import ( + DataFrame, + Series, + ) + + +def describe_ndframe( + *, + obj: NDFrameT, + include: str | Sequence[str] | None, + exclude: str | Sequence[str] | None, + datetime_is_numeric: bool, + percentiles: Sequence[float] | np.ndarray | None, +) -> NDFrameT: + """Describe series or dataframe. + + Called from pandas.core.generic.NDFrame.describe() + + Parameters + ---------- + obj: DataFrame or Series + Either dataframe or series to be described. + include : 'all', list-like of dtypes or None (default), optional + A white list of data types to include in the result. Ignored for ``Series``. + exclude : list-like of dtypes or None (default), optional, + A black list of data types to omit from the result. Ignored for ``Series``. + datetime_is_numeric : bool, default False + Whether to treat datetime dtypes as numeric. + percentiles : list-like of numbers, optional + The percentiles to include in the output. All should fall between 0 and 1. + The default is ``[.25, .5, .75]``, which returns the 25th, 50th, and + 75th percentiles. + + Returns + ------- + Dataframe or series description. + """ + percentiles = refine_percentiles(percentiles) + + describer: NDFrameDescriberAbstract + if obj.ndim == 1: + describer = SeriesDescriber( + obj=cast("Series", obj), + datetime_is_numeric=datetime_is_numeric, + ) + else: + describer = DataFrameDescriber( + obj=cast("DataFrame", obj), + include=include, + exclude=exclude, + datetime_is_numeric=datetime_is_numeric, + ) + + result = describer.describe(percentiles=percentiles) + return cast(NDFrameT, result) + + +class NDFrameDescriberAbstract(ABC): + """Abstract class for describing dataframe or series. + + Parameters + ---------- + obj : Series or DataFrame + Object to be described. 
+ datetime_is_numeric : bool + Whether to treat datetime dtypes as numeric. + """ + + def __init__(self, obj: DataFrame | Series, datetime_is_numeric: bool): + self.obj = obj + self.datetime_is_numeric = datetime_is_numeric + + @abstractmethod + def describe(self, percentiles: Sequence[float] | np.ndarray) -> DataFrame | Series: + """Do describe either series or dataframe. + + Parameters + ---------- + percentiles : list-like of numbers + The percentiles to include in the output. + """ + + +class SeriesDescriber(NDFrameDescriberAbstract): + """Class responsible for creating series description.""" + + obj: Series + + def describe(self, percentiles: Sequence[float] | np.ndarray) -> Series: + describe_func = select_describe_func( + self.obj, + self.datetime_is_numeric, + ) + return describe_func(self.obj, percentiles) + + +class DataFrameDescriber(NDFrameDescriberAbstract): + """Class responsible for creating dataobj description. + + Parameters + ---------- + obj : DataFrame + DataFrame to be described. + include : 'all', list-like of dtypes or None + A white list of data types to include in the result. + exclude : list-like of dtypes or None + A black list of data types to omit from the result. + datetime_is_numeric : bool + Whether to treat datetime dtypes as numeric. + """ + + def __init__( + self, + obj: DataFrame, + *, + include: str | Sequence[str] | None, + exclude: str | Sequence[str] | None, + datetime_is_numeric: bool, + ): + self.include = include + self.exclude = exclude + + if obj.ndim == 2 and obj.columns.size == 0: + raise ValueError("Cannot describe a DataFrame without columns") + + super().__init__(obj, datetime_is_numeric=datetime_is_numeric) + + def describe(self, percentiles: Sequence[float] | np.ndarray) -> DataFrame: + data = self._select_data() + + ldesc: list[Series] = [] + for _, series in data.items(): + describe_func = select_describe_func(series, self.datetime_is_numeric) + ldesc.append(describe_func(series, percentiles)) + + col_names = reorder_columns(ldesc) + d = concat( + [x.reindex(col_names, copy=False) for x in ldesc], + axis=1, + sort=False, + ) + d.columns = data.columns.copy() + return d + + def _select_data(self): + """Select columns to be described.""" + if (self.include is None) and (self.exclude is None): + # when some numerics are found, keep only numerics + default_include = [np.number] + if self.datetime_is_numeric: + # error: Argument 1 to "append" of "list" has incompatible type "str"; + # expected "Type[number[Any]]" + default_include.append("datetime") # type: ignore[arg-type] + data = self.obj.select_dtypes(include=default_include) + if len(data.columns) == 0: + data = self.obj + elif self.include == "all": + if self.exclude is not None: + msg = "exclude must be None when include is 'all'" + raise ValueError(msg) + data = self.obj + else: + data = self.obj.select_dtypes( + include=self.include, + exclude=self.exclude, + ) + return data + + +def reorder_columns(ldesc: Sequence[Series]) -> list[Hashable]: + """Set a convenient order for rows for display.""" + names: list[Hashable] = [] + ldesc_indexes = sorted((x.index for x in ldesc), key=len) + for idxnames in ldesc_indexes: + for name in idxnames: + if name not in names: + names.append(name) + return names + + +def describe_numeric_1d(series: Series, percentiles: Sequence[float]) -> Series: + """Describe series containing numerical data. + + Parameters + ---------- + series : Series + Series to be described. + percentiles : list-like of numbers + The percentiles to include in the output. 
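+
+    Notes
+    -----
+    The result is indexed by ``["count", "mean", "std", "min"]``, then the
+    formatted percentiles, then ``"max"``. As an illustrative sketch::
+
+        describe_numeric_1d(pd.Series([1.0, 2.0, 3.0]), [0.25, 0.5, 0.75])
+
+    yields a Series indexed ``count, mean, std, min, 25%, 50%, 75%, max``.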
+ """ + from pandas import Series + + # error: Argument 1 to "format_percentiles" has incompatible type "Sequence[float]"; + # expected "Union[ndarray, List[Union[int, float]], List[float], List[Union[str, + # float]]]" + formatted_percentiles = format_percentiles(percentiles) # type: ignore[arg-type] + + stat_index = ["count", "mean", "std", "min"] + formatted_percentiles + ["max"] + d = ( + [series.count(), series.mean(), series.std(), series.min()] + + series.quantile(percentiles).tolist() + + [series.max()] + ) + return Series(d, index=stat_index, name=series.name) + + +def describe_categorical_1d( + data: Series, + percentiles_ignored: Sequence[float], +) -> Series: + """Describe series containing categorical data. + + Parameters + ---------- + data : Series + Series to be described. + percentiles_ignored : list-like of numbers + Ignored, but in place to unify interface. + """ + names = ["count", "unique", "top", "freq"] + objcounts = data.value_counts() + count_unique = len(objcounts[objcounts != 0]) + if count_unique > 0: + top, freq = objcounts.index[0], objcounts.iloc[0] + dtype = None + else: + # If the DataFrame is empty, set 'top' and 'freq' to None + # to maintain output shape consistency + top, freq = np.nan, np.nan + dtype = "object" + + result = [data.count(), count_unique, top, freq] + + from pandas import Series + + return Series(result, index=names, name=data.name, dtype=dtype) + + +def describe_timestamp_as_categorical_1d( + data: Series, + percentiles_ignored: Sequence[float], +) -> Series: + """Describe series containing timestamp data treated as categorical. + + Parameters + ---------- + data : Series + Series to be described. + percentiles_ignored : list-like of numbers + Ignored, but in place to unify interface. + """ + names = ["count", "unique"] + objcounts = data.value_counts() + count_unique = len(objcounts[objcounts != 0]) + result = [data.count(), count_unique] + dtype = None + if count_unique > 0: + top, freq = objcounts.index[0], objcounts.iloc[0] + tz = data.dt.tz + asint = data.dropna().values.view("i8") + top = Timestamp(top) + if top.tzinfo is not None and tz is not None: + # Don't tz_localize(None) if key is already tz-aware + top = top.tz_convert(tz) + else: + top = top.tz_localize(tz) + names += ["top", "freq", "first", "last"] + result += [ + top, + freq, + Timestamp(asint.min(), tz=tz), + Timestamp(asint.max(), tz=tz), + ] + + # If the DataFrame is empty, set 'top' and 'freq' to None + # to maintain output shape consistency + else: + names += ["top", "freq"] + result += [np.nan, np.nan] + dtype = "object" + + from pandas import Series + + return Series(result, index=names, name=data.name, dtype=dtype) + + +def describe_timestamp_1d(data: Series, percentiles: Sequence[float]) -> Series: + """Describe series containing datetime64 dtype. + + Parameters + ---------- + data : Series + Series to be described. + percentiles : list-like of numbers + The percentiles to include in the output. 
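+
+    Notes
+    -----
+    Unlike the numeric path, no ``std`` is reported: the result is indexed
+    by ``["count", "mean", "min"]``, the formatted percentiles, and ``"max"``.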
+ """ + # GH-30164 + from pandas import Series + + # error: Argument 1 to "format_percentiles" has incompatible type "Sequence[float]"; + # expected "Union[ndarray, List[Union[int, float]], List[float], List[Union[str, + # float]]]" + formatted_percentiles = format_percentiles(percentiles) # type: ignore[arg-type] + + stat_index = ["count", "mean", "min"] + formatted_percentiles + ["max"] + d = ( + [data.count(), data.mean(), data.min()] + + data.quantile(percentiles).tolist() + + [data.max()] + ) + return Series(d, index=stat_index, name=data.name) + + +def select_describe_func( + data: Series, + datetime_is_numeric: bool, +) -> Callable: + """Select proper function for describing series based on data type. + + Parameters + ---------- + data : Series + Series to be described. + datetime_is_numeric : bool + Whether to treat datetime dtypes as numeric. + """ + if is_bool_dtype(data.dtype): + return describe_categorical_1d + elif is_numeric_dtype(data): + return describe_numeric_1d + elif is_datetime64_any_dtype(data.dtype): + if datetime_is_numeric: + return describe_timestamp_1d + else: + warnings.warn( + "Treating datetime data as categorical rather than numeric in " + "`.describe` is deprecated and will be removed in a future " + "version of pandas. Specify `datetime_is_numeric=True` to " + "silence this warning and adopt the future behavior now.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return describe_timestamp_as_categorical_1d + elif is_timedelta64_dtype(data.dtype): + return describe_numeric_1d + else: + return describe_categorical_1d + + +def refine_percentiles( + percentiles: Sequence[float] | np.ndarray | None, +) -> np.ndarray[Any, np.dtype[np.float64]]: + """ + Ensure that percentiles are unique and sorted. + + Parameters + ---------- + percentiles : list-like of numbers, optional + The percentiles to include in the output. 
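+
+    Returns
+    -------
+    np.ndarray
+        The sorted, de-duplicated percentiles, with the median always
+        included.
+
+    Examples
+    --------
+    Small sketches of the behavior implemented below:
+
+    >>> refine_percentiles(None)
+    array([0.25, 0.5 , 0.75])
+    >>> refine_percentiles([0.1, 0.9])
+    array([0.1, 0.5, 0.9])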
+ """ + if percentiles is None: + return np.array([0.25, 0.5, 0.75]) + + # explicit conversion of `percentiles` to list + percentiles = list(percentiles) + + # get them all to be in [0, 1] + validate_percentile(percentiles) + + # median should always be included + if 0.5 not in percentiles: + percentiles.append(0.5) + + percentiles = np.asarray(percentiles) + + # sort and check for duplicates + unique_pcts = np.unique(percentiles) + assert percentiles is not None + if len(unique_pcts) < len(percentiles): + raise ValueError("percentiles cannot contain duplicates") + + return unique_pcts diff --git a/.local/lib/python3.8/site-packages/pandas/core/dtypes/__init__.py b/.local/lib/python3.8/site-packages/pandas/core/dtypes/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/core/dtypes/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/dtypes/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..3529f3f Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/dtypes/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/dtypes/__pycache__/api.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/dtypes/__pycache__/api.cpython-38.pyc new file mode 100644 index 0000000..256ce88 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/dtypes/__pycache__/api.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/dtypes/__pycache__/base.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/dtypes/__pycache__/base.cpython-38.pyc new file mode 100644 index 0000000..763d09f Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/dtypes/__pycache__/base.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/dtypes/__pycache__/cast.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/dtypes/__pycache__/cast.cpython-38.pyc new file mode 100644 index 0000000..18250d8 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/dtypes/__pycache__/cast.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/dtypes/__pycache__/common.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/dtypes/__pycache__/common.cpython-38.pyc new file mode 100644 index 0000000..3c51808 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/dtypes/__pycache__/common.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/dtypes/__pycache__/concat.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/dtypes/__pycache__/concat.cpython-38.pyc new file mode 100644 index 0000000..d927e29 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/dtypes/__pycache__/concat.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/dtypes/__pycache__/dtypes.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/dtypes/__pycache__/dtypes.cpython-38.pyc new file mode 100644 index 0000000..88f223e Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/dtypes/__pycache__/dtypes.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/dtypes/__pycache__/generic.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/dtypes/__pycache__/generic.cpython-38.pyc new file mode 100644 index 0000000..6e02015 Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/pandas/core/dtypes/__pycache__/generic.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/dtypes/__pycache__/inference.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/dtypes/__pycache__/inference.cpython-38.pyc new file mode 100644 index 0000000..8c57ecd Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/dtypes/__pycache__/inference.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/dtypes/__pycache__/missing.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/dtypes/__pycache__/missing.cpython-38.pyc new file mode 100644 index 0000000..5c81bdb Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/dtypes/__pycache__/missing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/dtypes/api.py b/.local/lib/python3.8/site-packages/pandas/core/dtypes/api.py new file mode 100644 index 0000000..bb6bfda --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/dtypes/api.py @@ -0,0 +1,45 @@ +# flake8: noqa:F401 + +from pandas.core.dtypes.common import ( + is_array_like, + is_bool, + is_bool_dtype, + is_categorical, + is_categorical_dtype, + is_complex, + is_complex_dtype, + is_datetime64_any_dtype, + is_datetime64_dtype, + is_datetime64_ns_dtype, + is_datetime64tz_dtype, + is_dict_like, + is_dtype_equal, + is_extension_array_dtype, + is_extension_type, + is_file_like, + is_float, + is_float_dtype, + is_hashable, + is_int64_dtype, + is_integer, + is_integer_dtype, + is_interval, + is_interval_dtype, + is_iterator, + is_list_like, + is_named_tuple, + is_number, + is_numeric_dtype, + is_object_dtype, + is_period_dtype, + is_re, + is_re_compilable, + is_scalar, + is_signed_integer_dtype, + is_sparse, + is_string_dtype, + is_timedelta64_dtype, + is_timedelta64_ns_dtype, + is_unsigned_integer_dtype, + pandas_dtype, +) diff --git a/.local/lib/python3.8/site-packages/pandas/core/dtypes/base.py b/.local/lib/python3.8/site-packages/pandas/core/dtypes/base.py new file mode 100644 index 0000000..fa07b5f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/dtypes/base.py @@ -0,0 +1,499 @@ +""" +Extend pandas with custom array types. +""" + +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Any, + TypeVar, + cast, + overload, +) + +import numpy as np + +from pandas._libs.hashtable import object_hash +from pandas._typing import ( + DtypeObj, + Shape, + npt, + type_t, +) +from pandas.errors import AbstractMethodError + +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCIndex, + ABCSeries, +) + +if TYPE_CHECKING: + from pandas.core.arrays import ExtensionArray + + # To parameterize on same ExtensionDtype + ExtensionDtypeT = TypeVar("ExtensionDtypeT", bound="ExtensionDtype") + + +class ExtensionDtype: + """ + A custom data type, to be paired with an ExtensionArray. + + See Also + -------- + extensions.register_extension_dtype: Register an ExtensionType + with pandas as class decorator. + extensions.ExtensionArray: Abstract base class for custom 1-D array types. 
+ + Notes + ----- + The interface includes the following abstract methods that must + be implemented by subclasses: + + * type + * name + * construct_array_type + + The following attributes and methods influence the behavior of the dtype in + pandas operations + + * _is_numeric + * _is_boolean + * _get_common_dtype + + The `na_value` class attribute can be used to set the default NA value + for this type. :attr:`numpy.nan` is used by default. + + ExtensionDtypes are required to be hashable. The base class provides + a default implementation, which relies on the ``_metadata`` class + attribute. ``_metadata`` should be a tuple containing the strings + that define your data type. For example, with ``PeriodDtype`` that's + the ``freq`` attribute. + + **If you have a parametrized dtype you should set the ``_metadata`` + class property**. + + Ideally, the attributes in ``_metadata`` will match the + parameters to your ``ExtensionDtype.__init__`` (if any). If any of + the attributes in ``_metadata`` don't implement the standard + ``__eq__`` or ``__hash__``, the default implementations here will not + work. + + For interaction with Apache Arrow (pyarrow), a ``__from_arrow__`` method + can be implemented: this method receives a pyarrow Array or ChunkedArray + as only argument and is expected to return the appropriate pandas + ExtensionArray for this dtype and the passed values:: + + class ExtensionDtype: + + def __from_arrow__( + self, array: Union[pyarrow.Array, pyarrow.ChunkedArray] + ) -> ExtensionArray: + ... + + This class does not inherit from 'abc.ABCMeta' for performance reasons. + Methods and properties required by the interface raise + ``pandas.errors.AbstractMethodError`` and no ``register`` method is + provided for registering virtual subclasses. + """ + + _metadata: tuple[str, ...] = () + + def __str__(self) -> str: + return self.name + + def __eq__(self, other: Any) -> bool: + """ + Check whether 'other' is equal to self. + + By default, 'other' is considered equal if either + + * it's a string matching 'self.name'. + * it's an instance of this type and all of the attributes + in ``self._metadata`` are equal between `self` and `other`. + + Parameters + ---------- + other : Any + + Returns + ------- + bool + """ + if isinstance(other, str): + try: + other = self.construct_from_string(other) + except TypeError: + return False + if isinstance(other, type(self)): + return all( + getattr(self, attr) == getattr(other, attr) for attr in self._metadata + ) + return False + + def __hash__(self) -> int: + # for python>=3.10, different nan objects have different hashes + # we need to avoid that und thus use hash function with old behavior + return object_hash(tuple(getattr(self, attr) for attr in self._metadata)) + + def __ne__(self, other: Any) -> bool: + return not self.__eq__(other) + + @property + def na_value(self) -> object: + """ + Default NA value to use for this type. + + This is used in e.g. ExtensionArray.take. This should be the + user-facing "boxed" version of the NA value, not the physical NA value + for storage. e.g. for JSONArray, this is an empty dictionary. + """ + return np.nan + + @property + def type(self) -> type_t[Any]: + """ + The scalar type for the array, e.g. ``int`` + + It's expected ``ExtensionArray[item]`` returns an instance + of ``ExtensionDtype.type`` for scalar ``item``, assuming + that value is valid (not NA). NA values do not need to be + instances of `type`. 
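+
+        For example, a hypothetical decimal extension dtype would return
+        ``decimal.Decimal`` here, so that scalars taken from the array are
+        plain ``Decimal`` objects.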
+        """
+        raise AbstractMethodError(self)
+
+    @property
+    def kind(self) -> str:
+        """
+        A character code (one of 'biufcmMOSUV'), default 'O'
+
+        This should match the NumPy dtype used when the array is
+        converted to an ndarray, which is probably 'O' for object if
+        the extension type cannot be represented as a built-in NumPy
+        type.
+
+        See Also
+        --------
+        numpy.dtype.kind
+        """
+        return "O"
+
+    @property
+    def name(self) -> str:
+        """
+        A string identifying the data type.
+
+        Will be used for display in, e.g. ``Series.dtype``
+        """
+        raise AbstractMethodError(self)
+
+    @property
+    def names(self) -> list[str] | None:
+        """
+        Ordered list of field names, or None if there are no fields.
+
+        This is for compatibility with NumPy arrays, and may be removed in the
+        future.
+        """
+        return None
+
+    @classmethod
+    def construct_array_type(cls) -> type_t[ExtensionArray]:
+        """
+        Return the array type associated with this dtype.
+
+        Returns
+        -------
+        type
+        """
+        raise AbstractMethodError(cls)
+
+    def empty(self, shape: Shape) -> type_t[ExtensionArray]:
+        """
+        Construct an ExtensionArray of this dtype with the given shape.
+
+        Analogous to numpy.empty.
+
+        Parameters
+        ----------
+        shape : int or tuple[int]
+
+        Returns
+        -------
+        ExtensionArray
+        """
+        cls = self.construct_array_type()
+        return cls._empty(shape, dtype=self)
+
+    @classmethod
+    def construct_from_string(
+        cls: type_t[ExtensionDtypeT], string: str
+    ) -> ExtensionDtypeT:
+        r"""
+        Construct this type from a string.
+
+        This is useful mainly for data types that accept parameters.
+        For example, a period dtype accepts a frequency parameter that
+        can be set as ``period[H]`` (where H means hourly frequency).
+
+        By default, in the abstract class, just the name of the type is
+        expected. But subclasses can overwrite this method to accept
+        parameters.
+
+        Parameters
+        ----------
+        string : str
+            The name of the type, for example ``category``.
+
+        Returns
+        -------
+        ExtensionDtype
+            Instance of the dtype.
+
+        Raises
+        ------
+        TypeError
+            If a class cannot be constructed from this 'string'.
+
+        Examples
+        --------
+        For extension dtypes with arguments the following may be an
+        adequate implementation.
+
+        >>> @classmethod
+        ... def construct_from_string(cls, string):
+        ...     pattern = re.compile(r"^my_type\[(?P<arg_name>.+)\]$")
+        ...     match = pattern.match(string)
+        ...     if match:
+        ...         return cls(**match.groupdict())
+        ...     else:
+        ...         raise TypeError(
+        ...             f"Cannot construct a '{cls.__name__}' from '{string}'"
+        ...         )
+        """
+        if not isinstance(string, str):
+            raise TypeError(
+                f"'construct_from_string' expects a string, got {type(string)}"
+            )
+        # error: Non-overlapping equality check (left operand type: "str", right
+        # operand type: "Callable[[ExtensionDtype], str]") [comparison-overlap]
+        assert isinstance(cls.name, str), (cls, type(cls.name))
+        if string != cls.name:
+            raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'")
+        return cls()
+
+    @classmethod
+    def is_dtype(cls, dtype: object) -> bool:
+        """
+        Check if we match 'dtype'.
+
+        Parameters
+        ----------
+        dtype : object
+            The object to check.
+
+        Returns
+        -------
+        bool
+
+        Notes
+        -----
+        The default implementation is True if
+
+        1. ``cls.construct_from_string(dtype)`` is an instance
+           of ``cls``.
+        2. ``dtype`` is an object and is an instance of ``cls``
+        3. ``dtype`` has a ``dtype`` attribute, and any of the above
+           conditions is true for ``dtype.dtype``.
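+
+        Examples
+        --------
+        A short sketch using ``CategoricalDtype`` as a concrete subclass:
+
+        >>> from pandas import CategoricalDtype
+        >>> CategoricalDtype.is_dtype("category")
+        True
+        >>> CategoricalDtype.is_dtype(np.dtype("float64"))
+        False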
+ """ + dtype = getattr(dtype, "dtype", dtype) + + if isinstance(dtype, (ABCSeries, ABCIndex, ABCDataFrame, np.dtype)): + # https://github.com/pandas-dev/pandas/issues/22960 + # avoid passing data to `construct_from_string`. This could + # cause a FutureWarning from numpy about failing elementwise + # comparison from, e.g., comparing DataFrame == 'category'. + return False + elif dtype is None: + return False + elif isinstance(dtype, cls): + return True + if isinstance(dtype, str): + try: + return cls.construct_from_string(dtype) is not None + except TypeError: + return False + return False + + @property + def _is_numeric(self) -> bool: + """ + Whether columns with this dtype should be considered numeric. + + By default ExtensionDtypes are assumed to be non-numeric. + They'll be excluded from operations that exclude non-numeric + columns, like (groupby) reductions, plotting, etc. + """ + return False + + @property + def _is_boolean(self) -> bool: + """ + Whether this dtype should be considered boolean. + + By default, ExtensionDtypes are assumed to be non-numeric. + Setting this to True will affect the behavior of several places, + e.g. + + * is_bool + * boolean indexing + + Returns + ------- + bool + """ + return False + + def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None: + """ + Return the common dtype, if one exists. + + Used in `find_common_type` implementation. This is for example used + to determine the resulting dtype in a concat operation. + + If no common dtype exists, return None (which gives the other dtypes + the chance to determine a common dtype). If all dtypes in the list + return None, then the common dtype will be "object" dtype (this means + it is never needed to return "object" dtype from this method itself). + + Parameters + ---------- + dtypes : list of dtypes + The dtypes for which to determine a common dtype. This is a list + of np.dtype or ExtensionDtype instances. + + Returns + ------- + Common dtype (np.dtype or ExtensionDtype) or None + """ + if len(set(dtypes)) == 1: + # only itself + return self + else: + return None + + @property + def _can_hold_na(self) -> bool: + """ + Can arrays of this dtype hold NA values? + """ + return True + + +def register_extension_dtype(cls: type_t[ExtensionDtypeT]) -> type_t[ExtensionDtypeT]: + """ + Register an ExtensionType with pandas as class decorator. + + This enables operations like ``.astype(name)`` for the name + of the ExtensionDtype. + + Returns + ------- + callable + A class decorator. + + Examples + -------- + >>> from pandas.api.extensions import register_extension_dtype, ExtensionDtype + >>> @register_extension_dtype + ... class MyExtensionDtype(ExtensionDtype): + ... name = "myextension" + """ + _registry.register(cls) + return cls + + +class Registry: + """ + Registry for dtype inference. + + The registry allows one to map a string repr of a extension + dtype to an extension dtype. The string alias can be used in several + places, including + + * Series and Index constructors + * :meth:`pandas.array` + * :meth:`pandas.Series.astype` + + Multiple extension types can be registered. + These are tried in order. 
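+
+    A hypothetical lookup against the global registry (illustrative; the
+    output depends on which dtypes have been registered):
+
+    >>> _registry.find("category")  # doctest: +SKIP
+    CategoricalDtype(categories=None, ordered=False)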
+ """ + + def __init__(self): + self.dtypes: list[type_t[ExtensionDtype]] = [] + + def register(self, dtype: type_t[ExtensionDtype]) -> None: + """ + Parameters + ---------- + dtype : ExtensionDtype class + """ + if not issubclass(dtype, ExtensionDtype): + raise ValueError("can only register pandas extension dtypes") + + self.dtypes.append(dtype) + + @overload + def find(self, dtype: type_t[ExtensionDtypeT]) -> type_t[ExtensionDtypeT]: + ... + + @overload + def find(self, dtype: ExtensionDtypeT) -> ExtensionDtypeT: + ... + + @overload + def find(self, dtype: str) -> ExtensionDtype | None: + ... + + @overload + def find( + self, dtype: npt.DTypeLike + ) -> type_t[ExtensionDtype] | ExtensionDtype | None: + ... + + def find( + self, dtype: type_t[ExtensionDtype] | ExtensionDtype | npt.DTypeLike + ) -> type_t[ExtensionDtype] | ExtensionDtype | None: + """ + Parameters + ---------- + dtype : ExtensionDtype class or instance or str or numpy dtype or python type + + Returns + ------- + return the first matching dtype, otherwise return None + """ + if not isinstance(dtype, str): + dtype_type: type_t + if not isinstance(dtype, type): + dtype_type = type(dtype) + else: + dtype_type = dtype + if issubclass(dtype_type, ExtensionDtype): + # cast needed here as mypy doesn't know we have figured + # out it is an ExtensionDtype or type_t[ExtensionDtype] + return cast("ExtensionDtype | type_t[ExtensionDtype]", dtype) + + return None + + for dtype_type in self.dtypes: + try: + return dtype_type.construct_from_string(dtype) + except TypeError: + pass + + return None + + +_registry = Registry() diff --git a/.local/lib/python3.8/site-packages/pandas/core/dtypes/cast.py b/.local/lib/python3.8/site-packages/pandas/core/dtypes/cast.py new file mode 100644 index 0000000..e70fd44 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/dtypes/cast.py @@ -0,0 +1,2316 @@ +""" +Routines for casting. 
+""" + +from __future__ import annotations + +from datetime import ( + date, + datetime, + timedelta, +) +import functools +import inspect +from typing import ( + TYPE_CHECKING, + Any, + Sized, + TypeVar, + cast, + overload, +) +import warnings + +import numpy as np + +from pandas._libs import lib +from pandas._libs.tslibs import ( + NaT, + OutOfBoundsDatetime, + OutOfBoundsTimedelta, + Timedelta, + Timestamp, + conversion, +) +from pandas._libs.tslibs.timedeltas import array_to_timedelta64 +from pandas._typing import ( + ArrayLike, + Dtype, + DtypeObj, + Scalar, +) +from pandas.errors import IntCastingNaNError +from pandas.util._exceptions import find_stack_level +from pandas.util._validators import validate_bool_kwarg + +from pandas.core.dtypes.common import ( + DT64NS_DTYPE, + TD64NS_DTYPE, + ensure_int8, + ensure_int16, + ensure_int32, + ensure_int64, + ensure_object, + ensure_str, + is_bool, + is_bool_dtype, + is_complex, + is_complex_dtype, + is_datetime64_dtype, + is_datetime64tz_dtype, + is_dtype_equal, + is_extension_array_dtype, + is_float, + is_float_dtype, + is_integer, + is_integer_dtype, + is_numeric_dtype, + is_object_dtype, + is_scalar, + is_string_dtype, + is_timedelta64_dtype, + is_unsigned_integer_dtype, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + DatetimeTZDtype, + ExtensionDtype, + IntervalDtype, + PandasDtype, + PeriodDtype, +) +from pandas.core.dtypes.generic import ( + ABCExtensionArray, + ABCSeries, +) +from pandas.core.dtypes.inference import is_list_like +from pandas.core.dtypes.missing import ( + is_valid_na_for_dtype, + isna, + na_value_for_dtype, + notna, +) + +if TYPE_CHECKING: + + from pandas.core.arrays import ( + DatetimeArray, + ExtensionArray, + IntervalArray, + PeriodArray, + TimedeltaArray, + ) + +_int8_max = np.iinfo(np.int8).max +_int16_max = np.iinfo(np.int16).max +_int32_max = np.iinfo(np.int32).max +_int64_max = np.iinfo(np.int64).max + +_dtype_obj = np.dtype(object) + +NumpyArrayT = TypeVar("NumpyArrayT", bound=np.ndarray) + + +def maybe_convert_platform( + values: list | tuple | range | np.ndarray | ExtensionArray, +) -> ArrayLike: + """try to do platform conversion, allow ndarray or list here""" + arr: ArrayLike + + if isinstance(values, (list, tuple, range)): + arr = construct_1d_object_array_from_listlike(values) + else: + # The caller is responsible for ensuring that we have np.ndarray + # or ExtensionArray here. + arr = values + + if arr.dtype == _dtype_obj: + arr = cast(np.ndarray, arr) + arr = lib.maybe_convert_objects(arr) + + return arr + + +def is_nested_object(obj) -> bool: + """ + return a boolean if we have a nested object, e.g. a Series with 1 or + more Series elements + + This may not be necessarily be performant. + + """ + return bool( + isinstance(obj, ABCSeries) + and is_object_dtype(obj.dtype) + and any(isinstance(v, ABCSeries) for v in obj._values) + ) + + +def maybe_box_datetimelike(value: Scalar, dtype: Dtype | None = None) -> Scalar: + """ + Cast scalar to Timestamp or Timedelta if scalar is datetime-like + and dtype is not object. + + Parameters + ---------- + value : scalar + dtype : Dtype, optional + + Returns + ------- + scalar + """ + if dtype == _dtype_obj: + pass + elif isinstance(value, (np.datetime64, datetime)): + value = Timestamp(value) + elif isinstance(value, (np.timedelta64, timedelta)): + value = Timedelta(value) + + return value + + +def maybe_box_native(value: Scalar) -> Scalar: + """ + If passed a scalar cast the scalar to a python native type. 
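+
+    For example (illustrative), ``maybe_box_native(np.float64(1.5))``
+    returns the built-in ``float`` ``1.5``, and ``np.int64(1)`` becomes
+    the built-in ``int`` ``1``.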
+ + Parameters + ---------- + value : scalar or Series + + Returns + ------- + scalar or Series + """ + if is_float(value): + # error: Argument 1 to "float" has incompatible type + # "Union[Union[str, int, float, bool], Union[Any, Timestamp, Timedelta, Any]]"; + # expected "Union[SupportsFloat, _SupportsIndex, str]" + value = float(value) # type: ignore[arg-type] + elif is_integer(value): + # error: Argument 1 to "int" has incompatible type + # "Union[Union[str, int, float, bool], Union[Any, Timestamp, Timedelta, Any]]"; + # expected "Union[str, SupportsInt, _SupportsIndex, _SupportsTrunc]" + value = int(value) # type: ignore[arg-type] + elif is_bool(value): + value = bool(value) + elif isinstance(value, (np.datetime64, np.timedelta64)): + value = maybe_box_datetimelike(value) + return value + + +def maybe_unbox_datetimelike(value: Scalar, dtype: DtypeObj) -> Scalar: + """ + Convert a Timedelta or Timestamp to timedelta64 or datetime64 for setting + into a numpy array. Failing to unbox would risk dropping nanoseconds. + + Notes + ----- + Caller is responsible for checking dtype.kind in ["m", "M"] + """ + if is_valid_na_for_dtype(value, dtype): + # GH#36541: can't fill array directly with pd.NaT + # > np.empty(10, dtype="datetime64[64]").fill(pd.NaT) + # ValueError: cannot convert float NaN to integer + value = dtype.type("NaT", "ns") + elif isinstance(value, Timestamp): + if value.tz is None: + value = value.to_datetime64() + elif not isinstance(dtype, DatetimeTZDtype): + raise TypeError("Cannot unbox tzaware Timestamp to tznaive dtype") + elif isinstance(value, Timedelta): + value = value.to_timedelta64() + + _disallow_mismatched_datetimelike(value, dtype) + return value + + +def _disallow_mismatched_datetimelike(value, dtype: DtypeObj): + """ + numpy allows np.array(dt64values, dtype="timedelta64[ns]") and + vice-versa, but we do not want to allow this, so we need to + check explicitly + """ + vdtype = getattr(value, "dtype", None) + if vdtype is None: + return + elif (vdtype.kind == "m" and dtype.kind == "M") or ( + vdtype.kind == "M" and dtype.kind == "m" + ): + raise TypeError(f"Cannot cast {repr(value)} to {dtype}") + + +def maybe_downcast_to_dtype(result: ArrayLike, dtype: str | np.dtype) -> ArrayLike: + """ + try to cast to the specified dtype (e.g. convert back to bool/int + or could be an astype of float64->float32 + """ + do_round = False + + if isinstance(dtype, str): + if dtype == "infer": + inferred_type = lib.infer_dtype(result, skipna=False) + if inferred_type == "boolean": + dtype = "bool" + elif inferred_type == "integer": + dtype = "int64" + elif inferred_type == "datetime64": + dtype = "datetime64[ns]" + elif inferred_type == "timedelta64": + dtype = "timedelta64[ns]" + + # try to upcast here + elif inferred_type == "floating": + dtype = "int64" + if issubclass(result.dtype.type, np.number): + do_round = True + + else: + # TODO: complex? what if result is already non-object? 
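+                # anything not classified above (including complex results)
+                # falls back to object dtype here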
+ dtype = "object" + + dtype = np.dtype(dtype) + + if not isinstance(dtype, np.dtype): + # enforce our signature annotation + raise TypeError(dtype) # pragma: no cover + + converted = maybe_downcast_numeric(result, dtype, do_round) + if converted is not result: + return converted + + # a datetimelike + # GH12821, iNaT is cast to float + if dtype.kind in ["M", "m"] and result.dtype.kind in ["i", "f"]: + result = result.astype(dtype) + + return result + + +def maybe_downcast_numeric( + result: ArrayLike, dtype: DtypeObj, do_round: bool = False +) -> ArrayLike: + """ + Subset of maybe_downcast_to_dtype restricted to numeric dtypes. + + Parameters + ---------- + result : ndarray or ExtensionArray + dtype : np.dtype or ExtensionDtype + do_round : bool + + Returns + ------- + ndarray or ExtensionArray + """ + if not isinstance(dtype, np.dtype) or not isinstance(result.dtype, np.dtype): + # e.g. SparseDtype has no itemsize attr + return result + + def trans(x): + if do_round: + return x.round() + return x + + if dtype.kind == result.dtype.kind: + # don't allow upcasts here (except if empty) + if result.dtype.itemsize <= dtype.itemsize and result.size: + return result + + if is_bool_dtype(dtype) or is_integer_dtype(dtype): + + if not result.size: + # if we don't have any elements, just astype it + return trans(result).astype(dtype) + + # do a test on the first element, if it fails then we are done + r = result.ravel() + arr = np.array([r[0]]) + + if isna(arr).any(): + # if we have any nulls, then we are done + return result + + elif not isinstance(r[0], (np.integer, np.floating, int, float, bool)): + # a comparable, e.g. a Decimal may slip in here + return result + + if ( + issubclass(result.dtype.type, (np.object_, np.number)) + and notna(result).all() + ): + new_result = trans(result).astype(dtype) + if new_result.dtype.kind == "O" or result.dtype.kind == "O": + # np.allclose may raise TypeError on object-dtype + if (new_result == result).all(): + return new_result + else: + if np.allclose(new_result, result, rtol=0): + return new_result + + elif ( + issubclass(dtype.type, np.floating) + and not is_bool_dtype(result.dtype) + and not is_string_dtype(result.dtype) + ): + return result.astype(dtype) + + return result + + +def maybe_cast_pointwise_result( + result: ArrayLike, + dtype: DtypeObj, + numeric_only: bool = False, + same_dtype: bool = True, +) -> ArrayLike: + """ + Try casting result of a pointwise operation back to the original dtype if + appropriate. + + Parameters + ---------- + result : array-like + Result to cast. + dtype : np.dtype or ExtensionDtype + Input Series from which result was calculated. + numeric_only : bool, default False + Whether to cast only numerics or datetimes as well. + same_dtype : bool, default True + Specify dtype when calling _from_sequence + + Returns + ------- + result : array-like + result maybe casted to the dtype. 
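+
+    Notes
+    -----
+    Categorical and tz-aware datetime dtypes are deliberately *not*
+    restored here, so that e.g. a count over a ``Categorical`` stays
+    numeric rather than being upcast back to categorical.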
+ """ + + assert not is_scalar(result) + + if isinstance(dtype, ExtensionDtype): + if not isinstance(dtype, (CategoricalDtype, DatetimeTZDtype)): + # TODO: avoid this special-casing + # We have to special case categorical so as not to upcast + # things like counts back to categorical + + cls = dtype.construct_array_type() + if same_dtype: + result = maybe_cast_to_extension_array(cls, result, dtype=dtype) + else: + result = maybe_cast_to_extension_array(cls, result) + + elif (numeric_only and is_numeric_dtype(dtype)) or not numeric_only: + result = maybe_downcast_to_dtype(result, dtype) + + return result + + +def maybe_cast_to_extension_array( + cls: type[ExtensionArray], obj: ArrayLike, dtype: ExtensionDtype | None = None +) -> ArrayLike: + """ + Call to `_from_sequence` that returns the object unchanged on Exception. + + Parameters + ---------- + cls : class, subclass of ExtensionArray + obj : arraylike + Values to pass to cls._from_sequence + dtype : ExtensionDtype, optional + + Returns + ------- + ExtensionArray or obj + """ + from pandas.core.arrays.string_ import BaseStringArray + + assert isinstance(cls, type), f"must pass a type: {cls}" + assertion_msg = f"must pass a subclass of ExtensionArray: {cls}" + assert issubclass(cls, ABCExtensionArray), assertion_msg + + # Everything can be converted to StringArrays, but we may not want to convert + if issubclass(cls, BaseStringArray) and lib.infer_dtype(obj) != "string": + return obj + + try: + result = cls._from_sequence(obj, dtype=dtype) + except Exception: + # We can't predict what downstream EA constructors may raise + result = obj + return result + + +@overload +def ensure_dtype_can_hold_na(dtype: np.dtype) -> np.dtype: + ... + + +@overload +def ensure_dtype_can_hold_na(dtype: ExtensionDtype) -> ExtensionDtype: + ... + + +def ensure_dtype_can_hold_na(dtype: DtypeObj) -> DtypeObj: + """ + If we have a dtype that cannot hold NA values, find the best match that can. + """ + if isinstance(dtype, ExtensionDtype): + # TODO: ExtensionDtype.can_hold_na? + return dtype + elif dtype.kind == "b": + return _dtype_obj + elif dtype.kind in ["i", "u"]: + return np.dtype(np.float64) + return dtype + + +def maybe_promote(dtype: np.dtype, fill_value=np.nan): + """ + Find the minimal dtype that can hold both the given dtype and fill_value. + + Parameters + ---------- + dtype : np.dtype + fill_value : scalar, default np.nan + + Returns + ------- + dtype + Upcasted from dtype argument if necessary. + fill_value + Upcasted from fill_value argument if necessary. + + Raises + ------ + ValueError + If fill_value is a non-scalar and dtype is not object. + """ + # TODO(2.0): need to directly use the non-cached version as long as we + # possibly raise a deprecation warning for datetime dtype + if dtype.kind == "M": + return _maybe_promote(dtype, fill_value) + # for performance, we are using a cached version of the actual implementation + # of the function in _maybe_promote. 
However, this doesn't always work (in case + # of non-hashable arguments), so we fallback to the actual implementation if needed + try: + # error: Argument 3 to "__call__" of "_lru_cache_wrapper" has incompatible type + # "Type[Any]"; expected "Hashable" [arg-type] + return _maybe_promote_cached( + dtype, fill_value, type(fill_value) # type: ignore[arg-type] + ) + except TypeError: + # if fill_value is not hashable (required for caching) + return _maybe_promote(dtype, fill_value) + + +@functools.lru_cache(maxsize=128) +def _maybe_promote_cached(dtype, fill_value, fill_value_type): + # The cached version of _maybe_promote below + # This also use fill_value_type as (unused) argument to use this in the + # cache lookup -> to differentiate 1 and True + return _maybe_promote(dtype, fill_value) + + +def _maybe_promote(dtype: np.dtype, fill_value=np.nan): + # The actual implementation of the function, use `maybe_promote` above for + # a cached version. + if not is_scalar(fill_value): + # with object dtype there is nothing to promote, and the user can + # pass pretty much any weird fill_value they like + if not is_object_dtype(dtype): + # with object dtype there is nothing to promote, and the user can + # pass pretty much any weird fill_value they like + raise ValueError("fill_value must be a scalar") + dtype = _dtype_obj + return dtype, fill_value + + kinds = ["i", "u", "f", "c", "m", "M"] + if is_valid_na_for_dtype(fill_value, dtype) and dtype.kind in kinds: + dtype = ensure_dtype_can_hold_na(dtype) + fv = na_value_for_dtype(dtype) + return dtype, fv + + elif isna(fill_value): + dtype = _dtype_obj + if fill_value is None: + # but we retain e.g. pd.NA + fill_value = np.nan + return dtype, fill_value + + # returns tuple of (dtype, fill_value) + if issubclass(dtype.type, np.datetime64): + inferred, fv = infer_dtype_from_scalar(fill_value, pandas_dtype=True) + if inferred == dtype: + return dtype, fv + + # TODO(2.0): once this deprecation is enforced, this whole case + # becomes equivalent to: + # dta = DatetimeArray._from_sequence([], dtype="M8[ns]") + # try: + # fv = dta._validate_setitem_value(fill_value) + # return dta.dtype, fv + # except (ValueError, TypeError): + # return _dtype_obj, fill_value + if isinstance(fill_value, date) and not isinstance(fill_value, datetime): + # deprecate casting of date object to match infer_dtype_from_scalar + # and DatetimeArray._validate_setitem_value + try: + fv = Timestamp(fill_value).to_datetime64() + except OutOfBoundsDatetime: + pass + else: + warnings.warn( + "Using a `date` object for fill_value with `datetime64[ns]` " + "dtype is deprecated. In a future version, this will be cast " + "to object dtype. Pass `fill_value=Timestamp(date_obj)` instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return dtype, fv + elif isinstance(fill_value, str): + try: + # explicitly wrap in str to convert np.str_ + fv = Timestamp(str(fill_value)) + except (ValueError, TypeError): + pass + else: + if isna(fv) or fv.tz is None: + return dtype, fv.asm8 + + return np.dtype("object"), fill_value + + elif issubclass(dtype.type, np.timedelta64): + inferred, fv = infer_dtype_from_scalar(fill_value, pandas_dtype=True) + if inferred == dtype: + return dtype, fv + + return np.dtype("object"), fill_value + + elif is_float(fill_value): + if issubclass(dtype.type, np.bool_): + dtype = np.dtype(np.object_) + + elif issubclass(dtype.type, np.integer): + dtype = np.dtype(np.float64) + + elif dtype.kind == "f": + mst = np.min_scalar_type(fill_value) + if mst > dtype: + # e.g. 
mst is np.float64 and dtype is np.float32 + dtype = mst + + elif dtype.kind == "c": + mst = np.min_scalar_type(fill_value) + dtype = np.promote_types(dtype, mst) + + elif is_bool(fill_value): + if not issubclass(dtype.type, np.bool_): + dtype = np.dtype(np.object_) + + elif is_integer(fill_value): + if issubclass(dtype.type, np.bool_): + dtype = np.dtype(np.object_) + + elif issubclass(dtype.type, np.integer): + if not np.can_cast(fill_value, dtype): + # upcast to prevent overflow + mst = np.min_scalar_type(fill_value) + dtype = np.promote_types(dtype, mst) + if dtype.kind == "f": + # Case where we disagree with numpy + dtype = np.dtype(np.object_) + + elif is_complex(fill_value): + if issubclass(dtype.type, np.bool_): + dtype = np.dtype(np.object_) + + elif issubclass(dtype.type, (np.integer, np.floating)): + mst = np.min_scalar_type(fill_value) + dtype = np.promote_types(dtype, mst) + + elif dtype.kind == "c": + mst = np.min_scalar_type(fill_value) + if mst > dtype: + # e.g. mst is np.complex128 and dtype is np.complex64 + dtype = mst + + else: + dtype = np.dtype(np.object_) + + # in case we have a string that looked like a number + if issubclass(dtype.type, (bytes, str)): + dtype = np.dtype(np.object_) + + fill_value = _ensure_dtype_type(fill_value, dtype) + return dtype, fill_value + + +def _ensure_dtype_type(value, dtype: np.dtype): + """ + Ensure that the given value is an instance of the given dtype. + + e.g. if out dtype is np.complex64_, we should have an instance of that + as opposed to a python complex object. + + Parameters + ---------- + value : object + dtype : np.dtype + + Returns + ------- + object + """ + # Start with exceptions in which we do _not_ cast to numpy types + + if dtype == _dtype_obj: + return value + + # Note: before we get here we have already excluded isna(value) + return dtype.type(value) + + +def infer_dtype_from(val, pandas_dtype: bool = False) -> tuple[DtypeObj, Any]: + """ + Interpret the dtype from a scalar or array. + + Parameters + ---------- + val : object + pandas_dtype : bool, default False + whether to infer dtype including pandas extension types. + If False, scalar/array belongs to pandas extension types is inferred as + object + """ + if not is_list_like(val): + return infer_dtype_from_scalar(val, pandas_dtype=pandas_dtype) + return infer_dtype_from_array(val, pandas_dtype=pandas_dtype) + + +def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> tuple[DtypeObj, Any]: + """ + Interpret the dtype from a scalar. + + Parameters + ---------- + pandas_dtype : bool, default False + whether to infer dtype including pandas extension types. + If False, scalar belongs to pandas extension types is inferred as + object + """ + dtype: DtypeObj = _dtype_obj + + # a 1-element ndarray + if isinstance(val, np.ndarray): + if val.ndim != 0: + msg = "invalid ndarray passed to infer_dtype_from_scalar" + raise ValueError(msg) + + dtype = val.dtype + val = lib.item_from_zerodim(val) + + elif isinstance(val, str): + + # If we create an empty array using a string to infer + # the dtype, NumPy will only allocate one character per entry + # so this is kind of bad. Alternately we could use np.repeat + # instead of np.empty (but then you still don't want things + # coming out as np.str_! 
+
+        dtype = _dtype_obj
+
+    elif isinstance(val, (np.datetime64, datetime)):
+        try:
+            val = Timestamp(val)
+        except OutOfBoundsDatetime:
+            return _dtype_obj, val
+
+        # error: Non-overlapping identity check (left operand type: "Timestamp",
+        # right operand type: "NaTType")
+        if val is NaT or val.tz is None:  # type: ignore[comparison-overlap]
+            dtype = np.dtype("M8[ns]")
+            val = val.to_datetime64()
+        else:
+            if pandas_dtype:
+                dtype = DatetimeTZDtype(unit="ns", tz=val.tz)
+            else:
+                # return datetimetz as object
+                return _dtype_obj, val
+
+    elif isinstance(val, (np.timedelta64, timedelta)):
+        try:
+            val = Timedelta(val)
+        except (OutOfBoundsTimedelta, OverflowError):
+            dtype = _dtype_obj
+        else:
+            dtype = np.dtype("m8[ns]")
+            val = np.timedelta64(val.value, "ns")
+
+    elif is_bool(val):
+        dtype = np.dtype(np.bool_)
+
+    elif is_integer(val):
+        if isinstance(val, np.integer):
+            dtype = np.dtype(type(val))
+        else:
+            dtype = np.dtype(np.int64)
+
+        try:
+            np.array(val, dtype=dtype)
+        except OverflowError:
+            dtype = np.array(val).dtype
+
+    elif is_float(val):
+        if isinstance(val, np.floating):
+            dtype = np.dtype(type(val))
+        else:
+            dtype = np.dtype(np.float64)
+
+    elif is_complex(val):
+        dtype = np.dtype(np.complex_)
+
+    elif pandas_dtype:
+        if lib.is_period(val):
+            dtype = PeriodDtype(freq=val.freq)
+        elif lib.is_interval(val):
+            subtype = infer_dtype_from_scalar(val.left, pandas_dtype=True)[0]
+            dtype = IntervalDtype(subtype=subtype, closed=val.closed)
+
+    return dtype, val
+
+
+def dict_compat(d: dict[Scalar, Scalar]) -> dict[Scalar, Scalar]:
+    """
+    Convert datetimelike-keyed dicts to a Timestamp-keyed dict.
+
+    Parameters
+    ----------
+    d: dict-like object
+
+    Returns
+    -------
+    dict
+    """
+    return {maybe_box_datetimelike(key): value for key, value in d.items()}
+
+
+def infer_dtype_from_array(
+    arr, pandas_dtype: bool = False
+) -> tuple[DtypeObj, ArrayLike]:
+    """
+    Infer the dtype from an array.
+
+    Parameters
+    ----------
+    arr : array
+    pandas_dtype : bool, default False
+        whether to infer dtype including pandas extension types.
+        If False, an array belonging to a pandas extension type
+        is inferred as object.
+
+    Returns
+    -------
+    tuple (numpy-compat/pandas-compat dtype, array)
+
+    Notes
+    -----
+    If pandas_dtype=False, these infer to numpy dtypes
+    exactly, with the exception that mixed / object dtypes
+    are not coerced by stringifying or conversion.
+
+    If pandas_dtype=True, datetime64tz-aware/categorical
+    types will retain their character.
+
+    Examples
+    --------
+    >>> np.asarray([1, '1'])
+    array(['1', '1'], dtype='<U21')
+
+    >>> infer_dtype_from_array([1, '1'])
+    (dtype('O'), [1, '1'])
+    """
+    if isinstance(arr, np.ndarray):
+        return arr.dtype, arr
+
+    if not is_list_like(arr):
+        raise TypeError("'arr' must be list-like")
+
+    if pandas_dtype and is_extension_array_dtype(arr):
+        return arr.dtype, arr
+
+    elif isinstance(arr, ABCSeries):
+        return arr.dtype, np.asarray(arr)
+
+    # don't force numpy coerce with nan's
+    inferred = lib.infer_dtype(arr, skipna=False)
+    if inferred in ["string", "bytes", "mixed", "mixed-integer"]:
+        return (np.dtype(np.object_), arr)
+
+    arr = np.asarray(arr)
+    return arr.dtype, arr
+
+
+def maybe_infer_dtype_type(element):
+    """
+    Try to infer an object's dtype, for use in arithmetic ops.
+
+    Uses `element.dtype` if that's available.
+    Objects implementing the iterator protocol are cast to a NumPy array,
+    and from there the array's type is used.
+
+    Parameters
+    ----------
+    element : object
+        Possibly has a `.dtype` attribute, and possibly the iterator
+        protocol.
+
+    Returns
+    -------
+    tipo : np.dtype or None
+
+    Examples
+    --------
+    >>> from collections import namedtuple
+    >>> Foo = namedtuple("Foo", "dtype")
+    >>> maybe_infer_dtype_type(Foo(np.dtype("i8")))
+    dtype('int64')
+    """
+    tipo = None
+    if hasattr(element, "dtype"):
+        tipo = element.dtype
+    elif is_list_like(element):
+        element = np.asarray(element)
+        tipo = element.dtype
+    return tipo
+
+
+def maybe_upcast(
+    values: NumpyArrayT,
+    fill_value: Scalar = np.nan,
+    copy: bool = False,
+) -> tuple[NumpyArrayT, Scalar]:
+    """
+    Provide explicit type promotion and coercion.
+
+    Parameters
+    ----------
+    values : np.ndarray
+        The array that we may want to upcast.
+    fill_value : what we want to fill with
+    copy : bool, default False
+        If True always make a copy even if no upcast is required.
+
+    Returns
+    -------
+    values : np.ndarray
+        the original array, possibly upcast
+    fill_value :
+        the fill value, possibly upcast
+    """
+    new_dtype, fill_value = maybe_promote(values.dtype, fill_value)
+    # We get a copy in all cases _except_ (values.dtype == new_dtype and not copy)
+    upcast_values = values.astype(new_dtype, copy=copy)
+
+    # error: Incompatible return value type (got "Tuple[ndarray[Any, dtype[Any]],
+    # Union[Union[str, int, float, bool] Union[Period, Timestamp, Timedelta, Any]]]",
+    # expected "Tuple[NumpyArrayT, Union[Union[str, int, float, bool], Union[Period,
+    # Timestamp, Timedelta, Any]]]")
+    return upcast_values, fill_value  # type: ignore[return-value]
+
+
+def invalidate_string_dtypes(dtype_set: set[DtypeObj]):
+    """
+    Change string like dtypes to object for
+    ``DataFrame.select_dtypes()``.
+    """
+    # error: Argument 1 to <set> has incompatible type "Type[generic]"; expected
+    # "Union[dtype[Any], ExtensionDtype, None]"
+    # error: Argument 2 to <set> has incompatible type "Type[generic]"; expected
+    # "Union[dtype[Any], ExtensionDtype, None]"
+    non_string_dtypes = dtype_set - {
+        np.dtype("S").type,  # type: ignore[arg-type]
+        np.dtype("<U").type,  # type: ignore[arg-type]
+    }
+    if non_string_dtypes != dtype_set:
+        raise TypeError("string dtypes are not allowed, use 'object' instead")
+
+
+def coerce_indexer_dtype(indexer, categories):
+    """coerce the indexer input array to the smallest dtype possible"""
+    length = len(categories)
+    if length < _int8_max:
+        return ensure_int8(indexer)
+    elif length < _int16_max:
+        return ensure_int16(indexer)
+    elif length < _int32_max:
+        return ensure_int32(indexer)
+    return ensure_platform_int(indexer)
+
+
+def astype_dt64_to_dt64tz(
+    values: ArrayLike, dtype: DtypeObj, copy: bool, via_utc: bool = False
+) -> DatetimeArray:
+    # GH#33401 we have inconsistent behaviors between
+    #  Datetimeindex[naive].astype(tzaware)
+    #  Series[dt64].astype(tzaware)
+    # This collects them in one place to prevent further fragmentation.
+
+    from pandas.core.construction import ensure_wrapped_if_datetimelike
+
+    values = ensure_wrapped_if_datetimelike(values)
+    values = cast("DatetimeArray", values)
+    aware = isinstance(dtype, DatetimeTZDtype)
+
+    if via_utc:
+        # Series.astype behavior
+
+        # caller is responsible for checking this
+        assert values.tz is None and aware
+        dtype = cast(DatetimeTZDtype, dtype)
+
+        if copy:
+            # this should be the only copy
+            values = values.copy()
+
+        warnings.warn(
+            "Using .astype to convert from timezone-naive dtype to "
+            "timezone-aware dtype is deprecated and will raise in a "
+            "future version. Use ser.dt.tz_localize instead.",
+            FutureWarning,
+            stacklevel=find_stack_level(),
+        )
+
+        # GH#33401 this doesn't match DatetimeArray.astype, which
+        #  goes through the `not via_utc` path
+        return values.tz_localize("UTC").tz_convert(dtype.tz)
+
+    else:
+        # DatetimeArray/DatetimeIndex.astype behavior
+        if values.tz is None and aware:
+            dtype = cast(DatetimeTZDtype, dtype)
+            warnings.warn(
+                "Using .astype to convert from timezone-naive dtype to "
+                "timezone-aware dtype is deprecated and will raise in a "
+                "future version.
Use obj.tz_localize instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + return values.tz_localize(dtype.tz) + + elif aware: + # GH#18951: datetime64_tz dtype but not equal means different tz + dtype = cast(DatetimeTZDtype, dtype) + result = values.tz_convert(dtype.tz) + if copy: + result = result.copy() + return result + + elif values.tz is not None: + warnings.warn( + "Using .astype to convert from timezone-aware dtype to " + "timezone-naive dtype is deprecated and will raise in a " + "future version. Use obj.tz_localize(None) or " + "obj.tz_convert('UTC').tz_localize(None) instead", + FutureWarning, + stacklevel=find_stack_level(), + ) + + result = values.tz_convert("UTC").tz_localize(None) + if copy: + result = result.copy() + return result + + raise NotImplementedError("dtype_equal case should be handled elsewhere") + + +def astype_td64_unit_conversion( + values: np.ndarray, dtype: np.dtype, copy: bool +) -> np.ndarray: + """ + By pandas convention, converting to non-nano timedelta64 + returns an int64-dtyped array with ints representing multiples + of the desired timedelta unit. This is essentially division. + + Parameters + ---------- + values : np.ndarray[timedelta64[ns]] + dtype : np.dtype + timedelta64 with unit not-necessarily nano + copy : bool + + Returns + ------- + np.ndarray + """ + if is_dtype_equal(values.dtype, dtype): + if copy: + return values.copy() + return values + + # otherwise we are converting to non-nano + result = values.astype(dtype, copy=False) # avoid double-copying + result = result.astype(np.float64) + + mask = isna(values) + np.putmask(result, mask, np.nan) + return result + + +@overload +def astype_nansafe( + arr: np.ndarray, dtype: np.dtype, copy: bool = ..., skipna: bool = ... +) -> np.ndarray: + ... + + +@overload +def astype_nansafe( + arr: np.ndarray, dtype: ExtensionDtype, copy: bool = ..., skipna: bool = ... +) -> ExtensionArray: + ... + + +def astype_nansafe( + arr: np.ndarray, dtype: DtypeObj, copy: bool = True, skipna: bool = False +) -> ArrayLike: + """ + Cast the elements of an array to a given dtype a nan-safe manner. + + Parameters + ---------- + arr : ndarray + dtype : np.dtype or ExtensionDtype + copy : bool, default True + If False, a view will be attempted but may fail, if + e.g. the item sizes don't align. + skipna: bool, default False + Whether or not we should skip NaN when casting as a string-type. + + Raises + ------ + ValueError + The dtype was a datetime64/timedelta64 dtype, but it had no unit. 
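+
+    Examples
+    --------
+    An illustrative doctest (mine, not from the pandas docs); exact corner-case
+    behavior varies across versions:
+
+    >>> astype_nansafe(np.array([1.0, 2.0], dtype=object), np.dtype("float64"))
+    array([1., 2.])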
+ """ + if arr.ndim > 1: + flat = arr.ravel() + result = astype_nansafe(flat, dtype, copy=copy, skipna=skipna) + # error: Item "ExtensionArray" of "Union[ExtensionArray, ndarray]" has no + # attribute "reshape" + return result.reshape(arr.shape) # type: ignore[union-attr] + + # We get here with 0-dim from sparse + arr = np.atleast_1d(arr) + + # dispatch on extension dtype if needed + if isinstance(dtype, ExtensionDtype): + return dtype.construct_array_type()._from_sequence(arr, dtype=dtype, copy=copy) + + elif not isinstance(dtype, np.dtype): # pragma: no cover + raise ValueError("dtype must be np.dtype or ExtensionDtype") + + if arr.dtype.kind in ["m", "M"] and ( + issubclass(dtype.type, str) or dtype == _dtype_obj + ): + from pandas.core.construction import ensure_wrapped_if_datetimelike + + arr = ensure_wrapped_if_datetimelike(arr) + return arr.astype(dtype, copy=copy) + + if issubclass(dtype.type, str): + return lib.ensure_string_array(arr, skipna=skipna, convert_na_value=False) + + elif is_datetime64_dtype(arr.dtype): + if dtype == np.int64: + if isna(arr).any(): + raise ValueError("Cannot convert NaT values to integer") + return arr.view(dtype) + + # allow frequency conversions + if dtype.kind == "M": + return arr.astype(dtype) + + raise TypeError(f"cannot astype a datetimelike from [{arr.dtype}] to [{dtype}]") + + elif is_timedelta64_dtype(arr.dtype): + if dtype == np.int64: + if isna(arr).any(): + raise ValueError("Cannot convert NaT values to integer") + return arr.view(dtype) + + elif dtype.kind == "m": + return astype_td64_unit_conversion(arr, dtype, copy=copy) + + raise TypeError(f"cannot astype a timedelta from [{arr.dtype}] to [{dtype}]") + + elif np.issubdtype(arr.dtype, np.floating) and np.issubdtype(dtype, np.integer): + return astype_float_to_int_nansafe(arr, dtype, copy) + + elif is_object_dtype(arr.dtype): + + # work around NumPy brokenness, #1987 + if np.issubdtype(dtype.type, np.integer): + return lib.astype_intsafe(arr, dtype) + + # if we have a datetime/timedelta array of objects + # then coerce to a proper dtype and recall astype_nansafe + + elif is_datetime64_dtype(dtype): + from pandas import to_datetime + + return astype_nansafe( + to_datetime(arr).values, + dtype, + copy=copy, + ) + elif is_timedelta64_dtype(dtype): + from pandas import to_timedelta + + return astype_nansafe(to_timedelta(arr)._values, dtype, copy=copy) + + if dtype.name in ("datetime64", "timedelta64"): + msg = ( + f"The '{dtype.name}' dtype has no unit. Please pass in " + f"'{dtype.name}[ns]' instead." + ) + raise ValueError(msg) + + if copy or is_object_dtype(arr.dtype) or is_object_dtype(dtype): + # Explicit copy, or required since NumPy can't view from / to object. + return arr.astype(dtype, copy=True) + + return arr.astype(dtype, copy=copy) + + +def astype_float_to_int_nansafe( + values: np.ndarray, dtype: np.dtype, copy: bool +) -> np.ndarray: + """ + astype with a check preventing converting NaN to an meaningless integer value. + """ + if not np.isfinite(values).all(): + raise IntCastingNaNError( + "Cannot convert non-finite values (NA or inf) to integer" + ) + return values.astype(dtype, copy=copy) + + +def astype_array(values: ArrayLike, dtype: DtypeObj, copy: bool = False) -> ArrayLike: + """ + Cast array (ndarray or ExtensionArray) to the new dtype. 
+ + Parameters + ---------- + values : ndarray or ExtensionArray + dtype : dtype object + copy : bool, default False + copy if indicated + + Returns + ------- + ndarray or ExtensionArray + """ + if ( + values.dtype.kind in ["m", "M"] + and dtype.kind in ["i", "u"] + and isinstance(dtype, np.dtype) + and dtype.itemsize != 8 + ): + # TODO(2.0) remove special case once deprecation on DTA/TDA is enforced + msg = rf"cannot astype a datetimelike from [{values.dtype}] to [{dtype}]" + raise TypeError(msg) + + if is_datetime64tz_dtype(dtype) and is_datetime64_dtype(values.dtype): + return astype_dt64_to_dt64tz(values, dtype, copy, via_utc=True) + + if is_dtype_equal(values.dtype, dtype): + if copy: + return values.copy() + return values + + if not isinstance(values, np.ndarray): + # i.e. ExtensionArray + values = values.astype(dtype, copy=copy) + + else: + values = astype_nansafe(values, dtype, copy=copy) + + # in pandas we don't store numpy str dtypes, so convert to object + if isinstance(dtype, np.dtype) and issubclass(values.dtype.type, str): + values = np.array(values, dtype=object) + + return values + + +def astype_array_safe( + values: ArrayLike, dtype, copy: bool = False, errors: str = "raise" +) -> ArrayLike: + """ + Cast array (ndarray or ExtensionArray) to the new dtype. + + This basically is the implementation for DataFrame/Series.astype and + includes all custom logic for pandas (NaN-safety, converting str to object, + not allowing ) + + Parameters + ---------- + values : ndarray or ExtensionArray + dtype : str, dtype convertible + copy : bool, default False + copy if indicated + errors : str, {'raise', 'ignore'}, default 'raise' + - ``raise`` : allow exceptions to be raised + - ``ignore`` : suppress exceptions. On error return original object + + Returns + ------- + ndarray or ExtensionArray + """ + errors_legal_values = ("raise", "ignore") + + if errors not in errors_legal_values: + invalid_arg = ( + "Expected value of kwarg 'errors' to be one of " + f"{list(errors_legal_values)}. Supplied value is '{errors}'" + ) + raise ValueError(invalid_arg) + + if inspect.isclass(dtype) and issubclass(dtype, ExtensionDtype): + msg = ( + f"Expected an instance of {dtype.__name__}, " + "but got the class instead. Try instantiating 'dtype'." + ) + raise TypeError(msg) + + dtype = pandas_dtype(dtype) + if isinstance(dtype, PandasDtype): + # Ensure we don't end up with a PandasArray + dtype = dtype.numpy_dtype + + try: + new_values = astype_array(values, dtype, copy=copy) + except (ValueError, TypeError): + # e.g. astype_nansafe can fail on object-dtype of strings + # trying to convert to float + if errors == "ignore": + new_values = values + else: + raise + + return new_values + + +def soft_convert_objects( + values: np.ndarray, + datetime: bool = True, + numeric: bool = True, + timedelta: bool = True, + period: bool = True, + copy: bool = True, +) -> ArrayLike: + """ + Try to coerce datetime, timedelta, and numeric object-dtype columns + to inferred dtype. 
+
+    Parameters
+    ----------
+    values : np.ndarray[object]
+    datetime : bool, default True
+    numeric : bool, default True
+    timedelta : bool, default True
+    period : bool, default True
+    copy : bool, default True
+
+    Returns
+    -------
+    np.ndarray or ExtensionArray
+    """
+    validate_bool_kwarg(datetime, "datetime")
+    validate_bool_kwarg(numeric, "numeric")
+    validate_bool_kwarg(timedelta, "timedelta")
+    validate_bool_kwarg(copy, "copy")
+
+    conversion_count = sum((datetime, numeric, timedelta))
+    if conversion_count == 0:
+        raise ValueError("At least one of datetime, numeric or timedelta must be True.")
+
+    # Soft conversions
+    if datetime or timedelta:
+        # GH 20380: a datetime beyond year 2262 is outside the bounds of
+        # nanosecond-resolution 64-bit integers.
+        try:
+            converted = lib.maybe_convert_objects(
+                values,
+                convert_datetime=datetime,
+                convert_timedelta=timedelta,
+                convert_period=period,
+            )
+        except (OutOfBoundsDatetime, ValueError):
+            return values
+        if converted is not values:
+            return converted
+
+    if numeric and is_object_dtype(values.dtype):
+        converted, _ = lib.maybe_convert_numeric(values, set(), coerce_numeric=True)
+
+        # If all NaN, then do not alter
+        values = converted if not isna(converted).all() else values
+        values = values.copy() if copy else values
+
+    return values
+
+
+def convert_dtypes(
+    input_array: ArrayLike,
+    convert_string: bool = True,
+    convert_integer: bool = True,
+    convert_boolean: bool = True,
+    convert_floating: bool = True,
+) -> DtypeObj:
+    """
+    Convert objects to best possible type, and optionally,
+    to types supporting ``pd.NA``.
+
+    Parameters
+    ----------
+    input_array : ExtensionArray or np.ndarray
+    convert_string : bool, default True
+        Whether object dtypes should be converted to ``StringDtype()``.
+    convert_integer : bool, default True
+        Whether, if possible, conversion can be done to integer extension types.
+    convert_boolean : bool, default True
+        Whether object dtypes should be converted to ``BooleanDtype()``.
+    convert_floating : bool, default True
+        Whether, if possible, conversion can be done to floating extension types.
+        If `convert_integer` is also True, preference will be given to integer
+        dtypes if the floats can be faithfully cast to integers.
+
+    Returns
+    -------
+    np.dtype, or ExtensionDtype
+    """
+    inferred_dtype: str | DtypeObj
+
+    if (
+        convert_string or convert_integer or convert_boolean or convert_floating
+    ) and isinstance(input_array, np.ndarray):
+
+        if is_object_dtype(input_array.dtype):
+            inferred_dtype = lib.infer_dtype(input_array)
+        else:
+            inferred_dtype = input_array.dtype
+
+        if is_string_dtype(inferred_dtype):
+            if not convert_string or inferred_dtype == "bytes":
+                return input_array.dtype
+            else:
+                return pandas_dtype("string")
+
+        if convert_integer:
+            target_int_dtype = pandas_dtype("Int64")
+
+            if is_integer_dtype(input_array.dtype):
+                from pandas.core.arrays.integer import INT_STR_TO_DTYPE
+
+                inferred_dtype = INT_STR_TO_DTYPE.get(
+                    input_array.dtype.name, target_int_dtype
+                )
+            elif is_numeric_dtype(input_array.dtype):
+                # TODO: de-dup with maybe_cast_to_integer_array?
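+                # Illustrative note (mine, not upstream): np.array([1.0, 2.0])
+                # passes the lossless check below and is inferred as Int64,
+                # while np.array([1.5]) fails it and, with convert_floating,
+                # maps to Float64 instead.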
+                arr = input_array[notna(input_array)]
+                if (arr.astype(int) == arr).all():
+                    inferred_dtype = target_int_dtype
+                else:
+                    inferred_dtype = input_array.dtype
+
+        if convert_floating:
+            if not is_integer_dtype(input_array.dtype) and is_numeric_dtype(
+                input_array.dtype
+            ):
+                from pandas.core.arrays.floating import FLOAT_STR_TO_DTYPE
+
+                inferred_float_dtype: DtypeObj = FLOAT_STR_TO_DTYPE.get(
+                    input_array.dtype.name, pandas_dtype("Float64")
+                )
+                # if we could also convert to integer, check if all floats
+                # are actually integers
+                if convert_integer:
+                    # TODO: de-dup with maybe_cast_to_integer_array?
+                    arr = input_array[notna(input_array)]
+                    if (arr.astype(int) == arr).all():
+                        inferred_dtype = pandas_dtype("Int64")
+                    else:
+                        inferred_dtype = inferred_float_dtype
+                else:
+                    inferred_dtype = inferred_float_dtype
+
+        if convert_boolean:
+            if is_bool_dtype(input_array.dtype):
+                inferred_dtype = pandas_dtype("boolean")
+            elif isinstance(inferred_dtype, str) and inferred_dtype == "boolean":
+                inferred_dtype = pandas_dtype("boolean")
+
+        if isinstance(inferred_dtype, str):
+            # If we couldn't do anything else, then we retain the dtype
+            inferred_dtype = input_array.dtype
+
+    else:
+        return input_array.dtype
+
+    # error: Incompatible return value type (got "Union[str, Union[dtype[Any],
+    # ExtensionDtype]]", expected "Union[dtype[Any], ExtensionDtype]")
+    return inferred_dtype  # type: ignore[return-value]
+
+
+def maybe_infer_to_datetimelike(
+    value: np.ndarray,
+) -> np.ndarray | DatetimeArray | TimedeltaArray | PeriodArray | IntervalArray:
+    """
+    We might have an array (or single object) that is datetime-like and no
+    dtype was passed; don't change the value unless we find a
+    datetime/timedelta set.
+
+    This is pretty strict in that a datetime/timedelta is REQUIRED
+    in addition to possible nulls/string likes.
+
+    Parameters
+    ----------
+    value : np.ndarray[object]
+
+    Returns
+    -------
+    np.ndarray, DatetimeArray, TimedeltaArray, PeriodArray, or IntervalArray
+
+    """
+    if not isinstance(value, np.ndarray) or value.dtype != object:
+        # Caller is responsible for passing only ndarray[object]
+        raise TypeError(type(value))  # pragma: no cover
+
+    v = np.array(value, copy=False)
+
+    shape = v.shape
+    if v.ndim != 1:
+        v = v.ravel()
+
+    if not len(v):
+        return value
+
+    def try_datetime(v: np.ndarray) -> ArrayLike:
+        # Coerce to datetime64, datetime64tz, or in corner cases
+        #  object[datetimes]
+        from pandas.core.arrays.datetimes import sequence_to_datetimes
+
+        try:
+            # GH#19671 we pass require_iso8601 to be relatively strict
+            #  when parsing strings.
+            dta = sequence_to_datetimes(v, require_iso8601=True)
+        except (ValueError, TypeError):
+            # e.g. <class 'numpy.timedelta64'> is not convertible to datetime
+            return v.reshape(shape)
+        else:
+            # GH#19761 we may have mixed timezones, in which case 'dta' is
+            #  an ndarray[object]. Only 1 test
+            #  relies on this behavior, see GH#40111
+            return dta.reshape(shape)
+
+    def try_timedelta(v: np.ndarray) -> np.ndarray:
+        # safe coerce to timedelta64
+
+        # will try first with a string & object conversion
+        try:
+            # bc we know v.dtype == object, this is equivalent to
+            #  `np.asarray(to_timedelta(v))`, but using a lower-level API that
+            #  does not require a circular import.
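+            # For example (illustrative, not an upstream comment), an object
+            # array like np.array(["1 days", "2 days"], dtype=object) converts
+            # here to the same m8[ns] values that np.asarray(pd.to_timedelta(v))
+            # would produce.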
+ td_values = array_to_timedelta64(v).view("m8[ns]") + except (ValueError, OverflowError): + return v.reshape(shape) + else: + return td_values.reshape(shape) + + inferred_type, seen_str = lib.infer_datetimelike_array(ensure_object(v)) + if inferred_type in ["period", "interval"]: + # Incompatible return value type (got "Union[ExtensionArray, ndarray]", + # expected "Union[ndarray, DatetimeArray, TimedeltaArray, PeriodArray, + # IntervalArray]") + return lib.maybe_convert_objects( # type: ignore[return-value] + v, convert_period=True, convert_interval=True + ) + + if inferred_type == "datetime": + # error: Incompatible types in assignment (expression has type "ExtensionArray", + # variable has type "Union[ndarray, List[Any]]") + value = try_datetime(v) # type: ignore[assignment] + elif inferred_type == "timedelta": + value = try_timedelta(v) + elif inferred_type == "nat": + + # if all NaT, return as datetime + if isna(v).all(): + # error: Incompatible types in assignment (expression has type + # "ExtensionArray", variable has type "Union[ndarray, List[Any]]") + value = try_datetime(v) # type: ignore[assignment] + else: + + # We have at least a NaT and a string + # try timedelta first to avoid spurious datetime conversions + # e.g. '00:00:01' is a timedelta but technically is also a datetime + value = try_timedelta(v) + if lib.infer_dtype(value, skipna=False) in ["mixed"]: + # cannot skip missing values, as NaT implies that the string + # is actually a datetime + + # error: Incompatible types in assignment (expression has type + # "ExtensionArray", variable has type "Union[ndarray, List[Any]]") + value = try_datetime(v) # type: ignore[assignment] + + if value.dtype.kind in ["m", "M"] and seen_str: + # TODO(2.0): enforcing this deprecation should close GH#40111 + warnings.warn( + f"Inferring {value.dtype} from data containing strings is deprecated " + "and will be removed in a future version. To retain the old behavior " + f"explicitly pass Series(data, dtype={value.dtype})", + FutureWarning, + stacklevel=find_stack_level(), + ) + return value + + +def maybe_cast_to_datetime( + value: ExtensionArray | np.ndarray | list, dtype: DtypeObj | None +) -> ExtensionArray | np.ndarray: + """ + try to cast the array/value to a datetimelike dtype, converting float + nan to iNaT + + We allow a list *only* when dtype is not None. + """ + from pandas.core.arrays.datetimes import sequence_to_datetimes + from pandas.core.arrays.timedeltas import TimedeltaArray + + if not is_list_like(value): + raise TypeError("value must be listlike") + + if is_timedelta64_dtype(dtype): + # TODO: _from_sequence would raise ValueError in cases where + # ensure_nanosecond_dtype raises TypeError + dtype = cast(np.dtype, dtype) + dtype = ensure_nanosecond_dtype(dtype) + res = TimedeltaArray._from_sequence(value, dtype=dtype) + return res + + if dtype is not None: + is_datetime64 = is_datetime64_dtype(dtype) + is_datetime64tz = is_datetime64tz_dtype(dtype) + + vdtype = getattr(value, "dtype", None) + + if is_datetime64 or is_datetime64tz: + dtype = ensure_nanosecond_dtype(dtype) + + value = np.array(value, copy=False) + + # we have an array of datetime or timedeltas & nulls + if value.size or not is_dtype_equal(value.dtype, dtype): + _disallow_mismatched_datetimelike(value, dtype) + + try: + if is_datetime64: + dta = sequence_to_datetimes(value) + # GH 25843: Remove tz information since the dtype + # didn't specify one + + if dta.tz is not None: + warnings.warn( + "Data is timezone-aware. 
Converting " + "timezone-aware data to timezone-naive by " + "passing dtype='datetime64[ns]' to " + "DataFrame or Series is deprecated and will " + "raise in a future version. Use " + "`pd.Series(values).dt.tz_localize(None)` " + "instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + # equiv: dta.view(dtype) + # Note: NOT equivalent to dta.astype(dtype) + dta = dta.tz_localize(None) + + value = dta + elif is_datetime64tz: + dtype = cast(DatetimeTZDtype, dtype) + # The string check can be removed once issue #13712 + # is solved. String data that is passed with a + # datetime64tz is assumed to be naive which should + # be localized to the timezone. + is_dt_string = is_string_dtype(value.dtype) + dta = sequence_to_datetimes(value) + if dta.tz is not None: + value = dta.astype(dtype, copy=False) + elif is_dt_string: + # Strings here are naive, so directly localize + # equiv: dta.astype(dtype) # though deprecated + + value = dta.tz_localize(dtype.tz) + else: + # Numeric values are UTC at this point, + # so localize and convert + # equiv: Series(dta).astype(dtype) # though deprecated + if getattr(vdtype, "kind", None) == "M": + # GH#24559, GH#33401 deprecate behavior inconsistent + # with DatetimeArray/DatetimeIndex + warnings.warn( + "In a future version, constructing a Series " + "from datetime64[ns] data and a " + "DatetimeTZDtype will interpret the data " + "as wall-times instead of " + "UTC times, matching the behavior of " + "DatetimeIndex. To treat the data as UTC " + "times, use pd.Series(data).dt" + ".tz_localize('UTC').tz_convert(dtype.tz) " + "or pd.Series(data.view('int64'), dtype=dtype)", + FutureWarning, + stacklevel=find_stack_level(), + ) + + value = dta.tz_localize("UTC").tz_convert(dtype.tz) + except OutOfBoundsDatetime: + raise + except ValueError: + # TODO(GH#40048): only catch dateutil's ParserError + # once we can reliably import it in all supported versions + pass + + elif getattr(vdtype, "kind", None) in ["m", "M"]: + # we are already datetimelike and want to coerce to non-datetimelike; + # astype_nansafe will raise for anything other than object, then upcast. + # see test_datetimelike_values_with_object_dtype + # error: Argument 2 to "astype_nansafe" has incompatible type + # "Union[dtype[Any], ExtensionDtype]"; expected "dtype[Any]" + return astype_nansafe(value, dtype) # type: ignore[arg-type] + + elif isinstance(value, np.ndarray): + if value.dtype.kind in ["M", "m"]: + # catch a datetime/timedelta that is not of ns variety + # and no coercion specified + value = sanitize_to_nanoseconds(value) + + elif value.dtype == _dtype_obj: + value = maybe_infer_to_datetimelike(value) + + elif isinstance(value, list): + # we only get here with dtype=None, which we do not allow + raise ValueError( + "maybe_cast_to_datetime allows a list *only* if dtype is not None" + ) + + # at this point we have converted or raised in all cases where we had a list + return cast(ArrayLike, value) + + +def sanitize_to_nanoseconds(values: np.ndarray, copy: bool = False) -> np.ndarray: + """ + Safely convert non-nanosecond datetime64 or timedelta64 values to nanosecond. 
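+
+    Examples
+    --------
+    Illustrative doctest (mine, not from the pandas docs):
+
+    >>> sanitize_to_nanoseconds(np.array(["2020-01-01"], dtype="M8[s]")).dtype
+    dtype('<M8[ns]')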
+    """
+    dtype = values.dtype
+    if dtype.kind == "M" and dtype != DT64NS_DTYPE:
+        values = conversion.ensure_datetime64ns(values)
+
+    elif dtype.kind == "m" and dtype != TD64NS_DTYPE:
+        values = conversion.ensure_timedelta64ns(values)
+
+    elif copy:
+        values = values.copy()
+
+    return values
+
+
+def ensure_nanosecond_dtype(dtype: DtypeObj) -> DtypeObj:
+    """
+    Convert dtypes with granularity coarser than nanosecond to nanosecond.
+
+    >>> ensure_nanosecond_dtype(np.dtype("M8[s]"))
+    dtype('<M8[ns]')
+
+    >>> ensure_nanosecond_dtype(np.dtype("m8[ps]"))
+    Traceback (most recent call last):
+        ...
+    TypeError: cannot convert timedeltalike to dtype [timedelta64[ps]]
+    """
+    msg = (
+        f"The '{dtype.name}' dtype has no unit. "
+        f"Please pass in '{dtype.name}[ns]' instead."
+    )
+
+    # unpack e.g. SparseDtype
+    dtype = getattr(dtype, "subtype", dtype)
+
+    if not isinstance(dtype, np.dtype):
+        # i.e. datetime64tz
+        pass
+
+    elif dtype.kind == "M" and dtype != DT64NS_DTYPE:
+        # pandas does not support dtypes with granularity finer than [ns],
+        # e.g. [ps], [fs], [as]; coarser units are upcast to [ns]
+        if dtype <= np.dtype("M8[ns]"):
+            if dtype.name == "datetime64":
+                raise ValueError(msg)
+            dtype = DT64NS_DTYPE
+        else:
+            raise TypeError(f"cannot convert datetimelike to dtype [{dtype}]")
+
+    elif dtype.kind == "m" and dtype != TD64NS_DTYPE:
+        # pandas does not support dtypes with granularity finer than [ns],
+        # e.g. [ps], [fs], [as]; coarser units are upcast to [ns]
+        if dtype <= np.dtype("m8[ns]"):
+            if dtype.name == "timedelta64":
+                raise ValueError(msg)
+            dtype = TD64NS_DTYPE
+        else:
+            raise TypeError(f"cannot convert timedeltalike to dtype [{dtype}]")
+    return dtype
+
+
+@overload
+def find_common_type(types: list[np.dtype]) -> np.dtype:
+    ...
+
+
+@overload
+def find_common_type(types: list[ExtensionDtype]) -> DtypeObj:
+    ...
+
+
+@overload
+def find_common_type(types: list[DtypeObj]) -> DtypeObj:
+    ...
+
+
+def find_common_type(types):
+    """
+    Find a common data type among the given dtypes.
+ + Parameters + ---------- + types : list of dtypes + + Returns + ------- + pandas extension or numpy dtype + + See Also + -------- + numpy.find_common_type + + """ + if not types: + raise ValueError("no types given") + + first = types[0] + + # workaround for find_common_type([np.dtype('datetime64[ns]')] * 2) + # => object + if lib.dtypes_all_equal(list(types)): + return first + + # get unique types (dict.fromkeys is used as order-preserving set()) + types = list(dict.fromkeys(types).keys()) + + if any(isinstance(t, ExtensionDtype) for t in types): + for t in types: + if isinstance(t, ExtensionDtype): + res = t._get_common_dtype(types) + if res is not None: + return res + return np.dtype("object") + + # take lowest unit + if all(is_datetime64_dtype(t) for t in types): + return np.dtype("datetime64[ns]") + if all(is_timedelta64_dtype(t) for t in types): + return np.dtype("timedelta64[ns]") + + # don't mix bool / int or float or complex + # this is different from numpy, which casts bool with float/int as int + has_bools = any(is_bool_dtype(t) for t in types) + if has_bools: + for t in types: + if is_integer_dtype(t) or is_float_dtype(t) or is_complex_dtype(t): + return np.dtype("object") + + return np.find_common_type(types, []) + + +def construct_2d_arraylike_from_scalar( + value: Scalar, length: int, width: int, dtype: np.dtype, copy: bool +) -> np.ndarray: + + shape = (length, width) + + if dtype.kind in ["m", "M"]: + value = maybe_unbox_datetimelike_tz_deprecation(value, dtype) + elif dtype == _dtype_obj: + if isinstance(value, (np.timedelta64, np.datetime64)): + # calling np.array below would cast to pytimedelta/pydatetime + out = np.empty(shape, dtype=object) + out.fill(value) + return out + + # Attempt to coerce to a numpy array + try: + arr = np.array(value, dtype=dtype, copy=copy) + except (ValueError, TypeError) as err: + raise TypeError( + f"DataFrame constructor called with incompatible data and dtype: {err}" + ) from err + + if arr.ndim != 0: + raise ValueError("DataFrame constructor not properly called!") + + return np.full(shape, arr) + + +def construct_1d_arraylike_from_scalar( + value: Scalar, length: int, dtype: DtypeObj | None +) -> ArrayLike: + """ + create a np.ndarray / pandas type of specified shape and dtype + filled with values + + Parameters + ---------- + value : scalar value + length : int + dtype : pandas_dtype or np.dtype + + Returns + ------- + np.ndarray / pandas type of length, filled with value + + """ + + if dtype is None: + try: + dtype, value = infer_dtype_from_scalar(value, pandas_dtype=True) + except OutOfBoundsDatetime: + dtype = _dtype_obj + + if isinstance(dtype, ExtensionDtype): + cls = dtype.construct_array_type() + subarr = cls._from_sequence([value] * length, dtype=dtype) + + else: + + if length and is_integer_dtype(dtype) and isna(value): + # coerce if we have nan for an integer dtype + dtype = np.dtype("float64") + elif isinstance(dtype, np.dtype) and dtype.kind in ("U", "S"): + # we need to coerce to object dtype to avoid + # to allow numpy to take our string as a scalar value + dtype = np.dtype("object") + if not isna(value): + value = ensure_str(value) + elif dtype.kind in ["M", "m"]: + value = maybe_unbox_datetimelike_tz_deprecation(value, dtype) + + subarr = np.empty(length, dtype=dtype) + subarr.fill(value) + + return subarr + + +def maybe_unbox_datetimelike_tz_deprecation(value: Scalar, dtype: DtypeObj): + """ + Wrap maybe_unbox_datetimelike with a check for a timezone-aware Timestamp + along with a timezone-naive datetime64 dtype, 
which is deprecated. + """ + # Caller is responsible for checking dtype.kind in ["m", "M"] + + if isinstance(value, datetime): + # we dont want to box dt64, in particular datetime64("NaT") + value = maybe_box_datetimelike(value, dtype) + + try: + value = maybe_unbox_datetimelike(value, dtype) + except TypeError: + if ( + isinstance(value, Timestamp) + and value.tzinfo is not None + and isinstance(dtype, np.dtype) + and dtype.kind == "M" + ): + warnings.warn( + "Data is timezone-aware. Converting " + "timezone-aware data to timezone-naive by " + "passing dtype='datetime64[ns]' to " + "DataFrame or Series is deprecated and will " + "raise in a future version. Use " + "`pd.Series(values).dt.tz_localize(None)` " + "instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + new_value = value.tz_localize(None) + return maybe_unbox_datetimelike(new_value, dtype) + else: + raise + return value + + +def construct_1d_object_array_from_listlike(values: Sized) -> np.ndarray: + """ + Transform any list-like object in a 1-dimensional numpy array of object + dtype. + + Parameters + ---------- + values : any iterable which has a len() + + Raises + ------ + TypeError + * If `values` does not have a len() + + Returns + ------- + 1-dimensional numpy array of dtype object + """ + # numpy will try to interpret nested lists as further dimensions, hence + # making a 1D array that contains list-likes is a bit tricky: + result = np.empty(len(values), dtype="object") + result[:] = values + return result + + +def maybe_cast_to_integer_array( + arr: list | np.ndarray, dtype: np.dtype, copy: bool = False +) -> np.ndarray: + """ + Takes any dtype and returns the casted version, raising for when data is + incompatible with integer/unsigned integer dtypes. + + Parameters + ---------- + arr : np.ndarray or list + The array to cast. + dtype : np.dtype + The integer dtype to cast the array to. + copy: bool, default False + Whether to make a copy of the array before returning. + + Returns + ------- + ndarray + Array of integer or unsigned integer dtype. + + Raises + ------ + OverflowError : the dtype is incompatible with the data + ValueError : loss of precision has occurred during casting + + Examples + -------- + If you try to coerce negative values to unsigned integers, it raises: + + >>> pd.Series([-1], dtype="uint64") + Traceback (most recent call last): + ... + OverflowError: Trying to coerce negative values to unsigned integers + + Also, if you try to coerce float values to integers, it raises: + + >>> pd.Series([1, 2, 3.5], dtype="int64") + Traceback (most recent call last): + ... + ValueError: Trying to coerce float values to integers + """ + assert is_integer_dtype(dtype) + + try: + if not isinstance(arr, np.ndarray): + casted = np.array(arr, dtype=dtype, copy=copy) + else: + casted = arr.astype(dtype, copy=copy) + except OverflowError as err: + raise OverflowError( + "The elements provided in the data cannot all be " + f"casted to the dtype {dtype}" + ) from err + + if np.array_equal(arr, casted): + return casted + + # We do this casting to allow for proper + # data and dtype checking. + # + # We didn't do this earlier because NumPy + # doesn't handle `uint64` correctly. 
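+    #
+    # Illustrative example of the wraparound this guards against (my own,
+    # not an upstream comment): np.array([-1]).astype(np.uint64) silently
+    # becomes 18446744073709551615, so np.array_equal(arr, casted) above is
+    # False and we fall through to the explicit checks below.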
+ arr = np.asarray(arr) + + if is_unsigned_integer_dtype(dtype) and (arr < 0).any(): + raise OverflowError("Trying to coerce negative values to unsigned integers") + + if is_float_dtype(arr.dtype): + if not np.isfinite(arr).all(): + raise IntCastingNaNError( + "Cannot convert non-finite values (NA or inf) to integer" + ) + raise ValueError("Trying to coerce float values to integers") + if is_object_dtype(arr.dtype): + raise ValueError("Trying to coerce float values to integers") + + if casted.dtype < arr.dtype: + # GH#41734 e.g. [1, 200, 923442] and dtype="int8" -> overflows + warnings.warn( + f"Values are too large to be losslessly cast to {dtype}. " + "In a future version this will raise OverflowError. To retain the " + f"old behavior, use pd.Series(values).astype({dtype})", + FutureWarning, + stacklevel=find_stack_level(), + ) + return casted + + if arr.dtype.kind in ["m", "M"]: + # test_constructor_maskedarray_nonfloat + warnings.warn( + f"Constructing Series or DataFrame from {arr.dtype} values and " + f"dtype={dtype} is deprecated and will raise in a future version. " + "Use values.view(dtype) instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return casted + + # No known cases that get here, but raising explicitly to cover our bases. + raise ValueError(f"values cannot be losslessly cast to {dtype}") + + +def convert_scalar_for_putitemlike(scalar: Scalar, dtype: np.dtype) -> Scalar: + """ + Convert datetimelike scalar if we are setting into a datetime64 + or timedelta64 ndarray. + + Parameters + ---------- + scalar : scalar + dtype : np.dtype + + Returns + ------- + scalar + """ + if dtype.kind in ["m", "M"]: + scalar = maybe_box_datetimelike(scalar, dtype) + return maybe_unbox_datetimelike(scalar, dtype) + else: + _validate_numeric_casting(dtype, scalar) + return scalar + + +def _validate_numeric_casting(dtype: np.dtype, value: Scalar) -> None: + """ + Check that we can losslessly insert the given value into an array + with the given dtype. + + Parameters + ---------- + dtype : np.dtype + value : scalar + + Raises + ------ + ValueError + """ + # error: Argument 1 to "__call__" of "ufunc" has incompatible type + # "Union[Union[str, int, float, bool], Union[Any, Timestamp, Timedelta, Any]]"; + # expected "Union[Union[int, float, complex, str, bytes, generic], + # Sequence[Union[int, float, complex, str, bytes, generic]], + # Sequence[Sequence[Any]], _SupportsArray]" + if ( + issubclass(dtype.type, (np.integer, np.bool_)) + and is_float(value) + and np.isnan(value) # type: ignore[arg-type] + ): + raise ValueError("Cannot assign nan to integer series") + + elif dtype.kind in ["i", "u", "f", "c"]: + if is_bool(value) or isinstance(value, np.timedelta64): + # numpy will cast td64 to integer if we're not careful + raise ValueError( + f"Cannot assign {type(value).__name__} to float/integer series" + ) + elif dtype.kind == "b": + if is_scalar(value) and not is_bool(value): + raise ValueError(f"Cannot assign {type(value).__name__} to bool series") + + +def can_hold_element(arr: ArrayLike, element: Any) -> bool: + """ + Can we do an inplace setitem with this element in an array with this dtype? 
+ + Parameters + ---------- + arr : np.ndarray or ExtensionArray + element : Any + + Returns + ------- + bool + """ + dtype = arr.dtype + if not isinstance(dtype, np.dtype) or dtype.kind in ["m", "M"]: + if isinstance(dtype, (PeriodDtype, IntervalDtype, DatetimeTZDtype, np.dtype)): + # np.dtype here catches datetime64ns and timedelta64ns; we assume + # in this case that we have DatetimeArray/TimedeltaArray + arr = cast( + "PeriodArray | DatetimeArray | TimedeltaArray | IntervalArray", arr + ) + try: + arr._validate_setitem_value(element) + return True + except (ValueError, TypeError): + return False + + # This is technically incorrect, but maintains the behavior of + # ExtensionBlock._can_hold_element + return True + + try: + np_can_hold_element(dtype, element) + return True + except (TypeError, ValueError): + return False + + +def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: + """ + Raise if we cannot losslessly set this element into an ndarray with this dtype. + + Specifically about places where we disagree with numpy. i.e. there are + cases where numpy will raise in doing the setitem that we do not check + for here, e.g. setting str "X" into a numeric ndarray. + + Returns + ------- + Any + The element, potentially cast to the dtype. + + Raises + ------ + ValueError : If we cannot losslessly store this element with this dtype. + """ + if dtype == _dtype_obj: + return element + + tipo = maybe_infer_dtype_type(element) + + if dtype.kind in ["i", "u"]: + if isinstance(element, range): + if _dtype_can_hold_range(element, dtype): + return element + raise ValueError + + if tipo is not None: + if tipo.kind not in ["i", "u"]: + if is_float(element) and element.is_integer(): + return element + + if isinstance(element, np.ndarray) and element.dtype.kind == "f": + # If all can be losslessly cast to integers, then we can hold them + # We do something similar in putmask_smart + casted = element.astype(dtype) + comp = casted == element + if comp.all(): + return element + raise ValueError + + # Anything other than integer we cannot hold + raise ValueError + elif dtype.itemsize < tipo.itemsize: + if is_integer(element): + # e.g. test_setitem_series_int8 if we have a python int 1 + # tipo may be np.int32, despite the fact that it will fit + # in smaller int dtypes. + info = np.iinfo(dtype) + if info.min <= element <= info.max: + return element + raise ValueError + raise ValueError + elif not isinstance(tipo, np.dtype): + # i.e. nullable IntegerDtype; we can put this into an ndarray + # losslessly iff it has no NAs + hasnas = element._mask.any() + # TODO: don't rely on implementation detail + if hasnas: + raise ValueError + return element + + return element + + # We have not inferred an integer from the dtype + # check if we have a builtin int or a float equal to an int + if is_integer(element) or (is_float(element) and element.is_integer()): + return element + raise ValueError + + elif dtype.kind == "f": + if tipo is not None: + # TODO: itemsize check? + if tipo.kind not in ["f", "i", "u"]: + # Anything other than float/integer we cannot hold + raise ValueError + elif not isinstance(tipo, np.dtype): + # i.e. 
nullable IntegerDtype or FloatingDtype; + # we can put this into an ndarray losslessly iff it has no NAs + hasnas = element._mask.any() + # TODO: don't rely on implementation detail + if hasnas: + raise ValueError + return element + return element + + if lib.is_integer(element) or lib.is_float(element): + return element + raise ValueError + + elif dtype.kind == "c": + if tipo is not None: + if tipo.kind in ["c", "f", "i", "u"]: + return element + raise ValueError + if lib.is_integer(element) or lib.is_complex(element) or lib.is_float(element): + return element + raise ValueError + + elif dtype.kind == "b": + if tipo is not None: + if tipo.kind == "b": # FIXME: wrong with BooleanArray? + return element + raise ValueError + if lib.is_bool(element): + return element + raise ValueError + + elif dtype.kind == "S": + # TODO: test tests.frame.methods.test_replace tests get here, + # need more targeted tests. xref phofl has a PR about this + if tipo is not None: + if tipo.kind == "S" and tipo.itemsize <= dtype.itemsize: + return element + raise ValueError + if isinstance(element, bytes) and len(element) <= dtype.itemsize: + return element + raise ValueError + + raise NotImplementedError(dtype) + + +def _dtype_can_hold_range(rng: range, dtype: np.dtype) -> bool: + """ + maybe_infer_dtype_type infers to int64 (and float64 for very large endpoints), + but in many cases a range can be held by a smaller integer dtype. + Check if this is one of those cases. + """ + if not len(rng): + return True + return np.can_cast(rng[0], dtype) and np.can_cast(rng[-1], dtype) diff --git a/.local/lib/python3.8/site-packages/pandas/core/dtypes/common.py b/.local/lib/python3.8/site-packages/pandas/core/dtypes/common.py new file mode 100644 index 0000000..6776064 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/dtypes/common.py @@ -0,0 +1,1814 @@ +""" +Common type operations. +""" +from __future__ import annotations + +from typing import ( + Any, + Callable, +) +import warnings + +import numpy as np + +from pandas._libs import ( + Interval, + Period, + algos, + lib, +) +from pandas._libs.tslibs import conversion +from pandas._typing import ( + ArrayLike, + DtypeObj, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.base import _registry as registry +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + DatetimeTZDtype, + ExtensionDtype, + IntervalDtype, + PeriodDtype, +) +from pandas.core.dtypes.generic import ( + ABCCategorical, + ABCIndex, +) +from pandas.core.dtypes.inference import ( # noqa:F401 + is_array_like, + is_bool, + is_complex, + is_dataclass, + is_decimal, + is_dict_like, + is_file_like, + is_float, + is_hashable, + is_integer, + is_interval, + is_iterator, + is_list_like, + is_named_tuple, + is_nested_list_like, + is_number, + is_re, + is_re_compilable, + is_scalar, + is_sequence, +) + +DT64NS_DTYPE = conversion.DT64NS_DTYPE +TD64NS_DTYPE = conversion.TD64NS_DTYPE +INT64_DTYPE = np.dtype(np.int64) + +# oh the troubles to reduce import time +_is_scipy_sparse = None + +ensure_float64 = algos.ensure_float64 + + +def ensure_float(arr): + """ + Ensure that an array object has a float dtype if possible. + + Parameters + ---------- + arr : array-like + The array whose data type we want to enforce as float. + + Returns + ------- + float_arr : The original array cast to the float dtype if + possible. Otherwise, the original array is returned. 
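+
+    Examples
+    --------
+    Illustrative doctests (mine, not from the pandas docs):
+
+    >>> ensure_float(np.array([1, 2], dtype=np.int64)).dtype
+    dtype('float64')
+    >>> ensure_float(np.array(["a", "b"], dtype=object)).dtype
+    dtype('O')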
+ """ + if is_extension_array_dtype(arr.dtype): + if is_float_dtype(arr.dtype): + arr = arr.to_numpy(dtype=arr.dtype.numpy_dtype, na_value=np.nan) + else: + arr = arr.to_numpy(dtype="float64", na_value=np.nan) + elif issubclass(arr.dtype.type, (np.integer, np.bool_)): + arr = arr.astype(float) + return arr + + +ensure_int64 = algos.ensure_int64 +ensure_int32 = algos.ensure_int32 +ensure_int16 = algos.ensure_int16 +ensure_int8 = algos.ensure_int8 +ensure_platform_int = algos.ensure_platform_int +ensure_object = algos.ensure_object + + +def ensure_str(value: bytes | Any) -> str: + """ + Ensure that bytes and non-strings get converted into ``str`` objects. + """ + if isinstance(value, bytes): + value = value.decode("utf-8") + elif not isinstance(value, str): + value = str(value) + return value + + +def ensure_python_int(value: int | np.integer) -> int: + """ + Ensure that a value is a python int. + + Parameters + ---------- + value: int or numpy.integer + + Returns + ------- + int + + Raises + ------ + TypeError: if the value isn't an int or can't be converted to one. + """ + if not (is_integer(value) or is_float(value)): + if not is_scalar(value): + raise TypeError( + f"Value needs to be a scalar value, was type {type(value).__name__}" + ) + raise TypeError(f"Wrong type {type(value)} for value {value}") + try: + new_value = int(value) + assert new_value == value + except (TypeError, ValueError, AssertionError) as err: + raise TypeError(f"Wrong type {type(value)} for value {value}") from err + return new_value + + +def classes(*klasses) -> Callable: + """Evaluate if the tipo is a subclass of the klasses.""" + return lambda tipo: issubclass(tipo, klasses) + + +def classes_and_not_datetimelike(*klasses) -> Callable: + """ + Evaluate if the tipo is a subclass of the klasses + and not a datetimelike. + """ + return lambda tipo: ( + issubclass(tipo, klasses) + and not issubclass(tipo, (np.datetime64, np.timedelta64)) + ) + + +def is_object_dtype(arr_or_dtype) -> bool: + """ + Check whether an array-like or dtype is of the object dtype. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array-like or dtype to check. + + Returns + ------- + boolean + Whether or not the array-like or dtype is of the object dtype. + + Examples + -------- + >>> is_object_dtype(object) + True + >>> is_object_dtype(int) + False + >>> is_object_dtype(np.array([], dtype=object)) + True + >>> is_object_dtype(np.array([], dtype=int)) + False + >>> is_object_dtype([1, 2, 3]) + False + """ + return _is_dtype_type(arr_or_dtype, classes(np.object_)) + + +def is_sparse(arr) -> bool: + """ + Check whether an array-like is a 1-D pandas sparse array. + + Check that the one-dimensional array-like is a pandas sparse array. + Returns True if it is a pandas sparse array, not another type of + sparse array. + + Parameters + ---------- + arr : array-like + Array-like to check. + + Returns + ------- + bool + Whether or not the array-like is a pandas sparse array. + + Examples + -------- + Returns `True` if the parameter is a 1-D pandas sparse array. + + >>> is_sparse(pd.arrays.SparseArray([0, 0, 1, 0])) + True + >>> is_sparse(pd.Series(pd.arrays.SparseArray([0, 0, 1, 0]))) + True + + Returns `False` if the parameter is not sparse. + + >>> is_sparse(np.array([0, 0, 1, 0])) + False + >>> is_sparse(pd.Series([0, 1, 0, 0])) + False + + Returns `False` if the parameter is not a pandas sparse array. 
+ + >>> from scipy.sparse import bsr_matrix + >>> is_sparse(bsr_matrix([0, 1, 0, 0])) + False + + Returns `False` if the parameter has more than one dimension. + """ + from pandas.core.arrays.sparse import SparseDtype + + dtype = getattr(arr, "dtype", arr) + return isinstance(dtype, SparseDtype) + + +def is_scipy_sparse(arr) -> bool: + """ + Check whether an array-like is a scipy.sparse.spmatrix instance. + + Parameters + ---------- + arr : array-like + The array-like to check. + + Returns + ------- + boolean + Whether or not the array-like is a scipy.sparse.spmatrix instance. + + Notes + ----- + If scipy is not installed, this function will always return False. + + Examples + -------- + >>> from scipy.sparse import bsr_matrix + >>> is_scipy_sparse(bsr_matrix([1, 2, 3])) + True + >>> is_scipy_sparse(pd.arrays.SparseArray([1, 2, 3])) + False + """ + global _is_scipy_sparse + + if _is_scipy_sparse is None: + try: + from scipy.sparse import issparse as _is_scipy_sparse + except ImportError: + _is_scipy_sparse = lambda _: False + + assert _is_scipy_sparse is not None + return _is_scipy_sparse(arr) + + +def is_categorical(arr) -> bool: + """ + Check whether an array-like is a Categorical instance. + + Parameters + ---------- + arr : array-like + The array-like to check. + + Returns + ------- + boolean + Whether or not the array-like is of a Categorical instance. + + Examples + -------- + >>> is_categorical([1, 2, 3]) + False + + Categoricals, Series Categoricals, and CategoricalIndex will return True. + + >>> cat = pd.Categorical([1, 2, 3]) + >>> is_categorical(cat) + True + >>> is_categorical(pd.Series(cat)) + True + >>> is_categorical(pd.CategoricalIndex([1, 2, 3])) + True + """ + warnings.warn( + "is_categorical is deprecated and will be removed in a future version. " + "Use is_categorical_dtype instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return isinstance(arr, ABCCategorical) or is_categorical_dtype(arr) + + +def is_datetime64_dtype(arr_or_dtype) -> bool: + """ + Check whether an array-like or dtype is of the datetime64 dtype. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array-like or dtype to check. + + Returns + ------- + boolean + Whether or not the array-like or dtype is of the datetime64 dtype. + + Examples + -------- + >>> is_datetime64_dtype(object) + False + >>> is_datetime64_dtype(np.datetime64) + True + >>> is_datetime64_dtype(np.array([], dtype=int)) + False + >>> is_datetime64_dtype(np.array([], dtype=np.datetime64)) + True + >>> is_datetime64_dtype([1, 2, 3]) + False + """ + if isinstance(arr_or_dtype, np.dtype): + # GH#33400 fastpath for dtype object + return arr_or_dtype.kind == "M" + return _is_dtype_type(arr_or_dtype, classes(np.datetime64)) + + +def is_datetime64tz_dtype(arr_or_dtype) -> bool: + """ + Check whether an array-like or dtype is of a DatetimeTZDtype dtype. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array-like or dtype to check. + + Returns + ------- + boolean + Whether or not the array-like or dtype is of a DatetimeTZDtype dtype. 
+ + Examples + -------- + >>> is_datetime64tz_dtype(object) + False + >>> is_datetime64tz_dtype([1, 2, 3]) + False + >>> is_datetime64tz_dtype(pd.DatetimeIndex([1, 2, 3])) # tz-naive + False + >>> is_datetime64tz_dtype(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) + True + + >>> dtype = DatetimeTZDtype("ns", tz="US/Eastern") + >>> s = pd.Series([], dtype=dtype) + >>> is_datetime64tz_dtype(dtype) + True + >>> is_datetime64tz_dtype(s) + True + """ + if isinstance(arr_or_dtype, ExtensionDtype): + # GH#33400 fastpath for dtype object + return arr_or_dtype.kind == "M" + + if arr_or_dtype is None: + return False + return DatetimeTZDtype.is_dtype(arr_or_dtype) + + +def is_timedelta64_dtype(arr_or_dtype) -> bool: + """ + Check whether an array-like or dtype is of the timedelta64 dtype. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array-like or dtype to check. + + Returns + ------- + boolean + Whether or not the array-like or dtype is of the timedelta64 dtype. + + Examples + -------- + >>> is_timedelta64_dtype(object) + False + >>> is_timedelta64_dtype(np.timedelta64) + True + >>> is_timedelta64_dtype([1, 2, 3]) + False + >>> is_timedelta64_dtype(pd.Series([], dtype="timedelta64[ns]")) + True + >>> is_timedelta64_dtype('0 days') + False + """ + if isinstance(arr_or_dtype, np.dtype): + # GH#33400 fastpath for dtype object + return arr_or_dtype.kind == "m" + + return _is_dtype_type(arr_or_dtype, classes(np.timedelta64)) + + +def is_period_dtype(arr_or_dtype) -> bool: + """ + Check whether an array-like or dtype is of the Period dtype. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array-like or dtype to check. + + Returns + ------- + boolean + Whether or not the array-like or dtype is of the Period dtype. + + Examples + -------- + >>> is_period_dtype(object) + False + >>> is_period_dtype(PeriodDtype(freq="D")) + True + >>> is_period_dtype([1, 2, 3]) + False + >>> is_period_dtype(pd.Period("2017-01-01")) + False + >>> is_period_dtype(pd.PeriodIndex([], freq="A")) + True + """ + if isinstance(arr_or_dtype, ExtensionDtype): + # GH#33400 fastpath for dtype object + return arr_or_dtype.type is Period + + if arr_or_dtype is None: + return False + return PeriodDtype.is_dtype(arr_or_dtype) + + +def is_interval_dtype(arr_or_dtype) -> bool: + """ + Check whether an array-like or dtype is of the Interval dtype. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array-like or dtype to check. + + Returns + ------- + boolean + Whether or not the array-like or dtype is of the Interval dtype. + + Examples + -------- + >>> is_interval_dtype(object) + False + >>> is_interval_dtype(IntervalDtype()) + True + >>> is_interval_dtype([1, 2, 3]) + False + >>> + >>> interval = pd.Interval(1, 2, closed="right") + >>> is_interval_dtype(interval) + False + >>> is_interval_dtype(pd.IntervalIndex([interval])) + True + """ + if isinstance(arr_or_dtype, ExtensionDtype): + # GH#33400 fastpath for dtype object + return arr_or_dtype.type is Interval + + if arr_or_dtype is None: + return False + return IntervalDtype.is_dtype(arr_or_dtype) + + +def is_categorical_dtype(arr_or_dtype) -> bool: + """ + Check whether an array-like or dtype is of the Categorical dtype. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array-like or dtype to check. + + Returns + ------- + boolean + Whether or not the array-like or dtype is of the Categorical dtype. 
+ + Examples + -------- + >>> is_categorical_dtype(object) + False + >>> is_categorical_dtype(CategoricalDtype()) + True + >>> is_categorical_dtype([1, 2, 3]) + False + >>> is_categorical_dtype(pd.Categorical([1, 2, 3])) + True + >>> is_categorical_dtype(pd.CategoricalIndex([1, 2, 3])) + True + """ + if isinstance(arr_or_dtype, ExtensionDtype): + # GH#33400 fastpath for dtype object + return arr_or_dtype.name == "category" + + if arr_or_dtype is None: + return False + return CategoricalDtype.is_dtype(arr_or_dtype) + + +def is_string_or_object_np_dtype(dtype: np.dtype) -> bool: + """ + Faster alternative to is_string_dtype, assumes we have a np.dtype object. + """ + return dtype == object or dtype.kind in "SU" + + +def is_string_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of the string dtype. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array or dtype to check. + + Returns + ------- + boolean + Whether or not the array or dtype is of the string dtype. + + Examples + -------- + >>> is_string_dtype(str) + True + >>> is_string_dtype(object) + True + >>> is_string_dtype(int) + False + >>> + >>> is_string_dtype(np.array(['a', 'b'])) + True + >>> is_string_dtype(pd.Series([1, 2])) + False + """ + # TODO: gh-15585: consider making the checks stricter. + def condition(dtype) -> bool: + return dtype.kind in ("O", "S", "U") and not is_excluded_dtype(dtype) + + def is_excluded_dtype(dtype) -> bool: + """ + These have kind = "O" but aren't string dtypes so need to be explicitly excluded + """ + return isinstance(dtype, (PeriodDtype, IntervalDtype, CategoricalDtype)) + + return _is_dtype(arr_or_dtype, condition) + + +def is_dtype_equal(source, target) -> bool: + """ + Check if two dtypes are equal. + + Parameters + ---------- + source : The first dtype to compare + target : The second dtype to compare + + Returns + ------- + boolean + Whether or not the two dtypes are equal. + + Examples + -------- + >>> is_dtype_equal(int, float) + False + >>> is_dtype_equal("int", int) + True + >>> is_dtype_equal(object, "category") + False + >>> is_dtype_equal(CategoricalDtype(), "category") + True + >>> is_dtype_equal(DatetimeTZDtype(tz="UTC"), "datetime64") + False + """ + if isinstance(target, str): + if not isinstance(source, str): + # GH#38516 ensure we get the same behavior from + # is_dtype_equal(CDT, "category") and CDT == "category" + try: + src = get_dtype(source) + if isinstance(src, ExtensionDtype): + return src == target + except (TypeError, AttributeError, ImportError): + return False + elif isinstance(source, str): + return is_dtype_equal(target, source) + + try: + source = get_dtype(source) + target = get_dtype(target) + return source == target + except (TypeError, AttributeError, ImportError): + + # invalid comparison + # object == category will hit this + return False + + +def is_any_int_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of an integer dtype. + + In this function, timedelta64 instances are also considered "any-integer" + type objects and will return True. + + This function is internal and should not be exposed in the public API. + + The nullable Integer dtypes (e.g. pandas.Int64Dtype) are also considered + as integer by this function. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array or dtype to check. + + Returns + ------- + boolean + Whether or not the array or dtype is of an integer dtype. 
+ + Examples + -------- + >>> is_any_int_dtype(str) + False + >>> is_any_int_dtype(int) + True + >>> is_any_int_dtype(float) + False + >>> is_any_int_dtype(np.uint64) + True + >>> is_any_int_dtype(np.datetime64) + False + >>> is_any_int_dtype(np.timedelta64) + True + >>> is_any_int_dtype(np.array(['a', 'b'])) + False + >>> is_any_int_dtype(pd.Series([1, 2])) + True + >>> is_any_int_dtype(np.array([], dtype=np.timedelta64)) + True + >>> is_any_int_dtype(pd.Index([1, 2.])) # float + False + """ + return _is_dtype_type(arr_or_dtype, classes(np.integer, np.timedelta64)) + + +def is_integer_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of an integer dtype. + + Unlike in `is_any_int_dtype`, timedelta64 instances will return False. + + The nullable Integer dtypes (e.g. pandas.Int64Dtype) are also considered + as integer by this function. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array or dtype to check. + + Returns + ------- + boolean + Whether or not the array or dtype is of an integer dtype and + not an instance of timedelta64. + + Examples + -------- + >>> is_integer_dtype(str) + False + >>> is_integer_dtype(int) + True + >>> is_integer_dtype(float) + False + >>> is_integer_dtype(np.uint64) + True + >>> is_integer_dtype('int8') + True + >>> is_integer_dtype('Int8') + True + >>> is_integer_dtype(pd.Int8Dtype) + True + >>> is_integer_dtype(np.datetime64) + False + >>> is_integer_dtype(np.timedelta64) + False + >>> is_integer_dtype(np.array(['a', 'b'])) + False + >>> is_integer_dtype(pd.Series([1, 2])) + True + >>> is_integer_dtype(np.array([], dtype=np.timedelta64)) + False + >>> is_integer_dtype(pd.Index([1, 2.])) # float + False + """ + return _is_dtype_type(arr_or_dtype, classes_and_not_datetimelike(np.integer)) + + +def is_signed_integer_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of a signed integer dtype. + + Unlike in `is_any_int_dtype`, timedelta64 instances will return False. + + The nullable Integer dtypes (e.g. pandas.Int64Dtype) are also considered + as integer by this function. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array or dtype to check. + + Returns + ------- + boolean + Whether or not the array or dtype is of a signed integer dtype + and not an instance of timedelta64. + + Examples + -------- + >>> is_signed_integer_dtype(str) + False + >>> is_signed_integer_dtype(int) + True + >>> is_signed_integer_dtype(float) + False + >>> is_signed_integer_dtype(np.uint64) # unsigned + False + >>> is_signed_integer_dtype('int8') + True + >>> is_signed_integer_dtype('Int8') + True + >>> is_signed_integer_dtype(pd.Int8Dtype) + True + >>> is_signed_integer_dtype(np.datetime64) + False + >>> is_signed_integer_dtype(np.timedelta64) + False + >>> is_signed_integer_dtype(np.array(['a', 'b'])) + False + >>> is_signed_integer_dtype(pd.Series([1, 2])) + True + >>> is_signed_integer_dtype(np.array([], dtype=np.timedelta64)) + False + >>> is_signed_integer_dtype(pd.Index([1, 2.])) # float + False + >>> is_signed_integer_dtype(np.array([1, 2], dtype=np.uint32)) # unsigned + False + """ + return _is_dtype_type(arr_or_dtype, classes_and_not_datetimelike(np.signedinteger)) + + +def is_unsigned_integer_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of an unsigned integer dtype. + + The nullable Integer dtypes (e.g. pandas.UInt64Dtype) are also + considered as integer by this function. 
+ + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array or dtype to check. + + Returns + ------- + boolean + Whether or not the array or dtype is of an unsigned integer dtype. + + Examples + -------- + >>> is_unsigned_integer_dtype(str) + False + >>> is_unsigned_integer_dtype(int) # signed + False + >>> is_unsigned_integer_dtype(float) + False + >>> is_unsigned_integer_dtype(np.uint64) + True + >>> is_unsigned_integer_dtype('uint8') + True + >>> is_unsigned_integer_dtype('UInt8') + True + >>> is_unsigned_integer_dtype(pd.UInt8Dtype) + True + >>> is_unsigned_integer_dtype(np.array(['a', 'b'])) + False + >>> is_unsigned_integer_dtype(pd.Series([1, 2])) # signed + False + >>> is_unsigned_integer_dtype(pd.Index([1, 2.])) # float + False + >>> is_unsigned_integer_dtype(np.array([1, 2], dtype=np.uint32)) + True + """ + return _is_dtype_type( + arr_or_dtype, classes_and_not_datetimelike(np.unsignedinteger) + ) + + +def is_int64_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of the int64 dtype. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array or dtype to check. + + Returns + ------- + boolean + Whether or not the array or dtype is of the int64 dtype. + + Notes + ----- + Depending on system architecture, the return value of `is_int64_dtype( + int)` will be True if the OS uses 64-bit integers and False if the OS + uses 32-bit integers. + + Examples + -------- + >>> is_int64_dtype(str) + False + >>> is_int64_dtype(np.int32) + False + >>> is_int64_dtype(np.int64) + True + >>> is_int64_dtype('int8') + False + >>> is_int64_dtype('Int8') + False + >>> is_int64_dtype(pd.Int64Dtype) + True + >>> is_int64_dtype(float) + False + >>> is_int64_dtype(np.uint64) # unsigned + False + >>> is_int64_dtype(np.array(['a', 'b'])) + False + >>> is_int64_dtype(np.array([1, 2], dtype=np.int64)) + True + >>> is_int64_dtype(pd.Index([1, 2.])) # float + False + >>> is_int64_dtype(np.array([1, 2], dtype=np.uint32)) # unsigned + False + """ + return _is_dtype_type(arr_or_dtype, classes(np.int64)) + + +def is_datetime64_any_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of the datetime64 dtype. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array or dtype to check. + + Returns + ------- + bool + Whether or not the array or dtype is of the datetime64 dtype. + + Examples + -------- + >>> is_datetime64_any_dtype(str) + False + >>> is_datetime64_any_dtype(int) + False + >>> is_datetime64_any_dtype(np.datetime64) # can be tz-naive + True + >>> is_datetime64_any_dtype(DatetimeTZDtype("ns", "US/Eastern")) + True + >>> is_datetime64_any_dtype(np.array(['a', 'b'])) + False + >>> is_datetime64_any_dtype(np.array([1, 2])) + False + >>> is_datetime64_any_dtype(np.array([], dtype="datetime64[ns]")) + True + >>> is_datetime64_any_dtype(pd.DatetimeIndex([1, 2, 3], dtype="datetime64[ns]")) + True + """ + if isinstance(arr_or_dtype, (np.dtype, ExtensionDtype)): + # GH#33400 fastpath for dtype object + return arr_or_dtype.kind == "M" + + if arr_or_dtype is None: + return False + return is_datetime64_dtype(arr_or_dtype) or is_datetime64tz_dtype(arr_or_dtype) + + +def is_datetime64_ns_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of the datetime64[ns] dtype. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array or dtype to check. + + Returns + ------- + bool + Whether or not the array or dtype is of the datetime64[ns] dtype. 
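+
+    Notes
+    -----
+    Editorial note: despite the name, tz-aware ``datetime64[ns, tz]`` data
+    also passes this check, because the implementation accepts any dtype
+    whose ``base`` is ``datetime64[ns]`` (see the ``DatetimeTZDtype``
+    example below).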
+ + Examples + -------- + >>> is_datetime64_ns_dtype(str) + False + >>> is_datetime64_ns_dtype(int) + False + >>> is_datetime64_ns_dtype(np.datetime64) # no unit + False + >>> is_datetime64_ns_dtype(DatetimeTZDtype("ns", "US/Eastern")) + True + >>> is_datetime64_ns_dtype(np.array(['a', 'b'])) + False + >>> is_datetime64_ns_dtype(np.array([1, 2])) + False + >>> is_datetime64_ns_dtype(np.array([], dtype="datetime64")) # no unit + False + >>> is_datetime64_ns_dtype(np.array([], dtype="datetime64[ps]")) # wrong unit + False + >>> is_datetime64_ns_dtype(pd.DatetimeIndex([1, 2, 3], dtype="datetime64[ns]")) + True + """ + if arr_or_dtype is None: + return False + try: + tipo = get_dtype(arr_or_dtype) + except TypeError: + if is_datetime64tz_dtype(arr_or_dtype): + tipo = get_dtype(arr_or_dtype.dtype) + else: + return False + return tipo == DT64NS_DTYPE or getattr(tipo, "base", None) == DT64NS_DTYPE + + +def is_timedelta64_ns_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of the timedelta64[ns] dtype. + + This is a very specific dtype, so generic ones like `np.timedelta64` + will return False if passed into this function. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array or dtype to check. + + Returns + ------- + boolean + Whether or not the array or dtype is of the timedelta64[ns] dtype. + + Examples + -------- + >>> is_timedelta64_ns_dtype(np.dtype('m8[ns]')) + True + >>> is_timedelta64_ns_dtype(np.dtype('m8[ps]')) # Wrong frequency + False + >>> is_timedelta64_ns_dtype(np.array([1, 2], dtype='m8[ns]')) + True + >>> is_timedelta64_ns_dtype(np.array([1, 2], dtype=np.timedelta64)) + False + """ + return _is_dtype(arr_or_dtype, lambda dtype: dtype == TD64NS_DTYPE) + + +def is_datetime_or_timedelta_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of + a timedelta64 or datetime64 dtype. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array or dtype to check. + + Returns + ------- + boolean + Whether or not the array or dtype is of a timedelta64, + or datetime64 dtype. + + Examples + -------- + >>> is_datetime_or_timedelta_dtype(str) + False + >>> is_datetime_or_timedelta_dtype(int) + False + >>> is_datetime_or_timedelta_dtype(np.datetime64) + True + >>> is_datetime_or_timedelta_dtype(np.timedelta64) + True + >>> is_datetime_or_timedelta_dtype(np.array(['a', 'b'])) + False + >>> is_datetime_or_timedelta_dtype(pd.Series([1, 2])) + False + >>> is_datetime_or_timedelta_dtype(np.array([], dtype=np.timedelta64)) + True + >>> is_datetime_or_timedelta_dtype(np.array([], dtype=np.datetime64)) + True + """ + return _is_dtype_type(arr_or_dtype, classes(np.datetime64, np.timedelta64)) + + +# This exists to silence numpy deprecation warnings, see GH#29553 +def is_numeric_v_string_like(a: ArrayLike, b): + """ + Check if we are comparing a string-like object to a numeric ndarray. + NumPy doesn't like to compare such objects, especially numeric arrays + and scalar string-likes. + + Parameters + ---------- + a : array-like, scalar + The first object to check. + b : array-like, scalar + The second object to check. + + Returns + ------- + boolean + Whether we return a comparing a string-like object to a numeric array. 
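+
+    Notes
+    -----
+    Editorial note: as the implementation below shows, the check is not
+    fully symmetric for scalars: a scalar string is only detected as the
+    second argument, so ``is_numeric_v_string_like("foo", np.array([1]))``
+    is False while the reversed call is True.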
+ + Examples + -------- + >>> is_numeric_v_string_like(np.array([1]), "foo") + True + >>> is_numeric_v_string_like(np.array([1, 2]), np.array(["foo"])) + True + >>> is_numeric_v_string_like(np.array(["foo"]), np.array([1, 2])) + True + >>> is_numeric_v_string_like(np.array([1]), np.array([2])) + False + >>> is_numeric_v_string_like(np.array(["foo"]), np.array(["foo"])) + False + """ + is_a_array = isinstance(a, np.ndarray) + is_b_array = isinstance(b, np.ndarray) + + is_a_numeric_array = is_a_array and a.dtype.kind in ("u", "i", "f", "c", "b") + is_b_numeric_array = is_b_array and b.dtype.kind in ("u", "i", "f", "c", "b") + is_a_string_array = is_a_array and a.dtype.kind in ("S", "U") + is_b_string_array = is_b_array and b.dtype.kind in ("S", "U") + + is_b_scalar_string_like = not is_b_array and isinstance(b, str) + + return ( + (is_a_numeric_array and is_b_scalar_string_like) + or (is_a_numeric_array and is_b_string_array) + or (is_b_numeric_array and is_a_string_array) + ) + + +# This exists to silence numpy deprecation warnings, see GH#29553 +def is_datetimelike_v_numeric(a, b): + """ + Check if we are comparing a datetime-like object to a numeric object. + By "numeric," we mean an object that is either of an int or float dtype. + + Parameters + ---------- + a : array-like, scalar + The first object to check. + b : array-like, scalar + The second object to check. + + Returns + ------- + boolean + Whether we return a comparing a datetime-like to a numeric object. + + Examples + -------- + >>> from datetime import datetime + >>> dt = np.datetime64(datetime(2017, 1, 1)) + >>> + >>> is_datetimelike_v_numeric(1, 1) + False + >>> is_datetimelike_v_numeric(dt, dt) + False + >>> is_datetimelike_v_numeric(1, dt) + True + >>> is_datetimelike_v_numeric(dt, 1) # symmetric check + True + >>> is_datetimelike_v_numeric(np.array([dt]), 1) + True + >>> is_datetimelike_v_numeric(np.array([1]), dt) + True + >>> is_datetimelike_v_numeric(np.array([dt]), np.array([1])) + True + >>> is_datetimelike_v_numeric(np.array([1]), np.array([2])) + False + >>> is_datetimelike_v_numeric(np.array([dt]), np.array([dt])) + False + """ + if not hasattr(a, "dtype"): + a = np.asarray(a) + if not hasattr(b, "dtype"): + b = np.asarray(b) + + def is_numeric(x): + """ + Check if an object has a numeric dtype (i.e. integer or float). + """ + return is_integer_dtype(x) or is_float_dtype(x) + + return (needs_i8_conversion(a) and is_numeric(b)) or ( + needs_i8_conversion(b) and is_numeric(a) + ) + + +def needs_i8_conversion(arr_or_dtype) -> bool: + """ + Check whether the array or dtype should be converted to int64. + + An array-like or dtype "needs" such a conversion if the array-like + or dtype is of a datetime-like dtype + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array or dtype to check. + + Returns + ------- + boolean + Whether or not the array or dtype should be converted to int64. 
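+
+    Notes
+    -----
+    Editorial note: "i8" refers to int64. datetime64, timedelta64 and
+    Period values are all backed by int64 data (nanosecond offsets or
+    period ordinals respectively), which is the conversion this predicate
+    gates.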
+ + Examples + -------- + >>> needs_i8_conversion(str) + False + >>> needs_i8_conversion(np.int64) + False + >>> needs_i8_conversion(np.datetime64) + True + >>> needs_i8_conversion(np.array(['a', 'b'])) + False + >>> needs_i8_conversion(pd.Series([1, 2])) + False + >>> needs_i8_conversion(pd.Series([], dtype="timedelta64[ns]")) + True + >>> needs_i8_conversion(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) + True + """ + if arr_or_dtype is None: + return False + if isinstance(arr_or_dtype, (np.dtype, ExtensionDtype)): + # fastpath + dtype = arr_or_dtype + return dtype.kind in ["m", "M"] or dtype.type is Period + + try: + dtype = get_dtype(arr_or_dtype) + except (TypeError, ValueError): + return False + if isinstance(dtype, np.dtype): + return dtype.kind in ["m", "M"] + return isinstance(dtype, (PeriodDtype, DatetimeTZDtype)) + + +def is_numeric_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of a numeric dtype. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array or dtype to check. + + Returns + ------- + boolean + Whether or not the array or dtype is of a numeric dtype. + + Examples + -------- + >>> is_numeric_dtype(str) + False + >>> is_numeric_dtype(int) + True + >>> is_numeric_dtype(float) + True + >>> is_numeric_dtype(np.uint64) + True + >>> is_numeric_dtype(np.datetime64) + False + >>> is_numeric_dtype(np.timedelta64) + False + >>> is_numeric_dtype(np.array(['a', 'b'])) + False + >>> is_numeric_dtype(pd.Series([1, 2])) + True + >>> is_numeric_dtype(pd.Index([1, 2.])) + True + >>> is_numeric_dtype(np.array([], dtype=np.timedelta64)) + False + """ + return _is_dtype_type( + arr_or_dtype, classes_and_not_datetimelike(np.number, np.bool_) + ) + + +def is_float_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of a float dtype. + + This function is internal and should not be exposed in the public API. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array or dtype to check. + + Returns + ------- + boolean + Whether or not the array or dtype is of a float dtype. + + Examples + -------- + >>> is_float_dtype(str) + False + >>> is_float_dtype(int) + False + >>> is_float_dtype(float) + True + >>> is_float_dtype(np.array(['a', 'b'])) + False + >>> is_float_dtype(pd.Series([1, 2])) + False + >>> is_float_dtype(pd.Index([1, 2.])) + True + """ + return _is_dtype_type(arr_or_dtype, classes(np.floating)) + + +def is_bool_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of a boolean dtype. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array or dtype to check. + + Returns + ------- + boolean + Whether or not the array or dtype is of a boolean dtype. + + Notes + ----- + An ExtensionArray is considered boolean when the ``_is_boolean`` + attribute is set to True. 
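+
+    Editorial note: as the implementation shows, an object-dtype ``Index``
+    whose values are all Python bools also qualifies, via
+    ``Index.inferred_type == "boolean"``.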
+ + Examples + -------- + >>> is_bool_dtype(str) + False + >>> is_bool_dtype(int) + False + >>> is_bool_dtype(bool) + True + >>> is_bool_dtype(np.bool_) + True + >>> is_bool_dtype(np.array(['a', 'b'])) + False + >>> is_bool_dtype(pd.Series([1, 2])) + False + >>> is_bool_dtype(np.array([True, False])) + True + >>> is_bool_dtype(pd.Categorical([True, False])) + True + >>> is_bool_dtype(pd.arrays.SparseArray([True, False])) + True + """ + if arr_or_dtype is None: + return False + try: + dtype = get_dtype(arr_or_dtype) + except (TypeError, ValueError): + return False + + if isinstance(dtype, CategoricalDtype): + arr_or_dtype = dtype.categories + # now we use the special definition for Index + + if isinstance(arr_or_dtype, ABCIndex): + # Allow Index[object] that is all-bools or Index["boolean"] + return arr_or_dtype.inferred_type == "boolean" + elif isinstance(dtype, ExtensionDtype): + return getattr(dtype, "_is_boolean", False) + + return issubclass(dtype.type, np.bool_) + + +def is_extension_type(arr) -> bool: + """ + Check whether an array-like is of a pandas extension class instance. + + .. deprecated:: 1.0.0 + Use ``is_extension_array_dtype`` instead. + + Extension classes include categoricals, pandas sparse objects (i.e. + classes represented within the pandas library and not ones external + to it like scipy sparse matrices), and datetime-like arrays. + + Parameters + ---------- + arr : array-like, scalar + The array-like to check. + + Returns + ------- + boolean + Whether or not the array-like is of a pandas extension class instance. + + Examples + -------- + >>> is_extension_type([1, 2, 3]) + False + >>> is_extension_type(np.array([1, 2, 3])) + False + >>> + >>> cat = pd.Categorical([1, 2, 3]) + >>> + >>> is_extension_type(cat) + True + >>> is_extension_type(pd.Series(cat)) + True + >>> is_extension_type(pd.arrays.SparseArray([1, 2, 3])) + True + >>> from scipy.sparse import bsr_matrix + >>> is_extension_type(bsr_matrix([1, 2, 3])) + False + >>> is_extension_type(pd.DatetimeIndex([1, 2, 3])) + False + >>> is_extension_type(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) + True + >>> + >>> dtype = DatetimeTZDtype("ns", tz="US/Eastern") + >>> s = pd.Series([], dtype=dtype) + >>> is_extension_type(s) + True + """ + warnings.warn( + "'is_extension_type' is deprecated and will be removed in a future " + "version. Use 'is_extension_array_dtype' instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + if is_categorical_dtype(arr): + return True + elif is_sparse(arr): + return True + elif is_datetime64tz_dtype(arr): + return True + return False + + +def is_1d_only_ea_obj(obj: Any) -> bool: + """ + ExtensionArray that does not support 2D, or more specifically that does + not use HybridBlock. + """ + from pandas.core.arrays import ( + DatetimeArray, + ExtensionArray, + PeriodArray, + TimedeltaArray, + ) + + return isinstance(obj, ExtensionArray) and not isinstance( + obj, (DatetimeArray, TimedeltaArray, PeriodArray) + ) + + +def is_1d_only_ea_dtype(dtype: DtypeObj | None) -> bool: + """ + Analogue to is_extension_array_dtype but excluding DatetimeTZDtype. + """ + # Note: if other EA dtypes are ever held in HybridBlock, exclude those + # here too. + # NB: need to check DatetimeTZDtype and not is_datetime64tz_dtype + # to exclude ArrowTimestampUSDtype + return isinstance(dtype, ExtensionDtype) and not isinstance( + dtype, (DatetimeTZDtype, PeriodDtype) + ) + + +def is_extension_array_dtype(arr_or_dtype) -> bool: + """ + Check if an object is a pandas extension array type. 
+ + See the :ref:`Use Guide ` for more. + + Parameters + ---------- + arr_or_dtype : object + For array-like input, the ``.dtype`` attribute will + be extracted. + + Returns + ------- + bool + Whether the `arr_or_dtype` is an extension array type. + + Notes + ----- + This checks whether an object implements the pandas extension + array interface. In pandas, this includes: + + * Categorical + * Sparse + * Interval + * Period + * DatetimeArray + * TimedeltaArray + + Third-party libraries may implement arrays or types satisfying + this interface as well. + + Examples + -------- + >>> from pandas.api.types import is_extension_array_dtype + >>> arr = pd.Categorical(['a', 'b']) + >>> is_extension_array_dtype(arr) + True + >>> is_extension_array_dtype(arr.dtype) + True + + >>> arr = np.array(['a', 'b']) + >>> is_extension_array_dtype(arr.dtype) + False + """ + dtype = getattr(arr_or_dtype, "dtype", arr_or_dtype) + if isinstance(dtype, ExtensionDtype): + return True + elif isinstance(dtype, np.dtype): + return False + else: + return registry.find(dtype) is not None + + +def is_ea_or_datetimelike_dtype(dtype: DtypeObj | None) -> bool: + """ + Check for ExtensionDtype, datetime64 dtype, or timedelta64 dtype. + + Notes + ----- + Checks only for dtype objects, not dtype-castable strings or types. + """ + return isinstance(dtype, ExtensionDtype) or ( + isinstance(dtype, np.dtype) and dtype.kind in ["m", "M"] + ) + + +def is_complex_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of a complex dtype. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array or dtype to check. + + Returns + ------- + boolean + Whether or not the array or dtype is of a complex dtype. + + Examples + -------- + >>> is_complex_dtype(str) + False + >>> is_complex_dtype(int) + False + >>> is_complex_dtype(np.complex_) + True + >>> is_complex_dtype(np.array(['a', 'b'])) + False + >>> is_complex_dtype(pd.Series([1, 2])) + False + >>> is_complex_dtype(np.array([1 + 1j, 5])) + True + """ + return _is_dtype_type(arr_or_dtype, classes(np.complexfloating)) + + +def _is_dtype(arr_or_dtype, condition) -> bool: + """ + Return true if the condition is satisfied for the arr_or_dtype. + + Parameters + ---------- + arr_or_dtype : array-like, str, np.dtype, or ExtensionArrayType + The array-like or dtype object whose dtype we want to extract. + condition : callable[Union[np.dtype, ExtensionDtype]] + + Returns + ------- + bool + + """ + if arr_or_dtype is None: + return False + try: + dtype = get_dtype(arr_or_dtype) + except (TypeError, ValueError): + return False + return condition(dtype) + + +def get_dtype(arr_or_dtype) -> DtypeObj: + """ + Get the dtype instance associated with an array + or dtype object. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array-like or dtype object whose dtype we want to extract. + + Returns + ------- + obj_dtype : The extract dtype instance from the + passed in array or dtype object. + + Raises + ------ + TypeError : The passed in object is None. 
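+
+    Examples
+    --------
+    A few illustrative calls, added here for orientation; they assume the
+    usual ``import numpy as np`` / ``import pandas as pd`` aliases:
+
+    >>> get_dtype(np.int64)
+    dtype('int64')
+    >>> get_dtype(pd.Series([1, 2]))
+    dtype('int64')
+    >>> get_dtype("datetime64[ns, UTC]")
+    datetime64[ns, UTC]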
+ """ + if arr_or_dtype is None: + raise TypeError("Cannot deduce dtype from null object") + + # fastpath + elif isinstance(arr_or_dtype, np.dtype): + return arr_or_dtype + elif isinstance(arr_or_dtype, type): + return np.dtype(arr_or_dtype) + + # if we have an array-like + elif hasattr(arr_or_dtype, "dtype"): + arr_or_dtype = arr_or_dtype.dtype + + return pandas_dtype(arr_or_dtype) + + +def _is_dtype_type(arr_or_dtype, condition) -> bool: + """ + Return true if the condition is satisfied for the arr_or_dtype. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array-like or dtype object whose dtype we want to extract. + condition : callable[Union[np.dtype, ExtensionDtypeType]] + + Returns + ------- + bool : if the condition is satisfied for the arr_or_dtype + """ + if arr_or_dtype is None: + return condition(type(None)) + + # fastpath + if isinstance(arr_or_dtype, np.dtype): + return condition(arr_or_dtype.type) + elif isinstance(arr_or_dtype, type): + if issubclass(arr_or_dtype, ExtensionDtype): + arr_or_dtype = arr_or_dtype.type + return condition(np.dtype(arr_or_dtype).type) + + # if we have an array-like + if hasattr(arr_or_dtype, "dtype"): + arr_or_dtype = arr_or_dtype.dtype + + # we are not possibly a dtype + elif is_list_like(arr_or_dtype): + return condition(type(None)) + + try: + tipo = pandas_dtype(arr_or_dtype).type + except (TypeError, ValueError): + if is_scalar(arr_or_dtype): + return condition(type(None)) + + return False + + return condition(tipo) + + +def infer_dtype_from_object(dtype) -> type: + """ + Get a numpy dtype.type-style object for a dtype object. + + This methods also includes handling of the datetime64[ns] and + datetime64[ns, TZ] objects. + + If no dtype can be found, we return ``object``. + + Parameters + ---------- + dtype : dtype, type + The dtype object whose numpy dtype.type-style + object we want to extract. + + Returns + ------- + type + """ + if isinstance(dtype, type) and issubclass(dtype, np.generic): + # Type object from a dtype + + return dtype + elif isinstance(dtype, (np.dtype, ExtensionDtype)): + # dtype object + try: + _validate_date_like_dtype(dtype) + except TypeError: + # Should still pass if we don't have a date-like + pass + return dtype.type + + try: + dtype = pandas_dtype(dtype) + except TypeError: + pass + + if is_extension_array_dtype(dtype): + return dtype.type + elif isinstance(dtype, str): + + # TODO(jreback) + # should deprecate these + if dtype in ["datetimetz", "datetime64tz"]: + return DatetimeTZDtype.type + elif dtype in ["period"]: + raise NotImplementedError + + if dtype in ["datetime", "timedelta"]: + dtype += "64" + try: + return infer_dtype_from_object(getattr(np, dtype)) + except (AttributeError, TypeError): + # Handles cases like get_dtype(int) i.e., + # Python objects that are valid dtypes + # (unlike user-defined types, in general) + # + # TypeError handles the float16 type code of 'e' + # further handle internal types + pass + + return infer_dtype_from_object(np.dtype(dtype)) + + +def _validate_date_like_dtype(dtype) -> None: + """ + Check whether the dtype is a date-like dtype. Raises an error if invalid. + + Parameters + ---------- + dtype : dtype, type + The dtype to check. + + Raises + ------ + TypeError : The dtype could not be casted to a date-like dtype. + ValueError : The dtype is an illegal date-like dtype (e.g. 
the + frequency provided is too specific) + """ + try: + typ = np.datetime_data(dtype)[0] + except ValueError as e: + raise TypeError(e) from e + if typ not in ["generic", "ns"]: + raise ValueError( + f"{repr(dtype.name)} is too specific of a frequency, " + f"try passing {repr(dtype.type.__name__)}" + ) + + +def validate_all_hashable(*args, error_name: str | None = None) -> None: + """ + Return None if all args are hashable, else raise a TypeError. + + Parameters + ---------- + *args + Arguments to validate. + error_name : str, optional + The name to use if error + + Raises + ------ + TypeError : If an argument is not hashable + + Returns + ------- + None + """ + if not all(is_hashable(arg) for arg in args): + if error_name: + raise TypeError(f"{error_name} must be a hashable type") + else: + raise TypeError("All elements must be hashable") + + +def pandas_dtype(dtype) -> DtypeObj: + """ + Convert input into a pandas only dtype object or a numpy dtype object. + + Parameters + ---------- + dtype : object to be converted + + Returns + ------- + np.dtype or a pandas dtype + + Raises + ------ + TypeError if not a dtype + """ + # short-circuit + if isinstance(dtype, np.ndarray): + return dtype.dtype + elif isinstance(dtype, (np.dtype, ExtensionDtype)): + return dtype + + # registered extension types + result = registry.find(dtype) + if result is not None: + return result + + # try a numpy dtype + # raise a consistent TypeError if failed + try: + npdtype = np.dtype(dtype) + except SyntaxError as err: + # np.dtype uses `eval` which can raise SyntaxError + raise TypeError(f"data type '{dtype}' not understood") from err + + # Any invalid dtype (such as pd.Timestamp) should raise an error. + # np.dtype(invalid_type).kind = 0 for such objects. However, this will + # also catch some valid dtypes such as object, np.object_ and 'object' + # which we safeguard against by catching them earlier and returning + # np.dtype(valid_dtype) before this condition is evaluated. + if is_hashable(dtype) and dtype in [object, np.object_, "object", "O"]: + # check hashability to avoid errors/DeprecationWarning when we get + # here and `dtype` is an array + return npdtype + elif npdtype.kind == "O": + raise TypeError(f"dtype '{dtype}' not understood") + + return npdtype + + +def is_all_strings(value: ArrayLike) -> bool: + """ + Check if this is an array of strings that we should try parsing. + + Includes object-dtype ndarray containing all-strings, StringArray, + and Categorical with all-string categories. + Does not include numpy string dtypes. + """ + dtype = value.dtype + + if isinstance(dtype, np.dtype): + return ( + dtype == np.dtype("object") + and lib.infer_dtype(value, skipna=False) == "string" + ) + elif isinstance(dtype, CategoricalDtype): + return dtype.categories.inferred_type == "string" + return dtype == "string" diff --git a/.local/lib/python3.8/site-packages/pandas/core/dtypes/concat.py b/.local/lib/python3.8/site-packages/pandas/core/dtypes/concat.py new file mode 100644 index 0000000..2dc4241 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/dtypes/concat.py @@ -0,0 +1,367 @@ +""" +Utility functions related to concat. 
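+
+Editorial note: the central entry point below is ``concat_compat``; the
+other helpers normalize dtypes so that the concatenated result keeps as
+much dtype information as possible.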
+""" +from typing import ( + TYPE_CHECKING, + cast, +) +import warnings + +import numpy as np + +from pandas._typing import ( + ArrayLike, + DtypeObj, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.cast import ( + astype_array, + find_common_type, +) +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_dtype_equal, + is_sparse, +) +from pandas.core.dtypes.dtypes import ExtensionDtype +from pandas.core.dtypes.generic import ( + ABCCategoricalIndex, + ABCExtensionArray, + ABCSeries, +) + +if TYPE_CHECKING: + from pandas.core.arrays.sparse import SparseArray + + +def cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike: + """ + Helper function for `arr.astype(common_dtype)` but handling all special + cases. + """ + if is_dtype_equal(arr.dtype, dtype): + return arr + if ( + is_categorical_dtype(arr.dtype) + and isinstance(dtype, np.dtype) + and np.issubdtype(dtype, np.integer) + ): + # problem case: categorical of int -> gives int as result dtype, + # but categorical can contain NAs -> fall back to object dtype + try: + return arr.astype(dtype, copy=False) + except ValueError: + return arr.astype(object, copy=False) + + if is_sparse(arr) and not is_sparse(dtype): + # problem case: SparseArray.astype(dtype) doesn't follow the specified + # dtype exactly, but converts this to Sparse[dtype] -> first manually + # convert to dense array + + # error: Argument 1 to "astype" of "_ArrayOrScalarCommon" has incompatible type + # "Union[dtype[Any], ExtensionDtype]"; expected "Union[dtype[Any], None, type, _ + # SupportsDType[dtype[Any]], str, Union[Tuple[Any, int], Tuple[Any, + # Union[SupportsIndex, Sequence[SupportsIndex]]], List[Any], _DTypeDict, + # Tuple[Any, Any]]]" [arg-type] + arr = cast("SparseArray", arr) + return arr.to_dense().astype(dtype, copy=False) # type: ignore[arg-type] + + # astype_array includes ensure_wrapped_if_datetimelike + return astype_array(arr, dtype=dtype, copy=False) + + +def concat_compat(to_concat, axis: int = 0, ea_compat_axis: bool = False): + """ + provide concatenation of an array of arrays each of which is a single + 'normalized' dtypes (in that for example, if it's object, then it is a + non-datetimelike and provide a combined dtype for the resulting array that + preserves the overall dtype if possible) + + Parameters + ---------- + to_concat : array of arrays + axis : axis to provide concatenation + ea_compat_axis : bool, default False + For ExtensionArray compat, behave as if axis == 1 when determining + whether to drop empty arrays. + + Returns + ------- + a single array, preserving the combined dtypes + """ + # filter empty arrays + # 1-d dtypes always are included here + def is_nonempty(x) -> bool: + if x.ndim <= axis: + return True + return x.shape[axis] > 0 + + # If all arrays are empty, there's nothing to convert, just short-cut to + # the concatenation, #3121. + # + # Creating an empty array directly is tempting, but the winnings would be + # marginal given that it would still require shape & dtype calculation and + # np.concatenate which has them both implemented is compiled. 
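+    # Editorial example: concat_compat([np.array([1]), np.array([], dtype=object)])
+    # drops the empty object-dtype array here (axis 0), so the result stays
+    # int64 instead of being upcast to object.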
+ non_empties = [x for x in to_concat if is_nonempty(x)] + if non_empties and axis == 0 and not ea_compat_axis: + # ea_compat_axis see GH#39574 + to_concat = non_empties + + kinds = {obj.dtype.kind for obj in to_concat} + contains_datetime = any(kind in ["m", "M"] for kind in kinds) or any( + isinstance(obj, ABCExtensionArray) and obj.ndim > 1 for obj in to_concat + ) + + all_empty = not len(non_empties) + single_dtype = len({x.dtype for x in to_concat}) == 1 + any_ea = any(isinstance(x.dtype, ExtensionDtype) for x in to_concat) + + if contains_datetime: + return _concat_datetime(to_concat, axis=axis) + + if any_ea: + # we ignore axis here, as internally concatting with EAs is always + # for axis=0 + if not single_dtype: + target_dtype = find_common_type([x.dtype for x in to_concat]) + to_concat = [cast_to_common_type(arr, target_dtype) for arr in to_concat] + + if isinstance(to_concat[0], ABCExtensionArray): + # TODO: what about EA-backed Index? + cls = type(to_concat[0]) + return cls._concat_same_type(to_concat) + else: + return np.concatenate(to_concat) + + elif all_empty: + # we have all empties, but may need to coerce the result dtype to + # object if we have non-numeric type operands (numpy would otherwise + # cast this to float) + if len(kinds) != 1: + + if not len(kinds - {"i", "u", "f"}) or not len(kinds - {"b", "i", "u"}): + # let numpy coerce + pass + else: + # coerce to object + to_concat = [x.astype("object") for x in to_concat] + kinds = {"o"} + + result = np.concatenate(to_concat, axis=axis) + if "b" in kinds and result.dtype.kind in ["i", "u", "f"]: + # GH#39817 + warnings.warn( + "Behavior when concatenating bool-dtype and numeric-dtype arrays is " + "deprecated; in a future version these will cast to object dtype " + "(instead of coercing bools to numeric values). To retain the old " + "behavior, explicitly cast bool-dtype arrays to numeric dtype.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return result + + +def union_categoricals( + to_union, sort_categories: bool = False, ignore_order: bool = False +): + """ + Combine list-like of Categorical-like, unioning categories. + + All categories must have the same dtype. + + Parameters + ---------- + to_union : list-like + Categorical, CategoricalIndex, or Series with dtype='category'. + sort_categories : bool, default False + If true, resulting categories will be lexsorted, otherwise + they will be ordered as they appear in the data. + ignore_order : bool, default False + If true, the ordered attribute of the Categoricals will be ignored. + Results in an unordered categorical. + + Returns + ------- + Categorical + + Raises + ------ + TypeError + - all inputs do not have the same dtype + - all inputs do not have the same ordered property + - all inputs are ordered and their categories are not identical + - sort_categories=True and Categoricals are ordered + ValueError + Empty list of categoricals passed + + Notes + ----- + To learn more about categories, see `link + `__ + + Examples + -------- + >>> from pandas.api.types import union_categoricals + + If you want to combine categoricals that do not necessarily have + the same categories, `union_categoricals` will combine a list-like + of categoricals. The new categories will be the union of the + categories being combined. 
+ + >>> a = pd.Categorical(["b", "c"]) + >>> b = pd.Categorical(["a", "b"]) + >>> union_categoricals([a, b]) + ['b', 'c', 'a', 'b'] + Categories (3, object): ['b', 'c', 'a'] + + By default, the resulting categories will be ordered as they appear + in the `categories` of the data. If you want the categories to be + lexsorted, use `sort_categories=True` argument. + + >>> union_categoricals([a, b], sort_categories=True) + ['b', 'c', 'a', 'b'] + Categories (3, object): ['a', 'b', 'c'] + + `union_categoricals` also works with the case of combining two + categoricals of the same categories and order information (e.g. what + you could also `append` for). + + >>> a = pd.Categorical(["a", "b"], ordered=True) + >>> b = pd.Categorical(["a", "b", "a"], ordered=True) + >>> union_categoricals([a, b]) + ['a', 'b', 'a', 'b', 'a'] + Categories (2, object): ['a' < 'b'] + + Raises `TypeError` because the categories are ordered and not identical. + + >>> a = pd.Categorical(["a", "b"], ordered=True) + >>> b = pd.Categorical(["a", "b", "c"], ordered=True) + >>> union_categoricals([a, b]) + Traceback (most recent call last): + ... + TypeError: to union ordered Categoricals, all categories must be the same + + New in version 0.20.0 + + Ordered categoricals with different categories or orderings can be + combined by using the `ignore_ordered=True` argument. + + >>> a = pd.Categorical(["a", "b", "c"], ordered=True) + >>> b = pd.Categorical(["c", "b", "a"], ordered=True) + >>> union_categoricals([a, b], ignore_order=True) + ['a', 'b', 'c', 'c', 'b', 'a'] + Categories (3, object): ['a', 'b', 'c'] + + `union_categoricals` also works with a `CategoricalIndex`, or `Series` + containing categorical data, but note that the resulting array will + always be a plain `Categorical` + + >>> a = pd.Series(["b", "c"], dtype='category') + >>> b = pd.Series(["a", "b"], dtype='category') + >>> union_categoricals([a, b]) + ['b', 'c', 'a', 'b'] + Categories (3, object): ['b', 'c', 'a'] + """ + from pandas import Categorical + from pandas.core.arrays.categorical import recode_for_categories + + if len(to_union) == 0: + raise ValueError("No Categoricals to union") + + def _maybe_unwrap(x): + if isinstance(x, (ABCCategoricalIndex, ABCSeries)): + return x._values + elif isinstance(x, Categorical): + return x + else: + raise TypeError("all components to combine must be Categorical") + + to_union = [_maybe_unwrap(x) for x in to_union] + first = to_union[0] + + if not all( + is_dtype_equal(other.categories.dtype, first.categories.dtype) + for other in to_union[1:] + ): + raise TypeError("dtype of categories must be the same") + + ordered = False + if all(first._categories_match_up_to_permutation(other) for other in to_union[1:]): + # identical categories - fastpath + categories = first.categories + ordered = first.ordered + + all_codes = [first._encode_with_my_categories(x)._codes for x in to_union] + new_codes = np.concatenate(all_codes) + + if sort_categories and not ignore_order and ordered: + raise TypeError("Cannot use sort_categories=True with ordered Categoricals") + + if sort_categories and not categories.is_monotonic_increasing: + categories = categories.sort_values() + indexer = categories.get_indexer(first.categories) + + from pandas.core.algorithms import take_nd + + new_codes = take_nd(indexer, new_codes, fill_value=-1) + elif ignore_order or all(not c.ordered for c in to_union): + # different categories - union and recode + cats = first.categories.append([c.categories for c in to_union[1:]]) + categories = cats.unique() + if 
sort_categories: + categories = categories.sort_values() + + new_codes = [ + recode_for_categories(c.codes, c.categories, categories) for c in to_union + ] + new_codes = np.concatenate(new_codes) + else: + # ordered - to show a proper error message + if all(c.ordered for c in to_union): + msg = "to union ordered Categoricals, all categories must be the same" + raise TypeError(msg) + else: + raise TypeError("Categorical.ordered must be the same") + + if ignore_order: + ordered = False + + return Categorical(new_codes, categories=categories, ordered=ordered, fastpath=True) + + +def _concatenate_2d(to_concat, axis: int): + # coerce to 2d if needed & concatenate + if axis == 1: + to_concat = [np.atleast_2d(x) for x in to_concat] + return np.concatenate(to_concat, axis=axis) + + +def _concat_datetime(to_concat, axis=0): + """ + provide concatenation of an datetimelike array of arrays each of which is a + single M8[ns], datetime64[ns, tz] or m8[ns] dtype + + Parameters + ---------- + to_concat : array of arrays + axis : axis to provide concatenation + + Returns + ------- + a single array, preserving the combined dtypes + """ + from pandas.core.construction import ensure_wrapped_if_datetimelike + + to_concat = [ensure_wrapped_if_datetimelike(x) for x in to_concat] + + single_dtype = len({x.dtype for x in to_concat}) == 1 + + # multiple types, need to coerce to object + if not single_dtype: + # ensure_wrapped_if_datetimelike ensures that astype(object) wraps + # in Timestamp/Timedelta + return _concatenate_2d([x.astype(object) for x in to_concat], axis=axis) + + result = type(to_concat[0])._concat_same_type(to_concat, axis=axis) + return result diff --git a/.local/lib/python3.8/site-packages/pandas/core/dtypes/dtypes.py b/.local/lib/python3.8/site-packages/pandas/core/dtypes/dtypes.py new file mode 100644 index 0000000..e74d73b --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/dtypes/dtypes.py @@ -0,0 +1,1366 @@ +""" +Define extension dtypes. +""" +from __future__ import annotations + +import re +from typing import ( + TYPE_CHECKING, + Any, + MutableMapping, + cast, +) + +import numpy as np +import pytz + +from pandas._libs.interval import Interval +from pandas._libs.properties import cache_readonly +from pandas._libs.tslibs import ( + BaseOffset, + NaT, + Period, + Timestamp, + dtypes, + timezones, + to_offset, + tz_compare, +) +from pandas._typing import ( + Dtype, + DtypeObj, + Ordered, + npt, + type_t, +) + +from pandas.core.dtypes.base import ( + ExtensionDtype, + register_extension_dtype, +) +from pandas.core.dtypes.generic import ( + ABCCategoricalIndex, + ABCIndex, +) +from pandas.core.dtypes.inference import ( + is_bool, + is_list_like, +) + +if TYPE_CHECKING: + from datetime import tzinfo + + import pyarrow + + from pandas import ( + Categorical, + Index, + ) + from pandas.core.arrays import ( + DatetimeArray, + IntervalArray, + PandasArray, + PeriodArray, + ) + +str_type = str + + +class PandasExtensionDtype(ExtensionDtype): + """ + A np.dtype duck-typed class, suitable for holding a custom dtype. + + THIS IS NOT A REAL NUMPY DTYPE + """ + + type: Any + kind: Any + # The Any type annotations above are here only because mypy seems to have a + # problem dealing with multiple inheritance from PandasExtensionDtype + # and ExtensionDtype's @properties in the subclasses below. The kind and + # type variables in those subclasses are explicitly typed below. + subdtype = None + str: str_type + num = 100 + shape: tuple[int, ...] 
= ()
+    itemsize = 8
+    base: DtypeObj | None = None
+    isbuiltin = 0
+    isnative = 0
+    _cache_dtypes: dict[str_type, PandasExtensionDtype] = {}
+
+    def __repr__(self) -> str_type:
+        """
+        Return a string representation for a particular object.
+        """
+        return str(self)
+
+    def __hash__(self) -> int:
+        raise NotImplementedError("sub-classes should implement an __hash__ method")
+
+    def __getstate__(self) -> dict[str_type, Any]:
+        # pickle support; we don't want to pickle the cache
+        return {k: getattr(self, k, None) for k in self._metadata}
+
+    @classmethod
+    def reset_cache(cls) -> None:
+        """clear the cache"""
+        cls._cache_dtypes = {}
+
+
+class CategoricalDtypeType(type):
+    """
+    the type of CategoricalDtype, this metaclass determines subclass ability
+    """
+
+    pass
+
+
+@register_extension_dtype
+class CategoricalDtype(PandasExtensionDtype, ExtensionDtype):
+    """
+    Type for categorical data with the categories and orderedness.
+
+    Parameters
+    ----------
+    categories : sequence, optional
+        Must be unique, and must not contain any nulls.
+        The categories are stored in an Index,
+        and if an index is provided the dtype of that index will be used.
+    ordered : bool or None, default False
+        Whether or not this categorical is treated as an ordered categorical.
+        None can be used to maintain the ordered value of existing categoricals when
+        used in operations that combine categoricals, e.g. astype, and will resolve to
+        False if there is no existing ordered to maintain.
+
+    Attributes
+    ----------
+    categories
+    ordered
+
+    Methods
+    -------
+    None
+
+    See Also
+    --------
+    Categorical : Represent a categorical variable in classic R / S-plus fashion.
+
+    Notes
+    -----
+    This class is useful for specifying the type of a ``Categorical``
+    independent of the values. See :ref:`categorical.categoricaldtype`
+    for more.
+
+    Examples
+    --------
+    >>> t = pd.CategoricalDtype(categories=['b', 'a'], ordered=True)
+    >>> pd.Series(['a', 'b', 'a', 'c'], dtype=t)
+    0      a
+    1      b
+    2      a
+    3    NaN
+    dtype: category
+    Categories (2, object): ['b' < 'a']
+
+    An empty CategoricalDtype with a specific dtype can be created
+    by providing an empty index, as follows:
+
+    >>> pd.CategoricalDtype(pd.DatetimeIndex([])).categories.dtype
+    dtype('<M8[ns]')
+    """
+
+    # TODO: Document public vs. private API
+    name = "category"
+    type: type[CategoricalDtypeType] = CategoricalDtypeType
+    kind: str_type = "O"
+    str = "|O08"
+    base = np.dtype("O")
+    _metadata = ("categories", "ordered")
+    _cache_dtypes: dict[str_type, PandasExtensionDtype] = {}
+
+    def __init__(self, categories=None, ordered: Ordered = False):
+        self._finalize(categories, ordered, fastpath=False)
+
+    @classmethod
+    def _from_fastpath(
+        cls, categories=None, ordered: bool | None = None
+    ) -> CategoricalDtype:
+        self = cls.__new__(cls)
+        self._finalize(categories, ordered, fastpath=True)
+        return self
+
+    @classmethod
+    def _from_categorical_dtype(
+        cls, dtype: CategoricalDtype, categories=None, ordered: Ordered = None
+    ) -> CategoricalDtype:
+        if categories is ordered is None:
+            return dtype
+        if categories is None:
+            categories = dtype.categories
+        if ordered is None:
+            ordered = dtype.ordered
+        return cls(categories, ordered)
+
+    @classmethod
+    def _from_values_or_dtype(
+        cls,
+        values=None,
+        categories=None,
+        ordered: bool | None = None,
+        dtype: Dtype | None = None,
+    ) -> CategoricalDtype:
+        """
+        Construct dtype from the input parameters used in :class:`Categorical`.
+
+        This constructor method specifically does not do the factorization
+        step, if that is needed to find the categories. This constructor may
+        therefore return ``CategoricalDtype(categories=None, ordered=None)``,
+        which may not be useful. Additional steps may therefore have to be
+        taken to create the final dtype.
+
+        The return dtype is specified from the inputs in this prioritized
+        order:
+        1. if dtype is a CategoricalDtype, return dtype
+        2. if dtype is the string 'category', create a CategoricalDtype from
+           the supplied categories and ordered parameters, and return that.
+        3.
if values is a categorical, use value.dtype, but override it with + categories and ordered if either/both of those are not None. + 4. if dtype is None and values is not a categorical, construct the + dtype from categories and ordered, even if either of those is None. + + Parameters + ---------- + values : list-like, optional + The list-like must be 1-dimensional. + categories : list-like, optional + Categories for the CategoricalDtype. + ordered : bool, optional + Designating if the categories are ordered. + dtype : CategoricalDtype or the string "category", optional + If ``CategoricalDtype``, cannot be used together with + `categories` or `ordered`. + + Returns + ------- + CategoricalDtype + + Examples + -------- + >>> pd.CategoricalDtype._from_values_or_dtype() + CategoricalDtype(categories=None, ordered=None) + >>> pd.CategoricalDtype._from_values_or_dtype( + ... categories=['a', 'b'], ordered=True + ... ) + CategoricalDtype(categories=['a', 'b'], ordered=True) + >>> dtype1 = pd.CategoricalDtype(['a', 'b'], ordered=True) + >>> dtype2 = pd.CategoricalDtype(['x', 'y'], ordered=False) + >>> c = pd.Categorical([0, 1], dtype=dtype1, fastpath=True) + >>> pd.CategoricalDtype._from_values_or_dtype( + ... c, ['x', 'y'], ordered=True, dtype=dtype2 + ... ) + Traceback (most recent call last): + ... + ValueError: Cannot specify `categories` or `ordered` together with + `dtype`. + + The supplied dtype takes precedence over values' dtype: + + >>> pd.CategoricalDtype._from_values_or_dtype(c, dtype=dtype2) + CategoricalDtype(categories=['x', 'y'], ordered=False) + """ + + if dtype is not None: + # The dtype argument takes precedence over values.dtype (if any) + if isinstance(dtype, str): + if dtype == "category": + dtype = CategoricalDtype(categories, ordered) + else: + raise ValueError(f"Unknown dtype {repr(dtype)}") + elif categories is not None or ordered is not None: + raise ValueError( + "Cannot specify `categories` or `ordered` together with `dtype`." + ) + elif not isinstance(dtype, CategoricalDtype): + raise ValueError(f"Cannot not construct CategoricalDtype from {dtype}") + elif cls.is_dtype(values): + # If no "dtype" was passed, use the one from "values", but honor + # the "ordered" and "categories" arguments + dtype = values.dtype._from_categorical_dtype( + values.dtype, categories, ordered + ) + else: + # If dtype=None and values is not categorical, create a new dtype. + # Note: This could potentially have categories=None and + # ordered=None. + dtype = CategoricalDtype(categories, ordered) + + return cast(CategoricalDtype, dtype) + + @classmethod + def construct_from_string(cls, string: str_type) -> CategoricalDtype: + """ + Construct a CategoricalDtype from a string. + + Parameters + ---------- + string : str + Must be the string "category" in order to be successfully constructed. + + Returns + ------- + CategoricalDtype + Instance of the dtype. + + Raises + ------ + TypeError + If a CategoricalDtype cannot be constructed from the input. 
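+
+        Examples
+        --------
+        A sketch of the two outcomes (editorial addition):
+
+        >>> CategoricalDtype.construct_from_string("category")
+        CategoricalDtype(categories=None, ordered=None)
+        >>> CategoricalDtype.construct_from_string("int64")
+        Traceback (most recent call last):
+        ...
+        TypeError: Cannot construct a 'CategoricalDtype' from 'int64'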
+ """ + if not isinstance(string, str): + raise TypeError( + f"'construct_from_string' expects a string, got {type(string)}" + ) + if string != cls.name: + raise TypeError(f"Cannot construct a 'CategoricalDtype' from '{string}'") + + # need ordered=None to ensure that operations specifying dtype="category" don't + # override the ordered value for existing categoricals + return cls(ordered=None) + + def _finalize(self, categories, ordered: Ordered, fastpath: bool = False) -> None: + + if ordered is not None: + self.validate_ordered(ordered) + + if categories is not None: + categories = self.validate_categories(categories, fastpath=fastpath) + + self._categories = categories + self._ordered = ordered + + def __setstate__(self, state: MutableMapping[str_type, Any]) -> None: + # for pickle compat. __get_state__ is defined in the + # PandasExtensionDtype superclass and uses the public properties to + # pickle -> need to set the settable private ones here (see GH26067) + self._categories = state.pop("categories", None) + self._ordered = state.pop("ordered", False) + + def __hash__(self) -> int: + # _hash_categories returns a uint64, so use the negative + # space for when we have unknown categories to avoid a conflict + if self.categories is None: + if self.ordered: + return -1 + else: + return -2 + # We *do* want to include the real self.ordered here + return int(self._hash_categories) + + def __eq__(self, other: Any) -> bool: + """ + Rules for CDT equality: + 1) Any CDT is equal to the string 'category' + 2) Any CDT is equal to itself + 3) Any CDT is equal to a CDT with categories=None regardless of ordered + 4) A CDT with ordered=True is only equal to another CDT with + ordered=True and identical categories in the same order + 5) A CDT with ordered={False, None} is only equal to another CDT with + ordered={False, None} and identical categories, but same order is + not required. There is no distinction between False/None. + 6) Any other comparison returns False + """ + if isinstance(other, str): + return other == self.name + elif other is self: + return True + elif not (hasattr(other, "ordered") and hasattr(other, "categories")): + return False + elif self.categories is None or other.categories is None: + # For non-fully-initialized dtypes, these are only equal to + # - the string "category" (handled above) + # - other CategoricalDtype with categories=None + return self.categories is other.categories + elif self.ordered or other.ordered: + # At least one has ordered=True; equal if both have ordered=True + # and the same values for categories in the same order. + return (self.ordered == other.ordered) and self.categories.equals( + other.categories + ) + else: + # Neither has ordered=True; equal if both have the same categories, + # but same order is not necessary. There is no distinction between + # ordered=False and ordered=None: CDT(., False) and CDT(., None) + # will be equal if they have the same categories. 
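+        # Editorial example of rule 5 above:
+        # CategoricalDtype(["a", "b"], ordered=False)
+        #     == CategoricalDtype(["b", "a"], ordered=None)  -> True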
+ left = self.categories + right = other.categories + + # GH#36280 the ordering of checks here is for performance + if not left.dtype == right.dtype: + return False + + if len(left) != len(right): + return False + + if self.categories.equals(other.categories): + # Check and see if they happen to be identical categories + return True + + if left.dtype != object: + # Faster than calculating hash + indexer = left.get_indexer(right) + # Because left and right have the same length and are unique, + # `indexer` not having any -1s implies that there is a + # bijection between `left` and `right`. + return (indexer != -1).all() + + # With object-dtype we need a comparison that identifies + # e.g. int(2) as distinct from float(2) + return hash(self) == hash(other) + + def __repr__(self) -> str_type: + if self.categories is None: + data = "None" + else: + data = self.categories._format_data(name=type(self).__name__) + if data is None: + # self.categories is RangeIndex + data = str(self.categories._range) + data = data.rstrip(", ") + return f"CategoricalDtype(categories={data}, ordered={self.ordered})" + + @cache_readonly + def _hash_categories(self) -> int: + from pandas.core.util.hashing import ( + combine_hash_arrays, + hash_array, + hash_tuples, + ) + + categories = self.categories + ordered = self.ordered + + if len(categories) and isinstance(categories[0], tuple): + # assumes if any individual category is a tuple, then all our. ATM + # I don't really want to support just some of the categories being + # tuples. + cat_list = list(categories) # breaks if a np.array of categories + cat_array = hash_tuples(cat_list) + else: + if categories.dtype == "O" and len({type(x) for x in categories}) != 1: + # TODO: hash_array doesn't handle mixed types. It casts + # everything to a str first, which means we treat + # {'1', '2'} the same as {'1', 2} + # find a better solution + hashed = hash((tuple(categories), ordered)) + return hashed + + if DatetimeTZDtype.is_dtype(categories.dtype): + # Avoid future warning. + categories = categories.view("datetime64[ns]") + + cat_array = hash_array(np.asarray(categories), categorize=False) + if ordered: + cat_array = np.vstack( + [cat_array, np.arange(len(cat_array), dtype=cat_array.dtype)] + ) + else: + cat_array = np.array([cat_array]) + combined_hashed = combine_hash_arrays(iter(cat_array), num_items=len(cat_array)) + return np.bitwise_xor.reduce(combined_hashed) + + @classmethod + def construct_array_type(cls) -> type_t[Categorical]: + """ + Return the array type associated with this dtype. + + Returns + ------- + type + """ + from pandas import Categorical + + return Categorical + + @staticmethod + def validate_ordered(ordered: Ordered) -> None: + """ + Validates that we have a valid ordered parameter. If + it is not a boolean, a TypeError will be raised. + + Parameters + ---------- + ordered : object + The parameter to be verified. + + Raises + ------ + TypeError + If 'ordered' is not a boolean. 
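+
+        Examples
+        --------
+        Editorial sketch of the two outcomes:
+
+        >>> CategoricalDtype.validate_ordered(True)
+        >>> CategoricalDtype.validate_ordered("yes")
+        Traceback (most recent call last):
+        ...
+        TypeError: 'ordered' must either be 'True' or 'False'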
+ """ + if not is_bool(ordered): + raise TypeError("'ordered' must either be 'True' or 'False'") + + @staticmethod + def validate_categories(categories, fastpath: bool = False) -> Index: + """ + Validates that we have good categories + + Parameters + ---------- + categories : array-like + fastpath : bool + Whether to skip nan and uniqueness checks + + Returns + ------- + categories : Index + """ + from pandas.core.indexes.base import Index + + if not fastpath and not is_list_like(categories): + raise TypeError( + f"Parameter 'categories' must be list-like, was {repr(categories)}" + ) + elif not isinstance(categories, ABCIndex): + categories = Index._with_infer(categories, tupleize_cols=False) + + if not fastpath: + + if categories.hasnans: + raise ValueError("Categorical categories cannot be null") + + if not categories.is_unique: + raise ValueError("Categorical categories must be unique") + + if isinstance(categories, ABCCategoricalIndex): + categories = categories.categories + + return categories + + def update_dtype(self, dtype: str_type | CategoricalDtype) -> CategoricalDtype: + """ + Returns a CategoricalDtype with categories and ordered taken from dtype + if specified, otherwise falling back to self if unspecified + + Parameters + ---------- + dtype : CategoricalDtype + + Returns + ------- + new_dtype : CategoricalDtype + """ + if isinstance(dtype, str) and dtype == "category": + # dtype='category' should not change anything + return self + elif not self.is_dtype(dtype): + raise ValueError( + f"a CategoricalDtype must be passed to perform an update, " + f"got {repr(dtype)}" + ) + else: + # from here on, dtype is a CategoricalDtype + dtype = cast(CategoricalDtype, dtype) + + # update categories/ordered unless they've been explicitly passed as None + new_categories = ( + dtype.categories if dtype.categories is not None else self.categories + ) + new_ordered = dtype.ordered if dtype.ordered is not None else self.ordered + + return CategoricalDtype(new_categories, new_ordered) + + @property + def categories(self) -> Index: + """ + An ``Index`` containing the unique categories allowed. + """ + return self._categories + + @property + def ordered(self) -> Ordered: + """ + Whether the categories have an ordered relationship. + """ + return self._ordered + + @property + def _is_boolean(self) -> bool: + from pandas.core.dtypes.common import is_bool_dtype + + return is_bool_dtype(self.categories) + + def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None: + from pandas.core.arrays.sparse import SparseDtype + + # check if we have all categorical dtype with identical categories + if all(isinstance(x, CategoricalDtype) for x in dtypes): + first = dtypes[0] + if all(first == other for other in dtypes[1:]): + return first + + # special case non-initialized categorical + # TODO we should figure out the expected return value in general + non_init_cats = [ + isinstance(x, CategoricalDtype) and x.categories is None for x in dtypes + ] + if all(non_init_cats): + return self + elif any(non_init_cats): + return None + + # categorical is aware of Sparse -> extract sparse subdtypes + dtypes = [x.subtype if isinstance(x, SparseDtype) else x for x in dtypes] + # extract the categories' dtype + non_cat_dtypes = [ + x.categories.dtype if isinstance(x, CategoricalDtype) else x for x in dtypes + ] + # TODO should categorical always give an answer? 
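+        # Editorial example: with categories of int8 and int16,
+        # non_cat_dtypes == [dtype('int8'), dtype('int16')], which
+        # find_common_type promotes to dtype('int16').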
+        from pandas.core.dtypes.cast import find_common_type
+
+        return find_common_type(non_cat_dtypes)
+
+
+@register_extension_dtype
+class DatetimeTZDtype(PandasExtensionDtype):
+    """
+    An ExtensionDtype for timezone-aware datetime data.
+
+    **This is not an actual numpy dtype**, but a duck type.
+
+    Parameters
+    ----------
+    unit : str, default "ns"
+        The precision of the datetime data. Currently limited
+        to ``"ns"``.
+    tz : str, int, or datetime.tzinfo
+        The timezone.
+
+    Attributes
+    ----------
+    unit
+    tz
+
+    Methods
+    -------
+    None
+
+    Raises
+    ------
+    pytz.UnknownTimeZoneError
+        When the requested timezone cannot be found.
+
+    Examples
+    --------
+    >>> pd.DatetimeTZDtype(tz='UTC')
+    datetime64[ns, UTC]
+
+    >>> pd.DatetimeTZDtype(tz='dateutil/US/Central')
+    datetime64[ns, tzfile('/usr/share/zoneinfo/US/Central')]
+    """
+
+    type: type[Timestamp] = Timestamp
+    kind: str_type = "M"
+    str = "|M8[ns]"
+    num = 101
+    base = np.dtype("M8[ns]")
+    na_value = NaT
+    _metadata = ("unit", "tz")
+    _match = re.compile(r"(datetime64|M8)\[(?P<unit>.+), (?P<tz>.+)\]")
+    _cache_dtypes: dict[str_type, PandasExtensionDtype] = {}
+
+    def __init__(self, unit: str_type | DatetimeTZDtype = "ns", tz=None):
+        if isinstance(unit, DatetimeTZDtype):
+            # error: "str" has no attribute "tz"
+            unit, tz = unit.unit, unit.tz  # type: ignore[attr-defined]
+
+        if unit != "ns":
+            if isinstance(unit, str) and tz is None:
+                # maybe a string like datetime64[ns, tz], which we support for
+                # now.
+                result = type(self).construct_from_string(unit)
+                unit = result.unit
+                tz = result.tz
+                msg = (
+                    f"Passing a dtype alias like 'datetime64[ns, {tz}]' "
+                    "to DatetimeTZDtype is no longer supported. Use "
+                    "'DatetimeTZDtype.construct_from_string()' instead."
+                )
+                raise ValueError(msg)
+            else:
+                raise ValueError("DatetimeTZDtype only supports ns units")
+
+        if tz:
+            tz = timezones.maybe_get_tz(tz)
+            tz = timezones.tz_standardize(tz)
+        elif tz is not None:
+            raise pytz.UnknownTimeZoneError(tz)
+        if tz is None:
+            raise TypeError("A 'tz' is required.")
+
+        self._unit = unit
+        self._tz = tz
+
+    @property
+    def unit(self) -> str_type:
+        """
+        The precision of the datetime data.
+        """
+        return self._unit
+
+    @property
+    def tz(self) -> tzinfo:
+        """
+        The timezone.
+        """
+        return self._tz
+
+    @classmethod
+    def construct_array_type(cls) -> type_t[DatetimeArray]:
+        """
+        Return the array type associated with this dtype.
+
+        Returns
+        -------
+        type
+        """
+        from pandas.core.arrays import DatetimeArray
+
+        return DatetimeArray
+
+    @classmethod
+    def construct_from_string(cls, string: str_type) -> DatetimeTZDtype:
+        """
+        Construct a DatetimeTZDtype from a string.
+
+        Parameters
+        ----------
+        string : str
+            The string alias for this DatetimeTZDtype.
+            Should be formatted like ``datetime64[ns, <tz>]``,
+            where ``<tz>`` is the timezone name.
+
+        Examples
+        --------
+        >>> DatetimeTZDtype.construct_from_string('datetime64[ns, UTC]')
+        datetime64[ns, UTC]
+        """
+        if not isinstance(string, str):
+            raise TypeError(
+                f"'construct_from_string' expects a string, got {type(string)}"
+            )
+
+        msg = f"Cannot construct a 'DatetimeTZDtype' from '{string}'"
+        match = cls._match.match(string)
+        if match:
+            d = match.groupdict()
+            try:
+                return cls(unit=d["unit"], tz=d["tz"])
+            except (KeyError, TypeError, ValueError) as err:
+                # KeyError if maybe_get_tz tries and fails to get a
+                # pytz timezone (actually pytz.UnknownTimeZoneError).
+                # TypeError if we pass a nonsense tz;
+                # ValueError if we pass a unit other than "ns"
+                raise TypeError(msg) from err
+        raise TypeError(msg)
+
+    def __str__(self) -> str_type:
+        return f"datetime64[{self.unit}, {self.tz}]"
+
+    @property
+    def name(self) -> str_type:
+        """A string representation of the dtype."""
+        return str(self)
+
+    def __hash__(self) -> int:
+        # make myself hashable
+        # TODO: update this.
+        return hash(str(self))
+
+    def __eq__(self, other: Any) -> bool:
+        if isinstance(other, str):
+            if other.startswith("M8["):
+                other = "datetime64[" + other[3:]
+            return other == self.name
+
+        return (
+            isinstance(other, DatetimeTZDtype)
+            and self.unit == other.unit
+            and tz_compare(self.tz, other.tz)
+        )
+
+    def __setstate__(self, state) -> None:
+        # for pickle compat. __get_state__ is defined in the
+        # PandasExtensionDtype superclass and uses the public properties to
+        # pickle -> need to set the settable private ones here (see GH26067)
+        self._tz = state["tz"]
+        self._unit = state["unit"]
+
+
+@register_extension_dtype
+class PeriodDtype(dtypes.PeriodDtypeBase, PandasExtensionDtype):
+    """
+    An ExtensionDtype for Period data.
+
+    **This is not an actual numpy dtype**, but a duck type.
+
+    Parameters
+    ----------
+    freq : str or DateOffset
+        The frequency of this PeriodDtype.
+
+    Attributes
+    ----------
+    freq
+
+    Methods
+    -------
+    None
+
+    Examples
+    --------
+    >>> pd.PeriodDtype(freq='D')
+    period[D]
+
+    >>> pd.PeriodDtype(freq=pd.offsets.MonthEnd())
+    period[M]
+    """
+
+    type: type[Period] = Period
+    kind: str_type = "O"
+    str = "|O08"
+    base = np.dtype("O")
+    num = 102
+    _metadata = ("freq",)
+    _match = re.compile(r"(P|p)eriod\[(?P<freq>.+)\]")
+    _cache_dtypes: dict[str_type, PandasExtensionDtype] = {}
+
+    def __new__(cls, freq=None):
+        """
+        Parameters
+        ----------
+        freq : frequency
+        """
+        if isinstance(freq, PeriodDtype):
+            return freq
+
+        elif freq is None:
+            # empty constructor for pickle compat
+            # -10_000 corresponds to PeriodDtypeCode.UNDEFINED
+            u = dtypes.PeriodDtypeBase.__new__(cls, -10_000)
+            u._freq = None
+            return u
+
+        if not isinstance(freq, BaseOffset):
+            freq = cls._parse_dtype_strict(freq)
+
+        try:
+            return cls._cache_dtypes[freq.freqstr]
+        except KeyError:
+            dtype_code = freq._period_dtype_code
+            u = dtypes.PeriodDtypeBase.__new__(cls, dtype_code)
+            u._freq = freq
+            cls._cache_dtypes[freq.freqstr] = u
+            return u
+
+    def __reduce__(self):
+        return type(self), (self.freq,)
+
+    @property
+    def freq(self):
+        """
+        The frequency object of this PeriodDtype.
+        """
+        return self._freq
+
+    @classmethod
+    def _parse_dtype_strict(cls, freq: str_type) -> BaseOffset:
+        if isinstance(freq, str):  # note: freq is already of type str!
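+            # e.g. both "period[D]" and "Period[D]" reduce to the freq string
+            # "D" below, which to_offset then turns into a Day offset
+            # (illustrative).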
+            if freq.startswith("period[") or freq.startswith("Period["):
+                m = cls._match.search(freq)
+                if m is not None:
+                    freq = m.group("freq")
+
+            freq_offset = to_offset(freq)
+            if freq_offset is not None:
+                return freq_offset
+
+        raise ValueError("could not construct PeriodDtype")
+
+    @classmethod
+    def construct_from_string(cls, string: str_type) -> PeriodDtype:
+        """
+        Strict construction from a string, raise a TypeError if not
+        possible
+        """
+        if (
+            isinstance(string, str)
+            and (string.startswith("period[") or string.startswith("Period["))
+            or isinstance(string, BaseOffset)
+        ):
+            # do not parse string like U as period[U]
+            # avoid a tuple being regarded as a freq
+            try:
+                return cls(freq=string)
+            except ValueError:
+                pass
+        if isinstance(string, str):
+            msg = f"Cannot construct a 'PeriodDtype' from '{string}'"
+        else:
+            msg = f"'construct_from_string' expects a string, got {type(string)}"
+        raise TypeError(msg)
+
+    def __str__(self) -> str_type:
+        return self.name
+
+    @property
+    def name(self) -> str_type:
+        return f"period[{self.freq.freqstr}]"
+
+    @property
+    def na_value(self):
+        return NaT
+
+    def __hash__(self) -> int:
+        # make myself hashable
+        return hash(str(self))
+
+    def __eq__(self, other: Any) -> bool:
+        if isinstance(other, str):
+            return other in [self.name, self.name.title()]
+
+        elif isinstance(other, PeriodDtype):
+
+            # For freqs that can be held by a PeriodDtype, this check is
+            # equivalent to (and much faster than) self.freq == other.freq
+            sfreq = self.freq
+            ofreq = other.freq
+            return (
+                sfreq.n == ofreq.n
+                and sfreq._period_dtype_code == ofreq._period_dtype_code
+            )
+
+        return False
+
+    def __ne__(self, other: Any) -> bool:
+        return not self.__eq__(other)
+
+    def __setstate__(self, state):
+        # for pickle compat. __getstate__ is defined in the
+        # PandasExtensionDtype superclass and uses the public properties to
+        # pickle -> need to set the settable private ones here (see GH26067)
+        self._freq = state["freq"]
+
+    @classmethod
+    def is_dtype(cls, dtype: object) -> bool:
+        """
+        Return a boolean indicating whether the passed type is an actual
+        dtype that we can match (via string or type)
+        """
+        if isinstance(dtype, str):
+            # PeriodDtype can be instantiated from freq string like "U",
+            # but doesn't regard freq str like "U" as dtype.
+            if dtype.startswith("period[") or dtype.startswith("Period["):
+                try:
+                    if cls._parse_dtype_strict(dtype) is not None:
+                        return True
+                    else:
+                        return False
+                except ValueError:
+                    return False
+            else:
+                return False
+        return super().is_dtype(dtype)
+
+    @classmethod
+    def construct_array_type(cls) -> type_t[PeriodArray]:
+        """
+        Return the array type associated with this dtype.
+
+        Returns
+        -------
+        type
+        """
+        from pandas.core.arrays import PeriodArray
+
+        return PeriodArray
+
+    def __from_arrow__(
+        self, array: pyarrow.Array | pyarrow.ChunkedArray
+    ) -> PeriodArray:
+        """
+        Construct PeriodArray from pyarrow Array/ChunkedArray.
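+
+        Notes
+        -----
+        Each chunk is converted with pyarrow_array_to_numpy_and_mask, and any
+        masked (null) positions are set to NaT before the chunks are
+        concatenated into a single PeriodArray.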
+ """ + import pyarrow + + from pandas.core.arrays import PeriodArray + from pandas.core.arrays._arrow_utils import pyarrow_array_to_numpy_and_mask + + if isinstance(array, pyarrow.Array): + chunks = [array] + else: + chunks = array.chunks + + results = [] + for arr in chunks: + data, mask = pyarrow_array_to_numpy_and_mask(arr, dtype="int64") + parr = PeriodArray(data.copy(), freq=self.freq, copy=False) + parr[~mask] = NaT + results.append(parr) + + if not results: + return PeriodArray(np.array([], dtype="int64"), freq=self.freq, copy=False) + return PeriodArray._concat_same_type(results) + + +@register_extension_dtype +class IntervalDtype(PandasExtensionDtype): + """ + An ExtensionDtype for Interval data. + + **This is not an actual numpy dtype**, but a duck type. + + Parameters + ---------- + subtype : str, np.dtype + The dtype of the Interval bounds. + + Attributes + ---------- + subtype + + Methods + ------- + None + + Examples + -------- + >>> pd.IntervalDtype(subtype='int64', closed='both') + interval[int64, both] + """ + + name = "interval" + kind: str_type = "O" + str = "|O08" + base = np.dtype("O") + num = 103 + _metadata = ( + "subtype", + "closed", + ) + _match = re.compile( + r"(I|i)nterval\[(?P[^,]+)(, (?P(right|left|both|neither)))?\]" + ) + _cache_dtypes: dict[str_type, PandasExtensionDtype] = {} + + def __new__(cls, subtype=None, closed: str_type | None = None): + from pandas.core.dtypes.common import ( + is_string_dtype, + pandas_dtype, + ) + + if closed is not None and closed not in {"right", "left", "both", "neither"}: + raise ValueError("closed must be one of 'right', 'left', 'both', 'neither'") + + if isinstance(subtype, IntervalDtype): + if closed is not None and closed != subtype.closed: + raise ValueError( + "dtype.closed and 'closed' do not match. " + "Try IntervalDtype(dtype.subtype, closed) instead." + ) + return subtype + elif subtype is None: + # we are called as an empty constructor + # generally for pickle compat + u = object.__new__(cls) + u._subtype = None + u._closed = closed + return u + elif isinstance(subtype, str) and subtype.lower() == "interval": + subtype = None + else: + if isinstance(subtype, str): + m = cls._match.search(subtype) + if m is not None: + gd = m.groupdict() + subtype = gd["subtype"] + if gd.get("closed", None) is not None: + if closed is not None: + if closed != gd["closed"]: + raise ValueError( + "'closed' keyword does not match value " + "specified in dtype string" + ) + closed = gd["closed"] + + try: + subtype = pandas_dtype(subtype) + except TypeError as err: + raise TypeError("could not construct IntervalDtype") from err + + if CategoricalDtype.is_dtype(subtype) or is_string_dtype(subtype): + # GH 19016 + msg = ( + "category, object, and string subtypes are not supported " + "for IntervalDtype" + ) + raise TypeError(msg) + + key = str(subtype) + str(closed) + try: + return cls._cache_dtypes[key] + except KeyError: + u = object.__new__(cls) + u._subtype = subtype + u._closed = closed + cls._cache_dtypes[key] = u + return u + + @property + def closed(self): + return self._closed + + @property + def subtype(self): + """ + The dtype of the Interval bounds. + """ + return self._subtype + + @classmethod + def construct_array_type(cls) -> type[IntervalArray]: + """ + Return the array type associated with this dtype. 
+
+        Returns
+        -------
+        type
+        """
+        from pandas.core.arrays import IntervalArray
+
+        return IntervalArray
+
+    @classmethod
+    def construct_from_string(cls, string: str_type) -> IntervalDtype:
+        """
+        Attempt to construct this type from a string, raising a TypeError
+        if it's not possible
+        """
+        if not isinstance(string, str):
+            raise TypeError(
+                f"'construct_from_string' expects a string, got {type(string)}"
+            )
+
+        if string.lower() == "interval" or cls._match.search(string) is not None:
+            return cls(string)
+
+        msg = (
+            f"Cannot construct a 'IntervalDtype' from '{string}'.\n\n"
+            "Incorrectly formatted string passed to constructor. "
+            "Valid formats include Interval or Interval[dtype] "
+            "where dtype is numeric, datetime, or timedelta"
+        )
+        raise TypeError(msg)
+
+    @property
+    def type(self):
+        return Interval
+
+    def __str__(self) -> str_type:
+        if self.subtype is None:
+            return "interval"
+        if self.closed is None:
+            # Only partially initialized GH#38394
+            return f"interval[{self.subtype}]"
+        return f"interval[{self.subtype}, {self.closed}]"
+
+    def __hash__(self) -> int:
+        # make myself hashable
+        return hash(str(self))
+
+    def __eq__(self, other: Any) -> bool:
+        if isinstance(other, str):
+            return other.lower() in (self.name.lower(), str(self).lower())
+        elif not isinstance(other, IntervalDtype):
+            return False
+        elif self.subtype is None or other.subtype is None:
+            # None should match any subtype
+            return True
+        elif self.closed != other.closed:
+            return False
+        else:
+            from pandas.core.dtypes.common import is_dtype_equal
+
+            return is_dtype_equal(self.subtype, other.subtype)
+
+    def __setstate__(self, state):
+        # for pickle compat. __get_state__ is defined in the
+        # PandasExtensionDtype superclass and uses the public properties to
+        # pickle -> need to set the settable private ones here (see GH26067)
+        self._subtype = state["subtype"]
+
+        # backward-compat older pickles won't have "closed" key
+        self._closed = state.pop("closed", None)
+
+    @classmethod
+    def is_dtype(cls, dtype: object) -> bool:
+        """
+        Return a boolean indicating whether the passed type is an actual
+        dtype that we can match (via string or type)
+        """
+        if isinstance(dtype, str):
+            if dtype.lower().startswith("interval"):
+                try:
+                    if cls.construct_from_string(dtype) is not None:
+                        return True
+                    else:
+                        return False
+                except (ValueError, TypeError):
+                    return False
+            else:
+                return False
+        return super().is_dtype(dtype)
+
+    def __from_arrow__(
+        self, array: pyarrow.Array | pyarrow.ChunkedArray
+    ) -> IntervalArray:
+        """
+        Construct IntervalArray from pyarrow Array/ChunkedArray.
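+
+        Notes
+        -----
+        The "left" and "right" fields of each chunk's struct storage are
+        extracted and reassembled with IntervalArray.from_arrays, using the
+        closed side stored on the pyarrow extension type.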
+ """ + import pyarrow + + from pandas.core.arrays import IntervalArray + + if isinstance(array, pyarrow.Array): + chunks = [array] + else: + chunks = array.chunks + + results = [] + for arr in chunks: + left = np.asarray(arr.storage.field("left"), dtype=self.subtype) + right = np.asarray(arr.storage.field("right"), dtype=self.subtype) + iarr = IntervalArray.from_arrays(left, right, closed=array.type.closed) + results.append(iarr) + + if not results: + return IntervalArray.from_arrays( + np.array([], dtype=self.subtype), + np.array([], dtype=self.subtype), + closed=array.type.closed, + ) + return IntervalArray._concat_same_type(results) + + def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None: + if not all(isinstance(x, IntervalDtype) for x in dtypes): + return None + + closed = cast("IntervalDtype", dtypes[0]).closed + if not all(cast("IntervalDtype", x).closed == closed for x in dtypes): + return np.dtype(object) + + from pandas.core.dtypes.cast import find_common_type + + common = find_common_type([cast("IntervalDtype", x).subtype for x in dtypes]) + if common == object: + return np.dtype(object) + return IntervalDtype(common, closed=closed) + + +class PandasDtype(ExtensionDtype): + """ + A Pandas ExtensionDtype for NumPy dtypes. + + This is mostly for internal compatibility, and is not especially + useful on its own. + + Parameters + ---------- + dtype : object + Object to be converted to a NumPy data type object. + + See Also + -------- + numpy.dtype + """ + + _metadata = ("_dtype",) + + def __init__(self, dtype: npt.DTypeLike | PandasDtype | None): + if isinstance(dtype, PandasDtype): + # make constructor univalent + dtype = dtype.numpy_dtype + self._dtype = np.dtype(dtype) + + def __repr__(self) -> str: + return f"PandasDtype({repr(self.name)})" + + @property + def numpy_dtype(self) -> np.dtype: + """ + The NumPy dtype this PandasDtype wraps. + """ + return self._dtype + + @property + def name(self) -> str: + """ + A bit-width name for this data-type. + """ + return self._dtype.name + + @property + def type(self) -> type[np.generic]: + """ + The type object used to instantiate a scalar of this NumPy data-type. + """ + return self._dtype.type + + @property + def _is_numeric(self) -> bool: + # exclude object, str, unicode, void. + return self.kind in set("biufc") + + @property + def _is_boolean(self) -> bool: + return self.kind == "b" + + @classmethod + def construct_from_string(cls, string: str) -> PandasDtype: + try: + dtype = np.dtype(string) + except TypeError as err: + if not isinstance(string, str): + msg = f"'construct_from_string' expects a string, got {type(string)}" + else: + msg = f"Cannot construct a 'PandasDtype' from '{string}'" + raise TypeError(msg) from err + return cls(dtype) + + @classmethod + def construct_array_type(cls) -> type_t[PandasArray]: + """ + Return the array type associated with this dtype. + + Returns + ------- + type + """ + from pandas.core.arrays import PandasArray + + return PandasArray + + @property + def kind(self) -> str: + """ + A character code (one of 'biufcmMOSUV') identifying the general kind of data. + """ + return self._dtype.kind + + @property + def itemsize(self) -> int: + """ + The element size of this data-type object. 
+ """ + return self._dtype.itemsize diff --git a/.local/lib/python3.8/site-packages/pandas/core/dtypes/generic.py b/.local/lib/python3.8/site-packages/pandas/core/dtypes/generic.py new file mode 100644 index 0000000..d6dbc83 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/dtypes/generic.py @@ -0,0 +1,155 @@ +""" define generic base classes for pandas objects """ +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Type, + cast, +) + +if TYPE_CHECKING: + from pandas import ( + Categorical, + CategoricalIndex, + DataFrame, + DatetimeIndex, + Float64Index, + Index, + Int64Index, + IntervalIndex, + MultiIndex, + PeriodIndex, + RangeIndex, + Series, + TimedeltaIndex, + UInt64Index, + ) + from pandas.core.arrays import ( + DatetimeArray, + ExtensionArray, + PandasArray, + PeriodArray, + TimedeltaArray, + ) + from pandas.core.generic import NDFrame + + +# define abstract base classes to enable isinstance type checking on our +# objects +def create_pandas_abc_type(name, attr, comp): + + # https://github.com/python/mypy/issues/1006 + # error: 'classmethod' used with a non-method + @classmethod # type: ignore[misc] + def _check(cls, inst) -> bool: + return getattr(inst, attr, "_typ") in comp + + dct = {"__instancecheck__": _check, "__subclasscheck__": _check} + meta = type("ABCBase", (type,), dct) + return meta(name, (), dct) + + +ABCInt64Index = cast( + "Type[Int64Index]", + create_pandas_abc_type("ABCInt64Index", "_typ", ("int64index",)), +) +ABCUInt64Index = cast( + "Type[UInt64Index]", + create_pandas_abc_type("ABCUInt64Index", "_typ", ("uint64index",)), +) +ABCRangeIndex = cast( + "Type[RangeIndex]", + create_pandas_abc_type("ABCRangeIndex", "_typ", ("rangeindex",)), +) +ABCFloat64Index = cast( + "Type[Float64Index]", + create_pandas_abc_type("ABCFloat64Index", "_typ", ("float64index",)), +) +ABCMultiIndex = cast( + "Type[MultiIndex]", + create_pandas_abc_type("ABCMultiIndex", "_typ", ("multiindex",)), +) +ABCDatetimeIndex = cast( + "Type[DatetimeIndex]", + create_pandas_abc_type("ABCDatetimeIndex", "_typ", ("datetimeindex",)), +) +ABCTimedeltaIndex = cast( + "Type[TimedeltaIndex]", + create_pandas_abc_type("ABCTimedeltaIndex", "_typ", ("timedeltaindex",)), +) +ABCPeriodIndex = cast( + "Type[PeriodIndex]", + create_pandas_abc_type("ABCPeriodIndex", "_typ", ("periodindex",)), +) +ABCCategoricalIndex = cast( + "Type[CategoricalIndex]", + create_pandas_abc_type("ABCCategoricalIndex", "_typ", ("categoricalindex",)), +) +ABCIntervalIndex = cast( + "Type[IntervalIndex]", + create_pandas_abc_type("ABCIntervalIndex", "_typ", ("intervalindex",)), +) +ABCIndex = cast( + "Type[Index]", + create_pandas_abc_type( + "ABCIndex", + "_typ", + { + "index", + "int64index", + "rangeindex", + "float64index", + "uint64index", + "numericindex", + "multiindex", + "datetimeindex", + "timedeltaindex", + "periodindex", + "categoricalindex", + "intervalindex", + }, + ), +) + + +ABCNDFrame = cast( + "Type[NDFrame]", + create_pandas_abc_type("ABCNDFrame", "_typ", ("series", "dataframe")), +) +ABCSeries = cast( + "Type[Series]", + create_pandas_abc_type("ABCSeries", "_typ", ("series",)), +) +ABCDataFrame = cast( + "Type[DataFrame]", create_pandas_abc_type("ABCDataFrame", "_typ", ("dataframe",)) +) + +ABCCategorical = cast( + "Type[Categorical]", + create_pandas_abc_type("ABCCategorical", "_typ", ("categorical")), +) +ABCDatetimeArray = cast( + "Type[DatetimeArray]", + create_pandas_abc_type("ABCDatetimeArray", "_typ", ("datetimearray")), +) +ABCTimedeltaArray = cast( + 
"Type[TimedeltaArray]", + create_pandas_abc_type("ABCTimedeltaArray", "_typ", ("timedeltaarray")), +) +ABCPeriodArray = cast( + "Type[PeriodArray]", + create_pandas_abc_type("ABCPeriodArray", "_typ", ("periodarray",)), +) +ABCExtensionArray = cast( + "Type[ExtensionArray]", + create_pandas_abc_type( + "ABCExtensionArray", + "_typ", + # Note: IntervalArray and SparseArray are included bc they have _typ="extension" + {"extension", "categorical", "periodarray", "datetimearray", "timedeltaarray"}, + ), +) +ABCPandasArray = cast( + "Type[PandasArray]", + create_pandas_abc_type("ABCPandasArray", "_typ", ("npy_extension",)), +) diff --git a/.local/lib/python3.8/site-packages/pandas/core/dtypes/inference.py b/.local/lib/python3.8/site-packages/pandas/core/dtypes/inference.py new file mode 100644 index 0000000..1f1486b --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/dtypes/inference.py @@ -0,0 +1,451 @@ +""" basic inference routines """ + +from collections import abc +from numbers import Number +import re +from typing import Pattern + +import numpy as np + +from pandas._libs import lib +from pandas._typing import ArrayLike + +is_bool = lib.is_bool + +is_integer = lib.is_integer + +is_float = lib.is_float + +is_complex = lib.is_complex + +is_scalar = lib.is_scalar + +is_decimal = lib.is_decimal + +is_interval = lib.is_interval + +is_list_like = lib.is_list_like + +is_iterator = lib.is_iterator + + +def is_number(obj) -> bool: + """ + Check if the object is a number. + + Returns True when the object is a number, and False if is not. + + Parameters + ---------- + obj : any type + The object to check if is a number. + + Returns + ------- + is_number : bool + Whether `obj` is a number or not. + + See Also + -------- + api.types.is_integer: Checks a subgroup of numbers. + + Examples + -------- + >>> from pandas.api.types import is_number + >>> is_number(1) + True + >>> is_number(7.15) + True + + Booleans are valid because they are int subclass. + + >>> is_number(False) + True + + >>> is_number("foo") + False + >>> is_number("5") + False + """ + return isinstance(obj, (Number, np.number)) + + +def iterable_not_string(obj) -> bool: + """ + Check if the object is an iterable but not a string. + + Parameters + ---------- + obj : The object to check. + + Returns + ------- + is_iter_not_string : bool + Whether `obj` is a non-string iterable. + + Examples + -------- + >>> iterable_not_string([1, 2, 3]) + True + >>> iterable_not_string("foo") + False + >>> iterable_not_string(1) + False + """ + return isinstance(obj, abc.Iterable) and not isinstance(obj, str) + + +def is_file_like(obj) -> bool: + """ + Check if the object is a file-like object. + + For objects to be considered file-like, they must + be an iterator AND have either a `read` and/or `write` + method as an attribute. + + Note: file-like objects must be iterable, but + iterable objects need not be file-like. + + Parameters + ---------- + obj : The object to check + + Returns + ------- + is_file_like : bool + Whether `obj` has file-like properties. + + Examples + -------- + >>> import io + >>> buffer = io.StringIO("data") + >>> is_file_like(buffer) + True + >>> is_file_like([1, 2, 3]) + False + """ + if not (hasattr(obj, "read") or hasattr(obj, "write")): + return False + + return bool(hasattr(obj, "__iter__")) + + +def is_re(obj) -> bool: + """ + Check if the object is a regex pattern instance. + + Parameters + ---------- + obj : The object to check + + Returns + ------- + is_regex : bool + Whether `obj` is a regex pattern. 
+ + Examples + -------- + >>> is_re(re.compile(".*")) + True + >>> is_re("foo") + False + """ + return isinstance(obj, Pattern) + + +def is_re_compilable(obj) -> bool: + """ + Check if the object can be compiled into a regex pattern instance. + + Parameters + ---------- + obj : The object to check + + Returns + ------- + is_regex_compilable : bool + Whether `obj` can be compiled as a regex pattern. + + Examples + -------- + >>> is_re_compilable(".*") + True + >>> is_re_compilable(1) + False + """ + try: + re.compile(obj) + except TypeError: + return False + else: + return True + + +def is_array_like(obj) -> bool: + """ + Check if the object is array-like. + + For an object to be considered array-like, it must be list-like and + have a `dtype` attribute. + + Parameters + ---------- + obj : The object to check + + Returns + ------- + is_array_like : bool + Whether `obj` has array-like properties. + + Examples + -------- + >>> is_array_like(np.array([1, 2, 3])) + True + >>> is_array_like(pd.Series(["a", "b"])) + True + >>> is_array_like(pd.Index(["2016-01-01"])) + True + >>> is_array_like([1, 2, 3]) + False + >>> is_array_like(("a", "b")) + False + """ + return is_list_like(obj) and hasattr(obj, "dtype") + + +def is_nested_list_like(obj) -> bool: + """ + Check if the object is list-like, and that all of its elements + are also list-like. + + Parameters + ---------- + obj : The object to check + + Returns + ------- + is_list_like : bool + Whether `obj` has list-like properties. + + Examples + -------- + >>> is_nested_list_like([[1, 2, 3]]) + True + >>> is_nested_list_like([{1, 2, 3}, {1, 2, 3}]) + True + >>> is_nested_list_like(["foo"]) + False + >>> is_nested_list_like([]) + False + >>> is_nested_list_like([[1, 2, 3], 1]) + False + + Notes + ----- + This won't reliably detect whether a consumable iterator (e. g. + a generator) is a nested-list-like without consuming the iterator. + To avoid consuming it, we always return False if the outer container + doesn't define `__len__`. + + See Also + -------- + is_list_like + """ + return ( + is_list_like(obj) + and hasattr(obj, "__len__") + and len(obj) > 0 + and all(is_list_like(item) for item in obj) + ) + + +def is_dict_like(obj) -> bool: + """ + Check if the object is dict-like. + + Parameters + ---------- + obj : The object to check + + Returns + ------- + is_dict_like : bool + Whether `obj` has dict-like properties. + + Examples + -------- + >>> is_dict_like({1: 2}) + True + >>> is_dict_like([1, 2, 3]) + False + >>> is_dict_like(dict) + False + >>> is_dict_like(dict()) + True + """ + dict_like_attrs = ("__getitem__", "keys", "__contains__") + return ( + all(hasattr(obj, attr) for attr in dict_like_attrs) + # [GH 25196] exclude classes + and not isinstance(obj, type) + ) + + +def is_named_tuple(obj) -> bool: + """ + Check if the object is a named tuple. + + Parameters + ---------- + obj : The object to check + + Returns + ------- + is_named_tuple : bool + Whether `obj` is a named tuple. + + Examples + -------- + >>> from collections import namedtuple + >>> Point = namedtuple("Point", ["x", "y"]) + >>> p = Point(1, 2) + >>> + >>> is_named_tuple(p) + True + >>> is_named_tuple((1, 2)) + False + """ + return isinstance(obj, abc.Sequence) and hasattr(obj, "_fields") + + +def is_hashable(obj) -> bool: + """ + Return True if hash(obj) will succeed, False otherwise. + + Some types will pass a test against collections.abc.Hashable but fail when + they are actually hashed with hash(). 
+ + Distinguish between these and other types by trying the call to hash() and + seeing if they raise TypeError. + + Returns + ------- + bool + + Examples + -------- + >>> import collections + >>> a = ([],) + >>> isinstance(a, collections.abc.Hashable) + True + >>> is_hashable(a) + False + """ + # Unfortunately, we can't use isinstance(obj, collections.abc.Hashable), + # which can be faster than calling hash. That is because numpy scalars + # fail this test. + + # Reconsider this decision once this numpy bug is fixed: + # https://github.com/numpy/numpy/issues/5562 + + try: + hash(obj) + except TypeError: + return False + else: + return True + + +def is_sequence(obj) -> bool: + """ + Check if the object is a sequence of objects. + String types are not included as sequences here. + + Parameters + ---------- + obj : The object to check + + Returns + ------- + is_sequence : bool + Whether `obj` is a sequence of objects. + + Examples + -------- + >>> l = [1, 2, 3] + >>> + >>> is_sequence(l) + True + >>> is_sequence(iter(l)) + False + """ + try: + iter(obj) # Can iterate over it. + len(obj) # Has a length associated with it. + return not isinstance(obj, (str, bytes)) + except (TypeError, AttributeError): + return False + + +def is_dataclass(item): + """ + Checks if the object is a data-class instance + + Parameters + ---------- + item : object + + Returns + -------- + is_dataclass : bool + True if the item is an instance of a data-class, + will return false if you pass the data class itself + + Examples + -------- + >>> from dataclasses import dataclass + >>> @dataclass + ... class Point: + ... x: int + ... y: int + + >>> is_dataclass(Point) + False + >>> is_dataclass(Point(0,2)) + True + + """ + try: + from dataclasses import is_dataclass + + return is_dataclass(item) and not isinstance(item, type) + except ImportError: + return False + + +def is_inferred_bool_dtype(arr: ArrayLike) -> bool: + """ + Check if this is a ndarray[bool] or an ndarray[object] of bool objects. + + Parameters + ---------- + arr : np.ndarray or ExtensionArray + + Returns + ------- + bool + + Notes + ----- + This does not include the special treatment is_bool_dtype uses for + Categorical. 
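+
+    Examples
+    --------
+    Plain bool ndarrays and object ndarrays holding only bools qualify
+    (illustrative):
+
+    >>> is_inferred_bool_dtype(np.array([True, False]))
+    True
+    >>> is_inferred_bool_dtype(np.array([True, False], dtype=object))
+    True
+    >>> is_inferred_bool_dtype(np.array([1, 0]))
+    False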
+ """ + if not isinstance(arr, np.ndarray): + return False + + dtype = arr.dtype + if dtype == np.dtype(bool): + return True + elif dtype == np.dtype("object"): + return lib.is_bool_array(arr) + return False diff --git a/.local/lib/python3.8/site-packages/pandas/core/dtypes/missing.py b/.local/lib/python3.8/site-packages/pandas/core/dtypes/missing.py new file mode 100644 index 0000000..dd3fcb2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/dtypes/missing.py @@ -0,0 +1,670 @@ +""" +missing types & inference +""" +from __future__ import annotations + +from decimal import Decimal +from functools import partial + +import numpy as np + +from pandas._config import get_option + +from pandas._libs import lib +import pandas._libs.missing as libmissing +from pandas._libs.tslibs import ( + NaT, + iNaT, +) +from pandas._typing import ( + ArrayLike, + DtypeObj, + npt, +) + +from pandas.core.dtypes.common import ( + DT64NS_DTYPE, + TD64NS_DTYPE, + ensure_object, + is_bool_dtype, + is_categorical_dtype, + is_complex_dtype, + is_datetimelike_v_numeric, + is_dtype_equal, + is_extension_array_dtype, + is_float_dtype, + is_integer_dtype, + is_object_dtype, + is_scalar, + is_string_or_object_np_dtype, + needs_i8_conversion, +) +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + ExtensionDtype, + IntervalDtype, + PeriodDtype, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCExtensionArray, + ABCIndex, + ABCMultiIndex, + ABCSeries, +) +from pandas.core.dtypes.inference import is_list_like + +isposinf_scalar = libmissing.isposinf_scalar +isneginf_scalar = libmissing.isneginf_scalar + +nan_checker = np.isnan +INF_AS_NA = False +_dtype_object = np.dtype("object") +_dtype_str = np.dtype(str) + + +def isna(obj): + """ + Detect missing values for an array-like object. + + This function takes a scalar or array-like object and indicates + whether values are missing (``NaN`` in numeric arrays, ``None`` or ``NaN`` + in object arrays, ``NaT`` in datetimelike). + + Parameters + ---------- + obj : scalar or array-like + Object to check for null or missing values. + + Returns + ------- + bool or array-like of bool + For scalar input, returns a scalar boolean. + For array input, returns an array of boolean indicating whether each + corresponding element is missing. + + See Also + -------- + notna : Boolean inverse of pandas.isna. + Series.isna : Detect missing values in a Series. + DataFrame.isna : Detect missing values in a DataFrame. + Index.isna : Detect missing values in an Index. + + Examples + -------- + Scalar arguments (including strings) result in a scalar boolean. + + >>> pd.isna('dog') + False + + >>> pd.isna(pd.NA) + True + + >>> pd.isna(np.nan) + True + + ndarrays result in an ndarray of booleans. + + >>> array = np.array([[1, np.nan, 3], [4, 5, np.nan]]) + >>> array + array([[ 1., nan, 3.], + [ 4., 5., nan]]) + >>> pd.isna(array) + array([[False, True, False], + [False, False, True]]) + + For indexes, an ndarray of booleans is returned. + + >>> index = pd.DatetimeIndex(["2017-07-05", "2017-07-06", None, + ... "2017-07-08"]) + >>> index + DatetimeIndex(['2017-07-05', '2017-07-06', 'NaT', '2017-07-08'], + dtype='datetime64[ns]', freq=None) + >>> pd.isna(index) + array([False, False, True, False]) + + For Series and DataFrame, the same type is returned, containing booleans. 
+ + >>> df = pd.DataFrame([['ant', 'bee', 'cat'], ['dog', None, 'fly']]) + >>> df + 0 1 2 + 0 ant bee cat + 1 dog None fly + >>> pd.isna(df) + 0 1 2 + 0 False False False + 1 False True False + + >>> pd.isna(df[1]) + 0 False + 1 True + Name: 1, dtype: bool + """ + return _isna(obj) + + +isnull = isna + + +def _isna(obj, inf_as_na: bool = False): + """ + Detect missing values, treating None, NaN or NA as null. Infinite + values will also be treated as null if inf_as_na is True. + + Parameters + ---------- + obj: ndarray or object value + Input array or scalar value. + inf_as_na: bool + Whether to treat infinity as null. + + Returns + ------- + boolean ndarray or boolean + """ + if is_scalar(obj): + return libmissing.checknull(obj, inf_as_na=inf_as_na) + elif isinstance(obj, ABCMultiIndex): + raise NotImplementedError("isna is not defined for MultiIndex") + elif isinstance(obj, type): + return False + elif isinstance(obj, (np.ndarray, ABCExtensionArray)): + return _isna_array(obj, inf_as_na=inf_as_na) + elif isinstance(obj, ABCIndex): + # Try to use cached isna, which also short-circuits for integer dtypes + # and avoids materializing RangeIndex._values + if not obj._can_hold_na: + return obj.isna() + return _isna_array(obj._values, inf_as_na=inf_as_na) + + elif isinstance(obj, ABCSeries): + result = _isna_array(obj._values, inf_as_na=inf_as_na) + # box + result = obj._constructor(result, index=obj.index, name=obj.name, copy=False) + return result + elif isinstance(obj, ABCDataFrame): + return obj.isna() + elif isinstance(obj, list): + return _isna_array(np.asarray(obj, dtype=object), inf_as_na=inf_as_na) + elif hasattr(obj, "__array__"): + return _isna_array(np.asarray(obj), inf_as_na=inf_as_na) + else: + return False + + +def _use_inf_as_na(key): + """ + Option change callback for na/inf behaviour. + + Choose which replacement for numpy.isnan / -numpy.isfinite is used. + + Parameters + ---------- + flag: bool + True means treat None, NaN, INF, -INF as null (old way), + False means None and NaN are null, but INF, -INF are not null + (new way). + + Notes + ----- + This approach to setting global module values is discussed and + approved here: + + * https://stackoverflow.com/questions/4859217/ + programmatically-creating-variables-in-python/4859312#4859312 + """ + inf_as_na = get_option(key) + globals()["_isna"] = partial(_isna, inf_as_na=inf_as_na) + if inf_as_na: + globals()["nan_checker"] = lambda x: ~np.isfinite(x) + globals()["INF_AS_NA"] = True + else: + globals()["nan_checker"] = np.isnan + globals()["INF_AS_NA"] = False + + +def _isna_array(values: ArrayLike, inf_as_na: bool = False): + """ + Return an array indicating which values of the input array are NaN / NA. + + Parameters + ---------- + obj: ndarray or ExtensionArray + The input array whose elements are to be checked. + inf_as_na: bool + Whether or not to treat infinite values as NA. + + Returns + ------- + array-like + Array of boolean values denoting the NA status of each element. + """ + dtype = values.dtype + + if not isinstance(values, np.ndarray): + # i.e. 
ExtensionArray + if inf_as_na and is_categorical_dtype(dtype): + result = libmissing.isnaobj(values.to_numpy(), inf_as_na=inf_as_na) + else: + # error: Incompatible types in assignment (expression has type + # "Union[ndarray[Any, Any], ExtensionArraySupportsAnyAll]", variable has + # type "ndarray[Any, dtype[bool_]]") + result = values.isna() # type: ignore[assignment] + elif is_string_or_object_np_dtype(values.dtype): + result = _isna_string_dtype(values, inf_as_na=inf_as_na) + elif needs_i8_conversion(dtype): + # this is the NaT pattern + result = values.view("i8") == iNaT + else: + if inf_as_na: + result = ~np.isfinite(values) + else: + result = np.isnan(values) + + return result + + +def _isna_string_dtype(values: np.ndarray, inf_as_na: bool) -> npt.NDArray[np.bool_]: + # Working around NumPy ticket 1542 + dtype = values.dtype + + if dtype.kind in ("S", "U"): + result = np.zeros(values.shape, dtype=bool) + else: + + if values.ndim == 1: + result = libmissing.isnaobj(values, inf_as_na=inf_as_na) + elif values.ndim == 2: + result = libmissing.isnaobj2d(values, inf_as_na=inf_as_na) + else: + # 0-D, reached via e.g. mask_missing + result = libmissing.isnaobj(values.ravel(), inf_as_na=inf_as_na) + result = result.reshape(values.shape) + + return result + + +def notna(obj): + """ + Detect non-missing values for an array-like object. + + This function takes a scalar or array-like object and indicates + whether values are valid (not missing, which is ``NaN`` in numeric + arrays, ``None`` or ``NaN`` in object arrays, ``NaT`` in datetimelike). + + Parameters + ---------- + obj : array-like or object value + Object to check for *not* null or *non*-missing values. + + Returns + ------- + bool or array-like of bool + For scalar input, returns a scalar boolean. + For array input, returns an array of boolean indicating whether each + corresponding element is valid. + + See Also + -------- + isna : Boolean inverse of pandas.notna. + Series.notna : Detect valid values in a Series. + DataFrame.notna : Detect valid values in a DataFrame. + Index.notna : Detect valid values in an Index. + + Examples + -------- + Scalar arguments (including strings) result in a scalar boolean. + + >>> pd.notna('dog') + True + + >>> pd.notna(pd.NA) + False + + >>> pd.notna(np.nan) + False + + ndarrays result in an ndarray of booleans. + + >>> array = np.array([[1, np.nan, 3], [4, 5, np.nan]]) + >>> array + array([[ 1., nan, 3.], + [ 4., 5., nan]]) + >>> pd.notna(array) + array([[ True, False, True], + [ True, True, False]]) + + For indexes, an ndarray of booleans is returned. + + >>> index = pd.DatetimeIndex(["2017-07-05", "2017-07-06", None, + ... "2017-07-08"]) + >>> index + DatetimeIndex(['2017-07-05', '2017-07-06', 'NaT', '2017-07-08'], + dtype='datetime64[ns]', freq=None) + >>> pd.notna(index) + array([ True, True, False, True]) + + For Series and DataFrame, the same type is returned, containing booleans. 
+ + >>> df = pd.DataFrame([['ant', 'bee', 'cat'], ['dog', None, 'fly']]) + >>> df + 0 1 2 + 0 ant bee cat + 1 dog None fly + >>> pd.notna(df) + 0 1 2 + 0 True True True + 1 True False True + + >>> pd.notna(df[1]) + 0 True + 1 False + Name: 1, dtype: bool + """ + res = isna(obj) + if is_scalar(res): + return not res + return ~res + + +notnull = notna + + +def isna_compat(arr, fill_value=np.nan) -> bool: + """ + Parameters + ---------- + arr: a numpy array + fill_value: fill value, default to np.nan + + Returns + ------- + True if we can fill using this fill_value + """ + if isna(fill_value): + dtype = arr.dtype + return not (is_bool_dtype(dtype) or is_integer_dtype(dtype)) + return True + + +def array_equivalent( + left, + right, + strict_nan: bool = False, + dtype_equal: bool = False, +) -> bool: + """ + True if two arrays, left and right, have equal non-NaN elements, and NaNs + in corresponding locations. False otherwise. It is assumed that left and + right are NumPy arrays of the same dtype. The behavior of this function + (particularly with respect to NaNs) is not defined if the dtypes are + different. + + Parameters + ---------- + left, right : ndarrays + strict_nan : bool, default False + If True, consider NaN and None to be different. + dtype_equal : bool, default False + Whether `left` and `right` are known to have the same dtype + according to `is_dtype_equal`. Some methods like `BlockManager.equals`. + require that the dtypes match. Setting this to ``True`` can improve + performance, but will give different results for arrays that are + equal but different dtypes. + + Returns + ------- + b : bool + Returns True if the arrays are equivalent. + + Examples + -------- + >>> array_equivalent( + ... np.array([1, 2, np.nan]), + ... np.array([1, 2, np.nan])) + True + >>> array_equivalent( + ... np.array([1, np.nan, 2]), + ... np.array([1, 2, np.nan])) + False + """ + left, right = np.asarray(left), np.asarray(right) + + # shape compat + if left.shape != right.shape: + return False + + if dtype_equal: + # fastpath when we require that the dtypes match (Block.equals) + if left.dtype.kind in ["f", "c"]: + return _array_equivalent_float(left, right) + elif is_datetimelike_v_numeric(left.dtype, right.dtype): + return False + elif needs_i8_conversion(left.dtype): + return _array_equivalent_datetimelike(left, right) + elif is_string_or_object_np_dtype(left.dtype): + # TODO: fastpath for pandas' StringDtype + return _array_equivalent_object(left, right, strict_nan) + else: + return np.array_equal(left, right) + + # Slow path when we allow comparing different dtypes. + # Object arrays can contain None, NaN and NaT. + # string dtypes must be come to this path for NumPy 1.7.1 compat + if left.dtype.kind in "OSU" or right.dtype.kind in "OSU": + # Note: `in "OSU"` is non-trivially faster than `in ["O", "S", "U"]` + # or `in ("O", "S", "U")` + return _array_equivalent_object(left, right, strict_nan) + + # NaNs can occur in float and complex arrays. 
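+    # e.g. array_equivalent(np.array([1.0, np.nan]), np.array([1.0, np.nan]))
+    # is True: each position must either compare equal or be NaN in both
+    # arrays (illustrative).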
+ if is_float_dtype(left.dtype) or is_complex_dtype(left.dtype): + if not (left.size and right.size): + return True + return ((left == right) | (isna(left) & isna(right))).all() + + elif is_datetimelike_v_numeric(left, right): + # GH#29553 avoid numpy deprecation warning + return False + + elif needs_i8_conversion(left.dtype) or needs_i8_conversion(right.dtype): + # datetime64, timedelta64, Period + if not is_dtype_equal(left.dtype, right.dtype): + return False + + left = left.view("i8") + right = right.view("i8") + + # if we have structured dtypes, compare first + if ( + left.dtype.type is np.void or right.dtype.type is np.void + ) and left.dtype != right.dtype: + return False + + return np.array_equal(left, right) + + +def _array_equivalent_float(left, right) -> bool: + return bool(((left == right) | (np.isnan(left) & np.isnan(right))).all()) + + +def _array_equivalent_datetimelike(left, right): + return np.array_equal(left.view("i8"), right.view("i8")) + + +def _array_equivalent_object(left: np.ndarray, right: np.ndarray, strict_nan: bool): + if not strict_nan: + # isna considers NaN and None to be equivalent. + + if left.flags["F_CONTIGUOUS"] and right.flags["F_CONTIGUOUS"]: + # we can improve performance by doing a copy-free ravel + # e.g. in frame_methods.Equals.time_frame_nonunique_equal + # if we transposed the frames + left = left.ravel("K") + right = right.ravel("K") + + return lib.array_equivalent_object( + ensure_object(left.ravel()), ensure_object(right.ravel()) + ) + + for left_value, right_value in zip(left, right): + if left_value is NaT and right_value is not NaT: + return False + + elif left_value is libmissing.NA and right_value is not libmissing.NA: + return False + + elif isinstance(left_value, float) and np.isnan(left_value): + if not isinstance(right_value, float) or not np.isnan(right_value): + return False + else: + try: + if np.any(np.asarray(left_value != right_value)): + return False + except TypeError as err: + if "boolean value of NA is ambiguous" in str(err): + return False + raise + return True + + +def array_equals(left: ArrayLike, right: ArrayLike) -> bool: + """ + ExtensionArray-compatible implementation of array_equivalent. + """ + if not is_dtype_equal(left.dtype, right.dtype): + return False + elif isinstance(left, ABCExtensionArray): + return left.equals(right) + else: + return array_equivalent(left, right, dtype_equal=True) + + +def infer_fill_value(val): + """ + infer the fill value for the nan/NaT from the provided + scalar/ndarray/list-like if we are a NaT, return the correct dtyped + element to provide proper block construction + """ + if not is_list_like(val): + val = [val] + val = np.array(val, copy=False) + if needs_i8_conversion(val.dtype): + return np.array("NaT", dtype=val.dtype) + elif is_object_dtype(val.dtype): + dtype = lib.infer_dtype(ensure_object(val), skipna=False) + if dtype in ["datetime", "datetime64"]: + return np.array("NaT", dtype=DT64NS_DTYPE) + elif dtype in ["timedelta", "timedelta64"]: + return np.array("NaT", dtype=TD64NS_DTYPE) + return np.nan + + +def maybe_fill(arr: np.ndarray) -> np.ndarray: + """ + Fill numpy.ndarray with NaN, unless we have a integer or boolean dtype. 
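+
+    Examples
+    --------
+    Floats are overwritten with NaN; integer data is returned untouched
+    (illustrative):
+
+    >>> maybe_fill(np.empty(2, dtype=np.float64))
+    array([nan, nan])
+    >>> maybe_fill(np.zeros(2, dtype=np.int64))
+    array([0, 0])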
+ """ + if arr.dtype.kind not in ("u", "i", "b"): + arr.fill(np.nan) + return arr + + +def na_value_for_dtype(dtype: DtypeObj, compat: bool = True): + """ + Return a dtype compat na value + + Parameters + ---------- + dtype : string / dtype + compat : bool, default True + + Returns + ------- + np.dtype or a pandas dtype + + Examples + -------- + >>> na_value_for_dtype(np.dtype('int64')) + 0 + >>> na_value_for_dtype(np.dtype('int64'), compat=False) + nan + >>> na_value_for_dtype(np.dtype('float64')) + nan + >>> na_value_for_dtype(np.dtype('bool')) + False + >>> na_value_for_dtype(np.dtype('datetime64[ns]')) + numpy.datetime64('NaT') + """ + + if isinstance(dtype, ExtensionDtype): + return dtype.na_value + elif needs_i8_conversion(dtype): + return dtype.type("NaT", "ns") + elif is_float_dtype(dtype): + return np.nan + elif is_integer_dtype(dtype): + if compat: + return 0 + return np.nan + elif is_bool_dtype(dtype): + if compat: + return False + return np.nan + return np.nan + + +def remove_na_arraylike(arr): + """ + Return array-like containing only true/non-NaN values, possibly empty. + """ + if is_extension_array_dtype(arr): + return arr[notna(arr)] + else: + return arr[notna(np.asarray(arr))] + + +def is_valid_na_for_dtype(obj, dtype: DtypeObj) -> bool: + """ + isna check that excludes incompatible dtypes + + Parameters + ---------- + obj : object + dtype : np.datetime64, np.timedelta64, DatetimeTZDtype, or PeriodDtype + + Returns + ------- + bool + """ + if not lib.is_scalar(obj) or not isna(obj): + return False + elif dtype.kind == "M": + if isinstance(dtype, np.dtype): + # i.e. not tzaware + return not isinstance(obj, (np.timedelta64, Decimal)) + # we have to rule out tznaive dt64("NaT") + return not isinstance(obj, (np.timedelta64, np.datetime64, Decimal)) + elif dtype.kind == "m": + return not isinstance(obj, (np.datetime64, Decimal)) + elif dtype.kind in ["i", "u", "f", "c"]: + # Numeric + return obj is not NaT and not isinstance(obj, (np.datetime64, np.timedelta64)) + + elif dtype == _dtype_str: + # numpy string dtypes to avoid float np.nan + return not isinstance(obj, (np.datetime64, np.timedelta64, Decimal, float)) + + elif dtype == _dtype_object: + # This is needed for Categorical, but is kind of weird + return True + + elif isinstance(dtype, PeriodDtype): + return not isinstance(obj, (np.datetime64, np.timedelta64, Decimal)) + + elif isinstance(dtype, IntervalDtype): + return lib.is_float(obj) or obj is None or obj is libmissing.NA + + elif isinstance(dtype, CategoricalDtype): + return is_valid_na_for_dtype(obj, dtype.categories.dtype) + + # fallback, default to allowing NaN, None, NA, NaT + return not isinstance(obj, (np.datetime64, np.timedelta64, Decimal)) diff --git a/.local/lib/python3.8/site-packages/pandas/core/flags.py b/.local/lib/python3.8/site-packages/pandas/core/flags.py new file mode 100644 index 0000000..54be212 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/flags.py @@ -0,0 +1,113 @@ +import weakref + + +class Flags: + """ + Flags that apply to pandas objects. + + .. versionadded:: 1.2.0 + + Parameters + ---------- + obj : Series or DataFrame + The object these flags are associated with. + allows_duplicate_labels : bool, default True + Whether to allow duplicate labels in this object. By default, + duplicate labels are permitted. Setting this to ``False`` will + cause an :class:`errors.DuplicateLabelError` to be raised when + `index` (or columns for DataFrame) is not unique, or any + subsequent operation on introduces duplicates. 
+        See :ref:`duplicates.disallow` for more.
+
+        .. warning::
+
+           This is an experimental feature. Currently, many methods fail to
+           propagate the ``allows_duplicate_labels`` value. In future versions
+           it is expected that every method taking or returning one or more
+           DataFrame or Series objects will propagate ``allows_duplicate_labels``.
+
+    Notes
+    -----
+    Attributes can be set in two ways
+
+    >>> df = pd.DataFrame()
+    >>> df.flags
+    <Flags(allows_duplicate_labels=True)>
+    >>> df.flags.allows_duplicate_labels = False
+    >>> df.flags
+    <Flags(allows_duplicate_labels=False)>
+
+    >>> df.flags['allows_duplicate_labels'] = True
+    >>> df.flags
+    <Flags(allows_duplicate_labels=True)>
+    """
+
+    _keys = {"allows_duplicate_labels"}
+
+    def __init__(self, obj, *, allows_duplicate_labels):
+        self._allows_duplicate_labels = allows_duplicate_labels
+        self._obj = weakref.ref(obj)
+
+    @property
+    def allows_duplicate_labels(self) -> bool:
+        """
+        Whether this object allows duplicate labels.
+
+        Setting ``allows_duplicate_labels=False`` ensures that the
+        index (and columns of a DataFrame) are unique. Most methods
+        that accept and return a Series or DataFrame will propagate
+        the value of ``allows_duplicate_labels``.
+
+        See :ref:`duplicates` for more.
+
+        See Also
+        --------
+        DataFrame.attrs : Set global metadata on this object.
+        DataFrame.set_flags : Set global flags on this object.
+
+        Examples
+        --------
+        >>> df = pd.DataFrame({"A": [1, 2]}, index=['a', 'a'])
+        >>> df.flags.allows_duplicate_labels
+        True
+        >>> df.flags.allows_duplicate_labels = False
+        Traceback (most recent call last):
+            ...
+        pandas.errors.DuplicateLabelError: Index has duplicates.
+              positions
+        label
+        a        [0, 1]
+        """
+        return self._allows_duplicate_labels
+
+    @allows_duplicate_labels.setter
+    def allows_duplicate_labels(self, value: bool):
+        value = bool(value)
+        obj = self._obj()
+        if obj is None:
+            raise ValueError("This flag's object has been deleted.")
+
+        if not value:
+            for ax in obj.axes:
+                ax._maybe_check_unique()
+
+        self._allows_duplicate_labels = value
+
+    def __getitem__(self, key):
+        if key not in self._keys:
+            raise KeyError(key)
+
+        return getattr(self, key)
+
+    def __setitem__(self, key, value):
+        if key not in self._keys:
+            raise ValueError(f"Unknown flag {key}. Must be one of {self._keys}")
+        setattr(self, key, value)
+
+    def __repr__(self):
+        return f"<Flags(allows_duplicate_labels={self.allows_duplicate_labels})>"
+
+    def __eq__(self, other):
+        if isinstance(other, type(self)):
+            return self.allows_duplicate_labels == other.allows_duplicate_labels
+        return False
diff --git a/.local/lib/python3.8/site-packages/pandas/core/frame.py b/.local/lib/python3.8/site-packages/pandas/core/frame.py
new file mode 100644
index 0000000..b89f702
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/core/frame.py
@@ -0,0 +1,10998 @@
+"""
+DataFrame
+---------
+An efficient 2D container for potentially mixed-type time series or other
+labeled data series.
+ +Similar to its R counterpart, data.frame, except providing automatic data +alignment and a host of useful data manipulation methods having to do with the +labeling information +""" +from __future__ import annotations + +import collections +from collections import abc +import datetime +import functools +from io import StringIO +import itertools +from textwrap import dedent +from typing import ( + IO, + TYPE_CHECKING, + Any, + Callable, + Hashable, + Iterable, + Iterator, + Literal, + Sequence, + cast, + overload, +) +import warnings + +import numpy as np +import numpy.ma as ma + +from pandas._config import get_option + +from pandas._libs import ( + algos as libalgos, + lib, + properties, +) +from pandas._libs.hashtable import duplicated +from pandas._libs.lib import no_default +from pandas._typing import ( + AggFuncType, + AnyArrayLike, + ArrayLike, + Axes, + Axis, + ColspaceArgType, + CompressionOptions, + Dtype, + DtypeObj, + FilePath, + FillnaOptions, + FloatFormatType, + FormattersType, + Frequency, + IndexKeyFunc, + IndexLabel, + Level, + PythonFuncType, + ReadBuffer, + Renamer, + Scalar, + StorageOptions, + Suffixes, + TimedeltaConvertibleTypes, + TimestampConvertibleTypes, + ValueKeyFunc, + WriteBuffer, + npt, +) +from pandas.compat._optional import import_optional_dependency +from pandas.compat.numpy import function as nv +from pandas.util._decorators import ( + Appender, + Substitution, + deprecate_kwarg, + deprecate_nonkeyword_arguments, + doc, + rewrite_axis_style_signature, +) +from pandas.util._exceptions import find_stack_level +from pandas.util._validators import ( + validate_ascending, + validate_axis_style_args, + validate_bool_kwarg, + validate_percentile, +) + +from pandas.core.dtypes.cast import ( + construct_1d_arraylike_from_scalar, + construct_2d_arraylike_from_scalar, + find_common_type, + infer_dtype_from_scalar, + invalidate_string_dtypes, + maybe_box_native, + maybe_downcast_to_dtype, +) +from pandas.core.dtypes.common import ( + ensure_platform_int, + infer_dtype_from_object, + is_1d_only_ea_dtype, + is_1d_only_ea_obj, + is_bool_dtype, + is_dataclass, + is_datetime64_any_dtype, + is_dict_like, + is_dtype_equal, + is_extension_array_dtype, + is_float, + is_float_dtype, + is_hashable, + is_integer, + is_integer_dtype, + is_iterator, + is_list_like, + is_object_dtype, + is_scalar, + is_sequence, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import ExtensionDtype +from pandas.core.dtypes.missing import ( + isna, + notna, +) + +from pandas.core import ( + algorithms, + common as com, + nanops, + ops, +) +from pandas.core.accessor import CachedAccessor +from pandas.core.apply import ( + reconstruct_func, + relabel_result, +) +from pandas.core.array_algos.take import take_2d_multi +from pandas.core.arraylike import OpsMixin +from pandas.core.arrays import ( + DatetimeArray, + ExtensionArray, + TimedeltaArray, +) +from pandas.core.arrays.sparse import SparseFrameAccessor +from pandas.core.construction import ( + extract_array, + sanitize_array, + sanitize_masked_array, +) +from pandas.core.generic import NDFrame +from pandas.core.indexers import check_key_length +from pandas.core.indexes.api import ( + DatetimeIndex, + Index, + PeriodIndex, + default_index, + ensure_index, + ensure_index_from_sequences, +) +from pandas.core.indexes.multi import ( + MultiIndex, + maybe_droplevels, +) +from pandas.core.indexing import ( + check_bool_indexer, + check_deprecated_indexers, + convert_to_index_sliceable, +) +from pandas.core.internals import ( + ArrayManager, + 
BlockManager, +) +from pandas.core.internals.construction import ( + arrays_to_mgr, + dataclasses_to_dicts, + dict_to_mgr, + mgr_to_mgr, + ndarray_to_mgr, + nested_data_to_arrays, + rec_array_to_mgr, + reorder_arrays, + to_arrays, + treat_as_nested, +) +from pandas.core.reshape.melt import melt +from pandas.core.series import Series +from pandas.core.shared_docs import _shared_docs +from pandas.core.sorting import ( + get_group_index, + lexsort_indexer, + nargsort, +) + +from pandas.io.common import get_handle +from pandas.io.formats import ( + console, + format as fmt, +) +from pandas.io.formats.info import ( + INFO_DOCSTRING, + DataFrameInfo, + frame_sub_kwargs, +) +import pandas.plotting + +if TYPE_CHECKING: + + from pandas.core.groupby.generic import DataFrameGroupBy + from pandas.core.internals import SingleDataManager + from pandas.core.resample import Resampler + + from pandas.io.formats.style import Styler + +# --------------------------------------------------------------------- +# Docstring templates + +_shared_doc_kwargs = { + "axes": "index, columns", + "klass": "DataFrame", + "axes_single_arg": "{0 or 'index', 1 or 'columns'}", + "axis": """axis : {0 or 'index', 1 or 'columns'}, default 0 + If 0 or 'index': apply function to each column. + If 1 or 'columns': apply function to each row.""", + "inplace": """ + inplace : bool, default False + If True, performs operation inplace and returns None.""", + "optional_by": """ + by : str or list of str + Name or list of names to sort by. + + - if `axis` is 0 or `'index'` then `by` may contain index + levels and/or column labels. + - if `axis` is 1 or `'columns'` then `by` may contain column + levels and/or index labels.""", + "optional_labels": """labels : array-like, optional + New labels / index to conform the axis specified by 'axis' to.""", + "optional_axis": """axis : int or str, optional + Axis to target. Can be either the axis name ('index', 'columns') + or number (0, 1).""", + "replace_iloc": """ + This differs from updating with ``.loc`` or ``.iloc``, which require + you to specify a location to update with some value.""", +} + +_numeric_only_doc = """numeric_only : bool or None, default None + Include only float, int, boolean data. If None, will attempt to use + everything, then use only numeric data +""" + +_merge_doc = """ +Merge DataFrame or named Series objects with a database-style join. + +A named Series object is treated as a DataFrame with a single named column. + +The join is done on columns or indexes. If joining columns on +columns, the DataFrame indexes *will be ignored*. Otherwise if joining indexes +on indexes or indexes on a column or columns, the index will be passed on. +When performing a cross merge, no column specifications to merge on are +allowed. + +.. warning:: + + If both key columns contain rows where the key is a null value, those + rows will be matched against each other. This is different from usual SQL + join behaviour and can lead to unexpected results. + +Parameters +----------%s +right : DataFrame or named Series + Object to merge with. +how : {'left', 'right', 'outer', 'inner', 'cross'}, default 'inner' + Type of merge to be performed. + + * left: use only keys from left frame, similar to a SQL left outer join; + preserve key order. + * right: use only keys from right frame, similar to a SQL right outer join; + preserve key order. + * outer: use union of keys from both frames, similar to a SQL full outer + join; sort keys lexicographically. 
+ * inner: use intersection of keys from both frames, similar to a SQL inner + join; preserve the order of the left keys. + * cross: creates the cartesian product from both frames, preserves the order + of the left keys. + + .. versionadded:: 1.2.0 + +on : label or list + Column or index level names to join on. These must be found in both + DataFrames. If `on` is None and not merging on indexes then this defaults + to the intersection of the columns in both DataFrames. +left_on : label or list, or array-like + Column or index level names to join on in the left DataFrame. Can also + be an array or list of arrays of the length of the left DataFrame. + These arrays are treated as if they are columns. +right_on : label or list, or array-like + Column or index level names to join on in the right DataFrame. Can also + be an array or list of arrays of the length of the right DataFrame. + These arrays are treated as if they are columns. +left_index : bool, default False + Use the index from the left DataFrame as the join key(s). If it is a + MultiIndex, the number of keys in the other DataFrame (either the index + or a number of columns) must match the number of levels. +right_index : bool, default False + Use the index from the right DataFrame as the join key. Same caveats as + left_index. +sort : bool, default False + Sort the join keys lexicographically in the result DataFrame. If False, + the order of the join keys depends on the join type (how keyword). +suffixes : list-like, default is ("_x", "_y") + A length-2 sequence where each element is optionally a string + indicating the suffix to add to overlapping column names in + `left` and `right` respectively. Pass a value of `None` instead + of a string to indicate that the column name from `left` or + `right` should be left as-is, with no suffix. At least one of the + values must not be None. +copy : bool, default True + If False, avoid copy if possible. +indicator : bool or str, default False + If True, adds a column to the output DataFrame called "_merge" with + information on the source of each row. The column can be given a different + name by providing a string argument. The column will have a Categorical + type with the value of "left_only" for observations whose merge key only + appears in the left DataFrame, "right_only" for observations + whose merge key only appears in the right DataFrame, and "both" + if the observation's merge key is found in both DataFrames. + +validate : str, optional + If specified, checks if merge is of specified type. + + * "one_to_one" or "1:1": check if merge keys are unique in both + left and right datasets. + * "one_to_many" or "1:m": check if merge keys are unique in left + dataset. + * "many_to_one" or "m:1": check if merge keys are unique in right + dataset. + * "many_to_many" or "m:m": allowed, but does not result in checks. + +Returns +------- +DataFrame + A DataFrame of the two merged objects. + +See Also +-------- +merge_ordered : Merge with optional filling/interpolation. +merge_asof : Merge on nearest keys. +DataFrame.join : Similar method using indices. + +Notes +----- +Support for specifying index levels as the `on`, `left_on`, and +`right_on` parameters was added in version 0.23.0 +Support for merging named Series objects was added in version 0.24.0 + +Examples +-------- +>>> df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], +... 'value': [1, 2, 3, 5]}) +>>> df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], +... 
'value': [5, 6, 7, 8]}) +>>> df1 + lkey value +0 foo 1 +1 bar 2 +2 baz 3 +3 foo 5 +>>> df2 + rkey value +0 foo 5 +1 bar 6 +2 baz 7 +3 foo 8 + +Merge df1 and df2 on the lkey and rkey columns. The value columns have +the default suffixes, _x and _y, appended. + +>>> df1.merge(df2, left_on='lkey', right_on='rkey') + lkey value_x rkey value_y +0 foo 1 foo 5 +1 foo 1 foo 8 +2 foo 5 foo 5 +3 foo 5 foo 8 +4 bar 2 bar 6 +5 baz 3 baz 7 + +Merge DataFrames df1 and df2 with specified left and right suffixes +appended to any overlapping columns. + +>>> df1.merge(df2, left_on='lkey', right_on='rkey', +... suffixes=('_left', '_right')) + lkey value_left rkey value_right +0 foo 1 foo 5 +1 foo 1 foo 8 +2 foo 5 foo 5 +3 foo 5 foo 8 +4 bar 2 bar 6 +5 baz 3 baz 7 + +Merge DataFrames df1 and df2, but raise an exception if the DataFrames have +any overlapping columns. + +>>> df1.merge(df2, left_on='lkey', right_on='rkey', suffixes=(False, False)) +Traceback (most recent call last): +... +ValueError: columns overlap but no suffix specified: + Index(['value'], dtype='object') + +>>> df1 = pd.DataFrame({'a': ['foo', 'bar'], 'b': [1, 2]}) +>>> df2 = pd.DataFrame({'a': ['foo', 'baz'], 'c': [3, 4]}) +>>> df1 + a b +0 foo 1 +1 bar 2 +>>> df2 + a c +0 foo 3 +1 baz 4 + +>>> df1.merge(df2, how='inner', on='a') + a b c +0 foo 1 3 + +>>> df1.merge(df2, how='left', on='a') + a b c +0 foo 1 3.0 +1 bar 2 NaN + +>>> df1 = pd.DataFrame({'left': ['foo', 'bar']}) +>>> df2 = pd.DataFrame({'right': [7, 8]}) +>>> df1 + left +0 foo +1 bar +>>> df2 + right +0 7 +1 8 + +>>> df1.merge(df2, how='cross') + left right +0 foo 7 +1 foo 8 +2 bar 7 +3 bar 8 +""" + + +# ----------------------------------------------------------------------- +# DataFrame class + + +class DataFrame(NDFrame, OpsMixin): + """ + Two-dimensional, size-mutable, potentially heterogeneous tabular data. + + Data structure also contains labeled axes (rows and columns). + Arithmetic operations align on both row and column labels. Can be + thought of as a dict-like container for Series objects. The primary + pandas data structure. + + Parameters + ---------- + data : ndarray (structured or homogeneous), Iterable, dict, or DataFrame + Dict can contain Series, arrays, constants, dataclass or list-like objects. If + data is a dict, column order follows insertion-order. If a dict contains Series + which have an index defined, it is aligned by its index. + + .. versionchanged:: 0.25.0 + If data is a list of dicts, column order follows insertion-order. + + index : Index or array-like + Index to use for resulting frame. Will default to RangeIndex if + no indexing information part of input data and no index provided. + columns : Index or array-like + Column labels to use for resulting frame when data does not have them, + defaulting to RangeIndex(0, 1, 2, ..., n). If data contains column labels, + will perform column selection instead. + dtype : dtype, default None + Data type to force. Only a single dtype is allowed. If None, infer. + copy : bool or None, default None + Copy data from inputs. + For dict data, the default of None behaves like ``copy=True``. For DataFrame + or 2d ndarray input, the default of None behaves like ``copy=False``. + + .. versionchanged:: 1.3.0 + + See Also + -------- + DataFrame.from_records : Constructor from tuples, also record arrays. + DataFrame.from_dict : From dicts of Series, arrays, or dicts. + read_csv : Read a comma-separated values (csv) file into DataFrame. + read_table : Read general delimited file into DataFrame. 
+ read_clipboard : Read text from clipboard into DataFrame. + + Examples + -------- + Constructing DataFrame from a dictionary. + + >>> d = {'col1': [1, 2], 'col2': [3, 4]} + >>> df = pd.DataFrame(data=d) + >>> df + col1 col2 + 0 1 3 + 1 2 4 + + Notice that the inferred dtype is int64. + + >>> df.dtypes + col1 int64 + col2 int64 + dtype: object + + To enforce a single dtype: + + >>> df = pd.DataFrame(data=d, dtype=np.int8) + >>> df.dtypes + col1 int8 + col2 int8 + dtype: object + + Constructing DataFrame from a dictionary including Series: + + >>> d = {'col1': [0, 1, 2, 3], 'col2': pd.Series([2, 3], index=[2, 3])} + >>> pd.DataFrame(data=d, index=[0, 1, 2, 3]) + col1 col2 + 0 0 NaN + 1 1 NaN + 2 2 2.0 + 3 3 3.0 + + Constructing DataFrame from numpy ndarray: + + >>> df2 = pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), + ... columns=['a', 'b', 'c']) + >>> df2 + a b c + 0 1 2 3 + 1 4 5 6 + 2 7 8 9 + + Constructing DataFrame from a numpy ndarray that has labeled columns: + + >>> data = np.array([(1, 2, 3), (4, 5, 6), (7, 8, 9)], + ... dtype=[("a", "i4"), ("b", "i4"), ("c", "i4")]) + >>> df3 = pd.DataFrame(data, columns=['c', 'a']) + ... + >>> df3 + c a + 0 3 1 + 1 6 4 + 2 9 7 + + Constructing DataFrame from dataclass: + + >>> from dataclasses import make_dataclass + >>> Point = make_dataclass("Point", [("x", int), ("y", int)]) + >>> pd.DataFrame([Point(0, 0), Point(0, 3), Point(2, 3)]) + x y + 0 0 0 + 1 0 3 + 2 2 3 + """ + + _internal_names_set = {"columns", "index"} | NDFrame._internal_names_set + _typ = "dataframe" + _HANDLED_TYPES = (Series, Index, ExtensionArray, np.ndarray) + _accessors: set[str] = {"sparse"} + _hidden_attrs: frozenset[str] = NDFrame._hidden_attrs | frozenset([]) + _mgr: BlockManager | ArrayManager + + @property + def _constructor(self) -> type[DataFrame]: + return DataFrame + + _constructor_sliced: type[Series] = Series + + # ---------------------------------------------------------------------- + # Constructors + + def __init__( + self, + data=None, + index: Axes | None = None, + columns: Axes | None = None, + dtype: Dtype | None = None, + copy: bool | None = None, + ): + + if data is None: + data = {} + if dtype is not None: + dtype = self._validate_dtype(dtype) + + if isinstance(data, DataFrame): + data = data._mgr + + if isinstance(data, (BlockManager, ArrayManager)): + # first check if a Manager is passed without any other arguments + # -> use fastpath (without checking Manager type) + if index is None and columns is None and dtype is None and not copy: + # GH#33357 fastpath + NDFrame.__init__(self, data) + return + + manager = get_option("mode.data_manager") + + if copy is None: + if isinstance(data, dict): + # retain pre-GH#38939 default behavior + copy = True + elif ( + manager == "array" + and isinstance(data, (np.ndarray, ExtensionArray)) + and data.ndim == 2 + ): + # INFO(ArrayManager) by default copy the 2D input array to get + # contiguous 1D arrays + copy = True + else: + copy = False + + if isinstance(data, (BlockManager, ArrayManager)): + mgr = self._init_mgr( + data, axes={"index": index, "columns": columns}, dtype=dtype, copy=copy + ) + + elif isinstance(data, dict): + # GH#38939 de facto copy defaults to False only in non-dict cases + mgr = dict_to_mgr(data, index, columns, dtype=dtype, copy=copy, typ=manager) + elif isinstance(data, ma.MaskedArray): + import numpy.ma.mrecords as mrecords + + # masked recarray + if isinstance(data, mrecords.MaskedRecords): + mgr = rec_array_to_mgr( + data, + index, + columns, + dtype, + copy, + typ=manager, 
+ ) + warnings.warn( + "Support for MaskedRecords is deprecated and will be " + "removed in a future version. Pass " + "{name: data[name] for name in data.dtype.names} instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + # a masked array + else: + data = sanitize_masked_array(data) + mgr = ndarray_to_mgr( + data, + index, + columns, + dtype=dtype, + copy=copy, + typ=manager, + ) + + elif isinstance(data, (np.ndarray, Series, Index, ExtensionArray)): + if data.dtype.names: + # i.e. numpy structured array + data = cast(np.ndarray, data) + mgr = rec_array_to_mgr( + data, + index, + columns, + dtype, + copy, + typ=manager, + ) + elif getattr(data, "name", None) is not None: + # i.e. Series/Index with non-None name + mgr = dict_to_mgr( + # error: Item "ndarray" of "Union[ndarray, Series, Index]" has no + # attribute "name" + {data.name: data}, # type: ignore[union-attr] + index, + columns, + dtype=dtype, + typ=manager, + ) + else: + mgr = ndarray_to_mgr( + data, + index, + columns, + dtype=dtype, + copy=copy, + typ=manager, + ) + + # For data is list-like, or Iterable (will consume into list) + elif is_list_like(data): + if not isinstance(data, (abc.Sequence, ExtensionArray)): + if hasattr(data, "__array__"): + # GH#44616 big perf improvement for e.g. pytorch tensor + data = np.asarray(data) + else: + data = list(data) + if len(data) > 0: + if is_dataclass(data[0]): + data = dataclasses_to_dicts(data) + if not isinstance(data, np.ndarray) and treat_as_nested(data): + # exclude ndarray as we may have cast it a few lines above + if columns is not None: + # error: Argument 1 to "ensure_index" has incompatible type + # "Collection[Any]"; expected "Union[Union[Union[ExtensionArray, + # ndarray], Index, Series], Sequence[Any]]" + columns = ensure_index(columns) # type: ignore[arg-type] + arrays, columns, index = nested_data_to_arrays( + # error: Argument 3 to "nested_data_to_arrays" has incompatible + # type "Optional[Collection[Any]]"; expected "Optional[Index]" + data, + columns, + index, # type: ignore[arg-type] + dtype, + ) + mgr = arrays_to_mgr( + arrays, + columns, + index, + dtype=dtype, + typ=manager, + ) + else: + mgr = ndarray_to_mgr( + data, + index, + columns, + dtype=dtype, + copy=copy, + typ=manager, + ) + else: + mgr = dict_to_mgr( + {}, + index, + columns, + dtype=dtype, + typ=manager, + ) + # For data is scalar + else: + if index is None or columns is None: + raise ValueError("DataFrame constructor not properly called!") + + # Argument 1 to "ensure_index" has incompatible type "Collection[Any]"; + # expected "Union[Union[Union[ExtensionArray, ndarray], + # Index, Series], Sequence[Any]]" + index = ensure_index(index) # type: ignore[arg-type] + # Argument 1 to "ensure_index" has incompatible type "Collection[Any]"; + # expected "Union[Union[Union[ExtensionArray, ndarray], + # Index, Series], Sequence[Any]]" + columns = ensure_index(columns) # type: ignore[arg-type] + + if not dtype: + dtype, _ = infer_dtype_from_scalar(data, pandas_dtype=True) + + # For data is a scalar extension dtype + if isinstance(dtype, ExtensionDtype): + # TODO(EA2D): special case not needed with 2D EAs + + values = [ + construct_1d_arraylike_from_scalar(data, len(index), dtype) + for _ in range(len(columns)) + ] + mgr = arrays_to_mgr(values, columns, index, dtype=None, typ=manager) + else: + arr2d = construct_2d_arraylike_from_scalar( + data, + len(index), + len(columns), + dtype, + copy, + ) + + mgr = ndarray_to_mgr( + arr2d, + index, + columns, + dtype=arr2d.dtype, + copy=False, + typ=manager, 
+ ) + + # ensure correct Manager type according to settings + mgr = mgr_to_mgr(mgr, typ=manager) + + NDFrame.__init__(self, mgr) + + # ---------------------------------------------------------------------- + + @property + def axes(self) -> list[Index]: + """ + Return a list representing the axes of the DataFrame. + + It has the row axis labels and column axis labels as the only members. + They are returned in that order. + + Examples + -------- + >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) + >>> df.axes + [RangeIndex(start=0, stop=2, step=1), Index(['col1', 'col2'], + dtype='object')] + """ + return [self.index, self.columns] + + @property + def shape(self) -> tuple[int, int]: + """ + Return a tuple representing the dimensionality of the DataFrame. + + See Also + -------- + ndarray.shape : Tuple of array dimensions. + + Examples + -------- + >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) + >>> df.shape + (2, 2) + + >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4], + ... 'col3': [5, 6]}) + >>> df.shape + (2, 3) + """ + return len(self.index), len(self.columns) + + @property + def _is_homogeneous_type(self) -> bool: + """ + Whether all the columns in a DataFrame have the same type. + + Returns + ------- + bool + + See Also + -------- + Index._is_homogeneous_type : Whether the object has a single + dtype. + MultiIndex._is_homogeneous_type : Whether all the levels of a + MultiIndex have the same dtype. + + Examples + -------- + >>> DataFrame({"A": [1, 2], "B": [3, 4]})._is_homogeneous_type + True + >>> DataFrame({"A": [1, 2], "B": [3.0, 4.0]})._is_homogeneous_type + False + + Items with the same type but different sizes are considered + different types. + + >>> DataFrame({ + ... "A": np.array([1, 2], dtype=np.int32), + ... "B": np.array([1, 2], dtype=np.int64)})._is_homogeneous_type + False + """ + if isinstance(self._mgr, ArrayManager): + return len({arr.dtype for arr in self._mgr.arrays}) == 1 + if self._mgr.any_extension_types: + return len({block.dtype for block in self._mgr.blocks}) == 1 + else: + return not self._is_mixed_type + + @property + def _can_fast_transpose(self) -> bool: + """ + Can we transpose this DataFrame without creating any new array objects. + """ + if isinstance(self._mgr, ArrayManager): + return False + blocks = self._mgr.blocks + if len(blocks) != 1: + return False + + dtype = blocks[0].dtype + # TODO(EA2D) special case would be unnecessary with 2D EAs + return not is_1d_only_ea_dtype(dtype) + + # error: Return type "Union[ndarray, DatetimeArray, TimedeltaArray]" of + # "_values" incompatible with return type "ndarray" in supertype "NDFrame" + @property + def _values( # type: ignore[override] + self, + ) -> np.ndarray | DatetimeArray | TimedeltaArray: + """ + Analogue to ._values that may return a 2D ExtensionArray. 
+        """
+        self._consolidate_inplace()
+
+        mgr = self._mgr
+
+        if isinstance(mgr, ArrayManager):
+            if len(mgr.arrays) == 1 and not is_1d_only_ea_obj(mgr.arrays[0]):
+                # error: Item "ExtensionArray" of "Union[ndarray, ExtensionArray]"
+                # has no attribute "reshape"
+                return mgr.arrays[0].reshape(-1, 1)  # type: ignore[union-attr]
+            return self.values
+
+        blocks = mgr.blocks
+        if len(blocks) != 1:
+            return self.values
+
+        arr = blocks[0].values
+        if arr.ndim == 1:
+            # non-2D ExtensionArray
+            return self.values
+
+        # more generally, whatever we allow in NDArrayBackedExtensionBlock
+        arr = cast("np.ndarray | DatetimeArray | TimedeltaArray", arr)
+        return arr.T
+
+    # ----------------------------------------------------------------------
+    # Rendering Methods
+
+    def _repr_fits_vertical_(self) -> bool:
+        """
+        Check length against max_rows.
+        """
+        max_rows = get_option("display.max_rows")
+        return len(self) <= max_rows
+
+    def _repr_fits_horizontal_(self, ignore_width: bool = False) -> bool:
+        """
+        Check if full repr fits in horizontal boundaries imposed by the display
+        options width and max_columns.
+
+        In case of non-interactive session, no boundaries apply.
+
+        `ignore_width` is here so ipynb+HTML output can behave the way
+        users expect. display.max_columns remains in effect.
+        GH3541, GH3573
+        """
+        width, height = console.get_console_size()
+        max_columns = get_option("display.max_columns")
+        nb_columns = len(self.columns)
+
+        # exceed max columns
+        if (max_columns and nb_columns > max_columns) or (
+            (not ignore_width) and width and nb_columns > (width // 2)
+        ):
+            return False
+
+        # used by repr_html under IPython notebook or scripts ignore terminal
+        # dims
+        if ignore_width or not console.in_interactive_session():
+            return True
+
+        if get_option("display.width") is not None or console.in_ipython_frontend():
+            # check at least the column row for excessive width
+            max_rows = 1
+        else:
+            max_rows = get_option("display.max_rows")
+
+        # when auto-detecting, so width=None and not in ipython front end
+        # check whether repr fits horizontal by actually checking
+        # the width of the rendered repr
+        buf = StringIO()
+
+        # only care about the stuff we'll actually print out
+        # and to_string on entire frame may be expensive
+        d = self
+
+        if max_rows is not None:  # unlimited rows
+            # min of two, where one may be None
+            d = d.iloc[: min(max_rows, len(d))]
+        else:
+            return True
+
+        d.to_string(buf=buf)
+        value = buf.getvalue()
+        repr_width = max(len(line) for line in value.split("\n"))
+
+        return repr_width < width
+
+    def _info_repr(self) -> bool:
+        """
+        True if the repr should show the info view.
+        """
+        info_repr_option = get_option("display.large_repr") == "info"
+        return info_repr_option and not (
+            self._repr_fits_horizontal_() and self._repr_fits_vertical_()
+        )
+
+    def __repr__(self) -> str:
+        """
+        Return a string representation for a particular DataFrame.
+        """
+        if self._info_repr():
+            buf = StringIO()
+            self.info(buf=buf)
+            return buf.getvalue()
+
+        repr_params = fmt.get_dataframe_repr_params()
+        return self.to_string(**repr_params)
+
+    def _repr_html_(self) -> str | None:
+        """
+        Return a html representation for a particular DataFrame.
+
+        Mainly for IPython notebook.
+        """
+        if self._info_repr():
+            buf = StringIO()
+            self.info(buf=buf)
+            # need to escape the <class>, should be the first line.
+            val = buf.getvalue().replace("<", r"&lt;", 1)
+            val = val.replace(">", r"&gt;", 1)
+            return "<pre>" + val + "</pre>"
+
+        if get_option("display.notebook_repr_html"):
+            max_rows = get_option("display.max_rows")
+            min_rows = get_option("display.min_rows")
+            max_cols = get_option("display.max_columns")
+            show_dimensions = get_option("display.show_dimensions")
+
+            formatter = fmt.DataFrameFormatter(
+                self,
+                columns=None,
+                col_space=None,
+                na_rep="NaN",
+                formatters=None,
+                float_format=None,
+                sparsify=None,
+                justify=None,
+                index_names=True,
+                header=True,
+                index=True,
+                bold_rows=True,
+                escape=True,
+                max_rows=max_rows,
+                min_rows=min_rows,
+                max_cols=max_cols,
+                show_dimensions=show_dimensions,
+                decimal=".",
+            )
+            return fmt.DataFrameRenderer(formatter).to_html(notebook=True)
+        else:
+            return None
+
+    @overload
+    def to_string(
+        self,
+        buf: None = ...,
+        columns: Sequence[str] | None = ...,
+        col_space: int | list[int] | dict[Hashable, int] | None = ...,
+        header: bool | Sequence[str] = ...,
+        index: bool = ...,
+        na_rep: str = ...,
+        formatters: fmt.FormattersType | None = ...,
+        float_format: fmt.FloatFormatType | None = ...,
+        sparsify: bool | None = ...,
+        index_names: bool = ...,
+        justify: str | None = ...,
+        max_rows: int | None = ...,
+        max_cols: int | None = ...,
+        show_dimensions: bool = ...,
+        decimal: str = ...,
+        line_width: int | None = ...,
+        min_rows: int | None = ...,
+        max_colwidth: int | None = ...,
+        encoding: str | None = ...,
+    ) -> str:
+        ...
+
+    @overload
+    def to_string(
+        self,
+        buf: FilePath | WriteBuffer[str],
+        columns: Sequence[str] | None = ...,
+        col_space: int | list[int] | dict[Hashable, int] | None = ...,
+        header: bool | Sequence[str] = ...,
+        index: bool = ...,
+        na_rep: str = ...,
+        formatters: fmt.FormattersType | None = ...,
+        float_format: fmt.FloatFormatType | None = ...,
+        sparsify: bool | None = ...,
+        index_names: bool = ...,
+        justify: str | None = ...,
+        max_rows: int | None = ...,
+        max_cols: int | None = ...,
+        show_dimensions: bool = ...,
+        decimal: str = ...,
+        line_width: int | None = ...,
+        min_rows: int | None = ...,
+        max_colwidth: int | None = ...,
+        encoding: str | None = ...,
+    ) -> None:
+        ...
+
+    @Substitution(
+        header_type="bool or sequence of str",
+        header="Write out the column names. If a list of strings "
+        "is given, it is assumed to be aliases for the "
+        "column names",
+        col_space_type="int, list or dict of int",
+        col_space="The minimum width of each column. If a list of ints is given, "
+        "each integer corresponds to one column. If a dict is given, the key "
+        "references the column, while the value defines the space to use.",
+    )
+    @Substitution(shared_params=fmt.common_docstring, returns=fmt.return_docstring)
+    def to_string(
+        self,
+        buf: FilePath | WriteBuffer[str] | None = None,
+        columns: Sequence[str] | None = None,
+        col_space: int | list[int] | dict[Hashable, int] | None = None,
+        header: bool | Sequence[str] = True,
+        index: bool = True,
+        na_rep: str = "NaN",
+        formatters: fmt.FormattersType | None = None,
+        float_format: fmt.FloatFormatType | None = None,
+        sparsify: bool | None = None,
+        index_names: bool = True,
+        justify: str | None = None,
+        max_rows: int | None = None,
+        max_cols: int | None = None,
+        show_dimensions: bool = False,
+        decimal: str = ".",
+        line_width: int | None = None,
+        min_rows: int | None = None,
+        max_colwidth: int | None = None,
+        encoding: str | None = None,
+    ) -> str | None:
+        """
+        Render a DataFrame to a console-friendly tabular output.
+        %(shared_params)s
+        line_width : int, optional
+            Width to wrap a line in characters.
+ min_rows : int, optional + The number of rows to display in the console in a truncated repr + (when number of rows is above `max_rows`). + max_colwidth : int, optional + Max width to truncate each column in characters. By default, no limit. + + .. versionadded:: 1.0.0 + encoding : str, default "utf-8" + Set character encoding. + + .. versionadded:: 1.0 + %(returns)s + See Also + -------- + to_html : Convert DataFrame to HTML. + + Examples + -------- + >>> d = {'col1': [1, 2, 3], 'col2': [4, 5, 6]} + >>> df = pd.DataFrame(d) + >>> print(df.to_string()) + col1 col2 + 0 1 4 + 1 2 5 + 2 3 6 + """ + from pandas import option_context + + with option_context("display.max_colwidth", max_colwidth): + formatter = fmt.DataFrameFormatter( + self, + columns=columns, + col_space=col_space, + na_rep=na_rep, + formatters=formatters, + float_format=float_format, + sparsify=sparsify, + justify=justify, + index_names=index_names, + header=header, + index=index, + min_rows=min_rows, + max_rows=max_rows, + max_cols=max_cols, + show_dimensions=show_dimensions, + decimal=decimal, + ) + return fmt.DataFrameRenderer(formatter).to_string( + buf=buf, + encoding=encoding, + line_width=line_width, + ) + + # ---------------------------------------------------------------------- + + @property + def style(self) -> Styler: + """ + Returns a Styler object. + + Contains methods for building a styled HTML representation of the DataFrame. + + See Also + -------- + io.formats.style.Styler : Helps style a DataFrame or Series according to the + data with HTML and CSS. + """ + from pandas.io.formats.style import Styler + + return Styler(self) + + _shared_docs[ + "items" + ] = r""" + Iterate over (column name, Series) pairs. + + Iterates over the DataFrame columns, returning a tuple with + the column name and the content as a Series. + + Yields + ------ + label : object + The column names for the DataFrame being iterated over. + content : Series + The column entries belonging to each label, as a Series. + + See Also + -------- + DataFrame.iterrows : Iterate over DataFrame rows as + (index, Series) pairs. + DataFrame.itertuples : Iterate over DataFrame rows as namedtuples + of the values. + + Examples + -------- + >>> df = pd.DataFrame({'species': ['bear', 'bear', 'marsupial'], + ... 'population': [1864, 22000, 80000]}, + ... index=['panda', 'polar', 'koala']) + >>> df + species population + panda bear 1864 + polar bear 22000 + koala marsupial 80000 + >>> for label, content in df.items(): + ... print(f'label: {label}') + ... print(f'content: {content}', sep='\n') + ... + label: species + content: + panda bear + polar bear + koala marsupial + Name: species, dtype: object + label: population + content: + panda 1864 + polar 22000 + koala 80000 + Name: population, dtype: int64 + """ + + @Appender(_shared_docs["items"]) + def items(self) -> Iterable[tuple[Hashable, Series]]: + if self.columns.is_unique and hasattr(self, "_item_cache"): + for k in self.columns: + yield k, self._get_item_cache(k) + else: + for i, k in enumerate(self.columns): + yield k, self._ixs(i, axis=1) + + @Appender(_shared_docs["items"]) + def iteritems(self) -> Iterable[tuple[Hashable, Series]]: + yield from self.items() + + def iterrows(self) -> Iterable[tuple[Hashable, Series]]: + """ + Iterate over DataFrame rows as (index, Series) pairs. + + Yields + ------ + index : label or tuple of label + The index of the row. A tuple for a `MultiIndex`. + data : Series + The data of the row as a Series. 
+ + See Also + -------- + DataFrame.itertuples : Iterate over DataFrame rows as namedtuples of the values. + DataFrame.items : Iterate over (column name, Series) pairs. + + Notes + ----- + 1. Because ``iterrows`` returns a Series for each row, + it does **not** preserve dtypes across the rows (dtypes are + preserved across columns for DataFrames). For example, + + >>> df = pd.DataFrame([[1, 1.5]], columns=['int', 'float']) + >>> row = next(df.iterrows())[1] + >>> row + int 1.0 + float 1.5 + Name: 0, dtype: float64 + >>> print(row['int'].dtype) + float64 + >>> print(df['int'].dtype) + int64 + + To preserve dtypes while iterating over the rows, it is better + to use :meth:`itertuples` which returns namedtuples of the values + and which is generally faster than ``iterrows``. + + 2. You should **never modify** something you are iterating over. + This is not guaranteed to work in all cases. Depending on the + data types, the iterator returns a copy and not a view, and writing + to it will have no effect. + """ + columns = self.columns + klass = self._constructor_sliced + for k, v in zip(self.index, self.values): + s = klass(v, index=columns, name=k) + yield k, s + + def itertuples( + self, index: bool = True, name: str | None = "Pandas" + ) -> Iterable[tuple[Any, ...]]: + """ + Iterate over DataFrame rows as namedtuples. + + Parameters + ---------- + index : bool, default True + If True, return the index as the first element of the tuple. + name : str or None, default "Pandas" + The name of the returned namedtuples or None to return regular + tuples. + + Returns + ------- + iterator + An object to iterate over namedtuples for each row in the + DataFrame with the first field possibly being the index and + following fields being the column values. + + See Also + -------- + DataFrame.iterrows : Iterate over DataFrame rows as (index, Series) + pairs. + DataFrame.items : Iterate over (column name, Series) pairs. + + Notes + ----- + The column names will be renamed to positional names if they are + invalid Python identifiers, repeated, or start with an underscore. + On python versions < 3.7 regular tuples are returned for DataFrames + with a large number of columns (>254). + + Examples + -------- + >>> df = pd.DataFrame({'num_legs': [4, 2], 'num_wings': [0, 2]}, + ... index=['dog', 'hawk']) + >>> df + num_legs num_wings + dog 4 0 + hawk 2 2 + >>> for row in df.itertuples(): + ... print(row) + ... + Pandas(Index='dog', num_legs=4, num_wings=0) + Pandas(Index='hawk', num_legs=2, num_wings=2) + + By setting the `index` parameter to False we can remove the index + as the first element of the tuple: + + >>> for row in df.itertuples(index=False): + ... print(row) + ... + Pandas(num_legs=4, num_wings=0) + Pandas(num_legs=2, num_wings=2) + + With the `name` parameter set we set a custom name for the yielded + namedtuples: + + >>> for row in df.itertuples(name='Animal'): + ... print(row) + ... 
+ Animal(Index='dog', num_legs=4, num_wings=0) + Animal(Index='hawk', num_legs=2, num_wings=2) + """ + arrays = [] + fields = list(self.columns) + if index: + arrays.append(self.index) + fields.insert(0, "Index") + + # use integer indexing because of possible duplicate column names + arrays.extend(self.iloc[:, k] for k in range(len(self.columns))) + + if name is not None: + # https://github.com/python/mypy/issues/9046 + # error: namedtuple() expects a string literal as the first argument + itertuple = collections.namedtuple( # type: ignore[misc] + name, fields, rename=True + ) + return map(itertuple._make, zip(*arrays)) + + # fallback to regular tuples + return zip(*arrays) + + def __len__(self) -> int: + """ + Returns length of info axis, but here we use the index. + """ + return len(self.index) + + @overload + def dot(self, other: Series) -> Series: + ... + + @overload + def dot(self, other: DataFrame | Index | ArrayLike) -> DataFrame: + ... + + def dot(self, other: AnyArrayLike | DataFrame) -> DataFrame | Series: + """ + Compute the matrix multiplication between the DataFrame and other. + + This method computes the matrix product between the DataFrame and the + values of an other Series, DataFrame or a numpy array. + + It can also be called using ``self @ other`` in Python >= 3.5. + + Parameters + ---------- + other : Series, DataFrame or array-like + The other object to compute the matrix product with. + + Returns + ------- + Series or DataFrame + If other is a Series, return the matrix product between self and + other as a Series. If other is a DataFrame or a numpy.array, return + the matrix product of self and other in a DataFrame of a np.array. + + See Also + -------- + Series.dot: Similar method for Series. + + Notes + ----- + The dimensions of DataFrame and other must be compatible in order to + compute the matrix multiplication. In addition, the column names of + DataFrame and the index of other must contain the same values, as they + will be aligned prior to the multiplication. + + The dot method for Series computes the inner product, instead of the + matrix product here. + + Examples + -------- + Here we multiply a DataFrame with a Series. + + >>> df = pd.DataFrame([[0, 1, -2, -1], [1, 1, 1, 1]]) + >>> s = pd.Series([1, 1, 2, 1]) + >>> df.dot(s) + 0 -4 + 1 5 + dtype: int64 + + Here we multiply a DataFrame with another DataFrame. + + >>> other = pd.DataFrame([[0, 1], [1, 2], [-1, -1], [2, 0]]) + >>> df.dot(other) + 0 1 + 0 1 4 + 1 2 2 + + Note that the dot method give the same result as @ + + >>> df @ other + 0 1 + 0 1 4 + 1 2 2 + + The dot method works also if other is an np.array. + + >>> arr = np.array([[0, 1], [1, 2], [-1, -1], [2, 0]]) + >>> df.dot(arr) + 0 1 + 0 1 4 + 1 2 2 + + Note how shuffling of the objects does not change the result. 
+ + >>> s2 = s.reindex([1, 0, 2, 3]) + >>> df.dot(s2) + 0 -4 + 1 5 + dtype: int64 + """ + if isinstance(other, (Series, DataFrame)): + common = self.columns.union(other.index) + if len(common) > len(self.columns) or len(common) > len(other.index): + raise ValueError("matrices are not aligned") + + left = self.reindex(columns=common, copy=False) + right = other.reindex(index=common, copy=False) + lvals = left.values + rvals = right._values + else: + left = self + lvals = self.values + rvals = np.asarray(other) + if lvals.shape[1] != rvals.shape[0]: + raise ValueError( + f"Dot product shape mismatch, {lvals.shape} vs {rvals.shape}" + ) + + if isinstance(other, DataFrame): + return self._constructor( + np.dot(lvals, rvals), index=left.index, columns=other.columns + ) + elif isinstance(other, Series): + return self._constructor_sliced(np.dot(lvals, rvals), index=left.index) + elif isinstance(rvals, (np.ndarray, Index)): + result = np.dot(lvals, rvals) + if result.ndim == 2: + return self._constructor(result, index=left.index) + else: + return self._constructor_sliced(result, index=left.index) + else: # pragma: no cover + raise TypeError(f"unsupported type: {type(other)}") + + @overload + def __matmul__(self, other: Series) -> Series: + ... + + @overload + def __matmul__( + self, other: AnyArrayLike | DataFrame | Series + ) -> DataFrame | Series: + ... + + def __matmul__( + self, other: AnyArrayLike | DataFrame | Series + ) -> DataFrame | Series: + """ + Matrix multiplication using binary `@` operator in Python>=3.5. + """ + return self.dot(other) + + def __rmatmul__(self, other): + """ + Matrix multiplication using binary `@` operator in Python>=3.5. + """ + try: + return self.T.dot(np.transpose(other)).T + except ValueError as err: + if "shape mismatch" not in str(err): + raise + # GH#21581 give exception message for original shapes + msg = f"shapes {np.shape(other)} and {self.shape} not aligned" + raise ValueError(msg) from err + + # ---------------------------------------------------------------------- + # IO methods (to / from other formats) + + @classmethod + def from_dict( + cls, + data, + orient: str = "columns", + dtype: Dtype | None = None, + columns=None, + ) -> DataFrame: + """ + Construct DataFrame from dict of array-like or dicts. + + Creates DataFrame object from dictionary by columns or by index + allowing dtype specification. + + Parameters + ---------- + data : dict + Of the form {field : array-like} or {field : dict}. + orient : {'columns', 'index', 'tight'}, default 'columns' + The "orientation" of the data. If the keys of the passed dict + should be the columns of the resulting DataFrame, pass 'columns' + (default). Otherwise if the keys should be rows, pass 'index'. + If 'tight', assume a dict with keys ['index', 'columns', 'data', + 'index_names', 'column_names']. + + .. versionadded:: 1.4.0 + 'tight' as an allowed value for the ``orient`` argument + + dtype : dtype, default None + Data type to force, otherwise infer. + columns : list, default None + Column labels to use when ``orient='index'``. Raises a ValueError + if used with ``orient='columns'`` or ``orient='tight'``. + + Returns + ------- + DataFrame + + See Also + -------- + DataFrame.from_records : DataFrame from structured ndarray, sequence + of tuples or dicts, or DataFrame. + DataFrame : DataFrame object creation using constructor. + DataFrame.to_dict : Convert the DataFrame to a dictionary. 
+ + Examples + -------- + By default the keys of the dict become the DataFrame columns: + + >>> data = {'col_1': [3, 2, 1, 0], 'col_2': ['a', 'b', 'c', 'd']} + >>> pd.DataFrame.from_dict(data) + col_1 col_2 + 0 3 a + 1 2 b + 2 1 c + 3 0 d + + Specify ``orient='index'`` to create the DataFrame using dictionary + keys as rows: + + >>> data = {'row_1': [3, 2, 1, 0], 'row_2': ['a', 'b', 'c', 'd']} + >>> pd.DataFrame.from_dict(data, orient='index') + 0 1 2 3 + row_1 3 2 1 0 + row_2 a b c d + + When using the 'index' orientation, the column names can be + specified manually: + + >>> pd.DataFrame.from_dict(data, orient='index', + ... columns=['A', 'B', 'C', 'D']) + A B C D + row_1 3 2 1 0 + row_2 a b c d + + Specify ``orient='tight'`` to create the DataFrame using a 'tight' + format: + + >>> data = {'index': [('a', 'b'), ('a', 'c')], + ... 'columns': [('x', 1), ('y', 2)], + ... 'data': [[1, 3], [2, 4]], + ... 'index_names': ['n1', 'n2'], + ... 'column_names': ['z1', 'z2']} + >>> pd.DataFrame.from_dict(data, orient='tight') + z1 x y + z2 1 2 + n1 n2 + a b 1 3 + c 2 4 + """ + index = None + orient = orient.lower() + if orient == "index": + if len(data) > 0: + # TODO speed up Series case + if isinstance(list(data.values())[0], (Series, dict)): + data = _from_nested_dict(data) + else: + data, index = list(data.values()), list(data.keys()) + elif orient == "columns" or orient == "tight": + if columns is not None: + raise ValueError(f"cannot use columns parameter with orient='{orient}'") + else: # pragma: no cover + raise ValueError("only recognize index or columns for orient") + + if orient != "tight": + return cls(data, index=index, columns=columns, dtype=dtype) + else: + realdata = data["data"] + + def create_index(indexlist, namelist): + index: Index + if len(namelist) > 1: + index = MultiIndex.from_tuples(indexlist, names=namelist) + else: + index = Index(indexlist, name=namelist[0]) + return index + + index = create_index(data["index"], data["index_names"]) + columns = create_index(data["columns"], data["column_names"]) + return cls(realdata, index=index, columns=columns, dtype=dtype) + + def to_numpy( + self, + dtype: npt.DTypeLike | None = None, + copy: bool = False, + na_value=lib.no_default, + ) -> np.ndarray: + """ + Convert the DataFrame to a NumPy array. + + By default, the dtype of the returned array will be the common NumPy + dtype of all types in the DataFrame. For example, if the dtypes are + ``float16`` and ``float32``, the results dtype will be ``float32``. + This may require copying data and coercing values, which may be + expensive. + + Parameters + ---------- + dtype : str or numpy.dtype, optional + The dtype to pass to :meth:`numpy.asarray`. + copy : bool, default False + Whether to ensure that the returned value is not a view on + another array. Note that ``copy=False`` does not *ensure* that + ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensure that + a copy is made, even if not strictly necessary. + na_value : Any, optional + The value to use for missing values. The default value depends + on `dtype` and the dtypes of the DataFrame columns. + + .. versionadded:: 1.1.0 + + Returns + ------- + numpy.ndarray + + See Also + -------- + Series.to_numpy : Similar method for Series. + + Examples + -------- + >>> pd.DataFrame({"A": [1, 2], "B": [3, 4]}).to_numpy() + array([[1, 3], + [2, 4]]) + + With heterogeneous data, the lowest common type will have to + be used. + + >>> df = pd.DataFrame({"A": [1, 2], "B": [3.0, 4.5]}) + >>> df.to_numpy() + array([[1. , 3. ], + [2. 
, 4.5]]) + + For a mix of numeric and non-numeric types, the output array will + have object dtype. + + >>> df['C'] = pd.date_range('2000', periods=2) + >>> df.to_numpy() + array([[1, 3.0, Timestamp('2000-01-01 00:00:00')], + [2, 4.5, Timestamp('2000-01-02 00:00:00')]], dtype=object) + """ + self._consolidate_inplace() + if dtype is not None: + dtype = np.dtype(dtype) + result = self._mgr.as_array(dtype=dtype, copy=copy, na_value=na_value) + if result.dtype is not dtype: + result = np.array(result, dtype=dtype, copy=False) + + return result + + def to_dict(self, orient: str = "dict", into=dict): + """ + Convert the DataFrame to a dictionary. + + The type of the key-value pairs can be customized with the parameters + (see below). + + Parameters + ---------- + orient : str {'dict', 'list', 'series', 'split', 'records', 'index'} + Determines the type of the values of the dictionary. + + - 'dict' (default) : dict like {column -> {index -> value}} + - 'list' : dict like {column -> [values]} + - 'series' : dict like {column -> Series(values)} + - 'split' : dict like + {'index' -> [index], 'columns' -> [columns], 'data' -> [values]} + - 'tight' : dict like + {'index' -> [index], 'columns' -> [columns], 'data' -> [values], + 'index_names' -> [index.names], 'column_names' -> [column.names]} + - 'records' : list like + [{column -> value}, ... , {column -> value}] + - 'index' : dict like {index -> {column -> value}} + + Abbreviations are allowed. `s` indicates `series` and `sp` + indicates `split`. + + .. versionadded:: 1.4.0 + 'tight' as an allowed value for the ``orient`` argument + + into : class, default dict + The collections.abc.Mapping subclass used for all Mappings + in the return value. Can be the actual class or an empty + instance of the mapping type you want. If you want a + collections.defaultdict, you must pass it initialized. + + Returns + ------- + dict, list or collections.abc.Mapping + Return a collections.abc.Mapping object representing the DataFrame. + The resulting transformation depends on the `orient` parameter. + + See Also + -------- + DataFrame.from_dict: Create a DataFrame from a dictionary. + DataFrame.to_json: Convert a DataFrame to JSON format. + + Examples + -------- + >>> df = pd.DataFrame({'col1': [1, 2], + ... 'col2': [0.5, 0.75]}, + ... index=['row1', 'row2']) + >>> df + col1 col2 + row1 1 0.50 + row2 2 0.75 + >>> df.to_dict() + {'col1': {'row1': 1, 'row2': 2}, 'col2': {'row1': 0.5, 'row2': 0.75}} + + You can specify the return orientation. + + >>> df.to_dict('series') + {'col1': row1 1 + row2 2 + Name: col1, dtype: int64, + 'col2': row1 0.50 + row2 0.75 + Name: col2, dtype: float64} + + >>> df.to_dict('split') + {'index': ['row1', 'row2'], 'columns': ['col1', 'col2'], + 'data': [[1, 0.5], [2, 0.75]]} + + >>> df.to_dict('records') + [{'col1': 1, 'col2': 0.5}, {'col1': 2, 'col2': 0.75}] + + >>> df.to_dict('index') + {'row1': {'col1': 1, 'col2': 0.5}, 'row2': {'col1': 2, 'col2': 0.75}} + + >>> df.to_dict('tight') + {'index': ['row1', 'row2'], 'columns': ['col1', 'col2'], + 'data': [[1, 0.5], [2, 0.75]], 'index_names': [None], 'column_names': [None]} + + You can also specify the mapping type. 
+
+        >>> from collections import OrderedDict, defaultdict
+        >>> df.to_dict(into=OrderedDict)
+        OrderedDict([('col1', OrderedDict([('row1', 1), ('row2', 2)])),
+                     ('col2', OrderedDict([('row1', 0.5), ('row2', 0.75)]))])
+
+        If you want a `defaultdict`, you need to initialize it:
+
+        >>> dd = defaultdict(list)
+        >>> df.to_dict('records', into=dd)
+        [defaultdict(<class 'list'>, {'col1': 1, 'col2': 0.5}),
+         defaultdict(<class 'list'>, {'col1': 2, 'col2': 0.75})]
+        """
+        if not self.columns.is_unique:
+            warnings.warn(
+                "DataFrame columns are not unique, some columns will be omitted.",
+                UserWarning,
+                stacklevel=find_stack_level(),
+            )
+        # GH16122
+        into_c = com.standardize_mapping(into)
+
+        orient = orient.lower()
+        # GH32515
+        if orient.startswith(("d", "l", "s", "r", "i")) and orient not in {
+            "dict",
+            "list",
+            "series",
+            "split",
+            "records",
+            "index",
+        }:
+            warnings.warn(
+                "Using short name for 'orient' is deprecated. Only the "
+                "options: ('dict', 'list', 'series', 'split', 'records', 'index') "
+                "will be used in a future version. Use one of the above "
+                "to silence this warning.",
+                FutureWarning,
+                stacklevel=find_stack_level(),
+            )
+
+            if orient.startswith("d"):
+                orient = "dict"
+            elif orient.startswith("l"):
+                orient = "list"
+            elif orient.startswith("sp"):
+                orient = "split"
+            elif orient.startswith("s"):
+                orient = "series"
+            elif orient.startswith("r"):
+                orient = "records"
+            elif orient.startswith("i"):
+                orient = "index"
+
+        if orient == "dict":
+            return into_c((k, v.to_dict(into)) for k, v in self.items())
+
+        elif orient == "list":
+            return into_c((k, v.tolist()) for k, v in self.items())
+
+        elif orient == "split":
+            return into_c(
+                (
+                    ("index", self.index.tolist()),
+                    ("columns", self.columns.tolist()),
+                    (
+                        "data",
+                        [
+                            list(map(maybe_box_native, t))
+                            for t in self.itertuples(index=False, name=None)
+                        ],
+                    ),
+                )
+            )
+
+        elif orient == "tight":
+            return into_c(
+                (
+                    ("index", self.index.tolist()),
+                    ("columns", self.columns.tolist()),
+                    (
+                        "data",
+                        [
+                            list(map(maybe_box_native, t))
+                            for t in self.itertuples(index=False, name=None)
+                        ],
+                    ),
+                    ("index_names", list(self.index.names)),
+                    ("column_names", list(self.columns.names)),
+                )
+            )
+
+        elif orient == "series":
+            return into_c((k, v) for k, v in self.items())
+
+        elif orient == "records":
+            columns = self.columns.tolist()
+            rows = (
+                dict(zip(columns, row))
+                for row in self.itertuples(index=False, name=None)
+            )
+            return [
+                into_c((k, maybe_box_native(v)) for k, v in row.items()) for row in rows
+            ]
+
+        elif orient == "index":
+            if not self.index.is_unique:
+                raise ValueError("DataFrame index must be unique for orient='index'.")
+            return into_c(
+                (t[0], dict(zip(self.columns, t[1:])))
+                for t in self.itertuples(name=None)
+            )
+
+        else:
+            raise ValueError(f"orient '{orient}' not understood")
+
+    def to_gbq(
+        self,
+        destination_table: str,
+        project_id: str | None = None,
+        chunksize: int | None = None,
+        reauth: bool = False,
+        if_exists: str = "fail",
+        auth_local_webserver: bool = False,
+        table_schema: list[dict[str, str]] | None = None,
+        location: str | None = None,
+        progress_bar: bool = True,
+        credentials=None,
+    ) -> None:
+        """
+        Write a DataFrame to a Google BigQuery table.
+
+        This function requires the `pandas-gbq package
+        <https://pandas-gbq.readthedocs.io>`__.
+
+        See the `How to authenticate with Google BigQuery
+        <https://pandas-gbq.readthedocs.io/en/latest/howto/authentication.html>`__
+        guide for authentication instructions.
+
+        Parameters
+        ----------
+        destination_table : str
+            Name of table to be written, in the form ``dataset.tablename``.
+        project_id : str, optional
+            Google BigQuery Account project ID. Optional when available from
+            the environment.
+        chunksize : int, optional
+            Number of rows to be inserted in each chunk from the dataframe.
+            Set to ``None`` to load the whole dataframe at once.
+        reauth : bool, default False
+            Force Google BigQuery to re-authenticate the user. This is useful
+            if multiple accounts are used.
+        if_exists : str, default 'fail'
+            Behavior when the destination table exists. Value can be one of:
+
+            ``'fail'``
+                If table exists raise pandas_gbq.gbq.TableCreationError.
+            ``'replace'``
+                If table exists, drop it, recreate it, and insert data.
+            ``'append'``
+                If table exists, insert data. Create if does not exist.
+        auth_local_webserver : bool, default False
+            Use the `local webserver flow`_ instead of the `console flow`_
+            when getting user credentials.
+
+            .. _local webserver flow:
+                https://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server
+            .. _console flow:
+                https://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console
+
+            *New in version 0.2.0 of pandas-gbq*.
+        table_schema : list of dicts, optional
+            List of BigQuery table fields to which according DataFrame
+            columns conform to, e.g. ``[{'name': 'col1', 'type':
+            'STRING'},...]``. If schema is not provided, it will be
+            generated according to dtypes of DataFrame columns. See
+            BigQuery API documentation on available names of a field.
+
+            *New in version 0.3.1 of pandas-gbq*.
+        location : str, optional
+            Location where the load job should run. See the `BigQuery locations
+            documentation
+            <https://cloud.google.com/bigquery/docs/locations>`__ for a
+            list of available locations. The location must match that of the
+            target dataset.
+
+            *New in version 0.5.0 of pandas-gbq*.
+        progress_bar : bool, default True
+            Use the library `tqdm` to show the progress bar for the upload,
+            chunk by chunk.
+
+            *New in version 0.5.0 of pandas-gbq*.
+        credentials : google.auth.credentials.Credentials, optional
+            Credentials for accessing Google APIs. Use this parameter to
+            override default credentials, such as to use Compute Engine
+            :class:`google.auth.compute_engine.Credentials` or Service
+            Account :class:`google.oauth2.service_account.Credentials`
+            directly.
+
+            *New in version 0.8.0 of pandas-gbq*.
+
+        See Also
+        --------
+        pandas_gbq.to_gbq : This function in the pandas-gbq library.
+        read_gbq : Read a DataFrame from Google BigQuery.
+        """
+        from pandas.io import gbq
+
+        gbq.to_gbq(
+            self,
+            destination_table,
+            project_id=project_id,
+            chunksize=chunksize,
+            reauth=reauth,
+            if_exists=if_exists,
+            auth_local_webserver=auth_local_webserver,
+            table_schema=table_schema,
+            location=location,
+            progress_bar=progress_bar,
+            credentials=credentials,
+        )
+
+    @classmethod
+    def from_records(
+        cls,
+        data,
+        index=None,
+        exclude=None,
+        columns=None,
+        coerce_float: bool = False,
+        nrows: int | None = None,
+    ) -> DataFrame:
+        """
+        Convert structured or record ndarray to DataFrame.
+
+        Creates a DataFrame object from a structured ndarray, sequence of
+        tuples or dicts, or DataFrame.
+
+        Parameters
+        ----------
+        data : structured ndarray, sequence of tuples or dicts, or DataFrame
+            Structured input data.
+        index : str, list of fields, array-like
+            Field of array to use as the index, alternately a specific set of
+            input labels to use.
+        exclude : sequence, default None
+            Columns or fields to exclude.
+        columns : sequence, default None
+            Column names to use.
If the passed data do not have names + associated with them, this argument provides names for the + columns. Otherwise this argument indicates the order of the columns + in the result (any names not found in the data will become all-NA + columns). + coerce_float : bool, default False + Attempt to convert values of non-string, non-numeric objects (like + decimal.Decimal) to floating point, useful for SQL result sets. + nrows : int, default None + Number of rows to read if data is an iterator. + + Returns + ------- + DataFrame + + See Also + -------- + DataFrame.from_dict : DataFrame from dict of array-like or dicts. + DataFrame : DataFrame object creation using constructor. + + Examples + -------- + Data can be provided as a structured ndarray: + + >>> data = np.array([(3, 'a'), (2, 'b'), (1, 'c'), (0, 'd')], + ... dtype=[('col_1', 'i4'), ('col_2', 'U1')]) + >>> pd.DataFrame.from_records(data) + col_1 col_2 + 0 3 a + 1 2 b + 2 1 c + 3 0 d + + Data can be provided as a list of dicts: + + >>> data = [{'col_1': 3, 'col_2': 'a'}, + ... {'col_1': 2, 'col_2': 'b'}, + ... {'col_1': 1, 'col_2': 'c'}, + ... {'col_1': 0, 'col_2': 'd'}] + >>> pd.DataFrame.from_records(data) + col_1 col_2 + 0 3 a + 1 2 b + 2 1 c + 3 0 d + + Data can be provided as a list of tuples with corresponding columns: + + >>> data = [(3, 'a'), (2, 'b'), (1, 'c'), (0, 'd')] + >>> pd.DataFrame.from_records(data, columns=['col_1', 'col_2']) + col_1 col_2 + 0 3 a + 1 2 b + 2 1 c + 3 0 d + """ + result_index = None + + # Make a copy of the input columns so we can modify it + if columns is not None: + columns = ensure_index(columns) + + def maybe_reorder( + arrays: list[ArrayLike], arr_columns: Index, columns: Index, index + ) -> tuple[list[ArrayLike], Index, Index | None]: + """ + If our desired 'columns' do not match the data's pre-existing 'arr_columns', + we re-order our arrays. This is like a pre-emptive (cheap) reindex. 
+ """ + if len(arrays): + length = len(arrays[0]) + else: + length = 0 + + result_index = None + if len(arrays) == 0 and index is None and length == 0: + # for backward compat use an object Index instead of RangeIndex + result_index = Index([]) + + arrays, arr_columns = reorder_arrays(arrays, arr_columns, columns, length) + return arrays, arr_columns, result_index + + if is_iterator(data): + if nrows == 0: + return cls() + + try: + first_row = next(data) + except StopIteration: + return cls(index=index, columns=columns) + + dtype = None + if hasattr(first_row, "dtype") and first_row.dtype.names: + dtype = first_row.dtype + + values = [first_row] + + if nrows is None: + values += data + else: + values.extend(itertools.islice(data, nrows - 1)) + + if dtype is not None: + data = np.array(values, dtype=dtype) + else: + data = values + + if isinstance(data, dict): + if columns is None: + columns = arr_columns = ensure_index(sorted(data)) + arrays = [data[k] for k in columns] + else: + arrays = [] + arr_columns_list = [] + for k, v in data.items(): + if k in columns: + arr_columns_list.append(k) + arrays.append(v) + + arr_columns = Index(arr_columns_list) + arrays, arr_columns, result_index = maybe_reorder( + arrays, arr_columns, columns, index + ) + + elif isinstance(data, (np.ndarray, DataFrame)): + arrays, columns = to_arrays(data, columns) + arr_columns = columns + else: + arrays, arr_columns = to_arrays(data, columns) + if coerce_float: + for i, arr in enumerate(arrays): + if arr.dtype == object: + # error: Argument 1 to "maybe_convert_objects" has + # incompatible type "Union[ExtensionArray, ndarray]"; + # expected "ndarray" + arrays[i] = lib.maybe_convert_objects( + arr, # type: ignore[arg-type] + try_float=True, + ) + + arr_columns = ensure_index(arr_columns) + if columns is None: + columns = arr_columns + else: + arrays, arr_columns, result_index = maybe_reorder( + arrays, arr_columns, columns, index + ) + + if exclude is None: + exclude = set() + else: + exclude = set(exclude) + + if index is not None: + if isinstance(index, str) or not hasattr(index, "__iter__"): + i = columns.get_loc(index) + exclude.add(index) + if len(arrays) > 0: + result_index = Index(arrays[i], name=index) + else: + result_index = Index([], name=index) + else: + try: + index_data = [arrays[arr_columns.get_loc(field)] for field in index] + except (KeyError, TypeError): + # raised by get_loc, see GH#29258 + result_index = index + else: + result_index = ensure_index_from_sequences(index_data, names=index) + exclude.update(index) + + if any(exclude): + arr_exclude = [x for x in exclude if x in arr_columns] + to_remove = [arr_columns.get_loc(col) for col in arr_exclude] + arrays = [v for i, v in enumerate(arrays) if i not in to_remove] + + columns = columns.drop(exclude) + + manager = get_option("mode.data_manager") + mgr = arrays_to_mgr(arrays, columns, result_index, typ=manager) + + return cls(mgr) + + def to_records( + self, index=True, column_dtypes=None, index_dtypes=None + ) -> np.recarray: + """ + Convert DataFrame to a NumPy record array. + + Index will be included as the first field of the record array if + requested. + + Parameters + ---------- + index : bool, default True + Include index in resulting record array, stored in 'index' + field or using the index label, if set. + column_dtypes : str, type, dict, default None + If a string or type, the data type to store all columns. If + a dictionary, a mapping of column names and indices (zero-indexed) + to specific data types. 
+        index_dtypes : str, type, dict, default None
+            If a string or type, the data type to store all index levels. If
+            a dictionary, a mapping of index level names and indices
+            (zero-indexed) to specific data types.
+
+            This mapping is applied only if `index=True`.
+
+        Returns
+        -------
+        numpy.recarray
+            NumPy ndarray with the DataFrame labels as fields and each row
+            of the DataFrame as entries.
+
+        See Also
+        --------
+        DataFrame.from_records: Convert structured or record ndarray
+            to DataFrame.
+        numpy.recarray: An ndarray that allows field access using
+            attributes, analogous to typed columns in a
+            spreadsheet.
+
+        Examples
+        --------
+        >>> df = pd.DataFrame({'A': [1, 2], 'B': [0.5, 0.75]},
+        ...                   index=['a', 'b'])
+        >>> df
+           A     B
+        a  1  0.50
+        b  2  0.75
+        >>> df.to_records()
+        rec.array([('a', 1, 0.5 ), ('b', 2, 0.75)],
+                  dtype=[('index', 'O'), ('A', '<i8'), ('B', '<f8')])
+
+        If the DataFrame index has no label then the recarray field name
+        is set to 'index'. If the index has a label then this is used as the
+        field name:
+
+        >>> df.index = df.index.rename("I")
+        >>> df.to_records()
+        rec.array([('a', 1, 0.5 ), ('b', 2, 0.75)],
+                  dtype=[('I', 'O'), ('A', '<i8'), ('B', '<f8')])
+
+        The index can be excluded from the record array:
+
+        >>> df.to_records(index=False)
+        rec.array([(1, 0.5 ), (2, 0.75)],
+                  dtype=[('A', '<i8'), ('B', '<f8')])
+
+        Data types can be specified for the columns:
+
+        >>> df.to_records(column_dtypes={"A": "int32"})
+        rec.array([('a', 1, 0.5 ), ('b', 2, 0.75)],
+                  dtype=[('I', 'O'), ('A', '<i4'), ('B', '<f8')])
+
+        As well as for the index:
+
+        >>> df.to_records(index_dtypes="<S2")
+        rec.array([(b'a', 1, 0.5 ), (b'b', 2, 0.75)],
+                  dtype=[('I', 'S2'), ('A', '<i8'), ('B', '<f8')])
+
+        >>> index_dtypes = f"<S{df.index.str.len().max()}"
+        >>> df.to_records(index_dtypes=index_dtypes)
+        rec.array([(b'a', 1, 0.5 ), (b'b', 2, 0.75)],
+                  dtype=[('I', 'S1'), ('A', '<i8'), ('B', '<f8')])
+        """
+        if index:
+            if isinstance(self.index, MultiIndex):
+                ix_vals = list(map(np.array, zip(*self.index._values)))
+            else:
+                ix_vals = [self.index.values]
+
+            arrays = ix_vals + [
+                np.asarray(self.iloc[:, i]) for i in range(len(self.columns))
+            ]
+
+            index_names = list(self.index.names)
+
+            if isinstance(self.index, MultiIndex):
+                index_names = com.fill_missing_names(index_names)
+            elif index_names[0] is None:
+                index_names = ["index"]
+
+            names = [str(name) for name in itertools.chain(index_names, self.columns)]
+        else:
+            arrays = [np.asarray(self.iloc[:, i]) for i in range(len(self.columns))]
+            names = [str(c) for c in self.columns]
+            index_names = []
+
+        index_len = len(index_names)
+        formats = []
+
+        for i, v in enumerate(arrays):
+            index_int = i
+
+            # The index arrays were collected before the column arrays,
+            # so positions below index_len refer to index levels.
+            if index_int < index_len:
+                dtype_mapping = index_dtypes
+                name = index_names[index_int]
+            else:
+                index_int -= index_len
+                dtype_mapping = column_dtypes
+                name = self.columns[index_int]
+
+            # A dict-like mapping may key on the label or on the position.
+            if is_dict_like(dtype_mapping):
+                if name in dtype_mapping:
+                    dtype_mapping = dtype_mapping[name]
+                elif index_int in dtype_mapping:
+                    dtype_mapping = dtype_mapping[index_int]
+                else:
+                    dtype_mapping = None
+
+            # Fall back to the array's own dtype when no mapping was given.
+            if dtype_mapping is None:
+                formats.append(v.dtype)
+            elif isinstance(dtype_mapping, (type, np.dtype, str)):
+                formats.append(dtype_mapping)
+            else:
+                element = "row" if i < index_len else "column"
+                msg = f"Invalid dtype {dtype_mapping} specified for {element} {name}"
+                raise ValueError(msg)
+
+        return np.rec.fromarrays(arrays, dtype={"names": names, "formats": formats})
+
+    @classmethod
+    def _from_arrays(
+        cls,
+        arrays,
+        columns,
+        index,
+        dtype: Dtype | None = None,
+        verify_integrity: bool = True,
+    ) -> DataFrame:
+        """
+        Create DataFrame from a list of arrays corresponding to the columns.
+
+        Parameters
+        ----------
+        arrays : list-like of arrays
+            Each array in the list corresponds to one column, in order.
+        columns : list-like, Index
+            The column names for the resulting DataFrame.
+        index : list-like, Index
+            The rows labels for the resulting DataFrame.
+        dtype : dtype, optional
+            Optional dtype to enforce for all arrays.
+        verify_integrity : bool, default True
+            Validate and homogenize all input. If set to False, it is assumed
+            that all elements of `arrays` are actual arrays how they will be
+            stored in a block (numpy ndarray or ExtensionArray), have the same
+            length as and are aligned with the index, and that `columns` and
+            `index` are ensured to be an Index object.
+
+        Returns
+        -------
+        DataFrame
+        """
+        if dtype is not None:
+            dtype = pandas_dtype(dtype)
+
+        manager = get_option("mode.data_manager")
+        columns = ensure_index(columns)
+        if len(columns) != len(arrays):
+            raise ValueError("len(columns) must match len(arrays)")
+        mgr = arrays_to_mgr(
+            arrays,
+            columns,
+            index,
+            dtype=dtype,
+            verify_integrity=verify_integrity,
+            typ=manager,
+        )
+        return cls(mgr)
+
+    @doc(
+        storage_options=_shared_docs["storage_options"],
+        compression_options=_shared_docs["compression_options"] % "path",
+    )
+    @deprecate_kwarg(old_arg_name="fname", new_arg_name="path")
+    def to_stata(
+        self,
+        path: FilePath | WriteBuffer[bytes],
+        convert_dates: dict[Hashable, str] | None = None,
+        write_index: bool = True,
+        byteorder: str | None = None,
+        time_stamp: datetime.datetime | None = None,
+        data_label: str | None = None,
+        variable_labels: dict[Hashable, str] | None = None,
+        version: int | None = 114,
+        convert_strl: Sequence[Hashable] | None = None,
+        compression: CompressionOptions = "infer",
+        storage_options: StorageOptions = None,
+        *,
+        value_labels: dict[Hashable, dict[float | int, str]] | None = None,
+    ) -> None:
+        """
+        Export DataFrame object to Stata dta format.
+
+        Writes the DataFrame to a Stata dataset file.
+        "dta" files contain a Stata dataset.
+ + Parameters + ---------- + path : str, path object, or buffer + String, path object (implementing ``os.PathLike[str]``), or file-like + object implementing a binary ``write()`` function. + + .. versionchanged:: 1.0.0 + + Previously this was "fname" + + convert_dates : dict + Dictionary mapping columns containing datetime types to stata + internal format to use when writing the dates. Options are 'tc', + 'td', 'tm', 'tw', 'th', 'tq', 'ty'. Column can be either an integer + or a name. Datetime columns that do not have a conversion type + specified will be converted to 'tc'. Raises NotImplementedError if + a datetime column has timezone information. + write_index : bool + Write the index to Stata dataset. + byteorder : str + Can be ">", "<", "little", or "big". default is `sys.byteorder`. + time_stamp : datetime + A datetime to use as file creation date. Default is the current + time. + data_label : str, optional + A label for the data set. Must be 80 characters or smaller. + variable_labels : dict + Dictionary containing columns as keys and variable labels as + values. Each label must be 80 characters or smaller. + version : {{114, 117, 118, 119, None}}, default 114 + Version to use in the output dta file. Set to None to let pandas + decide between 118 or 119 formats depending on the number of + columns in the frame. Version 114 can be read by Stata 10 and + later. Version 117 can be read by Stata 13 or later. Version 118 + is supported in Stata 14 and later. Version 119 is supported in + Stata 15 and later. Version 114 limits string variables to 244 + characters or fewer while versions 117 and later allow strings + with lengths up to 2,000,000 characters. Versions 118 and 119 + support Unicode characters, and version 119 supports more than + 32,767 variables. + + Version 119 should usually only be used when the number of + variables exceeds the capacity of dta format 118. Exporting + smaller datasets in format 119 may have unintended consequences, + and, as of November 2020, Stata SE cannot read version 119 files. + + .. versionchanged:: 1.0.0 + + Added support for formats 118 and 119. + + convert_strl : list, optional + List of column names to convert to string columns to Stata StrL + format. Only available if version is 117. Storing strings in the + StrL format can produce smaller dta files if strings have more than + 8 characters and values are repeated. + {compression_options} + + .. versionadded:: 1.1.0 + + .. versionchanged:: 1.4.0 Zstandard support. + + {storage_options} + + .. versionadded:: 1.2.0 + + value_labels : dict of dicts + Dictionary containing columns as keys and dictionaries of column value + to labels as values. Labels for a single variable must be 32,000 + characters or smaller. + + .. versionadded:: 1.4.0 + + Raises + ------ + NotImplementedError + * If datetimes contain timezone information + * Column dtype is not representable in Stata + ValueError + * Columns listed in convert_dates are neither datetime64[ns] + or datetime.datetime + * Column listed in convert_dates is not in DataFrame + * Categorical label contains more than 32,000 characters + + See Also + -------- + read_stata : Import Stata data files. + io.stata.StataWriter : Low-level writer for Stata data files. + io.stata.StataWriter117 : Low-level writer for version 117 files. + + Examples + -------- + >>> df = pd.DataFrame({{'animal': ['falcon', 'parrot', 'falcon', + ... 'parrot'], + ... 
'speed': [350, 18, 361, 15]}}) + >>> df.to_stata('animals.dta') # doctest: +SKIP + """ + if version not in (114, 117, 118, 119, None): + raise ValueError("Only formats 114, 117, 118 and 119 are supported.") + if version == 114: + if convert_strl is not None: + raise ValueError("strl is not supported in format 114") + from pandas.io.stata import StataWriter as statawriter + elif version == 117: + # mypy: Name 'statawriter' already defined (possibly by an import) + from pandas.io.stata import ( # type: ignore[no-redef] + StataWriter117 as statawriter, + ) + else: # versions 118 and 119 + # mypy: Name 'statawriter' already defined (possibly by an import) + from pandas.io.stata import ( # type: ignore[no-redef] + StataWriterUTF8 as statawriter, + ) + + kwargs: dict[str, Any] = {} + if version is None or version >= 117: + # strl conversion is only supported >= 117 + kwargs["convert_strl"] = convert_strl + if version is None or version >= 118: + # Specifying the version is only supported for UTF8 (118 or 119) + kwargs["version"] = version + + writer = statawriter( + path, + self, + convert_dates=convert_dates, + byteorder=byteorder, + time_stamp=time_stamp, + data_label=data_label, + write_index=write_index, + variable_labels=variable_labels, + compression=compression, + storage_options=storage_options, + value_labels=value_labels, + **kwargs, + ) + writer.write_file() + + @deprecate_kwarg(old_arg_name="fname", new_arg_name="path") + def to_feather(self, path: FilePath | WriteBuffer[bytes], **kwargs) -> None: + """ + Write a DataFrame to the binary Feather format. + + Parameters + ---------- + path : str, path object, file-like object + String, path object (implementing ``os.PathLike[str]``), or file-like + object implementing a binary ``write()`` function. If a string or a path, + it will be used as Root Directory path when writing a partitioned dataset. + **kwargs : + Additional keywords passed to :func:`pyarrow.feather.write_feather`. + Starting with pyarrow 0.17, this includes the `compression`, + `compression_level`, `chunksize` and `version` keywords. + + .. versionadded:: 1.1.0 + + Notes + ----- + This function writes the dataframe as a `feather file + `_. Requires a default + index. For saving the DataFrame with your custom index use a method that + supports custom indices e.g. `to_parquet`. + """ + from pandas.io.feather_format import to_feather + + to_feather(self, path, **kwargs) + + @doc( + Series.to_markdown, + klass=_shared_doc_kwargs["klass"], + storage_options=_shared_docs["storage_options"], + examples="""Examples + -------- + >>> df = pd.DataFrame( + ... data={"animal_1": ["elk", "pig"], "animal_2": ["dog", "quetzal"]} + ... ) + >>> print(df.to_markdown()) + | | animal_1 | animal_2 | + |---:|:-----------|:-----------| + | 0 | elk | dog | + | 1 | pig | quetzal | + + Output markdown with a tabulate option. + + >>> print(df.to_markdown(tablefmt="grid")) + +----+------------+------------+ + | | animal_1 | animal_2 | + +====+============+============+ + | 0 | elk | dog | + +----+------------+------------+ + | 1 | pig | quetzal | + +----+------------+------------+""", + ) + def to_markdown( + self, + buf: IO[str] | str | None = None, + mode: str = "wt", + index: bool = True, + storage_options: StorageOptions = None, + **kwargs, + ) -> str | None: + if "showindex" in kwargs: + warnings.warn( + "'showindex' is deprecated. Only 'index' will be used " + "in a future version. 
Use 'index' to silence this warning.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + kwargs.setdefault("headers", "keys") + kwargs.setdefault("tablefmt", "pipe") + kwargs.setdefault("showindex", index) + tabulate = import_optional_dependency("tabulate") + result = tabulate.tabulate(self, **kwargs) + if buf is None: + return result + + with get_handle(buf, mode, storage_options=storage_options) as handles: + handles.handle.write(result) + return None + + @doc(storage_options=_shared_docs["storage_options"]) + @deprecate_kwarg(old_arg_name="fname", new_arg_name="path") + def to_parquet( + self, + path: FilePath | WriteBuffer[bytes] | None = None, + engine: str = "auto", + compression: str | None = "snappy", + index: bool | None = None, + partition_cols: list[str] | None = None, + storage_options: StorageOptions = None, + **kwargs, + ) -> bytes | None: + """ + Write a DataFrame to the binary parquet format. + + This function writes the dataframe as a `parquet file + `_. You can choose different parquet + backends, and have the option of compression. See + :ref:`the user guide ` for more details. + + Parameters + ---------- + path : str, path object, file-like object, or None, default None + String, path object (implementing ``os.PathLike[str]``), or file-like + object implementing a binary ``write()`` function. If None, the result is + returned as bytes. If a string or path, it will be used as Root Directory + path when writing a partitioned dataset. + + .. versionchanged:: 1.2.0 + + Previously this was "fname" + + engine : {{'auto', 'pyarrow', 'fastparquet'}}, default 'auto' + Parquet library to use. If 'auto', then the option + ``io.parquet.engine`` is used. The default ``io.parquet.engine`` + behavior is to try 'pyarrow', falling back to 'fastparquet' if + 'pyarrow' is unavailable. + compression : {{'snappy', 'gzip', 'brotli', None}}, default 'snappy' + Name of the compression to use. Use ``None`` for no compression. + index : bool, default None + If ``True``, include the dataframe's index(es) in the file output. + If ``False``, they will not be written to the file. + If ``None``, similar to ``True`` the dataframe's index(es) + will be saved. However, instead of being saved as values, + the RangeIndex will be stored as a range in the metadata so it + doesn't require much space and is faster. Other indexes will + be included as columns in the file output. + partition_cols : list, optional, default None + Column names by which to partition the dataset. + Columns are partitioned in the order they are given. + Must be None if path is not a string. + {storage_options} + + .. versionadded:: 1.2.0 + + **kwargs + Additional arguments passed to the parquet library. See + :ref:`pandas io ` for more details. + + Returns + ------- + bytes if no path argument is provided else None + + See Also + -------- + read_parquet : Read a parquet file. + DataFrame.to_csv : Write a csv file. + DataFrame.to_sql : Write to a sql table. + DataFrame.to_hdf : Write to hdf. + + Notes + ----- + This function requires either the `fastparquet + `_ or `pyarrow + `_ library. + + Examples + -------- + >>> df = pd.DataFrame(data={{'col1': [1, 2], 'col2': [3, 4]}}) + >>> df.to_parquet('df.parquet.gzip', + ... compression='gzip') # doctest: +SKIP + >>> pd.read_parquet('df.parquet.gzip') # doctest: +SKIP + col1 col2 + 0 1 3 + 1 2 4 + + If you want to get a buffer to the parquet content you can use a io.BytesIO + object, as long as you don't use partition_cols, which creates multiple files. 
+ + >>> import io + >>> f = io.BytesIO() + >>> df.to_parquet(f) + >>> f.seek(0) + 0 + >>> content = f.read() + """ + from pandas.io.parquet import to_parquet + + return to_parquet( + self, + path, + engine, + compression=compression, + index=index, + partition_cols=partition_cols, + storage_options=storage_options, + **kwargs, + ) + + @Substitution( + header_type="bool", + header="Whether to print column labels, default True", + col_space_type="str or int, list or dict of int or str", + col_space="The minimum width of each column in CSS length " + "units. An int is assumed to be px units.\n\n" + " .. versionadded:: 0.25.0\n" + " Ability to use str", + ) + @Substitution(shared_params=fmt.common_docstring, returns=fmt.return_docstring) + def to_html( + self, + buf: FilePath | WriteBuffer[str] | None = None, + columns: Sequence[str] | None = None, + col_space: ColspaceArgType | None = None, + header: bool | Sequence[str] = True, + index: bool = True, + na_rep: str = "NaN", + formatters: FormattersType | None = None, + float_format: FloatFormatType | None = None, + sparsify: bool | None = None, + index_names: bool = True, + justify: str | None = None, + max_rows: int | None = None, + max_cols: int | None = None, + show_dimensions: bool | str = False, + decimal: str = ".", + bold_rows: bool = True, + classes: str | list | tuple | None = None, + escape: bool = True, + notebook: bool = False, + border: int | None = None, + table_id: str | None = None, + render_links: bool = False, + encoding: str | None = None, + ): + """ + Render a DataFrame as an HTML table. + %(shared_params)s + bold_rows : bool, default True + Make the row labels bold in the output. + classes : str or list or tuple, default None + CSS class(es) to apply to the resulting html table. + escape : bool, default True + Convert the characters <, >, and & to HTML-safe sequences. + notebook : {True, False}, default False + Whether the generated HTML is for IPython Notebook. + border : int + A ``border=border`` attribute is included in the opening + `` tag. Default ``pd.options.display.html.border``. + table_id : str, optional + A css id is included in the opening `
` tag if specified. + render_links : bool, default False + Convert URLs to HTML links. + encoding : str, default "utf-8" + Set character encoding. + + .. versionadded:: 1.0 + %(returns)s + See Also + -------- + to_string : Convert DataFrame to a string. + """ + if justify is not None and justify not in fmt._VALID_JUSTIFY_PARAMETERS: + raise ValueError("Invalid value for justify parameter") + + formatter = fmt.DataFrameFormatter( + self, + columns=columns, + col_space=col_space, + na_rep=na_rep, + header=header, + index=index, + formatters=formatters, + float_format=float_format, + bold_rows=bold_rows, + sparsify=sparsify, + justify=justify, + index_names=index_names, + escape=escape, + decimal=decimal, + max_rows=max_rows, + max_cols=max_cols, + show_dimensions=show_dimensions, + ) + # TODO: a generic formatter wld b in DataFrameFormatter + return fmt.DataFrameRenderer(formatter).to_html( + buf=buf, + classes=classes, + notebook=notebook, + border=border, + encoding=encoding, + table_id=table_id, + render_links=render_links, + ) + + @doc( + storage_options=_shared_docs["storage_options"], + compression_options=_shared_docs["compression_options"] % "path_or_buffer", + ) + def to_xml( + self, + path_or_buffer: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None, + index: bool = True, + root_name: str | None = "data", + row_name: str | None = "row", + na_rep: str | None = None, + attr_cols: list[str] | None = None, + elem_cols: list[str] | None = None, + namespaces: dict[str | None, str] | None = None, + prefix: str | None = None, + encoding: str = "utf-8", + xml_declaration: bool | None = True, + pretty_print: bool | None = True, + parser: str | None = "lxml", + stylesheet: FilePath | ReadBuffer[str] | ReadBuffer[bytes] | None = None, + compression: CompressionOptions = "infer", + storage_options: StorageOptions = None, + ) -> str | None: + """ + Render a DataFrame to an XML document. + + .. versionadded:: 1.3.0 + + Parameters + ---------- + path_or_buffer : str, path object, file-like object, or None, default None + String, path object (implementing ``os.PathLike[str]``), or file-like + object implementing a ``write()`` function. If None, the result is returned + as a string. + index : bool, default True + Whether to include index in XML document. + root_name : str, default 'data' + The name of root element in XML document. + row_name : str, default 'row' + The name of row element in XML document. + na_rep : str, optional + Missing data representation. + attr_cols : list-like, optional + List of columns to write as attributes in row element. + Hierarchical columns will be flattened with underscore + delimiting the different levels. + elem_cols : list-like, optional + List of columns to write as children in row element. By default, + all columns output as children of row element. Hierarchical + columns will be flattened with underscore delimiting the + different levels. + namespaces : dict, optional + All namespaces to be defined in root element. Keys of dict + should be prefix names and values of dict corresponding URIs. + Default namespaces should be given empty string key. For + example, :: + + namespaces = {{"": "https://example.com"}} + + prefix : str, optional + Namespace prefix to be used for every element and/or attribute + in document. This should be one of the keys in ``namespaces`` + dict. + encoding : str, default 'utf-8' + Encoding of the resulting document. + xml_declaration : bool, default True + Whether to include the XML declaration at start of document. 
+
+        pretty_print : bool, default True
+            Whether output should be pretty printed with indentation and
+            line breaks.
+        parser : {{'lxml','etree'}}, default 'lxml'
+            Parser module to use for building of tree. Only 'lxml' and
+            'etree' are supported. With 'lxml', the ability to use XSLT
+            stylesheet is supported.
+        stylesheet : str, path object or file-like object, optional
+            A URL, file-like object, or a raw string containing an XSLT
+            script used to transform the raw XML output. Script should use
+            layout of elements and attributes from original output. This
+            argument requires ``lxml`` to be installed. Only XSLT 1.0
+            scripts, not later versions, are currently supported.
+        {compression_options}
+
+            .. versionchanged:: 1.4.0 Zstandard support.
+
+        {storage_options}
+
+        Returns
+        -------
+        None or str
+            If ``path_or_buffer`` is None, returns the resulting XML format as a
+            string. Otherwise returns None.
+
+        See Also
+        --------
+        to_json : Convert the pandas object to a JSON string.
+        to_html : Convert DataFrame to an HTML table.
+
+        Examples
+        --------
+        >>> df = pd.DataFrame({{'shape': ['square', 'circle', 'triangle'],
+        ...                    'degrees': [360, 360, 180],
+        ...                    'sides': [4, np.nan, 3]}})
+
+        >>> df.to_xml()  # doctest: +SKIP
+        <?xml version='1.0' encoding='utf-8'?>
+        <data>
+          <row>
+            <index>0</index>
+            <shape>square</shape>
+            <degrees>360</degrees>
+            <sides>4.0</sides>
+          </row>
+          <row>
+            <index>1</index>
+            <shape>circle</shape>
+            <degrees>360</degrees>
+            <sides/>
+          </row>
+          <row>
+            <index>2</index>
+            <shape>triangle</shape>
+            <degrees>180</degrees>
+            <sides>3.0</sides>
+          </row>
+        </data>
+
+        >>> df.to_xml(attr_cols=[
+        ...           'index', 'shape', 'degrees', 'sides'
+        ...           ])  # doctest: +SKIP
+        <?xml version='1.0' encoding='utf-8'?>
+        <data>
+          <row index="0" shape="square" degrees="360" sides="4.0"/>
+          <row index="1" shape="circle" degrees="360"/>
+          <row index="2" shape="triangle" degrees="180" sides="3.0"/>
+        </data>
+
+        >>> df.to_xml(namespaces={{"doc": "https://example.com"}},
+        ...           prefix="doc")  # doctest: +SKIP
+        <?xml version='1.0' encoding='utf-8'?>
+        <doc:data xmlns:doc="https://example.com">
+          <doc:row>
+            <doc:index>0</doc:index>
+            <doc:shape>square</doc:shape>
+            <doc:degrees>360</doc:degrees>
+            <doc:sides>4.0</doc:sides>
+          </doc:row>
+          <doc:row>
+            <doc:index>1</doc:index>
+            <doc:shape>circle</doc:shape>
+            <doc:degrees>360</doc:degrees>
+            <doc:sides/>
+          </doc:row>
+          <doc:row>
+            <doc:index>2</doc:index>
+            <doc:shape>triangle</doc:shape>
+            <doc:degrees>180</doc:degrees>
+            <doc:sides>3.0</doc:sides>
+          </doc:row>
+        </doc:data>
+        """
+
+        from pandas.io.formats.xml import (
+            EtreeXMLFormatter,
+            LxmlXMLFormatter,
+        )
+
+        lxml = import_optional_dependency("lxml.etree", errors="ignore")
+
+        TreeBuilder: type[EtreeXMLFormatter] | type[LxmlXMLFormatter]
+
+        if parser == "lxml":
+            if lxml is not None:
+                TreeBuilder = LxmlXMLFormatter
+            else:
+                raise ImportError(
+                    "lxml not found, please install or use the etree parser."
+                )
+
+        elif parser == "etree":
+            TreeBuilder = EtreeXMLFormatter
+
+        else:
+            raise ValueError("Values for parser can only be lxml or etree.")
+
+        xml_formatter = TreeBuilder(
+            self,
+            path_or_buffer=path_or_buffer,
+            index=index,
+            root_name=root_name,
+            row_name=row_name,
+            na_rep=na_rep,
+            attr_cols=attr_cols,
+            elem_cols=elem_cols,
+            namespaces=namespaces,
+            prefix=prefix,
+            encoding=encoding,
+            xml_declaration=xml_declaration,
+            pretty_print=pretty_print,
+            stylesheet=stylesheet,
+            compression=compression,
+            storage_options=storage_options,
+        )
+
+        return xml_formatter.write_output()
+
+    # ----------------------------------------------------------------------
+    @doc(INFO_DOCSTRING, **frame_sub_kwargs)
+    def info(
+        self,
+        verbose: bool | None = None,
+        buf: WriteBuffer[str] | None = None,
+        max_cols: int | None = None,
+        memory_usage: bool | str | None = None,
+        show_counts: bool | None = None,
+        null_counts: bool | None = None,
+    ) -> None:
+        if null_counts is not None:
+            if show_counts is not None:
+                raise ValueError("null_counts used with show_counts. Use show_counts.")
+            warnings.warn(
+                "null_counts is deprecated. 
Use show_counts instead", + FutureWarning, + stacklevel=find_stack_level(), + ) + show_counts = null_counts + info = DataFrameInfo( + data=self, + memory_usage=memory_usage, + ) + info.render( + buf=buf, + max_cols=max_cols, + verbose=verbose, + show_counts=show_counts, + ) + + def memory_usage(self, index: bool = True, deep: bool = False) -> Series: + """ + Return the memory usage of each column in bytes. + + The memory usage can optionally include the contribution of + the index and elements of `object` dtype. + + This value is displayed in `DataFrame.info` by default. This can be + suppressed by setting ``pandas.options.display.memory_usage`` to False. + + Parameters + ---------- + index : bool, default True + Specifies whether to include the memory usage of the DataFrame's + index in returned Series. If ``index=True``, the memory usage of + the index is the first item in the output. + deep : bool, default False + If True, introspect the data deeply by interrogating + `object` dtypes for system-level memory consumption, and include + it in the returned values. + + Returns + ------- + Series + A Series whose index is the original column names and whose values + is the memory usage of each column in bytes. + + See Also + -------- + numpy.ndarray.nbytes : Total bytes consumed by the elements of an + ndarray. + Series.memory_usage : Bytes consumed by a Series. + Categorical : Memory-efficient array for string values with + many repeated values. + DataFrame.info : Concise summary of a DataFrame. + + Examples + -------- + >>> dtypes = ['int64', 'float64', 'complex128', 'object', 'bool'] + >>> data = dict([(t, np.ones(shape=5000, dtype=int).astype(t)) + ... for t in dtypes]) + >>> df = pd.DataFrame(data) + >>> df.head() + int64 float64 complex128 object bool + 0 1 1.0 1.0+0.0j 1 True + 1 1 1.0 1.0+0.0j 1 True + 2 1 1.0 1.0+0.0j 1 True + 3 1 1.0 1.0+0.0j 1 True + 4 1 1.0 1.0+0.0j 1 True + + >>> df.memory_usage() + Index 128 + int64 40000 + float64 40000 + complex128 80000 + object 40000 + bool 5000 + dtype: int64 + + >>> df.memory_usage(index=False) + int64 40000 + float64 40000 + complex128 80000 + object 40000 + bool 5000 + dtype: int64 + + The memory footprint of `object` dtype columns is ignored by default: + + >>> df.memory_usage(deep=True) + Index 128 + int64 40000 + float64 40000 + complex128 80000 + object 180000 + bool 5000 + dtype: int64 + + Use a Categorical for efficient storage of an object-dtype column with + many repeated values. + + >>> df['object'].astype('category').memory_usage(deep=True) + 5244 + """ + result = self._constructor_sliced( + [c.memory_usage(index=False, deep=deep) for col, c in self.items()], + index=self.columns, + ) + if index: + index_memory_usage = self._constructor_sliced( + self.index.memory_usage(deep=deep), index=["Index"] + ) + result = index_memory_usage._append(result) + return result + + def transpose(self, *args, copy: bool = False) -> DataFrame: + """ + Transpose index and columns. + + Reflect the DataFrame over its main diagonal by writing rows as columns + and vice-versa. The property :attr:`.T` is an accessor to the method + :meth:`transpose`. + + Parameters + ---------- + *args : tuple, optional + Accepted for compatibility with NumPy. + copy : bool, default False + Whether to copy the data after transposing, even for DataFrames + with a single dtype. + + Note that a copy is always required for mixed dtype DataFrames, + or for DataFrames with any extension types. + + Returns + ------- + DataFrame + The transposed DataFrame. 
+ + See Also + -------- + numpy.transpose : Permute the dimensions of a given array. + + Notes + ----- + Transposing a DataFrame with mixed dtypes will result in a homogeneous + DataFrame with the `object` dtype. In such a case, a copy of the data + is always made. + + Examples + -------- + **Square DataFrame with homogeneous dtype** + + >>> d1 = {'col1': [1, 2], 'col2': [3, 4]} + >>> df1 = pd.DataFrame(data=d1) + >>> df1 + col1 col2 + 0 1 3 + 1 2 4 + + >>> df1_transposed = df1.T # or df1.transpose() + >>> df1_transposed + 0 1 + col1 1 2 + col2 3 4 + + When the dtype is homogeneous in the original DataFrame, we get a + transposed DataFrame with the same dtype: + + >>> df1.dtypes + col1 int64 + col2 int64 + dtype: object + >>> df1_transposed.dtypes + 0 int64 + 1 int64 + dtype: object + + **Non-square DataFrame with mixed dtypes** + + >>> d2 = {'name': ['Alice', 'Bob'], + ... 'score': [9.5, 8], + ... 'employed': [False, True], + ... 'kids': [0, 0]} + >>> df2 = pd.DataFrame(data=d2) + >>> df2 + name score employed kids + 0 Alice 9.5 False 0 + 1 Bob 8.0 True 0 + + >>> df2_transposed = df2.T # or df2.transpose() + >>> df2_transposed + 0 1 + name Alice Bob + score 9.5 8.0 + employed False True + kids 0 0 + + When the DataFrame has mixed dtypes, we get a transposed DataFrame with + the `object` dtype: + + >>> df2.dtypes + name object + score float64 + employed bool + kids int64 + dtype: object + >>> df2_transposed.dtypes + 0 object + 1 object + dtype: object + """ + nv.validate_transpose(args, {}) + # construct the args + + dtypes = list(self.dtypes) + + if self._can_fast_transpose: + # Note: tests pass without this, but this improves perf quite a bit. + new_vals = self._values.T + if copy: + new_vals = new_vals.copy() + + result = self._constructor(new_vals, index=self.columns, columns=self.index) + + elif ( + self._is_homogeneous_type and dtypes and is_extension_array_dtype(dtypes[0]) + ): + # We have EAs with the same dtype. We can preserve that dtype in transpose. + dtype = dtypes[0] + arr_type = dtype.construct_array_type() + values = self.values + + new_values = [arr_type._from_sequence(row, dtype=dtype) for row in values] + result = type(self)._from_arrays( + new_values, index=self.columns, columns=self.index + ) + + else: + new_arr = self.values.T + if copy: + new_arr = new_arr.copy() + result = self._constructor(new_arr, index=self.columns, columns=self.index) + + return result.__finalize__(self, method="transpose") + + @property + def T(self) -> DataFrame: + return self.transpose() + + # ---------------------------------------------------------------------- + # Indexing Methods + + def _ixs(self, i: int, axis: int = 0): + """ + Parameters + ---------- + i : int + axis : int + + Notes + ----- + If slice passed, the resulting data will be a view. 
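+
+        Examples
+        --------
+        An illustrative sketch only (``_ixs`` is a private helper); row
+        access returns the row as a Series named by its index label:
+
+        >>> df = pd.DataFrame({"a": [1, 2]})
+        >>> df._ixs(0, axis=0)
+        a    1
+        Name: 0, dtype: int64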
+ """ + # irow + if axis == 0: + new_values = self._mgr.fast_xs(i) + + # if we are a copy, mark as such + copy = isinstance(new_values, np.ndarray) and new_values.base is None + result = self._constructor_sliced( + new_values, + index=self.columns, + name=self.index[i], + dtype=new_values.dtype, + ) + result._set_is_copy(self, copy=copy) + return result + + # icol + else: + label = self.columns[i] + + col_mgr = self._mgr.iget(i) + result = self._box_col_values(col_mgr, i) + + # this is a cached value, mark it so + result._set_as_cached(label, self) + return result + + def _get_column_array(self, i: int) -> ArrayLike: + """ + Get the values of the i'th column (ndarray or ExtensionArray, as stored + in the Block) + """ + return self._mgr.iget_values(i) + + def _iter_column_arrays(self) -> Iterator[ArrayLike]: + """ + Iterate over the arrays of all columns in order. + This returns the values as stored in the Block (ndarray or ExtensionArray). + """ + for i in range(len(self.columns)): + yield self._get_column_array(i) + + def __getitem__(self, key): + check_deprecated_indexers(key) + key = lib.item_from_zerodim(key) + key = com.apply_if_callable(key, self) + + if is_hashable(key) and not is_iterator(key): + # is_iterator to exclude generator e.g. test_getitem_listlike + # shortcut if the key is in columns + if self.columns.is_unique and key in self.columns: + if isinstance(self.columns, MultiIndex): + return self._getitem_multilevel(key) + return self._get_item_cache(key) + + # Do we have a slicer (on rows)? + indexer = convert_to_index_sliceable(self, key) + if indexer is not None: + if isinstance(indexer, np.ndarray): + indexer = lib.maybe_indices_to_slice( + indexer.astype(np.intp, copy=False), len(self) + ) + if isinstance(indexer, np.ndarray): + # GH#43223 If we can not convert, use take + return self.take(indexer, axis=0) + # either we have a slice or we have a string that can be converted + # to a slice for partial-string date indexing + return self._slice(indexer, axis=0) + + # Do we have a (boolean) DataFrame? + if isinstance(key, DataFrame): + return self.where(key) + + # Do we have a (boolean) 1d indexer? + if com.is_bool_indexer(key): + return self._getitem_bool_array(key) + + # We are left with two options: a single key, and a collection of keys, + # We interpret tuples as collections only for non-MultiIndex + is_single_key = isinstance(key, tuple) or not is_list_like(key) + + if is_single_key: + if self.columns.nlevels > 1: + return self._getitem_multilevel(key) + indexer = self.columns.get_loc(key) + if is_integer(indexer): + indexer = [indexer] + else: + if is_iterator(key): + key = list(key) + indexer = self.columns._get_indexer_strict(key, "columns")[1] + + # take() does not accept boolean indexers + if getattr(indexer, "dtype", None) == bool: + indexer = np.where(indexer)[0] + + data = self._take_with_is_copy(indexer, axis=1) + + if is_single_key: + # What does looking for a single key in a non-unique index return? + # The behavior is inconsistent. 
It returns a Series, except when + # - the key itself is repeated (test on data.shape, #9519), or + # - we have a MultiIndex on columns (test on self.columns, #21309) + if data.shape[1] == 1 and not isinstance(self.columns, MultiIndex): + # GH#26490 using data[key] can cause RecursionError + return data._get_item_cache(key) + + return data + + def _getitem_bool_array(self, key): + # also raises Exception if object array with NA values + # warning here just in case -- previously __setitem__ was + # reindexing but __getitem__ was not; it seems more reasonable to + # go with the __setitem__ behavior since that is more consistent + # with all other indexing behavior + if isinstance(key, Series) and not key.index.equals(self.index): + warnings.warn( + "Boolean Series key will be reindexed to match DataFrame index.", + UserWarning, + stacklevel=find_stack_level(), + ) + elif len(key) != len(self.index): + raise ValueError( + f"Item wrong length {len(key)} instead of {len(self.index)}." + ) + + # check_bool_indexer will throw exception if Series key cannot + # be reindexed to match DataFrame rows + key = check_bool_indexer(self.index, key) + indexer = key.nonzero()[0] + return self._take_with_is_copy(indexer, axis=0) + + def _getitem_multilevel(self, key): + # self.columns is a MultiIndex + loc = self.columns.get_loc(key) + if isinstance(loc, (slice, np.ndarray)): + new_columns = self.columns[loc] + result_columns = maybe_droplevels(new_columns, key) + if self._is_mixed_type: + result = self.reindex(columns=new_columns) + result.columns = result_columns + else: + new_values = self.values[:, loc] + result = self._constructor( + new_values, index=self.index, columns=result_columns + ) + result = result.__finalize__(self) + + # If there is only one column being returned, and its name is + # either an empty string, or a tuple with an empty string as its + # first element, then treat the empty string as a placeholder + # and return the column as if the user had provided that empty + # string in the key. If the result is a Series, exclude the + # implied empty string from its name. + if len(result.columns) == 1: + top = result.columns[0] + if isinstance(top, tuple): + top = top[0] + if top == "": + result = result[""] + if isinstance(result, Series): + result = self._constructor_sliced( + result, index=self.index, name=key + ) + + result._set_is_copy(self) + return result + else: + # loc is neither a slice nor ndarray, so must be an int + return self._ixs(loc, axis=1) + + def _get_value(self, index, col, takeable: bool = False) -> Scalar: + """ + Quickly retrieve single value at passed column and index. + + Parameters + ---------- + index : row label + col : column label + takeable : interpret the index/col as indexers, default False + + Returns + ------- + scalar + + Notes + ----- + Assumes that both `self.index._index_as_unique` and + `self.columns._index_as_unique`; Caller is responsible for checking. 
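+
+        Examples
+        --------
+        A hedged illustration (private helper, not public API):
+
+        >>> df = pd.DataFrame({"a": [1, 2]}, index=["x", "y"])
+        >>> df._get_value("y", "a")
+        2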
+ """ + if takeable: + series = self._ixs(col, axis=1) + return series._values[index] + + series = self._get_item_cache(col) + engine = self.index._engine + + if not isinstance(self.index, MultiIndex): + # CategoricalIndex: Trying to use the engine fastpath may give incorrect + # results if our categories are integers that dont match our codes + # IntervalIndex: IntervalTree has no get_loc + row = self.index.get_loc(index) + return series._values[row] + + # For MultiIndex going through engine effectively restricts us to + # same-length tuples; see test_get_set_value_no_partial_indexing + loc = engine.get_loc(index) + return series._values[loc] + + def __setitem__(self, key, value): + key = com.apply_if_callable(key, self) + + # see if we can slice the rows + indexer = convert_to_index_sliceable(self, key) + if indexer is not None: + # either we have a slice or we have a string that can be converted + # to a slice for partial-string date indexing + return self._setitem_slice(indexer, value) + + if isinstance(key, DataFrame) or getattr(key, "ndim", None) == 2: + self._setitem_frame(key, value) + elif isinstance(key, (Series, np.ndarray, list, Index)): + self._setitem_array(key, value) + elif isinstance(value, DataFrame): + self._set_item_frame_value(key, value) + elif ( + is_list_like(value) + and not self.columns.is_unique + and 1 < len(self.columns.get_indexer_for([key])) == len(value) + ): + # Column to set is duplicated + self._setitem_array([key], value) + else: + # set column + self._set_item(key, value) + + def _setitem_slice(self, key: slice, value): + # NB: we can't just use self.loc[key] = value because that + # operates on labels and we need to operate positional for + # backwards-compat, xref GH#31469 + self._check_setitem_copy() + self.iloc[key] = value + + def _setitem_array(self, key, value): + # also raises Exception if object array with NA values + if com.is_bool_indexer(key): + # bool indexer is indexing along rows + if len(key) != len(self.index): + raise ValueError( + f"Item wrong length {len(key)} instead of {len(self.index)}!" + ) + key = check_bool_indexer(self.index, key) + indexer = key.nonzero()[0] + self._check_setitem_copy() + if isinstance(value, DataFrame): + # GH#39931 reindex since iloc does not align + value = value.reindex(self.index.take(indexer)) + self.iloc[indexer] = value + + else: + # Note: unlike self.iloc[:, indexer] = value, this will + # never try to overwrite values inplace + + if isinstance(value, DataFrame): + check_key_length(self.columns, key, value) + for k1, k2 in zip(key, value.columns): + self[k1] = value[k2] + + elif not is_list_like(value): + for col in key: + self[col] = value + + elif isinstance(value, np.ndarray) and value.ndim == 2: + self._iset_not_inplace(key, value) + + elif np.ndim(value) > 1: + # list of lists + value = DataFrame(value).values + return self._setitem_array(key, value) + + else: + self._iset_not_inplace(key, value) + + def _iset_not_inplace(self, key, value): + # GH#39510 when setting with df[key] = obj with a list-like key and + # list-like value, we iterate over those listlikes and set columns + # one at a time. This is different from dispatching to + # `self.loc[:, key]= value` because loc.__setitem__ may overwrite + # data inplace, whereas this will insert new arrays. 
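+        #
+        # A hypothetical illustration of the contrast (column names are
+        # examples only):
+        #
+        #     df[["a", "b"]] = np.ones((len(df), 2))         # inserts new arrays
+        #     df.loc[:, ["a", "b"]] = np.ones((len(df), 2))  # may write in place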
+ + def igetitem(obj, i: int): + # Note: we catch DataFrame obj before getting here, but + # hypothetically would return obj.iloc[:, i] + if isinstance(obj, np.ndarray): + return obj[..., i] + else: + return obj[i] + + if self.columns.is_unique: + if np.shape(value)[-1] != len(key): + raise ValueError("Columns must be same length as key") + + for i, col in enumerate(key): + self[col] = igetitem(value, i) + + else: + + ilocs = self.columns.get_indexer_non_unique(key)[0] + if (ilocs < 0).any(): + # key entries not in self.columns + raise NotImplementedError + + if np.shape(value)[-1] != len(ilocs): + raise ValueError("Columns must be same length as key") + + assert np.ndim(value) <= 2 + + orig_columns = self.columns + + # Using self.iloc[:, i] = ... may set values inplace, which + # by convention we do not do in __setitem__ + try: + self.columns = Index(range(len(self.columns))) + for i, iloc in enumerate(ilocs): + self[iloc] = igetitem(value, i) + finally: + self.columns = orig_columns + + def _setitem_frame(self, key, value): + # support boolean setting with DataFrame input, e.g. + # df[df > df2] = 0 + if isinstance(key, np.ndarray): + if key.shape != self.shape: + raise ValueError("Array conditional must be same shape as self") + key = self._constructor(key, **self._construct_axes_dict()) + + if key.size and not is_bool_dtype(key.values): + raise TypeError( + "Must pass DataFrame or 2-d ndarray with boolean values only" + ) + + self._check_inplace_setting(value) + self._check_setitem_copy() + self._where(-key, value, inplace=True) + + def _set_item_frame_value(self, key, value: DataFrame) -> None: + self._ensure_valid_index(value) + + # align columns + if key in self.columns: + loc = self.columns.get_loc(key) + cols = self.columns[loc] + len_cols = 1 if is_scalar(cols) else len(cols) + if len_cols != len(value.columns): + raise ValueError("Columns must be same length as key") + + # align right-hand-side columns if self.columns + # is multi-index and self[key] is a sub-frame + if isinstance(self.columns, MultiIndex) and isinstance( + loc, (slice, Series, np.ndarray, Index) + ): + cols = maybe_droplevels(cols, key) + if len(cols) and not cols.equals(value.columns): + value = value.reindex(cols, axis=1) + + # now align rows + arraylike = _reindex_for_setitem(value, self.index) + self._set_item_mgr(key, arraylike) + + def _iset_item_mgr( + self, loc: int | slice | np.ndarray, value, inplace: bool = False + ) -> None: + # when called from _set_item_mgr loc can be anything returned from get_loc + self._mgr.iset(loc, value, inplace=inplace) + self._clear_item_cache() + + def _set_item_mgr(self, key, value: ArrayLike) -> None: + try: + loc = self._info_axis.get_loc(key) + except KeyError: + # This item wasn't present, just insert at end + self._mgr.insert(len(self._info_axis), key, value) + else: + self._iset_item_mgr(loc, value) + + # check if we are modifying a copy + # try to set first as we want an invalid + # value exception to occur first + if len(self): + self._check_setitem_copy() + + def _iset_item(self, loc: int, value) -> None: + arraylike = self._sanitize_column(value) + self._iset_item_mgr(loc, arraylike, inplace=True) + + # check if we are modifying a copy + # try to set first as we want an invalid + # value exception to occur first + if len(self): + self._check_setitem_copy() + + def _set_item(self, key, value) -> None: + """ + Add series to DataFrame in specified column. 
+ + If series is a numpy-array (not a Series/TimeSeries), it must be the + same length as the DataFrames index or an error will be thrown. + + Series/TimeSeries will be conformed to the DataFrames index to + ensure homogeneity. + """ + value = self._sanitize_column(value) + + if ( + key in self.columns + and value.ndim == 1 + and not is_extension_array_dtype(value) + ): + # broadcast across multiple columns if necessary + if not self.columns.is_unique or isinstance(self.columns, MultiIndex): + existing_piece = self[key] + if isinstance(existing_piece, DataFrame): + value = np.tile(value, (len(existing_piece.columns), 1)).T + + self._set_item_mgr(key, value) + + def _set_value( + self, index: IndexLabel, col, value: Scalar, takeable: bool = False + ) -> None: + """ + Put single value at passed column and index. + + Parameters + ---------- + index : Label + row label + col : Label + column label + value : scalar + takeable : bool, default False + Sets whether or not index/col interpreted as indexers + """ + try: + if takeable: + series = self._ixs(col, axis=1) + loc = index + else: + series = self._get_item_cache(col) + loc = self.index.get_loc(index) + + # setitem_inplace will do validation that may raise TypeError + # or ValueError + series._mgr.setitem_inplace(loc, value) + + except (KeyError, TypeError, ValueError): + # set using a non-recursive method & reset the cache + if takeable: + self.iloc[index, col] = value + else: + self.loc[index, col] = value + self._item_cache.pop(col, None) + + def _ensure_valid_index(self, value) -> None: + """ + Ensure that if we don't have an index, that we can create one from the + passed value. + """ + # GH5632, make sure that we are a Series convertible + if not len(self.index) and is_list_like(value) and len(value): + if not isinstance(value, DataFrame): + try: + value = Series(value) + except (ValueError, NotImplementedError, TypeError) as err: + raise ValueError( + "Cannot set a frame with no defined index " + "and a value that cannot be converted to a Series" + ) from err + + # GH31368 preserve name of index + index_copy = value.index.copy() + if self.index.name is not None: + index_copy.name = self.index.name + + self._mgr = self._mgr.reindex_axis(index_copy, axis=1, fill_value=np.nan) + + def _box_col_values(self, values: SingleDataManager, loc: int) -> Series: + """ + Provide boxed values for a column. + """ + # Lookup in columns so that if e.g. a str datetime was passed + # we attach the Timestamp object as the name. + name = self.columns[loc] + klass = self._constructor_sliced + # We get index=self.index bc values is a SingleDataManager + return klass(values, name=name, fastpath=True).__finalize__(self) + + # ---------------------------------------------------------------------- + # Lookup Caching + + def _clear_item_cache(self) -> None: + self._item_cache.clear() + + def _get_item_cache(self, item: Hashable) -> Series: + """Return the cached item, item represents a label indexer.""" + cache = self._item_cache + res = cache.get(item) + if res is None: + # All places that call _get_item_cache have unique columns, + # pending resolution of GH#33047 + + loc = self.columns.get_loc(item) + res = self._ixs(loc, axis=1) + + cache[item] = res + + # for a chain + res._is_copy = self._is_copy + return res + + def _reset_cacher(self) -> None: + # no-op for DataFrame + pass + + def _maybe_cache_changed(self, item, value: Series, inplace: bool) -> None: + """ + The object has called back to us saying maybe it has changed. 
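+
+        Typically the caller is a cached Series handed out by
+        ``_get_item_cache``; its (possibly modified) values are written back
+        into this frame's manager so parent and child stay in sync.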
+ """ + loc = self._info_axis.get_loc(item) + arraylike = value._values + self._mgr.iset(loc, arraylike, inplace=inplace) + + # ---------------------------------------------------------------------- + # Unsorted + + def query(self, expr: str, inplace: bool = False, **kwargs): + """ + Query the columns of a DataFrame with a boolean expression. + + Parameters + ---------- + expr : str + The query string to evaluate. + + You can refer to variables + in the environment by prefixing them with an '@' character like + ``@a + b``. + + You can refer to column names that are not valid Python variable names + by surrounding them in backticks. Thus, column names containing spaces + or punctuations (besides underscores) or starting with digits must be + surrounded by backticks. (For example, a column named "Area (cm^2)" would + be referenced as ```Area (cm^2)```). Column names which are Python keywords + (like "list", "for", "import", etc) cannot be used. + + For example, if one of your columns is called ``a a`` and you want + to sum it with ``b``, your query should be ```a a` + b``. + + .. versionadded:: 0.25.0 + Backtick quoting introduced. + + .. versionadded:: 1.0.0 + Expanding functionality of backtick quoting for more than only spaces. + + inplace : bool + Whether the query should modify the data in place or return + a modified copy. + **kwargs + See the documentation for :func:`eval` for complete details + on the keyword arguments accepted by :meth:`DataFrame.query`. + + Returns + ------- + DataFrame or None + DataFrame resulting from the provided query expression or + None if ``inplace=True``. + + See Also + -------- + eval : Evaluate a string describing operations on + DataFrame columns. + DataFrame.eval : Evaluate a string describing operations on + DataFrame columns. + + Notes + ----- + The result of the evaluation of this expression is first passed to + :attr:`DataFrame.loc` and if that fails because of a + multidimensional key (e.g., a DataFrame) then the result will be passed + to :meth:`DataFrame.__getitem__`. + + This method uses the top-level :func:`eval` function to + evaluate the passed query. + + The :meth:`~pandas.DataFrame.query` method uses a slightly + modified Python syntax by default. For example, the ``&`` and ``|`` + (bitwise) operators have the precedence of their boolean cousins, + :keyword:`and` and :keyword:`or`. This *is* syntactically valid Python, + however the semantics are different. + + You can change the semantics of the expression by passing the keyword + argument ``parser='python'``. This enforces the same semantics as + evaluation in Python space. Likewise, you can pass ``engine='python'`` + to evaluate an expression using Python itself as a backend. This is not + recommended as it is inefficient compared to using ``numexpr`` as the + engine. + + The :attr:`DataFrame.index` and + :attr:`DataFrame.columns` attributes of the + :class:`~pandas.DataFrame` instance are placed in the query namespace + by default, which allows you to treat both the index and columns of the + frame as a column in the frame. + The identifier ``index`` is used for the frame index; you can also + use the name of the index to identify it in a query. Please note that + Python keywords may not be used as identifiers. + + For further details and examples see the ``query`` documentation in + :ref:`indexing `. + + *Backtick quoted variables* + + Backtick quoted variables are parsed as literal Python code and + are converted internally to a Python valid identifier. 
+ This can lead to the following problems. + + During parsing a number of disallowed characters inside the backtick + quoted string are replaced by strings that are allowed as a Python identifier. + These characters include all operators in Python, the space character, the + question mark, the exclamation mark, the dollar sign, and the euro sign. + For other characters that fall outside the ASCII range (U+0001..U+007F) + and those that are not further specified in PEP 3131, + the query parser will raise an error. + This excludes whitespace different than the space character, + but also the hashtag (as it is used for comments) and the backtick + itself (backtick can also not be escaped). + + In a special case, quotes that make a pair around a backtick can + confuse the parser. + For example, ```it's` > `that's``` will raise an error, + as it forms a quoted string (``'s > `that'``) with a backtick inside. + + See also the Python documentation about lexical analysis + (https://docs.python.org/3/reference/lexical_analysis.html) + in combination with the source code in :mod:`pandas.core.computation.parsing`. + + Examples + -------- + >>> df = pd.DataFrame({'A': range(1, 6), + ... 'B': range(10, 0, -2), + ... 'C C': range(10, 5, -1)}) + >>> df + A B C C + 0 1 10 10 + 1 2 8 9 + 2 3 6 8 + 3 4 4 7 + 4 5 2 6 + >>> df.query('A > B') + A B C C + 4 5 2 6 + + The previous expression is equivalent to + + >>> df[df.A > df.B] + A B C C + 4 5 2 6 + + For columns with spaces in their name, you can use backtick quoting. + + >>> df.query('B == `C C`') + A B C C + 0 1 10 10 + + The previous expression is equivalent to + + >>> df[df.B == df['C C']] + A B C C + 0 1 10 10 + """ + inplace = validate_bool_kwarg(inplace, "inplace") + if not isinstance(expr, str): + msg = f"expr must be a string to be evaluated, {type(expr)} given" + raise ValueError(msg) + kwargs["level"] = kwargs.pop("level", 0) + 1 + kwargs["target"] = None + res = self.eval(expr, **kwargs) + + try: + result = self.loc[res] + except ValueError: + # when res is multi-dimensional loc raises, but this is sometimes a + # valid query + result = self[res] + + if inplace: + self._update_inplace(result) + return None + else: + return result + + def eval(self, expr: str, inplace: bool = False, **kwargs): + """ + Evaluate a string describing operations on DataFrame columns. + + Operates on columns only, not specific rows or elements. This allows + `eval` to run arbitrary code, which can make you vulnerable to code + injection if you pass user input to this function. + + Parameters + ---------- + expr : str + The expression string to evaluate. + inplace : bool, default False + If the expression contains an assignment, whether to perform the + operation inplace and mutate the existing DataFrame. Otherwise, + a new DataFrame is returned. + **kwargs + See the documentation for :func:`eval` for complete details + on the keyword arguments accepted by + :meth:`~pandas.DataFrame.query`. + + Returns + ------- + ndarray, scalar, pandas object, or None + The result of the evaluation or None if ``inplace=True``. + + See Also + -------- + DataFrame.query : Evaluates a boolean expression to query the columns + of a frame. + DataFrame.assign : Can evaluate an expression or function to create new + values for a column. + eval : Evaluate a Python expression as a string using various + backends. + + Notes + ----- + For more details see the API documentation for :func:`~eval`. + For detailed examples see :ref:`enhancing performance with eval + `. 
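+
+        Local variables can be referenced by prefixing them with ``@``, as in
+        :meth:`~pandas.DataFrame.query` (a minimal sketch; ``threshold`` is a
+        hypothetical local variable)::
+
+            threshold = 5
+            df.eval("A > @threshold")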
+ + Examples + -------- + >>> df = pd.DataFrame({'A': range(1, 6), 'B': range(10, 0, -2)}) + >>> df + A B + 0 1 10 + 1 2 8 + 2 3 6 + 3 4 4 + 4 5 2 + >>> df.eval('A + B') + 0 11 + 1 10 + 2 9 + 3 8 + 4 7 + dtype: int64 + + Assignment is allowed though by default the original DataFrame is not + modified. + + >>> df.eval('C = A + B') + A B C + 0 1 10 11 + 1 2 8 10 + 2 3 6 9 + 3 4 4 8 + 4 5 2 7 + >>> df + A B + 0 1 10 + 1 2 8 + 2 3 6 + 3 4 4 + 4 5 2 + + Use ``inplace=True`` to modify the original DataFrame. + + >>> df.eval('C = A + B', inplace=True) + >>> df + A B C + 0 1 10 11 + 1 2 8 10 + 2 3 6 9 + 3 4 4 8 + 4 5 2 7 + + Multiple columns can be assigned to using multi-line expressions: + + >>> df.eval( + ... ''' + ... C = A + B + ... D = A - B + ... ''' + ... ) + A B C D + 0 1 10 11 -9 + 1 2 8 10 -6 + 2 3 6 9 -3 + 3 4 4 8 0 + 4 5 2 7 3 + """ + from pandas.core.computation.eval import eval as _eval + + inplace = validate_bool_kwarg(inplace, "inplace") + kwargs["level"] = kwargs.pop("level", 0) + 1 + index_resolvers = self._get_index_resolvers() + column_resolvers = self._get_cleaned_column_resolvers() + resolvers = column_resolvers, index_resolvers + if "target" not in kwargs: + kwargs["target"] = self + kwargs["resolvers"] = tuple(kwargs.get("resolvers", ())) + resolvers + + return _eval(expr, inplace=inplace, **kwargs) + + def select_dtypes(self, include=None, exclude=None) -> DataFrame: + """ + Return a subset of the DataFrame's columns based on the column dtypes. + + Parameters + ---------- + include, exclude : scalar or list-like + A selection of dtypes or strings to be included/excluded. At least + one of these parameters must be supplied. + + Returns + ------- + DataFrame + The subset of the frame including the dtypes in ``include`` and + excluding the dtypes in ``exclude``. + + Raises + ------ + ValueError + * If both of ``include`` and ``exclude`` are empty + * If ``include`` and ``exclude`` have overlapping elements + * If any kind of string dtype is passed in. + + See Also + -------- + DataFrame.dtypes: Return Series with the data type of each column. + + Notes + ----- + * To select all *numeric* types, use ``np.number`` or ``'number'`` + * To select strings you must use the ``object`` dtype, but note that + this will return *all* object dtype columns + * See the `numpy dtype hierarchy + `__ + * To select datetimes, use ``np.datetime64``, ``'datetime'`` or + ``'datetime64'`` + * To select timedeltas, use ``np.timedelta64``, ``'timedelta'`` or + ``'timedelta64'`` + * To select Pandas categorical dtypes, use ``'category'`` + * To select Pandas datetimetz dtypes, use ``'datetimetz'`` (new in + 0.20.0) or ``'datetime64[ns, tz]'`` + + Examples + -------- + >>> df = pd.DataFrame({'a': [1, 2] * 3, + ... 'b': [True, False] * 3, + ... 
'c': [1.0, 2.0] * 3}) + >>> df + a b c + 0 1 True 1.0 + 1 2 False 2.0 + 2 1 True 1.0 + 3 2 False 2.0 + 4 1 True 1.0 + 5 2 False 2.0 + + >>> df.select_dtypes(include='bool') + b + 0 True + 1 False + 2 True + 3 False + 4 True + 5 False + + >>> df.select_dtypes(include=['float64']) + c + 0 1.0 + 1 2.0 + 2 1.0 + 3 2.0 + 4 1.0 + 5 2.0 + + >>> df.select_dtypes(exclude=['int64']) + b c + 0 True 1.0 + 1 False 2.0 + 2 True 1.0 + 3 False 2.0 + 4 True 1.0 + 5 False 2.0 + """ + if not is_list_like(include): + include = (include,) if include is not None else () + if not is_list_like(exclude): + exclude = (exclude,) if exclude is not None else () + + selection = (frozenset(include), frozenset(exclude)) + + if not any(selection): + raise ValueError("at least one of include or exclude must be nonempty") + + # convert the myriad valid dtypes object to a single representation + def check_int_infer_dtype(dtypes): + converted_dtypes: list[type] = [] + for dtype in dtypes: + # Numpy maps int to different types (int32, in64) on Windows and Linux + # see https://github.com/numpy/numpy/issues/9464 + if (isinstance(dtype, str) and dtype == "int") or (dtype is int): + converted_dtypes.append(np.int32) + converted_dtypes.append(np.int64) + elif dtype == "float" or dtype is float: + # GH#42452 : np.dtype("float") coerces to np.float64 from Numpy 1.20 + converted_dtypes.extend([np.float64, np.float32]) + else: + converted_dtypes.append(infer_dtype_from_object(dtype)) + return frozenset(converted_dtypes) + + include = check_int_infer_dtype(include) + exclude = check_int_infer_dtype(exclude) + + for dtypes in (include, exclude): + invalidate_string_dtypes(dtypes) + + # can't both include AND exclude! + if not include.isdisjoint(exclude): + raise ValueError(f"include and exclude overlap on {(include & exclude)}") + + def dtype_predicate(dtype: DtypeObj, dtypes_set) -> bool: + return issubclass(dtype.type, tuple(dtypes_set)) or ( + np.number in dtypes_set and getattr(dtype, "_is_numeric", False) + ) + + def predicate(arr: ArrayLike) -> bool: + dtype = arr.dtype + if include: + if not dtype_predicate(dtype, include): + return False + + if exclude: + if dtype_predicate(dtype, exclude): + return False + + return True + + mgr = self._mgr._get_data_subset(predicate) + return type(self)(mgr).__finalize__(self) + + def insert( + self, + loc: int, + column: Hashable, + value: Scalar | AnyArrayLike, + allow_duplicates: bool = False, + ) -> None: + """ + Insert column into DataFrame at specified location. + + Raises a ValueError if `column` is already contained in the DataFrame, + unless `allow_duplicates` is set to True. + + Parameters + ---------- + loc : int + Insertion index. Must verify 0 <= loc <= len(columns). + column : str, number, or hashable object + Label of the inserted column. + value : Scalar, Series, or array-like + allow_duplicates : bool, optional default False + + See Also + -------- + Index.insert : Insert new item by index. 
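+
+        Notes
+        -----
+        ``loc`` is a position, not a label; appending at the end can be
+        spelled with ``len(df.columns)`` (a minimal sketch)::
+
+            df.insert(len(df.columns), "new_col", 0)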
+ + Examples + -------- + >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) + >>> df + col1 col2 + 0 1 3 + 1 2 4 + >>> df.insert(1, "newcol", [99, 99]) + >>> df + col1 newcol col2 + 0 1 99 3 + 1 2 99 4 + >>> df.insert(0, "col1", [100, 100], allow_duplicates=True) + >>> df + col1 col1 newcol col2 + 0 100 1 99 3 + 1 100 2 99 4 + + Notice that pandas uses index alignment in case of `value` from type `Series`: + + >>> df.insert(0, "col0", pd.Series([5, 6], index=[1, 2])) + >>> df + col0 col1 col1 newcol col2 + 0 NaN 100 1 99 3 + 1 5.0 100 2 99 4 + """ + if allow_duplicates and not self.flags.allows_duplicate_labels: + raise ValueError( + "Cannot specify 'allow_duplicates=True' when " + "'self.flags.allows_duplicate_labels' is False." + ) + if not allow_duplicates and column in self.columns: + # Should this be a different kind of error?? + raise ValueError(f"cannot insert {column}, already exists") + if not isinstance(loc, int): + raise TypeError("loc must be int") + + value = self._sanitize_column(value) + self._mgr.insert(loc, column, value) + + def assign(self, **kwargs) -> DataFrame: + r""" + Assign new columns to a DataFrame. + + Returns a new object with all original columns in addition to new ones. + Existing columns that are re-assigned will be overwritten. + + Parameters + ---------- + **kwargs : dict of {str: callable or Series} + The column names are keywords. If the values are + callable, they are computed on the DataFrame and + assigned to the new columns. The callable must not + change input DataFrame (though pandas doesn't check it). + If the values are not callable, (e.g. a Series, scalar, or array), + they are simply assigned. + + Returns + ------- + DataFrame + A new DataFrame with the new columns in addition to + all the existing columns. + + Notes + ----- + Assigning multiple columns within the same ``assign`` is possible. + Later items in '\*\*kwargs' may refer to newly created or modified + columns in 'df'; items are computed and assigned into 'df' in order. + + Examples + -------- + >>> df = pd.DataFrame({'temp_c': [17.0, 25.0]}, + ... index=['Portland', 'Berkeley']) + >>> df + temp_c + Portland 17.0 + Berkeley 25.0 + + Where the value is a callable, evaluated on `df`: + + >>> df.assign(temp_f=lambda x: x.temp_c * 9 / 5 + 32) + temp_c temp_f + Portland 17.0 62.6 + Berkeley 25.0 77.0 + + Alternatively, the same behavior can be achieved by directly + referencing an existing Series or sequence: + + >>> df.assign(temp_f=df['temp_c'] * 9 / 5 + 32) + temp_c temp_f + Portland 17.0 62.6 + Berkeley 25.0 77.0 + + You can create multiple columns within the same assign where one + of the columns depends on another one defined within the same assign: + + >>> df.assign(temp_f=lambda x: x['temp_c'] * 9 / 5 + 32, + ... temp_k=lambda x: (x['temp_f'] + 459.67) * 5 / 9) + temp_c temp_f temp_k + Portland 17.0 62.6 290.15 + Berkeley 25.0 77.0 298.15 + """ + data = self.copy() + + for k, v in kwargs.items(): + data[k] = com.apply_if_callable(v, data) + return data + + def _sanitize_column(self, value) -> ArrayLike: + """ + Ensures new columns (which go into the BlockManager as new blocks) are + always copied and converted into an array. 
+
+        Parameters
+        ----------
+        value : scalar, Series, or array-like
+
+        Returns
+        -------
+        numpy.ndarray or ExtensionArray
+        """
+        self._ensure_valid_index(value)
+
+        # We should never get here with DataFrame value
+        if isinstance(value, Series):
+            return _reindex_for_setitem(value, self.index)
+
+        if is_list_like(value):
+            com.require_length_match(value, self.index)
+        return sanitize_array(value, self.index, copy=True, allow_2d=True)
+
+    @property
+    def _series(self):
+        return {
+            item: Series(
+                self._mgr.iget(idx), index=self.index, name=item, fastpath=True
+            )
+            for idx, item in enumerate(self.columns)
+        }
+
+    def lookup(
+        self, row_labels: Sequence[IndexLabel], col_labels: Sequence[IndexLabel]
+    ) -> np.ndarray:
+        """
+        Label-based "fancy indexing" function for DataFrame.
+        Given equal-length arrays of row and column labels, return an
+        array of the values corresponding to each (row, col) pair.
+
+        .. deprecated:: 1.2.0
+            DataFrame.lookup is deprecated,
+            use DataFrame.melt and DataFrame.loc instead.
+            For further details see the user guide section on looking up
+            values by index/column labels.
+
+        Parameters
+        ----------
+        row_labels : sequence
+            The row labels to use for lookup.
+        col_labels : sequence
+            The column labels to use for lookup.
+
+        Returns
+        -------
+        numpy.ndarray
+            The found values.
+        """
+        msg = (
+            "The 'lookup' method is deprecated and will be "
+            "removed in a future version. "
+            "You can use DataFrame.melt and DataFrame.loc "
+            "as a substitute."
+        )
+        warnings.warn(msg, FutureWarning, stacklevel=find_stack_level())
+
+        n = len(row_labels)
+        if n != len(col_labels):
+            raise ValueError("Row labels must have same size as column labels")
+        if not (self.index.is_unique and self.columns.is_unique):
+            # GH#33041
+            raise ValueError("DataFrame.lookup requires unique index and columns")
+
+        thresh = 1000
+        if not self._is_mixed_type or n > thresh:
+            values = self.values
+            ridx = self.index.get_indexer(row_labels)
+            cidx = self.columns.get_indexer(col_labels)
+            if (ridx == -1).any():
+                raise KeyError("One or more row labels was not found")
+            if (cidx == -1).any():
+                raise KeyError("One or more column labels was not found")
+            flat_index = ridx * len(self.columns) + cidx
+            result = values.flat[flat_index]
+        else:
+            result = np.empty(n, dtype="O")
+            for i, (r, c) in enumerate(zip(row_labels, col_labels)):
+                result[i] = self._get_value(r, c)
+
+        if is_object_dtype(result):
+            result = lib.maybe_convert_objects(result)
+
+        return result
+
+    # ----------------------------------------------------------------------
+    # Reindexing and alignment
+
+    def _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy):
+        frame = self
+
+        columns = axes["columns"]
+        if columns is not None:
+            frame = frame._reindex_columns(
+                columns, method, copy, level, fill_value, limit, tolerance
+            )
+
+        index = axes["index"]
+        if index is not None:
+            frame = frame._reindex_index(
+                index, method, copy, level, fill_value, limit, tolerance
+            )
+
+        return frame
+
+    def _reindex_index(
+        self,
+        new_index,
+        method,
+        copy: bool,
+        level: Level,
+        fill_value=np.nan,
+        limit=None,
+        tolerance=None,
+    ):
+        new_index, indexer = self.index.reindex(
+            new_index, method=method, level=level, limit=limit, tolerance=tolerance
+        )
+        return self._reindex_with_indexers(
+            {0: [new_index, indexer]},
+            copy=copy,
+            fill_value=fill_value,
+            allow_dups=False,
+        )
+
+    def _reindex_columns(
+        self,
+        new_columns,
+        method,
+        copy: bool,
+        level: Level,
+        fill_value=None,
+        limit=None,
+        tolerance=None,
+    ):
+        new_columns,
indexer = self.columns.reindex( + new_columns, method=method, level=level, limit=limit, tolerance=tolerance + ) + return self._reindex_with_indexers( + {1: [new_columns, indexer]}, + copy=copy, + fill_value=fill_value, + allow_dups=False, + ) + + def _reindex_multi( + self, axes: dict[str, Index], copy: bool, fill_value + ) -> DataFrame: + """ + We are guaranteed non-Nones in the axes. + """ + + new_index, row_indexer = self.index.reindex(axes["index"]) + new_columns, col_indexer = self.columns.reindex(axes["columns"]) + + if row_indexer is not None and col_indexer is not None: + # Fastpath. By doing two 'take's at once we avoid making an + # unnecessary copy. + # We only get here with `not self._is_mixed_type`, which (almost) + # ensures that self.values is cheap. It may be worth making this + # condition more specific. + indexer = row_indexer, col_indexer + new_values = take_2d_multi(self.values, indexer, fill_value=fill_value) + return self._constructor(new_values, index=new_index, columns=new_columns) + else: + return self._reindex_with_indexers( + {0: [new_index, row_indexer], 1: [new_columns, col_indexer]}, + copy=copy, + fill_value=fill_value, + ) + + @doc(NDFrame.align, **_shared_doc_kwargs) + def align( + self, + other, + join: str = "outer", + axis: Axis | None = None, + level: Level | None = None, + copy: bool = True, + fill_value=None, + method: str | None = None, + limit=None, + fill_axis: Axis = 0, + broadcast_axis: Axis | None = None, + ) -> DataFrame: + return super().align( + other, + join=join, + axis=axis, + level=level, + copy=copy, + fill_value=fill_value, + method=method, + limit=limit, + fill_axis=fill_axis, + broadcast_axis=broadcast_axis, + ) + + @overload + def set_axis( + self, labels, axis: Axis = ..., inplace: Literal[False] = ... + ) -> DataFrame: + ... + + @overload + def set_axis(self, labels, axis: Axis, inplace: Literal[True]) -> None: + ... + + @overload + def set_axis(self, labels, *, inplace: Literal[True]) -> None: + ... + + @overload + def set_axis( + self, labels, axis: Axis = ..., inplace: bool = ... + ) -> DataFrame | None: + ... + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "labels"]) + @Appender( + """ + Examples + -------- + >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + + Change the row labels. + + >>> df.set_axis(['a', 'b', 'c'], axis='index') + A B + a 1 4 + b 2 5 + c 3 6 + + Change the column labels. + + >>> df.set_axis(['I', 'II'], axis='columns') + I II + 0 1 4 + 1 2 5 + 2 3 6 + + Now, update the labels inplace. 
+
+        >>> df.set_axis(['i', 'ii'], axis='columns', inplace=True)
+        >>> df
+           i  ii
+        0  1   4
+        1  2   5
+        2  3   6
+        """
+    )
+    @Substitution(
+        **_shared_doc_kwargs,
+        extended_summary_sub=" column or",
+        axis_description_sub=", and 1 identifies the columns",
+        see_also_sub=" or columns",
+    )
+    @Appender(NDFrame.set_axis.__doc__)
+    def set_axis(self, labels, axis: Axis = 0, inplace: bool = False):
+        return super().set_axis(labels, axis=axis, inplace=inplace)
+
+    @Substitution(**_shared_doc_kwargs)
+    @Appender(NDFrame.reindex.__doc__)
+    @rewrite_axis_style_signature(
+        "labels",
+        [
+            ("method", None),
+            ("copy", True),
+            ("level", None),
+            ("fill_value", np.nan),
+            ("limit", None),
+            ("tolerance", None),
+        ],
+    )
+    def reindex(self, *args, **kwargs) -> DataFrame:
+        axes = validate_axis_style_args(self, args, kwargs, "labels", "reindex")
+        kwargs.update(axes)
+        # Pop these, since the values are in `kwargs` under different names
+        kwargs.pop("axis", None)
+        kwargs.pop("labels", None)
+        return super().reindex(**kwargs)
+
+    @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "labels"])
+    def drop(
+        self,
+        labels=None,
+        axis: Axis = 0,
+        index=None,
+        columns=None,
+        level: Level | None = None,
+        inplace: bool = False,
+        errors: str = "raise",
+    ):
+        """
+        Drop specified labels from rows or columns.
+
+        Remove rows or columns by specifying label names and corresponding
+        axis, or by specifying directly index or column names. When using a
+        multi-index, labels on different levels can be removed by specifying
+        the level. See the user guide for more information about the now
+        unused levels.
+
+        Parameters
+        ----------
+        labels : single label or list-like
+            Index or column labels to drop. A tuple will be used as a single
+            label and not treated as a list-like.
+        axis : {0 or 'index', 1 or 'columns'}, default 0
+            Whether to drop labels from the index (0 or 'index') or
+            columns (1 or 'columns').
+        index : single label or list-like
+            Alternative to specifying axis (``labels, axis=0``
+            is equivalent to ``index=labels``).
+        columns : single label or list-like
+            Alternative to specifying axis (``labels, axis=1``
+            is equivalent to ``columns=labels``).
+        level : int or level name, optional
+            For MultiIndex, level from which the labels will be removed.
+        inplace : bool, default False
+            If False, return a copy. Otherwise, do operation
+            inplace and return None.
+        errors : {'ignore', 'raise'}, default 'raise'
+            If 'ignore', suppress error and only existing labels are
+            dropped.
+
+        Returns
+        -------
+        DataFrame or None
+            DataFrame without the removed index or column labels or
+            None if ``inplace=True``.
+
+        Raises
+        ------
+        KeyError
+            If any of the labels is not found in the selected axis.
+
+        See Also
+        --------
+        DataFrame.loc : Label-location based indexer for selection by label.
+        DataFrame.dropna : Return DataFrame with labels on given axis omitted
+            where (all or any) data are missing.
+        DataFrame.drop_duplicates : Return DataFrame with duplicate rows
+            removed, optionally only considering certain columns.
+        Series.drop : Return Series with specified index labels removed.
+
+        Examples
+        --------
+        >>> df = pd.DataFrame(np.arange(12).reshape(3, 4),
+        ...
columns=['A', 'B', 'C', 'D']) + >>> df + A B C D + 0 0 1 2 3 + 1 4 5 6 7 + 2 8 9 10 11 + + Drop columns + + >>> df.drop(['B', 'C'], axis=1) + A D + 0 0 3 + 1 4 7 + 2 8 11 + + >>> df.drop(columns=['B', 'C']) + A D + 0 0 3 + 1 4 7 + 2 8 11 + + Drop a row by index + + >>> df.drop([0, 1]) + A B C D + 2 8 9 10 11 + + Drop columns and/or rows of MultiIndex DataFrame + + >>> midx = pd.MultiIndex(levels=[['lama', 'cow', 'falcon'], + ... ['speed', 'weight', 'length']], + ... codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2], + ... [0, 1, 2, 0, 1, 2, 0, 1, 2]]) + >>> df = pd.DataFrame(index=midx, columns=['big', 'small'], + ... data=[[45, 30], [200, 100], [1.5, 1], [30, 20], + ... [250, 150], [1.5, 0.8], [320, 250], + ... [1, 0.8], [0.3, 0.2]]) + >>> df + big small + lama speed 45.0 30.0 + weight 200.0 100.0 + length 1.5 1.0 + cow speed 30.0 20.0 + weight 250.0 150.0 + length 1.5 0.8 + falcon speed 320.0 250.0 + weight 1.0 0.8 + length 0.3 0.2 + + Drop a specific index combination from the MultiIndex + DataFrame, i.e., drop the combination ``'falcon'`` and + ``'weight'``, which deletes only the corresponding row + + >>> df.drop(index=('falcon', 'weight')) + big small + lama speed 45.0 30.0 + weight 200.0 100.0 + length 1.5 1.0 + cow speed 30.0 20.0 + weight 250.0 150.0 + length 1.5 0.8 + falcon speed 320.0 250.0 + length 0.3 0.2 + + >>> df.drop(index='cow', columns='small') + big + lama speed 45.0 + weight 200.0 + length 1.5 + falcon speed 320.0 + weight 1.0 + length 0.3 + + >>> df.drop(index='length', level=1) + big small + lama speed 45.0 30.0 + weight 200.0 100.0 + cow speed 30.0 20.0 + weight 250.0 150.0 + falcon speed 320.0 250.0 + weight 1.0 0.8 + """ + return super().drop( + labels=labels, + axis=axis, + index=index, + columns=columns, + level=level, + inplace=inplace, + errors=errors, + ) + + def rename( + self, + mapper: Renamer | None = None, + *, + index: Renamer | None = None, + columns: Renamer | None = None, + axis: Axis | None = None, + copy: bool = True, + inplace: bool = False, + level: Level | None = None, + errors: str = "ignore", + ) -> DataFrame | None: + """ + Alter axes labels. + + Function / dict values must be unique (1-to-1). Labels not contained in + a dict / Series will be left as-is. Extra labels listed don't throw an + error. + + See the :ref:`user guide ` for more. + + Parameters + ---------- + mapper : dict-like or function + Dict-like or function transformations to apply to + that axis' values. Use either ``mapper`` and ``axis`` to + specify the axis to target with ``mapper``, or ``index`` and + ``columns``. + index : dict-like or function + Alternative to specifying axis (``mapper, axis=0`` + is equivalent to ``index=mapper``). + columns : dict-like or function + Alternative to specifying axis (``mapper, axis=1`` + is equivalent to ``columns=mapper``). + axis : {0 or 'index', 1 or 'columns'}, default 0 + Axis to target with ``mapper``. Can be either the axis name + ('index', 'columns') or number (0, 1). The default is 'index'. + copy : bool, default True + Also copy underlying data. + inplace : bool, default False + Whether to return a new DataFrame. If True then value of copy is + ignored. + level : int or level name, default None + In case of a MultiIndex, only rename labels in the specified + level. + errors : {'ignore', 'raise'}, default 'ignore' + If 'raise', raise a `KeyError` when a dict-like `mapper`, `index`, + or `columns` contains labels that are not present in the Index + being transformed. + If 'ignore', existing keys will be renamed and extra keys will be + ignored. 
+ + Returns + ------- + DataFrame or None + DataFrame with the renamed axis labels or None if ``inplace=True``. + + Raises + ------ + KeyError + If any of the labels is not found in the selected axis and + "errors='raise'". + + See Also + -------- + DataFrame.rename_axis : Set the name of the axis. + + Examples + -------- + ``DataFrame.rename`` supports two calling conventions + + * ``(index=index_mapper, columns=columns_mapper, ...)`` + * ``(mapper, axis={'index', 'columns'}, ...)`` + + We *highly* recommend using keyword arguments to clarify your + intent. + + Rename columns using a mapping: + + >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + >>> df.rename(columns={"A": "a", "B": "c"}) + a c + 0 1 4 + 1 2 5 + 2 3 6 + + Rename index using a mapping: + + >>> df.rename(index={0: "x", 1: "y", 2: "z"}) + A B + x 1 4 + y 2 5 + z 3 6 + + Cast index labels to a different type: + + >>> df.index + RangeIndex(start=0, stop=3, step=1) + >>> df.rename(index=str).index + Index(['0', '1', '2'], dtype='object') + + >>> df.rename(columns={"A": "a", "B": "b", "C": "c"}, errors="raise") + Traceback (most recent call last): + KeyError: ['C'] not found in axis + + Using axis-style parameters: + + >>> df.rename(str.lower, axis='columns') + a b + 0 1 4 + 1 2 5 + 2 3 6 + + >>> df.rename({1: 2, 2: 4}, axis='index') + A B + 0 1 4 + 2 2 5 + 4 3 6 + """ + return super()._rename( + mapper=mapper, + index=index, + columns=columns, + axis=axis, + copy=copy, + inplace=inplace, + level=level, + errors=errors, + ) + + @overload + def fillna( + self, + value=..., + method: FillnaOptions | None = ..., + axis: Axis | None = ..., + inplace: Literal[False] = ..., + limit=..., + downcast=..., + ) -> DataFrame: + ... + + @overload + def fillna( + self, + value, + method: FillnaOptions | None, + axis: Axis | None, + inplace: Literal[True], + limit=..., + downcast=..., + ) -> None: + ... + + @overload + def fillna( + self, + *, + inplace: Literal[True], + limit=..., + downcast=..., + ) -> None: + ... + + @overload + def fillna( + self, + value, + *, + inplace: Literal[True], + limit=..., + downcast=..., + ) -> None: + ... + + @overload + def fillna( + self, + *, + method: FillnaOptions | None, + inplace: Literal[True], + limit=..., + downcast=..., + ) -> None: + ... + + @overload + def fillna( + self, + *, + axis: Axis | None, + inplace: Literal[True], + limit=..., + downcast=..., + ) -> None: + ... + + @overload + def fillna( + self, + *, + method: FillnaOptions | None, + axis: Axis | None, + inplace: Literal[True], + limit=..., + downcast=..., + ) -> None: + ... + + @overload + def fillna( + self, + value, + *, + axis: Axis | None, + inplace: Literal[True], + limit=..., + downcast=..., + ) -> None: + ... + + @overload + def fillna( + self, + value, + method: FillnaOptions | None, + *, + inplace: Literal[True], + limit=..., + downcast=..., + ) -> None: + ... + + @overload + def fillna( + self, + value=..., + method: FillnaOptions | None = ..., + axis: Axis | None = ..., + inplace: bool = ..., + limit=..., + downcast=..., + ) -> DataFrame | None: + ... 
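+
+    # A minimal sketch of what the overloads above encode (assuming a float
+    # column named "a"; illustrative only): ``inplace=True`` returns ``None``,
+    # while the default returns a new DataFrame.
+    #
+    #   >>> df = pd.DataFrame({"a": [1.0, None]})
+    #   >>> df.fillna(0.0)["a"].tolist()
+    #   [1.0, 0.0]
+    #   >>> df.fillna(0.0, inplace=True) is None
+    #   True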
+ + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "value"]) + @doc(NDFrame.fillna, **_shared_doc_kwargs) + def fillna( + self, + value: object | ArrayLike | None = None, + method: FillnaOptions | None = None, + axis: Axis | None = None, + inplace: bool = False, + limit=None, + downcast=None, + ) -> DataFrame | None: + return super().fillna( + value=value, + method=method, + axis=axis, + inplace=inplace, + limit=limit, + downcast=downcast, + ) + + def pop(self, item: Hashable) -> Series: + """ + Return item and drop from frame. Raise KeyError if not found. + + Parameters + ---------- + item : label + Label of column to be popped. + + Returns + ------- + Series + + Examples + -------- + >>> df = pd.DataFrame([('falcon', 'bird', 389.0), + ... ('parrot', 'bird', 24.0), + ... ('lion', 'mammal', 80.5), + ... ('monkey', 'mammal', np.nan)], + ... columns=('name', 'class', 'max_speed')) + >>> df + name class max_speed + 0 falcon bird 389.0 + 1 parrot bird 24.0 + 2 lion mammal 80.5 + 3 monkey mammal NaN + + >>> df.pop('class') + 0 bird + 1 bird + 2 mammal + 3 mammal + Name: class, dtype: object + + >>> df + name max_speed + 0 falcon 389.0 + 1 parrot 24.0 + 2 lion 80.5 + 3 monkey NaN + """ + return super().pop(item=item) + + @doc(NDFrame.replace, **_shared_doc_kwargs) + def replace( + self, + to_replace=None, + value=lib.no_default, + inplace: bool = False, + limit=None, + regex: bool = False, + method: str | lib.NoDefault = lib.no_default, + ): + return super().replace( + to_replace=to_replace, + value=value, + inplace=inplace, + limit=limit, + regex=regex, + method=method, + ) + + def _replace_columnwise( + self, mapping: dict[Hashable, tuple[Any, Any]], inplace: bool, regex + ): + """ + Dispatch to Series.replace column-wise. + + Parameters + ---------- + mapping : dict + of the form {col: (target, value)} + inplace : bool + regex : bool or same types as `to_replace` in DataFrame.replace + + Returns + ------- + DataFrame or None + """ + # Operate column-wise + res = self if inplace else self.copy() + ax = self.columns + + for i in range(len(ax)): + if ax[i] in mapping: + ser = self.iloc[:, i] + + target, value = mapping[ax[i]] + newobj = ser.replace(target, value, regex=regex) + + res.iloc[:, i] = newobj + + if inplace: + return + return res.__finalize__(self) + + @doc(NDFrame.shift, klass=_shared_doc_kwargs["klass"]) + def shift( + self, + periods=1, + freq: Frequency | None = None, + axis: Axis = 0, + fill_value=lib.no_default, + ) -> DataFrame: + axis = self._get_axis_number(axis) + + ncols = len(self.columns) + if axis == 1 and periods != 0 and fill_value is lib.no_default and ncols > 0: + # We will infer fill_value to match the closest column + + # Use a column that we know is valid for our column's dtype GH#38434 + label = self.columns[0] + + if periods > 0: + result = self.iloc[:, :-periods] + for col in range(min(ncols, abs(periods))): + # TODO(EA2D): doing this in a loop unnecessary with 2D EAs + # Define filler inside loop so we get a copy + filler = self.iloc[:, 0].shift(len(self)) + result.insert(0, label, filler, allow_duplicates=True) + else: + result = self.iloc[:, -periods:] + for col in range(min(ncols, abs(periods))): + # Define filler inside loop so we get a copy + filler = self.iloc[:, -1].shift(len(self)) + result.insert( + len(result.columns), label, filler, allow_duplicates=True + ) + + result.columns = self.columns.copy() + return result + + return super().shift( + periods=periods, freq=freq, axis=axis, fill_value=fill_value + ) + + 
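+
+    # A minimal sketch of the axis=1 fast path above (assuming a two-column
+    # integer frame): vacated columns are filled with an inferred fill value,
+    # here NaN obtained by shifting the first column past its own length.
+    #
+    #   >>> df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
+    #   >>> df.shift(periods=1, axis=1)
+    #        a  b
+    #   0  NaN  1
+    #   1  NaN  2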
@deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "keys"]) + def set_index( + self, + keys, + drop: bool = True, + append: bool = False, + inplace: bool = False, + verify_integrity: bool = False, + ): + """ + Set the DataFrame index using existing columns. + + Set the DataFrame index (row labels) using one or more existing + columns or arrays (of the correct length). The index can replace the + existing index or expand on it. + + Parameters + ---------- + keys : label or array-like or list of labels/arrays + This parameter can be either a single column key, a single array of + the same length as the calling DataFrame, or a list containing an + arbitrary combination of column keys and arrays. Here, "array" + encompasses :class:`Series`, :class:`Index`, ``np.ndarray``, and + instances of :class:`~collections.abc.Iterator`. + drop : bool, default True + Delete columns to be used as the new index. + append : bool, default False + Whether to append columns to existing index. + inplace : bool, default False + If True, modifies the DataFrame in place (do not create a new object). + verify_integrity : bool, default False + Check the new index for duplicates. Otherwise defer the check until + necessary. Setting to False will improve the performance of this + method. + + Returns + ------- + DataFrame or None + Changed row labels or None if ``inplace=True``. + + See Also + -------- + DataFrame.reset_index : Opposite of set_index. + DataFrame.reindex : Change to new indices or expand indices. + DataFrame.reindex_like : Change to same indices as other DataFrame. + + Examples + -------- + >>> df = pd.DataFrame({'month': [1, 4, 7, 10], + ... 'year': [2012, 2014, 2013, 2014], + ... 'sale': [55, 40, 84, 31]}) + >>> df + month year sale + 0 1 2012 55 + 1 4 2014 40 + 2 7 2013 84 + 3 10 2014 31 + + Set the index to become the 'month' column: + + >>> df.set_index('month') + year sale + month + 1 2012 55 + 4 2014 40 + 7 2013 84 + 10 2014 31 + + Create a MultiIndex using columns 'year' and 'month': + + >>> df.set_index(['year', 'month']) + sale + year month + 2012 1 55 + 2014 4 40 + 2013 7 84 + 2014 10 31 + + Create a MultiIndex using an Index and a column: + + >>> df.set_index([pd.Index([1, 2, 3, 4]), 'year']) + month sale + year + 1 2012 1 55 + 2 2014 4 40 + 3 2013 7 84 + 4 2014 10 31 + + Create a MultiIndex using two Series: + + >>> s = pd.Series([1, 2, 3, 4]) + >>> df.set_index([s, s**2]) + month year sale + 1 1 1 2012 55 + 2 4 4 2014 40 + 3 9 7 2013 84 + 4 16 10 2014 31 + """ + inplace = validate_bool_kwarg(inplace, "inplace") + self._check_inplace_and_allows_duplicate_labels(inplace) + if not isinstance(keys, list): + keys = [keys] + + err_msg = ( + 'The parameter "keys" may be a column key, one-dimensional ' + "array, or a list containing only valid column keys and " + "one-dimensional arrays." + ) + + missing: list[Hashable] = [] + for col in keys: + if isinstance(col, (Index, Series, np.ndarray, list, abc.Iterator)): + # arrays are fine as long as they are one-dimensional + # iterators get converted to list below + if getattr(col, "ndim", 1) != 1: + raise ValueError(err_msg) + else: + # everything else gets tried as a key; see GH 24969 + try: + found = col in self.columns + except TypeError as err: + raise TypeError( + f"{err_msg}. 
Received column of type {type(col)}" + ) from err + else: + if not found: + missing.append(col) + + if missing: + raise KeyError(f"None of {missing} are in the columns") + + if inplace: + frame = self + else: + frame = self.copy() + + arrays = [] + names: list[Hashable] = [] + if append: + names = list(self.index.names) + if isinstance(self.index, MultiIndex): + for i in range(self.index.nlevels): + arrays.append(self.index._get_level_values(i)) + else: + arrays.append(self.index) + + to_remove: list[Hashable] = [] + for col in keys: + if isinstance(col, MultiIndex): + for n in range(col.nlevels): + arrays.append(col._get_level_values(n)) + names.extend(col.names) + elif isinstance(col, (Index, Series)): + # if Index then not MultiIndex (treated above) + + # error: Argument 1 to "append" of "list" has incompatible type + # "Union[Index, Series]"; expected "Index" + arrays.append(col) # type:ignore[arg-type] + names.append(col.name) + elif isinstance(col, (list, np.ndarray)): + # error: Argument 1 to "append" of "list" has incompatible type + # "Union[List[Any], ndarray]"; expected "Index" + arrays.append(col) # type: ignore[arg-type] + names.append(None) + elif isinstance(col, abc.Iterator): + # error: Argument 1 to "append" of "list" has incompatible type + # "List[Any]"; expected "Index" + arrays.append(list(col)) # type: ignore[arg-type] + names.append(None) + # from here, col can only be a column label + else: + arrays.append(frame[col]._values) + names.append(col) + if drop: + to_remove.append(col) + + if len(arrays[-1]) != len(self): + # check newest element against length of calling frame, since + # ensure_index_from_sequences would not raise for append=False. + raise ValueError( + f"Length mismatch: Expected {len(self)} rows, " + f"received array of length {len(arrays[-1])}" + ) + + index = ensure_index_from_sequences(arrays, names) + + if verify_integrity and not index.is_unique: + duplicates = index[index.duplicated()].unique() + raise ValueError(f"Index has duplicate keys: {duplicates}") + + # use set to handle duplicate column names gracefully in case of drop + for c in set(to_remove): + del frame[c] + + # clear up memory usage + index._cleanup() + + frame.index = index + + if not inplace: + return frame + + @overload + def reset_index( + self, + level: Hashable | Sequence[Hashable] | None = ..., + drop: bool = ..., + inplace: Literal[False] = ..., + col_level: Hashable = ..., + col_fill: Hashable = ..., + ) -> DataFrame: + ... + + @overload + def reset_index( + self, + level: Hashable | Sequence[Hashable] | None, + drop: bool, + inplace: Literal[True], + col_level: Hashable = ..., + col_fill: Hashable = ..., + ) -> None: + ... + + @overload + def reset_index( + self, + *, + drop: bool, + inplace: Literal[True], + col_level: Hashable = ..., + col_fill: Hashable = ..., + ) -> None: + ... + + @overload + def reset_index( + self, + level: Hashable | Sequence[Hashable] | None, + *, + inplace: Literal[True], + col_level: Hashable = ..., + col_fill: Hashable = ..., + ) -> None: + ... + + @overload + def reset_index( + self, + *, + inplace: Literal[True], + col_level: Hashable = ..., + col_fill: Hashable = ..., + ) -> None: + ... + + @overload + def reset_index( + self, + level: Hashable | Sequence[Hashable] | None = ..., + drop: bool = ..., + inplace: bool = ..., + col_level: Hashable = ..., + col_fill: Hashable = ..., + ) -> DataFrame | None: + ... 
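+
+    # A minimal sketch of the ``set_index`` validation above (hypothetical
+    # frame; the messages mirror the checks in the method body):
+    #
+    #   >>> df = pd.DataFrame({"a": [1, 2, 3]})
+    #   >>> df.set_index("b")                 # doctest: +SKIP
+    #   KeyError: "None of ['b'] are in the columns"
+    #   >>> df.set_index([pd.Index([1, 2])])  # doctest: +SKIP
+    #   ValueError: Length mismatch: Expected 3 rows, received array of length 2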
+ + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "level"]) + def reset_index( + self, + level: Hashable | Sequence[Hashable] | None = None, + drop: bool = False, + inplace: bool = False, + col_level: Hashable = 0, + col_fill: Hashable = "", + ) -> DataFrame | None: + """ + Reset the index, or a level of it. + + Reset the index of the DataFrame, and use the default one instead. + If the DataFrame has a MultiIndex, this method can remove one or more + levels. + + Parameters + ---------- + level : int, str, tuple, or list, default None + Only remove the given levels from the index. Removes all levels by + default. + drop : bool, default False + Do not try to insert index into dataframe columns. This resets + the index to the default integer index. + inplace : bool, default False + Modify the DataFrame in place (do not create a new object). + col_level : int or str, default 0 + If the columns have multiple levels, determines which level the + labels are inserted into. By default it is inserted into the first + level. + col_fill : object, default '' + If the columns have multiple levels, determines how the other + levels are named. If None then the index name is repeated. + + Returns + ------- + DataFrame or None + DataFrame with the new index or None if ``inplace=True``. + + See Also + -------- + DataFrame.set_index : Opposite of reset_index. + DataFrame.reindex : Change to new indices or expand indices. + DataFrame.reindex_like : Change to same indices as other DataFrame. + + Examples + -------- + >>> df = pd.DataFrame([('bird', 389.0), + ... ('bird', 24.0), + ... ('mammal', 80.5), + ... ('mammal', np.nan)], + ... index=['falcon', 'parrot', 'lion', 'monkey'], + ... columns=('class', 'max_speed')) + >>> df + class max_speed + falcon bird 389.0 + parrot bird 24.0 + lion mammal 80.5 + monkey mammal NaN + + When we reset the index, the old index is added as a column, and a + new sequential index is used: + + >>> df.reset_index() + index class max_speed + 0 falcon bird 389.0 + 1 parrot bird 24.0 + 2 lion mammal 80.5 + 3 monkey mammal NaN + + We can use the `drop` parameter to avoid the old index being added as + a column: + + >>> df.reset_index(drop=True) + class max_speed + 0 bird 389.0 + 1 bird 24.0 + 2 mammal 80.5 + 3 mammal NaN + + You can also use `reset_index` with `MultiIndex`. + + >>> index = pd.MultiIndex.from_tuples([('bird', 'falcon'), + ... ('bird', 'parrot'), + ... ('mammal', 'lion'), + ... ('mammal', 'monkey')], + ... names=['class', 'name']) + >>> columns = pd.MultiIndex.from_tuples([('speed', 'max'), + ... ('species', 'type')]) + >>> df = pd.DataFrame([(389.0, 'fly'), + ... ( 24.0, 'fly'), + ... ( 80.5, 'run'), + ... (np.nan, 'jump')], + ... index=index, + ... columns=columns) + >>> df + speed species + max type + class name + bird falcon 389.0 fly + parrot 24.0 fly + mammal lion 80.5 run + monkey NaN jump + + If the index has multiple levels, we can reset a subset of them: + + >>> df.reset_index(level='class') + class speed species + max type + name + falcon bird 389.0 fly + parrot bird 24.0 fly + lion mammal 80.5 run + monkey mammal NaN jump + + If we are not dropping the index, by default, it is placed in the top + level. 
We can place it in another level: + + >>> df.reset_index(level='class', col_level=1) + speed species + class max type + name + falcon bird 389.0 fly + parrot bird 24.0 fly + lion mammal 80.5 run + monkey mammal NaN jump + + When the index is inserted under another level, we can specify under + which one with the parameter `col_fill`: + + >>> df.reset_index(level='class', col_level=1, col_fill='species') + species speed species + class max type + name + falcon bird 389.0 fly + parrot bird 24.0 fly + lion mammal 80.5 run + monkey mammal NaN jump + + If we specify a nonexistent level for `col_fill`, it is created: + + >>> df.reset_index(level='class', col_level=1, col_fill='genus') + genus speed species + class max type + name + falcon bird 389.0 fly + parrot bird 24.0 fly + lion mammal 80.5 run + monkey mammal NaN jump + """ + inplace = validate_bool_kwarg(inplace, "inplace") + self._check_inplace_and_allows_duplicate_labels(inplace) + if inplace: + new_obj = self + else: + new_obj = self.copy() + + new_index = default_index(len(new_obj)) + if level is not None: + if not isinstance(level, (tuple, list)): + level = [level] + level = [self.index._get_level_number(lev) for lev in level] + if len(level) < self.index.nlevels: + new_index = self.index.droplevel(level) + + if not drop: + to_insert: Iterable[tuple[Any, Any | None]] + if isinstance(self.index, MultiIndex): + names = com.fill_missing_names(self.index.names) + to_insert = zip(self.index.levels, self.index.codes) + else: + default = "index" if "index" not in self else "level_0" + names = [default] if self.index.name is None else [self.index.name] + to_insert = ((self.index, None),) + + multi_col = isinstance(self.columns, MultiIndex) + for i, (lev, lab) in reversed(list(enumerate(to_insert))): + if level is not None and i not in level: + continue + name = names[i] + if multi_col: + col_name = list(name) if isinstance(name, tuple) else [name] + if col_fill is None: + if len(col_name) not in (1, self.columns.nlevels): + raise ValueError( + "col_fill=None is incompatible " + f"with incomplete column name {name}" + ) + col_fill = col_name[0] + + lev_num = self.columns._get_level_number(col_level) + name_lst = [col_fill] * lev_num + col_name + missing = self.columns.nlevels - len(name_lst) + name_lst += [col_fill] * missing + name = tuple(name_lst) + + # to ndarray and maybe infer different dtype + level_values = lev._values + if level_values.dtype == np.object_: + level_values = lib.maybe_convert_objects(level_values) + + if lab is not None: + # if we have the codes, extract the values with a mask + level_values = algorithms.take( + level_values, lab, allow_fill=True, fill_value=lev._na_value + ) + + new_obj.insert(0, name, level_values) + + new_obj.index = new_index + if not inplace: + return new_obj + + return None + + # ---------------------------------------------------------------------- + # Reindex-based selection methods + + @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) + def isna(self) -> DataFrame: + result = self._constructor(self._mgr.isna(func=isna)) + return result.__finalize__(self, method="isna") + + @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) + def isnull(self) -> DataFrame: + """ + DataFrame.isnull is an alias for DataFrame.isna. 
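+
+        For example, a one-column frame with a missing value should give:
+
+        >>> pd.DataFrame({"a": [1.0, None]}).isnull()
+               a
+        0  False
+        1   True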
+ """ + return self.isna() + + @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"]) + def notna(self) -> DataFrame: + return ~self.isna() + + @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"]) + def notnull(self) -> DataFrame: + """ + DataFrame.notnull is an alias for DataFrame.notna. + """ + return ~self.isna() + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + def dropna( + self, + axis: Axis = 0, + how: str = "any", + thresh=None, + subset: IndexLabel = None, + inplace: bool = False, + ): + """ + Remove missing values. + + See the :ref:`User Guide ` for more on which values are + considered missing, and how to work with missing data. + + Parameters + ---------- + axis : {0 or 'index', 1 or 'columns'}, default 0 + Determine if rows or columns which contain missing values are + removed. + + * 0, or 'index' : Drop rows which contain missing values. + * 1, or 'columns' : Drop columns which contain missing value. + + .. versionchanged:: 1.0.0 + + Pass tuple or list to drop on multiple axes. + Only a single axis is allowed. + + how : {'any', 'all'}, default 'any' + Determine if row or column is removed from DataFrame, when we have + at least one NA or all NA. + + * 'any' : If any NA values are present, drop that row or column. + * 'all' : If all values are NA, drop that row or column. + + thresh : int, optional + Require that many non-NA values. + subset : column label or sequence of labels, optional + Labels along other axis to consider, e.g. if you are dropping rows + these would be a list of columns to include. + inplace : bool, default False + If True, do operation inplace and return None. + + Returns + ------- + DataFrame or None + DataFrame with NA entries dropped from it or None if ``inplace=True``. + + See Also + -------- + DataFrame.isna: Indicate missing values. + DataFrame.notna : Indicate existing (non-missing) values. + DataFrame.fillna : Replace missing values. + Series.dropna : Drop missing values. + Index.dropna : Drop missing indices. + + Examples + -------- + >>> df = pd.DataFrame({"name": ['Alfred', 'Batman', 'Catwoman'], + ... "toy": [np.nan, 'Batmobile', 'Bullwhip'], + ... "born": [pd.NaT, pd.Timestamp("1940-04-25"), + ... pd.NaT]}) + >>> df + name toy born + 0 Alfred NaN NaT + 1 Batman Batmobile 1940-04-25 + 2 Catwoman Bullwhip NaT + + Drop the rows where at least one element is missing. + + >>> df.dropna() + name toy born + 1 Batman Batmobile 1940-04-25 + + Drop the columns where at least one element is missing. + + >>> df.dropna(axis='columns') + name + 0 Alfred + 1 Batman + 2 Catwoman + + Drop the rows where all elements are missing. + + >>> df.dropna(how='all') + name toy born + 0 Alfred NaN NaT + 1 Batman Batmobile 1940-04-25 + 2 Catwoman Bullwhip NaT + + Keep only the rows with at least 2 non-NA values. + + >>> df.dropna(thresh=2) + name toy born + 1 Batman Batmobile 1940-04-25 + 2 Catwoman Bullwhip NaT + + Define in which columns to look for missing values. + + >>> df.dropna(subset=['name', 'toy']) + name toy born + 1 Batman Batmobile 1940-04-25 + 2 Catwoman Bullwhip NaT + + Keep the DataFrame with valid entries in the same variable. 
+ + >>> df.dropna(inplace=True) + >>> df + name toy born + 1 Batman Batmobile 1940-04-25 + """ + inplace = validate_bool_kwarg(inplace, "inplace") + if isinstance(axis, (tuple, list)): + # GH20987 + raise TypeError("supplying multiple axes to axis is no longer supported.") + + axis = self._get_axis_number(axis) + agg_axis = 1 - axis + + agg_obj = self + if subset is not None: + # subset needs to be list + if not is_list_like(subset): + subset = [subset] + ax = self._get_axis(agg_axis) + indices = ax.get_indexer_for(subset) + check = indices == -1 + if check.any(): + raise KeyError(np.array(subset)[check].tolist()) + agg_obj = self.take(indices, axis=agg_axis) + + if thresh is not None: + count = agg_obj.count(axis=agg_axis) + mask = count >= thresh + elif how == "any": + # faster equivalent to 'agg_obj.count(agg_axis) == self.shape[agg_axis]' + mask = notna(agg_obj).all(axis=agg_axis, bool_only=False) + elif how == "all": + # faster equivalent to 'agg_obj.count(agg_axis) > 0' + mask = notna(agg_obj).any(axis=agg_axis, bool_only=False) + else: + if how is not None: + raise ValueError(f"invalid how option: {how}") + else: + raise TypeError("must specify how or thresh") + + if np.all(mask): + result = self.copy() + else: + result = self.loc(axis=axis)[mask] + + if inplace: + self._update_inplace(result) + else: + return result + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "subset"]) + def drop_duplicates( + self, + subset: Hashable | Sequence[Hashable] | None = None, + keep: Literal["first"] | Literal["last"] | Literal[False] = "first", + inplace: bool = False, + ignore_index: bool = False, + ) -> DataFrame | None: + """ + Return DataFrame with duplicate rows removed. + + Considering certain columns is optional. Indexes, including time indexes + are ignored. + + Parameters + ---------- + subset : column label or sequence of labels, optional + Only consider certain columns for identifying duplicates, by + default use all of the columns. + keep : {'first', 'last', False}, default 'first' + Determines which duplicates (if any) to keep. + - ``first`` : Drop duplicates except for the first occurrence. + - ``last`` : Drop duplicates except for the last occurrence. + - False : Drop all duplicates. + inplace : bool, default False + Whether to drop duplicates in place or to return a copy. + ignore_index : bool, default False + If True, the resulting axis will be labeled 0, 1, …, n - 1. + + .. versionadded:: 1.0.0 + + Returns + ------- + DataFrame or None + DataFrame with duplicates removed or None if ``inplace=True``. + + See Also + -------- + DataFrame.value_counts: Count unique combinations of columns. + + Examples + -------- + Consider dataset containing ramen rating. + + >>> df = pd.DataFrame({ + ... 'brand': ['Yum Yum', 'Yum Yum', 'Indomie', 'Indomie', 'Indomie'], + ... 'style': ['cup', 'cup', 'cup', 'pack', 'pack'], + ... 'rating': [4, 4, 3.5, 15, 5] + ... }) + >>> df + brand style rating + 0 Yum Yum cup 4.0 + 1 Yum Yum cup 4.0 + 2 Indomie cup 3.5 + 3 Indomie pack 15.0 + 4 Indomie pack 5.0 + + By default, it removes duplicate rows based on all columns. + + >>> df.drop_duplicates() + brand style rating + 0 Yum Yum cup 4.0 + 2 Indomie cup 3.5 + 3 Indomie pack 15.0 + 4 Indomie pack 5.0 + + To remove duplicates on specific column(s), use ``subset``. + + >>> df.drop_duplicates(subset=['brand']) + brand style rating + 0 Yum Yum cup 4.0 + 2 Indomie cup 3.5 + + To remove duplicates and keep last occurrences, use ``keep``. 
+
+        >>> df.drop_duplicates(subset=['brand', 'style'], keep='last')
+             brand style  rating
+        1  Yum Yum   cup     4.0
+        2  Indomie   cup     3.5
+        4  Indomie  pack     5.0
+        """
+        if self.empty:
+            return self.copy()
+
+        inplace = validate_bool_kwarg(inplace, "inplace")
+        ignore_index = validate_bool_kwarg(ignore_index, "ignore_index")
+        duplicated = self.duplicated(subset, keep=keep)
+
+        result = self[-duplicated]
+        if ignore_index:
+            result.index = default_index(len(result))
+
+        if inplace:
+            self._update_inplace(result)
+            return None
+        else:
+            return result
+
+    def duplicated(
+        self,
+        subset: Hashable | Sequence[Hashable] | None = None,
+        keep: Literal["first"] | Literal["last"] | Literal[False] = "first",
+    ) -> Series:
+        """
+        Return boolean Series denoting duplicate rows.
+
+        Considering certain columns is optional.
+
+        Parameters
+        ----------
+        subset : column label or sequence of labels, optional
+            Only consider certain columns for identifying duplicates, by
+            default use all of the columns.
+        keep : {'first', 'last', False}, default 'first'
+            Determines which duplicates (if any) to mark.
+
+            - ``first`` : Mark duplicates as ``True`` except for the first occurrence.
+            - ``last`` : Mark duplicates as ``True`` except for the last occurrence.
+            - False : Mark all duplicates as ``True``.
+
+        Returns
+        -------
+        Series
+            Boolean series for each duplicated row.
+
+        See Also
+        --------
+        Index.duplicated : Equivalent method on index.
+        Series.duplicated : Equivalent method on Series.
+        Series.drop_duplicates : Remove duplicate values from Series.
+        DataFrame.drop_duplicates : Remove duplicate values from DataFrame.
+
+        Examples
+        --------
+        Consider a dataset containing ramen ratings.
+
+        >>> df = pd.DataFrame({
+        ...     'brand': ['Yum Yum', 'Yum Yum', 'Indomie', 'Indomie', 'Indomie'],
+        ...     'style': ['cup', 'cup', 'cup', 'pack', 'pack'],
+        ...     'rating': [4, 4, 3.5, 15, 5]
+        ... })
+        >>> df
+             brand style  rating
+        0  Yum Yum   cup     4.0
+        1  Yum Yum   cup     4.0
+        2  Indomie   cup     3.5
+        3  Indomie  pack    15.0
+        4  Indomie  pack     5.0
+
+        By default, for each set of duplicated values, the first occurrence
+        is set to False and all others to True.
+
+        >>> df.duplicated()
+        0    False
+        1     True
+        2    False
+        3    False
+        4    False
+        dtype: bool
+
+        By using 'last', the last occurrence of each set of duplicated values
+        is set to False and all others to True.
+
+        >>> df.duplicated(keep='last')
+        0     True
+        1    False
+        2    False
+        3    False
+        4    False
+        dtype: bool
+
+        By setting ``keep`` to False, all duplicates are True.
+
+        >>> df.duplicated(keep=False)
+        0     True
+        1     True
+        2    False
+        3    False
+        4    False
+        dtype: bool
+
+        To find duplicates on specific column(s), use ``subset``.
+ + >>> df.duplicated(subset=['brand']) + 0 False + 1 True + 2 False + 3 True + 4 True + dtype: bool + """ + + if self.empty: + return self._constructor_sliced(dtype=bool) + + def f(vals) -> tuple[np.ndarray, int]: + labels, shape = algorithms.factorize(vals, size_hint=len(self)) + return labels.astype("i8", copy=False), len(shape) + + if subset is None: + # https://github.com/pandas-dev/pandas/issues/28770 + # Incompatible types in assignment (expression has type "Index", variable + # has type "Sequence[Any]") + subset = self.columns # type: ignore[assignment] + elif ( + not np.iterable(subset) + or isinstance(subset, str) + or isinstance(subset, tuple) + and subset in self.columns + ): + subset = (subset,) + + # needed for mypy since can't narrow types using np.iterable + subset = cast(Sequence, subset) + + # Verify all columns in subset exist in the queried dataframe + # Otherwise, raise a KeyError, same as if you try to __getitem__ with a + # key that doesn't exist. + diff = Index(subset).difference(self.columns) + if not diff.empty: + raise KeyError(diff) + + vals = (col.values for name, col in self.items() if name in subset) + labels, shape = map(list, zip(*map(f, vals))) + + ids = get_group_index( + labels, + # error: Argument 1 to "tuple" has incompatible type "List[_T]"; + # expected "Iterable[int]" + tuple(shape), # type: ignore[arg-type] + sort=False, + xnull=False, + ) + result = self._constructor_sliced(duplicated(ids, keep), index=self.index) + return result.__finalize__(self, method="duplicated") + + # ---------------------------------------------------------------------- + # Sorting + # TODO: Just move the sort_values doc here. + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "by"]) + @Substitution(**_shared_doc_kwargs) + @Appender(NDFrame.sort_values.__doc__) + # error: Signature of "sort_values" incompatible with supertype "NDFrame" + def sort_values( # type: ignore[override] + self, + by, + axis: Axis = 0, + ascending=True, + inplace: bool = False, + kind: str = "quicksort", + na_position: str = "last", + ignore_index: bool = False, + key: ValueKeyFunc = None, + ): + inplace = validate_bool_kwarg(inplace, "inplace") + axis = self._get_axis_number(axis) + ascending = validate_ascending(ascending) + if not isinstance(by, list): + by = [by] + if is_sequence(ascending) and len(by) != len(ascending): + raise ValueError( + f"Length of ascending ({len(ascending)}) != length of by ({len(by)})" + ) + if len(by) > 1: + + keys = [self._get_label_or_level_values(x, axis=axis) for x in by] + + # need to rewrap columns in Series to apply key function + if key is not None: + # error: List comprehension has incompatible type List[Series]; + # expected List[ndarray] + keys = [ + Series(k, name=name) # type: ignore[misc] + for (k, name) in zip(keys, by) + ] + + indexer = lexsort_indexer( + keys, orders=ascending, na_position=na_position, key=key + ) + elif len(by): + # len(by) == 1 + + by = by[0] + k = self._get_label_or_level_values(by, axis=axis) + + # need to rewrap column in Series to apply key function + if key is not None: + # error: Incompatible types in assignment (expression has type + # "Series", variable has type "ndarray") + k = Series(k, name=by) # type: ignore[assignment] + + if isinstance(ascending, (tuple, list)): + ascending = ascending[0] + + indexer = nargsort( + k, kind=kind, ascending=ascending, na_position=na_position, key=key + ) + else: + return self.copy() + + new_data = self._mgr.take( + indexer, axis=self._get_block_manager_axis(axis), 
verify=False + ) + + if ignore_index: + new_data.set_axis( + self._get_block_manager_axis(axis), default_index(len(indexer)) + ) + + result = self._constructor(new_data) + if inplace: + return self._update_inplace(result) + else: + return result.__finalize__(self, method="sort_values") + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + def sort_index( + self, + axis: Axis = 0, + level: Level | None = None, + ascending: bool | int | Sequence[bool | int] = True, + inplace: bool = False, + kind: str = "quicksort", + na_position: str = "last", + sort_remaining: bool = True, + ignore_index: bool = False, + key: IndexKeyFunc = None, + ): + """ + Sort object by labels (along an axis). + + Returns a new DataFrame sorted by label if `inplace` argument is + ``False``, otherwise updates the original DataFrame and returns None. + + Parameters + ---------- + axis : {0 or 'index', 1 or 'columns'}, default 0 + The axis along which to sort. The value 0 identifies the rows, + and 1 identifies the columns. + level : int or level name or list of ints or list of level names + If not None, sort on values in specified index level(s). + ascending : bool or list-like of bools, default True + Sort ascending vs. descending. When the index is a MultiIndex the + sort direction can be controlled for each level individually. + inplace : bool, default False + If True, perform operation in-place. + kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'quicksort' + Choice of sorting algorithm. See also :func:`numpy.sort` for more + information. `mergesort` and `stable` are the only stable algorithms. For + DataFrames, this option is only applied when sorting on a single + column or label. + na_position : {'first', 'last'}, default 'last' + Puts NaNs at the beginning if `first`; `last` puts NaNs at the end. + Not implemented for MultiIndex. + sort_remaining : bool, default True + If True and sorting by level and index is multilevel, sort by other + levels too (in order) after sorting by specified level. + ignore_index : bool, default False + If True, the resulting axis will be labeled 0, 1, …, n - 1. + + .. versionadded:: 1.0.0 + + key : callable, optional + If not None, apply the key function to the index values + before sorting. This is similar to the `key` argument in the + builtin :meth:`sorted` function, with the notable difference that + this `key` function should be *vectorized*. It should expect an + ``Index`` and return an ``Index`` of the same shape. For MultiIndex + inputs, the key is applied *per level*. + + .. versionadded:: 1.1.0 + + Returns + ------- + DataFrame or None + The original DataFrame sorted by the labels or None if ``inplace=True``. + + See Also + -------- + Series.sort_index : Sort Series by the index. + DataFrame.sort_values : Sort DataFrame by the value. + Series.sort_values : Sort Series by the value. + + Examples + -------- + >>> df = pd.DataFrame([1, 2, 3, 4, 5], index=[100, 29, 234, 1, 150], + ... columns=['A']) + >>> df.sort_index() + A + 1 4 + 29 2 + 100 1 + 150 5 + 234 3 + + By default, it sorts in ascending order, to sort in descending order, + use ``ascending=False`` + + >>> df.sort_index(ascending=False) + A + 234 3 + 150 5 + 100 1 + 29 2 + 1 4 + + A key function can be specified which is applied to the index before + sorting. For a ``MultiIndex`` this is applied to each level separately. 
+
+        >>> df = pd.DataFrame({"a": [1, 2, 3, 4]}, index=['A', 'b', 'C', 'd'])
+        >>> df.sort_index(key=lambda x: x.str.lower())
+           a
+        A  1
+        b  2
+        C  3
+        d  4
+        """
+        return super().sort_index(
+            axis,
+            level,
+            ascending,
+            inplace,
+            kind,
+            na_position,
+            sort_remaining,
+            ignore_index,
+            key,
+        )
+
+    def value_counts(
+        self,
+        subset: Sequence[Hashable] | None = None,
+        normalize: bool = False,
+        sort: bool = True,
+        ascending: bool = False,
+        dropna: bool = True,
+    ):
+        """
+        Return a Series containing counts of unique rows in the DataFrame.
+
+        .. versionadded:: 1.1.0
+
+        Parameters
+        ----------
+        subset : list-like, optional
+            Columns to use when counting unique combinations.
+        normalize : bool, default False
+            Return proportions rather than frequencies.
+        sort : bool, default True
+            Sort by frequencies.
+        ascending : bool, default False
+            Sort in ascending order.
+        dropna : bool, default True
+            Don’t include counts of rows that contain NA values.
+
+            .. versionadded:: 1.3.0
+
+        Returns
+        -------
+        Series
+
+        See Also
+        --------
+        Series.value_counts: Equivalent method on Series.
+
+        Notes
+        -----
+        The returned Series will have a MultiIndex with one level per input
+        column. By default, rows that contain any NA values are omitted from
+        the result. By default, the resulting Series will be in descending
+        order so that the first element is the most frequently-occurring row.
+
+        Examples
+        --------
+        >>> df = pd.DataFrame({'num_legs': [2, 4, 4, 6],
+        ...                    'num_wings': [2, 0, 0, 0]},
+        ...                   index=['falcon', 'dog', 'cat', 'ant'])
+        >>> df
+                num_legs  num_wings
+        falcon         2          2
+        dog            4          0
+        cat            4          0
+        ant            6          0
+
+        >>> df.value_counts()
+        num_legs  num_wings
+        4         0            2
+        2         2            1
+        6         0            1
+        dtype: int64
+
+        >>> df.value_counts(sort=False)
+        num_legs  num_wings
+        2         2            1
+        4         0            2
+        6         0            1
+        dtype: int64
+
+        >>> df.value_counts(ascending=True)
+        num_legs  num_wings
+        2         2            1
+        6         0            1
+        4         0            2
+        dtype: int64
+
+        >>> df.value_counts(normalize=True)
+        num_legs  num_wings
+        4         0            0.50
+        2         2            0.25
+        6         0            0.25
+        dtype: float64
+
+        With `dropna` set to `False` we can also count rows with NA values.
+
+        >>> df = pd.DataFrame({'first_name': ['John', 'Anne', 'John', 'Beth'],
+        ...                    'middle_name': ['Smith', pd.NA, pd.NA, 'Louise']})
+        >>> df
+          first_name middle_name
+        0       John       Smith
+        1       Anne        <NA>
+        2       John        <NA>
+        3       Beth      Louise
+
+        >>> df.value_counts()
+        first_name  middle_name
+        Beth        Louise         1
+        John        Smith          1
+        dtype: int64
+
+        >>> df.value_counts(dropna=False)
+        first_name  middle_name
+        Anne        NaN            1
+        Beth        Louise         1
+        John        Smith          1
+                    NaN            1
+        dtype: int64
+        """
+        if subset is None:
+            subset = self.columns.tolist()
+
+        counts = self.groupby(subset, dropna=dropna).grouper.size()
+
+        if sort:
+            counts = counts.sort_values(ascending=ascending)
+        if normalize:
+            counts /= counts.sum()
+
+        # Force MultiIndex for single column
+        if len(subset) == 1:
+            counts.index = MultiIndex.from_arrays(
+                [counts.index], names=[counts.index.name]
+            )
+
+        return counts
+
+    def nlargest(self, n: int, columns: IndexLabel, keep: str = "first") -> DataFrame:
+        """
+        Return the first `n` rows ordered by `columns` in descending order.
+
+        Return the first `n` rows with the largest values in `columns`, in
+        descending order. The columns that are not specified are returned as
+        well, but not used for ordering.
+
+        This method is equivalent to
+        ``df.sort_values(columns, ascending=False).head(n)``, but more
+        performant.
+
+        Parameters
+        ----------
+        n : int
+            Number of rows to return.
+        columns : label or list of labels
+            Column label(s) to order by.
+        keep : {'first', 'last', 'all'}, default 'first'
+            Where there are duplicate values:
+
+            - ``first`` : prioritize the first occurrence(s)
+            - ``last`` : prioritize the last occurrence(s)
+            - ``all`` : do not drop any duplicates, even if it means
+              selecting more than `n` items.
+
+        Returns
+        -------
+        DataFrame
+            The first `n` rows ordered by the given columns in descending
+            order.
+
+        See Also
+        --------
+        DataFrame.nsmallest : Return the first `n` rows ordered by `columns` in
+            ascending order.
+        DataFrame.sort_values : Sort DataFrame by the values.
+        DataFrame.head : Return the first `n` rows without re-ordering.
+
+        Notes
+        -----
+        This function cannot be used with all column types. For example, when
+        specifying columns with `object` or `category` dtypes, ``TypeError`` is
+        raised.
+
+        Examples
+        --------
+        >>> df = pd.DataFrame({'population': [59000000, 65000000, 434000,
+        ...                                   434000, 434000, 337000, 11300,
+        ...                                   11300, 11300],
+        ...                    'GDP': [1937894, 2583560, 12011, 4520, 12128,
+        ...                            17036, 182, 38, 311],
+        ...                    'alpha-2': ["IT", "FR", "MT", "MV", "BN",
+        ...                                "IS", "NR", "TV", "AI"]},
+        ...                   index=["Italy", "France", "Malta",
+        ...                          "Maldives", "Brunei", "Iceland",
+        ...                          "Nauru", "Tuvalu", "Anguilla"])
+        >>> df
+                  population      GDP alpha-2
+        Italy       59000000  1937894      IT
+        France      65000000  2583560      FR
+        Malta         434000    12011      MT
+        Maldives      434000     4520      MV
+        Brunei        434000    12128      BN
+        Iceland       337000    17036      IS
+        Nauru          11300      182      NR
+        Tuvalu         11300       38      TV
+        Anguilla       11300      311      AI
+
+        In the following example, we will use ``nlargest`` to select the three
+        rows having the largest values in column "population".
+
+        >>> df.nlargest(3, 'population')
+                  population      GDP alpha-2
+        France      65000000  2583560      FR
+        Italy       59000000  1937894      IT
+        Malta         434000    12011      MT
+
+        When using ``keep='last'``, ties are resolved in reverse order:
+
+        >>> df.nlargest(3, 'population', keep='last')
+                  population      GDP alpha-2
+        France      65000000  2583560      FR
+        Italy       59000000  1937894      IT
+        Brunei        434000    12128      BN
+
+        When using ``keep='all'``, all duplicate items are maintained:
+
+        >>> df.nlargest(3, 'population', keep='all')
+                  population      GDP alpha-2
+        France      65000000  2583560      FR
+        Italy       59000000  1937894      IT
+        Malta         434000    12011      MT
+        Maldives      434000     4520      MV
+        Brunei        434000    12128      BN
+
+        To order by the largest values in column "population" and then "GDP",
+        we can specify multiple columns like in the next example.
+
+        >>> df.nlargest(3, ['population', 'GDP'])
+                  population      GDP alpha-2
+        France      65000000  2583560      FR
+        Italy       59000000  1937894      IT
+        Brunei        434000    12128      BN
+        """
+        return algorithms.SelectNFrame(self, n=n, keep=keep, columns=columns).nlargest()
+
+    def nsmallest(self, n: int, columns: IndexLabel, keep: str = "first") -> DataFrame:
+        """
+        Return the first `n` rows ordered by `columns` in ascending order.
+
+        Return the first `n` rows with the smallest values in `columns`, in
+        ascending order. The columns that are not specified are returned as
+        well, but not used for ordering.
+
+        This method is equivalent to
+        ``df.sort_values(columns, ascending=True).head(n)``, but more
+        performant.
+
+        Parameters
+        ----------
+        n : int
+            Number of items to retrieve.
+        columns : list or str
+            Column name or names to order by.
+        keep : {'first', 'last', 'all'}, default 'first'
+            Where there are duplicate values:
+
+            - ``first`` : take the first occurrence.
+            - ``last`` : take the last occurrence.
+            - ``all`` : do not drop any duplicates, even if it means
+              selecting more than `n` items.
+ + Returns + ------- + DataFrame + + See Also + -------- + DataFrame.nlargest : Return the first `n` rows ordered by `columns` in + descending order. + DataFrame.sort_values : Sort DataFrame by the values. + DataFrame.head : Return the first `n` rows without re-ordering. + + Examples + -------- + >>> df = pd.DataFrame({'population': [59000000, 65000000, 434000, + ... 434000, 434000, 337000, 337000, + ... 11300, 11300], + ... 'GDP': [1937894, 2583560 , 12011, 4520, 12128, + ... 17036, 182, 38, 311], + ... 'alpha-2': ["IT", "FR", "MT", "MV", "BN", + ... "IS", "NR", "TV", "AI"]}, + ... index=["Italy", "France", "Malta", + ... "Maldives", "Brunei", "Iceland", + ... "Nauru", "Tuvalu", "Anguilla"]) + >>> df + population GDP alpha-2 + Italy 59000000 1937894 IT + France 65000000 2583560 FR + Malta 434000 12011 MT + Maldives 434000 4520 MV + Brunei 434000 12128 BN + Iceland 337000 17036 IS + Nauru 337000 182 NR + Tuvalu 11300 38 TV + Anguilla 11300 311 AI + + In the following example, we will use ``nsmallest`` to select the + three rows having the smallest values in column "population". + + >>> df.nsmallest(3, 'population') + population GDP alpha-2 + Tuvalu 11300 38 TV + Anguilla 11300 311 AI + Iceland 337000 17036 IS + + When using ``keep='last'``, ties are resolved in reverse order: + + >>> df.nsmallest(3, 'population', keep='last') + population GDP alpha-2 + Anguilla 11300 311 AI + Tuvalu 11300 38 TV + Nauru 337000 182 NR + + When using ``keep='all'``, all duplicate items are maintained: + + >>> df.nsmallest(3, 'population', keep='all') + population GDP alpha-2 + Tuvalu 11300 38 TV + Anguilla 11300 311 AI + Iceland 337000 17036 IS + Nauru 337000 182 NR + + To order by the smallest values in column "population" and then "GDP", we can + specify multiple columns like in the next example. + + >>> df.nsmallest(3, ['population', 'GDP']) + population GDP alpha-2 + Tuvalu 11300 38 TV + Anguilla 11300 311 AI + Nauru 337000 182 NR + """ + return algorithms.SelectNFrame( + self, n=n, keep=keep, columns=columns + ).nsmallest() + + @doc( + Series.swaplevel, + klass=_shared_doc_kwargs["klass"], + extra_params=dedent( + """axis : {0 or 'index', 1 or 'columns'}, default 0 + The axis to swap levels on. 0 or 'index' for row-wise, 1 or + 'columns' for column-wise.""" + ), + examples=dedent( + """\ + Examples + -------- + >>> df = pd.DataFrame( + ... {"Grade": ["A", "B", "A", "C"]}, + ... index=[ + ... ["Final exam", "Final exam", "Coursework", "Coursework"], + ... ["History", "Geography", "History", "Geography"], + ... ["January", "February", "March", "April"], + ... ], + ... ) + >>> df + Grade + Final exam History January A + Geography February B + Coursework History March A + Geography April C + + In the following example, we will swap the levels of the indices. + Here, we will swap the levels column-wise, but levels can be swapped row-wise + in a similar manner. Note that column-wise is the default behaviour. + By not supplying any arguments for i and j, we swap the last and second to + last indices. + + >>> df.swaplevel() + Grade + Final exam January History A + February Geography B + Coursework March History A + April Geography C + + By supplying one argument, we can choose which index to swap the last + index with. We can for example swap the first index with the last one as + follows. 
+ + >>> df.swaplevel(0) + Grade + January History Final exam A + February Geography Final exam B + March History Coursework A + April Geography Coursework C + + We can also define explicitly which indices we want to swap by supplying values + for both i and j. Here, we for example swap the first and second indices. + + >>> df.swaplevel(0, 1) + Grade + History Final exam January A + Geography Final exam February B + History Coursework March A + Geography Coursework April C""" + ), + ) + def swaplevel(self, i: Axis = -2, j: Axis = -1, axis: Axis = 0) -> DataFrame: + result = self.copy() + + axis = self._get_axis_number(axis) + + if not isinstance(result._get_axis(axis), MultiIndex): # pragma: no cover + raise TypeError("Can only swap levels on a hierarchical axis.") + + if axis == 0: + assert isinstance(result.index, MultiIndex) + result.index = result.index.swaplevel(i, j) + else: + assert isinstance(result.columns, MultiIndex) + result.columns = result.columns.swaplevel(i, j) + return result + + def reorder_levels(self, order: Sequence[Axis], axis: Axis = 0) -> DataFrame: + """ + Rearrange index levels using input order. May not drop or duplicate levels. + + Parameters + ---------- + order : list of int or list of str + List representing new level order. Reference level by number + (position) or by key (label). + axis : {0 or 'index', 1 or 'columns'}, default 0 + Where to reorder levels. + + Returns + ------- + DataFrame + + Examples + -------- + >>> data = { + ... "class": ["Mammals", "Mammals", "Reptiles"], + ... "diet": ["Omnivore", "Carnivore", "Carnivore"], + ... "species": ["Humans", "Dogs", "Snakes"], + ... } + >>> df = pd.DataFrame(data, columns=["class", "diet", "species"]) + >>> df = df.set_index(["class", "diet"]) + >>> df + species + class diet + Mammals Omnivore Humans + Carnivore Dogs + Reptiles Carnivore Snakes + + Let's reorder the levels of the index: + + >>> df.reorder_levels(["diet", "class"]) + species + diet class + Omnivore Mammals Humans + Carnivore Mammals Dogs + Reptiles Snakes + """ + axis = self._get_axis_number(axis) + if not isinstance(self._get_axis(axis), MultiIndex): # pragma: no cover + raise TypeError("Can only reorder levels on a hierarchical axis.") + + result = self.copy() + + if axis == 0: + assert isinstance(result.index, MultiIndex) + result.index = result.index.reorder_levels(order) + else: + assert isinstance(result.columns, MultiIndex) + result.columns = result.columns.reorder_levels(order) + return result + + # ---------------------------------------------------------------------- + # Arithmetic Methods + + def _cmp_method(self, other, op): + axis = 1 # only relevant for Series other case + + self, other = ops.align_method_FRAME(self, other, axis, flex=False, level=None) + + # See GH#4537 for discussion of scalar op behavior + new_data = self._dispatch_frame_op(other, op, axis=axis) + return self._construct_result(new_data) + + def _arith_method(self, other, op): + if ops.should_reindex_frame_op(self, other, op, 1, 1, None, None): + return ops.frame_arith_method_with_reindex(self, other, op) + + axis = 1 # only relevant for Series other case + other = ops.maybe_prepare_scalar_for_op(other, (self.shape[axis],)) + + self, other = ops.align_method_FRAME(self, other, axis, flex=True, level=None) + + new_data = self._dispatch_frame_op(other, op, axis=axis) + return self._construct_result(new_data) + + _logical_method = _arith_method + + def _dispatch_frame_op(self, right, func: Callable, axis: int | None = None): + """ + Evaluate the frame operation 
func(left, right) by evaluating + column-by-column, dispatching to the Series implementation. + + Parameters + ---------- + right : scalar, Series, or DataFrame + func : arithmetic or comparison operator + axis : {None, 0, 1} + + Returns + ------- + DataFrame + """ + # Get the appropriate array-op to apply to each column/block's values. + array_op = ops.get_array_op(func) + + right = lib.item_from_zerodim(right) + if not is_list_like(right): + # i.e. scalar, faster than checking np.ndim(right) == 0 + with np.errstate(all="ignore"): + bm = self._mgr.apply(array_op, right=right) + return self._constructor(bm) + + elif isinstance(right, DataFrame): + assert self.index.equals(right.index) + assert self.columns.equals(right.columns) + # TODO: The previous assertion `assert right._indexed_same(self)` + # fails in cases with empty columns reached via + # _frame_arith_method_with_reindex + + # TODO operate_blockwise expects a manager of the same type + with np.errstate(all="ignore"): + bm = self._mgr.operate_blockwise( + # error: Argument 1 to "operate_blockwise" of "ArrayManager" has + # incompatible type "Union[ArrayManager, BlockManager]"; expected + # "ArrayManager" + # error: Argument 1 to "operate_blockwise" of "BlockManager" has + # incompatible type "Union[ArrayManager, BlockManager]"; expected + # "BlockManager" + right._mgr, # type: ignore[arg-type] + array_op, + ) + return self._constructor(bm) + + elif isinstance(right, Series) and axis == 1: + # axis=1 means we want to operate row-by-row + assert right.index.equals(self.columns) + + right = right._values + # maybe_align_as_frame ensures we do not have an ndarray here + assert not isinstance(right, np.ndarray) + + with np.errstate(all="ignore"): + arrays = [ + array_op(_left, _right) + for _left, _right in zip(self._iter_column_arrays(), right) + ] + + elif isinstance(right, Series): + assert right.index.equals(self.index) # Handle other cases later + right = right._values + + with np.errstate(all="ignore"): + arrays = [array_op(left, right) for left in self._iter_column_arrays()] + + else: + # Remaining cases have less-obvious dispatch rules + raise NotImplementedError(right) + + return type(self)._from_arrays( + arrays, self.columns, self.index, verify_integrity=False + ) + + def _combine_frame(self, other: DataFrame, func, fill_value=None): + # at this point we have `self._indexed_same(other)` + + if fill_value is None: + # since _arith_op may be called in a loop, avoid function call + # overhead if possible by doing this check once + _arith_op = func + + else: + + def _arith_op(left, right): + # for the mixed_type case where we iterate over columns, + # _arith_op(left, right) is equivalent to + # left._binop(right, func, fill_value=fill_value) + left, right = ops.fill_binop(left, right, fill_value) + return func(left, right) + + new_data = self._dispatch_frame_op(other, _arith_op) + return new_data + + def _construct_result(self, result) -> DataFrame: + """ + Wrap the result of an arithmetic, comparison, or logical operation. 
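+
+ The returned frame reuses this frame's labels: the index and columns
+ are pinned onto the result after construction rather than passed to
+ the constructor, which keeps non-unique column labels intact.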
+ + Parameters + ---------- + result : DataFrame + + Returns + ------- + DataFrame + """ + out = self._constructor(result, copy=False) + # Pin columns instead of passing to constructor for compat with + # non-unique columns case + out.columns = self.columns + out.index = self.index + return out + + def __divmod__(self, other) -> tuple[DataFrame, DataFrame]: + # Naive implementation, room for optimization + div = self // other + mod = self - div * other + return div, mod + + def __rdivmod__(self, other) -> tuple[DataFrame, DataFrame]: + # Naive implementation, room for optimization + div = other // self + mod = other - div * self + return div, mod + + # ---------------------------------------------------------------------- + # Combination-Related + + @doc( + _shared_docs["compare"], + """ +Returns +------- +DataFrame + DataFrame that shows the differences stacked side by side. + + The resulting index will be a MultiIndex with 'self' and 'other' + stacked alternately at the inner level. + +Raises +------ +ValueError + When the two DataFrames don't have identical labels or shape. + +See Also +-------- +Series.compare : Compare with another Series and show differences. +DataFrame.equals : Test whether two objects contain the same elements. + +Notes +----- +Matching NaNs will not appear as a difference. + +Can only compare identically-labeled +(i.e. same shape, identical row and column labels) DataFrames + +Examples +-------- +>>> df = pd.DataFrame( +... {{ +... "col1": ["a", "a", "b", "b", "a"], +... "col2": [1.0, 2.0, 3.0, np.nan, 5.0], +... "col3": [1.0, 2.0, 3.0, 4.0, 5.0] +... }}, +... columns=["col1", "col2", "col3"], +... ) +>>> df + col1 col2 col3 +0 a 1.0 1.0 +1 a 2.0 2.0 +2 b 3.0 3.0 +3 b NaN 4.0 +4 a 5.0 5.0 + +>>> df2 = df.copy() +>>> df2.loc[0, 'col1'] = 'c' +>>> df2.loc[2, 'col3'] = 4.0 +>>> df2 + col1 col2 col3 +0 c 1.0 1.0 +1 a 2.0 2.0 +2 b 3.0 4.0 +3 b NaN 4.0 +4 a 5.0 5.0 + +Align the differences on columns + +>>> df.compare(df2) + col1 col3 + self other self other +0 a c NaN NaN +2 NaN NaN 3.0 4.0 + +Stack the differences on rows + +>>> df.compare(df2, align_axis=0) + col1 col3 +0 self a NaN + other c NaN +2 self NaN 3.0 + other NaN 4.0 + +Keep the equal values + +>>> df.compare(df2, keep_equal=True) + col1 col3 + self other self other +0 a c 1.0 1.0 +2 b b 3.0 4.0 + +Keep all original rows and columns + +>>> df.compare(df2, keep_shape=True) + col1 col2 col3 + self other self other self other +0 a c NaN NaN NaN NaN +1 NaN NaN NaN NaN NaN NaN +2 NaN NaN NaN NaN 3.0 4.0 +3 NaN NaN NaN NaN NaN NaN +4 NaN NaN NaN NaN NaN NaN + +Keep all original rows and columns and also all original values + +>>> df.compare(df2, keep_shape=True, keep_equal=True) + col1 col2 col3 + self other self other self other +0 a c 1.0 1.0 1.0 1.0 +1 a a 2.0 2.0 2.0 2.0 +2 b b 3.0 3.0 3.0 4.0 +3 b b NaN NaN 4.0 4.0 +4 a a 5.0 5.0 5.0 5.0 +""", + klass=_shared_doc_kwargs["klass"], + ) + def compare( + self, + other: DataFrame, + align_axis: Axis = 1, + keep_shape: bool = False, + keep_equal: bool = False, + ) -> DataFrame: + return super().compare( + other=other, + align_axis=align_axis, + keep_shape=keep_shape, + keep_equal=keep_equal, + ) + + def combine( + self, other: DataFrame, func, fill_value=None, overwrite: bool = True + ) -> DataFrame: + """ + Perform column-wise combine with another DataFrame. + + Combines a DataFrame with `other` DataFrame using `func` + to element-wise combine columns. The row and column indexes of the + resulting DataFrame will be the union of the two. 
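+ Positions that are present in only one of the two frames are filled
+ with NaN after alignment, before `func` is applied; the `fill_value`
+ parameter can be used to fill such NaNs before `func` sees them.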
+
+ Parameters
+ ----------
+ other : DataFrame
+ The DataFrame to merge column-wise.
+ func : function
+ Function that takes two series as inputs and returns a Series or a
+ scalar. Used to merge the two dataframes column by column.
+ fill_value : scalar value, default None
+ The value to fill NaNs with prior to passing any column to the
+ merge func.
+ overwrite : bool, default True
+ If True, columns in `self` that do not exist in `other` will be
+ overwritten with NaNs.
+
+ Returns
+ -------
+ DataFrame
+ Combination of the provided DataFrames.
+
+ See Also
+ --------
+ DataFrame.combine_first : Combine two DataFrame objects and default to
+ non-null values in frame calling the method.
+
+ Examples
+ --------
+ Combine using a simple function that chooses the smaller column.
+
+ >>> df1 = pd.DataFrame({'A': [0, 0], 'B': [4, 4]})
+ >>> df2 = pd.DataFrame({'A': [1, 1], 'B': [3, 3]})
+ >>> take_smaller = lambda s1, s2: s1 if s1.sum() < s2.sum() else s2
+ >>> df1.combine(df2, take_smaller)
+ A B
+ 0 0 3
+ 1 0 3
+
+ Example using a true element-wise combine function.
+
+ >>> df1 = pd.DataFrame({'A': [5, 0], 'B': [2, 4]})
+ >>> df2 = pd.DataFrame({'A': [1, 1], 'B': [3, 3]})
+ >>> df1.combine(df2, np.minimum)
+ A B
+ 0 1 2
+ 1 0 3
+
+ Using `fill_value` fills Nones prior to passing the column to the
+ merge function.
+
+ >>> df1 = pd.DataFrame({'A': [0, 0], 'B': [None, 4]})
+ >>> df2 = pd.DataFrame({'A': [1, 1], 'B': [3, 3]})
+ >>> df1.combine(df2, take_smaller, fill_value=-5)
+ A B
+ 0 0 -5.0
+ 1 0 4.0
+
+ However, if the same element in both dataframes is None, that None
+ is preserved.
+
+ >>> df1 = pd.DataFrame({'A': [0, 0], 'B': [None, 4]})
+ >>> df2 = pd.DataFrame({'A': [1, 1], 'B': [None, 3]})
+ >>> df1.combine(df2, take_smaller, fill_value=-5)
+ A B
+ 0 0 -5.0
+ 1 0 3.0
+
+ Example that demonstrates the use of `overwrite` and behavior when
+ the axes differ between the dataframes.
+
+ >>> df1 = pd.DataFrame({'A': [0, 0], 'B': [4, 4]})
+ >>> df2 = pd.DataFrame({'B': [3, 3], 'C': [-10, 1], }, index=[1, 2])
+ >>> df1.combine(df2, take_smaller)
+ A B C
+ 0 NaN NaN NaN
+ 1 NaN 3.0 -10.0
+ 2 NaN 3.0 1.0
+
+ >>> df1.combine(df2, take_smaller, overwrite=False)
+ A B C
+ 0 0.0 NaN NaN
+ 1 0.0 3.0 -10.0
+ 2 NaN 3.0 1.0
+
+ Demonstrating the preference of the passed-in dataframe.
+ + >>> df2 = pd.DataFrame({'B': [3, 3], 'C': [1, 1], }, index=[1, 2]) + >>> df2.combine(df1, take_smaller) + A B C + 0 0.0 NaN NaN + 1 0.0 3.0 NaN + 2 NaN 3.0 NaN + + >>> df2.combine(df1, take_smaller, overwrite=False) + A B C + 0 0.0 NaN NaN + 1 0.0 3.0 1.0 + 2 NaN 3.0 1.0 + """ + other_idxlen = len(other.index) # save for compare + + this, other = self.align(other, copy=False) + new_index = this.index + + if other.empty and len(new_index) == len(self.index): + return self.copy() + + if self.empty and len(other) == other_idxlen: + return other.copy() + + # sorts if possible + new_columns = this.columns.union(other.columns) + do_fill = fill_value is not None + result = {} + for col in new_columns: + series = this[col] + otherSeries = other[col] + + this_dtype = series.dtype + other_dtype = otherSeries.dtype + + this_mask = isna(series) + other_mask = isna(otherSeries) + + # don't overwrite columns unnecessarily + # DO propagate if this column is not in the intersection + if not overwrite and other_mask.all(): + result[col] = this[col].copy() + continue + + if do_fill: + series = series.copy() + otherSeries = otherSeries.copy() + series[this_mask] = fill_value + otherSeries[other_mask] = fill_value + + if col not in self.columns: + # If self DataFrame does not have col in other DataFrame, + # try to promote series, which is all NaN, as other_dtype. + new_dtype = other_dtype + try: + series = series.astype(new_dtype, copy=False) + except ValueError: + # e.g. new_dtype is integer types + pass + else: + # if we have different dtypes, possibly promote + new_dtype = find_common_type([this_dtype, other_dtype]) + series = series.astype(new_dtype, copy=False) + otherSeries = otherSeries.astype(new_dtype, copy=False) + + arr = func(series, otherSeries) + if isinstance(new_dtype, np.dtype): + # if new_dtype is an EA Dtype, then `func` is expected to return + # the correct dtype without any additional casting + arr = maybe_downcast_to_dtype(arr, new_dtype) + + result[col] = arr + + # convert_objects just in case + return self._constructor(result, index=new_index, columns=new_columns) + + def combine_first(self, other: DataFrame) -> DataFrame: + """ + Update null elements with value in the same location in `other`. + + Combine two DataFrame objects by filling null values in one DataFrame + with non-null values from other DataFrame. The row and column indexes + of the resulting DataFrame will be the union of the two. + + Parameters + ---------- + other : DataFrame + Provided DataFrame to use to fill null values. + + Returns + ------- + DataFrame + The result of combining the provided DataFrame with the other object. + + See Also + -------- + DataFrame.combine : Perform series-wise operation on two DataFrames + using a given function. 
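+
+ Notes
+ -----
+ A sketch of the dtype behaviour, based on the implementation that
+ follows: for columns present in both frames, the result dtype is
+ promoted with ``find_common_type``, so combining an integer column
+ with a float column yields a float column. The frames `a` and `b`
+ here are illustrative only:
+
+ >>> a = pd.DataFrame({'A': [np.nan, 2.0]})
+ >>> b = pd.DataFrame({'A': [1, 1]})
+ >>> a.combine_first(b).dtypes
+ A float64
+ dtype: object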
+ + Examples + -------- + >>> df1 = pd.DataFrame({'A': [None, 0], 'B': [None, 4]}) + >>> df2 = pd.DataFrame({'A': [1, 1], 'B': [3, 3]}) + >>> df1.combine_first(df2) + A B + 0 1.0 3.0 + 1 0.0 4.0 + + Null values still persist if the location of that null value + does not exist in `other` + + >>> df1 = pd.DataFrame({'A': [None, 0], 'B': [4, None]}) + >>> df2 = pd.DataFrame({'B': [3, 3], 'C': [1, 1]}, index=[1, 2]) + >>> df1.combine_first(df2) + A B C + 0 NaN 4.0 NaN + 1 0.0 3.0 1.0 + 2 NaN 3.0 1.0 + """ + import pandas.core.computation.expressions as expressions + + def combiner(x, y): + mask = extract_array(isna(x)) + + x_values = extract_array(x, extract_numpy=True) + y_values = extract_array(y, extract_numpy=True) + + # If the column y in other DataFrame is not in first DataFrame, + # just return y_values. + if y.name not in self.columns: + return y_values + + return expressions.where(mask, y_values, x_values) + + combined = self.combine(other, combiner, overwrite=False) + + dtypes = { + col: find_common_type([self.dtypes[col], other.dtypes[col]]) + for col in self.columns.intersection(other.columns) + if not is_dtype_equal(combined.dtypes[col], self.dtypes[col]) + } + + if dtypes: + combined = combined.astype(dtypes) + + return combined + + def update( + self, + other, + join: str = "left", + overwrite: bool = True, + filter_func=None, + errors: str = "ignore", + ) -> None: + """ + Modify in place using non-NA values from another DataFrame. + + Aligns on indices. There is no return value. + + Parameters + ---------- + other : DataFrame, or object coercible into a DataFrame + Should have at least one matching index/column label + with the original DataFrame. If a Series is passed, + its name attribute must be set, and that will be + used as the column name to align with the original DataFrame. + join : {'left'}, default 'left' + Only left join is implemented, keeping the index and columns of the + original object. + overwrite : bool, default True + How to handle non-NA values for overlapping keys: + + * True: overwrite original DataFrame's values + with values from `other`. + * False: only update values that are NA in + the original DataFrame. + + filter_func : callable(1d-array) -> bool 1d-array, optional + Can choose to replace values other than NA. Return True for values + that should be updated. + errors : {'raise', 'ignore'}, default 'ignore' + If 'raise', will raise a ValueError if the DataFrame and `other` + both contain non-NA data in the same place. + + Returns + ------- + None : method directly changes calling object + + Raises + ------ + ValueError + * When `errors='raise'` and there's overlapping non-NA data. + * When `errors` is not either `'ignore'` or `'raise'` + NotImplementedError + * If `join != 'left'` + + See Also + -------- + dict.update : Similar method for dictionaries. + DataFrame.merge : For column(s)-on-column(s) operations. + + Examples + -------- + >>> df = pd.DataFrame({'A': [1, 2, 3], + ... 'B': [400, 500, 600]}) + >>> new_df = pd.DataFrame({'B': [4, 5, 6], + ... 'C': [7, 8, 9]}) + >>> df.update(new_df) + >>> df + A B + 0 1 4 + 1 2 5 + 2 3 6 + + The DataFrame's length does not increase as a result of the update, + only values at matching index/column labels are updated. + + >>> df = pd.DataFrame({'A': ['a', 'b', 'c'], + ... 'B': ['x', 'y', 'z']}) + >>> new_df = pd.DataFrame({'B': ['d', 'e', 'f', 'g', 'h', 'i']}) + >>> df.update(new_df) + >>> df + A B + 0 a d + 1 b e + 2 c f + + For Series, its name attribute must be set. 
+ + >>> df = pd.DataFrame({'A': ['a', 'b', 'c'], + ... 'B': ['x', 'y', 'z']}) + >>> new_column = pd.Series(['d', 'e'], name='B', index=[0, 2]) + >>> df.update(new_column) + >>> df + A B + 0 a d + 1 b y + 2 c e + >>> df = pd.DataFrame({'A': ['a', 'b', 'c'], + ... 'B': ['x', 'y', 'z']}) + >>> new_df = pd.DataFrame({'B': ['d', 'e']}, index=[1, 2]) + >>> df.update(new_df) + >>> df + A B + 0 a x + 1 b d + 2 c e + + If `other` contains NaNs the corresponding values are not updated + in the original dataframe. + + >>> df = pd.DataFrame({'A': [1, 2, 3], + ... 'B': [400, 500, 600]}) + >>> new_df = pd.DataFrame({'B': [4, np.nan, 6]}) + >>> df.update(new_df) + >>> df + A B + 0 1 4.0 + 1 2 500.0 + 2 3 6.0 + """ + import pandas.core.computation.expressions as expressions + + # TODO: Support other joins + if join != "left": # pragma: no cover + raise NotImplementedError("Only left join is supported") + if errors not in ["ignore", "raise"]: + raise ValueError("The parameter errors must be either 'ignore' or 'raise'") + + if not isinstance(other, DataFrame): + other = DataFrame(other) + + other = other.reindex_like(self) + + for col in self.columns: + this = self[col]._values + that = other[col]._values + if filter_func is not None: + with np.errstate(all="ignore"): + mask = ~filter_func(this) | isna(that) + else: + if errors == "raise": + mask_this = notna(that) + mask_that = notna(this) + if any(mask_this & mask_that): + raise ValueError("Data overlaps.") + + if overwrite: + mask = isna(that) + else: + mask = notna(this) + + # don't overwrite columns unnecessarily + if mask.all(): + continue + + self[col] = expressions.where(mask, this, that) + + # ---------------------------------------------------------------------- + # Data reshaping + @Appender( + """ +Examples +-------- +>>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon', +... 'Parrot', 'Parrot'], +... 'Max Speed': [380., 370., 24., 26.]}) +>>> df + Animal Max Speed +0 Falcon 380.0 +1 Falcon 370.0 +2 Parrot 24.0 +3 Parrot 26.0 +>>> df.groupby(['Animal']).mean() + Max Speed +Animal +Falcon 375.0 +Parrot 25.0 + +**Hierarchical Indexes** + +We can groupby different levels of a hierarchical index +using the `level` parameter: + +>>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'], +... ['Captive', 'Wild', 'Captive', 'Wild']] +>>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type')) +>>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]}, +... index=index) +>>> df + Max Speed +Animal Type +Falcon Captive 390.0 + Wild 350.0 +Parrot Captive 30.0 + Wild 20.0 +>>> df.groupby(level=0).mean() + Max Speed +Animal +Falcon 370.0 +Parrot 25.0 +>>> df.groupby(level="Type").mean() + Max Speed +Type +Captive 210.0 +Wild 185.0 + +We can also choose to include NA in group keys or not by setting +`dropna` parameter, the default setting is `True`. 
+ +>>> l = [[1, 2, 3], [1, None, 4], [2, 1, 3], [1, 2, 2]] +>>> df = pd.DataFrame(l, columns=["a", "b", "c"]) + +>>> df.groupby(by=["b"]).sum() + a c +b +1.0 2 3 +2.0 2 5 + +>>> df.groupby(by=["b"], dropna=False).sum() + a c +b +1.0 2 3 +2.0 2 5 +NaN 1 4 + +>>> l = [["a", 12, 12], [None, 12.3, 33.], ["b", 12.3, 123], ["a", 1, 1]] +>>> df = pd.DataFrame(l, columns=["a", "b", "c"]) + +>>> df.groupby(by="a").sum() + b c +a +a 13.0 13.0 +b 12.3 123.0 + +>>> df.groupby(by="a", dropna=False).sum() + b c +a +a 13.0 13.0 +b 12.3 123.0 +NaN 12.3 33.0 +""" + ) + @Appender(_shared_docs["groupby"] % _shared_doc_kwargs) + def groupby( + self, + by=None, + axis: Axis = 0, + level: Level | None = None, + as_index: bool = True, + sort: bool = True, + group_keys: bool = True, + squeeze: bool | lib.NoDefault = no_default, + observed: bool = False, + dropna: bool = True, + ) -> DataFrameGroupBy: + from pandas.core.groupby.generic import DataFrameGroupBy + + if squeeze is not no_default: + warnings.warn( + ( + "The `squeeze` parameter is deprecated and " + "will be removed in a future version." + ), + FutureWarning, + stacklevel=find_stack_level(), + ) + else: + squeeze = False + + if level is None and by is None: + raise TypeError("You have to supply one of 'by' and 'level'") + axis = self._get_axis_number(axis) + + # https://github.com/python/mypy/issues/7642 + # error: Argument "squeeze" to "DataFrameGroupBy" has incompatible type + # "Union[bool, NoDefault]"; expected "bool" + return DataFrameGroupBy( + obj=self, + keys=by, + axis=axis, + level=level, + as_index=as_index, + sort=sort, + group_keys=group_keys, + squeeze=squeeze, # type: ignore[arg-type] + observed=observed, + dropna=dropna, + ) + + _shared_docs[ + "pivot" + ] = """ + Return reshaped DataFrame organized by given index / column values. + + Reshape data (produce a "pivot" table) based on column values. Uses + unique values from specified `index` / `columns` to form axes of the + resulting DataFrame. This function does not support data + aggregation, multiple values will result in a MultiIndex in the + columns. See the :ref:`User Guide ` for more on reshaping. + + Parameters + ----------%s + index : str or object or a list of str, optional + Column to use to make new frame's index. If None, uses + existing index. + + .. versionchanged:: 1.1.0 + Also accept list of index names. + + columns : str or object or a list of str + Column to use to make new frame's columns. + + .. versionchanged:: 1.1.0 + Also accept list of columns names. + + values : str, object or a list of the previous, optional + Column(s) to use for populating new frame's values. If not + specified, all remaining columns will be used and the result will + have hierarchically indexed columns. + + Returns + ------- + DataFrame + Returns reshaped DataFrame. + + Raises + ------ + ValueError: + When there are any `index`, `columns` combinations with multiple + values. `DataFrame.pivot_table` when you need to aggregate. + + See Also + -------- + DataFrame.pivot_table : Generalization of pivot that can handle + duplicate values for one index/column pair. + DataFrame.unstack : Pivot based on the index values instead of a + column. + wide_to_long : Wide panel to long format. Less flexible but more + user-friendly than melt. + + Notes + ----- + For finer-tuned control, see hierarchical indexing documentation along + with the related stack/unstack methods. + + Reference :ref:`the user guide ` for more examples. 
+ + Examples + -------- + >>> df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', + ... 'two'], + ... 'bar': ['A', 'B', 'C', 'A', 'B', 'C'], + ... 'baz': [1, 2, 3, 4, 5, 6], + ... 'zoo': ['x', 'y', 'z', 'q', 'w', 't']}) + >>> df + foo bar baz zoo + 0 one A 1 x + 1 one B 2 y + 2 one C 3 z + 3 two A 4 q + 4 two B 5 w + 5 two C 6 t + + >>> df.pivot(index='foo', columns='bar', values='baz') + bar A B C + foo + one 1 2 3 + two 4 5 6 + + >>> df.pivot(index='foo', columns='bar')['baz'] + bar A B C + foo + one 1 2 3 + two 4 5 6 + + >>> df.pivot(index='foo', columns='bar', values=['baz', 'zoo']) + baz zoo + bar A B C A B C + foo + one 1 2 3 x y z + two 4 5 6 q w t + + You could also assign a list of column names or a list of index names. + + >>> df = pd.DataFrame({ + ... "lev1": [1, 1, 1, 2, 2, 2], + ... "lev2": [1, 1, 2, 1, 1, 2], + ... "lev3": [1, 2, 1, 2, 1, 2], + ... "lev4": [1, 2, 3, 4, 5, 6], + ... "values": [0, 1, 2, 3, 4, 5]}) + >>> df + lev1 lev2 lev3 lev4 values + 0 1 1 1 1 0 + 1 1 1 2 2 1 + 2 1 2 1 3 2 + 3 2 1 2 4 3 + 4 2 1 1 5 4 + 5 2 2 2 6 5 + + >>> df.pivot(index="lev1", columns=["lev2", "lev3"],values="values") + lev2 1 2 + lev3 1 2 1 2 + lev1 + 1 0.0 1.0 2.0 NaN + 2 4.0 3.0 NaN 5.0 + + >>> df.pivot(index=["lev1", "lev2"], columns=["lev3"],values="values") + lev3 1 2 + lev1 lev2 + 1 1 0.0 1.0 + 2 2.0 NaN + 2 1 4.0 3.0 + 2 NaN 5.0 + + A ValueError is raised if there are any duplicates. + + >>> df = pd.DataFrame({"foo": ['one', 'one', 'two', 'two'], + ... "bar": ['A', 'A', 'B', 'C'], + ... "baz": [1, 2, 3, 4]}) + >>> df + foo bar baz + 0 one A 1 + 1 one A 2 + 2 two B 3 + 3 two C 4 + + Notice that the first two rows are the same for our `index` + and `columns` arguments. + + >>> df.pivot(index='foo', columns='bar', values='baz') + Traceback (most recent call last): + ... + ValueError: Index contains duplicate entries, cannot reshape + """ + + @Substitution("") + @Appender(_shared_docs["pivot"]) + def pivot(self, index=None, columns=None, values=None) -> DataFrame: + from pandas.core.reshape.pivot import pivot + + return pivot(self, index=index, columns=columns, values=values) + + _shared_docs[ + "pivot_table" + ] = """ + Create a spreadsheet-style pivot table as a DataFrame. + + The levels in the pivot table will be stored in MultiIndex objects + (hierarchical indexes) on the index and columns of the result DataFrame. + + Parameters + ----------%s + values : column to aggregate, optional + index : column, Grouper, array, or list of the previous + If an array is passed, it must be the same length as the data. The + list can contain any of the other types (except list). + Keys to group by on the pivot table index. If an array is passed, + it is being used as the same manner as column values. + columns : column, Grouper, array, or list of the previous + If an array is passed, it must be the same length as the data. The + list can contain any of the other types (except list). + Keys to group by on the pivot table column. If an array is passed, + it is being used as the same manner as column values. + aggfunc : function, list of functions, dict, default numpy.mean + If list of functions passed, the resulting pivot table will have + hierarchical columns whose top level are the function names + (inferred from the function objects themselves) + If dict is passed, the key is column to aggregate and value + is function or list of functions. + fill_value : scalar, default None + Value to replace missing values with (in the resulting pivot table, + after aggregation). 
+ margins : bool, default False + Add all row / columns (e.g. for subtotal / grand totals). + dropna : bool, default True + Do not include columns whose entries are all NaN. + margins_name : str, default 'All' + Name of the row / column that will contain the totals + when margins is True. + observed : bool, default False + This only applies if any of the groupers are Categoricals. + If True: only show observed values for categorical groupers. + If False: show all values for categorical groupers. + + .. versionchanged:: 0.25.0 + + sort : bool, default True + Specifies if the result should be sorted. + + .. versionadded:: 1.3.0 + + Returns + ------- + DataFrame + An Excel style pivot table. + + See Also + -------- + DataFrame.pivot : Pivot without aggregation that can handle + non-numeric data. + DataFrame.melt: Unpivot a DataFrame from wide to long format, + optionally leaving identifiers set. + wide_to_long : Wide panel to long format. Less flexible but more + user-friendly than melt. + + Notes + ----- + Reference :ref:`the user guide ` for more examples. + + Examples + -------- + >>> df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo", + ... "bar", "bar", "bar", "bar"], + ... "B": ["one", "one", "one", "two", "two", + ... "one", "one", "two", "two"], + ... "C": ["small", "large", "large", "small", + ... "small", "large", "small", "small", + ... "large"], + ... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7], + ... "E": [2, 4, 5, 5, 6, 6, 8, 9, 9]}) + >>> df + A B C D E + 0 foo one small 1 2 + 1 foo one large 2 4 + 2 foo one large 2 5 + 3 foo two small 3 5 + 4 foo two small 3 6 + 5 bar one large 4 6 + 6 bar one small 5 8 + 7 bar two small 6 9 + 8 bar two large 7 9 + + This first example aggregates values by taking the sum. + + >>> table = pd.pivot_table(df, values='D', index=['A', 'B'], + ... columns=['C'], aggfunc=np.sum) + >>> table + C large small + A B + bar one 4.0 5.0 + two 7.0 6.0 + foo one 4.0 1.0 + two NaN 6.0 + + We can also fill missing values using the `fill_value` parameter. + + >>> table = pd.pivot_table(df, values='D', index=['A', 'B'], + ... columns=['C'], aggfunc=np.sum, fill_value=0) + >>> table + C large small + A B + bar one 4 5 + two 7 6 + foo one 4 1 + two 0 6 + + The next example aggregates by taking the mean across multiple columns. + + >>> table = pd.pivot_table(df, values=['D', 'E'], index=['A', 'C'], + ... aggfunc={'D': np.mean, + ... 'E': np.mean}) + >>> table + D E + A C + bar large 5.500000 7.500000 + small 5.500000 8.500000 + foo large 2.000000 4.500000 + small 2.333333 4.333333 + + We can also calculate multiple types of aggregations for any given + value column. + + >>> table = pd.pivot_table(df, values=['D', 'E'], index=['A', 'C'], + ... aggfunc={'D': np.mean, + ... 
'E': [min, max, np.mean]}) + >>> table + D E + mean max mean min + A C + bar large 5.500000 9 7.500000 6 + small 5.500000 9 8.500000 8 + foo large 2.000000 5 4.500000 4 + small 2.333333 6 4.333333 2 + """ + + @Substitution("") + @Appender(_shared_docs["pivot_table"]) + def pivot_table( + self, + values=None, + index=None, + columns=None, + aggfunc="mean", + fill_value=None, + margins=False, + dropna=True, + margins_name="All", + observed=False, + sort=True, + ) -> DataFrame: + from pandas.core.reshape.pivot import pivot_table + + return pivot_table( + self, + values=values, + index=index, + columns=columns, + aggfunc=aggfunc, + fill_value=fill_value, + margins=margins, + dropna=dropna, + margins_name=margins_name, + observed=observed, + sort=sort, + ) + + def stack(self, level: Level = -1, dropna: bool = True): + """ + Stack the prescribed level(s) from columns to index. + + Return a reshaped DataFrame or Series having a multi-level + index with one or more new inner-most levels compared to the current + DataFrame. The new inner-most levels are created by pivoting the + columns of the current dataframe: + + - if the columns have a single level, the output is a Series; + - if the columns have multiple levels, the new index + level(s) is (are) taken from the prescribed level(s) and + the output is a DataFrame. + + Parameters + ---------- + level : int, str, list, default -1 + Level(s) to stack from the column axis onto the index + axis, defined as one index or label, or a list of indices + or labels. + dropna : bool, default True + Whether to drop rows in the resulting Frame/Series with + missing values. Stacking a column level onto the index + axis can create combinations of index and column values + that are missing from the original dataframe. See Examples + section. + + Returns + ------- + DataFrame or Series + Stacked dataframe or series. + + See Also + -------- + DataFrame.unstack : Unstack prescribed level(s) from index axis + onto column axis. + DataFrame.pivot : Reshape dataframe from long format to wide + format. + DataFrame.pivot_table : Create a spreadsheet-style pivot table + as a DataFrame. + + Notes + ----- + The function is named by analogy with a collection of books + being reorganized from being side by side on a horizontal + position (the columns of the dataframe) to being stacked + vertically on top of each other (in the index of the + dataframe). + + Reference :ref:`the user guide ` for more examples. + + Examples + -------- + **Single level columns** + + >>> df_single_level_cols = pd.DataFrame([[0, 1], [2, 3]], + ... index=['cat', 'dog'], + ... columns=['weight', 'height']) + + Stacking a dataframe with a single level column axis returns a Series: + + >>> df_single_level_cols + weight height + cat 0 1 + dog 2 3 + >>> df_single_level_cols.stack() + cat weight 0 + height 1 + dog weight 2 + height 3 + dtype: int64 + + **Multi level columns: simple case** + + >>> multicol1 = pd.MultiIndex.from_tuples([('weight', 'kg'), + ... ('weight', 'pounds')]) + >>> df_multi_level_cols1 = pd.DataFrame([[1, 2], [2, 4]], + ... index=['cat', 'dog'], + ... columns=multicol1) + + Stacking a dataframe with a multi-level column axis: + + >>> df_multi_level_cols1 + weight + kg pounds + cat 1 2 + dog 2 4 + >>> df_multi_level_cols1.stack() + weight + cat kg 1 + pounds 2 + dog kg 2 + pounds 4 + + **Missing values** + + >>> multicol2 = pd.MultiIndex.from_tuples([('weight', 'kg'), + ... ('height', 'm')]) + >>> df_multi_level_cols2 = pd.DataFrame([[1.0, 2.0], [3.0, 4.0]], + ... 
index=['cat', 'dog'], + ... columns=multicol2) + + It is common to have missing values when stacking a dataframe + with multi-level columns, as the stacked dataframe typically + has more values than the original dataframe. Missing values + are filled with NaNs: + + >>> df_multi_level_cols2 + weight height + kg m + cat 1.0 2.0 + dog 3.0 4.0 + >>> df_multi_level_cols2.stack() + height weight + cat kg NaN 1.0 + m 2.0 NaN + dog kg NaN 3.0 + m 4.0 NaN + + **Prescribing the level(s) to be stacked** + + The first parameter controls which level or levels are stacked: + + >>> df_multi_level_cols2.stack(0) + kg m + cat height NaN 2.0 + weight 1.0 NaN + dog height NaN 4.0 + weight 3.0 NaN + >>> df_multi_level_cols2.stack([0, 1]) + cat height m 2.0 + weight kg 1.0 + dog height m 4.0 + weight kg 3.0 + dtype: float64 + + **Dropping missing values** + + >>> df_multi_level_cols3 = pd.DataFrame([[None, 1.0], [2.0, 3.0]], + ... index=['cat', 'dog'], + ... columns=multicol2) + + Note that rows where all values are missing are dropped by + default but this behaviour can be controlled via the dropna + keyword parameter: + + >>> df_multi_level_cols3 + weight height + kg m + cat NaN 1.0 + dog 2.0 3.0 + >>> df_multi_level_cols3.stack(dropna=False) + height weight + cat kg NaN NaN + m 1.0 NaN + dog kg NaN 2.0 + m 3.0 NaN + >>> df_multi_level_cols3.stack(dropna=True) + height weight + cat m 1.0 NaN + dog kg NaN 2.0 + m 3.0 NaN + """ + from pandas.core.reshape.reshape import ( + stack, + stack_multiple, + ) + + if isinstance(level, (tuple, list)): + result = stack_multiple(self, level, dropna=dropna) + else: + result = stack(self, level, dropna=dropna) + + return result.__finalize__(self, method="stack") + + def explode( + self, + column: IndexLabel, + ignore_index: bool = False, + ) -> DataFrame: + """ + Transform each element of a list-like to a row, replicating index values. + + .. versionadded:: 0.25.0 + + Parameters + ---------- + column : IndexLabel + Column(s) to explode. + For multiple columns, specify a non-empty list with each element + be str or tuple, and all specified columns their list-like data + on same row of the frame must have matching length. + + .. versionadded:: 1.3.0 + Multi-column explode + + ignore_index : bool, default False + If True, the resulting index will be labeled 0, 1, …, n - 1. + + .. versionadded:: 1.1.0 + + Returns + ------- + DataFrame + Exploded lists to rows of the subset columns; + index will be duplicated for these rows. + + Raises + ------ + ValueError : + * If columns of the frame are not unique. + * If specified columns to explode is empty list. + * If specified columns to explode have not matching count of + elements rowwise in the frame. + + See Also + -------- + DataFrame.unstack : Pivot a level of the (necessarily hierarchical) + index labels. + DataFrame.melt : Unpivot a DataFrame from wide format to long format. + Series.explode : Explode a DataFrame from list-like columns to long format. + + Notes + ----- + This routine will explode list-likes including lists, tuples, sets, + Series, and np.ndarray. The result dtype of the subset rows will + be object. Scalars will be returned unchanged, and empty list-likes will + result in a np.nan for that row. In addition, the ordering of rows in the + output will be non-deterministic when exploding sets. + + Reference :ref:`the user guide ` for more examples. + + Examples + -------- + >>> df = pd.DataFrame({'A': [[0, 1, 2], 'foo', [], [3, 4]], + ... 'B': 1, + ... 
'C': [['a', 'b', 'c'], np.nan, [], ['d', 'e']]}) + >>> df + A B C + 0 [0, 1, 2] 1 [a, b, c] + 1 foo 1 NaN + 2 [] 1 [] + 3 [3, 4] 1 [d, e] + + Single-column explode. + + >>> df.explode('A') + A B C + 0 0 1 [a, b, c] + 0 1 1 [a, b, c] + 0 2 1 [a, b, c] + 1 foo 1 NaN + 2 NaN 1 [] + 3 3 1 [d, e] + 3 4 1 [d, e] + + Multi-column explode. + + >>> df.explode(list('AC')) + A B C + 0 0 1 a + 0 1 1 b + 0 2 1 c + 1 foo 1 NaN + 2 NaN 1 NaN + 3 3 1 d + 3 4 1 e + """ + if not self.columns.is_unique: + raise ValueError("columns must be unique") + + columns: list[Hashable] + if is_scalar(column) or isinstance(column, tuple): + columns = [column] + elif isinstance(column, list) and all( + map(lambda c: is_scalar(c) or isinstance(c, tuple), column) + ): + if not column: + raise ValueError("column must be nonempty") + if len(column) > len(set(column)): + raise ValueError("column must be unique") + columns = column + else: + raise ValueError("column must be a scalar, tuple, or list thereof") + + df = self.reset_index(drop=True) + if len(columns) == 1: + result = df[columns[0]].explode() + else: + mylen = lambda x: len(x) if is_list_like(x) else -1 + counts0 = self[columns[0]].apply(mylen) + for c in columns[1:]: + if not all(counts0 == self[c].apply(mylen)): + raise ValueError("columns must have matching element counts") + result = DataFrame({c: df[c].explode() for c in columns}) + result = df.drop(columns, axis=1).join(result) + if ignore_index: + result.index = default_index(len(result)) + else: + result.index = self.index.take(result.index) + result = result.reindex(columns=self.columns, copy=False) + + return result + + def unstack(self, level: Level = -1, fill_value=None): + """ + Pivot a level of the (necessarily hierarchical) index labels. + + Returns a DataFrame having a new level of column labels whose inner-most level + consists of the pivoted index labels. + + If the index is not a MultiIndex, the output will be a Series + (the analogue of stack when the columns are not a MultiIndex). + + Parameters + ---------- + level : int, str, or list of these, default -1 (last level) + Level(s) of index to unstack, can pass level name. + fill_value : int, str or dict + Replace NaN with this value if the unstack produces missing values. + + Returns + ------- + Series or DataFrame + + See Also + -------- + DataFrame.pivot : Pivot a table based on column values. + DataFrame.stack : Pivot a level of the column labels (inverse operation + from `unstack`). + + Notes + ----- + Reference :ref:`the user guide ` for more examples. + + Examples + -------- + >>> index = pd.MultiIndex.from_tuples([('one', 'a'), ('one', 'b'), + ... 
('two', 'a'), ('two', 'b')]) + >>> s = pd.Series(np.arange(1.0, 5.0), index=index) + >>> s + one a 1.0 + b 2.0 + two a 3.0 + b 4.0 + dtype: float64 + + >>> s.unstack(level=-1) + a b + one 1.0 2.0 + two 3.0 4.0 + + >>> s.unstack(level=0) + one two + a 1.0 3.0 + b 2.0 4.0 + + >>> df = s.unstack(level=0) + >>> df.unstack() + one a 1.0 + b 2.0 + two a 3.0 + b 4.0 + dtype: float64 + """ + from pandas.core.reshape.reshape import unstack + + result = unstack(self, level, fill_value) + + return result.__finalize__(self, method="unstack") + + @Appender(_shared_docs["melt"] % {"caller": "df.melt(", "other": "melt"}) + def melt( + self, + id_vars=None, + value_vars=None, + var_name=None, + value_name="value", + col_level: Level | None = None, + ignore_index: bool = True, + ) -> DataFrame: + + return melt( + self, + id_vars=id_vars, + value_vars=value_vars, + var_name=var_name, + value_name=value_name, + col_level=col_level, + ignore_index=ignore_index, + ) + + # ---------------------------------------------------------------------- + # Time series-related + + @doc( + Series.diff, + klass="Dataframe", + extra_params="axis : {0 or 'index', 1 or 'columns'}, default 0\n " + "Take difference over rows (0) or columns (1).\n", + other_klass="Series", + examples=dedent( + """ + Difference with previous row + + >>> df = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6], + ... 'b': [1, 1, 2, 3, 5, 8], + ... 'c': [1, 4, 9, 16, 25, 36]}) + >>> df + a b c + 0 1 1 1 + 1 2 1 4 + 2 3 2 9 + 3 4 3 16 + 4 5 5 25 + 5 6 8 36 + + >>> df.diff() + a b c + 0 NaN NaN NaN + 1 1.0 0.0 3.0 + 2 1.0 1.0 5.0 + 3 1.0 1.0 7.0 + 4 1.0 2.0 9.0 + 5 1.0 3.0 11.0 + + Difference with previous column + + >>> df.diff(axis=1) + a b c + 0 NaN 0 0 + 1 NaN -1 3 + 2 NaN -1 7 + 3 NaN -1 13 + 4 NaN 0 20 + 5 NaN 2 28 + + Difference with 3rd previous row + + >>> df.diff(periods=3) + a b c + 0 NaN NaN NaN + 1 NaN NaN NaN + 2 NaN NaN NaN + 3 3.0 2.0 15.0 + 4 3.0 4.0 21.0 + 5 3.0 6.0 27.0 + + Difference with following row + + >>> df.diff(periods=-1) + a b c + 0 -1.0 0.0 -3.0 + 1 -1.0 -1.0 -5.0 + 2 -1.0 -1.0 -7.0 + 3 -1.0 -2.0 -9.0 + 4 -1.0 -3.0 -11.0 + 5 NaN NaN NaN + + Overflow in input dtype + + >>> df = pd.DataFrame({'a': [1, 0]}, dtype=np.uint8) + >>> df.diff() + a + 0 NaN + 1 255.0""" + ), + ) + def diff(self, periods: int = 1, axis: Axis = 0) -> DataFrame: + if not lib.is_integer(periods): + if not ( + is_float(periods) + # error: "int" has no attribute "is_integer" + and periods.is_integer() # type: ignore[attr-defined] + ): + raise ValueError("periods must be an integer") + periods = int(periods) + + axis = self._get_axis_number(axis) + if axis == 1 and periods != 0: + return self - self.shift(periods, axis=axis) + + new_data = self._mgr.diff(n=periods, axis=axis) + return self._constructor(new_data).__finalize__(self, "diff") + + # ---------------------------------------------------------------------- + # Function application + + def _gotitem( + self, + key: IndexLabel, + ndim: int, + subset: DataFrame | Series | None = None, + ) -> DataFrame | Series: + """ + Sub-classes to define. Return a sliced object. + + Parameters + ---------- + key : string / list of selections + ndim : {1, 2} + requested ndim of result + subset : object, default None + subset to act on + """ + if subset is None: + subset = self + elif subset.ndim == 1: # is Series + return subset + + # TODO: _shallow_copy(subset)? 
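+ # A scalar `key` slices out a single column as a Series; a list-like
+ # `key` slices out a sub-frame. `ndim` is the dimensionality the caller
+ # requested for the result.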
+ return subset[key] + + _agg_summary_and_see_also_doc = dedent( + """ + The aggregation operations are always performed over an axis, either the + index (default) or the column axis. This behavior is different from + `numpy` aggregation functions (`mean`, `median`, `prod`, `sum`, `std`, + `var`), where the default is to compute the aggregation of the flattened + array, e.g., ``numpy.mean(arr_2d)`` as opposed to + ``numpy.mean(arr_2d, axis=0)``. + + `agg` is an alias for `aggregate`. Use the alias. + + See Also + -------- + DataFrame.apply : Perform any type of operations. + DataFrame.transform : Perform transformation type operations. + core.groupby.GroupBy : Perform operations over groups. + core.resample.Resampler : Perform operations over resampled bins. + core.window.Rolling : Perform operations over rolling window. + core.window.Expanding : Perform operations over expanding window. + core.window.ExponentialMovingWindow : Perform operation over exponential weighted + window. + """ + ) + + _agg_examples_doc = dedent( + """ + Examples + -------- + >>> df = pd.DataFrame([[1, 2, 3], + ... [4, 5, 6], + ... [7, 8, 9], + ... [np.nan, np.nan, np.nan]], + ... columns=['A', 'B', 'C']) + + Aggregate these functions over the rows. + + >>> df.agg(['sum', 'min']) + A B C + sum 12.0 15.0 18.0 + min 1.0 2.0 3.0 + + Different aggregations per column. + + >>> df.agg({'A' : ['sum', 'min'], 'B' : ['min', 'max']}) + A B + sum 12.0 NaN + min 1.0 2.0 + max NaN 8.0 + + Aggregate different functions over the columns and rename the index of the resulting + DataFrame. + + >>> df.agg(x=('A', max), y=('B', 'min'), z=('C', np.mean)) + A B C + x 7.0 NaN NaN + y NaN 2.0 NaN + z NaN NaN 6.0 + + Aggregate over the columns. + + >>> df.agg("mean", axis="columns") + 0 2.0 + 1 5.0 + 2 8.0 + 3 NaN + dtype: float64 + """ + ) + + @doc( + _shared_docs["aggregate"], + klass=_shared_doc_kwargs["klass"], + axis=_shared_doc_kwargs["axis"], + see_also=_agg_summary_and_see_also_doc, + examples=_agg_examples_doc, + ) + def aggregate(self, func=None, axis: Axis = 0, *args, **kwargs): + from pandas.core.apply import frame_apply + + axis = self._get_axis_number(axis) + + relabeling, func, columns, order = reconstruct_func(func, **kwargs) + + op = frame_apply(self, func=func, axis=axis, args=args, kwargs=kwargs) + result = op.agg() + + if relabeling: + # This is to keep the order to columns occurrence unchanged, and also + # keep the order of new columns occurrence unchanged + + # For the return values of reconstruct_func, if relabeling is + # False, columns and order will be None. + assert columns is not None + assert order is not None + + result_in_dict = relabel_result(result, func, columns, order) + result = DataFrame(result_in_dict, index=columns) + + return result + + agg = aggregate + + @doc( + _shared_docs["transform"], + klass=_shared_doc_kwargs["klass"], + axis=_shared_doc_kwargs["axis"], + ) + def transform( + self, func: AggFuncType, axis: Axis = 0, *args, **kwargs + ) -> DataFrame: + from pandas.core.apply import frame_apply + + op = frame_apply(self, func=func, axis=axis, args=args, kwargs=kwargs) + result = op.transform() + assert isinstance(result, DataFrame) + return result + + def apply( + self, + func: AggFuncType, + axis: Axis = 0, + raw: bool = False, + result_type=None, + args=(), + **kwargs, + ): + """ + Apply a function along an axis of the DataFrame. + + Objects passed to the function are Series objects whose index is + either the DataFrame's index (``axis=0``) or the DataFrame's columns + (``axis=1``). 
By default (``result_type=None``), the final return type + is inferred from the return type of the applied function. Otherwise, + it depends on the `result_type` argument. + + Parameters + ---------- + func : function + Function to apply to each column or row. + axis : {0 or 'index', 1 or 'columns'}, default 0 + Axis along which the function is applied: + + * 0 or 'index': apply function to each column. + * 1 or 'columns': apply function to each row. + + raw : bool, default False + Determines if row or column is passed as a Series or ndarray object: + + * ``False`` : passes each row or column as a Series to the + function. + * ``True`` : the passed function will receive ndarray objects + instead. + If you are just applying a NumPy reduction function this will + achieve much better performance. + + result_type : {'expand', 'reduce', 'broadcast', None}, default None + These only act when ``axis=1`` (columns): + + * 'expand' : list-like results will be turned into columns. + * 'reduce' : returns a Series if possible rather than expanding + list-like results. This is the opposite of 'expand'. + * 'broadcast' : results will be broadcast to the original shape + of the DataFrame, the original index and columns will be + retained. + + The default behaviour (None) depends on the return value of the + applied function: list-like results will be returned as a Series + of those. However if the apply function returns a Series these + are expanded to columns. + args : tuple + Positional arguments to pass to `func` in addition to the + array/series. + **kwargs + Additional keyword arguments to pass as keywords arguments to + `func`. + + Returns + ------- + Series or DataFrame + Result of applying ``func`` along the given axis of the + DataFrame. + + See Also + -------- + DataFrame.applymap: For elementwise operations. + DataFrame.aggregate: Only perform aggregating type operations. + DataFrame.transform: Only perform transforming type operations. + + Notes + ----- + Functions that mutate the passed object can produce unexpected + behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` + for more details. + + Examples + -------- + >>> df = pd.DataFrame([[4, 9]] * 3, columns=['A', 'B']) + >>> df + A B + 0 4 9 + 1 4 9 + 2 4 9 + + Using a numpy universal function (in this case the same as + ``np.sqrt(df)``): + + >>> df.apply(np.sqrt) + A B + 0 2.0 3.0 + 1 2.0 3.0 + 2 2.0 3.0 + + Using a reducing function on either axis + + >>> df.apply(np.sum, axis=0) + A 12 + B 27 + dtype: int64 + + >>> df.apply(np.sum, axis=1) + 0 13 + 1 13 + 2 13 + dtype: int64 + + Returning a list-like will result in a Series + + >>> df.apply(lambda x: [1, 2], axis=1) + 0 [1, 2] + 1 [1, 2] + 2 [1, 2] + dtype: object + + Passing ``result_type='expand'`` will expand list-like results + to columns of a Dataframe + + >>> df.apply(lambda x: [1, 2], axis=1, result_type='expand') + 0 1 + 0 1 2 + 1 1 2 + 2 1 2 + + Returning a Series inside the function is similar to passing + ``result_type='expand'``. The resulting column names + will be the Series index. + + >>> df.apply(lambda x: pd.Series([1, 2], index=['foo', 'bar']), axis=1) + foo bar + 0 1 2 + 1 1 2 + 2 1 2 + + Passing ``result_type='broadcast'`` will ensure the same shape + result, whether list-like or scalar is returned by the function, + and broadcast it along the axis. The resulting column names will + be the originals. 
+
+ >>> df.apply(lambda x: [1, 2], axis=1, result_type='broadcast')
+ A B
+ 0 1 2
+ 1 1 2
+ 2 1 2
+ """
+ from pandas.core.apply import frame_apply
+
+ op = frame_apply(
+ self,
+ func=func,
+ axis=axis,
+ raw=raw,
+ result_type=result_type,
+ args=args,
+ kwargs=kwargs,
+ )
+ return op.apply().__finalize__(self, method="apply")
+
+ def applymap(
+ self, func: PythonFuncType, na_action: str | None = None, **kwargs
+ ) -> DataFrame:
+ """
+ Apply a function to a DataFrame elementwise.
+
+ This method applies a function that accepts and returns a scalar
+ to every element of a DataFrame.
+
+ Parameters
+ ----------
+ func : callable
+ Python function, returns a single value from a single value.
+ na_action : {None, 'ignore'}, default None
+ If 'ignore', propagate NaN values without passing them to func.
+
+ .. versionadded:: 1.2
+
+ **kwargs
+ Additional keyword arguments to pass as keyword arguments to
+ `func`.
+
+ .. versionadded:: 1.3.0
+
+ Returns
+ -------
+ DataFrame
+ Transformed DataFrame.
+
+ See Also
+ --------
+ DataFrame.apply : Apply a function along input axis of DataFrame.
+
+ Examples
+ --------
+ >>> df = pd.DataFrame([[1, 2.12], [3.356, 4.567]])
+ >>> df
+ 0 1
+ 0 1.000 2.120
+ 1 3.356 4.567
+
+ >>> df.applymap(lambda x: len(str(x)))
+ 0 1
+ 0 3 4
+ 1 5 5
+
+ Like Series.map, NA values can be ignored:
+
+ >>> df_copy = df.copy()
+ >>> df_copy.iloc[0, 0] = pd.NA
+ >>> df_copy.applymap(lambda x: len(str(x)), na_action='ignore')
+ 0 1
+ 0 <NA> 4
+ 1 5 5
+
+ Note that a vectorized version of `func` often exists, which will
+ be much faster. You could square each number elementwise.
+
+ >>> df.applymap(lambda x: x**2)
+ 0 1
+ 0 1.000000 4.494400
+ 1 11.262736 20.857489
+
+ But it's better to avoid applymap in that case.
+
+ >>> df ** 2
+ 0 1
+ 0 1.000000 4.494400
+ 1 11.262736 20.857489
+ """
+ if na_action not in {"ignore", None}:
+ raise ValueError(
+ f"na_action must be 'ignore' or None. Got {repr(na_action)}"
+ )
+ ignore_na = na_action == "ignore"
+ func = functools.partial(func, **kwargs)
+
+ # if we have a dtype == 'M8[ns]', provide boxed values
+ def infer(x):
+ if x.empty:
+ return lib.map_infer(x, func, ignore_na=ignore_na)
+ return lib.map_infer(x.astype(object)._values, func, ignore_na=ignore_na)
+
+ return self.apply(infer).__finalize__(self, "applymap")
+
+ # ----------------------------------------------------------------------
+ # Merging / joining methods
+
+ def append(
+ self,
+ other,
+ ignore_index: bool = False,
+ verify_integrity: bool = False,
+ sort: bool = False,
+ ) -> DataFrame:
+ """
+ Append rows of `other` to the end of caller, returning a new object.
+
+ .. deprecated:: 1.4.0
+ Use :func:`concat` instead. For further details see
+ :ref:`whatsnew_140.deprecations.frame_series_append`
+
+ Columns in `other` that are not in the caller are added as new columns.
+
+ Parameters
+ ----------
+ other : DataFrame or Series/dict-like object, or list of these
+ The data to append.
+ ignore_index : bool, default False
+ If True, the resulting axis will be labeled 0, 1, …, n - 1.
+ verify_integrity : bool, default False
+ If True, raise ValueError on creating index with duplicates.
+ sort : bool, default False
+ Sort columns if the columns of `self` and `other` are not aligned.
+
+ .. versionchanged:: 1.0.0
+
+ Changed to not sort by default.
+
+ Returns
+ -------
+ DataFrame
+ A new DataFrame consisting of the rows of caller and the rows of `other`.
+
+ See Also
+ --------
+ concat : General function to concatenate DataFrame or Series objects.
+ + Notes + ----- + If a list of dict/series is passed and the keys are all contained in + the DataFrame's index, the order of the columns in the resulting + DataFrame will be unchanged. + + Iteratively appending rows to a DataFrame can be more computationally + intensive than a single concatenate. A better solution is to append + those rows to a list and then concatenate the list with the original + DataFrame all at once. + + Examples + -------- + >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=list('AB'), index=['x', 'y']) + >>> df + A B + x 1 2 + y 3 4 + >>> df2 = pd.DataFrame([[5, 6], [7, 8]], columns=list('AB'), index=['x', 'y']) + >>> df.append(df2) + A B + x 1 2 + y 3 4 + x 5 6 + y 7 8 + + With `ignore_index` set to True: + + >>> df.append(df2, ignore_index=True) + A B + 0 1 2 + 1 3 4 + 2 5 6 + 3 7 8 + + The following, while not recommended methods for generating DataFrames, + show two ways to generate a DataFrame from multiple data sources. + + Less efficient: + + >>> df = pd.DataFrame(columns=['A']) + >>> for i in range(5): + ... df = df.append({'A': i}, ignore_index=True) + >>> df + A + 0 0 + 1 1 + 2 2 + 3 3 + 4 4 + + More efficient: + + >>> pd.concat([pd.DataFrame([i], columns=['A']) for i in range(5)], + ... ignore_index=True) + A + 0 0 + 1 1 + 2 2 + 3 3 + 4 4 + """ + warnings.warn( + "The frame.append method is deprecated " + "and will be removed from pandas in a future version. " + "Use pandas.concat instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + return self._append(other, ignore_index, verify_integrity, sort) + + def _append( + self, + other, + ignore_index: bool = False, + verify_integrity: bool = False, + sort: bool = False, + ) -> DataFrame: + combined_columns = None + if isinstance(other, (Series, dict)): + if isinstance(other, dict): + if not ignore_index: + raise TypeError("Can only append a dict if ignore_index=True") + other = Series(other) + if other.name is None and not ignore_index: + raise TypeError( + "Can only append a Series if ignore_index=True " + "or if the Series has a name" + ) + + index = Index([other.name], name=self.index.name) + idx_diff = other.index.difference(self.columns) + combined_columns = self.columns.append(idx_diff) + row_df = other.to_frame().T + # infer_objects is needed for + # test_append_empty_frame_to_series_with_dateutil_tz + other = row_df.infer_objects().rename_axis(index.names, copy=False) + elif isinstance(other, list): + if not other: + pass + elif not isinstance(other[0], DataFrame): + other = DataFrame(other) + if self.index.name is not None and not ignore_index: + other.index.name = self.index.name + + from pandas.core.reshape.concat import concat + + if isinstance(other, (list, tuple)): + to_concat = [self, *other] + else: + to_concat = [self, other] + + result = concat( + to_concat, + ignore_index=ignore_index, + verify_integrity=verify_integrity, + sort=sort, + ) + if ( + combined_columns is not None + and not sort + and not combined_columns.equals(result.columns) + ): + # TODO: reindexing here is a kludge bc union_indexes does not + # pass sort to index.union, xref #43375 + # combined_columns.equals check is necessary for preserving dtype + # in test_crosstab_normalize + result = result.reindex(combined_columns, axis=1) + return result.__finalize__(self, method="append") + + def join( + self, + other: DataFrame | Series, + on: IndexLabel | None = None, + how: str = "left", + lsuffix: str = "", + rsuffix: str = "", + sort: bool = False, + ) -> DataFrame: + """ + Join columns of another DataFrame. 
+
+        Join columns with `other` DataFrame either on index or on a key
+        column. Efficiently join multiple DataFrame objects by index at once by
+        passing a list.
+
+        Parameters
+        ----------
+        other : DataFrame, Series, or list of DataFrame
+            Index should be similar to one of the columns in this one. If a
+            Series is passed, its name attribute must be set, and that will be
+            used as the column name in the resulting joined DataFrame.
+        on : str, list of str, or array-like, optional
+            Column or index level name(s) in the caller to join on the index
+            in `other`, otherwise joins index-on-index. If multiple
+            values given, the `other` DataFrame must have a MultiIndex. Can
+            pass an array as the join key if it is not already contained in
+            the calling DataFrame. Like an Excel VLOOKUP operation.
+        how : {'left', 'right', 'outer', 'inner'}, default 'left'
+            How to handle the operation of the two objects.
+
+            * left: use calling frame's index (or column if on is specified)
+            * right: use `other`'s index.
+            * outer: form union of calling frame's index (or column if on is
+              specified) with `other`'s index, and sort it lexicographically.
+            * inner: form intersection of calling frame's index (or column if
+              on is specified) with `other`'s index, preserving the order
+              of the calling frame's index.
+            * cross: creates the cartesian product from both frames, preserves the order
+              of the left keys.
+
+              .. versionadded:: 1.2.0
+
+        lsuffix : str, default ''
+            Suffix to use from left frame's overlapping columns.
+        rsuffix : str, default ''
+            Suffix to use from right frame's overlapping columns.
+        sort : bool, default False
+            Order result DataFrame lexicographically by the join key. If False,
+            the order of the join key depends on the join type (how keyword).
+
+        Returns
+        -------
+        DataFrame
+            A dataframe containing columns from both the caller and `other`.
+
+        See Also
+        --------
+        DataFrame.merge : For column(s)-on-column(s) operations.
+
+        Notes
+        -----
+        Parameters `on`, `lsuffix`, and `rsuffix` are not supported when
+        passing a list of `DataFrame` objects.
+
+        Support for specifying index levels as the `on` parameter was added
+        in version 0.23.0.
+
+        Examples
+        --------
+        >>> df = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3', 'K4', 'K5'],
+        ...                    'A': ['A0', 'A1', 'A2', 'A3', 'A4', 'A5']})
+
+        >>> df
+          key   A
+        0  K0  A0
+        1  K1  A1
+        2  K2  A2
+        3  K3  A3
+        4  K4  A4
+        5  K5  A5
+
+        >>> other = pd.DataFrame({'key': ['K0', 'K1', 'K2'],
+        ...                       'B': ['B0', 'B1', 'B2']})
+
+        >>> other
+          key   B
+        0  K0  B0
+        1  K1  B1
+        2  K2  B2
+
+        Join DataFrames using their indexes.
+
+        >>> df.join(other, lsuffix='_caller', rsuffix='_other')
+          key_caller   A key_other    B
+        0         K0  A0        K0   B0
+        1         K1  A1        K1   B1
+        2         K2  A2        K2   B2
+        3         K3  A3       NaN  NaN
+        4         K4  A4       NaN  NaN
+        5         K5  A5       NaN  NaN
+
+        If we want to join using the key columns, we need to set key to be
+        the index in both `df` and `other`. The joined DataFrame will have
+        key as its index.
+
+        >>> df.set_index('key').join(other.set_index('key'))
+              A    B
+        key
+        K0   A0   B0
+        K1   A1   B1
+        K2   A2   B2
+        K3   A3  NaN
+        K4   A4  NaN
+        K5   A5  NaN
+
+        Another option to join using the key columns is to use the `on`
+        parameter. DataFrame.join always uses `other`'s index but we can use
+        any column in `df`. This method preserves the original DataFrame's
+        index in the result.
+
+        >>> df.join(other.set_index('key'), on='key')
+          key   A    B
+        0  K0  A0   B0
+        1  K1  A1   B1
+        2  K2  A2   B2
+        3  K3  A3  NaN
+        4  K4  A4  NaN
+        5  K5  A5  NaN
+
+        Using non-unique key values shows how they are matched.
+ + >>> df = pd.DataFrame({'key': ['K0', 'K1', 'K1', 'K3', 'K0', 'K1'], + ... 'A': ['A0', 'A1', 'A2', 'A3', 'A4', 'A5']}) + + >>> df + key A + 0 K0 A0 + 1 K1 A1 + 2 K1 A2 + 3 K3 A3 + 4 K0 A4 + 5 K1 A5 + + >>> df.join(other.set_index('key'), on='key') + key A B + 0 K0 A0 B0 + 1 K1 A1 B1 + 2 K1 A2 B1 + 3 K3 A3 NaN + 4 K0 A4 B0 + 5 K1 A5 B1 + """ + return self._join_compat( + other, on=on, how=how, lsuffix=lsuffix, rsuffix=rsuffix, sort=sort + ) + + def _join_compat( + self, + other: DataFrame | Series, + on: IndexLabel | None = None, + how: str = "left", + lsuffix: str = "", + rsuffix: str = "", + sort: bool = False, + ): + from pandas.core.reshape.concat import concat + from pandas.core.reshape.merge import merge + + if isinstance(other, Series): + if other.name is None: + raise ValueError("Other Series must have a name") + other = DataFrame({other.name: other}) + + if isinstance(other, DataFrame): + if how == "cross": + return merge( + self, + other, + how=how, + on=on, + suffixes=(lsuffix, rsuffix), + sort=sort, + ) + return merge( + self, + other, + left_on=on, + how=how, + left_index=on is None, + right_index=True, + suffixes=(lsuffix, rsuffix), + sort=sort, + ) + else: + if on is not None: + raise ValueError( + "Joining multiple DataFrames only supported for joining on index" + ) + + frames = [self] + list(other) + + can_concat = all(df.index.is_unique for df in frames) + + # join indexes only using concat + if can_concat: + if how == "left": + res = concat( + frames, axis=1, join="outer", verify_integrity=True, sort=sort + ) + return res.reindex(self.index, copy=False) + else: + return concat( + frames, axis=1, join=how, verify_integrity=True, sort=sort + ) + + joined = frames[0] + + for frame in frames[1:]: + joined = merge( + joined, frame, how=how, left_index=True, right_index=True + ) + + return joined + + @Substitution("") + @Appender(_merge_doc, indents=2) + def merge( + self, + right: DataFrame | Series, + how: str = "inner", + on: IndexLabel | None = None, + left_on: IndexLabel | None = None, + right_on: IndexLabel | None = None, + left_index: bool = False, + right_index: bool = False, + sort: bool = False, + suffixes: Suffixes = ("_x", "_y"), + copy: bool = True, + indicator: bool = False, + validate: str | None = None, + ) -> DataFrame: + from pandas.core.reshape.merge import merge + + return merge( + self, + right, + how=how, + on=on, + left_on=left_on, + right_on=right_on, + left_index=left_index, + right_index=right_index, + sort=sort, + suffixes=suffixes, + copy=copy, + indicator=indicator, + validate=validate, + ) + + def round( + self, decimals: int | dict[IndexLabel, int] | Series = 0, *args, **kwargs + ) -> DataFrame: + """ + Round a DataFrame to a variable number of decimal places. + + Parameters + ---------- + decimals : int, dict, Series + Number of decimal places to round each column to. If an int is + given, round each column to the same number of places. + Otherwise dict and Series round to variable numbers of places. + Column names should be in the keys if `decimals` is a + dict-like, or in the index if `decimals` is a Series. Any + columns not included in `decimals` will be left as is. Elements + of `decimals` which are not columns of the input will be + ignored. + *args + Additional keywords have no effect but might be accepted for + compatibility with numpy. + **kwargs + Additional keywords have no effect but might be accepted for + compatibility with numpy. 
+ + Returns + ------- + DataFrame + A DataFrame with the affected columns rounded to the specified + number of decimal places. + + See Also + -------- + numpy.around : Round a numpy array to the given number of decimals. + Series.round : Round a Series to the given number of decimals. + + Examples + -------- + >>> df = pd.DataFrame([(.21, .32), (.01, .67), (.66, .03), (.21, .18)], + ... columns=['dogs', 'cats']) + >>> df + dogs cats + 0 0.21 0.32 + 1 0.01 0.67 + 2 0.66 0.03 + 3 0.21 0.18 + + By providing an integer each column is rounded to the same number + of decimal places + + >>> df.round(1) + dogs cats + 0 0.2 0.3 + 1 0.0 0.7 + 2 0.7 0.0 + 3 0.2 0.2 + + With a dict, the number of places for specific columns can be + specified with the column names as key and the number of decimal + places as value + + >>> df.round({'dogs': 1, 'cats': 0}) + dogs cats + 0 0.2 0.0 + 1 0.0 1.0 + 2 0.7 0.0 + 3 0.2 0.0 + + Using a Series, the number of places for specific columns can be + specified with the column names as index and the number of + decimal places as value + + >>> decimals = pd.Series([0, 1], index=['cats', 'dogs']) + >>> df.round(decimals) + dogs cats + 0 0.2 0.0 + 1 0.0 1.0 + 2 0.7 0.0 + 3 0.2 0.0 + """ + from pandas.core.reshape.concat import concat + + def _dict_round(df: DataFrame, decimals): + for col, vals in df.items(): + try: + yield _series_round(vals, decimals[col]) + except KeyError: + yield vals + + def _series_round(ser: Series, decimals: int): + if is_integer_dtype(ser.dtype) or is_float_dtype(ser.dtype): + return ser.round(decimals) + return ser + + nv.validate_round(args, kwargs) + + if isinstance(decimals, (dict, Series)): + if isinstance(decimals, Series) and not decimals.index.is_unique: + raise ValueError("Index of decimals must be unique") + if is_dict_like(decimals) and not all( + is_integer(value) for _, value in decimals.items() + ): + raise TypeError("Values in decimals must be integers") + new_cols = list(_dict_round(self, decimals)) + elif is_integer(decimals): + # Dispatch to Series.round + new_cols = [_series_round(v, decimals) for _, v in self.items()] + else: + raise TypeError("decimals must be an integer, a dict-like or a Series") + + if len(new_cols) > 0: + return self._constructor( + concat(new_cols, axis=1), index=self.index, columns=self.columns + ) + else: + return self + + # ---------------------------------------------------------------------- + # Statistical methods, etc. + + def corr( + self, + method: str | Callable[[np.ndarray, np.ndarray], float] = "pearson", + min_periods: int = 1, + ) -> DataFrame: + """ + Compute pairwise correlation of columns, excluding NA/null values. + + Parameters + ---------- + method : {'pearson', 'kendall', 'spearman'} or callable + Method of correlation: + + * pearson : standard correlation coefficient + * kendall : Kendall Tau correlation coefficient + * spearman : Spearman rank correlation + * callable: callable with input two 1d ndarrays + and returning a float. Note that the returned matrix from corr + will have 1 along the diagonals and will be symmetric + regardless of the callable's behavior. + min_periods : int, optional + Minimum number of observations required per pair of columns + to have a valid result. Currently only available for Pearson + and Spearman correlation. + + Returns + ------- + DataFrame + Correlation matrix. + + See Also + -------- + DataFrame.corrwith : Compute pairwise correlation with another + DataFrame or Series. + Series.corr : Compute the correlation between two Series. 
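+
+        Notes
+        -----
+        Pearson, Kendall and Spearman correlation are currently computed
+        using pairwise complete observations. For reference (a standard
+        formulation, not specific to this implementation), the Pearson
+        coefficient for two columns :math:`x` and :math:`y` with sample
+        standard deviations :math:`s_x` and :math:`s_y` over :math:`n`
+        pairwise complete observations is
+
+        .. math::
+            r_{xy} = \frac{\sum_i (x_i - \bar{x})(y_i - \bar{y})}{(n - 1) s_x s_y}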
+
+        Examples
+        --------
+        >>> def histogram_intersection(a, b):
+        ...     v = np.minimum(a, b).sum().round(decimals=1)
+        ...     return v
+        >>> df = pd.DataFrame([(.2, .3), (.0, .6), (.6, .0), (.2, .1)],
+        ...                   columns=['dogs', 'cats'])
+        >>> df.corr(method=histogram_intersection)
+              dogs  cats
+        dogs   1.0   0.3
+        cats   0.3   1.0
+        """
+        numeric_df = self._get_numeric_data()
+        cols = numeric_df.columns
+        idx = cols.copy()
+        mat = numeric_df.to_numpy(dtype=float, na_value=np.nan, copy=False)
+
+        if method == "pearson":
+            correl = libalgos.nancorr(mat, minp=min_periods)
+        elif method == "spearman":
+            correl = libalgos.nancorr_spearman(mat, minp=min_periods)
+        elif method == "kendall" or callable(method):
+            if min_periods is None:
+                min_periods = 1
+            mat = mat.T
+            corrf = nanops.get_corr_func(method)
+            K = len(cols)
+            correl = np.empty((K, K), dtype=float)
+            mask = np.isfinite(mat)
+            for i, ac in enumerate(mat):
+                for j, bc in enumerate(mat):
+                    if i > j:
+                        continue
+
+                    valid = mask[i] & mask[j]
+                    if valid.sum() < min_periods:
+                        c = np.nan
+                    elif i == j:
+                        c = 1.0
+                    elif not valid.all():
+                        c = corrf(ac[valid], bc[valid])
+                    else:
+                        c = corrf(ac, bc)
+                    correl[i, j] = c
+                    correl[j, i] = c
+        else:
+            raise ValueError(
+                "method must be either 'pearson', "
+                "'spearman', 'kendall', or a callable, "
+                f"'{method}' was supplied"
+            )
+
+        return self._constructor(correl, index=idx, columns=cols)
+
+    def cov(self, min_periods: int | None = None, ddof: int | None = 1) -> DataFrame:
+        """
+        Compute pairwise covariance of columns, excluding NA/null values.
+
+        Compute the pairwise covariance among the series of a DataFrame.
+        The returned data frame is the `covariance matrix
+        <https://en.wikipedia.org/wiki/Covariance_matrix>`__ of the columns
+        of the DataFrame.
+
+        Both NA and null values are automatically excluded from the
+        calculation. (See the note below about bias from missing values.)
+        A threshold can be set for the minimum number of
+        observations for each value created. Comparisons with observations
+        below this threshold will be returned as ``NaN``.
+
+        This method is generally used for the analysis of time series data to
+        understand the relationship between different measures
+        across time.
+
+        Parameters
+        ----------
+        min_periods : int, optional
+            Minimum number of observations required per pair of columns
+            to have a valid result.
+
+        ddof : int, default 1
+            Delta degrees of freedom. The divisor used in calculations
+            is ``N - ddof``, where ``N`` represents the number of elements.
+
+            .. versionadded:: 1.1.0
+
+        Returns
+        -------
+        DataFrame
+            The covariance matrix of the series of the DataFrame.
+
+        See Also
+        --------
+        Series.cov : Compute covariance with another Series.
+        core.window.ExponentialMovingWindow.cov: Exponential weighted sample covariance.
+        core.window.Expanding.cov : Expanding sample covariance.
+        core.window.Rolling.cov : Rolling sample covariance.
+
+        Notes
+        -----
+        Returns the covariance matrix of the DataFrame's time series.
+        The covariance is normalized by N-ddof.
+
+        For DataFrames that have Series that are missing data (assuming that
+        data is `missing at random
+        <https://en.wikipedia.org/wiki/Missing_data#Missing_at_random>`__)
+        the returned covariance matrix will be an unbiased estimate
+        of the variance and covariance between the member Series.
+
+        However, for many applications this estimate may not be acceptable
+        because the estimated covariance matrix is not guaranteed to be positive
+        semi-definite: each entry is computed from its own pairwise-complete
+        subset of observations, so the entries need not be mutually
+        consistent. This could lead to estimated correlations having
+        absolute values which are greater than one, and/or a non-invertible
+        covariance matrix.
See `Estimation of covariance matrices
+        <https://en.wikipedia.org/wiki/Estimation_of_covariance_matrices>`__
+        for more details.
+
+        Examples
+        --------
+        >>> df = pd.DataFrame([(1, 2), (0, 3), (2, 0), (1, 1)],
+        ...                   columns=['dogs', 'cats'])
+        >>> df.cov()
+                  dogs      cats
+        dogs  0.666667 -1.000000
+        cats -1.000000  1.666667
+
+        >>> np.random.seed(42)
+        >>> df = pd.DataFrame(np.random.randn(1000, 5),
+        ...                   columns=['a', 'b', 'c', 'd', 'e'])
+        >>> df.cov()
+                  a         b         c         d         e
+        a  0.998438 -0.020161  0.059277 -0.008943  0.014144
+        b -0.020161  1.059352 -0.008543 -0.024738  0.009826
+        c  0.059277 -0.008543  1.010670 -0.001486 -0.000271
+        d -0.008943 -0.024738 -0.001486  0.921297 -0.013692
+        e  0.014144  0.009826 -0.000271 -0.013692  0.977795
+
+        **Minimum number of periods**
+
+        This method also supports an optional ``min_periods`` keyword
+        that specifies the required minimum number of non-NA observations for
+        each column pair in order to have a valid result:
+
+        >>> np.random.seed(42)
+        >>> df = pd.DataFrame(np.random.randn(20, 3),
+        ...                   columns=['a', 'b', 'c'])
+        >>> df.loc[df.index[:5], 'a'] = np.nan
+        >>> df.loc[df.index[5:10], 'b'] = np.nan
+        >>> df.cov(min_periods=12)
+                  a         b         c
+        a  0.316741       NaN -0.150812
+        b       NaN  1.248003  0.191417
+        c -0.150812  0.191417  0.895202
+        """
+        numeric_df = self._get_numeric_data()
+        cols = numeric_df.columns
+        idx = cols.copy()
+        mat = numeric_df.to_numpy(dtype=float, na_value=np.nan, copy=False)
+
+        if notna(mat).all():
+            if min_periods is not None and min_periods > len(mat):
+                base_cov = np.empty((mat.shape[1], mat.shape[1]))
+                base_cov.fill(np.nan)
+            else:
+                base_cov = np.cov(mat.T, ddof=ddof)
+            base_cov = base_cov.reshape((len(cols), len(cols)))
+        else:
+            base_cov = libalgos.nancorr(mat, cov=True, minp=min_periods)
+
+        return self._constructor(base_cov, index=idx, columns=cols)
+
+    def corrwith(self, other, axis: Axis = 0, drop=False, method="pearson") -> Series:
+        """
+        Compute pairwise correlation.
+
+        Pairwise correlation is computed between rows or columns of
+        DataFrame with rows or columns of Series or DataFrame. DataFrames
+        are first aligned along both axes before computing the
+        correlations.
+
+        Parameters
+        ----------
+        other : DataFrame, Series
+            Object with which to compute correlations.
+        axis : {0 or 'index', 1 or 'columns'}, default 0
+            The axis to use. 0 or 'index' to compute column-wise, 1 or 'columns' for
+            row-wise.
+        drop : bool, default False
+            Drop missing indices from result.
+        method : {'pearson', 'kendall', 'spearman'} or callable
+            Method of correlation:
+
+            * pearson : standard correlation coefficient
+            * kendall : Kendall Tau correlation coefficient
+            * spearman : Spearman rank correlation
+            * callable: callable with input two 1d ndarrays
+                and returning a float.
+
+        Returns
+        -------
+        Series
+            Pairwise correlations.
+
+        See Also
+        --------
+        DataFrame.corr : Compute pairwise correlation of columns.
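+
+        Examples
+        --------
+        The frames below are purely illustrative; columns that are exact
+        linear functions of each other correlate at ``1.0``:
+
+        >>> index = ["a", "b", "c", "d", "e"]
+        >>> columns = ["one", "two", "three", "four"]
+        >>> df1 = pd.DataFrame(np.arange(20).reshape(5, 4),
+        ...                    index=index, columns=columns)
+        >>> df2 = pd.DataFrame(np.arange(16).reshape(4, 4),
+        ...                    index=index[:4], columns=columns)
+        >>> df1.corrwith(df2)
+        one      1.0
+        two      1.0
+        three    1.0
+        four     1.0
+        dtype: float64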
+ """ + axis = self._get_axis_number(axis) + this = self._get_numeric_data() + + if isinstance(other, Series): + return this.apply(lambda x: other.corr(x, method=method), axis=axis) + + other = other._get_numeric_data() + left, right = this.align(other, join="inner", copy=False) + + if axis == 1: + left = left.T + right = right.T + + if method == "pearson": + # mask missing values + left = left + right * 0 + right = right + left * 0 + + # demeaned data + ldem = left - left.mean() + rdem = right - right.mean() + + num = (ldem * rdem).sum() + dom = (left.count() - 1) * left.std() * right.std() + + correl = num / dom + + elif method in ["kendall", "spearman"] or callable(method): + + def c(x): + return nanops.nancorr(x[0], x[1], method=method) + + correl = self._constructor_sliced( + map(c, zip(left.values.T, right.values.T)), index=left.columns + ) + + else: + raise ValueError( + f"Invalid method {method} was passed, " + "valid methods are: 'pearson', 'kendall', " + "'spearman', or callable" + ) + + if not drop: + # Find non-matching labels along the given axis + # and append missing correlations (GH 22375) + raxis = 1 if axis == 0 else 0 + result_index = this._get_axis(raxis).union(other._get_axis(raxis)) + idx_diff = result_index.difference(correl.index) + + if len(idx_diff) > 0: + correl = correl._append( + Series([np.nan] * len(idx_diff), index=idx_diff) + ) + + return correl + + # ---------------------------------------------------------------------- + # ndarray-like stats methods + + def count( + self, axis: Axis = 0, level: Level | None = None, numeric_only: bool = False + ): + """ + Count non-NA cells for each column or row. + + The values `None`, `NaN`, `NaT`, and optionally `numpy.inf` (depending + on `pandas.options.mode.use_inf_as_na`) are considered NA. + + Parameters + ---------- + axis : {0 or 'index', 1 or 'columns'}, default 0 + If 0 or 'index' counts are generated for each column. + If 1 or 'columns' counts are generated for each row. + level : int or str, optional + If the axis is a `MultiIndex` (hierarchical), count along a + particular `level`, collapsing into a `DataFrame`. + A `str` specifies the level name. + numeric_only : bool, default False + Include only `float`, `int` or `boolean` data. + + Returns + ------- + Series or DataFrame + For each column/row the number of non-NA/null entries. + If `level` is specified returns a `DataFrame`. + + See Also + -------- + Series.count: Number of non-NA elements in a Series. + DataFrame.value_counts: Count unique combinations of columns. + DataFrame.shape: Number of DataFrame rows and columns (including NA + elements). + DataFrame.isna: Boolean same-sized DataFrame showing places of NA + elements. + + Examples + -------- + Constructing DataFrame from a dictionary: + + >>> df = pd.DataFrame({"Person": + ... ["John", "Myla", "Lewis", "John", "Myla"], + ... "Age": [24., np.nan, 21., 33, 26], + ... "Single": [False, True, True, True, False]}) + >>> df + Person Age Single + 0 John 24.0 False + 1 Myla NaN True + 2 Lewis 21.0 True + 3 John 33.0 True + 4 Myla 26.0 False + + Notice the uncounted NA values: + + >>> df.count() + Person 5 + Age 4 + Single 5 + dtype: int64 + + Counts for each **row**: + + >>> df.count(axis='columns') + 0 3 + 1 2 + 2 3 + 3 3 + 4 3 + dtype: int64 + """ + axis = self._get_axis_number(axis) + if level is not None: + warnings.warn( + "Using the level keyword in DataFrame and Series aggregations is " + "deprecated and will be removed in a future version. Use groupby " + "instead. 
df.count(level=1) should use df.groupby(level=1).count().", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self._count_level(level, axis=axis, numeric_only=numeric_only) + + if numeric_only: + frame = self._get_numeric_data() + else: + frame = self + + # GH #423 + if len(frame._get_axis(axis)) == 0: + result = self._constructor_sliced(0, index=frame._get_agg_axis(axis)) + else: + if frame._is_mixed_type or frame._mgr.any_extension_types: + # the or any_extension_types is really only hit for single- + # column frames with an extension array + result = notna(frame).sum(axis=axis) + else: + # GH13407 + series_counts = notna(frame).sum(axis=axis) + counts = series_counts.values + result = self._constructor_sliced( + counts, index=frame._get_agg_axis(axis) + ) + + return result.astype("int64") + + def _count_level(self, level: Level, axis: int = 0, numeric_only: bool = False): + if numeric_only: + frame = self._get_numeric_data() + else: + frame = self + + count_axis = frame._get_axis(axis) + agg_axis = frame._get_agg_axis(axis) + + if not isinstance(count_axis, MultiIndex): + raise TypeError( + f"Can only count levels on hierarchical {self._get_axis_name(axis)}." + ) + + # Mask NaNs: Mask rows or columns where the index level is NaN, and all + # values in the DataFrame that are NaN + if frame._is_mixed_type: + # Since we have mixed types, calling notna(frame.values) might + # upcast everything to object + values_mask = notna(frame).values + else: + # But use the speedup when we have homogeneous dtypes + values_mask = notna(frame.values) + + index_mask = notna(count_axis.get_level_values(level=level)) + if axis == 1: + mask = index_mask & values_mask + else: + mask = index_mask.reshape(-1, 1) & values_mask + + if isinstance(level, str): + level = count_axis._get_level_number(level) + + level_name = count_axis._names[level] + level_index = count_axis.levels[level]._rename(name=level_name) + level_codes = ensure_platform_int(count_axis.codes[level]) + counts = lib.count_level_2d(mask, level_codes, len(level_index), axis=axis) + + if axis == 1: + result = self._constructor(counts, index=agg_axis, columns=level_index) + else: + result = self._constructor(counts, index=level_index, columns=agg_axis) + + return result + + def _reduce( + self, + op, + name: str, + *, + axis: Axis = 0, + skipna: bool = True, + numeric_only: bool | None = None, + filter_type=None, + **kwds, + ): + + assert filter_type is None or filter_type == "bool", filter_type + out_dtype = "bool" if filter_type == "bool" else None + + if numeric_only is None and name in ["mean", "median"]: + own_dtypes = [arr.dtype for arr in self._mgr.arrays] + + dtype_is_dt = np.array( + [is_datetime64_any_dtype(dtype) for dtype in own_dtypes], + dtype=bool, + ) + if dtype_is_dt.any(): + warnings.warn( + "DataFrame.mean and DataFrame.median with numeric_only=None " + "will include datetime64 and datetime64tz columns in a " + "future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + # Non-copy equivalent to + # dt64_cols = self.dtypes.apply(is_datetime64_any_dtype) + # cols = self.columns[~dt64_cols] + # self = self[cols] + predicate = lambda x: not is_datetime64_any_dtype(x.dtype) + mgr = self._mgr._get_data_subset(predicate) + self = type(self)(mgr) + + # TODO: Make other agg func handle axis=None properly GH#21597 + axis = self._get_axis_number(axis) + labels = self._get_agg_axis(axis) + assert axis in [0, 1] + + def func(values: np.ndarray): + # We only use this in the case that operates on self.values + return 
op(values, axis=axis, skipna=skipna, **kwds) + + def blk_func(values, axis=1): + if isinstance(values, ExtensionArray): + if not is_1d_only_ea_obj(values) and not isinstance( + self._mgr, ArrayManager + ): + return values._reduce(name, axis=1, skipna=skipna, **kwds) + return values._reduce(name, skipna=skipna, **kwds) + else: + return op(values, axis=axis, skipna=skipna, **kwds) + + def _get_data() -> DataFrame: + if filter_type is None: + data = self._get_numeric_data() + else: + # GH#25101, GH#24434 + assert filter_type == "bool" + data = self._get_bool_data() + return data + + if numeric_only is not None or axis == 0: + # For numeric_only non-None and axis non-None, we know + # which blocks to use and no try/except is needed. + # For numeric_only=None only the case with axis==0 and no object + # dtypes are unambiguous can be handled with BlockManager.reduce + # Case with EAs see GH#35881 + df = self + if numeric_only is True: + df = _get_data() + if axis == 1: + df = df.T + axis = 0 + + ignore_failures = numeric_only is None + + # After possibly _get_data and transposing, we are now in the + # simple case where we can use BlockManager.reduce + res, _ = df._mgr.reduce(blk_func, ignore_failures=ignore_failures) + out = df._constructor(res).iloc[0] + if out_dtype is not None: + out = out.astype(out_dtype) + if axis == 0 and len(self) == 0 and name in ["sum", "prod"]: + # Even if we are object dtype, follow numpy and return + # float64, see test_apply_funcs_over_empty + out = out.astype(np.float64) + + if numeric_only is None and out.shape[0] != df.shape[1]: + # columns have been dropped GH#41480 + arg_name = "numeric_only" + if name in ["all", "any"]: + arg_name = "bool_only" + warnings.warn( + "Dropping of nuisance columns in DataFrame reductions " + f"(with '{arg_name}=None') is deprecated; in a future " + "version this will raise TypeError. Select only valid " + "columns before calling the reduction.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + return out + + assert numeric_only is None + + data = self + values = data.values + + try: + result = func(values) + + except TypeError: + # e.g. in nanops trying to convert strs to float + + data = _get_data() + labels = data._get_agg_axis(axis) + + values = data.values + with np.errstate(all="ignore"): + result = func(values) + + # columns have been dropped GH#41480 + arg_name = "numeric_only" + if name in ["all", "any"]: + arg_name = "bool_only" + warnings.warn( + "Dropping of nuisance columns in DataFrame reductions " + f"(with '{arg_name}=None') is deprecated; in a future " + "version this will raise TypeError. Select only valid " + "columns before calling the reduction.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + if hasattr(result, "dtype"): + if filter_type == "bool" and notna(result).all(): + result = result.astype(np.bool_) + elif filter_type is None and is_object_dtype(result.dtype): + try: + result = result.astype(np.float64) + except (ValueError, TypeError): + # try to coerce to the original dtypes item by item if we can + pass + + result = self._constructor_sliced(result, index=labels) + return result + + def _reduce_axis1(self, name: str, func, skipna: bool) -> Series: + """ + Special case for _reduce to try to avoid a potentially-expensive transpose. + + Apply the reduction block-wise along axis=1 and then reduce the resulting + 1D arrays. 
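+
+        For example, ``df.any(axis=1)`` on an all-boolean frame can take this
+        path: each block is reduced to one partial 1D result and the partials
+        are folded together with ``np.logical_or`` (``np.logical_and`` for
+        ``"all"``).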
+ """ + if name == "all": + result = np.ones(len(self), dtype=bool) + ufunc = np.logical_and + elif name == "any": + result = np.zeros(len(self), dtype=bool) + # error: Incompatible types in assignment + # (expression has type "_UFunc_Nin2_Nout1[Literal['logical_or'], + # Literal[20], Literal[False]]", variable has type + # "_UFunc_Nin2_Nout1[Literal['logical_and'], Literal[20], + # Literal[True]]") + ufunc = np.logical_or # type: ignore[assignment] + else: + raise NotImplementedError(name) + + for arr in self._mgr.arrays: + middle = func(arr, axis=0, skipna=skipna) + result = ufunc(result, middle) + + res_ser = self._constructor_sliced(result, index=self.index) + return res_ser + + def nunique(self, axis: Axis = 0, dropna: bool = True) -> Series: + """ + Count number of distinct elements in specified axis. + + Return Series with number of distinct elements. Can ignore NaN + values. + + Parameters + ---------- + axis : {0 or 'index', 1 or 'columns'}, default 0 + The axis to use. 0 or 'index' for row-wise, 1 or 'columns' for + column-wise. + dropna : bool, default True + Don't include NaN in the counts. + + Returns + ------- + Series + + See Also + -------- + Series.nunique: Method nunique for Series. + DataFrame.count: Count non-NA cells for each column or row. + + Examples + -------- + >>> df = pd.DataFrame({'A': [4, 5, 6], 'B': [4, 1, 1]}) + >>> df.nunique() + A 3 + B 2 + dtype: int64 + + >>> df.nunique(axis=1) + 0 1 + 1 2 + 2 2 + dtype: int64 + """ + return self.apply(Series.nunique, axis=axis, dropna=dropna) + + def idxmin(self, axis: Axis = 0, skipna: bool = True) -> Series: + """ + Return index of first occurrence of minimum over requested axis. + + NA/null values are excluded. + + Parameters + ---------- + axis : {0 or 'index', 1 or 'columns'}, default 0 + The axis to use. 0 or 'index' for row-wise, 1 or 'columns' for column-wise. + skipna : bool, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA. + + Returns + ------- + Series + Indexes of minima along the specified axis. + + Raises + ------ + ValueError + * If the row/column is empty + + See Also + -------- + Series.idxmin : Return index of the minimum element. + + Notes + ----- + This method is the DataFrame version of ``ndarray.argmin``. + + Examples + -------- + Consider a dataset containing food consumption in Argentina. + + >>> df = pd.DataFrame({'consumption': [10.51, 103.11, 55.48], + ... 'co2_emissions': [37.2, 19.66, 1712]}, + ... index=['Pork', 'Wheat Products', 'Beef']) + + >>> df + consumption co2_emissions + Pork 10.51 37.20 + Wheat Products 103.11 19.66 + Beef 55.48 1712.00 + + By default, it returns the index for the minimum value in each column. + + >>> df.idxmin() + consumption Pork + co2_emissions Wheat Products + dtype: object + + To return the index for the minimum value in each row, use ``axis="columns"``. 
+ + >>> df.idxmin(axis="columns") + Pork consumption + Wheat Products co2_emissions + Beef consumption + dtype: object + """ + axis = self._get_axis_number(axis) + + res = self._reduce( + nanops.nanargmin, "argmin", axis=axis, skipna=skipna, numeric_only=False + ) + indices = res._values + + # indices will always be np.ndarray since axis is not None and + # values is a 2d array for DataFrame + # error: Item "int" of "Union[int, Any]" has no attribute "__iter__" + assert isinstance(indices, np.ndarray) # for mypy + + index = self._get_axis(axis) + result = [index[i] if i >= 0 else np.nan for i in indices] + return self._constructor_sliced(result, index=self._get_agg_axis(axis)) + + def idxmax(self, axis: Axis = 0, skipna: bool = True) -> Series: + """ + Return index of first occurrence of maximum over requested axis. + + NA/null values are excluded. + + Parameters + ---------- + axis : {0 or 'index', 1 or 'columns'}, default 0 + The axis to use. 0 or 'index' for row-wise, 1 or 'columns' for column-wise. + skipna : bool, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA. + + Returns + ------- + Series + Indexes of maxima along the specified axis. + + Raises + ------ + ValueError + * If the row/column is empty + + See Also + -------- + Series.idxmax : Return index of the maximum element. + + Notes + ----- + This method is the DataFrame version of ``ndarray.argmax``. + + Examples + -------- + Consider a dataset containing food consumption in Argentina. + + >>> df = pd.DataFrame({'consumption': [10.51, 103.11, 55.48], + ... 'co2_emissions': [37.2, 19.66, 1712]}, + ... index=['Pork', 'Wheat Products', 'Beef']) + + >>> df + consumption co2_emissions + Pork 10.51 37.20 + Wheat Products 103.11 19.66 + Beef 55.48 1712.00 + + By default, it returns the index for the maximum value in each column. + + >>> df.idxmax() + consumption Wheat Products + co2_emissions Beef + dtype: object + + To return the index for the maximum value in each row, use ``axis="columns"``. + + >>> df.idxmax(axis="columns") + Pork co2_emissions + Wheat Products consumption + Beef co2_emissions + dtype: object + """ + axis = self._get_axis_number(axis) + + res = self._reduce( + nanops.nanargmax, "argmax", axis=axis, skipna=skipna, numeric_only=False + ) + indices = res._values + + # indices will always be np.ndarray since axis is not None and + # values is a 2d array for DataFrame + # error: Item "int" of "Union[int, Any]" has no attribute "__iter__" + assert isinstance(indices, np.ndarray) # for mypy + + index = self._get_axis(axis) + result = [index[i] if i >= 0 else np.nan for i in indices] + return self._constructor_sliced(result, index=self._get_agg_axis(axis)) + + def _get_agg_axis(self, axis_num: int) -> Index: + """ + Let's be explicit about this. + """ + if axis_num == 0: + return self.columns + elif axis_num == 1: + return self.index + else: + raise ValueError(f"Axis must be 0 or 1 (got {repr(axis_num)})") + + def mode( + self, axis: Axis = 0, numeric_only: bool = False, dropna: bool = True + ) -> DataFrame: + """ + Get the mode(s) of each element along the selected axis. + + The mode of a set of values is the value that appears most often. + It can be multiple values. + + Parameters + ---------- + axis : {0 or 'index', 1 or 'columns'}, default 0 + The axis to iterate over while searching for the mode: + + * 0 or 'index' : get mode of each column + * 1 or 'columns' : get mode of each row. + + numeric_only : bool, default False + If True, only apply to numeric columns. 
+
+        dropna : bool, default True
+            Don't consider counts of NaN/NaT.
+
+        Returns
+        -------
+        DataFrame
+            The modes of each column or row.
+
+        See Also
+        --------
+        Series.mode : Return the highest frequency value in a Series.
+        Series.value_counts : Return the counts of values in a Series.
+
+        Examples
+        --------
+        >>> df = pd.DataFrame([('bird', 2, 2),
+        ...                    ('mammal', 4, np.nan),
+        ...                    ('arthropod', 8, 0),
+        ...                    ('bird', 2, np.nan)],
+        ...                   index=('falcon', 'horse', 'spider', 'ostrich'),
+        ...                   columns=('species', 'legs', 'wings'))
+        >>> df
+                   species  legs  wings
+        falcon        bird     2    2.0
+        horse       mammal     4    NaN
+        spider   arthropod     8    0.0
+        ostrich       bird     2    NaN
+
+        By default, missing values are not considered, and the modes of wings
+        are both 0 and 2. Because the resulting DataFrame has two rows,
+        the second row of ``species`` and ``legs`` contains ``NaN``.
+
+        >>> df.mode()
+          species  legs  wings
+        0    bird   2.0    0.0
+        1     NaN   NaN    2.0
+
+        With ``dropna=False``, ``NaN`` values are considered and they can be
+        the mode (like for wings).
+
+        >>> df.mode(dropna=False)
+          species  legs  wings
+        0    bird     2    NaN
+
+        Setting ``numeric_only=True``, only the mode of numeric columns is
+        computed, and columns of other types are ignored.
+
+        >>> df.mode(numeric_only=True)
+           legs  wings
+        0   2.0    0.0
+        1   NaN    2.0
+
+        To compute the mode over columns and not rows, use the axis parameter:
+
+        >>> df.mode(axis='columns', numeric_only=True)
+                   0    1
+        falcon   2.0  NaN
+        horse    4.0  NaN
+        spider   0.0  8.0
+        ostrich  2.0  NaN
+        """
+        data = self if not numeric_only else self._get_numeric_data()
+
+        def f(s):
+            return s.mode(dropna=dropna)
+
+        data = data.apply(f, axis=axis)
+        # Ensure index is type stable (should always use int index)
+        if data.empty:
+            data.index = default_index(0)
+
+        return data
+
+    def quantile(
+        self,
+        q=0.5,
+        axis: Axis = 0,
+        numeric_only: bool = True,
+        interpolation: str = "linear",
+    ):
+        """
+        Return values at the given quantile over requested axis.
+
+        Parameters
+        ----------
+        q : float or array-like, default 0.5 (50% quantile)
+            Value between 0 <= q <= 1, the quantile(s) to compute.
+        axis : {0, 1, 'index', 'columns'}, default 0
+            Equals 0 or 'index' for row-wise, 1 or 'columns' for column-wise.
+        numeric_only : bool, default True
+            If False, the quantile of datetime and timedelta data will be
+            computed as well.
+        interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
+            This optional parameter specifies the interpolation method to use,
+            when the desired quantile lies between two data points `i` and `j`:
+
+            * linear: `i + (j - i) * fraction`, where `fraction` is the
+              fractional part of the index surrounded by `i` and `j`.
+            * lower: `i`.
+            * higher: `j`.
+            * nearest: `i` or `j` whichever is nearest.
+            * midpoint: (`i` + `j`) / 2.
+
+        Returns
+        -------
+        Series or DataFrame
+
+            If ``q`` is an array, a DataFrame will be returned where the
+              index is ``q``, the columns are the columns of self, and the
+              values are the quantiles.
+            If ``q`` is a float, a Series will be returned where the
+              index is the columns of self and the values are the quantiles.
+
+        See Also
+        --------
+        core.window.Rolling.quantile: Rolling quantile.
+        numpy.percentile: Numpy function to compute the percentile.
+
+        Examples
+        --------
+        >>> df = pd.DataFrame(np.array([[1, 1], [2, 10], [3, 100], [4, 100]]),
+        ...
columns=['a', 'b']) + >>> df.quantile(.1) + a 1.3 + b 3.7 + Name: 0.1, dtype: float64 + >>> df.quantile([.1, .5]) + a b + 0.1 1.3 3.7 + 0.5 2.5 55.0 + + Specifying `numeric_only=False` will also compute the quantile of + datetime and timedelta data. + + >>> df = pd.DataFrame({'A': [1, 2], + ... 'B': [pd.Timestamp('2010'), + ... pd.Timestamp('2011')], + ... 'C': [pd.Timedelta('1 days'), + ... pd.Timedelta('2 days')]}) + >>> df.quantile(0.5, numeric_only=False) + A 1.5 + B 2010-07-02 12:00:00 + C 1 days 12:00:00 + Name: 0.5, dtype: object + """ + validate_percentile(q) + + if not is_list_like(q): + # BlockManager.quantile expects listlike, so we wrap and unwrap here + res = self.quantile( + [q], axis=axis, numeric_only=numeric_only, interpolation=interpolation + ) + return res.iloc[0] + + q = Index(q, dtype=np.float64) + data = self._get_numeric_data() if numeric_only else self + axis = self._get_axis_number(axis) + + if axis == 1: + data = data.T + + if len(data.columns) == 0: + # GH#23925 _get_numeric_data may have dropped all columns + cols = Index([], name=self.columns.name) + if is_list_like(q): + return self._constructor([], index=q, columns=cols) + return self._constructor_sliced([], index=cols, name=q, dtype=np.float64) + + res = data._mgr.quantile(qs=q, axis=1, interpolation=interpolation) + + result = self._constructor(res) + return result + + @doc(NDFrame.asfreq, **_shared_doc_kwargs) + def asfreq( + self, + freq: Frequency, + method=None, + how: str | None = None, + normalize: bool = False, + fill_value=None, + ) -> DataFrame: + return super().asfreq( + freq=freq, + method=method, + how=how, + normalize=normalize, + fill_value=fill_value, + ) + + @doc(NDFrame.resample, **_shared_doc_kwargs) + def resample( + self, + rule, + axis=0, + closed: str | None = None, + label: str | None = None, + convention: str = "start", + kind: str | None = None, + loffset=None, + base: int | None = None, + on=None, + level=None, + origin: str | TimestampConvertibleTypes = "start_day", + offset: TimedeltaConvertibleTypes | None = None, + ) -> Resampler: + return super().resample( + rule=rule, + axis=axis, + closed=closed, + label=label, + convention=convention, + kind=kind, + loffset=loffset, + base=base, + on=on, + level=level, + origin=origin, + offset=offset, + ) + + def to_timestamp( + self, + freq: Frequency | None = None, + how: str = "start", + axis: Axis = 0, + copy: bool = True, + ) -> DataFrame: + """ + Cast to DatetimeIndex of timestamps, at *beginning* of period. + + Parameters + ---------- + freq : str, default frequency of PeriodIndex + Desired frequency. + how : {'s', 'e', 'start', 'end'} + Convention for converting period to timestamp; start of period + vs. end. + axis : {0 or 'index', 1 or 'columns'}, default 0 + The axis to convert (the index by default). + copy : bool, default True + If False then underlying input data is not copied. + + Returns + ------- + DataFrame with DatetimeIndex + """ + new_obj = self.copy(deep=copy) + + axis_name = self._get_axis_name(axis) + old_ax = getattr(self, axis_name) + if not isinstance(old_ax, PeriodIndex): + raise TypeError(f"unsupported Type {type(old_ax).__name__}") + + new_ax = old_ax.to_timestamp(freq=freq, how=how) + + setattr(new_obj, axis_name, new_ax) + return new_obj + + def to_period( + self, freq: Frequency | None = None, axis: Axis = 0, copy: bool = True + ) -> DataFrame: + """ + Convert DataFrame from DatetimeIndex to PeriodIndex. 
+ + Convert DataFrame from DatetimeIndex to PeriodIndex with desired + frequency (inferred from index if not passed). + + Parameters + ---------- + freq : str, default + Frequency of the PeriodIndex. + axis : {0 or 'index', 1 or 'columns'}, default 0 + The axis to convert (the index by default). + copy : bool, default True + If False then underlying input data is not copied. + + Returns + ------- + DataFrame with PeriodIndex + + Examples + -------- + >>> idx = pd.to_datetime( + ... [ + ... "2001-03-31 00:00:00", + ... "2002-05-31 00:00:00", + ... "2003-08-31 00:00:00", + ... ] + ... ) + + >>> idx + DatetimeIndex(['2001-03-31', '2002-05-31', '2003-08-31'], + dtype='datetime64[ns]', freq=None) + + >>> idx.to_period("M") + PeriodIndex(['2001-03', '2002-05', '2003-08'], dtype='period[M]') + + For the yearly frequency + + >>> idx.to_period("Y") + PeriodIndex(['2001', '2002', '2003'], dtype='period[A-DEC]') + """ + new_obj = self.copy(deep=copy) + + axis_name = self._get_axis_name(axis) + old_ax = getattr(self, axis_name) + if not isinstance(old_ax, DatetimeIndex): + raise TypeError(f"unsupported Type {type(old_ax).__name__}") + + new_ax = old_ax.to_period(freq=freq) + + setattr(new_obj, axis_name, new_ax) + return new_obj + + def isin(self, values) -> DataFrame: + """ + Whether each element in the DataFrame is contained in values. + + Parameters + ---------- + values : iterable, Series, DataFrame or dict + The result will only be true at a location if all the + labels match. If `values` is a Series, that's the index. If + `values` is a dict, the keys must be the column names, + which must match. If `values` is a DataFrame, + then both the index and column labels must match. + + Returns + ------- + DataFrame + DataFrame of booleans showing whether each element in the DataFrame + is contained in values. + + See Also + -------- + DataFrame.eq: Equality test for DataFrame. + Series.isin: Equivalent method on Series. + Series.str.contains: Test if pattern or regex is contained within a + string of a Series or Index. + + Examples + -------- + >>> df = pd.DataFrame({'num_legs': [2, 4], 'num_wings': [2, 0]}, + ... index=['falcon', 'dog']) + >>> df + num_legs num_wings + falcon 2 2 + dog 4 0 + + When ``values`` is a list check whether every value in the DataFrame + is present in the list (which animals have 0 or 2 legs or wings) + + >>> df.isin([0, 2]) + num_legs num_wings + falcon True True + dog False True + + To check if ``values`` is *not* in the DataFrame, use the ``~`` operator: + + >>> ~df.isin([0, 2]) + num_legs num_wings + falcon False False + dog True False + + When ``values`` is a dict, we can pass values to check for each + column separately: + + >>> df.isin({'num_wings': [0, 3]}) + num_legs num_wings + falcon False False + dog False True + + When ``values`` is a Series or DataFrame the index and column must + match. Note that 'falcon' does not match based on the number of legs + in other. + + >>> other = pd.DataFrame({'num_legs': [8, 3], 'num_wings': [0, 2]}, + ... 
index=['spider', 'falcon']) + >>> df.isin(other) + num_legs num_wings + falcon False True + dog False False + """ + if isinstance(values, dict): + from pandas.core.reshape.concat import concat + + values = collections.defaultdict(list, values) + return concat( + ( + self.iloc[:, [i]].isin(values[col]) + for i, col in enumerate(self.columns) + ), + axis=1, + ) + elif isinstance(values, Series): + if not values.index.is_unique: + raise ValueError("cannot compute isin with a duplicate axis.") + return self.eq(values.reindex_like(self), axis="index") + elif isinstance(values, DataFrame): + if not (values.columns.is_unique and values.index.is_unique): + raise ValueError("cannot compute isin with a duplicate axis.") + return self.eq(values.reindex_like(self)) + else: + if not is_list_like(values): + raise TypeError( + "only list-like or dict-like objects are allowed " + "to be passed to DataFrame.isin(), " + f"you passed a '{type(values).__name__}'" + ) + return self._constructor( + algorithms.isin(self.values.ravel(), values).reshape(self.shape), + self.index, + self.columns, + ) + + # ---------------------------------------------------------------------- + # Add index and columns + _AXIS_ORDERS = ["index", "columns"] + _AXIS_TO_AXIS_NUMBER: dict[Axis, int] = { + **NDFrame._AXIS_TO_AXIS_NUMBER, + 1: 1, + "columns": 1, + } + _AXIS_LEN = len(_AXIS_ORDERS) + _info_axis_number = 1 + _info_axis_name = "columns" + + index: Index = properties.AxisProperty( + axis=1, doc="The index (row labels) of the DataFrame." + ) + columns: Index = properties.AxisProperty( + axis=0, doc="The column labels of the DataFrame." + ) + + @property + def _AXIS_NUMBERS(self) -> dict[str, int]: + """.. deprecated:: 1.1.0""" + super()._AXIS_NUMBERS + return {"index": 0, "columns": 1} + + @property + def _AXIS_NAMES(self) -> dict[int, str]: + """.. deprecated:: 1.1.0""" + super()._AXIS_NAMES + return {0: "index", 1: "columns"} + + # ---------------------------------------------------------------------- + # Add plotting methods to DataFrame + plot = CachedAccessor("plot", pandas.plotting.PlotAccessor) + hist = pandas.plotting.hist_frame + boxplot = pandas.plotting.boxplot_frame + sparse = CachedAccessor("sparse", SparseFrameAccessor) + + # ---------------------------------------------------------------------- + # Internal Interface Methods + + def _to_dict_of_blocks(self, copy: bool = True): + """ + Return a dict of dtype -> Constructor Types that + each is a homogeneous dtype. + + Internal ONLY - only works for BlockManager + """ + mgr = self._mgr + # convert to BlockManager if needed -> this way support ArrayManager as well + mgr = mgr_to_mgr(mgr, "block") + mgr = cast(BlockManager, mgr) + return { + k: self._constructor(v).__finalize__(self) + for k, v, in mgr.to_dict(copy=copy).items() + } + + @property + def values(self) -> np.ndarray: + """ + Return a Numpy representation of the DataFrame. + + .. warning:: + + We recommend using :meth:`DataFrame.to_numpy` instead. + + Only the values in the DataFrame will be returned, the axes labels + will be removed. + + Returns + ------- + numpy.ndarray + The values of the DataFrame. + + See Also + -------- + DataFrame.to_numpy : Recommended alternative to this method. + DataFrame.index : Retrieve the index labels. + DataFrame.columns : Retrieving the column names. + + Notes + ----- + The dtype will be a lower-common-denominator dtype (implicit + upcasting); that is to say if the dtypes (even of numeric types) + are mixed, the one that accommodates all will be chosen. 
Use this + with care if you are not dealing with the blocks. + + e.g. If the dtypes are float16 and float32, dtype will be upcast to + float32. If dtypes are int32 and uint8, dtype will be upcast to + int32. By :func:`numpy.find_common_type` convention, mixing int64 + and uint64 will result in a float64 dtype. + + Examples + -------- + A DataFrame where all columns are the same type (e.g., int64) results + in an array of the same type. + + >>> df = pd.DataFrame({'age': [ 3, 29], + ... 'height': [94, 170], + ... 'weight': [31, 115]}) + >>> df + age height weight + 0 3 94 31 + 1 29 170 115 + >>> df.dtypes + age int64 + height int64 + weight int64 + dtype: object + >>> df.values + array([[ 3, 94, 31], + [ 29, 170, 115]]) + + A DataFrame with mixed type columns(e.g., str/object, int64, float32) + results in an ndarray of the broadest type that accommodates these + mixed types (e.g., object). + + >>> df2 = pd.DataFrame([('parrot', 24.0, 'second'), + ... ('lion', 80.5, 1), + ... ('monkey', np.nan, None)], + ... columns=('name', 'max_speed', 'rank')) + >>> df2.dtypes + name object + max_speed float64 + rank object + dtype: object + >>> df2.values + array([['parrot', 24.0, 'second'], + ['lion', 80.5, 1], + ['monkey', nan, None]], dtype=object) + """ + self._consolidate_inplace() + return self._mgr.as_array() + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + def ffill( + self: DataFrame, + axis: None | Axis = None, + inplace: bool = False, + limit: None | int = None, + downcast=None, + ) -> DataFrame | None: + return super().ffill(axis, inplace, limit, downcast) + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + def bfill( + self: DataFrame, + axis: None | Axis = None, + inplace: bool = False, + limit: None | int = None, + downcast=None, + ) -> DataFrame | None: + return super().bfill(axis, inplace, limit, downcast) + + @deprecate_nonkeyword_arguments( + version=None, allowed_args=["self", "lower", "upper"] + ) + def clip( + self: DataFrame, + lower=None, + upper=None, + axis: Axis | None = None, + inplace: bool = False, + *args, + **kwargs, + ) -> DataFrame | None: + return super().clip(lower, upper, axis, inplace, *args, **kwargs) + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "method"]) + def interpolate( + self: DataFrame, + method: str = "linear", + axis: Axis = 0, + limit: int | None = None, + inplace: bool = False, + limit_direction: str | None = None, + limit_area: str | None = None, + downcast: str | None = None, + **kwargs, + ) -> DataFrame | None: + return super().interpolate( + method, + axis, + limit, + inplace, + limit_direction, + limit_area, + downcast, + **kwargs, + ) + + @deprecate_nonkeyword_arguments( + version=None, allowed_args=["self", "cond", "other"] + ) + def where( + self, + cond, + other=lib.no_default, + inplace=False, + axis=None, + level=None, + errors="raise", + try_cast=lib.no_default, + ): + return super().where(cond, other, inplace, axis, level, errors, try_cast) + + @deprecate_nonkeyword_arguments( + version=None, allowed_args=["self", "cond", "other"] + ) + def mask( + self, + cond, + other=np.nan, + inplace=False, + axis=None, + level=None, + errors="raise", + try_cast=lib.no_default, + ): + return super().mask(cond, other, inplace, axis, level, errors, try_cast) + + +DataFrame._add_numeric_operations() + +ops.add_flex_arithmetic_methods(DataFrame) + + +def _from_nested_dict(data) -> collections.defaultdict: + new_data: collections.defaultdict = collections.defaultdict(dict) + for index, 
s in data.items(): + for col, v in s.items(): + new_data[col][index] = v + return new_data + + +def _reindex_for_setitem(value: DataFrame | Series, index: Index) -> ArrayLike: + # reindex if necessary + + if value.index.equals(index) or not len(index): + return value._values.copy() + + # GH#4107 + try: + reindexed_value = value.reindex(index)._values + except ValueError as err: + # raised in MultiIndex.from_tuples, see test_insert_error_msmgs + if not value.index.is_unique: + # duplicate axis + raise err + + raise TypeError( + "incompatible index of inserted column with frame index" + ) from err + return reindexed_value diff --git a/.local/lib/python3.8/site-packages/pandas/core/generic.py b/.local/lib/python3.8/site-packages/pandas/core/generic.py new file mode 100644 index 0000000..fee048a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/generic.py @@ -0,0 +1,12183 @@ +# pyright: reportPropertyTypeMismatch=false +from __future__ import annotations + +import collections +from datetime import timedelta +import functools +import gc +import json +import operator +import pickle +import re +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Hashable, + Literal, + Mapping, + Sequence, + Type, + cast, + final, + overload, +) +import warnings +import weakref + +import numpy as np + +from pandas._config import config + +from pandas._libs import lib +from pandas._libs.tslibs import ( + Period, + Tick, + Timestamp, + to_offset, +) +from pandas._typing import ( + ArrayLike, + Axis, + CompressionOptions, + Dtype, + DtypeArg, + DtypeObj, + FilePath, + IndexKeyFunc, + IndexLabel, + JSONSerializable, + Level, + Manager, + NDFrameT, + RandomState, + Renamer, + StorageOptions, + T, + TimedeltaConvertibleTypes, + TimestampConvertibleTypes, + ValueKeyFunc, + WriteBuffer, + npt, +) +from pandas.compat._optional import import_optional_dependency +from pandas.compat.numpy import function as nv +from pandas.errors import ( + AbstractMethodError, + InvalidIndexError, +) +from pandas.util._decorators import ( + doc, + rewrite_axis_style_signature, +) +from pandas.util._exceptions import find_stack_level +from pandas.util._validators import ( + validate_ascending, + validate_bool_kwarg, + validate_fillna_kwargs, + validate_inclusive, +) + +from pandas.core.dtypes.common import ( + ensure_object, + ensure_platform_int, + ensure_str, + is_bool, + is_bool_dtype, + is_datetime64_any_dtype, + is_datetime64tz_dtype, + is_dict_like, + is_dtype_equal, + is_extension_array_dtype, + is_float, + is_list_like, + is_number, + is_numeric_dtype, + is_re_compilable, + is_scalar, + is_timedelta64_dtype, + pandas_dtype, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) +from pandas.core.dtypes.inference import ( + is_hashable, + is_nested_list_like, +) +from pandas.core.dtypes.missing import ( + isna, + notna, +) + +from pandas.core import ( + arraylike, + indexing, + missing, + nanops, +) +import pandas.core.algorithms as algos +from pandas.core.array_algos.replace import should_use_regex +from pandas.core.arrays import ExtensionArray +from pandas.core.base import PandasObject +import pandas.core.common as com +from pandas.core.construction import ( + create_series_with_explicit_dtype, + extract_array, +) +from pandas.core.describe import describe_ndframe +from pandas.core.flags import Flags +from pandas.core.indexes.api import ( + DatetimeIndex, + Index, + MultiIndex, + PeriodIndex, + RangeIndex, + default_index, + ensure_index, +) +from pandas.core.internals import ( + 
ArrayManager, + BlockManager, + SingleArrayManager, +) +from pandas.core.internals.construction import mgr_to_mgr +from pandas.core.missing import find_valid_index +from pandas.core.ops import align_method_FRAME +from pandas.core.reshape.concat import concat +import pandas.core.sample as sample +from pandas.core.shared_docs import _shared_docs +from pandas.core.sorting import get_indexer_indexer +from pandas.core.window import ( + Expanding, + ExponentialMovingWindow, + Rolling, + Window, +) + +from pandas.io.formats import format as fmt +from pandas.io.formats.format import ( + DataFrameFormatter, + DataFrameRenderer, +) +from pandas.io.formats.printing import pprint_thing + +if TYPE_CHECKING: + + from pandas._libs.tslibs import BaseOffset + + from pandas.core.frame import DataFrame + from pandas.core.indexers.objects import BaseIndexer + from pandas.core.resample import Resampler + from pandas.core.series import Series + +# goal is to be able to define the docs close to function, while still being +# able to share +_shared_docs = {**_shared_docs} +_shared_doc_kwargs = { + "axes": "keywords for axes", + "klass": "Series/DataFrame", + "axes_single_arg": "int or labels for object", + "args_transpose": "axes to permute (int or label for object)", + "inplace": """ + inplace : bool, default False + If True, performs operation inplace and returns None.""", + "optional_by": """ + by : str or list of str + Name or list of names to sort by""", + "replace_iloc": """ + This differs from updating with ``.loc`` or ``.iloc``, which require + you to specify a location to update with some value.""", +} + + +bool_t = bool # Need alias because NDFrame has def bool: + + +class NDFrame(PandasObject, indexing.IndexingMixin): + """ + N-dimensional analogue of DataFrame. 
Store multi-dimensional data in a
+    size-mutable, labeled data structure
+
+    Parameters
+    ----------
+    data : BlockManager
+    axes : list
+    copy : bool, default False
+    """
+
+    _internal_names: list[str] = [
+        "_mgr",
+        "_cacher",
+        "_item_cache",
+        "_cache",
+        "_is_copy",
+        "_subtyp",
+        "_name",
+        "_default_kind",
+        "_default_fill_value",
+        "_metadata",
+        "__array_struct__",
+        "__array_interface__",
+        "_flags",
+    ]
+    _internal_names_set: set[str] = set(_internal_names)
+    _accessors: set[str] = set()
+    _hidden_attrs: frozenset[str] = frozenset(
+        ["_AXIS_NAMES", "_AXIS_NUMBERS", "get_values", "tshift"]
+    )
+    _metadata: list[str] = []
+    _is_copy: weakref.ReferenceType[NDFrame] | None = None
+    _mgr: Manager
+    _attrs: dict[Hashable, Any]
+    _typ: str
+
+    # ----------------------------------------------------------------------
+    # Constructors
+
+    def __init__(
+        self,
+        data: Manager,
+        copy: bool_t = False,
+        attrs: Mapping[Hashable, Any] | None = None,
+    ):
+        # copy kwarg is retained for mypy compat, is not used
+
+        object.__setattr__(self, "_is_copy", None)
+        object.__setattr__(self, "_mgr", data)
+        object.__setattr__(self, "_item_cache", {})
+        if attrs is None:
+            attrs = {}
+        else:
+            attrs = dict(attrs)
+        object.__setattr__(self, "_attrs", attrs)
+        object.__setattr__(self, "_flags", Flags(self, allows_duplicate_labels=True))
+
+    @classmethod
+    def _init_mgr(
+        cls,
+        mgr: Manager,
+        axes,
+        dtype: Dtype | None = None,
+        copy: bool_t = False,
+    ) -> Manager:
+        """passed a manager and an axes dict"""
+        for a, axe in axes.items():
+            if axe is not None:
+                axe = ensure_index(axe)
+                bm_axis = cls._get_block_manager_axis(a)
+                mgr = mgr.reindex_axis(axe, axis=bm_axis)
+
+        # make a copy if explicitly requested
+        if copy:
+            mgr = mgr.copy()
+        if dtype is not None:
+            # avoid further copies if we can
+            if (
+                isinstance(mgr, BlockManager)
+                and len(mgr.blocks) == 1
+                and is_dtype_equal(mgr.blocks[0].values.dtype, dtype)
+            ):
+                pass
+            else:
+                mgr = mgr.astype(dtype=dtype)
+        return mgr
+
+    @classmethod
+    def _from_mgr(cls, mgr: Manager):
+        """
+        Fastpath to create a new DataFrame/Series from just a BlockManager/ArrayManager.
+
+        Notes
+        -----
+        Skips setting `_flags` attribute; caller is responsible for doing so.
+        """
+        obj = cls.__new__(cls)
+        object.__setattr__(obj, "_is_copy", None)
+        object.__setattr__(obj, "_mgr", mgr)
+        object.__setattr__(obj, "_item_cache", {})
+        object.__setattr__(obj, "_attrs", {})
+        return obj
+
+    def _as_manager(self: NDFrameT, typ: str, copy: bool_t = True) -> NDFrameT:
+        """
+        Private helper function to create a DataFrame with specific manager.
+
+        Parameters
+        ----------
+        typ : {"block", "array"}
+        copy : bool, default True
+            Only controls whether the conversion from Block->ArrayManager
+            copies the 1D arrays (to ensure proper/contiguous memory layout).
+
+        Returns
+        -------
+        DataFrame
+            New DataFrame using specified manager type. Is not guaranteed
+            to be a copy or not.
+        """
+        new_mgr: Manager
+        new_mgr = mgr_to_mgr(self._mgr, typ=typ, copy=copy)
+        # fastpath of passing a manager doesn't check the option/manager class
+        return self._constructor(new_mgr).__finalize__(self)
+
+    # ----------------------------------------------------------------------
+    # attrs and flags
+
+    @property
+    def attrs(self) -> dict[Hashable, Any]:
+        """
+        Dictionary of global attributes of this dataset.
+
+        .. warning::
+
+            attrs is experimental and may change without warning.
+
+        See Also
+        --------
+        DataFrame.flags : Global flags applying to this object.
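+
+        Examples
+        --------
+        The ``"source"`` key below is purely illustrative; any metadata
+        mapping can be stored:
+
+        >>> df = pd.DataFrame({"A": [1, 2]})
+        >>> df.attrs["source"] = "demo"
+        >>> df.attrs
+        {'source': 'demo'}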
+ """ + if self._attrs is None: + self._attrs = {} + return self._attrs + + @attrs.setter + def attrs(self, value: Mapping[Hashable, Any]) -> None: + self._attrs = dict(value) + + @final + @property + def flags(self) -> Flags: + """ + Get the properties associated with this pandas object. + + The available flags are + + * :attr:`Flags.allows_duplicate_labels` + + See Also + -------- + Flags : Flags that apply to pandas objects. + DataFrame.attrs : Global metadata applying to this dataset. + + Notes + ----- + "Flags" differ from "metadata". Flags reflect properties of the + pandas object (the Series or DataFrame). Metadata refer to properties + of the dataset, and should be stored in :attr:`DataFrame.attrs`. + + Examples + -------- + >>> df = pd.DataFrame({"A": [1, 2]}) + >>> df.flags + + + Flags can be get or set using ``.`` + + >>> df.flags.allows_duplicate_labels + True + >>> df.flags.allows_duplicate_labels = False + + Or by slicing with a key + + >>> df.flags["allows_duplicate_labels"] + False + >>> df.flags["allows_duplicate_labels"] = True + """ + return self._flags + + @final + def set_flags( + self: NDFrameT, + *, + copy: bool_t = False, + allows_duplicate_labels: bool_t | None = None, + ) -> NDFrameT: + """ + Return a new object with updated flags. + + Parameters + ---------- + allows_duplicate_labels : bool, optional + Whether the returned object allows duplicate labels. + + Returns + ------- + Series or DataFrame + The same type as the caller. + + See Also + -------- + DataFrame.attrs : Global metadata applying to this dataset. + DataFrame.flags : Global flags applying to this object. + + Notes + ----- + This method returns a new object that's a view on the same data + as the input. Mutating the input or the output values will be reflected + in the other. + + This method is intended to be used in method chains. + + "Flags" differ from "metadata". Flags reflect properties of the + pandas object (the Series or DataFrame). Metadata refer to properties + of the dataset, and should be stored in :attr:`DataFrame.attrs`. + + Examples + -------- + >>> df = pd.DataFrame({"A": [1, 2]}) + >>> df.flags.allows_duplicate_labels + True + >>> df2 = df.set_flags(allows_duplicate_labels=False) + >>> df2.flags.allows_duplicate_labels + False + """ + df = self.copy(deep=copy) + if allows_duplicate_labels is not None: + df.flags["allows_duplicate_labels"] = allows_duplicate_labels + return df + + @final + @classmethod + def _validate_dtype(cls, dtype) -> DtypeObj | None: + """validate the passed dtype""" + if dtype is not None: + dtype = pandas_dtype(dtype) + + # a compound dtype + if dtype.kind == "V": + raise NotImplementedError( + "compound dtypes are not implemented " + f"in the {cls.__name__} constructor" + ) + + return dtype + + # ---------------------------------------------------------------------- + # Construction + + @property + def _constructor(self: NDFrameT) -> type[NDFrameT]: + """ + Used when a manipulation result has the same dimensions as the + original. + """ + raise AbstractMethodError(self) + + # ---------------------------------------------------------------------- + # Internals + + @final + @property + def _data(self): + # GH#33054 retained because some downstream packages uses this, + # e.g. 
fastparquet
+        return self._mgr
+
+    # ----------------------------------------------------------------------
+    # Axis
+    _stat_axis_number = 0
+    _stat_axis_name = "index"
+    _AXIS_ORDERS: list[str]
+    _AXIS_TO_AXIS_NUMBER: dict[Axis, int] = {0: 0, "index": 0, "rows": 0}
+    _info_axis_number: int
+    _info_axis_name: str
+    _AXIS_LEN: int
+
+    @property
+    def _AXIS_NUMBERS(self) -> dict[str, int]:
+        """.. deprecated:: 1.1.0"""
+        warnings.warn(
+            "_AXIS_NUMBERS has been deprecated.",
+            FutureWarning,
+            stacklevel=find_stack_level(),
+        )
+        return {"index": 0}
+
+    @property
+    def _AXIS_NAMES(self) -> dict[int, str]:
+        """.. deprecated:: 1.1.0"""
+        level = self.ndim + 1
+        warnings.warn(
+            "_AXIS_NAMES has been deprecated.", FutureWarning, stacklevel=level
+        )
+        return {0: "index"}
+
+    @final
+    def _construct_axes_dict(self, axes=None, **kwargs):
+        """Return an axes dictionary for myself."""
+        d = {a: self._get_axis(a) for a in (axes or self._AXIS_ORDERS)}
+        d.update(kwargs)
+        return d
+
+    @final
+    @classmethod
+    def _construct_axes_from_arguments(
+        cls, args, kwargs, require_all: bool_t = False, sentinel=None
+    ):
+        """
+        Construct and return axes if supplied in args/kwargs.
+
+        If require_all, raise if not all axis arguments are supplied.
+        Return a tuple of (axes, kwargs).
+
+        sentinel specifies the default parameter when an axis is not
+        supplied; useful to distinguish when a user explicitly passes None
+        in scenarios where None has special meaning.
+        """
+        # construct the args
+        args = list(args)
+        for a in cls._AXIS_ORDERS:
+
+            # look for an argument by position
+            if a not in kwargs:
+                try:
+                    kwargs[a] = args.pop(0)
+                except IndexError as err:
+                    if require_all:
+                        raise TypeError(
+                            "not enough/duplicate arguments specified!"
+                        ) from err
+
+        axes = {a: kwargs.pop(a, sentinel) for a in cls._AXIS_ORDERS}
+        return axes, kwargs
+
+    @final
+    @classmethod
+    def _get_axis_number(cls, axis: Axis) -> int:
+        try:
+            return cls._AXIS_TO_AXIS_NUMBER[axis]
+        except KeyError:
+            raise ValueError(f"No axis named {axis} for object type {cls.__name__}")
+
+    @final
+    @classmethod
+    def _get_axis_name(cls, axis: Axis) -> str:
+        axis_number = cls._get_axis_number(axis)
+        return cls._AXIS_ORDERS[axis_number]
+
+    @final
+    def _get_axis(self, axis: Axis) -> Index:
+        axis_number = self._get_axis_number(axis)
+        assert axis_number in {0, 1}
+        return self.index if axis_number == 0 else self.columns
+
+    @final
+    @classmethod
+    def _get_block_manager_axis(cls, axis: Axis) -> int:
+        """Map the axis to the block_manager axis."""
+        axis = cls._get_axis_number(axis)
+        ndim = cls._AXIS_LEN
+        if ndim == 2:
+            # i.e.
DataFrame
+            return 1 - axis
+        return axis
+
+    @final
+    def _get_axis_resolvers(self, axis: str) -> dict[str, Series | MultiIndex]:
+        # index or columns
+        axis_index = getattr(self, axis)
+        d = {}
+        prefix = axis[0]
+
+        for i, name in enumerate(axis_index.names):
+            if name is not None:
+                key = level = name
+            else:
+                # prefix with 'i' or 'c' depending on the input axis
+                # e.g., you must do ilevel_0 for the 0th level of an unnamed
+                # multiindex
+                key = f"{prefix}level_{i}"
+                level = i
+
+            level_values = axis_index.get_level_values(level)
+            s = level_values.to_series()
+            s.index = axis_index
+            d[key] = s
+
+        # put the index/columns itself in the dict
+        if isinstance(axis_index, MultiIndex):
+            dindex = axis_index
+        else:
+            dindex = axis_index.to_series()
+
+        d[axis] = dindex
+        return d
+
+    @final
+    def _get_index_resolvers(self) -> dict[Hashable, Series | MultiIndex]:
+        from pandas.core.computation.parsing import clean_column_name
+
+        d: dict[str, Series | MultiIndex] = {}
+        for axis_name in self._AXIS_ORDERS:
+            d.update(self._get_axis_resolvers(axis_name))
+
+        return {clean_column_name(k): v for k, v in d.items() if not isinstance(k, int)}
+
+    @final
+    def _get_cleaned_column_resolvers(self) -> dict[Hashable, Series]:
+        """
+        Return the special character free column resolvers of a dataframe.
+
+        Column names with special characters are 'cleaned up' so that they can
+        be referred to by backtick quoting.
+        Used in :meth:`DataFrame.eval`.
+        """
+        from pandas.core.computation.parsing import clean_column_name
+
+        if isinstance(self, ABCSeries):
+            return {clean_column_name(self.name): self}
+
+        return {
+            clean_column_name(k): v for k, v in self.items() if not isinstance(k, int)
+        }
+
+    @property
+    def _info_axis(self) -> Index:
+        return getattr(self, self._info_axis_name)
+
+    @property
+    def _stat_axis(self) -> Index:
+        return getattr(self, self._stat_axis_name)
+
+    @property
+    def shape(self) -> tuple[int, ...]:
+        """
+        Return a tuple of axis dimensions.
+        """
+        return tuple(len(self._get_axis(a)) for a in self._AXIS_ORDERS)
+
+    @property
+    def axes(self) -> list[Index]:
+        """
+        Return the index label(s) of the internal NDFrame.
+        """
+        # we do it this way because if we have reversed axes, then
+        # the block manager shows them reversed
+        return [self._get_axis(a) for a in self._AXIS_ORDERS]
+
+    @property
+    def ndim(self) -> int:
+        """
+        Return an int representing the number of axes / array dimensions.
+
+        Return 1 if Series. Otherwise return 2 if DataFrame.
+
+        See Also
+        --------
+        ndarray.ndim : Number of array dimensions.
+
+        Examples
+        --------
+        >>> s = pd.Series({'a': 1, 'b': 2, 'c': 3})
+        >>> s.ndim
+        1
+
+        >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
+        >>> df.ndim
+        2
+        """
+        return self._mgr.ndim
+
+    @property
+    def size(self) -> int:
+        """
+        Return an int representing the number of elements in this object.
+
+        Return the number of rows if Series. Otherwise return the number of
+        rows times number of columns if DataFrame.
+
+        See Also
+        --------
+        ndarray.size : Number of elements in the array.
+
+        Examples
+        --------
+        >>> s = pd.Series({'a': 1, 'b': 2, 'c': 3})
+        >>> s.size
+        3
+
+        >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
+        >>> df.size
+        4
+        """
+        return np.prod(self.shape)
+
+    @overload
+    def set_axis(
+        self: NDFrameT, labels, axis: Axis = ..., inplace: Literal[False] = ...
+    ) -> NDFrameT:
+        ...
+
+    @overload
+    def set_axis(self, labels, axis: Axis, inplace: Literal[True]) -> None:
+        ...
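+    # A hedged usage sketch of the overloads above (the labels are
+    # illustrative): set_axis with the default inplace=False returns a new
+    # object, while inplace=True mutates self and returns None, e.g.
+    #
+    #   >>> df = pd.DataFrame({"A": [1, 2]})
+    #   >>> df.set_axis(["x", "y"], axis=0)                 # new DataFrame
+    #   >>> df.set_axis(["x", "y"], axis=0, inplace=True)   # returns None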
+
+    @overload
+    def set_axis(self, labels, *, inplace: Literal[True]) -> None:
+        ...
+
+    @overload
+    def set_axis(
+        self: NDFrameT, labels, axis: Axis = ..., inplace: bool_t = ...
+    ) -> NDFrameT | None:
+        ...
+
+    def set_axis(self, labels, axis: Axis = 0, inplace: bool_t = False):
+        """
+        Assign desired index to given axis.
+
+        Indexes for%(extended_summary_sub)s row labels can be changed by assigning
+        a list-like or Index.
+
+        Parameters
+        ----------
+        labels : list-like, Index
+            The values for the new index.
+
+        axis : %(axes_single_arg)s, default 0
+            The axis to update. The value 0 identifies the rows%(axis_description_sub)s.
+
+        inplace : bool, default False
+            Whether to return a new %(klass)s instance.
+
+        Returns
+        -------
+        renamed : %(klass)s or None
+            An object of type %(klass)s or None if ``inplace=True``.
+
+        See Also
+        --------
+        %(klass)s.rename_axis : Alter the name of the index%(see_also_sub)s.
+        """
+        self._check_inplace_and_allows_duplicate_labels(inplace)
+        return self._set_axis_nocheck(labels, axis, inplace)
+
+    @final
+    def _set_axis_nocheck(self, labels, axis: Axis, inplace: bool_t):
+        # NDFrame.rename with inplace=False calls set_axis(inplace=True) on a copy.
+        if inplace:
+            setattr(self, self._get_axis_name(axis), labels)
+        else:
+            obj = self.copy()
+            obj.set_axis(labels, axis=axis, inplace=True)
+            return obj
+
+    def _set_axis(self, axis: int, labels: Index) -> None:
+        labels = ensure_index(labels)
+        self._mgr.set_axis(axis, labels)
+        self._clear_item_cache()
+
+    @final
+    def swapaxes(self: NDFrameT, axis1, axis2, copy=True) -> NDFrameT:
+        """
+        Interchange axes and swap values appropriately.
+
+        Returns
+        -------
+        y : same as input
+        """
+        i = self._get_axis_number(axis1)
+        j = self._get_axis_number(axis2)
+
+        if i == j:
+            if copy:
+                return self.copy()
+            return self
+
+        mapping = {i: j, j: i}
+
+        new_axes = (self._get_axis(mapping.get(k, k)) for k in range(self._AXIS_LEN))
+        new_values = self.values.swapaxes(i, j)
+        if copy:
+            new_values = new_values.copy()
+
+        # ignore needed because the NDFrame constructor is different from the
+        # DataFrame/Series constructors.
+        return self._constructor(
+            # error: Argument 1 to "NDFrame" has incompatible type "ndarray"; expected
+            # "Union[ArrayManager, BlockManager]"
+            # error: Argument 2 to "NDFrame" has incompatible type "*Generator[Index,
+            # None, None]"; expected "bool" [arg-type]
+            # error: Argument 2 to "NDFrame" has incompatible type "*Generator[Index,
+            # None, None]"; expected "Optional[Mapping[Hashable, Any]]"
+            new_values,  # type: ignore[arg-type]
+            *new_axes,  # type: ignore[arg-type]
+        ).__finalize__(self, method="swapaxes")
+
+    @final
+    @doc(klass=_shared_doc_kwargs["klass"])
+    def droplevel(self: NDFrameT, level, axis=0) -> NDFrameT:
+        """
+        Return {klass} with requested index / column level(s) removed.
+
+        Parameters
+        ----------
+        level : int, str, or list-like
+            If a string is given, it must be the name of a level.
+            If list-like, elements must be names or positional indexes
+            of levels.
+
+        axis : {{0 or 'index', 1 or 'columns'}}, default 0
+            Axis along which the level(s) is removed:
+
+            * 0 or 'index': remove the level(s) from the row index.
+            * 1 or 'columns': remove the level(s) from the column index.
+
+        Returns
+        -------
+        {klass}
+            {klass} with requested index / column level(s) removed.
+
+        Examples
+        --------
+        >>> df = pd.DataFrame([
+        ...     [1, 2, 3, 4],
+        ...     [5, 6, 7, 8],
+        ...     [9, 10, 11, 12]
+        ... ]).set_index([0, 1]).rename_axis(['a', 'b'])
+
+        >>> df.columns = pd.MultiIndex.from_tuples([
+        ...     ('c', 'e'), ('d', 'f')
+        ...
], names=['level_1', 'level_2']) + + >>> df + level_1 c d + level_2 e f + a b + 1 2 3 4 + 5 6 7 8 + 9 10 11 12 + + >>> df.droplevel('a') + level_1 c d + level_2 e f + b + 2 3 4 + 6 7 8 + 10 11 12 + + >>> df.droplevel('level_2', axis=1) + level_1 c d + a b + 1 2 3 4 + 5 6 7 8 + 9 10 11 12 + """ + labels = self._get_axis(axis) + new_labels = labels.droplevel(level) + return self.set_axis(new_labels, axis=axis, inplace=False) + + def pop(self, item: Hashable) -> Series | Any: + result = self[item] + del self[item] + + return result + + @final + def squeeze(self, axis=None): + """ + Squeeze 1 dimensional axis objects into scalars. + + Series or DataFrames with a single element are squeezed to a scalar. + DataFrames with a single column or a single row are squeezed to a + Series. Otherwise the object is unchanged. + + This method is most useful when you don't know if your + object is a Series or DataFrame, but you do know it has just a single + column. In that case you can safely call `squeeze` to ensure you have a + Series. + + Parameters + ---------- + axis : {0 or 'index', 1 or 'columns', None}, default None + A specific axis to squeeze. By default, all length-1 axes are + squeezed. + + Returns + ------- + DataFrame, Series, or scalar + The projection after squeezing `axis` or all the axes. + + See Also + -------- + Series.iloc : Integer-location based indexing for selecting scalars. + DataFrame.iloc : Integer-location based indexing for selecting Series. + Series.to_frame : Inverse of DataFrame.squeeze for a + single-column DataFrame. + + Examples + -------- + >>> primes = pd.Series([2, 3, 5, 7]) + + Slicing might produce a Series with a single value: + + >>> even_primes = primes[primes % 2 == 0] + >>> even_primes + 0 2 + dtype: int64 + + >>> even_primes.squeeze() + 2 + + Squeezing objects with more than one value in every axis does nothing: + + >>> odd_primes = primes[primes % 2 == 1] + >>> odd_primes + 1 3 + 2 5 + 3 7 + dtype: int64 + + >>> odd_primes.squeeze() + 1 3 + 2 5 + 3 7 + dtype: int64 + + Squeezing is even more effective when used with DataFrames. + + >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=['a', 'b']) + >>> df + a b + 0 1 2 + 1 3 4 + + Slicing a single column will produce a DataFrame with the columns + having only one value: + + >>> df_a = df[['a']] + >>> df_a + a + 0 1 + 1 3 + + So the columns can be squeezed down, resulting in a Series: + + >>> df_a.squeeze('columns') + 0 1 + 1 3 + Name: a, dtype: int64 + + Slicing a single row from a single column will produce a single + scalar DataFrame: + + >>> df_0a = df.loc[df.index < 1, ['a']] + >>> df_0a + a + 0 1 + + Squeezing the rows produces a single scalar Series: + + >>> df_0a.squeeze('rows') + a 1 + Name: 0, dtype: int64 + + Squeezing all axes will project directly into a scalar: + + >>> df_0a.squeeze() + 1 + """ + axis = range(self._AXIS_LEN) if axis is None else (self._get_axis_number(axis),) + return self.iloc[ + tuple( + 0 if i in axis and len(a) == 1 else slice(None) + for i, a in enumerate(self.axes) + ) + ] + + # ---------------------------------------------------------------------- + # Rename + + def _rename( + self: NDFrameT, + mapper: Renamer | None = None, + *, + index: Renamer | None = None, + columns: Renamer | None = None, + axis: Axis | None = None, + copy: bool_t = True, + inplace: bool_t = False, + level: Level | None = None, + errors: str = "ignore", + ) -> NDFrameT | None: + """ + Alter axes input function or functions. Function / dict values must be + unique (1-to-1). 
Labels not contained in a dict / Series will be left + as-is. Extra labels listed don't throw an error. Alternatively, change + ``Series.name`` with a scalar value (Series only). + + Parameters + ---------- + %(axes)s : scalar, list-like, dict-like or function, optional + Scalar or list-like will alter the ``Series.name`` attribute, + and raise on DataFrame. + dict-like or functions are transformations to apply to + that axis' values + copy : bool, default True + Also copy underlying data. + inplace : bool, default False + Whether to return a new {klass}. If True then value of copy is + ignored. + level : int or level name, default None + In case of a MultiIndex, only rename labels in the specified + level. + errors : {'ignore', 'raise'}, default 'ignore' + If 'raise', raise a `KeyError` when a dict-like `mapper`, `index`, + or `columns` contains labels that are not present in the Index + being transformed. + If 'ignore', existing keys will be renamed and extra keys will be + ignored. + + Returns + ------- + renamed : {klass} (new object) + + Raises + ------ + KeyError + If any of the labels is not found in the selected axis and + "errors='raise'". + + See Also + -------- + NDFrame.rename_axis + + Examples + -------- + >>> s = pd.Series([1, 2, 3]) + >>> s + 0 1 + 1 2 + 2 3 + dtype: int64 + >>> s.rename("my_name") # scalar, changes Series.name + 0 1 + 1 2 + 2 3 + Name: my_name, dtype: int64 + >>> s.rename(lambda x: x ** 2) # function, changes labels + 0 1 + 1 2 + 4 3 + dtype: int64 + >>> s.rename({1: 3, 2: 5}) # mapping, changes labels + 0 1 + 3 2 + 5 3 + dtype: int64 + + Since ``DataFrame`` doesn't have a ``.name`` attribute, + only mapping-type arguments are allowed. + + >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + >>> df.rename(2) + Traceback (most recent call last): + ... + TypeError: 'int' object is not callable + + ``DataFrame.rename`` supports two calling conventions + + * ``(index=index_mapper, columns=columns_mapper, ...)`` + * ``(mapper, axis={'index', 'columns'}, ...)`` + + We *highly* recommend using keyword arguments to clarify your + intent. + + >>> df.rename(index=str, columns={"A": "a", "B": "c"}) + a c + 0 1 4 + 1 2 5 + 2 3 6 + + >>> df.rename(index=str, columns={"A": "a", "C": "c"}) + a B + 0 1 4 + 1 2 5 + 2 3 6 + + Using axis-style parameters + + >>> df.rename(str.lower, axis='columns') + a b + 0 1 4 + 1 2 5 + 2 3 6 + + >>> df.rename({1: 2, 2: 4}, axis='index') + A B + 0 1 4 + 2 2 5 + 4 3 6 + + See the :ref:`user guide ` for more. 
+ """ + if mapper is None and index is None and columns is None: + raise TypeError("must pass an index to rename") + + if index is not None or columns is not None: + if axis is not None: + raise TypeError( + "Cannot specify both 'axis' and any of 'index' or 'columns'" + ) + elif mapper is not None: + raise TypeError( + "Cannot specify both 'mapper' and any of 'index' or 'columns'" + ) + else: + # use the mapper argument + if axis and self._get_axis_number(axis) == 1: + columns = mapper + else: + index = mapper + + self._check_inplace_and_allows_duplicate_labels(inplace) + result = self if inplace else self.copy(deep=copy) + + for axis_no, replacements in enumerate((index, columns)): + if replacements is None: + continue + + ax = self._get_axis(axis_no) + f = com.get_rename_function(replacements) + + if level is not None: + level = ax._get_level_number(level) + + # GH 13473 + if not callable(replacements): + if ax._is_multi and level is not None: + indexer = ax.get_level_values(level).get_indexer_for(replacements) + else: + indexer = ax.get_indexer_for(replacements) + + if errors == "raise" and len(indexer[indexer == -1]): + missing_labels = [ + label + for index, label in enumerate(replacements) + if indexer[index] == -1 + ] + raise KeyError(f"{missing_labels} not found in axis") + + new_index = ax._transform_index(f, level=level) + result._set_axis_nocheck(new_index, axis=axis_no, inplace=True) + result._clear_item_cache() + + if inplace: + self._update_inplace(result) + return None + else: + return result.__finalize__(self, method="rename") + + @rewrite_axis_style_signature("mapper", [("copy", True), ("inplace", False)]) + def rename_axis(self, mapper=lib.no_default, **kwargs): + """ + Set the name of the axis for the index or columns. + + Parameters + ---------- + mapper : scalar, list-like, optional + Value to set the axis name attribute. + index, columns : scalar, list-like, dict-like or function, optional + A scalar, list-like, dict-like or functions transformations to + apply to that axis' values. + Note that the ``columns`` parameter is not allowed if the + object is a Series. This parameter only apply for DataFrame + type objects. + + Use either ``mapper`` and ``axis`` to + specify the axis to target with ``mapper``, or ``index`` + and/or ``columns``. + axis : {0 or 'index', 1 or 'columns'}, default 0 + The axis to rename. + copy : bool, default True + Also copy underlying data. + inplace : bool, default False + Modifies the object directly, instead of creating a new Series + or DataFrame. + + Returns + ------- + Series, DataFrame, or None + The same type as the caller or None if ``inplace=True``. + + See Also + -------- + Series.rename : Alter Series index labels or name. + DataFrame.rename : Alter DataFrame index labels or name. + Index.rename : Set new names on index. + + Notes + ----- + ``DataFrame.rename_axis`` supports two calling conventions + + * ``(index=index_mapper, columns=columns_mapper, ...)`` + * ``(mapper, axis={'index', 'columns'}, ...)`` + + The first calling convention will only modify the names of + the index and/or the names of the Index object that is the columns. + In this case, the parameter ``copy`` is ignored. + + The second calling convention will modify the names of the + corresponding index if mapper is a list or a scalar. + However, if mapper is dict-like or a function, it will use the + deprecated behavior of modifying the axis *labels*. + + We *highly* recommend using keyword arguments to clarify your + intent. 
+ + Examples + -------- + **Series** + + >>> s = pd.Series(["dog", "cat", "monkey"]) + >>> s + 0 dog + 1 cat + 2 monkey + dtype: object + >>> s.rename_axis("animal") + animal + 0 dog + 1 cat + 2 monkey + dtype: object + + **DataFrame** + + >>> df = pd.DataFrame({"num_legs": [4, 4, 2], + ... "num_arms": [0, 0, 2]}, + ... ["dog", "cat", "monkey"]) + >>> df + num_legs num_arms + dog 4 0 + cat 4 0 + monkey 2 2 + >>> df = df.rename_axis("animal") + >>> df + num_legs num_arms + animal + dog 4 0 + cat 4 0 + monkey 2 2 + >>> df = df.rename_axis("limbs", axis="columns") + >>> df + limbs num_legs num_arms + animal + dog 4 0 + cat 4 0 + monkey 2 2 + + **MultiIndex** + + >>> df.index = pd.MultiIndex.from_product([['mammal'], + ... ['dog', 'cat', 'monkey']], + ... names=['type', 'name']) + >>> df + limbs num_legs num_arms + type name + mammal dog 4 0 + cat 4 0 + monkey 2 2 + + >>> df.rename_axis(index={'type': 'class'}) + limbs num_legs num_arms + class name + mammal dog 4 0 + cat 4 0 + monkey 2 2 + + >>> df.rename_axis(columns=str.upper) + LIMBS num_legs num_arms + type name + mammal dog 4 0 + cat 4 0 + monkey 2 2 + """ + axes, kwargs = self._construct_axes_from_arguments( + (), kwargs, sentinel=lib.no_default + ) + copy = kwargs.pop("copy", True) + inplace = kwargs.pop("inplace", False) + axis = kwargs.pop("axis", 0) + if axis is not None: + axis = self._get_axis_number(axis) + + if kwargs: + raise TypeError( + "rename_axis() got an unexpected keyword " + f'argument "{list(kwargs.keys())[0]}"' + ) + + inplace = validate_bool_kwarg(inplace, "inplace") + + if mapper is not lib.no_default: + # Use v0.23 behavior if a scalar or list + non_mapper = is_scalar(mapper) or ( + is_list_like(mapper) and not is_dict_like(mapper) + ) + if non_mapper: + return self._set_axis_name(mapper, axis=axis, inplace=inplace) + else: + raise ValueError("Use `.rename` to alter labels with a mapper.") + else: + # Use new behavior. Means that index and/or columns + # is specified + result = self if inplace else self.copy(deep=copy) + + for axis in range(self._AXIS_LEN): + v = axes.get(self._get_axis_name(axis)) + if v is lib.no_default: + continue + non_mapper = is_scalar(v) or (is_list_like(v) and not is_dict_like(v)) + if non_mapper: + newnames = v + else: + f = com.get_rename_function(v) + curnames = self._get_axis(axis).names + newnames = [f(name) for name in curnames] + result._set_axis_name(newnames, axis=axis, inplace=True) + if not inplace: + return result + + @final + def _set_axis_name(self, name, axis=0, inplace=False): + """ + Set the name(s) of the axis. + + Parameters + ---------- + name : str or list of str + Name(s) to set. + axis : {0 or 'index', 1 or 'columns'}, default 0 + The axis to set the label. The value 0 or 'index' specifies index, + and the value 1 or 'columns' specifies columns. + inplace : bool, default False + If `True`, do operation inplace and return None. + + Returns + ------- + Series, DataFrame, or None + The same type as the caller or `None` if `inplace` is `True`. + + See Also + -------- + DataFrame.rename : Alter the axis labels of :class:`DataFrame`. + Series.rename : Alter the index labels or set the index name + of :class:`Series`. + Index.rename : Set the name of :class:`Index` or :class:`MultiIndex`. + + Examples + -------- + >>> df = pd.DataFrame({"num_legs": [4, 4, 2]}, + ... ["dog", "cat", "monkey"]) + >>> df + num_legs + dog 4 + cat 4 + monkey 2 + >>> df._set_axis_name("animal") + num_legs + animal + dog 4 + cat 4 + monkey 2 + >>> df.index = pd.MultiIndex.from_product( + ... 
[["mammal"], ['dog', 'cat', 'monkey']]) + >>> df._set_axis_name(["type", "name"]) + num_legs + type name + mammal dog 4 + cat 4 + monkey 2 + """ + axis = self._get_axis_number(axis) + idx = self._get_axis(axis).set_names(name) + + inplace = validate_bool_kwarg(inplace, "inplace") + renamed = self if inplace else self.copy() + renamed.set_axis(idx, axis=axis, inplace=True) + if not inplace: + return renamed + + # ---------------------------------------------------------------------- + # Comparison Methods + + @final + def _indexed_same(self, other) -> bool_t: + return all( + self._get_axis(a).equals(other._get_axis(a)) for a in self._AXIS_ORDERS + ) + + @final + def equals(self, other: object) -> bool_t: + """ + Test whether two objects contain the same elements. + + This function allows two Series or DataFrames to be compared against + each other to see if they have the same shape and elements. NaNs in + the same location are considered equal. + + The row/column index do not need to have the same type, as long + as the values are considered equal. Corresponding columns must be of + the same dtype. + + Parameters + ---------- + other : Series or DataFrame + The other Series or DataFrame to be compared with the first. + + Returns + ------- + bool + True if all elements are the same in both objects, False + otherwise. + + See Also + -------- + Series.eq : Compare two Series objects of the same length + and return a Series where each element is True if the element + in each Series is equal, False otherwise. + DataFrame.eq : Compare two DataFrame objects of the same shape and + return a DataFrame where each element is True if the respective + element in each DataFrame is equal, False otherwise. + testing.assert_series_equal : Raises an AssertionError if left and + right are not equal. Provides an easy interface to ignore + inequality in dtypes, indexes and precision among others. + testing.assert_frame_equal : Like assert_series_equal, but targets + DataFrames. + numpy.array_equal : Return True if two arrays have the same shape + and elements, False otherwise. + + Examples + -------- + >>> df = pd.DataFrame({1: [10], 2: [20]}) + >>> df + 1 2 + 0 10 20 + + DataFrames df and exactly_equal have the same types and values for + their elements and column labels, which will return True. + + >>> exactly_equal = pd.DataFrame({1: [10], 2: [20]}) + >>> exactly_equal + 1 2 + 0 10 20 + >>> df.equals(exactly_equal) + True + + DataFrames df and different_column_type have the same element + types and values, but have different types for the column labels, + which will still return True. + + >>> different_column_type = pd.DataFrame({1.0: [10], 2.0: [20]}) + >>> different_column_type + 1.0 2.0 + 0 10 20 + >>> df.equals(different_column_type) + True + + DataFrames df and different_data_type have different types for the + same values for their elements, and will return False even though + their column labels are the same values and types. 
+ + >>> different_data_type = pd.DataFrame({1: [10.0], 2: [20.0]}) + >>> different_data_type + 1 2 + 0 10.0 20.0 + >>> df.equals(different_data_type) + False + """ + if not (isinstance(other, type(self)) or isinstance(self, type(other))): + return False + other = cast(NDFrame, other) + return self._mgr.equals(other._mgr) + + # ------------------------------------------------------------------------- + # Unary Methods + + @final + def __neg__(self): + def blk_func(values: ArrayLike): + if is_bool_dtype(values.dtype): + return operator.inv(values) + else: + return operator.neg(values) + + new_data = self._mgr.apply(blk_func) + res = self._constructor(new_data) + return res.__finalize__(self, method="__neg__") + + @final + def __pos__(self): + def blk_func(values: ArrayLike): + if is_bool_dtype(values.dtype): + return values.copy() + else: + return operator.pos(values) + + new_data = self._mgr.apply(blk_func) + res = self._constructor(new_data) + return res.__finalize__(self, method="__pos__") + + @final + def __invert__(self): + if not self.size: + # inv fails with 0 len + return self + + new_data = self._mgr.apply(operator.invert) + return self._constructor(new_data).__finalize__(self, method="__invert__") + + @final + def __nonzero__(self): + raise ValueError( + f"The truth value of a {type(self).__name__} is ambiguous. " + "Use a.empty, a.bool(), a.item(), a.any() or a.all()." + ) + + __bool__ = __nonzero__ + + @final + def bool(self): + """ + Return the bool of a single element Series or DataFrame. + + This must be a boolean scalar value, either True or False. It will raise a + ValueError if the Series or DataFrame does not have exactly 1 element, or that + element is not boolean (integer values 0 and 1 will also raise an exception). + + Returns + ------- + bool + The value in the Series or DataFrame. + + See Also + -------- + Series.astype : Change the data type of a Series, including to boolean. + DataFrame.astype : Change the data type of a DataFrame, including to boolean. + numpy.bool_ : NumPy boolean data type, used by pandas for boolean values. + + Examples + -------- + The method will only work for single element objects with a boolean value: + + >>> pd.Series([True]).bool() + True + >>> pd.Series([False]).bool() + False + + >>> pd.DataFrame({'col': [True]}).bool() + True + >>> pd.DataFrame({'col': [False]}).bool() + False + """ + v = self.squeeze() + if isinstance(v, (bool, np.bool_)): + return bool(v) + elif is_scalar(v): + raise ValueError( + "bool cannot act on a non-boolean single element " + f"{type(self).__name__}" + ) + + self.__nonzero__() + + @final + def abs(self: NDFrameT) -> NDFrameT: + """ + Return a Series/DataFrame with absolute numeric value of each element. + + This function only applies to elements that are all numeric. + + Returns + ------- + abs + Series/DataFrame containing the absolute value of each element. + + See Also + -------- + numpy.absolute : Calculate the absolute value element-wise. + + Notes + ----- + For ``complex`` inputs, ``1.2 + 1j``, the absolute value is + :math:`\\sqrt{ a^2 + b^2 }`. + + Examples + -------- + Absolute numeric values in a Series. + + >>> s = pd.Series([-1.10, 2, -3.33, 4]) + >>> s.abs() + 0 1.10 + 1 2.00 + 2 3.33 + 3 4.00 + dtype: float64 + + Absolute numeric values in a Series with complex numbers. + + >>> s = pd.Series([1.2 + 1j]) + >>> s.abs() + 0 1.56205 + dtype: float64 + + Absolute numeric values in a Series with a Timedelta element. 
+ + >>> s = pd.Series([pd.Timedelta('1 days')]) + >>> s.abs() + 0 1 days + dtype: timedelta64[ns] + + Select rows with data closest to certain value using argsort (from + `StackOverflow `__). + + >>> df = pd.DataFrame({ + ... 'a': [4, 5, 6, 7], + ... 'b': [10, 20, 30, 40], + ... 'c': [100, 50, -30, -50] + ... }) + >>> df + a b c + 0 4 10 100 + 1 5 20 50 + 2 6 30 -30 + 3 7 40 -50 + >>> df.loc[(df.c - 43).abs().argsort()] + a b c + 1 5 20 50 + 0 4 10 100 + 2 6 30 -30 + 3 7 40 -50 + """ + res_mgr = self._mgr.apply(np.abs) + return self._constructor(res_mgr).__finalize__(self, name="abs") + + @final + def __abs__(self: NDFrameT) -> NDFrameT: + return self.abs() + + @final + def __round__(self: NDFrameT, decimals: int = 0) -> NDFrameT: + return self.round(decimals) + + # ------------------------------------------------------------------------- + # Label or Level Combination Helpers + # + # A collection of helper methods for DataFrame/Series operations that + # accept a combination of column/index labels and levels. All such + # operations should utilize/extend these methods when possible so that we + # have consistent precedence and validation logic throughout the library. + + @final + def _is_level_reference(self, key, axis=0): + """ + Test whether a key is a level reference for a given axis. + + To be considered a level reference, `key` must be a string that: + - (axis=0): Matches the name of an index level and does NOT match + a column label. + - (axis=1): Matches the name of a column level and does NOT match + an index label. + + Parameters + ---------- + key : str + Potential level name for the given axis + axis : int, default 0 + Axis that levels are associated with (0 for index, 1 for columns) + + Returns + ------- + is_level : bool + """ + axis = self._get_axis_number(axis) + + return ( + key is not None + and is_hashable(key) + and key in self.axes[axis].names + and not self._is_label_reference(key, axis=axis) + ) + + @final + def _is_label_reference(self, key, axis=0) -> bool_t: + """ + Test whether a key is a label reference for a given axis. + + To be considered a label reference, `key` must be a string that: + - (axis=0): Matches a column label + - (axis=1): Matches an index label + + Parameters + ---------- + key : str + Potential label name + axis : int, default 0 + Axis perpendicular to the axis that labels are associated with + (0 means search for column labels, 1 means search for index labels) + + Returns + ------- + is_label: bool + """ + axis = self._get_axis_number(axis) + other_axes = (ax for ax in range(self._AXIS_LEN) if ax != axis) + + return ( + key is not None + and is_hashable(key) + and any(key in self.axes[ax] for ax in other_axes) + ) + + @final + def _is_label_or_level_reference(self, key: str, axis: int = 0) -> bool_t: + """ + Test whether a key is a label or level reference for a given axis. + + To be considered either a label or a level reference, `key` must be a + string that: + - (axis=0): Matches a column label or an index level + - (axis=1): Matches an index label or a column level + + Parameters + ---------- + key : str + Potential label or level name + axis : int, default 0 + Axis that levels are associated with (0 for index, 1 for columns) + + Returns + ------- + bool + """ + return self._is_level_reference(key, axis=axis) or self._is_label_reference( + key, axis=axis + ) + + @final + def _check_label_or_level_ambiguity(self, key, axis: int = 0) -> None: + """ + Check whether `key` is ambiguous. 
+ + By ambiguous, we mean that it matches both a level of the input + `axis` and a label of the other axis. + + Parameters + ---------- + key : str or object + Label or level name. + axis : int, default 0 + Axis that levels are associated with (0 for index, 1 for columns). + + Raises + ------ + ValueError: `key` is ambiguous + """ + axis = self._get_axis_number(axis) + other_axes = (ax for ax in range(self._AXIS_LEN) if ax != axis) + + if ( + key is not None + and is_hashable(key) + and key in self.axes[axis].names + and any(key in self.axes[ax] for ax in other_axes) + ): + + # Build an informative and grammatical warning + level_article, level_type = ( + ("an", "index") if axis == 0 else ("a", "column") + ) + + label_article, label_type = ( + ("a", "column") if axis == 0 else ("an", "index") + ) + + msg = ( + f"'{key}' is both {level_article} {level_type} level and " + f"{label_article} {label_type} label, which is ambiguous." + ) + raise ValueError(msg) + + @final + def _get_label_or_level_values(self, key: str, axis: int = 0) -> np.ndarray: + """ + Return a 1-D array of values associated with `key`, a label or level + from the given `axis`. + + Retrieval logic: + - (axis=0): Return column values if `key` matches a column label. + Otherwise return index level values if `key` matches an index + level. + - (axis=1): Return row values if `key` matches an index label. + Otherwise return column level values if 'key' matches a column + level + + Parameters + ---------- + key : str + Label or level name. + axis : int, default 0 + Axis that levels are associated with (0 for index, 1 for columns) + + Returns + ------- + values : np.ndarray + + Raises + ------ + KeyError + if `key` matches neither a label nor a level + ValueError + if `key` matches multiple labels + FutureWarning + if `key` is ambiguous. This will become an ambiguity error in a + future version + """ + axis = self._get_axis_number(axis) + other_axes = [ax for ax in range(self._AXIS_LEN) if ax != axis] + + if self._is_label_reference(key, axis=axis): + self._check_label_or_level_ambiguity(key, axis=axis) + values = self.xs(key, axis=other_axes[0])._values + elif self._is_level_reference(key, axis=axis): + values = self.axes[axis].get_level_values(key)._values + else: + raise KeyError(key) + + # Check for duplicates + if values.ndim > 1: + + if other_axes and isinstance(self._get_axis(other_axes[0]), MultiIndex): + multi_message = ( + "\n" + "For a multi-index, the label must be a " + "tuple with elements corresponding to each level." + ) + else: + multi_message = "" + + label_axis_name = "column" if axis == 0 else "index" + raise ValueError( + f"The {label_axis_name} label '{key}' is not unique.{multi_message}" + ) + + return values + + @final + def _drop_labels_or_levels(self, keys, axis: int = 0): + """ + Drop labels and/or levels for the given `axis`. + + For each key in `keys`: + - (axis=0): If key matches a column label then drop the column. + Otherwise if key matches an index level then drop the level. + - (axis=1): If key matches an index label then drop the row. + Otherwise if key matches a column level then drop the level. 
+ + Parameters + ---------- + keys : str or list of str + labels or levels to drop + axis : int, default 0 + Axis that levels are associated with (0 for index, 1 for columns) + + Returns + ------- + dropped: DataFrame + + Raises + ------ + ValueError + if any `keys` match neither a label nor a level + """ + axis = self._get_axis_number(axis) + + # Validate keys + keys = com.maybe_make_list(keys) + invalid_keys = [ + k for k in keys if not self._is_label_or_level_reference(k, axis=axis) + ] + + if invalid_keys: + raise ValueError( + "The following keys are not valid labels or " + f"levels for axis {axis}: {invalid_keys}" + ) + + # Compute levels and labels to drop + levels_to_drop = [k for k in keys if self._is_level_reference(k, axis=axis)] + + labels_to_drop = [k for k in keys if not self._is_level_reference(k, axis=axis)] + + # Perform copy upfront and then use inplace operations below. + # This ensures that we always perform exactly one copy. + # ``copy`` and/or ``inplace`` options could be added in the future. + dropped = self.copy() + + if axis == 0: + # Handle dropping index levels + if levels_to_drop: + dropped.reset_index(levels_to_drop, drop=True, inplace=True) + + # Handle dropping columns labels + if labels_to_drop: + dropped.drop(labels_to_drop, axis=1, inplace=True) + else: + # Handle dropping column levels + if levels_to_drop: + if isinstance(dropped.columns, MultiIndex): + # Drop the specified levels from the MultiIndex + dropped.columns = dropped.columns.droplevel(levels_to_drop) + else: + # Drop the last level of Index by replacing with + # a RangeIndex + dropped.columns = RangeIndex(dropped.columns.size) + + # Handle dropping index labels + if labels_to_drop: + dropped.drop(labels_to_drop, axis=0, inplace=True) + + return dropped + + # ---------------------------------------------------------------------- + # Iteration + + # https://github.com/python/typeshed/issues/2148#issuecomment-520783318 + # Incompatible types in assignment (expression has type "None", base class + # "object" defined the type as "Callable[[object], int]") + __hash__: None # type: ignore[assignment] + + def __iter__(self): + """ + Iterate over info axis. + + Returns + ------- + iterator + Info axis as iterator. + """ + return iter(self._info_axis) + + # can we get a better explanation of this? + def keys(self): + """ + Get the 'info axis' (see Indexing for more). + + This is index for Series, columns for DataFrame. + + Returns + ------- + Index + Info axis. + """ + return self._info_axis + + def items(self): + """ + Iterate over (label, values) on info axis + + This is index for Series and columns for DataFrame. + + Returns + ------- + Generator + """ + for h in self._info_axis: + yield h, self[h] + + @doc(items) + def iteritems(self): + return self.items() + + def __len__(self) -> int: + """Returns length of info axis""" + return len(self._info_axis) + + @final + def __contains__(self, key) -> bool_t: + """True if the key is in the info axis""" + return key in self._info_axis + + @property + def empty(self) -> bool_t: + """ + Indicator whether Series/DataFrame is empty. + + True if Series/DataFrame is entirely empty (no items), meaning any of the + axes are of length 0. + + Returns + ------- + bool + If Series/DataFrame is empty, return True, if not return False. + + See Also + -------- + Series.dropna : Return series without null values. + DataFrame.dropna : Return DataFrame with labels on given axis omitted + where (all or any) data are missing. 
+
+        Notes
+        -----
+        If Series/DataFrame contains only NaNs, it is still not considered empty. See
+        the example below.
+
+        Examples
+        --------
+        An example of an actual empty DataFrame. Notice the index is empty:
+
+        >>> df_empty = pd.DataFrame({'A' : []})
+        >>> df_empty
+        Empty DataFrame
+        Columns: [A]
+        Index: []
+        >>> df_empty.empty
+        True
+
+        If we only have NaNs in our DataFrame, it is not considered empty! We
+        will need to drop the NaNs to make the DataFrame empty:
+
+        >>> df = pd.DataFrame({'A' : [np.nan]})
+        >>> df
+             A
+        0  NaN
+        >>> df.empty
+        False
+        >>> df.dropna().empty
+        True
+
+        >>> ser_empty = pd.Series({'A' : []})
+        >>> ser_empty
+        A    []
+        dtype: object
+        >>> ser_empty.empty
+        False
+        >>> ser_empty = pd.Series()
+        >>> ser_empty.empty
+        True
+        """
+        return any(len(self._get_axis(a)) == 0 for a in self._AXIS_ORDERS)
+
+    # ----------------------------------------------------------------------
+    # Array Interface
+
+    # This is also set in IndexOpsMixin
+    # GH#23114 Ensure ndarray.__op__(DataFrame) returns NotImplemented
+    __array_priority__ = 1000
+
+    def __array__(self, dtype: npt.DTypeLike | None = None) -> np.ndarray:
+        return np.asarray(self._values, dtype=dtype)
+
+    def __array_wrap__(
+        self,
+        result: np.ndarray,
+        context: tuple[Callable, tuple[Any, ...], int] | None = None,
+    ):
+        """
+        Gets called after a ufunc and other functions.
+
+        Parameters
+        ----------
+        result: np.ndarray
+            The result of the ufunc or other function called on the NumPy array
+            returned by __array__.
+        context: tuple of (func, tuple, int)
+            This parameter is returned by ufuncs as a 3-element tuple: (name of the
+            ufunc, arguments of the ufunc, domain of the ufunc), but is not set by
+            other numpy functions.
+
+        Notes
+        -----
+        Series implements __array_ufunc__ so this is not called for ufunc on Series.
+        """
+        # Note: at time of dask 2022.01.0, this is still used by dask
+        res = lib.item_from_zerodim(result)
+        if is_scalar(res):
+            # e.g. we get here with np.ptp(series)
+            # ptp also requires the item_from_zerodim
+            return res
+        d = self._construct_axes_dict(self._AXIS_ORDERS, copy=False)
+        # error: Argument 1 to "NDFrame" has incompatible type "ndarray";
+        # expected "BlockManager"
+        return self._constructor(res, **d).__finalize__(  # type: ignore[arg-type]
+            self, method="__array_wrap__"
+        )
+
+    @final
+    def __array_ufunc__(
+        self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any
+    ):
+        return arraylike.array_ufunc(self, ufunc, method, *inputs, **kwargs)
+
+    # ----------------------------------------------------------------------
+    # Picklability
+
+    @final
+    def __getstate__(self) -> dict[str, Any]:
+        meta = {k: getattr(self, k, None) for k in self._metadata}
+        return {
+            "_mgr": self._mgr,
+            "_typ": self._typ,
+            "_metadata": self._metadata,
+            "attrs": self.attrs,
+            "_flags": {k: self.flags[k] for k in self.flags._keys},
+            **meta,
+        }
+
+    @final
+    def __setstate__(self, state):
+        if isinstance(state, BlockManager):
+            self._mgr = state
+        elif isinstance(state, dict):
+            if "_data" in state and "_mgr" not in state:
+                # compat for older pickles
+                state["_mgr"] = state.pop("_data")
+            typ = state.get("_typ")
+            if typ is not None:
+                attrs = state.get("_attrs", {})
+                object.__setattr__(self, "_attrs", attrs)
+                flags = state.get("_flags", {"allows_duplicate_labels": True})
+                object.__setattr__(self, "_flags", Flags(self, **flags))
+
+                # set in the order of internal names
+                # to avoid definitional recursion
+                # e.g.
say fill_value needing _mgr to be + # defined + meta = set(self._internal_names + self._metadata) + for k in list(meta): + if k in state and k != "_flags": + v = state[k] + object.__setattr__(self, k, v) + + for k, v in state.items(): + if k not in meta: + object.__setattr__(self, k, v) + + else: + raise NotImplementedError("Pre-0.12 pickles are no longer supported") + elif len(state) == 2: + raise NotImplementedError("Pre-0.12 pickles are no longer supported") + + self._item_cache = {} + + # ---------------------------------------------------------------------- + # Rendering Methods + + def __repr__(self) -> str: + # string representation based upon iterating over self + # (since, by definition, `PandasContainers` are iterable) + prepr = f"[{','.join(map(pprint_thing, self))}]" + return f"{type(self).__name__}({prepr})" + + @final + def _repr_latex_(self): + """ + Returns a LaTeX representation for a particular object. + Mainly for use with nbconvert (jupyter notebook conversion to pdf). + """ + if config.get_option("display.latex.repr"): + return self.to_latex() + else: + return None + + @final + def _repr_data_resource_(self): + """ + Not a real Jupyter special repr method, but we use the same + naming convention. + """ + if config.get_option("display.html.table_schema"): + data = self.head(config.get_option("display.max_rows")) + + as_json = data.to_json(orient="table") + as_json = cast(str, as_json) + return json.loads(as_json, object_pairs_hook=collections.OrderedDict) + + # ---------------------------------------------------------------------- + # I/O Methods + + @final + @doc(klass="object", storage_options=_shared_docs["storage_options"]) + def to_excel( + self, + excel_writer, + sheet_name: str = "Sheet1", + na_rep: str = "", + float_format: str | None = None, + columns=None, + header=True, + index=True, + index_label=None, + startrow=0, + startcol=0, + engine=None, + merge_cells=True, + encoding=None, + inf_rep="inf", + verbose=True, + freeze_panes=None, + storage_options: StorageOptions = None, + ) -> None: + """ + Write {klass} to an Excel sheet. + + To write a single {klass} to an Excel .xlsx file it is only necessary to + specify a target file name. To write to multiple sheets it is necessary to + create an `ExcelWriter` object with a target file name, and specify a sheet + in the file to write to. + + Multiple sheets may be written to by specifying unique `sheet_name`. + With all data written to the file it is necessary to save the changes. + Note that creating an `ExcelWriter` object with a file name that already + exists will result in the contents of the existing file being erased. + + Parameters + ---------- + excel_writer : path-like, file-like, or ExcelWriter object + File path or existing ExcelWriter. + sheet_name : str, default 'Sheet1' + Name of sheet which will contain DataFrame. + na_rep : str, default '' + Missing data representation. + float_format : str, optional + Format string for floating point numbers. For example + ``float_format="%.2f"`` will format 0.1234 to 0.12. + columns : sequence or list of str, optional + Columns to write. + header : bool or list of str, default True + Write out the column names. If a list of string is given it is + assumed to be aliases for the column names. + index : bool, default True + Write row names (index). + index_label : str or sequence, optional + Column label for index column(s) if desired. If not specified, and + `header` and `index` are True, then the index names are used. 
A + sequence should be given if the DataFrame uses MultiIndex. + startrow : int, default 0 + Upper left cell row to dump data frame. + startcol : int, default 0 + Upper left cell column to dump data frame. + engine : str, optional + Write engine to use, 'openpyxl' or 'xlsxwriter'. You can also set this + via the options ``io.excel.xlsx.writer``, ``io.excel.xls.writer``, and + ``io.excel.xlsm.writer``. + + .. deprecated:: 1.2.0 + + As the `xlwt `__ package is no longer + maintained, the ``xlwt`` engine will be removed in a future version + of pandas. + + merge_cells : bool, default True + Write MultiIndex and Hierarchical Rows as merged cells. + encoding : str, optional + Encoding of the resulting excel file. Only necessary for xlwt, + other writers support unicode natively. + inf_rep : str, default 'inf' + Representation for infinity (there is no native representation for + infinity in Excel). + verbose : bool, default True + Display more information in the error logs. + freeze_panes : tuple of int (length 2), optional + Specifies the one-based bottommost row and rightmost column that + is to be frozen. + {storage_options} + + .. versionadded:: 1.2.0 + + See Also + -------- + to_csv : Write DataFrame to a comma-separated values (csv) file. + ExcelWriter : Class for writing DataFrame objects into excel sheets. + read_excel : Read an Excel file into a pandas DataFrame. + read_csv : Read a comma-separated values (csv) file into DataFrame. + + Notes + ----- + For compatibility with :meth:`~DataFrame.to_csv`, + to_excel serializes lists and dicts to strings before writing. + + Once a workbook has been saved it is not possible to write further + data without rewriting the whole workbook. + + Examples + -------- + + Create, write to and save a workbook: + + >>> df1 = pd.DataFrame([['a', 'b'], ['c', 'd']], + ... index=['row 1', 'row 2'], + ... columns=['col 1', 'col 2']) + >>> df1.to_excel("output.xlsx") # doctest: +SKIP + + To specify the sheet name: + + >>> df1.to_excel("output.xlsx", + ... sheet_name='Sheet_name_1') # doctest: +SKIP + + If you wish to write to more than one sheet in the workbook, it is + necessary to specify an ExcelWriter object: + + >>> df2 = df1.copy() + >>> with pd.ExcelWriter('output.xlsx') as writer: # doctest: +SKIP + ... df1.to_excel(writer, sheet_name='Sheet_name_1') + ... df2.to_excel(writer, sheet_name='Sheet_name_2') + + ExcelWriter can also be used to append to an existing Excel file: + + >>> with pd.ExcelWriter('output.xlsx', + ... mode='a') as writer: # doctest: +SKIP + ... 
df.to_excel(writer, sheet_name='Sheet_name_3') + + To set the library that is used to write the Excel file, + you can pass the `engine` keyword (the default engine is + automatically chosen depending on the file extension): + + >>> df1.to_excel('output1.xlsx', engine='xlsxwriter') # doctest: +SKIP + """ + + df = self if isinstance(self, ABCDataFrame) else self.to_frame() + + from pandas.io.formats.excel import ExcelFormatter + + formatter = ExcelFormatter( + df, + na_rep=na_rep, + cols=columns, + header=header, + float_format=float_format, + index=index, + index_label=index_label, + merge_cells=merge_cells, + inf_rep=inf_rep, + ) + formatter.write( + excel_writer, + sheet_name=sheet_name, + startrow=startrow, + startcol=startcol, + freeze_panes=freeze_panes, + engine=engine, + storage_options=storage_options, + ) + + @final + @doc( + storage_options=_shared_docs["storage_options"], + compression_options=_shared_docs["compression_options"] % "path_or_buf", + ) + def to_json( + self, + path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None, + orient: str | None = None, + date_format: str | None = None, + double_precision: int = 10, + force_ascii: bool_t = True, + date_unit: str = "ms", + default_handler: Callable[[Any], JSONSerializable] | None = None, + lines: bool_t = False, + compression: CompressionOptions = "infer", + index: bool_t = True, + indent: int | None = None, + storage_options: StorageOptions = None, + ) -> str | None: + """ + Convert the object to a JSON string. + + Note NaN's and None will be converted to null and datetime objects + will be converted to UNIX timestamps. + + Parameters + ---------- + path_or_buf : str, path object, file-like object, or None, default None + String, path object (implementing os.PathLike[str]), or file-like + object implementing a write() function. If None, the result is + returned as a string. + orient : str + Indication of expected JSON string format. + + * Series: + + - default is 'index' + - allowed values are: {{'split', 'records', 'index', 'table'}}. + + * DataFrame: + + - default is 'columns' + - allowed values are: {{'split', 'records', 'index', 'columns', + 'values', 'table'}}. + + * The format of the JSON string: + + - 'split' : dict like {{'index' -> [index], 'columns' -> [columns], + 'data' -> [values]}} + - 'records' : list like [{{column -> value}}, ... , {{column -> value}}] + - 'index' : dict like {{index -> {{column -> value}}}} + - 'columns' : dict like {{column -> {{index -> value}}}} + - 'values' : just the values array + - 'table' : dict like {{'schema': {{schema}}, 'data': {{data}}}} + + Describing the data, where data component is like ``orient='records'``. + + date_format : {{None, 'epoch', 'iso'}} + Type of date conversion. 'epoch' = epoch milliseconds, + 'iso' = ISO8601. The default depends on the `orient`. For + ``orient='table'``, the default is 'iso'. For all other orients, + the default is 'epoch'. + double_precision : int, default 10 + The number of decimal places to use when encoding + floating point values. + force_ascii : bool, default True + Force encoded string to be ASCII. + date_unit : str, default 'ms' (milliseconds) + The time unit to encode to, governs timestamp and ISO8601 + precision. One of 's', 'ms', 'us', 'ns' for second, millisecond, + microsecond, and nanosecond respectively. + default_handler : callable, default None + Handler to call if object cannot otherwise be converted to a + suitable format for JSON. 
Should receive a single argument which is + the object to convert and return a serialisable object. + lines : bool, default False + If 'orient' is 'records' write out line-delimited json format. Will + throw ValueError if incorrect 'orient' since others are not + list-like. + {compression_options} + + .. versionchanged:: 1.4.0 Zstandard support. + + index : bool, default True + Whether to include the index values in the JSON string. Not + including the index (``index=False``) is only supported when + orient is 'split' or 'table'. + indent : int, optional + Length of whitespace used to indent each record. + + .. versionadded:: 1.0.0 + + {storage_options} + + .. versionadded:: 1.2.0 + + Returns + ------- + None or str + If path_or_buf is None, returns the resulting json format as a + string. Otherwise returns None. + + See Also + -------- + read_json : Convert a JSON string to pandas object. + + Notes + ----- + The behavior of ``indent=0`` varies from the stdlib, which does not + indent the output but does insert newlines. Currently, ``indent=0`` + and the default ``indent=None`` are equivalent in pandas, though this + may change in a future release. + + ``orient='table'`` contains a 'pandas_version' field under 'schema'. + This stores the version of `pandas` used in the latest revision of the + schema. + + Examples + -------- + >>> import json + >>> df = pd.DataFrame( + ... [["a", "b"], ["c", "d"]], + ... index=["row 1", "row 2"], + ... columns=["col 1", "col 2"], + ... ) + + >>> result = df.to_json(orient="split") + >>> parsed = json.loads(result) + >>> json.dumps(parsed, indent=4) # doctest: +SKIP + {{ + "columns": [ + "col 1", + "col 2" + ], + "index": [ + "row 1", + "row 2" + ], + "data": [ + [ + "a", + "b" + ], + [ + "c", + "d" + ] + ] + }} + + Encoding/decoding a Dataframe using ``'records'`` formatted JSON. + Note that index labels are not preserved with this encoding. 
+ + >>> result = df.to_json(orient="records") + >>> parsed = json.loads(result) + >>> json.dumps(parsed, indent=4) # doctest: +SKIP + [ + {{ + "col 1": "a", + "col 2": "b" + }}, + {{ + "col 1": "c", + "col 2": "d" + }} + ] + + Encoding/decoding a Dataframe using ``'index'`` formatted JSON: + + >>> result = df.to_json(orient="index") + >>> parsed = json.loads(result) + >>> json.dumps(parsed, indent=4) # doctest: +SKIP + {{ + "row 1": {{ + "col 1": "a", + "col 2": "b" + }}, + "row 2": {{ + "col 1": "c", + "col 2": "d" + }} + }} + + Encoding/decoding a Dataframe using ``'columns'`` formatted JSON: + + >>> result = df.to_json(orient="columns") + >>> parsed = json.loads(result) + >>> json.dumps(parsed, indent=4) # doctest: +SKIP + {{ + "col 1": {{ + "row 1": "a", + "row 2": "c" + }}, + "col 2": {{ + "row 1": "b", + "row 2": "d" + }} + }} + + Encoding/decoding a Dataframe using ``'values'`` formatted JSON: + + >>> result = df.to_json(orient="values") + >>> parsed = json.loads(result) + >>> json.dumps(parsed, indent=4) # doctest: +SKIP + [ + [ + "a", + "b" + ], + [ + "c", + "d" + ] + ] + + Encoding with Table Schema: + + >>> result = df.to_json(orient="table") + >>> parsed = json.loads(result) + >>> json.dumps(parsed, indent=4) # doctest: +SKIP + {{ + "schema": {{ + "fields": [ + {{ + "name": "index", + "type": "string" + }}, + {{ + "name": "col 1", + "type": "string" + }}, + {{ + "name": "col 2", + "type": "string" + }} + ], + "primaryKey": [ + "index" + ], + "pandas_version": "1.4.0" + }}, + "data": [ + {{ + "index": "row 1", + "col 1": "a", + "col 2": "b" + }}, + {{ + "index": "row 2", + "col 1": "c", + "col 2": "d" + }} + ] + }} + """ + from pandas.io import json + + if date_format is None and orient == "table": + date_format = "iso" + elif date_format is None: + date_format = "epoch" + + config.is_nonnegative_int(indent) + indent = indent or 0 + + return json.to_json( + path_or_buf=path_or_buf, + obj=self, + orient=orient, + date_format=date_format, + double_precision=double_precision, + force_ascii=force_ascii, + date_unit=date_unit, + default_handler=default_handler, + lines=lines, + compression=compression, + index=index, + indent=indent, + storage_options=storage_options, + ) + + @final + def to_hdf( + self, + path_or_buf, + key: str, + mode: str = "a", + complevel: int | None = None, + complib: str | None = None, + append: bool_t = False, + format: str | None = None, + index: bool_t = True, + min_itemsize: int | dict[str, int] | None = None, + nan_rep=None, + dropna: bool_t | None = None, + data_columns: Literal[True] | list[str] | None = None, + errors: str = "strict", + encoding: str = "UTF-8", + ) -> None: + """ + Write the contained data to an HDF5 file using HDFStore. + + Hierarchical Data Format (HDF) is self-describing, allowing an + application to interpret the structure and contents of a file with + no outside information. One HDF file can hold a mix of related objects + which can be accessed as a group or as individual objects. + + In order to add another DataFrame or Series to an existing HDF file + please use append mode and a different key. + + .. warning:: + + One can store a subclass of ``DataFrame`` or ``Series`` to HDF5, + but the type of the subclass is lost upon storing. + + For more information see the :ref:`user guide <io.hdf5>`. + + Parameters + ---------- + path_or_buf : str or pandas.HDFStore + File path or HDFStore object. + key : str + Identifier for the group in the store.
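+ For instance, keys may be hierarchical, HDFStore-style (an illustrative sketch; the file and group names are hypothetical): + + >>> df.to_hdf('data.h5', key='group/df') # doctest: +SKIP + >>> pd.read_hdf('data.h5', 'group/df') # doctest: +SKIP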
+ mode : {'a', 'w', 'r+'}, default 'a' + Mode to open file: + + - 'w': write, a new file is created (an existing file with + the same name would be deleted). + - 'a': append, an existing file is opened for reading and + writing, and if the file does not exist it is created. + - 'r+': similar to 'a', but the file must already exist. + complevel : {0-9}, default None + Specifies a compression level for data. + A value of 0 or None disables compression. + complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default 'zlib' + Specifies the compression library to be used. + As of v0.20.2 these additional compressors for Blosc are supported + (default if no compressor specified: 'blosc:blosclz'): + {'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', + 'blosc:zlib', 'blosc:zstd'}. + Specifying a compression library which is not available issues + a ValueError. + append : bool, default False + For Table formats, append the input data to the existing. + format : {'fixed', 'table', None}, default 'fixed' + Possible values: + + - 'fixed': Fixed format. Fast writing/reading. Not-appendable, + nor searchable. + - 'table': Table format. Write as a PyTables Table structure + which may perform worse but allow more flexible operations + like searching / selecting subsets of the data. + - If None, pd.get_option('io.hdf.default_format') is checked, + followed by fallback to "fixed". + errors : str, default 'strict' + Specifies how encoding and decoding errors are to be handled. + See the errors argument for :func:`open` for a full list + of options. + encoding : str, default "UTF-8" + min_itemsize : dict or int, optional + Map column names to minimum string sizes for columns. + nan_rep : Any, optional + How to represent null values as str. + Not allowed with append=True. + data_columns : list of columns or True, optional + List of columns to create as indexed data columns for on-disk + queries, or True to use all columns. By default only the axes + of the object are indexed. See :ref:`io.hdf5-query-data-columns`. + Applicable only to format='table'. + + See Also + -------- + read_hdf : Read from HDF file. + DataFrame.to_parquet : Write a DataFrame to the binary parquet format. + DataFrame.to_sql : Write to a SQL table. + DataFrame.to_feather : Write out feather-format for DataFrames. + DataFrame.to_csv : Write out to a csv file. + + Examples + -------- + >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, + ... 
index=['a', 'b', 'c']) # doctest: +SKIP + >>> df.to_hdf('data.h5', key='df', mode='w') # doctest: +SKIP + + We can add another object to the same file: + + >>> s = pd.Series([1, 2, 3, 4]) # doctest: +SKIP + >>> s.to_hdf('data.h5', key='s') # doctest: +SKIP + + Reading from HDF file: + + >>> pd.read_hdf('data.h5', 'df') # doctest: +SKIP + A B + a 1 4 + b 2 5 + c 3 6 + >>> pd.read_hdf('data.h5', 's') # doctest: +SKIP + 0 1 + 1 2 + 2 3 + 3 4 + dtype: int64 + """ + from pandas.io import pytables + + # Argument 3 to "to_hdf" has incompatible type "NDFrame"; expected + # "Union[DataFrame, Series]" [arg-type] + pytables.to_hdf( + path_or_buf, + key, + self, # type: ignore[arg-type] + mode=mode, + complevel=complevel, + complib=complib, + append=append, + format=format, + index=index, + min_itemsize=min_itemsize, + nan_rep=nan_rep, + dropna=dropna, + data_columns=data_columns, + errors=errors, + encoding=encoding, + ) + + @final + def to_sql( + self, + name: str, + con, + schema=None, + if_exists: str = "fail", + index: bool_t = True, + index_label=None, + chunksize=None, + dtype: DtypeArg | None = None, + method=None, + ) -> int | None: + """ + Write records stored in a DataFrame to a SQL database. + + Databases supported by SQLAlchemy [1]_ are supported. Tables can be + newly created, appended to, or overwritten. + + Parameters + ---------- + name : str + Name of SQL table. + con : sqlalchemy.engine.(Engine or Connection) or sqlite3.Connection + Using SQLAlchemy makes it possible to use any DB supported by that + library. Legacy support is provided for sqlite3.Connection objects. The user + is responsible for engine disposal and connection closure for the SQLAlchemy + connectable. See `here <https://docs.sqlalchemy.org/en/13/core/connections.html>`_. + + schema : str, optional + Specify the schema (if database flavor supports this). If None, use + default schema. + if_exists : {'fail', 'replace', 'append'}, default 'fail' + How to behave if the table already exists. + + * fail: Raise a ValueError. + * replace: Drop the table before inserting new values. + * append: Insert new values to the existing table. + + index : bool, default True + Write DataFrame index as a column. Uses `index_label` as the column + name in the table. + index_label : str or sequence, default None + Column label for index column(s). If None is given (default) and + `index` is True, then the index names are used. + A sequence should be given if the DataFrame uses MultiIndex. + chunksize : int, optional + Specify the number of rows in each batch to be written at a time. + By default, all rows will be written at once. + dtype : dict or scalar, optional + Specifying the datatype for columns. If a dictionary is used, the + keys should be the column names and the values should be the + SQLAlchemy types or strings for the sqlite3 legacy mode. If a + scalar is provided, it will be applied to all columns. + method : {None, 'multi', callable}, optional + Controls the SQL insertion clause used: + + * None : Uses standard SQL ``INSERT`` clause (one per row). + * 'multi': Pass multiple values in a single ``INSERT`` clause. + * callable with signature ``(pd_table, conn, keys, data_iter)``. + + Details and a sample callable implementation can be found in the + section :ref:`insert method <io.sql.method>`.
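+ + A minimal sketch of such a callable (illustrative only; ``pd_table.table``, the underlying SQLAlchemy ``Table``, is an internal attribute and not part of the documented signature): + + >>> def insert_rows(pd_table, conn, keys, data_iter): # doctest: +SKIP + ... rows = [dict(zip(keys, row)) for row in data_iter] + ... conn.execute(pd_table.table.insert(), rows) + ... return len(rows)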
+ + Returns + ------- + None or int + Number of rows affected by to_sql. None is returned if the callable + passed into ``method`` does not return the number of rows. + + The number of returned rows affected is the sum of the ``rowcount`` + attribute of ``sqlite3.Cursor`` or SQLAlchemy connectable which may not + reflect the exact number of written rows as stipulated in the + `sqlite3 <https://docs.python.org/3/library/sqlite3.html#sqlite3.Cursor.rowcount>`__ or + `SQLAlchemy <https://docs.sqlalchemy.org/en/14/core/connections.html#sqlalchemy.engine.BaseCursorResult.rowcount>`__. + + .. versionadded:: 1.4.0 + + Raises + ------ + ValueError + When the table already exists and `if_exists` is 'fail' (the + default). + + See Also + -------- + read_sql : Read a DataFrame from a table. + + Notes + ----- + Timezone aware datetime columns will be written as + ``Timestamp with timezone`` type with SQLAlchemy if supported by the + database. Otherwise, the datetimes will be stored as timezone unaware + timestamps local to the original timezone. + + References + ---------- + .. [1] https://docs.sqlalchemy.org + .. [2] https://www.python.org/dev/peps/pep-0249/ + + Examples + -------- + Create an in-memory SQLite database. + + >>> from sqlalchemy import create_engine + >>> engine = create_engine('sqlite://', echo=False) + + Create a table from scratch with 3 rows. + + >>> df = pd.DataFrame({'name' : ['User 1', 'User 2', 'User 3']}) + >>> df + name + 0 User 1 + 1 User 2 + 2 User 3 + + >>> df.to_sql('users', con=engine) + 3 + >>> engine.execute("SELECT * FROM users").fetchall() + [(0, 'User 1'), (1, 'User 2'), (2, 'User 3')] + + An `sqlalchemy.engine.Connection` can also be passed to `con`: + + >>> with engine.begin() as connection: + ... df1 = pd.DataFrame({'name' : ['User 4', 'User 5']}) + ... df1.to_sql('users', con=connection, if_exists='append') + 2 + + This is allowed to support operations that require that the same + DBAPI connection is used for the entire operation. + + >>> df2 = pd.DataFrame({'name' : ['User 6', 'User 7']}) + >>> df2.to_sql('users', con=engine, if_exists='append') + 2 + >>> engine.execute("SELECT * FROM users").fetchall() + [(0, 'User 1'), (1, 'User 2'), (2, 'User 3'), + (0, 'User 4'), (1, 'User 5'), (0, 'User 6'), + (1, 'User 7')] + + Overwrite the table with just ``df2``. + + >>> df2.to_sql('users', con=engine, if_exists='replace', + ... index_label='id') + 2 + >>> engine.execute("SELECT * FROM users").fetchall() + [(0, 'User 6'), (1, 'User 7')] + + Specify the dtype (especially useful for integers with missing values). + Notice that while pandas is forced to store the data as floating point, + the database supports nullable integers. When fetching the data with + Python, we get back integer scalars. + + >>> df = pd.DataFrame({"A": [1, None, 2]}) + >>> df + A + 0 1.0 + 1 NaN + 2 2.0 + + >>> from sqlalchemy.types import Integer + >>> df.to_sql('integers', con=engine, index=False, + ... dtype={"A": Integer()}) + 3 + + >>> engine.execute("SELECT * FROM integers").fetchall() + [(1,), (None,), (2,)] + """ # noqa:E501 + from pandas.io import sql + + return sql.to_sql( + self, + name, + con, + schema=schema, + if_exists=if_exists, + index=index, + index_label=index_label, + chunksize=chunksize, + dtype=dtype, + method=method, + ) + + @final + @doc( + storage_options=_shared_docs["storage_options"], + compression_options=_shared_docs["compression_options"] % "path", + ) + def to_pickle( + self, + path, + compression: CompressionOptions = "infer", + protocol: int = pickle.HIGHEST_PROTOCOL, + storage_options: StorageOptions = None, + ) -> None: + """ + Pickle (serialize) object to file. + + Parameters + ---------- + path : str + File path where the pickled object will be stored.
+ {compression_options} + protocol : int + Int which indicates which protocol should be used by the pickler, + default HIGHEST_PROTOCOL (see [1]_ paragraph 12.1.2). The possible + values are 0, 1, 2, 3, 4, 5. A negative value for the protocol + parameter is equivalent to setting its value to HIGHEST_PROTOCOL. + + .. [1] https://docs.python.org/3/library/pickle.html. + + {storage_options} + + .. versionadded:: 1.2.0 + + See Also + -------- + read_pickle : Load pickled pandas object (or any object) from file. + DataFrame.to_hdf : Write DataFrame to an HDF5 file. + DataFrame.to_sql : Write DataFrame to a SQL database. + DataFrame.to_parquet : Write a DataFrame to the binary parquet format. + + Examples + -------- + >>> original_df = pd.DataFrame({{"foo": range(5), "bar": range(5, 10)}}) # doctest: +SKIP + >>> original_df # doctest: +SKIP + foo bar + 0 0 5 + 1 1 6 + 2 2 7 + 3 3 8 + 4 4 9 + >>> original_df.to_pickle("./dummy.pkl") # doctest: +SKIP + + >>> unpickled_df = pd.read_pickle("./dummy.pkl") # doctest: +SKIP + >>> unpickled_df # doctest: +SKIP + foo bar + 0 0 5 + 1 1 6 + 2 2 7 + 3 3 8 + 4 4 9 + """ # noqa: E501 + from pandas.io.pickle import to_pickle + + to_pickle( + self, + path, + compression=compression, + protocol=protocol, + storage_options=storage_options, + ) + + @final + def to_clipboard( + self, excel: bool_t = True, sep: str | None = None, **kwargs + ) -> None: + r""" + Copy object to the system clipboard. + + Write a text representation of object to the system clipboard. + This can be pasted into Excel, for example. + + Parameters + ---------- + excel : bool, default True + Produce output in a csv format for easy pasting into excel. + + - True, use the provided separator for csv pasting. + - False, write a string representation of the object to the clipboard. + + sep : str, default ``'\t'`` + Field delimiter. + **kwargs + These parameters will be passed to DataFrame.to_csv. + + See Also + -------- + DataFrame.to_csv : Write a DataFrame to a comma-separated values + (csv) file. + read_clipboard : Read text from clipboard and pass to read_csv. + + Notes + ----- + Requirements for your platform. + + - Linux : `xclip`, or `xsel` (with `PyQt4` modules) + - Windows : none + - macOS : none + + Examples + -------- + Copy the contents of a DataFrame to the clipboard. + + >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['A', 'B', 'C']) + + >>> df.to_clipboard(sep=',') # doctest: +SKIP + ... # Wrote the following to the system clipboard: + ... # ,A,B,C + ... # 0,1,2,3 + ... # 1,4,5,6 + + We can omit the index by passing the keyword `index` and setting + it to false. + + >>> df.to_clipboard(sep=',', index=False) # doctest: +SKIP + ... # Wrote the following to the system clipboard: + ... # A,B,C + ... # 1,2,3 + ... # 4,5,6 + """ + from pandas.io import clipboards + + clipboards.to_clipboard(self, excel=excel, sep=sep, **kwargs) + + @final + def to_xarray(self): + """ + Return an xarray object from the pandas object. + + Returns + ------- + xarray.DataArray or xarray.Dataset + Data in the pandas structure converted to Dataset if the object is + a DataFrame, or a DataArray if the object is a Series. + + See Also + -------- + DataFrame.to_hdf : Write DataFrame to an HDF5 file. + DataFrame.to_parquet : Write a DataFrame to the binary parquet format. + + Notes + ----- + See the `xarray docs <https://xarray.pydata.org/en/stable/>`__ + + Examples + -------- + >>> df = pd.DataFrame([('falcon', 'bird', 389.0, 2), + ... ('parrot', 'bird', 24.0, 2), + ... ('lion', 'mammal', 80.5, 4), + ... ('monkey', 'mammal', np.nan, 4)], + ... 
columns=['name', 'class', 'max_speed', + ... 'num_legs']) + >>> df + name class max_speed num_legs + 0 falcon bird 389.0 2 + 1 parrot bird 24.0 2 + 2 lion mammal 80.5 4 + 3 monkey mammal NaN 4 + + >>> df.to_xarray() + <xarray.Dataset> + Dimensions: (index: 4) + Coordinates: + * index (index) int64 0 1 2 3 + Data variables: + name (index) object 'falcon' 'parrot' 'lion' 'monkey' + class (index) object 'bird' 'bird' 'mammal' 'mammal' + max_speed (index) float64 389.0 24.0 80.5 nan + num_legs (index) int64 2 2 4 4 + + >>> df['max_speed'].to_xarray() + <xarray.DataArray 'max_speed' (index: 4)> + array([389. , 24. , 80.5, nan]) + Coordinates: + * index (index) int64 0 1 2 3 + + >>> dates = pd.to_datetime(['2018-01-01', '2018-01-01', + ... '2018-01-02', '2018-01-02']) + >>> df_multiindex = pd.DataFrame({'date': dates, + ... 'animal': ['falcon', 'parrot', + ... 'falcon', 'parrot'], + ... 'speed': [350, 18, 361, 15]}) + >>> df_multiindex = df_multiindex.set_index(['date', 'animal']) + + >>> df_multiindex + speed + date animal + 2018-01-01 falcon 350 + parrot 18 + 2018-01-02 falcon 361 + parrot 15 + + >>> df_multiindex.to_xarray() + <xarray.Dataset> + Dimensions: (animal: 2, date: 2) + Coordinates: + * date (date) datetime64[ns] 2018-01-01 2018-01-02 + * animal (animal) object 'falcon' 'parrot' + Data variables: + speed (date, animal) int64 350 18 361 15 + """ + xarray = import_optional_dependency("xarray") + + if self.ndim == 1: + return xarray.DataArray.from_series(self) + else: + return xarray.Dataset.from_dataframe(self) + + @final + @doc(returns=fmt.return_docstring) + def to_latex( + self, + buf=None, + columns=None, + col_space=None, + header=True, + index=True, + na_rep="NaN", + formatters=None, + float_format=None, + sparsify=None, + index_names=True, + bold_rows=False, + column_format=None, + longtable=None, + escape=None, + encoding=None, + decimal=".", + multicolumn=None, + multicolumn_format=None, + multirow=None, + caption=None, + label=None, + position=None, + ): + r""" + Render object to a LaTeX tabular, longtable, or nested table. + + Requires ``\usepackage{{booktabs}}``. The output can be copy/pasted + into a main LaTeX document or read from an external file + with ``\input{{table.tex}}``. + + .. versionchanged:: 1.0.0 + Added caption and label arguments. + + .. versionchanged:: 1.2.0 + Added position argument, changed meaning of caption argument. + + Parameters + ---------- + buf : str, Path or StringIO-like, optional, default None + Buffer to write to. If None, the output is returned as a string. + columns : list of label, optional + The subset of columns to write. Writes all columns by default. + col_space : int, optional + The minimum width of each column. + header : bool or list of str, default True + Write out the column names. If a list of strings is given, + it is assumed to be aliases for the column names. + index : bool, default True + Write row names (index). + na_rep : str, default 'NaN' + Missing data representation. + formatters : list of functions or dict of {{str: function}}, optional + Formatter functions to apply to columns' elements by position or + name. The result of each function must be a unicode string. + List must be of length equal to the number of columns. + float_format : one-parameter function or str, optional, default None + Formatter for floating point numbers. For example + ``float_format="%.2f"`` and ``float_format="{{:0.2f}}".format`` will + both result in 0.1234 being formatted as 0.12. + sparsify : bool, optional + Set to False for a DataFrame with a hierarchical index to print + every multiindex key at each row.
By default, the value will be + read from the config module. + index_names : bool, default True + Prints the names of the indexes. + bold_rows : bool, default False + Make the row labels bold in the output. + column_format : str, optional + The columns format as specified in `LaTeX table format + <https://en.wikibooks.org/wiki/LaTeX/Tables>`__ e.g. 'rcl' for 3 + columns. By default, 'l' will be used for all columns except + columns of numbers, which default to 'r'. + longtable : bool, optional + By default, the value will be read from the pandas config + module. Use a longtable environment instead of tabular. Requires + adding a \usepackage{{longtable}} to your LaTeX preamble. + escape : bool, optional + By default, the value will be read from the pandas config + module. When set to False, prevents escaping of LaTeX special + characters in column names. + encoding : str, optional + A string representing the encoding to use in the output file, + defaults to 'utf-8'. + decimal : str, default '.' + Character recognized as decimal separator, e.g. ',' in Europe. + multicolumn : bool, default True + Use \multicolumn to enhance MultiIndex columns. + The default will be read from the config module. + multicolumn_format : str, default 'l' + The alignment for multicolumns, similar to `column_format`. + The default will be read from the config module. + multirow : bool, default False + Use \multirow to enhance MultiIndex rows. Requires adding a + \usepackage{{multirow}} to your LaTeX preamble. Will print + centered labels (instead of top-aligned) across the contained + rows, separating groups via clines. The default will be read + from the pandas config module. + caption : str or tuple, optional + Tuple (full_caption, short_caption), + which results in ``\caption[short_caption]{{full_caption}}``; + if a single string is passed, no short caption will be set. + + .. versionadded:: 1.0.0 + + .. versionchanged:: 1.2.0 + Optionally allow caption to be a tuple ``(full_caption, short_caption)``. + + label : str, optional + The LaTeX label to be placed inside ``\label{{}}`` in the output. + This is used with ``\ref{{}}`` in the main ``.tex`` file. + + .. versionadded:: 1.0.0 + position : str, optional + The LaTeX positional argument for tables, to be placed after + ``\begin{{}}`` in the output. + + .. versionadded:: 1.2.0 + {returns} + See Also + -------- + Styler.to_latex : Render a DataFrame to LaTeX with conditional formatting. + DataFrame.to_string : Render a DataFrame to a console-friendly + tabular output. + DataFrame.to_html : Render a DataFrame as an HTML table. + + Examples + -------- + >>> df = pd.DataFrame(dict(name=['Raphael', 'Donatello'], + ... mask=['red', 'purple'], + ... weapon=['sai', 'bo staff'])) + >>> print(df.to_latex(index=False)) # doctest: +SKIP + \begin{{tabular}}{{lll}} + \toprule + name & mask & weapon \\ + \midrule + Raphael & red & sai \\ + Donatello & purple & bo staff \\ + \bottomrule + \end{{tabular}} + """ + msg = ( + "In future versions `DataFrame.to_latex` is expected to utilise the base " + "implementation of `Styler.to_latex` for formatting and rendering. " + "The arguments signature may therefore change. It is recommended instead " + "to use `DataFrame.style.to_latex` which also contains additional " + "functionality."
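+ # For reference, the Styler-based replacement looks like + # df.style.to_latex(hrules=True) # booktabs-style rules via Styler + # (an illustrative sketch; see Styler.to_latex for the full signature).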
+ ) + warnings.warn(msg, FutureWarning, stacklevel=find_stack_level()) + + # Get defaults from the pandas config + if self.ndim == 1: + self = self.to_frame() + if longtable is None: + longtable = config.get_option("display.latex.longtable") + if escape is None: + escape = config.get_option("display.latex.escape") + if multicolumn is None: + multicolumn = config.get_option("display.latex.multicolumn") + if multicolumn_format is None: + multicolumn_format = config.get_option("display.latex.multicolumn_format") + if multirow is None: + multirow = config.get_option("display.latex.multirow") + + self = cast("DataFrame", self) + formatter = DataFrameFormatter( + self, + columns=columns, + col_space=col_space, + na_rep=na_rep, + header=header, + index=index, + formatters=formatters, + float_format=float_format, + bold_rows=bold_rows, + sparsify=sparsify, + index_names=index_names, + escape=escape, + decimal=decimal, + ) + return DataFrameRenderer(formatter).to_latex( + buf=buf, + column_format=column_format, + longtable=longtable, + encoding=encoding, + multicolumn=multicolumn, + multicolumn_format=multicolumn_format, + multirow=multirow, + caption=caption, + label=label, + position=position, + ) + + @final + @doc( + storage_options=_shared_docs["storage_options"], + compression_options=_shared_docs["compression_options"], + ) + def to_csv( + self, + path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None, + sep: str = ",", + na_rep: str = "", + float_format: str | None = None, + columns: Sequence[Hashable] | None = None, + header: bool_t | list[str] = True, + index: bool_t = True, + index_label: IndexLabel | None = None, + mode: str = "w", + encoding: str | None = None, + compression: CompressionOptions = "infer", + quoting: int | None = None, + quotechar: str = '"', + line_terminator: str | None = None, + chunksize: int | None = None, + date_format: str | None = None, + doublequote: bool_t = True, + escapechar: str | None = None, + decimal: str = ".", + errors: str = "strict", + storage_options: StorageOptions = None, + ) -> str | None: + r""" + Write object to a comma-separated values (csv) file. + + Parameters + ---------- + path_or_buf : str, path object, file-like object, or None, default None + String, path object (implementing os.PathLike[str]), or file-like + object implementing a write() function. If None, the result is + returned as a string. If a non-binary file object is passed, it should + be opened with `newline=''`, disabling universal newlines. If a binary + file object is passed, `mode` might need to contain a `'b'`. + + .. versionchanged:: 1.2.0 + + Support for binary file objects was introduced. + + sep : str, default ',' + String of length 1. Field delimiter for the output file. + na_rep : str, default '' + Missing data representation. + float_format : str, default None + Format string for floating point numbers. + columns : sequence, optional + Columns to write. + header : bool or list of str, default True + Write out the column names. If a list of strings is given it is + assumed to be aliases for the column names. + index : bool, default True + Write row names (index). + index_label : str or sequence, or False, default None + Column label for index column(s) if desired. If None is given, and + `header` and `index` are True, then the index names are used. A + sequence should be given if the object uses MultiIndex. If + False do not print fields for index names. Use index_label=False + for easier importing in R. 
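+ For instance (an illustrative sketch of the resulting header): + + >>> pd.DataFrame({{'x': [1]}}).to_csv(index_label='row_id') # doctest: +SKIP + 'row_id,x\n0,1\n'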
+ mode : str + Python write mode, default 'w'. + encoding : str, optional + A string representing the encoding to use in the output file, + defaults to 'utf-8'. `encoding` is not supported if `path_or_buf` + is a non-binary file object. + {compression_options} + + .. versionchanged:: 1.0.0 + + May now be a dict with key 'method' as compression mode + and other entries as additional compression options if + compression mode is 'zip'. + + .. versionchanged:: 1.1.0 + + Passing compression options as keys in dict is + supported for compression modes 'gzip', 'bz2', 'zstd', and 'zip'. + + .. versionchanged:: 1.2.0 + + Compression is supported for binary file objects. + + .. versionchanged:: 1.2.0 + + Previous versions forwarded dict entries for 'gzip' to + `gzip.open` instead of `gzip.GzipFile` which prevented + setting `mtime`. + + quoting : optional constant from csv module + Defaults to csv.QUOTE_MINIMAL. If you have set a `float_format` + then floats are converted to strings and thus csv.QUOTE_NONNUMERIC + will treat them as non-numeric. + quotechar : str, default '\"' + String of length 1. Character used to quote fields. + line_terminator : str, optional + The newline character or character sequence to use in the output + file. Defaults to `os.linesep`, which depends on the OS in which + this method is called ('\\n' for linux, '\\r\\n' for Windows, i.e.). + chunksize : int or None + Rows to write at a time. + date_format : str, default None + Format string for datetime objects. + doublequote : bool, default True + Control quoting of `quotechar` inside a field. + escapechar : str, default None + String of length 1. Character used to escape `sep` and `quotechar` + when appropriate. + decimal : str, default '.' + Character recognized as decimal separator. E.g. use ',' for + European data. + errors : str, default 'strict' + Specifies how encoding and decoding errors are to be handled. + See the errors argument for :func:`open` for a full list + of options. + + .. versionadded:: 1.1.0 + + {storage_options} + + .. versionadded:: 1.2.0 + + Returns + ------- + None or str + If path_or_buf is None, returns the resulting csv format as a + string. Otherwise returns None. + + See Also + -------- + read_csv : Load a CSV file into a DataFrame. + to_excel : Write DataFrame to an Excel file. + + Examples + -------- + >>> df = pd.DataFrame({{'name': ['Raphael', 'Donatello'], + ... 'mask': ['red', 'purple'], + ... 'weapon': ['sai', 'bo staff']}}) + >>> df.to_csv(index=False) + 'name,mask,weapon\nRaphael,red,sai\nDonatello,purple,bo staff\n' + + Create 'out.zip' containing 'out.csv' + + >>> compression_opts = dict(method='zip', + ... archive_name='out.csv') # doctest: +SKIP + >>> df.to_csv('out.zip', index=False, + ... 
compression=compression_opts) # doctest: +SKIP + + To write a csv file to a new folder or nested folder you will first + need to create it using either Pathlib or os: + + >>> from pathlib import Path # doctest: +SKIP + >>> filepath = Path('folder/subfolder/out.csv') # doctest: +SKIP + >>> filepath.parent.mkdir(parents=True, exist_ok=True) # doctest: +SKIP + >>> df.to_csv(filepath) # doctest: +SKIP + + >>> import os # doctest: +SKIP + >>> os.makedirs('folder/subfolder', exist_ok=True) # doctest: +SKIP + >>> df.to_csv('folder/subfolder/out.csv') # doctest: +SKIP + """ + df = self if isinstance(self, ABCDataFrame) else self.to_frame() + + formatter = DataFrameFormatter( + frame=df, + header=header, + index=index, + na_rep=na_rep, + float_format=float_format, + decimal=decimal, + ) + + return DataFrameRenderer(formatter).to_csv( + path_or_buf, + line_terminator=line_terminator, + sep=sep, + encoding=encoding, + errors=errors, + compression=compression, + quoting=quoting, + columns=columns, + index_label=index_label, + mode=mode, + chunksize=chunksize, + quotechar=quotechar, + date_format=date_format, + doublequote=doublequote, + escapechar=escapechar, + storage_options=storage_options, + ) + + # ---------------------------------------------------------------------- + # Lookup Caching + + def _reset_cacher(self) -> None: + """ + Reset the cacher. + """ + raise AbstractMethodError(self) + + def _maybe_update_cacher( + self, + clear: bool_t = False, + verify_is_copy: bool_t = True, + inplace: bool_t = False, + ) -> None: + """ + See if we need to update our parent cacher if clear, then clear our + cache. + + Parameters + ---------- + clear : bool, default False + Clear the item cache. + verify_is_copy : bool, default True + Provide is_copy checks. + """ + + if verify_is_copy: + self._check_setitem_copy(t="referent") + + if clear: + self._clear_item_cache() + + def _clear_item_cache(self) -> None: + raise AbstractMethodError(self) + + # ---------------------------------------------------------------------- + # Indexing Methods + + def take( + self: NDFrameT, indices, axis=0, is_copy: bool_t | None = None, **kwargs + ) -> NDFrameT: + """ + Return the elements in the given *positional* indices along an axis. + + This means that we are not indexing according to actual values in + the index attribute of the object. We are indexing according to the + actual position of the element in the object. + + Parameters + ---------- + indices : array-like + An array of ints indicating which positions to take. + axis : {0 or 'index', 1 or 'columns', None}, default 0 + The axis on which to select elements. ``0`` means that we are + selecting rows, ``1`` means that we are selecting columns. + is_copy : bool + Before pandas 1.0, ``is_copy=False`` can be specified to ensure + that the return value is an actual copy. Starting with pandas 1.0, + ``take`` always returns a copy, and the keyword is therefore + deprecated. + + .. deprecated:: 1.0.0 + **kwargs + For compatibility with :meth:`numpy.take`. Has no effect on the + output. + + Returns + ------- + taken : same type as caller + An array-like containing the elements taken from the object. + + See Also + -------- + DataFrame.loc : Select a subset of a DataFrame by labels. + DataFrame.iloc : Select a subset of a DataFrame by positions. + numpy.take : Take elements from an array along an axis. + + Examples + -------- + >>> df = pd.DataFrame([('falcon', 'bird', 389.0), + ... ('parrot', 'bird', 24.0), + ... ('lion', 'mammal', 80.5), + ... 
('monkey', 'mammal', np.nan)], + ... columns=['name', 'class', 'max_speed'], + ... index=[0, 2, 3, 1]) + >>> df + name class max_speed + 0 falcon bird 389.0 + 2 parrot bird 24.0 + 3 lion mammal 80.5 + 1 monkey mammal NaN + + Take elements at positions 0 and 3 along the axis 0 (default). + + Note how the actual indices selected (0 and 1) do not correspond to + our selected indices 0 and 3. That's because we are selecting the 0th + and 3rd rows, not rows whose indices equal 0 and 3. + + >>> df.take([0, 3]) + name class max_speed + 0 falcon bird 389.0 + 1 monkey mammal NaN + + Take elements at indices 1 and 2 along the axis 1 (column selection). + + >>> df.take([1, 2], axis=1) + class max_speed + 0 bird 389.0 + 2 bird 24.0 + 3 mammal 80.5 + 1 mammal NaN + + We may take elements using negative integers, which count from the + end of the object, just like with Python lists. + + >>> df.take([-1, -2]) + name class max_speed + 1 monkey mammal NaN + 3 lion mammal 80.5 + """ + if is_copy is not None: + warnings.warn( + "is_copy is deprecated and will be removed in a future version. " + "'take' always returns a copy, so there is no need to specify this.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + nv.validate_take((), kwargs) + + self._consolidate_inplace() + + new_data = self._mgr.take( + indices, axis=self._get_block_manager_axis(axis), verify=True + ) + return self._constructor(new_data).__finalize__(self, method="take") + + def _take_with_is_copy(self: NDFrameT, indices, axis=0) -> NDFrameT: + """ + Internal version of the `take` method that sets the `_is_copy` + attribute to keep track of the parent dataframe (used in indexing + for the SettingWithCopyWarning). + + See the docstring of `take` for full explanation of the parameters. + """ + result = self.take(indices=indices, axis=axis) + # Maybe set copy if we didn't actually change the index. + if not result._get_axis(axis).equals(self._get_axis(axis)): + result._set_is_copy(self) + return result + + @final + def xs(self, key, axis=0, level=None, drop_level: bool_t = True): + """ + Return cross-section from the Series/DataFrame. + + This method takes a `key` argument to select data at a particular + level of a MultiIndex. + + Parameters + ---------- + key : label or tuple of label + Label contained in the index, or partially in a MultiIndex. + axis : {0 or 'index', 1 or 'columns'}, default 0 + Axis to retrieve cross-section on. + level : object, defaults to first n levels (n=1 or len(key)) + In case of a key partially contained in a MultiIndex, indicate + which levels are used. Levels can be referred by label or position. + drop_level : bool, default True + If False, returns object with same levels as self. + + Returns + ------- + Series or DataFrame + Cross-section from the original Series or DataFrame + corresponding to the selected index levels. + + See Also + -------- + DataFrame.loc : Access a group of rows and columns + by label(s) or a boolean array. + DataFrame.iloc : Purely integer-location based indexing + for selection by position. + + Notes + ----- + `xs` cannot be used to set values. + + MultiIndex Slicers is a generic way to get/set values on + any level or levels. + It is a superset of `xs` functionality, see + :ref:`MultiIndex Slicers <advanced.mi_slicers>`. + + Examples + -------- + >>> d = {'num_legs': [4, 4, 2, 2], + ... 'num_wings': [0, 0, 2, 2], + ... 'class': ['mammal', 'mammal', 'mammal', 'bird'], + ... 'animal': ['cat', 'dog', 'bat', 'penguin'], + ... 
'locomotion': ['walks', 'walks', 'flies', 'walks']} + >>> df = pd.DataFrame(data=d) + >>> df = df.set_index(['class', 'animal', 'locomotion']) + >>> df + num_legs num_wings + class animal locomotion + mammal cat walks 4 0 + dog walks 4 0 + bat flies 2 2 + bird penguin walks 2 2 + + Get values at specified index + + >>> df.xs('mammal') + num_legs num_wings + animal locomotion + cat walks 4 0 + dog walks 4 0 + bat flies 2 2 + + Get values at several indexes + + >>> df.xs(('mammal', 'dog')) + num_legs num_wings + locomotion + walks 4 0 + + Get values at specified index and level + + >>> df.xs('cat', level=1) + num_legs num_wings + class locomotion + mammal walks 4 0 + + Get values at several indexes and levels + + >>> df.xs(('bird', 'walks'), + ... level=[0, 'locomotion']) + num_legs num_wings + animal + penguin 2 2 + + Get values at specified column and axis + + >>> df.xs('num_wings', axis=1) + class animal locomotion + mammal cat walks 0 + dog walks 0 + bat flies 2 + bird penguin walks 2 + Name: num_wings, dtype: int64 + """ + axis = self._get_axis_number(axis) + labels = self._get_axis(axis) + + if isinstance(key, list): + warnings.warn( + "Passing lists as key for xs is deprecated and will be removed in a " + "future version. Pass key as a tuple instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + if level is not None: + if not isinstance(labels, MultiIndex): + raise TypeError("Index must be a MultiIndex") + loc, new_ax = labels.get_loc_level(key, level=level, drop_level=drop_level) + + # create the tuple of the indexer + _indexer = [slice(None)] * self.ndim + _indexer[axis] = loc + indexer = tuple(_indexer) + + result = self.iloc[indexer] + setattr(result, result._get_axis_name(axis), new_ax) + return result + + if axis == 1: + if drop_level: + return self[key] + index = self.columns + else: + index = self.index + + self._consolidate_inplace() + + if isinstance(index, MultiIndex): + loc, new_index = index._get_loc_level(key, level=0) + if not drop_level: + if lib.is_integer(loc): + new_index = index[loc : loc + 1] + else: + new_index = index[loc] + else: + loc = index.get_loc(key) + + if isinstance(loc, np.ndarray): + if loc.dtype == np.bool_: + (inds,) = loc.nonzero() + return self._take_with_is_copy(inds, axis=axis) + else: + return self._take_with_is_copy(loc, axis=axis) + + if not is_scalar(loc): + new_index = index[loc] + + if is_scalar(loc) and axis == 0: + # In this case loc should be an integer + if self.ndim == 1: + # if we encounter an array-like and we only have 1 dim + # that means there are list/ndarrays inside the Series! + # so just return them (GH 6394) + return self._values[loc] + + new_values = self._mgr.fast_xs(loc) + + result = self._constructor_sliced( + new_values, + index=self.columns, + name=self.index[loc], + dtype=new_values.dtype, + ) + elif is_scalar(loc): + result = self.iloc[:, slice(loc, loc + 1)] + elif axis == 1: + result = self.iloc[:, loc] + else: + result = self.iloc[loc] + result.index = new_index + + # this could be a view + # but only in a single-dtyped view sliceable case + result._set_is_copy(self, copy=not result._is_view) + return result + + def __getitem__(self, item): + raise AbstractMethodError(self) + + def _slice(self: NDFrameT, slobj: slice, axis=0) -> NDFrameT: + """ + Construct a slice of this container. + + Slicing with this method is *always* positional.
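+ + For example (an illustrative sketch; this is internal API and the values are hypothetical): + + >>> s = pd.Series([10, 20, 30], index=[5, 6, 7]) # doctest: +SKIP + >>> s._slice(slice(0, 2)) # positional, equivalent to s.iloc[0:2] # doctest: +SKIP + 5 10 + 6 20 + dtype: int64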
+ """ + assert isinstance(slobj, slice), type(slobj) + axis = self._get_block_manager_axis(axis) + result = self._constructor(self._mgr.get_slice(slobj, axis=axis)) + result = result.__finalize__(self) + + # this could be a view + # but only in a single-dtyped view sliceable case + is_copy = axis != 0 or result._is_view + result._set_is_copy(self, copy=is_copy) + return result + + @final + def _set_is_copy(self, ref: NDFrame, copy: bool_t = True) -> None: + if not copy: + self._is_copy = None + else: + assert ref is not None + self._is_copy = weakref.ref(ref) + + def _check_is_chained_assignment_possible(self) -> bool_t: + """ + Check if we are a view, have a cacher, and are of mixed type. + If so, then force a setitem_copy check. + + Should be called just near setting a value + + Will return a boolean if it we are a view and are cached, but a + single-dtype meaning that the cacher should be updated following + setting. + """ + if self._is_copy: + self._check_setitem_copy(t="referent") + return False + + @final + def _check_setitem_copy(self, t="setting", force=False): + """ + + Parameters + ---------- + t : str, the type of setting error + force : bool, default False + If True, then force showing an error. + + validate if we are doing a setitem on a chained copy. + + It is technically possible to figure out that we are setting on + a copy even WITH a multi-dtyped pandas object. In other words, some + blocks may be views while other are not. Currently _is_view will ALWAYS + return False for multi-blocks to avoid having to handle this case. + + df = DataFrame(np.arange(0,9), columns=['count']) + df['group'] = 'b' + + # This technically need not raise SettingWithCopy if both are view + # (which is not generally guaranteed but is usually True. However, + # this is in general not a good practice and we recommend using .loc. 
+ df.iloc[0:5]['group'] = 'a' + + """ + # return early if the check is not needed + if not (force or self._is_copy): + return + + value = config.get_option("mode.chained_assignment") + if value is None: + return + + # see if the copy is not actually referred; if so, then dissolve + # the copy weakref + if self._is_copy is not None and not isinstance(self._is_copy, str): + r = self._is_copy() + if not gc.get_referents(r) or (r is not None and r.shape == self.shape): + self._is_copy = None + return + + # a custom message + if isinstance(self._is_copy, str): + t = self._is_copy + + elif t == "referent": + t = ( + "\n" + "A value is trying to be set on a copy of a slice from a " + "DataFrame\n\n" + "See the caveats in the documentation: " + "https://pandas.pydata.org/pandas-docs/stable/user_guide/" + "indexing.html#returning-a-view-versus-a-copy" + ) + + else: + t = ( + "\n" + "A value is trying to be set on a copy of a slice from a " + "DataFrame.\n" + "Try using .loc[row_indexer,col_indexer] = value " + "instead\n\nSee the caveats in the documentation: " + "https://pandas.pydata.org/pandas-docs/stable/user_guide/" + "indexing.html#returning-a-view-versus-a-copy" + ) + + if value == "raise": + raise com.SettingWithCopyError(t) + elif value == "warn": + warnings.warn(t, com.SettingWithCopyWarning, stacklevel=find_stack_level()) + + def __delitem__(self, key) -> None: + """ + Delete item + """ + deleted = False + + maybe_shortcut = False + if self.ndim == 2 and isinstance(self.columns, MultiIndex): + try: + # By using engine's __contains__ we effectively + # restrict to same-length tuples + maybe_shortcut = key not in self.columns._engine + except TypeError: + pass + + if maybe_shortcut: + # Allow shorthand to delete all columns whose first len(key) + # elements match key: + if not isinstance(key, tuple): + key = (key,) + for col in self.columns: + if isinstance(col, tuple) and col[: len(key)] == key: + del self[col] + deleted = True + if not deleted: + # If the above loop ran and didn't delete anything because + # there was no match, this call should raise the appropriate + # exception: + loc = self.axes[-1].get_loc(key) + self._mgr = self._mgr.idelete(loc) + + # delete from the caches + try: + del self._item_cache[key] + except KeyError: + pass + + # ---------------------------------------------------------------------- + # Unsorted + + @final + def _check_inplace_and_allows_duplicate_labels(self, inplace): + if inplace and not self.flags.allows_duplicate_labels: + raise ValueError( + "Cannot specify 'inplace=True' when " + "'self.flags.allows_duplicate_labels' is False." + ) + + @final + def get(self, key, default=None): + """ + Get item from object for given key (ex: DataFrame column). + + Returns default value if not found. + + Parameters + ---------- + key : object + + Returns + ------- + value : same type as items contained in object + + Examples + -------- + >>> df = pd.DataFrame( + ... [ + ... [24.3, 75.7, "high"], + ... [31, 87.8, "high"], + ... [22, 71.6, "medium"], + ... [35, 95, "medium"], + ... ], + ... columns=["temp_celsius", "temp_fahrenheit", "windspeed"], + ... index=pd.date_range(start="2014-02-12", end="2014-02-15", freq="D"), + ... 
) + + >>> df + temp_celsius temp_fahrenheit windspeed + 2014-02-12 24.3 75.7 high + 2014-02-13 31.0 87.8 high + 2014-02-14 22.0 71.6 medium + 2014-02-15 35.0 95.0 medium + + >>> df.get(["temp_celsius", "windspeed"]) + temp_celsius windspeed + 2014-02-12 24.3 high + 2014-02-13 31.0 high + 2014-02-14 22.0 medium + 2014-02-15 35.0 medium + + If the key isn't found, the default value will be used. + + >>> df.get(["temp_celsius", "temp_kelvin"], default="default_value") + 'default_value' + """ + try: + return self[key] + except (KeyError, ValueError, IndexError): + return default + + @final + @property + def _is_view(self) -> bool_t: + """Return boolean indicating if self is view of another array""" + return self._mgr.is_view + + @final + def reindex_like( + self: NDFrameT, + other, + method: str | None = None, + copy: bool_t = True, + limit=None, + tolerance=None, + ) -> NDFrameT: + """ + Return an object with matching indices as other object. + + Conform the object to the same index on all axes. Optional + filling logic, placing NaN in locations having no value + in the previous index. A new object is produced unless the + new index is equivalent to the current one and copy=False. + + Parameters + ---------- + other : Object of the same data type + Its row and column indices are used to define the new indices + of this object. + method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'} + Method to use for filling holes in reindexed DataFrame. + Please note: this is only applicable to DataFrames/Series with a + monotonically increasing/decreasing index. + + * None (default): don't fill gaps + * pad / ffill: propagate last valid observation forward to next + valid + * backfill / bfill: use next valid observation to fill gap + * nearest: use nearest valid observations to fill gap. + + copy : bool, default True + Return a new object, even if the passed indexes are the same. + limit : int, default None + Maximum number of consecutive labels to fill for inexact matches. + tolerance : optional + Maximum distance between original and new labels for inexact + matches. The values of the index at the matching locations must + satisfy the equation ``abs(index[indexer] - target) <= tolerance``. + + Tolerance may be a scalar value, which applies the same tolerance + to all values, or list-like, which applies variable tolerance per + element. List-like includes list, tuple, array, Series, and must be + the same size as the index and its dtype must exactly match the + index's type. + + Returns + ------- + Series or DataFrame + Same type as caller, but with changed indices on each axis. + + See Also + -------- + DataFrame.set_index : Set row labels. + DataFrame.reset_index : Remove row labels or move them to new columns. + DataFrame.reindex : Change to new indices or expand indices. + + Notes + ----- + Same as calling + ``.reindex(index=other.index, columns=other.columns,...)``. + + Examples + -------- + >>> df1 = pd.DataFrame([[24.3, 75.7, 'high'], + ... [31, 87.8, 'high'], + ... [22, 71.6, 'medium'], + ... [35, 95, 'medium']], + ... columns=['temp_celsius', 'temp_fahrenheit', + ... 'windspeed'], + ... index=pd.date_range(start='2014-02-12', + ... end='2014-02-15', freq='D')) + + >>> df1 + temp_celsius temp_fahrenheit windspeed + 2014-02-12 24.3 75.7 high + 2014-02-13 31.0 87.8 high + 2014-02-14 22.0 71.6 medium + 2014-02-15 35.0 95.0 medium + + >>> df2 = pd.DataFrame([[28, 'low'], + ... [30, 'low'], + ... [35.1, 'medium']], + ... columns=['temp_celsius', 'windspeed'], + ... 
index=pd.DatetimeIndex(['2014-02-12', '2014-02-13', + ... '2014-02-15'])) + + >>> df2 + temp_celsius windspeed + 2014-02-12 28.0 low + 2014-02-13 30.0 low + 2014-02-15 35.1 medium + + >>> df2.reindex_like(df1) + temp_celsius temp_fahrenheit windspeed + 2014-02-12 28.0 NaN low + 2014-02-13 30.0 NaN low + 2014-02-14 NaN NaN NaN + 2014-02-15 35.1 NaN medium + """ + d = other._construct_axes_dict( + axes=self._AXIS_ORDERS, + method=method, + copy=copy, + limit=limit, + tolerance=tolerance, + ) + + return self.reindex(**d) + + def drop( + self, + labels=None, + axis=0, + index=None, + columns=None, + level=None, + inplace: bool_t = False, + errors: str = "raise", + ): + + inplace = validate_bool_kwarg(inplace, "inplace") + + if labels is not None: + if index is not None or columns is not None: + raise ValueError("Cannot specify both 'labels' and 'index'/'columns'") + axis_name = self._get_axis_name(axis) + axes = {axis_name: labels} + elif index is not None or columns is not None: + axes, _ = self._construct_axes_from_arguments((index, columns), {}) + else: + raise ValueError( + "Need to specify at least one of 'labels', 'index' or 'columns'" + ) + + obj = self + + for axis, labels in axes.items(): + if labels is not None: + obj = obj._drop_axis(labels, axis, level=level, errors=errors) + + if inplace: + self._update_inplace(obj) + else: + return obj + + @final + def _drop_axis( + self: NDFrameT, + labels, + axis, + level=None, + errors: str = "raise", + consolidate: bool_t = True, + only_slice: bool_t = False, + ) -> NDFrameT: + """ + Drop labels from specified axis. Used in the ``drop`` method + internally. + + Parameters + ---------- + labels : single label or list-like + axis : int or axis name + level : int or level name, default None + For MultiIndex + errors : {'ignore', 'raise'}, default 'raise' + If 'ignore', suppress error and existing labels are dropped. + consolidate : bool, default True + Whether to call consolidate_inplace in the reindex_indexer call. + only_slice : bool, default False + Whether indexing along columns should be view-only. 
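+ + For example (an illustrative sketch; this is internal API, normally reached via ``DataFrame.drop``): + + >>> df = pd.DataFrame({'a': [1], 'b': [2]}) # doctest: +SKIP + >>> df._drop_axis(['b'], axis=1).columns # doctest: +SKIP + Index(['a'], dtype='object')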
+ + """ + axis_num = self._get_axis_number(axis) + axis = self._get_axis(axis) + + if axis.is_unique: + if level is not None: + if not isinstance(axis, MultiIndex): + raise AssertionError("axis must be a MultiIndex") + new_axis = axis.drop(labels, level=level, errors=errors) + else: + new_axis = axis.drop(labels, errors=errors) + indexer = axis.get_indexer(new_axis) + + # Case for non-unique axis + else: + is_tuple_labels = is_nested_list_like(labels) or isinstance(labels, tuple) + labels = ensure_object(com.index_labels_to_array(labels)) + if level is not None: + if not isinstance(axis, MultiIndex): + raise AssertionError("axis must be a MultiIndex") + mask = ~axis.get_level_values(level).isin(labels) + + # GH 18561 MultiIndex.drop should raise if label is absent + if errors == "raise" and mask.all(): + raise KeyError(f"{labels} not found in axis") + elif ( + isinstance(axis, MultiIndex) + and labels.dtype == "object" + and not is_tuple_labels + ): + # Set level to zero in case of MultiIndex and label is string, + # because isin can't handle strings for MultiIndexes GH#36293 + # In case of tuples we get dtype object but have to use isin GH#42771 + mask = ~axis.get_level_values(0).isin(labels) + else: + mask = ~axis.isin(labels) + # Check if label doesn't exist along axis + labels_missing = (axis.get_indexer_for(labels) == -1).any() + if errors == "raise" and labels_missing: + raise KeyError(f"{labels} not found in axis") + + indexer = mask.nonzero()[0] + new_axis = axis.take(indexer) + + bm_axis = self.ndim - axis_num - 1 + new_mgr = self._mgr.reindex_indexer( + new_axis, + indexer, + axis=bm_axis, + allow_dups=True, + consolidate=consolidate, + only_slice=only_slice, + ) + result = self._constructor(new_mgr) + if self.ndim == 1: + result.name = self.name + + return result.__finalize__(self) + + @final + def _update_inplace(self, result, verify_is_copy: bool_t = True) -> None: + """ + Replace self internals with result. + + Parameters + ---------- + result : same type as self + verify_is_copy : bool, default True + Provide is_copy checks. + """ + # NOTE: This does *not* call __finalize__ and that's an explicit + # decision that we may revisit in the future. + self._reset_cache() + self._clear_item_cache() + self._mgr = result._mgr + self._maybe_update_cacher(verify_is_copy=verify_is_copy) + + @final + def add_prefix(self: NDFrameT, prefix: str) -> NDFrameT: + """ + Prefix labels with string `prefix`. + + For Series, the row labels are prefixed. + For DataFrame, the column labels are prefixed. + + Parameters + ---------- + prefix : str + The string to add before each label. + + Returns + ------- + Series or DataFrame + New Series or DataFrame with updated labels. + + See Also + -------- + Series.add_suffix: Suffix row labels with string `suffix`. + DataFrame.add_suffix: Suffix column labels with string `suffix`. 
+ + Examples + -------- + >>> s = pd.Series([1, 2, 3, 4]) + >>> s + 0 1 + 1 2 + 2 3 + 3 4 + dtype: int64 + + >>> s.add_prefix('item_') + item_0 1 + item_1 2 + item_2 3 + item_3 4 + dtype: int64 + + >>> df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [3, 4, 5, 6]}) + >>> df + A B + 0 1 3 + 1 2 4 + 2 3 5 + 3 4 6 + + >>> df.add_prefix('col_') + col_A col_B + 0 1 3 + 1 2 4 + 2 3 5 + 3 4 6 + """ + f = functools.partial("{prefix}{}".format, prefix=prefix) + + mapper = {self._info_axis_name: f} + # error: Incompatible return value type (got "Optional[NDFrameT]", + # expected "NDFrameT") + # error: Argument 1 to "rename" of "NDFrame" has incompatible type + # "**Dict[str, partial[str]]"; expected "Union[str, int, None]" + return self._rename(**mapper) # type: ignore[return-value, arg-type] + + @final + def add_suffix(self: NDFrameT, suffix: str) -> NDFrameT: + """ + Suffix labels with string `suffix`. + + For Series, the row labels are suffixed. + For DataFrame, the column labels are suffixed. + + Parameters + ---------- + suffix : str + The string to add after each label. + + Returns + ------- + Series or DataFrame + New Series or DataFrame with updated labels. + + See Also + -------- + Series.add_prefix: Prefix row labels with string `prefix`. + DataFrame.add_prefix: Prefix column labels with string `prefix`. + + Examples + -------- + >>> s = pd.Series([1, 2, 3, 4]) + >>> s + 0 1 + 1 2 + 2 3 + 3 4 + dtype: int64 + + >>> s.add_suffix('_item') + 0_item 1 + 1_item 2 + 2_item 3 + 3_item 4 + dtype: int64 + + >>> df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [3, 4, 5, 6]}) + >>> df + A B + 0 1 3 + 1 2 4 + 2 3 5 + 3 4 6 + + >>> df.add_suffix('_col') + A_col B_col + 0 1 3 + 1 2 4 + 2 3 5 + 3 4 6 + """ + f = functools.partial("{}{suffix}".format, suffix=suffix) + + mapper = {self._info_axis_name: f} + # error: Incompatible return value type (got "Optional[NDFrameT]", + # expected "NDFrameT") + # error: Argument 1 to "rename" of "NDFrame" has incompatible type + # "**Dict[str, partial[str]]"; expected "Union[str, int, None]" + return self._rename(**mapper) # type: ignore[return-value, arg-type] + + def sort_values( + self, + axis=0, + ascending=True, + inplace: bool_t = False, + kind: str = "quicksort", + na_position: str = "last", + ignore_index: bool_t = False, + key: ValueKeyFunc = None, + ): + """ + Sort by the values along either axis. + + Parameters + ----------%(optional_by)s + axis : %(axes_single_arg)s, default 0 + Axis to be sorted. + ascending : bool or list of bool, default True + Sort ascending vs. descending. Specify list for multiple sort + orders. If this is a list of bools, must match the length of + the by. + inplace : bool, default False + If True, perform operation in-place. + kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'quicksort' + Choice of sorting algorithm. See also :func:`numpy.sort` for more + information. `mergesort` and `stable` are the only stable algorithms. For + DataFrames, this option is only applied when sorting on a single + column or label. + na_position : {'first', 'last'}, default 'last' + Puts NaNs at the beginning if `first`; `last` puts NaNs at the + end. + ignore_index : bool, default False + If True, the resulting axis will be labeled 0, 1, …, n - 1. + + .. versionadded:: 1.0.0 + + key : callable, optional + Apply the key function to the values + before sorting. This is similar to the `key` argument in the + builtin :meth:`sorted` function, with the notable difference that + this `key` function should be *vectorized*. 
It should expect a + ``Series`` and return a Series with the same shape as the input. + It will be applied to each column in `by` independently. + + .. versionadded:: 1.1.0 + + Returns + ------- + DataFrame or None + DataFrame with sorted values or None if ``inplace=True``. + + See Also + -------- + DataFrame.sort_index : Sort a DataFrame by the index. + Series.sort_values : Similar method for a Series. + + Examples + -------- + >>> df = pd.DataFrame({ + ... 'col1': ['A', 'A', 'B', np.nan, 'D', 'C'], + ... 'col2': [2, 1, 9, 8, 7, 4], + ... 'col3': [0, 1, 9, 4, 2, 3], + ... 'col4': ['a', 'B', 'c', 'D', 'e', 'F'] + ... }) + >>> df + col1 col2 col3 col4 + 0 A 2 0 a + 1 A 1 1 B + 2 B 9 9 c + 3 NaN 8 4 D + 4 D 7 2 e + 5 C 4 3 F + + Sort by col1 + + >>> df.sort_values(by=['col1']) + col1 col2 col3 col4 + 0 A 2 0 a + 1 A 1 1 B + 2 B 9 9 c + 5 C 4 3 F + 4 D 7 2 e + 3 NaN 8 4 D + + Sort by multiple columns + + >>> df.sort_values(by=['col1', 'col2']) + col1 col2 col3 col4 + 1 A 1 1 B + 0 A 2 0 a + 2 B 9 9 c + 5 C 4 3 F + 4 D 7 2 e + 3 NaN 8 4 D + + Sort Descending + + >>> df.sort_values(by='col1', ascending=False) + col1 col2 col3 col4 + 4 D 7 2 e + 5 C 4 3 F + 2 B 9 9 c + 0 A 2 0 a + 1 A 1 1 B + 3 NaN 8 4 D + + Putting NAs first + + >>> df.sort_values(by='col1', ascending=False, na_position='first') + col1 col2 col3 col4 + 3 NaN 8 4 D + 4 D 7 2 e + 5 C 4 3 F + 2 B 9 9 c + 0 A 2 0 a + 1 A 1 1 B + + Sorting with a key function + + >>> df.sort_values(by='col4', key=lambda col: col.str.lower()) + col1 col2 col3 col4 + 0 A 2 0 a + 1 A 1 1 B + 2 B 9 9 c + 3 NaN 8 4 D + 4 D 7 2 e + 5 C 4 3 F + + Natural sort with the key argument, + using the `natsort <https://github.com/SethMMorton/natsort>`__ package. + + >>> df = pd.DataFrame({ + ... "time": ['0hr', '128hr', '72hr', '48hr', '96hr'], + ... "value": [10, 20, 30, 40, 50] + ... }) + >>> df + time value + 0 0hr 10 + 1 128hr 20 + 2 72hr 30 + 3 48hr 40 + 4 96hr 50 + >>> from natsort import index_natsorted + >>> df.sort_values( + ... by="time", + ... key=lambda x: np.argsort(index_natsorted(df["time"])) + ... 
)
+           time  value
+        0    0hr     10
+        3   48hr     40
+        2   72hr     30
+        4   96hr     50
+        1  128hr     20
+        """
+        raise AbstractMethodError(self)
+
+    def sort_index(
+        self,
+        axis=0,
+        level=None,
+        ascending: bool_t | int | Sequence[bool_t | int] = True,
+        inplace: bool_t = False,
+        kind: str = "quicksort",
+        na_position: str = "last",
+        sort_remaining: bool_t = True,
+        ignore_index: bool_t = False,
+        key: IndexKeyFunc = None,
+    ):
+
+        inplace = validate_bool_kwarg(inplace, "inplace")
+        axis = self._get_axis_number(axis)
+        ascending = validate_ascending(ascending)
+
+        target = self._get_axis(axis)
+
+        indexer = get_indexer_indexer(
+            target, level, ascending, kind, na_position, sort_remaining, key
+        )
+
+        if indexer is None:
+            if inplace:
+                result = self
+            else:
+                result = self.copy()
+
+            if ignore_index:
+                result.index = default_index(len(self))
+            if inplace:
+                return
+            else:
+                return result
+
+        baxis = self._get_block_manager_axis(axis)
+        new_data = self._mgr.take(indexer, axis=baxis, verify=False)
+
+        # reconstruct axis if needed
+        new_data.set_axis(baxis, new_data.axes[baxis]._sort_levels_monotonic())
+
+        if ignore_index:
+            axis = 1 if isinstance(self, ABCDataFrame) else 0
+            new_data.set_axis(axis, default_index(len(indexer)))
+
+        result = self._constructor(new_data)
+
+        if inplace:
+            return self._update_inplace(result)
+        else:
+            return result.__finalize__(self, method="sort_index")
+
+    @doc(
+        klass=_shared_doc_kwargs["klass"],
+        axes=_shared_doc_kwargs["axes"],
+        optional_labels="",
+        optional_axis="",
+    )
+    def reindex(self: NDFrameT, *args, **kwargs) -> NDFrameT:
+        """
+        Conform {klass} to new index with optional filling logic.
+
+        Places NA/NaN in locations having no value in the previous index. A new object
+        is produced unless the new index is equivalent to the current one and
+        ``copy=False``.
+
+        Parameters
+        ----------
+        {optional_labels}
+        {axes} : array-like, optional
+            New labels / index to conform to, should be specified using
+            keywords. Preferably an Index object to avoid duplicating data.
+        {optional_axis}
+        method : {{None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}}
+            Method to use for filling holes in reindexed DataFrame.
+            Please note: this is only applicable to DataFrames/Series with a
+            monotonically increasing/decreasing index.
+
+            * None (default): don't fill gaps
+            * pad / ffill: Propagate last valid observation forward to next
+              valid.
+            * backfill / bfill: Use next valid observation to fill gap.
+            * nearest: Use nearest valid observations to fill gap.
+
+        copy : bool, default True
+            Return a new object, even if the passed indexes are the same.
+        level : int or name
+            Broadcast across a level, matching Index values on the
+            passed MultiIndex level.
+        fill_value : scalar, default np.NaN
+            Value to use for missing values. Defaults to NaN, but can be any
+            "compatible" value.
+        limit : int, default None
+            Maximum number of consecutive elements to forward or backward fill.
+        tolerance : optional
+            Maximum distance between original and new labels for inexact
+            matches. The values of the index at the matching locations must
+            satisfy the equation ``abs(index[indexer] - target) <= tolerance``.
+
+            Tolerance may be a scalar value, which applies the same tolerance
+            to all values, or list-like, which applies variable tolerance per
+            element. List-like includes list, tuple, array, Series, and must be
+            the same size as the index and its dtype must exactly match the
+            index's type.
+
+        Returns
+        -------
+        {klass} with changed index.
+
+        See Also
+        --------
+        DataFrame.set_index : Set row labels.
+ DataFrame.reset_index : Remove row labels or move them to new columns. + DataFrame.reindex_like : Change to same indices as other DataFrame. + + Examples + -------- + ``DataFrame.reindex`` supports two calling conventions + + * ``(index=index_labels, columns=column_labels, ...)`` + * ``(labels, axis={{'index', 'columns'}}, ...)`` + + We *highly* recommend using keyword arguments to clarify your + intent. + + Create a dataframe with some fictional data. + + >>> index = ['Firefox', 'Chrome', 'Safari', 'IE10', 'Konqueror'] + >>> df = pd.DataFrame({{'http_status': [200, 200, 404, 404, 301], + ... 'response_time': [0.04, 0.02, 0.07, 0.08, 1.0]}}, + ... index=index) + >>> df + http_status response_time + Firefox 200 0.04 + Chrome 200 0.02 + Safari 404 0.07 + IE10 404 0.08 + Konqueror 301 1.00 + + Create a new index and reindex the dataframe. By default + values in the new index that do not have corresponding + records in the dataframe are assigned ``NaN``. + + >>> new_index = ['Safari', 'Iceweasel', 'Comodo Dragon', 'IE10', + ... 'Chrome'] + >>> df.reindex(new_index) + http_status response_time + Safari 404.0 0.07 + Iceweasel NaN NaN + Comodo Dragon NaN NaN + IE10 404.0 0.08 + Chrome 200.0 0.02 + + We can fill in the missing values by passing a value to + the keyword ``fill_value``. Because the index is not monotonically + increasing or decreasing, we cannot use arguments to the keyword + ``method`` to fill the ``NaN`` values. + + >>> df.reindex(new_index, fill_value=0) + http_status response_time + Safari 404 0.07 + Iceweasel 0 0.00 + Comodo Dragon 0 0.00 + IE10 404 0.08 + Chrome 200 0.02 + + >>> df.reindex(new_index, fill_value='missing') + http_status response_time + Safari 404 0.07 + Iceweasel missing missing + Comodo Dragon missing missing + IE10 404 0.08 + Chrome 200 0.02 + + We can also reindex the columns. + + >>> df.reindex(columns=['http_status', 'user_agent']) + http_status user_agent + Firefox 200 NaN + Chrome 200 NaN + Safari 404 NaN + IE10 404 NaN + Konqueror 301 NaN + + Or we can use "axis-style" keyword arguments + + >>> df.reindex(['http_status', 'user_agent'], axis="columns") + http_status user_agent + Firefox 200 NaN + Chrome 200 NaN + Safari 404 NaN + IE10 404 NaN + Konqueror 301 NaN + + To further illustrate the filling functionality in + ``reindex``, we will create a dataframe with a + monotonically increasing index (for example, a sequence + of dates). + + >>> date_index = pd.date_range('1/1/2010', periods=6, freq='D') + >>> df2 = pd.DataFrame({{"prices": [100, 101, np.nan, 100, 89, 88]}}, + ... index=date_index) + >>> df2 + prices + 2010-01-01 100.0 + 2010-01-02 101.0 + 2010-01-03 NaN + 2010-01-04 100.0 + 2010-01-05 89.0 + 2010-01-06 88.0 + + Suppose we decide to expand the dataframe to cover a wider + date range. + + >>> date_index2 = pd.date_range('12/29/2009', periods=10, freq='D') + >>> df2.reindex(date_index2) + prices + 2009-12-29 NaN + 2009-12-30 NaN + 2009-12-31 NaN + 2010-01-01 100.0 + 2010-01-02 101.0 + 2010-01-03 NaN + 2010-01-04 100.0 + 2010-01-05 89.0 + 2010-01-06 88.0 + 2010-01-07 NaN + + The index entries that did not have a value in the original data frame + (for example, '2009-12-29') are by default filled with ``NaN``. + If desired, we can fill in the missing values using one of several + options. + + For example, to back-propagate the last valid value to fill the ``NaN`` + values, pass ``bfill`` as an argument to the ``method`` keyword. 
+ + >>> df2.reindex(date_index2, method='bfill') + prices + 2009-12-29 100.0 + 2009-12-30 100.0 + 2009-12-31 100.0 + 2010-01-01 100.0 + 2010-01-02 101.0 + 2010-01-03 NaN + 2010-01-04 100.0 + 2010-01-05 89.0 + 2010-01-06 88.0 + 2010-01-07 NaN + + Please note that the ``NaN`` value present in the original dataframe + (at index value 2010-01-03) will not be filled by any of the + value propagation schemes. This is because filling while reindexing + does not look at dataframe values, but only compares the original and + desired indexes. If you do want to fill in the ``NaN`` values present + in the original dataframe, use the ``fillna()`` method. + + See the :ref:`user guide ` for more. + """ + # TODO: Decide if we care about having different examples for different + # kinds + + # construct the args + axes, kwargs = self._construct_axes_from_arguments(args, kwargs) + method = missing.clean_reindex_fill_method(kwargs.pop("method", None)) + level = kwargs.pop("level", None) + copy = kwargs.pop("copy", True) + limit = kwargs.pop("limit", None) + tolerance = kwargs.pop("tolerance", None) + fill_value = kwargs.pop("fill_value", None) + + # Series.reindex doesn't use / need the axis kwarg + # We pop and ignore it here, to make writing Series/Frame generic code + # easier + kwargs.pop("axis", None) + + if kwargs: + raise TypeError( + "reindex() got an unexpected keyword " + f'argument "{list(kwargs.keys())[0]}"' + ) + + self._consolidate_inplace() + + # if all axes that are requested to reindex are equal, then only copy + # if indicated must have index names equal here as well as values + if all( + self._get_axis(axis).identical(ax) + for axis, ax in axes.items() + if ax is not None + ): + if copy: + return self.copy() + return self + + # check if we are a multi reindex + if self._needs_reindex_multi(axes, method, level): + return self._reindex_multi(axes, copy, fill_value) + + # perform the reindex on the axes + return self._reindex_axes( + axes, level, limit, tolerance, method, fill_value, copy + ).__finalize__(self, method="reindex") + + def _reindex_axes( + self: NDFrameT, axes, level, limit, tolerance, method, fill_value, copy + ) -> NDFrameT: + """Perform the reindex for all the axes.""" + obj = self + for a in self._AXIS_ORDERS: + labels = axes[a] + if labels is None: + continue + + ax = self._get_axis(a) + new_index, indexer = ax.reindex( + labels, level=level, limit=limit, tolerance=tolerance, method=method + ) + + axis = self._get_axis_number(a) + obj = obj._reindex_with_indexers( + {axis: [new_index, indexer]}, + fill_value=fill_value, + copy=copy, + allow_dups=False, + ) + # If we've made a copy once, no need to make another one + copy = False + + return obj + + def _needs_reindex_multi(self, axes, method, level) -> bool_t: + """Check if we do need a multi reindex.""" + return ( + (com.count_not_none(*axes.values()) == self._AXIS_LEN) + and method is None + and level is None + and not self._is_mixed_type + ) + + def _reindex_multi(self, axes, copy, fill_value): + raise AbstractMethodError(self) + + @final + def _reindex_with_indexers( + self: NDFrameT, + reindexers, + fill_value=None, + copy: bool_t = False, + allow_dups: bool_t = False, + ) -> NDFrameT: + """allow_dups indicates an internal call here""" + # reindex doing multiple operations on different axes if indicated + new_data = self._mgr + for axis in sorted(reindexers.keys()): + index, indexer = reindexers[axis] + baxis = self._get_block_manager_axis(axis) + + if index is None: + continue + + index = ensure_index(index) + if 
indexer is not None: + indexer = ensure_platform_int(indexer) + + # TODO: speed up on homogeneous DataFrame objects (see _reindex_multi) + new_data = new_data.reindex_indexer( + index, + indexer, + axis=baxis, + fill_value=fill_value, + allow_dups=allow_dups, + copy=copy, + ) + # If we've made a copy once, no need to make another one + copy = False + + if copy and new_data is self._mgr: + new_data = new_data.copy() + + return self._constructor(new_data).__finalize__(self) + + def filter( + self: NDFrameT, + items=None, + like: str | None = None, + regex: str | None = None, + axis=None, + ) -> NDFrameT: + """ + Subset the dataframe rows or columns according to the specified index labels. + + Note that this routine does not filter a dataframe on its + contents. The filter is applied to the labels of the index. + + Parameters + ---------- + items : list-like + Keep labels from axis which are in items. + like : str + Keep labels from axis for which "like in label == True". + regex : str (regular expression) + Keep labels from axis for which re.search(regex, label) == True. + axis : {0 or ‘index’, 1 or ‘columns’, None}, default None + The axis to filter on, expressed either as an index (int) + or axis name (str). By default this is the info axis, + 'index' for Series, 'columns' for DataFrame. + + Returns + ------- + same type as input object + + See Also + -------- + DataFrame.loc : Access a group of rows and columns + by label(s) or a boolean array. + + Notes + ----- + The ``items``, ``like``, and ``regex`` parameters are + enforced to be mutually exclusive. + + ``axis`` defaults to the info axis that is used when indexing + with ``[]``. + + Examples + -------- + >>> df = pd.DataFrame(np.array(([1, 2, 3], [4, 5, 6])), + ... index=['mouse', 'rabbit'], + ... columns=['one', 'two', 'three']) + >>> df + one two three + mouse 1 2 3 + rabbit 4 5 6 + + >>> # select columns by name + >>> df.filter(items=['one', 'three']) + one three + mouse 1 3 + rabbit 4 6 + + >>> # select columns by regular expression + >>> df.filter(regex='e$', axis=1) + one three + mouse 1 3 + rabbit 4 6 + + >>> # select rows containing 'bbi' + >>> df.filter(like='bbi', axis=0) + one two three + rabbit 4 5 6 + """ + nkw = com.count_not_none(items, like, regex) + if nkw > 1: + raise TypeError( + "Keyword arguments `items`, `like`, or `regex` " + "are mutually exclusive" + ) + + if axis is None: + axis = self._info_axis_name + labels = self._get_axis(axis) + + if items is not None: + name = self._get_axis_name(axis) + return self.reindex(**{name: [r for r in items if r in labels]}) + elif like: + + def f(x) -> bool_t: + assert like is not None # needed for mypy + return like in ensure_str(x) + + values = labels.map(f) + return self.loc(axis=axis)[values] + elif regex: + + def f(x) -> bool_t: + return matcher.search(ensure_str(x)) is not None + + matcher = re.compile(regex) + values = labels.map(f) + return self.loc(axis=axis)[values] + else: + raise TypeError("Must pass either `items`, `like`, or `regex`") + + @final + def head(self: NDFrameT, n: int = 5) -> NDFrameT: + """ + Return the first `n` rows. + + This function returns the first `n` rows for the object based + on position. It is useful for quickly testing if your object + has the right type of data in it. + + For negative values of `n`, this function returns all rows except + the last `n` rows, equivalent to ``df[:-n]``. + + Parameters + ---------- + n : int, default 5 + Number of rows to select. 
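+        # A minimal sketch (hypothetical data) of the mutual exclusivity and the
+        # regex path implemented in ``filter`` above; the TypeError text is the
+        # one constructed in that method.
+        #
+        #   >>> import pandas as pd
+        #   >>> df = pd.DataFrame({"one": [1], "two": [2], "three": [3]})
+        #   >>> list(df.filter(items=["one", "three"]).columns)
+        #   ['one', 'three']
+        #   >>> list(df.filter(regex="e$", axis=1).columns)  # re.search per label
+        #   ['one', 'three']
+        #   >>> df.filter(items=["one"], like="t")  # more than one keyword
+        #   Traceback (most recent call last):
+        #       ...
+        #   TypeError: Keyword arguments `items`, `like`, or `regex` are mutually exclusive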
+ + Returns + ------- + same type as caller + The first `n` rows of the caller object. + + See Also + -------- + DataFrame.tail: Returns the last `n` rows. + + Examples + -------- + >>> df = pd.DataFrame({'animal': ['alligator', 'bee', 'falcon', 'lion', + ... 'monkey', 'parrot', 'shark', 'whale', 'zebra']}) + >>> df + animal + 0 alligator + 1 bee + 2 falcon + 3 lion + 4 monkey + 5 parrot + 6 shark + 7 whale + 8 zebra + + Viewing the first 5 lines + + >>> df.head() + animal + 0 alligator + 1 bee + 2 falcon + 3 lion + 4 monkey + + Viewing the first `n` lines (three in this case) + + >>> df.head(3) + animal + 0 alligator + 1 bee + 2 falcon + + For negative values of `n` + + >>> df.head(-3) + animal + 0 alligator + 1 bee + 2 falcon + 3 lion + 4 monkey + 5 parrot + """ + return self.iloc[:n] + + @final + def tail(self: NDFrameT, n: int = 5) -> NDFrameT: + """ + Return the last `n` rows. + + This function returns last `n` rows from the object based on + position. It is useful for quickly verifying data, for example, + after sorting or appending rows. + + For negative values of `n`, this function returns all rows except + the first `n` rows, equivalent to ``df[n:]``. + + Parameters + ---------- + n : int, default 5 + Number of rows to select. + + Returns + ------- + type of caller + The last `n` rows of the caller object. + + See Also + -------- + DataFrame.head : The first `n` rows of the caller object. + + Examples + -------- + >>> df = pd.DataFrame({'animal': ['alligator', 'bee', 'falcon', 'lion', + ... 'monkey', 'parrot', 'shark', 'whale', 'zebra']}) + >>> df + animal + 0 alligator + 1 bee + 2 falcon + 3 lion + 4 monkey + 5 parrot + 6 shark + 7 whale + 8 zebra + + Viewing the last 5 lines + + >>> df.tail() + animal + 4 monkey + 5 parrot + 6 shark + 7 whale + 8 zebra + + Viewing the last `n` lines (three in this case) + + >>> df.tail(3) + animal + 6 shark + 7 whale + 8 zebra + + For negative values of `n` + + >>> df.tail(-3) + animal + 3 lion + 4 monkey + 5 parrot + 6 shark + 7 whale + 8 zebra + """ + if n == 0: + return self.iloc[0:0] + return self.iloc[-n:] + + @final + def sample( + self: NDFrameT, + n: int | None = None, + frac: float | None = None, + replace: bool_t = False, + weights=None, + random_state: RandomState | None = None, + axis: Axis | None = None, + ignore_index: bool_t = False, + ) -> NDFrameT: + """ + Return a random sample of items from an axis of object. + + You can use `random_state` for reproducibility. + + Parameters + ---------- + n : int, optional + Number of items from axis to return. Cannot be used with `frac`. + Default = 1 if `frac` = None. + frac : float, optional + Fraction of axis items to return. Cannot be used with `n`. + replace : bool, default False + Allow or disallow sampling of the same row more than once. + weights : str or ndarray-like, optional + Default 'None' results in equal probability weighting. + If passed a Series, will align with target object on index. Index + values in weights not found in sampled object will be ignored and + index values in sampled object not in weights will be assigned + weights of zero. + If called on a DataFrame, will accept the name of a column + when axis = 0. + Unless weights are a Series, weights must be same length as axis + being sampled. + If weights do not sum to 1, they will be normalized to sum to 1. + Missing values in the weights column will be treated as zero. + Infinite values not allowed. 
+        random_state : int, array-like, BitGenerator, np.random.RandomState, np.random.Generator, optional
+            If int, array-like, or BitGenerator, seed for random number generator.
+            If np.random.RandomState or np.random.Generator, use as given.
+
+            .. versionchanged:: 1.1.0
+
+                array-like and BitGenerator object now passed to np.random.RandomState()
+                as seed
+
+            .. versionchanged:: 1.4.0
+
+                np.random.Generator objects now accepted
+
+        axis : {0 or 'index', 1 or 'columns', None}, default None
+            Axis to sample. Accepts axis number or name. Default is stat axis
+            for given data type (0 for Series and DataFrames).
+        ignore_index : bool, default False
+            If True, the resulting index will be labeled 0, 1, …, n - 1.
+
+            .. versionadded:: 1.3.0
+
+        Returns
+        -------
+        Series or DataFrame
+            A new object of same type as caller containing `n` items randomly
+            sampled from the caller object.
+
+        See Also
+        --------
+        DataFrameGroupBy.sample: Generates random samples from each group of a
+            DataFrame object.
+        SeriesGroupBy.sample: Generates random samples from each group of a
+            Series object.
+        numpy.random.choice: Generates a random sample from a given 1-D numpy
+            array.
+
+        Notes
+        -----
+        If `frac` > 1, `replace` should be set to `True`.
+
+        Examples
+        --------
+        >>> df = pd.DataFrame({'num_legs': [2, 4, 8, 0],
+        ...                    'num_wings': [2, 0, 0, 0],
+        ...                    'num_specimen_seen': [10, 2, 1, 8]},
+        ...                   index=['falcon', 'dog', 'spider', 'fish'])
+        >>> df
+                num_legs  num_wings  num_specimen_seen
+        falcon         2          2                 10
+        dog            4          0                  2
+        spider         8          0                  1
+        fish           0          0                  8
+
+        Extract 3 random elements from the ``Series`` ``df['num_legs']``:
+        Note that we use `random_state` to ensure the reproducibility of
+        the examples.
+
+        >>> df['num_legs'].sample(n=3, random_state=1)
+        fish      0
+        spider    8
+        falcon    2
+        Name: num_legs, dtype: int64
+
+        A random 50% sample of the ``DataFrame`` with replacement:
+
+        >>> df.sample(frac=0.5, replace=True, random_state=1)
+              num_legs  num_wings  num_specimen_seen
+        dog          4          0                  2
+        fish         0          0                  8
+
+        An upsampled sample of the ``DataFrame`` with replacement:
+        Note that the `replace` parameter has to be `True` for `frac` parameter > 1.
+
+        >>> df.sample(frac=2, replace=True, random_state=1)
+                num_legs  num_wings  num_specimen_seen
+        dog            4          0                  2
+        fish           0          0                  8
+        falcon         2          2                 10
+        falcon         2          2                 10
+        fish           0          0                  8
+        dog            4          0                  2
+        fish           0          0                  8
+        dog            4          0                  2
+
+        Using a DataFrame column as weights. Rows with larger value in the
+        `num_specimen_seen` column are more likely to be sampled.
+
+        >>> df.sample(n=2, weights='num_specimen_seen', random_state=1)
+                num_legs  num_wings  num_specimen_seen
+        falcon         2          2                 10
+        fish           0          0                  8
+        """  # noqa:E501
+        if axis is None:
+            axis = self._stat_axis_number
+
+        axis = self._get_axis_number(axis)
+        obj_len = self.shape[axis]
+
+        # Process random_state argument
+        rs = com.random_state(random_state)
+
+        size = sample.process_sampling_size(n, frac, replace)
+        if size is None:
+            assert frac is not None
+            size = round(frac * obj_len)
+
+        if weights is not None:
+            weights = sample.preprocess_weights(self, weights, axis)
+
+        sampled_indices = sample.sample(obj_len, size, replace, weights, rs)
+        result = self.take(sampled_indices, axis=axis)
+
+        if ignore_index:
+            result.index = default_index(len(result))
+
+        return result
+
+    @final
+    @doc(klass=_shared_doc_kwargs["klass"])
+    def pipe(
+        self,
+        func: Callable[..., T] | tuple[Callable[..., T], str],
+        *args,
+        **kwargs,
+    ) -> T:
+        r"""
+        Apply chainable functions that expect Series or DataFrames.
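+        # A minimal sketch (hypothetical data) of how ``sample`` above resolves
+        # the sampling size: ``n`` and ``frac`` are mutually exclusive, and a
+        # ``frac`` becomes ``round(frac * len(axis))`` items. The error text is
+        # the one raised by the sampling helper; exact wording may vary by version.
+        #
+        #   >>> import pandas as pd
+        #   >>> s = pd.Series(range(10))
+        #   >>> len(s.sample(frac=0.25, random_state=0))  # round(0.25 * 10) == 2
+        #   2
+        #   >>> s.sample(n=2, frac=0.5)
+        #   Traceback (most recent call last):
+        #       ...
+        #   ValueError: Please enter a value for `frac` OR `n`, not both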
+ + Parameters + ---------- + func : function + Function to apply to the {klass}. + ``args``, and ``kwargs`` are passed into ``func``. + Alternatively a ``(callable, data_keyword)`` tuple where + ``data_keyword`` is a string indicating the keyword of + ``callable`` that expects the {klass}. + args : iterable, optional + Positional arguments passed into ``func``. + kwargs : mapping, optional + A dictionary of keyword arguments passed into ``func``. + + Returns + ------- + object : the return type of ``func``. + + See Also + -------- + DataFrame.apply : Apply a function along input axis of DataFrame. + DataFrame.applymap : Apply a function elementwise on a whole DataFrame. + Series.map : Apply a mapping correspondence on a + :class:`~pandas.Series`. + + Notes + ----- + Use ``.pipe`` when chaining together functions that expect + Series, DataFrames or GroupBy objects. Instead of writing + + >>> func(g(h(df), arg1=a), arg2=b, arg3=c) # doctest: +SKIP + + You can write + + >>> (df.pipe(h) + ... .pipe(g, arg1=a) + ... .pipe(func, arg2=b, arg3=c) + ... ) # doctest: +SKIP + + If you have a function that takes the data as (say) the second + argument, pass a tuple indicating which keyword expects the + data. For example, suppose ``f`` takes its data as ``arg2``: + + >>> (df.pipe(h) + ... .pipe(g, arg1=a) + ... .pipe((func, 'arg2'), arg1=a, arg3=c) + ... ) # doctest: +SKIP + """ + return com.pipe(self, func, *args, **kwargs) + + # ---------------------------------------------------------------------- + # Attribute access + + @final + def __finalize__( + self: NDFrameT, other, method: str | None = None, **kwargs + ) -> NDFrameT: + """ + Propagate metadata from other to self. + + Parameters + ---------- + other : the object from which to get the attributes that we are going + to propagate + method : str, optional + A passed method name providing context on where ``__finalize__`` + was called. + + .. warning:: + + The value passed as `method` are not currently considered + stable across pandas releases. + """ + if isinstance(other, NDFrame): + for name in other.attrs: + self.attrs[name] = other.attrs[name] + + self.flags.allows_duplicate_labels = other.flags.allows_duplicate_labels + # For subclasses using _metadata. + for name in set(self._metadata) & set(other._metadata): + assert isinstance(name, str) + object.__setattr__(self, name, getattr(other, name, None)) + + if method == "concat": + attrs = other.objs[0].attrs + check_attrs = all(objs.attrs == attrs for objs in other.objs[1:]) + if check_attrs: + for name in attrs: + self.attrs[name] = attrs[name] + + allows_duplicate_labels = all( + x.flags.allows_duplicate_labels for x in other.objs + ) + self.flags.allows_duplicate_labels = allows_duplicate_labels + + return self + + def __getattr__(self, name: str): + """ + After regular attribute access, try looking up the name + This allows simpler access to columns for interactive use. + """ + # Note: obj.x will always call obj.__getattribute__('x') prior to + # calling obj.__getattr__('x'). + if ( + name not in self._internal_names_set + and name not in self._metadata + and name not in self._accessors + and self._info_axis._can_hold_identifiers_and_holds_name(name) + ): + return self[name] + return object.__getattribute__(self, name) + + def __setattr__(self, name: str, value) -> None: + """ + After regular attribute access, try setting the name + This allows simpler access to columns for interactive use. + """ + # first try regular attribute access via __getattribute__, so that + # e.g. 
``obj.x`` and ``obj.x = 4`` will always reference/modify + # the same attribute. + + try: + object.__getattribute__(self, name) + return object.__setattr__(self, name, value) + except AttributeError: + pass + + # if this fails, go on to more involved attribute setting + # (note that this matches __getattr__, above). + if name in self._internal_names_set: + object.__setattr__(self, name, value) + elif name in self._metadata: + object.__setattr__(self, name, value) + else: + try: + existing = getattr(self, name) + if isinstance(existing, Index): + object.__setattr__(self, name, value) + elif name in self._info_axis: + self[name] = value + else: + object.__setattr__(self, name, value) + except (AttributeError, TypeError): + if isinstance(self, ABCDataFrame) and (is_list_like(value)): + warnings.warn( + "Pandas doesn't allow columns to be " + "created via a new attribute name - see " + "https://pandas.pydata.org/pandas-docs/" + "stable/indexing.html#attribute-access", + stacklevel=find_stack_level(), + ) + object.__setattr__(self, name, value) + + @final + def _dir_additions(self) -> set[str]: + """ + add the string-like attributes from the info_axis. + If info_axis is a MultiIndex, its first level values are used. + """ + additions = super()._dir_additions() + if self._info_axis._can_hold_strings: + additions.update(self._info_axis._dir_additions_for_owner) + return additions + + # ---------------------------------------------------------------------- + # Consolidation of internals + + @final + def _protect_consolidate(self, f): + """ + Consolidate _mgr -- if the blocks have changed, then clear the + cache + """ + if isinstance(self._mgr, (ArrayManager, SingleArrayManager)): + return f() + blocks_before = len(self._mgr.blocks) + result = f() + if len(self._mgr.blocks) != blocks_before: + self._clear_item_cache() + return result + + @final + def _consolidate_inplace(self) -> None: + """Consolidate data in place and return None""" + + def f(): + self._mgr = self._mgr.consolidate() + + self._protect_consolidate(f) + + @final + def _consolidate(self): + """ + Compute NDFrame with "consolidated" internals (data of each dtype + grouped together in a single ndarray). 
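+        # A minimal sketch (hypothetical frame) of the attribute fallbacks above:
+        # ``__getattr__`` resolves unknown names against the info axis, and
+        # ``__setattr__`` only routes assignment to names already in that axis.
+        #
+        #   >>> import pandas as pd
+        #   >>> df = pd.DataFrame({"price": [1.0, 2.0]})
+        #   >>> df.price.equals(df["price"])  # read falls through to the column
+        #   True
+        #   >>> df.price = [3.0, 4.0]  # "price" is in the info axis -> self[name] = value
+        #   >>> df["price"].tolist()
+        #   [3.0, 4.0]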
+ + Returns + ------- + consolidated : same type as caller + """ + f = lambda: self._mgr.consolidate() + cons_data = self._protect_consolidate(f) + return self._constructor(cons_data).__finalize__(self) + + @final + @property + def _is_mixed_type(self) -> bool_t: + if self._mgr.is_single_block: + return False + + if self._mgr.any_extension_types: + # Even if they have the same dtype, we can't consolidate them, + # so we pretend this is "mixed'" + return True + + return self.dtypes.nunique() > 1 + + @final + def _check_inplace_setting(self, value) -> bool_t: + """check whether we allow in-place setting with this type of value""" + if self._is_mixed_type and not self._mgr.is_numeric_mixed_type: + + # allow an actual np.nan thru + if is_float(value) and np.isnan(value): + return True + + raise TypeError( + "Cannot do inplace boolean setting on " + "mixed-types with a non np.nan value" + ) + + return True + + @final + def _get_numeric_data(self): + return self._constructor(self._mgr.get_numeric_data()).__finalize__(self) + + @final + def _get_bool_data(self): + return self._constructor(self._mgr.get_bool_data()).__finalize__(self) + + # ---------------------------------------------------------------------- + # Internal Interface Methods + + @property + def values(self) -> np.ndarray: + raise AbstractMethodError(self) + + @property + def _values(self) -> np.ndarray: + """internal implementation""" + raise AbstractMethodError(self) + + @property + def dtypes(self): + """ + Return the dtypes in the DataFrame. + + This returns a Series with the data type of each column. + The result's index is the original DataFrame's columns. Columns + with mixed types are stored with the ``object`` dtype. See + :ref:`the User Guide ` for more. + + Returns + ------- + pandas.Series + The data type of each column. + + Examples + -------- + >>> df = pd.DataFrame({'float': [1.0], + ... 'int': [1], + ... 'datetime': [pd.Timestamp('20180310')], + ... 'string': ['foo']}) + >>> df.dtypes + float float64 + int int64 + datetime datetime64[ns] + string object + dtype: object + """ + data = self._mgr.get_dtypes() + return self._constructor_sliced(data, index=self._info_axis, dtype=np.object_) + + def astype( + self: NDFrameT, dtype, copy: bool_t = True, errors: str = "raise" + ) -> NDFrameT: + """ + Cast a pandas object to a specified dtype ``dtype``. + + Parameters + ---------- + dtype : data type, or dict of column name -> data type + Use a numpy.dtype or Python type to cast entire pandas object to + the same type. Alternatively, use {col: dtype, ...}, where col is a + column label and dtype is a numpy.dtype or Python type to cast one + or more of the DataFrame's columns to column-specific types. + copy : bool, default True + Return a copy when ``copy=True`` (be very careful setting + ``copy=False`` as changes to values then may propagate to other + pandas objects). + errors : {'raise', 'ignore'}, default 'raise' + Control raising of exceptions on invalid data for provided dtype. + + - ``raise`` : allow exceptions to be raised + - ``ignore`` : suppress exceptions. On error return original object. + + Returns + ------- + casted : same type as caller + + See Also + -------- + to_datetime : Convert argument to datetime. + to_timedelta : Convert argument to timedelta. + to_numeric : Convert argument to a numeric type. + numpy.ndarray.astype : Cast a numpy array to a specified type. + + Notes + ----- + .. 
deprecated:: 1.3.0 + + Using ``astype`` to convert from timezone-naive dtype to + timezone-aware dtype is deprecated and will raise in a + future version. Use :meth:`Series.dt.tz_localize` instead. + + Examples + -------- + Create a DataFrame: + + >>> d = {'col1': [1, 2], 'col2': [3, 4]} + >>> df = pd.DataFrame(data=d) + >>> df.dtypes + col1 int64 + col2 int64 + dtype: object + + Cast all columns to int32: + + >>> df.astype('int32').dtypes + col1 int32 + col2 int32 + dtype: object + + Cast col1 to int32 using a dictionary: + + >>> df.astype({'col1': 'int32'}).dtypes + col1 int32 + col2 int64 + dtype: object + + Create a series: + + >>> ser = pd.Series([1, 2], dtype='int32') + >>> ser + 0 1 + 1 2 + dtype: int32 + >>> ser.astype('int64') + 0 1 + 1 2 + dtype: int64 + + Convert to categorical type: + + >>> ser.astype('category') + 0 1 + 1 2 + dtype: category + Categories (2, int64): [1, 2] + + Convert to ordered categorical type with custom ordering: + + >>> from pandas.api.types import CategoricalDtype + >>> cat_dtype = CategoricalDtype( + ... categories=[2, 1], ordered=True) + >>> ser.astype(cat_dtype) + 0 1 + 1 2 + dtype: category + Categories (2, int64): [2 < 1] + + Note that using ``copy=False`` and changing data on a new + pandas object may propagate changes: + + >>> s1 = pd.Series([1, 2]) + >>> s2 = s1.astype('int64', copy=False) + >>> s2[0] = 10 + >>> s1 # note that s1[0] has changed too + 0 10 + 1 2 + dtype: int64 + + Create a series of dates: + + >>> ser_date = pd.Series(pd.date_range('20200101', periods=3)) + >>> ser_date + 0 2020-01-01 + 1 2020-01-02 + 2 2020-01-03 + dtype: datetime64[ns] + """ + if is_dict_like(dtype): + if self.ndim == 1: # i.e. Series + if len(dtype) > 1 or self.name not in dtype: + raise KeyError( + "Only the Series name can be used for " + "the key in Series dtype mappings." + ) + new_type = dtype[self.name] + return self.astype(new_type, copy, errors) + + for col_name in dtype.keys(): + if col_name not in self: + raise KeyError( + "Only a column name can be used for the " + "key in a dtype mappings argument. " + f"'{col_name}' not found in columns." 
+ ) + + # GH#44417 cast to Series so we can use .iat below, which will be + # robust in case we + from pandas import Series + + dtype_ser = Series(dtype, dtype=object) + dtype_ser = dtype_ser.reindex(self.columns, fill_value=None, copy=False) + + results = [] + for i, (col_name, col) in enumerate(self.items()): + cdt = dtype_ser.iat[i] + if isna(cdt): + res_col = col.copy() if copy else col + else: + res_col = col.astype(dtype=cdt, copy=copy, errors=errors) + results.append(res_col) + + elif is_extension_array_dtype(dtype) and self.ndim > 1: + # GH 18099/22869: columnwise conversion to extension dtype + # GH 24704: use iloc to handle duplicate column names + # TODO(EA2D): special case not needed with 2D EAs + results = [ + self.iloc[:, i].astype(dtype, copy=copy) + for i in range(len(self.columns)) + ] + + else: + # else, only a single dtype is given + new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors) + return self._constructor(new_data).__finalize__(self, method="astype") + + # GH 33113: handle empty frame or series + if not results: + return self.copy() + + # GH 19920: retain column metadata after concat + result = concat(results, axis=1, copy=False) + result.columns = self.columns + result = result.__finalize__(self, method="astype") + # https://github.com/python/mypy/issues/8354 + return cast(NDFrameT, result) + + @final + def copy(self: NDFrameT, deep: bool_t = True) -> NDFrameT: + """ + Make a copy of this object's indices and data. + + When ``deep=True`` (default), a new object will be created with a + copy of the calling object's data and indices. Modifications to + the data or indices of the copy will not be reflected in the + original object (see notes below). + + When ``deep=False``, a new object will be created without copying + the calling object's data or index (only references to the data + and index are copied). Any changes to the data of the original + will be reflected in the shallow copy (and vice versa). + + Parameters + ---------- + deep : bool, default True + Make a deep copy, including a copy of the data and the indices. + With ``deep=False`` neither the indices nor the data are copied. + + Returns + ------- + copy : Series or DataFrame + Object type matches caller. + + Notes + ----- + When ``deep=True``, data is copied but actual Python objects + will not be copied recursively, only the reference to the object. + This is in contrast to `copy.deepcopy` in the Standard Library, + which recursively copies object data (see examples below). + + While ``Index`` objects are copied when ``deep=True``, the underlying + numpy array is not copied for performance reasons. Since ``Index`` is + immutable, the underlying data can be safely shared and a copy + is not needed. + + Examples + -------- + >>> s = pd.Series([1, 2], index=["a", "b"]) + >>> s + a 1 + b 2 + dtype: int64 + + >>> s_copy = s.copy() + >>> s_copy + a 1 + b 2 + dtype: int64 + + **Shallow copy versus default (deep) copy:** + + >>> s = pd.Series([1, 2], index=["a", "b"]) + >>> deep = s.copy() + >>> shallow = s.copy(deep=False) + + Shallow copy shares data and index with original. + + >>> s is shallow + False + >>> s.values is shallow.values and s.index is shallow.index + True + + Deep copy has own copy of data and index. + + >>> s is deep + False + >>> s.values is deep.values or s.index is deep.index + False + + Updates to the data shared by shallow copy and original is reflected + in both; deep copy remains unchanged. 
+ + >>> s[0] = 3 + >>> shallow[1] = 4 + >>> s + a 3 + b 4 + dtype: int64 + >>> shallow + a 3 + b 4 + dtype: int64 + >>> deep + a 1 + b 2 + dtype: int64 + + Note that when copying an object containing Python objects, a deep copy + will copy the data, but will not do so recursively. Updating a nested + data object will be reflected in the deep copy. + + >>> s = pd.Series([[1, 2], [3, 4]]) + >>> deep = s.copy() + >>> s[0][0] = 10 + >>> s + 0 [10, 2] + 1 [3, 4] + dtype: object + >>> deep + 0 [10, 2] + 1 [3, 4] + dtype: object + """ + data = self._mgr.copy(deep=deep) + self._clear_item_cache() + return self._constructor(data).__finalize__(self, method="copy") + + @final + def __copy__(self: NDFrameT, deep: bool_t = True) -> NDFrameT: + return self.copy(deep=deep) + + @final + def __deepcopy__(self: NDFrameT, memo=None) -> NDFrameT: + """ + Parameters + ---------- + memo, default None + Standard signature. Unused + """ + return self.copy(deep=True) + + @final + def _convert( + self: NDFrameT, + datetime: bool_t = False, + numeric: bool_t = False, + timedelta: bool_t = False, + ) -> NDFrameT: + """ + Attempt to infer better dtype for object columns. + + Parameters + ---------- + datetime : bool, default False + If True, convert to date where possible. + numeric : bool, default False + If True, attempt to convert to numbers (including strings), with + unconvertible values becoming NaN. + timedelta : bool, default False + If True, convert to timedelta where possible. + + Returns + ------- + converted : same as input object + """ + validate_bool_kwarg(datetime, "datetime") + validate_bool_kwarg(numeric, "numeric") + validate_bool_kwarg(timedelta, "timedelta") + return self._constructor( + self._mgr.convert( + datetime=datetime, + numeric=numeric, + timedelta=timedelta, + copy=True, + ) + ).__finalize__(self) + + @final + def infer_objects(self: NDFrameT) -> NDFrameT: + """ + Attempt to infer better dtypes for object columns. + + Attempts soft conversion of object-dtyped + columns, leaving non-object and unconvertible + columns unchanged. The inference rules are the + same as during normal Series/DataFrame construction. + + Returns + ------- + converted : same type as input object + + See Also + -------- + to_datetime : Convert argument to datetime. + to_timedelta : Convert argument to timedelta. + to_numeric : Convert argument to numeric type. + convert_dtypes : Convert argument to best possible dtype. + + Examples + -------- + >>> df = pd.DataFrame({"A": ["a", 1, 2, 3]}) + >>> df = df.iloc[1:] + >>> df + A + 1 1 + 2 2 + 3 3 + + >>> df.dtypes + A object + dtype: object + + >>> df.infer_objects().dtypes + A int64 + dtype: object + """ + # numeric=False necessary to only soft convert; + # python objects will still be converted to + # native numpy numeric types + return self._constructor( + self._mgr.convert(datetime=True, numeric=False, timedelta=True, copy=True) + ).__finalize__(self, method="infer_objects") + + @final + def convert_dtypes( + self: NDFrameT, + infer_objects: bool_t = True, + convert_string: bool_t = True, + convert_integer: bool_t = True, + convert_boolean: bool_t = True, + convert_floating: bool_t = True, + ) -> NDFrameT: + """ + Convert columns to best possible dtypes using dtypes supporting ``pd.NA``. + + .. versionadded:: 1.0.0 + + Parameters + ---------- + infer_objects : bool, default True + Whether object dtypes should be converted to the best possible types. + convert_string : bool, default True + Whether object dtypes should be converted to ``StringDtype()``. 
+        convert_integer : bool, default True
+            Whether, if possible, conversion can be done to integer extension types.
+        convert_boolean : bool, default True
+            Whether object dtypes should be converted to ``BooleanDtypes()``.
+        convert_floating : bool, default True
+            Whether, if possible, conversion can be done to floating extension types.
+            If `convert_integer` is also True, preference will be given to integer
+            dtypes if the floats can be faithfully cast to integers.
+
+            .. versionadded:: 1.2.0
+
+        Returns
+        -------
+        Series or DataFrame
+            Copy of input object with new dtype.
+
+        See Also
+        --------
+        infer_objects : Infer dtypes of objects.
+        to_datetime : Convert argument to datetime.
+        to_timedelta : Convert argument to timedelta.
+        to_numeric : Convert argument to a numeric type.
+
+        Notes
+        -----
+        By default, ``convert_dtypes`` will attempt to convert a Series (or each
+        Series in a DataFrame) to dtypes that support ``pd.NA``. By using the options
+        ``convert_string``, ``convert_integer``, ``convert_boolean`` and
+        ``convert_floating``, it is possible to turn off individual conversions
+        to ``StringDtype``, the integer extension types, ``BooleanDtype``
+        or floating extension types, respectively.
+
+        For object-dtyped columns, if ``infer_objects`` is ``True``, use the inference
+        rules as during normal Series/DataFrame construction. Then, if possible,
+        convert to ``StringDtype``, ``BooleanDtype`` or an appropriate integer
+        or floating extension type, otherwise leave as ``object``.
+
+        If the dtype is integer, convert to an appropriate integer extension type.
+
+        If the dtype is numeric, and consists of all integers, convert to an
+        appropriate integer extension type. Otherwise, convert to an
+        appropriate floating extension type.
+
+        .. versionchanged:: 1.2
+            Starting with pandas 1.2, this method also converts float columns
+            to the nullable floating extension type.
+
+        In the future, as new dtypes are added that support ``pd.NA``, the results
+        of this method will change to support those new dtypes.
+
+        Examples
+        --------
+        >>> df = pd.DataFrame(
+        ...     {
+        ...         "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")),
+        ...         "b": pd.Series(["x", "y", "z"], dtype=np.dtype("O")),
+        ...         "c": pd.Series([True, False, np.nan], dtype=np.dtype("O")),
+        ...         "d": pd.Series(["h", "i", np.nan], dtype=np.dtype("O")),
+        ...         "e": pd.Series([10, np.nan, 20], dtype=np.dtype("float")),
+        ...         "f": pd.Series([np.nan, 100.5, 200], dtype=np.dtype("float")),
+        ...     }
+        ... )
+
+        Start with a DataFrame with default dtypes.
+
+        >>> df
+           a  b      c    d     e      f
+        0  1  x   True    h  10.0    NaN
+        1  2  y  False    i   NaN  100.5
+        2  3  z    NaN  NaN  20.0  200.0
+
+        >>> df.dtypes
+        a      int32
+        b     object
+        c     object
+        d     object
+        e    float64
+        f    float64
+        dtype: object
+
+        Convert the DataFrame to use best possible dtypes.
+
+        >>> dfn = df.convert_dtypes()
+        >>> dfn
+           a  b      c     d     e      f
+        0  1  x   True     h    10   <NA>
+        1  2  y  False     i  <NA>  100.5
+        2  3  z   <NA>  <NA>    20  200.0
+
+        >>> dfn.dtypes
+        a      Int32
+        b     string
+        c    boolean
+        d     string
+        e      Int64
+        f    Float64
+        dtype: object
+
+        Start with a Series of strings and missing data represented by ``np.nan``.
+
+        >>> s = pd.Series(["a", "b", np.nan])
+        >>> s
+        0      a
+        1      b
+        2    NaN
+        dtype: object
+
+        Obtain a Series with dtype ``StringDtype``.
+ + >>> s.convert_dtypes() + 0 a + 1 b + 2 + dtype: string + """ + if self.ndim == 1: + return self._convert_dtypes( + infer_objects, + convert_string, + convert_integer, + convert_boolean, + convert_floating, + ) + else: + results = [ + col._convert_dtypes( + infer_objects, + convert_string, + convert_integer, + convert_boolean, + convert_floating, + ) + for col_name, col in self.items() + ] + if len(results) > 0: + result = concat(results, axis=1, copy=False, keys=self.columns) + cons = cast(Type["DataFrame"], self._constructor) + result = cons(result) + result = result.__finalize__(self, method="convert_dtypes") + # https://github.com/python/mypy/issues/8354 + return cast(NDFrameT, result) + else: + return self.copy() + + # ---------------------------------------------------------------------- + # Filling NA's + + @doc(**_shared_doc_kwargs) + def fillna( + self: NDFrameT, + value=None, + method=None, + axis=None, + inplace: bool_t = False, + limit=None, + downcast=None, + ) -> NDFrameT | None: + """ + Fill NA/NaN values using the specified method. + + Parameters + ---------- + value : scalar, dict, Series, or DataFrame + Value to use to fill holes (e.g. 0), alternately a + dict/Series/DataFrame of values specifying which value to use for + each index (for a Series) or column (for a DataFrame). Values not + in the dict/Series/DataFrame will not be filled. This value cannot + be a list. + method : {{'backfill', 'bfill', 'pad', 'ffill', None}}, default None + Method to use for filling holes in reindexed Series + pad / ffill: propagate last valid observation forward to next valid + backfill / bfill: use next valid observation to fill gap. + axis : {axes_single_arg} + Axis along which to fill missing values. + inplace : bool, default False + If True, fill in-place. Note: this will modify any + other views on this object (e.g., a no-copy slice for a column in a + DataFrame). + limit : int, default None + If method is specified, this is the maximum number of consecutive + NaN values to forward/backward fill. In other words, if there is + a gap with more than this number of consecutive NaNs, it will only + be partially filled. If method is not specified, this is the + maximum number of entries along the entire axis where NaNs will be + filled. Must be greater than 0 if not None. + downcast : dict, default is None + A dict of item->dtype of what to downcast if possible, + or the string 'infer' which will try to downcast to an appropriate + equal type (e.g. float64 to int64 if possible). + + Returns + ------- + {klass} or None + Object with missing values filled or None if ``inplace=True``. + + See Also + -------- + interpolate : Fill NaN values using interpolation. + reindex : Conform object to new index. + asfreq : Convert TimeSeries to specified frequency. + + Examples + -------- + >>> df = pd.DataFrame([[np.nan, 2, np.nan, 0], + ... [3, 4, np.nan, 1], + ... [np.nan, np.nan, np.nan, np.nan], + ... [np.nan, 3, np.nan, 4]], + ... columns=list("ABCD")) + >>> df + A B C D + 0 NaN 2.0 NaN 0.0 + 1 3.0 4.0 NaN 1.0 + 2 NaN NaN NaN NaN + 3 NaN 3.0 NaN 4.0 + + Replace all NaN elements with 0s. + + >>> df.fillna(0) + A B C D + 0 0.0 2.0 0.0 0.0 + 1 3.0 4.0 0.0 1.0 + 2 0.0 0.0 0.0 0.0 + 3 0.0 3.0 0.0 4.0 + + We can also propagate non-null values forward or backward. + + >>> df.fillna(method="ffill") + A B C D + 0 NaN 2.0 NaN 0.0 + 1 3.0 4.0 NaN 1.0 + 2 3.0 4.0 NaN 1.0 + 3 3.0 3.0 NaN 4.0 + + Replace all NaN elements in column 'A', 'B', 'C', and 'D', with 0, 1, + 2, and 3 respectively. 
+ + >>> values = {{"A": 0, "B": 1, "C": 2, "D": 3}} + >>> df.fillna(value=values) + A B C D + 0 0.0 2.0 2.0 0.0 + 1 3.0 4.0 2.0 1.0 + 2 0.0 1.0 2.0 3.0 + 3 0.0 3.0 2.0 4.0 + + Only replace the first NaN element. + + >>> df.fillna(value=values, limit=1) + A B C D + 0 0.0 2.0 2.0 0.0 + 1 3.0 4.0 NaN 1.0 + 2 NaN 1.0 NaN 3.0 + 3 NaN 3.0 NaN 4.0 + + When filling using a DataFrame, replacement happens along + the same column names and same indices + + >>> df2 = pd.DataFrame(np.zeros((4, 4)), columns=list("ABCE")) + >>> df.fillna(df2) + A B C D + 0 0.0 2.0 0.0 0.0 + 1 3.0 4.0 0.0 1.0 + 2 0.0 0.0 0.0 NaN + 3 0.0 3.0 0.0 4.0 + + Note that column D is not affected since it is not present in df2. + """ + inplace = validate_bool_kwarg(inplace, "inplace") + value, method = validate_fillna_kwargs(value, method) + + self._consolidate_inplace() + + # set the default here, so functions examining the signaure + # can detect if something was set (e.g. in groupby) (GH9221) + if axis is None: + axis = 0 + axis = self._get_axis_number(axis) + + if value is None: + if not self._mgr.is_single_block and axis == 1: + if inplace: + raise NotImplementedError() + result = self.T.fillna(method=method, limit=limit).T + + return result + + new_data = self._mgr.interpolate( + method=method, + axis=axis, + limit=limit, + inplace=inplace, + coerce=True, + downcast=downcast, + ) + else: + if self.ndim == 1: + if isinstance(value, (dict, ABCSeries)): + if not len(value): + # test_fillna_nonscalar + if inplace: + return None + return self.copy() + value = create_series_with_explicit_dtype( + value, dtype_if_empty=object + ) + value = value.reindex(self.index, copy=False) + value = value._values + elif not is_list_like(value): + pass + else: + raise TypeError( + '"value" parameter must be a scalar, dict ' + "or Series, but you passed a " + f'"{type(value).__name__}"' + ) + + new_data = self._mgr.fillna( + value=value, limit=limit, inplace=inplace, downcast=downcast + ) + + elif isinstance(value, (dict, ABCSeries)): + if axis == 1: + raise NotImplementedError( + "Currently only can fill " + "with dict/Series column " + "by column" + ) + + result = self if inplace else self.copy() + is_dict = isinstance(downcast, dict) + for k, v in value.items(): + if k not in result: + continue + downcast_k = downcast if not is_dict else downcast.get(k) + result[k] = result[k].fillna(v, limit=limit, downcast=downcast_k) + return result if not inplace else None + + elif not is_list_like(value): + if not self._mgr.is_single_block and axis == 1: + + result = self.T.fillna(value=value, limit=limit).T + + new_data = result + else: + + new_data = self._mgr.fillna( + value=value, limit=limit, inplace=inplace, downcast=downcast + ) + elif isinstance(value, ABCDataFrame) and self.ndim == 2: + + new_data = self.where(self.notna(), value)._mgr + else: + raise ValueError(f"invalid fill value with a {type(value)}") + + result = self._constructor(new_data) + if inplace: + return self._update_inplace(result) + else: + return result.__finalize__(self, method="fillna") + + @doc(klass=_shared_doc_kwargs["klass"]) + def ffill( + self: NDFrameT, + axis: None | Axis = None, + inplace: bool_t = False, + limit: None | int = None, + downcast=None, + ) -> NDFrameT | None: + """ + Synonym for :meth:`DataFrame.fillna` with ``method='ffill'``. + + Returns + ------- + {klass} or None + Object with missing values filled or None if ``inplace=True``. 
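+        # A minimal sketch (hypothetical data): ``ffill``/``pad`` and
+        # ``bfill``/``backfill`` below are thin aliases over ``fillna(method=...)``,
+        # so all spellings agree.
+        #
+        #   >>> import numpy as np, pandas as pd
+        #   >>> s = pd.Series([1.0, np.nan, 3.0])
+        #   >>> s.ffill().equals(s.pad()) and s.ffill().equals(s.fillna(method="ffill"))
+        #   True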
+ """ + return self.fillna( + method="ffill", axis=axis, inplace=inplace, limit=limit, downcast=downcast + ) + + pad = ffill + + @doc(klass=_shared_doc_kwargs["klass"]) + def bfill( + self: NDFrameT, + axis: None | Axis = None, + inplace: bool_t = False, + limit: None | int = None, + downcast=None, + ) -> NDFrameT | None: + """ + Synonym for :meth:`DataFrame.fillna` with ``method='bfill'``. + + Returns + ------- + {klass} or None + Object with missing values filled or None if ``inplace=True``. + """ + return self.fillna( + method="bfill", axis=axis, inplace=inplace, limit=limit, downcast=downcast + ) + + backfill = bfill + + @doc( + _shared_docs["replace"], + klass=_shared_doc_kwargs["klass"], + inplace=_shared_doc_kwargs["inplace"], + replace_iloc=_shared_doc_kwargs["replace_iloc"], + ) + def replace( + self, + to_replace=None, + value=lib.no_default, + inplace: bool_t = False, + limit: int | None = None, + regex=False, + method=lib.no_default, + ): + if not ( + is_scalar(to_replace) + or is_re_compilable(to_replace) + or is_list_like(to_replace) + ): + raise TypeError( + "Expecting 'to_replace' to be either a scalar, array-like, " + "dict or None, got invalid type " + f"{repr(type(to_replace).__name__)}" + ) + + inplace = validate_bool_kwarg(inplace, "inplace") + if not is_bool(regex) and to_replace is not None: + raise ValueError("'to_replace' must be 'None' if 'regex' is not a bool") + + self._consolidate_inplace() + + if value is lib.no_default or method is not lib.no_default: + # GH#36984 if the user explicitly passes value=None we want to + # respect that. We have the corner case where the user explicitly + # passes value=None *and* a method, which we interpret as meaning + # they want the (documented) default behavior. + if method is lib.no_default: + # TODO: get this to show up as the default in the docs? 
+ method = "pad" + + # passing a single value that is scalar like + # when value is None (GH5319), for compat + if not is_dict_like(to_replace) and not is_dict_like(regex): + to_replace = [to_replace] + + if isinstance(to_replace, (tuple, list)): + if isinstance(self, ABCDataFrame): + result = self.apply( + self._constructor_sliced._replace_single, + args=(to_replace, method, inplace, limit), + ) + if inplace: + return + return result + self = cast("Series", self) + return self._replace_single(to_replace, method, inplace, limit) + + if not is_dict_like(to_replace): + if not is_dict_like(regex): + raise TypeError( + 'If "to_replace" and "value" are both None ' + 'and "to_replace" is not a list, then ' + "regex must be a mapping" + ) + to_replace = regex + regex = True + + items = list(to_replace.items()) + if items: + keys, values = zip(*items) + else: + keys, values = ([], []) + + are_mappings = [is_dict_like(v) for v in values] + + if any(are_mappings): + if not all(are_mappings): + raise TypeError( + "If a nested mapping is passed, all values " + "of the top level mapping must be mappings" + ) + # passed a nested dict/Series + to_rep_dict = {} + value_dict = {} + + for k, v in items: + keys, values = list(zip(*v.items())) or ([], []) + + to_rep_dict[k] = list(keys) + value_dict[k] = list(values) + + to_replace, value = to_rep_dict, value_dict + else: + to_replace, value = keys, values + + return self.replace( + to_replace, value, inplace=inplace, limit=limit, regex=regex + ) + else: + + # need a non-zero len on all axes + if not self.size: + if inplace: + return + return self.copy() + + if is_dict_like(to_replace): + if is_dict_like(value): # {'A' : NA} -> {'A' : 0} + # Note: Checking below for `in foo.keys()` instead of + # `in foo` is needed for when we have a Series and not dict + mapping = { + col: (to_replace[col], value[col]) + for col in to_replace.keys() + if col in value.keys() and col in self + } + return self._replace_columnwise(mapping, inplace, regex) + + # {'A': NA} -> 0 + elif not is_list_like(value): + # Operate column-wise + if self.ndim == 1: + raise ValueError( + "Series.replace cannot use dict-like to_replace " + "and non-None value" + ) + mapping = { + col: (to_rep, value) for col, to_rep in to_replace.items() + } + return self._replace_columnwise(mapping, inplace, regex) + else: + raise TypeError("value argument must be scalar, dict, or Series") + + elif is_list_like(to_replace): + if not is_list_like(value): + # e.g. to_replace = [NA, ''] and value is 0, + # so we replace NA with 0 and then replace '' with 0 + value = [value] * len(to_replace) + + # e.g. we have to_replace = [NA, ''] and value = [0, 'missing'] + if len(to_replace) != len(value): + raise ValueError( + f"Replacement lists must match in length. 
" + f"Expecting {len(to_replace)} got {len(value)} " + ) + new_data = self._mgr.replace_list( + src_list=to_replace, + dest_list=value, + inplace=inplace, + regex=regex, + ) + + elif to_replace is None: + if not ( + is_re_compilable(regex) + or is_list_like(regex) + or is_dict_like(regex) + ): + raise TypeError( + f"'regex' must be a string or a compiled regular expression " + f"or a list or dict of strings or regular expressions, " + f"you passed a {repr(type(regex).__name__)}" + ) + return self.replace( + regex, value, inplace=inplace, limit=limit, regex=True + ) + else: + + # dest iterable dict-like + if is_dict_like(value): # NA -> {'A' : 0, 'B' : -1} + # Operate column-wise + if self.ndim == 1: + raise ValueError( + "Series.replace cannot use dict-value and " + "non-None to_replace" + ) + mapping = {col: (to_replace, val) for col, val in value.items()} + return self._replace_columnwise(mapping, inplace, regex) + + elif not is_list_like(value): # NA -> 0 + regex = should_use_regex(regex, to_replace) + if regex: + new_data = self._mgr.replace_regex( + to_replace=to_replace, + value=value, + inplace=inplace, + ) + else: + new_data = self._mgr.replace( + to_replace=to_replace, value=value, inplace=inplace + ) + else: + raise TypeError( + f'Invalid "to_replace" type: {repr(type(to_replace).__name__)}' + ) + + result = self._constructor(new_data) + if inplace: + return self._update_inplace(result) + else: + return result.__finalize__(self, method="replace") + + def interpolate( + self: NDFrameT, + method: str = "linear", + axis: Axis = 0, + limit: int | None = None, + inplace: bool_t = False, + limit_direction: str | None = None, + limit_area: str | None = None, + downcast: str | None = None, + **kwargs, + ) -> NDFrameT | None: + """ + Fill NaN values using an interpolation method. + + Please note that only ``method='linear'`` is supported for + DataFrame/Series with a MultiIndex. + + Parameters + ---------- + method : str, default 'linear' + Interpolation technique to use. One of: + + * 'linear': Ignore the index and treat the values as equally + spaced. This is the only method supported on MultiIndexes. + * 'time': Works on daily and higher resolution data to interpolate + given length of interval. + * 'index', 'values': use the actual numerical values of the index. + * 'pad': Fill in NaNs using existing values. + * 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'spline', + 'barycentric', 'polynomial': Passed to + `scipy.interpolate.interp1d`. These methods use the numerical + values of the index. Both 'polynomial' and 'spline' require that + you also specify an `order` (int), e.g. + ``df.interpolate(method='polynomial', order=5)``. + * 'krogh', 'piecewise_polynomial', 'spline', 'pchip', 'akima', + 'cubicspline': Wrappers around the SciPy interpolation methods of + similar names. See `Notes`. + * 'from_derivatives': Refers to + `scipy.interpolate.BPoly.from_derivatives` which + replaces 'piecewise_polynomial' interpolation method in + scipy 0.18. + + axis : {{0 or 'index', 1 or 'columns', None}}, default None + Axis to interpolate along. + limit : int, optional + Maximum number of consecutive NaNs to fill. Must be greater than + 0. + inplace : bool, default False + Update the data in place if possible. + limit_direction : {{'forward', 'backward', 'both'}}, Optional + Consecutive NaNs will be filled in this direction. + + If limit is specified: + * If 'method' is 'pad' or 'ffill', 'limit_direction' must be 'forward'. 
+ * If 'method' is 'backfill' or 'bfill', 'limit_direction' must be + 'backwards'. + + If 'limit' is not specified: + * If 'method' is 'backfill' or 'bfill', the default is 'backward' + * else the default is 'forward' + + .. versionchanged:: 1.1.0 + raises ValueError if `limit_direction` is 'forward' or 'both' and + method is 'backfill' or 'bfill'. + raises ValueError if `limit_direction` is 'backward' or 'both' and + method is 'pad' or 'ffill'. + + limit_area : {{`None`, 'inside', 'outside'}}, default None + If limit is specified, consecutive NaNs will be filled with this + restriction. + + * ``None``: No fill restriction. + * 'inside': Only fill NaNs surrounded by valid values + (interpolate). + * 'outside': Only fill NaNs outside valid values (extrapolate). + + downcast : optional, 'infer' or None, defaults to None + Downcast dtypes if possible. + ``**kwargs`` : optional + Keyword arguments to pass on to the interpolating function. + + Returns + ------- + Series or DataFrame or None + Returns the same object type as the caller, interpolated at + some or all ``NaN`` values or None if ``inplace=True``. + + See Also + -------- + fillna : Fill missing values using different methods. + scipy.interpolate.Akima1DInterpolator : Piecewise cubic polynomials + (Akima interpolator). + scipy.interpolate.BPoly.from_derivatives : Piecewise polynomial in the + Bernstein basis. + scipy.interpolate.interp1d : Interpolate a 1-D function. + scipy.interpolate.KroghInterpolator : Interpolate polynomial (Krogh + interpolator). + scipy.interpolate.PchipInterpolator : PCHIP 1-d monotonic cubic + interpolation. + scipy.interpolate.CubicSpline : Cubic spline data interpolator. + + Notes + ----- + The 'krogh', 'piecewise_polynomial', 'spline', 'pchip' and 'akima' + methods are wrappers around the respective SciPy implementations of + similar names. These use the actual numerical values of the index. + For more information on their behavior, see the + `SciPy documentation + `__ + and `SciPy tutorial + `__. + + Examples + -------- + Filling in ``NaN`` in a :class:`~pandas.Series` via linear + interpolation. + + >>> s = pd.Series([0, 1, np.nan, 3]) + >>> s + 0 0.0 + 1 1.0 + 2 NaN + 3 3.0 + dtype: float64 + >>> s.interpolate() + 0 0.0 + 1 1.0 + 2 2.0 + 3 3.0 + dtype: float64 + + Filling in ``NaN`` in a Series by padding, but filling at most two + consecutive ``NaN`` at a time. + + >>> s = pd.Series([np.nan, "single_one", np.nan, + ... "fill_two_more", np.nan, np.nan, np.nan, + ... 4.71, np.nan]) + >>> s + 0 NaN + 1 single_one + 2 NaN + 3 fill_two_more + 4 NaN + 5 NaN + 6 NaN + 7 4.71 + 8 NaN + dtype: object + >>> s.interpolate(method='pad', limit=2) + 0 NaN + 1 single_one + 2 single_one + 3 fill_two_more + 4 fill_two_more + 5 fill_two_more + 6 NaN + 7 4.71 + 8 4.71 + dtype: object + + Filling in ``NaN`` in a Series via polynomial interpolation or splines: + Both 'polynomial' and 'spline' methods require that you also specify + an ``order`` (int). + + >>> s = pd.Series([0, 2, np.nan, 8]) + >>> s.interpolate(method='polynomial', order=2) + 0 0.000000 + 1 2.000000 + 2 4.666667 + 3 8.000000 + dtype: float64 + + Fill the DataFrame forward (that is, going down) along each column + using linear interpolation. + + Note how the last entry in column 'a' is interpolated differently, + because there is no entry after it to use for interpolation. + Note how the first entry in column 'b' remains ``NaN``, because there + is no entry before it to use for interpolation. + + >>> df = pd.DataFrame([(0.0, np.nan, -1.0, 1.0), + ... 
(np.nan, 2.0, np.nan, np.nan), + ... (2.0, 3.0, np.nan, 9.0), + ... (np.nan, 4.0, -4.0, 16.0)], + ... columns=list('abcd')) + >>> df + a b c d + 0 0.0 NaN -1.0 1.0 + 1 NaN 2.0 NaN NaN + 2 2.0 3.0 NaN 9.0 + 3 NaN 4.0 -4.0 16.0 + >>> df.interpolate(method='linear', limit_direction='forward', axis=0) + a b c d + 0 0.0 NaN -1.0 1.0 + 1 1.0 2.0 -2.0 5.0 + 2 2.0 3.0 -3.0 9.0 + 3 2.0 4.0 -4.0 16.0 + + Using polynomial interpolation. + + >>> df['d'].interpolate(method='polynomial', order=2) + 0 1.0 + 1 4.0 + 2 9.0 + 3 16.0 + Name: d, dtype: float64 + """ + inplace = validate_bool_kwarg(inplace, "inplace") + + axis = self._get_axis_number(axis) + + fillna_methods = ["ffill", "bfill", "pad", "backfill"] + should_transpose = axis == 1 and method not in fillna_methods + + obj = self.T if should_transpose else self + + if obj.empty: + return self.copy() + + if method not in fillna_methods: + axis = self._info_axis_number + + if isinstance(obj.index, MultiIndex) and method != "linear": + raise ValueError( + "Only `method=linear` interpolation is supported on MultiIndexes." + ) + + # Set `limit_direction` depending on `method` + if limit_direction is None: + limit_direction = ( + "backward" if method in ("backfill", "bfill") else "forward" + ) + else: + if method in ("pad", "ffill") and limit_direction != "forward": + raise ValueError( + f"`limit_direction` must be 'forward' for method `{method}`" + ) + if method in ("backfill", "bfill") and limit_direction != "backward": + raise ValueError( + f"`limit_direction` must be 'backward' for method `{method}`" + ) + + if obj.ndim == 2 and np.all(obj.dtypes == np.dtype("object")): + raise TypeError( + "Cannot interpolate with all object-dtype columns " + "in the DataFrame. Try setting at least one " + "column to a numeric dtype." + ) + + # create/use the index + if method == "linear": + # prior default + index = Index(np.arange(len(obj.index))) + else: + index = obj.index + methods = {"index", "values", "nearest", "time"} + is_numeric_or_datetime = ( + is_numeric_dtype(index.dtype) + or is_datetime64_any_dtype(index.dtype) + or is_timedelta64_dtype(index.dtype) + ) + if method not in methods and not is_numeric_or_datetime: + raise ValueError( + "Index column must be numeric or datetime type when " + f"using {method} method other than linear. " + "Try setting a numeric or datetime index column before " + "interpolating." + ) + + if isna(index).any(): + raise NotImplementedError( + "Interpolation with NaNs in the index " + "has not been implemented. Try filling " + "those NaNs before interpolating." + ) + new_data = obj._mgr.interpolate( + method=method, + axis=axis, + index=index, + limit=limit, + limit_direction=limit_direction, + limit_area=limit_area, + inplace=inplace, + downcast=downcast, + **kwargs, + ) + + result = self._constructor(new_data) + if should_transpose: + result = result.T + if inplace: + return self._update_inplace(result) + else: + return result.__finalize__(self, method="interpolate") + + # ---------------------------------------------------------------------- + # Timeseries methods Methods + + @final + def asof(self, where, subset=None): + """ + Return the last row(s) without any NaNs before `where`. + + The last row (for each element in `where`, if list) without any + NaN is taken. 
+ In case of a :class:`~pandas.DataFrame`, the last row without NaN + considering only the subset of columns (if not `None`) + + If there is no good value, NaN is returned for a Series or + a Series of NaN values for a DataFrame + + Parameters + ---------- + where : date or array-like of dates + Date(s) before which the last row(s) are returned. + subset : str or array-like of str, default `None` + For DataFrame, if not `None`, only use these columns to + check for NaNs. + + Returns + ------- + scalar, Series, or DataFrame + + The return can be: + + * scalar : when `self` is a Series and `where` is a scalar + * Series: when `self` is a Series and `where` is an array-like, + or when `self` is a DataFrame and `where` is a scalar + * DataFrame : when `self` is a DataFrame and `where` is an + array-like + + Return scalar, Series, or DataFrame. + + See Also + -------- + merge_asof : Perform an asof merge. Similar to left join. + + Notes + ----- + Dates are assumed to be sorted. Raises if this is not the case. + + Examples + -------- + A Series and a scalar `where`. + + >>> s = pd.Series([1, 2, np.nan, 4], index=[10, 20, 30, 40]) + >>> s + 10 1.0 + 20 2.0 + 30 NaN + 40 4.0 + dtype: float64 + + >>> s.asof(20) + 2.0 + + For a sequence `where`, a Series is returned. The first value is + NaN, because the first element of `where` is before the first + index value. + + >>> s.asof([5, 20]) + 5 NaN + 20 2.0 + dtype: float64 + + Missing values are not considered. The following is ``2.0``, not + NaN, even though NaN is at the index location for ``30``. + + >>> s.asof(30) + 2.0 + + Take all columns into consideration + + >>> df = pd.DataFrame({'a': [10, 20, 30, 40, 50], + ... 'b': [None, None, None, None, 500]}, + ... index=pd.DatetimeIndex(['2018-02-27 09:01:00', + ... '2018-02-27 09:02:00', + ... '2018-02-27 09:03:00', + ... '2018-02-27 09:04:00', + ... '2018-02-27 09:05:00'])) + >>> df.asof(pd.DatetimeIndex(['2018-02-27 09:03:30', + ... '2018-02-27 09:04:30'])) + a b + 2018-02-27 09:03:30 NaN NaN + 2018-02-27 09:04:30 NaN NaN + + Take a single column into consideration + + >>> df.asof(pd.DatetimeIndex(['2018-02-27 09:03:30', + ... '2018-02-27 09:04:30']), + ... subset=['a']) + a b + 2018-02-27 09:03:30 30.0 NaN + 2018-02-27 09:04:30 40.0 NaN + """ + if isinstance(where, str): + where = Timestamp(where) + + if not self.index.is_monotonic: + raise ValueError("asof requires a sorted index") + + is_series = isinstance(self, ABCSeries) + if is_series: + if subset is not None: + raise ValueError("subset is not valid for Series") + else: + if subset is None: + subset = self.columns + if not is_list_like(subset): + subset = [subset] + + is_list = is_list_like(where) + if not is_list: + start = self.index[0] + if isinstance(self.index, PeriodIndex): + where = Period(where, freq=self.index.freq) + + if where < start: + if not is_series: + return self._constructor_sliced( + index=self.columns, name=where, dtype=np.float64 + ) + return np.nan + + # It's always much faster to use a *while* loop here for + # Series than pre-computing all the NAs. However a + # *while* loop is extremely expensive for DataFrame + # so we later pre-compute all the NAs and use the same + # code path whether *where* is a scalar or list. 
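+        # (Sketch of the Series fast path below: searchsorted(side="right")
+        # finds the position of the last index label <= `where`, then we
+        # step left past any NaN values before returning.)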
+ # See PR: https://github.com/pandas-dev/pandas/pull/14476 + if is_series: + loc = self.index.searchsorted(where, side="right") + if loc > 0: + loc -= 1 + + values = self._values + while loc > 0 and isna(values[loc]): + loc -= 1 + return values[loc] + + if not isinstance(where, Index): + where = Index(where) if is_list else Index([where]) + + nulls = self.isna() if is_series else self[subset].isna().any(1) + if nulls.all(): + if is_series: + self = cast("Series", self) + return self._constructor(np.nan, index=where, name=self.name) + elif is_list: + self = cast("DataFrame", self) + return self._constructor(np.nan, index=where, columns=self.columns) + else: + self = cast("DataFrame", self) + return self._constructor_sliced( + np.nan, index=self.columns, name=where[0] + ) + + locs = self.index.asof_locs(where, ~(nulls._values)) + + # mask the missing + missing = locs == -1 + data = self.take(locs) + data.index = where + data.loc[missing] = np.nan + return data if is_list else data.iloc[-1] + + # ---------------------------------------------------------------------- + # Action Methods + + @doc(klass=_shared_doc_kwargs["klass"]) + def isna(self: NDFrameT) -> NDFrameT: + """ + Detect missing values. + + Return a boolean same-sized object indicating if the values are NA. + NA values, such as None or :attr:`numpy.NaN`, gets mapped to True + values. + Everything else gets mapped to False values. Characters such as empty + strings ``''`` or :attr:`numpy.inf` are not considered NA values + (unless you set ``pandas.options.mode.use_inf_as_na = True``). + + Returns + ------- + {klass} + Mask of bool values for each element in {klass} that + indicates whether an element is an NA value. + + See Also + -------- + {klass}.isnull : Alias of isna. + {klass}.notna : Boolean inverse of isna. + {klass}.dropna : Omit axes labels with missing values. + isna : Top-level isna. + + Examples + -------- + Show which entries in a DataFrame are NA. + + >>> df = pd.DataFrame(dict(age=[5, 6, np.NaN], + ... born=[pd.NaT, pd.Timestamp('1939-05-27'), + ... pd.Timestamp('1940-04-25')], + ... name=['Alfred', 'Batman', ''], + ... toy=[None, 'Batmobile', 'Joker'])) + >>> df + age born name toy + 0 5.0 NaT Alfred None + 1 6.0 1939-05-27 Batman Batmobile + 2 NaN 1940-04-25 Joker + + >>> df.isna() + age born name toy + 0 False True False True + 1 False False False False + 2 True False False False + + Show which entries in a Series are NA. + + >>> ser = pd.Series([5, 6, np.NaN]) + >>> ser + 0 5.0 + 1 6.0 + 2 NaN + dtype: float64 + + >>> ser.isna() + 0 False + 1 False + 2 True + dtype: bool + """ + return isna(self).__finalize__(self, method="isna") + + @doc(isna, klass=_shared_doc_kwargs["klass"]) + def isnull(self: NDFrameT) -> NDFrameT: + return isna(self).__finalize__(self, method="isnull") + + @doc(klass=_shared_doc_kwargs["klass"]) + def notna(self: NDFrameT) -> NDFrameT: + """ + Detect existing (non-missing) values. + + Return a boolean same-sized object indicating if the values are not NA. + Non-missing values get mapped to True. Characters such as empty + strings ``''`` or :attr:`numpy.inf` are not considered NA values + (unless you set ``pandas.options.mode.use_inf_as_na = True``). + NA values, such as None or :attr:`numpy.NaN`, get mapped to False + values. + + Returns + ------- + {klass} + Mask of bool values for each element in {klass} that + indicates whether an element is not an NA value. + + See Also + -------- + {klass}.notnull : Alias of notna. + {klass}.isna : Boolean inverse of notna. 
+ {klass}.dropna : Omit axes labels with missing values. + notna : Top-level notna. + + Examples + -------- + Show which entries in a DataFrame are not NA. + + >>> df = pd.DataFrame(dict(age=[5, 6, np.NaN], + ... born=[pd.NaT, pd.Timestamp('1939-05-27'), + ... pd.Timestamp('1940-04-25')], + ... name=['Alfred', 'Batman', ''], + ... toy=[None, 'Batmobile', 'Joker'])) + >>> df + age born name toy + 0 5.0 NaT Alfred None + 1 6.0 1939-05-27 Batman Batmobile + 2 NaN 1940-04-25 Joker + + >>> df.notna() + age born name toy + 0 True False True False + 1 True True True True + 2 False True True True + + Show which entries in a Series are not NA. + + >>> ser = pd.Series([5, 6, np.NaN]) + >>> ser + 0 5.0 + 1 6.0 + 2 NaN + dtype: float64 + + >>> ser.notna() + 0 True + 1 True + 2 False + dtype: bool + """ + return notna(self).__finalize__(self, method="notna") + + @doc(notna, klass=_shared_doc_kwargs["klass"]) + def notnull(self: NDFrameT) -> NDFrameT: + return notna(self).__finalize__(self, method="notnull") + + @final + def _clip_with_scalar(self, lower, upper, inplace: bool_t = False): + if (lower is not None and np.any(isna(lower))) or ( + upper is not None and np.any(isna(upper)) + ): + raise ValueError("Cannot use an NA value as a clip threshold") + + result = self + mask = isna(self._values) + + with np.errstate(all="ignore"): + if upper is not None: + subset = self <= upper + result = result.where(subset, upper, axis=None, inplace=False) + if lower is not None: + subset = self >= lower + result = result.where(subset, lower, axis=None, inplace=False) + + if np.any(mask): + result[mask] = np.nan + + if inplace: + return self._update_inplace(result) + else: + return result + + @final + def _clip_with_one_bound(self, threshold, method, axis, inplace): + + if axis is not None: + axis = self._get_axis_number(axis) + + # method is self.le for upper bound and self.ge for lower bound + if is_scalar(threshold) and is_number(threshold): + if method.__name__ == "le": + return self._clip_with_scalar(None, threshold, inplace=inplace) + return self._clip_with_scalar(threshold, None, inplace=inplace) + + # GH #15390 + # In order for where method to work, the threshold must + # be transformed to NDFrame from other array like structure. + if (not isinstance(threshold, ABCSeries)) and is_list_like(threshold): + if isinstance(self, ABCSeries): + threshold = self._constructor(threshold, index=self.index) + else: + threshold = align_method_FRAME(self, threshold, axis, flex=None)[1] + + # GH 40420 + # Treat missing thresholds as no bounds, not clipping the values + if is_list_like(threshold): + fill_value = np.inf if method.__name__ == "le" else -np.inf + threshold_inf = threshold.fillna(fill_value) + else: + threshold_inf = threshold + + subset = method(threshold_inf, axis=axis) | isna(self) + + # GH 40420 + return self.where(subset, threshold, axis=axis, inplace=inplace) + + def clip( + self: NDFrameT, + lower=None, + upper=None, + axis: Axis | None = None, + inplace: bool_t = False, + *args, + **kwargs, + ) -> NDFrameT | None: + """ + Trim values at input threshold(s). + + Assigns values outside boundary to boundary values. Thresholds + can be singular values or array like, and in the latter case + the clipping is performed element-wise in the specified axis. + + Parameters + ---------- + lower : float or array-like, default None + Minimum threshold value. All values below this + threshold will be set to it. A missing + threshold (e.g `NA`) will not clip the value. 
+ upper : float or array-like, default None + Maximum threshold value. All values above this + threshold will be set to it. A missing + threshold (e.g `NA`) will not clip the value. + axis : int or str axis name, optional + Align object with lower and upper along the given axis. + inplace : bool, default False + Whether to perform the operation in place on the data. + *args, **kwargs + Additional keywords have no effect but might be accepted + for compatibility with numpy. + + Returns + ------- + Series or DataFrame or None + Same type as calling object with the values outside the + clip boundaries replaced or None if ``inplace=True``. + + See Also + -------- + Series.clip : Trim values at input threshold in series. + DataFrame.clip : Trim values at input threshold in dataframe. + numpy.clip : Clip (limit) the values in an array. + + Examples + -------- + >>> data = {'col_0': [9, -3, 0, -1, 5], 'col_1': [-2, -7, 6, 8, -5]} + >>> df = pd.DataFrame(data) + >>> df + col_0 col_1 + 0 9 -2 + 1 -3 -7 + 2 0 6 + 3 -1 8 + 4 5 -5 + + Clips per column using lower and upper thresholds: + + >>> df.clip(-4, 6) + col_0 col_1 + 0 6 -2 + 1 -3 -4 + 2 0 6 + 3 -1 6 + 4 5 -4 + + Clips using specific lower and upper thresholds per column element: + + >>> t = pd.Series([2, -4, -1, 6, 3]) + >>> t + 0 2 + 1 -4 + 2 -1 + 3 6 + 4 3 + dtype: int64 + + >>> df.clip(t, t + 4, axis=0) + col_0 col_1 + 0 6 2 + 1 -3 -4 + 2 0 3 + 3 6 8 + 4 5 3 + + Clips using specific lower threshold per column element, with missing values: + + >>> t = pd.Series([2, -4, np.NaN, 6, 3]) + >>> t + 0 2.0 + 1 -4.0 + 2 NaN + 3 6.0 + 4 3.0 + dtype: float64 + + >>> df.clip(t, axis=0) + col_0 col_1 + 0 9 2 + 1 -3 -4 + 2 0 6 + 3 6 8 + 4 5 3 + """ + inplace = validate_bool_kwarg(inplace, "inplace") + + axis = nv.validate_clip_with_axis(axis, args, kwargs) + if axis is not None: + axis = self._get_axis_number(axis) + + # GH 17276 + # numpy doesn't like NaN as a clip value + # so ignore + # GH 19992 + # numpy doesn't drop a list-like bound containing NaN + isna_lower = isna(lower) + if not is_list_like(lower): + if np.any(isna_lower): + lower = None + elif np.all(isna_lower): + lower = None + isna_upper = isna(upper) + if not is_list_like(upper): + if np.any(isna_upper): + upper = None + elif np.all(isna_upper): + upper = None + + # GH 2747 (arguments were reversed) + if ( + lower is not None + and upper is not None + and is_scalar(lower) + and is_scalar(upper) + ): + lower, upper = min(lower, upper), max(lower, upper) + + # fast-path for scalars + if (lower is None or (is_scalar(lower) and is_number(lower))) and ( + upper is None or (is_scalar(upper) and is_number(upper)) + ): + return self._clip_with_scalar(lower, upper, inplace=inplace) + + result = self + if lower is not None: + result = result._clip_with_one_bound( + lower, method=self.ge, axis=axis, inplace=inplace + ) + if upper is not None: + if inplace: + result = self + result = result._clip_with_one_bound( + upper, method=self.le, axis=axis, inplace=inplace + ) + + return result + + @doc(**_shared_doc_kwargs) + def asfreq( + self: NDFrameT, + freq, + method=None, + how: str | None = None, + normalize: bool_t = False, + fill_value=None, + ) -> NDFrameT: + """ + Convert time series to specified frequency. + + Returns the original data conformed to a new index with the specified + frequency. 
+ + If the index of this {klass} is a :class:`~pandas.PeriodIndex`, the new index + is the result of transforming the original index with + :meth:`PeriodIndex.asfreq ` (so the original index + will map one-to-one to the new index). + + Otherwise, the new index will be equivalent to ``pd.date_range(start, end, + freq=freq)`` where ``start`` and ``end`` are, respectively, the first and + last entries in the original index (see :func:`pandas.date_range`). The + values corresponding to any timesteps in the new index which were not present + in the original index will be null (``NaN``), unless a method for filling + such unknowns is provided (see the ``method`` parameter below). + + The :meth:`resample` method is more appropriate if an operation on each group of + timesteps (such as an aggregate) is necessary to represent the data at the new + frequency. + + Parameters + ---------- + freq : DateOffset or str + Frequency DateOffset or string. + method : {{'backfill'/'bfill', 'pad'/'ffill'}}, default None + Method to use for filling holes in reindexed Series (note this + does not fill NaNs that already were present): + + * 'pad' / 'ffill': propagate last valid observation forward to next + valid + * 'backfill' / 'bfill': use NEXT valid observation to fill. + how : {{'start', 'end'}}, default end + For PeriodIndex only (see PeriodIndex.asfreq). + normalize : bool, default False + Whether to reset output index to midnight. + fill_value : scalar, optional + Value to use for missing values, applied during upsampling (note + this does not fill NaNs that already were present). + + Returns + ------- + {klass} + {klass} object reindexed to the specified frequency. + + See Also + -------- + reindex : Conform DataFrame to new index with optional filling logic. + + Notes + ----- + To learn more about the frequency strings, please see `this link + `__. + + Examples + -------- + Start by creating a series with 4 one minute timestamps. + + >>> index = pd.date_range('1/1/2000', periods=4, freq='T') + >>> series = pd.Series([0.0, None, 2.0, 3.0], index=index) + >>> df = pd.DataFrame({{'s': series}}) + >>> df + s + 2000-01-01 00:00:00 0.0 + 2000-01-01 00:01:00 NaN + 2000-01-01 00:02:00 2.0 + 2000-01-01 00:03:00 3.0 + + Upsample the series into 30 second bins. + + >>> df.asfreq(freq='30S') + s + 2000-01-01 00:00:00 0.0 + 2000-01-01 00:00:30 NaN + 2000-01-01 00:01:00 NaN + 2000-01-01 00:01:30 NaN + 2000-01-01 00:02:00 2.0 + 2000-01-01 00:02:30 NaN + 2000-01-01 00:03:00 3.0 + + Upsample again, providing a ``fill value``. + + >>> df.asfreq(freq='30S', fill_value=9.0) + s + 2000-01-01 00:00:00 0.0 + 2000-01-01 00:00:30 9.0 + 2000-01-01 00:01:00 NaN + 2000-01-01 00:01:30 9.0 + 2000-01-01 00:02:00 2.0 + 2000-01-01 00:02:30 9.0 + 2000-01-01 00:03:00 3.0 + + Upsample again, providing a ``method``. + + >>> df.asfreq(freq='30S', method='bfill') + s + 2000-01-01 00:00:00 0.0 + 2000-01-01 00:00:30 NaN + 2000-01-01 00:01:00 NaN + 2000-01-01 00:01:30 2.0 + 2000-01-01 00:02:00 2.0 + 2000-01-01 00:02:30 3.0 + 2000-01-01 00:03:00 3.0 + """ + from pandas.core.resample import asfreq + + return asfreq( + self, + freq, + method=method, + how=how, + normalize=normalize, + fill_value=fill_value, + ) + + @final + def at_time(self: NDFrameT, time, asof: bool_t = False, axis=None) -> NDFrameT: + """ + Select values at particular time of day (e.g., 9:30AM). 
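+
+        Conceptually this selects ``obj.iloc[obj.index.indexer_at_time(time)]``
+        (an illustrative sketch of the selection performed below for
+        ``axis=0``).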
+ + Parameters + ---------- + time : datetime.time or str + axis : {0 or 'index', 1 or 'columns'}, default 0 + + Returns + ------- + Series or DataFrame + + Raises + ------ + TypeError + If the index is not a :class:`DatetimeIndex` + + See Also + -------- + between_time : Select values between particular times of the day. + first : Select initial periods of time series based on a date offset. + last : Select final periods of time series based on a date offset. + DatetimeIndex.indexer_at_time : Get just the index locations for + values at particular time of the day. + + Examples + -------- + >>> i = pd.date_range('2018-04-09', periods=4, freq='12H') + >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i) + >>> ts + A + 2018-04-09 00:00:00 1 + 2018-04-09 12:00:00 2 + 2018-04-10 00:00:00 3 + 2018-04-10 12:00:00 4 + + >>> ts.at_time('12:00') + A + 2018-04-09 12:00:00 2 + 2018-04-10 12:00:00 4 + """ + if axis is None: + axis = self._stat_axis_number + axis = self._get_axis_number(axis) + + index = self._get_axis(axis) + + if not isinstance(index, DatetimeIndex): + raise TypeError("Index must be DatetimeIndex") + + indexer = index.indexer_at_time(time, asof=asof) + return self._take_with_is_copy(indexer, axis=axis) + + @final + def between_time( + self: NDFrameT, + start_time, + end_time, + include_start: bool_t | lib.NoDefault = lib.no_default, + include_end: bool_t | lib.NoDefault = lib.no_default, + inclusive: str | None = None, + axis=None, + ) -> NDFrameT: + """ + Select values between particular times of the day (e.g., 9:00-9:30 AM). + + By setting ``start_time`` to be later than ``end_time``, + you can get the times that are *not* between the two times. + + Parameters + ---------- + start_time : datetime.time or str + Initial time as a time filter limit. + end_time : datetime.time or str + End time as a time filter limit. + include_start : bool, default True + Whether the start time needs to be included in the result. + + .. deprecated:: 1.4.0 + Arguments `include_start` and `include_end` have been deprecated + to standardize boundary inputs. Use `inclusive` instead, to set + each bound as closed or open. + include_end : bool, default True + Whether the end time needs to be included in the result. + + .. deprecated:: 1.4.0 + Arguments `include_start` and `include_end` have been deprecated + to standardize boundary inputs. Use `inclusive` instead, to set + each bound as closed or open. + inclusive : {"both", "neither", "left", "right"}, default "both" + Include boundaries; whether to set each bound as closed or open. + axis : {0 or 'index', 1 or 'columns'}, default 0 + Determine range time on index or columns value. + + Returns + ------- + Series or DataFrame + Data from the original object filtered to the specified dates range. + + Raises + ------ + TypeError + If the index is not a :class:`DatetimeIndex` + + See Also + -------- + at_time : Select values at a particular time of the day. + first : Select initial periods of time series based on a date offset. + last : Select final periods of time series based on a date offset. + DatetimeIndex.indexer_between_time : Get just the index locations for + values between particular times of the day. 
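+
+        Notes
+        -----
+        ``inclusive`` subsumes the deprecated ``include_start``/``include_end``
+        pair; for example ``include_start=True, include_end=False`` maps to
+        ``inclusive="left"`` (see the mapping in the deprecation shim below).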
+ + Examples + -------- + >>> i = pd.date_range('2018-04-09', periods=4, freq='1D20min') + >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i) + >>> ts + A + 2018-04-09 00:00:00 1 + 2018-04-10 00:20:00 2 + 2018-04-11 00:40:00 3 + 2018-04-12 01:00:00 4 + + >>> ts.between_time('0:15', '0:45') + A + 2018-04-10 00:20:00 2 + 2018-04-11 00:40:00 3 + + You get the times that are *not* between two times by setting + ``start_time`` later than ``end_time``: + + >>> ts.between_time('0:45', '0:15') + A + 2018-04-09 00:00:00 1 + 2018-04-12 01:00:00 4 + """ + if axis is None: + axis = self._stat_axis_number + axis = self._get_axis_number(axis) + + index = self._get_axis(axis) + if not isinstance(index, DatetimeIndex): + raise TypeError("Index must be DatetimeIndex") + + old_include_arg_used = (include_start != lib.no_default) or ( + include_end != lib.no_default + ) + + if old_include_arg_used and inclusive is not None: + raise ValueError( + "Deprecated arguments `include_start` and `include_end` " + "cannot be passed if `inclusive` has been given." + ) + # If any of the deprecated arguments ('include_start', 'include_end') + # have been passed + elif old_include_arg_used: + warnings.warn( + "`include_start` and `include_end` are deprecated in " + "favour of `inclusive`.", + FutureWarning, + stacklevel=find_stack_level(), + ) + left = True if isinstance(include_start, lib.NoDefault) else include_start + right = True if isinstance(include_end, lib.NoDefault) else include_end + + inc_dict = { + (True, True): "both", + (True, False): "left", + (False, True): "right", + (False, False): "neither", + } + inclusive = inc_dict[(left, right)] + elif inclusive is None: + # On arg removal inclusive can default to "both" + inclusive = "both" + left_inclusive, right_inclusive = validate_inclusive(inclusive) + indexer = index.indexer_between_time( + start_time, + end_time, + include_start=left_inclusive, + include_end=right_inclusive, + ) + return self._take_with_is_copy(indexer, axis=axis) + + @doc(**_shared_doc_kwargs) + def resample( + self, + rule, + axis=0, + closed: str | None = None, + label: str | None = None, + convention: str = "start", + kind: str | None = None, + loffset=None, + base: int | None = None, + on=None, + level=None, + origin: str | TimestampConvertibleTypes = "start_day", + offset: TimedeltaConvertibleTypes | None = None, + ) -> Resampler: + """ + Resample time-series data. + + Convenience method for frequency conversion and resampling of time series. + The object must have a datetime-like index (`DatetimeIndex`, `PeriodIndex`, + or `TimedeltaIndex`), or the caller must pass the label of a datetime-like + series/index to the ``on``/``level`` keyword parameter. + + Parameters + ---------- + rule : DateOffset, Timedelta or str + The offset string or object representing target conversion. + axis : {{0 or 'index', 1 or 'columns'}}, default 0 + Which axis to use for up- or down-sampling. For `Series` this + will default to 0, i.e. along the rows. Must be + `DatetimeIndex`, `TimedeltaIndex` or `PeriodIndex`. + closed : {{'right', 'left'}}, default None + Which side of bin interval is closed. The default is 'left' + for all frequency offsets except for 'M', 'A', 'Q', 'BM', + 'BA', 'BQ', and 'W' which all have a default of 'right'. + label : {{'right', 'left'}}, default None + Which bin edge label to label bucket with. The default is 'left' + for all frequency offsets except for 'M', 'A', 'Q', 'BM', + 'BA', 'BQ', and 'W' which all have a default of 'right'. 
+ convention : {{'start', 'end', 's', 'e'}}, default 'start' + For `PeriodIndex` only, controls whether to use the start or + end of `rule`. + kind : {{'timestamp', 'period'}}, optional, default None + Pass 'timestamp' to convert the resulting index to a + `DateTimeIndex` or 'period' to convert it to a `PeriodIndex`. + By default the input representation is retained. + loffset : timedelta, default None + Adjust the resampled time labels. + + .. deprecated:: 1.1.0 + You should add the loffset to the `df.index` after the resample. + See below. + + base : int, default 0 + For frequencies that evenly subdivide 1 day, the "origin" of the + aggregated intervals. For example, for '5min' frequency, base could + range from 0 through 4. Defaults to 0. + + .. deprecated:: 1.1.0 + The new arguments that you should use are 'offset' or 'origin'. + + on : str, optional + For a DataFrame, column to use instead of index for resampling. + Column must be datetime-like. + level : str or int, optional + For a MultiIndex, level (name or number) to use for + resampling. `level` must be datetime-like. + origin : Timestamp or str, default 'start_day' + The timestamp on which to adjust the grouping. The timezone of origin + must match the timezone of the index. + If string, must be one of the following: + + - 'epoch': `origin` is 1970-01-01 + - 'start': `origin` is the first value of the timeseries + - 'start_day': `origin` is the first day at midnight of the timeseries + + .. versionadded:: 1.1.0 + + - 'end': `origin` is the last value of the timeseries + - 'end_day': `origin` is the ceiling midnight of the last day + + .. versionadded:: 1.3.0 + + offset : Timedelta or str, default is None + An offset timedelta added to the origin. + + .. versionadded:: 1.1.0 + + Returns + ------- + pandas.core.Resampler + :class:`~pandas.core.Resampler` object. + + See Also + -------- + Series.resample : Resample a Series. + DataFrame.resample : Resample a DataFrame. + groupby : Group {klass} by mapping, function, label, or list of labels. + asfreq : Reindex a {klass} with the given frequency without grouping. + + Notes + ----- + See the `user guide + `__ + for more. + + To learn more about the offset strings, please see `this link + `__. + + Examples + -------- + Start by creating a series with 9 one minute timestamps. + + >>> index = pd.date_range('1/1/2000', periods=9, freq='T') + >>> series = pd.Series(range(9), index=index) + >>> series + 2000-01-01 00:00:00 0 + 2000-01-01 00:01:00 1 + 2000-01-01 00:02:00 2 + 2000-01-01 00:03:00 3 + 2000-01-01 00:04:00 4 + 2000-01-01 00:05:00 5 + 2000-01-01 00:06:00 6 + 2000-01-01 00:07:00 7 + 2000-01-01 00:08:00 8 + Freq: T, dtype: int64 + + Downsample the series into 3 minute bins and sum the values + of the timestamps falling into a bin. + + >>> series.resample('3T').sum() + 2000-01-01 00:00:00 3 + 2000-01-01 00:03:00 12 + 2000-01-01 00:06:00 21 + Freq: 3T, dtype: int64 + + Downsample the series into 3 minute bins as above, but label each + bin using the right edge instead of the left. Please note that the + value in the bucket used as the label is not included in the bucket, + which it labels. For example, in the original series the + bucket ``2000-01-01 00:03:00`` contains the value 3, but the summed + value in the resampled bucket with the label ``2000-01-01 00:03:00`` + does not include 3 (if it did, the summed value would be 6, not 3). + To include this value close the right side of the bin interval as + illustrated in the example below this one. 
+ + >>> series.resample('3T', label='right').sum() + 2000-01-01 00:03:00 3 + 2000-01-01 00:06:00 12 + 2000-01-01 00:09:00 21 + Freq: 3T, dtype: int64 + + Downsample the series into 3 minute bins as above, but close the right + side of the bin interval. + + >>> series.resample('3T', label='right', closed='right').sum() + 2000-01-01 00:00:00 0 + 2000-01-01 00:03:00 6 + 2000-01-01 00:06:00 15 + 2000-01-01 00:09:00 15 + Freq: 3T, dtype: int64 + + Upsample the series into 30 second bins. + + >>> series.resample('30S').asfreq()[0:5] # Select first 5 rows + 2000-01-01 00:00:00 0.0 + 2000-01-01 00:00:30 NaN + 2000-01-01 00:01:00 1.0 + 2000-01-01 00:01:30 NaN + 2000-01-01 00:02:00 2.0 + Freq: 30S, dtype: float64 + + Upsample the series into 30 second bins and fill the ``NaN`` + values using the ``pad`` method. + + >>> series.resample('30S').pad()[0:5] + 2000-01-01 00:00:00 0 + 2000-01-01 00:00:30 0 + 2000-01-01 00:01:00 1 + 2000-01-01 00:01:30 1 + 2000-01-01 00:02:00 2 + Freq: 30S, dtype: int64 + + Upsample the series into 30 second bins and fill the + ``NaN`` values using the ``bfill`` method. + + >>> series.resample('30S').bfill()[0:5] + 2000-01-01 00:00:00 0 + 2000-01-01 00:00:30 1 + 2000-01-01 00:01:00 1 + 2000-01-01 00:01:30 2 + 2000-01-01 00:02:00 2 + Freq: 30S, dtype: int64 + + Pass a custom function via ``apply`` + + >>> def custom_resampler(arraylike): + ... return np.sum(arraylike) + 5 + ... + >>> series.resample('3T').apply(custom_resampler) + 2000-01-01 00:00:00 8 + 2000-01-01 00:03:00 17 + 2000-01-01 00:06:00 26 + Freq: 3T, dtype: int64 + + For a Series with a PeriodIndex, the keyword `convention` can be + used to control whether to use the start or end of `rule`. + + Resample a year by quarter using 'start' `convention`. Values are + assigned to the first quarter of the period. + + >>> s = pd.Series([1, 2], index=pd.period_range('2012-01-01', + ... freq='A', + ... periods=2)) + >>> s + 2012 1 + 2013 2 + Freq: A-DEC, dtype: int64 + >>> s.resample('Q', convention='start').asfreq() + 2012Q1 1.0 + 2012Q2 NaN + 2012Q3 NaN + 2012Q4 NaN + 2013Q1 2.0 + 2013Q2 NaN + 2013Q3 NaN + 2013Q4 NaN + Freq: Q-DEC, dtype: float64 + + Resample quarters by month using 'end' `convention`. Values are + assigned to the last month of the period. + + >>> q = pd.Series([1, 2, 3, 4], index=pd.period_range('2018-01-01', + ... freq='Q', + ... periods=4)) + >>> q + 2018Q1 1 + 2018Q2 2 + 2018Q3 3 + 2018Q4 4 + Freq: Q-DEC, dtype: int64 + >>> q.resample('M', convention='end').asfreq() + 2018-03 1.0 + 2018-04 NaN + 2018-05 NaN + 2018-06 2.0 + 2018-07 NaN + 2018-08 NaN + 2018-09 3.0 + 2018-10 NaN + 2018-11 NaN + 2018-12 4.0 + Freq: M, dtype: float64 + + For DataFrame objects, the keyword `on` can be used to specify the + column instead of the index for resampling. + + >>> d = {{'price': [10, 11, 9, 13, 14, 18, 17, 19], + ... 'volume': [50, 60, 40, 100, 50, 100, 40, 50]}} + >>> df = pd.DataFrame(d) + >>> df['week_starting'] = pd.date_range('01/01/2018', + ... periods=8, + ... freq='W') + >>> df + price volume week_starting + 0 10 50 2018-01-07 + 1 11 60 2018-01-14 + 2 9 40 2018-01-21 + 3 13 100 2018-01-28 + 4 14 50 2018-02-04 + 5 18 100 2018-02-11 + 6 17 40 2018-02-18 + 7 19 50 2018-02-25 + >>> df.resample('M', on='week_starting').mean() + price volume + week_starting + 2018-01-31 10.75 62.5 + 2018-02-28 17.00 60.0 + + For a DataFrame with MultiIndex, the keyword `level` can be used to + specify on which level the resampling needs to take place. 
+ + >>> days = pd.date_range('1/1/2000', periods=4, freq='D') + >>> d2 = {{'price': [10, 11, 9, 13, 14, 18, 17, 19], + ... 'volume': [50, 60, 40, 100, 50, 100, 40, 50]}} + >>> df2 = pd.DataFrame( + ... d2, + ... index=pd.MultiIndex.from_product( + ... [days, ['morning', 'afternoon']] + ... ) + ... ) + >>> df2 + price volume + 2000-01-01 morning 10 50 + afternoon 11 60 + 2000-01-02 morning 9 40 + afternoon 13 100 + 2000-01-03 morning 14 50 + afternoon 18 100 + 2000-01-04 morning 17 40 + afternoon 19 50 + >>> df2.resample('D', level=0).sum() + price volume + 2000-01-01 21 110 + 2000-01-02 22 140 + 2000-01-03 32 150 + 2000-01-04 36 90 + + If you want to adjust the start of the bins based on a fixed timestamp: + + >>> start, end = '2000-10-01 23:30:00', '2000-10-02 00:30:00' + >>> rng = pd.date_range(start, end, freq='7min') + >>> ts = pd.Series(np.arange(len(rng)) * 3, index=rng) + >>> ts + 2000-10-01 23:30:00 0 + 2000-10-01 23:37:00 3 + 2000-10-01 23:44:00 6 + 2000-10-01 23:51:00 9 + 2000-10-01 23:58:00 12 + 2000-10-02 00:05:00 15 + 2000-10-02 00:12:00 18 + 2000-10-02 00:19:00 21 + 2000-10-02 00:26:00 24 + Freq: 7T, dtype: int64 + + >>> ts.resample('17min').sum() + 2000-10-01 23:14:00 0 + 2000-10-01 23:31:00 9 + 2000-10-01 23:48:00 21 + 2000-10-02 00:05:00 54 + 2000-10-02 00:22:00 24 + Freq: 17T, dtype: int64 + + >>> ts.resample('17min', origin='epoch').sum() + 2000-10-01 23:18:00 0 + 2000-10-01 23:35:00 18 + 2000-10-01 23:52:00 27 + 2000-10-02 00:09:00 39 + 2000-10-02 00:26:00 24 + Freq: 17T, dtype: int64 + + >>> ts.resample('17min', origin='2000-01-01').sum() + 2000-10-01 23:24:00 3 + 2000-10-01 23:41:00 15 + 2000-10-01 23:58:00 45 + 2000-10-02 00:15:00 45 + Freq: 17T, dtype: int64 + + If you want to adjust the start of the bins with an `offset` Timedelta, the two + following lines are equivalent: + + >>> ts.resample('17min', origin='start').sum() + 2000-10-01 23:30:00 9 + 2000-10-01 23:47:00 21 + 2000-10-02 00:04:00 54 + 2000-10-02 00:21:00 24 + Freq: 17T, dtype: int64 + + >>> ts.resample('17min', offset='23h30min').sum() + 2000-10-01 23:30:00 9 + 2000-10-01 23:47:00 21 + 2000-10-02 00:04:00 54 + 2000-10-02 00:21:00 24 + Freq: 17T, dtype: int64 + + If you want to take the largest Timestamp as the end of the bins: + + >>> ts.resample('17min', origin='end').sum() + 2000-10-01 23:35:00 0 + 2000-10-01 23:52:00 18 + 2000-10-02 00:09:00 27 + 2000-10-02 00:26:00 63 + Freq: 17T, dtype: int64 + + In contrast with the `start_day`, you can use `end_day` to take the ceiling + midnight of the largest Timestamp as the end of the bins and drop the bins + not containing data: + + >>> ts.resample('17min', origin='end_day').sum() + 2000-10-01 23:38:00 3 + 2000-10-01 23:55:00 15 + 2000-10-02 00:12:00 45 + 2000-10-02 00:29:00 45 + Freq: 17T, dtype: int64 + + To replace the use of the deprecated `base` argument, you can now use `offset`, + in this example it is equivalent to have `base=2`: + + >>> ts.resample('17min', offset='2min').sum() + 2000-10-01 23:16:00 0 + 2000-10-01 23:33:00 9 + 2000-10-01 23:50:00 36 + 2000-10-02 00:07:00 39 + 2000-10-02 00:24:00 24 + Freq: 17T, dtype: int64 + + To replace the use of the deprecated `loffset` argument: + + >>> from pandas.tseries.frequencies import to_offset + >>> loffset = '19min' + >>> ts_out = ts.resample('17min').sum() + >>> ts_out.index = ts_out.index + to_offset(loffset) + >>> ts_out + 2000-10-01 23:33:00 0 + 2000-10-01 23:50:00 9 + 2000-10-02 00:07:00 21 + 2000-10-02 00:24:00 54 + 2000-10-02 00:41:00 24 + Freq: 17T, dtype: int64 + """ + from 
pandas.core.resample import get_resampler + + axis = self._get_axis_number(axis) + return get_resampler( + self, + freq=rule, + label=label, + closed=closed, + axis=axis, + kind=kind, + loffset=loffset, + convention=convention, + base=base, + key=on, + level=level, + origin=origin, + offset=offset, + ) + + @final + def first(self: NDFrameT, offset) -> NDFrameT: + """ + Select initial periods of time series data based on a date offset. + + When having a DataFrame with dates as index, this function can + select the first few rows based on a date offset. + + Parameters + ---------- + offset : str, DateOffset or dateutil.relativedelta + The offset length of the data that will be selected. For instance, + '1M' will display all the rows having their index within the first month. + + Returns + ------- + Series or DataFrame + A subset of the caller. + + Raises + ------ + TypeError + If the index is not a :class:`DatetimeIndex` + + See Also + -------- + last : Select final periods of time series based on a date offset. + at_time : Select values at a particular time of the day. + between_time : Select values between particular times of the day. + + Examples + -------- + >>> i = pd.date_range('2018-04-09', periods=4, freq='2D') + >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i) + >>> ts + A + 2018-04-09 1 + 2018-04-11 2 + 2018-04-13 3 + 2018-04-15 4 + + Get the rows for the first 3 days: + + >>> ts.first('3D') + A + 2018-04-09 1 + 2018-04-11 2 + + Notice the data for 3 first calendar days were returned, not the first + 3 days observed in the dataset, and therefore data for 2018-04-13 was + not returned. + """ + if not isinstance(self.index, DatetimeIndex): + raise TypeError("'first' only supports a DatetimeIndex index") + + if len(self.index) == 0: + return self + + offset = to_offset(offset) + if not isinstance(offset, Tick) and offset.is_on_offset(self.index[0]): + # GH#29623 if first value is end of period, remove offset with n = 1 + # before adding the real offset + end_date = end = self.index[0] - offset.base + offset + else: + end_date = end = self.index[0] + offset + + # Tick-like, e.g. 3 weeks + if isinstance(offset, Tick) and end_date in self.index: + end = self.index.searchsorted(end_date, side="left") + return self.iloc[:end] + + return self.loc[:end] + + @final + def last(self: NDFrameT, offset) -> NDFrameT: + """ + Select final periods of time series data based on a date offset. + + For a DataFrame with a sorted DatetimeIndex, this function + selects the last few rows based on a date offset. + + Parameters + ---------- + offset : str, DateOffset, dateutil.relativedelta + The offset length of the data that will be selected. For instance, + '3D' will display all the rows having their index within the last 3 days. + + Returns + ------- + Series or DataFrame + A subset of the caller. + + Raises + ------ + TypeError + If the index is not a :class:`DatetimeIndex` + + See Also + -------- + first : Select initial periods of time series based on a date offset. + at_time : Select values at a particular time of the day. + between_time : Select values between particular times of the day. 
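+
+        Notes
+        -----
+        A sketch of the selection logic (visible in the implementation below,
+        not public API): the cutoff is ``self.index[-1] - to_offset(offset)``
+        and the result is ``self.iloc[self.index.searchsorted(cutoff,
+        side="right"):]``.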
+ + Examples + -------- + >>> i = pd.date_range('2018-04-09', periods=4, freq='2D') + >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i) + >>> ts + A + 2018-04-09 1 + 2018-04-11 2 + 2018-04-13 3 + 2018-04-15 4 + + Get the rows for the last 3 days: + + >>> ts.last('3D') + A + 2018-04-13 3 + 2018-04-15 4 + + Notice the data for 3 last calendar days were returned, not the last + 3 observed days in the dataset, and therefore data for 2018-04-11 was + not returned. + """ + if not isinstance(self.index, DatetimeIndex): + raise TypeError("'last' only supports a DatetimeIndex index") + + if len(self.index) == 0: + return self + + offset = to_offset(offset) + + start_date = self.index[-1] - offset + start = self.index.searchsorted(start_date, side="right") + return self.iloc[start:] + + @final + def rank( + self: NDFrameT, + axis=0, + method: str = "average", + numeric_only: bool_t | None | lib.NoDefault = lib.no_default, + na_option: str = "keep", + ascending: bool_t = True, + pct: bool_t = False, + ) -> NDFrameT: + """ + Compute numerical data ranks (1 through n) along axis. + + By default, equal values are assigned a rank that is the average of the + ranks of those values. + + Parameters + ---------- + axis : {0 or 'index', 1 or 'columns'}, default 0 + Index to direct ranking. + method : {'average', 'min', 'max', 'first', 'dense'}, default 'average' + How to rank the group of records that have the same value (i.e. ties): + + * average: average rank of the group + * min: lowest rank in the group + * max: highest rank in the group + * first: ranks assigned in order they appear in the array + * dense: like 'min', but rank always increases by 1 between groups. + + numeric_only : bool, optional + For DataFrame objects, rank only numeric columns if set to True. + na_option : {'keep', 'top', 'bottom'}, default 'keep' + How to rank NaN values: + + * keep: assign NaN rank to NaN values + * top: assign lowest rank to NaN values + * bottom: assign highest rank to NaN values + + ascending : bool, default True + Whether or not the elements should be ranked in ascending order. + pct : bool, default False + Whether or not to display the returned rankings in percentile + form. + + Returns + ------- + same type as caller + Return a Series or DataFrame with data ranks as values. + + See Also + -------- + core.groupby.GroupBy.rank : Rank of values within each group. + + Examples + -------- + >>> df = pd.DataFrame(data={'Animal': ['cat', 'penguin', 'dog', + ... 'spider', 'snake'], + ... 'Number_legs': [4, 2, 4, 8, np.nan]}) + >>> df + Animal Number_legs + 0 cat 4.0 + 1 penguin 2.0 + 2 dog 4.0 + 3 spider 8.0 + 4 snake NaN + + The following example shows how the method behaves with the above + parameters: + + * default_rank: this is the default behaviour obtained without using + any parameter. + * max_rank: setting ``method = 'max'`` the records that have the + same values are ranked using the highest rank (e.g.: since 'cat' + and 'dog' are both in the 2nd and 3rd position, rank 3 is assigned.) + * NA_bottom: choosing ``na_option = 'bottom'``, if there are records + with NaN values they are placed at the bottom of the ranking. + * pct_rank: when setting ``pct = True``, the ranking is expressed as + percentile rank. 
+ + >>> df['default_rank'] = df['Number_legs'].rank() + >>> df['max_rank'] = df['Number_legs'].rank(method='max') + >>> df['NA_bottom'] = df['Number_legs'].rank(na_option='bottom') + >>> df['pct_rank'] = df['Number_legs'].rank(pct=True) + >>> df + Animal Number_legs default_rank max_rank NA_bottom pct_rank + 0 cat 4.0 2.5 3.0 2.5 0.625 + 1 penguin 2.0 1.0 1.0 1.0 0.250 + 2 dog 4.0 2.5 3.0 2.5 0.625 + 3 spider 8.0 4.0 4.0 4.0 1.000 + 4 snake NaN NaN NaN 5.0 NaN + """ + warned = False + if numeric_only is None: + # GH#45036 + warnings.warn( + f"'numeric_only=None' in {type(self).__name__}.rank is deprecated " + "and will raise in a future version. Pass either 'True' or " + "'False'. 'False' will be the default.", + FutureWarning, + stacklevel=find_stack_level(), + ) + warned = True + elif numeric_only is lib.no_default: + numeric_only = None + + axis = self._get_axis_number(axis) + + if na_option not in {"keep", "top", "bottom"}: + msg = "na_option must be one of 'keep', 'top', or 'bottom'" + raise ValueError(msg) + + def ranker(data): + if data.ndim == 2: + # i.e. DataFrame, we cast to ndarray + values = data.values + else: + # i.e. Series, can dispatch to EA + values = data._values + + if isinstance(values, ExtensionArray): + ranks = values._rank( + axis=axis, + method=method, + ascending=ascending, + na_option=na_option, + pct=pct, + ) + else: + ranks = algos.rank( + values, + axis=axis, + method=method, + ascending=ascending, + na_option=na_option, + pct=pct, + ) + + ranks_obj = self._constructor(ranks, **data._construct_axes_dict()) + return ranks_obj.__finalize__(self, method="rank") + + # if numeric_only is None, and we can't get anything, we try with + # numeric_only=True + if numeric_only is None: + try: + return ranker(self) + except TypeError: + numeric_only = True + if not warned: + # Only warn here if we didn't already issue a warning above + # GH#45036 + warnings.warn( + f"Dropping of nuisance columns in {type(self).__name__}.rank " + "is deprecated; in a future version this will raise TypeError. 
" + "Select only valid columns before calling rank.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + if numeric_only: + data = self._get_numeric_data() + else: + data = self + + return ranker(data) + + @doc(_shared_docs["compare"], klass=_shared_doc_kwargs["klass"]) + def compare( + self, + other, + align_axis: Axis = 1, + keep_shape: bool_t = False, + keep_equal: bool_t = False, + ): + from pandas.core.reshape.concat import concat + + if type(self) is not type(other): + cls_self, cls_other = type(self).__name__, type(other).__name__ + raise TypeError( + f"can only compare '{cls_self}' (not '{cls_other}') with '{cls_self}'" + ) + + mask = ~((self == other) | (self.isna() & other.isna())) + keys = ["self", "other"] + + if not keep_equal: + self = self.where(mask) + other = other.where(mask) + + if not keep_shape: + if isinstance(self, ABCDataFrame): + cmask = mask.any() + rmask = mask.any(axis=1) + self = self.loc[rmask, cmask] + other = other.loc[rmask, cmask] + else: + self = self[mask] + other = other[mask] + + if align_axis in (1, "columns"): # This is needed for Series + axis = 1 + else: + axis = self._get_axis_number(align_axis) + + diff = concat([self, other], axis=axis, keys=keys) + + if axis >= self.ndim: + # No need to reorganize data if stacking on new axis + # This currently applies for stacking two Series on columns + return diff + + ax = diff._get_axis(axis) + ax_names = np.array(ax.names) + + # set index names to positions to avoid confusion + ax.names = np.arange(len(ax_names)) + + # bring self-other to inner level + order = list(range(1, ax.nlevels)) + [0] + if isinstance(diff, ABCDataFrame): + diff = diff.reorder_levels(order, axis=axis) + else: + diff = diff.reorder_levels(order) + + # restore the index names in order + diff._get_axis(axis=axis).names = ax_names[order] + + # reorder axis to keep things organized + indices = ( + np.arange(diff.shape[axis]).reshape([2, diff.shape[axis] // 2]).T.flatten() + ) + diff = diff.take(indices, axis=axis) + + return diff + + @doc(**_shared_doc_kwargs) + def align( + self, + other, + join="outer", + axis=None, + level=None, + copy=True, + fill_value=None, + method=None, + limit=None, + fill_axis=0, + broadcast_axis=None, + ): + """ + Align two objects on their axes with the specified join method. + + Join method is specified for each axis Index. + + Parameters + ---------- + other : DataFrame or Series + join : {{'outer', 'inner', 'left', 'right'}}, default 'outer' + axis : allowed axis of the other object, default None + Align on index (0), columns (1), or both (None). + level : int or level name, default None + Broadcast across a level, matching Index values on the + passed MultiIndex level. + copy : bool, default True + Always returns new objects. If copy=False and no reindexing is + required then original objects are returned. + fill_value : scalar, default np.NaN + Value to use for missing values. Defaults to NaN, but can be any + "compatible" value. + method : {{'backfill', 'bfill', 'pad', 'ffill', None}}, default None + Method to use for filling holes in reindexed Series: + + - pad / ffill: propagate last valid observation forward to next valid. + - backfill / bfill: use NEXT valid observation to fill gap. + + limit : int, default None + If method is specified, this is the maximum number of consecutive + NaN values to forward/backward fill. In other words, if there is + a gap with more than this number of consecutive NaNs, it will only + be partially filled. 
If method is not specified, this is the + maximum number of entries along the entire axis where NaNs will be + filled. Must be greater than 0 if not None. + fill_axis : {axes_single_arg}, default 0 + Filling axis, method and limit. + broadcast_axis : {axes_single_arg}, default None + Broadcast values along this axis, if aligning two objects of + different dimensions. + + Returns + ------- + (left, right) : ({klass}, type of other) + Aligned objects. + + Examples + -------- + >>> df = pd.DataFrame( + ... [[1, 2, 3, 4], [6, 7, 8, 9]], columns=["D", "B", "E", "A"], index=[1, 2] + ... ) + >>> other = pd.DataFrame( + ... [[10, 20, 30, 40], [60, 70, 80, 90], [600, 700, 800, 900]], + ... columns=["A", "B", "C", "D"], + ... index=[2, 3, 4], + ... ) + >>> df + D B E A + 1 1 2 3 4 + 2 6 7 8 9 + >>> other + A B C D + 2 10 20 30 40 + 3 60 70 80 90 + 4 600 700 800 900 + + Align on columns: + + >>> left, right = df.align(other, join="outer", axis=1) + >>> left + A B C D E + 1 4 2 NaN 1 3 + 2 9 7 NaN 6 8 + >>> right + A B C D E + 2 10 20 30 40 NaN + 3 60 70 80 90 NaN + 4 600 700 800 900 NaN + + We can also align on the index: + + >>> left, right = df.align(other, join="outer", axis=0) + >>> left + D B E A + 1 1.0 2.0 3.0 4.0 + 2 6.0 7.0 8.0 9.0 + 3 NaN NaN NaN NaN + 4 NaN NaN NaN NaN + >>> right + A B C D + 1 NaN NaN NaN NaN + 2 10.0 20.0 30.0 40.0 + 3 60.0 70.0 80.0 90.0 + 4 600.0 700.0 800.0 900.0 + + Finally, the default `axis=None` will align on both index and columns: + + >>> left, right = df.align(other, join="outer", axis=None) + >>> left + A B C D E + 1 4.0 2.0 NaN 1.0 3.0 + 2 9.0 7.0 NaN 6.0 8.0 + 3 NaN NaN NaN NaN NaN + 4 NaN NaN NaN NaN NaN + >>> right + A B C D E + 1 NaN NaN NaN NaN NaN + 2 10.0 20.0 30.0 40.0 NaN + 3 60.0 70.0 80.0 90.0 NaN + 4 600.0 700.0 800.0 900.0 NaN + """ + + method = missing.clean_fill_method(method) + + if broadcast_axis == 1 and self.ndim != other.ndim: + if isinstance(self, ABCSeries): + # this means other is a DataFrame, and we need to broadcast + # self + cons = self._constructor_expanddim + df = cons( + {c: self for c in other.columns}, **other._construct_axes_dict() + ) + return df._align_frame( + other, + join=join, + axis=axis, + level=level, + copy=copy, + fill_value=fill_value, + method=method, + limit=limit, + fill_axis=fill_axis, + ) + elif isinstance(other, ABCSeries): + # this means self is a DataFrame, and we need to broadcast + # other + cons = other._constructor_expanddim + df = cons( + {c: other for c in self.columns}, **self._construct_axes_dict() + ) + return self._align_frame( + df, + join=join, + axis=axis, + level=level, + copy=copy, + fill_value=fill_value, + method=method, + limit=limit, + fill_axis=fill_axis, + ) + + if axis is not None: + axis = self._get_axis_number(axis) + if isinstance(other, ABCDataFrame): + return self._align_frame( + other, + join=join, + axis=axis, + level=level, + copy=copy, + fill_value=fill_value, + method=method, + limit=limit, + fill_axis=fill_axis, + ) + elif isinstance(other, ABCSeries): + return self._align_series( + other, + join=join, + axis=axis, + level=level, + copy=copy, + fill_value=fill_value, + method=method, + limit=limit, + fill_axis=fill_axis, + ) + else: # pragma: no cover + raise TypeError(f"unsupported type: {type(other)}") + + @final + def _align_frame( + self, + other, + join="outer", + axis=None, + level=None, + copy: bool_t = True, + fill_value=None, + method=None, + limit=None, + fill_axis=0, + ): + # defaults + join_index, join_columns = None, None + ilidx, iridx = None, None + clidx, 
cridx = None, None + + is_series = isinstance(self, ABCSeries) + + if (axis is None or axis == 0) and not self.index.equals(other.index): + join_index, ilidx, iridx = self.index.join( + other.index, how=join, level=level, return_indexers=True + ) + + if ( + (axis is None or axis == 1) + and not is_series + and not self.columns.equals(other.columns) + ): + join_columns, clidx, cridx = self.columns.join( + other.columns, how=join, level=level, return_indexers=True + ) + + if is_series: + reindexers = {0: [join_index, ilidx]} + else: + reindexers = {0: [join_index, ilidx], 1: [join_columns, clidx]} + + left = self._reindex_with_indexers( + reindexers, copy=copy, fill_value=fill_value, allow_dups=True + ) + # other must be always DataFrame + right = other._reindex_with_indexers( + {0: [join_index, iridx], 1: [join_columns, cridx]}, + copy=copy, + fill_value=fill_value, + allow_dups=True, + ) + + if method is not None: + _left = left.fillna(method=method, axis=fill_axis, limit=limit) + assert _left is not None # needed for mypy + left = _left + right = right.fillna(method=method, axis=fill_axis, limit=limit) + + # if DatetimeIndex have different tz, convert to UTC + left, right = _align_as_utc(left, right, join_index) + + return ( + left.__finalize__(self), + right.__finalize__(other), + ) + + @final + def _align_series( + self, + other, + join="outer", + axis=None, + level=None, + copy: bool_t = True, + fill_value=None, + method=None, + limit=None, + fill_axis=0, + ): + + is_series = isinstance(self, ABCSeries) + + # series/series compat, other must always be a Series + if is_series: + if axis: + raise ValueError("cannot align series to a series other than axis 0") + + # equal + if self.index.equals(other.index): + join_index, lidx, ridx = None, None, None + else: + join_index, lidx, ridx = self.index.join( + other.index, how=join, level=level, return_indexers=True + ) + + left = self._reindex_indexer(join_index, lidx, copy) + right = other._reindex_indexer(join_index, ridx, copy) + + else: + # one has > 1 ndim + fdata = self._mgr + if axis in [0, 1]: + join_index = self.axes[axis] + lidx, ridx = None, None + if not join_index.equals(other.index): + join_index, lidx, ridx = join_index.join( + other.index, how=join, level=level, return_indexers=True + ) + + if lidx is not None: + bm_axis = self._get_block_manager_axis(axis) + fdata = fdata.reindex_indexer(join_index, lidx, axis=bm_axis) + + else: + raise ValueError("Must specify axis=0 or 1") + + if copy and fdata is self._mgr: + fdata = fdata.copy() + + left = self._constructor(fdata) + + if ridx is None: + right = other + else: + right = other.reindex(join_index, level=level) + + # fill + fill_na = notna(fill_value) or (method is not None) + if fill_na: + left = left.fillna(fill_value, method=method, limit=limit, axis=fill_axis) + right = right.fillna(fill_value, method=method, limit=limit) + + # if DatetimeIndex have different tz, convert to UTC + if is_series or (not is_series and axis == 0): + left, right = _align_as_utc(left, right, join_index) + + return ( + left.__finalize__(self), + right.__finalize__(other), + ) + + @final + def _where( + self, + cond, + other=lib.no_default, + inplace=False, + axis=None, + level=None, + errors="raise", + ): + """ + Equivalent to public method `where`, except that `other` is not + applied as a function even if callable. Used in __setitem__. 
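+
+        (An illustrative sketch of one call path, not a contract: boolean-mask
+        assignment such as ``df[mask] = value`` reaches this method roughly as
+        ``df._where(-mask, value, inplace=True)``; the ``inplace`` branch then
+        re-inverts ``cond`` before the putmask step.)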
+ """ + inplace = validate_bool_kwarg(inplace, "inplace") + + if axis is not None: + axis = self._get_axis_number(axis) + + # align the cond to same shape as myself + cond = com.apply_if_callable(cond, self) + if isinstance(cond, NDFrame): + cond, _ = cond.align(self, join="right", broadcast_axis=1, copy=False) + else: + if not hasattr(cond, "shape"): + cond = np.asanyarray(cond) + if cond.shape != self.shape: + raise ValueError("Array conditional must be same shape as self") + cond = self._constructor(cond, **self._construct_axes_dict()) + + # make sure we are boolean + fill_value = bool(inplace) + cond = cond.fillna(fill_value) + + msg = "Boolean array expected for the condition, not {dtype}" + + if not cond.empty: + if not isinstance(cond, ABCDataFrame): + # This is a single-dimensional object. + if not is_bool_dtype(cond): + raise ValueError(msg.format(dtype=cond.dtype)) + else: + for dt in cond.dtypes: + if not is_bool_dtype(dt): + raise ValueError(msg.format(dtype=dt)) + else: + # GH#21947 we have an empty DataFrame/Series, could be object-dtype + cond = cond.astype(bool) + + cond = -cond if inplace else cond + cond = cond.reindex(self._info_axis, axis=self._info_axis_number, copy=False) + + # try to align with other + if isinstance(other, NDFrame): + + # align with me + if other.ndim <= self.ndim: + + _, other = self.align( + other, + join="left", + axis=axis, + level=level, + fill_value=None, + copy=False, + ) + + # if we are NOT aligned, raise as we cannot where index + if axis is None and not other._indexed_same(self): + raise InvalidIndexError + + elif other.ndim < self.ndim: + # TODO(EA2D): avoid object-dtype cast in EA case GH#38729 + other = other._values + if axis == 0: + other = np.reshape(other, (-1, 1)) + elif axis == 1: + other = np.reshape(other, (1, -1)) + + other = np.broadcast_to(other, self.shape) + + # slice me out of the other + else: + raise NotImplementedError( + "cannot align with a higher dimensional NDFrame" + ) + + elif not isinstance(other, (MultiIndex, NDFrame)): + # mainly just catching Index here + other = extract_array(other, extract_numpy=True) + + if isinstance(other, (np.ndarray, ExtensionArray)): + + if other.shape != self.shape: + if self.ndim != 1: + # In the ndim == 1 case we may have + # other length 1, which we treat as scalar (GH#2745, GH#4192) + # or len(other) == icond.sum(), which we treat like + # __setitem__ (GH#3235) + raise ValueError( + "other must be the same shape as self when an ndarray" + ) + + # we are the same shape, so create an actual object for alignment + else: + # error: Argument 1 to "NDFrame" has incompatible type "ndarray"; + # expected "BlockManager" + other = self._constructor( + other, **self._construct_axes_dict() # type: ignore[arg-type] + ) + + if axis is None: + axis = 0 + + if self.ndim == getattr(other, "ndim", 0): + align = True + else: + align = self._get_axis_number(axis) == 1 + + if inplace: + # we may have different type blocks come out of putmask, so + # reconstruct the block manager + + self._check_inplace_setting(other) + new_data = self._mgr.putmask(mask=cond, new=other, align=align) + result = self._constructor(new_data) + return self._update_inplace(result) + + else: + new_data = self._mgr.where( + other=other, + cond=cond, + align=align, + ) + result = self._constructor(new_data) + return result.__finalize__(self) + + @doc( + klass=_shared_doc_kwargs["klass"], + cond="True", + cond_rev="False", + name="where", + name_other="mask", + ) + def where( + self, + cond, + other=np.nan, + inplace=False, + 
+ axis=None,
+ level=None,
+ errors="raise",
+ try_cast=lib.no_default,
+ ):
+ """
+ Replace values where the condition is {cond_rev}.
+
+ Parameters
+ ----------
+ cond : bool {klass}, array-like, or callable
+ Where `cond` is {cond}, keep the original value. Where
+ {cond_rev}, replace with corresponding value from `other`.
+ If `cond` is callable, it is computed on the {klass} and
+ should return boolean {klass} or array. The callable must
+ not change input {klass} (though pandas doesn't check it).
+ other : scalar, {klass}, or callable
+ Entries where `cond` is {cond_rev} are replaced with
+ corresponding value from `other`.
+ If other is callable, it is computed on the {klass} and
+ should return scalar or {klass}. The callable must not
+ change input {klass} (though pandas doesn't check it).
+ inplace : bool, default False
+ Whether to perform the operation in place on the data.
+ axis : int, default None
+ Alignment axis if needed.
+ level : int, default None
+ Alignment level if needed.
+ errors : str, {{'raise', 'ignore'}}, default 'raise'
+ Note that currently this parameter won't affect
+ the results and will always coerce to a suitable dtype.
+
+ - 'raise' : allow exceptions to be raised.
+ - 'ignore' : suppress exceptions. On error return original object.
+
+ try_cast : bool, default None
+ Try to cast the result back to the input type (if possible).
+
+ .. deprecated:: 1.3.0
+ Manually cast back if necessary.
+
+ Returns
+ -------
+ Same type as caller or None if ``inplace=True``.
+
+ See Also
+ --------
+ :func:`DataFrame.{name_other}` : Return an object of same shape as
+ self.
+
+ Notes
+ -----
+ The {name} method is an application of the if-then idiom. For each
+ element in the calling DataFrame, if ``cond`` is ``{cond}`` the
+ element is used; otherwise the corresponding element from the DataFrame
+ ``other`` is used.
+
+ The signature for :func:`DataFrame.where` differs from
+ :func:`numpy.where`. Roughly ``df1.where(m, df2)`` is equivalent to
+ ``np.where(m, df1, df2)``.
+
+ For further details and examples see the ``{name}`` documentation in
+ :ref:`indexing <indexing.where_mask>`.
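+
+ As an additional sketch (hypothetical Series ``s``), ``cond`` may
+ itself be a callable that is evaluated on the caller:
+
+ >>> s = pd.Series([-1, 2, -3])
+ >>> s.where(lambda x: x > 0, 0)
+ 0 0
+ 1 2
+ 2 0
+ dtype: int64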
+ + Examples + -------- + >>> s = pd.Series(range(5)) + >>> s.where(s > 0) + 0 NaN + 1 1.0 + 2 2.0 + 3 3.0 + 4 4.0 + dtype: float64 + >>> s.mask(s > 0) + 0 0.0 + 1 NaN + 2 NaN + 3 NaN + 4 NaN + dtype: float64 + + >>> s.where(s > 1, 10) + 0 10 + 1 10 + 2 2 + 3 3 + 4 4 + dtype: int64 + >>> s.mask(s > 1, 10) + 0 0 + 1 1 + 2 10 + 3 10 + 4 10 + dtype: int64 + + >>> df = pd.DataFrame(np.arange(10).reshape(-1, 2), columns=['A', 'B']) + >>> df + A B + 0 0 1 + 1 2 3 + 2 4 5 + 3 6 7 + 4 8 9 + >>> m = df % 3 == 0 + >>> df.where(m, -df) + A B + 0 0 -1 + 1 -2 3 + 2 -4 -5 + 3 6 -7 + 4 -8 9 + >>> df.where(m, -df) == np.where(m, df, -df) + A B + 0 True True + 1 True True + 2 True True + 3 True True + 4 True True + >>> df.where(m, -df) == df.mask(~m, -df) + A B + 0 True True + 1 True True + 2 True True + 3 True True + 4 True True + """ + other = com.apply_if_callable(other, self) + + if try_cast is not lib.no_default: + warnings.warn( + "try_cast keyword is deprecated and will be removed in a " + "future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + return self._where(cond, other, inplace, axis, level, errors=errors) + + @doc( + where, + klass=_shared_doc_kwargs["klass"], + cond="False", + cond_rev="True", + name="mask", + name_other="where", + ) + def mask( + self, + cond, + other=np.nan, + inplace=False, + axis=None, + level=None, + errors="raise", + try_cast=lib.no_default, + ): + + inplace = validate_bool_kwarg(inplace, "inplace") + cond = com.apply_if_callable(cond, self) + + if try_cast is not lib.no_default: + warnings.warn( + "try_cast keyword is deprecated and will be removed in a " + "future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + # see gh-21891 + if not hasattr(cond, "__invert__"): + cond = np.array(cond) + + return self.where( + ~cond, + other=other, + inplace=inplace, + axis=axis, + level=level, + errors=errors, + ) + + @doc(klass=_shared_doc_kwargs["klass"]) + def shift( + self: NDFrameT, periods=1, freq=None, axis=0, fill_value=None + ) -> NDFrameT: + """ + Shift index by desired number of periods with an optional time `freq`. + + When `freq` is not passed, shift the index without realigning the data. + If `freq` is passed (in this case, the index must be date or datetime, + or it will raise a `NotImplementedError`), the index will be + increased using the periods and the `freq`. `freq` can be inferred + when specified as "infer" as long as either freq or inferred_freq + attribute is set in the index. + + Parameters + ---------- + periods : int + Number of periods to shift. Can be positive or negative. + freq : DateOffset, tseries.offsets, timedelta, or str, optional + Offset to use from the tseries module or time rule (e.g. 'EOM'). + If `freq` is specified then the index values are shifted but the + data is not realigned. That is, use `freq` if you would like to + extend the index when shifting and preserve the original data. + If `freq` is specified as "infer" then it will be inferred from + the freq or inferred_freq attributes of the index. If neither of + those attributes exist, a ValueError is thrown. + axis : {{0 or 'index', 1 or 'columns', None}}, default None + Shift direction. + fill_value : object, optional + The scalar value to use for newly introduced missing values. + the default depends on the dtype of `self`. + For numeric data, ``np.nan`` is used. + For datetime, timedelta, or period data, etc. :attr:`NaT` is used. + For extension dtypes, ``self.dtype.na_value`` is used. + + .. 
versionchanged:: 1.1.0 + + Returns + ------- + {klass} + Copy of input object, shifted. + + See Also + -------- + Index.shift : Shift values of Index. + DatetimeIndex.shift : Shift values of DatetimeIndex. + PeriodIndex.shift : Shift values of PeriodIndex. + tshift : Shift the time index, using the index's frequency if + available. + + Examples + -------- + >>> df = pd.DataFrame({{"Col1": [10, 20, 15, 30, 45], + ... "Col2": [13, 23, 18, 33, 48], + ... "Col3": [17, 27, 22, 37, 52]}}, + ... index=pd.date_range("2020-01-01", "2020-01-05")) + >>> df + Col1 Col2 Col3 + 2020-01-01 10 13 17 + 2020-01-02 20 23 27 + 2020-01-03 15 18 22 + 2020-01-04 30 33 37 + 2020-01-05 45 48 52 + + >>> df.shift(periods=3) + Col1 Col2 Col3 + 2020-01-01 NaN NaN NaN + 2020-01-02 NaN NaN NaN + 2020-01-03 NaN NaN NaN + 2020-01-04 10.0 13.0 17.0 + 2020-01-05 20.0 23.0 27.0 + + >>> df.shift(periods=1, axis="columns") + Col1 Col2 Col3 + 2020-01-01 NaN 10 13 + 2020-01-02 NaN 20 23 + 2020-01-03 NaN 15 18 + 2020-01-04 NaN 30 33 + 2020-01-05 NaN 45 48 + + >>> df.shift(periods=3, fill_value=0) + Col1 Col2 Col3 + 2020-01-01 0 0 0 + 2020-01-02 0 0 0 + 2020-01-03 0 0 0 + 2020-01-04 10 13 17 + 2020-01-05 20 23 27 + + >>> df.shift(periods=3, freq="D") + Col1 Col2 Col3 + 2020-01-04 10 13 17 + 2020-01-05 20 23 27 + 2020-01-06 15 18 22 + 2020-01-07 30 33 37 + 2020-01-08 45 48 52 + + >>> df.shift(periods=3, freq="infer") + Col1 Col2 Col3 + 2020-01-04 10 13 17 + 2020-01-05 20 23 27 + 2020-01-06 15 18 22 + 2020-01-07 30 33 37 + 2020-01-08 45 48 52 + """ + if periods == 0: + return self.copy() + + if freq is None: + # when freq is None, data is shifted, index is not + axis = self._get_axis_number(axis) + new_data = self._mgr.shift( + periods=periods, axis=axis, fill_value=fill_value + ) + return self._constructor(new_data).__finalize__(self, method="shift") + + # when freq is given, index is shifted, data is not + index = self._get_axis(axis) + + if freq == "infer": + freq = getattr(index, "freq", None) + + if freq is None: + freq = getattr(index, "inferred_freq", None) + + if freq is None: + msg = "Freq was not set in the index hence cannot be inferred" + raise ValueError(msg) + + elif isinstance(freq, str): + freq = to_offset(freq) + + if isinstance(index, PeriodIndex): + orig_freq = to_offset(index.freq) + if freq != orig_freq: + assert orig_freq is not None # for mypy + raise ValueError( + f"Given freq {freq.rule_code} does not match " + f"PeriodIndex freq {orig_freq.rule_code}" + ) + new_ax = index.shift(periods) + else: + new_ax = index.shift(periods, freq) + + result = self.set_axis(new_ax, axis=axis) + return result.__finalize__(self, method="shift") + + @final + def slice_shift(self: NDFrameT, periods: int = 1, axis=0) -> NDFrameT: + """ + Equivalent to `shift` without copying data. + The shifted data will not include the dropped periods and the + shifted axis will be smaller than the original. + + .. deprecated:: 1.2.0 + slice_shift is deprecated, + use DataFrame/Series.shift instead. + + Parameters + ---------- + periods : int + Number of periods to move, can be positive or negative. + + Returns + ------- + shifted : same type as caller + + Notes + ----- + While the `slice_shift` is faster than `shift`, you may pay for it + later during alignment. + """ + + msg = ( + "The 'slice_shift' method is deprecated " + "and will be removed in a future version. " + "You can use DataFrame/Series.shift instead." 
+ ) + warnings.warn(msg, FutureWarning, stacklevel=find_stack_level()) + + if periods == 0: + return self + + if periods > 0: + vslicer = slice(None, -periods) + islicer = slice(periods, None) + else: + vslicer = slice(-periods, None) + islicer = slice(None, periods) + + new_obj = self._slice(vslicer, axis=axis) + shifted_axis = self._get_axis(axis)[islicer] + new_obj.set_axis(shifted_axis, axis=axis, inplace=True) + + return new_obj.__finalize__(self, method="slice_shift") + + @final + def tshift(self: NDFrameT, periods: int = 1, freq=None, axis: Axis = 0) -> NDFrameT: + """ + Shift the time index, using the index's frequency if available. + + .. deprecated:: 1.1.0 + Use `shift` instead. + + Parameters + ---------- + periods : int + Number of periods to move, can be positive or negative. + freq : DateOffset, timedelta, or str, default None + Increment to use from the tseries module + or time rule expressed as a string (e.g. 'EOM'). + axis : {0 or ‘index’, 1 or ‘columns’, None}, default 0 + Corresponds to the axis that contains the Index. + + Returns + ------- + shifted : Series/DataFrame + + Notes + ----- + If freq is not specified then tries to use the freq or inferred_freq + attributes of the index. If neither of those attributes exist, a + ValueError is thrown + """ + warnings.warn( + ( + "tshift is deprecated and will be removed in a future version. " + "Please use shift instead." + ), + FutureWarning, + stacklevel=find_stack_level(), + ) + + if freq is None: + freq = "infer" + + return self.shift(periods, freq, axis) + + def truncate( + self: NDFrameT, before=None, after=None, axis=None, copy: bool_t = True + ) -> NDFrameT: + """ + Truncate a Series or DataFrame before and after some index value. + + This is a useful shorthand for boolean indexing based on index + values above or below certain thresholds. + + Parameters + ---------- + before : date, str, int + Truncate all rows before this index value. + after : date, str, int + Truncate all rows after this index value. + axis : {0 or 'index', 1 or 'columns'}, optional + Axis to truncate. Truncates the index (rows) by default. + copy : bool, default is True, + Return a copy of the truncated section. + + Returns + ------- + type of caller + The truncated Series or DataFrame. + + See Also + -------- + DataFrame.loc : Select a subset of a DataFrame by label. + DataFrame.iloc : Select a subset of a DataFrame by position. + + Notes + ----- + If the index being truncated contains only datetime values, + `before` and `after` may be specified as strings instead of + Timestamps. + + Examples + -------- + >>> df = pd.DataFrame({'A': ['a', 'b', 'c', 'd', 'e'], + ... 'B': ['f', 'g', 'h', 'i', 'j'], + ... 'C': ['k', 'l', 'm', 'n', 'o']}, + ... index=[1, 2, 3, 4, 5]) + >>> df + A B C + 1 a f k + 2 b g l + 3 c h m + 4 d i n + 5 e j o + + >>> df.truncate(before=2, after=4) + A B C + 2 b g l + 3 c h m + 4 d i n + + The columns of a DataFrame can be truncated. + + >>> df.truncate(before="A", after="B", axis="columns") + A B + 1 a f + 2 b g + 3 c h + 4 d i + 5 e j + + For Series, only rows can be truncated. + + >>> df['A'].truncate(before=2, after=4) + 2 b + 3 c + 4 d + Name: A, dtype: object + + The index values in ``truncate`` can be datetimes or string + dates. 
+ + >>> dates = pd.date_range('2016-01-01', '2016-02-01', freq='s') + >>> df = pd.DataFrame(index=dates, data={'A': 1}) + >>> df.tail() + A + 2016-01-31 23:59:56 1 + 2016-01-31 23:59:57 1 + 2016-01-31 23:59:58 1 + 2016-01-31 23:59:59 1 + 2016-02-01 00:00:00 1 + + >>> df.truncate(before=pd.Timestamp('2016-01-05'), + ... after=pd.Timestamp('2016-01-10')).tail() + A + 2016-01-09 23:59:56 1 + 2016-01-09 23:59:57 1 + 2016-01-09 23:59:58 1 + 2016-01-09 23:59:59 1 + 2016-01-10 00:00:00 1 + + Because the index is a DatetimeIndex containing only dates, we can + specify `before` and `after` as strings. They will be coerced to + Timestamps before truncation. + + >>> df.truncate('2016-01-05', '2016-01-10').tail() + A + 2016-01-09 23:59:56 1 + 2016-01-09 23:59:57 1 + 2016-01-09 23:59:58 1 + 2016-01-09 23:59:59 1 + 2016-01-10 00:00:00 1 + + Note that ``truncate`` assumes a 0 value for any unspecified time + component (midnight). This differs from partial string slicing, which + returns any partially matching dates. + + >>> df.loc['2016-01-05':'2016-01-10', :].tail() + A + 2016-01-10 23:59:55 1 + 2016-01-10 23:59:56 1 + 2016-01-10 23:59:57 1 + 2016-01-10 23:59:58 1 + 2016-01-10 23:59:59 1 + """ + if axis is None: + axis = self._stat_axis_number + axis = self._get_axis_number(axis) + ax = self._get_axis(axis) + + # GH 17935 + # Check that index is sorted + if not ax.is_monotonic_increasing and not ax.is_monotonic_decreasing: + raise ValueError("truncate requires a sorted index") + + # if we have a date index, convert to dates, otherwise + # treat like a slice + if ax._is_all_dates: + from pandas.core.tools.datetimes import to_datetime + + before = to_datetime(before) + after = to_datetime(after) + + if before is not None and after is not None and before > after: + raise ValueError(f"Truncate: {after} must be after {before}") + + if len(ax) > 1 and ax.is_monotonic_decreasing and ax.nunique() > 1: + before, after = after, before + + slicer = [slice(None, None)] * self._AXIS_LEN + slicer[axis] = slice(before, after) + result = self.loc[tuple(slicer)] + + if isinstance(ax, MultiIndex): + setattr(result, self._get_axis_name(axis), ax.truncate(before, after)) + + if copy: + result = result.copy() + + return result + + @final + def tz_convert( + self: NDFrameT, tz, axis=0, level=None, copy: bool_t = True + ) -> NDFrameT: + """ + Convert tz-aware axis to target time zone. + + Parameters + ---------- + tz : str or tzinfo object + axis : the axis to convert + level : int, str, default None + If axis is a MultiIndex, convert a specific level. Otherwise + must be None. + copy : bool, default True + Also make a copy of the underlying data. + + Returns + ------- + {klass} + Object with time zone converted axis. + + Raises + ------ + TypeError + If the axis is tz-naive. 
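+
+ Examples
+ --------
+ A minimal sketch with a hypothetical tz-aware Series ``s``:
+
+ >>> s = pd.Series([1],
+ ... index=pd.DatetimeIndex(['2018-09-15 01:30:00+02:00']))
+ >>> s.tz_convert('Asia/Shanghai')
+ 2018-09-15 07:30:00+08:00 1
+ dtype: int64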
+ """ + axis = self._get_axis_number(axis) + ax = self._get_axis(axis) + + def _tz_convert(ax, tz): + if not hasattr(ax, "tz_convert"): + if len(ax) > 0: + ax_name = self._get_axis_name(axis) + raise TypeError( + f"{ax_name} is not a valid DatetimeIndex or PeriodIndex" + ) + else: + ax = DatetimeIndex([], tz=tz) + else: + ax = ax.tz_convert(tz) + return ax + + # if a level is given it must be a MultiIndex level or + # equivalent to the axis name + if isinstance(ax, MultiIndex): + level = ax._get_level_number(level) + new_level = _tz_convert(ax.levels[level], tz) + ax = ax.set_levels(new_level, level=level) + else: + if level not in (None, 0, ax.name): + raise ValueError(f"The level {level} is not valid") + ax = _tz_convert(ax, tz) + + result = self.copy(deep=copy) + result = result.set_axis(ax, axis=axis, inplace=False) + return result.__finalize__(self, method="tz_convert") + + @final + def tz_localize( + self: NDFrameT, + tz, + axis=0, + level=None, + copy: bool_t = True, + ambiguous="raise", + nonexistent: str = "raise", + ) -> NDFrameT: + """ + Localize tz-naive index of a Series or DataFrame to target time zone. + + This operation localizes the Index. To localize the values in a + timezone-naive Series, use :meth:`Series.dt.tz_localize`. + + Parameters + ---------- + tz : str or tzinfo + axis : the axis to localize + level : int, str, default None + If axis ia a MultiIndex, localize a specific level. Otherwise + must be None. + copy : bool, default True + Also make a copy of the underlying data. + ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise' + When clocks moved backward due to DST, ambiguous times may arise. + For example in Central European Time (UTC+01), when going from + 03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at + 00:30:00 UTC and at 01:30:00 UTC. In such a situation, the + `ambiguous` parameter dictates how ambiguous times should be + handled. + + - 'infer' will attempt to infer fall dst-transition hours based on + order + - bool-ndarray where True signifies a DST time, False designates + a non-DST time (note that this flag is only applicable for + ambiguous times) + - 'NaT' will return NaT where there are ambiguous times + - 'raise' will raise an AmbiguousTimeError if there are ambiguous + times. + nonexistent : str, default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. Valid values are: + + - 'shift_forward' will shift the nonexistent time forward to the + closest existing time + - 'shift_backward' will shift the nonexistent time backward to the + closest existing time + - 'NaT' will return NaT where there are nonexistent times + - timedelta objects will shift nonexistent times by the timedelta + - 'raise' will raise an NonExistentTimeError if there are + nonexistent times. + + Returns + ------- + Series or DataFrame + Same type as the input. + + Raises + ------ + TypeError + If the TimeSeries is tz-aware and tz is not None. + + Examples + -------- + Localize local times: + + >>> s = pd.Series([1], + ... index=pd.DatetimeIndex(['2018-09-15 01:30:00'])) + >>> s.tz_localize('CET') + 2018-09-15 01:30:00+02:00 1 + dtype: int64 + + Be careful with DST changes. When there is sequential data, pandas + can infer the DST time: + + >>> s = pd.Series(range(7), + ... index=pd.DatetimeIndex(['2018-10-28 01:30:00', + ... '2018-10-28 02:00:00', + ... '2018-10-28 02:30:00', + ... '2018-10-28 02:00:00', + ... '2018-10-28 02:30:00', + ... '2018-10-28 03:00:00', + ... 
'2018-10-28 03:30:00'])) + >>> s.tz_localize('CET', ambiguous='infer') + 2018-10-28 01:30:00+02:00 0 + 2018-10-28 02:00:00+02:00 1 + 2018-10-28 02:30:00+02:00 2 + 2018-10-28 02:00:00+01:00 3 + 2018-10-28 02:30:00+01:00 4 + 2018-10-28 03:00:00+01:00 5 + 2018-10-28 03:30:00+01:00 6 + dtype: int64 + + In some cases, inferring the DST is impossible. In such cases, you can + pass an ndarray to the ambiguous parameter to set the DST explicitly + + >>> s = pd.Series(range(3), + ... index=pd.DatetimeIndex(['2018-10-28 01:20:00', + ... '2018-10-28 02:36:00', + ... '2018-10-28 03:46:00'])) + >>> s.tz_localize('CET', ambiguous=np.array([True, True, False])) + 2018-10-28 01:20:00+02:00 0 + 2018-10-28 02:36:00+02:00 1 + 2018-10-28 03:46:00+01:00 2 + dtype: int64 + + If the DST transition causes nonexistent times, you can shift these + dates forward or backward with a timedelta object or `'shift_forward'` + or `'shift_backward'`. + + >>> s = pd.Series(range(2), + ... index=pd.DatetimeIndex(['2015-03-29 02:30:00', + ... '2015-03-29 03:30:00'])) + >>> s.tz_localize('Europe/Warsaw', nonexistent='shift_forward') + 2015-03-29 03:00:00+02:00 0 + 2015-03-29 03:30:00+02:00 1 + dtype: int64 + >>> s.tz_localize('Europe/Warsaw', nonexistent='shift_backward') + 2015-03-29 01:59:59.999999999+01:00 0 + 2015-03-29 03:30:00+02:00 1 + dtype: int64 + >>> s.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta('1H')) + 2015-03-29 03:30:00+02:00 0 + 2015-03-29 03:30:00+02:00 1 + dtype: int64 + """ + nonexistent_options = ("raise", "NaT", "shift_forward", "shift_backward") + if nonexistent not in nonexistent_options and not isinstance( + nonexistent, timedelta + ): + raise ValueError( + "The nonexistent argument must be one of 'raise', " + "'NaT', 'shift_forward', 'shift_backward' or " + "a timedelta object" + ) + + axis = self._get_axis_number(axis) + ax = self._get_axis(axis) + + def _tz_localize(ax, tz, ambiguous, nonexistent): + if not hasattr(ax, "tz_localize"): + if len(ax) > 0: + ax_name = self._get_axis_name(axis) + raise TypeError( + f"{ax_name} is not a valid DatetimeIndex or PeriodIndex" + ) + else: + ax = DatetimeIndex([], tz=tz) + else: + ax = ax.tz_localize(tz, ambiguous=ambiguous, nonexistent=nonexistent) + return ax + + # if a level is given it must be a MultiIndex level or + # equivalent to the axis name + if isinstance(ax, MultiIndex): + level = ax._get_level_number(level) + new_level = _tz_localize(ax.levels[level], tz, ambiguous, nonexistent) + ax = ax.set_levels(new_level, level=level) + else: + if level not in (None, 0, ax.name): + raise ValueError(f"The level {level} is not valid") + ax = _tz_localize(ax, tz, ambiguous, nonexistent) + + result = self.copy(deep=copy) + result = result.set_axis(ax, axis=axis, inplace=False) + return result.__finalize__(self, method="tz_localize") + + # ---------------------------------------------------------------------- + # Numeric Methods + + @final + def describe( + self: NDFrameT, + percentiles=None, + include=None, + exclude=None, + datetime_is_numeric=False, + ) -> NDFrameT: + """ + Generate descriptive statistics. + + Descriptive statistics include those that summarize the central + tendency, dispersion and shape of a + dataset's distribution, excluding ``NaN`` values. + + Analyzes both numeric and object series, as well + as ``DataFrame`` column sets of mixed data types. The output + will vary depending on what is provided. Refer to the notes + below for more detail. 
+ + Parameters + ---------- + percentiles : list-like of numbers, optional + The percentiles to include in the output. All should + fall between 0 and 1. The default is + ``[.25, .5, .75]``, which returns the 25th, 50th, and + 75th percentiles. + include : 'all', list-like of dtypes or None (default), optional + A white list of data types to include in the result. Ignored + for ``Series``. Here are the options: + + - 'all' : All columns of the input will be included in the output. + - A list-like of dtypes : Limits the results to the + provided data types. + To limit the result to numeric types submit + ``numpy.number``. To limit it instead to object columns submit + the ``numpy.object`` data type. Strings + can also be used in the style of + ``select_dtypes`` (e.g. ``df.describe(include=['O'])``). To + select pandas categorical columns, use ``'category'`` + - None (default) : The result will include all numeric columns. + exclude : list-like of dtypes or None (default), optional, + A black list of data types to omit from the result. Ignored + for ``Series``. Here are the options: + + - A list-like of dtypes : Excludes the provided data types + from the result. To exclude numeric types submit + ``numpy.number``. To exclude object columns submit the data + type ``numpy.object``. Strings can also be used in the style of + ``select_dtypes`` (e.g. ``df.describe(exclude=['O'])``). To + exclude pandas categorical columns, use ``'category'`` + - None (default) : The result will exclude nothing. + datetime_is_numeric : bool, default False + Whether to treat datetime dtypes as numeric. This affects statistics + calculated for the column. For DataFrame input, this also + controls whether datetime columns are included by default. + + .. versionadded:: 1.1.0 + + Returns + ------- + Series or DataFrame + Summary statistics of the Series or Dataframe provided. + + See Also + -------- + DataFrame.count: Count number of non-NA/null observations. + DataFrame.max: Maximum of the values in the object. + DataFrame.min: Minimum of the values in the object. + DataFrame.mean: Mean of the values. + DataFrame.std: Standard deviation of the observations. + DataFrame.select_dtypes: Subset of a DataFrame including/excluding + columns based on their dtype. + + Notes + ----- + For numeric data, the result's index will include ``count``, + ``mean``, ``std``, ``min``, ``max`` as well as lower, ``50`` and + upper percentiles. By default the lower percentile is ``25`` and the + upper percentile is ``75``. The ``50`` percentile is the + same as the median. + + For object data (e.g. strings or timestamps), the result's index + will include ``count``, ``unique``, ``top``, and ``freq``. The ``top`` + is the most common value. The ``freq`` is the most common value's + frequency. Timestamps also include the ``first`` and ``last`` items. + + If multiple object values have the highest count, then the + ``count`` and ``top`` results will be arbitrarily chosen from + among those with the highest count. + + For mixed data types provided via a ``DataFrame``, the default is to + return only an analysis of numeric columns. If the dataframe consists + only of object and categorical data without any numeric columns, the + default is to return an analysis of both the object and categorical + columns. If ``include='all'`` is provided as an option, the result + will include a union of attributes of each type. + + The `include` and `exclude` parameters can be used to limit + which columns in a ``DataFrame`` are analyzed for the output. 
+ The parameters are ignored when analyzing a ``Series``. + + Examples + -------- + Describing a numeric ``Series``. + + >>> s = pd.Series([1, 2, 3]) + >>> s.describe() + count 3.0 + mean 2.0 + std 1.0 + min 1.0 + 25% 1.5 + 50% 2.0 + 75% 2.5 + max 3.0 + dtype: float64 + + Describing a categorical ``Series``. + + >>> s = pd.Series(['a', 'a', 'b', 'c']) + >>> s.describe() + count 4 + unique 3 + top a + freq 2 + dtype: object + + Describing a timestamp ``Series``. + + >>> s = pd.Series([ + ... np.datetime64("2000-01-01"), + ... np.datetime64("2010-01-01"), + ... np.datetime64("2010-01-01") + ... ]) + >>> s.describe(datetime_is_numeric=True) + count 3 + mean 2006-09-01 08:00:00 + min 2000-01-01 00:00:00 + 25% 2004-12-31 12:00:00 + 50% 2010-01-01 00:00:00 + 75% 2010-01-01 00:00:00 + max 2010-01-01 00:00:00 + dtype: object + + Describing a ``DataFrame``. By default only numeric fields + are returned. + + >>> df = pd.DataFrame({'categorical': pd.Categorical(['d','e','f']), + ... 'numeric': [1, 2, 3], + ... 'object': ['a', 'b', 'c'] + ... }) + >>> df.describe() + numeric + count 3.0 + mean 2.0 + std 1.0 + min 1.0 + 25% 1.5 + 50% 2.0 + 75% 2.5 + max 3.0 + + Describing all columns of a ``DataFrame`` regardless of data type. + + >>> df.describe(include='all') # doctest: +SKIP + categorical numeric object + count 3 3.0 3 + unique 3 NaN 3 + top f NaN a + freq 1 NaN 1 + mean NaN 2.0 NaN + std NaN 1.0 NaN + min NaN 1.0 NaN + 25% NaN 1.5 NaN + 50% NaN 2.0 NaN + 75% NaN 2.5 NaN + max NaN 3.0 NaN + + Describing a column from a ``DataFrame`` by accessing it as + an attribute. + + >>> df.numeric.describe() + count 3.0 + mean 2.0 + std 1.0 + min 1.0 + 25% 1.5 + 50% 2.0 + 75% 2.5 + max 3.0 + Name: numeric, dtype: float64 + + Including only numeric columns in a ``DataFrame`` description. + + >>> df.describe(include=[np.number]) + numeric + count 3.0 + mean 2.0 + std 1.0 + min 1.0 + 25% 1.5 + 50% 2.0 + 75% 2.5 + max 3.0 + + Including only string columns in a ``DataFrame`` description. + + >>> df.describe(include=[object]) # doctest: +SKIP + object + count 3 + unique 3 + top a + freq 1 + + Including only categorical columns from a ``DataFrame`` description. + + >>> df.describe(include=['category']) + categorical + count 3 + unique 3 + top d + freq 1 + + Excluding numeric columns from a ``DataFrame`` description. + + >>> df.describe(exclude=[np.number]) # doctest: +SKIP + categorical object + count 3 3 + unique 3 3 + top f a + freq 1 1 + + Excluding object columns from a ``DataFrame`` description. + + >>> df.describe(exclude=[object]) # doctest: +SKIP + categorical numeric + count 3 3.0 + unique 3 NaN + top f NaN + freq 1 NaN + mean NaN 2.0 + std NaN 1.0 + min NaN 1.0 + 25% NaN 1.5 + 50% NaN 2.0 + 75% NaN 2.5 + max NaN 3.0 + """ + return describe_ndframe( + obj=self, + include=include, + exclude=exclude, + datetime_is_numeric=datetime_is_numeric, + percentiles=percentiles, + ) + + @final + def pct_change( + self: NDFrameT, + periods=1, + fill_method="pad", + limit=None, + freq=None, + **kwargs, + ) -> NDFrameT: + """ + Percentage change between the current and a prior element. + + Computes the percentage change from the immediately previous row by + default. This is useful in comparing the percentage of change in a time + series of elements. + + Parameters + ---------- + periods : int, default 1 + Periods to shift for forming percent change. + fill_method : str, default 'pad' + How to handle NAs before computing percent changes. + limit : int, default None + The number of consecutive NAs to fill before stopping. 
+ freq : DateOffset, timedelta, or str, optional + Increment to use from time series API (e.g. 'M' or BDay()). + **kwargs + Additional keyword arguments are passed into + `DataFrame.shift` or `Series.shift`. + + Returns + ------- + chg : Series or DataFrame + The same type as the calling object. + + See Also + -------- + Series.diff : Compute the difference of two elements in a Series. + DataFrame.diff : Compute the difference of two elements in a DataFrame. + Series.shift : Shift the index by some number of periods. + DataFrame.shift : Shift the index by some number of periods. + + Examples + -------- + **Series** + + >>> s = pd.Series([90, 91, 85]) + >>> s + 0 90 + 1 91 + 2 85 + dtype: int64 + + >>> s.pct_change() + 0 NaN + 1 0.011111 + 2 -0.065934 + dtype: float64 + + >>> s.pct_change(periods=2) + 0 NaN + 1 NaN + 2 -0.055556 + dtype: float64 + + See the percentage change in a Series where filling NAs with last + valid observation forward to next valid. + + >>> s = pd.Series([90, 91, None, 85]) + >>> s + 0 90.0 + 1 91.0 + 2 NaN + 3 85.0 + dtype: float64 + + >>> s.pct_change(fill_method='ffill') + 0 NaN + 1 0.011111 + 2 0.000000 + 3 -0.065934 + dtype: float64 + + **DataFrame** + + Percentage change in French franc, Deutsche Mark, and Italian lira from + 1980-01-01 to 1980-03-01. + + >>> df = pd.DataFrame({ + ... 'FR': [4.0405, 4.0963, 4.3149], + ... 'GR': [1.7246, 1.7482, 1.8519], + ... 'IT': [804.74, 810.01, 860.13]}, + ... index=['1980-01-01', '1980-02-01', '1980-03-01']) + >>> df + FR GR IT + 1980-01-01 4.0405 1.7246 804.74 + 1980-02-01 4.0963 1.7482 810.01 + 1980-03-01 4.3149 1.8519 860.13 + + >>> df.pct_change() + FR GR IT + 1980-01-01 NaN NaN NaN + 1980-02-01 0.013810 0.013684 0.006549 + 1980-03-01 0.053365 0.059318 0.061876 + + Percentage of change in GOOG and APPL stock volume. Shows computing + the percentage change between columns. + + >>> df = pd.DataFrame({ + ... '2016': [1769950, 30586265], + ... '2015': [1500923, 40912316], + ... '2014': [1371819, 41403351]}, + ... 
index=['GOOG', 'APPL']) + >>> df + 2016 2015 2014 + GOOG 1769950 1500923 1371819 + APPL 30586265 40912316 41403351 + + >>> df.pct_change(axis='columns', periods=-1) + 2016 2015 2014 + GOOG 0.179241 0.094112 NaN + APPL -0.252395 -0.011860 NaN + """ + axis = self._get_axis_number(kwargs.pop("axis", self._stat_axis_name)) + if fill_method is None: + data = self + else: + _data = self.fillna(method=fill_method, axis=axis, limit=limit) + assert _data is not None # needed for mypy + data = _data + + shifted = data.shift(periods=periods, freq=freq, axis=axis, **kwargs) + # Unsupported left operand type for / ("NDFrameT") + rs = data / shifted - 1 # type: ignore[operator] + if freq is not None: + # Shift method is implemented differently when freq is not None + # We want to restore the original index + rs = rs.loc[~rs.index.duplicated()] + rs = rs.reindex_like(data) + return rs + + @final + def _agg_by_level( + self, + name: str, + axis: Axis = 0, + level: Level = 0, + skipna: bool_t = True, + **kwargs, + ): + if axis is None: + raise ValueError("Must specify 'axis' when aggregating by level.") + grouped = self.groupby(level=level, axis=axis, sort=False) + if hasattr(grouped, name) and skipna: + return getattr(grouped, name)(**kwargs) + axis = self._get_axis_number(axis) + method = getattr(type(self), name) + applyf = lambda x: method(x, axis=axis, skipna=skipna, **kwargs) + return grouped.aggregate(applyf) + + @final + def _logical_func( + self, + name: str, + func, + axis: Axis = 0, + bool_only: bool_t | None = None, + skipna: bool_t = True, + level: Level | None = None, + **kwargs, + ) -> Series | bool_t: + nv.validate_logical_func((), kwargs, fname=name) + validate_bool_kwarg(skipna, "skipna", none_allowed=False) + if level is not None: + warnings.warn( + "Using the level keyword in DataFrame and Series aggregations is " + "deprecated and will be removed in a future version. Use groupby " + "instead. df.any(level=1) should use df.groupby(level=1).any()", + FutureWarning, + stacklevel=find_stack_level(), + ) + if bool_only is not None: + raise NotImplementedError( + "Option bool_only is not implemented with option level." 
+ ) + return self._agg_by_level(name, axis=axis, level=level, skipna=skipna) + + if self.ndim > 1 and axis is None: + # Reduce along one dimension then the other, to simplify DataFrame._reduce + res = self._logical_func( + name, func, axis=0, bool_only=bool_only, skipna=skipna, **kwargs + ) + return res._logical_func(name, func, skipna=skipna, **kwargs) + + if ( + self.ndim > 1 + and axis == 1 + and len(self._mgr.arrays) > 1 + # TODO(EA2D): special-case not needed + and all(x.ndim == 2 for x in self._mgr.arrays) + and bool_only is not None + and not kwargs + ): + # Fastpath avoiding potentially expensive transpose + obj = self + if bool_only: + obj = self._get_bool_data() + return obj._reduce_axis1(name, func, skipna=skipna) + + return self._reduce( + func, + name=name, + axis=axis, + skipna=skipna, + numeric_only=bool_only, + filter_type="bool", + ) + + def any( + self, + axis: Axis = 0, + bool_only: bool_t | None = None, + skipna: bool_t = True, + level: Level | None = None, + **kwargs, + ) -> Series | bool_t: + return self._logical_func( + "any", nanops.nanany, axis, bool_only, skipna, level, **kwargs + ) + + def all( + self, + axis: Axis = 0, + bool_only: bool_t | None = None, + skipna: bool_t = True, + level: Level | None = None, + **kwargs, + ) -> Series | bool_t: + return self._logical_func( + "all", nanops.nanall, axis, bool_only, skipna, level, **kwargs + ) + + @final + def _accum_func( + self, + name: str, + func, + axis: Axis | None = None, + skipna: bool_t = True, + *args, + **kwargs, + ): + skipna = nv.validate_cum_func_with_skipna(skipna, args, kwargs, name) + if axis is None: + axis = self._stat_axis_number + else: + axis = self._get_axis_number(axis) + + if axis == 1: + return self.T._accum_func( + name, func, axis=0, skipna=skipna, *args, **kwargs + ).T + + def block_accum_func(blk_values): + values = blk_values.T if hasattr(blk_values, "T") else blk_values + + result = nanops.na_accum_func(values, func, skipna=skipna) + + result = result.T if hasattr(result, "T") else result + return result + + result = self._mgr.apply(block_accum_func) + + return self._constructor(result).__finalize__(self, method=name) + + def cummax(self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs): + return self._accum_func( + "cummax", np.maximum.accumulate, axis, skipna, *args, **kwargs + ) + + def cummin(self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs): + return self._accum_func( + "cummin", np.minimum.accumulate, axis, skipna, *args, **kwargs + ) + + def cumsum(self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs): + return self._accum_func("cumsum", np.cumsum, axis, skipna, *args, **kwargs) + + def cumprod(self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs): + return self._accum_func("cumprod", np.cumprod, axis, skipna, *args, **kwargs) + + @final + def _stat_function_ddof( + self, + name: str, + func, + axis: Axis | None = None, + skipna: bool_t = True, + level: Level | None = None, + ddof: int = 1, + numeric_only: bool_t | None = None, + **kwargs, + ) -> Series | float: + nv.validate_stat_ddof_func((), kwargs, fname=name) + validate_bool_kwarg(skipna, "skipna", none_allowed=False) + if axis is None: + axis = self._stat_axis_number + if level is not None: + warnings.warn( + "Using the level keyword in DataFrame and Series aggregations is " + "deprecated and will be removed in a future version. Use groupby " + "instead. 
df.var(level=1) should use df.groupby(level=1).var().", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self._agg_by_level( + name, axis=axis, level=level, skipna=skipna, ddof=ddof + ) + return self._reduce( + func, name, axis=axis, numeric_only=numeric_only, skipna=skipna, ddof=ddof + ) + + def sem( + self, + axis: Axis | None = None, + skipna: bool_t = True, + level: Level | None = None, + ddof: int = 1, + numeric_only: bool_t | None = None, + **kwargs, + ) -> Series | float: + return self._stat_function_ddof( + "sem", nanops.nansem, axis, skipna, level, ddof, numeric_only, **kwargs + ) + + def var( + self, + axis: Axis | None = None, + skipna: bool_t = True, + level: Level | None = None, + ddof: int = 1, + numeric_only: bool_t | None = None, + **kwargs, + ) -> Series | float: + return self._stat_function_ddof( + "var", nanops.nanvar, axis, skipna, level, ddof, numeric_only, **kwargs + ) + + def std( + self, + axis: Axis | None = None, + skipna: bool_t = True, + level: Level | None = None, + ddof: int = 1, + numeric_only: bool_t | None = None, + **kwargs, + ) -> Series | float: + return self._stat_function_ddof( + "std", nanops.nanstd, axis, skipna, level, ddof, numeric_only, **kwargs + ) + + @final + def _stat_function( + self, + name: str, + func, + axis: Axis | None | lib.NoDefault = None, + skipna: bool_t = True, + level: Level | None = None, + numeric_only: bool_t | None = None, + **kwargs, + ): + if name == "median": + nv.validate_median((), kwargs) + else: + nv.validate_stat_func((), kwargs, fname=name) + + validate_bool_kwarg(skipna, "skipna", none_allowed=False) + + if axis is None and level is None and self.ndim > 1: + # user must have explicitly passed axis=None + # GH#21597 + warnings.warn( + f"In a future version, DataFrame.{name}(axis=None) will return a " + f"scalar {name} over the entire DataFrame. To retain the old " + f"behavior, use 'frame.{name}(axis=0)' or just 'frame.{name}()'", + FutureWarning, + stacklevel=find_stack_level(), + ) + if axis is lib.no_default: + axis = None + + if axis is None: + axis = self._stat_axis_number + axis = cast(Axis, axis) + if level is not None: + warnings.warn( + "Using the level keyword in DataFrame and Series aggregations is " + "deprecated and will be removed in a future version. Use groupby " + "instead. 
df.median(level=1) should use df.groupby(level=1).median().", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self._agg_by_level( + name, axis=axis, level=level, skipna=skipna, numeric_only=numeric_only + ) + return self._reduce( + func, name=name, axis=axis, skipna=skipna, numeric_only=numeric_only + ) + + def min( + self, + axis: Axis | None | lib.NoDefault = lib.no_default, + skipna: bool_t = True, + level: Level | None = None, + numeric_only: bool_t | None = None, + **kwargs, + ): + return self._stat_function( + "min", + nanops.nanmin, + axis, + skipna, + level, + numeric_only, + **kwargs, + ) + + def max( + self, + axis: Axis | None | lib.NoDefault = lib.no_default, + skipna: bool_t = True, + level: Level | None = None, + numeric_only: bool_t | None = None, + **kwargs, + ): + return self._stat_function( + "max", + nanops.nanmax, + axis, + skipna, + level, + numeric_only, + **kwargs, + ) + + def mean( + self, + axis: Axis | None | lib.NoDefault = lib.no_default, + skipna: bool_t = True, + level: Level | None = None, + numeric_only: bool_t | None = None, + **kwargs, + ) -> Series | float: + return self._stat_function( + "mean", nanops.nanmean, axis, skipna, level, numeric_only, **kwargs + ) + + def median( + self, + axis: Axis | None | lib.NoDefault = lib.no_default, + skipna: bool_t = True, + level: Level | None = None, + numeric_only: bool_t | None = None, + **kwargs, + ) -> Series | float: + return self._stat_function( + "median", nanops.nanmedian, axis, skipna, level, numeric_only, **kwargs + ) + + def skew( + self, + axis: Axis | None | lib.NoDefault = lib.no_default, + skipna: bool_t = True, + level: Level | None = None, + numeric_only: bool_t | None = None, + **kwargs, + ) -> Series | float: + return self._stat_function( + "skew", nanops.nanskew, axis, skipna, level, numeric_only, **kwargs + ) + + def kurt( + self, + axis: Axis | None | lib.NoDefault = lib.no_default, + skipna: bool_t = True, + level: Level | None = None, + numeric_only: bool_t | None = None, + **kwargs, + ) -> Series | float: + return self._stat_function( + "kurt", nanops.nankurt, axis, skipna, level, numeric_only, **kwargs + ) + + kurtosis = kurt + + @final + def _min_count_stat_function( + self, + name: str, + func, + axis: Axis | None = None, + skipna: bool_t = True, + level: Level | None = None, + numeric_only: bool_t | None = None, + min_count: int = 0, + **kwargs, + ): + if name == "sum": + nv.validate_sum((), kwargs) + elif name == "prod": + nv.validate_prod((), kwargs) + else: + nv.validate_stat_func((), kwargs, fname=name) + + validate_bool_kwarg(skipna, "skipna", none_allowed=False) + + if axis is None: + axis = self._stat_axis_number + if level is not None: + warnings.warn( + "Using the level keyword in DataFrame and Series aggregations is " + "deprecated and will be removed in a future version. Use groupby " + "instead. 
df.sum(level=1) should use df.groupby(level=1).sum().", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self._agg_by_level( + name, + axis=axis, + level=level, + skipna=skipna, + min_count=min_count, + numeric_only=numeric_only, + ) + + return self._reduce( + func, + name=name, + axis=axis, + skipna=skipna, + numeric_only=numeric_only, + min_count=min_count, + ) + + def sum( + self, + axis: Axis | None = None, + skipna: bool_t = True, + level: Level | None = None, + numeric_only: bool_t | None = None, + min_count=0, + **kwargs, + ): + return self._min_count_stat_function( + "sum", nanops.nansum, axis, skipna, level, numeric_only, min_count, **kwargs + ) + + def prod( + self, + axis: Axis | None = None, + skipna: bool_t = True, + level: Level | None = None, + numeric_only: bool_t | None = None, + min_count: int = 0, + **kwargs, + ): + return self._min_count_stat_function( + "prod", + nanops.nanprod, + axis, + skipna, + level, + numeric_only, + min_count, + **kwargs, + ) + + product = prod + + def mad( + self, + axis: Axis | None = None, + skipna: bool_t = True, + level: Level | None = None, + ) -> Series | float: + """ + {desc} + + Parameters + ---------- + axis : {axis_descr} + Axis for the function to be applied on. + skipna : bool, default True + Exclude NA/null values when computing the result. + level : int or level name, default None + If the axis is a MultiIndex (hierarchical), count along a + particular level, collapsing into a {name1}. + + Returns + ------- + {name1} or {name2} (if level specified)\ + {see_also}\ + {examples} + """ + if not is_bool(skipna): + warnings.warn( + "Passing None for skipna is deprecated and will raise in a future" + "version. Pass True instead. Only boolean values will be allowed " + "in the future.", + FutureWarning, + stacklevel=find_stack_level(), + ) + skipna = True + if axis is None: + axis = self._stat_axis_number + if level is not None: + warnings.warn( + "Using the level keyword in DataFrame and Series aggregations is " + "deprecated and will be removed in a future version. Use groupby " + "instead. 
df.mad(level=1) should use df.groupby(level=1).mad()", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self._agg_by_level("mad", axis=axis, level=level, skipna=skipna) + + data = self._get_numeric_data() + if axis == 0: + demeaned = data - data.mean(axis=0) + else: + demeaned = data.sub(data.mean(axis=1), axis=0) + return np.abs(demeaned).mean(axis=axis, skipna=skipna) + + @classmethod + def _add_numeric_operations(cls): + """ + Add the operations to the cls; evaluate the doc strings again + """ + axis_descr, name1, name2 = _doc_params(cls) + + @doc( + _bool_doc, + desc=_any_desc, + name1=name1, + name2=name2, + axis_descr=axis_descr, + see_also=_any_see_also, + examples=_any_examples, + empty_value=False, + ) + def any(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs): + return NDFrame.any(self, axis, bool_only, skipna, level, **kwargs) + + setattr(cls, "any", any) + + @doc( + _bool_doc, + desc=_all_desc, + name1=name1, + name2=name2, + axis_descr=axis_descr, + see_also=_all_see_also, + examples=_all_examples, + empty_value=True, + ) + def all(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs): + return NDFrame.all(self, axis, bool_only, skipna, level, **kwargs) + + setattr(cls, "all", all) + + # error: Argument 1 to "doc" has incompatible type "Optional[str]"; expected + # "Union[str, Callable[..., Any]]" + @doc( + NDFrame.mad.__doc__, # type: ignore[arg-type] + desc="Return the mean absolute deviation of the values " + "over the requested axis.", + name1=name1, + name2=name2, + axis_descr=axis_descr, + see_also="", + examples="", + ) + def mad(self, axis=None, skipna=True, level=None): + return NDFrame.mad(self, axis, skipna, level) + + setattr(cls, "mad", mad) + + @doc( + _num_ddof_doc, + desc="Return unbiased standard error of the mean over requested " + "axis.\n\nNormalized by N-1 by default. This can be changed " + "using the ddof argument", + name1=name1, + name2=name2, + axis_descr=axis_descr, + notes="", + examples="", + ) + def sem( + self, + axis=None, + skipna=True, + level=None, + ddof=1, + numeric_only=None, + **kwargs, + ): + return NDFrame.sem(self, axis, skipna, level, ddof, numeric_only, **kwargs) + + setattr(cls, "sem", sem) + + @doc( + _num_ddof_doc, + desc="Return unbiased variance over requested axis.\n\nNormalized by " + "N-1 by default. This can be changed using the ddof argument.", + name1=name1, + name2=name2, + axis_descr=axis_descr, + notes="", + examples=_var_examples, + ) + def var( + self, + axis=None, + skipna=True, + level=None, + ddof=1, + numeric_only=None, + **kwargs, + ): + return NDFrame.var(self, axis, skipna, level, ddof, numeric_only, **kwargs) + + setattr(cls, "var", var) + + @doc( + _num_ddof_doc, + desc="Return sample standard deviation over requested axis." + "\n\nNormalized by N-1 by default. 
This can be changed using the " + "ddof argument.", + name1=name1, + name2=name2, + axis_descr=axis_descr, + notes=_std_notes, + examples=_std_examples, + ) + def std( + self, + axis=None, + skipna=True, + level=None, + ddof=1, + numeric_only=None, + **kwargs, + ): + return NDFrame.std(self, axis, skipna, level, ddof, numeric_only, **kwargs) + + setattr(cls, "std", std) + + @doc( + _cnum_doc, + desc="minimum", + name1=name1, + name2=name2, + axis_descr=axis_descr, + accum_func_name="min", + examples=_cummin_examples, + ) + def cummin(self, axis=None, skipna=True, *args, **kwargs): + return NDFrame.cummin(self, axis, skipna, *args, **kwargs) + + setattr(cls, "cummin", cummin) + + @doc( + _cnum_doc, + desc="maximum", + name1=name1, + name2=name2, + axis_descr=axis_descr, + accum_func_name="max", + examples=_cummax_examples, + ) + def cummax(self, axis=None, skipna=True, *args, **kwargs): + return NDFrame.cummax(self, axis, skipna, *args, **kwargs) + + setattr(cls, "cummax", cummax) + + @doc( + _cnum_doc, + desc="sum", + name1=name1, + name2=name2, + axis_descr=axis_descr, + accum_func_name="sum", + examples=_cumsum_examples, + ) + def cumsum(self, axis=None, skipna=True, *args, **kwargs): + return NDFrame.cumsum(self, axis, skipna, *args, **kwargs) + + setattr(cls, "cumsum", cumsum) + + @doc( + _cnum_doc, + desc="product", + name1=name1, + name2=name2, + axis_descr=axis_descr, + accum_func_name="prod", + examples=_cumprod_examples, + ) + def cumprod(self, axis=None, skipna=True, *args, **kwargs): + return NDFrame.cumprod(self, axis, skipna, *args, **kwargs) + + setattr(cls, "cumprod", cumprod) + + @doc( + _num_doc, + desc="Return the sum of the values over the requested axis.\n\n" + "This is equivalent to the method ``numpy.sum``.", + name1=name1, + name2=name2, + axis_descr=axis_descr, + min_count=_min_count_stub, + see_also=_stat_func_see_also, + examples=_sum_examples, + ) + def sum( + self, + axis=None, + skipna=True, + level=None, + numeric_only=None, + min_count=0, + **kwargs, + ): + return NDFrame.sum( + self, axis, skipna, level, numeric_only, min_count, **kwargs + ) + + setattr(cls, "sum", sum) + + @doc( + _num_doc, + desc="Return the product of the values over the requested axis.", + name1=name1, + name2=name2, + axis_descr=axis_descr, + min_count=_min_count_stub, + see_also=_stat_func_see_also, + examples=_prod_examples, + ) + def prod( + self, + axis=None, + skipna=True, + level=None, + numeric_only=None, + min_count=0, + **kwargs, + ): + return NDFrame.prod( + self, axis, skipna, level, numeric_only, min_count, **kwargs + ) + + setattr(cls, "prod", prod) + cls.product = prod + + @doc( + _num_doc, + desc="Return the mean of the values over the requested axis.", + name1=name1, + name2=name2, + axis_descr=axis_descr, + min_count="", + see_also="", + examples="", + ) + def mean( + self, + axis: int | None | lib.NoDefault = lib.no_default, + skipna=True, + level=None, + numeric_only=None, + **kwargs, + ): + return NDFrame.mean(self, axis, skipna, level, numeric_only, **kwargs) + + setattr(cls, "mean", mean) + + @doc( + _num_doc, + desc="Return unbiased skew over requested axis.\n\nNormalized by N-1.", + name1=name1, + name2=name2, + axis_descr=axis_descr, + min_count="", + see_also="", + examples="", + ) + def skew( + self, + axis: int | None | lib.NoDefault = lib.no_default, + skipna=True, + level=None, + numeric_only=None, + **kwargs, + ): + return NDFrame.skew(self, axis, skipna, level, numeric_only, **kwargs) + + setattr(cls, "skew", skew) + + @doc( + _num_doc, + desc="Return 
unbiased kurtosis over requested axis.\n\n" + "Kurtosis obtained using Fisher's definition of\n" + "kurtosis (kurtosis of normal == 0.0). Normalized " + "by N-1.", + name1=name1, + name2=name2, + axis_descr=axis_descr, + min_count="", + see_also="", + examples="", + ) + def kurt( + self, + axis: Axis | None | lib.NoDefault = lib.no_default, + skipna=True, + level=None, + numeric_only=None, + **kwargs, + ): + return NDFrame.kurt(self, axis, skipna, level, numeric_only, **kwargs) + + setattr(cls, "kurt", kurt) + cls.kurtosis = kurt + + @doc( + _num_doc, + desc="Return the median of the values over the requested axis.", + name1=name1, + name2=name2, + axis_descr=axis_descr, + min_count="", + see_also="", + examples="", + ) + def median( + self, + axis: int | None | lib.NoDefault = lib.no_default, + skipna=True, + level=None, + numeric_only=None, + **kwargs, + ): + return NDFrame.median(self, axis, skipna, level, numeric_only, **kwargs) + + setattr(cls, "median", median) + + # error: Untyped decorator makes function "max" untyped + @doc( # type: ignore[misc] + _num_doc, + desc="Return the maximum of the values over the requested axis.\n\n" + "If you want the *index* of the maximum, use ``idxmax``. This is " + "the equivalent of the ``numpy.ndarray`` method ``argmax``.", + name1=name1, + name2=name2, + axis_descr=axis_descr, + min_count="", + see_also=_stat_func_see_also, + examples=_max_examples, + ) + def max( + self, + axis: int | None | lib.NoDefault = lib.no_default, + skipna=True, + level=None, + numeric_only=None, + **kwargs, + ): + return NDFrame.max(self, axis, skipna, level, numeric_only, **kwargs) + + setattr(cls, "max", max) + + # error: Untyped decorator makes function "max" untyped + @doc( # type: ignore[misc] + _num_doc, + desc="Return the minimum of the values over the requested axis.\n\n" + "If you want the *index* of the minimum, use ``idxmin``. 
This is " + "the equivalent of the ``numpy.ndarray`` method ``argmin``.", + name1=name1, + name2=name2, + axis_descr=axis_descr, + min_count="", + see_also=_stat_func_see_also, + examples=_min_examples, + ) + def min( + self, + axis: int | None | lib.NoDefault = lib.no_default, + skipna=True, + level=None, + numeric_only=None, + **kwargs, + ): + return NDFrame.min(self, axis, skipna, level, numeric_only, **kwargs) + + setattr(cls, "min", min) + + @final + @doc(Rolling) + def rolling( + self, + window: int | timedelta | BaseOffset | BaseIndexer, + min_periods: int | None = None, + center: bool_t = False, + win_type: str | None = None, + on: str | None = None, + axis: Axis = 0, + closed: str | None = None, + method: str = "single", + ): + axis = self._get_axis_number(axis) + + if win_type is not None: + return Window( + self, + window=window, + min_periods=min_periods, + center=center, + win_type=win_type, + on=on, + axis=axis, + closed=closed, + method=method, + ) + + return Rolling( + self, + window=window, + min_periods=min_periods, + center=center, + win_type=win_type, + on=on, + axis=axis, + closed=closed, + method=method, + ) + + @final + @doc(Expanding) + def expanding( + self, + min_periods: int = 1, + center: bool_t | None = None, + axis: Axis = 0, + method: str = "single", + ) -> Expanding: + axis = self._get_axis_number(axis) + if center is not None: + warnings.warn( + "The `center` argument on `expanding` will be removed in the future.", + FutureWarning, + stacklevel=find_stack_level(), + ) + else: + center = False + + return Expanding( + self, min_periods=min_periods, center=center, axis=axis, method=method + ) + + @final + @doc(ExponentialMovingWindow) + def ewm( + self, + com: float | None = None, + span: float | None = None, + halflife: float | TimedeltaConvertibleTypes | None = None, + alpha: float | None = None, + min_periods: int | None = 0, + adjust: bool_t = True, + ignore_na: bool_t = False, + axis: Axis = 0, + times: str | np.ndarray | DataFrame | Series | None = None, + method: str = "single", + ) -> ExponentialMovingWindow: + axis = self._get_axis_number(axis) + return ExponentialMovingWindow( + self, + com=com, + span=span, + halflife=halflife, + alpha=alpha, + min_periods=min_periods, + adjust=adjust, + ignore_na=ignore_na, + axis=axis, + times=times, + method=method, + ) + + # ---------------------------------------------------------------------- + # Arithmetic Methods + + @final + def _inplace_method(self, other, op): + """ + Wrap arithmetic method to operate inplace. + """ + result = op(self, other) + + if ( + self.ndim == 1 + and result._indexed_same(self) + and is_dtype_equal(result.dtype, self.dtype) + ): + # GH#36498 this inplace op can _actually_ be inplace. 
+ self._values[:] = result._values + return self + + # Delete cacher + self._reset_cacher() + + # this makes sure that we are aligned like the input + # we are updating inplace so we want to ignore is_copy + self._update_inplace( + result.reindex_like(self, copy=False), verify_is_copy=False + ) + return self + + def __iadd__(self, other): + # error: Unsupported left operand type for + ("Type[NDFrame]") + return self._inplace_method(other, type(self).__add__) # type: ignore[operator] + + def __isub__(self, other): + # error: Unsupported left operand type for - ("Type[NDFrame]") + return self._inplace_method(other, type(self).__sub__) # type: ignore[operator] + + def __imul__(self, other): + # error: Unsupported left operand type for * ("Type[NDFrame]") + return self._inplace_method(other, type(self).__mul__) # type: ignore[operator] + + def __itruediv__(self, other): + # error: Unsupported left operand type for / ("Type[NDFrame]") + return self._inplace_method( + other, type(self).__truediv__ # type: ignore[operator] + ) + + def __ifloordiv__(self, other): + # error: Unsupported left operand type for // ("Type[NDFrame]") + return self._inplace_method( + other, type(self).__floordiv__ # type: ignore[operator] + ) + + def __imod__(self, other): + # error: Unsupported left operand type for % ("Type[NDFrame]") + return self._inplace_method(other, type(self).__mod__) # type: ignore[operator] + + def __ipow__(self, other): + # error: Unsupported left operand type for ** ("Type[NDFrame]") + return self._inplace_method(other, type(self).__pow__) # type: ignore[operator] + + def __iand__(self, other): + # error: Unsupported left operand type for & ("Type[NDFrame]") + return self._inplace_method(other, type(self).__and__) # type: ignore[operator] + + def __ior__(self, other): + # error: Unsupported left operand type for | ("Type[NDFrame]") + return self._inplace_method(other, type(self).__or__) # type: ignore[operator] + + def __ixor__(self, other): + # error: Unsupported left operand type for ^ ("Type[NDFrame]") + return self._inplace_method(other, type(self).__xor__) # type: ignore[operator] + + # ---------------------------------------------------------------------- + # Misc methods + + @final + def _find_valid_index(self, *, how: str) -> Hashable | None: + """ + Retrieves the index of the first valid value. + + Parameters + ---------- + how : {'first', 'last'} + Use this parameter to change between the first or last valid index. + + Returns + ------- + idx_first_valid : type of index + """ + idxpos = find_valid_index(self._values, how=how) + if idxpos is None: + return None + return self.index[idxpos] + + @final + @doc(position="first", klass=_shared_doc_kwargs["klass"]) + def first_valid_index(self) -> Hashable | None: + """ + Return index for {position} non-NA value or None, if no non-NA value is found. + + Returns + ------- + scalar : type of index + + Notes + ----- + If all elements are non-NA/null, returns None. + Also returns None for empty {klass}. 
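+
+ Examples
+ --------
+ A short illustrative example (values chosen here only for
+ illustration):
+
+ >>> s = pd.Series([None, 3.0, 4.0])
+ >>> s.first_valid_index()
+ 1
+ >>> s.last_valid_index()
+ 2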
+ """ + return self._find_valid_index(how="first") + + @final + @doc(first_valid_index, position="last", klass=_shared_doc_kwargs["klass"]) + def last_valid_index(self) -> Hashable | None: + return self._find_valid_index(how="last") + + +def _doc_params(cls): + """Return a tuple of the doc params.""" + axis_descr = ( + f"{{{', '.join([f'{a} ({i})' for i, a in enumerate(cls._AXIS_ORDERS)])}}}" + ) + name = cls._constructor_sliced.__name__ if cls._AXIS_LEN > 1 else "scalar" + name2 = cls.__name__ + return axis_descr, name, name2 + + +_num_doc = """ +{desc} + +Parameters +---------- +axis : {axis_descr} + Axis for the function to be applied on. +skipna : bool, default True + Exclude NA/null values when computing the result. +level : int or level name, default None + If the axis is a MultiIndex (hierarchical), count along a + particular level, collapsing into a {name1}. +numeric_only : bool, default None + Include only float, int, boolean columns. If None, will attempt to use + everything, then use only numeric data. Not implemented for Series. +{min_count}\ +**kwargs + Additional keyword arguments to be passed to the function. + +Returns +------- +{name1} or {name2} (if level specified)\ +{see_also}\ +{examples} +""" + +_num_ddof_doc = """ +{desc} + +Parameters +---------- +axis : {axis_descr} +skipna : bool, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA. +level : int or level name, default None + If the axis is a MultiIndex (hierarchical), count along a + particular level, collapsing into a {name1}. +ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations is N - ddof, + where N represents the number of elements. +numeric_only : bool, default None + Include only float, int, boolean columns. If None, will attempt to use + everything, then use only numeric data. Not implemented for Series. + +Returns +------- +{name1} or {name2} (if level specified) \ +{notes}\ +{examples} +""" + +_std_notes = """ + +Notes +----- +To have the same behaviour as `numpy.std`, use `ddof=0` (instead of the +default `ddof=1`)""" + +_std_examples = """ + +Examples +-------- +>>> df = pd.DataFrame({'person_id': [0, 1, 2, 3], +... 'age': [21, 25, 62, 43], +... 'height': [1.61, 1.87, 1.49, 2.01]} +... ).set_index('person_id') +>>> df + age height +person_id +0 21 1.61 +1 25 1.87 +2 62 1.49 +3 43 2.01 + +The standard deviation of the columns can be found as follows: + +>>> df.std() +age 18.786076 +height 0.237417 + +Alternatively, `ddof=0` can be set to normalize by N instead of N-1: + +>>> df.std(ddof=0) +age 16.269219 +height 0.205609""" + +_var_examples = """ + +Examples +-------- +>>> df = pd.DataFrame({'person_id': [0, 1, 2, 3], +... 'age': [21, 25, 62, 43], +... 'height': [1.61, 1.87, 1.49, 2.01]} +... ).set_index('person_id') +>>> df + age height +person_id +0 21 1.61 +1 25 1.87 +2 62 1.49 +3 43 2.01 + +>>> df.var() +age 352.916667 +height 0.056367 + +Alternatively, ``ddof=0`` can be set to normalize by N instead of N-1: + +>>> df.var(ddof=0) +age 264.687500 +height 0.042275""" + +_bool_doc = """ +{desc} + +Parameters +---------- +axis : {{0 or 'index', 1 or 'columns', None}}, default 0 + Indicate which axis or axes should be reduced. + + * 0 / 'index' : reduce the index, return a Series whose index is the + original column labels. + * 1 / 'columns' : reduce the columns, return a Series whose index is the + original index. + * None : reduce all axes, return a scalar. + +bool_only : bool, default None + Include only boolean columns. 
If None, will attempt to use everything, + then use only boolean data. Not implemented for Series. +skipna : bool, default True + Exclude NA/null values. If the entire row/column is NA and skipna is + True, then the result will be {empty_value}, as for an empty row/column. + If skipna is False, then NA are treated as True, because these are not + equal to zero. +level : int or level name, default None + If the axis is a MultiIndex (hierarchical), count along a + particular level, collapsing into a {name1}. +**kwargs : any, default None + Additional keywords have no effect but might be accepted for + compatibility with NumPy. + +Returns +------- +{name1} or {name2} + If level is specified, then, {name2} is returned; otherwise, {name1} + is returned. + +{see_also} +{examples}""" + +_all_desc = """\ +Return whether all elements are True, potentially over an axis. + +Returns True unless there at least one element within a series or +along a Dataframe axis that is False or equivalent (e.g. zero or +empty).""" + +_all_examples = """\ +Examples +-------- +**Series** + +>>> pd.Series([True, True]).all() +True +>>> pd.Series([True, False]).all() +False +>>> pd.Series([], dtype="float64").all() +True +>>> pd.Series([np.nan]).all() +True +>>> pd.Series([np.nan]).all(skipna=False) +True + +**DataFrames** + +Create a dataframe from a dictionary. + +>>> df = pd.DataFrame({'col1': [True, True], 'col2': [True, False]}) +>>> df + col1 col2 +0 True True +1 True False + +Default behaviour checks if column-wise values all return True. + +>>> df.all() +col1 True +col2 False +dtype: bool + +Specify ``axis='columns'`` to check if row-wise values all return True. + +>>> df.all(axis='columns') +0 True +1 False +dtype: bool + +Or ``axis=None`` for whether every value is True. + +>>> df.all(axis=None) +False +""" + +_all_see_also = """\ +See Also +-------- +Series.all : Return True if all elements are True. +DataFrame.any : Return True if one (or more) elements are True. +""" + +_cnum_doc = """ +Return cumulative {desc} over a DataFrame or Series axis. + +Returns a DataFrame or Series of the same size containing the cumulative +{desc}. + +Parameters +---------- +axis : {{0 or 'index', 1 or 'columns'}}, default 0 + The index or the name of the axis. 0 is equivalent to None or 'index'. +skipna : bool, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA. +*args, **kwargs + Additional keywords have no effect but might be accepted for + compatibility with NumPy. + +Returns +------- +{name1} or {name2} + Return cumulative {desc} of {name1} or {name2}. + +See Also +-------- +core.window.Expanding.{accum_func_name} : Similar functionality + but ignores ``NaN`` values. +{name2}.{accum_func_name} : Return the {desc} over + {name2} axis. +{name2}.cummax : Return cumulative maximum over {name2} axis. +{name2}.cummin : Return cumulative minimum over {name2} axis. +{name2}.cumsum : Return cumulative sum over {name2} axis. +{name2}.cumprod : Return cumulative product over {name2} axis. + +{examples}""" + +_cummin_examples = """\ +Examples +-------- +**Series** + +>>> s = pd.Series([2, np.nan, 5, -1, 0]) +>>> s +0 2.0 +1 NaN +2 5.0 +3 -1.0 +4 0.0 +dtype: float64 + +By default, NA values are ignored. + +>>> s.cummin() +0 2.0 +1 NaN +2 2.0 +3 -1.0 +4 -1.0 +dtype: float64 + +To include NA values in the operation, use ``skipna=False`` + +>>> s.cummin(skipna=False) +0 2.0 +1 NaN +2 NaN +3 NaN +4 NaN +dtype: float64 + +**DataFrame** + +>>> df = pd.DataFrame([[2.0, 1.0], +... [3.0, np.nan], +... 
[1.0, 0.0]], +... columns=list('AB')) +>>> df + A B +0 2.0 1.0 +1 3.0 NaN +2 1.0 0.0 + +By default, iterates over rows and finds the minimum +in each column. This is equivalent to ``axis=None`` or ``axis='index'``. + +>>> df.cummin() + A B +0 2.0 1.0 +1 2.0 NaN +2 1.0 0.0 + +To iterate over columns and find the minimum in each row, +use ``axis=1`` + +>>> df.cummin(axis=1) + A B +0 2.0 1.0 +1 3.0 NaN +2 1.0 0.0 +""" + +_cumsum_examples = """\ +Examples +-------- +**Series** + +>>> s = pd.Series([2, np.nan, 5, -1, 0]) +>>> s +0 2.0 +1 NaN +2 5.0 +3 -1.0 +4 0.0 +dtype: float64 + +By default, NA values are ignored. + +>>> s.cumsum() +0 2.0 +1 NaN +2 7.0 +3 6.0 +4 6.0 +dtype: float64 + +To include NA values in the operation, use ``skipna=False`` + +>>> s.cumsum(skipna=False) +0 2.0 +1 NaN +2 NaN +3 NaN +4 NaN +dtype: float64 + +**DataFrame** + +>>> df = pd.DataFrame([[2.0, 1.0], +... [3.0, np.nan], +... [1.0, 0.0]], +... columns=list('AB')) +>>> df + A B +0 2.0 1.0 +1 3.0 NaN +2 1.0 0.0 + +By default, iterates over rows and finds the sum +in each column. This is equivalent to ``axis=None`` or ``axis='index'``. + +>>> df.cumsum() + A B +0 2.0 1.0 +1 5.0 NaN +2 6.0 1.0 + +To iterate over columns and find the sum in each row, +use ``axis=1`` + +>>> df.cumsum(axis=1) + A B +0 2.0 3.0 +1 3.0 NaN +2 1.0 1.0 +""" + +_cumprod_examples = """\ +Examples +-------- +**Series** + +>>> s = pd.Series([2, np.nan, 5, -1, 0]) +>>> s +0 2.0 +1 NaN +2 5.0 +3 -1.0 +4 0.0 +dtype: float64 + +By default, NA values are ignored. + +>>> s.cumprod() +0 2.0 +1 NaN +2 10.0 +3 -10.0 +4 -0.0 +dtype: float64 + +To include NA values in the operation, use ``skipna=False`` + +>>> s.cumprod(skipna=False) +0 2.0 +1 NaN +2 NaN +3 NaN +4 NaN +dtype: float64 + +**DataFrame** + +>>> df = pd.DataFrame([[2.0, 1.0], +... [3.0, np.nan], +... [1.0, 0.0]], +... columns=list('AB')) +>>> df + A B +0 2.0 1.0 +1 3.0 NaN +2 1.0 0.0 + +By default, iterates over rows and finds the product +in each column. This is equivalent to ``axis=None`` or ``axis='index'``. + +>>> df.cumprod() + A B +0 2.0 1.0 +1 6.0 NaN +2 6.0 0.0 + +To iterate over columns and find the product in each row, +use ``axis=1`` + +>>> df.cumprod(axis=1) + A B +0 2.0 2.0 +1 3.0 NaN +2 1.0 0.0 +""" + +_cummax_examples = """\ +Examples +-------- +**Series** + +>>> s = pd.Series([2, np.nan, 5, -1, 0]) +>>> s +0 2.0 +1 NaN +2 5.0 +3 -1.0 +4 0.0 +dtype: float64 + +By default, NA values are ignored. + +>>> s.cummax() +0 2.0 +1 NaN +2 5.0 +3 5.0 +4 5.0 +dtype: float64 + +To include NA values in the operation, use ``skipna=False`` + +>>> s.cummax(skipna=False) +0 2.0 +1 NaN +2 NaN +3 NaN +4 NaN +dtype: float64 + +**DataFrame** + +>>> df = pd.DataFrame([[2.0, 1.0], +... [3.0, np.nan], +... [1.0, 0.0]], +... columns=list('AB')) +>>> df + A B +0 2.0 1.0 +1 3.0 NaN +2 1.0 0.0 + +By default, iterates over rows and finds the maximum +in each column. This is equivalent to ``axis=None`` or ``axis='index'``. + +>>> df.cummax() + A B +0 2.0 1.0 +1 3.0 NaN +2 3.0 1.0 + +To iterate over columns and find the maximum in each row, +use ``axis=1`` + +>>> df.cummax(axis=1) + A B +0 2.0 2.0 +1 3.0 NaN +2 1.0 1.0 +""" + +_any_see_also = """\ +See Also +-------- +numpy.any : Numpy version of this method. +Series.any : Return whether any element is True. +Series.all : Return whether all elements are True. +DataFrame.any : Return whether any element is True over requested axis. +DataFrame.all : Return whether all elements are True over requested axis. 
+""" + +_any_desc = """\ +Return whether any element is True, potentially over an axis. + +Returns False unless there is at least one element within a series or +along a Dataframe axis that is True or equivalent (e.g. non-zero or +non-empty).""" + +_any_examples = """\ +Examples +-------- +**Series** + +For Series input, the output is a scalar indicating whether any element +is True. + +>>> pd.Series([False, False]).any() +False +>>> pd.Series([True, False]).any() +True +>>> pd.Series([], dtype="float64").any() +False +>>> pd.Series([np.nan]).any() +False +>>> pd.Series([np.nan]).any(skipna=False) +True + +**DataFrame** + +Whether each column contains at least one True element (the default). + +>>> df = pd.DataFrame({"A": [1, 2], "B": [0, 2], "C": [0, 0]}) +>>> df + A B C +0 1 0 0 +1 2 2 0 + +>>> df.any() +A True +B True +C False +dtype: bool + +Aggregating over the columns. + +>>> df = pd.DataFrame({"A": [True, False], "B": [1, 2]}) +>>> df + A B +0 True 1 +1 False 2 + +>>> df.any(axis='columns') +0 True +1 True +dtype: bool + +>>> df = pd.DataFrame({"A": [True, False], "B": [1, 0]}) +>>> df + A B +0 True 1 +1 False 0 + +>>> df.any(axis='columns') +0 True +1 False +dtype: bool + +Aggregating over the entire DataFrame with ``axis=None``. + +>>> df.any(axis=None) +True + +`any` for an empty DataFrame is an empty Series. + +>>> pd.DataFrame([]).any() +Series([], dtype: bool) +""" + +_shared_docs[ + "stat_func_example" +] = """ + +Examples +-------- +>>> idx = pd.MultiIndex.from_arrays([ +... ['warm', 'warm', 'cold', 'cold'], +... ['dog', 'falcon', 'fish', 'spider']], +... names=['blooded', 'animal']) +>>> s = pd.Series([4, 2, 0, 8], name='legs', index=idx) +>>> s +blooded animal +warm dog 4 + falcon 2 +cold fish 0 + spider 8 +Name: legs, dtype: int64 + +>>> s.{stat_func}() +{default_output}""" + +_sum_examples = _shared_docs["stat_func_example"].format( + stat_func="sum", verb="Sum", default_output=14, level_output_0=6, level_output_1=8 +) + +_sum_examples += """ + +By default, the sum of an empty or all-NA Series is ``0``. + +>>> pd.Series([], dtype="float64").sum() # min_count=0 is the default +0.0 + +This can be controlled with the ``min_count`` parameter. For example, if +you'd like the sum of an empty series to be NaN, pass ``min_count=1``. + +>>> pd.Series([], dtype="float64").sum(min_count=1) +nan + +Thanks to the ``skipna`` parameter, ``min_count`` handles all-NA and +empty series identically. + +>>> pd.Series([np.nan]).sum() +0.0 + +>>> pd.Series([np.nan]).sum(min_count=1) +nan""" + +_max_examples = _shared_docs["stat_func_example"].format( + stat_func="max", verb="Max", default_output=8, level_output_0=4, level_output_1=8 +) + +_min_examples = _shared_docs["stat_func_example"].format( + stat_func="min", verb="Min", default_output=0, level_output_0=2, level_output_1=0 +) + +_stat_func_see_also = """ + +See Also +-------- +Series.sum : Return the sum. +Series.min : Return the minimum. +Series.max : Return the maximum. +Series.idxmin : Return the index of the minimum. +Series.idxmax : Return the index of the maximum. +DataFrame.sum : Return the sum over the requested axis. +DataFrame.min : Return the minimum over the requested axis. +DataFrame.max : Return the maximum over the requested axis. +DataFrame.idxmin : Return the index of the minimum over the requested axis. 
+DataFrame.idxmax : Return the index of the maximum over the requested axis.""" + +_prod_examples = """ + +Examples +-------- +By default, the product of an empty or all-NA Series is ``1`` + +>>> pd.Series([], dtype="float64").prod() +1.0 + +This can be controlled with the ``min_count`` parameter + +>>> pd.Series([], dtype="float64").prod(min_count=1) +nan + +Thanks to the ``skipna`` parameter, ``min_count`` handles all-NA and +empty series identically. + +>>> pd.Series([np.nan]).prod() +1.0 + +>>> pd.Series([np.nan]).prod(min_count=1) +nan""" + +_min_count_stub = """\ +min_count : int, default 0 + The required number of valid values to perform the operation. If fewer than + ``min_count`` non-NA values are present the result will be NA. +""" + + +def _align_as_utc( + left: NDFrameT, right: NDFrameT, join_index: Index | None +) -> tuple[NDFrameT, NDFrameT]: + """ + If we are aligning timezone-aware DatetimeIndexes and the timezones + do not match, convert both to UTC. + """ + if is_datetime64tz_dtype(left.index.dtype): + if left.index.tz != right.index.tz: + if join_index is not None: + # GH#33671 ensure we don't change the index on + # our original Series (NB: by default deep=False) + left = left.copy() + right = right.copy() + left.index = join_index + right.index = join_index + + return left, right diff --git a/.local/lib/python3.8/site-packages/pandas/core/groupby/__init__.py b/.local/lib/python3.8/site-packages/pandas/core/groupby/__init__.py new file mode 100644 index 0000000..8248f37 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/groupby/__init__.py @@ -0,0 +1,15 @@ +from pandas.core.groupby.generic import ( + DataFrameGroupBy, + NamedAgg, + SeriesGroupBy, +) +from pandas.core.groupby.groupby import GroupBy +from pandas.core.groupby.grouper import Grouper + +__all__ = [ + "DataFrameGroupBy", + "NamedAgg", + "SeriesGroupBy", + "GroupBy", + "Grouper", +] diff --git a/.local/lib/python3.8/site-packages/pandas/core/groupby/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/groupby/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..1c3cbf6 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/groupby/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/groupby/__pycache__/base.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/groupby/__pycache__/base.cpython-38.pyc new file mode 100644 index 0000000..9684a26 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/groupby/__pycache__/base.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/groupby/__pycache__/categorical.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/groupby/__pycache__/categorical.cpython-38.pyc new file mode 100644 index 0000000..5dd2257 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/groupby/__pycache__/categorical.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/groupby/__pycache__/generic.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/groupby/__pycache__/generic.cpython-38.pyc new file mode 100644 index 0000000..ae5b721 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/groupby/__pycache__/generic.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/groupby/__pycache__/groupby.cpython-38.pyc 
b/.local/lib/python3.8/site-packages/pandas/core/groupby/__pycache__/groupby.cpython-38.pyc new file mode 100644 index 0000000..a4b8c76 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/groupby/__pycache__/groupby.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/groupby/__pycache__/grouper.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/groupby/__pycache__/grouper.cpython-38.pyc new file mode 100644 index 0000000..a0a77e5 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/groupby/__pycache__/grouper.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/groupby/__pycache__/indexing.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/groupby/__pycache__/indexing.cpython-38.pyc new file mode 100644 index 0000000..7e44017 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/groupby/__pycache__/indexing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/groupby/__pycache__/numba_.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/groupby/__pycache__/numba_.cpython-38.pyc new file mode 100644 index 0000000..0326bbb Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/groupby/__pycache__/numba_.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/groupby/__pycache__/ops.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/groupby/__pycache__/ops.cpython-38.pyc new file mode 100644 index 0000000..d80d5bd Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/groupby/__pycache__/ops.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/groupby/base.py b/.local/lib/python3.8/site-packages/pandas/core/groupby/base.py new file mode 100644 index 0000000..0c8474c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/groupby/base.py @@ -0,0 +1,163 @@ +""" +Provide basic components for groupby. These definitions +hold the allowlist of methods that are exposed on the +SeriesGroupBy and the DataFrameGroupBy objects. +""" +from __future__ import annotations + +import dataclasses +from typing import Hashable + + +@dataclasses.dataclass(order=True, frozen=True) +class OutputKey: + label: Hashable + position: int + + +# special case to prevent duplicate plots when catching exceptions when +# forwarding methods from NDFrames +plotting_methods = frozenset(["plot", "hist"]) + +common_apply_allowlist = ( + frozenset( + [ + "quantile", + "fillna", + "mad", + "take", + "idxmax", + "idxmin", + "tshift", + "skew", + "corr", + "cov", + "diff", + ] + ) + | plotting_methods +) + +series_apply_allowlist: frozenset[str] = ( + common_apply_allowlist + | frozenset( + {"nlargest", "nsmallest", "is_monotonic_increasing", "is_monotonic_decreasing"} + ) +) | frozenset(["dtype", "unique"]) + +dataframe_apply_allowlist: frozenset[str] = common_apply_allowlist | frozenset( + ["dtypes", "corrwith"] +) + +# cythonized transformations or canned "agg+broadcast", which do not +# require postprocessing of the result by transform. +cythonized_kernels = frozenset(["cumprod", "cumsum", "shift", "cummin", "cummax"]) + +# List of aggregation/reduction functions. 
+# These map each group to a single numeric value +reduction_kernels = frozenset( + [ + "all", + "any", + "corrwith", + "count", + "first", + "idxmax", + "idxmin", + "last", + "mad", + "max", + "mean", + "median", + "min", + "ngroup", + "nth", + "nunique", + "prod", + # as long as `quantile`'s signature accepts only + # a single quantile value, it's a reduction. + # GH#27526 might change that. + "quantile", + "sem", + "size", + "skew", + "std", + "sum", + "var", + ] +) + +# List of transformation functions. +# a transformation is a function that, for each group, +# produces a result that has the same shape as the group. + + +# TODO(2.0) Remove after pad/backfill deprecation enforced +def maybe_normalize_deprecated_kernels(kernel): + if kernel == "backfill": + kernel = "bfill" + elif kernel == "pad": + kernel = "ffill" + return kernel + + +transformation_kernels = frozenset( + [ + "backfill", + "bfill", + "cumcount", + "cummax", + "cummin", + "cumprod", + "cumsum", + "diff", + "ffill", + "fillna", + "pad", + "pct_change", + "rank", + "shift", + "tshift", + ] +) + +# these are all the public methods on Grouper which don't belong +# in either of the above lists +groupby_other_methods = frozenset( + [ + "agg", + "aggregate", + "apply", + "boxplot", + # corr and cov return ngroups*ncolumns rows, so they + # are neither a transformation nor a reduction + "corr", + "cov", + "describe", + "dtypes", + "expanding", + "ewm", + "filter", + "get_group", + "groups", + "head", + "hist", + "indices", + "ndim", + "ngroups", + "ohlc", + "pipe", + "plot", + "resample", + "rolling", + "tail", + "take", + "transform", + "sample", + "value_counts", + ] +) +# Valid values of `name` for `groupby.transform(name)` +# NOTE: do NOT edit this directly. New additions should be inserted +# into the appropriate list above. +transform_kernel_allowlist = reduction_kernels | transformation_kernels diff --git a/.local/lib/python3.8/site-packages/pandas/core/groupby/categorical.py b/.local/lib/python3.8/site-packages/pandas/core/groupby/categorical.py new file mode 100644 index 0000000..2a26713 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/groupby/categorical.py @@ -0,0 +1,117 @@ +from __future__ import annotations + +import numpy as np + +from pandas.core.algorithms import unique1d +from pandas.core.arrays.categorical import ( + Categorical, + CategoricalDtype, + recode_for_categories, +) +from pandas.core.indexes.api import CategoricalIndex + + +def recode_for_groupby( + c: Categorical, sort: bool, observed: bool +) -> tuple[Categorical, Categorical | None]: + """ + Code the categories to ensure we can groupby for categoricals. + + If observed=True, we return a new Categorical with the observed + categories only. + + If sort=False, return a copy of self, coded with categories as + returned by .unique(), followed by any categories not appearing in + the data. If sort=True, return self. + + This method is needed solely to ensure the categorical index of the + GroupBy result has categories in the order of appearance in the data + (GH-8868). + + Parameters + ---------- + c : Categorical + sort : bool + The value of the sort parameter groupby was called with. + observed : bool + Account only for the observed values + + Returns + ------- + Categorical + If sort=False, the new categories are set to the order of + appearance in codes (unless ordered=True, in which case the + original order is preserved), followed by any unrepresented + categories in the original order. 
+ Categorical or None + If we are observed, return the original categorical, otherwise None + """ + # we only care about observed values + if observed: + # In cases with c.ordered, this is equivalent to + # return c.remove_unused_categories(), c + + unique_codes = unique1d(c.codes) + + take_codes = unique_codes[unique_codes != -1] + if c.ordered: + take_codes = np.sort(take_codes) + + # we recode according to the uniques + categories = c.categories.take(take_codes) + codes = recode_for_categories(c.codes, c.categories, categories) + + # return a new categorical that maps our new codes + # and categories + dtype = CategoricalDtype(categories, ordered=c.ordered) + return Categorical(codes, dtype=dtype, fastpath=True), c + + # Already sorted according to c.categories; all is fine + if sort: + return c, None + + # sort=False should order groups in as-encountered order (GH-8868) + cat = c.unique() + + # See GH-38140 for block below + # exclude nan from indexer for categories + take_codes = cat.codes[cat.codes != -1] + if cat.ordered: + take_codes = np.sort(take_codes) + cat = cat.set_categories(cat.categories.take(take_codes)) + + # But for groupby to work, all categories should be present, + # including those missing from the data (GH-13179), which .unique() + # above dropped + cat = cat.add_categories(c.categories[~c.categories.isin(cat.categories)]) + + return c.reorder_categories(cat.categories), None + + +def recode_from_groupby( + c: Categorical, sort: bool, ci: CategoricalIndex +) -> CategoricalIndex: + """ + Reverse the codes_to_groupby to account for sort / observed. + + Parameters + ---------- + c : Categorical + sort : bool + The value of the sort parameter groupby was called with. + ci : CategoricalIndex + The codes / categories to recode + + Returns + ------- + CategoricalIndex + """ + # we re-order to the original category orderings + if sort: + # error: "CategoricalIndex" has no attribute "set_categories" + return ci.set_categories(c.categories) # type: ignore[attr-defined] + + # we are not sorting, so add unobserved to the end + new_cats = c.categories[~c.categories.isin(ci.categories)] + # error: "CategoricalIndex" has no attribute "add_categories" + return ci.add_categories(new_cats) # type: ignore[attr-defined] diff --git a/.local/lib/python3.8/site-packages/pandas/core/groupby/generic.py b/.local/lib/python3.8/site-packages/pandas/core/groupby/generic.py new file mode 100644 index 0000000..81c5e74 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/groupby/generic.py @@ -0,0 +1,1785 @@ +""" +Define the SeriesGroupBy and DataFrameGroupBy +classes that hold the groupby interfaces (and some implementations). + +These are user facing as the result of the ``df.groupby(...)`` operations, +which here returns a DataFrameGroupBy object. 
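+
+For example (an illustrative snippet, with made-up column names):
+
+>>> import pandas as pd
+>>> df = pd.DataFrame({"key": ["a", "a", "b"], "val": [1, 2, 3]})
+>>> type(df.groupby("key")).__name__
+'DataFrameGroupBy'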
+""" +from __future__ import annotations + +from collections import abc +from functools import partial +from textwrap import dedent +from typing import ( + Any, + Callable, + Hashable, + Iterable, + Mapping, + NamedTuple, + Sequence, + TypeVar, + Union, + cast, +) +import warnings + +import numpy as np + +from pandas._libs import reduction as libreduction +from pandas._typing import ( + ArrayLike, + Manager, + Manager2D, + SingleManager, +) +from pandas.util._decorators import ( + Appender, + Substitution, + doc, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + ensure_int64, + is_bool, + is_categorical_dtype, + is_dict_like, + is_integer_dtype, + is_interval_dtype, + is_scalar, +) +from pandas.core.dtypes.missing import ( + isna, + notna, +) + +from pandas.core import ( + algorithms, + nanops, +) +from pandas.core.apply import ( + GroupByApply, + maybe_mangle_lambdas, + reconstruct_func, + validate_func_kwargs, +) +from pandas.core.base import SpecificationError +import pandas.core.common as com +from pandas.core.construction import create_series_with_explicit_dtype +from pandas.core.frame import DataFrame +from pandas.core.generic import NDFrame +from pandas.core.groupby import base +from pandas.core.groupby.groupby import ( + GroupBy, + _agg_template, + _apply_docs, + _transform_template, + warn_dropping_nuisance_columns_deprecated, +) +from pandas.core.groupby.grouper import get_grouper +from pandas.core.indexes.api import ( + Index, + MultiIndex, + all_indexes_same, +) +from pandas.core.series import Series +from pandas.core.util.numba_ import maybe_use_numba + +from pandas.plotting import boxplot_frame_groupby + +# TODO(typing) the return value on this callable should be any *scalar*. +AggScalar = Union[str, Callable[..., Any]] +# TODO: validate types on ScalarResult and move to _typing +# Blocked from using by https://github.com/python/mypy/issues/1484 +# See note at _mangle_lambda_list +ScalarResult = TypeVar("ScalarResult") + + +class NamedAgg(NamedTuple): + column: Hashable + aggfunc: AggScalar + + +def generate_property(name: str, klass: type[DataFrame | Series]): + """ + Create a property for a GroupBy subclass to dispatch to DataFrame/Series. + + Parameters + ---------- + name : str + klass : {DataFrame, Series} + + Returns + ------- + property + """ + + def prop(self): + return self._make_wrapper(name) + + parent_method = getattr(klass, name) + prop.__doc__ = parent_method.__doc__ or "" + prop.__name__ = name + return property(prop) + + +def pin_allowlisted_properties( + klass: type[DataFrame | Series], allowlist: frozenset[str] +): + """ + Create GroupBy member defs for DataFrame/Series names in a allowlist. + + Parameters + ---------- + klass : DataFrame or Series class + class where members are defined. + allowlist : frozenset[str] + Set of names of klass methods to be constructed + + Returns + ------- + class decorator + + Notes + ----- + Since we don't want to override methods explicitly defined in the + base class, any such name is skipped. 
+ """ + + def pinner(cls): + for name in allowlist: + if hasattr(cls, name): + # don't override anything that was explicitly defined + # in the base class + continue + + prop = generate_property(name, klass) + setattr(cls, name, prop) + + return cls + + return pinner + + +@pin_allowlisted_properties(Series, base.series_apply_allowlist) +class SeriesGroupBy(GroupBy[Series]): + _apply_allowlist = base.series_apply_allowlist + + def _wrap_agged_manager(self, mgr: Manager) -> Series: + if mgr.ndim == 1: + mgr = cast(SingleManager, mgr) + single = mgr + else: + mgr = cast(Manager2D, mgr) + single = mgr.iget(0) + ser = self.obj._constructor(single, name=self.obj.name) + # NB: caller is responsible for setting ser.index + return ser + + def _get_data_to_aggregate(self) -> SingleManager: + ser = self._obj_with_exclusions + single = ser._mgr + return single + + def _iterate_slices(self) -> Iterable[Series]: + yield self._selected_obj + + _agg_examples_doc = dedent( + """ + Examples + -------- + >>> s = pd.Series([1, 2, 3, 4]) + + >>> s + 0 1 + 1 2 + 2 3 + 3 4 + dtype: int64 + + >>> s.groupby([1, 1, 2, 2]).min() + 1 1 + 2 3 + dtype: int64 + + >>> s.groupby([1, 1, 2, 2]).agg('min') + 1 1 + 2 3 + dtype: int64 + + >>> s.groupby([1, 1, 2, 2]).agg(['min', 'max']) + min max + 1 1 2 + 2 3 4 + + The output column names can be controlled by passing + the desired column names and aggregations as keyword arguments. + + >>> s.groupby([1, 1, 2, 2]).agg( + ... minimum='min', + ... maximum='max', + ... ) + minimum maximum + 1 1 2 + 2 3 4 + + .. versionchanged:: 1.3.0 + + The resulting dtype will reflect the return value of the aggregating function. + + >>> s.groupby([1, 1, 2, 2]).agg(lambda x: x.astype(float).min()) + 1 1.0 + 2 3.0 + dtype: float64 + """ + ) + + @Appender( + _apply_docs["template"].format( + input="series", examples=_apply_docs["series_examples"] + ) + ) + def apply(self, func, *args, **kwargs): + return super().apply(func, *args, **kwargs) + + @doc(_agg_template, examples=_agg_examples_doc, klass="Series") + def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs): + + if maybe_use_numba(engine): + with self._group_selection_context(): + data = self._selected_obj + result = self._aggregate_with_numba( + data.to_frame(), func, *args, engine_kwargs=engine_kwargs, **kwargs + ) + index = self.grouper.result_index + return self.obj._constructor(result.ravel(), index=index, name=data.name) + + relabeling = func is None + columns = None + if relabeling: + columns, func = validate_func_kwargs(kwargs) + kwargs = {} + + if isinstance(func, str): + return getattr(self, func)(*args, **kwargs) + + elif isinstance(func, abc.Iterable): + # Catch instances of lists / tuples + # but not the class list / tuple itself. 
+ func = maybe_mangle_lambdas(func) + ret = self._aggregate_multiple_funcs(func) + if relabeling: + # error: Incompatible types in assignment (expression has type + # "Optional[List[str]]", variable has type "Index") + ret.columns = columns # type: ignore[assignment] + return ret + + else: + cyfunc = com.get_cython_func(func) + if cyfunc and not args and not kwargs: + return getattr(self, cyfunc)() + + if self.grouper.nkeys > 1: + return self._python_agg_general(func, *args, **kwargs) + + try: + return self._python_agg_general(func, *args, **kwargs) + except KeyError: + # TODO: KeyError is raised in _python_agg_general, + # see test_groupby.test_basic + result = self._aggregate_named(func, *args, **kwargs) + + # result is a dict whose keys are the elements of result_index + index = self.grouper.result_index + return create_series_with_explicit_dtype( + result, index=index, dtype_if_empty=object + ) + + agg = aggregate + + def _aggregate_multiple_funcs(self, arg) -> DataFrame: + if isinstance(arg, dict): + + # show the deprecation, but only if we + # have not shown a higher level one + # GH 15931 + raise SpecificationError("nested renamer is not supported") + + elif any(isinstance(x, (tuple, list)) for x in arg): + arg = [(x, x) if not isinstance(x, (tuple, list)) else x for x in arg] + + # indicated column order + columns = next(zip(*arg)) + else: + # list of functions / function names + columns = [] + for f in arg: + columns.append(com.get_callable_name(f) or f) + + arg = zip(columns, arg) + + results: dict[base.OutputKey, DataFrame | Series] = {} + for idx, (name, func) in enumerate(arg): + + key = base.OutputKey(label=name, position=idx) + results[key] = self.aggregate(func) + + if any(isinstance(x, DataFrame) for x in results.values()): + from pandas import concat + + res_df = concat( + results.values(), axis=1, keys=[key.label for key in results.keys()] + ) + return res_df + + indexed_output = {key.position: val for key, val in results.items()} + output = self.obj._constructor_expanddim(indexed_output, index=None) + output.columns = Index(key.label for key in results) + + output = self._reindex_output(output) + return output + + def _indexed_output_to_ndframe( + self, output: Mapping[base.OutputKey, ArrayLike] + ) -> Series: + """ + Wrap the dict result of a GroupBy aggregation into a Series. + """ + assert len(output) == 1 + values = next(iter(output.values())) + result = self.obj._constructor(values) + result.name = self.obj.name + return result + + def _wrap_applied_output( + self, + data: Series, + values: list[Any], + not_indexed_same: bool = False, + ) -> DataFrame | Series: + """ + Wrap the output of SeriesGroupBy.apply into the expected result. + + Parameters + ---------- + data : Series + Input data for groupby operation. + values : List[Any] + Applied output for each group. + not_indexed_same : bool, default False + Whether the applied outputs are not indexed the same as the group axes. 
+ + Returns + ------- + DataFrame or Series + """ + if len(values) == 0: + # GH #6265 + return self.obj._constructor( + [], + name=self.obj.name, + index=self.grouper.result_index, + dtype=data.dtype, + ) + assert values is not None + + if isinstance(values[0], dict): + # GH #823 #24880 + index = self.grouper.result_index + res_df = self.obj._constructor_expanddim(values, index=index) + res_df = self._reindex_output(res_df) + # if self.observed is False, + # keep all-NaN rows created while re-indexing + res_ser = res_df.stack(dropna=self.observed) + res_ser.name = self.obj.name + return res_ser + elif isinstance(values[0], (Series, DataFrame)): + return self._concat_objects(values, not_indexed_same=not_indexed_same) + else: + # GH #6265 #24880 + result = self.obj._constructor( + data=values, index=self.grouper.result_index, name=self.obj.name + ) + return self._reindex_output(result) + + def _aggregate_named(self, func, *args, **kwargs): + # Note: this is very similar to _aggregate_series_pure_python, + # but that does not pin group.name + result = {} + initialized = False + + for name, group in self: + object.__setattr__(group, "name", name) + + output = func(group, *args, **kwargs) + output = libreduction.extract_result(output) + if not initialized: + # We only do this validation on the first iteration + libreduction.check_result_array(output, group.dtype) + initialized = True + result[name] = output + + return result + + @Substitution(klass="Series") + @Appender(_transform_template) + def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): + return self._transform( + func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs + ) + + def _cython_transform( + self, how: str, numeric_only: bool = True, axis: int = 0, **kwargs + ): + assert axis == 0 # handled by caller + + obj = self._selected_obj + + try: + result = self.grouper._cython_operation( + "transform", obj._values, how, axis, **kwargs + ) + except NotImplementedError as err: + raise TypeError(f"{how} is not supported for {obj.dtype} dtype") from err + + return obj._constructor(result, index=self.obj.index, name=obj.name) + + def _transform_general(self, func: Callable, *args, **kwargs) -> Series: + """ + Transform with a callable func`. + """ + assert callable(func) + klass = type(self.obj) + + results = [] + for name, group in self: + # this setattr is needed for test_transform_lambda_with_datetimetz + object.__setattr__(group, "name", name) + res = func(group, *args, **kwargs) + + results.append(klass(res, index=group.index)) + + # check for empty "results" to avoid concat ValueError + if results: + from pandas.core.reshape.concat import concat + + concatenated = concat(results) + result = self._set_result_index_ordered(concatenated) + else: + result = self.obj._constructor(dtype=np.float64) + + result.name = self.obj.name + return result + + def _can_use_transform_fast(self, result) -> bool: + return True + + def filter(self, func, dropna: bool = True, *args, **kwargs): + """ + Return a copy of a Series excluding elements from groups that + do not satisfy the boolean criterion specified by func. + + Parameters + ---------- + func : function + To apply to each group. Should return True or False. + dropna : Drop groups that do not pass the filter. True by default; + if False, groups that evaluate False are filled with NaNs. + + Notes + ----- + Functions that mutate the passed object can produce unexpected + behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` + for more details. 
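+
+ If ``func`` returns NA for a group, that group does not pass the
+ filter, since NA is interpreted as False here.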
+ + Examples + -------- + >>> df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar', + ... 'foo', 'bar'], + ... 'B' : [1, 2, 3, 4, 5, 6], + ... 'C' : [2.0, 5., 8., 1., 2., 9.]}) + >>> grouped = df.groupby('A') + >>> df.groupby('A').B.filter(lambda x: x.mean() > 3.) + 1 2 + 3 4 + 5 6 + Name: B, dtype: int64 + + Returns + ------- + filtered : Series + """ + if isinstance(func, str): + wrapper = lambda x: getattr(x, func)(*args, **kwargs) + else: + wrapper = lambda x: func(x, *args, **kwargs) + + # Interpret np.nan as False. + def true_and_notna(x) -> bool: + b = wrapper(x) + return b and notna(b) + + try: + indices = [ + self._get_index(name) for name, group in self if true_and_notna(group) + ] + except (ValueError, TypeError) as err: + raise TypeError("the filter must return a boolean result") from err + + filtered = self._apply_filter(indices, dropna) + return filtered + + def nunique(self, dropna: bool = True) -> Series: + """ + Return number of unique elements in the group. + + Returns + ------- + Series + Number of unique values within each group. + """ + ids, _, _ = self.grouper.group_info + + val = self.obj._values + + codes, _ = algorithms.factorize(val, sort=False) + sorter = np.lexsort((codes, ids)) + codes = codes[sorter] + ids = ids[sorter] + + # group boundaries are where group ids change + # unique observations are where sorted values change + idx = np.r_[0, 1 + np.nonzero(ids[1:] != ids[:-1])[0]] + inc = np.r_[1, codes[1:] != codes[:-1]] + + # 1st item of each group is a new unique observation + mask = codes == -1 + if dropna: + inc[idx] = 1 + inc[mask] = 0 + else: + inc[mask & np.r_[False, mask[:-1]]] = 0 + inc[idx] = 1 + + out = np.add.reduceat(inc, idx).astype("int64", copy=False) + if len(ids): + # NaN/NaT group exists if the head of ids is -1, + # so remove it from res and exclude its index from idx + if ids[0] == -1: + res = out[1:] + idx = idx[np.flatnonzero(idx)] + else: + res = out + else: + res = out[1:] + ri = self.grouper.result_index + + # we might have duplications among the bins + if len(res) != len(ri): + res, out = np.zeros(len(ri), dtype=out.dtype), res + res[ids[idx]] = out + + result = self.obj._constructor(res, index=ri, name=self.obj.name) + return self._reindex_output(result, fill_value=0) + + @doc(Series.describe) + def describe(self, **kwargs): + return super().describe(**kwargs) + + def value_counts( + self, + normalize: bool = False, + sort: bool = True, + ascending: bool = False, + bins=None, + dropna: bool = True, + ): + + from pandas.core.reshape.merge import get_join_indexers + from pandas.core.reshape.tile import cut + + ids, _, _ = self.grouper.group_info + val = self.obj._values + + def apply_series_value_counts(): + return self.apply( + Series.value_counts, + normalize=normalize, + sort=sort, + ascending=ascending, + bins=bins, + ) + + if bins is not None: + if not np.iterable(bins): + # scalar bins cannot be done at top level + # in a backward compatible way + return apply_series_value_counts() + elif is_categorical_dtype(val.dtype): + # GH38672 + return apply_series_value_counts() + + # groupby removes null keys from groupings + mask = ids != -1 + ids, val = ids[mask], val[mask] + + if bins is None: + lab, lev = algorithms.factorize(val, sort=True) + llab = lambda lab, inc: lab[inc] + else: + + # lab is a Categorical with categories an IntervalIndex + lab = cut(Series(val), bins, include_lowest=True) + # error: "ndarray" has no attribute "cat" + lev = lab.cat.categories # type: ignore[attr-defined] + # error: No overload variant of "take" of 
"_ArrayOrScalarCommon" matches + # argument types "Any", "bool", "Union[Any, float]" + lab = lev.take( # type: ignore[call-overload] + # error: "ndarray" has no attribute "cat" + lab.cat.codes, # type: ignore[attr-defined] + allow_fill=True, + # error: Item "ndarray" of "Union[ndarray, Index]" has no attribute + # "_na_value" + fill_value=lev._na_value, # type: ignore[union-attr] + ) + llab = lambda lab, inc: lab[inc]._multiindex.codes[-1] + + if is_interval_dtype(lab.dtype): + # TODO: should we do this inside II? + + # error: "ndarray" has no attribute "left" + # error: "ndarray" has no attribute "right" + sorter = np.lexsort( + (lab.left, lab.right, ids) # type: ignore[attr-defined] + ) + else: + sorter = np.lexsort((lab, ids)) + + ids, lab = ids[sorter], lab[sorter] + + # group boundaries are where group ids change + idchanges = 1 + np.nonzero(ids[1:] != ids[:-1])[0] + idx = np.r_[0, idchanges] + if not len(ids): + idx = idchanges + + # new values are where sorted labels change + lchanges = llab(lab, slice(1, None)) != llab(lab, slice(None, -1)) + inc = np.r_[True, lchanges] + if not len(val): + inc = lchanges + inc[idx] = True # group boundaries are also new values + out = np.diff(np.nonzero(np.r_[inc, True])[0]) # value counts + + # num. of times each group should be repeated + rep = partial(np.repeat, repeats=np.add.reduceat(inc, idx)) + + # multi-index components + codes = self.grouper.reconstructed_codes + codes = [rep(level_codes) for level_codes in codes] + [llab(lab, inc)] + # error: List item 0 has incompatible type "Union[ndarray[Any, Any], Index]"; + # expected "Index" + levels = [ping.group_index for ping in self.grouper.groupings] + [ + lev # type: ignore[list-item] + ] + names = self.grouper.names + [self.obj.name] + + if dropna: + mask = codes[-1] != -1 + if mask.all(): + dropna = False + else: + out, codes = out[mask], [level_codes[mask] for level_codes in codes] + + if normalize: + out = out.astype("float") + d = np.diff(np.r_[idx, len(ids)]) + if dropna: + m = ids[lab == -1] + np.add.at(d, m, -1) + acc = rep(d)[mask] + else: + acc = rep(d) + out /= acc + + if sort and bins is None: + cat = ids[inc][mask] if dropna else ids[inc] + sorter = np.lexsort((out if ascending else -out, cat)) + out, codes[-1] = out[sorter], codes[-1][sorter] + + if bins is not None: + # for compat. 
with libgroupby.value_counts need to ensure every + # bin is present at every index level, null filled with zeros + diff = np.zeros(len(out), dtype="bool") + for level_codes in codes[:-1]: + diff |= np.r_[True, level_codes[1:] != level_codes[:-1]] + + ncat, nbin = diff.sum(), len(levels[-1]) + + left = [np.repeat(np.arange(ncat), nbin), np.tile(np.arange(nbin), ncat)] + + right = [diff.cumsum() - 1, codes[-1]] + + _, idx = get_join_indexers(left, right, sort=False, how="left") + out = np.where(idx != -1, out[idx], 0) + + if sort: + sorter = np.lexsort((out if ascending else -out, left[0])) + out, left[-1] = out[sorter], left[-1][sorter] + + # build the multi-index w/ full levels + def build_codes(lev_codes: np.ndarray) -> np.ndarray: + return np.repeat(lev_codes[diff], nbin) + + codes = [build_codes(lev_codes) for lev_codes in codes[:-1]] + codes.append(left[-1]) + + mi = MultiIndex(levels=levels, codes=codes, names=names, verify_integrity=False) + + if is_integer_dtype(out.dtype): + out = ensure_int64(out) + return self.obj._constructor(out, index=mi, name=self.obj.name) + + @doc(Series.nlargest) + def nlargest(self, n: int = 5, keep: str = "first"): + f = partial(Series.nlargest, n=n, keep=keep) + data = self._obj_with_exclusions + # Don't change behavior if result index happens to be the same, i.e. + # already ordered and n >= all group sizes. + result = self._python_apply_general(f, data, not_indexed_same=True) + return result + + @doc(Series.nsmallest) + def nsmallest(self, n: int = 5, keep: str = "first"): + f = partial(Series.nsmallest, n=n, keep=keep) + data = self._obj_with_exclusions + # Don't change behavior if result index happens to be the same, i.e. + # already ordered and n >= all group sizes. + result = self._python_apply_general(f, data, not_indexed_same=True) + return result + + +@pin_allowlisted_properties(DataFrame, base.dataframe_apply_allowlist) +class DataFrameGroupBy(GroupBy[DataFrame]): + + _apply_allowlist = base.dataframe_apply_allowlist + + _agg_examples_doc = dedent( + """ + Examples + -------- + >>> df = pd.DataFrame( + ... { + ... "A": [1, 1, 2, 2], + ... "B": [1, 2, 3, 4], + ... "C": [0.362838, 0.227877, 1.267767, -0.562860], + ... } + ... ) + + >>> df + A B C + 0 1 1 0.362838 + 1 1 2 0.227877 + 2 2 3 1.267767 + 3 2 4 -0.562860 + + The aggregation is for each column. + + >>> df.groupby('A').agg('min') + B C + A + 1 1 0.227877 + 2 3 -0.562860 + + Multiple aggregations + + >>> df.groupby('A').agg(['min', 'max']) + B C + min max min max + A + 1 1 2 0.227877 0.362838 + 2 3 4 -0.562860 1.267767 + + Select a column for aggregation + + >>> df.groupby('A').B.agg(['min', 'max']) + min max + A + 1 1 2 + 2 3 4 + + Different aggregations per column + + >>> df.groupby('A').agg({'B': ['min', 'max'], 'C': 'sum'}) + B C + min max sum + A + 1 1 2 0.590715 + 2 3 4 0.704907 + + To control the output names with different aggregations per column, + pandas supports "named aggregation" + + >>> df.groupby("A").agg( + ... b_min=pd.NamedAgg(column="B", aggfunc="min"), + ... c_sum=pd.NamedAgg(column="C", aggfunc="sum")) + b_min c_sum + A + 1 1 0.590715 + 2 3 0.704907 + + - The keywords are the *output* column names + - The values are tuples whose first element is the column to select + and the second element is the aggregation to apply to that column. + Pandas provides the ``pandas.NamedAgg`` namedtuple with the fields + ``['column', 'aggfunc']`` to make it clearer what the arguments are. + As usual, the aggregation can be a callable or a string alias. 
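+
+ A plain 2-tuple can stand in for ``pd.NamedAgg`` (an extra
+ illustration, not part of the original examples):
+
+ >>> df.groupby("A").agg(b_min=("B", "min"))
+ b_min
+ A
+ 1 1
+ 2 3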
+ + See :ref:`groupby.aggregate.named` for more. + + .. versionchanged:: 1.3.0 + + The resulting dtype will reflect the return value of the aggregating function. + + >>> df.groupby("A")[["B"]].agg(lambda x: x.astype(float).min()) + B + A + 1 1.0 + 2 3.0 + """ + ) + + @doc(_agg_template, examples=_agg_examples_doc, klass="DataFrame") + def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs): + + if maybe_use_numba(engine): + with self._group_selection_context(): + data = self._selected_obj + result = self._aggregate_with_numba( + data, func, *args, engine_kwargs=engine_kwargs, **kwargs + ) + index = self.grouper.result_index + return self.obj._constructor(result, index=index, columns=data.columns) + + relabeling, func, columns, order = reconstruct_func(func, **kwargs) + func = maybe_mangle_lambdas(func) + + op = GroupByApply(self, func, args, kwargs) + result = op.agg() + if not is_dict_like(func) and result is not None: + return result + elif relabeling and result is not None: + # this should be the only (non-raising) case with relabeling + # used reordered index of columns + result = result.iloc[:, order] + result.columns = columns + + if result is None: + + # grouper specific aggregations + if self.grouper.nkeys > 1: + # test_groupby_as_index_series_scalar gets here with 'not self.as_index' + return self._python_agg_general(func, *args, **kwargs) + elif args or kwargs: + # test_pass_args_kwargs gets here (with and without as_index) + # can't return early + result = self._aggregate_frame(func, *args, **kwargs) + + elif self.axis == 1: + # _aggregate_multiple_funcs does not allow self.axis == 1 + # Note: axis == 1 precludes 'not self.as_index', see __init__ + result = self._aggregate_frame(func) + return result + + else: + + # try to treat as if we are passing a list + gba = GroupByApply(self, [func], args=(), kwargs={}) + try: + result = gba.agg() + + except ValueError as err: + if "no results" not in str(err): + # raised directly by _aggregate_multiple_funcs + raise + result = self._aggregate_frame(func) + + else: + sobj = self._selected_obj + + if isinstance(sobj, Series): + # GH#35246 test_groupby_as_index_select_column_sum_empty_df + result.columns = self._obj_with_exclusions.columns.copy() + else: + # Retain our column names + result.columns._set_names( + sobj.columns.names, level=list(range(sobj.columns.nlevels)) + ) + # select everything except for the last level, which is the one + # containing the name of the function(s), see GH#32040 + result.columns = result.columns.droplevel(-1) + + if not self.as_index: + self._insert_inaxis_grouper_inplace(result) + result.index = Index(range(len(result))) + + return result + + agg = aggregate + + def _iterate_slices(self) -> Iterable[Series]: + obj = self._selected_obj + if self.axis == 1: + obj = obj.T + + if isinstance(obj, Series) and obj.name not in self.exclusions: + # Occurs when doing DataFrameGroupBy(...)["X"] + yield obj + else: + for label, values in obj.items(): + if label in self.exclusions: + continue + + yield values + + def _aggregate_frame(self, func, *args, **kwargs) -> DataFrame: + if self.grouper.nkeys != 1: + raise AssertionError("Number of keys must be 1") + + obj = self._obj_with_exclusions + + result: dict[Hashable, NDFrame | np.ndarray] = {} + if self.axis == 0: + # test_pass_args_kwargs_duplicate_columns gets here with non-unique columns + for name, data in self: + fres = func(data, *args, **kwargs) + result[name] = fres + else: + # we get here in a number of test_multilevel tests + for name 
in self.indices: + grp_df = self.get_group(name, obj=obj) + fres = func(grp_df, *args, **kwargs) + result[name] = fres + + result_index = self.grouper.result_index + other_ax = obj.axes[1 - self.axis] + out = self.obj._constructor(result, index=other_ax, columns=result_index) + if self.axis == 0: + out = out.T + + return out + + def _aggregate_item_by_item(self, func, *args, **kwargs) -> DataFrame: + # only for axis==0 + # tests that get here with non-unique cols: + # test_resample_with_timedelta_yields_no_empty_groups, + # test_resample_apply_product + + obj = self._obj_with_exclusions + result: dict[int, NDFrame] = {} + + for i, (item, sgb) in enumerate(self._iterate_column_groupbys(obj)): + result[i] = sgb.aggregate(func, *args, **kwargs) + + res_df = self.obj._constructor(result) + res_df.columns = obj.columns + return res_df + + def _wrap_applied_output( + self, data: DataFrame, values: list, not_indexed_same: bool = False + ): + + if len(values) == 0: + result = self.obj._constructor( + index=self.grouper.result_index, columns=data.columns + ) + result = result.astype(data.dtypes, copy=False) + return result + + # GH12824 + first_not_none = next(com.not_none(*values), None) + + if first_not_none is None: + # GH9684 - All values are None, return an empty frame. + return self.obj._constructor() + elif isinstance(first_not_none, DataFrame): + return self._concat_objects(values, not_indexed_same=not_indexed_same) + + key_index = self.grouper.result_index if self.as_index else None + + if isinstance(first_not_none, (np.ndarray, Index)): + # GH#1738: values is list of arrays of unequal lengths + # fall through to the outer else clause + # TODO: sure this is right? we used to do this + # after raising AttributeError above + return self.obj._constructor_sliced( + values, index=key_index, name=self._selection + ) + elif not isinstance(first_not_none, Series): + # values are not series or array-like but scalars + # self._selection not passed through to Series as the + # result should not take the name of original selection + # of columns + if self.as_index: + return self.obj._constructor_sliced(values, index=key_index) + else: + result = self.obj._constructor(values, columns=[self._selection]) + self._insert_inaxis_grouper_inplace(result) + return result + else: + # values are Series + return self._wrap_applied_output_series( + values, not_indexed_same, first_not_none, key_index + ) + + def _wrap_applied_output_series( + self, + values: list[Series], + not_indexed_same: bool, + first_not_none, + key_index, + ) -> DataFrame | Series: + # this is to silence a DeprecationWarning + # TODO(2.0): Remove when default dtype of empty Series is object + kwargs = first_not_none._construct_axes_dict() + backup = create_series_with_explicit_dtype(dtype_if_empty=object, **kwargs) + values = [x if (x is not None) else backup for x in values] + + all_indexed_same = all_indexes_same(x.index for x in values) + + # GH3596 + # provide a reduction (Frame -> Series) if groups are + # unique + if self.squeeze: + applied_index = self._selected_obj._get_axis(self.axis) + singular_series = len(values) == 1 and applied_index.nlevels == 1 + + if singular_series: + # GH2893 + # we have series in the values array, we want to + # produce a series: + # if any of the sub-series are not indexed the same + # OR we don't have a multi-index and we have only a + # single values + return self._concat_objects(values, not_indexed_same=not_indexed_same) + + # still a series + # path added as of GH 5545 + elif all_indexed_same: + from 
pandas.core.reshape.concat import concat + + return concat(values) + + if not all_indexed_same: + # GH 8467 + return self._concat_objects(values, not_indexed_same=True) + + # Combine values + # vstack+constructor is faster than concat and handles MI-columns + stacked_values = np.vstack([np.asarray(v) for v in values]) + + if self.axis == 0: + index = key_index + columns = first_not_none.index.copy() + if columns.name is None: + # GH6124 - propagate name of Series when it's consistent + names = {v.name for v in values} + if len(names) == 1: + columns.name = list(names)[0] + else: + index = first_not_none.index + columns = key_index + stacked_values = stacked_values.T + + if stacked_values.dtype == object: + # We'll have the DataFrame constructor do inference + stacked_values = stacked_values.tolist() + result = self.obj._constructor(stacked_values, index=index, columns=columns) + + if not self.as_index: + self._insert_inaxis_grouper_inplace(result) + + return self._reindex_output(result) + + def _cython_transform( + self, how: str, numeric_only: bool = True, axis: int = 0, **kwargs + ) -> DataFrame: + assert axis == 0 # handled by caller + # TODO: no tests with self.ndim == 1 for DataFrameGroupBy + + # With self.axis == 0, we have multi-block tests + # e.g. test_rank_min_int, test_cython_transform_frame + # test_transform_numeric_ret + # With self.axis == 1, _get_data_to_aggregate does a transpose + # so we always have a single block. + mgr: Manager2D = self._get_data_to_aggregate() + if numeric_only: + mgr = mgr.get_numeric_data(copy=False) + + def arr_func(bvalues: ArrayLike) -> ArrayLike: + return self.grouper._cython_operation( + "transform", bvalues, how, 1, **kwargs + ) + + # We could use `mgr.apply` here and not have to set_axis, but + # we would have to do shape gymnastics for ArrayManager compat + res_mgr = mgr.grouped_reduce(arr_func, ignore_failures=True) + res_mgr.set_axis(1, mgr.axes[1]) + + if len(res_mgr) < len(mgr): + warn_dropping_nuisance_columns_deprecated(type(self), how) + + res_df = self.obj._constructor(res_mgr) + if self.axis == 1: + res_df = res_df.T + return res_df + + def _transform_general(self, func, *args, **kwargs): + from pandas.core.reshape.concat import concat + + applied = [] + obj = self._obj_with_exclusions + gen = self.grouper.get_iterator(obj, axis=self.axis) + fast_path, slow_path = self._define_paths(func, *args, **kwargs) + + # Determine whether to use slow or fast path by evaluating on the first group. + # Need to handle the case of an empty generator and process the result so that + # it does not need to be computed again. 
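+        # Illustrative sketch (assuming self.axis == 0 and a string func such
+        # as "cumsum"): the two candidate paths built by _define_paths behave
+        # roughly like
+        #     fast_path(group) -> group.cumsum()
+        #     slow_path(group) -> group.apply(lambda x: x.cumsum(), axis=0)
+        # _choose_path keeps the fast path only when it returns a DataFrame
+        # whose columns and values match the slow path's result.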
+ try: + name, group = next(gen) + except StopIteration: + pass + else: + object.__setattr__(group, "name", name) + try: + path, res = self._choose_path(fast_path, slow_path, group) + except TypeError: + return self._transform_item_by_item(obj, fast_path) + except ValueError as err: + msg = "transform must return a scalar value for each group" + raise ValueError(msg) from err + if group.size > 0: + res = _wrap_transform_general_frame(self.obj, group, res) + applied.append(res) + + # Compute and process with the remaining groups + for name, group in gen: + if group.size == 0: + continue + object.__setattr__(group, "name", name) + res = path(group) + res = _wrap_transform_general_frame(self.obj, group, res) + applied.append(res) + + concat_index = obj.columns if self.axis == 0 else obj.index + other_axis = 1 if self.axis == 0 else 0 # switches between 0 & 1 + concatenated = concat(applied, axis=self.axis, verify_integrity=False) + concatenated = concatenated.reindex(concat_index, axis=other_axis, copy=False) + return self._set_result_index_ordered(concatenated) + + @Substitution(klass="DataFrame") + @Appender(_transform_template) + def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): + return self._transform( + func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs + ) + + def _can_use_transform_fast(self, result) -> bool: + return isinstance(result, DataFrame) and result.columns.equals( + self._obj_with_exclusions.columns + ) + + def _define_paths(self, func, *args, **kwargs): + if isinstance(func, str): + fast_path = lambda group: getattr(group, func)(*args, **kwargs) + slow_path = lambda group: group.apply( + lambda x: getattr(x, func)(*args, **kwargs), axis=self.axis + ) + else: + fast_path = lambda group: func(group, *args, **kwargs) + slow_path = lambda group: group.apply( + lambda x: func(x, *args, **kwargs), axis=self.axis + ) + return fast_path, slow_path + + def _choose_path(self, fast_path: Callable, slow_path: Callable, group: DataFrame): + path = slow_path + res = slow_path(group) + + # if we make it here, test if we can use the fast path + try: + res_fast = fast_path(group) + except AssertionError: + raise # pragma: no cover + except Exception: + # GH#29631 For user-defined function, we can't predict what may be + # raised; see test_transform.test_transform_fastpath_raises + return path, res + + # verify fast path does not change columns (and names), otherwise + # its results cannot be joined with those of the slow path + if not isinstance(res_fast, DataFrame): + return path, res + + if not res_fast.columns.equals(group.columns): + return path, res + + if res_fast.equals(res): + path = fast_path + + return path, res + + def _transform_item_by_item(self, obj: DataFrame, wrapper) -> DataFrame: + # iterate through columns, see test_transform_exclude_nuisance + # gets here with non-unique columns + output = {} + inds = [] + for i, (colname, sgb) in enumerate(self._iterate_column_groupbys(obj)): + try: + output[i] = sgb.transform(wrapper) + except TypeError: + # e.g. trying to call nanmean with string values + warn_dropping_nuisance_columns_deprecated(type(self), "transform") + else: + inds.append(i) + + if not output: + raise TypeError("Transform function invalid for data types") + + columns = obj.columns.take(inds) + + result = self.obj._constructor(output, index=obj.index) + result.columns = columns + return result + + def filter(self, func, dropna=True, *args, **kwargs): + """ + Return a copy of a DataFrame excluding filtered elements. 
+
+        Elements from groups are filtered if they do not satisfy the
+        boolean criterion specified by func.
+
+        Parameters
+        ----------
+        func : function
+            Function to apply to each subframe. Should return True or False.
+        dropna : bool, default True
+            Drop groups that do not pass the filter. If False, groups that
+            evaluate False are filled with NaNs.
+
+        Returns
+        -------
+        filtered : DataFrame
+
+        Notes
+        -----
+        Each subframe is endowed with the attribute 'name' in case you need
+        to know which group you are working on.
+
+        Functions that mutate the passed object can produce unexpected
+        behavior or errors and are not supported. See :ref:`gotchas.udf-mutation`
+        for more details.
+
+        Examples
+        --------
+        >>> df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',
+        ...                           'foo', 'bar'],
+        ...                    'B' : [1, 2, 3, 4, 5, 6],
+        ...                    'C' : [2.0, 5., 8., 1., 2., 9.]})
+        >>> grouped = df.groupby('A')
+        >>> grouped.filter(lambda x: x['B'].mean() > 3.)
+             A  B    C
+        1  bar  2  5.0
+        3  bar  4  1.0
+        5  bar  6  9.0
+        """
+        indices = []
+
+        obj = self._selected_obj
+        gen = self.grouper.get_iterator(obj, axis=self.axis)
+
+        for name, group in gen:
+            object.__setattr__(group, "name", name)
+
+            res = func(group, *args, **kwargs)
+
+            try:
+                res = res.squeeze()
+            except AttributeError:  # allow e.g., scalars and frames to pass
+                pass
+
+            # interpret the result of the filter
+            if is_bool(res) or (is_scalar(res) and isna(res)):
+                if res and notna(res):
+                    indices.append(self._get_index(name))
+            else:
+                # non scalars aren't allowed
+                raise TypeError(
+                    f"filter function returned a {type(res).__name__}, "
+                    "but expected a scalar bool"
+                )
+
+        return self._apply_filter(indices, dropna)
+
+    def __getitem__(self, key) -> DataFrameGroupBy | SeriesGroupBy:
+        if self.axis == 1:
+            # GH 37725
+            raise ValueError("Cannot subset columns when using axis=1")
+        # per GH 23566
+        if isinstance(key, tuple) and len(key) > 1:
+            # if len == 1, then it becomes a SeriesGroupBy and this is actually
+            # valid syntax, so don't raise warning
+            warnings.warn(
+                "Indexing with multiple keys (implicitly converted to a tuple "
+                "of keys) will be deprecated, use a list instead.",
+                FutureWarning,
+                stacklevel=find_stack_level(),
+            )
+        return super().__getitem__(key)
+
+    def _gotitem(self, key, ndim: int, subset=None):
+        """
+        sub-classes to define
+        return a sliced object
+
+        Parameters
+        ----------
+        key : string / list of selections
+        ndim : {1, 2}
+            requested ndim of result
+        subset : object, default None
+            subset to act on
+        """
+        if ndim == 2:
+            if subset is None:
+                subset = self.obj
+            return DataFrameGroupBy(
+                subset,
+                self.grouper,
+                axis=self.axis,
+                level=self.level,
+                grouper=self.grouper,
+                exclusions=self.exclusions,
+                selection=key,
+                as_index=self.as_index,
+                sort=self.sort,
+                group_keys=self.group_keys,
+                squeeze=self.squeeze,
+                observed=self.observed,
+                mutated=self.mutated,
+                dropna=self.dropna,
+            )
+        elif ndim == 1:
+            if subset is None:
+                subset = self.obj[key]
+            return SeriesGroupBy(
+                subset,
+                level=self.level,
+                grouper=self.grouper,
+                selection=key,
+                sort=self.sort,
+                group_keys=self.group_keys,
+                squeeze=self.squeeze,
+                observed=self.observed,
+                dropna=self.dropna,
+            )
+
+        raise AssertionError("invalid ndim for _gotitem")
+
+    def _get_data_to_aggregate(self) -> Manager2D:
+        obj = self._obj_with_exclusions
+        if self.axis == 1:
+            return obj.T._mgr
+        else:
+            return obj._mgr
+
+    def _insert_inaxis_grouper_inplace(self, result: DataFrame) -> None:
+        # zip in reverse so we can always insert at loc 0
+        columns = result.columns
+        for name, lev,
in_axis in zip( + reversed(self.grouper.names), + reversed(self.grouper.get_group_levels()), + reversed([grp.in_axis for grp in self.grouper.groupings]), + ): + # GH #28549 + # When using .apply(-), name will be in columns already + if in_axis and name not in columns: + result.insert(0, name, lev) + + def _indexed_output_to_ndframe( + self, output: Mapping[base.OutputKey, ArrayLike] + ) -> DataFrame: + """ + Wrap the dict result of a GroupBy aggregation into a DataFrame. + """ + indexed_output = {key.position: val for key, val in output.items()} + columns = Index([key.label for key in output]) + columns._set_names(self._obj_with_exclusions._get_axis(1 - self.axis).names) + + result = self.obj._constructor(indexed_output) + result.columns = columns + return result + + def _wrap_agged_manager(self, mgr: Manager2D) -> DataFrame: + if not self.as_index: + # GH 41998 - empty mgr always gets index of length 0 + rows = mgr.shape[1] if mgr.shape[0] > 0 else 0 + index = Index(range(rows)) + mgr.set_axis(1, index) + result = self.obj._constructor(mgr) + + self._insert_inaxis_grouper_inplace(result) + result = result._consolidate() + else: + index = self.grouper.result_index + mgr.set_axis(1, index) + result = self.obj._constructor(mgr) + + if self.axis == 1: + result = result.T + + # Note: we only need to pass datetime=True in order to get numeric + # values converted + return self._reindex_output(result)._convert(datetime=True) + + def _iterate_column_groupbys(self, obj: DataFrame | Series): + for i, colname in enumerate(obj.columns): + yield colname, SeriesGroupBy( + obj.iloc[:, i], + selection=colname, + grouper=self.grouper, + exclusions=self.exclusions, + observed=self.observed, + ) + + def _apply_to_column_groupbys(self, func, obj: DataFrame | Series) -> DataFrame: + from pandas.core.reshape.concat import concat + + columns = obj.columns + results = [ + func(col_groupby) for _, col_groupby in self._iterate_column_groupbys(obj) + ] + + if not len(results): + # concat would raise + return DataFrame([], columns=columns, index=self.grouper.result_index) + else: + return concat(results, keys=columns, axis=1) + + def nunique(self, dropna: bool = True) -> DataFrame: + """ + Return DataFrame with counts of unique elements in each position. + + Parameters + ---------- + dropna : bool, default True + Don't include NaN in the counts. + + Returns + ------- + nunique: DataFrame + + Examples + -------- + >>> df = pd.DataFrame({'id': ['spam', 'egg', 'egg', 'spam', + ... 'ham', 'ham'], + ... 'value1': [1, 5, 5, 2, 5, 5], + ... 
'value2': list('abbaxy')}) + >>> df + id value1 value2 + 0 spam 1 a + 1 egg 5 b + 2 egg 5 b + 3 spam 2 a + 4 ham 5 x + 5 ham 5 y + + >>> df.groupby('id').nunique() + value1 value2 + id + egg 1 1 + ham 1 2 + spam 2 1 + + Check for rows with the same id but conflicting values: + + >>> df.groupby('id').filter(lambda g: (g.nunique() > 1).any()) + id value1 value2 + 0 spam 1 a + 3 spam 2 a + 4 ham 5 x + 5 ham 5 y + """ + + if self.axis != 0: + # see test_groupby_crash_on_nunique + return self._python_agg_general(lambda sgb: sgb.nunique(dropna)) + + obj = self._obj_with_exclusions + results = self._apply_to_column_groupbys( + lambda sgb: sgb.nunique(dropna), obj=obj + ) + + if not self.as_index: + results.index = Index(range(len(results))) + self._insert_inaxis_grouper_inplace(results) + + return results + + @Appender(DataFrame.idxmax.__doc__) + def idxmax(self, axis=0, skipna: bool = True): + axis = DataFrame._get_axis_number(axis) + numeric_only = None if axis == 0 else False + + def func(df): + # NB: here we use numeric_only=None, in DataFrame it is False GH#38217 + res = df._reduce( + nanops.nanargmax, + "argmax", + axis=axis, + skipna=skipna, + numeric_only=numeric_only, + ) + indices = res._values + index = df._get_axis(axis) + result = [index[i] if i >= 0 else np.nan for i in indices] + return df._constructor_sliced(result, index=res.index) + + func.__name__ = "idxmax" + return self._python_apply_general(func, self._obj_with_exclusions) + + @Appender(DataFrame.idxmin.__doc__) + def idxmin(self, axis=0, skipna: bool = True): + axis = DataFrame._get_axis_number(axis) + numeric_only = None if axis == 0 else False + + def func(df): + # NB: here we use numeric_only=None, in DataFrame it is False GH#38217 + res = df._reduce( + nanops.nanargmin, + "argmin", + axis=axis, + skipna=skipna, + numeric_only=numeric_only, + ) + indices = res._values + index = df._get_axis(axis) + result = [index[i] if i >= 0 else np.nan for i in indices] + return df._constructor_sliced(result, index=res.index) + + func.__name__ = "idxmin" + return self._python_apply_general(func, self._obj_with_exclusions) + + boxplot = boxplot_frame_groupby + + def value_counts( + self, + subset: Sequence[Hashable] | None = None, + normalize: bool = False, + sort: bool = True, + ascending: bool = False, + dropna: bool = True, + ) -> DataFrame | Series: + """ + Return a Series or DataFrame containing counts of unique rows. + + .. versionadded:: 1.4.0 + + Parameters + ---------- + subset : list-like, optional + Columns to use when counting unique combinations. + normalize : bool, default False + Return proportions rather than frequencies. + sort : bool, default True + Sort by frequencies. + ascending : bool, default False + Sort in ascending order. + dropna : bool, default True + Don’t include counts of rows that contain NA values. + + Returns + ------- + Series or DataFrame + Series if the groupby as_index is True, otherwise DataFrame. + + See Also + -------- + Series.value_counts: Equivalent method on Series. + DataFrame.value_counts: Equivalent method on DataFrame. + SeriesGroupBy.value_counts: Equivalent method on SeriesGroupBy. + + Notes + ----- + - If the groupby as_index is True then the returned Series will have a + MultiIndex with one level per input column. + - If the groupby as_index is False then the returned DataFrame will have an + additional column with the value_counts. The column is labelled 'count' or + 'proportion', depending on the ``normalize`` parameter. 
+ + By default, rows that contain any NA values are omitted from + the result. + + By default, the result will be in descending order so that the + first element of each group is the most frequently-occurring row. + + Examples + -------- + >>> df = pd.DataFrame({ + ... 'gender': ['male', 'male', 'female', 'male', 'female', 'male'], + ... 'education': ['low', 'medium', 'high', 'low', 'high', 'low'], + ... 'country': ['US', 'FR', 'US', 'FR', 'FR', 'FR'] + ... }) + + >>> df + gender education country + 0 male low US + 1 male medium FR + 2 female high US + 3 male low FR + 4 female high FR + 5 male low FR + + >>> df.groupby('gender').value_counts() + gender education country + female high FR 1 + US 1 + male low FR 2 + US 1 + medium FR 1 + dtype: int64 + + >>> df.groupby('gender').value_counts(ascending=True) + gender education country + female high FR 1 + US 1 + male low US 1 + medium FR 1 + low FR 2 + dtype: int64 + + >>> df.groupby('gender').value_counts(normalize=True) + gender education country + female high FR 0.50 + US 0.50 + male low FR 0.50 + US 0.25 + medium FR 0.25 + dtype: float64 + + >>> df.groupby('gender', as_index=False).value_counts() + gender education country count + 0 female high FR 1 + 1 female high US 1 + 2 male low FR 2 + 3 male low US 1 + 4 male medium FR 1 + + >>> df.groupby('gender', as_index=False).value_counts(normalize=True) + gender education country proportion + 0 female high FR 0.50 + 1 female high US 0.50 + 2 male low FR 0.50 + 3 male low US 0.25 + 4 male medium FR 0.25 + """ + if self.axis == 1: + raise NotImplementedError( + "DataFrameGroupBy.value_counts only handles axis=0" + ) + + with self._group_selection_context(): + df = self.obj + + in_axis_names = { + grouping.name for grouping in self.grouper.groupings if grouping.in_axis + } + if isinstance(self._selected_obj, Series): + name = self._selected_obj.name + keys = [] if name in in_axis_names else [self._selected_obj] + else: + keys = [ + # Can't use .values because the column label needs to be preserved + self._selected_obj.iloc[:, idx] + for idx, name in enumerate(self._selected_obj.columns) + if name not in in_axis_names + ] + + if subset is not None: + clashing = set(subset) & set(in_axis_names) + if clashing: + raise ValueError( + f"Keys {clashing} in subset cannot be in " + "the groupby column keys" + ) + + groupings = list(self.grouper.groupings) + for key in keys: + grouper, _, _ = get_grouper( + df, + key=key, + axis=self.axis, + sort=self.sort, + dropna=dropna, + ) + groupings += list(grouper.groupings) + + # Take the size of the overall columns + gb = df.groupby( + groupings, + sort=self.sort, + observed=self.observed, + dropna=self.dropna, + ) + result = cast(Series, gb.size()) + + if normalize: + # Normalize the results by dividing by the original group sizes. + # We are guaranteed to have the first N levels be the + # user-requested grouping. 
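+                # Illustrative sketch, using the hypothetical frame from the
+                # docstring example above: for
+                #     df.groupby('gender').value_counts(normalize=True)
+                # ``result`` currently holds the size of each
+                # (gender, education, country) combination; dividing by the
+                # per-gender totals computed below turns each size into a
+                # proportion, so the proportions sum to 1.0 within each gender.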
+                levels = list(range(len(self.grouper.groupings), result.index.nlevels))
+                indexed_group_size = result.groupby(
+                    result.index.droplevel(levels),
+                    sort=self.sort,
+                    observed=self.observed,
+                    dropna=self.dropna,
+                ).transform("sum")
+
+                result /= indexed_group_size
+
+            if sort:
+                # Sort the values and then resort by the main grouping
+                index_level = range(len(self.grouper.groupings))
+                result = result.sort_values(ascending=ascending).sort_index(
+                    level=index_level, sort_remaining=False
+                )
+
+            if not self.as_index:
+                # Convert to frame
+                result = result.reset_index(name="proportion" if normalize else "count")
+            return result.__finalize__(self.obj, method="value_counts")
+
+
+def _wrap_transform_general_frame(
+    obj: DataFrame, group: DataFrame, res: DataFrame | Series
+) -> DataFrame:
+    from pandas import concat
+
+    if isinstance(res, Series):
+        # we need to broadcast across the
+        # other dimension; this will preserve dtypes
+        # GH14457
+        if res.index.is_(obj.index):
+            res_frame = concat([res] * len(group.columns), axis=1)
+            res_frame.columns = group.columns
+            res_frame.index = group.index
+        else:
+            res_frame = obj._constructor(
+                np.concatenate([res.values] * len(group.index)).reshape(group.shape),
+                columns=group.columns,
+                index=group.index,
+            )
+        assert isinstance(res_frame, DataFrame)
+        return res_frame
+    else:
+        return res
diff --git a/.local/lib/python3.8/site-packages/pandas/core/groupby/groupby.py b/.local/lib/python3.8/site-packages/pandas/core/groupby/groupby.py
new file mode 100644
index 0000000..e55149c
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/core/groupby/groupby.py
@@ -0,0 +1,3901 @@
+"""
+Provide the groupby split-apply-combine paradigm. Define the GroupBy
+class providing the base-class of operations.
+
+The SeriesGroupBy and DataFrameGroupBy sub-classes
+(defined in pandas.core.groupby.generic)
+expose these user-facing objects to provide specific functionality.
+""" +from __future__ import annotations + +from contextlib import contextmanager +import datetime +from functools import ( + partial, + wraps, +) +import inspect +from textwrap import dedent +import types +from typing import ( + Callable, + Hashable, + Iterable, + Iterator, + List, + Literal, + Mapping, + Sequence, + TypeVar, + Union, + cast, + final, +) +import warnings + +import numpy as np + +from pandas._config.config import option_context + +from pandas._libs import ( + Timestamp, + lib, +) +import pandas._libs.groupby as libgroupby +from pandas._typing import ( + ArrayLike, + IndexLabel, + NDFrameT, + PositionalIndexer, + RandomState, + Scalar, + T, + npt, +) +from pandas.compat.numpy import function as nv +from pandas.errors import AbstractMethodError +from pandas.util._decorators import ( + Appender, + Substitution, + cache_readonly, + doc, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + is_bool_dtype, + is_datetime64_dtype, + is_float_dtype, + is_integer, + is_integer_dtype, + is_numeric_dtype, + is_object_dtype, + is_scalar, + is_timedelta64_dtype, +) +from pandas.core.dtypes.missing import ( + isna, + notna, +) + +from pandas.core import nanops +from pandas.core._numba import executor +import pandas.core.algorithms as algorithms +from pandas.core.arrays import ( + BaseMaskedArray, + BooleanArray, + Categorical, + ExtensionArray, +) +from pandas.core.base import ( + DataError, + PandasObject, + SelectionMixin, +) +import pandas.core.common as com +from pandas.core.frame import DataFrame +from pandas.core.generic import NDFrame +from pandas.core.groupby import ( + base, + numba_, + ops, +) +from pandas.core.groupby.indexing import ( + GroupByIndexingMixin, + GroupByNthSelector, +) +from pandas.core.indexes.api import ( + CategoricalIndex, + Index, + MultiIndex, +) +from pandas.core.internals.blocks import ensure_block_shape +import pandas.core.sample as sample +from pandas.core.series import Series +from pandas.core.sorting import get_group_index_sorter +from pandas.core.util.numba_ import ( + NUMBA_FUNC_CACHE, + maybe_use_numba, +) + +_common_see_also = """ + See Also + -------- + Series.%(name)s : Apply a function %(name)s to a Series. + DataFrame.%(name)s : Apply a function %(name)s + to each row or column of a DataFrame. +""" + +_apply_docs = { + "template": """ + Apply function ``func`` group-wise and combine the results together. + + The function passed to ``apply`` must take a {input} as its first + argument and return a DataFrame, Series or scalar. ``apply`` will + then take care of combining the results back together into a single + dataframe or series. ``apply`` is therefore a highly flexible + grouping method. + + While ``apply`` is a very flexible method, its downside is that + using it can be quite a bit slower than using more specific methods + like ``agg`` or ``transform``. Pandas offers a wide range of method that will + be much faster than using ``apply`` for their specific purposes, so try to + use them before reaching for ``apply``. + + Parameters + ---------- + func : callable + A callable that takes a {input} as its first argument, and + returns a dataframe, a series or a scalar. In addition the + callable may take positional and keyword arguments. + args, kwargs : tuple and dict + Optional positional and keyword arguments to pass to ``func``. + + Returns + ------- + applied : Series or DataFrame + + See Also + -------- + pipe : Apply function to the full GroupBy object instead of to each + group. 
+ aggregate : Apply aggregate function to the GroupBy object. + transform : Apply function column-by-column to the GroupBy object. + Series.apply : Apply a function to a Series. + DataFrame.apply : Apply a function to each row or column of a DataFrame. + + Notes + ----- + + .. versionchanged:: 1.3.0 + + The resulting dtype will reflect the return value of the passed ``func``, + see the examples below. + + Functions that mutate the passed object can produce unexpected + behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` + for more details. + + Examples + -------- + {examples} + """, + "dataframe_examples": """ + >>> df = pd.DataFrame({'A': 'a a b'.split(), + ... 'B': [1,2,3], + ... 'C': [4,6,5]}) + >>> g = df.groupby('A') + + Notice that ``g`` has two groups, ``a`` and ``b``. + Calling `apply` in various ways, we can get different grouping results: + + Example 1: below the function passed to `apply` takes a DataFrame as + its argument and returns a DataFrame. `apply` combines the result for + each group together into a new DataFrame: + + >>> g[['B', 'C']].apply(lambda x: x / x.sum()) + B C + 0 0.333333 0.4 + 1 0.666667 0.6 + 2 1.000000 1.0 + + Example 2: The function passed to `apply` takes a DataFrame as + its argument and returns a Series. `apply` combines the result for + each group together into a new DataFrame. + + .. versionchanged:: 1.3.0 + + The resulting dtype will reflect the return value of the passed ``func``. + + >>> g[['B', 'C']].apply(lambda x: x.astype(float).max() - x.min()) + B C + A + a 1.0 2.0 + b 0.0 0.0 + + Example 3: The function passed to `apply` takes a DataFrame as + its argument and returns a scalar. `apply` combines the result for + each group together into a Series, including setting the index as + appropriate: + + >>> g.apply(lambda x: x.C.max() - x.B.min()) + A + a 5 + b 2 + dtype: int64""", + "series_examples": """ + >>> s = pd.Series([0, 1, 2], index='a a b'.split()) + >>> g = s.groupby(s.index) + + From ``s`` above we can see that ``g`` has two groups, ``a`` and ``b``. + Calling `apply` in various ways, we can get different grouping results: + + Example 1: The function passed to `apply` takes a Series as + its argument and returns a Series. `apply` combines the result for + each group together into a new Series. + + .. versionchanged:: 1.3.0 + + The resulting dtype will reflect the return value of the passed ``func``. + + >>> g.apply(lambda x: x*2 if x.name == 'a' else x/2) + a 0.0 + a 2.0 + b 1.0 + dtype: float64 + + Example 2: The function passed to `apply` takes a Series as + its argument and returns a scalar. `apply` combines the result for + each group together into a Series, including setting the index as + appropriate: + + >>> g.apply(lambda x: x.max() - x.min()) + a 1 + b 0 + dtype: int64""", +} + +_groupby_agg_method_template = """ +Compute {fname} of group values. + +Parameters +---------- +numeric_only : bool, default {no} + Include only float, int, boolean columns. If None, will attempt to use + everything, then use only numeric data. +min_count : int, default {mc} + The required number of valid values to perform the operation. If fewer + than ``min_count`` non-NA values are present the result will be NA. + +Returns +------- +Series or DataFrame + Computed {fname} of values within each group. +""" + +_pipe_template = """ +Apply a function `func` with arguments to this %(klass)s object and return +the function's result. 
+
+Use `.pipe` when you want to improve readability by chaining together
+functions that expect Series, DataFrames, GroupBy or Resampler objects.
+Instead of writing
+
+>>> h(g(f(df.groupby('group')), arg1=a), arg2=b, arg3=c)  # doctest: +SKIP
+
+You can write
+
+>>> (df.groupby('group')
+...    .pipe(f)
+...    .pipe(g, arg1=a)
+...    .pipe(h, arg2=b, arg3=c))  # doctest: +SKIP
+
+which is much more readable.
+
+Parameters
+----------
+func : callable or tuple of (callable, str)
+    Function to apply to this %(klass)s object or, alternatively,
+    a `(callable, data_keyword)` tuple where `data_keyword` is a
+    string indicating the keyword of `callable` that expects the
+    %(klass)s object.
+args : iterable, optional
+    Positional arguments passed into `func`.
+kwargs : dict, optional
+    A dictionary of keyword arguments passed into `func`.
+
+Returns
+-------
+object : the return type of `func`.
+
+See Also
+--------
+Series.pipe : Apply a function with arguments to a series.
+DataFrame.pipe : Apply a function with arguments to a dataframe.
+apply : Apply function to each group instead of to the
+    full %(klass)s object.
+
+Notes
+-----
+See more `here
+<https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#piping-function-calls>`_
+
+Examples
+--------
+%(examples)s
+"""
+
+_transform_template = """
+Call function producing a like-indexed %(klass)s on each group and
+return a %(klass)s having the same indexes as the original object
+filled with the transformed values.
+
+Parameters
+----------
+f : function
+    Function to apply to each group.
+
+    Can also accept a Numba JIT function with
+    ``engine='numba'`` specified.
+
+    If the ``'numba'`` engine is chosen, the function must be
+    a user defined function with ``values`` and ``index`` as the
+    first and second arguments respectively in the function signature.
+    Each group's index will be passed to the user defined function
+    and optionally available for use.
+
+    .. versionchanged:: 1.1.0
+*args
+    Positional arguments to pass to func.
+engine : str, default None
+    * ``'cython'`` : Runs the function through C-extensions from cython.
+    * ``'numba'`` : Runs the function through JIT compiled code from numba.
+    * ``None`` : Defaults to ``'cython'`` or the global setting ``compute.use_numba``
+
+    .. versionadded:: 1.1.0
+engine_kwargs : dict, default None
+    * For ``'cython'`` engine, there are no accepted ``engine_kwargs``
+    * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil``
+      and ``parallel`` dictionary keys. The values must either be ``True`` or
+      ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is
+      ``{'nopython': True, 'nogil': False, 'parallel': False}`` and will be
+      applied to the function
+
+    .. versionadded:: 1.1.0
+**kwargs
+    Keyword arguments to be passed into func.
+
+Returns
+-------
+%(klass)s
+
+See Also
+--------
+%(klass)s.groupby.apply : Apply function ``func`` group-wise and combine
+    the results together.
+%(klass)s.groupby.aggregate : Aggregate using one or more
+    operations over the specified axis.
+%(klass)s.transform : Call ``func`` on self producing a %(klass)s with the
+    same axis shape as self.
+
+Notes
+-----
+Each group is endowed with the attribute 'name' in case you need to know
+which group you are working on.
+
+The current implementation imposes three requirements on f:
+
+* f must return a value that either has the same shape as the input
+  subframe or can be broadcast to the shape of the input subframe.
+  For example, if `f` returns a scalar it will be broadcast to have the
+  same shape as the input subframe.
+* if this is a DataFrame, f must support application column-by-column + in the subframe. If f also supports application to the entire subframe, + then a fast path is used starting from the second chunk. +* f must not mutate groups. Mutation is not supported and may + produce unexpected results. See :ref:`gotchas.udf-mutation` for more details. + +When using ``engine='numba'``, there will be no "fall back" behavior internally. +The group data and group index will be passed as numpy arrays to the JITed +user defined function, and no alternative execution attempts will be tried. + +.. versionchanged:: 1.3.0 + + The resulting dtype will reflect the return value of the passed ``func``, + see the examples below. + +Examples +-------- + +>>> df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar', +... 'foo', 'bar'], +... 'B' : ['one', 'one', 'two', 'three', +... 'two', 'two'], +... 'C' : [1, 5, 5, 2, 5, 5], +... 'D' : [2.0, 5., 8., 1., 2., 9.]}) +>>> grouped = df.groupby('A') +>>> grouped.transform(lambda x: (x - x.mean()) / x.std()) + C D +0 -1.154701 -0.577350 +1 0.577350 0.000000 +2 0.577350 1.154701 +3 -1.154701 -1.000000 +4 0.577350 -0.577350 +5 0.577350 1.000000 + +Broadcast result of the transformation + +>>> grouped.transform(lambda x: x.max() - x.min()) + C D +0 4 6.0 +1 3 8.0 +2 4 6.0 +3 3 8.0 +4 4 6.0 +5 3 8.0 + +.. versionchanged:: 1.3.0 + + The resulting dtype will reflect the return value of the passed ``func``, + for example: + +>>> grouped[['C', 'D']].transform(lambda x: x.astype(int).max()) + C D +0 5 8 +1 5 9 +2 5 8 +3 5 9 +4 5 8 +5 5 9 +""" + +_agg_template = """ +Aggregate using one or more operations over the specified axis. + +Parameters +---------- +func : function, str, list or dict + Function to use for aggregating the data. If a function, must either + work when passed a {klass} or when passed to {klass}.apply. + + Accepted combinations are: + + - function + - string function name + - list of functions and/or function names, e.g. ``[np.sum, 'mean']`` + - dict of axis labels -> functions, function names or list of such. + + Can also accept a Numba JIT function with + ``engine='numba'`` specified. Only passing a single function is supported + with this engine. + + If the ``'numba'`` engine is chosen, the function must be + a user defined function with ``values`` and ``index`` as the + first and second arguments respectively in the function signature. + Each group's index will be passed to the user defined function + and optionally available for use. + + .. versionchanged:: 1.1.0 +*args + Positional arguments to pass to func. +engine : str, default None + * ``'cython'`` : Runs the function through C-extensions from cython. + * ``'numba'`` : Runs the function through JIT compiled code from numba. + * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba`` + + .. versionadded:: 1.1.0 +engine_kwargs : dict, default None + * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` + * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` + and ``parallel`` dictionary keys. The values must either be ``True`` or + ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is + ``{{'nopython': True, 'nogil': False, 'parallel': False}}`` and will be + applied to the function + + .. versionadded:: 1.1.0 +**kwargs + Keyword arguments to be passed into func. + +Returns +------- +{klass} + +See Also +-------- +{klass}.groupby.apply : Apply function func group-wise + and combine the results together. 
+{klass}.groupby.transform : Transforms the Series on each group
+    based on the given function.
+{klass}.aggregate : Aggregate using one or more
+    operations over the specified axis.
+
+Notes
+-----
+When using ``engine='numba'``, there will be no "fall back" behavior internally.
+The group data and group index will be passed as numpy arrays to the JITed
+user defined function, and no alternative execution attempts will be tried.
+
+Functions that mutate the passed object can produce unexpected
+behavior or errors and are not supported. See :ref:`gotchas.udf-mutation`
+for more details.
+
+.. versionchanged:: 1.3.0
+
+    The resulting dtype will reflect the return value of the passed ``func``,
+    see the examples below.
+{examples}"""
+
+
+@final
+class GroupByPlot(PandasObject):
+    """
+    Class implementing the .plot attribute for groupby objects.
+    """
+
+    def __init__(self, groupby: GroupBy):
+        self._groupby = groupby
+
+    def __call__(self, *args, **kwargs):
+        def f(self):
+            return self.plot(*args, **kwargs)
+
+        f.__name__ = "plot"
+        return self._groupby.apply(f)
+
+    def __getattr__(self, name: str):
+        def attr(*args, **kwargs):
+            def f(self):
+                return getattr(self.plot, name)(*args, **kwargs)
+
+            return self._groupby.apply(f)
+
+        return attr
+
+
+_KeysArgType = Union[
+    Hashable,
+    List[Hashable],
+    Callable[[Hashable], Hashable],
+    List[Callable[[Hashable], Hashable]],
+    Mapping[Hashable, Hashable],
+]
+
+
+class BaseGroupBy(PandasObject, SelectionMixin[NDFrameT], GroupByIndexingMixin):
+    _group_selection: IndexLabel | None = None
+    _apply_allowlist: frozenset[str] = frozenset()
+    _hidden_attrs = PandasObject._hidden_attrs | {
+        "as_index",
+        "axis",
+        "dropna",
+        "exclusions",
+        "grouper",
+        "group_keys",
+        "keys",
+        "level",
+        "mutated",
+        "obj",
+        "observed",
+        "sort",
+        "squeeze",
+    }
+
+    axis: int
+    grouper: ops.BaseGrouper
+    group_keys: bool
+
+    @final
+    def __len__(self) -> int:
+        return len(self.groups)
+
+    @final
+    def __repr__(self) -> str:
+        # TODO: Better repr for GroupBy object
+        return object.__repr__(self)
+
+    @final
+    @property
+    def groups(self) -> dict[Hashable, np.ndarray]:
+        """
+        Dict {group name -> group labels}.
+        """
+        return self.grouper.groups
+
+    @final
+    @property
+    def ngroups(self) -> int:
+        return self.grouper.ngroups
+
+    @final
+    @property
+    def indices(self):
+        """
+        Dict {group name -> group indices}.
+        """
+        return self.grouper.indices
+
+    @final
+    def _get_indices(self, names):
+        """
+        Safe get multiple indices, translate keys for
+        datelike to underlying repr.
+ """ + + def get_converter(s): + # possibly convert to the actual key types + # in the indices, could be a Timestamp or a np.datetime64 + if isinstance(s, datetime.datetime): + return lambda key: Timestamp(key) + elif isinstance(s, np.datetime64): + return lambda key: Timestamp(key).asm8 + else: + return lambda key: key + + if len(names) == 0: + return [] + + if len(self.indices) > 0: + index_sample = next(iter(self.indices)) + else: + index_sample = None # Dummy sample + + name_sample = names[0] + if isinstance(index_sample, tuple): + if not isinstance(name_sample, tuple): + msg = "must supply a tuple to get_group with multiple grouping keys" + raise ValueError(msg) + if not len(name_sample) == len(index_sample): + try: + # If the original grouper was a tuple + return [self.indices[name] for name in names] + except KeyError as err: + # turns out it wasn't a tuple + msg = ( + "must supply a same-length tuple to get_group " + "with multiple grouping keys" + ) + raise ValueError(msg) from err + + converters = [get_converter(s) for s in index_sample] + names = (tuple(f(n) for f, n in zip(converters, name)) for name in names) + + else: + converter = get_converter(index_sample) + names = (converter(name) for name in names) + + return [self.indices.get(name, []) for name in names] + + @final + def _get_index(self, name): + """ + Safe get index, translate keys for datelike to underlying repr. + """ + return self._get_indices([name])[0] + + @final + @cache_readonly + def _selected_obj(self): + # Note: _selected_obj is always just `self.obj` for SeriesGroupBy + + if self._selection is None or isinstance(self.obj, Series): + if self._group_selection is not None: + return self.obj[self._group_selection] + return self.obj + else: + return self.obj[self._selection] + + @final + def _dir_additions(self) -> set[str]: + return self.obj._dir_additions() | self._apply_allowlist + + @Substitution( + klass="GroupBy", + examples=dedent( + """\ + >>> df = pd.DataFrame({'A': 'a b a b'.split(), 'B': [1, 2, 3, 4]}) + >>> df + A B + 0 a 1 + 1 b 2 + 2 a 3 + 3 b 4 + + To get the difference between each groups maximum and minimum value in one + pass, you can do + + >>> df.groupby('A').pipe(lambda x: x.max() - x.min()) + B + A + a 2 + b 2""" + ), + ) + @Appender(_pipe_template) + def pipe( + self, + func: Callable[..., T] | tuple[Callable[..., T], str], + *args, + **kwargs, + ) -> T: + return com.pipe(self, func, *args, **kwargs) + + plot = property(GroupByPlot) + + @final + def get_group(self, name, obj=None) -> DataFrame | Series: + """ + Construct DataFrame from group with provided name. + + Parameters + ---------- + name : object + The name of the group to get as a DataFrame. + obj : DataFrame, default None + The DataFrame to take the DataFrame out of. If + it is None, the object groupby was called on will + be used. + + Returns + ------- + group : same type as obj + """ + if obj is None: + obj = self._selected_obj + + inds = self._get_index(name) + if not len(inds): + raise KeyError(name) + + return obj._take_with_is_copy(inds, axis=self.axis) + + @final + def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]: + """ + Groupby iterator. 
+ + Returns + ------- + Generator yielding sequence of (name, subsetted object) + for each group + """ + return self.grouper.get_iterator(self._selected_obj, axis=self.axis) + + +# To track operations that expand dimensions, like ohlc +OutputFrameOrSeries = TypeVar("OutputFrameOrSeries", bound=NDFrame) + + +class GroupBy(BaseGroupBy[NDFrameT]): + """ + Class for grouping and aggregating relational data. + + See aggregate, transform, and apply functions on this object. + + It's easiest to use obj.groupby(...) to use GroupBy, but you can also do: + + :: + + grouped = groupby(obj, ...) + + Parameters + ---------- + obj : pandas object + axis : int, default 0 + level : int, default None + Level of MultiIndex + groupings : list of Grouping objects + Most users should ignore this + exclusions : array-like, optional + List of columns to exclude + name : str + Most users should ignore this + + Returns + ------- + **Attributes** + groups : dict + {group name -> group labels} + len(grouped) : int + Number of groups + + Notes + ----- + After grouping, see aggregate, apply, and transform functions. Here are + some other brief notes about usage. When grouping by multiple groups, the + result index will be a MultiIndex (hierarchical) by default. + + Iteration produces (key, group) tuples, i.e. chunking the data by group. So + you can write code like: + + :: + + grouped = obj.groupby(keys, axis=axis) + for key, group in grouped: + # do something with the data + + Function calls on GroupBy, if not specially implemented, "dispatch" to the + grouped data. So if you group a DataFrame and wish to invoke the std() + method on each group, you can simply do: + + :: + + df.groupby(mapper).std() + + rather than + + :: + + df.groupby(mapper).aggregate(np.std) + + You can pass arguments to these "wrapped" functions, too. 
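+
+    For example, to pass the ``ddof`` argument through to each group's
+    ``std``:
+
+    ::
+
+        df.groupby(mapper).std(ddof=0)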
+ + See the online documentation for full exposition on these topics and much + more + """ + + grouper: ops.BaseGrouper + as_index: bool + + @final + def __init__( + self, + obj: NDFrameT, + keys: _KeysArgType | None = None, + axis: int = 0, + level: IndexLabel | None = None, + grouper: ops.BaseGrouper | None = None, + exclusions: frozenset[Hashable] | None = None, + selection: IndexLabel | None = None, + as_index: bool = True, + sort: bool = True, + group_keys: bool = True, + squeeze: bool = False, + observed: bool = False, + mutated: bool = False, + dropna: bool = True, + ): + + self._selection = selection + + assert isinstance(obj, NDFrame), type(obj) + + self.level = level + + if not as_index: + if not isinstance(obj, DataFrame): + raise TypeError("as_index=False only valid with DataFrame") + if axis != 0: + raise ValueError("as_index=False only valid for axis=0") + + self.as_index = as_index + self.keys = keys + self.sort = sort + self.group_keys = group_keys + self.squeeze = squeeze + self.observed = observed + self.mutated = mutated + self.dropna = dropna + + if grouper is None: + from pandas.core.groupby.grouper import get_grouper + + grouper, exclusions, obj = get_grouper( + obj, + keys, + axis=axis, + level=level, + sort=sort, + observed=observed, + mutated=self.mutated, + dropna=self.dropna, + ) + + self.obj = obj + self.axis = obj._get_axis_number(axis) + self.grouper = grouper + self.exclusions = frozenset(exclusions) if exclusions else frozenset() + + def __getattr__(self, attr: str): + if attr in self._internal_names_set: + return object.__getattribute__(self, attr) + if attr in self.obj: + return self[attr] + + raise AttributeError( + f"'{type(self).__name__}' object has no attribute '{attr}'" + ) + + def __getattribute__(self, attr: str): + # Intercept nth to allow both call and index + if attr == "nth": + return GroupByNthSelector(self) + elif attr == "nth_actual": + return super().__getattribute__("nth") + else: + return super().__getattribute__(attr) + + @final + def _make_wrapper(self, name: str) -> Callable: + assert name in self._apply_allowlist + + with self._group_selection_context(): + # need to setup the selection + # as are not passed directly but in the grouper + f = getattr(self._obj_with_exclusions, name) + if not isinstance(f, types.MethodType): + return self.apply(lambda self: getattr(self, name)) + + f = getattr(type(self._obj_with_exclusions), name) + sig = inspect.signature(f) + + def wrapper(*args, **kwargs): + # a little trickery for aggregation functions that need an axis + # argument + if "axis" in sig.parameters: + if kwargs.get("axis", None) is None: + kwargs["axis"] = self.axis + + def curried(x): + return f(x, *args, **kwargs) + + # preserve the name so we can detect it when calling plot methods, + # to avoid duplicates + curried.__name__ = name + + # special case otherwise extra plots are created when catching the + # exception below + if name in base.plotting_methods: + return self.apply(curried) + + return self._python_apply_general(curried, self._obj_with_exclusions) + + wrapper.__name__ = name + return wrapper + + # ----------------------------------------------------------------- + # Selection + + @final + def _set_group_selection(self) -> None: + """ + Create group based selection. + + Used when selection is not passed directly but instead via a grouper. 
+ + NOTE: this should be paired with a call to _reset_group_selection + """ + # This is a no-op for SeriesGroupBy + grp = self.grouper + if not ( + self.as_index + and grp.groupings is not None + and self.obj.ndim > 1 + and self._group_selection is None + ): + return + + groupers = [g.name for g in grp.groupings if g.level is None and g.in_axis] + + if len(groupers): + # GH12839 clear selected obj cache when group selection changes + ax = self.obj._info_axis + self._group_selection = ax.difference(Index(groupers), sort=False).tolist() + self._reset_cache("_selected_obj") + + @final + def _reset_group_selection(self) -> None: + """ + Clear group based selection. + + Used for methods needing to return info on each group regardless of + whether a group selection was previously set. + """ + if self._group_selection is not None: + # GH12839 clear cached selection too when changing group selection + self._group_selection = None + self._reset_cache("_selected_obj") + + @contextmanager + def _group_selection_context(self) -> Iterator[GroupBy]: + """ + Set / reset the _group_selection_context. + """ + self._set_group_selection() + try: + yield self + finally: + self._reset_group_selection() + + def _iterate_slices(self) -> Iterable[Series]: + raise AbstractMethodError(self) + + # ----------------------------------------------------------------- + # Dispatch/Wrapping + + @final + def _concat_objects(self, values, not_indexed_same: bool = False): + from pandas.core.reshape.concat import concat + + def reset_identity(values): + # reset the identities of the components + # of the values to prevent aliasing + for v in com.not_none(*values): + ax = v._get_axis(self.axis) + ax._reset_identity() + return values + + if not not_indexed_same: + result = concat(values, axis=self.axis) + + ax = self._selected_obj._get_axis(self.axis) + if self.dropna: + labels = self.grouper.group_info[0] + mask = labels != -1 + ax = ax[mask] + + # this is a very unfortunate situation + # we can't use reindex to restore the original order + # when the ax has duplicates + # so we resort to this + # GH 14776, 30667 + if ax.has_duplicates and not result.axes[self.axis].equals(ax): + indexer, _ = result.index.get_indexer_non_unique(ax._values) + indexer = algorithms.unique1d(indexer) + result = result.take(indexer, axis=self.axis) + else: + result = result.reindex(ax, axis=self.axis, copy=False) + + elif self.group_keys: + + values = reset_identity(values) + if self.as_index: + + # possible MI return case + group_keys = self.grouper.result_index + group_levels = self.grouper.levels + group_names = self.grouper.names + + result = concat( + values, + axis=self.axis, + keys=group_keys, + levels=group_levels, + names=group_names, + sort=False, + ) + else: + + # GH5610, returns a MI, with the first level being a + # range index + keys = list(range(len(values))) + result = concat(values, axis=self.axis, keys=keys) + else: + values = reset_identity(values) + result = concat(values, axis=self.axis) + + name = self.obj.name if self.obj.ndim == 1 else self._selection + if isinstance(result, Series) and name is not None: + + result.name = name + + return result + + @final + def _set_result_index_ordered( + self, result: OutputFrameOrSeries + ) -> OutputFrameOrSeries: + # set the result index on the passed values object and + # return the new object, xref 8046 + + if self.grouper.is_monotonic: + # shortcut if we have an already ordered grouper + result.set_axis(self.obj._get_axis(self.axis), axis=self.axis, inplace=True) + return result + + # 
row order is scrambled => sort the rows by position in original index + original_positions = Index( + np.concatenate(self._get_indices(self.grouper.result_index)) + ) + result.set_axis(original_positions, axis=self.axis, inplace=True) + result = result.sort_index(axis=self.axis) + + dropped_rows = len(result.index) < len(self.obj.index) + + if dropped_rows: + # get index by slicing original index according to original positions + # slice drops attrs => use set_axis when no rows were dropped + sorted_indexer = result.index + result.index = self._selected_obj.index[sorted_indexer] + else: + result.set_axis(self.obj._get_axis(self.axis), axis=self.axis, inplace=True) + + return result + + def _indexed_output_to_ndframe( + self, result: Mapping[base.OutputKey, ArrayLike] + ) -> Series | DataFrame: + raise AbstractMethodError(self) + + @final + def _wrap_aggregated_output( + self, + output: Series | DataFrame | Mapping[base.OutputKey, ArrayLike], + qs: npt.NDArray[np.float64] | None = None, + ): + """ + Wraps the output of GroupBy aggregations into the expected result. + + Parameters + ---------- + output : Series, DataFrame, or Mapping[base.OutputKey, ArrayLike] + Data to wrap. + + Returns + ------- + Series or DataFrame + """ + + if isinstance(output, (Series, DataFrame)): + # We get here (for DataFrameGroupBy) if we used Manager.grouped_reduce, + # in which case our columns are already set correctly. + # ATM we do not get here for SeriesGroupBy; when we do, we will + # need to require that result.name already match self.obj.name + result = output + else: + result = self._indexed_output_to_ndframe(output) + + if not self.as_index: + # `not self.as_index` is only relevant for DataFrameGroupBy, + # enforced in __init__ + self._insert_inaxis_grouper_inplace(result) + result = result._consolidate() + index = Index(range(self.grouper.ngroups)) + + else: + index = self.grouper.result_index + + if qs is not None: + # We get here with len(qs) != 1 and not self.as_index + # in test_pass_args_kwargs + index = _insert_quantile_level(index, qs) + + result.index = index + + if self.axis == 1: + # Only relevant for DataFrameGroupBy, no-op for SeriesGroupBy + result = result.T + if result.index.equals(self.obj.index): + # Retain e.g. DatetimeIndex/TimedeltaIndex freq + result.index = self.obj.index.copy() + # TODO: Do this more systematically + + return self._reindex_output(result, qs=qs) + + @final + def _wrap_transformed_output( + self, output: Mapping[base.OutputKey, ArrayLike] + ) -> Series | DataFrame: + """ + Wraps the output of GroupBy transformations into the expected result. + + Parameters + ---------- + output : Mapping[base.OutputKey, ArrayLike] + Data to wrap. + + Returns + ------- + Series or DataFrame + Series for SeriesGroupBy, DataFrame for DataFrameGroupBy + """ + if isinstance(output, (Series, DataFrame)): + result = output + else: + result = self._indexed_output_to_ndframe(output) + + if self.axis == 1: + # Only relevant for DataFrameGroupBy + result = result.T + result.columns = self.obj.columns + + result.index = self.obj.index + return result + + def _wrap_applied_output(self, data, values: list, not_indexed_same: bool = False): + raise AbstractMethodError(self) + + def _resolve_numeric_only(self, numeric_only: bool | lib.NoDefault) -> bool: + """ + Determine subclass-specific default value for 'numeric_only'. + + For SeriesGroupBy we want the default to be False (to match Series behavior). + For DataFrameGroupBy we want it to be True (for backwards-compat). 
+ + Parameters + ---------- + numeric_only : bool or lib.no_default + + Returns + ------- + bool + """ + # GH#41291 + if numeric_only is lib.no_default: + # i.e. not explicitly passed by user + if self.obj.ndim == 2: + # i.e. DataFrameGroupBy + numeric_only = True + # GH#42395 GH#43108 GH#43154 + # Regression from 1.2.5 to 1.3 caused object columns to be dropped + if self.axis: + obj = self._obj_with_exclusions.T + else: + obj = self._obj_with_exclusions + check = obj._get_numeric_data() + if len(obj.columns) and not len(check.columns) and not obj.empty: + numeric_only = False + # TODO: v1.4+ Add FutureWarning + + else: + numeric_only = False + + # error: Incompatible return value type (got "Union[bool, NoDefault]", + # expected "bool") + return numeric_only # type: ignore[return-value] + + # ----------------------------------------------------------------- + # numba + + @final + def _numba_prep(self, func, data): + if not callable(func): + raise NotImplementedError( + "Numba engine can only be used with a single function." + ) + ids, _, ngroups = self.grouper.group_info + sorted_index = get_group_index_sorter(ids, ngroups) + sorted_ids = algorithms.take_nd(ids, sorted_index, allow_fill=False) + + sorted_data = data.take(sorted_index, axis=self.axis).to_numpy() + sorted_index_data = data.index.take(sorted_index).to_numpy() + + starts, ends = lib.generate_slices(sorted_ids, ngroups) + return ( + starts, + ends, + sorted_index_data, + sorted_data, + ) + + def _numba_agg_general( + self, + func: Callable, + engine_kwargs: dict[str, bool] | None, + numba_cache_key_str: str, + *aggregator_args, + ): + """ + Perform groupby with a standard numerical aggregation function (e.g. mean) + with Numba. + """ + if not self.as_index: + raise NotImplementedError( + "as_index=False is not supported. Use .reset_index() instead." + ) + if self.axis == 1: + raise NotImplementedError("axis=1 is not supported.") + + with self._group_selection_context(): + data = self._selected_obj + df = data if data.ndim == 2 else data.to_frame() + starts, ends, sorted_index, sorted_data = self._numba_prep(func, df) + aggregator = executor.generate_shared_aggregator( + func, engine_kwargs, numba_cache_key_str + ) + result = aggregator(sorted_data, starts, ends, 0, *aggregator_args) + + cache_key = (func, numba_cache_key_str) + if cache_key not in NUMBA_FUNC_CACHE: + NUMBA_FUNC_CACHE[cache_key] = aggregator + + index = self.grouper.result_index + if data.ndim == 1: + result_kwargs = {"name": data.name} + result = result.ravel() + else: + result_kwargs = {"columns": data.columns} + return data._constructor(result, index=index, **result_kwargs) + + @final + def _transform_with_numba(self, data, func, *args, engine_kwargs=None, **kwargs): + """ + Perform groupby transform routine with the numba engine. + + This routine mimics the data splitting routine of the DataSplitter class + to generate the indices of each group in the sorted data and then passes the + data and indices into a Numba jitted function. 
+ """ + starts, ends, sorted_index, sorted_data = self._numba_prep(func, data) + + numba_transform_func = numba_.generate_numba_transform_func( + kwargs, func, engine_kwargs + ) + result = numba_transform_func( + sorted_data, + sorted_index, + starts, + ends, + len(data.columns), + *args, + ) + + cache_key = (func, "groupby_transform") + if cache_key not in NUMBA_FUNC_CACHE: + NUMBA_FUNC_CACHE[cache_key] = numba_transform_func + + # result values needs to be resorted to their original positions since we + # evaluated the data sorted by group + return result.take(np.argsort(sorted_index), axis=0) + + @final + def _aggregate_with_numba(self, data, func, *args, engine_kwargs=None, **kwargs): + """ + Perform groupby aggregation routine with the numba engine. + + This routine mimics the data splitting routine of the DataSplitter class + to generate the indices of each group in the sorted data and then passes the + data and indices into a Numba jitted function. + """ + starts, ends, sorted_index, sorted_data = self._numba_prep(func, data) + + numba_agg_func = numba_.generate_numba_agg_func(kwargs, func, engine_kwargs) + result = numba_agg_func( + sorted_data, + sorted_index, + starts, + ends, + len(data.columns), + *args, + ) + + cache_key = (func, "groupby_agg") + if cache_key not in NUMBA_FUNC_CACHE: + NUMBA_FUNC_CACHE[cache_key] = numba_agg_func + + return result + + # ----------------------------------------------------------------- + # apply/agg/transform + + @Appender( + _apply_docs["template"].format( + input="dataframe", examples=_apply_docs["dataframe_examples"] + ) + ) + def apply(self, func, *args, **kwargs): + + func = com.is_builtin_func(func) + + # this is needed so we don't try and wrap strings. If we could + # resolve functions to their callable functions prior, this + # wouldn't be needed + if args or kwargs: + if callable(func): + + @wraps(func) + def f(g): + with np.errstate(all="ignore"): + return func(g, *args, **kwargs) + + elif hasattr(nanops, "nan" + func): + # TODO: should we wrap this in to e.g. _is_builtin_func? + f = getattr(nanops, "nan" + func) + + else: + raise ValueError( + "func must be a callable if args or kwargs are supplied" + ) + elif isinstance(func, str): + if hasattr(self, func): + res = getattr(self, func) + if callable(res): + return res() + return res + + else: + raise TypeError(f"apply func should be callable, not '{func}'") + else: + + f = func + + # ignore SettingWithCopy here in case the user mutates + with option_context("mode.chained_assignment", None): + try: + result = self._python_apply_general(f, self._selected_obj) + except TypeError: + # gh-20949 + # try again, with .apply acting as a filtering + # operation, by excluding the grouping column + # This would normally not be triggered + # except if the udf is trying an operation that + # fails on *some* columns, e.g. a numeric operation + # on a string grouper column + + with self._group_selection_context(): + return self._python_apply_general(f, self._selected_obj) + + return result + + @final + def _python_apply_general( + self, + f: Callable, + data: DataFrame | Series, + not_indexed_same: bool | None = None, + ) -> DataFrame | Series: + """ + Apply function f in python space + + Parameters + ---------- + f : callable + Function to apply + data : Series or DataFrame + Data to apply f to + not_indexed_same: bool, optional + When specified, overrides the value of not_indexed_same. 
Apply behaves + differently when the result index is equal to the input index, but + this can be coincidental leading to value-dependent behavior. + + Returns + ------- + Series or DataFrame + data after applying f + """ + values, mutated = self.grouper.apply(f, data, self.axis) + + if not_indexed_same is None: + not_indexed_same = mutated or self.mutated + + return self._wrap_applied_output( + data, values, not_indexed_same=not_indexed_same + ) + + @final + def _python_agg_general(self, func, *args, **kwargs): + func = com.is_builtin_func(func) + f = lambda x: func(x, *args, **kwargs) + + # iterate through "columns" ex exclusions to populate output dict + output: dict[base.OutputKey, ArrayLike] = {} + + if self.ngroups == 0: + # agg_series below assumes ngroups > 0 + return self._python_apply_general(f, self._selected_obj) + + for idx, obj in enumerate(self._iterate_slices()): + name = obj.name + + try: + # if this function is invalid for this dtype, we will ignore it. + result = self.grouper.agg_series(obj, f) + except TypeError: + warn_dropping_nuisance_columns_deprecated(type(self), "agg") + continue + + key = base.OutputKey(label=name, position=idx) + output[key] = result + + if not output: + return self._python_apply_general(f, self._selected_obj) + + return self._wrap_aggregated_output(output) + + @final + def _agg_general( + self, + numeric_only: bool = True, + min_count: int = -1, + *, + alias: str, + npfunc: Callable, + ): + + with self._group_selection_context(): + # try a cython aggregation if we can + result = self._cython_agg_general( + how=alias, + alt=npfunc, + numeric_only=numeric_only, + min_count=min_count, + ) + return result.__finalize__(self.obj, method="groupby") + + def _agg_py_fallback( + self, values: ArrayLike, ndim: int, alt: Callable + ) -> ArrayLike: + """ + Fallback to pure-python aggregation if _cython_operation raises + NotImplementedError. + """ + # We get here with a) EADtypes and b) object dtype + + if values.ndim == 1: + # For DataFrameGroupBy we only get here with ExtensionArray + ser = Series(values) + else: + # We only get here with values.dtype == object + # TODO: special case not needed with ArrayManager + df = DataFrame(values.T) + # bc we split object blocks in grouped_reduce, we have only 1 col + # otherwise we'd have to worry about block-splitting GH#39329 + assert df.shape[1] == 1 + # Avoid call to self.values that can occur in DataFrame + # reductions; see GH#28949 + ser = df.iloc[:, 0] + + # We do not get here with UDFs, so we know that our dtype + # should always be preserved by the implemented aggregations + # TODO: Is this exactly right; see WrappedCythonOp get_result_dtype? + res_values = self.grouper.agg_series(ser, alt, preserve_dtype=True) + + if isinstance(values, Categorical): + # Because we only get here with known dtype-preserving + # reductions, we cast back to Categorical. + # TODO: if we ever get "rank" working, exclude it here. 
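+            # Illustrative (not exhaustive): min/max on an ordered Categorical
+            # reduce to one of the existing categories, so casting the result
+            # back via _from_sequence with the original dtype is lossless here.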
+ res_values = type(values)._from_sequence(res_values, dtype=values.dtype) + + # If we are DataFrameGroupBy and went through a SeriesGroupByPath + # then we need to reshape + # GH#32223 includes case with IntegerArray values, ndarray res_values + # test_groupby_duplicate_columns with object dtype values + return ensure_block_shape(res_values, ndim=ndim) + + @final + def _cython_agg_general( + self, how: str, alt: Callable, numeric_only: bool, min_count: int = -1 + ): + # Note: we never get here with how="ohlc" for DataFrameGroupBy; + # that goes through SeriesGroupBy + + data = self._get_data_to_aggregate() + is_ser = data.ndim == 1 + + if numeric_only: + if is_ser and not is_numeric_dtype(self._selected_obj.dtype): + # GH#41291 match Series behavior + kwd_name = "numeric_only" + if how in ["any", "all"]: + kwd_name = "bool_only" + raise NotImplementedError( + f"{type(self).__name__}.{how} does not implement {kwd_name}." + ) + elif not is_ser: + data = data.get_numeric_data(copy=False) + + def array_func(values: ArrayLike) -> ArrayLike: + try: + result = self.grouper._cython_operation( + "aggregate", values, how, axis=data.ndim - 1, min_count=min_count + ) + except NotImplementedError: + # generally if we have numeric_only=False + # and non-applicable functions + # try to python agg + # TODO: shouldn't min_count matter? + result = self._agg_py_fallback(values, ndim=data.ndim, alt=alt) + + return result + + # TypeError -> we may have an exception in trying to aggregate + # continue and exclude the block + new_mgr = data.grouped_reduce(array_func, ignore_failures=True) + + if not is_ser and len(new_mgr) < len(data): + warn_dropping_nuisance_columns_deprecated(type(self), how) + + res = self._wrap_agged_manager(new_mgr) + if is_ser: + res.index = self.grouper.result_index + return self._reindex_output(res) + else: + return res + + def _cython_transform( + self, how: str, numeric_only: bool = True, axis: int = 0, **kwargs + ): + raise AbstractMethodError(self) + + @final + def _transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): + + if maybe_use_numba(engine): + # TODO: tests with self._selected_obj.ndim == 1 on DataFrameGroupBy + with self._group_selection_context(): + data = self._selected_obj + df = data if data.ndim == 2 else data.to_frame() + result = self._transform_with_numba( + df, func, *args, engine_kwargs=engine_kwargs, **kwargs + ) + if self.obj.ndim == 2: + return cast(DataFrame, self.obj)._constructor( + result, index=data.index, columns=data.columns + ) + else: + return cast(Series, self.obj)._constructor( + result.ravel(), index=data.index, name=data.name + ) + + # optimized transforms + func = com.get_cython_func(func) or func + + if not isinstance(func, str): + return self._transform_general(func, *args, **kwargs) + + elif func not in base.transform_kernel_allowlist: + msg = f"'{func}' is not a valid function name for transform(name)" + raise ValueError(msg) + elif func in base.cythonized_kernels or func in base.transformation_kernels: + # cythonized transform or canned "agg+broadcast" + return getattr(self, func)(*args, **kwargs) + + else: + # i.e. func in base.reduction_kernels + + # GH#30918 Use _transform_fast only when we know func is an aggregation + # If func is a reduction, we need to broadcast the + # result to the whole group. Compute func result + # and deal with possible broadcasting below. + # Temporarily set observed for dealing with categoricals. 
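+            # e.g. (illustrative) df.groupby("g").transform("sum") lands here:
+            # the per-group sums are computed once by the reduction below and
+            # then broadcast back to the shape of the original object.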
+ with com.temp_setattr(self, "observed", True): + result = getattr(self, func)(*args, **kwargs) + + if self._can_use_transform_fast(result): + return self._wrap_transform_fast_result(result) + + # only reached for DataFrameGroupBy + return self._transform_general(func, *args, **kwargs) + + @final + def _wrap_transform_fast_result(self, result: NDFrameT) -> NDFrameT: + """ + Fast transform path for aggregations. + """ + obj = self._obj_with_exclusions + + # for each col, reshape to size of original frame by take operation + ids, _, _ = self.grouper.group_info + result = result.reindex(self.grouper.result_index, copy=False) + + if self.obj.ndim == 1: + # i.e. SeriesGroupBy + out = algorithms.take_nd(result._values, ids) + output = obj._constructor(out, index=obj.index, name=obj.name) + else: + output = result.take(ids, axis=0) + output.index = obj.index + return output + + # ----------------------------------------------------------------- + # Utilities + + @final + def _apply_filter(self, indices, dropna): + if len(indices) == 0: + indices = np.array([], dtype="int64") + else: + indices = np.sort(np.concatenate(indices)) + if dropna: + filtered = self._selected_obj.take(indices, axis=self.axis) + else: + mask = np.empty(len(self._selected_obj.index), dtype=bool) + mask.fill(False) + mask[indices.astype(int)] = True + # mask fails to broadcast when passed to where; broadcast manually. + mask = np.tile(mask, list(self._selected_obj.shape[1:]) + [1]).T + filtered = self._selected_obj.where(mask) # Fill with NaNs. + return filtered + + @final + def _cumcount_array(self, ascending: bool = True) -> np.ndarray: + """ + Parameters + ---------- + ascending : bool, default True + If False, number in reverse, from length of group - 1 to 0. + + Notes + ----- + this is currently implementing sort=False + (though the default is sort=True) for groupby in general + """ + ids, _, ngroups = self.grouper.group_info + sorter = get_group_index_sorter(ids, ngroups) + ids, count = ids[sorter], len(ids) + + if count == 0: + return np.empty(0, dtype=np.int64) + + run = np.r_[True, ids[:-1] != ids[1:]] + rep = np.diff(np.r_[np.nonzero(run)[0], count]) + out = (~run).cumsum() + + if ascending: + out -= np.repeat(out[run], rep) + else: + out = np.repeat(out[np.r_[run[1:], True]], rep) - out + + rev = np.empty(count, dtype=np.intp) + rev[sorter] = np.arange(count, dtype=np.intp) + return out[rev].astype(np.int64, copy=False) + + # ----------------------------------------------------------------- + + @final + @property + def _obj_1d_constructor(self) -> type[Series]: + # GH28330 preserve subclassed Series/DataFrames + if isinstance(self.obj, DataFrame): + return self.obj._constructor_sliced + assert isinstance(self.obj, Series) + return self.obj._constructor + + @final + def _bool_agg(self, val_test: Literal["any", "all"], skipna: bool): + """ + Shared func to call any / all Cython GroupBy implementations. 
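+
+        Roughly what the public ``any`` / ``all`` wrappers built on this do
+        (illustrative data):
+
+        >>> ser = pd.Series([0, 1, 0, 0], index=['a', 'a', 'b', 'b'])
+        >>> ser.groupby(level=0).any()
+        a     True
+        b    False
+        dtype: bool
+        >>> ser.groupby(level=0).all()
+        a    False
+        b    False
+        dtype: bool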
+ """ + + def objs_to_bool(vals: ArrayLike) -> tuple[np.ndarray, type]: + if is_object_dtype(vals.dtype): + # GH#37501: don't raise on pd.NA when skipna=True + if skipna: + func = np.vectorize( + lambda x: bool(x) if not isna(x) else True, otypes=[bool] + ) + vals = func(vals) + else: + vals = vals.astype(bool, copy=False) + + vals = cast(np.ndarray, vals) + elif isinstance(vals, BaseMaskedArray): + vals = vals._data.astype(bool, copy=False) + else: + vals = vals.astype(bool, copy=False) + + return vals.view(np.int8), bool + + def result_to_bool( + result: np.ndarray, + inference: type, + nullable: bool = False, + ) -> ArrayLike: + if nullable: + return BooleanArray(result.astype(bool, copy=False), result == -1) + else: + return result.astype(inference, copy=False) + + return self._get_cythonized_result( + libgroupby.group_any_all, + numeric_only=False, + cython_dtype=np.dtype(np.int8), + needs_mask=True, + needs_nullable=True, + pre_processing=objs_to_bool, + post_processing=result_to_bool, + val_test=val_test, + skipna=skipna, + ) + + @final + @Substitution(name="groupby") + @Appender(_common_see_also) + def any(self, skipna: bool = True): + """ + Return True if any value in the group is truthful, else False. + + Parameters + ---------- + skipna : bool, default True + Flag to ignore nan values during truth testing. + + Returns + ------- + Series or DataFrame + DataFrame or Series of boolean values, where a value is True if any element + is True within its respective group, False otherwise. + """ + return self._bool_agg("any", skipna) + + @final + @Substitution(name="groupby") + @Appender(_common_see_also) + def all(self, skipna: bool = True): + """ + Return True if all values in the group are truthful, else False. + + Parameters + ---------- + skipna : bool, default True + Flag to ignore nan values during truth testing. + + Returns + ------- + Series or DataFrame + DataFrame or Series of boolean values, where a value is True if all elements + are True within its respective group, False otherwise. + """ + return self._bool_agg("all", skipna) + + @final + @Substitution(name="groupby") + @Appender(_common_see_also) + def count(self) -> Series | DataFrame: + """ + Compute count of group, excluding missing values. + + Returns + ------- + Series or DataFrame + Count of values within each group. + """ + data = self._get_data_to_aggregate() + ids, _, ngroups = self.grouper.group_info + mask = ids != -1 + + is_series = data.ndim == 1 + + def hfunc(bvalues: ArrayLike) -> ArrayLike: + # TODO(EA2D): reshape would not be necessary with 2D EAs + if bvalues.ndim == 1: + # EA + masked = mask & ~isna(bvalues).reshape(1, -1) + else: + masked = mask & ~isna(bvalues) + + counted = lib.count_level_2d(masked, labels=ids, max_bin=ngroups, axis=1) + if is_series: + assert counted.ndim == 2 + assert counted.shape[0] == 1 + return counted[0] + return counted + + new_mgr = data.grouped_reduce(hfunc) + + # If we are grouping on categoricals we want unobserved categories to + # return zero, rather than the default of NaN which the reindexing in + # _wrap_agged_manager() returns. 
GH 35028 + with com.temp_setattr(self, "observed", True): + result = self._wrap_agged_manager(new_mgr) + + if result.ndim == 1: + result.index = self.grouper.result_index + + return self._reindex_output(result, fill_value=0) + + @final + @Substitution(name="groupby") + @Substitution(see_also=_common_see_also) + def mean( + self, + numeric_only: bool | lib.NoDefault = lib.no_default, + engine: str = "cython", + engine_kwargs: dict[str, bool] | None = None, + ): + """ + Compute mean of groups, excluding missing values. + + Parameters + ---------- + numeric_only : bool, default True + Include only float, int, boolean columns. If None, will attempt to use + everything, then use only numeric data. + + engine : str, default None + * ``'cython'`` : Runs the operation through C-extensions from cython. + * ``'numba'`` : Runs the operation through JIT compiled code from numba. + * ``None`` : Defaults to ``'cython'`` or globally setting + ``compute.use_numba`` + + .. versionadded:: 1.4.0 + + engine_kwargs : dict, default None + * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` + * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` + and ``parallel`` dictionary keys. The values must either be ``True`` or + ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is + ``{{'nopython': True, 'nogil': False, 'parallel': False}}`` + + .. versionadded:: 1.4.0 + + Returns + ------- + pandas.Series or pandas.DataFrame + %(see_also)s + Examples + -------- + >>> df = pd.DataFrame({'A': [1, 1, 2, 1, 2], + ... 'B': [np.nan, 2, 3, 4, 5], + ... 'C': [1, 2, 1, 1, 2]}, columns=['A', 'B', 'C']) + + Groupby one column and return the mean of the remaining columns in + each group. + + >>> df.groupby('A').mean() + B C + A + 1 3.0 1.333333 + 2 4.0 1.500000 + + Groupby two columns and return the mean of the remaining column. + + >>> df.groupby(['A', 'B']).mean() + C + A B + 1 2.0 2.0 + 4.0 1.0 + 2 3.0 1.0 + 5.0 2.0 + + Groupby one column and return the mean of only particular column in + the group. + + >>> df.groupby('A')['B'].mean() + A + 1 3.0 + 2 4.0 + Name: B, dtype: float64 + """ + numeric_only_bool = self._resolve_numeric_only(numeric_only) + + if maybe_use_numba(engine): + from pandas.core._numba.kernels import sliding_mean + + return self._numba_agg_general(sliding_mean, engine_kwargs, "groupby_mean") + else: + result = self._cython_agg_general( + "mean", + alt=lambda x: Series(x).mean(numeric_only=numeric_only_bool), + numeric_only=numeric_only_bool, + ) + return result.__finalize__(self.obj, method="groupby") + + @final + @Substitution(name="groupby") + @Appender(_common_see_also) + def median(self, numeric_only: bool | lib.NoDefault = lib.no_default): + """ + Compute median of groups, excluding missing values. + + For multiple groupings, the result index will be a MultiIndex + + Parameters + ---------- + numeric_only : bool, default True + Include only float, int, boolean columns. If None, will attempt to use + everything, then use only numeric data. + + Returns + ------- + Series or DataFrame + Median of values within each group. 
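+
+        Examples
+        --------
+        A small illustrative example (data chosen arbitrarily):
+
+        >>> df = pd.DataFrame({'A': [1, 1, 2, 2], 'B': [1.0, 3.0, 4.0, 10.0]})
+        >>> df.groupby('A').median()
+             B
+        A
+        1  2.0
+        2  7.0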
+ """ + numeric_only_bool = self._resolve_numeric_only(numeric_only) + + result = self._cython_agg_general( + "median", + alt=lambda x: Series(x).median(numeric_only=numeric_only_bool), + numeric_only=numeric_only_bool, + ) + return result.__finalize__(self.obj, method="groupby") + + @final + @Substitution(name="groupby") + @Appender(_common_see_also) + def std( + self, + ddof: int = 1, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + ): + """ + Compute standard deviation of groups, excluding missing values. + + For multiple groupings, the result index will be a MultiIndex. + + Parameters + ---------- + ddof : int, default 1 + Degrees of freedom. + + engine : str, default None + * ``'cython'`` : Runs the operation through C-extensions from cython. + * ``'numba'`` : Runs the operation through JIT compiled code from numba. + * ``None`` : Defaults to ``'cython'`` or globally setting + ``compute.use_numba`` + + .. versionadded:: 1.4.0 + + engine_kwargs : dict, default None + * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` + * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` + and ``parallel`` dictionary keys. The values must either be ``True`` or + ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is + ``{{'nopython': True, 'nogil': False, 'parallel': False}}`` + + .. versionadded:: 1.4.0 + + Returns + ------- + Series or DataFrame + Standard deviation of values within each group. + """ + if maybe_use_numba(engine): + from pandas.core._numba.kernels import sliding_var + + return np.sqrt( + self._numba_agg_general(sliding_var, engine_kwargs, "groupby_std", ddof) + ) + else: + return self._get_cythonized_result( + libgroupby.group_var, + needs_counts=True, + cython_dtype=np.dtype(np.float64), + post_processing=lambda vals, inference: np.sqrt(vals), + ddof=ddof, + ) + + @final + @Substitution(name="groupby") + @Appender(_common_see_also) + def var( + self, + ddof: int = 1, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + ): + """ + Compute variance of groups, excluding missing values. + + For multiple groupings, the result index will be a MultiIndex. + + Parameters + ---------- + ddof : int, default 1 + Degrees of freedom. + + engine : str, default None + * ``'cython'`` : Runs the operation through C-extensions from cython. + * ``'numba'`` : Runs the operation through JIT compiled code from numba. + * ``None`` : Defaults to ``'cython'`` or globally setting + ``compute.use_numba`` + + .. versionadded:: 1.4.0 + + engine_kwargs : dict, default None + * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` + * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` + and ``parallel`` dictionary keys. The values must either be ``True`` or + ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is + ``{{'nopython': True, 'nogil': False, 'parallel': False}}`` + + .. versionadded:: 1.4.0 + + Returns + ------- + Series or DataFrame + Variance of values within each group. 
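+
+        Examples
+        --------
+        A small illustrative example (data chosen arbitrarily; the default
+        ``ddof=1`` is used):
+
+        >>> df = pd.DataFrame({'A': [1, 1, 2, 2], 'B': [1.0, 3.0, 4.0, 10.0]})
+        >>> df.groupby('A').var()
+              B
+        A
+        1   2.0
+        2  18.0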
+ """ + if maybe_use_numba(engine): + from pandas.core._numba.kernels import sliding_var + + return self._numba_agg_general( + sliding_var, engine_kwargs, "groupby_var", ddof + ) + else: + if ddof == 1: + numeric_only = self._resolve_numeric_only(lib.no_default) + return self._cython_agg_general( + "var", + alt=lambda x: Series(x).var(ddof=ddof), + numeric_only=numeric_only, + ) + else: + func = lambda x: x.var(ddof=ddof) + with self._group_selection_context(): + return self._python_agg_general(func) + + @final + @Substitution(name="groupby") + @Appender(_common_see_also) + def sem(self, ddof: int = 1): + """ + Compute standard error of the mean of groups, excluding missing values. + + For multiple groupings, the result index will be a MultiIndex. + + Parameters + ---------- + ddof : int, default 1 + Degrees of freedom. + + Returns + ------- + Series or DataFrame + Standard error of the mean of values within each group. + """ + result = self.std(ddof=ddof) + if result.ndim == 1: + result /= np.sqrt(self.count()) + else: + cols = result.columns.difference(self.exclusions).unique() + counts = self.count() + result_ilocs = result.columns.get_indexer_for(cols) + count_ilocs = counts.columns.get_indexer_for(cols) + result.iloc[:, result_ilocs] /= np.sqrt(counts.iloc[:, count_ilocs]) + return result + + @final + @Substitution(name="groupby") + @Appender(_common_see_also) + def size(self) -> DataFrame | Series: + """ + Compute group sizes. + + Returns + ------- + DataFrame or Series + Number of rows in each group as a Series if as_index is True + or a DataFrame if as_index is False. + """ + result = self.grouper.size() + + # GH28330 preserve subclassed Series/DataFrames through calls + if issubclass(self.obj._constructor, Series): + result = self._obj_1d_constructor(result, name=self.obj.name) + else: + result = self._obj_1d_constructor(result) + + if not self.as_index: + # Item "None" of "Optional[Series]" has no attribute "reset_index" + result = result.rename("size").reset_index() # type: ignore[union-attr] + + return self._reindex_output(result, fill_value=0) + + @final + @doc(_groupby_agg_method_template, fname="sum", no=True, mc=0) + def sum( + self, + numeric_only: bool | lib.NoDefault = lib.no_default, + min_count: int = 0, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + ): + if maybe_use_numba(engine): + from pandas.core._numba.kernels import sliding_sum + + return self._numba_agg_general( + sliding_sum, + engine_kwargs, + "groupby_sum", + ) + else: + numeric_only = self._resolve_numeric_only(numeric_only) + + # If we are grouping on categoricals we want unobserved categories to + # return zero, rather than the default of NaN which the reindexing in + # _agg_general() returns. 
GH #31422 + with com.temp_setattr(self, "observed", True): + result = self._agg_general( + numeric_only=numeric_only, + min_count=min_count, + alias="add", + npfunc=np.sum, + ) + + return self._reindex_output(result, fill_value=0) + + @final + @doc(_groupby_agg_method_template, fname="prod", no=True, mc=0) + def prod( + self, numeric_only: bool | lib.NoDefault = lib.no_default, min_count: int = 0 + ): + numeric_only = self._resolve_numeric_only(numeric_only) + + return self._agg_general( + numeric_only=numeric_only, min_count=min_count, alias="prod", npfunc=np.prod + ) + + @final + @doc(_groupby_agg_method_template, fname="min", no=False, mc=-1) + def min(self, numeric_only: bool = False, min_count: int = -1): + return self._agg_general( + numeric_only=numeric_only, min_count=min_count, alias="min", npfunc=np.min + ) + + @final + @doc(_groupby_agg_method_template, fname="max", no=False, mc=-1) + def max(self, numeric_only: bool = False, min_count: int = -1): + return self._agg_general( + numeric_only=numeric_only, min_count=min_count, alias="max", npfunc=np.max + ) + + @final + @doc(_groupby_agg_method_template, fname="first", no=False, mc=-1) + def first(self, numeric_only: bool = False, min_count: int = -1): + def first_compat(obj: NDFrameT, axis: int = 0): + def first(x: Series): + """Helper function for first item that isn't NA.""" + arr = x.array[notna(x.array)] + if not len(arr): + return np.nan + return arr[0] + + if isinstance(obj, DataFrame): + return obj.apply(first, axis=axis) + elif isinstance(obj, Series): + return first(obj) + else: # pragma: no cover + raise TypeError(type(obj)) + + return self._agg_general( + numeric_only=numeric_only, + min_count=min_count, + alias="first", + npfunc=first_compat, + ) + + @final + @doc(_groupby_agg_method_template, fname="last", no=False, mc=-1) + def last(self, numeric_only: bool = False, min_count: int = -1): + def last_compat(obj: NDFrameT, axis: int = 0): + def last(x: Series): + """Helper function for last item that isn't NA.""" + arr = x.array[notna(x.array)] + if not len(arr): + return np.nan + return arr[-1] + + if isinstance(obj, DataFrame): + return obj.apply(last, axis=axis) + elif isinstance(obj, Series): + return last(obj) + else: # pragma: no cover + raise TypeError(type(obj)) + + return self._agg_general( + numeric_only=numeric_only, + min_count=min_count, + alias="last", + npfunc=last_compat, + ) + + @final + @Substitution(name="groupby") + @Appender(_common_see_also) + def ohlc(self) -> DataFrame: + """ + Compute open, high, low and close values of a group, excluding missing values. + + For multiple groupings, the result index will be a MultiIndex + + Returns + ------- + DataFrame + Open, high, low and close values within each group. 
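+
+        Examples
+        --------
+        An illustrative example (data chosen arbitrarily):
+
+        >>> ser = pd.Series([1, 3, 2, 4], index=['a', 'a', 'b', 'b'])
+        >>> ser.groupby(level=0).ohlc()
+           open  high  low  close
+        a     1     3    1      3
+        b     2     4    2      4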
+        """
+        if self.obj.ndim == 1:
+            # self._iterate_slices() yields only self._selected_obj
+            obj = self._selected_obj
+
+            is_numeric = is_numeric_dtype(obj.dtype)
+            if not is_numeric:
+                raise DataError("No numeric types to aggregate")
+
+            res_values = self.grouper._cython_operation(
+                "aggregate", obj._values, "ohlc", axis=0, min_count=-1
+            )
+
+            agg_names = ["open", "high", "low", "close"]
+            result = self.obj._constructor_expanddim(
+                res_values, index=self.grouper.result_index, columns=agg_names
+            )
+            return self._reindex_output(result)
+
+        return self._apply_to_column_groupbys(
+            lambda x: x.ohlc(), self._obj_with_exclusions
+        )
+
+    @doc(DataFrame.describe)
+    def describe(self, **kwargs):
+        with self._group_selection_context():
+            result = self.apply(lambda x: x.describe(**kwargs))
+            if self.axis == 1:
+                return result.T
+            return result.unstack()
+
+    @final
+    def resample(self, rule, *args, **kwargs):
+        """
+        Provide resampling when using a TimeGrouper.
+
+        Given a grouper, the function resamples it according to the given
+        frequency string or DateOffset.
+
+        See the :ref:`frequency aliases <timeseries.offset_aliases>`
+        documentation for more details.
+
+        Parameters
+        ----------
+        rule : str or DateOffset
+            The offset string or object representing target grouper conversion.
+        *args, **kwargs
+            Possible arguments are `how`, `fill_method`, `limit`, `kind` and
+            `on`, and other arguments of `TimeGrouper`.
+
+        Returns
+        -------
+        Grouper
+            Return a new grouper with our resampler appended.
+
+        See Also
+        --------
+        Grouper : Specify a frequency to resample with when
+            grouping by a key.
+        DatetimeIndex.resample : Frequency conversion and resampling of
+            time series.
+
+        Examples
+        --------
+        >>> idx = pd.date_range('1/1/2000', periods=4, freq='T')
+        >>> df = pd.DataFrame(data=4 * [range(2)],
+        ...                   index=idx,
+        ...                   columns=['a', 'b'])
+        >>> df.iloc[2, 0] = 5
+        >>> df
+                             a  b
+        2000-01-01 00:00:00  0  1
+        2000-01-01 00:01:00  0  1
+        2000-01-01 00:02:00  5  1
+        2000-01-01 00:03:00  0  1
+
+        Downsample the DataFrame into 3 minute bins and sum the values of
+        the timestamps falling into a bin.
+
+        >>> df.groupby('a').resample('3T').sum()
+                                 a  b
+        a
+        0   2000-01-01 00:00:00  0  2
+            2000-01-01 00:03:00  0  1
+        5   2000-01-01 00:00:00  5  1
+
+        Upsample the series into 30 second bins.
+
+        >>> df.groupby('a').resample('30S').sum()
+                                 a  b
+        a
+        0   2000-01-01 00:00:00  0  1
+            2000-01-01 00:00:30  0  0
+            2000-01-01 00:01:00  0  1
+            2000-01-01 00:01:30  0  0
+            2000-01-01 00:02:00  0  0
+            2000-01-01 00:02:30  0  0
+            2000-01-01 00:03:00  0  1
+        5   2000-01-01 00:02:00  5  1
+
+        Resample by month. Values are assigned to the month of the period.
+
+        >>> df.groupby('a').resample('M').sum()
+                       a  b
+        a
+        0   2000-01-31  0  3
+        5   2000-01-31  5  1
+
+        Downsample the series into 3 minute bins as above, but close the right
+        side of the bin interval.
+
+        >>> df.groupby('a').resample('3T', closed='right').sum()
+                                 a  b
+        a
+        0   1999-12-31 23:57:00  0  1
+            2000-01-01 00:00:00  0  2
+        5   2000-01-01 00:00:00  5  1
+
+        Downsample the series into 3 minute bins and close the right side of
+        the bin interval, but label each bin using the right edge instead of
+        the left.
+
+        >>> df.groupby('a').resample('3T', closed='right', label='right').sum()
+                                 a  b
+        a
+        0   2000-01-01 00:00:00  0  1
+            2000-01-01 00:03:00  0  2
+        5   2000-01-01 00:03:00  5  1
+        """
+        from pandas.core.resample import get_resampler_for_grouping
+
+        return get_resampler_for_grouping(self, rule, *args, **kwargs)
+
+    @final
+    @Substitution(name="groupby")
+    @Appender(_common_see_also)
+    def rolling(self, *args, **kwargs):
+        """
+        Return a rolling grouper, providing rolling functionality per group.
+        """
+        from pandas.core.window import RollingGroupby
+
+        return RollingGroupby(
+            self._selected_obj,
+            *args,
+            _grouper=self.grouper,
+            _as_index=self.as_index,
+            **kwargs,
+        )
+
+    @final
+    @Substitution(name="groupby")
+    @Appender(_common_see_also)
+    def expanding(self, *args, **kwargs):
+        """
+        Return an expanding grouper, providing expanding
+        functionality per group.
+        """
+        from pandas.core.window import ExpandingGroupby
+
+        return ExpandingGroupby(
+            self._selected_obj,
+            *args,
+            _grouper=self.grouper,
+            **kwargs,
+        )
+
+    @final
+    @Substitution(name="groupby")
+    @Appender(_common_see_also)
+    def ewm(self, *args, **kwargs):
+        """
+        Return an ewm grouper, providing ewm functionality per group.
+        """
+        from pandas.core.window import ExponentialMovingWindowGroupby
+
+        return ExponentialMovingWindowGroupby(
+            self._selected_obj,
+            *args,
+            _grouper=self.grouper,
+            **kwargs,
+        )
+
+    @final
+    def _fill(self, direction: Literal["ffill", "bfill"], limit=None):
+        """
+        Shared function for `pad` and `backfill` to call Cython method.
+
+        Parameters
+        ----------
+        direction : {'ffill', 'bfill'}
+            Direction passed to underlying Cython function. `bfill` will cause
+            values to be filled backwards. `ffill` and any other values will
+            default to a forward fill.
+        limit : int, default None
+            Maximum number of consecutive values to fill. If `None`, this
+            method will convert to -1 prior to passing to Cython.
+
+        Returns
+        -------
+        `Series` or `DataFrame` with filled values
+
+        See Also
+        --------
+        pad : Forward fill the missing values in the dataset.
+        backfill : Backward fill the missing values in the dataset.
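+
+        Examples
+        --------
+        Roughly how the public ``ffill`` / ``bfill`` wrappers built on this
+        behave (illustrative data); note that fills never cross group
+        boundaries:
+
+        >>> ser = pd.Series([1.0, np.nan, np.nan, 2.0], index=['a', 'a', 'b', 'b'])
+        >>> ser.groupby(level=0).ffill()
+        a    1.0
+        a    1.0
+        b    NaN
+        b    2.0
+        dtype: float64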
+        """
+        # Need int value for Cython
+        if limit is None:
+            limit = -1
+
+        ids, _, _ = self.grouper.group_info
+        sorted_labels = np.argsort(ids, kind="mergesort").astype(np.intp, copy=False)
+        if direction == "bfill":
+            sorted_labels = sorted_labels[::-1]
+
+        col_func = partial(
+            libgroupby.group_fillna_indexer,
+            labels=ids,
+            sorted_labels=sorted_labels,
+            direction=direction,
+            limit=limit,
+            dropna=self.dropna,
+        )
+
+        def blk_func(values: ArrayLike) -> ArrayLike:
+            mask = isna(values)
+            if values.ndim == 1:
+                indexer = np.empty(values.shape, dtype=np.intp)
+                col_func(out=indexer, mask=mask)
+                return algorithms.take_nd(values, indexer)
+
+            else:
+                # We broadcast algorithms.take_nd analogous to
+                # np.take_along_axis
+
+                # Note: we only get here with backfill/pad,
+                # so if we have a dtype that cannot hold NAs,
+                # then there will be no -1s in indexer, so we can use
+                # the original dtype (no need to ensure_dtype_can_hold_na)
+                if isinstance(values, np.ndarray):
+                    out = np.empty(values.shape, dtype=values.dtype)
+                else:
+                    out = type(values)._empty(values.shape, dtype=values.dtype)
+
+                for i in range(len(values)):
+                    # call group_fillna_indexer column-wise
+                    indexer = np.empty(values.shape[1], dtype=np.intp)
+                    col_func(out=indexer, mask=mask[i])
+                    out[i, :] = algorithms.take_nd(values[i], indexer)
+                return out
+
+        obj = self._obj_with_exclusions
+        if self.axis == 1:
+            obj = obj.T
+        mgr = obj._mgr
+        res_mgr = mgr.apply(blk_func)
+
+        new_obj = obj._constructor(res_mgr)
+        if isinstance(new_obj, Series):
+            new_obj.name = obj.name
+
+        return self._wrap_transformed_output(new_obj)
+
+    @final
+    @Substitution(name="groupby")
+    def ffill(self, limit=None):
+        """
+        Forward fill the values.
+
+        Parameters
+        ----------
+        limit : int, optional
+            Limit of how many values to fill.
+
+        Returns
+        -------
+        Series or DataFrame
+            Object with missing values filled.
+
+        See Also
+        --------
+        Series.ffill: Forward fill the missing values in the dataset.
+        DataFrame.ffill: Object with missing values filled or None if inplace=True.
+        Series.fillna: Fill NaN values of a Series.
+        DataFrame.fillna: Fill NaN values of a DataFrame.
+        """
+        return self._fill("ffill", limit=limit)
+
+    def pad(self, limit=None):
+        warnings.warn(
+            "pad is deprecated and will be removed in a future version. "
+            "Use ffill instead.",
+            FutureWarning,
+            stacklevel=find_stack_level(),
+        )
+        return self.ffill(limit=limit)
+
+    pad.__doc__ = ffill.__doc__
+
+    @final
+    @Substitution(name="groupby")
+    def bfill(self, limit=None):
+        """
+        Backward fill the values.
+
+        Parameters
+        ----------
+        limit : int, optional
+            Limit of how many values to fill.
+
+        Returns
+        -------
+        Series or DataFrame
+            Object with missing values filled.
+
+        See Also
+        --------
+        Series.bfill : Backward fill the missing values in the dataset.
+        DataFrame.bfill: Backward fill the missing values in the dataset.
+        Series.fillna: Fill NaN values of a Series.
+        DataFrame.fillna: Fill NaN values of a DataFrame.
+        """
+        return self._fill("bfill", limit=limit)
+
+    def backfill(self, limit=None):
+        warnings.warn(
+            "backfill is deprecated and will be removed in a future version. 
" + "Use bfill instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self.bfill(limit=limit) + + backfill.__doc__ = bfill.__doc__ + + @final + @Substitution(name="groupby") + @Substitution(see_also=_common_see_also) + def nth( + self, + n: PositionalIndexer | tuple, + dropna: Literal["any", "all", None] = None, + ) -> NDFrameT: + """ + Take the nth row from each group if n is an int, otherwise a subset of rows. + + Can be either a call or an index. dropna is not available with index notation. + Index notation accepts a comma separated list of integers and slices. + + If dropna, will take the nth non-null row, dropna is either + 'all' or 'any'; this is equivalent to calling dropna(how=dropna) + before the groupby. + + Parameters + ---------- + n : int, slice or list of ints and slices + A single nth value for the row or a list of nth values or slices. + + .. versionchanged:: 1.4.0 + Added slice and lists containing slices. + Added index notation. + + dropna : {'any', 'all', None}, default None + Apply the specified dropna operation before counting which row is + the nth row. Only supported if n is an int. + + Returns + ------- + Series or DataFrame + N-th value within each group. + %(see_also)s + Examples + -------- + + >>> df = pd.DataFrame({'A': [1, 1, 2, 1, 2], + ... 'B': [np.nan, 2, 3, 4, 5]}, columns=['A', 'B']) + >>> g = df.groupby('A') + >>> g.nth(0) + B + A + 1 NaN + 2 3.0 + >>> g.nth(1) + B + A + 1 2.0 + 2 5.0 + >>> g.nth(-1) + B + A + 1 4.0 + 2 5.0 + >>> g.nth([0, 1]) + B + A + 1 NaN + 1 2.0 + 2 3.0 + 2 5.0 + >>> g.nth(slice(None, -1)) + B + A + 1 NaN + 1 2.0 + 2 3.0 + + Index notation may also be used + + >>> g.nth[0, 1] + B + A + 1 NaN + 1 2.0 + 2 3.0 + 2 5.0 + >>> g.nth[:-1] + B + A + 1 NaN + 1 2.0 + 2 3.0 + + Specifying `dropna` allows count ignoring ``NaN`` + + >>> g.nth(0, dropna='any') + B + A + 1 2.0 + 2 3.0 + + NaNs denote group exhausted when using dropna + + >>> g.nth(3, dropna='any') + B + A + 1 NaN + 2 NaN + + Specifying `as_index=False` in `groupby` keeps the original index. + + >>> df.groupby('A', as_index=False).nth(1) + A B + 1 1 2.0 + 4 2 5.0 + """ + if not dropna: + with self._group_selection_context(): + mask = self._make_mask_from_positional_indexer(n) + + ids, _, _ = self.grouper.group_info + + # Drop NA values in grouping + mask = mask & (ids != -1) + + out = self._mask_selected_obj(mask) + if not self.as_index: + return out + + result_index = self.grouper.result_index + if self.axis == 0: + out.index = result_index[ids[mask]] + if not self.observed and isinstance(result_index, CategoricalIndex): + out = out.reindex(result_index) + + out = self._reindex_output(out) + else: + out.columns = result_index[ids[mask]] + + return out.sort_index(axis=self.axis) if self.sort else out + + # dropna is truthy + if not is_integer(n): + raise ValueError("dropna option only supported for an integer argument") + + if dropna not in ["any", "all"]: + # Note: when agg-ing picker doesn't raise this, just returns NaN + raise ValueError( + "For a DataFrame or Series groupby.nth, dropna must be " + "either None, 'any' or 'all', " + f"(was passed {dropna})." + ) + + # old behaviour, but with all and any support for DataFrames. + # modified in GH 7559 to have better perf + n = cast(int, n) + max_len = n if n >= 0 else -1 - n + dropped = self.obj.dropna(how=dropna, axis=self.axis) + + # get a new grouper for our dropped obj + if self.keys is None and self.level is None: + + # we don't have the grouper info available + # (e.g. 
we have selected out + # a column that is not in the current object) + axis = self.grouper.axis + grouper = axis[axis.isin(dropped.index)] + + else: + + # create a grouper with the original parameters, but on dropped + # object + from pandas.core.groupby.grouper import get_grouper + + grouper, _, _ = get_grouper( + dropped, + key=self.keys, + axis=self.axis, + level=self.level, + sort=self.sort, + mutated=self.mutated, + ) + + grb = dropped.groupby( + grouper, as_index=self.as_index, sort=self.sort, axis=self.axis + ) + sizes, result = grb.size(), grb.nth(n) + mask = (sizes < max_len)._values + + # set the results which don't meet the criteria + if len(result) and mask.any(): + result.loc[mask] = np.nan + + # reset/reindex to the original groups + if len(self.obj) == len(dropped) or len(result) == len( + self.grouper.result_index + ): + result.index = self.grouper.result_index + else: + result = result.reindex(self.grouper.result_index) + + return result + + @final + def quantile(self, q=0.5, interpolation: str = "linear"): + """ + Return group values at the given quantile, a la numpy.percentile. + + Parameters + ---------- + q : float or array-like, default 0.5 (50% quantile) + Value(s) between 0 and 1 providing the quantile(s) to compute. + interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} + Method to use when the desired quantile falls between two points. + + Returns + ------- + Series or DataFrame + Return type determined by caller of GroupBy object. + + See Also + -------- + Series.quantile : Similar method for Series. + DataFrame.quantile : Similar method for DataFrame. + numpy.percentile : NumPy method to compute qth percentile. + + Examples + -------- + >>> df = pd.DataFrame([ + ... ['a', 1], ['a', 2], ['a', 3], + ... ['b', 1], ['b', 3], ['b', 5] + ... ], columns=['key', 'val']) + >>> df.groupby('key').quantile() + val + key + a 2.0 + b 3.0 + """ + + def pre_processor(vals: ArrayLike) -> tuple[np.ndarray, np.dtype | None]: + if is_object_dtype(vals): + raise TypeError( + "'quantile' cannot be performed against 'object' dtypes!" + ) + + inference: np.dtype | None = None + if is_integer_dtype(vals.dtype): + if isinstance(vals, ExtensionArray): + out = vals.to_numpy(dtype=float, na_value=np.nan) + else: + out = vals + inference = np.dtype(np.int64) + elif is_bool_dtype(vals.dtype) and isinstance(vals, ExtensionArray): + out = vals.to_numpy(dtype=float, na_value=np.nan) + elif is_datetime64_dtype(vals.dtype): + inference = np.dtype("datetime64[ns]") + out = np.asarray(vals).astype(float) + elif is_timedelta64_dtype(vals.dtype): + inference = np.dtype("timedelta64[ns]") + out = np.asarray(vals).astype(float) + elif isinstance(vals, ExtensionArray) and is_float_dtype(vals): + inference = np.dtype(np.float64) + out = vals.to_numpy(dtype=float, na_value=np.nan) + else: + out = np.asarray(vals) + + return out, inference + + def post_processor(vals: np.ndarray, inference: np.dtype | None) -> np.ndarray: + if inference: + # Check for edge case + if not ( + is_integer_dtype(inference) + and interpolation in {"linear", "midpoint"} + ): + vals = vals.astype(inference) + + return vals + + orig_scalar = is_scalar(q) + if orig_scalar: + q = [q] + + qs = np.array(q, dtype=np.float64) + ids, _, ngroups = self.grouper.group_info + nqs = len(qs) + + func = partial( + libgroupby.group_quantile, labels=ids, qs=qs, interpolation=interpolation + ) + + # Put '-1' (NaN) labels as the last group so it does not interfere + # with the calculations. 
Note: length check avoids failure on empty + # labels. In that case, the value doesn't matter + na_label_for_sorting = ids.max() + 1 if len(ids) > 0 else 0 + labels_for_lexsort = np.where(ids == -1, na_label_for_sorting, ids) + + def blk_func(values: ArrayLike) -> ArrayLike: + mask = isna(values) + vals, inference = pre_processor(values) + + ncols = 1 + if vals.ndim == 2: + ncols = vals.shape[0] + shaped_labels = np.broadcast_to( + labels_for_lexsort, (ncols, len(labels_for_lexsort)) + ) + else: + shaped_labels = labels_for_lexsort + + out = np.empty((ncols, ngroups, nqs), dtype=np.float64) + + # Get an index of values sorted by values and then labels + order = (vals, shaped_labels) + sort_arr = np.lexsort(order).astype(np.intp, copy=False) + + if vals.ndim == 1: + func(out[0], values=vals, mask=mask, sort_indexer=sort_arr) + else: + for i in range(ncols): + func(out[i], values=vals[i], mask=mask[i], sort_indexer=sort_arr[i]) + + if vals.ndim == 1: + out = out.ravel("K") + else: + out = out.reshape(ncols, ngroups * nqs) + return post_processor(out, inference) + + obj = self._obj_with_exclusions + is_ser = obj.ndim == 1 + mgr = self._get_data_to_aggregate() + + res_mgr = mgr.grouped_reduce(blk_func, ignore_failures=True) + if not is_ser and len(res_mgr.items) != len(mgr.items): + warn_dropping_nuisance_columns_deprecated(type(self), "quantile") + + if len(res_mgr.items) == 0: + # re-call grouped_reduce to get the desired exception message + mgr.grouped_reduce(blk_func, ignore_failures=False) + # grouped_reduce _should_ raise, so this should not be reached + raise TypeError( # pragma: no cover + "All columns were dropped in grouped_reduce" + ) + + if is_ser: + res = self._wrap_agged_manager(res_mgr) + else: + res = obj._constructor(res_mgr) + + if orig_scalar: + # Avoid expensive MultiIndex construction + return self._wrap_aggregated_output(res) + return self._wrap_aggregated_output(res, qs=qs) + + @final + @Substitution(name="groupby") + def ngroup(self, ascending: bool = True): + """ + Number each group from 0 to the number of groups - 1. + + This is the enumerative complement of cumcount. Note that the + numbers given to the groups match the order in which the groups + would be seen when iterating over the groupby object, not the + order they are first observed. + + Parameters + ---------- + ascending : bool, default True + If False, number in reverse, from number of group - 1 to 0. + + Returns + ------- + Series + Unique numbers for each group. + + See Also + -------- + .cumcount : Number the rows in each group. + + Examples + -------- + >>> df = pd.DataFrame({"A": list("aaabba")}) + >>> df + A + 0 a + 1 a + 2 a + 3 b + 4 b + 5 a + >>> df.groupby('A').ngroup() + 0 0 + 1 0 + 2 0 + 3 1 + 4 1 + 5 0 + dtype: int64 + >>> df.groupby('A').ngroup(ascending=False) + 0 1 + 1 1 + 2 1 + 3 0 + 4 0 + 5 1 + dtype: int64 + >>> df.groupby(["A", [1,1,2,3,2,1]]).ngroup() + 0 0 + 1 0 + 2 1 + 3 3 + 4 2 + 5 0 + dtype: int64 + """ + with self._group_selection_context(): + index = self._selected_obj.index + result = self._obj_1d_constructor( + self.grouper.group_info[0], index, dtype=np.int64 + ) + if not ascending: + result = self.ngroups - 1 - result + return result + + @final + @Substitution(name="groupby") + def cumcount(self, ascending: bool = True): + """ + Number each item in each group from 0 to the length of that group - 1. + + Essentially this is equivalent to + + .. 
code-block:: python + + self.apply(lambda x: pd.Series(np.arange(len(x)), x.index)) + + Parameters + ---------- + ascending : bool, default True + If False, number in reverse, from length of group - 1 to 0. + + Returns + ------- + Series + Sequence number of each element within each group. + + See Also + -------- + .ngroup : Number the groups themselves. + + Examples + -------- + >>> df = pd.DataFrame([['a'], ['a'], ['a'], ['b'], ['b'], ['a']], + ... columns=['A']) + >>> df + A + 0 a + 1 a + 2 a + 3 b + 4 b + 5 a + >>> df.groupby('A').cumcount() + 0 0 + 1 1 + 2 2 + 3 0 + 4 1 + 5 3 + dtype: int64 + >>> df.groupby('A').cumcount(ascending=False) + 0 3 + 1 2 + 2 1 + 3 1 + 4 0 + 5 0 + dtype: int64 + """ + with self._group_selection_context(): + index = self._selected_obj._get_axis(self.axis) + cumcounts = self._cumcount_array(ascending=ascending) + return self._obj_1d_constructor(cumcounts, index) + + @final + @Substitution(name="groupby") + @Substitution(see_also=_common_see_also) + def rank( + self, + method: str = "average", + ascending: bool = True, + na_option: str = "keep", + pct: bool = False, + axis: int = 0, + ): + """ + Provide the rank of values within each group. + + Parameters + ---------- + method : {'average', 'min', 'max', 'first', 'dense'}, default 'average' + * average: average rank of group. + * min: lowest rank in group. + * max: highest rank in group. + * first: ranks assigned in order they appear in the array. + * dense: like 'min', but rank always increases by 1 between groups. + ascending : bool, default True + False for ranks by high (1) to low (N). + na_option : {'keep', 'top', 'bottom'}, default 'keep' + * keep: leave NA values where they are. + * top: smallest rank if ascending. + * bottom: smallest rank if descending. + pct : bool, default False + Compute percentage rank of data within each group. + axis : int, default 0 + The axis of the object over which to compute the rank. + + Returns + ------- + DataFrame with ranking of values within each group + %(see_also)s + Examples + -------- + >>> df = pd.DataFrame( + ... { + ... "group": ["a", "a", "a", "a", "a", "b", "b", "b", "b", "b"], + ... "value": [2, 4, 2, 3, 5, 1, 2, 4, 1, 5], + ... } + ... ) + >>> df + group value + 0 a 2 + 1 a 4 + 2 a 2 + 3 a 3 + 4 a 5 + 5 b 1 + 6 b 2 + 7 b 4 + 8 b 1 + 9 b 5 + >>> for method in ['average', 'min', 'max', 'dense', 'first']: + ... df[f'{method}_rank'] = df.groupby('group')['value'].rank(method) + >>> df + group value average_rank min_rank max_rank dense_rank first_rank + 0 a 2 1.5 1.0 2.0 1.0 1.0 + 1 a 4 4.0 4.0 4.0 3.0 4.0 + 2 a 2 1.5 1.0 2.0 1.0 2.0 + 3 a 3 3.0 3.0 3.0 2.0 3.0 + 4 a 5 5.0 5.0 5.0 4.0 5.0 + 5 b 1 1.5 1.0 2.0 1.0 1.0 + 6 b 2 3.0 3.0 3.0 2.0 3.0 + 7 b 4 4.0 4.0 4.0 3.0 4.0 + 8 b 1 1.5 1.0 2.0 1.0 2.0 + 9 b 5 5.0 5.0 5.0 4.0 5.0 + """ + if na_option not in {"keep", "top", "bottom"}: + msg = "na_option must be one of 'keep', 'top', or 'bottom'" + raise ValueError(msg) + + kwargs = { + "ties_method": method, + "ascending": ascending, + "na_option": na_option, + "pct": pct, + } + if axis != 0: + # DataFrame uses different keyword name + kwargs["method"] = kwargs.pop("ties_method") + return self.apply(lambda x: x.rank(axis=axis, numeric_only=False, **kwargs)) + + return self._cython_transform( + "rank", + numeric_only=False, + axis=axis, + **kwargs, + ) + + @final + @Substitution(name="groupby") + @Appender(_common_see_also) + def cumprod(self, axis=0, *args, **kwargs): + """ + Cumulative product for each group. 
+ + Returns + ------- + Series or DataFrame + """ + nv.validate_groupby_func("cumprod", args, kwargs, ["numeric_only", "skipna"]) + if axis != 0: + return self.apply(lambda x: x.cumprod(axis=axis, **kwargs)) + + return self._cython_transform("cumprod", **kwargs) + + @final + @Substitution(name="groupby") + @Appender(_common_see_also) + def cumsum(self, axis=0, *args, **kwargs): + """ + Cumulative sum for each group. + + Returns + ------- + Series or DataFrame + """ + nv.validate_groupby_func("cumsum", args, kwargs, ["numeric_only", "skipna"]) + if axis != 0: + return self.apply(lambda x: x.cumsum(axis=axis, **kwargs)) + + return self._cython_transform("cumsum", **kwargs) + + @final + @Substitution(name="groupby") + @Appender(_common_see_also) + def cummin(self, axis=0, **kwargs): + """ + Cumulative min for each group. + + Returns + ------- + Series or DataFrame + """ + skipna = kwargs.get("skipna", True) + if axis != 0: + return self.apply(lambda x: np.minimum.accumulate(x, axis)) + + return self._cython_transform("cummin", numeric_only=False, skipna=skipna) + + @final + @Substitution(name="groupby") + @Appender(_common_see_also) + def cummax(self, axis=0, **kwargs): + """ + Cumulative max for each group. + + Returns + ------- + Series or DataFrame + """ + skipna = kwargs.get("skipna", True) + if axis != 0: + return self.apply(lambda x: np.maximum.accumulate(x, axis)) + + return self._cython_transform("cummax", numeric_only=False, skipna=skipna) + + @final + def _get_cythonized_result( + self, + base_func: Callable, + cython_dtype: np.dtype, + numeric_only: bool | lib.NoDefault = lib.no_default, + needs_counts: bool = False, + needs_nullable: bool = False, + needs_mask: bool = False, + pre_processing=None, + post_processing=None, + **kwargs, + ): + """ + Get result for Cythonized functions. + + Parameters + ---------- + base_func : callable, Cythonized function to be called + cython_dtype : np.dtype + Type of the array that will be modified by the Cython call. + numeric_only : bool, default True + Whether only numeric datatypes should be computed + needs_counts : bool, default False + Whether the counts should be a part of the Cython call + needs_mask : bool, default False + Whether boolean mask needs to be part of the Cython call + signature + needs_nullable : bool, default False + Whether a bool specifying if the input is nullable is part + of the Cython call signature + pre_processing : function, default None + Function to be applied to `values` prior to passing to Cython. + Function should return a tuple where the first element is the + values to be passed to Cython and the second element is an optional + type which the values should be converted to after being returned + by the Cython operation. This function is also responsible for + raising a TypeError if the values have an invalid type. Raises + if `needs_values` is False. + post_processing : function, default None + Function to be applied to result of Cython function. Should accept + an array of values as the first argument and type inferences as its + second argument, i.e. the signature should be + (ndarray, Type). If `needs_nullable=True`, a third argument should be + `nullable`, to allow for processing specific to nullable values. 
+ **kwargs : dict + Extra arguments to be passed back to Cython funcs + + Returns + ------- + `Series` or `DataFrame` with filled values + """ + numeric_only = self._resolve_numeric_only(numeric_only) + + if post_processing and not callable(post_processing): + raise ValueError("'post_processing' must be a callable!") + if pre_processing and not callable(pre_processing): + raise ValueError("'pre_processing' must be a callable!") + + grouper = self.grouper + + ids, _, ngroups = grouper.group_info + + how = base_func.__name__ + base_func = partial(base_func, labels=ids) + + def blk_func(values: ArrayLike) -> ArrayLike: + values = values.T + ncols = 1 if values.ndim == 1 else values.shape[1] + + result: ArrayLike + result = np.zeros(ngroups * ncols, dtype=cython_dtype) + result = result.reshape((ngroups, ncols)) + + func = partial(base_func, out=result) + + inferences = None + + if needs_counts: + counts = np.zeros(self.ngroups, dtype=np.int64) + func = partial(func, counts=counts) + + vals = values + if pre_processing: + vals, inferences = pre_processing(vals) + + vals = vals.astype(cython_dtype, copy=False) + if vals.ndim == 1: + vals = vals.reshape((-1, 1)) + func = partial(func, values=vals) + + if needs_mask: + mask = isna(values).view(np.uint8) + if mask.ndim == 1: + mask = mask.reshape(-1, 1) + func = partial(func, mask=mask) + + if needs_nullable: + is_nullable = isinstance(values, BaseMaskedArray) + func = partial(func, nullable=is_nullable) + + func(**kwargs) # Call func to modify indexer values in place + + if values.ndim == 1: + assert result.shape[1] == 1, result.shape + result = result[:, 0] + + if post_processing: + pp_kwargs = {} + if needs_nullable: + pp_kwargs["nullable"] = isinstance(values, BaseMaskedArray) + + result = post_processing(result, inferences, **pp_kwargs) + + return result.T + + obj = self._obj_with_exclusions + + # Operate block-wise instead of column-by-column + is_ser = obj.ndim == 1 + mgr = self._get_data_to_aggregate() + + if numeric_only: + mgr = mgr.get_numeric_data() + + res_mgr = mgr.grouped_reduce(blk_func, ignore_failures=True) + + if not is_ser and len(res_mgr.items) != len(mgr.items): + howstr = how.replace("group_", "") + warn_dropping_nuisance_columns_deprecated(type(self), howstr) + + if len(res_mgr.items) == 0: + # We re-call grouped_reduce to get the right exception message + mgr.grouped_reduce(blk_func, ignore_failures=False) + # grouped_reduce _should_ raise, so this should not be reached + raise TypeError( # pragma: no cover + "All columns were dropped in grouped_reduce" + ) + + if is_ser: + out = self._wrap_agged_manager(res_mgr) + else: + out = obj._constructor(res_mgr) + + return self._wrap_aggregated_output(out) + + @final + @Substitution(name="groupby") + def shift(self, periods=1, freq=None, axis=0, fill_value=None): + """ + Shift each group by periods observations. + + If freq is passed, the index will be increased using the periods and the freq. + + Parameters + ---------- + periods : int, default 1 + Number of periods to shift. + freq : str, optional + Frequency string. + axis : axis to shift, default 0 + Shift direction. + fill_value : optional + The scalar value to use for newly introduced missing values. + + Returns + ------- + Series or DataFrame + Object shifted within each group. + + See Also + -------- + Index.shift : Shift values of Index. + tshift : Shift the time index, using the index’s frequency + if available. 
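+
+        Examples
+        --------
+        An illustrative example (data chosen arbitrarily); the shift happens
+        within each group, so values never leak across groups:
+
+        >>> df = pd.DataFrame({'g': [1, 1, 2, 2], 'v': [1, 2, 3, 4]})
+        >>> df.groupby('g').shift(1)
+             v
+        0  NaN
+        1  1.0
+        2  NaN
+        3  3.0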
+ """ + if freq is not None or axis != 0: + return self.apply(lambda x: x.shift(periods, freq, axis, fill_value)) + + ids, _, ngroups = self.grouper.group_info + res_indexer = np.zeros(len(ids), dtype=np.int64) + + libgroupby.group_shift_indexer(res_indexer, ids, ngroups, periods) + + obj = self._obj_with_exclusions + + res = obj._reindex_with_indexers( + {self.axis: (obj.axes[self.axis], res_indexer)}, + fill_value=fill_value, + allow_dups=True, + ) + return res + + @final + @Substitution(name="groupby") + @Appender(_common_see_also) + def pct_change(self, periods=1, fill_method="ffill", limit=None, freq=None, axis=0): + """ + Calculate pct_change of each value to previous entry in group. + + Returns + ------- + Series or DataFrame + Percentage changes within each group. + """ + # TODO(GH#23918): Remove this conditional for SeriesGroupBy when + # GH#23918 is fixed + if freq is not None or axis != 0: + return self.apply( + lambda x: x.pct_change( + periods=periods, + fill_method=fill_method, + limit=limit, + freq=freq, + axis=axis, + ) + ) + if fill_method is None: # GH30463 + fill_method = "ffill" + limit = 0 + filled = getattr(self, fill_method)(limit=limit) + fill_grp = filled.groupby(self.grouper.codes, axis=self.axis) + shifted = fill_grp.shift(periods=periods, freq=freq, axis=self.axis) + return (filled / shifted) - 1 + + @final + @Substitution(name="groupby") + @Substitution(see_also=_common_see_also) + def head(self, n=5): + """ + Return first n rows of each group. + + Similar to ``.apply(lambda x: x.head(n))``, but it returns a subset of rows + from the original DataFrame with original index and order preserved + (``as_index`` flag is ignored). + + Parameters + ---------- + n : int + If positive: number of entries to include from start of each group. + If negative: number of entries to exclude from end of each group. + + Returns + ------- + Series or DataFrame + Subset of original Series or DataFrame as determined by n. + %(see_also)s + Examples + -------- + + >>> df = pd.DataFrame([[1, 2], [1, 4], [5, 6]], + ... columns=['A', 'B']) + >>> df.groupby('A').head(1) + A B + 0 1 2 + 2 5 6 + >>> df.groupby('A').head(-1) + A B + 0 1 2 + """ + self._reset_group_selection() + mask = self._make_mask_from_positional_indexer(slice(None, n)) + return self._mask_selected_obj(mask) + + @final + @Substitution(name="groupby") + @Substitution(see_also=_common_see_also) + def tail(self, n=5): + """ + Return last n rows of each group. + + Similar to ``.apply(lambda x: x.tail(n))``, but it returns a subset of rows + from the original DataFrame with original index and order preserved + (``as_index`` flag is ignored). + + Parameters + ---------- + n : int + If positive: number of entries to include from end of each group. + If negative: number of entries to exclude from start of each group. + + Returns + ------- + Series or DataFrame + Subset of original Series or DataFrame as determined by n. + %(see_also)s + Examples + -------- + + >>> df = pd.DataFrame([['a', 1], ['a', 2], ['b', 1], ['b', 2]], + ... columns=['A', 'B']) + >>> df.groupby('A').tail(1) + A B + 1 a 2 + 3 b 2 + >>> df.groupby('A').tail(-1) + A B + 1 a 2 + 3 b 2 + """ + self._reset_group_selection() + if n: + mask = self._make_mask_from_positional_indexer(slice(-n, None)) + else: + mask = self._make_mask_from_positional_indexer([]) + + return self._mask_selected_obj(mask) + + @final + def _mask_selected_obj(self, mask: np.ndarray) -> NDFrameT: + """ + Return _selected_obj with mask applied to the correct axis. 
+ + Parameters + ---------- + mask : np.ndarray + Boolean mask to apply. + + Returns + ------- + Series or DataFrame + Filtered _selected_obj. + """ + ids = self.grouper.group_info[0] + mask = mask & (ids != -1) + + if self.axis == 0: + return self._selected_obj[mask] + else: + return self._selected_obj.iloc[:, mask] + + @final + def _reindex_output( + self, + output: OutputFrameOrSeries, + fill_value: Scalar = np.NaN, + qs: npt.NDArray[np.float64] | None = None, + ) -> OutputFrameOrSeries: + """ + If we have categorical groupers, then we might want to make sure that + we have a fully re-indexed output to the levels. This means expanding + the output space to accommodate all values in the cartesian product of + our groups, regardless of whether they were observed in the data or + not. This will expand the output space if there are missing groups. + + The method returns early without modifying the input if the number of + groupings is less than 2, self.observed == True or none of the groupers + are categorical. + + Parameters + ---------- + output : Series or DataFrame + Object resulting from grouping and applying an operation. + fill_value : scalar, default np.NaN + Value to use for unobserved categories if self.observed is False. + qs : np.ndarray[float64] or None, default None + quantile values, only relevant for quantile. + + Returns + ------- + Series or DataFrame + Object (potentially) re-indexed to include all possible groups. + """ + groupings = self.grouper.groupings + if len(groupings) == 1: + return output + + # if we only care about the observed values + # we are done + elif self.observed: + return output + + # reindexing only applies to a Categorical grouper + elif not any( + isinstance(ping.grouping_vector, (Categorical, CategoricalIndex)) + for ping in groupings + ): + return output + + levels_list = [ping.group_index for ping in groupings] + names = self.grouper.names + if qs is not None: + # error: Argument 1 to "append" of "list" has incompatible type + # "ndarray[Any, dtype[floating[_64Bit]]]"; expected "Index" + levels_list.append(qs) # type: ignore[arg-type] + names = names + [None] + index, _ = MultiIndex.from_product(levels_list, names=names).sortlevel() + + if self.as_index: + d = { + self.obj._get_axis_name(self.axis): index, + "copy": False, + "fill_value": fill_value, + } + return output.reindex(**d) + + # GH 13204 + # Here, the categorical in-axis groupers, which need to be fully + # expanded, are columns in `output`. An idea is to do: + # output = output.set_index(self.grouper.names) + # .reindex(index).reset_index() + # but special care has to be taken because of possible not-in-axis + # groupers. + # So, we manually select and drop the in-axis grouper columns, + # reindex `output`, and then reset the in-axis grouper columns. 
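+        # For example (illustrative): a call like
+        # df.groupby(["cat", "key"], as_index=False, observed=False).sum()
+        # reaches this path when "cat" is a Categorical with unobserved
+        # categories; rows for those categories are created by the reindex
+        # below and filled with `fill_value`.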
+ + # Select in-axis groupers + in_axis_grps = ( + (i, ping.name) for (i, ping) in enumerate(groupings) if ping.in_axis + ) + g_nums, g_names = zip(*in_axis_grps) + + output = output.drop(labels=list(g_names), axis=1) + + # Set a temp index and reindex (possibly expanding) + output = output.set_index(self.grouper.result_index).reindex( + index, copy=False, fill_value=fill_value + ) + + # Reset in-axis grouper columns + # (using level numbers `g_nums` because level names may not be unique) + output = output.reset_index(level=g_nums) + + return output.reset_index(drop=True) + + @final + def sample( + self, + n: int | None = None, + frac: float | None = None, + replace: bool = False, + weights: Sequence | Series | None = None, + random_state: RandomState | None = None, + ): + """ + Return a random sample of items from each group. + + You can use `random_state` for reproducibility. + + .. versionadded:: 1.1.0 + + Parameters + ---------- + n : int, optional + Number of items to return for each group. Cannot be used with + `frac` and must be no larger than the smallest group unless + `replace` is True. Default is one if `frac` is None. + frac : float, optional + Fraction of items to return. Cannot be used with `n`. + replace : bool, default False + Allow or disallow sampling of the same row more than once. + weights : list-like, optional + Default None results in equal probability weighting. + If passed a list-like then values must have the same length as + the underlying DataFrame or Series object and will be used as + sampling probabilities after normalization within each group. + Values must be non-negative with at least one positive element + within each group. + random_state : int, array-like, BitGenerator, np.random.RandomState, np.random.Generator, optional + If int, array-like, or BitGenerator, seed for random number generator. + If np.random.RandomState or np.random.Generator, use as given. + + .. versionchanged:: 1.4.0 + + np.random.Generator objects now accepted + + Returns + ------- + Series or DataFrame + A new object of same type as caller containing items randomly + sampled within each group from the caller object. + + See Also + -------- + DataFrame.sample: Generate random samples from a DataFrame object. + numpy.random.choice: Generate a random sample from a given 1-D numpy + array. + + Examples + -------- + >>> df = pd.DataFrame( + ... {"a": ["red"] * 2 + ["blue"] * 2 + ["black"] * 2, "b": range(6)} + ... ) + >>> df + a b + 0 red 0 + 1 red 1 + 2 blue 2 + 3 blue 3 + 4 black 4 + 5 black 5 + + Select one row at random for each distinct value in column a. The + `random_state` argument can be used to guarantee reproducibility: + + >>> df.groupby("a").sample(n=1, random_state=1) + a b + 4 black 4 + 2 blue 2 + 1 red 1 + + Set `frac` to sample fixed proportions rather than counts: + + >>> df.groupby("a")["b"].sample(frac=0.5, random_state=2) + 5 5 + 2 2 + 0 0 + Name: b, dtype: int64 + + Control sample probabilities within groups by setting weights: + + >>> df.groupby("a").sample( + ... n=1, + ... weights=[1, 1, 1, 0, 0, 1], + ... random_state=1, + ... 
) + a b + 5 black 5 + 2 blue 2 + 0 red 0 + """ # noqa:E501 + size = sample.process_sampling_size(n, frac, replace) + if weights is not None: + weights_arr = sample.preprocess_weights( + self._selected_obj, weights, axis=self.axis + ) + + random_state = com.random_state(random_state) + + group_iterator = self.grouper.get_iterator(self._selected_obj, self.axis) + + sampled_indices = [] + for labels, obj in group_iterator: + grp_indices = self.indices[labels] + group_size = len(grp_indices) + if size is not None: + sample_size = size + else: + assert frac is not None + sample_size = round(frac * group_size) + + grp_sample = sample.sample( + group_size, + size=sample_size, + replace=replace, + weights=None if weights is None else weights_arr[grp_indices], + random_state=random_state, + ) + sampled_indices.append(grp_indices[grp_sample]) + + sampled_indices = np.concatenate(sampled_indices) + return self._selected_obj.take(sampled_indices, axis=self.axis) + + +@doc(GroupBy) +def get_groupby( + obj: NDFrame, + by: _KeysArgType | None = None, + axis: int = 0, + level=None, + grouper: ops.BaseGrouper | None = None, + exclusions=None, + selection=None, + as_index: bool = True, + sort: bool = True, + group_keys: bool = True, + squeeze: bool = False, + observed: bool = False, + mutated: bool = False, + dropna: bool = True, +) -> GroupBy: + + klass: type[GroupBy] + if isinstance(obj, Series): + from pandas.core.groupby.generic import SeriesGroupBy + + klass = SeriesGroupBy + elif isinstance(obj, DataFrame): + from pandas.core.groupby.generic import DataFrameGroupBy + + klass = DataFrameGroupBy + else: # pragma: no cover + raise TypeError(f"invalid type: {obj}") + + return klass( + obj=obj, + keys=by, + axis=axis, + level=level, + grouper=grouper, + exclusions=exclusions, + selection=selection, + as_index=as_index, + sort=sort, + group_keys=group_keys, + squeeze=squeeze, + observed=observed, + mutated=mutated, + dropna=dropna, + ) + + +def _insert_quantile_level(idx: Index, qs: npt.NDArray[np.float64]) -> MultiIndex: + """ + Insert the sequence 'qs' of quantiles as the inner-most level of a MultiIndex. + + The quantile level in the MultiIndex is a repeated copy of 'qs'. + + Parameters + ---------- + idx : Index + qs : np.ndarray[float64] + + Returns + ------- + MultiIndex + """ + nqs = len(qs) + + if idx._is_multi: + idx = cast(MultiIndex, idx) + lev_codes, lev = Index(qs).factorize() + levels = list(idx.levels) + [lev] + codes = [np.repeat(x, nqs) for x in idx.codes] + [np.tile(lev_codes, len(idx))] + mi = MultiIndex(levels=levels, codes=codes, names=idx.names + [None]) + else: + mi = MultiIndex.from_product([idx, qs]) + return mi + + +def warn_dropping_nuisance_columns_deprecated(cls, how: str) -> None: + warnings.warn( + "Dropping invalid columns in " + f"{cls.__name__}.{how} is deprecated. " + "In a future version, a TypeError will be raised. " + f"Before calling .{how}, select only columns which " + "should be valid for the function.", + FutureWarning, + stacklevel=find_stack_level(), + ) diff --git a/.local/lib/python3.8/site-packages/pandas/core/groupby/grouper.py b/.local/lib/python3.8/site-packages/pandas/core/groupby/grouper.py new file mode 100644 index 0000000..3cf56af --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/groupby/grouper.py @@ -0,0 +1,991 @@ +""" +Provide user facing operators for doing the split part of the +split-apply-combine paradigm. 
+""" +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Any, + Hashable, + final, +) +import warnings + +import numpy as np + +from pandas._typing import ( + ArrayLike, + NDFrameT, + npt, +) +from pandas.errors import InvalidIndexError +from pandas.util._decorators import cache_readonly +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.cast import sanitize_to_nanoseconds +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_list_like, + is_scalar, +) + +import pandas.core.algorithms as algorithms +from pandas.core.arrays import ( + Categorical, + ExtensionArray, +) +import pandas.core.common as com +from pandas.core.frame import DataFrame +from pandas.core.groupby import ops +from pandas.core.groupby.categorical import ( + recode_for_groupby, + recode_from_groupby, +) +from pandas.core.indexes.api import ( + CategoricalIndex, + Index, + MultiIndex, +) +from pandas.core.series import Series + +from pandas.io.formats.printing import pprint_thing + +if TYPE_CHECKING: + from pandas.core.generic import NDFrame + + +class Grouper: + """ + A Grouper allows the user to specify a groupby instruction for an object. + + This specification will select a column via the key parameter, or if the + level and/or axis parameters are given, a level of the index of the target + object. + + If `axis` and/or `level` are passed as keywords to both `Grouper` and + `groupby`, the values passed to `Grouper` take precedence. + + Parameters + ---------- + key : str, defaults to None + Groupby key, which selects the grouping column of the target. + level : name/number, defaults to None + The level for the target index. + freq : str / frequency object, defaults to None + This will groupby the specified frequency if the target selection + (via key or level) is a datetime-like object. For full specification + of available frequencies, please see `here + `_. + axis : str, int, defaults to 0 + Number/name of the axis. + sort : bool, default to False + Whether to sort the resulting labels. + closed : {'left' or 'right'} + Closed end of interval. Only when `freq` parameter is passed. + label : {'left' or 'right'} + Interval boundary to use for labeling. + Only when `freq` parameter is passed. + convention : {'start', 'end', 'e', 's'} + If grouper is PeriodIndex and `freq` parameter is passed. + base : int, default 0 + Only when `freq` parameter is passed. + For frequencies that evenly subdivide 1 day, the "origin" of the + aggregated intervals. For example, for '5min' frequency, base could + range from 0 through 4. Defaults to 0. + + .. deprecated:: 1.1.0 + The new arguments that you should use are 'offset' or 'origin'. + + loffset : str, DateOffset, timedelta object + Only when `freq` parameter is passed. + + .. deprecated:: 1.1.0 + loffset is only working for ``.resample(...)`` and not for + Grouper (:issue:`28302`). + However, loffset is also deprecated for ``.resample(...)`` + See: :class:`DataFrame.resample` + + origin : Timestamp or str, default 'start_day' + The timestamp on which to adjust the grouping. The timezone of origin must + match the timezone of the index. + If string, must be one of the following: + + - 'epoch': `origin` is 1970-01-01 + - 'start': `origin` is the first value of the timeseries + - 'start_day': `origin` is the first day at midnight of the timeseries + + .. versionadded:: 1.1.0 + + - 'end': `origin` is the last value of the timeseries + - 'end_day': `origin` is the ceiling midnight of the last day + + .. 
versionadded:: 1.3.0 + + offset : Timedelta or str, default is None + An offset timedelta added to the origin. + + .. versionadded:: 1.1.0 + + dropna : bool, default True + If True, and if group keys contain NA values, NA values together with + row/column will be dropped. If False, NA values will also be treated as + the key in groups. + + .. versionadded:: 1.2.0 + + Returns + ------- + A specification for a groupby instruction + + Examples + -------- + Syntactic sugar for ``df.groupby('A')`` + + >>> df = pd.DataFrame( + ... { + ... "Animal": ["Falcon", "Parrot", "Falcon", "Falcon", "Parrot"], + ... "Speed": [100, 5, 200, 300, 15], + ... } + ... ) + >>> df + Animal Speed + 0 Falcon 100 + 1 Parrot 5 + 2 Falcon 200 + 3 Falcon 300 + 4 Parrot 15 + >>> df.groupby(pd.Grouper(key="Animal")).mean() + Speed + Animal + Falcon 200.0 + Parrot 10.0 + + Specify a resample operation on the column 'Publish date' + + >>> df = pd.DataFrame( + ... { + ... "Publish date": [ + ... pd.Timestamp("2000-01-02"), + ... pd.Timestamp("2000-01-02"), + ... pd.Timestamp("2000-01-09"), + ... pd.Timestamp("2000-01-16") + ... ], + ... "ID": [0, 1, 2, 3], + ... "Price": [10, 20, 30, 40] + ... } + ... ) + >>> df + Publish date ID Price + 0 2000-01-02 0 10 + 1 2000-01-02 1 20 + 2 2000-01-09 2 30 + 3 2000-01-16 3 40 + >>> df.groupby(pd.Grouper(key="Publish date", freq="1W")).mean() + ID Price + Publish date + 2000-01-02 0.5 15.0 + 2000-01-09 2.0 30.0 + 2000-01-16 3.0 40.0 + + If you want to adjust the start of the bins based on a fixed timestamp: + + >>> start, end = '2000-10-01 23:30:00', '2000-10-02 00:30:00' + >>> rng = pd.date_range(start, end, freq='7min') + >>> ts = pd.Series(np.arange(len(rng)) * 3, index=rng) + >>> ts + 2000-10-01 23:30:00 0 + 2000-10-01 23:37:00 3 + 2000-10-01 23:44:00 6 + 2000-10-01 23:51:00 9 + 2000-10-01 23:58:00 12 + 2000-10-02 00:05:00 15 + 2000-10-02 00:12:00 18 + 2000-10-02 00:19:00 21 + 2000-10-02 00:26:00 24 + Freq: 7T, dtype: int64 + + >>> ts.groupby(pd.Grouper(freq='17min')).sum() + 2000-10-01 23:14:00 0 + 2000-10-01 23:31:00 9 + 2000-10-01 23:48:00 21 + 2000-10-02 00:05:00 54 + 2000-10-02 00:22:00 24 + Freq: 17T, dtype: int64 + + >>> ts.groupby(pd.Grouper(freq='17min', origin='epoch')).sum() + 2000-10-01 23:18:00 0 + 2000-10-01 23:35:00 18 + 2000-10-01 23:52:00 27 + 2000-10-02 00:09:00 39 + 2000-10-02 00:26:00 24 + Freq: 17T, dtype: int64 + + >>> ts.groupby(pd.Grouper(freq='17min', origin='2000-01-01')).sum() + 2000-10-01 23:24:00 3 + 2000-10-01 23:41:00 15 + 2000-10-01 23:58:00 45 + 2000-10-02 00:15:00 45 + Freq: 17T, dtype: int64 + + If you want to adjust the start of the bins with an `offset` Timedelta, the two + following lines are equivalent: + + >>> ts.groupby(pd.Grouper(freq='17min', origin='start')).sum() + 2000-10-01 23:30:00 9 + 2000-10-01 23:47:00 21 + 2000-10-02 00:04:00 54 + 2000-10-02 00:21:00 24 + Freq: 17T, dtype: int64 + + >>> ts.groupby(pd.Grouper(freq='17min', offset='23h30min')).sum() + 2000-10-01 23:30:00 9 + 2000-10-01 23:47:00 21 + 2000-10-02 00:04:00 54 + 2000-10-02 00:21:00 24 + Freq: 17T, dtype: int64 + + To replace the use of the deprecated `base` argument, you can now use `offset`, + in this example it is equivalent to have `base=2`: + + >>> ts.groupby(pd.Grouper(freq='17min', offset='2min')).sum() + 2000-10-01 23:16:00 0 + 2000-10-01 23:33:00 9 + 2000-10-01 23:50:00 36 + 2000-10-02 00:07:00 39 + 2000-10-02 00:24:00 24 + Freq: 17T, dtype: int64 + """ + + axis: int + sort: bool + dropna: bool + _gpr_index: Index | None + _grouper: Index | None + + _attributes: 
tuple[str, ...] = ("key", "level", "freq", "axis", "sort") + + def __new__(cls, *args, **kwargs): + if kwargs.get("freq") is not None: + from pandas.core.resample import TimeGrouper + + _check_deprecated_resample_kwargs(kwargs, origin=cls) + cls = TimeGrouper + return super().__new__(cls) + + def __init__( + self, + key=None, + level=None, + freq=None, + axis: int = 0, + sort: bool = False, + dropna: bool = True, + ): + self.key = key + self.level = level + self.freq = freq + self.axis = axis + self.sort = sort + + self.grouper = None + self._gpr_index = None + self.obj = None + self.indexer = None + self.binner = None + self._grouper = None + self._indexer = None + self.dropna = dropna + + @final + @property + def ax(self) -> Index: + index = self._gpr_index + if index is None: + raise ValueError("_set_grouper must be called before ax is accessed") + return index + + def _get_grouper( + self, obj: NDFrameT, validate: bool = True + ) -> tuple[Any, ops.BaseGrouper, NDFrameT]: + """ + Parameters + ---------- + obj : Series or DataFrame + validate : bool, default True + if True, validate the grouper + + Returns + ------- + a tuple of binner, grouper, obj (possibly sorted) + """ + self._set_grouper(obj) + # error: Value of type variable "NDFrameT" of "get_grouper" cannot be + # "Optional[Any]" + # error: Incompatible types in assignment (expression has type "BaseGrouper", + # variable has type "None") + self.grouper, _, self.obj = get_grouper( # type: ignore[type-var,assignment] + self.obj, + [self.key], + axis=self.axis, + level=self.level, + sort=self.sort, + validate=validate, + dropna=self.dropna, + ) + + # error: Incompatible return value type (got "Tuple[None, None, None]", + # expected "Tuple[Any, BaseGrouper, NDFrameT]") + return self.binner, self.grouper, self.obj # type: ignore[return-value] + + @final + def _set_grouper(self, obj: NDFrame, sort: bool = False): + """ + given an object and the specifications, setup the internal grouper + for this particular specification + + Parameters + ---------- + obj : Series or DataFrame + sort : bool, default False + whether the resulting grouper should be sorted + """ + assert obj is not None + + if self.key is not None and self.level is not None: + raise ValueError("The Grouper cannot specify both a key and a level!") + + # Keep self.grouper value before overriding + if self._grouper is None: + # TODO: What are we assuming about subsequent calls? + self._grouper = self._gpr_index + self._indexer = self.indexer + + # the key must be a valid info item + if self.key is not None: + key = self.key + # The 'on' is already defined + if getattr(self._gpr_index, "name", None) == key and isinstance( + obj, Series + ): + # Sometimes self._grouper will have been resorted while + # obj has not. In this case there is a mismatch when we + # call self._grouper.take(obj.index) so we need to undo the sorting + # before we call _grouper.take. 
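+                # Sketch of the unsort trick (hypothetical values): if
+                # `indexer` is the permutation that sorted the axis, then
+                # `indexer.argsort()` inverts it, e.g.
+                #   indexer = np.array([2, 0, 1])
+                #   reverse_indexer = indexer.argsort()      # -> [1, 2, 0]
+                #   arr.take(indexer).take(reverse_indexer)  # -> arr again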
+ assert self._grouper is not None + if self._indexer is not None: + reverse_indexer = self._indexer.argsort() + unsorted_ax = self._grouper.take(reverse_indexer) + ax = unsorted_ax.take(obj.index) + else: + ax = self._grouper.take(obj.index) + else: + if key not in obj._info_axis: + raise KeyError(f"The grouper name {key} is not found") + ax = Index(obj[key], name=key) + + else: + ax = obj._get_axis(self.axis) + if self.level is not None: + level = self.level + + # if a level is given it must be a mi level or + # equivalent to the axis name + if isinstance(ax, MultiIndex): + level = ax._get_level_number(level) + ax = Index(ax._get_level_values(level), name=ax.names[level]) + + else: + if level not in (0, ax.name): + raise ValueError(f"The level {level} is not valid") + + # possibly sort + if (self.sort or sort) and not ax.is_monotonic: + # use stable sort to support first, last, nth + # TODO: why does putting na_position="first" fix datetimelike cases? + indexer = self.indexer = ax.array.argsort( + kind="mergesort", na_position="first" + ) + ax = ax.take(indexer) + obj = obj.take(indexer, axis=self.axis) + + # error: Incompatible types in assignment (expression has type + # "NDFrameT", variable has type "None") + self.obj = obj # type: ignore[assignment] + self._gpr_index = ax + return self._gpr_index + + @final + @property + def groups(self): + # error: "None" has no attribute "groups" + return self.grouper.groups # type: ignore[attr-defined] + + @final + def __repr__(self) -> str: + attrs_list = ( + f"{attr_name}={repr(getattr(self, attr_name))}" + for attr_name in self._attributes + if getattr(self, attr_name) is not None + ) + attrs = ", ".join(attrs_list) + cls_name = type(self).__name__ + return f"{cls_name}({attrs})" + + +@final +class Grouping: + """ + Holds the grouping information for a single key + + Parameters + ---------- + index : Index + grouper : + obj : DataFrame or Series + name : Label + level : + observed : bool, default False + If we are a Categorical, use the observed values + in_axis : if the Grouping is a column in self.obj and hence among + Groupby.exclusions list + + Returns + ------- + **Attributes**: + * indices : dict of {group -> index_list} + * codes : ndarray, group codes + * group_index : unique groups + * groups : dict of {group -> label_list} + """ + + _codes: np.ndarray | None = None + _group_index: Index | None = None + _passed_categorical: bool + _all_grouper: Categorical | None + _index: Index + + def __init__( + self, + index: Index, + grouper=None, + obj: NDFrame | None = None, + level=None, + sort: bool = True, + observed: bool = False, + in_axis: bool = False, + dropna: bool = True, + ): + self.level = level + self._orig_grouper = grouper + self.grouping_vector = _convert_grouper(index, grouper) + self._all_grouper = None + self._index = index + self._sort = sort + self.obj = obj + self._observed = observed + self.in_axis = in_axis + self._dropna = dropna + + self._passed_categorical = False + + # we have a single grouper which may be a myriad of things, + # some of which are dependent on the passing in level + + ilevel = self._ilevel + if ilevel is not None: + mapper = self.grouping_vector + # In extant tests, the new self.grouping_vector matches + # `index.get_level_values(ilevel)` whenever + # mapper is None and isinstance(index, MultiIndex) + ( + self.grouping_vector, # Index + self._codes, + self._group_index, + ) = index._get_grouper_for_level(mapper, level=ilevel) + + # a passed Grouper like, directly get the grouper in the same way + # as 
single grouper groupby, use the group_info to get codes + elif isinstance(self.grouping_vector, Grouper): + # get the new grouper; we already have disambiguated + # what key/level refer to exactly, don't need to + # check again as we have by this point converted these + # to an actual value (rather than a pd.Grouper) + assert self.obj is not None # for mypy + _, newgrouper, newobj = self.grouping_vector._get_grouper( + self.obj, validate=False + ) + self.obj = newobj + + ng = newgrouper._get_grouper() + if isinstance(newgrouper, ops.BinGrouper): + # in this case we have `ng is newgrouper` + self.grouping_vector = ng + else: + # ops.BaseGrouper + # use Index instead of ndarray so we can recover the name + self.grouping_vector = Index(ng, name=newgrouper.result_index.name) + + elif is_categorical_dtype(self.grouping_vector): + # a passed Categorical + self._passed_categorical = True + + self.grouping_vector, self._all_grouper = recode_for_groupby( + self.grouping_vector, sort, observed + ) + + elif not isinstance( + self.grouping_vector, (Series, Index, ExtensionArray, np.ndarray) + ): + # no level passed + if getattr(self.grouping_vector, "ndim", 1) != 1: + t = self.name or str(type(self.grouping_vector)) + raise ValueError(f"Grouper for '{t}' not 1-dimensional") + + self.grouping_vector = index.map(self.grouping_vector) + + if not ( + hasattr(self.grouping_vector, "__len__") + and len(self.grouping_vector) == len(index) + ): + grper = pprint_thing(self.grouping_vector) + errmsg = ( + "Grouper result violates len(labels) == " + f"len(data)\nresult: {grper}" + ) + self.grouping_vector = None # Try for sanity + raise AssertionError(errmsg) + + if isinstance(self.grouping_vector, np.ndarray): + # if we have a date/time-like grouper, make sure that we have + # Timestamps like + self.grouping_vector = sanitize_to_nanoseconds(self.grouping_vector) + + def __repr__(self) -> str: + return f"Grouping({self.name})" + + def __iter__(self): + return iter(self.indices) + + @cache_readonly + def name(self) -> Hashable: + ilevel = self._ilevel + if ilevel is not None: + return self._index.names[ilevel] + + if isinstance(self._orig_grouper, (Index, Series)): + return self._orig_grouper.name + + elif isinstance(self.grouping_vector, ops.BaseGrouper): + return self.grouping_vector.result_index.name + + elif isinstance(self.grouping_vector, Index): + return self.grouping_vector.name + + # otherwise we have ndarray or ExtensionArray -> no name + return None + + @cache_readonly + def _ilevel(self) -> int | None: + """ + If necessary, converted index level name to index level position. 
+ """ + level = self.level + if level is None: + return None + if not isinstance(level, int): + index = self._index + if level not in index.names: + raise AssertionError(f"Level {level} not in index") + return index.names.index(level) + return level + + @property + def ngroups(self) -> int: + return len(self.group_index) + + @cache_readonly + def indices(self) -> dict[Hashable, npt.NDArray[np.intp]]: + # we have a list of groupers + if isinstance(self.grouping_vector, ops.BaseGrouper): + return self.grouping_vector.indices + + values = Categorical(self.grouping_vector) + return values._reverse_indexer() + + @property + def codes(self) -> np.ndarray: + if self._codes is not None: + # _codes is set in __init__ for MultiIndex cases + return self._codes + + return self._codes_and_uniques[0] + + @cache_readonly + def group_arraylike(self) -> ArrayLike: + """ + Analogous to result_index, but holding an ArrayLike to ensure + we can retain ExtensionDtypes. + """ + if self._group_index is not None: + # _group_index is set in __init__ for MultiIndex cases + return self._group_index._values + + elif self._all_grouper is not None: + # retain dtype for categories, including unobserved ones + return self.result_index._values + + return self._codes_and_uniques[1] + + @cache_readonly + def result_index(self) -> Index: + # result_index retains dtype for categories, including unobserved ones, + # which group_index does not + if self._all_grouper is not None: + group_idx = self.group_index + assert isinstance(group_idx, CategoricalIndex) + return recode_from_groupby(self._all_grouper, self._sort, group_idx) + return self.group_index + + @cache_readonly + def group_index(self) -> Index: + if self._group_index is not None: + # _group_index is set in __init__ for MultiIndex cases + return self._group_index + + uniques = self._codes_and_uniques[1] + return Index._with_infer(uniques, name=self.name) + + @cache_readonly + def _codes_and_uniques(self) -> tuple[np.ndarray, ArrayLike]: + if self._passed_categorical: + # we make a CategoricalIndex out of the cat grouper + # preserving the categories / ordered attributes + cat = self.grouping_vector + categories = cat.categories + + if self._observed: + ucodes = algorithms.unique1d(cat.codes) + ucodes = ucodes[ucodes != -1] + if self._sort or cat.ordered: + ucodes = np.sort(ucodes) + else: + ucodes = np.arange(len(categories)) + + uniques = Categorical.from_codes( + codes=ucodes, categories=categories, ordered=cat.ordered + ) + return cat.codes, uniques + + elif isinstance(self.grouping_vector, ops.BaseGrouper): + # we have a list of groupers + codes = self.grouping_vector.codes_info + uniques = self.grouping_vector.result_arraylike + else: + # GH35667, replace dropna=False with na_sentinel=None + if not self._dropna: + na_sentinel = None + else: + na_sentinel = -1 + codes, uniques = algorithms.factorize( + self.grouping_vector, sort=self._sort, na_sentinel=na_sentinel + ) + return codes, uniques + + @cache_readonly + def groups(self) -> dict[Hashable, np.ndarray]: + return self._index.groupby(Categorical.from_codes(self.codes, self.group_index)) + + +def get_grouper( + obj: NDFrameT, + key=None, + axis: int = 0, + level=None, + sort: bool = True, + observed: bool = False, + mutated: bool = False, + validate: bool = True, + dropna: bool = True, +) -> tuple[ops.BaseGrouper, frozenset[Hashable], NDFrameT]: + """ + Create and return a BaseGrouper, which is an internal + mapping of how to create the grouper indexers. 
+ This may be composed of multiple Grouping objects, indicating + multiple groupers + + Groupers are ultimately index mappings. They can originate as: + index mappings, keys to columns, functions, or Groupers + + Groupers enable local references to axis,level,sort, while + the passed in axis, level, and sort are 'global'. + + This routine tries to figure out what the passing in references + are and then creates a Grouping for each one, combined into + a BaseGrouper. + + If observed & we have a categorical grouper, only show the observed + values. + + If validate, then check for key/level overlaps. + + """ + group_axis = obj._get_axis(axis) + + # validate that the passed single level is compatible with the passed + # axis of the object + if level is not None: + # TODO: These if-block and else-block are almost same. + # MultiIndex instance check is removable, but it seems that there are + # some processes only for non-MultiIndex in else-block, + # eg. `obj.index.name != level`. We have to consider carefully whether + # these are applicable for MultiIndex. Even if these are applicable, + # we need to check if it makes no side effect to subsequent processes + # on the outside of this condition. + # (GH 17621) + if isinstance(group_axis, MultiIndex): + if is_list_like(level) and len(level) == 1: + level = level[0] + + if key is None and is_scalar(level): + # Get the level values from group_axis + key = group_axis.get_level_values(level) + level = None + + else: + # allow level to be a length-one list-like object + # (e.g., level=[0]) + # GH 13901 + if is_list_like(level): + nlevels = len(level) + if nlevels == 1: + level = level[0] + elif nlevels == 0: + raise ValueError("No group keys passed!") + else: + raise ValueError("multiple levels only valid with MultiIndex") + + if isinstance(level, str): + if obj._get_axis(axis).name != level: + raise ValueError( + f"level name {level} is not the name " + f"of the {obj._get_axis_name(axis)}" + ) + elif level > 0 or level < -1: + raise ValueError("level > 0 or level < -1 only valid with MultiIndex") + + # NOTE: `group_axis` and `group_axis.get_level_values(level)` + # are same in this section. + level = None + key = group_axis + + # a passed-in Grouper, directly convert + if isinstance(key, Grouper): + binner, grouper, obj = key._get_grouper(obj, validate=False) + if key.key is None: + return grouper, frozenset(), obj + else: + return grouper, frozenset({key.key}), obj + + # already have a BaseGrouper, just return it + elif isinstance(key, ops.BaseGrouper): + return key, frozenset(), obj + + if not isinstance(key, list): + keys = [key] + match_axis_length = False + else: + keys = key + match_axis_length = len(keys) == len(group_axis) + + # what are we after, exactly? + any_callable = any(callable(g) or isinstance(g, dict) for g in keys) + any_groupers = any(isinstance(g, (Grouper, Grouping)) for g in keys) + any_arraylike = any( + isinstance(g, (list, tuple, Series, Index, np.ndarray)) for g in keys + ) + + # is this an index replacement? 
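+    # For example (hypothetical): df.groupby(["x", "y"]) on a two-row frame
+    # with neither an "x" nor a "y" column groups by the array ["x", "y"]
+    # itself, because len(keys) matches the axis length and no key resolves
+    # to a column or index level.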
+ if ( + not any_callable + and not any_arraylike + and not any_groupers + and match_axis_length + and level is None + ): + if isinstance(obj, DataFrame): + all_in_columns_index = all( + g in obj.columns or g in obj.index.names for g in keys + ) + else: + assert isinstance(obj, Series) + all_in_columns_index = all(g in obj.index.names for g in keys) + + if not all_in_columns_index: + keys = [com.asarray_tuplesafe(keys)] + + if isinstance(level, (tuple, list)): + if key is None: + keys = [None] * len(level) + levels = level + else: + levels = [level] * len(keys) + + groupings: list[Grouping] = [] + exclusions: set[Hashable] = set() + + # if the actual grouper should be obj[key] + def is_in_axis(key) -> bool: + if not _is_label_like(key): + # items -> .columns for DataFrame, .index for Series + items = obj.axes[-1] + try: + items.get_loc(key) + except (KeyError, TypeError, InvalidIndexError): + # TypeError shows up here if we pass e.g. Int64Index + return False + + return True + + # if the grouper is obj[name] + def is_in_obj(gpr) -> bool: + if not hasattr(gpr, "name"): + return False + try: + return gpr is obj[gpr.name] + except (KeyError, IndexError, InvalidIndexError): + # IndexError reached in e.g. test_skip_group_keys when we pass + # lambda here + # InvalidIndexError raised on key-types inappropriate for index, + # e.g. DatetimeIndex.get_loc(tuple()) + return False + + for gpr, level in zip(keys, levels): + + if is_in_obj(gpr): # df.groupby(df['name']) + in_axis = True + exclusions.add(gpr.name) + + elif is_in_axis(gpr): # df.groupby('name') + if gpr in obj: + if validate: + obj._check_label_or_level_ambiguity(gpr, axis=axis) + in_axis, name, gpr = True, gpr, obj[gpr] + if gpr.ndim != 1: + # non-unique columns; raise here to get the name in the + # exception message + raise ValueError(f"Grouper for '{name}' not 1-dimensional") + exclusions.add(name) + elif obj._is_level_reference(gpr, axis=axis): + in_axis, level, gpr = False, gpr, None + else: + raise KeyError(gpr) + elif isinstance(gpr, Grouper) and gpr.key is not None: + # Add key to exclusions + exclusions.add(gpr.key) + in_axis = False + else: + in_axis = False + + # create the Grouping + # allow us to passing the actual Grouping as the gpr + ping = ( + Grouping( + group_axis, + gpr, + obj=obj, + level=level, + sort=sort, + observed=observed, + in_axis=in_axis, + dropna=dropna, + ) + if not isinstance(gpr, Grouping) + else gpr + ) + + groupings.append(ping) + + if len(groupings) == 0 and len(obj): + raise ValueError("No group keys passed!") + elif len(groupings) == 0: + groupings.append(Grouping(Index([], dtype="int"), np.array([], dtype=np.intp))) + + # create the internals grouper + grouper = ops.BaseGrouper( + group_axis, groupings, sort=sort, mutated=mutated, dropna=dropna + ) + return grouper, frozenset(exclusions), obj + + +def _is_label_like(val) -> bool: + return isinstance(val, (str, tuple)) or (val is not None and is_scalar(val)) + + +def _convert_grouper(axis: Index, grouper): + if isinstance(grouper, dict): + return grouper.get + elif isinstance(grouper, Series): + if grouper.index.equals(axis): + return grouper._values + else: + return grouper.reindex(axis)._values + elif isinstance(grouper, MultiIndex): + return grouper._values + elif isinstance(grouper, (list, tuple, Index, Categorical, np.ndarray)): + if len(grouper) != len(axis): + raise ValueError("Grouper and axis must be same length") + + if isinstance(grouper, (list, tuple)): + grouper = com.asarray_tuplesafe(grouper) + return grouper + else: + return grouper 
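+# Illustration of _convert_grouper (hypothetical inputs):
+#
+#   axis = pd.Index(["x", "y"])
+#   _convert_grouper(axis, {"x": 1, "y": 2})   # -> the dict's .get method
+#   _convert_grouper(axis, pd.Series([1, 2], index=["y", "x"]))
+#                                              # -> values realigned to axis
+#   _convert_grouper(axis, ["a", "b"])         # -> ndarray(["a", "b"])
+#
+# Callables and other objects are returned unchanged and applied later via
+# Index.map in Grouping.__init__.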
+ + +def _check_deprecated_resample_kwargs(kwargs, origin): + """ + Check for use of deprecated parameters in ``resample`` and related functions. + + Raises the appropriate warnings if these parameters are detected. + Only sets an approximate ``stacklevel`` for the warnings (see #37603, #36629). + + Parameters + ---------- + kwargs : dict + Dictionary of keyword arguments to check for deprecated parameters. + origin : object + From where this function is being called; either Grouper or TimeGrouper. Used + to determine an approximate stacklevel. + """ + # Deprecation warning of `base` and `loffset` since v1.1.0: + # we are raising the warning here to be able to set the `stacklevel` + # properly since we need to raise the `base` and `loffset` deprecation + # warning from three different cases: + # core/generic.py::NDFrame.resample + # core/groupby/groupby.py::GroupBy.resample + # core/groupby/grouper.py::Grouper + # raising these warnings from TimeGrouper directly would fail the test: + # tests/resample/test_deprecated.py::test_deprecating_on_loffset_and_base + + if kwargs.get("base", None) is not None: + warnings.warn( + "'base' in .resample() and in Grouper() is deprecated.\n" + "The new arguments that you should use are 'offset' or 'origin'.\n" + '\n>>> df.resample(freq="3s", base=2)\n' + "\nbecomes:\n" + '\n>>> df.resample(freq="3s", offset="2s")\n', + FutureWarning, + stacklevel=find_stack_level(), + ) + if kwargs.get("loffset", None) is not None: + warnings.warn( + "'loffset' in .resample() and in Grouper() is deprecated.\n" + '\n>>> df.resample(freq="3s", loffset="8H")\n' + "\nbecomes:\n" + "\n>>> from pandas.tseries.frequencies import to_offset" + '\n>>> df = df.resample(freq="3s").mean()' + '\n>>> df.index = df.index.to_timestamp() + to_offset("8H")\n', + FutureWarning, + stacklevel=find_stack_level(), + ) diff --git a/.local/lib/python3.8/site-packages/pandas/core/groupby/indexing.py b/.local/lib/python3.8/site-packages/pandas/core/groupby/indexing.py new file mode 100644 index 0000000..f98bdf4 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/groupby/indexing.py @@ -0,0 +1,303 @@ +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Iterable, + Literal, + cast, +) + +import numpy as np + +from pandas._typing import PositionalIndexer +from pandas.util._decorators import ( + cache_readonly, + doc, +) + +from pandas.core.dtypes.common import ( + is_integer, + is_list_like, +) + +if TYPE_CHECKING: + from pandas import ( + DataFrame, + Series, + ) + from pandas.core.groupby import groupby + + +class GroupByIndexingMixin: + """ + Mixin for adding ._positional_selector to GroupBy. + """ + + @cache_readonly + def _positional_selector(self) -> GroupByPositionalSelector: + """ + Return positional selection for each group. + + ``groupby._positional_selector[i:j]`` is similar to + ``groupby.apply(lambda x: x.iloc[i:j])`` + but much faster and preserves the original index and order. + + ``_positional_selector[]`` is compatible with and extends :meth:`~GroupBy.head` + and :meth:`~GroupBy.tail`. For example: + + - ``head(5)`` + - ``_positional_selector[5:-5]`` + - ``tail(5)`` + + together return all the rows. + + Allowed inputs for the index are: + + - An integer valued iterable, e.g. ``range(2, 4)``. + - A comma separated list of integers and slices, e.g. ``5``, ``2, 4``, ``2:4``. 
+ + The output format is the same as :meth:`~GroupBy.head` and + :meth:`~GroupBy.tail`, namely + a subset of the ``DataFrame`` or ``Series`` with the index and order preserved. + + Returns + ------- + Series + The filtered subset of the original Series. + DataFrame + The filtered subset of the original DataFrame. + + See Also + -------- + DataFrame.iloc : Purely integer-location based indexing for selection by + position. + GroupBy.head : Return first n rows of each group. + GroupBy.tail : Return last n rows of each group. + GroupBy.nth : Take the nth row from each group if n is an int, or a + subset of rows, if n is a list of ints. + + Notes + ----- + - The slice step cannot be negative. + - If the index specification results in overlaps, the item is not duplicated. + - If the index specification changes the order of items, then + they are returned in their original order. + By contrast, ``DataFrame.iloc`` can change the row order. + - ``groupby()`` parameters such as as_index and dropna are ignored. + + The differences between ``_positional_selector[]`` and :meth:`~GroupBy.nth` + with ``as_index=False`` are: + + - Input to ``_positional_selector`` can include + one or more slices whereas ``nth`` + just handles an integer or a list of integers. + - ``_positional_selector`` can accept a slice relative to the + last row of each group. + - ``_positional_selector`` does not have an equivalent to the + ``nth()`` ``dropna`` parameter. + + Examples + -------- + >>> df = pd.DataFrame([["a", 1], ["a", 2], ["a", 3], ["b", 4], ["b", 5]], + ... columns=["A", "B"]) + >>> df.groupby("A")._positional_selector[1:2] + A B + 1 a 2 + 4 b 5 + + >>> df.groupby("A")._positional_selector[1, -1] + A B + 1 a 2 + 2 a 3 + 4 b 5 + """ + if TYPE_CHECKING: + groupby_self = cast(groupby.GroupBy, self) + else: + groupby_self = self + + return GroupByPositionalSelector(groupby_self) + + def _make_mask_from_positional_indexer( + self, + arg: PositionalIndexer | tuple, + ) -> np.ndarray: + if is_list_like(arg): + if all(is_integer(i) for i in cast(Iterable, arg)): + mask = self._make_mask_from_list(cast(Iterable[int], arg)) + else: + mask = self._make_mask_from_tuple(cast(tuple, arg)) + + elif isinstance(arg, slice): + mask = self._make_mask_from_slice(arg) + elif is_integer(arg): + mask = self._make_mask_from_int(cast(int, arg)) + else: + raise TypeError( + f"Invalid index {type(arg)}. " + "Must be integer, list-like, slice or a tuple of " + "integers and slices" + ) + + if isinstance(mask, bool): + if mask: + mask = self._ascending_count >= 0 + else: + mask = self._ascending_count < 0 + + return cast(np.ndarray, mask) + + def _make_mask_from_int(self, arg: int) -> np.ndarray: + if arg >= 0: + return self._ascending_count == arg + else: + return self._descending_count == (-arg - 1) + + def _make_mask_from_list(self, args: Iterable[int]) -> bool | np.ndarray: + positive = [arg for arg in args if arg >= 0] + negative = [-arg - 1 for arg in args if arg < 0] + + mask: bool | np.ndarray = False + + if positive: + mask |= np.isin(self._ascending_count, positive) + + if negative: + mask |= np.isin(self._descending_count, negative) + + return mask + + def _make_mask_from_tuple(self, args: tuple) -> bool | np.ndarray: + mask: bool | np.ndarray = False + + for arg in args: + if is_integer(arg): + mask |= self._make_mask_from_int(cast(int, arg)) + elif isinstance(arg, slice): + mask |= self._make_mask_from_slice(arg) + else: + raise ValueError( + f"Invalid argument {type(arg)}. Should be int or slice." 
+ ) + + return mask + + def _make_mask_from_slice(self, arg: slice) -> bool | np.ndarray: + start = arg.start + stop = arg.stop + step = arg.step + + if step is not None and step < 0: + raise ValueError(f"Invalid step {step}. Must be non-negative") + + mask: bool | np.ndarray = True + + if step is None: + step = 1 + + if start is None: + if step > 1: + mask &= self._ascending_count % step == 0 + + elif start >= 0: + mask &= self._ascending_count >= start + + if step > 1: + mask &= (self._ascending_count - start) % step == 0 + + else: + mask &= self._descending_count < -start + + offset_array = self._descending_count + start + 1 + limit_array = ( + self._ascending_count + self._descending_count + (start + 1) + ) < 0 + offset_array = np.where(limit_array, self._ascending_count, offset_array) + + mask &= offset_array % step == 0 + + if stop is not None: + if stop >= 0: + mask &= self._ascending_count < stop + else: + mask &= self._descending_count >= -stop + + return mask + + @cache_readonly + def _ascending_count(self) -> np.ndarray: + if TYPE_CHECKING: + groupby_self = cast(groupby.GroupBy, self) + else: + groupby_self = self + + return groupby_self._cumcount_array() + + @cache_readonly + def _descending_count(self) -> np.ndarray: + if TYPE_CHECKING: + groupby_self = cast(groupby.GroupBy, self) + else: + groupby_self = self + + return groupby_self._cumcount_array(ascending=False) + + +@doc(GroupByIndexingMixin._positional_selector) +class GroupByPositionalSelector: + def __init__(self, groupby_object: groupby.GroupBy): + self.groupby_object = groupby_object + + def __getitem__(self, arg: PositionalIndexer | tuple) -> DataFrame | Series: + """ + Select by positional index per group. + + Implements GroupBy._positional_selector + + Parameters + ---------- + arg : PositionalIndexer | tuple + Allowed values are: + - int + - int valued iterable such as list or range + - slice with step either None or positive + - tuple of integers and slices + + Returns + ------- + Series + The filtered subset of the original groupby Series. + DataFrame + The filtered subset of the original groupby DataFrame. + + See Also + -------- + DataFrame.iloc : Integer-location based indexing for selection by position. + GroupBy.head : Return first n rows of each group. + GroupBy.tail : Return last n rows of each group. + GroupBy._positional_selector : Return positional selection for each group. + GroupBy.nth : Take the nth row from each group if n is an int, or a + subset of rows, if n is a list of ints. 
+ """ + self.groupby_object._reset_group_selection() + mask = self.groupby_object._make_mask_from_positional_indexer(arg) + return self.groupby_object._mask_selected_obj(mask) + + +class GroupByNthSelector: + """ + Dynamically substituted for GroupBy.nth to enable both call and index + """ + + def __init__(self, groupby_object: groupby.GroupBy): + self.groupby_object = groupby_object + + def __call__( + self, + n: PositionalIndexer | tuple, + dropna: Literal["any", "all", None] = None, + ) -> DataFrame | Series: + return self.groupby_object.nth_actual(n, dropna) + + def __getitem__(self, n: PositionalIndexer | tuple) -> DataFrame | Series: + return self.groupby_object.nth_actual(n) diff --git a/.local/lib/python3.8/site-packages/pandas/core/groupby/numba_.py b/.local/lib/python3.8/site-packages/pandas/core/groupby/numba_.py new file mode 100644 index 0000000..24d6672 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/groupby/numba_.py @@ -0,0 +1,184 @@ +"""Common utilities for Numba operations with groupby ops""" +from __future__ import annotations + +import inspect +from typing import ( + TYPE_CHECKING, + Any, + Callable, +) + +import numpy as np + +from pandas._typing import Scalar +from pandas.compat._optional import import_optional_dependency + +from pandas.core.util.numba_ import ( + NUMBA_FUNC_CACHE, + NumbaUtilError, + get_jit_arguments, + jit_user_function, +) + + +def validate_udf(func: Callable) -> None: + """ + Validate user defined function for ops when using Numba with groupby ops. + + The first signature arguments should include: + + def f(values, index, ...): + ... + + Parameters + ---------- + func : function, default False + user defined function + + Returns + ------- + None + + Raises + ------ + NumbaUtilError + """ + udf_signature = list(inspect.signature(func).parameters.keys()) + expected_args = ["values", "index"] + min_number_args = len(expected_args) + if ( + len(udf_signature) < min_number_args + or udf_signature[:min_number_args] != expected_args + ): + raise NumbaUtilError( + f"The first {min_number_args} arguments to {func.__name__} must be " + f"{expected_args}" + ) + + +def generate_numba_agg_func( + kwargs: dict[str, Any], + func: Callable[..., Scalar], + engine_kwargs: dict[str, bool] | None, +) -> Callable[[np.ndarray, np.ndarray, np.ndarray, np.ndarray, int, Any], np.ndarray]: + """ + Generate a numba jitted agg function specified by values from engine_kwargs. + + 1. jit the user's function + 2. Return a groupby agg function with the jitted function inline + + Configurations specified in engine_kwargs apply to both the user's + function _AND_ the groupby evaluation loop. 
+ + Parameters + ---------- + kwargs : dict + **kwargs to be passed into the function + func : function + function to be applied to each window and will be JITed + engine_kwargs : dict + dictionary of arguments to be passed into numba.jit + + Returns + ------- + Numba function + """ + nopython, nogil, parallel = get_jit_arguments(engine_kwargs, kwargs) + + validate_udf(func) + cache_key = (func, "groupby_agg") + if cache_key in NUMBA_FUNC_CACHE: + return NUMBA_FUNC_CACHE[cache_key] + + numba_func = jit_user_function(func, nopython, nogil, parallel) + if TYPE_CHECKING: + import numba + else: + numba = import_optional_dependency("numba") + + @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel) + def group_agg( + values: np.ndarray, + index: np.ndarray, + begin: np.ndarray, + end: np.ndarray, + num_columns: int, + *args: Any, + ) -> np.ndarray: + + assert len(begin) == len(end) + num_groups = len(begin) + + result = np.empty((num_groups, num_columns)) + for i in numba.prange(num_groups): + group_index = index[begin[i] : end[i]] + for j in numba.prange(num_columns): + group = values[begin[i] : end[i], j] + result[i, j] = numba_func(group, group_index, *args) + return result + + return group_agg + + +def generate_numba_transform_func( + kwargs: dict[str, Any], + func: Callable[..., np.ndarray], + engine_kwargs: dict[str, bool] | None, +) -> Callable[[np.ndarray, np.ndarray, np.ndarray, np.ndarray, int, Any], np.ndarray]: + """ + Generate a numba jitted transform function specified by values from engine_kwargs. + + 1. jit the user's function + 2. Return a groupby transform function with the jitted function inline + + Configurations specified in engine_kwargs apply to both the user's + function _AND_ the groupby evaluation loop. + + Parameters + ---------- + kwargs : dict + **kwargs to be passed into the function + func : function + function to be applied to each window and will be JITed + engine_kwargs : dict + dictionary of arguments to be passed into numba.jit + + Returns + ------- + Numba function + """ + nopython, nogil, parallel = get_jit_arguments(engine_kwargs, kwargs) + + validate_udf(func) + cache_key = (func, "groupby_transform") + if cache_key in NUMBA_FUNC_CACHE: + return NUMBA_FUNC_CACHE[cache_key] + + numba_func = jit_user_function(func, nopython, nogil, parallel) + if TYPE_CHECKING: + import numba + else: + numba = import_optional_dependency("numba") + + @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel) + def group_transform( + values: np.ndarray, + index: np.ndarray, + begin: np.ndarray, + end: np.ndarray, + num_columns: int, + *args: Any, + ) -> np.ndarray: + + assert len(begin) == len(end) + num_groups = len(begin) + + result = np.empty((len(values), num_columns)) + for i in numba.prange(num_groups): + group_index = index[begin[i] : end[i]] + for j in numba.prange(num_columns): + group = values[begin[i] : end[i], j] + result[begin[i] : end[i], j] = numba_func(group, group_index, *args) + return result + + return group_transform diff --git a/.local/lib/python3.8/site-packages/pandas/core/groupby/ops.py b/.local/lib/python3.8/site-packages/pandas/core/groupby/ops.py new file mode 100644 index 0000000..29073cc --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/groupby/ops.py @@ -0,0 +1,1272 @@ +""" +Provide classes to perform the groupby aggregate operations. + +These are not exposed to the user and provide implementations of the grouping +operations, primarily in cython. 
These classes (BaseGrouper and BinGrouper) +are contained *in* the SeriesGroupBy and DataFrameGroupBy objects. +""" +from __future__ import annotations + +import collections +import functools +from typing import ( + Callable, + Generic, + Hashable, + Iterator, + Sequence, + final, + overload, +) + +import numpy as np + +from pandas._libs import ( + NaT, + lib, +) +import pandas._libs.groupby as libgroupby +import pandas._libs.reduction as libreduction +from pandas._typing import ( + ArrayLike, + DtypeObj, + NDFrameT, + Shape, + npt, +) +from pandas.errors import AbstractMethodError +from pandas.util._decorators import cache_readonly + +from pandas.core.dtypes.cast import ( + maybe_cast_pointwise_result, + maybe_downcast_to_dtype, +) +from pandas.core.dtypes.common import ( + ensure_float64, + ensure_int64, + ensure_platform_int, + is_1d_only_ea_obj, + is_bool_dtype, + is_categorical_dtype, + is_complex_dtype, + is_datetime64_any_dtype, + is_float_dtype, + is_integer_dtype, + is_numeric_dtype, + is_sparse, + is_timedelta64_dtype, + needs_i8_conversion, +) +from pandas.core.dtypes.dtypes import ExtensionDtype +from pandas.core.dtypes.missing import ( + isna, + maybe_fill, +) + +from pandas.core.arrays import ( + DatetimeArray, + ExtensionArray, + PeriodArray, + TimedeltaArray, +) +from pandas.core.arrays.boolean import BooleanDtype +from pandas.core.arrays.floating import ( + Float64Dtype, + FloatingDtype, +) +from pandas.core.arrays.integer import ( + Int64Dtype, + _IntegerDtype, +) +from pandas.core.arrays.masked import ( + BaseMaskedArray, + BaseMaskedDtype, +) +from pandas.core.arrays.string_ import StringDtype +from pandas.core.frame import DataFrame +from pandas.core.generic import NDFrame +from pandas.core.groupby import grouper +from pandas.core.indexes.api import ( + CategoricalIndex, + Index, + MultiIndex, + ensure_index, +) +from pandas.core.series import Series +from pandas.core.sorting import ( + compress_group_index, + decons_obs_group_ids, + get_flattened_list, + get_group_index, + get_group_index_sorter, + get_indexer_dict, +) + + +class WrappedCythonOp: + """ + Dispatch logic for functions defined in _libs.groupby + """ + + # Functions for which we do _not_ attempt to cast the cython result + # back to the original dtype. 
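+    # For example, "rank" always produces float64, and "count"/"size"
+    # produce integer counts, so coercing those results back to (say) a
+    # datetime64 input dtype would be wrong.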
+ cast_blocklist = frozenset(["rank", "count", "size", "idxmin", "idxmax"]) + + def __init__(self, kind: str, how: str): + self.kind = kind + self.how = how + + _CYTHON_FUNCTIONS = { + "aggregate": { + "add": "group_add", + "prod": "group_prod", + "min": "group_min", + "max": "group_max", + "mean": "group_mean", + "median": "group_median", + "var": "group_var", + "first": "group_nth", + "last": "group_last", + "ohlc": "group_ohlc", + }, + "transform": { + "cumprod": "group_cumprod", + "cumsum": "group_cumsum", + "cummin": "group_cummin", + "cummax": "group_cummax", + "rank": "group_rank", + }, + } + + _MASKED_CYTHON_FUNCTIONS = {"cummin", "cummax", "min", "max"} + + _cython_arity = {"ohlc": 4} # OHLC + + # Note: we make this a classmethod and pass kind+how so that caching + # works at the class level and not the instance level + @classmethod + @functools.lru_cache(maxsize=None) + def _get_cython_function( + cls, kind: str, how: str, dtype: np.dtype, is_numeric: bool + ): + + dtype_str = dtype.name + ftype = cls._CYTHON_FUNCTIONS[kind][how] + + # see if there is a fused-type version of function + # only valid for numeric + f = getattr(libgroupby, ftype) + if is_numeric: + return f + elif dtype == object: + if "object" not in f.__signatures__: + # raise NotImplementedError here rather than TypeError later + raise NotImplementedError( + f"function is not implemented for this dtype: " + f"[how->{how},dtype->{dtype_str}]" + ) + return f + + def get_cython_func_and_vals(self, values: np.ndarray, is_numeric: bool): + """ + Find the appropriate cython function, casting if necessary. + + Parameters + ---------- + values : np.ndarray + is_numeric : bool + + Returns + ------- + func : callable + values : np.ndarray + """ + how = self.how + kind = self.kind + + if how in ["median", "cumprod"]: + # these two only have float64 implementations + if is_numeric: + values = ensure_float64(values) + else: + raise NotImplementedError( + f"function is not implemented for this dtype: " + f"[how->{how},dtype->{values.dtype.name}]" + ) + func = getattr(libgroupby, f"group_{how}_float64") + return func, values + + func = self._get_cython_function(kind, how, values.dtype, is_numeric) + + if values.dtype.kind in ["i", "u"]: + if how in ["add", "var", "prod", "mean", "ohlc"]: + # result may still include NaN, so we have to cast + values = ensure_float64(values) + + return func, values + + def _disallow_invalid_ops(self, dtype: DtypeObj, is_numeric: bool = False): + """ + Check if we can do this operation with our cython functions. + + Raises + ------ + NotImplementedError + This is either not a valid function for this dtype, or + valid but not implemented in cython. + """ + how = self.how + + if is_numeric: + # never an invalid op for those dtypes, so return early as fastpath + return + + if is_categorical_dtype(dtype): + # NotImplementedError for methods that can fall back to a + # non-cython implementation. + if how in ["add", "prod", "cumsum", "cumprod"]: + raise TypeError(f"{dtype} type does not support {how} operations") + raise NotImplementedError(f"{dtype} dtype not supported") + + elif is_sparse(dtype): + # categoricals are only 1d, so we + # are not setup for dim transforming + raise NotImplementedError(f"{dtype} dtype not supported") + elif is_datetime64_any_dtype(dtype): + # we raise NotImplemented if this is an invalid operation + # entirely, e.g. 
adding datetimes + if how in ["add", "prod", "cumsum", "cumprod"]: + raise TypeError(f"datetime64 type does not support {how} operations") + elif is_timedelta64_dtype(dtype): + if how in ["prod", "cumprod"]: + raise TypeError(f"timedelta64 type does not support {how} operations") + + def _get_output_shape(self, ngroups: int, values: np.ndarray) -> Shape: + how = self.how + kind = self.kind + + arity = self._cython_arity.get(how, 1) + + out_shape: Shape + if how == "ohlc": + out_shape = (ngroups, 4) + elif arity > 1: + raise NotImplementedError( + "arity of more than 1 is not supported for the 'how' argument" + ) + elif kind == "transform": + out_shape = values.shape + else: + out_shape = (ngroups,) + values.shape[1:] + return out_shape + + def get_out_dtype(self, dtype: np.dtype) -> np.dtype: + how = self.how + + if how == "rank": + out_dtype = "float64" + else: + if is_numeric_dtype(dtype): + out_dtype = f"{dtype.kind}{dtype.itemsize}" + else: + out_dtype = "object" + return np.dtype(out_dtype) + + @overload + def _get_result_dtype(self, dtype: np.dtype) -> np.dtype: + ... # pragma: no cover + + @overload + def _get_result_dtype(self, dtype: ExtensionDtype) -> ExtensionDtype: + ... # pragma: no cover + + def _get_result_dtype(self, dtype: DtypeObj) -> DtypeObj: + """ + Get the desired dtype of a result based on the + input dtype and how it was computed. + + Parameters + ---------- + dtype : np.dtype or ExtensionDtype + Input dtype. + + Returns + ------- + np.dtype or ExtensionDtype + The desired dtype of the result. + """ + how = self.how + + if how in ["add", "cumsum", "sum", "prod"]: + if dtype == np.dtype(bool): + return np.dtype(np.int64) + elif isinstance(dtype, (BooleanDtype, _IntegerDtype)): + return Int64Dtype() + elif how in ["mean", "median", "var"]: + if isinstance(dtype, (BooleanDtype, _IntegerDtype)): + return Float64Dtype() + elif is_float_dtype(dtype) or is_complex_dtype(dtype): + return dtype + elif is_numeric_dtype(dtype): + return np.dtype(np.float64) + return dtype + + def uses_mask(self) -> bool: + return self.how in self._MASKED_CYTHON_FUNCTIONS + + @final + def _ea_wrap_cython_operation( + self, + values: ExtensionArray, + min_count: int, + ngroups: int, + comp_ids: np.ndarray, + **kwargs, + ) -> ArrayLike: + """ + If we have an ExtensionArray, unwrap, call _cython_operation, and + re-wrap if appropriate. + """ + # TODO: general case implementation overridable by EAs. 
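+        # Rough flow: unwrap the ExtensionArray to an ndarray the cython
+        # kernels understand (datetime-likes via an M8[ns] view, masked and
+        # string arrays via to_numpy), run _cython_op_ndim_compat, then
+        # re-wrap the result in _reconstruct_ea_result.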
+ if isinstance(values, BaseMaskedArray) and self.uses_mask(): + return self._masked_ea_wrap_cython_operation( + values, + min_count=min_count, + ngroups=ngroups, + comp_ids=comp_ids, + **kwargs, + ) + + if isinstance(values, (DatetimeArray, PeriodArray, TimedeltaArray)): + # All of the functions implemented here are ordinal, so we can + # operate on the tz-naive equivalents + npvalues = values._ndarray.view("M8[ns]") + elif isinstance(values.dtype, (BooleanDtype, _IntegerDtype)): + # IntegerArray or BooleanArray + npvalues = values.to_numpy("float64", na_value=np.nan) + elif isinstance(values.dtype, FloatingDtype): + # FloatingArray + npvalues = values.to_numpy(values.dtype.numpy_dtype, na_value=np.nan) + elif isinstance(values.dtype, StringDtype): + # StringArray + npvalues = values.to_numpy(object, na_value=np.nan) + else: + raise NotImplementedError( + f"function is not implemented for this dtype: {values.dtype}" + ) + + res_values = self._cython_op_ndim_compat( + npvalues, + min_count=min_count, + ngroups=ngroups, + comp_ids=comp_ids, + mask=None, + **kwargs, + ) + + if self.how in ["rank"]: + # i.e. how in WrappedCythonOp.cast_blocklist, since + # other cast_blocklist methods dont go through cython_operation + return res_values + + return self._reconstruct_ea_result(values, res_values) + + def _reconstruct_ea_result(self, values, res_values): + """ + Construct an ExtensionArray result from an ndarray result. + """ + # TODO: allow EAs to override this logic + + if isinstance( + values.dtype, (BooleanDtype, _IntegerDtype, FloatingDtype, StringDtype) + ): + dtype = self._get_result_dtype(values.dtype) + cls = dtype.construct_array_type() + return cls._from_sequence(res_values, dtype=dtype) + + elif needs_i8_conversion(values.dtype): + i8values = res_values.view("i8") + return type(values)(i8values, dtype=values.dtype) + + raise NotImplementedError + + @final + def _masked_ea_wrap_cython_operation( + self, + values: BaseMaskedArray, + min_count: int, + ngroups: int, + comp_ids: np.ndarray, + **kwargs, + ) -> BaseMaskedArray: + """ + Equivalent of `_ea_wrap_cython_operation`, but optimized for masked EA's + and cython algorithms which accept a mask. 
+ """ + orig_values = values + + # Copy to ensure input and result masks don't end up shared + mask = values._mask.copy() + result_mask = np.zeros(ngroups, dtype=bool) + arr = values._data + + res_values = self._cython_op_ndim_compat( + arr, + min_count=min_count, + ngroups=ngroups, + comp_ids=comp_ids, + mask=mask, + result_mask=result_mask, + **kwargs, + ) + + dtype = self._get_result_dtype(orig_values.dtype) + assert isinstance(dtype, BaseMaskedDtype) + cls = dtype.construct_array_type() + + if self.kind != "aggregate": + return cls(res_values.astype(dtype.type, copy=False), mask) + else: + return cls(res_values.astype(dtype.type, copy=False), result_mask) + + @final + def _cython_op_ndim_compat( + self, + values: np.ndarray, + *, + min_count: int, + ngroups: int, + comp_ids: np.ndarray, + mask: np.ndarray | None = None, + result_mask: np.ndarray | None = None, + **kwargs, + ) -> np.ndarray: + if values.ndim == 1: + # expand to 2d, dispatch, then squeeze if appropriate + values2d = values[None, :] + if mask is not None: + mask = mask[None, :] + if result_mask is not None: + result_mask = result_mask[None, :] + res = self._call_cython_op( + values2d, + min_count=min_count, + ngroups=ngroups, + comp_ids=comp_ids, + mask=mask, + result_mask=result_mask, + **kwargs, + ) + if res.shape[0] == 1: + return res[0] + + # otherwise we have OHLC + return res.T + + return self._call_cython_op( + values, + min_count=min_count, + ngroups=ngroups, + comp_ids=comp_ids, + mask=mask, + result_mask=result_mask, + **kwargs, + ) + + @final + def _call_cython_op( + self, + values: np.ndarray, # np.ndarray[ndim=2] + *, + min_count: int, + ngroups: int, + comp_ids: np.ndarray, + mask: np.ndarray | None, + result_mask: np.ndarray | None, + **kwargs, + ) -> np.ndarray: # np.ndarray[ndim=2] + orig_values = values + + dtype = values.dtype + is_numeric = is_numeric_dtype(dtype) + + is_datetimelike = needs_i8_conversion(dtype) + + if is_datetimelike: + values = values.view("int64") + is_numeric = True + elif is_bool_dtype(dtype): + values = values.astype("int64") + elif is_integer_dtype(dtype): + # GH#43329 If the dtype is explicitly of type uint64 the type is not + # changed to prevent overflow. + if dtype != np.uint64: + values = values.astype(np.int64, copy=False) + elif is_numeric: + if not is_complex_dtype(dtype): + values = ensure_float64(values) + + values = values.T + if mask is not None: + mask = mask.T + if result_mask is not None: + result_mask = result_mask.T + + out_shape = self._get_output_shape(ngroups, values) + func, values = self.get_cython_func_and_vals(values, is_numeric) + out_dtype = self.get_out_dtype(values.dtype) + + result = maybe_fill(np.empty(out_shape, dtype=out_dtype)) + if self.kind == "aggregate": + counts = np.zeros(ngroups, dtype=np.int64) + if self.how in ["min", "max", "mean"]: + func( + result, + counts, + values, + comp_ids, + min_count, + mask=mask, + result_mask=result_mask, + is_datetimelike=is_datetimelike, + ) + elif self.how in ["add"]: + # We support datetimelike + func( + result, + counts, + values, + comp_ids, + min_count, + datetimelike=is_datetimelike, + ) + else: + func(result, counts, values, comp_ids, min_count) + else: + # TODO: min_count + if self.uses_mask(): + func( + result, + values, + comp_ids, + ngroups, + is_datetimelike, + mask=mask, + **kwargs, + ) + else: + func(result, values, comp_ids, ngroups, is_datetimelike, **kwargs) + + if self.kind == "aggregate": + # i.e. counts is defined. 
+            # Locations where count<min_count need to have their result
+            # set to np.nan, which may require casting to float64
+            if is_integer_dtype(result.dtype) and not is_datetimelike:
+                cutoff = max(1, min_count)
+                empty_groups = counts < cutoff
+                if empty_groups.any():
+                    # Note: this conversion could be lossy
+                    result = result.astype("float64")
+                    result[empty_groups] = np.nan
+
+        result = result.T
+
+        if self.how not in self.cast_blocklist:
+            # e.g. if we are int64 and need to restore to datetime64/timedelta64
+            res_dtype = self._get_result_dtype(orig_values.dtype)
+            op_result = maybe_downcast_to_dtype(result, res_dtype)
+        else:
+            op_result = result
+
+        return op_result
+
+    @final
+    def cython_operation(
+        self,
+        *,
+        values: ArrayLike,
+        axis: int,
+        min_count: int = -1,
+        comp_ids: np.ndarray,
+        ngroups: int,
+        **kwargs,
+    ) -> ArrayLike:
+        """
+        Call our cython function, with appropriate pre- and post- processing.
+        """
+        if values.ndim > 2:
+            raise NotImplementedError("number of dimensions is currently limited to 2")
+        elif values.ndim == 2:
+            assert axis == 1, axis
+        elif not is_1d_only_ea_obj(values):
+            # Note: it is *not* the case that axis is always 0 for 1-dim values,
+            # as we can have 1D ExtensionArrays that we need to treat as 2D
+            assert axis == 0
+
+        dtype = values.dtype
+        is_numeric = is_numeric_dtype(dtype)
+
+        # can we do this operation with our cython functions
+        # if not raise NotImplementedError
+        self._disallow_invalid_ops(dtype, is_numeric)
+
+        if not isinstance(values, np.ndarray):
+            # i.e. ExtensionArray
+            return self._ea_wrap_cython_operation(
+                values,
+                min_count=min_count,
+                ngroups=ngroups,
+                comp_ids=comp_ids,
+                **kwargs,
+            )
+
+        return self._cython_op_ndim_compat(
+            values,
+            min_count=min_count,
+            ngroups=ngroups,
+            comp_ids=comp_ids,
+            mask=None,
+            **kwargs,
+        )
+
+
+class BaseGrouper:
+    """
+    This is an internal Grouper class, which actually holds
+    the generated groups
+
+    Parameters
+    ----------
+    axis : Index
+    groupings : Sequence[Grouping]
+        all the grouping instances to handle in this grouper
+        for example for grouper list to groupby, need to pass the list
+    sort : bool, default True
+        whether this grouper will give sorted result or not
+    group_keys : bool, default True
+    mutated : bool, default False
+    indexer : np.ndarray[np.intp], optional
+        the indexer created by Grouper
+        some groupers (TimeGrouper) will sort its axis and its
+        group_info is also sorted, so need the indexer to reorder
+
+    """
+
+    axis: Index
+
+    def __init__(
+        self,
+        axis: Index,
+        groupings: Sequence[grouper.Grouping],
+        sort: bool = True,
+        group_keys: bool = True,
+        mutated: bool = False,
+        indexer: npt.NDArray[np.intp] | None = None,
+        dropna: bool = True,
+    ):
+        assert isinstance(axis, Index), axis
+
+        self.axis = axis
+        self._groupings: list[grouper.Grouping] = list(groupings)
+        self._sort = sort
+        self.group_keys = group_keys
+        self.mutated = mutated
+        self.indexer = indexer
+        self.dropna = dropna
+
+    @property
+    def groupings(self) -> list[grouper.Grouping]:
+        return self._groupings
+
+    @property
+    def shape(self) -> Shape:
+        return tuple(ping.ngroups for ping in self.groupings)
+
+    def __iter__(self):
+        return iter(self.indices)
+
+    @property
+    def nkeys(self) -> int:
+        return len(self.groupings)
+
+    def get_iterator(
+        self, data: NDFrameT, axis: int = 0
+    ) -> Iterator[tuple[Hashable, NDFrameT]]:
+        """
+        Groupby iterator
+
+        Returns
+        -------
+        Generator yielding sequence of (name, subsetted object)
+        for each group
+        """
+        splitter = self._get_splitter(data, axis=axis)
+        keys = self.group_keys_seq
+        for key, group in zip(keys, splitter):
+            yield key, group.__finalize__(data, method="groupby")
+
+    @final
+    def _get_splitter(self, data: NDFrame, axis: int = 0) -> DataSplitter:
+        """
+        Returns
+        -------
+        Generator yielding subsetted objects
+
+        __finalize__ has not been called for the subsetted objects returned.
+        """
+        ids, _, ngroups = self.group_info
+        return get_splitter(data, ids, ngroups, axis=axis)
+
+    def _get_grouper(self):
+        """
+        We are a grouper as part of another's groupings.
+
+        We have a specific method of grouping, so cannot
+        convert to an Index for our grouper.
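+
+        For BaseGrouper this is the first grouping's grouping vector.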
+ """ + return self.groupings[0].grouping_vector + + @final + @cache_readonly + def group_keys_seq(self): + if len(self.groupings) == 1: + return self.levels[0] + else: + ids, _, ngroups = self.group_info + + # provide "flattened" iterator for multi-group setting + return get_flattened_list(ids, ngroups, self.levels, self.codes) + + @final + def apply( + self, f: Callable, data: DataFrame | Series, axis: int = 0 + ) -> tuple[list, bool]: + mutated = self.mutated + splitter = self._get_splitter(data, axis=axis) + group_keys = self.group_keys_seq + result_values = [] + + # This calls DataSplitter.__iter__ + zipped = zip(group_keys, splitter) + + for key, group in zipped: + group = group.__finalize__(data, method="groupby") + object.__setattr__(group, "name", key) + + # group might be modified + group_axes = group.axes + res = f(group) + if not mutated and not _is_indexed_like(res, group_axes, axis): + mutated = True + result_values.append(res) + + # getattr pattern for __name__ is needed for functools.partial objects + if len(group_keys) == 0 and getattr(f, "__name__", None) not in [ + "idxmin", + "idxmax", + "nanargmin", + "nanargmax", + ]: + # If group_keys is empty, then no function calls have been made, + # so we will not have raised even if this is an invalid dtype. + # So do one dummy call here to raise appropriate TypeError. + f(data.iloc[:0]) + + return result_values, mutated + + @cache_readonly + def indices(self) -> dict[Hashable, npt.NDArray[np.intp]]: + """dict {group name -> group indices}""" + if len(self.groupings) == 1 and isinstance(self.result_index, CategoricalIndex): + # This shows unused categories in indices GH#38642 + return self.groupings[0].indices + codes_list = [ping.codes for ping in self.groupings] + keys = [ping.group_index for ping in self.groupings] + return get_indexer_dict(codes_list, keys) + + @final + @property + def codes(self) -> list[np.ndarray]: + return [ping.codes for ping in self.groupings] + + @property + def levels(self) -> list[Index]: + return [ping.group_index for ping in self.groupings] + + @property + def names(self) -> list[Hashable]: + return [ping.name for ping in self.groupings] + + @final + def size(self) -> Series: + """ + Compute group sizes. 
+ """ + ids, _, ngroups = self.group_info + out: np.ndarray | list + if ngroups: + out = np.bincount(ids[ids != -1], minlength=ngroups) + else: + out = [] + return Series(out, index=self.result_index, dtype="int64") + + @cache_readonly + def groups(self) -> dict[Hashable, np.ndarray]: + """dict {group name -> group labels}""" + if len(self.groupings) == 1: + return self.groupings[0].groups + else: + to_groupby = zip(*(ping.grouping_vector for ping in self.groupings)) + index = Index(to_groupby) + return self.axis.groupby(index) + + @final + @cache_readonly + def is_monotonic(self) -> bool: + # return if my group orderings are monotonic + return Index(self.group_info[0]).is_monotonic + + @cache_readonly + def group_info(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp], int]: + comp_ids, obs_group_ids = self._get_compressed_codes() + + ngroups = len(obs_group_ids) + comp_ids = ensure_platform_int(comp_ids) + + return comp_ids, obs_group_ids, ngroups + + @final + @cache_readonly + def codes_info(self) -> npt.NDArray[np.intp]: + # return the codes of items in original grouped axis + ids, _, _ = self.group_info + if self.indexer is not None: + sorter = np.lexsort((ids, self.indexer)) + ids = ids[sorter] + ids = ensure_platform_int(ids) + # TODO: if numpy annotates np.lexsort, this ensure_platform_int + # may become unnecessary + return ids + + @final + def _get_compressed_codes(self) -> tuple[np.ndarray, npt.NDArray[np.intp]]: + # The first returned ndarray may have any signed integer dtype + if len(self.groupings) > 1: + group_index = get_group_index(self.codes, self.shape, sort=True, xnull=True) + return compress_group_index(group_index, sort=self._sort) + + ping = self.groupings[0] + return ping.codes, np.arange(len(ping.group_index), dtype=np.intp) + + @final + @cache_readonly + def ngroups(self) -> int: + return len(self.result_index) + + @property + def reconstructed_codes(self) -> list[np.ndarray]: + codes = self.codes + ids, obs_ids, _ = self.group_info + return decons_obs_group_ids(ids, obs_ids, self.shape, codes, xnull=True) + + @final + @cache_readonly + def result_arraylike(self) -> ArrayLike: + """ + Analogous to result_index, but returning an ndarray/ExtensionArray + allowing us to retain ExtensionDtypes not supported by Index. 
+ """ + # TODO(ExtensionIndex): once Index supports arbitrary EAs, this can + # be removed in favor of result_index + if len(self.groupings) == 1: + return self.groupings[0].group_arraylike + + # result_index is MultiIndex + return self.result_index._values + + @cache_readonly + def result_index(self) -> Index: + if len(self.groupings) == 1: + return self.groupings[0].result_index.rename(self.names[0]) + + codes = self.reconstructed_codes + levels = [ping.result_index for ping in self.groupings] + return MultiIndex( + levels=levels, codes=codes, verify_integrity=False, names=self.names + ) + + @final + def get_group_levels(self) -> list[ArrayLike]: + # Note: only called from _insert_inaxis_grouper_inplace, which + # is only called for BaseGrouper, never for BinGrouper + if len(self.groupings) == 1: + return [self.groupings[0].group_arraylike] + + name_list = [] + for ping, codes in zip(self.groupings, self.reconstructed_codes): + codes = ensure_platform_int(codes) + levels = ping.group_arraylike.take(codes) + + name_list.append(levels) + + return name_list + + # ------------------------------------------------------------ + # Aggregation functions + + @final + def _cython_operation( + self, + kind: str, + values, + how: str, + axis: int, + min_count: int = -1, + **kwargs, + ) -> ArrayLike: + """ + Returns the values of a cython operation. + """ + assert kind in ["transform", "aggregate"] + + cy_op = WrappedCythonOp(kind=kind, how=how) + + ids, _, _ = self.group_info + ngroups = self.ngroups + return cy_op.cython_operation( + values=values, + axis=axis, + min_count=min_count, + comp_ids=ids, + ngroups=ngroups, + **kwargs, + ) + + @final + def agg_series( + self, obj: Series, func: Callable, preserve_dtype: bool = False + ) -> ArrayLike: + """ + Parameters + ---------- + obj : Series + func : function taking a Series and returning a scalar-like + preserve_dtype : bool + Whether the aggregation is known to be dtype-preserving. + + Returns + ------- + np.ndarray or ExtensionArray + """ + # test_groupby_empty_with_category gets here with self.ngroups == 0 + # and len(obj) > 0 + + if len(obj) == 0: + # SeriesGrouper would raise if we were to call _aggregate_series_fast + result = self._aggregate_series_pure_python(obj, func) + + elif not isinstance(obj._values, np.ndarray): + result = self._aggregate_series_pure_python(obj, func) + + # we can preserve a little bit more aggressively with EA dtype + # because maybe_cast_pointwise_result will do a try/except + # with _from_sequence. NB we are assuming here that _from_sequence + # is sufficiently strict that it casts appropriately. 
+            preserve_dtype = True
+
+        else:
+            result = self._aggregate_series_pure_python(obj, func)
+
+        npvalues = lib.maybe_convert_objects(result, try_float=False)
+        if preserve_dtype:
+            out = maybe_cast_pointwise_result(npvalues, obj.dtype, numeric_only=True)
+        else:
+            out = npvalues
+        return out
+
+    @final
+    def _aggregate_series_pure_python(
+        self, obj: Series, func: Callable
+    ) -> npt.NDArray[np.object_]:
+        ids, _, ngroups = self.group_info
+
+        counts = np.zeros(ngroups, dtype=int)
+        result = np.empty(ngroups, dtype="O")
+        initialized = False
+
+        # equiv: splitter = self._get_splitter(obj, axis=0)
+        splitter = get_splitter(obj, ids, ngroups, axis=0)
+
+        for i, group in enumerate(splitter):
+            group = group.__finalize__(obj, method="groupby")
+            res = func(group)
+            res = libreduction.extract_result(res)
+
+            if not initialized:
+                # We only do this validation on the first iteration
+                libreduction.check_result_array(res, group.dtype)
+                initialized = True
+
+            counts[i] = group.shape[0]
+            result[i] = res
+
+        return result
+
+
+class BinGrouper(BaseGrouper):
+    """
+    This is an internal Grouper class
+
+    Parameters
+    ----------
+    bins : the split index of binlabels to group the item of axis
+    binlabels : the label list
+    mutated : bool, default False
+    indexer : np.ndarray[np.intp]
+
+    Examples
+    --------
+    bins: [2, 4, 6, 8, 10]
+    binlabels: DatetimeIndex(['2005-01-01', '2005-01-03',
+        '2005-01-05', '2005-01-07', '2005-01-09'],
+        dtype='datetime64[ns]', freq='2D')
+
+    the group_info, which contains the label of each item in the grouped
+    axis, the index of each label in the label list, and the number of
+    groups, is
+
+    (array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4]), array([0, 1, 2, 3, 4]), 5)
+
+    meaning that the grouped axis has 10 items that can be grouped into 5
+    labels; the first and second items belong to the first label, the
+    third and fourth items belong to the second label, and so on
+
+    """
+
+    bins: npt.NDArray[np.int64]
+    binlabels: Index
+    mutated: bool
+
+    def __init__(
+        self,
+        bins,
+        binlabels,
+        mutated: bool = False,
+        indexer=None,
+    ):
+        self.bins = ensure_int64(bins)
+        self.binlabels = ensure_index(binlabels)
+        self.mutated = mutated
+        self.indexer = indexer
+
+        # These lengths must match, otherwise we could call agg_series
+        # with empty self.bins, which would raise in libreduction.
+        assert len(self.binlabels) == len(self.bins)
+
+    @cache_readonly
+    def groups(self):
+        """dict {group name -> group labels}"""
+        # this is mainly for compat
+        # GH 3881
+        result = {
+            key: value
+            for key, value in zip(self.binlabels, self.bins)
+            if key is not NaT
+        }
+        return result
+
+    @property
+    def nkeys(self) -> int:
+        # still matches len(self.groupings), but we can hard-code
+        return 1
+
+    def _get_grouper(self):
+        """
+        We are a grouper as part of another's groupings.
+
+        We have a specific method of grouping, so cannot
+        convert to an Index for our grouper.
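+
+        For a BinGrouper the grouping object is the instance itself.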
+ """ + return self + + def get_iterator(self, data: NDFrame, axis: int = 0): + """ + Groupby iterator + + Returns + ------- + Generator yielding sequence of (name, subsetted object) + for each group + """ + if axis == 0: + slicer = lambda start, edge: data.iloc[start:edge] + else: + slicer = lambda start, edge: data.iloc[:, start:edge] + + length = len(data.axes[axis]) + + start = 0 + for edge, label in zip(self.bins, self.binlabels): + if label is not NaT: + yield label, slicer(start, edge) + start = edge + + if start < length: + yield self.binlabels[-1], slicer(start, None) + + @cache_readonly + def indices(self): + indices = collections.defaultdict(list) + + i = 0 + for label, bin in zip(self.binlabels, self.bins): + if i < bin: + if label is not NaT: + indices[label] = list(range(i, bin)) + i = bin + return indices + + @cache_readonly + def group_info(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp], int]: + ngroups = self.ngroups + obs_group_ids = np.arange(ngroups, dtype=np.intp) + rep = np.diff(np.r_[0, self.bins]) + + rep = ensure_platform_int(rep) + if ngroups == len(self.bins): + comp_ids = np.repeat(np.arange(ngroups), rep) + else: + comp_ids = np.repeat(np.r_[-1, np.arange(ngroups)], rep) + + return ( + ensure_platform_int(comp_ids), + obs_group_ids, + ngroups, + ) + + @cache_readonly + def reconstructed_codes(self) -> list[np.ndarray]: + # get unique result indices, and prepend 0 as groupby starts from the first + return [np.r_[0, np.flatnonzero(self.bins[1:] != self.bins[:-1]) + 1]] + + @cache_readonly + def result_index(self): + if len(self.binlabels) != 0 and isna(self.binlabels[0]): + return self.binlabels[1:] + + return self.binlabels + + @property + def levels(self) -> list[Index]: + return [self.binlabels] + + @property + def names(self) -> list[Hashable]: + return [self.binlabels.name] + + @property + def groupings(self) -> list[grouper.Grouping]: + lev = self.binlabels + ping = grouper.Grouping(lev, lev, in_axis=False, level=None) + return [ping] + + def _aggregate_series_fast(self, obj: Series, func: Callable) -> np.ndarray: + # -> np.ndarray[object] + raise NotImplementedError( + "This should not be reached; use _aggregate_series_pure_python" + ) + + +def _is_indexed_like(obj, axes, axis: int) -> bool: + if isinstance(obj, Series): + if len(axes) > 1: + return False + return obj.axes[axis].equals(axes[axis]) + elif isinstance(obj, DataFrame): + return obj.axes[axis].equals(axes[axis]) + + return False + + +# ---------------------------------------------------------------------- +# Splitting / application + + +class DataSplitter(Generic[NDFrameT]): + def __init__( + self, + data: NDFrameT, + labels: npt.NDArray[np.intp], + ngroups: int, + axis: int = 0, + ): + self.data = data + self.labels = ensure_platform_int(labels) # _should_ already be np.intp + self.ngroups = ngroups + + self.axis = axis + assert isinstance(axis, int), axis + + @cache_readonly + def slabels(self) -> npt.NDArray[np.intp]: + # Sorted labels + return self.labels.take(self._sort_idx) + + @cache_readonly + def _sort_idx(self) -> npt.NDArray[np.intp]: + # Counting sort indexer + return get_group_index_sorter(self.labels, self.ngroups) + + def __iter__(self): + sdata = self.sorted_data + + if self.ngroups == 0: + # we are inside a generator, rather than raise StopIteration + # we merely return signal the end + return + + starts, ends = lib.generate_slices(self.slabels, self.ngroups) + + for start, end in zip(starts, ends): + yield self._chop(sdata, slice(start, end)) + + @cache_readonly + 
def sorted_data(self) -> NDFrameT: + return self.data.take(self._sort_idx, axis=self.axis) + + def _chop(self, sdata, slice_obj: slice) -> NDFrame: + raise AbstractMethodError(self) + + +class SeriesSplitter(DataSplitter): + def _chop(self, sdata: Series, slice_obj: slice) -> Series: + # fastpath equivalent to `sdata.iloc[slice_obj]` + mgr = sdata._mgr.get_slice(slice_obj) + # __finalize__ not called here, must be applied by caller if applicable + return sdata._constructor(mgr, name=sdata.name, fastpath=True) + + +class FrameSplitter(DataSplitter): + def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame: + # Fastpath equivalent to: + # if self.axis == 0: + # return sdata.iloc[slice_obj] + # else: + # return sdata.iloc[:, slice_obj] + mgr = sdata._mgr.get_slice(slice_obj, axis=1 - self.axis) + # __finalize__ not called here, must be applied by caller if applicable + return sdata._constructor(mgr) + + +def get_splitter( + data: NDFrame, labels: np.ndarray, ngroups: int, axis: int = 0 +) -> DataSplitter: + if isinstance(data, Series): + klass: type[DataSplitter] = SeriesSplitter + else: + # i.e. DataFrame + klass = FrameSplitter + + return klass(data, labels, ngroups, axis) diff --git a/.local/lib/python3.8/site-packages/pandas/core/index.py b/.local/lib/python3.8/site-packages/pandas/core/index.py new file mode 100644 index 0000000..00ca6f9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/index.py @@ -0,0 +1,32 @@ +import warnings + +from pandas.util._exceptions import find_stack_level + +from pandas.core.indexes.api import ( # noqa:F401 + CategoricalIndex, + DatetimeIndex, + Float64Index, + Index, + Int64Index, + IntervalIndex, + MultiIndex, + NaT, + NumericIndex, + PeriodIndex, + RangeIndex, + TimedeltaIndex, + UInt64Index, + _new_Index, + ensure_index, + ensure_index_from_sequences, + get_objs_combined_axis, +) +from pandas.core.indexes.multi import sparsify_labels # noqa:F401 + +# GH#30193 +warnings.warn( + "pandas.core.index is deprecated and will be removed in a future version. 
" + "The public classes are available in the top-level namespace.", + FutureWarning, + stacklevel=find_stack_level(), +) diff --git a/.local/lib/python3.8/site-packages/pandas/core/indexers/__init__.py b/.local/lib/python3.8/site-packages/pandas/core/indexers/__init__.py new file mode 100644 index 0000000..86ec361 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/indexers/__init__.py @@ -0,0 +1,33 @@ +from pandas.core.indexers.utils import ( + check_array_indexer, + check_key_length, + check_setitem_lengths, + deprecate_ndim_indexing, + is_empty_indexer, + is_exact_shape_match, + is_list_like_indexer, + is_scalar_indexer, + is_valid_positional_slice, + length_of_indexer, + maybe_convert_indices, + unpack_1tuple, + unpack_tuple_and_ellipses, + validate_indices, +) + +__all__ = [ + "is_valid_positional_slice", + "is_list_like_indexer", + "is_scalar_indexer", + "is_empty_indexer", + "check_setitem_lengths", + "validate_indices", + "maybe_convert_indices", + "is_exact_shape_match", + "length_of_indexer", + "deprecate_ndim_indexing", + "unpack_1tuple", + "check_key_length", + "check_array_indexer", + "unpack_tuple_and_ellipses", +] diff --git a/.local/lib/python3.8/site-packages/pandas/core/indexers/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/indexers/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..b0265f7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/indexers/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/indexers/__pycache__/objects.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/indexers/__pycache__/objects.cpython-38.pyc new file mode 100644 index 0000000..4e64492 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/indexers/__pycache__/objects.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/indexers/__pycache__/utils.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/indexers/__pycache__/utils.cpython-38.pyc new file mode 100644 index 0000000..99a2c40 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/indexers/__pycache__/utils.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/indexers/objects.py b/.local/lib/python3.8/site-packages/pandas/core/indexers/objects.py new file mode 100644 index 0000000..4d5e4bb --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/indexers/objects.py @@ -0,0 +1,378 @@ +"""Indexer objects for computing start/end window bounds for rolling operations""" +from __future__ import annotations + +from datetime import timedelta + +import numpy as np + +from pandas._libs.window.indexers import calculate_variable_window_bounds +from pandas.util._decorators import Appender + +from pandas.core.dtypes.common import ensure_platform_int + +from pandas.tseries.offsets import Nano + +get_window_bounds_doc = """ +Computes the bounds of a window. 
+ +Parameters +---------- +num_values : int, default 0 + number of values that will be aggregated over +window_size : int, default 0 + the number of rows in a window +min_periods : int, default None + min_periods passed from the top level rolling API +center : bool, default None + center passed from the top level rolling API +closed : str, default None + closed passed from the top level rolling API +win_type : str, default None + win_type passed from the top level rolling API + +Returns +------- +A tuple of ndarray[int64]s, indicating the boundaries of each +window +""" + + +class BaseIndexer: + """Base class for window bounds calculations.""" + + def __init__( + self, index_array: np.ndarray | None = None, window_size: int = 0, **kwargs + ): + """ + Parameters + ---------- + **kwargs : + keyword arguments that will be available when get_window_bounds is called + """ + self.index_array = index_array + self.window_size = window_size + # Set user defined kwargs as attributes that can be used in get_window_bounds + for key, value in kwargs.items(): + setattr(self, key, value) + + @Appender(get_window_bounds_doc) + def get_window_bounds( + self, + num_values: int = 0, + min_periods: int | None = None, + center: bool | None = None, + closed: str | None = None, + ) -> tuple[np.ndarray, np.ndarray]: + + raise NotImplementedError + + +class FixedWindowIndexer(BaseIndexer): + """Creates window boundaries that are of fixed length.""" + + @Appender(get_window_bounds_doc) + def get_window_bounds( + self, + num_values: int = 0, + min_periods: int | None = None, + center: bool | None = None, + closed: str | None = None, + ) -> tuple[np.ndarray, np.ndarray]: + + if center: + offset = (self.window_size - 1) // 2 + else: + offset = 0 + + end = np.arange(1 + offset, num_values + 1 + offset, dtype="int64") + start = end - self.window_size + if closed in ["left", "both"]: + start -= 1 + if closed in ["left", "neither"]: + end -= 1 + + end = np.clip(end, 0, num_values) + start = np.clip(start, 0, num_values) + + return start, end + + +class VariableWindowIndexer(BaseIndexer): + """Creates window boundaries that are of variable length, namely for time series.""" + + @Appender(get_window_bounds_doc) + def get_window_bounds( + self, + num_values: int = 0, + min_periods: int | None = None, + center: bool | None = None, + closed: str | None = None, + ) -> tuple[np.ndarray, np.ndarray]: + + # error: Argument 4 to "calculate_variable_window_bounds" has incompatible + # type "Optional[bool]"; expected "bool" + # error: Argument 6 to "calculate_variable_window_bounds" has incompatible + # type "Optional[ndarray]"; expected "ndarray" + return calculate_variable_window_bounds( + num_values, + self.window_size, + min_periods, + center, # type: ignore[arg-type] + closed, + self.index_array, # type: ignore[arg-type] + ) + + +class VariableOffsetWindowIndexer(BaseIndexer): + """Calculate window boundaries based on a non-fixed offset such as a BusinessDay.""" + + def __init__( + self, + index_array: np.ndarray | None = None, + window_size: int = 0, + index=None, + offset=None, + **kwargs, + ): + super().__init__(index_array, window_size, **kwargs) + self.index = index + self.offset = offset + + @Appender(get_window_bounds_doc) + def get_window_bounds( + self, + num_values: int = 0, + min_periods: int | None = None, + center: bool | None = None, + closed: str | None = None, + ) -> tuple[np.ndarray, np.ndarray]: + + # if windows is variable, default is 'right', otherwise default is 'both' + if closed is None: + closed = "right" 
if self.index is not None else "both" + + right_closed = closed in ["right", "both"] + left_closed = closed in ["left", "both"] + + if self.index[num_values - 1] < self.index[0]: + index_growth_sign = -1 + else: + index_growth_sign = 1 + + start = np.empty(num_values, dtype="int64") + start.fill(-1) + end = np.empty(num_values, dtype="int64") + end.fill(-1) + + start[0] = 0 + + # right endpoint is closed + if right_closed: + end[0] = 1 + # right endpoint is open + else: + end[0] = 0 + + # start is start of slice interval (including) + # end is end of slice interval (not including) + for i in range(1, num_values): + end_bound = self.index[i] + start_bound = self.index[i] - index_growth_sign * self.offset + + # left endpoint is closed + if left_closed: + start_bound -= Nano(1) + + # advance the start bound until we are + # within the constraint + start[i] = i + for j in range(start[i - 1], i): + if (self.index[j] - start_bound) * index_growth_sign > timedelta(0): + start[i] = j + break + + # end bound is previous end + # or current index + if (self.index[end[i - 1]] - end_bound) * index_growth_sign <= timedelta(0): + end[i] = i + 1 + else: + end[i] = end[i - 1] + + # right endpoint is open + if not right_closed: + end[i] -= 1 + + return start, end + + +class ExpandingIndexer(BaseIndexer): + """Calculate expanding window bounds, mimicking df.expanding()""" + + @Appender(get_window_bounds_doc) + def get_window_bounds( + self, + num_values: int = 0, + min_periods: int | None = None, + center: bool | None = None, + closed: str | None = None, + ) -> tuple[np.ndarray, np.ndarray]: + + return ( + np.zeros(num_values, dtype=np.int64), + np.arange(1, num_values + 1, dtype=np.int64), + ) + + +class FixedForwardWindowIndexer(BaseIndexer): + """ + Creates window boundaries for fixed-length windows that include the + current row. + + Examples + -------- + >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]}) + >>> df + B + 0 0.0 + 1 1.0 + 2 2.0 + 3 NaN + 4 4.0 + + >>> indexer = pd.api.indexers.FixedForwardWindowIndexer(window_size=2) + >>> df.rolling(window=indexer, min_periods=1).sum() + B + 0 1.0 + 1 3.0 + 2 2.0 + 3 4.0 + 4 4.0 + """ + + @Appender(get_window_bounds_doc) + def get_window_bounds( + self, + num_values: int = 0, + min_periods: int | None = None, + center: bool | None = None, + closed: str | None = None, + ) -> tuple[np.ndarray, np.ndarray]: + + if center: + raise ValueError("Forward-looking windows can't have center=True") + if closed is not None: + raise ValueError( + "Forward-looking windows don't support setting the closed argument" + ) + + start = np.arange(num_values, dtype="int64") + end = start + self.window_size + if self.window_size: + end[-self.window_size :] = num_values + + return start, end + + +class GroupbyIndexer(BaseIndexer): + """Calculate bounds to compute groupby rolling, mimicking df.groupby().rolling()""" + + def __init__( + self, + index_array: np.ndarray | None = None, + window_size: int | BaseIndexer = 0, + groupby_indices: dict | None = None, + window_indexer: type[BaseIndexer] = BaseIndexer, + indexer_kwargs: dict | None = None, + **kwargs, + ): + """ + Parameters + ---------- + index_array : np.ndarray or None + np.ndarray of the index of the original object that we are performing + a chained groupby operation over. 
This index has been pre-sorted relative to + the groups + window_size : int or BaseIndexer + window size during the windowing operation + groupby_indices : dict or None + dict of {group label: [positional index of rows belonging to the group]} + window_indexer : BaseIndexer + BaseIndexer class determining the start and end bounds of each group + indexer_kwargs : dict or None + Custom kwargs to be passed to window_indexer + **kwargs : + keyword arguments that will be available when get_window_bounds is called + """ + self.groupby_indices = groupby_indices or {} + self.window_indexer = window_indexer + self.indexer_kwargs = indexer_kwargs.copy() if indexer_kwargs else {} + super().__init__( + index_array=index_array, + window_size=self.indexer_kwargs.pop("window_size", window_size), + **kwargs, + ) + + @Appender(get_window_bounds_doc) + def get_window_bounds( + self, + num_values: int = 0, + min_periods: int | None = None, + center: bool | None = None, + closed: str | None = None, + ) -> tuple[np.ndarray, np.ndarray]: + # 1) For each group, get the indices that belong to the group + # 2) Use the indices to calculate the start & end bounds of the window + # 3) Append the window bounds in group order + start_arrays = [] + end_arrays = [] + window_indices_start = 0 + for key, indices in self.groupby_indices.items(): + index_array: np.ndarray | None + + if self.index_array is not None: + index_array = self.index_array.take(ensure_platform_int(indices)) + else: + index_array = self.index_array + indexer = self.window_indexer( + index_array=index_array, + window_size=self.window_size, + **self.indexer_kwargs, + ) + start, end = indexer.get_window_bounds( + len(indices), min_periods, center, closed + ) + start = start.astype(np.int64) + end = end.astype(np.int64) + assert len(start) == len( + end + ), "these should be equal in length from get_window_bounds" + # Cannot use groupby_indices as they might not be monotonic with the object + # we're rolling over + window_indices = np.arange( + window_indices_start, window_indices_start + len(indices) + ) + window_indices_start += len(indices) + # Extend as we'll be slicing window like [start, end) + window_indices = np.append(window_indices, [window_indices[-1] + 1]).astype( + np.int64, copy=False + ) + start_arrays.append(window_indices.take(ensure_platform_int(start))) + end_arrays.append(window_indices.take(ensure_platform_int(end))) + start = np.concatenate(start_arrays) + end = np.concatenate(end_arrays) + return start, end + + +class ExponentialMovingWindowIndexer(BaseIndexer): + """Calculate ewm window bounds (the entire window)""" + + @Appender(get_window_bounds_doc) + def get_window_bounds( + self, + num_values: int = 0, + min_periods: int | None = None, + center: bool | None = None, + closed: str | None = None, + ) -> tuple[np.ndarray, np.ndarray]: + + return np.array([0], dtype=np.int64), np.array([num_values], dtype=np.int64) diff --git a/.local/lib/python3.8/site-packages/pandas/core/indexers/utils.py b/.local/lib/python3.8/site-packages/pandas/core/indexers/utils.py new file mode 100644 index 0000000..4192072 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/indexers/utils.py @@ -0,0 +1,593 @@ +""" +Low-dependency indexing utilities. 
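+
+The helpers below are grouped into indexer identification, validation,
+conversion, and the public ``check_array_indexer`` entry point.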
+""" +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Any, +) +import warnings + +import numpy as np + +from pandas._typing import ( + AnyArrayLike, + ArrayLike, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + is_array_like, + is_bool_dtype, + is_extension_array_dtype, + is_integer, + is_integer_dtype, + is_list_like, +) +from pandas.core.dtypes.generic import ( + ABCIndex, + ABCSeries, +) + +if TYPE_CHECKING: + from pandas.core.frame import DataFrame + from pandas.core.indexes.base import Index + +# ----------------------------------------------------------- +# Indexer Identification + + +def is_valid_positional_slice(slc: slice) -> bool: + """ + Check if a slice object can be interpreted as a positional indexer. + + Parameters + ---------- + slc : slice + + Returns + ------- + bool + + Notes + ----- + A valid positional slice may also be interpreted as a label-based slice + depending on the index being sliced. + """ + + def is_int_or_none(val): + return val is None or is_integer(val) + + return ( + is_int_or_none(slc.start) + and is_int_or_none(slc.stop) + and is_int_or_none(slc.step) + ) + + +def is_list_like_indexer(key) -> bool: + """ + Check if we have a list-like indexer that is *not* a NamedTuple. + + Parameters + ---------- + key : object + + Returns + ------- + bool + """ + # allow a list_like, but exclude NamedTuples which can be indexers + return is_list_like(key) and not (isinstance(key, tuple) and type(key) is not tuple) + + +def is_scalar_indexer(indexer, ndim: int) -> bool: + """ + Return True if we are all scalar indexers. + + Parameters + ---------- + indexer : object + ndim : int + Number of dimensions in the object being indexed. + + Returns + ------- + bool + """ + if ndim == 1 and is_integer(indexer): + # GH37748: allow indexer to be an integer for Series + return True + if isinstance(indexer, tuple) and len(indexer) == ndim: + return all(is_integer(x) for x in indexer) + return False + + +def is_empty_indexer(indexer, arr_value: ArrayLike) -> bool: + """ + Check if we have an empty indexer. + + Parameters + ---------- + indexer : object + arr_value : np.ndarray or ExtensionArray + + Returns + ------- + bool + """ + if is_list_like(indexer) and not len(indexer): + return True + if arr_value.ndim == 1: + if not isinstance(indexer, tuple): + indexer = (indexer,) + return any(isinstance(idx, np.ndarray) and len(idx) == 0 for idx in indexer) + return False + + +# ----------------------------------------------------------- +# Indexer Validation + + +def check_setitem_lengths(indexer, value, values) -> bool: + """ + Validate that value and indexer are the same length. + + An special-case is allowed for when the indexer is a boolean array + and the number of true values equals the length of ``value``. In + this case, no exception is raised. + + Parameters + ---------- + indexer : sequence + Key for the setitem. + value : array-like + Value for the setitem. + values : array-like + Values being set into. + + Returns + ------- + bool + Whether this is an empty listlike setting which is a no-op. + + Raises + ------ + ValueError + When the indexer is an ndarray or list and the lengths don't match. + """ + no_op = False + + if isinstance(indexer, (np.ndarray, list)): + # We can ignore other listlikes because they are either + # a) not necessarily 1-D indexers, e.g. tuple + # b) boolean indexers e.g. 
BoolArray + if is_list_like(value): + if len(indexer) != len(value) and values.ndim == 1: + # boolean with truth values == len of the value is ok too + if isinstance(indexer, list): + indexer = np.array(indexer) + if not ( + isinstance(indexer, np.ndarray) + and indexer.dtype == np.bool_ + and indexer.sum() == len(value) + ): + raise ValueError( + "cannot set using a list-like indexer " + "with a different length than the value" + ) + if not len(indexer): + no_op = True + + elif isinstance(indexer, slice): + if is_list_like(value): + if len(value) != length_of_indexer(indexer, values) and values.ndim == 1: + # In case of two dimensional value is used row-wise and broadcasted + raise ValueError( + "cannot set using a slice indexer with a " + "different length than the value" + ) + if not len(value): + no_op = True + + return no_op + + +def validate_indices(indices: np.ndarray, n: int) -> None: + """ + Perform bounds-checking for an indexer. + + -1 is allowed for indicating missing values. + + Parameters + ---------- + indices : ndarray + n : int + Length of the array being indexed. + + Raises + ------ + ValueError + + Examples + -------- + >>> validate_indices(np.array([1, 2]), 3) # OK + + >>> validate_indices(np.array([1, -2]), 3) + Traceback (most recent call last): + ... + ValueError: negative dimensions are not allowed + + >>> validate_indices(np.array([1, 2, 3]), 3) + Traceback (most recent call last): + ... + IndexError: indices are out-of-bounds + + >>> validate_indices(np.array([-1, -1]), 0) # OK + + >>> validate_indices(np.array([0, 1]), 0) + Traceback (most recent call last): + ... + IndexError: indices are out-of-bounds + """ + if len(indices): + min_idx = indices.min() + if min_idx < -1: + msg = f"'indices' contains values less than allowed ({min_idx} < -1)" + raise ValueError(msg) + + max_idx = indices.max() + if max_idx >= n: + raise IndexError("indices are out-of-bounds") + + +# ----------------------------------------------------------- +# Indexer Conversion + + +def maybe_convert_indices(indices, n: int, verify: bool = True): + """ + Attempt to convert indices into valid, positive indices. + + If we have negative indices, translate to positive here. + If we have indices that are out-of-bounds, raise an IndexError. + + Parameters + ---------- + indices : array-like + Array of indices that we are to convert. + n : int + Number of elements in the array that we are indexing. + verify : bool, default True + Check that all entries are between 0 and n - 1, inclusive. + + Returns + ------- + array-like + An array-like of positive indices that correspond to the ones + that were passed in initially to this function. + + Raises + ------ + IndexError + One of the converted indices either exceeded the number of, + elements (specified by `n`), or was still negative. + """ + if isinstance(indices, list): + indices = np.array(indices) + if len(indices) == 0: + # If `indices` is empty, np.array will return a float, + # and will cause indexing errors. + return np.empty(0, dtype=np.intp) + + mask = indices < 0 + if mask.any(): + indices = indices.copy() + indices[mask] += n + + if verify: + mask = (indices >= n) | (indices < 0) + if mask.any(): + raise IndexError("indices are out-of-bounds") + return indices + + +# ----------------------------------------------------------- +# Unsorted + + +def is_exact_shape_match(target: ArrayLike, value: ArrayLike) -> bool: + """ + Is setting this value into this target overwriting the entire column? 
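+
+    For example, a length-3 value being set into a length-3 target is an
+    exact match, while a 0-dimensional (scalar) value never is.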
+ + Parameters + ---------- + target : np.ndarray or ExtensionArray + value : np.ndarray or ExtensionArray + + Returns + ------- + bool + """ + return ( + len(value.shape) > 0 + and len(target.shape) > 0 + and value.shape[0] == target.shape[0] + and value.size == target.size + ) + + +def length_of_indexer(indexer, target=None) -> int: + """ + Return the expected length of target[indexer] + + Returns + ------- + int + """ + if target is not None and isinstance(indexer, slice): + target_len = len(target) + start = indexer.start + stop = indexer.stop + step = indexer.step + if start is None: + start = 0 + elif start < 0: + start += target_len + if stop is None or stop > target_len: + stop = target_len + elif stop < 0: + stop += target_len + if step is None: + step = 1 + elif step < 0: + start, stop = stop + 1, start + 1 + step = -step + return (stop - start + step - 1) // step + elif isinstance(indexer, (ABCSeries, ABCIndex, np.ndarray, list)): + if isinstance(indexer, list): + indexer = np.array(indexer) + + if indexer.dtype == bool: + # GH#25774 + return indexer.sum() + return len(indexer) + elif isinstance(indexer, range): + return (indexer.stop - indexer.start) // indexer.step + elif not is_list_like_indexer(indexer): + return 1 + raise AssertionError("cannot find the length of the indexer") + + +def deprecate_ndim_indexing(result, stacklevel: int = 3) -> None: + """ + Helper function to raise the deprecation warning for multi-dimensional + indexing on 1D Series/Index. + + GH#27125 indexer like idx[:, None] expands dim, but we cannot do that + and keep an index, so we currently return ndarray, which is deprecated + (Deprecation GH#30588). + """ + if np.ndim(result) > 1: + warnings.warn( + "Support for multi-dimensional indexing (e.g. `obj[:, None]`) " + "is deprecated and will be removed in a future " + "version. Convert to a numpy array before indexing instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + +def unpack_1tuple(tup): + """ + If we have a length-1 tuple/list that contains a slice, unpack to just + the slice. + + Notes + ----- + The list case is deprecated. + """ + if len(tup) == 1 and isinstance(tup[0], slice): + # if we don't have a MultiIndex, we may still be able to handle + # a 1-tuple. see test_1tuple_without_multiindex + + if isinstance(tup, list): + # GH#31299 + warnings.warn( + "Indexing with a single-item list containing a " + "slice is deprecated and will raise in a future " + "version. Pass a tuple instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + return tup[0] + return tup + + +def check_key_length(columns: Index, key, value: DataFrame) -> None: + """ + Checks if a key used as indexer has the same length as the columns it is + associated with. + + Parameters + ---------- + columns : Index The columns of the DataFrame to index. + key : A list-like of keys to index with. + value : DataFrame The value to set for the keys. + + Raises + ------ + ValueError: If the length of key is not equal to the number of columns in value + or if the number of columns referenced by key is not equal to number + of columns. 
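+
+    For example, assigning a two-column DataFrame under a single-column key
+    raises, while a two-column key of unique labels passes.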
+ """ + if columns.is_unique: + if len(value.columns) != len(key): + raise ValueError("Columns must be same length as key") + else: + # Missing keys in columns are represented as -1 + if len(columns.get_indexer_non_unique(key)[0]) != len(value.columns): + raise ValueError("Columns must be same length as key") + + +def unpack_tuple_and_ellipses(item: tuple): + """ + Possibly unpack arr[..., n] to arr[n] + """ + if len(item) > 1: + # Note: we are assuming this indexing is being done on a 1D arraylike + if item[0] is Ellipsis: + item = item[1:] + elif item[-1] is Ellipsis: + item = item[:-1] + + if len(item) > 1: + raise IndexError("too many indices for array.") + + item = item[0] + return item + + +# ----------------------------------------------------------- +# Public indexer validation + + +def check_array_indexer(array: AnyArrayLike, indexer: Any) -> Any: + """ + Check if `indexer` is a valid array indexer for `array`. + + For a boolean mask, `array` and `indexer` are checked to have the same + length. The dtype is validated, and if it is an integer or boolean + ExtensionArray, it is checked if there are missing values present, and + it is converted to the appropriate numpy array. Other dtypes will raise + an error. + + Non-array indexers (integer, slice, Ellipsis, tuples, ..) are passed + through as is. + + .. versionadded:: 1.0.0 + + Parameters + ---------- + array : array-like + The array that is being indexed (only used for the length). + indexer : array-like or list-like + The array-like that's used to index. List-like input that is not yet + a numpy array or an ExtensionArray is converted to one. Other input + types are passed through as is. + + Returns + ------- + numpy.ndarray + The validated indexer as a numpy array that can be used to index. + + Raises + ------ + IndexError + When the lengths don't match. + ValueError + When `indexer` cannot be converted to a numpy ndarray to index + (e.g. presence of missing values). + + See Also + -------- + api.types.is_bool_dtype : Check if `key` is of boolean dtype. + + Examples + -------- + When checking a boolean mask, a boolean ndarray is returned when the + arguments are all valid. + + >>> mask = pd.array([True, False]) + >>> arr = pd.array([1, 2]) + >>> pd.api.indexers.check_array_indexer(arr, mask) + array([ True, False]) + + An IndexError is raised when the lengths don't match. + + >>> mask = pd.array([True, False, True]) + >>> pd.api.indexers.check_array_indexer(arr, mask) + Traceback (most recent call last): + ... + IndexError: Boolean index has wrong length: 3 instead of 2. + + NA values in a boolean array are treated as False. + + >>> mask = pd.array([True, pd.NA]) + >>> pd.api.indexers.check_array_indexer(arr, mask) + array([ True, False]) + + A numpy boolean mask will get passed through (if the length is correct): + + >>> mask = np.array([True, False]) + >>> pd.api.indexers.check_array_indexer(arr, mask) + array([ True, False]) + + Similarly for integer indexers, an integer ndarray is returned when it is + a valid indexer, otherwise an error is (for integer indexers, a matching + length is not required): + + >>> indexer = pd.array([0, 2], dtype="Int64") + >>> arr = pd.array([1, 2, 3]) + >>> pd.api.indexers.check_array_indexer(arr, indexer) + array([0, 2]) + + >>> indexer = pd.array([0, pd.NA], dtype="Int64") + >>> pd.api.indexers.check_array_indexer(arr, indexer) + Traceback (most recent call last): + ... 
+ ValueError: Cannot index with an integer indexer containing NA values + + For non-integer/boolean dtypes, an appropriate error is raised: + + >>> indexer = np.array([0., 2.], dtype="float64") + >>> pd.api.indexers.check_array_indexer(arr, indexer) + Traceback (most recent call last): + ... + IndexError: arrays used as indices must be of integer or boolean type + """ + from pandas.core.construction import array as pd_array + + # whatever is not an array-like is returned as-is (possible valid array + # indexers that are not array-like: integer, slice, Ellipsis, None) + # In this context, tuples are not considered as array-like, as they have + # a specific meaning in indexing (multi-dimensional indexing) + if is_list_like(indexer): + if isinstance(indexer, tuple): + return indexer + else: + return indexer + + # convert list-likes to array + if not is_array_like(indexer): + indexer = pd_array(indexer) + if len(indexer) == 0: + # empty list is converted to float array by pd.array + indexer = np.array([], dtype=np.intp) + + dtype = indexer.dtype + if is_bool_dtype(dtype): + if is_extension_array_dtype(dtype): + indexer = indexer.to_numpy(dtype=bool, na_value=False) + else: + indexer = np.asarray(indexer, dtype=bool) + + # GH26658 + if len(indexer) != len(array): + raise IndexError( + f"Boolean index has wrong length: " + f"{len(indexer)} instead of {len(array)}" + ) + elif is_integer_dtype(dtype): + try: + indexer = np.asarray(indexer, dtype=np.intp) + except ValueError as err: + raise ValueError( + "Cannot index with an integer indexer containing NA values" + ) from err + else: + raise IndexError("arrays used as indices must be of integer or boolean type") + + return indexer diff --git a/.local/lib/python3.8/site-packages/pandas/core/indexes/__init__.py b/.local/lib/python3.8/site-packages/pandas/core/indexes/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..ed5a729 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/accessors.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/accessors.cpython-38.pyc new file mode 100644 index 0000000..203d353 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/accessors.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/api.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/api.cpython-38.pyc new file mode 100644 index 0000000..cb4adbf Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/api.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/base.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/base.cpython-38.pyc new file mode 100644 index 0000000..13ac2b2 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/base.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/category.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/category.cpython-38.pyc new file mode 
100644 index 0000000..6cdfd14 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/category.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/datetimelike.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/datetimelike.cpython-38.pyc new file mode 100644 index 0000000..a1cf6e2 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/datetimelike.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/datetimes.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/datetimes.cpython-38.pyc new file mode 100644 index 0000000..ceb390f Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/datetimes.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/extension.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/extension.cpython-38.pyc new file mode 100644 index 0000000..8d07dd2 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/extension.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/frozen.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/frozen.cpython-38.pyc new file mode 100644 index 0000000..c28873e Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/frozen.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/interval.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/interval.cpython-38.pyc new file mode 100644 index 0000000..d554661 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/interval.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/multi.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/multi.cpython-38.pyc new file mode 100644 index 0000000..cf8fd7b Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/multi.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/numeric.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/numeric.cpython-38.pyc new file mode 100644 index 0000000..9257731 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/numeric.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/period.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/period.cpython-38.pyc new file mode 100644 index 0000000..b6aee32 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/period.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/range.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/range.cpython-38.pyc new file mode 100644 index 0000000..b47fe9b Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/range.cpython-38.pyc differ diff --git 
a/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/timedeltas.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/timedeltas.cpython-38.pyc new file mode 100644 index 0000000..1e921e4 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/indexes/__pycache__/timedeltas.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/indexes/accessors.py b/.local/lib/python3.8/site-packages/pandas/core/indexes/accessors.py new file mode 100644 index 0000000..8c2813f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/indexes/accessors.py @@ -0,0 +1,509 @@ +""" +datetimelike delegation +""" +from __future__ import annotations + +from typing import TYPE_CHECKING +import warnings + +import numpy as np + +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_datetime64_dtype, + is_datetime64tz_dtype, + is_integer_dtype, + is_list_like, + is_period_dtype, + is_timedelta64_dtype, +) +from pandas.core.dtypes.generic import ABCSeries + +from pandas.core.accessor import ( + PandasDelegate, + delegate_names, +) +from pandas.core.arrays import ( + DatetimeArray, + PeriodArray, + TimedeltaArray, +) +from pandas.core.base import ( + NoNewAttributesMixin, + PandasObject, +) +from pandas.core.indexes.datetimes import DatetimeIndex +from pandas.core.indexes.timedeltas import TimedeltaIndex + +if TYPE_CHECKING: + from pandas import Series + + +class Properties(PandasDelegate, PandasObject, NoNewAttributesMixin): + _hidden_attrs = PandasObject._hidden_attrs | { + "orig", + "name", + } + + def __init__(self, data: Series, orig): + if not isinstance(data, ABCSeries): + raise TypeError( + f"cannot convert an object of type {type(data)} to a datetimelike index" + ) + + self._parent = data + self.orig = orig + self.name = getattr(data, "name", None) + self._freeze() + + def _get_values(self): + data = self._parent + if is_datetime64_dtype(data.dtype): + return DatetimeIndex(data, copy=False, name=self.name) + + elif is_datetime64tz_dtype(data.dtype): + return DatetimeIndex(data, copy=False, name=self.name) + + elif is_timedelta64_dtype(data.dtype): + return TimedeltaIndex(data, copy=False, name=self.name) + + elif is_period_dtype(data.dtype): + return PeriodArray(data, copy=False) + + raise TypeError( + f"cannot convert an object of type {type(data)} to a datetimelike index" + ) + + def _delegate_property_get(self, name): + from pandas import Series + + values = self._get_values() + + result = getattr(values, name) + + # maybe need to upcast (ints) + if isinstance(result, np.ndarray): + if is_integer_dtype(result): + result = result.astype("int64") + elif not is_list_like(result): + return result + + result = np.asarray(result) + + if self.orig is not None: + index = self.orig.index + else: + index = self._parent.index + # return the result as a Series, which is by definition a copy + result = Series(result, index=index, name=self.name).__finalize__(self._parent) + + # setting this object will show a SettingWithCopyWarning/Error + result._is_copy = ( + "modifications to a property of a datetimelike " + "object are not supported and are discarded. " + "Change values on the original." + ) + + return result + + def _delegate_property_set(self, name, value, *args, **kwargs): + raise ValueError( + "modifications to a property of a datetimelike object are not supported. " + "Change values on the original." 
+ ) + + def _delegate_method(self, name, *args, **kwargs): + from pandas import Series + + values = self._get_values() + + method = getattr(values, name) + result = method(*args, **kwargs) + + if not is_list_like(result): + return result + + result = Series(result, index=self._parent.index, name=self.name).__finalize__( + self._parent + ) + + # setting this object will show a SettingWithCopyWarning/Error + result._is_copy = ( + "modifications to a method of a datetimelike " + "object are not supported and are discarded. " + "Change values on the original." + ) + + return result + + +@delegate_names( + delegate=DatetimeArray, accessors=DatetimeArray._datetimelike_ops, typ="property" +) +@delegate_names( + delegate=DatetimeArray, accessors=DatetimeArray._datetimelike_methods, typ="method" +) +class DatetimeProperties(Properties): + """ + Accessor object for datetimelike properties of the Series values. + + Examples + -------- + >>> seconds_series = pd.Series(pd.date_range("2000-01-01", periods=3, freq="s")) + >>> seconds_series + 0 2000-01-01 00:00:00 + 1 2000-01-01 00:00:01 + 2 2000-01-01 00:00:02 + dtype: datetime64[ns] + >>> seconds_series.dt.second + 0 0 + 1 1 + 2 2 + dtype: int64 + + >>> hours_series = pd.Series(pd.date_range("2000-01-01", periods=3, freq="h")) + >>> hours_series + 0 2000-01-01 00:00:00 + 1 2000-01-01 01:00:00 + 2 2000-01-01 02:00:00 + dtype: datetime64[ns] + >>> hours_series.dt.hour + 0 0 + 1 1 + 2 2 + dtype: int64 + + >>> quarters_series = pd.Series(pd.date_range("2000-01-01", periods=3, freq="q")) + >>> quarters_series + 0 2000-03-31 + 1 2000-06-30 + 2 2000-09-30 + dtype: datetime64[ns] + >>> quarters_series.dt.quarter + 0 1 + 1 2 + 2 3 + dtype: int64 + + Returns a Series indexed like the original Series. + Raises TypeError if the Series does not contain datetimelike values. + """ + + def to_pydatetime(self) -> np.ndarray: + """ + Return the data as an array of :class:`datetime.datetime` objects. + + Timezone information is retained if present. + + .. warning:: + + Python's datetime uses microsecond resolution, which is lower than + pandas (nanosecond). The values are truncated. + + Returns + ------- + numpy.ndarray + Object dtype array containing native Python datetime objects. + + See Also + -------- + datetime.datetime : Standard library value for a datetime. + + Examples + -------- + >>> s = pd.Series(pd.date_range('20180310', periods=2)) + >>> s + 0 2018-03-10 + 1 2018-03-11 + dtype: datetime64[ns] + + >>> s.dt.to_pydatetime() + array([datetime.datetime(2018, 3, 10, 0, 0), + datetime.datetime(2018, 3, 11, 0, 0)], dtype=object) + + pandas' nanosecond precision is truncated to microseconds. + + >>> s = pd.Series(pd.date_range('20180310', periods=2, freq='ns')) + >>> s + 0 2018-03-10 00:00:00.000000000 + 1 2018-03-10 00:00:00.000000001 + dtype: datetime64[ns] + + >>> s.dt.to_pydatetime() + array([datetime.datetime(2018, 3, 10, 0, 0), + datetime.datetime(2018, 3, 10, 0, 0)], dtype=object) + """ + return self._get_values().to_pydatetime() + + @property + def freq(self): + return self._get_values().inferred_freq + + def isocalendar(self): + """ + Returns a DataFrame with the year, week, and day calculated according to + the ISO 8601 standard. + + .. versionadded:: 1.1.0 + + Returns + ------- + DataFrame + with columns year, week and day + + See Also + -------- + Timestamp.isocalendar : Function return a 3-tuple containing ISO year, + week number, and weekday for the given Timestamp object. 
+ datetime.date.isocalendar : Return a named tuple object with + three components: year, week and weekday. + + Examples + -------- + >>> ser = pd.to_datetime(pd.Series(["2010-01-01", pd.NaT])) + >>> ser.dt.isocalendar() + year week day + 0 2009 53 5 + 1 + >>> ser.dt.isocalendar().week + 0 53 + 1 + Name: week, dtype: UInt32 + """ + return self._get_values().isocalendar().set_index(self._parent.index) + + @property + def weekofyear(self): + """ + The week ordinal of the year. + + .. deprecated:: 1.1.0 + + Series.dt.weekofyear and Series.dt.week have been deprecated. + Please use Series.dt.isocalendar().week instead. + """ + warnings.warn( + "Series.dt.weekofyear and Series.dt.week have been deprecated. " + "Please use Series.dt.isocalendar().week instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + week_series = self.isocalendar().week + week_series.name = self.name + if week_series.hasnans: + return week_series.astype("float64") + return week_series.astype("int64") + + week = weekofyear + + +@delegate_names( + delegate=TimedeltaArray, accessors=TimedeltaArray._datetimelike_ops, typ="property" +) +@delegate_names( + delegate=TimedeltaArray, + accessors=TimedeltaArray._datetimelike_methods, + typ="method", +) +class TimedeltaProperties(Properties): + """ + Accessor object for datetimelike properties of the Series values. + + Returns a Series indexed like the original Series. + Raises TypeError if the Series does not contain datetimelike values. + + Examples + -------- + >>> seconds_series = pd.Series( + ... pd.timedelta_range(start="1 second", periods=3, freq="S") + ... ) + >>> seconds_series + 0 0 days 00:00:01 + 1 0 days 00:00:02 + 2 0 days 00:00:03 + dtype: timedelta64[ns] + >>> seconds_series.dt.seconds + 0 1 + 1 2 + 2 3 + dtype: int64 + """ + + def to_pytimedelta(self) -> np.ndarray: + """ + Return an array of native :class:`datetime.timedelta` objects. + + Python's standard `datetime` library uses a different representation + timedelta's. This method converts a Series of pandas Timedeltas + to `datetime.timedelta` format with the same length as the original + Series. + + Returns + ------- + numpy.ndarray + Array of 1D containing data with `datetime.timedelta` type. + + See Also + -------- + datetime.timedelta : A duration expressing the difference + between two date, time, or datetime. + + Examples + -------- + >>> s = pd.Series(pd.to_timedelta(np.arange(5), unit="d")) + >>> s + 0 0 days + 1 1 days + 2 2 days + 3 3 days + 4 4 days + dtype: timedelta64[ns] + + >>> s.dt.to_pytimedelta() + array([datetime.timedelta(0), datetime.timedelta(days=1), + datetime.timedelta(days=2), datetime.timedelta(days=3), + datetime.timedelta(days=4)], dtype=object) + """ + return self._get_values().to_pytimedelta() + + @property + def components(self): + """ + Return a Dataframe of the components of the Timedeltas. 
+ + Returns + ------- + DataFrame + + Examples + -------- + >>> s = pd.Series(pd.to_timedelta(np.arange(5), unit='s')) + >>> s + 0 0 days 00:00:00 + 1 0 days 00:00:01 + 2 0 days 00:00:02 + 3 0 days 00:00:03 + 4 0 days 00:00:04 + dtype: timedelta64[ns] + >>> s.dt.components + days hours minutes seconds milliseconds microseconds nanoseconds + 0 0 0 0 0 0 0 0 + 1 0 0 0 1 0 0 0 + 2 0 0 0 2 0 0 0 + 3 0 0 0 3 0 0 0 + 4 0 0 0 4 0 0 0 + """ + return ( + self._get_values() + .components.set_index(self._parent.index) + .__finalize__(self._parent) + ) + + @property + def freq(self): + return self._get_values().inferred_freq + + +@delegate_names( + delegate=PeriodArray, accessors=PeriodArray._datetimelike_ops, typ="property" +) +@delegate_names( + delegate=PeriodArray, accessors=PeriodArray._datetimelike_methods, typ="method" +) +class PeriodProperties(Properties): + """ + Accessor object for datetimelike properties of the Series values. + + Returns a Series indexed like the original Series. + Raises TypeError if the Series does not contain datetimelike values. + + Examples + -------- + >>> seconds_series = pd.Series( + ... pd.period_range( + ... start="2000-01-01 00:00:00", end="2000-01-01 00:00:03", freq="s" + ... ) + ... ) + >>> seconds_series + 0 2000-01-01 00:00:00 + 1 2000-01-01 00:00:01 + 2 2000-01-01 00:00:02 + 3 2000-01-01 00:00:03 + dtype: period[S] + >>> seconds_series.dt.second + 0 0 + 1 1 + 2 2 + 3 3 + dtype: int64 + + >>> hours_series = pd.Series( + ... pd.period_range(start="2000-01-01 00:00", end="2000-01-01 03:00", freq="h") + ... ) + >>> hours_series + 0 2000-01-01 00:00 + 1 2000-01-01 01:00 + 2 2000-01-01 02:00 + 3 2000-01-01 03:00 + dtype: period[H] + >>> hours_series.dt.hour + 0 0 + 1 1 + 2 2 + 3 3 + dtype: int64 + + >>> quarters_series = pd.Series( + ... pd.period_range(start="2000-01-01", end="2000-12-31", freq="Q-DEC") + ... ) + >>> quarters_series + 0 2000Q1 + 1 2000Q2 + 2 2000Q3 + 3 2000Q4 + dtype: period[Q-DEC] + >>> quarters_series.dt.quarter + 0 1 + 1 2 + 2 3 + 3 4 + dtype: int64 + """ + + +class CombinedDatetimelikeProperties( + DatetimeProperties, TimedeltaProperties, PeriodProperties +): + def __new__(cls, data: Series): + # CombinedDatetimelikeProperties isn't really instantiated. Instead + # we need to choose which parent (datetime or timedelta) is + # appropriate. Since we're checking the dtypes anyway, we'll just + # do all the validation here. 
+ + if not isinstance(data, ABCSeries): + raise TypeError( + f"cannot convert an object of type {type(data)} to a datetimelike index" + ) + + orig = data if is_categorical_dtype(data.dtype) else None + if orig is not None: + data = data._constructor( + orig.array, + name=orig.name, + copy=False, + dtype=orig._values.categories.dtype, + index=orig.index, + ) + + if is_datetime64_dtype(data.dtype): + return DatetimeProperties(data, orig) + elif is_datetime64tz_dtype(data.dtype): + return DatetimeProperties(data, orig) + elif is_timedelta64_dtype(data.dtype): + return TimedeltaProperties(data, orig) + elif is_period_dtype(data.dtype): + return PeriodProperties(data, orig) + + raise AttributeError("Can only use .dt accessor with datetimelike values") diff --git a/.local/lib/python3.8/site-packages/pandas/core/indexes/api.py b/.local/lib/python3.8/site-packages/pandas/core/indexes/api.py new file mode 100644 index 0000000..922c344 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/indexes/api.py @@ -0,0 +1,324 @@ +from __future__ import annotations + +import textwrap + +from pandas._libs import ( + NaT, + lib, +) +from pandas.errors import InvalidIndexError + +from pandas.core.dtypes.common import is_dtype_equal + +from pandas.core.indexes.base import ( + Index, + _new_Index, + ensure_index, + ensure_index_from_sequences, + get_unanimous_names, +) +from pandas.core.indexes.category import CategoricalIndex +from pandas.core.indexes.datetimes import DatetimeIndex +from pandas.core.indexes.interval import IntervalIndex +from pandas.core.indexes.multi import MultiIndex +from pandas.core.indexes.numeric import ( + Float64Index, + Int64Index, + NumericIndex, + UInt64Index, +) +from pandas.core.indexes.period import PeriodIndex +from pandas.core.indexes.range import RangeIndex +from pandas.core.indexes.timedeltas import TimedeltaIndex + +_sort_msg = textwrap.dedent( + """\ +Sorting because non-concatenation axis is not aligned. A future version +of pandas will change to not sort by default. + +To accept the future behavior, pass 'sort=False'. + +To retain the current behavior and silence the warning, pass 'sort=True'. +""" +) + + +__all__ = [ + "Index", + "MultiIndex", + "NumericIndex", + "Float64Index", + "Int64Index", + "CategoricalIndex", + "IntervalIndex", + "RangeIndex", + "UInt64Index", + "InvalidIndexError", + "TimedeltaIndex", + "PeriodIndex", + "DatetimeIndex", + "_new_Index", + "NaT", + "ensure_index", + "ensure_index_from_sequences", + "get_objs_combined_axis", + "union_indexes", + "get_unanimous_names", + "all_indexes_same", + "default_index", +] + + +def get_objs_combined_axis( + objs, intersect: bool = False, axis=0, sort: bool = True, copy: bool = False +) -> Index: + """ + Extract combined index: return intersection or union (depending on the + value of "intersect") of indexes on given axis, or None if all objects + lack indexes (e.g. they are numpy arrays). + + Parameters + ---------- + objs : list + Series or DataFrame objects, may be mix of the two. + intersect : bool, default False + If True, calculate the intersection between indexes. Otherwise, + calculate the union. + axis : {0 or 'index', 1 or 'outer'}, default 0 + The axis to extract indexes from. + sort : bool, default True + Whether the result index should come out sorted or not. + copy : bool, default False + If True, return a copy of the combined index. 
+ + Returns + ------- + Index + """ + obs_idxes = [obj._get_axis(axis) for obj in objs] + return _get_combined_index(obs_idxes, intersect=intersect, sort=sort, copy=copy) + + +def _get_distinct_objs(objs: list[Index]) -> list[Index]: + """ + Return a list with distinct elements of "objs" (different ids). + Preserves order. + """ + ids: set[int] = set() + res = [] + for obj in objs: + if id(obj) not in ids: + ids.add(id(obj)) + res.append(obj) + return res + + +def _get_combined_index( + indexes: list[Index], + intersect: bool = False, + sort: bool = False, + copy: bool = False, +) -> Index: + """ + Return the union or intersection of indexes. + + Parameters + ---------- + indexes : list of Index or list objects + When intersect=True, do not accept list of lists. + intersect : bool, default False + If True, calculate the intersection between indexes. Otherwise, + calculate the union. + sort : bool, default False + Whether the result index should come out sorted or not. + copy : bool, default False + If True, return a copy of the combined index. + + Returns + ------- + Index + """ + # TODO: handle index names! + indexes = _get_distinct_objs(indexes) + if len(indexes) == 0: + index = Index([]) + elif len(indexes) == 1: + index = indexes[0] + elif intersect: + index = indexes[0] + for other in indexes[1:]: + index = index.intersection(other) + else: + index = union_indexes(indexes, sort=False) + index = ensure_index(index) + + if sort: + try: + index = index.sort_values() + except TypeError: + pass + + # GH 29879 + if copy: + index = index.copy() + + return index + + +def union_indexes(indexes, sort: bool | None = True) -> Index: + """ + Return the union of indexes. + + The behavior of sort and names is not consistent. + + Parameters + ---------- + indexes : list of Index or list objects + sort : bool, default True + Whether the result index should come out sorted or not. + + Returns + ------- + Index + """ + if len(indexes) == 0: + raise AssertionError("Must have at least 1 Index to union") + if len(indexes) == 1: + result = indexes[0] + if isinstance(result, list): + result = Index(sorted(result)) + return result + + indexes, kind = _sanitize_and_check(indexes) + + def _unique_indices(inds) -> Index: + """ + Convert indexes to lists and concatenate them, removing duplicates. + + The final dtype is inferred. + + Parameters + ---------- + inds : list of Index or list objects + + Returns + ------- + Index + """ + + def conv(i): + if isinstance(i, Index): + i = i.tolist() + return i + + return Index(lib.fast_unique_multiple_list([conv(i) for i in inds], sort=sort)) + + if kind == "special": + result = indexes[0] + first = result + + dtis = [x for x in indexes if isinstance(x, DatetimeIndex)] + dti_tzs = [x for x in dtis if x.tz is not None] + if len(dti_tzs) not in [0, len(dtis)]: + # TODO: this behavior is not tested (so may not be desired), + # but is kept in order to keep behavior the same when + # deprecating union_many + # test_frame_from_dict_with_mixed_indexes + raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex") + + if len(dtis) == len(indexes): + sort = True + if not all(is_dtype_equal(x.dtype, first.dtype) for x in indexes): + # i.e. timezones mismatch + # TODO(2.0): once deprecation is enforced, this union will + # cast to UTC automatically. + indexes = [x.tz_convert("UTC") for x in indexes] + + result = indexes[0] + + elif len(dtis) > 1: + # If we have mixed timezones, our casting behavior may depend on + # the order of indexes, which we don't want. 
+ sort = False + + # TODO: what about Categorical[dt64]? + # test_frame_from_dict_with_mixed_indexes + indexes = [x.astype(object, copy=False) for x in indexes] + result = indexes[0] + + for other in indexes[1:]: + result = result.union(other, sort=None if sort else False) + return result + + elif kind == "array": + index = indexes[0] + if not all(index.equals(other) for other in indexes[1:]): + index = _unique_indices(indexes) + + name = get_unanimous_names(*indexes)[0] + if name != index.name: + index = index.rename(name) + return index + else: # kind='list' + return _unique_indices(indexes) + + +def _sanitize_and_check(indexes): + """ + Verify the type of indexes and convert lists to Index. + + Cases: + + - [list, list, ...]: Return ([list, list, ...], 'list') + - [list, Index, ...]: Return _sanitize_and_check([Index, Index, ...]) + Lists are sorted and converted to Index. + - [Index, Index, ...]: Return ([Index, Index, ...], TYPE) + TYPE = 'special' if at least one special type, 'array' otherwise. + + Parameters + ---------- + indexes : list of Index or list objects + + Returns + ------- + sanitized_indexes : list of Index or list objects + type : {'list', 'array', 'special'} + """ + kinds = list({type(index) for index in indexes}) + + if list in kinds: + if len(kinds) > 1: + indexes = [ + Index(list(x)) if not isinstance(x, Index) else x for x in indexes + ] + kinds.remove(list) + else: + return indexes, "list" + + if len(kinds) > 1 or Index not in kinds: + return indexes, "special" + else: + return indexes, "array" + + +def all_indexes_same(indexes) -> bool: + """ + Determine if all indexes contain the same elements. + + Parameters + ---------- + indexes : iterable of Index objects + + Returns + ------- + bool + True if all indexes contain the same elements, False otherwise. 
+ """ + itr = iter(indexes) + first = next(itr) + return all(first.equals(index) for index in itr) + + +def default_index(n: int) -> RangeIndex: + rng = range(0, n) + return RangeIndex._simple_new(rng, name=None) diff --git a/.local/lib/python3.8/site-packages/pandas/core/indexes/base.py b/.local/lib/python3.8/site-packages/pandas/core/indexes/base.py new file mode 100644 index 0000000..2abd649 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/indexes/base.py @@ -0,0 +1,7201 @@ +from __future__ import annotations + +from datetime import datetime +import functools +from itertools import zip_longest +import operator +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Hashable, + Literal, + Sequence, + TypeVar, + cast, + final, + overload, +) +import warnings + +import numpy as np + +from pandas._config import get_option + +from pandas._libs import ( + NaT, + algos as libalgos, + index as libindex, + lib, +) +import pandas._libs.join as libjoin +from pandas._libs.lib import ( + is_datetime_array, + no_default, +) +from pandas._libs.missing import is_float_nan +from pandas._libs.tslibs import ( + IncompatibleFrequency, + OutOfBoundsDatetime, + Timestamp, + tz_compare, +) +from pandas._typing import ( + AnyArrayLike, + ArrayLike, + Dtype, + DtypeObj, + F, + Shape, + npt, +) +from pandas.compat.numpy import function as nv +from pandas.errors import ( + DuplicateLabelError, + InvalidIndexError, +) +from pandas.util._decorators import ( + Appender, + cache_readonly, + deprecate_nonkeyword_arguments, + doc, +) +from pandas.util._exceptions import ( + find_stack_level, + rewrite_exception, +) + +from pandas.core.dtypes.cast import ( + can_hold_element, + find_common_type, + infer_dtype_from, + maybe_cast_pointwise_result, +) +from pandas.core.dtypes.common import ( + ensure_int64, + ensure_object, + ensure_platform_int, + is_bool_dtype, + is_categorical_dtype, + is_dtype_equal, + is_ea_or_datetimelike_dtype, + is_extension_array_dtype, + is_float, + is_float_dtype, + is_hashable, + is_integer, + is_interval_dtype, + is_iterator, + is_list_like, + is_numeric_dtype, + is_object_dtype, + is_scalar, + is_signed_integer_dtype, + is_unsigned_integer_dtype, + needs_i8_conversion, + pandas_dtype, + validate_all_hashable, +) +from pandas.core.dtypes.concat import concat_compat +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + DatetimeTZDtype, + ExtensionDtype, + IntervalDtype, + PandasDtype, + PeriodDtype, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCDatetimeIndex, + ABCMultiIndex, + ABCPeriodIndex, + ABCSeries, + ABCTimedeltaIndex, +) +from pandas.core.dtypes.inference import is_dict_like +from pandas.core.dtypes.missing import ( + array_equivalent, + is_valid_na_for_dtype, + isna, +) + +from pandas.core import ( + arraylike, + missing, + ops, +) +from pandas.core.accessor import CachedAccessor +import pandas.core.algorithms as algos +from pandas.core.array_algos.putmask import ( + setitem_datetimelike_compat, + validate_putmask, +) +from pandas.core.arrays import ( + Categorical, + ExtensionArray, +) +from pandas.core.arrays.datetimes import ( + tz_to_dtype, + validate_tz_from_dtype, +) +from pandas.core.arrays.masked import BaseMaskedArray +from pandas.core.arrays.sparse import SparseDtype +from pandas.core.base import ( + IndexOpsMixin, + PandasObject, +) +import pandas.core.common as com +from pandas.core.construction import ( + ensure_wrapped_if_datetimelike, + extract_array, + sanitize_array, +) +from pandas.core.indexers import 
deprecate_ndim_indexing +from pandas.core.indexes.frozen import FrozenList +from pandas.core.ops import get_op_result_name +from pandas.core.ops.invalid import make_invalid_op +from pandas.core.sorting import ( + ensure_key_mapped, + get_group_index_sorter, + nargsort, +) +from pandas.core.strings import StringMethods + +from pandas.io.formats.printing import ( + PrettyDict, + default_pprint, + format_object_summary, + pprint_thing, +) + +if TYPE_CHECKING: + from pandas import ( + CategoricalIndex, + DataFrame, + IntervalIndex, + MultiIndex, + Series, + ) + from pandas.core.arrays import PeriodArray + + +__all__ = ["Index"] + +_unsortable_types = frozenset(("mixed", "mixed-integer")) + +_index_doc_kwargs: dict[str, str] = { + "klass": "Index", + "inplace": "", + "target_klass": "Index", + "raises_section": "", + "unique": "Index", + "duplicated": "np.ndarray", +} +_index_shared_docs: dict[str, str] = {} +str_t = str + + +_dtype_obj = np.dtype("object") + + +def _maybe_return_indexers(meth: F) -> F: + """ + Decorator to simplify 'return_indexers' checks in Index.join. + """ + + @functools.wraps(meth) + def join( + self, + other, + how: str_t = "left", + level=None, + return_indexers: bool = False, + sort: bool = False, + ): + join_index, lidx, ridx = meth(self, other, how=how, level=level, sort=sort) + if not return_indexers: + return join_index + + if lidx is not None: + lidx = ensure_platform_int(lidx) + if ridx is not None: + ridx = ensure_platform_int(ridx) + return join_index, lidx, ridx + + return cast(F, join) + + +def disallow_kwargs(kwargs: dict[str, Any]) -> None: + if kwargs: + raise TypeError(f"Unexpected keyword arguments {repr(set(kwargs))}") + + +def _new_Index(cls, d): + """ + This is called upon unpickling, rather than the default which doesn't + have arguments and breaks __new__. + """ + # required for backward compat, because PI can't be instantiated with + # ordinals through __new__ GH #13277 + if issubclass(cls, ABCPeriodIndex): + from pandas.core.indexes.period import _new_PeriodIndex + + return _new_PeriodIndex(cls, **d) + + if issubclass(cls, ABCMultiIndex): + if "labels" in d and "codes" not in d: + # GH#23752 "labels" kwarg has been replaced with "codes" + d["codes"] = d.pop("labels") + + elif "dtype" not in d and "data" in d: + # Prevent Index.__new__ from conducting inference; + # "data" key not in RangeIndex + d["dtype"] = d["data"].dtype + return cls.__new__(cls, **d) + + +_IndexT = TypeVar("_IndexT", bound="Index") + + +class Index(IndexOpsMixin, PandasObject): + """ + Immutable sequence used for indexing and alignment. The basic object + storing axis labels for all pandas objects. + + Parameters + ---------- + data : array-like (1-dimensional) + dtype : NumPy dtype (default: object) + If dtype is None, we find the dtype that best fits the data. + If an actual dtype is provided, we coerce to that dtype if it's safe. + Otherwise, an error will be raised. + copy : bool + Make a copy of input ndarray. + name : object + Name to be stored in the index. + tupleize_cols : bool (default: True) + When True, attempt to create a MultiIndex if possible. + + See Also + -------- + RangeIndex : Index implementing a monotonic integer range. + CategoricalIndex : Index of :class:`Categorical` s. + MultiIndex : A multi-level, or hierarchical Index. + IntervalIndex : An Index of :class:`Interval` s. + DatetimeIndex : Index of datetime64 data. + TimedeltaIndex : Index of timedelta64 data. + PeriodIndex : Index of Period data. + NumericIndex : Index of numpy int/uint/float data. 
+ Int64Index : Index of purely int64 labels (deprecated). + UInt64Index : Index of purely uint64 labels (deprecated). + Float64Index : Index of purely float64 labels (deprecated). + + Notes + ----- + An Index instance can **only** contain hashable objects + + Examples + -------- + >>> pd.Index([1, 2, 3]) + Int64Index([1, 2, 3], dtype='int64') + + >>> pd.Index(list('abc')) + Index(['a', 'b', 'c'], dtype='object') + """ + + # tolist is not actually deprecated, just suppressed in the __dir__ + _hidden_attrs: frozenset[str] = ( + PandasObject._hidden_attrs + | IndexOpsMixin._hidden_attrs + | frozenset(["contains", "set_value"]) + ) + + # To hand over control to subclasses + _join_precedence = 1 + + # Cython methods; see github.com/cython/cython/issues/2647 + # for why we need to wrap these instead of making them class attributes + # Moreover, cython will choose the appropriate-dtyped sub-function + # given the dtypes of the passed arguments + + @final + def _left_indexer_unique(self: _IndexT, other: _IndexT) -> npt.NDArray[np.intp]: + # Caller is responsible for ensuring other.dtype == self.dtype + sv = self._get_engine_target() + ov = other._get_engine_target() + return libjoin.left_join_indexer_unique(sv, ov) + + @final + def _left_indexer( + self: _IndexT, other: _IndexT + ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]: + # Caller is responsible for ensuring other.dtype == self.dtype + sv = self._get_engine_target() + ov = other._get_engine_target() + joined_ndarray, lidx, ridx = libjoin.left_join_indexer(sv, ov) + joined = self._from_join_target(joined_ndarray) + return joined, lidx, ridx + + @final + def _inner_indexer( + self: _IndexT, other: _IndexT + ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]: + # Caller is responsible for ensuring other.dtype == self.dtype + sv = self._get_engine_target() + ov = other._get_engine_target() + joined_ndarray, lidx, ridx = libjoin.inner_join_indexer(sv, ov) + joined = self._from_join_target(joined_ndarray) + return joined, lidx, ridx + + @final + def _outer_indexer( + self: _IndexT, other: _IndexT + ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]: + # Caller is responsible for ensuring other.dtype == self.dtype + sv = self._get_engine_target() + ov = other._get_engine_target() + joined_ndarray, lidx, ridx = libjoin.outer_join_indexer(sv, ov) + joined = self._from_join_target(joined_ndarray) + return joined, lidx, ridx + + _typ: str = "index" + _data: ExtensionArray | np.ndarray + _data_cls: type[ExtensionArray] | tuple[type[np.ndarray], type[ExtensionArray]] = ( + np.ndarray, + ExtensionArray, + ) + _id: object | None = None + _name: Hashable = None + # MultiIndex.levels previously allowed setting the index name. We + # don't allow this anymore, and raise if it happens rather than + # failing silently. + _no_setting_name: bool = False + _comparables: list[str] = ["name"] + _attributes: list[str] = ["name"] + _is_numeric_dtype: bool = False + _can_hold_na: bool = True + _can_hold_strings: bool = True + + # Whether this index is a NumericIndex, but not a Int64Index, Float64Index, + # UInt64Index or RangeIndex. Needed for backwards compat. Remove this attribute and + # associated code in pandas 2.0. + _is_backward_compat_public_numeric_index: bool = False + + _engine_type: type[libindex.IndexEngine] = libindex.ObjectEngine + # whether we support partial string indexing. 
Overridden + # in DatetimeIndex and PeriodIndex + _supports_partial_string_indexing = False + + _accessors = {"str"} + + str = CachedAccessor("str", StringMethods) + + # -------------------------------------------------------------------- + # Constructors + + def __new__( + cls, data=None, dtype=None, copy=False, name=None, tupleize_cols=True, **kwargs + ) -> Index: + + if kwargs: + warnings.warn( + "Passing keywords other than 'data', 'dtype', 'copy', 'name', " + "'tupleize_cols' is deprecated and will raise TypeError in a " + "future version. Use the specific Index subclass directly instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + from pandas.core.arrays import PandasArray + from pandas.core.indexes.range import RangeIndex + + name = maybe_extract_name(name, data, cls) + + if dtype is not None: + dtype = pandas_dtype(dtype) + if "tz" in kwargs: + tz = kwargs.pop("tz") + validate_tz_from_dtype(dtype, tz) + dtype = tz_to_dtype(tz) + + if type(data) is PandasArray: + # ensure users don't accidentally put a PandasArray in an index, + # but don't unpack StringArray + data = data.to_numpy() + if isinstance(dtype, PandasDtype): + dtype = dtype.numpy_dtype + + data_dtype = getattr(data, "dtype", None) + + # range + if isinstance(data, (range, RangeIndex)): + result = RangeIndex(start=data, copy=copy, name=name) + if dtype is not None: + return result.astype(dtype, copy=False) + return result + + elif is_ea_or_datetimelike_dtype(dtype): + # non-EA dtype indexes have special casting logic, so we punt here + klass = cls._dtype_to_subclass(dtype) + if klass is not Index: + return klass(data, dtype=dtype, copy=copy, name=name, **kwargs) + + ea_cls = dtype.construct_array_type() + data = ea_cls._from_sequence(data, dtype=dtype, copy=copy) + disallow_kwargs(kwargs) + return Index._simple_new(data, name=name) + + elif is_ea_or_datetimelike_dtype(data_dtype): + data_dtype = cast(DtypeObj, data_dtype) + klass = cls._dtype_to_subclass(data_dtype) + if klass is not Index: + result = klass(data, copy=copy, name=name, **kwargs) + if dtype is not None: + return result.astype(dtype, copy=False) + return result + elif dtype is not None: + # GH#45206 + data = data.astype(dtype, copy=False) + + disallow_kwargs(kwargs) + data = extract_array(data, extract_numpy=True) + return Index._simple_new(data, name=name) + + # index-like + elif ( + isinstance(data, Index) + and data._is_backward_compat_public_numeric_index + and dtype is None + ): + return data._constructor(data, name=name, copy=copy) + elif isinstance(data, (np.ndarray, Index, ABCSeries)): + + if isinstance(data, ABCMultiIndex): + data = data._values + + if dtype is not None: + # we need to avoid having numpy coerce + # things that look like ints/floats to ints unless + # they are actually ints, e.g. 
'0' and 0.0 + # should not be coerced + # GH 11836 + data = sanitize_array(data, None, dtype=dtype, copy=copy) + + dtype = data.dtype + + if data.dtype.kind in ["i", "u", "f"]: + # maybe coerce to a sub-class + arr = data + else: + arr = com.asarray_tuplesafe(data, dtype=_dtype_obj) + + if dtype is None: + arr = _maybe_cast_data_without_dtype( + arr, cast_numeric_deprecated=True + ) + dtype = arr.dtype + + if kwargs: + return cls(arr, dtype, copy=copy, name=name, **kwargs) + + klass = cls._dtype_to_subclass(arr.dtype) + arr = klass._ensure_array(arr, dtype, copy) + disallow_kwargs(kwargs) + return klass._simple_new(arr, name) + + elif is_scalar(data): + raise cls._scalar_data_error(data) + elif hasattr(data, "__array__"): + return Index(np.asarray(data), dtype=dtype, copy=copy, name=name, **kwargs) + else: + + if tupleize_cols and is_list_like(data): + # GH21470: convert iterable to list before determining if empty + if is_iterator(data): + data = list(data) + + if data and all(isinstance(e, tuple) for e in data): + # we must be all tuples, otherwise don't construct + # 10697 + from pandas.core.indexes.multi import MultiIndex + + return MultiIndex.from_tuples( + data, names=name or kwargs.get("names") + ) + # other iterable of some kind + + subarr = com.asarray_tuplesafe(data, dtype=_dtype_obj) + if dtype is None: + # with e.g. a list [1, 2, 3] casting to numeric is _not_ deprecated + # error: Incompatible types in assignment (expression has type + # "Union[ExtensionArray, ndarray[Any, Any]]", variable has type + # "ndarray[Any, Any]") + subarr = _maybe_cast_data_without_dtype( # type: ignore[assignment] + subarr, cast_numeric_deprecated=False + ) + dtype = subarr.dtype + return Index(subarr, dtype=dtype, copy=copy, name=name, **kwargs) + + @classmethod + def _ensure_array(cls, data, dtype, copy: bool): + """ + Ensure we have a valid array to pass to _simple_new. + """ + if data.ndim > 1: + # GH#13601, GH#20285, GH#27125 + raise ValueError("Index data must be 1-dimensional") + if copy: + # asarray_tuplesafe does not always copy underlying data, + # so need to make sure that this happens + data = data.copy() + return data + + @final + @classmethod + def _dtype_to_subclass(cls, dtype: DtypeObj): + # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423 + + if isinstance(dtype, ExtensionDtype): + if isinstance(dtype, DatetimeTZDtype): + from pandas import DatetimeIndex + + return DatetimeIndex + elif isinstance(dtype, CategoricalDtype): + from pandas import CategoricalIndex + + return CategoricalIndex + elif isinstance(dtype, IntervalDtype): + from pandas import IntervalIndex + + return IntervalIndex + elif isinstance(dtype, PeriodDtype): + from pandas import PeriodIndex + + return PeriodIndex + + elif isinstance(dtype, SparseDtype): + warnings.warn( + "In a future version, passing a SparseArray to pd.Index " + "will store that array directly instead of converting to a " + "dense numpy ndarray. 
To retain the old behavior, use " + "pd.Index(arr.to_numpy()) instead", + FutureWarning, + stacklevel=find_stack_level(), + ) + return cls._dtype_to_subclass(dtype.subtype) + + return Index + + if dtype.kind == "M": + from pandas import DatetimeIndex + + return DatetimeIndex + + elif dtype.kind == "m": + from pandas import TimedeltaIndex + + return TimedeltaIndex + + elif is_float_dtype(dtype): + from pandas.core.api import Float64Index + + return Float64Index + elif is_unsigned_integer_dtype(dtype): + from pandas.core.api import UInt64Index + + return UInt64Index + elif is_signed_integer_dtype(dtype): + from pandas.core.api import Int64Index + + return Int64Index + + elif dtype == _dtype_obj: + # NB: assuming away MultiIndex + return Index + + elif issubclass(dtype.type, (str, bool, np.bool_)): + return Index + + raise NotImplementedError(dtype) + + """ + NOTE for new Index creation: + + - _simple_new: It returns new Index with the same type as the caller. + All metadata (such as name) must be provided by caller's responsibility. + Using _shallow_copy is recommended because it fills these metadata + otherwise specified. + + - _shallow_copy: It returns new Index with the same type (using + _simple_new), but fills caller's metadata otherwise specified. Passed + kwargs will overwrite corresponding metadata. + + See each method's docstring. + """ + + @property + def asi8(self): + """ + Integer representation of the values. + + Returns + ------- + ndarray + An ndarray with int64 dtype. + """ + warnings.warn( + "Index.asi8 is deprecated and will be removed in a future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return None + + @classmethod + def _simple_new(cls: type[_IndexT], values, name: Hashable = None) -> _IndexT: + """ + We require that we have a dtype compat for the values. If we are passed + a non-dtype compat, then coerce using the constructor. + + Must be careful not to recurse. + """ + assert isinstance(values, cls._data_cls), type(values) + + result = object.__new__(cls) + result._data = values + result._name = name + result._cache = {} + result._reset_identity() + + return result + + @classmethod + def _with_infer(cls, *args, **kwargs): + """ + Constructor that uses the 1.0.x behavior inferring numeric dtypes + for ndarray[object] inputs. + """ + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", ".*the Index constructor", FutureWarning) + result = cls(*args, **kwargs) + + if result.dtype == _dtype_obj and not result._is_multi: + # error: Argument 1 to "maybe_convert_objects" has incompatible type + # "Union[ExtensionArray, ndarray[Any, Any]]"; expected + # "ndarray[Any, Any]" + values = lib.maybe_convert_objects(result._values) # type: ignore[arg-type] + if values.dtype.kind in ["i", "u", "f"]: + return Index(values, name=result.name) + + return result + + @cache_readonly + def _constructor(self: _IndexT) -> type[_IndexT]: + return type(self) + + @final + def _maybe_check_unique(self) -> None: + """ + Check that an Index has no duplicates. + + This is typically only called via + `NDFrame.flags.allows_duplicate_labels.setter` when it's set to + True (duplicates aren't allowed). + + Raises + ------ + DuplicateLabelError + When the index is not unique. 
+ """ + if not self.is_unique: + msg = """Index has duplicates.""" + duplicates = self._format_duplicate_message() + msg += f"\n{duplicates}" + + raise DuplicateLabelError(msg) + + @final + def _format_duplicate_message(self) -> DataFrame: + """ + Construct the DataFrame for a DuplicateLabelError. + + This returns a DataFrame indicating the labels and positions + of duplicates in an index. This should only be called when it's + already known that duplicates are present. + + Examples + -------- + >>> idx = pd.Index(['a', 'b', 'a']) + >>> idx._format_duplicate_message() + positions + label + a [0, 2] + """ + from pandas import Series + + duplicates = self[self.duplicated(keep="first")].unique() + assert len(duplicates) + + out = Series(np.arange(len(self))).groupby(self).agg(list)[duplicates] + if self._is_multi: + # test_format_duplicate_labels_message_multi + # error: "Type[Index]" has no attribute "from_tuples" [attr-defined] + out.index = type(self).from_tuples(out.index) # type: ignore[attr-defined] + + if self.nlevels == 1: + out = out.rename_axis("label") + return out.to_frame(name="positions") + + # -------------------------------------------------------------------- + # Index Internals Methods + + @final + def _get_attributes_dict(self) -> dict[str_t, Any]: + """ + Return an attributes dict for my class. + + Temporarily added back for compatibility issue in dask, see + https://github.com/pandas-dev/pandas/pull/43895 + """ + warnings.warn( + "The Index._get_attributes_dict method is deprecated, and will be " + "removed in a future version", + DeprecationWarning, + stacklevel=find_stack_level(), + ) + return {k: getattr(self, k, None) for k in self._attributes} + + def _shallow_copy(self: _IndexT, values, name: Hashable = no_default) -> _IndexT: + """ + Create a new Index with the same class as the caller, don't copy the + data, use the same object attributes with passed in attributes taking + precedence. + + *this is an internal non-public method* + + Parameters + ---------- + values : the values to create the new Index, optional + name : Label, defaults to self.name + """ + name = self._name if name is no_default else name + + return self._simple_new(values, name=name) + + def _view(self: _IndexT) -> _IndexT: + """ + fastpath to make a shallow copy, i.e. new object with same data. + """ + result = self._simple_new(self._values, name=self._name) + + result._cache = self._cache + return result + + @final + def _rename(self: _IndexT, name: Hashable) -> _IndexT: + """ + fastpath for rename if new name is already validated. + """ + result = self._view() + result._name = name + return result + + @final + def is_(self, other) -> bool: + """ + More flexible, faster check like ``is`` but that works through views. + + Note: this is *not* the same as ``Index.identical()``, which checks + that metadata is also the same. + + Parameters + ---------- + other : object + Other object to compare against. + + Returns + ------- + bool + True if both have same underlying data, False otherwise. + + See Also + -------- + Index.identical : Works like ``Index.is_`` but also checks metadata. + """ + if self is other: + return True + elif not hasattr(other, "_id"): + return False + elif self._id is None or other._id is None: + return False + else: + return self._id is other._id + + @final + def _reset_identity(self) -> None: + """ + Initializes or resets ``_id`` attribute with new object. 
+ """ + self._id = object() + + @final + def _cleanup(self) -> None: + self._engine.clear_mapping() + + @cache_readonly + def _engine( + self, + ) -> libindex.IndexEngine: + # For base class (object dtype) we get ObjectEngine + + if isinstance(self._values, BaseMaskedArray): + # TODO(ExtensionIndex): use libindex.NullableEngine(self._values) + return libindex.ObjectEngine(self._get_engine_target()) + elif ( + isinstance(self._values, ExtensionArray) + and self._engine_type is libindex.ObjectEngine + ): + # TODO(ExtensionIndex): use libindex.ExtensionEngine(self._values) + return libindex.ObjectEngine(self._get_engine_target()) + + # to avoid a reference cycle, bind `target_values` to a local variable, so + # `self` is not passed into the lambda. + target_values = self._get_engine_target() + return self._engine_type(target_values) + + @final + @cache_readonly + def _dir_additions_for_owner(self) -> set[str_t]: + """ + Add the string-like labels to the owner dataframe/series dir output. + + If this is a MultiIndex, it's first level values are used. + """ + return { + c + for c in self.unique(level=0)[: get_option("display.max_dir_items")] + if isinstance(c, str) and c.isidentifier() + } + + # -------------------------------------------------------------------- + # Array-Like Methods + + # ndarray compat + def __len__(self) -> int: + """ + Return the length of the Index. + """ + return len(self._data) + + def __array__(self, dtype=None) -> np.ndarray: + """ + The array interface, return my values. + """ + return np.asarray(self._data, dtype=dtype) + + def __array_ufunc__(self, ufunc: np.ufunc, method: str_t, *inputs, **kwargs): + if any(isinstance(other, (ABCSeries, ABCDataFrame)) for other in inputs): + return NotImplemented + + result = arraylike.maybe_dispatch_ufunc_to_dunder_op( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + return result + + if "out" in kwargs: + # e.g. test_dti_isub_tdi + return arraylike.dispatch_ufunc_with_out( + self, ufunc, method, *inputs, **kwargs + ) + + if method == "reduce": + result = arraylike.dispatch_reduction_ufunc( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + return result + + new_inputs = [x if x is not self else x._values for x in inputs] + result = getattr(ufunc, method)(*new_inputs, **kwargs) + if ufunc.nout == 2: + # i.e. np.divmod, np.modf, np.frexp + return tuple(self.__array_wrap__(x) for x in result) + + return self.__array_wrap__(result) + + def __array_wrap__(self, result, context=None): + """ + Gets called after a ufunc and other functions e.g. np.split. + """ + result = lib.item_from_zerodim(result) + if is_bool_dtype(result) or lib.is_scalar(result) or np.ndim(result) > 1: + return result + + return Index(result, name=self.name) + + @cache_readonly + def dtype(self) -> DtypeObj: + """ + Return the dtype object of the underlying data. + """ + return self._data.dtype + + @final + def ravel(self, order="C"): + """ + Return an ndarray of the flattened values of the underlying data. + + Returns + ------- + numpy.ndarray + Flattened array. + + See Also + -------- + numpy.ndarray.ravel : Return a flattened array. 
+ """ + warnings.warn( + "Index.ravel returning ndarray is deprecated; in a future version " + "this will return a view on self.", + FutureWarning, + stacklevel=find_stack_level(), + ) + if needs_i8_conversion(self.dtype): + # Item "ndarray[Any, Any]" of "Union[ExtensionArray, ndarray[Any, Any]]" + # has no attribute "_ndarray" + values = self._data._ndarray # type: ignore[union-attr] + elif is_interval_dtype(self.dtype): + values = np.asarray(self._data) + else: + values = self._get_engine_target() + return values.ravel(order=order) + + def view(self, cls=None): + + # we need to see if we are subclassing an + # index type here + if cls is not None and not hasattr(cls, "_typ"): + dtype = cls + if isinstance(cls, str): + dtype = pandas_dtype(cls) + + if isinstance(dtype, (np.dtype, ExtensionDtype)) and needs_i8_conversion( + dtype + ): + if dtype.kind == "m" and dtype != "m8[ns]": + # e.g. m8[s] + return self._data.view(cls) + + idx_cls = self._dtype_to_subclass(dtype) + # NB: we only get here for subclasses that override + # _data_cls such that it is a type and not a tuple + # of types. + arr_cls = idx_cls._data_cls + arr = arr_cls(self._data.view("i8"), dtype=dtype) + return idx_cls._simple_new(arr, name=self.name) + + result = self._data.view(cls) + else: + result = self._view() + if isinstance(result, Index): + result._id = self._id + return result + + def astype(self, dtype, copy: bool = True): + """ + Create an Index with values cast to dtypes. + + The class of a new Index is determined by dtype. When conversion is + impossible, a TypeError exception is raised. + + Parameters + ---------- + dtype : numpy dtype or pandas type + Note that any signed integer `dtype` is treated as ``'int64'``, + and any unsigned integer `dtype` is treated as ``'uint64'``, + regardless of the size. + copy : bool, default True + By default, astype always returns a newly allocated object. + If copy is set to False and internal requirements on dtype are + satisfied, the original data is used to create a new Index + or the original Index is returned. + + Returns + ------- + Index + Index with values cast to specified dtype. + """ + if dtype is not None: + dtype = pandas_dtype(dtype) + + if is_dtype_equal(self.dtype, dtype): + # Ensure that self.astype(self.dtype) is self + return self.copy() if copy else self + + if ( + self.dtype == np.dtype("M8[ns]") + and isinstance(dtype, np.dtype) + and dtype.kind == "M" + and dtype != np.dtype("M8[ns]") + ): + # For now DatetimeArray supports this by unwrapping ndarray, + # but DatetimeIndex doesn't + raise TypeError(f"Cannot cast {type(self).__name__} to dtype") + + values = self._data + if isinstance(values, ExtensionArray): + with rewrite_exception(type(values).__name__, type(self).__name__): + new_values = values.astype(dtype, copy=copy) + + elif isinstance(dtype, ExtensionDtype): + cls = dtype.construct_array_type() + # Note: for RangeIndex and CategoricalDtype self vs self._values + # behaves differently here. + new_values = cls._from_sequence(self, dtype=dtype, copy=copy) + + else: + try: + new_values = values.astype(dtype, copy=copy) + except (TypeError, ValueError) as err: + raise TypeError( + f"Cannot cast {type(self).__name__} to dtype {dtype}" + ) from err + + # pass copy=False because any copying will be done in the astype above + return Index(new_values, name=self.name, dtype=new_values.dtype, copy=False) + + _index_shared_docs[ + "take" + ] = """ + Return a new %(klass)s of the values selected by the indices. 
+ + For internal compatibility with numpy arrays. + + Parameters + ---------- + indices : array-like + Indices to be taken. + axis : int, optional + The axis over which to select values, always 0. + allow_fill : bool, default True + fill_value : scalar, default None + If allow_fill=True and fill_value is not None, indices specified by + -1 are regarded as NA. If Index doesn't hold NA, raise ValueError. + + Returns + ------- + Index + An index formed of elements at the given indices. Will be the same + type as self, except for RangeIndex. + + See Also + -------- + numpy.ndarray.take: Return an array formed from the + elements of a at the given indices. + """ + + @Appender(_index_shared_docs["take"] % _index_doc_kwargs) + def take( + self, indices, axis: int = 0, allow_fill: bool = True, fill_value=None, **kwargs + ): + if kwargs: + nv.validate_take((), kwargs) + if is_scalar(indices): + raise TypeError("Expected indices to be array-like") + indices = ensure_platform_int(indices) + allow_fill = self._maybe_disallow_fill(allow_fill, fill_value, indices) + + # Note: we discard fill_value and use self._na_value, only relevant + # in the case where allow_fill is True and fill_value is not None + values = self._values + if isinstance(values, np.ndarray): + taken = algos.take( + values, indices, allow_fill=allow_fill, fill_value=self._na_value + ) + else: + # algos.take passes 'axis' keyword which not all EAs accept + taken = values.take( + indices, allow_fill=allow_fill, fill_value=self._na_value + ) + # _constructor so RangeIndex->Int64Index + return self._constructor._simple_new(taken, name=self.name) + + @final + def _maybe_disallow_fill(self, allow_fill: bool, fill_value, indices) -> bool: + """ + We only use pandas-style take when allow_fill is True _and_ + fill_value is not None. + """ + if allow_fill and fill_value is not None: + # only fill if we are passing a non-None fill_value + if self._can_hold_na: + if (indices < -1).any(): + raise ValueError( + "When allow_fill=True and fill_value is not None, " + "all indices must be >= -1" + ) + else: + cls_name = type(self).__name__ + raise ValueError( + f"Unable to fill values because {cls_name} cannot contain NA" + ) + else: + allow_fill = False + return allow_fill + + _index_shared_docs[ + "repeat" + ] = """ + Repeat elements of a %(klass)s. + + Returns a new %(klass)s where each element of the current %(klass)s + is repeated consecutively a given number of times. + + Parameters + ---------- + repeats : int or array of ints + The number of repetitions for each element. This should be a + non-negative integer. Repeating 0 times will return an empty + %(klass)s. + axis : None + Must be ``None``. Has no effect but is accepted for compatibility + with numpy. + + Returns + ------- + repeated_index : %(klass)s + Newly created %(klass)s with repeated elements. + + See Also + -------- + Series.repeat : Equivalent function for Series. + numpy.repeat : Similar method for :class:`numpy.ndarray`. 
+ + Examples + -------- + >>> idx = pd.Index(['a', 'b', 'c']) + >>> idx + Index(['a', 'b', 'c'], dtype='object') + >>> idx.repeat(2) + Index(['a', 'a', 'b', 'b', 'c', 'c'], dtype='object') + >>> idx.repeat([1, 2, 3]) + Index(['a', 'b', 'b', 'c', 'c', 'c'], dtype='object') + """ + + @Appender(_index_shared_docs["repeat"] % _index_doc_kwargs) + def repeat(self, repeats, axis=None): + repeats = ensure_platform_int(repeats) + nv.validate_repeat((), {"axis": axis}) + res_values = self._values.repeat(repeats) + + # _constructor so RangeIndex->Int64Index + return self._constructor._simple_new(res_values, name=self.name) + + # -------------------------------------------------------------------- + # Copying Methods + + def copy( + self: _IndexT, + name: Hashable | None = None, + deep: bool = False, + dtype: Dtype | None = None, + names: Sequence[Hashable] | None = None, + ) -> _IndexT: + """ + Make a copy of this object. + + Name and dtype set those attributes on the new object. + + Parameters + ---------- + name : Label, optional + Set name for new object. + deep : bool, default False + dtype : numpy dtype or pandas type, optional + Set dtype for new object. + + .. deprecated:: 1.2.0 + use ``astype`` method instead. + names : list-like, optional + Kept for compatibility with MultiIndex. Should not be used. + + .. deprecated:: 1.4.0 + use ``name`` instead. + + Returns + ------- + Index + Index refers to a new object which is a copy of this object. + + Notes + ----- + In most cases, there should be no functional difference from using + ``deep``, but if ``deep`` is passed it will attempt to deepcopy. + """ + if names is not None: + warnings.warn( + "parameter names is deprecated and will be removed in a future " + "version. Use the name parameter instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + name = self._validate_names(name=name, names=names, deep=deep)[0] + if deep: + new_data = self._data.copy() + new_index = type(self)._simple_new(new_data, name=name) + else: + new_index = self._rename(name=name) + + if dtype: + warnings.warn( + "parameter dtype is deprecated and will be removed in a future " + "version. Use the astype method instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + new_index = new_index.astype(dtype) + return new_index + + @final + def __copy__(self: _IndexT, **kwargs) -> _IndexT: + return self.copy(**kwargs) + + @final + def __deepcopy__(self: _IndexT, memo=None) -> _IndexT: + """ + Parameters + ---------- + memo, default None + Standard signature. Unused + """ + return self.copy(deep=True) + + # -------------------------------------------------------------------- + # Rendering Methods + + @final + def __repr__(self) -> str_t: + """ + Return a string representation for this object. + """ + klass_name = type(self).__name__ + data = self._format_data() + attrs = self._format_attrs() + space = self._format_space() + attrs_str = [f"{k}={v}" for k, v in attrs] + prepr = f",{space}".join(attrs_str) + + # no data provided, just attributes + if data is None: + data = "" + + return f"{klass_name}({data}{prepr})" + + def _format_space(self) -> str_t: + + # using space here controls if the attributes + # are line separated or not (the default) + + # max_seq_items = get_option('display.max_seq_items') + # if len(self) > max_seq_items: + # space = "\n%s" % (' ' * (len(klass) + 1)) + return " " + + @property + def _formatter_func(self): + """ + Return the formatter function.
+ """ + return default_pprint + + def _format_data(self, name=None) -> str_t: + """ + Return the formatted data as a unicode string. + """ + # do we want to justify (only do so for non-objects) + is_justify = True + + if self.inferred_type == "string": + is_justify = False + elif self.inferred_type == "categorical": + self = cast("CategoricalIndex", self) + if is_object_dtype(self.categories): + is_justify = False + + return format_object_summary( + self, + self._formatter_func, + is_justify=is_justify, + name=name, + line_break_each_value=self._is_multi, + ) + + def _format_attrs(self) -> list[tuple[str_t, str_t | int | bool | None]]: + """ + Return a list of tuples of the (attr,formatted_value). + """ + attrs: list[tuple[str_t, str_t | int | bool | None]] = [] + + if not self._is_multi: + attrs.append(("dtype", f"'{self.dtype}'")) + + if self.name is not None: + attrs.append(("name", default_pprint(self.name))) + elif self._is_multi and any(x is not None for x in self.names): + attrs.append(("names", default_pprint(self.names))) + + max_seq_items = get_option("display.max_seq_items") or len(self) + if len(self) > max_seq_items: + attrs.append(("length", len(self))) + return attrs + + @final + def _mpl_repr(self) -> np.ndarray: + # how to represent ourselves to matplotlib + if isinstance(self.dtype, np.dtype) and self.dtype.kind != "M": + return cast(np.ndarray, self.values) + return self.astype(object, copy=False)._values + + def format( + self, + name: bool = False, + formatter: Callable | None = None, + na_rep: str_t = "NaN", + ) -> list[str_t]: + """ + Render a string representation of the Index. + """ + header = [] + if name: + header.append( + pprint_thing(self.name, escape_chars=("\t", "\r", "\n")) + if self.name is not None + else "" + ) + + if formatter is not None: + return header + list(self.map(formatter)) + + return self._format_with_header(header, na_rep=na_rep) + + def _format_with_header(self, header: list[str_t], na_rep: str_t) -> list[str_t]: + from pandas.io.formats.format import format_array + + values = self._values + + if is_object_dtype(values.dtype): + values = cast(np.ndarray, values) + values = lib.maybe_convert_objects(values, safe=True) + + result = [pprint_thing(x, escape_chars=("\t", "\r", "\n")) for x in values] + + # could have nans + mask = is_float_nan(values) + if mask.any(): + result_arr = np.array(result) + result_arr[mask] = na_rep + result = result_arr.tolist() + else: + result = trim_front(format_array(values, None, justify="left")) + return header + result + + @final + def to_native_types(self, slicer=None, **kwargs) -> np.ndarray: + """ + Format specified values of `self` and return them. + + .. deprecated:: 1.2.0 + + Parameters + ---------- + slicer : int, array-like + An indexer into `self` that specifies which values + are used in the formatting process. + kwargs : dict + Options for specifying how the values should be formatted. + These options include the following: + + 1) na_rep : str + The value that serves as a placeholder for NULL values + 2) quoting : bool or None + Whether or not there are quoted values in `self` + 3) date_format : str + The format used to represent date-like values. + + Returns + ------- + numpy.ndarray + Formatted values. + """ + warnings.warn( + "The 'to_native_types' method is deprecated and will be removed in " + "a future version. 
Use 'astype(str)' instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + values = self + if slicer is not None: + values = values[slicer] + return values._format_native_types(**kwargs) + + def _format_native_types(self, *, na_rep="", quoting=None, **kwargs): + """ + Actually format specific types of the index. + """ + mask = isna(self) + if not self.is_object() and not quoting: + values = np.asarray(self).astype(str) + else: + values = np.array(self, dtype=object, copy=True) + + values[mask] = na_rep + return values + + def _summary(self, name=None) -> str_t: + """ + Return a summarized representation. + + Parameters + ---------- + name : str + name to use in the summary representation + + Returns + ------- + String with a summarized representation of the index + """ + if len(self) > 0: + head = self[0] + if hasattr(head, "format") and not isinstance(head, str): + head = head.format() + elif needs_i8_conversion(self.dtype): + # e.g. Timedelta, display as values, not quoted + head = self._formatter_func(head).replace("'", "") + tail = self[-1] + if hasattr(tail, "format") and not isinstance(tail, str): + tail = tail.format() + elif needs_i8_conversion(self.dtype): + # e.g. Timedelta, display as values, not quoted + tail = self._formatter_func(tail).replace("'", "") + + index_summary = f", {head} to {tail}" + else: + index_summary = "" + + if name is None: + name = type(self).__name__ + return f"{name}: {len(self)} entries{index_summary}" + + # -------------------------------------------------------------------- + # Conversion Methods + + def to_flat_index(self): + """ + Identity method. + + This is implemented for compatibility with subclass implementations + when chaining. + + Returns + ------- + pd.Index + Caller. + + See Also + -------- + MultiIndex.to_flat_index : Subclass implementation. + """ + return self + + def to_series(self, index=None, name: Hashable = None) -> Series: + """ + Create a Series with both index and values equal to the index keys. + + Useful with map for returning an indexer based on an index. + + Parameters + ---------- + index : Index, optional + Index of resulting Series. If None, defaults to original index. + name : str, optional + Name of resulting Series. If None, defaults to name of original + index. + + Returns + ------- + Series + The dtype will be based on the type of the Index values. + + See Also + -------- + Index.to_frame : Convert an Index to a DataFrame. + Series.to_frame : Convert Series to DataFrame. + + Examples + -------- + >>> idx = pd.Index(['Ant', 'Bear', 'Cow'], name='animal') + + By default, the original Index and original name is reused. + + >>> idx.to_series() + animal + Ant Ant + Bear Bear + Cow Cow + Name: animal, dtype: object + + To enforce a new Index, specify new labels to ``index``: + + >>> idx.to_series(index=[0, 1, 2]) + 0 Ant + 1 Bear + 2 Cow + Name: animal, dtype: object + + To override the name of the resulting column, specify `name`: + + >>> idx.to_series(name='zoo') + animal + Ant Ant + Bear Bear + Cow Cow + Name: zoo, dtype: object + """ + from pandas import Series + + if index is None: + index = self._view() + if name is None: + name = self.name + + return Series(self._values.copy(), index=index, name=name) + + def to_frame( + self, index: bool = True, name: Hashable = lib.no_default + ) -> DataFrame: + """ + Create a DataFrame with a column containing the Index. + + Parameters + ---------- + index : bool, default True + Set the index of the returned DataFrame as the original Index. 
+ + name : object, default None + The passed name should substitute for the index name (if it has + one). + + Returns + ------- + DataFrame + DataFrame containing the original Index data. + + See Also + -------- + Index.to_series : Convert an Index to a Series. + Series.to_frame : Convert Series to DataFrame. + + Examples + -------- + >>> idx = pd.Index(['Ant', 'Bear', 'Cow'], name='animal') + >>> idx.to_frame() + animal + animal + Ant Ant + Bear Bear + Cow Cow + + By default, the original Index is reused. To enforce a new Index: + + >>> idx.to_frame(index=False) + animal + 0 Ant + 1 Bear + 2 Cow + + To override the name of the resulting column, specify `name`: + + >>> idx.to_frame(index=False, name='zoo') + zoo + 0 Ant + 1 Bear + 2 Cow + """ + from pandas import DataFrame + + if name is None: + warnings.warn( + "Explicitly passing `name=None` currently preserves the Index's name " + "or uses a default name of 0. This behaviour is deprecated, and in " + "the future `None` will be used as the name of the resulting " + "DataFrame column.", + FutureWarning, + stacklevel=find_stack_level(), + ) + name = lib.no_default + + if name is lib.no_default: + name = self.name or 0 + result = DataFrame({name: self._values.copy()}) + + if index: + result.index = self + return result + + # -------------------------------------------------------------------- + # Name-Centric Methods + + @property + def name(self): + """ + Return Index or MultiIndex name. + """ + return self._name + + @name.setter + def name(self, value: Hashable): + if self._no_setting_name: + # Used in MultiIndex.levels to avoid silently ignoring name updates. + raise RuntimeError( + "Cannot set name on a level of a MultiIndex. Use " + "'MultiIndex.set_names' instead." + ) + maybe_extract_name(value, None, type(self)) + self._name = value + + @final + def _validate_names( + self, name=None, names=None, deep: bool = False + ) -> list[Hashable]: + """ + Handles the quirks of having a singular 'name' parameter for general + Index and plural 'names' parameter for MultiIndex. + """ + from copy import deepcopy + + if names is not None and name is not None: + raise TypeError("Can only provide one of `names` and `name`") + elif names is None and name is None: + new_names = deepcopy(self.names) if deep else self.names + elif names is not None: + if not is_list_like(names): + raise TypeError("Must pass list-like as `names`.") + new_names = names + elif not is_list_like(name): + new_names = [name] + else: + new_names = name + + if len(new_names) != len(self.names): + raise ValueError( + f"Length of new names must be {len(self.names)}, got {len(new_names)}" + ) + + # All items in 'new_names' need to be hashable + validate_all_hashable(*new_names, error_name=f"{type(self).__name__}.name") + + return new_names + + def _get_names(self) -> FrozenList: + return FrozenList((self.name,)) + + def _set_names(self, values, *, level=None) -> None: + """ + Set new names on index. Each name has to be a hashable type. + + Parameters + ---------- + values : str or sequence + name(s) to set + level : int, level name, or sequence of int/level names (default None) + If the index is a MultiIndex (hierarchical), level(s) to set (None + for all levels). Otherwise level must be None + + Raises + ------ + TypeError if each name is not hashable. 
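+
+        Examples
+        --------
+        A minimal sketch (added for illustration): this method backs the
+        public ``names`` property, so assigning to ``Index.names`` ends up
+        here.
+
+        >>> idx = pd.Index([1, 2, 3])
+        >>> idx.names = ['x']
+        >>> idx.name
+        'x'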
+ """ + if not is_list_like(values): + raise ValueError("Names must be a list-like") + if len(values) != 1: + raise ValueError(f"Length of new names must be 1, got {len(values)}") + + # GH 20527 + # All items in 'name' need to be hashable: + validate_all_hashable(*values, error_name=f"{type(self).__name__}.name") + + self._name = values[0] + + names = property(fset=_set_names, fget=_get_names) + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "names"]) + def set_names(self, names, level=None, inplace: bool = False): + """ + Set Index or MultiIndex name. + + Able to set new names partially and by level. + + Parameters + ---------- + + names : label or list of label or dict-like for MultiIndex + Name(s) to set. + + .. versionchanged:: 1.3.0 + + level : int, label or list of int or label, optional + If the index is a MultiIndex and names is not dict-like, level(s) to set + (None for all levels). Otherwise level must be None. + + .. versionchanged:: 1.3.0 + + inplace : bool, default False + Modifies the object directly, instead of creating a new Index or + MultiIndex. + + Returns + ------- + Index or None + The same type as the caller or None if ``inplace=True``. + + See Also + -------- + Index.rename : Able to set new names without level. + + Examples + -------- + >>> idx = pd.Index([1, 2, 3, 4]) + >>> idx + Int64Index([1, 2, 3, 4], dtype='int64') + >>> idx.set_names('quarter') + Int64Index([1, 2, 3, 4], dtype='int64', name='quarter') + + >>> idx = pd.MultiIndex.from_product([['python', 'cobra'], + ... [2018, 2019]]) + >>> idx + MultiIndex([('python', 2018), + ('python', 2019), + ( 'cobra', 2018), + ( 'cobra', 2019)], + ) + >>> idx.set_names(['kind', 'year'], inplace=True) + >>> idx + MultiIndex([('python', 2018), + ('python', 2019), + ( 'cobra', 2018), + ( 'cobra', 2019)], + names=['kind', 'year']) + >>> idx.set_names('species', level=0) + MultiIndex([('python', 2018), + ('python', 2019), + ( 'cobra', 2018), + ( 'cobra', 2019)], + names=['species', 'year']) + + When renaming levels with a dict, levels can not be passed. 
+ + >>> idx.set_names({'kind': 'snake'}) + MultiIndex([('python', 2018), + ('python', 2019), + ( 'cobra', 2018), + ( 'cobra', 2019)], + names=['snake', 'year']) + """ + if level is not None and not isinstance(self, ABCMultiIndex): + raise ValueError("Level must be None for non-MultiIndex") + + elif level is not None and not is_list_like(level) and is_list_like(names): + raise TypeError("Names must be a string when a single level is provided.") + + elif not is_list_like(names) and level is None and self.nlevels > 1: + raise TypeError("Must pass list-like as `names`.") + + elif is_dict_like(names) and not isinstance(self, ABCMultiIndex): + raise TypeError("Can only pass dict-like as `names` for MultiIndex.") + + elif is_dict_like(names) and level is not None: + raise TypeError("Can not pass level for dictlike `names`.") + + if isinstance(self, ABCMultiIndex) and is_dict_like(names) and level is None: + # Transform dict to list of new names and corresponding levels + level, names_adjusted = [], [] + for i, name in enumerate(self.names): + if name in names.keys(): + level.append(i) + names_adjusted.append(names[name]) + names = names_adjusted + + if not is_list_like(names): + names = [names] + if level is not None and not is_list_like(level): + level = [level] + + if inplace: + idx = self + else: + idx = self._view() + + idx._set_names(names, level=level) + if not inplace: + return idx + + def rename(self, name, inplace=False): + """ + Alter Index or MultiIndex name. + + Able to set new names without level. Defaults to returning new index. + Length of names must match number of levels in MultiIndex. + + Parameters + ---------- + name : label or list of labels + Name(s) to set. + inplace : bool, default False + Modifies the object directly, instead of creating a new Index or + MultiIndex. + + Returns + ------- + Index or None + The same type as the caller or None if ``inplace=True``. + + See Also + -------- + Index.set_names : Able to set new names partially and by level. + + Examples + -------- + >>> idx = pd.Index(['A', 'C', 'A', 'B'], name='score') + >>> idx.rename('grade') + Index(['A', 'C', 'A', 'B'], dtype='object', name='grade') + + >>> idx = pd.MultiIndex.from_product([['python', 'cobra'], + ... [2018, 2019]], + ... names=['kind', 'year']) + >>> idx + MultiIndex([('python', 2018), + ('python', 2019), + ( 'cobra', 2018), + ( 'cobra', 2019)], + names=['kind', 'year']) + >>> idx.rename(['species', 'year']) + MultiIndex([('python', 2018), + ('python', 2019), + ( 'cobra', 2018), + ( 'cobra', 2019)], + names=['species', 'year']) + >>> idx.rename('species') + Traceback (most recent call last): + TypeError: Must pass list-like as `names`. + """ + return self.set_names([name], inplace=inplace) + + # -------------------------------------------------------------------- + # Level-Centric Methods + + @property + def nlevels(self) -> int: + """ + Number of levels. + """ + return 1 + + def _sort_levels_monotonic(self: _IndexT) -> _IndexT: + """ + Compat with MultiIndex. + """ + return self + + @final + def _validate_index_level(self, level) -> None: + """ + Validate index level. + + For single-level Index getting level number is a no-op, but some + verification must be done like in MultiIndex. 
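+
+        A small sketch (added for illustration): on a flat Index only
+        ``0``, ``-1``, or a label equal to the index's name validates;
+        any other level raises.
+
+        >>> idx = pd.Index(['a', 'b'])
+        >>> idx._validate_index_level(0)  # returns None, i.e. valid
+        >>> idx._validate_index_level(1)
+        Traceback (most recent call last):
+        IndexError: Too many levels: Index has only 1 level, not 2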
+
+        """
+        if isinstance(level, int):
+            if level < 0 and level != -1:
+                raise IndexError(
+                    "Too many levels: Index has only 1 level, "
+                    f"{level} is not a valid level number"
+                )
+            elif level > 0:
+                raise IndexError(
+                    f"Too many levels: Index has only 1 level, not {level + 1}"
+                )
+        elif level != self.name:
+            raise KeyError(
+                f"Requested level ({level}) does not match index name ({self.name})"
+            )
+
+    def _get_level_number(self, level) -> int:
+        self._validate_index_level(level)
+        return 0
+
+    def sortlevel(self, level=None, ascending=True, sort_remaining=None):
+        """
+        For internal compatibility with the Index API.
+
+        Sort the Index. This is for compat with MultiIndex.
+
+        Parameters
+        ----------
+        ascending : bool, default True
+            False to sort in descending order.
+
+        level, sort_remaining are compat parameters
+
+        Returns
+        -------
+        Index
+        """
+        if not isinstance(ascending, (list, bool)):
+            raise TypeError(
+                "ascending must be a single bool value or "
+                "a list of bool values of length 1"
+            )
+
+        if isinstance(ascending, list):
+            if len(ascending) != 1:
+                raise TypeError("ascending must be a list of bool values of length 1")
+            ascending = ascending[0]
+
+        if not isinstance(ascending, bool):
+            raise TypeError("ascending must be a bool value")
+
+        return self.sort_values(return_indexer=True, ascending=ascending)
+
+    def _get_level_values(self, level) -> Index:
+        """
+        Return an Index of values for requested level.
+
+        This is primarily useful to get an individual level of values from a
+        MultiIndex, but is provided on Index as well for compatibility.
+
+        Parameters
+        ----------
+        level : int or str
+            It is either the integer position or the name of the level.
+
+        Returns
+        -------
+        Index
+            Calling object, as there is only one level in the Index.
+
+        See Also
+        --------
+        MultiIndex.get_level_values : Get values for a level of a MultiIndex.
+
+        Notes
+        -----
+        For Index, level should be 0, since there are no multiple levels.
+
+        Examples
+        --------
+        >>> idx = pd.Index(list('abc'))
+        >>> idx
+        Index(['a', 'b', 'c'], dtype='object')
+
+        Get level values by supplying `level` as integer:
+
+        >>> idx.get_level_values(0)
+        Index(['a', 'b', 'c'], dtype='object')
+        """
+        self._validate_index_level(level)
+        return self
+
+    get_level_values = _get_level_values
+
+    @final
+    def droplevel(self, level=0):
+        """
+        Return index with requested level(s) removed.
+
+        If resulting index has only 1 level left, the result will be
+        of Index type, not MultiIndex.
+
+        Parameters
+        ----------
+        level : int, str, or list-like, default 0
+            If a string is given, it must be the name of a level.
+            If list-like, elements must be names or indexes of levels.
+
+        Returns
+        -------
+        Index or MultiIndex
+
+        Examples
+        --------
+        >>> mi = pd.MultiIndex.from_arrays(
+        ...     [[1, 2], [3, 4], [5, 6]], names=['x', 'y', 'z'])
+        >>> mi
+        MultiIndex([(1, 3, 5),
+                    (2, 4, 6)],
+                   names=['x', 'y', 'z'])
+
+        >>> mi.droplevel()
+        MultiIndex([(3, 5),
+                    (4, 6)],
+                   names=['y', 'z'])
+
+        >>> mi.droplevel(2)
+        MultiIndex([(1, 3),
+                    (2, 4)],
+                   names=['x', 'y'])
+
+        >>> mi.droplevel('z')
+        MultiIndex([(1, 3),
+                    (2, 4)],
+                   names=['x', 'y'])
+
+        >>> mi.droplevel(['x', 'y'])
+        Int64Index([5, 6], dtype='int64', name='z')
+        """
+        if not isinstance(level, (tuple, list)):
+            level = [level]
+
+        levnums = sorted(self._get_level_number(lev) for lev in level)[::-1]
+
+        return self._drop_level_numbers(levnums)
+
+    @final
+    def _drop_level_numbers(self, levnums: list[int]):
+        """
+        Drop MultiIndex levels by level _number_, not name.
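+
+        A small sketch (added for illustration): dropping one of two levels
+        leaves a flat Index carrying the remaining level's values and name.
+
+        >>> mi = pd.MultiIndex.from_arrays([[1, 2], ['a', 'b']],
+        ...                                names=['x', 'y'])
+        >>> mi._drop_level_numbers([0])
+        Index(['a', 'b'], dtype='object', name='y')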
+ """ + + if not levnums and not isinstance(self, ABCMultiIndex): + return self + if len(levnums) >= self.nlevels: + raise ValueError( + f"Cannot remove {len(levnums)} levels from an index with " + f"{self.nlevels} levels: at least one level must be left." + ) + # The two checks above guarantee that here self is a MultiIndex + self = cast("MultiIndex", self) + + new_levels = list(self.levels) + new_codes = list(self.codes) + new_names = list(self.names) + + for i in levnums: + new_levels.pop(i) + new_codes.pop(i) + new_names.pop(i) + + if len(new_levels) == 1: + lev = new_levels[0] + + if len(lev) == 0: + # If lev is empty, lev.take will fail GH#42055 + if len(new_codes[0]) == 0: + # GH#45230 preserve RangeIndex here + # see test_reset_index_empty_rangeindex + result = lev[:0] + else: + res_values = algos.take(lev._values, new_codes[0], allow_fill=True) + # _constructor instead of type(lev) for RangeIndex compat GH#35230 + result = lev._constructor._simple_new(res_values, name=new_names[0]) + else: + # set nan if needed + mask = new_codes[0] == -1 + result = new_levels[0].take(new_codes[0]) + if mask.any(): + result = result.putmask(mask, np.nan) + + result._name = new_names[0] + + return result + else: + from pandas.core.indexes.multi import MultiIndex + + return MultiIndex( + levels=new_levels, + codes=new_codes, + names=new_names, + verify_integrity=False, + ) + + def _get_grouper_for_level(self, mapper, *, level=None): + """ + Get index grouper corresponding to an index level + + Parameters + ---------- + mapper: Group mapping function or None + Function mapping index values to groups + level : int or None + Index level, positional + + Returns + ------- + grouper : Index + Index of values to group on. + labels : ndarray of int or None + Array of locations in level_index. + uniques : Index or None + Index of unique values for level. + """ + assert level is None or level == 0 + if mapper is None: + grouper = self + else: + grouper = self.map(mapper) + + return grouper, None, None + + # -------------------------------------------------------------------- + # Introspection Methods + + @final + @property + def is_monotonic(self) -> bool: + """ + Alias for is_monotonic_increasing. + """ + return self.is_monotonic_increasing + + @property + def is_monotonic_increasing(self) -> bool: + """ + Return if the index is monotonic increasing (only equal or + increasing) values. + + Examples + -------- + >>> Index([1, 2, 3]).is_monotonic_increasing + True + >>> Index([1, 2, 2]).is_monotonic_increasing + True + >>> Index([1, 3, 2]).is_monotonic_increasing + False + """ + return self._engine.is_monotonic_increasing + + @property + def is_monotonic_decreasing(self) -> bool: + """ + Return if the index is monotonic decreasing (only equal or + decreasing) values. + + Examples + -------- + >>> Index([3, 2, 1]).is_monotonic_decreasing + True + >>> Index([3, 2, 2]).is_monotonic_decreasing + True + >>> Index([3, 1, 2]).is_monotonic_decreasing + False + """ + return self._engine.is_monotonic_decreasing + + @final + @property + def _is_strictly_monotonic_increasing(self) -> bool: + """ + Return if the index is strictly monotonic increasing + (only increasing) values. 
+ + Examples + -------- + >>> Index([1, 2, 3])._is_strictly_monotonic_increasing + True + >>> Index([1, 2, 2])._is_strictly_monotonic_increasing + False + >>> Index([1, 3, 2])._is_strictly_monotonic_increasing + False + """ + return self.is_unique and self.is_monotonic_increasing + + @final + @property + def _is_strictly_monotonic_decreasing(self) -> bool: + """ + Return if the index is strictly monotonic decreasing + (only decreasing) values. + + Examples + -------- + >>> Index([3, 2, 1])._is_strictly_monotonic_decreasing + True + >>> Index([3, 2, 2])._is_strictly_monotonic_decreasing + False + >>> Index([3, 1, 2])._is_strictly_monotonic_decreasing + False + """ + return self.is_unique and self.is_monotonic_decreasing + + @cache_readonly + def is_unique(self) -> bool: + """ + Return if the index has unique values. + """ + return self._engine.is_unique + + @final + @property + def has_duplicates(self) -> bool: + """ + Check if the Index has duplicate values. + + Returns + ------- + bool + Whether or not the Index has duplicate values. + + Examples + -------- + >>> idx = pd.Index([1, 5, 7, 7]) + >>> idx.has_duplicates + True + + >>> idx = pd.Index([1, 5, 7]) + >>> idx.has_duplicates + False + + >>> idx = pd.Index(["Watermelon", "Orange", "Apple", + ... "Watermelon"]).astype("category") + >>> idx.has_duplicates + True + + >>> idx = pd.Index(["Orange", "Apple", + ... "Watermelon"]).astype("category") + >>> idx.has_duplicates + False + """ + return not self.is_unique + + @final + def is_boolean(self) -> bool: + """ + Check if the Index only consists of booleans. + + Returns + ------- + bool + Whether or not the Index only consists of booleans. + + See Also + -------- + is_integer : Check if the Index only consists of integers. + is_floating : Check if the Index is a floating type. + is_numeric : Check if the Index only consists of numeric data. + is_object : Check if the Index is of the object dtype. + is_categorical : Check if the Index holds categorical data. + is_interval : Check if the Index holds Interval objects. + is_mixed : Check if the Index holds data with mixed data types. + + Examples + -------- + >>> idx = pd.Index([True, False, True]) + >>> idx.is_boolean() + True + + >>> idx = pd.Index(["True", "False", "True"]) + >>> idx.is_boolean() + False + + >>> idx = pd.Index([True, False, "True"]) + >>> idx.is_boolean() + False + """ + return self.inferred_type in ["boolean"] + + @final + def is_integer(self) -> bool: + """ + Check if the Index only consists of integers. + + Returns + ------- + bool + Whether or not the Index only consists of integers. + + See Also + -------- + is_boolean : Check if the Index only consists of booleans. + is_floating : Check if the Index is a floating type. + is_numeric : Check if the Index only consists of numeric data. + is_object : Check if the Index is of the object dtype. + is_categorical : Check if the Index holds categorical data. + is_interval : Check if the Index holds Interval objects. + is_mixed : Check if the Index holds data with mixed data types. + + Examples + -------- + >>> idx = pd.Index([1, 2, 3, 4]) + >>> idx.is_integer() + True + + >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0]) + >>> idx.is_integer() + False + + >>> idx = pd.Index(["Apple", "Mango", "Watermelon"]) + >>> idx.is_integer() + False + """ + return self.inferred_type in ["integer"] + + @final + def is_floating(self) -> bool: + """ + Check if the Index is a floating type. + + The Index may consist of only floats, NaNs, or a mix of floats, + integers, or NaNs. 
+
+        Returns
+        -------
+        bool
+            Whether or not the Index only consists of floats, NaNs, or
+            a mix of floats, integers, or NaNs.
+
+        See Also
+        --------
+        is_boolean : Check if the Index only consists of booleans.
+        is_integer : Check if the Index only consists of integers.
+        is_numeric : Check if the Index only consists of numeric data.
+        is_object : Check if the Index is of the object dtype.
+        is_categorical : Check if the Index holds categorical data.
+        is_interval : Check if the Index holds Interval objects.
+        is_mixed : Check if the Index holds data with mixed data types.
+
+        Examples
+        --------
+        >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
+        >>> idx.is_floating()
+        True
+
+        >>> idx = pd.Index([1.0, 2.0, np.nan, 4.0])
+        >>> idx.is_floating()
+        True
+
+        >>> idx = pd.Index([1, 2, 3, 4, np.nan])
+        >>> idx.is_floating()
+        True
+
+        >>> idx = pd.Index([1, 2, 3, 4])
+        >>> idx.is_floating()
+        False
+        """
+        return self.inferred_type in ["floating", "mixed-integer-float", "integer-na"]
+
+    @final
+    def is_numeric(self) -> bool:
+        """
+        Check if the Index only consists of numeric data.
+
+        Returns
+        -------
+        bool
+            Whether or not the Index only consists of numeric data.
+
+        See Also
+        --------
+        is_boolean : Check if the Index only consists of booleans.
+        is_integer : Check if the Index only consists of integers.
+        is_floating : Check if the Index is a floating type.
+        is_object : Check if the Index is of the object dtype.
+        is_categorical : Check if the Index holds categorical data.
+        is_interval : Check if the Index holds Interval objects.
+        is_mixed : Check if the Index holds data with mixed data types.
+
+        Examples
+        --------
+        >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
+        >>> idx.is_numeric()
+        True
+
+        >>> idx = pd.Index([1, 2, 3, 4.0])
+        >>> idx.is_numeric()
+        True
+
+        >>> idx = pd.Index([1, 2, 3, 4])
+        >>> idx.is_numeric()
+        True
+
+        >>> idx = pd.Index([1, 2, 3, 4.0, np.nan])
+        >>> idx.is_numeric()
+        True
+
+        >>> idx = pd.Index([1, 2, 3, 4.0, np.nan, "Apple"])
+        >>> idx.is_numeric()
+        False
+        """
+        return self.inferred_type in ["integer", "floating"]
+
+    @final
+    def is_object(self) -> bool:
+        """
+        Check if the Index is of the object dtype.
+
+        Returns
+        -------
+        bool
+            Whether or not the Index is of the object dtype.
+
+        See Also
+        --------
+        is_boolean : Check if the Index only consists of booleans.
+        is_integer : Check if the Index only consists of integers.
+        is_floating : Check if the Index is a floating type.
+        is_numeric : Check if the Index only consists of numeric data.
+        is_categorical : Check if the Index holds categorical data.
+        is_interval : Check if the Index holds Interval objects.
+        is_mixed : Check if the Index holds data with mixed data types.
+
+        Examples
+        --------
+        >>> idx = pd.Index(["Apple", "Mango", "Watermelon"])
+        >>> idx.is_object()
+        True
+
+        >>> idx = pd.Index(["Apple", "Mango", 2.0])
+        >>> idx.is_object()
+        True
+
+        >>> idx = pd.Index(["Watermelon", "Orange", "Apple",
+        ...                 "Watermelon"]).astype("category")
+        >>> idx.is_object()
+        False
+
+        >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
+        >>> idx.is_object()
+        False
+        """
+        return is_object_dtype(self.dtype)
+
+    @final
+    def is_categorical(self) -> bool:
+        """
+        Check if the Index holds categorical data.
+
+        Returns
+        -------
+        bool
+            True if the Index is categorical.
+
+        See Also
+        --------
+        CategoricalIndex : Index for categorical data.
+        is_boolean : Check if the Index only consists of booleans.
+        is_integer : Check if the Index only consists of integers.
+ is_floating : Check if the Index is a floating type. + is_numeric : Check if the Index only consists of numeric data. + is_object : Check if the Index is of the object dtype. + is_interval : Check if the Index holds Interval objects. + is_mixed : Check if the Index holds data with mixed data types. + + Examples + -------- + >>> idx = pd.Index(["Watermelon", "Orange", "Apple", + ... "Watermelon"]).astype("category") + >>> idx.is_categorical() + True + + >>> idx = pd.Index([1, 3, 5, 7]) + >>> idx.is_categorical() + False + + >>> s = pd.Series(["Peter", "Victor", "Elisabeth", "Mar"]) + >>> s + 0 Peter + 1 Victor + 2 Elisabeth + 3 Mar + dtype: object + >>> s.index.is_categorical() + False + """ + return self.inferred_type in ["categorical"] + + @final + def is_interval(self) -> bool: + """ + Check if the Index holds Interval objects. + + Returns + ------- + bool + Whether or not the Index holds Interval objects. + + See Also + -------- + IntervalIndex : Index for Interval objects. + is_boolean : Check if the Index only consists of booleans. + is_integer : Check if the Index only consists of integers. + is_floating : Check if the Index is a floating type. + is_numeric : Check if the Index only consists of numeric data. + is_object : Check if the Index is of the object dtype. + is_categorical : Check if the Index holds categorical data. + is_mixed : Check if the Index holds data with mixed data types. + + Examples + -------- + >>> idx = pd.Index([pd.Interval(left=0, right=5), + ... pd.Interval(left=5, right=10)]) + >>> idx.is_interval() + True + + >>> idx = pd.Index([1, 3, 5, 7]) + >>> idx.is_interval() + False + """ + return self.inferred_type in ["interval"] + + @final + def is_mixed(self) -> bool: + """ + Check if the Index holds data with mixed data types. + + Returns + ------- + bool + Whether or not the Index holds data with mixed data types. + + See Also + -------- + is_boolean : Check if the Index only consists of booleans. + is_integer : Check if the Index only consists of integers. + is_floating : Check if the Index is a floating type. + is_numeric : Check if the Index only consists of numeric data. + is_object : Check if the Index is of the object dtype. + is_categorical : Check if the Index holds categorical data. + is_interval : Check if the Index holds Interval objects. + + Examples + -------- + >>> idx = pd.Index(['a', np.nan, 'b']) + >>> idx.is_mixed() + True + + >>> idx = pd.Index([1.0, 2.0, 3.0, 5.0]) + >>> idx.is_mixed() + False + """ + warnings.warn( + "Index.is_mixed is deprecated and will be removed in a future version. " + "Check index.inferred_type directly instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self.inferred_type in ["mixed"] + + @final + def holds_integer(self) -> bool: + """ + Whether the type is an integer type. + """ + return self.inferred_type in ["integer", "mixed-integer"] + + @cache_readonly + def inferred_type(self) -> str_t: + """ + Return a string of the type inferred from the values. + """ + return lib.infer_dtype(self._values, skipna=False) + + @cache_readonly + def _is_all_dates(self) -> bool: + """ + Whether or not the index values only consist of dates. + """ + return is_datetime_array(ensure_object(self._values)) + + @cache_readonly + @final + def is_all_dates(self) -> bool: + """ + Whether or not the index values only consist of dates. + """ + warnings.warn( + "Index.is_all_dates is deprecated, will be removed in a future version. 
" + "check index.inferred_type instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self._is_all_dates + + @final + @cache_readonly + def _is_multi(self) -> bool: + """ + Cached check equivalent to isinstance(self, MultiIndex) + """ + return isinstance(self, ABCMultiIndex) + + # -------------------------------------------------------------------- + # Pickle Methods + + def __reduce__(self): + d = {"data": self._data, "name": self.name} + return _new_Index, (type(self), d), None + + # -------------------------------------------------------------------- + # Null Handling Methods + + @cache_readonly + def _na_value(self): + """The expected NA value to use with this index.""" + dtype = self.dtype + if isinstance(dtype, np.dtype): + if dtype.kind in ["m", "M"]: + return NaT + return np.nan + return dtype.na_value + + @cache_readonly + def _isnan(self) -> npt.NDArray[np.bool_]: + """ + Return if each value is NaN. + """ + if self._can_hold_na: + return isna(self) + else: + # shouldn't reach to this condition by checking hasnans beforehand + values = np.empty(len(self), dtype=np.bool_) + values.fill(False) + return values + + @cache_readonly + def hasnans(self) -> bool: + """ + Return True if there are any NaNs. + + Enables various performance speedups. + """ + if self._can_hold_na: + return bool(self._isnan.any()) + else: + return False + + @final + def isna(self) -> npt.NDArray[np.bool_]: + """ + Detect missing values. + + Return a boolean same-sized object indicating if the values are NA. + NA values, such as ``None``, :attr:`numpy.NaN` or :attr:`pd.NaT`, get + mapped to ``True`` values. + Everything else get mapped to ``False`` values. Characters such as + empty strings `''` or :attr:`numpy.inf` are not considered NA values + (unless you set ``pandas.options.mode.use_inf_as_na = True``). + + Returns + ------- + numpy.ndarray[bool] + A boolean array of whether my values are NA. + + See Also + -------- + Index.notna : Boolean inverse of isna. + Index.dropna : Omit entries with missing values. + isna : Top-level isna. + Series.isna : Detect missing values in Series object. + + Examples + -------- + Show which entries in a pandas.Index are NA. The result is an + array. + + >>> idx = pd.Index([5.2, 6.0, np.NaN]) + >>> idx + Float64Index([5.2, 6.0, nan], dtype='float64') + >>> idx.isna() + array([False, False, True]) + + Empty strings are not considered NA values. None is considered an NA + value. + + >>> idx = pd.Index(['black', '', 'red', None]) + >>> idx + Index(['black', '', 'red', None], dtype='object') + >>> idx.isna() + array([False, False, False, True]) + + For datetimes, `NaT` (Not a Time) is considered as an NA value. + + >>> idx = pd.DatetimeIndex([pd.Timestamp('1940-04-25'), + ... pd.Timestamp(''), None, pd.NaT]) + >>> idx + DatetimeIndex(['1940-04-25', 'NaT', 'NaT', 'NaT'], + dtype='datetime64[ns]', freq=None) + >>> idx.isna() + array([False, True, True, True]) + """ + return self._isnan + + isnull = isna + + @final + def notna(self) -> npt.NDArray[np.bool_]: + """ + Detect existing (non-missing) values. + + Return a boolean same-sized object indicating if the values are not NA. + Non-missing values get mapped to ``True``. Characters such as empty + strings ``''`` or :attr:`numpy.inf` are not considered NA values + (unless you set ``pandas.options.mode.use_inf_as_na = True``). + NA values, such as None or :attr:`numpy.NaN`, get mapped to ``False`` + values. + + Returns + ------- + numpy.ndarray[bool] + Boolean array to indicate which entries are not NA. 
+
+        See Also
+        --------
+        Index.notnull : Alias of notna.
+        Index.isna : Inverse of notna.
+        notna : Top-level notna.
+
+        Examples
+        --------
+        Show which entries in an Index are not NA. The result is an
+        array.
+
+        >>> idx = pd.Index([5.2, 6.0, np.NaN])
+        >>> idx
+        Float64Index([5.2, 6.0, nan], dtype='float64')
+        >>> idx.notna()
+        array([ True,  True, False])
+
+        Empty strings are not considered NA values. None is considered an NA
+        value.
+
+        >>> idx = pd.Index(['black', '', 'red', None])
+        >>> idx
+        Index(['black', '', 'red', None], dtype='object')
+        >>> idx.notna()
+        array([ True,  True,  True, False])
+        """
+        return ~self.isna()
+
+    notnull = notna
+
+    def fillna(self, value=None, downcast=None):
+        """
+        Fill NA/NaN values with the specified value.
+
+        Parameters
+        ----------
+        value : scalar
+            Scalar value to use to fill holes (e.g. 0).
+            This value cannot be a list-like.
+        downcast : dict, default None
+            A dict of item->dtype of what to downcast if possible,
+            or the string 'infer' which will try to downcast to an appropriate
+            equal type (e.g. float64 to int64 if possible).
+
+        Returns
+        -------
+        Index
+
+        See Also
+        --------
+        DataFrame.fillna : Fill NaN values of a DataFrame.
+        Series.fillna : Fill NaN values of a Series.
+        """
+
+        value = self._require_scalar(value)
+        if self.hasnans:
+            result = self.putmask(self._isnan, value)
+            if downcast is None:
+                # no need to care metadata other than name
+                # because it can't have freq if it has NaTs
+                return Index._with_infer(result, name=self.name)
+            raise NotImplementedError(
+                f"{type(self).__name__}.fillna does not support 'downcast' "
+                "argument values other than 'None'."
+            )
+        return self._view()
+
+    def dropna(self: _IndexT, how: str_t = "any") -> _IndexT:
+        """
+        Return Index without NA/NaN values.
+
+        Parameters
+        ----------
+        how : {'any', 'all'}, default 'any'
+            If the Index is a MultiIndex, drop the value when any or all levels
+            are NaN.
+
+        Returns
+        -------
+        Index
+        """
+        if how not in ("any", "all"):
+            raise ValueError(f"invalid how option: {how}")
+
+        if self.hasnans:
+            res_values = self._values[~self._isnan]
+            return type(self)._simple_new(res_values, name=self.name)
+        return self._view()
+
+    # --------------------------------------------------------------------
+    # Uniqueness Methods
+
+    def unique(self: _IndexT, level: Hashable | None = None) -> _IndexT:
+        """
+        Return unique values in the index.
+
+        Unique values are returned in order of appearance; this does NOT sort.
+
+        Parameters
+        ----------
+        level : int or hashable, optional
+            Only return values from specified level (for MultiIndex).
+            If int, gets the level by integer position, else by level name.
+
+        Returns
+        -------
+        Index
+
+        See Also
+        --------
+        unique : Numpy array of unique values in that column.
+        Series.unique : Return unique values of Series object.
+        """
+        if level is not None:
+            self._validate_index_level(level)
+
+        if self.is_unique:
+            return self._view()
+
+        result = super().unique()
+        return self._shallow_copy(result)
+
+    @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"])
+    def drop_duplicates(self: _IndexT, keep: str_t | bool = "first") -> _IndexT:
+        """
+        Return Index with duplicate values removed.
+
+        Parameters
+        ----------
+        keep : {'first', 'last', ``False``}, default 'first'
+            - 'first' : Drop duplicates except for the first occurrence.
+            - 'last' : Drop duplicates except for the last occurrence.
+            - ``False`` : Drop all duplicates.
+
+        Returns
+        -------
+        deduplicated : Index
+
+        See Also
+        --------
+        Series.drop_duplicates : Equivalent method on Series.
+        DataFrame.drop_duplicates : Equivalent method on DataFrame.
+        Index.duplicated : Related method on Index, indicating duplicate
+            Index values.
+
+        Examples
+        --------
+        Generate a pandas.Index with duplicate values.
+
+        >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo'])
+
+        The `keep` parameter controls which duplicate values are removed.
+        The value 'first' keeps the first occurrence for each
+        set of duplicated entries. The default value of keep is 'first'.
+
+        >>> idx.drop_duplicates(keep='first')
+        Index(['lama', 'cow', 'beetle', 'hippo'], dtype='object')
+
+        The value 'last' keeps the last occurrence for each set of duplicated
+        entries.
+
+        >>> idx.drop_duplicates(keep='last')
+        Index(['cow', 'beetle', 'lama', 'hippo'], dtype='object')
+
+        The value ``False`` discards all sets of duplicated entries.
+
+        >>> idx.drop_duplicates(keep=False)
+        Index(['cow', 'beetle', 'hippo'], dtype='object')
+        """
+        if self.is_unique:
+            return self._view()
+
+        return super().drop_duplicates(keep=keep)
+
+    def duplicated(
+        self, keep: Literal["first", "last", False] = "first"
+    ) -> npt.NDArray[np.bool_]:
+        """
+        Indicate duplicate index values.
+
+        Duplicated values are indicated as ``True`` values in the resulting
+        array. Either all duplicates, all except the first, or all except the
+        last occurrence of duplicates can be indicated.
+
+        Parameters
+        ----------
+        keep : {'first', 'last', False}, default 'first'
+            The value or values in a set of duplicates to mark as ``True``.
+
+            - 'first' : Mark duplicates as ``True`` except for the first
+              occurrence.
+            - 'last' : Mark duplicates as ``True`` except for the last
+              occurrence.
+            - ``False`` : Mark all duplicates as ``True``.
+
+        Returns
+        -------
+        np.ndarray[bool]
+
+        See Also
+        --------
+        Series.duplicated : Equivalent method on pandas.Series.
+        DataFrame.duplicated : Equivalent method on pandas.DataFrame.
+        Index.drop_duplicates : Remove duplicate values from Index.
+
+        Examples
+        --------
+        By default, for each set of duplicated values, the first occurrence is
+        set to False and all others to True:
+
+        >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama'])
+        >>> idx.duplicated()
+        array([False, False,  True, False,  True])
+
+        which is equivalent to
+
+        >>> idx.duplicated(keep='first')
+        array([False, False,  True, False,  True])
+
+        By using 'last', the last occurrence of each set of duplicated values
+        is set to False and all others to True:
+
+        >>> idx.duplicated(keep='last')
+        array([ True, False,  True, False, False])
+
+        By setting keep to ``False``, all duplicates are True:
+
+        >>> idx.duplicated(keep=False)
+        array([ True, False,  True, False,  True])
+        """
+        if self.is_unique:
+            # fastpath available bc we are immutable
+            return np.zeros(len(self), dtype=bool)
+        return self._duplicated(keep=keep)
+
+    # --------------------------------------------------------------------
+    # Arithmetic & Logical Methods
+
+    def __iadd__(self, other):
+        # alias for __add__
+        return self + other
+
+    @final
+    def __and__(self, other):
+        warnings.warn(
+            "Index.__and__ operating as a set operation is deprecated, "
+            "in the future this will be a logical operation matching "
+            "Series.__and__. 
Use index.intersection(other) instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self.intersection(other) + + @final + def __or__(self, other): + warnings.warn( + "Index.__or__ operating as a set operation is deprecated, " + "in the future this will be a logical operation matching " + "Series.__or__. Use index.union(other) instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self.union(other) + + @final + def __xor__(self, other): + warnings.warn( + "Index.__xor__ operating as a set operation is deprecated, " + "in the future this will be a logical operation matching " + "Series.__xor__. Use index.symmetric_difference(other) instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self.symmetric_difference(other) + + @final + def __nonzero__(self): + raise ValueError( + f"The truth value of a {type(self).__name__} is ambiguous. " + "Use a.empty, a.bool(), a.item(), a.any() or a.all()." + ) + + __bool__ = __nonzero__ + + # -------------------------------------------------------------------- + # Set Operation Methods + + def _get_reconciled_name_object(self, other): + """ + If the result of a set operation will be self, + return self, unless the name changes, in which + case make a shallow copy of self. + """ + name = get_op_result_name(self, other) + if self.name is not name: + return self.rename(name) + return self + + @final + def _validate_sort_keyword(self, sort): + if sort not in [None, False]: + raise ValueError( + "The 'sort' keyword only takes the values of " + f"None or False; {sort} was passed." + ) + + @final + def union(self, other, sort=None): + """ + Form the union of two Index objects. + + If the Index objects are incompatible, both Index objects will be + cast to dtype('object') first. + + .. versionchanged:: 0.25.0 + + Parameters + ---------- + other : Index or array-like + sort : bool or None, default None + Whether to sort the resulting Index. + + * None : Sort the result, except when + + 1. `self` and `other` are equal. + 2. `self` or `other` has length 0. + 3. Some values in `self` or `other` cannot be compared. + A RuntimeWarning is issued in this case. + + * False : do not sort the result. + + Returns + ------- + union : Index + + Examples + -------- + Union matching dtypes + + >>> idx1 = pd.Index([1, 2, 3, 4]) + >>> idx2 = pd.Index([3, 4, 5, 6]) + >>> idx1.union(idx2) + Int64Index([1, 2, 3, 4, 5, 6], dtype='int64') + + Union mismatched dtypes + + >>> idx1 = pd.Index(['a', 'b', 'c', 'd']) + >>> idx2 = pd.Index([1, 2, 3, 4]) + >>> idx1.union(idx2) + Index(['a', 'b', 'c', 'd', 1, 2, 3, 4], dtype='object') + + MultiIndex case + + >>> idx1 = pd.MultiIndex.from_arrays( + ... [[1, 1, 2, 2], ["Red", "Blue", "Red", "Blue"]] + ... ) + >>> idx1 + MultiIndex([(1, 'Red'), + (1, 'Blue'), + (2, 'Red'), + (2, 'Blue')], + ) + >>> idx2 = pd.MultiIndex.from_arrays( + ... [[3, 3, 2, 2], ["Red", "Green", "Red", "Green"]] + ... 
) + >>> idx2 + MultiIndex([(3, 'Red'), + (3, 'Green'), + (2, 'Red'), + (2, 'Green')], + ) + >>> idx1.union(idx2) + MultiIndex([(1, 'Blue'), + (1, 'Red'), + (2, 'Blue'), + (2, 'Green'), + (2, 'Red'), + (3, 'Green'), + (3, 'Red')], + ) + >>> idx1.union(idx2, sort=False) + MultiIndex([(1, 'Red'), + (1, 'Blue'), + (2, 'Red'), + (2, 'Blue'), + (3, 'Red'), + (3, 'Green'), + (2, 'Green')], + ) + """ + self._validate_sort_keyword(sort) + self._assert_can_do_setop(other) + other, result_name = self._convert_can_do_setop(other) + + if not is_dtype_equal(self.dtype, other.dtype): + if ( + isinstance(self, ABCMultiIndex) + and not is_object_dtype(unpack_nested_dtype(other)) + and len(other) > 0 + ): + raise NotImplementedError( + "Can only union MultiIndex with MultiIndex or Index of tuples, " + "try mi.to_flat_index().union(other) instead." + ) + if ( + isinstance(self, ABCDatetimeIndex) + and isinstance(other, ABCDatetimeIndex) + and self.tz is not None + and other.tz is not None + ): + # GH#39328 + warnings.warn( + "In a future version, the union of DatetimeIndex objects " + "with mismatched timezones will cast both to UTC instead of " + "object dtype. To retain the old behavior, " + "use `index.astype(object).union(other)`", + FutureWarning, + stacklevel=find_stack_level(), + ) + + dtype = self._find_common_type_compat(other) + left = self.astype(dtype, copy=False) + right = other.astype(dtype, copy=False) + return left.union(right, sort=sort) + + elif not len(other) or self.equals(other): + # NB: whether this (and the `if not len(self)` check below) come before + # or after the is_dtype_equal check above affects the returned dtype + return self._get_reconciled_name_object(other) + + elif not len(self): + return other._get_reconciled_name_object(self) + + result = self._union(other, sort=sort) + + return self._wrap_setop_result(other, result) + + def _union(self, other: Index, sort): + """ + Specific union logic should go here. In subclasses, union behavior + should be overwritten here rather than in `self.union`. + + Parameters + ---------- + other : Index or array-like + sort : False or None, default False + Whether to sort the resulting index. + + * False : do not sort the result. + * None : sort the result, except when `self` and `other` are equal + or when the values cannot be compared. + + Returns + ------- + Index + """ + lvals = self._values + rvals = other._values + + if ( + sort is None + and self.is_monotonic + and other.is_monotonic + and not (self.has_duplicates and other.has_duplicates) + and self._can_use_libjoin + ): + # Both are monotonic and at least one is unique, so can use outer join + # (actually don't need either unique, but without this restriction + # test_union_same_value_duplicated_in_both fails) + try: + return self._outer_indexer(other)[0] + except (TypeError, IncompatibleFrequency): + # incomparable objects; should only be for object dtype + value_list = list(lvals) + + # worth making this faster? a very unusual case + value_set = set(lvals) + value_list.extend([x for x in rvals if x not in value_set]) + # If objects are unorderable, we must have object dtype. 
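+                # e.g. mixed str/int values are unorderable, so fall back to an
+                # order-preserving concatenation rather than a sorted union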
+ return np.array(value_list, dtype=object) + + elif not other.is_unique: + # other has duplicates + result = algos.union_with_duplicates(lvals, rvals) + return _maybe_try_sort(result, sort) + + # Self may have duplicates; other already checked as unique + # find indexes of things in "other" that are not in "self" + if self._index_as_unique: + indexer = self.get_indexer(other) + missing = (indexer == -1).nonzero()[0] + else: + missing = algos.unique1d(self.get_indexer_non_unique(other)[1]) + + if len(missing) > 0: + other_diff = rvals.take(missing) + result = concat_compat((lvals, other_diff)) + else: + result = lvals + + if not self.is_monotonic or not other.is_monotonic: + # if both are monotonic then result should already be sorted + result = _maybe_try_sort(result, sort) + + return result + + @final + def _wrap_setop_result(self, other: Index, result) -> Index: + name = get_op_result_name(self, other) + if isinstance(result, Index): + if result.name != name: + result = result.rename(name) + else: + result = self._shallow_copy(result, name=name) + + if type(self) is Index and self.dtype != _dtype_obj: + # i.e. ExtensionArray-backed + # TODO(ExtensionIndex): revert this astype; it is a kludge to make + # it possible to split ExtensionEngine from ExtensionIndex PR. + return result.astype(self.dtype, copy=False) + return result + + @final + def intersection(self, other, sort=False): + """ + Form the intersection of two Index objects. + + This returns a new Index with elements common to the index and `other`. + + Parameters + ---------- + other : Index or array-like + sort : False or None, default False + Whether to sort the resulting index. + + * False : do not sort the result. + * None : sort the result, except when `self` and `other` are equal + or when the values cannot be compared. + + Returns + ------- + intersection : Index + + Examples + -------- + >>> idx1 = pd.Index([1, 2, 3, 4]) + >>> idx2 = pd.Index([3, 4, 5, 6]) + >>> idx1.intersection(idx2) + Int64Index([3, 4], dtype='int64') + """ + self._validate_sort_keyword(sort) + self._assert_can_do_setop(other) + other, result_name = self._convert_can_do_setop(other) + + if self.equals(other): + if self.has_duplicates: + return self.unique()._get_reconciled_name_object(other) + return self._get_reconciled_name_object(other) + + if len(self) == 0 or len(other) == 0: + # fastpath; we need to be careful about having commutativity + + if self._is_multi or other._is_multi: + # _convert_can_do_setop ensures that we have both or neither + # We retain self.levels + return self[:0].rename(result_name) + + dtype = self._find_common_type_compat(other) + if is_dtype_equal(self.dtype, dtype): + # Slicing allows us to retain DTI/TDI.freq, RangeIndex + + # Note: self[:0] vs other[:0] affects + # 1) which index's `freq` we get in DTI/TDI cases + # This may be a historical artifact, i.e. no documented + # reason for this choice. + # 2) The `step` we get in RangeIndex cases + if len(self) == 0: + return self[:0].rename(result_name) + else: + return other[:0].rename(result_name) + + return Index([], dtype=dtype, name=result_name) + + elif not self._should_compare(other): + # We can infer that the intersection is empty. 
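+            # e.g. comparing a numeric index with a datetime index: no value
+            # can occur in both, so return an empty index of the right type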
+ if isinstance(self, ABCMultiIndex): + return self[:0].rename(result_name) + return Index([], name=result_name) + + elif not is_dtype_equal(self.dtype, other.dtype): + dtype = self._find_common_type_compat(other) + this = self.astype(dtype, copy=False) + other = other.astype(dtype, copy=False) + return this.intersection(other, sort=sort) + + result = self._intersection(other, sort=sort) + return self._wrap_intersection_result(other, result) + + def _intersection(self, other: Index, sort=False): + """ + intersection specialized to the case with matching dtypes. + """ + if self.is_monotonic and other.is_monotonic and self._can_use_libjoin: + try: + result = self._inner_indexer(other)[0] + except TypeError: + # non-comparable; should only be for object dtype + pass + else: + # TODO: algos.unique1d should preserve DTA/TDA + res = algos.unique1d(result) + return ensure_wrapped_if_datetimelike(res) + + res_values = self._intersection_via_get_indexer(other, sort=sort) + res_values = _maybe_try_sort(res_values, sort) + return res_values + + def _wrap_intersection_result(self, other, result): + # We will override for MultiIndex to handle empty results + return self._wrap_setop_result(other, result) + + @final + def _intersection_via_get_indexer(self, other: Index, sort) -> ArrayLike: + """ + Find the intersection of two Indexes using get_indexer. + + Returns + ------- + np.ndarray or ExtensionArray + The returned array will be unique. + """ + left_unique = self.unique() + right_unique = other.unique() + + # even though we are unique, we need get_indexer_for for IntervalIndex + indexer = left_unique.get_indexer_for(right_unique) + + mask = indexer != -1 + + taker = indexer.take(mask.nonzero()[0]) + if sort is False: + # sort bc we want the elements in the same order they are in self + # unnecessary in the case with sort=None bc we will sort later + taker = np.sort(taker) + + result = left_unique.take(taker)._values + return result + + @final + def difference(self, other, sort=None): + """ + Return a new Index with elements of index not in `other`. + + This is the set difference of two Index objects. + + Parameters + ---------- + other : Index or array-like + sort : False or None, default None + Whether to sort the resulting index. By default, the + values are attempted to be sorted, but any TypeError from + incomparable elements is caught by pandas. + + * None : Attempt to sort the result, but catch any TypeErrors + from comparing incomparable elements. + * False : Do not sort the result. 
+ + Returns + ------- + difference : Index + + Examples + -------- + >>> idx1 = pd.Index([2, 1, 3, 4]) + >>> idx2 = pd.Index([3, 4, 5, 6]) + >>> idx1.difference(idx2) + Int64Index([1, 2], dtype='int64') + >>> idx1.difference(idx2, sort=False) + Int64Index([2, 1], dtype='int64') + """ + self._validate_sort_keyword(sort) + self._assert_can_do_setop(other) + other, result_name = self._convert_can_do_setop(other) + + if self.equals(other): + # Note: we do not (yet) sort even if sort=None GH#24959 + return self[:0].rename(result_name) + + if len(other) == 0: + # Note: we do not (yet) sort even if sort=None GH#24959 + return self.rename(result_name) + + if not self._should_compare(other): + # Nothing matches -> difference is everything + return self.rename(result_name) + + result = self._difference(other, sort=sort) + return self._wrap_difference_result(other, result) + + def _difference(self, other, sort): + # overridden by RangeIndex + + this = self.unique() + + indexer = this.get_indexer_for(other) + indexer = indexer.take((indexer != -1).nonzero()[0]) + + label_diff = np.setdiff1d(np.arange(this.size), indexer, assume_unique=True) + the_diff = this._values.take(label_diff) + the_diff = _maybe_try_sort(the_diff, sort) + + return the_diff + + def _wrap_difference_result(self, other, result): + # We will override for MultiIndex to handle empty results + return self._wrap_setop_result(other, result) + + def symmetric_difference(self, other, result_name=None, sort=None): + """ + Compute the symmetric difference of two Index objects. + + Parameters + ---------- + other : Index or array-like + result_name : str + sort : False or None, default None + Whether to sort the resulting index. By default, the + values are attempted to be sorted, but any TypeError from + incomparable elements is caught by pandas. + + * None : Attempt to sort the result, but catch any TypeErrors + from comparing incomparable elements. + * False : Do not sort the result. + + Returns + ------- + symmetric_difference : Index + + Notes + ----- + ``symmetric_difference`` contains elements that appear in either + ``idx1`` or ``idx2`` but not both. Equivalent to the Index created by + ``idx1.difference(idx2) | idx2.difference(idx1)`` with duplicates + dropped. 
+ + Examples + -------- + >>> idx1 = pd.Index([1, 2, 3, 4]) + >>> idx2 = pd.Index([2, 3, 4, 5]) + >>> idx1.symmetric_difference(idx2) + Int64Index([1, 5], dtype='int64') + """ + self._validate_sort_keyword(sort) + self._assert_can_do_setop(other) + other, result_name_update = self._convert_can_do_setop(other) + if result_name is None: + result_name = result_name_update + + if not self._should_compare(other): + return self.union(other, sort=sort).rename(result_name) + + elif not is_dtype_equal(self.dtype, other.dtype): + dtype = self._find_common_type_compat(other) + this = self.astype(dtype, copy=False) + that = other.astype(dtype, copy=False) + return this.symmetric_difference(that, sort=sort).rename(result_name) + + this = self.unique() + other = other.unique() + indexer = this.get_indexer_for(other) + + # {this} minus {other} + common_indexer = indexer.take((indexer != -1).nonzero()[0]) + left_indexer = np.setdiff1d( + np.arange(this.size), common_indexer, assume_unique=True + ) + left_diff = this._values.take(left_indexer) + + # {other} minus {this} + right_indexer = (indexer == -1).nonzero()[0] + right_diff = other._values.take(right_indexer) + + res_values = concat_compat([left_diff, right_diff]) + res_values = _maybe_try_sort(res_values, sort) + + # pass dtype so we retain object dtype + result = Index(res_values, name=result_name, dtype=res_values.dtype) + + if self._is_multi: + self = cast("MultiIndex", self) + if len(result) == 0: + # On equal symmetric_difference MultiIndexes the difference is empty. + # Therefore, an empty MultiIndex is returned GH#13490 + return type(self)( + levels=[[] for _ in range(self.nlevels)], + codes=[[] for _ in range(self.nlevels)], + names=result.name, + ) + return type(self).from_tuples(result, names=result.name) + + return result + + @final + def _assert_can_do_setop(self, other) -> bool: + if not is_list_like(other): + raise TypeError("Input must be Index or array-like") + return True + + def _convert_can_do_setop(self, other) -> tuple[Index, Hashable]: + if not isinstance(other, Index): + # TODO(2.0): no need to special-case here once _with_infer + # deprecation is enforced + if hasattr(other, "dtype"): + other = Index(other, name=self.name, dtype=other.dtype) + else: + # e.g. list + other = Index(other, name=self.name) + result_name = self.name + else: + result_name = get_op_result_name(self, other) + return other, result_name + + # -------------------------------------------------------------------- + # Indexing Methods + + def get_loc(self, key, method=None, tolerance=None): + """ + Get integer location, slice or boolean mask for requested label. + + Parameters + ---------- + key : label + method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional + * default: exact matches only. + * pad / ffill: find the PREVIOUS index value if no exact match. + * backfill / bfill: use NEXT index value if no exact match + * nearest: use the NEAREST index value if no exact match. Tied + distances are broken by preferring the larger index value. + tolerance : int or float, optional + Maximum distance from index value for inexact matches. The value of + the index at the matching location must satisfy the equation + ``abs(index[loc] - key) <= tolerance``. 
+ + Returns + ------- + loc : int if unique index, slice if monotonic index, else mask + + Examples + -------- + >>> unique_index = pd.Index(list('abc')) + >>> unique_index.get_loc('b') + 1 + + >>> monotonic_index = pd.Index(list('abbc')) + >>> monotonic_index.get_loc('b') + slice(1, 3, None) + + >>> non_monotonic_index = pd.Index(list('abcb')) + >>> non_monotonic_index.get_loc('b') + array([False, True, False, True]) + """ + if method is None: + if tolerance is not None: + raise ValueError( + "tolerance argument only valid if using pad, " + "backfill or nearest lookups" + ) + casted_key = self._maybe_cast_indexer(key) + try: + return self._engine.get_loc(casted_key) + except KeyError as err: + raise KeyError(key) from err + except TypeError: + # If we have a listlike key, _check_indexing_error will raise + # InvalidIndexError. Otherwise we fall through and re-raise + # the TypeError. + self._check_indexing_error(key) + raise + + # GH#42269 + warnings.warn( + f"Passing method to {type(self).__name__}.get_loc is deprecated " + "and will raise in a future version. Use " + "index.get_indexer([item], method=...) instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + if is_scalar(key) and isna(key) and not self.hasnans: + raise KeyError(key) + + if tolerance is not None: + tolerance = self._convert_tolerance(tolerance, np.asarray(key)) + + indexer = self.get_indexer([key], method=method, tolerance=tolerance) + if indexer.ndim > 1 or indexer.size > 1: + raise TypeError("get_loc requires scalar valued input") + loc = indexer.item() + if loc == -1: + raise KeyError(key) + return loc + + _index_shared_docs[ + "get_indexer" + ] = """ + Compute indexer and mask for new index given the current index. The + indexer should be then used as an input to ndarray.take to align the + current data to the new index. + + Parameters + ---------- + target : %(target_klass)s + method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional + * default: exact matches only. + * pad / ffill: find the PREVIOUS index value if no exact match. + * backfill / bfill: use NEXT index value if no exact match + * nearest: use the NEAREST index value if no exact match. Tied + distances are broken by preferring the larger index value. + limit : int, optional + Maximum number of consecutive labels in ``target`` to match for + inexact matches. + tolerance : optional + Maximum distance between original and new labels for inexact + matches. The values of the index at the matching locations must + satisfy the equation ``abs(index[indexer] - target) <= tolerance``. + + Tolerance may be a scalar value, which applies the same tolerance + to all values, or list-like, which applies variable tolerance per + element. List-like includes list, tuple, array, Series, and must be + the same size as the index and its dtype must exactly match the + index's type. + + Returns + ------- + indexer : np.ndarray[np.intp] + Integers from 0 to n - 1 indicating that the index at these + positions matches the corresponding target values. Missing values + in the target are marked by -1. + %(raises_section)s + Notes + ----- + Returns -1 for unmatched values, for further explanation see the + example below. + + Examples + -------- + >>> index = pd.Index(['c', 'a', 'b']) + >>> index.get_indexer(['a', 'b', 'x']) + array([ 1, 2, -1]) + + Notice that the return value is an array of locations in ``index`` + and ``x`` is marked by -1, as it is not in ``index``. 
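+
+    An inexact-lookup sketch (illustrative; the index values here are
+    hypothetical):
+
+    >>> numeric_index = pd.Index([10, 20, 30])
+    >>> numeric_index.get_indexer([12, 19], method='nearest')
+    array([0, 1])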
+ """ + + @Appender(_index_shared_docs["get_indexer"] % _index_doc_kwargs) + @final + def get_indexer( + self, + target, + method: str_t | None = None, + limit: int | None = None, + tolerance=None, + ) -> npt.NDArray[np.intp]: + method = missing.clean_reindex_fill_method(method) + target = self._maybe_cast_listlike_indexer(target) + + self._check_indexing_method(method, limit, tolerance) + + if not self._index_as_unique: + raise InvalidIndexError(self._requires_unique_msg) + + if len(target) == 0: + return np.array([], dtype=np.intp) + + if not self._should_compare(target) and not self._should_partial_index(target): + # IntervalIndex get special treatment bc numeric scalars can be + # matched to Interval scalars + return self._get_indexer_non_comparable(target, method=method, unique=True) + + if is_categorical_dtype(self.dtype): + # _maybe_cast_listlike_indexer ensures target has our dtype + # (could improve perf by doing _should_compare check earlier?) + assert is_dtype_equal(self.dtype, target.dtype) + + indexer = self._engine.get_indexer(target.codes) + if self.hasnans and target.hasnans: + loc = self.get_loc(np.nan) + mask = target.isna() + indexer[mask] = loc + return indexer + + if is_categorical_dtype(target.dtype): + # potential fastpath + # get an indexer for unique categories then propagate to codes via take_nd + # get_indexer instead of _get_indexer needed for MultiIndex cases + # e.g. test_append_different_columns_types + categories_indexer = self.get_indexer(target.categories) + + indexer = algos.take_nd(categories_indexer, target.codes, fill_value=-1) + + if (not self._is_multi and self.hasnans) and target.hasnans: + # Exclude MultiIndex because hasnans raises NotImplementedError + # we should only get here if we are unique, so loc is an integer + # GH#41934 + loc = self.get_loc(np.nan) + mask = target.isna() + indexer[mask] = loc + + return ensure_platform_int(indexer) + + pself, ptarget = self._maybe_promote(target) + if pself is not self or ptarget is not target: + return pself.get_indexer( + ptarget, method=method, limit=limit, tolerance=tolerance + ) + + if is_dtype_equal(self.dtype, target.dtype) and self.equals(target): + # Only call equals if we have same dtype to avoid inference/casting + return np.arange(len(target), dtype=np.intp) + + if not is_dtype_equal(self.dtype, target.dtype) and not is_interval_dtype( + self.dtype + ): + # IntervalIndex gets special treatment for partial-indexing + dtype = self._find_common_type_compat(target) + + this = self.astype(dtype, copy=False) + target = target.astype(dtype, copy=False) + return this._get_indexer( + target, method=method, limit=limit, tolerance=tolerance + ) + + return self._get_indexer(target, method, limit, tolerance) + + def _get_indexer( + self, + target: Index, + method: str_t | None = None, + limit: int | None = None, + tolerance=None, + ) -> npt.NDArray[np.intp]: + if tolerance is not None: + tolerance = self._convert_tolerance(tolerance, target) + + if method in ["pad", "backfill"]: + indexer = self._get_fill_indexer(target, method, limit, tolerance) + elif method == "nearest": + indexer = self._get_nearest_indexer(target, limit, tolerance) + else: + tgt_values = target._get_engine_target() + if target._is_multi and self._is_multi: + engine = self._engine + # error: "IndexEngine" has no attribute "_extract_level_codes" + tgt_values = engine._extract_level_codes( # type: ignore[attr-defined] + target + ) + + indexer = self._engine.get_indexer(tgt_values) + + return ensure_platform_int(indexer) + + @final + def 
_should_partial_index(self, target: Index) -> bool: + """ + Should we attempt partial-matching indexing? + """ + if is_interval_dtype(self.dtype): + # "Index" has no attribute "left" + return self.left._should_compare(target) # type: ignore[attr-defined] + return False + + @final + def _check_indexing_method( + self, + method: str_t | None, + limit: int | None = None, + tolerance=None, + ) -> None: + """ + Raise if we have a get_indexer `method` that is not supported or valid. + """ + if method not in [None, "bfill", "backfill", "pad", "ffill", "nearest"]: + # in practice the clean_reindex_fill_method call would raise + # before we get here + raise ValueError("Invalid fill method") # pragma: no cover + + if self._is_multi: + if method == "nearest": + raise NotImplementedError( + "method='nearest' not implemented yet " + "for MultiIndex; see GitHub issue 9365" + ) + elif method == "pad" or method == "backfill": + if tolerance is not None: + raise NotImplementedError( + "tolerance not implemented yet for MultiIndex" + ) + + if is_interval_dtype(self.dtype) or is_categorical_dtype(self.dtype): + # GH#37871 for now this is only for IntervalIndex and CategoricalIndex + if method is not None: + raise NotImplementedError( + f"method {method} not yet implemented for {type(self).__name__}" + ) + + if method is None: + if tolerance is not None: + raise ValueError( + "tolerance argument only valid if doing pad, " + "backfill or nearest reindexing" + ) + if limit is not None: + raise ValueError( + "limit argument only valid if doing pad, " + "backfill or nearest reindexing" + ) + + def _convert_tolerance(self, tolerance, target: np.ndarray | Index) -> np.ndarray: + # override this method on subclasses + tolerance = np.asarray(tolerance) + if target.size != tolerance.size and tolerance.size > 1: + raise ValueError("list-like tolerance size must match target index size") + return tolerance + + @final + def _get_fill_indexer( + self, target: Index, method: str_t, limit: int | None = None, tolerance=None + ) -> npt.NDArray[np.intp]: + + if self._is_multi: + # TODO: get_indexer_with_fill docstring says values must be _sorted_ + # but that doesn't appear to be enforced + # error: "IndexEngine" has no attribute "get_indexer_with_fill" + return self._engine.get_indexer_with_fill( # type: ignore[attr-defined] + target=target._values, values=self._values, method=method, limit=limit + ) + + if self.is_monotonic_increasing and target.is_monotonic_increasing: + target_values = target._get_engine_target() + own_values = self._get_engine_target() + + if method == "pad": + indexer = libalgos.pad(own_values, target_values, limit=limit) + else: + # i.e. "backfill" + indexer = libalgos.backfill(own_values, target_values, limit=limit) + else: + indexer = self._get_fill_indexer_searchsorted(target, method, limit) + if tolerance is not None and len(self): + indexer = self._filter_indexer_tolerance(target, indexer, tolerance) + return indexer + + @final + def _get_fill_indexer_searchsorted( + self, target: Index, method: str_t, limit: int | None = None + ) -> npt.NDArray[np.intp]: + """ + Fallback pad/backfill get_indexer that works for monotonic decreasing + indexes and non-monotonic targets. 
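+
+        A rough sketch of the approach, as read from the body below: exact
+        matches are located first via ``get_indexer``; the remaining labels
+        are positioned with ``_searchsorted_monotonic``, and for
+        ``method='pad'`` the resulting positions are shifted one step left.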
+ """ + if limit is not None: + raise ValueError( + f"limit argument for {repr(method)} method only well-defined " + "if index and target are monotonic" + ) + + side: Literal["left", "right"] = "left" if method == "pad" else "right" + + # find exact matches first (this simplifies the algorithm) + indexer = self.get_indexer(target) + nonexact = indexer == -1 + indexer[nonexact] = self._searchsorted_monotonic(target[nonexact], side) + if side == "left": + # searchsorted returns "indices into a sorted array such that, + # if the corresponding elements in v were inserted before the + # indices, the order of a would be preserved". + # Thus, we need to subtract 1 to find values to the left. + indexer[nonexact] -= 1 + # This also mapped not found values (values of 0 from + # np.searchsorted) to -1, which conveniently is also our + # sentinel for missing values + else: + # Mark indices to the right of the largest value as not found + indexer[indexer == len(self)] = -1 + return indexer + + @final + def _get_nearest_indexer( + self, target: Index, limit: int | None, tolerance + ) -> npt.NDArray[np.intp]: + """ + Get the indexer for the nearest index labels; requires an index with + values that can be subtracted from each other (e.g., not strings or + tuples). + """ + if not len(self): + return self._get_fill_indexer(target, "pad") + + left_indexer = self.get_indexer(target, "pad", limit=limit) + right_indexer = self.get_indexer(target, "backfill", limit=limit) + + left_distances = self._difference_compat(target, left_indexer) + right_distances = self._difference_compat(target, right_indexer) + + op = operator.lt if self.is_monotonic_increasing else operator.le + indexer = np.where( + op(left_distances, right_distances) | (right_indexer == -1), + left_indexer, + right_indexer, + ) + if tolerance is not None: + indexer = self._filter_indexer_tolerance(target, indexer, tolerance) + return indexer + + @final + def _filter_indexer_tolerance( + self, + target: Index, + indexer: npt.NDArray[np.intp], + tolerance, + ) -> npt.NDArray[np.intp]: + + distance = self._difference_compat(target, indexer) + + return np.where(distance <= tolerance, indexer, -1) + + @final + def _difference_compat( + self, target: Index, indexer: npt.NDArray[np.intp] + ) -> ArrayLike: + # Compatibility for PeriodArray, for which __sub__ returns an ndarray[object] + # of DateOffset objects, which do not support __abs__ (and would be slow + # if they did) + + if isinstance(self.dtype, PeriodDtype): + # Note: we only get here with matching dtypes + own_values = cast("PeriodArray", self._data)._ndarray + target_values = cast("PeriodArray", target._data)._ndarray + diff = own_values[indexer] - target_values + else: + # error: Unsupported left operand type for - ("ExtensionArray") + diff = self._values[indexer] - target._values # type: ignore[operator] + return abs(diff) + + # -------------------------------------------------------------------- + # Indexer Conversion Methods + + @final + def _validate_positional_slice(self, key: slice) -> None: + """ + For positional indexing, a slice must have either int or None + for each of start, stop, and step. + """ + self._validate_indexer("positional", key.start, "iloc") + self._validate_indexer("positional", key.stop, "iloc") + self._validate_indexer("positional", key.step, "iloc") + + def _convert_slice_indexer(self, key: slice, kind: str_t): + """ + Convert a slice indexer. + + By definition, these are labels unless 'iloc' is passed in. 
+        Floats are not allowed as the start, step, or stop of the slice.
+
+        Parameters
+        ----------
+        key : slice
+            The slice to convert.
+        kind : {'loc', 'getitem'}
+        """
+        assert kind in ["loc", "getitem"], kind
+
+        # potentially cast the bounds to integers
+        start, stop, step = key.start, key.stop, key.step
+
+        # figure out if this is a positional indexer
+        def is_int(v):
+            return v is None or is_integer(v)
+
+        is_index_slice = is_int(start) and is_int(stop) and is_int(step)
+        is_positional = is_index_slice and not (
+            self.is_integer() or self.is_categorical()
+        )
+
+        if kind == "getitem":
+            """
+            called from the getitem slicers, validate that we are in fact
+            integers
+            """
+            if self.is_integer() or is_index_slice:
+                self._validate_indexer("slice", key.start, "getitem")
+                self._validate_indexer("slice", key.stop, "getitem")
+                self._validate_indexer("slice", key.step, "getitem")
+                return key
+
+        # convert the slice to an indexer here
+
+        # if we are mixed and have integers
+        if is_positional:
+            try:
+                # Validate start & stop
+                if start is not None:
+                    self.get_loc(start)
+                if stop is not None:
+                    self.get_loc(stop)
+                is_positional = False
+            except KeyError:
+                pass
+
+        if com.is_null_slice(key):
+            # It doesn't matter if we are positional or label based
+            indexer = key
+        elif is_positional:
+            if kind == "loc":
+                # GH#16121, GH#24612, GH#31810
+                warnings.warn(
+                    "Slicing a positional slice with .loc is not supported, "
+                    "and will raise TypeError in a future version. "
+                    "Use .loc with labels or .iloc with positions instead.",
+                    FutureWarning,
+                    stacklevel=find_stack_level(),
+                )
+            indexer = key
+        else:
+            indexer = self.slice_indexer(start, stop, step)
+
+        return indexer
+
+    @final
+    def _invalid_indexer(self, form: str_t, key) -> TypeError:
+        """
+        Consistent invalid indexer message.
+        """
+        return TypeError(
+            f"cannot do {form} indexing on {type(self).__name__} with these "
+            f"indexers [{key}] of type {type(key).__name__}"
+        )
+
+    # --------------------------------------------------------------------
+    # Reindex Methods
+
+    @final
+    def _validate_can_reindex(self, indexer: np.ndarray) -> None:
+        """
+        Check if we are allowing reindexing with this particular indexer.
+
+        Parameters
+        ----------
+        indexer : an integer ndarray
+
+        Raises
+        ------
+        ValueError
+            If it is a duplicate axis.
+        """
+        # trying to reindex on an axis with duplicates
+        if not self._index_as_unique and len(indexer):
+            raise ValueError("cannot reindex on an axis with duplicate labels")
+
+    def reindex(
+        self, target, method=None, level=None, limit=None, tolerance=None
+    ) -> tuple[Index, npt.NDArray[np.intp] | None]:
+        """
+        Create index with target's values.
+
+        Parameters
+        ----------
+        target : an iterable
+        method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
+            * default: exact matches only.
+            * pad / ffill: find the PREVIOUS index value if no exact match.
+            * backfill / bfill: use NEXT index value if no exact match.
+            * nearest: use the NEAREST index value if no exact match. Tied
+              distances are broken by preferring the larger index value.
+        level : int, optional
+            Level of multiindex.
+        limit : int, optional
+            Maximum number of consecutive labels in ``target`` to match for
+            inexact matches.
+        tolerance : int or float, optional
+            Maximum distance between original and new labels for inexact
+            matches. The values of the index at the matching locations must
+            satisfy the equation ``abs(index[indexer] - target) <= tolerance``.
+ + Tolerance may be a scalar value, which applies the same tolerance + to all values, or list-like, which applies variable tolerance per + element. List-like includes list, tuple, array, Series, and must be + the same size as the index and its dtype must exactly match the + index's type. + + Returns + ------- + new_index : pd.Index + Resulting index. + indexer : np.ndarray[np.intp] or None + Indices of output values in original index. + + Raises + ------ + TypeError + If ``method`` passed along with ``level``. + ValueError + If non-unique multi-index + ValueError + If non-unique index and ``method`` or ``limit`` passed. + + See Also + -------- + Series.reindex + DataFrame.reindex + + Examples + -------- + >>> idx = pd.Index(['car', 'bike', 'train', 'tractor']) + >>> idx + Index(['car', 'bike', 'train', 'tractor'], dtype='object') + >>> idx.reindex(['car', 'bike']) + (Index(['car', 'bike'], dtype='object'), array([0, 1])) + """ + # GH6552: preserve names when reindexing to non-named target + # (i.e. neither Index nor Series). + preserve_names = not hasattr(target, "name") + + # GH7774: preserve dtype/tz if target is empty and not an Index. + target = ensure_has_len(target) # target may be an iterator + + if not isinstance(target, Index) and len(target) == 0: + if level is not None and self._is_multi: + # "Index" has no attribute "levels"; maybe "nlevels"? + idx = self.levels[level] # type: ignore[attr-defined] + else: + idx = self + target = idx[:0] + else: + target = ensure_index(target) + + if level is not None: + if method is not None: + raise TypeError("Fill method not supported if level passed") + + # TODO: tests where passing `keep_order=not self._is_multi` + # makes a difference for non-MultiIndex case + target, indexer, _ = self._join_level( + target, level, how="right", keep_order=not self._is_multi + ) + + else: + if self.equals(target): + indexer = None + else: + if self._index_as_unique: + indexer = self.get_indexer( + target, method=method, limit=limit, tolerance=tolerance + ) + elif self._is_multi: + raise ValueError("cannot handle a non-unique multi-index!") + else: + if method is not None or limit is not None: + raise ValueError( + "cannot reindex a non-unique index " + "with a method or limit" + ) + indexer, _ = self.get_indexer_non_unique(target) + + if not self.is_unique: + # GH#42568 + warnings.warn( + "reindexing with a non-unique Index is deprecated and " + "will raise in a future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + target = self._wrap_reindex_result(target, indexer, preserve_names) + return target, indexer + + def _wrap_reindex_result(self, target, indexer, preserve_names: bool): + target = self._maybe_preserve_names(target, preserve_names) + return target + + def _maybe_preserve_names(self, target: Index, preserve_names: bool): + if preserve_names and target.nlevels == 1 and target.name != self.name: + target = target.copy(deep=False) + target.name = self.name + return target + + @final + def _reindex_non_unique( + self, target: Index + ) -> tuple[Index, npt.NDArray[np.intp], npt.NDArray[np.intp] | None]: + """ + Create a new index with target's values (move/add/delete values as + necessary) use with non-unique Index and a possibly non-unique target. + + Parameters + ---------- + target : an iterable + + Returns + ------- + new_index : pd.Index + Resulting index. + indexer : np.ndarray[np.intp] + Indices of output values in original index. 
+ new_indexer : np.ndarray[np.intp] or None + + """ + target = ensure_index(target) + if len(target) == 0: + # GH#13691 + return self[:0], np.array([], dtype=np.intp), None + + indexer, missing = self.get_indexer_non_unique(target) + check = indexer != -1 + new_labels = self.take(indexer[check]) + new_indexer = None + + if len(missing): + length = np.arange(len(indexer), dtype=np.intp) + + missing = ensure_platform_int(missing) + missing_labels = target.take(missing) + missing_indexer = length[~check] + cur_labels = self.take(indexer[check]).values + cur_indexer = length[check] + + # Index constructor below will do inference + new_labels = np.empty((len(indexer),), dtype=object) + new_labels[cur_indexer] = cur_labels + new_labels[missing_indexer] = missing_labels + + # GH#38906 + if not len(self): + + new_indexer = np.arange(0, dtype=np.intp) + + # a unique indexer + elif target.is_unique: + + # see GH5553, make sure we use the right indexer + new_indexer = np.arange(len(indexer), dtype=np.intp) + new_indexer[cur_indexer] = np.arange(len(cur_labels)) + new_indexer[missing_indexer] = -1 + + # we have a non_unique selector, need to use the original + # indexer here + else: + + # need to retake to have the same size as the indexer + indexer[~check] = -1 + + # reset the new indexer to account for the new size + new_indexer = np.arange(len(self.take(indexer)), dtype=np.intp) + new_indexer[~check] = -1 + + if isinstance(self, ABCMultiIndex): + new_index = type(self).from_tuples(new_labels, names=self.names) + else: + new_index = Index._with_infer(new_labels, name=self.name) + return new_index, indexer, new_indexer + + # -------------------------------------------------------------------- + # Join Methods + + @final + @_maybe_return_indexers + def join( + self, + other, + how: str_t = "left", + level=None, + return_indexers: bool = False, + sort: bool = False, + ): + """ + Compute join_index and indexers to conform data + structures to the new index. + + Parameters + ---------- + other : Index + how : {'left', 'right', 'inner', 'outer'} + level : int or level name, default None + return_indexers : bool, default False + sort : bool, default False + Sort the join keys lexicographically in the result Index. If False, + the order of the join keys depends on the join type (how keyword). + + Returns + ------- + join_index, (left_indexer, right_indexer) + """ + other = ensure_index(other) + + if isinstance(self, ABCDatetimeIndex) and isinstance(other, ABCDatetimeIndex): + if (self.tz is None) ^ (other.tz is None): + # Raise instead of casting to object below. 
+ raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex") + + if not self._is_multi and not other._is_multi: + # We have specific handling for MultiIndex below + pself, pother = self._maybe_promote(other) + if pself is not self or pother is not other: + return pself.join( + pother, how=how, level=level, return_indexers=True, sort=sort + ) + + lindexer: np.ndarray | None + rindexer: np.ndarray | None + + # try to figure out the join level + # GH3662 + if level is None and (self._is_multi or other._is_multi): + + # have the same levels/names so a simple join + if self.names == other.names: + pass + else: + return self._join_multi(other, how=how) + + # join on the level + if level is not None and (self._is_multi or other._is_multi): + return self._join_level(other, level, how=how) + + if len(other) == 0 and how in ("left", "outer"): + join_index = self._view() + rindexer = np.repeat(np.intp(-1), len(join_index)) + return join_index, None, rindexer + + if len(self) == 0 and how in ("right", "outer"): + join_index = other._view() + lindexer = np.repeat(np.intp(-1), len(join_index)) + return join_index, lindexer, None + + if self._join_precedence < other._join_precedence: + how = {"right": "left", "left": "right"}.get(how, how) + join_index, lidx, ridx = other.join( + self, how=how, level=level, return_indexers=True + ) + lidx, ridx = ridx, lidx + return join_index, lidx, ridx + + if not is_dtype_equal(self.dtype, other.dtype): + dtype = self._find_common_type_compat(other) + this = self.astype(dtype, copy=False) + other = other.astype(dtype, copy=False) + return this.join(other, how=how, return_indexers=True) + + _validate_join_method(how) + + if not self.is_unique and not other.is_unique: + return self._join_non_unique(other, how=how) + elif not self.is_unique or not other.is_unique: + if self.is_monotonic and other.is_monotonic: + if self._can_use_libjoin: + # otherwise we will fall through to _join_via_get_indexer + return self._join_monotonic(other, how=how) + else: + return self._join_non_unique(other, how=how) + elif ( + self.is_monotonic + and other.is_monotonic + and self._can_use_libjoin + and ( + not isinstance(self, ABCMultiIndex) + or not any(is_categorical_dtype(dtype) for dtype in self.dtypes) + ) + ): + # Categorical is monotonic if data are ordered as categories, but join can + # not handle this in case of not lexicographically monotonic GH#38502 + try: + return self._join_monotonic(other, how=how) + except TypeError: + # object dtype; non-comparable objects + pass + + return self._join_via_get_indexer(other, how, sort) + + @final + def _join_via_get_indexer( + self, other: Index, how: str_t, sort: bool + ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]: + # Fallback if we do not have any fastpaths available based on + # uniqueness/monotonicity + + # Note: at this point we have checked matching dtypes + + if how == "left": + join_index = self + elif how == "right": + join_index = other + elif how == "inner": + # TODO: sort=False here for backwards compat. It may + # be better to use the sort parameter passed into join + join_index = self.intersection(other, sort=False) + elif how == "outer": + # TODO: sort=True here for backwards compat. 
It may + # be better to use the sort parameter passed into join + join_index = self.union(other) + + if sort: + join_index = join_index.sort_values() + + if join_index is self: + lindexer = None + else: + lindexer = self.get_indexer_for(join_index) + if join_index is other: + rindexer = None + else: + rindexer = other.get_indexer_for(join_index) + return join_index, lindexer, rindexer + + @final + def _join_multi(self, other: Index, how: str_t): + from pandas.core.indexes.multi import MultiIndex + from pandas.core.reshape.merge import restore_dropped_levels_multijoin + + # figure out join names + self_names_list = list(com.not_none(*self.names)) + other_names_list = list(com.not_none(*other.names)) + self_names_order = self_names_list.index + other_names_order = other_names_list.index + self_names = set(self_names_list) + other_names = set(other_names_list) + overlap = self_names & other_names + + # need at least 1 in common + if not overlap: + raise ValueError("cannot join with no overlapping index names") + + if isinstance(self, MultiIndex) and isinstance(other, MultiIndex): + + # Drop the non-matching levels from left and right respectively + ldrop_names = sorted(self_names - overlap, key=self_names_order) + rdrop_names = sorted(other_names - overlap, key=other_names_order) + + # if only the order differs + if not len(ldrop_names + rdrop_names): + self_jnlevels = self + other_jnlevels = other.reorder_levels(self.names) + else: + self_jnlevels = self.droplevel(ldrop_names) + other_jnlevels = other.droplevel(rdrop_names) + + # Join left and right + # Join on same leveled multi-index frames is supported + join_idx, lidx, ridx = self_jnlevels.join( + other_jnlevels, how, return_indexers=True + ) + + # Restore the dropped levels + # Returned index level order is + # common levels, ldrop_names, rdrop_names + dropped_names = ldrop_names + rdrop_names + + levels, codes, names = restore_dropped_levels_multijoin( + self, other, dropped_names, join_idx, lidx, ridx + ) + + # Re-create the multi-index + multi_join_idx = MultiIndex( + levels=levels, codes=codes, names=names, verify_integrity=False + ) + + multi_join_idx = multi_join_idx.remove_unused_levels() + + return multi_join_idx, lidx, ridx + + jl = list(overlap)[0] + + # Case where only one index is multi + # make the indices into mi's that match + flip_order = False + if isinstance(self, MultiIndex): + self, other = other, self + flip_order = True + # flip if join method is right or left + how = {"right": "left", "left": "right"}.get(how, how) + + level = other.names.index(jl) + result = self._join_level(other, level, how=how) + + if flip_order: + return result[0], result[2], result[1] + return result + + @final + def _join_non_unique( + self, other: Index, how: str_t = "left" + ) -> tuple[Index, npt.NDArray[np.intp], npt.NDArray[np.intp]]: + from pandas.core.reshape.merge import get_join_indexers + + # We only get here if dtypes match + assert self.dtype == other.dtype + + left_idx, right_idx = get_join_indexers( + [self._values], [other._values], how=how, sort=True + ) + mask = left_idx == -1 + + join_array = self._values.take(left_idx) + right = other._values.take(right_idx) + + if isinstance(join_array, np.ndarray): + # Argument 3 to "putmask" has incompatible type "Union[ExtensionArray, + # ndarray[Any, Any]]"; expected "Union[_SupportsArray[dtype[Any]], + # _NestedSequence[_SupportsArray[dtype[Any]]], bool, int, f + # loat, complex, str, bytes, _NestedSequence[Union[bool, int, float, + # complex, str, bytes]]]" [arg-type] + 
np.putmask(join_array, mask, right)  # type: ignore[arg-type]
+        else:
+            join_array._putmask(mask, right)
+
+        join_index = self._wrap_joined_index(join_array, other)
+
+        return join_index, left_idx, right_idx
+
+    @final
+    def _join_level(
+        self, other: Index, level, how: str_t = "left", keep_order: bool = True
+    ) -> tuple[MultiIndex, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
+        """
+        The join method *only* affects the level of the resulting
+        MultiIndex. Otherwise it simply aligns the Index data to the
+        labels of the level in the MultiIndex.
+
+        If ``keep_order == True``, the order of the data indexed by the
+        MultiIndex will not be changed; otherwise, it will tie out
+        with ``other``.
+        """
+        from pandas.core.indexes.multi import MultiIndex
+
+        def _get_leaf_sorter(labels: list[np.ndarray]) -> npt.NDArray[np.intp]:
+            """
+            Returns sorter for the innermost level while preserving the
+            order of higher levels.
+
+            Parameters
+            ----------
+            labels : list[np.ndarray]
+                Each ndarray has signed integer dtype, not necessarily identical.
+
+            Returns
+            -------
+            np.ndarray[np.intp]
+            """
+            if labels[0].size == 0:
+                return np.empty(0, dtype=np.intp)
+
+            if len(labels) == 1:
+                return get_group_index_sorter(ensure_platform_int(labels[0]))
+
+            # find indexers of beginning of each set of
+            # same-key labels w.r.t all but last level
+            tic = labels[0][:-1] != labels[0][1:]
+            for lab in labels[1:-1]:
+                tic |= lab[:-1] != lab[1:]
+
+            starts = np.hstack(([True], tic, [True])).nonzero()[0]
+            lab = ensure_int64(labels[-1])
+            return lib.get_level_sorter(lab, ensure_platform_int(starts))
+
+        if isinstance(self, MultiIndex) and isinstance(other, MultiIndex):
+            raise TypeError("Join on level between two MultiIndex objects is ambiguous")
+
+        left, right = self, other
+
+        flip_order = not isinstance(self, MultiIndex)
+        if flip_order:
+            left, right = right, left
+            how = {"right": "left", "left": "right"}.get(how, how)
+
+        assert isinstance(left, MultiIndex)
+
+        level = left._get_level_number(level)
+        old_level = left.levels[level]
+
+        if not right.is_unique:
+            raise NotImplementedError(
+                "Index._join_level on non-unique index is not implemented"
+            )
+
+        new_level, left_lev_indexer, right_lev_indexer = old_level.join(
+            right, how=how, return_indexers=True
+        )
+
+        if left_lev_indexer is None:
+            if keep_order or len(left) == 0:
+                left_indexer = None
+                join_index = left
+            else:  # sort the leaves
+                left_indexer = _get_leaf_sorter(left.codes[: level + 1])
+                join_index = left[left_indexer]
+
+        else:
+            left_lev_indexer = ensure_platform_int(left_lev_indexer)
+            rev_indexer = lib.get_reverse_indexer(left_lev_indexer, len(old_level))
+            old_codes = left.codes[level]
+
+            taker = old_codes[old_codes != -1]
+            new_lev_codes = rev_indexer.take(taker)
+
+            new_codes = list(left.codes)
+            new_codes[level] = new_lev_codes
+
+            new_levels = list(left.levels)
+            new_levels[level] = new_level
+
+            if keep_order:  # just drop missing values. o.w. keep order
+                left_indexer = np.arange(len(left), dtype=np.intp)
+                left_indexer = cast(np.ndarray, left_indexer)
+                mask = new_lev_codes != -1
+                if not mask.all():
+                    new_codes = [lab[mask] for lab in new_codes]
+                    left_indexer = left_indexer[mask]
+
+            else:  # tie out the order with other
+                if level == 0:  # outermost level, take the fast route
+                    max_new_lev = 0 if len(new_lev_codes) == 0 else new_lev_codes.max()
+                    ngroups = 1 + max_new_lev
+                    left_indexer, counts = libalgos.groupsort_indexer(
+                        new_lev_codes, ngroups
+                    )
+
+                    # missing values are placed first; drop them!
+                    left_indexer = left_indexer[counts[0] :]
+                    new_codes = [lab[left_indexer] for lab in new_codes]
+
+                else:  # sort the leaves
+                    mask = new_lev_codes != -1
+                    mask_all = mask.all()
+                    if not mask_all:
+                        new_codes = [lab[mask] for lab in new_codes]
+
+                    left_indexer = _get_leaf_sorter(new_codes[: level + 1])
+                    new_codes = [lab[left_indexer] for lab in new_codes]
+
+                    # left_indexers are w.r.t masked frame.
+                    # reverse to original frame!
+                    if not mask_all:
+                        left_indexer = mask.nonzero()[0][left_indexer]
+
+            join_index = MultiIndex(
+                levels=new_levels,
+                codes=new_codes,
+                names=left.names,
+                verify_integrity=False,
+            )
+
+        if right_lev_indexer is not None:
+            right_indexer = right_lev_indexer.take(join_index.codes[level])
+        else:
+            right_indexer = join_index.codes[level]
+
+        if flip_order:
+            left_indexer, right_indexer = right_indexer, left_indexer
+
+        left_indexer = (
+            None if left_indexer is None else ensure_platform_int(left_indexer)
+        )
+        right_indexer = (
+            None if right_indexer is None else ensure_platform_int(right_indexer)
+        )
+        return join_index, left_indexer, right_indexer
+
+    @final
+    def _join_monotonic(
+        self, other: Index, how: str_t = "left"
+    ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
+        # We only get here with matching dtypes and both monotonic increasing
+        assert other.dtype == self.dtype
+
+        if self.equals(other):
+            ret_index = other if how == "right" else self
+            return ret_index, None, None
+
+        ridx: np.ndarray | None
+        lidx: np.ndarray | None
+
+        if self.is_unique and other.is_unique:
+            # We can perform much better than the general case
+            if how == "left":
+                join_index = self
+                lidx = None
+                ridx = self._left_indexer_unique(other)
+            elif how == "right":
+                join_index = other
+                lidx = other._left_indexer_unique(self)
+                ridx = None
+            elif how == "inner":
+                join_array, lidx, ridx = self._inner_indexer(other)
+                join_index = self._wrap_joined_index(join_array, other)
+            elif how == "outer":
+                join_array, lidx, ridx = self._outer_indexer(other)
+                join_index = self._wrap_joined_index(join_array, other)
+        else:
+            if how == "left":
+                join_array, lidx, ridx = self._left_indexer(other)
+            elif how == "right":
+                join_array, ridx, lidx = other._left_indexer(self)
+            elif how == "inner":
+                join_array, lidx, ridx = self._inner_indexer(other)
+            elif how == "outer":
+                join_array, lidx, ridx = self._outer_indexer(other)
+
+            join_index = self._wrap_joined_index(join_array, other)
+
+        lidx = None if lidx is None else ensure_platform_int(lidx)
+        ridx = None if ridx is None else ensure_platform_int(ridx)
+        return join_index, lidx, ridx
+
+    def _wrap_joined_index(self: _IndexT, joined: ArrayLike, other: _IndexT) -> _IndexT:
+        assert other.dtype == self.dtype
+
+        if isinstance(self, ABCMultiIndex):
+            name = self.names if self.names == other.names else None
+            # error: Incompatible return value type (got "MultiIndex",
+            # expected "_IndexT")
+            return self._constructor(joined, name=name)  # type: ignore[return-value]
+        else:
+            name = get_op_result_name(self, other)
+            return self._constructor._with_infer(joined, name=name)
+
+    @cache_readonly
+    def _can_use_libjoin(self) -> bool:
+        """
+        Whether we can use the fastpaths implemented in _libs.join.
+        """
+        # Note: this will need to be updated when e.g. Nullable dtypes
+        # are supported in Indexes.
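+        # (Interval dtype is the one exclusion below; the libjoin fastpaths
+        # work on the ndarray returned by _get_engine_target, which
+        # presumably does not suit IntervalIndex.)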
+ return not is_interval_dtype(self.dtype) + + # -------------------------------------------------------------------- + # Uncategorized Methods + + @property + def values(self) -> ArrayLike: + """ + Return an array representing the data in the Index. + + .. warning:: + + We recommend using :attr:`Index.array` or + :meth:`Index.to_numpy`, depending on whether you need + a reference to the underlying data or a NumPy array. + + Returns + ------- + array: numpy.ndarray or ExtensionArray + + See Also + -------- + Index.array : Reference to the underlying data. + Index.to_numpy : A NumPy array representing the underlying data. + """ + return self._data + + # error: Decorated property not supported + # https://github.com/python/mypy/issues/1362 + @cache_readonly # type: ignore[misc] + @doc(IndexOpsMixin.array) + def array(self) -> ExtensionArray: + array = self._data + if isinstance(array, np.ndarray): + from pandas.core.arrays.numpy_ import PandasArray + + array = PandasArray(array) + return array + + @property + def _values(self) -> ExtensionArray | np.ndarray: + """ + The best array representation. + + This is an ndarray or ExtensionArray. + + ``_values`` are consistent between ``Series`` and ``Index``. + + It may differ from the public '.values' method. + + index | values | _values | + ----------------- | --------------- | ------------- | + Index | ndarray | ndarray | + CategoricalIndex | Categorical | Categorical | + DatetimeIndex | ndarray[M8ns] | DatetimeArray | + DatetimeIndex[tz] | ndarray[M8ns] | DatetimeArray | + PeriodIndex | ndarray[object] | PeriodArray | + IntervalIndex | IntervalArray | IntervalArray | + + See Also + -------- + values : Values + """ + return self._data + + def _get_engine_target(self) -> np.ndarray: + """ + Get the ndarray that we can pass to the IndexEngine constructor. + """ + # error: Incompatible return value type (got "Union[ExtensionArray, + # ndarray]", expected "ndarray") + if type(self) is Index and isinstance(self._values, ExtensionArray): + # TODO(ExtensionIndex): remove special-case, just use self._values + return self._values.astype(object) + return self._values # type: ignore[return-value] + + def _from_join_target(self, result: np.ndarray) -> ArrayLike: + """ + Cast the ndarray returned from one of the libjoin.foo_indexer functions + back to type(self)._data. + """ + return result + + @doc(IndexOpsMixin._memory_usage) + def memory_usage(self, deep: bool = False) -> int: + result = self._memory_usage(deep=deep) + + # include our engine hashtable + result += self._engine.sizeof(deep=deep) + return result + + @final + def where(self, cond, other=None) -> Index: + """ + Replace values where the condition is False. + + The replacement is taken from other. + + Parameters + ---------- + cond : bool array-like with the same length as self + Condition to select the values on. + other : scalar, or array-like, default None + Replacement if the condition is False. + + Returns + ------- + pandas.Index + A copy of self with values replaced from other + where the condition is False. + + See Also + -------- + Series.where : Same method for Series. + DataFrame.where : Same method for DataFrame. 
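+
+        Notes
+        -----
+        As implemented below, this is a thin wrapper around
+        :meth:`Index.putmask` with the condition inverted: values are kept
+        where ``cond`` is True and replaced where it is False.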
+
+        Examples
+        --------
+        >>> idx = pd.Index(['car', 'bike', 'train', 'tractor'])
+        >>> idx
+        Index(['car', 'bike', 'train', 'tractor'], dtype='object')
+        >>> idx.where(idx.isin(['car', 'train']), 'other')
+        Index(['car', 'other', 'train', 'other'], dtype='object')
+        """
+        if isinstance(self, ABCMultiIndex):
+            raise NotImplementedError(
+                ".where is not supported for MultiIndex operations"
+            )
+        cond = np.asarray(cond, dtype=bool)
+        return self.putmask(~cond, other)
+
+    # construction helpers
+    @final
+    @classmethod
+    def _scalar_data_error(cls, data):
+        # We return the TypeError so that we can raise it from the constructor
+        # in order to keep mypy happy
+        return TypeError(
+            f"{cls.__name__}(...) must be called with a collection of some "
+            f"kind, {repr(data)} was passed"
+        )
+
+    @final
+    @classmethod
+    def _string_data_error(cls, data):
+        raise TypeError(
+            "String dtype not supported, you may need "
+            "to explicitly cast to a numeric type"
+        )
+
+    def _validate_fill_value(self, value):
+        """
+        Check if the value can be inserted into our array without casting,
+        and convert it to an appropriate native type if necessary.
+
+        Raises
+        ------
+        TypeError
+            If the value cannot be inserted into an array of this dtype.
+        """
+        if not can_hold_element(self._values, value):
+            raise TypeError
+        return value
+
+    @final
+    def _require_scalar(self, value):
+        """
+        Check that this is a scalar value that we can use for setitem-like
+        operations without changing dtype.
+        """
+        if not is_scalar(value):
+            raise TypeError(f"'value' must be a scalar, passed: {type(value).__name__}")
+        return value
+
+    def _is_memory_usage_qualified(self) -> bool:
+        """
+        Return a boolean if we need a qualified .info display.
+        """
+        return self.is_object()
+
+    def is_type_compatible(self, kind: str_t) -> bool:
+        """
+        Whether the index type is compatible with the provided type.
+        """
+        warnings.warn(
+            "Index.is_type_compatible is deprecated and will be removed in a "
+            "future version.",
+            FutureWarning,
+            stacklevel=find_stack_level(),
+        )
+        return kind == self.inferred_type
+
+    def __contains__(self, key: Any) -> bool:
+        """
+        Return a boolean indicating whether the provided key is in the index.
+
+        Parameters
+        ----------
+        key : label
+            The key to check if it is present in the index.
+
+        Returns
+        -------
+        bool
+            Whether the key is present in the index.
+
+        Raises
+        ------
+        TypeError
+            If the key is not hashable.
+
+        See Also
+        --------
+        Index.isin : Returns an ndarray of boolean dtype indicating whether the
+            list-like key is in the index.
+
+        Examples
+        --------
+        >>> idx = pd.Index([1, 2, 3, 4])
+        >>> idx
+        Int64Index([1, 2, 3, 4], dtype='int64')
+
+        >>> 2 in idx
+        True
+        >>> 6 in idx
+        False
+        """
+        hash(key)
+        try:
+            return key in self._engine
+        except (OverflowError, TypeError, ValueError):
+            return False
+
+    # https://github.com/python/typeshed/issues/2148#issuecomment-520783318
+    # Incompatible types in assignment (expression has type "None", base class
+    # "object" defined the type as "Callable[[object], int]")
+    __hash__: None  # type: ignore[assignment]
+
+    @final
+    def __setitem__(self, key, value):
+        raise TypeError("Index does not support mutable operations")
+
+    def __getitem__(self, key):
+        """
+        Override numpy.ndarray's __getitem__ method to work as desired.
+
+        This function adds lists and Series as valid boolean indexers
+        (ndarrays only support ndarray with dtype=bool).
+
+        If resulting ndim != 1, plain ndarray is returned instead of
+        corresponding `Index` subclass.
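+
+        A sketch of the dispatch below: scalar keys return a single element,
+        slices go through ``_simple_new`` for speed, and list-like boolean
+        keys are first converted to an ndarray mask.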
+
+        """
+        getitem = self._data.__getitem__
+
+        if is_integer(key) or is_float(key):
+            # GH#44051 exclude bool, which would return a 2d ndarray
+            key = com.cast_scalar_indexer(key, warn_float=True)
+            return getitem(key)
+
+        if isinstance(key, slice):
+            # This case is separated from the conditional above to avoid
+            # pessimization of com.is_bool_indexer and ndim checks.
+            result = getitem(key)
+            # Going through simple_new for performance.
+            return type(self)._simple_new(result, name=self._name)
+
+        if com.is_bool_indexer(key):
+            # if we have list[bools, length=1e5] then doing this check+convert
+            # takes 166 µs + 2.1 ms and cuts the ndarray.__getitem__
+            # time below from 3.8 ms to 496 µs
+            # if we already have ndarray[bool], the overhead is 1.4 µs or .25%
+            key = np.asarray(key, dtype=bool)
+
+        result = getitem(key)
+        # Because we ruled out integer above, we always get an arraylike here
+        if result.ndim > 1:
+            deprecate_ndim_indexing(result)
+            if hasattr(result, "_ndarray"):
+                # error: Item "ndarray[Any, Any]" of "Union[ExtensionArray,
+                # ndarray[Any, Any]]" has no attribute "_ndarray" [union-attr]
+                # i.e. NDArrayBackedExtensionArray
+                # Unpack to ndarray for MPL compat
+                return result._ndarray  # type: ignore[union-attr]
+            return result
+
+        # NB: Using _constructor._simple_new would break if MultiIndex
+        # didn't override __getitem__
+        return self._constructor._simple_new(result, name=self._name)
+
+    def _getitem_slice(self: _IndexT, slobj: slice) -> _IndexT:
+        """
+        Fastpath for __getitem__ when we know we have a slice.
+        """
+        res = self._data[slobj]
+        return type(self)._simple_new(res, name=self._name)
+
+    @final
+    def _can_hold_identifiers_and_holds_name(self, name) -> bool:
+        """
+        Faster check for ``name in self`` when we know `name` is a Python
+        identifier (e.g. in NDFrame.__getattr__, which hits this to support
+        . key lookup). For indexes that can't hold identifiers (everything
+        but object & categorical) we just return False.
+
+        https://github.com/pandas-dev/pandas/issues/19764
+        """
+        if self.is_object() or self.is_categorical():
+            return name in self
+        return False
+
+    def append(self, other: Index | Sequence[Index]) -> Index:
+        """
+        Append a collection of Index objects together.
+
+        Parameters
+        ----------
+        other : Index or list/tuple of indices
+
+        Returns
+        -------
+        Index
+        """
+        to_concat = [self]
+
+        if isinstance(other, (list, tuple)):
+            to_concat += list(other)
+        else:
+            # error: Argument 1 to "append" of "list" has incompatible type
+            # "Union[Index, Sequence[Index]]"; expected "Index"
+            to_concat.append(other)  # type: ignore[arg-type]
+
+        for obj in to_concat:
+            if not isinstance(obj, Index):
+                raise TypeError("all inputs must be Index")
+
+        names = {obj.name for obj in to_concat}
+        name = None if len(names) > 1 else self.name
+
+        return self._concat(to_concat, name)
+
+    def _concat(self, to_concat: list[Index], name: Hashable) -> Index:
+        """
+        Concatenate multiple Index objects.
+        """
+        to_concat_vals = [x._values for x in to_concat]
+
+        result = concat_compat(to_concat_vals)
+
+        is_numeric = result.dtype.kind in ["i", "u", "f"]
+        if self._is_backward_compat_public_numeric_index and is_numeric:
+            return type(self)._simple_new(result, name=name)
+
+        return Index._with_infer(result, name=name)
+
+    @final
+    def putmask(self, mask, value) -> Index:
+        """
+        Return a new Index of the values set with the mask.
+
+        Returns
+        -------
+        Index
+
+        See Also
+        --------
+        numpy.ndarray.putmask : Changes elements of an array
+            based on conditional and input values.
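+
+        Examples
+        --------
+        An illustrative sketch (values are hypothetical):
+
+        >>> idx = pd.Index([1, 2, 3])
+        >>> idx.putmask([True, False, False], 9)
+        Int64Index([9, 2, 3], dtype='int64')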
+ """ + mask, noop = validate_putmask(self._values, mask) + if noop: + return self.copy() + + if value is None and (self._is_numeric_dtype or self.dtype == object): + value = self._na_value + try: + converted = self._validate_fill_value(value) + except (ValueError, TypeError) as err: + if is_object_dtype(self): # pragma: no cover + raise err + + dtype = self._find_common_type_compat(value) + return self.astype(dtype).putmask(mask, value) + + values = self._values.copy() + + if isinstance(values, np.ndarray): + converted = setitem_datetimelike_compat(values, mask.sum(), converted) + np.putmask(values, mask, converted) + + else: + # Note: we use the original value here, not converted, as + # _validate_fill_value is not idempotent + values._putmask(mask, value) + + return self._shallow_copy(values) + + def equals(self, other: Any) -> bool: + """ + Determine if two Index object are equal. + + The things that are being compared are: + + * The elements inside the Index object. + * The order of the elements inside the Index object. + + Parameters + ---------- + other : Any + The other object to compare against. + + Returns + ------- + bool + True if "other" is an Index and it has the same elements and order + as the calling index; False otherwise. + + Examples + -------- + >>> idx1 = pd.Index([1, 2, 3]) + >>> idx1 + Int64Index([1, 2, 3], dtype='int64') + >>> idx1.equals(pd.Index([1, 2, 3])) + True + + The elements inside are compared + + >>> idx2 = pd.Index(["1", "2", "3"]) + >>> idx2 + Index(['1', '2', '3'], dtype='object') + + >>> idx1.equals(idx2) + False + + The order is compared + + >>> ascending_idx = pd.Index([1, 2, 3]) + >>> ascending_idx + Int64Index([1, 2, 3], dtype='int64') + >>> descending_idx = pd.Index([3, 2, 1]) + >>> descending_idx + Int64Index([3, 2, 1], dtype='int64') + >>> ascending_idx.equals(descending_idx) + False + + The dtype is *not* compared + + >>> int64_idx = pd.Int64Index([1, 2, 3]) + >>> int64_idx + Int64Index([1, 2, 3], dtype='int64') + >>> uint64_idx = pd.UInt64Index([1, 2, 3]) + >>> uint64_idx + UInt64Index([1, 2, 3], dtype='uint64') + >>> int64_idx.equals(uint64_idx) + True + """ + if self.is_(other): + return True + + if not isinstance(other, Index): + return False + + if is_object_dtype(self.dtype) and not is_object_dtype(other.dtype): + # if other is not object, use other's logic for coercion + return other.equals(self) + + if isinstance(other, ABCMultiIndex): + # d-level MultiIndex can equal d-tuple Index + return other.equals(self) + + if isinstance(self._values, ExtensionArray): + # Dispatch to the ExtensionArray's .equals method. + if not isinstance(other, type(self)): + return False + + earr = cast(ExtensionArray, self._data) + return earr.equals(other._data) + + if is_extension_array_dtype(other.dtype): + # All EA-backed Index subclasses override equals + return other.equals(self) + + return array_equivalent(self._values, other._values) + + @final + def identical(self, other) -> bool: + """ + Similar to equals, but checks that object attributes and types are also equal. + + Returns + ------- + bool + If two Index objects have equal elements and same type True, + otherwise False. + """ + return ( + self.equals(other) + and all( + getattr(self, c, None) == getattr(other, c, None) + for c in self._comparables + ) + and type(self) == type(other) + ) + + @final + def asof(self, label): + """ + Return the label from the index, or, if not present, the previous one. 
+ + Assuming that the index is sorted, return the passed index label if it + is in the index, or return the previous index label if the passed one + is not in the index. + + Parameters + ---------- + label : object + The label up to which the method returns the latest index label. + + Returns + ------- + object + The passed label if it is in the index. The previous label if the + passed label is not in the sorted index or `NaN` if there is no + such label. + + See Also + -------- + Series.asof : Return the latest value in a Series up to the + passed index. + merge_asof : Perform an asof merge (similar to left join but it + matches on nearest key rather than equal key). + Index.get_loc : An `asof` is a thin wrapper around `get_loc` + with method='pad'. + + Examples + -------- + `Index.asof` returns the latest index label up to the passed label. + + >>> idx = pd.Index(['2013-12-31', '2014-01-02', '2014-01-03']) + >>> idx.asof('2014-01-01') + '2013-12-31' + + If the label is in the index, the method returns the passed label. + + >>> idx.asof('2014-01-02') + '2014-01-02' + + If all of the labels in the index are later than the passed label, + NaN is returned. + + >>> idx.asof('1999-01-02') + nan + + If the index is not sorted, an error is raised. + + >>> idx_not_sorted = pd.Index(['2013-12-31', '2015-01-02', + ... '2014-01-03']) + >>> idx_not_sorted.asof('2013-12-31') + Traceback (most recent call last): + ValueError: index must be monotonic increasing or decreasing + """ + self._searchsorted_monotonic(label) # validate sortedness + try: + loc = self.get_loc(label) + except (KeyError, TypeError): + # KeyError -> No exact match, try for padded + # TypeError -> passed e.g. non-hashable, fall through to get + # the tested exception message + indexer = self.get_indexer([label], method="pad") + if indexer.ndim > 1 or indexer.size > 1: + raise TypeError("asof requires scalar valued input") + loc = indexer.item() + if loc == -1: + return self._na_value + else: + if isinstance(loc, slice): + loc = loc.indices(len(self))[-1] + + return self[loc] + + def asof_locs(self, where: Index, mask: np.ndarray) -> npt.NDArray[np.intp]: + """ + Return the locations (indices) of labels in the index. + + As in the `asof` function, if the label (a particular entry in + `where`) is not in the index, the latest index label up to the + passed label is chosen and its index returned. + + If all of the labels in the index are later than a label in `where`, + -1 is returned. + + `mask` is used to ignore NA values in the index during calculation. + + Parameters + ---------- + where : Index + An Index consisting of an array of timestamps. + mask : np.ndarray[bool] + Array of booleans denoting where values in the original + data are not NA. + + Returns + ------- + np.ndarray[np.intp] + An array of locations (indices) of the labels from the Index + which correspond to the return values of the `asof` function + for every element in `where`. 
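+
+        See Also
+        --------
+        Index.asof : Return the latest index label up to the passed label.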
+ """ + # error: No overload variant of "searchsorted" of "ndarray" matches argument + # types "Union[ExtensionArray, ndarray[Any, Any]]", "str" + # TODO: will be fixed when ExtensionArray.searchsorted() is fixed + locs = self._values[mask].searchsorted( + where._values, side="right" # type: ignore[call-overload] + ) + locs = np.where(locs > 0, locs - 1, 0) + + result = np.arange(len(self), dtype=np.intp)[mask].take(locs) + + first_value = self._values[mask.argmax()] + result[(locs == 0) & (where._values < first_value)] = -1 + + return result + + def sort_values( + self, + return_indexer: bool = False, + ascending: bool = True, + na_position: str_t = "last", + key: Callable | None = None, + ): + """ + Return a sorted copy of the index. + + Return a sorted copy of the index, and optionally return the indices + that sorted the index itself. + + Parameters + ---------- + return_indexer : bool, default False + Should the indices that would sort the index be returned. + ascending : bool, default True + Should the index values be sorted in an ascending order. + na_position : {'first' or 'last'}, default 'last' + Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at + the end. + + .. versionadded:: 1.2.0 + + key : callable, optional + If not None, apply the key function to the index values + before sorting. This is similar to the `key` argument in the + builtin :meth:`sorted` function, with the notable difference that + this `key` function should be *vectorized*. It should expect an + ``Index`` and return an ``Index`` of the same shape. + + .. versionadded:: 1.1.0 + + Returns + ------- + sorted_index : pandas.Index + Sorted copy of the index. + indexer : numpy.ndarray, optional + The indices that the index itself was sorted by. + + See Also + -------- + Series.sort_values : Sort values of a Series. + DataFrame.sort_values : Sort values in a DataFrame. + + Examples + -------- + >>> idx = pd.Index([10, 100, 1, 1000]) + >>> idx + Int64Index([10, 100, 1, 1000], dtype='int64') + + Sort values in ascending order (default behavior). + + >>> idx.sort_values() + Int64Index([1, 10, 100, 1000], dtype='int64') + + Sort values in descending order, and also get the indices `idx` was + sorted by. + + >>> idx.sort_values(ascending=False, return_indexer=True) + (Int64Index([1000, 100, 10, 1], dtype='int64'), array([3, 1, 0, 2])) + """ + idx = ensure_key_mapped(self, key) + + # GH 35584. Sort missing values according to na_position kwarg + # ignore na_position for MultiIndex + if not isinstance(self, ABCMultiIndex): + _as = nargsort( + items=idx, ascending=ascending, na_position=na_position, key=key + ) + else: + _as = idx.argsort() + if not ascending: + _as = _as[::-1] + + sorted_index = self.take(_as) + + if return_indexer: + return sorted_index, _as + else: + return sorted_index + + @final + def sort(self, *args, **kwargs): + """ + Use sort_values instead. + """ + raise TypeError("cannot sort an Index object in-place, use sort_values instead") + + def shift(self, periods=1, freq=None): + """ + Shift index by desired number of time frequency increments. + + This method is for shifting the values of datetime-like indexes + by a specified time increment a given number of times. + + Parameters + ---------- + periods : int, default 1 + Number of periods (or increments) to shift by, + can be positive or negative. + freq : pandas.DateOffset, pandas.Timedelta or str, optional + Frequency increment to shift by. + If None, the index is shifted by its own `freq` attribute. 
+ Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc. + + Returns + ------- + pandas.Index + Shifted index. + + See Also + -------- + Series.shift : Shift values of Series. + + Notes + ----- + This method is only implemented for datetime-like index classes, + i.e., DatetimeIndex, PeriodIndex and TimedeltaIndex. + + Examples + -------- + Put the first 5 month starts of 2011 into an index. + + >>> month_starts = pd.date_range('1/1/2011', periods=5, freq='MS') + >>> month_starts + DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01', '2011-04-01', + '2011-05-01'], + dtype='datetime64[ns]', freq='MS') + + Shift the index by 10 days. + + >>> month_starts.shift(10, freq='D') + DatetimeIndex(['2011-01-11', '2011-02-11', '2011-03-11', '2011-04-11', + '2011-05-11'], + dtype='datetime64[ns]', freq=None) + + The default value of `freq` is the `freq` attribute of the index, + which is 'MS' (month start) in this example. + + >>> month_starts.shift(10) + DatetimeIndex(['2011-11-01', '2011-12-01', '2012-01-01', '2012-02-01', + '2012-03-01'], + dtype='datetime64[ns]', freq='MS') + """ + raise NotImplementedError( + f"This method is only implemented for DatetimeIndex, PeriodIndex and " + f"TimedeltaIndex; Got type {type(self).__name__}" + ) + + def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]: + """ + Return the integer indices that would sort the index. + + Parameters + ---------- + *args + Passed to `numpy.ndarray.argsort`. + **kwargs + Passed to `numpy.ndarray.argsort`. + + Returns + ------- + np.ndarray[np.intp] + Integer indices that would sort the index if used as + an indexer. + + See Also + -------- + numpy.argsort : Similar method for NumPy arrays. + Index.sort_values : Return sorted copy of Index. + + Examples + -------- + >>> idx = pd.Index(['b', 'a', 'd', 'c']) + >>> idx + Index(['b', 'a', 'd', 'c'], dtype='object') + + >>> order = idx.argsort() + >>> order + array([1, 0, 3, 2]) + + >>> idx[order] + Index(['a', 'b', 'c', 'd'], dtype='object') + """ + # This works for either ndarray or EA, is overridden + # by RangeIndex, MultIIndex + return self._data.argsort(*args, **kwargs) + + @final + def get_value(self, series: Series, key): + """ + Fast lookup of value from 1-dimensional ndarray. + + Only use this if you know what you're doing. + + Returns + ------- + scalar or Series + """ + warnings.warn( + "get_value is deprecated and will be removed in a future version. " + "Use Series[key] instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + self._check_indexing_error(key) + + try: + # GH 20882, 21257 + # First try to convert the key to a location + # If that fails, raise a KeyError if an integer + # index, otherwise, see if key is an integer, and + # try that + loc = self.get_loc(key) + except KeyError: + if not self._should_fallback_to_positional: + raise + elif is_integer(key): + # If the Index cannot hold integer, then this is unambiguously + # a locational lookup. + loc = key + else: + raise + + return self._get_values_for_loc(series, loc, key) + + def _check_indexing_error(self, key): + if not is_scalar(key): + # if key is not a scalar, directly raise an error (the code below + # would convert to numpy arrays and raise later any way) - GH29926 + raise InvalidIndexError(key) + + @cache_readonly + def _should_fallback_to_positional(self) -> bool: + """ + Should an integer key be treated as positional? 
+ """ + return not self.holds_integer() and not self.is_boolean() + + def _get_values_for_loc(self, series: Series, loc, key): + """ + Do a positional lookup on the given Series, returning either a scalar + or a Series. + + Assumes that `series.index is self` + + key is included for MultiIndex compat. + """ + if is_integer(loc): + return series._values[loc] + + return series.iloc[loc] + + @final + def set_value(self, arr, key, value): + """ + Fast lookup of value from 1-dimensional ndarray. + + .. deprecated:: 1.0 + + Notes + ----- + Only use this if you know what you're doing. + """ + warnings.warn( + ( + "The 'set_value' method is deprecated, and " + "will be removed in a future version." + ), + FutureWarning, + stacklevel=find_stack_level(), + ) + loc = self._engine.get_loc(key) + if not can_hold_element(arr, value): + raise ValueError + arr[loc] = value + + _index_shared_docs[ + "get_indexer_non_unique" + ] = """ + Compute indexer and mask for new index given the current index. The + indexer should be then used as an input to ndarray.take to align the + current data to the new index. + + Parameters + ---------- + target : %(target_klass)s + + Returns + ------- + indexer : np.ndarray[np.intp] + Integers from 0 to n - 1 indicating that the index at these + positions matches the corresponding target values. Missing values + in the target are marked by -1. + missing : np.ndarray[np.intp] + An indexer into the target of the values not found. + These correspond to the -1 in the indexer array. + """ + + @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs) + def get_indexer_non_unique( + self, target + ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: + target = ensure_index(target) + target = self._maybe_cast_listlike_indexer(target) + + if not self._should_compare(target) and not is_interval_dtype(self.dtype): + # IntervalIndex get special treatment bc numeric scalars can be + # matched to Interval scalars + return self._get_indexer_non_comparable(target, method=None, unique=False) + + pself, ptarget = self._maybe_promote(target) + if pself is not self or ptarget is not target: + return pself.get_indexer_non_unique(ptarget) + + if not is_dtype_equal(self.dtype, target.dtype): + # TODO: if object, could use infer_dtype to preempt costly + # conversion if still non-comparable? + dtype = self._find_common_type_compat(target) + + this = self.astype(dtype, copy=False) + that = target.astype(dtype, copy=False) + return this.get_indexer_non_unique(that) + + # Note: _maybe_promote ensures we never get here with MultiIndex + # self and non-Multi target + tgt_values = target._get_engine_target() + if self._is_multi and target._is_multi: + engine = self._engine + # error: "IndexEngine" has no attribute "_extract_level_codes" + tgt_values = engine._extract_level_codes( # type: ignore[attr-defined] + target + ) + + indexer, missing = self._engine.get_indexer_non_unique(tgt_values) + return ensure_platform_int(indexer), ensure_platform_int(missing) + + @final + def get_indexer_for(self, target) -> npt.NDArray[np.intp]: + """ + Guaranteed return of an indexer even when non-unique. + + This dispatches to get_indexer or get_indexer_non_unique + as appropriate. + + Returns + ------- + np.ndarray[np.intp] + List of indices. 
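+
+ See Also
+ --------
+ Index.get_indexer : Compute indexer for a unique Index.
+ Index.get_indexer_non_unique : Compute indexer for a non-unique Index.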
+ + Examples + -------- + >>> idx = pd.Index([np.nan, 'var1', np.nan]) + >>> idx.get_indexer_for([np.nan]) + array([0, 2]) + """ + if self._index_as_unique: + return self.get_indexer(target) + indexer, _ = self.get_indexer_non_unique(target) + return indexer + + def _get_indexer_strict(self, key, axis_name: str_t) -> tuple[Index, np.ndarray]: + """ + Analogue to get_indexer that raises if any elements are missing. + """ + keyarr = key + if not isinstance(keyarr, Index): + keyarr = com.asarray_tuplesafe(keyarr) + + if self._index_as_unique: + indexer = self.get_indexer_for(keyarr) + keyarr = self.reindex(keyarr)[0] + else: + keyarr, indexer, new_indexer = self._reindex_non_unique(keyarr) + + self._raise_if_missing(keyarr, indexer, axis_name) + + keyarr = self.take(indexer) + if isinstance(key, Index): + # GH 42790 - Preserve name from an Index + keyarr.name = key.name + if keyarr.dtype.kind in ["m", "M"]: + # DTI/TDI.take can infer a freq in some cases when we dont want one + if isinstance(key, list) or ( + isinstance(key, type(self)) + # "Index" has no attribute "freq" + and key.freq is None # type: ignore[attr-defined] + ): + keyarr = keyarr._with_freq(None) + + return keyarr, indexer + + def _raise_if_missing(self, key, indexer, axis_name: str_t) -> None: + """ + Check that indexer can be used to return a result. + + e.g. at least one element was found, + unless the list of keys was actually empty. + + Parameters + ---------- + key : list-like + Targeted labels (only used to show correct error message). + indexer: array-like of booleans + Indices corresponding to the key, + (with -1 indicating not found). + axis_name : str + + Raises + ------ + KeyError + If at least one key was requested but none was found. + """ + if len(key) == 0: + return + + # Count missing values + missing_mask = indexer < 0 + nmissing = missing_mask.sum() + + if nmissing: + + # TODO: remove special-case; this is just to keep exception + # message tests from raising while debugging + use_interval_msg = is_interval_dtype(self.dtype) or ( + is_categorical_dtype(self.dtype) + # "Index" has no attribute "categories" [attr-defined] + and is_interval_dtype( + self.categories.dtype # type: ignore[attr-defined] + ) + ) + + if nmissing == len(indexer): + if use_interval_msg: + key = list(key) + raise KeyError(f"None of [{key}] are in the [{axis_name}]") + + not_found = list(ensure_index(key)[missing_mask.nonzero()[0]].unique()) + raise KeyError(f"{not_found} not in index") + + @overload + def _get_indexer_non_comparable( + self, target: Index, method, unique: Literal[True] = ... + ) -> npt.NDArray[np.intp]: + ... + + @overload + def _get_indexer_non_comparable( + self, target: Index, method, unique: Literal[False] + ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: + ... + + @overload + def _get_indexer_non_comparable( + self, target: Index, method, unique: bool = True + ) -> npt.NDArray[np.intp] | tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: + ... + + @final + def _get_indexer_non_comparable( + self, target: Index, method, unique: bool = True + ) -> npt.NDArray[np.intp] | tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: + """ + Called from get_indexer or get_indexer_non_unique when the target + is of a non-comparable dtype. + + For get_indexer lookups with method=None, get_indexer is an _equality_ + check, so non-comparable dtypes mean we will always have no matches. + + For get_indexer lookups with a method, get_indexer is an _inequality_ + check, so non-comparable dtypes mean we will always raise TypeError. 
+ + Parameters + ---------- + target : Index + method : str or None + unique : bool, default True + * True if called from get_indexer. + * False if called from get_indexer_non_unique. + + Raises + ------ + TypeError + If doing an inequality check, i.e. method is not None. + """ + if method is not None: + other = unpack_nested_dtype(target) + raise TypeError(f"Cannot compare dtypes {self.dtype} and {other.dtype}") + + no_matches = -1 * np.ones(target.shape, dtype=np.intp) + if unique: + # This is for get_indexer + return no_matches + else: + # This is for get_indexer_non_unique + missing = np.arange(len(target), dtype=np.intp) + return no_matches, missing + + @property + def _index_as_unique(self) -> bool: + """ + Whether we should treat this as unique for the sake of + get_indexer vs get_indexer_non_unique. + + For IntervalIndex compat. + """ + return self.is_unique + + _requires_unique_msg = "Reindexing only valid with uniquely valued Index objects" + + @final + def _maybe_promote(self, other: Index) -> tuple[Index, Index]: + """ + When dealing with an object-dtype Index and a non-object Index, see + if we can upcast the object-dtype one to improve performance. + """ + + if isinstance(self, ABCDatetimeIndex) and isinstance(other, ABCDatetimeIndex): + if ( + self.tz is not None + and other.tz is not None + and not tz_compare(self.tz, other.tz) + ): + # standardize on UTC + return self.tz_convert("UTC"), other.tz_convert("UTC") + + elif self.inferred_type == "date" and isinstance(other, ABCDatetimeIndex): + try: + return type(other)(self), other + except OutOfBoundsDatetime: + return self, other + elif self.inferred_type == "timedelta" and isinstance(other, ABCTimedeltaIndex): + # TODO: we dont have tests that get here + return type(other)(self), other + + elif self.dtype.kind == "u" and other.dtype.kind == "i": + # GH#41873 + if other.min() >= 0: + # lookup min as it may be cached + # TODO: may need itemsize check if we have non-64-bit Indexes + return self, other.astype(self.dtype) + + elif self._is_multi and not other._is_multi: + try: + # "Type[Index]" has no attribute "from_tuples" + other = type(self).from_tuples(other) # type: ignore[attr-defined] + except (TypeError, ValueError): + # let's instead try with a straight Index + self = Index(self._values) + + if not is_object_dtype(self.dtype) and is_object_dtype(other.dtype): + # Reverse op so we dont need to re-implement on the subclasses + other, self = other._maybe_promote(self) + + return self, other + + @final + def _find_common_type_compat(self, target) -> DtypeObj: + """ + Implementation of find_common_type that adjusts for Index-specific + special cases. + """ + if is_interval_dtype(self.dtype) and is_valid_na_for_dtype(target, self.dtype): + # e.g. setting NA value into IntervalArray[int64] + self = cast("IntervalIndex", self) + return IntervalDtype(np.float64, closed=self.closed) + + target_dtype, _ = infer_dtype_from(target, pandas_dtype=True) + + # special case: if one dtype is uint64 and the other a signed int, return object + # See https://github.com/pandas-dev/pandas/issues/26778 for discussion + # Now it's: + # * float | [u]int -> float + # * uint64 | signed int -> object + # We may change union(float | [u]int) to go to object. 
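+ # e.g. a uint64 Index matched against signed labels has no lossless
+ # common integer dtype, since neither int64 nor uint64 can hold both.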
+ if self.dtype == "uint64" or target_dtype == "uint64": + if is_signed_integer_dtype(self.dtype) or is_signed_integer_dtype( + target_dtype + ): + return _dtype_obj + + dtype = find_common_type([self.dtype, target_dtype]) + + if dtype.kind in ["i", "u"]: + # TODO: what about reversed with self being categorical? + if ( + isinstance(target, Index) + and is_categorical_dtype(target.dtype) + and target.hasnans + ): + # FIXME: find_common_type incorrect with Categorical GH#38240 + # FIXME: some cases where float64 cast can be lossy? + dtype = np.dtype(np.float64) + if dtype.kind == "c": + dtype = _dtype_obj + return dtype + + @final + def _should_compare(self, other: Index) -> bool: + """ + Check if `self == other` can ever have non-False entries. + """ + + if (other.is_boolean() and self.is_numeric()) or ( + self.is_boolean() and other.is_numeric() + ): + # GH#16877 Treat boolean labels passed to a numeric index as not + # found. Without this fix False and True would be treated as 0 and 1 + # respectively. + return False + + other = unpack_nested_dtype(other) + dtype = other.dtype + return self._is_comparable_dtype(dtype) or is_object_dtype(dtype) + + def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: + """ + Can we compare values of the given dtype to our own? + """ + return True + + @final + def groupby(self, values) -> PrettyDict[Hashable, np.ndarray]: + """ + Group the index labels by a given array of values. + + Parameters + ---------- + values : array + Values used to determine the groups. + + Returns + ------- + dict + {group name -> group labels} + """ + # TODO: if we are a MultiIndex, we can do better + # that converting to tuples + if isinstance(values, ABCMultiIndex): + values = values._values + values = Categorical(values) + result = values._reverse_indexer() + + # map to the label + result = {k: self.take(v) for k, v in result.items()} + + return PrettyDict(result) + + def map(self, mapper, na_action=None): + """ + Map values using an input mapping or function. + + Parameters + ---------- + mapper : function, dict, or Series + Mapping correspondence. + na_action : {None, 'ignore'} + If 'ignore', propagate NA values, without passing them to the + mapping correspondence. + + Returns + ------- + applied : Union[Index, MultiIndex], inferred + The output of the mapping function applied to the index. + If the function returns a tuple with more than one element + a MultiIndex will be returned. + """ + from pandas.core.indexes.multi import MultiIndex + + new_values = self._map_values(mapper, na_action=na_action) + + # we can return a MultiIndex + if new_values.size and isinstance(new_values[0], tuple): + if isinstance(self, MultiIndex): + names = self.names + elif self.name: + names = [self.name] * len(new_values[0]) + else: + names = None + return MultiIndex.from_tuples(new_values, names=names) + + dtype = None + if not new_values.size: + # empty + dtype = self.dtype + + # e.g. if we are floating and new_values is all ints, then we + # don't want to cast back to floating. But if we are UInt64 + # and new_values is all ints, we want to try. 
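+ # infer_dtype tells us whether the mapped values still look like our
+ # own inferred type; only in that case do we try casting back.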
+ same_dtype = lib.infer_dtype(new_values, skipna=False) == self.inferred_type + if same_dtype: + new_values = maybe_cast_pointwise_result( + new_values, self.dtype, same_dtype=same_dtype + ) + + if self._is_backward_compat_public_numeric_index and is_numeric_dtype( + new_values.dtype + ): + return self._constructor( + new_values, dtype=dtype, copy=False, name=self.name + ) + + return Index._with_infer(new_values, dtype=dtype, copy=False, name=self.name) + + # TODO: De-duplicate with map, xref GH#32349 + @final + def _transform_index(self, func, *, level=None) -> Index: + """ + Apply function to all values found in index. + + This includes transforming multiindex entries separately. + Only apply function to one level of the MultiIndex if level is specified. + """ + if isinstance(self, ABCMultiIndex): + if level is not None: + # Caller is responsible for ensuring level is positional. + items = [ + tuple(func(y) if i == level else y for i, y in enumerate(x)) + for x in self + ] + else: + items = [tuple(func(y) for y in x) for x in self] + return type(self).from_tuples(items, names=self.names) + else: + items = [func(x) for x in self] + return Index(items, name=self.name, tupleize_cols=False) + + def isin(self, values, level=None) -> np.ndarray: + """ + Return a boolean array where the index values are in `values`. + + Compute boolean array of whether each index value is found in the + passed set of values. The length of the returned boolean array matches + the length of the index. + + Parameters + ---------- + values : set or list-like + Sought values. + level : str or int, optional + Name or position of the index level to use (if the index is a + `MultiIndex`). + + Returns + ------- + np.ndarray[bool] + NumPy array of boolean values. + + See Also + -------- + Series.isin : Same for Series. + DataFrame.isin : Same method for DataFrames. + + Notes + ----- + In the case of `MultiIndex` you must either specify `values` as a + list-like object containing tuples that are the same length as the + number of levels, or specify `level`. Otherwise it will raise a + ``ValueError``. + + If `level` is specified: + + - if it is the name of one *and only one* index level, use that level; + - otherwise it should be a number indicating level position. + + Examples + -------- + >>> idx = pd.Index([1,2,3]) + >>> idx + Int64Index([1, 2, 3], dtype='int64') + + Check whether each index value in a list of values. + + >>> idx.isin([1, 4]) + array([ True, False, False]) + + >>> midx = pd.MultiIndex.from_arrays([[1,2,3], + ... ['red', 'blue', 'green']], + ... names=('number', 'color')) + >>> midx + MultiIndex([(1, 'red'), + (2, 'blue'), + (3, 'green')], + names=['number', 'color']) + + Check whether the strings in the 'color' level of the MultiIndex + are in a list of colors. + + >>> midx.isin(['red', 'orange', 'yellow'], level='color') + array([ True, False, False]) + + To check across the levels of a MultiIndex, pass a list of tuples: + + >>> midx.isin([(1, 'red'), (3, 'red')]) + array([ True, False, False]) + + For a DatetimeIndex, string values in `values` are converted to + Timestamps. 
+ + >>> dates = ['2000-03-11', '2000-03-12', '2000-03-13'] + >>> dti = pd.to_datetime(dates) + >>> dti + DatetimeIndex(['2000-03-11', '2000-03-12', '2000-03-13'], + dtype='datetime64[ns]', freq=None) + + >>> dti.isin(['2000-03-11']) + array([ True, False, False]) + """ + if level is not None: + self._validate_index_level(level) + return algos.isin(self._values, values) + + def _get_string_slice(self, key: str_t): + # this is for partial string indexing, + # overridden in DatetimeIndex, TimedeltaIndex and PeriodIndex + raise NotImplementedError + + def slice_indexer( + self, + start: Hashable | None = None, + end: Hashable | None = None, + step: int | None = None, + kind=no_default, + ) -> slice: + """ + Compute the slice indexer for input labels and step. + + Index needs to be ordered and unique. + + Parameters + ---------- + start : label, default None + If None, defaults to the beginning. + end : label, default None + If None, defaults to the end. + step : int, default None + kind : str, default None + + .. deprecated:: 1.4.0 + + Returns + ------- + indexer : slice + + Raises + ------ + KeyError : If key does not exist, or key is not unique and index is + not ordered. + + Notes + ----- + This function assumes that the data is sorted, so use at your own peril + + Examples + -------- + This is a method on all index types. For example you can do: + + >>> idx = pd.Index(list('abcd')) + >>> idx.slice_indexer(start='b', end='c') + slice(1, 3, None) + + >>> idx = pd.MultiIndex.from_arrays([list('abcd'), list('efgh')]) + >>> idx.slice_indexer(start='b', end=('c', 'g')) + slice(1, 3, None) + """ + self._deprecated_arg(kind, "kind", "slice_indexer") + + start_slice, end_slice = self.slice_locs(start, end, step=step) + + # return a slice + if not is_scalar(start_slice): + raise AssertionError("Start slice bound is non-scalar") + if not is_scalar(end_slice): + raise AssertionError("End slice bound is non-scalar") + + return slice(start_slice, end_slice, step) + + def _maybe_cast_indexer(self, key): + """ + If we have a float key and are not a floating index, then try to cast + to an int if equivalent. + """ + if not self.is_floating(): + return com.cast_scalar_indexer(key) + return key + + def _maybe_cast_listlike_indexer(self, target) -> Index: + """ + Analogue to maybe_cast_indexer for get_indexer instead of get_loc. + """ + return ensure_index(target) + + @final + def _validate_indexer(self, form: str_t, key, kind: str_t): + """ + If we are positional indexer, validate that we have appropriate + typed bounds must be an integer. + """ + assert kind in ["getitem", "iloc"] + + if key is not None and not is_integer(key): + raise self._invalid_indexer(form, key) + + def _maybe_cast_slice_bound(self, label, side: str_t, kind=no_default): + """ + This function should be overloaded in subclasses that allow non-trivial + casting on label-slice bounds, e.g. datetime-like indices allowing + strings containing formatted datetimes. + + Parameters + ---------- + label : object + side : {'left', 'right'} + kind : {'loc', 'getitem'} or None + + .. deprecated:: 1.3.0 + + Returns + ------- + label : object + + Notes + ----- + Value of `side` parameter should be validated in caller. + """ + assert kind in ["loc", "getitem", None, no_default] + self._deprecated_arg(kind, "kind", "_maybe_cast_slice_bound") + + # We are a plain index here (sub-class override this method if they + # wish to have special treatment for floats/ints, e.g. 
Float64Index and + # datetimelike Indexes + # reject them, if index does not contain label + if (is_float(label) or is_integer(label)) and label not in self: + raise self._invalid_indexer("slice", label) + + return label + + def _searchsorted_monotonic(self, label, side: Literal["left", "right"] = "left"): + if self.is_monotonic_increasing: + return self.searchsorted(label, side=side) + elif self.is_monotonic_decreasing: + # np.searchsorted expects ascending sort order, have to reverse + # everything for it to work (element ordering, search side and + # resulting value). + pos = self[::-1].searchsorted( + label, side="right" if side == "left" else "left" + ) + return len(self) - pos + + raise ValueError("index must be monotonic increasing or decreasing") + + def get_slice_bound( + self, label, side: Literal["left", "right"], kind=no_default + ) -> int: + """ + Calculate slice bound that corresponds to given label. + + Returns leftmost (one-past-the-rightmost if ``side=='right'``) position + of given label. + + Parameters + ---------- + label : object + side : {'left', 'right'} + kind : {'loc', 'getitem'} or None + + .. deprecated:: 1.4.0 + + Returns + ------- + int + Index of label. + """ + assert kind in ["loc", "getitem", None, no_default] + self._deprecated_arg(kind, "kind", "get_slice_bound") + + if side not in ("left", "right"): + raise ValueError( + "Invalid value for side kwarg, must be either " + f"'left' or 'right': {side}" + ) + + original_label = label + + # For datetime indices label may be a string that has to be converted + # to datetime boundary according to its resolution. + label = self._maybe_cast_slice_bound(label, side) + + # we need to look up the label + try: + slc = self.get_loc(label) + except KeyError as err: + try: + return self._searchsorted_monotonic(label, side) + except ValueError: + # raise the original KeyError + raise err + + if isinstance(slc, np.ndarray): + # get_loc may return a boolean array, which + # is OK as long as they are representable by a slice. + assert is_bool_dtype(slc.dtype) + slc = lib.maybe_booleans_to_slice(slc.view("u1")) + if isinstance(slc, np.ndarray): + raise KeyError( + f"Cannot get {side} slice bound for non-unique " + f"label: {repr(original_label)}" + ) + + if isinstance(slc, slice): + if side == "left": + return slc.start + else: + return slc.stop + else: + if side == "right": + return slc + 1 + else: + return slc + + def slice_locs( + self, start=None, end=None, step=None, kind=no_default + ) -> tuple[int, int]: + """ + Compute slice locations for input labels. + + Parameters + ---------- + start : label, default None + If None, defaults to the beginning. + end : label, default None + If None, defaults to the end. + step : int, defaults None + If None, defaults to 1. + kind : {'loc', 'getitem'} or None + + .. deprecated:: 1.4.0 + + Returns + ------- + start, end : int + + See Also + -------- + Index.get_loc : Get location for a single label. + + Notes + ----- + This method only works if the index is monotonic or unique. + + Examples + -------- + >>> idx = pd.Index(list('abcd')) + >>> idx.slice_locs(start='b', end='c') + (1, 3) + """ + self._deprecated_arg(kind, "kind", "slice_locs") + inc = step is None or step >= 0 + + if not inc: + # If it's a reverse slice, temporarily swap bounds. 
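+ # The left/right bound lookups below are then done on the swapped
+ # labels, and the resulting positions are shifted back afterwards
+ # (see the diagram further down).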
+ start, end = end, start + + # GH 16785: If start and end happen to be date strings with UTC offsets + # attempt to parse and check that the offsets are the same + if isinstance(start, (str, datetime)) and isinstance(end, (str, datetime)): + try: + ts_start = Timestamp(start) + ts_end = Timestamp(end) + except (ValueError, TypeError): + pass + else: + if not tz_compare(ts_start.tzinfo, ts_end.tzinfo): + raise ValueError("Both dates must have the same UTC offset") + + start_slice = None + if start is not None: + start_slice = self.get_slice_bound(start, "left") + if start_slice is None: + start_slice = 0 + + end_slice = None + if end is not None: + end_slice = self.get_slice_bound(end, "right") + if end_slice is None: + end_slice = len(self) + + if not inc: + # Bounds at this moment are swapped, swap them back and shift by 1. + # + # slice_locs('B', 'A', step=-1): s='B', e='A' + # + # s='A' e='B' + # AFTER SWAP: | | + # v ------------------> V + # ----------------------------------- + # | | |A|A|A|A| | | | | |B|B| | | | | + # ----------------------------------- + # ^ <------------------ ^ + # SHOULD BE: | | + # end=s-1 start=e-1 + # + end_slice, start_slice = start_slice - 1, end_slice - 1 + + # i == -1 triggers ``len(self) + i`` selection that points to the + # last element, not before-the-first one, subtracting len(self) + # compensates that. + if end_slice == -1: + end_slice -= len(self) + if start_slice == -1: + start_slice -= len(self) + + return start_slice, end_slice + + def delete(self: _IndexT, loc) -> _IndexT: + """ + Make new Index with passed location(-s) deleted. + + Parameters + ---------- + loc : int or list of int + Location of item(-s) which will be deleted. + Use a list of locations to delete more than one value at the same time. + + Returns + ------- + Index + Will be same type as self, except for RangeIndex. + + See Also + -------- + numpy.delete : Delete any rows and column from NumPy array (ndarray). + + Examples + -------- + >>> idx = pd.Index(['a', 'b', 'c']) + >>> idx.delete(1) + Index(['a', 'c'], dtype='object') + + >>> idx = pd.Index(['a', 'b', 'c']) + >>> idx.delete([0, 2]) + Index(['b'], dtype='object') + """ + values = self._values + res_values: ArrayLike + if isinstance(values, np.ndarray): + # TODO(__array_function__): special casing will be unnecessary + res_values = np.delete(values, loc) + else: + res_values = values.delete(loc) + + # _constructor so RangeIndex->Int64Index + return self._constructor._simple_new(res_values, name=self.name) + + def insert(self, loc: int, item) -> Index: + """ + Make new Index inserting new item at location. + + Follows Python numpy.insert semantics for negative values. + + Parameters + ---------- + loc : int + item : object + + Returns + ------- + new_index : Index + """ + item = lib.item_from_zerodim(item) + if is_valid_na_for_dtype(item, self.dtype) and self.dtype != object: + item = self._na_value + + arr = self._values + + try: + if isinstance(arr, ExtensionArray): + res_values = arr.insert(loc, item) + return type(self)._simple_new(res_values, name=self.name) + else: + item = self._validate_fill_value(item) + except (TypeError, ValueError): + # e.g. trying to insert an integer into a DatetimeIndex + # We cannot keep the same dtype, so cast to the (often object) + # minimal shared dtype before doing the insert. 
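+ # e.g. inserting a string into an Int64Index falls back to object.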
+ dtype = self._find_common_type_compat(item) + return self.astype(dtype).insert(loc, item) + + if arr.dtype != object or not isinstance( + item, (tuple, np.datetime64, np.timedelta64) + ): + # with object-dtype we need to worry about numpy incorrectly casting + # dt64/td64 to integer, also about treating tuples as sequences + # special-casing dt64/td64 https://github.com/numpy/numpy/issues/12550 + casted = arr.dtype.type(item) + new_values = np.insert(arr, loc, casted) + + else: + # No overload variant of "insert" matches argument types + # "ndarray[Any, Any]", "int", "None" [call-overload] + new_values = np.insert(arr, loc, None) # type: ignore[call-overload] + loc = loc if loc >= 0 else loc - 1 + new_values[loc] = item + + # Use self._constructor instead of Index to retain NumericIndex GH#43921 + # TODO(2.0) can use Index instead of self._constructor + return self._constructor._with_infer(new_values, name=self.name) + + def drop(self, labels, errors: str_t = "raise") -> Index: + """ + Make new Index with passed list of labels deleted. + + Parameters + ---------- + labels : array-like or scalar + errors : {'ignore', 'raise'}, default 'raise' + If 'ignore', suppress error and existing labels are dropped. + + Returns + ------- + dropped : Index + Will be same type as self, except for RangeIndex. + + Raises + ------ + KeyError + If not all of the labels are found in the selected axis + """ + if not isinstance(labels, Index): + # avoid materializing e.g. RangeIndex + arr_dtype = "object" if self.dtype == "object" else None + labels = com.index_labels_to_array(labels, dtype=arr_dtype) + + indexer = self.get_indexer_for(labels) + mask = indexer == -1 + if mask.any(): + if errors != "ignore": + raise KeyError(f"{list(labels[mask])} not found in axis") + indexer = indexer[~mask] + return self.delete(indexer) + + # -------------------------------------------------------------------- + # Generated Arithmetic, Comparison, and Unary Methods + + def _cmp_method(self, other, op): + """ + Wrapper used to dispatch comparison operations. + """ + if self.is_(other): + # fastpath + if op in {operator.eq, operator.le, operator.ge}: + arr = np.ones(len(self), dtype=bool) + if self._can_hold_na and not isinstance(self, ABCMultiIndex): + # TODO: should set MultiIndex._can_hold_na = False? + arr[self.isna()] = False + return arr + elif op in {operator.ne, operator.lt, operator.gt}: + arr = np.zeros(len(self), dtype=bool) + if self._can_hold_na and not isinstance(self, ABCMultiIndex): + arr[self.isna()] = True + return arr + + if isinstance(other, (np.ndarray, Index, ABCSeries, ExtensionArray)) and len( + self + ) != len(other): + raise ValueError("Lengths must match to compare") + + if not isinstance(other, ABCMultiIndex): + other = extract_array(other, extract_numpy=True) + else: + other = np.asarray(other) + + if is_object_dtype(self.dtype) and isinstance(other, ExtensionArray): + # e.g. 
PeriodArray, Categorical + with np.errstate(all="ignore"): + result = op(self._values, other) + + elif isinstance(self._values, ExtensionArray): + result = op(self._values, other) + + elif is_object_dtype(self.dtype) and not isinstance(self, ABCMultiIndex): + # don't pass MultiIndex + with np.errstate(all="ignore"): + result = ops.comp_method_OBJECT_ARRAY(op, self._values, other) + + else: + with np.errstate(all="ignore"): + result = ops.comparison_op(self._values, other, op) + + return result + + def _construct_result(self, result, name): + if isinstance(result, tuple): + return ( + Index._with_infer(result[0], name=name), + Index._with_infer(result[1], name=name), + ) + return Index._with_infer(result, name=name) + + def _arith_method(self, other, op): + if ( + isinstance(other, Index) + and is_object_dtype(other.dtype) + and type(other) is not Index + ): + # We return NotImplemented for object-dtype index *subclasses* so they have + # a chance to implement ops before we unwrap them. + # See https://github.com/pandas-dev/pandas/issues/31109 + return NotImplemented + + return super()._arith_method(other, op) + + @final + def _unary_method(self, op): + result = op(self._values) + return Index(result, name=self.name) + + def __abs__(self): + return self._unary_method(operator.abs) + + def __neg__(self): + return self._unary_method(operator.neg) + + def __pos__(self): + return self._unary_method(operator.pos) + + def __invert__(self): + # GH#8875 + return self._unary_method(operator.inv) + + # -------------------------------------------------------------------- + # Reductions + + def any(self, *args, **kwargs): + """ + Return whether any element is Truthy. + + Parameters + ---------- + *args + Required for compatibility with numpy. + **kwargs + Required for compatibility with numpy. + + Returns + ------- + any : bool or array-like (if axis is specified) + A single element array-like may be converted to bool. + + See Also + -------- + Index.all : Return whether all elements are True. + Series.all : Return whether all elements are True. + + Notes + ----- + Not a Number (NaN), positive infinity and negative infinity + evaluate to True because these are not equal to zero. + + Examples + -------- + >>> index = pd.Index([0, 1, 2]) + >>> index.any() + True + + >>> index = pd.Index([0, 0, 0]) + >>> index.any() + False + """ + nv.validate_any(args, kwargs) + self._maybe_disable_logical_methods("any") + # error: Argument 1 to "any" has incompatible type "ArrayLike"; expected + # "Union[Union[int, float, complex, str, bytes, generic], Sequence[Union[int, + # float, complex, str, bytes, generic]], Sequence[Sequence[Any]], + # _SupportsArray]" + return np.any(self.values) # type: ignore[arg-type] + + def all(self, *args, **kwargs): + """ + Return whether all elements are Truthy. + + Parameters + ---------- + *args + Required for compatibility with numpy. + **kwargs + Required for compatibility with numpy. + + Returns + ------- + all : bool or array-like (if axis is specified) + A single element array-like may be converted to bool. + + See Also + -------- + Index.any : Return whether any element in an Index is True. + Series.any : Return whether any element in a Series is True. + Series.all : Return whether all elements in a Series are True. + + Notes + ----- + Not a Number (NaN), positive infinity and negative infinity + evaluate to True because these are not equal to zero. + + Examples + -------- + True, because nonzero integers are considered True. 
+ + >>> pd.Index([1, 2, 3]).all() + True + + False, because ``0`` is considered False. + + >>> pd.Index([0, 1, 2]).all() + False + """ + nv.validate_all(args, kwargs) + self._maybe_disable_logical_methods("all") + # error: Argument 1 to "all" has incompatible type "ArrayLike"; expected + # "Union[Union[int, float, complex, str, bytes, generic], Sequence[Union[int, + # float, complex, str, bytes, generic]], Sequence[Sequence[Any]], + # _SupportsArray]" + return np.all(self.values) # type: ignore[arg-type] + + @final + def _maybe_disable_logical_methods(self, opname: str_t) -> None: + """ + raise if this Index subclass does not support any or all. + """ + if ( + isinstance(self, ABCMultiIndex) + or needs_i8_conversion(self.dtype) + or is_interval_dtype(self.dtype) + or is_categorical_dtype(self.dtype) + or is_float_dtype(self.dtype) + ): + # This call will raise + make_invalid_op(opname)(self) + + @Appender(IndexOpsMixin.argmin.__doc__) + def argmin(self, axis=None, skipna=True, *args, **kwargs): + nv.validate_argmin(args, kwargs) + nv.validate_minmax_axis(axis) + + if not self._is_multi and self.hasnans: + # Take advantage of cache + mask = self._isnan + if not skipna or mask.all(): + return -1 + return super().argmin(skipna=skipna) + + @Appender(IndexOpsMixin.argmax.__doc__) + def argmax(self, axis=None, skipna=True, *args, **kwargs): + nv.validate_argmax(args, kwargs) + nv.validate_minmax_axis(axis) + + if not self._is_multi and self.hasnans: + # Take advantage of cache + mask = self._isnan + if not skipna or mask.all(): + return -1 + return super().argmax(skipna=skipna) + + @doc(IndexOpsMixin.min) + def min(self, axis=None, skipna=True, *args, **kwargs): + nv.validate_min(args, kwargs) + nv.validate_minmax_axis(axis) + + if not len(self): + return self._na_value + + if len(self) and self.is_monotonic_increasing: + # quick check + first = self[0] + if not isna(first): + return first + + if not self._is_multi and self.hasnans: + # Take advantage of cache + mask = self._isnan + if not skipna or mask.all(): + return self._na_value + + if not self._is_multi and not isinstance(self._values, np.ndarray): + # "ExtensionArray" has no attribute "min" + return self._values.min(skipna=skipna) # type: ignore[attr-defined] + + return super().min(skipna=skipna) + + @doc(IndexOpsMixin.max) + def max(self, axis=None, skipna=True, *args, **kwargs): + nv.validate_max(args, kwargs) + nv.validate_minmax_axis(axis) + + if not len(self): + return self._na_value + + if len(self) and self.is_monotonic_increasing: + # quick check + last = self[-1] + if not isna(last): + return last + + if not self._is_multi and self.hasnans: + # Take advantage of cache + mask = self._isnan + if not skipna or mask.all(): + return self._na_value + + if not self._is_multi and not isinstance(self._values, np.ndarray): + # "ExtensionArray" has no attribute "max" + return self._values.max(skipna=skipna) # type: ignore[attr-defined] + + return super().max(skipna=skipna) + + # -------------------------------------------------------------------- + + @final + @property + def shape(self) -> Shape: + """ + Return a tuple of the shape of the underlying data. + """ + # See GH#27775, GH#27384 for history/reasoning in how this is defined. + return (len(self),) + + @final + def _deprecated_arg(self, value, name: str_t, methodname: str_t) -> None: + """ + Issue a FutureWarning if the arg/kwarg is not no_default. 
+ """ + if value is not no_default: + warnings.warn( + f"'{name}' argument in {methodname} is deprecated " + "and will be removed in a future version. Do not pass it.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + +def ensure_index_from_sequences(sequences, names=None) -> Index: + """ + Construct an index from sequences of data. + + A single sequence returns an Index. Many sequences returns a + MultiIndex. + + Parameters + ---------- + sequences : sequence of sequences + names : sequence of str + + Returns + ------- + index : Index or MultiIndex + + Examples + -------- + >>> ensure_index_from_sequences([[1, 2, 3]], names=["name"]) + Int64Index([1, 2, 3], dtype='int64', name='name') + + >>> ensure_index_from_sequences([["a", "a"], ["a", "b"]], names=["L1", "L2"]) + MultiIndex([('a', 'a'), + ('a', 'b')], + names=['L1', 'L2']) + + See Also + -------- + ensure_index + """ + from pandas.core.indexes.multi import MultiIndex + + if len(sequences) == 1: + if names is not None: + names = names[0] + return Index(sequences[0], name=names) + else: + return MultiIndex.from_arrays(sequences, names=names) + + +def ensure_index(index_like: AnyArrayLike | Sequence, copy: bool = False) -> Index: + """ + Ensure that we have an index from some index-like object. + + Parameters + ---------- + index_like : sequence + An Index or other sequence + copy : bool, default False + + Returns + ------- + index : Index or MultiIndex + + See Also + -------- + ensure_index_from_sequences + + Examples + -------- + >>> ensure_index(['a', 'b']) + Index(['a', 'b'], dtype='object') + + >>> ensure_index([('a', 'a'), ('b', 'c')]) + Index([('a', 'a'), ('b', 'c')], dtype='object') + + >>> ensure_index([['a', 'a'], ['b', 'c']]) + MultiIndex([('a', 'b'), + ('a', 'c')], + ) + """ + if isinstance(index_like, Index): + if copy: + index_like = index_like.copy() + return index_like + + if isinstance(index_like, ABCSeries): + name = index_like.name + return Index._with_infer(index_like, name=name, copy=copy) + + if is_iterator(index_like): + index_like = list(index_like) + + if isinstance(index_like, list): + if type(index_like) is not list: + # must check for exactly list here because of strict type + # check in clean_index_list + index_like = list(index_like) + + if len(index_like) and lib.is_all_arraylike(index_like): + from pandas.core.indexes.multi import MultiIndex + + return MultiIndex.from_arrays(index_like) + else: + return Index._with_infer(index_like, copy=copy, tupleize_cols=False) + else: + return Index._with_infer(index_like, copy=copy) + + +def ensure_has_len(seq): + """ + If seq is an iterator, put its values into a list. + """ + try: + len(seq) + except TypeError: + return list(seq) + else: + return seq + + +def trim_front(strings: list[str]) -> list[str]: + """ + Trims zeros and decimal points. + + Examples + -------- + >>> trim_front([" a", " b"]) + ['a', 'b'] + + >>> trim_front([" a", " "]) + ['a', ''] + """ + if not strings: + return strings + while all(strings) and all(x[0] == " " for x in strings): + strings = [x[1:] for x in strings] + return strings + + +def _validate_join_method(method: str) -> None: + if method not in ["left", "right", "inner", "outer"]: + raise ValueError(f"do not recognize join method {method}") + + +def maybe_extract_name(name, obj, cls) -> Hashable: + """ + If no name is passed, then extract it from data, validating hashability. + """ + if name is None and isinstance(obj, (Index, ABCSeries)): + # Note we don't just check for "name" attribute since that would + # pick up e.g. 
dtype.name + name = obj.name + + # GH#29069 + if not is_hashable(name): + raise TypeError(f"{cls.__name__}.name must be a hashable type") + + return name + + +_cast_depr_msg = ( + "In a future version, passing an object-dtype arraylike to pd.Index will " + "not infer numeric values to numeric dtype (matching the Series behavior). " + "To retain the old behavior, explicitly pass the desired dtype or use the " + "desired Index subclass" +) + + +def _maybe_cast_data_without_dtype( + subarr: np.ndarray, cast_numeric_deprecated: bool = True +) -> ArrayLike: + """ + If we have an arraylike input but no passed dtype, try to infer + a supported dtype. + + Parameters + ---------- + subarr : np.ndarray[object] + cast_numeric_deprecated : bool, default True + Whether to issue a FutureWarning when inferring numeric dtypes. + + Returns + ------- + np.ndarray or ExtensionArray + """ + + result = lib.maybe_convert_objects( + subarr, + convert_datetime=True, + convert_timedelta=True, + convert_period=True, + convert_interval=True, + dtype_if_all_nat=np.dtype("datetime64[ns]"), + ) + if result.dtype.kind in ["i", "u", "f"]: + if not cast_numeric_deprecated: + # i.e. we started with a list, not an ndarray[object] + return result + + warnings.warn( + "In a future version, the Index constructor will not infer numeric " + "dtypes when passed object-dtype sequences (matching Series behavior)", + FutureWarning, + stacklevel=3, + ) + if result.dtype.kind in ["b", "c"]: + return subarr + result = ensure_wrapped_if_datetimelike(result) + return result + + +def get_unanimous_names(*indexes: Index) -> tuple[Hashable, ...]: + """ + Return common name if all indices agree, otherwise None (level-by-level). + + Parameters + ---------- + indexes : list of Index objects + + Returns + ------- + list + A list representing the unanimous 'names' found. + """ + name_tups = [tuple(i.names) for i in indexes] + name_sets = [{*ns} for ns in zip_longest(*name_tups)] + names = tuple(ns.pop() if len(ns) == 1 else None for ns in name_sets) + return names + + +def unpack_nested_dtype(other: _IndexT) -> _IndexT: + """ + When checking if our dtype is comparable with another, we need + to unpack CategoricalDtype to look at its categories.dtype. + + Parameters + ---------- + other : Index + + Returns + ------- + Index + """ + dtype = other.dtype + if is_categorical_dtype(dtype): + # If there is ever a SparseIndex, this could get dispatched + # here too. 
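+ # e.g. a CategoricalIndex whose categories are strings should be
+ # comparable with a plain object-dtype Index of strings.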
+ # error: Item "dtype[Any]"/"ExtensionDtype" of "Union[dtype[Any], + # ExtensionDtype]" has no attribute "categories" + return dtype.categories # type: ignore[union-attr] + return other + + +def _maybe_try_sort(result, sort): + if sort is None: + try: + result = algos.safe_sort(result) + except TypeError as err: + warnings.warn( + f"{err}, sort order is undefined for incomparable objects.", + RuntimeWarning, + stacklevel=find_stack_level(), + ) + return result diff --git a/.local/lib/python3.8/site-packages/pandas/core/indexes/category.py b/.local/lib/python3.8/site-packages/pandas/core/indexes/category.py new file mode 100644 index 0000000..5be04c7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/indexes/category.py @@ -0,0 +1,589 @@ +from __future__ import annotations + +from typing import ( + Any, + Hashable, +) +import warnings + +import numpy as np + +from pandas._config import get_option + +from pandas._libs import index as libindex +from pandas._typing import ( + Dtype, + DtypeObj, + npt, +) +from pandas.util._decorators import ( + cache_readonly, + doc, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_scalar, + pandas_dtype, +) +from pandas.core.dtypes.missing import ( + is_valid_na_for_dtype, + isna, + notna, +) + +from pandas.core.arrays.categorical import ( + Categorical, + contains, +) +from pandas.core.construction import extract_array +import pandas.core.indexes.base as ibase +from pandas.core.indexes.base import ( + Index, + maybe_extract_name, +) +from pandas.core.indexes.extension import ( + NDArrayBackedExtensionIndex, + inherit_names, +) + +from pandas.io.formats.printing import pprint_thing + +_index_doc_kwargs: dict[str, str] = dict(ibase._index_doc_kwargs) +_index_doc_kwargs.update({"target_klass": "CategoricalIndex"}) + + +@inherit_names( + [ + "argsort", + "tolist", + "codes", + "categories", + "ordered", + "_reverse_indexer", + "searchsorted", + "is_dtype_equal", + "min", + "max", + ], + Categorical, +) +@inherit_names( + [ + "rename_categories", + "reorder_categories", + "add_categories", + "remove_categories", + "remove_unused_categories", + "set_categories", + "as_ordered", + "as_unordered", + ], + Categorical, + wrap=True, +) +class CategoricalIndex(NDArrayBackedExtensionIndex): + """ + Index based on an underlying :class:`Categorical`. + + CategoricalIndex, like Categorical, can only take on a limited, + and usually fixed, number of possible values (`categories`). Also, + like Categorical, it might have an order, but numerical operations + (additions, divisions, ...) are not possible. + + Parameters + ---------- + data : array-like (1-dimensional) + The values of the categorical. If `categories` are given, values not in + `categories` will be replaced with NaN. + categories : index-like, optional + The categories for the categorical. Items need to be unique. + If the categories are not given here (and also not in `dtype`), they + will be inferred from the `data`. + ordered : bool, optional + Whether or not this categorical is treated as an ordered + categorical. If not given here or in `dtype`, the resulting + categorical will be unordered. + dtype : CategoricalDtype or "category", optional + If :class:`CategoricalDtype`, cannot be used together with + `categories` or `ordered`. + copy : bool, default False + Make a copy of input ndarray. + name : object, optional + Name to be stored in the index. 
+ + Attributes + ---------- + codes + categories + ordered + + Methods + ------- + rename_categories + reorder_categories + add_categories + remove_categories + remove_unused_categories + set_categories + as_ordered + as_unordered + map + + Raises + ------ + ValueError + If the categories do not validate. + TypeError + If an explicit ``ordered=True`` is given but no `categories` and the + `values` are not sortable. + + See Also + -------- + Index : The base pandas Index type. + Categorical : A categorical array. + CategoricalDtype : Type for categorical data. + + Notes + ----- + See the `user guide + `__ + for more. + + Examples + -------- + >>> pd.CategoricalIndex(["a", "b", "c", "a", "b", "c"]) + CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'], + categories=['a', 'b', 'c'], ordered=False, dtype='category') + + ``CategoricalIndex`` can also be instantiated from a ``Categorical``: + + >>> c = pd.Categorical(["a", "b", "c", "a", "b", "c"]) + >>> pd.CategoricalIndex(c) + CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'], + categories=['a', 'b', 'c'], ordered=False, dtype='category') + + Ordered ``CategoricalIndex`` can have a min and max value. + + >>> ci = pd.CategoricalIndex( + ... ["a", "b", "c", "a", "b", "c"], ordered=True, categories=["c", "b", "a"] + ... ) + >>> ci + CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'], + categories=['c', 'b', 'a'], ordered=True, dtype='category') + >>> ci.min() + 'c' + """ + + _typ = "categoricalindex" + _data_cls = Categorical + + @property + def _can_hold_strings(self): + return self.categories._can_hold_strings + + @cache_readonly + def _should_fallback_to_positional(self) -> bool: + return self.categories._should_fallback_to_positional + + codes: np.ndarray + categories: Index + ordered: bool | None + _data: Categorical + _values: Categorical + + @property + def _engine_type(self): + # self.codes can have dtype int8, int16, int32 or int64, so we need + # to return the corresponding engine type (libindex.Int8Engine, etc.). + return { + np.int8: libindex.Int8Engine, + np.int16: libindex.Int16Engine, + np.int32: libindex.Int32Engine, + np.int64: libindex.Int64Engine, + }[self.codes.dtype.type] + + # -------------------------------------------------------------------- + # Constructors + + def __new__( + cls, + data=None, + categories=None, + ordered=None, + dtype: Dtype | None = None, + copy: bool = False, + name: Hashable = None, + ) -> CategoricalIndex: + + name = maybe_extract_name(name, data, cls) + + if data is None: + # GH#38944 + warnings.warn( + "Constructing a CategoricalIndex without passing data is " + "deprecated and will raise in a future version. " + "Use CategoricalIndex([], ...) 
instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + data = [] + + if is_scalar(data): + raise cls._scalar_data_error(data) + + data = Categorical( + data, categories=categories, ordered=ordered, dtype=dtype, copy=copy + ) + + return cls._simple_new(data, name=name) + + # -------------------------------------------------------------------- + + def _is_dtype_compat(self, other) -> Categorical: + """ + *this is an internal non-public method* + + provide a comparison between the dtype of self and other (coercing if + needed) + + Parameters + ---------- + other : Index + + Returns + ------- + Categorical + + Raises + ------ + TypeError if the dtypes are not compatible + """ + if is_categorical_dtype(other): + other = extract_array(other) + if not other._categories_match_up_to_permutation(self): + raise TypeError( + "categories must match existing categories when appending" + ) + + elif other._is_multi: + # preempt raising NotImplementedError in isna call + raise TypeError("MultiIndex is not dtype-compatible with CategoricalIndex") + else: + values = other + + cat = Categorical(other, dtype=self.dtype) + other = CategoricalIndex(cat) + if not other.isin(values).all(): + raise TypeError( + "cannot append a non-category item to a CategoricalIndex" + ) + other = other._values + + if not ((other == values) | (isna(other) & isna(values))).all(): + # GH#37667 see test_equals_non_category + raise TypeError( + "categories must match existing categories when appending" + ) + + return other + + @doc(Index.astype) + def astype(self, dtype: Dtype, copy: bool = True) -> Index: + from pandas.core.api import NumericIndex + + dtype = pandas_dtype(dtype) + + categories = self.categories + # the super method always returns Int64Index, UInt64Index and Float64Index + # but if the categories are a NumericIndex with dtype float32, we want to + # return an index with the same dtype as self.categories. + if categories._is_backward_compat_public_numeric_index: + assert isinstance(categories, NumericIndex) # mypy complaint fix + try: + categories._validate_dtype(dtype) + except ValueError: + pass + else: + new_values = self._data.astype(dtype, copy=copy) + # pass copy=False because any copying has been done in the + # _data.astype call above + return categories._constructor(new_values, name=self.name, copy=False) + + return super().astype(dtype, copy=copy) + + def equals(self, other: object) -> bool: + """ + Determine if two CategoricalIndex objects contain the same elements. + + Returns + ------- + bool + If two CategoricalIndex objects have equal elements True, + otherwise False. 
+ """ + if self.is_(other): + return True + + if not isinstance(other, Index): + return False + + try: + other = self._is_dtype_compat(other) + except (TypeError, ValueError): + return False + + return self._data.equals(other) + + # -------------------------------------------------------------------- + # Rendering Methods + + @property + def _formatter_func(self): + return self.categories._formatter_func + + def _format_attrs(self): + """ + Return a list of tuples of the (attr,formatted_value) + """ + max_categories = ( + 10 + if get_option("display.max_categories") == 0 + else get_option("display.max_categories") + ) + attrs: list[tuple[str, str | int | bool | None]] + attrs = [ + ( + "categories", + ibase.default_pprint(self.categories, max_seq_items=max_categories), + ), + ("ordered", self.ordered), + ] + extra = super()._format_attrs() + return attrs + extra + + def _format_with_header(self, header: list[str], na_rep: str) -> list[str]: + result = [ + pprint_thing(x, escape_chars=("\t", "\r", "\n")) if notna(x) else na_rep + for x in self._values + ] + return header + result + + # -------------------------------------------------------------------- + + @property + def inferred_type(self) -> str: + return "categorical" + + @doc(Index.__contains__) + def __contains__(self, key: Any) -> bool: + # if key is a NaN, check if any NaN is in self. + if is_valid_na_for_dtype(key, self.categories.dtype): + return self.hasnans + + return contains(self, key, container=self._engine) + + # TODO(2.0): remove reindex once non-unique deprecation is enforced + def reindex( + self, target, method=None, level=None, limit=None, tolerance=None + ) -> tuple[Index, npt.NDArray[np.intp] | None]: + """ + Create index with target's values (move/add/delete values as necessary) + + Returns + ------- + new_index : pd.Index + Resulting index + indexer : np.ndarray[np.intp] or None + Indices of output values in original index + + """ + if method is not None: + raise NotImplementedError( + "argument method is not implemented for CategoricalIndex.reindex" + ) + if level is not None: + raise NotImplementedError( + "argument level is not implemented for CategoricalIndex.reindex" + ) + if limit is not None: + raise NotImplementedError( + "argument limit is not implemented for CategoricalIndex.reindex" + ) + + target = ibase.ensure_index(target) + + if self.equals(target): + indexer = None + missing = np.array([], dtype=np.intp) + else: + indexer, missing = self.get_indexer_non_unique(target) + if not self.is_unique: + # GH#42568 + warnings.warn( + "reindexing with a non-unique Index is deprecated and will " + "raise in a future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + if len(self) and indexer is not None: + new_target = self.take(indexer) + else: + new_target = target + + # filling in missing if needed + if len(missing): + cats = self.categories.get_indexer(target) + + if not isinstance(target, CategoricalIndex) or (cats == -1).any(): + new_target, indexer, _ = super()._reindex_non_unique(target) + else: + + codes = new_target.codes.copy() + codes[indexer == -1] = cats[missing] + cat = self._data._from_backing_data(codes) + new_target = type(self)._simple_new(cat, name=self.name) + + # we always want to return an Index type here + # to be consistent with .reindex for other index types (e.g. 
they don't
+        # coerce based on the actual values, only on the dtype)
+        # unless we had an initial Categorical to begin with
+        # in which case we are going to conform to the passed Categorical
+        if is_categorical_dtype(target):
+            cat = Categorical(new_target, dtype=target.dtype)
+            new_target = type(self)._simple_new(cat, name=self.name)
+        else:
+            # e.g. test_reindex_with_categoricalindex, test_reindex_duplicate_target
+            new_target = np.asarray(new_target)
+            new_target = Index._with_infer(new_target, name=self.name)
+
+        return new_target, indexer
+
+    # --------------------------------------------------------------------
+    # Indexing Methods
+
+    def _maybe_cast_indexer(self, key) -> int:
+        # GH#41933: we have to do this instead of self._data._validate_scalar
+        # because this will correctly get partial-indexing on Interval categories
+        try:
+            return self._data._unbox_scalar(key)
+        except KeyError:
+            if is_valid_na_for_dtype(key, self.categories.dtype):
+                return -1
+            raise
+
+    def _maybe_cast_listlike_indexer(self, values) -> CategoricalIndex:
+        if isinstance(values, CategoricalIndex):
+            values = values._data
+        if isinstance(values, Categorical):
+            # Indexing on codes is more efficient if categories are the same,
+            # so we can apply some optimizations based on the degree of
+            # dtype-matching.
+            cat = self._data._encode_with_my_categories(values)
+            codes = cat._codes
+        else:
+            codes = self.categories.get_indexer(values)
+            codes = codes.astype(self.codes.dtype, copy=False)
+            cat = self._data._from_backing_data(codes)
+        return type(self)._simple_new(cat)
+
+    # --------------------------------------------------------------------
+
+    def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
+        return self.categories._is_comparable_dtype(dtype)
+
+    def take_nd(self, *args, **kwargs):
+        """Alias for `take`"""
+        warnings.warn(
+            "CategoricalIndex.take_nd is deprecated, use CategoricalIndex.take "
+            "instead.",
+            FutureWarning,
+            stacklevel=find_stack_level(),
+        )
+        return self.take(*args, **kwargs)
+
+    def map(self, mapper):
+        """
+        Map values using an input mapping or function.
+
+        Maps the values (their categories, not the codes) of the index to new
+        categories. If the mapping correspondence is one-to-one the result is a
+        :class:`~pandas.CategoricalIndex` which has the same order property as
+        the original, otherwise an :class:`~pandas.Index` is returned.
+
+        If a `dict` or :class:`~pandas.Series` is used any unmapped category is
+        mapped to `NaN`. Note that if this happens an :class:`~pandas.Index`
+        will be returned.
+
+        Parameters
+        ----------
+        mapper : function, dict, or Series
+            Mapping correspondence.
+
+        Returns
+        -------
+        pandas.CategoricalIndex or pandas.Index
+            Mapped index.
+
+        See Also
+        --------
+        Index.map : Apply a mapping correspondence on an
+            :class:`~pandas.Index`.
+        Series.map : Apply a mapping correspondence on a
+            :class:`~pandas.Series`.
+        Series.apply : Apply more complex functions on a
+            :class:`~pandas.Series`.
+ + Examples + -------- + >>> idx = pd.CategoricalIndex(['a', 'b', 'c']) + >>> idx + CategoricalIndex(['a', 'b', 'c'], categories=['a', 'b', 'c'], + ordered=False, dtype='category') + >>> idx.map(lambda x: x.upper()) + CategoricalIndex(['A', 'B', 'C'], categories=['A', 'B', 'C'], + ordered=False, dtype='category') + >>> idx.map({'a': 'first', 'b': 'second', 'c': 'third'}) + CategoricalIndex(['first', 'second', 'third'], categories=['first', + 'second', 'third'], ordered=False, dtype='category') + + If the mapping is one-to-one the ordering of the categories is + preserved: + + >>> idx = pd.CategoricalIndex(['a', 'b', 'c'], ordered=True) + >>> idx + CategoricalIndex(['a', 'b', 'c'], categories=['a', 'b', 'c'], + ordered=True, dtype='category') + >>> idx.map({'a': 3, 'b': 2, 'c': 1}) + CategoricalIndex([3, 2, 1], categories=[3, 2, 1], ordered=True, + dtype='category') + + If the mapping is not one-to-one an :class:`~pandas.Index` is returned: + + >>> idx.map({'a': 'first', 'b': 'second', 'c': 'first'}) + Index(['first', 'second', 'first'], dtype='object') + + If a `dict` is used, all unmapped categories are mapped to `NaN` and + the result is an :class:`~pandas.Index`: + + >>> idx.map({'a': 'first', 'b': 'second'}) + Index(['first', 'second', nan], dtype='object') + """ + mapped = self._values.map(mapper) + return Index(mapped, name=self.name) + + def _concat(self, to_concat: list[Index], name: Hashable) -> Index: + # if calling index is category, don't check dtype of others + try: + codes = np.concatenate([self._is_dtype_compat(c).codes for c in to_concat]) + except TypeError: + # not all to_concat elements are among our categories (or NA) + from pandas.core.dtypes.concat import concat_compat + + res = concat_compat(to_concat) + return Index(res, name=name) + else: + cat = self._data._from_backing_data(codes) + return type(self)._simple_new(cat, name=name) diff --git a/.local/lib/python3.8/site-packages/pandas/core/indexes/datetimelike.py b/.local/lib/python3.8/site-packages/pandas/core/indexes/datetimelike.py new file mode 100644 index 0000000..8e94d72 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/indexes/datetimelike.py @@ -0,0 +1,709 @@ +""" +Base and utility classes for tseries type pandas objects. 
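+
+(Editor's illustrative addition, referring back to ``CategoricalIndex._concat``
+at the end of the previous file - an assumed sketch, not pandas documentation:
+appending values that are all among the existing categories keeps the
+categorical dtype, while anything else falls back to a plain object Index.)
+
+>>> import pandas as pd
+>>> ci = pd.CategoricalIndex(["a", "b"])
+>>> ci.append(pd.CategoricalIndex(["b", "a"]))
+CategoricalIndex(['a', 'b', 'b', 'a'], categories=['a', 'b'], ordered=False, dtype='category')
+>>> ci.append(pd.Index(["c"]))
+Index(['a', 'b', 'c'], dtype='object')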
+""" +from __future__ import annotations + +from datetime import datetime +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Sequence, + TypeVar, + cast, + final, +) +import warnings + +import numpy as np + +from pandas._libs import ( + NaT, + Timedelta, + lib, +) +from pandas._libs.tslibs import ( + BaseOffset, + Resolution, + Tick, + parsing, + to_offset, +) +from pandas.compat.numpy import function as nv +from pandas.util._decorators import ( + Appender, + cache_readonly, + doc, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_dtype_equal, + is_integer, + is_list_like, +) +from pandas.core.dtypes.concat import concat_compat + +from pandas.core.arrays import ( + DatetimeArray, + ExtensionArray, + PeriodArray, + TimedeltaArray, +) +from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin +import pandas.core.common as com +import pandas.core.indexes.base as ibase +from pandas.core.indexes.base import ( + Index, + _index_shared_docs, +) +from pandas.core.indexes.extension import ( + NDArrayBackedExtensionIndex, + inherit_names, +) +from pandas.core.indexes.range import RangeIndex +from pandas.core.tools.timedeltas import to_timedelta + +if TYPE_CHECKING: + from pandas import CategoricalIndex + +_index_doc_kwargs = dict(ibase._index_doc_kwargs) + +_T = TypeVar("_T", bound="DatetimeIndexOpsMixin") +_TDT = TypeVar("_TDT", bound="DatetimeTimedeltaMixin") + + +@inherit_names( + ["inferred_freq", "_resolution_obj", "resolution"], + DatetimeLikeArrayMixin, + cache=True, +) +@inherit_names(["mean", "asi8", "freq", "freqstr"], DatetimeLikeArrayMixin) +class DatetimeIndexOpsMixin(NDArrayBackedExtensionIndex): + """ + Common ops mixin to support a unified interface datetimelike Index. + """ + + _is_numeric_dtype = False + _can_hold_strings = False + _data: DatetimeArray | TimedeltaArray | PeriodArray + freq: BaseOffset | None + freqstr: str | None + _resolution_obj: Resolution + + # error: "Callable[[Any], Any]" has no attribute "fget" + hasnans = cast( + bool, + cache_readonly( + DatetimeLikeArrayMixin._hasna.fget # type: ignore[attr-defined] + ), + ) + + @property + def _is_all_dates(self) -> bool: + return True + + # ------------------------------------------------------------------------ + + def equals(self, other: Any) -> bool: + """ + Determines if two Index objects contain the same elements. + """ + if self.is_(other): + return True + + if not isinstance(other, Index): + return False + elif other.dtype.kind in ["f", "i", "u", "c"]: + return False + elif not isinstance(other, type(self)): + should_try = False + inferable = self._data._infer_matches + if other.dtype == object: + should_try = other.inferred_type in inferable + elif is_categorical_dtype(other.dtype): + other = cast("CategoricalIndex", other) + should_try = other.categories.inferred_type in inferable + + if should_try: + try: + other = type(self)(other) + except (ValueError, TypeError, OverflowError): + # e.g. 
+ # ValueError -> cannot parse str entry, or OutOfBoundsDatetime + # TypeError -> trying to convert IntervalIndex to DatetimeIndex + # OverflowError -> Index([very_large_timedeltas]) + return False + + if not is_dtype_equal(self.dtype, other.dtype): + # have different timezone + return False + + return np.array_equal(self.asi8, other.asi8) + + @Appender(Index.__contains__.__doc__) + def __contains__(self, key: Any) -> bool: + hash(key) + try: + self.get_loc(key) + except (KeyError, TypeError, ValueError): + return False + return True + + _can_hold_na = True + + def _convert_tolerance(self, tolerance, target): + tolerance = np.asarray(to_timedelta(tolerance).to_numpy()) + return super()._convert_tolerance(tolerance, target) + + # -------------------------------------------------------------------- + # Rendering Methods + + def format( + self, + name: bool = False, + formatter: Callable | None = None, + na_rep: str = "NaT", + date_format: str | None = None, + ) -> list[str]: + """ + Render a string representation of the Index. + """ + header = [] + if name: + header.append( + ibase.pprint_thing(self.name, escape_chars=("\t", "\r", "\n")) + if self.name is not None + else "" + ) + + if formatter is not None: + return header + list(self.map(formatter)) + + return self._format_with_header(header, na_rep=na_rep, date_format=date_format) + + def _format_with_header( + self, header: list[str], na_rep: str = "NaT", date_format: str | None = None + ) -> list[str]: + # matches base class except for whitespace padding and date_format + return header + list( + self._format_native_types(na_rep=na_rep, date_format=date_format) + ) + + @property + def _formatter_func(self): + return self._data._formatter() + + def _format_attrs(self): + """ + Return a list of tuples of the (attr,formatted_value). + """ + attrs = super()._format_attrs() + for attrib in self._attributes: + # iterating over _attributes prevents us from doing this for PeriodIndex + if attrib == "freq": + freq = self.freqstr + if freq is not None: + freq = repr(freq) # e.g. 
D -> 'D' + attrs.append(("freq", freq)) + return attrs + + @Appender(Index._summary.__doc__) + def _summary(self, name=None) -> str: + result = super()._summary(name=name) + if self.freq: + result += f"\nFreq: {self.freqstr}" + + return result + + # -------------------------------------------------------------------- + # Indexing Methods + + def _can_partial_date_slice(self, reso: Resolution) -> bool: + raise NotImplementedError + + def _parsed_string_to_bounds(self, reso: Resolution, parsed): + raise NotImplementedError + + def _parse_with_reso(self, label: str): + # overridden by TimedeltaIndex + parsed, reso_str = parsing.parse_time_string(label, self.freq) + reso = Resolution.from_attrname(reso_str) + return parsed, reso + + def _get_string_slice(self, key: str): + parsed, reso = self._parse_with_reso(key) + try: + return self._partial_date_slice(reso, parsed) + except KeyError as err: + raise KeyError(key) from err + + @final + def _partial_date_slice( + self, + reso: Resolution, + parsed: datetime, + ): + """ + Parameters + ---------- + reso : Resolution + parsed : datetime + + Returns + ------- + slice or ndarray[intp] + """ + if not self._can_partial_date_slice(reso): + raise ValueError + + t1, t2 = self._parsed_string_to_bounds(reso, parsed) + vals = self._data._ndarray + unbox = self._data._unbox + + if self.is_monotonic_increasing: + + if len(self) and ( + (t1 < self[0] and t2 < self[0]) or (t1 > self[-1] and t2 > self[-1]) + ): + # we are out of range + raise KeyError + + # TODO: does this depend on being monotonic _increasing_? + + # a monotonic (sorted) series can be sliced + left = vals.searchsorted(unbox(t1), side="left") + right = vals.searchsorted(unbox(t2), side="right") + return slice(left, right) + + else: + lhs_mask = vals >= unbox(t1) + rhs_mask = vals <= unbox(t2) + + # try to find the dates + return (lhs_mask & rhs_mask).nonzero()[0] + + def _maybe_cast_slice_bound(self, label, side: str, kind=lib.no_default): + """ + If label is a string, cast it to scalar type according to resolution. + + Parameters + ---------- + label : object + side : {'left', 'right'} + kind : {'loc', 'getitem'} or None + + Returns + ------- + label : object + + Notes + ----- + Value of `side` parameter should be validated in caller. + """ + assert kind in ["loc", "getitem", None, lib.no_default] + self._deprecated_arg(kind, "kind", "_maybe_cast_slice_bound") + + if isinstance(label, str): + try: + parsed, reso = self._parse_with_reso(label) + except ValueError as err: + # DTI -> parsing.DateParseError + # TDI -> 'unit abbreviation w/o a number' + # PI -> string cannot be parsed as datetime-like + raise self._invalid_indexer("slice", label) from err + + lower, upper = self._parsed_string_to_bounds(reso, parsed) + return lower if side == "left" else upper + elif not isinstance(label, self._data._recognized_scalars): + raise self._invalid_indexer("slice", label) + + return label + + # -------------------------------------------------------------------- + # Arithmetic Methods + + def shift(self: _T, periods: int = 1, freq=None) -> _T: + """ + Shift index by desired number of time frequency increments. + + This method is for shifting the values of datetime-like indexes + by a specified time increment a given number of times. + + Parameters + ---------- + periods : int, default 1 + Number of periods (or increments) to shift by, + can be positive or negative. + freq : pandas.DateOffset, pandas.Timedelta or string, optional + Frequency increment to shift by. 
+ If None, the index is shifted by its own `freq` attribute. + Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc. + + Returns + ------- + pandas.DatetimeIndex + Shifted index. + + See Also + -------- + Index.shift : Shift values of Index. + PeriodIndex.shift : Shift values of PeriodIndex. + """ + arr = self._data.view() + arr._freq = self.freq + result = arr._time_shift(periods, freq=freq) + return type(self)._simple_new(result, name=self.name) + + # -------------------------------------------------------------------- + + @doc(Index._maybe_cast_listlike_indexer) + def _maybe_cast_listlike_indexer(self, keyarr): + try: + res = self._data._validate_listlike(keyarr, allow_object=True) + except (ValueError, TypeError): + if not isinstance(keyarr, ExtensionArray): + # e.g. we don't want to cast DTA to ndarray[object] + res = com.asarray_tuplesafe(keyarr) + # TODO: com.asarray_tuplesafe shouldn't cast e.g. DatetimeArray + else: + res = keyarr + return Index(res, dtype=res.dtype) + + +class DatetimeTimedeltaMixin(DatetimeIndexOpsMixin): + """ + Mixin class for methods shared by DatetimeIndex and TimedeltaIndex, + but not PeriodIndex + """ + + _data: DatetimeArray | TimedeltaArray + _comparables = ["name", "freq"] + _attributes = ["name", "freq"] + + # Compat for frequency inference, see GH#23789 + _is_monotonic_increasing = Index.is_monotonic_increasing + _is_monotonic_decreasing = Index.is_monotonic_decreasing + _is_unique = Index.is_unique + + _join_precedence = 10 + + def _with_freq(self, freq): + arr = self._data._with_freq(freq) + return type(self)._simple_new(arr, name=self._name) + + def is_type_compatible(self, kind: str) -> bool: + warnings.warn( + f"{type(self).__name__}.is_type_compatible is deprecated and will be " + "removed in a future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return kind in self._data._infer_matches + + @property + def values(self) -> np.ndarray: + # NB: For Datetime64TZ this is lossy + return self._data._ndarray + + # -------------------------------------------------------------------- + # Set Operation Methods + + @cache_readonly + def _as_range_index(self) -> RangeIndex: + # Convert our i8 representations to RangeIndex + # Caller is responsible for checking isinstance(self.freq, Tick) + freq = cast(Tick, self.freq) + tick = freq.delta.value + rng = range(self[0].value, self[-1].value + tick, tick) + return RangeIndex(rng) + + def _can_range_setop(self, other): + return isinstance(self.freq, Tick) and isinstance(other.freq, Tick) + + def _wrap_range_setop(self, other, res_i8): + new_freq = None + if not len(res_i8): + # RangeIndex defaults to step=1, which we don't want. + new_freq = self.freq + elif isinstance(res_i8, RangeIndex): + new_freq = to_offset(Timedelta(res_i8.step)) + res_i8 = res_i8 + + # TODO: we cannot just do + # type(self._data)(res_i8.values, dtype=self.dtype, freq=new_freq) + # because test_setops_preserve_freq fails with _validate_frequency raising. + # This raising is incorrect, as 'on_freq' is incorrect. This will + # be fixed by GH#41493 + res_values = res_i8.values.view(self._data._ndarray.dtype) + result = type(self._data)._simple_new( + res_values, dtype=self.dtype, freq=new_freq + ) + return self._wrap_setop_result(other, result) + + def _range_intersect(self, other, sort): + # Dispatch to RangeIndex intersection logic. 
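+        # [Editor's note - illustrative sketch with an assumed doctest, not
+        # pandas source. With Tick frequencies both operands reduce to plain
+        # integer ranges of i8 values, so the intersection can be computed as
+        # range arithmetic and keeps its freq cheaply:]
+        # >>> import pandas as pd
+        # >>> left = pd.date_range("2020-01-01 00:00", periods=5, freq="H")
+        # >>> right = pd.date_range("2020-01-01 02:00", periods=5, freq="H")
+        # >>> left.intersection(right)
+        # DatetimeIndex(['2020-01-01 02:00:00', '2020-01-01 03:00:00',
+        #                '2020-01-01 04:00:00'],
+        #               dtype='datetime64[ns]', freq='H')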
+ left = self._as_range_index + right = other._as_range_index + res_i8 = left.intersection(right, sort=sort) + return self._wrap_range_setop(other, res_i8) + + def _range_union(self, other, sort): + # Dispatch to RangeIndex union logic. + left = self._as_range_index + right = other._as_range_index + res_i8 = left.union(right, sort=sort) + return self._wrap_range_setop(other, res_i8) + + def _intersection(self, other: Index, sort=False) -> Index: + """ + intersection specialized to the case with matching dtypes and both non-empty. + """ + other = cast("DatetimeTimedeltaMixin", other) + + if self._can_range_setop(other): + return self._range_intersect(other, sort=sort) + + if not self._can_fast_intersect(other): + result = Index._intersection(self, other, sort=sort) + # We need to invalidate the freq because Index._intersection + # uses _shallow_copy on a view of self._data, which will preserve + # self.freq if we're not careful. + # At this point we should have result.dtype == self.dtype + # and type(result) is type(self._data) + result = self._wrap_setop_result(other, result) + return result._with_freq(None)._with_freq("infer") + + else: + return self._fast_intersect(other, sort) + + def _fast_intersect(self, other, sort): + # to make our life easier, "sort" the two ranges + if self[0] <= other[0]: + left, right = self, other + else: + left, right = other, self + + # after sorting, the intersection always starts with the right index + # and ends with the index of which the last elements is smallest + end = min(left[-1], right[-1]) + start = right[0] + + if end < start: + result = self[:0] + else: + lslice = slice(*left.slice_locs(start, end)) + result = left._values[lslice] + + return result + + def _can_fast_intersect(self: _T, other: _T) -> bool: + # Note: we only get here with len(self) > 0 and len(other) > 0 + if self.freq is None: + return False + + elif other.freq != self.freq: + return False + + elif not self.is_monotonic_increasing: + # Because freq is not None, we must then be monotonic decreasing + return False + + # this along with matching freqs ensure that we "line up", + # so intersection will preserve freq + # Note we are assuming away Ticks, as those go through _range_intersect + # GH#42104 + return self.freq.n == 1 + + def _can_fast_union(self: _T, other: _T) -> bool: + # Assumes that type(self) == type(other), as per the annotation + # The ability to fast_union also implies that `freq` should be + # retained on union. + freq = self.freq + + if freq is None or freq != other.freq: + return False + + if not self.is_monotonic_increasing: + # Because freq is not None, we must then be monotonic decreasing + # TODO: do union on the reversed indexes? 
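+            # [Editor's note - illustrative, assumed behavior, not pandas
+            # source. When this predicate does hold, e.g. for two adjoining
+            # month-start ranges (a non-Tick freq, so the range-setop fast
+            # path above does not apply), union retains the freq:]
+            # >>> pd.date_range("2020-01-01", periods=3, freq="MS").union(
+            # ...     pd.date_range("2020-04-01", periods=3, freq="MS"))
+            # DatetimeIndex(['2020-01-01', '2020-02-01', '2020-03-01',
+            #                '2020-04-01', '2020-05-01', '2020-06-01'],
+            #               dtype='datetime64[ns]', freq='MS')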
+ return False + + if len(self) == 0 or len(other) == 0: + # only reached via union_many + return True + + # to make our life easier, "sort" the two ranges + if self[0] <= other[0]: + left, right = self, other + else: + left, right = other, self + + right_start = right[0] + left_end = left[-1] + + # Only need to "adjoin", not overlap + return (right_start == left_end + freq) or right_start in left + + def _fast_union(self: _TDT, other: _TDT, sort=None) -> _TDT: + # Caller is responsible for ensuring self and other are non-empty + + # to make our life easier, "sort" the two ranges + if self[0] <= other[0]: + left, right = self, other + elif sort is False: + # TDIs are not in the "correct" order and we don't want + # to sort but want to remove overlaps + left, right = self, other + left_start = left[0] + loc = right.searchsorted(left_start, side="left") + right_chunk = right._values[:loc] + dates = concat_compat((left._values, right_chunk)) + result = type(self)._simple_new(dates, name=self.name) + return result + else: + left, right = other, self + + left_end = left[-1] + right_end = right[-1] + + # concatenate + if left_end < right_end: + loc = right.searchsorted(left_end, side="right") + right_chunk = right._values[loc:] + dates = concat_compat([left._values, right_chunk]) + # The can_fast_union check ensures that the result.freq + # should match self.freq + dates = type(self._data)(dates, freq=self.freq) + result = type(self)._simple_new(dates) + return result + else: + return left + + def _union(self, other, sort): + # We are called by `union`, which is responsible for this validation + assert isinstance(other, type(self)) + assert self.dtype == other.dtype + + if self._can_range_setop(other): + return self._range_union(other, sort=sort) + + if self._can_fast_union(other): + result = self._fast_union(other, sort=sort) + # in the case with sort=None, the _can_fast_union check ensures + # that result.freq == self.freq + return result + else: + return super()._union(other, sort)._with_freq("infer") + + # -------------------------------------------------------------------- + # Join Methods + + def _get_join_freq(self, other): + """ + Get the freq to attach to the result of a join operation. + """ + freq = None + if self._can_fast_union(other): + freq = self.freq + return freq + + def _wrap_joined_index(self, joined, other): + assert other.dtype == self.dtype, (other.dtype, self.dtype) + result = super()._wrap_joined_index(joined, other) + result._data._freq = self._get_join_freq(other) + return result + + def _get_engine_target(self) -> np.ndarray: + # engine methods and libjoin methods need dt64/td64 values cast to i8 + return self._data._ndarray.view("i8") + + def _from_join_target(self, result: np.ndarray): + # view e.g. i8 back to M8[ns] + result = result.view(self._data._ndarray.dtype) + return self._data._from_backing_data(result) + + # -------------------------------------------------------------------- + # List-like Methods + + def _get_delete_freq(self, loc: int | slice | Sequence[int]): + """ + Find the `freq` for self.delete(loc). 
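+
+        (Editor's illustrative addition - assumed doctests based on the code
+        below: freq survives only when an endpoint is deleted.)
+
+        >>> import pandas as pd
+        >>> dti = pd.date_range("2020-01-01", periods=4, freq="D")
+        >>> dti.delete(0).freq  # deleting an endpoint preserves the freq
+        <Day>
+        >>> print(dti.delete(1).freq)  # deleting an interior point does not
+        None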
+ """ + freq = None + if self.freq is not None: + if is_integer(loc): + if loc in (0, -len(self), -1, len(self) - 1): + freq = self.freq + else: + if is_list_like(loc): + # error: Incompatible types in assignment (expression has + # type "Union[slice, ndarray]", variable has type + # "Union[int, slice, Sequence[int]]") + loc = lib.maybe_indices_to_slice( # type: ignore[assignment] + np.asarray(loc, dtype=np.intp), len(self) + ) + if isinstance(loc, slice) and loc.step in (1, None): + if loc.start in (0, None) or loc.stop in (len(self), None): + freq = self.freq + return freq + + def _get_insert_freq(self, loc: int, item): + """ + Find the `freq` for self.insert(loc, item). + """ + value = self._data._validate_scalar(item) + item = self._data._box_func(value) + + freq = None + if self.freq is not None: + # freq can be preserved on edge cases + if self.size: + if item is NaT: + pass + elif (loc == 0 or loc == -len(self)) and item + self.freq == self[0]: + freq = self.freq + elif (loc == len(self)) and item - self.freq == self[-1]: + freq = self.freq + else: + # Adding a single item to an empty index may preserve freq + if isinstance(self.freq, Tick): + # all TimedeltaIndex cases go through here; is_on_offset + # would raise TypeError + freq = self.freq + elif self.freq.is_on_offset(item): + freq = self.freq + return freq + + @doc(NDArrayBackedExtensionIndex.delete) + def delete(self, loc): + result = super().delete(loc) + result._data._freq = self._get_delete_freq(loc) + return result + + @doc(NDArrayBackedExtensionIndex.insert) + def insert(self, loc: int, item): + result = super().insert(loc, item) + if isinstance(result, type(self)): + # i.e. parent class method did not cast + result._data._freq = self._get_insert_freq(loc, item) + return result + + # -------------------------------------------------------------------- + # NDArray-Like Methods + + @Appender(_index_shared_docs["take"] % _index_doc_kwargs) + def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): + nv.validate_take((), kwargs) + indices = np.asarray(indices, dtype=np.intp) + + result = NDArrayBackedExtensionIndex.take( + self, indices, axis, allow_fill, fill_value, **kwargs + ) + + maybe_slice = lib.maybe_indices_to_slice(indices, len(self)) + if isinstance(maybe_slice, slice): + freq = self._data._get_getitem_freq(maybe_slice) + result._data._freq = freq + return result diff --git a/.local/lib/python3.8/site-packages/pandas/core/indexes/datetimes.py b/.local/lib/python3.8/site-packages/pandas/core/indexes/datetimes.py new file mode 100644 index 0000000..004d860 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/indexes/datetimes.py @@ -0,0 +1,1197 @@ +from __future__ import annotations + +from datetime import ( + date, + datetime, + time, + timedelta, + tzinfo, +) +import operator +from typing import ( + TYPE_CHECKING, + Hashable, + Literal, +) +import warnings + +import numpy as np + +from pandas._libs import ( + NaT, + Period, + Timestamp, + index as libindex, + lib, +) +from pandas._libs.tslibs import ( + Resolution, + timezones, + to_offset, +) +from pandas._libs.tslibs.offsets import prefix_mapping +from pandas._typing import ( + Dtype, + DtypeObj, + npt, +) +from pandas.util._decorators import ( + cache_readonly, + doc, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + DT64NS_DTYPE, + is_datetime64_dtype, + is_datetime64tz_dtype, + is_scalar, +) +from pandas.core.dtypes.missing import is_valid_na_for_dtype + +from 
pandas.core.arrays.datetimes import ( + DatetimeArray, + tz_to_dtype, +) +import pandas.core.common as com +from pandas.core.indexes.base import ( + Index, + get_unanimous_names, + maybe_extract_name, +) +from pandas.core.indexes.datetimelike import DatetimeTimedeltaMixin +from pandas.core.indexes.extension import inherit_names +from pandas.core.tools.times import to_time + +if TYPE_CHECKING: + from pandas import ( + DataFrame, + Float64Index, + PeriodIndex, + TimedeltaIndex, + ) + + +def _new_DatetimeIndex(cls, d): + """ + This is called upon unpickling, rather than the default which doesn't + have arguments and breaks __new__ + """ + if "data" in d and not isinstance(d["data"], DatetimeIndex): + # Avoid need to verify integrity by calling simple_new directly + data = d.pop("data") + if not isinstance(data, DatetimeArray): + # For backward compat with older pickles, we may need to construct + # a DatetimeArray to adapt to the newer _simple_new signature + tz = d.pop("tz") + freq = d.pop("freq") + dta = DatetimeArray._simple_new(data, dtype=tz_to_dtype(tz), freq=freq) + else: + dta = data + for key in ["tz", "freq"]: + # These are already stored in our DatetimeArray; if they are + # also in the pickle and don't match, we have a problem. + if key in d: + assert d[key] == getattr(dta, key) + d.pop(key) + result = cls._simple_new(dta, **d) + else: + with warnings.catch_warnings(): + # TODO: If we knew what was going in to **d, we might be able to + # go through _simple_new instead + warnings.simplefilter("ignore") + result = cls.__new__(cls, **d) + + return result + + +@inherit_names( + DatetimeArray._field_ops + + [ + method + for method in DatetimeArray._datetimelike_methods + if method not in ("tz_localize", "tz_convert") + ], + DatetimeArray, + wrap=True, +) +@inherit_names(["is_normalized", "_resolution_obj"], DatetimeArray, cache=True) +@inherit_names( + [ + "tz", + "tzinfo", + "dtype", + "to_pydatetime", + "_format_native_types", + "date", + "time", + "timetz", + "std", + ] + + DatetimeArray._bool_ops, + DatetimeArray, +) +class DatetimeIndex(DatetimeTimedeltaMixin): + """ + Immutable ndarray-like of datetime64 data. + + Represented internally as int64, and which can be boxed to Timestamp objects + that are subclasses of datetime and carry metadata. + + Parameters + ---------- + data : array-like (1-dimensional), optional + Optional datetime-like data to construct index with. + freq : str or pandas offset object, optional + One of pandas date offset strings or corresponding objects. The string + 'infer' can be passed in order to set the frequency of the index as the + inferred frequency upon creation. + tz : pytz.timezone or dateutil.tz.tzfile or datetime.tzinfo or str + Set the Timezone of the data. + normalize : bool, default False + Normalize start/end dates to midnight before generating date range. + closed : {'left', 'right'}, optional + Set whether to include `start` and `end` that are on the + boundary. The default includes boundary points on either end. + ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise' + When clocks moved backward due to DST, ambiguous times may arise. + For example in Central European Time (UTC+01), when going from 03:00 + DST to 02:00 non-DST, 02:30:00 local time occurs both at 00:30:00 UTC + and at 01:30:00 UTC. In such a situation, the `ambiguous` parameter + dictates how ambiguous times should be handled. 
+ + - 'infer' will attempt to infer fall dst-transition hours based on + order + - bool-ndarray where True signifies a DST time, False signifies a + non-DST time (note that this flag is only applicable for ambiguous + times) + - 'NaT' will return NaT where there are ambiguous times + - 'raise' will raise an AmbiguousTimeError if there are ambiguous times. + dayfirst : bool, default False + If True, parse dates in `data` with the day first order. + yearfirst : bool, default False + If True parse dates in `data` with the year first order. + dtype : numpy.dtype or DatetimeTZDtype or str, default None + Note that the only NumPy dtype allowed is ‘datetime64[ns]’. + copy : bool, default False + Make a copy of input ndarray. + name : label, default None + Name to be stored in the index. + + Attributes + ---------- + year + month + day + hour + minute + second + microsecond + nanosecond + date + time + timetz + dayofyear + day_of_year + weekofyear + week + dayofweek + day_of_week + weekday + quarter + tz + freq + freqstr + is_month_start + is_month_end + is_quarter_start + is_quarter_end + is_year_start + is_year_end + is_leap_year + inferred_freq + + Methods + ------- + normalize + strftime + snap + tz_convert + tz_localize + round + floor + ceil + to_period + to_perioddelta + to_pydatetime + to_series + to_frame + month_name + day_name + mean + std + + See Also + -------- + Index : The base pandas Index type. + TimedeltaIndex : Index of timedelta64 data. + PeriodIndex : Index of Period data. + to_datetime : Convert argument to datetime. + date_range : Create a fixed-frequency DatetimeIndex. + + Notes + ----- + To learn more about the frequency strings, please see `this link + `__. + """ + + _typ = "datetimeindex" + + _data_cls = DatetimeArray + _engine_type = libindex.DatetimeEngine + _supports_partial_string_indexing = True + + _data: DatetimeArray + inferred_freq: str | None + tz: tzinfo | None + + # -------------------------------------------------------------------- + # methods that dispatch to DatetimeArray and wrap result + + @doc(DatetimeArray.strftime) + def strftime(self, date_format) -> Index: + arr = self._data.strftime(date_format) + return Index(arr, name=self.name) + + @doc(DatetimeArray.tz_convert) + def tz_convert(self, tz) -> DatetimeIndex: + arr = self._data.tz_convert(tz) + return type(self)._simple_new(arr, name=self.name) + + @doc(DatetimeArray.tz_localize) + def tz_localize(self, tz, ambiguous="raise", nonexistent="raise") -> DatetimeIndex: + arr = self._data.tz_localize(tz, ambiguous, nonexistent) + return type(self)._simple_new(arr, name=self.name) + + @doc(DatetimeArray.to_period) + def to_period(self, freq=None) -> PeriodIndex: + from pandas.core.indexes.api import PeriodIndex + + arr = self._data.to_period(freq) + return PeriodIndex._simple_new(arr, name=self.name) + + @doc(DatetimeArray.to_perioddelta) + def to_perioddelta(self, freq) -> TimedeltaIndex: + from pandas.core.indexes.api import TimedeltaIndex + + arr = self._data.to_perioddelta(freq) + return TimedeltaIndex._simple_new(arr, name=self.name) + + @doc(DatetimeArray.to_julian_date) + def to_julian_date(self) -> Float64Index: + from pandas.core.indexes.api import Float64Index + + arr = self._data.to_julian_date() + return Float64Index._simple_new(arr, name=self.name) + + @doc(DatetimeArray.isocalendar) + def isocalendar(self) -> DataFrame: + df = self._data.isocalendar() + return df.set_index(self) + + # -------------------------------------------------------------------- + # Constructors + + def __new__( + 
cls, + data=None, + freq=lib.no_default, + tz=None, + normalize: bool = False, + closed=None, + ambiguous="raise", + dayfirst: bool = False, + yearfirst: bool = False, + dtype: Dtype | None = None, + copy: bool = False, + name: Hashable = None, + ) -> DatetimeIndex: + + if is_scalar(data): + raise cls._scalar_data_error(data) + + # - Cases checked above all return/raise before reaching here - # + + name = maybe_extract_name(name, data, cls) + + dtarr = DatetimeArray._from_sequence_not_strict( + data, + dtype=dtype, + copy=copy, + tz=tz, + freq=freq, + dayfirst=dayfirst, + yearfirst=yearfirst, + ambiguous=ambiguous, + ) + + subarr = cls._simple_new(dtarr, name=name) + return subarr + + # -------------------------------------------------------------------- + + @cache_readonly + def _is_dates_only(self) -> bool: + """ + Return a boolean if we are only dates (and don't have a timezone) + + Returns + ------- + bool + """ + from pandas.io.formats.format import is_dates_only + + # error: Argument 1 to "is_dates_only" has incompatible type + # "Union[ExtensionArray, ndarray]"; expected "Union[ndarray, + # DatetimeArray, Index, DatetimeIndex]" + return self.tz is None and is_dates_only(self._values) # type: ignore[arg-type] + + def __reduce__(self): + d = {"data": self._data, "name": self.name} + return _new_DatetimeIndex, (type(self), d), None + + def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: + """ + Can we compare values of the given dtype to our own? + """ + if self.tz is not None: + # If we have tz, we can compare to tzaware + return is_datetime64tz_dtype(dtype) + # if we dont have tz, we can only compare to tznaive + return is_datetime64_dtype(dtype) + + # -------------------------------------------------------------------- + # Rendering Methods + + @property + def _formatter_func(self): + from pandas.io.formats.format import get_format_datetime64 + + formatter = get_format_datetime64(is_dates_only=self._is_dates_only) + return lambda x: f"'{formatter(x)}'" + + # -------------------------------------------------------------------- + # Set Operation Methods + + def union_many(self, others): + """ + A bit of a hack to accelerate unioning a collection of indexes. + """ + warnings.warn( + "DatetimeIndex.union_many is deprecated and will be removed in " + "a future version. 
Use obj.union instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + this = self + + for other in others: + if not isinstance(this, DatetimeIndex): + this = Index.union(this, other) + continue + + if not isinstance(other, DatetimeIndex): + try: + other = DatetimeIndex(other) + except TypeError: + pass + + this, other = this._maybe_utc_convert(other) + + if len(self) and len(other) and this._can_fast_union(other): + # union already has fastpath handling for empty cases + this = this._fast_union(other) + else: + this = Index.union(this, other) + + res_name = get_unanimous_names(self, *others)[0] + if this.name != res_name: + return this.rename(res_name) + return this + + def _maybe_utc_convert(self, other: Index) -> tuple[DatetimeIndex, Index]: + this = self + + if isinstance(other, DatetimeIndex): + if (self.tz is None) ^ (other.tz is None): + raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex") + + if not timezones.tz_compare(self.tz, other.tz): + this = self.tz_convert("UTC") + other = other.tz_convert("UTC") + return this, other + + # -------------------------------------------------------------------- + + def _get_time_micros(self) -> np.ndarray: + """ + Return the number of microseconds since midnight. + + Returns + ------- + ndarray[int64_t] + """ + values = self._data._local_timestamps() + + nanos = values % (24 * 3600 * 1_000_000_000) + micros = nanos // 1000 + + micros[self._isnan] = -1 + return micros + + def to_series(self, keep_tz=lib.no_default, index=None, name=None): + """ + Create a Series with both index and values equal to the index keys + useful with map for returning an indexer based on an index. + + Parameters + ---------- + keep_tz : optional, defaults True + Return the data keeping the timezone. + + If keep_tz is True: + + If the timezone is not set, the resulting + Series will have a datetime64[ns] dtype. + + Otherwise the Series will have an datetime64[ns, tz] dtype; the + tz will be preserved. + + If keep_tz is False: + + Series will have a datetime64[ns] dtype. TZ aware + objects will have the tz removed. + + .. versionchanged:: 1.0.0 + The default value is now True. In a future version, + this keyword will be removed entirely. Stop passing the + argument to obtain the future behavior and silence the warning. + + index : Index, optional + Index of resulting Series. If None, defaults to original index. + name : str, optional + Name of resulting Series. If None, defaults to name of original + index. + + Returns + ------- + Series + """ + from pandas import Series + + if index is None: + index = self._view() + if name is None: + name = self.name + + if keep_tz is not lib.no_default: + if keep_tz: + warnings.warn( + "The 'keep_tz' keyword in DatetimeIndex.to_series " + "is deprecated and will be removed in a future version. " + "You can stop passing 'keep_tz' to silence this warning.", + FutureWarning, + stacklevel=find_stack_level(), + ) + else: + warnings.warn( + "Specifying 'keep_tz=False' is deprecated and this " + "option will be removed in a future release. 
If " + "you want to remove the timezone information, you " + "can do 'idx.tz_convert(None)' before calling " + "'to_series'.", + FutureWarning, + stacklevel=find_stack_level(), + ) + else: + keep_tz = True + + if keep_tz and self.tz is not None: + # preserve the tz & copy + values = self.copy(deep=True) + else: + # error: Incompatible types in assignment (expression has type + # "Union[ExtensionArray, ndarray]", variable has type "DatetimeIndex") + values = self._values.view("M8[ns]").copy() # type: ignore[assignment] + + return Series(values, index=index, name=name) + + def snap(self, freq="S") -> DatetimeIndex: + """ + Snap time stamps to nearest occurring frequency. + + Returns + ------- + DatetimeIndex + """ + # Superdumb, punting on any optimizing + freq = to_offset(freq) + + snapped = np.empty(len(self), dtype=DT64NS_DTYPE) + + for i, v in enumerate(self): + s = v + if not freq.is_on_offset(s): + t0 = freq.rollback(s) + t1 = freq.rollforward(s) + if abs(s - t0) < abs(t1 - s): + s = t0 + else: + s = t1 + snapped[i] = s + + dta = DatetimeArray(snapped, dtype=self.dtype) + return DatetimeIndex._simple_new(dta, name=self.name) + + # -------------------------------------------------------------------- + # Indexing Methods + + def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime): + """ + Calculate datetime bounds for parsed time string and its resolution. + + Parameters + ---------- + reso : str + Resolution provided by parsed string. + parsed : datetime + Datetime from parsed string. + + Returns + ------- + lower, upper: pd.Timestamp + """ + grp = reso.freq_group + per = Period(parsed, freq=grp.value) + start, end = per.start_time, per.end_time + + # GH 24076 + # If an incoming date string contained a UTC offset, need to localize + # the parsed date to this offset first before aligning with the index's + # timezone + start = start.tz_localize(parsed.tzinfo) + end = end.tz_localize(parsed.tzinfo) + + if parsed.tzinfo is not None: + if self.tz is None: + raise ValueError( + "The index must be timezone aware when indexing " + "with a date string with a UTC offset" + ) + start = self._maybe_cast_for_get_loc(start) + end = self._maybe_cast_for_get_loc(end) + return start, end + + def _can_partial_date_slice(self, reso: Resolution) -> bool: + # History of conversation GH#3452, GH#3931, GH#2369, GH#14826 + return reso > self._resolution_obj + + def _deprecate_mismatched_indexing(self, key) -> None: + # GH#36148 + # we get here with isinstance(key, self._data._recognized_scalars) + try: + self._data._assert_tzawareness_compat(key) + except TypeError: + if self.tz is None: + msg = ( + "Indexing a timezone-naive DatetimeIndex with a " + "timezone-aware datetime is deprecated and will " + "raise KeyError in a future version. " + "Use a timezone-naive object instead." + ) + else: + msg = ( + "Indexing a timezone-aware DatetimeIndex with a " + "timezone-naive datetime is deprecated and will " + "raise KeyError in a future version. " + "Use a timezone-aware object instead." 
+ ) + warnings.warn(msg, FutureWarning, stacklevel=find_stack_level()) + + def get_loc(self, key, method=None, tolerance=None): + """ + Get integer location for requested label + + Returns + ------- + loc : int + """ + self._check_indexing_error(key) + + orig_key = key + if is_valid_na_for_dtype(key, self.dtype): + key = NaT + + if isinstance(key, self._data._recognized_scalars): + # needed to localize naive datetimes + self._deprecate_mismatched_indexing(key) + key = self._maybe_cast_for_get_loc(key) + + elif isinstance(key, str): + + try: + parsed, reso = self._parse_with_reso(key) + except ValueError as err: + raise KeyError(key) from err + + if self._can_partial_date_slice(reso): + try: + return self._partial_date_slice(reso, parsed) + except KeyError as err: + if method is None: + raise KeyError(key) from err + try: + key = self._maybe_cast_for_get_loc(key) + except ValueError as err: + # FIXME(dateutil#1180): we get here because parse_with_reso + # doesn't raise on "t2m" + raise KeyError(key) from err + + elif isinstance(key, timedelta): + # GH#20464 + raise TypeError( + f"Cannot index {type(self).__name__} with {type(key).__name__}" + ) + + elif isinstance(key, time): + if method is not None: + raise NotImplementedError( + "cannot yet lookup inexact labels when key is a time object" + ) + return self.indexer_at_time(key) + + else: + # unrecognized type + raise KeyError(key) + + try: + return Index.get_loc(self, key, method, tolerance) + except KeyError as err: + raise KeyError(orig_key) from err + + def _maybe_cast_for_get_loc(self, key) -> Timestamp: + # needed to localize naive datetimes or dates (GH 35690) + key = Timestamp(key) + if key.tzinfo is None: + key = key.tz_localize(self.tz) + else: + key = key.tz_convert(self.tz) + return key + + @doc(DatetimeTimedeltaMixin._maybe_cast_slice_bound) + def _maybe_cast_slice_bound(self, label, side: str, kind=lib.no_default): + label = super()._maybe_cast_slice_bound(label, side, kind=kind) + self._deprecate_mismatched_indexing(label) + return self._maybe_cast_for_get_loc(label) + + def slice_indexer(self, start=None, end=None, step=None, kind=lib.no_default): + """ + Return indexer for specified label slice. + Index.slice_indexer, customized to handle time slicing. + + In addition to functionality provided by Index.slice_indexer, does the + following: + + - if both `start` and `end` are instances of `datetime.time`, it + invokes `indexer_between_time` + - if `start` and `end` are both either string or None perform + value-based selection in non-monotonic cases. + + """ + self._deprecated_arg(kind, "kind", "slice_indexer") + + # For historical reasons DatetimeIndex supports slices between two + # instances of datetime.time as if it were applying a slice mask to + # an array of (self.hour, self.minute, self.seconds, self.microsecond). + if isinstance(start, time) and isinstance(end, time): + if step is not None and step != 1: + raise ValueError("Must have step size of 1 with time slices") + return self.indexer_between_time(start, end) + + if isinstance(start, time) or isinstance(end, time): + raise KeyError("Cannot mix time and non-time slice keys") + + # Pandas supports slicing with dates, treated as datetimes at midnight. 
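+        # [Editor's note - assumed illustrative doctest, not pandas source:]
+        # >>> import pandas as pd
+        # >>> from datetime import date
+        # >>> ser = pd.Series(range(3), index=pd.date_range("2020-01-01", periods=3))
+        # >>> ser[date(2020, 1, 1):date(2020, 1, 2)]  # dates -> midnight bounds
+        # 2020-01-01    0
+        # 2020-01-02    1
+        # Freq: D, dtype: int64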
+ # https://github.com/pandas-dev/pandas/issues/31501 + if isinstance(start, date) and not isinstance(start, datetime): + start = datetime.combine(start, time(0, 0)) + if isinstance(end, date) and not isinstance(end, datetime): + end = datetime.combine(end, time(0, 0)) + + def check_str_or_none(point): + return point is not None and not isinstance(point, str) + + # GH#33146 if start and end are combinations of str and None and Index is not + # monotonic, we can not use Index.slice_indexer because it does not honor the + # actual elements, is only searching for start and end + if ( + check_str_or_none(start) + or check_str_or_none(end) + or self.is_monotonic_increasing + ): + return Index.slice_indexer(self, start, end, step, kind=kind) + + mask = np.array(True) + deprecation_mask = np.array(True) + if start is not None: + start_casted = self._maybe_cast_slice_bound(start, "left") + mask = start_casted <= self + deprecation_mask = start_casted == self + + if end is not None: + end_casted = self._maybe_cast_slice_bound(end, "right") + mask = (self <= end_casted) & mask + deprecation_mask = (end_casted == self) | deprecation_mask + + if not deprecation_mask.any(): + warnings.warn( + "Value based partial slicing on non-monotonic DatetimeIndexes " + "with non-existing keys is deprecated and will raise a " + "KeyError in a future Version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + indexer = mask.nonzero()[0][::step] + if len(indexer) == len(self): + return slice(None) + else: + return indexer + + @doc(Index.get_slice_bound) + def get_slice_bound( + self, label, side: Literal["left", "right"], kind=lib.no_default + ) -> int: + # GH#42855 handle date here instead of _maybe_cast_slice_bound + if isinstance(label, date) and not isinstance(label, datetime): + label = Timestamp(label).to_pydatetime() + return super().get_slice_bound(label, side=side, kind=kind) + + # -------------------------------------------------------------------- + + @property + def inferred_type(self) -> str: + # b/c datetime is represented as microseconds since the epoch, make + # sure we can't have ambiguous indexing + return "datetime64" + + def indexer_at_time(self, time, asof: bool = False) -> npt.NDArray[np.intp]: + """ + Return index locations of values at particular time of day + (e.g. 9:30AM). + + Parameters + ---------- + time : datetime.time or str + Time passed in either as object (datetime.time) or as string in + appropriate format ("%H:%M", "%H%M", "%I:%M%p", "%I%M%p", + "%H:%M:%S", "%H%M%S", "%I:%M:%S%p", "%I%M%S%p"). + + Returns + ------- + np.ndarray[np.intp] + + See Also + -------- + indexer_between_time : Get index locations of values between particular + times of day. + DataFrame.at_time : Select values at particular time of day. + """ + if asof: + raise NotImplementedError("'asof' argument is not supported") + + if isinstance(time, str): + from dateutil.parser import parse + + time = parse(time).time() + + if time.tzinfo: + if self.tz is None: + raise ValueError("Index must be timezone aware.") + time_micros = self.tz_convert(time.tzinfo)._get_time_micros() + else: + time_micros = self._get_time_micros() + micros = _time_to_micros(time) + return (time_micros == micros).nonzero()[0] + + def indexer_between_time( + self, start_time, end_time, include_start: bool = True, include_end: bool = True + ) -> npt.NDArray[np.intp]: + """ + Return index locations of values between particular times of day + (e.g., 9:00-9:30AM). 
+ + Parameters + ---------- + start_time, end_time : datetime.time, str + Time passed either as object (datetime.time) or as string in + appropriate format ("%H:%M", "%H%M", "%I:%M%p", "%I%M%p", + "%H:%M:%S", "%H%M%S", "%I:%M:%S%p","%I%M%S%p"). + include_start : bool, default True + include_end : bool, default True + + Returns + ------- + np.ndarray[np.intp] + + See Also + -------- + indexer_at_time : Get index locations of values at particular time of day. + DataFrame.between_time : Select values between particular times of day. + """ + start_time = to_time(start_time) + end_time = to_time(end_time) + time_micros = self._get_time_micros() + start_micros = _time_to_micros(start_time) + end_micros = _time_to_micros(end_time) + + if include_start and include_end: + lop = rop = operator.le + elif include_start: + lop = operator.le + rop = operator.lt + elif include_end: + lop = operator.lt + rop = operator.le + else: + lop = rop = operator.lt + + if start_time <= end_time: + join_op = operator.and_ + else: + join_op = operator.or_ + + mask = join_op(lop(start_micros, time_micros), rop(time_micros, end_micros)) + + return mask.nonzero()[0] + + +def date_range( + start=None, + end=None, + periods=None, + freq=None, + tz=None, + normalize: bool = False, + name: Hashable = None, + closed: str | None | lib.NoDefault = lib.no_default, + inclusive: str | None = None, + **kwargs, +) -> DatetimeIndex: + """ + Return a fixed frequency DatetimeIndex. + + Returns the range of equally spaced time points (where the difference between any + two adjacent points is specified by the given frequency) such that they all + satisfy `start <[=] x <[=] end`, where the first one and the last one are, resp., + the first and last time points in that range that fall on the boundary of ``freq`` + (if given as a frequency string) or that are valid for ``freq`` (if given as a + :class:`pandas.tseries.offsets.DateOffset`). (If exactly one of ``start``, + ``end``, or ``freq`` is *not* specified, this missing parameter can be computed + given ``periods``, the number of timesteps in the range. See the note below.) + + Parameters + ---------- + start : str or datetime-like, optional + Left bound for generating dates. + end : str or datetime-like, optional + Right bound for generating dates. + periods : int, optional + Number of periods to generate. + freq : str or DateOffset, default 'D' + Frequency strings can have multiples, e.g. '5H'. See + :ref:`here ` for a list of + frequency aliases. + tz : str or tzinfo, optional + Time zone name for returning localized DatetimeIndex, for example + 'Asia/Hong_Kong'. By default, the resulting DatetimeIndex is + timezone-naive. + normalize : bool, default False + Normalize start/end dates to midnight before generating date range. + name : str, default None + Name of the resulting DatetimeIndex. + closed : {None, 'left', 'right'}, optional + Make the interval closed with respect to the given frequency to + the 'left', 'right', or both sides (None, the default). + + .. deprecated:: 1.4.0 + Argument `closed` has been deprecated to standardize boundary inputs. + Use `inclusive` instead, to set each bound as closed or open. + inclusive : {"both", "neither", "left", "right"}, default "both" + Include boundaries; Whether to set each bound as closed or open. + + .. versionadded:: 1.4.0 + **kwargs + For compatibility. Has no effect on the result. + + Returns + ------- + rng : DatetimeIndex + + See Also + -------- + DatetimeIndex : An immutable container for datetimes. 
+ timedelta_range : Return a fixed frequency TimedeltaIndex. + period_range : Return a fixed frequency PeriodIndex. + interval_range : Return a fixed frequency IntervalIndex. + + Notes + ----- + Of the four parameters ``start``, ``end``, ``periods``, and ``freq``, + exactly three must be specified. If ``freq`` is omitted, the resulting + ``DatetimeIndex`` will have ``periods`` linearly spaced elements between + ``start`` and ``end`` (closed on both sides). + + To learn more about the frequency strings, please see `this link + `__. + + Examples + -------- + **Specifying the values** + + The next four examples generate the same `DatetimeIndex`, but vary + the combination of `start`, `end` and `periods`. + + Specify `start` and `end`, with the default daily frequency. + + >>> pd.date_range(start='1/1/2018', end='1/08/2018') + DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04', + '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08'], + dtype='datetime64[ns]', freq='D') + + Specify `start` and `periods`, the number of periods (days). + + >>> pd.date_range(start='1/1/2018', periods=8) + DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04', + '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08'], + dtype='datetime64[ns]', freq='D') + + Specify `end` and `periods`, the number of periods (days). + + >>> pd.date_range(end='1/1/2018', periods=8) + DatetimeIndex(['2017-12-25', '2017-12-26', '2017-12-27', '2017-12-28', + '2017-12-29', '2017-12-30', '2017-12-31', '2018-01-01'], + dtype='datetime64[ns]', freq='D') + + Specify `start`, `end`, and `periods`; the frequency is generated + automatically (linearly spaced). + + >>> pd.date_range(start='2018-04-24', end='2018-04-27', periods=3) + DatetimeIndex(['2018-04-24 00:00:00', '2018-04-25 12:00:00', + '2018-04-27 00:00:00'], + dtype='datetime64[ns]', freq=None) + + **Other Parameters** + + Changed the `freq` (frequency) to ``'M'`` (month end frequency). + + >>> pd.date_range(start='1/1/2018', periods=5, freq='M') + DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31', '2018-04-30', + '2018-05-31'], + dtype='datetime64[ns]', freq='M') + + Multiples are allowed + + >>> pd.date_range(start='1/1/2018', periods=5, freq='3M') + DatetimeIndex(['2018-01-31', '2018-04-30', '2018-07-31', '2018-10-31', + '2019-01-31'], + dtype='datetime64[ns]', freq='3M') + + `freq` can also be specified as an Offset object. + + >>> pd.date_range(start='1/1/2018', periods=5, freq=pd.offsets.MonthEnd(3)) + DatetimeIndex(['2018-01-31', '2018-04-30', '2018-07-31', '2018-10-31', + '2019-01-31'], + dtype='datetime64[ns]', freq='3M') + + Specify `tz` to set the timezone. + + >>> pd.date_range(start='1/1/2018', periods=5, tz='Asia/Tokyo') + DatetimeIndex(['2018-01-01 00:00:00+09:00', '2018-01-02 00:00:00+09:00', + '2018-01-03 00:00:00+09:00', '2018-01-04 00:00:00+09:00', + '2018-01-05 00:00:00+09:00'], + dtype='datetime64[ns, Asia/Tokyo]', freq='D') + + `inclusive` controls whether to include `start` and `end` that are on the + boundary. The default, "both", includes boundary points on either end. + + >>> pd.date_range(start='2017-01-01', end='2017-01-04', inclusive="both") + DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04'], + dtype='datetime64[ns]', freq='D') + + Use ``inclusive='left'`` to exclude `end` if it falls on the boundary. 
+ + >>> pd.date_range(start='2017-01-01', end='2017-01-04', inclusive='left') + DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03'], + dtype='datetime64[ns]', freq='D') + + Use ``inclusive='right'`` to exclude `start` if it falls on the boundary, and + similarly ``inclusive='neither'`` will exclude both `start` and `end`. + + >>> pd.date_range(start='2017-01-01', end='2017-01-04', inclusive='right') + DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04'], + dtype='datetime64[ns]', freq='D') + """ + if inclusive is not None and not isinstance(closed, lib.NoDefault): + raise ValueError( + "Deprecated argument `closed` cannot be passed" + "if argument `inclusive` is not None" + ) + elif not isinstance(closed, lib.NoDefault): + warnings.warn( + "Argument `closed` is deprecated in favor of `inclusive`.", + FutureWarning, + stacklevel=find_stack_level(), + ) + if closed is None: + inclusive = "both" + elif closed in ("left", "right"): + inclusive = closed + else: + raise ValueError( + "Argument `closed` has to be either 'left', 'right' or None" + ) + elif inclusive is None: + inclusive = "both" + + if freq is None and com.any_none(periods, start, end): + freq = "D" + + dtarr = DatetimeArray._generate_range( + start=start, + end=end, + periods=periods, + freq=freq, + tz=tz, + normalize=normalize, + inclusive=inclusive, + **kwargs, + ) + return DatetimeIndex._simple_new(dtarr, name=name) + + +def bdate_range( + start=None, + end=None, + periods: int | None = None, + freq="B", + tz=None, + normalize: bool = True, + name: Hashable = None, + weekmask=None, + holidays=None, + closed: lib.NoDefault = lib.no_default, + inclusive: str | None = None, + **kwargs, +) -> DatetimeIndex: + """ + Return a fixed frequency DatetimeIndex, with business day as the default + frequency. + + Parameters + ---------- + start : str or datetime-like, default None + Left bound for generating dates. + end : str or datetime-like, default None + Right bound for generating dates. + periods : int, default None + Number of periods to generate. + freq : str or DateOffset, default 'B' (business daily) + Frequency strings can have multiples, e.g. '5H'. + tz : str or None + Time zone name for returning localized DatetimeIndex, for example + Asia/Beijing. + normalize : bool, default False + Normalize start/end dates to midnight before generating date range. + name : str, default None + Name of the resulting DatetimeIndex. + weekmask : str or None, default None + Weekmask of valid business days, passed to ``numpy.busdaycalendar``, + only used when custom frequency strings are passed. The default + value None is equivalent to 'Mon Tue Wed Thu Fri'. + holidays : list-like or None, default None + Dates to exclude from the set of valid business days, passed to + ``numpy.busdaycalendar``, only used when custom frequency strings + are passed. + closed : str, default None + Make the interval closed with respect to the given frequency to + the 'left', 'right', or both sides (None). + + .. deprecated:: 1.4.0 + Argument `closed` has been deprecated to standardize boundary inputs. + Use `inclusive` instead, to set each bound as closed or open. + inclusive : {"both", "neither", "left", "right"}, default "both" + Include boundaries; Whether to set each bound as closed or open. + + .. versionadded:: 1.4.0 + **kwargs + For compatibility. Has no effect on the result. + + Returns + ------- + DatetimeIndex + + Notes + ----- + Of the four parameters: ``start``, ``end``, ``periods``, and ``freq``, + exactly three must be specified. 
+    Specifying ``freq`` is a requirement for ``bdate_range``. Use ``date_range``
+    if specifying ``freq`` is not desired.
+
+    To learn more about the frequency strings, please see `this link
+    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.
+
+    Examples
+    --------
+    Note how the two weekend days are skipped in the result.
+
+    >>> pd.bdate_range(start='1/1/2018', end='1/08/2018')
+    DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
+                   '2018-01-05', '2018-01-08'],
+                  dtype='datetime64[ns]', freq='B')
+    """
+    if freq is None:
+        msg = "freq must be specified for bdate_range; use date_range instead"
+        raise TypeError(msg)
+
+    if isinstance(freq, str) and freq.startswith("C"):
+        try:
+            weekmask = weekmask or "Mon Tue Wed Thu Fri"
+            freq = prefix_mapping[freq](holidays=holidays, weekmask=weekmask)
+        except (KeyError, TypeError) as err:
+            msg = f"invalid custom frequency string: {freq}"
+            raise ValueError(msg) from err
+    elif holidays or weekmask:
+        msg = (
+            "a custom frequency string is required when holidays or "
+            f"weekmask are passed, got frequency {freq}"
+        )
+        raise ValueError(msg)
+
+    return date_range(
+        start=start,
+        end=end,
+        periods=periods,
+        freq=freq,
+        tz=tz,
+        normalize=normalize,
+        name=name,
+        closed=closed,
+        inclusive=inclusive,
+        **kwargs,
+    )
+
+
+def _time_to_micros(time_obj: time) -> int:
+    seconds = time_obj.hour * 60 * 60 + 60 * time_obj.minute + time_obj.second
+    return 1_000_000 * seconds + time_obj.microsecond
diff --git a/.local/lib/python3.8/site-packages/pandas/core/indexes/extension.py b/.local/lib/python3.8/site-packages/pandas/core/indexes/extension.py
new file mode 100644
index 0000000..28a0b43
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/core/indexes/extension.py
@@ -0,0 +1,189 @@
+"""
+Shared methods for Index subclasses backed by ExtensionArray.
+"""
+from __future__ import annotations
+
+from typing import (
+    Callable,
+    TypeVar,
+)
+
+import numpy as np
+
+from pandas._typing import (
+    ArrayLike,
+    npt,
+)
+from pandas.util._decorators import (
+    cache_readonly,
+    doc,
+)
+
+from pandas.core.dtypes.generic import ABCDataFrame
+
+from pandas.core.arrays import IntervalArray
+from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
+from pandas.core.indexes.base import Index
+
+_T = TypeVar("_T", bound="NDArrayBackedExtensionIndex")
+_ExtensionIndexT = TypeVar("_ExtensionIndexT", bound="ExtensionIndex")
+
+
+def _inherit_from_data(
+    name: str, delegate: type, cache: bool = False, wrap: bool = False
+):
+    """
+    Make an alias for a method of the underlying ExtensionArray.
+
+    Parameters
+    ----------
+    name : str
+        Name of an attribute the class should inherit from its EA parent.
+    delegate : class
+    cache : bool, default False
+        Whether to convert wrapped properties into cache_readonly
+    wrap : bool, default False
+        Whether to wrap the inherited result in an Index.
+
+    Returns
+    -------
+    attribute, method, property, or cache_readonly
+    """
+    attr = getattr(delegate, name)
+
+    if isinstance(attr, property) or type(attr).__name__ == "getset_descriptor":
+        # getset_descriptor i.e.
property defined in cython class + if cache: + + def cached(self): + return getattr(self._data, name) + + cached.__name__ = name + cached.__doc__ = attr.__doc__ + method = cache_readonly(cached) + + else: + + def fget(self): + result = getattr(self._data, name) + if wrap: + if isinstance(result, type(self._data)): + return type(self)._simple_new(result, name=self.name) + elif isinstance(result, ABCDataFrame): + return result.set_index(self) + return Index(result, name=self.name) + return result + + def fset(self, value): + setattr(self._data, name, value) + + fget.__name__ = name + fget.__doc__ = attr.__doc__ + + method = property(fget, fset) + + elif not callable(attr): + # just a normal attribute, no wrapping + method = attr + + else: + # error: Incompatible redefinition (redefinition with type "Callable[[Any, + # VarArg(Any), KwArg(Any)], Any]", original type "property") + def method(self, *args, **kwargs): # type: ignore[misc] + if "inplace" in kwargs: + raise ValueError(f"cannot use inplace with {type(self).__name__}") + result = attr(self._data, *args, **kwargs) + if wrap: + if isinstance(result, type(self._data)): + return type(self)._simple_new(result, name=self.name) + elif isinstance(result, ABCDataFrame): + return result.set_index(self) + return Index(result, name=self.name) + return result + + # error: "property" has no attribute "__name__" + method.__name__ = name # type: ignore[attr-defined] + method.__doc__ = attr.__doc__ + return method + + +def inherit_names( + names: list[str], delegate: type, cache: bool = False, wrap: bool = False +) -> Callable[[type[_ExtensionIndexT]], type[_ExtensionIndexT]]: + """ + Class decorator to pin attributes from an ExtensionArray to a Index subclass. + + Parameters + ---------- + names : List[str] + delegate : class + cache : bool, default False + wrap : bool, default False + Whether to wrap the inherited result in an Index. + """ + + def wrapper(cls: type[_ExtensionIndexT]) -> type[_ExtensionIndexT]: + for name in names: + meth = _inherit_from_data(name, delegate, cache=cache, wrap=wrap) + setattr(cls, name, meth) + + return cls + + return wrapper + + +class ExtensionIndex(Index): + """ + Index subclass for indexes backed by ExtensionArray. + """ + + # The base class already passes through to _data: + # size, __len__, dtype + + _data: IntervalArray | NDArrayBackedExtensionArray + + # --------------------------------------------------------------------- + + def _validate_fill_value(self, value): + """ + Convert value to be insertable to underlying array. + """ + return self._data._validate_setitem_value(value) + + @doc(Index.map) + def map(self, mapper, na_action=None): + # Try to run function on index first, and then on elements of index + # Especially important for group-by functionality + try: + result = mapper(self) + + # Try to use this result if we can + if isinstance(result, np.ndarray): + result = Index(result) + + if not isinstance(result, Index): + raise TypeError("The map function must return an Index object") + return result + except Exception: + return self.astype(object).map(mapper) + + @cache_readonly + def _isnan(self) -> npt.NDArray[np.bool_]: + # error: Incompatible return value type (got "ExtensionArray", expected + # "ndarray") + return self._data.isna() # type: ignore[return-value] + + +class NDArrayBackedExtensionIndex(ExtensionIndex): + """ + Index subclass for indexes backed by NDArrayBackedExtensionArray. 
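+
+    Because the backing ExtensionArray wraps a plain ``numpy`` ndarray, the
+    index can hand that ndarray directly to the engine and join machinery
+    (see ``_get_engine_target`` below) instead of going through object dtype.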
+    """
+
+    _data: NDArrayBackedExtensionArray
+
+    def _get_engine_target(self) -> np.ndarray:
+        return self._data._ndarray
+
+    def _from_join_target(self, result: np.ndarray) -> ArrayLike:
+        assert result.dtype == self._data._ndarray.dtype
+        return self._data._from_backing_data(result)
diff --git a/.local/lib/python3.8/site-packages/pandas/core/indexes/frozen.py b/.local/lib/python3.8/site-packages/pandas/core/indexes/frozen.py
new file mode 100644
index 0000000..ed5cf04
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/core/indexes/frozen.py
@@ -0,0 +1,110 @@
+"""
+frozen (immutable) data structures to support MultiIndexing
+
+These are used for:
+
+- .names (FrozenList)
+
+"""
+from __future__ import annotations
+
+from typing import Any
+
+from pandas.core.base import PandasObject
+
+from pandas.io.formats.printing import pprint_thing
+
+
+class FrozenList(PandasObject, list):
+    """
+    Container that doesn't allow setting items, *but*
+    because it's technically hashable, can be used
+    for lookups, appropriately, etc.
+    """
+
+    # Side note: This has to be of type list. Otherwise,
+    #            it messes up PyTables type checks.
+
+    def union(self, other) -> FrozenList:
+        """
+        Returns a FrozenList with other concatenated to the end of self.
+
+        Parameters
+        ----------
+        other : array-like
+            The array-like whose elements we are concatenating.
+
+        Returns
+        -------
+        FrozenList
+            The collection union of self and other.
+        """
+        if isinstance(other, tuple):
+            other = list(other)
+        return type(self)(super().__add__(other))
+
+    def difference(self, other) -> FrozenList:
+        """
+        Returns a FrozenList with elements from other removed from self.
+
+        Parameters
+        ----------
+        other : array-like
+            The array-like whose elements we are removing from self.
+
+        Returns
+        -------
+        FrozenList
+            The collection difference between self and other.
+        """
+        other = set(other)
+        temp = [x for x in self if x not in other]
+        return type(self)(temp)
+
+    # TODO: Consider deprecating these in favor of `union` (xref gh-15506)
+    __add__ = __iadd__ = union
+
+    def __getitem__(self, n):
+        if isinstance(n, slice):
+            return type(self)(super().__getitem__(n))
+        return super().__getitem__(n)
+
+    def __radd__(self, other):
+        if isinstance(other, tuple):
+            other = list(other)
+        return type(self)(other + list(self))
+
+    def __eq__(self, other: Any) -> bool:
+        if isinstance(other, (tuple, FrozenList)):
+            other = list(other)
+        return super().__eq__(other)
+
+    __req__ = __eq__
+
+    def __mul__(self, other):
+        return type(self)(super().__mul__(other))
+
+    __imul__ = __mul__
+
+    def __reduce__(self):
+        return type(self), (list(self),)
+
+    def __hash__(self):
+        return hash(tuple(self))
+
+    def _disabled(self, *args, **kwargs):
+        """
+        This method will not function because object is immutable.
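+
+        For example (an illustrative doctest for this internal class):
+
+        >>> FrozenList([1, 2]).append(3)
+        Traceback (most recent call last):
+            ...
+        TypeError: 'FrozenList' does not support mutable operations.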
+ """ + raise TypeError(f"'{type(self).__name__}' does not support mutable operations.") + + def __str__(self) -> str: + return pprint_thing(self, quote_strings=True, escape_chars=("\t", "\r", "\n")) + + def __repr__(self) -> str: + return f"{type(self).__name__}({str(self)})" + + __setitem__ = __setslice__ = _disabled # type: ignore[assignment] + __delitem__ = __delslice__ = _disabled + pop = append = extend = _disabled + remove = sort = insert = _disabled # type: ignore[assignment] diff --git a/.local/lib/python3.8/site-packages/pandas/core/indexes/interval.py b/.local/lib/python3.8/site-packages/pandas/core/indexes/interval.py new file mode 100644 index 0000000..a378fd9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/indexes/interval.py @@ -0,0 +1,1116 @@ +""" define the IntervalIndex """ +from __future__ import annotations + +from operator import ( + le, + lt, +) +import textwrap +from typing import ( + Any, + Hashable, +) + +import numpy as np + +from pandas._libs import lib +from pandas._libs.interval import ( + Interval, + IntervalMixin, + IntervalTree, +) +from pandas._libs.tslibs import ( + BaseOffset, + Timedelta, + Timestamp, + to_offset, +) +from pandas._typing import ( + Dtype, + DtypeObj, + npt, +) +from pandas.errors import InvalidIndexError +from pandas.util._decorators import ( + Appender, + cache_readonly, +) +from pandas.util._exceptions import rewrite_exception + +from pandas.core.dtypes.cast import ( + find_common_type, + infer_dtype_from_scalar, + maybe_box_datetimelike, + maybe_downcast_numeric, +) +from pandas.core.dtypes.common import ( + ensure_platform_int, + is_datetime64tz_dtype, + is_datetime_or_timedelta_dtype, + is_dtype_equal, + is_float, + is_float_dtype, + is_integer, + is_integer_dtype, + is_interval_dtype, + is_list_like, + is_number, + is_object_dtype, + is_scalar, +) +from pandas.core.dtypes.dtypes import IntervalDtype +from pandas.core.dtypes.missing import is_valid_na_for_dtype + +from pandas.core.algorithms import unique +from pandas.core.arrays.interval import ( + IntervalArray, + _interval_shared_docs, +) +import pandas.core.common as com +from pandas.core.indexers import is_valid_positional_slice +import pandas.core.indexes.base as ibase +from pandas.core.indexes.base import ( + Index, + _index_shared_docs, + ensure_index, + maybe_extract_name, +) +from pandas.core.indexes.datetimes import ( + DatetimeIndex, + date_range, +) +from pandas.core.indexes.extension import ( + ExtensionIndex, + inherit_names, +) +from pandas.core.indexes.multi import MultiIndex +from pandas.core.indexes.timedeltas import ( + TimedeltaIndex, + timedelta_range, +) + +_index_doc_kwargs = dict(ibase._index_doc_kwargs) + +_index_doc_kwargs.update( + { + "klass": "IntervalIndex", + "qualname": "IntervalIndex", + "target_klass": "IntervalIndex or list of Intervals", + "name": textwrap.dedent( + """\ + name : object, optional + Name to be stored in the index. 
+            """
+        ),
+    }
+)
+
+
+def _get_next_label(label):
+    dtype = getattr(label, "dtype", type(label))
+    if isinstance(label, (Timestamp, Timedelta)):
+        dtype = "datetime64"
+    if is_datetime_or_timedelta_dtype(dtype) or is_datetime64tz_dtype(dtype):
+        return label + np.timedelta64(1, "ns")
+    elif is_integer_dtype(dtype):
+        return label + 1
+    elif is_float_dtype(dtype):
+        return np.nextafter(label, np.infty)
+    else:
+        raise TypeError(f"cannot determine next label for type {repr(type(label))}")
+
+
+def _get_prev_label(label):
+    dtype = getattr(label, "dtype", type(label))
+    if isinstance(label, (Timestamp, Timedelta)):
+        dtype = "datetime64"
+    if is_datetime_or_timedelta_dtype(dtype) or is_datetime64tz_dtype(dtype):
+        return label - np.timedelta64(1, "ns")
+    elif is_integer_dtype(dtype):
+        return label - 1
+    elif is_float_dtype(dtype):
+        return np.nextafter(label, -np.infty)
+    else:
+        raise TypeError(f"cannot determine prev label for type {repr(type(label))}")
+
+
+def _new_IntervalIndex(cls, d):
+    """
+    This is called upon unpickling, rather than the default which doesn't have
+    arguments and breaks __new__.
+    """
+    return cls.from_arrays(**d)
+
+
+@Appender(
+    _interval_shared_docs["class"]
+    % {
+        "klass": "IntervalIndex",
+        "summary": "Immutable index of intervals that are closed on the same side.",
+        "name": _index_doc_kwargs["name"],
+        "versionadded": "0.20.0",
+        "extra_attributes": "is_overlapping\nvalues\n",
+        "extra_methods": "",
+        "examples": textwrap.dedent(
+            """\
+    Examples
+    --------
+    A new ``IntervalIndex`` is typically constructed using
+    :func:`interval_range`:
+
+    >>> pd.interval_range(start=0, end=5)
+    IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]],
+                  dtype='interval[int64, right]')
+
+    It may also be constructed using one of the constructor
+    methods: :meth:`IntervalIndex.from_arrays`,
+    :meth:`IntervalIndex.from_breaks`, and :meth:`IntervalIndex.from_tuples`.
+
+    See further examples in the doc strings of ``interval_range`` and the
+    mentioned constructor methods.
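+
+    As a further (illustrative) example, an ``IntervalIndex`` can also be
+    built directly from a list of ``Interval`` objects:
+
+    >>> pd.IntervalIndex([pd.Interval(0, 1), pd.Interval(1, 2)])
+    IntervalIndex([(0, 1], (1, 2]],
+                  dtype='interval[int64, right]')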
+ """ + ), + } +) +@inherit_names(["set_closed", "to_tuples"], IntervalArray, wrap=True) +@inherit_names( + [ + "__array__", + "overlaps", + "contains", + "closed_left", + "closed_right", + "open_left", + "open_right", + "is_empty", + ], + IntervalArray, +) +@inherit_names(["is_non_overlapping_monotonic", "closed"], IntervalArray, cache=True) +class IntervalIndex(ExtensionIndex): + _typ = "intervalindex" + + # annotate properties pinned via inherit_names + closed: str + is_non_overlapping_monotonic: bool + closed_left: bool + closed_right: bool + + _data: IntervalArray + _values: IntervalArray + _can_hold_strings = False + _data_cls = IntervalArray + + # -------------------------------------------------------------------- + # Constructors + + def __new__( + cls, + data, + closed=None, + dtype: Dtype | None = None, + copy: bool = False, + name: Hashable = None, + verify_integrity: bool = True, + ) -> IntervalIndex: + + name = maybe_extract_name(name, data, cls) + + with rewrite_exception("IntervalArray", cls.__name__): + array = IntervalArray( + data, + closed=closed, + copy=copy, + dtype=dtype, + verify_integrity=verify_integrity, + ) + + return cls._simple_new(array, name) + + @classmethod + @Appender( + _interval_shared_docs["from_breaks"] + % { + "klass": "IntervalIndex", + "examples": textwrap.dedent( + """\ + Examples + -------- + >>> pd.IntervalIndex.from_breaks([0, 1, 2, 3]) + IntervalIndex([(0, 1], (1, 2], (2, 3]], + dtype='interval[int64, right]') + """ + ), + } + ) + def from_breaks( + cls, + breaks, + closed: str = "right", + name: Hashable = None, + copy: bool = False, + dtype: Dtype | None = None, + ) -> IntervalIndex: + with rewrite_exception("IntervalArray", cls.__name__): + array = IntervalArray.from_breaks( + breaks, closed=closed, copy=copy, dtype=dtype + ) + return cls._simple_new(array, name=name) + + @classmethod + @Appender( + _interval_shared_docs["from_arrays"] + % { + "klass": "IntervalIndex", + "examples": textwrap.dedent( + """\ + Examples + -------- + >>> pd.IntervalIndex.from_arrays([0, 1, 2], [1, 2, 3]) + IntervalIndex([(0, 1], (1, 2], (2, 3]], + dtype='interval[int64, right]') + """ + ), + } + ) + def from_arrays( + cls, + left, + right, + closed: str = "right", + name: Hashable = None, + copy: bool = False, + dtype: Dtype | None = None, + ) -> IntervalIndex: + with rewrite_exception("IntervalArray", cls.__name__): + array = IntervalArray.from_arrays( + left, right, closed, copy=copy, dtype=dtype + ) + return cls._simple_new(array, name=name) + + @classmethod + @Appender( + _interval_shared_docs["from_tuples"] + % { + "klass": "IntervalIndex", + "examples": textwrap.dedent( + """\ + Examples + -------- + >>> pd.IntervalIndex.from_tuples([(0, 1), (1, 2)]) + IntervalIndex([(0, 1], (1, 2]], + dtype='interval[int64, right]') + """ + ), + } + ) + def from_tuples( + cls, + data, + closed: str = "right", + name: Hashable = None, + copy: bool = False, + dtype: Dtype | None = None, + ) -> IntervalIndex: + with rewrite_exception("IntervalArray", cls.__name__): + arr = IntervalArray.from_tuples(data, closed=closed, copy=copy, dtype=dtype) + return cls._simple_new(arr, name=name) + + # -------------------------------------------------------------------- + + @cache_readonly + def _engine(self) -> IntervalTree: + left = self._maybe_convert_i8(self.left) + right = self._maybe_convert_i8(self.right) + return IntervalTree(left, right, closed=self.closed) + + def __contains__(self, key: Any) -> bool: + """ + return a boolean if this key is IN the index + We *only* accept an 
Interval + + Parameters + ---------- + key : Interval + + Returns + ------- + bool + """ + hash(key) + if not isinstance(key, Interval): + if is_valid_na_for_dtype(key, self.dtype): + return self.hasnans + return False + + try: + self.get_loc(key) + return True + except KeyError: + return False + + @cache_readonly + def _multiindex(self) -> MultiIndex: + return MultiIndex.from_arrays([self.left, self.right], names=["left", "right"]) + + def __reduce__(self): + d = { + "left": self.left, + "right": self.right, + "closed": self.closed, + "name": self.name, + } + return _new_IntervalIndex, (type(self), d), None + + @property + def inferred_type(self) -> str: + """Return a string of the type inferred from the values""" + return "interval" + + @Appender(Index.memory_usage.__doc__) + def memory_usage(self, deep: bool = False) -> int: + # we don't use an explicit engine + # so return the bytes here + return self.left.memory_usage(deep=deep) + self.right.memory_usage(deep=deep) + + # IntervalTree doesn't have a is_monotonic_decreasing, so have to override + # the Index implementation + @cache_readonly + def is_monotonic_decreasing(self) -> bool: + """ + Return True if the IntervalIndex is monotonic decreasing (only equal or + decreasing values), else False + """ + return self[::-1].is_monotonic_increasing + + @cache_readonly + def is_unique(self) -> bool: + """ + Return True if the IntervalIndex contains unique elements, else False. + """ + left = self.left + right = self.right + + if self.isna().sum() > 1: + return False + + if left.is_unique or right.is_unique: + return True + + seen_pairs = set() + check_idx = np.where(left.duplicated(keep=False))[0] + for idx in check_idx: + pair = (left[idx], right[idx]) + if pair in seen_pairs: + return False + seen_pairs.add(pair) + + return True + + @property + def is_overlapping(self) -> bool: + """ + Return True if the IntervalIndex has overlapping intervals, else False. + + Two intervals overlap if they share a common point, including closed + endpoints. Intervals that only have an open endpoint in common do not + overlap. + + Returns + ------- + bool + Boolean indicating if the IntervalIndex has overlapping intervals. + + See Also + -------- + Interval.overlaps : Check whether two Interval objects overlap. + IntervalIndex.overlaps : Check an IntervalIndex elementwise for + overlaps. + + Examples + -------- + >>> index = pd.IntervalIndex.from_tuples([(0, 2), (1, 3), (4, 5)]) + >>> index + IntervalIndex([(0, 2], (1, 3], (4, 5]], + dtype='interval[int64, right]') + >>> index.is_overlapping + True + + Intervals that share closed endpoints overlap: + + >>> index = pd.interval_range(0, 3, closed='both') + >>> index + IntervalIndex([[0, 1], [1, 2], [2, 3]], + dtype='interval[int64, both]') + >>> index.is_overlapping + True + + Intervals that only have an open endpoint in common do not overlap: + + >>> index = pd.interval_range(0, 3, closed='left') + >>> index + IntervalIndex([[0, 1), [1, 2), [2, 3)], + dtype='interval[int64, left]') + >>> index.is_overlapping + False + """ + # GH 23309 + return self._engine.is_overlapping + + def _needs_i8_conversion(self, key) -> bool: + """ + Check if a given key needs i8 conversion. Conversion is necessary for + Timestamp, Timedelta, DatetimeIndex, and TimedeltaIndex keys. An + Interval-like requires conversion if its endpoints are one of the + aforementioned types. + + Assumes that any list-like data has already been cast to an Index. 
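+
+        For example, a ``Timestamp("2017-01-01")`` key needs conversion to its
+        integer (i8) nanosecond representation before the ``IntervalTree``
+        engine can be queried, whereas a plain integer key does not.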
+ + Parameters + ---------- + key : scalar or Index-like + The key that should be checked for i8 conversion + + Returns + ------- + bool + """ + if is_interval_dtype(key) or isinstance(key, Interval): + return self._needs_i8_conversion(key.left) + + i8_types = (Timestamp, Timedelta, DatetimeIndex, TimedeltaIndex) + return isinstance(key, i8_types) + + def _maybe_convert_i8(self, key): + """ + Maybe convert a given key to its equivalent i8 value(s). Used as a + preprocessing step prior to IntervalTree queries (self._engine), which + expects numeric data. + + Parameters + ---------- + key : scalar or list-like + The key that should maybe be converted to i8. + + Returns + ------- + scalar or list-like + The original key if no conversion occurred, int if converted scalar, + Int64Index if converted list-like. + """ + original = key + if is_list_like(key): + key = ensure_index(key) + + if not self._needs_i8_conversion(key): + return original + + scalar = is_scalar(key) + if is_interval_dtype(key) or isinstance(key, Interval): + # convert left/right and reconstruct + left = self._maybe_convert_i8(key.left) + right = self._maybe_convert_i8(key.right) + constructor = Interval if scalar else IntervalIndex.from_arrays + return constructor(left, right, closed=self.closed) + + if scalar: + # Timestamp/Timedelta + key_dtype, key_i8 = infer_dtype_from_scalar(key, pandas_dtype=True) + if lib.is_period(key): + key_i8 = key.ordinal + elif isinstance(key_i8, Timestamp): + key_i8 = key_i8.value + elif isinstance(key_i8, (np.datetime64, np.timedelta64)): + key_i8 = key_i8.view("i8") + else: + # DatetimeIndex/TimedeltaIndex + key_dtype, key_i8 = key.dtype, Index(key.asi8) + if key.hasnans: + # convert NaT from its i8 value to np.nan so it's not viewed + # as a valid value, maybe causing errors (e.g. is_overlapping) + key_i8 = key_i8.where(~key._isnan) + + # ensure consistency with IntervalIndex subtype + # error: Item "ExtensionDtype"/"dtype[Any]" of "Union[dtype[Any], + # ExtensionDtype]" has no attribute "subtype" + subtype = self.dtype.subtype # type: ignore[union-attr] + + if not is_dtype_equal(subtype, key_dtype): + raise ValueError( + f"Cannot index an IntervalIndex of subtype {subtype} with " + f"values of dtype {key_dtype}" + ) + + return key_i8 + + def _searchsorted_monotonic(self, label, side: str = "left"): + if not self.is_non_overlapping_monotonic: + raise KeyError( + "can only get slices from an IntervalIndex if bounds are " + "non-overlapping and all monotonic increasing or decreasing" + ) + + if isinstance(label, (IntervalMixin, IntervalIndex)): + raise NotImplementedError("Interval objects are not currently supported") + + # GH 20921: "not is_monotonic_increasing" for the second condition + # instead of "is_monotonic_decreasing" to account for single element + # indexes being both increasing and decreasing + if (side == "left" and self.left.is_monotonic_increasing) or ( + side == "right" and not self.left.is_monotonic_increasing + ): + sub_idx = self.right + if self.open_right: + label = _get_next_label(label) + else: + sub_idx = self.left + if self.open_left: + label = _get_prev_label(label) + + return sub_idx._searchsorted_monotonic(label, side) + + # -------------------------------------------------------------------- + # Indexing Methods + + def get_loc( + self, key, method: str | None = None, tolerance=None + ) -> int | slice | np.ndarray: + """ + Get integer location, slice or boolean mask for requested label. 
+ + Parameters + ---------- + key : label + method : {None}, optional + * default: matches where the label is within an interval only. + + Returns + ------- + int if unique index, slice if monotonic index, else mask + + Examples + -------- + >>> i1, i2 = pd.Interval(0, 1), pd.Interval(1, 2) + >>> index = pd.IntervalIndex([i1, i2]) + >>> index.get_loc(1) + 0 + + You can also supply a point inside an interval. + + >>> index.get_loc(1.5) + 1 + + If a label is in several intervals, you get the locations of all the + relevant intervals. + + >>> i3 = pd.Interval(0, 2) + >>> overlapping_index = pd.IntervalIndex([i1, i2, i3]) + >>> overlapping_index.get_loc(0.5) + array([ True, False, True]) + + Only exact matches will be returned if an interval is provided. + + >>> index.get_loc(pd.Interval(0, 1)) + 0 + """ + self._check_indexing_method(method) + self._check_indexing_error(key) + + if isinstance(key, Interval): + if self.closed != key.closed: + raise KeyError(key) + mask = (self.left == key.left) & (self.right == key.right) + elif is_valid_na_for_dtype(key, self.dtype): + mask = self.isna() + else: + # assume scalar + op_left = le if self.closed_left else lt + op_right = le if self.closed_right else lt + try: + mask = op_left(self.left, key) & op_right(key, self.right) + except TypeError as err: + # scalar is not comparable to II subtype --> invalid label + raise KeyError(key) from err + + matches = mask.sum() + if matches == 0: + raise KeyError(key) + elif matches == 1: + return mask.argmax() + + res = lib.maybe_booleans_to_slice(mask.view("u1")) + if isinstance(res, slice) and res.stop is None: + # TODO: DO this in maybe_booleans_to_slice? + res = slice(res.start, len(self), res.step) + return res + + def _get_indexer( + self, + target: Index, + method: str | None = None, + limit: int | None = None, + tolerance: Any | None = None, + ) -> npt.NDArray[np.intp]: + + if isinstance(target, IntervalIndex): + # We only get here with not self.is_overlapping + # -> at most one match per interval in target + # want exact matches -> need both left/right to match, so defer to + # left/right get_indexer, compare elementwise, equality -> match + indexer = self._get_indexer_unique_sides(target) + + elif not is_object_dtype(target.dtype): + # homogeneous scalar index: use IntervalTree + # we should always have self._should_partial_index(target) here + target = self._maybe_convert_i8(target) + indexer = self._engine.get_indexer(target.values) + else: + # heterogeneous scalar index: defer elementwise to get_loc + # we should always have self._should_partial_index(target) here + return self._get_indexer_pointwise(target)[0] + + return ensure_platform_int(indexer) + + @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs) + def get_indexer_non_unique( + self, target: Index + ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: + target = ensure_index(target) + + if not self._should_compare(target) and not self._should_partial_index(target): + # e.g. 
IntervalIndex with different closed or incompatible subtype + # -> no matches + return self._get_indexer_non_comparable(target, None, unique=False) + + elif isinstance(target, IntervalIndex): + if self.left.is_unique and self.right.is_unique: + # fastpath available even if we don't have self._index_as_unique + indexer = self._get_indexer_unique_sides(target) + missing = (indexer == -1).nonzero()[0] + else: + return self._get_indexer_pointwise(target) + + elif is_object_dtype(target.dtype) or not self._should_partial_index(target): + # target might contain intervals: defer elementwise to get_loc + return self._get_indexer_pointwise(target) + + else: + # Note: this case behaves differently from other Index subclasses + # because IntervalIndex does partial-int indexing + target = self._maybe_convert_i8(target) + indexer, missing = self._engine.get_indexer_non_unique(target.values) + + return ensure_platform_int(indexer), ensure_platform_int(missing) + + def _get_indexer_unique_sides(self, target: IntervalIndex) -> npt.NDArray[np.intp]: + """ + _get_indexer specialized to the case where both of our sides are unique. + """ + # Caller is responsible for checking + # `self.left.is_unique and self.right.is_unique` + + left_indexer = self.left.get_indexer(target.left) + right_indexer = self.right.get_indexer(target.right) + indexer = np.where(left_indexer == right_indexer, left_indexer, -1) + return indexer + + def _get_indexer_pointwise( + self, target: Index + ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: + """ + pointwise implementation for get_indexer and get_indexer_non_unique. + """ + indexer, missing = [], [] + for i, key in enumerate(target): + try: + locs = self.get_loc(key) + if isinstance(locs, slice): + # Only needed for get_indexer_non_unique + locs = np.arange(locs.start, locs.stop, locs.step, dtype="intp") + elif lib.is_integer(locs): + locs = np.array(locs, ndmin=1) + else: + # otherwise we have ndarray[bool] + locs = np.where(locs)[0] + except KeyError: + missing.append(i) + locs = np.array([-1]) + except InvalidIndexError: + # i.e. non-scalar key e.g. a tuple. + # see test_append_different_columns_types_raises + missing.append(i) + locs = np.array([-1]) + + indexer.append(locs) + + indexer = np.concatenate(indexer) + return ensure_platform_int(indexer), ensure_platform_int(missing) + + @cache_readonly + def _index_as_unique(self) -> bool: + return not self.is_overlapping and self._engine._na_count < 2 + + _requires_unique_msg = ( + "cannot handle overlapping indices; use IntervalIndex.get_indexer_non_unique" + ) + + def _convert_slice_indexer(self, key: slice, kind: str): + if not (key.step is None or key.step == 1): + # GH#31658 if label-based, we require step == 1, + # if positional, we disallow float start/stop + msg = "label-based slicing with step!=1 is not supported for IntervalIndex" + if kind == "loc": + raise ValueError(msg) + elif kind == "getitem": + if not is_valid_positional_slice(key): + # i.e. 
this cannot be interpreted as a positional slice + raise ValueError(msg) + + return super()._convert_slice_indexer(key, kind) + + @cache_readonly + def _should_fallback_to_positional(self) -> bool: + # integer lookups in Series.__getitem__ are unambiguously + # positional in this case + # error: Item "ExtensionDtype"/"dtype[Any]" of "Union[dtype[Any], + # ExtensionDtype]" has no attribute "subtype" + return self.dtype.subtype.kind in ["m", "M"] # type: ignore[union-attr] + + def _maybe_cast_slice_bound(self, label, side: str, kind=lib.no_default): + self._deprecated_arg(kind, "kind", "_maybe_cast_slice_bound") + return getattr(self, side)._maybe_cast_slice_bound(label, side) + + def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: + if not isinstance(dtype, IntervalDtype): + return False + common_subtype = find_common_type([self.dtype, dtype]) + return not is_object_dtype(common_subtype) + + # -------------------------------------------------------------------- + + @cache_readonly + def left(self) -> Index: + return Index(self._data.left, copy=False) + + @cache_readonly + def right(self) -> Index: + return Index(self._data.right, copy=False) + + @cache_readonly + def mid(self) -> Index: + return Index(self._data.mid, copy=False) + + @property + def length(self) -> Index: + return Index(self._data.length, copy=False) + + # -------------------------------------------------------------------- + # Rendering Methods + # __repr__ associated methods are based on MultiIndex + + def _format_with_header(self, header: list[str], na_rep: str) -> list[str]: + # matches base class except for whitespace padding + return header + list(self._format_native_types(na_rep=na_rep)) + + def _format_native_types(self, *, na_rep="NaN", quoting=None, **kwargs): + # GH 28210: use base method but with different default na_rep + return super()._format_native_types(na_rep=na_rep, quoting=quoting, **kwargs) + + def _format_data(self, name=None) -> str: + # TODO: integrate with categorical and make generic + # name argument is unused here; just for compat with base / categorical + return self._data._format_data() + "," + self._format_space() + + # -------------------------------------------------------------------- + # Set Operations + + def _intersection(self, other, sort): + """ + intersection specialized to the case with matching dtypes. + """ + # For IntervalIndex we also know other.closed == self.closed + if self.left.is_unique and self.right.is_unique: + taken = self._intersection_unique(other) + elif other.left.is_unique and other.right.is_unique and self.isna().sum() <= 1: + # Swap other/self if other is unique and self does not have + # multiple NaNs + taken = other._intersection_unique(self) + else: + # duplicates + taken = self._intersection_non_unique(other) + + if sort is None: + taken = taken.sort_values() + + return taken + + def _intersection_unique(self, other: IntervalIndex) -> IntervalIndex: + """ + Used when the IntervalIndex does not have any common endpoint, + no matter left or right. + Return the intersection with another IntervalIndex. 
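+
+        For example (illustrative): if ``self`` is ``[(0, 1], (1, 2]]`` and
+        ``other`` is ``[(1, 2], (5, 6]]``, the left and right indexers agree
+        only for ``(1, 2]``, so only that interval is taken.
+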
+ Parameters + ---------- + other : IntervalIndex + Returns + ------- + IntervalIndex + """ + # Note: this is much more performant than super()._intersection(other) + lindexer = self.left.get_indexer(other.left) + rindexer = self.right.get_indexer(other.right) + + match = (lindexer == rindexer) & (lindexer != -1) + indexer = lindexer.take(match.nonzero()[0]) + indexer = unique(indexer) + + return self.take(indexer) + + def _intersection_non_unique(self, other: IntervalIndex) -> IntervalIndex: + """ + Used when the IntervalIndex does have some common endpoints, + on either sides. + Return the intersection with another IntervalIndex. + + Parameters + ---------- + other : IntervalIndex + + Returns + ------- + IntervalIndex + """ + # Note: this is about 3.25x faster than super()._intersection(other) + # in IntervalIndexMethod.time_intersection_both_duplicate(1000) + mask = np.zeros(len(self), dtype=bool) + + if self.hasnans and other.hasnans: + first_nan_loc = np.arange(len(self))[self.isna()][0] + mask[first_nan_loc] = True + + other_tups = set(zip(other.left, other.right)) + for i, tup in enumerate(zip(self.left, self.right)): + if tup in other_tups: + mask[i] = True + + return self[mask] + + # -------------------------------------------------------------------- + + @property + def _is_all_dates(self) -> bool: + """ + This is False even when left/right contain datetime-like objects, + as the check is done on the Interval itself + """ + return False + + def _get_engine_target(self) -> np.ndarray: + # Note: we _could_ use libjoin functions by either casting to object + # dtype or constructing tuples (faster than constructing Intervals) + # but the libjoin fastpaths are no longer fast in these cases. + raise NotImplementedError( + "IntervalIndex does not use libjoin fastpaths or pass values to " + "IndexEngine objects" + ) + + def _from_join_target(self, result): + raise NotImplementedError("IntervalIndex does not use libjoin fastpaths") + + # TODO: arithmetic operations + + +def _is_valid_endpoint(endpoint) -> bool: + """ + Helper for interval_range to check if start/end are valid types. + """ + return any( + [ + is_number(endpoint), + isinstance(endpoint, Timestamp), + isinstance(endpoint, Timedelta), + endpoint is None, + ] + ) + + +def _is_type_compatible(a, b) -> bool: + """ + Helper for interval_range to check type compat of start/end/freq. + """ + is_ts_compat = lambda x: isinstance(x, (Timestamp, BaseOffset)) + is_td_compat = lambda x: isinstance(x, (Timedelta, BaseOffset)) + return ( + (is_number(a) and is_number(b)) + or (is_ts_compat(a) and is_ts_compat(b)) + or (is_td_compat(a) and is_td_compat(b)) + or com.any_none(a, b) + ) + + +def interval_range( + start=None, end=None, periods=None, freq=None, name: Hashable = None, closed="right" +) -> IntervalIndex: + """ + Return a fixed frequency IntervalIndex. + + Parameters + ---------- + start : numeric or datetime-like, default None + Left bound for generating intervals. + end : numeric or datetime-like, default None + Right bound for generating intervals. + periods : int, default None + Number of periods to generate. + freq : numeric, str, or DateOffset, default None + The length of each interval. Must be consistent with the type of start + and end, e.g. 2 for numeric, or '5H' for datetime-like. Default is 1 + for numeric and 'D' for datetime-like. + name : str, default None + Name of the resulting IntervalIndex. 
+    closed : {'left', 'right', 'both', 'neither'}, default 'right'
+        Whether the intervals are closed on the left-side, right-side, both
+        or neither.
+
+    Returns
+    -------
+    IntervalIndex
+
+    See Also
+    --------
+    IntervalIndex : An Index of intervals that are all closed on the same side.
+
+    Notes
+    -----
+    Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
+    exactly three must be specified. If ``freq`` is omitted, the resulting
+    ``IntervalIndex`` will have ``periods`` linearly spaced elements between
+    ``start`` and ``end``, inclusively.
+
+    To learn more about datetime-like frequency strings, please see `this link
+    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.
+
+    Examples
+    --------
+    Numeric ``start`` and ``end`` are supported.
+
+    >>> pd.interval_range(start=0, end=5)
+    IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]],
+                  dtype='interval[int64, right]')
+
+    Additionally, datetime-like input is also supported.
+
+    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
+    ...                   end=pd.Timestamp('2017-01-04'))
+    IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03],
+                   (2017-01-03, 2017-01-04]],
+                  dtype='interval[datetime64[ns], right]')
+
+    The ``freq`` parameter specifies the frequency between the left and right
+    endpoints of the individual intervals within the ``IntervalIndex``. For
+    numeric ``start`` and ``end``, the frequency must also be numeric.
+
+    >>> pd.interval_range(start=0, periods=4, freq=1.5)
+    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],
+                  dtype='interval[float64, right]')
+
+    Similarly, for datetime-like ``start`` and ``end``, the frequency must be
+    convertible to a DateOffset.
+
+    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
+    ...                   periods=3, freq='MS')
+    IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01],
+                   (2017-03-01, 2017-04-01]],
+                  dtype='interval[datetime64[ns], right]')
+
+    Specify ``start``, ``end``, and ``periods``; the frequency is generated
+    automatically (linearly spaced).
+
+    >>> pd.interval_range(start=0, end=6, periods=4)
+    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],
+                  dtype='interval[float64, right]')
+
+    The ``closed`` parameter specifies which endpoints of the individual
+    intervals within the ``IntervalIndex`` are closed.
+ + >>> pd.interval_range(end=5, periods=4, closed='both') + IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]], + dtype='interval[int64, both]') + """ + start = maybe_box_datetimelike(start) + end = maybe_box_datetimelike(end) + endpoint = start if start is not None else end + + if freq is None and com.any_none(periods, start, end): + freq = 1 if is_number(endpoint) else "D" + + if com.count_not_none(start, end, periods, freq) != 3: + raise ValueError( + "Of the four parameters: start, end, periods, and " + "freq, exactly three must be specified" + ) + + if not _is_valid_endpoint(start): + raise ValueError(f"start must be numeric or datetime-like, got {start}") + elif not _is_valid_endpoint(end): + raise ValueError(f"end must be numeric or datetime-like, got {end}") + + if is_float(periods): + periods = int(periods) + elif not is_integer(periods) and periods is not None: + raise TypeError(f"periods must be a number, got {periods}") + + if freq is not None and not is_number(freq): + try: + freq = to_offset(freq) + except ValueError as err: + raise ValueError( + f"freq must be numeric or convertible to DateOffset, got {freq}" + ) from err + + # verify type compatibility + if not all( + [ + _is_type_compatible(start, end), + _is_type_compatible(start, freq), + _is_type_compatible(end, freq), + ] + ): + raise TypeError("start, end, freq need to be type compatible") + + # +1 to convert interval count to breaks count (n breaks = n-1 intervals) + if periods is not None: + periods += 1 + + breaks: np.ndarray | TimedeltaIndex | DatetimeIndex + + if is_number(endpoint): + # force consistency between start/end/freq (lower end if freq skips it) + if com.all_not_none(start, end, freq): + end -= (end - start) % freq + + # compute the period/start/end if unspecified (at most one) + if periods is None: + periods = int((end - start) // freq) + 1 + elif start is None: + start = end - (periods - 1) * freq + elif end is None: + end = start + (periods - 1) * freq + + breaks = np.linspace(start, end, periods) + if all(is_integer(x) for x in com.not_none(start, end, freq)): + # np.linspace always produces float output + + # error: Incompatible types in assignment (expression has type + # "Union[ExtensionArray, ndarray]", variable has type "ndarray") + breaks = maybe_downcast_numeric( # type: ignore[assignment] + breaks, np.dtype("int64") + ) + else: + # delegate to the appropriate range function + if isinstance(endpoint, Timestamp): + breaks = date_range(start=start, end=end, periods=periods, freq=freq) + else: + breaks = timedelta_range(start=start, end=end, periods=periods, freq=freq) + + return IntervalIndex.from_breaks(breaks, name=name, closed=closed) diff --git a/.local/lib/python3.8/site-packages/pandas/core/indexes/multi.py b/.local/lib/python3.8/site-packages/pandas/core/indexes/multi.py new file mode 100644 index 0000000..dcb8437 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/indexes/multi.py @@ -0,0 +1,3962 @@ +from __future__ import annotations + +from functools import wraps +from sys import getsizeof +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Collection, + Hashable, + Iterable, + List, + Sequence, + Tuple, + cast, +) +import warnings + +import numpy as np + +from pandas._config import get_option + +from pandas._libs import ( + algos as libalgos, + index as libindex, + lib, +) +from pandas._libs.hashtable import duplicated +from pandas._typing import ( + AnyArrayLike, + DtypeObj, + F, + Scalar, + Shape, + npt, +) +from pandas.compat.numpy import function as nv +from 
pandas.errors import ( + InvalidIndexError, + PerformanceWarning, + UnsortedIndexError, +) +from pandas.util._decorators import ( + Appender, + cache_readonly, + deprecate_nonkeyword_arguments, + doc, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.cast import coerce_indexer_dtype +from pandas.core.dtypes.common import ( + ensure_int64, + ensure_platform_int, + is_categorical_dtype, + is_hashable, + is_integer, + is_iterator, + is_list_like, + is_object_dtype, + is_scalar, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import ExtensionDtype +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCDatetimeIndex, + ABCTimedeltaIndex, +) +from pandas.core.dtypes.missing import ( + array_equivalent, + isna, +) + +import pandas.core.algorithms as algos +from pandas.core.arrays import Categorical +from pandas.core.arrays.categorical import factorize_from_iterables +import pandas.core.common as com +import pandas.core.indexes.base as ibase +from pandas.core.indexes.base import ( + Index, + _index_shared_docs, + ensure_index, + get_unanimous_names, +) +from pandas.core.indexes.frozen import FrozenList +from pandas.core.indexes.numeric import Int64Index +from pandas.core.ops.invalid import make_invalid_op +from pandas.core.sorting import ( + get_group_index, + indexer_from_factorized, + lexsort_indexer, +) + +from pandas.io.formats.printing import pprint_thing + +if TYPE_CHECKING: + from pandas import ( + CategoricalIndex, + DataFrame, + Series, + ) + +_index_doc_kwargs = dict(ibase._index_doc_kwargs) +_index_doc_kwargs.update( + {"klass": "MultiIndex", "target_klass": "MultiIndex or list of tuples"} +) + + +class MultiIndexUIntEngine(libindex.BaseMultiIndexCodesEngine, libindex.UInt64Engine): + """ + This class manages a MultiIndex by mapping label combinations to positive + integers. + """ + + _base = libindex.UInt64Engine + + def _codes_to_ints(self, codes): + """ + Transform combination(s) of uint64 in one uint64 (each), in a strictly + monotonic way (i.e. respecting the lexicographic order of integer + combinations): see BaseMultiIndexCodesEngine documentation. + + Parameters + ---------- + codes : 1- or 2-dimensional array of dtype uint64 + Combinations of integers (one per row) + + Returns + ------- + scalar or 1-dimensional array, of dtype uint64 + Integer(s) representing one combination (each). + """ + # Shift the representation of each level by the pre-calculated number + # of bits: + codes <<= self.offsets + + # Now sum and OR are in fact interchangeable. This is a simple + # composition of the (disjunct) significant bits of each level (i.e. + # each column in "codes") in a single positive integer: + if codes.ndim == 1: + # Single key + return np.bitwise_or.reduce(codes) + + # Multiple keys + return np.bitwise_or.reduce(codes, axis=1) + + +class MultiIndexPyIntEngine(libindex.BaseMultiIndexCodesEngine, libindex.ObjectEngine): + """ + This class manages those (extreme) cases in which the number of possible + label combinations overflows the 64 bits integers, and uses an ObjectEngine + containing Python integers. + """ + + _base = libindex.ObjectEngine + + def _codes_to_ints(self, codes): + """ + Transform combination(s) of uint64 in one Python integer (each), in a + strictly monotonic way (i.e. respecting the lexicographic order of + integer combinations): see BaseMultiIndexCodesEngine documentation. 
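+
+        As an illustrative sketch (assuming two levels with pre-computed bit
+        offsets of ``[8, 0]``): the codes ``[1, 3]`` map to
+        ``(1 << 8) | (3 << 0) == 259``; performing the shifts on Python
+        integers keeps the result exact even when it would overflow 64 bits.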
+ + Parameters + ---------- + codes : 1- or 2-dimensional array of dtype uint64 + Combinations of integers (one per row) + + Returns + ------- + int, or 1-dimensional array of dtype object + Integer(s) representing one combination (each). + """ + # Shift the representation of each level by the pre-calculated number + # of bits. Since this can overflow uint64, first make sure we are + # working with Python integers: + codes = codes.astype("object") << self.offsets + + # Now sum and OR are in fact interchangeable. This is a simple + # composition of the (disjunct) significant bits of each level (i.e. + # each column in "codes") in a single positive integer (per row): + if codes.ndim == 1: + # Single key + return np.bitwise_or.reduce(codes) + + # Multiple keys + return np.bitwise_or.reduce(codes, axis=1) + + +def names_compat(meth: F) -> F: + """ + A decorator to allow either `name` or `names` keyword but not both. + + This makes it easier to share code with base class. + """ + + @wraps(meth) + def new_meth(self_or_cls, *args, **kwargs): + if "name" in kwargs and "names" in kwargs: + raise TypeError("Can only provide one of `names` and `name`") + elif "name" in kwargs: + kwargs["names"] = kwargs.pop("name") + + return meth(self_or_cls, *args, **kwargs) + + return cast(F, new_meth) + + +class MultiIndex(Index): + """ + A multi-level, or hierarchical, index object for pandas objects. + + Parameters + ---------- + levels : sequence of arrays + The unique labels for each level. + codes : sequence of arrays + Integers for each level designating which label at each location. + sortorder : optional int + Level of sortedness (must be lexicographically sorted by that + level). + names : optional sequence of objects + Names for each of the index levels. (name is accepted for compat). + copy : bool, default False + Copy the meta-data. + verify_integrity : bool, default True + Check that the levels/codes are consistent and valid. + + Attributes + ---------- + names + levels + codes + nlevels + levshape + + Methods + ------- + from_arrays + from_tuples + from_product + from_frame + set_levels + set_codes + to_frame + to_flat_index + sortlevel + droplevel + swaplevel + reorder_levels + remove_unused_levels + get_locs + + See Also + -------- + MultiIndex.from_arrays : Convert list of arrays to MultiIndex. + MultiIndex.from_product : Create a MultiIndex from the cartesian product + of iterables. + MultiIndex.from_tuples : Convert list of tuples to a MultiIndex. + MultiIndex.from_frame : Make a MultiIndex from a DataFrame. + Index : The base pandas Index type. + + Notes + ----- + See the `user guide + `__ + for more. + + Examples + -------- + A new ``MultiIndex`` is typically constructed using one of the helper + methods :meth:`MultiIndex.from_arrays`, :meth:`MultiIndex.from_product` + and :meth:`MultiIndex.from_tuples`. For example (using ``.from_arrays``): + + >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']] + >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color')) + MultiIndex([(1, 'red'), + (1, 'blue'), + (2, 'red'), + (2, 'blue')], + names=['number', 'color']) + + See further examples for how to construct a MultiIndex in the doc strings + of the mentioned helper methods. 
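+
+    As one further illustration, the cartesian-product helper builds the
+    same index from the level values alone:
+
+    >>> pd.MultiIndex.from_product([[1, 2], ['red', 'blue']],
+    ...                            names=['number', 'color'])
+    MultiIndex([(1, 'red'),
+                (1, 'blue'),
+                (2, 'red'),
+                (2, 'blue')],
+               names=['number', 'color'])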
+ """ + + _hidden_attrs = Index._hidden_attrs | frozenset() + + # initialize to zero-length tuples to make everything work + _typ = "multiindex" + _names = FrozenList() + _levels = FrozenList() + _codes = FrozenList() + _comparables = ["names"] + + sortorder: int | None + + # -------------------------------------------------------------------- + # Constructors + + def __new__( + cls, + levels=None, + codes=None, + sortorder=None, + names=None, + dtype=None, + copy=False, + name=None, + verify_integrity: bool = True, + ): + + # compat with Index + if name is not None: + names = name + if levels is None or codes is None: + raise TypeError("Must pass both levels and codes") + if len(levels) != len(codes): + raise ValueError("Length of levels and codes must be the same.") + if len(levels) == 0: + raise ValueError("Must pass non-zero number of levels/codes") + + result = object.__new__(cls) + result._cache = {} + + # we've already validated levels and codes, so shortcut here + result._set_levels(levels, copy=copy, validate=False) + result._set_codes(codes, copy=copy, validate=False) + + # Incompatible types in assignment (expression has type "List[None]", + # variable has type "FrozenList") [assignment] + result._names = [None] * len(levels) # type: ignore[assignment] + if names is not None: + # handles name validation + result._set_names(names) + + if sortorder is not None: + result.sortorder = int(sortorder) + else: + result.sortorder = sortorder + + if verify_integrity: + new_codes = result._verify_integrity() + result._codes = new_codes + + result._reset_identity() + + return result + + def _validate_codes(self, level: list, code: list): + """ + Reassign code values as -1 if their corresponding levels are NaN. + + Parameters + ---------- + code : list + Code to reassign. + level : list + Level to check for missing values (NaN, NaT, None). + + Returns + ------- + new code where code value = -1 if it corresponds + to a level with missing values (NaN, NaT, None). + """ + null_mask = isna(level) + if np.any(null_mask): + # Incompatible types in assignment (expression has type + # "ndarray[Any, dtype[Any]]", variable has type "List[Any]") + code = np.where(null_mask[code], -1, code) # type: ignore[assignment] + return code + + def _verify_integrity(self, codes: list | None = None, levels: list | None = None): + """ + Parameters + ---------- + codes : optional list + Codes to check for validity. Defaults to current codes. + levels : optional list + Levels to check for validity. Defaults to current levels. + + Raises + ------ + ValueError + If length of levels and codes don't match, if the codes for any + level would exceed level bounds, or there are any duplicate levels. + + Returns + ------- + new codes where code value = -1 if it corresponds to a + NaN level. + """ + # NOTE: Currently does not check, among other things, that cached + # nlevels matches nor that sortorder matches actually sortorder. + codes = codes or self.codes + levels = levels or self.levels + + if len(levels) != len(codes): + raise ValueError( + "Length of levels and codes must match. NOTE: " + "this index is in an inconsistent state." + ) + codes_length = len(codes[0]) + for i, (level, level_codes) in enumerate(zip(levels, codes)): + if len(level_codes) != codes_length: + raise ValueError( + f"Unequal code lengths: {[len(code_) for code_ in codes]}" + ) + if len(level_codes) and level_codes.max() >= len(level): + raise ValueError( + f"On level {i}, code max ({level_codes.max()}) >= length of " + f"level ({len(level)}). 
NOTE: this index is in an " + "inconsistent state" + ) + if len(level_codes) and level_codes.min() < -1: + raise ValueError(f"On level {i}, code value ({level_codes.min()}) < -1") + if not level.is_unique: + raise ValueError( + f"Level values must be unique: {list(level)} on level {i}" + ) + if self.sortorder is not None: + if self.sortorder > _lexsort_depth(self.codes, self.nlevels): + raise ValueError( + "Value for sortorder must be inferior or equal to actual " + f"lexsort_depth: sortorder {self.sortorder} " + f"with lexsort_depth {_lexsort_depth(self.codes, self.nlevels)}" + ) + + codes = [ + self._validate_codes(level, code) for level, code in zip(levels, codes) + ] + new_codes = FrozenList(codes) + return new_codes + + @classmethod + def from_arrays(cls, arrays, sortorder=None, names=lib.no_default) -> MultiIndex: + """ + Convert arrays to MultiIndex. + + Parameters + ---------- + arrays : list / sequence of array-likes + Each array-like gives one level's value for each data point. + len(arrays) is the number of levels. + sortorder : int or None + Level of sortedness (must be lexicographically sorted by that + level). + names : list / sequence of str, optional + Names for the levels in the index. + + Returns + ------- + MultiIndex + + See Also + -------- + MultiIndex.from_tuples : Convert list of tuples to MultiIndex. + MultiIndex.from_product : Make a MultiIndex from cartesian product + of iterables. + MultiIndex.from_frame : Make a MultiIndex from a DataFrame. + + Examples + -------- + >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']] + >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color')) + MultiIndex([(1, 'red'), + (1, 'blue'), + (2, 'red'), + (2, 'blue')], + names=['number', 'color']) + """ + error_msg = "Input must be a list / sequence of array-likes." + if not is_list_like(arrays): + raise TypeError(error_msg) + elif is_iterator(arrays): + arrays = list(arrays) + + # Check if elements of array are list-like + for array in arrays: + if not is_list_like(array): + raise TypeError(error_msg) + + # Check if lengths of all arrays are equal or not, + # raise ValueError, if not + for i in range(1, len(arrays)): + if len(arrays[i]) != len(arrays[i - 1]): + raise ValueError("all arrays must be same length") + + codes, levels = factorize_from_iterables(arrays) + if names is lib.no_default: + names = [getattr(arr, "name", None) for arr in arrays] + + return cls( + levels=levels, + codes=codes, + sortorder=sortorder, + names=names, + verify_integrity=False, + ) + + @classmethod + @names_compat + def from_tuples( + cls, + tuples: Iterable[tuple[Hashable, ...]], + sortorder: int | None = None, + names: Sequence[Hashable] | None = None, + ) -> MultiIndex: + """ + Convert list of tuples to MultiIndex. + + Parameters + ---------- + tuples : list / sequence of tuple-likes + Each tuple is the index of one row/column. + sortorder : int or None + Level of sortedness (must be lexicographically sorted by that + level). + names : list / sequence of str, optional + Names for the levels in the index. + + Returns + ------- + MultiIndex + + See Also + -------- + MultiIndex.from_arrays : Convert list of arrays to MultiIndex. + MultiIndex.from_product : Make a MultiIndex from cartesian product + of iterables. + MultiIndex.from_frame : Make a MultiIndex from a DataFrame. + + Examples + -------- + >>> tuples = [(1, 'red'), (1, 'blue'), + ... 
(2, 'red'), (2, 'blue')] + >>> pd.MultiIndex.from_tuples(tuples, names=('number', 'color')) + MultiIndex([(1, 'red'), + (1, 'blue'), + (2, 'red'), + (2, 'blue')], + names=['number', 'color']) + """ + if not is_list_like(tuples): + raise TypeError("Input must be a list / sequence of tuple-likes.") + elif is_iterator(tuples): + tuples = list(tuples) + tuples = cast(Collection[Tuple[Hashable, ...]], tuples) + + arrays: list[Sequence[Hashable]] + if len(tuples) == 0: + if names is None: + raise TypeError("Cannot infer number of levels from empty list") + arrays = [[]] * len(names) + elif isinstance(tuples, (np.ndarray, Index)): + if isinstance(tuples, Index): + tuples = np.asarray(tuples._values) + + arrays = list(lib.tuples_to_object_array(tuples).T) + elif isinstance(tuples, list): + arrays = list(lib.to_object_array_tuples(tuples).T) + else: + arrs = zip(*tuples) + arrays = cast(List[Sequence[Hashable]], arrs) + + return cls.from_arrays(arrays, sortorder=sortorder, names=names) + + @classmethod + def from_product( + cls, iterables, sortorder=None, names=lib.no_default + ) -> MultiIndex: + """ + Make a MultiIndex from the cartesian product of multiple iterables. + + Parameters + ---------- + iterables : list / sequence of iterables + Each iterable has unique labels for each level of the index. + sortorder : int or None + Level of sortedness (must be lexicographically sorted by that + level). + names : list / sequence of str, optional + Names for the levels in the index. + + .. versionchanged:: 1.0.0 + + If not explicitly provided, names will be inferred from the + elements of iterables if an element has a name attribute + + Returns + ------- + MultiIndex + + See Also + -------- + MultiIndex.from_arrays : Convert list of arrays to MultiIndex. + MultiIndex.from_tuples : Convert list of tuples to MultiIndex. + MultiIndex.from_frame : Make a MultiIndex from a DataFrame. + + Examples + -------- + >>> numbers = [0, 1, 2] + >>> colors = ['green', 'purple'] + >>> pd.MultiIndex.from_product([numbers, colors], + ... names=['number', 'color']) + MultiIndex([(0, 'green'), + (0, 'purple'), + (1, 'green'), + (1, 'purple'), + (2, 'green'), + (2, 'purple')], + names=['number', 'color']) + """ + from pandas.core.reshape.util import cartesian_product + + if not is_list_like(iterables): + raise TypeError("Input must be a list / sequence of iterables.") + elif is_iterator(iterables): + iterables = list(iterables) + + codes, levels = factorize_from_iterables(iterables) + if names is lib.no_default: + names = [getattr(it, "name", None) for it in iterables] + + # codes are all ndarrays, so cartesian_product is lossless + codes = cartesian_product(codes) + return cls(levels, codes, sortorder=sortorder, names=names) + + @classmethod + def from_frame(cls, df: DataFrame, sortorder=None, names=None) -> MultiIndex: + """ + Make a MultiIndex from a DataFrame. + + Parameters + ---------- + df : DataFrame + DataFrame to be converted to MultiIndex. + sortorder : int, optional + Level of sortedness (must be lexicographically sorted by that + level). + names : list-like, optional + If no names are provided, use the column names, or tuple of column + names if the columns is a MultiIndex. If a sequence, overwrite + names with the given sequence. + + Returns + ------- + MultiIndex + The MultiIndex representation of the given DataFrame. + + See Also + -------- + MultiIndex.from_arrays : Convert list of arrays to MultiIndex. + MultiIndex.from_tuples : Convert list of tuples to MultiIndex. 
+ MultiIndex.from_product : Make a MultiIndex from cartesian product + of iterables. + + Examples + -------- + >>> df = pd.DataFrame([['HI', 'Temp'], ['HI', 'Precip'], + ... ['NJ', 'Temp'], ['NJ', 'Precip']], + ... columns=['a', 'b']) + >>> df + a b + 0 HI Temp + 1 HI Precip + 2 NJ Temp + 3 NJ Precip + + >>> pd.MultiIndex.from_frame(df) + MultiIndex([('HI', 'Temp'), + ('HI', 'Precip'), + ('NJ', 'Temp'), + ('NJ', 'Precip')], + names=['a', 'b']) + + Using explicit names, instead of the column names + + >>> pd.MultiIndex.from_frame(df, names=['state', 'observation']) + MultiIndex([('HI', 'Temp'), + ('HI', 'Precip'), + ('NJ', 'Temp'), + ('NJ', 'Precip')], + names=['state', 'observation']) + """ + if not isinstance(df, ABCDataFrame): + raise TypeError("Input must be a DataFrame") + + column_names, columns = zip(*df.items()) + names = column_names if names is None else names + return cls.from_arrays(columns, sortorder=sortorder, names=names) + + # -------------------------------------------------------------------- + + @cache_readonly + def _values(self) -> np.ndarray: + # We override here, since our parent uses _data, which we don't use. + values = [] + + for i in range(self.nlevels): + vals = self._get_level_values(i) + if is_categorical_dtype(vals.dtype): + vals = cast("CategoricalIndex", vals) + vals = vals._data._internal_get_values() + if isinstance(vals.dtype, ExtensionDtype) or isinstance( + vals, (ABCDatetimeIndex, ABCTimedeltaIndex) + ): + vals = vals.astype(object) + # error: Incompatible types in assignment (expression has type "ndarray", + # variable has type "Index") + vals = np.array(vals, copy=False) # type: ignore[assignment] + values.append(vals) + + arr = lib.fast_zip(values) + return arr + + @property + def values(self) -> np.ndarray: + return self._values + + @property + def array(self): + """ + Raises a ValueError for `MultiIndex` because there's no single + array backing a MultiIndex. + + Raises + ------ + ValueError + """ + raise ValueError( + "MultiIndex has no single backing array. Use " + "'MultiIndex.to_numpy()' to get a NumPy array of tuples." + ) + + @cache_readonly + def dtypes(self) -> Series: + """ + Return the dtypes as a Series for the underlying MultiIndex. + """ + from pandas import Series + + names = com.fill_missing_names([level.name for level in self.levels]) + return Series([level.dtype for level in self.levels], index=names) + + def __len__(self) -> int: + return len(self.codes[0]) + + # -------------------------------------------------------------------- + # Levels Methods + + @cache_readonly + def levels(self) -> FrozenList: + # Use cache_readonly to ensure that self.get_locs doesn't repeatedly + # create new IndexEngine + # https://github.com/pandas-dev/pandas/issues/31648 + result = [x._rename(name=name) for x, name in zip(self._levels, self._names)] + for level in result: + # disallow midx.levels[0].name = "foo" + level._no_setting_name = True + return FrozenList(result) + + def _set_levels( + self, + levels, + *, + level=None, + copy: bool = False, + validate: bool = True, + verify_integrity: bool = False, + ) -> None: + # This is NOT part of the levels property because it should be + # externally not allowed to set levels. 
User beware if you change + # _levels directly + if validate: + if len(levels) == 0: + raise ValueError("Must set non-zero number of levels.") + if level is None and len(levels) != self.nlevels: + raise ValueError("Length of levels must match number of levels.") + if level is not None and len(levels) != len(level): + raise ValueError("Length of levels must match length of level.") + + if level is None: + new_levels = FrozenList( + ensure_index(lev, copy=copy)._view() for lev in levels + ) + else: + level_numbers = [self._get_level_number(lev) for lev in level] + new_levels_list = list(self._levels) + for lev_num, lev in zip(level_numbers, levels): + new_levels_list[lev_num] = ensure_index(lev, copy=copy)._view() + new_levels = FrozenList(new_levels_list) + + if verify_integrity: + new_codes = self._verify_integrity(levels=new_levels) + self._codes = new_codes + + names = self.names + self._levels = new_levels + if any(names): + self._set_names(names) + + self._reset_cache() + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "levels"]) + def set_levels( + self, levels, level=None, inplace=None, verify_integrity: bool = True + ): + """ + Set new levels on MultiIndex. Defaults to returning new index. + + Parameters + ---------- + levels : sequence or list of sequence + New level(s) to apply. + level : int, level name, or sequence of int/level names (default None) + Level(s) to set (None for all levels). + inplace : bool + If True, mutates in place. + + .. deprecated:: 1.2.0 + verify_integrity : bool, default True + If True, checks that levels and codes are compatible. + + Returns + ------- + new index (of same type and class...etc) or None + The same type as the caller or None if ``inplace=True``. + + Examples + -------- + >>> idx = pd.MultiIndex.from_tuples( + ... [ + ... (1, "one"), + ... (1, "two"), + ... (2, "one"), + ... (2, "two"), + ... (3, "one"), + ... (3, "two") + ... ], + ... names=["foo", "bar"] + ... ) + >>> idx + MultiIndex([(1, 'one'), + (1, 'two'), + (2, 'one'), + (2, 'two'), + (3, 'one'), + (3, 'two')], + names=['foo', 'bar']) + + >>> idx.set_levels([['a', 'b', 'c'], [1, 2]]) + MultiIndex([('a', 1), + ('a', 2), + ('b', 1), + ('b', 2), + ('c', 1), + ('c', 2)], + names=['foo', 'bar']) + >>> idx.set_levels(['a', 'b', 'c'], level=0) + MultiIndex([('a', 'one'), + ('a', 'two'), + ('b', 'one'), + ('b', 'two'), + ('c', 'one'), + ('c', 'two')], + names=['foo', 'bar']) + >>> idx.set_levels(['a', 'b'], level='bar') + MultiIndex([(1, 'a'), + (1, 'b'), + (2, 'a'), + (2, 'b'), + (3, 'a'), + (3, 'b')], + names=['foo', 'bar']) + + If any of the levels passed to ``set_levels()`` exceeds the + existing length, all of the values from that argument will + be stored in the MultiIndex levels, though the values will + be truncated in the MultiIndex output. 
+ + >>> idx.set_levels([['a', 'b', 'c'], [1, 2, 3, 4]], level=[0, 1]) + MultiIndex([('a', 1), + ('a', 2), + ('b', 1), + ('b', 2), + ('c', 1), + ('c', 2)], + names=['foo', 'bar']) + >>> idx.set_levels([['a', 'b', 'c'], [1, 2, 3, 4]], level=[0, 1]).levels + FrozenList([['a', 'b', 'c'], [1, 2, 3, 4]]) + """ + if inplace is not None: + warnings.warn( + "inplace is deprecated and will be removed in a future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + else: + inplace = False + + if is_list_like(levels) and not isinstance(levels, Index): + levels = list(levels) + + level, levels = _require_listlike(level, levels, "Levels") + + if inplace: + idx = self + else: + idx = self._view() + idx._reset_identity() + idx._set_levels( + levels, level=level, validate=True, verify_integrity=verify_integrity + ) + if not inplace: + return idx + + @property + def nlevels(self) -> int: + """ + Integer number of levels in this MultiIndex. + + Examples + -------- + >>> mi = pd.MultiIndex.from_arrays([['a'], ['b'], ['c']]) + >>> mi + MultiIndex([('a', 'b', 'c')], + ) + >>> mi.nlevels + 3 + """ + return len(self._levels) + + @property + def levshape(self) -> Shape: + """ + A tuple with the length of each level. + + Examples + -------- + >>> mi = pd.MultiIndex.from_arrays([['a'], ['b'], ['c']]) + >>> mi + MultiIndex([('a', 'b', 'c')], + ) + >>> mi.levshape + (1, 1, 1) + """ + return tuple(len(x) for x in self.levels) + + # -------------------------------------------------------------------- + # Codes Methods + + @property + def codes(self): + return self._codes + + def _set_codes( + self, + codes, + *, + level=None, + copy: bool = False, + validate: bool = True, + verify_integrity: bool = False, + ) -> None: + if validate: + if level is None and len(codes) != self.nlevels: + raise ValueError("Length of codes must match number of levels") + if level is not None and len(codes) != len(level): + raise ValueError("Length of codes must match length of levels.") + + if level is None: + new_codes = FrozenList( + _coerce_indexer_frozen(level_codes, lev, copy=copy).view() + for lev, level_codes in zip(self._levels, codes) + ) + else: + level_numbers = [self._get_level_number(lev) for lev in level] + new_codes_list = list(self._codes) + for lev_num, level_codes in zip(level_numbers, codes): + lev = self.levels[lev_num] + new_codes_list[lev_num] = _coerce_indexer_frozen( + level_codes, lev, copy=copy + ) + new_codes = FrozenList(new_codes_list) + + if verify_integrity: + new_codes = self._verify_integrity(codes=new_codes) + + self._codes = new_codes + + self._reset_cache() + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "codes"]) + def set_codes(self, codes, level=None, inplace=None, verify_integrity: bool = True): + """ + Set new codes on MultiIndex. Defaults to returning new index. + + Parameters + ---------- + codes : sequence or list of sequence + New codes to apply. + level : int, level name, or sequence of int/level names (default None) + Level(s) to set (None for all levels). + inplace : bool + If True, mutates in place. + + .. deprecated:: 1.2.0 + verify_integrity : bool, default True + If True, checks that levels and codes are compatible. + + Returns + ------- + new index (of same type and class...etc) or None + The same type as the caller or None if ``inplace=True``. + + Examples + -------- + >>> idx = pd.MultiIndex.from_tuples( + ... [(1, "one"), (1, "two"), (2, "one"), (2, "two")], names=["foo", "bar"] + ... 
) + >>> idx + MultiIndex([(1, 'one'), + (1, 'two'), + (2, 'one'), + (2, 'two')], + names=['foo', 'bar']) + + >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]]) + MultiIndex([(2, 'one'), + (1, 'one'), + (2, 'two'), + (1, 'two')], + names=['foo', 'bar']) + >>> idx.set_codes([1, 0, 1, 0], level=0) + MultiIndex([(2, 'one'), + (1, 'two'), + (2, 'one'), + (1, 'two')], + names=['foo', 'bar']) + >>> idx.set_codes([0, 0, 1, 1], level='bar') + MultiIndex([(1, 'one'), + (1, 'one'), + (2, 'two'), + (2, 'two')], + names=['foo', 'bar']) + >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]], level=[0, 1]) + MultiIndex([(2, 'one'), + (1, 'one'), + (2, 'two'), + (1, 'two')], + names=['foo', 'bar']) + """ + if inplace is not None: + warnings.warn( + "inplace is deprecated and will be removed in a future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + else: + inplace = False + + level, codes = _require_listlike(level, codes, "Codes") + + if inplace: + idx = self + else: + idx = self._view() + idx._reset_identity() + idx._set_codes(codes, level=level, verify_integrity=verify_integrity) + if not inplace: + return idx + + # -------------------------------------------------------------------- + # Index Internals + + @cache_readonly + def _engine(self): + # Calculate the number of bits needed to represent labels in each + # level, as log2 of their sizes (including -1 for NaN): + sizes = np.ceil(np.log2([len(level) + 1 for level in self.levels])) + + # Sum bit counts, starting from the _right_.... + lev_bits = np.cumsum(sizes[::-1])[::-1] + + # ... in order to obtain offsets such that sorting the combination of + # shifted codes (one for each level, resulting in a unique integer) is + # equivalent to sorting lexicographically the codes themselves. Notice + # that each level needs to be shifted by the number of bits needed to + # represent the _previous_ ones: + offsets = np.concatenate([lev_bits[1:], [0]]).astype( # type: ignore[arg-type] + "uint64" + ) + + # Check the total number of bits needed for our representation: + if lev_bits[0] > 64: + # The levels would overflow a 64 bit uint - use Python integers: + return MultiIndexPyIntEngine(self.levels, self.codes, offsets) + return MultiIndexUIntEngine(self.levels, self.codes, offsets) + + # Return type "Callable[..., MultiIndex]" of "_constructor" incompatible with return + # type "Type[MultiIndex]" in supertype "Index" + @property + def _constructor(self) -> Callable[..., MultiIndex]: # type: ignore[override] + return type(self).from_tuples + + @doc(Index._shallow_copy) + def _shallow_copy(self, values: np.ndarray, name=lib.no_default) -> MultiIndex: + names = name if name is not lib.no_default else self.names + + return type(self).from_tuples(values, sortorder=None, names=names) + + def _view(self) -> MultiIndex: + result = type(self)( + levels=self.levels, + codes=self.codes, + sortorder=self.sortorder, + names=self.names, + verify_integrity=False, + ) + result._cache = self._cache.copy() + result._cache.pop("levels", None) # GH32669 + return result + + # -------------------------------------------------------------------- + + def copy( + self, + names=None, + dtype=None, + levels=None, + codes=None, + deep=False, + name=None, + ): + """ + Make a copy of this object. Names, dtype, levels and codes can be + passed and will be set on new copy. + + Parameters + ---------- + names : sequence, optional + dtype : numpy dtype or pandas type, optional + + .. deprecated:: 1.2.0 + levels : sequence, optional + + .. 
deprecated:: 1.2.0 codes : sequence, optional + + .. deprecated:: 1.2.0 + deep : bool, default False + name : Label + Kept for compatibility with 1-dimensional Index. Should not be used. + + Returns + ------- + MultiIndex + + Notes + ----- + In most cases, there should be no functional difference from using + ``deep``, but if ``deep`` is passed it will attempt to deepcopy. + This could be potentially expensive on large MultiIndex objects. + """ + names = self._validate_names(name=name, names=names, deep=deep) + if levels is not None: + warnings.warn( + "parameter levels is deprecated and will be removed in a future " + "version. Use the set_levels method instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + if codes is not None: + warnings.warn( + "parameter codes is deprecated and will be removed in a future " + "version. Use the set_codes method instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + if deep: + from copy import deepcopy + + if levels is None: + levels = deepcopy(self.levels) + if codes is None: + codes = deepcopy(self.codes) + + levels = levels if levels is not None else self.levels + codes = codes if codes is not None else self.codes + + new_index = type(self)( + levels=levels, + codes=codes, + sortorder=self.sortorder, + names=names, + verify_integrity=False, + ) + new_index._cache = self._cache.copy() + new_index._cache.pop("levels", None) # GH32669 + + if dtype: + warnings.warn( + "parameter dtype is deprecated and will be removed in a future " + "version. Use the astype method instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + new_index = new_index.astype(dtype) + return new_index + + def __array__(self, dtype=None) -> np.ndarray: + """the array interface, return my values""" + return self.values + + def view(self, cls=None): + """this is defined as a copy with the same identity""" + result = self.copy() + result._id = self._id + return result + + @doc(Index.__contains__) + def __contains__(self, key: Any) -> bool: + hash(key) + try: + self.get_loc(key) + return True + except (LookupError, TypeError, ValueError): + return False + + @cache_readonly + def dtype(self) -> np.dtype: + return np.dtype("O") + + def _is_memory_usage_qualified(self) -> bool: + """return a boolean if we need a qualified .info display""" + + def f(level): + return "mixed" in level or "string" in level or "unicode" in level + + return any(f(level) for level in self._inferred_type_levels) + + @doc(Index.memory_usage) + def memory_usage(self, deep: bool = False) -> int: + # we are overwriting our base class to avoid + # computing .values here which could materialize + # a tuple representation unnecessarily + return self._nbytes(deep) + + @cache_readonly + def nbytes(self) -> int: + """return the number of bytes in the underlying data""" + return self._nbytes(False) + + def _nbytes(self, deep: bool = False) -> int: + """ + return the number of bytes in the underlying data + deeply introspect the level data if deep=True + + include the engine hashtable + + *this is an internal routine* + + """ + # for implementations with no useful getsizeof (PyPy) + objsize = 24 + + level_nbytes = sum(i.memory_usage(deep=deep) for i in self.levels) + label_nbytes = sum(i.nbytes for i in self.codes) + names_nbytes = sum(getsizeof(i, objsize) for i in self.names) + result = level_nbytes + label_nbytes + names_nbytes + + # include our engine hashtable + result += self._engine.sizeof(deep=deep) + return result + + # 
-------------------------------------------------------------------- + # Rendering Methods + + def _formatter_func(self, tup): + """ + Formats each item in tup according to its level's formatter function. + """ + formatter_funcs = [level._formatter_func for level in self.levels] + return tuple(func(val) for func, val in zip(formatter_funcs, tup)) + + def _format_native_types(self, *, na_rep="nan", **kwargs): + new_levels = [] + new_codes = [] + + # go through the levels and format them + for level, level_codes in zip(self.levels, self.codes): + level_strs = level._format_native_types(na_rep=na_rep, **kwargs) + # add nan values, if there are any + mask = level_codes == -1 + if mask.any(): + nan_index = len(level_strs) + # numpy 1.21 deprecated implicit string casting + level_strs = level_strs.astype(str) + level_strs = np.append(level_strs, na_rep) + assert not level_codes.flags.writeable # i.e. copy is needed + level_codes = level_codes.copy() # make writeable + level_codes[mask] = nan_index + new_levels.append(level_strs) + new_codes.append(level_codes) + + if len(new_levels) == 1: + # a single-level multi-index + return Index(new_levels[0].take(new_codes[0]))._format_native_types() + else: + # reconstruct the multi-index + mi = MultiIndex( + levels=new_levels, + codes=new_codes, + names=self.names, + sortorder=self.sortorder, + verify_integrity=False, + ) + return mi._values + + def format( + self, + name: bool | None = None, + formatter: Callable | None = None, + na_rep: str | None = None, + names: bool = False, + space: int = 2, + sparsify=None, + adjoin: bool = True, + ) -> list: + if name is not None: + names = name + + if len(self) == 0: + return [] + + stringified_levels = [] + for lev, level_codes in zip(self.levels, self.codes): + na = na_rep if na_rep is not None else _get_na_rep(lev.dtype.type) + + if len(lev) > 0: + + formatted = lev.take(level_codes).format(formatter=formatter) + + # we have some NA + mask = level_codes == -1 + if mask.any(): + formatted = np.array(formatted, dtype=object) + formatted[mask] = na + formatted = formatted.tolist() + + else: + # weird all NA case + formatted = [ + pprint_thing(na if isna(x) else x, escape_chars=("\t", "\r", "\n")) + for x in algos.take_nd(lev._values, level_codes) + ] + stringified_levels.append(formatted) + + result_levels = [] + for lev, lev_name in zip(stringified_levels, self.names): + level = [] + + if names: + level.append( + pprint_thing(lev_name, escape_chars=("\t", "\r", "\n")) + if lev_name is not None + else "" + ) + + level.extend(np.array(lev, dtype=object)) + result_levels.append(level) + + if sparsify is None: + sparsify = get_option("display.multi_sparse") + + if sparsify: + sentinel = "" + # GH3547 use value of sparsify as sentinel if it's "Falsey" + assert isinstance(sparsify, bool) or sparsify is lib.no_default + if sparsify in [False, lib.no_default]: + sentinel = sparsify + # little bit of a kludge job for #1217 + result_levels = sparsify_labels( + result_levels, start=int(names), sentinel=sentinel + ) + + if adjoin: + from pandas.io.formats.format import get_adjustment + + adj = get_adjustment() + return adj.adjoin(space, *result_levels).split("\n") + else: + return result_levels + + # -------------------------------------------------------------------- + # Names Methods + + def _get_names(self) -> FrozenList: + return FrozenList(self._names) + + def _set_names(self, names, *, level=None, validate: bool = True): + """ + Set new names on index. Each name has to be a hashable type. 
+ + Parameters + ---------- + names : str or sequence + name(s) to set + level : int, level name, or sequence of int/level names (default None) + If the index is a MultiIndex (hierarchical), level(s) to set (None + for all levels). Otherwise level must be None. + validate : bool, default True + validate that the names match level lengths + + Raises + ------ + TypeError if any name is not hashable. + + Notes + ----- + sets names on levels. WARNING: mutates! + + Note that you generally want to set this *after* changing levels, so + that it only acts on copies. + """ + # GH 15110 + # Don't allow a single string for names in a MultiIndex + if names is not None and not is_list_like(names): + raise ValueError("Names should be list-like for a MultiIndex") + names = list(names) + + if validate: + if level is not None and len(names) != len(level): + raise ValueError("Length of names must match length of level.") + if level is None and len(names) != self.nlevels: + raise ValueError( + "Length of names must match number of levels in MultiIndex." + ) + + if level is None: + level = range(self.nlevels) + else: + level = [self._get_level_number(lev) for lev in level] + + # set the name + for lev, name in zip(level, names): + if name is not None: + # GH 20527 + # All items in 'names' need to be hashable: + if not is_hashable(name): + raise TypeError( + f"{type(self).__name__}.name must be a hashable type" + ) + # error: Cannot determine type of '__setitem__' + self._names[lev] = name # type: ignore[has-type] + + # If .levels has been accessed, the names in our cache will be stale. + self._reset_cache() + + names = property( + fset=_set_names, + fget=_get_names, + doc=""" + Names of levels in MultiIndex. + + Examples + -------- + >>> mi = pd.MultiIndex.from_arrays( + ... [[1, 2], [3, 4], [5, 6]], names=['x', 'y', 'z']) + >>> mi + MultiIndex([(1, 3, 5), + (2, 4, 6)], + names=['x', 'y', 'z']) + >>> mi.names + FrozenList(['x', 'y', 'z']) + """, + ) + + # -------------------------------------------------------------------- + + @doc(Index._get_grouper_for_level) + def _get_grouper_for_level(self, mapper, *, level): + indexer = self.codes[level] + level_index = self.levels[level] + + if mapper is not None: + # Handle group mapping function and return + level_values = self.levels[level].take(indexer) + grouper = level_values.map(mapper) + return grouper, None, None + + codes, uniques = algos.factorize(indexer, sort=True) + + if len(uniques) > 0 and uniques[0] == -1: + # Handle NAs + mask = indexer != -1 + ok_codes, uniques = algos.factorize(indexer[mask], sort=True) + + codes = np.empty(len(indexer), dtype=indexer.dtype) + codes[mask] = ok_codes + codes[~mask] = -1 + + if len(uniques) < len(level_index): + # Remove unobserved levels from level_index + level_index = level_index.take(uniques) + else: + # break references back to us so that setting the name + # on the output of a groupby doesn't reflect back here. 
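+ # i.e. every category of this level was observed, so the level is + # reused whole; the copy keeps the result from aliasing self.levels[level].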
+ level_index = level_index.copy() + + if level_index._can_hold_na: + grouper = level_index.take(codes, fill_value=True) + else: + grouper = level_index.take(codes) + + return grouper, codes, level_index + + @cache_readonly + def inferred_type(self) -> str: + return "mixed" + + def _get_level_number(self, level) -> int: + count = self.names.count(level) + if (count > 1) and not is_integer(level): + raise ValueError( + f"The name {level} occurs multiple times, use a level number" + ) + try: + level = self.names.index(level) + except ValueError as err: + if not is_integer(level): + raise KeyError(f"Level {level} not found") from err + elif level < 0: + level += self.nlevels + if level < 0: + orig_level = level - self.nlevels + raise IndexError( + f"Too many levels: Index has only {self.nlevels} levels, " + f"{orig_level} is not a valid level number" + ) from err + # Note: levels are zero-based + elif level >= self.nlevels: + raise IndexError( + f"Too many levels: Index has only {self.nlevels} levels, " + f"not {level + 1}" + ) from err + return level + + @cache_readonly + def is_monotonic_increasing(self) -> bool: + """ + return if the index is monotonic increasing (only equal or + increasing values). + """ + if any(-1 in code for code in self.codes): + return False + + if all(level.is_monotonic for level in self.levels): + # If each level is sorted, we can operate on the codes directly. GH27495 + return libalgos.is_lexsorted( + [x.astype("int64", copy=False) for x in self.codes] + ) + + # reversed() because lexsort() wants the most significant key last. + values = [ + self._get_level_values(i)._values for i in reversed(range(len(self.levels))) + ] + try: + # Argument 1 to "lexsort" has incompatible type "List[Union[ExtensionArray, + # ndarray[Any, Any]]]"; expected "Union[_SupportsArray[dtype[Any]], + # _NestedSequence[_SupportsArray[dtype[Any]]], bool, + # int, float, complex, str, bytes, _NestedSequence[Union[bool, int, float, + # complex, str, bytes]]]" [arg-type] + sort_order = np.lexsort(values) # type: ignore[arg-type] + return Index(sort_order).is_monotonic + except TypeError: + + # we have mixed types and np.lexsort is not happy + return Index(self._values).is_monotonic + + @cache_readonly + def is_monotonic_decreasing(self) -> bool: + """ + return if the index is monotonic decreasing (only equal or + decreasing values). 
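+ + For illustration, with example values chosen arbitrarily: + + >>> pd.MultiIndex.from_arrays([[2, 1], ['b', 'a']]).is_monotonic_decreasing + True + >>> pd.MultiIndex.from_arrays([[1, 2], ['a', 'b']]).is_monotonic_decreasing + False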
+ """ + # monotonic decreasing if and only if reverse is monotonic increasing + return self[::-1].is_monotonic_increasing + + @cache_readonly + def _inferred_type_levels(self) -> list[str]: + """return a list of the inferred types, one for each level""" + return [i.inferred_type for i in self.levels] + + @doc(Index.duplicated) + def duplicated(self, keep="first") -> npt.NDArray[np.bool_]: + shape = tuple(len(lev) for lev in self.levels) + ids = get_group_index(self.codes, shape, sort=False, xnull=False) + + return duplicated(ids, keep) + + # error: Cannot override final attribute "_duplicated" + # (previously declared in base class "IndexOpsMixin") + _duplicated = duplicated # type: ignore[misc] + + def fillna(self, value=None, downcast=None): + """ + fillna is not implemented for MultiIndex + """ + raise NotImplementedError("isna is not defined for MultiIndex") + + @doc(Index.dropna) + def dropna(self, how: str = "any") -> MultiIndex: + nans = [level_codes == -1 for level_codes in self.codes] + if how == "any": + indexer = np.any(nans, axis=0) + elif how == "all": + indexer = np.all(nans, axis=0) + else: + raise ValueError(f"invalid how option: {how}") + + new_codes = [level_codes[~indexer] for level_codes in self.codes] + return self.set_codes(codes=new_codes) + + def _get_level_values(self, level: int, unique: bool = False) -> Index: + """ + Return vector of label values for requested level, + equal to the length of the index + + **this is an internal method** + + Parameters + ---------- + level : int + unique : bool, default False + if True, drop duplicated values + + Returns + ------- + Index + """ + lev = self.levels[level] + level_codes = self.codes[level] + name = self._names[level] + if unique: + level_codes = algos.unique(level_codes) + filled = algos.take_nd(lev._values, level_codes, fill_value=lev._na_value) + return lev._shallow_copy(filled, name=name) + + def get_level_values(self, level): + """ + Return vector of label values for requested level. + + Length of returned vector is equal to the length of the index. + + Parameters + ---------- + level : int or str + ``level`` is either the integer position of the level in the + MultiIndex, or the name of the level. + + Returns + ------- + values : Index + Values is a level of this MultiIndex converted to + a single :class:`Index` (or subclass thereof). + + Notes + ----- + If the level contains missing values, the result may be casted to + ``float`` with missing values specified as ``NaN``. This is because + the level is converted to a regular ``Index``. + + Examples + -------- + Create a MultiIndex: + + >>> mi = pd.MultiIndex.from_arrays((list('abc'), list('def'))) + >>> mi.names = ['level_1', 'level_2'] + + Get level values by supplying level as either integer or name: + + >>> mi.get_level_values(0) + Index(['a', 'b', 'c'], dtype='object', name='level_1') + >>> mi.get_level_values('level_2') + Index(['d', 'e', 'f'], dtype='object', name='level_2') + + If a level contains missing values, the return type of the level + maybe casted to ``float``. 
+ + >>> pd.MultiIndex.from_arrays([[1, None, 2], [3, 4, 5]]).dtypes + level_0 int64 + level_1 int64 + dtype: object + >>> pd.MultiIndex.from_arrays([[1, None, 2], [3, 4, 5]]).get_level_values(0) + Float64Index([1.0, nan, 2.0], dtype='float64') + """ + level = self._get_level_number(level) + values = self._get_level_values(level) + return values + + @doc(Index.unique) + def unique(self, level=None): + + if level is None: + return super().unique() + else: + level = self._get_level_number(level) + return self._get_level_values(level=level, unique=True) + + def to_frame(self, index: bool = True, name=lib.no_default) -> DataFrame: + """ + Create a DataFrame with the levels of the MultiIndex as columns. + + Column ordering is determined by the DataFrame constructor with data as + a dict. + + Parameters + ---------- + index : bool, default True + Set the index of the returned DataFrame as the original MultiIndex. + + name : list / sequence of str, optional + The passed names should substitute index level names. + + Returns + ------- + DataFrame : a DataFrame containing the original MultiIndex data. + + See Also + -------- + DataFrame : Two-dimensional, size-mutable, potentially heterogeneous + tabular data. + + Examples + -------- + >>> mi = pd.MultiIndex.from_arrays([['a', 'b'], ['c', 'd']]) + >>> mi + MultiIndex([('a', 'c'), + ('b', 'd')], + ) + + >>> df = mi.to_frame() + >>> df + 0 1 + a c a c + b d b d + + >>> df = mi.to_frame(index=False) + >>> df + 0 1 + 0 a c + 1 b d + + >>> df = mi.to_frame(name=['x', 'y']) + >>> df + x y + a c a c + b d b d + """ + from pandas import DataFrame + + if name is None: + warnings.warn( + "Explicitly passing `name=None` currently preserves the Index's name " + "or uses a default name of 0. This behaviour is deprecated, and in " + "the future `None` will be used as the name of the resulting " + "DataFrame column.", + FutureWarning, + stacklevel=find_stack_level(), + ) + name = lib.no_default + + if name is not lib.no_default: + if not is_list_like(name): + raise TypeError("'name' must be a list / sequence of column names.") + + if len(name) != len(self.levels): + raise ValueError( + "'name' should have same length as number of levels on index." + ) + idx_names = name + else: + idx_names = self.names + + # Guarantee resulting column order - PY36+ dict maintains insertion order + result = DataFrame( + { + (level if lvlname is None else lvlname): self._get_level_values(level) + for lvlname, level in zip(idx_names, range(len(self.levels))) + }, + copy=False, + ) + + if index: + result.index = self + return result + + def to_flat_index(self) -> Index: + """ + Convert a MultiIndex to an Index of Tuples containing the level values. + + Returns + ------- + pd.Index + Index with the MultiIndex data represented in Tuples. + + See Also + -------- + MultiIndex.from_tuples : Convert flat index back to MultiIndex. + + Notes + ----- + This method will simply return the caller if called by anything other + than a MultiIndex. + + Examples + -------- + >>> index = pd.MultiIndex.from_product( + ... [['foo', 'bar'], ['baz', 'qux']], + ... 
names=['a', 'b']) + >>> index.to_flat_index() + Index([('foo', 'baz'), ('foo', 'qux'), + ('bar', 'baz'), ('bar', 'qux')], + dtype='object') + """ + return Index(self._values, tupleize_cols=False) + + @property + def _is_all_dates(self) -> bool: + return False + + def is_lexsorted(self) -> bool: + warnings.warn( + "MultiIndex.is_lexsorted is deprecated as a public function, " + "users should use MultiIndex.is_monotonic_increasing instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self._is_lexsorted() + + def _is_lexsorted(self) -> bool: + """ + Return True if the codes are lexicographically sorted. + + Returns + ------- + bool + + Examples + -------- + In the below examples, the first level of the MultiIndex is sorted because + a<b<c, so there is no need to look at the next level. + + >>> pd.MultiIndex.from_arrays([['a', 'b', 'c'], ['d', 'e', 'f']]).is_lexsorted() + True + >>> pd.MultiIndex.from_arrays([['a', 'b', 'c'], ['d', 'f', 'e']]).is_lexsorted() + True + + In case there is a tie, the lexicographical sorting looks + at the next level of the MultiIndex. + + >>> pd.MultiIndex.from_arrays([[0, 1, 1], ['a', 'b', 'c']]).is_lexsorted() + True + >>> pd.MultiIndex.from_arrays([[0, 1, 1], ['a', 'c', 'b']]).is_lexsorted() + False + >>> pd.MultiIndex.from_arrays([['a', 'a', 'b', 'b'], + ... ['aa', 'bb', 'aa', 'bb']]).is_lexsorted() + True + >>> pd.MultiIndex.from_arrays([['a', 'a', 'b', 'b'], + ... ['bb', 'aa', 'aa', 'bb']]).is_lexsorted() + False + """ + return self._lexsort_depth == self.nlevels + + @property + def lexsort_depth(self) -> int: + warnings.warn( + "MultiIndex.lexsort_depth is deprecated as a public function, " + "users should use MultiIndex.is_monotonic_increasing instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self._lexsort_depth + + @cache_readonly + def _lexsort_depth(self) -> int: + """ + Compute and return the lexsort_depth, the number of levels of the + MultiIndex that are sorted lexically + + Returns + ------- + int + """ + if self.sortorder is not None: + return self.sortorder + return _lexsort_depth(self.codes, self.nlevels) + + def _sort_levels_monotonic(self) -> MultiIndex: + """ + This is an *internal* function. + + Create a new MultiIndex from the current to monotonically sorted + items IN the levels. This does not actually make the entire MultiIndex + monotonic, JUST the levels. + + The resulting MultiIndex will have the same outward + appearance, meaning the same .values and ordering. It will also + be .equals() to the original. + + Returns + ------- + MultiIndex + + Examples + -------- + >>> mi = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']], + ... 
codes=[[0, 0, 1, 1], [0, 1, 0, 1]]) + >>> mi + MultiIndex([('a', 'bb'), + ('a', 'aa'), + ('b', 'bb'), + ('b', 'aa')], + ) + + >>> mi.sort_values() + MultiIndex([('a', 'aa'), + ('a', 'bb'), + ('b', 'aa'), + ('b', 'bb')], + ) + """ + if self._is_lexsorted() and self.is_monotonic: + return self + + new_levels = [] + new_codes = [] + + for lev, level_codes in zip(self.levels, self.codes): + + if not lev.is_monotonic: + try: + # indexer to reorder the levels + indexer = lev.argsort() + except TypeError: + pass + else: + lev = lev.take(indexer) + + # indexer to reorder the level codes + indexer = ensure_platform_int(indexer) + ri = lib.get_reverse_indexer(indexer, len(indexer)) + level_codes = algos.take_nd(ri, level_codes) + + new_levels.append(lev) + new_codes.append(level_codes) + + return MultiIndex( + new_levels, + new_codes, + names=self.names, + sortorder=self.sortorder, + verify_integrity=False, + ) + + def remove_unused_levels(self) -> MultiIndex: + """ + Create new MultiIndex from current that removes unused levels. + + Unused level(s) means levels that are not expressed in the + labels. The resulting MultiIndex will have the same outward + appearance, meaning the same .values and ordering. It will + also be .equals() to the original. + + Returns + ------- + MultiIndex + + Examples + -------- + >>> mi = pd.MultiIndex.from_product([range(2), list('ab')]) + >>> mi + MultiIndex([(0, 'a'), + (0, 'b'), + (1, 'a'), + (1, 'b')], + ) + + >>> mi[2:] + MultiIndex([(1, 'a'), + (1, 'b')], + ) + + The 0 from the first level is not represented + and can be removed + + >>> mi2 = mi[2:].remove_unused_levels() + >>> mi2.levels + FrozenList([[1], ['a', 'b']]) + """ + new_levels = [] + new_codes = [] + + changed = False + for lev, level_codes in zip(self.levels, self.codes): + + # Since few levels are typically unused, bincount() is more + # efficient than unique() - however it only accepts positive values + # (and drops order): + uniques = np.where(np.bincount(level_codes + 1) > 0)[0] - 1 + has_na = int(len(uniques) and (uniques[0] == -1)) + + if len(uniques) != len(lev) + has_na: + + if lev.isna().any() and len(uniques) == len(lev): + break + # We have unused levels + changed = True + + # Recalculate uniques, now preserving order. + # Can easily be cythonized by exploiting the already existing + # "uniques" and stop parsing "level_codes" when all items + # are found: + uniques = algos.unique(level_codes) + if has_na: + na_idx = np.where(uniques == -1)[0] + # Just ensure that -1 is in first position: + uniques[[0, na_idx[0]]] = uniques[[na_idx[0], 0]] + + # codes get mapped from uniques to 0:len(uniques) + # -1 (if present) is mapped to last position + code_mapping = np.zeros(len(lev) + has_na) + # ... 
and reassigned value -1: + code_mapping[uniques] = np.arange(len(uniques)) - has_na + + level_codes = code_mapping[level_codes] + + # new levels are simple + lev = lev.take(uniques[has_na:]) + + new_levels.append(lev) + new_codes.append(level_codes) + + result = self.view() + + if changed: + result._reset_identity() + result._set_levels(new_levels, validate=False) + result._set_codes(new_codes, validate=False) + + return result + + # -------------------------------------------------------------------- + # Pickling Methods + + def __reduce__(self): + """Necessary for making this object picklable""" + d = { + "levels": list(self.levels), + "codes": list(self.codes), + "sortorder": self.sortorder, + "names": list(self.names), + } + return ibase._new_Index, (type(self), d), None + + # -------------------------------------------------------------------- + + def __getitem__(self, key): + if is_scalar(key): + key = com.cast_scalar_indexer(key, warn_float=True) + + retval = [] + for lev, level_codes in zip(self.levels, self.codes): + if level_codes[key] == -1: + retval.append(np.nan) + else: + retval.append(lev[level_codes[key]]) + + return tuple(retval) + else: + # in general cannot be sure whether the result will be sorted + sortorder = None + if com.is_bool_indexer(key): + key = np.asarray(key, dtype=bool) + sortorder = self.sortorder + elif isinstance(key, slice): + if key.step is None or key.step > 0: + sortorder = self.sortorder + elif isinstance(key, Index): + key = np.asarray(key) + + new_codes = [level_codes[key] for level_codes in self.codes] + + return MultiIndex( + levels=self.levels, + codes=new_codes, + names=self.names, + sortorder=sortorder, + verify_integrity=False, + ) + + def _getitem_slice(self: MultiIndex, slobj: slice) -> MultiIndex: + """ + Fastpath for __getitem__ when we know we have a slice. 
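+ Behaves like ``self[slobj]``, but skips the scalar and boolean-indexer + dispatch that ``__getitem__`` performs.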
+ """ + sortorder = None + if slobj.step is None or slobj.step > 0: + sortorder = self.sortorder + + new_codes = [level_codes[slobj] for level_codes in self.codes] + + return type(self)( + levels=self.levels, + codes=new_codes, + names=self._names, + sortorder=sortorder, + verify_integrity=False, + ) + + @Appender(_index_shared_docs["take"] % _index_doc_kwargs) + def take( + self: MultiIndex, + indices, + axis: int = 0, + allow_fill: bool = True, + fill_value=None, + **kwargs, + ) -> MultiIndex: + nv.validate_take((), kwargs) + indices = ensure_platform_int(indices) + + # only fill if we are passing a non-None fill_value + allow_fill = self._maybe_disallow_fill(allow_fill, fill_value, indices) + + na_value = -1 + + taken = [lab.take(indices) for lab in self.codes] + if allow_fill: + mask = indices == -1 + if mask.any(): + masked = [] + for new_label in taken: + label_values = new_label + label_values[mask] = na_value + masked.append(np.asarray(label_values)) + taken = masked + + return MultiIndex( + levels=self.levels, codes=taken, names=self.names, verify_integrity=False + ) + + def append(self, other): + """ + Append a collection of Index options together + + Parameters + ---------- + other : Index or list/tuple of indices + + Returns + ------- + appended : Index + """ + if not isinstance(other, (list, tuple)): + other = [other] + + if all( + (isinstance(o, MultiIndex) and o.nlevels >= self.nlevels) for o in other + ): + arrays = [] + for i in range(self.nlevels): + label = self._get_level_values(i) + appended = [o._get_level_values(i) for o in other] + arrays.append(label.append(appended)) + return MultiIndex.from_arrays(arrays, names=self.names) + + to_concat = (self._values,) + tuple(k._values for k in other) + new_tuples = np.concatenate(to_concat) + + # if all(isinstance(x, MultiIndex) for x in other): + try: + return MultiIndex.from_tuples(new_tuples, names=self.names) + except (TypeError, IndexError): + return Index._with_infer(new_tuples) + + def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]: + return self._values.argsort(*args, **kwargs) + + @Appender(_index_shared_docs["repeat"] % _index_doc_kwargs) + def repeat(self, repeats: int, axis=None) -> MultiIndex: + nv.validate_repeat((), {"axis": axis}) + # error: Incompatible types in assignment (expression has type "ndarray", + # variable has type "int") + repeats = ensure_platform_int(repeats) # type: ignore[assignment] + return MultiIndex( + levels=self.levels, + codes=[ + level_codes.view(np.ndarray).astype(np.intp, copy=False).repeat(repeats) + for level_codes in self.codes + ], + names=self.names, + sortorder=self.sortorder, + verify_integrity=False, + ) + + def drop(self, codes, level=None, errors="raise"): + """ + Make new MultiIndex with passed list of codes deleted + + Parameters + ---------- + codes : array-like + Must be a list of tuples when level is not specified + level : int or level name, default None + errors : str, default 'raise' + + Returns + ------- + dropped : MultiIndex + """ + if level is not None: + return self._drop_from_level(codes, level, errors) + + if not isinstance(codes, (np.ndarray, Index)): + try: + codes = com.index_labels_to_array(codes, dtype=np.dtype("object")) + except ValueError: + pass + + inds = [] + for level_codes in codes: + try: + loc = self.get_loc(level_codes) + # get_loc returns either an integer, a slice, or a boolean + # mask + if isinstance(loc, int): + inds.append(loc) + elif isinstance(loc, slice): + step = loc.step if loc.step is not None else 1 + 
inds.extend(range(loc.start, loc.stop, step)) + elif com.is_bool_indexer(loc): + if self._lexsort_depth == 0: + warnings.warn( + "dropping on a non-lexsorted multi-index " + "without a level parameter may impact performance.", + PerformanceWarning, + stacklevel=find_stack_level(), + ) + loc = loc.nonzero()[0] + inds.extend(loc) + else: + msg = f"unsupported indexer of type {type(loc)}" + raise AssertionError(msg) + except KeyError: + if errors != "ignore": + raise + + return self.delete(inds) + + def _drop_from_level(self, codes, level, errors="raise") -> MultiIndex: + codes = com.index_labels_to_array(codes) + i = self._get_level_number(level) + index = self.levels[i] + values = index.get_indexer(codes) + # If nan should be dropped it will equal -1 here. We have to check which values + # are not nan and equal -1; those are missing from the index + nan_codes = isna(codes) + values[(np.equal(nan_codes, False)) & (values == -1)] = -2 + if index.shape[0] == self.shape[0]: + values[np.equal(nan_codes, True)] = -2 + + not_found = codes[values == -2] + if len(not_found) != 0 and errors != "ignore": + raise KeyError(f"labels {not_found} not found in level") + mask = ~algos.isin(self.codes[i], values) + + return self[mask] + + def swaplevel(self, i=-2, j=-1) -> MultiIndex: + """ + Swap level i with level j. + + Calling this method does not change the ordering of the values. + + Parameters + ---------- + i : int, str, default -2 + First level of index to be swapped. Can pass level name as string. + Type of parameters can be mixed. + j : int, str, default -1 + Second level of index to be swapped. Can pass level name as string. + Type of parameters can be mixed. + + Returns + ------- + MultiIndex + A new MultiIndex. + + See Also + -------- + Series.swaplevel : Swap levels i and j in a MultiIndex. + DataFrame.swaplevel : Swap levels i and j in a MultiIndex on a + particular axis. + + Examples + -------- + >>> mi = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']], + ... codes=[[0, 0, 1, 1], [0, 1, 0, 1]]) + >>> mi + MultiIndex([('a', 'bb'), + ('a', 'aa'), + ('b', 'bb'), + ('b', 'aa')], + ) + >>> mi.swaplevel(0, 1) + MultiIndex([('bb', 'a'), + ('aa', 'a'), + ('bb', 'b'), + ('aa', 'b')], + ) + """ + new_levels = list(self.levels) + new_codes = list(self.codes) + new_names = list(self.names) + + i = self._get_level_number(i) + j = self._get_level_number(j) + + new_levels[i], new_levels[j] = new_levels[j], new_levels[i] + new_codes[i], new_codes[j] = new_codes[j], new_codes[i] + new_names[i], new_names[j] = new_names[j], new_names[i] + + return MultiIndex( + levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False + ) + + def reorder_levels(self, order) -> MultiIndex: + """ + Rearrange levels using input order. May not drop or duplicate levels. + + Parameters + ---------- + order : list of int or list of str + List representing new level order. Reference level by number + (position) or by key (label). 
+ + Returns + ------- + MultiIndex + + Examples + -------- + >>> mi = pd.MultiIndex.from_arrays([[1, 2], [3, 4]], names=['x', 'y']) + >>> mi + MultiIndex([(1, 3), + (2, 4)], + names=['x', 'y']) + + >>> mi.reorder_levels(order=[1, 0]) + MultiIndex([(3, 1), + (4, 2)], + names=['y', 'x']) + + >>> mi.reorder_levels(order=['y', 'x']) + MultiIndex([(3, 1), + (4, 2)], + names=['y', 'x']) + """ + order = [self._get_level_number(i) for i in order] + if len(order) != self.nlevels: + raise AssertionError( + f"Length of order must be same as number of levels ({self.nlevels}), " + f"got {len(order)}" + ) + new_levels = [self.levels[i] for i in order] + new_codes = [self.codes[i] for i in order] + new_names = [self.names[i] for i in order] + + return MultiIndex( + levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False + ) + + def _get_codes_for_sorting(self) -> list[Categorical]: + """ + we are categorizing our codes by using the + available categories (all, not just observed) + excluding any missing ones (-1); this is in preparation + for sorting, where we need to disambiguate that -1 is not + a valid value + """ + + def cats(level_codes): + return np.arange( + np.array(level_codes).max() + 1 if len(level_codes) else 0, + dtype=level_codes.dtype, + ) + + return [ + Categorical.from_codes(level_codes, cats(level_codes), ordered=True) + for level_codes in self.codes + ] + + def sortlevel( + self, level=0, ascending: bool = True, sort_remaining: bool = True + ) -> tuple[MultiIndex, npt.NDArray[np.intp]]: + """ + Sort MultiIndex at the requested level. + + The result will respect the original ordering of the associated + factor at that level. + + Parameters + ---------- + level : list-like, int or str, default 0 + If a string is given, must be a name of the level. + If list-like must be names or ints of levels. + ascending : bool, default True + False to sort in descending order. + Can also be a list to specify a directed ordering. + sort_remaining : bool, default True + Sort by the remaining levels after ``level``. + + Returns + ------- + sorted_index : pd.MultiIndex + Resulting index. + indexer : np.ndarray[np.intp] + Indices of output values in original index. 
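+ + Notes + ----- + When ``ascending`` is a list, it must have the same length as ``level``; + otherwise a ``ValueError`` is raised.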
+ + Examples + -------- + >>> mi = pd.MultiIndex.from_arrays([[0, 0], [2, 1]]) + >>> mi + MultiIndex([(0, 2), + (0, 1)], + ) + + >>> mi.sortlevel() + (MultiIndex([(0, 1), + (0, 2)], + ), array([1, 0])) + + >>> mi.sortlevel(sort_remaining=False) + (MultiIndex([(0, 2), + (0, 1)], + ), array([0, 1])) + + >>> mi.sortlevel(1) + (MultiIndex([(0, 1), + (0, 2)], + ), array([1, 0])) + + >>> mi.sortlevel(1, ascending=False) + (MultiIndex([(0, 2), + (0, 1)], + ), array([0, 1])) + """ + if isinstance(level, (str, int)): + level = [level] + level = [self._get_level_number(lev) for lev in level] + sortorder = None + + # we have a directed ordering via ascending + if isinstance(ascending, list): + if not len(level) == len(ascending): + raise ValueError("level must have same length as ascending") + + indexer = lexsort_indexer( + [self.codes[lev] for lev in level], orders=ascending + ) + + # level ordering + else: + + codes = list(self.codes) + shape = list(self.levshape) + + # partition codes and shape + primary = tuple(codes[lev] for lev in level) + primshp = tuple(shape[lev] for lev in level) + + # Reverse sorted to retain the order of + # smaller indices that need to be removed + for lev in sorted(level, reverse=True): + codes.pop(lev) + shape.pop(lev) + + if sort_remaining: + primary += primary + tuple(codes) + primshp += primshp + tuple(shape) + else: + sortorder = level[0] + + indexer = indexer_from_factorized(primary, primshp, compress=False) + + if not ascending: + indexer = indexer[::-1] + + indexer = ensure_platform_int(indexer) + new_codes = [level_codes.take(indexer) for level_codes in self.codes] + + new_index = MultiIndex( + codes=new_codes, + levels=self.levels, + names=self.names, + sortorder=sortorder, + verify_integrity=False, + ) + + return new_index, indexer + + def _wrap_reindex_result(self, target, indexer, preserve_names: bool): + if not isinstance(target, MultiIndex): + if indexer is None: + target = self + elif (indexer >= 0).all(): + target = self.take(indexer) + else: + try: + target = MultiIndex.from_tuples(target) + except TypeError: + # not all tuples, see test_constructor_dict_multiindex_reindex_flat + return target + + target = self._maybe_preserve_names(target, preserve_names) + return target + + def _maybe_preserve_names(self, target: Index, preserve_names: bool) -> Index: + if ( + preserve_names + and target.nlevels == self.nlevels + and target.names != self.names + ): + target = target.copy(deep=False) + target.names = self.names + return target + + # -------------------------------------------------------------------- + # Indexing Methods + + def _check_indexing_error(self, key) -> None: + if not is_hashable(key) or is_iterator(key): + # We allow tuples if they are hashable, whereas other Index + # subclasses require scalar. + # We have to explicitly exclude generators, as these are hashable. + raise InvalidIndexError(key) + + @cache_readonly + def _should_fallback_to_positional(self) -> bool: + """ + Should integer key(s) be treated as positional? + """ + # GH#33355 + return self.levels[0]._should_fallback_to_positional + + def _get_values_for_loc(self, series: Series, loc, key): + """ + Do a positional lookup on the given Series, returning either a scalar + or a Series. 
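+ + A scalar ``loc`` yields a scalar value; otherwise any levels fully + selected by ``key`` are dropped from the returned Series' index.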
+ + Assumes that `series.index is self` + """ + new_values = series._values[loc] + if is_scalar(loc): + return new_values + + if len(new_values) == 1 and not self.nlevels > 1: + # If more than one level left, we can not return a scalar + return new_values[0] + + new_index = self[loc] + new_index = maybe_droplevels(new_index, key) + new_ser = series._constructor(new_values, index=new_index, name=series.name) + return new_ser.__finalize__(series) + + def _get_indexer_strict( + self, key, axis_name: str + ) -> tuple[Index, npt.NDArray[np.intp]]: + + keyarr = key + if not isinstance(keyarr, Index): + keyarr = com.asarray_tuplesafe(keyarr) + + if len(keyarr) and not isinstance(keyarr[0], tuple): + indexer = self._get_indexer_level_0(keyarr) + + self._raise_if_missing(key, indexer, axis_name) + return self[indexer], indexer + + return super()._get_indexer_strict(key, axis_name) + + def _raise_if_missing(self, key, indexer, axis_name: str) -> None: + keyarr = key + if not isinstance(key, Index): + keyarr = com.asarray_tuplesafe(key) + + if len(keyarr) and not isinstance(keyarr[0], tuple): + # i.e. same condition for special case in MultiIndex._get_indexer_strict + + mask = indexer == -1 + if mask.any(): + check = self.levels[0].get_indexer(keyarr) + cmask = check == -1 + if cmask.any(): + raise KeyError(f"{keyarr[cmask]} not in index") + # We get here when levels still contain values which are not + # actually in Index anymore + raise KeyError(f"{keyarr} not in index") + else: + return super()._raise_if_missing(key, indexer, axis_name) + + def _get_indexer_level_0(self, target) -> npt.NDArray[np.intp]: + """ + Optimized equivalent to `self.get_level_values(0).get_indexer_for(target)`. + """ + lev = self.levels[0] + codes = self._codes[0] + cat = Categorical.from_codes(codes=codes, categories=lev) + ci = Index(cat) + return ci.get_indexer_for(target) + + def get_slice_bound( + self, label: Hashable | Sequence[Hashable], side: str, kind=lib.no_default + ) -> int: + """ + For an ordered MultiIndex, compute slice bound + that corresponds to given label. + + Returns leftmost (one-past-the-rightmost if ``side=='right'``) position + of given label. + + Parameters + ---------- + label : object or tuple of objects + side : {'left', 'right'} + kind : {'loc', 'getitem', None} + + .. deprecated:: 1.4.0 + + Returns + ------- + int + Index of label. + + Notes + ----- + This method only works if level 0 index of the MultiIndex is lexsorted. + + Examples + -------- + >>> mi = pd.MultiIndex.from_arrays([list('abbc'), list('gefd')]) + + Get the locations from the leftmost 'b' in the first level + until the end of the multiindex: + + >>> mi.get_slice_bound('b', side="left") + 1 + + Like above, but if you get the locations from the rightmost + 'b' in the first level and 'f' in the second level: + + >>> mi.get_slice_bound(('b','f'), side="right") + 3 + + See Also + -------- + MultiIndex.get_loc : Get location for a label or a tuple of labels. + MultiIndex.get_locs : Get location for a label/slice/list/mask or a + sequence of such. + """ + self._deprecated_arg(kind, "kind", "get_slice_bound") + + if not isinstance(label, tuple): + label = (label,) + return self._partial_tup_index(label, side=side) + + def slice_locs( + self, start=None, end=None, step=None, kind=lib.no_default + ) -> tuple[int, int]: + """ + For an ordered MultiIndex, compute the slice locations for input + labels. + + The input labels can be tuples representing partial levels, e.g. 
for a + MultiIndex with 3 levels, you can pass a single value (corresponding to + the first level), or a 1-, 2-, or 3-tuple. + + Parameters + ---------- + start : label or tuple, default None + If None, defaults to the beginning + end : label or tuple + If None, defaults to the end + step : int or None + Slice step + kind : str, optional, default None + + .. deprecated:: 1.4.0 + + Returns + ------- + (start, end) : (int, int) + + Notes + ----- + This method only works if the MultiIndex is properly lexsorted. So, + if only the first 2 levels of a 3-level MultiIndex are lexsorted, + you can only pass two levels to ``.slice_locs``. + + Examples + -------- + >>> mi = pd.MultiIndex.from_arrays([list('abbd'), list('deff')], + ... names=['A', 'B']) + + Get the slice locations from the beginning of 'b' in the first level + until the end of the multiindex: + + >>> mi.slice_locs(start='b') + (1, 4) + + Like above, but stop at the end of 'b' in the first level and 'f' in + the second level: + + >>> mi.slice_locs(start='b', end=('b', 'f')) + (1, 3) + + See Also + -------- + MultiIndex.get_loc : Get location for a label or a tuple of labels. + MultiIndex.get_locs : Get location for a label/slice/list/mask or a + sequence of such. + """ + self._deprecated_arg(kind, "kind", "slice_locs") + # This function adds nothing to its parent implementation (the magic + # happens in get_slice_bound method), but it adds meaningful doc. + return super().slice_locs(start, end, step) + + def _partial_tup_index(self, tup: tuple, side="left"): + if len(tup) > self._lexsort_depth: + raise UnsortedIndexError( + f"Key length ({len(tup)}) was greater than MultiIndex lexsort depth " + f"({self._lexsort_depth})" + ) + + n = len(tup) + start, end = 0, len(self) + zipped = zip(tup, self.levels, self.codes) + for k, (lab, lev, level_codes) in enumerate(zipped): + section = level_codes[start:end] + + if lab not in lev and not isna(lab): + # short circuit + try: + loc = lev.searchsorted(lab, side=side) + except TypeError as err: + # non-comparable e.g. test_slice_locs_with_type_mismatch + raise TypeError(f"Level type mismatch: {lab}") from err + if not is_integer(loc): + # non-comparable level, e.g. test_groupby_example + raise TypeError(f"Level type mismatch: {lab}") + if side == "right" and loc >= 0: + loc -= 1 + return start + section.searchsorted(loc, side=side) + + idx = self._get_loc_single_level_index(lev, lab) + if isinstance(idx, slice) and k < n - 1: + # Get start and end value from slice, necessary when a non-integer + # interval is given as input GH#37707 + start = idx.start + end = idx.stop + elif k < n - 1: + end = start + section.searchsorted(idx, side="right") + start = start + section.searchsorted(idx, side="left") + elif isinstance(idx, slice): + idx = idx.start + return start + section.searchsorted(idx, side=side) + else: + return start + section.searchsorted(idx, side=side) + + def _get_loc_single_level_index(self, level_index: Index, key: Hashable) -> int: + """ + If key is an NA value, the location is standardized to -1. + + Parameters + ---------- + level_index: Index + key : label + + Returns + ------- + loc : int + If key is NA value, loc is -1 + Else, location of key in index. + + See Also + -------- + Index.get_loc : The get_loc method for (single-level) index. + """ + if is_scalar(key) and isna(key): + return -1 + else: + return level_index.get_loc(key) + + def get_loc(self, key, method=None): + """ + Get location for a label or a tuple of labels. 
+ + The location is returned as an integer/slice or boolean + mask. + + Parameters + ---------- + key : label or tuple of labels (one for each level) + method : None + + Returns + ------- + loc : int, slice object or boolean mask + If the key is past the lexsort depth, the return may be a + boolean mask array, otherwise it is always a slice or int. + + See Also + -------- + Index.get_loc : The get_loc method for (single-level) index. + MultiIndex.slice_locs : Get slice location given start label(s) and + end label(s). + MultiIndex.get_locs : Get location for a label/slice/list/mask or a + sequence of such. + + Notes + ----- + The key cannot be a slice, list of same-level labels, a boolean mask, + or a sequence of such. If you want to use those, use + :meth:`MultiIndex.get_locs` instead. + + Examples + -------- + >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')]) + + >>> mi.get_loc('b') + slice(1, 3, None) + + >>> mi.get_loc(('b', 'e')) + 1 + """ + if method is not None: + raise NotImplementedError( + "only the default get_loc method is " + "currently supported for MultiIndex" + ) + + self._check_indexing_error(key) + + def _maybe_to_slice(loc): + """convert integer indexer to boolean mask or slice if possible""" + if not isinstance(loc, np.ndarray) or loc.dtype != np.intp: + return loc + + loc = lib.maybe_indices_to_slice(loc, len(self)) + if isinstance(loc, slice): + return loc + + mask = np.empty(len(self), dtype="bool") + mask.fill(False) + mask[loc] = True + return mask + + if not isinstance(key, tuple): + loc = self._get_level_indexer(key, level=0) + return _maybe_to_slice(loc) + + keylen = len(key) + if self.nlevels < keylen: + raise KeyError( + f"Key length ({keylen}) exceeds index depth ({self.nlevels})" + ) + + if keylen == self.nlevels and self.is_unique: + try: + return self._engine.get_loc(key) + except TypeError: + # e.g. test_partial_slicing_with_multiindex partial string slicing + loc, _ = self.get_loc_level(key, list(range(self.nlevels))) + return loc + + # -- partial selection or non-unique index + # break the key into 2 parts based on the lexsort_depth of the index; + # the first part returns a continuous slice of the index; the 2nd part + # needs linear search within the slice + i = self._lexsort_depth + lead_key, follow_key = key[:i], key[i:] + + if not lead_key: + start = 0 + stop = len(self) + else: + try: + start, stop = self.slice_locs(lead_key, lead_key) + except TypeError as err: + # e.g. test_groupby_example key = ((0, 0, 1, 2), "new_col") + # when self has 5 integer levels + raise KeyError(key) from err + + if start == stop: + raise KeyError(key) + + if not follow_key: + return slice(start, stop) + + warnings.warn( + "indexing past lexsort depth may impact performance.", + PerformanceWarning, + stacklevel=find_stack_level(), + ) + + loc = np.arange(start, stop, dtype=np.intp) + + for i, k in enumerate(follow_key, len(lead_key)): + mask = self.codes[i][loc] == self._get_loc_single_level_index( + self.levels[i], k + ) + if not mask.all(): + loc = loc[mask] + if not len(loc): + raise KeyError(key) + + return _maybe_to_slice(loc) if len(loc) != stop - start else slice(start, stop) + + def get_loc_level(self, key, level=0, drop_level: bool = True): + """ + Get location and sliced index for requested label(s)/level(s). + + Parameters + ---------- + key : label or sequence of labels + level : int/level name or list thereof, optional + drop_level : bool, default True + If ``False``, the resulting index will not drop any level. 
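+            When ``True`` (the default), any level pinned to a single
+            label by ``key`` is dropped from the returned index.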
+ + Returns + ------- + loc : A 2-tuple where the elements are: + Element 0: int, slice object or boolean array + Element 1: The resulting sliced multiindex/index. If the key + contains all levels, this will be ``None``. + + See Also + -------- + MultiIndex.get_loc : Get location for a label or a tuple of labels. + MultiIndex.get_locs : Get location for a label/slice/list/mask or a + sequence of such. + + Examples + -------- + >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')], + ... names=['A', 'B']) + + >>> mi.get_loc_level('b') + (slice(1, 3, None), Index(['e', 'f'], dtype='object', name='B')) + + >>> mi.get_loc_level('e', level='B') + (array([False, True, False]), Index(['b'], dtype='object', name='A')) + + >>> mi.get_loc_level(['b', 'e']) + (1, None) + """ + if not isinstance(level, (list, tuple)): + level = self._get_level_number(level) + else: + level = [self._get_level_number(lev) for lev in level] + + loc, mi = self._get_loc_level(key, level=level) + if not drop_level: + if lib.is_integer(loc): + mi = self[loc : loc + 1] + else: + mi = self[loc] + return loc, mi + + def _get_loc_level(self, key, level: int | list[int] = 0): + """ + get_loc_level but with `level` known to be positional, not name-based. + """ + + # different name to distinguish from maybe_droplevels + def maybe_mi_droplevels(indexer, levels): + """ + If level does not exist or all levels were dropped, the exception + has to be handled outside. + """ + new_index = self[indexer] + + for i in sorted(levels, reverse=True): + new_index = new_index._drop_level_numbers([i]) + + return new_index + + if isinstance(level, (tuple, list)): + if len(key) != len(level): + raise AssertionError( + "Key for location must have same length as number of levels" + ) + result = None + for lev, k in zip(level, key): + loc, new_index = self._get_loc_level(k, level=lev) + if isinstance(loc, slice): + mask = np.zeros(len(self), dtype=bool) + mask[loc] = True + loc = mask + result = loc if result is None else result & loc + + try: + # FIXME: we should be only dropping levels on which we are + # scalar-indexing + mi = maybe_mi_droplevels(result, level) + except ValueError: + # droplevel failed because we tried to drop all levels, + # i.e. len(level) == self.nlevels + mi = self[result] + + return result, mi + + # kludge for #1796 + if isinstance(key, list): + key = tuple(key) + + if isinstance(key, tuple) and level == 0: + + try: + # Check if this tuple is a single key in our first level + if key in self.levels[0]: + indexer = self._get_level_indexer(key, level=level) + new_index = maybe_mi_droplevels(indexer, [0]) + return indexer, new_index + except (TypeError, InvalidIndexError): + pass + + if not any(isinstance(k, slice) for k in key): + + if len(key) == self.nlevels and self.is_unique: + # Complete key in unique index -> standard get_loc + try: + return (self._engine.get_loc(key), None) + except KeyError as err: + raise KeyError(key) from err + except TypeError: + # e.g. partial string indexing + # test_partial_string_timestamp_multiindex + pass + + # partial selection + indexer = self.get_loc(key) + ilevels = [i for i in range(len(key)) if key[i] != slice(None, None)] + if len(ilevels) == self.nlevels: + if is_integer(indexer): + # we are dropping all levels + return indexer, None + + # TODO: in some cases we still need to drop some levels, + # e.g. 
test_multiindex_perf_warn + # test_partial_string_timestamp_multiindex + ilevels = [ + i + for i in range(len(key)) + if ( + not isinstance(key[i], str) + or not self.levels[i]._supports_partial_string_indexing + ) + and key[i] != slice(None, None) + ] + if len(ilevels) == self.nlevels: + # TODO: why? + ilevels = [] + return indexer, maybe_mi_droplevels(indexer, ilevels) + + else: + indexer = None + for i, k in enumerate(key): + if not isinstance(k, slice): + loc_level = self._get_level_indexer(k, level=i) + if isinstance(loc_level, slice): + if com.is_null_slice(loc_level) or com.is_full_slice( + loc_level, len(self) + ): + # everything + continue + else: + # e.g. test_xs_IndexSlice_argument_not_implemented + k_index = np.zeros(len(self), dtype=bool) + k_index[loc_level] = True + + else: + k_index = loc_level + + elif com.is_null_slice(k): + # taking everything, does not affect `indexer` below + continue + + else: + # FIXME: this message can be inaccurate, e.g. + # test_series_varied_multiindex_alignment + raise TypeError(f"Expected label or tuple of labels, got {key}") + + if indexer is None: + indexer = k_index + else: + indexer &= k_index + if indexer is None: + indexer = slice(None, None) + ilevels = [i for i in range(len(key)) if key[i] != slice(None, None)] + return indexer, maybe_mi_droplevels(indexer, ilevels) + else: + indexer = self._get_level_indexer(key, level=level) + if ( + isinstance(key, str) + and self.levels[level]._supports_partial_string_indexing + ): + # check to see if we did an exact lookup vs sliced + check = self.levels[level].get_loc(key) + if not is_integer(check): + # e.g. test_partial_string_timestamp_multiindex + return indexer, self[indexer] + + try: + result_index = maybe_mi_droplevels(indexer, [level]) + except ValueError: + result_index = self[indexer] + + return indexer, result_index + + def _get_level_indexer( + self, key, level: int = 0, indexer: Int64Index | None = None + ): + # `level` kwarg is _always_ positional, never name + # return an indexer, boolean array or a slice showing where the key is + # in the totality of values + # if the indexer is provided, then use this + + level_index = self.levels[level] + level_codes = self.codes[level] + + def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes): + # given the inputs and the codes/indexer, compute an indexer set + # if we have a provided indexer, then this need not consider + # the entire labels set + if step is not None and step < 0: + # Switch elements for negative step size + start, stop = stop - 1, start - 1 + r = np.arange(start, stop, step) + + if indexer is not None and len(indexer) != len(codes): + + # we have an indexer which maps the locations in the labels + # that we have already selected (and is not an indexer for the + # entire set) otherwise this is wasteful so we only need to + # examine locations that are in this set the only magic here is + # that the result are the mappings to the set that we have + # selected + from pandas import Series + + mapper = Series(indexer) + indexer = codes.take(ensure_platform_int(indexer)) + result = Series(Index(indexer).isin(r).nonzero()[0]) + m = result.map(mapper) + # error: Incompatible types in assignment (expression has type + # "ndarray", variable has type "Series") + m = np.asarray(m) # type: ignore[assignment] + + else: + # error: Incompatible types in assignment (expression has type + # "ndarray", variable has type "Series") + m = np.zeros(len(codes), dtype=bool) # type: ignore[assignment] + m[np.in1d(codes, r, 
assume_unique=Index(codes).is_unique)] = True + + return m + + if isinstance(key, slice): + # handle a slice, returning a slice if we can + # otherwise a boolean indexer + + try: + if key.start is not None: + start = level_index.get_loc(key.start) + else: + start = 0 + if key.stop is not None: + stop = level_index.get_loc(key.stop) + elif isinstance(start, slice): + stop = len(level_index) + else: + stop = len(level_index) - 1 + step = key.step + except KeyError: + + # we have a partial slice (like looking up a partial date + # string) + start = stop = level_index.slice_indexer(key.start, key.stop, key.step) + step = start.step + + if isinstance(start, slice) or isinstance(stop, slice): + # we have a slice for start and/or stop + # a partial date slicer on a DatetimeIndex generates a slice + # note that the stop ALREADY includes the stopped point (if + # it was a string sliced) + start = getattr(start, "start", start) + stop = getattr(stop, "stop", stop) + return convert_indexer(start, stop, step) + + elif level > 0 or self._lexsort_depth == 0 or step is not None: + # need to have like semantics here to right + # searching as when we are using a slice + # so include the stop+1 (so we include stop) + return convert_indexer(start, stop + 1, step) + else: + # sorted, so can return slice object -> view + i = level_codes.searchsorted(start, side="left") + j = level_codes.searchsorted(stop, side="right") + return slice(i, j, step) + + else: + + idx = self._get_loc_single_level_index(level_index, key) + + if level > 0 or self._lexsort_depth == 0: + # Desired level is not sorted + if isinstance(idx, slice): + # test_get_loc_partial_timestamp_multiindex + locs = (level_codes >= idx.start) & (level_codes < idx.stop) + return locs + + locs = np.array(level_codes == idx, dtype=bool, copy=False) + + if not locs.any(): + # The label is present in self.levels[level] but unused: + raise KeyError(key) + return locs + + if isinstance(idx, slice): + # e.g. test_partial_string_timestamp_multiindex + start = level_codes.searchsorted(idx.start, side="left") + # NB: "left" here bc of slice semantics + end = level_codes.searchsorted(idx.stop, side="left") + else: + start = level_codes.searchsorted(idx, side="left") + end = level_codes.searchsorted(idx, side="right") + + if start == end: + # The label is present in self.levels[level] but unused: + raise KeyError(key) + return slice(start, end) + + def get_locs(self, seq): + """ + Get location for a sequence of labels. + + Parameters + ---------- + seq : label, slice, list, mask or a sequence of such + You should use one of the above for each level. + If a level should not be used, set it to ``slice(None)``. + + Returns + ------- + numpy.ndarray + NumPy array of integers suitable for passing to iloc. + + See Also + -------- + MultiIndex.get_loc : Get location for a label or a tuple of labels. + MultiIndex.slice_locs : Get slice location given start label(s) and + end label(s). 
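+
+        Notes
+        -----
+        Levels indexed with a non-trivial ``slice`` (anything other than
+        ``slice(None)``) must fall within the lexsort depth of the index;
+        otherwise an ``UnsortedIndexError`` is raised.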
+ + Examples + -------- + >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')]) + + >>> mi.get_locs('b') # doctest: +SKIP + array([1, 2], dtype=int64) + + >>> mi.get_locs([slice(None), ['e', 'f']]) # doctest: +SKIP + array([1, 2], dtype=int64) + + >>> mi.get_locs([[True, False, True], slice('e', 'f')]) # doctest: +SKIP + array([2], dtype=int64) + """ + + # must be lexsorted to at least as many levels + true_slices = [i for (i, s) in enumerate(com.is_true_slices(seq)) if s] + if true_slices and true_slices[-1] >= self._lexsort_depth: + raise UnsortedIndexError( + "MultiIndex slicing requires the index to be lexsorted: slicing " + f"on levels {true_slices}, lexsort depth {self._lexsort_depth}" + ) + + n = len(self) + # indexer is the list of all positions that we want to take; we + # start with it being everything and narrow it down as we look at each + # entry in `seq` + indexer = Index(np.arange(n)) + + if any(x is Ellipsis for x in seq): + raise NotImplementedError( + "MultiIndex does not support indexing with Ellipsis" + ) + + def _convert_to_indexer(r) -> Int64Index: + # return an indexer + if isinstance(r, slice): + m = np.zeros(n, dtype=bool) + m[r] = True + r = m.nonzero()[0] + elif com.is_bool_indexer(r): + if len(r) != n: + raise ValueError( + "cannot index with a boolean indexer " + "that is not the same length as the " + "index" + ) + r = r.nonzero()[0] + return Int64Index(r) + + def _update_indexer(idxr: Index, indexer: Index) -> Index: + indexer_intersection = indexer.intersection(idxr) + if indexer_intersection.empty and not idxr.empty and not indexer.empty: + raise KeyError(seq) + return indexer_intersection + + for i, k in enumerate(seq): + + if com.is_bool_indexer(k): + # a boolean indexer, must be the same length! + k = np.asarray(k) + lvl_indexer = _convert_to_indexer(k) + indexer = _update_indexer(lvl_indexer, indexer=indexer) + + elif is_list_like(k): + # a collection of labels to include from this level (these + # are or'd) + + indexers: Int64Index | None = None + + # GH#27591 check if this is a single tuple key in the level + try: + # Argument "indexer" to "_get_level_indexer" of "MultiIndex" + # has incompatible type "Index"; expected "Optional[Int64Index]" + lev_loc = self._get_level_indexer( + k, level=i, indexer=indexer # type: ignore[arg-type] + ) + except (InvalidIndexError, TypeError, KeyError) as err: + # InvalidIndexError e.g. non-hashable, fall back to treating + # this as a sequence of labels + # KeyError it can be ambiguous if this is a label or sequence + # of labels + # github.com/pandas-dev/pandas/issues/39424#issuecomment-871626708 + for x in k: + if not is_hashable(x): + # e.g. slice + raise err + try: + # Argument "indexer" to "_get_level_indexer" of "MultiIndex" + # has incompatible type "Index"; expected + # "Optional[Int64Index]" + item_lvl_indexer = self._get_level_indexer( + x, level=i, indexer=indexer # type: ignore[arg-type] + ) + except KeyError: + # ignore not founds; see discussion in GH#39424 + warnings.warn( + "The behavior of indexing on a MultiIndex with a " + "nested sequence of labels is deprecated and will " + "change in a future version. " + "`series.loc[label, sequence]` will raise if any " + "members of 'sequence' or not present in " + "the index's second level. To retain the old " + "behavior, use `series.index.isin(sequence, level=1)`", + # TODO: how to opt in to the future behavior? + # TODO: how to handle IntervalIndex level? 
+ # (no test cases) + FutureWarning, + stacklevel=find_stack_level(), + ) + continue + else: + idxrs = _convert_to_indexer(item_lvl_indexer) + + if indexers is None: + indexers = idxrs + else: + indexers = indexers.union(idxrs, sort=False) + + else: + idxrs = _convert_to_indexer(lev_loc) + if indexers is None: + indexers = idxrs + else: + indexers = indexers.union(idxrs, sort=False) + + if indexers is not None: + indexer = _update_indexer(indexers, indexer=indexer) + else: + # no matches we are done + # test_loc_getitem_duplicates_multiindex_empty_indexer + return np.array([], dtype=np.intp) + + elif com.is_null_slice(k): + # empty slice + pass + + elif isinstance(k, slice): + + # a slice, include BOTH of the labels + # Argument "indexer" to "_get_level_indexer" of "MultiIndex" has + # incompatible type "Index"; expected "Optional[Int64Index]" + lvl_indexer = self._get_level_indexer( + k, + level=i, + indexer=indexer, # type: ignore[arg-type] + ) + indexer = _update_indexer( + _convert_to_indexer(lvl_indexer), + indexer=indexer, + ) + else: + # a single label + lvl_indexer = self._get_loc_level(k, level=i)[0] + indexer = _update_indexer( + _convert_to_indexer(lvl_indexer), + indexer=indexer, + ) + + # empty indexer + if indexer is None: + return np.array([], dtype=np.intp) + + assert isinstance(indexer, Int64Index), type(indexer) + indexer = self._reorder_indexer(seq, indexer) + + return indexer._values.astype(np.intp, copy=False) + + # -------------------------------------------------------------------- + + def _reorder_indexer( + self, + seq: tuple[Scalar | Iterable | AnyArrayLike, ...], + indexer: Int64Index, + ) -> Int64Index: + """ + Reorder an indexer of a MultiIndex (self) so that the label are in the + same order as given in seq + + Parameters + ---------- + seq : label/slice/list/mask or a sequence of such + indexer: an Int64Index indexer of self + + Returns + ------- + indexer : a sorted Int64Index indexer of self ordered as seq + """ + # If the index is lexsorted and the list_like label in seq are sorted + # then we do not need to sort + if self._is_lexsorted(): + need_sort = False + for i, k in enumerate(seq): + if is_list_like(k): + if not need_sort: + k_codes = self.levels[i].get_indexer(k) + k_codes = k_codes[k_codes >= 0] # Filter absent keys + # True if the given codes are not ordered + need_sort = (k_codes[:-1] > k_codes[1:]).any() + elif isinstance(k, slice) and k.step is not None and k.step < 0: + need_sort = True + # Bail out if both index and seq are sorted + if not need_sort: + return indexer + + n = len(self) + keys: tuple[np.ndarray, ...] 
= () + # For each level of the sequence in seq, map the level codes with the + # order they appears in a list-like sequence + # This mapping is then use to reorder the indexer + for i, k in enumerate(seq): + if is_scalar(k): + # GH#34603 we want to treat a scalar the same as an all equal list + k = [k] + if com.is_bool_indexer(k): + new_order = np.arange(n)[indexer] + elif is_list_like(k): + # Generate a map with all level codes as sorted initially + k = algos.unique(k) + key_order_map = np.ones(len(self.levels[i]), dtype=np.uint64) * len( + self.levels[i] + ) + # Set order as given in the indexer list + level_indexer = self.levels[i].get_indexer(k) + level_indexer = level_indexer[level_indexer >= 0] # Filter absent keys + key_order_map[level_indexer] = np.arange(len(level_indexer)) + + new_order = key_order_map[self.codes[i][indexer]] + elif isinstance(k, slice) and k.step is not None and k.step < 0: + new_order = np.arange(n)[k][indexer] + elif isinstance(k, slice) and k.start is None and k.stop is None: + # slice(None) should not determine order GH#31330 + new_order = np.ones((n,))[indexer] + else: + # For all other case, use the same order as the level + new_order = np.arange(n)[indexer] + keys = (new_order,) + keys + + # Find the reordering using lexsort on the keys mapping + ind = np.lexsort(keys) + return indexer[ind] + + def truncate(self, before=None, after=None) -> MultiIndex: + """ + Slice index between two labels / tuples, return new MultiIndex + + Parameters + ---------- + before : label or tuple, can be partial. Default None + None defaults to start + after : label or tuple, can be partial. Default None + None defaults to end + + Returns + ------- + truncated : MultiIndex + """ + if after and before and after < before: + raise ValueError("after < before") + + i, j = self.levels[0].slice_locs(before, after) + left, right = self.slice_locs(before, after) + + new_levels = list(self.levels) + new_levels[0] = new_levels[0][i:j] + + new_codes = [level_codes[left:right] for level_codes in self.codes] + new_codes[0] = new_codes[0] - i + + return MultiIndex( + levels=new_levels, + codes=new_codes, + names=self._names, + verify_integrity=False, + ) + + def equals(self, other: object) -> bool: + """ + Determines if two MultiIndex objects have the same labeling information + (the levels themselves do not necessarily have to be the same) + + See Also + -------- + equal_levels + """ + if self.is_(other): + return True + + if not isinstance(other, Index): + return False + + if len(self) != len(other): + return False + + if not isinstance(other, MultiIndex): + # d-level MultiIndex can equal d-tuple Index + if not self._should_compare(other): + # object Index or Categorical[object] may contain tuples + return False + return array_equivalent(self._values, other._values) + + if self.nlevels != other.nlevels: + return False + + for i in range(self.nlevels): + self_codes = self.codes[i] + other_codes = other.codes[i] + self_mask = self_codes == -1 + other_mask = other_codes == -1 + if not np.array_equal(self_mask, other_mask): + return False + self_codes = self_codes[~self_mask] + self_values = self.levels[i]._values.take(self_codes) + + other_codes = other_codes[~other_mask] + other_values = other.levels[i]._values.take(other_codes) + + # since we use NaT both datetime64 and timedelta64 we can have a + # situation where a level is typed say timedelta64 in self (IOW it + # has other values than NaT) but types datetime64 in other (where + # its all NaT) but these are equivalent + if 
len(self_values) == 0 and len(other_values) == 0: + continue + + if not isinstance(self_values, np.ndarray): + # i.e. ExtensionArray + if not self_values.equals(other_values): + return False + else: + if not array_equivalent(self_values, other_values): + return False + + return True + + def equal_levels(self, other: MultiIndex) -> bool: + """ + Return True if the levels of both MultiIndex objects are the same + + """ + if self.nlevels != other.nlevels: + return False + + for i in range(self.nlevels): + if not self.levels[i].equals(other.levels[i]): + return False + return True + + # -------------------------------------------------------------------- + # Set Methods + + def _union(self, other, sort) -> MultiIndex: + other, result_names = self._convert_can_do_setop(other) + if ( + any(-1 in code for code in self.codes) + and any(-1 in code for code in other.codes) + or self.has_duplicates + or other.has_duplicates + ): + # This is only necessary if both sides have nans or one has dups, + # fast_unique_multiple is faster + result = super()._union(other, sort) + else: + rvals = other._values.astype(object, copy=False) + result = lib.fast_unique_multiple([self._values, rvals], sort=sort) + + return MultiIndex.from_arrays(zip(*result), sortorder=None, names=result_names) + + def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: + return is_object_dtype(dtype) + + def _get_reconciled_name_object(self, other) -> MultiIndex: + """ + If the result of a set operation will be self, + return self, unless the names change, in which + case make a shallow copy of self. + """ + names = self._maybe_match_names(other) + if self.names != names: + # Incompatible return value type (got "Optional[MultiIndex]", expected + # "MultiIndex") + return self.rename(names) # type: ignore[return-value] + return self + + def _maybe_match_names(self, other): + """ + Try to find common names to attach to the result of an operation between + a and b. Return a consensus list of names if they match at least partly + or list of None if they have completely different names. + """ + if len(self.names) != len(other.names): + return [None] * len(self.names) + names = [] + for a_name, b_name in zip(self.names, other.names): + if a_name == b_name: + names.append(a_name) + else: + # TODO: what if they both have np.nan for their names? 
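+                # Toy illustration (not from the upstream source): with
+                # self.names == ["A", "B"] and other.names == ["A", "C"],
+                # "A" is kept by the branch above and this branch appends
+                # None, giving a consensus of ["A", None].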
+ names.append(None) + return names + + def _wrap_intersection_result(self, other, result) -> MultiIndex: + _, result_names = self._convert_can_do_setop(other) + + if len(result) == 0: + return MultiIndex( + levels=self.levels, + codes=[[]] * self.nlevels, + names=result_names, + verify_integrity=False, + ) + else: + return MultiIndex.from_arrays(zip(*result), sortorder=0, names=result_names) + + def _wrap_difference_result(self, other, result) -> MultiIndex: + _, result_names = self._convert_can_do_setop(other) + + if len(result) == 0: + return MultiIndex( + levels=[[]] * self.nlevels, + codes=[[]] * self.nlevels, + names=result_names, + verify_integrity=False, + ) + else: + return MultiIndex.from_tuples(result, sortorder=0, names=result_names) + + def _convert_can_do_setop(self, other): + result_names = self.names + + if not isinstance(other, Index): + + if len(other) == 0: + return self[:0], self.names + else: + msg = "other must be a MultiIndex or a list of tuples" + try: + other = MultiIndex.from_tuples(other, names=self.names) + except (ValueError, TypeError) as err: + # ValueError raised by tuples_to_object_array if we + # have non-object dtype + raise TypeError(msg) from err + else: + result_names = get_unanimous_names(self, other) + + return other, result_names + + # -------------------------------------------------------------------- + + @doc(Index.astype) + def astype(self, dtype, copy: bool = True): + dtype = pandas_dtype(dtype) + if is_categorical_dtype(dtype): + msg = "> 1 ndim Categorical are not supported at this time" + raise NotImplementedError(msg) + elif not is_object_dtype(dtype): + raise TypeError( + "Setting a MultiIndex dtype to anything other than object " + "is not supported" + ) + elif copy is True: + return self._view() + return self + + def _validate_fill_value(self, item): + if isinstance(item, MultiIndex): + # GH#43212 + if item.nlevels != self.nlevels: + raise ValueError("Item must have length equal to number of levels.") + return item._values + elif not isinstance(item, tuple): + # Pad the key with empty strings if lower levels of the key + # aren't specified: + item = (item,) + ("",) * (self.nlevels - 1) + elif len(item) != self.nlevels: + raise ValueError("Item must have length equal to number of levels.") + return item + + def insert(self, loc: int, item) -> MultiIndex: + """ + Make new MultiIndex inserting new item at location + + Parameters + ---------- + loc : int + item : tuple + Must be same length as number of levels in the MultiIndex + + Returns + ------- + new_index : Index + """ + item = self._validate_fill_value(item) + + new_levels = [] + new_codes = [] + for k, level, level_codes in zip(item, self.levels, self.codes): + if k not in level: + # have to insert into level + # must insert at end otherwise you have to recompute all the + # other codes + lev_loc = len(level) + level = level.insert(lev_loc, k) + else: + lev_loc = level.get_loc(k) + + new_levels.append(level) + new_codes.append(np.insert(ensure_int64(level_codes), loc, lev_loc)) + + return MultiIndex( + levels=new_levels, codes=new_codes, names=self.names, verify_integrity=False + ) + + def delete(self, loc) -> MultiIndex: + """ + Make new index with passed location deleted + + Returns + ------- + new_index : MultiIndex + """ + new_codes = [np.delete(level_codes, loc) for level_codes in self.codes] + return MultiIndex( + levels=self.levels, + codes=new_codes, + names=self.names, + verify_integrity=False, + ) + + @doc(Index.isin) + def isin(self, values, level=None) -> 
npt.NDArray[np.bool_]: + if level is None: + values = MultiIndex.from_tuples(values, names=self.names)._values + return algos.isin(self._values, values) + else: + num = self._get_level_number(level) + levs = self.get_level_values(num) + + if levs.size == 0: + return np.zeros(len(levs), dtype=np.bool_) + return levs.isin(values) + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "names"]) + def set_names(self, names, level=None, inplace: bool = False) -> MultiIndex | None: + return super().set_names(names=names, level=level, inplace=inplace) + + rename = set_names + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + def drop_duplicates(self, keep: str | bool = "first") -> MultiIndex: + return super().drop_duplicates(keep=keep) + + # --------------------------------------------------------------- + # Arithmetic/Numeric Methods - Disabled + + __add__ = make_invalid_op("__add__") + __radd__ = make_invalid_op("__radd__") + __iadd__ = make_invalid_op("__iadd__") + __sub__ = make_invalid_op("__sub__") + __rsub__ = make_invalid_op("__rsub__") + __isub__ = make_invalid_op("__isub__") + __pow__ = make_invalid_op("__pow__") + __rpow__ = make_invalid_op("__rpow__") + __mul__ = make_invalid_op("__mul__") + __rmul__ = make_invalid_op("__rmul__") + __floordiv__ = make_invalid_op("__floordiv__") + __rfloordiv__ = make_invalid_op("__rfloordiv__") + __truediv__ = make_invalid_op("__truediv__") + __rtruediv__ = make_invalid_op("__rtruediv__") + __mod__ = make_invalid_op("__mod__") + __rmod__ = make_invalid_op("__rmod__") + __divmod__ = make_invalid_op("__divmod__") + __rdivmod__ = make_invalid_op("__rdivmod__") + # Unary methods disabled + __neg__ = make_invalid_op("__neg__") + __pos__ = make_invalid_op("__pos__") + __abs__ = make_invalid_op("__abs__") + __invert__ = make_invalid_op("__invert__") + + +def _lexsort_depth(codes: list[np.ndarray], nlevels: int) -> int: + """Count depth (up to a maximum of `nlevels`) with which codes are lexsorted.""" + int64_codes = [ensure_int64(level_codes) for level_codes in codes] + for k in range(nlevels, 0, -1): + if libalgos.is_lexsorted(int64_codes[:k]): + return k + return 0 + + +def sparsify_labels(label_list, start: int = 0, sentinel=""): + pivoted = list(zip(*label_list)) + k = len(label_list) + + result = pivoted[: start + 1] + prev = pivoted[start] + + for cur in pivoted[start + 1 :]: + sparse_cur = [] + + for i, (p, t) in enumerate(zip(prev, cur)): + if i == k - 1: + sparse_cur.append(t) + result.append(sparse_cur) + break + + if p == t: + sparse_cur.append(sentinel) + else: + sparse_cur.extend(cur[i:]) + result.append(sparse_cur) + break + + prev = cur + + return list(zip(*result)) + + +def _get_na_rep(dtype) -> str: + return {np.datetime64: "NaT", np.timedelta64: "NaT"}.get(dtype, "NaN") + + +def maybe_droplevels(index: Index, key) -> Index: + """ + Attempt to drop level or levels from the given index. 
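+
+    One level is dropped per element of a tuple ``key`` (a scalar ``key``
+    drops a single level); if that would drop every remaining level, the
+    original index is returned unchanged.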
+ + Parameters + ---------- + index: Index + key : scalar or tuple + + Returns + ------- + Index + """ + # drop levels + original_index = index + if isinstance(key, tuple): + for _ in key: + try: + index = index._drop_level_numbers([0]) + except ValueError: + # we have dropped too much, so back out + return original_index + else: + try: + index = index._drop_level_numbers([0]) + except ValueError: + pass + + return index + + +def _coerce_indexer_frozen(array_like, categories, copy: bool = False) -> np.ndarray: + """ + Coerce the array-like indexer to the smallest integer dtype that can encode all + of the given categories. + + Parameters + ---------- + array_like : array-like + categories : array-like + copy : bool + + Returns + ------- + np.ndarray + Non-writeable. + """ + array_like = coerce_indexer_dtype(array_like, categories) + if copy: + array_like = array_like.copy() + array_like.flags.writeable = False + return array_like + + +def _require_listlike(level, arr, arrname: str): + """ + Ensure that level is either None or listlike, and arr is list-of-listlike. + """ + if level is not None and not is_list_like(level): + if not is_list_like(arr): + raise TypeError(f"{arrname} must be list-like") + if is_list_like(arr[0]): + raise TypeError(f"{arrname} must be list-like") + level = [level] + arr = [arr] + elif level is None or is_list_like(level): + if not is_list_like(arr) or not is_list_like(arr[0]): + raise TypeError(f"{arrname} must be list of lists-like") + return level, arr diff --git a/.local/lib/python3.8/site-packages/pandas/core/indexes/numeric.py b/.local/lib/python3.8/site-packages/pandas/core/indexes/numeric.py new file mode 100644 index 0000000..5cd4cc9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/indexes/numeric.py @@ -0,0 +1,488 @@ +from __future__ import annotations + +from typing import ( + Callable, + Hashable, +) +import warnings + +import numpy as np + +from pandas._libs import ( + index as libindex, + lib, +) +from pandas._typing import ( + Dtype, + DtypeObj, + npt, +) +from pandas.util._decorators import ( + cache_readonly, + doc, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.cast import astype_nansafe +from pandas.core.dtypes.common import ( + is_dtype_equal, + is_extension_array_dtype, + is_float, + is_float_dtype, + is_integer_dtype, + is_numeric_dtype, + is_scalar, + is_signed_integer_dtype, + is_unsigned_integer_dtype, + needs_i8_conversion, + pandas_dtype, +) +from pandas.core.dtypes.generic import ABCSeries + +from pandas.core.indexes.base import ( + Index, + maybe_extract_name, +) + + +class NumericIndex(Index): + """ + Immutable sequence used for indexing and alignment. The basic object + storing axis labels for all pandas objects. NumericIndex is a special case + of `Index` with purely numpy int/uint/float labels. + + .. versionadded:: 1.4.0 + + Parameters + ---------- + data : array-like (1-dimensional) + dtype : NumPy dtype (default: None) + copy : bool + Make a copy of input ndarray. + name : object + Name to be stored in the index. + + Attributes + ---------- + None + + Methods + ---------- + None + + See Also + -------- + Index : The base pandas Index type. + Int64Index : Index of purely int64 labels (deprecated). + UInt64Index : Index of purely uint64 labels (deprecated). + Float64Index : Index of purely float64 labels (deprecated). + + Notes + ----- + An NumericIndex instance can **only** contain numpy int64/32/16/8, uint64/32/16/8 or + float64/32/16 dtype. 
In particular, ``NumericIndex`` *can not* hold Pandas numeric + dtypes (:class:`Int64Dtype`, :class:`Int32Dtype` etc.). + """ + + _typ = "numericindex" + _values: np.ndarray + _default_dtype: np.dtype | None = None + _dtype_validation_metadata: tuple[Callable[..., bool], str] = ( + is_numeric_dtype, + "numeric type", + ) + _is_numeric_dtype = True + _can_hold_strings = False + _is_backward_compat_public_numeric_index: bool = True + + # error: Signature of "_can_hold_na" incompatible with supertype "Index" + @cache_readonly + def _can_hold_na(self) -> bool: # type: ignore[override] + if is_float_dtype(self.dtype): + return True + else: + return False + + _engine_types: dict[np.dtype, type[libindex.IndexEngine]] = { + np.dtype(np.int8): libindex.Int8Engine, + np.dtype(np.int16): libindex.Int16Engine, + np.dtype(np.int32): libindex.Int32Engine, + np.dtype(np.int64): libindex.Int64Engine, + np.dtype(np.uint8): libindex.UInt8Engine, + np.dtype(np.uint16): libindex.UInt16Engine, + np.dtype(np.uint32): libindex.UInt32Engine, + np.dtype(np.uint64): libindex.UInt64Engine, + np.dtype(np.float32): libindex.Float32Engine, + np.dtype(np.float64): libindex.Float64Engine, + } + + @property + def _engine_type(self): + # error: Invalid index type "Union[dtype[Any], ExtensionDtype]" for + # "Dict[dtype[Any], Type[IndexEngine]]"; expected type "dtype[Any]" + return self._engine_types[self.dtype] # type: ignore[index] + + @cache_readonly + def inferred_type(self) -> str: + return { + "i": "integer", + "u": "integer", + "f": "floating", + }[self.dtype.kind] + + def __new__(cls, data=None, dtype: Dtype | None = None, copy=False, name=None): + name = maybe_extract_name(name, data, cls) + + subarr = cls._ensure_array(data, dtype, copy) + return cls._simple_new(subarr, name=name) + + @classmethod + def _ensure_array(cls, data, dtype, copy: bool): + """ + Ensure we have a valid array to pass to _simple_new. + """ + cls._validate_dtype(dtype) + + if not isinstance(data, (np.ndarray, Index)): + # Coerce to ndarray if not already ndarray or Index + if is_scalar(data): + raise cls._scalar_data_error(data) + + # other iterable of some kind + if not isinstance(data, (ABCSeries, list, tuple)): + data = list(data) + + orig = data + data = np.asarray(data, dtype=dtype) + if dtype is None and data.dtype.kind == "f": + if cls is UInt64Index and (data >= 0).all(): + # https://github.com/numpy/numpy/issues/19146 + data = np.asarray(orig, dtype=np.uint64) + + if issubclass(data.dtype.type, str): + cls._string_data_error(data) + + dtype = cls._ensure_dtype(dtype) + + if copy or not is_dtype_equal(data.dtype, dtype): + # TODO: the try/except below is because it's difficult to predict the error + # and/or error message from different combinations of data and dtype. + # Efforts to avoid this try/except welcome. 
+ # See https://github.com/pandas-dev/pandas/pull/41153#discussion_r676206222 + try: + subarr = np.array(data, dtype=dtype, copy=copy) + cls._validate_dtype(subarr.dtype) + except (TypeError, ValueError): + raise ValueError(f"data is not compatible with {cls.__name__}") + cls._assert_safe_casting(data, subarr) + else: + subarr = data + + if subarr.ndim > 1: + # GH#13601, GH#20285, GH#27125 + raise ValueError("Index data must be 1-dimensional") + + subarr = np.asarray(subarr) + return subarr + + @classmethod + def _validate_dtype(cls, dtype: Dtype | None) -> None: + if dtype is None: + return + + validation_func, expected = cls._dtype_validation_metadata + if not validation_func(dtype): + raise ValueError( + f"Incorrect `dtype` passed: expected {expected}, received {dtype}" + ) + + @classmethod + def _ensure_dtype(cls, dtype: Dtype | None) -> np.dtype | None: + """ + Ensure int64 dtype for Int64Index etc. but allow int32 etc. for NumericIndex. + + Assumes dtype has already been validated. + """ + if dtype is None: + return cls._default_dtype + + dtype = pandas_dtype(dtype) + assert isinstance(dtype, np.dtype) + + if cls._is_backward_compat_public_numeric_index: + # dtype for NumericIndex + return dtype + else: + # dtype for Int64Index, UInt64Index etc. Needed for backwards compat. + return cls._default_dtype + + def __contains__(self, key) -> bool: + """ + Check if key is a float and has a decimal. If it has, return False. + """ + if not is_integer_dtype(self.dtype): + return super().__contains__(key) + + hash(key) + try: + if is_float(key) and int(key) != key: + # otherwise the `key in self._engine` check casts e.g. 1.1 -> 1 + return False + return key in self._engine + except (OverflowError, TypeError, ValueError): + return False + + @doc(Index.astype) + def astype(self, dtype, copy: bool = True): + dtype = pandas_dtype(dtype) + if is_float_dtype(self.dtype): + if needs_i8_conversion(dtype): + raise TypeError( + f"Cannot convert Float64Index to dtype {dtype}; integer " + "values are required for conversion" + ) + elif is_integer_dtype(dtype) and not is_extension_array_dtype(dtype): + # TODO(ExtensionIndex); this can change once we have an EA Index type + # GH 13149 + arr = astype_nansafe(self._values, dtype=dtype) + if isinstance(self, Float64Index): + return Int64Index(arr, name=self.name) + else: + return NumericIndex(arr, name=self.name, dtype=dtype) + elif self._is_backward_compat_public_numeric_index: + # this block is needed so e.g. NumericIndex[int8].astype("int32") returns + # NumericIndex[int32] and not Int64Index with dtype int64. + # When Int64Index etc. are removed from the code base, removed this also. 
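+            # For example, NumericIndex([1, 2], dtype="int8").astype("int32")
+            # should come back as a NumericIndex with dtype int32 rather
+            # than as an Int64Index with dtype int64.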
+ if not is_extension_array_dtype(dtype) and is_numeric_dtype(dtype): + return self._constructor(self, dtype=dtype, copy=copy) + + return super().astype(dtype, copy=copy) + + # ---------------------------------------------------------------- + # Indexing Methods + + # error: Decorated property not supported + @cache_readonly # type: ignore[misc] + @doc(Index._should_fallback_to_positional) + def _should_fallback_to_positional(self) -> bool: + return False + + @doc(Index._convert_slice_indexer) + def _convert_slice_indexer(self, key: slice, kind: str): + if is_float_dtype(self.dtype): + assert kind in ["loc", "getitem"] + + # We always treat __getitem__ slicing as label-based + # translate to locations + return self.slice_indexer(key.start, key.stop, key.step) + + return super()._convert_slice_indexer(key, kind=kind) + + @doc(Index._maybe_cast_slice_bound) + def _maybe_cast_slice_bound(self, label, side: str, kind=lib.no_default): + assert kind in ["loc", "getitem", None, lib.no_default] + self._deprecated_arg(kind, "kind", "_maybe_cast_slice_bound") + + # we will try to coerce to integers + return self._maybe_cast_indexer(label) + + # ---------------------------------------------------------------- + + @doc(Index._shallow_copy) + def _shallow_copy(self, values, name: Hashable = lib.no_default): + if not self._can_hold_na and values.dtype.kind == "f": + name = self._name if name is lib.no_default else name + # Ensure we are not returning an Int64Index with float data: + return Float64Index._simple_new(values, name=name) + return super()._shallow_copy(values=values, name=name) + + def _convert_tolerance(self, tolerance, target): + tolerance = super()._convert_tolerance(tolerance, target) + + if not np.issubdtype(tolerance.dtype, np.number): + if tolerance.ndim > 0: + raise ValueError( + f"tolerance argument for {type(self).__name__} must contain " + "numeric elements if it is list type" + ) + else: + raise ValueError( + f"tolerance argument for {type(self).__name__} must be numeric " + f"if it is a scalar: {repr(tolerance)}" + ) + return tolerance + + def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: + # If we ever have BoolIndex or ComplexIndex, this may need to be tightened + return is_numeric_dtype(dtype) + + @classmethod + def _assert_safe_casting(cls, data: np.ndarray, subarr: np.ndarray) -> None: + """ + Ensure incoming data can be represented with matching signed-ness. + + Needed if the process of casting data from some accepted dtype to the internal + dtype(s) bears the risk of truncation (e.g. float to int). + """ + if is_integer_dtype(subarr.dtype): + if not np.array_equal(data, subarr): + raise TypeError("Unsafe NumPy casting, you must explicitly cast") + + @property + def _is_all_dates(self) -> bool: + """ + Checks that all the labels are datetime objects. + """ + return False + + def _format_native_types( + self, *, na_rep="", float_format=None, decimal=".", quoting=None, **kwargs + ): + from pandas.io.formats.format import FloatArrayFormatter + + if is_float_dtype(self.dtype): + formatter = FloatArrayFormatter( + self._values, + na_rep=na_rep, + float_format=float_format, + decimal=decimal, + quoting=quoting, + fixed_width=False, + ) + return formatter.get_result_as_array() + + return super()._format_native_types( + na_rep=na_rep, + float_format=float_format, + decimal=decimal, + quoting=quoting, + **kwargs, + ) + + +_num_index_shared_docs = {} + + +_num_index_shared_docs[ + "class_descr" +] = """ + Immutable sequence used for indexing and alignment. 
The basic object + storing axis labels for all pandas objects. %(klass)s is a special case + of `Index` with purely %(ltype)s labels. %(extra)s. + + .. deprecated:: 1.4.0 + In pandas v2.0 %(klass)s will be removed and :class:`NumericIndex` used instead. + %(klass)s will remain fully functional for the duration of pandas 1.x. + + Parameters + ---------- + data : array-like (1-dimensional) + dtype : NumPy dtype (default: %(dtype)s) + copy : bool + Make a copy of input ndarray. + name : object + Name to be stored in the index. + + Attributes + ---------- + None + + Methods + ---------- + None + + See Also + -------- + Index : The base pandas Index type. + NumericIndex : Index of numpy int/uint/float data. + + Notes + ----- + An Index instance can **only** contain hashable objects. +""" + + +class IntegerIndex(NumericIndex): + """ + This is an abstract class for Int64Index, UInt64Index. + """ + + _is_backward_compat_public_numeric_index: bool = False + + @property + def asi8(self) -> npt.NDArray[np.int64]: + # do not cache or you'll create a memory leak + warnings.warn( + "Index.asi8 is deprecated and will be removed in a future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self._values.view(self._default_dtype) + + def _validate_fill_value(self, value): + # e.g. np.array([1.0]) we want np.array([1], dtype=self.dtype) + # see TestSetitemFloatNDarrayIntoIntegerSeries + super()._validate_fill_value(value) + if hasattr(value, "dtype") and is_float_dtype(value.dtype): + converted = value.astype(self.dtype) + if (converted == value).all(): + # See also: can_hold_element + return converted + raise TypeError + return value + + +class Int64Index(IntegerIndex): + _index_descr_args = { + "klass": "Int64Index", + "ltype": "integer", + "dtype": "int64", + "extra": "", + } + __doc__ = _num_index_shared_docs["class_descr"] % _index_descr_args + + _typ = "int64index" + _engine_type = libindex.Int64Engine + _default_dtype = np.dtype(np.int64) + _dtype_validation_metadata = (is_signed_integer_dtype, "signed integer") + + +class UInt64Index(IntegerIndex): + _index_descr_args = { + "klass": "UInt64Index", + "ltype": "unsigned integer", + "dtype": "uint64", + "extra": "", + } + __doc__ = _num_index_shared_docs["class_descr"] % _index_descr_args + + _typ = "uint64index" + _engine_type = libindex.UInt64Engine + _default_dtype = np.dtype(np.uint64) + _dtype_validation_metadata = (is_unsigned_integer_dtype, "unsigned integer") + + def _validate_fill_value(self, value): + # e.g. 
np.array([1]) we want np.array([1], dtype=np.uint64) + # see test_where_uin64 + super()._validate_fill_value(value) + if hasattr(value, "dtype") and is_signed_integer_dtype(value.dtype): + if (value >= 0).all(): + return value.astype(self.dtype) + raise TypeError + return value + + +class Float64Index(NumericIndex): + _index_descr_args = { + "klass": "Float64Index", + "dtype": "float64", + "ltype": "float", + "extra": "", + } + __doc__ = _num_index_shared_docs["class_descr"] % _index_descr_args + + _typ = "float64index" + _engine_type = libindex.Float64Engine + _default_dtype = np.dtype(np.float64) + _dtype_validation_metadata = (is_float_dtype, "float") + _is_backward_compat_public_numeric_index: bool = False diff --git a/.local/lib/python3.8/site-packages/pandas/core/indexes/period.py b/.local/lib/python3.8/site-packages/pandas/core/indexes/period.py new file mode 100644 index 0000000..aba834a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/indexes/period.py @@ -0,0 +1,564 @@ +from __future__ import annotations + +from datetime import ( + datetime, + timedelta, +) +from typing import Hashable +import warnings + +import numpy as np + +from pandas._libs import ( + index as libindex, + lib, +) +from pandas._libs.tslibs import ( + BaseOffset, + NaT, + Period, + Resolution, + Tick, +) +from pandas._typing import ( + Dtype, + DtypeObj, +) +from pandas.util._decorators import doc +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + is_datetime64_any_dtype, + is_integer, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import PeriodDtype +from pandas.core.dtypes.missing import is_valid_na_for_dtype + +from pandas.core.arrays.period import ( + PeriodArray, + period_array, + raise_on_incompatible, + validate_dtype_freq, +) +import pandas.core.common as com +import pandas.core.indexes.base as ibase +from pandas.core.indexes.base import maybe_extract_name +from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin +from pandas.core.indexes.datetimes import ( + DatetimeIndex, + Index, +) +from pandas.core.indexes.extension import inherit_names +from pandas.core.indexes.numeric import Int64Index + +_index_doc_kwargs = dict(ibase._index_doc_kwargs) +_index_doc_kwargs.update({"target_klass": "PeriodIndex or list of Periods"}) +_shared_doc_kwargs = { + "klass": "PeriodArray", +} + +# --- Period index sketch + + +def _new_PeriodIndex(cls, **d): + # GH13277 for unpickling + values = d.pop("data") + if values.dtype == "int64": + freq = d.pop("freq", None) + values = PeriodArray(values, freq=freq) + return cls._simple_new(values, **d) + else: + return cls(values, **d) + + +@inherit_names( + ["strftime", "start_time", "end_time"] + PeriodArray._field_ops, + PeriodArray, + wrap=True, +) +@inherit_names(["is_leap_year", "_format_native_types"], PeriodArray) +class PeriodIndex(DatetimeIndexOpsMixin): + """ + Immutable ndarray holding ordinal values indicating regular periods in time. + + Index keys are boxed to Period objects which carries the metadata (eg, + frequency information). + + Parameters + ---------- + data : array-like (1d int np.ndarray or PeriodArray), optional + Optional period-like data to construct index with. + copy : bool + Make a copy of input ndarray. + freq : str or period object, optional + One of pandas period strings or corresponding objects. 
+ year : int, array, or Series, default None + month : int, array, or Series, default None + quarter : int, array, or Series, default None + day : int, array, or Series, default None + hour : int, array, or Series, default None + minute : int, array, or Series, default None + second : int, array, or Series, default None + dtype : str or PeriodDtype, default None + + Attributes + ---------- + day + dayofweek + day_of_week + dayofyear + day_of_year + days_in_month + daysinmonth + end_time + freq + freqstr + hour + is_leap_year + minute + month + quarter + qyear + second + start_time + week + weekday + weekofyear + year + + Methods + ------- + asfreq + strftime + to_timestamp + + See Also + -------- + Index : The base pandas Index type. + Period : Represents a period of time. + DatetimeIndex : Index with datetime64 data. + TimedeltaIndex : Index of timedelta64 data. + period_range : Create a fixed-frequency PeriodIndex. + + Examples + -------- + >>> idx = pd.PeriodIndex(year=[2000, 2002], quarter=[1, 3]) + >>> idx + PeriodIndex(['2000Q1', '2002Q3'], dtype='period[Q-DEC]') + """ + + _typ = "periodindex" + + _data: PeriodArray + freq: BaseOffset + dtype: PeriodDtype + + _data_cls = PeriodArray + _engine_type = libindex.PeriodEngine + _supports_partial_string_indexing = True + + # -------------------------------------------------------------------- + # methods that dispatch to array and wrap result in Index + # These are defined here instead of via inherit_names for mypy + + @doc( + PeriodArray.asfreq, + other="pandas.arrays.PeriodArray", + other_name="PeriodArray", + **_shared_doc_kwargs, + ) + def asfreq(self, freq=None, how: str = "E") -> PeriodIndex: + arr = self._data.asfreq(freq, how) + return type(self)._simple_new(arr, name=self.name) + + @doc(PeriodArray.to_timestamp) + def to_timestamp(self, freq=None, how="start") -> DatetimeIndex: + arr = self._data.to_timestamp(freq, how) + return DatetimeIndex._simple_new(arr, name=self.name) + + # https://github.com/python/mypy/issues/1362 + # error: Decorated property not supported + @property # type:ignore[misc] + @doc(PeriodArray.hour.fget) + def hour(self) -> Int64Index: + return Int64Index(self._data.hour, name=self.name) + + # https://github.com/python/mypy/issues/1362 + # error: Decorated property not supported + @property # type:ignore[misc] + @doc(PeriodArray.minute.fget) + def minute(self) -> Int64Index: + return Int64Index(self._data.minute, name=self.name) + + # https://github.com/python/mypy/issues/1362 + # error: Decorated property not supported + @property # type:ignore[misc] + @doc(PeriodArray.second.fget) + def second(self) -> Int64Index: + return Int64Index(self._data.second, name=self.name) + + # ------------------------------------------------------------------------ + # Index Constructors + + def __new__( + cls, + data=None, + ordinal=None, + freq=None, + dtype: Dtype | None = None, + copy: bool = False, + name: Hashable = None, + **fields, + ) -> PeriodIndex: + + valid_field_set = { + "year", + "month", + "day", + "quarter", + "hour", + "minute", + "second", + } + + if not set(fields).issubset(valid_field_set): + argument = list(set(fields) - valid_field_set)[0] + raise TypeError(f"__new__() got an unexpected keyword argument {argument}") + + name = maybe_extract_name(name, data, cls) + + if data is None and ordinal is None: + # range-based. 
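+            # e.g. the field-based form from the class docstring,
+            # PeriodIndex(year=[2000, 2002], quarter=[1, 3]), reaches this
+            # branch with data=None, ordinal=None and `fields` populated.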
+ if not fields: + # test_pickle_compat_construction + raise cls._scalar_data_error(None) + + data, freq2 = PeriodArray._generate_range(None, None, None, freq, fields) + # PeriodArray._generate range does validation that fields is + # empty when really using the range-based constructor. + freq = freq2 + + data = PeriodArray(data, freq=freq) + else: + freq = validate_dtype_freq(dtype, freq) + + # PeriodIndex allow PeriodIndex(period_index, freq=different) + # Let's not encourage that kind of behavior in PeriodArray. + + if freq and isinstance(data, cls) and data.freq != freq: + # TODO: We can do some of these with no-copy / coercion? + # e.g. D -> 2D seems to be OK + data = data.asfreq(freq) + + if data is None and ordinal is not None: + # we strangely ignore `ordinal` if data is passed. + ordinal = np.asarray(ordinal, dtype=np.int64) + data = PeriodArray(ordinal, freq=freq) + else: + # don't pass copy here, since we copy later. + data = period_array(data=data, freq=freq) + + if copy: + data = data.copy() + + return cls._simple_new(data, name=name) + + # ------------------------------------------------------------------------ + # Data + + @property + def values(self) -> np.ndarray: + return np.asarray(self, dtype=object) + + def _maybe_convert_timedelta(self, other): + """ + Convert timedelta-like input to an integer multiple of self.freq + + Parameters + ---------- + other : timedelta, np.timedelta64, DateOffset, int, np.ndarray + + Returns + ------- + converted : int, np.ndarray[int64] + + Raises + ------ + IncompatibleFrequency : if the input cannot be written as a multiple + of self.freq. Note IncompatibleFrequency subclasses ValueError. + """ + if isinstance(other, (timedelta, np.timedelta64, Tick, np.ndarray)): + if isinstance(self.freq, Tick): + # _check_timedeltalike_freq_compat will raise if incompatible + delta = self._data._check_timedeltalike_freq_compat(other) + return delta + elif isinstance(other, BaseOffset): + if other.base == self.freq.base: + return other.n + + raise raise_on_incompatible(self, other) + elif is_integer(other): + # integer is passed to .shift via + # _add_datetimelike_methods basically + # but ufunc may pass integer to _add_delta + return other + + # raise when input doesn't have freq + raise raise_on_incompatible(self, None) + + def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: + """ + Can we compare values of the given dtype to our own? + """ + if not isinstance(dtype, PeriodDtype): + return False + # For the subset of DateOffsets that can be a dtype.freq, it + # suffices (and is much faster) to compare the dtype_code rather than + # the freq itself. + # See also: PeriodDtype.__eq__ + freq = dtype.freq + own_freq = self.freq + return ( + freq._period_dtype_code == own_freq._period_dtype_code + and freq.n == own_freq.n + ) + + # ------------------------------------------------------------------------ + # Index Methods + + def asof_locs(self, where: Index, mask: np.ndarray) -> np.ndarray: + """ + where : array of timestamps + mask : np.ndarray[bool] + Array of booleans where data is not NA. 
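+
+        Returns
+        -------
+        np.ndarray
+            For each entry in `where`, the position of the latest index
+            value at or before it (-1 where no such value exists).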
+ """ + if isinstance(where, DatetimeIndex): + where = PeriodIndex(where._values, freq=self.freq) + elif not isinstance(where, PeriodIndex): + raise TypeError("asof_locs `where` must be DatetimeIndex or PeriodIndex") + + return super().asof_locs(where, mask) + + @doc(Index.astype) + def astype(self, dtype, copy: bool = True, how=lib.no_default): + dtype = pandas_dtype(dtype) + + if how is not lib.no_default: + # GH#37982 + warnings.warn( + "The 'how' keyword in PeriodIndex.astype is deprecated and " + "will be removed in a future version. " + "Use index.to_timestamp(how=how) instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + else: + how = "start" + + if is_datetime64_any_dtype(dtype): + # 'how' is index-specific, isn't part of the EA interface. + # GH#45038 implement this for PeriodArray (but without "how") + # once the "how" deprecation is enforced we can just dispatch + # directly to PeriodArray. + tz = getattr(dtype, "tz", None) + return self.to_timestamp(how=how).tz_localize(tz) + + return super().astype(dtype, copy=copy) + + @property + def is_full(self) -> bool: + """ + Returns True if this PeriodIndex is range-like in that all Periods + between start and end are present, in order. + """ + if len(self) == 0: + return True + if not self.is_monotonic_increasing: + raise ValueError("Index is not monotonic") + values = self.asi8 + return ((values[1:] - values[:-1]) < 2).all() + + @property + def inferred_type(self) -> str: + # b/c data is represented as ints make sure we can't have ambiguous + # indexing + return "period" + + # ------------------------------------------------------------------------ + # Indexing Methods + + def _convert_tolerance(self, tolerance, target): + # Returned tolerance must be in dtype/units so that + # `|self._get_engine_target() - target._engine_target()| <= tolerance` + # is meaningful. Since PeriodIndex returns int64 for engine_target, + # we may need to convert timedelta64 tolerance to int64. + tolerance = super()._convert_tolerance(tolerance, target) + + if self.dtype == target.dtype: + # convert tolerance to i8 + tolerance = self._maybe_convert_timedelta(tolerance) + + return tolerance + + def get_loc(self, key, method=None, tolerance=None): + """ + Get integer location for requested label. + + Parameters + ---------- + key : Period, NaT, str, or datetime + String or datetime key must be parsable as Period. + + Returns + ------- + loc : int or ndarray[int64] + + Raises + ------ + KeyError + Key is not present in the index. + TypeError + If key is listlike or otherwise not hashable. + """ + orig_key = key + + self._check_indexing_error(key) + + if is_valid_na_for_dtype(key, self.dtype): + key = NaT + + elif isinstance(key, str): + + try: + parsed, reso = self._parse_with_reso(key) + except ValueError as err: + # A string with invalid format + raise KeyError(f"Cannot interpret '{key}' as period") from err + + if self._can_partial_date_slice(reso): + try: + return self._partial_date_slice(reso, parsed) + except KeyError as err: + # TODO: pass if method is not None, like DTI does? 
+ raise KeyError(key) from err + + if reso == self.dtype.resolution: + # the reso < self.dtype.resolution case goes through _get_string_slice + key = Period(parsed, freq=self.freq) + loc = self.get_loc(key, method=method, tolerance=tolerance) + # Recursing instead of falling through matters for the exception + # message in test_get_loc3 (though not clear if that really matters) + return loc + elif method is None: + raise KeyError(key) + else: + key = Period(parsed, freq=self.freq) + + elif isinstance(key, Period): + sfreq = self.freq + kfreq = key.freq + if not ( + sfreq.n == kfreq.n + and sfreq._period_dtype_code == kfreq._period_dtype_code + ): + # GH#42247 For the subset of DateOffsets that can be Period freqs, + # checking these two attributes is sufficient to check equality, + # and much more performant than `self.freq == key.freq` + raise KeyError(key) + elif isinstance(key, datetime): + try: + key = Period(key, freq=self.freq) + except ValueError as err: + # we cannot construct the Period + raise KeyError(orig_key) from err + else: + # in particular integer, which Period constructor would cast to string + raise KeyError(key) + + try: + return Index.get_loc(self, key, method, tolerance) + except KeyError as err: + raise KeyError(orig_key) from err + + @doc(DatetimeIndexOpsMixin._maybe_cast_slice_bound) + def _maybe_cast_slice_bound(self, label, side: str, kind=lib.no_default): + if isinstance(label, datetime): + label = Period(label, freq=self.freq) + + return super()._maybe_cast_slice_bound(label, side, kind=kind) + + def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime): + grp = reso.freq_group + iv = Period(parsed, freq=grp.value) + return (iv.asfreq(self.freq, how="start"), iv.asfreq(self.freq, how="end")) + + def _can_partial_date_slice(self, reso: Resolution) -> bool: + assert isinstance(reso, Resolution), (type(reso), reso) + # e.g. test_getitem_setitem_periodindex + return reso > self.dtype.resolution + + +def period_range( + start=None, end=None, periods: int | None = None, freq=None, name=None +) -> PeriodIndex: + """ + Return a fixed frequency PeriodIndex. + + The day (calendar) is the default frequency. + + Parameters + ---------- + start : str or period-like, default None + Left bound for generating periods. + end : str or period-like, default None + Right bound for generating periods. + periods : int, default None + Number of periods to generate. + freq : str or DateOffset, optional + Frequency alias. By default the freq is taken from `start` or `end` + if those are Period objects. Otherwise, the default is ``"D"`` for + daily frequency. + name : str, default None + Name of the resulting PeriodIndex. + + Returns + ------- + PeriodIndex + + Notes + ----- + Of the three parameters: ``start``, ``end``, and ``periods``, exactly two + must be specified. + + To learn more about the frequency strings, please see `this link + `__. + + Examples + -------- + >>> pd.period_range(start='2017-01-01', end='2018-01-01', freq='M') + PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', '2017-06', + '2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12', + '2018-01'], + dtype='period[M]') + + If ``start`` or ``end`` are ``Period`` objects, they will be used as anchor + endpoints for a ``PeriodIndex`` with frequency matching that of the + ``period_range`` constructor. + + >>> pd.period_range(start=pd.Period('2017Q1', freq='Q'), + ... 
end=pd.Period('2017Q2', freq='Q'), freq='M') + PeriodIndex(['2017-03', '2017-04', '2017-05', '2017-06'], + dtype='period[M]') + """ + if com.count_not_none(start, end, periods) != 2: + raise ValueError( + "Of the three parameters: start, end, and periods, " + "exactly two must be specified" + ) + if freq is None and (not isinstance(start, Period) and not isinstance(end, Period)): + freq = "D" + + data, freq = PeriodArray._generate_range(start, end, periods, freq, fields={}) + data = PeriodArray(data, freq=freq) + return PeriodIndex(data, name=name) diff --git a/.local/lib/python3.8/site-packages/pandas/core/indexes/range.py b/.local/lib/python3.8/site-packages/pandas/core/indexes/range.py new file mode 100644 index 0000000..fdb1ee7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/indexes/range.py @@ -0,0 +1,1057 @@ +from __future__ import annotations + +from datetime import timedelta +import operator +from sys import getsizeof +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Hashable, + List, + cast, +) +import warnings + +import numpy as np + +from pandas._libs import ( + index as libindex, + lib, +) +from pandas._libs.lib import no_default +from pandas._typing import ( + Dtype, + npt, +) +from pandas.compat.numpy import function as nv +from pandas.util._decorators import ( + cache_readonly, + doc, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + ensure_platform_int, + ensure_python_int, + is_float, + is_integer, + is_scalar, + is_signed_integer_dtype, + is_timedelta64_dtype, +) +from pandas.core.dtypes.generic import ABCTimedeltaIndex + +from pandas.core import ops +import pandas.core.common as com +from pandas.core.construction import extract_array +import pandas.core.indexes.base as ibase +from pandas.core.indexes.base import maybe_extract_name +from pandas.core.indexes.numeric import ( + Float64Index, + Int64Index, + NumericIndex, +) +from pandas.core.ops.common import unpack_zerodim_and_defer + +if TYPE_CHECKING: + from pandas import Index + +_empty_range = range(0) + + +class RangeIndex(NumericIndex): + """ + Immutable Index implementing a monotonic integer range. + + RangeIndex is a memory-saving special case of Int64Index limited to + representing monotonic ranges. Using RangeIndex may in some instances + improve computing speed. + + This is the default index type used + by DataFrame and Series when no explicit index is provided by the user. + + Parameters + ---------- + start : int (default: 0), range, or other RangeIndex instance + If int and "stop" is not given, interpreted as "stop" instead. + stop : int (default: 0) + step : int (default: 1) + dtype : np.int64 + Unused, accepted for homogeneity with other index types. + copy : bool, default False + Unused, accepted for homogeneity with other index types. + name : object, optional + Name to be stored in the index. + + Attributes + ---------- + start + stop + step + + Methods + ------- + from_range + + See Also + -------- + Index : The base pandas Index type. + Int64Index : Index of int64 data. 
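+
+    Examples
+    --------
+    A minimal sketch of typical construction, assuming the usual ``pd``
+    namespace:
+
+    >>> pd.RangeIndex(5)
+    RangeIndex(start=0, stop=5, step=1)
+    >>> pd.RangeIndex(1, 10, 2)
+    RangeIndex(start=1, stop=10, step=2)
+    >>> pd.RangeIndex.from_range(range(3))
+    RangeIndex(start=0, stop=3, step=1)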
+ """ + + _typ = "rangeindex" + _engine_type = libindex.Int64Engine + _dtype_validation_metadata = (is_signed_integer_dtype, "signed integer") + _range: range + _is_backward_compat_public_numeric_index: bool = False + + # -------------------------------------------------------------------- + # Constructors + + def __new__( + cls, + start=None, + stop=None, + step=None, + dtype: Dtype | None = None, + copy: bool = False, + name: Hashable = None, + ) -> RangeIndex: + cls._validate_dtype(dtype) + name = maybe_extract_name(name, start, cls) + + # RangeIndex + if isinstance(start, RangeIndex): + return start.copy(name=name) + elif isinstance(start, range): + return cls._simple_new(start, name=name) + + # validate the arguments + if com.all_none(start, stop, step): + raise TypeError("RangeIndex(...) must be called with integers") + + start = ensure_python_int(start) if start is not None else 0 + + if stop is None: + start, stop = 0, start + else: + stop = ensure_python_int(stop) + + step = ensure_python_int(step) if step is not None else 1 + if step == 0: + raise ValueError("Step must not be zero") + + rng = range(start, stop, step) + return cls._simple_new(rng, name=name) + + @classmethod + def from_range( + cls, data: range, name=None, dtype: Dtype | None = None + ) -> RangeIndex: + """ + Create RangeIndex from a range object. + + Returns + ------- + RangeIndex + """ + if not isinstance(data, range): + raise TypeError( + f"{cls.__name__}(...) must be called with object coercible to a " + f"range, {repr(data)} was passed" + ) + cls._validate_dtype(dtype) + return cls._simple_new(data, name=name) + + @classmethod + def _simple_new(cls, values: range, name: Hashable = None) -> RangeIndex: + result = object.__new__(cls) + + assert isinstance(values, range) + + result._range = values + result._name = name + result._cache = {} + result._reset_identity() + return result + + # -------------------------------------------------------------------- + + # error: Return type "Type[Int64Index]" of "_constructor" incompatible with return + # type "Type[RangeIndex]" in supertype "Index" + @cache_readonly + def _constructor(self) -> type[Int64Index]: # type: ignore[override] + """return the class to use for construction""" + return Int64Index + + # error: Signature of "_data" incompatible with supertype "Index" + @cache_readonly + def _data(self) -> np.ndarray: # type: ignore[override] + """ + An int array that for performance reasons is created only when needed. + + The constructed array is saved in ``_cache``. 
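+        Because the property is decorated with ``cache_readonly``, repeated
+        accesses reuse the materialized array instead of calling
+        ``np.arange`` again.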
+ """ + return np.arange(self.start, self.stop, self.step, dtype=np.int64) + + def _get_data_as_items(self): + """return a list of tuples of start, stop, step""" + rng = self._range + return [("start", rng.start), ("stop", rng.stop), ("step", rng.step)] + + def __reduce__(self): + d = {"name": self.name} + d.update(dict(self._get_data_as_items())) + return ibase._new_Index, (type(self), d), None + + # -------------------------------------------------------------------- + # Rendering Methods + + def _format_attrs(self): + """ + Return a list of tuples of the (attr, formatted_value) + """ + attrs = self._get_data_as_items() + if self.name is not None: + attrs.append(("name", ibase.default_pprint(self.name))) + return attrs + + def _format_data(self, name=None): + # we are formatting thru the attributes + return None + + def _format_with_header(self, header: list[str], na_rep: str) -> list[str]: + # Equivalent to Index implementation, but faster + if not len(self._range): + return header + first_val_str = str(self._range[0]) + last_val_str = str(self._range[-1]) + max_length = max(len(first_val_str), len(last_val_str)) + + return header + [f"{x:<{max_length}}" for x in self._range] + + # -------------------------------------------------------------------- + _deprecation_message = ( + "RangeIndex.{} is deprecated and will be " + "removed in a future version. Use RangeIndex.{} " + "instead" + ) + + @property + def start(self) -> int: + """ + The value of the `start` parameter (``0`` if this was not supplied). + """ + # GH 25710 + return self._range.start + + @property + def _start(self) -> int: + """ + The value of the `start` parameter (``0`` if this was not supplied). + + .. deprecated:: 0.25.0 + Use ``start`` instead. + """ + warnings.warn( + self._deprecation_message.format("_start", "start"), + FutureWarning, + stacklevel=find_stack_level(), + ) + return self.start + + @property + def stop(self) -> int: + """ + The value of the `stop` parameter. + """ + return self._range.stop + + @property + def _stop(self) -> int: + """ + The value of the `stop` parameter. + + .. deprecated:: 0.25.0 + Use ``stop`` instead. + """ + # GH 25710 + warnings.warn( + self._deprecation_message.format("_stop", "stop"), + FutureWarning, + stacklevel=find_stack_level(), + ) + return self.stop + + @property + def step(self) -> int: + """ + The value of the `step` parameter (``1`` if this was not supplied). + """ + # GH 25710 + return self._range.step + + @property + def _step(self) -> int: + """ + The value of the `step` parameter (``1`` if this was not supplied). + + .. deprecated:: 0.25.0 + Use ``step`` instead. + """ + # GH 25710 + warnings.warn( + self._deprecation_message.format("_step", "step"), + FutureWarning, + stacklevel=find_stack_level(), + ) + return self.step + + @cache_readonly + def nbytes(self) -> int: + """ + Return the number of bytes in the underlying data. 
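+
+        For a RangeIndex this is the footprint of the ``range`` object plus
+        its three integer attributes, so it stays effectively constant no
+        matter how many elements the index represents.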
+ """ + rng = self._range + return getsizeof(rng) + sum( + getsizeof(getattr(rng, attr_name)) + for attr_name in ["start", "stop", "step"] + ) + + def memory_usage(self, deep: bool = False) -> int: + """ + Memory usage of my values + + Parameters + ---------- + deep : bool + Introspect the data deeply, interrogate + `object` dtypes for system-level memory consumption + + Returns + ------- + bytes used + + Notes + ----- + Memory usage does not include memory consumed by elements that + are not components of the array if deep=False + + See Also + -------- + numpy.ndarray.nbytes + """ + return self.nbytes + + @property + def dtype(self) -> np.dtype: + return np.dtype(np.int64) + + @property + def is_unique(self) -> bool: + """return if the index has unique values""" + return True + + @cache_readonly + def is_monotonic_increasing(self) -> bool: + return self._range.step > 0 or len(self) <= 1 + + @cache_readonly + def is_monotonic_decreasing(self) -> bool: + return self._range.step < 0 or len(self) <= 1 + + def __contains__(self, key: Any) -> bool: + hash(key) + try: + key = ensure_python_int(key) + except TypeError: + return False + return key in self._range + + @property + def inferred_type(self) -> str: + return "integer" + + # -------------------------------------------------------------------- + # Indexing Methods + + @doc(Int64Index.get_loc) + def get_loc(self, key, method=None, tolerance=None): + if method is None and tolerance is None: + if is_integer(key) or (is_float(key) and key.is_integer()): + new_key = int(key) + try: + return self._range.index(new_key) + except ValueError as err: + raise KeyError(key) from err + self._check_indexing_error(key) + raise KeyError(key) + return super().get_loc(key, method=method, tolerance=tolerance) + + def _get_indexer( + self, + target: Index, + method: str | None = None, + limit: int | None = None, + tolerance=None, + ) -> npt.NDArray[np.intp]: + if com.any_not_none(method, tolerance, limit): + return super()._get_indexer( + target, method=method, tolerance=tolerance, limit=limit + ) + + if self.step > 0: + start, stop, step = self.start, self.stop, self.step + else: + # GH 28678: work on reversed range for simplicity + reverse = self._range[::-1] + start, stop, step = reverse.start, reverse.stop, reverse.step + + target_array = np.asarray(target) + locs = target_array - start + valid = (locs % step == 0) & (locs >= 0) & (target_array < stop) + locs[~valid] = -1 + locs[valid] = locs[valid] / step + + if step != self.step: + # We reversed this range: transform to original locs + locs[valid] = len(self) - 1 - locs[valid] + return ensure_platform_int(locs) + + # -------------------------------------------------------------------- + + def tolist(self) -> list[int]: + return list(self._range) + + @doc(Int64Index.__iter__) + def __iter__(self): + yield from self._range + + @doc(Int64Index._shallow_copy) + def _shallow_copy(self, values, name: Hashable = no_default): + name = self.name if name is no_default else name + + if values.dtype.kind == "f": + return Float64Index(values, name=name) + return Int64Index._simple_new(values, name=name) + + def _view(self: RangeIndex) -> RangeIndex: + result = type(self)._simple_new(self._range, name=self._name) + result._cache = self._cache + return result + + @doc(Int64Index.copy) + def copy( + self, + name: Hashable = None, + deep: bool = False, + dtype: Dtype | None = None, + names=None, + ): + name = self._validate_names(name=name, names=names, deep=deep)[0] + new_index = self._rename(name=name) + + if dtype: + 
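+            # Note: converting via the dtype argument is deprecated (see the
+            # warning below); an explicit .astype call keeps the conversion
+            # visible at the call site.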
warnings.warn( + "parameter dtype is deprecated and will be removed in a future " + "version. Use the astype method instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + new_index = new_index.astype(dtype) + return new_index + + def _minmax(self, meth: str): + no_steps = len(self) - 1 + if no_steps == -1: + return np.nan + elif (meth == "min" and self.step > 0) or (meth == "max" and self.step < 0): + return self.start + + return self.start + self.step * no_steps + + def min(self, axis=None, skipna: bool = True, *args, **kwargs) -> int: + """The minimum value of the RangeIndex""" + nv.validate_minmax_axis(axis) + nv.validate_min(args, kwargs) + return self._minmax("min") + + def max(self, axis=None, skipna: bool = True, *args, **kwargs) -> int: + """The maximum value of the RangeIndex""" + nv.validate_minmax_axis(axis) + nv.validate_max(args, kwargs) + return self._minmax("max") + + def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]: + """ + Returns the indices that would sort the index and its + underlying data. + + Returns + ------- + np.ndarray[np.intp] + + See Also + -------- + numpy.ndarray.argsort + """ + ascending = kwargs.pop("ascending", True) # EA compat + kwargs.pop("kind", None) # e.g. "mergesort" is irrelevant + nv.validate_argsort(args, kwargs) + + if self._range.step > 0: + result = np.arange(len(self), dtype=np.intp) + else: + result = np.arange(len(self) - 1, -1, -1, dtype=np.intp) + + if not ascending: + result = result[::-1] + return result + + def factorize( + self, sort: bool = False, na_sentinel: int | None = -1 + ) -> tuple[npt.NDArray[np.intp], RangeIndex]: + codes = np.arange(len(self), dtype=np.intp) + uniques = self + if sort and self.step < 0: + codes = codes[::-1] + uniques = uniques[::-1] + return codes, uniques + + def equals(self, other: object) -> bool: + """ + Determines if two Index objects contain the same elements. 
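+
+        For example:
+
+        >>> pd.RangeIndex(0, 6, 2).equals(pd.Index([0, 2, 4]))
+        True
+        >>> pd.RangeIndex(3).equals(pd.RangeIndex(4))
+        False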
+        """
+        if isinstance(other, RangeIndex):
+            return self._range == other._range
+        return super().equals(other)
+
+    def sort_values(
+        self,
+        return_indexer: bool = False,
+        ascending: bool = True,
+        na_position: str = "last",
+        key: Callable | None = None,
+    ):
+        sorted_index = self
+        indexer = RangeIndex(range(len(self)))
+        if key is not None:
+            return super().sort_values(
+                return_indexer=return_indexer,
+                ascending=ascending,
+                na_position=na_position,
+                key=key,
+            )
+        else:
+            sorted_index = self
+            if ascending:
+                if self.step < 0:
+                    sorted_index = self[::-1]
+                    indexer = indexer[::-1]
+            else:
+                if self.step > 0:
+                    sorted_index = self[::-1]
+                    indexer = indexer[::-1]
+
+        if return_indexer:
+            return sorted_index, indexer
+        else:
+            return sorted_index
+
+    # --------------------------------------------------------------------
+    # Set Operations
+
+    def _intersection(self, other: Index, sort=False):
+        # caller is responsible for checking self and other are both non-empty
+
+        if not isinstance(other, RangeIndex):
+            # Int64Index
+            return super()._intersection(other, sort=sort)
+
+        first = self._range[::-1] if self.step < 0 else self._range
+        second = other._range[::-1] if other.step < 0 else other._range
+
+        # check whether intervals intersect
+        # deals with in- and decreasing ranges
+        int_low = max(first.start, second.start)
+        int_high = min(first.stop, second.stop)
+        if int_high <= int_low:
+            return self._simple_new(_empty_range)
+
+        # Method hint: linear Diophantine equation
+        # solve intersection problem
+        # performance hint: for identical step sizes, could use
+        # cheaper alternative
+        gcd, s, _ = self._extended_gcd(first.step, second.step)
+
+        # check whether element sets intersect
+        if (first.start - second.start) % gcd:
+            return self._simple_new(_empty_range)
+
+        # calculate parameters for the RangeIndex describing the
+        # intersection disregarding the lower bounds
+        tmp_start = first.start + (second.start - first.start) * first.step // gcd * s
+        new_step = first.step * second.step // gcd
+        new_range = range(tmp_start, int_high, new_step)
+        new_index = self._simple_new(new_range)
+
+        # adjust index to limiting interval
+        new_start = new_index._min_fitting_element(int_low)
+        new_range = range(new_start, new_index.stop, new_index.step)
+        new_index = self._simple_new(new_range)
+
+        if (self.step < 0 and other.step < 0) is not (new_index.step < 0):
+            new_index = new_index[::-1]
+
+        if sort is None:
+            new_index = new_index.sort_values()
+
+        return new_index
+
+    def _min_fitting_element(self, lower_limit: int) -> int:
+        """Returns the smallest element greater than or equal to the limit"""
+        no_steps = -(-(lower_limit - self.start) // abs(self.step))
+        return self.start + abs(self.step) * no_steps
+
+    def _extended_gcd(self, a: int, b: int) -> tuple[int, int, int]:
+        """
+        Extended Euclidean algorithm to solve Bezout's identity:
+           a*x + b*y = gcd(a, b)
+        Finds one particular solution for x, y: s, t
+        Returns: gcd, s, t
+        """
+        s, old_s = 0, 1
+        t, old_t = 1, 0
+        r, old_r = b, a
+        while r:
+            quotient = old_r // r
+            old_r, r = r, old_r - quotient * r
+            old_s, s = s, old_s - quotient * s
+            old_t, t = t, old_t - quotient * t
+        return old_r, old_s, old_t
+
+    def _union(self, other: Index, sort):
+        """
+        Form the union of two Index objects, sorting if possible.
+
+        Parameters
+        ----------
+        other : Index or array-like
+
+        sort : False or None, default None
+            Whether to sort the resulting index.
``sort=None`` returns a + monotonically increasing ``RangeIndex`` if possible or a sorted + ``Int64Index`` if not. ``sort=False`` always returns an + unsorted ``Int64Index`` + + .. versionadded:: 0.25.0 + + Returns + ------- + union : Index + """ + if isinstance(other, RangeIndex) and sort is None: + start_s, step_s = self.start, self.step + end_s = self.start + self.step * (len(self) - 1) + start_o, step_o = other.start, other.step + end_o = other.start + other.step * (len(other) - 1) + if self.step < 0: + start_s, step_s, end_s = end_s, -step_s, start_s + if other.step < 0: + start_o, step_o, end_o = end_o, -step_o, start_o + if len(self) == 1 and len(other) == 1: + step_s = step_o = abs(self.start - other.start) + elif len(self) == 1: + step_s = step_o + elif len(other) == 1: + step_o = step_s + start_r = min(start_s, start_o) + end_r = max(end_s, end_o) + if step_o == step_s: + if ( + (start_s - start_o) % step_s == 0 + and (start_s - end_o) <= step_s + and (start_o - end_s) <= step_s + ): + return type(self)(start_r, end_r + step_s, step_s) + if ( + (step_s % 2 == 0) + and (abs(start_s - start_o) == step_s / 2) + and (abs(end_s - end_o) == step_s / 2) + ): + # e.g. range(0, 10, 2) and range(1, 11, 2) + # but not range(0, 20, 4) and range(1, 21, 4) GH#44019 + return type(self)(start_r, end_r + step_s / 2, step_s / 2) + + elif step_o % step_s == 0: + if ( + (start_o - start_s) % step_s == 0 + and (start_o + step_s >= start_s) + and (end_o - step_s <= end_s) + ): + return type(self)(start_r, end_r + step_s, step_s) + elif step_s % step_o == 0: + if ( + (start_s - start_o) % step_o == 0 + and (start_s + step_o >= start_o) + and (end_s - step_o <= end_o) + ): + return type(self)(start_r, end_r + step_o, step_o) + + return super()._union(other, sort=sort) + + def _difference(self, other, sort=None): + # optimized set operation if we have another RangeIndex + self._validate_sort_keyword(sort) + self._assert_can_do_setop(other) + other, result_name = self._convert_can_do_setop(other) + + if not isinstance(other, RangeIndex): + return super()._difference(other, sort=sort) + + if sort is None and self.step < 0: + return self[::-1]._difference(other) + + res_name = ops.get_op_result_name(self, other) + + first = self._range[::-1] if self.step < 0 else self._range + overlap = self.intersection(other) + if overlap.step < 0: + overlap = overlap[::-1] + + if len(overlap) == 0: + return self.rename(name=res_name) + if len(overlap) == len(self): + return self[:0].rename(res_name) + + # overlap.step will always be a multiple of self.step (see _intersection) + + if len(overlap) == 1: + if overlap[0] == self[0]: + return self[1:] + + elif overlap[0] == self[-1]: + return self[:-1] + + elif len(self) == 3 and overlap[0] == self[1]: + return self[::2] + + else: + return super()._difference(other, sort=sort) + + elif len(overlap) == 2 and overlap[0] == first[0] and overlap[-1] == first[-1]: + # e.g. range(-8, 20, 7) and range(13, -9, -3) + return self[1:-1] + + if overlap.step == first.step: + if overlap[0] == first.start: + # The difference is everything after the intersection + new_rng = range(overlap[-1] + first.step, first.stop, first.step) + elif overlap[-1] == first[-1]: + # The difference is everything before the intersection + new_rng = range(first.start, overlap[0], first.step) + elif overlap._range == first[1:-1]: + # e.g. range(4) and range(1, 3) + step = len(first) - 1 + new_rng = first[::step] + else: + # The difference is not range-like + # e.g. 
range(1, 10, 1) and range(3, 7, 1)
+                return super()._difference(other, sort=sort)
+
+        else:
+            # We must have len(self) > 1, because we ruled out above
+            # len(overlap) == 0 and len(overlap) == len(self)
+            assert len(self) > 1
+
+            if overlap.step == first.step * 2:
+                if overlap[0] == first[0] and overlap[-1] in (first[-1], first[-2]):
+                    # e.g. range(1, 10, 1) and range(1, 10, 2)
+                    new_rng = first[1::2]
+
+                elif overlap[0] == first[1] and overlap[-1] in (first[-1], first[-2]):
+                    # e.g. range(1, 10, 1) and range(2, 10, 2)
+                    new_rng = first[::2]
+
+                else:
+                    # We can get here with e.g. range(20) and range(0, 10, 2)
+                    return super()._difference(other, sort=sort)
+
+            else:
+                # e.g. range(10) and range(0, 10, 3)
+                return super()._difference(other, sort=sort)
+
+        new_index = type(self)._simple_new(new_rng, name=res_name)
+        if first is not self._range:
+            new_index = new_index[::-1]
+
+        return new_index
+
+    def symmetric_difference(self, other, result_name: Hashable = None, sort=None):
+        if not isinstance(other, RangeIndex) or sort is not None:
+            return super().symmetric_difference(other, result_name, sort)
+
+        left = self.difference(other)
+        right = other.difference(self)
+        result = left.union(right)
+
+        if result_name is not None:
+            result = result.rename(result_name)
+        return result
+
+    # --------------------------------------------------------------------
+
+    # error: Return type "Index" of "delete" incompatible with return type
+    # "RangeIndex" in supertype "Index"
+    def delete(self, loc) -> Index:  # type: ignore[override]
+        # In some cases we can retain RangeIndex, see also
+        # DatetimeTimedeltaMixin._get_delete_freq
+        if is_integer(loc):
+            if loc == 0 or loc == -len(self):
+                return self[1:]
+            if loc == -1 or loc == len(self) - 1:
+                return self[:-1]
+            if len(self) == 3 and (loc == 1 or loc == -2):
+                return self[::2]
+
+        elif lib.is_list_like(loc):
+            slc = lib.maybe_indices_to_slice(np.asarray(loc, dtype=np.intp), len(self))
+
+            if isinstance(slc, slice):
+                # defer to RangeIndex._difference, which is optimized to return
+                # a RangeIndex whenever possible
+                other = self[slc]
+                return self.difference(other, sort=False)
+
+        return super().delete(loc)
+
+    def insert(self, loc: int, item) -> Index:
+        if len(self) and (is_integer(item) or is_float(item)):
+            # We can retain RangeIndex if inserting at the beginning or end,
+            # or right in the middle.
+            rng = self._range
+            if loc == 0 and item == self[0] - self.step:
+                new_rng = range(rng.start - rng.step, rng.stop, rng.step)
+                return type(self)._simple_new(new_rng, name=self.name)
+
+            elif loc == len(self) and item == self[-1] + self.step:
+                new_rng = range(rng.start, rng.stop + rng.step, rng.step)
+                return type(self)._simple_new(new_rng, name=self.name)
+
+            elif len(self) == 2 and item == self[0] + self.step / 2:
+                # e.g. inserting 1 into [0, 2]
+                step = int(self.step / 2)
+                new_rng = range(self.start, self.stop, step)
+                return type(self)._simple_new(new_rng, name=self.name)
+
+        return super().insert(loc, item)
+
+    def _concat(self, indexes: list[Index], name: Hashable) -> Index:
+        """
+        Overriding parent method for the case of all RangeIndex instances.
+
+        When all members of "indexes" are of type RangeIndex: result will be
+        RangeIndex if possible, Int64Index otherwise.
E.g.: + indexes = [RangeIndex(3), RangeIndex(3, 6)] -> RangeIndex(6) + indexes = [RangeIndex(3), RangeIndex(4, 6)] -> Int64Index([0,1,2,4,5]) + """ + if not all(isinstance(x, RangeIndex) for x in indexes): + return super()._concat(indexes, name) + + elif len(indexes) == 1: + return indexes[0] + + rng_indexes = cast(List[RangeIndex], indexes) + + start = step = next_ = None + + # Filter the empty indexes + non_empty_indexes = [obj for obj in rng_indexes if len(obj)] + + for obj in non_empty_indexes: + rng = obj._range + + if start is None: + # This is set by the first non-empty index + start = rng.start + if step is None and len(rng) > 1: + step = rng.step + elif step is None: + # First non-empty index had only one element + if rng.start == start: + values = np.concatenate([x._values for x in rng_indexes]) + result = Int64Index(values) + return result.rename(name) + + step = rng.start - start + + non_consecutive = (step != rng.step and len(rng) > 1) or ( + next_ is not None and rng.start != next_ + ) + if non_consecutive: + result = Int64Index(np.concatenate([x._values for x in rng_indexes])) + return result.rename(name) + + if step is not None: + next_ = rng[-1] + step + + if non_empty_indexes: + # Get the stop value from "next" or alternatively + # from the last non-empty index + stop = non_empty_indexes[-1].stop if next_ is None else next_ + return RangeIndex(start, stop, step).rename(name) + + # Here all "indexes" had 0 length, i.e. were empty. + # In this case return an empty range index. + return RangeIndex(0, 0).rename(name) + + def __len__(self) -> int: + """ + return the length of the RangeIndex + """ + return len(self._range) + + @property + def size(self) -> int: + return len(self) + + def __getitem__(self, key): + """ + Conserve RangeIndex type for scalar and slice keys. + """ + if isinstance(key, slice): + new_range = self._range[key] + return self._simple_new(new_range, name=self._name) + elif is_integer(key): + new_key = int(key) + try: + return self._range[new_key] + except IndexError as err: + raise IndexError( + f"index {key} is out of bounds for axis 0 with size {len(self)}" + ) from err + elif is_scalar(key): + raise IndexError( + "only integers, slices (`:`), " + "ellipsis (`...`), numpy.newaxis (`None`) " + "and integer or boolean " + "arrays are valid indices" + ) + # fall back to Int64Index + return super().__getitem__(key) + + def _getitem_slice(self: RangeIndex, slobj: slice) -> RangeIndex: + """ + Fastpath for __getitem__ when we know we have a slice. 
+ """ + res = self._range[slobj] + return type(self)._simple_new(res, name=self._name) + + @unpack_zerodim_and_defer("__floordiv__") + def __floordiv__(self, other): + + if is_integer(other) and other != 0: + if len(self) == 0 or self.start % other == 0 and self.step % other == 0: + start = self.start // other + step = self.step // other + stop = start + len(self) * step + new_range = range(start, stop, step or 1) + return self._simple_new(new_range, name=self.name) + if len(self) == 1: + start = self.start // other + new_range = range(start, start + 1, 1) + return self._simple_new(new_range, name=self.name) + + return super().__floordiv__(other) + + # -------------------------------------------------------------------- + # Reductions + + def all(self, *args, **kwargs) -> bool: + return 0 not in self._range + + def any(self, *args, **kwargs) -> bool: + return any(self._range) + + # -------------------------------------------------------------------- + + def _cmp_method(self, other, op): + if isinstance(other, RangeIndex) and self._range == other._range: + # Both are immutable so if ._range attr. are equal, shortcut is possible + return super()._cmp_method(self, op) + return super()._cmp_method(other, op) + + def _arith_method(self, other, op): + """ + Parameters + ---------- + other : Any + op : callable that accepts 2 params + perform the binary op + """ + + if isinstance(other, ABCTimedeltaIndex): + # Defer to TimedeltaIndex implementation + return NotImplemented + elif isinstance(other, (timedelta, np.timedelta64)): + # GH#19333 is_integer evaluated True on timedelta64, + # so we need to catch these explicitly + return super()._arith_method(other, op) + elif is_timedelta64_dtype(other): + # Must be an np.ndarray; GH#22390 + return super()._arith_method(other, op) + + if op in [ + operator.pow, + ops.rpow, + operator.mod, + ops.rmod, + operator.floordiv, + ops.rfloordiv, + divmod, + ops.rdivmod, + ]: + return super()._arith_method(other, op) + + step: Callable | None = None + if op in [operator.mul, ops.rmul, operator.truediv, ops.rtruediv]: + step = op + + # TODO: if other is a RangeIndex we may have more efficient options + right = extract_array(other, extract_numpy=True, extract_range=True) + left = self + + try: + # apply if we have an override + if step: + with np.errstate(all="ignore"): + rstep = step(left.step, right) + + # we don't have a representable op + # so return a base index + if not is_integer(rstep) or not rstep: + raise ValueError + + else: + rstep = left.step + + with np.errstate(all="ignore"): + rstart = op(left.start, right) + rstop = op(left.stop, right) + + res_name = ops.get_op_result_name(self, other) + result = type(self)(rstart, rstop, rstep, name=res_name) + + # for compat with numpy / Int64Index + # even if we can represent as a RangeIndex, return + # as a Float64Index if we have float-like descriptors + if not all(is_integer(x) for x in [rstart, rstop, rstep]): + result = result.astype("float64") + + return result + + except (ValueError, TypeError, ZeroDivisionError): + # Defer to Int64Index implementation + # test_arithmetic_explicit_conversions + return super()._arith_method(other, op) diff --git a/.local/lib/python3.8/site-packages/pandas/core/indexes/timedeltas.py b/.local/lib/python3.8/site-packages/pandas/core/indexes/timedeltas.py new file mode 100644 index 0000000..0249bf5 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/indexes/timedeltas.py @@ -0,0 +1,276 @@ +""" implement the TimedeltaIndex """ +from __future__ import annotations 
+
+from pandas._libs import (
+    index as libindex,
+    lib,
+)
+from pandas._libs.tslibs import (
+    Timedelta,
+    to_offset,
+)
+from pandas._typing import DtypeObj
+
+from pandas.core.dtypes.common import (
+    TD64NS_DTYPE,
+    is_scalar,
+    is_timedelta64_dtype,
+)
+
+from pandas.core.arrays import datetimelike as dtl
+from pandas.core.arrays.timedeltas import TimedeltaArray
+import pandas.core.common as com
+from pandas.core.indexes.base import (
+    Index,
+    maybe_extract_name,
+)
+from pandas.core.indexes.datetimelike import DatetimeTimedeltaMixin
+from pandas.core.indexes.extension import inherit_names
+
+
+@inherit_names(
+    ["__neg__", "__pos__", "__abs__", "total_seconds", "round", "floor", "ceil"]
+    + TimedeltaArray._field_ops,
+    TimedeltaArray,
+    wrap=True,
+)
+@inherit_names(
+    [
+        "components",
+        "to_pytimedelta",
+        "sum",
+        "std",
+        "median",
+        "_format_native_types",
+    ],
+    TimedeltaArray,
+)
+class TimedeltaIndex(DatetimeTimedeltaMixin):
+    """
+    Immutable ndarray of timedelta64 data, represented internally as int64, and
+    which can be boxed to timedelta objects.
+
+    Parameters
+    ----------
+    data : array-like (1-dimensional), optional
+        Optional timedelta-like data to construct index with.
+    unit : str, optional
+        The unit of the data (D, h, m, s, ms, us, ns).
+    freq : str or pandas offset object, optional
+        One of pandas date offset strings or corresponding objects. The string
+        'infer' can be passed in order to set the frequency of the index as the
+        inferred frequency upon creation.
+    copy : bool
+        Make a copy of input ndarray.
+    name : object
+        Name to be stored in the index.
+
+    Attributes
+    ----------
+    days
+    seconds
+    microseconds
+    nanoseconds
+    components
+    inferred_freq
+
+    Methods
+    -------
+    to_pytimedelta
+    to_series
+    round
+    floor
+    ceil
+    to_frame
+    mean
+
+    See Also
+    --------
+    Index : The base pandas Index type.
+    Timedelta : Represents a duration between two dates or times.
+    DatetimeIndex : Index of datetime64 data.
+    PeriodIndex : Index of Period data.
+    timedelta_range : Create a fixed-frequency TimedeltaIndex.
+
+    Notes
+    -----
+    To learn more about the frequency strings, please see `this link
+    `__.
+    """
+
+    _typ = "timedeltaindex"
+
+    _data_cls = TimedeltaArray
+    _engine_type = libindex.TimedeltaEngine
+
+    _data: TimedeltaArray
+
+    # Use base class method instead of DatetimeTimedeltaMixin._get_string_slice
+    _get_string_slice = Index._get_string_slice
+
+    # -------------------------------------------------------------------
+    # Constructors
+
+    def __new__(
+        cls,
+        data=None,
+        unit=None,
+        freq=lib.no_default,
+        closed=None,
+        dtype=TD64NS_DTYPE,
+        copy=False,
+        name=None,
+    ):
+        name = maybe_extract_name(name, data, cls)
+
+        if is_scalar(data):
+            raise cls._scalar_data_error(data)
+
+        if unit in {"Y", "y", "M"}:
+            raise ValueError(
+                "Units 'M', 'Y', and 'y' are no longer supported, as they do not "
+                "represent unambiguous timedelta durations."
+ ) + + if isinstance(data, TimedeltaArray) and freq is lib.no_default: + if copy: + data = data.copy() + return cls._simple_new(data, name=name) + + if isinstance(data, TimedeltaIndex) and freq is lib.no_default and name is None: + if copy: + return data.copy() + else: + return data._view() + + # - Cases checked above all return/raise before reaching here - # + + tdarr = TimedeltaArray._from_sequence_not_strict( + data, freq=freq, unit=unit, dtype=dtype, copy=copy + ) + return cls._simple_new(tdarr, name=name) + + # ------------------------------------------------------------------- + + def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: + """ + Can we compare values of the given dtype to our own? + """ + return is_timedelta64_dtype(dtype) # aka self._data._is_recognized_dtype + + # ------------------------------------------------------------------- + # Indexing Methods + + def get_loc(self, key, method=None, tolerance=None): + """ + Get integer location for requested label + + Returns + ------- + loc : int, slice, or ndarray[int] + """ + self._check_indexing_error(key) + + try: + key = self._data._validate_scalar(key, unbox=False) + except TypeError as err: + raise KeyError(key) from err + + return Index.get_loc(self, key, method, tolerance) + + def _parse_with_reso(self, label: str): + # the "with_reso" is a no-op for TimedeltaIndex + parsed = Timedelta(label) + return parsed, None + + def _parsed_string_to_bounds(self, reso, parsed: Timedelta): + # reso is unused, included to match signature of DTI/PI + lbound = parsed.round(parsed.resolution_string) + rbound = lbound + to_offset(parsed.resolution_string) - Timedelta(1, "ns") + return lbound, rbound + + # ------------------------------------------------------------------- + + @property + def inferred_type(self) -> str: + return "timedelta64" + + +def timedelta_range( + start=None, + end=None, + periods: int | None = None, + freq=None, + name=None, + closed=None, +) -> TimedeltaIndex: + """ + Return a fixed frequency TimedeltaIndex, with day as the default + frequency. + + Parameters + ---------- + start : str or timedelta-like, default None + Left bound for generating timedeltas. + end : str or timedelta-like, default None + Right bound for generating timedeltas. + periods : int, default None + Number of periods to generate. + freq : str or DateOffset, default 'D' + Frequency strings can have multiples, e.g. '5H'. + name : str, default None + Name of the resulting TimedeltaIndex. + closed : str, default None + Make the interval closed with respect to the given frequency to + the 'left', 'right', or both sides (None). + + Returns + ------- + TimedeltaIndex + + Notes + ----- + Of the four parameters ``start``, ``end``, ``periods``, and ``freq``, + exactly three must be specified. If ``freq`` is omitted, the resulting + ``TimedeltaIndex`` will have ``periods`` linearly spaced elements between + ``start`` and ``end`` (closed on both sides). + + To learn more about the frequency strings, please see `this link + `__. + + Examples + -------- + >>> pd.timedelta_range(start='1 day', periods=4) + TimedeltaIndex(['1 days', '2 days', '3 days', '4 days'], + dtype='timedelta64[ns]', freq='D') + + The ``closed`` parameter specifies which endpoint is included. The default + behavior is to include both endpoints. + + >>> pd.timedelta_range(start='1 day', periods=4, closed='right') + TimedeltaIndex(['2 days', '3 days', '4 days'], + dtype='timedelta64[ns]', freq='D') + + The ``freq`` parameter specifies the frequency of the TimedeltaIndex. 
+ Only fixed frequencies can be passed, non-fixed frequencies such as + 'M' (month end) will raise. + + >>> pd.timedelta_range(start='1 day', end='2 days', freq='6H') + TimedeltaIndex(['1 days 00:00:00', '1 days 06:00:00', '1 days 12:00:00', + '1 days 18:00:00', '2 days 00:00:00'], + dtype='timedelta64[ns]', freq='6H') + + Specify ``start``, ``end``, and ``periods``; the frequency is generated + automatically (linearly spaced). + + >>> pd.timedelta_range(start='1 day', end='5 days', periods=4) + TimedeltaIndex(['1 days 00:00:00', '2 days 08:00:00', '3 days 16:00:00', + '5 days 00:00:00'], + dtype='timedelta64[ns]', freq=None) + """ + if freq is None and com.any_none(periods, start, end): + freq = "D" + + freq, _ = dtl.maybe_infer_freq(freq) + tdarr = TimedeltaArray._generate_range(start, end, periods, freq, closed=closed) + return TimedeltaIndex._simple_new(tdarr, name=name) diff --git a/.local/lib/python3.8/site-packages/pandas/core/indexing.py b/.local/lib/python3.8/site-packages/pandas/core/indexing.py new file mode 100644 index 0000000..0c3640e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/indexing.py @@ -0,0 +1,2497 @@ +from __future__ import annotations + +from contextlib import suppress +from typing import ( + TYPE_CHECKING, + Hashable, + Sequence, +) +import warnings + +import numpy as np + +from pandas._libs.indexing import NDFrameIndexerBase +from pandas._libs.lib import item_from_zerodim +from pandas.errors import ( + AbstractMethodError, + InvalidIndexError, +) +from pandas.util._decorators import doc +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + is_array_like, + is_bool_dtype, + is_hashable, + is_integer, + is_iterator, + is_list_like, + is_numeric_dtype, + is_object_dtype, + is_scalar, + is_sequence, +) +from pandas.core.dtypes.concat import concat_compat +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) +from pandas.core.dtypes.missing import ( + infer_fill_value, + isna, +) + +from pandas.core import algorithms as algos +import pandas.core.common as com +from pandas.core.construction import ( + array as pd_array, + extract_array, +) +from pandas.core.indexers import ( + check_array_indexer, + is_empty_indexer, + is_exact_shape_match, + is_list_like_indexer, + length_of_indexer, +) +from pandas.core.indexes.api import ( + Index, + MultiIndex, +) + +if TYPE_CHECKING: + from pandas import ( + DataFrame, + Series, + ) + +# "null slice" +_NS = slice(None, None) +_one_ellipsis_message = "indexer may only contain one '...' entry" + + +# the public IndexSlicerMaker +class _IndexSlice: + """ + Create an object to more easily perform multi-index slicing. + + See Also + -------- + MultiIndex.remove_unused_levels : New MultiIndex with no unused levels. + + Notes + ----- + See :ref:`Defined Levels ` + for further info on slicing a MultiIndex. + + Examples + -------- + >>> midx = pd.MultiIndex.from_product([['A0','A1'], ['B0','B1','B2','B3']]) + >>> columns = ['foo', 'bar'] + >>> dfmi = pd.DataFrame(np.arange(16).reshape((len(midx), len(columns))), + ... 
index=midx, columns=columns)
+
+    Using the default slice command:
+
+    >>> dfmi.loc[(slice(None), slice('B0', 'B1')), :]
+           foo  bar
+    A0 B0    0    1
+       B1    2    3
+    A1 B0    8    9
+       B1   10   11
+
+    Using the IndexSlice class for a more intuitive command:
+
+    >>> idx = pd.IndexSlice
+    >>> dfmi.loc[idx[:, 'B0':'B1'], :]
+           foo  bar
+    A0 B0    0    1
+       B1    2    3
+    A1 B0    8    9
+       B1   10   11
+    """
+
+    def __getitem__(self, arg):
+        return arg
+
+
+IndexSlice = _IndexSlice()
+
+
+class IndexingError(Exception):
+    pass
+
+
+class IndexingMixin:
+    """
+    Mixin for adding .loc/.iloc/.at/.iat to Dataframes and Series.
+    """
+
+    @property
+    def iloc(self) -> _iLocIndexer:
+        """
+        Purely integer-location based indexing for selection by position.
+
+        ``.iloc[]`` is primarily integer position based (from ``0`` to
+        ``length-1`` of the axis), but may also be used with a boolean
+        array.
+
+        Allowed inputs are:
+
+        - An integer, e.g. ``5``.
+        - A list or array of integers, e.g. ``[4, 3, 0]``.
+        - A slice object with ints, e.g. ``1:7``.
+        - A boolean array.
+        - A ``callable`` function with one argument (the calling Series or
+          DataFrame) and that returns valid output for indexing (one of the above).
+          This is useful in method chains, when you don't have a reference to the
+          calling object, but would like to base your selection on some value.
+
+        ``.iloc`` will raise ``IndexError`` if a requested indexer is
+        out-of-bounds, except *slice* indexers which allow out-of-bounds
+        indexing (this conforms with python/numpy *slice* semantics).
+
+        See more at :ref:`Selection by Position `.
+
+        See Also
+        --------
+        DataFrame.iat : Fast integer location scalar accessor.
+        DataFrame.loc : Purely label-location based indexer for selection by label.
+        Series.iloc : Purely integer-location based indexing for
+            selection by position.
+
+        Examples
+        --------
+        >>> mydict = [{'a': 1, 'b': 2, 'c': 3, 'd': 4},
+        ...           {'a': 100, 'b': 200, 'c': 300, 'd': 400},
+        ...           {'a': 1000, 'b': 2000, 'c': 3000, 'd': 4000 }]
+        >>> df = pd.DataFrame(mydict)
+        >>> df
+              a     b     c     d
+        0     1     2     3     4
+        1   100   200   300   400
+        2  1000  2000  3000  4000
+
+        **Indexing just the rows**
+
+        With a scalar integer.
+
+        >>> type(df.iloc[0])
+        <class 'pandas.core.series.Series'>
+        >>> df.iloc[0]
+        a    1
+        b    2
+        c    3
+        d    4
+        Name: 0, dtype: int64
+
+        With a list of integers.
+
+        >>> df.iloc[[0]]
+           a  b  c  d
+        0  1  2  3  4
+        >>> type(df.iloc[[0]])
+        <class 'pandas.core.frame.DataFrame'>
+
+        >>> df.iloc[[0, 1]]
+             a    b    c    d
+        0    1    2    3    4
+        1  100  200  300  400
+
+        With a `slice` object.
+
+        >>> df.iloc[:3]
+              a     b     c     d
+        0     1     2     3     4
+        1   100   200   300   400
+        2  1000  2000  3000  4000
+
+        With a boolean mask the same length as the index.
+
+        >>> df.iloc[[True, False, True]]
+              a     b     c     d
+        0     1     2     3     4
+        2  1000  2000  3000  4000
+
+        With a callable, useful in method chains. The `x` passed
+        to the ``lambda`` is the DataFrame being sliced. This selects
+        the rows whose index label is even.
+
+        >>> df.iloc[lambda x: x.index % 2 == 0]
+              a     b     c     d
+        0     1     2     3     4
+        2  1000  2000  3000  4000
+
+        **Indexing both axes**
+
+        You can mix the indexer types for the index and columns. Use ``:`` to
+        select the entire axis.
+
+        With scalar integers.
+
+        >>> df.iloc[0, 1]
+        2
+
+        With lists of integers.
+
+        >>> df.iloc[[0, 2], [1, 3]]
+              b     d
+        0     2     4
+        2  2000  4000
+
+        With `slice` objects.
+
+        >>> df.iloc[1:3, 0:3]
+              a     b     c
+        1   100   200   300
+        2  1000  2000  3000
+
+        With a boolean array whose length matches the columns.
+
+        >>> df.iloc[:, [True, False, True, False]]
+              a     c
+        0     1     3
+        1   100   300
+        2  1000  3000
+
+        With a callable function that expects the Series or DataFrame.
+ + >>> df.iloc[:, lambda df: [0, 2]] + a c + 0 1 3 + 1 100 300 + 2 1000 3000 + """ + return _iLocIndexer("iloc", self) + + @property + def loc(self) -> _LocIndexer: + """ + Access a group of rows and columns by label(s) or a boolean array. + + ``.loc[]`` is primarily label based, but may also be used with a + boolean array. + + Allowed inputs are: + + - A single label, e.g. ``5`` or ``'a'``, (note that ``5`` is + interpreted as a *label* of the index, and **never** as an + integer position along the index). + - A list or array of labels, e.g. ``['a', 'b', 'c']``. + - A slice object with labels, e.g. ``'a':'f'``. + + .. warning:: Note that contrary to usual python slices, **both** the + start and the stop are included + + - A boolean array of the same length as the axis being sliced, + e.g. ``[True, False, True]``. + - An alignable boolean Series. The index of the key will be aligned before + masking. + - An alignable Index. The Index of the returned selection will be the input. + - A ``callable`` function with one argument (the calling Series or + DataFrame) and that returns valid output for indexing (one of the above) + + See more at :ref:`Selection by Label `. + + Raises + ------ + KeyError + If any items are not found. + IndexingError + If an indexed key is passed and its index is unalignable to the frame index. + + See Also + -------- + DataFrame.at : Access a single value for a row/column label pair. + DataFrame.iloc : Access group of rows and columns by integer position(s). + DataFrame.xs : Returns a cross-section (row(s) or column(s)) from the + Series/DataFrame. + Series.loc : Access group of values using labels. + + Examples + -------- + **Getting values** + + >>> df = pd.DataFrame([[1, 2], [4, 5], [7, 8]], + ... index=['cobra', 'viper', 'sidewinder'], + ... columns=['max_speed', 'shield']) + >>> df + max_speed shield + cobra 1 2 + viper 4 5 + sidewinder 7 8 + + Single label. Note this returns the row as a Series. + + >>> df.loc['viper'] + max_speed 4 + shield 5 + Name: viper, dtype: int64 + + List of labels. Note using ``[[]]`` returns a DataFrame. + + >>> df.loc[['viper', 'sidewinder']] + max_speed shield + viper 4 5 + sidewinder 7 8 + + Single label for row and column + + >>> df.loc['cobra', 'shield'] + 2 + + Slice with labels for row and single label for column. As mentioned + above, note that both the start and stop of the slice are included. + + >>> df.loc['cobra':'viper', 'max_speed'] + cobra 1 + viper 4 + Name: max_speed, dtype: int64 + + Boolean list with the same length as the row axis + + >>> df.loc[[False, False, True]] + max_speed shield + sidewinder 7 8 + + Alignable boolean Series: + + >>> df.loc[pd.Series([False, True, False], + ... 
index=['viper', 'sidewinder', 'cobra'])] + max_speed shield + sidewinder 7 8 + + Index (same behavior as ``df.reindex``) + + >>> df.loc[pd.Index(["cobra", "viper"], name="foo")] + max_speed shield + foo + cobra 1 2 + viper 4 5 + + Conditional that returns a boolean Series + + >>> df.loc[df['shield'] > 6] + max_speed shield + sidewinder 7 8 + + Conditional that returns a boolean Series with column labels specified + + >>> df.loc[df['shield'] > 6, ['max_speed']] + max_speed + sidewinder 7 + + Callable that returns a boolean Series + + >>> df.loc[lambda df: df['shield'] == 8] + max_speed shield + sidewinder 7 8 + + **Setting values** + + Set value for all items matching the list of labels + + >>> df.loc[['viper', 'sidewinder'], ['shield']] = 50 + >>> df + max_speed shield + cobra 1 2 + viper 4 50 + sidewinder 7 50 + + Set value for an entire row + + >>> df.loc['cobra'] = 10 + >>> df + max_speed shield + cobra 10 10 + viper 4 50 + sidewinder 7 50 + + Set value for an entire column + + >>> df.loc[:, 'max_speed'] = 30 + >>> df + max_speed shield + cobra 30 10 + viper 30 50 + sidewinder 30 50 + + Set value for rows matching callable condition + + >>> df.loc[df['shield'] > 35] = 0 + >>> df + max_speed shield + cobra 30 10 + viper 0 0 + sidewinder 0 0 + + **Getting values on a DataFrame with an index that has integer labels** + + Another example using integers for the index + + >>> df = pd.DataFrame([[1, 2], [4, 5], [7, 8]], + ... index=[7, 8, 9], columns=['max_speed', 'shield']) + >>> df + max_speed shield + 7 1 2 + 8 4 5 + 9 7 8 + + Slice with integer labels for rows. As mentioned above, note that both + the start and stop of the slice are included. + + >>> df.loc[7:9] + max_speed shield + 7 1 2 + 8 4 5 + 9 7 8 + + **Getting values with a MultiIndex** + + A number of examples using a DataFrame with a MultiIndex + + >>> tuples = [ + ... ('cobra', 'mark i'), ('cobra', 'mark ii'), + ... ('sidewinder', 'mark i'), ('sidewinder', 'mark ii'), + ... ('viper', 'mark ii'), ('viper', 'mark iii') + ... ] + >>> index = pd.MultiIndex.from_tuples(tuples) + >>> values = [[12, 2], [0, 4], [10, 20], + ... [1, 4], [7, 1], [16, 36]] + >>> df = pd.DataFrame(values, columns=['max_speed', 'shield'], index=index) + >>> df + max_speed shield + cobra mark i 12 2 + mark ii 0 4 + sidewinder mark i 10 20 + mark ii 1 4 + viper mark ii 7 1 + mark iii 16 36 + + Single label. Note this returns a DataFrame with a single index. + + >>> df.loc['cobra'] + max_speed shield + mark i 12 2 + mark ii 0 4 + + Single index tuple. Note this returns a Series. + + >>> df.loc[('cobra', 'mark ii')] + max_speed 0 + shield 4 + Name: (cobra, mark ii), dtype: int64 + + Single label for row and column. Similar to passing in a tuple, this + returns a Series. + + >>> df.loc['cobra', 'mark i'] + max_speed 12 + shield 2 + Name: (cobra, mark i), dtype: int64 + + Single tuple. Note using ``[[]]`` returns a DataFrame. 
+ + >>> df.loc[[('cobra', 'mark ii')]] + max_speed shield + cobra mark ii 0 4 + + Single tuple for the index with a single label for the column + + >>> df.loc[('cobra', 'mark i'), 'shield'] + 2 + + Slice from index tuple to single label + + >>> df.loc[('cobra', 'mark i'):'viper'] + max_speed shield + cobra mark i 12 2 + mark ii 0 4 + sidewinder mark i 10 20 + mark ii 1 4 + viper mark ii 7 1 + mark iii 16 36 + + Slice from index tuple to index tuple + + >>> df.loc[('cobra', 'mark i'):('viper', 'mark ii')] + max_speed shield + cobra mark i 12 2 + mark ii 0 4 + sidewinder mark i 10 20 + mark ii 1 4 + viper mark ii 7 1 + """ + return _LocIndexer("loc", self) + + @property + def at(self) -> _AtIndexer: + """ + Access a single value for a row/column label pair. + + Similar to ``loc``, in that both provide label-based lookups. Use + ``at`` if you only need to get or set a single value in a DataFrame + or Series. + + Raises + ------ + KeyError + If 'label' does not exist in DataFrame. + + See Also + -------- + DataFrame.iat : Access a single value for a row/column pair by integer + position. + DataFrame.loc : Access a group of rows and columns by label(s). + Series.at : Access a single value using a label. + + Examples + -------- + >>> df = pd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]], + ... index=[4, 5, 6], columns=['A', 'B', 'C']) + >>> df + A B C + 4 0 2 3 + 5 0 4 1 + 6 10 20 30 + + Get value at specified row/column pair + + >>> df.at[4, 'B'] + 2 + + Set value at specified row/column pair + + >>> df.at[4, 'B'] = 10 + >>> df.at[4, 'B'] + 10 + + Get value within a Series + + >>> df.loc[5].at['B'] + 4 + """ + return _AtIndexer("at", self) + + @property + def iat(self) -> _iAtIndexer: + """ + Access a single value for a row/column pair by integer position. + + Similar to ``iloc``, in that both provide integer-based lookups. Use + ``iat`` if you only need to get or set a single value in a DataFrame + or Series. + + Raises + ------ + IndexError + When integer position is out of bounds. + + See Also + -------- + DataFrame.at : Access a single value for a row/column label pair. + DataFrame.loc : Access a group of rows and columns by label(s). + DataFrame.iloc : Access a group of rows and columns by integer position(s). + + Examples + -------- + >>> df = pd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]], + ... columns=['A', 'B', 'C']) + >>> df + A B C + 0 0 2 3 + 1 0 4 1 + 2 10 20 30 + + Get value at specified row/column pair + + >>> df.iat[1, 2] + 1 + + Set value at specified row/column pair + + >>> df.iat[1, 2] = 10 + >>> df.iat[1, 2] + 10 + + Get value within a series + + >>> df.loc[0].iat[1] + 2 + """ + return _iAtIndexer("iat", self) + + +class _LocationIndexer(NDFrameIndexerBase): + _valid_types: str + axis = None + + def __call__(self, axis=None): + # we need to return a copy of ourselves + new_self = type(self)(self.name, self.obj) + + if axis is not None: + axis = self.obj._get_axis_number(axis) + new_self.axis = axis + return new_self + + def _get_setitem_indexer(self, key): + """ + Convert a potentially-label-based key into a positional indexer. + """ + if self.name == "loc": + self._ensure_listlike_indexer(key) + + if isinstance(key, tuple): + for x in key: + check_deprecated_indexers(x) + + if self.axis is not None: + return self._convert_tuple(key) + + ax = self.obj._get_axis(0) + + if isinstance(ax, MultiIndex) and self.name != "iloc" and is_hashable(key): + with suppress(KeyError, InvalidIndexError): + # TypeError e.g. 
passed a bool + return ax.get_loc(key) + + if isinstance(key, tuple): + with suppress(IndexingError): + return self._convert_tuple(key) + + if isinstance(key, range): + return list(key) + + return self._convert_to_indexer(key, axis=0) + + def _ensure_listlike_indexer(self, key, axis=None, value=None): + """ + Ensure that a list-like of column labels are all present by adding them if + they do not already exist. + + Parameters + ---------- + key : list-like of column labels + Target labels. + axis : key axis if known + """ + column_axis = 1 + + # column only exists in 2-dimensional DataFrame + if self.ndim != 2: + return + + if isinstance(key, tuple) and len(key) > 1: + # key may be a tuple if we are .loc + # if length of key is > 1 set key to column part + key = key[column_axis] + axis = column_axis + + if ( + axis == column_axis + and not isinstance(self.obj.columns, MultiIndex) + and is_list_like_indexer(key) + and not com.is_bool_indexer(key) + and all(is_hashable(k) for k in key) + ): + # GH#38148 + keys = self.obj.columns.union(key, sort=False) + + self.obj._mgr = self.obj._mgr.reindex_axis( + keys, axis=0, consolidate=False, only_slice=True + ) + + def __setitem__(self, key, value): + check_deprecated_indexers(key) + if isinstance(key, tuple): + key = tuple(list(x) if is_iterator(x) else x for x in key) + key = tuple(com.apply_if_callable(x, self.obj) for x in key) + else: + key = com.apply_if_callable(key, self.obj) + indexer = self._get_setitem_indexer(key) + self._has_valid_setitem_indexer(key) + + iloc = self if self.name == "iloc" else self.obj.iloc + iloc._setitem_with_indexer(indexer, value, self.name) + + def _validate_key(self, key, axis: int): + """ + Ensure that key is valid for current indexer. + + Parameters + ---------- + key : scalar, slice or list-like + Key requested. + axis : int + Dimension on which the indexing is being made. + + Raises + ------ + TypeError + If the key (or some element of it) has wrong type. + IndexError + If the key (or some element of it) is out of bounds. + KeyError + If the key was not found. + """ + raise AbstractMethodError(self) + + def _expand_ellipsis(self, tup: tuple) -> tuple: + """ + If a tuple key includes an Ellipsis, replace it with an appropriate + number of null slices. + """ + if any(x is Ellipsis for x in tup): + if tup.count(Ellipsis) > 1: + raise IndexingError(_one_ellipsis_message) + + if len(tup) == self.ndim: + # It is unambiguous what axis this Ellipsis is indexing, + # treat as a single null slice. + i = tup.index(Ellipsis) + # FIXME: this assumes only one Ellipsis + new_key = tup[:i] + (_NS,) + tup[i + 1 :] + return new_key + + # TODO: other cases? only one test gets here, and that is covered + # by _validate_key_length + return tup + + def _validate_tuple_indexer(self, key: tuple) -> tuple: + """ + Check the key for valid keys across my indexer. 
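+
+        Returns
+        -------
+        tuple
+            The key with its length validated and any Ellipsis expanded
+            into null slices.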
+        """
+        key = self._validate_key_length(key)
+        key = self._expand_ellipsis(key)
+        for i, k in enumerate(key):
+            try:
+                self._validate_key(k, i)
+            except ValueError as err:
+                raise ValueError(
+                    "Location based indexing can only have "
+                    f"[{self._valid_types}] types"
+                ) from err
+        return key
+
+    def _is_nested_tuple_indexer(self, tup: tuple) -> bool:
+        """
+        Returns
+        -------
+        bool
+        """
+        if any(isinstance(ax, MultiIndex) for ax in self.obj.axes):
+            return any(is_nested_tuple(tup, ax) for ax in self.obj.axes)
+        return False
+
+    def _convert_tuple(self, key):
+        keyidx = []
+        if self.axis is not None:
+            axis = self.obj._get_axis_number(self.axis)
+            for i in range(self.ndim):
+                if i == axis:
+                    keyidx.append(self._convert_to_indexer(key, axis=axis))
+                else:
+                    keyidx.append(slice(None))
+        else:
+            self._validate_key_length(key)
+            for i, k in enumerate(key):
+                idx = self._convert_to_indexer(k, axis=i)
+                keyidx.append(idx)
+
+        return tuple(keyidx)
+
+    def _validate_key_length(self, key: tuple) -> tuple:
+        if len(key) > self.ndim:
+            if key[0] is Ellipsis:
+                # e.g. Series.iloc[..., 3] reduces to just Series.iloc[3]
+                key = key[1:]
+                if Ellipsis in key:
+                    raise IndexingError(_one_ellipsis_message)
+                return self._validate_key_length(key)
+            raise IndexingError("Too many indexers")
+        return key
+
+    def _getitem_tuple_same_dim(self, tup: tuple):
+        """
+        Index with indexers that should return an object of the same dimension
+        as self.obj.
+
+        This is only called after a failed call to _getitem_lowerdim.
+        """
+        retval = self.obj
+        for i, key in enumerate(tup):
+            if com.is_null_slice(key):
+                continue
+
+            retval = getattr(retval, self.name)._getitem_axis(key, axis=i)
+            # We should never have retval.ndim < self.ndim, as that should
+            # be handled by the _getitem_lowerdim call above.
+            assert retval.ndim == self.ndim
+
+        return retval
+
+    def _getitem_lowerdim(self, tup: tuple):
+
+        # we can directly get the axis result since the axis is specified
+        if self.axis is not None:
+            axis = self.obj._get_axis_number(self.axis)
+            return self._getitem_axis(tup, axis=axis)
+
+        # we may have a nested tuples indexer here
+        if self._is_nested_tuple_indexer(tup):
+            return self._getitem_nested_tuple(tup)
+
+        # we may be using a tuple to represent multiple dimensions here
+        ax0 = self.obj._get_axis(0)
+        # ...but iloc should handle the tuple as simple integer-location
+        # instead of checking it as multiindex representation (GH 13797)
+        if (
+            isinstance(ax0, MultiIndex)
+            and self.name != "iloc"
+            and not any(isinstance(x, slice) for x in tup)
+        ):
+            # Note: in all extant test cases, replacing the slice condition with
+            # `all(is_hashable(x) or com.is_null_slice(x) for x in tup)`
+            # is equivalent.
+            # (see the other place where we call _handle_lowerdim_multi_index_axis0)
+            with suppress(IndexingError):
+                return self._handle_lowerdim_multi_index_axis0(tup)
+
+        tup = self._validate_key_length(tup)
+
+        for i, key in enumerate(tup):
+            if is_label_like(key):
+                # We don't need to check for tuples here because those are
+                # caught by the _is_nested_tuple_indexer check above.
+                section = self._getitem_axis(key, axis=i)
+
+                # We should never have a scalar section here, because
+                # _getitem_lowerdim is only called after a check for
+                # is_scalar_access, which a scalar section would be.
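+                # (illustrative example: for df.loc["a", :] on a 2D frame,
+                # section = df.loc["a"] is a 1D Series, so we take the
+                # `else` branch below and drop this axis from the key)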
+                if section.ndim == self.ndim:
+                    # we're in the middle of slicing through a MultiIndex
+                    # revise the key wrt to `section` by inserting an _NS
+                    new_key = tup[:i] + (_NS,) + tup[i + 1 :]
+
+                else:
+                    # Note: the section.ndim == self.ndim check above
+                    # rules out having DataFrame here, so we don't need to worry
+                    # about transposing.
+                    new_key = tup[:i] + tup[i + 1 :]
+
+                    if len(new_key) == 1:
+                        new_key = new_key[0]
+
+                # Slices should return views, but calling iloc/loc with a null
+                # slice returns a new object.
+                if com.is_null_slice(new_key):
+                    return section
+                # This is an elided recursive call to iloc/loc
+                return getattr(section, self.name)[new_key]
+
+        raise IndexingError("not applicable")
+
+    def _getitem_nested_tuple(self, tup: tuple):
+        # we have a nested tuple so have at least 1 multi-index level
+        # we should be able to match up the dimensionality here
+
+        for key in tup:
+            check_deprecated_indexers(key)
+
+        # we have too many indexers for our dim, but have at least 1
+        # multi-index dimension, try to see if we have something like
+        # a tuple passed to a series with a multi-index
+        if len(tup) > self.ndim:
+            if self.name != "loc":
+                # This should never be reached, but let's be explicit about it
+                raise ValueError("Too many indices")  # pragma: no cover
+            if all(is_hashable(x) or com.is_null_slice(x) for x in tup):
+                # GH#10521 Series should reduce MultiIndex dimensions instead of
+                # DataFrame, IndexingError is not raised when slice(None,None,None)
+                # with one row.
+                with suppress(IndexingError):
+                    return self._handle_lowerdim_multi_index_axis0(tup)
+            elif isinstance(self.obj, ABCSeries) and any(
+                isinstance(k, tuple) for k in tup
+            ):
+                # GH#35349 Raise if tuple in tuple for series
+                # Do this after the all-hashable-or-null-slice check so that
+                # we are only getting non-hashable tuples, in particular ones
+                # that themselves contain a slice entry
+                # See test_loc_series_getitem_too_many_dimensions
+                raise ValueError("Too many indices")
+
+            # this is a Series with a MultiIndex, specified with a tuple of
+            # selectors
+            axis = self.axis or 0
+            return self._getitem_axis(tup, axis=axis)
+
+        # handle the multi-axis by taking sections and reducing
+        # this is iterative
+        obj = self.obj
+        # GH#41369 Loop in reverse order ensures indexing along columns before rows
+        # which selects only necessary blocks which avoids dtype conversion if possible
+        axis = len(tup) - 1
+        for key in tup[::-1]:
+
+            if com.is_null_slice(key):
+                axis -= 1
+                continue
+
+            obj = getattr(obj, self.name)._getitem_axis(key, axis=axis)
+            axis -= 1
+
+            # if we have a scalar, we are done
+            if is_scalar(obj) or not hasattr(obj, "ndim"):
+                break
+
+        return obj
+
+    def _convert_to_indexer(self, key, axis: int):
+        raise AbstractMethodError(self)
+
+    def __getitem__(self, key):
+        check_deprecated_indexers(key)
+        if type(key) is tuple:
+            key = tuple(list(x) if is_iterator(x) else x for x in key)
+            key = tuple(com.apply_if_callable(x, self.obj) for x in key)
+            if self._is_scalar_access(key):
+                return self.obj._get_value(*key, takeable=self._takeable)
+            return self._getitem_tuple(key)
+        else:
+            # we by definition only have the 0th axis
+            axis = self.axis or 0
+
+            maybe_callable = com.apply_if_callable(key, self.obj)
+            return self._getitem_axis(maybe_callable, axis=axis)
+
+    def _is_scalar_access(self, key: tuple):
+        raise NotImplementedError()
+
+    def _getitem_tuple(self, tup: tuple):
+        raise AbstractMethodError(self)
+
+    def _getitem_axis(self, key, axis: int):
+        raise NotImplementedError()
+
+    def _has_valid_setitem_indexer(self, indexer) -> bool:
+        raise AbstractMethodError(self)
+
+    def _getbool_axis(self, key, axis: int):
+        # caller is responsible for ensuring non-None axis
+        labels = self.obj._get_axis(axis)
+        key = check_bool_indexer(labels, key)
+        inds = key.nonzero()[0]
+        return self.obj._take_with_is_copy(inds, axis=axis)
+
+
+@doc(IndexingMixin.loc)
+class _LocIndexer(_LocationIndexer):
+    _takeable: bool = False
+    _valid_types = (
+        "labels (MUST BE IN THE INDEX), slices of labels (BOTH "
+        "endpoints included! Can be slices of integers if the "
+        "index is integers), listlike of labels, boolean"
+    )
+
+    # -------------------------------------------------------------------
+    # Key Checks
+
+    @doc(_LocationIndexer._validate_key)
+    def _validate_key(self, key, axis: int):
+        # valid for a collection of labels (we check their presence later)
+        # slice of labels (where start-end in labels)
+        # slice of integers (only if in the labels)
+        # boolean not in slice and with boolean index
+        if isinstance(key, bool) and not (
+            is_bool_dtype(self.obj._get_axis(axis))
+            or self.obj._get_axis(axis).dtype.name == "boolean"
+        ):
+            raise KeyError(
+                f"{key}: boolean label can not be used without a boolean index"
+            )
+
+        if isinstance(key, slice) and (
+            isinstance(key.start, bool) or isinstance(key.stop, bool)
+        ):
+            raise TypeError(f"{key}: boolean values can not be used in a slice")
+
+    def _has_valid_setitem_indexer(self, indexer) -> bool:
+        return True
+
+    def _is_scalar_access(self, key: tuple) -> bool:
+        """
+        Returns
+        -------
+        bool
+        """
+        # this is a shortcut accessor to both .loc and .iloc
+        # that provides the equivalent access of .at and .iat
+        # a) avoid getting things via sections and (to minimize dtype changes)
+        # b) provide a performant path
+        if len(key) != self.ndim:
+            return False
+
+        for i, k in enumerate(key):
+            if not is_scalar(k):
+                return False
+
+            ax = self.obj.axes[i]
+            if isinstance(ax, MultiIndex):
+                return False
+
+            if isinstance(k, str) and ax._supports_partial_string_indexing:
+                # partial string indexing, df.loc['2000', 'A']
+                # should not be considered scalar
+                return False
+
+            if not ax._index_as_unique:
+                return False
+
+        return True
+
+    # -------------------------------------------------------------------
+    # MultiIndex Handling
+
+    def _multi_take_opportunity(self, tup: tuple) -> bool:
+        """
+        Check whether there is the possibility to use ``_multi_take``.
+
+        Currently the limit is that all axes being indexed must be indexed with
+        list-likes.
+
+        Parameters
+        ----------
+        tup : tuple
+            Tuple of indexers, one per axis.
+
+        Returns
+        -------
+        bool
+            Whether the current indexing can be passed through `_multi_take`.
+        """
+        if not all(is_list_like_indexer(x) for x in tup):
+            return False
+
+        # just too complicated
+        return not any(com.is_bool_indexer(x) for x in tup)
+
+    def _multi_take(self, tup: tuple):
+        """
+        Create the indexers for the passed tuple of keys and execute the take
+        operation. This allows the take operation to be executed all at once,
+        rather than once per dimension, improving efficiency.
+
+        Parameters
+        ----------
+        tup : tuple
+            Tuple of indexers, one per axis.
+ + Returns + ------- + values: same type as the object being indexed + """ + # GH 836 + d = { + axis: self._get_listlike_indexer(key, axis) + for (key, axis) in zip(tup, self.obj._AXIS_ORDERS) + } + return self.obj._reindex_with_indexers(d, copy=True, allow_dups=True) + + # ------------------------------------------------------------------- + + def _getitem_iterable(self, key, axis: int): + """ + Index current object with an iterable collection of keys. + + Parameters + ---------- + key : iterable + Targeted labels. + axis : int + Dimension on which the indexing is being made. + + Raises + ------ + KeyError + If no key was found. Will change in the future to raise if not all + keys were found. + + Returns + ------- + scalar, DataFrame, or Series: indexed value(s). + """ + # we assume that not com.is_bool_indexer(key), as that is + # handled before we get here. + self._validate_key(key, axis) + + # A collection of keys + keyarr, indexer = self._get_listlike_indexer(key, axis) + return self.obj._reindex_with_indexers( + {axis: [keyarr, indexer]}, copy=True, allow_dups=True + ) + + def _getitem_tuple(self, tup: tuple): + with suppress(IndexingError): + tup = self._expand_ellipsis(tup) + return self._getitem_lowerdim(tup) + + # no multi-index, so validate all of the indexers + tup = self._validate_tuple_indexer(tup) + + # ugly hack for GH #836 + if self._multi_take_opportunity(tup): + return self._multi_take(tup) + + return self._getitem_tuple_same_dim(tup) + + def _get_label(self, label, axis: int): + # GH#5667 this will fail if the label is not present in the axis. + return self.obj.xs(label, axis=axis) + + def _handle_lowerdim_multi_index_axis0(self, tup: tuple): + # we have an axis0 multi-index, handle or raise + axis = self.axis or 0 + try: + # fast path for series or for tup devoid of slices + return self._get_label(tup, axis=axis) + + except KeyError as ek: + # raise KeyError if number of indexers match + # else IndexingError will be raised + if self.ndim < len(tup) <= self.obj.index.nlevels: + raise ek + raise IndexingError("No label returned") from ek + + def _getitem_axis(self, key, axis: int): + key = item_from_zerodim(key) + if is_iterator(key): + key = list(key) + if key is Ellipsis: + key = slice(None) + + labels = self.obj._get_axis(axis) + + if isinstance(key, slice): + self._validate_key(key, axis) + return self._get_slice_axis(key, axis=axis) + elif com.is_bool_indexer(key): + return self._getbool_axis(key, axis=axis) + elif is_list_like_indexer(key): + + # an iterable multi-selection + if not (isinstance(key, tuple) and isinstance(labels, MultiIndex)): + + if hasattr(key, "ndim") and key.ndim > 1: + raise ValueError("Cannot index with multidimensional key") + + return self._getitem_iterable(key, axis=axis) + + # nested tuple slicing + if is_nested_tuple(key, labels): + locs = labels.get_locs(key) + indexer = [slice(None)] * self.ndim + indexer[axis] = locs + return self.obj.iloc[tuple(indexer)] + + # fall thru to straight lookup + self._validate_key(key, axis) + return self._get_label(key, axis=axis) + + def _get_slice_axis(self, slice_obj: slice, axis: int): + """ + This is pretty simple as we just have to deal with labels. 
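+
+        Illustrative doctest (assumed frame, not part of the original
+        docstring); note that label slices include both endpoints:
+
+        >>> df = pd.DataFrame({'A': [1, 2, 3]}, index=['a', 'b', 'c'])
+        >>> df.loc['a':'b']
+           A
+        a  1
+        b  2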
+ """ + # caller is responsible for ensuring non-None axis + obj = self.obj + if not need_slice(slice_obj): + return obj.copy(deep=False) + + labels = obj._get_axis(axis) + indexer = labels.slice_indexer(slice_obj.start, slice_obj.stop, slice_obj.step) + + if isinstance(indexer, slice): + return self.obj._slice(indexer, axis=axis) + else: + # DatetimeIndex overrides Index.slice_indexer and may + # return a DatetimeIndex instead of a slice object. + return self.obj.take(indexer, axis=axis) + + def _convert_to_indexer(self, key, axis: int): + """ + Convert indexing key into something we can use to do actual fancy + indexing on a ndarray. + + Examples + ix[:5] -> slice(0, 5) + ix[[1,2,3]] -> [1,2,3] + ix[['foo', 'bar', 'baz']] -> [i, j, k] (indices of foo, bar, baz) + + Going by Zen of Python? + 'In the face of ambiguity, refuse the temptation to guess.' + raise AmbiguousIndexError with integer labels? + - No, prefer label-based indexing + """ + labels = self.obj._get_axis(axis) + + if isinstance(key, slice): + return labels._convert_slice_indexer(key, kind="loc") + + # see if we are positional in nature + is_int_index = labels.is_integer() + is_int_positional = is_integer(key) and not is_int_index + + if is_scalar(key) or (isinstance(labels, MultiIndex) and is_hashable(key)): + # Otherwise get_loc will raise InvalidIndexError + + # if we are a label return me + try: + return labels.get_loc(key) + except LookupError: + if isinstance(key, tuple) and isinstance(labels, MultiIndex): + if len(key) == labels.nlevels: + return {"key": key} + raise + except InvalidIndexError: + # GH35015, using datetime as column indices raises exception + if not isinstance(labels, MultiIndex): + raise + except ValueError: + if not is_int_positional: + raise + + # a positional + if is_int_positional: + + # if we are setting and its not a valid location + # its an insert which fails by definition + + # always valid + return {"key": key} + + if is_nested_tuple(key, labels): + if self.ndim == 1 and any(isinstance(k, tuple) for k in key): + # GH#35349 Raise if tuple in tuple for series + raise ValueError("Too many indices") + return labels.get_locs(key) + + elif is_list_like_indexer(key): + + if is_iterator(key): + key = list(key) + + if com.is_bool_indexer(key): + key = check_bool_indexer(labels, key) + (inds,) = key.nonzero() + return inds + else: + return self._get_listlike_indexer(key, axis)[1] + else: + try: + return labels.get_loc(key) + except LookupError: + # allow a not found key only if we are a setter + if not is_list_like_indexer(key): + return {"key": key} + raise + + def _get_listlike_indexer(self, key, axis: int): + """ + Transform a list-like of keys into a new index and an indexer. + + Parameters + ---------- + key : list-like + Targeted labels. + axis: int + Dimension on which the indexing is being made. + + Raises + ------ + KeyError + If at least one key was requested but none was found. + + Returns + ------- + keyarr: Index + New index (coinciding with 'key' if the axis is unique). + values : array-like + Indexer for the return object, -1 denotes keys not found. 
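+
+        Illustrative sketch using the public ``Index.get_indexer`` (the
+        strict variant used here raises ``KeyError`` for missing labels
+        instead of returning -1):
+
+        >>> pd.Index(['a', 'b', 'c']).get_indexer(['b', 'x'])
+        array([ 1, -1])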
+        """
+        ax = self.obj._get_axis(axis)
+        axis_name = self.obj._get_axis_name(axis)
+
+        keyarr, indexer = ax._get_indexer_strict(key, axis_name)
+
+        return keyarr, indexer
+
+
+@doc(IndexingMixin.iloc)
+class _iLocIndexer(_LocationIndexer):
+    _valid_types = (
+        "integer, integer slice (START point is INCLUDED, END "
+        "point is EXCLUDED), listlike of integers, boolean array"
+    )
+    _takeable = True
+
+    # -------------------------------------------------------------------
+    # Key Checks
+
+    def _validate_key(self, key, axis: int):
+        if com.is_bool_indexer(key):
+            if hasattr(key, "index") and isinstance(key.index, Index):
+                if key.index.inferred_type == "integer":
+                    raise NotImplementedError(
+                        "iLocation based boolean "
+                        "indexing on an integer type "
+                        "is not available"
+                    )
+                raise ValueError(
+                    "iLocation based boolean indexing cannot use "
+                    "an indexable as a mask"
+                )
+            return
+
+        if isinstance(key, slice):
+            return
+        elif is_integer(key):
+            self._validate_integer(key, axis)
+        elif isinstance(key, tuple):
+            # a tuple should already have been caught by this point
+            # so don't treat a tuple as a valid indexer
+            raise IndexingError("Too many indexers")
+        elif is_list_like_indexer(key):
+            arr = np.array(key)
+            len_axis = len(self.obj._get_axis(axis))
+
+            # check that the key has a numeric dtype
+            if not is_numeric_dtype(arr.dtype):
+                raise IndexError(f".iloc requires numeric indexers, got {arr}")
+
+            # check that the key does not exceed the maximum size of the index
+            if len(arr) and (arr.max() >= len_axis or arr.min() < -len_axis):
+                raise IndexError("positional indexers are out-of-bounds")
+        else:
+            raise ValueError(f"Can only index by location with a [{self._valid_types}]")
+
+    def _has_valid_setitem_indexer(self, indexer) -> bool:
+        """
+        Validate that a positional indexer cannot enlarge its target;
+        raise if needed. Does not modify the indexer externally.
+
+        Returns
+        -------
+        bool
+        """
+        if isinstance(indexer, dict):
+            raise IndexError("iloc cannot enlarge its target object")
+
+        if isinstance(indexer, ABCDataFrame):
+            warnings.warn(
+                "DataFrame indexer for .iloc is deprecated and will be removed in "
+                "a future version.\n"
+                "consider using .loc with a DataFrame indexer for automatic alignment.",
+                FutureWarning,
+                stacklevel=find_stack_level(),
+            )
+
+        if not isinstance(indexer, tuple):
+            indexer = _tuplify(self.ndim, indexer)
+
+        for ax, i in zip(self.obj.axes, indexer):
+            if isinstance(i, slice):
+                # should check the stop slice?
+                pass
+            elif is_list_like_indexer(i):
+                # should check the elements?
+                pass
+            elif is_integer(i):
+                if i >= len(ax):
+                    raise IndexError("iloc cannot enlarge its target object")
+            elif isinstance(i, dict):
+                raise IndexError("iloc cannot enlarge its target object")
+
+        return True
+
+    def _is_scalar_access(self, key: tuple) -> bool:
+        """
+        Returns
+        -------
+        bool
+        """
+        # this is a shortcut accessor to both .loc and .iloc
+        # that provides the equivalent access of .at and .iat
+        # a) avoid getting things via sections and (to minimize dtype changes)
+        # b) provide a performant path
+        if len(key) != self.ndim:
+            return False
+
+        return all(is_integer(k) for k in key)
+
+    def _validate_integer(self, key: int, axis: int) -> None:
+        """
+        Check that 'key' is a valid position in the desired axis.
+
+        Parameters
+        ----------
+        key : int
+            Requested position.
+        axis : int
+            Desired axis.
+
+        Raises
+        ------
+        IndexError
+            If 'key' is not a valid position in axis 'axis'.
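+
+        For example (illustrative), an axis of length 3 accepts positions
+        -3 through 2 only:
+
+        >>> pd.Series([1, 2, 3]).iloc[3]
+        Traceback (most recent call last):
+        ...
+        IndexError: single positional indexer is out-of-bounds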
+ """ + len_axis = len(self.obj._get_axis(axis)) + if key >= len_axis or key < -len_axis: + raise IndexError("single positional indexer is out-of-bounds") + + # ------------------------------------------------------------------- + + def _getitem_tuple(self, tup: tuple): + + tup = self._validate_tuple_indexer(tup) + with suppress(IndexingError): + return self._getitem_lowerdim(tup) + + return self._getitem_tuple_same_dim(tup) + + def _get_list_axis(self, key, axis: int): + """ + Return Series values by list or array of integers. + + Parameters + ---------- + key : list-like positional indexer + axis : int + + Returns + ------- + Series object + + Notes + ----- + `axis` can only be zero. + """ + try: + return self.obj._take_with_is_copy(key, axis=axis) + except IndexError as err: + # re-raise with different error message + raise IndexError("positional indexers are out-of-bounds") from err + + def _getitem_axis(self, key, axis: int): + if key is Ellipsis: + key = slice(None) + elif isinstance(key, ABCDataFrame): + raise IndexError( + "DataFrame indexer is not allowed for .iloc\n" + "Consider using .loc for automatic alignment." + ) + + if isinstance(key, slice): + return self._get_slice_axis(key, axis=axis) + + if is_iterator(key): + key = list(key) + + if isinstance(key, list): + key = np.asarray(key) + + if com.is_bool_indexer(key): + self._validate_key(key, axis) + return self._getbool_axis(key, axis=axis) + + # a list of integers + elif is_list_like_indexer(key): + return self._get_list_axis(key, axis=axis) + + # a single integer + else: + key = item_from_zerodim(key) + if not is_integer(key): + raise TypeError("Cannot index by location index with a non-integer key") + + # validate the location + self._validate_integer(key, axis) + + return self.obj._ixs(key, axis=axis) + + def _get_slice_axis(self, slice_obj: slice, axis: int): + # caller is responsible for ensuring non-None axis + obj = self.obj + + if not need_slice(slice_obj): + return obj.copy(deep=False) + + labels = obj._get_axis(axis) + labels._validate_positional_slice(slice_obj) + return self.obj._slice(slice_obj, axis=axis) + + def _convert_to_indexer(self, key, axis: int): + """ + Much simpler as we only have to deal with our valid types. + """ + return key + + def _get_setitem_indexer(self, key): + # GH#32257 Fall through to let numpy do validation + if is_iterator(key): + key = list(key) + + if self.axis is not None: + return self._convert_tuple(key) + + return key + + # ------------------------------------------------------------------- + + def _setitem_with_indexer(self, indexer, value, name="iloc"): + """ + _setitem_with_indexer is for setting values on a Series/DataFrame + using positional indexers. + + If the relevant keys are not present, the Series/DataFrame may be + expanded. + + This method is currently broken when dealing with non-unique Indexes, + since it goes from positional indexers back to labels when calling + BlockManager methods, see GH#12991, GH#22046, GH#15686. 
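+
+        Illustrative example of the enlargement path (not part of the
+        original docstring):
+
+        >>> ser = pd.Series([0, 1])
+        >>> ser.loc[5] = 9  # label 5 is missing, so the Series is expanded
+        >>> ser
+        0    0
+        1    1
+        5    9
+        dtype: int64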
+        """
+        info_axis = self.obj._info_axis_number
+
+        # maybe partial set
+        take_split_path = not self.obj._mgr.is_single_block
+
+        # if there is only one block/type, still have to take split path
+        # unless the block is one-dimensional or it can hold the value
+        if (
+            not take_split_path
+            and getattr(self.obj._mgr, "blocks", False)
+            and self.ndim > 1
+        ):
+            # in case of dict, keys are indices
+            val = list(value.values()) if isinstance(value, dict) else value
+            blk = self.obj._mgr.blocks[0]
+            take_split_path = not blk._can_hold_element(val)
+
+        # if we have any multi-indexes that have non-trivial slices
+        # (not null slices) then we must take the split path, xref
+        # GH 10360, GH 27841
+        if isinstance(indexer, tuple) and len(indexer) == len(self.obj.axes):
+            for i, ax in zip(indexer, self.obj.axes):
+                if isinstance(ax, MultiIndex) and not (
+                    is_integer(i) or com.is_null_slice(i)
+                ):
+                    take_split_path = True
+                    break
+
+        if isinstance(indexer, tuple):
+            nindexer = []
+            for i, idx in enumerate(indexer):
+                if isinstance(idx, dict):
+
+                    # reindex the axis to the new value
+                    # and set inplace
+                    key, _ = convert_missing_indexer(idx)
+
+                    # if this is the items axes, then take the main missing
+                    # path first
+                    # this correctly sets the dtype and avoids cache issues
+                    # essentially this separates out the block that is needed
+                    # to possibly be modified
+                    if self.ndim > 1 and i == info_axis:
+
+                        # add the new item, and set the value
+                        # must have all defined axes if we have a scalar
+                        # or a list-like on the non-info axes if we have a
+                        # list-like
+                        if not len(self.obj):
+                            if not is_list_like_indexer(value):
+                                raise ValueError(
+                                    "cannot set a frame with no "
+                                    "defined index and a scalar"
+                                )
+                            self.obj[key] = value
+                            return
+
+                        # add a new item with the dtype setup
+                        if com.is_null_slice(indexer[0]):
+                            # We are setting an entire column
+                            self.obj[key] = value
+                            return
+                        elif is_array_like(value):
+                            # GH#42099
+                            arr = extract_array(value, extract_numpy=True)
+                            taker = -1 * np.ones(len(self.obj), dtype=np.intp)
+                            empty_value = algos.take_nd(arr, taker)
+                            if not isinstance(value, ABCSeries):
+                                # if not Series (in which case we need to align),
+                                # we can short-circuit
+                                empty_value[indexer[0]] = arr
+                                self.obj[key] = empty_value
+                                return
+
+                            self.obj[key] = empty_value
+
+                        else:
+                            self.obj[key] = infer_fill_value(value)
+
+                        new_indexer = convert_from_missing_indexer_tuple(
+                            indexer, self.obj.axes
+                        )
+                        self._setitem_with_indexer(new_indexer, value, name)
+
+                        return
+
+                    # reindex the axis
+                    # make sure to clear the cache because we are
+                    # just replacing the block manager here
+                    # so the object is the same
+                    index = self.obj._get_axis(i)
+                    labels = index.insert(len(index), key)
+
+                    # We are expanding the Series/DataFrame values to match
+                    # the length of the new index `labels`. GH#40096 ensure
+                    # this is valid even if the index has duplicates.
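+                    # (illustrative: for an index of length 2, `taker` below
+                    # is [0, 1, -1]; positions 0 and 1 keep the existing
+                    # values and the trailing -1 marks the newly inserted
+                    # slot, filled when the value is set)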
+                    taker = np.arange(len(index) + 1, dtype=np.intp)
+                    taker[-1] = -1
+                    reindexers = {i: (labels, taker)}
+                    new_obj = self.obj._reindex_with_indexers(
+                        reindexers, allow_dups=True
+                    )
+                    self.obj._mgr = new_obj._mgr
+                    self.obj._maybe_update_cacher(clear=True)
+                    self.obj._is_copy = None
+
+                    nindexer.append(labels.get_loc(key))
+
+                else:
+                    nindexer.append(idx)
+
+            indexer = tuple(nindexer)
+        else:
+
+            indexer, missing = convert_missing_indexer(indexer)
+
+            if missing:
+                self._setitem_with_indexer_missing(indexer, value)
+                return
+
+        # align and set the values
+        if take_split_path:
+            # We have to operate column-wise
+            self._setitem_with_indexer_split_path(indexer, value, name)
+        else:
+            self._setitem_single_block(indexer, value, name)
+
+    def _setitem_with_indexer_split_path(self, indexer, value, name: str):
+        """
+        Setitem column-wise.
+        """
+        # Above we only set take_split_path to True for 2D cases
+        assert self.ndim == 2
+
+        if not isinstance(indexer, tuple):
+            indexer = _tuplify(self.ndim, indexer)
+        if len(indexer) > self.ndim:
+            raise IndexError("too many indices for array")
+        if isinstance(indexer[0], np.ndarray) and indexer[0].ndim > 2:
+            raise ValueError(r"Cannot set values with ndim > 2")
+
+        if (isinstance(value, ABCSeries) and name != "iloc") or isinstance(value, dict):
+            from pandas import Series
+
+            value = self._align_series(indexer, Series(value))
+
+        # Ensure we have something we can iterate over
+        info_axis = indexer[1]
+        ilocs = self._ensure_iterable_column_indexer(info_axis)
+
+        pi = indexer[0]
+        lplane_indexer = length_of_indexer(pi, self.obj.index)
+        # lplane_indexer gives the expected length of obj[indexer[0]]
+
+        # we need an iterable, with a ndim of at least 1
+        # e.g. don't pass through np.array(0)
+        if is_list_like_indexer(value) and getattr(value, "ndim", 1) > 0:
+
+            if isinstance(value, ABCDataFrame):
+                self._setitem_with_indexer_frame_value(indexer, value, name)
+
+            elif np.ndim(value) == 2:
+                self._setitem_with_indexer_2d_value(indexer, value)
+
+            elif len(ilocs) == 1 and lplane_indexer == len(value) and not is_scalar(pi):
+                # We are setting multiple rows in a single column.
+                self._setitem_single_column(ilocs[0], value, pi)
+
+            elif len(ilocs) == 1 and 0 != lplane_indexer != len(value):
+                # We are trying to set N values into M entries of a single
+                # column, which is invalid for N != M
+                # Exclude zero-len for e.g. boolean masking that is all-false
+
+                if len(value) == 1 and not is_integer(info_axis):
+                    # This is a case like df.iloc[:3, [1]] = [0]
+                    # where we treat as df.iloc[:3, 1] = 0
+                    return self._setitem_with_indexer((pi, info_axis[0]), value[0])
+
+                raise ValueError(
+                    "Must have equal len keys and value "
+                    "when setting with an iterable"
+                )
+
+            elif lplane_indexer == 0 and len(value) == len(self.obj.index):
+                # We get here in one case via .loc with an all-False mask
+                pass
+
+            elif self._is_scalar_access(indexer):
+                # We are setting nested data
+                self._setitem_single_column(indexer[1], value, pi)
+
+            elif len(ilocs) == len(value):
+                # We are setting multiple columns in a single row.
+                for loc, v in zip(ilocs, value):
+                    self._setitem_single_column(loc, v, pi)
+
+            elif len(ilocs) == 1 and com.is_null_slice(pi) and len(self.obj) == 0:
+                # This is a setitem-with-expansion, see
+                # test_loc_setitem_empty_append_expands_rows_mixed_dtype
+                # e.g.
df = DataFrame(columns=["x", "y"]) + # df["x"] = df["x"].astype(np.int64) + # df.loc[:, "x"] = [1, 2, 3] + self._setitem_single_column(ilocs[0], value, pi) + + else: + raise ValueError( + "Must have equal len keys and value " + "when setting with an iterable" + ) + + else: + + # scalar value + for loc in ilocs: + self._setitem_single_column(loc, value, pi) + + def _setitem_with_indexer_2d_value(self, indexer, value): + # We get here with np.ndim(value) == 2, excluding DataFrame, + # which goes through _setitem_with_indexer_frame_value + pi = indexer[0] + + ilocs = self._ensure_iterable_column_indexer(indexer[1]) + + # GH#7551 Note that this coerces the dtype if we are mixed + value = np.array(value, dtype=object) + if len(ilocs) != value.shape[1]: + raise ValueError( + "Must have equal len keys and value when setting with an ndarray" + ) + + for i, loc in enumerate(ilocs): + # setting with a list, re-coerces + self._setitem_single_column(loc, value[:, i].tolist(), pi) + + def _setitem_with_indexer_frame_value(self, indexer, value: DataFrame, name: str): + ilocs = self._ensure_iterable_column_indexer(indexer[1]) + + sub_indexer = list(indexer) + pi = indexer[0] + + multiindex_indexer = isinstance(self.obj.columns, MultiIndex) + + unique_cols = value.columns.is_unique + + # We do not want to align the value in case of iloc GH#37728 + if name == "iloc": + for i, loc in enumerate(ilocs): + val = value.iloc[:, i] + self._setitem_single_column(loc, val, pi) + + elif not unique_cols and value.columns.equals(self.obj.columns): + # We assume we are already aligned, see + # test_iloc_setitem_frame_duplicate_columns_multiple_blocks + for loc in ilocs: + item = self.obj.columns[loc] + if item in value: + sub_indexer[1] = item + val = self._align_series( + tuple(sub_indexer), + value.iloc[:, loc], + multiindex_indexer, + ) + else: + val = np.nan + + self._setitem_single_column(loc, val, pi) + + elif not unique_cols: + raise ValueError("Setting with non-unique columns is not allowed.") + + else: + for loc in ilocs: + item = self.obj.columns[loc] + if item in value: + sub_indexer[1] = item + val = self._align_series( + tuple(sub_indexer), value[item], multiindex_indexer + ) + else: + val = np.nan + + self._setitem_single_column(loc, val, pi) + + def _setitem_single_column(self, loc: int, value, plane_indexer): + """ + + Parameters + ---------- + loc : int + Indexer for column position + plane_indexer : int, slice, listlike[int] + The indexer we use for setitem along axis=0. + """ + pi = plane_indexer + + ser = self.obj._ixs(loc, axis=1) + + # perform the equivalent of a setitem on the info axis + # as we have a null slice or a slice with full bounds + # which means essentially reassign to the columns of a + # multi-dim object + # GH#6149 (null slice), GH#10408 (full bounds) + if com.is_null_slice(pi) or com.is_full_slice(pi, len(self.obj)): + ser = value + elif ( + is_array_like(value) + and is_exact_shape_match(ser, value) + and not is_empty_indexer(pi, value) + ): + if is_list_like(pi): + ser = value[np.argsort(pi)] + else: + # in case of slice + ser = value[pi] + else: + # set the item, first attempting to operate inplace, then + # falling back to casting if necessary; see + # _whatsnew_130.notable_bug_fixes.setitem_column_try_inplace + + orig_values = ser._values + ser._mgr = ser._mgr.setitem((pi,), value) + + if ser._values is orig_values: + # The setitem happened inplace, so the DataFrame's values + # were modified inplace. 
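+                # (illustrative: setting a compatible scalar, e.g. an int
+                # into an int64 column, takes this in-place path; an
+                # incompatible value such as a string produces a new,
+                # upcast array and falls through to _iset_item below)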
+ return + self.obj._iset_item(loc, ser) + return + + # reset the sliced object if unique + self.obj._iset_item(loc, ser) + + def _setitem_single_block(self, indexer, value, name: str): + """ + _setitem_with_indexer for the case when we have a single Block. + """ + from pandas import Series + + info_axis = self.obj._info_axis_number + item_labels = self.obj._get_axis(info_axis) + + if isinstance(indexer, tuple): + + # if we are setting on the info axis ONLY + # set using those methods to avoid block-splitting + # logic here + if ( + self.ndim == len(indexer) == 2 + and is_integer(indexer[1]) + and com.is_null_slice(indexer[0]) + ): + col = item_labels[indexer[info_axis]] + if len(item_labels.get_indexer_for([col])) == 1: + # e.g. test_loc_setitem_empty_append_expands_rows + loc = item_labels.get_loc(col) + self.obj._iset_item(loc, value) + return + + indexer = maybe_convert_ix(*indexer) # e.g. test_setitem_frame_align + + if (isinstance(value, ABCSeries) and name != "iloc") or isinstance(value, dict): + # TODO(EA): ExtensionBlock.setitem this causes issues with + # setting for extensionarrays that store dicts. Need to decide + # if it's worth supporting that. + value = self._align_series(indexer, Series(value)) + + elif isinstance(value, ABCDataFrame) and name != "iloc": + value = self._align_frame(indexer, value) + + # check for chained assignment + self.obj._check_is_chained_assignment_possible() + + # actually do the set + self.obj._mgr = self.obj._mgr.setitem(indexer=indexer, value=value) + self.obj._maybe_update_cacher(clear=True, inplace=True) + + def _setitem_with_indexer_missing(self, indexer, value): + """ + Insert new row(s) or column(s) into the Series or DataFrame. + """ + from pandas import Series + + # reindex the axis to the new value + # and set inplace + if self.ndim == 1: + index = self.obj.index + new_index = index.insert(len(index), indexer) + + # we have a coerced indexer, e.g. a float + # that matches in an Int64Index, so + # we will not create a duplicate index, rather + # index to that element + # e.g. 
0.0 -> 0
+            # GH#12246
+            if index.is_unique:
+                # pass new_index[-1:] instead of [new_index[-1]]
+                # so that we retain dtype
+                new_indexer = index.get_indexer(new_index[-1:])
+                if (new_indexer != -1).any():
+                    # We get only here with loc, so can hard code
+                    return self._setitem_with_indexer(new_indexer, value, "loc")
+
+            # this preserves dtype of the value
+            new_values = Series([value])._values
+            if len(self.obj._values):
+                # GH#22717 handle casting compatibility that np.concatenate
+                # does incorrectly
+                new_values = concat_compat([self.obj._values, new_values])
+            self.obj._mgr = self.obj._constructor(
+                new_values, index=new_index, name=self.obj.name
+            )._mgr
+            self.obj._maybe_update_cacher(clear=True)
+
+        elif self.ndim == 2:
+
+            if not len(self.obj.columns):
+                # no columns and scalar
+                raise ValueError("cannot set a frame with no defined columns")
+
+            has_dtype = hasattr(value, "dtype")
+            if isinstance(value, ABCSeries):
+                # append a Series
+                value = value.reindex(index=self.obj.columns, copy=True)
+                value.name = indexer
+            elif isinstance(value, dict):
+                value = Series(
+                    value, index=self.obj.columns, name=indexer, dtype=object
+                )
+            else:
+                # a list-like
+                if is_list_like_indexer(value):
+                    # must have conforming columns
+                    if len(value) != len(self.obj.columns):
+                        raise ValueError("cannot set a row with mismatched columns")
+
+                value = Series(value, index=self.obj.columns, name=indexer)
+
+            if not len(self.obj):
+                # We will ignore the existing dtypes instead of using
+                # internals.concat logic
+                df = value.to_frame().T
+                df.index = Index([indexer], name=self.obj.index.name)
+                if not has_dtype:
+                    # i.e. if we already had a Series or ndarray, keep that
+                    # dtype. But if we had a list or dict, then do inference
+                    df = df.infer_objects()
+                self.obj._mgr = df._mgr
+            else:
+                self.obj._mgr = self.obj._append(value)._mgr
+            self.obj._maybe_update_cacher(clear=True)
+
+    def _ensure_iterable_column_indexer(self, column_indexer):
+        """
+        Ensure that our column indexer is something that can be iterated over.
+        """
+        ilocs: Sequence[int]
+        if is_integer(column_indexer):
+            ilocs = [column_indexer]
+        elif isinstance(column_indexer, slice):
+            ilocs = np.arange(len(self.obj.columns))[  # type: ignore[assignment]
+                column_indexer
+            ]
+        elif isinstance(column_indexer, np.ndarray) and is_bool_dtype(
+            column_indexer.dtype
+        ):
+            ilocs = np.arange(len(column_indexer))[column_indexer]
+        else:
+            ilocs = column_indexer
+        return ilocs
+
+    def _align_series(self, indexer, ser: Series, multiindex_indexer: bool = False):
+        """
+        Parameters
+        ----------
+        indexer : tuple, slice, scalar
+            Indexer used to get the locations that will be set to `ser`.
+        ser : pd.Series
+            Values to assign to the locations specified by `indexer`.
+        multiindex_indexer : bool, optional
+            Defaults to False. Should be set to True if `indexer` was from
+            a `pd.MultiIndex`, to avoid unnecessary broadcasting.
+
+        Returns
+        -------
+        `np.array` of `ser` broadcast to the appropriate shape for assignment
+        to the locations selected by `indexer`
+        """
+        if isinstance(indexer, (slice, np.ndarray, list, Index)):
+            indexer = (indexer,)
+
+        if isinstance(indexer, tuple):
+
+            # flatten np.ndarray indexers
+            def ravel(i):
+                return i.ravel() if isinstance(i, np.ndarray) else i
+
+            indexer = tuple(map(ravel, indexer))
+
+            aligners = [not com.is_null_slice(idx) for idx in indexer]
+            sum_aligners = sum(aligners)
+            single_aligner = sum_aligners == 1
+            is_frame = self.ndim == 2
+            obj = self.obj
+
+            # are we a single alignable value on a non-primary
+            # dim (e.g. panel: 1, 2, or frame: 0)?
+            # hence need to align to a single axis dimension
+            # rather than find all valid dims
+
+            # frame
+            if is_frame:
+                single_aligner = single_aligner and aligners[0]
+
+            # we have a frame, with multiple indexers on both axes; and a
+            # series, so need to broadcast (see GH5206)
+            if sum_aligners == self.ndim and all(is_sequence(_) for _ in indexer):
+                # TODO: This is hacky, align Series and DataFrame behavior GH#45778
+                if obj.ndim == 2 and is_empty_indexer(indexer[0], ser._values):
+                    return ser._values.copy()
+                ser = ser.reindex(obj.axes[0][indexer[0]], copy=True)._values
+
+                # single indexer
+                if len(indexer) > 1 and not multiindex_indexer:
+                    len_indexer = len(indexer[1])
+                    ser = (
+                        np.tile(ser, len_indexer)  # type: ignore[assignment]
+                        .reshape(len_indexer, -1)
+                        .T
+                    )
+
+                return ser
+
+            for i, idx in enumerate(indexer):
+                ax = obj.axes[i]
+
+                # multiple aligners (or null slices)
+                if is_sequence(idx) or isinstance(idx, slice):
+                    if single_aligner and com.is_null_slice(idx):
+                        continue
+                    new_ix = ax[idx]
+                    if not is_list_like_indexer(new_ix):
+                        new_ix = Index([new_ix])
+                    else:
+                        new_ix = Index(new_ix)
+                    if ser.index.equals(new_ix) or not len(new_ix):
+                        return ser._values.copy()
+
+                    return ser.reindex(new_ix)._values
+
+                # 2 dims
+                elif single_aligner:
+
+                    # reindex along index
+                    ax = self.obj.axes[1]
+                    if ser.index.equals(ax) or not len(ax):
+                        return ser._values.copy()
+                    return ser.reindex(ax)._values
+
+        elif is_integer(indexer) and self.ndim == 1:
+            if is_object_dtype(self.obj):
+                return ser
+            ax = self.obj._get_axis(0)
+
+            if ser.index.equals(ax):
+                return ser._values.copy()
+
+            return ser.reindex(ax)._values[indexer]
+
+        elif is_integer(indexer):
+            ax = self.obj._get_axis(1)
+
+            if ser.index.equals(ax):
+                return ser._values.copy()
+
+            return ser.reindex(ax)._values
+
+        raise ValueError("Incompatible indexer with Series")
+
+    def _align_frame(self, indexer, df: DataFrame):
+        is_frame = self.ndim == 2
+
+        if isinstance(indexer, tuple):
+
+            idx, cols = None, None
+            sindexers = []
+            for i, ix in enumerate(indexer):
+                ax = self.obj.axes[i]
+                if is_sequence(ix) or isinstance(ix, slice):
+                    if isinstance(ix, np.ndarray):
+                        ix = ix.ravel()
+                    if idx is None:
+                        idx = ax[ix]
+                    elif cols is None:
+                        cols = ax[ix]
+                    else:
+                        break
+                else:
+                    sindexers.append(i)
+
+            if idx is not None and cols is not None:
+
+                if df.index.equals(idx) and df.columns.equals(cols):
+                    val = df.copy()._values
+                else:
+                    val = df.reindex(idx, columns=cols)._values
+                return val
+
+        elif (isinstance(indexer, slice) or is_list_like_indexer(indexer)) and is_frame:
+            ax = self.obj.index[indexer]
+            if df.index.equals(ax):
+                val = df.copy()._values
+            else:
+
+                # we have a multi-index and are trying to align
+                # with a particular level, GH3738
+                if (
+                    isinstance(ax, MultiIndex)
+                    and isinstance(df.index, MultiIndex)
+                    and ax.nlevels != df.index.nlevels
+                ):
+                    raise TypeError(
+                        "cannot align on a multi-index without "
+                        "specifying the join levels"
+                    )
+
+                val = df.reindex(index=ax)._values
+            return val
+
+        raise ValueError("Incompatible indexer with DataFrame")
+
+
+class _ScalarAccessIndexer(NDFrameIndexerBase):
+    """
+    Access scalars quickly.
+    """
+
+    def _convert_key(self, key):
+        raise AbstractMethodError(self)
+
+    def __getitem__(self, key):
+        if not isinstance(key, tuple):
+
+            # we could have a convertible item here (e.g. Timestamp)
+            if not is_list_like_indexer(key):
+                key = (key,)
+            else:
+                raise ValueError("Invalid call for scalar access (getting)!")
+
+        key = self._convert_key(key)
+        return self.obj._get_value(*key, takeable=self._takeable)
+
+    def __setitem__(self, key, value):
+        if isinstance(key, tuple):
+            key = tuple(com.apply_if_callable(x, self.obj) for x in key)
+        else:
+            # scalar callable may return tuple
+            key = com.apply_if_callable(key, self.obj)
+
+        if not isinstance(key, tuple):
+            key = _tuplify(self.ndim, key)
+        key = list(self._convert_key(key))
+        if len(key) != self.ndim:
+            raise ValueError("Not enough indexers for scalar access (setting)!")
+
+        self.obj._set_value(*key, value=value, takeable=self._takeable)
+
+
+@doc(IndexingMixin.at)
+class _AtIndexer(_ScalarAccessIndexer):
+    _takeable = False
+
+    def _convert_key(self, key):
+        """
+        Require the keys to be the same type as the index (so we don't
+        fall back).
+        """
+        # GH 26989
+        # For series, unpacking key needs to result in the label.
+        # This is already the case for len(key) == 1; e.g. (1,)
+        if self.ndim == 1 and len(key) > 1:
+            key = (key,)
+
+        return key
+
+    @property
+    def _axes_are_unique(self) -> bool:
+        # Only relevant for self.ndim == 2
+        assert self.ndim == 2
+        return self.obj.index.is_unique and self.obj.columns.is_unique
+
+    def __getitem__(self, key):
+
+        if self.ndim == 2 and not self._axes_are_unique:
+            # GH#33041 fall back to .loc
+            if not isinstance(key, tuple) or not all(is_scalar(x) for x in key):
+                raise ValueError("Invalid call for scalar access (getting)!")
+            return self.obj.loc[key]
+
+        return super().__getitem__(key)
+
+    def __setitem__(self, key, value):
+        if self.ndim == 2 and not self._axes_are_unique:
+            # GH#33041 fall back to .loc
+            if not isinstance(key, tuple) or not all(is_scalar(x) for x in key):
+                raise ValueError("Invalid call for scalar access (setting)!")
+
+            self.obj.loc[key] = value
+            return
+
+        return super().__setitem__(key, value)
+
+
+@doc(IndexingMixin.iat)
+class _iAtIndexer(_ScalarAccessIndexer):
+    _takeable = True
+
+    def _convert_key(self, key):
+        """
+        Require integer args (and convert to label arguments).
+        """
+        for i in key:
+            if not is_integer(i):
+                raise ValueError("iAt based indexing can only have integer indexers")
+        return key
+
+
+def _tuplify(ndim: int, loc: Hashable) -> tuple[Hashable | slice, ...]:
+    """
+    Given an indexer for the first dimension, create an equivalent tuple
+    for indexing over all dimensions.
+
+    Parameters
+    ----------
+    ndim : int
+    loc : object
+
+    Returns
+    -------
+    tuple
+    """
+    _tup: list[Hashable | slice]
+    _tup = [slice(None, None) for _ in range(ndim)]
+    _tup[0] = loc
+    return tuple(_tup)
+
+
+def convert_to_index_sliceable(obj: DataFrame, key):
+    """
+    If we are index sliceable, then return my slicer, otherwise return None.
+    """
+    idx = obj.index
+    if isinstance(key, slice):
+        return idx._convert_slice_indexer(key, kind="getitem")
+
+    elif isinstance(key, str):
+
+        # we are an actual column
+        if key in obj.columns:
+            return None
+
+        # We might have a datetimelike string that we can translate to a
+        # slice here via partial string indexing
+        if idx._supports_partial_string_indexing:
+            try:
+                res = idx._get_string_slice(str(key))
+                warnings.warn(
+                    "Indexing a DataFrame with a datetimelike index using a single "
+                    "string to slice the rows, like `frame[string]`, is deprecated "
+                    "and will be removed in a future version. Use `frame.loc[string]` "
+                    "instead.",
+                    FutureWarning,
+                    stacklevel=find_stack_level(),
+                )
+                return res
+            except (KeyError, ValueError, NotImplementedError):
+                return None
+
+    return None
+
+
+def check_bool_indexer(index: Index, key) -> np.ndarray:
+    """
+    Check if key is a valid boolean indexer for an object with such index and
+    perform reindexing or conversion if needed.
+
+    This function assumes that is_bool_indexer(key) == True.
+
+    Parameters
+    ----------
+    index : Index
+        Index of the object on which the indexing is done.
+    key : list-like
+        Boolean indexer to check.
+
+    Returns
+    -------
+    np.ndarray
+        Resulting key.
+
+    Raises
+    ------
+    IndexError
+        If the key does not have the same length as index.
+    IndexingError
+        If the index of the key is unalignable to index.
+    """
+    result = key
+    if isinstance(key, ABCSeries) and not key.index.equals(index):
+        result = result.reindex(index)
+        mask = isna(result._values)
+        if mask.any():
+            raise IndexingError(
+                "Unalignable boolean Series provided as "
+                "indexer (index of the boolean Series and of "
+                "the indexed object do not match)."
+            )
+        return result.astype(bool)._values
+    if is_object_dtype(key):
+        # key might be object-dtype bool, check_array_indexer needs bool array
+        result = np.asarray(result, dtype=bool)
+    elif not is_array_like(result):
+        # GH 33924
+        # key may contain nan elements, check_array_indexer needs bool array
+        result = pd_array(result, dtype=bool)
+    return check_array_indexer(index, result)
+
+
+def convert_missing_indexer(indexer):
+    """
+    Reverse convert a missing indexer, which is a dict: return the scalar
+    indexer and a boolean indicating whether we converted.
+    """
+    if isinstance(indexer, dict):
+
+        # a missing key (but not a tuple indexer)
+        indexer = indexer["key"]
+
+        if isinstance(indexer, bool):
+            raise KeyError("cannot use a single bool to index into setitem")
+        return indexer, True
+
+    return indexer, False
+
+
+def convert_from_missing_indexer_tuple(indexer, axes):
+    """
+    Create a filtered indexer that doesn't have any missing indexers.
+    """
+
+    def get_indexer(_i, _idx):
+        return axes[_i].get_loc(_idx["key"]) if isinstance(_idx, dict) else _idx
+
+    return tuple(get_indexer(_i, _idx) for _i, _idx in enumerate(indexer))
+
+
+def maybe_convert_ix(*args):
+    """
+    We likely want to take the cross-product.
+    """
+    for arg in args:
+        if not isinstance(arg, (np.ndarray, list, ABCSeries, Index)):
+            return args
+    return np.ix_(*args)
+
+
+def is_nested_tuple(tup, labels) -> bool:
+    """
+    Returns
+    -------
+    bool
+    """
+    # check for a compatible nested tuple and multiindexes among the axes
+    if not isinstance(tup, tuple):
+        return False
+
+    for k in tup:
+        if is_list_like(k) or isinstance(k, slice):
+            return isinstance(labels, MultiIndex)
+
+    return False
+
+
+def is_label_like(key) -> bool:
+    """
+    Returns
+    -------
+    bool
+    """
+    # select a label or row
+    return (
+        not isinstance(key, slice)
+        and not is_list_like_indexer(key)
+        and key is not Ellipsis
+    )
+
+
+def need_slice(obj: slice) -> bool:
+    """
+    Returns
+    -------
+    bool
+    """
+    return (
+        obj.start is not None
+        or obj.stop is not None
+        or (obj.step is not None and obj.step != 1)
+    )
+
+
+def check_deprecated_indexers(key) -> None:
+    """Check whether the key is a deprecated indexer and warn if so."""
+    if (
+        isinstance(key, set)
+        or isinstance(key, tuple)
+        and any(isinstance(x, set) for x in key)
+    ):
+        warnings.warn(
+            "Passing a set as an indexer is deprecated and will raise in "
+            "a future version. 
Use a list instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + if ( + isinstance(key, dict) + or isinstance(key, tuple) + and any(isinstance(x, dict) for x in key) + ): + warnings.warn( + "Passing a dict as an indexer is deprecated and will raise in " + "a future version. Use a list instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) diff --git a/.local/lib/python3.8/site-packages/pandas/core/internals/__init__.py b/.local/lib/python3.8/site-packages/pandas/core/internals/__init__.py new file mode 100644 index 0000000..75715bd --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/internals/__init__.py @@ -0,0 +1,60 @@ +from pandas.core.internals.api import make_block +from pandas.core.internals.array_manager import ( + ArrayManager, + SingleArrayManager, +) +from pandas.core.internals.base import ( + DataManager, + SingleDataManager, +) +from pandas.core.internals.blocks import ( # io.pytables, io.packers + Block, + DatetimeTZBlock, + ExtensionBlock, + NumericBlock, + ObjectBlock, +) +from pandas.core.internals.concat import concatenate_managers +from pandas.core.internals.managers import ( + BlockManager, + SingleBlockManager, + create_block_manager_from_blocks, +) + +__all__ = [ + "Block", + "CategoricalBlock", + "NumericBlock", + "DatetimeTZBlock", + "ExtensionBlock", + "ObjectBlock", + "make_block", + "DataManager", + "ArrayManager", + "BlockManager", + "SingleDataManager", + "SingleBlockManager", + "SingleArrayManager", + "concatenate_managers", + # this is preserved here for downstream compatibility (GH-33892) + "create_block_manager_from_blocks", +] + + +def __getattr__(name: str): + import warnings + + from pandas.util._exceptions import find_stack_level + + if name == "CategoricalBlock": + warnings.warn( + "CategoricalBlock is deprecated and will be removed in a future version. 
" + "Use ExtensionBlock instead.", + DeprecationWarning, + stacklevel=find_stack_level(), + ) + from pandas.core.internals.blocks import CategoricalBlock + + return CategoricalBlock + + raise AttributeError(f"module 'pandas.core.internals' has no attribute '{name}'") diff --git a/.local/lib/python3.8/site-packages/pandas/core/internals/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/internals/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..7d12463 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/internals/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/internals/__pycache__/api.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/internals/__pycache__/api.cpython-38.pyc new file mode 100644 index 0000000..e0b2784 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/internals/__pycache__/api.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/internals/__pycache__/array_manager.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/internals/__pycache__/array_manager.cpython-38.pyc new file mode 100644 index 0000000..69de98d Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/internals/__pycache__/array_manager.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/internals/__pycache__/base.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/internals/__pycache__/base.cpython-38.pyc new file mode 100644 index 0000000..c212b37 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/internals/__pycache__/base.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/internals/__pycache__/blocks.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/internals/__pycache__/blocks.cpython-38.pyc new file mode 100644 index 0000000..21b704d Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/internals/__pycache__/blocks.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/internals/__pycache__/concat.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/internals/__pycache__/concat.cpython-38.pyc new file mode 100644 index 0000000..ba7c024 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/internals/__pycache__/concat.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/internals/__pycache__/construction.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/internals/__pycache__/construction.cpython-38.pyc new file mode 100644 index 0000000..28a1dae Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/internals/__pycache__/construction.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/internals/__pycache__/managers.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/internals/__pycache__/managers.cpython-38.pyc new file mode 100644 index 0000000..17f04f4 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/internals/__pycache__/managers.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/internals/__pycache__/ops.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/internals/__pycache__/ops.cpython-38.pyc new file mode 100644 index 0000000..5a734d1 Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/pandas/core/internals/__pycache__/ops.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/pandas/core/internals/api.py b/.local/lib/python3.8/site-packages/pandas/core/internals/api.py
new file mode 100644
index 0000000..371f1e2
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/core/internals/api.py
@@ -0,0 +1,97 @@
+"""
+This is a pseudo-public API for downstream libraries. We ask that downstream
+authors
+
+1) Try to avoid using internals directly altogether, and failing that,
+2) Use only functions exposed here (or in core.internals)
+
+"""
+from __future__ import annotations
+
+import numpy as np
+
+from pandas._libs.internals import BlockPlacement
+from pandas._typing import Dtype
+
+from pandas.core.dtypes.common import (
+    is_datetime64tz_dtype,
+    is_period_dtype,
+    pandas_dtype,
+)
+
+from pandas.core.arrays import DatetimeArray
+from pandas.core.construction import extract_array
+from pandas.core.internals.blocks import (
+    Block,
+    DatetimeTZBlock,
+    ExtensionBlock,
+    check_ndim,
+    ensure_block_shape,
+    extract_pandas_array,
+    get_block_type,
+    maybe_coerce_values,
+)
+
+
+def make_block(
+    values, placement, klass=None, ndim=None, dtype: Dtype | None = None
+) -> Block:
+    """
+    This is a pseudo-public analogue to blocks.new_block.
+
+    We ask that downstream libraries use this rather than any fully-internal
+    APIs, including but not limited to:
+
+    - core.internals.blocks.make_block
+    - Block.make_block
+    - Block.make_block_same_class
+    - Block.__init__
+    """
+    if dtype is not None:
+        dtype = pandas_dtype(dtype)
+
+    values, dtype = extract_pandas_array(values, dtype, ndim)
+
+    if klass is ExtensionBlock and is_period_dtype(values.dtype):
+        # GH-44681 changed PeriodArray to be stored in the 2D
+        # NDArrayBackedExtensionBlock instead of ExtensionBlock
+        # -> still allow ExtensionBlock to be passed in this case for back compat
+        klass = None
+
+    if klass is None:
+        dtype = dtype or values.dtype
+        klass = get_block_type(dtype)
+
+    elif klass is DatetimeTZBlock and not is_datetime64tz_dtype(values.dtype):
+        # pyarrow calls get here
+        values = DatetimeArray._simple_new(values, dtype=dtype)
+
+    if not isinstance(placement, BlockPlacement):
+        placement = BlockPlacement(placement)
+
+    ndim = maybe_infer_ndim(values, placement, ndim)
+    if is_datetime64tz_dtype(values.dtype) or is_period_dtype(values.dtype):
+        # GH#41168 ensure we can pass 1D dt64tz values
+        # More generally, any EA dtype that isn't is_1d_only_ea_dtype
+        values = extract_array(values, extract_numpy=True)
+        values = ensure_block_shape(values, ndim)
+
+    check_ndim(values, placement, ndim)
+    values = maybe_coerce_values(values)
+    return klass(values, ndim=ndim, placement=placement)
+
+
+def maybe_infer_ndim(values, placement: BlockPlacement, ndim: int | None) -> int:
+    """
+    If `ndim` is not provided, infer it from placement and values.
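+
+    (Clarifying summary of the rule below: extension-dtype values infer
+    ndim=2 when they have a single block placement -- one column of a
+    DataFrame -- and ndim=1 otherwise; plain ndarrays use ``values.ndim``.)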
+ """ + if ndim is None: + # GH#38134 Block constructor now assumes ndim is not None + if not isinstance(values.dtype, np.dtype): + if len(placement) != 1: + ndim = 1 + else: + ndim = 2 + else: + ndim = values.ndim + return ndim diff --git a/.local/lib/python3.8/site-packages/pandas/core/internals/array_manager.py b/.local/lib/python3.8/site-packages/pandas/core/internals/array_manager.py new file mode 100644 index 0000000..4bd8c70 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/internals/array_manager.py @@ -0,0 +1,1367 @@ +""" +Experimental manager based on storing a collection of 1D arrays +""" +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Hashable, + TypeVar, +) + +import numpy as np + +from pandas._libs import ( + NaT, + lib, +) +from pandas._typing import ( + ArrayLike, + DtypeObj, +) +from pandas.util._validators import validate_bool_kwarg + +from pandas.core.dtypes.cast import ( + astype_array_safe, + ensure_dtype_can_hold_na, + infer_dtype_from_scalar, + soft_convert_objects, +) +from pandas.core.dtypes.common import ( + ensure_platform_int, + is_datetime64_ns_dtype, + is_dtype_equal, + is_extension_array_dtype, + is_numeric_dtype, + is_object_dtype, + is_timedelta64_ns_dtype, +) +from pandas.core.dtypes.dtypes import ( + ExtensionDtype, + PandasDtype, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) +from pandas.core.dtypes.inference import is_inferred_bool_dtype +from pandas.core.dtypes.missing import ( + array_equals, + isna, + na_value_for_dtype, +) + +import pandas.core.algorithms as algos +from pandas.core.array_algos.quantile import quantile_compat +from pandas.core.array_algos.take import take_1d +from pandas.core.arrays import ( + DatetimeArray, + ExtensionArray, + PandasArray, + TimedeltaArray, +) +from pandas.core.arrays.sparse import SparseDtype +from pandas.core.construction import ( + ensure_wrapped_if_datetimelike, + extract_array, + sanitize_array, +) +from pandas.core.indexers import ( + maybe_convert_indices, + validate_indices, +) +from pandas.core.indexes.api import ( + Index, + ensure_index, +) +from pandas.core.internals.base import ( + DataManager, + SingleDataManager, + interleaved_dtype, +) +from pandas.core.internals.blocks import ( + ensure_block_shape, + external_values, + extract_pandas_array, + maybe_coerce_values, + new_block, + to_native_types, +) + +if TYPE_CHECKING: + from pandas import Float64Index + + +T = TypeVar("T", bound="BaseArrayManager") + + +class BaseArrayManager(DataManager): + """ + Core internal data structure to implement DataFrame and Series. + + Alternative to the BlockManager, storing a list of 1D arrays instead of + Blocks. 
+ + This is *not* a public API class + + Parameters + ---------- + arrays : Sequence of arrays + axes : Sequence of Index + verify_integrity : bool, default True + + """ + + __slots__ = [ + "_axes", # private attribute, because 'axes' has different order, see below + "arrays", + ] + + arrays: list[np.ndarray | ExtensionArray] + _axes: list[Index] + + def __init__( + self, + arrays: list[np.ndarray | ExtensionArray], + axes: list[Index], + verify_integrity: bool = True, + ): + raise NotImplementedError + + def make_empty(self: T, axes=None) -> T: + """Return an empty ArrayManager with the items axis of len 0 (no columns)""" + if axes is None: + axes = [self.axes[1:], Index([])] + + arrays: list[np.ndarray | ExtensionArray] = [] + return type(self)(arrays, axes) + + @property + def items(self) -> Index: + return self._axes[-1] + + @property + # error: Signature of "axes" incompatible with supertype "DataManager" + def axes(self) -> list[Index]: # type: ignore[override] + # mypy doesn't work to override attribute with property + # see https://github.com/python/mypy/issues/4125 + """Axes is BlockManager-compatible order (columns, rows)""" + return [self._axes[1], self._axes[0]] + + @property + def shape_proper(self) -> tuple[int, ...]: + # this returns (n_rows, n_columns) + return tuple(len(ax) for ax in self._axes) + + @staticmethod + def _normalize_axis(axis: int) -> int: + # switch axis + axis = 1 if axis == 0 else 0 + return axis + + def set_axis(self, axis: int, new_labels: Index) -> None: + # Caller is responsible for ensuring we have an Index object. + self._validate_set_axis(axis, new_labels) + axis = self._normalize_axis(axis) + self._axes[axis] = new_labels + + def get_dtypes(self): + return np.array([arr.dtype for arr in self.arrays], dtype="object") + + def __getstate__(self): + return self.arrays, self._axes + + def __setstate__(self, state): + self.arrays = state[0] + self._axes = state[1] + + def __repr__(self) -> str: + output = type(self).__name__ + output += f"\nIndex: {self._axes[0]}" + if self.ndim == 2: + output += f"\nColumns: {self._axes[1]}" + output += f"\n{len(self.arrays)} arrays:" + for arr in self.arrays: + output += f"\n{arr.dtype}" + return output + + def apply( + self: T, + f, + align_keys: list[str] | None = None, + ignore_failures: bool = False, + **kwargs, + ) -> T: + """ + Iterate over the arrays, collect and create a new ArrayManager. + + Parameters + ---------- + f : str or callable + Name of the Array method to apply. 
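Although the class is internal, pandas of this era (1.3 through 1.5) exposes an experimental option that makes new DataFrames use it, which is the easiest way to observe the one-array-per-column storage described above; the option name is assumed from that version range:

    import pandas as pd

    pd.options.mode.data_manager = "array"  # experimental switch, pandas 1.3-1.5

    df = pd.DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
    mgr = df._mgr
    print(type(mgr).__name__)   # ArrayManager
    print(len(mgr.arrays))      # 2 -- one 1D array per column
    print(mgr.axes[0])          # columns first: BlockManager-compatible order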
+ align_keys: List[str] or None, default None + ignore_failures: bool, default False + **kwargs + Keywords to pass to `f` + + Returns + ------- + ArrayManager + """ + assert "filter" not in kwargs + + align_keys = align_keys or [] + result_arrays: list[np.ndarray] = [] + result_indices: list[int] = [] + # fillna: Series/DataFrame is responsible for making sure value is aligned + + aligned_args = {k: kwargs[k] for k in align_keys} + + if f == "apply": + f = kwargs.pop("func") + + for i, arr in enumerate(self.arrays): + + if aligned_args: + + for k, obj in aligned_args.items(): + if isinstance(obj, (ABCSeries, ABCDataFrame)): + # The caller is responsible for ensuring that + # obj.axes[-1].equals(self.items) + if obj.ndim == 1: + kwargs[k] = obj.iloc[i] + else: + kwargs[k] = obj.iloc[:, i]._values + else: + # otherwise we have an array-like + kwargs[k] = obj[i] + + try: + if callable(f): + applied = f(arr, **kwargs) + else: + applied = getattr(arr, f)(**kwargs) + except (TypeError, NotImplementedError): + if not ignore_failures: + raise + continue + # if not isinstance(applied, ExtensionArray): + # # TODO not all EA operations return new EAs (eg astype) + # applied = array(applied) + result_arrays.append(applied) + result_indices.append(i) + + new_axes: list[Index] + if ignore_failures: + # TODO copy? + new_axes = [self._axes[0], self._axes[1][result_indices]] + else: + new_axes = self._axes + + # error: Argument 1 to "ArrayManager" has incompatible type "List[ndarray]"; + # expected "List[Union[ndarray, ExtensionArray]]" + return type(self)(result_arrays, new_axes) # type: ignore[arg-type] + + def apply_with_block(self: T, f, align_keys=None, swap_axis=True, **kwargs) -> T: + # switch axis to follow BlockManager logic + if swap_axis and "axis" in kwargs and self.ndim == 2: + kwargs["axis"] = 1 if kwargs["axis"] == 0 else 0 + + align_keys = align_keys or [] + aligned_args = {k: kwargs[k] for k in align_keys} + + result_arrays = [] + + for i, arr in enumerate(self.arrays): + + if aligned_args: + for k, obj in aligned_args.items(): + if isinstance(obj, (ABCSeries, ABCDataFrame)): + # The caller is responsible for ensuring that + # obj.axes[-1].equals(self.items) + if obj.ndim == 1: + if self.ndim == 2: + kwargs[k] = obj.iloc[slice(i, i + 1)]._values + else: + kwargs[k] = obj.iloc[:]._values + else: + kwargs[k] = obj.iloc[:, [i]]._values + else: + # otherwise we have an ndarray + if obj.ndim == 2: + kwargs[k] = obj[[i]] + + # error: Item "ExtensionArray" of "Union[Any, ExtensionArray]" has no + # attribute "tz" + if hasattr(arr, "tz") and arr.tz is None: # type: ignore[union-attr] + # DatetimeArray needs to be converted to ndarray for DatetimeLikeBlock + + # error: Item "ExtensionArray" of "Union[Any, ExtensionArray]" has no + # attribute "_data" + arr = arr._data # type: ignore[union-attr] + elif arr.dtype.kind == "m" and not isinstance(arr, np.ndarray): + # TimedeltaArray needs to be converted to ndarray for TimedeltaBlock + + # error: "ExtensionArray" has no attribute "_data" + arr = arr._data # type: ignore[attr-defined] + + if self.ndim == 2: + arr = ensure_block_shape(arr, 2) + block = new_block(arr, placement=slice(0, 1, 1), ndim=2) + else: + block = new_block(arr, placement=slice(0, len(self), 1), ndim=1) + + applied = getattr(block, f)(**kwargs) + if isinstance(applied, list): + applied = applied[0] + arr = applied.values + if self.ndim == 2 and arr.ndim == 2: + # 2D for np.ndarray or DatetimeArray/TimedeltaArray + assert len(arr) == 1 + # error: No overload variant of "__getitem__" of 
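The try/except-and-skip pattern in apply above is the backbone of ignore_failures; stripped of the alignment machinery it reduces to this standalone sketch (names are mine):

    import numpy as np

    def apply_columnwise(arrays, func, ignore_failures=False):
        # Apply func to each 1D column array; on TypeError either re-raise
        # or drop the column, recording which indices survived.
        results, kept = [], []
        for i, arr in enumerate(arrays):
            try:
                results.append(func(arr))
            except (TypeError, NotImplementedError):
                if not ignore_failures:
                    raise
                continue
            kept.append(i)
        return results, kept

    cols = [np.array([1, 2, 3]), np.array(["x", "y", "z"], dtype=object)]
    # Multiplying strings by a float raises TypeError, so only column 0 survives.
    results, kept = apply_columnwise(cols, lambda a: a * 1.5, ignore_failures=True)
    assert kept == [0]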
"ExtensionArray" + # matches argument type "Tuple[int, slice]" + arr = arr[0, :] # type: ignore[call-overload] + result_arrays.append(arr) + + return type(self)(result_arrays, self._axes) + + def where(self: T, other, cond, align: bool) -> T: + if align: + align_keys = ["other", "cond"] + else: + align_keys = ["cond"] + other = extract_array(other, extract_numpy=True) + + return self.apply_with_block( + "where", + align_keys=align_keys, + other=other, + cond=cond, + ) + + # TODO what is this used for? + # def setitem(self, indexer, value) -> ArrayManager: + # return self.apply_with_block("setitem", indexer=indexer, value=value) + + def putmask(self, mask, new, align: bool = True): + if align: + align_keys = ["new", "mask"] + else: + align_keys = ["mask"] + new = extract_array(new, extract_numpy=True) + + return self.apply_with_block( + "putmask", + align_keys=align_keys, + mask=mask, + new=new, + ) + + def diff(self: T, n: int, axis: int) -> T: + if axis == 1: + # DataFrame only calls this for n=0, in which case performing it + # with axis=0 is equivalent + assert n == 0 + axis = 0 + return self.apply(algos.diff, n=n, axis=axis) + + def interpolate(self: T, **kwargs) -> T: + return self.apply_with_block("interpolate", swap_axis=False, **kwargs) + + def shift(self: T, periods: int, axis: int, fill_value) -> T: + if fill_value is lib.no_default: + fill_value = None + + if axis == 1 and self.ndim == 2: + # TODO column-wise shift + raise NotImplementedError + + return self.apply_with_block( + "shift", periods=periods, axis=axis, fill_value=fill_value + ) + + def fillna(self: T, value, limit, inplace: bool, downcast) -> T: + return self.apply_with_block( + "fillna", value=value, limit=limit, inplace=inplace, downcast=downcast + ) + + def astype(self: T, dtype, copy: bool = False, errors: str = "raise") -> T: + return self.apply(astype_array_safe, dtype=dtype, copy=copy, errors=errors) + + def convert( + self: T, + copy: bool = True, + datetime: bool = True, + numeric: bool = True, + timedelta: bool = True, + ) -> T: + def _convert(arr): + if is_object_dtype(arr.dtype): + # extract PandasArray for tests that patch PandasArray._typ + arr = np.asarray(arr) + return soft_convert_objects( + arr, + datetime=datetime, + numeric=numeric, + timedelta=timedelta, + copy=copy, + ) + else: + return arr.copy() if copy else arr + + return self.apply(_convert) + + def replace_regex(self: T, **kwargs) -> T: + return self.apply_with_block("_replace_regex", **kwargs) + + def replace(self: T, to_replace, value, inplace: bool) -> T: + inplace = validate_bool_kwarg(inplace, "inplace") + assert np.ndim(value) == 0, value + # TODO "replace" is right now implemented on the blocks, we should move + # it to general array algos so it can be reused here + return self.apply_with_block( + "replace", value=value, to_replace=to_replace, inplace=inplace + ) + + def replace_list( + self: T, + src_list: list[Any], + dest_list: list[Any], + inplace: bool = False, + regex: bool = False, + ) -> T: + """do a list replace""" + inplace = validate_bool_kwarg(inplace, "inplace") + + return self.apply_with_block( + "replace_list", + src_list=src_list, + dest_list=dest_list, + inplace=inplace, + regex=regex, + ) + + def to_native_types(self, **kwargs): + return self.apply(to_native_types, **kwargs) + + @property + def is_mixed_type(self) -> bool: + return True + + @property + def is_numeric_mixed_type(self) -> bool: + return all(is_numeric_dtype(t) for t in self.get_dtypes()) + + @property + def any_extension_types(self) -> bool: + 
"""Whether any of the blocks in this manager are extension blocks""" + return False # any(block.is_extension for block in self.blocks) + + @property + def is_view(self) -> bool: + """return a boolean if we are a single block and are a view""" + # TODO what is this used for? + return False + + @property + def is_single_block(self) -> bool: + return False + + def _get_data_subset(self: T, predicate: Callable) -> T: + indices = [i for i, arr in enumerate(self.arrays) if predicate(arr)] + arrays = [self.arrays[i] for i in indices] + # TODO copy? + # Note: using Index.take ensures we can retain e.g. DatetimeIndex.freq, + # see test_describe_datetime_columns + taker = np.array(indices, dtype="intp") + new_cols = self._axes[1].take(taker) + new_axes = [self._axes[0], new_cols] + return type(self)(arrays, new_axes, verify_integrity=False) + + def get_bool_data(self: T, copy: bool = False) -> T: + """ + Select columns that are bool-dtype and object-dtype columns that are all-bool. + + Parameters + ---------- + copy : bool, default False + Whether to copy the blocks + """ + return self._get_data_subset(is_inferred_bool_dtype) + + def get_numeric_data(self: T, copy: bool = False) -> T: + """ + Select columns that have a numeric dtype. + + Parameters + ---------- + copy : bool, default False + Whether to copy the blocks + """ + return self._get_data_subset( + lambda arr: is_numeric_dtype(arr.dtype) + or getattr(arr.dtype, "_is_numeric", False) + ) + + def copy(self: T, deep=True) -> T: + """ + Make deep or shallow copy of ArrayManager + + Parameters + ---------- + deep : bool or string, default True + If False, return shallow copy (do not copy data) + If 'all', copy data and a deep copy of the index + + Returns + ------- + BlockManager + """ + # this preserves the notion of view copying of axes + if deep: + # hit in e.g. tests.io.json.test_pandas + + def copy_func(ax): + return ax.copy(deep=True) if deep == "all" else ax.view() + + new_axes = [copy_func(ax) for ax in self._axes] + else: + new_axes = list(self._axes) + + if deep: + new_arrays = [arr.copy() for arr in self.arrays] + else: + new_arrays = list(self.arrays) + return type(self)(new_arrays, new_axes, verify_integrity=False) + + def reindex_indexer( + self: T, + new_axis, + indexer, + axis: int, + fill_value=None, + allow_dups: bool = False, + copy: bool = True, + # ignored keywords + consolidate: bool = True, + only_slice: bool = False, + # ArrayManager specific keywords + use_na_proxy: bool = False, + ) -> T: + axis = self._normalize_axis(axis) + return self._reindex_indexer( + new_axis, + indexer, + axis, + fill_value, + allow_dups, + copy, + use_na_proxy, + ) + + def _reindex_indexer( + self: T, + new_axis, + indexer, + axis: int, + fill_value=None, + allow_dups: bool = False, + copy: bool = True, + use_na_proxy: bool = False, + ) -> T: + """ + Parameters + ---------- + new_axis : Index + indexer : ndarray of int64 or None + axis : int + fill_value : object, default None + allow_dups : bool, default False + copy : bool, default True + + + pandas-indexer with -1's only. 
+ """ + if indexer is None: + if new_axis is self._axes[axis] and not copy: + return self + + result = self.copy(deep=copy) + result._axes = list(self._axes) + result._axes[axis] = new_axis + return result + + # some axes don't allow reindexing with dups + if not allow_dups: + self._axes[axis]._validate_can_reindex(indexer) + + if axis >= self.ndim: + raise IndexError("Requested axis not found in manager") + + if axis == 1: + new_arrays = [] + for i in indexer: + if i == -1: + arr = self._make_na_array( + fill_value=fill_value, use_na_proxy=use_na_proxy + ) + else: + arr = self.arrays[i] + if copy: + arr = arr.copy() + new_arrays.append(arr) + + else: + validate_indices(indexer, len(self._axes[0])) + indexer = ensure_platform_int(indexer) + mask = indexer == -1 + needs_masking = mask.any() + new_arrays = [ + take_1d( + arr, + indexer, + allow_fill=needs_masking, + fill_value=fill_value, + mask=mask, + # if fill_value is not None else blk.fill_value + ) + for arr in self.arrays + ] + + new_axes = list(self._axes) + new_axes[axis] = new_axis + + return type(self)(new_arrays, new_axes, verify_integrity=False) + + def take(self: T, indexer, axis: int = 1, verify: bool = True) -> T: + """ + Take items along any axis. + """ + axis = self._normalize_axis(axis) + + indexer = ( + np.arange(indexer.start, indexer.stop, indexer.step, dtype="int64") + if isinstance(indexer, slice) + else np.asanyarray(indexer, dtype="int64") + ) + + if not indexer.ndim == 1: + raise ValueError("indexer should be 1-dimensional") + + n = self.shape_proper[axis] + indexer = maybe_convert_indices(indexer, n, verify=verify) + + new_labels = self._axes[axis].take(indexer) + return self._reindex_indexer( + new_axis=new_labels, indexer=indexer, axis=axis, allow_dups=True + ) + + def _make_na_array(self, fill_value=None, use_na_proxy=False): + if use_na_proxy: + assert fill_value is None + return NullArrayProxy(self.shape_proper[0]) + + if fill_value is None: + fill_value = np.nan + + dtype, fill_value = infer_dtype_from_scalar(fill_value) + # error: Argument "dtype" to "empty" has incompatible type "Union[dtype[Any], + # ExtensionDtype]"; expected "Union[dtype[Any], None, type, _SupportsDType, str, + # Union[Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any], + # _DTypeDict, Tuple[Any, Any]]]" + values = np.empty(self.shape_proper[0], dtype=dtype) # type: ignore[arg-type] + values.fill(fill_value) + return values + + def _equal_values(self, other) -> bool: + """ + Used in .equals defined in base class. Only check the column values + assuming shape and indexes have already been checked. 
+ """ + for left, right in zip(self.arrays, other.arrays): + if not array_equals(left, right): + return False + else: + return True + + # TODO + # to_dict + + +class ArrayManager(BaseArrayManager): + ndim = 2 + + def __init__( + self, + arrays: list[np.ndarray | ExtensionArray], + axes: list[Index], + verify_integrity: bool = True, + ): + # Note: we are storing the axes in "_axes" in the (row, columns) order + # which contrasts the order how it is stored in BlockManager + self._axes = axes + self.arrays = arrays + + if verify_integrity: + self._axes = [ensure_index(ax) for ax in axes] + arrays = [extract_pandas_array(x, None, 1)[0] for x in arrays] + self.arrays = [maybe_coerce_values(arr) for arr in arrays] + self._verify_integrity() + + def _verify_integrity(self) -> None: + n_rows, n_columns = self.shape_proper + if not len(self.arrays) == n_columns: + raise ValueError( + "Number of passed arrays must equal the size of the column Index: " + f"{len(self.arrays)} arrays vs {n_columns} columns." + ) + for arr in self.arrays: + if not len(arr) == n_rows: + raise ValueError( + "Passed arrays should have the same length as the rows Index: " + f"{len(arr)} vs {n_rows} rows" + ) + if not isinstance(arr, (np.ndarray, ExtensionArray)): + raise ValueError( + "Passed arrays should be np.ndarray or ExtensionArray instances, " + f"got {type(arr)} instead" + ) + if not arr.ndim == 1: + raise ValueError( + "Passed arrays should be 1-dimensional, got array with " + f"{arr.ndim} dimensions instead." + ) + + # -------------------------------------------------------------------- + # Indexing + + def fast_xs(self, loc: int) -> ArrayLike: + """ + Return the array corresponding to `frame.iloc[loc]`. + + Parameters + ---------- + loc : int + + Returns + ------- + np.ndarray or ExtensionArray + """ + dtype = interleaved_dtype([arr.dtype for arr in self.arrays]) + + values = [arr[loc] for arr in self.arrays] + if isinstance(dtype, ExtensionDtype): + result = dtype.construct_array_type()._from_sequence(values, dtype=dtype) + # for datetime64/timedelta64, the np.ndarray constructor cannot handle pd.NaT + elif is_datetime64_ns_dtype(dtype): + result = DatetimeArray._from_sequence(values, dtype=dtype)._data + elif is_timedelta64_ns_dtype(dtype): + result = TimedeltaArray._from_sequence(values, dtype=dtype)._data + else: + result = np.array(values, dtype=dtype) + return result + + def get_slice(self, slobj: slice, axis: int = 0) -> ArrayManager: + axis = self._normalize_axis(axis) + + if axis == 0: + arrays = [arr[slobj] for arr in self.arrays] + elif axis == 1: + arrays = self.arrays[slobj] + + new_axes = list(self._axes) + new_axes[axis] = new_axes[axis]._getitem_slice(slobj) + + return type(self)(arrays, new_axes, verify_integrity=False) + + def iget(self, i: int) -> SingleArrayManager: + """ + Return the data as a SingleArrayManager. + """ + values = self.arrays[i] + return SingleArrayManager([values], [self._axes[0]]) + + def iget_values(self, i: int) -> ArrayLike: + """ + Return the data for column i as the values (ndarray or ExtensionArray). + """ + return self.arrays[i] + + @property + def column_arrays(self) -> list[ArrayLike]: + """ + Used in the JSON C code to access column arrays. + """ + + return [np.asarray(arr) for arr in self.arrays] + + def iset( + self, loc: int | slice | np.ndarray, value: ArrayLike, inplace: bool = False + ): + """ + Set new column(s). + + This changes the ArrayManager in-place, but replaces (an) existing + column(s), not changing column values in-place). 
+ + Parameters + ---------- + loc : integer, slice or boolean mask + Positional location (already bounds checked) + value : np.ndarray or ExtensionArray + inplace : bool, default False + Whether overwrite existing array as opposed to replacing it. + """ + # single column -> single integer index + if lib.is_integer(loc): + + # TODO can we avoid needing to unpack this here? That means converting + # DataFrame into 1D array when loc is an integer + if isinstance(value, np.ndarray) and value.ndim == 2: + assert value.shape[1] == 1 + value = value[:, 0] + + # TODO we receive a datetime/timedelta64 ndarray from DataFrame._iset_item + # but we should avoid that and pass directly the proper array + value = maybe_coerce_values(value) + + assert isinstance(value, (np.ndarray, ExtensionArray)) + assert value.ndim == 1 + assert len(value) == len(self._axes[0]) + self.arrays[loc] = value + return + + # multiple columns -> convert slice or array to integer indices + elif isinstance(loc, slice): + indices = range( + loc.start if loc.start is not None else 0, + loc.stop if loc.stop is not None else self.shape_proper[1], + loc.step if loc.step is not None else 1, + ) + else: + assert isinstance(loc, np.ndarray) + assert loc.dtype == "bool" + # error: Incompatible types in assignment (expression has type "ndarray", + # variable has type "range") + indices = np.nonzero(loc)[0] # type: ignore[assignment] + + assert value.ndim == 2 + assert value.shape[0] == len(self._axes[0]) + + for value_idx, mgr_idx in enumerate(indices): + # error: No overload variant of "__getitem__" of "ExtensionArray" matches + # argument type "Tuple[slice, int]" + value_arr = value[:, value_idx] # type: ignore[call-overload] + self.arrays[mgr_idx] = value_arr + return + + def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None: + """ + Insert item at selected position. + + Parameters + ---------- + loc : int + item : hashable + value : np.ndarray or ExtensionArray + """ + # insert to the axis; this could possibly raise a TypeError + new_axis = self.items.insert(loc, item) + + value = extract_array(value, extract_numpy=True) + if value.ndim == 2: + if value.shape[0] == 1: + # error: No overload variant of "__getitem__" of "ExtensionArray" + # matches argument type "Tuple[int, slice]" + value = value[0, :] # type: ignore[call-overload] + else: + raise ValueError( + f"Expected a 1D array, got an array with shape {value.shape}" + ) + value = maybe_coerce_values(value) + + # TODO self.arrays can be empty + # assert len(value) == len(self.arrays[0]) + + # TODO is this copy needed? + arrays = self.arrays.copy() + arrays.insert(loc, value) + + self.arrays = arrays + self._axes[1] = new_axis + + def idelete(self, indexer): + """ + Delete selected locations in-place (new block and array, same BlockManager) + """ + to_keep = np.ones(self.shape[0], dtype=np.bool_) + to_keep[indexer] = False + + self.arrays = [self.arrays[i] for i in np.nonzero(to_keep)[0]] + self._axes = [self._axes[0], self._axes[1][to_keep]] + return self + + # -------------------------------------------------------------------- + # Array-wise Operation + + def grouped_reduce(self: T, func: Callable, ignore_failures: bool = False) -> T: + """ + Apply grouped reduction function columnwise, returning a new ArrayManager. + + Parameters + ---------- + func : grouped reduction function + ignore_failures : bool, default False + Whether to drop columns where func raises TypeError. 
+ + Returns + ------- + ArrayManager + """ + result_arrays: list[np.ndarray] = [] + result_indices: list[int] = [] + + for i, arr in enumerate(self.arrays): + # grouped_reduce functions all expect 2D arrays + arr = ensure_block_shape(arr, ndim=2) + try: + res = func(arr) + except (TypeError, NotImplementedError): + if not ignore_failures: + raise + continue + + if res.ndim == 2: + # reverse of ensure_block_shape + assert res.shape[0] == 1 + res = res[0] + + result_arrays.append(res) + result_indices.append(i) + + if len(result_arrays) == 0: + index = Index([None]) # placeholder + else: + index = Index(range(result_arrays[0].shape[0])) + + if ignore_failures: + columns = self.items[np.array(result_indices, dtype="int64")] + else: + columns = self.items + + # error: Argument 1 to "ArrayManager" has incompatible type "List[ndarray]"; + # expected "List[Union[ndarray, ExtensionArray]]" + return type(self)(result_arrays, [index, columns]) # type: ignore[arg-type] + + def reduce( + self: T, func: Callable, ignore_failures: bool = False + ) -> tuple[T, np.ndarray]: + """ + Apply reduction function column-wise, returning a single-row ArrayManager. + + Parameters + ---------- + func : reduction function + ignore_failures : bool, default False + Whether to drop columns where func raises TypeError. + + Returns + ------- + ArrayManager + np.ndarray + Indexer of column indices that are retained. + """ + result_arrays: list[np.ndarray] = [] + result_indices: list[int] = [] + for i, arr in enumerate(self.arrays): + try: + res = func(arr, axis=0) + except TypeError: + if not ignore_failures: + raise + else: + # TODO NaT doesn't preserve dtype, so we need to ensure to create + # a timedelta result array if original was timedelta + # what if datetime results in timedelta? (eg std) + if res is NaT and is_timedelta64_ns_dtype(arr.dtype): + result_arrays.append(np.array(["NaT"], dtype="timedelta64[ns]")) + else: + # error: Argument 1 to "append" of "list" has incompatible type + # "ExtensionArray"; expected "ndarray" + result_arrays.append( + sanitize_array([res], None) # type: ignore[arg-type] + ) + result_indices.append(i) + + index = Index._simple_new(np.array([None], dtype=object)) # placeholder + if ignore_failures: + indexer = np.array(result_indices) + columns = self.items[result_indices] + else: + indexer = np.arange(self.shape[0]) + columns = self.items + + # error: Argument 1 to "ArrayManager" has incompatible type "List[ndarray]"; + # expected "List[Union[ndarray, ExtensionArray]]" + new_mgr = type(self)(result_arrays, [index, columns]) # type: ignore[arg-type] + return new_mgr, indexer + + def operate_blockwise(self, other: ArrayManager, array_op) -> ArrayManager: + """ + Apply array_op blockwise with another (aligned) BlockManager. + """ + # TODO what if `other` is BlockManager ? 
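operate_blockwise for this manager is simply a zip over aligned column arrays; a standalone mirror (names are mine) of the loop that follows:

    import numpy as np

    def operate_columnwise(left_arrays, right_arrays, op):
        # Pair the i-th column of each (already aligned) side and apply op,
        # as ArrayManager.operate_blockwise does below.
        return [op(left, right) for left, right in zip(left_arrays, right_arrays)]

    a = [np.array([1, 2]), np.array([3.0, 4.0])]
    b = [np.array([10, 20]), np.array([0.5, 0.5])]
    print(operate_columnwise(a, b, np.add))  # [array([11, 22]), array([3.5, 4.5])]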
+ left_arrays = self.arrays + right_arrays = other.arrays + result_arrays = [ + array_op(left, right) for left, right in zip(left_arrays, right_arrays) + ] + return type(self)(result_arrays, self._axes) + + def quantile( + self, + *, + qs: Float64Index, + axis: int = 0, + transposed: bool = False, + interpolation="linear", + ) -> ArrayManager: + + arrs = [ensure_block_shape(x, 2) for x in self.arrays] + assert axis == 1 + new_arrs = [ + quantile_compat(x, np.asarray(qs._values), interpolation) for x in arrs + ] + for i, arr in enumerate(new_arrs): + if arr.ndim == 2: + assert arr.shape[0] == 1, arr.shape + new_arrs[i] = arr[0] + + axes = [qs, self._axes[1]] + return type(self)(new_arrs, axes) + + # ---------------------------------------------------------------- + + def unstack(self, unstacker, fill_value) -> ArrayManager: + """ + Return a BlockManager with all blocks unstacked. + + Parameters + ---------- + unstacker : reshape._Unstacker + fill_value : Any + fill_value for newly introduced missing values. + + Returns + ------- + unstacked : BlockManager + """ + indexer, _ = unstacker._indexer_and_to_sort + if unstacker.mask.all(): + new_indexer = indexer + allow_fill = False + new_mask2D = None + needs_masking = None + else: + new_indexer = np.full(unstacker.mask.shape, -1) + new_indexer[unstacker.mask] = indexer + allow_fill = True + # calculating the full mask once and passing it to take_1d is faster + # than letting take_1d calculate it in each repeated call + new_mask2D = (~unstacker.mask).reshape(*unstacker.full_shape) + needs_masking = new_mask2D.any(axis=0) + new_indexer2D = new_indexer.reshape(*unstacker.full_shape) + new_indexer2D = ensure_platform_int(new_indexer2D) + + new_arrays = [] + for arr in self.arrays: + for i in range(unstacker.full_shape[1]): + if allow_fill: + # error: Value of type "Optional[Any]" is not indexable [index] + new_arr = take_1d( + arr, + new_indexer2D[:, i], + allow_fill=needs_masking[i], # type: ignore[index] + fill_value=fill_value, + mask=new_mask2D[:, i], # type: ignore[index] + ) + else: + new_arr = take_1d(arr, new_indexer2D[:, i], allow_fill=False) + new_arrays.append(new_arr) + + new_index = unstacker.new_index + new_columns = unstacker.get_new_columns(self._axes[1]) + new_axes = [new_index, new_columns] + + return type(self)(new_arrays, new_axes, verify_integrity=False) + + def as_array( + self, + dtype=None, + copy: bool = False, + na_value=lib.no_default, + ) -> np.ndarray: + """ + Convert the blockmanager data into an numpy array. + + Parameters + ---------- + dtype : object, default None + Data type of the return array. + copy : bool, default False + If True then guarantee that a copy is returned. A value of + False does not guarantee that the underlying data is not + copied. + na_value : object, default lib.no_default + Value to be used as the missing value sentinel. 
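as_array is what DataFrame.to_numpy ultimately reaches, and the interleaved_dtype call explains the familiar promotion behavior; from the public side:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"a": [1, 2], "b": [0.5, 1.5]})

    print(df.to_numpy().dtype)                       # float64 (int64 + float64)
    print(df.assign(c=["x", "y"]).to_numpy().dtype)  # object once strings join

    # na_value forces the copy-and-fill branch implemented below.
    print(pd.DataFrame({"a": [1.0, np.nan]}).to_numpy(na_value=0.0))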
+ + Returns + ------- + arr : ndarray + """ + if len(self.arrays) == 0: + empty_arr = np.empty(self.shape, dtype=float) + return empty_arr.transpose() + + # We want to copy when na_value is provided to avoid + # mutating the original object + copy = copy or na_value is not lib.no_default + + if not dtype: + dtype = interleaved_dtype([arr.dtype for arr in self.arrays]) + + if isinstance(dtype, SparseDtype): + dtype = dtype.subtype + elif isinstance(dtype, PandasDtype): + dtype = dtype.numpy_dtype + elif is_extension_array_dtype(dtype): + dtype = "object" + elif is_dtype_equal(dtype, str): + dtype = "object" + + result = np.empty(self.shape_proper, dtype=dtype) + + for i, arr in enumerate(self.arrays): + arr = arr.astype(dtype, copy=copy) + result[:, i] = arr + + if na_value is not lib.no_default: + result[isna(result)] = na_value + + return result + + +class SingleArrayManager(BaseArrayManager, SingleDataManager): + + __slots__ = [ + "_axes", # private attribute, because 'axes' has different order, see below + "arrays", + ] + + arrays: list[np.ndarray | ExtensionArray] + _axes: list[Index] + + ndim = 1 + + def __init__( + self, + arrays: list[np.ndarray | ExtensionArray], + axes: list[Index], + verify_integrity: bool = True, + ): + self._axes = axes + self.arrays = arrays + + if verify_integrity: + assert len(axes) == 1 + assert len(arrays) == 1 + self._axes = [ensure_index(ax) for ax in self._axes] + arr = arrays[0] + arr = maybe_coerce_values(arr) + arr = extract_pandas_array(arr, None, 1)[0] + self.arrays = [arr] + self._verify_integrity() + + def _verify_integrity(self) -> None: + (n_rows,) = self.shape + assert len(self.arrays) == 1 + arr = self.arrays[0] + assert len(arr) == n_rows + if not arr.ndim == 1: + raise ValueError( + "Passed array should be 1-dimensional, got array with " + f"{arr.ndim} dimensions instead." 
+ ) + + @staticmethod + def _normalize_axis(axis): + return axis + + def make_empty(self, axes=None) -> SingleArrayManager: + """Return an empty ArrayManager with index/array of length 0""" + if axes is None: + axes = [Index([], dtype=object)] + array: np.ndarray = np.array([], dtype=self.dtype) + return type(self)([array], axes) + + @classmethod + def from_array(cls, array, index): + return cls([array], [index]) + + @property + def axes(self): + return self._axes + + @property + def index(self) -> Index: + return self._axes[0] + + @property + def dtype(self): + return self.array.dtype + + def external_values(self): + """The array that Series.values returns""" + return external_values(self.array) + + def internal_values(self): + """The array that Series._values returns""" + return self.array + + def array_values(self): + """The array that Series.array returns""" + arr = self.array + if isinstance(arr, np.ndarray): + arr = PandasArray(arr) + return arr + + @property + def _can_hold_na(self) -> bool: + if isinstance(self.array, np.ndarray): + return self.array.dtype.kind not in ["b", "i", "u"] + else: + # ExtensionArray + return self.array._can_hold_na + + @property + def is_single_block(self) -> bool: + return True + + def fast_xs(self, loc: int) -> ArrayLike: + raise NotImplementedError("Use series._values[loc] instead") + + def get_slice(self, slobj: slice, axis: int = 0) -> SingleArrayManager: + if axis >= self.ndim: + raise IndexError("Requested axis not found in manager") + + new_array = self.array[slobj] + new_index = self.index._getitem_slice(slobj) + return type(self)([new_array], [new_index], verify_integrity=False) + + def getitem_mgr(self, indexer) -> SingleArrayManager: + new_array = self.array[indexer] + new_index = self.index[indexer] + return type(self)([new_array], [new_index]) + + def apply(self, func, **kwargs): + if callable(func): + new_array = func(self.array, **kwargs) + else: + new_array = getattr(self.array, func)(**kwargs) + return type(self)([new_array], self._axes) + + def setitem(self, indexer, value): + """ + Set values with indexer. + + For SingleArrayManager, this backs s[indexer] = value + + See `setitem_inplace` for a version that works inplace and doesn't + return a new Manager. + """ + return self.apply_with_block("setitem", indexer=indexer, value=value) + + def idelete(self, indexer) -> SingleArrayManager: + """ + Delete selected locations in-place (new array, same ArrayManager) + """ + to_keep = np.ones(self.shape[0], dtype=np.bool_) + to_keep[indexer] = False + + self.arrays = [self.arrays[0][to_keep]] + self._axes = [self._axes[0][to_keep]] + return self + + def _get_data_subset(self, predicate: Callable) -> SingleArrayManager: + # used in get_numeric_data / get_bool_data + if predicate(self.array): + return type(self)(self.arrays, self._axes, verify_integrity=False) + else: + return self.make_empty() + + def set_values(self, values: ArrayLike): + """ + Set (replace) the values of the SingleArrayManager in place. + + Use at your own risk! This does not check if the passed values are + valid for the current SingleArrayManager (length, dtype, etc). + """ + self.arrays[0] = values + + def to_2d_mgr(self, columns: Index) -> ArrayManager: + """ + Manager analogue of Series.to_frame + """ + arrays = [self.arrays[0]] + axes = [self.axes[0], columns] + + return ArrayManager(arrays, axes, verify_integrity=False) + + +class NullArrayProxy: + """ + Proxy object for an all-NA array. + + Only stores the length of the array, and not the dtype. 
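The numpy branch of NullArrayProxy.to_array (completed below) is "widen the dtype until it can hold NA, then fill"; the same two cast helpers imported at the top of this file show the effect (internal helpers, so version-sensitive):

    import numpy as np
    from pandas.core.dtypes.cast import ensure_dtype_can_hold_na
    from pandas.core.dtypes.missing import na_value_for_dtype

    dtype = ensure_dtype_can_hold_na(np.dtype("int64"))  # int64 -> float64
    fill = na_value_for_dtype(dtype)                     # nan
    print(np.full(3, fill, dtype=dtype))                 # [nan nan nan]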
The dtype + will only be known when actually concatenating (after determining the + common dtype, for which this proxy is ignored). + Using this object avoids that the internals/concat.py needs to determine + the proper dtype and array type. + """ + + ndim = 1 + + def __init__(self, n: int): + self.n = n + + @property + def shape(self): + return (self.n,) + + def to_array(self, dtype: DtypeObj) -> ArrayLike: + """ + Helper function to create the actual all-NA array from the NullArrayProxy + object. + + Parameters + ---------- + arr : NullArrayProxy + dtype : the dtype for the resulting array + + Returns + ------- + np.ndarray or ExtensionArray + """ + if isinstance(dtype, ExtensionDtype): + empty = dtype.construct_array_type()._from_sequence([], dtype=dtype) + indexer = -np.ones(self.n, dtype=np.intp) + return empty.take(indexer, allow_fill=True) + else: + # when introducing missing values, int becomes float, bool becomes object + dtype = ensure_dtype_can_hold_na(dtype) + fill_value = na_value_for_dtype(dtype) + arr = np.empty(self.n, dtype=dtype) + arr.fill(fill_value) + return ensure_wrapped_if_datetimelike(arr) diff --git a/.local/lib/python3.8/site-packages/pandas/core/internals/base.py b/.local/lib/python3.8/site-packages/pandas/core/internals/base.py new file mode 100644 index 0000000..b365bf9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/internals/base.py @@ -0,0 +1,226 @@ +""" +Base class for the internal managers. Both BlockManager and ArrayManager +inherit from this class. +""" +from __future__ import annotations + +from typing import ( + TypeVar, + final, +) + +import numpy as np + +from pandas._typing import ( + ArrayLike, + DtypeObj, + Shape, +) +from pandas.errors import AbstractMethodError + +from pandas.core.dtypes.cast import ( + find_common_type, + np_can_hold_element, +) + +from pandas.core.base import PandasObject +from pandas.core.indexes.api import ( + Index, + default_index, +) + +T = TypeVar("T", bound="DataManager") + + +class DataManager(PandasObject): + + # TODO share more methods/attributes + + axes: list[Index] + + @property + def items(self) -> Index: + raise AbstractMethodError(self) + + @final + def __len__(self) -> int: + return len(self.items) + + @property + def ndim(self) -> int: + return len(self.axes) + + @property + def shape(self) -> Shape: + return tuple(len(ax) for ax in self.axes) + + @final + def _validate_set_axis(self, axis: int, new_labels: Index) -> None: + # Caller is responsible for ensuring we have an Index object. + old_len = len(self.axes[axis]) + new_len = len(new_labels) + + if axis == 1 and len(self.items) == 0: + # If we are setting the index on a DataFrame with no columns, + # it is OK to change the length. + pass + + elif new_len != old_len: + raise ValueError( + f"Length mismatch: Expected axis has {old_len} elements, new " + f"values have {new_len} elements" + ) + + def reindex_indexer( + self: T, + new_axis, + indexer, + axis: int, + fill_value=None, + allow_dups: bool = False, + copy: bool = True, + consolidate: bool = True, + only_slice: bool = False, + ) -> T: + raise AbstractMethodError(self) + + @final + def reindex_axis( + self: T, + new_index: Index, + axis: int, + fill_value=None, + consolidate: bool = True, + only_slice: bool = False, + ) -> T: + """ + Conform data manager to new index. 
+ """ + new_index, indexer = self.axes[axis].reindex(new_index) + + return self.reindex_indexer( + new_index, + indexer, + axis=axis, + fill_value=fill_value, + copy=False, + consolidate=consolidate, + only_slice=only_slice, + ) + + def _equal_values(self: T, other: T) -> bool: + """ + To be implemented by the subclasses. Only check the column values + assuming shape and indexes have already been checked. + """ + raise AbstractMethodError(self) + + @final + def equals(self, other: object) -> bool: + """ + Implementation for DataFrame.equals + """ + if not isinstance(other, DataManager): + return False + + self_axes, other_axes = self.axes, other.axes + if len(self_axes) != len(other_axes): + return False + if not all(ax1.equals(ax2) for ax1, ax2 in zip(self_axes, other_axes)): + return False + + return self._equal_values(other) + + def apply( + self: T, + f, + align_keys: list[str] | None = None, + ignore_failures: bool = False, + **kwargs, + ) -> T: + raise AbstractMethodError(self) + + @final + def isna(self: T, func) -> T: + return self.apply("apply", func=func) + + # -------------------------------------------------------------------- + # Consolidation: No-ops for all but BlockManager + + def is_consolidated(self) -> bool: + return True + + def consolidate(self: T) -> T: + return self + + def _consolidate_inplace(self) -> None: + return + + +class SingleDataManager(DataManager): + ndim = 1 + + @final + @property + def array(self) -> ArrayLike: + """ + Quick access to the backing array of the Block or SingleArrayManager. + """ + # error: "SingleDataManager" has no attribute "arrays"; maybe "array" + return self.arrays[0] # type: ignore[attr-defined] + + def setitem_inplace(self, indexer, value) -> None: + """ + Set values with indexer. + + For Single[Block/Array]Manager, this backs s[indexer] = value + + This is an inplace version of `setitem()`, mutating the manager/values + in place, not returning a new Manager (and Block), and thus never changing + the dtype. + """ + arr = self.array + + # EAs will do this validation in their own __setitem__ methods. + if isinstance(arr, np.ndarray): + # Note: checking for ndarray instead of np.dtype means we exclude + # dt64/td64, which do their own validation. + value = np_can_hold_element(arr.dtype, value) + + arr[indexer] = value + + def grouped_reduce(self, func, ignore_failures: bool = False): + """ + ignore_failures : bool, default False + Not used; for compatibility with ArrayManager/BlockManager. + """ + + arr = self.array + res = func(arr) + index = default_index(len(res)) + + mgr = type(self).from_array(res, index) + return mgr + + @classmethod + def from_array(cls, arr: ArrayLike, index: Index): + raise AbstractMethodError(cls) + + +def interleaved_dtype(dtypes: list[DtypeObj]) -> DtypeObj | None: + """ + Find the common dtype for `blocks`. + + Parameters + ---------- + blocks : List[DtypeObj] + + Returns + ------- + dtype : np.dtype, ExtensionDtype, or None + None is returned when `blocks` is empty. 
+ """ + if not len(dtypes): + return None + + return find_common_type(dtypes) diff --git a/.local/lib/python3.8/site-packages/pandas/core/internals/blocks.py b/.local/lib/python3.8/site-packages/pandas/core/internals/blocks.py new file mode 100644 index 0000000..5810f35 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/internals/blocks.py @@ -0,0 +1,2240 @@ +from __future__ import annotations + +from functools import wraps +import re +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Iterable, + Sequence, + cast, + final, +) +import warnings + +import numpy as np + +from pandas._libs import ( + Timestamp, + algos as libalgos, + internals as libinternals, + lib, + writers, +) +from pandas._libs.internals import BlockPlacement +from pandas._typing import ( + ArrayLike, + DtypeObj, + F, + Shape, + npt, +) +from pandas.compat import np_version_under1p20 +from pandas.util._decorators import cache_readonly +from pandas.util._exceptions import find_stack_level +from pandas.util._validators import validate_bool_kwarg + +from pandas.core.dtypes.cast import ( + astype_array_safe, + can_hold_element, + find_common_type, + infer_dtype_from, + maybe_downcast_numeric, + maybe_downcast_to_dtype, + maybe_upcast, + soft_convert_objects, +) +from pandas.core.dtypes.common import ( + ensure_platform_int, + is_1d_only_ea_dtype, + is_1d_only_ea_obj, + is_dtype_equal, + is_extension_array_dtype, + is_interval_dtype, + is_list_like, + is_string_dtype, +) +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + ExtensionDtype, + PandasDtype, + PeriodDtype, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCIndex, + ABCPandasArray, + ABCSeries, +) +from pandas.core.dtypes.inference import is_inferred_bool_dtype +from pandas.core.dtypes.missing import ( + is_valid_na_for_dtype, + isna, + na_value_for_dtype, +) + +import pandas.core.algorithms as algos +from pandas.core.array_algos.putmask import ( + extract_bool_array, + putmask_inplace, + putmask_smart, + putmask_without_repeat, + setitem_datetimelike_compat, + validate_putmask, +) +from pandas.core.array_algos.quantile import quantile_compat +from pandas.core.array_algos.replace import ( + compare_or_regex_search, + replace_regex, + should_use_regex, +) +from pandas.core.array_algos.take import take_nd +from pandas.core.array_algos.transforms import shift +from pandas.core.arrays import ( + Categorical, + DatetimeArray, + ExtensionArray, + IntervalArray, + PandasArray, + PeriodArray, + TimedeltaArray, +) +from pandas.core.arrays._mixins import NDArrayBackedExtensionArray +from pandas.core.arrays.sparse import SparseDtype +from pandas.core.base import PandasObject +import pandas.core.common as com +import pandas.core.computation.expressions as expressions +from pandas.core.construction import ( + ensure_wrapped_if_datetimelike, + extract_array, +) +from pandas.core.indexers import ( + check_setitem_lengths, + is_empty_indexer, + is_scalar_indexer, +) +import pandas.core.missing as missing + +if TYPE_CHECKING: + from pandas import ( + Float64Index, + Index, + ) + +# comparison is faster than is_object_dtype +_dtype_obj = np.dtype("object") + + +def maybe_split(meth: F) -> F: + """ + If we have a multi-column block, split and operate block-wise. Otherwise + use the original method. 
+ """ + + @wraps(meth) + def newfunc(self, *args, **kwargs) -> list[Block]: + + if self.ndim == 1 or self.shape[0] == 1: + return meth(self, *args, **kwargs) + else: + # Split and operate column-by-column + return self.split_and_operate(meth, *args, **kwargs) + + return cast(F, newfunc) + + +class Block(PandasObject): + """ + Canonical n-dimensional unit of homogeneous dtype contained in a pandas + data structure + + Index-ignorant; let the container take care of that + """ + + values: np.ndarray | ExtensionArray + ndim: int + __init__: Callable + + __slots__ = () + is_numeric = False + is_object = False + is_extension = False + _can_consolidate = True + _validate_ndim = True + + @final + @cache_readonly + def _consolidate_key(self): + return self._can_consolidate, self.dtype.name + + @property + def is_view(self) -> bool: + """return a boolean if I am possibly a view""" + values = self.values + values = cast(np.ndarray, values) + return values.base is not None + + @final + @cache_readonly + def _can_hold_na(self) -> bool: + """ + Can we store NA values in this Block? + """ + dtype = self.dtype + if isinstance(dtype, np.dtype): + return dtype.kind not in ["b", "i", "u"] + return dtype._can_hold_na + + @final + @cache_readonly + def is_categorical(self) -> bool: + warnings.warn( + "Block.is_categorical is deprecated and will be removed in a " + "future version. Use isinstance(block.values, Categorical) " + "instead. See https://github.com/pandas-dev/pandas/issues/40226", + DeprecationWarning, + stacklevel=find_stack_level(), + ) + return isinstance(self.values, Categorical) + + @final + @property + def is_bool(self) -> bool: + """ + We can be bool if a) we are bool dtype or b) object dtype with bool objects. + """ + return is_inferred_bool_dtype(self.values) + + @final + def external_values(self): + return external_values(self.values) + + @property + def array_values(self) -> ExtensionArray: + """ + The array that Series.array returns. Always an ExtensionArray. 
+ """ + # error: Argument 1 to "PandasArray" has incompatible type "Union[ndarray, + # ExtensionArray]"; expected "Union[ndarray, PandasArray]" + return PandasArray(self.values) # type: ignore[arg-type] + + def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray: + """ + return an internal format, currently just the ndarray + this is often overridden to handle to_dense like operations + """ + if dtype == _dtype_obj: + return self.values.astype(_dtype_obj) + # error: Incompatible return value type (got "Union[ndarray, ExtensionArray]", + # expected "ndarray") + return self.values # type: ignore[return-value] + + def values_for_json(self) -> np.ndarray: + # Incompatible return value type (got "Union[ndarray[Any, Any], + # ExtensionArray]", expected "ndarray[Any, Any]") + return self.values # type: ignore[return-value] + + @final + @cache_readonly + def fill_value(self): + # Used in reindex_indexer + return na_value_for_dtype(self.dtype, compat=False) + + @property + def mgr_locs(self) -> BlockPlacement: + return self._mgr_locs + + @mgr_locs.setter + def mgr_locs(self, new_mgr_locs: BlockPlacement): + self._mgr_locs = new_mgr_locs + + @final + def make_block(self, values, placement=None) -> Block: + """ + Create a new block, with type inference propagate any values that are + not specified + """ + if placement is None: + placement = self._mgr_locs + if self.is_extension: + values = ensure_block_shape(values, ndim=self.ndim) + + # TODO: perf by not going through new_block + # We assume maybe_coerce_values has already been called + return new_block(values, placement=placement, ndim=self.ndim) + + @final + def make_block_same_class( + self, values, placement: BlockPlacement | None = None + ) -> Block: + """Wrap given values in a block of same type as self.""" + if placement is None: + placement = self._mgr_locs + + if values.dtype.kind in ["m", "M"]: + + new_values = ensure_wrapped_if_datetimelike(values) + if new_values is not values: + # TODO(2.0): remove once fastparquet has stopped relying on it + warnings.warn( + "In a future version, Block.make_block_same_class will " + "assume that datetime64 and timedelta64 ndarrays have " + "already been cast to DatetimeArray and TimedeltaArray, " + "respectively.", + DeprecationWarning, + stacklevel=find_stack_level(), + ) + values = new_values + + # We assume maybe_coerce_values has already been called + return type(self)(values, placement=placement, ndim=self.ndim) + + @final + def __repr__(self) -> str: + # don't want to print out all of the items here + name = type(self).__name__ + if self.ndim == 1: + result = f"{name}: {len(self)} dtype: {self.dtype}" + else: + + shape = " x ".join([str(s) for s in self.shape]) + result = f"{name}: {self.mgr_locs.indexer}, {shape}, dtype: {self.dtype}" + + return result + + @final + def __len__(self) -> int: + return len(self.values) + + def _slice(self, slicer) -> ArrayLike: + """return a slice of my values""" + + return self.values[slicer] + + @final + def getitem_block(self, slicer: slice | npt.NDArray[np.intp]) -> Block: + """ + Perform __getitem__-like, return result as block. + + Only supports slices that preserve dimensionality. 
+ """ + axis0_slicer = slicer[0] if isinstance(slicer, tuple) else slicer + new_mgr_locs = self._mgr_locs[axis0_slicer] + + new_values = self._slice(slicer) + + if new_values.ndim != self.values.ndim: + raise ValueError("Only same dim slicing is allowed") + + return type(self)(new_values, new_mgr_locs, self.ndim) + + @final + def getitem_block_columns( + self, slicer: slice, new_mgr_locs: BlockPlacement + ) -> Block: + """ + Perform __getitem__-like, return result as block. + + Only supports slices that preserve dimensionality. + """ + new_values = self._slice(slicer) + + if new_values.ndim != self.values.ndim: + raise ValueError("Only same dim slicing is allowed") + + return type(self)(new_values, new_mgr_locs, self.ndim) + + # NB: this cannot be made cache_readonly because in libreduction we pin + # new .values that can have different shape GH#42631 + @property + def shape(self) -> Shape: + return self.values.shape + + @cache_readonly + def dtype(self) -> DtypeObj: + return self.values.dtype + + def iget(self, i: int | tuple[int, int] | tuple[slice, int]): + # In the case where we have a tuple[slice, int], the slice will always + # be slice(None) + # Note: only reached with self.ndim == 2 + # Invalid index type "Union[int, Tuple[int, int], Tuple[slice, int]]" + # for "Union[ndarray[Any, Any], ExtensionArray]"; expected type + # "Union[int, integer[Any]]" + return self.values[i] # type: ignore[index] + + def set_inplace(self, locs, values) -> None: + """ + Modify block values in-place with new item value. + + Notes + ----- + `set` never creates a new array or new Block, whereas `setitem` _may_ + create a new array and always creates a new Block. + """ + self.values[locs] = values + + def delete(self, loc) -> None: + """ + Delete given loc(-s) from block in-place. 
+ """ + # Argument 1 to "delete" has incompatible type "Union[ndarray[Any, Any], + # ExtensionArray]"; expected "Union[_SupportsArray[dtype[Any]], + # Sequence[_SupportsArray[dtype[Any]]], Sequence[Sequence + # [_SupportsArray[dtype[Any]]]], Sequence[Sequence[Sequence[ + # _SupportsArray[dtype[Any]]]]], Sequence[Sequence[Sequence[Sequence[ + # _SupportsArray[dtype[Any]]]]]]]" [arg-type] + self.values = np.delete(self.values, loc, 0) # type: ignore[arg-type] + self.mgr_locs = self._mgr_locs.delete(loc) + try: + self._cache.clear() + except AttributeError: + # _cache not yet initialized + pass + + @final + def apply(self, func, **kwargs) -> list[Block]: + """ + apply the function to my values; return a block if we are not + one + """ + result = func(self.values, **kwargs) + + return self._split_op_result(result) + + def reduce(self, func, ignore_failures: bool = False) -> list[Block]: + # We will apply the function and reshape the result into a single-row + # Block with the same mgr_locs; squeezing will be done at a higher level + assert self.ndim == 2 + + try: + result = func(self.values) + except (TypeError, NotImplementedError): + if ignore_failures: + return [] + raise + + if self.values.ndim == 1: + # TODO(EA2D): special case not needed with 2D EAs + res_values = np.array([[result]]) + else: + res_values = result.reshape(-1, 1) + + nb = self.make_block(res_values) + return [nb] + + @final + def _split_op_result(self, result: ArrayLike) -> list[Block]: + # See also: split_and_operate + if result.ndim > 1 and isinstance(result.dtype, ExtensionDtype): + # TODO(EA2D): unnecessary with 2D EAs + # if we get a 2D ExtensionArray, we need to split it into 1D pieces + nbs = [] + for i, loc in enumerate(self._mgr_locs): + if not is_1d_only_ea_obj(result): + vals = result[i : i + 1] + else: + vals = result[i] + + block = self.make_block(values=vals, placement=loc) + nbs.append(block) + return nbs + + nb = self.make_block(result) + + return [nb] + + def fillna( + self, value, limit=None, inplace: bool = False, downcast=None + ) -> list[Block]: + """ + fillna on the block with the value. If we fail, then convert to + ObjectBlock and try again + """ + inplace = validate_bool_kwarg(inplace, "inplace") + + mask = isna(self.values) + mask, noop = validate_putmask(self.values, mask) + + if limit is not None: + limit = libalgos.validate_limit(None, limit=limit) + mask[mask.cumsum(self.ndim - 1) > limit] = False + + if not self._can_hold_na: + if inplace: + return [self] + else: + return [self.copy()] + + if self._can_hold_element(value): + nb = self if inplace else self.copy() + putmask_inplace(nb.values, mask, value) + return nb._maybe_downcast([nb], downcast) + + if noop: + # we can't process the value, but nothing to do + return [self] if inplace else [self.copy()] + + elif self.ndim == 1 or self.shape[0] == 1: + blk = self.coerce_to_target_dtype(value) + # bc we have already cast, inplace=True may avoid an extra copy + return blk.fillna(value, limit=limit, inplace=True, downcast=None) + + else: + # operate column-by-column + return self.split_and_operate( + type(self).fillna, value, limit=limit, inplace=inplace, downcast=None + ) + + @final + def _split(self) -> list[Block]: + """ + Split a block into a list of single-column blocks. 
+ """ + assert self.ndim == 2 + + new_blocks = [] + for i, ref_loc in enumerate(self._mgr_locs): + vals = self.values[slice(i, i + 1)] + + bp = BlockPlacement(ref_loc) + nb = type(self)(vals, placement=bp, ndim=2) + new_blocks.append(nb) + return new_blocks + + @final + def split_and_operate(self, func, *args, **kwargs) -> list[Block]: + """ + Split the block and apply func column-by-column. + + Parameters + ---------- + func : Block method + *args + **kwargs + + Returns + ------- + List[Block] + """ + assert self.ndim == 2 and self.shape[0] != 1 + + res_blocks = [] + for nb in self._split(): + rbs = func(nb, *args, **kwargs) + res_blocks.extend(rbs) + return res_blocks + + @final + def _maybe_downcast(self, blocks: list[Block], downcast=None) -> list[Block]: + if downcast is False: + return blocks + + if self.dtype == _dtype_obj: + # GH#44241 We downcast regardless of the argument; + # respecting 'downcast=None' may be worthwhile at some point, + # but ATM it breaks too much existing code. + # split and convert the blocks + + return extend_blocks( + [blk.convert(datetime=True, numeric=False) for blk in blocks] + ) + + if downcast is None: + return blocks + + return extend_blocks([b._downcast_2d(downcast) for b in blocks]) + + @final + @maybe_split + def _downcast_2d(self, dtype) -> list[Block]: + """ + downcast specialized to 2D case post-validation. + + Refactored to allow use of maybe_split. + """ + new_values = maybe_downcast_to_dtype(self.values, dtype=dtype) + return [self.make_block(new_values)] + + @final + def astype(self, dtype: DtypeObj, copy: bool = False, errors: str = "raise"): + """ + Coerce to the new dtype. + + Parameters + ---------- + dtype : np.dtype or ExtensionDtype + copy : bool, default False + copy if indicated + errors : str, {'raise', 'ignore'}, default 'raise' + - ``raise`` : allow exceptions to be raised + - ``ignore`` : suppress exceptions. On error return original object + + Returns + ------- + Block + """ + values = self.values + + new_values = astype_array_safe(values, dtype, copy=copy, errors=errors) + + new_values = maybe_coerce_values(new_values) + newb = self.make_block(new_values) + if newb.shape != self.shape: + raise TypeError( + f"cannot set astype for copy = [{copy}] for dtype " + f"({self.dtype.name} [{self.shape}]) to different shape " + f"({newb.dtype.name} [{newb.shape}])" + ) + return newb + + def convert( + self, + copy: bool = True, + datetime: bool = True, + numeric: bool = True, + timedelta: bool = True, + ) -> list[Block]: + """ + attempt to coerce any object types to better types return a copy + of the block (if copy = True) by definition we are not an ObjectBlock + here! + """ + return [self.copy()] if copy else [self] + + @final + def _can_hold_element(self, element: Any) -> bool: + """require the same dtype as ourselves""" + element = extract_array(element, extract_numpy=True) + return can_hold_element(self.values, element) + + @final + def should_store(self, value: ArrayLike) -> bool: + """ + Should we set self.values[indexer] = value inplace or do we need to cast? 
+ + Parameters + ---------- + value : np.ndarray or ExtensionArray + + Returns + ------- + bool + """ + # faster equivalent to is_dtype_equal(value.dtype, self.dtype) + try: + return value.dtype == self.dtype + except TypeError: + return False + + @final + def to_native_types(self, na_rep="nan", quoting=None, **kwargs): + """convert to our native types format""" + result = to_native_types(self.values, na_rep=na_rep, quoting=quoting, **kwargs) + return self.make_block(result) + + # block actions # + @final + def copy(self, deep: bool = True): + """copy constructor""" + values = self.values + if deep: + values = values.copy() + return type(self)(values, placement=self._mgr_locs, ndim=self.ndim) + + # --------------------------------------------------------------------- + # Replace + + @final + def replace( + self, + to_replace, + value, + inplace: bool = False, + # mask may be pre-computed if we're called from replace_list + mask: npt.NDArray[np.bool_] | None = None, + ) -> list[Block]: + """ + replace the to_replace value with value, possible to create new + blocks here this is just a call to putmask. + """ + + # Note: the checks we do in NDFrame.replace ensure we never get + # here with listlike to_replace or value, as those cases + # go through replace_list + + values = self.values + + if isinstance(values, Categorical): + # TODO: avoid special-casing + blk = self if inplace else self.copy() + blk.values._replace(to_replace=to_replace, value=value, inplace=True) + return [blk] + + if not self._can_hold_element(to_replace): + # We cannot hold `to_replace`, so we know immediately that + # replacing it is a no-op. + # Note: If to_replace were a list, NDFrame.replace would call + # replace_list instead of replace. + return [self] if inplace else [self.copy()] + + if mask is None: + mask = missing.mask_missing(values, to_replace) + if not mask.any(): + # Note: we get here with test_replace_extension_other incorrectly + # bc _can_hold_element is incorrect. + return [self] if inplace else [self.copy()] + + elif self._can_hold_element(value): + blk = self if inplace else self.copy() + putmask_inplace(blk.values, mask, value) + if not (self.is_object and value is None): + # if the user *explicitly* gave None, we keep None, otherwise + # may downcast to NaN + blocks = blk.convert(numeric=False, copy=False) + else: + blocks = [blk] + return blocks + + elif self.ndim == 1 or self.shape[0] == 1: + blk = self.coerce_to_target_dtype(value) + return blk.replace( + to_replace=to_replace, + value=value, + inplace=True, + mask=mask, + ) + + else: + # split so that we only upcast where necessary + return self.split_and_operate( + type(self).replace, to_replace, value, inplace=True + ) + + @final + def _replace_regex( + self, + to_replace, + value, + inplace: bool = False, + convert: bool = True, + mask=None, + ) -> list[Block]: + """ + Replace elements by the given value. + + Parameters + ---------- + to_replace : object or pattern + Scalar to replace or regular expression to match. + value : object + Replacement object. + inplace : bool, default False + Perform inplace modification. + convert : bool, default True + If true, try to coerce any object types to better types. + mask : array-like of bool, optional + True indicate corresponding element is ignored. + + Returns + ------- + List[Block] + """ + if not self._can_hold_element(to_replace): + # i.e. 
only ObjectBlock, but could in principle include a + # String ExtensionBlock + return [self] if inplace else [self.copy()] + + rx = re.compile(to_replace) + + new_values = self.values if inplace else self.values.copy() + replace_regex(new_values, rx, value, mask) + + block = self.make_block(new_values) + return block.convert(numeric=False, copy=False) + + @final + def replace_list( + self, + src_list: Iterable[Any], + dest_list: Sequence[Any], + inplace: bool = False, + regex: bool = False, + ) -> list[Block]: + """ + See BlockManager.replace_list docstring. + """ + values = self.values + + # Exclude anything that we know we won't contain + pairs = [ + (x, y) for x, y in zip(src_list, dest_list) if self._can_hold_element(x) + ] + if not len(pairs): + # shortcut, nothing to replace + return [self] if inplace else [self.copy()] + + src_len = len(pairs) - 1 + + if is_string_dtype(values.dtype): + # Calculate the mask once, prior to the call of comp + # in order to avoid repeating the same computations + mask = ~isna(values) + masks = [ + compare_or_regex_search(values, s[0], regex=regex, mask=mask) + for s in pairs + ] + else: + # GH#38086 faster if we know we dont need to check for regex + masks = [missing.mask_missing(values, s[0]) for s in pairs] + + # error: Argument 1 to "extract_bool_array" has incompatible type + # "Union[ExtensionArray, ndarray, bool]"; expected "Union[ExtensionArray, + # ndarray]" + masks = [extract_bool_array(x) for x in masks] # type: ignore[arg-type] + + rb = [self if inplace else self.copy()] + for i, (src, dest) in enumerate(pairs): + convert = i == src_len # only convert once at the end + new_rb: list[Block] = [] + + # GH-39338: _replace_coerce can split a block into + # single-column blocks, so track the index so we know + # where to index into the mask + for blk_num, blk in enumerate(rb): + if len(rb) == 1: + m = masks[i] + else: + mib = masks[i] + assert not isinstance(mib, bool) + m = mib[blk_num : blk_num + 1] + + result = blk._replace_coerce( + to_replace=src, + value=dest, + mask=m, + inplace=inplace, + regex=regex, + ) + if convert and blk.is_object and not all(x is None for x in dest_list): + # GH#44498 avoid unwanted cast-back + result = extend_blocks( + [b.convert(numeric=False, copy=True) for b in result] + ) + new_rb.extend(result) + rb = new_rb + return rb + + @final + def _replace_coerce( + self, + to_replace, + value, + mask: np.ndarray, + inplace: bool = True, + regex: bool = False, + ) -> list[Block]: + """ + Replace value corresponding to the given boolean array with another + value. + + Parameters + ---------- + to_replace : object or pattern + Scalar to replace or regular expression to match. + value : object + Replacement object. + mask : np.ndarray[bool] + True indicate corresponding element is ignored. + inplace : bool, default True + Perform inplace modification. + regex : bool, default False + If true, perform regular expression substitution. + + Returns + ------- + List[Block] + """ + if should_use_regex(regex, to_replace): + return self._replace_regex( + to_replace, + value, + inplace=inplace, + convert=False, + mask=mask, + ) + else: + return self.replace( + to_replace=to_replace, value=value, inplace=inplace, mask=mask + ) + + # --------------------------------------------------------------------- + + def _maybe_squeeze_arg(self, arg: np.ndarray) -> np.ndarray: + """ + For compatibility with 1D-only ExtensionArrays. 
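
For orientation, a small user-level sketch of how the replace machinery above is reached; this uses only public pandas calls, with the routing as described in the docstrings above:

    import pandas as pd

    df = pd.DataFrame({"a": ["foo", "bar", "baz"]})

    # list-like to_replace/value pairs are routed through Block.replace_list
    df.replace(["foo", "bar"], ["x", "y"])

    # regex patterns reach Block._replace_regex via should_use_regex
    df.replace(to_replace=r"^ba.$", value="hit", regex=True)
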
+ """ + return arg + + def setitem(self, indexer, value): + """ + Attempt self.values[indexer] = value, possibly creating a new array. + + Parameters + ---------- + indexer : tuple, list-like, array-like, slice, int + The subset of self.values to set + value : object + The value being set + + Returns + ------- + Block + + Notes + ----- + `indexer` is a direct slice/positional indexer. `value` must + be a compatible shape. + """ + transpose = self.ndim == 2 + + if isinstance(indexer, np.ndarray) and indexer.ndim > self.ndim: + raise ValueError(f"Cannot set values with ndim > {self.ndim}") + + # coerce None values, if appropriate + if value is None: + if self.is_numeric: + value = np.nan + + # coerce if block dtype can store value + values = cast(np.ndarray, self.values) + if not self._can_hold_element(value): + # current dtype cannot store value, coerce to common dtype + return self.coerce_to_target_dtype(value).setitem(indexer, value) + + # value must be storable at this moment + if is_extension_array_dtype(getattr(value, "dtype", None)): + # We need to be careful not to allow through strings that + # can be parsed to EADtypes + arr_value = value + else: + arr_value = np.asarray(value) + + if transpose: + values = values.T + + # length checking + check_setitem_lengths(indexer, value, values) + + if is_empty_indexer(indexer, arr_value): + # GH#8669 empty indexers, test_loc_setitem_boolean_mask_allfalse + pass + + elif is_scalar_indexer(indexer, self.ndim): + # setting a single element for each dim and with a rhs that could + # be e.g. a list; see GH#6043 + values[indexer] = value + + else: + value = setitem_datetimelike_compat(values, len(values[indexer]), value) + values[indexer] = value + + return self + + def putmask(self, mask, new) -> list[Block]: + """ + putmask the data to the block; it is possible that we may create a + new dtype of block + + Return the resulting block(s). + + Parameters + ---------- + mask : np.ndarray[bool], SparseArray[bool], or BooleanArray + new : a ndarray/object + + Returns + ------- + List[Block] + """ + orig_mask = mask + values = cast(np.ndarray, self.values) + mask, noop = validate_putmask(values.T, mask) + assert not isinstance(new, (ABCIndex, ABCSeries, ABCDataFrame)) + + if new is lib.no_default: + new = self.fill_value + + # if we are passed a scalar None, convert it here + if not self.is_object and is_valid_na_for_dtype(new, self.dtype): + new = self.fill_value + + if self._can_hold_element(new): + putmask_without_repeat(values.T, mask, new) + return [self] + + elif np_version_under1p20 and infer_dtype_from(new)[0].kind in ["m", "M"]: + # using putmask with object dtype will incorrectly cast to object + # Having excluded self._can_hold_element, we know we cannot operate + # in-place, so we are safe using `where` + return self.where(new, ~mask) + + elif noop: + return [self] + + elif self.ndim == 1 or self.shape[0] == 1: + # no need to split columns + + if not is_list_like(new): + # putmask_smart can't save us the need to cast + return self.coerce_to_target_dtype(new).putmask(mask, new) + + # This differs from + # `self.coerce_to_target_dtype(new).putmask(mask, new)` + # because putmask_smart will check if new[mask] may be held + # by our dtype. 
+ nv = putmask_smart(values.T, mask, new).T + return [self.make_block(nv)] + + else: + is_array = isinstance(new, np.ndarray) + + res_blocks = [] + nbs = self._split() + for i, nb in enumerate(nbs): + n = new + if is_array: + # we have a different value per-column + n = new[:, i : i + 1] + + submask = orig_mask[:, i : i + 1] + rbs = nb.putmask(submask, n) + res_blocks.extend(rbs) + return res_blocks + + @final + def coerce_to_target_dtype(self, other) -> Block: + """ + coerce the current block to a dtype compat for other + we will return a block, possibly object, and not raise + + we can also safely try to coerce to the same dtype + and will receive the same block + """ + # if we cannot then coerce to object + dtype, _ = infer_dtype_from(other, pandas_dtype=True) + + new_dtype = find_common_type([self.dtype, dtype]) + + return self.astype(new_dtype, copy=False) + + def interpolate( + self, + method: str = "pad", + axis: int = 0, + index: Index | None = None, + inplace: bool = False, + limit: int | None = None, + limit_direction: str = "forward", + limit_area: str | None = None, + fill_value: Any | None = None, + coerce: bool = False, + downcast: str | None = None, + **kwargs, + ) -> list[Block]: + + inplace = validate_bool_kwarg(inplace, "inplace") + + if not self._can_hold_na: + # If there are no NAs, then interpolate is a no-op + return [self] if inplace else [self.copy()] + + if self.is_object and self.ndim == 2 and self.shape[0] != 1 and axis == 0: + # split improves performance in ndarray.copy() + return self.split_and_operate( + type(self).interpolate, + method, + axis, + index, + inplace, + limit, + limit_direction, + limit_area, + fill_value, + coerce, + downcast, + **kwargs, + ) + + try: + m = missing.clean_fill_method(method) + except ValueError: + m = None + if m is None and self.dtype.kind != "f": + # only deal with floats + # bc we already checked that can_hold_na, we dont have int dtype here + # TODO: make a copy if not inplace? 
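
The float case that does reach the 2D interpolation call below looks like this at the user level:

    import numpy as np
    import pandas as pd

    s = pd.Series([1.0, np.nan, np.nan, 4.0])
    print(s.interpolate(method="linear").tolist())  # [1.0, 2.0, 3.0, 4.0]
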
+ return [self] + + data = self.values if inplace else self.values.copy() + data = cast(np.ndarray, data) # bc overridden by ExtensionBlock + + missing.interpolate_array_2d( + data, + method=method, + axis=axis, + index=index, + limit=limit, + limit_direction=limit_direction, + limit_area=limit_area, + fill_value=fill_value, + **kwargs, + ) + + nb = self.make_block_same_class(data) + return nb._maybe_downcast([nb], downcast) + + def take_nd( + self, + indexer, + axis: int, + new_mgr_locs: BlockPlacement | None = None, + fill_value=lib.no_default, + ) -> Block: + """ + Take values according to indexer and return them as a block. + + """ + # algos.take_nd dispatches for DatetimeTZBlock, CategoricalBlock + # so need to preserve types + # sparse is treated like an ndarray, but needs .get_values() shaping + + values = self.values + + if fill_value is lib.no_default: + fill_value = self.fill_value + allow_fill = False + else: + allow_fill = True + + new_values = algos.take_nd( + values, indexer, axis=axis, allow_fill=allow_fill, fill_value=fill_value + ) + + # Called from three places in managers, all of which satisfy + # this assertion + assert not (axis == 0 and new_mgr_locs is None) + if new_mgr_locs is None: + new_mgr_locs = self._mgr_locs + + if not is_dtype_equal(new_values.dtype, self.dtype): + return self.make_block(new_values, new_mgr_locs) + else: + return self.make_block_same_class(new_values, new_mgr_locs) + + def diff(self, n: int, axis: int = 1) -> list[Block]: + """return block for the diff of the values""" + new_values = algos.diff(self.values, n, axis=axis) + return [self.make_block(values=new_values)] + + def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> list[Block]: + """shift the block by periods, possibly upcast""" + # convert integer to float if necessary. need to do a lot more than + # that, handle boolean etc also + + values = cast(np.ndarray, self.values) + + new_values, fill_value = maybe_upcast(values, fill_value) + + new_values = shift(new_values, periods, axis, fill_value) + + return [self.make_block(new_values)] + + def where(self, other, cond) -> list[Block]: + """ + evaluate the block; return result block(s) from the result + + Parameters + ---------- + other : a ndarray/object + cond : np.ndarray[bool], SparseArray[bool], or BooleanArray + + Returns + ------- + List[Block] + """ + assert cond.ndim == self.ndim + assert not isinstance(other, (ABCIndex, ABCSeries, ABCDataFrame)) + + transpose = self.ndim == 2 + + # EABlocks override where + values = cast(np.ndarray, self.values) + orig_other = other + if transpose: + values = values.T + + icond, noop = validate_putmask(values, ~cond) + if noop: + # GH-39595: Always return a copy; short-circuit up/downcasting + return self.copy() + + if other is lib.no_default: + other = self.fill_value + + if is_valid_na_for_dtype(other, self.dtype) and self.dtype != _dtype_obj: + other = self.fill_value + + if not self._can_hold_element(other): + # we cannot coerce, return a compat dtype + block = self.coerce_to_target_dtype(other) + blocks = block.where(orig_other, cond) + return self._maybe_downcast(blocks, "infer") + + else: + alt = setitem_datetimelike_compat(values, icond.sum(), other) + if alt is not other: + if is_list_like(other) and len(other) < len(values): + # call np.where with other to get the appropriate ValueError + np.where(~icond, values, other) + raise NotImplementedError( + "This should not be reached; call to np.where above is " + "expected to raise ValueError. 
Please report a bug at " + "github.com/pandas-dev/pandas" + ) + result = values.copy() + np.putmask(result, icond, alt) + else: + # By the time we get here, we should have all Series/Index + # args extracted to ndarray + if ( + is_list_like(other) + and not isinstance(other, np.ndarray) + and len(other) == self.shape[-1] + ): + # If we don't do this broadcasting here, then expressions.where + # will broadcast a 1D other to be row-like instead of + # column-like. + other = np.array(other).reshape(values.shape) + # If lengths don't match (or len(other)==1), we will raise + # inside expressions.where, see test_series_where + + # Note: expressions.where may upcast. + result = expressions.where(~icond, values, other) + + if self._can_hold_na or self.ndim == 1: + + if transpose: + result = result.T + + return [self.make_block(result)] + + # might need to separate out blocks + cond = ~icond + axis = cond.ndim - 1 + cond = cond.swapaxes(axis, 0) + mask = cond.all(axis=1) + + result_blocks: list[Block] = [] + for m in [mask, ~mask]: + if m.any(): + taken = result.take(m.nonzero()[0], axis=axis) + r = maybe_downcast_numeric(taken, self.dtype) + if r.dtype != taken.dtype: + warnings.warn( + "Downcasting integer-dtype results in .where is " + "deprecated and will change in a future version. " + "To retain the old behavior, explicitly cast the results " + "to the desired dtype.", + FutureWarning, + stacklevel=find_stack_level(), + ) + nb = self.make_block(r.T, placement=self._mgr_locs[m]) + result_blocks.append(nb) + + return result_blocks + + def _unstack( + self, + unstacker, + fill_value, + new_placement: npt.NDArray[np.intp], + needs_masking: npt.NDArray[np.bool_], + ): + """ + Return a list of unstacked blocks of self + + Parameters + ---------- + unstacker : reshape._Unstacker + fill_value : int + Only used in ExtensionBlock._unstack + new_placement : np.ndarray[np.intp] + allow_fill : bool + needs_masking : np.ndarray[bool] + + Returns + ------- + blocks : list of Block + New blocks of unstacked values. + mask : array-like of bool + The mask of columns of `blocks` we should keep. + """ + new_values, mask = unstacker.get_new_values( + self.values.T, fill_value=fill_value + ) + + mask = mask.any(0) + # TODO: in all tests we have mask.all(); can we rely on that? + + # Note: these next two lines ensure that + # mask.sum() == sum(len(nb.mgr_locs) for nb in blocks) + # which the calling function needs in order to pass verify_integrity=False + # to the BlockManager constructor + new_values = new_values.T[mask] + new_placement = new_placement[mask] + + bp = BlockPlacement(new_placement) + blocks = [new_block_2d(new_values, placement=bp)] + return blocks, mask + + @final + def quantile( + self, qs: Float64Index, interpolation="linear", axis: int = 0 + ) -> Block: + """ + compute the quantiles of the + + Parameters + ---------- + qs : Float64Index + List of the quantiles to be computed. + interpolation : str, default 'linear' + Type of interpolation. + axis : int, default 0 + Axis to compute. + + Returns + ------- + Block + """ + # We should always have ndim == 2 because Series dispatches to DataFrame + assert self.ndim == 2 + assert axis == 1 # only ever called this way + assert is_list_like(qs) # caller is responsible for this + + result = quantile_compat(self.values, np.asarray(qs._values), interpolation) + # ensure_block_shape needed for cases where we start with EA and result + # is ndarray, e.g. 
IntegerArray, SparseArray + result = ensure_block_shape(result, ndim=2) + return new_block_2d(result, placement=self._mgr_locs) + + +class EABackedBlock(Block): + """ + Mixin for Block subclasses backed by ExtensionArray. + """ + + values: ExtensionArray + + def where(self, other, cond) -> list[Block]: + arr = self.values.T + + cond = extract_bool_array(cond) + + other = self._maybe_squeeze_arg(other) + cond = self._maybe_squeeze_arg(cond) + + if other is lib.no_default: + other = self.fill_value + + icond, noop = validate_putmask(arr, ~cond) + if noop: + # GH#44181, GH#45135 + # Avoid a) raising for Interval/PeriodDtype and b) unnecessary object upcast + return self.copy() + + try: + res_values = arr._where(cond, other).T + except (ValueError, TypeError) as err: + _catch_deprecated_value_error(err) + + if is_interval_dtype(self.dtype): + # TestSetitemFloatIntervalWithIntIntervalValues + blk = self.coerce_to_target_dtype(other) + if blk.dtype == _dtype_obj: + # For now at least only support casting e.g. + # Interval[int64]->Interval[float64] + raise + return blk.where(other, cond) + + elif isinstance(self, NDArrayBackedExtensionBlock): + # NB: not (yet) the same as + # isinstance(values, NDArrayBackedExtensionArray) + if isinstance(self.dtype, PeriodDtype): + # TODO: don't special-case + raise + blk = self.coerce_to_target_dtype(other) + nbs = blk.where(other, cond) + return self._maybe_downcast(nbs, "infer") + + else: + raise + + nb = self.make_block_same_class(res_values) + return [nb] + + def putmask(self, mask, new) -> list[Block]: + """ + See Block.putmask.__doc__ + """ + mask = extract_bool_array(mask) + + values = self.values + + mask = self._maybe_squeeze_arg(mask) + + try: + # Caller is responsible for ensuring matching lengths + values._putmask(mask, new) + except (TypeError, ValueError) as err: + _catch_deprecated_value_error(err) + + if is_interval_dtype(self.dtype): + # Discussion about what we want to support in the general + # case GH#39584 + blk = self.coerce_to_target_dtype(new) + if blk.dtype == _dtype_obj: + # For now at least, only support casting e.g. + # Interval[int64]->Interval[float64], + raise + return blk.putmask(mask, new) + + elif isinstance(self, NDArrayBackedExtensionBlock): + # NB: not (yet) the same as + # isinstance(values, NDArrayBackedExtensionArray) + if isinstance(self.dtype, PeriodDtype): + # TODO: don't special-case + raise + blk = self.coerce_to_target_dtype(new) + return blk.putmask(mask, new) + + else: + raise + + return [self] + + def delete(self, loc) -> None: + """ + Delete given loc(-s) from block in-place. 
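
The quantile path defined above is exercised through the public API; as its assertions note, even a Series dispatches through a (1, N) block with axis=1:

    import pandas as pd

    s = pd.Series([1.0, 2.0, 3.0, 4.0])
    print(s.quantile([0.25, 0.5, 0.75]).tolist())  # [1.75, 2.5, 3.25]
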
+ """ + # This will be unnecessary if/when __array_function__ is implemented + self.values = self.values.delete(loc) + self.mgr_locs = self._mgr_locs.delete(loc) + try: + self._cache.clear() + except AttributeError: + # _cache not yet initialized + pass + + @cache_readonly + def array_values(self) -> ExtensionArray: + return self.values + + def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray: + """ + return object dtype as boxed values, such as Timestamps/Timedelta + """ + values: ArrayLike = self.values + if dtype == _dtype_obj: + values = values.astype(object) + # TODO(EA2D): reshape not needed with 2D EAs + return np.asarray(values).reshape(self.shape) + + def values_for_json(self) -> np.ndarray: + return np.asarray(self.values) + + def interpolate( + self, method="pad", axis=0, inplace=False, limit=None, fill_value=None, **kwargs + ): + values = self.values + if values.ndim == 2 and axis == 0: + # NDArrayBackedExtensionArray.fillna assumes axis=1 + new_values = values.T.fillna(value=fill_value, method=method, limit=limit).T + else: + new_values = values.fillna(value=fill_value, method=method, limit=limit) + return self.make_block_same_class(new_values) + + +class ExtensionBlock(libinternals.Block, EABackedBlock): + """ + Block for holding extension types. + + Notes + ----- + This holds all 3rd-party extension array types. It's also the immediate + parent class for our internal extension types' blocks, CategoricalBlock. + + ExtensionArrays are limited to 1-D. + """ + + _can_consolidate = False + _validate_ndim = False + is_extension = True + + values: ExtensionArray + + @cache_readonly + def shape(self) -> Shape: + # TODO(EA2D): override unnecessary with 2D EAs + if self.ndim == 1: + return (len(self.values),) + return len(self._mgr_locs), len(self.values) + + def iget(self, i: int | tuple[int, int] | tuple[slice, int]): + # In the case where we have a tuple[slice, int], the slice will always + # be slice(None) + # We _could_ make the annotation more specific, but mypy would + # complain about override mismatch: + # Literal[0] | tuple[Literal[0], int] | tuple[slice, int] + + # Note: only reached with self.ndim == 2 + + if isinstance(i, tuple): + # TODO(EA2D): unnecessary with 2D EAs + col, loc = i + if not com.is_null_slice(col) and col != 0: + raise IndexError(f"{self} only contains one item") + elif isinstance(col, slice): + if col != slice(None): + raise NotImplementedError(col) + return self.values[[loc]] + return self.values[loc] + else: + if i != 0: + raise IndexError(f"{self} only contains one item") + return self.values + + def set_inplace(self, locs, values) -> None: + # NB: This is a misnomer, is supposed to be inplace but is not, + # see GH#33457 + # When an ndarray, we should have locs.tolist() == [0] + # When a BlockPlacement we should have list(locs) == [0] + self.values = values + try: + # TODO(GH33457) this can be removed + self._cache.clear() + except AttributeError: + # _cache not yet initialized + pass + + def _maybe_squeeze_arg(self, arg): + """ + If necessary, squeeze a (N, 1) ndarray to (N,) + """ + # e.g. 
if we are passed a 2D mask for putmask + if isinstance(arg, np.ndarray) and arg.ndim == self.values.ndim + 1: + # TODO(EA2D): unnecessary with 2D EAs + assert arg.shape[1] == 1 + arg = arg[:, 0] + return arg + + @property + def is_view(self) -> bool: + """Extension arrays are never treated as views.""" + return False + + @cache_readonly + def is_numeric(self): + return self.values.dtype._is_numeric + + def setitem(self, indexer, value): + """ + Attempt self.values[indexer] = value, possibly creating a new array. + + This differs from Block.setitem by not allowing setitem to change + the dtype of the Block. + + Parameters + ---------- + indexer : tuple, list-like, array-like, slice, int + The subset of self.values to set + value : object + The value being set + + Returns + ------- + Block + + Notes + ----- + `indexer` is a direct slice/positional indexer. `value` must + be a compatible shape. + """ + if not self._can_hold_element(value): + # see TestSetitemFloatIntervalWithIntIntervalValues + return self.coerce_to_target_dtype(value).setitem(indexer, value) + + if isinstance(indexer, tuple): + # TODO(EA2D): not needed with 2D EAs + # we are always 1-D + indexer = indexer[0] + if isinstance(indexer, np.ndarray) and indexer.ndim == 2: + # GH#44703 + if indexer.shape[1] != 1: + raise NotImplementedError( + "This should not be reached. Please report a bug at " + "github.com/pandas-dev/pandas/" + ) + indexer = indexer[:, 0] + + # TODO(EA2D): not needed with 2D EAS + if isinstance(value, (np.ndarray, ExtensionArray)) and value.ndim == 2: + assert value.shape[1] == 1 + # error: No overload variant of "__getitem__" of "ExtensionArray" + # matches argument type "Tuple[slice, int]" + value = value[:, 0] # type: ignore[call-overload] + elif isinstance(value, ABCDataFrame): + # TODO: should we avoid getting here with DataFrame? + assert value.shape[1] == 1 + value = value._ixs(0, axis=1)._values + + check_setitem_lengths(indexer, value, self.values) + self.values[indexer] = value + return self + + def take_nd( + self, + indexer, + axis: int = 0, + new_mgr_locs: BlockPlacement | None = None, + fill_value=lib.no_default, + ) -> Block: + """ + Take values according to indexer and return them as a block. + """ + if fill_value is lib.no_default: + fill_value = None + + # TODO(EA2D): special case not needed with 2D EAs + # axis doesn't matter; we are really a single-dim object + # but are passed the axis depending on the calling routing + # if its REALLY axis 0, then this will be a reindex and not a take + new_values = self.values.take(indexer, fill_value=fill_value, allow_fill=True) + + # Called from three places in managers, all of which satisfy + # this assertion + assert not (self.ndim == 1 and new_mgr_locs is None) + if new_mgr_locs is None: + new_mgr_locs = self._mgr_locs + + return self.make_block_same_class(new_values, new_mgr_locs) + + def _slice(self, slicer) -> ExtensionArray: + """ + Return a slice of my values. + + Parameters + ---------- + slicer : slice, ndarray[int], or a tuple of these + Valid (non-reducing) indexer for self.values. 
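
The (1, N) bookkeeping throughout this class exists because most ExtensionArrays are strictly one-dimensional; a quick demonstration (df._mgr is internal API, shown only for illustration):

    import pandas as pd

    arr = pd.array([1, 2, None], dtype="Int64")
    print(arr.ndim)  # 1

    # each EA-backed column keeps its own one-column block, since
    # _can_consolidate is False for ExtensionBlock
    df = pd.DataFrame({"a": arr, "b": arr})
    print(len(df._mgr.blocks))  # 2
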
+ + Returns + ------- + ExtensionArray + """ + # return same dims as we currently have + if not isinstance(slicer, tuple) and self.ndim == 2: + # reached via getitem_block via _slice_take_blocks_ax0 + # TODO(EA2D): won't be necessary with 2D EAs + slicer = (slicer, slice(None)) + + if isinstance(slicer, tuple) and len(slicer) == 2: + first = slicer[0] + if not isinstance(first, slice): + raise AssertionError( + "invalid slicing for a 1-ndim ExtensionArray", first + ) + # GH#32959 only full-slicers along fake-dim0 are valid + # TODO(EA2D): won't be necessary with 2D EAs + # range(1) instead of self._mgr_locs to avoid exception on [::-1] + # see test_iloc_getitem_slice_negative_step_ea_block + new_locs = range(1)[first] + if len(new_locs): + # effectively slice(None) + slicer = slicer[1] + else: + raise AssertionError( + "invalid slicing for a 1-ndim ExtensionArray", slicer + ) + + return self.values[slicer] + + @final + def getitem_block_index(self, slicer: slice) -> ExtensionBlock: + """ + Perform __getitem__-like specialized to slicing along index. + """ + # GH#42787 in principle this is equivalent to values[..., slicer], but we don't + # require subclasses of ExtensionArray to support that form (for now). + new_values = self.values[slicer] + return type(self)(new_values, self._mgr_locs, ndim=self.ndim) + + def fillna( + self, value, limit=None, inplace: bool = False, downcast=None + ) -> list[Block]: + values = self.values.fillna(value=value, limit=limit) + return [self.make_block_same_class(values=values)] + + def diff(self, n: int, axis: int = 1) -> list[Block]: + if axis == 0 and n != 0: + # n==0 case will be a no-op so let it fall through + # Since we only have one column, the result will be all-NA. + # Create this result by shifting along axis=0 past the length of + # our values. + return super().diff(len(self.values), axis=0) + if axis == 1: + # TODO(EA2D): unnecessary with 2D EAs + # we are by definition 1D. + axis = 0 + return super().diff(n, axis) + + def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> list[Block]: + """ + Shift the block by `periods`. + + Dispatches to underlying ExtensionArray and re-boxes in an + ExtensionBlock. + """ + new_values = self.values.shift(periods=periods, fill_value=fill_value) + return [self.make_block_same_class(new_values)] + + def _unstack( + self, + unstacker, + fill_value, + new_placement: npt.NDArray[np.intp], + needs_masking: npt.NDArray[np.bool_], + ): + # ExtensionArray-safe unstack. + # We override ObjectBlock._unstack, which unstacks directly on the + # values of the array. For EA-backed blocks, this would require + # converting to a 2-D ndarray of objects. + # Instead, we unstack an ndarray of integer positions, followed by + # a `take` on the actual values. + + # Caller is responsible for ensuring self.shape[-1] == len(unstacker.index) + new_values, mask = unstacker.arange_result + + # Note: these next two lines ensure that + # mask.sum() == sum(len(nb.mgr_locs) for nb in blocks) + # which the calling function needs in order to pass verify_integrity=False + # to the BlockManager constructor + new_values = new_values.T[mask] + new_placement = new_placement[mask] + + # needs_masking[i] calculated once in BlockManager.unstack tells + # us if there are any -1s in the relevant indices. When False, + # that allows us to go through a faster path in 'take', among + # other things avoiding e.g. Categorical._validate_scalar. + blocks = [ + # TODO: could cast to object depending on fill_value? 
+ type(self)( + self.values.take( + indices, allow_fill=needs_masking[i], fill_value=fill_value + ), + BlockPlacement(place), + ndim=2, + ) + for i, (indices, place) in enumerate(zip(new_values, new_placement)) + ] + return blocks, mask + + +class NumpyBlock(libinternals.NumpyBlock, Block): + values: np.ndarray + + +class NumericBlock(NumpyBlock): + __slots__ = () + is_numeric = True + + +class NDArrayBackedExtensionBlock(libinternals.NDArrayBackedBlock, EABackedBlock): + """ + Block backed by an NDArrayBackedExtensionArray + """ + + values: NDArrayBackedExtensionArray + + # error: Signature of "is_extension" incompatible with supertype "Block" + @cache_readonly + def is_extension(self) -> bool: # type: ignore[override] + # i.e. datetime64tz, PeriodDtype + return not isinstance(self.dtype, np.dtype) + + @property + def is_view(self) -> bool: + """return a boolean if I am possibly a view""" + # check the ndarray values of the DatetimeIndex values + return self.values._ndarray.base is not None + + def setitem(self, indexer, value): + if not self._can_hold_element(value): + return self.coerce_to_target_dtype(value).setitem(indexer, value) + + values = self.values + if self.ndim > 1: + # Don't transpose with ndim=1 bc we would fail to invalidate + # arr.freq + values = values.T + values[indexer] = value + return self + + def diff(self, n: int, axis: int = 0) -> list[Block]: + """ + 1st discrete difference. + + Parameters + ---------- + n : int + Number of periods to diff. + axis : int, default 0 + Axis to diff upon. + + Returns + ------- + A list with a new Block. + + Notes + ----- + The arguments here are mimicking shift so they are called correctly + by apply. + """ + values = self.values + + new_values = values - values.shift(n, axis=axis) + return [self.make_block(new_values)] + + def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> list[Block]: + values = self.values + new_values = values.shift(periods, fill_value=fill_value, axis=axis) + return [self.make_block_same_class(new_values)] + + def fillna( + self, value, limit=None, inplace: bool = False, downcast=None + ) -> list[Block]: + + if not self._can_hold_element(value) and self.dtype.kind != "m": + # We support filling a DatetimeTZ with a `value` whose timezone + # is different by coercing to object. + # TODO: don't special-case td64 + return self.coerce_to_target_dtype(value).fillna( + value, limit, inplace, downcast + ) + + new_values = self.values.fillna(value=value, limit=limit) + return [self.make_block_same_class(values=new_values)] + + +def _catch_deprecated_value_error(err: Exception) -> None: + """ + We catch ValueError for now, but only a specific one raised by DatetimeArray + which will no longer be raised in version 2.0. 
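
The fillna special case above is observable from the public API; a sketch of the expected behavior in this pandas version (the object-dtype outcome for a mismatched timezone is version-dependent):

    import pandas as pd

    s = pd.Series(pd.to_datetime(["2021-01-01", None]).tz_localize("UTC"))

    print(s.fillna(pd.Timestamp("2021-01-02", tz="UTC")).dtype)
    # datetime64[ns, UTC]: same timezone, so the value can be held

    print(s.fillna(pd.Timestamp("2021-01-02", tz="US/Eastern")).dtype)
    # object: the mismatched timezone triggers coerce_to_target_dtype
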
+ """ + if isinstance(err, ValueError): + # TODO(2.0): once DTA._validate_setitem_value deprecation + # is enforced, stop catching ValueError here altogether + if "Timezones don't match" not in str(err): + raise + + +class DatetimeLikeBlock(NDArrayBackedExtensionBlock): + """Block for datetime64[ns], timedelta64[ns].""" + + __slots__ = () + is_numeric = False + values: DatetimeArray | TimedeltaArray + + def values_for_json(self) -> np.ndarray: + # special casing datetimetz to avoid conversion through + # object dtype + return self.values._ndarray + + +class DatetimeTZBlock(DatetimeLikeBlock): + """implement a datetime64 block with a tz attribute""" + + values: DatetimeArray + + __slots__ = () + is_extension = True + _validate_ndim = True + _can_consolidate = False + + +class ObjectBlock(NumpyBlock): + __slots__ = () + is_object = True + + @maybe_split + def reduce(self, func, ignore_failures: bool = False) -> list[Block]: + """ + For object-dtype, we operate column-wise. + """ + assert self.ndim == 2 + + try: + res = func(self.values) + except TypeError: + if not ignore_failures: + raise + return [] + + assert isinstance(res, np.ndarray) + assert res.ndim == 1 + res = res.reshape(1, -1) + return [self.make_block_same_class(res)] + + @maybe_split + def convert( + self, + copy: bool = True, + datetime: bool = True, + numeric: bool = True, + timedelta: bool = True, + ) -> list[Block]: + """ + attempt to cast any object types to better types return a copy of + the block (if copy = True) by definition we ARE an ObjectBlock!!!!! + """ + values = self.values + if values.ndim == 2: + # maybe_split ensures we only get here with values.shape[0] == 1, + # avoid doing .ravel as that might make a copy + values = values[0] + + res_values = soft_convert_objects( + values, + datetime=datetime, + numeric=numeric, + timedelta=timedelta, + copy=copy, + ) + res_values = ensure_block_shape(res_values, self.ndim) + return [self.make_block(res_values)] + + +class CategoricalBlock(ExtensionBlock): + # this Block type is kept for backwards-compatibility + __slots__ = () + + # GH#43232, GH#43334 self.values.dtype can be changed inplace until 2.0, + # so this cannot be cached + @property + def dtype(self) -> DtypeObj: + return self.values.dtype + + +# ----------------------------------------------------------------- +# Constructor Helpers + + +def maybe_coerce_values(values: ArrayLike) -> ArrayLike: + """ + Input validation for values passed to __init__. Ensure that + any datetime64/timedelta64 dtypes are in nanoseconds. Ensure + that we do not have string dtypes. + + Parameters + ---------- + values : np.ndarray or ExtensionArray + + Returns + ------- + values : np.ndarray or ExtensionArray + """ + # Caller is responsible for ensuring PandasArray is already extracted. + + if isinstance(values, np.ndarray): + values = ensure_wrapped_if_datetimelike(values) + + if issubclass(values.dtype.type, str): + values = np.array(values, dtype=object) + + if isinstance(values, (DatetimeArray, TimedeltaArray)) and values.freq is not None: + # freq is only stored in DatetimeIndex/TimedeltaIndex, not in Series/DataFrame + values = values._with_freq(None) + + return values + + +def get_block_type(dtype: DtypeObj): + """ + Find the appropriate Block subclass to use for the given values and dtype. 
+ + Parameters + ---------- + dtype : numpy or pandas dtype + + Returns + ------- + cls : class, subclass of Block + """ + # We use vtype and kind checks because they are much more performant + # than is_foo_dtype + vtype = dtype.type + kind = dtype.kind + + cls: type[Block] + + if isinstance(dtype, SparseDtype): + # Need this first(ish) so that Sparse[datetime] is sparse + cls = ExtensionBlock + elif isinstance(dtype, CategoricalDtype): + cls = CategoricalBlock + elif vtype is Timestamp: + cls = DatetimeTZBlock + elif isinstance(dtype, PeriodDtype): + cls = NDArrayBackedExtensionBlock + elif isinstance(dtype, ExtensionDtype): + # Note: need to be sure PandasArray is unwrapped before we get here + cls = ExtensionBlock + + elif kind in ["M", "m"]: + cls = DatetimeLikeBlock + elif kind in ["f", "c", "i", "u", "b"]: + cls = NumericBlock + else: + cls = ObjectBlock + return cls + + +def new_block_2d(values: ArrayLike, placement: BlockPlacement): + # new_block specialized to case with + # ndim=2 + # isinstance(placement, BlockPlacement) + # check_ndim/ensure_block_shape already checked + klass = get_block_type(values.dtype) + + values = maybe_coerce_values(values) + return klass(values, ndim=2, placement=placement) + + +def new_block(values, placement, *, ndim: int) -> Block: + # caller is responsible for ensuring values is NOT a PandasArray + + if not isinstance(placement, BlockPlacement): + placement = BlockPlacement(placement) + + check_ndim(values, placement, ndim) + + klass = get_block_type(values.dtype) + + values = maybe_coerce_values(values) + return klass(values, ndim=ndim, placement=placement) + + +def check_ndim(values, placement: BlockPlacement, ndim: int): + """ + ndim inference and validation. + + Validates that values.ndim and ndim are consistent. + Validates that len(values) and len(placement) are consistent. + + Parameters + ---------- + values : array-like + placement : BlockPlacement + ndim : int + + Raises + ------ + ValueError : the number of dimensions do not match + """ + + if values.ndim > ndim: + # Check for both np.ndarray and ExtensionArray + raise ValueError( + "Wrong number of dimensions. " + f"values.ndim > ndim [{values.ndim} > {ndim}]" + ) + + elif not is_1d_only_ea_dtype(values.dtype): + # TODO(EA2D): special case not needed with 2D EAs + if values.ndim != ndim: + raise ValueError( + "Wrong number of dimensions. " + f"values.ndim != ndim [{values.ndim} != {ndim}]" + ) + if len(placement) != len(values): + raise ValueError( + f"Wrong number of items passed {len(values)}, " + f"placement implies {len(placement)}" + ) + elif ndim == 2 and len(placement) != 1: + # TODO(EA2D): special case unnecessary with 2D EAs + raise ValueError("need to split") + + +def extract_pandas_array( + values: np.ndarray | ExtensionArray, dtype: DtypeObj | None, ndim: int +) -> tuple[np.ndarray | ExtensionArray, DtypeObj | None]: + """ + Ensure that we don't allow PandasArray / PandasDtype in internals. + """ + # For now, blocks should be backed by ndarrays when possible. 
+ if isinstance(values, ABCPandasArray): + values = values.to_numpy() + if ndim and ndim > 1: + # TODO(EA2D): special case not needed with 2D EAs + values = np.atleast_2d(values) + + if isinstance(dtype, PandasDtype): + dtype = dtype.numpy_dtype + + return values, dtype + + +# ----------------------------------------------------------------- + + +def extend_blocks(result, blocks=None) -> list[Block]: + """return a new extended blocks, given the result""" + if blocks is None: + blocks = [] + if isinstance(result, list): + for r in result: + if isinstance(r, list): + blocks.extend(r) + else: + blocks.append(r) + else: + assert isinstance(result, Block), type(result) + blocks.append(result) + return blocks + + +def ensure_block_shape(values: ArrayLike, ndim: int = 1) -> ArrayLike: + """ + Reshape if possible to have values.ndim == ndim. + """ + + if values.ndim < ndim: + if not is_1d_only_ea_dtype(values.dtype): + # TODO(EA2D): https://github.com/pandas-dev/pandas/issues/23023 + # block.shape is incorrect for "2D" ExtensionArrays + # We can't, and don't need to, reshape. + values = cast("np.ndarray | DatetimeArray | TimedeltaArray", values) + values = values.reshape(1, -1) + + return values + + +def to_native_types( + values: ArrayLike, + *, + na_rep="nan", + quoting=None, + float_format=None, + decimal=".", + **kwargs, +) -> np.ndarray: + """convert to our native types format""" + if isinstance(values, Categorical): + # GH#40754 Convert categorical datetimes to datetime array + values = take_nd( + values.categories._values, + ensure_platform_int(values._codes), + fill_value=na_rep, + ) + + values = ensure_wrapped_if_datetimelike(values) + + if isinstance(values, (DatetimeArray, TimedeltaArray)): + if values.ndim == 1: + result = values._format_native_types(na_rep=na_rep, **kwargs) + result = result.astype(object, copy=False) + return result + + # GH#21734 Process every column separately, they might have different formats + results_converted = [] + for i in range(len(values)): + result = values[i, :]._format_native_types(na_rep=na_rep, **kwargs) + results_converted.append(result.astype(object, copy=False)) + return np.vstack(results_converted) + + elif isinstance(values, ExtensionArray): + mask = isna(values) + + new_values = np.asarray(values.astype(object)) + new_values[mask] = na_rep + return new_values + + elif values.dtype.kind == "f": + # see GH#13418: no special formatting is desired at the + # output (important for appropriate 'quoting' behaviour), + # so do not pass it through the FloatArrayFormatter + if float_format is None and decimal == ".": + mask = isna(values) + + if not quoting: + values = values.astype(str) + else: + values = np.array(values, dtype="object") + + values[mask] = na_rep + values = values.astype(object, copy=False) + return values + + from pandas.io.formats.format import FloatArrayFormatter + + formatter = FloatArrayFormatter( + values, + na_rep=na_rep, + float_format=float_format, + decimal=decimal, + quoting=quoting, + fixed_width=False, + ) + res = formatter.get_result_as_array() + res = res.astype(object, copy=False) + return res + + else: + + mask = isna(values) + itemsize = writers.word_len(na_rep) + + if values.dtype != _dtype_obj and not quoting and itemsize: + values = values.astype(str) + if values.dtype.itemsize / np.dtype("U1").itemsize < itemsize: + # enlarge for the na_rep + values = values.astype(f" ArrayLike: + """ + The array that Series.values returns (public attribute). 
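
The distinction drawn here is visible when comparing .array with .values on an extension-backed Series:

    import pandas as pd

    s = pd.Series(pd.period_range("2021", periods=3, freq="M"))

    print(type(s.array))   # PeriodArray, the real backing store
    print(s.values.dtype)  # object: external_values boxes Period scalars
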
+ + This has some historical constraints, and is overridden in block + subclasses to return the correct array (e.g. period returns + object ndarray and datetimetz a datetime64[ns] ndarray instead of + proper extension array). + """ + if isinstance(values, (PeriodArray, IntervalArray)): + return values.astype(object) + elif isinstance(values, (DatetimeArray, TimedeltaArray)): + # NB: for datetime64tz this is different from np.asarray(values), since + # that returns an object-dtype ndarray of Timestamps. + # Avoid FutureWarning in .astype in casting from dt64tz to dt64 + return values._data + else: + return values diff --git a/.local/lib/python3.8/site-packages/pandas/core/internals/concat.py b/.local/lib/python3.8/site-packages/pandas/core/internals/concat.py new file mode 100644 index 0000000..782842d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/internals/concat.py @@ -0,0 +1,652 @@ +from __future__ import annotations + +import itertools +from typing import ( + TYPE_CHECKING, + Sequence, + cast, +) + +import numpy as np + +from pandas._libs import ( + NaT, + internals as libinternals, +) +from pandas._typing import ( + ArrayLike, + DtypeObj, + Manager, + Shape, +) +from pandas.util._decorators import cache_readonly + +from pandas.core.dtypes.cast import ( + ensure_dtype_can_hold_na, + find_common_type, +) +from pandas.core.dtypes.common import ( + is_1d_only_ea_dtype, + is_1d_only_ea_obj, + is_datetime64tz_dtype, + is_dtype_equal, +) +from pandas.core.dtypes.concat import ( + cast_to_common_type, + concat_compat, +) +from pandas.core.dtypes.dtypes import ExtensionDtype + +from pandas.core.arrays import ( + DatetimeArray, + ExtensionArray, +) +from pandas.core.construction import ensure_wrapped_if_datetimelike +from pandas.core.internals.array_manager import ( + ArrayManager, + NullArrayProxy, +) +from pandas.core.internals.blocks import ( + ensure_block_shape, + new_block_2d, +) +from pandas.core.internals.managers import BlockManager + +if TYPE_CHECKING: + from pandas import Index + from pandas.core.internals.blocks import Block + + +def _concatenate_array_managers( + mgrs_indexers, axes: list[Index], concat_axis: int, copy: bool +) -> Manager: + """ + Concatenate array managers into one. 
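
This path is only taken when the experimental "array" data manager is active; a hedged sketch, assuming the mode.data_manager option registered in this pandas version:

    import pandas as pd

    with pd.option_context("mode.data_manager", "array"):
        df1 = pd.DataFrame({"a": [1, 2]})
        df2 = pd.DataFrame({"a": [3, 4]})
        # frames built with ArrayManager dispatch to _concatenate_array_managers
        out = pd.concat([df1, df2], ignore_index=True)
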
+ + Parameters + ---------- + mgrs_indexers : list of (ArrayManager, {axis: indexer,...}) tuples + axes : list of Index + concat_axis : int + copy : bool + + Returns + ------- + ArrayManager + """ + # reindex all arrays + mgrs = [] + for mgr, indexers in mgrs_indexers: + axis1_made_copy = False + for ax, indexer in indexers.items(): + mgr = mgr.reindex_indexer( + axes[ax], indexer, axis=ax, allow_dups=True, use_na_proxy=True + ) + if ax == 1 and indexer is not None: + axis1_made_copy = True + if copy and concat_axis == 0 and not axis1_made_copy: + # for concat_axis 1 we will always get a copy through concat_arrays + mgr = mgr.copy() + mgrs.append(mgr) + + if concat_axis == 1: + # concatting along the rows -> concat the reindexed arrays + # TODO(ArrayManager) doesn't yet preserve the correct dtype + arrays = [ + concat_arrays([mgrs[i].arrays[j] for i in range(len(mgrs))]) + for j in range(len(mgrs[0].arrays)) + ] + else: + # concatting along the columns -> combine reindexed arrays in a single manager + assert concat_axis == 0 + arrays = list(itertools.chain.from_iterable([mgr.arrays for mgr in mgrs])) + + new_mgr = ArrayManager(arrays, [axes[1], axes[0]], verify_integrity=False) + return new_mgr + + +def concat_arrays(to_concat: list) -> ArrayLike: + """ + Alternative for concat_compat but specialized for use in the ArrayManager. + + Differences: only deals with 1D arrays (no axis keyword), assumes + ensure_wrapped_if_datetimelike and does not skip empty arrays to determine + the dtype. + In addition ensures that all NullArrayProxies get replaced with actual + arrays. + + Parameters + ---------- + to_concat : list of arrays + + Returns + ------- + np.ndarray or ExtensionArray + """ + # ignore the all-NA proxies to determine the resulting dtype + to_concat_no_proxy = [x for x in to_concat if not isinstance(x, NullArrayProxy)] + + dtypes = {x.dtype for x in to_concat_no_proxy} + single_dtype = len(dtypes) == 1 + + if single_dtype: + target_dtype = to_concat_no_proxy[0].dtype + elif all(x.kind in ["i", "u", "b"] and isinstance(x, np.dtype) for x in dtypes): + # GH#42092 + target_dtype = np.find_common_type(list(dtypes), []) + else: + target_dtype = find_common_type([arr.dtype for arr in to_concat_no_proxy]) + + if target_dtype.kind in ["m", "M"]: + # for datetimelike use DatetimeArray/TimedeltaArray concatenation + # don't use arr.astype(target_dtype, copy=False), because that doesn't + # work for DatetimeArray/TimedeltaArray (returns ndarray) + to_concat = [ + arr.to_array(target_dtype) if isinstance(arr, NullArrayProxy) else arr + for arr in to_concat + ] + return type(to_concat_no_proxy[0])._concat_same_type(to_concat, axis=0) + + to_concat = [ + arr.to_array(target_dtype) + if isinstance(arr, NullArrayProxy) + else cast_to_common_type(arr, target_dtype) + for arr in to_concat + ] + + if isinstance(to_concat[0], ExtensionArray): + cls = type(to_concat[0]) + return cls._concat_same_type(to_concat) + + result = np.concatenate(to_concat) + + # TODO decide on exact behaviour (we shouldn't do this only for empty result) + # see https://github.com/pandas-dev/pandas/issues/39817 + if len(result) == 0: + # all empties -> check for bool to not coerce to float + kinds = {obj.dtype.kind for obj in to_concat_no_proxy} + if len(kinds) != 1: + if "b" in kinds: + result = result.astype(object) + return result + + +def concatenate_managers( + mgrs_indexers, axes: list[Index], concat_axis: int, copy: bool +) -> Manager: + """ + Concatenate block managers into one. 
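
At the user level this function is the workhorse behind row-wise pd.concat on ordinary BlockManager-backed frames; with identical column layouts the uniform fast path (plain np.concatenate) can be taken:

    import pandas as pd

    df1 = pd.DataFrame({"a": [1, 2], "b": [1.0, 2.0]})
    df2 = pd.DataFrame({"a": [3, 4], "b": [3.0, 4.0]})

    print(pd.concat([df1, df2], ignore_index=True))
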
+ + Parameters + ---------- + mgrs_indexers : list of (BlockManager, {axis: indexer,...}) tuples + axes : list of Index + concat_axis : int + copy : bool + + Returns + ------- + BlockManager + """ + # TODO(ArrayManager) this assumes that all managers are of the same type + if isinstance(mgrs_indexers[0][0], ArrayManager): + return _concatenate_array_managers(mgrs_indexers, axes, concat_axis, copy) + + # Assertions disabled for performance + # for tup in mgrs_indexers: + # # caller is responsible for ensuring this + # indexers = tup[1] + # assert concat_axis not in indexers + + if concat_axis == 0: + return _concat_managers_axis0(mgrs_indexers, axes, copy) + + mgrs_indexers = _maybe_reindex_columns_na_proxy(axes, mgrs_indexers) + + # Assertion disabled for performance + # assert all(not x[1] for x in mgrs_indexers) + + concat_plans = [_get_mgr_concatenation_plan(mgr) for mgr, _ in mgrs_indexers] + concat_plan = _combine_concat_plans(concat_plans) + blocks = [] + + for placement, join_units in concat_plan: + unit = join_units[0] + blk = unit.block + + if len(join_units) == 1: + values = blk.values + if copy: + values = values.copy() + else: + values = values.view() + fastpath = True + elif _is_uniform_join_units(join_units): + vals = [ju.block.values for ju in join_units] + + if not blk.is_extension: + # _is_uniform_join_units ensures a single dtype, so + # we can use np.concatenate, which is more performant + # than concat_compat + values = np.concatenate(vals, axis=1) + else: + # TODO(EA2D): special-casing not needed with 2D EAs + values = concat_compat(vals, axis=1) + values = ensure_block_shape(values, ndim=2) + + values = ensure_wrapped_if_datetimelike(values) + + fastpath = blk.values.dtype == values.dtype + else: + values = _concatenate_join_units(join_units, copy=copy) + fastpath = False + + if fastpath: + b = blk.make_block_same_class(values, placement=placement) + else: + b = new_block_2d(values, placement=placement) + + blocks.append(b) + + return BlockManager(tuple(blocks), axes) + + +def _concat_managers_axis0( + mgrs_indexers, axes: list[Index], copy: bool +) -> BlockManager: + """ + concat_managers specialized to concat_axis=0, with reindexing already + having been done in _maybe_reindex_columns_na_proxy. + """ + had_reindexers = { + i: len(mgrs_indexers[i][1]) > 0 for i in range(len(mgrs_indexers)) + } + mgrs_indexers = _maybe_reindex_columns_na_proxy(axes, mgrs_indexers) + + mgrs = [x[0] for x in mgrs_indexers] + + offset = 0 + blocks = [] + for i, mgr in enumerate(mgrs): + # If we already reindexed, then we definitely don't need another copy + made_copy = had_reindexers[i] + + for blk in mgr.blocks: + if made_copy: + nb = blk.copy(deep=False) + elif copy: + nb = blk.copy() + else: + # by slicing instead of copy(deep=False), we get a new array + # object, see test_concat_copy + nb = blk.getitem_block(slice(None)) + nb._mgr_locs = nb._mgr_locs.add(offset) + blocks.append(nb) + + offset += len(mgr.items) + return BlockManager(tuple(blocks), axes) + + +def _maybe_reindex_columns_na_proxy( + axes: list[Index], mgrs_indexers: list[tuple[BlockManager, dict[int, np.ndarray]]] +) -> list[tuple[BlockManager, dict[int, np.ndarray]]]: + """ + Reindex along columns so that all of the BlockManagers being concatenated + have matching columns. + + Columns added in this reindexing have dtype=np.void, indicating they + should be ignored when choosing a column's final dtype. 
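
The effect is visible when concatenating frames whose columns only partially overlap:

    import pandas as pd

    df1 = pd.DataFrame({"a": [1, 2]})
    df2 = pd.DataFrame({"b": [3.0, 4.0]})

    # "b" is appended to df1 (and "a" to df2) as all-NA proxy columns; the
    # proxies are ignored when choosing each result column's dtype
    print(pd.concat([df1, df2]))
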
+ """ + new_mgrs_indexers: list[tuple[BlockManager, dict[int, np.ndarray]]] = [] + + for mgr, indexers in mgrs_indexers: + # For axis=0 (i.e. columns) we use_na_proxy and only_slice, so this + # is a cheap reindexing. + for i, indexer in indexers.items(): + mgr = mgr.reindex_indexer( + axes[i], + indexers[i], + axis=i, + copy=False, + only_slice=True, # only relevant for i==0 + allow_dups=True, + use_na_proxy=True, # only relevant for i==0 + ) + new_mgrs_indexers.append((mgr, {})) + + return new_mgrs_indexers + + +def _get_mgr_concatenation_plan(mgr: BlockManager): + """ + Construct concatenation plan for given block manager. + + Parameters + ---------- + mgr : BlockManager + + Returns + ------- + plan : list of (BlockPlacement, JoinUnit) tuples + + """ + # Calculate post-reindex shape , save for item axis which will be separate + # for each block anyway. + mgr_shape_list = list(mgr.shape) + mgr_shape = tuple(mgr_shape_list) + + if mgr.is_single_block: + blk = mgr.blocks[0] + return [(blk.mgr_locs, JoinUnit(blk, mgr_shape))] + + blknos = mgr.blknos + blklocs = mgr.blklocs + + plan = [] + for blkno, placements in libinternals.get_blkno_placements(blknos, group=False): + + assert placements.is_slice_like + assert blkno != -1 + + shape_list = list(mgr_shape) + shape_list[0] = len(placements) + shape = tuple(shape_list) + + blk = mgr.blocks[blkno] + ax0_blk_indexer = blklocs[placements.indexer] + + unit_no_ax0_reindexing = ( + len(placements) == len(blk.mgr_locs) + and + # Fastpath detection of join unit not + # needing to reindex its block: no ax0 + # reindexing took place and block + # placement was sequential before. + ( + (blk.mgr_locs.is_slice_like and blk.mgr_locs.as_slice.step == 1) + or + # Slow-ish detection: all indexer locs + # are sequential (and length match is + # checked above). + (np.diff(ax0_blk_indexer) == 1).all() + ) + ) + + if not unit_no_ax0_reindexing: + # create block from subset of columns + blk = blk.getitem_block(ax0_blk_indexer) + + # Assertions disabled for performance + # assert blk._mgr_locs.as_slice == placements.as_slice + # assert blk.shape[0] == shape[0] + unit = JoinUnit(blk, shape) + + plan.append((placements, unit)) + + return plan + + +class JoinUnit: + def __init__(self, block: Block, shape: Shape): + # Passing shape explicitly is required for cases when block is None. + self.block = block + self.shape = shape + + def __repr__(self) -> str: + return f"{type(self).__name__}({repr(self.block)})" + + @cache_readonly + def is_na(self) -> bool: + blk = self.block + if blk.dtype.kind == "V": + return True + return False + + def get_reindexed_values(self, empty_dtype: DtypeObj) -> ArrayLike: + values: ArrayLike + + if self.is_na: + return make_na_array(empty_dtype, self.shape) + + else: + + if not self.block._can_consolidate: + # preserve these for validation in concat_compat + return self.block.values + + # No dtype upcasting is done here, it will be performed during + # concatenation itself. + values = self.block.values + + return values + + +def make_na_array(dtype: DtypeObj, shape: Shape) -> ArrayLike: + """ + Construct an np.ndarray or ExtensionArray of the given dtype and shape + holding all-NA values. 
+ """ + if is_datetime64tz_dtype(dtype): + # NaT here is analogous to dtype.na_value below + i8values = np.full(shape, NaT.value) + return DatetimeArray(i8values, dtype=dtype) + + elif is_1d_only_ea_dtype(dtype): + dtype = cast(ExtensionDtype, dtype) + cls = dtype.construct_array_type() + + missing_arr = cls._from_sequence([], dtype=dtype) + nrows = shape[-1] + taker = -1 * np.ones((nrows,), dtype=np.intp) + return missing_arr.take(taker, allow_fill=True, fill_value=dtype.na_value) + elif isinstance(dtype, ExtensionDtype): + # TODO: no tests get here, a handful would if we disabled + # the dt64tz special-case above (which is faster) + cls = dtype.construct_array_type() + missing_arr = cls._empty(shape=shape, dtype=dtype) + missing_arr[:] = dtype.na_value + return missing_arr + else: + # NB: we should never get here with dtype integer or bool; + # if we did, the missing_arr.fill would cast to gibberish + missing_arr = np.empty(shape, dtype=dtype) + fill_value = _dtype_to_na_value(dtype) + missing_arr.fill(fill_value) + return missing_arr + + +def _concatenate_join_units(join_units: list[JoinUnit], copy: bool) -> ArrayLike: + """ + Concatenate values from several join units along axis=1. + """ + + empty_dtype = _get_empty_dtype(join_units) + + to_concat = [ju.get_reindexed_values(empty_dtype=empty_dtype) for ju in join_units] + + if len(to_concat) == 1: + # Only one block, nothing to concatenate. + concat_values = to_concat[0] + if copy: + if isinstance(concat_values, np.ndarray): + # non-reindexed (=not yet copied) arrays are made into a view + # in JoinUnit.get_reindexed_values + if concat_values.base is not None: + concat_values = concat_values.copy() + else: + concat_values = concat_values.copy() + + elif any(is_1d_only_ea_obj(t) for t in to_concat): + # TODO(EA2D): special case not needed if all EAs used HybridBlocks + # NB: we are still assuming here that Hybrid blocks have shape (1, N) + # concatting with at least one EA means we are concatting a single column + # the non-EA values are 2D arrays with shape (1, n) + + # error: No overload variant of "__getitem__" of "ExtensionArray" matches + # argument type "Tuple[int, slice]" + to_concat = [ + t if is_1d_only_ea_obj(t) else t[0, :] # type: ignore[call-overload] + for t in to_concat + ] + concat_values = concat_compat(to_concat, axis=0, ea_compat_axis=True) + concat_values = ensure_block_shape(concat_values, 2) + + else: + concat_values = concat_compat(to_concat, axis=1) + + return concat_values + + +def _dtype_to_na_value(dtype: DtypeObj): + """ + Find the NA value to go with this dtype. + """ + if isinstance(dtype, ExtensionDtype): + return dtype.na_value + elif dtype.kind in ["m", "M"]: + return dtype.type("NaT") + elif dtype.kind in ["f", "c"]: + return dtype.type("NaN") + elif dtype.kind == "b": + # different from missing.na_value_for_dtype + return None + elif dtype.kind in ["i", "u"]: + return np.nan + elif dtype.kind == "O": + return np.nan + raise NotImplementedError + + +def _get_empty_dtype(join_units: Sequence[JoinUnit]) -> DtypeObj: + """ + Return dtype and N/A values to use when concatenating specified units. + + Returned N/A value may be None which means there was no casting involved. 
+ + Returns + ------- + dtype + """ + if len(join_units) == 1: + blk = join_units[0].block + return blk.dtype + + if _is_uniform_reindex(join_units): + empty_dtype = join_units[0].block.dtype + return empty_dtype + + needs_can_hold_na = any(unit.is_na for unit in join_units) + + dtypes = [unit.block.dtype for unit in join_units if not unit.is_na] + + dtype = find_common_type(dtypes) + if needs_can_hold_na: + dtype = ensure_dtype_can_hold_na(dtype) + return dtype + + +def _is_uniform_join_units(join_units: list[JoinUnit]) -> bool: + """ + Check if the join units consist of blocks of uniform type that can + be concatenated using Block.concat_same_type instead of the generic + _concatenate_join_units (which uses `concat_compat`). + + """ + first = join_units[0].block + if first.dtype.kind == "V": + return False + return ( + # exclude cases where a) ju.block is None or b) we have e.g. Int64+int64 + all(type(ju.block) is type(first) for ju in join_units) + and + # e.g. DatetimeLikeBlock can be dt64 or td64, but these are not uniform + all( + is_dtype_equal(ju.block.dtype, first.dtype) + # GH#42092 we only want the dtype_equal check for non-numeric blocks + # (for now, may change but that would need a deprecation) + or ju.block.dtype.kind in ["b", "i", "u"] + for ju in join_units + ) + and + # no blocks that would get missing values (can lead to type upcasts) + # unless we're an extension dtype. + all(not ju.is_na or ju.block.is_extension for ju in join_units) + and + # only use this path when there is something to concatenate + len(join_units) > 1 + ) + + +def _is_uniform_reindex(join_units) -> bool: + return ( + # TODO: should this be ju.block._can_hold_na? + all(ju.block.is_extension for ju in join_units) + and len({ju.block.dtype.name for ju in join_units}) == 1 + ) + + +def _trim_join_unit(join_unit: JoinUnit, length: int) -> JoinUnit: + """ + Reduce join_unit's shape along item axis to length. + + Extra items that didn't fit are returned as a separate block. + """ + + extra_block = join_unit.block.getitem_block(slice(length, None)) + join_unit.block = join_unit.block.getitem_block(slice(length)) + + extra_shape = (join_unit.shape[0] - length,) + join_unit.shape[1:] + join_unit.shape = (length,) + join_unit.shape[1:] + + return JoinUnit(block=extra_block, shape=extra_shape) + + +def _combine_concat_plans(plans): + """ + Combine multiple concatenation plans into one. + + existing_plan is updated in-place. + """ + if len(plans) == 1: + for p in plans[0]: + yield p[0], [p[1]] + + else: + # singleton list so we can modify it as a side-effect within _next_or_none + num_ended = [0] + + def _next_or_none(seq): + retval = next(seq, None) + if retval is None: + num_ended[0] += 1 + return retval + + plans = list(map(iter, plans)) + next_items = list(map(_next_or_none, plans)) + + while num_ended[0] != len(next_items): + if num_ended[0] > 0: + raise ValueError("Plan shapes are not aligned") + + placements, units = zip(*next_items) + + lengths = list(map(len, placements)) + min_len, max_len = min(lengths), max(lengths) + + if min_len == max_len: + yield placements[0], units + next_items[:] = map(_next_or_none, plans) + else: + yielded_placement = None + yielded_units = [None] * len(next_items) + for i, (plc, unit) in enumerate(next_items): + yielded_units[i] = unit + if len(plc) > min_len: + # _trim_join_unit updates unit in place, so only + # placement needs to be sliced to skip min_len. 
+ next_items[i] = (plc[min_len:], _trim_join_unit(unit, min_len)) + else: + yielded_placement = plc + next_items[i] = _next_or_none(plans[i]) + + yield yielded_placement, yielded_units diff --git a/.local/lib/python3.8/site-packages/pandas/core/internals/construction.py b/.local/lib/python3.8/site-packages/pandas/core/internals/construction.py new file mode 100644 index 0000000..d14db58 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/internals/construction.py @@ -0,0 +1,1075 @@ +""" +Functions for preparing various inputs passed to the DataFrame or Series +constructors before passing them to a BlockManager. +""" +from __future__ import annotations + +from collections import abc +from typing import ( + TYPE_CHECKING, + Any, + Hashable, + Sequence, + cast, +) +import warnings + +import numpy as np +import numpy.ma as ma + +from pandas._libs import lib +from pandas._typing import ( + ArrayLike, + DtypeObj, + Manager, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.cast import ( + construct_1d_arraylike_from_scalar, + dict_compat, + maybe_cast_to_datetime, + maybe_convert_platform, + maybe_infer_to_datetimelike, + maybe_upcast, +) +from pandas.core.dtypes.common import ( + is_1d_only_ea_dtype, + is_datetime64tz_dtype, + is_datetime_or_timedelta_dtype, + is_dtype_equal, + is_extension_array_dtype, + is_integer_dtype, + is_list_like, + is_named_tuple, + is_object_dtype, +) +from pandas.core.dtypes.dtypes import ExtensionDtype +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) + +from pandas.core import ( + algorithms, + common as com, +) +from pandas.core.arrays import ( + Categorical, + DatetimeArray, + ExtensionArray, + TimedeltaArray, +) +from pandas.core.construction import ( + ensure_wrapped_if_datetimelike, + extract_array, + range_to_ndarray, + sanitize_array, +) +from pandas.core.indexes.api import ( + DatetimeIndex, + Index, + TimedeltaIndex, + default_index, + ensure_index, + get_objs_combined_axis, + union_indexes, +) +from pandas.core.internals.array_manager import ( + ArrayManager, + SingleArrayManager, +) +from pandas.core.internals.blocks import ( + BlockPlacement, + ensure_block_shape, + new_block_2d, +) +from pandas.core.internals.managers import ( + BlockManager, + SingleBlockManager, + create_block_manager_from_blocks, + create_block_manager_from_column_arrays, +) + +if TYPE_CHECKING: + from numpy.ma.mrecords import MaskedRecords + + +# --------------------------------------------------------------------- +# BlockManager Interface + + +def arrays_to_mgr( + arrays, + columns: Index, + index, + *, + dtype: DtypeObj | None = None, + verify_integrity: bool = True, + typ: str | None = None, + consolidate: bool = True, +) -> Manager: + """ + Segregate Series based on type and coerce into matrices. + + Needs to handle a lot of exceptional cases. 
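+
+    A minimal sketch (illustrative; calls this internal API directly):
+
+    >>> import numpy as np
+    >>> from pandas import Index
+    >>> mgr = arrays_to_mgr(
+    ...     [np.array([1, 2]), np.array([3.0, 4.0])],
+    ...     Index(["a", "b"]),
+    ...     None,
+    ...     typ="block",
+    ... )
+    >>> mgr.shape  # (n_columns, n_rows)
+    (2, 2)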
+ """ + if verify_integrity: + # figure out the index, if necessary + if index is None: + index = _extract_index(arrays) + else: + index = ensure_index(index) + + # don't force copy because getting jammed in an ndarray anyway + arrays = _homogenize(arrays, index, dtype) + # _homogenize ensures + # - all(len(x) == len(index) for x in arrays) + # - all(x.ndim == 1 for x in arrays) + # - all(isinstance(x, (np.ndarray, ExtensionArray)) for x in arrays) + # - all(type(x) is not PandasArray for x in arrays) + + else: + index = ensure_index(index) + arrays = [extract_array(x, extract_numpy=True) for x in arrays] + + # Reached via DataFrame._from_arrays; we do validation here + for arr in arrays: + if ( + not isinstance(arr, (np.ndarray, ExtensionArray)) + or arr.ndim != 1 + or len(arr) != len(index) + ): + raise ValueError( + "Arrays must be 1-dimensional np.ndarray or ExtensionArray " + "with length matching len(index)" + ) + + columns = ensure_index(columns) + if len(columns) != len(arrays): + raise ValueError("len(arrays) must match len(columns)") + + # from BlockManager perspective + axes = [columns, index] + + if typ == "block": + return create_block_manager_from_column_arrays( + arrays, axes, consolidate=consolidate + ) + elif typ == "array": + return ArrayManager(arrays, [index, columns]) + else: + raise ValueError(f"'typ' needs to be one of {{'block', 'array'}}, got '{typ}'") + + +def rec_array_to_mgr( + data: MaskedRecords | np.recarray | np.ndarray, + index, + columns, + dtype: DtypeObj | None, + copy: bool, + typ: str, +): + """ + Extract from a masked rec array and create the manager. + """ + # essentially process a record array then fill it + fdata = ma.getdata(data) + if index is None: + index = default_index(len(fdata)) + else: + index = ensure_index(index) + + if columns is not None: + columns = ensure_index(columns) + arrays, arr_columns = to_arrays(fdata, columns) + + # fill if needed + if isinstance(data, np.ma.MaskedArray): + # GH#42200 we only get here with MaskedRecords, but check for the + # parent class MaskedArray to avoid the need to import MaskedRecords + data = cast("MaskedRecords", data) + new_arrays = fill_masked_arrays(data, arr_columns) + else: + # error: Incompatible types in assignment (expression has type + # "List[ExtensionArray]", variable has type "List[ndarray]") + new_arrays = arrays # type: ignore[assignment] + + # create the manager + + # error: Argument 1 to "reorder_arrays" has incompatible type "List[ndarray]"; + # expected "List[Union[ExtensionArray, ndarray]]" + arrays, arr_columns = reorder_arrays( + new_arrays, arr_columns, columns, len(index) # type: ignore[arg-type] + ) + if columns is None: + columns = arr_columns + + mgr = arrays_to_mgr(arrays, columns, index, dtype=dtype, typ=typ) + + if copy: + mgr = mgr.copy() + return mgr + + +def fill_masked_arrays(data: MaskedRecords, arr_columns: Index) -> list[np.ndarray]: + """ + Convert numpy MaskedRecords to ensure mask is softened. + """ + new_arrays = [] + + for col in arr_columns: + arr = data[col] + fv = arr.fill_value + + mask = ma.getmaskarray(arr) + if mask.any(): + arr, fv = maybe_upcast(arr, fill_value=fv, copy=True) + arr[mask] = fv + new_arrays.append(arr) + return new_arrays + + +def mgr_to_mgr(mgr, typ: str, copy: bool = True): + """ + Convert to specific type of Manager. Does not copy if the type is already + correct. Does not guarantee a copy otherwise. `copy` keyword only controls + whether conversion from Block->ArrayManager copies the 1D arrays. 
+ """ + new_mgr: Manager + + if typ == "block": + if isinstance(mgr, BlockManager): + new_mgr = mgr + else: + if mgr.ndim == 2: + new_mgr = arrays_to_mgr( + mgr.arrays, mgr.axes[0], mgr.axes[1], typ="block" + ) + else: + new_mgr = SingleBlockManager.from_array(mgr.arrays[0], mgr.index) + elif typ == "array": + if isinstance(mgr, ArrayManager): + new_mgr = mgr + else: + if mgr.ndim == 2: + arrays = [mgr.iget_values(i) for i in range(len(mgr.axes[0]))] + if copy: + arrays = [arr.copy() for arr in arrays] + new_mgr = ArrayManager(arrays, [mgr.axes[1], mgr.axes[0]]) + else: + array = mgr.internal_values() + if copy: + array = array.copy() + new_mgr = SingleArrayManager([array], [mgr.index]) + else: + raise ValueError(f"'typ' needs to be one of {{'block', 'array'}}, got '{typ}'") + return new_mgr + + +# --------------------------------------------------------------------- +# DataFrame Constructor Interface + + +def ndarray_to_mgr( + values, index, columns, dtype: DtypeObj | None, copy: bool, typ: str +) -> Manager: + # used in DataFrame.__init__ + # input must be a ndarray, list, Series, Index, ExtensionArray + + if isinstance(values, ABCSeries): + if columns is None: + if values.name is not None: + columns = Index([values.name]) + if index is None: + index = values.index + else: + values = values.reindex(index) + + # zero len case (GH #2234) + if not len(values) and columns is not None and len(columns): + values = np.empty((0, 1), dtype=object) + + # if the array preparation does a copy -> avoid this for ArrayManager, + # since the copy is done on conversion to 1D arrays + copy_on_sanitize = False if typ == "array" else copy + + vdtype = getattr(values, "dtype", None) + if is_1d_only_ea_dtype(vdtype) or is_1d_only_ea_dtype(dtype): + # GH#19157 + + if isinstance(values, (np.ndarray, ExtensionArray)) and values.ndim > 1: + # GH#12513 a EA dtype passed with a 2D array, split into + # multiple EAs that view the values + # error: No overload variant of "__getitem__" of "ExtensionArray" + # matches argument type "Tuple[slice, int]" + values = [ + values[:, n] # type: ignore[call-overload] + for n in range(values.shape[1]) + ] + else: + values = [values] + + if columns is None: + columns = Index(range(len(values))) + else: + columns = ensure_index(columns) + + return arrays_to_mgr(values, columns, index, dtype=dtype, typ=typ) + + elif is_extension_array_dtype(vdtype) and not is_1d_only_ea_dtype(vdtype): + # i.e. 
Datetime64TZ, PeriodDtype + values = extract_array(values, extract_numpy=True) + if copy: + values = values.copy() + if values.ndim == 1: + values = values.reshape(-1, 1) + + else: + # by definition an array here + # the dtypes will be coerced to a single dtype + values = _prep_ndarray(values, copy=copy_on_sanitize) + + if dtype is not None and not is_dtype_equal(values.dtype, dtype): + shape = values.shape + flat = values.ravel() + + # GH#40110 see similar check inside sanitize_array + rcf = not (is_integer_dtype(dtype) and values.dtype.kind == "f") + + values = sanitize_array( + flat, None, dtype=dtype, copy=copy_on_sanitize, raise_cast_failure=rcf + ) + + values = values.reshape(shape) + + # _prep_ndarray ensures that values.ndim == 2 at this point + index, columns = _get_axes( + values.shape[0], values.shape[1], index=index, columns=columns + ) + + _check_values_indices_shape_match(values, index, columns) + + if typ == "array": + + if issubclass(values.dtype.type, str): + values = np.array(values, dtype=object) + + if dtype is None and is_object_dtype(values.dtype): + arrays = [ + ensure_wrapped_if_datetimelike( + maybe_infer_to_datetimelike(values[:, i]) + ) + for i in range(values.shape[1]) + ] + else: + if is_datetime_or_timedelta_dtype(values.dtype): + values = ensure_wrapped_if_datetimelike(values) + arrays = [values[:, i] for i in range(values.shape[1])] + + if copy: + arrays = [arr.copy() for arr in arrays] + + return ArrayManager(arrays, [index, columns], verify_integrity=False) + + values = values.T + + # if we don't have a dtype specified, then try to convert objects + # on the entire block; this is to convert if we have datetimelike's + # embedded in an object type + if dtype is None and is_object_dtype(values.dtype): + obj_columns = list(values) + maybe_datetime = [maybe_infer_to_datetimelike(x) for x in obj_columns] + # don't convert (and copy) the objects if no type inference occurs + if any(x is not y for x, y in zip(obj_columns, maybe_datetime)): + dvals_list = [ensure_block_shape(dval, 2) for dval in maybe_datetime] + block_values = [ + new_block_2d(dvals_list[n], placement=BlockPlacement(n)) + for n in range(len(dvals_list)) + ] + else: + bp = BlockPlacement(slice(len(columns))) + nb = new_block_2d(values, placement=bp) + block_values = [nb] + else: + bp = BlockPlacement(slice(len(columns))) + nb = new_block_2d(values, placement=bp) + block_values = [nb] + + if len(columns) == 0: + block_values = [] + + return create_block_manager_from_blocks( + block_values, [columns, index], verify_integrity=False + ) + + +def _check_values_indices_shape_match( + values: np.ndarray, index: Index, columns: Index +) -> None: + """ + Check that the shape implied by our axes matches the actual shape of the + data. + """ + if values.shape[1] != len(columns) or values.shape[0] != len(index): + # Could let this raise in Block constructor, but we get a more + # helpful exception message this way. + if values.shape[0] == 0: + raise ValueError("Empty data passed with indices specified.") + + passed = values.shape + implied = (len(index), len(columns)) + raise ValueError(f"Shape of passed values is {passed}, indices imply {implied}") + + +def dict_to_mgr( + data: dict, + index, + columns, + *, + dtype: DtypeObj | None = None, + typ: str = "block", + copy: bool = True, +) -> Manager: + """ + Segregate Series based on type and coerce into matrices. + Needs to handle a lot of exceptional cases. 
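+
+    For example (illustrative), a requested column missing from the dict is
+    filled with object-dtype NaNs via the GH#1783 branch below:
+
+    >>> import pandas as pd
+    >>> pd.DataFrame({"a": [1, 2]}, columns=["a", "b"])["b"].dtype
+    dtype('O')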
+ + Used in DataFrame.__init__ + """ + arrays: Sequence[Any] | Series + + if columns is not None: + from pandas.core.series import Series + + arrays = Series(data, index=columns, dtype=object) + missing = arrays.isna() + if index is None: + # GH10856 + # raise ValueError if only scalars in dict + index = _extract_index(arrays[~missing]) + else: + index = ensure_index(index) + + # no obvious "empty" int column + if missing.any() and not is_integer_dtype(dtype): + nan_dtype: DtypeObj + + if dtype is not None: + # calling sanitize_array ensures we don't mix-and-match + # NA dtypes + midxs = missing.values.nonzero()[0] + for i in midxs: + arr = sanitize_array(arrays.iat[i], index, dtype=dtype) + arrays.iat[i] = arr + else: + # GH#1783 + nan_dtype = np.dtype("object") + val = construct_1d_arraylike_from_scalar(np.nan, len(index), nan_dtype) + nmissing = missing.sum() + if copy: + rhs = [val] * nmissing + else: + # GH#45369 + rhs = [val.copy() for _ in range(nmissing)] + arrays.loc[missing] = rhs + + arrays = list(arrays) + columns = ensure_index(columns) + + else: + keys = list(data.keys()) + columns = Index(keys) + arrays = [com.maybe_iterable_to_list(data[k]) for k in keys] + # GH#24096 need copy to be deep for datetime64tz case + # TODO: See if we can avoid these copies + arrays = [arr if not isinstance(arr, Index) else arr._data for arr in arrays] + arrays = [ + arr if not is_datetime64tz_dtype(arr) else arr.copy() for arr in arrays + ] + + if copy: + # arrays_to_mgr (via form_blocks) won't make copies for EAs + # dtype attr check to exclude EADtype-castable strs + arrays = [ + x + if not hasattr(x, "dtype") or not isinstance(x.dtype, ExtensionDtype) + else x.copy() + for x in arrays + ] + # TODO: can we get rid of the dt64tz special case above? + + return arrays_to_mgr(arrays, columns, index, dtype=dtype, typ=typ, consolidate=copy) + + +def nested_data_to_arrays( + data: Sequence, + columns: Index | None, + index: Index | None, + dtype: DtypeObj | None, +) -> tuple[list[ArrayLike], Index, Index]: + """ + Convert a single sequence of arrays to multiple arrays. + """ + # By the time we get here we have already checked treat_as_nested(data) + + if is_named_tuple(data[0]) and columns is None: + columns = ensure_index(data[0]._fields) + + arrays, columns = to_arrays(data, columns, dtype=dtype) + columns = ensure_index(columns) + + if index is None: + if isinstance(data[0], ABCSeries): + index = _get_names_from_index(data) + elif isinstance(data[0], Categorical): + # GH#38845 hit in test_constructor_categorical + index = default_index(len(data[0])) + else: + index = default_index(len(data)) + + return arrays, columns, index + + +def treat_as_nested(data) -> bool: + """ + Check if we should use nested_data_to_arrays. + """ + return ( + len(data) > 0 + and is_list_like(data[0]) + and getattr(data[0], "ndim", 1) == 1 + and not (isinstance(data, ExtensionArray) and data.ndim == 2) + ) + + +# --------------------------------------------------------------------- + + +def _prep_ndarray(values, copy: bool = True) -> np.ndarray: + if isinstance(values, TimedeltaArray) or ( + isinstance(values, DatetimeArray) and values.tz is None + ): + # On older numpy, np.asarray below apparently does not call __array__, + # so nanoseconds get dropped. 
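+        # unwrapping to the backing datetime64[ns]/timedelta64[ns] ndarray
+        # keeps the nanosecond resolution intact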
+ values = values._ndarray + + if not isinstance(values, (np.ndarray, ABCSeries, Index)): + if len(values) == 0: + return np.empty((0, 0), dtype=object) + elif isinstance(values, range): + arr = range_to_ndarray(values) + return arr[..., np.newaxis] + + def convert(v): + if not is_list_like(v) or isinstance(v, ABCDataFrame): + return v + + v = extract_array(v, extract_numpy=True) + res = maybe_convert_platform(v) + return res + + # we could have a 1-dim or 2-dim list here + # this is equiv of np.asarray, but does object conversion + # and platform dtype preservation + if is_list_like(values[0]): + values = np.array([convert(v) for v in values]) + elif isinstance(values[0], np.ndarray) and values[0].ndim == 0: + # GH#21861 see test_constructor_list_of_lists + values = np.array([convert(v) for v in values]) + else: + values = convert(values) + + else: + + # drop subclass info + values = np.array(values, copy=copy) + + if values.ndim == 1: + values = values.reshape((values.shape[0], 1)) + elif values.ndim != 2: + raise ValueError(f"Must pass 2-d input. shape={values.shape}") + + return values + + +def _homogenize(data, index: Index, dtype: DtypeObj | None) -> list[ArrayLike]: + oindex = None + homogenized = [] + + for val in data: + if isinstance(val, ABCSeries): + if dtype is not None: + val = val.astype(dtype, copy=False) + if val.index is not index: + # Forces alignment. No need to copy data since we + # are putting it into an ndarray later + val = val.reindex(index, copy=False) + + val = val._values + else: + if isinstance(val, dict): + # GH#41785 this _should_ be equivalent to (but faster than) + # val = create_series_with_explicit_dtype(val, index=index)._values + if oindex is None: + oindex = index.astype("O") + + if isinstance(index, (DatetimeIndex, TimedeltaIndex)): + # see test_constructor_dict_datetime64_index + val = dict_compat(val) + else: + # see test_constructor_subclass_dict + val = dict(val) + val = lib.fast_multiget(val, oindex._values, default=np.nan) + + val = sanitize_array( + val, index, dtype=dtype, copy=False, raise_cast_failure=False + ) + com.require_length_match(val, index) + + homogenized.append(val) + + return homogenized + + +def _extract_index(data) -> Index: + """ + Try to infer an Index from the passed data, raise ValueError on failure. + """ + index = None + if len(data) == 0: + index = Index([]) + else: + raw_lengths = [] + indexes: list[list[Hashable] | Index] = [] + + have_raw_arrays = False + have_series = False + have_dicts = False + + for val in data: + if isinstance(val, ABCSeries): + have_series = True + indexes.append(val.index) + elif isinstance(val, dict): + have_dicts = True + indexes.append(list(val.keys())) + elif is_list_like(val) and getattr(val, "ndim", 1) == 1: + have_raw_arrays = True + raw_lengths.append(len(val)) + elif isinstance(val, np.ndarray) and val.ndim > 1: + raise ValueError("Per-column arrays must each be 1-dimensional") + + if not indexes and not raw_lengths: + raise ValueError("If using all scalar values, you must pass an index") + + elif have_series: + index = union_indexes(indexes) + elif have_dicts: + index = union_indexes(indexes, sort=False) + + if have_raw_arrays: + lengths = list(set(raw_lengths)) + if len(lengths) > 1: + raise ValueError("All arrays must be of the same length") + + if have_dicts: + raise ValueError( + "Mixing dicts with non-Series may lead to ambiguous ordering." 
+ ) + + if have_series: + assert index is not None # for mypy + if lengths[0] != len(index): + msg = ( + f"array length {lengths[0]} does not match index " + f"length {len(index)}" + ) + raise ValueError(msg) + else: + index = default_index(lengths[0]) + + # error: Argument 1 to "ensure_index" has incompatible type "Optional[Index]"; + # expected "Union[Union[Union[ExtensionArray, ndarray], Index, Series], + # Sequence[Any]]" + return ensure_index(index) # type: ignore[arg-type] + + +def reorder_arrays( + arrays: list[ArrayLike], arr_columns: Index, columns: Index | None, length: int +) -> tuple[list[ArrayLike], Index]: + """ + Pre-emptively (cheaply) reindex arrays with new columns. + """ + # reorder according to the columns + if columns is not None: + if not columns.equals(arr_columns): + # if they are equal, there is nothing to do + new_arrays: list[ArrayLike | None] + new_arrays = [None] * len(columns) + indexer = arr_columns.get_indexer(columns) + for i, k in enumerate(indexer): + if k == -1: + # by convention default is all-NaN object dtype + arr = np.empty(length, dtype=object) + arr.fill(np.nan) + else: + arr = arrays[k] + new_arrays[i] = arr + + # Incompatible types in assignment (expression has type + # "List[Union[ExtensionArray, ndarray[Any, Any], None]]", variable + # has type "List[Union[ExtensionArray, ndarray[Any, Any]]]") + arrays = new_arrays # type: ignore[assignment] + arr_columns = columns + + return arrays, arr_columns + + +def _get_names_from_index(data) -> Index: + has_some_name = any(getattr(s, "name", None) is not None for s in data) + if not has_some_name: + return default_index(len(data)) + + index: list[Hashable] = list(range(len(data))) + count = 0 + for i, s in enumerate(data): + n = getattr(s, "name", None) + if n is not None: + index[i] = n + else: + index[i] = f"Unnamed {count}" + count += 1 + + return Index(index) + + +def _get_axes( + N: int, K: int, index: Index | None, columns: Index | None +) -> tuple[Index, Index]: + # helper to create the axes as indexes + # return axes or defaults + + if index is None: + index = default_index(N) + else: + index = ensure_index(index) + + if columns is None: + columns = default_index(K) + else: + columns = ensure_index(columns) + return index, columns + + +def dataclasses_to_dicts(data): + """ + Converts a list of dataclass instances to a list of dictionaries. + + Parameters + ---------- + data : List[Type[dataclass]] + + Returns + -------- + list_dict : List[dict] + + Examples + -------- + >>> from dataclasses import dataclass + >>> @dataclass + ... class Point: + ... x: int + ... y: int + + >>> dataclasses_to_dicts([Point(1, 2), Point(2, 3)]) + [{'x': 1, 'y': 2}, {'x': 2, 'y': 3}] + + """ + from dataclasses import asdict + + return list(map(asdict, data)) + + +# --------------------------------------------------------------------- +# Conversion of Inputs to Arrays + + +def to_arrays( + data, columns: Index | None, dtype: DtypeObj | None = None +) -> tuple[list[ArrayLike], Index]: + """ + Return list of arrays, columns. + + Returns + ------- + list[ArrayLike] + These will become columns in a DataFrame. + Index + This will become frame.columns. + + Notes + ----- + Ensures that len(result_arrays) == len(result_index). 
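+
+    A small sketch (illustrative) of the list-of-tuples path:
+
+    >>> arrays, cols = to_arrays([(1, "x"), (2, "y")], columns=None)
+    >>> [arr.dtype.kind for arr in arrays]
+    ['i', 'O']
+    >>> list(cols)
+    [0, 1]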
+ """ + if isinstance(data, ABCDataFrame): + # see test_from_records_with_index_data, test_from_records_bad_index_column + if columns is not None: + arrays = [ + data._ixs(i, axis=1).values + for i, col in enumerate(data.columns) + if col in columns + ] + else: + columns = data.columns + arrays = [data._ixs(i, axis=1).values for i in range(len(columns))] + + return arrays, columns + + if not len(data): + if isinstance(data, np.ndarray): + if data.dtype.names is not None: + # i.e. numpy structured array + columns = ensure_index(data.dtype.names) + arrays = [data[name] for name in columns] + + if len(data) == 0: + # GH#42456 the indexing above results in list of 2D ndarrays + # TODO: is that an issue with numpy? + for i, arr in enumerate(arrays): + if arr.ndim == 2: + arrays[i] = arr[:, 0] + + return arrays, columns + return [], ensure_index([]) + + elif isinstance(data[0], Categorical): + # GH#38845 deprecate special case + warnings.warn( + "The behavior of DataFrame([categorical, ...]) is deprecated and " + "in a future version will be changed to match the behavior of " + "DataFrame([any_listlike, ...]). " + "To retain the old behavior, pass as a dictionary " + "DataFrame({col: categorical, ..})", + FutureWarning, + stacklevel=find_stack_level(), + ) + if columns is None: + columns = default_index(len(data)) + elif len(columns) > len(data): + raise ValueError("len(columns) > len(data)") + elif len(columns) < len(data): + # doing this here is akin to a pre-emptive reindex + data = data[: len(columns)] + return data, columns + + elif isinstance(data, np.ndarray) and data.dtype.names is not None: + # e.g. recarray + columns = Index(list(data.dtype.names)) + arrays = [data[k] for k in columns] + return arrays, columns + + if isinstance(data[0], (list, tuple)): + arr = _list_to_arrays(data) + elif isinstance(data[0], abc.Mapping): + arr, columns = _list_of_dict_to_arrays(data, columns) + elif isinstance(data[0], ABCSeries): + arr, columns = _list_of_series_to_arrays(data, columns) + else: + # last ditch effort + data = [tuple(x) for x in data] + arr = _list_to_arrays(data) + + content, columns = _finalize_columns_and_data(arr, columns, dtype) + return content, columns + + +def _list_to_arrays(data: list[tuple | list]) -> np.ndarray: + # Returned np.ndarray has ndim = 2 + # Note: we already check len(data) > 0 before getting hre + if isinstance(data[0], tuple): + content = lib.to_object_array_tuples(data) + else: + # list of lists + content = lib.to_object_array(data) + return content + + +def _list_of_series_to_arrays( + data: list, + columns: Index | None, +) -> tuple[np.ndarray, Index]: + # returned np.ndarray has ndim == 2 + + if columns is None: + # We know pass_data is non-empty because data[0] is a Series + pass_data = [x for x in data if isinstance(x, (ABCSeries, ABCDataFrame))] + columns = get_objs_combined_axis(pass_data, sort=False) + + indexer_cache: dict[int, np.ndarray] = {} + + aligned_values = [] + for s in data: + index = getattr(s, "index", None) + if index is None: + index = default_index(len(s)) + + if id(index) in indexer_cache: + indexer = indexer_cache[id(index)] + else: + indexer = indexer_cache[id(index)] = index.get_indexer(columns) + + values = extract_array(s, extract_numpy=True) + aligned_values.append(algorithms.take_nd(values, indexer)) + + # error: Argument 1 to "vstack" has incompatible type "List[ExtensionArray]"; + # expected "Sequence[Union[Union[int, float, complex, str, bytes, generic], + # Sequence[Union[int, float, complex, str, bytes, generic]], + # 
Sequence[Sequence[Any]], _SupportsArray]]" + content = np.vstack(aligned_values) # type: ignore[arg-type] + + return content, columns + + +def _list_of_dict_to_arrays( + data: list[dict], + columns: Index | None, +) -> tuple[np.ndarray, Index]: + """ + Convert list of dicts to numpy arrays + + if `columns` is not passed, column names are inferred from the records + - for OrderedDict and dicts, the column names match + the key insertion-order from the first record to the last. + - For other kinds of dict-likes, the keys are lexically sorted. + + Parameters + ---------- + data : iterable + collection of records (OrderedDict, dict) + columns: iterables or None + + Returns + ------- + content : np.ndarray[object, ndim=2] + columns : Index + """ + if columns is None: + gen = (list(x.keys()) for x in data) + sort = not any(isinstance(d, dict) for d in data) + pre_cols = lib.fast_unique_multiple_list_gen(gen, sort=sort) + columns = ensure_index(pre_cols) + + # assure that they are of the base dict class and not of derived + # classes + data = [d if type(d) is dict else dict(d) for d in data] + + content = lib.dicts_to_array(data, list(columns)) + return content, columns + + +def _finalize_columns_and_data( + content: np.ndarray, # ndim == 2 + columns: Index | None, + dtype: DtypeObj | None, +) -> tuple[list[ArrayLike], Index]: + """ + Ensure we have valid columns, cast object dtypes if possible. + """ + contents = list(content.T) + + try: + columns = _validate_or_indexify_columns(contents, columns) + except AssertionError as err: + # GH#26429 do not raise user-facing AssertionError + raise ValueError(err) from err + + if len(contents) and contents[0].dtype == np.object_: + contents = _convert_object_array(contents, dtype=dtype) + + return contents, columns + + +def _validate_or_indexify_columns( + content: list[np.ndarray], columns: Index | None +) -> Index: + """ + If columns is None, make numbers as column names; Otherwise, validate that + columns have valid length. + + Parameters + ---------- + content : list of np.ndarrays + columns : Index or None + + Returns + ------- + Index + If columns is None, assign positional column index value as columns. + + Raises + ------ + 1. AssertionError when content is not composed of list of lists, and if + length of columns is not equal to length of content. + 2. ValueError when content is list of lists, but length of each sub-list + is not equal + 3. ValueError when content is list of lists, but length of sub-list is + not equal to length of content + """ + if columns is None: + columns = default_index(len(content)) + else: + + # Add mask for data which is composed of list of lists + is_mi_list = isinstance(columns, list) and all( + isinstance(col, list) for col in columns + ) + + if not is_mi_list and len(columns) != len(content): # pragma: no cover + # caller's responsibility to check for this... 
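+            # (_finalize_columns_and_data converts this AssertionError into a
+            # user-facing ValueError, see GH#26429 above)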
+ raise AssertionError( + f"{len(columns)} columns passed, passed data had " + f"{len(content)} columns" + ) + elif is_mi_list: + + # check if nested list column, length of each sub-list should be equal + if len({len(col) for col in columns}) > 1: + raise ValueError( + "Length of columns passed for MultiIndex columns is different" + ) + + # if columns is not empty and length of sublist is not equal to content + elif columns and len(columns[0]) != len(content): + raise ValueError( + f"{len(columns[0])} columns passed, passed data had " + f"{len(content)} columns" + ) + return columns + + +def _convert_object_array( + content: list[np.ndarray], dtype: DtypeObj | None +) -> list[ArrayLike]: + """ + Internal function to convert object array. + + Parameters + ---------- + content: List[np.ndarray] + dtype: np.dtype or ExtensionDtype + + Returns + ------- + List[ArrayLike] + """ + # provide soft conversion of object dtypes + def convert(arr): + if dtype != np.dtype("O"): + arr = lib.maybe_convert_objects(arr) + arr = maybe_cast_to_datetime(arr, dtype) + return arr + + arrays = [convert(arr) for arr in content] + + return arrays diff --git a/.local/lib/python3.8/site-packages/pandas/core/internals/managers.py b/.local/lib/python3.8/site-packages/pandas/core/internals/managers.py new file mode 100644 index 0000000..42688fa --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/internals/managers.py @@ -0,0 +1,2157 @@ +from __future__ import annotations + +import itertools +from typing import ( + Any, + Callable, + Hashable, + Sequence, + TypeVar, + cast, +) +import warnings + +import numpy as np + +from pandas._libs import ( + internals as libinternals, + lib, +) +from pandas._libs.internals import BlockPlacement +from pandas._typing import ( + ArrayLike, + DtypeObj, + Shape, + npt, + type_t, +) +from pandas.errors import PerformanceWarning +from pandas.util._decorators import cache_readonly +from pandas.util._exceptions import find_stack_level +from pandas.util._validators import validate_bool_kwarg + +from pandas.core.dtypes.cast import infer_dtype_from_scalar +from pandas.core.dtypes.common import ( + ensure_platform_int, + is_1d_only_ea_dtype, + is_dtype_equal, + is_list_like, + needs_i8_conversion, +) +from pandas.core.dtypes.dtypes import ExtensionDtype +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) +from pandas.core.dtypes.missing import ( + array_equals, + isna, +) + +import pandas.core.algorithms as algos +from pandas.core.arrays._mixins import NDArrayBackedExtensionArray +from pandas.core.arrays.sparse import SparseDtype +from pandas.core.construction import ( + ensure_wrapped_if_datetimelike, + extract_array, +) +from pandas.core.indexers import maybe_convert_indices +from pandas.core.indexes.api import ( + Float64Index, + Index, + ensure_index, +) +from pandas.core.internals.base import ( + DataManager, + SingleDataManager, + interleaved_dtype, +) +from pandas.core.internals.blocks import ( + Block, + DatetimeTZBlock, + NumpyBlock, + ensure_block_shape, + extend_blocks, + get_block_type, + new_block, + new_block_2d, +) +from pandas.core.internals.ops import ( + blockwise_all, + operate_blockwise, +) + +T = TypeVar("T", bound="BaseBlockManager") + + +class BaseBlockManager(DataManager): + """ + Core internal data structure to implement DataFrame, Series, etc. + + Manage a bunch of labeled 2D mixed-type ndarrays. 
Essentially it's a + lightweight blocked set of labeled data to be manipulated by the DataFrame + public API class + + Attributes + ---------- + shape + ndim + axes + values + items + + Methods + ------- + set_axis(axis, new_labels) + copy(deep=True) + + get_dtypes + + apply(func, axes, block_filter_fn) + + get_bool_data + get_numeric_data + + get_slice(slice_like, axis) + get(label) + iget(loc) + + take(indexer, axis) + reindex_axis(new_labels, axis) + reindex_indexer(new_labels, indexer, axis) + + delete(label) + insert(loc, label, value) + set(label, value) + + Parameters + ---------- + blocks: Sequence of Block + axes: Sequence of Index + verify_integrity: bool, default True + + Notes + ----- + This is *not* a public API class + """ + + __slots__ = () + + _blknos: npt.NDArray[np.intp] + _blklocs: npt.NDArray[np.intp] + blocks: tuple[Block, ...] + axes: list[Index] + + ndim: int + _known_consolidated: bool + _is_consolidated: bool + + def __init__(self, blocks, axes, verify_integrity: bool = True): + raise NotImplementedError + + @classmethod + def from_blocks(cls: type_t[T], blocks: list[Block], axes: list[Index]) -> T: + raise NotImplementedError + + @property + def blknos(self) -> npt.NDArray[np.intp]: + """ + Suppose we want to find the array corresponding to our i'th column. + + blknos[i] identifies the block from self.blocks that contains this column. + + blklocs[i] identifies the column of interest within + self.blocks[self.blknos[i]] + """ + if self._blknos is None: + # Note: these can be altered by other BlockManager methods. + self._rebuild_blknos_and_blklocs() + + return self._blknos + + @property + def blklocs(self) -> npt.NDArray[np.intp]: + """ + See blknos.__doc__ + """ + if self._blklocs is None: + # Note: these can be altered by other BlockManager methods. + self._rebuild_blknos_and_blklocs() + + return self._blklocs + + def make_empty(self: T, axes=None) -> T: + """return an empty BlockManager with the items axis of len 0""" + if axes is None: + axes = [Index([])] + self.axes[1:] + + # preserve dtype if possible + if self.ndim == 1: + assert isinstance(self, SingleBlockManager) # for mypy + blk = self.blocks[0] + arr = blk.values[:0] + bp = BlockPlacement(slice(0, 0)) + nb = blk.make_block_same_class(arr, placement=bp) + blocks = [nb] + else: + blocks = [] + return type(self).from_blocks(blocks, axes) + + def __nonzero__(self) -> bool: + return True + + # Python3 compat + __bool__ = __nonzero__ + + def _normalize_axis(self, axis: int) -> int: + # switch axis to follow BlockManager logic + if self.ndim == 2: + axis = 1 if axis == 0 else 0 + return axis + + def set_axis(self, axis: int, new_labels: Index) -> None: + # Caller is responsible for ensuring we have an Index object. + self._validate_set_axis(axis, new_labels) + self.axes[axis] = new_labels + + @property + def is_single_block(self) -> bool: + # Assumes we are 2D; overridden by SingleBlockManager + return len(self.blocks) == 1 + + @property + def items(self) -> Index: + return self.axes[0] + + def get_dtypes(self): + dtypes = np.array([blk.dtype for blk in self.blocks]) + return dtypes.take(self.blknos) + + @property + def arrays(self) -> list[ArrayLike]: + """ + Quick access to the backing arrays of the Blocks. + + Only for compatibility with ArrayManager for testing convenience. + Not to be used in actual code, and return value is not the same as the + ArrayManager method (list of 1D arrays vs iterator of 2D ndarrays / 1D EAs). 
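+
+        A minimal sketch (illustrative; ``_mgr`` is private API):
+
+        >>> import pandas as pd
+        >>> mgr = pd.DataFrame({"a": [1], "b": [1.5]})._mgr
+        >>> [arr.shape for arr in mgr.arrays]
+        [(1, 1), (1, 1)]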
+ """ + return [blk.values for blk in self.blocks] + + def __repr__(self) -> str: + output = type(self).__name__ + for i, ax in enumerate(self.axes): + if i == 0: + output += f"\nItems: {ax}" + else: + output += f"\nAxis {i}: {ax}" + + for block in self.blocks: + output += f"\n{block}" + return output + + def apply( + self: T, + f, + align_keys: list[str] | None = None, + ignore_failures: bool = False, + **kwargs, + ) -> T: + """ + Iterate over the blocks, collect and create a new BlockManager. + + Parameters + ---------- + f : str or callable + Name of the Block method to apply. + align_keys: List[str] or None, default None + ignore_failures: bool, default False + **kwargs + Keywords to pass to `f` + + Returns + ------- + BlockManager + """ + assert "filter" not in kwargs + + align_keys = align_keys or [] + result_blocks: list[Block] = [] + # fillna: Series/DataFrame is responsible for making sure value is aligned + + aligned_args = {k: kwargs[k] for k in align_keys} + + for b in self.blocks: + + if aligned_args: + + for k, obj in aligned_args.items(): + if isinstance(obj, (ABCSeries, ABCDataFrame)): + # The caller is responsible for ensuring that + # obj.axes[-1].equals(self.items) + if obj.ndim == 1: + kwargs[k] = obj.iloc[b.mgr_locs.indexer]._values + else: + kwargs[k] = obj.iloc[:, b.mgr_locs.indexer]._values + else: + # otherwise we have an ndarray + kwargs[k] = obj[b.mgr_locs.indexer] + + try: + if callable(f): + applied = b.apply(f, **kwargs) + else: + applied = getattr(b, f)(**kwargs) + except (TypeError, NotImplementedError): + if not ignore_failures: + raise + continue + result_blocks = extend_blocks(applied, result_blocks) + + if ignore_failures: + return self._combine(result_blocks) + + out = type(self).from_blocks(result_blocks, self.axes) + return out + + def where(self: T, other, cond, align: bool) -> T: + if align: + align_keys = ["other", "cond"] + else: + align_keys = ["cond"] + other = extract_array(other, extract_numpy=True) + + return self.apply( + "where", + align_keys=align_keys, + other=other, + cond=cond, + ) + + def setitem(self: T, indexer, value) -> T: + """ + Set values with indexer. + + For SingleBlockManager, this backs s[indexer] = value + """ + return self.apply("setitem", indexer=indexer, value=value) + + def putmask(self, mask, new, align: bool = True): + + if align: + align_keys = ["new", "mask"] + else: + align_keys = ["mask"] + new = extract_array(new, extract_numpy=True) + + return self.apply( + "putmask", + align_keys=align_keys, + mask=mask, + new=new, + ) + + def diff(self: T, n: int, axis: int) -> T: + axis = self._normalize_axis(axis) + return self.apply("diff", n=n, axis=axis) + + def interpolate(self: T, **kwargs) -> T: + return self.apply("interpolate", **kwargs) + + def shift(self: T, periods: int, axis: int, fill_value) -> T: + axis = self._normalize_axis(axis) + if fill_value is lib.no_default: + fill_value = None + + if ( + axis == 0 + and self.ndim == 2 + and ( + self.nblocks > 1 + or ( + # If we only have one block and we know that we can't + # keep the same dtype (i.e. the _can_hold_element check) + # then we can go through the reindex_indexer path + # (and avoid casting logic in the Block method). + # The exception to this (until 2.0) is datetimelike + # dtypes with integers, which cast. 
+ not self.blocks[0]._can_hold_element(fill_value) + # TODO(2.0): remove special case for integer-with-datetimelike + # once deprecation is enforced + and not ( + lib.is_integer(fill_value) + and needs_i8_conversion(self.blocks[0].dtype) + ) + ) + ) + ): + # GH#35488 we need to watch out for multi-block cases + # We only get here with fill_value not-lib.no_default + ncols = self.shape[0] + nper = abs(periods) + nper = min(nper, ncols) + if periods > 0: + indexer = np.array( + [-1] * nper + list(range(ncols - periods)), dtype=np.intp + ) + else: + indexer = np.array( + list(range(nper, ncols)) + [-1] * nper, dtype=np.intp + ) + result = self.reindex_indexer( + self.items, + indexer, + axis=0, + fill_value=fill_value, + allow_dups=True, + consolidate=False, + ) + return result + + return self.apply("shift", periods=periods, axis=axis, fill_value=fill_value) + + def fillna(self: T, value, limit, inplace: bool, downcast) -> T: + return self.apply( + "fillna", value=value, limit=limit, inplace=inplace, downcast=downcast + ) + + def astype(self: T, dtype, copy: bool = False, errors: str = "raise") -> T: + return self.apply("astype", dtype=dtype, copy=copy, errors=errors) + + def convert( + self: T, + copy: bool = True, + datetime: bool = True, + numeric: bool = True, + timedelta: bool = True, + ) -> T: + return self.apply( + "convert", + copy=copy, + datetime=datetime, + numeric=numeric, + timedelta=timedelta, + ) + + def replace(self: T, to_replace, value, inplace: bool) -> T: + inplace = validate_bool_kwarg(inplace, "inplace") + # NDFrame.replace ensures the not-is_list_likes here + assert not is_list_like(to_replace) + assert not is_list_like(value) + return self.apply( + "replace", to_replace=to_replace, value=value, inplace=inplace + ) + + def replace_regex(self, **kwargs): + return self.apply("_replace_regex", **kwargs) + + def replace_list( + self: T, + src_list: list[Any], + dest_list: list[Any], + inplace: bool = False, + regex: bool = False, + ) -> T: + """do a list replace""" + inplace = validate_bool_kwarg(inplace, "inplace") + + bm = self.apply( + "replace_list", + src_list=src_list, + dest_list=dest_list, + inplace=inplace, + regex=regex, + ) + bm._consolidate_inplace() + return bm + + def to_native_types(self: T, **kwargs) -> T: + """ + Convert values to native types (strings / python objects) that are used + in formatting (repr / csv). + """ + return self.apply("to_native_types", **kwargs) + + @property + def is_numeric_mixed_type(self) -> bool: + return all(block.is_numeric for block in self.blocks) + + @property + def any_extension_types(self) -> bool: + """Whether any of the blocks in this manager are extension blocks""" + return any(block.is_extension for block in self.blocks) + + @property + def is_view(self) -> bool: + """return a boolean if we are a single block and are a view""" + if len(self.blocks) == 1: + return self.blocks[0].is_view + + # It is technically possible to figure out which blocks are views + # e.g. [ b.values.base is not None for b in self.blocks ] + # but then we have the case of possibly some blocks being a view + # and some blocks not. setting in theory is possible on the non-view + # blocks w/o causing a SettingWithCopy raise/warn. 
But this is a bit + # complicated + + return False + + def _get_data_subset(self: T, predicate: Callable) -> T: + blocks = [blk for blk in self.blocks if predicate(blk.values)] + return self._combine(blocks, copy=False) + + def get_bool_data(self: T, copy: bool = False) -> T: + """ + Select blocks that are bool-dtype and columns from object-dtype blocks + that are all-bool. + + Parameters + ---------- + copy : bool, default False + Whether to copy the blocks + """ + + new_blocks = [] + + for blk in self.blocks: + if blk.dtype == bool: + new_blocks.append(blk) + + elif blk.is_object: + nbs = blk._split() + for nb in nbs: + if nb.is_bool: + new_blocks.append(nb) + + return self._combine(new_blocks, copy) + + def get_numeric_data(self: T, copy: bool = False) -> T: + """ + Parameters + ---------- + copy : bool, default False + Whether to copy the blocks + """ + numeric_blocks = [blk for blk in self.blocks if blk.is_numeric] + if len(numeric_blocks) == len(self.blocks): + # Avoid somewhat expensive _combine + if copy: + return self.copy(deep=True) + return self + return self._combine(numeric_blocks, copy) + + def _combine( + self: T, blocks: list[Block], copy: bool = True, index: Index | None = None + ) -> T: + """return a new manager with the blocks""" + if len(blocks) == 0: + if self.ndim == 2: + # retain our own Index dtype + if index is not None: + axes = [self.items[:0], index] + else: + axes = [self.items[:0]] + self.axes[1:] + return self.make_empty(axes) + return self.make_empty() + + # FIXME: optimization potential + indexer = np.sort(np.concatenate([b.mgr_locs.as_array for b in blocks])) + inv_indexer = lib.get_reverse_indexer(indexer, self.shape[0]) + + new_blocks: list[Block] = [] + for b in blocks: + b = b.copy(deep=copy) + b.mgr_locs = BlockPlacement(inv_indexer[b.mgr_locs.indexer]) + new_blocks.append(b) + + axes = list(self.axes) + if index is not None: + axes[-1] = index + axes[0] = self.items.take(indexer) + + return type(self).from_blocks(new_blocks, axes) + + @property + def nblocks(self) -> int: + return len(self.blocks) + + def copy(self: T, deep=True) -> T: + """ + Make deep or shallow copy of BlockManager + + Parameters + ---------- + deep : bool or string, default True + If False, return shallow copy (do not copy data) + If 'all', copy data and a deep copy of the index + + Returns + ------- + BlockManager + """ + # this preserves the notion of view copying of axes + if deep: + # hit in e.g. 
tests.io.json.test_pandas + + def copy_func(ax): + return ax.copy(deep=True) if deep == "all" else ax.view() + + new_axes = [copy_func(ax) for ax in self.axes] + else: + new_axes = list(self.axes) + + res = self.apply("copy", deep=deep) + + res.axes = new_axes + + if self.ndim > 1: + # Avoid needing to re-compute these + blknos = self._blknos + if blknos is not None: + res._blknos = blknos.copy() + res._blklocs = self._blklocs.copy() + + if deep: + res._consolidate_inplace() + return res + + def consolidate(self: T) -> T: + """ + Join together blocks having same dtype + + Returns + ------- + y : BlockManager + """ + if self.is_consolidated(): + return self + + bm = type(self)(self.blocks, self.axes, verify_integrity=False) + bm._is_consolidated = False + bm._consolidate_inplace() + return bm + + def reindex_indexer( + self: T, + new_axis: Index, + indexer, + axis: int, + fill_value=None, + allow_dups: bool = False, + copy: bool = True, + consolidate: bool = True, + only_slice: bool = False, + *, + use_na_proxy: bool = False, + ) -> T: + """ + Parameters + ---------- + new_axis : Index + indexer : ndarray of int64 or None + axis : int + fill_value : object, default None + allow_dups : bool, default False + copy : bool, default True + consolidate: bool, default True + Whether to consolidate inplace before reindexing. + only_slice : bool, default False + Whether to take views, not copies, along columns. + use_na_proxy : bool, default False + Whether to use a np.void ndarray for newly introduced columns. + + pandas-indexer with -1's only. + """ + if indexer is None: + if new_axis is self.axes[axis] and not copy: + return self + + result = self.copy(deep=copy) + result.axes = list(self.axes) + result.axes[axis] = new_axis + return result + + if consolidate: + self._consolidate_inplace() + + # some axes don't allow reindexing with dups + if not allow_dups: + self.axes[axis]._validate_can_reindex(indexer) + + if axis >= self.ndim: + raise IndexError("Requested axis not found in manager") + + if axis == 0: + new_blocks = self._slice_take_blocks_ax0( + indexer, + fill_value=fill_value, + only_slice=only_slice, + use_na_proxy=use_na_proxy, + ) + else: + new_blocks = [ + blk.take_nd( + indexer, + axis=1, + fill_value=( + fill_value if fill_value is not None else blk.fill_value + ), + ) + for blk in self.blocks + ] + + new_axes = list(self.axes) + new_axes[axis] = new_axis + + new_mgr = type(self).from_blocks(new_blocks, new_axes) + if axis == 1: + # We can avoid the need to rebuild these + new_mgr._blknos = self.blknos.copy() + new_mgr._blklocs = self.blklocs.copy() + return new_mgr + + def _slice_take_blocks_ax0( + self, + slice_or_indexer: slice | np.ndarray, + fill_value=lib.no_default, + only_slice: bool = False, + *, + use_na_proxy: bool = False, + ) -> list[Block]: + """ + Slice/take blocks along axis=0. + + Overloaded for SingleBlock + + Parameters + ---------- + slice_or_indexer : slice or np.ndarray[int64] + fill_value : scalar, default lib.no_default + only_slice : bool, default False + If True, we always return views on existing arrays, never copies. + This is used when called from ops.blockwise.operate_blockwise. + use_na_proxy : bool, default False + Whether to use a np.void ndarray for newly introduced columns. 
+
+        Returns
+        -------
+        new_blocks : list of Block
+        """
+        allow_fill = fill_value is not lib.no_default
+
+        sl_type, slobj, sllen = _preprocess_slice_or_indexer(
+            slice_or_indexer, self.shape[0], allow_fill=allow_fill
+        )
+
+        if self.is_single_block:
+            blk = self.blocks[0]
+
+            if sl_type == "slice":
+                # GH#32959 EABlock would fail since we can't make 0-width
+                # TODO(EA2D): special casing unnecessary with 2D EAs
+                if sllen == 0:
+                    return []
+                bp = BlockPlacement(slice(0, sllen))
+                return [blk.getitem_block_columns(slobj, new_mgr_locs=bp)]
+            elif not allow_fill or self.ndim == 1:
+                if allow_fill and fill_value is None:
+                    fill_value = blk.fill_value
+
+                if not allow_fill and only_slice:
+                    # GH#33597 slice instead of take, so we get
+                    # views instead of copies
+                    blocks = [
+                        blk.getitem_block_columns(
+                            slice(ml, ml + 1), new_mgr_locs=BlockPlacement(i)
+                        )
+                        for i, ml in enumerate(slobj)
+                    ]
+                    # We have
+                    # all(np.shares_memory(nb.values, blk.values) for nb in blocks)
+                    return blocks
+                else:
+                    bp = BlockPlacement(slice(0, sllen))
+                    return [
+                        blk.take_nd(
+                            slobj,
+                            axis=0,
+                            new_mgr_locs=bp,
+                            fill_value=fill_value,
+                        )
+                    ]
+
+        if sl_type == "slice":
+            blknos = self.blknos[slobj]
+            blklocs = self.blklocs[slobj]
+        else:
+            blknos = algos.take_nd(
+                self.blknos, slobj, fill_value=-1, allow_fill=allow_fill
+            )
+            blklocs = algos.take_nd(
+                self.blklocs, slobj, fill_value=-1, allow_fill=allow_fill
+            )
+
+        # When filling blknos, make sure blknos is updated before appending to
+        # blocks list, that way new blkno is exactly len(blocks).
+        blocks = []
+        group = not only_slice
+        for blkno, mgr_locs in libinternals.get_blkno_placements(blknos, group=group):
+            if blkno == -1:
+                # If we've got here, fill_value was not lib.no_default
+
+                blocks.append(
+                    self._make_na_block(
+                        placement=mgr_locs,
+                        fill_value=fill_value,
+                        use_na_proxy=use_na_proxy,
+                    )
+                )
+            else:
+                blk = self.blocks[blkno]
+
+                # Otherwise, slicing along items axis is necessary.
+                if not blk._can_consolidate and not blk._validate_ndim:
+                    # i.e. we don't go through here for DatetimeTZBlock
+                    # A non-consolidatable block, it's easy, because there's
+                    # only one item and each mgr loc is a copy of that single
+                    # item.
+ for mgr_loc in mgr_locs: + newblk = blk.copy(deep=False) + newblk.mgr_locs = BlockPlacement(slice(mgr_loc, mgr_loc + 1)) + blocks.append(newblk) + + else: + # GH#32779 to avoid the performance penalty of copying, + # we may try to only slice + taker = blklocs[mgr_locs.indexer] + max_len = max(len(mgr_locs), taker.max() + 1) + if only_slice: + taker = lib.maybe_indices_to_slice(taker, max_len) + + if isinstance(taker, slice): + nb = blk.getitem_block_columns(taker, new_mgr_locs=mgr_locs) + blocks.append(nb) + elif only_slice: + # GH#33597 slice instead of take, so we get + # views instead of copies + for i, ml in zip(taker, mgr_locs): + slc = slice(i, i + 1) + bp = BlockPlacement(ml) + nb = blk.getitem_block_columns(slc, new_mgr_locs=bp) + # We have np.shares_memory(nb.values, blk.values) + blocks.append(nb) + else: + nb = blk.take_nd(taker, axis=0, new_mgr_locs=mgr_locs) + blocks.append(nb) + + return blocks + + def _make_na_block( + self, placement: BlockPlacement, fill_value=None, use_na_proxy: bool = False + ) -> Block: + # Note: we only get here with self.ndim == 2 + + if use_na_proxy: + assert fill_value is None + shape = (len(placement), self.shape[1]) + vals = np.empty(shape, dtype=np.void) + nb = NumpyBlock(vals, placement, ndim=2) + return nb + + if fill_value is None: + fill_value = np.nan + block_shape = list(self.shape) + block_shape[0] = len(placement) + + dtype, fill_value = infer_dtype_from_scalar(fill_value) + # error: Argument "dtype" to "empty" has incompatible type "Union[dtype, + # ExtensionDtype]"; expected "Union[dtype, None, type, _SupportsDtype, str, + # Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any], _DtypeDict, + # Tuple[Any, Any]]" + block_values = np.empty(block_shape, dtype=dtype) # type: ignore[arg-type] + block_values.fill(fill_value) + return new_block_2d(block_values, placement=placement) + + def take(self: T, indexer, axis: int = 1, verify: bool = True) -> T: + """ + Take items along any axis. + + indexer : np.ndarray or slice + axis : int, default 1 + verify : bool, default True + Check that all entries are between 0 and len(self) - 1, inclusive. + Pass verify=False if this check has been done by the caller. + + Returns + ------- + BlockManager + """ + # We have 6 tests that get here with a slice + indexer = ( + np.arange(indexer.start, indexer.stop, indexer.step, dtype=np.intp) + if isinstance(indexer, slice) + else np.asanyarray(indexer, dtype=np.intp) + ) + + n = self.shape[axis] + indexer = maybe_convert_indices(indexer, n, verify=verify) + + new_labels = self.axes[axis].take(indexer) + return self.reindex_indexer( + new_axis=new_labels, + indexer=indexer, + axis=axis, + allow_dups=True, + consolidate=False, + ) + + +class BlockManager(libinternals.BlockManager, BaseBlockManager): + """ + BaseBlockManager that holds 2D blocks. 
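+
+    For intuition (illustrative; ``_mgr`` is private API):
+
+    >>> import pandas as pd
+    >>> mgr = pd.DataFrame({"a": [1, 2], "b": [1.5, 2.5]})._mgr
+    >>> mgr.nblocks  # one int64 block, one float64 block
+    2
+    >>> mgr.shape  # (n_columns, n_rows) from the manager's point of view
+    (2, 2)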
+ """ + + ndim = 2 + + # ---------------------------------------------------------------- + # Constructors + + def __init__( + self, + blocks: Sequence[Block], + axes: Sequence[Index], + verify_integrity: bool = True, + ): + + if verify_integrity: + # Assertion disabled for performance + # assert all(isinstance(x, Index) for x in axes) + + for block in blocks: + if self.ndim != block.ndim: + raise AssertionError( + f"Number of Block dimensions ({block.ndim}) must equal " + f"number of axes ({self.ndim})" + ) + if isinstance(block, DatetimeTZBlock) and block.values.ndim == 1: + # TODO(2.0): remove once fastparquet no longer needs this + warnings.warn( + "In a future version, the BlockManager constructor " + "will assume that a DatetimeTZBlock with block.ndim==2 " + "has block.values.ndim == 2.", + DeprecationWarning, + stacklevel=find_stack_level(), + ) + + # error: Incompatible types in assignment (expression has type + # "Union[ExtensionArray, ndarray]", variable has type + # "DatetimeArray") + block.values = ensure_block_shape( # type: ignore[assignment] + block.values, self.ndim + ) + try: + block._cache.clear() + except AttributeError: + # _cache not initialized + pass + + self._verify_integrity() + + def _verify_integrity(self) -> None: + mgr_shape = self.shape + tot_items = sum(len(x.mgr_locs) for x in self.blocks) + for block in self.blocks: + if block.shape[1:] != mgr_shape[1:]: + raise construction_error(tot_items, block.shape[1:], self.axes) + if len(self.items) != tot_items: + raise AssertionError( + "Number of manager items must equal union of " + f"block items\n# manager items: {len(self.items)}, # " + f"tot_items: {tot_items}" + ) + + @classmethod + def from_blocks(cls, blocks: list[Block], axes: list[Index]) -> BlockManager: + """ + Constructor for BlockManager and SingleBlockManager with same signature. + """ + return cls(blocks, axes, verify_integrity=False) + + # ---------------------------------------------------------------- + # Indexing + + def fast_xs(self, loc: int) -> ArrayLike: + """ + Return the array corresponding to `frame.iloc[loc]`. + + Parameters + ---------- + loc : int + + Returns + ------- + np.ndarray or ExtensionArray + """ + if len(self.blocks) == 1: + return self.blocks[0].iget((slice(None), loc)) + + dtype = interleaved_dtype([blk.dtype for blk in self.blocks]) + + n = len(self) + if isinstance(dtype, ExtensionDtype): + cls = dtype.construct_array_type() + result = cls._empty((n,), dtype=dtype) + else: + result = np.empty(n, dtype=dtype) + result = ensure_wrapped_if_datetimelike(result) + + for blk in self.blocks: + # Such assignment may incorrectly coerce NaT to None + # result[blk.mgr_locs] = blk._slice((slice(None), loc)) + for i, rl in enumerate(blk.mgr_locs): + result[rl] = blk.iget((i, loc)) + + return result + + def iget(self, i: int) -> SingleBlockManager: + """ + Return the data as a SingleBlockManager. + """ + block = self.blocks[self.blknos[i]] + values = block.iget(self.blklocs[i]) + + # shortcut for select a single-dim from a 2-dim BM + bp = BlockPlacement(slice(0, len(values))) + nb = type(block)(values, placement=bp, ndim=1) + return SingleBlockManager(nb, self.axes[1]) + + def iget_values(self, i: int) -> ArrayLike: + """ + Return the data for column i as the values (ndarray or ExtensionArray). + """ + block = self.blocks[self.blknos[i]] + values = block.iget(self.blklocs[i]) + return values + + @property + def column_arrays(self) -> list[np.ndarray]: + """ + Used in the JSON C code to access column arrays. 
+        This optimizes compared to using `iget_values` by converting each
+        block only once, rather than once per column.
+        """
+        # This is an optimized equivalent to
+        # result = [self.iget_values(i) for i in range(len(self.items))]
+        result: list[np.ndarray | None] = [None] * len(self.items)
+
+        for blk in self.blocks:
+            mgr_locs = blk._mgr_locs
+            values = blk.values_for_json()
+            if values.ndim == 1:
+                # TODO(EA2D): special casing not needed with 2D EAs
+                result[mgr_locs[0]] = values
+
+            else:
+                for i, loc in enumerate(mgr_locs):
+                    result[loc] = values[i]
+
+        # error: Incompatible return value type (got "List[None]",
+        # expected "List[ndarray[Any, Any]]")
+        return result  # type: ignore[return-value]
+
+    def iset(
+        self, loc: int | slice | np.ndarray, value: ArrayLike, inplace: bool = False
+    ):
+        """
+        Set new item in-place. Does not consolidate. Adds new Block if not
+        contained in the current set of items.
+        """
+
+        # FIXME: refactor, clearly separate broadcasting & zip-like assignment
+        #  can probably also fix the various if tests for sparse/categorical
+        if self._blklocs is None and self.ndim > 1:
+            self._rebuild_blknos_and_blklocs()
+
+        # Note: we exclude DTA/TDA here
+        value_is_extension_type = is_1d_only_ea_dtype(value.dtype)
+        if not value_is_extension_type:
+            if value.ndim == 2:
+                value = value.T
+            else:
+                value = ensure_block_shape(value, ndim=2)
+
+            if value.shape[1:] != self.shape[1:]:
+                raise AssertionError(
+                    "Shape of new values must be compatible with manager shape"
+                )
+
+        if lib.is_integer(loc):
+            # We have 6 tests where loc is _not_ an int.
+            # In this case, get_blkno_placements will yield only one tuple,
+            # containing (self._blknos[loc], BlockPlacement(slice(0, 1, 1)))
+
+            # Check if we can use _iset_single fastpath
+            loc = cast(int, loc)
+            blkno = self.blknos[loc]
+            blk = self.blocks[blkno]
+            if len(blk._mgr_locs) == 1:  # TODO: fastest way to check this?
+                return self._iset_single(
+                    loc,
+                    value,
+                    inplace=inplace,
+                    blkno=blkno,
+                    blk=blk,
+                )
+
+            # error: Incompatible types in assignment (expression has type
+            # "List[Union[int, slice, ndarray]]", variable has type "Union[int,
+            # slice, ndarray]")
+            loc = [loc]  # type: ignore[assignment]
+
+        # categorical/sparse/datetimetz
+        if value_is_extension_type:
+
+            def value_getitem(placement):
+                return value
+
+        else:
+
+            def value_getitem(placement):
+                return value[placement.indexer]
+
+        # Accessing public blknos ensures the public versions are initialized
+        blknos = self.blknos[loc]
+        blklocs = self.blklocs[loc].copy()
+
+        unfit_mgr_locs = []
+        unfit_val_locs = []
+        removed_blknos = []
+        for blkno_l, val_locs in libinternals.get_blkno_placements(blknos, group=True):
+            blk = self.blocks[blkno_l]
+            blk_locs = blklocs[val_locs.indexer]
+            if inplace and blk.should_store(value):
+                blk.set_inplace(blk_locs, value_getitem(val_locs))
+            else:
+                unfit_mgr_locs.append(blk.mgr_locs.as_array[blk_locs])
+                unfit_val_locs.append(val_locs)
+
+                # If all block items are unfit, schedule the block for removal.
+ if len(val_locs) == len(blk.mgr_locs): + removed_blknos.append(blkno_l) + else: + blk.delete(blk_locs) + self._blklocs[blk.mgr_locs.indexer] = np.arange(len(blk)) + + if len(removed_blknos): + # Remove blocks & update blknos accordingly + is_deleted = np.zeros(self.nblocks, dtype=np.bool_) + is_deleted[removed_blknos] = True + + new_blknos = np.empty(self.nblocks, dtype=np.intp) + new_blknos.fill(-1) + new_blknos[~is_deleted] = np.arange(self.nblocks - len(removed_blknos)) + self._blknos = new_blknos[self._blknos] + self.blocks = tuple( + blk for i, blk in enumerate(self.blocks) if i not in set(removed_blknos) + ) + + if unfit_val_locs: + unfit_idxr = np.concatenate(unfit_mgr_locs) + unfit_count = len(unfit_idxr) + + new_blocks: list[Block] = [] + if value_is_extension_type: + # This code (ab-)uses the fact that EA blocks contain only + # one item. + # TODO(EA2D): special casing unnecessary with 2D EAs + new_blocks.extend( + new_block_2d( + values=value, + placement=BlockPlacement(slice(mgr_loc, mgr_loc + 1)), + ) + for mgr_loc in unfit_idxr + ) + + self._blknos[unfit_idxr] = np.arange(unfit_count) + len(self.blocks) + self._blklocs[unfit_idxr] = 0 + + else: + # unfit_val_locs contains BlockPlacement objects + unfit_val_items = unfit_val_locs[0].append(unfit_val_locs[1:]) + + new_blocks.append( + new_block_2d( + values=value_getitem(unfit_val_items), + placement=BlockPlacement(unfit_idxr), + ) + ) + + self._blknos[unfit_idxr] = len(self.blocks) + self._blklocs[unfit_idxr] = np.arange(unfit_count) + + self.blocks += tuple(new_blocks) + + # Newly created block's dtype may already be present. + self._known_consolidated = False + + def _iset_single( + self, loc: int, value: ArrayLike, inplace: bool, blkno: int, blk: Block + ) -> None: + """ + Fastpath for iset when we are only setting a single position and + the Block currently in that position is itself single-column. + + In this case we can swap out the entire Block and blklocs and blknos + are unaffected. + """ + # Caller is responsible for verifying value.shape + + if inplace and blk.should_store(value): + iloc = self.blklocs[loc] + blk.set_inplace(slice(iloc, iloc + 1), value) + return + + nb = new_block_2d(value, placement=blk._mgr_locs) + old_blocks = self.blocks + new_blocks = old_blocks[:blkno] + (nb,) + old_blocks[blkno + 1 :] + self.blocks = new_blocks + return + + def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None: + """ + Insert item at selected position. + + Parameters + ---------- + loc : int + item : hashable + value : np.ndarray or ExtensionArray + """ + # insert to the axis; this could possibly raise a TypeError + new_axis = self.items.insert(loc, item) + + if value.ndim == 2: + value = value.T + if len(value) > 1: + raise ValueError( + f"Expected a 1D array, got an array with shape {value.T.shape}" + ) + else: + value = ensure_block_shape(value, ndim=self.ndim) + + bp = BlockPlacement(slice(loc, loc + 1)) + block = new_block_2d(values=value, placement=bp) + + if not len(self.blocks): + # Fastpath + self._blklocs = np.array([0], dtype=np.intp) + self._blknos = np.array([0], dtype=np.intp) + else: + self._insert_update_mgr_locs(loc) + self._insert_update_blklocs_and_blknos(loc) + + self.axes[0] = new_axis + self.blocks += (block,) + + self._known_consolidated = False + + if sum(not block.is_extension for block in self.blocks) > 100: + warnings.warn( + "DataFrame is highly fragmented. This is usually the result " + "of calling `frame.insert` many times, which has poor performance. 
" + "Consider joining all columns at once using pd.concat(axis=1) " + "instead. To get a de-fragmented frame, use `newframe = frame.copy()`", + PerformanceWarning, + stacklevel=find_stack_level(), + ) + + def _insert_update_mgr_locs(self, loc) -> None: + """ + When inserting a new Block at location 'loc', we increment + all of the mgr_locs of blocks above that by one. + """ + for blkno, count in _fast_count_smallints(self.blknos[loc:]): + # .620 this way, .326 of which is in increment_above + blk = self.blocks[blkno] + blk._mgr_locs = blk._mgr_locs.increment_above(loc) + + def _insert_update_blklocs_and_blknos(self, loc) -> None: + """ + When inserting a new Block at location 'loc', we update our + _blklocs and _blknos. + """ + + # Accessing public blklocs ensures the public versions are initialized + if loc == self.blklocs.shape[0]: + # np.append is a lot faster, let's use it if we can. + self._blklocs = np.append(self._blklocs, 0) + self._blknos = np.append(self._blknos, len(self.blocks)) + elif loc == 0: + # np.append is a lot faster, let's use it if we can. + self._blklocs = np.append(self._blklocs[::-1], 0)[::-1] + self._blknos = np.append(self._blknos[::-1], len(self.blocks))[::-1] + else: + new_blklocs, new_blknos = libinternals.update_blklocs_and_blknos( + self.blklocs, self.blknos, loc, len(self.blocks) + ) + self._blklocs = new_blklocs + self._blknos = new_blknos + + def idelete(self, indexer) -> BlockManager: + """ + Delete selected locations, returning a new BlockManager. + """ + is_deleted = np.zeros(self.shape[0], dtype=np.bool_) + is_deleted[indexer] = True + taker = (~is_deleted).nonzero()[0] + + nbs = self._slice_take_blocks_ax0(taker, only_slice=True) + new_columns = self.items[~is_deleted] + axes = [new_columns, self.axes[1]] + return type(self)(tuple(nbs), axes, verify_integrity=False) + + # ---------------------------------------------------------------- + # Block-wise Operation + + def grouped_reduce(self: T, func: Callable, ignore_failures: bool = False) -> T: + """ + Apply grouped reduction function blockwise, returning a new BlockManager. + + Parameters + ---------- + func : grouped reduction function + ignore_failures : bool, default False + Whether to drop blocks where func raises TypeError. + + Returns + ------- + BlockManager + """ + result_blocks: list[Block] = [] + dropped_any = False + + for blk in self.blocks: + if blk.is_object: + # split on object-dtype blocks bc some columns may raise + # while others do not. + for sb in blk._split(): + try: + applied = sb.apply(func) + except (TypeError, NotImplementedError): + if not ignore_failures: + raise + dropped_any = True + continue + result_blocks = extend_blocks(applied, result_blocks) + else: + try: + applied = blk.apply(func) + except (TypeError, NotImplementedError): + if not ignore_failures: + raise + dropped_any = True + continue + result_blocks = extend_blocks(applied, result_blocks) + + if len(result_blocks) == 0: + index = Index([None]) # placeholder + else: + index = Index(range(result_blocks[0].values.shape[-1])) + + if dropped_any: + # faster to skip _combine if we haven't dropped any blocks + return self._combine(result_blocks, copy=False, index=index) + + return type(self).from_blocks(result_blocks, [self.axes[0], index]) + + def reduce( + self: T, func: Callable, ignore_failures: bool = False + ) -> tuple[T, np.ndarray]: + """ + Apply reduction function blockwise, returning a single-row BlockManager. 
+
+        Parameters
+        ----------
+        func : reduction function
+        ignore_failures : bool, default False
+            Whether to drop blocks where func raises TypeError.
+
+        Returns
+        -------
+        BlockManager
+        np.ndarray
+            Indexer of mgr_locs that are retained.
+        """
+        # If 2D, we assume that we're operating column-wise
+        assert self.ndim == 2
+
+        res_blocks: list[Block] = []
+        for blk in self.blocks:
+            nbs = blk.reduce(func, ignore_failures)
+            res_blocks.extend(nbs)
+
+        index = Index([None])  # placeholder
+        if ignore_failures:
+            if res_blocks:
+                indexer = np.concatenate([blk.mgr_locs.as_array for blk in res_blocks])
+                new_mgr = self._combine(res_blocks, copy=False, index=index)
+            else:
+                indexer = []
+                new_mgr = type(self).from_blocks([], [self.items[:0], index])
+        else:
+            indexer = np.arange(self.shape[0])
+            new_mgr = type(self).from_blocks(res_blocks, [self.items, index])
+        return new_mgr, indexer
+
+    def operate_blockwise(self, other: BlockManager, array_op) -> BlockManager:
+        """
+        Apply array_op blockwise with another (aligned) BlockManager.
+        """
+        return operate_blockwise(self, other, array_op)
+
+    def _equal_values(self: BlockManager, other: BlockManager) -> bool:
+        """
+        Used in .equals defined in base class. Only check the column values
+        assuming shape and indexes have already been checked.
+        """
+        return blockwise_all(self, other, array_equals)
+
+    def quantile(
+        self: T,
+        *,
+        qs: Float64Index,
+        axis: int = 0,
+        interpolation="linear",
+    ) -> T:
+        """
+        Iterate over blocks applying quantile reduction.
+        This routine is intended for reduction type operations and
+        will do inference on the generated blocks.
+
+        Parameters
+        ----------
+        qs : Float64Index
+            The quantiles to be computed.
+        axis : int, default 0
+            Reduction axis; in practice this is only ever called with axis=1.
+        interpolation : str, default 'linear'
+            Type of interpolation.
+
+        Returns
+        -------
+        BlockManager
+        """
+        # Series dispatches to DataFrame for quantile, which allows us to
+        # simplify some of the code here and in the blocks
+        assert self.ndim >= 2
+        assert is_list_like(qs)  # caller is responsible for this
+        assert axis == 1  # only ever called this way
+
+        new_axes = list(self.axes)
+        new_axes[1] = Float64Index(qs)
+
+        blocks = [
+            blk.quantile(axis=axis, qs=qs, interpolation=interpolation)
+            for blk in self.blocks
+        ]
+
+        return type(self)(blocks, new_axes)
+
+    # ----------------------------------------------------------------
+
+    def unstack(self, unstacker, fill_value) -> BlockManager:
+        """
+        Return a BlockManager with all blocks unstacked.
+
+        Parameters
+        ----------
+        unstacker : reshape._Unstacker
+        fill_value : Any
+            fill_value for newly introduced missing values.
+
+        Returns
+        -------
+        unstacked : BlockManager
+        """
+        new_columns = unstacker.get_new_columns(self.items)
+        new_index = unstacker.new_index
+
+        allow_fill = not unstacker.mask_all
+        if allow_fill:
+            # calculating the full mask once and passing it to Block._unstack is
+            # faster than letting it be calculated in each repeated call
+            new_mask2D = (~unstacker.mask).reshape(*unstacker.full_shape)
+            needs_masking = new_mask2D.any(axis=0)
+        else:
+            needs_masking = np.zeros(unstacker.full_shape[1], dtype=bool)
+
+        new_blocks: list[Block] = []
+        columns_mask: list[np.ndarray] = []
+
+        if len(self.items) == 0:
+            factor = 1
+        else:
+            fac = len(new_columns) / len(self.items)
+            assert fac == int(fac)
+            factor = int(fac)
+
+        for blk in self.blocks:
+            mgr_locs = blk.mgr_locs
+            new_placement = mgr_locs.tile_for_unstack(factor)
+
+            blocks, mask = blk._unstack(
+                unstacker,
+                fill_value,
+                new_placement=new_placement,
+                needs_masking=needs_masking,
+            )
+
+            new_blocks.extend(blocks)
+            columns_mask.extend(mask)
+
+            # Block._unstack should ensure this holds:
+            assert mask.sum() == sum(len(nb._mgr_locs) for nb in blocks)
+            # In turn this ensures that in the BlockManager call below
+            # we have len(new_columns) == sum(x.shape[0] for x in new_blocks)
+            # which suffices to allow us to pass verify_integrity=False
+
+        new_columns = new_columns[columns_mask]
+
+        bm = BlockManager(new_blocks, [new_columns, new_index], verify_integrity=False)
+        return bm
+
+    def to_dict(self, copy: bool = True):
+        """
+        Return a dict of str(dtype) -> BlockManager
+
+        Parameters
+        ----------
+        copy : bool, default True
+
+        Returns
+        -------
+        values : a dict of dtype -> BlockManager
+        """
+
+        bd: dict[str, list[Block]] = {}
+        for b in self.blocks:
+            bd.setdefault(str(b.dtype), []).append(b)
+
+        # TODO(EA2D): the combine will be unnecessary with 2D EAs
+        return {dtype: self._combine(blocks, copy=copy) for dtype, blocks in bd.items()}
+
+    def as_array(
+        self,
+        dtype: np.dtype | None = None,
+        copy: bool = False,
+        na_value=lib.no_default,
+    ) -> np.ndarray:
+        """
+        Convert the blockmanager data into a numpy array.
+
+        Parameters
+        ----------
+        dtype : np.dtype or None, default None
+            Data type of the return array.
+        copy : bool, default False
+            If True then guarantee that a copy is returned. A value of
+            False does not guarantee that the underlying data is not
+            copied.
+        na_value : object, default lib.no_default
+            Value to be used as the missing value sentinel.
+ + Returns + ------- + arr : ndarray + """ + if len(self.blocks) == 0: + arr = np.empty(self.shape, dtype=float) + return arr.transpose() + + # We want to copy when na_value is provided to avoid + # mutating the original object + copy = copy or na_value is not lib.no_default + + if self.is_single_block: + blk = self.blocks[0] + if blk.is_extension: + # Avoid implicit conversion of extension blocks to object + + # error: Item "ndarray" of "Union[ndarray, ExtensionArray]" has no + # attribute "to_numpy" + arr = blk.values.to_numpy( # type: ignore[union-attr] + dtype=dtype, + na_value=na_value, + ).reshape(blk.shape) + else: + arr = np.asarray(blk.get_values()) + if dtype: + arr = arr.astype(dtype, copy=False) + else: + arr = self._interleave(dtype=dtype, na_value=na_value) + # The underlying data was copied within _interleave + copy = False + + if copy: + arr = arr.copy() + + if na_value is not lib.no_default: + arr[isna(arr)] = na_value + + return arr.transpose() + + def _interleave( + self, + dtype: np.dtype | None = None, + na_value=lib.no_default, + ) -> np.ndarray: + """ + Return ndarray from blocks with specified item order + Items must be contained in the blocks + """ + if not dtype: + # Incompatible types in assignment (expression has type + # "Optional[Union[dtype[Any], ExtensionDtype]]", variable has + # type "Optional[dtype[Any]]") + dtype = interleaved_dtype( # type: ignore[assignment] + [blk.dtype for blk in self.blocks] + ) + + # TODO: https://github.com/pandas-dev/pandas/issues/22791 + # Give EAs some input on what happens here. Sparse needs this. + if isinstance(dtype, SparseDtype): + dtype = dtype.subtype + dtype = cast(np.dtype, dtype) + elif isinstance(dtype, ExtensionDtype): + dtype = np.dtype("object") + elif is_dtype_equal(dtype, str): + dtype = np.dtype("object") + + result = np.empty(self.shape, dtype=dtype) + + itemmask = np.zeros(self.shape[0]) + + if dtype == np.dtype("object") and na_value is lib.no_default: + # much more performant than using to_numpy below + for blk in self.blocks: + rl = blk.mgr_locs + arr = blk.get_values(dtype) + result[rl.indexer] = arr + itemmask[rl.indexer] = 1 + return result + + for blk in self.blocks: + rl = blk.mgr_locs + if blk.is_extension: + # Avoid implicit conversion of extension blocks to object + + # error: Item "ndarray" of "Union[ndarray, ExtensionArray]" has no + # attribute "to_numpy" + arr = blk.values.to_numpy( # type: ignore[union-attr] + dtype=dtype, + na_value=na_value, + ) + else: + arr = blk.get_values(dtype) + result[rl.indexer] = arr + itemmask[rl.indexer] = 1 + + if not itemmask.all(): + raise AssertionError("Some items were not contained in blocks") + + return result + + # ---------------------------------------------------------------- + # Consolidation + + def is_consolidated(self) -> bool: + """ + Return True if more than one block with the same dtype + """ + if not self._known_consolidated: + self._consolidate_check() + return self._is_consolidated + + def _consolidate_check(self) -> None: + if len(self.blocks) == 1: + # fastpath + self._is_consolidated = True + self._known_consolidated = True + return + dtypes = [blk.dtype for blk in self.blocks if blk._can_consolidate] + self._is_consolidated = len(dtypes) == len(set(dtypes)) + self._known_consolidated = True + + def _consolidate_inplace(self) -> None: + if not self.is_consolidated(): + self.blocks = tuple(_consolidate(self.blocks)) + self._is_consolidated = True + self._known_consolidated = True + self._rebuild_blknos_and_blklocs() + + +class 
SingleBlockManager(BaseBlockManager, SingleDataManager):
+    """manage a single block with a single axis (ndim == 1)"""
+
+    ndim = 1
+    _is_consolidated = True
+    _known_consolidated = True
+    __slots__ = ()
+    is_single_block = True
+
+    def __init__(
+        self,
+        block: Block,
+        axis: Index,
+        verify_integrity: bool = False,
+        fastpath=lib.no_default,
+    ):
+        # Assertions disabled for performance
+        # assert isinstance(block, Block), type(block)
+        # assert isinstance(axis, Index), type(axis)
+
+        if fastpath is not lib.no_default:
+            warnings.warn(
+                "The `fastpath` keyword is deprecated and will be removed "
+                "in a future version.",
+                FutureWarning,
+                stacklevel=find_stack_level(),
+            )
+
+        self.axes = [axis]
+        self.blocks = (block,)
+
+    @classmethod
+    def from_blocks(cls, blocks: list[Block], axes: list[Index]) -> SingleBlockManager:
+        """
+        Constructor for BlockManager and SingleBlockManager with same signature.
+        """
+        assert len(blocks) == 1
+        assert len(axes) == 1
+        return cls(blocks[0], axes[0], verify_integrity=False)
+
+    @classmethod
+    def from_array(cls, array: ArrayLike, index: Index) -> SingleBlockManager:
+        """
+        Constructor for if we have an array that is not yet a Block.
+        """
+        block = new_block(array, placement=slice(0, len(index)), ndim=1)
+        return cls(block, index)
+
+    def to_2d_mgr(self, columns: Index) -> BlockManager:
+        """
+        Manager analogue of Series.to_frame
+        """
+        blk = self.blocks[0]
+        arr = ensure_block_shape(blk.values, ndim=2)
+        bp = BlockPlacement(0)
+        new_blk = type(blk)(arr, placement=bp, ndim=2)
+        axes = [columns, self.axes[0]]
+        return BlockManager([new_blk], axes=axes, verify_integrity=False)
+
+    def __getstate__(self):
+        block_values = [b.values for b in self.blocks]
+        block_items = [self.items[b.mgr_locs.indexer] for b in self.blocks]
+        axes_array = list(self.axes)
+
+        extra_state = {
+            "0.14.1": {
+                "axes": axes_array,
+                "blocks": [
+                    {"values": b.values, "mgr_locs": b.mgr_locs.indexer}
+                    for b in self.blocks
+                ],
+            }
+        }
+
+        # First three elements of the state are to maintain forward
+        # compatibility with 0.13.1.
+        return axes_array, block_values, block_items, extra_state
+
+    def __setstate__(self, state):
+        def unpickle_block(values, mgr_locs, ndim: int) -> Block:
+            # TODO(EA2D): ndim would be unnecessary with 2D EAs
+            # older pickles may store e.g.
DatetimeIndex instead of DatetimeArray + values = extract_array(values, extract_numpy=True) + return new_block(values, placement=mgr_locs, ndim=ndim) + + if isinstance(state, tuple) and len(state) >= 4 and "0.14.1" in state[3]: + state = state[3]["0.14.1"] + self.axes = [ensure_index(ax) for ax in state["axes"]] + ndim = len(self.axes) + self.blocks = tuple( + unpickle_block(b["values"], b["mgr_locs"], ndim=ndim) + for b in state["blocks"] + ) + else: + raise NotImplementedError("pre-0.14.1 pickles are no longer supported") + + self._post_setstate() + + def _post_setstate(self): + pass + + @cache_readonly + def _block(self) -> Block: + return self.blocks[0] + + @property + def _blknos(self): + """compat with BlockManager""" + return None + + @property + def _blklocs(self): + """compat with BlockManager""" + return None + + def getitem_mgr(self, indexer) -> SingleBlockManager: + # similar to get_slice, but not restricted to slice indexer + blk = self._block + array = blk._slice(indexer) + if array.ndim > 1: + # This will be caught by Series._get_values + raise ValueError("dimension-expanding indexing not allowed") + + bp = BlockPlacement(slice(0, len(array))) + block = type(blk)(array, placement=bp, ndim=1) + + new_idx = self.index[indexer] + return type(self)(block, new_idx) + + def get_slice(self, slobj: slice, axis: int = 0) -> SingleBlockManager: + # Assertion disabled for performance + # assert isinstance(slobj, slice), type(slobj) + if axis >= self.ndim: + raise IndexError("Requested axis not found in manager") + + blk = self._block + array = blk._slice(slobj) + bp = BlockPlacement(slice(0, len(array))) + block = type(blk)(array, placement=bp, ndim=1) + new_index = self.index._getitem_slice(slobj) + return type(self)(block, new_index) + + @property + def index(self) -> Index: + return self.axes[0] + + @property + def dtype(self) -> DtypeObj: + return self._block.dtype + + def get_dtypes(self) -> np.ndarray: + return np.array([self._block.dtype]) + + def external_values(self): + """The array that Series.values returns""" + return self._block.external_values() + + def internal_values(self): + """The array that Series._values returns""" + return self._block.values + + def array_values(self): + """The array that Series.array returns""" + return self._block.array_values + + def get_numeric_data(self, copy: bool = False): + if self._block.is_numeric: + if copy: + return self.copy() + return self + return self.make_empty() + + @property + def _can_hold_na(self) -> bool: + return self._block._can_hold_na + + def idelete(self, indexer) -> SingleBlockManager: + """ + Delete single location from SingleBlockManager. + + Ensures that self.blocks doesn't become empty. + """ + self._block.delete(indexer) + self.axes[0] = self.axes[0].delete(indexer) + return self + + def fast_xs(self, loc): + """ + fast path for getting a cross-section + return a view of the data + """ + raise NotImplementedError("Use series._values[loc] instead") + + def set_values(self, values: ArrayLike): + """ + Set the values of the single block in place. + + Use at your own risk! This does not check if the passed values are + valid for the current Block/SingleBlockManager (length, dtype, etc). + """ + self.blocks[0].values = values + self.blocks[0]._mgr_locs = BlockPlacement(slice(len(values))) + + def _equal_values(self: T, other: T) -> bool: + """ + Used in .equals defined in base class. Only check the column values + assuming shape and indexes have already been checked. 
+        """
+        # For SingleBlockManager (i.e. Series)
+        if other.ndim != 1:
+            return False
+        left = self.blocks[0].values
+        right = other.blocks[0].values
+        return array_equals(left, right)
+
+
+# --------------------------------------------------------------------
+# Constructor Helpers
+
+
+def create_block_manager_from_blocks(
+    blocks: list[Block],
+    axes: list[Index],
+    consolidate: bool = True,
+    verify_integrity: bool = True,
+) -> BlockManager:
+    # If verify_integrity=False, then caller is responsible for checking
+    #  all(x.shape[-1] == len(axes[1]) for x in blocks)
+    #  sum(x.shape[0] for x in blocks) == len(axes[0])
+    #  set(x for blk in blocks for x in blk.mgr_locs) == set(range(len(axes[0])))
+    #  all(blk.ndim == 2 for blk in blocks)
+    # This allows us to safely pass verify_integrity=False
+
+    try:
+        mgr = BlockManager(blocks, axes, verify_integrity=verify_integrity)
+
+    except ValueError as err:
+        arrays = [blk.values for blk in blocks]
+        tot_items = sum(arr.shape[0] for arr in arrays)
+        raise construction_error(tot_items, arrays[0].shape[1:], axes, err)
+
+    if consolidate:
+        mgr._consolidate_inplace()
+    return mgr
+
+
+def create_block_manager_from_column_arrays(
+    arrays: list[ArrayLike],
+    axes: list[Index],
+    consolidate: bool = True,
+) -> BlockManager:
+    # Assertions disabled for performance (caller is responsible for verifying)
+    #  assert isinstance(axes, list)
+    #  assert all(isinstance(x, Index) for x in axes)
+    #  assert all(isinstance(x, (np.ndarray, ExtensionArray)) for x in arrays)
+    #  assert all(type(x) is not PandasArray for x in arrays)
+    #  assert all(x.ndim == 1 for x in arrays)
+    #  assert all(len(x) == len(axes[1]) for x in arrays)
+    #  assert len(arrays) == len(axes[0])
+    # These last three are sufficient to allow us to safely pass
+    #  verify_integrity=False below.
+
+    try:
+        blocks = _form_blocks(arrays, consolidate)
+        mgr = BlockManager(blocks, axes, verify_integrity=False)
+    except ValueError as e:
+        raise construction_error(len(arrays), arrays[0].shape, axes, e)
+    if consolidate:
+        mgr._consolidate_inplace()
+    return mgr
+
+
+def construction_error(
+    tot_items: int,
+    block_shape: Shape,
+    axes: list[Index],
+    e: ValueError | None = None,
+):
+    """raise a helpful message about our construction"""
+    passed = tuple(map(int, [tot_items] + list(block_shape)))
+    # Correcting the user facing error message during dataframe construction
+    if len(passed) <= 2:
+        passed = passed[::-1]
+
+    implied = tuple(len(ax) for ax in axes)
+    # Correcting the user facing error message during dataframe construction
+    if len(implied) <= 2:
+        implied = implied[::-1]
+
+    # We return the exception object instead of raising it so that we
+    #  can raise it in the caller; mypy plays better with that
+    if passed == implied and e is not None:
+        return e
+    if block_shape[0] == 0:
+        return ValueError("Empty data passed with indices specified.")
+    return ValueError(f"Shape of passed values is {passed}, indices imply {implied}")
+
+
+# -----------------------------------------------------------------------
+
+
+def _grouping_func(tup: tuple[int, ArrayLike]) -> tuple[int, bool, DtypeObj]:
+    # compat for numpy<1.21, in which comparing a np.dtype with an ExtensionDtype
+    # raises instead of returning False. Once earlier numpy versions are dropped,
+    # this can be simplified to `return tup[1].dtype`
+    dtype = tup[1].dtype
+
+    if is_1d_only_ea_dtype(dtype):
+        # We know these won't be consolidated, so don't need to group these.
+ # This avoids expensive comparisons of CategoricalDtype objects + sep = id(dtype) + else: + sep = 0 + + return sep, isinstance(dtype, np.dtype), dtype + + +def _form_blocks(arrays: list[ArrayLike], consolidate: bool) -> list[Block]: + tuples = list(enumerate(arrays)) + + if not consolidate: + nbs = _tuples_to_blocks_no_consolidate(tuples) + return nbs + + # group by dtype + grouper = itertools.groupby(tuples, _grouping_func) + + nbs = [] + for (_, _, dtype), tup_block in grouper: + block_type = get_block_type(dtype) + + if isinstance(dtype, np.dtype): + is_dtlike = dtype.kind in ["m", "M"] + + if issubclass(dtype.type, (str, bytes)): + dtype = np.dtype(object) + + values, placement = _stack_arrays(list(tup_block), dtype) + if is_dtlike: + values = ensure_wrapped_if_datetimelike(values) + blk = block_type(values, placement=BlockPlacement(placement), ndim=2) + nbs.append(blk) + + elif is_1d_only_ea_dtype(dtype): + dtype_blocks = [ + block_type(x[1], placement=BlockPlacement(x[0]), ndim=2) + for x in tup_block + ] + nbs.extend(dtype_blocks) + + else: + dtype_blocks = [ + block_type( + ensure_block_shape(x[1], 2), placement=BlockPlacement(x[0]), ndim=2 + ) + for x in tup_block + ] + nbs.extend(dtype_blocks) + return nbs + + +def _tuples_to_blocks_no_consolidate(tuples) -> list[Block]: + # tuples produced within _form_blocks are of the form (placement, array) + return [ + new_block_2d(ensure_block_shape(x[1], ndim=2), placement=BlockPlacement(x[0])) + for x in tuples + ] + + +def _stack_arrays(tuples, dtype: np.dtype): + + placement, arrays = zip(*tuples) + + first = arrays[0] + shape = (len(arrays),) + first.shape + + stacked = np.empty(shape, dtype=dtype) + for i, arr in enumerate(arrays): + stacked[i] = arr + + return stacked, placement + + +def _consolidate(blocks: tuple[Block, ...]) -> list[Block]: + """ + Merge blocks having same dtype, exclude non-consolidating blocks + """ + # sort by _can_consolidate, dtype + gkey = lambda x: x._consolidate_key + grouper = itertools.groupby(sorted(blocks, key=gkey), gkey) + + new_blocks: list[Block] = [] + for (_can_consolidate, dtype), group_blocks in grouper: + merged_blocks = _merge_blocks( + list(group_blocks), dtype=dtype, can_consolidate=_can_consolidate + ) + new_blocks = extend_blocks(merged_blocks, new_blocks) + return new_blocks + + +def _merge_blocks( + blocks: list[Block], dtype: DtypeObj, can_consolidate: bool +) -> list[Block]: + + if len(blocks) == 1: + return blocks + + if can_consolidate: + + # TODO: optimization potential in case all mgrs contain slices and + # combination of those slices is a slice, too. 
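+
+        # Rough sketch of what consolidation buys (an illustration with a
+        # hypothetical frame, not code from this module):
+        #
+        #   df = pd.DataFrame({"a": [1.0], "c": [3.0]})
+        #   df["b"] = 2.0                    # adds a second float64 block
+        #   len(df._mgr.blocks)              # -> 2
+        #   df._mgr._consolidate_inplace()   # reaches _merge_blocks
+        #   len(df._mgr.blocks)              # -> 1 merged float64 block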
+ new_mgr_locs = np.concatenate([b.mgr_locs.as_array for b in blocks]) + + new_values: ArrayLike + + if isinstance(blocks[0].dtype, np.dtype): + # error: List comprehension has incompatible type List[Union[ndarray, + # ExtensionArray]]; expected List[Union[complex, generic, + # Sequence[Union[int, float, complex, str, bytes, generic]], + # Sequence[Sequence[Any]], SupportsArray]] + new_values = np.vstack([b.values for b in blocks]) # type: ignore[misc] + else: + bvals = [blk.values for blk in blocks] + bvals2 = cast(Sequence[NDArrayBackedExtensionArray], bvals) + new_values = bvals2[0]._concat_same_type(bvals2, axis=0) + + argsort = np.argsort(new_mgr_locs) + new_values = new_values[argsort] + new_mgr_locs = new_mgr_locs[argsort] + + bp = BlockPlacement(new_mgr_locs) + return [new_block_2d(new_values, placement=bp)] + + # can't consolidate --> no merge + return blocks + + +def _fast_count_smallints(arr: npt.NDArray[np.intp]): + """Faster version of set(arr) for sequences of small numbers.""" + counts = np.bincount(arr) + nz = counts.nonzero()[0] + # Note: list(zip(...) outperforms list(np.c_[nz, counts[nz]]) here, + # in one benchmark by a factor of 11 + return zip(nz, counts[nz]) + + +def _preprocess_slice_or_indexer( + slice_or_indexer: slice | np.ndarray, length: int, allow_fill: bool +): + if isinstance(slice_or_indexer, slice): + return ( + "slice", + slice_or_indexer, + libinternals.slice_len(slice_or_indexer, length), + ) + else: + if ( + not isinstance(slice_or_indexer, np.ndarray) + or slice_or_indexer.dtype.kind != "i" + ): + dtype = getattr(slice_or_indexer, "dtype", None) + raise TypeError(type(slice_or_indexer), dtype) + + indexer = ensure_platform_int(slice_or_indexer) + if not allow_fill: + indexer = maybe_convert_indices(indexer, length) + return "fancy", indexer, len(indexer) diff --git a/.local/lib/python3.8/site-packages/pandas/core/internals/ops.py b/.local/lib/python3.8/site-packages/pandas/core/internals/ops.py new file mode 100644 index 0000000..1160d3b --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/internals/ops.py @@ -0,0 +1,147 @@ +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Iterator, + NamedTuple, +) + +from pandas._typing import ArrayLike + +if TYPE_CHECKING: + from pandas._libs.internals import BlockPlacement + + from pandas.core.internals.blocks import Block + from pandas.core.internals.managers import BlockManager + + +class BlockPairInfo(NamedTuple): + lvals: ArrayLike + rvals: ArrayLike + locs: BlockPlacement + left_ea: bool + right_ea: bool + rblk: Block + + +def _iter_block_pairs( + left: BlockManager, right: BlockManager +) -> Iterator[BlockPairInfo]: + # At this point we have already checked the parent DataFrames for + # assert rframe._indexed_same(lframe) + + for blk in left.blocks: + locs = blk.mgr_locs + blk_vals = blk.values + + left_ea = blk_vals.ndim == 1 + + rblks = right._slice_take_blocks_ax0(locs.indexer, only_slice=True) + + # Assertions are disabled for performance, but should hold: + # if left_ea: + # assert len(locs) == 1, locs + # assert len(rblks) == 1, rblks + # assert rblks[0].shape[0] == 1, rblks[0].shape + + for rblk in rblks: + right_ea = rblk.values.ndim == 1 + + lvals, rvals = _get_same_shape_values(blk, rblk, left_ea, right_ea) + info = BlockPairInfo(lvals, rvals, locs, left_ea, right_ea, rblk) + yield info + + +def operate_blockwise( + left: BlockManager, right: BlockManager, array_op +) -> BlockManager: + # At this point we have already checked the parent DataFrames for + # 
assert rframe._indexed_same(lframe) + + res_blks: list[Block] = [] + for lvals, rvals, locs, left_ea, right_ea, rblk in _iter_block_pairs(left, right): + res_values = array_op(lvals, rvals) + if left_ea and not right_ea and hasattr(res_values, "reshape"): + res_values = res_values.reshape(1, -1) + nbs = rblk._split_op_result(res_values) + + # Assertions are disabled for performance, but should hold: + # if right_ea or left_ea: + # assert len(nbs) == 1 + # else: + # assert res_values.shape == lvals.shape, (res_values.shape, lvals.shape) + + _reset_block_mgr_locs(nbs, locs) + + res_blks.extend(nbs) + + # Assertions are disabled for performance, but should hold: + # slocs = {y for nb in res_blks for y in nb.mgr_locs.as_array} + # nlocs = sum(len(nb.mgr_locs.as_array) for nb in res_blks) + # assert nlocs == len(left.items), (nlocs, len(left.items)) + # assert len(slocs) == nlocs, (len(slocs), nlocs) + # assert slocs == set(range(nlocs)), slocs + + new_mgr = type(right)(tuple(res_blks), axes=right.axes, verify_integrity=False) + return new_mgr + + +def _reset_block_mgr_locs(nbs: list[Block], locs): + """ + Reset mgr_locs to correspond to our original DataFrame. + """ + for nb in nbs: + nblocs = locs[nb.mgr_locs.indexer] + nb.mgr_locs = nblocs + # Assertions are disabled for performance, but should hold: + # assert len(nblocs) == nb.shape[0], (len(nblocs), nb.shape) + # assert all(x in locs.as_array for x in nb.mgr_locs.as_array) + + +def _get_same_shape_values( + lblk: Block, rblk: Block, left_ea: bool, right_ea: bool +) -> tuple[ArrayLike, ArrayLike]: + """ + Slice lblk.values to align with rblk. Squeeze if we have EAs. + """ + lvals = lblk.values + rvals = rblk.values + + # Require that the indexing into lvals be slice-like + assert rblk.mgr_locs.is_slice_like, rblk.mgr_locs + + # TODO(EA2D): with 2D EAs only this first clause would be needed + if not (left_ea or right_ea): + # error: No overload variant of "__getitem__" of "ExtensionArray" matches + # argument type "Tuple[Union[ndarray, slice], slice]" + lvals = lvals[rblk.mgr_locs.indexer, :] # type: ignore[call-overload] + assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape) + elif left_ea and right_ea: + assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape) + elif right_ea: + # lvals are 2D, rvals are 1D + + # error: No overload variant of "__getitem__" of "ExtensionArray" matches + # argument type "Tuple[Union[ndarray, slice], slice]" + lvals = lvals[rblk.mgr_locs.indexer, :] # type: ignore[call-overload] + assert lvals.shape[0] == 1, lvals.shape + lvals = lvals[0, :] + else: + # lvals are 1D, rvals are 2D + assert rvals.shape[0] == 1, rvals.shape + # error: No overload variant of "__getitem__" of "ExtensionArray" matches + # argument type "Tuple[int, slice]" + rvals = rvals[0, :] # type: ignore[call-overload] + + return lvals, rvals + + +def blockwise_all(left: BlockManager, right: BlockManager, op) -> bool: + """ + Blockwise `all` reduction. + """ + for info in _iter_block_pairs(left, right): + res = op(info.lvals, info.rvals) + if not res: + return False + return True diff --git a/.local/lib/python3.8/site-packages/pandas/core/missing.py b/.local/lib/python3.8/site-packages/pandas/core/missing.py new file mode 100644 index 0000000..e09701e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/missing.py @@ -0,0 +1,994 @@ +""" +Routines for filling missing data. 
+""" +from __future__ import annotations + +from functools import ( + partial, + wraps, +) +from typing import ( + TYPE_CHECKING, + Any, + cast, +) + +import numpy as np + +from pandas._libs import ( + algos, + lib, +) +from pandas._typing import ( + ArrayLike, + Axis, + F, + npt, +) +from pandas.compat._optional import import_optional_dependency + +from pandas.core.dtypes.cast import infer_dtype_from +from pandas.core.dtypes.common import ( + is_array_like, + is_numeric_v_string_like, + needs_i8_conversion, +) +from pandas.core.dtypes.missing import ( + is_valid_na_for_dtype, + isna, + na_value_for_dtype, +) + +if TYPE_CHECKING: + from pandas import Index + + +def check_value_size(value, mask: np.ndarray, length: int): + """ + Validate the size of the values passed to ExtensionArray.fillna. + """ + if is_array_like(value): + if len(value) != length: + raise ValueError( + f"Length of 'value' does not match. Got ({len(value)}) " + f" expected {length}" + ) + value = value[mask] + + return value + + +def mask_missing(arr: ArrayLike, values_to_mask) -> npt.NDArray[np.bool_]: + """ + Return a masking array of same size/shape as arr + with entries equaling any member of values_to_mask set to True + + Parameters + ---------- + arr : ArrayLike + values_to_mask: list, tuple, or scalar + + Returns + ------- + np.ndarray[bool] + """ + # When called from Block.replace/replace_list, values_to_mask is a scalar + # known to be holdable by arr. + # When called from Series._single_replace, values_to_mask is tuple or list + dtype, values_to_mask = infer_dtype_from(values_to_mask) + # error: Argument "dtype" to "array" has incompatible type "Union[dtype[Any], + # ExtensionDtype]"; expected "Union[dtype[Any], None, type, _SupportsDType, str, + # Union[Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any], + # _DTypeDict, Tuple[Any, Any]]]" + values_to_mask = np.array(values_to_mask, dtype=dtype) # type: ignore[arg-type] + + na_mask = isna(values_to_mask) + nonna = values_to_mask[~na_mask] + + # GH 21977 + mask = np.zeros(arr.shape, dtype=bool) + for x in nonna: + if is_numeric_v_string_like(arr, x): + # GH#29553 prevent numpy deprecation warnings + pass + else: + new_mask = arr == x + if not isinstance(new_mask, np.ndarray): + # usually BooleanArray + new_mask = new_mask.to_numpy(dtype=bool, na_value=False) + mask |= new_mask + + if na_mask.any(): + mask |= isna(arr) + + return mask + + +def clean_fill_method(method, allow_nearest: bool = False): + # asfreq is compat for resampling + if method in [None, "asfreq"]: + return None + + if isinstance(method, str): + method = method.lower() + if method == "ffill": + method = "pad" + elif method == "bfill": + method = "backfill" + + valid_methods = ["pad", "backfill"] + expecting = "pad (ffill) or backfill (bfill)" + if allow_nearest: + valid_methods.append("nearest") + expecting = "pad (ffill), backfill (bfill) or nearest" + if method not in valid_methods: + raise ValueError(f"Invalid fill method. Expecting {expecting}. 
Got {method}") + return method + + +# interpolation methods that dispatch to np.interp + +NP_METHODS = ["linear", "time", "index", "values"] + +# interpolation methods that dispatch to _interpolate_scipy_wrapper + +SP_METHODS = [ + "nearest", + "zero", + "slinear", + "quadratic", + "cubic", + "barycentric", + "krogh", + "spline", + "polynomial", + "from_derivatives", + "piecewise_polynomial", + "pchip", + "akima", + "cubicspline", +] + + +def clean_interp_method(method: str, index: Index, **kwargs) -> str: + order = kwargs.get("order") + + if method in ("spline", "polynomial") and order is None: + raise ValueError("You must specify the order of the spline or polynomial.") + + valid = NP_METHODS + SP_METHODS + if method not in valid: + raise ValueError(f"method must be one of {valid}. Got '{method}' instead.") + + if method in ("krogh", "piecewise_polynomial", "pchip"): + if not index.is_monotonic: + raise ValueError( + f"{method} interpolation requires that the index be monotonic." + ) + + return method + + +def find_valid_index(values, *, how: str) -> int | None: + """ + Retrieves the index of the first valid value. + + Parameters + ---------- + values : ndarray or ExtensionArray + how : {'first', 'last'} + Use this parameter to change between the first or last valid index. + + Returns + ------- + int or None + """ + assert how in ["first", "last"] + + if len(values) == 0: # early stop + return None + + is_valid = ~isna(values) + + if values.ndim == 2: + is_valid = is_valid.any(1) # reduce axis 1 + + if how == "first": + idxpos = is_valid[::].argmax() + + elif how == "last": + idxpos = len(values) - 1 - is_valid[::-1].argmax() + + chk_notna = is_valid[idxpos] + + if not chk_notna: + return None + return idxpos + + +def interpolate_array_2d( + data: np.ndarray, + method: str = "pad", + axis: int = 0, + index: Index | None = None, + limit: int | None = None, + limit_direction: str = "forward", + limit_area: str | None = None, + fill_value: Any | None = None, + coerce: bool = False, + downcast: str | None = None, + **kwargs, +) -> None: + """ + Wrapper to dispatch to either interpolate_2d or _interpolate_2d_with_fill. + + Notes + ----- + Alters 'data' in-place. + """ + try: + m = clean_fill_method(method) + except ValueError: + m = None + + if m is not None: + if fill_value is not None: + # similar to validate_fillna_kwargs + raise ValueError("Cannot pass both fill_value and method") + + interpolate_2d( + data, + method=m, + axis=axis, + limit=limit, + limit_area=limit_area, + ) + else: + assert index is not None # for mypy + + _interpolate_2d_with_fill( + data=data, + index=index, + axis=axis, + method=method, + limit=limit, + limit_direction=limit_direction, + limit_area=limit_area, + fill_value=fill_value, + **kwargs, + ) + return + + +def _interpolate_2d_with_fill( + data: np.ndarray, # floating dtype + index: Index, + axis: int, + method: str = "linear", + limit: int | None = None, + limit_direction: str = "forward", + limit_area: str | None = None, + fill_value: Any | None = None, + **kwargs, +) -> None: + """ + Column-wise application of _interpolate_1d. + + Notes + ----- + Alters 'data' in-place. + + The signature does differ from _interpolate_1d because it only + includes what is needed for Block.interpolate. 
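+
+    For orientation, a sketch of the user-level call that typically reaches
+    this helper (illustrative frame; names not defined in this module)::
+
+        >>> df = pd.DataFrame({"a": [1.0, np.nan, 3.0]})
+        >>> df.interpolate(method="linear")  # dispatches via Block.interpolate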
+ """ + # validate the interp method + clean_interp_method(method, index, **kwargs) + + if is_valid_na_for_dtype(fill_value, data.dtype): + fill_value = na_value_for_dtype(data.dtype, compat=False) + + if method == "time": + if not needs_i8_conversion(index.dtype): + raise ValueError( + "time-weighted interpolation only works " + "on Series or DataFrames with a " + "DatetimeIndex" + ) + method = "values" + + valid_limit_directions = ["forward", "backward", "both"] + limit_direction = limit_direction.lower() + if limit_direction not in valid_limit_directions: + raise ValueError( + "Invalid limit_direction: expecting one of " + f"{valid_limit_directions}, got '{limit_direction}'." + ) + + if limit_area is not None: + valid_limit_areas = ["inside", "outside"] + limit_area = limit_area.lower() + if limit_area not in valid_limit_areas: + raise ValueError( + f"Invalid limit_area: expecting one of {valid_limit_areas}, got " + f"{limit_area}." + ) + + # default limit is unlimited GH #16282 + limit = algos.validate_limit(nobs=None, limit=limit) + + indices = _index_to_interp_indices(index, method) + + def func(yvalues: np.ndarray) -> None: + # process 1-d slices in the axis direction + + _interpolate_1d( + indices=indices, + yvalues=yvalues, + method=method, + limit=limit, + limit_direction=limit_direction, + limit_area=limit_area, + fill_value=fill_value, + bounds_error=False, + **kwargs, + ) + + # Argument 1 to "apply_along_axis" has incompatible type + # "Callable[[ndarray[Any, Any]], None]"; expected + # "Callable[..., Union[_SupportsArray[dtype[]], + # Sequence[_SupportsArray[dtype[ + # ]]], Sequence[Sequence[_SupportsArray[dtype[]]]], + # Sequence[Sequence[Sequence[_SupportsArray[dtype[]]]]], + # Sequence[Sequence[Sequence[Sequence[_SupportsArray[dtype[]]]]]]]]" + # interp each column independently + np.apply_along_axis(func, axis, data) # type: ignore[arg-type] + return + + +def _index_to_interp_indices(index: Index, method: str) -> np.ndarray: + """ + Convert Index to ndarray of indices to pass to NumPy/SciPy. + """ + xarr = index._values + if needs_i8_conversion(xarr.dtype): + # GH#1646 for dt64tz + xarr = xarr.view("i8") + + if method == "linear": + inds = xarr + inds = cast(np.ndarray, inds) + else: + inds = np.asarray(xarr) + + if method in ("values", "index"): + if inds.dtype == np.object_: + inds = lib.maybe_convert_objects(inds) + + return inds + + +def _interpolate_1d( + indices: np.ndarray, + yvalues: np.ndarray, + method: str | None = "linear", + limit: int | None = None, + limit_direction: str = "forward", + limit_area: str | None = None, + fill_value: Any | None = None, + bounds_error: bool = False, + order: int | None = None, + **kwargs, +): + """ + Logic for the 1-d interpolation. The input + indices and yvalues will each be 1-d arrays of the same length. + + Bounds_error is currently hardcoded to False since non-scipy ones don't + take it as an argument. + + Notes + ----- + Fills 'yvalues' in-place. + """ + + invalid = isna(yvalues) + valid = ~invalid + + if not valid.any(): + return + + if valid.all(): + return + + # These are sets of index pointers to invalid values... i.e. {0, 1, etc... 
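+    # For example, with yvalues = [NaN, 1, NaN, NaN, 4, NaN]:
+    #   all_nans   -> {0, 2, 3, 5}
+    #   start_nans -> {0}  (leading NaNs before the first valid entry)
+    #   end_nans   -> {5}  (trailing NaNs after the last valid entry)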
+ all_nans = set(np.flatnonzero(invalid)) + + first_valid_index = find_valid_index(yvalues, how="first") + if first_valid_index is None: # no nan found in start + first_valid_index = 0 + start_nans = set(range(first_valid_index)) + + last_valid_index = find_valid_index(yvalues, how="last") + if last_valid_index is None: # no nan found in end + last_valid_index = len(yvalues) + end_nans = set(range(1 + last_valid_index, len(valid))) + + # Like the sets above, preserve_nans contains indices of invalid values, + # but in this case, it is the final set of indices that need to be + # preserved as NaN after the interpolation. + + # For example if limit_direction='forward' then preserve_nans will + # contain indices of NaNs at the beginning of the series, and NaNs that + # are more than'limit' away from the prior non-NaN. + + # set preserve_nans based on direction using _interp_limit + preserve_nans: list | set + if limit_direction == "forward": + preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0)) + elif limit_direction == "backward": + preserve_nans = end_nans | set(_interp_limit(invalid, 0, limit)) + else: + # both directions... just use _interp_limit + preserve_nans = set(_interp_limit(invalid, limit, limit)) + + # if limit_area is set, add either mid or outside indices + # to preserve_nans GH #16284 + if limit_area == "inside": + # preserve NaNs on the outside + preserve_nans |= start_nans | end_nans + elif limit_area == "outside": + # preserve NaNs on the inside + mid_nans = all_nans - start_nans - end_nans + preserve_nans |= mid_nans + + # sort preserve_nans and convert to list + preserve_nans = sorted(preserve_nans) + + if method in NP_METHODS: + # np.interp requires sorted X values, #21037 + + indexer = np.argsort(indices[valid]) + yvalues[invalid] = np.interp( + indices[invalid], indices[valid][indexer], yvalues[valid][indexer] + ) + else: + yvalues[invalid] = _interpolate_scipy_wrapper( + indices[valid], + yvalues[valid], + indices[invalid], + method=method, + fill_value=fill_value, + bounds_error=bounds_error, + order=order, + **kwargs, + ) + + yvalues[preserve_nans] = np.nan + return + + +def _interpolate_scipy_wrapper( + x, y, new_x, method, fill_value=None, bounds_error=False, order=None, **kwargs +): + """ + Passed off to scipy.interpolate.interp1d. method is scipy's kind. + Returns an array interpolated at new_x. Add any new methods to + the list in _clean_interp_method. + """ + extra = f"{method} interpolation requires SciPy." + import_optional_dependency("scipy", extra=extra) + from scipy import interpolate + + new_x = np.asarray(new_x) + + # ignores some kwargs that could be passed along. 
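+    # Dispatch overview: interp1d-style kinds are handed to
+    # scipy.interpolate.interp1d further below; everything else is looked up
+    # in this alt_methods table. Users typically land here via e.g.
+    # Series.interpolate(method="pchip"), which requires SciPy.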
+ alt_methods = { + "barycentric": interpolate.barycentric_interpolate, + "krogh": interpolate.krogh_interpolate, + "from_derivatives": _from_derivatives, + "piecewise_polynomial": _from_derivatives, + } + + if getattr(x, "_is_all_dates", False): + # GH 5975, scipy.interp1d can't handle datetime64s + x, new_x = x._values.astype("i8"), new_x.astype("i8") + + if method == "pchip": + alt_methods["pchip"] = interpolate.pchip_interpolate + elif method == "akima": + alt_methods["akima"] = _akima_interpolate + elif method == "cubicspline": + alt_methods["cubicspline"] = _cubicspline_interpolate + + interp1d_methods = [ + "nearest", + "zero", + "slinear", + "quadratic", + "cubic", + "polynomial", + ] + if method in interp1d_methods: + if method == "polynomial": + method = order + terp = interpolate.interp1d( + x, y, kind=method, fill_value=fill_value, bounds_error=bounds_error + ) + new_y = terp(new_x) + elif method == "spline": + # GH #10633, #24014 + if isna(order) or (order <= 0): + raise ValueError( + f"order needs to be specified and greater than 0; got order: {order}" + ) + terp = interpolate.UnivariateSpline(x, y, k=order, **kwargs) + new_y = terp(new_x) + else: + # GH 7295: need to be able to write for some reason + # in some circumstances: check all three + if not x.flags.writeable: + x = x.copy() + if not y.flags.writeable: + y = y.copy() + if not new_x.flags.writeable: + new_x = new_x.copy() + method = alt_methods[method] + new_y = method(x, y, new_x, **kwargs) + return new_y + + +def _from_derivatives(xi, yi, x, order=None, der=0, extrapolate=False): + """ + Convenience function for interpolate.BPoly.from_derivatives. + + Construct a piecewise polynomial in the Bernstein basis, compatible + with the specified values and derivatives at breakpoints. + + Parameters + ---------- + xi : array-like + sorted 1D array of x-coordinates + yi : array-like or list of array-likes + yi[i][j] is the j-th derivative known at xi[i] + order: None or int or array-like of ints. Default: None. + Specifies the degree of local polynomials. If not None, some + derivatives are ignored. + der : int or list + How many derivatives to extract; None for all potentially nonzero + derivatives (that is a number equal to the number of points), or a + list of derivatives to extract. This number includes the function + value as 0th derivative. + extrapolate : bool, optional + Whether to extrapolate to ouf-of-bounds points based on first and last + intervals, or to return NaNs. Default: True. + + See Also + -------- + scipy.interpolate.BPoly.from_derivatives + + Returns + ------- + y : scalar or array-like + The result, of length R or length M or M by R. + """ + from scipy import interpolate + + # return the method for compat with scipy version & backwards compat + method = interpolate.BPoly.from_derivatives + m = method(xi, yi.reshape(-1, 1), orders=order, extrapolate=extrapolate) + + return m(x) + + +def _akima_interpolate(xi, yi, x, der=0, axis=0): + """ + Convenience function for akima interpolation. + xi and yi are arrays of values used to approximate some function f, + with ``yi = f(xi)``. + + See `Akima1DInterpolator` for details. + + Parameters + ---------- + xi : array-like + A sorted list of x-coordinates, of length N. + yi : array-like + A 1-D array of real values. `yi`'s length along the interpolation + axis must be equal to the length of `xi`. If N-D array, use axis + parameter to select correct axis. + x : scalar or array-like + Of length M. 
+ der : int, optional + How many derivatives to extract; None for all potentially + nonzero derivatives (that is a number equal to the number + of points), or a list of derivatives to extract. This number + includes the function value as 0th derivative. + axis : int, optional + Axis in the yi array corresponding to the x-coordinate values. + + See Also + -------- + scipy.interpolate.Akima1DInterpolator + + Returns + ------- + y : scalar or array-like + The result, of length R or length M or M by R, + + """ + from scipy import interpolate + + P = interpolate.Akima1DInterpolator(xi, yi, axis=axis) + + return P(x, nu=der) + + +def _cubicspline_interpolate(xi, yi, x, axis=0, bc_type="not-a-knot", extrapolate=None): + """ + Convenience function for cubic spline data interpolator. + + See `scipy.interpolate.CubicSpline` for details. + + Parameters + ---------- + xi : array-like, shape (n,) + 1-d array containing values of the independent variable. + Values must be real, finite and in strictly increasing order. + yi : array-like + Array containing values of the dependent variable. It can have + arbitrary number of dimensions, but the length along ``axis`` + (see below) must match the length of ``x``. Values must be finite. + x : scalar or array-like, shape (m,) + axis : int, optional + Axis along which `y` is assumed to be varying. Meaning that for + ``x[i]`` the corresponding values are ``np.take(y, i, axis=axis)``. + Default is 0. + bc_type : string or 2-tuple, optional + Boundary condition type. Two additional equations, given by the + boundary conditions, are required to determine all coefficients of + polynomials on each segment [2]_. + If `bc_type` is a string, then the specified condition will be applied + at both ends of a spline. Available conditions are: + * 'not-a-knot' (default): The first and second segment at a curve end + are the same polynomial. It is a good default when there is no + information on boundary conditions. + * 'periodic': The interpolated functions is assumed to be periodic + of period ``x[-1] - x[0]``. The first and last value of `y` must be + identical: ``y[0] == y[-1]``. This boundary condition will result in + ``y'[0] == y'[-1]`` and ``y''[0] == y''[-1]``. + * 'clamped': The first derivative at curves ends are zero. Assuming + a 1D `y`, ``bc_type=((1, 0.0), (1, 0.0))`` is the same condition. + * 'natural': The second derivative at curve ends are zero. Assuming + a 1D `y`, ``bc_type=((2, 0.0), (2, 0.0))`` is the same condition. + If `bc_type` is a 2-tuple, the first and the second value will be + applied at the curve start and end respectively. The tuple values can + be one of the previously mentioned strings (except 'periodic') or a + tuple `(order, deriv_values)` allowing to specify arbitrary + derivatives at curve ends: + * `order`: the derivative order, 1 or 2. + * `deriv_value`: array-like containing derivative values, shape must + be the same as `y`, excluding ``axis`` dimension. For example, if + `y` is 1D, then `deriv_value` must be a scalar. If `y` is 3D with + the shape (n0, n1, n2) and axis=2, then `deriv_value` must be 2D + and have the shape (n0, n1). + extrapolate : {bool, 'periodic', None}, optional + If bool, determines whether to extrapolate to out-of-bounds points + based on first and last intervals, or to return NaNs. If 'periodic', + periodic extrapolation is used. If None (default), ``extrapolate`` is + set to 'periodic' for ``bc_type='periodic'`` and to True otherwise. 
+
+    See Also
+    --------
+    scipy.interpolate.CubicHermiteSpline
+
+    Returns
+    -------
+    y : scalar or array-like
+        The result, of shape (m,)
+
+    References
+    ----------
+    .. [1] `Cubic Spline Interpolation
+        <https://en.wikiversity.org/wiki/Cubic_Spline_Interpolation>`_
+        on Wikiversity.
+    .. [2] Carl de Boor, "A Practical Guide to Splines", Springer-Verlag, 1978.
+    """
+    from scipy import interpolate
+
+    P = interpolate.CubicSpline(
+        xi, yi, axis=axis, bc_type=bc_type, extrapolate=extrapolate
+    )
+
+    return P(x)
+
+
+def _interpolate_with_limit_area(
+    values: np.ndarray, method: str, limit: int | None, limit_area: str | None
+) -> None:
+    """
+    Apply interpolation and limit_area logic to values along a to-be-specified axis.
+
+    Parameters
+    ----------
+    values: np.ndarray
+        Input array.
+    method: str
+        Interpolation method. Could be "bfill" or "pad"
+    limit: int, optional
+        Index limit on interpolation.
+    limit_area: str
+        Limit area for interpolation. Can be "inside" or "outside"
+
+    Notes
+    -----
+    Modifies values in-place.
+    """
+
+    invalid = isna(values)
+
+    if not invalid.all():
+        first = find_valid_index(values, how="first")
+        if first is None:
+            first = 0
+        last = find_valid_index(values, how="last")
+        if last is None:
+            last = len(values)
+
+        interpolate_2d(
+            values,
+            method=method,
+            limit=limit,
+        )
+
+        if limit_area == "inside":
+            invalid[first : last + 1] = False
+        elif limit_area == "outside":
+            invalid[:first] = invalid[last + 1 :] = False
+
+        values[invalid] = np.nan
+
+    return
+
+
+def interpolate_2d(
+    values: np.ndarray,
+    method: str = "pad",
+    axis: Axis = 0,
+    limit: int | None = None,
+    limit_area: str | None = None,
+) -> None:
+    """
+    Perform an actual interpolation of values; values will be made 2-d if
+    needed and filled in place.
+
+    Parameters
+    ----------
+    values: np.ndarray
+        Input array.
+    method: str, default "pad"
+        Interpolation method. Could be "bfill" or "pad"
+    axis: 0 or 1
+        Interpolation axis
+    limit: int, optional
+        Index limit on interpolation.
+    limit_area: str, optional
+        Limit area for interpolation. Can be "inside" or "outside"
+
+    Notes
+    -----
+    Modifies values in-place.
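+
+    A minimal sketch of the forward-fill path (the values are invented)::
+
+        >>> arr = np.array([[1.0, np.nan, np.nan, 4.0]])
+        >>> interpolate_2d(arr, method="pad", limit=1)
+        >>> arr
+        array([[ 1.,  1., nan,  4.]])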
+ """ + if limit_area is not None: + # Argument 1 to "apply_along_axis" has incompatible type "partial[None]"; + # expected "Callable[..., Union[_SupportsArray[dtype[]], + # Sequence[_SupportsArray[dtype[]]], Sequence[Sequence + # [_SupportsArray[dtype[]]]], + # Sequence[Sequence[Sequence[_SupportsArray[dtype[]]]]], + # Sequence[Sequence[Sequence[Sequence[_SupportsArray[dtype[]]]]]]]]" + + # Argument 2 to "apply_along_axis" has incompatible type "Union[str, int]"; + # expected "SupportsIndex" [arg-type] + np.apply_along_axis( + partial( + _interpolate_with_limit_area, + method=method, + limit=limit, + limit_area=limit_area, + ), # type: ignore[arg-type] + axis, # type: ignore[arg-type] + values, + ) + return + + transf = (lambda x: x) if axis == 0 else (lambda x: x.T) + + # reshape a 1 dim if needed + if values.ndim == 1: + if axis != 0: # pragma: no cover + raise AssertionError("cannot interpolate on a ndim == 1 with axis != 0") + values = values.reshape(tuple((1,) + values.shape)) + + method = clean_fill_method(method) + tvalues = transf(values) + + # _pad_2d and _backfill_2d both modify tvalues inplace + if method == "pad": + _pad_2d(tvalues, limit=limit) + else: + _backfill_2d(tvalues, limit=limit) + + return + + +def _fillna_prep( + values, mask: npt.NDArray[np.bool_] | None = None +) -> npt.NDArray[np.bool_]: + # boilerplate for _pad_1d, _backfill_1d, _pad_2d, _backfill_2d + + if mask is None: + mask = isna(values) + + mask = mask.view(np.uint8) + return mask + + +def _datetimelike_compat(func: F) -> F: + """ + Wrapper to handle datetime64 and timedelta64 dtypes. + """ + + @wraps(func) + def new_func(values, limit=None, mask=None): + if needs_i8_conversion(values.dtype): + if mask is None: + # This needs to occur before casting to int64 + mask = isna(values) + + result, mask = func(values.view("i8"), limit=limit, mask=mask) + return result.view(values.dtype), mask + + return func(values, limit=limit, mask=mask) + + return cast(F, new_func) + + +@_datetimelike_compat +def _pad_1d( + values: np.ndarray, + limit: int | None = None, + mask: npt.NDArray[np.bool_] | None = None, +) -> tuple[np.ndarray, npt.NDArray[np.bool_]]: + mask = _fillna_prep(values, mask) + algos.pad_inplace(values, mask, limit=limit) + return values, mask + + +@_datetimelike_compat +def _backfill_1d( + values: np.ndarray, + limit: int | None = None, + mask: npt.NDArray[np.bool_] | None = None, +) -> tuple[np.ndarray, npt.NDArray[np.bool_]]: + mask = _fillna_prep(values, mask) + algos.backfill_inplace(values, mask, limit=limit) + return values, mask + + +@_datetimelike_compat +def _pad_2d(values: np.ndarray, limit=None, mask: npt.NDArray[np.bool_] | None = None): + mask = _fillna_prep(values, mask) + + if np.all(values.shape): + algos.pad_2d_inplace(values, mask, limit=limit) + else: + # for test coverage + pass + return values, mask + + +@_datetimelike_compat +def _backfill_2d(values, limit=None, mask: npt.NDArray[np.bool_] | None = None): + mask = _fillna_prep(values, mask) + + if np.all(values.shape): + algos.backfill_2d_inplace(values, mask, limit=limit) + else: + # for test coverage + pass + return values, mask + + +_fill_methods = {"pad": _pad_1d, "backfill": _backfill_1d} + + +def get_fill_func(method, ndim: int = 1): + method = clean_fill_method(method) + if ndim == 1: + return _fill_methods[method] + return {"pad": _pad_2d, "backfill": _backfill_2d}[method] + + +def clean_reindex_fill_method(method): + return clean_fill_method(method, allow_nearest=True) + + +def _interp_limit(invalid: npt.NDArray[np.bool_], 
fw_limit, bw_limit): + """ + Get indexers of values that won't be filled + because they exceed the limits. + + Parameters + ---------- + invalid : np.ndarray[bool] + fw_limit : int or None + forward limit to index + bw_limit : int or None + backward limit to index + + Returns + ------- + set of indexers + + Notes + ----- + This is equivalent to the more readable, but slower + + .. code-block:: python + + def _interp_limit(invalid, fw_limit, bw_limit): + for x in np.where(invalid)[0]: + if invalid[max(0, x - fw_limit):x + bw_limit + 1].all(): + yield x + """ + # handle forward first; the backward direction is the same except + # 1. operate on the reversed array + # 2. subtract the returned indices from N - 1 + N = len(invalid) + f_idx = set() + b_idx = set() + + def inner(invalid, limit): + limit = min(limit, N) + windowed = _rolling_window(invalid, limit + 1).all(1) + idx = set(np.where(windowed)[0] + limit) | set( + np.where((~invalid[: limit + 1]).cumsum() == 0)[0] + ) + return idx + + if fw_limit is not None: + + if fw_limit == 0: + f_idx = set(np.where(invalid)[0]) + else: + f_idx = inner(invalid, fw_limit) + + if bw_limit is not None: + + if bw_limit == 0: + # then we don't even need to care about backwards + # just use forwards + return f_idx + else: + b_idx_inv = list(inner(invalid[::-1], bw_limit)) + b_idx = set(N - 1 - np.asarray(b_idx_inv)) + if fw_limit == 0: + return b_idx + + return f_idx & b_idx + + +def _rolling_window(a: npt.NDArray[np.bool_], window: int) -> npt.NDArray[np.bool_]: + """ + [True, True, False, True, False], 2 -> + + [ + [True, True], + [True, False], + [False, True], + [True, False], + ] + """ + # https://stackoverflow.com/a/6811241 + shape = a.shape[:-1] + (a.shape[-1] - window + 1, window) + strides = a.strides + (a.strides[-1],) + return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides) diff --git a/.local/lib/python3.8/site-packages/pandas/core/nanops.py b/.local/lib/python3.8/site-packages/pandas/core/nanops.py new file mode 100644 index 0000000..40664f1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/nanops.py @@ -0,0 +1,1739 @@ +from __future__ import annotations + +import functools +import itertools +import operator +from typing import ( + Any, + cast, +) +import warnings + +import numpy as np + +from pandas._config import get_option + +from pandas._libs import ( + NaT, + NaTType, + Timedelta, + iNaT, + lib, +) +from pandas._typing import ( + ArrayLike, + Dtype, + DtypeObj, + F, + Scalar, + Shape, + npt, +) +from pandas.compat._optional import import_optional_dependency + +from pandas.core.dtypes.common import ( + is_any_int_dtype, + is_bool_dtype, + is_complex, + is_datetime64_any_dtype, + is_float, + is_float_dtype, + is_integer, + is_integer_dtype, + is_numeric_dtype, + is_object_dtype, + is_scalar, + is_timedelta64_dtype, + needs_i8_conversion, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import PeriodDtype +from pandas.core.dtypes.missing import ( + isna, + na_value_for_dtype, + notna, +) + +from pandas.core.construction import extract_array + +bn = import_optional_dependency("bottleneck", errors="warn") +_BOTTLENECK_INSTALLED = bn is not None +_USE_BOTTLENECK = False + + +def set_use_bottleneck(v: bool = True) -> None: + # set/unset to use bottleneck + global _USE_BOTTLENECK + if _BOTTLENECK_INSTALLED: + _USE_BOTTLENECK = v + + +set_use_bottleneck(get_option("compute.use_bottleneck")) + + +class disallow: + def __init__(self, *dtypes: Dtype): + super().__init__() + self.dtypes = 
tuple(pandas_dtype(dtype).type for dtype in dtypes) + + def check(self, obj) -> bool: + return hasattr(obj, "dtype") and issubclass(obj.dtype.type, self.dtypes) + + def __call__(self, f: F) -> F: + @functools.wraps(f) + def _f(*args, **kwargs): + obj_iter = itertools.chain(args, kwargs.values()) + if any(self.check(obj) for obj in obj_iter): + f_name = f.__name__.replace("nan", "") + raise TypeError( + f"reduction operation '{f_name}' not allowed for this dtype" + ) + try: + with np.errstate(invalid="ignore"): + return f(*args, **kwargs) + except ValueError as e: + # we want to transform an object array + # ValueError message to the more typical TypeError + # e.g. this is normally a disallowed function on + # object arrays that contain strings + if is_object_dtype(args[0]): + raise TypeError(e) from e + raise + + return cast(F, _f) + + +class bottleneck_switch: + def __init__(self, name=None, **kwargs): + self.name = name + self.kwargs = kwargs + + def __call__(self, alt: F) -> F: + bn_name = self.name or alt.__name__ + + try: + bn_func = getattr(bn, bn_name) + except (AttributeError, NameError): # pragma: no cover + bn_func = None + + @functools.wraps(alt) + def f( + values: np.ndarray, + *, + axis: int | None = None, + skipna: bool = True, + **kwds, + ): + if len(self.kwargs) > 0: + for k, v in self.kwargs.items(): + if k not in kwds: + kwds[k] = v + + if values.size == 0 and kwds.get("min_count") is None: + # We are empty, returning NA for our type + # Only applies for the default `min_count` of None + # since that affects how empty arrays are handled. + # TODO(GH-18976) update all the nanops methods to + # correctly handle empty inputs and remove this check. + # It *may* just be `var` + return _na_for_min_count(values, axis) + + if _USE_BOTTLENECK and skipna and _bn_ok_dtype(values.dtype, bn_name): + if kwds.get("mask", None) is None: + # `mask` is not recognised by bottleneck, would raise + # TypeError if called + kwds.pop("mask", None) + result = bn_func(values, axis=axis, **kwds) + + # prefer to treat inf/-inf as NA, but must compute the func + # twice :( + if _has_infs(result): + result = alt(values, axis=axis, skipna=skipna, **kwds) + else: + result = alt(values, axis=axis, skipna=skipna, **kwds) + else: + result = alt(values, axis=axis, skipna=skipna, **kwds) + + return result + + return cast(F, f) + + +def _bn_ok_dtype(dtype: DtypeObj, name: str) -> bool: + # Bottleneck chokes on datetime64, PeriodDtype (or and EA) + if not is_object_dtype(dtype) and not needs_i8_conversion(dtype): + + # GH 15507 + # bottleneck does not properly upcast during the sum + # so can overflow + + # GH 9422 + # further we also want to preserve NaN when all elements + # are NaN, unlike bottleneck/numpy which consider this + # to be 0 + return name not in ["nansum", "nanprod"] + return False + + +def _has_infs(result) -> bool: + if isinstance(result, np.ndarray): + if result.dtype == "f8" or result.dtype == "f4": + # Note: outside of an nanops-specific test, we always have + # result.ndim == 1, so there is no risk of this ravel making a copy. 
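+            # e.g. an array containing +inf or -inf returns True here,
+            # while plain NaNs are not counted as infs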
+ return lib.has_infs(result.ravel("K")) + try: + return np.isinf(result).any() + except (TypeError, NotImplementedError): + # if it doesn't support infs, then it can't have infs + return False + + +def _get_fill_value( + dtype: DtypeObj, fill_value: Scalar | None = None, fill_value_typ=None +): + """return the correct fill value for the dtype of the values""" + if fill_value is not None: + return fill_value + if _na_ok_dtype(dtype): + if fill_value_typ is None: + return np.nan + else: + if fill_value_typ == "+inf": + return np.inf + else: + return -np.inf + else: + if fill_value_typ == "+inf": + # need the max int here + return lib.i8max + else: + return iNaT + + +def _maybe_get_mask( + values: np.ndarray, skipna: bool, mask: npt.NDArray[np.bool_] | None +) -> npt.NDArray[np.bool_] | None: + """ + Compute a mask if and only if necessary. + + This function will compute a mask iff it is necessary. Otherwise, + return the provided mask (potentially None) when a mask does not need to be + computed. + + A mask is never necessary if the values array is of boolean or integer + dtypes, as these are incapable of storing NaNs. If passing a NaN-capable + dtype that is interpretable as either boolean or integer data (eg, + timedelta64), a mask must be provided. + + If the skipna parameter is False, a new mask will not be computed. + + The mask is computed using isna() by default. Setting invert=True selects + notna() as the masking function. + + Parameters + ---------- + values : ndarray + input array to potentially compute mask for + skipna : bool + boolean for whether NaNs should be skipped + mask : Optional[ndarray] + nan-mask if known + + Returns + ------- + Optional[np.ndarray[bool]] + """ + if mask is None: + if is_bool_dtype(values.dtype) or is_integer_dtype(values.dtype): + # Boolean data cannot contain nulls, so signal via mask being None + return None + + if skipna or needs_i8_conversion(values.dtype): + mask = isna(values) + + return mask + + +def _get_values( + values: np.ndarray, + skipna: bool, + fill_value: Any = None, + fill_value_typ: str | None = None, + mask: npt.NDArray[np.bool_] | None = None, +) -> tuple[np.ndarray, npt.NDArray[np.bool_] | None, np.dtype, np.dtype, Any]: + """ + Utility to get the values view, mask, dtype, dtype_max, and fill_value. + + If both mask and fill_value/fill_value_typ are not None and skipna is True, + the values array will be copied. + + For input arrays of boolean or integer dtypes, copies will only occur if a + precomputed mask, a fill_value/fill_value_typ, and skipna=True are + provided. + + Parameters + ---------- + values : ndarray + input array to potentially compute mask for + skipna : bool + boolean for whether NaNs should be skipped + fill_value : Any + value to fill NaNs with + fill_value_typ : str + Set to '+inf' or '-inf' to handle dtype-specific infinities + mask : Optional[np.ndarray[bool]] + nan-mask if known + + Returns + ------- + values : ndarray + Potential copy of input value array + mask : Optional[ndarray[bool]] + Mask for values, if deemed necessary to compute + dtype : np.dtype + dtype for values + dtype_max : np.dtype + platform independent dtype + fill_value : Any + fill value used + """ + # In _get_values is only called from within nanops, and in all cases + # with scalar fill_value. 
This guarantee is important for the + # np.where call below + assert is_scalar(fill_value) + # error: Incompatible types in assignment (expression has type "Union[Any, + # Union[ExtensionArray, ndarray]]", variable has type "ndarray") + values = extract_array(values, extract_numpy=True) # type: ignore[assignment] + + mask = _maybe_get_mask(values, skipna, mask) + + dtype = values.dtype + + datetimelike = False + if needs_i8_conversion(values.dtype): + # changing timedelta64/datetime64 to int64 needs to happen after + # finding `mask` above + values = np.asarray(values.view("i8")) + datetimelike = True + + dtype_ok = _na_ok_dtype(dtype) + + # get our fill value (in case we need to provide an alternative + # dtype for it) + fill_value = _get_fill_value( + dtype, fill_value=fill_value, fill_value_typ=fill_value_typ + ) + + if skipna and (mask is not None) and (fill_value is not None): + if mask.any(): + if dtype_ok or datetimelike: + values = values.copy() + np.putmask(values, mask, fill_value) + else: + # np.where will promote if needed + values = np.where(~mask, values, fill_value) + + # return a platform independent precision dtype + dtype_max = dtype + if is_integer_dtype(dtype) or is_bool_dtype(dtype): + dtype_max = np.dtype(np.int64) + elif is_float_dtype(dtype): + dtype_max = np.dtype(np.float64) + + return values, mask, dtype, dtype_max, fill_value + + +def _na_ok_dtype(dtype: DtypeObj) -> bool: + if needs_i8_conversion(dtype): + return False + return not issubclass(dtype.type, np.integer) + + +def _wrap_results(result, dtype: np.dtype, fill_value=None): + """wrap our results if needed""" + if result is NaT: + pass + + elif is_datetime64_any_dtype(dtype): + if fill_value is None: + # GH#24293 + fill_value = iNaT + if not isinstance(result, np.ndarray): + assert not isna(fill_value), "Expected non-null fill_value" + if result == fill_value: + result = np.nan + + if isna(result): + result = np.datetime64("NaT", "ns") + else: + result = np.int64(result).view("datetime64[ns]") + else: + # If we have float dtype, taking a view will give the wrong result + result = result.astype(dtype) + elif is_timedelta64_dtype(dtype): + if not isinstance(result, np.ndarray): + if result == fill_value: + result = np.nan + + # raise if we have a timedelta64[ns] which is too large + if np.fabs(result) > lib.i8max: + raise ValueError("overflow in timedelta operation") + + result = Timedelta(result, unit="ns") + else: + result = result.astype("m8[ns]").view(dtype) + + return result + + +def _datetimelike_compat(func: F) -> F: + """ + If we have datetime64 or timedelta64 values, ensure we have a correct + mask before calling the wrapped function, then cast back afterwards. + """ + + @functools.wraps(func) + def new_func( + values: np.ndarray, + *, + axis: int | None = None, + skipna: bool = True, + mask: npt.NDArray[np.bool_] | None = None, + **kwargs, + ): + orig_values = values + + datetimelike = values.dtype.kind in ["m", "M"] + if datetimelike and mask is None: + mask = isna(values) + + result = func(values, axis=axis, skipna=skipna, mask=mask, **kwargs) + + if datetimelike: + result = _wrap_results(result, orig_values.dtype, fill_value=iNaT) + if not skipna: + assert mask is not None # checked above + result = _mask_datetimelike_result(result, axis, mask, orig_values) + + return result + + return cast(F, new_func) + + +def _na_for_min_count(values: np.ndarray, axis: int | None) -> Scalar | np.ndarray: + """ + Return the missing value for `values`. 
+ + Parameters + ---------- + values : ndarray + axis : int or None + axis for the reduction, required if values.ndim > 1. + + Returns + ------- + result : scalar or ndarray + For 1-D values, returns a scalar of the correct missing type. + For 2-D values, returns a 1-D array where each element is missing. + """ + # we either return np.nan or pd.NaT + if is_numeric_dtype(values): + values = values.astype("float64") + fill_value = na_value_for_dtype(values.dtype) + + if values.ndim == 1: + return fill_value + elif axis is None: + return fill_value + else: + result_shape = values.shape[:axis] + values.shape[axis + 1 :] + + return np.full(result_shape, fill_value, dtype=values.dtype) + + +def maybe_operate_rowwise(func: F) -> F: + """ + NumPy operations on C-contiguous ndarrays with axis=1 can be + very slow if axis 1 >> axis 0. + Operate row-by-row and concatenate the results. + """ + + @functools.wraps(func) + def newfunc(values: np.ndarray, *, axis: int | None = None, **kwargs): + if ( + axis == 1 + and values.ndim == 2 + and values.flags["C_CONTIGUOUS"] + # only takes this path for wide arrays (long dataframes), for threshold see + # https://github.com/pandas-dev/pandas/pull/43311#issuecomment-974891737 + and (values.shape[1] / 1000) > values.shape[0] + and values.dtype != object + and values.dtype != bool + ): + arrs = list(values) + if kwargs.get("mask") is not None: + mask = kwargs.pop("mask") + results = [ + func(arrs[i], mask=mask[i], **kwargs) for i in range(len(arrs)) + ] + else: + results = [func(x, **kwargs) for x in arrs] + return np.array(results) + + return func(values, axis=axis, **kwargs) + + return cast(F, newfunc) + + +def nanany( + values: np.ndarray, + *, + axis: int | None = None, + skipna: bool = True, + mask: npt.NDArray[np.bool_] | None = None, +) -> bool: + """ + Check if any elements along an axis evaluate to True. + + Parameters + ---------- + values : ndarray + axis : int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : bool + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 2]) + >>> nanops.nanany(s) + True + + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([np.nan]) + >>> nanops.nanany(s) + False + """ + values, _, _, _, _ = _get_values(values, skipna, fill_value=False, mask=mask) + + # For object type, any won't necessarily return + # boolean values (numpy/numpy#4352) + if is_object_dtype(values): + values = values.astype(bool) + + # error: Incompatible return value type (got "Union[bool_, ndarray]", expected + # "bool") + return values.any(axis) # type: ignore[return-value] + + +def nanall( + values: np.ndarray, + *, + axis: int | None = None, + skipna: bool = True, + mask: npt.NDArray[np.bool_] | None = None, +) -> bool: + """ + Check if all elements along an axis evaluate to True. 
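+
+    With the default ``skipna=True``, missing values are filled with
+    ``True`` before reducing, so NaNs do not make the result False (see
+    the first example below).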
+ + Parameters + ---------- + values : ndarray + axis : int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : bool + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 2, np.nan]) + >>> nanops.nanall(s) + True + + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 0]) + >>> nanops.nanall(s) + False + """ + values, _, _, _, _ = _get_values(values, skipna, fill_value=True, mask=mask) + + # For object type, all won't necessarily return + # boolean values (numpy/numpy#4352) + if is_object_dtype(values): + values = values.astype(bool) + + # error: Incompatible return value type (got "Union[bool_, ndarray]", expected + # "bool") + return values.all(axis) # type: ignore[return-value] + + +@disallow("M8") +@_datetimelike_compat +@maybe_operate_rowwise +def nansum( + values: np.ndarray, + *, + axis: int | None = None, + skipna: bool = True, + min_count: int = 0, + mask: npt.NDArray[np.bool_] | None = None, +) -> float: + """ + Sum the elements along an axis ignoring NaNs + + Parameters + ---------- + values : ndarray[dtype] + axis : int, optional + skipna : bool, default True + min_count: int, default 0 + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : dtype + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 2, np.nan]) + >>> nanops.nansum(s) + 3.0 + """ + values, mask, dtype, dtype_max, _ = _get_values( + values, skipna, fill_value=0, mask=mask + ) + dtype_sum = dtype_max + if is_float_dtype(dtype): + dtype_sum = dtype + elif is_timedelta64_dtype(dtype): + dtype_sum = np.dtype(np.float64) + + the_sum = values.sum(axis, dtype=dtype_sum) + the_sum = _maybe_null_out(the_sum, axis, mask, values.shape, min_count=min_count) + + return the_sum + + +def _mask_datetimelike_result( + result: np.ndarray | np.datetime64 | np.timedelta64, + axis: int | None, + mask: npt.NDArray[np.bool_], + orig_values: np.ndarray, +) -> np.ndarray | np.datetime64 | np.timedelta64 | NaTType: + if isinstance(result, np.ndarray): + # we need to apply the mask + result = result.astype("i8").view(orig_values.dtype) + axis_mask = mask.any(axis=axis) + # error: Unsupported target for indexed assignment ("Union[ndarray[Any, Any], + # datetime64, timedelta64]") + result[axis_mask] = iNaT # type: ignore[index] + else: + if mask.any(): + return NaT + return result + + +@disallow(PeriodDtype) +@bottleneck_switch() +@_datetimelike_compat +def nanmean( + values: np.ndarray, + *, + axis: int | None = None, + skipna: bool = True, + mask: npt.NDArray[np.bool_] | None = None, +) -> float: + """ + Compute the mean of the element along an axis ignoring NaNs + + Parameters + ---------- + values : ndarray + axis : int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + float + Unless input is a float array, in which case use the same + precision as the input array. 
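+
+    Notes
+    -----
+    With ``skipna=True`` the NaNs are first replaced by 0 and the result
+    is ``values.sum(axis) / count``, where ``count`` is the number of
+    non-null entries along the axis; integer and datetime-like input is
+    accumulated as float64.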
+ + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 2, np.nan]) + >>> nanops.nanmean(s) + 1.5 + """ + values, mask, dtype, dtype_max, _ = _get_values( + values, skipna, fill_value=0, mask=mask + ) + dtype_sum = dtype_max + dtype_count = np.dtype(np.float64) + + # not using needs_i8_conversion because that includes period + if dtype.kind in ["m", "M"]: + dtype_sum = np.dtype(np.float64) + elif is_integer_dtype(dtype): + dtype_sum = np.dtype(np.float64) + elif is_float_dtype(dtype): + dtype_sum = dtype + dtype_count = dtype + + count = _get_counts(values.shape, mask, axis, dtype=dtype_count) + the_sum = _ensure_numeric(values.sum(axis, dtype=dtype_sum)) + + if axis is not None and getattr(the_sum, "ndim", False): + count = cast(np.ndarray, count) + with np.errstate(all="ignore"): + # suppress division by zero warnings + the_mean = the_sum / count + ct_mask = count == 0 + if ct_mask.any(): + the_mean[ct_mask] = np.nan + else: + the_mean = the_sum / count if count > 0 else np.nan + + return the_mean + + +@bottleneck_switch() +def nanmedian(values, *, axis=None, skipna=True, mask=None): + """ + Parameters + ---------- + values : ndarray + axis : int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : float + Unless input is a float array, in which case use the same + precision as the input array. + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, np.nan, 2, 2]) + >>> nanops.nanmedian(s) + 2.0 + """ + + def get_median(x): + mask = notna(x) + if not skipna and not mask.all(): + return np.nan + with warnings.catch_warnings(): + # Suppress RuntimeWarning about All-NaN slice + warnings.filterwarnings("ignore", "All-NaN slice encountered") + res = np.nanmedian(x[mask]) + return res + + values, mask, dtype, _, _ = _get_values(values, skipna, mask=mask) + if not is_float_dtype(values.dtype): + try: + values = values.astype("f8") + except ValueError as err: + # e.g. "could not convert string to float: 'a'" + raise TypeError(str(err)) from err + if mask is not None: + values[mask] = np.nan + + notempty = values.size + + # an array from a frame + if values.ndim > 1 and axis is not None: + + # there's a non-empty array to apply over otherwise numpy raises + if notempty: + if not skipna: + res = np.apply_along_axis(get_median, axis, values) + + else: + # fastpath for the skipna case + with warnings.catch_warnings(): + # Suppress RuntimeWarning about All-NaN slice + warnings.filterwarnings("ignore", "All-NaN slice encountered") + res = np.nanmedian(values, axis) + + else: + # must return the correct shape, but median is not defined for the + # empty set so return nans of shape "everything but the passed axis" + # since "axis" is where the reduction would occur if we had a nonempty + # array + res = get_empty_reduction_result(values.shape, axis, np.float_, np.nan) + + else: + # otherwise return a scalar value + res = get_median(values) if notempty else np.nan + return _wrap_results(res, dtype) + + +def get_empty_reduction_result( + shape: tuple[int, ...], + axis: int, + dtype: np.dtype | type[np.floating], + fill_value: Any, +) -> np.ndarray: + """ + The result from a reduction on an empty ndarray. 
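+
+    The returned array has ``shape`` with the ``axis`` entry removed and
+    is filled with ``fill_value``.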
+ + Parameters + ---------- + shape : Tuple[int] + axis : int + dtype : np.dtype + fill_value : Any + + Returns + ------- + np.ndarray + """ + shp = np.array(shape) + dims = np.arange(len(shape)) + ret = np.empty(shp[dims != axis], dtype=dtype) + ret.fill(fill_value) + return ret + + +def _get_counts_nanvar( + values_shape: Shape, + mask: npt.NDArray[np.bool_] | None, + axis: int | None, + ddof: int, + dtype: np.dtype = np.dtype(np.float64), +) -> tuple[int | float | np.ndarray, int | float | np.ndarray]: + """ + Get the count of non-null values along an axis, accounting + for degrees of freedom. + + Parameters + ---------- + values_shape : Tuple[int, ...] + shape tuple from values ndarray, used if mask is None + mask : Optional[ndarray[bool]] + locations in values that should be considered missing + axis : Optional[int] + axis to count along + ddof : int + degrees of freedom + dtype : type, optional + type to use for count + + Returns + ------- + count : int, np.nan or np.ndarray + d : int, np.nan or np.ndarray + """ + count = _get_counts(values_shape, mask, axis, dtype=dtype) + d = count - dtype.type(ddof) + + # always return NaN, never inf + if is_scalar(count): + if count <= ddof: + count = np.nan + d = np.nan + else: + # count is not narrowed by is_scalar check + count = cast(np.ndarray, count) + mask = count <= ddof + if mask.any(): + np.putmask(d, mask, np.nan) + np.putmask(count, mask, np.nan) + return count, d + + +@bottleneck_switch(ddof=1) +def nanstd(values, *, axis=None, skipna=True, ddof=1, mask=None): + """ + Compute the standard deviation along given axis while ignoring NaNs + + Parameters + ---------- + values : ndarray + axis : int, optional + skipna : bool, default True + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations is N - ddof, + where N represents the number of elements. + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : float + Unless input is a float array, in which case use the same + precision as the input array. + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, np.nan, 2, 3]) + >>> nanops.nanstd(s) + 1.0 + """ + if values.dtype == "M8[ns]": + values = values.view("m8[ns]") + + orig_dtype = values.dtype + values, mask, _, _, _ = _get_values(values, skipna, mask=mask) + + result = np.sqrt(nanvar(values, axis=axis, skipna=skipna, ddof=ddof, mask=mask)) + return _wrap_results(result, orig_dtype) + + +@disallow("M8", "m8") +@bottleneck_switch(ddof=1) +def nanvar(values, *, axis=None, skipna=True, ddof=1, mask=None): + """ + Compute the variance along given axis while ignoring NaNs + + Parameters + ---------- + values : ndarray + axis : int, optional + skipna : bool, default True + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations is N - ddof, + where N represents the number of elements. + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : float + Unless input is a float array, in which case use the same + precision as the input array. 
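+
+    Notes
+    -----
+    Uses a two-pass algorithm (the mean is computed first, then the sum
+    of squared deviations), which is stable against cancellation errors;
+    the divisor is ``count - ddof``.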
+ + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, np.nan, 2, 3]) + >>> nanops.nanvar(s) + 1.0 + """ + values = extract_array(values, extract_numpy=True) + dtype = values.dtype + mask = _maybe_get_mask(values, skipna, mask) + if is_any_int_dtype(dtype): + values = values.astype("f8") + if mask is not None: + values[mask] = np.nan + + if is_float_dtype(values.dtype): + count, d = _get_counts_nanvar(values.shape, mask, axis, ddof, values.dtype) + else: + count, d = _get_counts_nanvar(values.shape, mask, axis, ddof) + + if skipna and mask is not None: + values = values.copy() + np.putmask(values, mask, 0) + + # xref GH10242 + # Compute variance via two-pass algorithm, which is stable against + # cancellation errors and relatively accurate for small numbers of + # observations. + # + # See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count + if axis is not None: + avg = np.expand_dims(avg, axis) + sqr = _ensure_numeric((avg - values) ** 2) + if mask is not None: + np.putmask(sqr, mask, 0) + result = sqr.sum(axis=axis, dtype=np.float64) / d + + # Return variance as np.float64 (the datatype used in the accumulator), + # unless we were dealing with a float array, in which case use the same + # precision as the original values array. + if is_float_dtype(dtype): + result = result.astype(dtype, copy=False) + return result + + +@disallow("M8", "m8") +def nansem( + values: np.ndarray, + *, + axis: int | None = None, + skipna: bool = True, + ddof: int = 1, + mask: npt.NDArray[np.bool_] | None = None, +) -> float: + """ + Compute the standard error in the mean along given axis while ignoring NaNs + + Parameters + ---------- + values : ndarray + axis : int, optional + skipna : bool, default True + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations is N - ddof, + where N represents the number of elements. + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : float64 + Unless input is a float array, in which case use the same + precision as the input array. 
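+
+    Notes
+    -----
+    Computed as ``sqrt(nanvar(values, ddof=ddof) / count)``: the square
+    root of the variance over the number of non-null observations.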
+ + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, np.nan, 2, 3]) + >>> nanops.nansem(s) + 0.5773502691896258 + """ + # This checks if non-numeric-like data is passed with numeric_only=False + # and raises a TypeError otherwise + nanvar(values, axis=axis, skipna=skipna, ddof=ddof, mask=mask) + + mask = _maybe_get_mask(values, skipna, mask) + if not is_float_dtype(values.dtype): + values = values.astype("f8") + + count, _ = _get_counts_nanvar(values.shape, mask, axis, ddof, values.dtype) + var = nanvar(values, axis=axis, skipna=skipna, ddof=ddof) + + return np.sqrt(var) / np.sqrt(count) + + +def _nanminmax(meth, fill_value_typ): + @bottleneck_switch(name="nan" + meth) + @_datetimelike_compat + def reduction( + values: np.ndarray, + *, + axis: int | None = None, + skipna: bool = True, + mask: npt.NDArray[np.bool_] | None = None, + ) -> Dtype: + + values, mask, dtype, dtype_max, fill_value = _get_values( + values, skipna, fill_value_typ=fill_value_typ, mask=mask + ) + + if (axis is not None and values.shape[axis] == 0) or values.size == 0: + try: + result = getattr(values, meth)(axis, dtype=dtype_max) + result.fill(np.nan) + except (AttributeError, TypeError, ValueError): + result = np.nan + else: + result = getattr(values, meth)(axis) + + result = _maybe_null_out(result, axis, mask, values.shape) + return result + + return reduction + + +nanmin = _nanminmax("min", fill_value_typ="+inf") +nanmax = _nanminmax("max", fill_value_typ="-inf") + + +@disallow("O") +def nanargmax( + values: np.ndarray, + *, + axis: int | None = None, + skipna: bool = True, + mask: npt.NDArray[np.bool_] | None = None, +) -> int | np.ndarray: + """ + Parameters + ---------- + values : ndarray + axis : int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : int or ndarray[int] + The index/indices of max value in specified axis or -1 in the NA case + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> arr = np.array([1, 2, 3, np.nan, 4]) + >>> nanops.nanargmax(arr) + 4 + + >>> arr = np.array(range(12), dtype=np.float64).reshape(4, 3) + >>> arr[2:, 2] = np.nan + >>> arr + array([[ 0., 1., 2.], + [ 3., 4., 5.], + [ 6., 7., nan], + [ 9., 10., nan]]) + >>> nanops.nanargmax(arr, axis=1) + array([2, 2, 1, 1]) + """ + values, mask, _, _, _ = _get_values(values, True, fill_value_typ="-inf", mask=mask) + # error: Need type annotation for 'result' + result = values.argmax(axis) # type: ignore[var-annotated] + result = _maybe_arg_null_out(result, axis, mask, skipna) + return result + + +@disallow("O") +def nanargmin( + values: np.ndarray, + *, + axis: int | None = None, + skipna: bool = True, + mask: npt.NDArray[np.bool_] | None = None, +) -> int | np.ndarray: + """ + Parameters + ---------- + values : ndarray + axis : int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : int or ndarray[int] + The index/indices of min value in specified axis or -1 in the NA case + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> arr = np.array([1, 2, 3, np.nan, 4]) + >>> nanops.nanargmin(arr) + 0 + + >>> arr = np.array(range(12), dtype=np.float64).reshape(4, 3) + >>> arr[2:, 0] = np.nan + >>> arr + array([[ 0., 1., 2.], + [ 3., 4., 5.], + [nan, 7., 8.], + [nan, 10., 11.]]) + >>> nanops.nanargmin(arr, axis=1) + array([0, 0, 1, 1]) + """ + values, mask, _, _, _ = _get_values(values, True, fill_value_typ="+inf", mask=mask) + # 
error: Need type annotation for 'result' + result = values.argmin(axis) # type: ignore[var-annotated] + result = _maybe_arg_null_out(result, axis, mask, skipna) + return result + + +@disallow("M8", "m8") +@maybe_operate_rowwise +def nanskew( + values: np.ndarray, + *, + axis: int | None = None, + skipna: bool = True, + mask: npt.NDArray[np.bool_] | None = None, +) -> float: + """ + Compute the sample skewness. + + The statistic computed here is the adjusted Fisher-Pearson standardized + moment coefficient G1. The algorithm computes this coefficient directly + from the second and third central moment. + + Parameters + ---------- + values : ndarray + axis : int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : float64 + Unless input is a float array, in which case use the same + precision as the input array. + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, np.nan, 1, 2]) + >>> nanops.nanskew(s) + 1.7320508075688787 + """ + # error: Incompatible types in assignment (expression has type "Union[Any, + # Union[ExtensionArray, ndarray]]", variable has type "ndarray") + values = extract_array(values, extract_numpy=True) # type: ignore[assignment] + mask = _maybe_get_mask(values, skipna, mask) + if not is_float_dtype(values.dtype): + values = values.astype("f8") + count = _get_counts(values.shape, mask, axis) + else: + count = _get_counts(values.shape, mask, axis, dtype=values.dtype) + + if skipna and mask is not None: + values = values.copy() + np.putmask(values, mask, 0) + + mean = values.sum(axis, dtype=np.float64) / count + if axis is not None: + mean = np.expand_dims(mean, axis) + + adjusted = values - mean + if skipna and mask is not None: + np.putmask(adjusted, mask, 0) + adjusted2 = adjusted ** 2 + adjusted3 = adjusted2 * adjusted + m2 = adjusted2.sum(axis, dtype=np.float64) + m3 = adjusted3.sum(axis, dtype=np.float64) + + # floating point error + # + # #18044 in _libs/windows.pyx calc_skew follow this behavior + # to fix the fperr to treat m2 <1e-14 as zero + m2 = _zero_out_fperr(m2) + m3 = _zero_out_fperr(m3) + + with np.errstate(invalid="ignore", divide="ignore"): + result = (count * (count - 1) ** 0.5 / (count - 2)) * (m3 / m2 ** 1.5) + + dtype = values.dtype + if is_float_dtype(dtype): + result = result.astype(dtype, copy=False) + + if isinstance(result, np.ndarray): + result = np.where(m2 == 0, 0, result) + result[count < 3] = np.nan + else: + result = 0 if m2 == 0 else result + if count < 3: + return np.nan + + return result + + +@disallow("M8", "m8") +@maybe_operate_rowwise +def nankurt( + values: np.ndarray, + *, + axis: int | None = None, + skipna: bool = True, + mask: npt.NDArray[np.bool_] | None = None, +) -> float: + """ + Compute the sample excess kurtosis + + The statistic computed here is the adjusted Fisher-Pearson standardized + moment coefficient G2, computed directly from the second and fourth + central moment. + + Parameters + ---------- + values : ndarray + axis : int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : float64 + Unless input is a float array, in which case use the same + precision as the input array. 
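+
+    Notes
+    -----
+    With ``n`` valid observations and central moments ``m2`` and ``m4``,
+    the statistic is ``n*(n+1)*(n-1)*m4 / ((n-2)*(n-3)*m2**2)``
+    minus the adjustment ``3*(n-1)**2 / ((n-2)*(n-3))``; fewer than four
+    observations give NaN.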
+ + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, np.nan, 1, 3, 2]) + >>> nanops.nankurt(s) + -1.2892561983471076 + """ + # error: Incompatible types in assignment (expression has type "Union[Any, + # Union[ExtensionArray, ndarray]]", variable has type "ndarray") + values = extract_array(values, extract_numpy=True) # type: ignore[assignment] + mask = _maybe_get_mask(values, skipna, mask) + if not is_float_dtype(values.dtype): + values = values.astype("f8") + count = _get_counts(values.shape, mask, axis) + else: + count = _get_counts(values.shape, mask, axis, dtype=values.dtype) + + if skipna and mask is not None: + values = values.copy() + np.putmask(values, mask, 0) + + mean = values.sum(axis, dtype=np.float64) / count + if axis is not None: + mean = np.expand_dims(mean, axis) + + adjusted = values - mean + if skipna and mask is not None: + np.putmask(adjusted, mask, 0) + adjusted2 = adjusted ** 2 + adjusted4 = adjusted2 ** 2 + m2 = adjusted2.sum(axis, dtype=np.float64) + m4 = adjusted4.sum(axis, dtype=np.float64) + + with np.errstate(invalid="ignore", divide="ignore"): + adj = 3 * (count - 1) ** 2 / ((count - 2) * (count - 3)) + numerator = count * (count + 1) * (count - 1) * m4 + denominator = (count - 2) * (count - 3) * m2 ** 2 + + # floating point error + # + # #18044 in _libs/windows.pyx calc_kurt follow this behavior + # to fix the fperr to treat denom <1e-14 as zero + numerator = _zero_out_fperr(numerator) + denominator = _zero_out_fperr(denominator) + + if not isinstance(denominator, np.ndarray): + # if ``denom`` is a scalar, check these corner cases first before + # doing division + if count < 4: + return np.nan + if denominator == 0: + return 0 + + with np.errstate(invalid="ignore", divide="ignore"): + result = numerator / denominator - adj + + dtype = values.dtype + if is_float_dtype(dtype): + result = result.astype(dtype, copy=False) + + if isinstance(result, np.ndarray): + result = np.where(denominator == 0, 0, result) + result[count < 4] = np.nan + + return result + + +@disallow("M8", "m8") +@maybe_operate_rowwise +def nanprod( + values: np.ndarray, + *, + axis: int | None = None, + skipna: bool = True, + min_count: int = 0, + mask: npt.NDArray[np.bool_] | None = None, +) -> float: + """ + Parameters + ---------- + values : ndarray[dtype] + axis : int, optional + skipna : bool, default True + min_count: int, default 0 + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + Dtype + The product of all elements on a given axis. 
( NaNs are treated as 1) + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 2, 3, np.nan]) + >>> nanops.nanprod(s) + 6.0 + """ + mask = _maybe_get_mask(values, skipna, mask) + + if skipna and mask is not None: + values = values.copy() + values[mask] = 1 + result = values.prod(axis) + # error: Incompatible return value type (got "Union[ndarray, float]", expected + # "float") + return _maybe_null_out( # type: ignore[return-value] + result, axis, mask, values.shape, min_count=min_count + ) + + +def _maybe_arg_null_out( + result: np.ndarray, + axis: int | None, + mask: npt.NDArray[np.bool_] | None, + skipna: bool, +) -> np.ndarray | int: + # helper function for nanargmin/nanargmax + if mask is None: + return result + + if axis is None or not getattr(result, "ndim", False): + if skipna: + if mask.all(): + return -1 + else: + if mask.any(): + return -1 + else: + if skipna: + na_mask = mask.all(axis) + else: + na_mask = mask.any(axis) + if na_mask.any(): + result[na_mask] = -1 + return result + + +def _get_counts( + values_shape: Shape, + mask: npt.NDArray[np.bool_] | None, + axis: int | None, + dtype: np.dtype = np.dtype(np.float64), +) -> int | float | np.ndarray: + """ + Get the count of non-null values along an axis + + Parameters + ---------- + values_shape : tuple of int + shape tuple from values ndarray, used if mask is None + mask : Optional[ndarray[bool]] + locations in values that should be considered missing + axis : Optional[int] + axis to count along + dtype : type, optional + type to use for count + + Returns + ------- + count : scalar or array + """ + if axis is None: + if mask is not None: + n = mask.size - mask.sum() + else: + n = np.prod(values_shape) + return dtype.type(n) + + if mask is not None: + count = mask.shape[axis] - mask.sum(axis) + else: + count = values_shape[axis] + + if is_scalar(count): + return dtype.type(count) + return count.astype(dtype, copy=False) + + +def _maybe_null_out( + result: np.ndarray | float | NaTType, + axis: int | None, + mask: npt.NDArray[np.bool_] | None, + shape: tuple[int, ...], + min_count: int = 1, +) -> np.ndarray | float | NaTType: + """ + Returns + ------- + Dtype + The product of all elements on a given axis. ( NaNs are treated as 1) + """ + if axis is not None and isinstance(result, np.ndarray): + if mask is not None: + null_mask = (mask.shape[axis] - mask.sum(axis) - min_count) < 0 + else: + # we have no nulls, kept mask=None in _maybe_get_mask + below_count = shape[axis] - min_count < 0 + new_shape = shape[:axis] + shape[axis + 1 :] + null_mask = np.broadcast_to(below_count, new_shape) + + if np.any(null_mask): + if is_numeric_dtype(result): + if np.iscomplexobj(result): + result = result.astype("c16") + else: + result = result.astype("f8") + result[null_mask] = np.nan + else: + # GH12941, use None to auto cast null + result[null_mask] = None + elif result is not NaT: + if check_below_min_count(shape, mask, min_count): + result = np.nan + + return result + + +def check_below_min_count( + shape: tuple[int, ...], mask: npt.NDArray[np.bool_] | None, min_count: int +) -> bool: + """ + Check for the `min_count` keyword. Returns True if below `min_count` (when + missing value should be returned from the reduction). + + Parameters + ---------- + shape : tuple + The shape of the values (`values.shape`). + mask : ndarray[bool] or None + Boolean numpy array (typically of same shape as `shape`) or None. + min_count : int + Keyword passed through from sum/prod call. 
+ + Returns + ------- + bool + """ + if min_count > 0: + if mask is None: + # no missing values, only check size + non_nulls = np.prod(shape) + else: + non_nulls = mask.size - mask.sum() + if non_nulls < min_count: + return True + return False + + +def _zero_out_fperr(arg): + # #18044 reference this behavior to fix rolling skew/kurt issue + if isinstance(arg, np.ndarray): + with np.errstate(invalid="ignore"): + return np.where(np.abs(arg) < 1e-14, 0, arg) + else: + return arg.dtype.type(0) if np.abs(arg) < 1e-14 else arg + + +@disallow("M8", "m8") +def nancorr( + a: np.ndarray, b: np.ndarray, *, method="pearson", min_periods: int | None = None +): + """ + a, b: ndarrays + """ + if len(a) != len(b): + raise AssertionError("Operands to nancorr must have same size") + + if min_periods is None: + min_periods = 1 + + valid = notna(a) & notna(b) + if not valid.all(): + a = a[valid] + b = b[valid] + + if len(a) < min_periods: + return np.nan + + f = get_corr_func(method) + return f(a, b) + + +def get_corr_func(method): + if method == "kendall": + from scipy.stats import kendalltau + + def func(a, b): + return kendalltau(a, b)[0] + + return func + elif method == "spearman": + from scipy.stats import spearmanr + + def func(a, b): + return spearmanr(a, b)[0] + + return func + elif method == "pearson": + + def func(a, b): + return np.corrcoef(a, b)[0, 1] + + return func + elif callable(method): + return method + + raise ValueError( + f"Unknown method '{method}', expected one of " + "'kendall', 'spearman', 'pearson', or callable" + ) + + +@disallow("M8", "m8") +def nancov( + a: np.ndarray, + b: np.ndarray, + *, + min_periods: int | None = None, + ddof: int | None = 1, +): + if len(a) != len(b): + raise AssertionError("Operands to nancov must have same size") + + if min_periods is None: + min_periods = 1 + + valid = notna(a) & notna(b) + if not valid.all(): + a = a[valid] + b = b[valid] + + if len(a) < min_periods: + return np.nan + + return np.cov(a, b, ddof=ddof)[0, 1] + + +def _ensure_numeric(x): + if isinstance(x, np.ndarray): + if is_integer_dtype(x) or is_bool_dtype(x): + x = x.astype(np.float64) + elif is_object_dtype(x): + try: + x = x.astype(np.complex128) + except (TypeError, ValueError): + try: + x = x.astype(np.float64) + except ValueError as err: + # GH#29941 we get here with object arrays containing strs + raise TypeError(f"Could not convert {x} to numeric") from err + else: + if not np.any(np.imag(x)): + x = x.real + elif not (is_float(x) or is_integer(x) or is_complex(x)): + try: + x = float(x) + except (TypeError, ValueError): + # e.g. "1+1j" or "foo" + try: + x = complex(x) + except ValueError as err: + # e.g. "foo" + raise TypeError(f"Could not convert {x} to numeric") from err + return x + + +# NA-friendly array comparisons + + +def make_nancomp(op): + def f(x, y): + xmask = isna(x) + ymask = isna(y) + mask = xmask | ymask + + with np.errstate(all="ignore"): + result = op(x, y) + + if mask.any(): + if is_bool_dtype(result): + result = result.astype("O") + np.putmask(result, mask, np.nan) + + return result + + return f + + +nangt = make_nancomp(operator.gt) +nange = make_nancomp(operator.ge) +nanlt = make_nancomp(operator.lt) +nanle = make_nancomp(operator.le) +naneq = make_nancomp(operator.eq) +nanne = make_nancomp(operator.ne) + + +def na_accum_func(values: ArrayLike, accum_func, *, skipna: bool) -> ArrayLike: + """ + Cumulative function with skipna support. 
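+
+    Supports ``np.cumprod``, ``np.cumsum``, ``np.maximum.accumulate`` and
+    ``np.minimum.accumulate``; for datetime-like values the NaT positions
+    are worked around via an ``int64`` view and restored afterwards.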
+ + Parameters + ---------- + values : np.ndarray or ExtensionArray + accum_func : {np.cumprod, np.maximum.accumulate, np.cumsum, np.minimum.accumulate} + skipna : bool + + Returns + ------- + np.ndarray or ExtensionArray + """ + mask_a, mask_b = { + np.cumprod: (1.0, np.nan), + np.maximum.accumulate: (-np.inf, np.nan), + np.cumsum: (0.0, np.nan), + np.minimum.accumulate: (np.inf, np.nan), + }[accum_func] + + # We will be applying this function to block values + if values.dtype.kind in ["m", "M"]: + # GH#30460, GH#29058 + # numpy 1.18 started sorting NaTs at the end instead of beginning, + # so we need to work around to maintain backwards-consistency. + orig_dtype = values.dtype + + # We need to define mask before masking NaTs + mask = isna(values) + + y = values.view("i8") + # Note: the accum_func comparison fails as an "is" comparison + changed = accum_func == np.minimum.accumulate + + try: + if changed: + y[mask] = lib.i8max + + result = accum_func(y, axis=0) + finally: + if changed: + # restore NaT elements + y[mask] = iNaT + + if skipna: + result[mask] = iNaT + elif accum_func == np.minimum.accumulate: + # Restore NaTs that we masked previously + nz = (~np.asarray(mask)).nonzero()[0] + if len(nz): + # everything up to the first non-na entry stays NaT + result[: nz[0]] = iNaT + + if isinstance(values.dtype, np.dtype): + result = result.view(orig_dtype) + else: + # DatetimeArray/TimedeltaArray + # TODO: have this case go through a DTA method? + # For DatetimeTZDtype, view result as M8[ns] + npdtype = orig_dtype if isinstance(orig_dtype, np.dtype) else "M8[ns]" + # Item "type" of "Union[Type[ExtensionArray], Type[ndarray[Any, Any]]]" + # has no attribute "_simple_new" + result = type(values)._simple_new( # type: ignore[union-attr] + result.view(npdtype), dtype=orig_dtype + ) + + elif skipna and not issubclass(values.dtype.type, (np.integer, np.bool_)): + vals = values.copy() + mask = isna(vals) + vals[mask] = mask_a + result = accum_func(vals, axis=0) + result[mask] = mask_b + else: + result = accum_func(values, axis=0) + + return result diff --git a/.local/lib/python3.8/site-packages/pandas/core/ops/__init__.py b/.local/lib/python3.8/site-packages/pandas/core/ops/__init__.py new file mode 100644 index 0000000..540a557 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/ops/__init__.py @@ -0,0 +1,475 @@ +""" +Arithmetic operations for PandasObjects + +This is not a public API. 
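+
+It defines the flex arithmetic and comparison methods for Series and
+DataFrame, together with the alignment helpers they rely on.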
+""" +from __future__ import annotations + +import operator +from typing import TYPE_CHECKING +import warnings + +import numpy as np + +from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op # noqa:F401 +from pandas._typing import Level +from pandas.util._decorators import Appender +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + is_array_like, + is_list_like, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) +from pandas.core.dtypes.missing import isna + +from pandas.core import ( + algorithms, + roperator, +) +from pandas.core.ops.array_ops import ( # noqa:F401 + arithmetic_op, + comp_method_OBJECT_ARRAY, + comparison_op, + get_array_op, + logical_op, + maybe_prepare_scalar_for_op, +) +from pandas.core.ops.common import ( # noqa:F401 + get_op_result_name, + unpack_zerodim_and_defer, +) +from pandas.core.ops.docstrings import ( + _flex_comp_doc_FRAME, + _op_descriptions, + make_flex_doc, +) +from pandas.core.ops.invalid import invalid_comparison # noqa:F401 +from pandas.core.ops.mask_ops import ( # noqa: F401 + kleene_and, + kleene_or, + kleene_xor, +) +from pandas.core.ops.methods import add_flex_arithmetic_methods # noqa:F401 +from pandas.core.roperator import ( # noqa:F401 + radd, + rand_, + rdiv, + rdivmod, + rfloordiv, + rmod, + rmul, + ror_, + rpow, + rsub, + rtruediv, + rxor, +) + +if TYPE_CHECKING: + from pandas import ( + DataFrame, + Series, + ) + +# ----------------------------------------------------------------------------- +# constants +ARITHMETIC_BINOPS: set[str] = { + "add", + "sub", + "mul", + "pow", + "mod", + "floordiv", + "truediv", + "divmod", + "radd", + "rsub", + "rmul", + "rpow", + "rmod", + "rfloordiv", + "rtruediv", + "rdivmod", +} + + +COMPARISON_BINOPS: set[str] = {"eq", "ne", "lt", "gt", "le", "ge"} + + +# ----------------------------------------------------------------------------- +# Masking NA values and fallbacks for operations numpy does not support + + +def fill_binop(left, right, fill_value): + """ + If a non-None fill_value is given, replace null entries in left and right + with this value, but only in positions where _one_ of left/right is null, + not both. + + Parameters + ---------- + left : array-like + right : array-like + fill_value : object + + Returns + ------- + left : array-like + right : array-like + + Notes + ----- + Makes copies if fill_value is not None and NAs are present. 
+ """ + if fill_value is not None: + left_mask = isna(left) + right_mask = isna(right) + + # one but not both + mask = left_mask ^ right_mask + + if left_mask.any(): + # Avoid making a copy if we can + left = left.copy() + left[left_mask & mask] = fill_value + + if right_mask.any(): + # Avoid making a copy if we can + right = right.copy() + right[right_mask & mask] = fill_value + + return left, right + + +# ----------------------------------------------------------------------------- +# Series + + +def align_method_SERIES(left: Series, right, align_asobject: bool = False): + """align lhs and rhs Series""" + # ToDo: Different from align_method_FRAME, list, tuple and ndarray + # are not coerced here + # because Series has inconsistencies described in #13637 + + if isinstance(right, ABCSeries): + # avoid repeated alignment + if not left.index.equals(right.index): + + if align_asobject: + # to keep original value's dtype for bool ops + left = left.astype(object) + right = right.astype(object) + + left, right = left.align(right, copy=False) + + return left, right + + +def flex_method_SERIES(op): + name = op.__name__.strip("_") + doc = make_flex_doc(name, "series") + + @Appender(doc) + def flex_wrapper(self, other, level=None, fill_value=None, axis=0): + # validate axis + if axis is not None: + self._get_axis_number(axis) + + res_name = get_op_result_name(self, other) + + if isinstance(other, ABCSeries): + return self._binop(other, op, level=level, fill_value=fill_value) + elif isinstance(other, (np.ndarray, list, tuple)): + if len(other) != len(self): + raise ValueError("Lengths must be equal") + other = self._constructor(other, self.index) + result = self._binop(other, op, level=level, fill_value=fill_value) + result.name = res_name + return result + else: + if fill_value is not None: + self = self.fillna(fill_value) + + return op(self, other) + + flex_wrapper.__name__ = name + return flex_wrapper + + +# ----------------------------------------------------------------------------- +# DataFrame + + +def align_method_FRAME( + left, right, axis, flex: bool | None = False, level: Level = None +): + """ + Convert rhs to meet lhs dims if input is list, tuple or np.ndarray. + + Parameters + ---------- + left : DataFrame + right : Any + axis : int, str, or None + flex : bool or None, default False + Whether this is a flex op, in which case we reindex. + None indicates not to check for alignment. 
+ level : int or level name, default None + + Returns + ------- + left : DataFrame + right : Any + """ + + def to_series(right): + msg = "Unable to coerce to Series, length must be {req_len}: given {given_len}" + if axis is not None and left._get_axis_name(axis) == "index": + if len(left.index) != len(right): + raise ValueError( + msg.format(req_len=len(left.index), given_len=len(right)) + ) + right = left._constructor_sliced(right, index=left.index) + else: + if len(left.columns) != len(right): + raise ValueError( + msg.format(req_len=len(left.columns), given_len=len(right)) + ) + right = left._constructor_sliced(right, index=left.columns) + return right + + if isinstance(right, np.ndarray): + + if right.ndim == 1: + right = to_series(right) + + elif right.ndim == 2: + if right.shape == left.shape: + right = left._constructor(right, index=left.index, columns=left.columns) + + elif right.shape[0] == left.shape[0] and right.shape[1] == 1: + # Broadcast across columns + right = np.broadcast_to(right, left.shape) + right = left._constructor(right, index=left.index, columns=left.columns) + + elif right.shape[1] == left.shape[1] and right.shape[0] == 1: + # Broadcast along rows + right = to_series(right[0, :]) + + else: + raise ValueError( + "Unable to coerce to DataFrame, shape " + f"must be {left.shape}: given {right.shape}" + ) + + elif right.ndim > 2: + raise ValueError( + "Unable to coerce to Series/DataFrame, " + f"dimension must be <= 2: {right.shape}" + ) + + elif is_list_like(right) and not isinstance(right, (ABCSeries, ABCDataFrame)): + # GH 36702. Raise when attempting arithmetic with list of array-like. + if any(is_array_like(el) for el in right): + raise ValueError( + f"Unable to coerce list of {type(right[0])} to Series/DataFrame" + ) + # GH17901 + right = to_series(right) + + if flex is not None and isinstance(right, ABCDataFrame): + if not left._indexed_same(right): + if flex: + left, right = left.align(right, join="outer", level=level, copy=False) + else: + raise ValueError( + "Can only compare identically-labeled DataFrame objects" + ) + elif isinstance(right, ABCSeries): + # axis=1 is default for DataFrame-with-Series op + axis = left._get_axis_number(axis) if axis is not None else 1 + + if not flex: + if not left.axes[axis].equals(right.index): + warnings.warn( + "Automatic reindexing on DataFrame vs Series comparisons " + "is deprecated and will raise ValueError in a future version. " + "Do `left, right = left.align(right, axis=1, copy=False)` " + "before e.g. `left == right`", + FutureWarning, + stacklevel=find_stack_level(), + ) + + left, right = left.align( + right, join="outer", axis=axis, level=level, copy=False + ) + right = _maybe_align_series_as_frame(left, right, axis) + + return left, right + + +def should_reindex_frame_op( + left: DataFrame, right, op, axis, default_axis, fill_value, level +) -> bool: + """ + Check if this is an operation between DataFrames that will need to reindex. + """ + assert isinstance(left, ABCDataFrame) + + if op is operator.pow or op is roperator.rpow: + # GH#32685 pow has special semantics for operating with null values + return False + + if not isinstance(right, ABCDataFrame): + return False + + if fill_value is None and level is None and axis is default_axis: + # TODO: any other cases we should handle here? 
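+
+        # Example: left.columns == ['a', 'b'] and right.columns == ['b', 'c']
+        # share only ['b'], which equals neither side, so this returns True
+        # and the op is routed through frame_arith_method_with_reindex.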
+ + # Intersection is always unique so we have to check the unique columns + left_uniques = left.columns.unique() + right_uniques = right.columns.unique() + cols = left_uniques.intersection(right_uniques) + if len(cols) and not (cols.equals(left_uniques) and cols.equals(right_uniques)): + # TODO: is there a shortcut available when len(cols) == 0? + return True + + return False + + +def frame_arith_method_with_reindex(left: DataFrame, right: DataFrame, op) -> DataFrame: + """ + For DataFrame-with-DataFrame operations that require reindexing, + operate only on shared columns, then reindex. + + Parameters + ---------- + left : DataFrame + right : DataFrame + op : binary operator + + Returns + ------- + DataFrame + """ + # GH#31623, only operate on shared columns + cols, lcols, rcols = left.columns.join( + right.columns, how="inner", level=None, return_indexers=True + ) + + new_left = left.iloc[:, lcols] + new_right = right.iloc[:, rcols] + result = op(new_left, new_right) + + # Do the join on the columns instead of using align_method_FRAME + # to avoid constructing two potentially large/sparse DataFrames + join_columns, _, _ = left.columns.join( + right.columns, how="outer", level=None, return_indexers=True + ) + + if result.columns.has_duplicates: + # Avoid reindexing with a duplicate axis. + # https://github.com/pandas-dev/pandas/issues/35194 + indexer, _ = result.columns.get_indexer_non_unique(join_columns) + indexer = algorithms.unique1d(indexer) + result = result._reindex_with_indexers( + {1: [join_columns, indexer]}, allow_dups=True + ) + else: + result = result.reindex(join_columns, axis=1) + + return result + + +def _maybe_align_series_as_frame(frame: DataFrame, series: Series, axis: int): + """ + If the Series operand is not EA-dtype, we can broadcast to 2D and operate + blockwise. 
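+
+    The broadcast values are wrapped back into a DataFrame that reuses the
+    frame's index and columns; EA-dtype Series other than
+    ``datetime64[ns]``/``timedelta64[ns]`` are returned unchanged.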
+ """ + rvalues = series._values + if not isinstance(rvalues, np.ndarray): + # TODO(EA2D): no need to special-case with 2D EAs + if rvalues.dtype == "datetime64[ns]" or rvalues.dtype == "timedelta64[ns]": + # We can losslessly+cheaply cast to ndarray + rvalues = np.asarray(rvalues) + else: + return series + + if axis == 0: + rvalues = rvalues.reshape(-1, 1) + else: + rvalues = rvalues.reshape(1, -1) + + rvalues = np.broadcast_to(rvalues, frame.shape) + return type(frame)(rvalues, index=frame.index, columns=frame.columns) + + +def flex_arith_method_FRAME(op): + op_name = op.__name__.strip("_") + default_axis = "columns" + + na_op = get_array_op(op) + doc = make_flex_doc(op_name, "dataframe") + + @Appender(doc) + def f(self, other, axis=default_axis, level=None, fill_value=None): + + if should_reindex_frame_op( + self, other, op, axis, default_axis, fill_value, level + ): + return frame_arith_method_with_reindex(self, other, op) + + if isinstance(other, ABCSeries) and fill_value is not None: + # TODO: We could allow this in cases where we end up going + # through the DataFrame path + raise NotImplementedError(f"fill_value {fill_value} not supported.") + + axis = self._get_axis_number(axis) if axis is not None else 1 + + other = maybe_prepare_scalar_for_op(other, self.shape) + self, other = align_method_FRAME(self, other, axis, flex=True, level=level) + + if isinstance(other, ABCDataFrame): + # Another DataFrame + new_data = self._combine_frame(other, na_op, fill_value) + + elif isinstance(other, ABCSeries): + new_data = self._dispatch_frame_op(other, op, axis=axis) + else: + # in this case we always have `np.ndim(other) == 0` + if fill_value is not None: + self = self.fillna(fill_value) + + new_data = self._dispatch_frame_op(other, op) + + return self._construct_result(new_data) + + f.__name__ = op_name + + return f + + +def flex_comp_method_FRAME(op): + op_name = op.__name__.strip("_") + default_axis = "columns" # because we are "flex" + + doc = _flex_comp_doc_FRAME.format( + op_name=op_name, desc=_op_descriptions[op_name]["desc"] + ) + + @Appender(doc) + def f(self, other, axis=default_axis, level=None): + axis = self._get_axis_number(axis) if axis is not None else 1 + + self, other = align_method_FRAME(self, other, axis, flex=True, level=level) + + new_data = self._dispatch_frame_op(other, op, axis=axis) + return self._construct_result(new_data) + + f.__name__ = op_name + + return f diff --git a/.local/lib/python3.8/site-packages/pandas/core/ops/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/ops/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..ad108c6 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/ops/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/ops/__pycache__/array_ops.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/ops/__pycache__/array_ops.cpython-38.pyc new file mode 100644 index 0000000..b2df481 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/ops/__pycache__/array_ops.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/ops/__pycache__/common.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/ops/__pycache__/common.cpython-38.pyc new file mode 100644 index 0000000..85ef1e2 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/ops/__pycache__/common.cpython-38.pyc differ diff --git 
a/.local/lib/python3.8/site-packages/pandas/core/ops/__pycache__/dispatch.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/ops/__pycache__/dispatch.cpython-38.pyc new file mode 100644 index 0000000..1ea399d Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/ops/__pycache__/dispatch.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/ops/__pycache__/docstrings.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/ops/__pycache__/docstrings.cpython-38.pyc new file mode 100644 index 0000000..b20d1cd Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/ops/__pycache__/docstrings.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/ops/__pycache__/invalid.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/ops/__pycache__/invalid.cpython-38.pyc new file mode 100644 index 0000000..9969cc9 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/ops/__pycache__/invalid.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/ops/__pycache__/mask_ops.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/ops/__pycache__/mask_ops.cpython-38.pyc new file mode 100644 index 0000000..8b4aa4f Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/ops/__pycache__/mask_ops.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/ops/__pycache__/methods.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/ops/__pycache__/methods.cpython-38.pyc new file mode 100644 index 0000000..e9865c6 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/ops/__pycache__/methods.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/ops/__pycache__/missing.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/ops/__pycache__/missing.cpython-38.pyc new file mode 100644 index 0000000..1c6333d Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/ops/__pycache__/missing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/ops/array_ops.py b/.local/lib/python3.8/site-packages/pandas/core/ops/array_ops.py new file mode 100644 index 0000000..84b8ce6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/ops/array_ops.py @@ -0,0 +1,522 @@ +""" +Functions for arithmetic and comparison operations on NumPy arrays and +ExtensionArrays. 
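+
+The central entry points below (``arithmetic_op``, ``comparison_op``,
+``logical_op``) take already-unwrapped operands (an ndarray or
+ExtensionArray ``left`` plus a scalar or array ``right``) and an operator,
+and return array results; wrapping back into Series/DataFrame happens in
+the callers. A hedged usage sketch of this internal API::
+
+    import operator
+    import numpy as np
+    # arithmetic_op(np.array([1, 2]), np.array([3, 4]), operator.add)
+    # -> array([4, 6])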
+""" +import datetime +from functools import partial +import operator +from typing import Any + +import numpy as np + +from pandas._libs import ( + NaT, + Timedelta, + Timestamp, + lib, + ops as libops, +) +from pandas._libs.tslibs import BaseOffset +from pandas._typing import ( + ArrayLike, + Shape, +) + +from pandas.core.dtypes.cast import ( + construct_1d_object_array_from_listlike, + find_common_type, +) +from pandas.core.dtypes.common import ( + ensure_object, + is_bool_dtype, + is_integer_dtype, + is_list_like, + is_numeric_v_string_like, + is_object_dtype, + is_scalar, +) +from pandas.core.dtypes.generic import ( + ABCExtensionArray, + ABCIndex, + ABCSeries, +) +from pandas.core.dtypes.missing import ( + isna, + notna, +) + +import pandas.core.computation.expressions as expressions +from pandas.core.construction import ensure_wrapped_if_datetimelike +from pandas.core.ops import ( + missing, + roperator, +) +from pandas.core.ops.dispatch import should_extension_dispatch +from pandas.core.ops.invalid import invalid_comparison + + +def comp_method_OBJECT_ARRAY(op, x, y): + if isinstance(y, list): + y = construct_1d_object_array_from_listlike(y) + + if isinstance(y, (np.ndarray, ABCSeries, ABCIndex)): + if not is_object_dtype(y.dtype): + y = y.astype(np.object_) + + if isinstance(y, (ABCSeries, ABCIndex)): + y = y._values + + if x.shape != y.shape: + raise ValueError("Shapes must match", x.shape, y.shape) + result = libops.vec_compare(x.ravel(), y.ravel(), op) + else: + result = libops.scalar_compare(x.ravel(), y, op) + return result.reshape(x.shape) + + +def _masked_arith_op(x: np.ndarray, y, op): + """ + If the given arithmetic operation fails, attempt it again on + only the non-null elements of the input array(s). + + Parameters + ---------- + x : np.ndarray + y : np.ndarray, Series, Index + op : binary operator + """ + # For Series `x` is 1D so ravel() is a no-op; calling it anyway makes + # the logic valid for both Series and DataFrame ops. + xrav = x.ravel() + assert isinstance(x, np.ndarray), type(x) + if isinstance(y, np.ndarray): + dtype = find_common_type([x.dtype, y.dtype]) + result = np.empty(x.size, dtype=dtype) + + if len(x) != len(y): + raise ValueError(x.shape, y.shape) + else: + ymask = notna(y) + + # NB: ravel() is only safe since y is ndarray; for e.g. PeriodIndex + # we would get int64 dtype, see GH#19956 + yrav = y.ravel() + mask = notna(xrav) & ymask.ravel() + + # See GH#5284, GH#5035, GH#19448 for historical reference + if mask.any(): + result[mask] = op(xrav[mask], yrav[mask]) + + else: + if not is_scalar(y): + raise TypeError( + f"Cannot broadcast np.ndarray with operand of type { type(y) }" + ) + + # mask is only meaningful for x + result = np.empty(x.size, dtype=x.dtype) + mask = notna(xrav) + + # 1 ** np.nan is 1. So we have to unmask those. + if op is pow: + mask = np.where(x == 1, False, mask) + elif op is roperator.rpow: + mask = np.where(y == 1, False, mask) + + if mask.any(): + result[mask] = op(xrav[mask], y) + + np.putmask(result, ~mask, np.nan) + result = result.reshape(x.shape) # 2D compat + return result + + +def _na_arithmetic_op(left: np.ndarray, right, op, is_cmp: bool = False): + """ + Return the result of evaluating op on the passed in values. + + If native types are not compatible, try coercion to object dtype. + + Parameters + ---------- + left : np.ndarray + right : np.ndarray or scalar + Excludes DataFrame, Series, Index, ExtensionArray. + is_cmp : bool, default False + If this a comparison operation. 
+ + Returns + ------- + array-like + + Raises + ------ + TypeError : invalid operation + """ + if isinstance(right, str): + # can never use numexpr + func = op + else: + func = partial(expressions.evaluate, op) + + try: + result = func(left, right) + except TypeError: + if not is_cmp and (is_object_dtype(left.dtype) or is_object_dtype(right)): + # For object dtype, fallback to a masked operation (only operating + # on the non-missing values) + # Don't do this for comparisons, as that will handle complex numbers + # incorrectly, see GH#32047 + result = _masked_arith_op(left, right, op) + else: + raise + + if is_cmp and (is_scalar(result) or result is NotImplemented): + # numpy returned a scalar instead of operating element-wise + # e.g. numeric array vs str + # TODO: can remove this after dropping some future numpy version? + return invalid_comparison(left, right, op) + + return missing.dispatch_fill_zeros(op, left, right, result) + + +def arithmetic_op(left: ArrayLike, right: Any, op): + """ + Evaluate an arithmetic operation `+`, `-`, `*`, `/`, `//`, `%`, `**`, ... + + Note: the caller is responsible for ensuring that numpy warnings are + suppressed (with np.errstate(all="ignore")) if needed. + + Parameters + ---------- + left : np.ndarray or ExtensionArray + right : object + Cannot be a DataFrame or Index. Series is *not* excluded. + op : {operator.add, operator.sub, ...} + Or one of the reversed variants from roperator. + + Returns + ------- + ndarray or ExtensionArray + Or a 2-tuple of these in the case of divmod or rdivmod. + """ + # NB: We assume that extract_array and ensure_wrapped_if_datetimelike + # have already been called on `left` and `right`, + # and `maybe_prepare_scalar_for_op` has already been called on `right` + # We need to special-case datetime64/timedelta64 dtypes (e.g. because numpy + # casts integer dtypes to timedelta64 when operating with timedelta64 - GH#22390) + + if ( + should_extension_dispatch(left, right) + or isinstance(right, (Timedelta, BaseOffset, Timestamp)) + or right is NaT + ): + # Timedelta/Timestamp and other custom scalars are included in the check + # because numexpr will fail on it, see GH#31457 + res_values = op(left, right) + else: + # TODO we should handle EAs consistently and move this check before the if/else + # (https://github.com/pandas-dev/pandas/issues/41165) + _bool_arith_check(op, left, right) + + res_values = _na_arithmetic_op(left, right, op) + + return res_values + + +def comparison_op(left: ArrayLike, right: Any, op) -> ArrayLike: + """ + Evaluate a comparison operation `=`, `!=`, `>=`, `>`, `<=`, or `<`. + + Note: the caller is responsible for ensuring that numpy warnings are + suppressed (with np.errstate(all="ignore")) if needed. + + Parameters + ---------- + left : np.ndarray or ExtensionArray + right : object + Cannot be a DataFrame, Series, or Index. + op : {operator.eq, operator.ne, operator.gt, operator.ge, operator.lt, operator.le} + + Returns + ------- + ndarray or ExtensionArray + """ + # NB: We assume extract_array has already been called on left and right + lvalues = ensure_wrapped_if_datetimelike(left) + rvalues = ensure_wrapped_if_datetimelike(right) + + rvalues = lib.item_from_zerodim(rvalues) + if isinstance(rvalues, list): + # We don't catch tuple here bc we may be comparing e.g. 
MultiIndex + # to a tuple that represents a single entry, see test_compare_tuple_strs + rvalues = np.asarray(rvalues) + + if isinstance(rvalues, (np.ndarray, ABCExtensionArray)): + # TODO: make this treatment consistent across ops and classes. + # We are not catching all listlikes here (e.g. frozenset, tuple) + # The ambiguous case is object-dtype. See GH#27803 + if len(lvalues) != len(rvalues): + raise ValueError( + "Lengths must match to compare", lvalues.shape, rvalues.shape + ) + + if should_extension_dispatch(lvalues, rvalues) or ( + (isinstance(rvalues, (Timedelta, BaseOffset, Timestamp)) or right is NaT) + and not is_object_dtype(lvalues.dtype) + ): + # Call the method on lvalues + res_values = op(lvalues, rvalues) + + elif is_scalar(rvalues) and isna(rvalues): # TODO: but not pd.NA? + # numpy does not like comparisons vs None + if op is operator.ne: + res_values = np.ones(lvalues.shape, dtype=bool) + else: + res_values = np.zeros(lvalues.shape, dtype=bool) + + elif is_numeric_v_string_like(lvalues, rvalues): + # GH#36377 going through the numexpr path would incorrectly raise + return invalid_comparison(lvalues, rvalues, op) + + elif is_object_dtype(lvalues.dtype) or isinstance(rvalues, str): + res_values = comp_method_OBJECT_ARRAY(op, lvalues, rvalues) + + else: + res_values = _na_arithmetic_op(lvalues, rvalues, op, is_cmp=True) + + return res_values + + +def na_logical_op(x: np.ndarray, y, op): + try: + # For exposition, write: + # yarr = isinstance(y, np.ndarray) + # yint = is_integer(y) or (yarr and y.dtype.kind == "i") + # ybool = is_bool(y) or (yarr and y.dtype.kind == "b") + # xint = x.dtype.kind == "i" + # xbool = x.dtype.kind == "b" + # Then Cases where this goes through without raising include: + # (xint or xbool) and (yint or bool) + result = op(x, y) + except TypeError: + if isinstance(y, np.ndarray): + # bool-bool dtype operations should be OK, should not get here + assert not (is_bool_dtype(x.dtype) and is_bool_dtype(y.dtype)) + x = ensure_object(x) + y = ensure_object(y) + result = libops.vec_binop(x.ravel(), y.ravel(), op) + else: + # let null fall thru + assert lib.is_scalar(y) + if not isna(y): + y = bool(y) + try: + result = libops.scalar_binop(x, y, op) + except ( + TypeError, + ValueError, + AttributeError, + OverflowError, + NotImplementedError, + ) as err: + typ = type(y).__name__ + raise TypeError( + f"Cannot perform '{op.__name__}' with a dtyped [{x.dtype}] array " + f"and scalar of type [{typ}]" + ) from err + + return result.reshape(x.shape) + + +def logical_op(left: ArrayLike, right: Any, op) -> ArrayLike: + """ + Evaluate a logical operation `|`, `&`, or `^`. + + Parameters + ---------- + left : np.ndarray or ExtensionArray + right : object + Cannot be a DataFrame, Series, or Index. + op : {operator.and_, operator.or_, operator.xor} + Or one of the reversed variants from roperator. + + Returns + ------- + ndarray or ExtensionArray + """ + fill_int = lambda x: x + + def fill_bool(x, left=None): + # if `left` is specifically not-boolean, we do not cast to bool + if x.dtype.kind in ["c", "f", "O"]: + # dtypes that can hold NA + mask = isna(x) + if mask.any(): + x = x.astype(object) + x[mask] = False + + if left is None or is_bool_dtype(left.dtype): + x = x.astype(bool) + return x + + is_self_int_dtype = is_integer_dtype(left.dtype) + + right = lib.item_from_zerodim(right) + if is_list_like(right) and not hasattr(right, "dtype"): + # e.g. 
list, tuple + right = construct_1d_object_array_from_listlike(right) + + # NB: We assume extract_array has already been called on left and right + lvalues = ensure_wrapped_if_datetimelike(left) + rvalues = right + + if should_extension_dispatch(lvalues, rvalues): + # Call the method on lvalues + res_values = op(lvalues, rvalues) + + else: + if isinstance(rvalues, np.ndarray): + is_other_int_dtype = is_integer_dtype(rvalues.dtype) + rvalues = rvalues if is_other_int_dtype else fill_bool(rvalues, lvalues) + + else: + # i.e. scalar + is_other_int_dtype = lib.is_integer(rvalues) + + # For int vs int `^`, `|`, `&` are bitwise operators and return + # integer dtypes. Otherwise these are boolean ops + filler = fill_int if is_self_int_dtype and is_other_int_dtype else fill_bool + + res_values = na_logical_op(lvalues, rvalues, op) + # error: Cannot call function of unknown type + res_values = filler(res_values) # type: ignore[operator] + + return res_values + + +def get_array_op(op): + """ + Return a binary array operation corresponding to the given operator op. + + Parameters + ---------- + op : function + Binary operator from operator or roperator module. + + Returns + ------- + functools.partial + """ + if isinstance(op, partial): + # We get here via dispatch_to_series in DataFrame case + # e.g. test_rolling_consistency_var_debiasing_factors + return op + + op_name = op.__name__.strip("_").lstrip("r") + if op_name == "arith_op": + # Reached via DataFrame._combine_frame i.e. flex methods + # e.g. test_df_add_flex_filled_mixed_dtypes + return op + + if op_name in {"eq", "ne", "lt", "le", "gt", "ge"}: + return partial(comparison_op, op=op) + elif op_name in {"and", "or", "xor", "rand", "ror", "rxor"}: + return partial(logical_op, op=op) + elif op_name in { + "add", + "sub", + "mul", + "truediv", + "floordiv", + "mod", + "divmod", + "pow", + }: + return partial(arithmetic_op, op=op) + else: + raise NotImplementedError(op_name) + + +def maybe_prepare_scalar_for_op(obj, shape: Shape): + """ + Cast non-pandas objects to pandas types to unify behavior of arithmetic + and comparison operations. + + Parameters + ---------- + obj: object + shape : tuple[int] + + Returns + ------- + out : object + + Notes + ----- + Be careful to call this *after* determining the `name` attribute to be + attached to the result of the arithmetic operation. 
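+
+    A hedged illustration (hypothetical values)::
+
+        import datetime
+        # A stdlib timedelta is upcast so numeric ops fail cleanly:
+        # maybe_prepare_scalar_for_op(datetime.timedelta(days=1), (3,))
+        # -> Timedelta('1 days')
+        # A np.timedelta64("NaT") scalar is instead broadcast into a
+        # TimedeltaArray of the given shape so it is not mistaken for
+        # a datetime NaT.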
+ """ + if type(obj) is datetime.timedelta: + # GH#22390 cast up to Timedelta to rely on Timedelta + # implementation; otherwise operation against numeric-dtype + # raises TypeError + return Timedelta(obj) + elif type(obj) is datetime.datetime: + # cast up to Timestamp to rely on Timestamp implementation, see Timedelta above + return Timestamp(obj) + elif isinstance(obj, np.datetime64): + # GH#28080 numpy casts integer-dtype to datetime64 when doing + # array[int] + datetime64, which we do not allow + if isna(obj): + from pandas.core.arrays import DatetimeArray + + # Avoid possible ambiguities with pd.NaT + obj = obj.astype("datetime64[ns]") + right = np.broadcast_to(obj, shape) + return DatetimeArray(right) + + return Timestamp(obj) + + elif isinstance(obj, np.timedelta64): + if isna(obj): + from pandas.core.arrays import TimedeltaArray + + # wrapping timedelta64("NaT") in Timedelta returns NaT, + # which would incorrectly be treated as a datetime-NaT, so + # we broadcast and wrap in a TimedeltaArray + obj = obj.astype("timedelta64[ns]") + right = np.broadcast_to(obj, shape) + return TimedeltaArray(right) + + # In particular non-nanosecond timedelta64 needs to be cast to + # nanoseconds, or else we get undesired behavior like + # np.timedelta64(3, 'D') / 2 == np.timedelta64(1, 'D') + return Timedelta(obj) + + return obj + + +_BOOL_OP_NOT_ALLOWED = { + operator.truediv, + roperator.rtruediv, + operator.floordiv, + roperator.rfloordiv, + operator.pow, + roperator.rpow, +} + + +def _bool_arith_check(op, a, b): + """ + In contrast to numpy, pandas raises an error for certain operations + with booleans. + """ + if op in _BOOL_OP_NOT_ALLOWED: + if is_bool_dtype(a.dtype) and ( + is_bool_dtype(b) or isinstance(b, (bool, np.bool_)) + ): + op_name = op.__name__.strip("_").lstrip("r") + raise NotImplementedError( + f"operator '{op_name}' not implemented for bool dtypes" + ) diff --git a/.local/lib/python3.8/site-packages/pandas/core/ops/common.py b/.local/lib/python3.8/site-packages/pandas/core/ops/common.py new file mode 100644 index 0000000..b883fe7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/ops/common.py @@ -0,0 +1,140 @@ +""" +Boilerplate functions used in defining binary operations. +""" +from functools import wraps +from typing import Callable + +from pandas._libs.lib import item_from_zerodim +from pandas._libs.missing import is_matching_na +from pandas._typing import F + +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCIndex, + ABCSeries, +) + + +def unpack_zerodim_and_defer(name: str) -> Callable[[F], F]: + """ + Boilerplate for pandas conventions in arithmetic and comparison methods. + + Parameters + ---------- + name : str + + Returns + ------- + decorator + """ + + def wrapper(method: F) -> F: + return _unpack_zerodim_and_defer(method, name) + + return wrapper + + +def _unpack_zerodim_and_defer(method, name: str): + """ + Boilerplate for pandas conventions in arithmetic and comparison methods. + + Ensure method returns NotImplemented when operating against "senior" + classes. Ensure zero-dimensional ndarrays are always unpacked. 
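+    Here "senior" follows the order DataFrame > Series > Index: when
+    ``other`` is an instance of a class senior to ``self``, the wrapped
+    method returns NotImplemented so Python can try the reflected
+    operation on ``other``.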
+ + Parameters + ---------- + method : binary method + name : str + + Returns + ------- + method + """ + is_cmp = name.strip("__") in {"eq", "ne", "lt", "le", "gt", "ge"} + + @wraps(method) + def new_method(self, other): + + if is_cmp and isinstance(self, ABCIndex) and isinstance(other, ABCSeries): + # For comparison ops, Index does *not* defer to Series + pass + else: + for cls in [ABCDataFrame, ABCSeries, ABCIndex]: + if isinstance(self, cls): + break + if isinstance(other, cls): + return NotImplemented + + other = item_from_zerodim(other) + + return method(self, other) + + return new_method + + +def get_op_result_name(left, right): + """ + Find the appropriate name to pin to an operation result. This result + should always be either an Index or a Series. + + Parameters + ---------- + left : {Series, Index} + right : object + + Returns + ------- + name : object + Usually a string + """ + if isinstance(right, (ABCSeries, ABCIndex)): + name = _maybe_match_name(left, right) + else: + name = left.name + return name + + +def _maybe_match_name(a, b): + """ + Try to find a name to attach to the result of an operation between + a and b. If only one of these has a `name` attribute, return that + name. Otherwise return a consensus name if they match or None if + they have different names. + + Parameters + ---------- + a : object + b : object + + Returns + ------- + name : str or None + + See Also + -------- + pandas.core.common.consensus_name_attr + """ + a_has = hasattr(a, "name") + b_has = hasattr(b, "name") + if a_has and b_has: + try: + if a.name == b.name: + return a.name + elif is_matching_na(a.name, b.name): + # e.g. both are np.nan + return a.name + else: + return None + except TypeError: + # pd.NA + if is_matching_na(a.name, b.name): + return a.name + return None + except ValueError: + # e.g. np.int64(1) vs (np.int64(1), np.int64(2)) + return None + elif a_has: + return a.name + elif b_has: + return b.name + return None diff --git a/.local/lib/python3.8/site-packages/pandas/core/ops/dispatch.py b/.local/lib/python3.8/site-packages/pandas/core/ops/dispatch.py new file mode 100644 index 0000000..bfd4afe --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/ops/dispatch.py @@ -0,0 +1,24 @@ +""" +Functions for defining unary operations. +""" +from typing import Any + +from pandas._typing import ArrayLike + +from pandas.core.dtypes.generic import ABCExtensionArray + + +def should_extension_dispatch(left: ArrayLike, right: Any) -> bool: + """ + Identify cases where Series operation should dispatch to ExtensionArray method. + + Parameters + ---------- + left : np.ndarray or ExtensionArray + right : object + + Returns + ------- + bool + """ + return isinstance(left, ABCExtensionArray) or isinstance(right, ABCExtensionArray) diff --git a/.local/lib/python3.8/site-packages/pandas/core/ops/docstrings.py b/.local/lib/python3.8/site-packages/pandas/core/ops/docstrings.py new file mode 100644 index 0000000..9134ec7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/ops/docstrings.py @@ -0,0 +1,749 @@ +""" +Templating for ops docstrings +""" +from __future__ import annotations + + +def make_flex_doc(op_name: str, typ: str) -> str: + """ + Make the appropriate substitutions for the given operation and class-typ + into either _flex_doc_SERIES or _flex_doc_FRAME to return the docstring + to attach to a generated method. + + Parameters + ---------- + op_name : str {'__add__', '__sub__', ... 
'__eq__', '__ne__', ...} + typ : str {series, 'dataframe']} + + Returns + ------- + doc : str + """ + op_name = op_name.replace("__", "") + op_desc = _op_descriptions[op_name] + + op_desc_op = op_desc["op"] + assert op_desc_op is not None # for mypy + if op_name.startswith("r"): + equiv = "other " + op_desc_op + " " + typ + elif op_name == "divmod": + equiv = f"{op_name}({typ}, other)" + else: + equiv = typ + " " + op_desc_op + " other" + + if typ == "series": + base_doc = _flex_doc_SERIES + if op_desc["reverse"]: + base_doc += _see_also_reverse_SERIES.format( + reverse=op_desc["reverse"], see_also_desc=op_desc["see_also_desc"] + ) + doc_no_examples = base_doc.format( + desc=op_desc["desc"], + op_name=op_name, + equiv=equiv, + series_returns=op_desc["series_returns"], + ) + ser_example = op_desc["series_examples"] + if ser_example: + doc = doc_no_examples + ser_example + else: + doc = doc_no_examples + elif typ == "dataframe": + base_doc = _flex_doc_FRAME + doc = base_doc.format( + desc=op_desc["desc"], + op_name=op_name, + equiv=equiv, + reverse=op_desc["reverse"], + ) + else: + raise AssertionError("Invalid typ argument.") + return doc + + +_common_examples_algebra_SERIES = """ +Examples +-------- +>>> a = pd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) +>>> a +a 1.0 +b 1.0 +c 1.0 +d NaN +dtype: float64 +>>> b = pd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) +>>> b +a 1.0 +b NaN +d 1.0 +e NaN +dtype: float64""" + +_common_examples_comparison_SERIES = """ +Examples +-------- +>>> a = pd.Series([1, 1, 1, np.nan, 1], index=['a', 'b', 'c', 'd', 'e']) +>>> a +a 1.0 +b 1.0 +c 1.0 +d NaN +e 1.0 +dtype: float64 +>>> b = pd.Series([0, 1, 2, np.nan, 1], index=['a', 'b', 'c', 'd', 'f']) +>>> b +a 0.0 +b 1.0 +c 2.0 +d NaN +f 1.0 +dtype: float64""" + +_add_example_SERIES = ( + _common_examples_algebra_SERIES + + """ +>>> a.add(b, fill_value=0) +a 2.0 +b 1.0 +c 1.0 +d 1.0 +e NaN +dtype: float64 +""" +) + +_sub_example_SERIES = ( + _common_examples_algebra_SERIES + + """ +>>> a.subtract(b, fill_value=0) +a 0.0 +b 1.0 +c 1.0 +d -1.0 +e NaN +dtype: float64 +""" +) + +_mul_example_SERIES = ( + _common_examples_algebra_SERIES + + """ +>>> a.multiply(b, fill_value=0) +a 1.0 +b 0.0 +c 0.0 +d 0.0 +e NaN +dtype: float64 +""" +) + +_div_example_SERIES = ( + _common_examples_algebra_SERIES + + """ +>>> a.divide(b, fill_value=0) +a 1.0 +b inf +c inf +d 0.0 +e NaN +dtype: float64 +""" +) + +_floordiv_example_SERIES = ( + _common_examples_algebra_SERIES + + """ +>>> a.floordiv(b, fill_value=0) +a 1.0 +b NaN +c NaN +d 0.0 +e NaN +dtype: float64 +""" +) + +_divmod_example_SERIES = ( + _common_examples_algebra_SERIES + + """ +>>> a.divmod(b, fill_value=0) +(a 1.0 + b NaN + c NaN + d 0.0 + e NaN + dtype: float64, + a 0.0 + b NaN + c NaN + d 0.0 + e NaN + dtype: float64) +""" +) + +_mod_example_SERIES = ( + _common_examples_algebra_SERIES + + """ +>>> a.mod(b, fill_value=0) +a 0.0 +b NaN +c NaN +d 0.0 +e NaN +dtype: float64 +""" +) +_pow_example_SERIES = ( + _common_examples_algebra_SERIES + + """ +>>> a.pow(b, fill_value=0) +a 1.0 +b 1.0 +c 1.0 +d 0.0 +e NaN +dtype: float64 +""" +) + +_ne_example_SERIES = ( + _common_examples_algebra_SERIES + + """ +>>> a.ne(b, fill_value=0) +a False +b True +c True +d True +e True +dtype: bool +""" +) + +_eq_example_SERIES = ( + _common_examples_algebra_SERIES + + """ +>>> a.eq(b, fill_value=0) +a True +b False +c False +d False +e False +dtype: bool +""" +) + +_lt_example_SERIES = ( + _common_examples_comparison_SERIES + + """ +>>> a.lt(b, fill_value=0) +a 
False +b False +c True +d False +e False +f True +dtype: bool +""" +) + +_le_example_SERIES = ( + _common_examples_comparison_SERIES + + """ +>>> a.le(b, fill_value=0) +a False +b True +c True +d False +e False +f True +dtype: bool +""" +) + +_gt_example_SERIES = ( + _common_examples_comparison_SERIES + + """ +>>> a.gt(b, fill_value=0) +a True +b False +c False +d False +e True +f False +dtype: bool +""" +) + +_ge_example_SERIES = ( + _common_examples_comparison_SERIES + + """ +>>> a.ge(b, fill_value=0) +a True +b True +c False +d False +e True +f False +dtype: bool +""" +) + +_returns_series = """Series\n The result of the operation.""" + +_returns_tuple = """2-Tuple of Series\n The result of the operation.""" + +_op_descriptions: dict[str, dict[str, str | None]] = { + # Arithmetic Operators + "add": { + "op": "+", + "desc": "Addition", + "reverse": "radd", + "series_examples": _add_example_SERIES, + "series_returns": _returns_series, + }, + "sub": { + "op": "-", + "desc": "Subtraction", + "reverse": "rsub", + "series_examples": _sub_example_SERIES, + "series_returns": _returns_series, + }, + "mul": { + "op": "*", + "desc": "Multiplication", + "reverse": "rmul", + "series_examples": _mul_example_SERIES, + "series_returns": _returns_series, + "df_examples": None, + }, + "mod": { + "op": "%", + "desc": "Modulo", + "reverse": "rmod", + "series_examples": _mod_example_SERIES, + "series_returns": _returns_series, + }, + "pow": { + "op": "**", + "desc": "Exponential power", + "reverse": "rpow", + "series_examples": _pow_example_SERIES, + "series_returns": _returns_series, + "df_examples": None, + }, + "truediv": { + "op": "/", + "desc": "Floating division", + "reverse": "rtruediv", + "series_examples": _div_example_SERIES, + "series_returns": _returns_series, + "df_examples": None, + }, + "floordiv": { + "op": "//", + "desc": "Integer division", + "reverse": "rfloordiv", + "series_examples": _floordiv_example_SERIES, + "series_returns": _returns_series, + "df_examples": None, + }, + "divmod": { + "op": "divmod", + "desc": "Integer division and modulo", + "reverse": "rdivmod", + "series_examples": _divmod_example_SERIES, + "series_returns": _returns_tuple, + "df_examples": None, + }, + # Comparison Operators + "eq": { + "op": "==", + "desc": "Equal to", + "reverse": None, + "series_examples": _eq_example_SERIES, + "series_returns": _returns_series, + }, + "ne": { + "op": "!=", + "desc": "Not equal to", + "reverse": None, + "series_examples": _ne_example_SERIES, + "series_returns": _returns_series, + }, + "lt": { + "op": "<", + "desc": "Less than", + "reverse": None, + "series_examples": _lt_example_SERIES, + "series_returns": _returns_series, + }, + "le": { + "op": "<=", + "desc": "Less than or equal to", + "reverse": None, + "series_examples": _le_example_SERIES, + "series_returns": _returns_series, + }, + "gt": { + "op": ">", + "desc": "Greater than", + "reverse": None, + "series_examples": _gt_example_SERIES, + "series_returns": _returns_series, + }, + "ge": { + "op": ">=", + "desc": "Greater than or equal to", + "reverse": None, + "series_examples": _ge_example_SERIES, + "series_returns": _returns_series, + }, +} + +_py_num_ref = """see + `Python documentation + `_ + for more details""" +_op_names = list(_op_descriptions.keys()) +for key in _op_names: + reverse_op = _op_descriptions[key]["reverse"] + if reverse_op is not None: + _op_descriptions[reverse_op] = _op_descriptions[key].copy() + _op_descriptions[reverse_op]["reverse"] = key + _op_descriptions[key][ + "see_also_desc" + ] = 
f"Reverse of the {_op_descriptions[key]['desc']} operator, {_py_num_ref}" + _op_descriptions[reverse_op][ + "see_also_desc" + ] = f"Element-wise {_op_descriptions[key]['desc']}, {_py_num_ref}" + +_flex_doc_SERIES = """ +Return {desc} of series and other, element-wise (binary operator `{op_name}`). + +Equivalent to ``{equiv}``, but with support to substitute a fill_value for +missing data in either one of the inputs. + +Parameters +---------- +other : Series or scalar value +fill_value : None or float value, default None (NaN) + Fill existing missing (NaN) values, and any new element needed for + successful Series alignment, with this value before computation. + If data in both corresponding Series locations is missing + the result of filling (at that location) will be missing. +level : int or name + Broadcast across a level, matching Index values on the + passed MultiIndex level. + +Returns +------- +{series_returns} +""" + +_see_also_reverse_SERIES = """ +See Also +-------- +Series.{reverse} : {see_also_desc}. +""" + +_flex_doc_FRAME = """ +Get {desc} of dataframe and other, element-wise (binary operator `{op_name}`). + +Equivalent to ``{equiv}``, but with support to substitute a fill_value +for missing data in one of the inputs. With reverse version, `{reverse}`. + +Among flexible wrappers (`add`, `sub`, `mul`, `div`, `mod`, `pow`) to +arithmetic operators: `+`, `-`, `*`, `/`, `//`, `%`, `**`. + +Parameters +---------- +other : scalar, sequence, Series, or DataFrame + Any single or multiple element data structure, or list-like object. +axis : {{0 or 'index', 1 or 'columns'}} + Whether to compare by the index (0 or 'index') or columns + (1 or 'columns'). For Series input, axis to match Series index on. +level : int or label + Broadcast across a level, matching Index values on the + passed MultiIndex level. +fill_value : float or None, default None + Fill existing missing (NaN) values, and any new element needed for + successful DataFrame alignment, with this value before computation. + If data in both corresponding DataFrame locations is missing + the result will be missing. + +Returns +------- +DataFrame + Result of the arithmetic operation. + +See Also +-------- +DataFrame.add : Add DataFrames. +DataFrame.sub : Subtract DataFrames. +DataFrame.mul : Multiply DataFrames. +DataFrame.div : Divide DataFrames (float division). +DataFrame.truediv : Divide DataFrames (float division). +DataFrame.floordiv : Divide DataFrames (integer division). +DataFrame.mod : Calculate modulo (remainder after division). +DataFrame.pow : Calculate exponential power. + +Notes +----- +Mismatched indices will be unioned together. + +Examples +-------- +>>> df = pd.DataFrame({{'angles': [0, 3, 4], +... 'degrees': [360, 180, 360]}}, +... index=['circle', 'triangle', 'rectangle']) +>>> df + angles degrees +circle 0 360 +triangle 3 180 +rectangle 4 360 + +Add a scalar with operator version which return the same +results. + +>>> df + 1 + angles degrees +circle 1 361 +triangle 4 181 +rectangle 5 361 + +>>> df.add(1) + angles degrees +circle 1 361 +triangle 4 181 +rectangle 5 361 + +Divide by constant with reverse version. + +>>> df.div(10) + angles degrees +circle 0.0 36.0 +triangle 0.3 18.0 +rectangle 0.4 36.0 + +>>> df.rdiv(10) + angles degrees +circle inf 0.027778 +triangle 3.333333 0.055556 +rectangle 2.500000 0.027778 + +Subtract a list and Series by axis with operator version. 
+ +>>> df - [1, 2] + angles degrees +circle -1 358 +triangle 2 178 +rectangle 3 358 + +>>> df.sub([1, 2], axis='columns') + angles degrees +circle -1 358 +triangle 2 178 +rectangle 3 358 + +>>> df.sub(pd.Series([1, 1, 1], index=['circle', 'triangle', 'rectangle']), +... axis='index') + angles degrees +circle -1 359 +triangle 2 179 +rectangle 3 359 + +Multiply a DataFrame of different shape with operator version. + +>>> other = pd.DataFrame({{'angles': [0, 3, 4]}}, +... index=['circle', 'triangle', 'rectangle']) +>>> other + angles +circle 0 +triangle 3 +rectangle 4 + +>>> df * other + angles degrees +circle 0 NaN +triangle 9 NaN +rectangle 16 NaN + +>>> df.mul(other, fill_value=0) + angles degrees +circle 0 0.0 +triangle 9 0.0 +rectangle 16 0.0 + +Divide by a MultiIndex by level. + +>>> df_multindex = pd.DataFrame({{'angles': [0, 3, 4, 4, 5, 6], +... 'degrees': [360, 180, 360, 360, 540, 720]}}, +... index=[['A', 'A', 'A', 'B', 'B', 'B'], +... ['circle', 'triangle', 'rectangle', +... 'square', 'pentagon', 'hexagon']]) +>>> df_multindex + angles degrees +A circle 0 360 + triangle 3 180 + rectangle 4 360 +B square 4 360 + pentagon 5 540 + hexagon 6 720 + +>>> df.div(df_multindex, level=1, fill_value=0) + angles degrees +A circle NaN 1.0 + triangle 1.0 1.0 + rectangle 1.0 1.0 +B square 0.0 0.0 + pentagon 0.0 0.0 + hexagon 0.0 0.0 +""" + +_flex_comp_doc_FRAME = """ +Get {desc} of dataframe and other, element-wise (binary operator `{op_name}`). + +Among flexible wrappers (`eq`, `ne`, `le`, `lt`, `ge`, `gt`) to comparison +operators. + +Equivalent to `==`, `!=`, `<=`, `<`, `>=`, `>` with support to choose axis +(rows or columns) and level for comparison. + +Parameters +---------- +other : scalar, sequence, Series, or DataFrame + Any single or multiple element data structure, or list-like object. +axis : {{0 or 'index', 1 or 'columns'}}, default 'columns' + Whether to compare by the index (0 or 'index') or columns + (1 or 'columns'). +level : int or label + Broadcast across a level, matching Index values on the passed + MultiIndex level. + +Returns +------- +DataFrame of bool + Result of the comparison. + +See Also +-------- +DataFrame.eq : Compare DataFrames for equality elementwise. +DataFrame.ne : Compare DataFrames for inequality elementwise. +DataFrame.le : Compare DataFrames for less than inequality + or equality elementwise. +DataFrame.lt : Compare DataFrames for strictly less than + inequality elementwise. +DataFrame.ge : Compare DataFrames for greater than inequality + or equality elementwise. +DataFrame.gt : Compare DataFrames for strictly greater than + inequality elementwise. + +Notes +----- +Mismatched indices will be unioned together. +`NaN` values are considered different (i.e. `NaN` != `NaN`). + +Examples +-------- +>>> df = pd.DataFrame({{'cost': [250, 150, 100], +... 'revenue': [100, 250, 300]}}, +... 
index=['A', 'B', 'C']) +>>> df + cost revenue +A 250 100 +B 150 250 +C 100 300 + +Comparison with a scalar, using either the operator or method: + +>>> df == 100 + cost revenue +A False True +B False False +C True False + +>>> df.eq(100) + cost revenue +A False True +B False False +C True False + +When `other` is a :class:`Series`, the columns of a DataFrame are aligned +with the index of `other` and broadcast: + +>>> df != pd.Series([100, 250], index=["cost", "revenue"]) + cost revenue +A True True +B True False +C False True + +Use the method to control the broadcast axis: + +>>> df.ne(pd.Series([100, 300], index=["A", "D"]), axis='index') + cost revenue +A True False +B True True +C True True +D True True + +When comparing to an arbitrary sequence, the number of columns must +match the number elements in `other`: + +>>> df == [250, 100] + cost revenue +A True True +B False False +C False False + +Use the method to control the axis: + +>>> df.eq([250, 250, 100], axis='index') + cost revenue +A True False +B False True +C True False + +Compare to a DataFrame of different shape. + +>>> other = pd.DataFrame({{'revenue': [300, 250, 100, 150]}}, +... index=['A', 'B', 'C', 'D']) +>>> other + revenue +A 300 +B 250 +C 100 +D 150 + +>>> df.gt(other) + cost revenue +A False False +B False False +C False True +D False False + +Compare to a MultiIndex by level. + +>>> df_multindex = pd.DataFrame({{'cost': [250, 150, 100, 150, 300, 220], +... 'revenue': [100, 250, 300, 200, 175, 225]}}, +... index=[['Q1', 'Q1', 'Q1', 'Q2', 'Q2', 'Q2'], +... ['A', 'B', 'C', 'A', 'B', 'C']]) +>>> df_multindex + cost revenue +Q1 A 250 100 + B 150 250 + C 100 300 +Q2 A 150 200 + B 300 175 + C 220 225 + +>>> df.le(df_multindex, level=1) + cost revenue +Q1 A True True + B True True + C True True +Q2 A False True + B True False + C True False +""" diff --git a/.local/lib/python3.8/site-packages/pandas/core/ops/invalid.py b/.local/lib/python3.8/site-packages/pandas/core/ops/invalid.py new file mode 100644 index 0000000..cc4a1f1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/ops/invalid.py @@ -0,0 +1,56 @@ +""" +Templates for invalid operations. +""" +import operator + +import numpy as np + + +def invalid_comparison(left, right, op): + """ + If a comparison has mismatched types and is not necessarily meaningful, + follow python3 conventions by: + + - returning all-False for equality + - returning all-True for inequality + - raising TypeError otherwise + + Parameters + ---------- + left : array-like + right : scalar, array-like + op : operator.{eq, ne, lt, le, gt} + + Raises + ------ + TypeError : on inequality comparisons + """ + if op is operator.eq: + res_values = np.zeros(left.shape, dtype=bool) + elif op is operator.ne: + res_values = np.ones(left.shape, dtype=bool) + else: + typ = type(right).__name__ + raise TypeError(f"Invalid comparison between dtype={left.dtype} and {typ}") + return res_values + + +def make_invalid_op(name: str): + """ + Return a binary method that always raises a TypeError. 
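+    This is used to pin operations that are undefined for a given index
+    type, so the error names both the operation and the class it was
+    called on.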
+ + Parameters + ---------- + name : str + + Returns + ------- + invalid_op : function + """ + + def invalid_op(self, other=None): + typ = type(self).__name__ + raise TypeError(f"cannot perform {name} with this index type: {typ}") + + invalid_op.__name__ = name + return invalid_op diff --git a/.local/lib/python3.8/site-packages/pandas/core/ops/mask_ops.py b/.local/lib/python3.8/site-packages/pandas/core/ops/mask_ops.py new file mode 100644 index 0000000..57bacba --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/ops/mask_ops.py @@ -0,0 +1,189 @@ +""" +Ops for masked arrays. +""" +from __future__ import annotations + +import numpy as np + +from pandas._libs import ( + lib, + missing as libmissing, +) + + +def kleene_or( + left: bool | np.ndarray | libmissing.NAType, + right: bool | np.ndarray | libmissing.NAType, + left_mask: np.ndarray | None, + right_mask: np.ndarray | None, +): + """ + Boolean ``or`` using Kleene logic. + + Values are NA where we have ``NA | NA`` or ``NA | False``. + ``NA | True`` is considered True. + + Parameters + ---------- + left, right : ndarray, NA, or bool + The values of the array. + left_mask, right_mask : ndarray, optional + The masks. Only one of these may be None, which implies that + the associated `left` or `right` value is a scalar. + + Returns + ------- + result, mask: ndarray[bool] + The result of the logical or, and the new mask. + """ + # To reduce the number of cases, we ensure that `left` & `left_mask` + # always come from an array, not a scalar. This is safe, since + # A | B == B | A + if left_mask is None: + return kleene_or(right, left, right_mask, left_mask) + + if not isinstance(left, np.ndarray): + raise TypeError("Either `left` or `right` need to be a np.ndarray.") + + raise_for_nan(right, method="or") + + if right is libmissing.NA: + result = left.copy() + else: + result = left | right + + if right_mask is not None: + # output is unknown where (False & NA), (NA & False), (NA & NA) + left_false = ~(left | left_mask) + right_false = ~(right | right_mask) + mask = ( + (left_false & right_mask) + | (right_false & left_mask) + | (left_mask & right_mask) + ) + else: + if right is True: + mask = np.zeros_like(left_mask) + elif right is libmissing.NA: + mask = (~left & ~left_mask) | left_mask + else: + # False + mask = left_mask.copy() + + return result, mask + + +def kleene_xor( + left: bool | np.ndarray | libmissing.NAType, + right: bool | np.ndarray | libmissing.NAType, + left_mask: np.ndarray | None, + right_mask: np.ndarray | None, +): + """ + Boolean ``xor`` using Kleene logic. + + This is the same as ``or``, with the following adjustments + + * True, True -> False + * True, NA -> NA + + Parameters + ---------- + left, right : ndarray, NA, or bool + The values of the array. + left_mask, right_mask : ndarray, optional + The masks. Only one of these may be None, which implies that + the associated `left` or `right` value is a scalar. + + Returns + ------- + result, mask: ndarray[bool] + The result of the logical xor, and the new mask. + """ + # To reduce the number of cases, we ensure that `left` & `left_mask` + # always come from an array, not a scalar. 
This is safe, since + # A ^ B == B ^ A + if left_mask is None: + return kleene_xor(right, left, right_mask, left_mask) + + if not isinstance(left, np.ndarray): + raise TypeError("Either `left` or `right` need to be a np.ndarray.") + + raise_for_nan(right, method="xor") + if right is libmissing.NA: + result = np.zeros_like(left) + else: + result = left ^ right + + if right_mask is None: + if right is libmissing.NA: + mask = np.ones_like(left_mask) + else: + mask = left_mask.copy() + else: + mask = left_mask | right_mask + + return result, mask + + +def kleene_and( + left: bool | libmissing.NAType | np.ndarray, + right: bool | libmissing.NAType | np.ndarray, + left_mask: np.ndarray | None, + right_mask: np.ndarray | None, +): + """ + Boolean ``and`` using Kleene logic. + + Values are ``NA`` for ``NA & NA`` or ``True & NA``. + + Parameters + ---------- + left, right : ndarray, NA, or bool + The values of the array. + left_mask, right_mask : ndarray, optional + The masks. Only one of these may be None, which implies that + the associated `left` or `right` value is a scalar. + + Returns + ------- + result, mask: ndarray[bool] + The result of the logical xor, and the new mask. + """ + # To reduce the number of cases, we ensure that `left` & `left_mask` + # always come from an array, not a scalar. This is safe, since + # A & B == B & A + if left_mask is None: + return kleene_and(right, left, right_mask, left_mask) + + if not isinstance(left, np.ndarray): + raise TypeError("Either `left` or `right` need to be a np.ndarray.") + raise_for_nan(right, method="and") + + if right is libmissing.NA: + result = np.zeros_like(left) + else: + result = left & right + + if right_mask is None: + # Scalar `right` + if right is libmissing.NA: + mask = (left & ~left_mask) | left_mask + + else: + mask = left_mask.copy() + if right is False: + # unmask everything + mask[:] = False + else: + # unmask where either left or right is False + left_false = ~(left | left_mask) + right_false = ~(right | right_mask) + mask = (left_mask & ~right_false) | (right_mask & ~left_false) + + return result, mask + + +def raise_for_nan(value, method: str): + if lib.is_float(value) and np.isnan(value): + raise ValueError(f"Cannot perform logical '{method}' with floating NaN") diff --git a/.local/lib/python3.8/site-packages/pandas/core/ops/methods.py b/.local/lib/python3.8/site-packages/pandas/core/ops/methods.py new file mode 100644 index 0000000..df22919 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/ops/methods.py @@ -0,0 +1,122 @@ +""" +Functions to generate methods and pin them to the appropriate classes. +""" +import operator + +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) + +from pandas.core.ops import roperator + + +def _get_method_wrappers(cls): + """ + Find the appropriate operation-wrappers to use when defining flex/special + arithmetic, boolean, and comparison operations with the given class. 
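+    Series uses a single wrapper for both kinds of flex method, while
+    DataFrame distinguishes arithmetic from comparison wrappers (the
+    comparison variant takes no ``fill_value``).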
+ + Parameters + ---------- + cls : class + + Returns + ------- + arith_flex : function or None + comp_flex : function or None + """ + # TODO: make these non-runtime imports once the relevant functions + # are no longer in __init__ + from pandas.core.ops import ( + flex_arith_method_FRAME, + flex_comp_method_FRAME, + flex_method_SERIES, + ) + + if issubclass(cls, ABCSeries): + # Just Series + arith_flex = flex_method_SERIES + comp_flex = flex_method_SERIES + elif issubclass(cls, ABCDataFrame): + arith_flex = flex_arith_method_FRAME + comp_flex = flex_comp_method_FRAME + return arith_flex, comp_flex + + +def add_flex_arithmetic_methods(cls): + """ + Adds the full suite of flex arithmetic methods (``pow``, ``mul``, ``add``) + to the class. + + Parameters + ---------- + cls : class + flex methods will be defined and pinned to this class + """ + flex_arith_method, flex_comp_method = _get_method_wrappers(cls) + new_methods = _create_methods(cls, flex_arith_method, flex_comp_method) + new_methods.update( + { + "multiply": new_methods["mul"], + "subtract": new_methods["sub"], + "divide": new_methods["div"], + } + ) + # opt out of bool flex methods for now + assert not any(kname in new_methods for kname in ("ror_", "rxor", "rand_")) + + _add_methods(cls, new_methods=new_methods) + + +def _create_methods(cls, arith_method, comp_method): + # creates actual flex methods based upon arithmetic, and comp method + # constructors. + + have_divmod = issubclass(cls, ABCSeries) + # divmod is available for Series + + new_methods = {} + + new_methods.update( + { + "add": arith_method(operator.add), + "radd": arith_method(roperator.radd), + "sub": arith_method(operator.sub), + "mul": arith_method(operator.mul), + "truediv": arith_method(operator.truediv), + "floordiv": arith_method(operator.floordiv), + "mod": arith_method(operator.mod), + "pow": arith_method(operator.pow), + "rmul": arith_method(roperator.rmul), + "rsub": arith_method(roperator.rsub), + "rtruediv": arith_method(roperator.rtruediv), + "rfloordiv": arith_method(roperator.rfloordiv), + "rpow": arith_method(roperator.rpow), + "rmod": arith_method(roperator.rmod), + } + ) + new_methods["div"] = new_methods["truediv"] + new_methods["rdiv"] = new_methods["rtruediv"] + if have_divmod: + # divmod doesn't have an op that is supported by numexpr + new_methods["divmod"] = arith_method(divmod) + new_methods["rdivmod"] = arith_method(roperator.rdivmod) + + new_methods.update( + { + "eq": comp_method(operator.eq), + "ne": comp_method(operator.ne), + "lt": comp_method(operator.lt), + "gt": comp_method(operator.gt), + "le": comp_method(operator.le), + "ge": comp_method(operator.ge), + } + ) + + new_methods = {k.strip("_"): v for k, v in new_methods.items()} + return new_methods + + +def _add_methods(cls, new_methods): + for name, method in new_methods.items(): + setattr(cls, name, method) diff --git a/.local/lib/python3.8/site-packages/pandas/core/ops/missing.py b/.local/lib/python3.8/site-packages/pandas/core/ops/missing.py new file mode 100644 index 0000000..8d5f7fb --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/ops/missing.py @@ -0,0 +1,181 @@ +""" +Missing data handling for arithmetic operations. + +In particular, pandas conventions regarding division by zero differ +from numpy in the following ways: + 1) np.array([-1, 0, 1], dtype=dtype1) // np.array([0, 0, 0], dtype=dtype2) + gives [nan, nan, nan] for most dtype combinations, and [0, 0, 0] for + the remaining pairs + (the remaining being dtype1==dtype2==intN and dtype==dtype2==uintN). 
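+       For example, ``np.array([-1, 0, 1]) // np.array([0, 0, 0])``
+       evaluates to ``[0, 0, 0]`` (with a RuntimeWarning) when both
+       operands are int64.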
+ + pandas convention is to return [-inf, nan, inf] for all dtype + combinations. + + Note: the numpy behavior described here is py3-specific. + + 2) np.array([-1, 0, 1], dtype=dtype1) % np.array([0, 0, 0], dtype=dtype2) + gives precisely the same results as the // operation. + + pandas convention is to return [nan, nan, nan] for all dtype + combinations. + + 3) divmod behavior consistent with 1) and 2). +""" +import operator + +import numpy as np + +from pandas.core.dtypes.common import ( + is_float_dtype, + is_integer_dtype, + is_scalar, +) + +from pandas.core.ops import roperator + + +def _fill_zeros(result, x, y): + """ + If this is a reversed op, then flip x,y + + If we have an integer value (or array in y) + and we have 0's, fill them with np.nan, + return the result. + + Mask the nan's from x. + """ + if is_float_dtype(result.dtype): + return result + + is_variable_type = hasattr(y, "dtype") + is_scalar_type = is_scalar(y) + + if not is_variable_type and not is_scalar_type: + return result + + if is_scalar_type: + y = np.array(y) + + if is_integer_dtype(y.dtype): + + ymask = y == 0 + if ymask.any(): + + # GH#7325, mask and nans must be broadcastable + mask = ymask & ~np.isnan(result) + + # GH#9308 doing ravel on result and mask can improve putmask perf, + # but can also make unwanted copies. + result = result.astype("float64", copy=False) + + np.putmask(result, mask, np.nan) + + return result + + +def mask_zero_div_zero(x, y, result: np.ndarray) -> np.ndarray: + """ + Set results of 0 // 0 to np.nan, regardless of the dtypes + of the numerator or the denominator. + + Parameters + ---------- + x : ndarray + y : ndarray + result : ndarray + + Returns + ------- + ndarray + The filled result. + + Examples + -------- + >>> x = np.array([1, 0, -1], dtype=np.int64) + >>> x + array([ 1, 0, -1]) + >>> y = 0 # int 0; numpy behavior is different with float + >>> result = x // y + >>> result # raw numpy result does not fill division by zero + array([0, 0, 0]) + >>> mask_zero_div_zero(x, y, result) + array([ inf, nan, -inf]) + """ + + if not hasattr(y, "dtype"): + # e.g. scalar, tuple + y = np.array(y) + if not hasattr(x, "dtype"): + # e.g scalar, tuple + x = np.array(x) + + zmask = y == 0 + + if zmask.any(): + + # Flip sign if necessary for -0.0 + zneg_mask = zmask & np.signbit(y) + zpos_mask = zmask & ~zneg_mask + + x_lt0 = x < 0 + x_gt0 = x > 0 + nan_mask = zmask & (x == 0) + with np.errstate(invalid="ignore"): + neginf_mask = (zpos_mask & x_lt0) | (zneg_mask & x_gt0) + posinf_mask = (zpos_mask & x_gt0) | (zneg_mask & x_lt0) + + if nan_mask.any() or neginf_mask.any() or posinf_mask.any(): + # Fill negative/0 with -inf, positive/0 with +inf, 0/0 with NaN + result = result.astype("float64", copy=False) + + result[nan_mask] = np.nan + result[posinf_mask] = np.inf + result[neginf_mask] = -np.inf + + return result + + +def dispatch_fill_zeros(op, left, right, result): + """ + Call _fill_zeros with the appropriate fill value depending on the operation, + with special logic for divmod and rdivmod. + + Parameters + ---------- + op : function (operator.add, operator.div, ...) + left : object (np.ndarray for non-reversed ops) + right : object (np.ndarray for reversed ops) + result : ndarray + + Returns + ------- + result : np.ndarray + + Notes + ----- + For divmod and rdivmod, the `result` parameter and returned `result` + is a 2-tuple of ndarray objects. 
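+
+    A hedged sketch of the floordiv path (hypothetical values)::
+
+        left = np.array([-1, 0, 1], dtype=np.int64)
+        right = np.zeros(3, dtype=np.int64)
+        with np.errstate(all="ignore"):
+            raw = left // right  # numpy yields [0, 0, 0] here
+        # dispatch_fill_zeros(operator.floordiv, left, right, raw)
+        # -> array([-inf,  nan,  inf]), per the conventions above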
+ """ + if op is divmod: + result = ( + mask_zero_div_zero(left, right, result[0]), + _fill_zeros(result[1], left, right), + ) + elif op is roperator.rdivmod: + result = ( + mask_zero_div_zero(right, left, result[0]), + _fill_zeros(result[1], right, left), + ) + elif op is operator.floordiv: + # Note: no need to do this for truediv; in py3 numpy behaves the way + # we want. + result = mask_zero_div_zero(left, right, result) + elif op is roperator.rfloordiv: + # Note: no need to do this for rtruediv; in py3 numpy behaves the way + # we want. + result = mask_zero_div_zero(right, left, result) + elif op is operator.mod: + result = _fill_zeros(result, left, right) + elif op is roperator.rmod: + result = _fill_zeros(result, right, left) + return result diff --git a/.local/lib/python3.8/site-packages/pandas/core/resample.py b/.local/lib/python3.8/site-packages/pandas/core/resample.py new file mode 100644 index 0000000..e0b9eac --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/resample.py @@ -0,0 +1,2130 @@ +from __future__ import annotations + +import copy +from datetime import timedelta +from textwrap import dedent +from typing import ( + TYPE_CHECKING, + Callable, + Hashable, + Literal, + final, + no_type_check, +) +import warnings + +import numpy as np + +from pandas._libs import lib +from pandas._libs.tslibs import ( + BaseOffset, + IncompatibleFrequency, + NaT, + Period, + Timedelta, + Timestamp, + to_offset, +) +from pandas._typing import ( + IndexLabel, + NDFrameT, + T, + TimedeltaConvertibleTypes, + TimestampConvertibleTypes, + npt, +) +from pandas.compat.numpy import function as nv +from pandas.errors import AbstractMethodError +from pandas.util._decorators import ( + Appender, + Substitution, + deprecate_nonkeyword_arguments, + doc, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) + +import pandas.core.algorithms as algos +from pandas.core.apply import ResamplerWindowApply +from pandas.core.base import ( + DataError, + PandasObject, +) +import pandas.core.common as com +from pandas.core.generic import ( + NDFrame, + _shared_docs, +) +from pandas.core.groupby.generic import SeriesGroupBy +from pandas.core.groupby.groupby import ( + BaseGroupBy, + GroupBy, + _pipe_template, + get_groupby, +) +from pandas.core.groupby.grouper import Grouper +from pandas.core.groupby.ops import BinGrouper +from pandas.core.indexes.api import Index +from pandas.core.indexes.datetimes import ( + DatetimeIndex, + date_range, +) +from pandas.core.indexes.period import ( + PeriodIndex, + period_range, +) +from pandas.core.indexes.timedeltas import ( + TimedeltaIndex, + timedelta_range, +) + +from pandas.tseries.frequencies import ( + is_subperiod, + is_superperiod, +) +from pandas.tseries.offsets import ( + DateOffset, + Day, + Nano, + Tick, +) + +if TYPE_CHECKING: + from pandas import ( + DataFrame, + Series, + ) + +_shared_docs_kwargs: dict[str, str] = {} + + +class Resampler(BaseGroupBy, PandasObject): + """ + Class for resampling datetimelike data, a groupby-like operation. + See aggregate, transform, and apply functions on this object. + + It's easiest to use obj.resample(...) to use Resampler. 
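+    For example, ``ser.resample("2s")`` returns a Resampler whose
+    aggregation methods (``sum``, ``mean``, ``agg``, ...) evaluate over
+    the resampled bins.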
+ + Parameters + ---------- + obj : Series or DataFrame + groupby : TimeGrouper + axis : int, default 0 + kind : str or None + 'period', 'timestamp' to override default index treatment + + Returns + ------- + a Resampler of the appropriate type + + Notes + ----- + After resampling, see aggregate, apply, and transform functions. + """ + + grouper: BinGrouper + exclusions: frozenset[Hashable] = frozenset() # for SelectionMixin compat + + # to the groupby descriptor + _attributes = [ + "freq", + "axis", + "closed", + "label", + "convention", + "loffset", + "kind", + "origin", + "offset", + ] + + def __init__( + self, + obj: DataFrame | Series, + groupby: TimeGrouper, + axis: int = 0, + kind=None, + *, + selection=None, + **kwargs, + ): + self.groupby = groupby + self.keys = None + self.sort = True + self.axis = axis + self.kind = kind + self.squeeze = False + self.group_keys = True + self.as_index = True + + self.groupby._set_grouper(self._convert_obj(obj), sort=True) + self.binner, self.grouper = self._get_binner() + self._selection = selection + + @final + def _shallow_copy(self, obj, **kwargs): + """ + return a new object with the replacement attributes + """ + if isinstance(obj, self._constructor): + obj = obj.obj + for attr in self._attributes: + if attr not in kwargs: + kwargs[attr] = getattr(self, attr) + return self._constructor(obj, **kwargs) + + def __str__(self) -> str: + """ + Provide a nice str repr of our rolling object. + """ + attrs = ( + f"{k}={getattr(self.groupby, k)}" + for k in self._attributes + if getattr(self.groupby, k, None) is not None + ) + return f"{type(self).__name__} [{', '.join(attrs)}]" + + def __getattr__(self, attr: str): + if attr in self._internal_names_set: + return object.__getattribute__(self, attr) + if attr in self._attributes: + return getattr(self.groupby, attr) + if attr in self.obj: + return self[attr] + + return object.__getattribute__(self, attr) + + # error: Signature of "obj" incompatible with supertype "BaseGroupBy" + @property + def obj(self) -> NDFrameT: # type: ignore[override] + # error: Incompatible return value type (got "Optional[Any]", + # expected "NDFrameT") + return self.groupby.obj # type: ignore[return-value] + + @property + def ax(self): + # we can infer that this is a PeriodIndex/DatetimeIndex/TimedeltaIndex, + # but skipping annotating bc the overrides overwhelming + return self.groupby.ax + + @property + def _from_selection(self) -> bool: + """ + Is the resampling from a DataFrame column or MultiIndex level. + """ + # upsampling and PeriodIndex resampling do not work + # with selection, this state used to catch and raise an error + return self.groupby is not None and ( + self.groupby.key is not None or self.groupby.level is not None + ) + + def _convert_obj(self, obj: NDFrameT) -> NDFrameT: + """ + Provide any conversions for the object in order to correctly handle. + + Parameters + ---------- + obj : Series or DataFrame + + Returns + ------- + Series or DataFrame + """ + return obj._consolidate() + + def _get_binner_for_time(self): + raise AbstractMethodError(self) + + @final + def _get_binner(self): + """ + Create the BinGrouper, assume that self.set_grouper(obj) + has already been called. + """ + binner, bins, binlabels = self._get_binner_for_time() + assert len(bins) == len(binlabels) + bin_grouper = BinGrouper(bins, binlabels, indexer=self.groupby.indexer) + return binner, bin_grouper + + @Substitution( + klass="Resampler", + examples=""" + >>> df = pd.DataFrame({'A': [1, 2, 3, 4]}, + ... 
index=pd.date_range('2012-08-02', periods=4)) + >>> df + A + 2012-08-02 1 + 2012-08-03 2 + 2012-08-04 3 + 2012-08-05 4 + + To get the difference between each 2-day period's maximum and minimum + value in one pass, you can do + + >>> df.resample('2D').pipe(lambda x: x.max() - x.min()) + A + 2012-08-02 1 + 2012-08-04 1""", + ) + @Appender(_pipe_template) + def pipe( + self, + func: Callable[..., T] | tuple[Callable[..., T], str], + *args, + **kwargs, + ) -> T: + return super().pipe(func, *args, **kwargs) + + _agg_see_also_doc = dedent( + """ + See Also + -------- + DataFrame.groupby.aggregate : Aggregate using callable, string, dict, + or list of string/callables. + DataFrame.resample.transform : Transforms the Series on each group + based on the given function. + DataFrame.aggregate: Aggregate using one or more + operations over the specified axis. + """ + ) + + _agg_examples_doc = dedent( + """ + Examples + -------- + >>> s = pd.Series([1, 2, 3, 4, 5], + ... index=pd.date_range('20130101', periods=5, freq='s')) + >>> s + 2013-01-01 00:00:00 1 + 2013-01-01 00:00:01 2 + 2013-01-01 00:00:02 3 + 2013-01-01 00:00:03 4 + 2013-01-01 00:00:04 5 + Freq: S, dtype: int64 + + >>> r = s.resample('2s') + + >>> r.agg(np.sum) + 2013-01-01 00:00:00 3 + 2013-01-01 00:00:02 7 + 2013-01-01 00:00:04 5 + Freq: 2S, dtype: int64 + + >>> r.agg(['sum', 'mean', 'max']) + sum mean max + 2013-01-01 00:00:00 3 1.5 2 + 2013-01-01 00:00:02 7 3.5 4 + 2013-01-01 00:00:04 5 5.0 5 + + >>> r.agg({'result': lambda x: x.mean() / x.std(), + ... 'total': np.sum}) + result total + 2013-01-01 00:00:00 2.121320 3 + 2013-01-01 00:00:02 4.949747 7 + 2013-01-01 00:00:04 NaN 5 + + >>> r.agg(average="mean", total="sum") + average total + 2013-01-01 00:00:00 1.5 3 + 2013-01-01 00:00:02 3.5 7 + 2013-01-01 00:00:04 5.0 5 + """ + ) + + @doc( + _shared_docs["aggregate"], + see_also=_agg_see_also_doc, + examples=_agg_examples_doc, + klass="DataFrame", + axis="", + ) + def aggregate(self, func=None, *args, **kwargs): + + result = ResamplerWindowApply(self, func, args=args, kwargs=kwargs).agg() + if result is None: + how = func + result = self._groupby_and_aggregate(how, *args, **kwargs) + + result = self._apply_loffset(result) + return result + + agg = aggregate + apply = aggregate + + def transform(self, arg, *args, **kwargs): + """ + Call function producing a like-indexed Series on each group and return + a Series with the transformed values. + + Parameters + ---------- + arg : function + To apply to each group. Should return a Series with the same index. + + Returns + ------- + transformed : Series + + Examples + -------- + >>> s = pd.Series([1, 2], + ... index=pd.date_range('20180101', + ... periods=2, + ... freq='1h')) + >>> s + 2018-01-01 00:00:00 1 + 2018-01-01 01:00:00 2 + Freq: H, dtype: int64 + + >>> resampled = s.resample('15min') + >>> resampled.transform(lambda x: (x - x.mean()) / x.std()) + 2018-01-01 00:00:00 NaN + 2018-01-01 01:00:00 NaN + Freq: H, dtype: float64 + """ + return self._selected_obj.groupby(self.groupby).transform(arg, *args, **kwargs) + + def _downsample(self, f): + raise AbstractMethodError(self) + + def _upsample(self, f, limit=None, fill_value=None): + raise AbstractMethodError(self) + + def _gotitem(self, key, ndim: int, subset=None): + """ + Sub-classes to define. Return a sliced object. 
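+
+ Column selection on a resampler routes through here; a sketch with
+ assumed data:
+
+ >>> df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]},
+ ... index=pd.date_range('2000-01-01', periods=2, freq='D'))
+ >>> df.resample('2D')['a'].sum()
+ 2000-01-01 3
+ Freq: 2D, Name: a, dtype: int64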
+ + Parameters + ---------- + key : string / list of selections + ndim : {1, 2} + requested ndim of result + subset : object, default None + subset to act on + """ + grouper = self.grouper + if subset is None: + subset = self.obj + grouped = get_groupby(subset, by=None, grouper=grouper, axis=self.axis) + + # try the key selection + try: + return grouped[key] + except KeyError: + return grouped + + def _groupby_and_aggregate(self, how, *args, **kwargs): + """ + Re-evaluate the obj with a groupby aggregation. + """ + grouper = self.grouper + + obj = self._selected_obj + + grouped = get_groupby(obj, by=None, grouper=grouper, axis=self.axis) + + try: + if isinstance(obj, ABCDataFrame) and callable(how): + # Check if the function is reducing or not. + result = grouped._aggregate_item_by_item(how, *args, **kwargs) + else: + result = grouped.aggregate(how, *args, **kwargs) + except DataError: + # got TypeErrors on aggregation + result = grouped.apply(how, *args, **kwargs) + except (AttributeError, KeyError): + # we have a non-reducing function; try to evaluate + # alternatively we want to evaluate only a column of the input + + # test_apply_to_one_column_of_df the function being applied references + # a DataFrame column, but aggregate_item_by_item operates column-wise + # on Series, raising AttributeError or KeyError + # (depending on whether the column lookup uses getattr/__getitem__) + result = grouped.apply(how, *args, **kwargs) + + except ValueError as err: + if "Must produce aggregated value" in str(err): + # raised in _aggregate_named + # see test_apply_without_aggregation, test_apply_with_mutated_index + pass + else: + raise + + # we have a non-reducing function + # try to evaluate + result = grouped.apply(how, *args, **kwargs) + + result = self._apply_loffset(result) + return self._wrap_result(result) + + def _apply_loffset(self, result): + """ + If loffset is set, offset the result index. + + This is NOT an idempotent routine, it will be applied + exactly once to the result. + + Parameters + ---------- + result : Series or DataFrame + the result of resample + """ + # error: Cannot determine type of 'loffset' + needs_offset = ( + isinstance( + self.loffset, # type: ignore[has-type] + (DateOffset, timedelta, np.timedelta64), + ) + and isinstance(result.index, DatetimeIndex) + and len(result.index) > 0 + ) + + if needs_offset: + # error: Cannot determine type of 'loffset' + result.index = result.index + self.loffset # type: ignore[has-type] + + self.loffset = None + return result + + def _get_resampler_for_grouping(self, groupby): + """ + Return the correct class for resampling with groupby. + """ + return self._resampler_for_grouping(self, groupby=groupby) + + def _wrap_result(self, result): + """ + Potentially wrap any results. + """ + if isinstance(result, ABCSeries) and self._selection is not None: + result.name = self._selection + + if isinstance(result, ABCSeries) and result.empty: + obj = self.obj + # When index is all NaT, result is empty but index is not + result.index = _asfreq_compat(obj.index[:0], freq=self.freq) + result.name = getattr(obj, "name", None) + + return result + + def ffill(self, limit=None): + """ + Forward fill the values. + + Parameters + ---------- + limit : int, optional + Limit of how many values to fill. + + Returns + ------- + An upsampled Series. + + See Also + -------- + Series.fillna: Fill NA/NaN values using the specified method. + DataFrame.fillna: Fill NA/NaN values using the specified method. 
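+
+ Examples
+ --------
+ A sketch with assumed data:
+
+ >>> s = pd.Series([1, 2],
+ ... index=pd.date_range('2018-01-01', periods=2, freq='h'))
+ >>> s.resample('30min').ffill()
+ 2018-01-01 00:00:00 1
+ 2018-01-01 00:30:00 1
+ 2018-01-01 01:00:00 2
+ Freq: 30T, dtype: int64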
+ """ + return self._upsample("ffill", limit=limit) + + def pad(self, limit=None): + warnings.warn( + "pad is deprecated and will be removed in a future version. " + "Use ffill instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self.ffill(limit=limit) + + pad.__doc__ = ffill.__doc__ + + def nearest(self, limit=None): + """ + Resample by using the nearest value. + + When resampling data, missing values may appear (e.g., when the + resampling frequency is higher than the original frequency). + The `nearest` method will replace ``NaN`` values that appeared in + the resampled data with the value from the nearest member of the + sequence, based on the index value. + Missing values that existed in the original data will not be modified. + If `limit` is given, fill only this many values in each direction for + each of the original values. + + Parameters + ---------- + limit : int, optional + Limit of how many values to fill. + + Returns + ------- + Series or DataFrame + An upsampled Series or DataFrame with ``NaN`` values filled with + their nearest value. + + See Also + -------- + backfill : Backward fill the new missing values in the resampled data. + pad : Forward fill ``NaN`` values. + + Examples + -------- + >>> s = pd.Series([1, 2], + ... index=pd.date_range('20180101', + ... periods=2, + ... freq='1h')) + >>> s + 2018-01-01 00:00:00 1 + 2018-01-01 01:00:00 2 + Freq: H, dtype: int64 + + >>> s.resample('15min').nearest() + 2018-01-01 00:00:00 1 + 2018-01-01 00:15:00 1 + 2018-01-01 00:30:00 2 + 2018-01-01 00:45:00 2 + 2018-01-01 01:00:00 2 + Freq: 15T, dtype: int64 + + Limit the number of upsampled values imputed by the nearest: + + >>> s.resample('15min').nearest(limit=1) + 2018-01-01 00:00:00 1.0 + 2018-01-01 00:15:00 1.0 + 2018-01-01 00:30:00 NaN + 2018-01-01 00:45:00 2.0 + 2018-01-01 01:00:00 2.0 + Freq: 15T, dtype: float64 + """ + return self._upsample("nearest", limit=limit) + + def bfill(self, limit=None): + """ + Backward fill the new missing values in the resampled data. + + In statistics, imputation is the process of replacing missing data with + substituted values [1]_. When resampling data, missing values may + appear (e.g., when the resampling frequency is higher than the original + frequency). The backward fill will replace NaN values that appeared in + the resampled data with the next value in the original sequence. + Missing values that existed in the original data will not be modified. + + Parameters + ---------- + limit : int, optional + Limit of how many values to fill. + + Returns + ------- + Series, DataFrame + An upsampled Series or DataFrame with backward filled NaN values. + + See Also + -------- + bfill : Alias of backfill. + fillna : Fill NaN values using the specified method, which can be + 'backfill'. + nearest : Fill NaN values with nearest neighbor starting from center. + ffill : Forward fill NaN values. + Series.fillna : Fill NaN values in the Series using the + specified method, which can be 'backfill'. + DataFrame.fillna : Fill NaN values in the DataFrame using the + specified method, which can be 'backfill'. + + References + ---------- + .. [1] https://en.wikipedia.org/wiki/Imputation_(statistics) + + Examples + -------- + Resampling a Series: + + >>> s = pd.Series([1, 2, 3], + ... 
index=pd.date_range('20180101', periods=3, freq='h')) + >>> s + 2018-01-01 00:00:00 1 + 2018-01-01 01:00:00 2 + 2018-01-01 02:00:00 3 + Freq: H, dtype: int64 + + >>> s.resample('30min').bfill() + 2018-01-01 00:00:00 1 + 2018-01-01 00:30:00 2 + 2018-01-01 01:00:00 2 + 2018-01-01 01:30:00 3 + 2018-01-01 02:00:00 3 + Freq: 30T, dtype: int64 + + >>> s.resample('15min').bfill(limit=2) + 2018-01-01 00:00:00 1.0 + 2018-01-01 00:15:00 NaN + 2018-01-01 00:30:00 2.0 + 2018-01-01 00:45:00 2.0 + 2018-01-01 01:00:00 2.0 + 2018-01-01 01:15:00 NaN + 2018-01-01 01:30:00 3.0 + 2018-01-01 01:45:00 3.0 + 2018-01-01 02:00:00 3.0 + Freq: 15T, dtype: float64 + + Resampling a DataFrame that has missing values: + + >>> df = pd.DataFrame({'a': [2, np.nan, 6], 'b': [1, 3, 5]}, + ... index=pd.date_range('20180101', periods=3, + ... freq='h')) + >>> df + a b + 2018-01-01 00:00:00 2.0 1 + 2018-01-01 01:00:00 NaN 3 + 2018-01-01 02:00:00 6.0 5 + + >>> df.resample('30min').bfill() + a b + 2018-01-01 00:00:00 2.0 1 + 2018-01-01 00:30:00 NaN 3 + 2018-01-01 01:00:00 NaN 3 + 2018-01-01 01:30:00 6.0 5 + 2018-01-01 02:00:00 6.0 5 + + >>> df.resample('15min').bfill(limit=2) + a b + 2018-01-01 00:00:00 2.0 1.0 + 2018-01-01 00:15:00 NaN NaN + 2018-01-01 00:30:00 NaN 3.0 + 2018-01-01 00:45:00 NaN 3.0 + 2018-01-01 01:00:00 NaN 3.0 + 2018-01-01 01:15:00 NaN NaN + 2018-01-01 01:30:00 6.0 5.0 + 2018-01-01 01:45:00 6.0 5.0 + 2018-01-01 02:00:00 6.0 5.0 + """ + return self._upsample("bfill", limit=limit) + + def backfill(self, limit=None): + warnings.warn( + "backfill is deprecated and will be removed in a future version. " + "Use bfill instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self.bfill(limit=limit) + + backfill.__doc__ = bfill.__doc__ + + def fillna(self, method, limit=None): + """ + Fill missing values introduced by upsampling. + + In statistics, imputation is the process of replacing missing data with + substituted values [1]_. When resampling data, missing values may + appear (e.g., when the resampling frequency is higher than the original + frequency). + + Missing values that existed in the original data will + not be modified. + + Parameters + ---------- + method : {'pad', 'backfill', 'ffill', 'bfill', 'nearest'} + Method to use for filling holes in resampled data + + * 'pad' or 'ffill': use previous valid observation to fill gap + (forward fill). + * 'backfill' or 'bfill': use next valid observation to fill gap. + * 'nearest': use nearest valid observation to fill gap. + + limit : int, optional + Limit of how many consecutive missing values to fill. + + Returns + ------- + Series or DataFrame + An upsampled Series or DataFrame with missing values filled. + + See Also + -------- + bfill : Backward fill NaN values in the resampled data. + ffill : Forward fill NaN values in the resampled data. + nearest : Fill NaN values in the resampled data + with nearest neighbor starting from center. + interpolate : Fill NaN values using interpolation. + Series.fillna : Fill NaN values in the Series using the + specified method, which can be 'bfill' and 'ffill'. + DataFrame.fillna : Fill NaN values in the DataFrame using the + specified method, which can be 'bfill' and 'ffill'. + + References + ---------- + .. [1] https://en.wikipedia.org/wiki/Imputation_(statistics) + + Examples + -------- + Resampling a Series: + + >>> s = pd.Series([1, 2, 3], + ... 
index=pd.date_range('20180101', periods=3, freq='h')) + >>> s + 2018-01-01 00:00:00 1 + 2018-01-01 01:00:00 2 + 2018-01-01 02:00:00 3 + Freq: H, dtype: int64 + + Without filling the missing values you get: + + >>> s.resample("30min").asfreq() + 2018-01-01 00:00:00 1.0 + 2018-01-01 00:30:00 NaN + 2018-01-01 01:00:00 2.0 + 2018-01-01 01:30:00 NaN + 2018-01-01 02:00:00 3.0 + Freq: 30T, dtype: float64 + + >>> s.resample('30min').fillna("backfill") + 2018-01-01 00:00:00 1 + 2018-01-01 00:30:00 2 + 2018-01-01 01:00:00 2 + 2018-01-01 01:30:00 3 + 2018-01-01 02:00:00 3 + Freq: 30T, dtype: int64 + + >>> s.resample('15min').fillna("backfill", limit=2) + 2018-01-01 00:00:00 1.0 + 2018-01-01 00:15:00 NaN + 2018-01-01 00:30:00 2.0 + 2018-01-01 00:45:00 2.0 + 2018-01-01 01:00:00 2.0 + 2018-01-01 01:15:00 NaN + 2018-01-01 01:30:00 3.0 + 2018-01-01 01:45:00 3.0 + 2018-01-01 02:00:00 3.0 + Freq: 15T, dtype: float64 + + >>> s.resample('30min').fillna("pad") + 2018-01-01 00:00:00 1 + 2018-01-01 00:30:00 1 + 2018-01-01 01:00:00 2 + 2018-01-01 01:30:00 2 + 2018-01-01 02:00:00 3 + Freq: 30T, dtype: int64 + + >>> s.resample('30min').fillna("nearest") + 2018-01-01 00:00:00 1 + 2018-01-01 00:30:00 2 + 2018-01-01 01:00:00 2 + 2018-01-01 01:30:00 3 + 2018-01-01 02:00:00 3 + Freq: 30T, dtype: int64 + + Missing values present before the upsampling are not affected. + + >>> sm = pd.Series([1, None, 3], + ... index=pd.date_range('20180101', periods=3, freq='h')) + >>> sm + 2018-01-01 00:00:00 1.0 + 2018-01-01 01:00:00 NaN + 2018-01-01 02:00:00 3.0 + Freq: H, dtype: float64 + + >>> sm.resample('30min').fillna('backfill') + 2018-01-01 00:00:00 1.0 + 2018-01-01 00:30:00 NaN + 2018-01-01 01:00:00 NaN + 2018-01-01 01:30:00 3.0 + 2018-01-01 02:00:00 3.0 + Freq: 30T, dtype: float64 + + >>> sm.resample('30min').fillna('pad') + 2018-01-01 00:00:00 1.0 + 2018-01-01 00:30:00 1.0 + 2018-01-01 01:00:00 NaN + 2018-01-01 01:30:00 NaN + 2018-01-01 02:00:00 3.0 + Freq: 30T, dtype: float64 + + >>> sm.resample('30min').fillna('nearest') + 2018-01-01 00:00:00 1.0 + 2018-01-01 00:30:00 NaN + 2018-01-01 01:00:00 NaN + 2018-01-01 01:30:00 3.0 + 2018-01-01 02:00:00 3.0 + Freq: 30T, dtype: float64 + + DataFrame resampling is done column-wise. All the same options are + available. + + >>> df = pd.DataFrame({'a': [2, np.nan, 6], 'b': [1, 3, 5]}, + ... index=pd.date_range('20180101', periods=3, + ... freq='h')) + >>> df + a b + 2018-01-01 00:00:00 2.0 1 + 2018-01-01 01:00:00 NaN 3 + 2018-01-01 02:00:00 6.0 5 + + >>> df.resample('30min').fillna("bfill") + a b + 2018-01-01 00:00:00 2.0 1 + 2018-01-01 00:30:00 NaN 3 + 2018-01-01 01:00:00 NaN 3 + 2018-01-01 01:30:00 6.0 5 + 2018-01-01 02:00:00 6.0 5 + """ + return self._upsample(method, limit=limit) + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "method"]) + @doc(NDFrame.interpolate, **_shared_docs_kwargs) + def interpolate( + self, + method="linear", + axis=0, + limit=None, + inplace=False, + limit_direction="forward", + limit_area=None, + downcast=None, + **kwargs, + ): + """ + Interpolate values according to different methods. + """ + result = self._upsample("asfreq") + return result.interpolate( + method=method, + axis=axis, + limit=limit, + inplace=inplace, + limit_direction=limit_direction, + limit_area=limit_area, + downcast=downcast, + **kwargs, + ) + + def asfreq(self, fill_value=None): + """ + Return the values at the new freq, essentially a reindex. 
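+
+ Upsampling without filling only exposes the new grid; a sketch with
+ assumed data:
+
+ >>> s = pd.Series([1, 2],
+ ... index=pd.date_range('2018-01-01', periods=2, freq='h'))
+ >>> s.resample('30min').asfreq()
+ 2018-01-01 00:00:00 1.0
+ 2018-01-01 00:30:00 NaN
+ 2018-01-01 01:00:00 2.0
+ Freq: 30T, dtype: float64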
+
+ Parameters
+ ----------
+ fill_value : scalar, optional
+ Value to use for missing values, applied during upsampling (note
+ this does not fill NaNs that already were present).
+
+ Returns
+ -------
+ DataFrame or Series
+ Values at the specified freq.
+
+ See Also
+ --------
+ Series.asfreq: Convert TimeSeries to specified frequency.
+ DataFrame.asfreq: Convert TimeSeries to specified frequency.
+ """
+ return self._upsample("asfreq", fill_value=fill_value)
+
+ def std(self, ddof=1, *args, **kwargs):
+ """
+ Compute standard deviation of groups, excluding missing values.
+
+ Parameters
+ ----------
+ ddof : int, default 1
+ Degrees of freedom.
+
+ Returns
+ -------
+ DataFrame or Series
+ Standard deviation of values within each group.
+ """
+ nv.validate_resampler_func("std", args, kwargs)
+ # error: Unexpected keyword argument "ddof" for "_downsample"
+ return self._downsample("std", ddof=ddof) # type: ignore[call-arg]
+
+ def var(self, ddof=1, *args, **kwargs):
+ """
+ Compute variance of groups, excluding missing values.
+
+ Parameters
+ ----------
+ ddof : int, default 1
+ Degrees of freedom.
+
+ Returns
+ -------
+ DataFrame or Series
+ Variance of values within each group.
+ """
+ nv.validate_resampler_func("var", args, kwargs)
+ # error: Unexpected keyword argument "ddof" for "_downsample"
+ return self._downsample("var", ddof=ddof) # type: ignore[call-arg]
+
+ @doc(GroupBy.size)
+ def size(self):
+ result = self._downsample("size")
+ if not len(self.ax):
+ from pandas import Series
+
+ if self._selected_obj.ndim == 1:
+ name = self._selected_obj.name
+ else:
+ name = None
+ result = Series([], index=result.index, dtype="int64", name=name)
+ return result
+
+ @doc(GroupBy.count)
+ def count(self):
+ result = self._downsample("count")
+ if not len(self.ax):
+ if self._selected_obj.ndim == 1:
+ result = type(self._selected_obj)(
+ [], index=result.index, dtype="int64", name=self._selected_obj.name
+ )
+ else:
+ from pandas import DataFrame
+
+ result = DataFrame(
+ [], index=result.index, columns=result.columns, dtype="int64"
+ )
+
+ return result
+
+ def quantile(self, q=0.5, **kwargs):
+ """
+ Return value at the given quantile.
+
+ Parameters
+ ----------
+ q : float or array-like, default 0.5 (50% quantile)
+
+ Returns
+ -------
+ DataFrame or Series
+ Quantile of values within each group.
+
+ See Also
+ --------
+ Series.quantile
+ Return a Series, where the index is q and the values are the quantiles.
+ DataFrame.quantile
+ Return a DataFrame, where the columns are the columns of self,
+ and the values are the quantiles.
+ DataFrameGroupBy.quantile
+ Return a DataFrame, where the columns are groupby columns,
+ and the values are its quantiles.
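+
+ Examples
+ --------
+ A sketch with assumed data:
+
+ >>> s = pd.Series([1, 2, 3, 4],
+ ... index=pd.date_range('2017-01-01', periods=4, freq='T'))
+ >>> s.resample('2T').quantile(0.5)
+ 2017-01-01 00:00:00 1.5
+ 2017-01-01 00:02:00 3.5
+ Freq: 2T, dtype: float64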
+ """ + # error: Unexpected keyword argument "q" for "_downsample" + # error: Too many arguments for "_downsample" + return self._downsample("quantile", q=q, **kwargs) # type: ignore[call-arg] + + +# downsample methods +for method in ["sum", "prod", "min", "max", "first", "last"]: + + def f(self, _method=method, min_count=0, *args, **kwargs): + nv.validate_resampler_func(_method, args, kwargs) + return self._downsample(_method, min_count=min_count) + + f.__doc__ = getattr(GroupBy, method).__doc__ + setattr(Resampler, method, f) + + +# downsample methods +for method in ["mean", "sem", "median", "ohlc"]: + + def g(self, _method=method, *args, **kwargs): + nv.validate_resampler_func(_method, args, kwargs) + return self._downsample(_method) + + g.__doc__ = getattr(GroupBy, method).__doc__ + setattr(Resampler, method, g) + + +# series only methods +for method in ["nunique"]: + + def h(self, _method=method): + return self._downsample(_method) + + h.__doc__ = getattr(SeriesGroupBy, method).__doc__ + setattr(Resampler, method, h) + + +class _GroupByMixin(PandasObject): + """ + Provide the groupby facilities. + """ + + _attributes: list[str] # in practice the same as Resampler._attributes + _selection: IndexLabel | None = None + + def __init__(self, obj, parent=None, groupby=None, **kwargs): + # reached via ._gotitem and _get_resampler_for_grouping + + if parent is None: + parent = obj + + # initialize our GroupByMixin object with + # the resampler attributes + for attr in self._attributes: + setattr(self, attr, kwargs.get(attr, getattr(parent, attr))) + self._selection = kwargs.get("selection") + + self.binner = parent.binner + + self._groupby = groupby + self._groupby.mutated = True + self._groupby.grouper.mutated = True + self.groupby = copy.copy(parent.groupby) + + @no_type_check + def _apply(self, f, *args, **kwargs): + """ + Dispatch to _upsample; we are stripping all of the _upsample kwargs and + performing the original function call on the grouped object. + """ + + def func(x): + x = self._shallow_copy(x, groupby=self.groupby) + + if isinstance(f, str): + return getattr(x, f)(**kwargs) + + return x.apply(f, *args, **kwargs) + + result = self._groupby.apply(func) + return self._wrap_result(result) + + _upsample = _apply + _downsample = _apply + _groupby_and_aggregate = _apply + + @final + def _gotitem(self, key, ndim, subset=None): + """ + Sub-classes to define. Return a sliced object. 
+ + Parameters + ---------- + key : string / list of selections + ndim : {1, 2} + requested ndim of result + subset : object, default None + subset to act on + """ + # create a new object to prevent aliasing + if subset is None: + # error: "GotItemMixin" has no attribute "obj" + subset = self.obj # type: ignore[attr-defined] + + # we need to make a shallow copy of ourselves + # with the same groupby + kwargs = {attr: getattr(self, attr) for attr in self._attributes} + + # Try to select from a DataFrame, falling back to a Series + try: + groupby = self._groupby[key] + except IndexError: + groupby = self._groupby + + selection = None + if subset.ndim == 2 and ( + (lib.is_scalar(key) and key in subset) or lib.is_list_like(key) + ): + selection = key + + new_rs = type(self)( + subset, groupby=groupby, parent=self, selection=selection, **kwargs + ) + return new_rs + + +class DatetimeIndexResampler(Resampler): + @property + def _resampler_for_grouping(self): + return DatetimeIndexResamplerGroupby + + def _get_binner_for_time(self): + + # this is how we are actually creating the bins + if self.kind == "period": + return self.groupby._get_time_period_bins(self.ax) + return self.groupby._get_time_bins(self.ax) + + def _downsample(self, how, **kwargs): + """ + Downsample the cython defined function. + + Parameters + ---------- + how : string / cython mapped function + **kwargs : kw args passed to how function + """ + how = com.get_cython_func(how) or how + ax = self.ax + obj = self._selected_obj + + if not len(ax): + # reset to the new freq + obj = obj.copy() + obj.index = obj.index._with_freq(self.freq) + assert obj.index.freq == self.freq, (obj.index.freq, self.freq) + return obj + + # do we have a regular frequency + + # error: Item "None" of "Optional[Any]" has no attribute "binlabels" + if ( + (ax.freq is not None or ax.inferred_freq is not None) + and len(self.grouper.binlabels) > len(ax) + and how is None + ): + + # let's do an asfreq + return self.asfreq() + + # we are downsampling + # we want to call the actual grouper method here + result = obj.groupby(self.grouper, axis=self.axis).aggregate(how, **kwargs) + + result = self._apply_loffset(result) + return self._wrap_result(result) + + def _adjust_binner_for_upsample(self, binner): + """ + Adjust our binner when upsampling. + + The range of a new index should not be outside specified range + """ + if self.closed == "right": + binner = binner[1:] + else: + binner = binner[:-1] + return binner + + def _upsample(self, method, limit=None, fill_value=None): + """ + Parameters + ---------- + method : string {'backfill', 'bfill', 'pad', + 'ffill', 'asfreq'} method for upsampling + limit : int, default None + Maximum size gap to fill when reindexing + fill_value : scalar, default None + Value to use for missing values + + See Also + -------- + .fillna: Fill NA/NaN values using the specified method. + + """ + if self.axis: + raise AssertionError("axis must be 0") + if self._from_selection: + raise ValueError( + "Upsampling from level= or on= selection " + "is not supported, use .set_index(...) 
" + "to explicitly set index to datetime-like" + ) + + ax = self.ax + obj = self._selected_obj + binner = self.binner + res_index = self._adjust_binner_for_upsample(binner) + + # if we have the same frequency as our axis, then we are equal sampling + if ( + limit is None + and to_offset(ax.inferred_freq) == self.freq + and len(obj) == len(res_index) + ): + result = obj.copy() + result.index = res_index + else: + result = obj.reindex( + res_index, method=method, limit=limit, fill_value=fill_value + ) + + result = self._apply_loffset(result) + return self._wrap_result(result) + + def _wrap_result(self, result): + result = super()._wrap_result(result) + + # we may have a different kind that we were asked originally + # convert if needed + if self.kind == "period" and not isinstance(result.index, PeriodIndex): + result.index = result.index.to_period(self.freq) + return result + + +class DatetimeIndexResamplerGroupby(_GroupByMixin, DatetimeIndexResampler): + """ + Provides a resample of a groupby implementation + """ + + @property + def _constructor(self): + return DatetimeIndexResampler + + +class PeriodIndexResampler(DatetimeIndexResampler): + @property + def _resampler_for_grouping(self): + return PeriodIndexResamplerGroupby + + def _get_binner_for_time(self): + if self.kind == "timestamp": + return super()._get_binner_for_time() + return self.groupby._get_period_bins(self.ax) + + def _convert_obj(self, obj: NDFrameT) -> NDFrameT: + obj = super()._convert_obj(obj) + + if self._from_selection: + # see GH 14008, GH 12871 + msg = ( + "Resampling from level= or on= selection " + "with a PeriodIndex is not currently supported, " + "use .set_index(...) to explicitly set index" + ) + raise NotImplementedError(msg) + + if self.loffset is not None: + # Cannot apply loffset/timedelta to PeriodIndex -> convert to + # timestamps + self.kind = "timestamp" + + # convert to timestamp + if self.kind == "timestamp": + obj = obj.to_timestamp(how=self.convention) + + return obj + + def _downsample(self, how, **kwargs): + """ + Downsample the cython defined function. + + Parameters + ---------- + how : string / cython mapped function + **kwargs : kw args passed to how function + """ + # we may need to actually resample as if we are timestamps + if self.kind == "timestamp": + return super()._downsample(how, **kwargs) + + how = com.get_cython_func(how) or how + ax = self.ax + + if is_subperiod(ax.freq, self.freq): + # Downsampling + return self._groupby_and_aggregate(how, **kwargs) + elif is_superperiod(ax.freq, self.freq): + if how == "ohlc": + # GH #13083 + # upsampling to subperiods is handled as an asfreq, which works + # for pure aggregating/reducing methods + # OHLC reduces along the time dimension, but creates multiple + # values for each period -> handle by _groupby_and_aggregate() + return self._groupby_and_aggregate(how) + return self.asfreq() + elif ax.freq == self.freq: + return self.asfreq() + + raise IncompatibleFrequency( + f"Frequency {ax.freq} cannot be resampled to {self.freq}, " + "as they are not sub or super periods" + ) + + def _upsample(self, method, limit=None, fill_value=None): + """ + Parameters + ---------- + method : {'backfill', 'bfill', 'pad', 'ffill'} + Method for upsampling. + limit : int, default None + Maximum size gap to fill when reindexing. + fill_value : scalar, default None + Value to use for missing values. + + See Also + -------- + .fillna: Fill NA/NaN values using the specified method. 
+ + """ + # we may need to actually resample as if we are timestamps + if self.kind == "timestamp": + return super()._upsample(method, limit=limit, fill_value=fill_value) + + ax = self.ax + obj = self.obj + new_index = self.binner + + # Start vs. end of period + memb = ax.asfreq(self.freq, how=self.convention) + + # Get the fill indexer + indexer = memb.get_indexer(new_index, method=method, limit=limit) + new_obj = _take_new_index( + obj, + indexer, + new_index, + axis=self.axis, + ) + return self._wrap_result(new_obj) + + +class PeriodIndexResamplerGroupby(_GroupByMixin, PeriodIndexResampler): + """ + Provides a resample of a groupby implementation. + """ + + @property + def _constructor(self): + return PeriodIndexResampler + + +class TimedeltaIndexResampler(DatetimeIndexResampler): + @property + def _resampler_for_grouping(self): + return TimedeltaIndexResamplerGroupby + + def _get_binner_for_time(self): + return self.groupby._get_time_delta_bins(self.ax) + + def _adjust_binner_for_upsample(self, binner): + """ + Adjust our binner when upsampling. + + The range of a new index is allowed to be greater than original range + so we don't need to change the length of a binner, GH 13022 + """ + return binner + + +class TimedeltaIndexResamplerGroupby(_GroupByMixin, TimedeltaIndexResampler): + """ + Provides a resample of a groupby implementation. + """ + + @property + def _constructor(self): + return TimedeltaIndexResampler + + +def get_resampler(obj, kind=None, **kwds): + """ + Create a TimeGrouper and return our resampler. + """ + tg = TimeGrouper(**kwds) + return tg._get_resampler(obj, kind=kind) + + +get_resampler.__doc__ = Resampler.__doc__ + + +def get_resampler_for_grouping( + groupby, rule, how=None, fill_method=None, limit=None, kind=None, on=None, **kwargs +): + """ + Return our appropriate resampler when grouping as well. + """ + # .resample uses 'on' similar to how .groupby uses 'key' + tg = TimeGrouper(freq=rule, key=on, **kwargs) + resampler = tg._get_resampler(groupby.obj, kind=kind) + return resampler._get_resampler_for_grouping(groupby=groupby) + + +class TimeGrouper(Grouper): + """ + Custom groupby class for time-interval grouping. 
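+
+ The public entry points are ``obj.resample(...)`` and
+ ``obj.groupby(pd.Grouper(freq=...))``; a sketch with assumed data:
+
+ >>> df = pd.DataFrame({'v': [1, 2, 3]},
+ ... index=pd.date_range('2000-01-01', periods=3, freq='D'))
+ >>> df.groupby(pd.Grouper(freq='2D')).sum()
+ v
+ 2000-01-01 3
+ 2000-01-03 3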
+ + Parameters + ---------- + freq : pandas date offset or offset alias for identifying bin edges + closed : closed end of interval; 'left' or 'right' + label : interval boundary to use for labeling; 'left' or 'right' + convention : {'start', 'end', 'e', 's'} + If axis is PeriodIndex + """ + + _attributes = Grouper._attributes + ( + "closed", + "label", + "how", + "loffset", + "kind", + "convention", + "origin", + "offset", + ) + + def __init__( + self, + freq="Min", + closed: Literal["left", "right"] | None = None, + label: str | None = None, + how="mean", + axis=0, + fill_method=None, + limit=None, + loffset=None, + kind: str | None = None, + convention: str | None = None, + base: int | None = None, + origin: str | TimestampConvertibleTypes = "start_day", + offset: TimedeltaConvertibleTypes | None = None, + **kwargs, + ): + # Check for correctness of the keyword arguments which would + # otherwise silently use the default if misspelled + if label not in {None, "left", "right"}: + raise ValueError(f"Unsupported value {label} for `label`") + if closed not in {None, "left", "right"}: + raise ValueError(f"Unsupported value {closed} for `closed`") + if convention not in {None, "start", "end", "e", "s"}: + raise ValueError(f"Unsupported value {convention} for `convention`") + + freq = to_offset(freq) + + end_types = {"M", "A", "Q", "BM", "BA", "BQ", "W"} + rule = freq.rule_code + if rule in end_types or ("-" in rule and rule[: rule.find("-")] in end_types): + if closed is None: + closed = "right" + if label is None: + label = "right" + else: + # The backward resample sets ``closed`` to ``'right'`` by default + # since the last value should be considered as the edge point for + # the last bin. When origin in "end" or "end_day", the value for a + # specific ``Timestamp`` index stands for the resample result from + # the current ``Timestamp`` minus ``freq`` to the current + # ``Timestamp`` with a right close. + if origin in ["end", "end_day"]: + if closed is None: + closed = "right" + if label is None: + label = "right" + else: + if closed is None: + closed = "left" + if label is None: + label = "left" + + self.closed = closed + self.label = label + self.kind = kind + + self.convention = convention or "E" + self.convention = self.convention.lower() + + self.how = how + self.fill_method = fill_method + self.limit = limit + + if origin in ("epoch", "start", "start_day", "end", "end_day"): + self.origin = origin + else: + try: + self.origin = Timestamp(origin) + except (ValueError, TypeError) as err: + raise ValueError( + "'origin' should be equal to 'epoch', 'start', 'start_day', " + "'end', 'end_day' or " + f"should be a Timestamp convertible type. Got '{origin}' instead." + ) from err + + try: + self.offset = Timedelta(offset) if offset is not None else None + except (ValueError, TypeError) as err: + raise ValueError( + "'offset' should be a Timedelta convertible type. " + f"Got '{offset}' instead." + ) from err + + # always sort time groupers + kwargs["sort"] = True + + # Handle deprecated arguments since v1.1.0 of `base` and `loffset` (GH #31809) + if base is not None and offset is not None: + raise ValueError("'offset' and 'base' cannot be present at the same time") + + if base and isinstance(freq, Tick): + # this conversion handle the default behavior of base and the + # special case of GH #10530. Indeed in case when dealing with + # a TimedeltaIndex base was treated as a 'pure' offset even though + # the default behavior of base was equivalent of a modulo on + # freq_nanos. 
+ self.offset = Timedelta(base * freq.nanos // freq.n) + + if isinstance(loffset, str): + loffset = to_offset(loffset) + self.loffset = loffset + + super().__init__(freq=freq, axis=axis, **kwargs) + + def _get_resampler(self, obj, kind=None): + """ + Return my resampler or raise if we have an invalid axis. + + Parameters + ---------- + obj : input object + kind : string, optional + 'period','timestamp','timedelta' are valid + + Returns + ------- + a Resampler + + Raises + ------ + TypeError if incompatible axis + + """ + self._set_grouper(obj) + + ax = self.ax + if isinstance(ax, DatetimeIndex): + return DatetimeIndexResampler(obj, groupby=self, kind=kind, axis=self.axis) + elif isinstance(ax, PeriodIndex) or kind == "period": + return PeriodIndexResampler(obj, groupby=self, kind=kind, axis=self.axis) + elif isinstance(ax, TimedeltaIndex): + return TimedeltaIndexResampler(obj, groupby=self, axis=self.axis) + + raise TypeError( + "Only valid with DatetimeIndex, " + "TimedeltaIndex or PeriodIndex, " + f"but got an instance of '{type(ax).__name__}'" + ) + + def _get_grouper(self, obj, validate: bool = True): + # create the resampler and return our binner + r = self._get_resampler(obj) + return r.binner, r.grouper, r.obj + + def _get_time_bins(self, ax: DatetimeIndex): + if not isinstance(ax, DatetimeIndex): + raise TypeError( + "axis must be a DatetimeIndex, but got " + f"an instance of {type(ax).__name__}" + ) + + if len(ax) == 0: + binner = labels = DatetimeIndex(data=[], freq=self.freq, name=ax.name) + return binner, [], labels + + first, last = _get_timestamp_range_edges( + ax.min(), + ax.max(), + self.freq, + closed=self.closed, + origin=self.origin, + offset=self.offset, + ) + # GH #12037 + # use first/last directly instead of call replace() on them + # because replace() will swallow the nanosecond part + # thus last bin maybe slightly before the end if the end contains + # nanosecond part and lead to `Values falls after last bin` error + # GH 25758: If DST lands at midnight (e.g. 
'America/Havana'), user feedback + # has noted that ambiguous=True provides the most sensible result + binner = labels = date_range( + freq=self.freq, + start=first, + end=last, + tz=ax.tz, + name=ax.name, + ambiguous=True, + nonexistent="shift_forward", + ) + + ax_values = ax.asi8 + binner, bin_edges = self._adjust_bin_edges(binner, ax_values) + + # general version, knowing nothing about relative frequencies + bins = lib.generate_bins_dt64( + ax_values, bin_edges, self.closed, hasnans=ax.hasnans + ) + + if self.closed == "right": + labels = binner + if self.label == "right": + labels = labels[1:] + elif self.label == "right": + labels = labels[1:] + + if ax.hasnans: + binner = binner.insert(0, NaT) + labels = labels.insert(0, NaT) + + # if we end up with more labels than bins + # adjust the labels + # GH4076 + if len(bins) < len(labels): + labels = labels[: len(bins)] + + return binner, bins, labels + + def _adjust_bin_edges(self, binner, ax_values): + # Some hacks for > daily data, see #1471, #1458, #1483 + + if self.freq != "D" and is_superperiod(self.freq, "D"): + if self.closed == "right": + # GH 21459, GH 9119: Adjust the bins relative to the wall time + bin_edges = binner.tz_localize(None) + bin_edges = bin_edges + timedelta(1) - Nano(1) + bin_edges = bin_edges.tz_localize(binner.tz).asi8 + else: + bin_edges = binner.asi8 + + # intraday values on last day + if bin_edges[-2] > ax_values.max(): + bin_edges = bin_edges[:-1] + binner = binner[:-1] + else: + bin_edges = binner.asi8 + return binner, bin_edges + + def _get_time_delta_bins(self, ax: TimedeltaIndex): + if not isinstance(ax, TimedeltaIndex): + raise TypeError( + "axis must be a TimedeltaIndex, but got " + f"an instance of {type(ax).__name__}" + ) + + if not len(ax): + binner = labels = TimedeltaIndex(data=[], freq=self.freq, name=ax.name) + return binner, [], labels + + start, end = ax.min(), ax.max() + labels = binner = timedelta_range( + start=start, end=end, freq=self.freq, name=ax.name + ) + + end_stamps = labels + self.freq + bins = ax.searchsorted(end_stamps, side="left") + + if self.offset: + # GH 10530 & 31809 + labels += self.offset + if self.loffset: + # GH 33498 + labels += self.loffset + + return binner, bins, labels + + def _get_time_period_bins(self, ax: DatetimeIndex): + if not isinstance(ax, DatetimeIndex): + raise TypeError( + "axis must be a DatetimeIndex, but got " + f"an instance of {type(ax).__name__}" + ) + + freq = self.freq + + if not len(ax): + binner = labels = PeriodIndex(data=[], freq=freq, name=ax.name) + return binner, [], labels + + labels = binner = period_range(start=ax[0], end=ax[-1], freq=freq, name=ax.name) + + end_stamps = (labels + freq).asfreq(freq, "s").to_timestamp() + if ax.tz: + end_stamps = end_stamps.tz_localize(ax.tz) + bins = ax.searchsorted(end_stamps, side="left") + + return binner, bins, labels + + def _get_period_bins(self, ax: PeriodIndex): + if not isinstance(ax, PeriodIndex): + raise TypeError( + "axis must be a PeriodIndex, but got " + f"an instance of {type(ax).__name__}" + ) + + memb = ax.asfreq(self.freq, how=self.convention) + + # NaT handling as in pandas._lib.lib.generate_bins_dt64() + nat_count = 0 + if memb.hasnans: + nat_count = np.sum(memb._isnan) + memb = memb[~memb._isnan] + + if not len(memb): + # index contains no valid (non-NaT) values + bins = np.array([], dtype=np.int64) + binner = labels = PeriodIndex(data=[], freq=self.freq, name=ax.name) + if len(ax) > 0: + # index is all NaT + binner, bins, labels = _insert_nat_bin(binner, bins, labels, len(ax)) + 
return binner, bins, labels
+
+ freq_mult = self.freq.n
+
+ start = ax.min().asfreq(self.freq, how=self.convention)
+ end = ax.max().asfreq(self.freq, how="end")
+ bin_shift = 0
+
+ if isinstance(self.freq, Tick):
+ # GH 23882 & 31809: get adjusted bin edge labels with 'origin'
+ # and 'offset' support. This call only makes sense if the freq is a
+ # Tick since offset and origin are only used in those cases.
+ # Not doing this check could create an extra empty bin.
+ p_start, end = _get_period_range_edges(
+ start,
+ end,
+ self.freq,
+ closed=self.closed,
+ origin=self.origin,
+ offset=self.offset,
+ )
+
+ # Get offset for bin edge (not label edge) adjustment
+ start_offset = Period(start, self.freq) - Period(p_start, self.freq)
+ # error: Item "Period" of "Union[Period, Any]" has no attribute "n"
+ bin_shift = start_offset.n % freq_mult # type: ignore[union-attr]
+ start = p_start
+
+ labels = binner = period_range(
+ start=start, end=end, freq=self.freq, name=ax.name
+ )
+
+ i8 = memb.asi8
+
+ # when upsampling to subperiods, we need to generate enough bins
+ expected_bins_count = len(binner) * freq_mult
+ i8_extend = expected_bins_count - (i8[-1] - i8[0])
+ rng = np.arange(i8[0], i8[-1] + i8_extend, freq_mult)
+ rng += freq_mult
+ # adjust bin edge indexes to account for base
+ rng -= bin_shift
+
+ # Wrap in PeriodArray for PeriodArray.searchsorted
+ prng = type(memb._data)(rng, dtype=memb.dtype)
+ bins = memb.searchsorted(prng, side="left")
+
+ if nat_count > 0:
+ binner, bins, labels = _insert_nat_bin(binner, bins, labels, nat_count)
+
+ return binner, bins, labels
+
+
+def _take_new_index(
+ obj: NDFrameT, indexer: npt.NDArray[np.intp], new_index: Index, axis: int = 0
+) -> NDFrameT:
+
+ if isinstance(obj, ABCSeries):
+ new_values = algos.take_nd(obj._values, indexer)
+ # error: Incompatible return value type (got "Series", expected "NDFrameT")
+ return obj._constructor( # type: ignore[return-value]
+ new_values, index=new_index, name=obj.name
+ )
+ elif isinstance(obj, ABCDataFrame):
+ if axis == 1:
+ raise NotImplementedError("axis 1 is not supported")
+ new_mgr = obj._mgr.reindex_indexer(new_axis=new_index, indexer=indexer, axis=1)
+ # error: Incompatible return value type
+ # (got "DataFrame", expected "NDFrameT")
+ return obj._constructor(new_mgr) # type: ignore[return-value]
+ else:
+ raise ValueError("'obj' should be either a Series or a DataFrame")
+
+
+def _get_timestamp_range_edges(
+ first: Timestamp,
+ last: Timestamp,
+ freq: BaseOffset,
+ closed: Literal["right", "left"] = "left",
+ origin="start_day",
+ offset: Timedelta | None = None,
+) -> tuple[Timestamp, Timestamp]:
+ """
+ Adjust the `first` Timestamp to the preceding Timestamp that resides on
+ the provided offset. Adjust the `last` Timestamp to the following
+ Timestamp that resides on the provided offset. Input Timestamps that
+ already reside on the offset will be adjusted depending on the type of
+ offset and the `closed` parameter.
+
+ Parameters
+ ----------
+ first : pd.Timestamp
+ The beginning Timestamp of the range to be adjusted.
+ last : pd.Timestamp
+ The ending Timestamp of the range to be adjusted.
+ freq : pd.DateOffset
+ The dateoffset to which the Timestamps will be adjusted.
+ closed : {'right', 'left'}, default "left"
+ Which side of bin interval is closed.
+ origin : {'epoch', 'start', 'start_day'} or Timestamp, default 'start_day'
+ The timestamp on which to adjust the grouping. The timezone of origin must
+ match the timezone of the index.
+ If a timestamp is not used, these values are also supported: + + - 'epoch': `origin` is 1970-01-01 + - 'start': `origin` is the first value of the timeseries + - 'start_day': `origin` is the first day at midnight of the timeseries + offset : pd.Timedelta, default is None + An offset timedelta added to the origin. + + Returns + ------- + A tuple of length 2, containing the adjusted pd.Timestamp objects. + """ + if isinstance(freq, Tick): + index_tz = first.tz + if isinstance(origin, Timestamp) and (origin.tz is None) != (index_tz is None): + raise ValueError("The origin must have the same timezone as the index.") + elif origin == "epoch": + # set the epoch based on the timezone to have similar bins results when + # resampling on the same kind of indexes on different timezones + origin = Timestamp("1970-01-01", tz=index_tz) + + if isinstance(freq, Day): + # _adjust_dates_anchored assumes 'D' means 24H, but first/last + # might contain a DST transition (23H, 24H, or 25H). + # So "pretend" the dates are naive when adjusting the endpoints + first = first.tz_localize(None) + last = last.tz_localize(None) + if isinstance(origin, Timestamp): + origin = origin.tz_localize(None) + + first, last = _adjust_dates_anchored( + first, last, freq, closed=closed, origin=origin, offset=offset + ) + if isinstance(freq, Day): + first = first.tz_localize(index_tz) + last = last.tz_localize(index_tz) + else: + first = first.normalize() + last = last.normalize() + + if closed == "left": + first = Timestamp(freq.rollback(first)) + else: + first = Timestamp(first - freq) + + last = Timestamp(last + freq) + + return first, last + + +def _get_period_range_edges( + first: Period, + last: Period, + freq: BaseOffset, + closed: Literal["right", "left"] = "left", + origin="start_day", + offset: Timedelta | None = None, +) -> tuple[Period, Period]: + """ + Adjust the provided `first` and `last` Periods to the respective Period of + the given offset that encompasses them. + + Parameters + ---------- + first : pd.Period + The beginning Period of the range to be adjusted. + last : pd.Period + The ending Period of the range to be adjusted. + freq : pd.DateOffset + The freq to which the Periods will be adjusted. + closed : {'right', 'left'}, default "left" + Which side of bin interval is closed. + origin : {'epoch', 'start', 'start_day'}, Timestamp, default 'start_day' + The timestamp on which to adjust the grouping. The timezone of origin must + match the timezone of the index. + + If a timestamp is not used, these values are also supported: + + - 'epoch': `origin` is 1970-01-01 + - 'start': `origin` is the first value of the timeseries + - 'start_day': `origin` is the first day at midnight of the timeseries + offset : pd.Timedelta, default is None + An offset timedelta added to the origin. + + Returns + ------- + A tuple of length 2, containing the adjusted pd.Period objects. 
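+
+ Notes
+ -----
+ These edge adjustments back the public ``origin``/``offset`` arguments
+ of ``resample``; a sketch with assumed data (the first edge is rolled
+ back onto the shifted grid so the earliest observation is covered):
+
+ >>> s = pd.Series(range(5),
+ ... index=pd.date_range('2000-01-01', periods=5, freq='h'))
+ >>> s.resample('2h', offset='30min').sum()
+ 1999-12-31 22:30:00 0
+ 2000-01-01 00:30:00 3
+ 2000-01-01 02:30:00 7
+ Freq: 2H, dtype: int64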
+ """ + if not all(isinstance(obj, Period) for obj in [first, last]): + raise TypeError("'first' and 'last' must be instances of type Period") + + # GH 23882 + first_ts = first.to_timestamp() + last_ts = last.to_timestamp() + adjust_first = not freq.is_on_offset(first_ts) + adjust_last = freq.is_on_offset(last_ts) + + first_ts, last_ts = _get_timestamp_range_edges( + first_ts, last_ts, freq, closed=closed, origin=origin, offset=offset + ) + + first = (first_ts + int(adjust_first) * freq).to_period(freq) + last = (last_ts - int(adjust_last) * freq).to_period(freq) + return first, last + + +def _insert_nat_bin( + binner: PeriodIndex, bins: np.ndarray, labels: PeriodIndex, nat_count: int +) -> tuple[PeriodIndex, np.ndarray, PeriodIndex]: + # NaT handling as in pandas._lib.lib.generate_bins_dt64() + # shift bins by the number of NaT + assert nat_count > 0 + bins += nat_count + bins = np.insert(bins, 0, nat_count) + + # Incompatible types in assignment (expression has type "Index", variable + # has type "PeriodIndex") + binner = binner.insert(0, NaT) # type: ignore[assignment] + # Incompatible types in assignment (expression has type "Index", variable + # has type "PeriodIndex") + labels = labels.insert(0, NaT) # type: ignore[assignment] + return binner, bins, labels + + +def _adjust_dates_anchored( + first: Timestamp, + last: Timestamp, + freq: Tick, + closed: Literal["right", "left"] = "right", + origin="start_day", + offset: Timedelta | None = None, +) -> tuple[Timestamp, Timestamp]: + # First and last offsets should be calculated from the start day to fix an + # error cause by resampling across multiple days when a one day period is + # not a multiple of the frequency. See GH 8683 + # To handle frequencies that are not multiple or divisible by a day we let + # the possibility to define a fixed origin timestamp. See GH 31809 + origin_nanos = 0 # origin == "epoch" + if origin == "start_day": + origin_nanos = first.normalize().value + elif origin == "start": + origin_nanos = first.value + elif isinstance(origin, Timestamp): + origin_nanos = origin.value + elif origin in ["end", "end_day"]: + origin = last if origin == "end" else last.ceil("D") + sub_freq_times = (origin.value - first.value) // freq.nanos + if closed == "left": + sub_freq_times += 1 + first = origin - sub_freq_times * freq + origin_nanos = first.value + origin_nanos += offset.value if offset else 0 + + # GH 10117 & GH 19375. If first and last contain timezone information, + # Perform the calculation in UTC in order to avoid localizing on an + # Ambiguous or Nonexistent time. 
+ first_tzinfo = first.tzinfo + last_tzinfo = last.tzinfo + if first_tzinfo is not None: + first = first.tz_convert("UTC") + if last_tzinfo is not None: + last = last.tz_convert("UTC") + + foffset = (first.value - origin_nanos) % freq.nanos + loffset = (last.value - origin_nanos) % freq.nanos + + if closed == "right": + if foffset > 0: + # roll back + fresult_int = first.value - foffset + else: + fresult_int = first.value - freq.nanos + + if loffset > 0: + # roll forward + lresult_int = last.value + (freq.nanos - loffset) + else: + # already the end of the road + lresult_int = last.value + else: # closed == 'left' + if foffset > 0: + fresult_int = first.value - foffset + else: + # start of the road + fresult_int = first.value + + if loffset > 0: + # roll forward + lresult_int = last.value + (freq.nanos - loffset) + else: + lresult_int = last.value + freq.nanos + fresult = Timestamp(fresult_int) + lresult = Timestamp(lresult_int) + if first_tzinfo is not None: + fresult = fresult.tz_localize("UTC").tz_convert(first_tzinfo) + if last_tzinfo is not None: + lresult = lresult.tz_localize("UTC").tz_convert(last_tzinfo) + return fresult, lresult + + +def asfreq( + obj: NDFrameT, + freq, + method=None, + how=None, + normalize: bool = False, + fill_value=None, +) -> NDFrameT: + """ + Utility frequency conversion method for Series/DataFrame. + + See :meth:`pandas.NDFrame.asfreq` for full documentation. + """ + if isinstance(obj.index, PeriodIndex): + if method is not None: + raise NotImplementedError("'method' argument is not supported") + + if how is None: + how = "E" + + new_obj = obj.copy() + new_obj.index = obj.index.asfreq(freq, how=how) + + elif len(obj.index) == 0: + new_obj = obj.copy() + + new_obj.index = _asfreq_compat(obj.index, freq) + else: + dti = date_range(obj.index.min(), obj.index.max(), freq=freq) + dti.name = obj.index.name + new_obj = obj.reindex(dti, method=method, fill_value=fill_value) + if normalize: + new_obj.index = new_obj.index.normalize() + + return new_obj + + +def _asfreq_compat(index: DatetimeIndex | PeriodIndex | TimedeltaIndex, freq): + """ + Helper to mimic asfreq on (empty) DatetimeIndex and TimedeltaIndex. 
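+
+ The non-empty counterpart ``asfreq`` just above delegates to
+ ``reindex`` on a fresh ``date_range``; a sketch of that public
+ behavior with assumed data:
+
+ >>> s = pd.Series([1, 2],
+ ... index=pd.to_datetime(['2020-01-01', '2020-01-03']))
+ >>> s.asfreq('D')
+ 2020-01-01 1.0
+ 2020-01-02 NaN
+ 2020-01-03 2.0
+ Freq: D, dtype: float64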
+ + Parameters + ---------- + index : PeriodIndex, DatetimeIndex, or TimedeltaIndex + freq : DateOffset + + Returns + ------- + same type as index + """ + if len(index) != 0: + # This should never be reached, always checked by the caller + raise ValueError( + "Can only set arbitrary freq for empty DatetimeIndex or TimedeltaIndex" + ) + new_index: Index + if isinstance(index, PeriodIndex): + new_index = index.asfreq(freq=freq) + elif isinstance(index, DatetimeIndex): + new_index = DatetimeIndex([], dtype=index.dtype, freq=freq, name=index.name) + elif isinstance(index, TimedeltaIndex): + new_index = TimedeltaIndex([], dtype=index.dtype, freq=freq, name=index.name) + else: # pragma: no cover + raise TypeError(type(index)) + return new_index diff --git a/.local/lib/python3.8/site-packages/pandas/core/reshape/__init__.py b/.local/lib/python3.8/site-packages/pandas/core/reshape/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/core/reshape/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/reshape/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..7b41641 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/reshape/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/reshape/__pycache__/api.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/reshape/__pycache__/api.cpython-38.pyc new file mode 100644 index 0000000..350a67e Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/reshape/__pycache__/api.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/reshape/__pycache__/concat.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/reshape/__pycache__/concat.cpython-38.pyc new file mode 100644 index 0000000..1fb9d61 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/reshape/__pycache__/concat.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/reshape/__pycache__/melt.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/reshape/__pycache__/melt.cpython-38.pyc new file mode 100644 index 0000000..8bf25b1 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/reshape/__pycache__/melt.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/reshape/__pycache__/merge.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/reshape/__pycache__/merge.cpython-38.pyc new file mode 100644 index 0000000..8f54534 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/reshape/__pycache__/merge.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/reshape/__pycache__/pivot.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/reshape/__pycache__/pivot.cpython-38.pyc new file mode 100644 index 0000000..5c6abe1 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/reshape/__pycache__/pivot.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/reshape/__pycache__/reshape.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/reshape/__pycache__/reshape.cpython-38.pyc new file mode 100644 index 0000000..b054f23 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/reshape/__pycache__/reshape.cpython-38.pyc differ diff --git 
a/.local/lib/python3.8/site-packages/pandas/core/reshape/__pycache__/tile.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/reshape/__pycache__/tile.cpython-38.pyc new file mode 100644 index 0000000..c6ea5cb Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/reshape/__pycache__/tile.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/reshape/__pycache__/util.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/reshape/__pycache__/util.cpython-38.pyc new file mode 100644 index 0000000..943c3b6 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/reshape/__pycache__/util.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/reshape/api.py b/.local/lib/python3.8/site-packages/pandas/core/reshape/api.py new file mode 100644 index 0000000..bffdadb --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/reshape/api.py @@ -0,0 +1,23 @@ +# flake8: noqa:F401 + +from pandas.core.reshape.concat import concat +from pandas.core.reshape.melt import ( + lreshape, + melt, + wide_to_long, +) +from pandas.core.reshape.merge import ( + merge, + merge_asof, + merge_ordered, +) +from pandas.core.reshape.pivot import ( + crosstab, + pivot, + pivot_table, +) +from pandas.core.reshape.reshape import get_dummies +from pandas.core.reshape.tile import ( + cut, + qcut, +) diff --git a/.local/lib/python3.8/site-packages/pandas/core/reshape/concat.py b/.local/lib/python3.8/site-packages/pandas/core/reshape/concat.py new file mode 100644 index 0000000..71b53d5 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/reshape/concat.py @@ -0,0 +1,790 @@ +""" +Concat routines. +""" +from __future__ import annotations + +from collections import abc +from typing import ( + TYPE_CHECKING, + Hashable, + Iterable, + Literal, + Mapping, + cast, + overload, +) +import warnings + +import numpy as np + +from pandas._typing import Axis +from pandas.util._decorators import ( + cache_readonly, + deprecate_nonkeyword_arguments, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.concat import concat_compat +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) +from pandas.core.dtypes.inference import is_bool +from pandas.core.dtypes.missing import isna + +from pandas.core.arrays.categorical import ( + factorize_from_iterable, + factorize_from_iterables, +) +import pandas.core.common as com +from pandas.core.indexes.api import ( + Index, + MultiIndex, + all_indexes_same, + default_index, + ensure_index, + get_objs_combined_axis, + get_unanimous_names, +) +from pandas.core.internals import concatenate_managers + +if TYPE_CHECKING: + from pandas import ( + DataFrame, + Series, + ) + from pandas.core.generic import NDFrame + +# --------------------------------------------------------------------- +# Concatenate DataFrame objects + + +@overload +def concat( + objs: Iterable[DataFrame] | Mapping[Hashable, DataFrame], + axis: Literal[0, "index"] = ..., + join: str = ..., + ignore_index: bool = ..., + keys=..., + levels=..., + names=..., + verify_integrity: bool = ..., + sort: bool = ..., + copy: bool = ..., +) -> DataFrame: + ... + + +@overload +def concat( + objs: Iterable[Series] | Mapping[Hashable, Series], + axis: Literal[0, "index"] = ..., + join: str = ..., + ignore_index: bool = ..., + keys=..., + levels=..., + names=..., + verify_integrity: bool = ..., + sort: bool = ..., + copy: bool = ..., +) -> Series: + ... 
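+# The overload stubs above and below encode concat's return-type rule:
+# concatenating only Series along axis=0 yields a Series, while anything
+# involving a DataFrame, or axis=1, yields a DataFrame. An illustrative
+# sketch (not part of pandas):
+#
+# >>> pd.concat([pd.Series([1]), pd.Series([2])]) # -> Series
+# >>> pd.concat([pd.Series([1]), pd.Series([2])], axis=1) # -> DataFrame
+# >>> pd.concat([pd.DataFrame({'a': [1]}), pd.Series([2])]) # -> DataFrame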
+ + +@overload +def concat( + objs: Iterable[NDFrame] | Mapping[Hashable, NDFrame], + axis: Literal[0, "index"] = ..., + join: str = ..., + ignore_index: bool = ..., + keys=..., + levels=..., + names=..., + verify_integrity: bool = ..., + sort: bool = ..., + copy: bool = ..., +) -> DataFrame | Series: + ... + + +@overload +def concat( + objs: Iterable[NDFrame] | Mapping[Hashable, NDFrame], + axis: Literal[1, "columns"], + join: str = ..., + ignore_index: bool = ..., + keys=..., + levels=..., + names=..., + verify_integrity: bool = ..., + sort: bool = ..., + copy: bool = ..., +) -> DataFrame: + ... + + +@overload +def concat( + objs: Iterable[NDFrame] | Mapping[Hashable, NDFrame], + axis: Axis = ..., + join: str = ..., + ignore_index: bool = ..., + keys=..., + levels=..., + names=..., + verify_integrity: bool = ..., + sort: bool = ..., + copy: bool = ..., +) -> DataFrame | Series: + ... + + +@deprecate_nonkeyword_arguments(version=None, allowed_args=["objs"]) +def concat( + objs: Iterable[NDFrame] | Mapping[Hashable, NDFrame], + axis: Axis = 0, + join: str = "outer", + ignore_index: bool = False, + keys=None, + levels=None, + names=None, + verify_integrity: bool = False, + sort: bool = False, + copy: bool = True, +) -> DataFrame | Series: + """ + Concatenate pandas objects along a particular axis with optional set logic + along the other axes. + + Can also add a layer of hierarchical indexing on the concatenation axis, + which may be useful if the labels are the same (or overlapping) on + the passed axis number. + + Parameters + ---------- + objs : a sequence or mapping of Series or DataFrame objects + If a mapping is passed, the sorted keys will be used as the `keys` + argument, unless it is passed, in which case the values will be + selected (see below). Any None objects will be dropped silently unless + they are all None in which case a ValueError will be raised. + axis : {0/'index', 1/'columns'}, default 0 + The axis to concatenate along. + join : {'inner', 'outer'}, default 'outer' + How to handle indexes on other axis (or axes). + ignore_index : bool, default False + If True, do not use the index values along the concatenation axis. The + resulting axis will be labeled 0, ..., n - 1. This is useful if you are + concatenating objects where the concatenation axis does not have + meaningful indexing information. Note the index values on the other + axes are still respected in the join. + keys : sequence, default None + If multiple levels passed, should contain tuples. Construct + hierarchical index using the passed keys as the outermost level. + levels : list of sequences, default None + Specific levels (unique values) to use for constructing a + MultiIndex. Otherwise they will be inferred from the keys. + names : list, default None + Names for the levels in the resulting hierarchical index. + verify_integrity : bool, default False + Check whether the new concatenated axis contains duplicates. This can + be very expensive relative to the actual data concatenation. + sort : bool, default False + Sort non-concatenation axis if it is not already aligned when `join` + is 'outer'. + This has no effect when ``join='inner'``, which already preserves + the order of the non-concatenation axis. + + .. versionchanged:: 1.0.0 + + Changed to not sort by default. + + copy : bool, default True + If False, do not copy data unnecessarily. + + Returns + ------- + object, type of objs + When concatenating all ``Series`` along the index (axis=0), a + ``Series`` is returned. 
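# --- Editor's annotation (a sketch; not part of the vendored docstring) ---
# The mapping form of ``objs`` described in the Parameters above: dict keys
# become the outer level of the result's index, exactly as if the same
# labels had been passed through ``keys=``. Labels 's1'/'s2' are made up.
import pandas as pd

pieces = {"s1": pd.Series(["a", "b"]), "s2": pd.Series(["c", "d"])}
from_mapping = pd.concat(pieces)
from_keys = pd.concat(pieces.values(), keys=list(pieces))
assert from_mapping.equals(from_keys)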
When ``objs`` contains at least one + ``DataFrame``, a ``DataFrame`` is returned. When concatenating along + the columns (axis=1), a ``DataFrame`` is returned. + + See Also + -------- + Series.append : Concatenate Series. + DataFrame.append : Concatenate DataFrames. + DataFrame.join : Join DataFrames using indexes. + DataFrame.merge : Merge DataFrames by indexes or columns. + + Notes + ----- + The keys, levels, and names arguments are all optional. + + A walkthrough of how this method fits in with other tools for combining + pandas objects can be found `here + `__. + + Examples + -------- + Combine two ``Series``. + + >>> s1 = pd.Series(['a', 'b']) + >>> s2 = pd.Series(['c', 'd']) + >>> pd.concat([s1, s2]) + 0 a + 1 b + 0 c + 1 d + dtype: object + + Clear the existing index and reset it in the result + by setting the ``ignore_index`` option to ``True``. + + >>> pd.concat([s1, s2], ignore_index=True) + 0 a + 1 b + 2 c + 3 d + dtype: object + + Add a hierarchical index at the outermost level of + the data with the ``keys`` option. + + >>> pd.concat([s1, s2], keys=['s1', 's2']) + s1 0 a + 1 b + s2 0 c + 1 d + dtype: object + + Label the index keys you create with the ``names`` option. + + >>> pd.concat([s1, s2], keys=['s1', 's2'], + ... names=['Series name', 'Row ID']) + Series name Row ID + s1 0 a + 1 b + s2 0 c + 1 d + dtype: object + + Combine two ``DataFrame`` objects with identical columns. + + >>> df1 = pd.DataFrame([['a', 1], ['b', 2]], + ... columns=['letter', 'number']) + >>> df1 + letter number + 0 a 1 + 1 b 2 + >>> df2 = pd.DataFrame([['c', 3], ['d', 4]], + ... columns=['letter', 'number']) + >>> df2 + letter number + 0 c 3 + 1 d 4 + >>> pd.concat([df1, df2]) + letter number + 0 a 1 + 1 b 2 + 0 c 3 + 1 d 4 + + Combine ``DataFrame`` objects with overlapping columns + and return everything. Columns outside the intersection will + be filled with ``NaN`` values. + + >>> df3 = pd.DataFrame([['c', 3, 'cat'], ['d', 4, 'dog']], + ... columns=['letter', 'number', 'animal']) + >>> df3 + letter number animal + 0 c 3 cat + 1 d 4 dog + >>> pd.concat([df1, df3], sort=False) + letter number animal + 0 a 1 NaN + 1 b 2 NaN + 0 c 3 cat + 1 d 4 dog + + Combine ``DataFrame`` objects with overlapping columns + and return only those that are shared by passing ``inner`` to + the ``join`` keyword argument. + + >>> pd.concat([df1, df3], join="inner") + letter number + 0 a 1 + 1 b 2 + 0 c 3 + 1 d 4 + + Combine ``DataFrame`` objects horizontally along the x axis by + passing in ``axis=1``. + + >>> df4 = pd.DataFrame([['bird', 'polly'], ['monkey', 'george']], + ... columns=['animal', 'name']) + >>> pd.concat([df1, df4], axis=1) + letter number animal name + 0 a 1 bird polly + 1 b 2 monkey george + + Prevent the result from including duplicate index values with the + ``verify_integrity`` option. + + >>> df5 = pd.DataFrame([1], index=['a']) + >>> df5 + 0 + a 1 + >>> df6 = pd.DataFrame([2], index=['a']) + >>> df6 + 0 + a 2 + >>> pd.concat([df5, df6], verify_integrity=True) + Traceback (most recent call last): + ... 
+ ValueError: Indexes have overlapping values: ['a'] + """ + op = _Concatenator( + objs, + axis=axis, + ignore_index=ignore_index, + join=join, + keys=keys, + levels=levels, + names=names, + verify_integrity=verify_integrity, + copy=copy, + sort=sort, + ) + + return op.get_result() + + +class _Concatenator: + """ + Orchestrates a concatenation operation for BlockManagers + """ + + def __init__( + self, + objs: Iterable[NDFrame] | Mapping[Hashable, NDFrame], + axis=0, + join: str = "outer", + keys=None, + levels=None, + names=None, + ignore_index: bool = False, + verify_integrity: bool = False, + copy: bool = True, + sort=False, + ): + if isinstance(objs, (ABCSeries, ABCDataFrame, str)): + raise TypeError( + "first argument must be an iterable of pandas " + f'objects, you passed an object of type "{type(objs).__name__}"' + ) + + if join == "outer": + self.intersect = False + elif join == "inner": + self.intersect = True + else: # pragma: no cover + raise ValueError( + "Only can inner (intersect) or outer (union) join the other axis" + ) + + if isinstance(objs, abc.Mapping): + if keys is None: + keys = list(objs.keys()) + objs = [objs[k] for k in keys] + else: + objs = list(objs) + + if len(objs) == 0: + raise ValueError("No objects to concatenate") + + if keys is None: + objs = list(com.not_none(*objs)) + else: + # #1649 + clean_keys = [] + clean_objs = [] + for k, v in zip(keys, objs): + if v is None: + continue + clean_keys.append(k) + clean_objs.append(v) + objs = clean_objs + + if isinstance(keys, MultiIndex): + # TODO: retain levels? + keys = type(keys).from_tuples(clean_keys, names=keys.names) + else: + name = getattr(keys, "name", None) + keys = Index(clean_keys, name=name) + + if len(objs) == 0: + raise ValueError("All objects passed were None") + + # figure out what our result ndim is going to be + ndims = set() + for obj in objs: + if not isinstance(obj, (ABCSeries, ABCDataFrame)): + msg = ( + f"cannot concatenate object of type '{type(obj)}'; " + "only Series and DataFrame objs are valid" + ) + raise TypeError(msg) + + ndims.add(obj.ndim) + + # get the sample + # want the highest ndim that we have, and must be non-empty + # unless all objs are empty + sample: NDFrame | None = None + if len(ndims) > 1: + max_ndim = max(ndims) + for obj in objs: + if obj.ndim == max_ndim and np.sum(obj.shape): + sample = obj + break + + else: + # filter out the empties if we have not multi-index possibilities + # note to keep empty Series as it affect to result columns / name + non_empties = [ + obj for obj in objs if sum(obj.shape) > 0 or isinstance(obj, ABCSeries) + ] + + if len(non_empties) and ( + keys is None and names is None and levels is None and not self.intersect + ): + objs = non_empties + sample = objs[0] + + if sample is None: + sample = objs[0] + self.objs = objs + + # Standardize axis parameter to int + if isinstance(sample, ABCSeries): + axis = sample._constructor_expanddim._get_axis_number(axis) + else: + axis = sample._get_axis_number(axis) + + # Need to flip BlockManager axis in the DataFrame special case + self._is_frame = isinstance(sample, ABCDataFrame) + if self._is_frame: + axis = sample._get_block_manager_axis(axis) + + self._is_series = isinstance(sample, ABCSeries) + if not 0 <= axis <= sample.ndim: + raise AssertionError( + f"axis must be between 0 and {sample.ndim}, input was {axis}" + ) + + # if we have mixed ndims, then convert to highest ndim + # creating column numbers as needed + if len(ndims) > 1: + current_column = 0 + max_ndim = sample.ndim + self.objs, objs = 
[], self.objs + for obj in objs: + + ndim = obj.ndim + if ndim == max_ndim: + pass + + elif ndim != max_ndim - 1: + raise ValueError( + "cannot concatenate unaligned mixed " + "dimensional NDFrame objects" + ) + + else: + name = getattr(obj, "name", None) + if ignore_index or name is None: + name = current_column + current_column += 1 + + # doing a row-wise concatenation so need everything + # to line up + if self._is_frame and axis == 1: + name = 0 + # mypy needs to know sample is not an NDFrame + sample = cast("DataFrame | Series", sample) + obj = sample._constructor({name: obj}) + + self.objs.append(obj) + + # note: this is the BlockManager axis (since DataFrame is transposed) + self.bm_axis = axis + self.axis = 1 - self.bm_axis if self._is_frame else 0 + self.keys = keys + self.names = names or getattr(keys, "names", None) + self.levels = levels + + if not is_bool(sort): + warnings.warn( + "Passing non boolean values for sort is deprecated and " + "will error in a future version!", + FutureWarning, + stacklevel=find_stack_level(), + ) + self.sort = sort + + self.ignore_index = ignore_index + self.verify_integrity = verify_integrity + self.copy = copy + + self.new_axes = self._get_new_axes() + + def get_result(self): + cons: type[DataFrame | Series] + sample: DataFrame | Series + + # series only + if self._is_series: + sample = cast("Series", self.objs[0]) + + # stack blocks + if self.bm_axis == 0: + name = com.consensus_name_attr(self.objs) + cons = sample._constructor + + arrs = [ser._values for ser in self.objs] + + res = concat_compat(arrs, axis=0) + result = cons(res, index=self.new_axes[0], name=name, dtype=res.dtype) + return result.__finalize__(self, method="concat") + + # combine as columns in a frame + else: + data = dict(zip(range(len(self.objs)), self.objs)) + + # GH28330 Preserves subclassed objects through concat + cons = sample._constructor_expanddim + + index, columns = self.new_axes + df = cons(data, index=index, copy=self.copy) + df.columns = columns + return df.__finalize__(self, method="concat") + + # combine block managers + else: + sample = cast("DataFrame", self.objs[0]) + + mgrs_indexers = [] + for obj in self.objs: + indexers = {} + for ax, new_labels in enumerate(self.new_axes): + # ::-1 to convert BlockManager ax to DataFrame ax + if ax == self.bm_axis: + # Suppress reindexing on concat axis + continue + + # 1-ax to convert BlockManager axis to DataFrame axis + obj_labels = obj.axes[1 - ax] + if not new_labels.equals(obj_labels): + indexers[ax] = obj_labels.get_indexer(new_labels) + + mgrs_indexers.append((obj._mgr, indexers)) + + new_data = concatenate_managers( + mgrs_indexers, self.new_axes, concat_axis=self.bm_axis, copy=self.copy + ) + if not self.copy: + new_data._consolidate_inplace() + + cons = sample._constructor + return cons(new_data).__finalize__(self, method="concat") + + def _get_result_dim(self) -> int: + if self._is_series and self.bm_axis == 1: + return 2 + else: + return self.objs[0].ndim + + def _get_new_axes(self) -> list[Index]: + ndim = self._get_result_dim() + return [ + self._get_concat_axis if i == self.bm_axis else self._get_comb_axis(i) + for i in range(ndim) + ] + + def _get_comb_axis(self, i: int) -> Index: + data_axis = self.objs[0]._get_block_manager_axis(i) + return get_objs_combined_axis( + self.objs, + axis=data_axis, + intersect=self.intersect, + sort=self.sort, + copy=self.copy, + ) + + @cache_readonly + def _get_concat_axis(self) -> Index: + """ + Return index to be used along concatenation axis. 
+ """ + if self._is_series: + if self.bm_axis == 0: + indexes = [x.index for x in self.objs] + elif self.ignore_index: + idx = default_index(len(self.objs)) + return idx + elif self.keys is None: + names: list[Hashable] = [None] * len(self.objs) + num = 0 + has_names = False + for i, x in enumerate(self.objs): + if not isinstance(x, ABCSeries): + raise TypeError( + f"Cannot concatenate type 'Series' with " + f"object of type '{type(x).__name__}'" + ) + if x.name is not None: + names[i] = x.name + has_names = True + else: + names[i] = num + num += 1 + if has_names: + return Index(names) + else: + return default_index(len(self.objs)) + else: + return ensure_index(self.keys).set_names(self.names) + else: + indexes = [x.axes[self.axis] for x in self.objs] + + if self.ignore_index: + idx = default_index(sum(len(i) for i in indexes)) + return idx + + if self.keys is None: + concat_axis = _concat_indexes(indexes) + else: + concat_axis = _make_concat_multiindex( + indexes, self.keys, self.levels, self.names + ) + + self._maybe_check_integrity(concat_axis) + + return concat_axis + + def _maybe_check_integrity(self, concat_index: Index): + if self.verify_integrity: + if not concat_index.is_unique: + overlap = concat_index[concat_index.duplicated()].unique() + raise ValueError(f"Indexes have overlapping values: {overlap}") + + +def _concat_indexes(indexes) -> Index: + return indexes[0].append(indexes[1:]) + + +def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiIndex: + + if (levels is None and isinstance(keys[0], tuple)) or ( + levels is not None and len(levels) > 1 + ): + zipped = list(zip(*keys)) + if names is None: + names = [None] * len(zipped) + + if levels is None: + _, levels = factorize_from_iterables(zipped) + else: + levels = [ensure_index(x) for x in levels] + else: + zipped = [keys] + if names is None: + names = [None] + + if levels is None: + levels = [ensure_index(keys)] + else: + levels = [ensure_index(x) for x in levels] + + if not all_indexes_same(indexes) or not all(level.is_unique for level in levels): + codes_list = [] + + # things are potentially different sizes, so compute the exact codes + # for each level and pass those to MultiIndex.from_arrays + + for hlevel, level in zip(zipped, levels): + to_concat = [] + for key, index in zip(hlevel, indexes): + # Find matching codes, include matching nan values as equal. 
+ mask = (isna(level) & isna(key)) | (level == key) + if not mask.any(): + raise ValueError(f"Key {key} not in level {level}") + i = np.nonzero(mask)[0][0] + + to_concat.append(np.repeat(i, len(index))) + codes_list.append(np.concatenate(to_concat)) + + concat_index = _concat_indexes(indexes) + + # these go at the end + if isinstance(concat_index, MultiIndex): + levels.extend(concat_index.levels) + codes_list.extend(concat_index.codes) + else: + codes, categories = factorize_from_iterable(concat_index) + levels.append(categories) + codes_list.append(codes) + + if len(names) == len(levels): + names = list(names) + else: + # make sure that all of the passed indices have the same nlevels + if not len({idx.nlevels for idx in indexes}) == 1: + raise AssertionError( + "Cannot concat indices that do not have the same number of levels" + ) + + # also copies + names = list(names) + list(get_unanimous_names(*indexes)) + + return MultiIndex( + levels=levels, codes=codes_list, names=names, verify_integrity=False + ) + + new_index = indexes[0] + n = len(new_index) + kpieces = len(indexes) + + # also copies + new_names = list(names) + new_levels = list(levels) + + # construct codes + new_codes = [] + + # do something a bit more speedy + + for hlevel, level in zip(zipped, levels): + hlevel = ensure_index(hlevel) + mapped = level.get_indexer(hlevel) + + mask = mapped == -1 + if mask.any(): + raise ValueError(f"Values not found in passed level: {hlevel[mask]!s}") + + new_codes.append(np.repeat(mapped, n)) + + if isinstance(new_index, MultiIndex): + new_levels.extend(new_index.levels) + new_codes.extend([np.tile(lab, kpieces) for lab in new_index.codes]) + else: + new_levels.append(new_index.unique()) + single_codes = new_index.unique().get_indexer(new_index) + new_codes.append(np.tile(single_codes, kpieces)) + + if len(new_names) < len(new_levels): + new_names.extend(new_index.names) + + return MultiIndex( + levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False + ) diff --git a/.local/lib/python3.8/site-packages/pandas/core/reshape/melt.py b/.local/lib/python3.8/site-packages/pandas/core/reshape/melt.py new file mode 100644 index 0000000..4d5b115 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/reshape/melt.py @@ -0,0 +1,547 @@ +from __future__ import annotations + +import re +from typing import TYPE_CHECKING +import warnings + +import numpy as np + +from pandas.util._decorators import ( + Appender, + deprecate_kwarg, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + is_extension_array_dtype, + is_list_like, +) +from pandas.core.dtypes.concat import concat_compat +from pandas.core.dtypes.missing import notna + +import pandas.core.algorithms as algos +from pandas.core.arrays import Categorical +import pandas.core.common as com +from pandas.core.indexes.api import ( + Index, + MultiIndex, +) +from pandas.core.reshape.concat import concat +from pandas.core.reshape.util import tile_compat +from pandas.core.shared_docs import _shared_docs +from pandas.core.tools.numeric import to_numeric + +if TYPE_CHECKING: + from pandas import DataFrame + + +@Appender(_shared_docs["melt"] % {"caller": "pd.melt(df, ", "other": "DataFrame.melt"}) +def melt( + frame: DataFrame, + id_vars=None, + value_vars=None, + var_name=None, + value_name="value", + col_level=None, + ignore_index: bool = True, +) -> DataFrame: + # If multiindex, gather names of columns on all level for checking presence + # of `id_vars` and `value_vars` + if 
isinstance(frame.columns, MultiIndex): + cols = [x for c in frame.columns for x in c] + else: + cols = list(frame.columns) + + if value_name in frame.columns: + warnings.warn( + "This dataframe has a column name that matches the 'value_name' column " + "name of the resulting Dataframe. " + "In the future this will raise an error, please set the 'value_name' " + "parameter of DataFrame.melt to a unique name.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + if id_vars is not None: + if not is_list_like(id_vars): + id_vars = [id_vars] + elif isinstance(frame.columns, MultiIndex) and not isinstance(id_vars, list): + raise ValueError( + "id_vars must be a list of tuples when columns are a MultiIndex" + ) + else: + # Check that `id_vars` are in frame + id_vars = list(id_vars) + missing = Index(com.flatten(id_vars)).difference(cols) + if not missing.empty: + raise KeyError( + "The following 'id_vars' are not present " + f"in the DataFrame: {list(missing)}" + ) + else: + id_vars = [] + + if value_vars is not None: + if not is_list_like(value_vars): + value_vars = [value_vars] + elif isinstance(frame.columns, MultiIndex) and not isinstance(value_vars, list): + raise ValueError( + "value_vars must be a list of tuples when columns are a MultiIndex" + ) + else: + value_vars = list(value_vars) + # Check that `value_vars` are in frame + missing = Index(com.flatten(value_vars)).difference(cols) + if not missing.empty: + raise KeyError( + "The following 'value_vars' are not present in " + f"the DataFrame: {list(missing)}" + ) + if col_level is not None: + idx = frame.columns.get_level_values(col_level).get_indexer( + id_vars + value_vars + ) + else: + idx = algos.unique(frame.columns.get_indexer_for(id_vars + value_vars)) + frame = frame.iloc[:, idx] + else: + frame = frame.copy() + + if col_level is not None: # allow list or other? 
+ # frame is a copy + frame.columns = frame.columns.get_level_values(col_level) + + if var_name is None: + if isinstance(frame.columns, MultiIndex): + if len(frame.columns.names) == len(set(frame.columns.names)): + var_name = frame.columns.names + else: + var_name = [f"variable_{i}" for i in range(len(frame.columns.names))] + else: + var_name = [ + frame.columns.name if frame.columns.name is not None else "variable" + ] + if isinstance(var_name, str): + var_name = [var_name] + + N, K = frame.shape + K -= len(id_vars) + + mdata = {} + for col in id_vars: + id_data = frame.pop(col) + if is_extension_array_dtype(id_data): + id_data = concat([id_data] * K, ignore_index=True) + else: + # Incompatible types in assignment (expression has type + # "ndarray[Any, dtype[Any]]", variable has type "Series") [assignment] + id_data = np.tile(id_data._values, K) # type: ignore[assignment] + mdata[col] = id_data + + mcolumns = id_vars + var_name + [value_name] + + # error: Incompatible types in assignment (expression has type "ndarray", + # target has type "Series") + mdata[value_name] = frame._values.ravel("F") # type: ignore[assignment] + for i, col in enumerate(var_name): + # asanyarray will keep the columns as an Index + + # error: Incompatible types in assignment (expression has type "ndarray", target + # has type "Series") + mdata[col] = np.asanyarray( # type: ignore[assignment] + frame.columns._get_level_values(i) + ).repeat(N) + + result = frame._constructor(mdata, columns=mcolumns) + + if not ignore_index: + result.index = tile_compat(frame.index, K) + + return result + + +@deprecate_kwarg(old_arg_name="label", new_arg_name=None) +def lreshape(data: DataFrame, groups, dropna: bool = True, label=None) -> DataFrame: + """ + Reshape wide-format data to long. Generalized inverse of DataFrame.pivot. + + Accepts a dictionary, ``groups``, in which each key is a new column name + and each value is a list of old column names that will be "melted" under + the new column name as part of the reshape. + + Parameters + ---------- + data : DataFrame + The wide-format DataFrame. + groups : dict + {new_name : list_of_columns}. + dropna : bool, default True + Do not include columns whose entries are all NaN. + label : None + Not used. + + .. deprecated:: 1.0.0 + + Returns + ------- + DataFrame + Reshaped DataFrame. + + See Also + -------- + melt : Unpivot a DataFrame from wide to long format, optionally leaving + identifiers set. + pivot : Create a spreadsheet-style pivot table as a DataFrame. + DataFrame.pivot : Pivot without aggregation that can handle + non-numeric data. + DataFrame.pivot_table : Generalization of pivot that can handle + duplicate values for one index/column pair. + DataFrame.unstack : Pivot based on the index values instead of a + column. + wide_to_long : Wide panel to long format. Less flexible but more + user-friendly than melt. + + Examples + -------- + >>> data = pd.DataFrame({'hr1': [514, 573], 'hr2': [545, 526], + ... 'team': ['Red Sox', 'Yankees'], + ... 
'year1': [2007, 2007], 'year2': [2008, 2008]}) + >>> data + hr1 hr2 team year1 year2 + 0 514 545 Red Sox 2007 2008 + 1 573 526 Yankees 2007 2008 + + >>> pd.lreshape(data, {'year': ['year1', 'year2'], 'hr': ['hr1', 'hr2']}) + team year hr + 0 Red Sox 2007 514 + 1 Yankees 2007 573 + 2 Red Sox 2008 545 + 3 Yankees 2008 526 + """ + if isinstance(groups, dict): + keys = list(groups.keys()) + values = list(groups.values()) + else: + keys, values = zip(*groups) + + all_cols = list(set.union(*(set(x) for x in values))) + id_cols = list(data.columns.difference(all_cols)) + + K = len(values[0]) + + for seq in values: + if len(seq) != K: + raise ValueError("All column lists must be same length") + + mdata = {} + pivot_cols = [] + + for target, names in zip(keys, values): + to_concat = [data[col]._values for col in names] + + mdata[target] = concat_compat(to_concat) + pivot_cols.append(target) + + for col in id_cols: + mdata[col] = np.tile(data[col]._values, K) + + if dropna: + mask = np.ones(len(mdata[pivot_cols[0]]), dtype=bool) + for c in pivot_cols: + mask &= notna(mdata[c]) + if not mask.all(): + mdata = {k: v[mask] for k, v in mdata.items()} + + return data._constructor(mdata, columns=id_cols + pivot_cols) + + +def wide_to_long( + df: DataFrame, stubnames, i, j, sep: str = "", suffix: str = r"\d+" +) -> DataFrame: + r""" + Unpivot a DataFrame from wide to long format. + + Less flexible but more user-friendly than melt. + + With stubnames ['A', 'B'], this function expects to find one or more + group of columns with format + A-suffix1, A-suffix2,..., B-suffix1, B-suffix2,... + You specify what you want to call this suffix in the resulting long format + with `j` (for example `j='year'`) + + Each row of these wide variables are assumed to be uniquely identified by + `i` (can be a single column name or a list of column names) + + All remaining variables in the data frame are left intact. + + Parameters + ---------- + df : DataFrame + The wide-format DataFrame. + stubnames : str or list-like + The stub name(s). The wide format variables are assumed to + start with the stub names. + i : str or list-like + Column(s) to use as id variable(s). + j : str + The name of the sub-observation variable. What you wish to name your + suffix in the long format. + sep : str, default "" + A character indicating the separation of the variable names + in the wide format, to be stripped from the names in the long format. + For example, if your column names are A-suffix1, A-suffix2, you + can strip the hyphen by specifying `sep='-'`. + suffix : str, default '\\d+' + A regular expression capturing the wanted suffixes. '\\d+' captures + numeric suffixes. Suffixes with no numbers could be specified with the + negated character class '\\D+'. You can also further disambiguate + suffixes, for example, if your wide variables are of the form A-one, + B-two,.., and you have an unrelated column A-rating, you can ignore the + last one by specifying `suffix='(!?one|two)'`. When all suffixes are + numeric, they are cast to int64/float64. + + Returns + ------- + DataFrame + A DataFrame that contains each stub name as a variable, with new index + (i, j). + + See Also + -------- + melt : Unpivot a DataFrame from wide to long format, optionally leaving + identifiers set. + pivot : Create a spreadsheet-style pivot table as a DataFrame. + DataFrame.pivot : Pivot without aggregation that can handle + non-numeric data. + DataFrame.pivot_table : Generalization of pivot that can handle + duplicate values for one index/column pair. 
+ DataFrame.unstack : Pivot based on the index values instead of a + column. + + Notes + ----- + All extra variables are left untouched. This simply uses + `pandas.melt` under the hood, but is hard-coded to "do the right thing" + in a typical case. + + Examples + -------- + >>> np.random.seed(123) + >>> df = pd.DataFrame({"A1970" : {0 : "a", 1 : "b", 2 : "c"}, + ... "A1980" : {0 : "d", 1 : "e", 2 : "f"}, + ... "B1970" : {0 : 2.5, 1 : 1.2, 2 : .7}, + ... "B1980" : {0 : 3.2, 1 : 1.3, 2 : .1}, + ... "X" : dict(zip(range(3), np.random.randn(3))) + ... }) + >>> df["id"] = df.index + >>> df + A1970 A1980 B1970 B1980 X id + 0 a d 2.5 3.2 -1.085631 0 + 1 b e 1.2 1.3 0.997345 1 + 2 c f 0.7 0.1 0.282978 2 + >>> pd.wide_to_long(df, ["A", "B"], i="id", j="year") + ... # doctest: +NORMALIZE_WHITESPACE + X A B + id year + 0 1970 -1.085631 a 2.5 + 1 1970 0.997345 b 1.2 + 2 1970 0.282978 c 0.7 + 0 1980 -1.085631 d 3.2 + 1 1980 0.997345 e 1.3 + 2 1980 0.282978 f 0.1 + + With multiple id columns + + >>> df = pd.DataFrame({ + ... 'famid': [1, 1, 1, 2, 2, 2, 3, 3, 3], + ... 'birth': [1, 2, 3, 1, 2, 3, 1, 2, 3], + ... 'ht1': [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1], + ... 'ht2': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9] + ... }) + >>> df + famid birth ht1 ht2 + 0 1 1 2.8 3.4 + 1 1 2 2.9 3.8 + 2 1 3 2.2 2.9 + 3 2 1 2.0 3.2 + 4 2 2 1.8 2.8 + 5 2 3 1.9 2.4 + 6 3 1 2.2 3.3 + 7 3 2 2.3 3.4 + 8 3 3 2.1 2.9 + >>> l = pd.wide_to_long(df, stubnames='ht', i=['famid', 'birth'], j='age') + >>> l + ... # doctest: +NORMALIZE_WHITESPACE + ht + famid birth age + 1 1 1 2.8 + 2 3.4 + 2 1 2.9 + 2 3.8 + 3 1 2.2 + 2 2.9 + 2 1 1 2.0 + 2 3.2 + 2 1 1.8 + 2 2.8 + 3 1 1.9 + 2 2.4 + 3 1 1 2.2 + 2 3.3 + 2 1 2.3 + 2 3.4 + 3 1 2.1 + 2 2.9 + + Going from long back to wide just takes some creative use of `unstack` + + >>> w = l.unstack() + >>> w.columns = w.columns.map('{0[0]}{0[1]}'.format) + >>> w.reset_index() + famid birth ht1 ht2 + 0 1 1 2.8 3.4 + 1 1 2 2.9 3.8 + 2 1 3 2.2 2.9 + 3 2 1 2.0 3.2 + 4 2 2 1.8 2.8 + 5 2 3 1.9 2.4 + 6 3 1 2.2 3.3 + 7 3 2 2.3 3.4 + 8 3 3 2.1 2.9 + + Less wieldy column names are also handled + + >>> np.random.seed(0) + >>> df = pd.DataFrame({'A(weekly)-2010': np.random.rand(3), + ... 'A(weekly)-2011': np.random.rand(3), + ... 'B(weekly)-2010': np.random.rand(3), + ... 'B(weekly)-2011': np.random.rand(3), + ... 'X' : np.random.randint(3, size=3)}) + >>> df['id'] = df.index + >>> df # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS + A(weekly)-2010 A(weekly)-2011 B(weekly)-2010 B(weekly)-2011 X id + 0 0.548814 0.544883 0.437587 0.383442 0 0 + 1 0.715189 0.423655 0.891773 0.791725 1 1 + 2 0.602763 0.645894 0.963663 0.528895 1 2 + + >>> pd.wide_to_long(df, ['A(weekly)', 'B(weekly)'], i='id', + ... j='year', sep='-') + ... # doctest: +NORMALIZE_WHITESPACE + X A(weekly) B(weekly) + id year + 0 2010 0 0.548814 0.437587 + 1 2010 1 0.715189 0.891773 + 2 2010 1 0.602763 0.963663 + 0 2011 0 0.544883 0.383442 + 1 2011 1 0.423655 0.791725 + 2 2011 1 0.645894 0.528895 + + If we have many columns, we could also use a regex to find our + stubnames and pass that list on to wide_to_long + + >>> stubnames = sorted( + ... set([match[0] for match in df.columns.str.findall( + ... r'[A-B]\(.*\)').values if match != []]) + ... ) + >>> list(stubnames) + ['A(weekly)', 'B(weekly)'] + + All of the above examples have integers as suffixes. It is possible to + have non-integers as suffixes. + + >>> df = pd.DataFrame({ + ... 'famid': [1, 1, 1, 2, 2, 2, 3, 3, 3], + ... 'birth': [1, 2, 3, 1, 2, 3, 1, 2, 3], + ... 
'ht_one': [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1], + ... 'ht_two': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9] + ... }) + >>> df + famid birth ht_one ht_two + 0 1 1 2.8 3.4 + 1 1 2 2.9 3.8 + 2 1 3 2.2 2.9 + 3 2 1 2.0 3.2 + 4 2 2 1.8 2.8 + 5 2 3 1.9 2.4 + 6 3 1 2.2 3.3 + 7 3 2 2.3 3.4 + 8 3 3 2.1 2.9 + + >>> l = pd.wide_to_long(df, stubnames='ht', i=['famid', 'birth'], j='age', + ... sep='_', suffix=r'\w+') + >>> l + ... # doctest: +NORMALIZE_WHITESPACE + ht + famid birth age + 1 1 one 2.8 + two 3.4 + 2 one 2.9 + two 3.8 + 3 one 2.2 + two 2.9 + 2 1 one 2.0 + two 3.2 + 2 one 1.8 + two 2.8 + 3 one 1.9 + two 2.4 + 3 1 one 2.2 + two 3.3 + 2 one 2.3 + two 3.4 + 3 one 2.1 + two 2.9 + """ + + def get_var_names(df, stub: str, sep: str, suffix: str) -> list[str]: + regex = fr"^{re.escape(stub)}{re.escape(sep)}{suffix}$" + pattern = re.compile(regex) + return [col for col in df.columns if pattern.match(col)] + + def melt_stub(df, stub: str, i, j, value_vars, sep: str): + newdf = melt( + df, + id_vars=i, + value_vars=value_vars, + value_name=stub.rstrip(sep), + var_name=j, + ) + newdf[j] = Categorical(newdf[j]) + newdf[j] = newdf[j].str.replace(re.escape(stub + sep), "", regex=True) + + # GH17627 Cast numerics suffixes to int/float + newdf[j] = to_numeric(newdf[j], errors="ignore") + + return newdf.set_index(i + [j]) + + if not is_list_like(stubnames): + stubnames = [stubnames] + else: + stubnames = list(stubnames) + + if any(col in stubnames for col in df.columns): + raise ValueError("stubname can't be identical to a column name") + + if not is_list_like(i): + i = [i] + else: + i = list(i) + + if df[i].duplicated().any(): + raise ValueError("the id variables need to uniquely identify each row") + + value_vars = [get_var_names(df, stub, sep, suffix) for stub in stubnames] + + value_vars_flattened = [e for sublist in value_vars for e in sublist] + id_vars = list(set(df.columns.tolist()).difference(value_vars_flattened)) + + _melted = [melt_stub(df, s, i, j, v, sep) for s, v in zip(stubnames, value_vars)] + melted = _melted[0].join(_melted[1:], how="outer") + + if len(i) == 1: + new = df[id_vars].set_index(i).join(melted) + return new + + new = df[id_vars].merge(melted.reset_index(), on=i).set_index(i + [j]) + + return new diff --git a/.local/lib/python3.8/site-packages/pandas/core/reshape/merge.py b/.local/lib/python3.8/site-packages/pandas/core/reshape/merge.py new file mode 100644 index 0000000..3a39713 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/reshape/merge.py @@ -0,0 +1,2356 @@ +""" +SQL-style merge routines +""" +from __future__ import annotations + +import copy +import datetime +from functools import partial +import hashlib +import string +from typing import ( + TYPE_CHECKING, + Hashable, + cast, +) +import warnings + +import numpy as np + +from pandas._libs import ( + Timedelta, + hashtable as libhashtable, + join as libjoin, + lib, +) +from pandas._typing import ( + ArrayLike, + DtypeObj, + IndexLabel, + Suffixes, + npt, +) +from pandas.errors import MergeError +from pandas.util._decorators import ( + Appender, + Substitution, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.cast import find_common_type +from pandas.core.dtypes.common import ( + ensure_float64, + ensure_int64, + ensure_object, + is_array_like, + is_bool, + is_bool_dtype, + is_categorical_dtype, + is_datetime64tz_dtype, + is_dtype_equal, + is_extension_array_dtype, + is_float_dtype, + is_integer, + is_integer_dtype, + is_list_like, + is_number, + is_numeric_dtype, 
+ is_object_dtype, + needs_i8_conversion, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) +from pandas.core.dtypes.missing import ( + isna, + na_value_for_dtype, +) + +from pandas import ( + Categorical, + Index, + MultiIndex, + Series, +) +from pandas.core import groupby +import pandas.core.algorithms as algos +from pandas.core.arrays import ExtensionArray +import pandas.core.common as com +from pandas.core.construction import extract_array +from pandas.core.frame import _merge_doc +from pandas.core.internals import concatenate_managers +from pandas.core.sorting import is_int64_overflow_possible + +if TYPE_CHECKING: + from pandas import DataFrame + from pandas.core.arrays import DatetimeArray + + +@Substitution("\nleft : DataFrame or named Series") +@Appender(_merge_doc, indents=0) +def merge( + left: DataFrame | Series, + right: DataFrame | Series, + how: str = "inner", + on: IndexLabel | None = None, + left_on: IndexLabel | None = None, + right_on: IndexLabel | None = None, + left_index: bool = False, + right_index: bool = False, + sort: bool = False, + suffixes: Suffixes = ("_x", "_y"), + copy: bool = True, + indicator: bool = False, + validate: str | None = None, +) -> DataFrame: + op = _MergeOperation( + left, + right, + how=how, + on=on, + left_on=left_on, + right_on=right_on, + left_index=left_index, + right_index=right_index, + sort=sort, + suffixes=suffixes, + copy=copy, + indicator=indicator, + validate=validate, + ) + return op.get_result() + + +if __debug__: + merge.__doc__ = _merge_doc % "\nleft : DataFrame" + + +def _groupby_and_merge(by, left: DataFrame, right: DataFrame, merge_pieces): + """ + groupby & merge; we are always performing a left-by type operation + + Parameters + ---------- + by: field to group + left: DataFrame + right: DataFrame + merge_pieces: function for merging + """ + pieces = [] + if not isinstance(by, (list, tuple)): + by = [by] + + lby = left.groupby(by, sort=False) + rby: groupby.DataFrameGroupBy | None = None + + # if we can groupby the rhs + # then we can get vastly better perf + if all(item in right.columns for item in by): + rby = right.groupby(by, sort=False) + + for key, lhs in lby: + + if rby is None: + rhs = right + else: + try: + rhs = right.take(rby.indices[key]) + except KeyError: + # key doesn't exist in left + lcols = lhs.columns.tolist() + cols = lcols + [r for r in right.columns if r not in set(lcols)] + merged = lhs.reindex(columns=cols) + merged.index = range(len(merged)) + pieces.append(merged) + continue + + merged = merge_pieces(lhs, rhs) + + # make sure join keys are in the merged + # TODO, should merge_pieces do this? + merged[by] = key + + pieces.append(merged) + + # preserve the original order + # if we have a missing piece this can be reset + from pandas.core.reshape.concat import concat + + result = concat(pieces, ignore_index=True) + result = result.reindex(columns=pieces[0].columns, copy=False) + return result, lby + + +def merge_ordered( + left: DataFrame, + right: DataFrame, + on: IndexLabel | None = None, + left_on: IndexLabel | None = None, + right_on: IndexLabel | None = None, + left_by=None, + right_by=None, + fill_method: str | None = None, + suffixes: Suffixes = ("_x", "_y"), + how: str = "outer", +) -> DataFrame: + """ + Perform a merge for ordered data with optional filling/interpolation. + + Designed for ordered data like time series data. Optionally + perform group-wise merge (see examples). 
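# --- Editor's annotation (a sketch; not part of the vendored docstring) ---
# _groupby_and_merge above is the engine behind merge_ordered's
# left_by/right_by: each left-by group is merged against ``right``
# separately, the group key is written back onto right-only rows, and the
# pieces are concatenated. Roughly (column names g/k/lv/rv are made up):
import pandas as pd

left = pd.DataFrame({"g": ["a", "a", "b"], "k": [1, 2, 1], "lv": [10, 20, 30]})
right = pd.DataFrame({"k": [1, 2], "rv": [100, 200]})

pieces = []
for key, grp in left.groupby("g", sort=False):
    piece = grp.merge(right, on="k", how="outer")
    piece["g"] = key  # restore the group key on rows that exist only in right
    pieces.append(piece)
by_hand = pd.concat(pieces, ignore_index=True)
# pd.merge_ordered(left, right, on="k", left_by="g") produces the same shape,
# with optional fill_method interpolation on top.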
+ + Parameters + ---------- + left : DataFrame + right : DataFrame + on : label or list + Field names to join on. Must be found in both DataFrames. + left_on : label or list, or array-like + Field names to join on in left DataFrame. Can be a vector or list of + vectors of the length of the DataFrame to use a particular vector as + the join key instead of columns. + right_on : label or list, or array-like + Field names to join on in right DataFrame or vector/list of vectors per + left_on docs. + left_by : column name or list of column names + Group left DataFrame by group columns and merge piece by piece with + right DataFrame. + right_by : column name or list of column names + Group right DataFrame by group columns and merge piece by piece with + left DataFrame. + fill_method : {'ffill', None}, default None + Interpolation method for data. + suffixes : list-like, default is ("_x", "_y") + A length-2 sequence where each element is optionally a string + indicating the suffix to add to overlapping column names in + `left` and `right` respectively. Pass a value of `None` instead + of a string to indicate that the column name from `left` or + `right` should be left as-is, with no suffix. At least one of the + values must not be None. + + .. versionchanged:: 0.25.0 + how : {'left', 'right', 'outer', 'inner'}, default 'outer' + * left: use only keys from left frame (SQL: left outer join) + * right: use only keys from right frame (SQL: right outer join) + * outer: use union of keys from both frames (SQL: full outer join) + * inner: use intersection of keys from both frames (SQL: inner join). + + Returns + ------- + DataFrame + The merged DataFrame output type will the be same as + 'left', if it is a subclass of DataFrame. + + See Also + -------- + merge : Merge with a database-style join. + merge_asof : Merge on nearest keys. + + Examples + -------- + >>> df1 = pd.DataFrame( + ... { + ... "key": ["a", "c", "e", "a", "c", "e"], + ... "lvalue": [1, 2, 3, 1, 2, 3], + ... "group": ["a", "a", "a", "b", "b", "b"] + ... } + ... 
) + >>> df1 + key lvalue group + 0 a 1 a + 1 c 2 a + 2 e 3 a + 3 a 1 b + 4 c 2 b + 5 e 3 b + + >>> df2 = pd.DataFrame({"key": ["b", "c", "d"], "rvalue": [1, 2, 3]}) + >>> df2 + key rvalue + 0 b 1 + 1 c 2 + 2 d 3 + + >>> merge_ordered(df1, df2, fill_method="ffill", left_by="group") + key lvalue group rvalue + 0 a 1 a NaN + 1 b 1 a 1.0 + 2 c 2 a 2.0 + 3 d 2 a 3.0 + 4 e 3 a 3.0 + 5 a 1 b NaN + 6 b 1 b 1.0 + 7 c 2 b 2.0 + 8 d 2 b 3.0 + 9 e 3 b 3.0 + """ + + def _merger(x, y) -> DataFrame: + # perform the ordered merge operation + op = _OrderedMerge( + x, + y, + on=on, + left_on=left_on, + right_on=right_on, + suffixes=suffixes, + fill_method=fill_method, + how=how, + ) + return op.get_result() + + if left_by is not None and right_by is not None: + raise ValueError("Can only group either left or right frames") + elif left_by is not None: + if isinstance(left_by, str): + left_by = [left_by] + check = set(left_by).difference(left.columns) + if len(check) != 0: + raise KeyError(f"{check} not found in left columns") + result, _ = _groupby_and_merge(left_by, left, right, lambda x, y: _merger(x, y)) + elif right_by is not None: + if isinstance(right_by, str): + right_by = [right_by] + check = set(right_by).difference(right.columns) + if len(check) != 0: + raise KeyError(f"{check} not found in right columns") + result, _ = _groupby_and_merge( + right_by, right, left, lambda x, y: _merger(y, x) + ) + else: + result = _merger(left, right) + return result + + +def merge_asof( + left: DataFrame | Series, + right: DataFrame | Series, + on: IndexLabel | None = None, + left_on: IndexLabel | None = None, + right_on: IndexLabel | None = None, + left_index: bool = False, + right_index: bool = False, + by=None, + left_by=None, + right_by=None, + suffixes: Suffixes = ("_x", "_y"), + tolerance=None, + allow_exact_matches: bool = True, + direction: str = "backward", +) -> DataFrame: + """ + Perform a merge by key distance. + + This is similar to a left-join except that we match on nearest + key rather than equal keys. Both DataFrames must be sorted by the key. + + For each row in the left DataFrame: + + - A "backward" search selects the last row in the right DataFrame whose + 'on' key is less than or equal to the left's key. + + - A "forward" search selects the first row in the right DataFrame whose + 'on' key is greater than or equal to the left's key. + + - A "nearest" search selects the row in the right DataFrame whose 'on' + key is closest in absolute distance to the left's key. + + The default is "backward" and is compatible in versions below 0.20.0. + The direction parameter was added in version 0.20.0 and introduces + "forward" and "nearest". + + Optionally match on equivalent keys with 'by' before searching with 'on'. + + Parameters + ---------- + left : DataFrame or named Series + right : DataFrame or named Series + on : label + Field name to join on. Must be found in both DataFrames. + The data MUST be ordered. Furthermore this must be a numeric column, + such as datetimelike, integer, or float. On or left_on/right_on + must be given. + left_on : label + Field name to join on in left DataFrame. + right_on : label + Field name to join on in right DataFrame. + left_index : bool + Use the index of the left DataFrame as the join key. + right_index : bool + Use the index of the right DataFrame as the join key. + by : column name or list of column names + Match on these columns before performing merge operation. + left_by : column name + Field names to match on in the left DataFrame. 
+ right_by : column name + Field names to match on in the right DataFrame. + suffixes : 2-length sequence (tuple, list, ...) + Suffix to apply to overlapping column names in the left and right + side, respectively. + tolerance : int or Timedelta, optional, default None + Select asof tolerance within this range; must be compatible + with the merge index. + allow_exact_matches : bool, default True + + - If True, allow matching with the same 'on' value + (i.e. less-than-or-equal-to / greater-than-or-equal-to) + - If False, don't match the same 'on' value + (i.e., strictly less-than / strictly greater-than). + + direction : 'backward' (default), 'forward', or 'nearest' + Whether to search for prior, subsequent, or closest matches. + + Returns + ------- + merged : DataFrame + + See Also + -------- + merge : Merge with a database-style join. + merge_ordered : Merge with optional filling/interpolation. + + Examples + -------- + >>> left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]}) + >>> left + a left_val + 0 1 a + 1 5 b + 2 10 c + + >>> right = pd.DataFrame({"a": [1, 2, 3, 6, 7], "right_val": [1, 2, 3, 6, 7]}) + >>> right + a right_val + 0 1 1 + 1 2 2 + 2 3 3 + 3 6 6 + 4 7 7 + + >>> pd.merge_asof(left, right, on="a") + a left_val right_val + 0 1 a 1 + 1 5 b 3 + 2 10 c 7 + + >>> pd.merge_asof(left, right, on="a", allow_exact_matches=False) + a left_val right_val + 0 1 a NaN + 1 5 b 3.0 + 2 10 c 7.0 + + >>> pd.merge_asof(left, right, on="a", direction="forward") + a left_val right_val + 0 1 a 1.0 + 1 5 b 6.0 + 2 10 c NaN + + >>> pd.merge_asof(left, right, on="a", direction="nearest") + a left_val right_val + 0 1 a 1 + 1 5 b 6 + 2 10 c 7 + + We can use indexed DataFrames as well. + + >>> left = pd.DataFrame({"left_val": ["a", "b", "c"]}, index=[1, 5, 10]) + >>> left + left_val + 1 a + 5 b + 10 c + + >>> right = pd.DataFrame({"right_val": [1, 2, 3, 6, 7]}, index=[1, 2, 3, 6, 7]) + >>> right + right_val + 1 1 + 2 2 + 3 3 + 6 6 + 7 7 + + >>> pd.merge_asof(left, right, left_index=True, right_index=True) + left_val right_val + 1 a 1 + 5 b 3 + 10 c 7 + + Here is a real-world times-series example + + >>> quotes = pd.DataFrame( + ... { + ... "time": [ + ... pd.Timestamp("2016-05-25 13:30:00.023"), + ... pd.Timestamp("2016-05-25 13:30:00.023"), + ... pd.Timestamp("2016-05-25 13:30:00.030"), + ... pd.Timestamp("2016-05-25 13:30:00.041"), + ... pd.Timestamp("2016-05-25 13:30:00.048"), + ... pd.Timestamp("2016-05-25 13:30:00.049"), + ... pd.Timestamp("2016-05-25 13:30:00.072"), + ... pd.Timestamp("2016-05-25 13:30:00.075") + ... ], + ... "ticker": [ + ... "GOOG", + ... "MSFT", + ... "MSFT", + ... "MSFT", + ... "GOOG", + ... "AAPL", + ... "GOOG", + ... "MSFT" + ... ], + ... "bid": [720.50, 51.95, 51.97, 51.99, 720.50, 97.99, 720.50, 52.01], + ... "ask": [720.93, 51.96, 51.98, 52.00, 720.93, 98.01, 720.88, 52.03] + ... } + ... ) + >>> quotes + time ticker bid ask + 0 2016-05-25 13:30:00.023 GOOG 720.50 720.93 + 1 2016-05-25 13:30:00.023 MSFT 51.95 51.96 + 2 2016-05-25 13:30:00.030 MSFT 51.97 51.98 + 3 2016-05-25 13:30:00.041 MSFT 51.99 52.00 + 4 2016-05-25 13:30:00.048 GOOG 720.50 720.93 + 5 2016-05-25 13:30:00.049 AAPL 97.99 98.01 + 6 2016-05-25 13:30:00.072 GOOG 720.50 720.88 + 7 2016-05-25 13:30:00.075 MSFT 52.01 52.03 + + >>> trades = pd.DataFrame( + ... { + ... "time": [ + ... pd.Timestamp("2016-05-25 13:30:00.023"), + ... pd.Timestamp("2016-05-25 13:30:00.038"), + ... pd.Timestamp("2016-05-25 13:30:00.048"), + ... pd.Timestamp("2016-05-25 13:30:00.048"), + ... 
pd.Timestamp("2016-05-25 13:30:00.048") + ... ], + ... "ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"], + ... "price": [51.95, 51.95, 720.77, 720.92, 98.0], + ... "quantity": [75, 155, 100, 100, 100] + ... } + ... ) + >>> trades + time ticker price quantity + 0 2016-05-25 13:30:00.023 MSFT 51.95 75 + 1 2016-05-25 13:30:00.038 MSFT 51.95 155 + 2 2016-05-25 13:30:00.048 GOOG 720.77 100 + 3 2016-05-25 13:30:00.048 GOOG 720.92 100 + 4 2016-05-25 13:30:00.048 AAPL 98.00 100 + + By default we are taking the asof of the quotes + + >>> pd.merge_asof(trades, quotes, on="time", by="ticker") + time ticker price quantity bid ask + 0 2016-05-25 13:30:00.023 MSFT 51.95 75 51.95 51.96 + 1 2016-05-25 13:30:00.038 MSFT 51.95 155 51.97 51.98 + 2 2016-05-25 13:30:00.048 GOOG 720.77 100 720.50 720.93 + 3 2016-05-25 13:30:00.048 GOOG 720.92 100 720.50 720.93 + 4 2016-05-25 13:30:00.048 AAPL 98.00 100 NaN NaN + + We only asof within 2ms between the quote time and the trade time + + >>> pd.merge_asof( + ... trades, quotes, on="time", by="ticker", tolerance=pd.Timedelta("2ms") + ... ) + time ticker price quantity bid ask + 0 2016-05-25 13:30:00.023 MSFT 51.95 75 51.95 51.96 + 1 2016-05-25 13:30:00.038 MSFT 51.95 155 NaN NaN + 2 2016-05-25 13:30:00.048 GOOG 720.77 100 720.50 720.93 + 3 2016-05-25 13:30:00.048 GOOG 720.92 100 720.50 720.93 + 4 2016-05-25 13:30:00.048 AAPL 98.00 100 NaN NaN + + We only asof within 10ms between the quote time and the trade time + and we exclude exact matches on time. However *prior* data will + propagate forward + + >>> pd.merge_asof( + ... trades, + ... quotes, + ... on="time", + ... by="ticker", + ... tolerance=pd.Timedelta("10ms"), + ... allow_exact_matches=False + ... ) + time ticker price quantity bid ask + 0 2016-05-25 13:30:00.023 MSFT 51.95 75 NaN NaN + 1 2016-05-25 13:30:00.038 MSFT 51.95 155 51.97 51.98 + 2 2016-05-25 13:30:00.048 GOOG 720.77 100 NaN NaN + 3 2016-05-25 13:30:00.048 GOOG 720.92 100 NaN NaN + 4 2016-05-25 13:30:00.048 AAPL 98.00 100 NaN NaN + """ + op = _AsOfMerge( + left, + right, + on=on, + left_on=left_on, + right_on=right_on, + left_index=left_index, + right_index=right_index, + by=by, + left_by=left_by, + right_by=right_by, + suffixes=suffixes, + how="asof", + tolerance=tolerance, + allow_exact_matches=allow_exact_matches, + direction=direction, + ) + return op.get_result() + + +# TODO: transformations?? 
+# TODO: only copy DataFrames when modification necessary +class _MergeOperation: + """ + Perform a database (SQL) merge operation between two DataFrame or Series + objects using either columns as keys or their row indexes + """ + + _merge_type = "merge" + + def __init__( + self, + left: DataFrame | Series, + right: DataFrame | Series, + how: str = "inner", + on: IndexLabel | None = None, + left_on: IndexLabel | None = None, + right_on: IndexLabel | None = None, + axis: int = 1, + left_index: bool = False, + right_index: bool = False, + sort: bool = True, + suffixes: Suffixes = ("_x", "_y"), + copy: bool = True, + indicator: bool = False, + validate: str | None = None, + ): + _left = _validate_operand(left) + _right = _validate_operand(right) + self.left = self.orig_left = _left + self.right = self.orig_right = _right + self.how = how + + # bm_axis -> the axis on the BlockManager + self.bm_axis = axis + # axis --> the axis on the Series/DataFrame + self.axis = 1 - axis if self.left.ndim == 2 else 0 + + self.on = com.maybe_make_list(on) + self.left_on = com.maybe_make_list(left_on) + self.right_on = com.maybe_make_list(right_on) + + self.copy = copy + self.suffixes = suffixes + self.sort = sort + + self.left_index = left_index + self.right_index = right_index + + self.indicator = indicator + + self.indicator_name: str | None + if isinstance(self.indicator, str): + self.indicator_name = self.indicator + elif isinstance(self.indicator, bool): + self.indicator_name = "_merge" if self.indicator else None + else: + raise ValueError( + "indicator option can only accept boolean or string arguments" + ) + + if not is_bool(left_index): + raise ValueError( + f"left_index parameter must be of type bool, not {type(left_index)}" + ) + if not is_bool(right_index): + raise ValueError( + f"right_index parameter must be of type bool, not {type(right_index)}" + ) + + # warn user when merging between different levels + if _left.columns.nlevels != _right.columns.nlevels: + msg = ( + "merging between different levels is deprecated and will be removed " + f"in a future version. ({left.columns.nlevels} levels on the left, " + f"{right.columns.nlevels} on the right)" + ) + # stacklevel chosen to be correct when this is reached via pd.merge + # (and not DataFrame.join) + warnings.warn(msg, FutureWarning, stacklevel=find_stack_level()) + + self._validate_specification() + + cross_col = None + if self.how == "cross": + ( + self.left, + self.right, + self.how, + cross_col, + ) = self._create_cross_configuration(self.left, self.right) + self.left_on = self.right_on = [cross_col] + self._cross = cross_col + + # note this function has side effects + ( + self.left_join_keys, + self.right_join_keys, + self.join_names, + ) = self._get_merge_keys() + + # validate the merge keys dtypes. We may need to coerce + # to avoid incompatible dtypes + self._maybe_coerce_merge_keys() + + # If argument passed to validate, + # check if columns specified as unique + # are in fact unique. 
+ if validate is not None: + self._validate(validate) + + def get_result(self) -> DataFrame: + if self.indicator: + self.left, self.right = self._indicator_pre_merge(self.left, self.right) + + join_index, left_indexer, right_indexer = self._get_join_info() + + llabels, rlabels = _items_overlap_with_suffix( + self.left._info_axis, self.right._info_axis, self.suffixes + ) + + lindexers = {1: left_indexer} if left_indexer is not None else {} + rindexers = {1: right_indexer} if right_indexer is not None else {} + + result_data = concatenate_managers( + [(self.left._mgr, lindexers), (self.right._mgr, rindexers)], + axes=[llabels.append(rlabels), join_index], + concat_axis=0, + copy=self.copy, + ) + + typ = self.left._constructor + result = typ(result_data).__finalize__(self, method=self._merge_type) + + if self.indicator: + result = self._indicator_post_merge(result) + + self._maybe_add_join_keys(result, left_indexer, right_indexer) + + self._maybe_restore_index_levels(result) + + self._maybe_drop_cross_column(result, self._cross) + + return result.__finalize__(self, method="merge") + + def _maybe_drop_cross_column( + self, result: DataFrame, cross_col: str | None + ) -> None: + if cross_col is not None: + del result[cross_col] + + def _indicator_pre_merge( + self, left: DataFrame, right: DataFrame + ) -> tuple[DataFrame, DataFrame]: + + columns = left.columns.union(right.columns) + + for i in ["_left_indicator", "_right_indicator"]: + if i in columns: + raise ValueError( + "Cannot use `indicator=True` option when " + f"data contains a column named {i}" + ) + if self.indicator_name in columns: + raise ValueError( + "Cannot use name of an existing column for indicator column" + ) + + left = left.copy() + right = right.copy() + + left["_left_indicator"] = 1 + left["_left_indicator"] = left["_left_indicator"].astype("int8") + + right["_right_indicator"] = 2 + right["_right_indicator"] = right["_right_indicator"].astype("int8") + + return left, right + + def _indicator_post_merge(self, result: DataFrame) -> DataFrame: + + result["_left_indicator"] = result["_left_indicator"].fillna(0) + result["_right_indicator"] = result["_right_indicator"].fillna(0) + + result[self.indicator_name] = Categorical( + (result["_left_indicator"] + result["_right_indicator"]), + categories=[1, 2, 3], + ) + result[self.indicator_name] = result[self.indicator_name].cat.rename_categories( + ["left_only", "right_only", "both"] + ) + + result = result.drop(labels=["_left_indicator", "_right_indicator"], axis=1) + return result + + def _maybe_restore_index_levels(self, result: DataFrame) -> None: + """ + Restore index levels specified as `on` parameters + + Here we check for cases where `self.left_on` and `self.right_on` pairs + each reference an index level in their respective DataFrames. The + joined columns corresponding to these pairs are then restored to the + index of `result`. + + **Note:** This method has side effects. 
It modifies `result` in-place + + Parameters + ---------- + result: DataFrame + merge result + + Returns + ------- + None + """ + names_to_restore = [] + for name, left_key, right_key in zip( + self.join_names, self.left_on, self.right_on + ): + if ( + self.orig_left._is_level_reference(left_key) + and self.orig_right._is_level_reference(right_key) + and left_key == right_key + and name not in result.index.names + ): + + names_to_restore.append(name) + + if names_to_restore: + result.set_index(names_to_restore, inplace=True) + + def _maybe_add_join_keys( + self, + result: DataFrame, + left_indexer: np.ndarray | None, + right_indexer: np.ndarray | None, + ) -> None: + + left_has_missing = None + right_has_missing = None + + keys = zip(self.join_names, self.left_on, self.right_on) + for i, (name, lname, rname) in enumerate(keys): + if not _should_fill(lname, rname): + continue + + take_left, take_right = None, None + + if name in result: + + if left_indexer is not None and right_indexer is not None: + if name in self.left: + + if left_has_missing is None: + left_has_missing = (left_indexer == -1).any() + + if left_has_missing: + take_right = self.right_join_keys[i] + + if not is_dtype_equal( + result[name].dtype, self.left[name].dtype + ): + take_left = self.left[name]._values + + elif name in self.right: + + if right_has_missing is None: + right_has_missing = (right_indexer == -1).any() + + if right_has_missing: + take_left = self.left_join_keys[i] + + if not is_dtype_equal( + result[name].dtype, self.right[name].dtype + ): + take_right = self.right[name]._values + + elif left_indexer is not None and is_array_like(self.left_join_keys[i]): + take_left = self.left_join_keys[i] + take_right = self.right_join_keys[i] + + if take_left is not None or take_right is not None: + + if take_left is None: + lvals = result[name]._values + else: + # TODO: can we pin down take_left's type earlier? + take_left = extract_array(take_left, extract_numpy=True) + lfill = na_value_for_dtype(take_left.dtype) + lvals = algos.take_nd(take_left, left_indexer, fill_value=lfill) + + if take_right is None: + rvals = result[name]._values + else: + # TODO: can we pin down take_right's type earlier? 
+ take_right = extract_array(take_right, extract_numpy=True) + rfill = na_value_for_dtype(take_right.dtype) + rvals = algos.take_nd(take_right, right_indexer, fill_value=rfill) + + # if we have an all missing left_indexer + # make sure to just use the right values or vice-versa + mask_left = left_indexer == -1 + mask_right = right_indexer == -1 + # error: Item "bool" of "Union[Any, bool]" has no attribute "all" + if mask_left.all(): # type: ignore[union-attr] + key_col = Index(rvals) + result_dtype = rvals.dtype + # error: Item "bool" of "Union[Any, bool]" has no attribute "all" + elif ( + right_indexer is not None + and mask_right.all() # type: ignore[union-attr] + ): + key_col = Index(lvals) + result_dtype = lvals.dtype + else: + key_col = Index(lvals).where(~mask_left, rvals) + result_dtype = find_common_type([lvals.dtype, rvals.dtype]) + + if result._is_label_reference(name): + result[name] = Series( + key_col, dtype=result_dtype, index=result.index + ) + elif result._is_level_reference(name): + if isinstance(result.index, MultiIndex): + key_col.name = name + idx_list = [ + result.index.get_level_values(level_name) + if level_name != name + else key_col + for level_name in result.index.names + ] + + result.set_index(idx_list, inplace=True) + else: + result.index = Index(key_col, name=name) + else: + result.insert(i, name or f"key_{i}", key_col) + + def _get_join_indexers(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: + """return the join indexers""" + return get_join_indexers( + self.left_join_keys, self.right_join_keys, sort=self.sort, how=self.how + ) + + def _get_join_info( + self, + ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]: + + left_ax = self.left.axes[self.axis] + right_ax = self.right.axes[self.axis] + + if self.left_index and self.right_index and self.how != "asof": + join_index, left_indexer, right_indexer = left_ax.join( + right_ax, how=self.how, return_indexers=True, sort=self.sort + ) + + elif self.right_index and self.how == "left": + join_index, left_indexer, right_indexer = _left_join_on_index( + left_ax, right_ax, self.left_join_keys, sort=self.sort + ) + + elif self.left_index and self.how == "right": + join_index, right_indexer, left_indexer = _left_join_on_index( + right_ax, left_ax, self.right_join_keys, sort=self.sort + ) + else: + (left_indexer, right_indexer) = self._get_join_indexers() + + if self.right_index: + if len(self.left) > 0: + join_index = self._create_join_index( + self.left.index, + self.right.index, + left_indexer, + how="right", + ) + else: + join_index = self.right.index.take(right_indexer) + left_indexer = np.array([-1] * len(join_index), dtype=np.intp) + elif self.left_index: + if self.how == "asof": + # GH#33463 asof should always behave like a left merge + join_index = self._create_join_index( + self.left.index, + self.right.index, + left_indexer, + how="left", + ) + + elif len(self.right) > 0: + join_index = self._create_join_index( + self.right.index, + self.left.index, + right_indexer, + how="left", + ) + else: + join_index = self.left.index.take(left_indexer) + right_indexer = np.array([-1] * len(join_index), dtype=np.intp) + else: + join_index = Index(np.arange(len(left_indexer))) + + if len(join_index) == 0: + join_index = join_index.astype(object) + return join_index, left_indexer, right_indexer + + def _create_join_index( + self, + index: Index, + other_index: Index, + indexer: npt.NDArray[np.intp], + how: str = "left", + ) -> Index: + """ + Create a join index by rearranging one index to 
match another + + Parameters + ---------- + index : Index being rearranged + other_index : Index used to supply values not found in index + indexer : np.ndarray[np.intp] how to rearrange index + how : str + Replacement is only necessary if indexer based on other_index. + + Returns + ------- + Index + """ + if self.how in (how, "outer") and not isinstance(other_index, MultiIndex): + # if final index requires values in other_index but not target + # index, indexer may hold missing (-1) values, causing Index.take + # to take the final value in target index. So, we set the last + # element to be the desired fill value. We do not use allow_fill + # and fill_value because it throws a ValueError on integer indices + mask = indexer == -1 + if np.any(mask): + fill_value = na_value_for_dtype(index.dtype, compat=False) + index = index.append(Index([fill_value])) + return index.take(indexer) + + def _get_merge_keys(self): + """ + Note: has side effects (copy/delete key columns) + + Parameters + ---------- + left + right + on + + Returns + ------- + left_keys, right_keys + """ + left_keys = [] + right_keys = [] + # error: Need type annotation for 'join_names' (hint: "join_names: List[] + # = ...") + join_names = [] # type: ignore[var-annotated] + right_drop = [] + left_drop = [] + + left, right = self.left, self.right + + is_lkey = lambda x: is_array_like(x) and len(x) == len(left) + is_rkey = lambda x: is_array_like(x) and len(x) == len(right) + + # Note that pd.merge_asof() has separate 'on' and 'by' parameters. A + # user could, for example, request 'left_index' and 'left_by'. In a + # regular pd.merge(), users cannot specify both 'left_index' and + # 'left_on'. (Instead, users have a MultiIndex). That means the + # self.left_on in this function is always empty in a pd.merge(), but + # a pd.merge_asof(left_index=True, left_by=...) will result in a + # self.left_on array with a None in the middle of it. This requires + # a work-around as designated in the code below. + # See _validate_specification() for where this happens. + + # ugh, spaghetti re #733 + if _any(self.left_on) and _any(self.right_on): + for lk, rk in zip(self.left_on, self.right_on): + if is_lkey(lk): + left_keys.append(lk) + if is_rkey(rk): + right_keys.append(rk) + join_names.append(None) # what to do? 
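+                        # Two positional array keys leave no column label to
+                        # propagate; _maybe_add_join_keys later falls back to
+                        # inserting the key column as f"key_{i}".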
+                    else:
+                        if rk is not None:
+                            right_keys.append(right._get_label_or_level_values(rk))
+                            join_names.append(rk)
+                        else:
+                            # work-around for merge_asof(right_index=True)
+                            right_keys.append(right.index)
+                            join_names.append(right.index.name)
+                else:
+                    if not is_rkey(rk):
+                        if rk is not None:
+                            right_keys.append(right._get_label_or_level_values(rk))
+                        else:
+                            # work-around for merge_asof(right_index=True)
+                            right_keys.append(right.index)
+                        if lk is not None and lk == rk:
+                            # avoid key upcast in corner case (length-0)
+                            if len(left) > 0:
+                                right_drop.append(rk)
+                            else:
+                                left_drop.append(lk)
+                    else:
+                        right_keys.append(rk)
+                    if lk is not None:
+                        left_keys.append(left._get_label_or_level_values(lk))
+                        join_names.append(lk)
+                    else:
+                        # work-around for merge_asof(left_index=True)
+                        left_keys.append(left.index)
+                        join_names.append(left.index.name)
+        elif _any(self.left_on):
+            for k in self.left_on:
+                if is_lkey(k):
+                    left_keys.append(k)
+                    join_names.append(None)
+                else:
+                    left_keys.append(left._get_label_or_level_values(k))
+                    join_names.append(k)
+            if isinstance(self.right.index, MultiIndex):
+                right_keys = [
+                    lev._values.take(lev_codes)
+                    for lev, lev_codes in zip(
+                        self.right.index.levels, self.right.index.codes
+                    )
+                ]
+            else:
+                right_keys = [self.right.index._values]
+        elif _any(self.right_on):
+            for k in self.right_on:
+                if is_rkey(k):
+                    right_keys.append(k)
+                    join_names.append(None)
+                else:
+                    right_keys.append(right._get_label_or_level_values(k))
+                    join_names.append(k)
+            if isinstance(self.left.index, MultiIndex):
+                left_keys = [
+                    lev._values.take(lev_codes)
+                    for lev, lev_codes in zip(
+                        self.left.index.levels, self.left.index.codes
+                    )
+                ]
+            else:
+                left_keys = [self.left.index._values]
+
+        if left_drop:
+            self.left = self.left._drop_labels_or_levels(left_drop)
+
+        if right_drop:
+            self.right = self.right._drop_labels_or_levels(right_drop)
+
+        return left_keys, right_keys, join_names
+
+    def _maybe_coerce_merge_keys(self) -> None:
+        # we have valid merges but we may have to further
+        # coerce these if they are originally incompatible types
+        #
+        # for example if these are categorical, but are not dtype_equal
+        # or if we have object and integer dtypes
+
+        for lk, rk, name in zip(
+            self.left_join_keys, self.right_join_keys, self.join_names
+        ):
+            if (len(lk) and not len(rk)) or (not len(lk) and len(rk)):
+                continue
+
+            lk_is_cat = is_categorical_dtype(lk.dtype)
+            rk_is_cat = is_categorical_dtype(rk.dtype)
+            lk_is_object = is_object_dtype(lk.dtype)
+            rk_is_object = is_object_dtype(rk.dtype)
+
+            # if either left or right is a categorical
+            # then they must match exactly in categories & ordered
+            if lk_is_cat and rk_is_cat:
+                if lk._categories_match_up_to_permutation(rk):
+                    continue
+
+            elif lk_is_cat or rk_is_cat:
+                pass
+
+            elif is_dtype_equal(lk.dtype, rk.dtype):
+                continue
+
+            msg = (
+                f"You are trying to merge on {lk.dtype} and "
+                f"{rk.dtype} columns. If you wish to proceed you should use pd.concat"
+            )
+
+            # if we are numeric, then allow differing
+            # kinds to proceed, e.g.
int64 and int8, int and float + # further if we are object, but we infer to + # the same, then proceed + if is_numeric_dtype(lk.dtype) and is_numeric_dtype(rk.dtype): + if lk.dtype.kind == rk.dtype.kind: + continue + + # check whether ints and floats + elif is_integer_dtype(rk.dtype) and is_float_dtype(lk.dtype): + if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all(): + warnings.warn( + "You are merging on int and float " + "columns where the float values " + "are not equal to their int representation.", + UserWarning, + ) + continue + + elif is_float_dtype(rk.dtype) and is_integer_dtype(lk.dtype): + if not (rk == rk.astype(lk.dtype))[~np.isnan(rk)].all(): + warnings.warn( + "You are merging on int and float " + "columns where the float values " + "are not equal to their int representation.", + UserWarning, + ) + continue + + # let's infer and see if we are ok + elif lib.infer_dtype(lk, skipna=False) == lib.infer_dtype( + rk, skipna=False + ): + continue + + # Check if we are trying to merge on obviously + # incompatible dtypes GH 9780, GH 15800 + + # bool values are coerced to object + elif (lk_is_object and is_bool_dtype(rk.dtype)) or ( + is_bool_dtype(lk.dtype) and rk_is_object + ): + pass + + # object values are allowed to be merged + elif (lk_is_object and is_numeric_dtype(rk.dtype)) or ( + is_numeric_dtype(lk.dtype) and rk_is_object + ): + inferred_left = lib.infer_dtype(lk, skipna=False) + inferred_right = lib.infer_dtype(rk, skipna=False) + bool_types = ["integer", "mixed-integer", "boolean", "empty"] + string_types = ["string", "unicode", "mixed", "bytes", "empty"] + + # inferred bool + if inferred_left in bool_types and inferred_right in bool_types: + pass + + # unless we are merging non-string-like with string-like + elif ( + inferred_left in string_types and inferred_right not in string_types + ) or ( + inferred_right in string_types and inferred_left not in string_types + ): + raise ValueError(msg) + + # datetimelikes must match exactly + elif needs_i8_conversion(lk.dtype) and not needs_i8_conversion(rk.dtype): + raise ValueError(msg) + elif not needs_i8_conversion(lk.dtype) and needs_i8_conversion(rk.dtype): + raise ValueError(msg) + elif is_datetime64tz_dtype(lk.dtype) and not is_datetime64tz_dtype( + rk.dtype + ): + raise ValueError(msg) + elif not is_datetime64tz_dtype(lk.dtype) and is_datetime64tz_dtype( + rk.dtype + ): + raise ValueError(msg) + + elif lk_is_object and rk_is_object: + continue + + # Houston, we have a problem! + # let's coerce to object if the dtypes aren't + # categorical, otherwise coerce to the category + # dtype. If we coerced categories to object, + # then we would lose type information on some + # columns, and end up trying to merge + # incompatible dtypes. See GH 16900. + if name in self.left.columns: + typ = lk.categories.dtype if lk_is_cat else object + self.left = self.left.copy() + self.left[name] = self.left[name].astype(typ) + if name in self.right.columns: + typ = rk.categories.dtype if rk_is_cat else object + self.right = self.right.copy() + self.right[name] = self.right[name].astype(typ) + + def _create_cross_configuration( + self, left: DataFrame, right: DataFrame + ) -> tuple[DataFrame, DataFrame, str, str]: + """ + Creates the configuration to dispatch the cross operation to inner join, + e.g. adding a join column and resetting parameters. 
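+        A cross join thereby reduces to an inner join on a constant helper
+        column, so the result has len(left) * len(right) rows.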
Join column is added + to a new object, no inplace modification + + Parameters + ---------- + left : DataFrame + right : DataFrame + + Returns + ------- + a tuple (left, right, how, cross_col) representing the adjusted + DataFrames with cross_col, the merge operation set to inner and the column + to join over. + """ + cross_col = f"_cross_{hashlib.md5().hexdigest()}" + how = "inner" + return ( + left.assign(**{cross_col: 1}), + right.assign(**{cross_col: 1}), + how, + cross_col, + ) + + def _validate_specification(self) -> None: + if self.how == "cross": + if ( + self.left_index + or self.right_index + or self.right_on is not None + or self.left_on is not None + or self.on is not None + ): + raise MergeError( + "Can not pass on, right_on, left_on or set right_index=True or " + "left_index=True" + ) + return + # Hm, any way to make this logic less complicated?? + elif self.on is None and self.left_on is None and self.right_on is None: + + if self.left_index and self.right_index: + self.left_on, self.right_on = (), () + elif self.left_index: + raise MergeError("Must pass right_on or right_index=True") + elif self.right_index: + raise MergeError("Must pass left_on or left_index=True") + else: + # use the common columns + left_cols = self.left.columns + right_cols = self.right.columns + common_cols = left_cols.intersection(right_cols) + if len(common_cols) == 0: + raise MergeError( + "No common columns to perform merge on. " + f"Merge options: left_on={self.left_on}, " + f"right_on={self.right_on}, " + f"left_index={self.left_index}, " + f"right_index={self.right_index}" + ) + if ( + not left_cols.join(common_cols, how="inner").is_unique + or not right_cols.join(common_cols, how="inner").is_unique + ): + raise MergeError(f"Data columns not unique: {repr(common_cols)}") + self.left_on = self.right_on = common_cols + elif self.on is not None: + if self.left_on is not None or self.right_on is not None: + raise MergeError( + 'Can only pass argument "on" OR "left_on" ' + 'and "right_on", not a combination of both.' + ) + if self.left_index or self.right_index: + raise MergeError( + 'Can only pass argument "on" OR "left_index" ' + 'and "right_index", not a combination of both.' + ) + self.left_on = self.right_on = self.on + elif self.left_on is not None: + if self.left_index: + raise MergeError( + 'Can only pass argument "left_on" OR "left_index" not both.' + ) + if not self.right_index and self.right_on is None: + raise MergeError('Must pass "right_on" OR "right_index".') + n = len(self.left_on) + if self.right_index: + if len(self.left_on) != self.right.index.nlevels: + raise ValueError( + "len(left_on) must equal the number " + 'of levels in the index of "right"' + ) + self.right_on = [None] * n + elif self.right_on is not None: + if self.right_index: + raise MergeError( + 'Can only pass argument "right_on" OR "right_index" not both.' 
+ ) + if not self.left_index and self.left_on is None: + raise MergeError('Must pass "left_on" OR "left_index".') + n = len(self.right_on) + if self.left_index: + if len(self.right_on) != self.left.index.nlevels: + raise ValueError( + "len(right_on) must equal the number " + 'of levels in the index of "left"' + ) + self.left_on = [None] * n + if self.how != "cross" and len(self.right_on) != len(self.left_on): + raise ValueError("len(right_on) must equal len(left_on)") + + def _validate(self, validate: str) -> None: + + # Check uniqueness of each + if self.left_index: + left_unique = self.orig_left.index.is_unique + else: + left_unique = MultiIndex.from_arrays(self.left_join_keys).is_unique + + if self.right_index: + right_unique = self.orig_right.index.is_unique + else: + right_unique = MultiIndex.from_arrays(self.right_join_keys).is_unique + + # Check data integrity + if validate in ["one_to_one", "1:1"]: + if not left_unique and not right_unique: + raise MergeError( + "Merge keys are not unique in either left " + "or right dataset; not a one-to-one merge" + ) + elif not left_unique: + raise MergeError( + "Merge keys are not unique in left dataset; not a one-to-one merge" + ) + elif not right_unique: + raise MergeError( + "Merge keys are not unique in right dataset; not a one-to-one merge" + ) + + elif validate in ["one_to_many", "1:m"]: + if not left_unique: + raise MergeError( + "Merge keys are not unique in left dataset; not a one-to-many merge" + ) + + elif validate in ["many_to_one", "m:1"]: + if not right_unique: + raise MergeError( + "Merge keys are not unique in right dataset; " + "not a many-to-one merge" + ) + + elif validate in ["many_to_many", "m:m"]: + pass + + else: + raise ValueError("Not a valid argument for validate") + + +def get_join_indexers( + left_keys, right_keys, sort: bool = False, how: str = "inner", **kwargs +) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: + """ + + Parameters + ---------- + left_keys : ndarray, Index, Series + right_keys : ndarray, Index, Series + sort : bool, default False + how : {'inner', 'outer', 'left', 'right'}, default 'inner' + + Returns + ------- + np.ndarray[np.intp] + Indexer into the left_keys. + np.ndarray[np.intp] + Indexer into the right_keys. + """ + assert len(left_keys) == len( + right_keys + ), "left_key and right_keys must be the same length" + + # get left & right join labels and num. of levels at each location + mapped = ( + _factorize_keys(left_keys[n], right_keys[n], sort=sort, how=how) + for n in range(len(left_keys)) + ) + zipped = zip(*mapped) + llab, rlab, shape = (list(x) for x in zipped) + + # get flat i8 keys from label lists + lkey, rkey = _get_join_keys(llab, rlab, shape, sort) + + # factorize keys to a dense i8 space + # `count` is the num. 
of unique keys
+    # set(lkey) | set(rkey) == range(count)
+
+    lkey, rkey, count = _factorize_keys(lkey, rkey, sort=sort, how=how)
+    # preserve left frame order if how == 'left' and sort == False
+    kwargs = copy.copy(kwargs)
+    if how in ("left", "right"):
+        kwargs["sort"] = sort
+    join_func = {
+        "inner": libjoin.inner_join,
+        "left": libjoin.left_outer_join,
+        "right": lambda x, y, count, **kwargs: libjoin.left_outer_join(
+            y, x, count, **kwargs
+        )[::-1],
+        "outer": libjoin.full_outer_join,
+    }[how]
+
+    # error: Cannot call function of unknown type
+    return join_func(lkey, rkey, count, **kwargs)  # type: ignore[operator]
+
+
+def restore_dropped_levels_multijoin(
+    left: MultiIndex,
+    right: MultiIndex,
+    dropped_level_names,
+    join_index: Index,
+    lindexer: npt.NDArray[np.intp],
+    rindexer: npt.NDArray[np.intp],
+) -> tuple[list[Index], npt.NDArray[np.intp], list[Hashable]]:
+    """
+    *this is an internal non-public method*
+
+    Returns the levels, labels and names of a multi-index to multi-index join.
+    Depending on the type of join, this method restores the appropriate
+    dropped levels of the joined multi-index.
+    The method relies on lindexer and rindexer, which hold the index positions
+    of left and right where a join was feasible
+
+    Parameters
+    ----------
+    left : MultiIndex
+        left index
+    right : MultiIndex
+        right index
+    dropped_level_names : str array
+        list of non-common level names
+    join_index : Index
+        the index of the join between the
+        common levels of left and right
+    lindexer : np.ndarray[np.intp]
+        left indexer
+    rindexer : np.ndarray[np.intp]
+        right indexer
+
+    Returns
+    -------
+    levels : list of Index
+        levels of combined multiindexes
+    labels : np.ndarray[np.intp]
+        labels of combined multiindexes
+    names : List[Hashable]
+        names of combined multiindex levels
+
+    """
+
+    def _convert_to_multiindex(index: Index) -> MultiIndex:
+        if isinstance(index, MultiIndex):
+            return index
+        else:
+            return MultiIndex.from_arrays([index._values], names=[index.name])
+
+    # For multi-multi joins with one overlapping level,
+    # the returned index is of type Index
+    # Assure that join_index is of type MultiIndex
+    # so that dropped levels can be appended
+    join_index = _convert_to_multiindex(join_index)
+
+    join_levels = join_index.levels
+    join_codes = join_index.codes
+    join_names = join_index.names
+
+    # lindexer and rindexer hold the indexes where the join occurred
+    # for left and right respectively.
If left/right is None then + # the join occurred on all indices of left/right + if lindexer is None: + lindexer = range(left.size) + + if rindexer is None: + rindexer = range(right.size) + + # Iterate through the levels that must be restored + for dropped_level_name in dropped_level_names: + if dropped_level_name in left.names: + idx = left + indexer = lindexer + else: + idx = right + indexer = rindexer + + # The index of the level name to be restored + name_idx = idx.names.index(dropped_level_name) + + restore_levels = idx.levels[name_idx] + # Inject -1 in the codes list where a join was not possible + # IOW indexer[i]=-1 + codes = idx.codes[name_idx] + restore_codes = algos.take_nd(codes, indexer, fill_value=-1) + + join_levels = join_levels + [restore_levels] + join_codes = join_codes + [restore_codes] + join_names = join_names + [dropped_level_name] + + return join_levels, join_codes, join_names + + +class _OrderedMerge(_MergeOperation): + _merge_type = "ordered_merge" + + def __init__( + self, + left: DataFrame | Series, + right: DataFrame | Series, + on: IndexLabel | None = None, + left_on: IndexLabel | None = None, + right_on: IndexLabel | None = None, + left_index: bool = False, + right_index: bool = False, + axis: int = 1, + suffixes: Suffixes = ("_x", "_y"), + copy: bool = True, + fill_method: str | None = None, + how: str = "outer", + ): + + self.fill_method = fill_method + _MergeOperation.__init__( + self, + left, + right, + on=on, + left_on=left_on, + left_index=left_index, + right_index=right_index, + right_on=right_on, + axis=axis, + how=how, + suffixes=suffixes, + sort=True, # factorize sorts + ) + + def get_result(self) -> DataFrame: + join_index, left_indexer, right_indexer = self._get_join_info() + + llabels, rlabels = _items_overlap_with_suffix( + self.left._info_axis, self.right._info_axis, self.suffixes + ) + + left_join_indexer: np.ndarray | None + right_join_indexer: np.ndarray | None + + if self.fill_method == "ffill": + if left_indexer is None: + raise TypeError("left_indexer cannot be None") + left_indexer, right_indexer = cast(np.ndarray, left_indexer), cast( + np.ndarray, right_indexer + ) + left_join_indexer = libjoin.ffill_indexer(left_indexer) + right_join_indexer = libjoin.ffill_indexer(right_indexer) + else: + left_join_indexer = left_indexer + right_join_indexer = right_indexer + + lindexers = {1: left_join_indexer} if left_join_indexer is not None else {} + rindexers = {1: right_join_indexer} if right_join_indexer is not None else {} + + result_data = concatenate_managers( + [(self.left._mgr, lindexers), (self.right._mgr, rindexers)], + axes=[llabels.append(rlabels), join_index], + concat_axis=0, + copy=self.copy, + ) + + typ = self.left._constructor + result = typ(result_data) + + self._maybe_add_join_keys(result, left_indexer, right_indexer) + + return result + + +def _asof_function(direction: str): + name = f"asof_join_{direction}" + return getattr(libjoin, name, None) + + +def _asof_by_function(direction: str): + name = f"asof_join_{direction}_on_X_by_Y" + return getattr(libjoin, name, None) + + +_type_casters = { + "int64_t": ensure_int64, + "double": ensure_float64, + "object": ensure_object, +} + + +def _get_cython_type_upcast(dtype: DtypeObj) -> str: + """Upcast a dtype to 'int64_t', 'double', or 'object'""" + if is_integer_dtype(dtype): + return "int64_t" + elif is_float_dtype(dtype): + return "double" + else: + return "object" + + +class _AsOfMerge(_OrderedMerge): + _merge_type = "asof_merge" + + def __init__( + self, + left: DataFrame | 
Series, + right: DataFrame | Series, + on: IndexLabel | None = None, + left_on: IndexLabel | None = None, + right_on: IndexLabel | None = None, + left_index: bool = False, + right_index: bool = False, + by=None, + left_by=None, + right_by=None, + axis: int = 1, + suffixes: Suffixes = ("_x", "_y"), + copy: bool = True, + fill_method: str | None = None, + how: str = "asof", + tolerance=None, + allow_exact_matches: bool = True, + direction: str = "backward", + ): + + self.by = by + self.left_by = left_by + self.right_by = right_by + self.tolerance = tolerance + self.allow_exact_matches = allow_exact_matches + self.direction = direction + + _OrderedMerge.__init__( + self, + left, + right, + on=on, + left_on=left_on, + right_on=right_on, + left_index=left_index, + right_index=right_index, + axis=axis, + how=how, + suffixes=suffixes, + fill_method=fill_method, + ) + + def _validate_specification(self) -> None: + super()._validate_specification() + + # we only allow on to be a single item for on + if len(self.left_on) != 1 and not self.left_index: + raise MergeError("can only asof on a key for left") + + if len(self.right_on) != 1 and not self.right_index: + raise MergeError("can only asof on a key for right") + + if self.left_index and isinstance(self.left.index, MultiIndex): + raise MergeError("left can only have one index") + + if self.right_index and isinstance(self.right.index, MultiIndex): + raise MergeError("right can only have one index") + + # set 'by' columns + if self.by is not None: + if self.left_by is not None or self.right_by is not None: + raise MergeError("Can only pass by OR left_by and right_by") + self.left_by = self.right_by = self.by + if self.left_by is None and self.right_by is not None: + raise MergeError("missing left_by") + if self.left_by is not None and self.right_by is None: + raise MergeError("missing right_by") + + # GH#29130 Check that merge keys do not have dtype object + if not self.left_index: + left_on = self.left_on[0] + if is_array_like(left_on): + lo_dtype = left_on.dtype + else: + lo_dtype = ( + self.left[left_on].dtype + if left_on in self.left.columns + else self.left.index.get_level_values(left_on) + ) + else: + lo_dtype = self.left.index.dtype + + if not self.right_index: + right_on = self.right_on[0] + if is_array_like(right_on): + ro_dtype = right_on.dtype + else: + ro_dtype = ( + self.right[right_on].dtype + if right_on in self.right.columns + else self.right.index.get_level_values(right_on) + ) + else: + ro_dtype = self.right.index.dtype + + if is_object_dtype(lo_dtype) or is_object_dtype(ro_dtype): + raise MergeError( + f"Incompatible merge dtype, {repr(ro_dtype)} and " + f"{repr(lo_dtype)}, both sides must have numeric dtype" + ) + + # add 'by' to our key-list so we can have it in the + # output as a key + if self.left_by is not None: + if not is_list_like(self.left_by): + self.left_by = [self.left_by] + if not is_list_like(self.right_by): + self.right_by = [self.right_by] + + if len(self.left_by) != len(self.right_by): + raise MergeError("left_by and right_by must be same length") + + self.left_on = self.left_by + list(self.left_on) + self.right_on = self.right_by + list(self.right_on) + + # check 'direction' is valid + if self.direction not in ["backward", "forward", "nearest"]: + raise MergeError(f"direction invalid: {self.direction}") + + def _get_merge_keys(self): + + # note this function has side effects + (left_join_keys, right_join_keys, join_names) = super()._get_merge_keys() + + # validate index types are the same + for i, (lk, rk) in 
enumerate(zip(left_join_keys, right_join_keys)): + if not is_dtype_equal(lk.dtype, rk.dtype): + if is_categorical_dtype(lk.dtype) and is_categorical_dtype(rk.dtype): + # The generic error message is confusing for categoricals. + # + # In this function, the join keys include both the original + # ones of the merge_asof() call, and also the keys passed + # to its by= argument. Unordered but equal categories + # are not supported for the former, but will fail + # later with a ValueError, so we don't *need* to check + # for them here. + msg = ( + f"incompatible merge keys [{i}] {repr(lk.dtype)} and " + f"{repr(rk.dtype)}, both sides category, but not equal ones" + ) + else: + msg = ( + f"incompatible merge keys [{i}] {repr(lk.dtype)} and " + f"{repr(rk.dtype)}, must be the same type" + ) + raise MergeError(msg) + + # validate tolerance; datetime.timedelta or Timedelta if we have a DTI + if self.tolerance is not None: + + if self.left_index: + lt = self.left.index + else: + lt = left_join_keys[-1] + + msg = ( + f"incompatible tolerance {self.tolerance}, must be compat " + f"with type {repr(lt.dtype)}" + ) + + if needs_i8_conversion(lt): + if not isinstance(self.tolerance, datetime.timedelta): + raise MergeError(msg) + if self.tolerance < Timedelta(0): + raise MergeError("tolerance must be positive") + + elif is_integer_dtype(lt): + if not is_integer(self.tolerance): + raise MergeError(msg) + if self.tolerance < 0: + raise MergeError("tolerance must be positive") + + elif is_float_dtype(lt): + if not is_number(self.tolerance): + raise MergeError(msg) + if self.tolerance < 0: + raise MergeError("tolerance must be positive") + + else: + raise MergeError("key must be integer, timestamp or float") + + # validate allow_exact_matches + if not is_bool(self.allow_exact_matches): + msg = ( + "allow_exact_matches must be boolean, " + f"passed {self.allow_exact_matches}" + ) + raise MergeError(msg) + + return left_join_keys, right_join_keys, join_names + + def _get_join_indexers(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: + """return the join indexers""" + + def flip(xs) -> np.ndarray: + """unlike np.transpose, this returns an array of tuples""" + # error: Item "ndarray" of "Union[Any, Union[ExtensionArray, ndarray]]" has + # no attribute "_values_for_argsort" + xs = [ + x + if not is_extension_array_dtype(x) + else extract_array(x)._values_for_argsort() # type: ignore[union-attr] + for x in xs + ] + labels = list(string.ascii_lowercase[: len(xs)]) + dtypes = [x.dtype for x in xs] + labeled_dtypes = list(zip(labels, dtypes)) + return np.array(list(zip(*xs)), labeled_dtypes) + + # values to compare + left_values = ( + self.left.index._values if self.left_index else self.left_join_keys[-1] + ) + right_values = ( + self.right.index._values if self.right_index else self.right_join_keys[-1] + ) + tolerance = self.tolerance + + # we require sortedness and non-null values in the join keys + if not Index(left_values).is_monotonic: + side = "left" + if isna(left_values).any(): + raise ValueError(f"Merge keys contain null values on {side} side") + else: + raise ValueError(f"{side} keys must be sorted") + + if not Index(right_values).is_monotonic: + side = "right" + if isna(right_values).any(): + raise ValueError(f"Merge keys contain null values on {side} side") + else: + raise ValueError(f"{side} keys must be sorted") + + # initial type conversion as needed + if needs_i8_conversion(left_values): + left_values = left_values.view("i8") + right_values = right_values.view("i8") + if tolerance is not 
None: + tolerance = Timedelta(tolerance) + tolerance = tolerance.value + + # a "by" parameter requires special handling + if self.left_by is not None: + # remove 'on' parameter from values if one existed + if self.left_index and self.right_index: + left_by_values = self.left_join_keys + right_by_values = self.right_join_keys + else: + left_by_values = self.left_join_keys[0:-1] + right_by_values = self.right_join_keys[0:-1] + + # get tuple representation of values if more than one + if len(left_by_values) == 1: + left_by_values = left_by_values[0] + right_by_values = right_by_values[0] + else: + left_by_values = flip(left_by_values) + right_by_values = flip(right_by_values) + + # upcast 'by' parameter because HashTable is limited + by_type = _get_cython_type_upcast(left_by_values.dtype) + by_type_caster = _type_casters[by_type] + # error: Cannot call function of unknown type + left_by_values = by_type_caster(left_by_values) # type: ignore[operator] + # error: Cannot call function of unknown type + right_by_values = by_type_caster(right_by_values) # type: ignore[operator] + + # choose appropriate function by type + func = _asof_by_function(self.direction) + return func( + left_values, + right_values, + left_by_values, + right_by_values, + self.allow_exact_matches, + tolerance, + ) + else: + # choose appropriate function by type + func = _asof_function(self.direction) + return func(left_values, right_values, self.allow_exact_matches, tolerance) + + +def _get_multiindex_indexer( + join_keys, index: MultiIndex, sort: bool +) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: + + # left & right join labels and num. of levels at each location + mapped = ( + _factorize_keys(index.levels[n], join_keys[n], sort=sort) + for n in range(index.nlevels) + ) + zipped = zip(*mapped) + rcodes, lcodes, shape = (list(x) for x in zipped) + if sort: + rcodes = list(map(np.take, rcodes, index.codes)) + else: + i8copy = lambda a: a.astype("i8", subok=False, copy=True) + rcodes = list(map(i8copy, index.codes)) + + # fix right labels if there were any nulls + for i in range(len(join_keys)): + mask = index.codes[i] == -1 + if mask.any(): + # check if there already was any nulls at this location + # if there was, it is factorized to `shape[i] - 1` + a = join_keys[i][lcodes[i] == shape[i] - 1] + if a.size == 0 or not a[0] != a[0]: + shape[i] += 1 + + rcodes[i][mask] = shape[i] - 1 + + # get flat i8 join keys + lkey, rkey = _get_join_keys(lcodes, rcodes, shape, sort) + + # factorize keys to a dense i8 space + lkey, rkey, count = _factorize_keys(lkey, rkey, sort=sort) + + return libjoin.left_outer_join(lkey, rkey, count, sort=sort) + + +def _get_single_indexer( + join_key, index: Index, sort: bool = False +) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: + left_key, right_key, count = _factorize_keys(join_key, index._values, sort=sort) + + return libjoin.left_outer_join(left_key, right_key, count, sort=sort) + + +def _left_join_on_index( + left_ax: Index, right_ax: Index, join_keys, sort: bool = False +) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp]]: + if len(join_keys) > 1: + if not ( + isinstance(right_ax, MultiIndex) and len(join_keys) == right_ax.nlevels + ): + raise AssertionError( + "If more than one join key is given then " + "'right_ax' must be a MultiIndex and the " + "number of join keys must be the number of levels in right_ax" + ) + + left_indexer, right_indexer = _get_multiindex_indexer( + join_keys, right_ax, sort=sort + ) + else: + jkey = join_keys[0] + + left_indexer, 
right_indexer = _get_single_indexer(jkey, right_ax, sort=sort)
+
+    if sort or len(left_ax) != len(left_indexer):
+        # if asked to sort or there are 1-to-many matches
+        join_index = left_ax.take(left_indexer)
+        return join_index, left_indexer, right_indexer
+
+    # left frame preserves order & length of its index
+    return left_ax, None, right_indexer
+
+
+def _factorize_keys(
+    lk: ArrayLike, rk: ArrayLike, sort: bool = True, how: str = "inner"
+) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp], int]:
+    """
+    Encode left and right keys as enumerated types.
+
+    This is used to get the join indexers to be used when merging DataFrames.
+
+    Parameters
+    ----------
+    lk : array-like
+        Left key.
+    rk : array-like
+        Right key.
+    sort : bool, defaults to True
+        If True, the encoding is done such that the unique elements in the
+        keys are sorted.
+    how : {'left', 'right', 'outer', 'inner'}, default 'inner'
+        Type of merge.
+
+    Returns
+    -------
+    np.ndarray[np.intp]
+        Left (resp. right if called with `how="right"`) labels, as enumerated type.
+    np.ndarray[np.intp]
+        Right (resp. left if called with `how="right"`) labels, as enumerated type.
+    int
+        Number of unique elements in union of left and right labels.
+
+    See Also
+    --------
+    merge : Merge DataFrame or named Series objects
+        with a database-style join.
+    algorithms.factorize : Encode the object as an enumerated type
+        or categorical variable.
+
+    Examples
+    --------
+    >>> lk = np.array(["a", "c", "b"])
+    >>> rk = np.array(["a", "c"])
+
+    Here, the unique values are `'a', 'b', 'c'`. With the default
+    `sort=True`, the encoding will be `{0: 'a', 1: 'b', 2: 'c'}`:
+
+    >>> pd.core.reshape.merge._factorize_keys(lk, rk)
+    (array([0, 2, 1]), array([0, 2]), 3)
+
+    With `sort=False`, the encoding will correspond to the order
+    in which the unique elements first appear: `{0: 'a', 1: 'c', 2: 'b'}`:
+
+    >>> pd.core.reshape.merge._factorize_keys(lk, rk, sort=False)
+    (array([0, 1, 2]), array([0, 1]), 3)
+    """
+    # Some pre-processing for non-ndarray lk / rk
+    lk = extract_array(lk, extract_numpy=True, extract_range=True)
+    rk = extract_array(rk, extract_numpy=True, extract_range=True)
+    # TODO: if either is a RangeIndex, we can likely factorize more efficiently?
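+    # The dtype dispatch below encodes the keys: tz-aware datetimes are
+    # compared on their underlying i8 values, matching categoricals on their
+    # integer codes, extension arrays via _values_for_factorize, integer-like
+    # dtypes through Int64Factorizer, and everything else as object arrays
+    # through ObjectFactorizer.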
+ + if is_datetime64tz_dtype(lk.dtype) and is_datetime64tz_dtype(rk.dtype): + # Extract the ndarray (UTC-localized) values + # Note: we dont need the dtypes to match, as these can still be compared + lk = cast("DatetimeArray", lk)._ndarray + rk = cast("DatetimeArray", rk)._ndarray + + elif ( + is_categorical_dtype(lk.dtype) + and is_categorical_dtype(rk.dtype) + and is_dtype_equal(lk.dtype, rk.dtype) + ): + assert isinstance(lk, Categorical) + assert isinstance(rk, Categorical) + # Cast rk to encoding so we can compare codes with lk + + rk = lk._encode_with_my_categories(rk) + + lk = ensure_int64(lk.codes) + rk = ensure_int64(rk.codes) + + elif isinstance(lk, ExtensionArray) and is_dtype_equal(lk.dtype, rk.dtype): + # error: Incompatible types in assignment (expression has type "ndarray", + # variable has type "ExtensionArray") + lk, _ = lk._values_for_factorize() + + # error: Item "ndarray" of "Union[Any, ndarray]" has no attribute + # "_values_for_factorize" + rk, _ = rk._values_for_factorize() # type: ignore[union-attr] + + klass: type[libhashtable.Factorizer] | type[libhashtable.Int64Factorizer] + if is_integer_dtype(lk.dtype) and is_integer_dtype(rk.dtype): + # GH#23917 TODO: needs tests for case where lk is integer-dtype + # and rk is datetime-dtype + klass = libhashtable.Int64Factorizer + lk = ensure_int64(np.asarray(lk)) + rk = ensure_int64(np.asarray(rk)) + + elif needs_i8_conversion(lk.dtype) and is_dtype_equal(lk.dtype, rk.dtype): + # GH#23917 TODO: Needs tests for non-matching dtypes + klass = libhashtable.Int64Factorizer + lk = ensure_int64(np.asarray(lk, dtype=np.int64)) + rk = ensure_int64(np.asarray(rk, dtype=np.int64)) + + else: + klass = libhashtable.ObjectFactorizer + lk = ensure_object(lk) + rk = ensure_object(rk) + + rizer = klass(max(len(lk), len(rk))) + + # Argument 1 to "factorize" of "ObjectFactorizer" has incompatible type + # "Union[ndarray[Any, dtype[signedinteger[_64Bit]]], + # ndarray[Any, dtype[object_]]]"; expected "ndarray[Any, dtype[object_]]" + llab = rizer.factorize(lk) # type: ignore[arg-type] + # Argument 1 to "factorize" of "ObjectFactorizer" has incompatible type + # "Union[ndarray[Any, dtype[signedinteger[_64Bit]]], + # ndarray[Any, dtype[object_]]]"; expected "ndarray[Any, dtype[object_]]" + rlab = rizer.factorize(rk) # type: ignore[arg-type] + assert llab.dtype == np.dtype(np.intp), llab.dtype + assert rlab.dtype == np.dtype(np.intp), rlab.dtype + + count = rizer.get_count() + + if sort: + uniques = rizer.uniques.to_array() + llab, rlab = _sort_labels(uniques, llab, rlab) + + # NA group + lmask = llab == -1 + lany = lmask.any() + rmask = rlab == -1 + rany = rmask.any() + + if lany or rany: + if lany: + np.putmask(llab, lmask, count) + if rany: + np.putmask(rlab, rmask, count) + count += 1 + + if how == "right": + return rlab, llab, count + return llab, rlab, count + + +def _sort_labels( + uniques: np.ndarray, left: np.ndarray, right: np.ndarray +) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: + + llength = len(left) + labels = np.concatenate([left, right]) + + _, new_labels = algos.safe_sort(uniques, labels, na_sentinel=-1) + assert new_labels.dtype == np.intp + new_left, new_right = new_labels[:llength], new_labels[llength:] + + return new_left, new_right + + +def _get_join_keys(llab, rlab, shape, sort: bool): + + # how many levels can be done without overflow + nlev = next( + lev + for lev in range(len(shape), 0, -1) + if not is_int64_overflow_possible(shape[:lev]) + ) + + # get keys for the first `nlev` levels + stride = 
np.prod(shape[1:nlev], dtype="i8") + lkey = stride * llab[0].astype("i8", subok=False, copy=False) + rkey = stride * rlab[0].astype("i8", subok=False, copy=False) + + for i in range(1, nlev): + with np.errstate(divide="ignore"): + stride //= shape[i] + lkey += llab[i] * stride + rkey += rlab[i] * stride + + if nlev == len(shape): # all done! + return lkey, rkey + + # densify current keys to avoid overflow + lkey, rkey, count = _factorize_keys(lkey, rkey, sort=sort) + + llab = [lkey] + llab[nlev:] + rlab = [rkey] + rlab[nlev:] + shape = [count] + shape[nlev:] + + return _get_join_keys(llab, rlab, shape, sort) + + +def _should_fill(lname, rname) -> bool: + if not isinstance(lname, str) or not isinstance(rname, str): + return True + return lname == rname + + +def _any(x) -> bool: + return x is not None and com.any_not_none(*x) + + +def _validate_operand(obj: DataFrame | Series) -> DataFrame: + if isinstance(obj, ABCDataFrame): + return obj + elif isinstance(obj, ABCSeries): + if obj.name is None: + raise ValueError("Cannot merge a Series without a name") + else: + return obj.to_frame() + else: + raise TypeError( + f"Can only merge Series or DataFrame objects, a {type(obj)} was passed" + ) + + +def _items_overlap_with_suffix( + left: Index, right: Index, suffixes: Suffixes +) -> tuple[Index, Index]: + """ + Suffixes type validation. + + If two indices overlap, add suffixes to overlapping entries. + + If corresponding suffix is empty, the entry is simply converted to string. + + """ + if not is_list_like(suffixes, allow_sets=False): + warnings.warn( + f"Passing 'suffixes' as a {type(suffixes)}, is not supported and may give " + "unexpected results. Provide 'suffixes' as a tuple instead. In the " + "future a 'TypeError' will be raised.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + to_rename = left.intersection(right) + if len(to_rename) == 0: + return left, right + + lsuffix, rsuffix = suffixes + + if not lsuffix and not rsuffix: + raise ValueError(f"columns overlap but no suffix specified: {to_rename}") + + def renamer(x, suffix): + """ + Rename the left and right indices. + + If there is overlap, and suffix is not None, add + suffix, otherwise, leave it as-is. 
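+
+        For example, with suffixes ("_x", "_y") an overlapping column "key"
+        is renamed to "key_x" on the left and "key_y" on the right.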
+ + Parameters + ---------- + x : original column name + suffix : str or None + + Returns + ------- + x : renamed column name + """ + if x in to_rename and suffix is not None: + return f"{x}{suffix}" + return x + + lrenamer = partial(renamer, suffix=lsuffix) + rrenamer = partial(renamer, suffix=rsuffix) + + llabels = left._transform_index(lrenamer) + rlabels = right._transform_index(rrenamer) + + dups = [] + if not llabels.is_unique: + # Only warn when duplicates are caused because of suffixes, already duplicated + # columns in origin should not warn + dups = llabels[(llabels.duplicated()) & (~left.duplicated())].tolist() + if not rlabels.is_unique: + dups.extend(rlabels[(rlabels.duplicated()) & (~right.duplicated())].tolist()) + if dups: + warnings.warn( + f"Passing 'suffixes' which cause duplicate columns {set(dups)} in the " + f"result is deprecated and will raise a MergeError in a future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + return llabels, rlabels diff --git a/.local/lib/python3.8/site-packages/pandas/core/reshape/pivot.py b/.local/lib/python3.8/site-packages/pandas/core/reshape/pivot.py new file mode 100644 index 0000000..b428155 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/reshape/pivot.py @@ -0,0 +1,842 @@ +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Callable, + Hashable, + Sequence, + cast, +) + +import numpy as np + +from pandas._typing import ( + AggFuncType, + AggFuncTypeBase, + AggFuncTypeDict, + IndexLabel, +) +from pandas.util._decorators import ( + Appender, + Substitution, +) + +from pandas.core.dtypes.cast import maybe_downcast_to_dtype +from pandas.core.dtypes.common import ( + is_integer_dtype, + is_list_like, + is_nested_list_like, + is_scalar, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) + +import pandas.core.common as com +from pandas.core.frame import _shared_docs +from pandas.core.groupby import Grouper +from pandas.core.indexes.api import ( + Index, + MultiIndex, + get_objs_combined_axis, +) +from pandas.core.reshape.concat import concat +from pandas.core.reshape.util import cartesian_product +from pandas.core.series import Series + +if TYPE_CHECKING: + from pandas import DataFrame + + +# Note: We need to make sure `frame` is imported before `pivot`, otherwise +# _shared_docs['pivot_table'] will not yet exist. 
TODO: Fix this dependency +@Substitution("\ndata : DataFrame") +@Appender(_shared_docs["pivot_table"], indents=1) +def pivot_table( + data: DataFrame, + values=None, + index=None, + columns=None, + aggfunc: AggFuncType = "mean", + fill_value=None, + margins: bool = False, + dropna: bool = True, + margins_name: str = "All", + observed: bool = False, + sort: bool = True, +) -> DataFrame: + index = _convert_by(index) + columns = _convert_by(columns) + + if isinstance(aggfunc, list): + pieces: list[DataFrame] = [] + keys = [] + for func in aggfunc: + _table = __internal_pivot_table( + data, + values=values, + index=index, + columns=columns, + fill_value=fill_value, + aggfunc=func, + margins=margins, + dropna=dropna, + margins_name=margins_name, + observed=observed, + sort=sort, + ) + pieces.append(_table) + keys.append(getattr(func, "__name__", func)) + + table = concat(pieces, keys=keys, axis=1) + return table.__finalize__(data, method="pivot_table") + + table = __internal_pivot_table( + data, + values, + index, + columns, + aggfunc, + fill_value, + margins, + dropna, + margins_name, + observed, + sort, + ) + return table.__finalize__(data, method="pivot_table") + + +def __internal_pivot_table( + data: DataFrame, + values, + index, + columns, + aggfunc: AggFuncTypeBase | AggFuncTypeDict, + fill_value, + margins: bool, + dropna: bool, + margins_name: str, + observed: bool, + sort: bool, +) -> DataFrame: + """ + Helper of :func:`pandas.pivot_table` for any non-list ``aggfunc``. + """ + keys = index + columns + + values_passed = values is not None + if values_passed: + if is_list_like(values): + values_multi = True + values = list(values) + else: + values_multi = False + values = [values] + + # GH14938 Make sure value labels are in data + for i in values: + if i not in data: + raise KeyError(i) + + to_filter = [] + for x in keys + values: + if isinstance(x, Grouper): + x = x.key + try: + if x in data: + to_filter.append(x) + except TypeError: + pass + if len(to_filter) < len(data.columns): + data = data[to_filter] + + else: + values = data.columns + for key in keys: + try: + values = values.drop(key) + except (TypeError, ValueError, KeyError): + pass + values = list(values) + + grouped = data.groupby(keys, observed=observed, sort=sort) + agged = grouped.agg(aggfunc) + if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns): + agged = agged.dropna(how="all") + + # gh-21133 + # we want to down cast if + # the original values are ints + # as we grouped with a NaN value + # and then dropped, coercing to floats + for v in values: + if ( + v in data + and is_integer_dtype(data[v]) + and v in agged + and not is_integer_dtype(agged[v]) + ): + if not isinstance(agged[v], ABCDataFrame): + # exclude DataFrame case bc maybe_downcast_to_dtype expects + # ArrayLike + # e.g. test_pivot_table_multiindex_columns_doctest_case + # agged.columns is a MultiIndex and 'v' is indexing only + # on its first level. + agged[v] = maybe_downcast_to_dtype(agged[v], data[v].dtype) + + table = agged + + # GH17038, this check should only happen if index is defined (not None) + if table.index.nlevels > 1 and index: + # Related GH #17123 + # If index_names are integers, determine whether the integers refer + # to the level position or name. 
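+        # Column levels named None, or whose name collides with an index
+        # name, are referenced by position in `to_unstack`; the remaining
+        # levels are referenced by name.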
+ index_names = agged.index.names[: len(index)] + to_unstack = [] + for i in range(len(index), len(keys)): + name = agged.index.names[i] + if name is None or name in index_names: + to_unstack.append(i) + else: + to_unstack.append(name) + table = agged.unstack(to_unstack) + + if not dropna: + if isinstance(table.index, MultiIndex): + m = MultiIndex.from_arrays( + cartesian_product(table.index.levels), names=table.index.names + ) + table = table.reindex(m, axis=0) + + if isinstance(table.columns, MultiIndex): + m = MultiIndex.from_arrays( + cartesian_product(table.columns.levels), names=table.columns.names + ) + table = table.reindex(m, axis=1) + + if isinstance(table, ABCDataFrame): + table = table.sort_index(axis=1) + + if fill_value is not None: + table = table.fillna(fill_value, downcast="infer") + + if margins: + if dropna: + data = data[data.notna().all(axis=1)] + table = _add_margins( + table, + data, + values, + rows=index, + cols=columns, + aggfunc=aggfunc, + observed=dropna, + margins_name=margins_name, + fill_value=fill_value, + ) + + # discard the top level + if values_passed and not values_multi and table.columns.nlevels > 1: + table = table.droplevel(0, axis=1) + if len(index) == 0 and len(columns) > 0: + table = table.T + + # GH 15193 Make sure empty columns are removed if dropna=True + if isinstance(table, ABCDataFrame) and dropna: + table = table.dropna(how="all", axis=1) + + return table + + +def _add_margins( + table: DataFrame | Series, + data: DataFrame, + values, + rows, + cols, + aggfunc, + observed=None, + margins_name: str = "All", + fill_value=None, +): + if not isinstance(margins_name, str): + raise ValueError("margins_name argument must be a string") + + msg = f'Conflicting name "{margins_name}" in margins' + for level in table.index.names: + if margins_name in table.index.get_level_values(level): + raise ValueError(msg) + + grand_margin = _compute_grand_margin(data, values, aggfunc, margins_name) + + if table.ndim == 2: + # i.e. DataFrame + for level in table.columns.names[1:]: + if margins_name in table.columns.get_level_values(level): + raise ValueError(msg) + + key: str | tuple[str, ...] + if len(rows) > 1: + key = (margins_name,) + ("",) * (len(rows) - 1) + else: + key = margins_name + + if not values and isinstance(table, ABCSeries): + # If there are no values and the table is a series, then there is only + # one column in the data. Compute grand margin and return it. 
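+        # In this branch grand_margin has a single entry keyed by
+        # margins_name, computed above by _compute_grand_margin as
+        # aggfunc(data.index).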
+ return table._append(Series({key: grand_margin[margins_name]})) + + elif values: + marginal_result_set = _generate_marginal_results( + table, data, values, rows, cols, aggfunc, observed, margins_name + ) + if not isinstance(marginal_result_set, tuple): + return marginal_result_set + result, margin_keys, row_margin = marginal_result_set + else: + # no values, and table is a DataFrame + assert isinstance(table, ABCDataFrame) + marginal_result_set = _generate_marginal_results_without_values( + table, data, rows, cols, aggfunc, observed, margins_name + ) + if not isinstance(marginal_result_set, tuple): + return marginal_result_set + result, margin_keys, row_margin = marginal_result_set + + row_margin = row_margin.reindex(result.columns, fill_value=fill_value) + # populate grand margin + for k in margin_keys: + if isinstance(k, str): + row_margin[k] = grand_margin[k] + else: + row_margin[k] = grand_margin[k[0]] + + from pandas import DataFrame + + margin_dummy = DataFrame(row_margin, columns=[key]).T + + row_names = result.index.names + # check the result column and leave floats + for dtype in set(result.dtypes): + cols = result.select_dtypes([dtype]).columns + margin_dummy[cols] = margin_dummy[cols].apply( + maybe_downcast_to_dtype, args=(dtype,) + ) + result = result._append(margin_dummy) + result.index.names = row_names + + return result + + +def _compute_grand_margin(data: DataFrame, values, aggfunc, margins_name: str = "All"): + + if values: + grand_margin = {} + for k, v in data[values].items(): + try: + if isinstance(aggfunc, str): + grand_margin[k] = getattr(v, aggfunc)() + elif isinstance(aggfunc, dict): + if isinstance(aggfunc[k], str): + grand_margin[k] = getattr(v, aggfunc[k])() + else: + grand_margin[k] = aggfunc[k](v) + else: + grand_margin[k] = aggfunc(v) + except TypeError: + pass + return grand_margin + else: + return {margins_name: aggfunc(data.index)} + + +def _generate_marginal_results( + table, data, values, rows, cols, aggfunc, observed, margins_name: str = "All" +): + if len(cols) > 0: + # need to "interleave" the margins + table_pieces = [] + margin_keys = [] + + def _all_key(key): + return (key, margins_name) + ("",) * (len(cols) - 1) + + if len(rows) > 0: + margin = data[rows + values].groupby(rows, observed=observed).agg(aggfunc) + cat_axis = 1 + + for key, piece in table.groupby(level=0, axis=cat_axis, observed=observed): + all_key = _all_key(key) + + # we are going to mutate this, so need to copy! 
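+                # Each level-0 column group gains one extra column keyed
+                # (key, margins_name, "", ...) holding that group's aggregate
+                # taken from `margin`.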
+ piece = piece.copy() + piece[all_key] = margin[key] + + table_pieces.append(piece) + margin_keys.append(all_key) + else: + from pandas import DataFrame + + cat_axis = 0 + for key, piece in table.groupby(level=0, axis=cat_axis, observed=observed): + if len(cols) > 1: + all_key = _all_key(key) + else: + all_key = margins_name + table_pieces.append(piece) + # GH31016 this is to calculate margin for each group, and assign + # corresponded key as index + transformed_piece = DataFrame(piece.apply(aggfunc)).T + transformed_piece.index = Index([all_key], name=piece.index.name) + + # append piece for margin into table_piece + table_pieces.append(transformed_piece) + margin_keys.append(all_key) + + result = concat(table_pieces, axis=cat_axis) + + if len(rows) == 0: + return result + else: + result = table + margin_keys = table.columns + + if len(cols) > 0: + row_margin = data[cols + values].groupby(cols, observed=observed).agg(aggfunc) + row_margin = row_margin.stack() + + # slight hack + new_order = [len(cols)] + list(range(len(cols))) + row_margin.index = row_margin.index.reorder_levels(new_order) + else: + row_margin = Series(np.nan, index=result.columns) + + return result, margin_keys, row_margin + + +def _generate_marginal_results_without_values( + table: DataFrame, data, rows, cols, aggfunc, observed, margins_name: str = "All" +): + if len(cols) > 0: + # need to "interleave" the margins + margin_keys: list | Index = [] + + def _all_key(): + if len(cols) == 1: + return margins_name + return (margins_name,) + ("",) * (len(cols) - 1) + + if len(rows) > 0: + margin = data[rows].groupby(rows, observed=observed).apply(aggfunc) + all_key = _all_key() + table[all_key] = margin + result = table + margin_keys.append(all_key) + + else: + margin = data.groupby(level=0, axis=0, observed=observed).apply(aggfunc) + all_key = _all_key() + table[all_key] = margin + result = table + margin_keys.append(all_key) + return result + else: + result = table + margin_keys = table.columns + + if len(cols): + row_margin = data[cols].groupby(cols, observed=observed).apply(aggfunc) + else: + row_margin = Series(np.nan, index=result.columns) + + return result, margin_keys, row_margin + + +def _convert_by(by): + if by is None: + by = [] + elif ( + is_scalar(by) + or isinstance(by, (np.ndarray, Index, ABCSeries, Grouper)) + or callable(by) + ): + by = [by] + else: + by = list(by) + return by + + +@Substitution("\ndata : DataFrame") +@Appender(_shared_docs["pivot"], indents=1) +def pivot( + data: DataFrame, + index: IndexLabel | None = None, + columns: IndexLabel | None = None, + values: IndexLabel | None = None, +) -> DataFrame: + if columns is None: + raise TypeError("pivot() missing 1 required argument: 'columns'") + + columns_listlike = com.convert_to_list_like(columns) + + if values is None: + if index is not None: + cols = com.convert_to_list_like(index) + else: + cols = [] + + append = index is None + # error: Unsupported operand types for + ("List[Any]" and "ExtensionArray") + # error: Unsupported left operand type for + ("ExtensionArray") + indexed = data.set_index( + cols + columns_listlike, append=append # type: ignore[operator] + ) + else: + if index is None: + if isinstance(data.index, MultiIndex): + # GH 23955 + index_list = [ + data.index.get_level_values(i) for i in range(data.index.nlevels) + ] + else: + index_list = [Series(data.index, name=data.index.name)] + else: + index_list = [data[idx] for idx in com.convert_to_list_like(index)] + + data_columns = [data[col] for col in columns_listlike] + 
        index_list.extend(data_columns)
+        multiindex = MultiIndex.from_arrays(index_list)
+
+        if is_list_like(values) and not isinstance(values, tuple):
+            # Exclude tuple because it is seen as a single column name
+            values = cast(Sequence[Hashable], values)
+            indexed = data._constructor(
+                data[values]._values, index=multiindex, columns=values
+            )
+        else:
+            indexed = data._constructor_sliced(data[values]._values, index=multiindex)
+    return indexed.unstack(columns_listlike)
+
+
+def crosstab(
+    index,
+    columns,
+    values=None,
+    rownames=None,
+    colnames=None,
+    aggfunc=None,
+    margins: bool = False,
+    margins_name: str = "All",
+    dropna: bool = True,
+    normalize=False,
+) -> DataFrame:
+    """
+    Compute a simple cross tabulation of two (or more) factors. By default
+    computes a frequency table of the factors unless an array of values and an
+    aggregation function are passed.
+
+    Parameters
+    ----------
+    index : array-like, Series, or list of arrays/Series
+        Values to group by in the rows.
+    columns : array-like, Series, or list of arrays/Series
+        Values to group by in the columns.
+    values : array-like, optional
+        Array of values to aggregate according to the factors.
+        Requires `aggfunc` be specified.
+    rownames : sequence, default None
+        If passed, must match number of row arrays passed.
+    colnames : sequence, default None
+        If passed, must match number of column arrays passed.
+    aggfunc : function, optional
+        If specified, requires `values` be specified as well.
+    margins : bool, default False
+        Add row/column margins (subtotals).
+    margins_name : str, default 'All'
+        Name of the row/column that will contain the totals
+        when margins is True.
+    dropna : bool, default True
+        Do not include columns whose entries are all NaN.
+    normalize : bool, {'all', 'index', 'columns'}, or {0,1}, default False
+        Normalize by dividing all values by the sum of values.
+
+        - If passed 'all' or `True`, will normalize over all values.
+        - If passed 'index' will normalize over each row.
+        - If passed 'columns' will normalize over each column.
+        - If margins is `True`, will also normalize margin values.
+
+    Returns
+    -------
+    DataFrame
+        Cross tabulation of the data.
+
+    See Also
+    --------
+    DataFrame.pivot : Reshape data based on column values.
+    pivot_table : Create a pivot table as a DataFrame.
+
+    Notes
+    -----
+    Any Series passed will have their name attributes used unless row or column
+    names for the cross-tabulation are specified.
+
+    Any input passed containing Categorical data will have **all** of its
+    categories included in the cross-tabulation, even if the actual data does
+    not contain any instances of a particular category.
+
+    In the event that there aren't overlapping indexes an empty DataFrame will
+    be returned.
+
+    Reference :ref:`the user guide <reshaping.crosstabulations>` for more examples.
+
+    Examples
+    --------
+    >>> a = np.array(["foo", "foo", "foo", "foo", "bar", "bar",
+    ...               "bar", "bar", "foo", "foo", "foo"], dtype=object)
+    >>> b = np.array(["one", "one", "one", "two", "one", "one",
+    ...               "one", "two", "two", "two", "one"], dtype=object)
+    >>> c = np.array(["dull", "dull", "shiny", "dull", "dull", "shiny",
+    ...               "shiny", "dull", "shiny", "shiny", "shiny"],
+    ...              dtype=object)
+    >>> pd.crosstab(a, [b, c], rownames=['a'], colnames=['b', 'c'])
+    b   one        two
+    c   dull shiny dull shiny
+    a
+    bar    1     2    1     0
+    foo    2     2    1     2
+
+    Here 'c' and 'f' are not represented in the data and will not be
+    shown in the output because dropna is True by default. Set
+    dropna=False to preserve categories with no data.
+ + >>> foo = pd.Categorical(['a', 'b'], categories=['a', 'b', 'c']) + >>> bar = pd.Categorical(['d', 'e'], categories=['d', 'e', 'f']) + >>> pd.crosstab(foo, bar) + col_0 d e + row_0 + a 1 0 + b 0 1 + >>> pd.crosstab(foo, bar, dropna=False) + col_0 d e f + row_0 + a 1 0 0 + b 0 1 0 + c 0 0 0 + """ + if values is None and aggfunc is not None: + raise ValueError("aggfunc cannot be used without values.") + + if values is not None and aggfunc is None: + raise ValueError("values cannot be used without an aggfunc.") + + if not is_nested_list_like(index): + index = [index] + if not is_nested_list_like(columns): + columns = [columns] + + common_idx = None + pass_objs = [x for x in index + columns if isinstance(x, (ABCSeries, ABCDataFrame))] + if pass_objs: + common_idx = get_objs_combined_axis(pass_objs, intersect=True, sort=False) + + rownames = _get_names(index, rownames, prefix="row") + colnames = _get_names(columns, colnames, prefix="col") + + # duplicate names mapped to unique names for pivot op + ( + rownames_mapper, + unique_rownames, + colnames_mapper, + unique_colnames, + ) = _build_names_mapper(rownames, colnames) + + from pandas import DataFrame + + data = { + **dict(zip(unique_rownames, index)), + **dict(zip(unique_colnames, columns)), + } + df = DataFrame(data, index=common_idx) + + if values is None: + df["__dummy__"] = 0 + kwargs = {"aggfunc": len, "fill_value": 0} + else: + df["__dummy__"] = values + kwargs = {"aggfunc": aggfunc} + + table = df.pivot_table( + "__dummy__", + index=unique_rownames, + columns=unique_colnames, + margins=margins, + margins_name=margins_name, + dropna=dropna, + **kwargs, + ) + + # Post-process + if normalize is not False: + table = _normalize( + table, normalize=normalize, margins=margins, margins_name=margins_name + ) + + table = table.rename_axis(index=rownames_mapper, axis=0) + table = table.rename_axis(columns=colnames_mapper, axis=1) + + return table + + +def _normalize( + table: DataFrame, normalize, margins: bool, margins_name="All" +) -> DataFrame: + + if not isinstance(normalize, (bool, str)): + axis_subs = {0: "index", 1: "columns"} + try: + normalize = axis_subs[normalize] + except KeyError as err: + raise ValueError("Not a valid normalize argument") from err + + if margins is False: + + # Actual Normalizations + normalizers: dict[bool | str, Callable] = { + "all": lambda x: x / x.sum(axis=1).sum(axis=0), + "columns": lambda x: x / x.sum(), + "index": lambda x: x.div(x.sum(axis=1), axis=0), + } + + normalizers[True] = normalizers["all"] + + try: + f = normalizers[normalize] + except KeyError as err: + raise ValueError("Not a valid normalize argument") from err + + table = f(table) + table = table.fillna(0) + + elif margins is True: + # keep index and column of pivoted table + table_index = table.index + table_columns = table.columns + last_ind_or_col = table.iloc[-1, :].name + + # check if margin name is not in (for MI cases) and not equal to last + # index/column and save the column and index margin + if (margins_name not in last_ind_or_col) & (margins_name != last_ind_or_col): + raise ValueError(f"{margins_name} not in pivoted DataFrame") + column_margin = table.iloc[:-1, -1] + index_margin = table.iloc[-1, :-1] + + # keep the core table + table = table.iloc[:-1, :-1] + + # Normalize core + table = _normalize(table, normalize=normalize, margins=False) + + # Fix Margins + if normalize == "columns": + column_margin = column_margin / column_margin.sum() + table = concat([table, column_margin], axis=1) + table = table.fillna(0) + table.columns 
= table_columns
+
+        elif normalize == "index":
+            index_margin = index_margin / index_margin.sum()
+            table = table._append(index_margin)
+            table = table.fillna(0)
+            table.index = table_index
+
+        elif normalize == "all" or normalize is True:
+            column_margin = column_margin / column_margin.sum()
+            index_margin = index_margin / index_margin.sum()
+            index_margin.loc[margins_name] = 1
+            table = concat([table, column_margin], axis=1)
+            table = table._append(index_margin)
+
+            table = table.fillna(0)
+            table.index = table_index
+            table.columns = table_columns
+
+        else:
+            raise ValueError("Not a valid normalize argument")
+
+    else:
+        raise ValueError("Not a valid margins argument")
+
+    return table
+
+
+def _get_names(arrs, names, prefix: str = "row"):
+    if names is None:
+        names = []
+        for i, arr in enumerate(arrs):
+            if isinstance(arr, ABCSeries) and arr.name is not None:
+                names.append(arr.name)
+            else:
+                names.append(f"{prefix}_{i}")
+    else:
+        if len(names) != len(arrs):
+            raise AssertionError("arrays and names must have the same length")
+        if not isinstance(names, list):
+            names = list(names)
+
+    return names
+
+
+def _build_names_mapper(
+    rownames: list[str], colnames: list[str]
+) -> tuple[dict[str, str], list[str], dict[str, str], list[str]]:
+    """
+    Given the names of a DataFrame's rows and columns, returns a set of unique row
+    and column names and mappers that convert to original names.
+
+    A row or column name is replaced if it is duplicate among the rows of the inputs,
+    among the columns of the inputs or between the rows and the columns.
+
+    Parameters
+    ----------
+    rownames: list[str]
+    colnames: list[str]
+
+    Returns
+    -------
+    Tuple(Dict[str, str], List[str], Dict[str, str], List[str])
+
+    rownames_mapper: dict[str, str]
+        a dictionary with new row names as keys and original rownames as values
+    unique_rownames: list[str]
+        a list of rownames with duplicate names replaced by dummy names
+    colnames_mapper: dict[str, str]
+        a dictionary with new column names as keys and original column names as values
+    unique_colnames: list[str]
+        a list of column names with duplicate names replaced by dummy names
+
+    """
+
+    def get_duplicates(names):
+        # record each name as it is seen so that only names occurring more
+        # than once are reported as duplicates
+        seen: set = set()
+        dups = set()
+        for name in names:
+            if name in seen:
+                dups.add(name)
+            seen.add(name)
+        return dups
+
+    shared_names = set(rownames).intersection(set(colnames))
+    dup_names = get_duplicates(rownames) | get_duplicates(colnames) | shared_names
+
+    rownames_mapper = {
+        f"row_{i}": name for i, name in enumerate(rownames) if name in dup_names
+    }
+    unique_rownames = [
+        f"row_{i}" if name in dup_names else name for i, name in enumerate(rownames)
+    ]
+
+    colnames_mapper = {
+        f"col_{i}": name for i, name in enumerate(colnames) if name in dup_names
+    }
+    unique_colnames = [
+        f"col_{i}" if name in dup_names else name for i, name in enumerate(colnames)
+    ]
+
+    return rownames_mapper, unique_rownames, colnames_mapper, unique_colnames
diff --git a/.local/lib/python3.8/site-packages/pandas/core/reshape/reshape.py b/.local/lib/python3.8/site-packages/pandas/core/reshape/reshape.py
new file mode 100644
index 0000000..7f67d34
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/core/reshape/reshape.py
@@ -0,0 +1,1143 @@
+from __future__ import annotations
+
+import itertools
+from typing import TYPE_CHECKING
+import warnings
+
+import numpy as np
+
+import pandas._libs.reshape as libreshape
+from pandas._libs.sparse import IntIndex
+from pandas._typing import (
+    Dtype,
+    npt,
+)
+from pandas.errors import PerformanceWarning
+from pandas.util._decorators import cache_readonly
+
+from pandas.core.dtypes.cast import maybe_promote
+from pandas.core.dtypes.common import (
+    ensure_platform_int,
+    is_1d_only_ea_dtype,
+    is_extension_array_dtype,
+    is_integer,
+    is_integer_dtype,
+    is_list_like,
+    is_object_dtype,
+    needs_i8_conversion,
+)
+from pandas.core.dtypes.dtypes import ExtensionDtype
+from pandas.core.dtypes.missing import notna
+
+import pandas.core.algorithms as algos
+from pandas.core.arrays import SparseArray
+from pandas.core.arrays.categorical import factorize_from_iterable
+from pandas.core.construction import ensure_wrapped_if_datetimelike
+from pandas.core.frame import DataFrame
+from pandas.core.indexes.api import (
+    Index,
+    MultiIndex,
+)
+from pandas.core.indexes.frozen import FrozenList
+from pandas.core.series import Series
+from pandas.core.sorting import (
+    compress_group_index,
+    decons_obs_group_ids,
+    get_compressed_ids,
+    get_group_index,
+    get_group_index_sorter,
+)
+
+if TYPE_CHECKING:
+    from pandas.core.arrays import ExtensionArray
+
+
+class _Unstacker:
+    """
+    Helper class to unstack data / pivot with multi-level index
+
+    Parameters
+    ----------
+    index : MultiIndex
+    level : int or str, default last level
+        Level to "unstack". Accepts a name for the level.
+    fill_value : scalar, optional
+        Default value to fill in missing values if subgroups do not have the
+        same set of labels. By default, missing values will be replaced with
+        the default fill value for that data type, NaN for float, NaT for
+        datetimelike, etc. For integer types, by default data will be converted
+        to float and missing values will be set to NaN.
+    constructor : object
+        Pandas ``DataFrame`` or subclass used to create unstacked
+        response. If None, DataFrame will be used.
+
+    Examples
+    --------
+    >>> index = pd.MultiIndex.from_tuples([('one', 'a'), ('one', 'b'),
+    ...                                    ('two', 'a'), ('two', 'b')])
+    >>> s = pd.Series(np.arange(1, 5, dtype=np.int64), index=index)
+    >>> s
+    one  a    1
+         b    2
+    two  a    3
+         b    4
+    dtype: int64
+
+    >>> s.unstack(level=-1)
+         a  b
+    one  1  2
+    two  3  4
+
+    >>> s.unstack(level=0)
+       one  two
+    a    1    3
+    b    2    4
+
+    Returns
+    -------
+    unstacked : DataFrame
+    """
+
+    def __init__(self, index: MultiIndex, level=-1, constructor=None):
+
+        if constructor is None:
+            constructor = DataFrame
+        self.constructor = constructor
+
+        self.index = index.remove_unused_levels()
+
+        self.level = self.index._get_level_number(level)
+
+        # when index includes `nan`, need to lift levels/strides by 1
+        self.lift = 1 if -1 in self.index.codes[self.level] else 0
+
+        # Note: the "pop" below alters these in-place.
+        self.new_index_levels = list(self.index.levels)
+        self.new_index_names = list(self.index.names)
+
+        self.removed_name = self.new_index_names.pop(self.level)
+        self.removed_level = self.new_index_levels.pop(self.level)
+        self.removed_level_full = index.levels[self.level]
+
+        # Bug fix GH 20601
+        # If the data frame is too big, the number of unique index combination
+        # will cause int32 overflow on windows environments.
+        # We want to check and raise an error before this happens
+        num_rows = np.max([index_level.size for index_level in self.new_index_levels])
+        num_columns = self.removed_level.size
+
+        # GH20601: This forces an overflow if the number of cells is too high.
+        num_cells = num_rows * num_columns
+
+        # GH 26314: Previous ValueError raised was too restrictive for many users.
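+        # As a rough illustration: 100_000 unique row labels times 50_000
+        # unique unstacked labels is 5e9 cells, which exceeds
+        # np.iinfo(np.int32).max (~2.1e9), so we warn rather than raise --
+        # the result may still fit in memory if the frame is small enough.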
+ if num_cells > np.iinfo(np.int32).max: + warnings.warn( + f"The following operation may generate {num_cells} cells " + f"in the resulting pandas object.", + PerformanceWarning, + ) + + self._make_selectors() + + @cache_readonly + def _indexer_and_to_sort( + self, + ) -> tuple[ + npt.NDArray[np.intp], + list[np.ndarray], # each has _some_ signed integer dtype + ]: + v = self.level + + codes = list(self.index.codes) + levs = list(self.index.levels) + to_sort = codes[:v] + codes[v + 1 :] + [codes[v]] + sizes = tuple(len(x) for x in levs[:v] + levs[v + 1 :] + [levs[v]]) + + comp_index, obs_ids = get_compressed_ids(to_sort, sizes) + ngroups = len(obs_ids) + + indexer = get_group_index_sorter(comp_index, ngroups) + return indexer, to_sort + + @cache_readonly + def sorted_labels(self): + indexer, to_sort = self._indexer_and_to_sort + return [line.take(indexer) for line in to_sort] + + def _make_sorted_values(self, values: np.ndarray) -> np.ndarray: + indexer, _ = self._indexer_and_to_sort + + sorted_values = algos.take_nd(values, indexer, axis=0) + return sorted_values + + def _make_selectors(self): + new_levels = self.new_index_levels + + # make the mask + remaining_labels = self.sorted_labels[:-1] + level_sizes = tuple(len(x) for x in new_levels) + + comp_index, obs_ids = get_compressed_ids(remaining_labels, level_sizes) + ngroups = len(obs_ids) + + comp_index = ensure_platform_int(comp_index) + stride = self.index.levshape[self.level] + self.lift + self.full_shape = ngroups, stride + + selector = self.sorted_labels[-1] + stride * comp_index + self.lift + mask = np.zeros(np.prod(self.full_shape), dtype=bool) + mask.put(selector, True) + + if mask.sum() < len(self.index): + raise ValueError("Index contains duplicate entries, cannot reshape") + + self.group_index = comp_index + self.mask = mask + self.unique_groups = obs_ids + self.compressor = comp_index.searchsorted(np.arange(ngroups)) + + @cache_readonly + def mask_all(self) -> bool: + return bool(self.mask.all()) + + @cache_readonly + def arange_result(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.bool_]]: + # We cache this for re-use in ExtensionBlock._unstack + dummy_arr = np.arange(len(self.index), dtype=np.intp) + new_values, mask = self.get_new_values(dummy_arr, fill_value=-1) + return new_values, mask.any(0) + # TODO: in all tests we have mask.any(0).all(); can we rely on that? + + def get_result(self, values, value_columns, fill_value): + + if values.ndim == 1: + values = values[:, np.newaxis] + + if value_columns is None and values.shape[1] != 1: # pragma: no cover + raise ValueError("must pass column labels for multi-column data") + + values, _ = self.get_new_values(values, fill_value) + columns = self.get_new_columns(value_columns) + index = self.new_index + + return self.constructor( + values, index=index, columns=columns, dtype=values.dtype + ) + + def get_new_values(self, values, fill_value=None): + + if values.ndim == 1: + values = values[:, np.newaxis] + + sorted_values = self._make_sorted_values(values) + + # place the values + length, width = self.full_shape + stride = values.shape[1] + result_width = width * stride + result_shape = (length, result_width) + mask = self.mask + mask_all = self.mask_all + + # we can simply reshape if we don't have a mask + if mask_all and len(values): + # TODO: Under what circumstances can we rely on sorted_values + # matching values? 
When that holds, we can slice instead + # of take (in particular for EAs) + new_values = ( + sorted_values.reshape(length, width, stride) + .swapaxes(1, 2) + .reshape(result_shape) + ) + new_mask = np.ones(result_shape, dtype=bool) + return new_values, new_mask + + dtype = values.dtype + + # if our mask is all True, then we can use our existing dtype + if mask_all: + dtype = values.dtype + new_values = np.empty(result_shape, dtype=dtype) + else: + if isinstance(dtype, ExtensionDtype): + # GH#41875 + cls = dtype.construct_array_type() + new_values = cls._empty(result_shape, dtype=dtype) + new_values[:] = fill_value + else: + dtype, fill_value = maybe_promote(dtype, fill_value) + new_values = np.empty(result_shape, dtype=dtype) + new_values.fill(fill_value) + + name = dtype.name + new_mask = np.zeros(result_shape, dtype=bool) + + # we need to convert to a basic dtype + # and possibly coerce an input to our output dtype + # e.g. ints -> floats + if needs_i8_conversion(values.dtype): + sorted_values = sorted_values.view("i8") + new_values = new_values.view("i8") + else: + sorted_values = sorted_values.astype(name, copy=False) + + # fill in our values & mask + libreshape.unstack( + sorted_values, + mask.view("u1"), + stride, + length, + width, + new_values, + new_mask.view("u1"), + ) + + # reconstruct dtype if needed + if needs_i8_conversion(values.dtype): + # view as datetime64 so we can wrap in DatetimeArray and use + # DTA's view method + new_values = new_values.view("M8[ns]") + new_values = ensure_wrapped_if_datetimelike(new_values) + new_values = new_values.view(values.dtype) + + return new_values, new_mask + + def get_new_columns(self, value_columns: Index | None): + if value_columns is None: + if self.lift == 0: + return self.removed_level._rename(name=self.removed_name) + + lev = self.removed_level.insert(0, item=self.removed_level._na_value) + return lev.rename(self.removed_name) + + stride = len(self.removed_level) + self.lift + width = len(value_columns) + propagator = np.repeat(np.arange(width), stride) + + new_levels: FrozenList | list[Index] + + if isinstance(value_columns, MultiIndex): + new_levels = value_columns.levels + (self.removed_level_full,) + new_names = value_columns.names + (self.removed_name,) + + new_codes = [lab.take(propagator) for lab in value_columns.codes] + else: + new_levels = [ + value_columns, + self.removed_level_full, + ] + new_names = [value_columns.name, self.removed_name] + new_codes = [propagator] + + repeater = self._repeater + + # The entire level is then just a repetition of the single chunk: + new_codes.append(np.tile(repeater, width)) + return MultiIndex( + levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False + ) + + @cache_readonly + def _repeater(self) -> np.ndarray: + # The two indices differ only if the unstacked level had unused items: + if len(self.removed_level_full) != len(self.removed_level): + # In this case, we remap the new codes to the original level: + repeater = self.removed_level_full.get_indexer(self.removed_level) + if self.lift: + repeater = np.insert(repeater, 0, -1) + else: + # Otherwise, we just use each level item exactly once: + stride = len(self.removed_level) + self.lift + repeater = np.arange(stride) - self.lift + + return repeater + + @cache_readonly + def new_index(self): + # Does not depend on values or value_columns + result_codes = [lab.take(self.compressor) for lab in self.sorted_labels[:-1]] + + # construct the new index + if len(self.new_index_levels) == 1: + level, level_codes = 
self.new_index_levels[0], result_codes[0] + if (level_codes == -1).any(): + level = level.insert(len(level), level._na_value) + return level.take(level_codes).rename(self.new_index_names[0]) + + return MultiIndex( + levels=self.new_index_levels, + codes=result_codes, + names=self.new_index_names, + verify_integrity=False, + ) + + +def _unstack_multiple(data, clocs, fill_value=None): + if len(clocs) == 0: + return data + + # NOTE: This doesn't deal with hierarchical columns yet + + index = data.index + + # GH 19966 Make sure if MultiIndexed index has tuple name, they will be + # recognised as a whole + if clocs in index.names: + clocs = [clocs] + clocs = [index._get_level_number(i) for i in clocs] + + rlocs = [i for i in range(index.nlevels) if i not in clocs] + + clevels = [index.levels[i] for i in clocs] + ccodes = [index.codes[i] for i in clocs] + cnames = [index.names[i] for i in clocs] + rlevels = [index.levels[i] for i in rlocs] + rcodes = [index.codes[i] for i in rlocs] + rnames = [index.names[i] for i in rlocs] + + shape = tuple(len(x) for x in clevels) + group_index = get_group_index(ccodes, shape, sort=False, xnull=False) + + comp_ids, obs_ids = compress_group_index(group_index, sort=False) + recons_codes = decons_obs_group_ids(comp_ids, obs_ids, shape, ccodes, xnull=False) + + if not rlocs: + # Everything is in clocs, so the dummy df has a regular index + dummy_index = Index(obs_ids, name="__placeholder__") + else: + dummy_index = MultiIndex( + levels=rlevels + [obs_ids], + codes=rcodes + [comp_ids], + names=rnames + ["__placeholder__"], + verify_integrity=False, + ) + + if isinstance(data, Series): + dummy = data.copy() + dummy.index = dummy_index + + unstacked = dummy.unstack("__placeholder__", fill_value=fill_value) + new_levels = clevels + new_names = cnames + new_codes = recons_codes + else: + if isinstance(data.columns, MultiIndex): + result = data + for i in range(len(clocs)): + val = clocs[i] + result = result.unstack(val, fill_value=fill_value) + clocs = [v if v < val else v - 1 for v in clocs] + + return result + + # GH#42579 deep=False to avoid consolidating + dummy = data.copy(deep=False) + dummy.index = dummy_index + + unstacked = dummy.unstack("__placeholder__", fill_value=fill_value) + if isinstance(unstacked, Series): + unstcols = unstacked.index + else: + unstcols = unstacked.columns + assert isinstance(unstcols, MultiIndex) # for mypy + new_levels = [unstcols.levels[0]] + clevels + new_names = [data.columns.name] + cnames + + new_codes = [unstcols.codes[0]] + for rec in recons_codes: + new_codes.append(rec.take(unstcols.codes[-1])) + + new_columns = MultiIndex( + levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False + ) + + if isinstance(unstacked, Series): + unstacked.index = new_columns + else: + unstacked.columns = new_columns + + return unstacked + + +def unstack(obj, level, fill_value=None): + + if isinstance(level, (tuple, list)): + if len(level) != 1: + # _unstack_multiple only handles MultiIndexes, + # and isn't needed for a single level + return _unstack_multiple(obj, level, fill_value=fill_value) + else: + level = level[0] + + # Prioritize integer interpretation (GH #21677): + if not is_integer(level) and not level == "__placeholder__": + level = obj.index._get_level_number(level) + + if isinstance(obj, DataFrame): + if isinstance(obj.index, MultiIndex): + return _unstack_frame(obj, level, fill_value=fill_value) + else: + return obj.T.stack(dropna=False) + elif not isinstance(obj.index, MultiIndex): + # GH 36113 + # Give nicer 
error messages when unstack a Series whose + # Index is not a MultiIndex. + raise ValueError( + f"index must be a MultiIndex to unstack, {type(obj.index)} was passed" + ) + else: + if is_1d_only_ea_dtype(obj.dtype): + return _unstack_extension_series(obj, level, fill_value) + unstacker = _Unstacker( + obj.index, level=level, constructor=obj._constructor_expanddim + ) + return unstacker.get_result( + obj._values, value_columns=None, fill_value=fill_value + ) + + +def _unstack_frame(obj, level, fill_value=None): + if not obj._can_fast_transpose: + unstacker = _Unstacker(obj.index, level=level) + mgr = obj._mgr.unstack(unstacker, fill_value=fill_value) + return obj._constructor(mgr) + else: + unstacker = _Unstacker(obj.index, level=level, constructor=obj._constructor) + return unstacker.get_result( + obj._values, value_columns=obj.columns, fill_value=fill_value + ) + + +def _unstack_extension_series(series, level, fill_value): + """ + Unstack an ExtensionArray-backed Series. + + The ExtensionDtype is preserved. + + Parameters + ---------- + series : Series + A Series with an ExtensionArray for values + level : Any + The level name or number. + fill_value : Any + The user-level (not physical storage) fill value to use for + missing values introduced by the reshape. Passed to + ``series.values.take``. + + Returns + ------- + DataFrame + Each column of the DataFrame will have the same dtype as + the input Series. + """ + # Defer to the logic in ExtensionBlock._unstack + df = series.to_frame() + result = df.unstack(level=level, fill_value=fill_value) + + # equiv: result.droplevel(level=0, axis=1) + # but this avoids an extra copy + result.columns = result.columns.droplevel(0) + return result + + +def stack(frame, level=-1, dropna=True): + """ + Convert DataFrame to Series with multi-level Index. Columns become the + second level of the resulting hierarchical index + + Returns + ------- + stacked : Series + """ + + def factorize(index): + if index.is_unique: + return index, np.arange(len(index)) + codes, categories = factorize_from_iterable(index) + return categories, codes + + N, K = frame.shape + + # Will also convert negative level numbers and check if out of bounds. + level_num = frame.columns._get_level_number(level) + + if isinstance(frame.columns, MultiIndex): + return _stack_multi_columns(frame, level_num=level_num, dropna=dropna) + elif isinstance(frame.index, MultiIndex): + new_levels = list(frame.index.levels) + new_codes = [lab.repeat(K) for lab in frame.index.codes] + + clev, clab = factorize(frame.columns) + new_levels.append(clev) + new_codes.append(np.tile(clab, N).ravel()) + + new_names = list(frame.index.names) + new_names.append(frame.columns.name) + new_index = MultiIndex( + levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False + ) + else: + levels, (ilab, clab) = zip(*map(factorize, (frame.index, frame.columns))) + codes = ilab.repeat(K), np.tile(clab, N).ravel() + new_index = MultiIndex( + levels=levels, + codes=codes, + names=[frame.index.name, frame.columns.name], + verify_integrity=False, + ) + + if not frame.empty and frame._is_homogeneous_type: + # For homogeneous EAs, frame._values will coerce to object. So + # we concatenate instead. 
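+        # For example, two Int64 columns would coerce to an object ndarray
+        # via frame._values; _concat_same_type below keeps the Int64 dtype.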
+ dtypes = list(frame.dtypes._values) + dtype = dtypes[0] + + if is_extension_array_dtype(dtype): + arr = dtype.construct_array_type() + new_values = arr._concat_same_type( + [col._values for _, col in frame.items()] + ) + new_values = _reorder_for_extension_array_stack(new_values, N, K) + else: + # homogeneous, non-EA + new_values = frame._values.ravel() + + else: + # non-homogeneous + new_values = frame._values.ravel() + + if dropna: + mask = notna(new_values) + new_values = new_values[mask] + new_index = new_index[mask] + + return frame._constructor_sliced(new_values, index=new_index) + + +def stack_multiple(frame, level, dropna=True): + # If all passed levels match up to column names, no + # ambiguity about what to do + if all(lev in frame.columns.names for lev in level): + result = frame + for lev in level: + result = stack(result, lev, dropna=dropna) + + # Otherwise, level numbers may change as each successive level is stacked + elif all(isinstance(lev, int) for lev in level): + # As each stack is done, the level numbers decrease, so we need + # to account for that when level is a sequence of ints + result = frame + # _get_level_number() checks level numbers are in range and converts + # negative numbers to positive + level = [frame.columns._get_level_number(lev) for lev in level] + + # Can't iterate directly through level as we might need to change + # values as we go + for index in range(len(level)): + lev = level[index] + result = stack(result, lev, dropna=dropna) + # Decrement all level numbers greater than current, as these + # have now shifted down by one + updated_level = [] + for other in level: + if other > lev: + updated_level.append(other - 1) + else: + updated_level.append(other) + level = updated_level + + else: + raise ValueError( + "level should contain all level names or all level " + "numbers, not a mixture of the two." + ) + + return result + + +def _stack_multi_column_index(columns: MultiIndex) -> MultiIndex: + """Creates a MultiIndex from the first N-1 levels of this MultiIndex.""" + if len(columns.levels) <= 2: + return columns.levels[0]._rename(name=columns.names[0]) + + levs = [ + [lev[c] if c >= 0 else None for c in codes] + for lev, codes in zip(columns.levels[:-1], columns.codes[:-1]) + ] + + # Remove duplicate tuples in the MultiIndex. + tuples = zip(*levs) + unique_tuples = (key for key, _ in itertools.groupby(tuples)) + new_levs = zip(*unique_tuples) + + # The dtype of each level must be explicitly set to avoid inferring the wrong type. + # See GH-36991. + return MultiIndex.from_arrays( + [ + # Not all indices can accept None values. + Index(new_lev, dtype=lev.dtype) if None not in new_lev else new_lev + for new_lev, lev in zip(new_levs, columns.levels) + ], + names=columns.names[:-1], + ) + + +def _stack_multi_columns(frame, level_num=-1, dropna=True): + def _convert_level_number(level_num: int, columns): + """ + Logic for converting the level number to something we can safely pass + to swaplevel. + + If `level_num` matches a column name return the name from + position `level_num`, otherwise return `level_num`. 
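+
+        For example, with ``columns.names == ["a", 0]`` and ``level_num == 0``,
+        ``0`` is also a level name, so the name at position 0 (``"a"``) is
+        returned, keeping ``swaplevel`` unambiguous.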
+ """ + if level_num in columns.names: + return columns.names[level_num] + + return level_num + + this = frame.copy() + + # this makes life much simpler + if level_num != frame.columns.nlevels - 1: + # roll levels to put selected level at end + roll_columns = this.columns + for i in range(level_num, frame.columns.nlevels - 1): + # Need to check if the ints conflict with level names + lev1 = _convert_level_number(i, roll_columns) + lev2 = _convert_level_number(i + 1, roll_columns) + roll_columns = roll_columns.swaplevel(lev1, lev2) + this.columns = roll_columns + + if not this.columns._is_lexsorted(): + # Workaround the edge case where 0 is one of the column names, + # which interferes with trying to sort based on the first + # level + level_to_sort = _convert_level_number(0, this.columns) + this = this.sort_index(level=level_to_sort, axis=1) + + new_columns = _stack_multi_column_index(this.columns) + + # time to ravel the values + new_data = {} + level_vals = this.columns.levels[-1] + level_codes = sorted(set(this.columns.codes[-1])) + level_vals_nan = level_vals.insert(len(level_vals), None) + + level_vals_used = np.take(level_vals_nan, level_codes) + levsize = len(level_codes) + drop_cols = [] + for key in new_columns: + try: + loc = this.columns.get_loc(key) + except KeyError: + drop_cols.append(key) + continue + + # can make more efficient? + # we almost always return a slice + # but if unsorted can get a boolean + # indexer + if not isinstance(loc, slice): + slice_len = len(loc) + else: + slice_len = loc.stop - loc.start + + if slice_len != levsize: + chunk = this.loc[:, this.columns[loc]] + chunk.columns = level_vals_nan.take(chunk.columns.codes[-1]) + value_slice = chunk.reindex(columns=level_vals_used).values + else: + if frame._is_homogeneous_type and is_extension_array_dtype( + frame.dtypes.iloc[0] + ): + # TODO(EA2D): won't need special case, can go through .values + # paths below (might change to ._values) + dtype = this[this.columns[loc]].dtypes.iloc[0] + subset = this[this.columns[loc]] + + value_slice = dtype.construct_array_type()._concat_same_type( + [x._values for _, x in subset.items()] + ) + N, K = subset.shape + idx = np.arange(N * K).reshape(K, N).T.ravel() + value_slice = value_slice.take(idx) + + elif frame._is_mixed_type: + value_slice = this[this.columns[loc]].values + else: + value_slice = this.values[:, loc] + + if value_slice.ndim > 1: + # i.e. not extension + value_slice = value_slice.ravel() + + new_data[key] = value_slice + + if len(drop_cols) > 0: + new_columns = new_columns.difference(drop_cols) + + N = len(this) + + if isinstance(this.index, MultiIndex): + new_levels = list(this.index.levels) + new_names = list(this.index.names) + new_codes = [lab.repeat(levsize) for lab in this.index.codes] + else: + old_codes, old_levels = factorize_from_iterable(this.index) + new_levels = [old_levels] + new_codes = [old_codes.repeat(levsize)] + new_names = [this.index.name] # something better? + + new_levels.append(level_vals) + new_codes.append(np.tile(level_codes, N)) + new_names.append(frame.columns.names[level_num]) + + new_index = MultiIndex( + levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False + ) + + result = frame._constructor(new_data, index=new_index, columns=new_columns) + + # more efficient way to go about this? can do the whole masking biz but + # will only save a small amount of time... 
+    if dropna:
+        result = result.dropna(axis=0, how="all")
+
+    return result
+
+
+def get_dummies(
+    data,
+    prefix=None,
+    prefix_sep="_",
+    dummy_na: bool = False,
+    columns=None,
+    sparse: bool = False,
+    drop_first: bool = False,
+    dtype: Dtype | None = None,
+) -> DataFrame:
+    """
+    Convert categorical variable into dummy/indicator variables.
+
+    Parameters
+    ----------
+    data : array-like, Series, or DataFrame
+        Data of which to get dummy indicators.
+    prefix : str, list of str, or dict of str, default None
+        String to append to DataFrame column names.
+        Pass a list with length equal to the number of columns
+        when calling get_dummies on a DataFrame. Alternatively, `prefix`
+        can be a dictionary mapping column names to prefixes.
+    prefix_sep : str, default '_'
+        If appending prefix, separator/delimiter to use. Or pass a
+        list or dictionary as with `prefix`.
+    dummy_na : bool, default False
+        Add a column to indicate NaNs, if False NaNs are ignored.
+    columns : list-like, default None
+        Column names in the DataFrame to be encoded.
+        If `columns` is None then all the columns with
+        `object` or `category` dtype will be converted.
+    sparse : bool, default False
+        Whether the dummy-encoded columns should be backed by
+        a :class:`SparseArray` (True) or a regular NumPy array (False).
+    drop_first : bool, default False
+        Whether to get k-1 dummies out of k categorical levels by removing the
+        first level.
+    dtype : dtype, default np.uint8
+        Data type for new columns. Only a single dtype is allowed.
+
+    Returns
+    -------
+    DataFrame
+        Dummy-coded data.
+
+    See Also
+    --------
+    Series.str.get_dummies : Convert Series to dummy codes.
+
+    Notes
+    -----
+    Reference :ref:`the user guide <reshaping.dummies>` for more examples.
+
+    Examples
+    --------
+    >>> s = pd.Series(list('abca'))
+
+    >>> pd.get_dummies(s)
+       a  b  c
+    0  1  0  0
+    1  0  1  0
+    2  0  0  1
+    3  1  0  0
+
+    >>> s1 = ['a', 'b', np.nan]
+
+    >>> pd.get_dummies(s1)
+       a  b
+    0  1  0
+    1  0  1
+    2  0  0
+
+    >>> pd.get_dummies(s1, dummy_na=True)
+       a  b  NaN
+    0  1  0    0
+    1  0  1    0
+    2  0  0    1
+
+    >>> df = pd.DataFrame({'A': ['a', 'b', 'a'], 'B': ['b', 'a', 'c'],
+    ...                    'C': [1, 2, 3]})
+
+    >>> pd.get_dummies(df, prefix=['col1', 'col2'])
+       C  col1_a  col1_b  col2_a  col2_b  col2_c
+    0  1       1       0       0       1       0
+    1  2       0       1       1       0       0
+    2  3       1       0       0       0       1
+
+    >>> pd.get_dummies(pd.Series(list('abcaa')))
+       a  b  c
+    0  1  0  0
+    1  0  1  0
+    2  0  0  1
+    3  1  0  0
+    4  1  0  0
+
+    >>> pd.get_dummies(pd.Series(list('abcaa')), drop_first=True)
+       b  c
+    0  0  0
+    1  1  0
+    2  0  1
+    3  0  0
+    4  0  0
+
+    >>> pd.get_dummies(pd.Series(list('abc')), dtype=float)
+         a    b    c
+    0  1.0  0.0  0.0
+    1  0.0  1.0  0.0
+    2  0.0  0.0  1.0
+    """
+    from pandas.core.reshape.concat import concat
+
+    dtypes_to_encode = ["object", "category"]
+
+    if isinstance(data, DataFrame):
+        # determine columns being encoded
+        if columns is None:
+            data_to_encode = data.select_dtypes(include=dtypes_to_encode)
+        elif not is_list_like(columns):
+            raise TypeError("Input must be a list-like for parameter `columns`")
+        else:
+            data_to_encode = data[columns]
+
+        # validate prefixes and separator to avoid silently dropping cols
+        def check_len(item, name):
+
+            if is_list_like(item):
+                if not len(item) == data_to_encode.shape[1]:
+                    len_msg = (
+                        f"Length of '{name}' ({len(item)}) did not match the "
+                        "length of the columns being encoded "
+                        f"({data_to_encode.shape[1]})."
+ ) + raise ValueError(len_msg) + + check_len(prefix, "prefix") + check_len(prefix_sep, "prefix_sep") + + if isinstance(prefix, str): + prefix = itertools.cycle([prefix]) + if isinstance(prefix, dict): + prefix = [prefix[col] for col in data_to_encode.columns] + + if prefix is None: + prefix = data_to_encode.columns + + # validate separators + if isinstance(prefix_sep, str): + prefix_sep = itertools.cycle([prefix_sep]) + elif isinstance(prefix_sep, dict): + prefix_sep = [prefix_sep[col] for col in data_to_encode.columns] + + with_dummies: list[DataFrame] + if data_to_encode.shape == data.shape: + # Encoding the entire df, do not prepend any dropped columns + with_dummies = [] + elif columns is not None: + # Encoding only cols specified in columns. Get all cols not in + # columns to prepend to result. + with_dummies = [data.drop(columns, axis=1)] + else: + # Encoding only object and category dtype columns. Get remaining + # columns to prepend to result. + with_dummies = [data.select_dtypes(exclude=dtypes_to_encode)] + + for (col, pre, sep) in zip(data_to_encode.items(), prefix, prefix_sep): + # col is (column_name, column), use just column data here + dummy = _get_dummies_1d( + col[1], + prefix=pre, + prefix_sep=sep, + dummy_na=dummy_na, + sparse=sparse, + drop_first=drop_first, + dtype=dtype, + ) + with_dummies.append(dummy) + result = concat(with_dummies, axis=1) + else: + result = _get_dummies_1d( + data, + prefix, + prefix_sep, + dummy_na, + sparse=sparse, + drop_first=drop_first, + dtype=dtype, + ) + return result + + +def _get_dummies_1d( + data, + prefix, + prefix_sep="_", + dummy_na: bool = False, + sparse: bool = False, + drop_first: bool = False, + dtype: Dtype | None = None, +) -> DataFrame: + from pandas.core.reshape.concat import concat + + # Series avoids inconsistent NaN handling + codes, levels = factorize_from_iterable(Series(data)) + + if dtype is None: + dtype = np.dtype(np.uint8) + # error: Argument 1 to "dtype" has incompatible type "Union[ExtensionDtype, str, + # dtype[Any], Type[object]]"; expected "Type[Any]" + dtype = np.dtype(dtype) # type: ignore[arg-type] + + if is_object_dtype(dtype): + raise ValueError("dtype=object is not a valid dtype for get_dummies") + + def get_empty_frame(data) -> DataFrame: + index: Index | np.ndarray + if isinstance(data, Series): + index = data.index + else: + index = Index(range(len(data))) + return DataFrame(index=index) + + # if all NaN + if not dummy_na and len(levels) == 0: + return get_empty_frame(data) + + codes = codes.copy() + if dummy_na: + codes[codes == -1] = len(levels) + levels = levels.insert(len(levels), np.nan) + + # if dummy_na, we just fake a nan level. 
drop_first will drop it again
+    if drop_first and len(levels) == 1:
+        return get_empty_frame(data)
+
+    number_of_cols = len(levels)
+
+    if prefix is None:
+        dummy_cols = levels
+    else:
+        dummy_cols = Index([f"{prefix}{prefix_sep}{level}" for level in levels])
+
+    index: Index | None
+    if isinstance(data, Series):
+        index = data.index
+    else:
+        index = None
+
+    if sparse:
+
+        fill_value: bool | float | int
+        if is_integer_dtype(dtype):
+            fill_value = 0
+        elif dtype == np.dtype(bool):
+            fill_value = False
+        else:
+            fill_value = 0.0
+
+        sparse_series = []
+        N = len(data)
+        sp_indices: list[list] = [[] for _ in range(len(dummy_cols))]
+        mask = codes != -1
+        codes = codes[mask]
+        n_idx = np.arange(N)[mask]
+
+        for ndx, code in zip(n_idx, codes):
+            sp_indices[code].append(ndx)
+
+        if drop_first:
+            # remove first categorical level to avoid perfect collinearity
+            # GH12042
+            sp_indices = sp_indices[1:]
+            dummy_cols = dummy_cols[1:]
+        for col, ixs in zip(dummy_cols, sp_indices):
+            sarr = SparseArray(
+                np.ones(len(ixs), dtype=dtype),
+                sparse_index=IntIndex(N, ixs),
+                fill_value=fill_value,
+                dtype=dtype,
+            )
+            sparse_series.append(Series(data=sarr, index=index, name=col))
+
+        return concat(sparse_series, axis=1, copy=False)
+
+    else:
+        # take on axis=1 + transpose to ensure ndarray layout is column-major
+        dummy_mat = np.eye(number_of_cols, dtype=dtype).take(codes, axis=1).T
+
+        if not dummy_na:
+            # reset NaN GH4446
+            dummy_mat[codes == -1] = 0
+
+        if drop_first:
+            # remove first GH12042
+            dummy_mat = dummy_mat[:, 1:]
+            dummy_cols = dummy_cols[1:]
+        return DataFrame(dummy_mat, index=index, columns=dummy_cols)
+
+
+def _reorder_for_extension_array_stack(
+    arr: ExtensionArray, n_rows: int, n_columns: int
+) -> ExtensionArray:
+    """
+    Re-orders the values when stacking multiple extension-arrays.
+
+    The indirect stacking method used for EAs requires a followup
+    take to get the order correct.
+
+    Parameters
+    ----------
+    arr : ExtensionArray
+    n_rows, n_columns : int
+        The number of rows and columns in the original DataFrame.
+
+    Returns
+    -------
+    taken : ExtensionArray
+        The original `arr` with elements re-ordered appropriately
+
+    Examples
+    --------
+    >>> arr = np.array(['a', 'b', 'c', 'd', 'e', 'f'])
+    >>> _reorder_for_extension_array_stack(arr, 2, 3)
+    array(['a', 'c', 'e', 'b', 'd', 'f'], dtype='<U1')
+
+    >>> _reorder_for_extension_array_stack(arr, 3, 2)
+    array(['a', 'd', 'b', 'e', 'c', 'f'], dtype='<U1')
+    """
+    # idx is an indexer like
+    # [c0r0, c1r0, c2r0, ...,
+    #  c0r1, c1r1, c2r1, ...]
+    idx = np.arange(n_rows * n_columns).reshape(n_columns, n_rows).T.ravel()
+    return arr.take(idx)
diff --git a/.local/lib/python3.8/site-packages/pandas/core/reshape/tile.py b/.local/lib/python3.8/site-packages/pandas/core/reshape/tile.py
new file mode 100644
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/core/reshape/tile.py
+"""
+Quantilization functions and related stuff
+"""
+from __future__ import annotations
+
+from typing import (
+    Any,
+    Callable,
+    Literal,
+)
+
+import numpy as np
+
+from pandas._libs import (
+    Timedelta,
+    Timestamp,
+)
+from pandas._libs.lib import infer_dtype
+
+from pandas.core.dtypes.common import (
+    DT64NS_DTYPE,
+    ensure_platform_int,
+    is_bool_dtype,
+    is_categorical_dtype,
+    is_datetime64_dtype,
+    is_datetime64tz_dtype,
+    is_datetime_or_timedelta_dtype,
+    is_extension_array_dtype,
+    is_integer,
+    is_list_like,
+    is_numeric_dtype,
+    is_scalar,
+    is_timedelta64_dtype,
+)
+from pandas.core.dtypes.generic import ABCSeries
+from pandas.core.dtypes.missing import isna
+
+from pandas import (
+    Categorical,
+    Index,
+    IntervalIndex,
+    to_datetime,
+    to_timedelta,
+)
+import pandas.core.algorithms as algos
+import pandas.core.nanops as nanops
+
+
+def cut(
+    x,
+    bins,
+    right: bool = True,
+    labels=None,
+    retbins: bool = False,
+    precision: int = 3,
+    include_lowest: bool = False,
+    duplicates: str = "raise",
+    ordered: bool = True,
+):
+    """
+    Bin values into discrete intervals.
+
+    Use `cut` when you need to segment and sort data values into bins. This
+    function is also useful for going from a continuous variable to a
+    categorical variable. For example, `cut` could convert ages to groups of
+    age ranges. Supports binning into an equal number of bins, or a
+    pre-specified array of bins.
+
+    Parameters
+    ----------
+    x : array-like
+        The input array to be binned. Must be 1-dimensional.
+    bins : int, sequence of scalars, or IntervalIndex
+        The criteria to bin by.
+
+        * int : Defines the number of equal-width bins in the range of `x`. The
+          range of `x` is extended by .1% on each side to include the minimum
+          and maximum values of `x`.
+        * sequence of scalars : Defines the bin edges allowing for non-uniform
+          width. No extension of the range of `x` is done.
+        * IntervalIndex : Defines the exact bins to be used. Note that
+          IntervalIndex for `bins` must be non-overlapping.
+
+    right : bool, default True
+        Indicates whether `bins` includes the rightmost edge or not. If
+        ``right == True`` (the default), then the `bins` ``[1, 2, 3, 4]``
+        indicate (1,2], (2,3], (3,4]. This argument is ignored when
+        `bins` is an IntervalIndex.
+    labels : array or False, default None
+        Specifies the labels for the returned bins. Must be the same length as
+        the resulting bins. If False, returns only integer indicators of the
+        bins. This affects the type of the output container (see below).
+        This argument is ignored when `bins` is an IntervalIndex. If True,
+        raises an error. When `ordered=False`, labels must be provided.
+    retbins : bool, default False
+        Whether to return the bins or not. Useful when bins is provided
+        as a scalar.
+    precision : int, default 3
+        The precision at which to store and display the bins labels.
+    include_lowest : bool, default False
+        Whether the first interval should be left-inclusive or not.
+    duplicates : {default 'raise', 'drop'}, optional
+        If bin edges are not unique, raise ValueError or drop non-uniques.
+    ordered : bool, default True
+        Whether the labels are ordered or not. Applies to returned types
+        Categorical and Series (with Categorical dtype). If True,
+        the resulting categorical will be ordered. If False, the resulting
+        categorical will be unordered (labels must be provided).
+
+    Returns
+    -------
+    out : Categorical, Series, or ndarray
+        An array-like object representing the respective bin for each value
+        of `x`. The type depends on the value of `labels`.
+
+        * None (default) : returns a Series for Series `x` or a
+          Categorical for all other inputs. The values stored within
+          are Interval dtype.
+
+        * sequence of scalars : returns a Series for Series `x` or a
+          Categorical for all other inputs. The values stored within
+          are whatever the type in the sequence is.
+
+        * False : returns an ndarray of integers.
+
+    bins : numpy.ndarray or IntervalIndex.
+        The computed or specified bins. Only returned when `retbins=True`.
+        For scalar or sequence `bins`, this is an ndarray with the computed
+        bins. If set `duplicates=drop`, `bins` will drop non-unique bin. For
+        an IntervalIndex `bins`, this is equal to `bins`.
+
+    See Also
+    --------
+    qcut : Discretize variable into equal-sized buckets based on rank
+        or based on sample quantiles.
+    Categorical : Array type for storing data that come from a
+        fixed set of values.
+    Series : One-dimensional array with axis labels (including time series).
+    IntervalIndex : Immutable Index implementing an ordered, sliceable set.
+
+    Notes
+    -----
+    Any NA values will be NA in the result. Out of bounds values will be NA in
+    the resulting Series or Categorical object.
+
+    Reference :ref:`the user guide <reshaping.tiling>` for more examples.
+
+    Examples
+    --------
+    Discretize into three equal-sized bins.
+
+    >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3)
+    ... # doctest: +ELLIPSIS
+    [(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], (5.0, 7.0], ...
+    Categories (3, interval[float64, right]): [(0.994, 3.0] < (3.0, 5.0] ...
+
+    >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3, retbins=True)
+    ... # doctest: +ELLIPSIS
+    ([(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], (5.0, 7.0], ...
+    Categories (3, interval[float64, right]): [(0.994, 3.0] < (3.0, 5.0] ...
+    array([0.994, 3.   , 5.   , 7.   ]))
+
+    Discovers the same bins, but assign them specific labels. Notice that
+    the returned Categorical's categories are `labels` and is ordered.
+
+    >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]),
+    ...        3, labels=["bad", "medium", "good"])
+    ['bad', 'good', 'medium', 'medium', 'good', 'bad']
+    Categories (3, object): ['bad' < 'medium' < 'good']
+
+    ``ordered=False`` will result in unordered categories when labels are passed.
+    This parameter can be used to allow non-unique labels:
+
+    >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3,
+    ... 
labels=["B", "A", "B"], ordered=False) + ['B', 'B', 'A', 'A', 'B', 'B'] + Categories (2, object): ['A', 'B'] + + ``labels=False`` implies you just want the bins back. + + >>> pd.cut([0, 1, 1, 2], bins=4, labels=False) + array([0, 1, 1, 3]) + + Passing a Series as an input returns a Series with categorical dtype: + + >>> s = pd.Series(np.array([2, 4, 6, 8, 10]), + ... index=['a', 'b', 'c', 'd', 'e']) + >>> pd.cut(s, 3) + ... # doctest: +ELLIPSIS + a (1.992, 4.667] + b (1.992, 4.667] + c (4.667, 7.333] + d (7.333, 10.0] + e (7.333, 10.0] + dtype: category + Categories (3, interval[float64, right]): [(1.992, 4.667] < (4.667, ... + + Passing a Series as an input returns a Series with mapping value. + It is used to map numerically to intervals based on bins. + + >>> s = pd.Series(np.array([2, 4, 6, 8, 10]), + ... index=['a', 'b', 'c', 'd', 'e']) + >>> pd.cut(s, [0, 2, 4, 6, 8, 10], labels=False, retbins=True, right=False) + ... # doctest: +ELLIPSIS + (a 1.0 + b 2.0 + c 3.0 + d 4.0 + e NaN + dtype: float64, + array([ 0, 2, 4, 6, 8, 10])) + + Use `drop` optional when bins is not unique + + >>> pd.cut(s, [0, 2, 4, 6, 10, 10], labels=False, retbins=True, + ... right=False, duplicates='drop') + ... # doctest: +ELLIPSIS + (a 1.0 + b 2.0 + c 3.0 + d 3.0 + e NaN + dtype: float64, + array([ 0, 2, 4, 6, 10])) + + Passing an IntervalIndex for `bins` results in those categories exactly. + Notice that values not covered by the IntervalIndex are set to NaN. 0 + is to the left of the first bin (which is closed on the right), and 1.5 + falls between two bins. + + >>> bins = pd.IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)]) + >>> pd.cut([0, 0.5, 1.5, 2.5, 4.5], bins) + [NaN, (0.0, 1.0], NaN, (2.0, 3.0], (4.0, 5.0]] + Categories (3, interval[int64, right]): [(0, 1] < (2, 3] < (4, 5]] + """ + # NOTE: this binning code is changed a bit from histogram for var(x) == 0 + + original = x + x = _preprocess_for_cut(x) + x, dtype = _coerce_to_type(x) + + if not np.iterable(bins): + if is_scalar(bins) and bins < 1: + raise ValueError("`bins` should be a positive integer.") + + try: # for array-like + sz = x.size + except AttributeError: + x = np.asarray(x) + sz = x.size + + if sz == 0: + raise ValueError("Cannot cut empty array") + + rng = (nanops.nanmin(x), nanops.nanmax(x)) + mn, mx = (mi + 0.0 for mi in rng) + + if np.isinf(mn) or np.isinf(mx): + # GH 24314 + raise ValueError( + "cannot specify integer `bins` when input data contains infinity" + ) + elif mn == mx: # adjust end points before binning + mn -= 0.001 * abs(mn) if mn != 0 else 0.001 + mx += 0.001 * abs(mx) if mx != 0 else 0.001 + bins = np.linspace(mn, mx, bins + 1, endpoint=True) + else: # adjust end points after binning + bins = np.linspace(mn, mx, bins + 1, endpoint=True) + adj = (mx - mn) * 0.001 # 0.1% of the range + if right: + bins[0] -= adj + else: + bins[-1] += adj + + elif isinstance(bins, IntervalIndex): + if bins.is_overlapping: + raise ValueError("Overlapping IntervalIndex is not accepted.") + + else: + if is_datetime64tz_dtype(bins): + bins = np.asarray(bins, dtype=DT64NS_DTYPE) + else: + bins = np.asarray(bins) + bins = _convert_bin_to_numeric_type(bins, dtype) + + # GH 26045: cast to float64 to avoid an overflow + if (np.diff(bins.astype("float64")) < 0).any(): + raise ValueError("bins must increase monotonically.") + + fac, bins = _bins_to_cuts( + x, + bins, + right=right, + labels=labels, + precision=precision, + include_lowest=include_lowest, + dtype=dtype, + duplicates=duplicates, + ordered=ordered, + ) + + return 
_postprocess_for_cut(fac, bins, retbins, dtype, original) + + +def qcut( + x, + q, + labels=None, + retbins: bool = False, + precision: int = 3, + duplicates: str = "raise", +): + """ + Quantile-based discretization function. + + Discretize variable into equal-sized buckets based on rank or based + on sample quantiles. For example 1000 values for 10 quantiles would + produce a Categorical object indicating quantile membership for each data point. + + Parameters + ---------- + x : 1d ndarray or Series + q : int or list-like of float + Number of quantiles. 10 for deciles, 4 for quartiles, etc. Alternately + array of quantiles, e.g. [0, .25, .5, .75, 1.] for quartiles. + labels : array or False, default None + Used as labels for the resulting bins. Must be of the same length as + the resulting bins. If False, return only integer indicators of the + bins. If True, raises an error. + retbins : bool, optional + Whether to return the (bins, labels) or not. Can be useful if bins + is given as a scalar. + precision : int, optional + The precision at which to store and display the bins labels. + duplicates : {default 'raise', 'drop'}, optional + If bin edges are not unique, raise ValueError or drop non-uniques. + + Returns + ------- + out : Categorical or Series or array of integers if labels is False + The return type (Categorical or Series) depends on the input: a Series + of type category if input is a Series else Categorical. Bins are + represented as categories when categorical data is returned. + bins : ndarray of floats + Returned only if `retbins` is True. + + Notes + ----- + Out of bounds values will be NA in the resulting Categorical object + + Examples + -------- + >>> pd.qcut(range(5), 4) + ... # doctest: +ELLIPSIS + [(-0.001, 1.0], (-0.001, 1.0], (1.0, 2.0], (2.0, 3.0], (3.0, 4.0]] + Categories (4, interval[float64, right]): [(-0.001, 1.0] < (1.0, 2.0] ... + + >>> pd.qcut(range(5), 3, labels=["good", "medium", "bad"]) + ... 
# doctest: +SKIP + [good, good, medium, bad, bad] + Categories (3, object): [good < medium < bad] + + >>> pd.qcut(range(5), 4, labels=False) + array([0, 0, 1, 2, 3]) + """ + original = x + x = _preprocess_for_cut(x) + x, dtype = _coerce_to_type(x) + + quantiles = np.linspace(0, 1, q + 1) if is_integer(q) else q + + x_np = np.asarray(x) + x_np = x_np[~np.isnan(x_np)] + bins = np.quantile(x_np, quantiles) + + fac, bins = _bins_to_cuts( + x, + bins, + labels=labels, + precision=precision, + include_lowest=True, + dtype=dtype, + duplicates=duplicates, + ) + + return _postprocess_for_cut(fac, bins, retbins, dtype, original) + + +def _bins_to_cuts( + x, + bins: np.ndarray, + right: bool = True, + labels=None, + precision: int = 3, + include_lowest: bool = False, + dtype=None, + duplicates: str = "raise", + ordered: bool = True, +): + if not ordered and labels is None: + raise ValueError("'labels' must be provided if 'ordered = False'") + + if duplicates not in ["raise", "drop"]: + raise ValueError( + "invalid value for 'duplicates' parameter, valid options are: raise, drop" + ) + + if isinstance(bins, IntervalIndex): + # we have a fast-path here + ids = bins.get_indexer(x) + result = Categorical.from_codes(ids, categories=bins, ordered=True) + return result, bins + + unique_bins = algos.unique(bins) + if len(unique_bins) < len(bins) and len(bins) != 2: + if duplicates == "raise": + raise ValueError( + f"Bin edges must be unique: {repr(bins)}.\n" + f"You can drop duplicate edges by setting the 'duplicates' kwarg" + ) + else: + bins = unique_bins + + side: Literal["left", "right"] = "left" if right else "right" + ids = ensure_platform_int(bins.searchsorted(x, side=side)) + + if include_lowest: + ids[np.asarray(x) == bins[0]] = 1 + + na_mask = isna(x) | (ids == len(bins)) | (ids == 0) + has_nas = na_mask.any() + + if labels is not False: + if not (labels is None or is_list_like(labels)): + raise ValueError( + "Bin labels must either be False, None or passed in as a " + "list-like argument" + ) + + elif labels is None: + labels = _format_labels( + bins, precision, right=right, include_lowest=include_lowest, dtype=dtype + ) + elif ordered and len(set(labels)) != len(labels): + raise ValueError( + "labels must be unique if ordered=True; pass ordered=False " + "for duplicate labels" + ) + else: + if len(labels) != len(bins) - 1: + raise ValueError( + "Bin labels must be one fewer than the number of bin edges" + ) + if not is_categorical_dtype(labels): + labels = Categorical( + labels, + categories=labels if len(set(labels)) == len(labels) else None, + ordered=ordered, + ) + # TODO: handle mismatch between categorical label order and pandas.cut order. + np.putmask(ids, na_mask, 0) + result = algos.take_nd(labels, ids - 1) + + else: + result = ids - 1 + if has_nas: + result = result.astype(np.float64) + np.putmask(result, na_mask, np.nan) + + return result, bins + + +def _coerce_to_type(x): + """ + if the passed data is of datetime/timedelta, bool or nullable int type, + this method converts it to numeric so that cut or qcut method can + handle it + """ + dtype = None + + if is_datetime64tz_dtype(x.dtype): + dtype = x.dtype + elif is_datetime64_dtype(x.dtype): + x = to_datetime(x) + dtype = np.dtype("datetime64[ns]") + elif is_timedelta64_dtype(x.dtype): + x = to_timedelta(x) + dtype = np.dtype("timedelta64[ns]") + elif is_bool_dtype(x.dtype): + # GH 20303 + x = x.astype(np.int64) + # To support cut and qcut for IntegerArray we convert to float dtype. + # Will properly support in the future. 
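+    # e.g. a nullable Int64 array [1, 2, pd.NA] becomes a float64 ndarray
+    # [1.0, 2.0, nan] via to_numpy below.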
+ # https://github.com/pandas-dev/pandas/pull/31290 + # https://github.com/pandas-dev/pandas/issues/31389 + elif is_extension_array_dtype(x.dtype) and is_numeric_dtype(x.dtype): + x = x.to_numpy(dtype=np.float64, na_value=np.nan) + + if dtype is not None: + # GH 19768: force NaT to NaN during integer conversion + x = np.where(x.notna(), x.view(np.int64), np.nan) + + return x, dtype + + +def _convert_bin_to_numeric_type(bins, dtype): + """ + if the passed bin is of datetime/timedelta type, + this method converts it to integer + + Parameters + ---------- + bins : list-like of bins + dtype : dtype of data + + Raises + ------ + ValueError if bins are not of a compat dtype to dtype + """ + bins_dtype = infer_dtype(bins, skipna=False) + if is_timedelta64_dtype(dtype): + if bins_dtype in ["timedelta", "timedelta64"]: + bins = to_timedelta(bins).view(np.int64) + else: + raise ValueError("bins must be of timedelta64 dtype") + elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): + if bins_dtype in ["datetime", "datetime64"]: + bins = to_datetime(bins).view(np.int64) + else: + raise ValueError("bins must be of datetime64 dtype") + + return bins + + +def _convert_bin_to_datelike_type(bins, dtype): + """ + Convert bins to a DatetimeIndex or TimedeltaIndex if the original dtype is + datelike + + Parameters + ---------- + bins : list-like of bins + dtype : dtype of data + + Returns + ------- + bins : Array-like of bins, DatetimeIndex or TimedeltaIndex if dtype is + datelike + """ + if is_datetime64tz_dtype(dtype): + bins = to_datetime(bins.astype(np.int64), utc=True).tz_convert(dtype.tz) + elif is_datetime_or_timedelta_dtype(dtype): + bins = Index(bins.astype(np.int64), dtype=dtype) + return bins + + +def _format_labels( + bins, precision: int, right: bool = True, include_lowest: bool = False, dtype=None +): + """based on the dtype, return our labels""" + closed = "right" if right else "left" + + formatter: Callable[[Any], Timestamp] | Callable[[Any], Timedelta] + + if is_datetime64tz_dtype(dtype): + formatter = lambda x: Timestamp(x, tz=dtype.tz) + adjust = lambda x: x - Timedelta("1ns") + elif is_datetime64_dtype(dtype): + formatter = Timestamp + adjust = lambda x: x - Timedelta("1ns") + elif is_timedelta64_dtype(dtype): + formatter = Timedelta + adjust = lambda x: x - Timedelta("1ns") + else: + precision = _infer_precision(precision, bins) + formatter = lambda x: _round_frac(x, precision) + adjust = lambda x: x - 10 ** (-precision) + + breaks = [formatter(b) for b in bins] + if right and include_lowest: + # adjust lhs of first interval by precision to account for being right closed + breaks[0] = adjust(breaks[0]) + + return IntervalIndex.from_breaks(breaks, closed=closed) + + +def _preprocess_for_cut(x): + """ + handles preprocessing for cut where we convert passed + input to array, strip the index information and store it + separately + """ + # Check that the passed array is a Pandas or Numpy object + # We don't want to strip away a Pandas data-type here (e.g. 
datetimetz)
+    ndim = getattr(x, "ndim", None)
+    if ndim is None:
+        x = np.asarray(x)
+    if x.ndim != 1:
+        raise ValueError("Input array must be 1 dimensional")
+
+    return x
+
+
+def _postprocess_for_cut(fac, bins, retbins: bool, dtype, original):
+    """
+    handles post processing for the cut method where
+    we combine the index information if the originally passed
+    datatype was a series
+    """
+    if isinstance(original, ABCSeries):
+        fac = original._constructor(fac, index=original.index, name=original.name)
+
+    if not retbins:
+        return fac
+
+    bins = _convert_bin_to_datelike_type(bins, dtype)
+
+    return fac, bins
+
+
+def _round_frac(x, precision: int):
+    """
+    Round the fractional part of the given number
+    """
+    if not np.isfinite(x) or x == 0:
+        return x
+    else:
+        frac, whole = np.modf(x)
+        if whole == 0:
+            digits = -int(np.floor(np.log10(abs(frac)))) - 1 + precision
+        else:
+            digits = precision
+        return np.around(x, digits)
+
+
+def _infer_precision(base_precision: int, bins) -> int:
+    """
+    Infer an appropriate precision for _round_frac
+    """
+    for precision in range(base_precision, 20):
+        levels = [_round_frac(b, precision) for b in bins]
+        if algos.unique(levels).size == bins.size:
+            return precision
+    return base_precision  # default
diff --git a/.local/lib/python3.8/site-packages/pandas/core/reshape/util.py b/.local/lib/python3.8/site-packages/pandas/core/reshape/util.py
new file mode 100644
index 0000000..d2c0871
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/core/reshape/util.py
@@ -0,0 +1,70 @@
+import numpy as np
+
+from pandas.core.dtypes.common import is_list_like
+
+
+def cartesian_product(X):
+    """
+    Numpy version of itertools.product.
+    Sometimes faster (for large inputs)...
+
+    Parameters
+    ----------
+    X : list-like of list-likes
+
+    Returns
+    -------
+    product : list of ndarrays
+
+    Examples
+    --------
+    >>> cartesian_product([list('ABC'), [1, 2]])
+    [array(['A', 'A', 'B', 'B', 'C', 'C'], dtype='<U1'), array([1, 2, 1, 2, 1, 2])]
+    """
+    msg = "Input must be a list-like of list-likes"
+    if not is_list_like(X):
+        raise TypeError(msg)
+    for x in X:
+        if not is_list_like(x):
+            raise TypeError(msg)
+
+    if len(X) == 0:
+        return []
+
+    lenX = np.fromiter((len(x) for x in X), dtype=np.intp)
+    cumprodX = np.cumproduct(lenX)
+
+    if np.any(cumprodX < 0):
+        raise ValueError("Product space too large to allocate arrays!")
+
+    a = np.roll(cumprodX, 1)
+    a[0] = 1
+
+    if cumprodX[-1] != 0:
+        b = cumprodX[-1] / cumprodX
+    else:
+        # if any factor is empty, the cartesian product is empty
+        b = np.zeros_like(cumprodX)
+
+    return [
+        tile_compat(np.repeat(x, b[i]), np.product(a[i])) for i, x in enumerate(X)
+    ]
+
+
+def tile_compat(arr, num: int):
+    """
+    Index compat for np.tile.
+
+    Notes
+    -----
+    Does not support multi-dimensional `num`.
+    """
+    if isinstance(arr, np.ndarray):
+        return np.tile(arr, num)
+
+    # Otherwise we have an Index
+    taker = np.tile(np.arange(len(arr)), num)
+    return arr.take(taker)
diff --git a/.local/lib/python3.8/site-packages/pandas/core/sample.py b/.local/lib/python3.8/site-packages/pandas/core/sample.py
new file mode 100644
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/core/sample.py
+"""
+Module containing utilities for NDFrame.sample() and .GroupBy.sample()
+"""
+from __future__ import annotations
+
+import numpy as np
+
+from pandas._libs import lib
+
+from pandas.core.dtypes.generic import (
+    ABCDataFrame,
+    ABCSeries,
+)
+
+
+def preprocess_weights(obj, weights, axis: int) -> np.ndarray:
+    """
+    Process and validate the `weights` argument to `NDFrame.sample` and
+    `.GroupBy.sample`.
+
+    Returns `weights` as an ndarray[np.float64], validated except for normalizing
+    weights (because that must be done groupwise in groupby sampling).
+    """
+    # If a series, align with frame
+    if isinstance(weights, ABCSeries):
+        weights = weights.reindex(obj.axes[axis])
+
+    # Strings acceptable if a dataframe and axis = 0
+    if isinstance(weights, str):
+        if isinstance(obj, ABCDataFrame):
+            if axis == 0:
+                try:
+                    weights = obj[weights]
+                except KeyError as err:
+                    raise KeyError(
+                        "String passed to weights not a valid column"
+                    ) from err
+            else:
+                raise ValueError(
+                    "Strings can only be passed to "
+                    "weights when sampling from rows on "
+                    "a DataFrame"
+                )
+        else:
+            raise ValueError(
+                "Strings cannot be passed as weights when sampling from a Series."
+            )
+
+    if isinstance(obj, ABCSeries):
+        func = obj._constructor
+    else:
+        func = obj._constructor_sliced
+
+    weights = func(weights, dtype="float64")._values
+
+    if len(weights) != obj.shape[axis]:
+        raise ValueError("Weights and axis to be sampled must be of same length")
+
+    if lib.has_infs(weights):
+        raise ValueError("weight vector may not include `inf` values")
+
+    if (weights < 0).any():
+        raise ValueError("weight vector may not include negative values")
+
+    missing = np.isnan(weights)
+    if missing.any():
+        # Don't modify weights in place
+        weights = weights.copy()
+        weights[missing] = 0
+    return weights
+
+
+def process_sampling_size(
+    n: int | None, frac: float | None, replace: bool
+) -> int | None:
+    """
+    Process and validate the `n` and `frac` arguments to `NDFrame.sample` and
+    `.GroupBy.sample`.
+
+    Returns None if `frac` should be used (variable sampling sizes), otherwise returns
+    the constant sampling size.
+    """
+    # If no frac or n, default to n=1.
+    if n is None and frac is None:
+        n = 1
+    elif n is not None and frac is not None:
+        raise ValueError("Please enter a value for `frac` OR `n`, not both")
+    elif n is not None:
+        if n < 0:
+            raise ValueError(
+                "A negative number of rows requested. Please provide `n` >= 0."
+            )
+        if n % 1 != 0:
+            raise ValueError("Only integers accepted as `n` values")
+    else:
+        assert frac is not None  # for mypy
+        if frac > 1 and not replace:
+            raise ValueError(
+                "Replace has to be set to `True` when "
+                "upsampling the population `frac` > 1."
+            )
+        if frac < 0:
+            raise ValueError(
+                "A negative number of rows requested. Please provide `frac` >= 0."
+            )
+
+    return n
+
+
+def sample(
+    obj_len: int,
+    size: int,
+    replace: bool,
+    weights: np.ndarray | None,
+    random_state: np.random.RandomState | np.random.Generator,
+) -> np.ndarray:
+    """
+    Randomly sample `size` indices in `np.arange(obj_len)`
+
+    Parameters
+    ----------
+    obj_len : int
+        The length of the indices being considered
+    size : int
+        The number of values to choose
+    replace : bool
+        Allow or disallow sampling of the same row more than once.
+ weights : np.ndarray[np.float64] or None + If None, equal probability weighting, otherwise weights according + to the vector normalized + random_state: np.random.RandomState or np.random.Generator + State used for the random sampling + + Returns + ------- + np.ndarray[np.intp] + """ + if weights is not None: + weight_sum = weights.sum() + if weight_sum != 0: + weights = weights / weight_sum + else: + raise ValueError("Invalid weights: weights sum to zero") + + return random_state.choice(obj_len, size=size, replace=replace, p=weights).astype( + np.intp, copy=False + ) diff --git a/.local/lib/python3.8/site-packages/pandas/core/series.py b/.local/lib/python3.8/site-packages/pandas/core/series.py new file mode 100644 index 0000000..990da14 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/series.py @@ -0,0 +1,5645 @@ +""" +Data structure for 1-dimensional cross-sectional and time series data +""" +from __future__ import annotations + +from textwrap import dedent +from typing import ( + IO, + TYPE_CHECKING, + Any, + Callable, + Hashable, + Iterable, + Literal, + Sequence, + Union, + cast, + overload, +) +import warnings +import weakref + +import numpy as np + +from pandas._config import get_option + +from pandas._libs import ( + lib, + properties, + reshape, + tslibs, +) +from pandas._libs.lib import no_default +from pandas._typing import ( + AggFuncType, + ArrayLike, + Axis, + Dtype, + DtypeObj, + FillnaOptions, + IndexKeyFunc, + SingleManager, + StorageOptions, + TimedeltaConvertibleTypes, + TimestampConvertibleTypes, + ValueKeyFunc, + npt, +) +from pandas.compat.numpy import function as nv +from pandas.errors import InvalidIndexError +from pandas.util._decorators import ( + Appender, + Substitution, + deprecate_nonkeyword_arguments, + doc, +) +from pandas.util._exceptions import find_stack_level +from pandas.util._validators import ( + validate_ascending, + validate_bool_kwarg, + validate_percentile, +) + +from pandas.core.dtypes.cast import ( + convert_dtypes, + maybe_box_native, + maybe_cast_pointwise_result, +) +from pandas.core.dtypes.common import ( + ensure_platform_int, + is_dict_like, + is_integer, + is_iterator, + is_list_like, + is_object_dtype, + is_scalar, + pandas_dtype, + validate_all_hashable, +) +from pandas.core.dtypes.generic import ABCDataFrame +from pandas.core.dtypes.inference import is_hashable +from pandas.core.dtypes.missing import ( + isna, + na_value_for_dtype, + notna, + remove_na_arraylike, +) + +from pandas.core import ( + algorithms, + base, + missing, + nanops, + ops, +) +from pandas.core.accessor import CachedAccessor +from pandas.core.apply import SeriesApply +from pandas.core.arrays import ExtensionArray +from pandas.core.arrays.categorical import CategoricalAccessor +from pandas.core.arrays.sparse import SparseAccessor +import pandas.core.common as com +from pandas.core.construction import ( + create_series_with_explicit_dtype, + extract_array, + is_empty_data, + sanitize_array, +) +from pandas.core.generic import NDFrame +from pandas.core.indexers import ( + deprecate_ndim_indexing, + unpack_1tuple, +) +from pandas.core.indexes.accessors import CombinedDatetimelikeProperties +from pandas.core.indexes.api import ( + CategoricalIndex, + DatetimeIndex, + Float64Index, + Index, + MultiIndex, + PeriodIndex, + TimedeltaIndex, + default_index, + ensure_index, +) +import pandas.core.indexes.base as ibase +from pandas.core.indexing import ( + check_bool_indexer, + check_deprecated_indexers, +) +from pandas.core.internals import ( + 
SingleArrayManager,
+    SingleBlockManager,
+)
+from pandas.core.shared_docs import _shared_docs
+from pandas.core.sorting import (
+    ensure_key_mapped,
+    nargsort,
+)
+from pandas.core.strings import StringMethods
+from pandas.core.tools.datetimes import to_datetime
+
+import pandas.io.formats.format as fmt
+from pandas.io.formats.info import (
+    INFO_DOCSTRING,
+    SeriesInfo,
+    series_sub_kwargs,
+)
+import pandas.plotting
+
+if TYPE_CHECKING:
+
+    from pandas._typing import (
+        NumpySorter,
+        NumpyValueArrayLike,
+    )
+
+    from pandas.core.frame import DataFrame
+    from pandas.core.groupby.generic import SeriesGroupBy
+    from pandas.core.resample import Resampler
+
+__all__ = ["Series"]
+
+_shared_doc_kwargs = {
+    "axes": "index",
+    "klass": "Series",
+    "axes_single_arg": "{0 or 'index'}",
+    "axis": """axis : {0 or 'index'}
+        Parameter needed for compatibility with DataFrame.""",
+    "inplace": """inplace : bool, default False
+        If True, performs operation inplace and returns None.""",
+    "unique": "np.ndarray",
+    "duplicated": "Series",
+    "optional_by": "",
+    "optional_mapper": "",
+    "optional_labels": "",
+    "optional_axis": "",
+    "replace_iloc": """
+    This differs from updating with ``.loc`` or ``.iloc``, which require
+    you to specify a location to update with some value.""",
+}
+
+
+def _coerce_method(converter):
+    """
+    Install the scalar coercion methods.
+    """
+
+    def wrapper(self):
+        if len(self) == 1:
+            return converter(self.iloc[0])
+        raise TypeError(f"cannot convert the series to {converter}")
+
+    wrapper.__name__ = f"__{converter.__name__}__"
+    return wrapper
+
+
+# ----------------------------------------------------------------------
+# Series class
+
+
+class Series(base.IndexOpsMixin, NDFrame):
+    """
+    One-dimensional ndarray with axis labels (including time series).
+
+    Labels need not be unique but must be a hashable type. The object
+    supports both integer- and label-based indexing and provides a host of
+    methods for performing operations involving the index. Statistical
+    methods from ndarray have been overridden to automatically exclude
+    missing data (currently represented as NaN).
+
+    Operations between Series (+, -, /, \\*, \\*\\*) align values based on their
+    associated index values -- they need not be the same length. The result
+    index will be the sorted union of the two indexes.
+
+    Parameters
+    ----------
+    data : array-like, Iterable, dict, or scalar value
+        Contains data stored in Series. If data is a dict, argument order is
+        maintained.
+    index : array-like or Index (1d)
+        Values must be hashable and have the same length as `data`.
+        Non-unique index values are allowed. Will default to
+        RangeIndex (0, 1, 2, ..., n) if not provided. If data is dict-like
+        and index is None, then the keys in the data are used as the index. If the
+        index is not None, the resulting Series is reindexed with the index values.
+    dtype : str, numpy.dtype, or ExtensionDtype, optional
+        Data type for the output Series. If not specified, this will be
+        inferred from `data`.
+        See the :ref:`user guide <basics.dtypes>` for more usages.
+    name : str, optional
+        The name to give to the Series.
+    copy : bool, default False
+        Copy input data. Only affects Series or 1d ndarray input. See examples.
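For context on `_coerce_method` above: the generated `__float__`/`__int__` wrappers only succeed on length-1 Series. A small illustrative check (examples invented, not from the vendored file):

    import pandas as pd

    print(float(pd.Series([2.5])))  # 2.5 -- a single element coerces fine
    print(int(pd.Series([7])))      # 7

    try:
        float(pd.Series([1.0, 2.0]))  # length != 1
    except TypeError as err:
        print(err)  # cannot convert the series to <class 'float'>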
+
+    Examples
+    --------
+    Constructing Series from a dictionary with an Index specified
+
+    >>> d = {'a': 1, 'b': 2, 'c': 3}
+    >>> ser = pd.Series(data=d, index=['a', 'b', 'c'])
+    >>> ser
+    a   1
+    b   2
+    c   3
+    dtype: int64
+
+    The keys of the dictionary match with the Index values, hence the Index
+    values have no effect.
+
+    >>> d = {'a': 1, 'b': 2, 'c': 3}
+    >>> ser = pd.Series(data=d, index=['x', 'y', 'z'])
+    >>> ser
+    x   NaN
+    y   NaN
+    z   NaN
+    dtype: float64
+
+    Note that the Index is first built with the keys from the dictionary.
+    After this the Series is reindexed with the given Index values, hence we
+    get all NaN as a result.
+
+    Constructing Series from a list with `copy=False`.
+
+    >>> r = [1, 2]
+    >>> ser = pd.Series(r, copy=False)
+    >>> ser.iloc[0] = 999
+    >>> r
+    [1, 2]
+    >>> ser
+    0    999
+    1      2
+    dtype: int64
+
+    Due to input data type the Series has a `copy` of
+    the original data even though `copy=False`, so
+    the data is unchanged.
+
+    Constructing Series from a 1d ndarray with `copy=False`.
+
+    >>> r = np.array([1, 2])
+    >>> ser = pd.Series(r, copy=False)
+    >>> ser.iloc[0] = 999
+    >>> r
+    array([999,   2])
+    >>> ser
+    0    999
+    1      2
+    dtype: int64
+
+    Due to input data type the Series has a `view` on
+    the original data, so
+    the data is changed as well.
+    """
+
+    _typ = "series"
+    _HANDLED_TYPES = (Index, ExtensionArray, np.ndarray)
+
+    _name: Hashable
+    _metadata: list[str] = ["name"]
+    _internal_names_set = {"index"} | NDFrame._internal_names_set
+    _accessors = {"dt", "cat", "str", "sparse"}
+    _hidden_attrs = (
+        base.IndexOpsMixin._hidden_attrs
+        | NDFrame._hidden_attrs
+        | frozenset(["compress", "ptp"])
+    )
+
+    # Override cache_readonly bc Series is mutable
+    # error: Incompatible types in assignment (expression has type "property",
+    # base class "IndexOpsMixin" defined the type as "Callable[[IndexOpsMixin], bool]")
+    hasnans = property(  # type: ignore[assignment]
+        # error: "Callable[[IndexOpsMixin], bool]" has no attribute "fget"
+        base.IndexOpsMixin.hasnans.fget,  # type: ignore[attr-defined]
+        doc=base.IndexOpsMixin.hasnans.__doc__,
+    )
+    _mgr: SingleManager
+    div: Callable[[Series, Any], Series]
+    rdiv: Callable[[Series, Any], Series]
+
+    # ----------------------------------------------------------------------
+    # Constructors
+
+    def __init__(
+        self,
+        data=None,
+        index=None,
+        dtype: Dtype | None = None,
+        name=None,
+        copy: bool = False,
+        fastpath: bool = False,
+    ):
+
+        if (
+            isinstance(data, (SingleBlockManager, SingleArrayManager))
+            and index is None
+            and dtype is None
+            and copy is False
+        ):
+            # GH#33357 called with just the SingleBlockManager
+            NDFrame.__init__(self, data)
+            if fastpath:
+                # e.g. from _box_col_values, skip validation of name
+                object.__setattr__(self, "_name", name)
+            else:
+                self.name = name
+            return
+
+        # we are called internally, so short-circuit
+        if fastpath:
+
+            # data is an ndarray, index is defined
+            if not isinstance(data, (SingleBlockManager, SingleArrayManager)):
+                manager = get_option("mode.data_manager")
+                if manager == "block":
+                    data = SingleBlockManager.from_array(data, index)
+                elif manager == "array":
+                    data = SingleArrayManager.from_array(data, index)
+            if copy:
+                data = data.copy()
+            if index is None:
+                index = data.index
+
+        else:
+
+            name = ibase.maybe_extract_name(name, data, type(self))
+
+            if is_empty_data(data) and dtype is None:
+                # gh-17261
+                warnings.warn(
+                    "The default dtype for empty Series will be 'object' instead "
+                    "of 'float64' in a future version. Specify a dtype explicitly "
+                    "to silence this warning.",
+                    FutureWarning,
+                    stacklevel=find_stack_level(),
+                )
+                # uncomment the line below when removing the FutureWarning
+                # dtype = np.dtype(object)
+
+            if index is not None:
+                index = ensure_index(index)
+
+            if data is None:
+                data = {}
+            if dtype is not None:
+                dtype = self._validate_dtype(dtype)
+
+            if isinstance(data, MultiIndex):
+                raise NotImplementedError(
+                    "initializing a Series from a MultiIndex is not supported"
+                )
+            elif isinstance(data, Index):
+
+                if dtype is not None:
+                    # astype copies
+                    data = data.astype(dtype)
+                else:
+                    # GH#24096 we need to ensure the index remains immutable
+                    data = data._values.copy()
+                copy = False
+
+            elif isinstance(data, np.ndarray):
+                if len(data.dtype):
+                    # GH#13296 we are dealing with a compound dtype, which
+                    # should be treated as 2D
+                    raise ValueError(
+                        "Cannot construct a Series from an ndarray with "
+                        "compound dtype. Use DataFrame instead."
+                    )
+            elif isinstance(data, Series):
+                if index is None:
+                    index = data.index
+                else:
+                    data = data.reindex(index, copy=copy)
+                    copy = False
+                data = data._mgr
+            elif is_dict_like(data):
+                data, index = self._init_dict(data, index, dtype)
+                dtype = None
+                copy = False
+            elif isinstance(data, (SingleBlockManager, SingleArrayManager)):
+                if index is None:
+                    index = data.index
+                elif not data.index.equals(index) or copy:
+                    # GH#19275 SingleBlockManager input should only be called
+                    # internally
+                    raise AssertionError(
+                        "Cannot pass both SingleBlockManager "
+                        "`data` argument and a different "
+                        "`index` argument. `copy` must be False."
+                    )
+
+            elif isinstance(data, ExtensionArray):
+                pass
+            else:
+                data = com.maybe_iterable_to_list(data)
+
+            if index is None:
+                if not is_list_like(data):
+                    data = [data]
+                index = default_index(len(data))
+            elif is_list_like(data):
+                com.require_length_match(data, index)
+
+        # create/copy the manager
+        if isinstance(data, (SingleBlockManager, SingleArrayManager)):
+            if dtype is not None:
+                data = data.astype(dtype=dtype, errors="ignore", copy=copy)
+            elif copy:
+                data = data.copy()
+        else:
+            data = sanitize_array(data, index, dtype, copy)
+
+            manager = get_option("mode.data_manager")
+            if manager == "block":
+                data = SingleBlockManager.from_array(data, index)
+            elif manager == "array":
+                data = SingleArrayManager.from_array(data, index)
+
+        NDFrame.__init__(self, data)
+        self.name = name
+        self._set_axis(0, index, fastpath=True)
+
+    def _init_dict(
+        self, data, index: Index | None = None, dtype: DtypeObj | None = None
+    ):
+        """
+        Derive the "_mgr" and "index" attributes of a new Series from a
+        dictionary input.
+
+        Parameters
+        ----------
+        data : dict or dict-like
+            Data used to populate the new Series.
+        index : Index or None, default None
+            Index for the new Series: if None, use dict keys.
+        dtype : np.dtype, ExtensionDtype, or None, default None
+            The dtype for the new Series: if None, infer from data.
+
+        Returns
+        -------
+        _data : BlockManager for the new Series
+        index : index for the new Series
+        """
+        keys: Index | tuple
+
+        # Looking for NaN in dict doesn't work ({np.nan : 1}[float('nan')]
+        # raises KeyError), so we iterate the entire dict, and align
+        if data:
+            # GH:34717, the issue was using zip to extract keys and values from
+            # data; using generators affects the performance.
+            # Below is the new way of extracting the keys and values
+
+            keys = tuple(data.keys())
+            values = list(data.values())  # Generating list of values - faster way
+        elif index is not None:
+            # fastpath for Series(data=None). Just use broadcasting a scalar
+            # instead of reindexing.
+            values = na_value_for_dtype(pandas_dtype(dtype), compat=False)
+            keys = index
+        else:
+            keys, values = (), []
+
+        # Input is now list-like, so rely on "standard" construction:
+
+        # TODO: passing np.float64 to not break anything yet. See GH-17261
+        s = create_series_with_explicit_dtype(
+            # error: Argument "index" to "create_series_with_explicit_dtype" has
+            # incompatible type "Tuple[Any, ...]"; expected "Union[ExtensionArray,
+            # ndarray, Index, None]"
+            values,
+            index=keys,  # type: ignore[arg-type]
+            dtype=dtype,
+            dtype_if_empty=np.float64,
+        )
+
+        # Now we just make sure the order is respected, if any
+        if data and index is not None:
+            s = s.reindex(index, copy=False)
+        return s._mgr, s.index
+
+    # ----------------------------------------------------------------------
+
+    @property
+    def _constructor(self) -> type[Series]:
+        return Series
+
+    @property
+    def _constructor_expanddim(self) -> type[DataFrame]:
+        """
+        Used when a manipulation result has one higher dimension as the
+        original, such as Series.to_frame()
+        """
+        from pandas.core.frame import DataFrame
+
+        return DataFrame
+
+    # types
+    @property
+    def _can_hold_na(self) -> bool:
+        return self._mgr._can_hold_na
+
+    def _set_axis(self, axis: int, labels, fastpath: bool = False) -> None:
+        """
+        Override generic, we want to set the _typ here.
+
+        This is called from the cython code when we set the `index` attribute
+        directly, e.g. `series.index = [1, 2, 3]`.
+        """
+        if not fastpath:
+            labels = ensure_index(labels)
+
+        if labels._is_all_dates:
+            deep_labels = labels
+            if isinstance(labels, CategoricalIndex):
+                deep_labels = labels.categories
+
+            if not isinstance(
+                deep_labels, (DatetimeIndex, PeriodIndex, TimedeltaIndex)
+            ):
+                try:
+                    labels = DatetimeIndex(labels)
+                    # need to set here because we changed the index
+                    if fastpath:
+                        self._mgr.set_axis(axis, labels)
+                except (tslibs.OutOfBoundsDatetime, ValueError):
+                    # labels may exceed datetime bounds,
+                    # or not be a DatetimeIndex
+                    pass
+
+        if not fastpath:
+            # The ensure_index call above ensures we have an Index object
+            self._mgr.set_axis(axis, labels)
+
+    # ndarray compatibility
+    @property
+    def dtype(self) -> DtypeObj:
+        """
+        Return the dtype object of the underlying data.
+        """
+        return self._mgr.dtype
+
+    @property
+    def dtypes(self) -> DtypeObj:
+        """
+        Return the dtype object of the underlying data.
+        """
+        # DataFrame compatibility
+        return self.dtype
+
+    @property
+    def name(self) -> Hashable:
+        """
+        Return the name of the Series.
+
+        The name of a Series becomes its index or column name if it is used
+        to form a DataFrame. It is also used whenever displaying the Series
+        using the interpreter.
+
+        Returns
+        -------
+        label (hashable object)
+            The name of the Series, also the column name if part of a DataFrame.
+
+        See Also
+        --------
+        Series.rename : Sets the Series name when given a scalar input.
+        Index.name : Corresponding Index property.
+
+        Examples
+        --------
+        The Series name can be set initially when calling the constructor.
+
+        >>> s = pd.Series([1, 2, 3], dtype=np.int64, name='Numbers')
+        >>> s
+        0    1
+        1    2
+        2    3
+        Name: Numbers, dtype: int64
+        >>> s.name = "Integers"
+        >>> s
+        0    1
+        1    2
+        2    3
+        Name: Integers, dtype: int64
+
+        The name of a Series within a DataFrame is its column name.
+
+        >>> df = pd.DataFrame([[1, 2], [3, 4], [5, 6]],
+        ...                   columns=["Odd Numbers", "Even Numbers"])
+        >>> df
+           Odd Numbers  Even Numbers
+        0            1             2
+        1            3             4
+        2            5             6
+        >>> df["Even Numbers"].name
+        'Even Numbers'
+        """
+        return self._name
+
+    @name.setter
+    def name(self, value: Hashable) -> None:
+        validate_all_hashable(value, error_name=f"{type(self).__name__}.name")
+        object.__setattr__(self, "_name", value)
+
+    @property
+    def values(self):
+        """
+        Return Series as ndarray or ndarray-like depending on the dtype.
+
+        .. warning::
+
+           We recommend using :attr:`Series.array` or
+           :meth:`Series.to_numpy`, depending on whether you need
+           a reference to the underlying data or a NumPy array.
+
+        Returns
+        -------
+        numpy.ndarray or ndarray-like
+
+        See Also
+        --------
+        Series.array : Reference to the underlying data.
+        Series.to_numpy : A NumPy array representing the underlying data.
+
+        Examples
+        --------
+        >>> pd.Series([1, 2, 3]).values
+        array([1, 2, 3])
+
+        >>> pd.Series(list('aabc')).values
+        array(['a', 'a', 'b', 'c'], dtype=object)
+
+        >>> pd.Series(list('aabc')).astype('category').values
+        ['a', 'a', 'b', 'c']
+        Categories (3, object): ['a', 'b', 'c']
+
+        Timezone aware datetime data is converted to UTC:
+
+        >>> pd.Series(pd.date_range('20130101', periods=3,
+        ...                         tz='US/Eastern')).values
+        array(['2013-01-01T05:00:00.000000000',
+               '2013-01-02T05:00:00.000000000',
+               '2013-01-03T05:00:00.000000000'], dtype='datetime64[ns]')
+        """
+        return self._mgr.external_values()
+
+    @property
+    def _values(self):
+        """
+        Return the internal repr of this data (defined by Block.interval_values).
+        These are the values as stored in the Block (ndarray or ExtensionArray
+        depending on the Block class), with datetime64[ns] and timedelta64[ns]
+        wrapped in ExtensionArrays to match Index._values behavior.
+
+        Differs from the public ``.values`` for certain data types, because of
+        historical backwards compatibility of the public attribute (e.g. period
+        returns object ndarray and datetimetz a datetime64[ns] ndarray for
+        ``.values`` while it returns an ExtensionArray for ``._values`` in those
+        cases).
+
+        Differs from ``.array`` in that this still returns the numpy array if
+        the Block is backed by a numpy array (except for datetime64 and
+        timedelta64 dtypes), while ``.array`` ensures to always return an
+        ExtensionArray.
+
+        Overview:
+
+        dtype       | values        | _values       | array         |
+        ----------- | ------------- | ------------- | ------------- |
+        Numeric     | ndarray       | ndarray       | PandasArray   |
+        Category    | Categorical   | Categorical   | Categorical   |
+        dt64[ns]    | ndarray[M8ns] | DatetimeArray | DatetimeArray |
+        dt64[ns tz] | ndarray[M8ns] | DatetimeArray | DatetimeArray |
+        td64[ns]    | ndarray[m8ns] | TimedeltaArray| ndarray[m8ns] |
+        Period      | ndarray[obj]  | PeriodArray   | PeriodArray   |
+        Nullable    | EA            | EA            | EA            |
+
+        """
+        return self._mgr.internal_values()
+
+    # error: Decorated property not supported
+    @Appender(base.IndexOpsMixin.array.__doc__)  # type: ignore[misc]
+    @property
+    def array(self) -> ExtensionArray:
+        return self._mgr.array_values()
+
+    # ops
+    def ravel(self, order="C"):
+        """
+        Return the flattened underlying data as an ndarray.
+
+        Returns
+        -------
+        numpy.ndarray or ndarray-like
+            Flattened data of the Series.
+
+        See Also
+        --------
+        numpy.ndarray.ravel : Return a flattened array.
+        """
+        return self._values.ravel(order=order)
+
+    def __len__(self) -> int:
+        """
+        Return the length of the Series.
+        """
+        return len(self._mgr)
+
+    def view(self, dtype: Dtype | None = None) -> Series:
+        """
+        Create a new view of the Series.
+ + This function will return a new Series with a view of the same + underlying values in memory, optionally reinterpreted with a new data + type. The new data type must preserve the same size in bytes as to not + cause index misalignment. + + Parameters + ---------- + dtype : data type + Data type object or one of their string representations. + + Returns + ------- + Series + A new Series object as a view of the same data in memory. + + See Also + -------- + numpy.ndarray.view : Equivalent numpy function to create a new view of + the same data in memory. + + Notes + ----- + Series are instantiated with ``dtype=float64`` by default. While + ``numpy.ndarray.view()`` will return a view with the same data type as + the original array, ``Series.view()`` (without specified dtype) + will try using ``float64`` and may fail if the original data type size + in bytes is not the same. + + Examples + -------- + >>> s = pd.Series([-2, -1, 0, 1, 2], dtype='int8') + >>> s + 0 -2 + 1 -1 + 2 0 + 3 1 + 4 2 + dtype: int8 + + The 8 bit signed integer representation of `-1` is `0b11111111`, but + the same bytes represent 255 if read as an 8 bit unsigned integer: + + >>> us = s.view('uint8') + >>> us + 0 254 + 1 255 + 2 0 + 3 1 + 4 2 + dtype: uint8 + + The views share the same underlying values: + + >>> us[0] = 128 + >>> s + 0 -128 + 1 -1 + 2 0 + 3 1 + 4 2 + dtype: int8 + """ + # self.array instead of self._values so we piggyback on PandasArray + # implementation + res_values = self.array.view(dtype) + res_ser = self._constructor(res_values, index=self.index) + return res_ser.__finalize__(self, method="view") + + # ---------------------------------------------------------------------- + # NDArray Compat + _HANDLED_TYPES = (Index, ExtensionArray, np.ndarray) + + def __array__(self, dtype: npt.DTypeLike | None = None) -> np.ndarray: + """ + Return the values as a NumPy array. + + Users should not call this directly. Rather, it is invoked by + :func:`numpy.array` and :func:`numpy.asarray`. + + Parameters + ---------- + dtype : str or numpy.dtype, optional + The dtype to use for the resulting NumPy array. By default, + the dtype is inferred from the data. + + Returns + ------- + numpy.ndarray + The values in the series converted to a :class:`numpy.ndarray` + with the specified `dtype`. + + See Also + -------- + array : Create a new array from data. + Series.array : Zero-copy view to the array backing the Series. + Series.to_numpy : Series method for similar behavior. 
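The `values`/`array` distinction documented above can be verified from the public side; a short illustrative sketch (data invented, using only public accessors):

    import pandas as pd

    ser = pd.Series(pd.date_range("2013-01-01", periods=2, tz="US/Eastern"))

    # .values converts timezone-aware data to a plain UTC ndarray
    print(type(ser.values).__name__, ser.values.dtype)  # ndarray datetime64[ns]

    # .array keeps the ExtensionArray, timezone included
    print(type(ser.array).__name__, ser.array.dtype)  # DatetimeArray datetime64[ns, US/Eastern]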
+ + Examples + -------- + >>> ser = pd.Series([1, 2, 3]) + >>> np.asarray(ser) + array([1, 2, 3]) + + For timezone-aware data, the timezones may be retained with + ``dtype='object'`` + + >>> tzser = pd.Series(pd.date_range('2000', periods=2, tz="CET")) + >>> np.asarray(tzser, dtype="object") + array([Timestamp('2000-01-01 00:00:00+0100', tz='CET'), + Timestamp('2000-01-02 00:00:00+0100', tz='CET')], + dtype=object) + + Or the values may be localized to UTC and the tzinfo discarded with + ``dtype='datetime64[ns]'`` + + >>> np.asarray(tzser, dtype="datetime64[ns]") # doctest: +ELLIPSIS + array(['1999-12-31T23:00:00.000000000', ...], + dtype='datetime64[ns]') + """ + return np.asarray(self._values, dtype) + + # ---------------------------------------------------------------------- + # Unary Methods + + # coercion + __float__ = _coerce_method(float) + __long__ = _coerce_method(int) + __int__ = _coerce_method(int) + + # ---------------------------------------------------------------------- + + # indexers + @property + def axes(self) -> list[Index]: + """ + Return a list of the row axis labels. + """ + return [self.index] + + # ---------------------------------------------------------------------- + # Indexing Methods + + @Appender(NDFrame.take.__doc__) + def take(self, indices, axis=0, is_copy=None, **kwargs) -> Series: + if is_copy is not None: + warnings.warn( + "is_copy is deprecated and will be removed in a future version. " + "'take' always returns a copy, so there is no need to specify this.", + FutureWarning, + stacklevel=find_stack_level(), + ) + nv.validate_take((), kwargs) + + indices = ensure_platform_int(indices) + new_index = self.index.take(indices) + new_values = self._values.take(indices) + + result = self._constructor(new_values, index=new_index, fastpath=True) + return result.__finalize__(self, method="take") + + def _take_with_is_copy(self, indices, axis=0) -> Series: + """ + Internal version of the `take` method that sets the `_is_copy` + attribute to keep track of the parent dataframe (using in indexing + for the SettingWithCopyWarning). For Series this does the same + as the public take (it never sets `_is_copy`). + + See the docstring of `take` for full explanation of the parameters. + """ + return self.take(indices=indices, axis=axis) + + def _ixs(self, i: int, axis: int = 0): + """ + Return the i-th value or values in the Series by location. + + Parameters + ---------- + i : int + + Returns + ------- + scalar (int) or Series (slice, sequence) + """ + return self._values[i] + + def _slice(self, slobj: slice, axis: int = 0) -> Series: + # axis kwarg is retained for compat with NDFrame method + # _slice is *always* positional + return self._get_values(slobj) + + def __getitem__(self, key): + check_deprecated_indexers(key) + key = com.apply_if_callable(key, self) + + if key is Ellipsis: + return self + + key_is_scalar = is_scalar(key) + if isinstance(key, (list, tuple)): + key = unpack_1tuple(key) + + if is_integer(key) and self.index._should_fallback_to_positional: + return self._values[key] + + elif key_is_scalar: + return self._get_value(key) + + if is_hashable(key): + # Otherwise index.get_value will raise InvalidIndexError + try: + # For labels that don't resolve as scalars like tuples and frozensets + result = self._get_value(key) + + return result + + except (KeyError, TypeError, InvalidIndexError): + # InvalidIndexError for e.g. 
generator + # see test_series_getitem_corner_generator + if isinstance(key, tuple) and isinstance(self.index, MultiIndex): + # We still have the corner case where a tuple is a key + # in the first level of our MultiIndex + return self._get_values_tuple(key) + + if is_iterator(key): + key = list(key) + + if com.is_bool_indexer(key): + key = check_bool_indexer(self.index, key) + key = np.asarray(key, dtype=bool) + return self._get_values(key) + + return self._get_with(key) + + def _get_with(self, key): + # other: fancy integer or otherwise + if isinstance(key, slice): + # _convert_slice_indexer to determine if this slice is positional + # or label based, and if the latter, convert to positional + slobj = self.index._convert_slice_indexer(key, kind="getitem") + return self._slice(slobj) + elif isinstance(key, ABCDataFrame): + raise TypeError( + "Indexing a Series with DataFrame is not " + "supported, use the appropriate DataFrame column" + ) + elif isinstance(key, tuple): + return self._get_values_tuple(key) + + elif not is_list_like(key): + # e.g. scalars that aren't recognized by lib.is_scalar, GH#32684 + return self.loc[key] + + if not isinstance(key, (list, np.ndarray, ExtensionArray, Series, Index)): + key = list(key) + + if isinstance(key, Index): + key_type = key.inferred_type + else: + key_type = lib.infer_dtype(key, skipna=False) + + # Note: The key_type == "boolean" case should be caught by the + # com.is_bool_indexer check in __getitem__ + if key_type == "integer": + # We need to decide whether to treat this as a positional indexer + # (i.e. self.iloc) or label-based (i.e. self.loc) + if not self.index._should_fallback_to_positional: + return self.loc[key] + else: + return self.iloc[key] + + # handle the dup indexing case GH#4246 + return self.loc[key] + + def _get_values_tuple(self, key): + # mpl hackaround + if com.any_none(*key): + result = self._get_values(key) + deprecate_ndim_indexing(result, stacklevel=find_stack_level()) + return result + + if not isinstance(self.index, MultiIndex): + raise KeyError("key of type tuple not found and not a MultiIndex") + + # If key is contained, would have returned by now + indexer, new_index = self.index.get_loc_level(key) + return self._constructor(self._values[indexer], index=new_index).__finalize__( + self + ) + + def _get_values(self, indexer): + try: + new_mgr = self._mgr.getitem_mgr(indexer) + return self._constructor(new_mgr).__finalize__(self) + except ValueError: + # mpl compat if we look up e.g. ser[:, np.newaxis]; + # see tests.series.timeseries.test_mpl_compat_hack + # the asarray is needed to avoid returning a 2D DatetimeArray + return np.asarray(self._values[indexer]) + + def _get_value(self, label, takeable: bool = False): + """ + Quickly retrieve single value at passed index label. 
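A quick illustration (invented data) of the positional-versus-label fallback that `__getitem__`/`_get_with` implement above:

    import pandas as pd

    s1 = pd.Series([10, 20, 30], index=["a", "b", "c"])
    print(s1[1])   # 20 -- no integer labels, so the key falls back to positional

    s2 = pd.Series([10, 20, 30], index=[3, 2, 1])
    print(s2[1])   # 30 -- integer index, so the key is label-based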
+ + Parameters + ---------- + label : object + takeable : interpret the index as indexers, default False + + Returns + ------- + scalar value + """ + if takeable: + return self._values[label] + + # Similar to Index.get_value, but we do not fall back to positional + loc = self.index.get_loc(label) + return self.index._get_values_for_loc(self, loc, label) + + def __setitem__(self, key, value) -> None: + check_deprecated_indexers(key) + key = com.apply_if_callable(key, self) + cacher_needs_updating = self._check_is_chained_assignment_possible() + + if key is Ellipsis: + key = slice(None) + + if isinstance(key, slice): + indexer = self.index._convert_slice_indexer(key, kind="getitem") + return self._set_values(indexer, value) + + try: + self._set_with_engine(key, value) + except (KeyError, ValueError): + if is_integer(key) and self.index.inferred_type != "integer": + # positional setter + if not self.index._should_fallback_to_positional: + # GH#33469 + warnings.warn( + "Treating integers as positional in Series.__setitem__ " + "with a Float64Index is deprecated. In a future version, " + "`series[an_int] = val` will insert a new key into the " + "Series. Use `series.iloc[an_int] = val` to treat the " + "key as positional.", + FutureWarning, + stacklevel=find_stack_level(), + ) + # this is equivalent to self._values[key] = value + self._mgr.setitem_inplace(key, value) + else: + # GH#12862 adding a new key to the Series + self.loc[key] = value + + except (InvalidIndexError, TypeError) as err: + if isinstance(key, tuple) and not isinstance(self.index, MultiIndex): + # cases with MultiIndex don't get here bc they raise KeyError + raise KeyError( + "key of type tuple not found and not a MultiIndex" + ) from err + + if com.is_bool_indexer(key): + key = check_bool_indexer(self.index, key) + key = np.asarray(key, dtype=bool) + + if ( + is_list_like(value) + and len(value) != len(self) + and not isinstance(value, Series) + and not is_object_dtype(self.dtype) + ): + # Series will be reindexed to have matching length inside + # _where call below + # GH#44265 + indexer = key.nonzero()[0] + self._set_values(indexer, value) + return + + # otherwise with listlike other we interpret series[mask] = other + # as series[mask] = other[mask] + try: + self._where(~key, value, inplace=True) + except InvalidIndexError: + # test_where_dups + self.iloc[key] = value + return + + else: + self._set_with(key, value) + + if cacher_needs_updating: + self._maybe_update_cacher() + + def _set_with_engine(self, key, value) -> None: + loc = self.index.get_loc(key) + + # this is equivalent to self._values[key] = value + self._mgr.setitem_inplace(loc, value) + + def _set_with(self, key, value): + # other: fancy integer or otherwise + assert not isinstance(key, tuple) + + if is_scalar(key): + key = [key] + elif is_iterator(key): + # Without this, the call to infer_dtype will consume the generator + key = list(key) + + key_type = lib.infer_dtype(key, skipna=False) + + # Note: key_type == "boolean" should not occur because that + # should be caught by the is_bool_indexer check in __setitem__ + if key_type == "integer": + if not self.index._should_fallback_to_positional: + self._set_labels(key, value) + else: + self._set_values(key, value) + else: + self.loc[key] = value + + def _set_labels(self, key, value) -> None: + key = com.asarray_tuplesafe(key) + indexer: np.ndarray = self.index.get_indexer(key) + mask = indexer == -1 + if mask.any(): + raise KeyError(f"{key[mask]} not in index") + self._set_values(indexer, value) + + def 
_set_values(self, key, value) -> None: + if isinstance(key, (Index, Series)): + key = key._values + + self._mgr = self._mgr.setitem(indexer=key, value=value) + self._maybe_update_cacher() + + def _set_value(self, label, value, takeable: bool = False): + """ + Quickly set single value at passed label. + + If label is not contained, a new object is created with the label + placed at the end of the result index. + + Parameters + ---------- + label : object + Partial indexing with MultiIndex not allowed. + value : object + Scalar value. + takeable : interpret the index as indexers, default False + """ + if not takeable: + try: + loc = self.index.get_loc(label) + except KeyError: + # set using a non-recursive method + self.loc[label] = value + return + else: + loc = label + + self._set_values(loc, value) + + # ---------------------------------------------------------------------- + # Lookup Caching + + @property + def _is_cached(self) -> bool: + """Return boolean indicating if self is cached or not.""" + return getattr(self, "_cacher", None) is not None + + def _get_cacher(self): + """return my cacher or None""" + cacher = getattr(self, "_cacher", None) + if cacher is not None: + cacher = cacher[1]() + return cacher + + def _reset_cacher(self) -> None: + """ + Reset the cacher. + """ + if hasattr(self, "_cacher"): + # should only get here with self.ndim == 1 + del self._cacher + + def _set_as_cached(self, item, cacher) -> None: + """ + Set the _cacher attribute on the calling object with a weakref to + cacher. + """ + self._cacher = (item, weakref.ref(cacher)) + + def _clear_item_cache(self) -> None: + # no-op for Series + pass + + def _check_is_chained_assignment_possible(self) -> bool: + """ + See NDFrame._check_is_chained_assignment_possible.__doc__ + """ + if self._is_view and self._is_cached: + ref = self._get_cacher() + if ref is not None and ref._is_mixed_type: + self._check_setitem_copy(t="referent", force=True) + return True + return super()._check_is_chained_assignment_possible() + + def _maybe_update_cacher( + self, clear: bool = False, verify_is_copy: bool = True, inplace: bool = False + ) -> None: + """ + See NDFrame._maybe_update_cacher.__doc__ + """ + cacher = getattr(self, "_cacher", None) + if cacher is not None: + assert self.ndim == 1 + ref: DataFrame = cacher[1]() + + # we are trying to reference a dead referent, hence + # a copy + if ref is None: + del self._cacher + elif len(self) == len(ref) and self.name in ref.columns: + # GH#42530 self.name must be in ref.columns + # to ensure column still in dataframe + # otherwise, either self or ref has swapped in new arrays + ref._maybe_cache_changed(cacher[0], self, inplace=inplace) + else: + # GH#33675 we have swapped in a new array, so parent + # reference to self is now invalid + ref._item_cache.pop(cacher[0], None) + + super()._maybe_update_cacher( + clear=clear, verify_is_copy=verify_is_copy, inplace=inplace + ) + + # ---------------------------------------------------------------------- + # Unsorted + + @property + def _is_mixed_type(self): + return False + + def repeat(self, repeats, axis=None) -> Series: + """ + Repeat elements of a Series. + + Returns a new Series where each element of the current Series + is repeated consecutively a given number of times. + + Parameters + ---------- + repeats : int or array of ints + The number of repetitions for each element. This should be a + non-negative integer. Repeating 0 times will return an empty + Series. + axis : None + Must be ``None``. 
Has no effect but is accepted for compatibility + with numpy. + + Returns + ------- + Series + Newly created Series with repeated elements. + + See Also + -------- + Index.repeat : Equivalent function for Index. + numpy.repeat : Similar method for :class:`numpy.ndarray`. + + Examples + -------- + >>> s = pd.Series(['a', 'b', 'c']) + >>> s + 0 a + 1 b + 2 c + dtype: object + >>> s.repeat(2) + 0 a + 0 a + 1 b + 1 b + 2 c + 2 c + dtype: object + >>> s.repeat([1, 2, 3]) + 0 a + 1 b + 1 b + 2 c + 2 c + 2 c + dtype: object + """ + nv.validate_repeat((), {"axis": axis}) + new_index = self.index.repeat(repeats) + new_values = self._values.repeat(repeats) + return self._constructor(new_values, index=new_index).__finalize__( + self, method="repeat" + ) + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "level"]) + def reset_index(self, level=None, drop=False, name=lib.no_default, inplace=False): + """ + Generate a new DataFrame or Series with the index reset. + + This is useful when the index needs to be treated as a column, or + when the index is meaningless and needs to be reset to the default + before another operation. + + Parameters + ---------- + level : int, str, tuple, or list, default optional + For a Series with a MultiIndex, only remove the specified levels + from the index. Removes all levels by default. + drop : bool, default False + Just reset the index, without inserting it as a column in + the new DataFrame. + name : object, optional + The name to use for the column containing the original Series + values. Uses ``self.name`` by default. This argument is ignored + when `drop` is True. + inplace : bool, default False + Modify the Series in place (do not create a new object). + + Returns + ------- + Series or DataFrame or None + When `drop` is False (the default), a DataFrame is returned. + The newly created columns will come first in the DataFrame, + followed by the original Series values. + When `drop` is True, a `Series` is returned. + In either case, if ``inplace=True``, no value is returned. + + See Also + -------- + DataFrame.reset_index: Analogous function for DataFrame. + + Examples + -------- + >>> s = pd.Series([1, 2, 3, 4], name='foo', + ... index=pd.Index(['a', 'b', 'c', 'd'], name='idx')) + + Generate a DataFrame with default index. + + >>> s.reset_index() + idx foo + 0 a 1 + 1 b 2 + 2 c 3 + 3 d 4 + + To specify the name of the new column use `name`. + + >>> s.reset_index(name='values') + idx values + 0 a 1 + 1 b 2 + 2 c 3 + 3 d 4 + + To generate a new Series with the default set `drop` to True. + + >>> s.reset_index(drop=True) + 0 1 + 1 2 + 2 3 + 3 4 + Name: foo, dtype: int64 + + To update the Series in place, without generating a new one + set `inplace` to True. Note that it also requires ``drop=True``. + + >>> s.reset_index(inplace=True, drop=True) + >>> s + 0 1 + 1 2 + 2 3 + 3 4 + Name: foo, dtype: int64 + + The `level` parameter is interesting for Series with a multi-level + index. + + >>> arrays = [np.array(['bar', 'bar', 'baz', 'baz']), + ... np.array(['one', 'two', 'one', 'two'])] + >>> s2 = pd.Series( + ... range(4), name='foo', + ... index=pd.MultiIndex.from_arrays(arrays, + ... names=['a', 'b'])) + + To remove a specific level from the Index, use `level`. + + >>> s2.reset_index(level='a') + a foo + b + one bar 0 + two bar 1 + one baz 2 + two baz 3 + + If `level` is not set, all levels are removed from the Index. 
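When `drop` is False, `reset_index` builds a DataFrame via `to_frame(name)` and delegates to `DataFrame.reset_index` (visible in the method body further below); an illustrative check of that equivalence (invented data):

    import pandas as pd

    s = pd.Series([1, 2, 3], name="foo",
                  index=pd.Index(["a", "b", "c"], name="idx"))

    left = s.reset_index()
    right = s.to_frame("foo").reset_index()
    print(left.equals(right))  # True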
+ + >>> s2.reset_index() + a b foo + 0 bar one 0 + 1 bar two 1 + 2 baz one 2 + 3 baz two 3 + """ + inplace = validate_bool_kwarg(inplace, "inplace") + if drop: + new_index = default_index(len(self)) + if level is not None: + if not isinstance(level, (tuple, list)): + level = [level] + level = [self.index._get_level_number(lev) for lev in level] + if len(level) < self.index.nlevels: + new_index = self.index.droplevel(level) + + if inplace: + self.index = new_index + else: + return self._constructor( + self._values.copy(), index=new_index + ).__finalize__(self, method="reset_index") + elif inplace: + raise TypeError( + "Cannot reset_index inplace on a Series to create a DataFrame" + ) + else: + if name is lib.no_default: + # For backwards compatibility, keep columns as [0] instead of + # [None] when self.name is None + if self.name is None: + name = 0 + else: + name = self.name + + df = self.to_frame(name) + return df.reset_index(level=level, drop=drop) + + # ---------------------------------------------------------------------- + # Rendering Methods + + def __repr__(self) -> str: + """ + Return a string representation for a particular Series. + """ + repr_params = fmt.get_series_repr_params() + return self.to_string(**repr_params) + + def to_string( + self, + buf=None, + na_rep="NaN", + float_format=None, + header=True, + index=True, + length=False, + dtype=False, + name=False, + max_rows=None, + min_rows=None, + ): + """ + Render a string representation of the Series. + + Parameters + ---------- + buf : StringIO-like, optional + Buffer to write to. + na_rep : str, optional + String representation of NaN to use, default 'NaN'. + float_format : one-parameter function, optional + Formatter function to apply to columns' elements if they are + floats, default None. + header : bool, default True + Add the Series header (index name). + index : bool, optional + Add index (row) labels, default True. + length : bool, default False + Add the Series length. + dtype : bool, default False + Add the Series dtype. + name : bool, default False + Add the Series name if not None. + max_rows : int, optional + Maximum number of rows to show before truncating. If None, show + all. + min_rows : int, optional + The number of rows to display in a truncated repr (when number + of rows is above `max_rows`). + + Returns + ------- + str or None + String representation of Series if ``buf=None``, otherwise None. + """ + formatter = fmt.SeriesFormatter( + self, + name=name, + length=length, + header=header, + index=index, + dtype=dtype, + na_rep=na_rep, + float_format=float_format, + min_rows=min_rows, + max_rows=max_rows, + ) + result = formatter.to_string() + + # catch contract violations + if not isinstance(result, str): + raise AssertionError( + "result must be of type str, type " + f"of result is {repr(type(result).__name__)}" + ) + + if buf is None: + return result + else: + try: + buf.write(result) + except AttributeError: + with open(buf, "w") as f: + f.write(result) + + @doc( + klass=_shared_doc_kwargs["klass"], + storage_options=_shared_docs["storage_options"], + examples=dedent( + """Examples + -------- + >>> s = pd.Series(["elk", "pig", "dog", "quetzal"], name="animal") + >>> print(s.to_markdown()) + | | animal | + |---:|:---------| + | 0 | elk | + | 1 | pig | + | 2 | dog | + | 3 | quetzal | + + Output markdown with a tabulate option. 
+
+    >>> print(s.to_markdown(tablefmt="grid"))
+    +----+----------+
+    |    | animal   |
+    +====+==========+
+    |  0 | elk      |
+    +----+----------+
+    |  1 | pig      |
+    +----+----------+
+    |  2 | dog      |
+    +----+----------+
+    |  3 | quetzal  |
+    +----+----------+"""
+        ),
+    )
+    def to_markdown(
+        self,
+        buf: IO[str] | None = None,
+        mode: str = "wt",
+        index: bool = True,
+        storage_options: StorageOptions = None,
+        **kwargs,
+    ) -> str | None:
+        """
+        Print {klass} in Markdown-friendly format.
+
+        .. versionadded:: 1.0.0
+
+        Parameters
+        ----------
+        buf : str, Path or StringIO-like, optional, default None
+            Buffer to write to. If None, the output is returned as a string.
+        mode : str, optional
+            Mode in which file is opened, "wt" by default.
+        index : bool, optional, default True
+            Add index (row) labels.
+
+            .. versionadded:: 1.1.0
+        {storage_options}
+
+            .. versionadded:: 1.2.0
+
+        **kwargs
+            These parameters will be passed to `tabulate \
+                <https://pypi.org/project/tabulate>`_.
+
+        Returns
+        -------
+        str
+            {klass} in Markdown-friendly format.
+
+        Notes
+        -----
+        Requires the `tabulate <https://pypi.org/project/tabulate>`_ package.
+
+        {examples}
+        """
+        return self.to_frame().to_markdown(
+            buf, mode, index, storage_options=storage_options, **kwargs
+        )
+
+    # ----------------------------------------------------------------------
+
+    def items(self) -> Iterable[tuple[Hashable, Any]]:
+        """
+        Lazily iterate over (index, value) tuples.
+
+        This method returns an iterable tuple (index, value). This is
+        convenient if you want to create a lazy iterator.
+
+        Returns
+        -------
+        iterable
+            Iterable of tuples containing the (index, value) pairs from a
+            Series.
+
+        See Also
+        --------
+        DataFrame.items : Iterate over (column name, Series) pairs.
+        DataFrame.iterrows : Iterate over DataFrame rows as (index, Series) pairs.
+
+        Examples
+        --------
+        >>> s = pd.Series(['A', 'B', 'C'])
+        >>> for index, value in s.items():
+        ...     print(f"Index : {index}, Value : {value}")
+        Index : 0, Value : A
+        Index : 1, Value : B
+        Index : 2, Value : C
+        """
+        return zip(iter(self.index), iter(self))
+
+    @Appender(items.__doc__)
+    def iteritems(self) -> Iterable[tuple[Hashable, Any]]:
+        return self.items()
+
+    # ----------------------------------------------------------------------
+    # Misc public methods
+
+    def keys(self) -> Index:
+        """
+        Return alias for index.
+
+        Returns
+        -------
+        Index
+            Index of the Series.
+        """
+        return self.index
+
+    def to_dict(self, into=dict):
+        """
+        Convert Series to {label -> value} dict or dict-like object.
+
+        Parameters
+        ----------
+        into : class, default dict
+            The collections.abc.Mapping subclass to use as the return
+            object. Can be the actual class or an empty
+            instance of the mapping type you want. If you want a
+            collections.defaultdict, you must pass it initialized.
+
+        Returns
+        -------
+        collections.abc.Mapping
+            Key-value representation of Series.
+
+        Examples
+        --------
+        >>> s = pd.Series([1, 2, 3, 4])
+        >>> s.to_dict()
+        {0: 1, 1: 2, 2: 3, 3: 4}
+        >>> from collections import OrderedDict, defaultdict
+        >>> s.to_dict(OrderedDict)
+        OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
+        >>> dd = defaultdict(list)
+        >>> s.to_dict(dd)
+        defaultdict(<class 'list'>, {0: 1, 1: 2, 2: 3, 3: 4})
+        """
+        # GH16122
+        into_c = com.standardize_mapping(into)
+        return into_c((k, maybe_box_native(v)) for k, v in self.items())
+
+    def to_frame(self, name: Hashable = lib.no_default) -> DataFrame:
+        """
+        Convert Series to DataFrame.
+
+        Parameters
+        ----------
+        name : object, optional
+            The passed name should substitute for the series name (if it has
+            one).
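Illustrative behavior of the `name` handling described above (examples invented):

    import pandas as pd

    named = pd.Series([1, 2], name="vals")
    print(named.to_frame().columns.tolist())           # ['vals']
    print(named.to_frame("renamed").columns.tolist())  # ['renamed']

    unnamed = pd.Series([1, 2])
    print(unnamed.to_frame().columns.tolist())         # [0] -- default column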
+ + Returns + ------- + DataFrame + DataFrame representation of Series. + + Examples + -------- + >>> s = pd.Series(["a", "b", "c"], + ... name="vals") + >>> s.to_frame() + vals + 0 a + 1 b + 2 c + """ + if name is None: + warnings.warn( + "Explicitly passing `name=None` currently preserves the Series' name " + "or uses a default name of 0. This behaviour is deprecated, and in " + "the future `None` will be used as the name of the resulting " + "DataFrame column.", + FutureWarning, + stacklevel=find_stack_level(), + ) + name = lib.no_default + + columns: Index + if name is lib.no_default: + name = self.name + if name is None: + # default to [0], same as we would get with DataFrame(self) + columns = default_index(1) + else: + columns = Index([name]) + else: + columns = Index([name]) + + mgr = self._mgr.to_2d_mgr(columns) + return self._constructor_expanddim(mgr) + + def _set_name(self, name, inplace=False) -> Series: + """ + Set the Series name. + + Parameters + ---------- + name : str + inplace : bool + Whether to modify `self` directly or return a copy. + """ + inplace = validate_bool_kwarg(inplace, "inplace") + ser = self if inplace else self.copy() + ser.name = name + return ser + + @Appender( + """ +Examples +-------- +>>> ser = pd.Series([390., 350., 30., 20.], +... index=['Falcon', 'Falcon', 'Parrot', 'Parrot'], name="Max Speed") +>>> ser +Falcon 390.0 +Falcon 350.0 +Parrot 30.0 +Parrot 20.0 +Name: Max Speed, dtype: float64 +>>> ser.groupby(["a", "b", "a", "b"]).mean() +a 210.0 +b 185.0 +Name: Max Speed, dtype: float64 +>>> ser.groupby(level=0).mean() +Falcon 370.0 +Parrot 25.0 +Name: Max Speed, dtype: float64 +>>> ser.groupby(ser > 100).mean() +Max Speed +False 25.0 +True 370.0 +Name: Max Speed, dtype: float64 + +**Grouping by Indexes** + +We can groupby different levels of a hierarchical index +using the `level` parameter: + +>>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'], +... ['Captive', 'Wild', 'Captive', 'Wild']] +>>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type')) +>>> ser = pd.Series([390., 350., 30., 20.], index=index, name="Max Speed") +>>> ser +Animal Type +Falcon Captive 390.0 + Wild 350.0 +Parrot Captive 30.0 + Wild 20.0 +Name: Max Speed, dtype: float64 +>>> ser.groupby(level=0).mean() +Animal +Falcon 370.0 +Parrot 25.0 +Name: Max Speed, dtype: float64 +>>> ser.groupby(level="Type").mean() +Type +Captive 210.0 +Wild 185.0 +Name: Max Speed, dtype: float64 + +We can also choose to include `NA` in group keys or not by defining +`dropna` parameter, the default setting is `True`. 
+ +>>> ser = pd.Series([1, 2, 3, 3], index=["a", 'a', 'b', np.nan]) +>>> ser.groupby(level=0).sum() +a 3 +b 3 +dtype: int64 + +>>> ser.groupby(level=0, dropna=False).sum() +a 3 +b 3 +NaN 3 +dtype: int64 + +>>> arrays = ['Falcon', 'Falcon', 'Parrot', 'Parrot'] +>>> ser = pd.Series([390., 350., 30., 20.], index=arrays, name="Max Speed") +>>> ser.groupby(["a", "b", "a", np.nan]).mean() +a 210.0 +b 350.0 +Name: Max Speed, dtype: float64 + +>>> ser.groupby(["a", "b", "a", np.nan], dropna=False).mean() +a 210.0 +b 350.0 +NaN 20.0 +Name: Max Speed, dtype: float64 +""" + ) + @Appender(_shared_docs["groupby"] % _shared_doc_kwargs) + def groupby( + self, + by=None, + axis=0, + level=None, + as_index: bool = True, + sort: bool = True, + group_keys: bool = True, + squeeze: bool | lib.NoDefault = no_default, + observed: bool = False, + dropna: bool = True, + ) -> SeriesGroupBy: + from pandas.core.groupby.generic import SeriesGroupBy + + if squeeze is not no_default: + warnings.warn( + ( + "The `squeeze` parameter is deprecated and " + "will be removed in a future version." + ), + FutureWarning, + stacklevel=find_stack_level(), + ) + else: + squeeze = False + + if level is None and by is None: + raise TypeError("You have to supply one of 'by' and 'level'") + axis = self._get_axis_number(axis) + + # error: Argument "squeeze" to "SeriesGroupBy" has incompatible type + # "Union[bool, NoDefault]"; expected "bool" + return SeriesGroupBy( + obj=self, + keys=by, + axis=axis, + level=level, + as_index=as_index, + sort=sort, + group_keys=group_keys, + squeeze=squeeze, # type: ignore[arg-type] + observed=observed, + dropna=dropna, + ) + + # ---------------------------------------------------------------------- + # Statistics, overridden ndarray methods + + # TODO: integrate bottleneck + + def count(self, level=None): + """ + Return number of non-NA/null observations in the Series. + + Parameters + ---------- + level : int or level name, default None + If the axis is a MultiIndex (hierarchical), count along a + particular level, collapsing into a smaller Series. + + Returns + ------- + int or Series (if level specified) + Number of non-null values in the Series. + + See Also + -------- + DataFrame.count : Count non-NA cells for each column or row. + + Examples + -------- + >>> s = pd.Series([0.0, 1.0, np.nan]) + >>> s.count() + 2 + """ + if level is None: + return notna(self._values).sum().astype("int64") + else: + warnings.warn( + "Using the level keyword in DataFrame and Series aggregations is " + "deprecated and will be removed in a future version. Use groupby " + "instead. 
ser.count(level=1) should use ser.groupby(level=1).count().",
+                FutureWarning,
+                stacklevel=find_stack_level(),
+            )
+        if not isinstance(self.index, MultiIndex):
+            raise ValueError("Series.count level is only valid with a MultiIndex")
+
+        index = self.index
+        assert isinstance(index, MultiIndex)  # for mypy
+
+        if isinstance(level, str):
+            level = index._get_level_number(level)
+
+        lev = index.levels[level]
+        level_codes = np.array(index.codes[level], subok=False, copy=True)
+
+        mask = level_codes == -1
+        if mask.any():
+            level_codes[mask] = cnt = len(lev)
+            lev = lev.insert(cnt, lev._na_value)
+
+        obs = level_codes[notna(self._values)]
+        # Argument "minlength" to "bincount" has incompatible type "Optional[int]";
+        # expected "SupportsIndex"  [arg-type]
+        out = np.bincount(obs, minlength=len(lev) or None)  # type: ignore[arg-type]
+        return self._constructor(out, index=lev, dtype="int64").__finalize__(
+            self, method="count"
+        )
+
+    def mode(self, dropna: bool = True) -> Series:
+        """
+        Return the mode(s) of the Series.
+
+        The mode is the value that appears most often. There can be multiple modes.
+
+        Always returns Series even if only one value is returned.
+
+        Parameters
+        ----------
+        dropna : bool, default True
+            Don't consider counts of NaN/NaT.
+
+        Returns
+        -------
+        Series
+            Modes of the Series in sorted order.
+        """
+        # TODO: Add option for bins like value_counts()
+        values = self._values
+        if isinstance(values, np.ndarray):
+            res_values = algorithms.mode(values, dropna=dropna)
+        else:
+            res_values = values._mode(dropna=dropna)
+
+        # Ensure index is type stable (should always use int index)
+        return self._constructor(
+            res_values, index=range(len(res_values)), name=self.name
+        )
+
+    def unique(self) -> ArrayLike:
+        """
+        Return unique values of Series object.
+
+        Uniques are returned in order of appearance. Hash table-based unique,
+        therefore does NOT sort.
+
+        Returns
+        -------
+        ndarray or ExtensionArray
+            The unique values returned as a NumPy array. See Notes.
+
+        See Also
+        --------
+        unique : Top-level unique method for any 1-d array-like object.
+        Index.unique : Return Index with unique values from an Index object.
+
+        Notes
+        -----
+        Returns the unique values as a NumPy array. In case of an
+        extension-array backed Series, a new
+        :class:`~api.extensions.ExtensionArray` of that type with just
+        the unique values is returned. This includes
+
+            * Categorical
+            * Period
+            * Datetime with Timezone
+            * Interval
+            * Sparse
+            * IntegerNA
+
+        See Examples section.
+
+        Examples
+        --------
+        >>> pd.Series([2, 1, 3, 3], name='A').unique()
+        array([2, 1, 3])
+
+        >>> pd.Series([pd.Timestamp('2016-01-01') for _ in range(3)]).unique()
+        array(['2016-01-01T00:00:00.000000000'], dtype='datetime64[ns]')
+
+        >>> pd.Series([pd.Timestamp('2016-01-01', tz='US/Eastern')
+        ...            for _ in range(3)]).unique()
+        <DatetimeArray>
+        ['2016-01-01 00:00:00-05:00']
+        Length: 1, dtype: datetime64[ns, US/Eastern]
+
+        A Categorical will return categories in the order of
+        appearance and with the same dtype.
+
+        >>> pd.Series(pd.Categorical(list('baabc'))).unique()
+        ['b', 'a', 'c']
+        Categories (3, object): ['a', 'b', 'c']
+        >>> pd.Series(pd.Categorical(list('baabc'), categories=list('abc'),
+        ...                          ordered=True)).unique()
+        ['b', 'a', 'c']
+        Categories (3, object): ['a' < 'b' < 'c']
+        """
+        return super().unique()
+
+    @overload
+    def drop_duplicates(self, keep=..., inplace: Literal[False] = ...) -> Series:
+        ...
+
+    @overload
+    def drop_duplicates(self, keep, inplace: Literal[True]) -> None:
+        ...
+ + @overload + def drop_duplicates(self, *, inplace: Literal[True]) -> None: + ... + + @overload + def drop_duplicates(self, keep=..., inplace: bool = ...) -> Series | None: + ... + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + def drop_duplicates(self, keep="first", inplace=False) -> Series | None: + """ + Return Series with duplicate values removed. + + Parameters + ---------- + keep : {'first', 'last', ``False``}, default 'first' + Method to handle dropping duplicates: + + - 'first' : Drop duplicates except for the first occurrence. + - 'last' : Drop duplicates except for the last occurrence. + - ``False`` : Drop all duplicates. + + inplace : bool, default ``False`` + If ``True``, performs operation inplace and returns None. + + Returns + ------- + Series or None + Series with duplicates dropped or None if ``inplace=True``. + + See Also + -------- + Index.drop_duplicates : Equivalent method on Index. + DataFrame.drop_duplicates : Equivalent method on DataFrame. + Series.duplicated : Related method on Series, indicating duplicate + Series values. + + Examples + -------- + Generate a Series with duplicated entries. + + >>> s = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo'], + ... name='animal') + >>> s + 0 lama + 1 cow + 2 lama + 3 beetle + 4 lama + 5 hippo + Name: animal, dtype: object + + With the 'keep' parameter, the selection behaviour of duplicated values + can be changed. The value 'first' keeps the first occurrence for each + set of duplicated entries. The default value of keep is 'first'. + + >>> s.drop_duplicates() + 0 lama + 1 cow + 3 beetle + 5 hippo + Name: animal, dtype: object + + The value 'last' for parameter 'keep' keeps the last occurrence for + each set of duplicated entries. + + >>> s.drop_duplicates(keep='last') + 1 cow + 3 beetle + 4 lama + 5 hippo + Name: animal, dtype: object + + The value ``False`` for parameter 'keep' discards all sets of + duplicated entries. Setting the value of 'inplace' to ``True`` performs + the operation inplace and returns ``None``. + + >>> s.drop_duplicates(keep=False, inplace=True) + >>> s + 1 cow + 3 beetle + 5 hippo + Name: animal, dtype: object + """ + inplace = validate_bool_kwarg(inplace, "inplace") + result = super().drop_duplicates(keep=keep) + if inplace: + self._update_inplace(result) + return None + else: + return result + + def duplicated(self, keep="first") -> Series: + """ + Indicate duplicate Series values. + + Duplicated values are indicated as ``True`` values in the resulting + Series. Either all duplicates, all except the first or all except the + last occurrence of duplicates can be indicated. + + Parameters + ---------- + keep : {'first', 'last', False}, default 'first' + Method to handle dropping duplicates: + + - 'first' : Mark duplicates as ``True`` except for the first + occurrence. + - 'last' : Mark duplicates as ``True`` except for the last + occurrence. + - ``False`` : Mark all duplicates as ``True``. + + Returns + ------- + Series[bool] + Series indicating whether each value has occurred in the + preceding values. + + See Also + -------- + Index.duplicated : Equivalent method on pandas.Index. + DataFrame.duplicated : Equivalent method on pandas.DataFrame. + Series.drop_duplicates : Remove duplicate values from Series. 
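The See Also relationship above can be made concrete: `drop_duplicates(keep=...)` is boolean masking with the `duplicated(keep=...)` indicator. A short illustrative check (data invented):

    import pandas as pd

    s = pd.Series(["lama", "cow", "lama", "beetle", "lama"])

    mask = s.duplicated(keep="first")
    print(s[~mask].equals(s.drop_duplicates(keep="first")))  # True
    print(s[~s.duplicated(keep=False)].tolist())             # ['cow', 'beetle']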
+ + Examples + -------- + By default, for each set of duplicated values, the first occurrence is + set to False and all others to True: + + >>> animals = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama']) + >>> animals.duplicated() + 0 False + 1 False + 2 True + 3 False + 4 True + dtype: bool + + which is equivalent to + + >>> animals.duplicated(keep='first') + 0 False + 1 False + 2 True + 3 False + 4 True + dtype: bool + + By using 'last', the last occurrence of each set of duplicated values + is set to False and all others to True: + + >>> animals.duplicated(keep='last') + 0 True + 1 False + 2 True + 3 False + 4 False + dtype: bool + + By setting ``keep`` to ``False``, all duplicates are marked True: + + >>> animals.duplicated(keep=False) + 0 True + 1 False + 2 True + 3 False + 4 True + dtype: bool + """ + res = self._duplicated(keep=keep) + result = self._constructor(res, index=self.index) + return result.__finalize__(self, method="duplicated") + + def idxmin(self, axis=0, skipna=True, *args, **kwargs): + """ + Return the row label of the minimum value. + + If multiple values equal the minimum, the first row label with that + value is returned. + + Parameters + ---------- + axis : int, default 0 + For compatibility with DataFrame.idxmin. Redundant for application + on Series. + skipna : bool, default True + Exclude NA/null values. If the entire Series is NA, the result + will be NA. + *args, **kwargs + Additional arguments and keywords have no effect but might be + accepted for compatibility with NumPy. + + Returns + ------- + Index + Label of the minimum value. + + Raises + ------ + ValueError + If the Series is empty. + + See Also + -------- + numpy.argmin : Return indices of the minimum values + along the given axis. + DataFrame.idxmin : Return index of first occurrence of minimum + over requested axis. + Series.idxmax : Return index *label* of the first occurrence + of maximum of values. + + Notes + ----- + This method is the Series version of ``ndarray.argmin``. This method + returns the label of the minimum, while ``ndarray.argmin`` returns + the position. To get the position, use ``series.values.argmin()``. + + Examples + -------- + >>> s = pd.Series(data=[1, None, 4, 1], + ... index=['A', 'B', 'C', 'D']) + >>> s + A 1.0 + B NaN + C 4.0 + D 1.0 + dtype: float64 + + >>> s.idxmin() + 'A' + + If `skipna` is False and there is an NA value in the data, + the function returns ``nan``. + + >>> s.idxmin(skipna=False) + nan + """ + i = self.argmin(axis, skipna, *args, **kwargs) + if i == -1: + return np.nan + return self.index[i] + + def idxmax(self, axis=0, skipna=True, *args, **kwargs): + """ + Return the row label of the maximum value. + + If multiple values equal the maximum, the first row label with that + value is returned. + + Parameters + ---------- + axis : int, default 0 + For compatibility with DataFrame.idxmax. Redundant for application + on Series. + skipna : bool, default True + Exclude NA/null values. If the entire Series is NA, the result + will be NA. + *args, **kwargs + Additional arguments and keywords have no effect but might be + accepted for compatibility with NumPy. + + Returns + ------- + Index + Label of the maximum value. + + Raises + ------ + ValueError + If the Series is empty. + + See Also + -------- + numpy.argmax : Return indices of the maximum values + along the given axis. + DataFrame.idxmax : Return index of first occurrence of maximum + over requested axis. + Series.idxmin : Return index *label* of the first occurrence + of minimum of values.
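A short sketch of `duplicated` and `idxmin` as documented above (illustrative addition, not from the diffed source):

import pandas as pd

s = pd.Series([3, 1, 4, 1], index=['a', 'b', 'c', 'd'])
print(s.duplicated().tolist())   # [False, False, False, True]: the second 1 is the duplicate
print(s.idxmin())                # 'b' -- first label holding the minimum value 1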
+ + Notes + ----- + This method is the Series version of ``ndarray.argmax``. This method + returns the label of the maximum, while ``ndarray.argmax`` returns + the position. To get the position, use ``series.values.argmax()``. + + Examples + -------- + >>> s = pd.Series(data=[1, None, 4, 3, 4], + ... index=['A', 'B', 'C', 'D', 'E']) + >>> s + A 1.0 + B NaN + C 4.0 + D 3.0 + E 4.0 + dtype: float64 + + >>> s.idxmax() + 'C' + + If `skipna` is False and there is an NA value in the data, + the function returns ``nan``. + + >>> s.idxmax(skipna=False) + nan + """ + i = self.argmax(axis, skipna, *args, **kwargs) + if i == -1: + return np.nan + return self.index[i] + + def round(self, decimals=0, *args, **kwargs) -> Series: + """ + Round each value in a Series to the given number of decimals. + + Parameters + ---------- + decimals : int, default 0 + Number of decimal places to round to. If decimals is negative, + it specifies the number of positions to the left of the decimal point. + *args, **kwargs + Additional arguments and keywords have no effect but might be + accepted for compatibility with NumPy. + + Returns + ------- + Series + Rounded values of the Series. + + See Also + -------- + numpy.around : Round values of an np.array. + DataFrame.round : Round values of a DataFrame. + + Examples + -------- + >>> s = pd.Series([0.1, 1.3, 2.7]) + >>> s.round() + 0 0.0 + 1 1.0 + 2 3.0 + dtype: float64 + """ + nv.validate_round(args, kwargs) + result = self._values.round(decimals) + result = self._constructor(result, index=self.index).__finalize__( + self, method="round" + ) + + return result + + def quantile(self, q=0.5, interpolation="linear"): + """ + Return value at the given quantile. + + Parameters + ---------- + q : float or array-like, default 0.5 (50% quantile) + The quantile(s) to compute, which can lie in range: 0 <= q <= 1. + interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} + This optional parameter specifies the interpolation method to use, + when the desired quantile lies between two data points `i` and `j`: + + * linear: `i + (j - i) * fraction`, where `fraction` is the + fractional part of the index surrounded by `i` and `j`. + * lower: `i`. + * higher: `j`. + * nearest: `i` or `j` whichever is nearest. + * midpoint: (`i` + `j`) / 2. + + Returns + ------- + float or Series + If ``q`` is an array, a Series will be returned where the + index is ``q`` and the values are the quantiles, otherwise + a float will be returned. + + See Also + -------- + core.window.Rolling.quantile : Calculate the rolling quantile. + numpy.percentile : Returns the q-th percentile(s) of the array elements. + + Examples + -------- + >>> s = pd.Series([1, 2, 3, 4]) + >>> s.quantile(.5) + 2.5 + >>> s.quantile([.25, .5, .75]) + 0.25 1.75 + 0.50 2.50 + 0.75 3.25 + dtype: float64 + """ + validate_percentile(q) + + # We dispatch to DataFrame so that core.internals only has to worry + # about 2D cases. + df = self.to_frame() + + result = df.quantile(q=q, interpolation=interpolation, numeric_only=False) + if result.ndim == 2: + result = result.iloc[:, 0] + + if is_list_like(q): + result.name = self.name + return self._constructor(result, index=Float64Index(q), name=self.name) + else: + # scalar + return result.iloc[0] + + def corr(self, other, method="pearson", min_periods=None) -> float: + """ + Compute correlation with `other` Series, excluding missing values. + + Parameters + ---------- + other : Series + Series with which to compute the correlation. 
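A brief sketch of the `round` and `quantile` behaviour described above (editorial illustration only; assumes a standard pandas install):

import pandas as pd

s = pd.Series([1.0, 2.0, 3.0, 4.0])
print(s.quantile(0.5))             # 2.5 -- linear interpolation between 2.0 and 3.0
print(s.quantile([0.25, 0.75]))    # returns a Series indexed by the requested q values
print(s.round())                   # element-wise rounding; result stays float64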
+ method : {'pearson', 'kendall', 'spearman'} or callable + Method used to compute correlation: + + - pearson : Standard correlation coefficient + - kendall : Kendall Tau correlation coefficient + - spearman : Spearman rank correlation + - callable: Callable with input two 1d ndarrays and returning a float. + + .. warning:: + Note that the returned matrix from corr will have 1 along the + diagonals and will be symmetric regardless of the callable's + behavior. + min_periods : int, optional + Minimum number of observations needed to have a valid result. + + Returns + ------- + float + Correlation with other. + + See Also + -------- + DataFrame.corr : Compute pairwise correlation between columns. + DataFrame.corrwith : Compute pairwise correlation with another + DataFrame or Series. + + Examples + -------- + >>> def histogram_intersection(a, b): + ... v = np.minimum(a, b).sum().round(decimals=1) + ... return v + >>> s1 = pd.Series([.2, .0, .6, .2]) + >>> s2 = pd.Series([.3, .6, .0, .1]) + >>> s1.corr(s2, method=histogram_intersection) + 0.3 + """ + this, other = self.align(other, join="inner", copy=False) + if len(this) == 0: + return np.nan + + if method in ["pearson", "spearman", "kendall"] or callable(method): + return nanops.nancorr( + this.values, other.values, method=method, min_periods=min_periods + ) + + raise ValueError( + "method must be either 'pearson', " + "'spearman', 'kendall', or a callable, " + f"'{method}' was supplied" + ) + + def cov( + self, + other: Series, + min_periods: int | None = None, + ddof: int | None = 1, + ) -> float: + """ + Compute covariance with Series, excluding missing values. + + Parameters + ---------- + other : Series + Series with which to compute the covariance. + min_periods : int, optional + Minimum number of observations needed to have a valid result. + ddof : int, default 1 + Delta degrees of freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements. + + .. versionadded:: 1.1.0 + + Returns + ------- + float + Covariance between Series and other normalized by N-1 + (unbiased estimator). + + See Also + -------- + DataFrame.cov : Compute pairwise covariance of columns. + + Examples + -------- + >>> s1 = pd.Series([0.90010907, 0.13484424, 0.62036035]) + >>> s2 = pd.Series([0.12528585, 0.26962463, 0.51111198]) + >>> s1.cov(s2) + -0.01685762652715874 + """ + this, other = self.align(other, join="inner", copy=False) + if len(this) == 0: + return np.nan + return nanops.nancov( + this.values, other.values, min_periods=min_periods, ddof=ddof + ) + + @doc( + klass="Series", + extra_params="", + other_klass="DataFrame", + examples=dedent( + """ + Difference with previous row + + >>> s = pd.Series([1, 1, 2, 3, 5, 8]) + >>> s.diff() + 0 NaN + 1 0.0 + 2 1.0 + 3 1.0 + 4 2.0 + 5 3.0 + dtype: float64 + + Difference with 3rd previous row + + >>> s.diff(periods=3) + 0 NaN + 1 NaN + 2 NaN + 3 2.0 + 4 4.0 + 5 6.0 + dtype: float64 + + Difference with following row + + >>> s.diff(periods=-1) + 0 0.0 + 1 -1.0 + 2 -1.0 + 3 -2.0 + 4 -3.0 + 5 NaN + dtype: float64 + + Overflow in input dtype + + >>> s = pd.Series([1, 0], dtype=np.uint8) + >>> s.diff() + 0 NaN + 1 255.0 + dtype: float64""" + ), + ) + def diff(self, periods: int = 1) -> Series: + """ + First discrete difference of element. + + Calculates the difference of a {klass} element compared with another + element in the {klass} (default is element in previous row). 
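A small sketch of `corr` and `cov` on aligned Series (illustrative, not part of the diffed pandas source):

import pandas as pd

s1 = pd.Series([1.0, 2.0, 3.0, 4.0])
s2 = pd.Series([1.0, 2.0, 3.0, 5.0])
print(s1.corr(s2))   # Pearson correlation by default; close to 1.0 for these values
print(s1.cov(s2))    # sample covariance with divisor N - ddof (ddof=1 by default)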
+ + Parameters + ---------- + periods : int, default 1 + Periods to shift for calculating difference, accepts negative + values. + {extra_params} + Returns + ------- + {klass} + First differences of the Series. + + See Also + -------- + {klass}.pct_change: Percent change over given number of periods. + {klass}.shift: Shift index by desired number of periods with an + optional time freq. + {other_klass}.diff: First discrete difference of object. + + Notes + ----- + For boolean dtypes, this uses :meth:`operator.xor` rather than + :meth:`operator.sub`. + The result is calculated according to the current dtype in {klass}, + however the dtype of the result is always float64. + + Examples + -------- + {examples} + """ + result = algorithms.diff(self._values, periods) + return self._constructor(result, index=self.index).__finalize__( + self, method="diff" + ) + + def autocorr(self, lag=1) -> float: + """ + Compute the lag-N autocorrelation. + + This method computes the Pearson correlation between + the Series and its shifted self. + + Parameters + ---------- + lag : int, default 1 + Number of lags to apply before performing autocorrelation. + + Returns + ------- + float + The Pearson correlation between self and self.shift(lag). + + See Also + -------- + Series.corr : Compute the correlation between two Series. + Series.shift : Shift index by desired number of periods. + DataFrame.corr : Compute pairwise correlation of columns. + DataFrame.corrwith : Compute pairwise correlation between rows or + columns of two DataFrame objects. + + Notes + ----- + If the Pearson correlation is not well defined, return 'NaN'. + + Examples + -------- + >>> s = pd.Series([0.25, 0.5, 0.2, -0.05]) + >>> s.autocorr() # doctest: +ELLIPSIS + 0.10355... + >>> s.autocorr(lag=2) # doctest: +ELLIPSIS + -0.99999... + + If the Pearson correlation is not well defined, then 'NaN' is returned. + + >>> s = pd.Series([1, 0, 0, 0]) + >>> s.autocorr() + nan + """ + return self.corr(self.shift(lag)) + + def dot(self, other): + """ + Compute the dot product between the Series and the columns of other. + + This method computes the dot product between the Series and another + one, the Series and each column of a DataFrame, or the Series and + each column of an array. + + It can also be called using `self @ other` in Python >= 3.5. + + Parameters + ---------- + other : Series, DataFrame or array-like + The other object to compute the dot product with its columns. + + Returns + ------- + scalar, Series or numpy.ndarray + Return the dot product of the Series and other if other is a + Series, a Series of the dot products of the Series and each column of + other if other is a DataFrame, or a numpy.ndarray of the dot products + between the Series and each column of the numpy array. + + See Also + -------- + DataFrame.dot: Compute the matrix product with the DataFrame. + Series.mul: Multiplication of series and other, element-wise. + + Notes + ----- + The Series and other have to share the same index if other is a Series + or a DataFrame.
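A compact sketch of `diff` and `autocorr`, which are defined above (editorial illustration, not part of the vendored source):

import pandas as pd

s = pd.Series([1, 1, 2, 3, 5, 8])
print(s.diff())       # first discrete difference; the first element is NaN
print(s.autocorr())   # lag-1 Pearson correlation of s with s.shift(1)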
+ + Examples + -------- + >>> s = pd.Series([0, 1, 2, 3]) + >>> other = pd.Series([-1, 2, -3, 4]) + >>> s.dot(other) + 8 + >>> s @ other + 8 + >>> df = pd.DataFrame([[0, 1], [-2, 3], [4, -5], [6, 7]]) + >>> s.dot(df) + 0 24 + 1 14 + dtype: int64 + >>> arr = np.array([[0, 1], [-2, 3], [4, -5], [6, 7]]) + >>> s.dot(arr) + array([24, 14]) + """ + if isinstance(other, (Series, ABCDataFrame)): + common = self.index.union(other.index) + if len(common) > len(self.index) or len(common) > len(other.index): + raise ValueError("matrices are not aligned") + + left = self.reindex(index=common, copy=False) + right = other.reindex(index=common, copy=False) + lvals = left.values + rvals = right.values + else: + lvals = self.values + rvals = np.asarray(other) + if lvals.shape[0] != rvals.shape[0]: + raise Exception( + f"Dot product shape mismatch, {lvals.shape} vs {rvals.shape}" + ) + + if isinstance(other, ABCDataFrame): + return self._constructor( + np.dot(lvals, rvals), index=other.columns + ).__finalize__(self, method="dot") + elif isinstance(other, Series): + return np.dot(lvals, rvals) + elif isinstance(rvals, np.ndarray): + return np.dot(lvals, rvals) + else: # pragma: no cover + raise TypeError(f"unsupported type: {type(other)}") + + def __matmul__(self, other): + """ + Matrix multiplication using binary `@` operator in Python>=3.5. + """ + return self.dot(other) + + def __rmatmul__(self, other): + """ + Matrix multiplication using binary `@` operator in Python>=3.5. + """ + return self.dot(np.transpose(other)) + + @doc(base.IndexOpsMixin.searchsorted, klass="Series") + # Signature of "searchsorted" incompatible with supertype "IndexOpsMixin" + def searchsorted( # type: ignore[override] + self, + value: NumpyValueArrayLike | ExtensionArray, + side: Literal["left", "right"] = "left", + sorter: NumpySorter = None, + ) -> npt.NDArray[np.intp] | np.intp: + return base.IndexOpsMixin.searchsorted(self, value, side=side, sorter=sorter) + + # ------------------------------------------------------------------- + # Combination + + def append( + self, to_append, ignore_index: bool = False, verify_integrity: bool = False + ): + """ + Concatenate two or more Series. + + .. deprecated:: 1.4.0 + Use :func:`concat` instead. For further details see + :ref:`whatsnew_140.deprecations.frame_series_append` + + Parameters + ---------- + to_append : Series or list/tuple of Series + Series to append with self. + ignore_index : bool, default False + If True, the resulting axis will be labeled 0, 1, …, n - 1. + verify_integrity : bool, default False + If True, raise Exception on creating index with duplicates. + + Returns + ------- + Series + Concatenated Series. + + See Also + -------- + concat : General function to concatenate DataFrame or Series objects. + + Notes + ----- + Iteratively appending to a Series can be more computationally intensive + than a single concatenate. A better solution is to append values to a + list and then concatenate the list with the original Series all at + once. + + Examples + -------- + >>> s1 = pd.Series([1, 2, 3]) + >>> s2 = pd.Series([4, 5, 6]) + >>> s3 = pd.Series([4, 5, 6], index=[3, 4, 5]) + >>> s1.append(s2) + 0 1 + 1 2 + 2 3 + 0 4 + 1 5 + 2 6 + dtype: int64 + + >>> s1.append(s3) + 0 1 + 1 2 + 2 3 + 3 4 + 4 5 + 5 6 + dtype: int64 + + With `ignore_index` set to True: + + >>> s1.append(s2, ignore_index=True) + 0 1 + 1 2 + 2 3 + 3 4 + 4 5 + 5 6 + dtype: int64 + + With `verify_integrity` set to True: + + >>> s1.append(s2, verify_integrity=True) + Traceback (most recent call last): + ... 
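An illustrative sketch of `dot` and the `@` operator implemented by `__matmul__` above (not part of the diffed pandas source):

import pandas as pd

s = pd.Series([0, 1, 2, 3])
other = pd.Series([-1, 2, -3, 4])
print(s.dot(other))   # 8: indices are aligned, then an ordinary inner product is taken
print(s @ other)      # __matmul__ delegates to dot(), so this is also 8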
+ ValueError: Indexes have overlapping values: [0, 1, 2] + """ + warnings.warn( + "The series.append method is deprecated " + "and will be removed from pandas in a future version. " + "Use pandas.concat instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + return self._append(to_append, ignore_index, verify_integrity) + + def _append( + self, to_append, ignore_index: bool = False, verify_integrity: bool = False + ): + from pandas.core.reshape.concat import concat + + if isinstance(to_append, (list, tuple)): + to_concat = [self] + to_concat.extend(to_append) + else: + to_concat = [self, to_append] + if any(isinstance(x, (ABCDataFrame,)) for x in to_concat[1:]): + msg = "to_append should be a Series or list/tuple of Series, got DataFrame" + raise TypeError(msg) + return concat( + to_concat, ignore_index=ignore_index, verify_integrity=verify_integrity + ) + + def _binop(self, other: Series, func, level=None, fill_value=None): + """ + Perform generic binary operation with optional fill value. + + Parameters + ---------- + other : Series + func : binary operator + fill_value : float or object + Value to substitute for NA/null values. If both Series are NA in a + location, the result will be NA regardless of the passed fill value. + level : int or level name, default None + Broadcast across a level, matching Index values on the + passed MultiIndex level. + + Returns + ------- + Series + """ + if not isinstance(other, Series): + raise AssertionError("Other operand must be Series") + + this = self + + if not self.index.equals(other.index): + this, other = self.align(other, level=level, join="outer", copy=False) + + this_vals, other_vals = ops.fill_binop(this._values, other._values, fill_value) + + with np.errstate(all="ignore"): + result = func(this_vals, other_vals) + + name = ops.get_op_result_name(self, other) + return this._construct_result(result, name) + + def _construct_result( + self, result: ArrayLike | tuple[ArrayLike, ArrayLike], name: Hashable + ) -> Series | tuple[Series, Series]: + """ + Construct an appropriately-labelled Series from the result of an op. + + Parameters + ---------- + result : ndarray or ExtensionArray + name : Label + + Returns + ------- + Series + In the case of __divmod__ or __rdivmod__, a 2-tuple of Series. + """ + if isinstance(result, tuple): + # produced by divmod or rdivmod + + res1 = self._construct_result(result[0], name=name) + res2 = self._construct_result(result[1], name=name) + + # GH#33427 assertions to keep mypy happy + assert isinstance(res1, Series) + assert isinstance(res2, Series) + return (res1, res2) + + # We do not pass dtype to ensure that the Series constructor + # does inference in the case where `result` has object-dtype. + out = self._constructor(result, index=self.index) + out = out.__finalize__(self) + + # Set the result's name after __finalize__ is called because __finalize__ + # would set it back to self.name + out.name = name + return out + + @doc( + _shared_docs["compare"], + """ +Returns +------- +Series or DataFrame + If axis is 0 or 'index' the result will be a Series. + The resulting index will be a MultiIndex with 'self' and 'other' + stacked alternately at the inner level. + + If axis is 1 or 'columns' the result will be a DataFrame. + It will have two columns namely 'self' and 'other'. + +See Also +-------- +DataFrame.compare : Compare with another DataFrame and show differences. + +Notes +----- +Matching NaNs will not appear as a difference. 
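Since `Series.append` is deprecated above in favour of `pandas.concat`, here is a hedged migration sketch (editorial addition, not from the diffed source):

import pandas as pd

s1 = pd.Series([1, 2])
s2 = pd.Series([3, 4])
# preferred replacement for the deprecated s1.append(s2):
combined = pd.concat([s1, s2], ignore_index=True)
print(combined.tolist())   # [1, 2, 3, 4]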
+ +Examples +-------- +>>> s1 = pd.Series(["a", "b", "c", "d", "e"]) +>>> s2 = pd.Series(["a", "a", "c", "b", "e"]) + +Align the differences on columns + +>>> s1.compare(s2) + self other +1 b a +3 d b + +Stack the differences on indices + +>>> s1.compare(s2, align_axis=0) +1 self b + other a +3 self d + other b +dtype: object + +Keep all original rows + +>>> s1.compare(s2, keep_shape=True) + self other +0 NaN NaN +1 b a +2 NaN NaN +3 d b +4 NaN NaN + +Keep all original rows and also all original values + +>>> s1.compare(s2, keep_shape=True, keep_equal=True) + self other +0 a a +1 b a +2 c c +3 d b +4 e e +""", + klass=_shared_doc_kwargs["klass"], + ) + def compare( + self, + other: Series, + align_axis: Axis = 1, + keep_shape: bool = False, + keep_equal: bool = False, + ) -> DataFrame | Series: + return super().compare( + other=other, + align_axis=align_axis, + keep_shape=keep_shape, + keep_equal=keep_equal, + ) + + def combine(self, other, func, fill_value=None) -> Series: + """ + Combine the Series with a Series or scalar according to `func`. + + Combine the Series and `other` using `func` to perform elementwise + selection for combined Series. + `fill_value` is assumed when value is missing at some index + from one of the two objects being combined. + + Parameters + ---------- + other : Series or scalar + The value(s) to be combined with the `Series`. + func : function + Function that takes two scalars as inputs and returns an element. + fill_value : scalar, optional + The value to assume when an index is missing from + one Series or the other. The default specifies to use the + appropriate NaN value for the underlying dtype of the Series. + + Returns + ------- + Series + The result of combining the Series with the other object. + + See Also + -------- + Series.combine_first : Combine Series values, choosing the calling + Series' values first. + + Examples + -------- + Consider 2 Datasets ``s1`` and ``s2`` containing + highest clocked speeds of different birds. + + >>> s1 = pd.Series({'falcon': 330.0, 'eagle': 160.0}) + >>> s1 + falcon 330.0 + eagle 160.0 + dtype: float64 + >>> s2 = pd.Series({'falcon': 345.0, 'eagle': 200.0, 'duck': 30.0}) + >>> s2 + falcon 345.0 + eagle 200.0 + duck 30.0 + dtype: float64 + + Now, to combine the two datasets and view the highest speeds + of the birds across the two datasets + + >>> s1.combine(s2, max) + duck NaN + eagle 200.0 + falcon 345.0 + dtype: float64 + + In the previous example, the resulting value for duck is missing, + because the maximum of a NaN and a float is a NaN. + So, in the example, we set ``fill_value=0``, + so the maximum value returned will be the value from some dataset. 
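A minimal sketch of `compare`, whose shared docstring appears above (illustrative only, not part of the vendored pandas source):

import pandas as pd

s1 = pd.Series(['a', 'b', 'c'])
s2 = pd.Series(['a', 'x', 'c'])
print(s1.compare(s2))                # DataFrame with 'self'/'other' columns; only row 1 differs
print(s1.compare(s2, align_axis=0))  # stacked Series variant instead of two columns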
+ + >>> s1.combine(s2, max, fill_value=0) + duck 30.0 + eagle 200.0 + falcon 345.0 + dtype: float64 + """ + if fill_value is None: + fill_value = na_value_for_dtype(self.dtype, compat=False) + + if isinstance(other, Series): + # If other is a Series, result is based on union of Series, + # so do this element by element + new_index = self.index.union(other.index) + new_name = ops.get_op_result_name(self, other) + new_values = np.empty(len(new_index), dtype=object) + for i, idx in enumerate(new_index): + lv = self.get(idx, fill_value) + rv = other.get(idx, fill_value) + with np.errstate(all="ignore"): + new_values[i] = func(lv, rv) + else: + # Assume that other is a scalar, so apply the function for + # each element in the Series + new_index = self.index + new_values = np.empty(len(new_index), dtype=object) + with np.errstate(all="ignore"): + new_values[:] = [func(lv, other) for lv in self._values] + new_name = self.name + + # try_float=False is to match agg_series + npvalues = lib.maybe_convert_objects(new_values, try_float=False) + res_values = maybe_cast_pointwise_result(npvalues, self.dtype, same_dtype=False) + return self._constructor(res_values, index=new_index, name=new_name) + + def combine_first(self, other) -> Series: + """ + Update null elements with value in the same location in 'other'. + + Combine two Series objects by filling null values in one Series with + non-null values from the other Series. Result index will be the union + of the two indexes. + + Parameters + ---------- + other : Series + The value(s) to be used for filling null values. + + Returns + ------- + Series + The result of combining the provided Series with the other object. + + See Also + -------- + Series.combine : Perform element-wise operation on two Series + using a given function. + + Examples + -------- + >>> s1 = pd.Series([1, np.nan]) + >>> s2 = pd.Series([3, 4, 5]) + >>> s1.combine_first(s2) + 0 1.0 + 1 4.0 + 2 5.0 + dtype: float64 + + Null values still persist if the location of that null value + does not exist in `other` + + >>> s1 = pd.Series({'falcon': np.nan, 'eagle': 160.0}) + >>> s2 = pd.Series({'eagle': 200.0, 'duck': 30.0}) + >>> s1.combine_first(s2) + duck 30.0 + eagle 160.0 + falcon NaN + dtype: float64 + """ + new_index = self.index.union(other.index) + this = self.reindex(new_index, copy=False) + other = other.reindex(new_index, copy=False) + if this.dtype.kind == "M" and other.dtype.kind != "M": + other = to_datetime(other) + + return this.where(notna(this), other) + + def update(self, other) -> None: + """ + Modify Series in place using values from passed Series. + + Uses non-NA values from passed Series to make updates. Aligns + on index. + + Parameters + ---------- + other : Series, or object coercible into Series + + Examples + -------- + >>> s = pd.Series([1, 2, 3]) + >>> s.update(pd.Series([4, 5, 6])) + >>> s + 0 4 + 1 5 + 2 6 + dtype: int64 + + >>> s = pd.Series(['a', 'b', 'c']) + >>> s.update(pd.Series(['d', 'e'], index=[0, 2])) + >>> s + 0 d + 1 b + 2 e + dtype: object + + >>> s = pd.Series([1, 2, 3]) + >>> s.update(pd.Series([4, 5, 6, 7, 8])) + >>> s + 0 4 + 1 5 + 2 6 + dtype: int64 + + If ``other`` contains NaNs the corresponding values are not updated + in the original Series. 
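A short sketch of `combine_first` and the in-place `update` documented above (editorial illustration; assumes only pandas and numpy):

import numpy as np
import pandas as pd

s1 = pd.Series([1.0, np.nan])
s2 = pd.Series([3.0, 4.0, 5.0])
print(s1.combine_first(s2).tolist())   # [1.0, 4.0, 5.0]: NaN filled, union of the indexes

s = pd.Series([1, 2, 3])
s.update(pd.Series([9], index=[1]))    # modifies s in place, aligned on index label 1
print(s.tolist())                      # [1, 9, 3]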
+ + >>> s = pd.Series([1, 2, 3]) + >>> s.update(pd.Series([4, np.nan, 6])) + >>> s + 0 4 + 1 2 + 2 6 + dtype: int64 + + ``other`` can also be a non-Series object type + that is coercible into a Series + + >>> s = pd.Series([1, 2, 3]) + >>> s.update([4, np.nan, 6]) + >>> s + 0 4 + 1 2 + 2 6 + dtype: int64 + + >>> s = pd.Series([1, 2, 3]) + >>> s.update({1: 9}) + >>> s + 0 1 + 1 9 + 2 3 + dtype: int64 + """ + + if not isinstance(other, Series): + other = Series(other) + + other = other.reindex_like(self) + mask = notna(other) + + self._mgr = self._mgr.putmask(mask=mask, new=other) + self._maybe_update_cacher() + + # ---------------------------------------------------------------------- + # Reindexing, sorting + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + def sort_values( + self, + axis=0, + ascending: bool | int | Sequence[bool | int] = True, + inplace: bool = False, + kind: str = "quicksort", + na_position: str = "last", + ignore_index: bool = False, + key: ValueKeyFunc = None, + ): + """ + Sort by the values. + + Sort a Series in ascending or descending order by some + criterion. + + Parameters + ---------- + axis : {0 or 'index'}, default 0 + Axis to direct sorting. The value 'index' is accepted for + compatibility with DataFrame.sort_values. + ascending : bool or list of bools, default True + If True, sort values in ascending order, otherwise descending. + inplace : bool, default False + If True, perform operation in-place. + kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'quicksort' + Choice of sorting algorithm. See also :func:`numpy.sort` for more + information. 'mergesort' and 'stable' are the only stable algorithms. + na_position : {'first' or 'last'}, default 'last' + Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at + the end. + ignore_index : bool, default False + If True, the resulting axis will be labeled 0, 1, …, n - 1. + + .. versionadded:: 1.0.0 + + key : callable, optional + If not None, apply the key function to the series values + before sorting. This is similar to the `key` argument in the + builtin :meth:`sorted` function, with the notable difference that + this `key` function should be *vectorized*. It should expect a + ``Series`` and return an array-like. + + .. versionadded:: 1.1.0 + + Returns + ------- + Series or None + Series ordered by values or None if ``inplace=True``. + + See Also + -------- + Series.sort_index : Sort by the Series indices. + DataFrame.sort_values : Sort DataFrame by the values along either axis. + DataFrame.sort_index : Sort DataFrame by indices. + + Examples + -------- + >>> s = pd.Series([np.nan, 1, 3, 10, 5]) + >>> s + 0 NaN + 1 1.0 + 2 3.0 + 3 10.0 + 4 5.0 + dtype: float64 + + Sort values ascending order (default behaviour) + + >>> s.sort_values(ascending=True) + 1 1.0 + 2 3.0 + 4 5.0 + 3 10.0 + 0 NaN + dtype: float64 + + Sort values descending order + + >>> s.sort_values(ascending=False) + 3 10.0 + 4 5.0 + 2 3.0 + 1 1.0 + 0 NaN + dtype: float64 + + Sort values inplace + + >>> s.sort_values(ascending=False, inplace=True) + >>> s + 3 10.0 + 4 5.0 + 2 3.0 + 1 1.0 + 0 NaN + dtype: float64 + + Sort values putting NAs first + + >>> s.sort_values(na_position='first') + 0 NaN + 1 1.0 + 2 3.0 + 4 5.0 + 3 10.0 + dtype: float64 + + Sort a series of strings + + >>> s = pd.Series(['z', 'b', 'd', 'a', 'c']) + >>> s + 0 z + 1 b + 2 d + 3 a + 4 c + dtype: object + + >>> s.sort_values() + 3 a + 1 b + 4 c + 2 d + 0 z + dtype: object + + Sort using a key function. 
Your `key` function will be + given the ``Series`` of values and should return an array-like. + + >>> s = pd.Series(['a', 'B', 'c', 'D', 'e']) + >>> s.sort_values() + 1 B + 3 D + 0 a + 2 c + 4 e + dtype: object + >>> s.sort_values(key=lambda x: x.str.lower()) + 0 a + 1 B + 2 c + 3 D + 4 e + dtype: object + + NumPy ufuncs work well here. For example, we can + sort by the ``sin`` of the value + + >>> s = pd.Series([-4, -2, 0, 2, 4]) + >>> s.sort_values(key=np.sin) + 1 -2 + 4 4 + 2 0 + 0 -4 + 3 2 + dtype: int64 + + More complicated user-defined functions can be used, + as long as they expect a Series and return an array-like + + >>> s.sort_values(key=lambda x: (np.tan(x.cumsum()))) + 0 -4 + 3 2 + 4 4 + 1 -2 + 2 0 + dtype: int64 + """ + inplace = validate_bool_kwarg(inplace, "inplace") + # Validate the axis parameter + self._get_axis_number(axis) + + # GH 5856/5853 + if inplace and self._is_cached: + raise ValueError( + "This Series is a view of some other array, to " + "sort in-place you must create a copy" + ) + + if is_list_like(ascending): + ascending = cast(Sequence[Union[bool, int]], ascending) + if len(ascending) != 1: + raise ValueError( + f"Length of ascending ({len(ascending)}) must be 1 for Series" + ) + ascending = ascending[0] + + ascending = validate_ascending(ascending) + + if na_position not in ["first", "last"]: + raise ValueError(f"invalid na_position: {na_position}") + + # GH 35922. Make sorting stable by leveraging nargsort + values_to_sort = ensure_key_mapped(self, key)._values if key else self._values + sorted_index = nargsort(values_to_sort, kind, bool(ascending), na_position) + + result = self._constructor( + self._values[sorted_index], index=self.index[sorted_index] + ) + + if ignore_index: + result.index = default_index(len(sorted_index)) + + if inplace: + self._update_inplace(result) + else: + return result.__finalize__(self, method="sort_values") + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + def sort_index( + self, + axis=0, + level=None, + ascending: bool | int | Sequence[bool | int] = True, + inplace: bool = False, + kind: str = "quicksort", + na_position: str = "last", + sort_remaining: bool = True, + ignore_index: bool = False, + key: IndexKeyFunc = None, + ): + """ + Sort Series by index labels. + + Returns a new Series sorted by label if `inplace` argument is + ``False``, otherwise updates the original series and returns None. + + Parameters + ---------- + axis : int, default 0 + Axis to direct sorting. This can only be 0 for Series. + level : int, optional + If not None, sort on values in specified index level(s). + ascending : bool or list-like of bools, default True + Sort ascending vs. descending. When the index is a MultiIndex the + sort direction can be controlled for each level individually. + inplace : bool, default False + If True, perform operation in-place. + kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'quicksort' + Choice of sorting algorithm. See also :func:`numpy.sort` for more + information. 'mergesort' and 'stable' are the only stable algorithms. For + DataFrames, this option is only applied when sorting on a single + column or label. + na_position : {'first', 'last'}, default 'last' + If 'first' puts NaNs at the beginning, 'last' puts NaNs at the end. + Not implemented for MultiIndex. + sort_remaining : bool, default True + If True and sorting by level and index is multilevel, sort by other + levels too (in order) after sorting by specified level. 
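A brief sketch of `sort_values` with a vectorized `key`, mirroring the docstring above (illustrative, not part of the diffed source):

import pandas as pd

s = pd.Series(['a', 'B', 'c', 'D'])
print(s.sort_values().tolist())                             # ['B', 'D', 'a', 'c'] -- bytewise order
print(s.sort_values(key=lambda x: x.str.lower()).tolist())  # ['a', 'B', 'c', 'D'] -- case-insensitive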
+ ignore_index : bool, default False + If True, the resulting axis will be labeled 0, 1, …, n - 1. + + .. versionadded:: 1.0.0 + + key : callable, optional + If not None, apply the key function to the index values + before sorting. This is similar to the `key` argument in the + builtin :meth:`sorted` function, with the notable difference that + this `key` function should be *vectorized*. It should expect an + ``Index`` and return an ``Index`` of the same shape. + + .. versionadded:: 1.1.0 + + Returns + ------- + Series or None + The original Series sorted by the labels or None if ``inplace=True``. + + See Also + -------- + DataFrame.sort_index: Sort DataFrame by the index. + DataFrame.sort_values: Sort DataFrame by the value. + Series.sort_values : Sort Series by the value. + + Examples + -------- + >>> s = pd.Series(['a', 'b', 'c', 'd'], index=[3, 2, 1, 4]) + >>> s.sort_index() + 1 c + 2 b + 3 a + 4 d + dtype: object + + Sort Descending + + >>> s.sort_index(ascending=False) + 4 d + 3 a + 2 b + 1 c + dtype: object + + Sort Inplace + + >>> s.sort_index(inplace=True) + >>> s + 1 c + 2 b + 3 a + 4 d + dtype: object + + By default NaNs are put at the end, but use `na_position` to place + them at the beginning + + >>> s = pd.Series(['a', 'b', 'c', 'd'], index=[3, 2, 1, np.nan]) + >>> s.sort_index(na_position='first') + NaN d + 1.0 c + 2.0 b + 3.0 a + dtype: object + + Specify index level to sort + + >>> arrays = [np.array(['qux', 'qux', 'foo', 'foo', + ... 'baz', 'baz', 'bar', 'bar']), + ... np.array(['two', 'one', 'two', 'one', + ... 'two', 'one', 'two', 'one'])] + >>> s = pd.Series([1, 2, 3, 4, 5, 6, 7, 8], index=arrays) + >>> s.sort_index(level=1) + bar one 8 + baz one 6 + foo one 4 + qux one 2 + bar two 7 + baz two 5 + foo two 3 + qux two 1 + dtype: int64 + + Does not sort by remaining levels when sorting by levels + + >>> s.sort_index(level=1, sort_remaining=False) + qux one 2 + foo one 4 + baz one 6 + bar one 8 + qux two 1 + foo two 3 + baz two 5 + bar two 7 + dtype: int64 + + Apply a key function before sorting + + >>> s = pd.Series([1, 2, 3, 4], index=['A', 'b', 'C', 'd']) + >>> s.sort_index(key=lambda x : x.str.lower()) + A 1 + b 2 + C 3 + d 4 + dtype: int64 + """ + + return super().sort_index( + axis, + level, + ascending, + inplace, + kind, + na_position, + sort_remaining, + ignore_index, + key, + ) + + def argsort(self, axis=0, kind="quicksort", order=None) -> Series: + """ + Return the integer indices that would sort the Series values. + + Override ndarray.argsort. Argsorts the value, omitting NA/null values, + and places the result in the same locations as the non-NA values. + + Parameters + ---------- + axis : {0 or "index"} + Has no effect but is accepted for compatibility with numpy. + kind : {'mergesort', 'quicksort', 'heapsort', 'stable'}, default 'quicksort' + Choice of sorting algorithm. See :func:`numpy.sort` for more + information. 'mergesort' and 'stable' are the only stable algorithms. + order : None + Has no effect but is accepted for compatibility with numpy. + + Returns + ------- + Series[np.intp] + Positions of values within the sort order with -1 indicating + nan values. + + See Also + -------- + numpy.ndarray.argsort : Returns the indices that would sort this array. 
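An illustrative sketch of `sort_index`, whose parameters are documented above (editorial addition, not from the vendored source):

import pandas as pd

s = pd.Series(['a', 'b', 'c'], index=[3, 1, 2])
print(s.sort_index().tolist())                       # ['b', 'c', 'a'] -- labels now 1, 2, 3
print(s.sort_index(ascending=False).index.tolist())  # [3, 2, 1]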
+ """ + values = self._values + mask = isna(values) + + if mask.any(): + result = np.full(len(self), -1, dtype=np.intp) + notmask = ~mask + result[notmask] = np.argsort(values[notmask], kind=kind) + else: + result = np.argsort(values, kind=kind) + + res = self._constructor(result, index=self.index, name=self.name, dtype=np.intp) + return res.__finalize__(self, method="argsort") + + def nlargest(self, n=5, keep="first") -> Series: + """ + Return the largest `n` elements. + + Parameters + ---------- + n : int, default 5 + Return this many descending sorted values. + keep : {'first', 'last', 'all'}, default 'first' + When there are duplicate values that cannot all fit in a + Series of `n` elements: + + - ``first`` : return the first `n` occurrences in order + of appearance. + - ``last`` : return the last `n` occurrences in reverse + order of appearance. + - ``all`` : keep all occurrences. This can result in a Series of + size larger than `n`. + + Returns + ------- + Series + The `n` largest values in the Series, sorted in decreasing order. + + See Also + -------- + Series.nsmallest: Get the `n` smallest elements. + Series.sort_values: Sort Series by values. + Series.head: Return the first `n` rows. + + Notes + ----- + Faster than ``.sort_values(ascending=False).head(n)`` for small `n` + relative to the size of the ``Series`` object. + + Examples + -------- + >>> countries_population = {"Italy": 59000000, "France": 65000000, + ... "Malta": 434000, "Maldives": 434000, + ... "Brunei": 434000, "Iceland": 337000, + ... "Nauru": 11300, "Tuvalu": 11300, + ... "Anguilla": 11300, "Montserrat": 5200} + >>> s = pd.Series(countries_population) + >>> s + Italy 59000000 + France 65000000 + Malta 434000 + Maldives 434000 + Brunei 434000 + Iceland 337000 + Nauru 11300 + Tuvalu 11300 + Anguilla 11300 + Montserrat 5200 + dtype: int64 + + The `n` largest elements where ``n=5`` by default. + + >>> s.nlargest() + France 65000000 + Italy 59000000 + Malta 434000 + Maldives 434000 + Brunei 434000 + dtype: int64 + + The `n` largest elements where ``n=3``. Default `keep` value is 'first' + so Malta will be kept. + + >>> s.nlargest(3) + France 65000000 + Italy 59000000 + Malta 434000 + dtype: int64 + + The `n` largest elements where ``n=3`` and keeping the last duplicates. + Brunei will be kept since it is the last with value 434000 based on + the index order. + + >>> s.nlargest(3, keep='last') + France 65000000 + Italy 59000000 + Brunei 434000 + dtype: int64 + + The `n` largest elements where ``n=3`` with all duplicates kept. Note + that the returned Series has five elements due to the three duplicates. + + >>> s.nlargest(3, keep='all') + France 65000000 + Italy 59000000 + Malta 434000 + Maldives 434000 + Brunei 434000 + dtype: int64 + """ + return algorithms.SelectNSeries(self, n=n, keep=keep).nlargest() + + def nsmallest(self, n: int = 5, keep: str = "first") -> Series: + """ + Return the smallest `n` elements. + + Parameters + ---------- + n : int, default 5 + Return this many ascending sorted values. + keep : {'first', 'last', 'all'}, default 'first' + When there are duplicate values that cannot all fit in a + Series of `n` elements: + + - ``first`` : return the first `n` occurrences in order + of appearance. + - ``last`` : return the last `n` occurrences in reverse + order of appearance. + - ``all`` : keep all occurrences. This can result in a Series of + size larger than `n`. + + Returns + ------- + Series + The `n` smallest values in the Series, sorted in increasing order. 
+ + See Also + -------- + Series.nlargest: Get the `n` largest elements. + Series.sort_values: Sort Series by values. + Series.head: Return the first `n` rows. + + Notes + ----- + Faster than ``.sort_values().head(n)`` for small `n` relative to + the size of the ``Series`` object. + + Examples + -------- + >>> countries_population = {"Italy": 59000000, "France": 65000000, + ... "Brunei": 434000, "Malta": 434000, + ... "Maldives": 434000, "Iceland": 337000, + ... "Nauru": 11300, "Tuvalu": 11300, + ... "Anguilla": 11300, "Montserrat": 5200} + >>> s = pd.Series(countries_population) + >>> s + Italy 59000000 + France 65000000 + Brunei 434000 + Malta 434000 + Maldives 434000 + Iceland 337000 + Nauru 11300 + Tuvalu 11300 + Anguilla 11300 + Montserrat 5200 + dtype: int64 + + The `n` smallest elements where ``n=5`` by default. + + >>> s.nsmallest() + Montserrat 5200 + Nauru 11300 + Tuvalu 11300 + Anguilla 11300 + Iceland 337000 + dtype: int64 + + The `n` smallest elements where ``n=3``. Default `keep` value is + 'first' so Nauru and Tuvalu will be kept. + + >>> s.nsmallest(3) + Montserrat 5200 + Nauru 11300 + Tuvalu 11300 + dtype: int64 + + The `n` smallest elements where ``n=3`` and keeping the last + duplicates. Anguilla and Tuvalu will be kept since they are the last + with value 11300 based on the index order. + + >>> s.nsmallest(3, keep='last') + Montserrat 5200 + Anguilla 11300 + Tuvalu 11300 + dtype: int64 + + The `n` smallest elements where ``n=3`` with all duplicates kept. Note + that the returned Series has four elements due to the three duplicates. + + >>> s.nsmallest(3, keep='all') + Montserrat 5200 + Nauru 11300 + Tuvalu 11300 + Anguilla 11300 + dtype: int64 + """ + return algorithms.SelectNSeries(self, n=n, keep=keep).nsmallest() + + @doc( + klass=_shared_doc_kwargs["klass"], + extra_params=dedent( + """copy : bool, default True + Whether to copy underlying data.""" + ), + examples=dedent( + """\ + Examples + -------- + >>> s = pd.Series( + ... ["A", "B", "A", "C"], + ... index=[ + ... ["Final exam", "Final exam", "Coursework", "Coursework"], + ... ["History", "Geography", "History", "Geography"], + ... ["January", "February", "March", "April"], + ... ], + ... ) + >>> s + Final exam History January A + Geography February B + Coursework History March A + Geography April C + dtype: object + + In the following example, we will swap the levels of the indices. + Here, we will swap the levels column-wise, but levels can be swapped row-wise + in a similar manner. Note that column-wise is the default behaviour. + By not supplying any arguments for i and j, we swap the last and second to + last indices. + + >>> s.swaplevel() + Final exam January History A + February Geography B + Coursework March History A + April Geography C + dtype: object + + By supplying one argument, we can choose which index to swap the last + index with. We can for example swap the first index with the last one as + follows. + + >>> s.swaplevel(0) + January History Final exam A + February Geography Final exam B + March History Coursework A + April Geography Coursework C + dtype: object + + We can also define explicitly which indices we want to swap by supplying values + for both i and j. Here, we for example swap the first and second indices. 
+ + >>> s.swaplevel(0, 1) + History Final exam January A + Geography Final exam February B + History Coursework March A + Geography Coursework April C + dtype: object""" + ), + ) + def swaplevel(self, i=-2, j=-1, copy=True) -> Series: + """ + Swap levels i and j in a :class:`MultiIndex`. + + Default is to swap the two innermost levels of the index. + + Parameters + ---------- + i, j : int or str + Levels of the indices to be swapped. Can pass level name as string. + {extra_params} + + Returns + ------- + {klass} + {klass} with levels swapped in MultiIndex. + + {examples} + """ + assert isinstance(self.index, MultiIndex) + new_index = self.index.swaplevel(i, j) + return self._constructor(self._values, index=new_index, copy=copy).__finalize__( + self, method="swaplevel" + ) + + def reorder_levels(self, order) -> Series: + """ + Rearrange index levels using input order. + + May not drop or duplicate levels. + + Parameters + ---------- + order : list of int representing new level order + Reference level by number or key. + + Returns + ------- + type of caller (new object) + """ + if not isinstance(self.index, MultiIndex): # pragma: no cover + raise Exception("Can only reorder levels on a hierarchical axis.") + + result = self.copy() + assert isinstance(result.index, MultiIndex) + result.index = result.index.reorder_levels(order) + return result + + def explode(self, ignore_index: bool = False) -> Series: + """ + Transform each element of a list-like to a row. + + .. versionadded:: 0.25.0 + + Parameters + ---------- + ignore_index : bool, default False + If True, the resulting index will be labeled 0, 1, …, n - 1. + + .. versionadded:: 1.1.0 + + Returns + ------- + Series + Exploded lists to rows; index will be duplicated for these rows. + + See Also + -------- + Series.str.split : Split string values on specified separator. + Series.unstack : Unstack, a.k.a. pivot, Series with MultiIndex + to produce DataFrame. + DataFrame.melt : Unpivot a DataFrame from wide format to long format. + DataFrame.explode : Explode a DataFrame from list-like + columns to long format. + + Notes + ----- + This routine will explode list-likes including lists, tuples, sets, + Series, and np.ndarray. The result dtype of the subset rows will + be object. Scalars will be returned unchanged, and empty list-likes will + result in a np.nan for that row. In addition, the ordering of elements in + the output will be non-deterministic when exploding sets. + + Reference :ref:`the user guide ` for more examples. + + Examples + -------- + >>> s = pd.Series([[1, 2, 3], 'foo', [], [3, 4]]) + >>> s + 0 [1, 2, 3] + 1 foo + 2 [] + 3 [3, 4] + dtype: object + + >>> s.explode() + 0 1 + 0 2 + 0 3 + 1 foo + 2 NaN + 3 3 + 3 4 + dtype: object + """ + if not len(self) or not is_object_dtype(self): + result = self.copy() + return result.reset_index(drop=True) if ignore_index else result + + values, counts = reshape.explode(np.asarray(self._values)) + + if ignore_index: + index = default_index(len(values)) + else: + index = self.index.repeat(counts) + + return self._constructor(values, index=index, name=self.name) + + def unstack(self, level=-1, fill_value=None) -> DataFrame: + """ + Unstack, also known as pivot, Series with MultiIndex to produce DataFrame. + + Parameters + ---------- + level : int, str, or list of these, default last level + Level(s) to unstack, can pass level name. + fill_value : scalar value, default None + Value to use when replacing NaN values. + + Returns + ------- + DataFrame + Unstacked Series. 
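A minimal sketch of `explode`, implemented above (editorial illustration, not part of the diffed pandas source):

import pandas as pd

s = pd.Series([[1, 2], [], 'foo'])
print(s.explode())                   # one row per list element; the empty list becomes NaN, index repeats
print(s.explode(ignore_index=True))  # same values with a fresh 0..n-1 index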
+ + Notes + ----- + Reference :ref:`the user guide ` for more examples. + + Examples + -------- + >>> s = pd.Series([1, 2, 3, 4], + ... index=pd.MultiIndex.from_product([['one', 'two'], + ... ['a', 'b']])) + >>> s + one a 1 + b 2 + two a 3 + b 4 + dtype: int64 + + >>> s.unstack(level=-1) + a b + one 1 2 + two 3 4 + + >>> s.unstack(level=0) + one two + a 1 3 + b 2 4 + """ + from pandas.core.reshape.reshape import unstack + + return unstack(self, level, fill_value) + + # ---------------------------------------------------------------------- + # function application + + def map(self, arg, na_action=None) -> Series: + """ + Map values of Series according to an input mapping or function. + + Used for substituting each value in a Series with another value, + that may be derived from a function, a ``dict`` or + a :class:`Series`. + + Parameters + ---------- + arg : function, collections.abc.Mapping subclass or Series + Mapping correspondence. + na_action : {None, 'ignore'}, default None + If 'ignore', propagate NaN values, without passing them to the + mapping correspondence. + + Returns + ------- + Series + Same index as caller. + + See Also + -------- + Series.apply : For applying more complex functions on a Series. + DataFrame.apply : Apply a function row-/column-wise. + DataFrame.applymap : Apply a function elementwise on a whole DataFrame. + + Notes + ----- + When ``arg`` is a dictionary, values in Series that are not in the + dictionary (as keys) are converted to ``NaN``. However, if the + dictionary is a ``dict`` subclass that defines ``__missing__`` (i.e. + provides a method for default values), then this default is used + rather than ``NaN``. + + Examples + -------- + >>> s = pd.Series(['cat', 'dog', np.nan, 'rabbit']) + >>> s + 0 cat + 1 dog + 2 NaN + 3 rabbit + dtype: object + + ``map`` accepts a ``dict`` or a ``Series``. Values that are not found + in the ``dict`` are converted to ``NaN``, unless the dict has a default + value (e.g. ``defaultdict``): + + >>> s.map({'cat': 'kitten', 'dog': 'puppy'}) + 0 kitten + 1 puppy + 2 NaN + 3 NaN + dtype: object + + It also accepts a function: + + >>> s.map('I am a {}'.format) + 0 I am a cat + 1 I am a dog + 2 I am a nan + 3 I am a rabbit + dtype: object + + To avoid applying the function to missing values (and keep them as + ``NaN``) ``na_action='ignore'`` can be used: + + >>> s.map('I am a {}'.format, na_action='ignore') + 0 I am a cat + 1 I am a dog + 2 NaN + 3 I am a rabbit + dtype: object + """ + new_values = self._map_values(arg, na_action=na_action) + return self._constructor(new_values, index=self.index).__finalize__( + self, method="map" + ) + + def _gotitem(self, key, ndim, subset=None) -> Series: + """ + Sub-classes to define. Return a sliced object. + + Parameters + ---------- + key : string / list of selections + ndim : {1, 2} + Requested ndim of result. + subset : object, default None + Subset to act on. + """ + return self + + _agg_see_also_doc = dedent( + """ + See Also + -------- + Series.apply : Invoke function on a Series. + Series.transform : Transform function producing a Series with like indexes. 
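A short sketch of `unstack` and `map` as documented above (illustrative, not from the vendored source):

import pandas as pd

s = pd.Series([1, 2, 3, 4],
              index=pd.MultiIndex.from_product([['one', 'two'], ['a', 'b']]))
print(s.unstack())   # the inner index level becomes columns 'a' and 'b'

animals = pd.Series(['cat', 'dog'])
print(animals.map({'cat': 'kitten'}))   # unmapped 'dog' becomes NaN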
+ """ + ) + + _agg_examples_doc = dedent( + """ + Examples + -------- + >>> s = pd.Series([1, 2, 3, 4]) + >>> s + 0 1 + 1 2 + 2 3 + 3 4 + dtype: int64 + + >>> s.agg('min') + 1 + + >>> s.agg(['min', 'max']) + min 1 + max 4 + dtype: int64 + """ + ) + + @doc( + _shared_docs["aggregate"], + klass=_shared_doc_kwargs["klass"], + axis=_shared_doc_kwargs["axis"], + see_also=_agg_see_also_doc, + examples=_agg_examples_doc, + ) + def aggregate(self, func=None, axis=0, *args, **kwargs): + # Validate the axis parameter + self._get_axis_number(axis) + + # if func is None, will switch to user-provided "named aggregation" kwargs + if func is None: + func = dict(kwargs.items()) + + op = SeriesApply(self, func, convert_dtype=False, args=args, kwargs=kwargs) + result = op.agg() + return result + + agg = aggregate + + @doc( + _shared_docs["transform"], + klass=_shared_doc_kwargs["klass"], + axis=_shared_doc_kwargs["axis"], + ) + def transform( + self, func: AggFuncType, axis: Axis = 0, *args, **kwargs + ) -> DataFrame | Series: + # Validate axis argument + self._get_axis_number(axis) + result = SeriesApply( + self, func=func, convert_dtype=True, args=args, kwargs=kwargs + ).transform() + return result + + def apply( + self, + func: AggFuncType, + convert_dtype: bool = True, + args: tuple[Any, ...] = (), + **kwargs, + ) -> DataFrame | Series: + """ + Invoke function on values of Series. + + Can be ufunc (a NumPy function that applies to the entire Series) + or a Python function that only works on single values. + + Parameters + ---------- + func : function + Python function or NumPy ufunc to apply. + convert_dtype : bool, default True + Try to find better dtype for elementwise function results. If + False, leave as dtype=object. Note that the dtype is always + preserved for some extension array dtypes, such as Categorical. + args : tuple + Positional arguments passed to func after the series value. + **kwargs + Additional keyword arguments passed to func. + + Returns + ------- + Series or DataFrame + If func returns a Series object the result will be a DataFrame. + + See Also + -------- + Series.map: For element-wise operations. + Series.agg: Only perform aggregating type operations. + Series.transform: Only perform transforming type operations. + + Notes + ----- + Functions that mutate the passed object can produce unexpected + behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` + for more details. + + Examples + -------- + Create a series with typical summer temperatures for each city. + + >>> s = pd.Series([20, 21, 12], + ... index=['London', 'New York', 'Helsinki']) + >>> s + London 20 + New York 21 + Helsinki 12 + dtype: int64 + + Square the values by defining a function and passing it as an + argument to ``apply()``. + + >>> def square(x): + ... return x ** 2 + >>> s.apply(square) + London 400 + New York 441 + Helsinki 144 + dtype: int64 + + Square the values by passing an anonymous function as an + argument to ``apply()``. + + >>> s.apply(lambda x: x ** 2) + London 400 + New York 441 + Helsinki 144 + dtype: int64 + + Define a custom function that needs additional positional + arguments and pass these additional arguments using the + ``args`` keyword. + + >>> def subtract_custom_value(x, custom_value): + ... return x - custom_value + + >>> s.apply(subtract_custom_value, args=(5,)) + London 15 + New York 16 + Helsinki 7 + dtype: int64 + + Define a custom function that takes keyword arguments + and pass these arguments to ``apply``. + + >>> def add_custom_values(x, **kwargs): + ... 
for month in kwargs: + ... x += kwargs[month] + ... return x + + >>> s.apply(add_custom_values, june=30, july=20, august=25) + London 95 + New York 96 + Helsinki 87 + dtype: int64 + + Use a function from the Numpy library. + + >>> s.apply(np.log) + London 2.995732 + New York 3.044522 + Helsinki 2.484907 + dtype: float64 + """ + return SeriesApply(self, func, convert_dtype, args, kwargs).apply() + + def _reduce( + self, + op, + name: str, + *, + axis=0, + skipna=True, + numeric_only=None, + filter_type=None, + **kwds, + ): + """ + Perform a reduction operation. + + If we have an ndarray as a value, then simply perform the operation, + otherwise delegate to the object. + """ + delegate = self._values + + if axis is not None: + self._get_axis_number(axis) + + if isinstance(delegate, ExtensionArray): + # dispatch to ExtensionArray interface + return delegate._reduce(name, skipna=skipna, **kwds) + + else: + # dispatch to numpy arrays + if numeric_only: + kwd_name = "numeric_only" + if name in ["any", "all"]: + kwd_name = "bool_only" + raise NotImplementedError( + f"Series.{name} does not implement {kwd_name}." + ) + with np.errstate(all="ignore"): + return op(delegate, skipna=skipna, **kwds) + + def _reindex_indexer( + self, new_index: Index | None, indexer: npt.NDArray[np.intp] | None, copy: bool + ) -> Series: + # Note: new_index is None iff indexer is None + # if not None, indexer is np.intp + if indexer is None: + if copy: + return self.copy() + return self + + new_values = algorithms.take_nd( + self._values, indexer, allow_fill=True, fill_value=None + ) + return self._constructor(new_values, index=new_index) + + def _needs_reindex_multi(self, axes, method, level) -> bool: + """ + Check if we do need a multi reindex; this is for compat with + higher dims. + """ + return False + + # error: Cannot determine type of 'align' + @doc( + NDFrame.align, # type: ignore[has-type] + klass=_shared_doc_kwargs["klass"], + axes_single_arg=_shared_doc_kwargs["axes_single_arg"], + ) + def align( + self, + other, + join="outer", + axis=None, + level=None, + copy=True, + fill_value=None, + method=None, + limit=None, + fill_axis=0, + broadcast_axis=None, + ): + return super().align( + other, + join=join, + axis=axis, + level=level, + copy=copy, + fill_value=fill_value, + method=method, + limit=limit, + fill_axis=fill_axis, + broadcast_axis=broadcast_axis, + ) + + def rename( + self, + index=None, + *, + axis=None, + copy=True, + inplace=False, + level=None, + errors="ignore", + ) -> Series | None: + """ + Alter Series index labels or name. + + Function / dict values must be unique (1-to-1). Labels not contained in + a dict / Series will be left as-is. Extra labels listed don't throw an + error. + + Alternatively, change ``Series.name`` with a scalar value. + + See the :ref:`user guide ` for more. + + Parameters + ---------- + axis : {0 or "index"} + Unused. Accepted for compatibility with DataFrame method only. + index : scalar, hashable sequence, dict-like or function, optional + Functions or dict-like are transformations to apply to + the index. + Scalar or hashable sequence-like will alter the ``Series.name`` + attribute. + + **kwargs + Additional keyword arguments passed to the function. Only the + "inplace" keyword is used. + + Returns + ------- + Series or None + Series with index labels or name altered or None if ``inplace=True``. + + See Also + -------- + DataFrame.rename : Corresponding DataFrame method. + Series.rename_axis : Set the name of the axis. 
+ + Examples + -------- + >>> s = pd.Series([1, 2, 3]) + >>> s + 0 1 + 1 2 + 2 3 + dtype: int64 + >>> s.rename("my_name") # scalar, changes Series.name + 0 1 + 1 2 + 2 3 + Name: my_name, dtype: int64 + >>> s.rename(lambda x: x ** 2) # function, changes labels + 0 1 + 1 2 + 4 3 + dtype: int64 + >>> s.rename({1: 3, 2: 5}) # mapping, changes labels + 0 1 + 3 2 + 5 3 + dtype: int64 + """ + if axis is not None: + # Make sure we raise if an invalid 'axis' is passed. + axis = self._get_axis_number(axis) + + if callable(index) or is_dict_like(index): + return super()._rename( + index, copy=copy, inplace=inplace, level=level, errors=errors + ) + else: + return self._set_name(index, inplace=inplace) + + @overload + def set_axis( + self, labels, axis: Axis = ..., inplace: Literal[False] = ... + ) -> Series: + ... + + @overload + def set_axis(self, labels, axis: Axis, inplace: Literal[True]) -> None: + ... + + @overload + def set_axis(self, labels, *, inplace: Literal[True]) -> None: + ... + + @overload + def set_axis(self, labels, axis: Axis = ..., inplace: bool = ...) -> Series | None: + ... + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "labels"]) + @Appender( + """ + Examples + -------- + >>> s = pd.Series([1, 2, 3]) + >>> s + 0 1 + 1 2 + 2 3 + dtype: int64 + + >>> s.set_axis(['a', 'b', 'c'], axis=0) + a 1 + b 2 + c 3 + dtype: int64 + """ + ) + @Substitution( + **_shared_doc_kwargs, + extended_summary_sub="", + axis_description_sub="", + see_also_sub="", + ) + @Appender(NDFrame.set_axis.__doc__) + def set_axis(self, labels, axis: Axis = 0, inplace: bool = False): + return super().set_axis(labels, axis=axis, inplace=inplace) + + # error: Cannot determine type of 'reindex' + @doc( + NDFrame.reindex, # type: ignore[has-type] + klass=_shared_doc_kwargs["klass"], + axes=_shared_doc_kwargs["axes"], + optional_labels=_shared_doc_kwargs["optional_labels"], + optional_axis=_shared_doc_kwargs["optional_axis"], + ) + def reindex(self, *args, **kwargs) -> Series: + if len(args) > 1: + raise TypeError("Only one positional argument ('index') is allowed") + if args: + (index,) = args + if "index" in kwargs: + raise TypeError( + "'index' passed as both positional and keyword argument" + ) + kwargs.update({"index": index}) + return super().reindex(**kwargs) + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "labels"]) + def drop( + self, + labels=None, + axis=0, + index=None, + columns=None, + level=None, + inplace=False, + errors="raise", + ) -> Series: + """ + Return Series with specified index labels removed. + + Remove elements of a Series based on specifying the index labels. + When using a multi-index, labels on different levels can be removed + by specifying the level. + + Parameters + ---------- + labels : single label or list-like + Index labels to drop. + axis : 0, default 0 + Redundant for application on Series. + index : single label or list-like + Redundant for application on Series, but 'index' can be used instead + of 'labels'. + columns : single label or list-like + No change is made to the Series; use 'index' or 'labels' instead. + level : int or level name, optional + For MultiIndex, level for which the labels will be removed. + inplace : bool, default False + If True, do operation inplace and return None. + errors : {'ignore', 'raise'}, default 'raise' + If 'ignore', suppress error and only existing labels are dropped. + + Returns + ------- + Series or None + Series with specified index labels removed or None if ``inplace=True``. 
+ + Raises + ------ + KeyError + If none of the labels are found in the index. + + See Also + -------- + Series.reindex : Return only specified index labels of Series. + Series.dropna : Return series without null values. + Series.drop_duplicates : Return Series with duplicate values removed. + DataFrame.drop : Drop specified labels from rows or columns. + + Examples + -------- + >>> s = pd.Series(data=np.arange(3), index=['A', 'B', 'C']) + >>> s + A 0 + B 1 + C 2 + dtype: int64 + + Drop labels B en C + + >>> s.drop(labels=['B', 'C']) + A 0 + dtype: int64 + + Drop 2nd level label in MultiIndex Series + + >>> midx = pd.MultiIndex(levels=[['lama', 'cow', 'falcon'], + ... ['speed', 'weight', 'length']], + ... codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2], + ... [0, 1, 2, 0, 1, 2, 0, 1, 2]]) + >>> s = pd.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3], + ... index=midx) + >>> s + lama speed 45.0 + weight 200.0 + length 1.2 + cow speed 30.0 + weight 250.0 + length 1.5 + falcon speed 320.0 + weight 1.0 + length 0.3 + dtype: float64 + + >>> s.drop(labels='weight', level=1) + lama speed 45.0 + length 1.2 + cow speed 30.0 + length 1.5 + falcon speed 320.0 + length 0.3 + dtype: float64 + """ + return super().drop( + labels=labels, + axis=axis, + index=index, + columns=columns, + level=level, + inplace=inplace, + errors=errors, + ) + + @overload + def fillna( + self, + value=..., + method: FillnaOptions | None = ..., + axis: Axis | None = ..., + inplace: Literal[False] = ..., + limit=..., + downcast=..., + ) -> Series: + ... + + @overload + def fillna( + self, + value, + method: FillnaOptions | None, + axis: Axis | None, + inplace: Literal[True], + limit=..., + downcast=..., + ) -> None: + ... + + @overload + def fillna( + self, + *, + inplace: Literal[True], + limit=..., + downcast=..., + ) -> None: + ... + + @overload + def fillna( + self, + value, + *, + inplace: Literal[True], + limit=..., + downcast=..., + ) -> None: + ... + + @overload + def fillna( + self, + *, + method: FillnaOptions | None, + inplace: Literal[True], + limit=..., + downcast=..., + ) -> None: + ... + + @overload + def fillna( + self, + *, + axis: Axis | None, + inplace: Literal[True], + limit=..., + downcast=..., + ) -> None: + ... + + @overload + def fillna( + self, + *, + method: FillnaOptions | None, + axis: Axis | None, + inplace: Literal[True], + limit=..., + downcast=..., + ) -> None: + ... + + @overload + def fillna( + self, + value, + *, + axis: Axis | None, + inplace: Literal[True], + limit=..., + downcast=..., + ) -> None: + ... + + @overload + def fillna( + self, + value, + method: FillnaOptions | None, + *, + inplace: Literal[True], + limit=..., + downcast=..., + ) -> None: + ... + + @overload + def fillna( + self, + value=..., + method: FillnaOptions | None = ..., + axis: Axis | None = ..., + inplace: bool = ..., + limit=..., + downcast=..., + ) -> Series | None: + ... + + # error: Cannot determine type of 'fillna' + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "value"]) + @doc(NDFrame.fillna, **_shared_doc_kwargs) # type: ignore[has-type] + def fillna( + self, + value: object | ArrayLike | None = None, + method: FillnaOptions | None = None, + axis=None, + inplace=False, + limit=None, + downcast=None, + ) -> Series | None: + return super().fillna( + value=value, + method=method, + axis=axis, + inplace=inplace, + limit=limit, + downcast=downcast, + ) + + def pop(self, item: Hashable) -> Any: + """ + Return item and drops from series. Raise KeyError if not found. 
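+
+        The Series is modified in place: the value stored under ``item`` is
+        removed from it and returned to the caller.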
+ + Parameters + ---------- + item : label + Index of the element that needs to be removed. + + Returns + ------- + Value that is popped from series. + + Examples + -------- + >>> ser = pd.Series([1,2,3]) + + >>> ser.pop(0) + 1 + + >>> ser + 1 2 + 2 3 + dtype: int64 + """ + return super().pop(item=item) + + # error: Cannot determine type of 'replace' + @doc( + NDFrame.replace, # type: ignore[has-type] + klass=_shared_doc_kwargs["klass"], + inplace=_shared_doc_kwargs["inplace"], + replace_iloc=_shared_doc_kwargs["replace_iloc"], + ) + def replace( + self, + to_replace=None, + value=lib.no_default, + inplace=False, + limit=None, + regex=False, + method: str | lib.NoDefault = lib.no_default, + ): + return super().replace( + to_replace=to_replace, + value=value, + inplace=inplace, + limit=limit, + regex=regex, + method=method, + ) + + @doc(INFO_DOCSTRING, **series_sub_kwargs) + def info( + self, + verbose: bool | None = None, + buf: IO[str] | None = None, + max_cols: int | None = None, + memory_usage: bool | str | None = None, + show_counts: bool = True, + ) -> None: + return SeriesInfo(self, memory_usage).render( + buf=buf, + max_cols=max_cols, + verbose=verbose, + show_counts=show_counts, + ) + + def _replace_single(self, to_replace, method: str, inplace: bool, limit): + """ + Replaces values in a Series using the fill method specified when no + replacement value is given in the replace method + """ + + result = self if inplace else self.copy() + + values = result._values + mask = missing.mask_missing(values, to_replace) + + if isinstance(values, ExtensionArray): + # dispatch to the EA's _pad_mask_inplace method + values._fill_mask_inplace(method, limit, mask) + else: + fill_f = missing.get_fill_func(method) + values, _ = fill_f(values, limit=limit, mask=mask) + + if inplace: + return + return result + + # error: Cannot determine type of 'shift' + @doc(NDFrame.shift, klass=_shared_doc_kwargs["klass"]) # type: ignore[has-type] + def shift(self, periods=1, freq=None, axis=0, fill_value=None) -> Series: + return super().shift( + periods=periods, freq=freq, axis=axis, fill_value=fill_value + ) + + def memory_usage(self, index: bool = True, deep: bool = False) -> int: + """ + Return the memory usage of the Series. + + The memory usage can optionally include the contribution of + the index and of elements of `object` dtype. + + Parameters + ---------- + index : bool, default True + Specifies whether to include the memory usage of the Series index. + deep : bool, default False + If True, introspect the data deeply by interrogating + `object` dtypes for system-level memory consumption, and include + it in the returned value. + + Returns + ------- + int + Bytes of memory consumed. + + See Also + -------- + numpy.ndarray.nbytes : Total bytes consumed by the elements of the + array. + DataFrame.memory_usage : Bytes consumed by a DataFrame. + + Examples + -------- + >>> s = pd.Series(range(3)) + >>> s.memory_usage() + 152 + + Not including the index gives the size of the rest of the data, which + is necessarily smaller: + + >>> s.memory_usage(index=False) + 24 + + The memory footprint of `object` values is ignored by default: + + >>> s = pd.Series(["a", "b"]) + >>> s.values + array(['a', 'b'], dtype=object) + >>> s.memory_usage() + 144 + >>> s.memory_usage(deep=True) + 244 + """ + v = self._memory_usage(deep=deep) + if index: + v += self.index.memory_usage(deep=deep) + return v + + def isin(self, values) -> Series: + """ + Whether elements in Series are contained in `values`. 
+ + Return a boolean Series showing whether each element in the Series + matches an element in the passed sequence of `values` exactly. + + Parameters + ---------- + values : set or list-like + The sequence of values to test. Passing in a single string will + raise a ``TypeError``. Instead, turn a single string into a + list of one element. + + Returns + ------- + Series + Series of booleans indicating if each element is in values. + + Raises + ------ + TypeError + * If `values` is a string + + See Also + -------- + DataFrame.isin : Equivalent method on DataFrame. + + Examples + -------- + >>> s = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama', + ... 'hippo'], name='animal') + >>> s.isin(['cow', 'lama']) + 0 True + 1 True + 2 True + 3 False + 4 True + 5 False + Name: animal, dtype: bool + + To invert the boolean values, use the ``~`` operator: + + >>> ~s.isin(['cow', 'lama']) + 0 False + 1 False + 2 False + 3 True + 4 False + 5 True + Name: animal, dtype: bool + + Passing a single string as ``s.isin('lama')`` will raise an error. Use + a list of one element instead: + + >>> s.isin(['lama']) + 0 True + 1 False + 2 True + 3 False + 4 True + 5 False + Name: animal, dtype: bool + + Strings and integers are distinct and are therefore not comparable: + + >>> pd.Series([1]).isin(['1']) + 0 False + dtype: bool + >>> pd.Series([1.1]).isin(['1.1']) + 0 False + dtype: bool + """ + result = algorithms.isin(self._values, values) + return self._constructor(result, index=self.index).__finalize__( + self, method="isin" + ) + + def between(self, left, right, inclusive="both") -> Series: + """ + Return boolean Series equivalent to left <= series <= right. + + This function returns a boolean vector containing `True` wherever the + corresponding Series element is between the boundary values `left` and + `right`. NA values are treated as `False`. + + Parameters + ---------- + left : scalar or list-like + Left boundary. + right : scalar or list-like + Right boundary. + inclusive : {"both", "neither", "left", "right"} + Include boundaries. Whether to set each bound as closed or open. + + .. versionchanged:: 1.3.0 + + Returns + ------- + Series + Series representing whether each element is between left and + right (inclusive). + + See Also + -------- + Series.gt : Greater than of series and other. + Series.lt : Less than of series and other. 
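+        Series.between_time : Select values between particular times of the day.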
+
+        Notes
+        -----
+        This function is equivalent to ``(left <= ser) & (ser <= right)``.
+
+        Examples
+        --------
+        >>> s = pd.Series([2, 0, 4, 8, np.nan])
+
+        Boundary values are included by default:
+
+        >>> s.between(1, 4)
+        0     True
+        1    False
+        2     True
+        3    False
+        4    False
+        dtype: bool
+
+        With `inclusive` set to ``"neither"`` boundary values are excluded:
+
+        >>> s.between(1, 4, inclusive="neither")
+        0     True
+        1    False
+        2    False
+        3    False
+        4    False
+        dtype: bool
+
+        `left` and `right` can be any scalar value:
+
+        >>> s = pd.Series(['Alice', 'Bob', 'Carol', 'Eve'])
+        >>> s.between('Anna', 'Daniel')
+        0    False
+        1     True
+        2     True
+        3    False
+        dtype: bool
+        """
+        if inclusive is True or inclusive is False:
+            warnings.warn(
+                "Boolean inputs to the `inclusive` argument are deprecated in "
+                "favour of `both` or `neither`.",
+                FutureWarning,
+                stacklevel=find_stack_level(),
+            )
+            if inclusive:
+                inclusive = "both"
+            else:
+                inclusive = "neither"
+        if inclusive == "both":
+            lmask = self >= left
+            rmask = self <= right
+        elif inclusive == "left":
+            lmask = self >= left
+            rmask = self < right
+        elif inclusive == "right":
+            lmask = self > left
+            rmask = self <= right
+        elif inclusive == "neither":
+            lmask = self > left
+            rmask = self < right
+        else:
+            raise ValueError(
+                "Inclusive has to be either string of 'both', "
+                "'left', 'right', or 'neither'."
+            )
+
+        return lmask & rmask
+
+    # ----------------------------------------------------------------------
+    # Convert to types that support pd.NA
+
+    def _convert_dtypes(
+        self,
+        infer_objects: bool = True,
+        convert_string: bool = True,
+        convert_integer: bool = True,
+        convert_boolean: bool = True,
+        convert_floating: bool = True,
+    ) -> Series:
+        input_series = self
+        if infer_objects:
+            input_series = input_series.infer_objects()
+            if is_object_dtype(input_series):
+                input_series = input_series.copy()
+
+        if convert_string or convert_integer or convert_boolean or convert_floating:
+            inferred_dtype = convert_dtypes(
+                input_series._values,
+                convert_string,
+                convert_integer,
+                convert_boolean,
+                convert_floating,
+            )
+            result = input_series.astype(inferred_dtype)
+        else:
+            result = input_series.copy()
+        return result
+
+    # error: Cannot determine type of 'isna'
+    @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"])  # type: ignore[has-type]
+    def isna(self) -> Series:
+        return NDFrame.isna(self)
+
+    # error: Cannot determine type of 'isna'
+    @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"])  # type: ignore[has-type]
+    def isnull(self) -> Series:
+        """
+        Series.isnull is an alias for Series.isna.
+        """
+        return super().isnull()
+
+    # error: Cannot determine type of 'notna'
+    @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"])  # type: ignore[has-type]
+    def notna(self) -> Series:
+        return super().notna()
+
+    # error: Cannot determine type of 'notna'
+    @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"])  # type: ignore[has-type]
+    def notnull(self) -> Series:
+        """
+        Series.notnull is an alias for Series.notna.
+        """
+        return super().notnull()
+
+    @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"])
+    def dropna(self, axis=0, inplace=False, how=None):
+        """
+        Return a new Series with missing values removed.
+
+        See the :ref:`User Guide <missing_data>` for more on which values are
+        considered missing, and how to work with missing data.
+
+        Parameters
+        ----------
+        axis : {0 or 'index'}, default 0
+            There is only one axis to drop values from.
+        inplace : bool, default False
+            If True, do operation inplace and return None.
+ how : str, optional + Not in use. Kept for compatibility. + + Returns + ------- + Series or None + Series with NA entries dropped from it or None if ``inplace=True``. + + See Also + -------- + Series.isna: Indicate missing values. + Series.notna : Indicate existing (non-missing) values. + Series.fillna : Replace missing values. + DataFrame.dropna : Drop rows or columns which contain NA values. + Index.dropna : Drop missing indices. + + Examples + -------- + >>> ser = pd.Series([1., 2., np.nan]) + >>> ser + 0 1.0 + 1 2.0 + 2 NaN + dtype: float64 + + Drop NA values from a Series. + + >>> ser.dropna() + 0 1.0 + 1 2.0 + dtype: float64 + + Keep the Series with valid entries in the same variable. + + >>> ser.dropna(inplace=True) + >>> ser + 0 1.0 + 1 2.0 + dtype: float64 + + Empty strings are not considered NA values. ``None`` is considered an + NA value. + + >>> ser = pd.Series([np.NaN, 2, pd.NaT, '', None, 'I stay']) + >>> ser + 0 NaN + 1 2 + 2 NaT + 3 + 4 None + 5 I stay + dtype: object + >>> ser.dropna() + 1 2 + 3 + 5 I stay + dtype: object + """ + inplace = validate_bool_kwarg(inplace, "inplace") + # Validate the axis parameter + self._get_axis_number(axis or 0) + + if self._can_hold_na: + result = remove_na_arraylike(self) + if inplace: + self._update_inplace(result) + else: + return result + else: + if inplace: + # do nothing + pass + else: + return self.copy() + + # ---------------------------------------------------------------------- + # Time series-oriented methods + + # error: Cannot determine type of 'asfreq' + @doc(NDFrame.asfreq, **_shared_doc_kwargs) # type: ignore[has-type] + def asfreq( + self, + freq, + method=None, + how: str | None = None, + normalize: bool = False, + fill_value=None, + ) -> Series: + return super().asfreq( + freq=freq, + method=method, + how=how, + normalize=normalize, + fill_value=fill_value, + ) + + # error: Cannot determine type of 'resample' + @doc(NDFrame.resample, **_shared_doc_kwargs) # type: ignore[has-type] + def resample( + self, + rule, + axis=0, + closed: str | None = None, + label: str | None = None, + convention: str = "start", + kind: str | None = None, + loffset=None, + base: int | None = None, + on=None, + level=None, + origin: str | TimestampConvertibleTypes = "start_day", + offset: TimedeltaConvertibleTypes | None = None, + ) -> Resampler: + return super().resample( + rule=rule, + axis=axis, + closed=closed, + label=label, + convention=convention, + kind=kind, + loffset=loffset, + base=base, + on=on, + level=level, + origin=origin, + offset=offset, + ) + + def to_timestamp(self, freq=None, how="start", copy=True) -> Series: + """ + Cast to DatetimeIndex of Timestamps, at *beginning* of period. + + Parameters + ---------- + freq : str, default frequency of PeriodIndex + Desired frequency. + how : {'s', 'e', 'start', 'end'} + Convention for converting period to timestamp; start of period + vs. end. + copy : bool, default True + Whether or not to return a copy. + + Returns + ------- + Series with DatetimeIndex + """ + new_values = self._values + if copy: + new_values = new_values.copy() + + if not isinstance(self.index, PeriodIndex): + raise TypeError(f"unsupported Type {type(self.index).__name__}") + new_index = self.index.to_timestamp(freq=freq, how=how) + return self._constructor(new_values, index=new_index).__finalize__( + self, method="to_timestamp" + ) + + def to_period(self, freq=None, copy=True) -> Series: + """ + Convert Series from DatetimeIndex to PeriodIndex. 
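+
+        Only the index is converted; the values themselves are left untouched.
+        The index must already be a ``DatetimeIndex``, otherwise a
+        ``TypeError`` is raised.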
+ + Parameters + ---------- + freq : str, default None + Frequency associated with the PeriodIndex. + copy : bool, default True + Whether or not to return a copy. + + Returns + ------- + Series + Series with index converted to PeriodIndex. + """ + new_values = self._values + if copy: + new_values = new_values.copy() + + if not isinstance(self.index, DatetimeIndex): + raise TypeError(f"unsupported Type {type(self.index).__name__}") + new_index = self.index.to_period(freq=freq) + return self._constructor(new_values, index=new_index).__finalize__( + self, method="to_period" + ) + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + def ffill( + self: Series, + axis: None | Axis = None, + inplace: bool = False, + limit: None | int = None, + downcast=None, + ) -> Series | None: + return super().ffill(axis, inplace, limit, downcast) + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + def bfill( + self: Series, + axis: None | Axis = None, + inplace: bool = False, + limit: None | int = None, + downcast=None, + ) -> Series | None: + return super().bfill(axis, inplace, limit, downcast) + + @deprecate_nonkeyword_arguments( + version=None, allowed_args=["self", "lower", "upper"] + ) + def clip( + self: Series, + lower=None, + upper=None, + axis: Axis | None = None, + inplace: bool = False, + *args, + **kwargs, + ) -> Series | None: + return super().clip(lower, upper, axis, inplace, *args, **kwargs) + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "method"]) + def interpolate( + self: Series, + method: str = "linear", + axis: Axis = 0, + limit: int | None = None, + inplace: bool = False, + limit_direction: str | None = None, + limit_area: str | None = None, + downcast: str | None = None, + **kwargs, + ) -> Series | None: + return super().interpolate( + method, + axis, + limit, + inplace, + limit_direction, + limit_area, + downcast, + **kwargs, + ) + + @deprecate_nonkeyword_arguments( + version=None, allowed_args=["self", "cond", "other"] + ) + def where( + self, + cond, + other=lib.no_default, + inplace=False, + axis=None, + level=None, + errors=lib.no_default, + try_cast=lib.no_default, + ): + return super().where(cond, other, inplace, axis, level, errors, try_cast) + + @deprecate_nonkeyword_arguments( + version=None, allowed_args=["self", "cond", "other"] + ) + def mask( + self, + cond, + other=np.nan, + inplace=False, + axis=None, + level=None, + errors=lib.no_default, + try_cast=lib.no_default, + ): + return super().mask(cond, other, inplace, axis, level, errors, try_cast) + + # ---------------------------------------------------------------------- + # Add index + _AXIS_ORDERS = ["index"] + _AXIS_LEN = len(_AXIS_ORDERS) + _info_axis_number = 0 + _info_axis_name = "index" + + index: Index = properties.AxisProperty( + axis=0, doc="The index (axis labels) of the Series." 
+ ) + + # ---------------------------------------------------------------------- + # Accessor Methods + # ---------------------------------------------------------------------- + str = CachedAccessor("str", StringMethods) + dt = CachedAccessor("dt", CombinedDatetimelikeProperties) + cat = CachedAccessor("cat", CategoricalAccessor) + plot = CachedAccessor("plot", pandas.plotting.PlotAccessor) + sparse = CachedAccessor("sparse", SparseAccessor) + + # ---------------------------------------------------------------------- + # Add plotting methods to Series + hist = pandas.plotting.hist_series + + # ---------------------------------------------------------------------- + # Template-Based Arithmetic/Comparison Methods + + def _cmp_method(self, other, op): + res_name = ops.get_op_result_name(self, other) + + if isinstance(other, Series) and not self._indexed_same(other): + raise ValueError("Can only compare identically-labeled Series objects") + + lvalues = self._values + rvalues = extract_array(other, extract_numpy=True, extract_range=True) + + with np.errstate(all="ignore"): + res_values = ops.comparison_op(lvalues, rvalues, op) + + return self._construct_result(res_values, name=res_name) + + def _logical_method(self, other, op): + res_name = ops.get_op_result_name(self, other) + self, other = ops.align_method_SERIES(self, other, align_asobject=True) + + lvalues = self._values + rvalues = extract_array(other, extract_numpy=True, extract_range=True) + + res_values = ops.logical_op(lvalues, rvalues, op) + return self._construct_result(res_values, name=res_name) + + def _arith_method(self, other, op): + self, other = ops.align_method_SERIES(self, other) + return base.IndexOpsMixin._arith_method(self, other, op) + + +Series._add_numeric_operations() + +# Add arithmetic! +ops.add_flex_arithmetic_methods(Series) diff --git a/.local/lib/python3.8/site-packages/pandas/core/shared_docs.py b/.local/lib/python3.8/site-packages/pandas/core/shared_docs.py new file mode 100644 index 0000000..35ee1c7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/shared_docs.py @@ -0,0 +1,724 @@ +from __future__ import annotations + +_shared_docs: dict[str, str] = {} + +_shared_docs[ + "aggregate" +] = """ +Aggregate using one or more operations over the specified axis. + +Parameters +---------- +func : function, str, list or dict + Function to use for aggregating the data. If a function, must either + work when passed a {klass} or when passed to {klass}.apply. + + Accepted combinations are: + + - function + - string function name + - list of functions and/or function names, e.g. ``[np.sum, 'mean']`` + - dict of axis labels -> functions, function names or list of such. +{axis} +*args + Positional arguments to pass to `func`. +**kwargs + Keyword arguments to pass to `func`. + +Returns +------- +scalar, Series or DataFrame + + The return can be: + + * scalar : when Series.agg is called with single function + * Series : when DataFrame.agg is called with a single function + * DataFrame : when DataFrame.agg is called with several functions + + Return scalar, Series or DataFrame. +{see_also} +Notes +----- +`agg` is an alias for `aggregate`. Use the alias. + +Functions that mutate the passed object can produce unexpected +behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` +for more details. + +A passed user-defined-function will be passed a Series for evaluation. +{examples}""" + +_shared_docs[ + "compare" +] = """ +Compare to another {klass} and show the differences. + +.. 
versionadded:: 1.1.0 + +Parameters +---------- +other : {klass} + Object to compare with. + +align_axis : {{0 or 'index', 1 or 'columns'}}, default 1 + Determine which axis to align the comparison on. + + * 0, or 'index' : Resulting differences are stacked vertically + with rows drawn alternately from self and other. + * 1, or 'columns' : Resulting differences are aligned horizontally + with columns drawn alternately from self and other. + +keep_shape : bool, default False + If true, all rows and columns are kept. + Otherwise, only the ones with different values are kept. + +keep_equal : bool, default False + If true, the result keeps values that are equal. + Otherwise, equal values are shown as NaNs. +""" + +_shared_docs[ + "groupby" +] = """ +Group %(klass)s using a mapper or by a Series of columns. + +A groupby operation involves some combination of splitting the +object, applying a function, and combining the results. This can be +used to group large amounts of data and compute operations on these +groups. + +Parameters +---------- +by : mapping, function, label, or list of labels + Used to determine the groups for the groupby. + If ``by`` is a function, it's called on each value of the object's + index. If a dict or Series is passed, the Series or dict VALUES + will be used to determine the groups (the Series' values are first + aligned; see ``.align()`` method). If a list or ndarray of length + equal to the selected axis is passed (see the `groupby user guide + `_), + the values are used as-is to determine the groups. A label or list + of labels may be passed to group by the columns in ``self``. + Notice that a tuple is interpreted as a (single) key. +axis : {0 or 'index', 1 or 'columns'}, default 0 + Split along rows (0) or columns (1). +level : int, level name, or sequence of such, default None + If the axis is a MultiIndex (hierarchical), group by a particular + level or levels. +as_index : bool, default True + For aggregated output, return object with group labels as the + index. Only relevant for DataFrame input. as_index=False is + effectively "SQL-style" grouped output. +sort : bool, default True + Sort group keys. Get better performance by turning this off. + Note this does not influence the order of observations within each + group. Groupby preserves the order of rows within each group. +group_keys : bool, default True + When calling apply, add group keys to index to identify pieces. +squeeze : bool, default False + Reduce the dimensionality of the return type if possible, + otherwise return a consistent type. + + .. deprecated:: 1.1.0 + +observed : bool, default False + This only applies if any of the groupers are Categoricals. + If True: only show observed values for categorical groupers. + If False: show all values for categorical groupers. +dropna : bool, default True + If True, and if group keys contain NA values, NA values together + with row/column will be dropped. + If False, NA values will also be treated as the key in groups. + + .. versionadded:: 1.1.0 + +Returns +------- +%(klass)sGroupBy + Returns a groupby object that contains information about the groups. + +See Also +-------- +resample : Convenience method for frequency conversion and resampling + of time series. + +Notes +----- +See the `user guide +`__ for more +detailed usage and examples, including splitting an object into groups, +iterating through groups, selecting a group, aggregation, and more. 
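+
+Examples
+--------
+A minimal illustration (see the user guide link above for fuller examples):
+group a small DataFrame by one column and take each group's mean.
+
+>>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon', 'Parrot', 'Parrot'],
+...                    'Max Speed': [380., 370., 24., 26.]})
+>>> df.groupby(['Animal']).mean()
+        Max Speed
+Animal
+Falcon      375.0
+Parrot       25.0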
+""" + +_shared_docs[ + "melt" +] = """ +Unpivot a DataFrame from wide to long format, optionally leaving identifiers set. + +This function is useful to massage a DataFrame into a format where one +or more columns are identifier variables (`id_vars`), while all other +columns, considered measured variables (`value_vars`), are "unpivoted" to +the row axis, leaving just two non-identifier columns, 'variable' and +'value'. + +Parameters +---------- +id_vars : tuple, list, or ndarray, optional + Column(s) to use as identifier variables. +value_vars : tuple, list, or ndarray, optional + Column(s) to unpivot. If not specified, uses all columns that + are not set as `id_vars`. +var_name : scalar + Name to use for the 'variable' column. If None it uses + ``frame.columns.name`` or 'variable'. +value_name : scalar, default 'value' + Name to use for the 'value' column. +col_level : int or str, optional + If columns are a MultiIndex then use this level to melt. +ignore_index : bool, default True + If True, original index is ignored. If False, the original index is retained. + Index labels will be repeated as necessary. + + .. versionadded:: 1.1.0 + +Returns +------- +DataFrame + Unpivoted DataFrame. + +See Also +-------- +%(other)s : Identical method. +pivot_table : Create a spreadsheet-style pivot table as a DataFrame. +DataFrame.pivot : Return reshaped DataFrame organized + by given index / column values. +DataFrame.explode : Explode a DataFrame from list-like + columns to long format. + +Notes +----- +Reference :ref:`the user guide ` for more examples. + +Examples +-------- +>>> df = pd.DataFrame({'A': {0: 'a', 1: 'b', 2: 'c'}, +... 'B': {0: 1, 1: 3, 2: 5}, +... 'C': {0: 2, 1: 4, 2: 6}}) +>>> df + A B C +0 a 1 2 +1 b 3 4 +2 c 5 6 + +>>> %(caller)sid_vars=['A'], value_vars=['B']) + A variable value +0 a B 1 +1 b B 3 +2 c B 5 + +>>> %(caller)sid_vars=['A'], value_vars=['B', 'C']) + A variable value +0 a B 1 +1 b B 3 +2 c B 5 +3 a C 2 +4 b C 4 +5 c C 6 + +The names of 'variable' and 'value' columns can be customized: + +>>> %(caller)sid_vars=['A'], value_vars=['B'], +... var_name='myVarname', value_name='myValname') + A myVarname myValname +0 a B 1 +1 b B 3 +2 c B 5 + +Original index values can be kept around: + +>>> %(caller)sid_vars=['A'], value_vars=['B', 'C'], ignore_index=False) + A variable value +0 a B 1 +1 b B 3 +2 c B 5 +0 a C 2 +1 b C 4 +2 c C 6 + +If you have multi-index columns: + +>>> df.columns = [list('ABC'), list('DEF')] +>>> df + A B C + D E F +0 a 1 2 +1 b 3 4 +2 c 5 6 + +>>> %(caller)scol_level=0, id_vars=['A'], value_vars=['B']) + A variable value +0 a B 1 +1 b B 3 +2 c B 5 + +>>> %(caller)sid_vars=[('A', 'D')], value_vars=[('B', 'E')]) + (A, D) variable_0 variable_1 value +0 a B E 1 +1 b B E 3 +2 c B E 5 +""" + +_shared_docs[ + "transform" +] = """ +Call ``func`` on self producing a {klass} with the same axis shape as self. + +Parameters +---------- +func : function, str, list-like or dict-like + Function to use for transforming the data. If a function, must either + work when passed a {klass} or when passed to {klass}.apply. If func + is both list-like and dict-like, dict-like behavior takes precedence. + + Accepted combinations are: + + - function + - string function name + - list-like of functions and/or function names, e.g. ``[np.exp, 'sqrt']`` + - dict-like of axis labels -> functions, function names or list-like of such. +{axis} +*args + Positional arguments to pass to `func`. +**kwargs + Keyword arguments to pass to `func`. 
+ +Returns +------- +{klass} + A {klass} that must have the same length as self. + +Raises +------ +ValueError : If the returned {klass} has a different length than self. + +See Also +-------- +{klass}.agg : Only perform aggregating type operations. +{klass}.apply : Invoke function on a {klass}. + +Notes +----- +Functions that mutate the passed object can produce unexpected +behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` +for more details. + +Examples +-------- +>>> df = pd.DataFrame({{'A': range(3), 'B': range(1, 4)}}) +>>> df + A B +0 0 1 +1 1 2 +2 2 3 +>>> df.transform(lambda x: x + 1) + A B +0 1 2 +1 2 3 +2 3 4 + +Even though the resulting {klass} must have the same length as the +input {klass}, it is possible to provide several input functions: + +>>> s = pd.Series(range(3)) +>>> s +0 0 +1 1 +2 2 +dtype: int64 +>>> s.transform([np.sqrt, np.exp]) + sqrt exp +0 0.000000 1.000000 +1 1.000000 2.718282 +2 1.414214 7.389056 + +You can call transform on a GroupBy object: + +>>> df = pd.DataFrame({{ +... "Date": [ +... "2015-05-08", "2015-05-07", "2015-05-06", "2015-05-05", +... "2015-05-08", "2015-05-07", "2015-05-06", "2015-05-05"], +... "Data": [5, 8, 6, 1, 50, 100, 60, 120], +... }}) +>>> df + Date Data +0 2015-05-08 5 +1 2015-05-07 8 +2 2015-05-06 6 +3 2015-05-05 1 +4 2015-05-08 50 +5 2015-05-07 100 +6 2015-05-06 60 +7 2015-05-05 120 +>>> df.groupby('Date')['Data'].transform('sum') +0 55 +1 108 +2 66 +3 121 +4 55 +5 108 +6 66 +7 121 +Name: Data, dtype: int64 + +>>> df = pd.DataFrame({{ +... "c": [1, 1, 1, 2, 2, 2, 2], +... "type": ["m", "n", "o", "m", "m", "n", "n"] +... }}) +>>> df + c type +0 1 m +1 1 n +2 1 o +3 2 m +4 2 m +5 2 n +6 2 n +>>> df['size'] = df.groupby('c')['type'].transform(len) +>>> df + c type size +0 1 m 3 +1 1 n 3 +2 1 o 3 +3 2 m 4 +4 2 m 4 +5 2 n 4 +6 2 n 4 +""" + +_shared_docs[ + "storage_options" +] = """storage_options : dict, optional + Extra options that make sense for a particular storage connection, e.g. + host, port, username, password, etc. For HTTP(S) URLs the key-value pairs + are forwarded to ``urllib`` as header options. For other URLs (e.g. + starting with "s3://", and "gcs://") the key-value pairs are forwarded to + ``fsspec``. Please see ``fsspec`` and ``urllib`` for more details.""" + +_shared_docs[ + "compression_options" +] = """compression : str or dict, default 'infer' + For on-the-fly compression of the output data. If 'infer' and '%s' + path-like, then detect compression from the following extensions: '.gz', + '.bz2', '.zip', '.xz', or '.zst' (otherwise no compression). Set to + ``None`` for no compression. Can also be a dict with key ``'method'`` set + to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``} and other + key-value pairs are forwarded to ``zipfile.ZipFile``, ``gzip.GzipFile``, + ``bz2.BZ2File``, or ``zstandard.ZstdDecompressor``, respectively. As an + example, the following could be passed for faster compression and to create + a reproducible gzip archive: + ``compression={'method': 'gzip', 'compresslevel': 1, 'mtime': 1}``.""" + +_shared_docs[ + "decompression_options" +] = """compression : str or dict, default 'infer' + For on-the-fly decompression of on-disk data. If 'infer' and '%s' is + path-like, then detect compression from the following extensions: '.gz', + '.bz2', '.zip', '.xz', or '.zst' (otherwise no compression). If using + 'zip', the ZIP file must contain only one data file to be read in. Set to + ``None`` for no decompression. 
Can also be a dict with key ``'method'`` set + to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``} and other + key-value pairs are forwarded to ``zipfile.ZipFile``, ``gzip.GzipFile``, + ``bz2.BZ2File``, or ``zstandard.ZstdDecompressor``, respectively. As an + example, the following could be passed for Zstandard decompression using a + custom compression dictionary: + ``compression={'method': 'zstd', 'dict_data': my_compression_dict}``.""" + +_shared_docs[ + "replace" +] = """ + Replace values given in `to_replace` with `value`. + + Values of the {klass} are replaced with other values dynamically. + {replace_iloc} + + Parameters + ---------- + to_replace : str, regex, list, dict, Series, int, float, or None + How to find the values that will be replaced. + + * numeric, str or regex: + + - numeric: numeric values equal to `to_replace` will be + replaced with `value` + - str: string exactly matching `to_replace` will be replaced + with `value` + - regex: regexs matching `to_replace` will be replaced with + `value` + + * list of str, regex, or numeric: + + - First, if `to_replace` and `value` are both lists, they + **must** be the same length. + - Second, if ``regex=True`` then all of the strings in **both** + lists will be interpreted as regexs otherwise they will match + directly. This doesn't matter much for `value` since there + are only a few possible substitution regexes you can use. + - str, regex and numeric rules apply as above. + + * dict: + + - Dicts can be used to specify different replacement values + for different existing values. For example, + ``{{'a': 'b', 'y': 'z'}}`` replaces the value 'a' with 'b' and + 'y' with 'z'. To use a dict in this way the `value` + parameter should be `None`. + - For a DataFrame a dict can specify that different values + should be replaced in different columns. For example, + ``{{'a': 1, 'b': 'z'}}`` looks for the value 1 in column 'a' + and the value 'z' in column 'b' and replaces these values + with whatever is specified in `value`. The `value` parameter + should not be ``None`` in this case. You can treat this as a + special case of passing two lists except that you are + specifying the column to search in. + - For a DataFrame nested dictionaries, e.g., + ``{{'a': {{'b': np.nan}}}}``, are read as follows: look in column + 'a' for the value 'b' and replace it with NaN. The `value` + parameter should be ``None`` to use a nested dict in this + way. You can nest regular expressions as well. Note that + column names (the top-level dictionary keys in a nested + dictionary) **cannot** be regular expressions. + + * None: + + - This means that the `regex` argument must be a string, + compiled regular expression, or list, dict, ndarray or + Series of such elements. If `value` is also ``None`` then + this **must** be a nested dictionary or Series. + + See the examples section for examples of each of these. + value : scalar, dict, list, str, regex, default None + Value to replace any values matching `to_replace` with. + For a DataFrame a dict of values can be used to specify which + value to use for each column (columns not in the dict will not be + filled). Regular expressions, strings and lists or dicts of such + objects are also allowed. + {inplace} + limit : int, default None + Maximum size gap to forward or backward fill. + regex : bool or same types as `to_replace`, default False + Whether to interpret `to_replace` and/or `value` as regular + expressions. If this is ``True`` then `to_replace` *must* be a + string. 
Alternatively, this could be a regular expression or a + list, dict, or array of regular expressions in which case + `to_replace` must be ``None``. + method : {{'pad', 'ffill', 'bfill', `None`}} + The method to use when for replacement, when `to_replace` is a + scalar, list or tuple and `value` is ``None``. + + .. versionchanged:: 0.23.0 + Added to DataFrame. + + Returns + ------- + {klass} + Object after replacement. + + Raises + ------ + AssertionError + * If `regex` is not a ``bool`` and `to_replace` is not + ``None``. + + TypeError + * If `to_replace` is not a scalar, array-like, ``dict``, or ``None`` + * If `to_replace` is a ``dict`` and `value` is not a ``list``, + ``dict``, ``ndarray``, or ``Series`` + * If `to_replace` is ``None`` and `regex` is not compilable + into a regular expression or is a list, dict, ndarray, or + Series. + * When replacing multiple ``bool`` or ``datetime64`` objects and + the arguments to `to_replace` does not match the type of the + value being replaced + + ValueError + * If a ``list`` or an ``ndarray`` is passed to `to_replace` and + `value` but they are not the same length. + + See Also + -------- + {klass}.fillna : Fill NA values. + {klass}.where : Replace values based on boolean condition. + Series.str.replace : Simple string replacement. + + Notes + ----- + * Regex substitution is performed under the hood with ``re.sub``. The + rules for substitution for ``re.sub`` are the same. + * Regular expressions will only substitute on strings, meaning you + cannot provide, for example, a regular expression matching floating + point numbers and expect the columns in your frame that have a + numeric dtype to be matched. However, if those floating point + numbers *are* strings, then you can do this. + * This method has *a lot* of options. You are encouraged to experiment + and play with this method to gain intuition about how it works. + * When dict is used as the `to_replace` value, it is like + key(s) in the dict are the to_replace part and + value(s) in the dict are the value parameter. + + Examples + -------- + + **Scalar `to_replace` and `value`** + + >>> s = pd.Series([1, 2, 3, 4, 5]) + >>> s.replace(1, 5) + 0 5 + 1 2 + 2 3 + 3 4 + 4 5 + dtype: int64 + + >>> df = pd.DataFrame({{'A': [0, 1, 2, 3, 4], + ... 'B': [5, 6, 7, 8, 9], + ... 'C': ['a', 'b', 'c', 'd', 'e']}}) + >>> df.replace(0, 5) + A B C + 0 5 5 a + 1 1 6 b + 2 2 7 c + 3 3 8 d + 4 4 9 e + + **List-like `to_replace`** + + >>> df.replace([0, 1, 2, 3], 4) + A B C + 0 4 5 a + 1 4 6 b + 2 4 7 c + 3 4 8 d + 4 4 9 e + + >>> df.replace([0, 1, 2, 3], [4, 3, 2, 1]) + A B C + 0 4 5 a + 1 3 6 b + 2 2 7 c + 3 1 8 d + 4 4 9 e + + >>> s.replace([1, 2], method='bfill') + 0 3 + 1 3 + 2 3 + 3 4 + 4 5 + dtype: int64 + + **dict-like `to_replace`** + + >>> df.replace({{0: 10, 1: 100}}) + A B C + 0 10 5 a + 1 100 6 b + 2 2 7 c + 3 3 8 d + 4 4 9 e + + >>> df.replace({{'A': 0, 'B': 5}}, 100) + A B C + 0 100 100 a + 1 1 6 b + 2 2 7 c + 3 3 8 d + 4 4 9 e + + >>> df.replace({{'A': {{0: 100, 4: 400}}}}) + A B C + 0 100 5 a + 1 1 6 b + 2 2 7 c + 3 3 8 d + 4 400 9 e + + **Regular expression `to_replace`** + + >>> df = pd.DataFrame({{'A': ['bat', 'foo', 'bait'], + ... 
'B': ['abc', 'bar', 'xyz']}}) + >>> df.replace(to_replace=r'^ba.$', value='new', regex=True) + A B + 0 new abc + 1 foo new + 2 bait xyz + + >>> df.replace({{'A': r'^ba.$'}}, {{'A': 'new'}}, regex=True) + A B + 0 new abc + 1 foo bar + 2 bait xyz + + >>> df.replace(regex=r'^ba.$', value='new') + A B + 0 new abc + 1 foo new + 2 bait xyz + + >>> df.replace(regex={{r'^ba.$': 'new', 'foo': 'xyz'}}) + A B + 0 new abc + 1 xyz new + 2 bait xyz + + >>> df.replace(regex=[r'^ba.$', 'foo'], value='new') + A B + 0 new abc + 1 new new + 2 bait xyz + + Compare the behavior of ``s.replace({{'a': None}})`` and + ``s.replace('a', None)`` to understand the peculiarities + of the `to_replace` parameter: + + >>> s = pd.Series([10, 'a', 'a', 'b', 'a']) + + When one uses a dict as the `to_replace` value, it is like the + value(s) in the dict are equal to the `value` parameter. + ``s.replace({{'a': None}})`` is equivalent to + ``s.replace(to_replace={{'a': None}}, value=None, method=None)``: + + >>> s.replace({{'a': None}}) + 0 10 + 1 None + 2 None + 3 b + 4 None + dtype: object + + When ``value`` is not explicitly passed and `to_replace` is a scalar, list + or tuple, `replace` uses the method parameter (default 'pad') to do the + replacement. So this is why the 'a' values are being replaced by 10 + in rows 1 and 2 and 'b' in row 4 in this case. + + >>> s.replace('a') + 0 10 + 1 10 + 2 10 + 3 b + 4 b + dtype: object + + On the other hand, if ``None`` is explicitly passed for ``value``, it will + be respected: + + >>> s.replace('a', None) + 0 10 + 1 None + 2 None + 3 b + 4 None + dtype: object + + .. versionchanged:: 1.4.0 + Previously the explicit ``None`` was silently ignored. +""" diff --git a/.local/lib/python3.8/site-packages/pandas/core/sorting.py b/.local/lib/python3.8/site-packages/pandas/core/sorting.py new file mode 100644 index 0000000..89cca2c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/sorting.py @@ -0,0 +1,713 @@ +""" miscellaneous sorting / groupby utilities """ +from __future__ import annotations + +from collections import defaultdict +from typing import ( + TYPE_CHECKING, + Callable, + DefaultDict, + Hashable, + Iterable, + Sequence, +) +import warnings + +import numpy as np + +from pandas._libs import ( + algos, + hashtable, + lib, +) +from pandas._libs.hashtable import unique_label_indices +from pandas._typing import ( + IndexKeyFunc, + Shape, + npt, +) + +from pandas.core.dtypes.common import ( + ensure_int64, + ensure_platform_int, + is_extension_array_dtype, +) +from pandas.core.dtypes.generic import ( + ABCMultiIndex, + ABCRangeIndex, +) +from pandas.core.dtypes.missing import isna + +from pandas.core.construction import extract_array + +if TYPE_CHECKING: + from pandas import MultiIndex + from pandas.core.indexes.base import Index + + +def get_indexer_indexer( + target: Index, + level: str | int | list[str] | list[int], + ascending: Sequence[bool | int] | bool | int, + kind: str, + na_position: str, + sort_remaining: bool, + key: IndexKeyFunc, +) -> np.ndarray | None: + """ + Helper method that return the indexer according to input parameters for + the sort_index method of DataFrame and Series. 
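+
+    Returns None when the requested sort is a no-op, i.e. when the target
+    index is already monotonic in the requested order, so that callers can
+    skip the take entirely.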
+ + Parameters + ---------- + target : Index + level : int or level name or list of ints or list of level names + ascending : bool or list of bools, default True + kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'quicksort' + na_position : {'first', 'last'}, default 'last' + sort_remaining : bool, default True + key : callable, optional + + Returns + ------- + Optional[ndarray] + The indexer for the new index. + """ + + target = ensure_key_mapped(target, key, levels=level) + target = target._sort_levels_monotonic() + + if level is not None: + _, indexer = target.sortlevel( + level, ascending=ascending, sort_remaining=sort_remaining + ) + elif isinstance(target, ABCMultiIndex): + indexer = lexsort_indexer( + target._get_codes_for_sorting(), orders=ascending, na_position=na_position + ) + else: + # Check monotonic-ness before sort an index (GH 11080) + if (ascending and target.is_monotonic_increasing) or ( + not ascending and target.is_monotonic_decreasing + ): + return None + + indexer = nargsort( + target, kind=kind, ascending=ascending, na_position=na_position + ) + return indexer + + +def get_group_index( + labels, shape: Shape, sort: bool, xnull: bool +) -> npt.NDArray[np.int64]: + """ + For the particular label_list, gets the offsets into the hypothetical list + representing the totally ordered cartesian product of all possible label + combinations, *as long as* this space fits within int64 bounds; + otherwise, though group indices identify unique combinations of + labels, they cannot be deconstructed. + - If `sort`, rank of returned ids preserve lexical ranks of labels. + i.e. returned id's can be used to do lexical sort on labels; + - If `xnull` nulls (-1 labels) are passed through. + + Parameters + ---------- + labels : sequence of arrays + Integers identifying levels at each location + shape : tuple[int, ...] + Number of unique levels at each location + sort : bool + If the ranks of returned ids should match lexical ranks of labels + xnull : bool + If true nulls are excluded. i.e. -1 values in the labels are + passed through. + + Returns + ------- + An array of type int64 where two elements are equal if their corresponding + labels are equal at all location. + + Notes + ----- + The length of `labels` and `shape` must be identical. 
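+
+    For example, labels ``[0, 1, 1]`` and ``[0, 0, 1]`` with ``shape=(2, 2)``
+    produce the group ids ``2 * [0, 1, 1] + [0, 0, 1] == [0, 2, 3]``, i.e.
+    offsets into the 2 x 2 cartesian product of the two label sets.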
+ """ + + def _int64_cut_off(shape) -> int: + acc = 1 + for i, mul in enumerate(shape): + acc *= int(mul) + if not acc < lib.i8max: + return i + return len(shape) + + def maybe_lift(lab, size) -> tuple[np.ndarray, int]: + # promote nan values (assigned -1 label in lab array) + # so that all output values are non-negative + return (lab + 1, size + 1) if (lab == -1).any() else (lab, size) + + labels = [ensure_int64(x) for x in labels] + lshape = list(shape) + if not xnull: + for i, (lab, size) in enumerate(zip(labels, shape)): + lab, size = maybe_lift(lab, size) + labels[i] = lab + lshape[i] = size + + labels = list(labels) + + # Iteratively process all the labels in chunks sized so less + # than lib.i8max unique int ids will be required for each chunk + while True: + # how many levels can be done without overflow: + nlev = _int64_cut_off(lshape) + + # compute flat ids for the first `nlev` levels + stride = np.prod(lshape[1:nlev], dtype="i8") + out = stride * labels[0].astype("i8", subok=False, copy=False) + + for i in range(1, nlev): + if lshape[i] == 0: + stride = np.int64(0) + else: + stride //= lshape[i] + out += labels[i] * stride + + if xnull: # exclude nulls + mask = labels[0] == -1 + for lab in labels[1:nlev]: + mask |= lab == -1 + out[mask] = -1 + + if nlev == len(lshape): # all levels done! + break + + # compress what has been done so far in order to avoid overflow + # to retain lexical ranks, obs_ids should be sorted + comp_ids, obs_ids = compress_group_index(out, sort=sort) + + labels = [comp_ids] + labels[nlev:] + lshape = [len(obs_ids)] + lshape[nlev:] + + return out + + +def get_compressed_ids( + labels, sizes: Shape +) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.int64]]: + """ + Group_index is offsets into cartesian product of all possible labels. This + space can be huge, so this function compresses it, by computing offsets + (comp_ids) into the list of unique labels (obs_group_ids). + + Parameters + ---------- + labels : list of label arrays + sizes : tuple[int] of size of the levels + + Returns + ------- + np.ndarray[np.intp] + comp_ids + np.ndarray[np.int64] + obs_group_ids + """ + ids = get_group_index(labels, sizes, sort=True, xnull=False) + return compress_group_index(ids, sort=True) + + +def is_int64_overflow_possible(shape) -> bool: + the_prod = 1 + for x in shape: + the_prod *= int(x) + + return the_prod >= lib.i8max + + +def decons_group_index(comp_labels, shape): + # reconstruct labels + if is_int64_overflow_possible(shape): + # at some point group indices are factorized, + # and may not be deconstructed here! wrong path! + raise ValueError("cannot deconstruct factorized group indices!") + + label_list = [] + factor = 1 + y = 0 + x = comp_labels + for i in reversed(range(len(shape))): + labels = (x - y) % (factor * shape[i]) // factor + np.putmask(labels, comp_labels < 0, -1) + label_list.append(labels) + y = labels * factor + factor *= shape[i] + return label_list[::-1] + + +def decons_obs_group_ids( + comp_ids: npt.NDArray[np.intp], obs_ids, shape, labels, xnull: bool +): + """ + Reconstruct labels from observed group ids. + + Parameters + ---------- + comp_ids : np.ndarray[np.intp] + xnull : bool + If nulls are excluded; i.e. -1 labels are passed through. + """ + if not xnull: + lift = np.fromiter(((a == -1).any() for a in labels), dtype="i8") + shape = np.asarray(shape, dtype="i8") + lift + + if not is_int64_overflow_possible(shape): + # obs ids are deconstructable! take the fast route! 
+ out = decons_group_index(obs_ids, shape) + return out if xnull or not lift.any() else [x - y for x, y in zip(out, lift)] + + indexer = unique_label_indices(comp_ids) + return [lab[indexer].astype(np.intp, subok=False, copy=True) for lab in labels] + + +def indexer_from_factorized( + labels, shape: Shape, compress: bool = True +) -> npt.NDArray[np.intp]: + ids = get_group_index(labels, shape, sort=True, xnull=False) + + if not compress: + ngroups = (ids.size and ids.max()) + 1 + else: + ids, obs = compress_group_index(ids, sort=True) + ngroups = len(obs) + + return get_group_index_sorter(ids, ngroups) + + +def lexsort_indexer( + keys, orders=None, na_position: str = "last", key: Callable | None = None +) -> npt.NDArray[np.intp]: + """ + Performs lexical sorting on a set of keys + + Parameters + ---------- + keys : sequence of arrays + Sequence of ndarrays to be sorted by the indexer + orders : bool or list of booleans, optional + Determines the sorting order for each element in keys. If a list, + it must be the same length as keys. This determines whether the + corresponding element in keys should be sorted in ascending + (True) or descending (False) order. if bool, applied to all + elements as above. if None, defaults to True. + na_position : {'first', 'last'}, default 'last' + Determines placement of NA elements in the sorted list ("last" or "first") + key : Callable, optional + Callable key function applied to every element in keys before sorting + + .. versionadded:: 1.0.0 + + Returns + ------- + np.ndarray[np.intp] + """ + from pandas.core.arrays import Categorical + + labels = [] + shape = [] + if isinstance(orders, bool): + orders = [orders] * len(keys) + elif orders is None: + orders = [True] * len(keys) + + keys = [ensure_key_mapped(k, key) for k in keys] + + for k, order in zip(keys, orders): + with warnings.catch_warnings(): + # TODO(2.0): unnecessary once deprecation is enforced + # GH#45618 don't issue warning user can't do anything about + warnings.filterwarnings("ignore", ".*SparseArray.*", category=FutureWarning) + + cat = Categorical(k, ordered=True) + + if na_position not in ["last", "first"]: + raise ValueError(f"invalid na_position: {na_position}") + + n = len(cat.categories) + codes = cat.codes.copy() + + mask = cat.codes == -1 + if order: # ascending + if na_position == "last": + codes = np.where(mask, n, codes) + elif na_position == "first": + codes += 1 + else: # not order means descending + if na_position == "last": + codes = np.where(mask, n, n - codes - 1) + elif na_position == "first": + codes = np.where(mask, 0, n - codes) + if mask.any(): + n += 1 + + shape.append(n) + labels.append(codes) + + return indexer_from_factorized(labels, tuple(shape)) + + +def nargsort( + items, + kind: str = "quicksort", + ascending: bool = True, + na_position: str = "last", + key: Callable | None = None, + mask: np.ndarray | None = None, +) -> npt.NDArray[np.intp]: + """ + Intended to be a drop-in replacement for np.argsort which handles NaNs. + + Adds ascending, na_position, and key parameters. + + (GH #6399, #5231, #27237) + + Parameters + ---------- + kind : str, default 'quicksort' + ascending : bool, default True + na_position : {'first', 'last'}, default 'last' + key : Optional[Callable], default None + mask : Optional[np.ndarray], default None + Passed when called by ExtensionArray.argsort. 
+ + Returns + ------- + np.ndarray[np.intp] + """ + + if key is not None: + items = ensure_key_mapped(items, key) + return nargsort( + items, + kind=kind, + ascending=ascending, + na_position=na_position, + key=None, + mask=mask, + ) + + if isinstance(items, ABCRangeIndex): + return items.argsort(ascending=ascending) # TODO: test coverage with key? + elif not isinstance(items, ABCMultiIndex): + items = extract_array(items) + if mask is None: + mask = np.asarray(isna(items)) # TODO: does this exclude MultiIndex too? + + if is_extension_array_dtype(items): + return items.argsort(ascending=ascending, kind=kind, na_position=na_position) + else: + items = np.asanyarray(items) + + idx = np.arange(len(items)) + non_nans = items[~mask] + non_nan_idx = idx[~mask] + + nan_idx = np.nonzero(mask)[0] + if not ascending: + non_nans = non_nans[::-1] + non_nan_idx = non_nan_idx[::-1] + indexer = non_nan_idx[non_nans.argsort(kind=kind)] + if not ascending: + indexer = indexer[::-1] + # Finally, place the NaNs at the end or the beginning according to + # na_position + if na_position == "last": + indexer = np.concatenate([indexer, nan_idx]) + elif na_position == "first": + indexer = np.concatenate([nan_idx, indexer]) + else: + raise ValueError(f"invalid na_position: {na_position}") + return ensure_platform_int(indexer) + + +def nargminmax(values, method: str, axis: int = 0): + """ + Implementation of np.argmin/argmax but for ExtensionArray and which + handles missing values. + + Parameters + ---------- + values : ExtensionArray + method : {"argmax", "argmin"} + axis : int, default 0 + + Returns + ------- + int + """ + assert method in {"argmax", "argmin"} + func = np.argmax if method == "argmax" else np.argmin + + mask = np.asarray(isna(values)) + values = values._values_for_argsort() + + if values.ndim > 1: + if mask.any(): + if axis == 1: + zipped = zip(values, mask) + else: + zipped = zip(values.T, mask.T) + return np.array([_nanargminmax(v, m, func) for v, m in zipped]) + return func(values, axis=axis) + + return _nanargminmax(values, mask, func) + + +def _nanargminmax(values, mask, func) -> int: + """ + See nanargminmax.__doc__. + """ + idx = np.arange(values.shape[0]) + non_nans = values[~mask] + non_nan_idx = idx[~mask] + + return non_nan_idx[func(non_nans)] + + +def _ensure_key_mapped_multiindex( + index: MultiIndex, key: Callable, level=None +) -> MultiIndex: + """ + Returns a new MultiIndex in which key has been applied + to all levels specified in level (or all levels if level + is None). Used for key sorting for MultiIndex. + + Parameters + ---------- + index : MultiIndex + Index to which to apply the key function on the + specified levels. + key : Callable + Function that takes an Index and returns an Index of + the same shape. This key is applied to each level + separately. The name of the level can be used to + distinguish different levels for application. + level : list-like, int or str, default None + Level or list of levels to apply the key function to. + If None, key function is applied to all levels. Other + levels are left unchanged. + + Returns + ------- + labels : MultiIndex + Resulting MultiIndex with modified levels. 
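+
+    Notes
+    -----
+    Only the levels listed in ``level`` are transformed. For instance, a
+    hypothetical ``key=lambda lev: lev.str.lower()`` combined with
+    ``level=0`` lowercases the first level and passes every other level
+    through unchanged.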
+ """ + + if level is not None: + if isinstance(level, (str, int)): + sort_levels = [level] + else: + sort_levels = level + + sort_levels = [index._get_level_number(lev) for lev in sort_levels] + else: + sort_levels = list(range(index.nlevels)) # satisfies mypy + + mapped = [ + ensure_key_mapped(index._get_level_values(level), key) + if level in sort_levels + else index._get_level_values(level) + for level in range(index.nlevels) + ] + + return type(index).from_arrays(mapped) + + +def ensure_key_mapped(values, key: Callable | None, levels=None): + """ + Applies a callable key function to the values function and checks + that the resulting value has the same shape. Can be called on Index + subclasses, Series, DataFrames, or ndarrays. + + Parameters + ---------- + values : Series, DataFrame, Index subclass, or ndarray + key : Optional[Callable], key to be called on the values array + levels : Optional[List], if values is a MultiIndex, list of levels to + apply the key to. + """ + from pandas.core.indexes.api import Index + + if not key: + return values + + if isinstance(values, ABCMultiIndex): + return _ensure_key_mapped_multiindex(values, key, level=levels) + + result = key(values.copy()) + if len(result) != len(values): + raise ValueError( + "User-provided `key` function must not change the shape of the array." + ) + + try: + if isinstance( + values, Index + ): # convert to a new Index subclass, not necessarily the same + result = Index(result) + else: + type_of_values = type(values) + result = type_of_values(result) # try to revert to original type otherwise + except TypeError: + raise TypeError( + f"User-provided `key` function returned an invalid type {type(result)} \ + which could not be converted to {type(values)}." + ) + + return result + + +def get_flattened_list( + comp_ids: npt.NDArray[np.intp], + ngroups: int, + levels: Iterable[Index], + labels: Iterable[np.ndarray], +) -> list[tuple]: + """Map compressed group id -> key tuple.""" + comp_ids = comp_ids.astype(np.int64, copy=False) + arrays: DefaultDict[int, list[int]] = defaultdict(list) + for labs, level in zip(labels, levels): + table = hashtable.Int64HashTable(ngroups) + table.map(comp_ids, labs.astype(np.int64, copy=False)) + for i in range(ngroups): + arrays[i].append(level[table.get_item(i)]) + return [tuple(array) for array in arrays.values()] + + +def get_indexer_dict( + label_list: list[np.ndarray], keys: list[Index] +) -> dict[Hashable, npt.NDArray[np.intp]]: + """ + Returns + ------- + dict: + Labels mapped to indexers. + """ + shape = [len(x) for x in keys] + + group_index = get_group_index(label_list, tuple(shape), sort=True, xnull=True) + if np.all(group_index == -1): + # Short-circuit, lib.indices_fast will return the same + return {} + ngroups = ( + ((group_index.size and group_index.max()) + 1) + if is_int64_overflow_possible(shape) + else np.prod(shape, dtype="i8") + ) + + sorter = get_group_index_sorter(group_index, ngroups) + + sorted_labels = [lab.take(sorter) for lab in label_list] + group_index = group_index.take(sorter) + + return lib.indices_fast(sorter, group_index, keys, sorted_labels) + + +# ---------------------------------------------------------------------- +# sorting levels...cleverly? 
+ + +def get_group_index_sorter( + group_index: npt.NDArray[np.intp], ngroups: int | None = None +) -> npt.NDArray[np.intp]: + """ + algos.groupsort_indexer implements `counting sort` and it is at least + O(ngroups), where + ngroups = prod(shape) + shape = map(len, keys) + that is, linear in the number of combinations (cartesian product) of unique + values of groupby keys. This can be huge when doing multi-key groupby. + np.argsort(kind='mergesort') is O(count x log(count)) where count is the + length of the data-frame; + Both algorithms are `stable` sort and that is necessary for correctness of + groupby operations. e.g. consider: + df.groupby(key)[col].transform('first') + + Parameters + ---------- + group_index : np.ndarray[np.intp] + signed integer dtype + ngroups : int or None, default None + + Returns + ------- + np.ndarray[np.intp] + """ + if ngroups is None: + ngroups = 1 + group_index.max() + count = len(group_index) + alpha = 0.0 # taking complexities literally; there may be + beta = 1.0 # some room for fine-tuning these parameters + do_groupsort = count > 0 and ((alpha + beta * ngroups) < (count * np.log(count))) + if do_groupsort: + sorter, _ = algos.groupsort_indexer( + ensure_platform_int(group_index), + ngroups, + ) + # sorter _should_ already be intp, but mypy is not yet able to verify + else: + sorter = group_index.argsort(kind="mergesort") + return ensure_platform_int(sorter) + + +def compress_group_index( + group_index: npt.NDArray[np.int64], sort: bool = True +) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.int64]]: + """ + Group_index is offsets into cartesian product of all possible labels. This + space can be huge, so this function compresses it, by computing offsets + (comp_ids) into the list of unique labels (obs_group_ids). 
+ """ + size_hint = len(group_index) + table = hashtable.Int64HashTable(size_hint) + + group_index = ensure_int64(group_index) + + # note, group labels come out ascending (ie, 1,2,3 etc) + comp_ids, obs_group_ids = table.get_labels_groupby(group_index) + + if sort and len(obs_group_ids) > 0: + obs_group_ids, comp_ids = _reorder_by_uniques(obs_group_ids, comp_ids) + + return ensure_int64(comp_ids), ensure_int64(obs_group_ids) + + +def _reorder_by_uniques( + uniques: npt.NDArray[np.int64], labels: npt.NDArray[np.intp] +) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.intp]]: + """ + Parameters + ---------- + uniques : np.ndarray[np.int64] + labels : np.ndarray[np.intp] + + Returns + ------- + np.ndarray[np.int64] + np.ndarray[np.intp] + """ + # sorter is index where elements ought to go + sorter = uniques.argsort() + + # reverse_indexer is where elements came from + reverse_indexer = np.empty(len(sorter), dtype=np.intp) + reverse_indexer.put(sorter, np.arange(len(sorter))) + + mask = labels < 0 + + # move labels to right locations (ie, unsort ascending labels) + labels = reverse_indexer.take(labels) + np.putmask(labels, mask, -1) + + # sort observed ids + uniques = uniques.take(sorter) + + return uniques, labels diff --git a/.local/lib/python3.8/site-packages/pandas/core/sparse/__init__.py b/.local/lib/python3.8/site-packages/pandas/core/sparse/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/core/sparse/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/sparse/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..f807d2b Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/sparse/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/sparse/__pycache__/api.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/sparse/__pycache__/api.cpython-38.pyc new file mode 100644 index 0000000..08b53ec Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/sparse/__pycache__/api.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/sparse/api.py b/.local/lib/python3.8/site-packages/pandas/core/sparse/api.py new file mode 100644 index 0000000..2a324eb --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/sparse/api.py @@ -0,0 +1,6 @@ +from pandas.core.arrays.sparse import ( + SparseArray, + SparseDtype, +) + +__all__ = ["SparseArray", "SparseDtype"] diff --git a/.local/lib/python3.8/site-packages/pandas/core/strings/__init__.py b/.local/lib/python3.8/site-packages/pandas/core/strings/__init__.py new file mode 100644 index 0000000..28aba7c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/strings/__init__.py @@ -0,0 +1,33 @@ +""" +Implementation of pandas.Series.str and its interface. + +* strings.accessor.StringMethods : Accessor for Series.str +* strings.base.BaseStringArrayMethods: Mixin ABC for EAs to implement str methods + +Most methods on the StringMethods accessor follow the pattern: + + 1. extract the array from the series (or index) + 2. Call that array's implementation of the string method + 3. Wrap the result (in a Series, index, or DataFrame) + +Pandas extension arrays implementing string methods should inherit from +pandas.core.strings.base.BaseStringArrayMethods. This is an ABC defining +the various string methods. To avoid namespace clashes and pollution, +these are prefixed with `_str_`. 
So ``Series.str.upper()`` calls +``Series.array._str_upper()``. The interface isn't currently public +to other string extension arrays. +""" +# Pandas current implementation is in ObjectStringArrayMixin. This is designed +# to work on object-dtype ndarrays. +# +# BaseStringArrayMethods +# - ObjectStringArrayMixin +# - StringArray +# - PandasArray +# - Categorical +# - ArrowStringArray + +from pandas.core.strings.accessor import StringMethods +from pandas.core.strings.base import BaseStringArrayMethods + +__all__ = ["StringMethods", "BaseStringArrayMethods"] diff --git a/.local/lib/python3.8/site-packages/pandas/core/strings/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/strings/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..d33a09a Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/strings/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/strings/__pycache__/accessor.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/strings/__pycache__/accessor.cpython-38.pyc new file mode 100644 index 0000000..7e9f319 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/strings/__pycache__/accessor.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/strings/__pycache__/base.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/strings/__pycache__/base.cpython-38.pyc new file mode 100644 index 0000000..3287023 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/strings/__pycache__/base.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/strings/__pycache__/object_array.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/strings/__pycache__/object_array.cpython-38.pyc new file mode 100644 index 0000000..447010d Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/strings/__pycache__/object_array.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/strings/accessor.py b/.local/lib/python3.8/site-packages/pandas/core/strings/accessor.py new file mode 100644 index 0000000..6a0fa3c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/strings/accessor.py @@ -0,0 +1,3287 @@ +from __future__ import annotations + +import codecs +from functools import wraps +import re +from typing import ( + TYPE_CHECKING, + Callable, + Hashable, + cast, +) +import warnings + +import numpy as np + +import pandas._libs.lib as lib +from pandas._typing import ( + DtypeObj, + F, +) +from pandas.util._decorators import Appender +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + ensure_object, + is_bool_dtype, + is_categorical_dtype, + is_integer, + is_list_like, + is_object_dtype, + is_re, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCIndex, + ABCMultiIndex, + ABCSeries, +) +from pandas.core.dtypes.missing import isna + +from pandas.core.base import NoNewAttributesMixin +from pandas.core.construction import extract_array + +if TYPE_CHECKING: + from pandas import ( + DataFrame, + Index, + Series, + ) + +_shared_docs: dict[str, str] = {} +_cpython_optimized_encoders = ( + "utf-8", + "utf8", + "latin-1", + "latin1", + "iso-8859-1", + "mbcs", + "ascii", +) +_cpython_optimized_decoders = _cpython_optimized_encoders + ("utf-16", "utf-32") + + +def forbid_nonstring_types( + forbidden: list[str] | None, name: str | None = None +) -> 
Callable[[F], F]: + """ + Decorator to forbid specific types for a method of StringMethods. + + For calling `.str.{method}` on a Series or Index, it is necessary to first + initialize the :class:`StringMethods` object, and then call the method. + However, different methods allow different input types, and so this can not + be checked during :meth:`StringMethods.__init__`, but must be done on a + per-method basis. This decorator exists to facilitate this process, and + make it explicit which (inferred) types are disallowed by the method. + + :meth:`StringMethods.__init__` allows the *union* of types its different + methods allow (after skipping NaNs; see :meth:`StringMethods._validate`), + namely: ['string', 'empty', 'bytes', 'mixed', 'mixed-integer']. + + The default string types ['string', 'empty'] are allowed for all methods. + For the additional types ['bytes', 'mixed', 'mixed-integer'], each method + then needs to forbid the types it is not intended for. + + Parameters + ---------- + forbidden : list-of-str or None + List of forbidden non-string types, may be one or more of + `['bytes', 'mixed', 'mixed-integer']`. + name : str, default None + Name of the method to use in the error message. By default, this is + None, in which case the name from the method being wrapped will be + copied. However, for working with further wrappers (like _pat_wrapper + and _noarg_wrapper), it is necessary to specify the name. + + Returns + ------- + func : wrapper + The method to which the decorator is applied, with an added check that + enforces the inferred type to not be in the list of forbidden types. + + Raises + ------ + TypeError + If the inferred type of the underlying data is in `forbidden`. + """ + # deal with None + forbidden = [] if forbidden is None else forbidden + + allowed_types = {"string", "empty", "bytes", "mixed", "mixed-integer"} - set( + forbidden + ) + + def _forbid_nonstring_types(func: F) -> F: + func_name = func.__name__ if name is None else name + + @wraps(func) + def wrapper(self, *args, **kwargs): + if self._inferred_dtype not in allowed_types: + msg = ( + f"Cannot use .str.{func_name} with values of " + f"inferred dtype '{self._inferred_dtype}'." + ) + raise TypeError(msg) + return func(self, *args, **kwargs) + + wrapper.__name__ = func_name + return cast(F, wrapper) + + return _forbid_nonstring_types + + +def _map_and_wrap(name, docstring): + @forbid_nonstring_types(["bytes"], name=name) + def wrapper(self): + result = getattr(self._data.array, f"_str_{name}")() + return self._wrap_result(result) + + wrapper.__doc__ = docstring + return wrapper + + +class StringMethods(NoNewAttributesMixin): + """ + Vectorized string functions for Series and Index. + + NAs stay NA unless handled otherwise by a particular method. + Patterned after Python's string methods, with some inspiration from + R's stringr package. + + Examples + -------- + >>> s = pd.Series(["A_Str_Series"]) + >>> s + 0 A_Str_Series + dtype: object + + >>> s.str.split("_") + 0 [A, Str, Series] + dtype: object + + >>> s.str.replace("_", "") + 0 AStrSeries + dtype: object + """ + + # Note: see the docstring in pandas.core.strings.__init__ + # for an explanation of the implementation. 
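+    # Editor's note: an illustrative sketch (not in the original source) of the
+    # three-step dispatch pattern referenced in the note above, using ``upper``
+    # as an example:
+    #
+    #   def upper(self):
+    #       result = self._data.array._str_upper()  # 2. dispatch to the array
+    #       return self._wrap_result(result)        # 3. wrap into Series/Index
+    #
+    # Step 1 (extracting the array from the Series/Index) happens once in
+    # ``__init__`` via ``self._data``.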
+ # TODO: Dispatch all the methods + # Currently the following are not dispatched to the array + # * cat + # * extractall + + def __init__(self, data): + from pandas.core.arrays.string_ import StringDtype + + self._inferred_dtype = self._validate(data) + self._is_categorical = is_categorical_dtype(data.dtype) + self._is_string = isinstance(data.dtype, StringDtype) + self._data = data + + self._index = self._name = None + if isinstance(data, ABCSeries): + self._index = data.index + self._name = data.name + + # ._values.categories works for both Series/Index + self._parent = data._values.categories if self._is_categorical else data + # save orig to blow up categoricals to the right type + self._orig = data + self._freeze() + + @staticmethod + def _validate(data): + """ + Auxiliary function for StringMethods, infers and checks dtype of data. + + This is a "first line of defence" at the creation of the StringMethods- + object, and just checks that the dtype is in the + *union* of the allowed types over all string methods below; this + restriction is then refined on a per-method basis using the decorator + @forbid_nonstring_types (more info in the corresponding docstring). + + This really should exclude all series/index with any non-string values, + but that isn't practical for performance reasons until we have a str + dtype (GH 9343 / 13877) + + Parameters + ---------- + data : The content of the Series + + Returns + ------- + dtype : inferred dtype of data + """ + if isinstance(data, ABCMultiIndex): + raise AttributeError( + "Can only use .str accessor with Index, not MultiIndex" + ) + + # see _libs/lib.pyx for list of inferred types + allowed_types = ["string", "empty", "bytes", "mixed", "mixed-integer"] + + data = extract_array(data) + + values = getattr(data, "categories", data) # categorical / normal + + inferred_dtype = lib.infer_dtype(values, skipna=True) + + if inferred_dtype not in allowed_types: + raise AttributeError("Can only use .str accessor with string values!") + return inferred_dtype + + def __getitem__(self, key): + result = self._data.array._str_getitem(key) + return self._wrap_result(result) + + def __iter__(self): + warnings.warn( + "Columnar iteration over characters will be deprecated in future releases.", + FutureWarning, + stacklevel=find_stack_level(), + ) + i = 0 + g = self.get(i) + while g.notna().any(): + yield g + i += 1 + g = self.get(i) + + def _wrap_result( + self, + result, + name=None, + expand: bool | None = None, + fill_value=np.nan, + returns_string=True, + returns_bool: bool = False, + ): + from pandas import ( + Index, + MultiIndex, + ) + + if not hasattr(result, "ndim") or not hasattr(result, "dtype"): + if isinstance(result, ABCDataFrame): + result = result.__finalize__(self._orig, name="str") + return result + assert result.ndim < 3 + + # We can be wrapping a string / object / categorical result, in which + # case we'll want to return the same dtype as the input. + # Or we can be wrapping a numeric output, in which case we don't want + # to return a StringArray. + # Ideally the array method returns the right array type. 
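+        # Editor's note (added comment): for example, ``.str.len()`` produces
+        # numeric output and passes ``returns_string=False`` so it is not cast
+        # back to the caller's string dtype, while ``.str.upper()`` keeps the
+        # default ``returns_string=True`` and round-trips the original dtype.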
+ if expand is None: + # infer from ndim if expand is not specified + expand = result.ndim != 1 + + elif ( + expand is True + and is_object_dtype(result) + and not isinstance(self._orig, ABCIndex) + ): + # required when expand=True is explicitly specified + # not needed when inferred + + def cons_row(x): + if is_list_like(x): + return x + else: + return [x] + + result = [cons_row(x) for x in result] + if result and not self._is_string: + # propagate nan values to match longest sequence (GH 18450) + max_len = max(len(x) for x in result) + result = [ + x * max_len if len(x) == 0 or x[0] is np.nan else x for x in result + ] + + if not isinstance(expand, bool): + raise ValueError("expand must be True or False") + + if expand is False: + # if expand is False, result should have the same name + # as the original otherwise specified + if name is None: + name = getattr(result, "name", None) + if name is None: + # do not use logical or, _orig may be a DataFrame + # which has "name" column + name = self._orig.name + + # Wait until we are sure result is a Series or Index before + # checking attributes (GH 12180) + if isinstance(self._orig, ABCIndex): + # if result is a boolean np.array, return the np.array + # instead of wrapping it into a boolean Index (GH 8875) + if is_bool_dtype(result): + return result + + if expand: + result = list(result) + out = MultiIndex.from_tuples(result, names=name) + if out.nlevels == 1: + # We had all tuples of length-one, which are + # better represented as a regular Index. + out = out.get_level_values(0) + return out + else: + return Index._with_infer(result, name=name) + else: + index = self._orig.index + # This is a mess. + dtype: DtypeObj | str | None + vdtype = getattr(result, "dtype", None) + if self._is_string: + if is_bool_dtype(vdtype): + dtype = result.dtype + elif returns_string: + dtype = self._orig.dtype + else: + dtype = vdtype + else: + dtype = vdtype + + if expand: + cons = self._orig._constructor_expanddim + result = cons(result, columns=name, index=index, dtype=dtype) + else: + # Must be a Series + cons = self._orig._constructor + result = cons(result, name=name, index=index, dtype=dtype) + result = result.__finalize__(self._orig, method="str") + if name is not None and result.ndim == 1: + # __finalize__ might copy over the original name, but we may + # want the new name (e.g. str.extract). + result.name = name + return result + + def _get_series_list(self, others): + """ + Auxiliary function for :meth:`str.cat`. Turn potentially mixed input + into a list of Series (elements without an index must match the length + of the calling Series/Index). + + Parameters + ---------- + others : Series, DataFrame, np.ndarray, list-like or list-like of + Objects that are either Series, Index or np.ndarray (1-dim). + + Returns + ------- + list of Series + Others transformed into list of Series. + """ + from pandas import ( + DataFrame, + Series, + ) + + # self._orig is either Series or Index + idx = self._orig if isinstance(self._orig, ABCIndex) else self._orig.index + + # Generally speaking, all objects without an index inherit the index + # `idx` of the calling Series/Index - i.e. must have matching length. + # Objects with an index (i.e. Series/Index/DataFrame) keep their own. 
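+        # Editor's note (added comment): the branches below normalise every
+        # accepted input shape to a list of Series: a DataFrame or 2-D ndarray
+        # contributes one Series per column (indexed like the caller), and a
+        # nested list-like is unpacked recursively, one element at a time.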
+ if isinstance(others, ABCSeries): + return [others] + elif isinstance(others, ABCIndex): + return [Series(others._values, index=idx, dtype=others.dtype)] + elif isinstance(others, ABCDataFrame): + return [others[x] for x in others] + elif isinstance(others, np.ndarray) and others.ndim == 2: + others = DataFrame(others, index=idx) + return [others[x] for x in others] + elif is_list_like(others, allow_sets=False): + others = list(others) # ensure iterators do not get read twice etc + + # in case of list-like `others`, all elements must be + # either Series/Index/np.ndarray (1-dim)... + if all( + isinstance(x, (ABCSeries, ABCIndex)) + or (isinstance(x, np.ndarray) and x.ndim == 1) + for x in others + ): + los: list[Series] = [] + while others: # iterate through list and append each element + los = los + self._get_series_list(others.pop(0)) + return los + # ... or just strings + elif all(not is_list_like(x) for x in others): + return [Series(others, index=idx)] + raise TypeError( + "others must be Series, Index, DataFrame, np.ndarray " + "or list-like (either containing only strings or " + "containing only objects of type Series/Index/" + "np.ndarray[1-dim])" + ) + + @forbid_nonstring_types(["bytes", "mixed", "mixed-integer"]) + def cat( + self, others=None, sep=None, na_rep=None, join="left" + ) -> str | Series | Index: + """ + Concatenate strings in the Series/Index with given separator. + + If `others` is specified, this function concatenates the Series/Index + and elements of `others` element-wise. + If `others` is not passed, then all values in the Series/Index are + concatenated into a single string with a given `sep`. + + Parameters + ---------- + others : Series, Index, DataFrame, np.ndarray or list-like + Series, Index, DataFrame, np.ndarray (one- or two-dimensional) and + other list-likes of strings must have the same length as the + calling Series/Index, with the exception of indexed objects (i.e. + Series/Index/DataFrame) if `join` is not None. + + If others is a list-like that contains a combination of Series, + Index or np.ndarray (1-dim), then all elements will be unpacked and + must satisfy the above criteria individually. + + If others is None, the method returns the concatenation of all + strings in the calling Series/Index. + sep : str, default '' + The separator between the different elements/columns. By default + the empty string `''` is used. + na_rep : str or None, default None + Representation that is inserted for all missing values: + + - If `na_rep` is None, and `others` is None, missing values in the + Series/Index are omitted from the result. + - If `na_rep` is None, and `others` is not None, a row containing a + missing value in any of the columns (before concatenation) will + have a missing value in the result. + join : {'left', 'right', 'outer', 'inner'}, default 'left' + Determines the join-style between the calling Series/Index and any + Series/Index/DataFrame in `others` (objects without an index need + to match the length of the calling Series/Index). To disable + alignment, use `.values` on any Series/Index/DataFrame in `others`. + + .. versionadded:: 0.23.0 + .. versionchanged:: 1.0.0 + Changed default of `join` from None to `'left'`. + + Returns + ------- + str, Series or Index + If `others` is None, `str` is returned, otherwise a `Series/Index` + (same type as caller) of objects is returned. + + See Also + -------- + split : Split each string in the Series/Index. + join : Join lists contained as elements in the Series/Index. 
+
+        Examples
+        --------
+        When not passing `others`, all values are concatenated into a single
+        string:
+
+        >>> s = pd.Series(['a', 'b', np.nan, 'd'])
+        >>> s.str.cat(sep=' ')
+        'a b d'
+
+        By default, NA values in the Series are ignored. Using `na_rep`, they
+        can be given a representation:
+
+        >>> s.str.cat(sep=' ', na_rep='?')
+        'a b ? d'
+
+        If `others` is specified, corresponding values are concatenated with
+        the separator. Result will be a Series of strings.
+
+        >>> s.str.cat(['A', 'B', 'C', 'D'], sep=',')
+        0    a,A
+        1    b,B
+        2    NaN
+        3    d,D
+        dtype: object
+
+        Missing values will remain missing in the result, but can again be
+        represented using `na_rep`
+
+        >>> s.str.cat(['A', 'B', 'C', 'D'], sep=',', na_rep='-')
+        0    a,A
+        1    b,B
+        2    -,C
+        3    d,D
+        dtype: object
+
+        If `sep` is not specified, the values are concatenated without
+        separation.
+
+        >>> s.str.cat(['A', 'B', 'C', 'D'], na_rep='-')
+        0    aA
+        1    bB
+        2    -C
+        3    dD
+        dtype: object
+
+        Series with different indexes can be aligned before concatenation. The
+        `join`-keyword works as in other methods.
+
+        >>> t = pd.Series(['d', 'a', 'e', 'c'], index=[3, 0, 4, 2])
+        >>> s.str.cat(t, join='left', na_rep='-')
+        0    aa
+        1    b-
+        2    -c
+        3    dd
+        dtype: object
+        >>>
+        >>> s.str.cat(t, join='outer', na_rep='-')
+        0    aa
+        1    b-
+        2    -c
+        3    dd
+        4    -e
+        dtype: object
+        >>>
+        >>> s.str.cat(t, join='inner', na_rep='-')
+        0    aa
+        2    -c
+        3    dd
+        dtype: object
+        >>>
+        >>> s.str.cat(t, join='right', na_rep='-')
+        3    dd
+        0    aa
+        4    -e
+        2    -c
+        dtype: object
+
+        For more examples, see :ref:`here <text.concatenate>`.
+        """
+        # TODO: dispatch
+        from pandas import (
+            Index,
+            Series,
+            concat,
+        )
+
+        if isinstance(others, str):
+            raise ValueError("Did you mean to supply a `sep` keyword?")
+        if sep is None:
+            sep = ""
+
+        if isinstance(self._orig, ABCIndex):
+            data = Series(self._orig, index=self._orig, dtype=self._orig.dtype)
+        else:  # Series
+            data = self._orig
+
+        # concatenate Series/Index with itself if no "others"
+        if others is None:
+            # error: Incompatible types in assignment (expression has type
+            # "ndarray", variable has type "Series")
+            data = ensure_object(data)  # type: ignore[assignment]
+            na_mask = isna(data)
+            if na_rep is None and na_mask.any():
+                return sep.join(data[~na_mask])
+            elif na_rep is not None and na_mask.any():
+                return sep.join(np.where(na_mask, na_rep, data))
+            else:
+                return sep.join(data)
+
+        try:
+            # turn anything in "others" into lists of Series
+            others = self._get_series_list(others)
+        except ValueError as err:  # do not catch TypeError raised by _get_series_list
+            raise ValueError(
+                "If `others` contains arrays or lists (or other "
+                "list-likes without an index), these must all be "
+                "of the same length as the calling Series/Index."
+ ) from err + + # align if required + if any(not data.index.equals(x.index) for x in others): + # Need to add keys for uniqueness in case of duplicate columns + others = concat( + others, + axis=1, + join=(join if join == "inner" else "outer"), + keys=range(len(others)), + sort=False, + copy=False, + ) + data, others = data.align(others, join=join) + others = [others[x] for x in others] # again list of Series + + all_cols = [ensure_object(x) for x in [data] + others] + na_masks = np.array([isna(x) for x in all_cols]) + union_mask = np.logical_or.reduce(na_masks, axis=0) + + if na_rep is None and union_mask.any(): + # no na_rep means NaNs for all rows where any column has a NaN + # only necessary if there are actually any NaNs + result = np.empty(len(data), dtype=object) + np.putmask(result, union_mask, np.nan) + + not_masked = ~union_mask + result[not_masked] = cat_safe([x[not_masked] for x in all_cols], sep) + elif na_rep is not None and union_mask.any(): + # fill NaNs with na_rep in case there are actually any NaNs + all_cols = [ + np.where(nm, na_rep, col) for nm, col in zip(na_masks, all_cols) + ] + result = cat_safe(all_cols, sep) + else: + # no NaNs - can just concatenate + result = cat_safe(all_cols, sep) + + out: Index | Series + if isinstance(self._orig, ABCIndex): + # add dtype for case that result is all-NA + + out = Index(result, dtype=object, name=self._orig.name) + else: # Series + if is_categorical_dtype(self._orig.dtype): + # We need to infer the new categories. + dtype = None + else: + dtype = self._orig.dtype + res_ser = Series( + result, dtype=dtype, index=data.index, name=self._orig.name + ) + out = res_ser.__finalize__(self._orig, method="str_cat") + return out + + _shared_docs[ + "str_split" + ] = r""" + Split strings around given separator/delimiter. + + Splits the string in the Series/Index from the %(side)s, + at the specified delimiter string. + + Parameters + ---------- + pat : str or compiled regex, optional + String or regular expression to split on. + If not specified, split on whitespace. + n : int, default -1 (all) + Limit number of splits in output. + ``None``, 0 and -1 will be interpreted as return all splits. + expand : bool, default False + Expand the split strings into separate columns. + + - If ``True``, return DataFrame/MultiIndex expanding dimensionality. + - If ``False``, return Series/Index, containing lists of strings. + + regex : bool, default None + Determines if the passed-in pattern is a regular expression: + + - If ``True``, assumes the passed-in pattern is a regular expression + - If ``False``, treats the pattern as a literal string. + - If ``None`` and `pat` length is 1, treats `pat` as a literal string. + - If ``None`` and `pat` length is not 1, treats `pat` as a regular expression. + - Cannot be set to False if `pat` is a compiled regex + + .. versionadded:: 1.4.0 + + Returns + ------- + Series, Index, DataFrame or MultiIndex + Type matches caller unless ``expand=True`` (see Notes). + + Raises + ------ + ValueError + * if `regex` is False and `pat` is a compiled regex + + See Also + -------- + Series.str.split : Split strings around given separator/delimiter. + Series.str.rsplit : Splits string around given separator/delimiter, + starting from the right. + Series.str.join : Join lists contained as elements in the Series/Index + with passed delimiter. + str.split : Standard library version for split. + str.rsplit : Standard library version for rsplit. 
+ + Notes + ----- + The handling of the `n` keyword depends on the number of found splits: + + - If found splits > `n`, make first `n` splits only + - If found splits <= `n`, make all splits + - If for a certain row the number of found splits < `n`, + append `None` for padding up to `n` if ``expand=True`` + + If using ``expand=True``, Series and Index callers return DataFrame and + MultiIndex objects, respectively. + + Use of `regex=False` with a `pat` as a compiled regex will raise + an error. + + Examples + -------- + >>> s = pd.Series( + ... [ + ... "this is a regular sentence", + ... "https://docs.python.org/3/tutorial/index.html", + ... np.nan + ... ] + ... ) + >>> s + 0 this is a regular sentence + 1 https://docs.python.org/3/tutorial/index.html + 2 NaN + dtype: object + + In the default setting, the string is split by whitespace. + + >>> s.str.split() + 0 [this, is, a, regular, sentence] + 1 [https://docs.python.org/3/tutorial/index.html] + 2 NaN + dtype: object + + Without the `n` parameter, the outputs of `rsplit` and `split` + are identical. + + >>> s.str.rsplit() + 0 [this, is, a, regular, sentence] + 1 [https://docs.python.org/3/tutorial/index.html] + 2 NaN + dtype: object + + The `n` parameter can be used to limit the number of splits on the + delimiter. The outputs of `split` and `rsplit` are different. + + >>> s.str.split(n=2) + 0 [this, is, a regular sentence] + 1 [https://docs.python.org/3/tutorial/index.html] + 2 NaN + dtype: object + + >>> s.str.rsplit(n=2) + 0 [this is a, regular, sentence] + 1 [https://docs.python.org/3/tutorial/index.html] + 2 NaN + dtype: object + + The `pat` parameter can be used to split by other characters. + + >>> s.str.split(pat="/") + 0 [this is a regular sentence] + 1 [https:, , docs.python.org, 3, tutorial, index... + 2 NaN + dtype: object + + When using ``expand=True``, the split elements will expand out into + separate columns. If NaN is present, it is propagated throughout + the columns during the split. + + >>> s.str.split(expand=True) + 0 1 2 3 4 + 0 this is a regular sentence + 1 https://docs.python.org/3/tutorial/index.html None None None None + 2 NaN NaN NaN NaN NaN + + For slightly more complex use cases like splitting the html document name + from a url, a combination of parameter settings can be used. + + >>> s.str.rsplit("/", n=1, expand=True) + 0 1 + 0 this is a regular sentence None + 1 https://docs.python.org/3/tutorial index.html + 2 NaN NaN + + Remember to escape special characters when explicitly using regular expressions. + + >>> s = pd.Series(["foo and bar plus baz"]) + >>> s.str.split(r"and|plus", expand=True) + 0 1 2 + 0 foo bar baz + + Regular expressions can be used to handle urls or file names. + When `pat` is a string and ``regex=None`` (the default), the given `pat` is compiled + as a regex only if ``len(pat) != 1``. 
+ + >>> s = pd.Series(['foojpgbar.jpg']) + >>> s.str.split(r".", expand=True) + 0 1 + 0 foojpgbar jpg + + >>> s.str.split(r"\.jpg", expand=True) + 0 1 + 0 foojpgbar + + When ``regex=True``, `pat` is interpreted as a regex + + >>> s.str.split(r"\.jpg", regex=True, expand=True) + 0 1 + 0 foojpgbar + + A compiled regex can be passed as `pat` + + >>> import re + >>> s.str.split(re.compile(r"\.jpg"), expand=True) + 0 1 + 0 foojpgbar + + When ``regex=False``, `pat` is interpreted as the string itself + + >>> s.str.split(r"\.jpg", regex=False, expand=True) + 0 + 0 foojpgbar.jpg + """ + + @Appender(_shared_docs["str_split"] % {"side": "beginning", "method": "split"}) + @forbid_nonstring_types(["bytes"]) + def split( + self, + pat: str | re.Pattern | None = None, + n=-1, + expand=False, + *, + regex: bool | None = None, + ): + if regex is False and is_re(pat): + raise ValueError( + "Cannot use a compiled regex as replacement pattern with regex=False" + ) + if is_re(pat): + regex = True + result = self._data.array._str_split(pat, n, expand, regex) + return self._wrap_result(result, returns_string=expand, expand=expand) + + @Appender(_shared_docs["str_split"] % {"side": "end", "method": "rsplit"}) + @forbid_nonstring_types(["bytes"]) + def rsplit(self, pat=None, n=-1, expand=False): + result = self._data.array._str_rsplit(pat, n=n) + return self._wrap_result(result, expand=expand, returns_string=expand) + + _shared_docs[ + "str_partition" + ] = """ + Split the string at the %(side)s occurrence of `sep`. + + This method splits the string at the %(side)s occurrence of `sep`, + and returns 3 elements containing the part before the separator, + the separator itself, and the part after the separator. + If the separator is not found, return %(return)s. + + Parameters + ---------- + sep : str, default whitespace + String to split on. + expand : bool, default True + If True, return DataFrame/MultiIndex expanding dimensionality. + If False, return Series/Index. + + Returns + ------- + DataFrame/MultiIndex or Series/Index of objects + + See Also + -------- + %(also)s + Series.str.split : Split strings around given separators. + str.partition : Standard library version. 
+ + Examples + -------- + + >>> s = pd.Series(['Linda van der Berg', 'George Pitt-Rivers']) + >>> s + 0 Linda van der Berg + 1 George Pitt-Rivers + dtype: object + + >>> s.str.partition() + 0 1 2 + 0 Linda van der Berg + 1 George Pitt-Rivers + + To partition by the last space instead of the first one: + + >>> s.str.rpartition() + 0 1 2 + 0 Linda van der Berg + 1 George Pitt-Rivers + + To partition by something different than a space: + + >>> s.str.partition('-') + 0 1 2 + 0 Linda van der Berg + 1 George Pitt - Rivers + + To return a Series containing tuples instead of a DataFrame: + + >>> s.str.partition('-', expand=False) + 0 (Linda van der Berg, , ) + 1 (George Pitt, -, Rivers) + dtype: object + + Also available on indices: + + >>> idx = pd.Index(['X 123', 'Y 999']) + >>> idx + Index(['X 123', 'Y 999'], dtype='object') + + Which will create a MultiIndex: + + >>> idx.str.partition() + MultiIndex([('X', ' ', '123'), + ('Y', ' ', '999')], + ) + + Or an index with tuples with ``expand=False``: + + >>> idx.str.partition(expand=False) + Index([('X', ' ', '123'), ('Y', ' ', '999')], dtype='object') + """ + + @Appender( + _shared_docs["str_partition"] + % { + "side": "first", + "return": "3 elements containing the string itself, followed by two " + "empty strings", + "also": "rpartition : Split the string at the last occurrence of `sep`.", + } + ) + @forbid_nonstring_types(["bytes"]) + def partition(self, sep=" ", expand=True): + result = self._data.array._str_partition(sep, expand) + return self._wrap_result(result, expand=expand, returns_string=expand) + + @Appender( + _shared_docs["str_partition"] + % { + "side": "last", + "return": "3 elements containing two empty strings, followed by the " + "string itself", + "also": "partition : Split the string at the first occurrence of `sep`.", + } + ) + @forbid_nonstring_types(["bytes"]) + def rpartition(self, sep=" ", expand=True): + result = self._data.array._str_rpartition(sep, expand) + return self._wrap_result(result, expand=expand, returns_string=expand) + + def get(self, i): + """ + Extract element from each component at specified position. + + Extract element from lists, tuples, or strings in each element in the + Series/Index. + + Parameters + ---------- + i : int + Position of element to extract. + + Returns + ------- + Series or Index + + Examples + -------- + >>> s = pd.Series(["String", + ... (1, 2, 3), + ... ["a", "b", "c"], + ... 123, + ... -456, + ... {1: "Hello", "2": "World"}]) + >>> s + 0 String + 1 (1, 2, 3) + 2 [a, b, c] + 3 123 + 4 -456 + 5 {1: 'Hello', '2': 'World'} + dtype: object + + >>> s.str.get(1) + 0 t + 1 2 + 2 b + 3 NaN + 4 NaN + 5 Hello + dtype: object + + >>> s.str.get(-1) + 0 g + 1 3 + 2 c + 3 NaN + 4 NaN + 5 None + dtype: object + """ + result = self._data.array._str_get(i) + return self._wrap_result(result) + + @forbid_nonstring_types(["bytes"]) + def join(self, sep): + """ + Join lists contained as elements in the Series/Index with passed delimiter. + + If the elements of a Series are lists themselves, join the content of these + lists using the delimiter passed to the function. + This function is an equivalent to :meth:`str.join`. + + Parameters + ---------- + sep : str + Delimiter to use between list entries. + + Returns + ------- + Series/Index: object + The list entries concatenated by intervening occurrences of the + delimiter. + + Raises + ------ + AttributeError + If the supplied Series contains neither strings nor lists. + + See Also + -------- + str.join : Standard library version of this method. 
+ Series.str.split : Split strings around given separator/delimiter. + + Notes + ----- + If any of the list items is not a string object, the result of the join + will be `NaN`. + + Examples + -------- + Example with a list that contains non-string elements. + + >>> s = pd.Series([['lion', 'elephant', 'zebra'], + ... [1.1, 2.2, 3.3], + ... ['cat', np.nan, 'dog'], + ... ['cow', 4.5, 'goat'], + ... ['duck', ['swan', 'fish'], 'guppy']]) + >>> s + 0 [lion, elephant, zebra] + 1 [1.1, 2.2, 3.3] + 2 [cat, nan, dog] + 3 [cow, 4.5, goat] + 4 [duck, [swan, fish], guppy] + dtype: object + + Join all lists using a '-'. The lists containing object(s) of types other + than str will produce a NaN. + + >>> s.str.join('-') + 0 lion-elephant-zebra + 1 NaN + 2 NaN + 3 NaN + 4 NaN + dtype: object + """ + result = self._data.array._str_join(sep) + return self._wrap_result(result) + + @forbid_nonstring_types(["bytes"]) + def contains(self, pat, case=True, flags=0, na=None, regex=True): + r""" + Test if pattern or regex is contained within a string of a Series or Index. + + Return boolean Series or Index based on whether a given pattern or regex is + contained within a string of a Series or Index. + + Parameters + ---------- + pat : str + Character sequence or regular expression. + case : bool, default True + If True, case sensitive. + flags : int, default 0 (no flags) + Flags to pass through to the re module, e.g. re.IGNORECASE. + na : scalar, optional + Fill value for missing values. The default depends on dtype of the + array. For object-dtype, ``numpy.nan`` is used. For ``StringDtype``, + ``pandas.NA`` is used. + regex : bool, default True + If True, assumes the pat is a regular expression. + + If False, treats the pat as a literal string. + + Returns + ------- + Series or Index of boolean values + A Series or Index of boolean values indicating whether the + given pattern is contained within the string of each element + of the Series or Index. + + See Also + -------- + match : Analogous, but stricter, relying on re.match instead of re.search. + Series.str.startswith : Test if the start of each string element matches a + pattern. + Series.str.endswith : Same as startswith, but tests the end of string. + + Examples + -------- + Returning a Series of booleans using only a literal pattern. + + >>> s1 = pd.Series(['Mouse', 'dog', 'house and parrot', '23', np.NaN]) + >>> s1.str.contains('og', regex=False) + 0 False + 1 True + 2 False + 3 False + 4 NaN + dtype: object + + Returning an Index of booleans using only a literal pattern. + + >>> ind = pd.Index(['Mouse', 'dog', 'house and parrot', '23.0', np.NaN]) + >>> ind.str.contains('23', regex=False) + Index([False, False, False, True, nan], dtype='object') + + Specifying case sensitivity using `case`. + + >>> s1.str.contains('oG', case=True, regex=True) + 0 False + 1 False + 2 False + 3 False + 4 NaN + dtype: object + + Specifying `na` to be `False` instead of `NaN` replaces NaN values + with `False`. If Series or Index does not contain NaN values + the resultant dtype will be `bool`, otherwise, an `object` dtype. + + >>> s1.str.contains('og', na=False, regex=True) + 0 False + 1 True + 2 False + 3 False + 4 False + dtype: bool + + Returning 'house' or 'dog' when either expression occurs in a string. + + >>> s1.str.contains('house|dog', regex=True) + 0 False + 1 True + 2 True + 3 False + 4 NaN + dtype: object + + Ignoring case sensitivity using `flags` with regex. 
+
+        >>> import re
+        >>> s1.str.contains('PARROT', flags=re.IGNORECASE, regex=True)
+        0    False
+        1    False
+        2     True
+        3    False
+        4      NaN
+        dtype: object
+
+        Returning any digit using regular expression.
+
+        >>> s1.str.contains('\\d', regex=True)
+        0    False
+        1    False
+        2    False
+        3     True
+        4      NaN
+        dtype: object
+
+        Ensure `pat` is not a literal pattern when `regex` is set to True.
+        Note in the following example one might expect only `s2[1]` and `s2[3]` to
+        return `True`. However, '.0' as a regex matches any character
+        followed by a 0.
+
+        >>> s2 = pd.Series(['40', '40.0', '41', '41.0', '35'])
+        >>> s2.str.contains('.0', regex=True)
+        0     True
+        1     True
+        2    False
+        3     True
+        4    False
+        dtype: bool
+        """
+        if regex and re.compile(pat).groups:
+            warnings.warn(
+                "This pattern is interpreted as a regular expression, and has "
+                "match groups. To actually get the groups, use str.extract.",
+                UserWarning,
+                stacklevel=find_stack_level(),
+            )
+
+        result = self._data.array._str_contains(pat, case, flags, na, regex)
+        return self._wrap_result(result, fill_value=na, returns_string=False)
+
+    @forbid_nonstring_types(["bytes"])
+    def match(self, pat, case=True, flags=0, na=None):
+        """
+        Determine if each string starts with a match of a regular expression.
+
+        Parameters
+        ----------
+        pat : str
+            Character sequence or regular expression.
+        case : bool, default True
+            If True, case sensitive.
+        flags : int, default 0 (no flags)
+            Regex module flags, e.g. re.IGNORECASE.
+        na : scalar, optional
+            Fill value for missing values. The default depends on dtype of the
+            array. For object-dtype, ``numpy.nan`` is used. For ``StringDtype``,
+            ``pandas.NA`` is used.
+
+        Returns
+        -------
+        Series/Index/array of boolean values
+
+        See Also
+        --------
+        fullmatch : Stricter matching that requires the entire string to match.
+        contains : Analogous, but less strict, relying on re.search instead of
+            re.match.
+        extract : Extract matched groups.
+        """
+        result = self._data.array._str_match(pat, case=case, flags=flags, na=na)
+        return self._wrap_result(result, fill_value=na, returns_string=False)
+
+    @forbid_nonstring_types(["bytes"])
+    def fullmatch(self, pat, case=True, flags=0, na=None):
+        """
+        Determine if each string entirely matches a regular expression.
+
+        .. versionadded:: 1.1.0
+
+        Parameters
+        ----------
+        pat : str
+            Character sequence or regular expression.
+        case : bool, default True
+            If True, case sensitive.
+        flags : int, default 0 (no flags)
+            Regex module flags, e.g. re.IGNORECASE.
+        na : scalar, optional
+            Fill value for missing values. The default depends on dtype of the
+            array. For object-dtype, ``numpy.nan`` is used. For ``StringDtype``,
+            ``pandas.NA`` is used.
+
+        Returns
+        -------
+        Series/Index/array of boolean values
+
+        See Also
+        --------
+        match : Similar, but also returns `True` when only a *prefix* of the string
+            matches the regular expression.
+        extract : Extract matched groups.
+        """
+        result = self._data.array._str_fullmatch(pat, case=case, flags=flags, na=na)
+        return self._wrap_result(result, fill_value=na, returns_string=False)
+
+    @forbid_nonstring_types(["bytes"])
+    def replace(
+        self,
+        pat: str | re.Pattern,
+        repl: str | Callable,
+        n: int = -1,
+        case: bool | None = None,
+        flags: int = 0,
+        regex: bool | None = None,
+    ):
+        r"""
+        Replace each occurrence of pattern/regex in the Series/Index.
+
+        Equivalent to :meth:`str.replace` or :func:`re.sub`, depending on
+        the regex value.
+
+        Parameters
+        ----------
+        pat : str or compiled regex
+            String can be a character sequence or regular expression.
+        repl : str or callable
+            Replacement string or a callable. The callable is passed the regex
+            match object and must return a replacement string to be used.
+            See :func:`re.sub`.
+        n : int, default -1 (all)
+            Number of replacements to make from start.
+        case : bool, default None
+            Determines if replace is case sensitive:
+
+            - If True, case sensitive (the default if `pat` is a string)
+            - Set to False for case insensitive
+            - Cannot be set if `pat` is a compiled regex.
+
+        flags : int, default 0 (no flags)
+            Regex module flags, e.g. re.IGNORECASE. Cannot be set if `pat` is a compiled
+            regex.
+        regex : bool, default True
+            Determines if the passed-in pattern is a regular expression:
+
+            - If True, assumes the passed-in pattern is a regular expression.
+            - If False, treats the pattern as a literal string
+            - Cannot be set to False if `pat` is a compiled regex or `repl` is
+              a callable.
+
+            .. versionadded:: 0.23.0
+
+        Returns
+        -------
+        Series or Index of object
+            A copy of the object with all matching occurrences of `pat` replaced by
+            `repl`.
+
+        Raises
+        ------
+        ValueError
+            * if `regex` is False and `repl` is a callable or `pat` is a compiled
+              regex
+            * if `pat` is a compiled regex and `case` or `flags` is set
+
+        Notes
+        -----
+        When `pat` is a compiled regex, all flags should be included in the
+        compiled regex. Use of `case`, `flags`, or `regex=False` with a compiled
+        regex will raise an error.
+
+        Examples
+        --------
+        When `pat` is a string and `regex` is True (the default), the given `pat`
+        is compiled as a regex. When `repl` is a string, it replaces matching
+        regex patterns as with :meth:`re.sub`. NaN value(s) in the Series are
+        left as is:
+
+        >>> pd.Series(['foo', 'fuz', np.nan]).str.replace('f.', 'ba', regex=True)
+        0    bao
+        1    baz
+        2    NaN
+        dtype: object
+
+        When `pat` is a string and `regex` is False, every `pat` is replaced with
+        `repl` as with :meth:`str.replace`:
+
+        >>> pd.Series(['f.o', 'fuz', np.nan]).str.replace('f.', 'ba', regex=False)
+        0    bao
+        1    fuz
+        2    NaN
+        dtype: object
+
+        When `repl` is a callable, it is called on every `pat` using
+        :func:`re.sub`. The callable should expect one positional argument
+        (a regex object) and return a string.
+
+        To get the idea:
+
+        >>> pd.Series(['foo', 'fuz', np.nan]).str.replace('f', repr, regex=True)
+        0    <re.Match object; span=(0, 1), match='f'>oo
+        1    <re.Match object; span=(0, 1), match='f'>uz
+        2                                            NaN
+        dtype: object
+
+        Reverse every lowercase alphabetic word:
+
+        >>> repl = lambda m: m.group(0)[::-1]
+        >>> ser = pd.Series(['foo 123', 'bar baz', np.nan])
+        >>> ser.str.replace(r'[a-z]+', repl, regex=True)
+        0    oof 123
+        1    rab zab
+        2        NaN
+        dtype: object
+
+        Using regex groups (extract second group and swap case):
+
+        >>> pat = r"(?P<one>\w+) (?P<two>\w+) (?P<three>\w+)"
+        >>> repl = lambda m: m.group('two').swapcase()
+        >>> ser = pd.Series(['One Two Three', 'Foo Bar Baz'])
+        >>> ser.str.replace(pat, repl, regex=True)
+        0    tWO
+        1    bAR
+        dtype: object
+
+        Using a compiled regex with flags
+
+        >>> import re
+        >>> regex_pat = re.compile(r'FUZ', flags=re.IGNORECASE)
+        >>> pd.Series(['foo', 'fuz', np.nan]).str.replace(regex_pat, 'bar', regex=True)
+        0    foo
+        1    bar
+        2    NaN
+        dtype: object
+        """
+        if regex is None:
+            if isinstance(pat, str) and any(c in pat for c in ".+*|^$?[](){}\\"):
+                # warn only in cases where regex behavior would differ from literal
+                msg = (
+                    "The default value of regex will change from True to False "
+                    "in a future version."
+ ) + if len(pat) == 1: + msg += ( + " In addition, single character regular expressions will " + "*not* be treated as literal strings when regex=True." + ) + warnings.warn(msg, FutureWarning, stacklevel=find_stack_level()) + + # Check whether repl is valid (GH 13438, GH 15055) + if not (isinstance(repl, str) or callable(repl)): + raise TypeError("repl must be a string or callable") + + is_compiled_re = is_re(pat) + if regex or regex is None: + if is_compiled_re and (case is not None or flags != 0): + raise ValueError( + "case and flags cannot be set when pat is a compiled regex" + ) + + elif is_compiled_re: + raise ValueError( + "Cannot use a compiled regex as replacement pattern with regex=False" + ) + elif callable(repl): + raise ValueError("Cannot use a callable replacement when regex=False") + + # The current behavior is to treat single character patterns as literal strings, + # even when ``regex`` is set to ``True``. + if isinstance(pat, str) and len(pat) == 1: + regex = False + + if regex is None: + regex = True + + if case is None: + case = True + + result = self._data.array._str_replace( + pat, repl, n=n, case=case, flags=flags, regex=regex + ) + return self._wrap_result(result) + + @forbid_nonstring_types(["bytes"]) + def repeat(self, repeats): + """ + Duplicate each string in the Series or Index. + + Parameters + ---------- + repeats : int or sequence of int + Same value for all (int) or different value per (sequence). + + Returns + ------- + Series or Index of object + Series or Index of repeated string objects specified by + input parameter repeats. + + Examples + -------- + >>> s = pd.Series(['a', 'b', 'c']) + >>> s + 0 a + 1 b + 2 c + dtype: object + + Single int repeats string in Series + + >>> s.str.repeat(repeats=2) + 0 aa + 1 bb + 2 cc + dtype: object + + Sequence of int repeats corresponding string in Series + + >>> s.str.repeat(repeats=[1, 2, 3]) + 0 a + 1 bb + 2 ccc + dtype: object + """ + result = self._data.array._str_repeat(repeats) + return self._wrap_result(result) + + @forbid_nonstring_types(["bytes"]) + def pad(self, width, side="left", fillchar=" "): + """ + Pad strings in the Series/Index up to width. + + Parameters + ---------- + width : int + Minimum width of resulting string; additional characters will be filled + with character defined in `fillchar`. + side : {'left', 'right', 'both'}, default 'left' + Side from which to fill resulting string. + fillchar : str, default ' ' + Additional character for filling, default is whitespace. + + Returns + ------- + Series or Index of object + Returns Series or Index with minimum number of char in object. + + See Also + -------- + Series.str.rjust : Fills the left side of strings with an arbitrary + character. Equivalent to ``Series.str.pad(side='left')``. + Series.str.ljust : Fills the right side of strings with an arbitrary + character. Equivalent to ``Series.str.pad(side='right')``. + Series.str.center : Fills both sides of strings with an arbitrary + character. Equivalent to ``Series.str.pad(side='both')``. + Series.str.zfill : Pad strings in the Series/Index by prepending '0' + character. Equivalent to ``Series.str.pad(side='left', fillchar='0')``. 
+
+        Examples
+        --------
+        >>> s = pd.Series(["caribou", "tiger"])
+        >>> s
+        0    caribou
+        1      tiger
+        dtype: object
+
+        >>> s.str.pad(width=10)
+        0       caribou
+        1         tiger
+        dtype: object
+
+        >>> s.str.pad(width=10, side='right', fillchar='-')
+        0    caribou---
+        1    tiger-----
+        dtype: object
+
+        >>> s.str.pad(width=10, side='both', fillchar='-')
+        0    -caribou--
+        1    --tiger---
+        dtype: object
+        """
+        if not isinstance(fillchar, str):
+            msg = f"fillchar must be a character, not {type(fillchar).__name__}"
+            raise TypeError(msg)
+
+        if len(fillchar) != 1:
+            raise TypeError("fillchar must be a character, not str")
+
+        if not is_integer(width):
+            msg = f"width must be of integer type, not {type(width).__name__}"
+            raise TypeError(msg)
+
+        result = self._data.array._str_pad(width, side=side, fillchar=fillchar)
+        return self._wrap_result(result)
+
+    _shared_docs[
+        "str_pad"
+    ] = """
+    Pad %(side)s side of strings in the Series/Index.
+
+    Equivalent to :meth:`str.%(method)s`.
+
+    Parameters
+    ----------
+    width : int
+        Minimum width of resulting string; additional characters will be filled
+        with ``fillchar``.
+    fillchar : str
+        Additional character for filling, default is whitespace.
+
+    Returns
+    -------
+    filled : Series/Index of objects.
+    """
+
+    @Appender(_shared_docs["str_pad"] % {"side": "left and right", "method": "center"})
+    @forbid_nonstring_types(["bytes"])
+    def center(self, width, fillchar=" "):
+        return self.pad(width, side="both", fillchar=fillchar)
+
+    @Appender(_shared_docs["str_pad"] % {"side": "right", "method": "ljust"})
+    @forbid_nonstring_types(["bytes"])
+    def ljust(self, width, fillchar=" "):
+        return self.pad(width, side="right", fillchar=fillchar)
+
+    @Appender(_shared_docs["str_pad"] % {"side": "left", "method": "rjust"})
+    @forbid_nonstring_types(["bytes"])
+    def rjust(self, width, fillchar=" "):
+        return self.pad(width, side="left", fillchar=fillchar)
+
+    @forbid_nonstring_types(["bytes"])
+    def zfill(self, width):
+        """
+        Pad strings in the Series/Index by prepending '0' characters.
+
+        Strings in the Series/Index are padded with '0' characters on the
+        left of the string to reach a total string length `width`. Strings
+        in the Series/Index with length greater or equal to `width` are
+        unchanged.
+
+        Parameters
+        ----------
+        width : int
+            Minimum length of resulting string; strings with length less
+            than `width` will be prepended with '0' characters.
+
+        Returns
+        -------
+        Series/Index of objects.
+
+        See Also
+        --------
+        Series.str.rjust : Fills the left side of strings with an arbitrary
+            character.
+        Series.str.ljust : Fills the right side of strings with an arbitrary
+            character.
+        Series.str.pad : Fills the specified sides of strings with an arbitrary
+            character.
+        Series.str.center : Fills both sides of strings with an arbitrary
+            character.
+
+        Notes
+        -----
+        Differs from :meth:`str.zfill` which has special handling
+        for '+'/'-' in the string.
+
+        Examples
+        --------
+        >>> s = pd.Series(['-1', '1', '1000', 10, np.nan])
+        >>> s
+        0      -1
+        1       1
+        2    1000
+        3      10
+        4     NaN
+        dtype: object
+
+        Note that ``10`` and ``NaN`` are not strings, therefore they are
+        converted to ``NaN``. The minus sign in ``'-1'`` is treated as a
+        regular character and the zero is added to the left of it
+        (:meth:`str.zfill` would have moved it to the left). ``1000``
+        remains unchanged as it is longer than `width`.
+ + >>> s.str.zfill(3) + 0 0-1 + 1 001 + 2 1000 + 3 NaN + 4 NaN + dtype: object + """ + result = self.pad(width, side="left", fillchar="0") + return self._wrap_result(result) + + def slice(self, start=None, stop=None, step=None): + """ + Slice substrings from each element in the Series or Index. + + Parameters + ---------- + start : int, optional + Start position for slice operation. + stop : int, optional + Stop position for slice operation. + step : int, optional + Step size for slice operation. + + Returns + ------- + Series or Index of object + Series or Index from sliced substring from original string object. + + See Also + -------- + Series.str.slice_replace : Replace a slice with a string. + Series.str.get : Return element at position. + Equivalent to `Series.str.slice(start=i, stop=i+1)` with `i` + being the position. + + Examples + -------- + >>> s = pd.Series(["koala", "dog", "chameleon"]) + >>> s + 0 koala + 1 dog + 2 chameleon + dtype: object + + >>> s.str.slice(start=1) + 0 oala + 1 og + 2 hameleon + dtype: object + + >>> s.str.slice(start=-1) + 0 a + 1 g + 2 n + dtype: object + + >>> s.str.slice(stop=2) + 0 ko + 1 do + 2 ch + dtype: object + + >>> s.str.slice(step=2) + 0 kaa + 1 dg + 2 caeen + dtype: object + + >>> s.str.slice(start=0, stop=5, step=3) + 0 kl + 1 d + 2 cm + dtype: object + + Equivalent behaviour to: + + >>> s.str[0:5:3] + 0 kl + 1 d + 2 cm + dtype: object + """ + result = self._data.array._str_slice(start, stop, step) + return self._wrap_result(result) + + @forbid_nonstring_types(["bytes"]) + def slice_replace(self, start=None, stop=None, repl=None): + """ + Replace a positional slice of a string with another value. + + Parameters + ---------- + start : int, optional + Left index position to use for the slice. If not specified (None), + the slice is unbounded on the left, i.e. slice from the start + of the string. + stop : int, optional + Right index position to use for the slice. If not specified (None), + the slice is unbounded on the right, i.e. slice until the + end of the string. + repl : str, optional + String for replacement. If not specified (None), the sliced region + is replaced with an empty string. + + Returns + ------- + Series or Index + Same type as the original object. + + See Also + -------- + Series.str.slice : Just slicing without replacement. + + Examples + -------- + >>> s = pd.Series(['a', 'ab', 'abc', 'abdc', 'abcde']) + >>> s + 0 a + 1 ab + 2 abc + 3 abdc + 4 abcde + dtype: object + + Specify just `start`, meaning replace `start` until the end of the + string with `repl`. + + >>> s.str.slice_replace(1, repl='X') + 0 aX + 1 aX + 2 aX + 3 aX + 4 aX + dtype: object + + Specify just `stop`, meaning the start of the string to `stop` is replaced + with `repl`, and the rest of the string is included. + + >>> s.str.slice_replace(stop=2, repl='X') + 0 X + 1 X + 2 Xc + 3 Xdc + 4 Xcde + dtype: object + + Specify `start` and `stop`, meaning the slice from `start` to `stop` is + replaced with `repl`. Everything before or after `start` and `stop` is + included as is. + + >>> s.str.slice_replace(start=1, stop=3, repl='X') + 0 aX + 1 aX + 2 aX + 3 aXc + 4 aXde + dtype: object + """ + result = self._data.array._str_slice_replace(start, stop, repl) + return self._wrap_result(result) + + def decode(self, encoding, errors="strict"): + """ + Decode character string in the Series/Index using indicated encoding. + + Equivalent to :meth:`str.decode` in python2 and :meth:`bytes.decode` in + python3. 
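+
+        For example (the sample bytes here are chosen for illustration):
+
+        >>> pd.Series([b"cow", b"123"]).str.decode("ascii")
+        0    cow
+        1    123
+        dtype: object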
+ + Parameters + ---------- + encoding : str + errors : str, optional + + Returns + ------- + Series or Index + """ + # TODO: Add a similar _bytes interface. + if encoding in _cpython_optimized_decoders: + # CPython optimized implementation + f = lambda x: x.decode(encoding, errors) + else: + decoder = codecs.getdecoder(encoding) + f = lambda x: decoder(x, errors)[0] + arr = self._data.array + # assert isinstance(arr, (StringArray,)) + result = arr._str_map(f) + return self._wrap_result(result) + + @forbid_nonstring_types(["bytes"]) + def encode(self, encoding, errors="strict"): + """ + Encode character string in the Series/Index using indicated encoding. + + Equivalent to :meth:`str.encode`. + + Parameters + ---------- + encoding : str + errors : str, optional + + Returns + ------- + encoded : Series/Index of objects + """ + result = self._data.array._str_encode(encoding, errors) + return self._wrap_result(result, returns_string=False) + + _shared_docs[ + "str_strip" + ] = r""" + Remove %(position)s characters. + + Strip whitespaces (including newlines) or a set of specified characters + from each string in the Series/Index from %(side)s. + Equivalent to :meth:`str.%(method)s`. + + Parameters + ---------- + to_strip : str or None, default None + Specifying the set of characters to be removed. + All combinations of this set of characters will be stripped. + If None then whitespaces are removed. + + Returns + ------- + Series or Index of object + + See Also + -------- + Series.str.strip : Remove leading and trailing characters in Series/Index. + Series.str.lstrip : Remove leading characters in Series/Index. + Series.str.rstrip : Remove trailing characters in Series/Index. + + Examples + -------- + >>> s = pd.Series(['1. Ant. ', '2. Bee!\n', '3. Cat?\t', np.nan]) + >>> s + 0 1. Ant. + 1 2. Bee!\n + 2 3. Cat?\t + 3 NaN + dtype: object + + >>> s.str.strip() + 0 1. Ant. + 1 2. Bee! + 2 3. Cat? + 3 NaN + dtype: object + + >>> s.str.lstrip('123.') + 0 Ant. + 1 Bee!\n + 2 Cat?\t + 3 NaN + dtype: object + + >>> s.str.rstrip('.!? \n\t') + 0 1. Ant + 1 2. Bee + 2 3. Cat + 3 NaN + dtype: object + + >>> s.str.strip('123.!? \n\t') + 0 Ant + 1 Bee + 2 Cat + 3 NaN + dtype: object + """ + + @Appender( + _shared_docs["str_strip"] + % { + "side": "left and right sides", + "method": "strip", + "position": "leading and trailing", + } + ) + @forbid_nonstring_types(["bytes"]) + def strip(self, to_strip=None): + result = self._data.array._str_strip(to_strip) + return self._wrap_result(result) + + @Appender( + _shared_docs["str_strip"] + % {"side": "left side", "method": "lstrip", "position": "leading"} + ) + @forbid_nonstring_types(["bytes"]) + def lstrip(self, to_strip=None): + result = self._data.array._str_lstrip(to_strip) + return self._wrap_result(result) + + @Appender( + _shared_docs["str_strip"] + % {"side": "right side", "method": "rstrip", "position": "trailing"} + ) + @forbid_nonstring_types(["bytes"]) + def rstrip(self, to_strip=None): + result = self._data.array._str_rstrip(to_strip) + return self._wrap_result(result) + + _shared_docs[ + "str_removefix" + ] = r""" + Remove a %(side)s from an object series. If the %(side)s is not present, + the original string will be returned. + + Parameters + ---------- + %(side)s : str + Remove the %(side)s of the string. + + Returns + ------- + Series/Index: object + The Series or Index with given %(side)s removed. + + See Also + -------- + Series.str.remove%(other_side)s : Remove a %(other_side)s from an object series. 
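+
+    Notes
+    -----
+    At most one copy of the %(side)s is removed, mirroring
+    :meth:`str.remove%(side)s` in Python 3.9+; e.g. removing prefix
+    ``"str_"`` from ``"str_str_x"`` leaves ``"str_x"``.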
+ + Examples + -------- + >>> s = pd.Series(["str_foo", "str_bar", "no_prefix"]) + >>> s + 0 str_foo + 1 str_bar + 2 no_prefix + dtype: object + >>> s.str.removeprefix("str_") + 0 foo + 1 bar + 2 no_prefix + dtype: object + + >>> s = pd.Series(["foo_str", "bar_str", "no_suffix"]) + >>> s + 0 foo_str + 1 bar_str + 2 no_suffix + dtype: object + >>> s.str.removesuffix("_str") + 0 foo + 1 bar + 2 no_suffix + dtype: object + """ + + @Appender( + _shared_docs["str_removefix"] % {"side": "prefix", "other_side": "suffix"} + ) + @forbid_nonstring_types(["bytes"]) + def removeprefix(self, prefix): + result = self._data.array._str_removeprefix(prefix) + return self._wrap_result(result) + + @Appender( + _shared_docs["str_removefix"] % {"side": "suffix", "other_side": "prefix"} + ) + @forbid_nonstring_types(["bytes"]) + def removesuffix(self, suffix): + result = self._data.array._str_removesuffix(suffix) + return self._wrap_result(result) + + @forbid_nonstring_types(["bytes"]) + def wrap(self, width, **kwargs): + r""" + Wrap strings in Series/Index at specified line width. + + This method has the same keyword parameters and defaults as + :class:`textwrap.TextWrapper`. + + Parameters + ---------- + width : int + Maximum line width. + expand_tabs : bool, optional + If True, tab characters will be expanded to spaces (default: True). + replace_whitespace : bool, optional + If True, each whitespace character (as defined by string.whitespace) + remaining after tab expansion will be replaced by a single space + (default: True). + drop_whitespace : bool, optional + If True, whitespace that, after wrapping, happens to end up at the + beginning or end of a line is dropped (default: True). + break_long_words : bool, optional + If True, then words longer than width will be broken in order to ensure + that no lines are longer than width. If it is false, long words will + not be broken, and some lines may be longer than width (default: True). + break_on_hyphens : bool, optional + If True, wrapping will occur preferably on whitespace and right after + hyphens in compound words, as it is customary in English. If false, + only whitespaces will be considered as potentially good places for line + breaks, but you need to set break_long_words to false if you want truly + insecable words (default: True). + + Returns + ------- + Series or Index + + Notes + ----- + Internally, this method uses a :class:`textwrap.TextWrapper` instance with + default settings. To achieve behavior matching R's stringr library str_wrap + function, use the arguments: + + - expand_tabs = False + - replace_whitespace = True + - drop_whitespace = True + - break_long_words = False + - break_on_hyphens = False + + Examples + -------- + >>> s = pd.Series(['line to be wrapped', 'another line to be wrapped']) + >>> s.str.wrap(12) + 0 line to be\nwrapped + 1 another line\nto be\nwrapped + dtype: object + """ + result = self._data.array._str_wrap(width, **kwargs) + return self._wrap_result(result) + + @forbid_nonstring_types(["bytes"]) + def get_dummies(self, sep="|"): + """ + Return DataFrame of dummy/indicator variables for Series. + + Each string in Series is split by sep and returned as a DataFrame + of dummy/indicator variables. + + Parameters + ---------- + sep : str, default "|" + String to split on. + + Returns + ------- + DataFrame + Dummy variables corresponding to values of the Series. + + See Also + -------- + get_dummies : Convert categorical variable into dummy/indicator + variables. 
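+
+        Notes
+        -----
+        Conceptually, each element is split on ``sep`` and one indicator
+        column is created per distinct token; a sketch of the same token set:
+
+        >>> pd.Series(['a|b', 'a', 'a|c']).str.split('|').explode().unique()
+        array(['a', 'b', 'c'], dtype=object)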
+ + Examples + -------- + >>> pd.Series(['a|b', 'a', 'a|c']).str.get_dummies() + a b c + 0 1 1 0 + 1 1 0 0 + 2 1 0 1 + + >>> pd.Series(['a|b', np.nan, 'a|c']).str.get_dummies() + a b c + 0 1 1 0 + 1 0 0 0 + 2 1 0 1 + """ + # we need to cast to Series of strings as only that has all + # methods available for making the dummies... + result, name = self._data.array._str_get_dummies(sep) + return self._wrap_result( + result, + name=name, + expand=True, + returns_string=False, + ) + + @forbid_nonstring_types(["bytes"]) + def translate(self, table): + """ + Map all characters in the string through the given mapping table. + + Equivalent to standard :meth:`str.translate`. + + Parameters + ---------- + table : dict + Table is a mapping of Unicode ordinals to Unicode ordinals, strings, or + None. Unmapped characters are left untouched. + Characters mapped to None are deleted. :meth:`str.maketrans` is a + helper function for making translation tables. + + Returns + ------- + Series or Index + """ + result = self._data.array._str_translate(table) + return self._wrap_result(result) + + @forbid_nonstring_types(["bytes"]) + def count(self, pat, flags=0): + r""" + Count occurrences of pattern in each string of the Series/Index. + + This function is used to count the number of times a particular regex + pattern is repeated in each of the string elements of the + :class:`~pandas.Series`. + + Parameters + ---------- + pat : str + Valid regular expression. + flags : int, default 0, meaning no flags + Flags for the `re` module. For a complete list, `see here + `_. + **kwargs + For compatibility with other string methods. Not used. + + Returns + ------- + Series or Index + Same type as the calling object containing the integer counts. + + See Also + -------- + re : Standard library module for regular expressions. + str.count : Standard library version, without regular expression support. + + Notes + ----- + Some characters need to be escaped when passing in `pat`. + eg. ``'$'`` has a special meaning in regex and must be escaped when + finding this literal character. + + Examples + -------- + >>> s = pd.Series(['A', 'B', 'Aaba', 'Baca', np.nan, 'CABA', 'cat']) + >>> s.str.count('a') + 0 0.0 + 1 0.0 + 2 2.0 + 3 2.0 + 4 NaN + 5 0.0 + 6 1.0 + dtype: float64 + + Escape ``'$'`` to find the literal dollar sign. + + >>> s = pd.Series(['$', 'B', 'Aab$', '$$ca', 'C$B$', 'cat']) + >>> s.str.count('\\$') + 0 1 + 1 0 + 2 1 + 3 2 + 4 2 + 5 0 + dtype: int64 + + This is also available on Index + + >>> pd.Index(['A', 'A', 'Aaba', 'cat']).str.count('a') + Int64Index([0, 0, 2, 1], dtype='int64') + """ + result = self._data.array._str_count(pat, flags) + return self._wrap_result(result, returns_string=False) + + @forbid_nonstring_types(["bytes"]) + def startswith(self, pat, na=None): + """ + Test if the start of each string element matches a pattern. + + Equivalent to :meth:`str.startswith`. + + Parameters + ---------- + pat : str + Character sequence. Regular expressions are not accepted. + na : object, default NaN + Object shown if element tested is not a string. The default depends + on dtype of the array. For object-dtype, ``numpy.nan`` is used. + For ``StringDtype``, ``pandas.NA`` is used. + + Returns + ------- + Series or Index of bool + A Series of booleans indicating whether the given pattern matches + the start of each string element. + + See Also + -------- + str.startswith : Python standard library string method. + Series.str.endswith : Same as startswith, but tests the end of string. 
+ Series.str.contains : Tests if string element contains a pattern. + + Examples + -------- + >>> s = pd.Series(['bat', 'Bear', 'cat', np.nan]) + >>> s + 0 bat + 1 Bear + 2 cat + 3 NaN + dtype: object + + >>> s.str.startswith('b') + 0 True + 1 False + 2 False + 3 NaN + dtype: object + + Specifying `na` to be `False` instead of `NaN`. + + >>> s.str.startswith('b', na=False) + 0 True + 1 False + 2 False + 3 False + dtype: bool + """ + result = self._data.array._str_startswith(pat, na=na) + return self._wrap_result(result, returns_string=False) + + @forbid_nonstring_types(["bytes"]) + def endswith(self, pat, na=None): + """ + Test if the end of each string element matches a pattern. + + Equivalent to :meth:`str.endswith`. + + Parameters + ---------- + pat : str + Character sequence. Regular expressions are not accepted. + na : object, default NaN + Object shown if element tested is not a string. The default depends + on dtype of the array. For object-dtype, ``numpy.nan`` is used. + For ``StringDtype``, ``pandas.NA`` is used. + + Returns + ------- + Series or Index of bool + A Series of booleans indicating whether the given pattern matches + the end of each string element. + + See Also + -------- + str.endswith : Python standard library string method. + Series.str.startswith : Same as endswith, but tests the start of string. + Series.str.contains : Tests if string element contains a pattern. + + Examples + -------- + >>> s = pd.Series(['bat', 'bear', 'caT', np.nan]) + >>> s + 0 bat + 1 bear + 2 caT + 3 NaN + dtype: object + + >>> s.str.endswith('t') + 0 True + 1 False + 2 False + 3 NaN + dtype: object + + Specifying `na` to be `False` instead of `NaN`. + + >>> s.str.endswith('t', na=False) + 0 True + 1 False + 2 False + 3 False + dtype: bool + """ + result = self._data.array._str_endswith(pat, na=na) + return self._wrap_result(result, returns_string=False) + + @forbid_nonstring_types(["bytes"]) + def findall(self, pat, flags=0): + """ + Find all occurrences of pattern or regular expression in the Series/Index. + + Equivalent to applying :func:`re.findall` to all the elements in the + Series/Index. + + Parameters + ---------- + pat : str + Pattern or regular expression. + flags : int, default 0 + Flags from ``re`` module, e.g. `re.IGNORECASE` (default is 0, which + means no flags). + + Returns + ------- + Series/Index of lists of strings + All non-overlapping matches of pattern or regular expression in each + string of this Series/Index. + + See Also + -------- + count : Count occurrences of pattern or regular expression in each string + of the Series/Index. + extractall : For each string in the Series, extract groups from all matches + of regular expression and return a DataFrame with one row for each + match and one column for each group. + re.findall : The equivalent ``re`` function to all non-overlapping matches + of pattern or regular expression in string, as a list of strings. + + Examples + -------- + >>> s = pd.Series(['Lion', 'Monkey', 'Rabbit']) + + The search for the pattern 'Monkey' returns one match: + + >>> s.str.findall('Monkey') + 0 [] + 1 [Monkey] + 2 [] + dtype: object + + On the other hand, the search for the pattern 'MONKEY' doesn't return any + match: + + >>> s.str.findall('MONKEY') + 0 [] + 1 [] + 2 [] + dtype: object + + Flags can be added to the pattern or regular expression. 
For instance,
+        to find the pattern 'MONKEY' ignoring the case:
+
+        >>> import re
+        >>> s.str.findall('MONKEY', flags=re.IGNORECASE)
+        0          []
+        1    [Monkey]
+        2          []
+        dtype: object
+
+        When the pattern matches more than one string in the Series, all matches
+        are returned:
+
+        >>> s.str.findall('on')
+        0    [on]
+        1    [on]
+        2      []
+        dtype: object
+
+        Regular expressions are supported too. For instance, the search for all the
+        strings ending with the word 'on' is shown next:
+
+        >>> s.str.findall('on$')
+        0    [on]
+        1      []
+        2      []
+        dtype: object
+
+        If the pattern is found more than once in the same string, then a list of
+        multiple strings is returned:
+
+        >>> s.str.findall('b')
+        0        []
+        1        []
+        2    [b, b]
+        dtype: object
+        """
+        result = self._data.array._str_findall(pat, flags)
+        return self._wrap_result(result, returns_string=False)
+
+    @forbid_nonstring_types(["bytes"])
+    def extract(
+        self, pat: str, flags: int = 0, expand: bool = True
+    ) -> DataFrame | Series | Index:
+        r"""
+        Extract capture groups in the regex `pat` as columns in a DataFrame.
+
+        For each subject string in the Series, extract groups from the
+        first match of regular expression `pat`.
+
+        Parameters
+        ----------
+        pat : str
+            Regular expression pattern with capturing groups.
+        flags : int, default 0 (no flags)
+            Flags from the ``re`` module, e.g. ``re.IGNORECASE``, that
+            modify regular expression matching for things like case,
+            spaces, etc. For more details, see :mod:`re`.
+        expand : bool, default True
+            If True, return DataFrame with one column per capture group.
+            If False, return a Series/Index if there is one capture group
+            or DataFrame if there are multiple capture groups.
+
+        Returns
+        -------
+        DataFrame or Series or Index
+            A DataFrame with one row for each subject string, and one
+            column for each group. Any capture group names in regular
+            expression pat will be used for column names; otherwise
+            capture group numbers will be used. The dtype of each result
+            column is always object, even when no match is found. If
+            ``expand=False`` and pat has only one capture group, then
+            return a Series (if subject is a Series) or Index (if subject
+            is an Index).
+
+        See Also
+        --------
+        extractall : Returns all matches (not just the first match).
+
+        Examples
+        --------
+        A pattern with two groups will return a DataFrame with two columns.
+        Non-matches will be NaN.
+
+        >>> s = pd.Series(['a1', 'b2', 'c3'])
+        >>> s.str.extract(r'([ab])(\d)')
+             0    1
+        0    a    1
+        1    b    2
+        2  NaN  NaN
+
+        A pattern may contain optional groups.
+
+        >>> s.str.extract(r'([ab])?(\d)')
+             0  1
+        0    a  1
+        1    b  2
+        2  NaN  3
+
+        Named groups will become column names in the result.
+
+        >>> s.str.extract(r'(?P<letter>[ab])(?P<digit>\d)')
+          letter digit
+        0      a     1
+        1      b     2
+        2    NaN   NaN
+
+        A pattern with one group will return a DataFrame with one column
+        if expand=True.
+
+        >>> s.str.extract(r'[ab](\d)', expand=True)
+             0
+        0    1
+        1    2
+        2  NaN
+
+        A pattern with one group will return a Series if expand=False.
+
+        >>> s.str.extract(r'[ab](\d)', expand=False)
+        0      1
+        1      2
+        2    NaN
+        dtype: object
+        """
+        from pandas import DataFrame
+
+        if not isinstance(expand, bool):
+            raise ValueError("expand must be True or False")
+
+        regex = re.compile(pat, flags=flags)
+        if regex.groups == 0:
+            raise ValueError("pattern contains no capture groups")
+
+        if not expand and regex.groups > 1 and isinstance(self._data, ABCIndex):
+            raise ValueError("only one regex group is supported with Index")
+
+        obj = self._data
+        result_dtype = _result_dtype(obj)
+
+        returns_df = regex.groups > 1 or expand
+
+        if returns_df:
+            name = None
+            columns = _get_group_names(regex)
+
+            if obj.array.size == 0:
+                result = DataFrame(columns=columns, dtype=result_dtype)
+
+            else:
+                result_list = self._data.array._str_extract(
+                    pat, flags=flags, expand=returns_df
+                )
+
+                result_index: Index | None
+                if isinstance(obj, ABCSeries):
+                    result_index = obj.index
+                else:
+                    result_index = None
+
+                result = DataFrame(
+                    result_list, columns=columns, index=result_index, dtype=result_dtype
+                )
+
+        else:
+            name = _get_single_group_name(regex)
+            result = self._data.array._str_extract(pat, flags=flags, expand=returns_df)
+        return self._wrap_result(result, name=name)
+
+    @forbid_nonstring_types(["bytes"])
+    def extractall(self, pat, flags=0):
+        r"""
+        Extract capture groups in the regex `pat` as columns in DataFrame.
+
+        For each subject string in the Series, extract groups from all
+        matches of regular expression pat. When each subject string in the
+        Series has exactly one match, extractall(pat).xs(0, level='match')
+        is the same as extract(pat).
+
+        Parameters
+        ----------
+        pat : str
+            Regular expression pattern with capturing groups.
+        flags : int, default 0 (no flags)
+            A ``re`` module flag, for example ``re.IGNORECASE``. These allow
+            to modify regular expression matching for things like case, spaces,
+            etc. Multiple flags can be combined with the bitwise OR operator,
+            for example ``re.IGNORECASE | re.MULTILINE``.
+
+        Returns
+        -------
+        DataFrame
+            A ``DataFrame`` with one row for each match, and one column for each
+            group. Its rows have a ``MultiIndex`` with first levels that come from
+            the subject ``Series``. The last level is named 'match' and indexes the
+            matches in each item of the ``Series``. Any capture group names in
+            regular expression pat will be used for column names; otherwise capture
+            group numbers will be used.
+
+        See Also
+        --------
+        extract : Returns first match only (not all matches).
+
+        Examples
+        --------
+        A pattern with one group will return a DataFrame with one column.
+        Indices with no matches will not appear in the result.
+
+        >>> s = pd.Series(["a1a2", "b1", "c1"], index=["A", "B", "C"])
+        >>> s.str.extractall(r"[ab](\d)")
+                 0
+          match
+        A 0      1
+          1      2
+        B 0      1
+
+        Capture group names are used for column names of the result.
+
+        >>> s.str.extractall(r"[ab](?P<digit>\d)")
+                digit
+          match
+        A 0         1
+          1         2
+        B 0         1
+
+        A pattern with two groups will return a DataFrame with two columns.
+
+        >>> s.str.extractall(r"(?P<letter>[ab])(?P<digit>\d)")
+                letter digit
+          match
+        A 0          a     1
+          1          a     2
+        B 0          b     1
+
+        Optional groups that do not match are NaN in the result.
+
+        >>> s.str.extractall(r"(?P<letter>[ab])?(?P<digit>\d)")
+                letter digit
+          match
+        A 0          a     1
+          1          a     2
+        B 0          b     1
+        C 0        NaN     1
+        """
+        # TODO: dispatch
+        return str_extractall(self._orig, pat, flags)
+
+    _shared_docs[
+        "find"
+    ] = """
+    Return %(side)s indexes in each strings in the Series/Index.
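+
+    Both bounds are optional: ``start`` defaults to ``0`` and ``end`` to the
+    end of each string.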
+ + Each of returned indexes corresponds to the position where the + substring is fully contained between [start:end]. Return -1 on + failure. Equivalent to standard :meth:`str.%(method)s`. + + Parameters + ---------- + sub : str + Substring being searched. + start : int + Left edge index. + end : int + Right edge index. + + Returns + ------- + Series or Index of int. + + See Also + -------- + %(also)s + """ + + @Appender( + _shared_docs["find"] + % { + "side": "lowest", + "method": "find", + "also": "rfind : Return highest indexes in each strings.", + } + ) + @forbid_nonstring_types(["bytes"]) + def find(self, sub, start=0, end=None): + if not isinstance(sub, str): + msg = f"expected a string object, not {type(sub).__name__}" + raise TypeError(msg) + + result = self._data.array._str_find(sub, start, end) + return self._wrap_result(result, returns_string=False) + + @Appender( + _shared_docs["find"] + % { + "side": "highest", + "method": "rfind", + "also": "find : Return lowest indexes in each strings.", + } + ) + @forbid_nonstring_types(["bytes"]) + def rfind(self, sub, start=0, end=None): + if not isinstance(sub, str): + msg = f"expected a string object, not {type(sub).__name__}" + raise TypeError(msg) + + result = self._data.array._str_rfind(sub, start=start, end=end) + return self._wrap_result(result, returns_string=False) + + @forbid_nonstring_types(["bytes"]) + def normalize(self, form): + """ + Return the Unicode normal form for the strings in the Series/Index. + + For more information on the forms, see the + :func:`unicodedata.normalize`. + + Parameters + ---------- + form : {'NFC', 'NFKC', 'NFD', 'NFKD'} + Unicode form. + + Returns + ------- + normalized : Series/Index of objects + """ + result = self._data.array._str_normalize(form) + return self._wrap_result(result) + + _shared_docs[ + "index" + ] = """ + Return %(side)s indexes in each string in Series/Index. + + Each of the returned indexes corresponds to the position where the + substring is fully contained between [start:end]. This is the same + as ``str.%(similar)s`` except instead of returning -1, it raises a + ValueError when the substring is not found. Equivalent to standard + ``str.%(method)s``. + + Parameters + ---------- + sub : str + Substring being searched. + start : int + Left edge index. + end : int + Right edge index. + + Returns + ------- + Series or Index of object + + See Also + -------- + %(also)s + """ + + @Appender( + _shared_docs["index"] + % { + "side": "lowest", + "similar": "find", + "method": "index", + "also": "rindex : Return highest indexes in each strings.", + } + ) + @forbid_nonstring_types(["bytes"]) + def index(self, sub, start=0, end=None): + if not isinstance(sub, str): + msg = f"expected a string object, not {type(sub).__name__}" + raise TypeError(msg) + + result = self._data.array._str_index(sub, start=start, end=end) + return self._wrap_result(result, returns_string=False) + + @Appender( + _shared_docs["index"] + % { + "side": "highest", + "similar": "rfind", + "method": "rindex", + "also": "index : Return lowest indexes in each strings.", + } + ) + @forbid_nonstring_types(["bytes"]) + def rindex(self, sub, start=0, end=None): + if not isinstance(sub, str): + msg = f"expected a string object, not {type(sub).__name__}" + raise TypeError(msg) + + result = self._data.array._str_rindex(sub, start=start, end=end) + return self._wrap_result(result, returns_string=False) + + def len(self): + """ + Compute the length of each element in the Series/Index. 
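+
+        For all-string input with no missing values the result is
+        integer-valued; mixed input (as in the example below) upcasts to
+        float because missing results become NaN:
+
+        >>> pd.Series(["dog", "", "cat"]).str.len()
+        0    3
+        1    0
+        2    3
+        dtype: int64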
+ + The element may be a sequence (such as a string, tuple or list) or a collection + (such as a dictionary). + + Returns + ------- + Series or Index of int + A Series or Index of integer values indicating the length of each + element in the Series or Index. + + See Also + -------- + str.len : Python built-in function returning the length of an object. + Series.size : Returns the length of the Series. + + Examples + -------- + Returns the length (number of characters) in a string. Returns the + number of entries for dictionaries, lists or tuples. + + >>> s = pd.Series(['dog', + ... '', + ... 5, + ... {'foo' : 'bar'}, + ... [2, 3, 5, 7], + ... ('one', 'two', 'three')]) + >>> s + 0 dog + 1 + 2 5 + 3 {'foo': 'bar'} + 4 [2, 3, 5, 7] + 5 (one, two, three) + dtype: object + >>> s.str.len() + 0 3.0 + 1 0.0 + 2 NaN + 3 1.0 + 4 4.0 + 5 3.0 + dtype: float64 + """ + result = self._data.array._str_len() + return self._wrap_result(result, returns_string=False) + + _shared_docs[ + "casemethods" + ] = """ + Convert strings in the Series/Index to %(type)s. + %(version)s + Equivalent to :meth:`str.%(method)s`. + + Returns + ------- + Series or Index of object + + See Also + -------- + Series.str.lower : Converts all characters to lowercase. + Series.str.upper : Converts all characters to uppercase. + Series.str.title : Converts first character of each word to uppercase and + remaining to lowercase. + Series.str.capitalize : Converts first character to uppercase and + remaining to lowercase. + Series.str.swapcase : Converts uppercase to lowercase and lowercase to + uppercase. + Series.str.casefold: Removes all case distinctions in the string. + + Examples + -------- + >>> s = pd.Series(['lower', 'CAPITALS', 'this is a sentence', 'SwApCaSe']) + >>> s + 0 lower + 1 CAPITALS + 2 this is a sentence + 3 SwApCaSe + dtype: object + + >>> s.str.lower() + 0 lower + 1 capitals + 2 this is a sentence + 3 swapcase + dtype: object + + >>> s.str.upper() + 0 LOWER + 1 CAPITALS + 2 THIS IS A SENTENCE + 3 SWAPCASE + dtype: object + + >>> s.str.title() + 0 Lower + 1 Capitals + 2 This Is A Sentence + 3 Swapcase + dtype: object + + >>> s.str.capitalize() + 0 Lower + 1 Capitals + 2 This is a sentence + 3 Swapcase + dtype: object + + >>> s.str.swapcase() + 0 LOWER + 1 capitals + 2 THIS IS A SENTENCE + 3 sWaPcAsE + dtype: object + """ + # Types: + # cases: + # upper, lower, title, capitalize, swapcase, casefold + # boolean: + # isalpha, isnumeric isalnum isdigit isdecimal isspace islower isupper istitle + # _doc_args holds dict of strings to use in substituting casemethod docs + _doc_args: dict[str, dict[str, str]] = {} + _doc_args["lower"] = {"type": "lowercase", "method": "lower", "version": ""} + _doc_args["upper"] = {"type": "uppercase", "method": "upper", "version": ""} + _doc_args["title"] = {"type": "titlecase", "method": "title", "version": ""} + _doc_args["capitalize"] = { + "type": "be capitalized", + "method": "capitalize", + "version": "", + } + _doc_args["swapcase"] = { + "type": "be swapcased", + "method": "swapcase", + "version": "", + } + _doc_args["casefold"] = { + "type": "be casefolded", + "method": "casefold", + "version": "\n .. 
versionadded:: 0.25.0\n", + } + + @Appender(_shared_docs["casemethods"] % _doc_args["lower"]) + @forbid_nonstring_types(["bytes"]) + def lower(self): + result = self._data.array._str_lower() + return self._wrap_result(result) + + @Appender(_shared_docs["casemethods"] % _doc_args["upper"]) + @forbid_nonstring_types(["bytes"]) + def upper(self): + result = self._data.array._str_upper() + return self._wrap_result(result) + + @Appender(_shared_docs["casemethods"] % _doc_args["title"]) + @forbid_nonstring_types(["bytes"]) + def title(self): + result = self._data.array._str_title() + return self._wrap_result(result) + + @Appender(_shared_docs["casemethods"] % _doc_args["capitalize"]) + @forbid_nonstring_types(["bytes"]) + def capitalize(self): + result = self._data.array._str_capitalize() + return self._wrap_result(result) + + @Appender(_shared_docs["casemethods"] % _doc_args["swapcase"]) + @forbid_nonstring_types(["bytes"]) + def swapcase(self): + result = self._data.array._str_swapcase() + return self._wrap_result(result) + + @Appender(_shared_docs["casemethods"] % _doc_args["casefold"]) + @forbid_nonstring_types(["bytes"]) + def casefold(self): + result = self._data.array._str_casefold() + return self._wrap_result(result) + + _shared_docs[ + "ismethods" + ] = """ + Check whether all characters in each string are %(type)s. + + This is equivalent to running the Python string method + :meth:`str.%(method)s` for each element of the Series/Index. If a string + has zero characters, ``False`` is returned for that check. + + Returns + ------- + Series or Index of bool + Series or Index of boolean values with the same length as the original + Series/Index. + + See Also + -------- + Series.str.isalpha : Check whether all characters are alphabetic. + Series.str.isnumeric : Check whether all characters are numeric. + Series.str.isalnum : Check whether all characters are alphanumeric. + Series.str.isdigit : Check whether all characters are digits. + Series.str.isdecimal : Check whether all characters are decimal. + Series.str.isspace : Check whether all characters are whitespace. + Series.str.islower : Check whether all characters are lowercase. + Series.str.isupper : Check whether all characters are uppercase. + Series.str.istitle : Check whether all characters are titlecase. + + Examples + -------- + **Checks for Alphabetic and Numeric Characters** + + >>> s1 = pd.Series(['one', 'one1', '1', '']) + + >>> s1.str.isalpha() + 0 True + 1 False + 2 False + 3 False + dtype: bool + + >>> s1.str.isnumeric() + 0 False + 1 False + 2 True + 3 False + dtype: bool + + >>> s1.str.isalnum() + 0 True + 1 True + 2 True + 3 False + dtype: bool + + Note that checks against characters mixed with any additional punctuation + or whitespace will evaluate to false for an alphanumeric check. + + >>> s2 = pd.Series(['A B', '1.5', '3,000']) + >>> s2.str.isalnum() + 0 False + 1 False + 2 False + dtype: bool + + **More Detailed Checks for Numeric Characters** + + There are several different but overlapping sets of numeric characters that + can be checked for. + + >>> s3 = pd.Series(['23', '³', '⅕', '']) + + The ``s3.str.isdecimal`` method checks for characters used to form numbers + in base 10. + + >>> s3.str.isdecimal() + 0 True + 1 False + 2 False + 3 False + dtype: bool + + The ``s.str.isdigit`` method is the same as ``s3.str.isdecimal`` but also + includes special digits, like superscripted and subscripted digits in + unicode. 
+ + >>> s3.str.isdigit() + 0 True + 1 True + 2 False + 3 False + dtype: bool + + The ``s.str.isnumeric`` method is the same as ``s3.str.isdigit`` but also + includes other characters that can represent quantities such as unicode + fractions. + + >>> s3.str.isnumeric() + 0 True + 1 True + 2 True + 3 False + dtype: bool + + **Checks for Whitespace** + + >>> s4 = pd.Series([' ', '\\t\\r\\n ', '']) + >>> s4.str.isspace() + 0 True + 1 True + 2 False + dtype: bool + + **Checks for Character Case** + + >>> s5 = pd.Series(['leopard', 'Golden Eagle', 'SNAKE', '']) + + >>> s5.str.islower() + 0 True + 1 False + 2 False + 3 False + dtype: bool + + >>> s5.str.isupper() + 0 False + 1 False + 2 True + 3 False + dtype: bool + + The ``s5.str.istitle`` method checks for whether all words are in title + case (whether only the first letter of each word is capitalized). Words are + assumed to be as any sequence of non-numeric characters separated by + whitespace characters. + + >>> s5.str.istitle() + 0 False + 1 True + 2 False + 3 False + dtype: bool + """ + _doc_args["isalnum"] = {"type": "alphanumeric", "method": "isalnum"} + _doc_args["isalpha"] = {"type": "alphabetic", "method": "isalpha"} + _doc_args["isdigit"] = {"type": "digits", "method": "isdigit"} + _doc_args["isspace"] = {"type": "whitespace", "method": "isspace"} + _doc_args["islower"] = {"type": "lowercase", "method": "islower"} + _doc_args["isupper"] = {"type": "uppercase", "method": "isupper"} + _doc_args["istitle"] = {"type": "titlecase", "method": "istitle"} + _doc_args["isnumeric"] = {"type": "numeric", "method": "isnumeric"} + _doc_args["isdecimal"] = {"type": "decimal", "method": "isdecimal"} + # force _noarg_wrapper return type with dtype=np.dtype(bool) (GH 29624) + + isalnum = _map_and_wrap( + "isalnum", docstring=_shared_docs["ismethods"] % _doc_args["isalnum"] + ) + isalpha = _map_and_wrap( + "isalpha", docstring=_shared_docs["ismethods"] % _doc_args["isalpha"] + ) + isdigit = _map_and_wrap( + "isdigit", docstring=_shared_docs["ismethods"] % _doc_args["isdigit"] + ) + isspace = _map_and_wrap( + "isspace", docstring=_shared_docs["ismethods"] % _doc_args["isspace"] + ) + islower = _map_and_wrap( + "islower", docstring=_shared_docs["ismethods"] % _doc_args["islower"] + ) + isupper = _map_and_wrap( + "isupper", docstring=_shared_docs["ismethods"] % _doc_args["isupper"] + ) + istitle = _map_and_wrap( + "istitle", docstring=_shared_docs["ismethods"] % _doc_args["istitle"] + ) + isnumeric = _map_and_wrap( + "isnumeric", docstring=_shared_docs["ismethods"] % _doc_args["isnumeric"] + ) + isdecimal = _map_and_wrap( + "isdecimal", docstring=_shared_docs["ismethods"] % _doc_args["isdecimal"] + ) + + +def cat_safe(list_of_columns: list, sep: str): + """ + Auxiliary function for :meth:`str.cat`. + + Same signature as cat_core, but handles TypeErrors in concatenation, which + happen if the arrays in list_of columns have the wrong dtypes or content. + + Parameters + ---------- + list_of_columns : list of numpy arrays + List of arrays to be concatenated with sep; + these arrays may not contain NaNs! + sep : string + The separator string for concatenating the columns. + + Returns + ------- + nd.array + The concatenation of list_of_columns with sep. 
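+
+    Notes
+    -----
+    A sketch of the interleaving done by ``cat_core`` below for two columns
+    and ``sep='-'``: the object array ``[a, '-', b]`` is reduced with
+    ``np.sum`` over axis 0, so row ``i`` becomes ``a[i] + '-' + b[i]``.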
+ """ + try: + result = cat_core(list_of_columns, sep) + except TypeError: + # if there are any non-string values (wrong dtype or hidden behind + # object dtype), np.sum will fail; catch and return with better message + for column in list_of_columns: + dtype = lib.infer_dtype(column, skipna=True) + if dtype not in ["string", "empty"]: + raise TypeError( + "Concatenation requires list-likes containing only " + "strings (or missing values). Offending values found in " + f"column {dtype}" + ) from None + return result + + +def cat_core(list_of_columns: list, sep: str): + """ + Auxiliary function for :meth:`str.cat` + + Parameters + ---------- + list_of_columns : list of numpy arrays + List of arrays to be concatenated with sep; + these arrays may not contain NaNs! + sep : string + The separator string for concatenating the columns. + + Returns + ------- + nd.array + The concatenation of list_of_columns with sep. + """ + if sep == "": + # no need to interleave sep if it is empty + arr_of_cols = np.asarray(list_of_columns, dtype=object) + return np.sum(arr_of_cols, axis=0) + list_with_sep = [sep] * (2 * len(list_of_columns) - 1) + list_with_sep[::2] = list_of_columns + arr_with_sep = np.asarray(list_with_sep, dtype=object) + return np.sum(arr_with_sep, axis=0) + + +def _result_dtype(arr): + # workaround #27953 + # ideally we just pass `dtype=arr.dtype` unconditionally, but this fails + # when the list of values is empty. + from pandas.core.arrays.string_ import StringDtype + + if isinstance(arr.dtype, StringDtype): + return arr.dtype + else: + return object + + +def _get_single_group_name(regex: re.Pattern) -> Hashable: + if regex.groupindex: + return next(iter(regex.groupindex)) + else: + return None + + +def _get_group_names(regex: re.Pattern) -> list[Hashable]: + """ + Get named groups from compiled regex. + + Unnamed groups are numbered. + + Parameters + ---------- + regex : compiled regex + + Returns + ------- + list of column labels + """ + names = {v: k for k, v in regex.groupindex.items()} + return [names.get(1 + i, i) for i in range(regex.groups)] + + +def str_extractall(arr, pat, flags=0): + regex = re.compile(pat, flags=flags) + # the regex must contain capture groups. 
+    if regex.groups == 0:
+        raise ValueError("pattern contains no capture groups")
+
+    if isinstance(arr, ABCIndex):
+        arr = arr.to_series().reset_index(drop=True)
+
+    columns = _get_group_names(regex)
+    match_list = []
+    index_list = []
+    is_mi = arr.index.nlevels > 1
+
+    for subject_key, subject in arr.items():
+        if isinstance(subject, str):
+
+            if not is_mi:
+                subject_key = (subject_key,)
+
+            for match_i, match_tuple in enumerate(regex.findall(subject)):
+                if isinstance(match_tuple, str):
+                    match_tuple = (match_tuple,)
+                na_tuple = [np.NaN if group == "" else group for group in match_tuple]
+                match_list.append(na_tuple)
+                result_key = tuple(subject_key + (match_i,))
+                index_list.append(result_key)
+
+    from pandas import MultiIndex
+
+    index = MultiIndex.from_tuples(index_list, names=arr.index.names + ["match"])
+    dtype = _result_dtype(arr)
+
+    result = arr._constructor_expanddim(
+        match_list, index=index, columns=columns, dtype=dtype
+    )
+    return result
diff --git a/.local/lib/python3.8/site-packages/pandas/core/strings/base.py b/.local/lib/python3.8/site-packages/pandas/core/strings/base.py
new file mode 100644
index 0000000..ef0c3f8
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/core/strings/base.py
@@ -0,0 +1,248 @@
+from __future__ import annotations
+
+import abc
+from collections.abc import Callable  # noqa: PDF001
+import re
+from typing import TYPE_CHECKING
+
+import numpy as np
+
+from pandas._typing import Scalar
+
+if TYPE_CHECKING:
+    from pandas import Series
+
+
+class BaseStringArrayMethods(abc.ABC):
+    """
+    Base class for extension arrays implementing string methods.
+
+    This is where our ExtensionArrays can override the implementation of
+    Series.str.<method>. We don't expect this to work with
+    3rd-party extension arrays.
+
+    * User calls Series.str.<method>
+    * pandas extracts the extension array from the Series
+    * pandas calls ``extension_array._str_<method>(*args, **kwargs)``
+    * pandas wraps the result, to return to the user.
+
+    See :ref:`Series.str` for the docstring of each method.
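+
+    For example, ``pd.Series(["a"]).str.upper()`` ends up calling
+    ``extension_array._str_upper()``, and the accessor's ``_wrap_result``
+    turns the returned array back into a Series.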
+ """ + + def _str_getitem(self, key): + if isinstance(key, slice): + return self._str_slice(start=key.start, stop=key.stop, step=key.step) + else: + return self._str_get(key) + + @abc.abstractmethod + def _str_count(self, pat, flags=0): + pass + + @abc.abstractmethod + def _str_pad(self, width, side="left", fillchar=" "): + pass + + @abc.abstractmethod + def _str_contains(self, pat, case=True, flags=0, na=None, regex=True): + pass + + @abc.abstractmethod + def _str_startswith(self, pat, na=None): + pass + + @abc.abstractmethod + def _str_endswith(self, pat, na=None): + pass + + @abc.abstractmethod + def _str_replace( + self, + pat: str | re.Pattern, + repl: str | Callable, + n: int = -1, + case: bool = True, + flags: int = 0, + regex: bool = True, + ): + pass + + @abc.abstractmethod + def _str_repeat(self, repeats): + pass + + @abc.abstractmethod + def _str_match( + self, pat: str, case: bool = True, flags: int = 0, na: Scalar = np.nan + ): + pass + + @abc.abstractmethod + def _str_fullmatch( + self, + pat: str | re.Pattern, + case: bool = True, + flags: int = 0, + na: Scalar = np.nan, + ): + pass + + @abc.abstractmethod + def _str_encode(self, encoding, errors="strict"): + pass + + @abc.abstractmethod + def _str_find(self, sub, start=0, end=None): + pass + + @abc.abstractmethod + def _str_rfind(self, sub, start=0, end=None): + pass + + @abc.abstractmethod + def _str_findall(self, pat, flags=0): + pass + + @abc.abstractmethod + def _str_get(self, i): + pass + + @abc.abstractmethod + def _str_index(self, sub, start=0, end=None): + pass + + @abc.abstractmethod + def _str_rindex(self, sub, start=0, end=None): + pass + + @abc.abstractmethod + def _str_join(self, sep): + pass + + @abc.abstractmethod + def _str_partition(self, sep, expand): + pass + + @abc.abstractmethod + def _str_rpartition(self, sep, expand): + pass + + @abc.abstractmethod + def _str_len(self): + pass + + @abc.abstractmethod + def _str_slice(self, start=None, stop=None, step=None): + pass + + @abc.abstractmethod + def _str_slice_replace(self, start=None, stop=None, repl=None): + pass + + @abc.abstractmethod + def _str_translate(self, table): + pass + + @abc.abstractmethod + def _str_wrap(self, width, **kwargs): + pass + + @abc.abstractmethod + def _str_get_dummies(self, sep="|"): + pass + + @abc.abstractmethod + def _str_isalnum(self): + pass + + @abc.abstractmethod + def _str_isalpha(self): + pass + + @abc.abstractmethod + def _str_isdecimal(self): + pass + + @abc.abstractmethod + def _str_isdigit(self): + pass + + @abc.abstractmethod + def _str_islower(self): + pass + + @abc.abstractmethod + def _str_isnumeric(self): + pass + + @abc.abstractmethod + def _str_isspace(self): + pass + + @abc.abstractmethod + def _str_istitle(self): + pass + + @abc.abstractmethod + def _str_isupper(self): + pass + + @abc.abstractmethod + def _str_capitalize(self): + pass + + @abc.abstractmethod + def _str_casefold(self): + pass + + @abc.abstractmethod + def _str_title(self): + pass + + @abc.abstractmethod + def _str_swapcase(self): + pass + + @abc.abstractmethod + def _str_lower(self): + pass + + @abc.abstractmethod + def _str_upper(self): + pass + + @abc.abstractmethod + def _str_normalize(self, form): + pass + + @abc.abstractmethod + def _str_strip(self, to_strip=None): + pass + + @abc.abstractmethod + def _str_lstrip(self, to_strip=None): + pass + + @abc.abstractmethod + def _str_rstrip(self, to_strip=None): + pass + + @abc.abstractmethod + def _str_removeprefix(self, prefix: str) -> Series: + pass + + @abc.abstractmethod + def 
_str_removesuffix(self, suffix: str) -> Series: + pass + + @abc.abstractmethod + def _str_split(self, pat=None, n=-1, expand=False): + pass + + @abc.abstractmethod + def _str_rsplit(self, pat=None, n=-1): + pass + + @abc.abstractmethod + def _str_extract(self, pat: str, flags: int = 0, expand: bool = True): + pass diff --git a/.local/lib/python3.8/site-packages/pandas/core/strings/object_array.py b/.local/lib/python3.8/site-packages/pandas/core/strings/object_array.py new file mode 100644 index 0000000..6b0380a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/strings/object_array.py @@ -0,0 +1,483 @@ +from __future__ import annotations + +from collections.abc import Callable # noqa: PDF001 +import re +import textwrap +from typing import TYPE_CHECKING +import unicodedata + +import numpy as np + +import pandas._libs.lib as lib +import pandas._libs.missing as libmissing +import pandas._libs.ops as libops +from pandas._typing import ( + NpDtype, + Scalar, +) + +from pandas.core.dtypes.common import is_scalar +from pandas.core.dtypes.missing import isna + +from pandas.core.strings.base import BaseStringArrayMethods + +if TYPE_CHECKING: + from pandas import Series + + +class ObjectStringArrayMixin(BaseStringArrayMethods): + """ + String Methods operating on object-dtype ndarrays. + """ + + _str_na_value = np.nan + + def __len__(self): + # For typing, _str_map relies on the object being sized. + raise NotImplementedError + + def _str_map( + self, f, na_value=None, dtype: NpDtype | None = None, convert: bool = True + ): + """ + Map a callable over valid elements of the array. + + Parameters + ---------- + f : Callable + A function to call on each non-NA element. + na_value : Scalar, optional + The value to set for NA values. Might also be used for the + fill value if the callable `f` raises an exception. + This defaults to ``self._str_na_value`` which is ``np.nan`` + for object-dtype and Categorical and ``pd.NA`` for StringArray. + dtype : Dtype, optional + The dtype of the result array. + convert : bool, default True + Whether to call `maybe_convert_objects` on the resulting ndarray + """ + if dtype is None: + dtype = np.dtype("object") + if na_value is None: + na_value = self._str_na_value + + if not len(self): + return np.ndarray(0, dtype=dtype) + + arr = np.asarray(self, dtype=object) + mask = isna(arr) + map_convert = convert and not np.all(mask) + try: + result = lib.map_infer_mask(arr, f, mask.view(np.uint8), map_convert) + except (TypeError, AttributeError) as err: + # Reraise the exception if callable `f` got wrong number of args. + # The user may want to be warned by this, instead of getting NaN + p_err = ( + r"((takes)|(missing)) (?(2)from \d+ to )?\d+ " + r"(?(3)required )positional arguments?" + ) + + if len(err.args) >= 1 and re.search(p_err, err.args[0]): + # FIXME: this should be totally avoidable + raise err + + def g(x): + # This type of fallback behavior can be removed once + # we remove object-dtype .str accessor. 
+ try: + return f(x) + except (TypeError, AttributeError): + return na_value + + return self._str_map(g, na_value=na_value, dtype=dtype) + if not isinstance(result, np.ndarray): + return result + if na_value is not np.nan: + np.putmask(result, mask, na_value) + if convert and result.dtype == object: + result = lib.maybe_convert_objects(result) + return result + + def _str_count(self, pat, flags=0): + regex = re.compile(pat, flags=flags) + f = lambda x: len(regex.findall(x)) + return self._str_map(f, dtype="int64") + + def _str_pad(self, width, side="left", fillchar=" "): + if side == "left": + f = lambda x: x.rjust(width, fillchar) + elif side == "right": + f = lambda x: x.ljust(width, fillchar) + elif side == "both": + f = lambda x: x.center(width, fillchar) + else: # pragma: no cover + raise ValueError("Invalid side") + return self._str_map(f) + + def _str_contains(self, pat, case=True, flags=0, na=np.nan, regex: bool = True): + if regex: + if not case: + flags |= re.IGNORECASE + + pat = re.compile(pat, flags=flags) + + f = lambda x: pat.search(x) is not None + else: + if case: + f = lambda x: pat in x + else: + upper_pat = pat.upper() + f = lambda x: upper_pat in x.upper() + return self._str_map(f, na, dtype=np.dtype("bool")) + + def _str_startswith(self, pat, na=None): + f = lambda x: x.startswith(pat) + return self._str_map(f, na_value=na, dtype=np.dtype(bool)) + + def _str_endswith(self, pat, na=None): + f = lambda x: x.endswith(pat) + return self._str_map(f, na_value=na, dtype=np.dtype(bool)) + + def _str_replace( + self, + pat: str | re.Pattern, + repl: str | Callable, + n: int = -1, + case: bool = True, + flags: int = 0, + regex: bool = True, + ): + if case is False: + # add case flag, if provided + flags |= re.IGNORECASE + + if regex or flags or callable(repl): + if not isinstance(pat, re.Pattern): + if regex is False: + pat = re.escape(pat) + pat = re.compile(pat, flags=flags) + + n = n if n >= 0 else 0 + f = lambda x: pat.sub(repl=repl, string=x, count=n) + else: + f = lambda x: x.replace(pat, repl, n) + + return self._str_map(f, dtype=str) + + def _str_repeat(self, repeats): + if is_scalar(repeats): + + def scalar_rep(x): + try: + return bytes.__mul__(x, repeats) + except TypeError: + return str.__mul__(x, repeats) + + return self._str_map(scalar_rep, dtype=str) + else: + from pandas.core.arrays.string_ import BaseStringArray + + def rep(x, r): + if x is libmissing.NA: + return x + try: + return bytes.__mul__(x, r) + except TypeError: + return str.__mul__(x, r) + + repeats = np.asarray(repeats, dtype=object) + result = libops.vec_binop(np.asarray(self), repeats, rep) + if isinstance(self, BaseStringArray): + # Not going through map, so we have to do this here. 
+            result = type(self)._from_sequence(result)
+        return result
+
+    def _str_match(
+        self, pat: str, case: bool = True, flags: int = 0, na: Scalar | None = None
+    ):
+        if not case:
+            flags |= re.IGNORECASE
+
+        regex = re.compile(pat, flags=flags)
+
+        f = lambda x: regex.match(x) is not None
+        return self._str_map(f, na_value=na, dtype=np.dtype(bool))
+
+    def _str_fullmatch(
+        self,
+        pat: str | re.Pattern,
+        case: bool = True,
+        flags: int = 0,
+        na: Scalar | None = None,
+    ):
+        if not case:
+            flags |= re.IGNORECASE
+
+        regex = re.compile(pat, flags=flags)
+
+        f = lambda x: regex.fullmatch(x) is not None
+        return self._str_map(f, na_value=na, dtype=np.dtype(bool))
+
+    def _str_encode(self, encoding, errors="strict"):
+        f = lambda x: x.encode(encoding, errors=errors)
+        return self._str_map(f, dtype=object)
+
+    def _str_find(self, sub, start=0, end=None):
+        return self._str_find_(sub, start, end, side="left")
+
+    def _str_rfind(self, sub, start=0, end=None):
+        return self._str_find_(sub, start, end, side="right")
+
+    def _str_find_(self, sub, start, end, side):
+        if side == "left":
+            method = "find"
+        elif side == "right":
+            method = "rfind"
+        else:  # pragma: no cover
+            raise ValueError("Invalid side")
+
+        if end is None:
+            f = lambda x: getattr(x, method)(sub, start)
+        else:
+            f = lambda x: getattr(x, method)(sub, start, end)
+        return self._str_map(f, dtype="int64")
+
+    def _str_findall(self, pat, flags=0):
+        regex = re.compile(pat, flags=flags)
+        return self._str_map(regex.findall, dtype="object")
+
+    def _str_get(self, i):
+        def f(x):
+            if isinstance(x, dict):
+                return x.get(i)
+            elif len(x) > i >= -len(x):
+                return x[i]
+            return self._str_na_value
+
+        return self._str_map(f)
+
+    def _str_index(self, sub, start=0, end=None):
+        # Mirror ``_str_find_``: only pass ``end`` through when it was given,
+        # otherwise search to the end of each string.
+        if end:
+            f = lambda x: x.index(sub, start, end)
+        else:
+            f = lambda x: x.index(sub, start)
+        return self._str_map(f, dtype="int64")
+
+    def _str_rindex(self, sub, start=0, end=None):
+        if end:
+            f = lambda x: x.rindex(sub, start, end)
+        else:
+            f = lambda x: x.rindex(sub, start)
+        return self._str_map(f, dtype="int64")
+
+    def _str_join(self, sep):
+        return self._str_map(sep.join)
+
+    def _str_partition(self, sep, expand):
+        result = self._str_map(lambda x: x.partition(sep), dtype="object")
+        return result
+
+    def _str_rpartition(self, sep, expand):
+        return self._str_map(lambda x: x.rpartition(sep), dtype="object")
+
+    def _str_len(self):
+        return self._str_map(len, dtype="int64")
+
+    def _str_slice(self, start=None, stop=None, step=None):
+        obj = slice(start, stop, step)
+        return self._str_map(lambda x: x[obj])
+
+    def _str_slice_replace(self, start=None, stop=None, repl=None):
+        if repl is None:
+            repl = ""
+
+        def f(x):
+            if x[start:stop] == "":
+                local_stop = start
+            else:
+                local_stop = stop
+            y = ""
+            if start is not None:
+                y += x[:start]
+            y += repl
+            if stop is not None:
+                y += x[local_stop:]
+            return y
+
+        return self._str_map(f)
+
+    def _str_split(
+        self,
+        pat: str | re.Pattern | None = None,
+        n=-1,
+        expand=False,
+        regex: bool | None = None,
+    ):
+        if pat is None:
+            if n is None or n == 0:
+                n = -1
+            f = lambda x: x.split(pat, n)
+        else:
+            new_pat: str | re.Pattern
+            if regex is True or isinstance(pat, re.Pattern):
+                new_pat = re.compile(pat)
+            elif regex is False:
+                new_pat = pat
+            # regex is None so link to old behavior #43563
+            else:
+                if len(pat) == 1:
+                    new_pat = pat
+                else:
+                    new_pat = re.compile(pat)
+
+            if isinstance(new_pat, re.Pattern):
+                if n is None or n == -1:
+                    n = 0
+                f = lambda x: new_pat.split(x, maxsplit=n)
+            else:
+                if n
is None or n == 0: + n = -1 + f = lambda x: x.split(pat, n) + return self._str_map(f, dtype=object) + + def _str_rsplit(self, pat=None, n=-1): + if n is None or n == 0: + n = -1 + f = lambda x: x.rsplit(pat, n) + return self._str_map(f, dtype="object") + + def _str_translate(self, table): + return self._str_map(lambda x: x.translate(table)) + + def _str_wrap(self, width, **kwargs): + kwargs["width"] = width + tw = textwrap.TextWrapper(**kwargs) + return self._str_map(lambda s: "\n".join(tw.wrap(s))) + + def _str_get_dummies(self, sep="|"): + from pandas import Series + + arr = Series(self).fillna("") + try: + arr = sep + arr + sep + except TypeError: + arr = sep + arr.astype(str) + sep + + tags: set[str] = set() + for ts in Series(arr).str.split(sep): + tags.update(ts) + tags2 = sorted(tags - {""}) + + dummies = np.empty((len(arr), len(tags2)), dtype=np.int64) + + for i, t in enumerate(tags2): + pat = sep + t + sep + dummies[:, i] = lib.map_infer(arr.to_numpy(), lambda x: pat in x) + return dummies, tags2 + + def _str_upper(self): + return self._str_map(lambda x: x.upper()) + + def _str_isalnum(self): + return self._str_map(str.isalnum, dtype="bool") + + def _str_isalpha(self): + return self._str_map(str.isalpha, dtype="bool") + + def _str_isdecimal(self): + return self._str_map(str.isdecimal, dtype="bool") + + def _str_isdigit(self): + return self._str_map(str.isdigit, dtype="bool") + + def _str_islower(self): + return self._str_map(str.islower, dtype="bool") + + def _str_isnumeric(self): + return self._str_map(str.isnumeric, dtype="bool") + + def _str_isspace(self): + return self._str_map(str.isspace, dtype="bool") + + def _str_istitle(self): + return self._str_map(str.istitle, dtype="bool") + + def _str_isupper(self): + return self._str_map(str.isupper, dtype="bool") + + def _str_capitalize(self): + return self._str_map(str.capitalize) + + def _str_casefold(self): + return self._str_map(str.casefold) + + def _str_title(self): + return self._str_map(str.title) + + def _str_swapcase(self): + return self._str_map(str.swapcase) + + def _str_lower(self): + return self._str_map(str.lower) + + def _str_normalize(self, form): + f = lambda x: unicodedata.normalize(form, x) + return self._str_map(f) + + def _str_strip(self, to_strip=None): + return self._str_map(lambda x: x.strip(to_strip)) + + def _str_lstrip(self, to_strip=None): + return self._str_map(lambda x: x.lstrip(to_strip)) + + def _str_rstrip(self, to_strip=None): + return self._str_map(lambda x: x.rstrip(to_strip)) + + def _str_removeprefix(self, prefix: str) -> Series: + # outstanding question on whether to use native methods for users + # on Python 3.9+ https://git.io/JE9QK, in which case we could do + # return self._str_map(str.removeprefix) + + def removeprefix(text: str) -> str: + if text.startswith(prefix): + return text[len(prefix) :] + return text + + return self._str_map(removeprefix) + + def _str_removesuffix(self, suffix: str) -> Series: + # this could be used on Python 3.9+ + # f = lambda x: x.removesuffix(suffix) + # return self._str_map(str.removesuffix) + + def removesuffix(text: str) -> str: + if text.endswith(suffix): + return text[: -len(suffix)] + return text + + return self._str_map(removesuffix) + + def _str_extract(self, pat: str, flags: int = 0, expand: bool = True): + regex = re.compile(pat, flags=flags) + na_value = self._str_na_value + + if not expand: + + def g(x): + m = regex.search(x) + return m.groups()[0] if m else na_value + + return self._str_map(g, convert=False) + + empty_row = [na_value] * 
regex.groups + + def f(x): + if not isinstance(x, str): + return empty_row + m = regex.search(x) + if m: + return [na_value if item is None else item for item in m.groups()] + else: + return empty_row + + return [f(val) for val in np.asarray(self)] diff --git a/.local/lib/python3.8/site-packages/pandas/core/tools/__init__.py b/.local/lib/python3.8/site-packages/pandas/core/tools/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/core/tools/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/tools/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..e9ed8e1 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/tools/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/tools/__pycache__/datetimes.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/tools/__pycache__/datetimes.cpython-38.pyc new file mode 100644 index 0000000..adc8df5 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/tools/__pycache__/datetimes.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/tools/__pycache__/numeric.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/tools/__pycache__/numeric.cpython-38.pyc new file mode 100644 index 0000000..ae189ff Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/tools/__pycache__/numeric.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/tools/__pycache__/timedeltas.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/tools/__pycache__/timedeltas.cpython-38.pyc new file mode 100644 index 0000000..4052be8 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/tools/__pycache__/timedeltas.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/tools/__pycache__/times.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/tools/__pycache__/times.cpython-38.pyc new file mode 100644 index 0000000..7a38c63 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/tools/__pycache__/times.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/tools/datetimes.py b/.local/lib/python3.8/site-packages/pandas/core/tools/datetimes.py new file mode 100644 index 0000000..4d9420f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/tools/datetimes.py @@ -0,0 +1,1268 @@ +from __future__ import annotations + +from collections import abc +from datetime import datetime +from functools import partial +from itertools import islice +from typing import ( + TYPE_CHECKING, + Callable, + Hashable, + List, + Tuple, + TypeVar, + Union, + overload, +) +import warnings + +import numpy as np + +from pandas._libs import tslib +from pandas._libs.tslibs import ( + OutOfBoundsDatetime, + Timedelta, + Timestamp, + conversion, + iNaT, + nat_strings, + parsing, +) +from pandas._libs.tslibs.parsing import ( # noqa:F401 + DateParseError, + format_is_iso, + guess_datetime_format, +) +from pandas._libs.tslibs.strptime import array_strptime +from pandas._typing import ( + AnyArrayLike, + ArrayLike, + Timezone, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + ensure_object, + is_datetime64_dtype, + is_datetime64_ns_dtype, + is_datetime64tz_dtype, + is_float, + is_integer, + is_integer_dtype, + is_list_like, + is_numeric_dtype, + is_scalar, 
+)
+from pandas.core.dtypes.generic import (
+    ABCDataFrame,
+    ABCSeries,
+)
+from pandas.core.dtypes.missing import notna
+
+from pandas.arrays import (
+    DatetimeArray,
+    IntegerArray,
+)
+from pandas.core import algorithms
+from pandas.core.algorithms import unique
+from pandas.core.arrays.datetimes import (
+    maybe_convert_dtype,
+    objects_to_datetime64ns,
+    tz_to_dtype,
+)
+from pandas.core.construction import extract_array
+from pandas.core.indexes.base import Index
+from pandas.core.indexes.datetimes import DatetimeIndex
+
+if TYPE_CHECKING:
+    from pandas._libs.tslibs.nattype import NaTType
+
+    from pandas import Series
+
+# ---------------------------------------------------------------------
+# types used in annotations
+
+ArrayConvertible = Union[List, Tuple, AnyArrayLike, "Series"]
+Scalar = Union[int, float, str]
+DatetimeScalar = TypeVar("DatetimeScalar", Scalar, datetime)
+DatetimeScalarOrArrayConvertible = Union[DatetimeScalar, ArrayConvertible]
+start_caching_at = 50
+
+
+# ---------------------------------------------------------------------
+
+
+def _guess_datetime_format_for_array(arr, **kwargs):
+    # Try to guess the format based on the first non-NaN element
+    non_nan_elements = notna(arr).nonzero()[0]
+    if len(non_nan_elements):
+        return guess_datetime_format(arr[non_nan_elements[0]], **kwargs)
+
+
+def should_cache(
+    arg: ArrayConvertible, unique_share: float = 0.7, check_count: int | None = None
+) -> bool:
+    """
+    Decides whether to do caching.
+
+    If the percent of unique elements among `check_count` elements is less
+    than `unique_share * 100` then we can do caching.
+
+    Parameters
+    ----------
+    arg: listlike, tuple, 1-d array, Series
+    unique_share: float, default=0.7, optional
+        0 < unique_share < 1
+    check_count: int, optional
+        0 <= check_count <= len(arg)
+
+    Returns
+    -------
+    do_caching: bool
+
+    Notes
+    -----
+    By default for a sequence of less than 50 items in size, we don't do
+    caching; for the number of elements less than 5000, we take ten percent of
+    all elements to check for a uniqueness share; if the sequence size is more
+    than 5000, then we check only the first 500 elements.
+    All constants were chosen empirically.
+    """
+    do_caching = True
+
+    # default realization
+    if check_count is None:
+        # in this case, the gain from caching is negligible
+        if len(arg) <= start_caching_at:
+            return False
+
+        if len(arg) <= 5000:
+            check_count = len(arg) // 10
+        else:
+            check_count = 500
+    else:
+        assert (
+            0 <= check_count <= len(arg)
+        ), "check_count must be in next bounds: [0; len(arg)]"
+        if check_count == 0:
+            return False
+
+    assert 0 < unique_share < 1, "unique_share must be in next bounds: (0; 1)"
+
+    try:
+        # We can't cache if the items are not hashable.
+        unique_elements = set(islice(arg, check_count))
+    except TypeError:
+        return False
+    if len(unique_elements) > check_count * unique_share:
+        do_caching = False
+    return do_caching
+
+
+def _maybe_cache(
+    arg: ArrayConvertible,
+    format: str | None,
+    cache: bool,
+    convert_listlike: Callable,
+) -> Series:
+    """
+    Create a cache of unique dates from an array of dates
+
+    Parameters
+    ----------
+    arg : listlike, tuple, 1-d array, Series
+    format : string
+        Strftime format to parse time
+    cache : bool
+        True attempts to create a cache of converted values
+    convert_listlike : function
+        Conversion function to apply on dates
+
+    Returns
+    -------
+    cache_array : Series
+        Cache of converted, unique dates.
Can be empty + """ + from pandas import Series + + cache_array = Series(dtype=object) + + if cache: + # Perform a quicker unique check + if not should_cache(arg): + return cache_array + + unique_dates = unique(arg) + if len(unique_dates) < len(arg): + cache_dates = convert_listlike(unique_dates, format) + cache_array = Series(cache_dates, index=unique_dates) + # GH#39882 and GH#35888 in case of None and NaT we get duplicates + if not cache_array.index.is_unique: + cache_array = cache_array[~cache_array.index.duplicated()] + return cache_array + + +def _box_as_indexlike( + dt_array: ArrayLike, utc: bool | None = None, name: Hashable = None +) -> Index: + """ + Properly boxes the ndarray of datetimes to DatetimeIndex + if it is possible or to generic Index instead + + Parameters + ---------- + dt_array: 1-d array + Array of datetimes to be wrapped in an Index. + tz : object + None or 'utc' + name : string, default None + Name for a resulting index + + Returns + ------- + result : datetime of converted dates + - DatetimeIndex if convertible to sole datetime64 type + - general Index otherwise + """ + + if is_datetime64_dtype(dt_array): + tz = "utc" if utc else None + return DatetimeIndex(dt_array, tz=tz, name=name) + return Index(dt_array, name=name, dtype=dt_array.dtype) + + +def _convert_and_box_cache( + arg: DatetimeScalarOrArrayConvertible, + cache_array: Series, + name: str | None = None, +) -> Index: + """ + Convert array of dates with a cache and wrap the result in an Index. + + Parameters + ---------- + arg : integer, float, string, datetime, list, tuple, 1-d array, Series + cache_array : Series + Cache of converted, unique dates + name : string, default None + Name for a DatetimeIndex + + Returns + ------- + result : Index-like of converted dates + """ + from pandas import Series + + result = Series(arg).map(cache_array) + return _box_as_indexlike(result._values, utc=None, name=name) + + +def _return_parsed_timezone_results(result: np.ndarray, timezones, tz, name) -> Index: + """ + Return results from array_strptime if a %z or %Z directive was passed. + + Parameters + ---------- + result : ndarray[int64] + int64 date representations of the dates + timezones : ndarray + pytz timezone objects + tz : object + None or pytz timezone object + name : string, default None + Name for a DatetimeIndex + + Returns + ------- + tz_result : Index-like of parsed dates with timezone + """ + tz_results = np.array( + [Timestamp(res).tz_localize(zone) for res, zone in zip(result, timezones)] + ) + if tz is not None: + # Convert to the same tz + tz_results = np.array([tz_result.tz_convert(tz) for tz_result in tz_results]) + + return Index(tz_results, name=name) + + +def _convert_listlike_datetimes( + arg, + format: str | None, + name: Hashable = None, + tz: Timezone | None = None, + unit: str | None = None, + errors: str = "raise", + infer_datetime_format: bool = False, + dayfirst: bool | None = None, + yearfirst: bool | None = None, + exact: bool = True, +): + """ + Helper function for to_datetime. 
Performs the conversions of 1D listlike + of dates + + Parameters + ---------- + arg : list, tuple, ndarray, Series, Index + date to be parsed + name : object + None or string for the Index name + tz : object + None or 'utc' + unit : str + None or string of the frequency of the passed data + errors : str + error handing behaviors from to_datetime, 'raise', 'coerce', 'ignore' + infer_datetime_format : bool, default False + inferring format behavior from to_datetime + dayfirst : bool + dayfirst parsing behavior from to_datetime + yearfirst : bool + yearfirst parsing behavior from to_datetime + exact : bool, default True + exact format matching behavior from to_datetime + + Returns + ------- + Index-like of parsed dates + """ + if isinstance(arg, (list, tuple)): + arg = np.array(arg, dtype="O") + + arg_dtype = getattr(arg, "dtype", None) + # these are shortcutable + if is_datetime64tz_dtype(arg_dtype): + if not isinstance(arg, (DatetimeArray, DatetimeIndex)): + return DatetimeIndex(arg, tz=tz, name=name) + if tz == "utc": + arg = arg.tz_convert(None).tz_localize(tz) + return arg + + elif is_datetime64_ns_dtype(arg_dtype): + if not isinstance(arg, (DatetimeArray, DatetimeIndex)): + try: + return DatetimeIndex(arg, tz=tz, name=name) + except ValueError: + pass + elif tz: + # DatetimeArray, DatetimeIndex + return arg.tz_localize(tz) + + return arg + + elif unit is not None: + if format is not None: + raise ValueError("cannot specify both format and unit") + return _to_datetime_with_unit(arg, unit, name, tz, errors) + elif getattr(arg, "ndim", 1) > 1: + raise TypeError( + "arg must be a string, datetime, list, tuple, 1-d array, or Series" + ) + + # warn if passing timedelta64, raise for PeriodDtype + # NB: this must come after unit transformation + orig_arg = arg + try: + arg, _ = maybe_convert_dtype(arg, copy=False) + except TypeError: + if errors == "coerce": + npvalues = np.array(["NaT"], dtype="datetime64[ns]").repeat(len(arg)) + return DatetimeIndex(npvalues, name=name) + elif errors == "ignore": + idx = Index(arg, name=name) + return idx + raise + + arg = ensure_object(arg) + require_iso8601 = False + + if infer_datetime_format and format is None: + format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst) + + if format is not None: + # There is a special fast-path for iso8601 formatted + # datetime strings, so in those cases don't use the inferred + # format because this path makes process slower in this + # special case + format_is_iso8601 = format_is_iso(format) + if format_is_iso8601: + require_iso8601 = not infer_datetime_format + format = None + + if format is not None: + res = _to_datetime_with_format( + arg, orig_arg, name, tz, format, exact, errors, infer_datetime_format + ) + if res is not None: + return res + + assert format is None or infer_datetime_format + utc = tz == "utc" + result, tz_parsed = objects_to_datetime64ns( + arg, + dayfirst=dayfirst, + yearfirst=yearfirst, + utc=utc, + errors=errors, + require_iso8601=require_iso8601, + allow_object=True, + ) + + if tz_parsed is not None: + # We can take a shortcut since the datetime64 numpy array + # is in UTC + dta = DatetimeArray(result, dtype=tz_to_dtype(tz_parsed)) + return DatetimeIndex._simple_new(dta, name=name) + + utc = tz == "utc" + return _box_as_indexlike(result, utc=utc, name=name) + + +def _array_strptime_with_fallback( + arg, + name, + tz, + fmt: str, + exact: bool, + errors: str, + infer_datetime_format: bool, +) -> Index | None: + """ + Call array_strptime, with fallback behavior depending on 'errors'. 
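+
+    A minimal sketch of the contract (reprs indicative only): with
+    ``errors="coerce"`` unparseable entries come back as NaT, while a ``None``
+    return tells the caller to fall back to objects_to_datetime64ns.
+
+    >>> import numpy as np
+    >>> arg = np.array(["2021-01-01", "oops"], dtype=object)
+    >>> _array_strptime_with_fallback(
+    ...     arg, None, None, "%Y-%m-%d", exact=True, errors="coerce",
+    ...     infer_datetime_format=False,
+    ... )  # doctest: +SKIP
+    DatetimeIndex(['2021-01-01', 'NaT'], dtype='datetime64[ns]', freq=None)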
+ """ + utc = tz == "utc" + + try: + result, timezones = array_strptime(arg, fmt, exact=exact, errors=errors) + if "%Z" in fmt or "%z" in fmt: + return _return_parsed_timezone_results(result, timezones, tz, name) + except OutOfBoundsDatetime: + if errors == "raise": + raise + elif errors == "coerce": + result = np.empty(arg.shape, dtype="M8[ns]") + iresult = result.view("i8") + iresult.fill(iNaT) + else: + result = arg + except ValueError: + # if fmt was inferred, try falling back + # to array_to_datetime - terminate here + # for specified formats + if not infer_datetime_format: + if errors == "raise": + raise + elif errors == "coerce": + result = np.empty(arg.shape, dtype="M8[ns]") + iresult = result.view("i8") + iresult.fill(iNaT) + else: + result = arg + else: + # Indicates to the caller to fallback to objects_to_datetime64ns + return None + + return _box_as_indexlike(result, utc=utc, name=name) + + +def _to_datetime_with_format( + arg, + orig_arg, + name, + tz, + fmt: str, + exact: bool, + errors: str, + infer_datetime_format: bool, +) -> Index | None: + """ + Try parsing with the given format, returning None on failure. + """ + result = None + try: + # shortcut formatting here + if fmt == "%Y%m%d": + # pass orig_arg as float-dtype may have been converted to + # datetime64[ns] + orig_arg = ensure_object(orig_arg) + try: + # may return None without raising + result = _attempt_YYYYMMDD(orig_arg, errors=errors) + except (ValueError, TypeError, OutOfBoundsDatetime) as err: + raise ValueError( + "cannot convert the input to '%Y%m%d' date format" + ) from err + if result is not None: + utc = tz == "utc" + return _box_as_indexlike(result, utc=utc, name=name) + + # fallback + res = _array_strptime_with_fallback( + arg, name, tz, fmt, exact, errors, infer_datetime_format + ) + return res + + except ValueError as err: + # Fallback to try to convert datetime objects if timezone-aware + # datetime objects are found without passing `utc=True` + try: + values, tz = conversion.datetime_to_datetime64(arg) + dta = DatetimeArray(values, dtype=tz_to_dtype(tz)) + return DatetimeIndex._simple_new(dta, name=name) + except (ValueError, TypeError): + raise err + + +def _to_datetime_with_unit(arg, unit, name, tz, errors: str) -> Index: + """ + to_datetime specalized to the case where a 'unit' is passed. + """ + arg = extract_array(arg, extract_numpy=True) + + # GH#30050 pass an ndarray to tslib.array_with_unit_to_datetime + # because it expects an ndarray argument + if isinstance(arg, IntegerArray): + arr = arg.astype(f"datetime64[{unit}]") + tz_parsed = None + else: + arg = np.asarray(arg) + arr, tz_parsed = tslib.array_with_unit_to_datetime(arg, unit, errors=errors) + + if errors == "ignore": + # Index constructor _may_ infer to DatetimeIndex + result = Index._with_infer(arr, name=name) + else: + result = DatetimeIndex(arr, name=name) + + if not isinstance(result, DatetimeIndex): + return result + + # GH#23758: We may still need to localize the result with tz + # GH#25546: Apply tz_parsed first (from arg), then tz (from caller) + # result will be naive but in UTC + result = result.tz_localize("UTC").tz_convert(tz_parsed) + + if tz is not None: + if result.tz is None: + result = result.tz_localize(tz) + else: + result = result.tz_convert(tz) + return result + + +def _adjust_to_origin(arg, origin, unit): + """ + Helper function for to_datetime. 
+ Adjust input argument to the specified origin + + Parameters + ---------- + arg : list, tuple, ndarray, Series, Index + date to be adjusted + origin : 'julian' or Timestamp + origin offset for the arg + unit : str + passed unit from to_datetime, must be 'D' + + Returns + ------- + ndarray or scalar of adjusted date(s) + """ + if origin == "julian": + original = arg + j0 = Timestamp(0).to_julian_date() + if unit != "D": + raise ValueError("unit must be 'D' for origin='julian'") + try: + arg = arg - j0 + except TypeError as err: + raise ValueError( + "incompatible 'arg' type for given 'origin'='julian'" + ) from err + + # preemptively check this for a nice range + j_max = Timestamp.max.to_julian_date() - j0 + j_min = Timestamp.min.to_julian_date() - j0 + if np.any(arg > j_max) or np.any(arg < j_min): + raise OutOfBoundsDatetime( + f"{original} is Out of Bounds for origin='julian'" + ) + else: + # arg must be numeric + if not ( + (is_scalar(arg) and (is_integer(arg) or is_float(arg))) + or is_numeric_dtype(np.asarray(arg)) + ): + raise ValueError( + f"'{arg}' is not compatible with origin='{origin}'; " + "it must be numeric with a unit specified" + ) + + # we are going to offset back to unix / epoch time + try: + offset = Timestamp(origin) + except OutOfBoundsDatetime as err: + raise OutOfBoundsDatetime(f"origin {origin} is Out of Bounds") from err + except ValueError as err: + raise ValueError( + f"origin {origin} cannot be converted to a Timestamp" + ) from err + + if offset.tz is not None: + raise ValueError(f"origin offset {offset} must be tz-naive") + td_offset = offset - Timestamp(0) + + # convert the offset to the unit of the arg + # this should be lossless in terms of precision + ioffset = td_offset // Timedelta(1, unit=unit) + + # scalars & ndarray-like can handle the addition + if is_list_like(arg) and not isinstance(arg, (ABCSeries, Index, np.ndarray)): + arg = np.asarray(arg) + arg = arg + ioffset + return arg + + +@overload +def to_datetime( + arg: DatetimeScalar, + errors: str = ..., + dayfirst: bool = ..., + yearfirst: bool = ..., + utc: bool | None = ..., + format: str | None = ..., + exact: bool = ..., + unit: str | None = ..., + infer_datetime_format: bool = ..., + origin=..., + cache: bool = ..., +) -> DatetimeScalar | NaTType: + ... + + +@overload +def to_datetime( + arg: Series, + errors: str = ..., + dayfirst: bool = ..., + yearfirst: bool = ..., + utc: bool | None = ..., + format: str | None = ..., + exact: bool = ..., + unit: str | None = ..., + infer_datetime_format: bool = ..., + origin=..., + cache: bool = ..., +) -> Series: + ... + + +@overload +def to_datetime( + arg: list | tuple | np.ndarray, + errors: str = ..., + dayfirst: bool = ..., + yearfirst: bool = ..., + utc: bool | None = ..., + format: str | None = ..., + exact: bool = ..., + unit: str | None = ..., + infer_datetime_format: bool = ..., + origin=..., + cache: bool = ..., +) -> DatetimeIndex: + ... + + +def to_datetime( + arg: DatetimeScalarOrArrayConvertible, + errors: str = "raise", + dayfirst: bool = False, + yearfirst: bool = False, + utc: bool | None = None, + format: str | None = None, + exact: bool = True, + unit: str | None = None, + infer_datetime_format: bool = False, + origin="unix", + cache: bool = True, +) -> DatetimeIndex | Series | DatetimeScalar | NaTType | None: + """ + Convert argument to datetime. + + This function converts a scalar, array-like, :class:`Series` or + :class:`DataFrame`/dict-like to a pandas datetime object. 
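+
+    A minimal round-trip sketch (the Examples section below covers the full
+    range of inputs):
+
+    >>> pd.to_datetime("2017-03-22 15:16:45")
+    Timestamp('2017-03-22 15:16:45')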
+ + Parameters + ---------- + arg : int, float, str, datetime, list, tuple, 1-d array, Series, DataFrame/dict-like + The object to convert to a datetime. If a :class:`DataFrame` is provided, the + method expects minimally the following columns: :const:`"year"`, + :const:`"month"`, :const:`"day"`. + errors : {'ignore', 'raise', 'coerce'}, default 'raise' + - If :const:`'raise'`, then invalid parsing will raise an exception. + - If :const:`'coerce'`, then invalid parsing will be set as :const:`NaT`. + - If :const:`'ignore'`, then invalid parsing will return the input. + dayfirst : bool, default False + Specify a date parse order if `arg` is str or is list-like. + If :const:`True`, parses dates with the day first, e.g. :const:`"10/11/12"` + is parsed as :const:`2012-11-10`. + + .. warning:: + + ``dayfirst=True`` is not strict, but will prefer to parse + with day first. If a delimited date string cannot be parsed in + accordance with the given `dayfirst` option, e.g. + ``to_datetime(['31-12-2021'])``, then a warning will be shown. + + yearfirst : bool, default False + Specify a date parse order if `arg` is str or is list-like. + + - If :const:`True` parses dates with the year first, e.g. + :const:`"10/11/12"` is parsed as :const:`2010-11-12`. + - If both `dayfirst` and `yearfirst` are :const:`True`, `yearfirst` is + preceded (same as :mod:`dateutil`). + + .. warning:: + + ``yearfirst=True`` is not strict, but will prefer to parse + with year first. + + utc : bool, default None + Control timezone-related parsing, localization and conversion. + + - If :const:`True`, the function *always* returns a timezone-aware + UTC-localized :class:`Timestamp`, :class:`Series` or + :class:`DatetimeIndex`. To do this, timezone-naive inputs are + *localized* as UTC, while timezone-aware inputs are *converted* to UTC. + + - If :const:`False` (default), inputs will not be coerced to UTC. + Timezone-naive inputs will remain naive, while timezone-aware ones + will keep their time offsets. Limitations exist for mixed + offsets (typically, daylight savings), see :ref:`Examples + ` section for details. + + See also: pandas general documentation about `timezone conversion and + localization + `_. + + format : str, default None + The strftime to parse time, e.g. :const:`"%d/%m/%Y"`. Note that + :const:`"%f"` will parse all the way up to nanoseconds. See + `strftime documentation + `_ for more information on choices. + exact : bool, default True + Control how `format` is used: + + - If :const:`True`, require an exact `format` match. + - If :const:`False`, allow the `format` to match anywhere in the target + string. + + unit : str, default 'ns' + The unit of the arg (D,s,ms,us,ns) denote the unit, which is an + integer or float number. This will be based off the origin. + Example, with ``unit='ms'`` and ``origin='unix'`` (the default), this + would calculate the number of milliseconds to the unix epoch start. + infer_datetime_format : bool, default False + If :const:`True` and no `format` is given, attempt to infer the format + of the datetime strings based on the first non-NaN element, + and if it can be inferred, switch to a faster method of parsing them. + In some cases this can increase the parsing speed by ~5-10x. + origin : scalar, default 'unix' + Define the reference date. The numeric values would be parsed as number + of units (defined by `unit`) since this reference date. + + - If :const:`'unix'` (or POSIX) time; origin is set to 1970-01-01. 
+ - If :const:`'julian'`, unit must be :const:`'D'`, and origin is set to + beginning of Julian Calendar. Julian day number :const:`0` is assigned + to the day starting at noon on January 1, 4713 BC. + - If Timestamp convertible, origin is set to Timestamp identified by + origin. + cache : bool, default True + If :const:`True`, use a cache of unique, converted dates to apply the + datetime conversion. May produce significant speed-up when parsing + duplicate date strings, especially ones with timezone offsets. The cache + is only used when there are at least 50 values. The presence of + out-of-bounds values will render the cache unusable and may slow down + parsing. + + .. versionchanged:: 0.25.0 + changed default value from :const:`False` to :const:`True`. + + Returns + ------- + datetime + If parsing succeeded. + Return type depends on input (types in parenthesis correspond to + fallback in case of unsuccessful timezone or out-of-range timestamp + parsing): + + - scalar: :class:`Timestamp` (or :class:`datetime.datetime`) + - array-like: :class:`DatetimeIndex` (or :class:`Series` with + :class:`object` dtype containing :class:`datetime.datetime`) + - Series: :class:`Series` of :class:`datetime64` dtype (or + :class:`Series` of :class:`object` dtype containing + :class:`datetime.datetime`) + - DataFrame: :class:`Series` of :class:`datetime64` dtype (or + :class:`Series` of :class:`object` dtype containing + :class:`datetime.datetime`) + + Raises + ------ + ParserError + When parsing a date from string fails. + ValueError + When another datetime conversion error happens. For example when one + of 'year', 'month', day' columns is missing in a :class:`DataFrame`, or + when a Timezone-aware :class:`datetime.datetime` is found in an array-like + of mixed time offsets, and ``utc=False``. + + See Also + -------- + DataFrame.astype : Cast argument to a specified dtype. + to_timedelta : Convert argument to timedelta. + convert_dtypes : Convert dtypes. + + Notes + ----- + + Many input types are supported, and lead to different output types: + + - **scalars** can be int, float, str, datetime object (from stdlib :mod:`datetime` + module or :mod:`numpy`). They are converted to :class:`Timestamp` when + possible, otherwise they are converted to :class:`datetime.datetime`. + None/NaN/null scalars are converted to :const:`NaT`. + + - **array-like** can contain int, float, str, datetime objects. They are + converted to :class:`DatetimeIndex` when possible, otherwise they are + converted to :class:`Index` with :class:`object` dtype, containing + :class:`datetime.datetime`. None/NaN/null entries are converted to + :const:`NaT` in both cases. + + - **Series** are converted to :class:`Series` with :class:`datetime64` + dtype when possible, otherwise they are converted to :class:`Series` with + :class:`object` dtype, containing :class:`datetime.datetime`. None/NaN/null + entries are converted to :const:`NaT` in both cases. + + - **DataFrame/dict-like** are converted to :class:`Series` with + :class:`datetime64` dtype. For each row a datetime is created from assembling + the various dataframe columns. Column keys can be common abbreviations + like [‘year’, ‘month’, ‘day’, ‘minute’, ‘second’, ‘ms’, ‘us’, ‘ns’]) or + plurals of the same. 
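+
+    For instance, a plain dict is assembled the same way as a DataFrame (a
+    minimal sketch of the dict-like path described above):
+
+    >>> pd.to_datetime({"year": [2015], "month": [2], "day": [4]})
+    0   2015-02-04
+    dtype: datetime64[ns]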
+ + The following causes are responsible for :class:`datetime.datetime` objects + being returned (possibly inside an :class:`Index` or a :class:`Series` with + :class:`object` dtype) instead of a proper pandas designated type + (:class:`Timestamp`, :class:`DatetimeIndex` or :class:`Series` + with :class:`datetime64` dtype): + + - when any input element is before :const:`Timestamp.min` or after + :const:`Timestamp.max`, see `timestamp limitations + `_. + + - when ``utc=False`` (default) and the input is an array-like or + :class:`Series` containing mixed naive/aware datetime, or aware with mixed + time offsets. Note that this happens in the (quite frequent) situation when + the timezone has a daylight savings policy. In that case you may wish to + use ``utc=True``. + + Examples + -------- + + **Handling various input formats** + + Assembling a datetime from multiple columns of a :class:`DataFrame`. The keys + can be common abbreviations like ['year', 'month', 'day', 'minute', 'second', + 'ms', 'us', 'ns']) or plurals of the same + + >>> df = pd.DataFrame({'year': [2015, 2016], + ... 'month': [2, 3], + ... 'day': [4, 5]}) + >>> pd.to_datetime(df) + 0 2015-02-04 + 1 2016-03-05 + dtype: datetime64[ns] + + Passing ``infer_datetime_format=True`` can often-times speedup a parsing + if its not an ISO8601 format exactly, but in a regular format. + + >>> s = pd.Series(['3/11/2000', '3/12/2000', '3/13/2000'] * 1000) + >>> s.head() + 0 3/11/2000 + 1 3/12/2000 + 2 3/13/2000 + 3 3/11/2000 + 4 3/12/2000 + dtype: object + + >>> %timeit pd.to_datetime(s, infer_datetime_format=True) # doctest: +SKIP + 100 loops, best of 3: 10.4 ms per loop + + >>> %timeit pd.to_datetime(s, infer_datetime_format=False) # doctest: +SKIP + 1 loop, best of 3: 471 ms per loop + + Using a unix epoch time + + >>> pd.to_datetime(1490195805, unit='s') + Timestamp('2017-03-22 15:16:45') + >>> pd.to_datetime(1490195805433502912, unit='ns') + Timestamp('2017-03-22 15:16:45.433502912') + + .. warning:: For float arg, precision rounding might happen. To prevent + unexpected behavior use a fixed-width exact type. + + Using a non-unix epoch origin + + >>> pd.to_datetime([1, 2, 3], unit='D', + ... origin=pd.Timestamp('1960-01-01')) + DatetimeIndex(['1960-01-02', '1960-01-03', '1960-01-04'], + dtype='datetime64[ns]', freq=None) + + **Non-convertible date/times** + + If a date does not meet the `timestamp limitations + `_, passing ``errors='ignore'`` + will return the original input instead of raising any exception. + + Passing ``errors='coerce'`` will force an out-of-bounds date to :const:`NaT`, + in addition to forcing non-dates (or non-parseable dates) to :const:`NaT`. + + >>> pd.to_datetime('13000101', format='%Y%m%d', errors='ignore') + datetime.datetime(1300, 1, 1, 0, 0) + >>> pd.to_datetime('13000101', format='%Y%m%d', errors='coerce') + NaT + + .. 
_to_datetime_tz_examples: + + **Timezones and time offsets** + + The default behaviour (``utc=False``) is as follows: + + - Timezone-naive inputs are converted to timezone-naive :class:`DatetimeIndex`: + + >>> pd.to_datetime(['2018-10-26 12:00', '2018-10-26 13:00:15']) + DatetimeIndex(['2018-10-26 12:00:00', '2018-10-26 13:00:15'], + dtype='datetime64[ns]', freq=None) + + - Timezone-aware inputs *with constant time offset* are converted to + timezone-aware :class:`DatetimeIndex`: + + >>> pd.to_datetime(['2018-10-26 12:00 -0500', '2018-10-26 13:00 -0500']) + DatetimeIndex(['2018-10-26 12:00:00-05:00', '2018-10-26 13:00:00-05:00'], + dtype='datetime64[ns, pytz.FixedOffset(-300)]', freq=None) + + - However, timezone-aware inputs *with mixed time offsets* (for example + issued from a timezone with daylight savings, such as Europe/Paris) + are **not successfully converted** to a :class:`DatetimeIndex`. Instead a + simple :class:`Index` containing :class:`datetime.datetime` objects is + returned: + + >>> pd.to_datetime(['2020-10-25 02:00 +0200', '2020-10-25 04:00 +0100']) + Index([2020-10-25 02:00:00+02:00, 2020-10-25 04:00:00+01:00], + dtype='object') + + - A mix of timezone-aware and timezone-naive inputs is converted to + a timezone-aware :class:`DatetimeIndex` if the offsets of the timezone-aware + are constant: + + >>> from datetime import datetime + >>> pd.to_datetime(["2020-01-01 01:00 -01:00", datetime(2020, 1, 1, 3, 0)]) + DatetimeIndex(['2020-01-01 01:00:00-01:00', '2020-01-01 02:00:00-01:00'], + dtype='datetime64[ns, pytz.FixedOffset(-60)]', freq=None) + + - Finally, mixing timezone-aware strings and :class:`datetime.datetime` always + raises an error, even if the elements all have the same time offset. + + >>> from datetime import datetime, timezone, timedelta + >>> d = datetime(2020, 1, 1, 18, tzinfo=timezone(-timedelta(hours=1))) + >>> pd.to_datetime(["2020-01-01 17:00 -0100", d]) + Traceback (most recent call last): + ... + ValueError: Tz-aware datetime.datetime cannot be converted to datetime64 + unless utc=True + + | + + Setting ``utc=True`` solves most of the above issues: + + - Timezone-naive inputs are *localized* as UTC + + >>> pd.to_datetime(['2018-10-26 12:00', '2018-10-26 13:00'], utc=True) + DatetimeIndex(['2018-10-26 12:00:00+00:00', '2018-10-26 13:00:00+00:00'], + dtype='datetime64[ns, UTC]', freq=None) + + - Timezone-aware inputs are *converted* to UTC (the output represents the + exact same datetime, but viewed from the UTC time offset `+00:00`). + + >>> pd.to_datetime(['2018-10-26 12:00 -0530', '2018-10-26 12:00 -0500'], + ... utc=True) + DatetimeIndex(['2018-10-26 17:30:00+00:00', '2018-10-26 17:00:00+00:00'], + dtype='datetime64[ns, UTC]', freq=None) + + - Inputs can contain both naive and aware, string or datetime, the above + rules still apply + + >>> pd.to_datetime(['2018-10-26 12:00', '2018-10-26 12:00 -0530', + ... datetime(2020, 1, 1, 18), + ... datetime(2020, 1, 1, 18, + ... tzinfo=timezone(-timedelta(hours=1)))], + ... 
utc=True) + DatetimeIndex(['2018-10-26 12:00:00+00:00', '2018-10-26 17:30:00+00:00', + '2020-01-01 18:00:00+00:00', '2020-01-01 19:00:00+00:00'], + dtype='datetime64[ns, UTC]', freq=None) + """ + if arg is None: + return None + + if origin != "unix": + arg = _adjust_to_origin(arg, origin, unit) + + tz = "utc" if utc else None + convert_listlike = partial( + _convert_listlike_datetimes, + tz=tz, + unit=unit, + dayfirst=dayfirst, + yearfirst=yearfirst, + errors=errors, + exact=exact, + infer_datetime_format=infer_datetime_format, + ) + + result: Timestamp | NaTType | Series | Index + + if isinstance(arg, Timestamp): + result = arg + if tz is not None: + if arg.tz is not None: + result = arg.tz_convert(tz) + else: + result = arg.tz_localize(tz) + elif isinstance(arg, ABCSeries): + cache_array = _maybe_cache(arg, format, cache, convert_listlike) + if not cache_array.empty: + result = arg.map(cache_array) + else: + values = convert_listlike(arg._values, format) + result = arg._constructor(values, index=arg.index, name=arg.name) + elif isinstance(arg, (ABCDataFrame, abc.MutableMapping)): + result = _assemble_from_unit_mappings(arg, errors, tz) + elif isinstance(arg, Index): + cache_array = _maybe_cache(arg, format, cache, convert_listlike) + if not cache_array.empty: + result = _convert_and_box_cache(arg, cache_array, name=arg.name) + else: + result = convert_listlike(arg, format, name=arg.name) + elif is_list_like(arg): + try: + cache_array = _maybe_cache(arg, format, cache, convert_listlike) + except OutOfBoundsDatetime: + # caching attempts to create a DatetimeIndex, which may raise + # an OOB. If that's the desired behavior, then just reraise... + if errors == "raise": + raise + # ... otherwise, continue without the cache. + from pandas import Series + + cache_array = Series([], dtype=object) # just an empty array + if not cache_array.empty: + result = _convert_and_box_cache(arg, cache_array) + else: + result = convert_listlike(arg, format) + else: + result = convert_listlike(np.array([arg]), format)[0] + + # error: Incompatible return value type (got "Union[Timestamp, NaTType, + # Series, Index]", expected "Union[DatetimeIndex, Series, float, str, + # NaTType, None]") + return result # type: ignore[return-value] + + +# mappings for assembling units +_unit_map = { + "year": "year", + "years": "year", + "month": "month", + "months": "month", + "day": "day", + "days": "day", + "hour": "h", + "hours": "h", + "minute": "m", + "minutes": "m", + "second": "s", + "seconds": "s", + "ms": "ms", + "millisecond": "ms", + "milliseconds": "ms", + "us": "us", + "microsecond": "us", + "microseconds": "us", + "ns": "ns", + "nanosecond": "ns", + "nanoseconds": "ns", +} + + +def _assemble_from_unit_mappings(arg, errors, tz): + """ + assemble the unit specified fields from the arg (DataFrame) + Return a Series for actual parsing + + Parameters + ---------- + arg : DataFrame + errors : {'ignore', 'raise', 'coerce'}, default 'raise' + + - If :const:`'raise'`, then invalid parsing will raise an exception + - If :const:`'coerce'`, then invalid parsing will be set as :const:`NaT` + - If :const:`'ignore'`, then invalid parsing will return the input + tz : None or 'utc' + + Returns + ------- + Series + """ + from pandas import ( + DataFrame, + to_numeric, + to_timedelta, + ) + + arg = DataFrame(arg) + if not arg.columns.is_unique: + raise ValueError("cannot assemble with duplicate keys") + + # replace passed unit with _unit_map + def f(value): + if value in _unit_map: + return _unit_map[value] + + # m is case 
significant + if value.lower() in _unit_map: + return _unit_map[value.lower()] + + return value + + unit = {k: f(k) for k in arg.keys()} + unit_rev = {v: k for k, v in unit.items()} + + # we require at least Ymd + required = ["year", "month", "day"] + req = sorted(set(required) - set(unit_rev.keys())) + if len(req): + _required = ",".join(req) + raise ValueError( + "to assemble mappings requires at least that " + f"[year, month, day] be specified: [{_required}] is missing" + ) + + # keys we don't recognize + excess = sorted(set(unit_rev.keys()) - set(_unit_map.values())) + if len(excess): + _excess = ",".join(excess) + raise ValueError( + f"extra keys have been passed to the datetime assemblage: [{_excess}]" + ) + + def coerce(values): + # we allow coercion to if errors allows + values = to_numeric(values, errors=errors) + + # prevent overflow in case of int8 or int16 + if is_integer_dtype(values): + values = values.astype("int64", copy=False) + return values + + values = ( + coerce(arg[unit_rev["year"]]) * 10000 + + coerce(arg[unit_rev["month"]]) * 100 + + coerce(arg[unit_rev["day"]]) + ) + try: + values = to_datetime(values, format="%Y%m%d", errors=errors, utc=tz) + except (TypeError, ValueError) as err: + raise ValueError(f"cannot assemble the datetimes: {err}") from err + + for u in ["h", "m", "s", "ms", "us", "ns"]: + value = unit_rev.get(u) + if value is not None and value in arg: + try: + values += to_timedelta(coerce(arg[value]), unit=u, errors=errors) + except (TypeError, ValueError) as err: + raise ValueError( + f"cannot assemble the datetimes [{value}]: {err}" + ) from err + return values + + +def _attempt_YYYYMMDD(arg: np.ndarray, errors: str) -> np.ndarray | None: + """ + try to parse the YYYYMMDD/%Y%m%d format, try to deal with NaT-like, + arg is a passed in as an object dtype, but could really be ints/strings + with nan-like/or floats (e.g. with nan) + + Parameters + ---------- + arg : np.ndarray[object] + errors : {'raise','ignore','coerce'} + """ + + def calc(carg): + # calculate the actual result + carg = carg.astype(object) + parsed = parsing.try_parse_year_month_day( + carg / 10000, carg / 100 % 100, carg % 100 + ) + return tslib.array_to_datetime(parsed, errors=errors)[0] + + def calc_with_mask(carg, mask): + result = np.empty(carg.shape, dtype="M8[ns]") + iresult = result.view("i8") + iresult[~mask] = iNaT + + masked_result = calc(carg[mask].astype(np.float64).astype(np.int64)) + result[mask] = masked_result.astype("M8[ns]") + return result + + # try intlike / strings that are ints + try: + return calc(arg.astype(np.int64)) + except (ValueError, OverflowError, TypeError): + pass + + # a float with actual np.nan + try: + carg = arg.astype(np.float64) + return calc_with_mask(carg, notna(carg)) + except (ValueError, OverflowError, TypeError): + pass + + # string with NaN-like + try: + # error: Argument 2 to "isin" has incompatible type "List[Any]"; expected + # "Union[Union[ExtensionArray, ndarray], Index, Series]" + mask = ~algorithms.isin(arg, list(nat_strings)) # type: ignore[arg-type] + return calc_with_mask(arg, mask) + except (ValueError, OverflowError, TypeError): + pass + + return None + + +def to_time(arg, format=None, infer_time_format=False, errors="raise"): + # GH#34145 + warnings.warn( + "`to_time` has been moved, should be imported from pandas.core.tools.times. 
" + "This alias will be removed in a future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + from pandas.core.tools.times import to_time + + return to_time(arg, format, infer_time_format, errors) diff --git a/.local/lib/python3.8/site-packages/pandas/core/tools/numeric.py b/.local/lib/python3.8/site-packages/pandas/core/tools/numeric.py new file mode 100644 index 0000000..26197e1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/tools/numeric.py @@ -0,0 +1,243 @@ +from __future__ import annotations + +import numpy as np + +from pandas._libs import lib + +from pandas.core.dtypes.cast import maybe_downcast_numeric +from pandas.core.dtypes.common import ( + ensure_object, + is_datetime_or_timedelta_dtype, + is_decimal, + is_integer_dtype, + is_number, + is_numeric_dtype, + is_scalar, + needs_i8_conversion, +) +from pandas.core.dtypes.generic import ( + ABCIndex, + ABCSeries, +) + +import pandas as pd +from pandas.core.arrays.numeric import NumericArray + + +def to_numeric(arg, errors="raise", downcast=None): + """ + Convert argument to a numeric type. + + The default return dtype is `float64` or `int64` + depending on the data supplied. Use the `downcast` parameter + to obtain other dtypes. + + Please note that precision loss may occur if really large numbers + are passed in. Due to the internal limitations of `ndarray`, if + numbers smaller than `-9223372036854775808` (np.iinfo(np.int64).min) + or larger than `18446744073709551615` (np.iinfo(np.uint64).max) are + passed in, it is very likely they will be converted to float so that + they can stored in an `ndarray`. These warnings apply similarly to + `Series` since it internally leverages `ndarray`. + + Parameters + ---------- + arg : scalar, list, tuple, 1-d array, or Series + Argument to be converted. + errors : {'ignore', 'raise', 'coerce'}, default 'raise' + - If 'raise', then invalid parsing will raise an exception. + - If 'coerce', then invalid parsing will be set as NaN. + - If 'ignore', then invalid parsing will return the input. + downcast : str, default None + Can be 'integer', 'signed', 'unsigned', or 'float'. + If not None, and if the data has been successfully cast to a + numerical dtype (or if the data was numeric to begin with), + downcast that resulting data to the smallest numerical dtype + possible according to the following rules: + + - 'integer' or 'signed': smallest signed int dtype (min.: np.int8) + - 'unsigned': smallest unsigned int dtype (min.: np.uint8) + - 'float': smallest float dtype (min.: np.float32) + + As this behaviour is separate from the core conversion to + numeric values, any errors raised during the downcasting + will be surfaced regardless of the value of the 'errors' input. + + In addition, downcasting will only occur if the size + of the resulting data's dtype is strictly larger than + the dtype it is to be cast to, so if none of the dtypes + checked satisfy that specification, no downcasting will be + performed on the data. + + Returns + ------- + ret + Numeric if parsing succeeded. + Return type depends on input. Series if Series, otherwise ndarray. + + See Also + -------- + DataFrame.astype : Cast argument to a specified dtype. + to_datetime : Convert argument to datetime. + to_timedelta : Convert argument to timedelta. + numpy.ndarray.astype : Cast a numpy array to a specified type. + DataFrame.convert_dtypes : Convert dtypes. 
+ + Examples + -------- + Take separate series and convert to numeric, coercing when told to + + >>> s = pd.Series(['1.0', '2', -3]) + >>> pd.to_numeric(s) + 0 1.0 + 1 2.0 + 2 -3.0 + dtype: float64 + >>> pd.to_numeric(s, downcast='float') + 0 1.0 + 1 2.0 + 2 -3.0 + dtype: float32 + >>> pd.to_numeric(s, downcast='signed') + 0 1 + 1 2 + 2 -3 + dtype: int8 + >>> s = pd.Series(['apple', '1.0', '2', -3]) + >>> pd.to_numeric(s, errors='ignore') + 0 apple + 1 1.0 + 2 2 + 3 -3 + dtype: object + >>> pd.to_numeric(s, errors='coerce') + 0 NaN + 1 1.0 + 2 2.0 + 3 -3.0 + dtype: float64 + + Downcasting of nullable integer and floating dtypes is supported: + + >>> s = pd.Series([1, 2, 3], dtype="Int64") + >>> pd.to_numeric(s, downcast="integer") + 0 1 + 1 2 + 2 3 + dtype: Int8 + >>> s = pd.Series([1.0, 2.1, 3.0], dtype="Float64") + >>> pd.to_numeric(s, downcast="float") + 0 1.0 + 1 2.1 + 2 3.0 + dtype: Float32 + """ + if downcast not in (None, "integer", "signed", "unsigned", "float"): + raise ValueError("invalid downcasting method provided") + + if errors not in ("ignore", "raise", "coerce"): + raise ValueError("invalid error value specified") + + is_series = False + is_index = False + is_scalars = False + + if isinstance(arg, ABCSeries): + is_series = True + values = arg.values + elif isinstance(arg, ABCIndex): + is_index = True + if needs_i8_conversion(arg.dtype): + values = arg.asi8 + else: + values = arg.values + elif isinstance(arg, (list, tuple)): + values = np.array(arg, dtype="O") + elif is_scalar(arg): + if is_decimal(arg): + return float(arg) + if is_number(arg): + return arg + is_scalars = True + values = np.array([arg], dtype="O") + elif getattr(arg, "ndim", 1) > 1: + raise TypeError("arg must be a list, tuple, 1-d array, or Series") + else: + values = arg + + # GH33013: for IntegerArray & FloatingArray extract non-null values for casting + # save mask to reconstruct the full array after casting + mask: np.ndarray | None = None + if isinstance(values, NumericArray): + mask = values._mask + values = values._data[~mask] + + values_dtype = getattr(values, "dtype", None) + if is_numeric_dtype(values_dtype): + pass + elif is_datetime_or_timedelta_dtype(values_dtype): + values = values.view(np.int64) + else: + values = ensure_object(values) + coerce_numeric = errors not in ("ignore", "raise") + try: + values, _ = lib.maybe_convert_numeric( + values, set(), coerce_numeric=coerce_numeric + ) + except (ValueError, TypeError): + if errors == "raise": + raise + + # attempt downcast only if the data has been successfully converted + # to a numerical dtype and if a downcast method has been specified + if downcast is not None and is_numeric_dtype(values.dtype): + typecodes: str | None = None + + if downcast in ("integer", "signed"): + typecodes = np.typecodes["Integer"] + elif downcast == "unsigned" and (not len(values) or np.min(values) >= 0): + typecodes = np.typecodes["UnsignedInteger"] + elif downcast == "float": + typecodes = np.typecodes["Float"] + + # pandas support goes only to np.float32, + # as float dtypes smaller than that are + # extremely rare and not well supported + float_32_char = np.dtype(np.float32).char + float_32_ind = typecodes.index(float_32_char) + typecodes = typecodes[float_32_ind:] + + if typecodes is not None: + # from smallest to largest + for typecode in typecodes: + dtype = np.dtype(typecode) + if dtype.itemsize <= values.dtype.itemsize: + values = maybe_downcast_numeric(values, dtype) + + # successful conversion + if values.dtype == dtype: + break + + # GH33013: for 
IntegerArray & FloatingArray need to reconstruct masked array + if mask is not None: + data = np.zeros(mask.shape, dtype=values.dtype) + data[~mask] = values + + from pandas.core.arrays import ( + FloatingArray, + IntegerArray, + ) + + klass = IntegerArray if is_integer_dtype(data.dtype) else FloatingArray + values = klass(data, mask.copy()) + + if is_series: + return arg._constructor(values, index=arg.index, name=arg.name) + elif is_index: + # because we want to coerce to numeric if possible, + # do not use _shallow_copy + return pd.Index(values, name=arg.name) + elif is_scalars: + return values[0] + else: + return values diff --git a/.local/lib/python3.8/site-packages/pandas/core/tools/timedeltas.py b/.local/lib/python3.8/site-packages/pandas/core/tools/timedeltas.py new file mode 100644 index 0000000..81b2be4 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/tools/timedeltas.py @@ -0,0 +1,196 @@ +""" +timedelta support tools +""" +from __future__ import annotations + +import numpy as np + +from pandas._libs import lib +from pandas._libs.tslibs import ( + NaT, + NaTType, +) +from pandas._libs.tslibs.timedeltas import ( + Timedelta, + parse_timedelta_unit, +) + +from pandas.core.dtypes.common import is_list_like +from pandas.core.dtypes.generic import ( + ABCIndex, + ABCSeries, +) + +from pandas.core.arrays.timedeltas import sequence_to_td64ns + + +def to_timedelta(arg, unit=None, errors="raise"): + """ + Convert argument to timedelta. + + Timedeltas are absolute differences in times, expressed in difference + units (e.g. days, hours, minutes, seconds). This method converts + an argument from a recognized timedelta format / value into + a Timedelta type. + + Parameters + ---------- + arg : str, timedelta, list-like or Series + The data to be converted to timedelta. + + .. deprecated:: 1.2 + Strings with units 'M', 'Y' and 'y' do not represent + unambiguous timedelta values and will be removed in a future version + + unit : str, optional + Denotes the unit of the arg for numeric `arg`. Defaults to ``"ns"``. + + Possible values: + + * 'W' + * 'D' / 'days' / 'day' + * 'hours' / 'hour' / 'hr' / 'h' + * 'm' / 'minute' / 'min' / 'minutes' / 'T' + * 'S' / 'seconds' / 'sec' / 'second' + * 'ms' / 'milliseconds' / 'millisecond' / 'milli' / 'millis' / 'L' + * 'us' / 'microseconds' / 'microsecond' / 'micro' / 'micros' / 'U' + * 'ns' / 'nanoseconds' / 'nano' / 'nanos' / 'nanosecond' / 'N' + + .. versionchanged:: 1.1.0 + + Must not be specified when `arg` context strings and + ``errors="raise"``. + + errors : {'ignore', 'raise', 'coerce'}, default 'raise' + - If 'raise', then invalid parsing will raise an exception. + - If 'coerce', then invalid parsing will be set as NaT. + - If 'ignore', then invalid parsing will return the input. + + Returns + ------- + timedelta + If parsing succeeded. + Return type depends on input: + + - list-like: TimedeltaIndex of timedelta64 dtype + - Series: Series of timedelta64 dtype + - scalar: Timedelta + + See Also + -------- + DataFrame.astype : Cast argument to a specified dtype. + to_datetime : Convert argument to datetime. + convert_dtypes : Convert dtypes. + + Notes + ----- + If the precision is higher than nanoseconds, the precision of the duration is + truncated to nanoseconds for string inputs. 
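+
+    For example, sub-nanosecond digits in a string are dropped (a minimal
+    sketch of the truncation described above; repr indicative only):
+
+    >>> pd.to_timedelta("0.123456789123s")  # doctest: +SKIP
+    Timedelta('0 days 00:00:00.123456789')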
+ + Examples + -------- + Parsing a single string to a Timedelta: + + >>> pd.to_timedelta('1 days 06:05:01.00003') + Timedelta('1 days 06:05:01.000030') + >>> pd.to_timedelta('15.5us') + Timedelta('0 days 00:00:00.000015500') + + Parsing a list or array of strings: + + >>> pd.to_timedelta(['1 days 06:05:01.00003', '15.5us', 'nan']) + TimedeltaIndex(['1 days 06:05:01.000030', '0 days 00:00:00.000015500', NaT], + dtype='timedelta64[ns]', freq=None) + + Converting numbers by specifying the `unit` keyword argument: + + >>> pd.to_timedelta(np.arange(5), unit='s') + TimedeltaIndex(['0 days 00:00:00', '0 days 00:00:01', '0 days 00:00:02', + '0 days 00:00:03', '0 days 00:00:04'], + dtype='timedelta64[ns]', freq=None) + >>> pd.to_timedelta(np.arange(5), unit='d') + TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'], + dtype='timedelta64[ns]', freq=None) + """ + if unit is not None: + unit = parse_timedelta_unit(unit) + + if errors not in ("ignore", "raise", "coerce"): + raise ValueError("errors must be one of 'ignore', 'raise', or 'coerce'.") + + if unit in {"Y", "y", "M"}: + raise ValueError( + "Units 'M', 'Y', and 'y' are no longer supported, as they do not " + "represent unambiguous timedelta values durations." + ) + + if arg is None: + return arg + elif isinstance(arg, ABCSeries): + values = _convert_listlike(arg._values, unit=unit, errors=errors) + return arg._constructor(values, index=arg.index, name=arg.name) + elif isinstance(arg, ABCIndex): + return _convert_listlike(arg, unit=unit, errors=errors, name=arg.name) + elif isinstance(arg, np.ndarray) and arg.ndim == 0: + # extract array scalar and process below + arg = lib.item_from_zerodim(arg) + elif is_list_like(arg) and getattr(arg, "ndim", 1) == 1: + return _convert_listlike(arg, unit=unit, errors=errors) + elif getattr(arg, "ndim", 1) > 1: + raise TypeError( + "arg must be a string, timedelta, list, tuple, 1-d array, or Series" + ) + + if isinstance(arg, str) and unit is not None: + raise ValueError("unit must not be specified if the input is/contains a str") + + # ...so it must be a scalar value. Return scalar. + return _coerce_scalar_to_timedelta_type(arg, unit=unit, errors=errors) + + +def _coerce_scalar_to_timedelta_type(r, unit="ns", errors="raise"): + """Convert string 'r' to a timedelta object.""" + result: Timedelta | NaTType + + try: + result = Timedelta(r, unit) + except ValueError: + if errors == "raise": + raise + elif errors == "ignore": + return r + + # coerce + result = NaT + + return result + + +def _convert_listlike(arg, unit=None, errors="raise", name=None): + """Convert a list of objects to a timedelta index object.""" + if isinstance(arg, (list, tuple)) or not hasattr(arg, "dtype"): + # This is needed only to ensure that in the case where we end up + # returning arg (errors == "ignore"), and where the input is a + # generator, we return a useful list-like instead of a + # used-up generator + arg = np.array(list(arg), dtype=object) + + try: + td64arr = sequence_to_td64ns(arg, unit=unit, errors=errors, copy=False)[0] + except ValueError: + if errors == "ignore": + return arg + else: + # This else-block accounts for the cases when errors='raise' + # and errors='coerce'. If errors == 'raise', these errors + # should be raised. If errors == 'coerce', we shouldn't + # expect any errors to be raised, since all parsing errors + # cause coercion to pd.NaT. However, if an error / bug is + # introduced that causes an Exception to be raised, we would + # like to surface it. 
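+            # In other words, re-raising here is deliberate: with
+            # errors="coerce" this branch should be unreachable, so reaching
+            # it points at a bug in sequence_to_td64ns rather than bad input.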
+ raise + + from pandas import TimedeltaIndex + + value = TimedeltaIndex(td64arr, unit="ns", name=name) + return value diff --git a/.local/lib/python3.8/site-packages/pandas/core/tools/times.py b/.local/lib/python3.8/site-packages/pandas/core/tools/times.py new file mode 100644 index 0000000..030cee3 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/tools/times.py @@ -0,0 +1,146 @@ +from __future__ import annotations + +from datetime import ( + datetime, + time, +) + +import numpy as np + +from pandas._libs.lib import is_list_like + +from pandas.core.dtypes.generic import ( + ABCIndex, + ABCSeries, +) +from pandas.core.dtypes.missing import notna + + +def to_time(arg, format=None, infer_time_format=False, errors="raise"): + """ + Parse time strings to time objects using fixed strptime formats ("%H:%M", + "%H%M", "%I:%M%p", "%I%M%p", "%H:%M:%S", "%H%M%S", "%I:%M:%S%p", + "%I%M%S%p") + + Use infer_time_format if all the strings are in the same format to speed + up conversion. + + Parameters + ---------- + arg : string in time format, datetime.time, list, tuple, 1-d array, Series + format : str, default None + Format used to convert arg into a time object. If None, fixed formats + are used. + infer_time_format: bool, default False + Infer the time format based on the first non-NaN element. If all + strings are in the same format, this will speed up conversion. + errors : {'ignore', 'raise', 'coerce'}, default 'raise' + - If 'raise', then invalid parsing will raise an exception + - If 'coerce', then invalid parsing will be set as None + - If 'ignore', then invalid parsing will return the input + + Returns + ------- + datetime.time + """ + + def _convert_listlike(arg, format): + + if isinstance(arg, (list, tuple)): + arg = np.array(arg, dtype="O") + + elif getattr(arg, "ndim", 1) > 1: + raise TypeError( + "arg must be a string, datetime, list, tuple, 1-d array, or Series" + ) + + arg = np.asarray(arg, dtype="O") + + if infer_time_format and format is None: + format = _guess_time_format_for_array(arg) + + times: list[time | None] = [] + if format is not None: + for element in arg: + try: + times.append(datetime.strptime(element, format).time()) + except (ValueError, TypeError) as err: + if errors == "raise": + msg = ( + f"Cannot convert {element} to a time with given " + f"format {format}" + ) + raise ValueError(msg) from err + elif errors == "ignore": + return arg + else: + times.append(None) + else: + formats = _time_formats[:] + format_found = False + for element in arg: + time_object = None + for time_format in formats: + try: + time_object = datetime.strptime(element, time_format).time() + if not format_found: + # Put the found format in front + fmt = formats.pop(formats.index(time_format)) + formats.insert(0, fmt) + format_found = True + break + except (ValueError, TypeError): + continue + + if time_object is not None: + times.append(time_object) + elif errors == "raise": + raise ValueError(f"Cannot convert arg {arg} to a time") + elif errors == "ignore": + return arg + else: + times.append(None) + + return times + + if arg is None: + return arg + elif isinstance(arg, time): + return arg + elif isinstance(arg, ABCSeries): + values = _convert_listlike(arg._values, format) + return arg._constructor(values, index=arg.index, name=arg.name) + elif isinstance(arg, ABCIndex): + return _convert_listlike(arg, format) + elif is_list_like(arg): + return _convert_listlike(arg, format) + + return _convert_listlike(np.array([arg]), format)[0] + + +# Fixed time formats for time parsing 
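+# (tried in order; the fallback loop in to_time moves the first matching
+# format to the front of the list, so homogeneous inputs parse quickly)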
+_time_formats = [ + "%H:%M", + "%H%M", + "%I:%M%p", + "%I%M%p", + "%H:%M:%S", + "%H%M%S", + "%I:%M:%S%p", + "%I%M%S%p", +] + + +def _guess_time_format_for_array(arr): + # Try to guess the format based on the first non-NaN element + non_nan_elements = notna(arr).nonzero()[0] + if len(non_nan_elements): + element = arr[non_nan_elements[0]] + for time_format in _time_formats: + try: + datetime.strptime(element, time_format) + return time_format + except ValueError: + pass + + return None diff --git a/.local/lib/python3.8/site-packages/pandas/core/util/__init__.py b/.local/lib/python3.8/site-packages/pandas/core/util/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/core/util/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/util/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..a28feae Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/util/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/util/__pycache__/hashing.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/util/__pycache__/hashing.cpython-38.pyc new file mode 100644 index 0000000..d1a2942 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/util/__pycache__/hashing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/util/__pycache__/numba_.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/util/__pycache__/numba_.cpython-38.pyc new file mode 100644 index 0000000..69f619a Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/util/__pycache__/numba_.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/util/hashing.py b/.local/lib/python3.8/site-packages/pandas/core/util/hashing.py new file mode 100644 index 0000000..02899ba --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/util/hashing.py @@ -0,0 +1,351 @@ +""" +data hash pandas / numpy objects +""" +from __future__ import annotations + +import itertools +from typing import ( + TYPE_CHECKING, + Hashable, + Iterable, + Iterator, + cast, +) + +import numpy as np + +from pandas._libs import lib +from pandas._libs.hashing import hash_object_array +from pandas._typing import ArrayLike + +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_list_like, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCIndex, + ABCMultiIndex, + ABCSeries, +) + +if TYPE_CHECKING: + from pandas import ( + Categorical, + DataFrame, + Index, + MultiIndex, + Series, + ) + + +# 16 byte long hashing key +_default_hash_key = "0123456789123456" + + +def combine_hash_arrays(arrays: Iterator[np.ndarray], num_items: int) -> np.ndarray: + """ + Parameters + ---------- + arrays : Iterator[np.ndarray] + num_items : int + + Returns + ------- + np.ndarray[uint64] + + Should be the same as CPython's tupleobject.c + """ + try: + first = next(arrays) + except StopIteration: + return np.array([], dtype=np.uint64) + + arrays = itertools.chain([first], arrays) + + mult = np.uint64(1000003) + out = np.zeros_like(first) + np.uint64(0x345678) + for i, a in enumerate(arrays): + inverse_i = num_items - i + out ^= a + out *= mult + mult += np.uint64(82520 + inverse_i + inverse_i) + assert i + 1 == num_items, "Fed in wrong num_items" + out += np.uint64(97531) + return out + + +def hash_pandas_object( + obj: Index | DataFrame | Series, + index: bool = True, + encoding: str 
= "utf8", + hash_key: str | None = _default_hash_key, + categorize: bool = True, +) -> Series: + """ + Return a data hash of the Index/Series/DataFrame. + + Parameters + ---------- + obj : Index, Series, or DataFrame + index : bool, default True + Include the index in the hash (if Series/DataFrame). + encoding : str, default 'utf8' + Encoding for data & key when strings. + hash_key : str, default _default_hash_key + Hash_key for string key to encode. + categorize : bool, default True + Whether to first categorize object arrays before hashing. This is more + efficient when the array contains duplicate values. + + Returns + ------- + Series of uint64, same length as the object + """ + from pandas import Series + + if hash_key is None: + hash_key = _default_hash_key + + if isinstance(obj, ABCMultiIndex): + return Series(hash_tuples(obj, encoding, hash_key), dtype="uint64", copy=False) + + elif isinstance(obj, ABCIndex): + h = hash_array(obj._values, encoding, hash_key, categorize).astype( + "uint64", copy=False + ) + ser = Series(h, index=obj, dtype="uint64", copy=False) + + elif isinstance(obj, ABCSeries): + h = hash_array(obj._values, encoding, hash_key, categorize).astype( + "uint64", copy=False + ) + if index: + index_iter = ( + hash_pandas_object( + obj.index, + index=False, + encoding=encoding, + hash_key=hash_key, + categorize=categorize, + )._values + for _ in [None] + ) + arrays = itertools.chain([h], index_iter) + h = combine_hash_arrays(arrays, 2) + + ser = Series(h, index=obj.index, dtype="uint64", copy=False) + + elif isinstance(obj, ABCDataFrame): + hashes = ( + hash_array(series._values, encoding, hash_key, categorize) + for _, series in obj.items() + ) + num_items = len(obj.columns) + if index: + index_hash_generator = ( + hash_pandas_object( + obj.index, + index=False, + encoding=encoding, + hash_key=hash_key, + categorize=categorize, + )._values + for _ in [None] + ) + num_items += 1 + + # keep `hashes` specifically a generator to keep mypy happy + _hashes = itertools.chain(hashes, index_hash_generator) + hashes = (x for x in _hashes) + h = combine_hash_arrays(hashes, num_items) + + ser = Series(h, index=obj.index, dtype="uint64", copy=False) + else: + raise TypeError(f"Unexpected type for hashing {type(obj)}") + + return ser + + +def hash_tuples( + vals: MultiIndex | Iterable[tuple[Hashable, ...]], + encoding: str = "utf8", + hash_key: str = _default_hash_key, +) -> np.ndarray: + """ + Hash an MultiIndex / listlike-of-tuples efficiently. 
+ + Parameters + ---------- + vals : MultiIndex or listlike-of-tuples + encoding : str, default 'utf8' + hash_key : str, default _default_hash_key + + Returns + ------- + ndarray[np.uint64] of hashed values + """ + if not is_list_like(vals): + raise TypeError("must be convertible to a list-of-tuples") + + from pandas import ( + Categorical, + MultiIndex, + ) + + if not isinstance(vals, ABCMultiIndex): + mi = MultiIndex.from_tuples(vals) + else: + mi = vals + + # create a list-of-Categoricals + cat_vals = [ + Categorical(mi.codes[level], mi.levels[level], ordered=False, fastpath=True) + for level in range(mi.nlevels) + ] + + # hash the list-of-ndarrays + hashes = ( + _hash_categorical(cat, encoding=encoding, hash_key=hash_key) for cat in cat_vals + ) + h = combine_hash_arrays(hashes, len(cat_vals)) + + return h + + +def _hash_categorical(cat: Categorical, encoding: str, hash_key: str) -> np.ndarray: + """ + Hash a Categorical by hashing its categories, and then mapping the codes + to the hashes + + Parameters + ---------- + cat : Categorical + encoding : str + hash_key : str + + Returns + ------- + ndarray[np.uint64] of hashed values, same size as len(c) + """ + # Convert ExtensionArrays to ndarrays + values = np.asarray(cat.categories._values) + hashed = hash_array(values, encoding, hash_key, categorize=False) + + # we have uint64, as we don't directly support missing values + # we don't want to use take_nd which will coerce to float + # instead, directly construct the result with a + # max(np.uint64) as the missing value indicator + # + # TODO: GH 15362 + + mask = cat.isna() + if len(hashed): + result = hashed.take(cat.codes) + else: + result = np.zeros(len(mask), dtype="uint64") + + if mask.any(): + result[mask] = lib.u8max + + return result + + +def hash_array( + vals: ArrayLike, + encoding: str = "utf8", + hash_key: str = _default_hash_key, + categorize: bool = True, +) -> np.ndarray: + """ + Given a 1d array, return an array of deterministic integers. + + Parameters + ---------- + vals : ndarray or ExtensionArray + encoding : str, default 'utf8' + Encoding for data & key when strings. + hash_key : str, default _default_hash_key + Hash_key for string key to encode. + categorize : bool, default True + Whether to first categorize object arrays before hashing. This is more + efficient when the array contains duplicate values. + + Returns + ------- + ndarray[np.uint64, ndim=1] + Hashed values, same length as the vals. + """ + if not hasattr(vals, "dtype"): + raise TypeError("must pass a ndarray-like") + dtype = vals.dtype + + # For categoricals, we hash the categories, then remap the codes to the + # hash values. (This check is above the complex check so that we don't ask + # numpy if categorical is a subdtype of complex, as it will choke). + if is_categorical_dtype(dtype): + vals = cast("Categorical", vals) + return _hash_categorical(vals, encoding, hash_key) + elif not isinstance(vals, np.ndarray): + # i.e. ExtensionArray + vals, _ = vals._values_for_factorize() + + return _hash_ndarray(vals, encoding, hash_key, categorize) + + +def _hash_ndarray( + vals: np.ndarray, + encoding: str = "utf8", + hash_key: str = _default_hash_key, + categorize: bool = True, +) -> np.ndarray: + """ + See hash_array.__doc__. 
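+
+    The object-dtype fallback below hashes via ``hash_object_array``, and the
+    final shift/multiply steps are the splitmix64-style finalizer, which
+    spreads nearby inputs across the 64-bit space.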
+ """ + dtype = vals.dtype + + # we'll be working with everything as 64-bit values, so handle this + # 128-bit value early + if np.issubdtype(dtype, np.complex128): + return hash_array(np.real(vals)) + 23 * hash_array(np.imag(vals)) + + # First, turn whatever array this is into unsigned 64-bit ints, if we can + # manage it. + elif isinstance(dtype, bool): + vals = vals.astype("u8") + elif issubclass(dtype.type, (np.datetime64, np.timedelta64)): + vals = vals.view("i8").astype("u8", copy=False) + elif issubclass(dtype.type, np.number) and dtype.itemsize <= 8: + vals = vals.view(f"u{vals.dtype.itemsize}").astype("u8") + else: + # With repeated values, its MUCH faster to categorize object dtypes, + # then hash and rename categories. We allow skipping the categorization + # when the values are known/likely to be unique. + if categorize: + from pandas import ( + Categorical, + Index, + factorize, + ) + + codes, categories = factorize(vals, sort=False) + cat = Categorical( + codes, Index._with_infer(categories), ordered=False, fastpath=True + ) + return _hash_categorical(cat, encoding, hash_key) + + try: + vals = hash_object_array(vals, hash_key, encoding) + except TypeError: + # we have mixed types + vals = hash_object_array( + vals.astype(str).astype(object), hash_key, encoding + ) + + # Then, redistribute these 64-bit ints within the space of 64-bit ints + vals ^= vals >> 30 + vals *= np.uint64(0xBF58476D1CE4E5B9) + vals ^= vals >> 27 + vals *= np.uint64(0x94D049BB133111EB) + vals ^= vals >> 31 + return vals diff --git a/.local/lib/python3.8/site-packages/pandas/core/util/numba_.py b/.local/lib/python3.8/site-packages/pandas/core/util/numba_.py new file mode 100644 index 0000000..0663098 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/util/numba_.py @@ -0,0 +1,113 @@ +"""Common utilities for Numba operations""" +from __future__ import annotations + +import types +from typing import ( + TYPE_CHECKING, + Callable, +) + +import numpy as np + +from pandas.compat._optional import import_optional_dependency +from pandas.errors import NumbaUtilError + +GLOBAL_USE_NUMBA: bool = False +NUMBA_FUNC_CACHE: dict[tuple[Callable, str], Callable] = {} + + +def maybe_use_numba(engine: str | None) -> bool: + """Signal whether to use numba routines.""" + return engine == "numba" or (engine is None and GLOBAL_USE_NUMBA) + + +def set_use_numba(enable: bool = False) -> None: + global GLOBAL_USE_NUMBA + if enable: + import_optional_dependency("numba") + GLOBAL_USE_NUMBA = enable + + +def get_jit_arguments( + engine_kwargs: dict[str, bool] | None = None, kwargs: dict | None = None +) -> tuple[bool, bool, bool]: + """ + Return arguments to pass to numba.JIT, falling back on pandas default JIT settings. 
+ + Parameters + ---------- + engine_kwargs : dict, default None + user passed keyword arguments for numba.JIT + kwargs : dict, default None + user passed keyword arguments to pass into the JITed function + + Returns + ------- + (bool, bool, bool) + nopython, nogil, parallel + + Raises + ------ + NumbaUtilError + """ + if engine_kwargs is None: + engine_kwargs = {} + + nopython = engine_kwargs.get("nopython", True) + if kwargs and nopython: + raise NumbaUtilError( + "numba does not support kwargs with nopython=True: " + "https://github.com/numba/numba/issues/2916" + ) + nogil = engine_kwargs.get("nogil", False) + parallel = engine_kwargs.get("parallel", False) + return nopython, nogil, parallel + + +def jit_user_function( + func: Callable, nopython: bool, nogil: bool, parallel: bool +) -> Callable: + """ + JIT the user's function given the configurable arguments. + + Parameters + ---------- + func : function + user defined function + nopython : bool + nopython parameter for numba.JIT + nogil : bool + nogil parameter for numba.JIT + parallel : bool + parallel parameter for numba.JIT + + Returns + ------- + function + Numba JITed function + """ + if TYPE_CHECKING: + import numba + else: + numba = import_optional_dependency("numba") + + if numba.extending.is_jitted(func): + # Don't jit a user passed jitted function + numba_func = func + else: + + @numba.generated_jit(nopython=nopython, nogil=nogil, parallel=parallel) + def numba_func(data, *_args): + if getattr(np, func.__name__, False) is func or isinstance( + func, types.BuiltinFunctionType + ): + jf = func + else: + jf = numba.jit(func, nopython=nopython, nogil=nogil) + + def impl(data, *_args): + return jf(data, *_args) + + return impl + + return numba_func diff --git a/.local/lib/python3.8/site-packages/pandas/core/window/__init__.py b/.local/lib/python3.8/site-packages/pandas/core/window/__init__.py new file mode 100644 index 0000000..8f42cd7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/window/__init__.py @@ -0,0 +1,13 @@ +from pandas.core.window.ewm import ( # noqa:F401 + ExponentialMovingWindow, + ExponentialMovingWindowGroupby, +) +from pandas.core.window.expanding import ( # noqa:F401 + Expanding, + ExpandingGroupby, +) +from pandas.core.window.rolling import ( # noqa:F401 + Rolling, + RollingGroupby, + Window, +) diff --git a/.local/lib/python3.8/site-packages/pandas/core/window/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/window/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..8132914 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/window/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/window/__pycache__/common.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/window/__pycache__/common.cpython-38.pyc new file mode 100644 index 0000000..38c97b1 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/window/__pycache__/common.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/window/__pycache__/doc.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/window/__pycache__/doc.cpython-38.pyc new file mode 100644 index 0000000..939b8af Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/window/__pycache__/doc.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/window/__pycache__/ewm.cpython-38.pyc 
b/.local/lib/python3.8/site-packages/pandas/core/window/__pycache__/ewm.cpython-38.pyc new file mode 100644 index 0000000..999918a Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/window/__pycache__/ewm.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/window/__pycache__/expanding.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/window/__pycache__/expanding.cpython-38.pyc new file mode 100644 index 0000000..8928224 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/window/__pycache__/expanding.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/window/__pycache__/numba_.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/window/__pycache__/numba_.cpython-38.pyc new file mode 100644 index 0000000..7567fa4 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/window/__pycache__/numba_.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/window/__pycache__/online.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/window/__pycache__/online.cpython-38.pyc new file mode 100644 index 0000000..9ca54bd Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/window/__pycache__/online.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/window/__pycache__/rolling.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/core/window/__pycache__/rolling.cpython-38.pyc new file mode 100644 index 0000000..d15e532 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/core/window/__pycache__/rolling.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/core/window/common.py b/.local/lib/python3.8/site-packages/pandas/core/window/common.py new file mode 100644 index 0000000..1514411 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/window/common.py @@ -0,0 +1,167 @@ +"""Common utility functions for rolling operations""" +from collections import defaultdict +from typing import cast + +import numpy as np + +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) + +from pandas.core.indexes.api import MultiIndex + + +def flex_binary_moment(arg1, arg2, f, pairwise=False): + + if isinstance(arg1, ABCSeries) and isinstance(arg2, ABCSeries): + X, Y = prep_binary(arg1, arg2) + return f(X, Y) + + elif isinstance(arg1, ABCDataFrame): + from pandas import DataFrame + + def dataframe_from_int_dict(data, frame_template): + result = DataFrame(data, index=frame_template.index) + if len(result.columns) > 0: + result.columns = frame_template.columns[result.columns] + return result + + results = {} + if isinstance(arg2, ABCDataFrame): + if pairwise is False: + if arg1 is arg2: + # special case in order to handle duplicate column names + for i in range(len(arg1.columns)): + results[i] = f(arg1.iloc[:, i], arg2.iloc[:, i]) + return dataframe_from_int_dict(results, arg1) + else: + if not arg1.columns.is_unique: + raise ValueError("'arg1' columns are not unique") + if not arg2.columns.is_unique: + raise ValueError("'arg2' columns are not unique") + X, Y = arg1.align(arg2, join="outer") + X, Y = prep_binary(X, Y) + res_columns = arg1.columns.union(arg2.columns) + for col in res_columns: + if col in X and col in Y: + results[col] = f(X[col], Y[col]) + return DataFrame(results, index=X.index, columns=res_columns) + elif pairwise is True: + results = defaultdict(dict) + for i in range(len(arg1.columns)): + for 
j in range(len(arg2.columns)): + if j < i and arg2 is arg1: + # Symmetric case + results[i][j] = results[j][i] + else: + results[i][j] = f( + *prep_binary(arg1.iloc[:, i], arg2.iloc[:, j]) + ) + + from pandas import concat + + result_index = arg1.index.union(arg2.index) + if len(result_index): + + # construct result frame + result = concat( + [ + concat( + [results[i][j] for j in range(len(arg2.columns))], + ignore_index=True, + ) + for i in range(len(arg1.columns)) + ], + ignore_index=True, + axis=1, + ) + result.columns = arg1.columns + + # set the index and reorder + if arg2.columns.nlevels > 1: + # mypy needs to know columns is a MultiIndex, Index doesn't + # have levels attribute + arg2.columns = cast(MultiIndex, arg2.columns) + # GH 21157: Equivalent to MultiIndex.from_product( + # [result_index], , + # ) + # A normal MultiIndex.from_product will produce too many + # combinations. + result_level = np.tile( + result_index, len(result) // len(result_index) + ) + arg2_levels = ( + np.repeat( + arg2.columns.get_level_values(i), + len(result) // len(arg2.columns), + ) + for i in range(arg2.columns.nlevels) + ) + result_names = list(arg2.columns.names) + [result_index.name] + result.index = MultiIndex.from_arrays( + [*arg2_levels, result_level], names=result_names + ) + # GH 34440 + num_levels = len(result.index.levels) + new_order = [num_levels - 1] + list(range(num_levels - 1)) + result = result.reorder_levels(new_order).sort_index() + else: + result.index = MultiIndex.from_product( + [range(len(arg2.columns)), range(len(result_index))] + ) + result = result.swaplevel(1, 0).sort_index() + result.index = MultiIndex.from_product( + [result_index] + [arg2.columns] + ) + else: + + # empty result + result = DataFrame( + index=MultiIndex( + levels=[arg1.index, arg2.columns], codes=[[], []] + ), + columns=arg2.columns, + dtype="float64", + ) + + # reset our index names to arg1 names + # reset our column names to arg2 names + # careful not to mutate the original names + result.columns = result.columns.set_names(arg1.columns.names) + result.index = result.index.set_names( + result_index.names + arg2.columns.names + ) + + return result + else: + results = { + i: f(*prep_binary(arg1.iloc[:, i], arg2)) + for i in range(len(arg1.columns)) + } + return dataframe_from_int_dict(results, arg1) + + else: + return flex_binary_moment(arg2, arg1, f) + + +def zsqrt(x): + with np.errstate(all="ignore"): + result = np.sqrt(x) + mask = x < 0 + + if isinstance(x, ABCDataFrame): + if mask._values.any(): + result[mask] = 0 + else: + if mask.any(): + result[mask] = 0 + + return result + + +def prep_binary(arg1, arg2): + # mask out values, this also makes a common index... 
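+    # e.g. Series([1.0, 2.0]) + 0 * Series([1.0, np.nan]) -> [1.0, NaN]; the
+    # arithmetic aligns the indexes, so only jointly observed labels keep
+    # real values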
+ X = arg1 + 0 * arg2 + Y = arg2 + 0 * arg1 + return X, Y diff --git a/.local/lib/python3.8/site-packages/pandas/core/window/doc.py b/.local/lib/python3.8/site-packages/pandas/core/window/doc.py new file mode 100644 index 0000000..930c128 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/window/doc.py @@ -0,0 +1,125 @@ +"""Any shareable docstring components for rolling/expanding/ewm""" +from textwrap import dedent + +from pandas.core.shared_docs import _shared_docs + +_shared_docs = dict(**_shared_docs) + + +def create_section_header(header: str) -> str: + """Create numpydoc section header""" + return "\n".join((header, "-" * len(header))) + "\n" + + +template_header = "\nCalculate the {window_method} {aggregation_description}.\n\n" + +template_returns = dedent( + """ + Series or DataFrame + Return type is the same as the original object with ``np.float64`` dtype.\n + """ +).replace("\n", "", 1) + +template_see_also = dedent( + """ + pandas.Series.{window_method} : Calling {window_method} with Series data. + pandas.DataFrame.{window_method} : Calling {window_method} with DataFrames. + pandas.Series.{agg_method} : Aggregating {agg_method} for Series. + pandas.DataFrame.{agg_method} : Aggregating {agg_method} for DataFrame.\n + """ +).replace("\n", "", 1) + +args_compat = dedent( + """ + *args + For NumPy compatibility and will not have an effect on the result.\n + """ +).replace("\n", "", 1) + +kwargs_compat = dedent( + """ + **kwargs + For NumPy compatibility and will not have an effect on the result.\n + """ +).replace("\n", "", 1) + +kwargs_scipy = dedent( + """ + **kwargs + Keyword arguments to configure the ``SciPy`` weighted window type.\n + """ +).replace("\n", "", 1) + +window_apply_parameters = dedent( + """ + func : function + Must produce a single value from an ndarray input if ``raw=True`` + or a single value from a Series if ``raw=False``. Can also accept a + Numba JIT function with ``engine='numba'`` specified. + + .. versionchanged:: 1.0.0 + + raw : bool, default False + * ``False`` : passes each row or column as a Series to the + function. + * ``True`` : the passed function will receive ndarray + objects instead. + If you are just applying a NumPy reduction function this will + achieve much better performance. + + engine : str, default None + * ``'cython'`` : Runs rolling apply through C-extensions from cython. + * ``'numba'`` : Runs rolling apply through JIT compiled code from numba. + Only available when ``raw`` is set to ``True``. + * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba`` + + .. versionadded:: 1.0.0 + + engine_kwargs : dict, default None + * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` + * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` + and ``parallel`` dictionary keys. The values must either be ``True`` or + ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is + ``{{'nopython': True, 'nogil': False, 'parallel': False}}`` and will be + applied to both the ``func`` and the ``apply`` rolling aggregation. + + .. versionadded:: 1.0.0 + + args : tuple, default None + Positional arguments to be passed into func. 
+ + kwargs : dict, default None + Keyword arguments to be passed into func.\n + """ +).replace("\n", "", 1) + +numba_notes = ( + "See :ref:`window.numba_engine` and :ref:`enhancingperf.numba` for " + "extended documentation and performance considerations for the Numba engine.\n\n" +) + + +def window_agg_numba_parameters(version: str = "1.3") -> str: + return ( + dedent( + """ + engine : str, default None + * ``'cython'`` : Runs the operation through C-extensions from cython. + * ``'numba'`` : Runs the operation through JIT compiled code from numba. + * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba`` + + .. versionadded:: {version}.0 + + engine_kwargs : dict, default None + * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` + * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` + and ``parallel`` dictionary keys. The values must either be ``True`` or + ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is + ``{{'nopython': True, 'nogil': False, 'parallel': False}}`` + + .. versionadded:: {version}.0\n + """ + ) + .replace("\n", "", 1) + .replace("{version}", version) + ) diff --git a/.local/lib/python3.8/site-packages/pandas/core/window/ewm.py b/.local/lib/python3.8/site-packages/pandas/core/window/ewm.py new file mode 100644 index 0000000..4bebc56 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/window/ewm.py @@ -0,0 +1,1025 @@ +from __future__ import annotations + +import datetime +from functools import partial +from textwrap import dedent +from typing import TYPE_CHECKING +import warnings + +import numpy as np + +from pandas._libs.tslibs import Timedelta +import pandas._libs.window.aggregations as window_aggregations +from pandas._typing import ( + Axis, + TimedeltaConvertibleTypes, +) + +if TYPE_CHECKING: + from pandas import DataFrame, Series + from pandas.core.generic import NDFrame + +from pandas.compat.numpy import function as nv +from pandas.util._decorators import doc +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import is_datetime64_ns_dtype +from pandas.core.dtypes.missing import isna + +import pandas.core.common as common # noqa: PDF018 +from pandas.core.indexers.objects import ( + BaseIndexer, + ExponentialMovingWindowIndexer, + GroupbyIndexer, +) +from pandas.core.util.numba_ import maybe_use_numba +from pandas.core.window.common import zsqrt +from pandas.core.window.doc import ( + _shared_docs, + args_compat, + create_section_header, + kwargs_compat, + numba_notes, + template_header, + template_returns, + template_see_also, + window_agg_numba_parameters, +) +from pandas.core.window.numba_ import ( + generate_numba_ewm_func, + generate_numba_ewm_table_func, +) +from pandas.core.window.online import ( + EWMMeanState, + generate_online_numba_ewma_func, +) +from pandas.core.window.rolling import ( + BaseWindow, + BaseWindowGroupby, +) + + +def get_center_of_mass( + comass: float | None, + span: float | None, + halflife: float | None, + alpha: float | None, +) -> float: + valid_count = common.count_not_none(comass, span, halflife, alpha) + if valid_count > 1: + raise ValueError("comass, span, halflife, and alpha are mutually exclusive") + + # Convert to center of mass; domain checks ensure 0 < alpha <= 1 + if comass is not None: + if comass < 0: + raise ValueError("comass must satisfy: comass >= 0") + elif span is not None: + if span < 1: + raise ValueError("span must satisfy: span >= 1") + comass = (span - 1) / 2 + elif halflife is not 
None: + if halflife <= 0: + raise ValueError("halflife must satisfy: halflife > 0") + decay = 1 - np.exp(np.log(0.5) / halflife) + comass = 1 / decay - 1 + elif alpha is not None: + if alpha <= 0 or alpha > 1: + raise ValueError("alpha must satisfy: 0 < alpha <= 1") + comass = (1 - alpha) / alpha + else: + raise ValueError("Must pass one of comass, span, halflife, or alpha") + + return float(comass) + + +def _calculate_deltas( + times: str | np.ndarray | NDFrame | None, + halflife: float | TimedeltaConvertibleTypes | None, +) -> np.ndarray: + """ + Return the diff of the times divided by the half-life. These values are used in + the calculation of the ewm mean. + + Parameters + ---------- + times : str, np.ndarray, Series, default None + Times corresponding to the observations. Must be monotonically increasing + and ``datetime64[ns]`` dtype. + halflife : float, str, timedelta, optional + Half-life specifying the decay + + Returns + ------- + np.ndarray + Diff of the times divided by the half-life + """ + # error: Item "str" of "Union[str, ndarray, NDFrameT, None]" has no + # attribute "view" + # error: Item "None" of "Union[str, ndarray, NDFrameT, None]" has no + # attribute "view" + _times = np.asarray( + times.view(np.int64), dtype=np.float64 # type: ignore[union-attr] + ) + _halflife = float(Timedelta(halflife).value) + return np.diff(_times) / _halflife + + +class ExponentialMovingWindow(BaseWindow): + r""" + Provide exponentially weighted (EW) calculations. + + Exactly one parameter: ``com``, ``span``, ``halflife``, or ``alpha`` must be + provided. + + Parameters + ---------- + com : float, optional + Specify decay in terms of center of mass + + :math:`\alpha = 1 / (1 + com)`, for :math:`com \geq 0`. + + span : float, optional + Specify decay in terms of span + + :math:`\alpha = 2 / (span + 1)`, for :math:`span \geq 1`. + + halflife : float, str, timedelta, optional + Specify decay in terms of half-life + + :math:`\alpha = 1 - \exp\left(-\ln(2) / halflife\right)`, for + :math:`halflife > 0`. + + If ``times`` is specified, the time unit (str or timedelta) over which an + observation decays to half its value. Only applicable to ``mean()``, + and halflife value will not apply to the other functions. + + .. versionadded:: 1.1.0 + + alpha : float, optional + Specify smoothing factor :math:`\alpha` directly + + :math:`0 < \alpha \leq 1`. + + min_periods : int, default 0 + Minimum number of observations in window required to have a value; + otherwise, result is ``np.nan``. + + adjust : bool, default True + Divide by decaying adjustment factor in beginning periods to account + for imbalance in relative weightings (viewing EWMA as a moving average). + + - When ``adjust=True`` (default), the EW function is calculated using weights + :math:`w_i = (1 - \alpha)^i`. For example, the EW moving average of the series + [:math:`x_0, x_1, ..., x_t`] would be: + + .. math:: + y_t = \frac{x_t + (1 - \alpha)x_{t-1} + (1 - \alpha)^2 x_{t-2} + ... + (1 - + \alpha)^t x_0}{1 + (1 - \alpha) + (1 - \alpha)^2 + ... + (1 - \alpha)^t} + + - When ``adjust=False``, the exponentially weighted function is calculated + recursively: + + .. math:: + \begin{split} + y_0 &= x_0\\ + y_t &= (1 - \alpha) y_{t-1} + \alpha x_t, + \end{split} + ignore_na : bool, default False + Ignore missing values when calculating weights. + + - When ``ignore_na=False`` (default), weights are based on absolute positions. 
+          For example, the weights of :math:`x_0` and :math:`x_2` used in calculating
+          the final weighted average of [:math:`x_0`, None, :math:`x_2`] are
+          :math:`(1-\alpha)^2` and :math:`1` if ``adjust=True``, and
+          :math:`(1-\alpha)^2` and :math:`\alpha` if ``adjust=False``.
+
+        - When ``ignore_na=True``, weights are based
+          on relative positions. For example, the weights of :math:`x_0` and :math:`x_2`
+          used in calculating the final weighted average of
+          [:math:`x_0`, None, :math:`x_2`] are :math:`1-\alpha` and :math:`1` if
+          ``adjust=True``, and :math:`1-\alpha` and :math:`\alpha` if ``adjust=False``.
+
+    axis : {0, 1}, default 0
+        If ``0`` or ``'index'``, calculate across the rows.
+
+        If ``1`` or ``'columns'``, calculate across the columns.
+
+    times : str, np.ndarray, Series, default None
+
+        .. versionadded:: 1.1.0
+
+        Only applicable to ``mean()``.
+
+        Times corresponding to the observations. Must be monotonically increasing and
+        ``datetime64[ns]`` dtype.
+
+        If 1-D array like, a sequence with the same shape as the observations.
+
+        .. deprecated:: 1.4.0
+            If str, the name of the column in the DataFrame representing the times.
+
+    method : str {'single', 'table'}, default 'single'
+        .. versionadded:: 1.4.0
+
+        Execute the rolling operation per single column or row (``'single'``)
+        or over the entire object (``'table'``).
+
+        This argument is only implemented when specifying ``engine='numba'``
+        in the method call.
+
+        Only applicable to ``mean()``
+
+    Returns
+    -------
+    ``ExponentialMovingWindow`` subclass
+
+    See Also
+    --------
+    rolling : Provides rolling window calculations.
+    expanding : Provides expanding transformations.
+
+    Notes
+    -----
+    See :ref:`Windowing Operations <window.exponentially_weighted>`
+    for further usage details and examples.
+
+    Examples
+    --------
+    >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]})
+    >>> df
+         B
+    0  0.0
+    1  1.0
+    2  2.0
+    3  NaN
+    4  4.0
+
+    >>> df.ewm(com=0.5).mean()
+              B
+    0  0.000000
+    1  0.750000
+    2  1.615385
+    3  1.615385
+    4  3.670213
+    >>> df.ewm(alpha=2 / 3).mean()
+              B
+    0  0.000000
+    1  0.750000
+    2  1.615385
+    3  1.615385
+    4  3.670213
+
+    **adjust**
+
+    >>> df.ewm(com=0.5, adjust=True).mean()
+              B
+    0  0.000000
+    1  0.750000
+    2  1.615385
+    3  1.615385
+    4  3.670213
+    >>> df.ewm(com=0.5, adjust=False).mean()
+              B
+    0  0.000000
+    1  0.666667
+    2  1.555556
+    3  1.555556
+    4  3.650794
+
+    **ignore_na**
+
+    >>> df.ewm(com=0.5, ignore_na=True).mean()
+              B
+    0  0.000000
+    1  0.750000
+    2  1.615385
+    3  1.615385
+    4  3.225000
+    >>> df.ewm(com=0.5, ignore_na=False).mean()
+              B
+    0  0.000000
+    1  0.750000
+    2  1.615385
+    3  1.615385
+    4  3.670213
+
+    **times**
+
+    Exponentially weighted mean with weights calculated with a timedelta ``halflife``
+    relative to ``times``.
+ + >>> times = ['2020-01-01', '2020-01-03', '2020-01-10', '2020-01-15', '2020-01-17'] + >>> df.ewm(halflife='4 days', times=pd.DatetimeIndex(times)).mean() + B + 0 0.000000 + 1 0.585786 + 2 1.523889 + 3 1.523889 + 4 3.233686 + """ + + _attributes = [ + "com", + "span", + "halflife", + "alpha", + "min_periods", + "adjust", + "ignore_na", + "axis", + "times", + "method", + ] + + def __init__( + self, + obj: NDFrame, + com: float | None = None, + span: float | None = None, + halflife: float | TimedeltaConvertibleTypes | None = None, + alpha: float | None = None, + min_periods: int | None = 0, + adjust: bool = True, + ignore_na: bool = False, + axis: Axis = 0, + times: str | np.ndarray | NDFrame | None = None, + method: str = "single", + *, + selection=None, + ): + super().__init__( + obj=obj, + min_periods=1 if min_periods is None else max(int(min_periods), 1), + on=None, + center=False, + closed=None, + method=method, + axis=axis, + selection=selection, + ) + self.com = com + self.span = span + self.halflife = halflife + self.alpha = alpha + self.adjust = adjust + self.ignore_na = ignore_na + self.times = times + if self.times is not None: + if not self.adjust: + raise NotImplementedError("times is not supported with adjust=False.") + if isinstance(self.times, str): + warnings.warn( + ( + "Specifying times as a string column label is deprecated " + "and will be removed in a future version. Pass the column " + "into times instead." + ), + FutureWarning, + stacklevel=find_stack_level(), + ) + self.times = self._selected_obj[self.times] + if not is_datetime64_ns_dtype(self.times): + raise ValueError("times must be datetime64[ns] dtype.") + # error: Argument 1 to "len" has incompatible type "Union[str, ndarray, + # NDFrameT, None]"; expected "Sized" + if len(self.times) != len(obj): # type: ignore[arg-type] + raise ValueError("times must be the same length as the object.") + if not isinstance(self.halflife, (str, datetime.timedelta)): + raise ValueError( + "halflife must be a string or datetime.timedelta object" + ) + if isna(self.times).any(): + raise ValueError("Cannot convert NaT values to integer") + self._deltas = _calculate_deltas(self.times, self.halflife) + # Halflife is no longer applicable when calculating COM + # But allow COM to still be calculated if the user passes other decay args + if common.count_not_none(self.com, self.span, self.alpha) > 0: + self._com = get_center_of_mass(self.com, self.span, None, self.alpha) + else: + self._com = 1.0 + else: + if self.halflife is not None and isinstance( + self.halflife, (str, datetime.timedelta) + ): + raise ValueError( + "halflife can only be a timedelta convertible argument if " + "times is not None." 
+ ) + # Without times, points are equally spaced + self._deltas = np.ones(max(len(self.obj) - 1, 0), dtype=np.float64) + self._com = get_center_of_mass( + # error: Argument 3 to "get_center_of_mass" has incompatible type + # "Union[float, Any, None, timedelta64, signedinteger[_64Bit]]"; + # expected "Optional[float]" + self.com, + self.span, + self.halflife, # type: ignore[arg-type] + self.alpha, + ) + + def _check_window_bounds( + self, start: np.ndarray, end: np.ndarray, num_vals: int + ) -> None: + # emw algorithms are iterative with each point + # ExponentialMovingWindowIndexer "bounds" are the entire window + pass + + def _get_window_indexer(self) -> BaseIndexer: + """ + Return an indexer class that will compute the window start and end bounds + """ + return ExponentialMovingWindowIndexer() + + def online(self, engine="numba", engine_kwargs=None): + """ + Return an ``OnlineExponentialMovingWindow`` object to calculate + exponentially moving window aggregations in an online method. + + .. versionadded:: 1.3.0 + + Parameters + ---------- + engine: str, default ``'numba'`` + Execution engine to calculate online aggregations. + Applies to all supported aggregation methods. + + engine_kwargs : dict, default None + Applies to all supported aggregation methods. + + * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` + and ``parallel`` dictionary keys. The values must either be ``True`` or + ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is + ``{{'nopython': True, 'nogil': False, 'parallel': False}}`` and will be + applied to the function + + Returns + ------- + OnlineExponentialMovingWindow + """ + return OnlineExponentialMovingWindow( + obj=self.obj, + com=self.com, + span=self.span, + halflife=self.halflife, + alpha=self.alpha, + min_periods=self.min_periods, + adjust=self.adjust, + ignore_na=self.ignore_na, + axis=self.axis, + times=self.times, + engine=engine, + engine_kwargs=engine_kwargs, + selection=self._selection, + ) + + @doc( + _shared_docs["aggregate"], + see_also=dedent( + """ + See Also + -------- + pandas.DataFrame.rolling.aggregate + """ + ), + examples=dedent( + """ + Examples + -------- + >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) + >>> df + A B C + 0 1 4 7 + 1 2 5 8 + 2 3 6 9 + + >>> df.ewm(alpha=0.5).mean() + A B C + 0 1.000000 4.000000 7.000000 + 1 1.666667 4.666667 7.666667 + 2 2.428571 5.428571 8.428571 + """ + ), + klass="Series/Dataframe", + axis="", + ) + def aggregate(self, func, *args, **kwargs): + return super().aggregate(func, *args, **kwargs) + + agg = aggregate + + @doc( + template_header, + create_section_header("Parameters"), + args_compat, + window_agg_numba_parameters(), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Notes"), + numba_notes.replace("\n", "", 1), + window_method="ewm", + aggregation_description="(exponential weighted moment) mean", + agg_method="mean", + ) + def mean(self, *args, engine=None, engine_kwargs=None, **kwargs): + if maybe_use_numba(engine): + if self.method == "single": + func = generate_numba_ewm_func + numba_cache_key = (lambda x: x, "ewm_mean") + else: + func = generate_numba_ewm_table_func + numba_cache_key = (lambda x: x, "ewm_mean_table") + ewm_func = func( + engine_kwargs=engine_kwargs, + com=self._com, + adjust=self.adjust, + ignore_na=self.ignore_na, + deltas=self._deltas, + normalize=True, + ) + return self._apply( + ewm_func, + 
numba_cache_key=numba_cache_key,
+            )
+        elif engine in ("cython", None):
+            if engine_kwargs is not None:
+                raise ValueError("cython engine does not accept engine_kwargs")
+            nv.validate_window_func("mean", args, kwargs)
+
+            deltas = None if self.times is None else self._deltas
+            window_func = partial(
+                window_aggregations.ewm,
+                com=self._com,
+                adjust=self.adjust,
+                ignore_na=self.ignore_na,
+                deltas=deltas,
+                normalize=True,
+            )
+            return self._apply(window_func)
+        else:
+            raise ValueError("engine must be either 'numba' or 'cython'")
+
+    @doc(
+        template_header,
+        create_section_header("Parameters"),
+        args_compat,
+        window_agg_numba_parameters(),
+        kwargs_compat,
+        create_section_header("Returns"),
+        template_returns,
+        create_section_header("See Also"),
+        template_see_also,
+        create_section_header("Notes"),
+        numba_notes.replace("\n", "", 1),
+        window_method="ewm",
+        aggregation_description="(exponential weighted moment) sum",
+        agg_method="sum",
+    )
+    def sum(self, *args, engine=None, engine_kwargs=None, **kwargs):
+        if not self.adjust:
+            raise NotImplementedError("sum is not implemented with adjust=False")
+        if maybe_use_numba(engine):
+            if self.method == "single":
+                func = generate_numba_ewm_func
+                numba_cache_key = (lambda x: x, "ewm_sum")
+            else:
+                func = generate_numba_ewm_table_func
+                numba_cache_key = (lambda x: x, "ewm_sum_table")
+            ewm_func = func(
+                engine_kwargs=engine_kwargs,
+                com=self._com,
+                adjust=self.adjust,
+                ignore_na=self.ignore_na,
+                deltas=self._deltas,
+                normalize=False,
+            )
+            return self._apply(
+                ewm_func,
+                numba_cache_key=numba_cache_key,
+            )
+        elif engine in ("cython", None):
+            if engine_kwargs is not None:
+                raise ValueError("cython engine does not accept engine_kwargs")
+            nv.validate_window_func("sum", args, kwargs)
+
+            deltas = None if self.times is None else self._deltas
+            window_func = partial(
+                window_aggregations.ewm,
+                com=self._com,
+                adjust=self.adjust,
+                ignore_na=self.ignore_na,
+                deltas=deltas,
+                normalize=False,
+            )
+            return self._apply(window_func)
+        else:
+            raise ValueError("engine must be either 'numba' or 'cython'")
+
+    @doc(
+        template_header,
+        create_section_header("Parameters"),
+        dedent(
+            """
+        bias : bool, default False
+            Use a standard estimation bias correction.
+        """
+        ).replace("\n", "", 1),
+        args_compat,
+        kwargs_compat,
+        create_section_header("Returns"),
+        template_returns,
+        create_section_header("See Also"),
+        template_see_also[:-1],
+        window_method="ewm",
+        aggregation_description="(exponential weighted moment) standard deviation",
+        agg_method="std",
+    )
+    def std(self, bias: bool = False, *args, **kwargs):
+        nv.validate_window_func("std", args, kwargs)
+        return zsqrt(self.var(bias=bias, **kwargs))
+
+    def vol(self, bias: bool = False, *args, **kwargs):
+        warnings.warn(
+            (
+                "vol is deprecated and will be removed in a future version. "
+                "Use std instead."
+            ),
+            FutureWarning,
+            stacklevel=find_stack_level(),
+        )
+        return self.std(bias, *args, **kwargs)
+
+    @doc(
+        template_header,
+        create_section_header("Parameters"),
+        dedent(
+            """
+        bias : bool, default False
+            Use a standard estimation bias correction.
+ """ + ).replace("\n", "", 1), + args_compat, + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="ewm", + aggregation_description="(exponential weighted moment) variance", + agg_method="var", + ) + def var(self, bias: bool = False, *args, **kwargs): + nv.validate_window_func("var", args, kwargs) + window_func = window_aggregations.ewmcov + wfunc = partial( + window_func, + com=self._com, + adjust=self.adjust, + ignore_na=self.ignore_na, + bias=bias, + ) + + def var_func(values, begin, end, min_periods): + return wfunc(values, begin, end, min_periods, values) + + return self._apply(var_func) + + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + other : Series or DataFrame , optional + If not supplied then will default to self and produce pairwise + output. + pairwise : bool, default None + If False then only matching columns between self and other will be + used and the output will be a DataFrame. + If True then all pairwise combinations will be calculated and the + output will be a MultiIndex DataFrame in the case of DataFrame + inputs. In the case of missing elements, only complete pairwise + observations will be used. + bias : bool, default False + Use a standard estimation bias correction. + """ + ).replace("\n", "", 1), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="ewm", + aggregation_description="(exponential weighted moment) sample covariance", + agg_method="cov", + ) + def cov( + self, + other: DataFrame | Series | None = None, + pairwise: bool | None = None, + bias: bool = False, + **kwargs, + ): + from pandas import Series + + def cov_func(x, y): + x_array = self._prep_values(x) + y_array = self._prep_values(y) + window_indexer = self._get_window_indexer() + min_periods = ( + self.min_periods + if self.min_periods is not None + else window_indexer.window_size + ) + start, end = window_indexer.get_window_bounds( + num_values=len(x_array), + min_periods=min_periods, + center=self.center, + closed=self.closed, + ) + result = window_aggregations.ewmcov( + x_array, + start, + end, + # error: Argument 4 to "ewmcov" has incompatible type + # "Optional[int]"; expected "int" + self.min_periods, # type: ignore[arg-type] + y_array, + self._com, + self.adjust, + self.ignore_na, + bias, + ) + return Series(result, index=x.index, name=x.name) + + return self._apply_pairwise(self._selected_obj, other, pairwise, cov_func) + + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + other : Series or DataFrame, optional + If not supplied then will default to self and produce pairwise + output. + pairwise : bool, default None + If False then only matching columns between self and other will be + used and the output will be a DataFrame. + If True then all pairwise combinations will be calculated and the + output will be a MultiIndex DataFrame in the case of DataFrame + inputs. In the case of missing elements, only complete pairwise + observations will be used. 
+ """ + ).replace("\n", "", 1), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="ewm", + aggregation_description="(exponential weighted moment) sample correlation", + agg_method="corr", + ) + def corr( + self, + other: DataFrame | Series | None = None, + pairwise: bool | None = None, + **kwargs, + ): + from pandas import Series + + def cov_func(x, y): + x_array = self._prep_values(x) + y_array = self._prep_values(y) + window_indexer = self._get_window_indexer() + min_periods = ( + self.min_periods + if self.min_periods is not None + else window_indexer.window_size + ) + start, end = window_indexer.get_window_bounds( + num_values=len(x_array), + min_periods=min_periods, + center=self.center, + closed=self.closed, + ) + + def _cov(X, Y): + return window_aggregations.ewmcov( + X, + start, + end, + min_periods, + Y, + self._com, + self.adjust, + self.ignore_na, + True, + ) + + with np.errstate(all="ignore"): + cov = _cov(x_array, y_array) + x_var = _cov(x_array, x_array) + y_var = _cov(y_array, y_array) + result = cov / zsqrt(x_var * y_var) + return Series(result, index=x.index, name=x.name) + + return self._apply_pairwise(self._selected_obj, other, pairwise, cov_func) + + +class ExponentialMovingWindowGroupby(BaseWindowGroupby, ExponentialMovingWindow): + """ + Provide an exponential moving window groupby implementation. + """ + + _attributes = ExponentialMovingWindow._attributes + BaseWindowGroupby._attributes + + def __init__(self, obj, *args, _grouper=None, **kwargs): + super().__init__(obj, *args, _grouper=_grouper, **kwargs) + + if not obj.empty and self.times is not None: + # sort the times and recalculate the deltas according to the groups + groupby_order = np.concatenate(list(self._grouper.indices.values())) + self._deltas = _calculate_deltas( + self.times.take(groupby_order), # type: ignore[union-attr] + self.halflife, + ) + + def _get_window_indexer(self) -> GroupbyIndexer: + """ + Return an indexer class that will compute the window start and end bounds + + Returns + ------- + GroupbyIndexer + """ + window_indexer = GroupbyIndexer( + groupby_indices=self._grouper.indices, + window_indexer=ExponentialMovingWindowIndexer, + ) + return window_indexer + + +class OnlineExponentialMovingWindow(ExponentialMovingWindow): + def __init__( + self, + obj: NDFrame, + com: float | None = None, + span: float | None = None, + halflife: float | TimedeltaConvertibleTypes | None = None, + alpha: float | None = None, + min_periods: int | None = 0, + adjust: bool = True, + ignore_na: bool = False, + axis: Axis = 0, + times: str | np.ndarray | NDFrame | None = None, + engine: str = "numba", + engine_kwargs: dict[str, bool] | None = None, + *, + selection=None, + ): + if times is not None: + raise NotImplementedError( + "times is not implemented with online operations." + ) + super().__init__( + obj=obj, + com=com, + span=span, + halflife=halflife, + alpha=alpha, + min_periods=min_periods, + adjust=adjust, + ignore_na=ignore_na, + axis=axis, + times=times, + selection=selection, + ) + self._mean = EWMMeanState( + self._com, self.adjust, self.ignore_na, self.axis, obj.shape + ) + if maybe_use_numba(engine): + self.engine = engine + self.engine_kwargs = engine_kwargs + else: + raise ValueError("'numba' is the only supported engine") + + def reset(self): + """ + Reset the state captured by `update` calls. 
+ """ + self._mean.reset() + + def aggregate(self, func, *args, **kwargs): + return NotImplementedError + + def std(self, bias: bool = False, *args, **kwargs): + return NotImplementedError + + def corr( + self, + other: DataFrame | Series | None = None, + pairwise: bool | None = None, + **kwargs, + ): + return NotImplementedError + + def cov( + self, + other: DataFrame | Series | None = None, + pairwise: bool | None = None, + bias: bool = False, + **kwargs, + ): + return NotImplementedError + + def var(self, bias: bool = False, *args, **kwargs): + return NotImplementedError + + def mean(self, *args, update=None, update_times=None, **kwargs): + """ + Calculate an online exponentially weighted mean. + + Parameters + ---------- + update: DataFrame or Series, default None + New values to continue calculating the + exponentially weighted mean from the last values and weights. + Values should be float64 dtype. + + ``update`` needs to be ``None`` the first time the + exponentially weighted mean is calculated. + + update_times: Series or 1-D np.ndarray, default None + New times to continue calculating the + exponentially weighted mean from the last values and weights. + If ``None``, values are assumed to be evenly spaced + in time. + This feature is currently unsupported. + + Returns + ------- + DataFrame or Series + + Examples + -------- + >>> df = pd.DataFrame({"a": range(5), "b": range(5, 10)}) + >>> online_ewm = df.head(2).ewm(0.5).online() + >>> online_ewm.mean() + a b + 0 0.00 5.00 + 1 0.75 5.75 + >>> online_ewm.mean(update=df.tail(3)) + a b + 2 1.615385 6.615385 + 3 2.550000 7.550000 + 4 3.520661 8.520661 + >>> online_ewm.reset() + >>> online_ewm.mean() + a b + 0 0.00 5.00 + 1 0.75 5.75 + """ + result_kwargs = {} + is_frame = True if self._selected_obj.ndim == 2 else False + if update_times is not None: + raise NotImplementedError("update_times is not implemented.") + else: + update_deltas = np.ones( + max(self._selected_obj.shape[self.axis - 1] - 1, 0), dtype=np.float64 + ) + if update is not None: + if self._mean.last_ewm is None: + raise ValueError( + "Must call mean with update=None first before passing update" + ) + result_from = 1 + result_kwargs["index"] = update.index + if is_frame: + last_value = self._mean.last_ewm[np.newaxis, :] + result_kwargs["columns"] = update.columns + else: + last_value = self._mean.last_ewm + result_kwargs["name"] = update.name + np_array = np.concatenate((last_value, update.to_numpy())) + else: + result_from = 0 + result_kwargs["index"] = self._selected_obj.index + if is_frame: + result_kwargs["columns"] = self._selected_obj.columns + else: + result_kwargs["name"] = self._selected_obj.name + np_array = self._selected_obj.astype(np.float64).to_numpy() + ewma_func = generate_online_numba_ewma_func(self.engine_kwargs) + result = self._mean.run_ewm( + np_array if is_frame else np_array[:, np.newaxis], + update_deltas, + self.min_periods, + ewma_func, + ) + if not is_frame: + result = result.squeeze() + result = result[result_from:] + result = self._selected_obj._constructor(result, **result_kwargs) + return result diff --git a/.local/lib/python3.8/site-packages/pandas/core/window/expanding.py b/.local/lib/python3.8/site-packages/pandas/core/window/expanding.py new file mode 100644 index 0000000..8c8b7a8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/window/expanding.py @@ -0,0 +1,807 @@ +from __future__ import annotations + +from textwrap import dedent +from typing import ( + TYPE_CHECKING, + Any, + Callable, +) + +from pandas._typing 
import ( + Axis, + WindowingRankType, +) + +if TYPE_CHECKING: + from pandas import DataFrame, Series + from pandas.core.generic import NDFrame + +from pandas.compat.numpy import function as nv +from pandas.util._decorators import doc + +from pandas.core.indexers.objects import ( + BaseIndexer, + ExpandingIndexer, + GroupbyIndexer, +) +from pandas.core.window.doc import ( + _shared_docs, + args_compat, + create_section_header, + kwargs_compat, + numba_notes, + template_header, + template_returns, + template_see_also, + window_agg_numba_parameters, + window_apply_parameters, +) +from pandas.core.window.rolling import ( + BaseWindowGroupby, + RollingAndExpandingMixin, +) + + +class Expanding(RollingAndExpandingMixin): + """ + Provide expanding window calculations. + + Parameters + ---------- + min_periods : int, default 1 + Minimum number of observations in window required to have a value; + otherwise, result is ``np.nan``. + + center : bool, default False + If False, set the window labels as the right edge of the window index. + + If True, set the window labels as the center of the window index. + + .. deprecated:: 1.1.0 + + axis : int or str, default 0 + If ``0`` or ``'index'``, roll across the rows. + + If ``1`` or ``'columns'``, roll across the columns. + + method : str {'single', 'table'}, default 'single' + Execute the rolling operation per single column or row (``'single'``) + or over the entire object (``'table'``). + + This argument is only implemented when specifying ``engine='numba'`` + in the method call. + + .. versionadded:: 1.3.0 + + Returns + ------- + ``Expanding`` subclass + + See Also + -------- + rolling : Provides rolling window calculations. + ewm : Provides exponential weighted functions. + + Notes + ----- + See :ref:`Windowing Operations ` for further usage details + and examples. + + Examples + -------- + >>> df = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]}) + >>> df + B + 0 0.0 + 1 1.0 + 2 2.0 + 3 NaN + 4 4.0 + + **min_periods** + + Expanding sum with 1 vs 3 observations needed to calculate a value. + + >>> df.expanding(1).sum() + B + 0 0.0 + 1 1.0 + 2 3.0 + 3 3.0 + 4 7.0 + >>> df.expanding(3).sum() + B + 0 NaN + 1 NaN + 2 3.0 + 3 3.0 + 4 7.0 + """ + + _attributes: list[str] = ["min_periods", "center", "axis", "method"] + + def __init__( + self, + obj: NDFrame, + min_periods: int = 1, + center=None, + axis: Axis = 0, + method: str = "single", + selection=None, + ): + super().__init__( + obj=obj, + min_periods=min_periods, + center=center, + axis=axis, + method=method, + selection=selection, + ) + + def _get_window_indexer(self) -> BaseIndexer: + """ + Return an indexer class that will compute the window start and end bounds + """ + return ExpandingIndexer() + + @doc( + _shared_docs["aggregate"], + see_also=dedent( + """ + See Also + -------- + pandas.DataFrame.aggregate : Similar DataFrame method. + pandas.Series.aggregate : Similar Series method. 
+ """ + ), + examples=dedent( + """ + Examples + -------- + >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) + >>> df + A B C + 0 1 4 7 + 1 2 5 8 + 2 3 6 9 + + >>> df.ewm(alpha=0.5).mean() + A B C + 0 1.000000 4.000000 7.000000 + 1 1.666667 4.666667 7.666667 + 2 2.428571 5.428571 8.428571 + """ + ), + klass="Series/Dataframe", + axis="", + ) + def aggregate(self, func, *args, **kwargs): + return super().aggregate(func, *args, **kwargs) + + agg = aggregate + + @doc( + template_header, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="expanding", + aggregation_description="count of non NaN observations", + agg_method="count", + ) + def count(self): + return super().count() + + @doc( + template_header, + create_section_header("Parameters"), + window_apply_parameters, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="expanding", + aggregation_description="custom aggregation function", + agg_method="apply", + ) + def apply( + self, + func: Callable[..., Any], + raw: bool = False, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + args: tuple[Any, ...] | None = None, + kwargs: dict[str, Any] | None = None, + ): + return super().apply( + func, + raw=raw, + engine=engine, + engine_kwargs=engine_kwargs, + args=args, + kwargs=kwargs, + ) + + @doc( + template_header, + create_section_header("Parameters"), + args_compat, + window_agg_numba_parameters(), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Notes"), + numba_notes[:-1], + window_method="expanding", + aggregation_description="sum", + agg_method="sum", + ) + def sum( + self, + *args, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + nv.validate_expanding_func("sum", args, kwargs) + return super().sum(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs) + + @doc( + template_header, + create_section_header("Parameters"), + args_compat, + window_agg_numba_parameters(), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Notes"), + numba_notes[:-1], + window_method="expanding", + aggregation_description="maximum", + agg_method="max", + ) + def max( + self, + *args, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + nv.validate_expanding_func("max", args, kwargs) + return super().max(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs) + + @doc( + template_header, + create_section_header("Parameters"), + args_compat, + window_agg_numba_parameters(), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Notes"), + numba_notes[:-1], + window_method="expanding", + aggregation_description="minimum", + agg_method="min", + ) + def min( + self, + *args, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + nv.validate_expanding_func("min", args, kwargs) + return super().min(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs) + + @doc( + template_header, + create_section_header("Parameters"), + args_compat, + window_agg_numba_parameters(), + kwargs_compat, + 
create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Notes"), + numba_notes[:-1], + window_method="expanding", + aggregation_description="mean", + agg_method="mean", + ) + def mean( + self, + *args, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + nv.validate_expanding_func("mean", args, kwargs) + return super().mean(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs) + + @doc( + template_header, + create_section_header("Parameters"), + window_agg_numba_parameters(), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Notes"), + numba_notes[:-1], + window_method="expanding", + aggregation_description="median", + agg_method="median", + ) + def median( + self, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + return super().median(engine=engine, engine_kwargs=engine_kwargs, **kwargs) + + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements.\n + """ + ).replace("\n", "", 1), + args_compat, + window_agg_numba_parameters("1.4"), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + "numpy.std : Equivalent method for NumPy array.\n", + template_see_also, + create_section_header("Notes"), + dedent( + """ + The default ``ddof`` of 1 used in :meth:`Series.std` is different + than the default ``ddof`` of 0 in :func:`numpy.std`. + + A minimum of one period is required for the rolling calculation.\n + """ + ).replace("\n", "", 1), + create_section_header("Examples"), + dedent( + """ + >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5]) + + >>> s.expanding(3).std() + 0 NaN + 1 NaN + 2 0.577350 + 3 0.957427 + 4 0.894427 + 5 0.836660 + 6 0.786796 + dtype: float64 + """ + ).replace("\n", "", 1), + window_method="expanding", + aggregation_description="standard deviation", + agg_method="std", + ) + def std( + self, + ddof: int = 1, + *args, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + nv.validate_expanding_func("std", args, kwargs) + return super().std( + ddof=ddof, engine=engine, engine_kwargs=engine_kwargs, **kwargs + ) + + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements.\n + """ + ).replace("\n", "", 1), + args_compat, + window_agg_numba_parameters("1.4"), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + "numpy.var : Equivalent method for NumPy array.\n", + template_see_also, + create_section_header("Notes"), + dedent( + """ + The default ``ddof`` of 1 used in :meth:`Series.var` is different + than the default ``ddof`` of 0 in :func:`numpy.var`. 
+ + A minimum of one period is required for the rolling calculation.\n + """ + ).replace("\n", "", 1), + create_section_header("Examples"), + dedent( + """ + >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5]) + + >>> s.expanding(3).var() + 0 NaN + 1 NaN + 2 0.333333 + 3 0.916667 + 4 0.800000 + 5 0.700000 + 6 0.619048 + dtype: float64 + """ + ).replace("\n", "", 1), + window_method="expanding", + aggregation_description="variance", + agg_method="var", + ) + def var( + self, + ddof: int = 1, + *args, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + nv.validate_expanding_func("var", args, kwargs) + return super().var( + ddof=ddof, engine=engine, engine_kwargs=engine_kwargs, **kwargs + ) + + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements.\n + """ + ).replace("\n", "", 1), + args_compat, + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Notes"), + "A minimum of one period is required for the calculation.\n\n", + create_section_header("Examples"), + dedent( + """ + >>> s = pd.Series([0, 1, 2, 3]) + + >>> s.expanding().sem() + 0 NaN + 1 0.707107 + 2 0.707107 + 3 0.745356 + dtype: float64 + """ + ).replace("\n", "", 1), + window_method="expanding", + aggregation_description="standard error of mean", + agg_method="sem", + ) + def sem(self, ddof: int = 1, *args, **kwargs): + return super().sem(ddof=ddof, **kwargs) + + @doc( + template_header, + create_section_header("Parameters"), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + "scipy.stats.skew : Third moment of a probability density.\n", + template_see_also, + create_section_header("Notes"), + "A minimum of three periods is required for the rolling calculation.\n", + window_method="expanding", + aggregation_description="unbiased skewness", + agg_method="skew", + ) + def skew(self, **kwargs): + return super().skew(**kwargs) + + @doc( + template_header, + create_section_header("Parameters"), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + "scipy.stats.kurtosis : Reference SciPy method.\n", + template_see_also, + create_section_header("Notes"), + "A minimum of four periods is required for the calculation.\n\n", + create_section_header("Examples"), + dedent( + """ + The example below will show a rolling calculation with a window size of + four matching the equivalent function call using `scipy.stats`. + + >>> arr = [1, 2, 3, 4, 999] + >>> import scipy.stats + >>> print(f"{{scipy.stats.kurtosis(arr[:-1], bias=False):.6f}}") + -1.200000 + >>> print(f"{{scipy.stats.kurtosis(arr, bias=False):.6f}}") + 4.999874 + >>> s = pd.Series(arr) + >>> s.expanding(4).kurt() + 0 NaN + 1 NaN + 2 NaN + 3 -1.200000 + 4 4.999874 + dtype: float64 + """ + ).replace("\n", "", 1), + window_method="expanding", + aggregation_description="Fisher's definition of kurtosis without bias", + agg_method="kurt", + ) + def kurt(self, **kwargs): + return super().kurt(**kwargs) + + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + quantile : float + Quantile to compute. 0 <= quantile <= 1. 
+ interpolation : {{'linear', 'lower', 'higher', 'midpoint', 'nearest'}} + This optional parameter specifies the interpolation method to use, + when the desired quantile lies between two data points `i` and `j`: + + * linear: `i + (j - i) * fraction`, where `fraction` is the + fractional part of the index surrounded by `i` and `j`. + * lower: `i`. + * higher: `j`. + * nearest: `i` or `j` whichever is nearest. + * midpoint: (`i` + `j`) / 2. + """ + ).replace("\n", "", 1), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="expanding", + aggregation_description="quantile", + agg_method="quantile", + ) + def quantile( + self, + quantile: float, + interpolation: str = "linear", + **kwargs, + ): + return super().quantile( + quantile=quantile, + interpolation=interpolation, + **kwargs, + ) + + @doc( + template_header, + ".. versionadded:: 1.4.0 \n\n", + create_section_header("Parameters"), + dedent( + """ + method : {{'average', 'min', 'max'}}, default 'average' + How to rank the group of records that have the same value (i.e. ties): + + * average: average rank of the group + * min: lowest rank in the group + * max: highest rank in the group + + ascending : bool, default True + Whether or not the elements should be ranked in ascending order. + pct : bool, default False + Whether or not to display the returned rankings in percentile + form. + """ + ).replace("\n", "", 1), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Examples"), + dedent( + """ + >>> s = pd.Series([1, 4, 2, 3, 5, 3]) + >>> s.expanding().rank() + 0 1.0 + 1 2.0 + 2 2.0 + 3 3.0 + 4 5.0 + 5 3.5 + dtype: float64 + + >>> s.expanding().rank(method="max") + 0 1.0 + 1 2.0 + 2 2.0 + 3 3.0 + 4 5.0 + 5 4.0 + dtype: float64 + + >>> s.expanding().rank(method="min") + 0 1.0 + 1 2.0 + 2 2.0 + 3 3.0 + 4 5.0 + 5 3.0 + dtype: float64 + """ + ).replace("\n", "", 1), + window_method="expanding", + aggregation_description="rank", + agg_method="rank", + ) + def rank( + self, + method: WindowingRankType = "average", + ascending: bool = True, + pct: bool = False, + **kwargs, + ): + return super().rank( + method=method, + ascending=ascending, + pct=pct, + **kwargs, + ) + + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + other : Series or DataFrame, optional + If not supplied then will default to self and produce pairwise + output. + pairwise : bool, default None + If False then only matching columns between self and other will be + used and the output will be a DataFrame. + If True then all pairwise combinations will be calculated and the + output will be a MultiIndexed DataFrame in the case of DataFrame + inputs. In the case of missing elements, only complete pairwise + observations will be used. + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements. 
+ """ + ).replace("\n", "", 1), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="expanding", + aggregation_description="sample covariance", + agg_method="cov", + ) + def cov( + self, + other: DataFrame | Series | None = None, + pairwise: bool | None = None, + ddof: int = 1, + **kwargs, + ): + return super().cov(other=other, pairwise=pairwise, ddof=ddof, **kwargs) + + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + other : Series or DataFrame, optional + If not supplied then will default to self and produce pairwise + output. + pairwise : bool, default None + If False then only matching columns between self and other will be + used and the output will be a DataFrame. + If True then all pairwise combinations will be calculated and the + output will be a MultiIndexed DataFrame in the case of DataFrame + inputs. In the case of missing elements, only complete pairwise + observations will be used. + """ + ).replace("\n", "", 1), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + dedent( + """ + cov : Similar method to calculate covariance. + numpy.corrcoef : NumPy Pearson's correlation calculation. + """ + ).replace("\n", "", 1), + template_see_also, + create_section_header("Notes"), + dedent( + """ + This function uses Pearson's definition of correlation + (https://en.wikipedia.org/wiki/Pearson_correlation_coefficient). + + When `other` is not specified, the output will be self correlation (e.g. + all 1's), except for :class:`~pandas.DataFrame` inputs with `pairwise` + set to `True`. + + Function will return ``NaN`` for correlations of equal valued sequences; + this is the result of a 0/0 division error. + + When `pairwise` is set to `False`, only matching columns between `self` and + `other` will be used. + + When `pairwise` is set to `True`, the output will be a MultiIndex DataFrame + with the original index on the first level, and the `other` DataFrame + columns on the second level. + + In the case of missing elements, only complete pairwise observations + will be used. + """ + ).replace("\n", "", 1), + window_method="expanding", + aggregation_description="correlation", + agg_method="corr", + ) + def corr( + self, + other: DataFrame | Series | None = None, + pairwise: bool | None = None, + ddof: int = 1, + **kwargs, + ): + return super().corr(other=other, pairwise=pairwise, ddof=ddof, **kwargs) + + +class ExpandingGroupby(BaseWindowGroupby, Expanding): + """ + Provide a expanding groupby implementation. 
+ """ + + _attributes = Expanding._attributes + BaseWindowGroupby._attributes + + def _get_window_indexer(self) -> GroupbyIndexer: + """ + Return an indexer class that will compute the window start and end bounds + + Returns + ------- + GroupbyIndexer + """ + window_indexer = GroupbyIndexer( + groupby_indices=self._grouper.indices, + window_indexer=ExpandingIndexer, + ) + return window_indexer diff --git a/.local/lib/python3.8/site-packages/pandas/core/window/numba_.py b/.local/lib/python3.8/site-packages/pandas/core/window/numba_.py new file mode 100644 index 0000000..0e8eea3 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/window/numba_.py @@ -0,0 +1,364 @@ +from __future__ import annotations + +import functools +from typing import ( + TYPE_CHECKING, + Any, + Callable, +) + +import numpy as np + +from pandas._typing import Scalar +from pandas.compat._optional import import_optional_dependency + +from pandas.core.util.numba_ import ( + NUMBA_FUNC_CACHE, + get_jit_arguments, + jit_user_function, +) + + +def generate_numba_apply_func( + kwargs: dict[str, Any], + func: Callable[..., Scalar], + engine_kwargs: dict[str, bool] | None, + name: str, +): + """ + Generate a numba jitted apply function specified by values from engine_kwargs. + + 1. jit the user's function + 2. Return a rolling apply function with the jitted function inline + + Configurations specified in engine_kwargs apply to both the user's + function _AND_ the rolling apply function. + + Parameters + ---------- + kwargs : dict + **kwargs to be passed into the function + func : function + function to be applied to each window and will be JITed + engine_kwargs : dict + dictionary of arguments to be passed into numba.jit + name: str + name of the caller (Rolling/Expanding) + + Returns + ------- + Numba function + """ + nopython, nogil, parallel = get_jit_arguments(engine_kwargs, kwargs) + + cache_key = (func, f"{name}_apply_single") + if cache_key in NUMBA_FUNC_CACHE: + return NUMBA_FUNC_CACHE[cache_key] + + numba_func = jit_user_function(func, nopython, nogil, parallel) + if TYPE_CHECKING: + import numba + else: + numba = import_optional_dependency("numba") + + @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel) + def roll_apply( + values: np.ndarray, + begin: np.ndarray, + end: np.ndarray, + minimum_periods: int, + *args: Any, + ) -> np.ndarray: + result = np.empty(len(begin)) + for i in numba.prange(len(result)): + start = begin[i] + stop = end[i] + window = values[start:stop] + count_nan = np.sum(np.isnan(window)) + if len(window) - count_nan >= minimum_periods: + result[i] = numba_func(window, *args) + else: + result[i] = np.nan + return result + + return roll_apply + + +def generate_numba_ewm_func( + engine_kwargs: dict[str, bool] | None, + com: float, + adjust: bool, + ignore_na: bool, + deltas: np.ndarray, + normalize: bool, +): + """ + Generate a numba jitted ewm mean or sum function specified by values + from engine_kwargs. 
+ + Parameters + ---------- + engine_kwargs : dict + dictionary of arguments to be passed into numba.jit + com : float + adjust : bool + ignore_na : bool + deltas : numpy.ndarray + normalize : bool + + Returns + ------- + Numba function + """ + nopython, nogil, parallel = get_jit_arguments(engine_kwargs) + + str_key = "ewm_mean" if normalize else "ewm_sum" + cache_key = (lambda x: x, str_key) + if cache_key in NUMBA_FUNC_CACHE: + return NUMBA_FUNC_CACHE[cache_key] + + if TYPE_CHECKING: + import numba + else: + numba = import_optional_dependency("numba") + + @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel) + def ewm( + values: np.ndarray, + begin: np.ndarray, + end: np.ndarray, + minimum_periods: int, + ) -> np.ndarray: + result = np.empty(len(values)) + alpha = 1.0 / (1.0 + com) + old_wt_factor = 1.0 - alpha + new_wt = 1.0 if adjust else alpha + + for i in numba.prange(len(begin)): + start = begin[i] + stop = end[i] + window = values[start:stop] + sub_result = np.empty(len(window)) + + weighted = window[0] + nobs = int(not np.isnan(weighted)) + sub_result[0] = weighted if nobs >= minimum_periods else np.nan + old_wt = 1.0 + + for j in range(1, len(window)): + cur = window[j] + is_observation = not np.isnan(cur) + nobs += is_observation + if not np.isnan(weighted): + + if is_observation or not ignore_na: + if normalize: + # note that len(deltas) = len(vals) - 1 and deltas[i] + # is to be used in conjunction with vals[i+1] + old_wt *= old_wt_factor ** deltas[start + j - 1] + else: + weighted = old_wt_factor * weighted + if is_observation: + if normalize: + # avoid numerical errors on constant series + if weighted != cur: + weighted = old_wt * weighted + new_wt * cur + if normalize: + weighted = weighted / (old_wt + new_wt) + if adjust: + old_wt += new_wt + else: + old_wt = 1.0 + else: + weighted += cur + elif is_observation: + weighted = cur + + sub_result[j] = weighted if nobs >= minimum_periods else np.nan + + result[start:stop] = sub_result + + return result + + return ewm + + +def generate_numba_table_func( + kwargs: dict[str, Any], + func: Callable[..., np.ndarray], + engine_kwargs: dict[str, bool] | None, + name: str, +): + """ + Generate a numba jitted function to apply window calculations table-wise. + + Func will be passed a M window size x N number of columns array, and + must return a 1 x N number of columns array. Func is intended to operate + row-wise, but the result will be transposed for axis=1. + + 1. jit the user's function + 2. 
Return a rolling apply function with the jitted function inline + + Parameters + ---------- + kwargs : dict + **kwargs to be passed into the function + func : function + function to be applied to each window and will be JITed + engine_kwargs : dict + dictionary of arguments to be passed into numba.jit + name : str + caller (Rolling/Expanding) and original method name for numba cache key + + Returns + ------- + Numba function + """ + nopython, nogil, parallel = get_jit_arguments(engine_kwargs, kwargs) + + cache_key = (func, f"{name}_table") + if cache_key in NUMBA_FUNC_CACHE: + return NUMBA_FUNC_CACHE[cache_key] + + numba_func = jit_user_function(func, nopython, nogil, parallel) + if TYPE_CHECKING: + import numba + else: + numba = import_optional_dependency("numba") + + @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel) + def roll_table( + values: np.ndarray, + begin: np.ndarray, + end: np.ndarray, + minimum_periods: int, + *args: Any, + ): + result = np.empty(values.shape) + min_periods_mask = np.empty(values.shape) + for i in numba.prange(len(result)): + start = begin[i] + stop = end[i] + window = values[start:stop] + count_nan = np.sum(np.isnan(window), axis=0) + sub_result = numba_func(window, *args) + nan_mask = len(window) - count_nan >= minimum_periods + min_periods_mask[i, :] = nan_mask + result[i, :] = sub_result + result = np.where(min_periods_mask, result, np.nan) + return result + + return roll_table + + +# This function will no longer be needed once numba supports +# axis for all np.nan* agg functions +# https://github.com/numba/numba/issues/1269 +@functools.lru_cache(maxsize=None) +def generate_manual_numpy_nan_agg_with_axis(nan_func): + if TYPE_CHECKING: + import numba + else: + numba = import_optional_dependency("numba") + + @numba.jit(nopython=True, nogil=True, parallel=True) + def nan_agg_with_axis(table): + result = np.empty(table.shape[1]) + for i in numba.prange(table.shape[1]): + partition = table[:, i] + result[i] = nan_func(partition) + return result + + return nan_agg_with_axis + + +def generate_numba_ewm_table_func( + engine_kwargs: dict[str, bool] | None, + com: float, + adjust: bool, + ignore_na: bool, + deltas: np.ndarray, + normalize: bool, +): + """ + Generate a numba jitted ewm mean or sum function applied table wise specified + by values from engine_kwargs. 
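+
+    Unlike the column-wise kernel above, the generated function walks the
+    rows of a 2D array once and updates every column in parallel; this is
+    the kernel used when ``method="table"`` is combined with
+    ``engine="numba"``.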
+ + Parameters + ---------- + engine_kwargs : dict + dictionary of arguments to be passed into numba.jit + com : float + adjust : bool + ignore_na : bool + deltas : numpy.ndarray + normalize: bool + + Returns + ------- + Numba function + """ + nopython, nogil, parallel = get_jit_arguments(engine_kwargs) + + str_key = "ewm_mean_table" if normalize else "ewm_sum_table" + cache_key = (lambda x: x, str_key) + if cache_key in NUMBA_FUNC_CACHE: + return NUMBA_FUNC_CACHE[cache_key] + + if TYPE_CHECKING: + import numba + else: + numba = import_optional_dependency("numba") + + @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel) + def ewm_table( + values: np.ndarray, + begin: np.ndarray, + end: np.ndarray, + minimum_periods: int, + ) -> np.ndarray: + alpha = 1.0 / (1.0 + com) + old_wt_factor = 1.0 - alpha + new_wt = 1.0 if adjust else alpha + old_wt = np.ones(values.shape[1]) + + result = np.empty(values.shape) + weighted = values[0].copy() + nobs = (~np.isnan(weighted)).astype(np.int64) + result[0] = np.where(nobs >= minimum_periods, weighted, np.nan) + for i in range(1, len(values)): + cur = values[i] + is_observations = ~np.isnan(cur) + nobs += is_observations.astype(np.int64) + for j in numba.prange(len(cur)): + if not np.isnan(weighted[j]): + if is_observations[j] or not ignore_na: + if normalize: + # note that len(deltas) = len(vals) - 1 and deltas[i] + # is to be used in conjunction with vals[i+1] + old_wt[j] *= old_wt_factor ** deltas[i - 1] + else: + weighted[j] = old_wt_factor * weighted[j] + if is_observations[j]: + if normalize: + # avoid numerical errors on constant series + if weighted[j] != cur[j]: + weighted[j] = ( + old_wt[j] * weighted[j] + new_wt * cur[j] + ) + if normalize: + weighted[j] = weighted[j] / (old_wt[j] + new_wt) + if adjust: + old_wt[j] += new_wt + else: + old_wt[j] = 1.0 + else: + weighted[j] += cur[j] + elif is_observations[j]: + weighted[j] = cur[j] + + result[i] = np.where(nobs >= minimum_periods, weighted, np.nan) + + return result + + return ewm_table diff --git a/.local/lib/python3.8/site-packages/pandas/core/window/online.py b/.local/lib/python3.8/site-packages/pandas/core/window/online.py new file mode 100644 index 0000000..8ef4aee --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/core/window/online.py @@ -0,0 +1,122 @@ +from typing import ( + TYPE_CHECKING, + Dict, + Optional, +) + +import numpy as np + +from pandas.compat._optional import import_optional_dependency + +from pandas.core.util.numba_ import ( + NUMBA_FUNC_CACHE, + get_jit_arguments, +) + + +def generate_online_numba_ewma_func(engine_kwargs: Optional[Dict[str, bool]]): + """ + Generate a numba jitted groupby ewma function specified by values + from engine_kwargs. + Parameters + ---------- + engine_kwargs : dict + dictionary of arguments to be passed into numba.jit + Returns + ------- + Numba function + """ + nopython, nogil, parallel = get_jit_arguments(engine_kwargs) + + cache_key = (lambda x: x, "online_ewma") + if cache_key in NUMBA_FUNC_CACHE: + return NUMBA_FUNC_CACHE[cache_key] + + if TYPE_CHECKING: + import numba + else: + numba = import_optional_dependency("numba") + + @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel) + def online_ewma( + values: np.ndarray, + deltas: np.ndarray, + minimum_periods: int, + old_wt_factor: float, + new_wt: float, + old_wt: np.ndarray, + adjust: bool, + ignore_na: bool, + ): + """ + Compute online exponentially weighted mean per column over 2D values. 
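+
+        Per column, an observed value ``cur`` updates the state as
+        ``avg = (old_wt * avg + new_wt * cur) / (old_wt + new_wt)``, with
+        ``old_wt`` first decayed by ``old_wt_factor ** delta``.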
+
+        Takes the first observation as is, then computes the subsequent
+        exponentially weighted mean accounting for minimum periods.
+        """
+        result = np.empty(values.shape)
+        weighted_avg = values[0]
+        nobs = (~np.isnan(weighted_avg)).astype(np.int64)
+        result[0] = np.where(nobs >= minimum_periods, weighted_avg, np.nan)
+
+        for i in range(1, len(values)):
+            cur = values[i]
+            is_observations = ~np.isnan(cur)
+            nobs += is_observations.astype(np.int64)
+            for j in numba.prange(len(cur)):
+                if not np.isnan(weighted_avg[j]):
+                    if is_observations[j] or not ignore_na:
+
+                        # note that len(deltas) = len(vals) - 1 and deltas[i] is to be
+                        # used in conjunction with vals[i+1], so index by row
+                        old_wt[j] *= old_wt_factor ** deltas[i - 1]
+                        if is_observations[j]:
+                            # avoid numerical errors on constant series
+                            if weighted_avg[j] != cur[j]:
+                                weighted_avg[j] = (
+                                    (old_wt[j] * weighted_avg[j]) + (new_wt * cur[j])
+                                ) / (old_wt[j] + new_wt)
+                            if adjust:
+                                old_wt[j] += new_wt
+                            else:
+                                old_wt[j] = 1.0
+                elif is_observations[j]:
+                    weighted_avg[j] = cur[j]
+
+            result[i] = np.where(nobs >= minimum_periods, weighted_avg, np.nan)
+
+        return result, old_wt
+
+    return online_ewma
+
+
+class EWMMeanState:
+    def __init__(self, com, adjust, ignore_na, axis, shape):
+        alpha = 1.0 / (1.0 + com)
+        self.axis = axis
+        self.shape = shape
+        self.adjust = adjust
+        self.ignore_na = ignore_na
+        self.new_wt = 1.0 if adjust else alpha
+        self.old_wt_factor = 1.0 - alpha
+        self.old_wt = np.ones(self.shape[self.axis - 1])
+        self.last_ewm = None
+
+    def run_ewm(self, weighted_avg, deltas, min_periods, ewm_func):
+        result, old_wt = ewm_func(
+            weighted_avg,
+            deltas,
+            min_periods,
+            self.old_wt_factor,
+            self.new_wt,
+            self.old_wt,
+            self.adjust,
+            self.ignore_na,
+        )
+        self.old_wt = old_wt
+        self.last_ewm = result[-1]
+        return result
+
+    def reset(self):
+        self.old_wt = np.ones(self.shape[self.axis - 1])
+        self.last_ewm = None
diff --git a/.local/lib/python3.8/site-packages/pandas/core/window/rolling.py b/.local/lib/python3.8/site-packages/pandas/core/window/rolling.py
new file mode 100644
index 0000000..72ebbcb
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/core/window/rolling.py
@@ -0,0 +1,2642 @@
+"""
+Provide a generic structure to support window functions,
+similar to how we have a Groupby object.
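+
+For example, ``Series.rolling`` returns a ``Rolling`` object (a ``BaseWindow``
+subclass) whose aggregations evaluate one kernel per window:
+
+>>> import pandas as pd
+>>> pd.Series([1.0, 2.0, 3.0]).rolling(2).mean()
+0    NaN
+1    1.5
+2    2.5
+dtype: float64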
+""" +from __future__ import annotations + +import copy +from datetime import timedelta +from functools import partial +import inspect +from textwrap import dedent +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Hashable, +) +import warnings + +import numpy as np + +from pandas._libs.tslibs import ( + BaseOffset, + to_offset, +) +import pandas._libs.window.aggregations as window_aggregations +from pandas._typing import ( + ArrayLike, + Axis, + NDFrameT, + WindowingRankType, +) +from pandas.compat._optional import import_optional_dependency +from pandas.compat.numpy import function as nv +from pandas.util._decorators import doc +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + ensure_float64, + is_bool, + is_integer, + is_list_like, + is_scalar, + needs_i8_conversion, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) +from pandas.core.dtypes.missing import notna + +from pandas.core._numba import executor +from pandas.core.algorithms import factorize +from pandas.core.apply import ResamplerWindowApply +from pandas.core.arrays import ExtensionArray +from pandas.core.base import ( + DataError, + SelectionMixin, +) +import pandas.core.common as com +from pandas.core.indexers.objects import ( + BaseIndexer, + FixedWindowIndexer, + GroupbyIndexer, + VariableWindowIndexer, +) +from pandas.core.indexes.api import ( + DatetimeIndex, + Index, + MultiIndex, + PeriodIndex, + TimedeltaIndex, +) +from pandas.core.reshape.concat import concat +from pandas.core.util.numba_ import ( + NUMBA_FUNC_CACHE, + maybe_use_numba, +) +from pandas.core.window.common import ( + flex_binary_moment, + zsqrt, +) +from pandas.core.window.doc import ( + _shared_docs, + args_compat, + create_section_header, + kwargs_compat, + kwargs_scipy, + numba_notes, + template_header, + template_returns, + template_see_also, + window_agg_numba_parameters, + window_apply_parameters, +) +from pandas.core.window.numba_ import ( + generate_manual_numpy_nan_agg_with_axis, + generate_numba_apply_func, + generate_numba_table_func, +) + +if TYPE_CHECKING: + from pandas import ( + DataFrame, + Series, + ) + from pandas.core.generic import NDFrame + from pandas.core.groupby.ops import BaseGrouper + from pandas.core.internals import Block # noqa:F401 + + +class BaseWindow(SelectionMixin): + """Provides utilities for performing windowing operations.""" + + _attributes: list[str] = [] + exclusions: frozenset[Hashable] = frozenset() + _on: Index + + def __init__( + self, + obj: NDFrame, + window=None, + min_periods: int | None = None, + center: bool = False, + win_type: str | None = None, + axis: Axis = 0, + on: str | Index | None = None, + closed: str | None = None, + method: str = "single", + *, + selection=None, + ): + self.obj = obj + self.on = on + self.closed = closed + self.window = window + self.min_periods = min_periods + self.center = center + # TODO(2.0): Change this back to self.win_type once deprecation is enforced + self._win_type = win_type + self.axis = obj._get_axis_number(axis) if axis is not None else None + self.method = method + self._win_freq_i8 = None + if self.on is None: + if self.axis == 0: + self._on = self.obj.index + else: + # i.e. 
self.axis == 1 + self._on = self.obj.columns + elif isinstance(self.on, Index): + self._on = self.on + elif isinstance(self.obj, ABCDataFrame) and self.on in self.obj.columns: + self._on = Index(self.obj[self.on]) + else: + raise ValueError( + f"invalid on specified as {self.on}, " + "must be a column (of DataFrame), an Index or None" + ) + + self._selection = selection + self._validate() + + @property + def win_type(self): + if self._win_freq_i8 is not None: + warnings.warn( + "win_type will no longer return 'freq' in a future version. " + "Check the type of self.window instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return "freq" + return self._win_type + + @property + def is_datetimelike(self) -> bool: + warnings.warn( + "is_datetimelike is deprecated and will be removed in a future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self._win_freq_i8 is not None + + def validate(self) -> None: + warnings.warn( + "validate is deprecated and will be removed in a future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self._validate() + + def _validate(self) -> None: + if self.center is not None and not is_bool(self.center): + raise ValueError("center must be a boolean") + if self.min_periods is not None: + if not is_integer(self.min_periods): + raise ValueError("min_periods must be an integer") + elif self.min_periods < 0: + raise ValueError("min_periods must be >= 0") + elif is_integer(self.window) and self.min_periods > self.window: + raise ValueError( + f"min_periods {self.min_periods} must be <= window {self.window}" + ) + if self.closed is not None and self.closed not in [ + "right", + "both", + "left", + "neither", + ]: + raise ValueError("closed must be 'right', 'left', 'both' or 'neither'") + if not isinstance(self.obj, (ABCSeries, ABCDataFrame)): + raise TypeError(f"invalid type: {type(self)}") + if isinstance(self.window, BaseIndexer): + # Validate that the passed BaseIndexer subclass has + # a get_window_bounds with the correct signature. + get_window_bounds_signature = inspect.signature( + self.window.get_window_bounds + ).parameters.keys() + expected_signature = inspect.signature( + BaseIndexer().get_window_bounds + ).parameters.keys() + if get_window_bounds_signature != expected_signature: + raise ValueError( + f"{type(self.window).__name__} does not implement " + f"the correct signature for get_window_bounds" + ) + if self.method not in ["table", "single"]: + raise ValueError("method must be 'table' or 'single") + + def _check_window_bounds( + self, start: np.ndarray, end: np.ndarray, num_vals: int + ) -> None: + if len(start) != len(end): + raise ValueError( + f"start ({len(start)}) and end ({len(end)}) bounds must be the " + f"same length" + ) + elif len(start) != num_vals: + raise ValueError( + f"start and end bounds ({len(start)}) must be the same length " + f"as the object ({num_vals})" + ) + + def _create_data(self, obj: NDFrameT) -> NDFrameT: + """ + Split data into blocks & return conformed data. + """ + # filter out the on from the object + if self.on is not None and not isinstance(self.on, Index) and obj.ndim == 2: + obj = obj.reindex(columns=obj.columns.difference([self.on]), copy=False) + if self.axis == 1: + # GH: 20649 in case of mixed dtype and axis=1 we have to convert everything + # to float to calculate the complete row at once. We exclude all non-numeric + # dtypes. 
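+            # e.g. a frame with int and float columns is coerced to a single
+            # float64 array so one row-wise window can span all columns.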
+ obj = obj.select_dtypes(include=["number"], exclude=["timedelta"]) + obj = obj.astype("float64", copy=False) + obj._mgr = obj._mgr.consolidate() + return obj + + def _gotitem(self, key, ndim, subset=None): + """ + Sub-classes to define. Return a sliced object. + + Parameters + ---------- + key : str / list of selections + ndim : {1, 2} + requested ndim of result + subset : object, default None + subset to act on + """ + # create a new object to prevent aliasing + if subset is None: + subset = self.obj + + # we need to make a shallow copy of ourselves + # with the same groupby + with warnings.catch_warnings(): + # TODO(2.0): Remove once win_type deprecation is enforced + warnings.filterwarnings("ignore", "win_type", FutureWarning) + kwargs = {attr: getattr(self, attr) for attr in self._attributes} + + selection = None + if subset.ndim == 2 and ( + (is_scalar(key) and key in subset) or is_list_like(key) + ): + selection = key + + new_win = type(self)(subset, selection=selection, **kwargs) + return new_win + + def __getattr__(self, attr: str): + if attr in self._internal_names_set: + return object.__getattribute__(self, attr) + if attr in self.obj: + return self[attr] + + raise AttributeError( + f"'{type(self).__name__}' object has no attribute '{attr}'" + ) + + def _dir_additions(self): + return self.obj._dir_additions() + + def __repr__(self) -> str: + """ + Provide a nice str repr of our rolling object. + """ + attrs_list = ( + f"{attr_name}={getattr(self, attr_name)}" + for attr_name in self._attributes + if getattr(self, attr_name, None) is not None and attr_name[0] != "_" + ) + attrs = ",".join(attrs_list) + return f"{type(self).__name__} [{attrs}]" + + def __iter__(self): + obj = self._selected_obj.set_axis(self._on) + obj = self._create_data(obj) + indexer = self._get_window_indexer() + + start, end = indexer.get_window_bounds( + num_values=len(obj), + min_periods=self.min_periods, + center=self.center, + closed=self.closed, + ) + self._check_window_bounds(start, end, len(obj)) + + for s, e in zip(start, end): + result = obj.iloc[slice(s, e)] + yield result + + def _prep_values(self, values: ArrayLike) -> np.ndarray: + """Convert input to numpy arrays for Cython routines""" + if needs_i8_conversion(values.dtype): + raise NotImplementedError( + f"ops for {type(self).__name__} for this " + f"dtype {values.dtype} are not implemented" + ) + else: + # GH #12373 : rolling functions error on float32 data + # make sure the data is coerced to float64 + try: + if isinstance(values, ExtensionArray): + values = values.to_numpy(np.float64, na_value=np.nan) + else: + values = ensure_float64(values) + except (ValueError, TypeError) as err: + raise TypeError(f"cannot handle this type -> {values.dtype}") from err + + # Convert inf to nan for C funcs + inf = np.isinf(values) + if inf.any(): + values = np.where(inf, np.nan, values) + + return values + + def _insert_on_column(self, result: DataFrame, obj: DataFrame) -> None: + # if we have an 'on' column we want to put it back into + # the results in the same location + from pandas import Series + + if self.on is not None and not self._on.equals(obj.index): + name = self._on.name + extra_col = Series(self._on, index=self.obj.index, name=name) + if name in result.columns: + # TODO: sure we want to overwrite results? 
+ result[name] = extra_col + elif name in result.index.names: + pass + elif name in self._selected_obj.columns: + # insert in the same location as we had in _selected_obj + old_cols = self._selected_obj.columns + new_cols = result.columns + old_loc = old_cols.get_loc(name) + overlap = new_cols.intersection(old_cols[:old_loc]) + new_loc = len(overlap) + result.insert(new_loc, name, extra_col) + else: + # insert at the end + result[name] = extra_col + + @property + def _index_array(self): + # TODO: why do we get here with e.g. MultiIndex? + if needs_i8_conversion(self._on.dtype): + return self._on.asi8 + return None + + def _resolve_output(self, out: DataFrame, obj: DataFrame) -> DataFrame: + """Validate and finalize result.""" + if out.shape[1] == 0 and obj.shape[1] > 0: + raise DataError("No numeric types to aggregate") + elif out.shape[1] == 0: + return obj.astype("float64") + + self._insert_on_column(out, obj) + return out + + def _get_window_indexer(self) -> BaseIndexer: + """ + Return an indexer class that will compute the window start and end bounds + """ + if isinstance(self.window, BaseIndexer): + return self.window + if self._win_freq_i8 is not None: + return VariableWindowIndexer( + index_array=self._index_array, + window_size=self._win_freq_i8, + center=self.center, + ) + return FixedWindowIndexer(window_size=self.window) + + def _apply_series( + self, homogeneous_func: Callable[..., ArrayLike], name: str | None = None + ) -> Series: + """ + Series version of _apply_blockwise + """ + obj = self._create_data(self._selected_obj) + + if name == "count": + # GH 12541: Special case for count where we support date-like types + obj = notna(obj).astype(int) + try: + values = self._prep_values(obj._values) + except (TypeError, NotImplementedError) as err: + raise DataError("No numeric types to aggregate") from err + + result = homogeneous_func(values) + return obj._constructor(result, index=obj.index, name=obj.name) + + def _apply_blockwise( + self, homogeneous_func: Callable[..., ArrayLike], name: str | None = None + ) -> DataFrame | Series: + """ + Apply the given function to the DataFrame broken down into homogeneous + sub-frames. + """ + if self._selected_obj.ndim == 1: + return self._apply_series(homogeneous_func, name) + + obj = self._create_data(self._selected_obj) + if name == "count": + # GH 12541: Special case for count where we support date-like types + obj = notna(obj).astype(int) + obj._mgr = obj._mgr.consolidate() + + def hfunc(values: ArrayLike) -> ArrayLike: + values = self._prep_values(values) + return homogeneous_func(values) + + if self.axis == 1: + obj = obj.T + + taker = [] + res_values = [] + for i, arr in enumerate(obj._iter_column_arrays()): + # GH#42736 operate column-wise instead of block-wise + try: + res = hfunc(arr) + except (TypeError, NotImplementedError): + pass + else: + res_values.append(res) + taker.append(i) + + df = type(obj)._from_arrays( + res_values, + index=obj.index, + columns=obj.columns.take(taker), + verify_integrity=False, + ) + + if self.axis == 1: + df = df.T + + if 0 != len(res_values) != len(obj.columns): + # GH#42738 ignore_failures dropped nuisance columns + dropped = obj.columns.difference(obj.columns.take(taker)) + warnings.warn( + "Dropping of nuisance columns in rolling operations " + "is deprecated; in a future version this will raise TypeError. " + "Select only valid columns before calling the operation. 
" + f"Dropped columns were {dropped}", + FutureWarning, + stacklevel=find_stack_level(), + ) + + return self._resolve_output(df, obj) + + def _apply_tablewise( + self, homogeneous_func: Callable[..., ArrayLike], name: str | None = None + ) -> DataFrame | Series: + """ + Apply the given function to the DataFrame across the entire object + """ + if self._selected_obj.ndim == 1: + raise ValueError("method='table' not applicable for Series objects.") + obj = self._create_data(self._selected_obj) + values = self._prep_values(obj.to_numpy()) + values = values.T if self.axis == 1 else values + result = homogeneous_func(values) + result = result.T if self.axis == 1 else result + out = obj._constructor(result, index=obj.index, columns=obj.columns) + + return self._resolve_output(out, obj) + + def _apply_pairwise( + self, + target: DataFrame | Series, + other: DataFrame | Series | None, + pairwise: bool | None, + func: Callable[[DataFrame | Series, DataFrame | Series], DataFrame | Series], + ) -> DataFrame | Series: + """ + Apply the given pairwise function given 2 pandas objects (DataFrame/Series) + """ + if other is None: + other = target + # only default unset + pairwise = True if pairwise is None else pairwise + elif not isinstance(other, (ABCDataFrame, ABCSeries)): + raise ValueError("other must be a DataFrame or Series") + + return flex_binary_moment(target, other, func, pairwise=bool(pairwise)) + + def _apply( + self, + func: Callable[..., Any], + name: str | None = None, + numba_cache_key: tuple[Callable, str] | None = None, + numba_args: tuple[Any, ...] = (), + **kwargs, + ): + """ + Rolling statistical measure using supplied function. + + Designed to be used with passed-in Cython array-based functions. + + Parameters + ---------- + func : callable function to apply + name : str, + numba_cache_key : tuple + caching key to be used to store a compiled numba func + numba_args : tuple + args to be passed when func is a numba func + **kwargs + additional arguments for rolling function and window function + + Returns + ------- + y : type of input + """ + window_indexer = self._get_window_indexer() + min_periods = ( + self.min_periods + if self.min_periods is not None + else window_indexer.window_size + ) + + def homogeneous_func(values: np.ndarray): + # calculation function + + if values.size == 0: + return values.copy() + + def calc(x): + start, end = window_indexer.get_window_bounds( + num_values=len(x), + min_periods=min_periods, + center=self.center, + closed=self.closed, + ) + self._check_window_bounds(start, end, len(x)) + + return func(x, start, end, min_periods, *numba_args) + + with np.errstate(all="ignore"): + result = calc(values) + + if numba_cache_key is not None: + NUMBA_FUNC_CACHE[numba_cache_key] = func + + return result + + if self.method == "single": + return self._apply_blockwise(homogeneous_func, name) + else: + return self._apply_tablewise(homogeneous_func, name) + + def _numba_apply( + self, + func: Callable[..., Any], + numba_cache_key_str: str, + engine_kwargs: dict[str, bool] | None = None, + *func_args, + ): + window_indexer = self._get_window_indexer() + min_periods = ( + self.min_periods + if self.min_periods is not None + else window_indexer.window_size + ) + obj = self._create_data(self._selected_obj) + if self.axis == 1: + obj = obj.T + values = self._prep_values(obj.to_numpy()) + if values.ndim == 1: + values = values.reshape(-1, 1) + start, end = window_indexer.get_window_bounds( + num_values=len(values), + min_periods=min_periods, + center=self.center, + 
closed=self.closed, + ) + self._check_window_bounds(start, end, len(values)) + aggregator = executor.generate_shared_aggregator( + func, engine_kwargs, numba_cache_key_str + ) + result = aggregator(values, start, end, min_periods, *func_args) + NUMBA_FUNC_CACHE[(func, numba_cache_key_str)] = aggregator + result = result.T if self.axis == 1 else result + if obj.ndim == 1: + result = result.squeeze() + out = obj._constructor(result, index=obj.index, name=obj.name) + return out + else: + out = obj._constructor(result, index=obj.index, columns=obj.columns) + return self._resolve_output(out, obj) + + def aggregate(self, func, *args, **kwargs): + result = ResamplerWindowApply(self, func, args=args, kwargs=kwargs).agg() + if result is None: + return self.apply(func, raw=False, args=args, kwargs=kwargs) + return result + + agg = aggregate + + +class BaseWindowGroupby(BaseWindow): + """ + Provide the groupby windowing facilities. + """ + + _grouper: BaseGrouper + _as_index: bool + _attributes: list[str] = ["_grouper"] + + def __init__( + self, + obj: DataFrame | Series, + *args, + _grouper: BaseGrouper, + _as_index: bool = True, + **kwargs, + ): + from pandas.core.groupby.ops import BaseGrouper + + if not isinstance(_grouper, BaseGrouper): + raise ValueError("Must pass a BaseGrouper object.") + self._grouper = _grouper + self._as_index = _as_index + # GH 32262: It's convention to keep the grouping column in + # groupby., but unexpected to users in + # groupby.rolling. + obj = obj.drop(columns=self._grouper.names, errors="ignore") + super().__init__(obj, *args, **kwargs) + + def _apply( + self, + func: Callable[..., Any], + name: str | None = None, + numba_cache_key: tuple[Callable, str] | None = None, + numba_args: tuple[Any, ...] = (), + **kwargs, + ) -> DataFrame | Series: + result = super()._apply( + func, + name, + numba_cache_key, + numba_args, + **kwargs, + ) + # Reconstruct the resulting MultiIndex + # 1st set of levels = group by labels + # 2nd set of levels = original DataFrame/Series index + grouped_object_index = self.obj.index + grouped_index_name = [*grouped_object_index.names] + groupby_keys = copy.copy(self._grouper.names) + result_index_names = groupby_keys + grouped_index_name + + drop_columns = [ + key + for key in self._grouper.names + if key not in self.obj.index.names or key is None + ] + + if len(drop_columns) != len(groupby_keys): + # Our result will have still kept the column in the result + result = result.drop(columns=drop_columns, errors="ignore") + + codes = self._grouper.codes + levels = copy.copy(self._grouper.levels) + + group_indices = self._grouper.indices.values() + if group_indices: + indexer = np.concatenate(list(group_indices)) + else: + indexer = np.array([], dtype=np.intp) + codes = [c.take(indexer) for c in codes] + + # if the index of the original dataframe needs to be preserved, append + # this index (but reordered) to the codes/levels from the groupby + if grouped_object_index is not None: + idx = grouped_object_index.take(indexer) + if not isinstance(idx, MultiIndex): + idx = MultiIndex.from_arrays([idx]) + codes.extend(list(idx.codes)) + levels.extend(list(idx.levels)) + + result_index = MultiIndex( + levels, codes, names=result_index_names, verify_integrity=False + ) + + result.index = result_index + if not self._as_index: + result = result.reset_index(level=list(range(len(groupby_keys)))) + return result + + def _apply_pairwise( + self, + target: DataFrame | Series, + other: DataFrame | Series | None, + pairwise: bool | None, + func: 
Callable[[DataFrame | Series, DataFrame | Series], DataFrame | Series], + ) -> DataFrame | Series: + """ + Apply the given pairwise function given 2 pandas objects (DataFrame/Series) + """ + # Manually drop the grouping column first + target = target.drop(columns=self._grouper.names, errors="ignore") + target = self._create_data(target) + result = super()._apply_pairwise(target, other, pairwise, func) + # 1) Determine the levels + codes of the groupby levels + if other is not None and not all( + len(group) == len(other) for group in self._grouper.indices.values() + ): + # GH 42915 + # len(other) != len(any group), so must reindex (expand) the result + # from flex_binary_moment to a "transform"-like result + # per groupby combination + old_result_len = len(result) + result = concat( + [ + result.take(gb_indices).reindex(result.index) + for gb_indices in self._grouper.indices.values() + ] + ) + + gb_pairs = ( + com.maybe_make_list(pair) for pair in self._grouper.indices.keys() + ) + groupby_codes = [] + groupby_levels = [] + # e.g. [[1, 2], [4, 5]] as [[1, 4], [2, 5]] + for gb_level_pair in map(list, zip(*gb_pairs)): + labels = np.repeat(np.array(gb_level_pair), old_result_len) + codes, levels = factorize(labels) + groupby_codes.append(codes) + groupby_levels.append(levels) + else: + # pairwise=True or len(other) == len(each group), so repeat + # the groupby labels by the number of columns in the original object + groupby_codes = self._grouper.codes + # error: Incompatible types in assignment (expression has type + # "List[Index]", variable has type "List[Union[ndarray, Index]]") + groupby_levels = self._grouper.levels # type: ignore[assignment] + + group_indices = self._grouper.indices.values() + if group_indices: + indexer = np.concatenate(list(group_indices)) + else: + indexer = np.array([], dtype=np.intp) + + if target.ndim == 1: + repeat_by = 1 + else: + repeat_by = len(target.columns) + groupby_codes = [ + np.repeat(c.take(indexer), repeat_by) for c in groupby_codes + ] + # 2) Determine the levels + codes of the result from super()._apply_pairwise + if isinstance(result.index, MultiIndex): + result_codes = list(result.index.codes) + result_levels = list(result.index.levels) + result_names = list(result.index.names) + else: + idx_codes, idx_levels = factorize(result.index) + result_codes = [idx_codes] + result_levels = [idx_levels] + result_names = [result.index.name] + + # 3) Create the resulting index by combining 1) + 2) + result_codes = groupby_codes + result_codes + result_levels = groupby_levels + result_levels + result_names = self._grouper.names + result_names + + result_index = MultiIndex( + result_levels, result_codes, names=result_names, verify_integrity=False + ) + result.index = result_index + return result + + def _create_data(self, obj: NDFrameT) -> NDFrameT: + """ + Split data into blocks & return conformed data. 
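+
+        For grouped windows, rows are first reordered so that each group is
+        contiguous before the split (see the GH 36197 note below).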
+ """ + # Ensure the object we're rolling over is monotonically sorted relative + # to the groups + # GH 36197 + if not obj.empty: + groupby_order = np.concatenate(list(self._grouper.indices.values())).astype( + np.int64 + ) + obj = obj.take(groupby_order) + return super()._create_data(obj) + + def _gotitem(self, key, ndim, subset=None): + # we are setting the index on the actual object + # here so our index is carried through to the selected obj + # when we do the splitting for the groupby + if self.on is not None: + # GH 43355 + subset = self.obj.set_index(self._on) + return super()._gotitem(key, ndim, subset=subset) + + def _validate_monotonic(self): + """ + Validate that "on" is monotonic; already validated at a higher level. + """ + pass + + +class Window(BaseWindow): + """ + Provide rolling window calculations. + + Parameters + ---------- + window : int, offset, or BaseIndexer subclass + Size of the moving window. + + If an integer, the fixed number of observations used for + each window. + + If an offset, the time period of each window. Each + window will be a variable sized based on the observations included in + the time-period. This is only valid for datetimelike indexes. + To learn more about the offsets & frequency strings, please see `this link + `__. + + If a BaseIndexer subclass, the window boundaries + based on the defined ``get_window_bounds`` method. Additional rolling + keyword arguments, namely ``min_periods``, ``center``, and + ``closed`` will be passed to ``get_window_bounds``. + + min_periods : int, default None + Minimum number of observations in window required to have a value; + otherwise, result is ``np.nan``. + + For a window that is specified by an offset, ``min_periods`` will default to 1. + + For a window that is specified by an integer, ``min_periods`` will default + to the size of the window. + + center : bool, default False + If False, set the window labels as the right edge of the window index. + + If True, set the window labels as the center of the window index. + + win_type : str, default None + If ``None``, all points are evenly weighted. + + If a string, it must be a valid `scipy.signal window function + `__. + + Certain Scipy window types require additional parameters to be passed + in the aggregation function. The additional parameters must match + the keywords specified in the Scipy window type method signature. + + on : str, optional + For a DataFrame, a column label or Index level on which + to calculate the rolling window, rather than the DataFrame's index. + + Provided integer column is ignored and excluded from result since + an integer index is not used to calculate the rolling window. + + axis : int or str, default 0 + If ``0`` or ``'index'``, roll across the rows. + + If ``1`` or ``'columns'``, roll across the columns. + + closed : str, default None + If ``'right'``, the first point in the window is excluded from calculations. + + If ``'left'``, the last point in the window is excluded from calculations. + + If ``'both'``, the no points in the window are excluded from calculations. + + If ``'neither'``, the first and last points in the window are excluded + from calculations. + + Default ``None`` (``'right'``). + + .. versionchanged:: 1.2.0 + + The closed parameter with fixed windows is now supported. + + method : str {'single', 'table'}, default 'single' + + .. versionadded:: 1.3.0 + + Execute the rolling operation per single column or row (``'single'``) + or over the entire object (``'table'``). 
+ + This argument is only implemented when specifying ``engine='numba'`` + in the method call. + + Returns + ------- + ``Window`` subclass if a ``win_type`` is passed + + ``Rolling`` subclass if ``win_type`` is not passed + + See Also + -------- + expanding : Provides expanding transformations. + ewm : Provides exponential weighted functions. + + Notes + ----- + See :ref:`Windowing Operations ` for further usage details + and examples. + + Examples + -------- + >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]}) + >>> df + B + 0 0.0 + 1 1.0 + 2 2.0 + 3 NaN + 4 4.0 + + **window** + + Rolling sum with a window length of 2 observations. + + >>> df.rolling(2).sum() + B + 0 NaN + 1 1.0 + 2 3.0 + 3 NaN + 4 NaN + + Rolling sum with a window span of 2 seconds. + + >>> df_time = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]}, + ... index = [pd.Timestamp('20130101 09:00:00'), + ... pd.Timestamp('20130101 09:00:02'), + ... pd.Timestamp('20130101 09:00:03'), + ... pd.Timestamp('20130101 09:00:05'), + ... pd.Timestamp('20130101 09:00:06')]) + + >>> df_time + B + 2013-01-01 09:00:00 0.0 + 2013-01-01 09:00:02 1.0 + 2013-01-01 09:00:03 2.0 + 2013-01-01 09:00:05 NaN + 2013-01-01 09:00:06 4.0 + + >>> df_time.rolling('2s').sum() + B + 2013-01-01 09:00:00 0.0 + 2013-01-01 09:00:02 1.0 + 2013-01-01 09:00:03 3.0 + 2013-01-01 09:00:05 NaN + 2013-01-01 09:00:06 4.0 + + Rolling sum with forward looking windows with 2 observations. + + >>> indexer = pd.api.indexers.FixedForwardWindowIndexer(window_size=2) + >>> df.rolling(window=indexer, min_periods=1).sum() + B + 0 1.0 + 1 3.0 + 2 2.0 + 3 4.0 + 4 4.0 + + **min_periods** + + Rolling sum with a window length of 2 observations, but only needs a minimum of 1 + observation to calculate a value. + + >>> df.rolling(2, min_periods=1).sum() + B + 0 0.0 + 1 1.0 + 2 3.0 + 3 2.0 + 4 4.0 + + **center** + + Rolling sum with the result assigned to the center of the window index. + + >>> df.rolling(3, min_periods=1, center=True).sum() + B + 0 1.0 + 1 3.0 + 2 3.0 + 3 6.0 + 4 4.0 + + >>> df.rolling(3, min_periods=1, center=False).sum() + B + 0 0.0 + 1 1.0 + 2 3.0 + 3 3.0 + 4 6.0 + + **win_type** + + Rolling sum with a window length of 2, using the Scipy ``'gaussian'`` + window type. ``std`` is required in the aggregation function. + + >>> df.rolling(2, win_type='gaussian').sum(std=3) + B + 0 NaN + 1 0.986207 + 2 2.958621 + 3 NaN + 4 NaN + """ + + _attributes = [ + "window", + "min_periods", + "center", + "win_type", + "axis", + "on", + "closed", + "method", + ] + + def _validate(self): + super()._validate() + + if not isinstance(self.win_type, str): + raise ValueError(f"Invalid win_type {self.win_type}") + signal = import_optional_dependency( + "scipy.signal", extra="Scipy is required to generate window weight." + ) + self._scipy_weight_generator = getattr(signal, self.win_type, None) + if self._scipy_weight_generator is None: + raise ValueError(f"Invalid win_type {self.win_type}") + + if isinstance(self.window, BaseIndexer): + raise NotImplementedError( + "BaseIndexer subclasses not implemented with win_types." + ) + elif not is_integer(self.window) or self.window < 0: + raise ValueError("window must be an integer 0 or greater") + + if self.method != "single": + raise NotImplementedError("'single' is the only supported method type.") + + def _center_window(self, result: np.ndarray, offset: int) -> np.ndarray: + """ + Center the result in the window for weighted rolling aggregations. + """ + if self.axis > result.ndim - 1: + raise ValueError("Requested axis is larger then no. 
of argument dimensions") + + if offset > 0: + lead_indexer = [slice(None)] * result.ndim + lead_indexer[self.axis] = slice(offset, None) + result = np.copy(result[tuple(lead_indexer)]) + return result + + def _apply( + self, + func: Callable[[np.ndarray, int, int], np.ndarray], + name: str | None = None, + numba_cache_key: tuple[Callable, str] | None = None, + numba_args: tuple[Any, ...] = (), + **kwargs, + ): + """ + Rolling with weights statistical measure using supplied function. + + Designed to be used with passed-in Cython array-based functions. + + Parameters + ---------- + func : callable function to apply + name : str, + use_numba_cache : tuple + unused + numba_args : tuple + unused + **kwargs + additional arguments for scipy windows if necessary + + Returns + ------- + y : type of input + """ + # "None" not callable [misc] + window = self._scipy_weight_generator( # type: ignore[misc] + self.window, **kwargs + ) + offset = (len(window) - 1) // 2 if self.center else 0 + + def homogeneous_func(values: np.ndarray): + # calculation function + + if values.size == 0: + return values.copy() + + def calc(x): + additional_nans = np.array([np.nan] * offset) + x = np.concatenate((x, additional_nans)) + return func(x, window, self.min_periods or len(window)) + + with np.errstate(all="ignore"): + # Our weighted aggregations return memoryviews + result = np.asarray(calc(values)) + + if self.center: + result = self._center_window(result, offset) + + return result + + return self._apply_blockwise(homogeneous_func, name) + + @doc( + _shared_docs["aggregate"], + see_also=dedent( + """ + See Also + -------- + pandas.DataFrame.aggregate : Similar DataFrame method. + pandas.Series.aggregate : Similar Series method. + """ + ), + examples=dedent( + """ + Examples + -------- + >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) + >>> df + A B C + 0 1 4 7 + 1 2 5 8 + 2 3 6 9 + + >>> df.rolling(2, win_type="boxcar").agg("mean") + A B C + 0 NaN NaN NaN + 1 1.5 4.5 7.5 + 2 2.5 5.5 8.5 + """ + ), + klass="Series/DataFrame", + axis="", + ) + def aggregate(self, func, *args, **kwargs): + result = ResamplerWindowApply(self, func, args=args, kwargs=kwargs).agg() + if result is None: + + # these must apply directly + result = func(self) + + return result + + agg = aggregate + + @doc( + template_header, + create_section_header("Parameters"), + kwargs_scipy, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="rolling", + aggregation_description="weighted window sum", + agg_method="sum", + ) + def sum(self, *args, **kwargs): + nv.validate_window_func("sum", args, kwargs) + window_func = window_aggregations.roll_weighted_sum + # error: Argument 1 to "_apply" of "Window" has incompatible type + # "Callable[[ndarray, ndarray, int], ndarray]"; expected + # "Callable[[ndarray, int, int], ndarray]" + return self._apply(window_func, name="sum", **kwargs) # type: ignore[arg-type] + + @doc( + template_header, + create_section_header("Parameters"), + kwargs_scipy, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="rolling", + aggregation_description="weighted window mean", + agg_method="mean", + ) + def mean(self, *args, **kwargs): + nv.validate_window_func("mean", args, kwargs) + window_func = window_aggregations.roll_weighted_mean + # error: Argument 1 to "_apply" of "Window" has incompatible type + # "Callable[[ndarray, ndarray, int], 
ndarray]"; expected + # "Callable[[ndarray, int, int], ndarray]" + return self._apply(window_func, name="mean", **kwargs) # type: ignore[arg-type] + + @doc( + template_header, + ".. versionadded:: 1.0.0 \n\n", + create_section_header("Parameters"), + kwargs_scipy, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="rolling", + aggregation_description="weighted window variance", + agg_method="var", + ) + def var(self, ddof: int = 1, *args, **kwargs): + nv.validate_window_func("var", args, kwargs) + window_func = partial(window_aggregations.roll_weighted_var, ddof=ddof) + kwargs.pop("name", None) + return self._apply(window_func, name="var", **kwargs) + + @doc( + template_header, + ".. versionadded:: 1.0.0 \n\n", + create_section_header("Parameters"), + kwargs_scipy, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="rolling", + aggregation_description="weighted window standard deviation", + agg_method="std", + ) + def std(self, ddof: int = 1, *args, **kwargs): + nv.validate_window_func("std", args, kwargs) + return zsqrt(self.var(ddof=ddof, name="std", **kwargs)) + + +class RollingAndExpandingMixin(BaseWindow): + def count(self): + window_func = window_aggregations.roll_sum + return self._apply(window_func, name="count") + + def apply( + self, + func: Callable[..., Any], + raw: bool = False, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + args: tuple[Any, ...] | None = None, + kwargs: dict[str, Any] | None = None, + ): + if args is None: + args = () + if kwargs is None: + kwargs = {} + + if not is_bool(raw): + raise ValueError("raw parameter must be `True` or `False`") + + numba_cache_key = None + numba_args: tuple[Any, ...] 
= () + if maybe_use_numba(engine): + if raw is False: + raise ValueError("raw must be `True` when using the numba engine") + caller_name = type(self).__name__ + numba_args = args + if self.method == "single": + apply_func = generate_numba_apply_func( + kwargs, func, engine_kwargs, caller_name + ) + numba_cache_key = (func, f"{caller_name}_apply_single") + else: + apply_func = generate_numba_table_func( + kwargs, func, engine_kwargs, f"{caller_name}_apply" + ) + numba_cache_key = (func, f"{caller_name}_apply_table") + elif engine in ("cython", None): + if engine_kwargs is not None: + raise ValueError("cython engine does not accept engine_kwargs") + apply_func = self._generate_cython_apply_func(args, kwargs, raw, func) + else: + raise ValueError("engine must be either 'numba' or 'cython'") + + return self._apply( + apply_func, + numba_cache_key=numba_cache_key, + numba_args=numba_args, + ) + + def _generate_cython_apply_func( + self, + args: tuple[Any, ...], + kwargs: dict[str, Any], + raw: bool, + function: Callable[..., Any], + ) -> Callable[[np.ndarray, np.ndarray, np.ndarray, int], np.ndarray]: + from pandas import Series + + window_func = partial( + window_aggregations.roll_apply, + args=args, + kwargs=kwargs, + raw=raw, + function=function, + ) + + def apply_func(values, begin, end, min_periods, raw=raw): + if not raw: + # GH 45912 + values = Series(values, index=self._on) + return window_func(values, begin, end, min_periods) + + return apply_func + + def sum( + self, + *args, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + nv.validate_window_func("sum", args, kwargs) + if maybe_use_numba(engine): + if self.method == "table": + func = generate_manual_numpy_nan_agg_with_axis(np.nansum) + return self.apply( + func, + raw=True, + engine=engine, + engine_kwargs=engine_kwargs, + ) + else: + from pandas.core._numba.kernels import sliding_sum + + return self._numba_apply(sliding_sum, "rolling_sum", engine_kwargs) + window_func = window_aggregations.roll_sum + return self._apply(window_func, name="sum", **kwargs) + + def max( + self, + *args, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + nv.validate_window_func("max", args, kwargs) + if maybe_use_numba(engine): + if self.method == "table": + func = generate_manual_numpy_nan_agg_with_axis(np.nanmax) + return self.apply( + func, + raw=True, + engine=engine, + engine_kwargs=engine_kwargs, + ) + else: + from pandas.core._numba.kernels import sliding_min_max + + return self._numba_apply( + sliding_min_max, "rolling_max", engine_kwargs, True + ) + window_func = window_aggregations.roll_max + return self._apply(window_func, name="max", **kwargs) + + def min( + self, + *args, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + nv.validate_window_func("min", args, kwargs) + if maybe_use_numba(engine): + if self.method == "table": + func = generate_manual_numpy_nan_agg_with_axis(np.nanmin) + return self.apply( + func, + raw=True, + engine=engine, + engine_kwargs=engine_kwargs, + ) + else: + from pandas.core._numba.kernels import sliding_min_max + + return self._numba_apply( + sliding_min_max, "rolling_min", engine_kwargs, False + ) + window_func = window_aggregations.roll_min + return self._apply(window_func, name="min", **kwargs) + + def mean( + self, + *args, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + nv.validate_window_func("mean", args, kwargs) + if 
maybe_use_numba(engine): + if self.method == "table": + func = generate_manual_numpy_nan_agg_with_axis(np.nanmean) + return self.apply( + func, + raw=True, + engine=engine, + engine_kwargs=engine_kwargs, + ) + else: + from pandas.core._numba.kernels import sliding_mean + + return self._numba_apply(sliding_mean, "rolling_mean", engine_kwargs) + window_func = window_aggregations.roll_mean + return self._apply(window_func, name="mean", **kwargs) + + def median( + self, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + if maybe_use_numba(engine): + if self.method == "table": + func = generate_manual_numpy_nan_agg_with_axis(np.nanmedian) + else: + func = np.nanmedian + + return self.apply( + func, + raw=True, + engine=engine, + engine_kwargs=engine_kwargs, + ) + window_func = window_aggregations.roll_median_c + return self._apply(window_func, name="median", **kwargs) + + def std( + self, + ddof: int = 1, + *args, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + nv.validate_window_func("std", args, kwargs) + if maybe_use_numba(engine): + if self.method == "table": + raise NotImplementedError("std not supported with method='table'") + else: + from pandas.core._numba.kernels import sliding_var + + return zsqrt( + self._numba_apply(sliding_var, "rolling_std", engine_kwargs, ddof) + ) + window_func = window_aggregations.roll_var + + def zsqrt_func(values, begin, end, min_periods): + return zsqrt(window_func(values, begin, end, min_periods, ddof=ddof)) + + return self._apply( + zsqrt_func, + name="std", + **kwargs, + ) + + def var( + self, + ddof: int = 1, + *args, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + nv.validate_window_func("var", args, kwargs) + if maybe_use_numba(engine): + if self.method == "table": + raise NotImplementedError("var not supported with method='table'") + else: + from pandas.core._numba.kernels import sliding_var + + return self._numba_apply( + sliding_var, "rolling_var", engine_kwargs, ddof + ) + window_func = partial(window_aggregations.roll_var, ddof=ddof) + return self._apply( + window_func, + name="var", + **kwargs, + ) + + def skew(self, **kwargs): + window_func = window_aggregations.roll_skew + return self._apply( + window_func, + name="skew", + **kwargs, + ) + + def sem(self, ddof: int = 1, *args, **kwargs): + return self.std(*args, **kwargs) / (self.count() - ddof).pow(0.5) + + def kurt(self, **kwargs): + window_func = window_aggregations.roll_kurt + return self._apply( + window_func, + name="kurt", + **kwargs, + ) + + def quantile(self, quantile: float, interpolation: str = "linear", **kwargs): + if quantile == 1.0: + window_func = window_aggregations.roll_max + elif quantile == 0.0: + window_func = window_aggregations.roll_min + else: + window_func = partial( + window_aggregations.roll_quantile, + quantile=quantile, + interpolation=interpolation, + ) + + return self._apply(window_func, name="quantile", **kwargs) + + def rank( + self, + method: WindowingRankType = "average", + ascending: bool = True, + pct: bool = False, + **kwargs, + ): + window_func = partial( + window_aggregations.roll_rank, + method=method, + ascending=ascending, + percentile=pct, + ) + + return self._apply(window_func, name="rank", **kwargs) + + def cov( + self, + other: DataFrame | Series | None = None, + pairwise: bool | None = None, + ddof: int = 1, + **kwargs, + ): + from pandas import Series + + def cov_func(x, y): + x_array = self._prep_values(x) 
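+ # The covariance below is assembled from running window moments:
+ # cov(x, y) = (E[xy] - E[x] * E[y]) * n / (n - ddof), with n the
+ # per-window count of complete pairs. A minimal self-contained NumPy
+ # check of that identity (an illustrative sketch, not pandas API):
+ #
+ #     import numpy as np
+ #     x = np.array([1.0, 2.0, 4.0])
+ #     y = np.array([1.0, 3.0, 5.0])
+ #     n = len(x)
+ #     cov = (np.mean(x * y) - np.mean(x) * np.mean(y)) * n / (n - 1)
+ #     assert np.isclose(cov, np.cov(x, y)[0, 1])  # both equal 3.0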
+ y_array = self._prep_values(y) + window_indexer = self._get_window_indexer() + min_periods = ( + self.min_periods + if self.min_periods is not None + else window_indexer.window_size + ) + start, end = window_indexer.get_window_bounds( + num_values=len(x_array), + min_periods=min_periods, + center=self.center, + closed=self.closed, + ) + self._check_window_bounds(start, end, len(x_array)) + + with np.errstate(all="ignore"): + mean_x_y = window_aggregations.roll_mean( + x_array * y_array, start, end, min_periods + ) + mean_x = window_aggregations.roll_mean(x_array, start, end, min_periods) + mean_y = window_aggregations.roll_mean(y_array, start, end, min_periods) + count_x_y = window_aggregations.roll_sum( + notna(x_array + y_array).astype(np.float64), start, end, 0 + ) + result = (mean_x_y - mean_x * mean_y) * (count_x_y / (count_x_y - ddof)) + return Series(result, index=x.index, name=x.name) + + return self._apply_pairwise(self._selected_obj, other, pairwise, cov_func) + + def corr( + self, + other: DataFrame | Series | None = None, + pairwise: bool | None = None, + ddof: int = 1, + **kwargs, + ): + + from pandas import Series + + def corr_func(x, y): + x_array = self._prep_values(x) + y_array = self._prep_values(y) + window_indexer = self._get_window_indexer() + min_periods = ( + self.min_periods + if self.min_periods is not None + else window_indexer.window_size + ) + start, end = window_indexer.get_window_bounds( + num_values=len(x_array), + min_periods=min_periods, + center=self.center, + closed=self.closed, + ) + self._check_window_bounds(start, end, len(x_array)) + + with np.errstate(all="ignore"): + mean_x_y = window_aggregations.roll_mean( + x_array * y_array, start, end, min_periods + ) + mean_x = window_aggregations.roll_mean(x_array, start, end, min_periods) + mean_y = window_aggregations.roll_mean(y_array, start, end, min_periods) + count_x_y = window_aggregations.roll_sum( + notna(x_array + y_array).astype(np.float64), start, end, 0 + ) + x_var = window_aggregations.roll_var( + x_array, start, end, min_periods, ddof + ) + y_var = window_aggregations.roll_var( + y_array, start, end, min_periods, ddof + ) + numerator = (mean_x_y - mean_x * mean_y) * ( + count_x_y / (count_x_y - ddof) + ) + denominator = (x_var * y_var) ** 0.5 + result = numerator / denominator + return Series(result, index=x.index, name=x.name) + + return self._apply_pairwise(self._selected_obj, other, pairwise, corr_func) + + +class Rolling(RollingAndExpandingMixin): + + _attributes: list[str] = [ + "window", + "min_periods", + "center", + "win_type", + "axis", + "on", + "closed", + "method", + ] + + def _validate(self): + super()._validate() + + # we allow rolling on a datetimelike index + if ( + self.obj.empty + or isinstance(self._on, (DatetimeIndex, TimedeltaIndex, PeriodIndex)) + ) and isinstance(self.window, (str, BaseOffset, timedelta)): + + self._validate_monotonic() + + # this will raise ValueError on non-fixed freqs + try: + freq = to_offset(self.window) + except (TypeError, ValueError) as err: + raise ValueError( + f"passed window {self.window} is not " + "compatible with a datetimelike index" + ) from err + if isinstance(self._on, PeriodIndex): + self._win_freq_i8 = freq.nanos / (self._on.freq.nanos / self._on.freq.n) + else: + self._win_freq_i8 = freq.nanos + + # min_periods must be an integer + if self.min_periods is None: + self.min_periods = 1 + + elif isinstance(self.window, BaseIndexer): + # Passed BaseIndexer subclass should handle all other rolling kwargs + return + elif not 
is_integer(self.window) or self.window < 0: + raise ValueError("window must be an integer 0 or greater") + + def _validate_monotonic(self): + """ + Validate monotonic (increasing or decreasing). + """ + if not (self._on.is_monotonic_increasing or self._on.is_monotonic_decreasing): + self._raise_monotonic_error() + + def _raise_monotonic_error(self): + formatted = self.on + if self.on is None: + formatted = "index" + raise ValueError(f"{formatted} must be monotonic") + + @doc( + _shared_docs["aggregate"], + see_also=dedent( + """ + See Also + -------- + pandas.Series.rolling : Calling object with Series data. + pandas.DataFrame.rolling : Calling object with DataFrame data. + """ + ), + examples=dedent( + """ + Examples + -------- + >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) + >>> df + A B C + 0 1 4 7 + 1 2 5 8 + 2 3 6 9 + + >>> df.rolling(2).sum() + A B C + 0 NaN NaN NaN + 1 3.0 9.0 15.0 + 2 5.0 11.0 17.0 + + >>> df.rolling(2).agg({"A": "sum", "B": "min"}) + A B + 0 NaN NaN + 1 3.0 4.0 + 2 5.0 5.0 + """ + ), + klass="Series/Dataframe", + axis="", + ) + def aggregate(self, func, *args, **kwargs): + return super().aggregate(func, *args, **kwargs) + + agg = aggregate + + @doc( + template_header, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Examples"), + dedent( + """ + >>> s = pd.Series([2, 3, np.nan, 10]) + >>> s.rolling(2).count() + 0 1.0 + 1 2.0 + 2 1.0 + 3 1.0 + dtype: float64 + >>> s.rolling(3).count() + 0 1.0 + 1 2.0 + 2 2.0 + 3 2.0 + dtype: float64 + >>> s.rolling(4).count() + 0 1.0 + 1 2.0 + 2 2.0 + 3 3.0 + dtype: float64 + """ + ).replace("\n", "", 1), + window_method="rolling", + aggregation_description="count of non NaN observations", + agg_method="count", + ) + def count(self): + if self.min_periods is None: + warnings.warn( + ( + "min_periods=None will default to the size of window " + "consistent with other methods in a future version. " + "Specify min_periods=0 instead." + ), + FutureWarning, + stacklevel=find_stack_level(), + ) + self.min_periods = 0 + result = super().count() + self.min_periods = None + else: + result = super().count() + return result + + @doc( + template_header, + create_section_header("Parameters"), + window_apply_parameters, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="rolling", + aggregation_description="custom aggregation function", + agg_method="apply", + ) + def apply( + self, + func: Callable[..., Any], + raw: bool = False, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + args: tuple[Any, ...] 
| None = None, + kwargs: dict[str, Any] | None = None, + ): + return super().apply( + func, + raw=raw, + engine=engine, + engine_kwargs=engine_kwargs, + args=args, + kwargs=kwargs, + ) + + @doc( + template_header, + create_section_header("Parameters"), + args_compat, + window_agg_numba_parameters(), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Notes"), + numba_notes, + create_section_header("Examples"), + dedent( + """ + >>> s = pd.Series([1, 2, 3, 4, 5]) + >>> s + 0 1 + 1 2 + 2 3 + 3 4 + 4 5 + dtype: int64 + + >>> s.rolling(3).sum() + 0 NaN + 1 NaN + 2 6.0 + 3 9.0 + 4 12.0 + dtype: float64 + + >>> s.rolling(3, center=True).sum() + 0 NaN + 1 6.0 + 2 9.0 + 3 12.0 + 4 NaN + dtype: float64 + + For DataFrame, each sum is computed column-wise. + + >>> df = pd.DataFrame({{"A": s, "B": s ** 2}}) + >>> df + A B + 0 1 1 + 1 2 4 + 2 3 9 + 3 4 16 + 4 5 25 + + >>> df.rolling(3).sum() + A B + 0 NaN NaN + 1 NaN NaN + 2 6.0 14.0 + 3 9.0 29.0 + 4 12.0 50.0 + """ + ).replace("\n", "", 1), + window_method="rolling", + aggregation_description="sum", + agg_method="sum", + ) + def sum( + self, + *args, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + nv.validate_rolling_func("sum", args, kwargs) + return super().sum(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs) + + @doc( + template_header, + create_section_header("Parameters"), + args_compat, + window_agg_numba_parameters(), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Notes"), + numba_notes[:-1], + window_method="rolling", + aggregation_description="maximum", + agg_method="max", + ) + def max( + self, + *args, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + nv.validate_rolling_func("max", args, kwargs) + return super().max(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs) + + @doc( + template_header, + create_section_header("Parameters"), + args_compat, + window_agg_numba_parameters(), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Notes"), + numba_notes, + create_section_header("Examples"), + dedent( + """ + Performing a rolling minimum with a window size of 3. + + >>> s = pd.Series([4, 3, 5, 2, 6]) + >>> s.rolling(3).min() + 0 NaN + 1 NaN + 2 3.0 + 3 2.0 + 4 2.0 + dtype: float64 + """ + ).replace("\n", "", 1), + window_method="rolling", + aggregation_description="minimum", + agg_method="min", + ) + def min( + self, + *args, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + nv.validate_rolling_func("min", args, kwargs) + return super().min(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs) + + @doc( + template_header, + create_section_header("Parameters"), + args_compat, + window_agg_numba_parameters(), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Notes"), + numba_notes, + create_section_header("Examples"), + dedent( + """ + The below examples will show rolling mean calculations with window sizes of + two and three, respectively. 
+ + >>> s = pd.Series([1, 2, 3, 4]) + >>> s.rolling(2).mean() + 0 NaN + 1 1.5 + 2 2.5 + 3 3.5 + dtype: float64 + + >>> s.rolling(3).mean() + 0 NaN + 1 NaN + 2 2.0 + 3 3.0 + dtype: float64 + """ + ).replace("\n", "", 1), + window_method="rolling", + aggregation_description="mean", + agg_method="mean", + ) + def mean( + self, + *args, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + nv.validate_rolling_func("mean", args, kwargs) + return super().mean(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs) + + @doc( + template_header, + create_section_header("Parameters"), + window_agg_numba_parameters(), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Notes"), + numba_notes, + create_section_header("Examples"), + dedent( + """ + Compute the rolling median of a series with a window size of 3. + + >>> s = pd.Series([0, 1, 2, 3, 4]) + >>> s.rolling(3).median() + 0 NaN + 1 NaN + 2 1.0 + 3 2.0 + 4 3.0 + dtype: float64 + """ + ).replace("\n", "", 1), + window_method="rolling", + aggregation_description="median", + agg_method="median", + ) + def median( + self, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + return super().median(engine=engine, engine_kwargs=engine_kwargs, **kwargs) + + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements. + """ + ).replace("\n", "", 1), + args_compat, + window_agg_numba_parameters("1.4"), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + "numpy.std : Equivalent method for NumPy array.\n", + template_see_also, + create_section_header("Notes"), + dedent( + """ + The default ``ddof`` of 1 used in :meth:`Series.std` is different + than the default ``ddof`` of 0 in :func:`numpy.std`. + + A minimum of one period is required for the rolling calculation. + + The implementation is susceptible to floating point imprecision as + shown in the example below.\n + """ + ).replace("\n", "", 1), + create_section_header("Examples"), + dedent( + """ + >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5]) + >>> s.rolling(3).std() + 0 NaN + 1 NaN + 2 5.773503e-01 + 3 1.000000e+00 + 4 1.000000e+00 + 5 1.154701e+00 + 6 2.580957e-08 + dtype: float64 + """ + ).replace("\n", "", 1), + window_method="rolling", + aggregation_description="standard deviation", + agg_method="std", + ) + def std( + self, + ddof: int = 1, + *args, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + nv.validate_rolling_func("std", args, kwargs) + return super().std( + ddof=ddof, engine=engine, engine_kwargs=engine_kwargs, **kwargs + ) + + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements. 
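+ For instance, with the default ``ddof=1`` a length-3 window such as
+ ``[5, 5, 6]`` has mean ``16/3``, so its variance is
+ ``(2 * (1/3)**2 + (2/3)**2) / (3 - 1) = 1/3``, the first finite
+ value in the example below.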
+ """ + ).replace("\n", "", 1), + args_compat, + window_agg_numba_parameters("1.4"), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + "numpy.var : Equivalent method for NumPy array.\n", + template_see_also, + create_section_header("Notes"), + dedent( + """ + The default ``ddof`` of 1 used in :meth:`Series.var` is different + than the default ``ddof`` of 0 in :func:`numpy.var`. + + A minimum of one period is required for the rolling calculation. + + The implementation is susceptible to floating point imprecision as + shown in the example below.\n + """ + ).replace("\n", "", 1), + create_section_header("Examples"), + dedent( + """ + >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5]) + >>> s.rolling(3).var() + 0 NaN + 1 NaN + 2 3.333333e-01 + 3 1.000000e+00 + 4 1.000000e+00 + 5 1.333333e+00 + 6 6.661338e-16 + dtype: float64 + """ + ).replace("\n", "", 1), + window_method="rolling", + aggregation_description="variance", + agg_method="var", + ) + def var( + self, + ddof: int = 1, + *args, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + nv.validate_rolling_func("var", args, kwargs) + return super().var( + ddof=ddof, engine=engine, engine_kwargs=engine_kwargs, **kwargs + ) + + @doc( + template_header, + create_section_header("Parameters"), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + "scipy.stats.skew : Third moment of a probability density.\n", + template_see_also, + create_section_header("Notes"), + "A minimum of three periods is required for the rolling calculation.\n", + window_method="rolling", + aggregation_description="unbiased skewness", + agg_method="skew", + ) + def skew(self, **kwargs): + return super().skew(**kwargs) + + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements. + """ + ).replace("\n", "", 1), + args_compat, + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Notes"), + "A minimum of one period is required for the calculation.\n\n", + create_section_header("Examples"), + dedent( + """ + >>> s = pd.Series([0, 1, 2, 3]) + >>> s.rolling(2, min_periods=1).sem() + 0 NaN + 1 0.707107 + 2 0.707107 + 3 0.707107 + dtype: float64 + """ + ).replace("\n", "", 1), + window_method="rolling", + aggregation_description="standard error of mean", + agg_method="sem", + ) + def sem(self, ddof: int = 1, *args, **kwargs): + return self.std(*args, **kwargs) / (self.count() - ddof).pow(0.5) + + @doc( + template_header, + create_section_header("Parameters"), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + "scipy.stats.kurtosis : Reference SciPy method.\n", + template_see_also, + create_section_header("Notes"), + "A minimum of four periods is required for the calculation.\n\n", + create_section_header("Examples"), + dedent( + """ + The example below will show a rolling calculation with a window size of + four matching the equivalent function call using `scipy.stats`. 
+ + >>> arr = [1, 2, 3, 4, 999] + >>> import scipy.stats + >>> print(f"{{scipy.stats.kurtosis(arr[:-1], bias=False):.6f}}") + -1.200000 + >>> print(f"{{scipy.stats.kurtosis(arr[1:], bias=False):.6f}}") + 3.999946 + >>> s = pd.Series(arr) + >>> s.rolling(4).kurt() + 0 NaN + 1 NaN + 2 NaN + 3 -1.200000 + 4 3.999946 + dtype: float64 + """ + ).replace("\n", "", 1), + window_method="rolling", + aggregation_description="Fisher's definition of kurtosis without bias", + agg_method="kurt", + ) + def kurt(self, **kwargs): + return super().kurt(**kwargs) + + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + quantile : float + Quantile to compute. 0 <= quantile <= 1. + interpolation : {{'linear', 'lower', 'higher', 'midpoint', 'nearest'}} + This optional parameter specifies the interpolation method to use, + when the desired quantile lies between two data points `i` and `j`: + + * linear: `i + (j - i) * fraction`, where `fraction` is the + fractional part of the index surrounded by `i` and `j`. + * lower: `i`. + * higher: `j`. + * nearest: `i` or `j` whichever is nearest. + * midpoint: (`i` + `j`) / 2. + """ + ).replace("\n", "", 1), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Examples"), + dedent( + """ + >>> s = pd.Series([1, 2, 3, 4]) + >>> s.rolling(2).quantile(.4, interpolation='lower') + 0 NaN + 1 1.0 + 2 2.0 + 3 3.0 + dtype: float64 + + >>> s.rolling(2).quantile(.4, interpolation='midpoint') + 0 NaN + 1 1.5 + 2 2.5 + 3 3.5 + dtype: float64 + """ + ).replace("\n", "", 1), + window_method="rolling", + aggregation_description="quantile", + agg_method="quantile", + ) + def quantile(self, quantile: float, interpolation: str = "linear", **kwargs): + return super().quantile( + quantile=quantile, + interpolation=interpolation, + **kwargs, + ) + + @doc( + template_header, + ".. versionadded:: 1.4.0 \n\n", + create_section_header("Parameters"), + dedent( + """ + method : {{'average', 'min', 'max'}}, default 'average' + How to rank the group of records that have the same value (i.e. ties): + + * average: average rank of the group + * min: lowest rank in the group + * max: highest rank in the group + + ascending : bool, default True + Whether or not the elements should be ranked in ascending order. + pct : bool, default False + Whether or not to display the returned rankings in percentile + form. + """ + ).replace("\n", "", 1), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Examples"), + dedent( + """ + >>> s = pd.Series([1, 4, 2, 3, 5, 3]) + >>> s.rolling(3).rank() + 0 NaN + 1 NaN + 2 2.0 + 3 2.0 + 4 3.0 + 5 1.5 + dtype: float64 + + >>> s.rolling(3).rank(method="max") + 0 NaN + 1 NaN + 2 2.0 + 3 2.0 + 4 3.0 + 5 2.0 + dtype: float64 + + >>> s.rolling(3).rank(method="min") + 0 NaN + 1 NaN + 2 2.0 + 3 2.0 + 4 3.0 + 5 1.0 + dtype: float64 + """ + ).replace("\n", "", 1), + window_method="rolling", + aggregation_description="rank", + agg_method="rank", + ) + def rank( + self, + method: WindowingRankType = "average", + ascending: bool = True, + pct: bool = False, + **kwargs, + ): + return super().rank( + method=method, + ascending=ascending, + pct=pct, + **kwargs, + ) + + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + other : Series or DataFrame, optional + If not supplied then will default to self and produce pairwise + output. 
+ pairwise : bool, default None + If False then only matching columns between self and other will be + used and the output will be a DataFrame. + If True then all pairwise combinations will be calculated and the + output will be a MultiIndexed DataFrame in the case of DataFrame + inputs. In the case of missing elements, only complete pairwise + observations will be used. + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements. + """ + ).replace("\n", "", 1), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="rolling", + aggregation_description="sample covariance", + agg_method="cov", + ) + def cov( + self, + other: DataFrame | Series | None = None, + pairwise: bool | None = None, + ddof: int = 1, + **kwargs, + ): + return super().cov(other=other, pairwise=pairwise, ddof=ddof, **kwargs) + + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + other : Series or DataFrame, optional + If not supplied then will default to self and produce pairwise + output. + pairwise : bool, default None + If False then only matching columns between self and other will be + used and the output will be a DataFrame. + If True then all pairwise combinations will be calculated and the + output will be a MultiIndexed DataFrame in the case of DataFrame + inputs. In the case of missing elements, only complete pairwise + observations will be used. + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements. + """ + ).replace("\n", "", 1), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + dedent( + """ + cov : Similar method to calculate covariance. + numpy.corrcoef : NumPy Pearson's correlation calculation. + """ + ).replace("\n", "", 1), + template_see_also, + create_section_header("Notes"), + dedent( + """ + This function uses Pearson's definition of correlation + (https://en.wikipedia.org/wiki/Pearson_correlation_coefficient). + + When `other` is not specified, the output will be self correlation (e.g. + all 1's), except for :class:`~pandas.DataFrame` inputs with `pairwise` + set to `True`. + + Function will return ``NaN`` for correlations of equal valued sequences; + this is the result of a 0/0 division error. + + When `pairwise` is set to `False`, only matching columns between `self` and + `other` will be used. + + When `pairwise` is set to `True`, the output will be a MultiIndex DataFrame + with the original index on the first level, and the `other` DataFrame + columns on the second level. + + In the case of missing elements, only complete pairwise observations + will be used.\n + """ + ).replace("\n", "", 1), + create_section_header("Examples"), + dedent( + """ + The below example shows a rolling calculation with a window size of + four matching the equivalent function call using :meth:`numpy.corrcoef`. 
+ + >>> v1 = [3, 3, 3, 5, 8] + >>> v2 = [3, 4, 4, 4, 8] + >>> # numpy returns a 2X2 array, the correlation coefficient + >>> # is the number at entry [0][1] + >>> print(f"{{np.corrcoef(v1[:-1], v2[:-1])[0][1]:.6f}}") + 0.333333 + >>> print(f"{{np.corrcoef(v1[1:], v2[1:])[0][1]:.6f}}") + 0.916949 + >>> s1 = pd.Series(v1) + >>> s2 = pd.Series(v2) + >>> s1.rolling(4).corr(s2) + 0 NaN + 1 NaN + 2 NaN + 3 0.333333 + 4 0.916949 + dtype: float64 + + The below example shows a similar rolling calculation on a + DataFrame using the pairwise option. + + >>> matrix = np.array([[51., 35.], [49., 30.], [47., 32.],\ + [46., 31.], [50., 36.]]) + >>> print(np.corrcoef(matrix[:-1,0], matrix[:-1,1]).round(7)) + [[1. 0.6263001] + [0.6263001 1. ]] + >>> print(np.corrcoef(matrix[1:,0], matrix[1:,1]).round(7)) + [[1. 0.5553681] + [0.5553681 1. ]] + >>> df = pd.DataFrame(matrix, columns=['X','Y']) + >>> df + X Y + 0 51.0 35.0 + 1 49.0 30.0 + 2 47.0 32.0 + 3 46.0 31.0 + 4 50.0 36.0 + >>> df.rolling(4).corr(pairwise=True) + X Y + 0 X NaN NaN + Y NaN NaN + 1 X NaN NaN + Y NaN NaN + 2 X NaN NaN + Y NaN NaN + 3 X 1.000000 0.626300 + Y 0.626300 1.000000 + 4 X 1.000000 0.555368 + Y 0.555368 1.000000 + """ + ).replace("\n", "", 1), + window_method="rolling", + aggregation_description="correlation", + agg_method="corr", + ) + def corr( + self, + other: DataFrame | Series | None = None, + pairwise: bool | None = None, + ddof: int = 1, + **kwargs, + ): + return super().corr(other=other, pairwise=pairwise, ddof=ddof, **kwargs) + + +Rolling.__doc__ = Window.__doc__ + + +class RollingGroupby(BaseWindowGroupby, Rolling): + """ + Provide a rolling groupby implementation. + """ + + _attributes = Rolling._attributes + BaseWindowGroupby._attributes + + def _get_window_indexer(self) -> GroupbyIndexer: + """ + Return an indexer class that will compute the window start and end bounds + + Returns + ------- + GroupbyIndexer + """ + rolling_indexer: type[BaseIndexer] + indexer_kwargs: dict[str, Any] | None = None + index_array = self._index_array + if isinstance(self.window, BaseIndexer): + rolling_indexer = type(self.window) + indexer_kwargs = self.window.__dict__.copy() + assert isinstance(indexer_kwargs, dict) # for mypy + # We'll be using the index of each group later + indexer_kwargs.pop("index_array", None) + window = self.window + elif self._win_freq_i8 is not None: + rolling_indexer = VariableWindowIndexer + window = self._win_freq_i8 + else: + rolling_indexer = FixedWindowIndexer + window = self.window + window_indexer = GroupbyIndexer( + index_array=index_array, + window_size=window, + groupby_indices=self._grouper.indices, + window_indexer=rolling_indexer, + indexer_kwargs=indexer_kwargs, + ) + return window_indexer + + def _validate_monotonic(self): + """ + Validate that on is monotonic; + """ + if ( + not (self._on.is_monotonic_increasing or self._on.is_monotonic_decreasing) + or self._on.hasnans + ): + self._raise_monotonic_error() diff --git a/.local/lib/python3.8/site-packages/pandas/errors/__init__.py b/.local/lib/python3.8/site-packages/pandas/errors/__init__.py new file mode 100644 index 0000000..cbe9467 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/errors/__init__.py @@ -0,0 +1,238 @@ +""" +Expose public exceptions & warnings +""" + +from pandas._config.config import OptionError # noqa:F401 + +from pandas._libs.tslibs import ( # noqa:F401 + OutOfBoundsDatetime, + OutOfBoundsTimedelta, +) + + +class IntCastingNaNError(ValueError): + """ + Raised when attempting an astype operation on an array with NaN 
to an integer + dtype. + """ + + pass + + +class NullFrequencyError(ValueError): + """ + Error raised when a null `freq` attribute is used in an operation + that needs a non-null frequency, particularly `DatetimeIndex.shift`, + `TimedeltaIndex.shift`, `PeriodIndex.shift`. + """ + + pass + + +class PerformanceWarning(Warning): + """ + Warning raised when there is a possible performance impact. + """ + + +class UnsupportedFunctionCall(ValueError): + """ + Exception raised when attempting to call a numpy function + on a pandas object, but that function is not supported by + the object e.g. ``np.cumsum(groupby_object)``. + """ + + +class UnsortedIndexError(KeyError): + """ + Error raised when attempting to get a slice of a MultiIndex, + and the index has not been lexsorted. Subclass of `KeyError`. + """ + + +class ParserError(ValueError): + """ + Exception that is raised by an error encountered in parsing file contents. + + This is a generic error raised for errors encountered when functions like + `read_csv` or `read_html` are parsing contents of a file. + + See Also + -------- + read_csv : Read CSV (comma-separated) file into a DataFrame. + read_html : Read HTML table into a DataFrame. + """ + + +class DtypeWarning(Warning): + """ + Warning raised when reading different dtypes in a column from a file. + + Raised for a dtype incompatibility. This can happen whenever `read_csv` + or `read_table` encounter non-uniform dtypes in one or more columns of a given + CSV file. + + See Also + -------- + read_csv : Read CSV (comma-separated) file into a DataFrame. + read_table : Read general delimited file into a DataFrame. + + Notes + ----- + This warning is issued when dealing with larger files because the dtype + checking happens per chunk read. + + Despite the warning, the CSV file is read with mixed types in a single + column which will be an object type. See the examples below to better + understand this issue. + + Examples + -------- + This example creates and reads a large CSV file with a column that contains + `int` and `str`. + + >>> df = pd.DataFrame({'a': (['1'] * 100000 + ['X'] * 100000 + + ... ['1'] * 100000), + ... 'b': ['b'] * 300000}) # doctest: +SKIP + >>> df.to_csv('test.csv', index=False) # doctest: +SKIP + >>> df2 = pd.read_csv('test.csv') # doctest: +SKIP + ... # DtypeWarning: Columns (0) have mixed types + + Note that ``df2`` will contain both `str` and `int` for the + same input, '1'. + + >>> df2.iloc[262140, 0] # doctest: +SKIP + '1' + >>> type(df2.iloc[262140, 0]) # doctest: +SKIP + <class 'str'> + >>> df2.iloc[262150, 0] # doctest: +SKIP + 1 + >>> type(df2.iloc[262150, 0]) # doctest: +SKIP + <class 'int'> + + One way to solve this issue is using the `dtype` parameter in the + `read_csv` and `read_table` functions to make the conversion explicit: + + >>> df2 = pd.read_csv('test.csv', sep=',', dtype={'a': str}) # doctest: +SKIP + + No warning was issued. + """ + + +class EmptyDataError(ValueError): + """ + Exception that is raised in `pd.read_csv` (by both the C and + Python engines) when empty data or header is encountered. + """ + + +class ParserWarning(Warning): + """ + Warning raised when reading a file that doesn't use the default 'c' parser. + + Raised by `pd.read_csv` and `pd.read_table` when it is necessary to change + parsers, generally from the default 'c' parser to 'python'. + + It happens due to a lack of support or functionality for parsing a + particular attribute of a CSV file with the requested engine. + + Currently, 'c' unsupported options include the following parameters: + + 1.
`sep` other than a single character (e.g. regex separators) + 2. `skipfooter` higher than 0 + 3. `sep=None` with `delim_whitespace=False` + + The warning can be avoided by adding `engine='python'` as a parameter in + `pd.read_csv` and `pd.read_table` methods. + + See Also + -------- + pd.read_csv : Read CSV (comma-separated) file into DataFrame. + pd.read_table : Read general delimited file into DataFrame. + + Examples + -------- + Using a `sep` in `pd.read_csv` other than a single character: + + >>> import io + >>> csv = '''a;b;c + ... 1;1,8 + ... 1;2,1''' + >>> df = pd.read_csv(io.StringIO(csv), sep='[;,]') # doctest: +SKIP + ... # ParserWarning: Falling back to the 'python' engine... + + Adding `engine='python'` to `pd.read_csv` removes the warning: + + >>> df = pd.read_csv(io.StringIO(csv), sep='[;,]', engine='python') + """ + + +class MergeError(ValueError): + """ + Error raised when problems arise during merging due to issues + with the input data. Subclass of `ValueError`. + """ + + +class AccessorRegistrationWarning(Warning): + """ + Warning for attribute conflicts in accessor registration. + """ + + +class AbstractMethodError(NotImplementedError): + """ + Raise this error instead of NotImplementedError for abstract methods + while keeping compatibility with Python 2 and Python 3. + """ + + def __init__(self, class_instance, methodtype="method"): + types = {"method", "classmethod", "staticmethod", "property"} + if methodtype not in types: + raise ValueError( + f"methodtype must be one of {types}, got {methodtype} instead." + ) + self.methodtype = methodtype + self.class_instance = class_instance + + def __str__(self) -> str: + if self.methodtype == "classmethod": + name = self.class_instance.__name__ + else: + name = type(self.class_instance).__name__ + return f"This {self.methodtype} must be defined in the concrete class {name}" + + +class NumbaUtilError(Exception): + """ + Error raised for unsupported Numba engine routines. + """ + + +class DuplicateLabelError(ValueError): + """ + Error raised when an operation would introduce duplicate labels. + + .. versionadded:: 1.2.0 + + Examples + -------- + >>> s = pd.Series([0, 1, 2], index=['a', 'b', 'c']).set_flags( + ... allows_duplicate_labels=False + ... ) + >>> s.reindex(['a', 'a', 'b']) + Traceback (most recent call last): + ... + DuplicateLabelError: Index has duplicates. + positions + label + a [0, 1] + """ + + +class InvalidIndexError(Exception): + """ + Exception raised when attempting to use an invalid index key. + + ..
versionadded:: 1.1.0 + """ diff --git a/.local/lib/python3.8/site-packages/pandas/errors/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/errors/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..a7499e7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/errors/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/__init__.py b/.local/lib/python3.8/site-packages/pandas/io/__init__.py new file mode 100644 index 0000000..bd3ddc0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/__init__.py @@ -0,0 +1,12 @@ +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + # import modules that have public classes/functions + from pandas.io import ( + formats, + json, + stata, + ) + + # and mark only those modules as public + __all__ = ["formats", "json", "stata"] diff --git a/.local/lib/python3.8/site-packages/pandas/io/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..9ae90a6 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/__pycache__/api.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/__pycache__/api.cpython-38.pyc new file mode 100644 index 0000000..434b536 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/__pycache__/api.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/__pycache__/clipboards.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/__pycache__/clipboards.cpython-38.pyc new file mode 100644 index 0000000..e50cf22 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/__pycache__/clipboards.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/__pycache__/common.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/__pycache__/common.cpython-38.pyc new file mode 100644 index 0000000..ab39766 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/__pycache__/common.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/__pycache__/date_converters.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/__pycache__/date_converters.cpython-38.pyc new file mode 100644 index 0000000..3a3f9c4 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/__pycache__/date_converters.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/__pycache__/feather_format.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/__pycache__/feather_format.cpython-38.pyc new file mode 100644 index 0000000..87963bd Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/__pycache__/feather_format.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/__pycache__/gbq.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/__pycache__/gbq.cpython-38.pyc new file mode 100644 index 0000000..e685743 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/__pycache__/gbq.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/__pycache__/html.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/__pycache__/html.cpython-38.pyc new file mode 100644 index 0000000..b94cd94 Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/pandas/io/__pycache__/html.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/__pycache__/orc.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/__pycache__/orc.cpython-38.pyc new file mode 100644 index 0000000..02d46cc Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/__pycache__/orc.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/__pycache__/parquet.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/__pycache__/parquet.cpython-38.pyc new file mode 100644 index 0000000..a6f5e11 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/__pycache__/parquet.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/__pycache__/pickle.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/__pycache__/pickle.cpython-38.pyc new file mode 100644 index 0000000..35655a2 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/__pycache__/pickle.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/__pycache__/pytables.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/__pycache__/pytables.cpython-38.pyc new file mode 100644 index 0000000..2798213 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/__pycache__/pytables.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/__pycache__/spss.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/__pycache__/spss.cpython-38.pyc new file mode 100644 index 0000000..e297f1b Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/__pycache__/spss.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/__pycache__/sql.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/__pycache__/sql.cpython-38.pyc new file mode 100644 index 0000000..9857ae1 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/__pycache__/sql.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/__pycache__/stata.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/__pycache__/stata.cpython-38.pyc new file mode 100644 index 0000000..c12cde0 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/__pycache__/stata.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/__pycache__/xml.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/__pycache__/xml.cpython-38.pyc new file mode 100644 index 0000000..85e6c2e Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/__pycache__/xml.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/api.py b/.local/lib/python3.8/site-packages/pandas/io/api.py new file mode 100644 index 0000000..5926f21 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/api.py @@ -0,0 +1,40 @@ +""" +Data IO api +""" + +# flake8: noqa + +from pandas.io.clipboards import read_clipboard +from pandas.io.excel import ( + ExcelFile, + ExcelWriter, + read_excel, +) +from pandas.io.feather_format import read_feather +from pandas.io.gbq import read_gbq +from pandas.io.html import read_html +from pandas.io.json import read_json +from pandas.io.orc import read_orc +from pandas.io.parquet import read_parquet +from pandas.io.parsers import ( + read_csv, + read_fwf, + read_table, +) +from pandas.io.pickle import ( + read_pickle, + to_pickle, +) +from pandas.io.pytables import ( + 
HDFStore, + read_hdf, +) +from pandas.io.sas import read_sas +from pandas.io.spss import read_spss +from pandas.io.sql import ( + read_sql, + read_sql_query, + read_sql_table, +) +from pandas.io.stata import read_stata +from pandas.io.xml import read_xml diff --git a/.local/lib/python3.8/site-packages/pandas/io/clipboard/__init__.py b/.local/lib/python3.8/site-packages/pandas/io/clipboard/__init__.py new file mode 100644 index 0000000..94cda74 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/clipboard/__init__.py @@ -0,0 +1,676 @@ +""" +Pyperclip + +A cross-platform clipboard module for Python, +with copy & paste functions for plain text. +By Al Sweigart al@inventwithpython.com +BSD License + +Usage: + import pyperclip + pyperclip.copy('The text to be copied to the clipboard.') + spam = pyperclip.paste() + + if not pyperclip.is_available(): + print("Copy functionality unavailable!") + +On Windows, no additional modules are needed. +On Mac, the pyobjc module is used, falling back to the pbcopy and pbpaste cli + commands. (These commands should come with OS X.). +On Linux, install xclip or xsel via package manager. For example, in Debian: + sudo apt-get install xclip + sudo apt-get install xsel + +Otherwise on Linux, you will need the PyQt5 modules installed. + +This module does not work with PyGObject yet. + +Cygwin is currently not supported. + +Security Note: This module runs programs with these names: + - which + - where + - pbcopy + - pbpaste + - xclip + - xsel + - klipper + - qdbus +A malicious user could rename or add programs with these names, tricking +Pyperclip into running them with whatever permissions the Python process has. + +""" +__version__ = "1.7.0" + +import contextlib +import ctypes +from ctypes import ( + c_size_t, + c_wchar, + c_wchar_p, + get_errno, + sizeof, +) +import os +import platform +from shutil import which +import subprocess +import time +import warnings + +# `import PyQt4` sys.exit()s if DISPLAY is not in the environment. +# Thus, we need to detect the presence of $DISPLAY manually +# and not load PyQt4 if it is absent. +HAS_DISPLAY = os.getenv("DISPLAY", False) + +EXCEPT_MSG = """ + Pyperclip could not find a copy/paste mechanism for your system. + For more information, please visit + https://pyperclip.readthedocs.io/en/latest/#not-implemented-error + """ + +ENCODING = "utf-8" + +# The "which" unix command finds where a command is. +if platform.system() == "Windows": + WHICH_CMD = "where" +else: + WHICH_CMD = "which" + + +def _executable_exists(name): + return ( + subprocess.call( + [WHICH_CMD, name], stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) + == 0 + ) + + +# Exceptions +class PyperclipException(RuntimeError): + pass + + +class PyperclipWindowsException(PyperclipException): + def __init__(self, message): + message += f" ({ctypes.WinError()})" + super().__init__(message) + + +def _stringifyText(text) -> str: + acceptedTypes = (str, int, float, bool) + if not isinstance(text, acceptedTypes): + raise PyperclipException( + f"only str, int, float, and bool values " + f"can be copied to the clipboard, not {type(text).__name__}" + ) + return str(text) + + +def init_osx_pbcopy_clipboard(): + def copy_osx_pbcopy(text): + text = _stringifyText(text) # Converts non-str values to str. 
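+ # pbcopy takes the new clipboard contents on stdin, which is why the
+ # text is piped into the subprocess below rather than passed as a
+ # command-line argument. A minimal standalone equivalent (an
+ # illustrative sketch):
+ #
+ #     import subprocess
+ #     subprocess.run(["pbcopy"], input="example".encode("utf-8"))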
+ with subprocess.Popen( + ["pbcopy", "w"], stdin=subprocess.PIPE, close_fds=True + ) as p: + p.communicate(input=text.encode(ENCODING)) + + def paste_osx_pbcopy(): + with subprocess.Popen( + ["pbpaste", "r"], stdout=subprocess.PIPE, close_fds=True + ) as p: + stdout = p.communicate()[0] + return stdout.decode(ENCODING) + + return copy_osx_pbcopy, paste_osx_pbcopy + + +def init_osx_pyobjc_clipboard(): + def copy_osx_pyobjc(text): + """Copy string argument to clipboard""" + text = _stringifyText(text) # Converts non-str values to str. + newStr = Foundation.NSString.stringWithString_(text).nsstring() + newData = newStr.dataUsingEncoding_(Foundation.NSUTF8StringEncoding) + board = AppKit.NSPasteboard.generalPasteboard() + board.declareTypes_owner_([AppKit.NSStringPboardType], None) + board.setData_forType_(newData, AppKit.NSStringPboardType) + + def paste_osx_pyobjc(): + """Returns contents of clipboard""" + board = AppKit.NSPasteboard.generalPasteboard() + content = board.stringForType_(AppKit.NSStringPboardType) + return content + + return copy_osx_pyobjc, paste_osx_pyobjc + + +def init_qt_clipboard(): + global QApplication + # $DISPLAY should exist + + # Try to import from qtpy, but if that fails try PyQt5 then PyQt4 + try: + from qtpy.QtWidgets import QApplication + except ImportError: + try: + from PyQt5.QtWidgets import QApplication + except ImportError: + from PyQt4.QtGui import QApplication + + app = QApplication.instance() + if app is None: + app = QApplication([]) + + def copy_qt(text): + text = _stringifyText(text) # Converts non-str values to str. + cb = app.clipboard() + cb.setText(text) + + def paste_qt() -> str: + cb = app.clipboard() + return str(cb.text()) + + return copy_qt, paste_qt + + +def init_xclip_clipboard(): + DEFAULT_SELECTION = "c" + PRIMARY_SELECTION = "p" + + def copy_xclip(text, primary=False): + text = _stringifyText(text) # Converts non-str values to str. + selection = DEFAULT_SELECTION + if primary: + selection = PRIMARY_SELECTION + with subprocess.Popen( + ["xclip", "-selection", selection], stdin=subprocess.PIPE, close_fds=True + ) as p: + p.communicate(input=text.encode(ENCODING)) + + def paste_xclip(primary=False): + selection = DEFAULT_SELECTION + if primary: + selection = PRIMARY_SELECTION + with subprocess.Popen( + ["xclip", "-selection", selection, "-o"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + close_fds=True, + ) as p: + stdout = p.communicate()[0] + # Intentionally ignore extraneous output on stderr when clipboard is empty + return stdout.decode(ENCODING) + + return copy_xclip, paste_xclip + + +def init_xsel_clipboard(): + DEFAULT_SELECTION = "-b" + PRIMARY_SELECTION = "-p" + + def copy_xsel(text, primary=False): + text = _stringifyText(text) # Converts non-str values to str. + selection_flag = DEFAULT_SELECTION + if primary: + selection_flag = PRIMARY_SELECTION + with subprocess.Popen( + ["xsel", selection_flag, "-i"], stdin=subprocess.PIPE, close_fds=True + ) as p: + p.communicate(input=text.encode(ENCODING)) + + def paste_xsel(primary=False): + selection_flag = DEFAULT_SELECTION + if primary: + selection_flag = PRIMARY_SELECTION + with subprocess.Popen( + ["xsel", selection_flag, "-o"], stdout=subprocess.PIPE, close_fds=True + ) as p: + stdout = p.communicate()[0] + return stdout.decode(ENCODING) + + return copy_xsel, paste_xsel + + +def init_klipper_clipboard(): + def copy_klipper(text): + text = _stringifyText(text) # Converts non-str values to str. 
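+ # Klipper (KDE's clipboard manager) is driven over D-Bus through the
+ # qdbus CLI, so the copy below is a method call rather than a pipe.
+ # A rough standalone equivalent (an illustrative sketch):
+ #
+ #     import subprocess
+ #     subprocess.run(["qdbus", "org.kde.klipper", "/klipper",
+ #                     "setClipboardContents", "example"])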
+ with subprocess.Popen( + [ + "qdbus", + "org.kde.klipper", + "/klipper", + "setClipboardContents", + text.encode(ENCODING), + ], + stdin=subprocess.PIPE, + close_fds=True, + ) as p: + p.communicate(input=None) + + def paste_klipper(): + with subprocess.Popen( + ["qdbus", "org.kde.klipper", "/klipper", "getClipboardContents"], + stdout=subprocess.PIPE, + close_fds=True, + ) as p: + stdout = p.communicate()[0] + + # Workaround for https://bugs.kde.org/show_bug.cgi?id=342874 + # TODO: https://github.com/asweigart/pyperclip/issues/43 + clipboardContents = stdout.decode(ENCODING) + # even if blank, Klipper will append a newline at the end + assert len(clipboardContents) > 0 + # make sure that newline is there + assert clipboardContents.endswith("\n") + if clipboardContents.endswith("\n"): + clipboardContents = clipboardContents[:-1] + return clipboardContents + + return copy_klipper, paste_klipper + + +def init_dev_clipboard_clipboard(): + def copy_dev_clipboard(text): + text = _stringifyText(text) # Converts non-str values to str. + if text == "": + warnings.warn( + "Pyperclip cannot copy a blank string to the clipboard on Cygwin. " + "This is effectively a no-op." + ) + if "\r" in text: + warnings.warn("Pyperclip cannot handle \\r characters on Cygwin.") + + with open("/dev/clipboard", "wt") as fd: + fd.write(text) + + def paste_dev_clipboard() -> str: + with open("/dev/clipboard") as fd: + content = fd.read() + return content + + return copy_dev_clipboard, paste_dev_clipboard + + +def init_no_clipboard(): + class ClipboardUnavailable: + def __call__(self, *args, **kwargs): + raise PyperclipException(EXCEPT_MSG) + + def __bool__(self) -> bool: + return False + + return ClipboardUnavailable(), ClipboardUnavailable() + + +# Windows-related clipboard functions: +class CheckedCall: + def __init__(self, f): + super().__setattr__("f", f) + + def __call__(self, *args): + ret = self.f(*args) + if not ret and get_errno(): + raise PyperclipWindowsException("Error calling " + self.f.__name__) + return ret + + def __setattr__(self, key, value): + setattr(self.f, key, value) + + +def init_windows_clipboard(): + global HGLOBAL, LPVOID, DWORD, LPCSTR, INT + global HWND, HINSTANCE, HMENU, BOOL, UINT, HANDLE + from ctypes.wintypes import ( + BOOL, + DWORD, + HANDLE, + HGLOBAL, + HINSTANCE, + HMENU, + HWND, + INT, + LPCSTR, + LPVOID, + UINT, + ) + + windll = ctypes.windll + msvcrt = ctypes.CDLL("msvcrt") + + safeCreateWindowExA = CheckedCall(windll.user32.CreateWindowExA) + safeCreateWindowExA.argtypes = [ + DWORD, + LPCSTR, + LPCSTR, + DWORD, + INT, + INT, + INT, + INT, + HWND, + HMENU, + HINSTANCE, + LPVOID, + ] + safeCreateWindowExA.restype = HWND + + safeDestroyWindow = CheckedCall(windll.user32.DestroyWindow) + safeDestroyWindow.argtypes = [HWND] + safeDestroyWindow.restype = BOOL + + OpenClipboard = windll.user32.OpenClipboard + OpenClipboard.argtypes = [HWND] + OpenClipboard.restype = BOOL + + safeCloseClipboard = CheckedCall(windll.user32.CloseClipboard) + safeCloseClipboard.argtypes = [] + safeCloseClipboard.restype = BOOL + + safeEmptyClipboard = CheckedCall(windll.user32.EmptyClipboard) + safeEmptyClipboard.argtypes = [] + safeEmptyClipboard.restype = BOOL + + safeGetClipboardData = CheckedCall(windll.user32.GetClipboardData) + safeGetClipboardData.argtypes = [UINT] + safeGetClipboardData.restype = HANDLE + + safeSetClipboardData = CheckedCall(windll.user32.SetClipboardData) + safeSetClipboardData.argtypes = [UINT, HANDLE] + safeSetClipboardData.restype = HANDLE + + safeGlobalAlloc = 
CheckedCall(windll.kernel32.GlobalAlloc) + safeGlobalAlloc.argtypes = [UINT, c_size_t] + safeGlobalAlloc.restype = HGLOBAL + + safeGlobalLock = CheckedCall(windll.kernel32.GlobalLock) + safeGlobalLock.argtypes = [HGLOBAL] + safeGlobalLock.restype = LPVOID + + safeGlobalUnlock = CheckedCall(windll.kernel32.GlobalUnlock) + safeGlobalUnlock.argtypes = [HGLOBAL] + safeGlobalUnlock.restype = BOOL + + wcslen = CheckedCall(msvcrt.wcslen) + wcslen.argtypes = [c_wchar_p] + wcslen.restype = UINT + + GMEM_MOVEABLE = 0x0002 + CF_UNICODETEXT = 13 + + @contextlib.contextmanager + def window(): + """ + Context that provides a valid Windows hwnd. + """ + # we really just need the hwnd, so setting "STATIC" + # as predefined lpClass is just fine. + hwnd = safeCreateWindowExA( + 0, b"STATIC", None, 0, 0, 0, 0, 0, None, None, None, None + ) + try: + yield hwnd + finally: + safeDestroyWindow(hwnd) + + @contextlib.contextmanager + def clipboard(hwnd): + """ + Context manager that opens the clipboard and prevents + other applications from modifying the clipboard content. + """ + # We may not get the clipboard handle immediately because + # some other application is accessing it (?) + # We try for at least 500ms to get the clipboard. + t = time.time() + 0.5 + success = False + while time.time() < t: + success = OpenClipboard(hwnd) + if success: + break + time.sleep(0.01) + if not success: + raise PyperclipWindowsException("Error calling OpenClipboard") + + try: + yield + finally: + safeCloseClipboard() + + def copy_windows(text): + # This function is heavily based on + # http://msdn.com/ms649016#_win32_Copying_Information_to_the_Clipboard + + text = _stringifyText(text) # Converts non-str values to str. + + with window() as hwnd: + # http://msdn.com/ms649048 + # If an application calls OpenClipboard with hwnd set to NULL, + # EmptyClipboard sets the clipboard owner to NULL; + # this causes SetClipboardData to fail. + # => We need a valid hwnd to copy something. + with clipboard(hwnd): + safeEmptyClipboard() + + if text: + # http://msdn.com/ms649051 + # If the hMem parameter identifies a memory object, + # the object must have been allocated using the + # function with the GMEM_MOVEABLE flag. + count = wcslen(text) + 1 + handle = safeGlobalAlloc(GMEM_MOVEABLE, count * sizeof(c_wchar)) + locked_handle = safeGlobalLock(handle) + + ctypes.memmove( + c_wchar_p(locked_handle), + c_wchar_p(text), + count * sizeof(c_wchar), + ) + + safeGlobalUnlock(handle) + safeSetClipboardData(CF_UNICODETEXT, handle) + + def paste_windows(): + with clipboard(None): + handle = safeGetClipboardData(CF_UNICODETEXT) + if not handle: + # GetClipboardData may return NULL with errno == NO_ERROR + # if the clipboard is empty. + # (Also, it may return a handle to an empty buffer, + # but technically that's not empty) + return "" + return c_wchar_p(handle).value + + return copy_windows, paste_windows + + +def init_wsl_clipboard(): + def copy_wsl(text): + text = _stringifyText(text) # Converts non-str values to str. + with subprocess.Popen(["clip.exe"], stdin=subprocess.PIPE, close_fds=True) as p: + p.communicate(input=text.encode(ENCODING)) + + def paste_wsl(): + with subprocess.Popen( + ["powershell.exe", "-command", "Get-Clipboard"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + close_fds=True, + ) as p: + stdout = p.communicate()[0] + # WSL appends "\r\n" to the contents. 
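+ # e.g. b"hello\r\n"[:-2] == b"hello", so the trailing CRLF that Get-Clipboard adds is dropped before decoding.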
+ return stdout[:-2].decode(ENCODING) + + return copy_wsl, paste_wsl + + +# Automatic detection of clipboard mechanisms +# and importing is done in determine_clipboard(): +def determine_clipboard(): + """ + Determine the OS/platform and set the copy() and paste() functions + accordingly. + """ + global Foundation, AppKit, qtpy, PyQt4, PyQt5 + + # Setup for the CYGWIN platform: + if ( + "cygwin" in platform.system().lower() + ): # Cygwin has a variety of values returned by platform.system(), + # such as 'CYGWIN_NT-6.1' + # FIXME(pyperclip#55): pyperclip currently does not support Cygwin, + # see https://github.com/asweigart/pyperclip/issues/55 + if os.path.exists("/dev/clipboard"): + warnings.warn( + "Pyperclip's support for Cygwin is not perfect, " + "see https://github.com/asweigart/pyperclip/issues/55" + ) + return init_dev_clipboard_clipboard() + + # Setup for the WINDOWS platform: + elif os.name == "nt" or platform.system() == "Windows": + return init_windows_clipboard() + + if platform.system() == "Linux": + if which("wslconfig.exe"): + return init_wsl_clipboard() + + # Setup for the MAC OS X platform: + if os.name == "mac" or platform.system() == "Darwin": + try: + import AppKit + import Foundation # check if pyobjc is installed + except ImportError: + return init_osx_pbcopy_clipboard() + else: + return init_osx_pyobjc_clipboard() + + # Setup for the LINUX platform: + if HAS_DISPLAY: + if _executable_exists("xsel"): + return init_xsel_clipboard() + if _executable_exists("xclip"): + return init_xclip_clipboard() + if _executable_exists("klipper") and _executable_exists("qdbus"): + return init_klipper_clipboard() + + try: + # qtpy is a small abstraction layer that lets you write applications + # using a single api call to either PyQt or PySide. + # https://pypi.python.org/project/QtPy + import qtpy # check if qtpy is installed + except ImportError: + # If qtpy isn't installed, fall back on importing PyQt5, then PyQt4. + try: + import PyQt5 # check if PyQt5 is installed + except ImportError: + try: + import PyQt4 # check if PyQt4 is installed + except ImportError: + pass # We want to fail fast for all non-ImportError exceptions. + else: + return init_qt_clipboard() + else: + return init_qt_clipboard() + else: + return init_qt_clipboard() + + return init_no_clipboard() + + +def set_clipboard(clipboard): + """ + Explicitly sets the clipboard mechanism. The "clipboard mechanism" is how + the copy() and paste() functions interact with the operating system to + implement the copy/paste feature.
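For example, calling set_clipboard("xsel") forces the xsel mechanism on Linux regardless of what determine_clipboard() would have selected.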
The clipboard parameter must be one of: + - pbcopy + - pyobjc (default on Mac OS X) + - qt + - xclip + - xsel + - klipper + - windows (default on Windows) + - no (this is what is set when no clipboard mechanism can be found) + """ + global copy, paste + + clipboard_types = { + "pbcopy": init_osx_pbcopy_clipboard, + "pyobjc": init_osx_pyobjc_clipboard, + "qt": init_qt_clipboard, # TODO - split this into 'qtpy', 'pyqt4', and 'pyqt5' + "xclip": init_xclip_clipboard, + "xsel": init_xsel_clipboard, + "klipper": init_klipper_clipboard, + "windows": init_windows_clipboard, + "no": init_no_clipboard, + } + + if clipboard not in clipboard_types: + allowed_clipboard_types = [repr(_) for _ in clipboard_types.keys()] + raise ValueError( + f"Argument must be one of {', '.join(allowed_clipboard_types)}" + ) + + # Sets pyperclip's copy() and paste() functions: + copy, paste = clipboard_types[clipboard]() + + +def lazy_load_stub_copy(text): + """ + A stub function for copy(), which will load the real copy() function when + called so that the real copy() function is used for later calls. + + This allows users to import pyperclip without having determine_clipboard() + automatically run, which will automatically select a clipboard mechanism. + This could be a problem if it selects, say, the memory-heavy PyQt4 module + but the user was just going to immediately call set_clipboard() to use a + different clipboard mechanism. + + The lazy loading this stub function implements gives the user a chance to + call set_clipboard() to pick another clipboard mechanism. Or, if the user + simply calls copy() or paste() without calling set_clipboard() first, + the call will fall back on whatever clipboard mechanism determine_clipboard() + automatically chooses. + """ + global copy, paste + copy, paste = determine_clipboard() + return copy(text) + + +def lazy_load_stub_paste(): + """ + A stub function for paste(), which will load the real paste() function when + called so that the real paste() function is used for later calls. + + This allows users to import pyperclip without having determine_clipboard() + automatically run, which will automatically select a clipboard mechanism. + This could be a problem if it selects, say, the memory-heavy PyQt4 module + but the user was just going to immediately call set_clipboard() to use a + different clipboard mechanism. + + The lazy loading this stub function implements gives the user a chance to + call set_clipboard() to pick another clipboard mechanism. Or, if the user + simply calls copy() or paste() without calling set_clipboard() first, + the call will fall back on whatever clipboard mechanism determine_clipboard() + automatically chooses. + """ + global copy, paste + copy, paste = determine_clipboard() + return paste() + + +def is_available() -> bool: + return copy != lazy_load_stub_copy and paste != lazy_load_stub_paste + + +# Initially, copy() and paste() are set to lazy loading wrappers which will +# set `copy` and `paste` to real functions the first time they're used, unless +# set_clipboard() or determine_clipboard() is called first.
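+# A minimal usage sketch of the lazy loading described above (illustrative, not part of the module): +# +# import pandas.io.clipboard as clip +# clip.copy("spam") # the first call runs determine_clipboard() +# text = clip.paste() # later calls reuse the detected mechanism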
+copy, paste = lazy_load_stub_copy, lazy_load_stub_paste + + +__all__ = ["copy", "paste", "set_clipboard", "determine_clipboard"] + +# pandas aliases +clipboard_get = paste +clipboard_set = copy diff --git a/.local/lib/python3.8/site-packages/pandas/io/clipboard/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/clipboard/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..4f4e8ea Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/clipboard/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/clipboards.py b/.local/lib/python3.8/site-packages/pandas/io/clipboards.py new file mode 100644 index 0000000..0968f1f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/clipboards.py @@ -0,0 +1,149 @@ +""" io on the clipboard """ +from __future__ import annotations + +from io import StringIO +import warnings + +from pandas.core.dtypes.generic import ABCDataFrame + +from pandas import ( + get_option, + option_context, +) + + +def read_clipboard(sep: str = r"\s+", **kwargs): # pragma: no cover + r""" + Read text from clipboard and pass to read_csv. + + Parameters + ---------- + sep : str, default '\s+' + A string or regex delimiter. The default of '\s+' denotes + one or more whitespace characters. + + **kwargs + See read_csv for the full argument list. + + Returns + ------- + DataFrame + A parsed DataFrame object. + """ + encoding = kwargs.pop("encoding", "utf-8") + + # only utf-8 is valid for passed value because that's what clipboard + # supports + if encoding is not None and encoding.lower().replace("-", "") != "utf8": + raise NotImplementedError("reading from clipboard only supports utf-8 encoding") + + from pandas.io.clipboard import clipboard_get + from pandas.io.parsers import read_csv + + text = clipboard_get() + + # Try to decode (if needed, as "text" might already be a string here). + try: + text = text.decode(kwargs.get("encoding") or get_option("display.encoding")) + except AttributeError: + pass + + # Excel copies into clipboard with \t separation + # inspect no more than the first 10 lines; if they + # all contain an equal number (>0) of tabs, infer + # that this came from excel and set 'sep' accordingly + lines = text[:10000].split("\n")[:-1][:10] + + # Need to remove leading white space, since read_csv + # accepts: + # a b + # 0 1 2 + # 1 3 4 + + counts = {x.lstrip(" ").count("\t") for x in lines} + if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0: + sep = "\t" + # check the number of leading tabs in the first line + # to account for index columns + index_length = len(lines[0]) - len(lines[0].lstrip(" \t")) + if index_length != 0: + kwargs.setdefault("index_col", list(range(index_length))) + + # Edge case where sep is specified to be None, return to default + if sep is None and kwargs.get("delim_whitespace") is None: + sep = r"\s+" + + # Regex separator currently only works with python engine. + # Default to python if separator is multi-character (regex) + if len(sep) > 1 and kwargs.get("engine") is None: + kwargs["engine"] = "python" + elif len(sep) > 1 and kwargs.get("engine") == "c": + warnings.warn( + "read_clipboard with regex separator does not work properly with c engine."
+ ) + + return read_csv(StringIO(text), sep=sep, **kwargs) + + +def to_clipboard( + obj, excel: bool | None = True, sep: str | None = None, **kwargs +) -> None: # pragma: no cover + """ + Attempt to write text representation of object to the system clipboard. + The clipboard can then be pasted into Excel, for example. + + Parameters + ---------- + obj : the object to write to the clipboard + excel : bool, defaults to True + if True, use the provided separator, writing in a csv + format for allowing easy pasting into excel. + if False, write a string representation of the object + to the clipboard + sep : optional, defaults to tab + other keywords are passed to to_csv + + Notes + ----- + Requirements for your platform + - Linux: xclip, or xsel (with PyQt4 modules) + - Windows: none + - OS X: none + """ + encoding = kwargs.pop("encoding", "utf-8") + + # testing if an invalid encoding is passed to clipboard + if encoding is not None and encoding.lower().replace("-", "") != "utf8": + raise ValueError("clipboard only supports utf-8 encoding") + + from pandas.io.clipboard import clipboard_set + + if excel is None: + excel = True + + if excel: + try: + if sep is None: + sep = "\t" + buf = StringIO() + + # clipboard_set (pyperclip) expects unicode + obj.to_csv(buf, sep=sep, encoding="utf-8", **kwargs) + text = buf.getvalue() + + clipboard_set(text) + return + except TypeError: + warnings.warn( + "to_clipboard in excel mode requires a single character separator." + ) + elif sep is not None: + warnings.warn("to_clipboard with excel=False ignores the sep argument.") + + if isinstance(obj, ABCDataFrame): + # str(df) has various unhelpful defaults, like truncation + with option_context("display.max_colwidth", None): + objstr = obj.to_string(**kwargs) + else: + objstr = str(obj) + clipboard_set(objstr) diff --git a/.local/lib/python3.8/site-packages/pandas/io/common.py b/.local/lib/python3.8/site-packages/pandas/io/common.py new file mode 100644 index 0000000..eaf6f64 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/common.py @@ -0,0 +1,1148 @@ +"""Common IO api utilities""" +from __future__ import annotations + +import bz2 +import codecs +from collections import abc +import dataclasses +import functools +import gzip +from io import ( + BufferedIOBase, + BytesIO, + RawIOBase, + StringIO, + TextIOBase, + TextIOWrapper, +) +import mmap +import os +from pathlib import Path +import re +from typing import ( + IO, + Any, + AnyStr, + Generic, + Literal, + Mapping, + TypeVar, + cast, + overload, +) +from urllib.parse import ( + urljoin, + urlparse as parse_url, + uses_netloc, + uses_params, + uses_relative, +) +import warnings +import zipfile + +from pandas._typing import ( + BaseBuffer, + CompressionDict, + CompressionOptions, + FilePath, + ReadBuffer, + StorageOptions, + WriteBuffer, +) +from pandas.compat import get_lzma_file +from pandas.compat._optional import import_optional_dependency +from pandas.util._decorators import doc +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import is_file_like + +from pandas.core.shared_docs import _shared_docs + +_VALID_URLS = set(uses_relative + uses_netloc + uses_params) +_VALID_URLS.discard("") +_RFC_3986_PATTERN = re.compile(r"^[A-Za-z][A-Za-z0-9+\-+.]*://") + +BaseBufferT = TypeVar("BaseBufferT", bound=BaseBuffer) + + +@dataclasses.dataclass +class IOArgs: + """ + Return value of io/common.py:_get_filepath_or_buffer.
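+ + Bundles the resolved buffer together with the encoding, mode, and compression that the handle-opening code further down (get_handle) is expected to use.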
+ """ + + filepath_or_buffer: str | BaseBuffer + encoding: str + mode: str + compression: CompressionDict + should_close: bool = False + + +@dataclasses.dataclass +class IOHandles(Generic[AnyStr]): + """ + Return value of io/common.py:get_handle + + Can be used as a context manager. + + This is used to easily close created buffers and to handle corner cases when + TextIOWrapper is inserted. + + handle: The file handle to be used. + created_handles: All file handles that are created by get_handle + is_wrapped: Whether a TextIOWrapper needs to be detached. + """ + + # handle might not implement the IO-interface + handle: IO[AnyStr] + compression: CompressionDict + created_handles: list[IO[bytes] | IO[str]] = dataclasses.field(default_factory=list) + is_wrapped: bool = False + is_mmap: bool = False + + def close(self) -> None: + """ + Close all created buffers. + + Note: If a TextIOWrapper was inserted, it is flushed and detached to + avoid closing the potentially user-created buffer. + """ + if self.is_wrapped: + assert isinstance(self.handle, TextIOWrapper) + self.handle.flush() + self.handle.detach() + self.created_handles.remove(self.handle) + try: + for handle in self.created_handles: + handle.close() + except (OSError, ValueError): + pass + self.created_handles = [] + self.is_wrapped = False + + def __enter__(self) -> IOHandles[AnyStr]: + return self + + def __exit__(self, *args: Any) -> None: + self.close() + + +def is_url(url: object) -> bool: + """ + Check to see if a URL has a valid protocol. + + Parameters + ---------- + url : str or unicode + + Returns + ------- + isurl : bool + If `url` has a valid protocol return True otherwise False. + """ + if not isinstance(url, str): + return False + return parse_url(url).scheme in _VALID_URLS + + +@overload +def _expand_user(filepath_or_buffer: str) -> str: + ... + + +@overload +def _expand_user(filepath_or_buffer: BaseBufferT) -> BaseBufferT: + ... + + +def _expand_user(filepath_or_buffer: str | BaseBufferT) -> str | BaseBufferT: + """ + Return the argument with an initial component of ~ or ~user + replaced by that user's home directory. + + Parameters + ---------- + filepath_or_buffer : object to be converted if possible + + Returns + ------- + expanded_filepath_or_buffer : an expanded filepath or the + input if not expandable + """ + if isinstance(filepath_or_buffer, str): + return os.path.expanduser(filepath_or_buffer) + return filepath_or_buffer + + +def validate_header_arg(header: object) -> None: + if isinstance(header, bool): + raise TypeError( + "Passing a bool to header is invalid. Use header=None for no header or " + "header=int or list-like of ints to specify " + "the row(s) making up the column names" + ) + + +@overload +def stringify_path(filepath_or_buffer: FilePath, convert_file_like: bool = ...) -> str: + ... + + +@overload +def stringify_path( + filepath_or_buffer: BaseBufferT, convert_file_like: bool = ... +) -> BaseBufferT: + ... + + +def stringify_path( + filepath_or_buffer: FilePath | BaseBufferT, + convert_file_like: bool = False, +) -> str | BaseBufferT: + """ + Attempt to convert a path-like object to a string. + + Parameters + ---------- + filepath_or_buffer : object to be converted + + Returns + ------- + str_filepath_or_buffer : maybe a string version of the object + + Notes + ----- + Objects supporting the fspath protocol (python 3.6+) are coerced + according to its __fspath__ method. 
+ + Any other object is passed through unchanged, which includes bytes, + strings, buffers, or anything else that's not even path-like. + """ + if not convert_file_like and is_file_like(filepath_or_buffer): + # GH 38125: some fsspec objects implement os.PathLike but have already opened a + # file. This prevents opening the file a second time. infer_compression calls + # this function with convert_file_like=True to infer the compression. + return cast(BaseBufferT, filepath_or_buffer) + + if isinstance(filepath_or_buffer, os.PathLike): + filepath_or_buffer = filepath_or_buffer.__fspath__() + return _expand_user(filepath_or_buffer) + + +def urlopen(*args, **kwargs): + """ + Lazy-import wrapper for stdlib urlopen, as that imports a big chunk of + the stdlib. + """ + import urllib.request + + return urllib.request.urlopen(*args, **kwargs) + + +def is_fsspec_url(url: FilePath | BaseBuffer) -> bool: + """ + Returns true if the given URL looks like + something fsspec can handle + """ + return ( + isinstance(url, str) + and bool(_RFC_3986_PATTERN.match(url)) + and not url.startswith(("http://", "https://")) + ) + + +@doc(compression_options=_shared_docs["compression_options"] % "filepath_or_buffer") +def _get_filepath_or_buffer( + filepath_or_buffer: FilePath | BaseBuffer, + encoding: str = "utf-8", + compression: CompressionOptions = None, + mode: str = "r", + storage_options: StorageOptions = None, +) -> IOArgs: + """ + If the filepath_or_buffer is a url, translate and return the buffer. + Otherwise passthrough. + + Parameters + ---------- + filepath_or_buffer : a url, filepath (str, py.path.local or pathlib.Path), + or buffer + {compression_options} + + .. versionchanged:: 1.4.0 Zstandard support. + + encoding : the encoding to use to decode bytes, default is 'utf-8' + mode : str, optional + + storage_options : dict, optional + Extra options that make sense for a particular storage connection, e.g. + host, port, username, password, etc., if using a URL that will + be parsed by ``fsspec``, e.g., starting "s3://", "gcs://". An error + will be raised if providing this argument with a local path or + a file-like buffer. See the fsspec and backend storage implementation + docs for the set of allowed keys and values + + .. versionadded:: 1.2.0 + + .. versionchanged:: 1.2.0 + + Returns the dataclass IOArgs. + """ + filepath_or_buffer = stringify_path(filepath_or_buffer) + + # handle compression dict + compression_method, compression = get_compression_method(compression) + compression_method = infer_compression(filepath_or_buffer, compression_method) + + # GH21227 internal compression is not used for non-binary handles. + if compression_method and hasattr(filepath_or_buffer, "write") and "b" not in mode: + warnings.warn( + "compression has no effect when passing a non-binary object as input.", + RuntimeWarning, + stacklevel=find_stack_level(), + ) + compression_method = None + + compression = dict(compression, method=compression_method) + + # bz2 and xz do not write the byte order mark for utf-16 and utf-32 + # print a warning when writing such files + if ( + "w" in mode + and compression_method in ["bz2", "xz"] + and encoding in ["utf-16", "utf-32"] + ): + warnings.warn( + f"{compression} will not write the byte order mark for {encoding}", + UnicodeWarning, + ) + + # Use binary mode when converting path-like objects to file-like objects (fsspec) + # except when text mode is explicitly requested. The original mode is returned if + # fsspec is not used.
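+ # e.g. mode "r" becomes fsspec_mode "rb" below, while "rt" and "rb" pass through unchanged: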
+ fsspec_mode = mode + if "t" not in fsspec_mode and "b" not in fsspec_mode: + fsspec_mode += "b" + + if isinstance(filepath_or_buffer, str) and is_url(filepath_or_buffer): + # TODO: fsspec can also handle HTTP via requests, but leaving this + # unchanged. using fsspec appears to break the ability to infer if the + # server responded with gzipped data + storage_options = storage_options or {} + + # waiting until now for importing to match intended lazy logic of + # urlopen function defined elsewhere in this module + import urllib.request + + # assuming storage_options is to be interpreted as headers + req_info = urllib.request.Request(filepath_or_buffer, headers=storage_options) + with urlopen(req_info) as req: + content_encoding = req.headers.get("Content-Encoding", None) + if content_encoding == "gzip": + # Override compression based on Content-Encoding header + compression = {"method": "gzip"} + reader = BytesIO(req.read()) + return IOArgs( + filepath_or_buffer=reader, + encoding=encoding, + compression=compression, + should_close=True, + mode=fsspec_mode, + ) + + if is_fsspec_url(filepath_or_buffer): + assert isinstance( + filepath_or_buffer, str + ) # just to appease mypy for this branch + # two special-case s3-like protocols; these have special meaning in Hadoop, + # but are equivalent to just "s3" from fsspec's point of view + # cc #11071 + if filepath_or_buffer.startswith("s3a://"): + filepath_or_buffer = filepath_or_buffer.replace("s3a://", "s3://") + if filepath_or_buffer.startswith("s3n://"): + filepath_or_buffer = filepath_or_buffer.replace("s3n://", "s3://") + fsspec = import_optional_dependency("fsspec") + + # If botocore is installed we fallback to reading with anon=True + # to allow reads from public buckets + err_types_to_retry_with_anon: list[Any] = [] + try: + import_optional_dependency("botocore") + from botocore.exceptions import ( + ClientError, + NoCredentialsError, + ) + + err_types_to_retry_with_anon = [ + ClientError, + NoCredentialsError, + PermissionError, + ] + except ImportError: + pass + + try: + file_obj = fsspec.open( + filepath_or_buffer, mode=fsspec_mode, **(storage_options or {}) + ).open() + # GH 34626 Reads from Public Buckets without Credentials needs anon=True + except tuple(err_types_to_retry_with_anon): + if storage_options is None: + storage_options = {"anon": True} + else: + # don't mutate user input. 
+ storage_options = dict(storage_options) + storage_options["anon"] = True + file_obj = fsspec.open( + filepath_or_buffer, mode=fsspec_mode, **(storage_options or {}) + ).open() + + return IOArgs( + filepath_or_buffer=file_obj, + encoding=encoding, + compression=compression, + should_close=True, + mode=fsspec_mode, + ) + elif storage_options: + raise ValueError( + "storage_options passed with file object or non-fsspec file path" + ) + + if isinstance(filepath_or_buffer, (str, bytes, mmap.mmap)): + return IOArgs( + filepath_or_buffer=_expand_user(filepath_or_buffer), + encoding=encoding, + compression=compression, + should_close=False, + mode=mode, + ) + + # is_file_like requires (read | write) & __iter__ but __iter__ is only + # needed for read_csv(engine=python) + if not ( + hasattr(filepath_or_buffer, "read") or hasattr(filepath_or_buffer, "write") + ): + msg = f"Invalid file path or buffer object type: {type(filepath_or_buffer)}" + raise ValueError(msg) + + return IOArgs( + filepath_or_buffer=filepath_or_buffer, + encoding=encoding, + compression=compression, + should_close=False, + mode=mode, + ) + + +def file_path_to_url(path: str) -> str: + """ + converts an absolute native path to a FILE URL. + + Parameters + ---------- + path : a path in native format + + Returns + ------- + a valid FILE URL + """ + # lazify expensive import (~30ms) + from urllib.request import pathname2url + + return urljoin("file:", pathname2url(path)) + + +_compression_to_extension = { + "gzip": ".gz", + "bz2": ".bz2", + "zip": ".zip", + "xz": ".xz", + "zstd": ".zst", +} + + +def get_compression_method( + compression: CompressionOptions, +) -> tuple[str | None, CompressionDict]: + """ + Simplifies a compression argument to a compression method string and + a mapping containing additional arguments. + + Parameters + ---------- + compression : str or mapping + If string, specifies the compression method. If mapping, value at key + 'method' specifies compression method. + + Returns + ------- + tuple of ({compression method}, Optional[str] + {compression arguments}, Dict[str, Any]) + + Raises + ------ + ValueError on mapping missing 'method' key + """ + compression_method: str | None + if isinstance(compression, Mapping): + compression_args = dict(compression) + try: + compression_method = compression_args.pop("method") + except KeyError as err: + raise ValueError("If mapping, compression must have key 'method'") from err + else: + compression_args = {} + compression_method = compression + return compression_method, compression_args + + +@doc(compression_options=_shared_docs["compression_options"] % "filepath_or_buffer") +def infer_compression( + filepath_or_buffer: FilePath | BaseBuffer, compression: str | None +) -> str | None: + """ + Get the compression method for filepath_or_buffer. If compression='infer', + the inferred compression method is returned. Otherwise, the input + compression method is returned unchanged, unless it's invalid, in which + case an error is raised. + + Parameters + ---------- + filepath_or_buffer : str or file handle + File path or object. + {compression_options} + + .. versionchanged:: 1.4.0 Zstandard support. + + Returns + ------- + string or None + + Raises + ------ + ValueError on invalid compression specified. + """ + if compression is None: + return None + + # Infer compression + if compression == "infer": + # Convert all path types (e.g. 
pathlib.Path) to strings + filepath_or_buffer = stringify_path(filepath_or_buffer, convert_file_like=True) + if not isinstance(filepath_or_buffer, str): + # Cannot infer compression of a buffer, assume no compression + return None + + # Infer compression from the filename/URL extension + for compression, extension in _compression_to_extension.items(): + if filepath_or_buffer.lower().endswith(extension): + return compression + return None + + # Compression has been specified. Check that it's valid + if compression in _compression_to_extension: + return compression + + # https://github.com/python/mypy/issues/5492 + # Unsupported operand types for + ("List[Optional[str]]" and "List[str]") + valid = ["infer", None] + sorted( + _compression_to_extension + ) # type: ignore[operator] + msg = ( + f"Unrecognized compression type: {compression}\n" + f"Valid compression types are {valid}" + ) + raise ValueError(msg) + + +def check_parent_directory(path: Path | str) -> None: + """ + Check if parent directory of a file exists, raise OSError if it does not + + Parameters + ---------- + path: Path or str + Path to check parent directory of + + """ + parent = Path(path).parent + if not parent.is_dir(): + raise OSError(fr"Cannot save file into a non-existent directory: '{parent}'") + + +@overload +def get_handle( + path_or_buf: FilePath | BaseBuffer, + mode: str, + *, + encoding: str | None = ..., + compression: CompressionOptions = ..., + memory_map: bool = ..., + is_text: Literal[False], + errors: str | None = ..., + storage_options: StorageOptions = ..., +) -> IOHandles[bytes]: + ... + + +@overload +def get_handle( + path_or_buf: FilePath | BaseBuffer, + mode: str, + *, + encoding: str | None = ..., + compression: CompressionOptions = ..., + memory_map: bool = ..., + is_text: Literal[True] = ..., + errors: str | None = ..., + storage_options: StorageOptions = ..., +) -> IOHandles[str]: + ... + + +@doc(compression_options=_shared_docs["compression_options"] % "path_or_buf") +def get_handle( + path_or_buf: FilePath | BaseBuffer, + mode: str, + *, + encoding: str | None = None, + compression: CompressionOptions = None, + memory_map: bool = False, + is_text: bool = True, + errors: str | None = None, + storage_options: StorageOptions = None, +) -> IOHandles[str] | IOHandles[bytes]: + """ + Get file handle for given path/buffer and mode. + + Parameters + ---------- + path_or_buf : str or file handle + File path or object. + mode : str + Mode to open path_or_buf with. + encoding : str or None + Encoding to use. + {compression_options} + + .. versionchanged:: 1.0.0 + May now be a dict with key 'method' as compression mode + and other keys as compression options if compression + mode is 'zip'. + + .. versionchanged:: 1.1.0 + Passing compression options as keys in dict is now + supported for compression modes 'gzip', 'bz2', 'zstd' and 'zip'. + + .. versionchanged:: 1.4.0 Zstandard support. + + memory_map : bool, default False + See parsers._parser_params for more information. + is_text : bool, default True + Whether the type of the content passed to the file/buffer is string or + bytes. This is not the same as `"b" not in mode`. If a string content is + passed to a binary file/buffer, a wrapper is inserted. + errors : str, default 'strict' + Specifies how encoding and decoding errors are to be handled. + See the errors argument for :func:`open` for a full list + of options. + storage_options: StorageOptions = None + Passed to _get_filepath_or_buffer + + .. 
versionchanged:: 1.2.0 + + Returns the dataclass IOHandles + """ + # Windows does not default to utf-8. Set to utf-8 for a consistent behavior + encoding = encoding or "utf-8" + + # read_csv does not know whether the buffer is opened in binary/text mode + if _is_binary_mode(path_or_buf, mode) and "b" not in mode: + mode += "b" + + # validate encoding and errors + codecs.lookup(encoding) + if isinstance(errors, str): + codecs.lookup_error(errors) + + # open URLs + ioargs = _get_filepath_or_buffer( + path_or_buf, + encoding=encoding, + compression=compression, + mode=mode, + storage_options=storage_options, + ) + + handle = ioargs.filepath_or_buffer + handles: list[BaseBuffer] + + # memory mapping needs to be the first step + handle, memory_map, handles = _maybe_memory_map( + handle, + memory_map, + ioargs.encoding, + ioargs.mode, + errors, + ioargs.compression["method"] not in _compression_to_extension, + ) + + is_path = isinstance(handle, str) + compression_args = dict(ioargs.compression) + compression = compression_args.pop("method") + + # Only for write methods + if "r" not in mode and is_path: + check_parent_directory(str(handle)) + + if compression: + if compression != "zstd": + # compression libraries do not like an explicit text-mode + ioargs.mode = ioargs.mode.replace("t", "") + elif compression == "zstd" and "b" not in ioargs.mode: + # python-zstandard defaults to text mode, but we always expect + # compression libraries to use binary mode. + ioargs.mode += "b" + + # GZ Compression + if compression == "gzip": + if is_path: + assert isinstance(handle, str) + # error: Incompatible types in assignment (expression has type + # "GzipFile", variable has type "Union[str, BaseBuffer]") + handle = gzip.GzipFile( # type: ignore[assignment] + filename=handle, + mode=ioargs.mode, + **compression_args, + ) + else: + handle = gzip.GzipFile( + # No overload variant of "GzipFile" matches argument types + # "Union[str, BaseBuffer]", "str", "Dict[str, Any]" + fileobj=handle, # type: ignore[call-overload] + mode=ioargs.mode, + **compression_args, + ) + + # BZ Compression + elif compression == "bz2": + # No overload variant of "BZ2File" matches argument types + # "Union[str, BaseBuffer]", "str", "Dict[str, Any]" + handle = bz2.BZ2File( # type: ignore[call-overload] + handle, + mode=ioargs.mode, + **compression_args, + ) + + # ZIP Compression + elif compression == "zip": + # error: Argument 1 to "_BytesZipFile" has incompatible type "Union[str, + # BaseBuffer]"; expected "Union[Union[str, PathLike[str]], + # ReadBuffer[bytes], WriteBuffer[bytes]]" + handle = _BytesZipFile( + handle, ioargs.mode, **compression_args # type: ignore[arg-type] + ) + if handle.mode == "r": + handles.append(handle) + zip_names = handle.namelist() + if len(zip_names) == 1: + handle = handle.open(zip_names.pop()) + elif len(zip_names) == 0: + raise ValueError(f"Zero files found in ZIP file {path_or_buf}") + else: + raise ValueError( + "Multiple files found in ZIP file. 
" + f"Only one file per ZIP: {zip_names}" + ) + + # XZ Compression + elif compression == "xz": + handle = get_lzma_file()(handle, ioargs.mode) + + # Zstd Compression + elif compression == "zstd": + zstd = import_optional_dependency("zstandard") + if "r" in ioargs.mode: + open_args = {"dctx": zstd.ZstdDecompressor(**compression_args)} + else: + open_args = {"cctx": zstd.ZstdCompressor(**compression_args)} + handle = zstd.open( + handle, + mode=ioargs.mode, + **open_args, + ) + + # Unrecognized Compression + else: + msg = f"Unrecognized compression type: {compression}" + raise ValueError(msg) + + assert not isinstance(handle, str) + handles.append(handle) + + elif isinstance(handle, str): + # Check whether the filename is to be opened in binary mode. + # Binary mode does not support 'encoding' and 'newline'. + if ioargs.encoding and "b" not in ioargs.mode: + # Encoding + handle = open( + handle, + ioargs.mode, + encoding=ioargs.encoding, + errors=errors, + newline="", + ) + else: + # Binary mode + handle = open(handle, ioargs.mode) + handles.append(handle) + + # Convert BytesIO or file objects passed with an encoding + is_wrapped = False + if not is_text and ioargs.mode == "rb" and isinstance(handle, TextIOBase): + # not added to handles as it does not open/buffer resources + handle = _BytesIOWrapper( + handle, + encoding=ioargs.encoding, + ) + elif is_text and (compression or _is_binary_mode(handle, ioargs.mode)): + handle = TextIOWrapper( + # error: Argument 1 to "TextIOWrapper" has incompatible type + # "Union[IO[bytes], IO[Any], RawIOBase, BufferedIOBase, TextIOBase, mmap]"; + # expected "IO[bytes]" + _IOWrapper(handle), # type: ignore[arg-type] + encoding=ioargs.encoding, + errors=errors, + newline="", + ) + handles.append(handle) + # only marked as wrapped when the caller provided a handle + is_wrapped = not ( + isinstance(ioargs.filepath_or_buffer, str) or ioargs.should_close + ) + + if "r" in ioargs.mode and not hasattr(handle, "read"): + raise TypeError( + "Expected file path name or file-like object, " + f"got {type(ioargs.filepath_or_buffer)} type" + ) + + handles.reverse() # close the most recently added buffer first + if ioargs.should_close: + assert not isinstance(ioargs.filepath_or_buffer, str) + handles.append(ioargs.filepath_or_buffer) + + return IOHandles( + # error: Argument "handle" to "IOHandles" has incompatible type + # "Union[TextIOWrapper, GzipFile, BaseBuffer, typing.IO[bytes], + # typing.IO[Any]]"; expected "pandas._typing.IO[Any]" + handle=handle, # type: ignore[arg-type] + # error: Argument "created_handles" to "IOHandles" has incompatible type + # "List[BaseBuffer]"; expected "List[Union[IO[bytes], IO[str]]]" + created_handles=handles, # type: ignore[arg-type] + is_wrapped=is_wrapped, + is_mmap=memory_map, + compression=ioargs.compression, + ) + + +# error: Definition of "__exit__" in base class "ZipFile" is incompatible with +# definition in base class "BytesIO" [misc] +# error: Definition of "__enter__" in base class "ZipFile" is incompatible with +# definition in base class "BytesIO" [misc] +# error: Definition of "__enter__" in base class "ZipFile" is incompatible with +# definition in base class "BinaryIO" [misc] +# error: Definition of "__enter__" in base class "ZipFile" is incompatible with +# definition in base class "IO" [misc] +# error: Definition of "read" in base class "ZipFile" is incompatible with +# definition in base class "BytesIO" [misc] +# error: Definition of "read" in base class "ZipFile" is incompatible with +# definition in base class "IO" 
[misc] +class _BytesZipFile(zipfile.ZipFile, BytesIO): # type: ignore[misc] + """ + Wrapper for the standard library class ZipFile that allows the returned file-like + handle to accept byte strings via the `write` method. + + BytesIO provides attributes of file-like object and ZipFile.writestr writes + bytes strings into a member of the archive. + """ + + # GH 17778 + def __init__( + self, + file: FilePath | ReadBuffer[bytes] | WriteBuffer[bytes], + mode: str, + archive_name: str | None = None, + **kwargs, + ): + mode = mode.replace("b", "") + self.archive_name = archive_name + self.multiple_write_buffer: StringIO | BytesIO | None = None + + kwargs_zip: dict[str, Any] = {"compression": zipfile.ZIP_DEFLATED} + kwargs_zip.update(kwargs) + + # error: Argument 1 to "__init__" of "ZipFile" has incompatible type + # "Union[_PathLike[str], Union[str, Union[IO[Any], RawIOBase, BufferedIOBase, + # TextIOBase, TextIOWrapper, mmap]]]"; expected "Union[Union[str, + # _PathLike[str]], IO[bytes]]" + super().__init__(file, mode, **kwargs_zip) # type: ignore[arg-type] + + def infer_filename(self): + """ + If an explicit archive_name is not given, we still want the file inside the zip + file not to be named something.zip, because that causes confusion (GH39465). + """ + if isinstance(self.filename, (os.PathLike, str)): + filename = Path(self.filename) + if filename.suffix == ".zip": + return filename.with_suffix("").name + return filename.name + return None + + def write(self, data): + # buffer multiple write calls, write on flush + if self.multiple_write_buffer is None: + self.multiple_write_buffer = ( + BytesIO() if isinstance(data, bytes) else StringIO() + ) + self.multiple_write_buffer.write(data) + + def flush(self) -> None: + # write to actual handle and close write buffer + if self.multiple_write_buffer is None or self.multiple_write_buffer.closed: + return + + # ZipFile needs a non-empty string + archive_name = self.archive_name or self.infer_filename() or "zip" + with self.multiple_write_buffer: + super().writestr(archive_name, self.multiple_write_buffer.getvalue()) + + def close(self): + self.flush() + super().close() + + @property + def closed(self): + return self.fp is None + + +class _MMapWrapper(abc.Iterator): + """ + Wrapper for Python's mmap class so that it can be properly read in + by Python's csv.reader class. + + Parameters + ---------- + f : file object + File object to be mapped onto memory. Must support the 'fileno' + method or have an equivalent attribute + + """ + + def __init__( + self, + f: IO, + encoding: str = "utf-8", + errors: str = "strict", + decode: bool = True, + ): + self.encoding = encoding + self.errors = errors + self.decoder = codecs.getincrementaldecoder(encoding)(errors=errors) + self.decode = decode + + self.attributes = {} + for attribute in ("seekable", "readable"): + if not hasattr(f, attribute): + continue + self.attributes[attribute] = getattr(f, attribute)() + self.mmap = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) + + def __getattr__(self, name: str): + if name in self.attributes: + return lambda: self.attributes[name] + return getattr(self.mmap, name) + + def __iter__(self) -> _MMapWrapper: + return self + + def read(self, size: int = -1) -> str | bytes: + # CSV c-engine uses read instead of iterating + content: bytes = self.mmap.read(size) + if self.decode and self.encoding != "utf-8": + # memory mapping is applied before compression. Encoding should + # be applied to the de-compressed data.
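+ # "final" tells the incremental decoder whether more input may follow: a short read (or size == -1) means EOF was reached.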
final = size == -1 or len(content) < size + return self.decoder.decode(content, final=final) + return content + + def __next__(self) -> str: + newbytes = self.mmap.readline() + + # readline returns bytes, not str, but Python's CSV reader + # expects str, so convert the output to str before continuing + newline = self.decoder.decode(newbytes) + + # mmap doesn't raise if reading past the allocated + # data but instead returns an empty string, so raise + # if that is returned + if newline == "": + raise StopIteration + + # IncrementalDecoder seems to push newline to the next line + return newline.lstrip("\n") + + +class _IOWrapper: + # TextIOWrapper is overly strict: it requires that the buffer be seekable, readable, + # and writable. If we have a read-only buffer, we shouldn't need writable and vice + # versa. Some buffers are seek/read/writ-able but they do not have the "-able" + # methods, e.g., tempfile.SpooledTemporaryFile. + # If a buffer does not have the above "-able" methods, we simply assume they are + # seek/read/writ-able. + def __init__(self, buffer: BaseBuffer): + self.buffer = buffer + + def __getattr__(self, name: str): + return getattr(self.buffer, name) + + def readable(self) -> bool: + if hasattr(self.buffer, "readable"): + # error: "BaseBuffer" has no attribute "readable" + return self.buffer.readable() # type: ignore[attr-defined] + return True + + def seekable(self) -> bool: + if hasattr(self.buffer, "seekable"): + return self.buffer.seekable() + return True + + def writable(self) -> bool: + if hasattr(self.buffer, "writable"): + # error: "BaseBuffer" has no attribute "writable" + return self.buffer.writable() # type: ignore[attr-defined] + return True + + +class _BytesIOWrapper: + # Wrapper that wraps a StringIO buffer and reads bytes from it + # Created for compat with pyarrow read_csv + def __init__(self, buffer: StringIO | TextIOBase, encoding: str = "utf-8"): + self.buffer = buffer + self.encoding = encoding + # Because a character can be represented by more than 1 byte, + # it is possible that reading will produce more bytes than n + # We store the extra bytes in this overflow variable, and append the + # overflow to the front of the bytestring the next time reading is performed + self.overflow = b"" + + def __getattr__(self, attr: str): + return getattr(self.buffer, attr) + + def read(self, n: int | None = -1) -> bytes: + assert self.buffer is not None + bytestring = self.buffer.read(n).encode(self.encoding) + # When n=-1/n greater than remaining bytes: Read entire file/rest of file + combined_bytestring = self.overflow + bytestring + if n is None or n < 0 or n >= len(combined_bytestring): + self.overflow = b"" + return combined_bytestring + else: + to_return = combined_bytestring[:n] + self.overflow = combined_bytestring[n:] + return to_return + + +def _maybe_memory_map( + handle: str | BaseBuffer, + memory_map: bool, + encoding: str, + mode: str, + errors: str | None, + decode: bool, +) -> tuple[str | BaseBuffer, bool, list[BaseBuffer]]: + """Try to memory map file/buffer.""" + handles: list[BaseBuffer] = [] + memory_map &= hasattr(handle, "fileno") or isinstance(handle, str) + if not memory_map: + return handle, memory_map, handles + + # need to open the file first + if isinstance(handle, str): + if encoding and "b" not in mode: + # Encoding + handle = open(handle, mode, encoding=encoding, errors=errors, newline="") + else: + # Binary mode + handle = open(handle, mode) + handles.append(handle) + + # error: Argument 1 to "_MMapWrapper" has incompatible type
"Union[IO[Any], + # RawIOBase, BufferedIOBase, TextIOBase, mmap]"; expected "IO[Any]" + try: + wrapped = cast( + BaseBuffer, + _MMapWrapper(handle, encoding, errors, decode), # type: ignore[arg-type] + ) + finally: + for handle in reversed(handles): + # error: "BaseBuffer" has no attribute "close" + handle.close() # type: ignore[attr-defined] + handles.append(wrapped) + + return wrapped, memory_map, handles + + +def file_exists(filepath_or_buffer: FilePath | BaseBuffer) -> bool: + """Test whether file exists.""" + exists = False + filepath_or_buffer = stringify_path(filepath_or_buffer) + if not isinstance(filepath_or_buffer, str): + return exists + try: + exists = os.path.exists(filepath_or_buffer) + # gh-5874: if the filepath is too long will raise here + except (TypeError, ValueError): + pass + return exists + + +def _is_binary_mode(handle: FilePath | BaseBuffer, mode: str) -> bool: + """Whether the handle is opened in binary mode""" + # specified by user + if "t" in mode or "b" in mode: + return "b" in mode + + # exceptions + text_classes = ( + # classes that expect string but have 'b' in mode + codecs.StreamWriter, + codecs.StreamReader, + codecs.StreamReaderWriter, + ) + if issubclass(type(handle), text_classes): + return False + + return isinstance(handle, _get_binary_io_classes()) or "b" in getattr( + handle, "mode", mode + ) + + +@functools.lru_cache +def _get_binary_io_classes() -> tuple[type, ...]: + """IO classes that that expect bytes""" + binary_classes: tuple[type, ...] = (BufferedIOBase, RawIOBase) + + # python-zstandard doesn't use any of the builtin base classes; instead we + # have to use the `zstd.ZstdDecompressionReader` class for isinstance checks. + # Unfortunately `zstd.ZstdDecompressionReader` isn't exposed by python-zstandard + # so we have to get it from a `zstd.ZstdDecompressor` instance. + # See also https://github.com/indygreg/python-zstandard/pull/165. + zstd = import_optional_dependency("zstandard", errors="ignore") + if zstd is not None: + with zstd.ZstdDecompressor().stream_reader(b"") as reader: + binary_classes += (type(reader),) + + return binary_classes diff --git a/.local/lib/python3.8/site-packages/pandas/io/date_converters.py b/.local/lib/python3.8/site-packages/pandas/io/date_converters.py new file mode 100644 index 0000000..077524f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/date_converters.py @@ -0,0 +1,126 @@ +"""This module is designed for community supported date conversion functions""" +import warnings + +import numpy as np + +from pandas._libs.tslibs import parsing +from pandas.util._exceptions import find_stack_level + + +def parse_date_time(date_col, time_col): + """ + Parse columns with dates and times into a single datetime column. + + .. deprecated:: 1.2 + """ + warnings.warn( + """ + Use pd.to_datetime(date_col + " " + time_col) instead to get a Pandas Series. + Use pd.to_datetime(date_col + " " + time_col).to_pydatetime() instead to get a Numpy array. +""", # noqa: E501 + FutureWarning, + stacklevel=find_stack_level(), + ) + date_col = _maybe_cast(date_col) + time_col = _maybe_cast(time_col) + return parsing.try_parse_date_and_time(date_col, time_col) + + +def parse_date_fields(year_col, month_col, day_col): + """ + Parse columns with years, months and days into a single date column. + + .. deprecated:: 1.2 + """ + warnings.warn( + """ + Use pd.to_datetime({"year": year_col, "month": month_col, "day": day_col}) instead to get a Pandas Series. 
Use ser = pd.to_datetime({"year": year_col, "month": month_col, "day": day_col}) and + np.array([s.to_pydatetime() for s in ser]) instead to get a Numpy array. +""", # noqa: E501 + FutureWarning, + stacklevel=find_stack_level(), + ) + + year_col = _maybe_cast(year_col) + month_col = _maybe_cast(month_col) + day_col = _maybe_cast(day_col) + return parsing.try_parse_year_month_day(year_col, month_col, day_col) + + +def parse_all_fields(year_col, month_col, day_col, hour_col, minute_col, second_col): + """ + Parse columns with datetime information into a single datetime column. + + .. deprecated:: 1.2 + """ + + warnings.warn( + """ + Use pd.to_datetime({"year": year_col, "month": month_col, "day": day_col, + "hour": hour_col, "minute": minute_col, "second": second_col}) instead to get a Pandas Series. + Use ser = pd.to_datetime({"year": year_col, "month": month_col, "day": day_col, + "hour": hour_col, "minute": minute_col, "second": second_col}) and + np.array([s.to_pydatetime() for s in ser]) instead to get a Numpy array. +""", # noqa: E501 + FutureWarning, + stacklevel=find_stack_level(), + ) + + year_col = _maybe_cast(year_col) + month_col = _maybe_cast(month_col) + day_col = _maybe_cast(day_col) + hour_col = _maybe_cast(hour_col) + minute_col = _maybe_cast(minute_col) + second_col = _maybe_cast(second_col) + return parsing.try_parse_datetime_components( + year_col, month_col, day_col, hour_col, minute_col, second_col + ) + + +def generic_parser(parse_func, *cols): + """ + Use dateparser to parse columns with date information into a single datetime column. + + .. deprecated:: 1.2 + """ + + warnings.warn( + """ + Use pd.to_datetime instead. +""", + FutureWarning, + stacklevel=find_stack_level(), + ) + + N = _check_columns(cols) + results = np.empty(N, dtype=object) + + for i in range(N): + args = [c[i] for c in cols] + results[i] = parse_func(*args) + + return results + + +def _maybe_cast(arr: np.ndarray) -> np.ndarray: + if not arr.dtype.type == np.object_: + arr = np.array(arr, dtype=object) + return arr + + +def _check_columns(cols) -> int: + if not len(cols): + raise AssertionError("There must be at least 1 column") + + head, tail = cols[0], cols[1:] + + N = len(head) + + for i, n in enumerate(map(len, tail)): + if n != N: + raise AssertionError( + f"All columns must have the same length: {N}; column {i} has length {n}" + ) + + return N diff --git a/.local/lib/python3.8/site-packages/pandas/io/excel/__init__.py b/.local/lib/python3.8/site-packages/pandas/io/excel/__init__.py new file mode 100644 index 0000000..854e2a1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/excel/__init__.py @@ -0,0 +1,24 @@ +from pandas.io.excel._base import ( + ExcelFile, + ExcelWriter, + read_excel, +) +from pandas.io.excel._odswriter import ODSWriter as _ODSWriter +from pandas.io.excel._openpyxl import OpenpyxlWriter as _OpenpyxlWriter +from pandas.io.excel._util import register_writer +from pandas.io.excel._xlsxwriter import XlsxWriter as _XlsxWriter +from pandas.io.excel._xlwt import XlwtWriter as _XlwtWriter + +__all__ = ["read_excel", "ExcelWriter", "ExcelFile"] + + +register_writer(_OpenpyxlWriter) + + +register_writer(_XlwtWriter) + + +register_writer(_XlsxWriter) + + +register_writer(_ODSWriter) diff --git a/.local/lib/python3.8/site-packages/pandas/io/excel/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/excel/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..679bd06 Binary files /dev/null and
b/.local/lib/python3.8/site-packages/pandas/io/excel/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/excel/__pycache__/_base.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/excel/__pycache__/_base.cpython-38.pyc new file mode 100644 index 0000000..865d5e8 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/excel/__pycache__/_base.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/excel/__pycache__/_odfreader.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/excel/__pycache__/_odfreader.cpython-38.pyc new file mode 100644 index 0000000..c3e771f Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/excel/__pycache__/_odfreader.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/excel/__pycache__/_odswriter.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/excel/__pycache__/_odswriter.cpython-38.pyc new file mode 100644 index 0000000..da1a901 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/excel/__pycache__/_odswriter.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/excel/__pycache__/_openpyxl.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/excel/__pycache__/_openpyxl.cpython-38.pyc new file mode 100644 index 0000000..f098e7f Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/excel/__pycache__/_openpyxl.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/excel/__pycache__/_pyxlsb.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/excel/__pycache__/_pyxlsb.cpython-38.pyc new file mode 100644 index 0000000..5b47d58 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/excel/__pycache__/_pyxlsb.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/excel/__pycache__/_util.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/excel/__pycache__/_util.cpython-38.pyc new file mode 100644 index 0000000..1fdc828 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/excel/__pycache__/_util.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/excel/__pycache__/_xlrd.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/excel/__pycache__/_xlrd.cpython-38.pyc new file mode 100644 index 0000000..4ec940b Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/excel/__pycache__/_xlrd.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/excel/__pycache__/_xlsxwriter.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/excel/__pycache__/_xlsxwriter.cpython-38.pyc new file mode 100644 index 0000000..4dccdbf Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/excel/__pycache__/_xlsxwriter.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/excel/__pycache__/_xlwt.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/excel/__pycache__/_xlwt.cpython-38.pyc new file mode 100644 index 0000000..7299dfd Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/excel/__pycache__/_xlwt.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/excel/_base.py b/.local/lib/python3.8/site-packages/pandas/io/excel/_base.py new file mode 100644 index 0000000..b490244 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/excel/_base.py @@ -0,0 
+1,1505 @@ +from __future__ import annotations + +import abc +import datetime +from io import BytesIO +import os +from textwrap import fill +from typing import ( + IO, + Any, + Callable, + Hashable, + Iterable, + List, + Literal, + Mapping, + Sequence, + Union, + cast, + overload, +) +import warnings +import zipfile + +from pandas._config import config + +from pandas._libs.parsers import STR_NA_VALUES +from pandas._typing import ( + DtypeArg, + FilePath, + IntStrT, + ReadBuffer, + StorageOptions, + WriteExcelBuffer, +) +from pandas.compat._optional import ( + get_version, + import_optional_dependency, +) +from pandas.errors import EmptyDataError +from pandas.util._decorators import ( + Appender, + deprecate_nonkeyword_arguments, + doc, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + is_bool, + is_float, + is_integer, + is_list_like, +) + +from pandas.core.frame import DataFrame +from pandas.core.shared_docs import _shared_docs +from pandas.util.version import Version + +from pandas.io.common import ( + IOHandles, + get_handle, + stringify_path, + validate_header_arg, +) +from pandas.io.excel._util import ( + fill_mi_header, + get_default_engine, + get_writer, + maybe_convert_usecols, + pop_header_name, +) +from pandas.io.parsers import TextParser + +_read_excel_doc = ( + """ +Read an Excel file into a pandas DataFrame. + +Supports `xls`, `xlsx`, `xlsm`, `xlsb`, `odf`, `ods` and `odt` file extensions +read from a local filesystem or URL. Supports an option to read +a single sheet or a list of sheets. + +Parameters +---------- +io : str, bytes, ExcelFile, xlrd.Book, path object, or file-like object + Any valid string path is acceptable. The string could be a URL. Valid + URL schemes include http, ftp, s3, and file. For file URLs, a host is + expected. A local file could be: ``file://localhost/path/to/table.xlsx``. + + If you want to pass in a path object, pandas accepts any ``os.PathLike``. + + By file-like object, we refer to objects with a ``read()`` method, + such as a file handle (e.g. via builtin ``open`` function) + or ``StringIO``. +sheet_name : str, int, list, or None, default 0 + Strings are used for sheet names. Integers are used in zero-indexed + sheet positions (chart sheets do not count as a sheet position). + Lists of strings/integers are used to request multiple sheets. + Specify None to get all worksheets. + + Available cases: + + * Defaults to ``0``: 1st sheet as a `DataFrame` + * ``1``: 2nd sheet as a `DataFrame` + * ``"Sheet1"``: Load sheet with name "Sheet1" + * ``[0, 1, "Sheet5"]``: Load first, second and sheet named "Sheet5" + as a dict of `DataFrame` + * None: All worksheets. + +header : int, list of int, default 0 + Row (0-indexed) to use for the column labels of the parsed + DataFrame. If a list of integers is passed those row positions will + be combined into a ``MultiIndex``. Use None if there is no header. +names : array-like, default None + List of column names to use. If file contains no header row, + then you should explicitly pass header=None. +index_col : int, list of int, default None + Column (0-indexed) to use as the row labels of the DataFrame. + Pass None if there is no such column. If a list is passed, + those columns will be combined into a ``MultiIndex``. If a + subset of data is selected with ``usecols``, index_col + is based on the subset. +usecols : int, str, list-like, or callable default None + * If None, then parse all columns. 
+ * If str, then indicates comma separated list of Excel column letters + and column ranges (e.g. "A:E" or "A,C,E:F"). Ranges are inclusive of + both sides. + * If list of int, then indicates list of column numbers to be parsed. + * If list of string, then indicates list of column names to be parsed. + * If callable, then evaluate each column name against it and parse the + column if the callable returns ``True``. + + Returns a subset of the columns according to behavior above. +squeeze : bool, default False + If the parsed data only contains one column then return a Series. + + .. deprecated:: 1.4.0 + Append ``.squeeze("columns")`` to the call to ``read_excel`` to squeeze + the data. +dtype : Type name or dict of column -> type, default None + Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32} + Use `object` to preserve data as stored in Excel and not interpret dtype. + If converters are specified, they will be applied INSTEAD + of dtype conversion. +engine : str, default None + If io is not a buffer or path, this must be set to identify io. + Supported engines: "xlrd", "openpyxl", "odf", "pyxlsb". + Engine compatibility : + + - "xlrd" supports old-style Excel files (.xls). + - "openpyxl" supports newer Excel file formats. + - "odf" supports OpenDocument file formats (.odf, .ods, .odt). + - "pyxlsb" supports Binary Excel files. + + .. versionchanged:: 1.2.0 + The engine `xlrd `_ + now only supports old-style ``.xls`` files. + When ``engine=None``, the following logic will be + used to determine the engine: + + - If ``path_or_buffer`` is an OpenDocument format (.odf, .ods, .odt), + then `odf `_ will be used. + - Otherwise if ``path_or_buffer`` is an xls format, + ``xlrd`` will be used. + - Otherwise if ``path_or_buffer`` is in xlsb format, + ``pyxlsb`` will be used. + + .. versionadded:: 1.3.0 + - Otherwise ``openpyxl`` will be used. + + .. versionchanged:: 1.3.0 + +converters : dict, default None + Dict of functions for converting values in certain columns. Keys can + either be integers or column labels, values are functions that take one + input argument, the Excel cell content, and return the transformed + content. +true_values : list, default None + Values to consider as True. +false_values : list, default None + Values to consider as False. +skiprows : list-like, int, or callable, optional + Line numbers to skip (0-indexed) or number of lines to skip (int) at the + start of the file. If callable, the callable function will be evaluated + against the row indices, returning True if the row should be skipped and + False otherwise. An example of a valid callable argument would be ``lambda + x: x in [0, 2]``. +nrows : int, default None + Number of rows to parse. +na_values : scalar, str, list-like, or dict, default None + Additional strings to recognize as NA/NaN. If dict passed, specific + per-column NA values. By default the following values are interpreted + as NaN: '""" + + fill("', '".join(sorted(STR_NA_VALUES)), 70, subsequent_indent=" ") + + """'. +keep_default_na : bool, default True + Whether or not to include the default NaN values when parsing the data. + Depending on whether `na_values` is passed in, the behavior is as follows: + + * If `keep_default_na` is True, and `na_values` are specified, `na_values` + is appended to the default NaN values used for parsing. + * If `keep_default_na` is True, and `na_values` are not specified, only + the default NaN values are used for parsing. 
+ * If `keep_default_na` is False, and `na_values` are specified, only + the NaN values specified `na_values` are used for parsing. + * If `keep_default_na` is False, and `na_values` are not specified, no + strings will be parsed as NaN. + + Note that if `na_filter` is passed in as False, the `keep_default_na` and + `na_values` parameters will be ignored. +na_filter : bool, default True + Detect missing value markers (empty strings and the value of na_values). In + data without any NAs, passing na_filter=False can improve the performance + of reading a large file. +verbose : bool, default False + Indicate number of NA values placed in non-numeric columns. +parse_dates : bool, list-like, or dict, default False + The behavior is as follows: + + * bool. If True -> try parsing the index. + * list of int or names. e.g. If [1, 2, 3] -> try parsing columns 1, 2, 3 + each as a separate date column. + * list of lists. e.g. If [[1, 3]] -> combine columns 1 and 3 and parse as + a single date column. + * dict, e.g. {'foo' : [1, 3]} -> parse columns 1, 3 as date and call + result 'foo' + + If a column or index contains an unparsable date, the entire column or + index will be returned unaltered as an object data type. If you don`t want to + parse some cells as date just change their type in Excel to "Text". + For non-standard datetime parsing, use ``pd.to_datetime`` after ``pd.read_excel``. + + Note: A fast-path exists for iso8601-formatted dates. +date_parser : function, optional + Function to use for converting a sequence of string columns to an array of + datetime instances. The default uses ``dateutil.parser.parser`` to do the + conversion. Pandas will try to call `date_parser` in three different ways, + advancing to the next if an exception occurs: 1) Pass one or more arrays + (as defined by `parse_dates`) as arguments; 2) concatenate (row-wise) the + string values from the columns defined by `parse_dates` into a single array + and pass that; and 3) call `date_parser` once for each row using one or + more strings (corresponding to the columns defined by `parse_dates`) as + arguments. +thousands : str, default None + Thousands separator for parsing string columns to numeric. Note that + this parameter is only necessary for columns stored as TEXT in Excel, + any numeric columns will automatically be parsed, regardless of display + format. +decimal : str, default '.' + Character to recognize as decimal point for parsing string columns to numeric. + Note that this parameter is only necessary for columns stored as TEXT in Excel, + any numeric columns will automatically be parsed, regardless of display + format.(e.g. use ',' for European data). + + .. versionadded:: 1.4.0 + +comment : str, default None + Comments out remainder of line. Pass a character or characters to this + argument to indicate comments in the input file. Any data between the + comment string and the end of the current line is ignored. +skipfooter : int, default 0 + Rows at the end to skip (0-indexed). +convert_float : bool, default True + Convert integral floats to int (i.e., 1.0 --> 1). If False, all numeric + data will be read in as floats: Excel stores all numbers as floats + internally. + + .. deprecated:: 1.3.0 + convert_float will be removed in a future version + +mangle_dupe_cols : bool, default True + Duplicate columns will be specified as 'X', 'X.1', ...'X.N', rather than + 'X'...'X'. Passing in False will cause data to be overwritten if there + are duplicate names in the columns. 
+storage_options : dict, optional + Extra options that make sense for a particular storage connection, e.g. + host, port, username, password, etc., if using a URL that will + be parsed by ``fsspec``, e.g., starting "s3://", "gcs://". An error + will be raised if providing this argument with a local path or + a file-like buffer. See the fsspec and backend storage implementation + docs for the set of allowed keys and values. + + .. versionadded:: 1.2.0 + +Returns +------- +DataFrame or dict of DataFrames + DataFrame from the passed in Excel file. See notes in sheet_name + argument for more information on when a dict of DataFrames is returned. + +See Also +-------- +DataFrame.to_excel : Write DataFrame to an Excel file. +DataFrame.to_csv : Write DataFrame to a comma-separated values (csv) file. +read_csv : Read a comma-separated values (csv) file into DataFrame. +read_fwf : Read a table of fixed-width formatted lines into DataFrame. + +Examples +-------- +The file can be read using the file name as string or an open file object: + +>>> pd.read_excel('tmp.xlsx', index_col=0) # doctest: +SKIP + Name Value +0 string1 1 +1 string2 2 +2 #Comment 3 + +>>> pd.read_excel(open('tmp.xlsx', 'rb'), +... sheet_name='Sheet3') # doctest: +SKIP + Unnamed: 0 Name Value +0 0 string1 1 +1 1 string2 2 +2 2 #Comment 3 + +Index and header can be specified via the `index_col` and `header` arguments + +>>> pd.read_excel('tmp.xlsx', index_col=None, header=None) # doctest: +SKIP + 0 1 2 +0 NaN Name Value +1 0.0 string1 1 +2 1.0 string2 2 +3 2.0 #Comment 3 + +Column types are inferred but can be explicitly specified + +>>> pd.read_excel('tmp.xlsx', index_col=0, +... dtype={'Name': str, 'Value': float}) # doctest: +SKIP + Name Value +0 string1 1.0 +1 string2 2.0 +2 #Comment 3.0 + +True, False, and NA values, and thousands separators have defaults, +but can be explicitly specified, too. Supply the values you would like +as strings or lists of strings! + +>>> pd.read_excel('tmp.xlsx', index_col=0, +... na_values=['string1', 'string2']) # doctest: +SKIP + Name Value +0 NaN 1 +1 NaN 2 +2 #Comment 3 + +Comment lines in the excel input file can be skipped using the `comment` kwarg + +>>> pd.read_excel('tmp.xlsx', index_col=0, comment='#') # doctest: +SKIP + Name Value +0 string1 1.0 +1 string2 2.0 +2 None NaN +""" +) + + +@overload +def read_excel( + io, + # sheet name is str or int -> DataFrame + sheet_name: str | int, + header: int | Sequence[int] | None = ..., + names=..., + index_col: int | Sequence[int] | None = ..., + usecols=..., + squeeze: bool | None = ..., + dtype: DtypeArg | None = ..., + engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb"] | None = ..., + converters=..., + true_values: Iterable[Hashable] | None = ..., + false_values: Iterable[Hashable] | None = ..., + skiprows: Sequence[int] | int | Callable[[int], object] | None = ..., + nrows: int | None = ..., + na_values=..., + keep_default_na: bool = ..., + na_filter: bool = ..., + verbose: bool = ..., + parse_dates=..., + date_parser=..., + thousands: str | None = ..., + decimal: str = ..., + comment: str | None = ..., + skipfooter: int = ..., + convert_float: bool | None = ..., + mangle_dupe_cols: bool = ..., + storage_options: StorageOptions = ..., +) -> DataFrame: + ... 
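# ---------------------------------------------------------------------------
# Editor's aside, not part of the vendored pandas source: a minimal sketch of
# the sheet_name contract that the docstring and the overloads above encode.
# The file name "report.xlsx" and the sheet names are hypothetical.
#
#     import pandas as pd
#
#     df = pd.read_excel("report.xlsx", sheet_name="Sheet1")  # -> DataFrame
#     some = pd.read_excel("report.xlsx", sheet_name=[0, 1])  # -> dict of DataFrames
#     every = pd.read_excel("report.xlsx", sheet_name=None)   # -> dict of all sheets
# ---------------------------------------------------------------------------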
+ + +@overload +def read_excel( + io, + # sheet name is list or None -> dict[IntStrT, DataFrame] + sheet_name: list[IntStrT] | None, + header: int | Sequence[int] | None = ..., + names=..., + index_col: int | Sequence[int] | None = ..., + usecols=..., + squeeze: bool | None = ..., + dtype: DtypeArg | None = ..., + engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb"] | None = ..., + converters=..., + true_values: Iterable[Hashable] | None = ..., + false_values: Iterable[Hashable] | None = ..., + skiprows: Sequence[int] | int | Callable[[int], object] | None = ..., + nrows: int | None = ..., + na_values=..., + keep_default_na: bool = ..., + na_filter: bool = ..., + verbose: bool = ..., + parse_dates=..., + date_parser=..., + thousands: str | None = ..., + decimal: str = ..., + comment: str | None = ..., + skipfooter: int = ..., + convert_float: bool | None = ..., + mangle_dupe_cols: bool = ..., + storage_options: StorageOptions = ..., +) -> dict[IntStrT, DataFrame]: + ... + + +@deprecate_nonkeyword_arguments(allowed_args=["io", "sheet_name"], version="2.0") +@Appender(_read_excel_doc) +def read_excel( + io, + sheet_name: str | int | list[IntStrT] | None = 0, + header: int | Sequence[int] | None = 0, + names=None, + index_col: int | Sequence[int] | None = None, + usecols=None, + squeeze: bool | None = None, + dtype: DtypeArg | None = None, + engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb"] | None = None, + converters=None, + true_values: Iterable[Hashable] | None = None, + false_values: Iterable[Hashable] | None = None, + skiprows: Sequence[int] | int | Callable[[int], object] | None = None, + nrows: int | None = None, + na_values=None, + keep_default_na: bool = True, + na_filter: bool = True, + verbose: bool = False, + parse_dates=False, + date_parser=None, + thousands: str | None = None, + decimal: str = ".", + comment: str | None = None, + skipfooter: int = 0, + convert_float: bool | None = None, + mangle_dupe_cols: bool = True, + storage_options: StorageOptions = None, +) -> DataFrame | dict[IntStrT, DataFrame]: + + should_close = False + if not isinstance(io, ExcelFile): + should_close = True + io = ExcelFile(io, storage_options=storage_options, engine=engine) + elif engine and engine != io.engine: + raise ValueError( + "Engine should not be specified when passing " + "an ExcelFile - ExcelFile already has the engine set" + ) + + try: + data = io.parse( + sheet_name=sheet_name, + header=header, + names=names, + index_col=index_col, + usecols=usecols, + squeeze=squeeze, + dtype=dtype, + converters=converters, + true_values=true_values, + false_values=false_values, + skiprows=skiprows, + nrows=nrows, + na_values=na_values, + keep_default_na=keep_default_na, + na_filter=na_filter, + verbose=verbose, + parse_dates=parse_dates, + date_parser=date_parser, + thousands=thousands, + decimal=decimal, + comment=comment, + skipfooter=skipfooter, + convert_float=convert_float, + mangle_dupe_cols=mangle_dupe_cols, + ) + finally: + # make sure to close opened file handles + if should_close: + io.close() + return data + + +class BaseExcelReader(metaclass=abc.ABCMeta): + def __init__(self, filepath_or_buffer, storage_options: StorageOptions = None): + # First argument can also be bytes, so create a buffer + if isinstance(filepath_or_buffer, bytes): + filepath_or_buffer = BytesIO(filepath_or_buffer) + + self.handles = IOHandles( + handle=filepath_or_buffer, compression={"method": None} + ) + if not isinstance(filepath_or_buffer, (ExcelFile, self._workbook_class)): + self.handles = get_handle( + 
filepath_or_buffer, "rb", storage_options=storage_options, is_text=False + ) + + if isinstance(self.handles.handle, self._workbook_class): + self.book = self.handles.handle + elif hasattr(self.handles.handle, "read"): + # N.B. xlrd.Book has a read attribute too + self.handles.handle.seek(0) + try: + self.book = self.load_workbook(self.handles.handle) + except Exception: + self.close() + raise + else: + raise ValueError( + "Must explicitly set engine if not passing in buffer or path for io." + ) + + @property + @abc.abstractmethod + def _workbook_class(self): + pass + + @abc.abstractmethod + def load_workbook(self, filepath_or_buffer): + pass + + def close(self): + if hasattr(self, "book") and hasattr(self.book, "close"): + # pyxlsb: opens a TemporaryFile + # openpyxl: https://stackoverflow.com/questions/31416842/ + # openpyxl-does-not-close-excel-workbook-in-read-only-mode + self.book.close() + self.handles.close() + + @property + @abc.abstractmethod + def sheet_names(self) -> list[str]: + pass + + @abc.abstractmethod + def get_sheet_by_name(self, name: str): + pass + + @abc.abstractmethod + def get_sheet_by_index(self, index: int): + pass + + @abc.abstractmethod + def get_sheet_data(self, sheet, convert_float: bool): + pass + + def raise_if_bad_sheet_by_index(self, index: int) -> None: + n_sheets = len(self.sheet_names) + if index >= n_sheets: + raise ValueError( + f"Worksheet index {index} is invalid, {n_sheets} worksheets found" + ) + + def raise_if_bad_sheet_by_name(self, name: str) -> None: + if name not in self.sheet_names: + raise ValueError(f"Worksheet named '{name}' not found") + + def parse( + self, + sheet_name: str | int | list[int] | list[str] | None = 0, + header: int | Sequence[int] | None = 0, + names=None, + index_col: int | Sequence[int] | None = None, + usecols=None, + squeeze: bool | None = None, + dtype: DtypeArg | None = None, + true_values: Iterable[Hashable] | None = None, + false_values: Iterable[Hashable] | None = None, + skiprows: Sequence[int] | int | Callable[[int], object] | None = None, + nrows: int | None = None, + na_values=None, + verbose: bool = False, + parse_dates=False, + date_parser=None, + thousands: str | None = None, + decimal: str = ".", + comment: str | None = None, + skipfooter: int = 0, + convert_float: bool | None = None, + mangle_dupe_cols: bool = True, + **kwds, + ): + + if convert_float is None: + convert_float = True + else: + warnings.warn( + "convert_float is deprecated and will be removed in a future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + validate_header_arg(header) + + ret_dict = False + + # Keep sheetname to maintain backwards compatibility. + sheets: list[int] | list[str] + if isinstance(sheet_name, list): + sheets = sheet_name + ret_dict = True + elif sheet_name is None: + sheets = self.sheet_names + ret_dict = True + elif isinstance(sheet_name, str): + sheets = [sheet_name] + else: + sheets = [sheet_name] + + # handle same-type duplicates. 
+ sheets = cast(Union[List[int], List[str]], list(dict.fromkeys(sheets).keys())) + + output = {} + + for asheetname in sheets: + if verbose: + print(f"Reading sheet {asheetname}") + + if isinstance(asheetname, str): + sheet = self.get_sheet_by_name(asheetname) + else: # assume an integer if not a string + sheet = self.get_sheet_by_index(asheetname) + + data = self.get_sheet_data(sheet, convert_float) + if hasattr(sheet, "close"): + # pyxlsb opens two TemporaryFiles + sheet.close() + usecols = maybe_convert_usecols(usecols) + + if not data: + output[asheetname] = DataFrame() + continue + + is_list_header = False + is_len_one_list_header = False + if is_list_like(header): + assert isinstance(header, Sequence) + is_list_header = True + if len(header) == 1: + is_len_one_list_header = True + + if is_len_one_list_header: + header = cast(Sequence[int], header)[0] + + # forward fill and pull out names for MultiIndex column + header_names = None + if header is not None and is_list_like(header): + assert isinstance(header, Sequence) + + header_names = [] + control_row = [True] * len(data[0]) + + for row in header: + if is_integer(skiprows): + assert isinstance(skiprows, int) + row += skiprows + + data[row], control_row = fill_mi_header(data[row], control_row) + + if index_col is not None: + header_name, _ = pop_header_name(data[row], index_col) + header_names.append(header_name) + + # If there is a MultiIndex header and an index then there is also + # a row containing just the index name(s) + has_index_names = ( + is_list_header and not is_len_one_list_header and index_col is not None + ) + + if is_list_like(index_col): + # Forward fill values for MultiIndex index. + if header is None: + offset = 0 + elif isinstance(header, int): + offset = 1 + header + else: + offset = 1 + max(header) + + # GH34673: if MultiIndex names present and not defined in the header, + # offset needs to be incremented so that forward filling starts + # from the first MI value instead of the name + if has_index_names: + offset += 1 + + # Check if we have an empty dataset + # before trying to collect data. + if offset < len(data): + assert isinstance(index_col, Sequence) + + for col in index_col: + last = data[offset][col] + + for row in range(offset + 1, len(data)): + if data[row][col] == "" or data[row][col] is None: + data[row][col] = last + else: + last = data[row][col] + + # GH 12292 : error when read one empty column from excel file + try: + parser = TextParser( + data, + names=names, + header=header, + index_col=index_col, + has_index_names=has_index_names, + squeeze=squeeze, + dtype=dtype, + true_values=true_values, + false_values=false_values, + skiprows=skiprows, + nrows=nrows, + na_values=na_values, + skip_blank_lines=False, # GH 39808 + parse_dates=parse_dates, + date_parser=date_parser, + thousands=thousands, + decimal=decimal, + comment=comment, + skipfooter=skipfooter, + usecols=usecols, + mangle_dupe_cols=mangle_dupe_cols, + **kwds, + ) + + output[asheetname] = parser.read(nrows=nrows) + + if not squeeze or isinstance(output[asheetname], DataFrame): + if header_names: + output[asheetname].columns = output[ + asheetname + ].columns.set_names(header_names) + + except EmptyDataError: + # No Data, return an empty DataFrame + output[asheetname] = DataFrame() + + if ret_dict: + return output + else: + return output[asheetname] + + +class ExcelWriter(metaclass=abc.ABCMeta): + """ + Class for writing DataFrame objects into excel sheets. 
+ + Default is to use : + * xlwt for xls + * xlsxwriter for xlsx if xlsxwriter is installed otherwise openpyxl + * odf for ods. + See DataFrame.to_excel for typical usage. + + The writer should be used as a context manager. Otherwise, call `close()` to save + and close any opened file handles. + + Parameters + ---------- + path : str or typing.BinaryIO + Path to xls or xlsx or ods file. + engine : str (optional) + Engine to use for writing. If None, defaults to + ``io.excel..writer``. NOTE: can only be passed as a keyword + argument. + + .. deprecated:: 1.2.0 + + As the `xlwt `__ package is no longer + maintained, the ``xlwt`` engine will be removed in a future + version of pandas. + + date_format : str, default None + Format string for dates written into Excel files (e.g. 'YYYY-MM-DD'). + datetime_format : str, default None + Format string for datetime objects written into Excel files. + (e.g. 'YYYY-MM-DD HH:MM:SS'). + mode : {'w', 'a'}, default 'w' + File mode to use (write or append). Append does not work with fsspec URLs. + storage_options : dict, optional + Extra options that make sense for a particular storage connection, e.g. + host, port, username, password, etc., if using a URL that will + be parsed by ``fsspec``, e.g., starting "s3://", "gcs://". + + .. versionadded:: 1.2.0 + + if_sheet_exists : {'error', 'new', 'replace', 'overlay'}, default 'error' + How to behave when trying to write to a sheet that already + exists (append mode only). + + * error: raise a ValueError. + * new: Create a new sheet, with a name determined by the engine. + * replace: Delete the contents of the sheet before writing to it. + * overlay: Write contents to the existing sheet without removing the old + contents. + + .. versionadded:: 1.3.0 + + .. versionchanged:: 1.4.0 + + Added ``overlay`` option + + engine_kwargs : dict, optional + Keyword arguments to be passed into the engine. These will be passed to + the following functions of the respective engines: + + * xlsxwriter: ``xlsxwriter.Workbook(file, **engine_kwargs)`` + * openpyxl (write mode): ``openpyxl.Workbook(**engine_kwargs)`` + * openpyxl (append mode): ``openpyxl.load_workbook(file, **engine_kwargs)`` + * odswriter: ``odf.opendocument.OpenDocumentSpreadsheet(**engine_kwargs)`` + + .. versionadded:: 1.3.0 + **kwargs : dict, optional + Keyword arguments to be passed into the engine. + + .. deprecated:: 1.3.0 + + Use engine_kwargs instead. + + Attributes + ---------- + None + + Methods + ------- + None + + Notes + ----- + None of the methods and properties are considered public. + + For compatibility with CSV writers, ExcelWriter serializes lists + and dicts to strings before writing. + + Examples + -------- + Default usage: + + >>> df = pd.DataFrame([["ABC", "XYZ"]], columns=["Foo", "Bar"]) # doctest: +SKIP + >>> with pd.ExcelWriter("path_to_file.xlsx") as writer: + ... df.to_excel(writer) # doctest: +SKIP + + To write to separate sheets in a single file: + + >>> df1 = pd.DataFrame([["AAA", "BBB"]], columns=["Spam", "Egg"]) # doctest: +SKIP + >>> df2 = pd.DataFrame([["ABC", "XYZ"]], columns=["Foo", "Bar"]) # doctest: +SKIP + >>> with pd.ExcelWriter("path_to_file.xlsx") as writer: + ... df1.to_excel(writer, sheet_name="Sheet1") # doctest: +SKIP + ... df2.to_excel(writer, sheet_name="Sheet2") # doctest: +SKIP + + You can set the date format or datetime format: + + >>> from datetime import date, datetime # doctest: +SKIP + >>> df = pd.DataFrame( + ... [ + ... [date(2014, 1, 31), date(1999, 9, 24)], + ... 
[datetime(1998, 5, 26, 23, 33, 4), datetime(2014, 2, 28, 13, 5, 13)], + ... ], + ... index=["Date", "Datetime"], + ... columns=["X", "Y"], + ... ) # doctest: +SKIP + >>> with pd.ExcelWriter( + ... "path_to_file.xlsx", + ... date_format="YYYY-MM-DD", + ... datetime_format="YYYY-MM-DD HH:MM:SS" + ... ) as writer: + ... df.to_excel(writer) # doctest: +SKIP + + You can also append to an existing Excel file: + + >>> with pd.ExcelWriter("path_to_file.xlsx", mode="a", engine="openpyxl") as writer: + ... df.to_excel(writer, sheet_name="Sheet3") # doctest: +SKIP + + Here, the `if_sheet_exists` parameter can be set to replace a sheet if it + already exists: + + >>> with ExcelWriter( + ... "path_to_file.xlsx", + ... mode="a", + ... engine="openpyxl", + ... if_sheet_exists="replace", + ... ) as writer: + ... df.to_excel(writer, sheet_name="Sheet1") # doctest: +SKIP + + You can also write multiple DataFrames to a single sheet. Note that the + ``if_sheet_exists`` parameter needs to be set to ``overlay``: + + >>> with ExcelWriter("path_to_file.xlsx", + ... mode="a", + ... engine="openpyxl", + ... if_sheet_exists="overlay", + ... ) as writer: + ... df1.to_excel(writer, sheet_name="Sheet1") + ... df2.to_excel(writer, sheet_name="Sheet1", startcol=3) # doctest: +SKIP + + You can store Excel file in RAM: + + >>> import io + >>> df = pd.DataFrame([["ABC", "XYZ"]], columns=["Foo", "Bar"]) + >>> buffer = io.BytesIO() + >>> with pd.ExcelWriter(buffer) as writer: + ... df.to_excel(writer) + + You can pack Excel file into zip archive: + + >>> import zipfile # doctest: +SKIP + >>> df = pd.DataFrame([["ABC", "XYZ"]], columns=["Foo", "Bar"]) # doctest: +SKIP + >>> with zipfile.ZipFile("path_to_file.zip", "w") as zf: + ... with zf.open("filename.xlsx", "w") as buffer: + ... with pd.ExcelWriter(buffer) as writer: + ... df.to_excel(writer) # doctest: +SKIP + + You can specify additional arguments to the underlying engine: + + >>> with pd.ExcelWriter( + ... "path_to_file.xlsx", + ... engine="xlsxwriter", + ... engine_kwargs={"options": {"nan_inf_to_errors": True}} + ... ) as writer: + ... df.to_excel(writer) # doctest: +SKIP + + In append mode, ``engine_kwargs`` are passed through to + openpyxl's ``load_workbook``: + + >>> with pd.ExcelWriter( + ... "path_to_file.xlsx", + ... engine="openpyxl", + ... mode="a", + ... engine_kwargs={"keep_vba": True} + ... ) as writer: + ... df.to_excel(writer, sheet_name="Sheet2") # doctest: +SKIP + """ + + # Defining an ExcelWriter implementation (see abstract methods for more...) + + # - Mandatory + # - ``write_cells(self, cells, sheet_name=None, startrow=0, startcol=0)`` + # --> called to write additional DataFrames to disk + # - ``supported_extensions`` (tuple of supported extensions), used to + # check that engine supports the given extension. + # - ``engine`` - string that gives the engine name. Necessary to + # instantiate class directly and bypass ``ExcelWriterMeta`` engine + # lookup. + # - ``save(self)`` --> called to save file to disk + # - Mostly mandatory (i.e. should at least exist) + # - book, cur_sheet, path + + # - Optional: + # - ``__init__(self, path, engine=None, **kwargs)`` --> always called + # with path as first argument. + + # You also need to register the class with ``register_writer()``. + # Technically, ExcelWriter implementations don't need to subclass + # ExcelWriter. 
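    # -----------------------------------------------------------------------
    # Editor's aside, not part of the vendored pandas source: a sketch of the
    # writer skeleton the notes above describe. The engine name "demo" and
    # extension ".demo" are made up for illustration; a real engine would
    # serialize the collected cells in save() instead of discarding them.
    #
    #     from pandas.io.excel._util import register_writer
    #
    #     class DemoWriter(ExcelWriter):
    #         engine = "demo"
    #         supported_extensions = (".demo",)
    #
    #         def write_cells(self, cells, sheet_name=None, startrow=0,
    #                         startcol=0, freeze_panes=None):
    #             # collect the formatted cells per sheet name
    #             name = self._get_sheet_name(sheet_name)
    #             self.sheets.setdefault(name, []).extend(cells)
    #
    #         def save(self):
    #             pass  # write self.sheets out through self.handles.handle
    #
    #     register_writer(DemoWriter)
    # -----------------------------------------------------------------------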
+    def __new__(
+        cls,
+        path: FilePath | WriteExcelBuffer | ExcelWriter,
+        engine: str | None = None,
+        date_format: str | None = None,
+        datetime_format: str | None = None,
+        mode: str = "w",
+        storage_options: StorageOptions = None,
+        if_sheet_exists: Literal["error", "new", "replace", "overlay"] | None = None,
+        engine_kwargs: dict | None = None,
+        **kwargs,
+    ):
+        if kwargs:
+            if engine_kwargs is not None:
+                raise ValueError("Cannot use both engine_kwargs and **kwargs")
+            warnings.warn(
+                "Use of **kwargs is deprecated, use engine_kwargs instead.",
+                FutureWarning,
+                stacklevel=find_stack_level(),
+            )
+
+        # only switch class if generic(ExcelWriter)
+
+        if cls is ExcelWriter:
+            if engine is None or (isinstance(engine, str) and engine == "auto"):
+                if isinstance(path, str):
+                    ext = os.path.splitext(path)[-1][1:]
+                else:
+                    ext = "xlsx"
+
+                try:
+                    engine = config.get_option(f"io.excel.{ext}.writer", silent=True)
+                    if engine == "auto":
+                        engine = get_default_engine(ext, mode="writer")
+                except KeyError as err:
+                    raise ValueError(f"No engine for filetype: '{ext}'") from err
+
+                if engine == "xlwt":
+                    xls_config_engine = config.get_option(
+                        "io.excel.xls.writer", silent=True
+                    )
+                    # Don't warn a 2nd time if user has changed the default engine for xls
+                    if xls_config_engine != "xlwt":
+                        warnings.warn(
+                            "As the xlwt package is no longer maintained, the xlwt "
+                            "engine will be removed in a future version of pandas. "
+                            "This is the only engine in pandas that supports writing "
+                            "in the xls format. Install openpyxl and write to an xlsx "
+                            "file instead. You can set the option io.excel.xls.writer "
+                            "to 'xlwt' to silence this warning. While this option is "
+                            "deprecated and will also raise a warning, it can "
+                            "be globally set and the warning suppressed.",
+                            FutureWarning,
+                            stacklevel=find_stack_level(),
+                        )
+
+            # for mypy
+            assert engine is not None
+            cls = get_writer(engine)
+
+        return object.__new__(cls)
+
+    # declare external properties you can count on
+    path = None
+
+    @property
+    @abc.abstractmethod
+    def supported_extensions(self) -> tuple[str, ...] | list[str]:
+        """Extensions that writer engine supports."""
+        pass
+
+    @property
+    @abc.abstractmethod
+    def engine(self) -> str:
+        """Name of engine."""
+        pass
+
+    @abc.abstractmethod
+    def write_cells(
+        self,
+        cells,
+        sheet_name: str | None = None,
+        startrow: int = 0,
+        startcol: int = 0,
+        freeze_panes: tuple[int, int] | None = None,
+    ) -> None:
+        """
+        Write given formatted cells into an Excel sheet
+
+        Parameters
+        ----------
+        cells : generator
+            cell of formatted data to save to Excel sheet
+        sheet_name : str, default None
+            Name of Excel sheet, if None, then use self.cur_sheet
+        startrow : upper left cell row to dump data frame
+        startcol : upper left cell column to dump data frame
+        freeze_panes: int tuple of length 2
+            contains the bottom-most row and right-most column to freeze
+        """
+        pass
+
+    @abc.abstractmethod
+    def save(self) -> None:
+        """
+        Save workbook to disk.
+        """
+        pass
+
+    def __init__(
+        self,
+        path: FilePath | WriteExcelBuffer | ExcelWriter,
+        engine: str | None = None,
+        date_format: str | None = None,
+        datetime_format: str | None = None,
+        mode: str = "w",
+        storage_options: StorageOptions = None,
+        if_sheet_exists: str | None = None,
+        engine_kwargs: dict | None = None,
+        **kwargs,
+    ):
+        # validate that this engine can handle the extension
+        if isinstance(path, str):
+            ext = os.path.splitext(path)[-1]
+            self.check_extension(ext)
+
+        # use mode to open the file
+        if "b" not in mode:
+            mode += "b"
+        # use "a" for the user to append data to excel but internally use "r+" to let
+        # the excel backend first read the existing file and then write any data to it
+        mode = mode.replace("a", "r+")
+
+        # cast ExcelWriter to avoid adding 'if self.handles is not None'
+        self.handles = IOHandles(
+            cast(IO[bytes], path), compression={"compression": None}
+        )
+        if not isinstance(path, ExcelWriter):
+            self.handles = get_handle(
+                path, mode, storage_options=storage_options, is_text=False
+            )
+        self.sheets: dict[str, Any] = {}
+        self.cur_sheet = None
+
+        if date_format is None:
+            self.date_format = "YYYY-MM-DD"
+        else:
+            self.date_format = date_format
+        if datetime_format is None:
+            self.datetime_format = "YYYY-MM-DD HH:MM:SS"
+        else:
+            self.datetime_format = datetime_format
+
+        self.mode = mode
+
+        if if_sheet_exists not in (None, "error", "new", "replace", "overlay"):
+            raise ValueError(
+                f"'{if_sheet_exists}' is not valid for if_sheet_exists. "
+                "Valid options are 'error', 'new', 'replace' and 'overlay'."
+            )
+        if if_sheet_exists and "r+" not in mode:
+            raise ValueError("if_sheet_exists is only valid in append mode (mode='a')")
+        if if_sheet_exists is None:
+            if_sheet_exists = "error"
+        self.if_sheet_exists = if_sheet_exists
+
+    def __fspath__(self):
+        return getattr(self.handles.handle, "name", "")
+
+    def _get_sheet_name(self, sheet_name: str | None) -> str:
+        if sheet_name is None:
+            sheet_name = self.cur_sheet
+        if sheet_name is None:  # pragma: no cover
+            raise ValueError("Must pass explicit sheet_name or set cur_sheet property")
+        return sheet_name
+
+    def _value_with_fmt(self, val) -> tuple[object, str | None]:
+        """
+        Convert numpy types to Python types for the Excel writers.
+
+        Parameters
+        ----------
+        val : object
+            Value to be written into cells
+
+        Returns
+        -------
+        Tuple with the first element being the converted value and the second
+        being an optional format
+        """
+        fmt = None
+
+        if is_integer(val):
+            val = int(val)
+        elif is_float(val):
+            val = float(val)
+        elif is_bool(val):
+            val = bool(val)
+        elif isinstance(val, datetime.datetime):
+            fmt = self.datetime_format
+        elif isinstance(val, datetime.date):
+            fmt = self.date_format
+        elif isinstance(val, datetime.timedelta):
+            # Excel represents durations as fractional days (86400 s per day)
+            val = val.total_seconds() / 86400
+            fmt = "0"
+        else:
+            val = str(val)
+
+        return val, fmt
+
+    @classmethod
+    def check_extension(cls, ext: str) -> Literal[True]:
+        """
+        Checks the path's extension against the Writer's supported
+        extensions. If it isn't supported, raises ValueError.
+ """ + if ext.startswith("."): + ext = ext[1:] + # error: "Callable[[ExcelWriter], Any]" has no attribute "__iter__" (not + # iterable) + if not any( + ext in extension + for extension in cls.supported_extensions # type: ignore[attr-defined] + ): + raise ValueError(f"Invalid extension for engine '{cls.engine}': '{ext}'") + else: + return True + + # Allow use as a contextmanager + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.close() + + def close(self) -> None: + """synonym for save, to make it more file-like""" + self.save() + self.handles.close() + + +XLS_SIGNATURES = ( + b"\x09\x00\x04\x00\x07\x00\x10\x00", # BIFF2 + b"\x09\x02\x06\x00\x00\x00\x10\x00", # BIFF3 + b"\x09\x04\x06\x00\x00\x00\x10\x00", # BIFF4 + b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", # Compound File Binary +) +ZIP_SIGNATURE = b"PK\x03\x04" +PEEK_SIZE = max(map(len, XLS_SIGNATURES + (ZIP_SIGNATURE,))) + + +@doc(storage_options=_shared_docs["storage_options"]) +def inspect_excel_format( + content_or_path: FilePath | ReadBuffer[bytes], + storage_options: StorageOptions = None, +) -> str | None: + """ + Inspect the path or content of an excel file and get its format. + + Adopted from xlrd: https://github.com/python-excel/xlrd. + + Parameters + ---------- + content_or_path : str or file-like object + Path to file or content of file to inspect. May be a URL. + {storage_options} + + Returns + ------- + str or None + Format of file if it can be determined. + + Raises + ------ + ValueError + If resulting stream is empty. + BadZipFile + If resulting stream does not have an XLS signature and is not a valid zipfile. + """ + if isinstance(content_or_path, bytes): + content_or_path = BytesIO(content_or_path) + + with get_handle( + content_or_path, "rb", storage_options=storage_options, is_text=False + ) as handle: + stream = handle.handle + stream.seek(0) + buf = stream.read(PEEK_SIZE) + if buf is None: + raise ValueError("stream is empty") + else: + assert isinstance(buf, bytes) + peek = buf + stream.seek(0) + + if any(peek.startswith(sig) for sig in XLS_SIGNATURES): + return "xls" + elif not peek.startswith(ZIP_SIGNATURE): + return None + + zf = zipfile.ZipFile(stream) + + # Workaround for some third party files that use forward slashes and + # lower case names. + component_names = [name.replace("\\", "/").lower() for name in zf.namelist()] + + if "xl/workbook.xml" in component_names: + return "xlsx" + if "xl/workbook.bin" in component_names: + return "xlsb" + if "content.xml" in component_names: + return "ods" + return "zip" + + +class ExcelFile: + """ + Class for parsing tabular excel sheets into DataFrame objects. + + See read_excel for more documentation. + + Parameters + ---------- + path_or_buffer : str, bytes, path object (pathlib.Path or py._path.local.LocalPath), + a file-like object, xlrd workbook or openpyxl workbook. + If a string or path object, expected to be a path to a + .xls, .xlsx, .xlsb, .xlsm, .odf, .ods, or .odt file. + engine : str, default None + If io is not a buffer or path, this must be set to identify io. + Supported engines: ``xlrd``, ``openpyxl``, ``odf``, ``pyxlsb`` + Engine compatibility : + + - ``xlrd`` supports old-style Excel files (.xls). + - ``openpyxl`` supports newer Excel file formats. + - ``odf`` supports OpenDocument file formats (.odf, .ods, .odt). + - ``pyxlsb`` supports Binary Excel files. + + .. versionchanged:: 1.2.0 + + The engine `xlrd `_ + now only supports old-style ``.xls`` files. 
+ When ``engine=None``, the following logic will be + used to determine the engine: + + - If ``path_or_buffer`` is an OpenDocument format (.odf, .ods, .odt), + then `odf `_ will be used. + - Otherwise if ``path_or_buffer`` is an xls format, + ``xlrd`` will be used. + - Otherwise if ``path_or_buffer`` is in xlsb format, + `pyxlsb `_ will be used. + + .. versionadded:: 1.3.0 + - Otherwise if `openpyxl `_ is installed, + then ``openpyxl`` will be used. + - Otherwise if ``xlrd >= 2.0`` is installed, a ``ValueError`` will be raised. + - Otherwise ``xlrd`` will be used and a ``FutureWarning`` will be raised. + This case will raise a ``ValueError`` in a future version of pandas. + + .. warning:: + + Please do not report issues when using ``xlrd`` to read ``.xlsx`` files. + This is not supported, switch to using ``openpyxl`` instead. + """ + + from pandas.io.excel._odfreader import ODFReader + from pandas.io.excel._openpyxl import OpenpyxlReader + from pandas.io.excel._pyxlsb import PyxlsbReader + from pandas.io.excel._xlrd import XlrdReader + + _engines: Mapping[str, Any] = { + "xlrd": XlrdReader, + "openpyxl": OpenpyxlReader, + "odf": ODFReader, + "pyxlsb": PyxlsbReader, + } + + def __init__( + self, + path_or_buffer, + engine: str | None = None, + storage_options: StorageOptions = None, + ): + if engine is not None and engine not in self._engines: + raise ValueError(f"Unknown engine: {engine}") + + # First argument can also be bytes, so create a buffer + if isinstance(path_or_buffer, bytes): + path_or_buffer = BytesIO(path_or_buffer) + + # Could be a str, ExcelFile, Book, etc. + self.io = path_or_buffer + # Always a string + self._io = stringify_path(path_or_buffer) + + # Determine xlrd version if installed + if import_optional_dependency("xlrd", errors="ignore") is None: + xlrd_version = None + else: + import xlrd + + xlrd_version = Version(get_version(xlrd)) + + ext = None + if engine is None: + # Only determine ext if it is needed + if xlrd_version is not None and isinstance(path_or_buffer, xlrd.Book): + ext = "xls" + else: + ext = inspect_excel_format( + content_or_path=path_or_buffer, storage_options=storage_options + ) + if ext is None: + raise ValueError( + "Excel file format cannot be determined, you must specify " + "an engine manually." + ) + + engine = config.get_option(f"io.excel.{ext}.reader", silent=True) + if engine == "auto": + engine = get_default_engine(ext, mode="reader") + + if engine == "xlrd" and xlrd_version is not None: + if ext is None: + # Need ext to determine ext in order to raise/warn + if isinstance(path_or_buffer, xlrd.Book): + ext = "xls" + else: + ext = inspect_excel_format( + path_or_buffer, storage_options=storage_options + ) + + # Pass through if ext is None, otherwise check if ext valid for xlrd + if ext and ext != "xls" and xlrd_version >= Version("2"): + raise ValueError( + f"Your version of xlrd is {xlrd_version}. In xlrd >= 2.0, " + f"only the xls format is supported. Install openpyxl instead." + ) + elif ext and ext != "xls": + stacklevel = find_stack_level() + warnings.warn( + f"Your version of xlrd is {xlrd_version}. In xlrd >= 2.0, " + f"only the xls format is supported. 
Install " + f"openpyxl instead.", + FutureWarning, + stacklevel=stacklevel, + ) + + assert engine is not None + self.engine = engine + self.storage_options = storage_options + + self._reader = self._engines[engine](self._io, storage_options=storage_options) + + def __fspath__(self): + return self._io + + def parse( + self, + sheet_name: str | int | list[int] | list[str] | None = 0, + header: int | Sequence[int] | None = 0, + names=None, + index_col: int | Sequence[int] | None = None, + usecols=None, + squeeze: bool | None = None, + converters=None, + true_values: Iterable[Hashable] | None = None, + false_values: Iterable[Hashable] | None = None, + skiprows: Sequence[int] | int | Callable[[int], object] | None = None, + nrows: int | None = None, + na_values=None, + parse_dates=False, + date_parser=None, + thousands: str | None = None, + comment: str | None = None, + skipfooter: int = 0, + convert_float: bool | None = None, + mangle_dupe_cols: bool = True, + **kwds, + ) -> DataFrame | dict[str, DataFrame] | dict[int, DataFrame]: + """ + Parse specified sheet(s) into a DataFrame. + + Equivalent to read_excel(ExcelFile, ...) See the read_excel + docstring for more info on accepted parameters. + + Returns + ------- + DataFrame or dict of DataFrames + DataFrame from the passed in Excel file. + """ + return self._reader.parse( + sheet_name=sheet_name, + header=header, + names=names, + index_col=index_col, + usecols=usecols, + squeeze=squeeze, + converters=converters, + true_values=true_values, + false_values=false_values, + skiprows=skiprows, + nrows=nrows, + na_values=na_values, + parse_dates=parse_dates, + date_parser=date_parser, + thousands=thousands, + comment=comment, + skipfooter=skipfooter, + convert_float=convert_float, + mangle_dupe_cols=mangle_dupe_cols, + **kwds, + ) + + @property + def book(self): + return self._reader.book + + @property + def sheet_names(self): + return self._reader.sheet_names + + def close(self) -> None: + """close io if necessary""" + self._reader.close() + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.close() + + def __del__(self): + # Ensure we don't leak file descriptors, but put in try/except in case + # attributes are already deleted + try: + self.close() + except AttributeError: + pass diff --git a/.local/lib/python3.8/site-packages/pandas/io/excel/_odfreader.py b/.local/lib/python3.8/site-packages/pandas/io/excel/_odfreader.py new file mode 100644 index 0000000..952ad72 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/excel/_odfreader.py @@ -0,0 +1,233 @@ +from __future__ import annotations + +import numpy as np + +from pandas._typing import ( + FilePath, + ReadBuffer, + Scalar, + StorageOptions, +) +from pandas.compat._optional import import_optional_dependency + +import pandas as pd + +from pandas.io.excel._base import BaseExcelReader + + +class ODFReader(BaseExcelReader): + """ + Read tables out of OpenDocument formatted files. + + Parameters + ---------- + filepath_or_buffer : str, path to be parsed or + an open readable stream. 
+ storage_options : dict, optional + passed to fsspec for appropriate URLs (see ``_get_filepath_or_buffer``) + """ + + def __init__( + self, + filepath_or_buffer: FilePath | ReadBuffer[bytes], + storage_options: StorageOptions = None, + ): + import_optional_dependency("odf") + super().__init__(filepath_or_buffer, storage_options=storage_options) + + @property + def _workbook_class(self): + from odf.opendocument import OpenDocument + + return OpenDocument + + def load_workbook(self, filepath_or_buffer: FilePath | ReadBuffer[bytes]): + from odf.opendocument import load + + return load(filepath_or_buffer) + + @property + def empty_value(self) -> str: + """Property for compat with other readers.""" + return "" + + @property + def sheet_names(self) -> list[str]: + """Return a list of sheet names present in the document""" + from odf.table import Table + + tables = self.book.getElementsByType(Table) + return [t.getAttribute("name") for t in tables] + + def get_sheet_by_index(self, index: int): + from odf.table import Table + + self.raise_if_bad_sheet_by_index(index) + tables = self.book.getElementsByType(Table) + return tables[index] + + def get_sheet_by_name(self, name: str): + from odf.table import Table + + self.raise_if_bad_sheet_by_name(name) + tables = self.book.getElementsByType(Table) + + for table in tables: + if table.getAttribute("name") == name: + return table + + self.close() + raise ValueError(f"sheet {name} not found") + + def get_sheet_data(self, sheet, convert_float: bool) -> list[list[Scalar]]: + """ + Parse an ODF Table into a list of lists + """ + from odf.table import ( + CoveredTableCell, + TableCell, + TableRow, + ) + + covered_cell_name = CoveredTableCell().qname + table_cell_name = TableCell().qname + cell_names = {covered_cell_name, table_cell_name} + + sheet_rows = sheet.getElementsByType(TableRow) + empty_rows = 0 + max_row_len = 0 + + table: list[list[Scalar]] = [] + + for sheet_row in sheet_rows: + sheet_cells = [x for x in sheet_row.childNodes if x.qname in cell_names] + empty_cells = 0 + table_row: list[Scalar] = [] + + for sheet_cell in sheet_cells: + if sheet_cell.qname == table_cell_name: + value = self._get_cell_value(sheet_cell, convert_float) + else: + value = self.empty_value + + column_repeat = self._get_column_repeat(sheet_cell) + + # Queue up empty values, writing only if content succeeds them + if value == self.empty_value: + empty_cells += column_repeat + else: + table_row.extend([self.empty_value] * empty_cells) + empty_cells = 0 + table_row.extend([value] * column_repeat) + + if max_row_len < len(table_row): + max_row_len = len(table_row) + + row_repeat = self._get_row_repeat(sheet_row) + if self._is_empty_row(sheet_row): + empty_rows += row_repeat + else: + # add blank rows to our table + table.extend([[self.empty_value]] * empty_rows) + empty_rows = 0 + for _ in range(row_repeat): + table.append(table_row) + + # Make our table square + for row in table: + if len(row) < max_row_len: + row.extend([self.empty_value] * (max_row_len - len(row))) + + return table + + def _get_row_repeat(self, row) -> int: + """ + Return number of times this row was repeated + Repeating an empty row appeared to be a common way + of representing sparse rows in the table. 
+ """ + from odf.namespaces import TABLENS + + return int(row.attributes.get((TABLENS, "number-rows-repeated"), 1)) + + def _get_column_repeat(self, cell) -> int: + from odf.namespaces import TABLENS + + return int(cell.attributes.get((TABLENS, "number-columns-repeated"), 1)) + + def _is_empty_row(self, row) -> bool: + """ + Helper function to find empty rows + """ + for column in row.childNodes: + if len(column.childNodes) > 0: + return False + + return True + + def _get_cell_value(self, cell, convert_float: bool) -> Scalar: + from odf.namespaces import OFFICENS + + if str(cell) == "#N/A": + return np.nan + + cell_type = cell.attributes.get((OFFICENS, "value-type")) + if cell_type == "boolean": + if str(cell) == "TRUE": + return True + return False + if cell_type is None: + return self.empty_value + elif cell_type == "float": + # GH5394 + cell_value = float(cell.attributes.get((OFFICENS, "value"))) + if convert_float: + val = int(cell_value) + if val == cell_value: + return val + return cell_value + elif cell_type == "percentage": + cell_value = cell.attributes.get((OFFICENS, "value")) + return float(cell_value) + elif cell_type == "string": + return self._get_cell_string_value(cell) + elif cell_type == "currency": + cell_value = cell.attributes.get((OFFICENS, "value")) + return float(cell_value) + elif cell_type == "date": + cell_value = cell.attributes.get((OFFICENS, "date-value")) + return pd.to_datetime(cell_value) + elif cell_type == "time": + stamp = pd.to_datetime(str(cell)) + # error: Item "str" of "Union[float, str, NaTType]" has no attribute "time" + return stamp.time() # type: ignore[union-attr] + else: + self.close() + raise ValueError(f"Unrecognized type {cell_type}") + + def _get_cell_string_value(self, cell) -> str: + """ + Find and decode OpenDocument text:s tags that represent + a run length encoded sequence of space characters. 
+ """ + from odf.element import Element + from odf.namespaces import TEXTNS + from odf.text import S + + text_s = S().qname + + value = [] + + for fragment in cell.childNodes: + if isinstance(fragment, Element): + if fragment.qname == text_s: + spaces = int(fragment.attributes.get((TEXTNS, "c"), 1)) + value.append(" " * spaces) + else: + # recursive impl needed in case of nested fragments + # with multiple spaces + # https://github.com/pandas-dev/pandas/pull/36175#discussion_r484639704 + value.append(self._get_cell_string_value(fragment)) + else: + value.append(str(fragment)) + return "".join(value) diff --git a/.local/lib/python3.8/site-packages/pandas/io/excel/_odswriter.py b/.local/lib/python3.8/site-packages/pandas/io/excel/_odswriter.py new file mode 100644 index 0000000..d4fe368 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/excel/_odswriter.py @@ -0,0 +1,303 @@ +from __future__ import annotations + +from collections import defaultdict +import datetime +from typing import ( + Any, + DefaultDict, +) + +import pandas._libs.json as json +from pandas._typing import StorageOptions + +from pandas.io.excel._base import ExcelWriter +from pandas.io.excel._util import ( + combine_kwargs, + validate_freeze_panes, +) +from pandas.io.formats.excel import ExcelCell + + +class ODSWriter(ExcelWriter): + engine = "odf" + supported_extensions = (".ods",) + + def __init__( + self, + path: str, + engine: str | None = None, + date_format=None, + datetime_format=None, + mode: str = "w", + storage_options: StorageOptions = None, + if_sheet_exists: str | None = None, + engine_kwargs: dict[str, Any] | None = None, + **kwargs, + ): + from odf.opendocument import OpenDocumentSpreadsheet + + if mode == "a": + raise ValueError("Append mode is not supported with odf!") + + super().__init__( + path, + mode=mode, + storage_options=storage_options, + if_sheet_exists=if_sheet_exists, + engine_kwargs=engine_kwargs, + ) + + engine_kwargs = combine_kwargs(engine_kwargs, kwargs) + + self.book = OpenDocumentSpreadsheet(**engine_kwargs) + self._style_dict: dict[str, str] = {} + + def save(self) -> None: + """ + Save workbook to disk. 
+ """ + for sheet in self.sheets.values(): + self.book.spreadsheet.addElement(sheet) + self.book.save(self.handles.handle) + + def write_cells( + self, + cells: list[ExcelCell], + sheet_name: str | None = None, + startrow: int = 0, + startcol: int = 0, + freeze_panes: tuple[int, int] | None = None, + ) -> None: + """ + Write the frame cells using odf + """ + from odf.table import ( + Table, + TableCell, + TableRow, + ) + from odf.text import P + + sheet_name = self._get_sheet_name(sheet_name) + assert sheet_name is not None + + if sheet_name in self.sheets: + wks = self.sheets[sheet_name] + else: + wks = Table(name=sheet_name) + self.sheets[sheet_name] = wks + + if validate_freeze_panes(freeze_panes): + assert freeze_panes is not None + self._create_freeze_panes(sheet_name, freeze_panes) + + for _ in range(startrow): + wks.addElement(TableRow()) + + rows: DefaultDict = defaultdict(TableRow) + col_count: DefaultDict = defaultdict(int) + + for cell in sorted(cells, key=lambda cell: (cell.row, cell.col)): + # only add empty cells if the row is still empty + if not col_count[cell.row]: + for _ in range(startcol): + rows[cell.row].addElement(TableCell()) + + # fill with empty cells if needed + for _ in range(cell.col - col_count[cell.row]): + rows[cell.row].addElement(TableCell()) + col_count[cell.row] += 1 + + pvalue, tc = self._make_table_cell(cell) + rows[cell.row].addElement(tc) + col_count[cell.row] += 1 + p = P(text=pvalue) + tc.addElement(p) + + # add all rows to the sheet + for row_nr in range(max(rows.keys()) + 1): + wks.addElement(rows[row_nr]) + + def _make_table_cell_attributes(self, cell) -> dict[str, int | str]: + """Convert cell attributes to OpenDocument attributes + + Parameters + ---------- + cell : ExcelCell + Spreadsheet cell data + + Returns + ------- + attributes : Dict[str, Union[int, str]] + Dictionary with attributes and attribute values + """ + attributes: dict[str, int | str] = {} + style_name = self._process_style(cell.style) + if style_name is not None: + attributes["stylename"] = style_name + if cell.mergestart is not None and cell.mergeend is not None: + attributes["numberrowsspanned"] = max(1, cell.mergestart) + attributes["numbercolumnsspanned"] = cell.mergeend + return attributes + + def _make_table_cell(self, cell) -> tuple[object, Any]: + """Convert cell data to an OpenDocument spreadsheet cell + + Parameters + ---------- + cell : ExcelCell + Spreadsheet cell data + + Returns + ------- + pvalue, cell : Tuple[str, TableCell] + Display value, Cell value + """ + from odf.table import TableCell + + attributes = self._make_table_cell_attributes(cell) + val, fmt = self._value_with_fmt(cell.val) + pvalue = value = val + if isinstance(val, bool): + value = str(val).lower() + pvalue = str(val).upper() + if isinstance(val, datetime.datetime): + value = val.isoformat() + pvalue = val.strftime("%c") + return ( + pvalue, + TableCell(valuetype="date", datevalue=value, attributes=attributes), + ) + elif isinstance(val, datetime.date): + value = val.strftime("%Y-%m-%d") + pvalue = val.strftime("%x") + return ( + pvalue, + TableCell(valuetype="date", datevalue=value, attributes=attributes), + ) + else: + class_to_cell_type = { + str: "string", + int: "float", + float: "float", + bool: "boolean", + } + return ( + pvalue, + TableCell( + valuetype=class_to_cell_type[type(val)], + value=value, + attributes=attributes, + ), + ) + + def _process_style(self, style: dict[str, Any]) -> str: + """Convert a style dictionary to a OpenDocument style sheet + + Parameters + ---------- + 
style : Dict + Style dictionary + + Returns + ------- + style_key : str + Unique style key for later reference in sheet + """ + from odf.style import ( + ParagraphProperties, + Style, + TableCellProperties, + TextProperties, + ) + + if style is None: + return None + style_key = json.dumps(style) + if style_key in self._style_dict: + return self._style_dict[style_key] + name = f"pd{len(self._style_dict)+1}" + self._style_dict[style_key] = name + odf_style = Style(name=name, family="table-cell") + if "font" in style: + font = style["font"] + if font.get("bold", False): + odf_style.addElement(TextProperties(fontweight="bold")) + if "borders" in style: + borders = style["borders"] + for side, thickness in borders.items(): + thickness_translation = {"thin": "0.75pt solid #000000"} + odf_style.addElement( + TableCellProperties( + attributes={f"border{side}": thickness_translation[thickness]} + ) + ) + if "alignment" in style: + alignment = style["alignment"] + horizontal = alignment.get("horizontal") + if horizontal: + odf_style.addElement(ParagraphProperties(textalign=horizontal)) + vertical = alignment.get("vertical") + if vertical: + odf_style.addElement(TableCellProperties(verticalalign=vertical)) + self.book.styles.addElement(odf_style) + return name + + def _create_freeze_panes( + self, sheet_name: str, freeze_panes: tuple[int, int] + ) -> None: + """ + Create freeze panes in the sheet. + + Parameters + ---------- + sheet_name : str + Name of the spreadsheet + freeze_panes : tuple of (int, int) + Freeze pane location x and y + """ + from odf.config import ( + ConfigItem, + ConfigItemMapEntry, + ConfigItemMapIndexed, + ConfigItemMapNamed, + ConfigItemSet, + ) + + config_item_set = ConfigItemSet(name="ooo:view-settings") + self.book.settings.addElement(config_item_set) + + config_item_map_indexed = ConfigItemMapIndexed(name="Views") + config_item_set.addElement(config_item_map_indexed) + + config_item_map_entry = ConfigItemMapEntry() + config_item_map_indexed.addElement(config_item_map_entry) + + config_item_map_named = ConfigItemMapNamed(name="Tables") + config_item_map_entry.addElement(config_item_map_named) + + config_item_map_entry = ConfigItemMapEntry(name=sheet_name) + config_item_map_named.addElement(config_item_map_entry) + + config_item_map_entry.addElement( + ConfigItem(name="HorizontalSplitMode", type="short", text="2") + ) + config_item_map_entry.addElement( + ConfigItem(name="VerticalSplitMode", type="short", text="2") + ) + config_item_map_entry.addElement( + ConfigItem( + name="HorizontalSplitPosition", type="int", text=str(freeze_panes[0]) + ) + ) + config_item_map_entry.addElement( + ConfigItem( + name="VerticalSplitPosition", type="int", text=str(freeze_panes[1]) + ) + ) + config_item_map_entry.addElement( + ConfigItem(name="PositionRight", type="int", text=str(freeze_panes[0])) + ) + config_item_map_entry.addElement( + ConfigItem(name="PositionBottom", type="int", text=str(freeze_panes[1])) + ) diff --git a/.local/lib/python3.8/site-packages/pandas/io/excel/_openpyxl.py b/.local/lib/python3.8/site-packages/pandas/io/excel/_openpyxl.py new file mode 100644 index 0000000..27c03d4 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/excel/_openpyxl.py @@ -0,0 +1,597 @@ +from __future__ import annotations + +import mmap +from typing import ( + TYPE_CHECKING, + Any, +) + +import numpy as np + +from pandas._typing import ( + FilePath, + ReadBuffer, + Scalar, + StorageOptions, +) +from pandas.compat._optional import import_optional_dependency + +from 
pandas.io.excel._base import ( + BaseExcelReader, + ExcelWriter, +) +from pandas.io.excel._util import ( + combine_kwargs, + validate_freeze_panes, +) + +if TYPE_CHECKING: + from openpyxl.descriptors.serialisable import Serialisable + + +class OpenpyxlWriter(ExcelWriter): + engine = "openpyxl" + supported_extensions = (".xlsx", ".xlsm") + + def __init__( + self, + path, + engine=None, + date_format=None, + datetime_format=None, + mode: str = "w", + storage_options: StorageOptions = None, + if_sheet_exists: str | None = None, + engine_kwargs: dict[str, Any] | None = None, + **kwargs, + ): + # Use the openpyxl module as the Excel writer. + from openpyxl.workbook import Workbook + + engine_kwargs = combine_kwargs(engine_kwargs, kwargs) + + super().__init__( + path, + mode=mode, + storage_options=storage_options, + if_sheet_exists=if_sheet_exists, + engine_kwargs=engine_kwargs, + ) + + # ExcelWriter replaced "a" by "r+" to allow us to first read the excel file from + # the file and later write to it + if "r+" in self.mode: # Load from existing workbook + from openpyxl import load_workbook + + self.book = load_workbook(self.handles.handle, **engine_kwargs) + self.handles.handle.seek(0) + self.sheets = {name: self.book[name] for name in self.book.sheetnames} + + else: + # Create workbook object with default optimized_write=True. + self.book = Workbook(**engine_kwargs) + + if self.book.worksheets: + self.book.remove(self.book.worksheets[0]) + + def save(self): + """ + Save workbook to disk. + """ + self.book.save(self.handles.handle) + if "r+" in self.mode and not isinstance(self.handles.handle, mmap.mmap): + # truncate file to the written content + self.handles.handle.truncate() + + @classmethod + def _convert_to_style_kwargs(cls, style_dict: dict) -> dict[str, Serialisable]: + """ + Convert a style_dict to a set of kwargs suitable for initializing + or updating-on-copy an openpyxl v2 style object. + + Parameters + ---------- + style_dict : dict + A dict with zero or more of the following keys (or their synonyms). + 'font' + 'fill' + 'border' ('borders') + 'alignment' + 'number_format' + 'protection' + + Returns + ------- + style_kwargs : dict + A dict with the same, normalized keys as ``style_dict`` but each + value has been replaced with a native openpyxl style object of the + appropriate class. + """ + _style_key_map = {"borders": "border"} + + style_kwargs: dict[str, Serialisable] = {} + for k, v in style_dict.items(): + if k in _style_key_map: + k = _style_key_map[k] + _conv_to_x = getattr(cls, f"_convert_to_{k}", lambda x: None) + new_v = _conv_to_x(v) + if new_v: + style_kwargs[k] = new_v + + return style_kwargs + + @classmethod + def _convert_to_color(cls, color_spec): + """ + Convert ``color_spec`` to an openpyxl v2 Color object. + + Parameters + ---------- + color_spec : str, dict + A 32-bit ARGB hex string, or a dict with zero or more of the + following keys. + 'rgb' + 'indexed' + 'auto' + 'theme' + 'tint' + 'index' + 'type' + + Returns + ------- + color : openpyxl.styles.Color + """ + from openpyxl.styles import Color + + if isinstance(color_spec, str): + return Color(color_spec) + else: + return Color(**color_spec) + + @classmethod + def _convert_to_font(cls, font_dict): + """ + Convert ``font_dict`` to an openpyxl v2 Font object. + + Parameters + ---------- + font_dict : dict + A dict with zero or more of the following keys (or their synonyms). 
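A hedged illustration of the style conversion above; `_convert_to_style_kwargs` and `_convert_to_color` are private helpers, so calling them directly is for demonstration only (assumes openpyxl is installed):

from pandas.io.excel._openpyxl import OpenpyxlWriter

# each recognised style group is converted to the matching openpyxl object
style = {"font": {"bold": True, "color": "FF0000FF"}}
kwargs = OpenpyxlWriter._convert_to_style_kwargs(style)
print(type(kwargs["font"]))      # <class 'openpyxl.styles.fonts.Font'>
print(kwargs["font"].bold)       # True
print(kwargs["font"].color.rgb)  # 'FF0000FF'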
+ 'name' + 'size' ('sz') + 'bold' ('b') + 'italic' ('i') + 'underline' ('u') + 'strikethrough' ('strike') + 'color' + 'vertAlign' ('vertalign') + 'charset' + 'scheme' + 'family' + 'outline' + 'shadow' + 'condense' + + Returns + ------- + font : openpyxl.styles.Font + """ + from openpyxl.styles import Font + + _font_key_map = { + "sz": "size", + "b": "bold", + "i": "italic", + "u": "underline", + "strike": "strikethrough", + "vertalign": "vertAlign", + } + + font_kwargs = {} + for k, v in font_dict.items(): + if k in _font_key_map: + k = _font_key_map[k] + if k == "color": + v = cls._convert_to_color(v) + font_kwargs[k] = v + + return Font(**font_kwargs) + + @classmethod + def _convert_to_stop(cls, stop_seq): + """ + Convert ``stop_seq`` to a list of openpyxl v2 Color objects, + suitable for initializing the ``GradientFill`` ``stop`` parameter. + + Parameters + ---------- + stop_seq : iterable + An iterable that yields objects suitable for consumption by + ``_convert_to_color``. + + Returns + ------- + stop : list of openpyxl.styles.Color + """ + return map(cls._convert_to_color, stop_seq) + + @classmethod + def _convert_to_fill(cls, fill_dict): + """ + Convert ``fill_dict`` to an openpyxl v2 Fill object. + + Parameters + ---------- + fill_dict : dict + A dict with one or more of the following keys (or their synonyms), + 'fill_type' ('patternType', 'patterntype') + 'start_color' ('fgColor', 'fgcolor') + 'end_color' ('bgColor', 'bgcolor') + or one or more of the following keys (or their synonyms). + 'type' ('fill_type') + 'degree' + 'left' + 'right' + 'top' + 'bottom' + 'stop' + + Returns + ------- + fill : openpyxl.styles.Fill + """ + from openpyxl.styles import ( + GradientFill, + PatternFill, + ) + + _pattern_fill_key_map = { + "patternType": "fill_type", + "patterntype": "fill_type", + "fgColor": "start_color", + "fgcolor": "start_color", + "bgColor": "end_color", + "bgcolor": "end_color", + } + + _gradient_fill_key_map = {"fill_type": "type"} + + pfill_kwargs = {} + gfill_kwargs = {} + for k, v in fill_dict.items(): + pk = gk = None + if k in _pattern_fill_key_map: + pk = _pattern_fill_key_map[k] + if k in _gradient_fill_key_map: + gk = _gradient_fill_key_map[k] + if pk in ["start_color", "end_color"]: + v = cls._convert_to_color(v) + if gk == "stop": + v = cls._convert_to_stop(v) + if pk: + pfill_kwargs[pk] = v + elif gk: + gfill_kwargs[gk] = v + else: + pfill_kwargs[k] = v + gfill_kwargs[k] = v + + try: + return PatternFill(**pfill_kwargs) + except TypeError: + return GradientFill(**gfill_kwargs) + + @classmethod + def _convert_to_side(cls, side_spec): + """ + Convert ``side_spec`` to an openpyxl v2 Side object. + + Parameters + ---------- + side_spec : str, dict + A string specifying the border style, or a dict with zero or more + of the following keys (or their synonyms). + 'style' ('border_style') + 'color' + + Returns + ------- + side : openpyxl.styles.Side + """ + from openpyxl.styles import Side + + _side_key_map = {"border_style": "style"} + + if isinstance(side_spec, str): + return Side(style=side_spec) + + side_kwargs = {} + for k, v in side_spec.items(): + if k in _side_key_map: + k = _side_key_map[k] + if k == "color": + v = cls._convert_to_color(v) + side_kwargs[k] = v + + return Side(**side_kwargs) + + @classmethod + def _convert_to_border(cls, border_dict): + """ + Convert ``border_dict`` to an openpyxl v2 Border object. + + Parameters + ---------- + border_dict : dict + A dict with zero or more of the following keys (or their synonyms). 
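Likewise for fills and sides, a small hedged sketch of the synonym normalization above (private helpers, demonstration only; assumes openpyxl):

from pandas.io.excel._openpyxl import OpenpyxlWriter

# 'patternType'/'fgColor' synonyms become PatternFill keyword arguments
fill = OpenpyxlWriter._convert_to_fill({"patternType": "solid", "fgColor": "FFFFFF00"})
print(type(fill).__name__)  # PatternFill
print(fill.patternType)     # 'solid'

# a bare string is shorthand for the border style
side = OpenpyxlWriter._convert_to_side("thin")
print(side.style)           # 'thin'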
+ 'left' + 'right' + 'top' + 'bottom' + 'diagonal' + 'diagonal_direction' + 'vertical' + 'horizontal' + 'diagonalUp' ('diagonalup') + 'diagonalDown' ('diagonaldown') + 'outline' + + Returns + ------- + border : openpyxl.styles.Border + """ + from openpyxl.styles import Border + + _border_key_map = {"diagonalup": "diagonalUp", "diagonaldown": "diagonalDown"} + + border_kwargs = {} + for k, v in border_dict.items(): + if k in _border_key_map: + k = _border_key_map[k] + if k == "color": + v = cls._convert_to_color(v) + if k in ["left", "right", "top", "bottom", "diagonal"]: + v = cls._convert_to_side(v) + border_kwargs[k] = v + + return Border(**border_kwargs) + + @classmethod + def _convert_to_alignment(cls, alignment_dict): + """ + Convert ``alignment_dict`` to an openpyxl v2 Alignment object. + + Parameters + ---------- + alignment_dict : dict + A dict with zero or more of the following keys (or their synonyms). + 'horizontal' + 'vertical' + 'text_rotation' + 'wrap_text' + 'shrink_to_fit' + 'indent' + Returns + ------- + alignment : openpyxl.styles.Alignment + """ + from openpyxl.styles import Alignment + + return Alignment(**alignment_dict) + + @classmethod + def _convert_to_number_format(cls, number_format_dict): + """ + Convert ``number_format_dict`` to an openpyxl v2.1.0 number format + initializer. + + Parameters + ---------- + number_format_dict : dict + A dict with zero or more of the following keys. + 'format_code' : str + + Returns + ------- + number_format : str + """ + return number_format_dict["format_code"] + + @classmethod + def _convert_to_protection(cls, protection_dict): + """ + Convert ``protection_dict`` to an openpyxl v2 Protection object. + + Parameters + ---------- + protection_dict : dict + A dict with zero or more of the following keys. + 'locked' + 'hidden' + + Returns + ------- + """ + from openpyxl.styles import Protection + + return Protection(**protection_dict) + + def write_cells( + self, cells, sheet_name=None, startrow=0, startcol=0, freeze_panes=None + ): + # Write the frame cells using openpyxl. + sheet_name = self._get_sheet_name(sheet_name) + + _style_cache: dict[str, dict[str, Serialisable]] = {} + + if sheet_name in self.sheets and self.if_sheet_exists != "new": + if "r+" in self.mode: + if self.if_sheet_exists == "replace": + old_wks = self.sheets[sheet_name] + target_index = self.book.index(old_wks) + del self.book[sheet_name] + wks = self.book.create_sheet(sheet_name, target_index) + self.sheets[sheet_name] = wks + elif self.if_sheet_exists == "error": + raise ValueError( + f"Sheet '{sheet_name}' already exists and " + f"if_sheet_exists is set to 'error'." + ) + elif self.if_sheet_exists == "overlay": + wks = self.sheets[sheet_name] + else: + raise ValueError( + f"'{self.if_sheet_exists}' is not valid for if_sheet_exists. " + "Valid options are 'error', 'new', 'replace' and 'overlay'." 
+ ) + else: + wks = self.sheets[sheet_name] + else: + wks = self.book.create_sheet() + wks.title = sheet_name + self.sheets[sheet_name] = wks + + if validate_freeze_panes(freeze_panes): + wks.freeze_panes = wks.cell( + row=freeze_panes[0] + 1, column=freeze_panes[1] + 1 + ) + + for cell in cells: + xcell = wks.cell( + row=startrow + cell.row + 1, column=startcol + cell.col + 1 + ) + xcell.value, fmt = self._value_with_fmt(cell.val) + if fmt: + xcell.number_format = fmt + + style_kwargs: dict[str, Serialisable] | None = {} + if cell.style: + key = str(cell.style) + style_kwargs = _style_cache.get(key) + if style_kwargs is None: + style_kwargs = self._convert_to_style_kwargs(cell.style) + _style_cache[key] = style_kwargs + + if style_kwargs: + for k, v in style_kwargs.items(): + setattr(xcell, k, v) + + if cell.mergestart is not None and cell.mergeend is not None: + + wks.merge_cells( + start_row=startrow + cell.row + 1, + start_column=startcol + cell.col + 1, + end_column=startcol + cell.mergeend + 1, + end_row=startrow + cell.mergestart + 1, + ) + + # When cells are merged only the top-left cell is preserved + # The behaviour of the other cells in a merged range is + # undefined + if style_kwargs: + first_row = startrow + cell.row + 1 + last_row = startrow + cell.mergestart + 1 + first_col = startcol + cell.col + 1 + last_col = startcol + cell.mergeend + 1 + + for row in range(first_row, last_row + 1): + for col in range(first_col, last_col + 1): + if row == first_row and col == first_col: + # Ignore first cell. It is already handled. + continue + xcell = wks.cell(column=col, row=row) + for k, v in style_kwargs.items(): + setattr(xcell, k, v) + + +class OpenpyxlReader(BaseExcelReader): + def __init__( + self, + filepath_or_buffer: FilePath | ReadBuffer[bytes], + storage_options: StorageOptions = None, + ) -> None: + """ + Reader using openpyxl engine. + + Parameters + ---------- + filepath_or_buffer : str, path object or Workbook + Object to be parsed. 
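The `if_sheet_exists` branches above are reached through append mode; a short sketch of the public API (assumes openpyxl is installed; the path is hypothetical):

import pandas as pd

df = pd.DataFrame({"a": [1, 2]})
df.to_excel("book.xlsx", sheet_name="data")

# mode="a" is rewritten to "r+" internally so the workbook is read first;
# if_sheet_exists picks the policy for the existing "data" sheet
with pd.ExcelWriter("book.xlsx", mode="a", if_sheet_exists="replace") as writer:
    df.to_excel(writer, sheet_name="data")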
+ storage_options : dict, optional + passed to fsspec for appropriate URLs (see ``_get_filepath_or_buffer``) + """ + import_optional_dependency("openpyxl") + super().__init__(filepath_or_buffer, storage_options=storage_options) + + @property + def _workbook_class(self): + from openpyxl import Workbook + + return Workbook + + def load_workbook(self, filepath_or_buffer: FilePath | ReadBuffer[bytes]): + from openpyxl import load_workbook + + return load_workbook( + filepath_or_buffer, read_only=True, data_only=True, keep_links=False + ) + + @property + def sheet_names(self) -> list[str]: + return [sheet.title for sheet in self.book.worksheets] + + def get_sheet_by_name(self, name: str): + self.raise_if_bad_sheet_by_name(name) + return self.book[name] + + def get_sheet_by_index(self, index: int): + self.raise_if_bad_sheet_by_index(index) + return self.book.worksheets[index] + + def _convert_cell(self, cell, convert_float: bool) -> Scalar: + + from openpyxl.cell.cell import ( + TYPE_ERROR, + TYPE_NUMERIC, + ) + + if cell.value is None: + return "" # compat with xlrd + elif cell.data_type == TYPE_ERROR: + return np.nan + elif not convert_float and cell.data_type == TYPE_NUMERIC: + return float(cell.value) + + return cell.value + + def get_sheet_data(self, sheet, convert_float: bool) -> list[list[Scalar]]: + + if self.book.read_only: + sheet.reset_dimensions() + + data: list[list[Scalar]] = [] + last_row_with_data = -1 + for row_number, row in enumerate(sheet.rows): + converted_row = [self._convert_cell(cell, convert_float) for cell in row] + while converted_row and converted_row[-1] == "": + # trim trailing empty elements + converted_row.pop() + if converted_row: + last_row_with_data = row_number + data.append(converted_row) + + # Trim trailing empty rows + data = data[: last_row_with_data + 1] + + if len(data) > 0: + # extend rows to max width + max_width = max(len(data_row) for data_row in data) + if min(len(data_row) for data_row in data) < max_width: + empty_cell: list[Scalar] = [""] + data = [ + data_row + (max_width - len(data_row)) * empty_cell + for data_row in data + ] + + return data diff --git a/.local/lib/python3.8/site-packages/pandas/io/excel/_pyxlsb.py b/.local/lib/python3.8/site-packages/pandas/io/excel/_pyxlsb.py new file mode 100644 index 0000000..9284cf9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/excel/_pyxlsb.py @@ -0,0 +1,103 @@ +# pyright: reportMissingImports=false +from __future__ import annotations + +from pandas._typing import ( + FilePath, + ReadBuffer, + Scalar, + StorageOptions, +) +from pandas.compat._optional import import_optional_dependency + +from pandas.io.excel._base import BaseExcelReader + + +class PyxlsbReader(BaseExcelReader): + def __init__( + self, + filepath_or_buffer: FilePath | ReadBuffer[bytes], + storage_options: StorageOptions = None, + ): + """ + Reader using pyxlsb engine. + + Parameters + ---------- + filepath_or_buffer : str, path object, or Workbook + Object to be parsed. 
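Reading the file back goes through OpenpyxlReader above: the workbook is opened read_only and data_only (cached formula results rather than formulas), empty cells come back as "" for xlrd compatibility, trailing empty rows are trimmed, and ragged rows are padded to the widest row. A hedged sketch (path hypothetical):

import pandas as pd

df = pd.read_excel("book.xlsx", sheet_name="data", engine="openpyxl")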
+        storage_options : dict, optional
+            passed to fsspec for appropriate URLs (see ``_get_filepath_or_buffer``)
+        """
+        import_optional_dependency("pyxlsb")
+        # This will call load_workbook on the filepath or buffer
+        # And set the result to the book-attribute
+        super().__init__(filepath_or_buffer, storage_options=storage_options)
+
+    @property
+    def _workbook_class(self):
+        from pyxlsb import Workbook
+
+        return Workbook
+
+    def load_workbook(self, filepath_or_buffer: FilePath | ReadBuffer[bytes]):
+        from pyxlsb import open_workbook
+
+        # TODO: hack in buffer capability
+        # This might need some modifications to the Pyxlsb library
+        # Actual work for opening it is in xlsbpackage.py, line 20-ish
+
+        return open_workbook(filepath_or_buffer)
+
+    @property
+    def sheet_names(self) -> list[str]:
+        return self.book.sheets
+
+    def get_sheet_by_name(self, name: str):
+        self.raise_if_bad_sheet_by_name(name)
+        return self.book.get_sheet(name)
+
+    def get_sheet_by_index(self, index: int):
+        self.raise_if_bad_sheet_by_index(index)
+        # pyxlsb sheets are indexed from 1 onwards
+        # There's a fix for this in the source, but the pypi package doesn't have it
+        return self.book.get_sheet(index + 1)
+
+    def _convert_cell(self, cell, convert_float: bool) -> Scalar:
+        # TODO: there is no way to distinguish between floats and datetimes in pyxlsb
+        # This means that there is no way to read datetime types from an xlsb file yet
+        if cell.v is None:
+            return ""  # Prevents non-named columns from not showing up as Unnamed: i
+        if isinstance(cell.v, float) and convert_float:
+            val = int(cell.v)
+            if val == cell.v:
+                return val
+            else:
+                return float(cell.v)
+
+        return cell.v
+
+    def get_sheet_data(self, sheet, convert_float: bool) -> list[list[Scalar]]:
+        data: list[list[Scalar]] = []
+        previous_row_number = -1
+        # When sparse=True the rows can have different lengths and empty rows are
+        # not returned. The cells are namedtuples of row, col, value (r, c, v).
+        for row in sheet.rows(sparse=True):
+            row_number = row[0].r
+            converted_row = [self._convert_cell(cell, convert_float) for cell in row]
+            while converted_row and converted_row[-1] == "":
+                # trim trailing empty elements
+                converted_row.pop()
+            if converted_row:
+                data.extend([[]] * (row_number - previous_row_number - 1))
+                data.append(converted_row)
+                previous_row_number = row_number
+        if data:
+            # extend rows to max_width
+            max_width = max(len(data_row) for data_row in data)
+            if min(len(data_row) for data_row in data) < max_width:
+                empty_cell: list[Scalar] = [""]
+                data = [
+                    data_row + (max_width - len(data_row)) * empty_cell
+                    for data_row in data
+                ]
+        return data
diff --git a/.local/lib/python3.8/site-packages/pandas/io/excel/_util.py b/.local/lib/python3.8/site-packages/pandas/io/excel/_util.py
new file mode 100644
index 0000000..6be5ef0
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/io/excel/_util.py
@@ -0,0 +1,335 @@
+from __future__ import annotations
+
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Hashable,
+    Iterable,
+    Literal,
+    MutableMapping,
+    Sequence,
+    TypeVar,
+    overload,
+)
+
+from pandas.compat._optional import import_optional_dependency
+
+from pandas.core.dtypes.common import (
+    is_integer,
+    is_list_like,
+)
+
+if TYPE_CHECKING:
+    from pandas.io.excel._base import ExcelWriter
+
+    ExcelWriter_t = type[ExcelWriter]
+    usecols_func = TypeVar("usecols_func", bound=Callable[[Hashable], object])
+
+_writers: MutableMapping[str, ExcelWriter_t] = {}
+
+
+def register_writer(klass: ExcelWriter_t) -> None:
+    """
+    Add engine to the excel writer registry.
+
+    You must use this method to integrate with ``to_excel``.
+
+    Parameters
+    ----------
+    klass : ExcelWriter
+    """
+    if not callable(klass):
+        raise ValueError("Can only register callables as engines")
+    engine_name = klass.engine
+    # for mypy
+    assert isinstance(engine_name, str)
+    _writers[engine_name] = klass
+
+
+def get_default_engine(ext: str, mode: Literal["reader", "writer"] = "reader") -> str:
+    """
+    Return the default reader/writer for the given extension.
+
+    Parameters
+    ----------
+    ext : str
+        The excel file extension for which to get the default engine.
+    mode : str {'reader', 'writer'}
+        Whether to get the default engine for reading or writing.
+        Either 'reader' or 'writer'
+
+    Returns
+    -------
+    str
+        The default engine for the extension.
+    """
+    _default_readers = {
+        "xlsx": "openpyxl",
+        "xlsm": "openpyxl",
+        "xlsb": "pyxlsb",
+        "xls": "xlrd",
+        "ods": "odf",
+    }
+    _default_writers = {
+        "xlsx": "openpyxl",
+        "xlsm": "openpyxl",
+        "xlsb": "pyxlsb",
+        "xls": "xlwt",
+        "ods": "odf",
+    }
+    assert mode in ["reader", "writer"]
+    if mode == "writer":
+        # Prefer xlsxwriter over openpyxl if installed
+        xlsxwriter = import_optional_dependency("xlsxwriter", errors="warn")
+        if xlsxwriter:
+            _default_writers["xlsx"] = "xlsxwriter"
+        return _default_writers[ext]
+    else:
+        return _default_readers[ext]
+
+
+def get_writer(engine_name: str) -> ExcelWriter_t:
+    try:
+        return _writers[engine_name]
+    except KeyError as err:
+        raise ValueError(f"No Excel writer '{engine_name}'") from err
+
+
+def _excel2num(x: str) -> int:
+    """
+    Convert Excel column name like 'AB' to 0-based column index.
+
+    Parameters
+    ----------
+    x : str
+        The Excel column name to convert to a 0-based column index.
+
+    Returns
+    -------
+    num : int
+        The column index corresponding to the name.
+
+    Raises
+    ------
+    ValueError
+        Part of the Excel column name was invalid.
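A hedged usage sketch for the pyxlsb reader above (requires the optional pyxlsb package; path hypothetical). Note the caveat in `_convert_cell`: pyxlsb cannot tell datetimes from floats, so date columns arrive numeric and need converting afterwards:

import pandas as pd

df = pd.read_excel("workbook.xlsb", engine="pyxlsb")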
+ """ + index = 0 + + for c in x.upper().strip(): + cp = ord(c) + + if cp < ord("A") or cp > ord("Z"): + raise ValueError(f"Invalid column name: {x}") + + index = index * 26 + cp - ord("A") + 1 + + return index - 1 + + +def _range2cols(areas: str) -> list[int]: + """ + Convert comma separated list of column names and ranges to indices. + + Parameters + ---------- + areas : str + A string containing a sequence of column ranges (or areas). + + Returns + ------- + cols : list + A list of 0-based column indices. + + Examples + -------- + >>> _range2cols('A:E') + [0, 1, 2, 3, 4] + >>> _range2cols('A,C,Z:AB') + [0, 2, 25, 26, 27] + """ + cols: list[int] = [] + + for rng in areas.split(","): + if ":" in rng: + rngs = rng.split(":") + cols.extend(range(_excel2num(rngs[0]), _excel2num(rngs[1]) + 1)) + else: + cols.append(_excel2num(rng)) + + return cols + + +@overload +def maybe_convert_usecols(usecols: str | list[int]) -> list[int]: + ... + + +@overload +def maybe_convert_usecols(usecols: list[str]) -> list[str]: + ... + + +@overload +def maybe_convert_usecols(usecols: usecols_func) -> usecols_func: + ... + + +@overload +def maybe_convert_usecols(usecols: None) -> None: + ... + + +def maybe_convert_usecols( + usecols: str | list[int] | list[str] | usecols_func | None, +) -> None | list[int] | list[str] | usecols_func: + """ + Convert `usecols` into a compatible format for parsing in `parsers.py`. + + Parameters + ---------- + usecols : object + The use-columns object to potentially convert. + + Returns + ------- + converted : object + The compatible format of `usecols`. + """ + if usecols is None: + return usecols + + if is_integer(usecols): + raise ValueError( + "Passing an integer for `usecols` is no longer supported. " + "Please pass in a list of int from 0 to `usecols` inclusive instead." + ) + + if isinstance(usecols, str): + return _range2cols(usecols) + + return usecols + + +@overload +def validate_freeze_panes(freeze_panes: tuple[int, int]) -> Literal[True]: + ... + + +@overload +def validate_freeze_panes(freeze_panes: None) -> Literal[False]: + ... + + +def validate_freeze_panes(freeze_panes: tuple[int, int] | None) -> bool: + if freeze_panes is not None: + if len(freeze_panes) == 2 and all( + isinstance(item, int) for item in freeze_panes + ): + return True + + raise ValueError( + "freeze_panes must be of form (row, column) " + "where row and column are integers" + ) + + # freeze_panes wasn't specified, return False so it won't be applied + # to output sheet + return False + + +def fill_mi_header( + row: list[Hashable], control_row: list[bool] +) -> tuple[list[Hashable], list[bool]]: + """ + Forward fill blank entries in row but only inside the same parent index. + + Used for creating headers in Multiindex. + + Parameters + ---------- + row : list + List of items in a single row. + control_row : list of bool + Helps to determine if particular column is in same parent index as the + previous value. Used to stop propagation of empty cells between + different indexes. + + Returns + ------- + Returns changed row and control_row + """ + last = row[0] + for i in range(1, len(row)): + if not control_row[i]: + last = row[i] + + if row[i] == "" or row[i] is None: + row[i] = last + else: + control_row[i] = False + last = row[i] + + return row, control_row + + +def pop_header_name( + row: list[Hashable], index_col: int | Sequence[int] +) -> tuple[Hashable | None, list[Hashable]]: + """ + Pop the header name for MultiIndex parsing. 
+ + Parameters + ---------- + row : list + The data row to parse for the header name. + index_col : int, list + The index columns for our data. Assumed to be non-null. + + Returns + ------- + header_name : str + The extracted header name. + trimmed_row : list + The original data row with the header name removed. + """ + # Pop out header name and fill w/blank. + if is_list_like(index_col): + assert isinstance(index_col, Iterable) + i = max(index_col) + else: + assert not isinstance(index_col, Iterable) + i = index_col + + header_name = row[i] + header_name = None if header_name == "" else header_name + + return header_name, row[:i] + [""] + row[i + 1 :] + + +def combine_kwargs(engine_kwargs: dict[str, Any] | None, kwargs: dict) -> dict: + """ + Used to combine two sources of kwargs for the backend engine. + + Use of kwargs is deprecated, this function is solely for use in 1.3 and should + be removed in 1.4/2.0. Also _base.ExcelWriter.__new__ ensures either engine_kwargs + or kwargs must be None or empty respectively. + + Parameters + ---------- + engine_kwargs: dict + kwargs to be passed through to the engine. + kwargs: dict + kwargs to be psased through to the engine (deprecated) + + Returns + ------- + engine_kwargs combined with kwargs + """ + if engine_kwargs is None: + result = {} + else: + result = engine_kwargs.copy() + result.update(kwargs) + return result diff --git a/.local/lib/python3.8/site-packages/pandas/io/excel/_xlrd.py b/.local/lib/python3.8/site-packages/pandas/io/excel/_xlrd.py new file mode 100644 index 0000000..eea0f1c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/excel/_xlrd.py @@ -0,0 +1,112 @@ +from datetime import time + +import numpy as np + +from pandas._typing import StorageOptions +from pandas.compat._optional import import_optional_dependency + +from pandas.io.excel._base import BaseExcelReader + + +class XlrdReader(BaseExcelReader): + def __init__(self, filepath_or_buffer, storage_options: StorageOptions = None): + """ + Reader using xlrd engine. + + Parameters + ---------- + filepath_or_buffer : str, path object or Workbook + Object to be parsed. + storage_options : dict, optional + passed to fsspec for appropriate URLs (see ``_get_filepath_or_buffer``) + """ + err_msg = "Install xlrd >= 1.0.0 for Excel support" + import_optional_dependency("xlrd", extra=err_msg) + super().__init__(filepath_or_buffer, storage_options=storage_options) + + @property + def _workbook_class(self): + from xlrd import Book + + return Book + + def load_workbook(self, filepath_or_buffer): + from xlrd import open_workbook + + if hasattr(filepath_or_buffer, "read"): + data = filepath_or_buffer.read() + return open_workbook(file_contents=data) + else: + return open_workbook(filepath_or_buffer) + + @property + def sheet_names(self): + return self.book.sheet_names() + + def get_sheet_by_name(self, name): + self.raise_if_bad_sheet_by_name(name) + return self.book.sheet_by_name(name) + + def get_sheet_by_index(self, index): + self.raise_if_bad_sheet_by_index(index) + return self.book.sheet_by_index(index) + + def get_sheet_data(self, sheet, convert_float): + from xlrd import ( + XL_CELL_BOOLEAN, + XL_CELL_DATE, + XL_CELL_ERROR, + XL_CELL_NUMBER, + xldate, + ) + + epoch1904 = self.book.datemode + + def _parse_cell(cell_contents, cell_typ): + """ + converts the contents of the cell into a pandas appropriate object + """ + if cell_typ == XL_CELL_DATE: + + # Use the newer xlrd datetime handling. 
+ try: + cell_contents = xldate.xldate_as_datetime(cell_contents, epoch1904) + except OverflowError: + return cell_contents + + # Excel doesn't distinguish between dates and time, + # so we treat dates on the epoch as times only. + # Also, Excel supports 1900 and 1904 epochs. + year = (cell_contents.timetuple())[0:3] + if (not epoch1904 and year == (1899, 12, 31)) or ( + epoch1904 and year == (1904, 1, 1) + ): + cell_contents = time( + cell_contents.hour, + cell_contents.minute, + cell_contents.second, + cell_contents.microsecond, + ) + + elif cell_typ == XL_CELL_ERROR: + cell_contents = np.nan + elif cell_typ == XL_CELL_BOOLEAN: + cell_contents = bool(cell_contents) + elif convert_float and cell_typ == XL_CELL_NUMBER: + # GH5394 - Excel 'numbers' are always floats + # it's a minimal perf hit and less surprising + val = int(cell_contents) + if val == cell_contents: + cell_contents = val + return cell_contents + + data = [] + + for i in range(sheet.nrows): + row = [ + _parse_cell(value, typ) + for value, typ in zip(sheet.row_values(i), sheet.row_types(i)) + ] + data.append(row) + + return data diff --git a/.local/lib/python3.8/site-packages/pandas/io/excel/_xlsxwriter.py b/.local/lib/python3.8/site-packages/pandas/io/excel/_xlsxwriter.py new file mode 100644 index 0000000..06c73f2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/excel/_xlsxwriter.py @@ -0,0 +1,250 @@ +from __future__ import annotations + +from typing import Any + +import pandas._libs.json as json +from pandas._typing import StorageOptions + +from pandas.io.excel._base import ExcelWriter +from pandas.io.excel._util import ( + combine_kwargs, + validate_freeze_panes, +) + + +class _XlsxStyler: + # Map from openpyxl-oriented styles to flatter xlsxwriter representation + # Ordering necessary for both determinism and because some are keyed by + # prefixes of others. 
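As a side note on the xlrd `_parse_cell` logic above: a fractional serial on the 1900 epoch really does land on 1899-12-31, which is why pandas reports it as a plain time rather than a datetime. A quick check, assuming xlrd is installed:

from datetime import time

from xlrd import xldate

dt = xldate.xldate_as_datetime(0.5, 0)  # datemode 0 -> 1900 date system
print(dt)                               # 1899-12-31 12:00:00
print(time(dt.hour, dt.minute, dt.second, dt.microsecond))  # 12:00:00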
+ STYLE_MAPPING: dict[str, list[tuple[tuple[str, ...], str]]] = { + "font": [ + (("name",), "font_name"), + (("sz",), "font_size"), + (("size",), "font_size"), + (("color", "rgb"), "font_color"), + (("color",), "font_color"), + (("b",), "bold"), + (("bold",), "bold"), + (("i",), "italic"), + (("italic",), "italic"), + (("u",), "underline"), + (("underline",), "underline"), + (("strike",), "font_strikeout"), + (("vertAlign",), "font_script"), + (("vertalign",), "font_script"), + ], + "number_format": [(("format_code",), "num_format"), ((), "num_format")], + "protection": [(("locked",), "locked"), (("hidden",), "hidden")], + "alignment": [ + (("horizontal",), "align"), + (("vertical",), "valign"), + (("text_rotation",), "rotation"), + (("wrap_text",), "text_wrap"), + (("indent",), "indent"), + (("shrink_to_fit",), "shrink"), + ], + "fill": [ + (("patternType",), "pattern"), + (("patterntype",), "pattern"), + (("fill_type",), "pattern"), + (("start_color", "rgb"), "fg_color"), + (("fgColor", "rgb"), "fg_color"), + (("fgcolor", "rgb"), "fg_color"), + (("start_color",), "fg_color"), + (("fgColor",), "fg_color"), + (("fgcolor",), "fg_color"), + (("end_color", "rgb"), "bg_color"), + (("bgColor", "rgb"), "bg_color"), + (("bgcolor", "rgb"), "bg_color"), + (("end_color",), "bg_color"), + (("bgColor",), "bg_color"), + (("bgcolor",), "bg_color"), + ], + "border": [ + (("color", "rgb"), "border_color"), + (("color",), "border_color"), + (("style",), "border"), + (("top", "color", "rgb"), "top_color"), + (("top", "color"), "top_color"), + (("top", "style"), "top"), + (("top",), "top"), + (("right", "color", "rgb"), "right_color"), + (("right", "color"), "right_color"), + (("right", "style"), "right"), + (("right",), "right"), + (("bottom", "color", "rgb"), "bottom_color"), + (("bottom", "color"), "bottom_color"), + (("bottom", "style"), "bottom"), + (("bottom",), "bottom"), + (("left", "color", "rgb"), "left_color"), + (("left", "color"), "left_color"), + (("left", "style"), "left"), + (("left",), "left"), + ], + } + + @classmethod + def convert(cls, style_dict, num_format_str=None): + """ + converts a style_dict to an xlsxwriter format dict + + Parameters + ---------- + style_dict : style dictionary to convert + num_format_str : optional number format string + """ + # Create a XlsxWriter format object. 
+ props = {} + + if num_format_str is not None: + props["num_format"] = num_format_str + + if style_dict is None: + return props + + if "borders" in style_dict: + style_dict = style_dict.copy() + style_dict["border"] = style_dict.pop("borders") + + for style_group_key, style_group in style_dict.items(): + for src, dst in cls.STYLE_MAPPING.get(style_group_key, []): + # src is a sequence of keys into a nested dict + # dst is a flat key + if dst in props: + continue + v = style_group + for k in src: + try: + v = v[k] + except (KeyError, TypeError): + break + else: + props[dst] = v + + if isinstance(props.get("pattern"), str): + # TODO: support other fill patterns + props["pattern"] = 0 if props["pattern"] == "none" else 1 + + for k in ["border", "top", "right", "bottom", "left"]: + if isinstance(props.get(k), str): + try: + props[k] = [ + "none", + "thin", + "medium", + "dashed", + "dotted", + "thick", + "double", + "hair", + "mediumDashed", + "dashDot", + "mediumDashDot", + "dashDotDot", + "mediumDashDotDot", + "slantDashDot", + ].index(props[k]) + except ValueError: + props[k] = 2 + + if isinstance(props.get("font_script"), str): + props["font_script"] = ["baseline", "superscript", "subscript"].index( + props["font_script"] + ) + + if isinstance(props.get("underline"), str): + props["underline"] = { + "none": 0, + "single": 1, + "double": 2, + "singleAccounting": 33, + "doubleAccounting": 34, + }[props["underline"]] + + return props + + +class XlsxWriter(ExcelWriter): + engine = "xlsxwriter" + supported_extensions = (".xlsx",) + + def __init__( + self, + path, + engine=None, + date_format=None, + datetime_format=None, + mode: str = "w", + storage_options: StorageOptions = None, + if_sheet_exists: str | None = None, + engine_kwargs: dict[str, Any] | None = None, + **kwargs, + ): + # Use the xlsxwriter module as the Excel writer. + from xlsxwriter import Workbook + + engine_kwargs = combine_kwargs(engine_kwargs, kwargs) + + if mode == "a": + raise ValueError("Append mode is not supported with xlsxwriter!") + + super().__init__( + path, + engine=engine, + date_format=date_format, + datetime_format=datetime_format, + mode=mode, + storage_options=storage_options, + if_sheet_exists=if_sheet_exists, + engine_kwargs=engine_kwargs, + ) + + self.book = Workbook(self.handles.handle, **engine_kwargs) + + def save(self): + """ + Save workbook to disk. + """ + return self.book.close() + + def write_cells( + self, cells, sheet_name=None, startrow=0, startcol=0, freeze_panes=None + ): + # Write the frame cells using xlsxwriter. 
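A hedged demonstration of `_XlsxStyler.convert` above (private helper, demonstration only): openpyxl-flavoured style dicts flatten into xlsxwriter format properties, with string border styles replaced by their index in the style list:

from pandas.io.excel._xlsxwriter import _XlsxStyler

props = _XlsxStyler.convert(
    {"font": {"bold": True, "color": "FF0000"}, "borders": {"top": "thin"}},
    num_format_str="0.00",
)
print(props)
# {'num_format': '0.00', 'font_color': 'FF0000', 'bold': True, 'top': 1}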
+ sheet_name = self._get_sheet_name(sheet_name) + + if sheet_name in self.sheets: + wks = self.sheets[sheet_name] + else: + wks = self.book.add_worksheet(sheet_name) + self.sheets[sheet_name] = wks + + style_dict = {"null": None} + + if validate_freeze_panes(freeze_panes): + wks.freeze_panes(*(freeze_panes)) + + for cell in cells: + val, fmt = self._value_with_fmt(cell.val) + + stylekey = json.dumps(cell.style) + if fmt: + stylekey += fmt + + if stylekey in style_dict: + style = style_dict[stylekey] + else: + style = self.book.add_format(_XlsxStyler.convert(cell.style, fmt)) + style_dict[stylekey] = style + + if cell.mergestart is not None and cell.mergeend is not None: + wks.merge_range( + startrow + cell.row, + startcol + cell.col, + startrow + cell.mergestart, + startcol + cell.mergeend, + val, + style, + ) + else: + wks.write(startrow + cell.row, startcol + cell.col, val, style) diff --git a/.local/lib/python3.8/site-packages/pandas/io/excel/_xlwt.py b/.local/lib/python3.8/site-packages/pandas/io/excel/_xlwt.py new file mode 100644 index 0000000..a74c03f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/excel/_xlwt.py @@ -0,0 +1,172 @@ +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Any, +) + +import pandas._libs.json as json +from pandas._typing import StorageOptions + +from pandas.io.excel._base import ExcelWriter +from pandas.io.excel._util import ( + combine_kwargs, + validate_freeze_panes, +) + +if TYPE_CHECKING: + from xlwt import XFStyle + + +class XlwtWriter(ExcelWriter): + engine = "xlwt" + supported_extensions = (".xls",) + + def __init__( + self, + path, + engine=None, + date_format=None, + datetime_format=None, + encoding=None, + mode: str = "w", + storage_options: StorageOptions = None, + if_sheet_exists: str | None = None, + engine_kwargs: dict[str, Any] | None = None, + **kwargs, + ): + # Use the xlwt module as the Excel writer. + import xlwt + + engine_kwargs = combine_kwargs(engine_kwargs, kwargs) + + if mode == "a": + raise ValueError("Append mode is not supported with xlwt!") + + super().__init__( + path, + mode=mode, + storage_options=storage_options, + if_sheet_exists=if_sheet_exists, + engine_kwargs=engine_kwargs, + ) + + if encoding is None: + encoding = "ascii" + self.book = xlwt.Workbook(encoding=encoding, **engine_kwargs) + self.fm_datetime = xlwt.easyxf(num_format_str=self.datetime_format) + self.fm_date = xlwt.easyxf(num_format_str=self.date_format) + + def save(self): + """ + Save workbook to disk. 
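From the public side, the xlsxwriter path above is exercised like so (a write-only engine, so mode="a" raises as shown earlier; path hypothetical):

import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
# freeze_panes is validated and then handed to Worksheet.freeze_panes(row, col)
df.to_excel("out.xlsx", engine="xlsxwriter", freeze_panes=(1, 0))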
+ """ + if self.sheets: + # fails when the ExcelWriter is just opened and then closed + self.book.save(self.handles.handle) + + def write_cells( + self, cells, sheet_name=None, startrow=0, startcol=0, freeze_panes=None + ): + + sheet_name = self._get_sheet_name(sheet_name) + + if sheet_name in self.sheets: + wks = self.sheets[sheet_name] + else: + wks = self.book.add_sheet(sheet_name) + self.sheets[sheet_name] = wks + + if validate_freeze_panes(freeze_panes): + wks.set_panes_frozen(True) + wks.set_horz_split_pos(freeze_panes[0]) + wks.set_vert_split_pos(freeze_panes[1]) + + style_dict: dict[str, XFStyle] = {} + + for cell in cells: + val, fmt = self._value_with_fmt(cell.val) + + stylekey = json.dumps(cell.style) + if fmt: + stylekey += fmt + + if stylekey in style_dict: + style = style_dict[stylekey] + else: + style = self._convert_to_style(cell.style, fmt) + style_dict[stylekey] = style + + if cell.mergestart is not None and cell.mergeend is not None: + wks.write_merge( + startrow + cell.row, + startrow + cell.mergestart, + startcol + cell.col, + startcol + cell.mergeend, + val, + style, + ) + else: + wks.write(startrow + cell.row, startcol + cell.col, val, style) + + @classmethod + def _style_to_xlwt( + cls, item, firstlevel: bool = True, field_sep=",", line_sep=";" + ) -> str: + """ + helper which recursively generate an xlwt easy style string + for example: + + hstyle = {"font": {"bold": True}, + "border": {"top": "thin", + "right": "thin", + "bottom": "thin", + "left": "thin"}, + "align": {"horiz": "center"}} + will be converted to + font: bold on; \ + border: top thin, right thin, bottom thin, left thin; \ + align: horiz center; + """ + if hasattr(item, "items"): + if firstlevel: + it = [ + f"{key}: {cls._style_to_xlwt(value, False)}" + for key, value in item.items() + ] + out = f"{line_sep.join(it)} " + return out + else: + it = [ + f"{key} {cls._style_to_xlwt(value, False)}" + for key, value in item.items() + ] + out = f"{field_sep.join(it)} " + return out + else: + item = f"{item}" + item = item.replace("True", "on") + item = item.replace("False", "off") + return item + + @classmethod + def _convert_to_style(cls, style_dict, num_format_str=None): + """ + converts a style_dict to an xlwt style object + + Parameters + ---------- + style_dict : style dictionary to convert + num_format_str : optional number format string + """ + import xlwt + + if style_dict: + xlwt_stylestr = cls._style_to_xlwt(style_dict) + style = xlwt.easyxf(xlwt_stylestr, field_sep=",", line_sep=";") + else: + style = xlwt.XFStyle() + if num_format_str is not None: + style.num_format_str = num_format_str + + return style diff --git a/.local/lib/python3.8/site-packages/pandas/io/feather_format.py b/.local/lib/python3.8/site-packages/pandas/io/feather_format.py new file mode 100644 index 0000000..9813b91 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/feather_format.py @@ -0,0 +1,134 @@ +""" feather-format compat """ +from __future__ import annotations + +from typing import ( + Hashable, + Sequence, +) + +from pandas._typing import ( + FilePath, + ReadBuffer, + StorageOptions, + WriteBuffer, +) +from pandas.compat._optional import import_optional_dependency +from pandas.util._decorators import doc + +from pandas.core.api import ( + DataFrame, + Int64Index, + RangeIndex, +) +from pandas.core.shared_docs import _shared_docs + +from pandas.io.common import get_handle + + +@doc(storage_options=_shared_docs["storage_options"]) +def to_feather( + df: DataFrame, + path: FilePath | WriteBuffer[bytes], + 
storage_options: StorageOptions = None, + **kwargs, +): + """ + Write a DataFrame to the binary Feather format. + + Parameters + ---------- + df : DataFrame + path : str, path object, or file-like object + {storage_options} + + .. versionadded:: 1.2.0 + + **kwargs : + Additional keywords passed to `pyarrow.feather.write_feather`. + + .. versionadded:: 1.1.0 + """ + import_optional_dependency("pyarrow") + from pyarrow import feather + + if not isinstance(df, DataFrame): + raise ValueError("feather only support IO with DataFrames") + + valid_types = {"string", "unicode"} + + # validate index + # -------------- + + # validate that we have only a default index + # raise on anything else as we don't serialize the index + + if not isinstance(df.index, (Int64Index, RangeIndex)): + typ = type(df.index) + raise ValueError( + f"feather does not support serializing {typ} " + "for the index; you can .reset_index() to make the index into column(s)" + ) + + if not df.index.equals(RangeIndex.from_range(range(len(df)))): + raise ValueError( + "feather does not support serializing a non-default index for the index; " + "you can .reset_index() to make the index into column(s)" + ) + + if df.index.name is not None: + raise ValueError( + "feather does not serialize index meta-data on a default index" + ) + + # validate columns + # ---------------- + + # must have value column names (strings only) + if df.columns.inferred_type not in valid_types: + raise ValueError("feather must have string column names") + + with get_handle( + path, "wb", storage_options=storage_options, is_text=False + ) as handles: + feather.write_feather(df, handles.handle, **kwargs) + + +@doc(storage_options=_shared_docs["storage_options"]) +def read_feather( + path: FilePath | ReadBuffer[bytes], + columns: Sequence[Hashable] | None = None, + use_threads: bool = True, + storage_options: StorageOptions = None, +): + """ + Load a feather-format object from the file path. + + Parameters + ---------- + path : str, path object, or file-like object + String, path object (implementing ``os.PathLike[str]``), or file-like + object implementing a binary ``read()`` function. The string could be a URL. + Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is + expected. A local file could be: ``file://localhost/path/to/table.feather``. + columns : sequence, default None + If not provided, all columns are read. + use_threads : bool, default True + Whether to parallelize reading using multiple threads. + {storage_options} + + .. 
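Because `to_feather` above rejects anything but a default, unnamed RangeIndex, a meaningful index has to be materialized into columns first. A hedged sketch (requires pyarrow; path hypothetical):

import pandas as pd

df = pd.DataFrame({"a": [1, 2]}, index=pd.Index(["x", "y"], name="key"))

df.reset_index().to_feather("frame.feather")
back = pd.read_feather("frame.feather").set_index("key")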
versionadded:: 1.2.0 + + Returns + ------- + type of object stored in file + """ + import_optional_dependency("pyarrow") + from pyarrow import feather + + with get_handle( + path, "rb", storage_options=storage_options, is_text=False + ) as handles: + + return feather.read_feather( + handles.handle, columns=columns, use_threads=bool(use_threads) + ) diff --git a/.local/lib/python3.8/site-packages/pandas/io/formats/__init__.py b/.local/lib/python3.8/site-packages/pandas/io/formats/__init__.py new file mode 100644 index 0000000..8a3486a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/formats/__init__.py @@ -0,0 +1,8 @@ +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + # import modules that have public classes/functions + from pandas.io.formats import style + + # and mark only those modules as public + __all__ = ["style"] diff --git a/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..570673c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/_color_data.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/_color_data.cpython-38.pyc new file mode 100644 index 0000000..f9e209d Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/_color_data.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/console.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/console.cpython-38.pyc new file mode 100644 index 0000000..008d564 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/console.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/css.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/css.cpython-38.pyc new file mode 100644 index 0000000..3c27dd5 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/css.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/csvs.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/csvs.cpython-38.pyc new file mode 100644 index 0000000..63ca987 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/csvs.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/excel.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/excel.cpython-38.pyc new file mode 100644 index 0000000..7c0d14e Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/excel.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/format.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/format.cpython-38.pyc new file mode 100644 index 0000000..366e341 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/format.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/html.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/html.cpython-38.pyc new file mode 
100644 index 0000000..dd082bc Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/html.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/info.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/info.cpython-38.pyc new file mode 100644 index 0000000..6138604 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/info.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/latex.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/latex.cpython-38.pyc new file mode 100644 index 0000000..0ea42cf Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/latex.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/printing.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/printing.cpython-38.pyc new file mode 100644 index 0000000..c52d200 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/printing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/string.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/string.cpython-38.pyc new file mode 100644 index 0000000..a0e5b44 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/string.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/style.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/style.cpython-38.pyc new file mode 100644 index 0000000..77ca9c8 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/style.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/style_render.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/style_render.cpython-38.pyc new file mode 100644 index 0000000..dd7dc12 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/style_render.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/xml.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/xml.cpython-38.pyc new file mode 100644 index 0000000..f7a5042 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/formats/__pycache__/xml.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/formats/_color_data.py b/.local/lib/python3.8/site-packages/pandas/io/formats/_color_data.py new file mode 100644 index 0000000..e5b72b2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/formats/_color_data.py @@ -0,0 +1,155 @@ +# GH37967: Enable the use of CSS named colors, as defined in +# matplotlib.colors.CSS4_COLORS, when exporting to Excel. +# This data has been copied here, instead of being imported from matplotlib, +# not to have ``to_excel`` methods require matplotlib. 
+# source: matplotlib._color_data (3.3.3) +CSS4_COLORS = { + "aliceblue": "F0F8FF", + "antiquewhite": "FAEBD7", + "aqua": "00FFFF", + "aquamarine": "7FFFD4", + "azure": "F0FFFF", + "beige": "F5F5DC", + "bisque": "FFE4C4", + "black": "000000", + "blanchedalmond": "FFEBCD", + "blue": "0000FF", + "blueviolet": "8A2BE2", + "brown": "A52A2A", + "burlywood": "DEB887", + "cadetblue": "5F9EA0", + "chartreuse": "7FFF00", + "chocolate": "D2691E", + "coral": "FF7F50", + "cornflowerblue": "6495ED", + "cornsilk": "FFF8DC", + "crimson": "DC143C", + "cyan": "00FFFF", + "darkblue": "00008B", + "darkcyan": "008B8B", + "darkgoldenrod": "B8860B", + "darkgray": "A9A9A9", + "darkgreen": "006400", + "darkgrey": "A9A9A9", + "darkkhaki": "BDB76B", + "darkmagenta": "8B008B", + "darkolivegreen": "556B2F", + "darkorange": "FF8C00", + "darkorchid": "9932CC", + "darkred": "8B0000", + "darksalmon": "E9967A", + "darkseagreen": "8FBC8F", + "darkslateblue": "483D8B", + "darkslategray": "2F4F4F", + "darkslategrey": "2F4F4F", + "darkturquoise": "00CED1", + "darkviolet": "9400D3", + "deeppink": "FF1493", + "deepskyblue": "00BFFF", + "dimgray": "696969", + "dimgrey": "696969", + "dodgerblue": "1E90FF", + "firebrick": "B22222", + "floralwhite": "FFFAF0", + "forestgreen": "228B22", + "fuchsia": "FF00FF", + "gainsboro": "DCDCDC", + "ghostwhite": "F8F8FF", + "gold": "FFD700", + "goldenrod": "DAA520", + "gray": "808080", + "green": "008000", + "greenyellow": "ADFF2F", + "grey": "808080", + "honeydew": "F0FFF0", + "hotpink": "FF69B4", + "indianred": "CD5C5C", + "indigo": "4B0082", + "ivory": "FFFFF0", + "khaki": "F0E68C", + "lavender": "E6E6FA", + "lavenderblush": "FFF0F5", + "lawngreen": "7CFC00", + "lemonchiffon": "FFFACD", + "lightblue": "ADD8E6", + "lightcoral": "F08080", + "lightcyan": "E0FFFF", + "lightgoldenrodyellow": "FAFAD2", + "lightgray": "D3D3D3", + "lightgreen": "90EE90", + "lightgrey": "D3D3D3", + "lightpink": "FFB6C1", + "lightsalmon": "FFA07A", + "lightseagreen": "20B2AA", + "lightskyblue": "87CEFA", + "lightslategray": "778899", + "lightslategrey": "778899", + "lightsteelblue": "B0C4DE", + "lightyellow": "FFFFE0", + "lime": "00FF00", + "limegreen": "32CD32", + "linen": "FAF0E6", + "magenta": "FF00FF", + "maroon": "800000", + "mediumaquamarine": "66CDAA", + "mediumblue": "0000CD", + "mediumorchid": "BA55D3", + "mediumpurple": "9370DB", + "mediumseagreen": "3CB371", + "mediumslateblue": "7B68EE", + "mediumspringgreen": "00FA9A", + "mediumturquoise": "48D1CC", + "mediumvioletred": "C71585", + "midnightblue": "191970", + "mintcream": "F5FFFA", + "mistyrose": "FFE4E1", + "moccasin": "FFE4B5", + "navajowhite": "FFDEAD", + "navy": "000080", + "oldlace": "FDF5E6", + "olive": "808000", + "olivedrab": "6B8E23", + "orange": "FFA500", + "orangered": "FF4500", + "orchid": "DA70D6", + "palegoldenrod": "EEE8AA", + "palegreen": "98FB98", + "paleturquoise": "AFEEEE", + "palevioletred": "DB7093", + "papayawhip": "FFEFD5", + "peachpuff": "FFDAB9", + "peru": "CD853F", + "pink": "FFC0CB", + "plum": "DDA0DD", + "powderblue": "B0E0E6", + "purple": "800080", + "rebeccapurple": "663399", + "red": "FF0000", + "rosybrown": "BC8F8F", + "royalblue": "4169E1", + "saddlebrown": "8B4513", + "salmon": "FA8072", + "sandybrown": "F4A460", + "seagreen": "2E8B57", + "seashell": "FFF5EE", + "sienna": "A0522D", + "silver": "C0C0C0", + "skyblue": "87CEEB", + "slateblue": "6A5ACD", + "slategray": "708090", + "slategrey": "708090", + "snow": "FFFAFA", + "springgreen": "00FF7F", + "steelblue": "4682B4", + "tan": "D2B48C", + "teal": "008080", + "thistle": 
"D8BFD8", + "tomato": "FF6347", + "turquoise": "40E0D0", + "violet": "EE82EE", + "wheat": "F5DEB3", + "white": "FFFFFF", + "whitesmoke": "F5F5F5", + "yellow": "FFFF00", + "yellowgreen": "9ACD32", +} diff --git a/.local/lib/python3.8/site-packages/pandas/io/formats/console.py b/.local/lib/python3.8/site-packages/pandas/io/formats/console.py new file mode 100644 index 0000000..bdd2b3d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/formats/console.py @@ -0,0 +1,93 @@ +""" +Internal module for console introspection +""" + +from shutil import get_terminal_size + + +def get_console_size(): + """ + Return console size as tuple = (width, height). + + Returns (None,None) in non-interactive session. + """ + from pandas import get_option + + display_width = get_option("display.width") + display_height = get_option("display.max_rows") + + # Consider + # interactive shell terminal, can detect term size + # interactive non-shell terminal (ipnb/ipqtconsole), cannot detect term + # size non-interactive script, should disregard term size + + # in addition + # width,height have default values, but setting to 'None' signals + # should use Auto-Detection, But only in interactive shell-terminal. + # Simple. yeah. + + if in_interactive_session(): + if in_ipython_frontend(): + # sane defaults for interactive non-shell terminal + # match default for width,height in config_init + from pandas._config.config import get_default_val + + terminal_width = get_default_val("display.width") + terminal_height = get_default_val("display.max_rows") + else: + # pure terminal + terminal_width, terminal_height = get_terminal_size() + else: + terminal_width, terminal_height = None, None + + # Note if the User sets width/Height to None (auto-detection) + # and we're in a script (non-inter), this will return (None,None) + # caller needs to deal. + return (display_width or terminal_width, display_height or terminal_height) + + +# ---------------------------------------------------------------------- +# Detect our environment + + +def in_interactive_session(): + """ + Check if we're running in an interactive shell. + + Returns + ------- + bool + True if running under python/ipython interactive shell. + """ + from pandas import get_option + + def check_main(): + try: + import __main__ as main + except ModuleNotFoundError: + return get_option("mode.sim_interactive") + return not hasattr(main, "__file__") or get_option("mode.sim_interactive") + + try: + # error: Name '__IPYTHON__' is not defined + return __IPYTHON__ or check_main() # type: ignore[name-defined] + except NameError: + return check_main() + + +def in_ipython_frontend(): + """ + Check if we're inside an IPython zmq frontend. + + Returns + ------- + bool + """ + try: + # error: Name 'get_ipython' is not defined + ip = get_ipython() # type: ignore[name-defined] + return "zmq" in str(type(ip)).lower() + except NameError: + pass + + return False diff --git a/.local/lib/python3.8/site-packages/pandas/io/formats/css.py b/.local/lib/python3.8/site-packages/pandas/io/formats/css.py new file mode 100644 index 0000000..956951a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/formats/css.py @@ -0,0 +1,287 @@ +""" +Utilities for interpreting CSS from Stylers for formatting non-HTML outputs. +""" +from __future__ import annotations + +import re +import warnings + + +class CSSWarning(UserWarning): + """ + This CSS syntax cannot currently be parsed. 
+ """ + + +def _side_expander(prop_fmt: str): + def expand(self, prop, value: str): + tokens = value.split() + try: + mapping = self.SIDE_SHORTHANDS[len(tokens)] + except KeyError: + warnings.warn(f'Could not expand "{prop}: {value}"', CSSWarning) + return + for key, idx in zip(self.SIDES, mapping): + yield prop_fmt.format(key), tokens[idx] + + return expand + + +class CSSResolver: + """ + A callable for parsing and resolving CSS to atomic properties. + """ + + UNIT_RATIOS = { + "rem": ("pt", 12), + "ex": ("em", 0.5), + # 'ch': + "px": ("pt", 0.75), + "pc": ("pt", 12), + "in": ("pt", 72), + "cm": ("in", 1 / 2.54), + "mm": ("in", 1 / 25.4), + "q": ("mm", 0.25), + "!!default": ("em", 0), + } + + FONT_SIZE_RATIOS = UNIT_RATIOS.copy() + FONT_SIZE_RATIOS.update( + { + "%": ("em", 0.01), + "xx-small": ("rem", 0.5), + "x-small": ("rem", 0.625), + "small": ("rem", 0.8), + "medium": ("rem", 1), + "large": ("rem", 1.125), + "x-large": ("rem", 1.5), + "xx-large": ("rem", 2), + "smaller": ("em", 1 / 1.2), + "larger": ("em", 1.2), + "!!default": ("em", 1), + } + ) + + MARGIN_RATIOS = UNIT_RATIOS.copy() + MARGIN_RATIOS.update({"none": ("pt", 0)}) + + BORDER_WIDTH_RATIOS = UNIT_RATIOS.copy() + BORDER_WIDTH_RATIOS.update( + { + "none": ("pt", 0), + "thick": ("px", 4), + "medium": ("px", 2), + "thin": ("px", 1), + # Default: medium only if solid + } + ) + + SIDE_SHORTHANDS = { + 1: [0, 0, 0, 0], + 2: [0, 1, 0, 1], + 3: [0, 1, 2, 1], + 4: [0, 1, 2, 3], + } + + SIDES = ("top", "right", "bottom", "left") + + def __call__( + self, + declarations_str: str, + inherited: dict[str, str] | None = None, + ) -> dict[str, str]: + """ + The given declarations to atomic properties. + + Parameters + ---------- + declarations_str : str + A list of CSS declarations + inherited : dict, optional + Atomic properties indicating the inherited style context in which + declarations_str is to be resolved. ``inherited`` should already + be resolved, i.e. valid output of this method. + + Returns + ------- + dict + Atomic CSS 2.2 properties. + + Examples + -------- + >>> resolve = CSSResolver() + >>> inherited = {'font-family': 'serif', 'font-weight': 'bold'} + >>> out = resolve(''' + ... border-color: BLUE RED; + ... font-size: 1em; + ... font-size: 2em; + ... font-weight: normal; + ... font-weight: inherit; + ... ''', inherited) + >>> sorted(out.items()) # doctest: +NORMALIZE_WHITESPACE + [('border-bottom-color', 'blue'), + ('border-left-color', 'red'), + ('border-right-color', 'red'), + ('border-top-color', 'blue'), + ('font-family', 'serif'), + ('font-size', '24pt'), + ('font-weight', 'bold')] + """ + props = dict(self.atomize(self.parse(declarations_str))) + if inherited is None: + inherited = {} + + props = self._update_initial(props, inherited) + props = self._update_font_size(props, inherited) + return self._update_other_units(props) + + def _update_initial( + self, + props: dict[str, str], + inherited: dict[str, str], + ) -> dict[str, str]: + # 1. resolve inherited, initial + for prop, val in inherited.items(): + if prop not in props: + props[prop] = val + + new_props = props.copy() + for prop, val in props.items(): + if val == "inherit": + val = inherited.get(prop, "initial") + + if val in ("initial", None): + # we do not define a complete initial stylesheet + del new_props[prop] + else: + new_props[prop] = val + return new_props + + def _update_font_size( + self, + props: dict[str, str], + inherited: dict[str, str], + ) -> dict[str, str]: + # 2. 
resolve relative font size + if props.get("font-size"): + props["font-size"] = self.size_to_pt( + props["font-size"], + self._get_font_size(inherited), + conversions=self.FONT_SIZE_RATIOS, + ) + return props + + def _get_font_size(self, props: dict[str, str]) -> float | None: + if props.get("font-size"): + font_size_string = props["font-size"] + return self._get_float_font_size_from_pt(font_size_string) + return None + + def _get_float_font_size_from_pt(self, font_size_string: str) -> float: + assert font_size_string.endswith("pt") + return float(font_size_string.rstrip("pt")) + + def _update_other_units(self, props: dict[str, str]) -> dict[str, str]: + font_size = self._get_font_size(props) + # 3. TODO: resolve other font-relative units + for side in self.SIDES: + prop = f"border-{side}-width" + if prop in props: + props[prop] = self.size_to_pt( + props[prop], + em_pt=font_size, + conversions=self.BORDER_WIDTH_RATIOS, + ) + + for prop in [f"margin-{side}", f"padding-{side}"]: + if prop in props: + # TODO: support % + props[prop] = self.size_to_pt( + props[prop], + em_pt=font_size, + conversions=self.MARGIN_RATIOS, + ) + return props + + def size_to_pt(self, in_val, em_pt=None, conversions=UNIT_RATIOS): + def _error(): + warnings.warn(f"Unhandled size: {repr(in_val)}", CSSWarning) + return self.size_to_pt("1!!default", conversions=conversions) + + match = re.match(r"^(\S*?)([a-zA-Z%!].*)", in_val) + if match is None: + return _error() + + val, unit = match.groups() + if val == "": + # hack for 'large' etc. + val = 1 + else: + try: + val = float(val) + except ValueError: + return _error() + + while unit != "pt": + if unit == "em": + if em_pt is None: + unit = "rem" + else: + val *= em_pt + unit = "pt" + continue + + try: + unit, mul = conversions[unit] + except KeyError: + return _error() + val *= mul + + val = round(val, 5) + if int(val) == val: + size_fmt = f"{int(val):d}pt" + else: + size_fmt = f"{val:f}pt" + return size_fmt + + def atomize(self, declarations): + for prop, value in declarations: + attr = "expand_" + prop.replace("-", "_") + try: + expand = getattr(self, attr) + except AttributeError: + yield prop, value + else: + for prop, value in expand(prop, value): + yield prop, value + + expand_border_color = _side_expander("border-{:s}-color") + expand_border_style = _side_expander("border-{:s}-style") + expand_border_width = _side_expander("border-{:s}-width") + expand_margin = _side_expander("margin-{:s}") + expand_padding = _side_expander("padding-{:s}") + + def parse(self, declarations_str: str): + """ + Generates (prop, value) pairs from declarations. + + In a future version may generate parsed tokens from tinycss/tinycss2 + + Parameters + ---------- + declarations_str : str + """ + for decl in declarations_str.split(";"): + if not decl.strip(): + continue + prop, sep, val = decl.partition(":") + prop = prop.strip().lower() + # TODO: don't lowercase case sensitive parts of values (strings) + val = val.strip().lower() + if sep: + yield prop, val + else: + warnings.warn( + f"Ill-formatted attribute: expected a colon in {repr(decl)}", + CSSWarning, + ) diff --git a/.local/lib/python3.8/site-packages/pandas/io/formats/csvs.py b/.local/lib/python3.8/site-packages/pandas/io/formats/csvs.py new file mode 100644 index 0000000..d2cc77a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/formats/csvs.py @@ -0,0 +1,321 @@ +""" +Module for formatting output data into CSV files. 
+""" + +from __future__ import annotations + +import csv as csvlib +import os +from typing import ( + TYPE_CHECKING, + Any, + Hashable, + Iterator, + Sequence, + cast, +) + +import numpy as np + +from pandas._libs import writers as libwriters +from pandas._typing import ( + CompressionOptions, + FilePath, + FloatFormatType, + IndexLabel, + StorageOptions, + WriteBuffer, +) +from pandas.util._decorators import cache_readonly + +from pandas.core.dtypes.generic import ( + ABCDatetimeIndex, + ABCIndex, + ABCMultiIndex, + ABCPeriodIndex, +) +from pandas.core.dtypes.missing import notna + +from pandas.core.indexes.api import Index + +from pandas.io.common import get_handle + +if TYPE_CHECKING: + from pandas.io.formats.format import DataFrameFormatter + + +class CSVFormatter: + cols: np.ndarray + + def __init__( + self, + formatter: DataFrameFormatter, + path_or_buf: FilePath | WriteBuffer[str] | WriteBuffer[bytes] = "", + sep: str = ",", + cols: Sequence[Hashable] | None = None, + index_label: IndexLabel | None = None, + mode: str = "w", + encoding: str | None = None, + errors: str = "strict", + compression: CompressionOptions = "infer", + quoting: int | None = None, + line_terminator: str | None = "\n", + chunksize: int | None = None, + quotechar: str | None = '"', + date_format: str | None = None, + doublequote: bool = True, + escapechar: str | None = None, + storage_options: StorageOptions = None, + ): + self.fmt = formatter + + self.obj = self.fmt.frame + + self.filepath_or_buffer = path_or_buf + self.encoding = encoding + self.compression = compression + self.mode = mode + self.storage_options = storage_options + + self.sep = sep + self.index_label = self._initialize_index_label(index_label) + self.errors = errors + self.quoting = quoting or csvlib.QUOTE_MINIMAL + self.quotechar = self._initialize_quotechar(quotechar) + self.doublequote = doublequote + self.escapechar = escapechar + self.line_terminator = line_terminator or os.linesep + self.date_format = date_format + self.cols = self._initialize_columns(cols) + self.chunksize = self._initialize_chunksize(chunksize) + + @property + def na_rep(self) -> str: + return self.fmt.na_rep + + @property + def float_format(self) -> FloatFormatType | None: + return self.fmt.float_format + + @property + def decimal(self) -> str: + return self.fmt.decimal + + @property + def header(self) -> bool | Sequence[str]: + return self.fmt.header + + @property + def index(self) -> bool: + return self.fmt.index + + def _initialize_index_label(self, index_label: IndexLabel | None) -> IndexLabel: + if index_label is not False: + if index_label is None: + return self._get_index_label_from_obj() + elif not isinstance(index_label, (list, tuple, np.ndarray, ABCIndex)): + # given a string for a DF with Index + return [index_label] + return index_label + + def _get_index_label_from_obj(self) -> list[str]: + if isinstance(self.obj.index, ABCMultiIndex): + return self._get_index_label_multiindex() + else: + return self._get_index_label_flat() + + def _get_index_label_multiindex(self) -> list[str]: + return [name or "" for name in self.obj.index.names] + + def _get_index_label_flat(self) -> list[str]: + index_label = self.obj.index.name + return [""] if index_label is None else [index_label] + + def _initialize_quotechar(self, quotechar: str | None) -> str | None: + if self.quoting != csvlib.QUOTE_NONE: + # prevents crash in _csv + return quotechar + return None + + @property + def has_mi_columns(self) -> bool: + return bool(isinstance(self.obj.columns, ABCMultiIndex)) + + 
def _initialize_columns(self, cols: Sequence[Hashable] | None) -> np.ndarray: + # validate mi options + if self.has_mi_columns: + if cols is not None: + msg = "cannot specify cols with a MultiIndex on the columns" + raise TypeError(msg) + + if cols is not None: + if isinstance(cols, ABCIndex): + cols = cols._format_native_types(**self._number_format) + else: + cols = list(cols) + self.obj = self.obj.loc[:, cols] + + # update columns to include possible multiplicity of dupes + # and make sure cols is just a list of labels + new_cols = self.obj.columns + return new_cols._format_native_types(**self._number_format) + + def _initialize_chunksize(self, chunksize: int | None) -> int: + if chunksize is None: + return (100000 // (len(self.cols) or 1)) or 1 + return int(chunksize) + + @property + def _number_format(self) -> dict[str, Any]: + """Dictionary used for storing number formatting settings.""" + return { + "na_rep": self.na_rep, + "float_format": self.float_format, + "date_format": self.date_format, + "quoting": self.quoting, + "decimal": self.decimal, + } + + @cache_readonly + def data_index(self) -> Index: + data_index = self.obj.index + if ( + isinstance(data_index, (ABCDatetimeIndex, ABCPeriodIndex)) + and self.date_format is not None + ): + data_index = Index( + [x.strftime(self.date_format) if notna(x) else "" for x in data_index] + ) + elif isinstance(data_index, ABCMultiIndex): + data_index = data_index.remove_unused_levels() + return data_index + + @property + def nlevels(self) -> int: + if self.index: + return getattr(self.data_index, "nlevels", 1) + else: + return 0 + + @property + def _has_aliases(self) -> bool: + return isinstance(self.header, (tuple, list, np.ndarray, ABCIndex)) + + @property + def _need_to_save_header(self) -> bool: + return bool(self._has_aliases or self.header) + + @property + def write_cols(self) -> Sequence[Hashable]: + if self._has_aliases: + assert not isinstance(self.header, bool) + if len(self.header) != len(self.cols): + raise ValueError( + f"Writing {len(self.cols)} cols but got {len(self.header)} aliases" + ) + else: + return self.header + else: + # self.cols is an ndarray derived from Index._format_native_types, + # so its entries are strings, i.e. hashable + return cast(Sequence[Hashable], self.cols) + + @property + def encoded_labels(self) -> list[Hashable]: + encoded_labels: list[Hashable] = [] + + if self.index and self.index_label: + assert isinstance(self.index_label, Sequence) + encoded_labels = list(self.index_label) + + if not self.has_mi_columns or self._has_aliases: + encoded_labels += list(self.write_cols) + + return encoded_labels + + def save(self) -> None: + """ + Create the writer & save. 
+ """ + # apply compression and byte/text conversion + with get_handle( + self.filepath_or_buffer, + self.mode, + encoding=self.encoding, + errors=self.errors, + compression=self.compression, + storage_options=self.storage_options, + ) as handles: + + # Note: self.encoding is irrelevant here + self.writer = csvlib.writer( + handles.handle, + lineterminator=self.line_terminator, + delimiter=self.sep, + quoting=self.quoting, + doublequote=self.doublequote, + escapechar=self.escapechar, + quotechar=self.quotechar, + ) + + self._save() + + def _save(self) -> None: + if self._need_to_save_header: + self._save_header() + self._save_body() + + def _save_header(self) -> None: + if not self.has_mi_columns or self._has_aliases: + self.writer.writerow(self.encoded_labels) + else: + for row in self._generate_multiindex_header_rows(): + self.writer.writerow(row) + + def _generate_multiindex_header_rows(self) -> Iterator[list[Hashable]]: + columns = self.obj.columns + for i in range(columns.nlevels): + # we need at least 1 index column to write our col names + col_line = [] + if self.index: + # name is the first column + col_line.append(columns.names[i]) + + if isinstance(self.index_label, list) and len(self.index_label) > 1: + col_line.extend([""] * (len(self.index_label) - 1)) + + col_line.extend(columns._get_level_values(i)) + yield col_line + + # Write out the index line if it's not empty. + # Otherwise, we will print out an extraneous + # blank line between the mi and the data rows. + if self.encoded_labels and set(self.encoded_labels) != {""}: + yield self.encoded_labels + [""] * len(columns) + + def _save_body(self) -> None: + nrows = len(self.data_index) + chunks = (nrows // self.chunksize) + 1 + for i in range(chunks): + start_i = i * self.chunksize + end_i = min(start_i + self.chunksize, nrows) + if start_i >= end_i: + break + self._save_chunk(start_i, end_i) + + def _save_chunk(self, start_i: int, end_i: int) -> None: + # create the data for a chunk + slicer = slice(start_i, end_i) + df = self.obj.iloc[slicer] + + res = df._mgr.to_native_types(**self._number_format) + data = [res.iget_values(i) for i in range(len(res.items))] + + ix = self.data_index[slicer]._format_native_types(**self._number_format) + libwriters.write_csv_rows( + data, + ix, + self.nlevels, + self.cols, + self.writer, + ) diff --git a/.local/lib/python3.8/site-packages/pandas/io/formats/excel.py b/.local/lib/python3.8/site-packages/pandas/io/formats/excel.py new file mode 100644 index 0000000..507516e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/formats/excel.py @@ -0,0 +1,902 @@ +""" +Utilities for conversion to writer-agnostic Excel representation. 
+""" +from __future__ import annotations + +from functools import reduce +import itertools +import re +from typing import ( + Any, + Callable, + Hashable, + Iterable, + Mapping, + Sequence, + cast, +) +import warnings + +import numpy as np + +from pandas._libs.lib import is_list_like +from pandas._typing import ( + IndexLabel, + StorageOptions, +) +from pandas.util._decorators import doc + +from pandas.core.dtypes import missing +from pandas.core.dtypes.common import ( + is_float, + is_scalar, +) + +from pandas import ( + DataFrame, + Index, + MultiIndex, + PeriodIndex, +) +import pandas.core.common as com +from pandas.core.shared_docs import _shared_docs + +from pandas.io.formats._color_data import CSS4_COLORS +from pandas.io.formats.css import ( + CSSResolver, + CSSWarning, +) +from pandas.io.formats.format import get_level_lengths +from pandas.io.formats.printing import pprint_thing + + +class ExcelCell: + __fields__ = ("row", "col", "val", "style", "mergestart", "mergeend") + __slots__ = __fields__ + + def __init__( + self, + row: int, + col: int, + val, + style=None, + mergestart: int | None = None, + mergeend: int | None = None, + ): + self.row = row + self.col = col + self.val = val + self.style = style + self.mergestart = mergestart + self.mergeend = mergeend + + +class CssExcelCell(ExcelCell): + def __init__( + self, + row: int, + col: int, + val, + style: dict | None, + css_styles: dict[tuple[int, int], list[tuple[str, Any]]] | None, + css_row: int, + css_col: int, + css_converter: Callable | None, + **kwargs, + ): + if css_styles and css_converter: + css = ";".join( + [a + ":" + str(v) for (a, v) in css_styles[css_row, css_col]] + ) + style = css_converter(css) + + return super().__init__(row=row, col=col, val=val, style=style, **kwargs) + + +class CSSToExcelConverter: + """ + A callable for converting CSS declarations to ExcelWriter styles + + Supports parts of CSS 2.2, with minimal CSS 3.0 support (e.g. text-shadow), + focusing on font styling, backgrounds, borders and alignment. + + Operates by first computing CSS styles in a fairly generic + way (see :meth:`compute_css`) then determining Excel style + properties from CSS properties (see :meth:`build_xlstyle`). + + Parameters + ---------- + inherited : str, optional + CSS declarations understood to be the containing scope for the + CSS processed by :meth:`__call__`. + """ + + NAMED_COLORS = CSS4_COLORS + + VERTICAL_MAP = { + "top": "top", + "text-top": "top", + "middle": "center", + "baseline": "bottom", + "bottom": "bottom", + "text-bottom": "bottom", + # OpenXML also has 'justify', 'distributed' + } + + BOLD_MAP = { + "bold": True, + "bolder": True, + "600": True, + "700": True, + "800": True, + "900": True, + "normal": False, + "lighter": False, + "100": False, + "200": False, + "300": False, + "400": False, + "500": False, + } + + ITALIC_MAP = { + "normal": False, + "italic": True, + "oblique": True, + } + + FAMILY_MAP = { + "serif": 1, # roman + "sans-serif": 2, # swiss + "cursive": 4, # script + "fantasy": 5, # decorative + } + + # NB: Most of the methods here could be classmethods, as only __init__ + # and __call__ make use of instance attributes. We leave them as + # instancemethods so that users can easily experiment with extensions + # without monkey-patching. 
+ inherited: dict[str, str] | None + + def __init__(self, inherited: str | None = None): + if inherited is not None: + self.inherited = self.compute_css(inherited) + else: + self.inherited = None + + compute_css = CSSResolver() + + def __call__(self, declarations_str: str) -> dict[str, dict[str, str]]: + """ + Convert CSS declarations to ExcelWriter style. + + Parameters + ---------- + declarations_str : str + List of CSS declarations. + e.g. "font-weight: bold; background: blue" + + Returns + ------- + xlstyle : dict + A style as interpreted by ExcelWriter when found in + ExcelCell.style. + """ + # TODO: memoize? + properties = self.compute_css(declarations_str, self.inherited) + return self.build_xlstyle(properties) + + def build_xlstyle(self, props: Mapping[str, str]) -> dict[str, dict[str, str]]: + out = { + "alignment": self.build_alignment(props), + "border": self.build_border(props), + "fill": self.build_fill(props), + "font": self.build_font(props), + "number_format": self.build_number_format(props), + } + + # TODO: handle cell width and height: needs support in pandas.io.excel + + def remove_none(d: dict[str, str]) -> None: + """Remove key where value is None, through nested dicts""" + for k, v in list(d.items()): + if v is None: + del d[k] + elif isinstance(v, dict): + remove_none(v) + if not v: + del d[k] + + remove_none(out) + return out + + def build_alignment(self, props: Mapping[str, str]) -> dict[str, bool | str | None]: + # TODO: text-indent, padding-left -> alignment.indent + return { + "horizontal": props.get("text-align"), + "vertical": self._get_vertical_alignment(props), + "wrap_text": self._get_is_wrap_text(props), + } + + def _get_vertical_alignment(self, props: Mapping[str, str]) -> str | None: + vertical_align = props.get("vertical-align") + if vertical_align: + return self.VERTICAL_MAP.get(vertical_align) + return None + + def _get_is_wrap_text(self, props: Mapping[str, str]) -> bool | None: + if props.get("white-space") is None: + return None + return bool(props["white-space"] not in ("nowrap", "pre", "pre-line")) + + def build_border( + self, props: Mapping[str, str] + ) -> dict[str, dict[str, str | None]]: + return { + side: { + "style": self._border_style( + props.get(f"border-{side}-style"), + props.get(f"border-{side}-width"), + ), + "color": self.color_to_excel(props.get(f"border-{side}-color")), + } + for side in ["top", "right", "bottom", "left"] + } + + def _border_style(self, style: str | None, width: str | None): + # convert styles and widths to openxml, one of: + # 'dashDot' + # 'dashDotDot' + # 'dashed' + # 'dotted' + # 'double' + # 'hair' + # 'medium' + # 'mediumDashDot' + # 'mediumDashDotDot' + # 'mediumDashed' + # 'slantDashDot' + # 'thick' + # 'thin' + if width is None and style is None: + return None + if style == "none" or style == "hidden": + return None + + width_name = self._get_width_name(width) + if width_name is None: + return None + + if style in (None, "groove", "ridge", "inset", "outset", "solid"): + # not handled + return width_name + + if style == "double": + return "double" + if style == "dotted": + if width_name in ("hair", "thin"): + return "dotted" + return "mediumDashDotDot" + if style == "dashed": + if width_name in ("hair", "thin"): + return "dashed" + return "mediumDashed" + + def _get_width_name(self, width_input: str | None) -> str | None: + width = self._width_to_float(width_input) + if width < 1e-5: + return None + elif width < 1.3: + return "thin" + elif width < 2.8: + return "medium" + return "thick" + + def 
_width_to_float(self, width: str | None) -> float: + if width is None: + width = "2pt" + return self._pt_to_float(width) + + def _pt_to_float(self, pt_string: str) -> float: + assert pt_string.endswith("pt") + return float(pt_string.rstrip("pt")) + + def build_fill(self, props: Mapping[str, str]): + # TODO: perhaps allow for special properties + # -excel-pattern-bgcolor and -excel-pattern-type + fill_color = props.get("background-color") + if fill_color not in (None, "transparent", "none"): + return {"fgColor": self.color_to_excel(fill_color), "patternType": "solid"} + + def build_number_format(self, props: Mapping[str, str]) -> dict[str, str | None]: + return {"format_code": props.get("number-format")} + + def build_font( + self, props: Mapping[str, str] + ) -> dict[str, bool | int | float | str | None]: + font_names = self._get_font_names(props) + decoration = self._get_decoration(props) + return { + "name": font_names[0] if font_names else None, + "family": self._select_font_family(font_names), + "size": self._get_font_size(props), + "bold": self._get_is_bold(props), + "italic": self._get_is_italic(props), + "underline": ("single" if "underline" in decoration else None), + "strike": ("line-through" in decoration) or None, + "color": self.color_to_excel(props.get("color")), + # shadow if nonzero digit before shadow color + "shadow": self._get_shadow(props), + } + + def _get_is_bold(self, props: Mapping[str, str]) -> bool | None: + weight = props.get("font-weight") + if weight: + return self.BOLD_MAP.get(weight) + return None + + def _get_is_italic(self, props: Mapping[str, str]) -> bool | None: + font_style = props.get("font-style") + if font_style: + return self.ITALIC_MAP.get(font_style) + return None + + def _get_decoration(self, props: Mapping[str, str]) -> Sequence[str]: + decoration = props.get("text-decoration") + if decoration is not None: + return decoration.split() + else: + return () + + def _get_underline(self, decoration: Sequence[str]) -> str | None: + if "underline" in decoration: + return "single" + return None + + def _get_shadow(self, props: Mapping[str, str]) -> bool | None: + if "text-shadow" in props: + return bool(re.search("^[^#(]*[1-9]", props["text-shadow"])) + return None + + def _get_font_names(self, props: Mapping[str, str]) -> Sequence[str]: + font_names_tmp = re.findall( + r"""(?x) + ( + "(?:[^"]|\\")+" + | + '(?:[^']|\\')+' + | + [^'",]+ + )(?=,|\s*$) + """, + props.get("font-family", ""), + ) + + font_names = [] + for name in font_names_tmp: + if name[:1] == '"': + name = name[1:-1].replace('\\"', '"') + elif name[:1] == "'": + name = name[1:-1].replace("\\'", "'") + else: + name = name.strip() + if name: + font_names.append(name) + return font_names + + def _get_font_size(self, props: Mapping[str, str]) -> float | None: + size = props.get("font-size") + if size is None: + return size + return self._pt_to_float(size) + + def _select_font_family(self, font_names) -> int | None: + family = None + for name in font_names: + family = self.FAMILY_MAP.get(name) + if family: + break + + return family + + def color_to_excel(self, val: str | None) -> str | None: + if val is None: + return None + + if self._is_hex_color(val): + return self._convert_hex_to_excel(val) + + try: + return self.NAMED_COLORS[val] + except KeyError: + warnings.warn(f"Unhandled color format: {repr(val)}", CSSWarning) + return None + + def _is_hex_color(self, color_string: str) -> bool: + return bool(color_string.startswith("#")) + + def _convert_hex_to_excel(self, color_string: str) -> str: + 
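+        # e.g. "#F0F8FF" -> "F0F8FF"; 3-digit shorthand "#0f8" expands to "00FF88"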
code = color_string.lstrip("#") + if self._is_shorthand_color(color_string): + return (code[0] * 2 + code[1] * 2 + code[2] * 2).upper() + else: + return code.upper() + + def _is_shorthand_color(self, color_string: str) -> bool: + """Check if color code is shorthand. + + #FFF is a shorthand as opposed to full #FFFFFF. + """ + code = color_string.lstrip("#") + if len(code) == 3: + return True + elif len(code) == 6: + return False + else: + raise ValueError(f"Unexpected color {color_string}") + + +class ExcelFormatter: + """ + Class for formatting a DataFrame to a list of ExcelCells. + + Parameters + ---------- + df : DataFrame or Styler + na_rep : str + NA representation. + float_format : str, default None + Format string for floating point numbers + cols : sequence, optional + Columns to write + header : bool or sequence of str, default True + Write out column names. If a list of strings is given, it is + assumed to be aliases for the column names + index : bool, default True + Output row names (index) + index_label : str or sequence, default None + Column label for index column(s) if desired. If None is given, and + `header` and `index` are True, then the index names are used. A + sequence should be given if the DataFrame uses MultiIndex. + merge_cells : bool, default False + Format MultiIndex and Hierarchical Rows as merged cells. + inf_rep : str, default `'inf'` + Representation for np.inf values (which aren't representable in Excel). + A `'-'` sign will be added in front of -inf. + style_converter : callable, optional + This translates Styler styles (CSS) into ExcelWriter styles. + Defaults to ``CSSToExcelConverter()``. + It should have the signature: css_declarations string -> excel style. + This is only called for body cells. + """ + + max_rows = 2 ** 20 + max_cols = 2 ** 14 + + def __init__( + self, + df, + na_rep: str = "", + float_format: str | None = None, + cols: Sequence[Hashable] | None = None, + header: Sequence[Hashable] | bool = True, + index: bool = True, + index_label: IndexLabel | None = None, + merge_cells: bool = False, + inf_rep: str = "inf", + style_converter: Callable | None = None, + ): + self.rowcounter = 0 + self.na_rep = na_rep + if not isinstance(df, DataFrame): + self.styler = df + self.styler._compute() # calculate applied styles + df = df.data + if style_converter is None: + style_converter = CSSToExcelConverter() + self.style_converter: Callable | None = style_converter + else: + self.styler = None + self.style_converter = None + self.df = df + if cols is not None: + + # all missing, raise + if not len(Index(cols).intersection(df.columns)): + raise KeyError("passed columns are not ALL present in the dataframe") + + if len(Index(cols).intersection(df.columns)) != len(set(cols)): + # Deprecated in GH#17295, enforced in 1.0.0 + raise KeyError("Not all names specified in 'columns' are found") + + self.df = df.reindex(columns=cols) + + self.columns = self.df.columns + self.float_format = float_format + self.index = index + self.index_label = index_label + self.header = header + self.merge_cells = merge_cells + self.inf_rep = inf_rep + + @property + def header_style(self): + return { + "font": {"bold": True}, + "borders": { + "top": "thin", + "right": "thin", + "bottom": "thin", + "left": "thin", + }, + "alignment": {"horizontal": "center", "vertical": "top"}, + } + + def _format_value(self, val): + if is_scalar(val) and missing.isna(val): + val = self.na_rep + elif is_float(val): + if missing.isposinf_scalar(val): + val = self.inf_rep + elif missing.isneginf_scalar(val): + val = 
f"-{self.inf_rep}" + elif self.float_format is not None: + val = float(self.float_format % val) + if getattr(val, "tzinfo", None) is not None: + raise ValueError( + "Excel does not support datetimes with " + "timezones. Please ensure that datetimes " + "are timezone unaware before writing to Excel." + ) + return val + + def _format_header_mi(self) -> Iterable[ExcelCell]: + if self.columns.nlevels > 1: + if not self.index: + raise NotImplementedError( + "Writing to Excel with MultiIndex columns and no " + "index ('index'=False) is not yet implemented." + ) + + if not (self._has_aliases or self.header): + return + + columns = self.columns + level_strs = columns.format( + sparsify=self.merge_cells, adjoin=False, names=False + ) + level_lengths = get_level_lengths(level_strs) + coloffset = 0 + lnum = 0 + + if self.index and isinstance(self.df.index, MultiIndex): + coloffset = len(self.df.index[0]) - 1 + + if self.merge_cells: + # Format multi-index as a merged cells. + for lnum, name in enumerate(columns.names): + yield ExcelCell( + row=lnum, + col=coloffset, + val=name, + style=self.header_style, + ) + + for lnum, (spans, levels, level_codes) in enumerate( + zip(level_lengths, columns.levels, columns.codes) + ): + values = levels.take(level_codes) + for i, span_val in spans.items(): + mergestart, mergeend = None, None + if span_val > 1: + mergestart, mergeend = lnum, coloffset + i + span_val + yield CssExcelCell( + row=lnum, + col=coloffset + i + 1, + val=values[i], + style=self.header_style, + css_styles=getattr(self.styler, "ctx_columns", None), + css_row=lnum, + css_col=i, + css_converter=self.style_converter, + mergestart=mergestart, + mergeend=mergeend, + ) + else: + # Format in legacy format with dots to indicate levels. + for i, values in enumerate(zip(*level_strs)): + v = ".".join(map(pprint_thing, values)) + yield CssExcelCell( + row=lnum, + col=coloffset + i + 1, + val=v, + style=self.header_style, + css_styles=getattr(self.styler, "ctx_columns", None), + css_row=lnum, + css_col=i, + css_converter=self.style_converter, + ) + + self.rowcounter = lnum + + def _format_header_regular(self) -> Iterable[ExcelCell]: + if self._has_aliases or self.header: + coloffset = 0 + + if self.index: + coloffset = 1 + if isinstance(self.df.index, MultiIndex): + coloffset = len(self.df.index[0]) + + colnames = self.columns + if self._has_aliases: + self.header = cast(Sequence, self.header) + if len(self.header) != len(self.columns): + raise ValueError( + f"Writing {len(self.columns)} cols " + f"but got {len(self.header)} aliases" + ) + else: + colnames = self.header + + for colindex, colname in enumerate(colnames): + yield CssExcelCell( + row=self.rowcounter, + col=colindex + coloffset, + val=colname, + style=self.header_style, + css_styles=getattr(self.styler, "ctx_columns", None), + css_row=0, + css_col=colindex, + css_converter=self.style_converter, + ) + + def _format_header(self) -> Iterable[ExcelCell]: + if isinstance(self.columns, MultiIndex): + gen = self._format_header_mi() + else: + gen = self._format_header_regular() + + gen2 = () + if self.df.index.names: + row = [x if x is not None else "" for x in self.df.index.names] + [ + "" + ] * len(self.columns) + if reduce(lambda x, y: x and y, map(lambda x: x != "", row)): + # error: Incompatible types in assignment (expression has type + # "Generator[ExcelCell, None, None]", variable has type "Tuple[]") + gen2 = ( # type: ignore[assignment] + ExcelCell(self.rowcounter, colindex, val, self.header_style) + for colindex, val in enumerate(row) + ) + 
self.rowcounter += 1 + return itertools.chain(gen, gen2) + + def _format_body(self) -> Iterable[ExcelCell]: + if isinstance(self.df.index, MultiIndex): + return self._format_hierarchical_rows() + else: + return self._format_regular_rows() + + def _format_regular_rows(self) -> Iterable[ExcelCell]: + if self._has_aliases or self.header: + self.rowcounter += 1 + + # output index and index_label? + if self.index: + # check aliases + # if list only take first as this is not a MultiIndex + if self.index_label and isinstance( + self.index_label, (list, tuple, np.ndarray, Index) + ): + index_label = self.index_label[0] + # if string good to go + elif self.index_label and isinstance(self.index_label, str): + index_label = self.index_label + else: + index_label = self.df.index.names[0] + + if isinstance(self.columns, MultiIndex): + self.rowcounter += 1 + + if index_label and self.header is not False: + yield ExcelCell(self.rowcounter - 1, 0, index_label, self.header_style) + + # write index_values + index_values = self.df.index + if isinstance(self.df.index, PeriodIndex): + index_values = self.df.index.to_timestamp() + + for idx, idxval in enumerate(index_values): + yield CssExcelCell( + row=self.rowcounter + idx, + col=0, + val=idxval, + style=self.header_style, + css_styles=getattr(self.styler, "ctx_index", None), + css_row=idx, + css_col=0, + css_converter=self.style_converter, + ) + coloffset = 1 + else: + coloffset = 0 + + yield from self._generate_body(coloffset) + + def _format_hierarchical_rows(self) -> Iterable[ExcelCell]: + if self._has_aliases or self.header: + self.rowcounter += 1 + + gcolidx = 0 + + if self.index: + index_labels = self.df.index.names + # check for aliases + if self.index_label and isinstance( + self.index_label, (list, tuple, np.ndarray, Index) + ): + index_labels = self.index_label + + # MultiIndex columns require an extra row + # with index names (blank if None) for + # unambiguous round-trip, unless not merging, + # in which case the names all go on one row Issue #11328 + if isinstance(self.columns, MultiIndex) and self.merge_cells: + self.rowcounter += 1 + + # if index labels are not empty go ahead and dump + if com.any_not_none(*index_labels) and self.header is not False: + + for cidx, name in enumerate(index_labels): + yield ExcelCell(self.rowcounter - 1, cidx, name, self.header_style) + + if self.merge_cells: + # Format hierarchical rows as merged cells. + level_strs = self.df.index.format( + sparsify=True, adjoin=False, names=False + ) + level_lengths = get_level_lengths(level_strs) + + for spans, levels, level_codes in zip( + level_lengths, self.df.index.levels, self.df.index.codes + ): + + values = levels.take( + level_codes, + allow_fill=levels._can_hold_na, + fill_value=levels._na_value, + ) + + for i, span_val in spans.items(): + mergestart, mergeend = None, None + if span_val > 1: + mergestart = self.rowcounter + i + span_val - 1 + mergeend = gcolidx + yield CssExcelCell( + row=self.rowcounter + i, + col=gcolidx, + val=values[i], + style=self.header_style, + css_styles=getattr(self.styler, "ctx_index", None), + css_row=i, + css_col=gcolidx, + css_converter=self.style_converter, + mergestart=mergestart, + mergeend=mergeend, + ) + gcolidx += 1 + + else: + # Format hierarchical rows with non-merged values. 
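+            # zip(*self.df.index) yields one tuple per index level; each level becomes one column of cells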
+ for indexcolvals in zip(*self.df.index): + for idx, indexcolval in enumerate(indexcolvals): + yield CssExcelCell( + row=self.rowcounter + idx, + col=gcolidx, + val=indexcolval, + style=self.header_style, + css_styles=getattr(self.styler, "ctx_index", None), + css_row=idx, + css_col=gcolidx, + css_converter=self.style_converter, + ) + gcolidx += 1 + + yield from self._generate_body(gcolidx) + + @property + def _has_aliases(self) -> bool: + """Whether the aliases for column names are present.""" + return is_list_like(self.header) + + def _generate_body(self, coloffset: int) -> Iterable[ExcelCell]: + # Write the body of the frame data series by series. + for colidx in range(len(self.columns)): + series = self.df.iloc[:, colidx] + for i, val in enumerate(series): + yield CssExcelCell( + row=self.rowcounter + i, + col=colidx + coloffset, + val=val, + style=None, + css_styles=getattr(self.styler, "ctx", None), + css_row=i, + css_col=colidx, + css_converter=self.style_converter, + ) + + def get_formatted_cells(self) -> Iterable[ExcelCell]: + for cell in itertools.chain(self._format_header(), self._format_body()): + cell.val = self._format_value(cell.val) + yield cell + + @doc(storage_options=_shared_docs["storage_options"]) + def write( + self, + writer, + sheet_name="Sheet1", + startrow=0, + startcol=0, + freeze_panes=None, + engine=None, + storage_options: StorageOptions = None, + ): + """ + writer : path-like, file-like, or ExcelWriter object + File path or existing ExcelWriter + sheet_name : str, default 'Sheet1' + Name of sheet which will contain DataFrame + startrow : + upper left cell row to dump data frame + startcol : + upper left cell column to dump data frame + freeze_panes : tuple of integer (length 2), default None + Specifies the one-based bottommost row and rightmost column that + is to be frozen + engine : string, default None + write engine to use if writer is a path - you can also set this + via the options ``io.excel.xlsx.writer``, ``io.excel.xls.writer``, + and ``io.excel.xlsm.writer``. + + .. deprecated:: 1.2.0 + + As the `xlwt `__ package is no longer + maintained, the ``xlwt`` engine will be removed in a future + version of pandas. + + {storage_options} + + .. versionadded:: 1.2.0 + """ + from pandas.io.excel import ExcelWriter + + num_rows, num_cols = self.df.shape + if num_rows > self.max_rows or num_cols > self.max_cols: + raise ValueError( + f"This sheet is too large! Your sheet size is: {num_rows}, {num_cols} " + f"Max sheet size is: {self.max_rows}, {self.max_cols}" + ) + + formatted_cells = self.get_formatted_cells() + if isinstance(writer, ExcelWriter): + need_save = False + else: + # error: Cannot instantiate abstract class 'ExcelWriter' with abstract + # attributes 'engine', 'save', 'supported_extensions' and 'write_cells' + writer = ExcelWriter( # type: ignore[abstract] + writer, engine=engine, storage_options=storage_options + ) + need_save = True + + try: + writer.write_cells( + formatted_cells, + sheet_name, + startrow=startrow, + startcol=startcol, + freeze_panes=freeze_panes, + ) + finally: + # make sure to close opened file handles + if need_save: + writer.close() diff --git a/.local/lib/python3.8/site-packages/pandas/io/formats/format.py b/.local/lib/python3.8/site-packages/pandas/io/formats/format.py new file mode 100644 index 0000000..616331b --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/formats/format.py @@ -0,0 +1,2162 @@ +""" +Internal module for formatting output data in csv, html, xml, +and latex files. 
This module also applies to display formatting. +""" +from __future__ import annotations + +from contextlib import contextmanager +from csv import ( + QUOTE_NONE, + QUOTE_NONNUMERIC, +) +import decimal +from functools import partial +from io import StringIO +import math +import re +from shutil import get_terminal_size +from typing import ( + IO, + TYPE_CHECKING, + Any, + Callable, + Hashable, + Iterable, + List, + Mapping, + Sequence, + cast, +) +from unicodedata import east_asian_width + +import numpy as np + +from pandas._config.config import ( + get_option, + set_option, +) + +from pandas._libs import lib +from pandas._libs.missing import NA +from pandas._libs.tslibs import ( + NaT, + Timedelta, + Timestamp, + iNaT, +) +from pandas._libs.tslibs.nattype import NaTType +from pandas._typing import ( + ArrayLike, + ColspaceArgType, + ColspaceType, + CompressionOptions, + FilePath, + FloatFormatType, + FormattersType, + IndexLabel, + StorageOptions, + WriteBuffer, +) + +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_complex_dtype, + is_datetime64_dtype, + is_datetime64tz_dtype, + is_extension_array_dtype, + is_float, + is_float_dtype, + is_integer, + is_integer_dtype, + is_list_like, + is_numeric_dtype, + is_scalar, + is_timedelta64_dtype, +) +from pandas.core.dtypes.missing import ( + isna, + notna, +) + +from pandas.core.arrays import ( + Categorical, + DatetimeArray, + TimedeltaArray, +) +from pandas.core.base import PandasObject +import pandas.core.common as com +from pandas.core.construction import extract_array +from pandas.core.indexes.api import ( + Index, + MultiIndex, + PeriodIndex, + ensure_index, +) +from pandas.core.indexes.datetimes import DatetimeIndex +from pandas.core.indexes.timedeltas import TimedeltaIndex +from pandas.core.reshape.concat import concat + +from pandas.io.common import ( + check_parent_directory, + stringify_path, +) +from pandas.io.formats.printing import ( + adjoin, + justify, + pprint_thing, +) + +if TYPE_CHECKING: + from pandas import ( + DataFrame, + Series, + ) + + +common_docstring = """ + Parameters + ---------- + buf : str, Path or StringIO-like, optional, default None + Buffer to write to. If None, the output is returned as a string. + columns : sequence, optional, default None + The subset of columns to write. Writes all columns by default. + col_space : %(col_space_type)s, optional + %(col_space)s. + header : %(header_type)s, optional + %(header)s. + index : bool, optional, default True + Whether to print index (row) labels. + na_rep : str, optional, default 'NaN' + String representation of ``NaN`` to use. + formatters : list, tuple or dict of one-param. functions, optional + Formatter functions to apply to columns' elements by position or + name. + The result of each function must be a unicode string. + List/tuple must be of length equal to the number of columns. + float_format : one-parameter function, optional, default None + Formatter function to apply to columns' elements if they are + floats. This function must return a unicode string and will be + applied only to the non-``NaN`` elements, with ``NaN`` being + handled by ``na_rep``. + + .. versionchanged:: 1.2.0 + + sparsify : bool, optional, default True + Set to False for a DataFrame with a hierarchical index to print + every multiindex key at each row. + index_names : bool, optional, default True + Prints the names of the indexes. + justify : str, default None + How to justify the column labels. 
If None uses the option from + the print configuration (controlled by set_option), 'right' out + of the box. Valid values are + + * left + * right + * center + * justify + * justify-all + * start + * end + * inherit + * match-parent + * initial + * unset. + max_rows : int, optional + Maximum number of rows to display in the console. + max_cols : int, optional + Maximum number of columns to display in the console. + show_dimensions : bool, default False + Display DataFrame dimensions (number of rows by number of columns). + decimal : str, default '.' + Character recognized as decimal separator, e.g. ',' in Europe. + """ + +_VALID_JUSTIFY_PARAMETERS = ( + "left", + "right", + "center", + "justify", + "justify-all", + "start", + "end", + "inherit", + "match-parent", + "initial", + "unset", +) + +return_docstring = """ + Returns + ------- + str or None + If buf is None, returns the result as a string. Otherwise returns + None. + """ + + +class CategoricalFormatter: + def __init__( + self, + categorical: Categorical, + buf: IO[str] | None = None, + length: bool = True, + na_rep: str = "NaN", + footer: bool = True, + ): + self.categorical = categorical + self.buf = buf if buf is not None else StringIO("") + self.na_rep = na_rep + self.length = length + self.footer = footer + self.quoting = QUOTE_NONNUMERIC + + def _get_footer(self) -> str: + footer = "" + + if self.length: + if footer: + footer += ", " + footer += f"Length: {len(self.categorical)}" + + level_info = self.categorical._repr_categories_info() + + # Levels are added in a newline + if footer: + footer += "\n" + footer += level_info + + return str(footer) + + def _get_formatted_values(self) -> list[str]: + return format_array( + self.categorical._internal_get_values(), + None, + float_format=None, + na_rep=self.na_rep, + quoting=self.quoting, + ) + + def to_string(self) -> str: + categorical = self.categorical + + if len(categorical) == 0: + if self.footer: + return self._get_footer() + else: + return "" + + fmt_values = self._get_formatted_values() + + fmt_values = [i.strip() for i in fmt_values] + values = ", ".join(fmt_values) + result = ["[" + values + "]"] + if self.footer: + footer = self._get_footer() + if footer: + result.append(footer) + + return str("\n".join(result)) + + +class SeriesFormatter: + def __init__( + self, + series: Series, + buf: IO[str] | None = None, + length: bool | str = True, + header: bool = True, + index: bool = True, + na_rep: str = "NaN", + name: bool = False, + float_format: str | None = None, + dtype: bool = True, + max_rows: int | None = None, + min_rows: int | None = None, + ): + self.series = series + self.buf = buf if buf is not None else StringIO() + self.name = name + self.na_rep = na_rep + self.header = header + self.length = length + self.index = index + self.max_rows = max_rows + self.min_rows = min_rows + + if float_format is None: + float_format = get_option("display.float_format") + self.float_format = float_format + self.dtype = dtype + self.adj = get_adjustment() + + self._chk_truncate() + + def _chk_truncate(self) -> None: + self.tr_row_num: int | None + + min_rows = self.min_rows + max_rows = self.max_rows + # truncation determined by max_rows, actual truncated number of rows + # used below by min_rows + is_truncated_vertically = max_rows and (len(self.series) > max_rows) + series = self.series + if is_truncated_vertically: + max_rows = cast(int, max_rows) + if min_rows: + # if min_rows is set (not None or 0), set max_rows to minimum + # of both + max_rows = min(min_rows, max_rows) + 
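+            # a single visible row keeps just the head; otherwise split into head/tail halves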
if max_rows == 1: + row_num = max_rows + series = series.iloc[:max_rows] + else: + row_num = max_rows // 2 + series = concat((series.iloc[:row_num], series.iloc[-row_num:])) + self.tr_row_num = row_num + else: + self.tr_row_num = None + self.tr_series = series + self.is_truncated_vertically = is_truncated_vertically + + def _get_footer(self) -> str: + name = self.series.name + footer = "" + + if getattr(self.series.index, "freq", None) is not None: + assert isinstance( + self.series.index, (DatetimeIndex, PeriodIndex, TimedeltaIndex) + ) + footer += f"Freq: {self.series.index.freqstr}" + + if self.name is not False and name is not None: + if footer: + footer += ", " + + series_name = pprint_thing(name, escape_chars=("\t", "\r", "\n")) + footer += f"Name: {series_name}" + + if self.length is True or ( + self.length == "truncate" and self.is_truncated_vertically + ): + if footer: + footer += ", " + footer += f"Length: {len(self.series)}" + + if self.dtype is not False and self.dtype is not None: + dtype_name = getattr(self.tr_series.dtype, "name", None) + if dtype_name: + if footer: + footer += ", " + footer += f"dtype: {pprint_thing(dtype_name)}" + + # level infos are added to the end and in a new line, like it is done + # for Categoricals + if is_categorical_dtype(self.tr_series.dtype): + level_info = self.tr_series._values._repr_categories_info() + if footer: + footer += "\n" + footer += level_info + + return str(footer) + + def _get_formatted_index(self) -> tuple[list[str], bool]: + index = self.tr_series.index + + if isinstance(index, MultiIndex): + have_header = any(name for name in index.names) + fmt_index = index.format(names=True) + else: + have_header = index.name is not None + fmt_index = index.format(name=True) + return fmt_index, have_header + + def _get_formatted_values(self) -> list[str]: + return format_array( + self.tr_series._values, + None, + float_format=self.float_format, + na_rep=self.na_rep, + leading_space=self.index, + ) + + def to_string(self) -> str: + series = self.tr_series + footer = self._get_footer() + + if len(series) == 0: + return f"{type(self.series).__name__}([], {footer})" + + fmt_index, have_header = self._get_formatted_index() + fmt_values = self._get_formatted_values() + + if self.is_truncated_vertically: + n_header_rows = 0 + row_num = self.tr_row_num + row_num = cast(int, row_num) + width = self.adj.len(fmt_values[row_num - 1]) + if width > 3: + dot_str = "..." + else: + dot_str = ".." 
+ # Series uses mode=center because it has single value columns + # DataFrame uses mode=left + dot_str = self.adj.justify([dot_str], width, mode="center")[0] + fmt_values.insert(row_num + n_header_rows, dot_str) + fmt_index.insert(row_num + 1, "") + + if self.index: + result = self.adj.adjoin(3, *[fmt_index[1:], fmt_values]) + else: + result = self.adj.adjoin(3, fmt_values) + + if self.header and have_header: + result = fmt_index[0] + "\n" + result + + if footer: + result += "\n" + footer + + return str("".join(result)) + + +class TextAdjustment: + def __init__(self): + self.encoding = get_option("display.encoding") + + def len(self, text: str) -> int: + return len(text) + + def justify(self, texts: Any, max_len: int, mode: str = "right") -> list[str]: + return justify(texts, max_len, mode=mode) + + def adjoin(self, space: int, *lists, **kwargs) -> str: + return adjoin(space, *lists, strlen=self.len, justfunc=self.justify, **kwargs) + + +class EastAsianTextAdjustment(TextAdjustment): + def __init__(self): + super().__init__() + if get_option("display.unicode.ambiguous_as_wide"): + self.ambiguous_width = 2 + else: + self.ambiguous_width = 1 + + # Definition of East Asian Width + # https://unicode.org/reports/tr11/ + # Ambiguous width can be changed by option + self._EAW_MAP = {"Na": 1, "N": 1, "W": 2, "F": 2, "H": 1} + + def len(self, text: str) -> int: + """ + Calculate display width considering unicode East Asian Width + """ + if not isinstance(text, str): + return len(text) + + return sum( + self._EAW_MAP.get(east_asian_width(c), self.ambiguous_width) for c in text + ) + + def justify( + self, texts: Iterable[str], max_len: int, mode: str = "right" + ) -> list[str]: + # re-calculate padding space per str considering East Asian Width + def _get_pad(t): + return max_len - self.len(t) + len(t) + + if mode == "left": + return [x.ljust(_get_pad(x)) for x in texts] + elif mode == "center": + return [x.center(_get_pad(x)) for x in texts] + else: + return [x.rjust(_get_pad(x)) for x in texts] + + +def get_adjustment() -> TextAdjustment: + use_east_asian_width = get_option("display.unicode.east_asian_width") + if use_east_asian_width: + return EastAsianTextAdjustment() + else: + return TextAdjustment() + + +def get_dataframe_repr_params() -> dict[str, Any]: + """Get the parameters used to repr(dataFrame) calls using DataFrame.to_string. + + Supplying these parameters to DataFrame.to_string is equivalent to calling + ``repr(DataFrame)``. This is useful if you want to adjust the repr output. + + .. versionadded:: 1.4.0 + + Example + ------- + >>> import pandas as pd + >>> + >>> df = pd.DataFrame([[1, 2], [3, 4]]) + >>> repr_params = pd.io.formats.format.get_dataframe_repr_params() + >>> repr(df) == df.to_string(**repr_params) + True + """ + from pandas.io.formats import console + + if get_option("display.expand_frame_repr"): + line_width, _ = console.get_console_size() + else: + line_width = None + return { + "max_rows": get_option("display.max_rows"), + "min_rows": get_option("display.min_rows"), + "max_cols": get_option("display.max_columns"), + "max_colwidth": get_option("display.max_colwidth"), + "show_dimensions": get_option("display.show_dimensions"), + "line_width": line_width, + } + + +def get_series_repr_params() -> dict[str, Any]: + """Get the parameters used to repr(Series) calls using Series.to_string. + + Supplying these parameters to Series.to_string is equivalent to calling + ``repr(series)``. This is useful if you want to adjust the series repr output. + + .. 
versionadded:: 1.4.0 + + Example + ------- + >>> import pandas as pd + >>> + >>> ser = pd.Series([1, 2, 3, 4]) + >>> repr_params = pd.io.formats.format.get_series_repr_params() + >>> repr(ser) == ser.to_string(**repr_params) + True + """ + width, height = get_terminal_size() + max_rows = ( + height + if get_option("display.max_rows") == 0 + else get_option("display.max_rows") + ) + min_rows = ( + height + if get_option("display.max_rows") == 0 + else get_option("display.min_rows") + ) + + return { + "name": True, + "dtype": True, + "min_rows": min_rows, + "max_rows": max_rows, + "length": get_option("display.show_dimensions"), + } + + +class DataFrameFormatter: + """Class for processing dataframe formatting options and data.""" + + __doc__ = __doc__ if __doc__ else "" + __doc__ += common_docstring + return_docstring + + def __init__( + self, + frame: DataFrame, + columns: Sequence[str] | None = None, + col_space: ColspaceArgType | None = None, + header: bool | Sequence[str] = True, + index: bool = True, + na_rep: str = "NaN", + formatters: FormattersType | None = None, + justify: str | None = None, + float_format: FloatFormatType | None = None, + sparsify: bool | None = None, + index_names: bool = True, + max_rows: int | None = None, + min_rows: int | None = None, + max_cols: int | None = None, + show_dimensions: bool | str = False, + decimal: str = ".", + bold_rows: bool = False, + escape: bool = True, + ): + self.frame = frame + self.columns = self._initialize_columns(columns) + self.col_space = self._initialize_colspace(col_space) + self.header = header + self.index = index + self.na_rep = na_rep + self.formatters = self._initialize_formatters(formatters) + self.justify = self._initialize_justify(justify) + self.float_format = float_format + self.sparsify = self._initialize_sparsify(sparsify) + self.show_index_names = index_names + self.decimal = decimal + self.bold_rows = bold_rows + self.escape = escape + self.max_rows = max_rows + self.min_rows = min_rows + self.max_cols = max_cols + self.show_dimensions = show_dimensions + + self.max_cols_fitted = self._calc_max_cols_fitted() + self.max_rows_fitted = self._calc_max_rows_fitted() + + self.tr_frame = self.frame + self.truncate() + self.adj = get_adjustment() + + def get_strcols(self) -> list[list[str]]: + """ + Render a DataFrame to a list of columns (as lists of strings). 
+ """ + strcols = self._get_strcols_without_index() + + if self.index: + str_index = self._get_formatted_index(self.tr_frame) + strcols.insert(0, str_index) + + return strcols + + @property + def should_show_dimensions(self) -> bool: + return self.show_dimensions is True or ( + self.show_dimensions == "truncate" and self.is_truncated + ) + + @property + def is_truncated(self) -> bool: + return bool(self.is_truncated_horizontally or self.is_truncated_vertically) + + @property + def is_truncated_horizontally(self) -> bool: + return bool(self.max_cols_fitted and (len(self.columns) > self.max_cols_fitted)) + + @property + def is_truncated_vertically(self) -> bool: + return bool(self.max_rows_fitted and (len(self.frame) > self.max_rows_fitted)) + + @property + def dimensions_info(self) -> str: + return f"\n\n[{len(self.frame)} rows x {len(self.frame.columns)} columns]" + + @property + def has_index_names(self) -> bool: + return _has_names(self.frame.index) + + @property + def has_column_names(self) -> bool: + return _has_names(self.frame.columns) + + @property + def show_row_idx_names(self) -> bool: + return all((self.has_index_names, self.index, self.show_index_names)) + + @property + def show_col_idx_names(self) -> bool: + return all((self.has_column_names, self.show_index_names, self.header)) + + @property + def max_rows_displayed(self) -> int: + return min(self.max_rows or len(self.frame), len(self.frame)) + + def _initialize_sparsify(self, sparsify: bool | None) -> bool: + if sparsify is None: + return get_option("display.multi_sparse") + return sparsify + + def _initialize_formatters( + self, formatters: FormattersType | None + ) -> FormattersType: + if formatters is None: + return {} + elif len(self.frame.columns) == len(formatters) or isinstance(formatters, dict): + return formatters + else: + raise ValueError( + f"Formatters length({len(formatters)}) should match " + f"DataFrame number of columns({len(self.frame.columns)})" + ) + + def _initialize_justify(self, justify: str | None) -> str: + if justify is None: + return get_option("display.colheader_justify") + else: + return justify + + def _initialize_columns(self, columns: Sequence[str] | None) -> Index: + if columns is not None: + cols = ensure_index(columns) + self.frame = self.frame[cols] + return cols + else: + return self.frame.columns + + def _initialize_colspace(self, col_space: ColspaceArgType | None) -> ColspaceType: + result: ColspaceType + + if col_space is None: + result = {} + elif isinstance(col_space, (int, str)): + result = {"": col_space} + result.update({column: col_space for column in self.frame.columns}) + elif isinstance(col_space, Mapping): + for column in col_space.keys(): + if column not in self.frame.columns and column != "": + raise ValueError( + f"Col_space is defined for an unknown column: {column}" + ) + result = col_space + else: + if len(self.frame.columns) != len(col_space): + raise ValueError( + f"Col_space length({len(col_space)}) should match " + f"DataFrame number of columns({len(self.frame.columns)})" + ) + result = dict(zip(self.frame.columns, col_space)) + return result + + def _calc_max_cols_fitted(self) -> int | None: + """Number of columns fitting the screen.""" + if not self._is_in_terminal(): + return self.max_cols + + width, _ = get_terminal_size() + if self._is_screen_narrow(width): + return width + else: + return self.max_cols + + def _calc_max_rows_fitted(self) -> int | None: + """Number of rows with data fitting the screen.""" + max_rows: int | None + + if self._is_in_terminal(): + _, 
height = get_terminal_size()
+            if self.max_rows == 0:
+                # rows available to fill with actual data
+                return height - self._get_number_of_auxiliary_rows()
+
+            if self._is_screen_short(height):
+                max_rows = height
+            else:
+                max_rows = self.max_rows
+        else:
+            max_rows = self.max_rows
+
+        return self._adjust_max_rows(max_rows)
+
+    def _adjust_max_rows(self, max_rows: int | None) -> int | None:
+        """Adjust max_rows using display logic.
+
+        See description here:
+        https://pandas.pydata.org/docs/dev/user_guide/options.html#frequently-used-options
+
+        GH #37359
+        """
+        if max_rows:
+            if (len(self.frame) > max_rows) and self.min_rows:
+                # if truncated, set max_rows shown to min_rows
+                max_rows = min(self.min_rows, max_rows)
+        return max_rows
+
+    def _is_in_terminal(self) -> bool:
+        """Check if the output is to be shown in terminal."""
+        return bool(self.max_cols == 0 or self.max_rows == 0)
+
+    def _is_screen_narrow(self, max_width) -> bool:
+        return bool(self.max_cols == 0 and len(self.frame.columns) > max_width)
+
+    def _is_screen_short(self, max_height) -> bool:
+        return bool(self.max_rows == 0 and len(self.frame) > max_height)
+
+    def _get_number_of_auxiliary_rows(self) -> int:
+        """Get number of rows occupied by prompt, dots and dimension info."""
+        dot_row = 1
+        prompt_row = 1
+        num_rows = dot_row + prompt_row
+
+        if self.show_dimensions:
+            num_rows += len(self.dimensions_info.splitlines())
+
+        if self.header:
+            num_rows += 1
+
+        return num_rows
+
+    def truncate(self) -> None:
+        """
+        Check whether the frame should be truncated. If so, slice the frame up.
+        """
+        if self.is_truncated_horizontally:
+            self._truncate_horizontally()
+
+        if self.is_truncated_vertically:
+            self._truncate_vertically()
+
+    def _truncate_horizontally(self) -> None:
+        """Remove columns that are not to be displayed, and adjust formatters.
+
+        Attributes affected:
+            - tr_frame
+            - formatters
+            - tr_col_num
+        """
+        assert self.max_cols_fitted is not None
+        col_num = self.max_cols_fitted // 2
+        if col_num >= 1:
+            left = self.tr_frame.iloc[:, :col_num]
+            right = self.tr_frame.iloc[:, -col_num:]
+            self.tr_frame = concat((left, right), axis=1)
+
+            # truncate formatter
+            if isinstance(self.formatters, (list, tuple)):
+                self.formatters = [
+                    *self.formatters[:col_num],
+                    *self.formatters[-col_num:],
+                ]
+        else:
+            col_num = cast(int, self.max_cols)
+            self.tr_frame = self.tr_frame.iloc[:, :col_num]
+        self.tr_col_num = col_num
+
+    def _truncate_vertically(self) -> None:
+        """Remove rows that are not to be displayed.
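+
+        Half of ``max_rows_fitted`` rows are kept from the head of the frame
+        and half from the tail; with 100 rows and ``max_rows_fitted=10``, for
+        example, rows 0-4 and 95-99 remain (the values here are illustrative).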
+ + Attributes affected: + - tr_frame + - tr_row_num + """ + assert self.max_rows_fitted is not None + row_num = self.max_rows_fitted // 2 + if row_num >= 1: + head = self.tr_frame.iloc[:row_num, :] + tail = self.tr_frame.iloc[-row_num:, :] + self.tr_frame = concat((head, tail)) + else: + row_num = cast(int, self.max_rows) + self.tr_frame = self.tr_frame.iloc[:row_num, :] + self.tr_row_num = row_num + + def _get_strcols_without_index(self) -> list[list[str]]: + strcols: list[list[str]] = [] + + if not is_list_like(self.header) and not self.header: + for i, c in enumerate(self.tr_frame): + fmt_values = self.format_col(i) + fmt_values = _make_fixed_width( + strings=fmt_values, + justify=self.justify, + minimum=int(self.col_space.get(c, 0)), + adj=self.adj, + ) + strcols.append(fmt_values) + return strcols + + if is_list_like(self.header): + # cast here since can't be bool if is_list_like + self.header = cast(List[str], self.header) + if len(self.header) != len(self.columns): + raise ValueError( + f"Writing {len(self.columns)} cols " + f"but got {len(self.header)} aliases" + ) + str_columns = [[label] for label in self.header] + else: + str_columns = self._get_formatted_column_labels(self.tr_frame) + + if self.show_row_idx_names: + for x in str_columns: + x.append("") + + for i, c in enumerate(self.tr_frame): + cheader = str_columns[i] + header_colwidth = max( + int(self.col_space.get(c, 0)), *(self.adj.len(x) for x in cheader) + ) + fmt_values = self.format_col(i) + fmt_values = _make_fixed_width( + fmt_values, self.justify, minimum=header_colwidth, adj=self.adj + ) + + max_len = max(max(self.adj.len(x) for x in fmt_values), header_colwidth) + cheader = self.adj.justify(cheader, max_len, mode=self.justify) + strcols.append(cheader + fmt_values) + + return strcols + + def format_col(self, i: int) -> list[str]: + frame = self.tr_frame + formatter = self._get_formatter(i) + return format_array( + frame.iloc[:, i]._values, + formatter, + float_format=self.float_format, + na_rep=self.na_rep, + space=self.col_space.get(frame.columns[i]), + decimal=self.decimal, + leading_space=self.index, + ) + + def _get_formatter(self, i: str | int) -> Callable | None: + if isinstance(self.formatters, (list, tuple)): + if is_integer(i): + i = cast(int, i) + return self.formatters[i] + else: + return None + else: + if is_integer(i) and i not in self.columns: + i = self.columns[i] + return self.formatters.get(i, None) + + def _get_formatted_column_labels(self, frame: DataFrame) -> list[list[str]]: + from pandas.core.indexes.multi import sparsify_labels + + columns = frame.columns + + if isinstance(columns, MultiIndex): + fmt_columns = columns.format(sparsify=False, adjoin=False) + fmt_columns = list(zip(*fmt_columns)) + dtypes = self.frame.dtypes._values + + # if we have a Float level, they don't use leading space at all + restrict_formatting = any(level.is_floating for level in columns.levels) + need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes))) + + def space_format(x, y): + if ( + y not in self.formatters + and need_leadsp[x] + and not restrict_formatting + ): + return " " + y + return y + + str_columns = list( + zip(*([space_format(x, y) for y in x] for x in fmt_columns)) + ) + if self.sparsify and len(str_columns): + str_columns = sparsify_labels(str_columns) + + str_columns = [list(x) for x in zip(*str_columns)] + else: + fmt_columns = columns.format() + dtypes = self.frame.dtypes + need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes))) + str_columns = [ + [" " + x if not 
self._get_formatter(i) and need_leadsp[x] else x] + for i, x in enumerate(fmt_columns) + ] + # self.str_columns = str_columns + return str_columns + + def _get_formatted_index(self, frame: DataFrame) -> list[str]: + # Note: this is only used by to_string() and to_latex(), not by + # to_html(). so safe to cast col_space here. + col_space = {k: cast(int, v) for k, v in self.col_space.items()} + index = frame.index + columns = frame.columns + fmt = self._get_formatter("__index__") + + if isinstance(index, MultiIndex): + fmt_index = index.format( + sparsify=self.sparsify, + adjoin=False, + names=self.show_row_idx_names, + formatter=fmt, + ) + else: + fmt_index = [index.format(name=self.show_row_idx_names, formatter=fmt)] + + fmt_index = [ + tuple( + _make_fixed_width( + list(x), justify="left", minimum=col_space.get("", 0), adj=self.adj + ) + ) + for x in fmt_index + ] + + adjoined = self.adj.adjoin(1, *fmt_index).split("\n") + + # empty space for columns + if self.show_col_idx_names: + col_header = [str(x) for x in self._get_column_name_list()] + else: + col_header = [""] * columns.nlevels + + if self.header: + return col_header + adjoined + else: + return adjoined + + def _get_column_name_list(self) -> list[str]: + names: list[str] = [] + columns = self.frame.columns + if isinstance(columns, MultiIndex): + names.extend("" if name is None else name for name in columns.names) + else: + names.append("" if columns.name is None else columns.name) + return names + + +class DataFrameRenderer: + """Class for creating dataframe output in multiple formats. + + Called in pandas.core.generic.NDFrame: + - to_csv + - to_latex + + Called in pandas.core.frame.DataFrame: + - to_html + - to_string + + Parameters + ---------- + fmt : DataFrameFormatter + Formatter with the formatting options. + """ + + def __init__(self, fmt: DataFrameFormatter): + self.fmt = fmt + + def to_latex( + self, + buf: FilePath | WriteBuffer[str] | None = None, + column_format: str | None = None, + longtable: bool = False, + encoding: str | None = None, + multicolumn: bool = False, + multicolumn_format: str | None = None, + multirow: bool = False, + caption: str | None = None, + label: str | None = None, + position: str | None = None, + ) -> str | None: + """ + Render a DataFrame to a LaTeX tabular/longtable environment output. + """ + from pandas.io.formats.latex import LatexFormatter + + latex_formatter = LatexFormatter( + self.fmt, + longtable=longtable, + column_format=column_format, + multicolumn=multicolumn, + multicolumn_format=multicolumn_format, + multirow=multirow, + caption=caption, + label=label, + position=position, + ) + string = latex_formatter.to_string() + return save_to_buffer(string, buf=buf, encoding=encoding) + + def to_html( + self, + buf: FilePath | WriteBuffer[str] | None = None, + encoding: str | None = None, + classes: str | list | tuple | None = None, + notebook: bool = False, + border: int | None = None, + table_id: str | None = None, + render_links: bool = False, + ) -> str | None: + """ + Render a DataFrame to a html table. + + Parameters + ---------- + buf : str, path object, file-like object, or None, default None + String, path object (implementing ``os.PathLike[str]``), or file-like + object implementing a string ``write()`` function. If None, the result is + returned as a string. + encoding : str, default “utf-8” + Set character encoding. + classes : str or list-like + classes to include in the `class` attribute of the opening + ``
<table>`` tag, in addition to the default "dataframe".
+        notebook : {True, False}, optional, default False
+            Whether the generated HTML is for IPython Notebook.
+        border : int
+            A ``border=border`` attribute is included in the opening
+            ``<table>`` tag. Default ``pd.options.display.html.border``.
+        table_id : str, optional
+            A css id is included in the opening `<table>
` tag if specified. + render_links : bool, default False + Convert URLs to HTML links. + """ + from pandas.io.formats.html import ( + HTMLFormatter, + NotebookFormatter, + ) + + Klass = NotebookFormatter if notebook else HTMLFormatter + + html_formatter = Klass( + self.fmt, + classes=classes, + border=border, + table_id=table_id, + render_links=render_links, + ) + string = html_formatter.to_string() + return save_to_buffer(string, buf=buf, encoding=encoding) + + def to_string( + self, + buf: FilePath | WriteBuffer[str] | None = None, + encoding: str | None = None, + line_width: int | None = None, + ) -> str | None: + """ + Render a DataFrame to a console-friendly tabular output. + + Parameters + ---------- + buf : str, path object, file-like object, or None, default None + String, path object (implementing ``os.PathLike[str]``), or file-like + object implementing a string ``write()`` function. If None, the result is + returned as a string. + encoding: str, default “utf-8” + Set character encoding. + line_width : int, optional + Width to wrap a line in characters. + """ + from pandas.io.formats.string import StringFormatter + + string_formatter = StringFormatter(self.fmt, line_width=line_width) + string = string_formatter.to_string() + return save_to_buffer(string, buf=buf, encoding=encoding) + + def to_csv( + self, + path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None, + encoding: str | None = None, + sep: str = ",", + columns: Sequence[Hashable] | None = None, + index_label: IndexLabel | None = None, + mode: str = "w", + compression: CompressionOptions = "infer", + quoting: int | None = None, + quotechar: str = '"', + line_terminator: str | None = None, + chunksize: int | None = None, + date_format: str | None = None, + doublequote: bool = True, + escapechar: str | None = None, + errors: str = "strict", + storage_options: StorageOptions = None, + ) -> str | None: + """ + Render dataframe as comma-separated file. + """ + from pandas.io.formats.csvs import CSVFormatter + + if path_or_buf is None: + created_buffer = True + path_or_buf = StringIO() + else: + created_buffer = False + + csv_formatter = CSVFormatter( + path_or_buf=path_or_buf, + line_terminator=line_terminator, + sep=sep, + encoding=encoding, + errors=errors, + compression=compression, + quoting=quoting, + cols=columns, + index_label=index_label, + mode=mode, + chunksize=chunksize, + quotechar=quotechar, + date_format=date_format, + doublequote=doublequote, + escapechar=escapechar, + storage_options=storage_options, + formatter=self.fmt, + ) + csv_formatter.save() + + if created_buffer: + assert isinstance(path_or_buf, StringIO) + content = path_or_buf.getvalue() + path_or_buf.close() + return content + + return None + + +def save_to_buffer( + string: str, + buf: FilePath | WriteBuffer[str] | None = None, + encoding: str | None = None, +) -> str | None: + """ + Perform serialization. Write to buf or return as string if buf is None. + """ + with get_buffer(buf, encoding=encoding) as f: + f.write(string) + if buf is None: + return f.getvalue() + return None + + +@contextmanager +def get_buffer(buf: FilePath | WriteBuffer[str] | None, encoding: str | None = None): + """ + Context manager to open, yield and close buffer for filenames or Path-like + objects, otherwise yield buf unchanged. 
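+
+    For example, ``get_buffer(None)`` yields an ``io.StringIO`` whose
+    ``getvalue()`` the caller can read back, while ``get_buffer("out.txt")``
+    (an illustrative file name) opens that file with UTF-8 encoding and
+    closes it when the context exits.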
+ """ + if buf is not None: + buf = stringify_path(buf) + else: + buf = StringIO() + + if encoding is None: + encoding = "utf-8" + elif not isinstance(buf, str): + raise ValueError("buf is not a file name and encoding is specified.") + + if hasattr(buf, "write"): + yield buf + elif isinstance(buf, str): + check_parent_directory(str(buf)) + with open(buf, "w", encoding=encoding, newline="") as f: + # GH#30034 open instead of codecs.open prevents a file leak + # if we have an invalid encoding argument. + # newline="" is needed to roundtrip correctly on + # windows test_to_latex_filename + yield f + else: + raise TypeError("buf is not a file name and it has no write method") + + +# ---------------------------------------------------------------------- +# Array formatters + + +def format_array( + values: Any, + formatter: Callable | None, + float_format: FloatFormatType | None = None, + na_rep: str = "NaN", + digits: int | None = None, + space: str | int | None = None, + justify: str = "right", + decimal: str = ".", + leading_space: bool | None = True, + quoting: int | None = None, +) -> list[str]: + """ + Format an array for printing. + + Parameters + ---------- + values + formatter + float_format + na_rep + digits + space + justify + decimal + leading_space : bool, optional, default True + Whether the array should be formatted with a leading space. + When an array as a column of a Series or DataFrame, we do want + the leading space to pad between columns. + + When formatting an Index subclass + (e.g. IntervalIndex._format_native_types), we don't want the + leading space since it should be left-aligned. + + Returns + ------- + List[str] + """ + fmt_klass: type[GenericArrayFormatter] + if is_datetime64_dtype(values.dtype): + fmt_klass = Datetime64Formatter + elif is_datetime64tz_dtype(values.dtype): + fmt_klass = Datetime64TZFormatter + elif is_timedelta64_dtype(values.dtype): + fmt_klass = Timedelta64Formatter + elif is_extension_array_dtype(values.dtype): + fmt_klass = ExtensionArrayFormatter + elif is_float_dtype(values.dtype) or is_complex_dtype(values.dtype): + fmt_klass = FloatArrayFormatter + elif is_integer_dtype(values.dtype): + fmt_klass = IntArrayFormatter + else: + fmt_klass = GenericArrayFormatter + + if space is None: + space = get_option("display.column_space") + + if float_format is None: + float_format = get_option("display.float_format") + + if digits is None: + digits = get_option("display.precision") + + fmt_obj = fmt_klass( + values, + digits=digits, + na_rep=na_rep, + float_format=float_format, + formatter=formatter, + space=space, + justify=justify, + decimal=decimal, + leading_space=leading_space, + quoting=quoting, + ) + + return fmt_obj.get_result() + + +class GenericArrayFormatter: + def __init__( + self, + values: Any, + digits: int = 7, + formatter: Callable | None = None, + na_rep: str = "NaN", + space: str | int = 12, + float_format: FloatFormatType | None = None, + justify: str = "right", + decimal: str = ".", + quoting: int | None = None, + fixed_width: bool = True, + leading_space: bool | None = True, + ): + self.values = values + self.digits = digits + self.na_rep = na_rep + self.space = space + self.formatter = formatter + self.float_format = float_format + self.justify = justify + self.decimal = decimal + self.quoting = quoting + self.fixed_width = fixed_width + self.leading_space = leading_space + + def get_result(self) -> list[str]: + fmt_values = self._format_strings() + return _make_fixed_width(fmt_values, self.justify) + + def _format_strings(self) -> 
list[str]: + if self.float_format is None: + float_format = get_option("display.float_format") + if float_format is None: + precision = get_option("display.precision") + float_format = lambda x: _trim_zeros_single_float( + f"{x: .{precision:d}f}" + ) + else: + float_format = self.float_format + + if self.formatter is not None: + formatter = self.formatter + else: + quote_strings = self.quoting is not None and self.quoting != QUOTE_NONE + formatter = partial( + pprint_thing, + escape_chars=("\t", "\r", "\n"), + quote_strings=quote_strings, + ) + + def _format(x): + if self.na_rep is not None and is_scalar(x) and isna(x): + try: + # try block for np.isnat specifically + # determine na_rep if x is None or NaT-like + if x is None: + return "None" + elif x is NA: + return str(NA) + elif x is NaT or np.isnat(x): + return "NaT" + except (TypeError, ValueError): + # np.isnat only handles datetime or timedelta objects + pass + return self.na_rep + elif isinstance(x, PandasObject): + return str(x) + else: + # object dtype + return str(formatter(x)) + + vals = extract_array(self.values, extract_numpy=True) + if not isinstance(vals, np.ndarray): + raise TypeError( + "ExtensionArray formatting should use ExtensionArrayFormatter" + ) + inferred = lib.map_infer(vals, is_float) + is_float_type = ( + inferred + # vals may have 2 or more dimensions + & np.all(notna(vals), axis=tuple(range(1, len(vals.shape)))) + ) + leading_space = self.leading_space + if leading_space is None: + leading_space = is_float_type.any() + + fmt_values = [] + for i, v in enumerate(vals): + if not is_float_type[i] and leading_space: + fmt_values.append(f" {_format(v)}") + elif is_float_type[i]: + fmt_values.append(float_format(v)) + else: + if leading_space is False: + # False specifically, so that the default is + # to include a space if we get here. 
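+                    # Note (illustrative): leading_space=None above means
+                    # "infer" -- a pad space is used only when the column
+                    # holds floats, so a mixed object column like [1.0, "x"]
+                    # renders "x" as " x" to keep the signs aligned.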
+ tpl = "{v}" + else: + tpl = " {v}" + fmt_values.append(tpl.format(v=_format(v))) + + return fmt_values + + +class FloatArrayFormatter(GenericArrayFormatter): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + # float_format is expected to be a string + # formatter should be used to pass a function + if self.float_format is not None and self.formatter is None: + # GH21625, GH22270 + self.fixed_width = False + if callable(self.float_format): + self.formatter = self.float_format + self.float_format = None + + def _value_formatter( + self, + float_format: FloatFormatType | None = None, + threshold: float | int | None = None, + ) -> Callable: + """Returns a function to be applied on each value to format it""" + # the float_format parameter supersedes self.float_format + if float_format is None: + float_format = self.float_format + + # we are going to compose different functions, to first convert to + # a string, then replace the decimal symbol, and finally chop according + # to the threshold + + # when there is no float_format, we use str instead of '%g' + # because str(0.0) = '0.0' while '%g' % 0.0 = '0' + if float_format: + + def base_formatter(v): + assert float_format is not None # for mypy + # error: "str" not callable + # error: Unexpected keyword argument "value" for "__call__" of + # "EngFormatter" + return ( + float_format(value=v) # type: ignore[operator,call-arg] + if notna(v) + else self.na_rep + ) + + else: + + def base_formatter(v): + return str(v) if notna(v) else self.na_rep + + if self.decimal != ".": + + def decimal_formatter(v): + return base_formatter(v).replace(".", self.decimal, 1) + + else: + decimal_formatter = base_formatter + + if threshold is None: + return decimal_formatter + + def formatter(value): + if notna(value): + if abs(value) > threshold: + return decimal_formatter(value) + else: + return decimal_formatter(0.0) + else: + return self.na_rep + + return formatter + + def get_result_as_array(self) -> np.ndarray: + """ + Returns the float values converted into strings using + the parameters given at initialisation, as a numpy array + """ + + def format_with_na_rep(values: ArrayLike, formatter: Callable, na_rep: str): + mask = isna(values) + formatted = np.array( + [ + formatter(val) if not m else na_rep + for val, m in zip(values.ravel(), mask.ravel()) + ] + ).reshape(values.shape) + return formatted + + if self.formatter is not None: + return format_with_na_rep(self.values, self.formatter, self.na_rep) + + if self.fixed_width: + threshold = get_option("display.chop_threshold") + else: + threshold = None + + # if we have a fixed_width, we'll need to try different float_format + def format_values_with(float_format): + formatter = self._value_formatter(float_format, threshold) + + # default formatter leaves a space to the left when formatting + # floats, must be consistent for left-justifying NaNs (GH #25061) + if self.justify == "left": + na_rep = " " + self.na_rep + else: + na_rep = self.na_rep + + # separate the wheat from the chaff + values = self.values + is_complex = is_complex_dtype(values) + values = format_with_na_rep(values, formatter, na_rep) + + if self.fixed_width: + if is_complex: + result = _trim_zeros_complex(values, self.decimal) + else: + result = _trim_zeros_float(values, self.decimal) + return np.asarray(result, dtype="object") + + return values + + # There is a special default string when we are fixed-width + # The default is otherwise to use str instead of a formatting string + float_format: FloatFormatType | None 
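+        # Note (illustrative): with the default digits=6 and leading_space=True
+        # the fixed-width template below is "{value: .6f}", so 1.5 is first
+        # rendered as " 1.500000" and _trim_zeros_float then drops the trailing
+        # zeros that are shared by the whole column.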
+ if self.float_format is None: + if self.fixed_width: + if self.leading_space is True: + fmt_str = "{value: .{digits:d}f}" + else: + fmt_str = "{value:.{digits:d}f}" + float_format = partial(fmt_str.format, digits=self.digits) + else: + float_format = self.float_format + else: + float_format = lambda value: self.float_format % value + + formatted_values = format_values_with(float_format) + + if not self.fixed_width: + return formatted_values + + # we need do convert to engineering format if some values are too small + # and would appear as 0, or if some values are too big and take too + # much space + + if len(formatted_values) > 0: + maxlen = max(len(x) for x in formatted_values) + too_long = maxlen > self.digits + 6 + else: + too_long = False + + with np.errstate(invalid="ignore"): + abs_vals = np.abs(self.values) + # this is pretty arbitrary for now + # large values: more that 8 characters including decimal symbol + # and first digit, hence > 1e6 + has_large_values = (abs_vals > 1e6).any() + has_small_values = ( + (abs_vals < 10 ** (-self.digits)) & (abs_vals > 0) + ).any() + + if has_small_values or (too_long and has_large_values): + if self.leading_space is True: + fmt_str = "{value: .{digits:d}e}" + else: + fmt_str = "{value:.{digits:d}e}" + float_format = partial(fmt_str.format, digits=self.digits) + formatted_values = format_values_with(float_format) + + return formatted_values + + def _format_strings(self) -> list[str]: + return list(self.get_result_as_array()) + + +class IntArrayFormatter(GenericArrayFormatter): + def _format_strings(self) -> list[str]: + if self.leading_space is False: + formatter_str = lambda x: f"{x:d}".format(x=x) + else: + formatter_str = lambda x: f"{x: d}".format(x=x) + formatter = self.formatter or formatter_str + fmt_values = [formatter(x) for x in self.values] + return fmt_values + + +class Datetime64Formatter(GenericArrayFormatter): + def __init__( + self, + values: np.ndarray | Series | DatetimeIndex | DatetimeArray, + nat_rep: str = "NaT", + date_format: None = None, + **kwargs, + ): + super().__init__(values, **kwargs) + self.nat_rep = nat_rep + self.date_format = date_format + + def _format_strings(self) -> list[str]: + """we by definition have DO NOT have a TZ""" + values = self.values + + if not isinstance(values, DatetimeIndex): + values = DatetimeIndex(values) + + if self.formatter is not None and callable(self.formatter): + return [self.formatter(x) for x in values] + + fmt_values = values._data._format_native_types( + na_rep=self.nat_rep, date_format=self.date_format + ) + return fmt_values.tolist() + + +class ExtensionArrayFormatter(GenericArrayFormatter): + def _format_strings(self) -> list[str]: + values = extract_array(self.values, extract_numpy=True) + + formatter = self.formatter + if formatter is None: + # error: Item "ndarray" of "Union[Any, Union[ExtensionArray, ndarray]]" has + # no attribute "_formatter" + formatter = values._formatter(boxed=True) # type: ignore[union-attr] + + if isinstance(values, Categorical): + # Categorical is special for now, so that we can preserve tzinfo + array = values._internal_get_values() + else: + array = np.asarray(values) + + fmt_values = format_array( + array, + formatter, + float_format=self.float_format, + na_rep=self.na_rep, + digits=self.digits, + space=self.space, + justify=self.justify, + decimal=self.decimal, + leading_space=self.leading_space, + quoting=self.quoting, + ) + return fmt_values + + +def format_percentiles( + percentiles: (np.ndarray | list[int | float] | list[float] | list[str 
| float]), +) -> list[str]: + """ + Outputs rounded and formatted percentiles. + + Parameters + ---------- + percentiles : list-like, containing floats from interval [0,1] + + Returns + ------- + formatted : list of strings + + Notes + ----- + Rounding precision is chosen so that: (1) if any two elements of + ``percentiles`` differ, they remain different after rounding + (2) no entry is *rounded* to 0% or 100%. + Any non-integer is always rounded to at least 1 decimal place. + + Examples + -------- + Keeps all entries different after rounding: + + >>> format_percentiles([0.01999, 0.02001, 0.5, 0.666666, 0.9999]) + ['1.999%', '2.001%', '50%', '66.667%', '99.99%'] + + No element is rounded to 0% or 100% (unless already equal to it). + Duplicates are allowed: + + >>> format_percentiles([0, 0.5, 0.02001, 0.5, 0.666666, 0.9999]) + ['0%', '50%', '2.0%', '50%', '66.67%', '99.99%'] + """ + percentiles = np.asarray(percentiles) + + # It checks for np.NaN as well + with np.errstate(invalid="ignore"): + if ( + not is_numeric_dtype(percentiles) + or not np.all(percentiles >= 0) + or not np.all(percentiles <= 1) + ): + raise ValueError("percentiles should all be in the interval [0,1]") + + percentiles = 100 * percentiles + + int_idx = np.isclose(percentiles.astype(int), percentiles) + + if np.all(int_idx): + out = percentiles.astype(int).astype(str) + return [i + "%" for i in out] + + unique_pcts = np.unique(percentiles) + to_begin = unique_pcts[0] if unique_pcts[0] > 0 else None + to_end = 100 - unique_pcts[-1] if unique_pcts[-1] < 100 else None + + # Least precision that keeps percentiles unique after rounding + prec = -np.floor( + np.log10(np.min(np.ediff1d(unique_pcts, to_begin=to_begin, to_end=to_end))) + ).astype(int) + prec = max(1, prec) + out = np.empty_like(percentiles, dtype=object) + out[int_idx] = percentiles[int_idx].astype(int).astype(str) + + out[~int_idx] = percentiles[~int_idx].round(prec).astype(str) + return [i + "%" for i in out] + + +def is_dates_only(values: np.ndarray | DatetimeArray | Index | DatetimeIndex) -> bool: + # return a boolean if we are only dates (and don't have a timezone) + if not isinstance(values, Index): + values = values.ravel() + + values = DatetimeIndex(values) + if values.tz is not None: + return False + + values_int = values.asi8 + consider_values = values_int != iNaT + one_day_nanos = 86400 * 10 ** 9 + even_days = ( + np.logical_and(consider_values, values_int % int(one_day_nanos) != 0).sum() == 0 + ) + if even_days: + return True + return False + + +def _format_datetime64(x: NaTType | Timestamp, nat_rep: str = "NaT") -> str: + if x is NaT: + return nat_rep + + return str(x) + + +def _format_datetime64_dateonly( + x: NaTType | Timestamp, + nat_rep: str = "NaT", + date_format: str | None = None, +) -> str: + if x is NaT: + return nat_rep + + if date_format: + return x.strftime(date_format) + else: + # error: Item "NaTType" of "Union[NaTType, Any]" has no attribute "_date_repr" + # The underlying problem here is that mypy doesn't understand that NaT + # is a singleton, so that the check above excludes it here. 
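+        # Note (illustrative): _date_repr is the date-only string, e.g.
+        # Timestamp("2021-01-01 00:00:00") gives "2021-01-01"; this branch is
+        # only reached when no date_format was supplied.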
+ return x._date_repr # type: ignore[union-attr] + + +def get_format_datetime64( + is_dates_only: bool, nat_rep: str = "NaT", date_format: str | None = None +) -> Callable: + + if is_dates_only: + return lambda x: _format_datetime64_dateonly( + x, nat_rep=nat_rep, date_format=date_format + ) + else: + return lambda x: _format_datetime64(x, nat_rep=nat_rep) + + +def get_format_datetime64_from_values( + values: np.ndarray | DatetimeArray | DatetimeIndex, date_format: str | None +) -> str | None: + """given values and a date_format, return a string format""" + if isinstance(values, np.ndarray) and values.ndim > 1: + # We don't actually care about the order of values, and DatetimeIndex + # only accepts 1D values + values = values.ravel() + + ido = is_dates_only(values) + if ido: + return date_format or "%Y-%m-%d" + return date_format + + +class Datetime64TZFormatter(Datetime64Formatter): + def _format_strings(self) -> list[str]: + """we by definition have a TZ""" + values = self.values.astype(object) + ido = is_dates_only(values) + formatter = self.formatter or get_format_datetime64( + ido, date_format=self.date_format + ) + fmt_values = [formatter(x) for x in values] + + return fmt_values + + +class Timedelta64Formatter(GenericArrayFormatter): + def __init__( + self, + values: np.ndarray | TimedeltaIndex, + nat_rep: str = "NaT", + box: bool = False, + **kwargs, + ): + super().__init__(values, **kwargs) + self.nat_rep = nat_rep + self.box = box + + def _format_strings(self) -> list[str]: + formatter = self.formatter or get_format_timedelta64( + self.values, nat_rep=self.nat_rep, box=self.box + ) + return [formatter(x) for x in self.values] + + +def get_format_timedelta64( + values: np.ndarray | TimedeltaIndex | TimedeltaArray, + nat_rep: str = "NaT", + box: bool = False, +) -> Callable: + """ + Return a formatter function for a range of timedeltas. 
+ These will all have the same format argument + + If box, then show the return in quotes + """ + values_int = values.view(np.int64) + + consider_values = values_int != iNaT + + one_day_nanos = 86400 * 10 ** 9 + # error: Unsupported operand types for % ("ExtensionArray" and "int") + not_midnight = values_int % one_day_nanos != 0 # type: ignore[operator] + # error: Argument 1 to "__call__" of "ufunc" has incompatible type + # "Union[Any, ExtensionArray, ndarray]"; expected + # "Union[Union[int, float, complex, str, bytes, generic], + # Sequence[Union[int, float, complex, str, bytes, generic]], + # Sequence[Sequence[Any]], _SupportsArray]" + both = np.logical_and(consider_values, not_midnight) # type: ignore[arg-type] + even_days = both.sum() == 0 + + if even_days: + format = None + else: + format = "long" + + def _formatter(x): + if x is None or (is_scalar(x) and isna(x)): + return nat_rep + + if not isinstance(x, Timedelta): + x = Timedelta(x) + result = x._repr_base(format=format) + if box: + result = f"'{result}'" + return result + + return _formatter + + +def _make_fixed_width( + strings: list[str], + justify: str = "right", + minimum: int | None = None, + adj: TextAdjustment | None = None, +) -> list[str]: + + if len(strings) == 0 or justify == "all": + return strings + + if adj is None: + adjustment = get_adjustment() + else: + adjustment = adj + + max_len = max(adjustment.len(x) for x in strings) + + if minimum is not None: + max_len = max(minimum, max_len) + + conf_max = get_option("display.max_colwidth") + if conf_max is not None and max_len > conf_max: + max_len = conf_max + + def just(x: str) -> str: + if conf_max is not None: + if (conf_max > 3) & (adjustment.len(x) > max_len): + x = x[: max_len - 3] + "..." + return x + + strings = [just(x) for x in strings] + result = adjustment.justify(strings, max_len, mode=justify) + return result + + +def _trim_zeros_complex(str_complexes: np.ndarray, decimal: str = ".") -> list[str]: + """ + Separates the real and imaginary parts from the complex number, and + executes the _trim_zeros_float method on each of those. + """ + trimmed = [ + "".join(_trim_zeros_float(re.split(r"([j+-])", x), decimal)) + for x in str_complexes + ] + + # pad strings to the length of the longest trimmed string for alignment + lengths = [len(s) for s in trimmed] + max_length = max(lengths) + padded = [ + s[: -((k - 1) // 2 + 1)] # real part + + (max_length - k) // 2 * "0" + + s[-((k - 1) // 2 + 1) : -((k - 1) // 2)] # + / - + + s[-((k - 1) // 2) : -1] # imaginary part + + (max_length - k) // 2 * "0" + + s[-1] + for s, k in zip(trimmed, lengths) + ] + return padded + + +def _trim_zeros_single_float(str_float: str) -> str: + """ + Trims trailing zeros after a decimal point, + leaving just one if necessary. + """ + str_float = str_float.rstrip("0") + if str_float.endswith("."): + str_float += "0" + + return str_float + + +def _trim_zeros_float( + str_floats: np.ndarray | list[str], decimal: str = "." +) -> list[str]: + """ + Trims the maximum number of trailing zeros equally from + all numbers containing decimals, leaving just one if + necessary. + """ + trimmed = str_floats + number_regex = re.compile(fr"^\s*[\+-]?[0-9]+\{decimal}[0-9]*$") + + def is_number_with_decimal(x): + return re.match(number_regex, x) is not None + + def should_trim(values: np.ndarray | list[str]) -> bool: + """ + Determine if an array of strings should be trimmed. 
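+
+        For example (illustrative values), ``["1.50", "2.30"]`` should be
+        trimmed, since every decimal string ends in a zero, while
+        ``["1.50", "2.35"]`` should not.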
+ + Returns True if all numbers containing decimals (defined by the + above regular expression) within the array end in a zero, otherwise + returns False. + """ + numbers = [x for x in values if is_number_with_decimal(x)] + return len(numbers) > 0 and all(x.endswith("0") for x in numbers) + + while should_trim(trimmed): + trimmed = [x[:-1] if is_number_with_decimal(x) else x for x in trimmed] + + # leave one 0 after the decimal points if need be. + result = [ + x + "0" if is_number_with_decimal(x) and x.endswith(decimal) else x + for x in trimmed + ] + return result + + +def _has_names(index: Index) -> bool: + if isinstance(index, MultiIndex): + return com.any_not_none(*index.names) + else: + return index.name is not None + + +class EngFormatter: + """ + Formats float values according to engineering format. + + Based on matplotlib.ticker.EngFormatter + """ + + # The SI engineering prefixes + ENG_PREFIXES = { + -24: "y", + -21: "z", + -18: "a", + -15: "f", + -12: "p", + -9: "n", + -6: "u", + -3: "m", + 0: "", + 3: "k", + 6: "M", + 9: "G", + 12: "T", + 15: "P", + 18: "E", + 21: "Z", + 24: "Y", + } + + def __init__(self, accuracy: int | None = None, use_eng_prefix: bool = False): + self.accuracy = accuracy + self.use_eng_prefix = use_eng_prefix + + def __call__(self, num: int | float) -> str: + """ + Formats a number in engineering notation, appending a letter + representing the power of 1000 of the original number. Some examples: + >>> format_eng = EngFormatter(accuracy=0, use_eng_prefix=True) + >>> format_eng(0) + ' 0' + >>> format_eng = EngFormatter(accuracy=1, use_eng_prefix=True) + >>> format_eng(1_000_000) + ' 1.0M' + >>> format_eng = EngFormatter(accuracy=2, use_eng_prefix=False) + >>> format_eng("-1e-6") + '-1.00E-06' + + @param num: the value to represent + @type num: either a numeric value or a string that can be converted to + a numeric value (as per decimal.Decimal constructor) + + @return: engineering formatted string + """ + dnum = decimal.Decimal(str(num)) + + if decimal.Decimal.is_nan(dnum): + return "NaN" + + if decimal.Decimal.is_infinite(dnum): + return "inf" + + sign = 1 + + if dnum < 0: # pragma: no cover + sign = -1 + dnum = -dnum + + if dnum != 0: + pow10 = decimal.Decimal(int(math.floor(dnum.log10() / 3) * 3)) + else: + pow10 = decimal.Decimal(0) + + pow10 = pow10.min(max(self.ENG_PREFIXES.keys())) + pow10 = pow10.max(min(self.ENG_PREFIXES.keys())) + int_pow10 = int(pow10) + + if self.use_eng_prefix: + prefix = self.ENG_PREFIXES[int_pow10] + else: + if int_pow10 < 0: + prefix = f"E-{-int_pow10:02d}" + else: + prefix = f"E+{int_pow10:02d}" + + mant = sign * dnum / (10 ** pow10) + + if self.accuracy is None: # pragma: no cover + format_str = "{mant: g}{prefix}" + else: + format_str = f"{{mant: .{self.accuracy:d}f}}{{prefix}}" + + formatted = format_str.format(mant=mant, prefix=prefix) + + return formatted + + +def set_eng_float_format(accuracy: int = 3, use_eng_prefix: bool = False) -> None: + """ + Alter default behavior on how float is formatted in DataFrame. + Format float in engineering format. By accuracy, we mean the number of + decimal digits after the floating point. + + See also EngFormatter. + """ + set_option("display.float_format", EngFormatter(accuracy, use_eng_prefix)) + set_option("display.column_space", max(12, accuracy + 9)) + + +def get_level_lengths( + levels: Any, sentinel: bool | object | str = "" +) -> list[dict[int, int]]: + """ + For each index in each level the function returns lengths of indexes. 
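+
+    For example (illustrative input), a single sparsified level
+    ``[["foo", "", "bar", ""]]`` with ``sentinel=""`` yields
+    ``[{0: 2, 2: 2}]``: the label starting at row 0 spans two rows, as does
+    the label starting at row 2.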
+ + Parameters + ---------- + levels : list of lists + List of values on for level. + sentinel : string, optional + Value which states that no new index starts on there. + + Returns + ------- + Returns list of maps. For each level returns map of indexes (key is index + in row and value is length of index). + """ + if len(levels) == 0: + return [] + + control = [True] * len(levels[0]) + + result = [] + for level in levels: + last_index = 0 + + lengths = {} + for i, key in enumerate(level): + if control[i] and key == sentinel: + pass + else: + control[i] = False + lengths[last_index] = i - last_index + last_index = i + + lengths[last_index] = len(level) - last_index + + result.append(lengths) + + return result + + +def buffer_put_lines(buf: WriteBuffer[str], lines: list[str]) -> None: + """ + Appends lines to a buffer. + + Parameters + ---------- + buf + The buffer to write to + lines + The lines to append. + """ + if any(isinstance(x, str) for x in lines): + lines = [str(x) for x in lines] + buf.write("\n".join(lines)) diff --git a/.local/lib/python3.8/site-packages/pandas/io/formats/html.py b/.local/lib/python3.8/site-packages/pandas/io/formats/html.py new file mode 100644 index 0000000..0c92727 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/formats/html.py @@ -0,0 +1,623 @@ +""" +Module for formatting output data in HTML. +""" +from __future__ import annotations + +from textwrap import dedent +from typing import ( + Any, + Iterable, + Mapping, + cast, +) + +from pandas._config import get_option + +from pandas._libs import lib + +from pandas import ( + MultiIndex, + option_context, +) + +from pandas.io.common import is_url +from pandas.io.formats.format import ( + DataFrameFormatter, + get_level_lengths, +) +from pandas.io.formats.printing import pprint_thing + + +class HTMLFormatter: + """ + Internal class for formatting output data in html. + This class is intended for shared functionality between + DataFrame.to_html() and DataFrame._repr_html_(). + Any logic in common with other output formatting methods + should ideally be inherited from classes in format.py + and this class responsible for only producing html markup. + """ + + indent_delta = 2 + + def __init__( + self, + formatter: DataFrameFormatter, + classes: str | list[str] | tuple[str, ...] | None = None, + border: int | None = None, + table_id: str | None = None, + render_links: bool = False, + ) -> None: + self.fmt = formatter + self.classes = classes + + self.frame = self.fmt.frame + self.columns = self.fmt.tr_frame.columns + self.elements: list[str] = [] + self.bold_rows = self.fmt.bold_rows + self.escape = self.fmt.escape + self.show_dimensions = self.fmt.show_dimensions + if border is None: + border = cast(int, get_option("display.html.border")) + self.border = border + self.table_id = table_id + self.render_links = render_links + + self.col_space = { + column: f"{value}px" if isinstance(value, int) else value + for column, value in self.fmt.col_space.items() + } + + def to_string(self) -> str: + lines = self.render() + if any(isinstance(x, str) for x in lines): + lines = [str(x) for x in lines] + return "\n".join(lines) + + def render(self) -> list[str]: + self._write_table() + + if self.should_show_dimensions: + by = chr(215) # × + self.write( + f"
<p>{len(self.frame)} rows {by} {len(self.frame.columns)} columns</p>"
+            )
+
+        return self.elements
+
+    @property
+    def should_show_dimensions(self):
+        return self.fmt.should_show_dimensions
+
+    @property
+    def show_row_idx_names(self) -> bool:
+        return self.fmt.show_row_idx_names
+
+    @property
+    def show_col_idx_names(self) -> bool:
+        return self.fmt.show_col_idx_names
+
+    @property
+    def row_levels(self) -> int:
+        if self.fmt.index:
+            # showing (row) index
+            return self.frame.index.nlevels
+        elif self.show_col_idx_names:
+            # see gh-22579
+            # Column misalignment also occurs for
+            # a standard index when the columns index is named.
+            # If the row index is not displayed a column of
+            # blank cells need to be included before the DataFrame values.
+            return 1
+        # not showing (row) index
+        return 0
+
+    def _get_columns_formatted_values(self) -> Iterable:
+        return self.columns
+
+    @property
+    def is_truncated(self) -> bool:
+        return self.fmt.is_truncated
+
+    @property
+    def ncols(self) -> int:
+        return len(self.fmt.tr_frame.columns)
+
+    def write(self, s: Any, indent: int = 0) -> None:
+        rs = pprint_thing(s)
+        self.elements.append(" " * indent + rs)
+
+    def write_th(
+        self, s: Any, header: bool = False, indent: int = 0, tags: str | None = None
+    ) -> None:
+        """
+        Method for writing a formatted <th> cell.
+
+        If col_space is set on the formatter then that is used for
+        the value of min-width.
+
+        Parameters
+        ----------
+        s : object
+            The data to be written inside the cell.
+        header : bool, default False
+            Set to True if the <th> is for use inside <thead>. This will
+            cause min-width to be set if there is one.
+        indent : int, default 0
+            The indentation level of the cell.
+        tags : str, default None
+            Tags to include in the cell.
+
+        Returns
+        -------
+        A written <th> cell.
+        """
+        col_space = self.col_space.get(s, None)
+
+        if header and col_space is not None:
+            tags = tags or ""
+            tags += f'style="min-width: {col_space};"'
+
+        self._write_cell(s, kind="th", indent=indent, tags=tags)
+
+    def write_td(self, s: Any, indent: int = 0, tags: str | None = None) -> None:
+        self._write_cell(s, kind="td", indent=indent, tags=tags)
+
+    def _write_cell(
+        self, s: Any, kind: str = "td", indent: int = 0, tags: str | None = None
+    ) -> None:
+        if tags is not None:
+            start_tag = f"<{kind} {tags}>"
+        else:
+            start_tag = f"<{kind}>"
+
+        if self.escape:
+            # escape & first to prevent double escaping of &
+            esc = {"&": r"&amp;", "<": r"&lt;", ">": r"&gt;"}
+        else:
+            esc = {}
+
+        rs = pprint_thing(s, escape_chars=esc).strip()
+
+        if self.render_links and is_url(rs):
+            rs_unescaped = pprint_thing(s, escape_chars={}).strip()
+            start_tag += f'<a href="{rs_unescaped}" target="_blank">'
+            end_a = "</a>"
+        else:
+            end_a = ""
+
+        self.write(f"{start_tag}{rs}{end_a}</{kind}>", indent)
+
+    def write_tr(
+        self,
+        line: Iterable,
+        indent: int = 0,
+        indent_delta: int = 0,
+        header: bool = False,
+        align: str | None = None,
+        tags: dict[int, str] | None = None,
+        nindex_levels: int = 0,
+    ) -> None:
+        if tags is None:
+            tags = {}
+
+        if align is None:
+            self.write("<tr>", indent)
+        else:
+            self.write(f'<tr style="text-align: {align};">', indent)
+        indent += indent_delta
+
+        for i, s in enumerate(line):
+            val_tag = tags.get(i, None)
+            if header or (self.bold_rows and i < nindex_levels):
+                self.write_th(s, indent=indent, header=header, tags=val_tag)
+            else:
+                self.write_td(s, indent, tags=val_tag)
+
+        indent -= indent_delta
+        self.write("</tr>", indent)
+
+    def _write_table(self, indent: int = 0) -> None:
+        _classes = ["dataframe"]  # Default class.
+        use_mathjax = get_option("display.html.use_mathjax")
+        if not use_mathjax:
+            _classes.append("tex2jax_ignore")
+        if self.classes is not None:
+            if isinstance(self.classes, str):
+                self.classes = self.classes.split()
+            if not isinstance(self.classes, (list, tuple)):
+                raise TypeError(
+                    "classes must be a string, list, "
+                    f"or tuple, not {type(self.classes)}"
+                )
+            _classes.extend(self.classes)
+
+        if self.table_id is None:
+            id_section = ""
+        else:
+            id_section = f' id="{self.table_id}"'
+
+        self.write(
+            f'<table border="{self.border}" class="{" ".join(_classes)}"{id_section}>',
+            indent,
+        )
+
+        if self.fmt.header or self.show_row_idx_names:
+            self._write_header(indent + self.indent_delta)
+
+        self._write_body(indent + self.indent_delta)
+
+        self.write("</table>
+ if self.fmt.show_index_names: + row.append(self.columns.name or "") + else: + row.append("") + row.extend(self._get_columns_formatted_values()) + align = self.fmt.justify + + if is_truncated_horizontally: + ins_col = self.row_levels + self.fmt.tr_col_num + row.insert(ins_col, "...") + + self.write_tr(row, indent, self.indent_delta, header=True, align=align) + + def _write_row_header(self, indent: int) -> None: + is_truncated_horizontally = self.fmt.is_truncated_horizontally + row = [x if x is not None else "" for x in self.frame.index.names] + [""] * ( + self.ncols + (1 if is_truncated_horizontally else 0) + ) + self.write_tr(row, indent, self.indent_delta, header=True) + + def _write_header(self, indent: int) -> None: + self.write("
<table>`` tag
+        in addition to automatic (by default) id.
+    cell_ids : bool, default True
+        If True, each cell will have an ``id`` attribute in their HTML tag.
+        The ``id`` takes the form ``T_<uuid>_row<num_row>_col<num_col>``
+        where ``<uuid>`` is the unique identifier, ``<num_row>`` is the row
+        number and ``<num_col>`` is the column number.
+    na_rep : str, optional
+        Representation for missing values.
+        If ``na_rep`` is None, no special formatting is applied, and falls back to
+        ``pandas.options.styler.format.na_rep``.
+
+        .. versionadded:: 1.0.0
+
+    uuid_len : int, default 5
+        If ``uuid`` is not specified, the length of the ``uuid`` to randomly generate
+        expressed in hex characters, in range [0, 32].
+
+        .. versionadded:: 1.2.0
+
+    decimal : str, optional
+        Character used as decimal separator for floats, complex and integers. If not
+        given uses ``pandas.options.styler.format.decimal``.
+
+        .. versionadded:: 1.3.0
+
+    thousands : str, optional, default None
+        Character used as thousands separator for floats, complex and integers. If not
+        given uses ``pandas.options.styler.format.thousands``.
+
+        .. versionadded:: 1.3.0
+
+    escape : str, optional
+        Use 'html' to replace the characters ``&``, ``<``, ``>``, ``'``, and ``"``
+        in cell display string with HTML-safe sequences.
+        Use 'latex' to replace the characters ``&``, ``%``, ``$``, ``#``, ``_``,
+        ``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with
+        LaTeX-safe sequences. If not given uses ``pandas.options.styler.format.escape``.
+
+        .. versionadded:: 1.3.0
+
+    formatter : str, callable, dict, optional
+        Object to define how values are displayed. See ``Styler.format``. If not given
+        uses ``pandas.options.styler.format.formatter``.
+
+        .. versionadded:: 1.4.0
+
+    Attributes
+    ----------
+    env : Jinja2 jinja2.Environment
+    template_html : Jinja2 Template
+    template_html_table : Jinja2 Template
+    template_html_style : Jinja2 Template
+    template_latex : Jinja2 Template
+    loader : Jinja2 Loader
+
+    See Also
+    --------
+    DataFrame.style : Return a Styler object containing methods for building
+        a styled HTML representation for the DataFrame.
+
+    Notes
+    -----
+    Most styling will be done by passing style functions into
+    ``Styler.apply`` or ``Styler.applymap``. Style functions should
+    return values with strings containing CSS ``'attr: value'`` that will
+    be applied to the indicated cells.
+
+    If using in the Jupyter notebook, Styler has defined a ``_repr_html_``
+    to automatically render itself. Otherwise call Styler.to_html to get
+    the generated HTML.
+
+    CSS classes are attached to the generated HTML
+
+    * Index and Column names include ``index_name`` and ``level<k>``
+      where `k` is its level in a MultiIndex
+    * Index label cells include
+
+      * ``row_heading``
+      * ``row<n>`` where `n` is the numeric position of the row
+      * ``level<k>`` where `k` is the level in a MultiIndex
+
+    * Column label cells include
+
+      * ``col_heading``
+      * ``col<n>`` where `n` is the numeric position of the column
+      * ``level<k>`` where `k` is the level in a MultiIndex
+
+    * Blank cells include ``blank``
+    * Data cells include ``data``
+    * Trimmed cells include ``col_trim`` or ``row_trim``.
+
+    Any, or all, of these classes can be renamed by using the ``css_class_names``
+    argument in ``Styler.set_table_styles``, giving a value such as
+    *{"row": "MY_ROW_CLASS", "col_trim": "", "row_trim": ""}*.
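+
+    A minimal, illustrative example (hypothetical data):
+    ``DataFrame({"A": [1.0, 2.0]}).style.format(precision=1).to_html()``
+    renders the frame as styled HTML using the options described above.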
+ """ + + def __init__( + self, + data: DataFrame | Series, + precision: int | None = None, + table_styles: CSSStyles | None = None, + uuid: str | None = None, + caption: str | tuple | None = None, + table_attributes: str | None = None, + cell_ids: bool = True, + na_rep: str | None = None, + uuid_len: int = 5, + decimal: str | None = None, + thousands: str | None = None, + escape: str | None = None, + formatter: ExtFormatter | None = None, + ): + super().__init__( + data=data, + uuid=uuid, + uuid_len=uuid_len, + table_styles=table_styles, + table_attributes=table_attributes, + caption=caption, + cell_ids=cell_ids, + precision=precision, + ) + + # validate ordered args + thousands = thousands or get_option("styler.format.thousands") + decimal = decimal or get_option("styler.format.decimal") + na_rep = na_rep or get_option("styler.format.na_rep") + escape = escape or get_option("styler.format.escape") + formatter = formatter or get_option("styler.format.formatter") + # precision is handled by superclass as default for performance + + self.precision = precision # can be removed on set_precision depr cycle + self.na_rep = na_rep # can be removed on set_na_rep depr cycle + self.format( + formatter=formatter, + precision=precision, + na_rep=na_rep, + escape=escape, + decimal=decimal, + thousands=thousands, + ) + + def _repr_html_(self) -> str | None: + """ + Hooks into Jupyter notebook rich display system, which calls _repr_html_ by + default if an object is returned at the end of a cell. + """ + if get_option("styler.render.repr") == "html": + return self.to_html() + return None + + def _repr_latex_(self) -> str | None: + if get_option("styler.render.repr") == "latex": + return self.to_latex() + return None + + def render( + self, + sparse_index: bool | None = None, + sparse_columns: bool | None = None, + **kwargs, + ) -> str: + """ + Render the ``Styler`` including all applied styles to HTML. + + .. deprecated:: 1.4.0 + + Parameters + ---------- + sparse_index : bool, optional + Whether to sparsify the display of a hierarchical index. Setting to False + will display each explicit level element in a hierarchical key for each row. + Defaults to ``pandas.options.styler.sparse.index`` value. + sparse_columns : bool, optional + Whether to sparsify the display of a hierarchical index. Setting to False + will display each explicit level element in a hierarchical key for each row. + Defaults to ``pandas.options.styler.sparse.columns`` value. + **kwargs + Any additional keyword arguments are passed + through to ``self.template.render``. + This is useful when you need to provide + additional variables for a custom template. + + Returns + ------- + rendered : str + The rendered HTML. + + Notes + ----- + This method is deprecated in favour of ``Styler.to_html``. + + Styler objects have defined the ``_repr_html_`` method + which automatically calls ``self.to_html()`` when it's the + last item in a Notebook cell. + + When calling ``Styler.render()`` directly, wrap the result in + ``IPython.display.HTML`` to view the rendered HTML in the notebook. + + Pandas uses the following keys in render. 
Arguments passed + in ``**kwargs`` take precedence, so think carefully if you want + to override them: + + * head + * cellstyle + * body + * uuid + * table_styles + * caption + * table_attributes + """ + warnings.warn( + "this method is deprecated in favour of `Styler.to_html()`", + FutureWarning, + stacklevel=find_stack_level(), + ) + if sparse_index is None: + sparse_index = get_option("styler.sparse.index") + if sparse_columns is None: + sparse_columns = get_option("styler.sparse.columns") + return self._render_html(sparse_index, sparse_columns, **kwargs) + + def set_tooltips( + self, + ttips: DataFrame, + props: CSSProperties | None = None, + css_class: str | None = None, + ) -> Styler: + """ + Set the DataFrame of strings on ``Styler`` generating ``:hover`` tooltips. + + These string based tooltips are only applicable to `` + + + + + + """ + ) + assert expected in df.style.to_html() + + +@pytest.mark.parametrize("comprehensive", [True, False]) +@pytest.mark.parametrize("render", [True, False]) +@pytest.mark.parametrize("deepcopy", [True, False]) +def test_copy(comprehensive, render, deepcopy, mi_styler, mi_styler_comp): + styler = mi_styler_comp if comprehensive else mi_styler + styler.uuid_len = 5 + + s2 = copy.deepcopy(styler) if deepcopy else copy.copy(styler) # make copy and check + assert s2 is not styler + + if render: + styler.to_html() + + excl = [ + "na_rep", # deprecated + "precision", # deprecated + "cellstyle_map", # render time vars.. + "cellstyle_map_columns", + "cellstyle_map_index", + "template_latex", # render templates are class level + "template_html", + "template_html_style", + "template_html_table", + ] + if not deepcopy: # check memory locations are equal for all included attributes + for attr in [a for a in styler.__dict__ if (not callable(a) and a not in excl)]: + assert id(getattr(s2, attr)) == id(getattr(styler, attr)) + else: # check memory locations are different for nested or mutable vars + shallow = [ + "data", + "columns", + "index", + "uuid_len", + "uuid", + "caption", + "cell_ids", + "hide_index_", + "hide_columns_", + "hide_index_names", + "hide_column_names", + "table_attributes", + ] + for attr in shallow: + assert id(getattr(s2, attr)) == id(getattr(styler, attr)) + + for attr in [ + a + for a in styler.__dict__ + if (not callable(a) and a not in excl and a not in shallow) + ]: + if getattr(s2, attr) is None: + assert id(getattr(s2, attr)) == id(getattr(styler, attr)) + else: + assert id(getattr(s2, attr)) != id(getattr(styler, attr)) + + +def test_clear(mi_styler_comp): + # NOTE: if this test fails for new features then 'mi_styler_comp' should be updated + # to ensure proper testing of the 'copy', 'clear', 'export' methods with new feature + # GH 40675 + styler = mi_styler_comp + styler._compute() # execute applied methods + + clean_copy = Styler(styler.data, uuid=styler.uuid) + + excl = [ + "data", + "index", + "columns", + "uuid", + "uuid_len", # uuid is set to be the same on styler and clean_copy + "cell_ids", + "cellstyle_map", # execution time only + "cellstyle_map_columns", # execution time only + "cellstyle_map_index", # execution time only + "precision", # deprecated + "na_rep", # deprecated + "template_latex", # render templates are class level + "template_html", + "template_html_style", + "template_html_table", + ] + # tests vars are not same vals on obj and clean copy before clear (except for excl) + for attr in [a for a in styler.__dict__ if not (callable(a) or a in excl)]: + res = getattr(styler, attr) == getattr(clean_copy, attr) + 
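+        # Note: ``res`` may be a plain bool, an empty array, or an elementwise
+        # boolean array, hence the three-way check below.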
        if hasattr(res, "__iter__") and len(res) > 0:
+            assert not all(res)  # some element in iterable differs
+        elif hasattr(res, "__iter__") and len(res) == 0:
+            pass  # empty array
+        else:
+            assert not res  # explicit var differs
+
+    # test vars have same values on obj and clean copy after clearing
+    styler.clear()
+    for attr in [a for a in styler.__dict__ if not (callable(a))]:
+        res = getattr(styler, attr) == getattr(clean_copy, attr)
+        assert all(res) if hasattr(res, "__iter__") else res
+
+
+def test_export(mi_styler_comp, mi_styler):
+    exp_attrs = [
+        "_todo",
+        "hide_index_",
+        "hide_index_names",
+        "hide_columns_",
+        "hide_column_names",
+        "table_attributes",
+        "table_styles",
+        "css",
+    ]
+    for attr in exp_attrs:
+        check = getattr(mi_styler, attr) == getattr(mi_styler_comp, attr)
+        assert not (
+            all(check) if (hasattr(check, "__iter__") and len(check) > 0) else check
+        )
+
+    export = mi_styler_comp.export()
+    used = mi_styler.use(export)
+    for attr in exp_attrs:
+        check = getattr(used, attr) == getattr(mi_styler_comp, attr)
+        assert all(check) if (hasattr(check, "__iter__") and len(check) > 0) else check
+
+    used.to_html()
+
+
+def test_hide_raises(mi_styler):
+    msg = "`subset` and `level` cannot be passed simultaneously"
+    with pytest.raises(ValueError, match=msg):
+        mi_styler.hide(axis="index", subset="something", level="something else")
+
+    msg = "`level` must be of type `int`, `str` or list of such"
+    with pytest.raises(ValueError, match=msg):
+        mi_styler.hide(axis="index", level={"bad": 1, "type": 2})
+
+
+@pytest.mark.parametrize("level", [1, "one", [1], ["one"]])
+def test_hide_index_level(mi_styler, level):
+    mi_styler.index.names, mi_styler.columns.names = ["zero", "one"], ["zero", "one"]
+    ctx = mi_styler.hide(axis="index", level=level)._translate(False, True)
+    assert len(ctx["head"][0]) == 3
+    assert len(ctx["head"][1]) == 3
+    assert len(ctx["head"][2]) == 4
+    assert ctx["head"][2][0]["is_visible"]
+    assert not ctx["head"][2][1]["is_visible"]
+
+    assert ctx["body"][0][0]["is_visible"]
+    assert not ctx["body"][0][1]["is_visible"]
+    assert ctx["body"][1][0]["is_visible"]
+    assert not ctx["body"][1][1]["is_visible"]
+
+
+@pytest.mark.parametrize("level", [1, "one", [1], ["one"]])
+@pytest.mark.parametrize("names", [True, False])
+def test_hide_columns_level(mi_styler, level, names):
+    mi_styler.columns.names = ["zero", "one"]
+    if names:
+        mi_styler.index.names = ["zero", "one"]
+    ctx = mi_styler.hide(axis="columns", level=level)._translate(True, False)
+    assert len(ctx["head"]) == (2 if names else 1)
+
+
+@pytest.mark.parametrize("method", ["applymap", "apply"])
+@pytest.mark.parametrize("axis", ["index", "columns"])
+def test_apply_map_header(method, axis):
+    # GH 41893
+    df = DataFrame({"A": [0, 0], "B": [1, 1]}, index=["C", "D"])
+    func = {
+        "apply": lambda s: ["attr: val" if ("A" in v or "C" in v) else "" for v in s],
+        "applymap": lambda v: "attr: val" if ("A" in v or "C" in v) else "",
+    }
+
+    # test execution added to todo
+    result = getattr(df.style, f"{method}_index")(func[method], axis=axis)
+    assert len(result._todo) == 1
+    assert len(getattr(result, f"ctx_{axis}")) == 0
+
+    # test ctx object on compute
+    result._compute()
+    expected = {
+        (0, 0): [("attr", "val")],
+    }
+    assert getattr(result, f"ctx_{axis}") == expected
+
+
+@pytest.mark.parametrize("method", ["apply", "applymap"])
+@pytest.mark.parametrize("axis", ["index", "columns"])
+def test_apply_map_header_mi(mi_styler, method, axis):
+    # GH 41893
+    func = {
+        "apply": lambda s: ["attr: val;" if "b" in v else "" for v in s],
"" for v in s], + "applymap": lambda v: "attr: val" if "b" in v else "", + } + result = getattr(mi_styler, f"{method}_index")(func[method], axis=axis)._compute() + expected = {(1, 1): [("attr", "val")]} + assert getattr(result, f"ctx_{axis}") == expected + + +def test_apply_map_header_raises(mi_styler): + # GH 41893 + with pytest.raises(ValueError, match="No axis named bad for object type DataFrame"): + mi_styler.applymap_index(lambda v: "attr: val;", axis="bad")._compute() + + +class TestStyler: + def setup_method(self, method): + np.random.seed(24) + self.s = DataFrame({"A": np.random.permutation(range(6))}) + self.df = DataFrame({"A": [0, 1], "B": np.random.randn(2)}) + self.f = lambda x: x + self.g = lambda x: x + + def h(x, foo="bar"): + return pd.Series(f"color: {foo}", index=x.index, name=x.name) + + self.h = h + self.styler = Styler(self.df) + self.attrs = DataFrame({"A": ["color: red", "color: blue"]}) + self.dataframes = [ + self.df, + DataFrame( + {"f": [1.0, 2.0], "o": ["a", "b"], "c": pd.Categorical(["a", "b"])} + ), + ] + self.blank_value = " " + + def test_init_non_pandas(self): + msg = "``data`` must be a Series or DataFrame" + with pytest.raises(TypeError, match=msg): + Styler([1, 2, 3]) + + def test_init_series(self): + result = Styler(pd.Series([1, 2])) + assert result.data.ndim == 2 + + def test_repr_html_ok(self): + self.styler._repr_html_() + + def test_repr_html_mathjax(self): + # gh-19824 / 41395 + assert "tex2jax_ignore" not in self.styler._repr_html_() + + with pd.option_context("styler.html.mathjax", False): + assert "tex2jax_ignore" in self.styler._repr_html_() + + def test_update_ctx(self): + self.styler._update_ctx(self.attrs) + expected = {(0, 0): [("color", "red")], (1, 0): [("color", "blue")]} + assert self.styler.ctx == expected + + def test_update_ctx_flatten_multi_and_trailing_semi(self): + attrs = DataFrame({"A": ["color: red; foo: bar", "color:blue ; foo: baz;"]}) + self.styler._update_ctx(attrs) + expected = { + (0, 0): [("color", "red"), ("foo", "bar")], + (1, 0): [("color", "blue"), ("foo", "baz")], + } + assert self.styler.ctx == expected + + def test_render(self): + df = DataFrame({"A": [0, 1]}) + style = lambda x: pd.Series(["color: red", "color: blue"], name=x.name) + s = Styler(df, uuid="AB").apply(style) + s.to_html() + # it worked? + + def test_multiple_render(self): + # GH 39396 + s = Styler(self.df, uuid_len=0).applymap(lambda x: "color: red;", subset=["A"]) + s.to_html() # do 2 renders to ensure css styles not duplicated + assert ( + '" in s.to_html() + ) + + def test_render_empty_dfs(self): + empty_df = DataFrame() + es = Styler(empty_df) + es.to_html() + # An index but no columns + DataFrame(columns=["a"]).style.to_html() + # A column but no index + DataFrame(index=["a"]).style.to_html() + # No IndexError raised? + + def test_render_double(self): + df = DataFrame({"A": [0, 1]}) + style = lambda x: pd.Series( + ["color: red; border: 1px", "color: blue; border: 2px"], name=x.name + ) + s = Styler(df, uuid="AB").apply(style) + s.to_html() + # it worked? 
+ + def test_set_properties(self): + df = DataFrame({"A": [0, 1]}) + result = df.style.set_properties(color="white", size="10px")._compute().ctx + # order is deterministic + v = [("color", "white"), ("size", "10px")] + expected = {(0, 0): v, (1, 0): v} + assert result.keys() == expected.keys() + for v1, v2 in zip(result.values(), expected.values()): + assert sorted(v1) == sorted(v2) + + def test_set_properties_subset(self): + df = DataFrame({"A": [0, 1]}) + result = ( + df.style.set_properties(subset=pd.IndexSlice[0, "A"], color="white") + ._compute() + .ctx + ) + expected = {(0, 0): [("color", "white")]} + assert result == expected + + def test_empty_index_name_doesnt_display(self): + # https://github.com/pandas-dev/pandas/pull/12090#issuecomment-180695902 + df = DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]}) + result = df.style._translate(True, True) + assert len(result["head"]) == 1 + expected = { + "class": "blank level0", + "type": "th", + "value": self.blank_value, + "is_visible": True, + "display_value": self.blank_value, + } + assert expected.items() <= result["head"][0][0].items() + + def test_index_name(self): + # https://github.com/pandas-dev/pandas/issues/11655 + df = DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]}) + result = df.set_index("A").style._translate(True, True) + expected = { + "class": "index_name level0", + "type": "th", + "value": "A", + "is_visible": True, + "display_value": "A", + } + assert expected.items() <= result["head"][1][0].items() + + def test_numeric_columns(self): + # https://github.com/pandas-dev/pandas/issues/12125 + # smoke test for _translate + df = DataFrame({0: [1, 2, 3]}) + df.style._translate(True, True) + + def test_apply_axis(self): + df = DataFrame({"A": [0, 0], "B": [1, 1]}) + f = lambda x: [f"val: {x.max()}" for v in x] + result = df.style.apply(f, axis=1) + assert len(result._todo) == 1 + assert len(result.ctx) == 0 + result._compute() + expected = { + (0, 0): [("val", "1")], + (0, 1): [("val", "1")], + (1, 0): [("val", "1")], + (1, 1): [("val", "1")], + } + assert result.ctx == expected + + result = df.style.apply(f, axis=0) + expected = { + (0, 0): [("val", "0")], + (0, 1): [("val", "1")], + (1, 0): [("val", "0")], + (1, 1): [("val", "1")], + } + result._compute() + assert result.ctx == expected + result = df.style.apply(f) # default + result._compute() + assert result.ctx == expected + + @pytest.mark.parametrize("axis", [0, 1]) + def test_apply_series_return(self, axis): + # GH 42014 + df = DataFrame([[1, 2], [3, 4]], index=["X", "Y"], columns=["X", "Y"]) + + # test Series return where len(Series) < df.index or df.columns but labels OK + func = lambda s: pd.Series(["color: red;"], index=["Y"]) + result = df.style.apply(func, axis=axis)._compute().ctx + assert result[(1, 1)] == [("color", "red")] + assert result[(1 - axis, axis)] == [("color", "red")] + + # test Series return where labels align but different order + func = lambda s: pd.Series(["color: red;", "color: blue;"], index=["Y", "X"]) + result = df.style.apply(func, axis=axis)._compute().ctx + assert result[(0, 0)] == [("color", "blue")] + assert result[(1, 1)] == [("color", "red")] + assert result[(1 - axis, axis)] == [("color", "red")] + assert result[(axis, 1 - axis)] == [("color", "blue")] + + @pytest.mark.parametrize("index", [False, True]) + @pytest.mark.parametrize("columns", [False, True]) + def test_apply_dataframe_return(self, index, columns): + # GH 42014 + df = DataFrame([[1, 2], [3, 4]], index=["X", "Y"], columns=["X", "Y"]) + idxs = ["X", "Y"] if index else 
["Y"] + cols = ["X", "Y"] if columns else ["Y"] + df_styles = DataFrame("color: red;", index=idxs, columns=cols) + result = df.style.apply(lambda x: df_styles, axis=None)._compute().ctx + + assert result[(1, 1)] == [("color", "red")] # (Y,Y) styles always present + assert (result[(0, 1)] == [("color", "red")]) is index # (X,Y) only if index + assert (result[(1, 0)] == [("color", "red")]) is columns # (Y,X) only if cols + assert (result[(0, 0)] == [("color", "red")]) is (index and columns) # (X,X) + + @pytest.mark.parametrize( + "slice_", + [ + pd.IndexSlice[:], + pd.IndexSlice[:, ["A"]], + pd.IndexSlice[[1], :], + pd.IndexSlice[[1], ["A"]], + pd.IndexSlice[:2, ["A", "B"]], + ], + ) + @pytest.mark.parametrize("axis", [0, 1]) + def test_apply_subset(self, slice_, axis): + result = ( + self.df.style.apply(self.h, axis=axis, subset=slice_, foo="baz") + ._compute() + .ctx + ) + expected = { + (r, c): [("color", "baz")] + for r, row in enumerate(self.df.index) + for c, col in enumerate(self.df.columns) + if row in self.df.loc[slice_].index and col in self.df.loc[slice_].columns + } + assert result == expected + + @pytest.mark.parametrize( + "slice_", + [ + pd.IndexSlice[:], + pd.IndexSlice[:, ["A"]], + pd.IndexSlice[[1], :], + pd.IndexSlice[[1], ["A"]], + pd.IndexSlice[:2, ["A", "B"]], + ], + ) + def test_applymap_subset(self, slice_): + result = ( + self.df.style.applymap(lambda x: "color:baz;", subset=slice_)._compute().ctx + ) + expected = { + (r, c): [("color", "baz")] + for r, row in enumerate(self.df.index) + for c, col in enumerate(self.df.columns) + if row in self.df.loc[slice_].index and col in self.df.loc[slice_].columns + } + assert result == expected + + @pytest.mark.parametrize( + "slice_", + [ + pd.IndexSlice[:, pd.IndexSlice["x", "A"]], + pd.IndexSlice[:, pd.IndexSlice[:, "A"]], + pd.IndexSlice[:, pd.IndexSlice[:, ["A", "C"]]], # missing col element + pd.IndexSlice[pd.IndexSlice["a", 1], :], + pd.IndexSlice[pd.IndexSlice[:, 1], :], + pd.IndexSlice[pd.IndexSlice[:, [1, 3]], :], # missing row element + pd.IndexSlice[:, ("x", "A")], + pd.IndexSlice[("a", 1), :], + ], + ) + def test_applymap_subset_multiindex(self, slice_): + # GH 19861 + # edited for GH 33562 + warn = None + msg = "indexing on a MultiIndex with a nested sequence of labels" + if ( + isinstance(slice_[-1], tuple) + and isinstance(slice_[-1][-1], list) + and "C" in slice_[-1][-1] + ): + warn = FutureWarning + elif ( + isinstance(slice_[0], tuple) + and isinstance(slice_[0][1], list) + and 3 in slice_[0][1] + ): + warn = FutureWarning + + idx = MultiIndex.from_product([["a", "b"], [1, 2]]) + col = MultiIndex.from_product([["x", "y"], ["A", "B"]]) + df = DataFrame(np.random.rand(4, 4), columns=col, index=idx) + + with tm.assert_produces_warning(warn, match=msg): + df.style.applymap(lambda x: "color: red;", subset=slice_).to_html() + + def test_applymap_subset_multiindex_code(self): + # https://github.com/pandas-dev/pandas/issues/25858 + # Checks styler.applymap works with multindex when codes are provided + codes = np.array([[0, 0, 1, 1], [0, 1, 0, 1]]) + columns = MultiIndex( + levels=[["a", "b"], ["%", "#"]], codes=codes, names=["", ""] + ) + df = DataFrame( + [[1, -1, 1, 1], [-1, 1, 1, 1]], index=["hello", "world"], columns=columns + ) + pct_subset = pd.IndexSlice[:, pd.IndexSlice[:, "%":"%"]] + + def color_negative_red(val): + color = "red" if val < 0 else "black" + return f"color: {color}" + + df.loc[pct_subset] + df.style.applymap(color_negative_red, subset=pct_subset) + + def test_empty(self): + df = 
DataFrame({"A": [1, 0]}) + s = df.style + s.ctx = {(0, 0): [("color", "red")], (1, 0): [("", "")]} + + result = s._translate(True, True)["cellstyle"] + expected = [ + {"props": [("color", "red")], "selectors": ["row0_col0"]}, + {"props": [("", "")], "selectors": ["row1_col0"]}, + ] + assert result == expected + + def test_duplicate(self): + df = DataFrame({"A": [1, 0]}) + s = df.style + s.ctx = {(0, 0): [("color", "red")], (1, 0): [("color", "red")]} + + result = s._translate(True, True)["cellstyle"] + expected = [ + {"props": [("color", "red")], "selectors": ["row0_col0", "row1_col0"]} + ] + assert result == expected + + def test_init_with_na_rep(self): + # GH 21527 28358 + df = DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"]) + + ctx = Styler(df, na_rep="NA")._translate(True, True) + assert ctx["body"][0][1]["display_value"] == "NA" + assert ctx["body"][0][2]["display_value"] == "NA" + + def test_caption(self): + styler = Styler(self.df, caption="foo") + result = styler.to_html() + assert all(["caption" in result, "foo" in result]) + + styler = self.df.style + result = styler.set_caption("baz") + assert styler is result + assert styler.caption == "baz" + + def test_uuid(self): + styler = Styler(self.df, uuid="abc123") + result = styler.to_html() + assert "abc123" in result + + styler = self.df.style + result = styler.set_uuid("aaa") + assert result is styler + assert result.uuid == "aaa" + + def test_unique_id(self): + # See https://github.com/pandas-dev/pandas/issues/16780 + df = DataFrame({"a": [1, 3, 5, 6], "b": [2, 4, 12, 21]}) + result = df.style.to_html(uuid="test") + assert "test" in result + ids = re.findall('id="(.*?)"', result) + assert np.unique(ids).size == len(ids) + + def test_table_styles(self): + style = [{"selector": "th", "props": [("foo", "bar")]}] # default format + styler = Styler(self.df, table_styles=style) + result = " ".join(styler.to_html().split()) + assert "th { foo: bar; }" in result + + styler = self.df.style + result = styler.set_table_styles(style) + assert styler is result + assert styler.table_styles == style + + # GH 39563 + style = [{"selector": "th", "props": "foo:bar;"}] # css string format + styler = self.df.style.set_table_styles(style) + result = " ".join(styler.to_html().split()) + assert "th { foo: bar; }" in result + + def test_table_styles_multiple(self): + ctx = self.df.style.set_table_styles( + [ + {"selector": "th,td", "props": "color:red;"}, + {"selector": "tr", "props": "color:green;"}, + ] + )._translate(True, True)["table_styles"] + assert ctx == [ + {"selector": "th", "props": [("color", "red")]}, + {"selector": "td", "props": [("color", "red")]}, + {"selector": "tr", "props": [("color", "green")]}, + ] + + def test_table_styles_dict_multiple_selectors(self): + # GH 44011 + result = self.df.style.set_table_styles( + [{"selector": "th,td", "props": [("border-left", "2px solid black")]}] + )._translate(True, True)["table_styles"] + + expected = [ + {"selector": "th", "props": [("border-left", "2px solid black")]}, + {"selector": "td", "props": [("border-left", "2px solid black")]}, + ] + + assert result == expected + + def test_maybe_convert_css_to_tuples(self): + expected = [("a", "b"), ("c", "d e")] + assert maybe_convert_css_to_tuples("a:b;c:d e;") == expected + assert maybe_convert_css_to_tuples("a: b ;c: d e ") == expected + expected = [] + assert maybe_convert_css_to_tuples("") == expected + + def test_maybe_convert_css_to_tuples_err(self): + msg = "Styles supplied as string must follow CSS rule formats" + with 
pytest.raises(ValueError, match=msg): + maybe_convert_css_to_tuples("err") + + def test_table_attributes(self): + attributes = 'class="foo" data-bar' + styler = Styler(self.df, table_attributes=attributes) + result = styler.to_html() + assert 'class="foo" data-bar' in result + + result = self.df.style.set_table_attributes(attributes).to_html() + assert 'class="foo" data-bar' in result + + def test_apply_none(self): + def f(x): + return DataFrame( + np.where(x == x.max(), "color: red", ""), + index=x.index, + columns=x.columns, + ) + + result = DataFrame([[1, 2], [3, 4]]).style.apply(f, axis=None)._compute().ctx + assert result[(1, 1)] == [("color", "red")] + + def test_trim(self): + result = self.df.style.to_html() # trim=True + assert result.count("#") == 0 + + result = self.df.style.highlight_max().to_html() + assert result.count("#") == len(self.df.columns) + + def test_export(self): + f = lambda x: "color: red" if x > 0 else "color: blue" + g = lambda x, z: f"color: {z}" if x > 0 else f"color: {z}" + style1 = self.styler + style1.applymap(f).applymap(g, z="b").highlight_max()._compute() # = render + result = style1.export() + style2 = self.df.style + style2.use(result) + assert style1._todo == style2._todo + style2.to_html() + + def test_bad_apply_shape(self): + df = DataFrame([[1, 2], [3, 4]], index=["A", "B"], columns=["X", "Y"]) + + msg = "resulted in the apply method collapsing to a Series." + with pytest.raises(ValueError, match=msg): + df.style._apply(lambda x: "x") + + msg = "created invalid {} labels" + with pytest.raises(ValueError, match=msg.format("index")): + df.style._apply(lambda x: [""]) + + with pytest.raises(ValueError, match=msg.format("index")): + df.style._apply(lambda x: ["", "", "", ""]) + + with pytest.raises(ValueError, match=msg.format("index")): + df.style._apply(lambda x: pd.Series(["a:v;", ""], index=["A", "C"]), axis=0) + + with pytest.raises(ValueError, match=msg.format("columns")): + df.style._apply(lambda x: ["", "", ""], axis=1) + + with pytest.raises(ValueError, match=msg.format("columns")): + df.style._apply(lambda x: pd.Series(["a:v;", ""], index=["X", "Z"]), axis=1) + + msg = "returned ndarray with wrong shape" + with pytest.raises(ValueError, match=msg): + df.style._apply(lambda x: np.array([[""], [""]]), axis=None) + + def test_apply_bad_return(self): + def f(x): + return "" + + df = DataFrame([[1, 2], [3, 4]]) + msg = ( + "must return a DataFrame or ndarray when passed to `Styler.apply` " + "with axis=None" + ) + with pytest.raises(TypeError, match=msg): + df.style._apply(f, axis=None) + + @pytest.mark.parametrize("axis", ["index", "columns"]) + def test_apply_bad_labels(self, axis): + def f(x): + return DataFrame(**{axis: ["bad", "labels"]}) + + df = DataFrame([[1, 2], [3, 4]]) + msg = f"created invalid {axis} labels." 
+ with pytest.raises(ValueError, match=msg): + df.style._apply(f, axis=None) + + def test_get_level_lengths(self): + index = MultiIndex.from_product([["a", "b"], [0, 1, 2]]) + expected = { + (0, 0): 3, + (0, 3): 3, + (1, 0): 1, + (1, 1): 1, + (1, 2): 1, + (1, 3): 1, + (1, 4): 1, + (1, 5): 1, + } + result = _get_level_lengths(index, sparsify=True, max_index=100) + tm.assert_dict_equal(result, expected) + + expected = { + (0, 0): 1, + (0, 1): 1, + (0, 2): 1, + (0, 3): 1, + (0, 4): 1, + (0, 5): 1, + (1, 0): 1, + (1, 1): 1, + (1, 2): 1, + (1, 3): 1, + (1, 4): 1, + (1, 5): 1, + } + result = _get_level_lengths(index, sparsify=False, max_index=100) + tm.assert_dict_equal(result, expected) + + def test_get_level_lengths_un_sorted(self): + index = MultiIndex.from_arrays([[1, 1, 2, 1], ["a", "b", "b", "d"]]) + expected = { + (0, 0): 2, + (0, 2): 1, + (0, 3): 1, + (1, 0): 1, + (1, 1): 1, + (1, 2): 1, + (1, 3): 1, + } + result = _get_level_lengths(index, sparsify=True, max_index=100) + tm.assert_dict_equal(result, expected) + + expected = { + (0, 0): 1, + (0, 1): 1, + (0, 2): 1, + (0, 3): 1, + (1, 0): 1, + (1, 1): 1, + (1, 2): 1, + (1, 3): 1, + } + result = _get_level_lengths(index, sparsify=False, max_index=100) + tm.assert_dict_equal(result, expected) + + def test_mi_sparse_index_names(self): + # Test the class names and displayed value are correct on rendering MI names + df = DataFrame( + {"A": [1, 2]}, + index=MultiIndex.from_arrays( + [["a", "a"], [0, 1]], names=["idx_level_0", "idx_level_1"] + ), + ) + result = df.style._translate(True, True) + head = result["head"][1] + expected = [ + { + "class": "index_name level0", + "display_value": "idx_level_0", + "is_visible": True, + }, + { + "class": "index_name level1", + "display_value": "idx_level_1", + "is_visible": True, + }, + { + "class": "blank col0", + "display_value": self.blank_value, + "is_visible": True, + }, + ] + for i, expected_dict in enumerate(expected): + assert expected_dict.items() <= head[i].items() + + def test_mi_sparse_column_names(self): + df = DataFrame( + np.arange(16).reshape(4, 4), + index=MultiIndex.from_arrays( + [["a", "a", "b", "a"], [0, 1, 1, 2]], + names=["idx_level_0", "idx_level_1"], + ), + columns=MultiIndex.from_arrays( + [["C1", "C1", "C2", "C2"], [1, 0, 1, 0]], names=["colnam_0", "colnam_1"] + ), + ) + result = Styler(df, cell_ids=False)._translate(True, True) + + for level in [0, 1]: + head = result["head"][level] + expected = [ + { + "class": "blank", + "display_value": self.blank_value, + "is_visible": True, + }, + { + "class": f"index_name level{level}", + "display_value": f"colnam_{level}", + "is_visible": True, + }, + ] + for i, expected_dict in enumerate(expected): + assert expected_dict.items() <= head[i].items() + + def test_hide_column_headers(self): + ctx = self.styler.hide(axis="columns")._translate(True, True) + assert len(ctx["head"]) == 0 # no header entries with an unnamed index + + self.df.index.name = "some_name" + ctx = self.df.style.hide(axis="columns")._translate(True, True) + assert len(ctx["head"]) == 1 + # index names still visible, changed in #42101, reverted in 43404 + + def test_hide_single_index(self): + # GH 14194 + # single unnamed index + ctx = self.df.style._translate(True, True) + assert ctx["body"][0][0]["is_visible"] + assert ctx["head"][0][0]["is_visible"] + ctx2 = self.df.style.hide(axis="index")._translate(True, True) + assert not ctx2["body"][0][0]["is_visible"] + assert not ctx2["head"][0][0]["is_visible"] + + # single named index + ctx3 = 
self.df.set_index("A").style._translate(True, True)
+        assert ctx3["body"][0][0]["is_visible"]
+        assert len(ctx3["head"]) == 2  # 2 header levels
+        assert ctx3["head"][0][0]["is_visible"]
+
+        ctx4 = self.df.set_index("A").style.hide(axis="index")._translate(True, True)
+        assert not ctx4["body"][0][0]["is_visible"]
+        assert len(ctx4["head"]) == 1  # only 1 header level
+        assert not ctx4["head"][0][0]["is_visible"]
+
+    def test_hide_multiindex(self):
+        # GH 14194
+        df = DataFrame(
+            {"A": [1, 2], "B": [1, 2]},
+            index=MultiIndex.from_arrays(
+                [["a", "a"], [0, 1]], names=["idx_level_0", "idx_level_1"]
+            ),
+        )
+        ctx1 = df.style._translate(True, True)
+        # tests for 'a' and '0'
+        assert ctx1["body"][0][0]["is_visible"]
+        assert ctx1["body"][0][1]["is_visible"]
+        # check for blank header rows
+        assert len(ctx1["head"][0]) == 4  # two visible indexes and two data columns
+
+        ctx2 = df.style.hide(axis="index")._translate(True, True)
+        # tests for 'a' and '0'
+        assert not ctx2["body"][0][0]["is_visible"]
+        assert not ctx2["body"][0][1]["is_visible"]
+        # check for blank header rows
+        assert len(ctx2["head"][0]) == 3  # one hidden (col name) and two data columns
+        assert not ctx2["head"][0][0]["is_visible"]
+
+    def test_hide_columns_single_level(self):
+        # GH 14194
+        # test hiding single column
+        ctx = self.df.style._translate(True, True)
+        assert ctx["head"][0][1]["is_visible"]
+        assert ctx["head"][0][1]["display_value"] == "A"
+        assert ctx["head"][0][2]["is_visible"]
+        assert ctx["head"][0][2]["display_value"] == "B"
+        assert ctx["body"][0][1]["is_visible"]  # col A, row 1
+        assert ctx["body"][1][2]["is_visible"]  # col B, row 1
+
+        ctx = self.df.style.hide("A", axis="columns")._translate(True, True)
+        assert not ctx["head"][0][1]["is_visible"]
+        assert not ctx["body"][0][1]["is_visible"]  # col A, row 1
+        assert ctx["body"][1][2]["is_visible"]  # col B, row 1
+
+        # test hiding multiple columns
+        ctx = self.df.style.hide(["A", "B"], axis="columns")._translate(True, True)
+        assert not ctx["head"][0][1]["is_visible"]
+        assert not ctx["head"][0][2]["is_visible"]
+        assert not ctx["body"][0][1]["is_visible"]  # col A, row 1
+        assert not ctx["body"][1][2]["is_visible"]  # col B, row 1
+
+    def test_hide_columns_index_mult_levels(self):
+        # GH 14194
+        # setup dataframe with multiple column levels and indices
+        i1 = MultiIndex.from_arrays(
+            [["a", "a"], [0, 1]], names=["idx_level_0", "idx_level_1"]
+        )
+        i2 = MultiIndex.from_arrays(
+            [["b", "b"], [0, 1]], names=["col_level_0", "col_level_1"]
+        )
+        df = DataFrame([[1, 2], [3, 4]], index=i1, columns=i2)
+        ctx = df.style._translate(True, True)
+        # column headers
+        assert ctx["head"][0][2]["is_visible"]
+        assert ctx["head"][1][2]["is_visible"]
+        assert ctx["head"][1][3]["display_value"] == "1"
+        # indices
+        assert ctx["body"][0][0]["is_visible"]
+        # data
+        assert ctx["body"][1][2]["is_visible"]
+        assert ctx["body"][1][2]["display_value"] == "3"
+        assert ctx["body"][1][3]["is_visible"]
+        assert ctx["body"][1][3]["display_value"] == "4"
+
+        # hide top column level, which hides both columns
+        ctx = df.style.hide("b", axis="columns")._translate(True, True)
+        assert not ctx["head"][0][2]["is_visible"]  # b
+        assert not ctx["head"][1][2]["is_visible"]  # 0
+        assert not ctx["body"][1][2]["is_visible"]  # 3
+        assert ctx["body"][0][0]["is_visible"]  # index
+
+        # hide first column only
+        ctx = df.style.hide([("b", 0)], axis="columns")._translate(True, True)
+        assert not ctx["head"][0][2]["is_visible"]  # b
+        assert ctx["head"][0][3]["is_visible"]  # b
+        assert not ctx["head"][1][2]["is_visible"]  # 0
ctx["head"][1][2]["is_visible"] # 0 + assert not ctx["body"][1][2]["is_visible"] # 3 + assert ctx["body"][1][3]["is_visible"] + assert ctx["body"][1][3]["display_value"] == "4" + + # hide second column and index + ctx = df.style.hide([("b", 1)], axis=1).hide(axis=0)._translate(True, True) + assert not ctx["body"][0][0]["is_visible"] # index + assert len(ctx["head"][0]) == 3 + assert ctx["head"][0][1]["is_visible"] # b + assert ctx["head"][1][1]["is_visible"] # 0 + assert not ctx["head"][1][2]["is_visible"] # 1 + assert not ctx["body"][1][3]["is_visible"] # 4 + assert ctx["body"][1][2]["is_visible"] + assert ctx["body"][1][2]["display_value"] == "3" + + # hide top row level, which hides both rows so body empty + ctx = df.style.hide("a", axis="index")._translate(True, True) + assert ctx["body"] == [] + + # hide first row only + ctx = df.style.hide(("a", 0), axis="index")._translate(True, True) + for i in [0, 1, 2, 3]: + assert "row1" in ctx["body"][0][i]["class"] # row0 not included in body + assert ctx["body"][0][i]["is_visible"] + + def test_pipe(self): + def set_caption_from_template(styler, a, b): + return styler.set_caption(f"Dataframe with a = {a} and b = {b}") + + styler = self.df.style.pipe(set_caption_from_template, "A", b="B") + assert "Dataframe with a = A and b = B" in styler.to_html() + + # Test with an argument that is a (callable, keyword_name) pair. + def f(a, b, styler): + return (a, b, styler) + + styler = self.df.style + result = styler.pipe((f, "styler"), a=1, b=2) + assert result == (1, 2, styler) + + def test_no_cell_ids(self): + # GH 35588 + # GH 35663 + df = DataFrame(data=[[0]]) + styler = Styler(df, uuid="_", cell_ids=False) + styler.to_html() + s = styler.to_html() # render twice to ensure ctx is not updated + assert s.find('' in s + assert '' in s + assert '' in s + assert '' in s + # GH 39317 + s = Styler(df, uuid_len=0, cell_ids=True).set_td_classes(classes).to_html() + assert '' in s + assert '' in s + assert '' in s + assert '' in s + + def test_set_data_classes_reindex(self): + # GH 39317 + df = DataFrame( + data=[[0, 1, 2], [3, 4, 5], [6, 7, 8]], columns=[0, 1, 2], index=[0, 1, 2] + ) + classes = DataFrame( + data=[["mi", "ma"], ["mu", "mo"]], + columns=[0, 2], + index=[0, 2], + ) + s = Styler(df, uuid_len=0).set_td_classes(classes).to_html() + assert '' in s + assert '' in s + assert '' in s + assert '' in s + assert '' in s + + def test_chaining_table_styles(self): + # GH 35607 + df = DataFrame(data=[[0, 1], [1, 2]], columns=["A", "B"]) + styler = df.style.set_table_styles( + [{"selector": "", "props": [("background-color", "yellow")]}] + ).set_table_styles( + [{"selector": ".col0", "props": [("background-color", "blue")]}], + overwrite=False, + ) + assert len(styler.table_styles) == 2 + + def test_column_and_row_styling(self): + # GH 35607 + df = DataFrame(data=[[0, 1], [1, 2]], columns=["A", "B"]) + s = Styler(df, uuid_len=0) + s = s.set_table_styles({"A": [{"selector": "", "props": [("color", "blue")]}]}) + assert "#T_ .col0 {\n color: blue;\n}" in s.to_html() + s = s.set_table_styles( + {0: [{"selector": "", "props": [("color", "blue")]}]}, axis=1 + ) + assert "#T_ .row0 {\n color: blue;\n}" in s.to_html() + + @pytest.mark.parametrize("len_", [1, 5, 32, 33, 100]) + def test_uuid_len(self, len_): + # GH 36345 + df = DataFrame(data=[["A"]]) + s = Styler(df, uuid_len=len_, cell_ids=False).to_html() + strt = s.find('id="T_') + end = s[strt + 6 :].find('"') + if len_ > 32: + assert end == 32 + else: + assert end == len_ + + @pytest.mark.parametrize("len_", 
[-2, "bad", None]) + def test_uuid_len_raises(self, len_): + # GH 36345 + df = DataFrame(data=[["A"]]) + msg = "``uuid_len`` must be an integer in range \\[0, 32\\]." + with pytest.raises(TypeError, match=msg): + Styler(df, uuid_len=len_, cell_ids=False).to_html() + + @pytest.mark.parametrize( + "slc", + [ + pd.IndexSlice[:, :], + pd.IndexSlice[:, 1], + pd.IndexSlice[1, :], + pd.IndexSlice[[1], [1]], + pd.IndexSlice[1, [1]], + pd.IndexSlice[[1], 1], + pd.IndexSlice[1], + pd.IndexSlice[1, 1], + slice(None, None, None), + [0, 1], + np.array([0, 1]), + pd.Series([0, 1]), + ], + ) + def test_non_reducing_slice(self, slc): + df = DataFrame([[0, 1], [2, 3]]) + + tslice_ = non_reducing_slice(slc) + assert isinstance(df.loc[tslice_], DataFrame) + + @pytest.mark.parametrize("box", [list, pd.Series, np.array]) + def test_list_slice(self, box): + # like dataframe getitem + subset = box(["A"]) + + df = DataFrame({"A": [1, 2], "B": [3, 4]}, index=["A", "B"]) + expected = pd.IndexSlice[:, ["A"]] + + result = non_reducing_slice(subset) + tm.assert_frame_equal(df.loc[result], df.loc[expected]) + + def test_non_reducing_slice_on_multiindex(self): + # GH 19861 + dic = { + ("a", "d"): [1, 4], + ("a", "c"): [2, 3], + ("b", "c"): [3, 2], + ("b", "d"): [4, 1], + } + df = DataFrame(dic, index=[0, 1]) + idx = pd.IndexSlice + slice_ = idx[:, idx["b", "d"]] + tslice_ = non_reducing_slice(slice_) + + result = df.loc[tslice_] + expected = DataFrame({("b", "d"): [4, 1]}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "slice_", + [ + pd.IndexSlice[:, :], + # check cols + pd.IndexSlice[:, pd.IndexSlice[["a"]]], # inferred deeper need list + pd.IndexSlice[:, pd.IndexSlice[["a"], ["c"]]], # inferred deeper need list + pd.IndexSlice[:, pd.IndexSlice["a", "c", :]], + pd.IndexSlice[:, pd.IndexSlice["a", :, "e"]], + pd.IndexSlice[:, pd.IndexSlice[:, "c", "e"]], + pd.IndexSlice[:, pd.IndexSlice["a", ["c", "d"], :]], # check list + pd.IndexSlice[:, pd.IndexSlice["a", ["c", "d", "-"], :]], # allow missing + pd.IndexSlice[:, pd.IndexSlice["a", ["c", "d", "-"], "e"]], # no slice + # check rows + pd.IndexSlice[pd.IndexSlice[["U"]], :], # inferred deeper need list + pd.IndexSlice[pd.IndexSlice[["U"], ["W"]], :], # inferred deeper need list + pd.IndexSlice[pd.IndexSlice["U", "W", :], :], + pd.IndexSlice[pd.IndexSlice["U", :, "Y"], :], + pd.IndexSlice[pd.IndexSlice[:, "W", "Y"], :], + pd.IndexSlice[pd.IndexSlice[:, "W", ["Y", "Z"]], :], # check list + pd.IndexSlice[pd.IndexSlice[:, "W", ["Y", "Z", "-"]], :], # allow missing + pd.IndexSlice[pd.IndexSlice["U", "W", ["Y", "Z", "-"]], :], # no slice + # check simultaneous + pd.IndexSlice[pd.IndexSlice[:, "W", "Y"], pd.IndexSlice["a", "c", :]], + ], + ) + def test_non_reducing_multi_slice_on_multiindex(self, slice_): + # GH 33562 + cols = MultiIndex.from_product([["a", "b"], ["c", "d"], ["e", "f"]]) + idxs = MultiIndex.from_product([["U", "V"], ["W", "X"], ["Y", "Z"]]) + df = DataFrame(np.arange(64).reshape(8, 8), columns=cols, index=idxs) + + msg = "indexing on a MultiIndex with a nested sequence of labels" + warn = None + for lvl in [0, 1]: + key = slice_[lvl] + if isinstance(key, tuple): + for subkey in key: + if isinstance(subkey, list) and "-" in subkey: + # not present in the index level, ignored, will raise in future + warn = FutureWarning + + with tm.assert_produces_warning(warn, match=msg): + expected = df.loc[slice_] + + with tm.assert_produces_warning(warn, match=msg): + result = df.loc[non_reducing_slice(slice_)] + tm.assert_frame_equal(result, 
expected)
+
+
+def test_hidden_index_names(mi_df):
+    mi_df.index.names = ["Lev0", "Lev1"]
+    mi_styler = mi_df.style
+    ctx = mi_styler._translate(True, True)
+    assert len(ctx["head"]) == 3  # 2 column index levels + 1 index names row
+
+    mi_styler.hide(axis="index", names=True)
+    ctx = mi_styler._translate(True, True)
+    assert len(ctx["head"]) == 2  # index names row is unparsed
+    for i in range(4):
+        assert ctx["body"][0][i]["is_visible"]  # 2 index levels + 2 data values visible
+
+    mi_styler.hide(axis="index", level=1)
+    ctx = mi_styler._translate(True, True)
+    assert len(ctx["head"]) == 2  # index names row is still hidden
+    assert ctx["body"][0][0]["is_visible"] is True
+    assert ctx["body"][0][1]["is_visible"] is False
+
+
+def test_hidden_column_names(mi_df):
+    mi_df.columns.names = ["Lev0", "Lev1"]
+    mi_styler = mi_df.style
+    ctx = mi_styler._translate(True, True)
+    assert ctx["head"][0][1]["display_value"] == "Lev0"
+    assert ctx["head"][1][1]["display_value"] == "Lev1"
+
+    mi_styler.hide(names=True, axis="columns")
+    ctx = mi_styler._translate(True, True)
+    assert ctx["head"][0][1]["display_value"] == "&nbsp;"
+    assert ctx["head"][1][1]["display_value"] == "&nbsp;"
+
+    mi_styler.hide(level=0, axis="columns")
+    ctx = mi_styler._translate(True, True)
+    assert len(ctx["head"]) == 1  # no index names and only one visible column header row
+    assert ctx["head"][0][1]["display_value"] == "&nbsp;"
+
+
+@pytest.mark.parametrize("caption", [1, ("a", "b", "c"), (1, "s")])
+def test_caption_raises(mi_styler, caption):
+    msg = "`caption` must be either a string or 2-tuple of strings."
+    with pytest.raises(ValueError, match=msg):
+        mi_styler.set_caption(caption)
+
+
+def test_hiding_headers_over_index_no_sparsify():
+    # GH 43464
+    midx = MultiIndex.from_product([[1, 2], ["a", "a", "b"]])
+    df = DataFrame(9, index=midx, columns=[0])
+    ctx = df.style._translate(False, False)
+    assert len(ctx["body"]) == 6
+    ctx = df.style.hide((1, "a"), axis=0)._translate(False, False)
+    assert len(ctx["body"]) == 4
+    assert "row2" in ctx["body"][0][0]["class"]
+
+
+def test_hiding_headers_over_columns_no_sparsify():
+    # GH 43464
+    midx = MultiIndex.from_product([[1, 2], ["a", "a", "b"]])
+    df = DataFrame(9, columns=midx, index=[0])
+    ctx = df.style._translate(False, False)
+    for ix in [(0, 1), (0, 2), (1, 1), (1, 2)]:
+        assert ctx["head"][ix[0]][ix[1]]["is_visible"] is True
+    ctx = df.style.hide((1, "a"), axis="columns")._translate(False, False)
+    for ix in [(0, 1), (0, 2), (1, 1), (1, 2)]:
+        assert ctx["head"][ix[0]][ix[1]]["is_visible"] is False
+
+
+def test_get_level_lengths_mi_hidden():
+    # GH 43464
+    index = MultiIndex.from_arrays([[1, 1, 1, 2, 2, 2], ["a", "a", "b", "a", "a", "b"]])
+    expected = {
+        (0, 2): 1,
+        (0, 3): 1,
+        (0, 4): 1,
+        (0, 5): 1,
+        (1, 2): 1,
+        (1, 3): 1,
+        (1, 4): 1,
+        (1, 5): 1,
+    }
+    result = _get_level_lengths(
+        index,
+        sparsify=False,
+        max_index=100,
+        hidden_elements=[0, 1, 0, 1],  # hidden element can repeat if duplicated index
+    )
+    tm.assert_dict_equal(result, expected)
+
+
+def test_row_trimming_hide_index():
+    # gh 43703
+    df = DataFrame([[1], [2], [3], [4], [5]])
+    with pd.option_context("styler.render.max_rows", 2):
+        ctx = df.style.hide([0, 1], axis="index")._translate(True, True)
+    assert len(ctx["body"]) == 3
+    for r, val in enumerate(["3", "4", "..."]):
+        assert ctx["body"][r][1]["display_value"] == val
+
+
+def test_row_trimming_hide_index_mi():
+    # gh 44247
+    df = DataFrame([[1], [2], [3], [4], [5]])
+    df.index = MultiIndex.from_product([[0], [0, 1, 2, 3, 4]])
+    with
pd.option_context("styler.render.max_rows", 2): + ctx = df.style.hide([(0, 0), (0, 1)], axis="index")._translate(True, True) + assert len(ctx["body"]) == 3 + + # level 0 index headers (sparsified) + assert {"value": 0, "attributes": 'rowspan="2"', "is_visible": True}.items() <= ctx[ + "body" + ][0][0].items() + assert {"value": 0, "attributes": "", "is_visible": False}.items() <= ctx["body"][ + 1 + ][0].items() + assert {"value": "...", "is_visible": True}.items() <= ctx["body"][2][0].items() + + for r, val in enumerate(["2", "3", "..."]): + assert ctx["body"][r][1]["display_value"] == val # level 1 index headers + for r, val in enumerate(["3", "4", "..."]): + assert ctx["body"][r][2]["display_value"] == val # data values + + +def test_col_trimming_hide_columns(): + # gh 44272 + df = DataFrame([[1, 2, 3, 4, 5]]) + with pd.option_context("styler.render.max_columns", 2): + ctx = df.style.hide([0, 1], axis="columns")._translate(True, True) + + assert len(ctx["head"][0]) == 6 # blank, [0, 1 (hidden)], [2 ,3 (visible)], + trim + for c, vals in enumerate([(1, False), (2, True), (3, True), ("...", True)]): + assert ctx["head"][0][c + 2]["value"] == vals[0] + assert ctx["head"][0][c + 2]["is_visible"] == vals[1] + + assert len(ctx["body"][0]) == 6 # index + 2 hidden + 2 visible + trimming col diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/test_to_latex.py b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/test_to_latex.py new file mode 100644 index 0000000..145cd83 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/test_to_latex.py @@ -0,0 +1,992 @@ +from textwrap import dedent + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + MultiIndex, + option_context, +) + +pytest.importorskip("jinja2") +from pandas.io.formats.style import Styler +from pandas.io.formats.style_render import ( + _parse_latex_cell_styles, + _parse_latex_css_conversion, + _parse_latex_header_span, + _parse_latex_table_styles, + _parse_latex_table_wrapping, +) + + +@pytest.fixture +def df(): + return DataFrame({"A": [0, 1], "B": [-0.61, -1.22], "C": ["ab", "cd"]}) + + +@pytest.fixture +def df_ext(): + return DataFrame( + {"A": [0, 1, 2], "B": [-0.61, -1.22, -2.22], "C": ["ab", "cd", "de"]} + ) + + +@pytest.fixture +def styler(df): + return Styler(df, uuid_len=0, precision=2) + + +def test_minimal_latex_tabular(styler): + expected = dedent( + """\ + \\begin{tabular}{lrrl} + & A & B & C \\\\ + 0 & 0 & -0.61 & ab \\\\ + 1 & 1 & -1.22 & cd \\\\ + \\end{tabular} + """ + ) + assert styler.to_latex() == expected + + +def test_tabular_hrules(styler): + expected = dedent( + """\ + \\begin{tabular}{lrrl} + \\toprule + & A & B & C \\\\ + \\midrule + 0 & 0 & -0.61 & ab \\\\ + 1 & 1 & -1.22 & cd \\\\ + \\bottomrule + \\end{tabular} + """ + ) + assert styler.to_latex(hrules=True) == expected + + +def test_tabular_custom_hrules(styler): + styler.set_table_styles( + [ + {"selector": "toprule", "props": ":hline"}, + {"selector": "bottomrule", "props": ":otherline"}, + ] + ) # no midrule + expected = dedent( + """\ + \\begin{tabular}{lrrl} + \\hline + & A & B & C \\\\ + 0 & 0 & -0.61 & ab \\\\ + 1 & 1 & -1.22 & cd \\\\ + \\otherline + \\end{tabular} + """ + ) + assert styler.to_latex() == expected + + +def test_column_format(styler): + # default setting is already tested in `test_latex_minimal_tabular` + styler.set_table_styles([{"selector": "column_format", "props": ":cccc"}]) + + assert "\\begin{tabular}{rrrr}" in 
styler.to_latex(column_format="rrrr") + styler.set_table_styles([{"selector": "column_format", "props": ":r|r|cc"}]) + assert "\\begin{tabular}{r|r|cc}" in styler.to_latex() + + +def test_siunitx_cols(styler): + expected = dedent( + """\ + \\begin{tabular}{lSSl} + {} & {A} & {B} & {C} \\\\ + 0 & 0 & -0.61 & ab \\\\ + 1 & 1 & -1.22 & cd \\\\ + \\end{tabular} + """ + ) + assert styler.to_latex(siunitx=True) == expected + + +def test_position(styler): + assert "\\begin{table}[h!]" in styler.to_latex(position="h!") + assert "\\end{table}" in styler.to_latex(position="h!") + styler.set_table_styles([{"selector": "position", "props": ":b!"}]) + assert "\\begin{table}[b!]" in styler.to_latex() + assert "\\end{table}" in styler.to_latex() + + +@pytest.mark.parametrize("env", [None, "longtable"]) +def test_label(styler, env): + assert "\n\\label{text}" in styler.to_latex(label="text", environment=env) + styler.set_table_styles([{"selector": "label", "props": ":{more §text}"}]) + assert "\n\\label{more :text}" in styler.to_latex(environment=env) + + +def test_position_float_raises(styler): + msg = "`position_float` should be one of 'raggedright', 'raggedleft', 'centering'," + with pytest.raises(ValueError, match=msg): + styler.to_latex(position_float="bad_string") + + msg = "`position_float` cannot be used in 'longtable' `environment`" + with pytest.raises(ValueError, match=msg): + styler.to_latex(position_float="centering", environment="longtable") + + +@pytest.mark.parametrize("label", [(None, ""), ("text", "\\label{text}")]) +@pytest.mark.parametrize("position", [(None, ""), ("h!", "{table}[h!]")]) +@pytest.mark.parametrize("caption", [(None, ""), ("text", "\\caption{text}")]) +@pytest.mark.parametrize("column_format", [(None, ""), ("rcrl", "{tabular}{rcrl}")]) +@pytest.mark.parametrize("position_float", [(None, ""), ("centering", "\\centering")]) +def test_kwargs_combinations( + styler, label, position, caption, column_format, position_float +): + result = styler.to_latex( + label=label[0], + position=position[0], + caption=caption[0], + column_format=column_format[0], + position_float=position_float[0], + ) + assert label[1] in result + assert position[1] in result + assert caption[1] in result + assert column_format[1] in result + assert position_float[1] in result + + +def test_custom_table_styles(styler): + styler.set_table_styles( + [ + {"selector": "mycommand", "props": ":{myoptions}"}, + {"selector": "mycommand2", "props": ":{myoptions2}"}, + ] + ) + expected = dedent( + """\ + \\begin{table} + \\mycommand{myoptions} + \\mycommand2{myoptions2} + """ + ) + assert expected in styler.to_latex() + + +def test_cell_styling(styler): + styler.highlight_max(props="itshape:;Huge:--wrap;") + expected = dedent( + """\ + \\begin{tabular}{lrrl} + & A & B & C \\\\ + 0 & 0 & \\itshape {\\Huge -0.61} & ab \\\\ + 1 & \\itshape {\\Huge 1} & -1.22 & \\itshape {\\Huge cd} \\\\ + \\end{tabular} + """ + ) + assert expected == styler.to_latex() + + +def test_multiindex_columns(df): + cidx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")]) + df.columns = cidx + expected = dedent( + """\ + \\begin{tabular}{lrrl} + & \\multicolumn{2}{r}{A} & B \\\\ + & a & b & c \\\\ + 0 & 0 & -0.61 & ab \\\\ + 1 & 1 & -1.22 & cd \\\\ + \\end{tabular} + """ + ) + s = df.style.format(precision=2) + assert expected == s.to_latex() + + # non-sparse + expected = dedent( + """\ + \\begin{tabular}{lrrl} + & A & A & B \\\\ + & a & b & c \\\\ + 0 & 0 & -0.61 & ab \\\\ + 1 & 1 & -1.22 & cd \\\\ + \\end{tabular} + """ + ) + s = 
df.style.format(precision=2) + assert expected == s.to_latex(sparse_columns=False) + + +def test_multiindex_row(df_ext): + ridx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")]) + df_ext.index = ridx + expected = dedent( + """\ + \\begin{tabular}{llrrl} + & & A & B & C \\\\ + \\multirow[c]{2}{*}{A} & a & 0 & -0.61 & ab \\\\ + & b & 1 & -1.22 & cd \\\\ + B & c & 2 & -2.22 & de \\\\ + \\end{tabular} + """ + ) + styler = df_ext.style.format(precision=2) + result = styler.to_latex() + assert expected == result + + # non-sparse + expected = dedent( + """\ + \\begin{tabular}{llrrl} + & & A & B & C \\\\ + A & a & 0 & -0.61 & ab \\\\ + A & b & 1 & -1.22 & cd \\\\ + B & c & 2 & -2.22 & de \\\\ + \\end{tabular} + """ + ) + result = styler.to_latex(sparse_index=False) + assert expected == result + + +def test_multirow_naive(df_ext): + ridx = MultiIndex.from_tuples([("X", "x"), ("X", "y"), ("Y", "z")]) + df_ext.index = ridx + expected = dedent( + """\ + \\begin{tabular}{llrrl} + & & A & B & C \\\\ + X & x & 0 & -0.61 & ab \\\\ + & y & 1 & -1.22 & cd \\\\ + Y & z & 2 & -2.22 & de \\\\ + \\end{tabular} + """ + ) + styler = df_ext.style.format(precision=2) + result = styler.to_latex(multirow_align="naive") + assert expected == result + + +def test_multiindex_row_and_col(df_ext): + cidx = MultiIndex.from_tuples([("Z", "a"), ("Z", "b"), ("Y", "c")]) + ridx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")]) + df_ext.index, df_ext.columns = ridx, cidx + expected = dedent( + """\ + \\begin{tabular}{llrrl} + & & \\multicolumn{2}{l}{Z} & Y \\\\ + & & a & b & c \\\\ + \\multirow[b]{2}{*}{A} & a & 0 & -0.61 & ab \\\\ + & b & 1 & -1.22 & cd \\\\ + B & c & 2 & -2.22 & de \\\\ + \\end{tabular} + """ + ) + styler = df_ext.style.format(precision=2) + result = styler.to_latex(multirow_align="b", multicol_align="l") + assert result == expected + + # non-sparse + expected = dedent( + """\ + \\begin{tabular}{llrrl} + & & Z & Z & Y \\\\ + & & a & b & c \\\\ + A & a & 0 & -0.61 & ab \\\\ + A & b & 1 & -1.22 & cd \\\\ + B & c & 2 & -2.22 & de \\\\ + \\end{tabular} + """ + ) + result = styler.to_latex(sparse_index=False, sparse_columns=False) + assert result == expected + + +@pytest.mark.parametrize( + "multicol_align, siunitx, header", + [ + ("naive-l", False, " & A & &"), + ("naive-r", False, " & & & A"), + ("naive-l", True, "{} & {A} & {} & {}"), + ("naive-r", True, "{} & {} & {} & {A}"), + ], +) +def test_multicol_naive(df, multicol_align, siunitx, header): + ridx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("A", "c")]) + df.columns = ridx + level1 = " & a & b & c" if not siunitx else "{} & {a} & {b} & {c}" + col_format = "lrrl" if not siunitx else "lSSl" + expected = dedent( + f"""\ + \\begin{{tabular}}{{{col_format}}} + {header} \\\\ + {level1} \\\\ + 0 & 0 & -0.61 & ab \\\\ + 1 & 1 & -1.22 & cd \\\\ + \\end{{tabular}} + """ + ) + styler = df.style.format(precision=2) + result = styler.to_latex(multicol_align=multicol_align, siunitx=siunitx) + assert expected == result + + +def test_multi_options(df_ext): + cidx = MultiIndex.from_tuples([("Z", "a"), ("Z", "b"), ("Y", "c")]) + ridx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")]) + df_ext.index, df_ext.columns = ridx, cidx + styler = df_ext.style.format(precision=2) + + expected = dedent( + """\ + & & \\multicolumn{2}{r}{Z} & Y \\\\ + & & a & b & c \\\\ + \\multirow[c]{2}{*}{A} & a & 0 & -0.61 & ab \\\\ + """ + ) + result = styler.to_latex() + assert expected in result + + with option_context("styler.latex.multicol_align", 
"l"): + assert " & & \\multicolumn{2}{l}{Z} & Y \\\\" in styler.to_latex() + + with option_context("styler.latex.multirow_align", "b"): + assert "\\multirow[b]{2}{*}{A} & a & 0 & -0.61 & ab \\\\" in styler.to_latex() + + +def test_multiindex_columns_hidden(): + df = DataFrame([[1, 2, 3, 4]]) + df.columns = MultiIndex.from_tuples([("A", 1), ("A", 2), ("A", 3), ("B", 1)]) + s = df.style + assert "{tabular}{lrrrr}" in s.to_latex() + s.set_table_styles([]) # reset the position command + s.hide([("A", 2)], axis="columns") + assert "{tabular}{lrrr}" in s.to_latex() + + +@pytest.mark.parametrize( + "option, value", + [ + ("styler.sparse.index", True), + ("styler.sparse.index", False), + ("styler.sparse.columns", True), + ("styler.sparse.columns", False), + ], +) +def test_sparse_options(df_ext, option, value): + cidx = MultiIndex.from_tuples([("Z", "a"), ("Z", "b"), ("Y", "c")]) + ridx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")]) + df_ext.index, df_ext.columns = ridx, cidx + styler = df_ext.style + + latex1 = styler.to_latex() + with option_context(option, value): + latex2 = styler.to_latex() + assert (latex1 == latex2) is value + + +def test_hidden_index(styler): + styler.hide(axis="index") + expected = dedent( + """\ + \\begin{tabular}{rrl} + A & B & C \\\\ + 0 & -0.61 & ab \\\\ + 1 & -1.22 & cd \\\\ + \\end{tabular} + """ + ) + assert styler.to_latex() == expected + + +@pytest.mark.parametrize("environment", ["table", "figure*", None]) +def test_comprehensive(df_ext, environment): + # test as many low level features simultaneously as possible + cidx = MultiIndex.from_tuples([("Z", "a"), ("Z", "b"), ("Y", "c")]) + ridx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")]) + df_ext.index, df_ext.columns = ridx, cidx + stlr = df_ext.style + stlr.set_caption("mycap") + stlr.set_table_styles( + [ + {"selector": "label", "props": ":{fig§item}"}, + {"selector": "position", "props": ":h!"}, + {"selector": "position_float", "props": ":centering"}, + {"selector": "column_format", "props": ":rlrlr"}, + {"selector": "toprule", "props": ":toprule"}, + {"selector": "midrule", "props": ":midrule"}, + {"selector": "bottomrule", "props": ":bottomrule"}, + {"selector": "rowcolors", "props": ":{3}{pink}{}"}, # custom command + ] + ) + stlr.highlight_max(axis=0, props="textbf:--rwrap;cellcolor:[rgb]{1,1,0.6}--rwrap") + stlr.highlight_max(axis=None, props="Huge:--wrap;", subset=[("Z", "a"), ("Z", "b")]) + + expected = ( + """\ +\\begin{table}[h!] 
+\\centering +\\caption{mycap} +\\label{fig:item} +\\rowcolors{3}{pink}{} +\\begin{tabular}{rlrlr} +\\toprule + & & \\multicolumn{2}{r}{Z} & Y \\\\ + & & a & b & c \\\\ +\\midrule +\\multirow[c]{2}{*}{A} & a & 0 & \\textbf{\\cellcolor[rgb]{1,1,0.6}{-0.61}} & ab \\\\ + & b & 1 & -1.22 & cd \\\\ +B & c & \\textbf{\\cellcolor[rgb]{1,1,0.6}{{\\Huge 2}}} & -2.22 & """ + """\ +\\textbf{\\cellcolor[rgb]{1,1,0.6}{de}} \\\\ +\\bottomrule +\\end{tabular} +\\end{table} +""" + ).replace("table", environment if environment else "table") + result = stlr.format(precision=2).to_latex(environment=environment) + assert result == expected + + +def test_environment_option(styler): + with option_context("styler.latex.environment", "bar-env"): + assert "\\begin{bar-env}" in styler.to_latex() + assert "\\begin{foo-env}" in styler.to_latex(environment="foo-env") + + +def test_parse_latex_table_styles(styler): + styler.set_table_styles( + [ + {"selector": "foo", "props": [("attr", "value")]}, + {"selector": "bar", "props": [("attr", "overwritten")]}, + {"selector": "bar", "props": [("attr", "baz"), ("attr2", "ignored")]}, + {"selector": "label", "props": [("", "{fig§item}")]}, + ] + ) + assert _parse_latex_table_styles(styler.table_styles, "bar") == "baz" + + # test '§' replaced by ':' [for CSS compatibility] + assert _parse_latex_table_styles(styler.table_styles, "label") == "{fig:item}" + + +def test_parse_latex_cell_styles_basic(): # test nesting + cell_style = [("itshape", "--rwrap"), ("cellcolor", "[rgb]{0,1,1}--rwrap")] + expected = "\\itshape{\\cellcolor[rgb]{0,1,1}{text}}" + assert _parse_latex_cell_styles(cell_style, "text") == expected + + +@pytest.mark.parametrize( + "wrap_arg, expected", + [ # test wrapping + ("", "\\ "), + ("--wrap", "{\\ }"), + ("--nowrap", "\\ "), + ("--lwrap", "{\\} "), + ("--dwrap", "{\\}{}"), + ("--rwrap", "\\{}"), + ], +) +def test_parse_latex_cell_styles_braces(wrap_arg, expected): + cell_style = [("", f"{wrap_arg}")] + assert _parse_latex_cell_styles(cell_style, "") == expected + + +def test_parse_latex_header_span(): + cell = {"attributes": 'colspan="3"', "display_value": "text", "cellstyle": []} + expected = "\\multicolumn{3}{Y}{text}" + assert _parse_latex_header_span(cell, "X", "Y") == expected + + cell = {"attributes": 'rowspan="5"', "display_value": "text", "cellstyle": []} + expected = "\\multirow[X]{5}{*}{text}" + assert _parse_latex_header_span(cell, "X", "Y") == expected + + cell = {"display_value": "text", "cellstyle": []} + assert _parse_latex_header_span(cell, "X", "Y") == "text" + + cell = {"display_value": "text", "cellstyle": [("bfseries", "--rwrap")]} + assert _parse_latex_header_span(cell, "X", "Y") == "\\bfseries{text}" + + +def test_parse_latex_table_wrapping(styler): + styler.set_table_styles( + [ + {"selector": "toprule", "props": ":value"}, + {"selector": "bottomrule", "props": ":value"}, + {"selector": "midrule", "props": ":value"}, + {"selector": "column_format", "props": ":value"}, + ] + ) + assert _parse_latex_table_wrapping(styler.table_styles, styler.caption) is False + assert _parse_latex_table_wrapping(styler.table_styles, "some caption") is True + styler.set_table_styles( + [ + {"selector": "not-ignored", "props": ":value"}, + ], + overwrite=False, + ) + assert _parse_latex_table_wrapping(styler.table_styles, None) is True + + +def test_short_caption(styler): + result = styler.to_latex(caption=("full cap", "short cap")) + assert "\\caption[short cap]{full cap}" in result + + +@pytest.mark.parametrize( + "css, expected", + [ + ([("color", 
"red")], [("color", "{red}")]), # test color and input format types + ( + [("color", "rgb(128, 128, 128 )")], + [("color", "[rgb]{0.502, 0.502, 0.502}")], + ), + ( + [("color", "rgb(128, 50%, 25% )")], + [("color", "[rgb]{0.502, 0.500, 0.250}")], + ), + ( + [("color", "rgba(128,128,128,1)")], + [("color", "[rgb]{0.502, 0.502, 0.502}")], + ), + ([("color", "#FF00FF")], [("color", "[HTML]{FF00FF}")]), + ([("color", "#F0F")], [("color", "[HTML]{FF00FF}")]), + ([("font-weight", "bold")], [("bfseries", "")]), # test font-weight and types + ([("font-weight", "bolder")], [("bfseries", "")]), + ([("font-weight", "normal")], []), + ([("background-color", "red")], [("cellcolor", "{red}--lwrap")]), + ( + [("background-color", "#FF00FF")], # test background-color command and wrap + [("cellcolor", "[HTML]{FF00FF}--lwrap")], + ), + ([("font-style", "italic")], [("itshape", "")]), # test font-style and types + ([("font-style", "oblique")], [("slshape", "")]), + ([("font-style", "normal")], []), + ([("color", "red /*--dwrap*/")], [("color", "{red}--dwrap")]), # css comments + ([("background-color", "red /* --dwrap */")], [("cellcolor", "{red}--dwrap")]), + ], +) +def test_parse_latex_css_conversion(css, expected): + result = _parse_latex_css_conversion(css) + assert result == expected + + +@pytest.mark.parametrize( + "env, inner_env", + [ + (None, "tabular"), + ("table", "tabular"), + ("longtable", "longtable"), + ], +) +@pytest.mark.parametrize( + "convert, exp", [(True, "bfseries"), (False, "font-weightbold")] +) +def test_parse_latex_css_convert_minimal(styler, env, inner_env, convert, exp): + # parameters ensure longtable template is also tested + styler.highlight_max(props="font-weight:bold;") + result = styler.to_latex(convert_css=convert, environment=env) + expected = dedent( + f"""\ + 0 & 0 & \\{exp} -0.61 & ab \\\\ + 1 & \\{exp} 1 & -1.22 & \\{exp} cd \\\\ + \\end{{{inner_env}}} + """ + ) + assert expected in result + + +def test_parse_latex_css_conversion_option(): + css = [("command", "option--latex--wrap")] + expected = [("command", "option--wrap")] + result = _parse_latex_css_conversion(css) + assert result == expected + + +def test_styler_object_after_render(styler): + # GH 42320 + pre_render = styler._copy(deepcopy=True) + styler.to_latex( + column_format="rllr", + position="h", + position_float="centering", + hrules=True, + label="my lab", + caption="my cap", + ) + + assert pre_render.table_styles == styler.table_styles + assert pre_render.caption == styler.caption + + +def test_longtable_comprehensive(styler): + result = styler.to_latex( + environment="longtable", hrules=True, label="fig:A", caption=("full", "short") + ) + expected = dedent( + """\ + \\begin{longtable}{lrrl} + \\caption[short]{full} \\label{fig:A} \\\\ + \\toprule + & A & B & C \\\\ + \\midrule + \\endfirsthead + \\caption[]{full} \\\\ + \\toprule + & A & B & C \\\\ + \\midrule + \\endhead + \\midrule + \\multicolumn{4}{r}{Continued on next page} \\\\ + \\midrule + \\endfoot + \\bottomrule + \\endlastfoot + 0 & 0 & -0.61 & ab \\\\ + 1 & 1 & -1.22 & cd \\\\ + \\end{longtable} + """ + ) + assert result == expected + + +def test_longtable_minimal(styler): + result = styler.to_latex(environment="longtable") + expected = dedent( + """\ + \\begin{longtable}{lrrl} + & A & B & C \\\\ + \\endfirsthead + & A & B & C \\\\ + \\endhead + \\multicolumn{4}{r}{Continued on next page} \\\\ + \\endfoot + \\endlastfoot + 0 & 0 & -0.61 & ab \\\\ + 1 & 1 & -1.22 & cd \\\\ + \\end{longtable} + """ + ) + assert result == expected + + 
+@pytest.mark.parametrize( + "sparse, exp, siunitx", + [ + (True, "{} & \\multicolumn{2}{r}{A} & {B}", True), + (False, "{} & {A} & {A} & {B}", True), + (True, " & \\multicolumn{2}{r}{A} & B", False), + (False, " & A & A & B", False), + ], +) +def test_longtable_multiindex_columns(df, sparse, exp, siunitx): + cidx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")]) + df.columns = cidx + with_si = "{} & {a} & {b} & {c} \\\\" + without_si = " & a & b & c \\\\" + expected = dedent( + f"""\ + \\begin{{longtable}}{{l{"SS" if siunitx else "rr"}l}} + {exp} \\\\ + {with_si if siunitx else without_si} + \\endfirsthead + {exp} \\\\ + {with_si if siunitx else without_si} + \\endhead + """ + ) + result = df.style.to_latex( + environment="longtable", sparse_columns=sparse, siunitx=siunitx + ) + assert expected in result + + +@pytest.mark.parametrize( + "caption, cap_exp", + [ + ("full", ("{full}", "")), + (("full", "short"), ("{full}", "[short]")), + ], +) +@pytest.mark.parametrize("label, lab_exp", [(None, ""), ("tab:A", " \\label{tab:A}")]) +def test_longtable_caption_label(styler, caption, cap_exp, label, lab_exp): + cap_exp1 = f"\\caption{cap_exp[1]}{cap_exp[0]}" + cap_exp2 = f"\\caption[]{cap_exp[0]}" + + expected = dedent( + f"""\ + {cap_exp1}{lab_exp} \\\\ + & A & B & C \\\\ + \\endfirsthead + {cap_exp2} \\\\ + """ + ) + assert expected in styler.to_latex( + environment="longtable", caption=caption, label=label + ) + + +@pytest.mark.parametrize("index", [True, False]) +@pytest.mark.parametrize( + "columns, siunitx", + [ + (True, True), + (True, False), + (False, False), + ], +) +def test_apply_map_header_render_mi(df_ext, index, columns, siunitx): + cidx = MultiIndex.from_tuples([("Z", "a"), ("Z", "b"), ("Y", "c")]) + ridx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")]) + df_ext.index, df_ext.columns = ridx, cidx + styler = df_ext.style + + func = lambda v: "bfseries: --rwrap" if "A" in v or "Z" in v or "c" in v else None + + if index: + styler.applymap_index(func, axis="index") + if columns: + styler.applymap_index(func, axis="columns") + + result = styler.to_latex(siunitx=siunitx) + + expected_index = dedent( + """\ + \\multirow[c]{2}{*}{\\bfseries{A}} & a & 0 & -0.610000 & ab \\\\ + \\bfseries{} & b & 1 & -1.220000 & cd \\\\ + B & \\bfseries{c} & 2 & -2.220000 & de \\\\ + """ + ) + assert (expected_index in result) is index + + exp_cols_si = dedent( + """\ + {} & {} & \\multicolumn{2}{r}{\\bfseries{Z}} & {Y} \\\\ + {} & {} & {a} & {b} & {\\bfseries{c}} \\\\ + """ + ) + exp_cols_no_si = """\ + & & \\multicolumn{2}{r}{\\bfseries{Z}} & Y \\\\ + & & a & b & \\bfseries{c} \\\\ +""" + assert ((exp_cols_si if siunitx else exp_cols_no_si) in result) is columns + + +def test_repr_option(styler): + assert "<td>5</td>" not in df._repr_html_() + + with option_context("display.max_rows", 12, "display.min_rows", None): + # when set to None, follow value of max_rows + assert "5 5" in repr(df) + assert "<td>5</td>" in df._repr_html_() + + with option_context("display.max_rows", 10, "display.min_rows", 12): + # when set value higher as max_rows, use the minimum + assert "5 5" not in repr(df) + assert "<td>5</td>" not in df._repr_html_() + + with option_context("display.max_rows", None, "display.min_rows", 12): + # max_rows of None -> never truncate + assert ".." not in repr(df) + assert ".."
not in df._repr_html_() + + def test_str_max_colwidth(self): + # GH 7856 + df = DataFrame( + [ + { + "a": "foo", + "b": "bar", + "c": "uncomfortably long line with lots of stuff", + "d": 1, + }, + {"a": "foo", "b": "bar", "c": "stuff", "d": 1}, + ] + ) + df.set_index(["a", "b", "c"]) + assert str(df) == ( + " a b c d\n" + "0 foo bar uncomfortably long line with lots of stuff 1\n" + "1 foo bar stuff 1" + ) + with option_context("max_colwidth", 20): + assert str(df) == ( + " a b c d\n" + "0 foo bar uncomfortably lo... 1\n" + "1 foo bar stuff 1" + ) + + def test_auto_detect(self): + term_width, term_height = get_terminal_size() + fac = 1.05 # Arbitrary large factor to exceed term width + cols = range(int(term_width * fac)) + index = range(10) + df = DataFrame(index=index, columns=cols) + with option_context("mode.sim_interactive", True): + with option_context("display.max_rows", None): + with option_context("display.max_columns", None): + # Wrap around with None + assert has_expanded_repr(df) + with option_context("display.max_rows", 0): + with option_context("display.max_columns", 0): + # Truncate with auto detection. + assert has_horizontally_truncated_repr(df) + + index = range(int(term_height * fac)) + df = DataFrame(index=index, columns=cols) + with option_context("display.max_rows", 0): + with option_context("display.max_columns", None): + # Wrap around with None + assert has_expanded_repr(df) + # Truncate vertically + assert has_vertically_truncated_repr(df) + + with option_context("display.max_rows", None): + with option_context("display.max_columns", 0): + assert has_horizontally_truncated_repr(df) + + def test_to_string_repr_unicode(self): + buf = StringIO() + + unicode_values = ["\u03c3"] * 10 + unicode_values = np.array(unicode_values, dtype=object) + df = DataFrame({"unicode": unicode_values}) + df.to_string(col_space=10, buf=buf) + + # it works! 
+ repr(df) + + idx = Index(["abc", "\u03c3a", "aegdvg"]) + ser = Series(np.random.randn(len(idx)), idx) + rs = repr(ser).split("\n") + line_len = len(rs[0]) + for line in rs[1:]: + try: + line = line.decode(get_option("display.encoding")) + except AttributeError: + pass + if not line.startswith("dtype:"): + assert len(line) == line_len + + # it works even if sys.stdin in None + _stdin = sys.stdin + try: + sys.stdin = None + repr(df) + finally: + sys.stdin = _stdin + + def test_east_asian_unicode_false(self): + # not aligned properly because of east asian width + + # mid col + df = DataFrame( + {"a": ["あ", "いいい", "う", "ええええええ"], "b": [1, 222, 33333, 4]}, + index=["a", "bb", "c", "ddd"], + ) + expected = ( + " a b\na あ 1\n" + "bb いいい 222\nc う 33333\n" + "ddd ええええええ 4" + ) + assert repr(df) == expected + + # last col + df = DataFrame( + {"a": [1, 222, 33333, 4], "b": ["あ", "いいい", "う", "ええええええ"]}, + index=["a", "bb", "c", "ddd"], + ) + expected = ( + " a b\na 1 あ\n" + "bb 222 いいい\nc 33333 う\n" + "ddd 4 ええええええ" + ) + assert repr(df) == expected + + # all col + df = DataFrame( + {"a": ["あああああ", "い", "う", "えええ"], "b": ["あ", "いいい", "う", "ええええええ"]}, + index=["a", "bb", "c", "ddd"], + ) + expected = ( + " a b\na あああああ あ\n" + "bb い いいい\nc う う\n" + "ddd えええ ええええええ" + ) + assert repr(df) == expected + + # column name + df = DataFrame( + {"b": ["あ", "いいい", "う", "ええええええ"], "あああああ": [1, 222, 33333, 4]}, + index=["a", "bb", "c", "ddd"], + ) + expected = ( + " b あああああ\na あ 1\n" + "bb いいい 222\nc う 33333\n" + "ddd ええええええ 4" + ) + assert repr(df) == expected + + # index + df = DataFrame( + {"a": ["あああああ", "い", "う", "えええ"], "b": ["あ", "いいい", "う", "ええええええ"]}, + index=["あああ", "いいいいいい", "うう", "え"], + ) + expected = ( + " a b\nあああ あああああ あ\n" + "いいいいいい い いいい\nうう う う\n" + "え えええ ええええええ" + ) + assert repr(df) == expected + + # index name + df = DataFrame( + {"a": ["あああああ", "い", "う", "えええ"], "b": ["あ", "いいい", "う", "ええええええ"]}, + index=Index(["あ", "い", "うう", "え"], name="おおおお"), + ) + expected = ( + " a b\n" + "おおおお \n" + "あ あああああ あ\n" + "い い いいい\n" + "うう う う\n" + "え えええ ええええええ" + ) + assert repr(df) == expected + + # all + df = DataFrame( + {"あああ": ["あああ", "い", "う", "えええええ"], "いいいいい": ["あ", "いいい", "う", "ええ"]}, + index=Index(["あ", "いいい", "うう", "え"], name="お"), + ) + expected = ( + " あああ いいいいい\n" + "お \n" + "あ あああ あ\n" + "いいい い いいい\n" + "うう う う\n" + "え えええええ ええ" + ) + assert repr(df) == expected + + # MultiIndex + idx = MultiIndex.from_tuples( + [("あ", "いい"), ("う", "え"), ("おおお", "かかかか"), ("き", "くく")] + ) + df = DataFrame( + {"a": ["あああああ", "い", "う", "えええ"], "b": ["あ", "いいい", "う", "ええええええ"]}, + index=idx, + ) + expected = ( + " a b\n" + "あ いい あああああ あ\n" + "う え い いいい\n" + "おおお かかかか う う\n" + "き くく えええ ええええええ" + ) + assert repr(df) == expected + + # truncate + with option_context("display.max_rows", 3, "display.max_columns", 3): + df = DataFrame( + { + "a": ["あああああ", "い", "う", "えええ"], + "b": ["あ", "いいい", "う", "ええええええ"], + "c": ["お", "か", "ききき", "くくくくくく"], + "ああああ": ["さ", "し", "す", "せ"], + }, + columns=["a", "b", "c", "ああああ"], + ) + + expected = ( + " a ... ああああ\n0 あああああ ... さ\n" + ".. ... ... ...\n3 えええ ... せ\n" + "\n[4 rows x 4 columns]" + ) + assert repr(df) == expected + + df.index = ["あああ", "いいいい", "う", "aaa"] + expected = ( + " a ... ああああ\nあああ あああああ ... さ\n" + ".. ... ... ...\naaa えええ ... 
せ\n" + "\n[4 rows x 4 columns]" + ) + assert repr(df) == expected + + def test_east_asian_unicode_true(self): + # Enable Unicode option ----------------------------------------- + with option_context("display.unicode.east_asian_width", True): + + # mid col + df = DataFrame( + {"a": ["あ", "いいい", "う", "ええええええ"], "b": [1, 222, 33333, 4]}, + index=["a", "bb", "c", "ddd"], + ) + expected = ( + " a b\na あ 1\n" + "bb いいい 222\nc う 33333\n" + "ddd ええええええ 4" + ) + assert repr(df) == expected + + # last col + df = DataFrame( + {"a": [1, 222, 33333, 4], "b": ["あ", "いいい", "う", "ええええええ"]}, + index=["a", "bb", "c", "ddd"], + ) + expected = ( + " a b\na 1 あ\n" + "bb 222 いいい\nc 33333 う\n" + "ddd 4 ええええええ" + ) + assert repr(df) == expected + + # all col + df = DataFrame( + {"a": ["あああああ", "い", "う", "えええ"], "b": ["あ", "いいい", "う", "ええええええ"]}, + index=["a", "bb", "c", "ddd"], + ) + expected = ( + " a b\n" + "a あああああ あ\n" + "bb い いいい\n" + "c う う\n" + "ddd えええ ええええええ" + ) + assert repr(df) == expected + + # column name + df = DataFrame( + {"b": ["あ", "いいい", "う", "ええええええ"], "あああああ": [1, 222, 33333, 4]}, + index=["a", "bb", "c", "ddd"], + ) + expected = ( + " b あああああ\n" + "a あ 1\n" + "bb いいい 222\n" + "c う 33333\n" + "ddd ええええええ 4" + ) + assert repr(df) == expected + + # index + df = DataFrame( + {"a": ["あああああ", "い", "う", "えええ"], "b": ["あ", "いいい", "う", "ええええええ"]}, + index=["あああ", "いいいいいい", "うう", "え"], + ) + expected = ( + " a b\n" + "あああ あああああ あ\n" + "いいいいいい い いいい\n" + "うう う う\n" + "え えええ ええええええ" + ) + assert repr(df) == expected + + # index name + df = DataFrame( + {"a": ["あああああ", "い", "う", "えええ"], "b": ["あ", "いいい", "う", "ええええええ"]}, + index=Index(["あ", "い", "うう", "え"], name="おおおお"), + ) + expected = ( + " a b\n" + "おおおお \n" + "あ あああああ あ\n" + "い い いいい\n" + "うう う う\n" + "え えええ ええええええ" + ) + assert repr(df) == expected + + # all + df = DataFrame( + {"あああ": ["あああ", "い", "う", "えええええ"], "いいいいい": ["あ", "いいい", "う", "ええ"]}, + index=Index(["あ", "いいい", "うう", "え"], name="お"), + ) + expected = ( + " あああ いいいいい\n" + "お \n" + "あ あああ あ\n" + "いいい い いいい\n" + "うう う う\n" + "え えええええ ええ" + ) + assert repr(df) == expected + + # MultiIndex + idx = MultiIndex.from_tuples( + [("あ", "いい"), ("う", "え"), ("おおお", "かかかか"), ("き", "くく")] + ) + df = DataFrame( + {"a": ["あああああ", "い", "う", "えええ"], "b": ["あ", "いいい", "う", "ええええええ"]}, + index=idx, + ) + expected = ( + " a b\n" + "あ いい あああああ あ\n" + "う え い いいい\n" + "おおお かかかか う う\n" + "き くく えええ ええええええ" + ) + assert repr(df) == expected + + # truncate + with option_context("display.max_rows", 3, "display.max_columns", 3): + + df = DataFrame( + { + "a": ["あああああ", "い", "う", "えええ"], + "b": ["あ", "いいい", "う", "ええええええ"], + "c": ["お", "か", "ききき", "くくくくくく"], + "ああああ": ["さ", "し", "す", "せ"], + }, + columns=["a", "b", "c", "ああああ"], + ) + + expected = ( + " a ... ああああ\n" + "0 あああああ ... さ\n" + ".. ... ... ...\n" + "3 えええ ... せ\n" + "\n[4 rows x 4 columns]" + ) + assert repr(df) == expected + + df.index = ["あああ", "いいいい", "う", "aaa"] + expected = ( + " a ... ああああ\n" + "あああ あああああ ... さ\n" + "... ... ... ...\n" + "aaa えええ ... 
せ\n" + "\n[4 rows x 4 columns]" + ) + assert repr(df) == expected + + # ambiguous unicode + df = DataFrame( + {"b": ["あ", "いいい", "¡¡", "ええええええ"], "あああああ": [1, 222, 33333, 4]}, + index=["a", "bb", "c", "¡¡¡"], + ) + expected = ( + " b あああああ\n" + "a あ 1\n" + "bb いいい 222\n" + "c ¡¡ 33333\n" + "¡¡¡ ええええええ 4" + ) + assert repr(df) == expected + + def test_to_string_buffer_all_unicode(self): + buf = StringIO() + + empty = DataFrame({"c/\u03c3": Series(dtype=object)}) + nonempty = DataFrame({"c/\u03c3": Series([1, 2, 3])}) + + print(empty, file=buf) + print(nonempty, file=buf) + + # this should work + buf.getvalue() + + def test_to_string_with_col_space(self): + df = DataFrame(np.random.random(size=(1, 3))) + c10 = len(df.to_string(col_space=10).split("\n")[1]) + c20 = len(df.to_string(col_space=20).split("\n")[1]) + c30 = len(df.to_string(col_space=30).split("\n")[1]) + assert c10 < c20 < c30 + + # GH 8230 + # col_space wasn't being applied with header=False + with_header = df.to_string(col_space=20) + with_header_row1 = with_header.splitlines()[1] + no_header = df.to_string(col_space=20, header=False) + assert len(with_header_row1) == len(no_header) + + def test_to_string_with_column_specific_col_space_raises(self): + df = DataFrame(np.random.random(size=(3, 3)), columns=["a", "b", "c"]) + + msg = ( + "Col_space length\\(\\d+\\) should match " + "DataFrame number of columns\\(\\d+\\)" + ) + with pytest.raises(ValueError, match=msg): + df.to_string(col_space=[30, 40]) + + with pytest.raises(ValueError, match=msg): + df.to_string(col_space=[30, 40, 50, 60]) + + msg = "unknown column" + with pytest.raises(ValueError, match=msg): + df.to_string(col_space={"a": "foo", "b": 23, "d": 34}) + + def test_to_string_with_column_specific_col_space(self): + df = DataFrame(np.random.random(size=(3, 3)), columns=["a", "b", "c"]) + + result = df.to_string(col_space={"a": 10, "b": 11, "c": 12}) + # 3 separating space + each col_space for (id, a, b, c) + assert len(result.split("\n")[1]) == (3 + 1 + 10 + 11 + 12) + + result = df.to_string(col_space=[10, 11, 12]) + assert len(result.split("\n")[1]) == (3 + 1 + 10 + 11 + 12) + + def test_to_string_truncate_indices(self): + for index in [ + tm.makeStringIndex, + tm.makeUnicodeIndex, + tm.makeIntIndex, + tm.makeDateIndex, + tm.makePeriodIndex, + ]: + for column in [tm.makeStringIndex]: + for h in [10, 20]: + for w in [10, 20]: + with option_context("display.expand_frame_repr", False): + df = DataFrame(index=index(h), columns=column(w)) + with option_context("display.max_rows", 15): + if h == 20: + assert has_vertically_truncated_repr(df) + else: + assert not has_vertically_truncated_repr(df) + with option_context("display.max_columns", 15): + if w == 20: + assert has_horizontally_truncated_repr(df) + else: + assert not (has_horizontally_truncated_repr(df)) + with option_context( + "display.max_rows", 15, "display.max_columns", 15 + ): + if h == 20 and w == 20: + assert has_doubly_truncated_repr(df) + else: + assert not has_doubly_truncated_repr(df) + + def test_to_string_truncate_multilevel(self): + arrays = [ + ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + df = DataFrame(index=arrays, columns=arrays) + with option_context("display.max_rows", 7, "display.max_columns", 7): + assert has_doubly_truncated_repr(df) + + def test_truncate_with_different_dtypes(self): + + # 11594, 12045 + # when truncated the dtypes of the splits can differ + + # 11594 + import datetime + + s = Series( + 
[datetime.datetime(2012, 1, 1)] * 10 + + [datetime.datetime(1012, 1, 2)] + + [datetime.datetime(2012, 1, 3)] * 10 + ) + + with option_context("display.max_rows", 8): + result = str(s) + assert "object" in result + + # 12045 + df = DataFrame({"text": ["some words"] + [None] * 9}) + + with option_context("display.max_rows", 8, "display.max_columns", 3): + result = str(df) + assert "None" in result + assert "NaN" not in result + + def test_truncate_with_different_dtypes_multiindex(self): + # GH#13000 + df = DataFrame({"Vals": range(100)}) + frame = pd.concat([df], keys=["Sweep"], names=["Sweep", "Index"]) + result = repr(frame) + + result2 = repr(frame.iloc[:5]) + assert result.startswith(result2) + + def test_datetimelike_frame(self): + + # GH 12211 + df = DataFrame({"date": [Timestamp("20130101").tz_localize("UTC")] + [NaT] * 5}) + + with option_context("display.max_rows", 5): + result = str(df) + assert "2013-01-01 00:00:00+00:00" in result + assert "NaT" in result + assert "..." in result + assert "[6 rows x 1 columns]" in result + + dts = [Timestamp("2011-01-01", tz="US/Eastern")] * 5 + [NaT] * 5 + df = DataFrame({"dt": dts, "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}) + with option_context("display.max_rows", 5): + expected = ( + " dt x\n" + "0 2011-01-01 00:00:00-05:00 1\n" + "1 2011-01-01 00:00:00-05:00 2\n" + ".. ... ..\n" + "8 NaT 9\n" + "9 NaT 10\n\n" + "[10 rows x 2 columns]" + ) + assert repr(df) == expected + + dts = [NaT] * 5 + [Timestamp("2011-01-01", tz="US/Eastern")] * 5 + df = DataFrame({"dt": dts, "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}) + with option_context("display.max_rows", 5): + expected = ( + " dt x\n" + "0 NaT 1\n" + "1 NaT 2\n" + ".. ... ..\n" + "8 2011-01-01 00:00:00-05:00 9\n" + "9 2011-01-01 00:00:00-05:00 10\n\n" + "[10 rows x 2 columns]" + ) + assert repr(df) == expected + + dts = [Timestamp("2011-01-01", tz="Asia/Tokyo")] * 5 + [ + Timestamp("2011-01-01", tz="US/Eastern") + ] * 5 + df = DataFrame({"dt": dts, "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}) + with option_context("display.max_rows", 5): + expected = ( + " dt x\n" + "0 2011-01-01 00:00:00+09:00 1\n" + "1 2011-01-01 00:00:00+09:00 2\n" + ".. ... 
..\n" + "8 2011-01-01 00:00:00-05:00 9\n" + "9 2011-01-01 00:00:00-05:00 10\n\n" + "[10 rows x 2 columns]" + ) + assert repr(df) == expected + + @pytest.mark.parametrize( + "start_date", + [ + "2017-01-01 23:59:59.999999999", + "2017-01-01 23:59:59.99999999", + "2017-01-01 23:59:59.9999999", + "2017-01-01 23:59:59.999999", + "2017-01-01 23:59:59.99999", + "2017-01-01 23:59:59.9999", + ], + ) + def test_datetimeindex_highprecision(self, start_date): + # GH19030 + # Check that high-precision time values for the end of day are + # included in repr for DatetimeIndex + df = DataFrame({"A": date_range(start=start_date, freq="D", periods=5)}) + result = str(df) + assert start_date in result + + dti = date_range(start=start_date, freq="D", periods=5) + df = DataFrame({"A": range(5)}, index=dti) + result = str(df.index) + assert start_date in result + + def test_nonunicode_nonascii_alignment(self): + df = DataFrame([["aa\xc3\xa4\xc3\xa4", 1], ["bbbb", 2]]) + rep_str = df.to_string() + lines = rep_str.split("\n") + assert len(lines[1]) == len(lines[2]) + + def test_unicode_problem_decoding_as_ascii(self): + dm = DataFrame({"c/\u03c3": Series({"test": np.nan})}) + str(dm.to_string()) + + def test_string_repr_encoding(self, datapath): + filepath = datapath("io", "parser", "data", "unicode_series.csv") + df = read_csv(filepath, header=None, encoding="latin1") + repr(df) + repr(df[1]) + + def test_repr_corner(self): + # representing infs poses no problems + df = DataFrame({"foo": [-np.inf, np.inf]}) + repr(df) + + def test_frame_info_encoding(self): + index = ["'Til There Was You (1997)", "ldum klaka (Cold Fever) (1994)"] + fmt.set_option("display.max_rows", 1) + df = DataFrame(columns=["a", "b", "c"], index=index) + repr(df) + repr(df.T) + fmt.set_option("display.max_rows", 200) + + def test_wide_repr(self): + with option_context( + "mode.sim_interactive", + True, + "display.show_dimensions", + True, + "display.max_columns", + 20, + ): + max_cols = get_option("display.max_columns") + df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1))) + set_option("display.expand_frame_repr", False) + rep_str = repr(df) + + assert f"10 rows x {max_cols - 1} columns" in rep_str + set_option("display.expand_frame_repr", True) + wide_repr = repr(df) + assert rep_str != wide_repr + + with option_context("display.width", 120): + wider_repr = repr(df) + assert len(wider_repr) < len(wide_repr) + + reset_option("display.expand_frame_repr") + + def test_wide_repr_wide_columns(self): + with option_context("mode.sim_interactive", True, "display.max_columns", 20): + df = DataFrame( + np.random.randn(5, 3), columns=["a" * 90, "b" * 90, "c" * 90] + ) + rep_str = repr(df) + + assert len(rep_str.splitlines()) == 20 + + def test_wide_repr_named(self): + with option_context("mode.sim_interactive", True, "display.max_columns", 20): + max_cols = get_option("display.max_columns") + df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1))) + df.index.name = "DataFrame Index" + set_option("display.expand_frame_repr", False) + + rep_str = repr(df) + set_option("display.expand_frame_repr", True) + wide_repr = repr(df) + assert rep_str != wide_repr + + with option_context("display.width", 150): + wider_repr = repr(df) + assert len(wider_repr) < len(wide_repr) + + for line in wide_repr.splitlines()[1::13]: + assert "DataFrame Index" in line + + reset_option("display.expand_frame_repr") + + def test_wide_repr_multiindex(self): + with option_context("mode.sim_interactive", True, "display.max_columns", 20): + midx = 
MultiIndex.from_arrays(tm.rands_array(5, size=(2, 10))) + max_cols = get_option("display.max_columns") + df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1)), index=midx) + df.index.names = ["Level 0", "Level 1"] + set_option("display.expand_frame_repr", False) + rep_str = repr(df) + set_option("display.expand_frame_repr", True) + wide_repr = repr(df) + assert rep_str != wide_repr + + with option_context("display.width", 150): + wider_repr = repr(df) + assert len(wider_repr) < len(wide_repr) + + for line in wide_repr.splitlines()[1::13]: + assert "Level 0 Level 1" in line + + reset_option("display.expand_frame_repr") + + def test_wide_repr_multiindex_cols(self): + with option_context("mode.sim_interactive", True, "display.max_columns", 20): + max_cols = get_option("display.max_columns") + midx = MultiIndex.from_arrays(tm.rands_array(5, size=(2, 10))) + mcols = MultiIndex.from_arrays(tm.rands_array(3, size=(2, max_cols - 1))) + df = DataFrame( + tm.rands_array(25, (10, max_cols - 1)), index=midx, columns=mcols + ) + df.index.names = ["Level 0", "Level 1"] + set_option("display.expand_frame_repr", False) + rep_str = repr(df) + set_option("display.expand_frame_repr", True) + wide_repr = repr(df) + assert rep_str != wide_repr + + with option_context("display.width", 150, "display.max_columns", 20): + wider_repr = repr(df) + assert len(wider_repr) < len(wide_repr) + + reset_option("display.expand_frame_repr") + + def test_wide_repr_unicode(self): + with option_context("mode.sim_interactive", True, "display.max_columns", 20): + max_cols = 20 + df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1))) + set_option("display.expand_frame_repr", False) + rep_str = repr(df) + set_option("display.expand_frame_repr", True) + wide_repr = repr(df) + assert rep_str != wide_repr + + with option_context("display.width", 150): + wider_repr = repr(df) + assert len(wider_repr) < len(wide_repr) + + reset_option("display.expand_frame_repr") + + def test_wide_repr_wide_long_columns(self): + with option_context("mode.sim_interactive", True): + df = DataFrame({"a": ["a" * 30, "b" * 30], "b": ["c" * 70, "d" * 80]}) + + result = repr(df) + assert "ccccc" in result + assert "ddddd" in result + + def test_long_series(self): + n = 1000 + s = Series( + np.random.randint(-50, 50, n), + index=[f"s{x:04d}" for x in range(n)], + dtype="int64", + ) + + import re + + str_rep = str(s) + nmatches = len(re.findall("dtype", str_rep)) + assert nmatches == 1 + + def test_index_with_nan(self): + # GH 2850 + df = DataFrame( + { + "id1": {0: "1a3", 1: "9h4"}, + "id2": {0: np.nan, 1: "d67"}, + "id3": {0: "78d", 1: "79d"}, + "value": {0: 123, 1: 64}, + } + ) + + # multi-index + y = df.set_index(["id1", "id2", "id3"]) + result = y.to_string() + expected = ( + " value\nid1 id2 id3 \n" + "1a3 NaN 78d 123\n9h4 d67 79d 64" + ) + assert result == expected + + # index + y = df.set_index("id2") + result = y.to_string() + expected = ( + " id1 id3 value\nid2 \n" + "NaN 1a3 78d 123\nd67 9h4 79d 64" + ) + assert result == expected + + # with append (this failed in 0.12) + y = df.set_index(["id1", "id2"]).set_index("id3", append=True) + result = y.to_string() + expected = ( + " value\nid1 id2 id3 \n" + "1a3 NaN 78d 123\n9h4 d67 79d 64" + ) + assert result == expected + + # all-nan in mi + df2 = df.copy() + df2.loc[:, "id2"] = np.nan + y = df2.set_index("id2") + result = y.to_string() + expected = ( + " id1 id3 value\nid2 \n" + "NaN 1a3 78d 123\nNaN 9h4 79d 64" + ) + assert result == expected + + # partial nan in mi + df2 = df.copy() + 
df2.loc[:, "id2"] = np.nan + y = df2.set_index(["id2", "id3"]) + result = y.to_string() + expected = ( + " id1 value\nid2 id3 \n" + "NaN 78d 1a3 123\n 79d 9h4 64" + ) + assert result == expected + + df = DataFrame( + { + "id1": {0: np.nan, 1: "9h4"}, + "id2": {0: np.nan, 1: "d67"}, + "id3": {0: np.nan, 1: "79d"}, + "value": {0: 123, 1: 64}, + } + ) + + y = df.set_index(["id1", "id2", "id3"]) + result = y.to_string() + expected = ( + " value\nid1 id2 id3 \n" + "NaN NaN NaN 123\n9h4 d67 79d 64" + ) + assert result == expected + + def test_to_string(self): + + # big mixed + biggie = DataFrame( + {"A": np.random.randn(200), "B": tm.makeStringIndex(200)}, + index=np.arange(200), + ) + + biggie.loc[:20, "A"] = np.nan + biggie.loc[:20, "B"] = np.nan + s = biggie.to_string() + + buf = StringIO() + retval = biggie.to_string(buf=buf) + assert retval is None + assert buf.getvalue() == s + + assert isinstance(s, str) + + # print in right order + result = biggie.to_string( + columns=["B", "A"], col_space=17, float_format="%.5f".__mod__ + ) + lines = result.split("\n") + header = lines[0].strip().split() + joined = "\n".join([re.sub(r"\s+", " ", x).strip() for x in lines[1:]]) + recons = read_csv(StringIO(joined), names=header, header=None, sep=" ") + tm.assert_series_equal(recons["B"], biggie["B"]) + assert recons["A"].count() == biggie["A"].count() + assert (np.abs(recons["A"].dropna() - biggie["A"].dropna()) < 0.1).all() + + # expected = ['B', 'A'] + # assert header == expected + + result = biggie.to_string(columns=["A"], col_space=17) + header = result.split("\n")[0].strip().split() + expected = ["A"] + assert header == expected + + biggie.to_string(columns=["B", "A"], formatters={"A": lambda x: f"{x:.1f}"}) + + biggie.to_string(columns=["B", "A"], float_format=str) + biggie.to_string(columns=["B", "A"], col_space=12, float_format=str) + + frame = DataFrame(index=np.arange(200)) + frame.to_string() + + def test_to_string_no_header(self): + df = DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}) + + df_s = df.to_string(header=False) + expected = "0 1 4\n1 2 5\n2 3 6" + + assert df_s == expected + + def test_to_string_specified_header(self): + df = DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}) + + df_s = df.to_string(header=["X", "Y"]) + expected = " X Y\n0 1 4\n1 2 5\n2 3 6" + + assert df_s == expected + + msg = "Writing 2 cols but got 1 aliases" + with pytest.raises(ValueError, match=msg): + df.to_string(header=["X"]) + + def test_to_string_no_index(self): + # GH 16839, GH 13032 + df = DataFrame({"x": [11, 22], "y": [33, -44], "z": ["AAA", " "]}) + + df_s = df.to_string(index=False) + # Leading space is expected for positive numbers. 
+ expected = " x y z\n11 33 AAA\n22 -44 " + assert df_s == expected + + df_s = df[["y", "x", "z"]].to_string(index=False) + expected = " y x z\n 33 11 AAA\n-44 22 " + assert df_s == expected + + def test_to_string_line_width_no_index(self): + # GH 13998, GH 22505 + df = DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}) + + df_s = df.to_string(line_width=1, index=False) + expected = " x \\\n 1 \n 2 \n 3 \n\n y \n 4 \n 5 \n 6 " + + assert df_s == expected + + df = DataFrame({"x": [11, 22, 33], "y": [4, 5, 6]}) + + df_s = df.to_string(line_width=1, index=False) + expected = " x \\\n11 \n22 \n33 \n\n y \n 4 \n 5 \n 6 " + + assert df_s == expected + + df = DataFrame({"x": [11, 22, -33], "y": [4, 5, -6]}) + + df_s = df.to_string(line_width=1, index=False) + expected = " x \\\n 11 \n 22 \n-33 \n\n y \n 4 \n 5 \n-6 " + + assert df_s == expected + + def test_to_string_float_formatting(self): + tm.reset_display_options() + fmt.set_option( + "display.precision", + 5, + "display.column_space", + 12, + "display.notebook_repr_html", + False, + ) + + df = DataFrame( + {"x": [0, 0.25, 3456.000, 12e45, 1.64e6, 1.7e8, 1.253456, np.pi, -1e6]} + ) + + df_s = df.to_string() + + if _three_digit_exp(): + expected = ( + " x\n0 0.00000e+000\n1 2.50000e-001\n" + "2 3.45600e+003\n3 1.20000e+046\n4 1.64000e+006\n" + "5 1.70000e+008\n6 1.25346e+000\n7 3.14159e+000\n" + "8 -1.00000e+006" + ) + else: + expected = ( + " x\n0 0.00000e+00\n1 2.50000e-01\n" + "2 3.45600e+03\n3 1.20000e+46\n4 1.64000e+06\n" + "5 1.70000e+08\n6 1.25346e+00\n7 3.14159e+00\n" + "8 -1.00000e+06" + ) + assert df_s == expected + + df = DataFrame({"x": [3234, 0.253]}) + df_s = df.to_string() + + expected = " x\n0 3234.000\n1 0.253" + assert df_s == expected + + tm.reset_display_options() + assert get_option("display.precision") == 6 + + df = DataFrame({"x": [1e9, 0.2512]}) + df_s = df.to_string() + + if _three_digit_exp(): + expected = " x\n0 1.000000e+009\n1 2.512000e-001" + else: + expected = " x\n0 1.000000e+09\n1 2.512000e-01" + assert df_s == expected + + def test_to_string_float_format_no_fixed_width(self): + + # GH 21625 + df = DataFrame({"x": [0.19999]}) + expected = " x\n0 0.200" + assert df.to_string(float_format="%.3f") == expected + + # GH 22270 + df = DataFrame({"x": [100.0]}) + expected = " x\n0 100" + assert df.to_string(float_format="%.0f") == expected + + def test_to_string_small_float_values(self): + df = DataFrame({"a": [1.5, 1e-17, -5.5e-7]}) + + result = df.to_string() + # sadness per above + if _three_digit_exp(): + expected = ( + " a\n" + "0 1.500000e+000\n" + "1 1.000000e-017\n" + "2 -5.500000e-007" + ) + else: + expected = ( + " a\n" + "0 1.500000e+00\n" + "1 1.000000e-17\n" + "2 -5.500000e-07" + ) + assert result == expected + + # but not all exactly zero + df = df * 0 + result = df.to_string() + expected = " 0\n0 0\n1 0\n2 -0" + + def test_to_string_float_index(self): + index = Index([1.5, 2, 3, 4, 5]) + df = DataFrame(np.arange(5), index=index) + + result = df.to_string() + expected = " 0\n1.5 0\n2.0 1\n3.0 2\n4.0 3\n5.0 4" + assert result == expected + + def test_to_string_complex_float_formatting(self): + # GH #25514, 25745 + with option_context("display.precision", 5): + df = DataFrame( + { + "x": [ + (0.4467846931321966 + 0.0715185102060818j), + (0.2739442392974528 + 0.23515228785438969j), + (0.26974928742135185 + 0.3250604054898979j), + (-1j), + ] + } + ) + result = df.to_string() + expected = ( + " x\n0 0.44678+0.07152j\n" + "1 0.27394+0.23515j\n" + "2 0.26975+0.32506j\n" + "3 -0.00000-1.00000j" + ) + assert result == 
expected + + def test_to_string_ascii_error(self): + data = [ + ( + "0 ", + " .gitignore ", + " 5 ", + " \xe2\x80\xa2\xe2\x80\xa2\xe2\x80\xa2\xe2\x80\xa2\xe2\x80\xa2", + ) + ] + df = DataFrame(data) + + # it works! + repr(df) + + def test_to_string_int_formatting(self): + df = DataFrame({"x": [-15, 20, 25, -35]}) + assert issubclass(df["x"].dtype.type, np.integer) + + output = df.to_string() + expected = " x\n0 -15\n1 20\n2 25\n3 -35" + assert output == expected + + def test_to_string_index_formatter(self): + df = DataFrame([range(5), range(5, 10), range(10, 15)]) + + rs = df.to_string(formatters={"__index__": lambda x: "abc"[x]}) + + xp = """\ + 0 1 2 3 4 +a 0 1 2 3 4 +b 5 6 7 8 9 +c 10 11 12 13 14\ +""" + + assert rs == xp + + def test_to_string_left_justify_cols(self): + tm.reset_display_options() + df = DataFrame({"x": [3234, 0.253]}) + df_s = df.to_string(justify="left") + expected = " x \n0 3234.000\n1 0.253" + assert df_s == expected + + def test_to_string_format_na(self): + tm.reset_display_options() + df = DataFrame( + { + "A": [np.nan, -1, -2.1234, 3, 4], + "B": [np.nan, "foo", "foooo", "fooooo", "bar"], + } + ) + result = df.to_string() + + expected = ( + " A B\n" + "0 NaN NaN\n" + "1 -1.0000 foo\n" + "2 -2.1234 foooo\n" + "3 3.0000 fooooo\n" + "4 4.0000 bar" + ) + assert result == expected + + df = DataFrame( + { + "A": [np.nan, -1.0, -2.0, 3.0, 4.0], + "B": [np.nan, "foo", "foooo", "fooooo", "bar"], + } + ) + result = df.to_string() + + expected = ( + " A B\n" + "0 NaN NaN\n" + "1 -1.0 foo\n" + "2 -2.0 foooo\n" + "3 3.0 fooooo\n" + "4 4.0 bar" + ) + assert result == expected + + def test_to_string_format_inf(self): + # Issue #24861 + tm.reset_display_options() + df = DataFrame( + { + "A": [-np.inf, np.inf, -1, -2.1234, 3, 4], + "B": [-np.inf, np.inf, "foo", "foooo", "fooooo", "bar"], + } + ) + result = df.to_string() + + expected = ( + " A B\n" + "0 -inf -inf\n" + "1 inf inf\n" + "2 -1.0000 foo\n" + "3 -2.1234 foooo\n" + "4 3.0000 fooooo\n" + "5 4.0000 bar" + ) + assert result == expected + + df = DataFrame( + { + "A": [-np.inf, np.inf, -1.0, -2.0, 3.0, 4.0], + "B": [-np.inf, np.inf, "foo", "foooo", "fooooo", "bar"], + } + ) + result = df.to_string() + + expected = ( + " A B\n" + "0 -inf -inf\n" + "1 inf inf\n" + "2 -1.0 foo\n" + "3 -2.0 foooo\n" + "4 3.0 fooooo\n" + "5 4.0 bar" + ) + assert result == expected + + def test_to_string_decimal(self): + # Issue #23614 + df = DataFrame({"A": [6.0, 3.1, 2.2]}) + expected = " A\n0 6,0\n1 3,1\n2 2,2" + assert df.to_string(decimal=",") == expected + + def test_to_string_line_width(self): + df = DataFrame(123, index=range(10, 15), columns=range(30)) + s = df.to_string(line_width=80) + assert max(len(line) for line in s.split("\n")) == 80 + + def test_show_dimensions(self): + df = DataFrame(123, index=range(10, 15), columns=range(30)) + + with option_context( + "display.max_rows", + 10, + "display.max_columns", + 40, + "display.width", + 500, + "display.expand_frame_repr", + "info", + "display.show_dimensions", + True, + ): + assert "5 rows" in str(df) + assert "5 rows" in df._repr_html_() + with option_context( + "display.max_rows", + 10, + "display.max_columns", + 40, + "display.width", + 500, + "display.expand_frame_repr", + "info", + "display.show_dimensions", + False, + ): + assert "5 rows" not in str(df) + assert "5 rows" not in df._repr_html_() + with option_context( + "display.max_rows", + 2, + "display.max_columns", + 2, + "display.width", + 500, + "display.expand_frame_repr", + "info", + "display.show_dimensions", + "truncate", 
+ ): + assert "5 rows" in str(df) + assert "5 rows" in df._repr_html_() + with option_context( + "display.max_rows", + 10, + "display.max_columns", + 40, + "display.width", + 500, + "display.expand_frame_repr", + "info", + "display.show_dimensions", + "truncate", + ): + assert "5 rows" not in str(df) + assert "5 rows" not in df._repr_html_() + + def test_repr_html(self, float_frame): + df = float_frame + df._repr_html_() + + fmt.set_option("display.max_rows", 1, "display.max_columns", 1) + df._repr_html_() + + fmt.set_option("display.notebook_repr_html", False) + df._repr_html_() + + tm.reset_display_options() + + df = DataFrame([[1, 2], [3, 4]]) + fmt.set_option("display.show_dimensions", True) + assert "2 rows" in df._repr_html_() + fmt.set_option("display.show_dimensions", False) + assert "2 rows" not in df._repr_html_() + + tm.reset_display_options() + + def test_repr_html_mathjax(self): + df = DataFrame([[1, 2], [3, 4]]) + assert "tex2jax_ignore" not in df._repr_html_() + + with option_context("display.html.use_mathjax", False): + assert "tex2jax_ignore" in df._repr_html_() + + def test_repr_html_wide(self): + max_cols = 20 + df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1))) + with option_context("display.max_rows", 60, "display.max_columns", 20): + assert "..." not in df._repr_html_() + + wide_df = DataFrame(tm.rands_array(25, size=(10, max_cols + 1))) + with option_context("display.max_rows", 60, "display.max_columns", 20): + assert "..." in wide_df._repr_html_() + + def test_repr_html_wide_multiindex_cols(self): + max_cols = 20 + + mcols = MultiIndex.from_product( + [np.arange(max_cols // 2), ["foo", "bar"]], names=["first", "second"] + ) + df = DataFrame(tm.rands_array(25, size=(10, len(mcols))), columns=mcols) + reg_repr = df._repr_html_() + assert "..." not in reg_repr + + mcols = MultiIndex.from_product( + (np.arange(1 + (max_cols // 2)), ["foo", "bar"]), names=["first", "second"] + ) + df = DataFrame(tm.rands_array(25, size=(10, len(mcols))), columns=mcols) + with option_context("display.max_rows", 60, "display.max_columns", 20): + assert "..." in df._repr_html_() + + def test_repr_html_long(self): + with option_context("display.max_rows", 60): + max_rows = get_option("display.max_rows") + h = max_rows - 1 + df = DataFrame({"A": np.arange(1, 1 + h), "B": np.arange(41, 41 + h)}) + reg_repr = df._repr_html_() + assert ".." not in reg_repr + assert str(41 + max_rows // 2) in reg_repr + + h = max_rows + 1 + df = DataFrame({"A": np.arange(1, 1 + h), "B": np.arange(41, 41 + h)}) + long_repr = df._repr_html_() + assert ".." in long_repr + assert str(41 + max_rows // 2) not in long_repr + assert f"{h} rows " in long_repr + assert "2 columns" in long_repr + + def test_repr_html_float(self): + with option_context("display.max_rows", 60): + + max_rows = get_option("display.max_rows") + h = max_rows - 1 + df = DataFrame( + { + "idx": np.linspace(-10, 10, h), + "A": np.arange(1, 1 + h), + "B": np.arange(41, 41 + h), + } + ).set_index("idx") + reg_repr = df._repr_html_() + assert ".." not in reg_repr + assert f"" in reg_repr + + h = max_rows + 1 + df = DataFrame( + { + "idx": np.linspace(-10, 10, h), + "A": np.arange(1, 1 + h), + "B": np.arange(41, 41 + h), + } + ).set_index("idx") + long_repr = df._repr_html_() + assert ".." 
in long_repr + assert f"<td>{40 + h}</td>" not in long_repr + assert f"{h} rows " in long_repr + assert "2 columns" in long_repr + + def test_repr_html_long_multiindex(self): + max_rows = 60 + max_L1 = max_rows // 2 + + tuples = list(itertools.product(np.arange(max_L1), ["foo", "bar"])) + idx = MultiIndex.from_tuples(tuples, names=["first", "second"]) + df = DataFrame(np.random.randn(max_L1 * 2, 2), index=idx, columns=["A", "B"]) + with option_context("display.max_rows", 60, "display.max_columns", 20): + reg_repr = df._repr_html_() + assert "..." not in reg_repr + + tuples = list(itertools.product(np.arange(max_L1 + 1), ["foo", "bar"])) + idx = MultiIndex.from_tuples(tuples, names=["first", "second"]) + df = DataFrame( + np.random.randn((max_L1 + 1) * 2, 2), index=idx, columns=["A", "B"] + ) + long_repr = df._repr_html_() + assert "..." in long_repr + + def test_repr_html_long_and_wide(self): + max_cols = 20 + max_rows = 60 + + h, w = max_rows - 1, max_cols - 1 + df = DataFrame({k: np.arange(1, 1 + h) for k in np.arange(w)}) + with option_context("display.max_rows", 60, "display.max_columns", 20): + assert "..." not in df._repr_html_() + + h, w = max_rows + 1, max_cols + 1 + df = DataFrame({k: np.arange(1, 1 + h) for k in np.arange(w)}) + with option_context("display.max_rows", 60, "display.max_columns", 20): + assert "..." in df._repr_html_() + + def test_info_repr(self): + # GH#21746 For tests inside a terminal (i.e. not CI) we need to detect + # the terminal size to ensure that we try to print something "too big" + term_width, term_height = get_terminal_size() + + max_rows = 60 + max_cols = 20 + (max(term_width, 80) - 80) // 4 + # Long + h, w = max_rows + 1, max_cols - 1 + df = DataFrame({k: np.arange(1, 1 + h) for k in np.arange(w)}) + assert has_vertically_truncated_repr(df) + with option_context("display.large_repr", "info"): + assert has_info_repr(df) + + # Wide + h, w = max_rows - 1, max_cols + 1 + df = DataFrame({k: np.arange(1, 1 + h) for k in np.arange(w)}) + assert has_horizontally_truncated_repr(df) + with option_context( + "display.large_repr", "info", "display.max_columns", max_cols + ): + assert has_info_repr(df) + + def test_info_repr_max_cols(self): + # GH #6939 + df = DataFrame(np.random.randn(10, 5)) + with option_context( + "display.large_repr", + "info", + "display.max_columns", + 1, + "display.max_info_columns", + 4, + ): + assert has_non_verbose_info_repr(df) + + with option_context( + "display.large_repr", + "info", + "display.max_columns", + 1, + "display.max_info_columns", + 5, + ): + assert not has_non_verbose_info_repr(df) + + # test verbose overrides + # fmt.set_option('display.max_info_columns', 4) # exceeded + + def test_info_repr_html(self): + max_rows = 60 + max_cols = 20 + # Long + h, w = max_rows + 1, max_cols - 1 + df = DataFrame({k: np.arange(1, 1 + h) for k in np.arange(w)}) + assert r"&lt;class" not in df._repr_html_() + with option_context("display.large_repr", "info"): + assert r"&lt;class" in df._repr_html_() + + # Wide + h, w = max_rows - 1, max_cols + 1 + df = DataFrame({k: np.arange(1, 1 + h) for k in np.arange(w)}) + assert "<class" not in df._repr_html_() + with option_context( + "display.large_repr", "info", "display.max_columns", max_cols + ): + assert "&lt;class" in df._repr_html_() + + @pytest.mark.parametrize( + "length, max_rows, min_rows, expected", + [ + (20, 30, 10, 30), # max_rows > len(frame), hence max_rows + (50, 30, 10, 10), # max_rows < len(frame), hence min_rows + (100, 60, 10, 10), # same + (60, 60, 10, 60), # edge case + (61, 60, 10, 10), # edge case + ], + ) + def test_max_rows_fitted(self, length, min_rows, max_rows, expected): + """Check that display logic is correct.
+ + GH #37359 + + See description here: + https://pandas.pydata.org/docs/dev/user_guide/options.html#frequently-used-options + """ + formatter = fmt.DataFrameFormatter( + DataFrame(np.random.rand(length, 3)), + max_rows=max_rows, + min_rows=min_rows, + ) + result = formatter.max_rows_fitted + assert result == expected + + +def gen_series_formatting(): + s1 = Series(["a"] * 100) + s2 = Series(["ab"] * 100) + s3 = Series(["a", "ab", "abc", "abcd", "abcde", "abcdef"]) + s4 = s3[::-1] + test_sers = {"onel": s1, "twol": s2, "asc": s3, "desc": s4} + return test_sers + + +class TestSeriesFormatting: + def setup_method(self, method): + self.ts = tm.makeTimeSeries() + + def test_repr_unicode(self): + s = Series(["\u03c3"] * 10) + repr(s) + + a = Series(["\u05d0"] * 1000) + a.name = "title1" + repr(a) + + def test_to_string(self): + buf = StringIO() + + s = self.ts.to_string() + + retval = self.ts.to_string(buf=buf) + assert retval is None + assert buf.getvalue().strip() == s + + # pass float_format + format = "%.4f".__mod__ + result = self.ts.to_string(float_format=format) + result = [x.split()[1] for x in result.split("\n")[:-1]] + expected = [format(x) for x in self.ts] + assert result == expected + + # empty string + result = self.ts[:0].to_string() + assert result == "Series([], Freq: B)" + + result = self.ts[:0].to_string(length=0) + assert result == "Series([], Freq: B)" + + # name and length + cp = self.ts.copy() + cp.name = "foo" + result = cp.to_string(length=True, name=True, dtype=True) + last_line = result.split("\n")[-1].strip() + assert last_line == (f"Freq: B, Name: foo, Length: {len(cp)}, dtype: float64") + + def test_freq_name_separation(self): + s = Series( + np.random.randn(10), index=date_range("1/1/2000", periods=10), name=0 + ) + + result = repr(s) + assert "Freq: D, Name: 0" in result + + def test_to_string_mixed(self): + s = Series(["foo", np.nan, -1.23, 4.56]) + result = s.to_string() + expected = "0 foo\n" + "1 NaN\n" + "2 -1.23\n" + "3 4.56" + assert result == expected + + # but don't count NAs as floats + s = Series(["foo", np.nan, "bar", "baz"]) + result = s.to_string() + expected = "0 foo\n" + "1 NaN\n" + "2 bar\n" + "3 baz" + assert result == expected + + s = Series(["foo", 5, "bar", "baz"]) + result = s.to_string() + expected = "0 foo\n" + "1 5\n" + "2 bar\n" + "3 baz" + assert result == expected + + def test_to_string_float_na_spacing(self): + s = Series([0.0, 1.5678, 2.0, -3.0, 4.0]) + s[::2] = np.nan + + result = s.to_string() + expected = ( + "0 NaN\n" + + "1 1.5678\n" + + "2 NaN\n" + + "3 -3.0000\n" + + "4 NaN" + ) + assert result == expected + + def test_to_string_without_index(self): + # GH 11729 Test index=False option + s = Series([1, 2, 3, 4]) + result = s.to_string(index=False) + expected = "1\n" + "2\n" + "3\n" + "4" + assert result == expected + + def test_unicode_name_in_footer(self): + s = Series([1, 2], name="\u05e2\u05d1\u05e8\u05d9\u05ea") + sf = fmt.SeriesFormatter(s, name="\u05e2\u05d1\u05e8\u05d9\u05ea") + sf._get_footer() # should not raise exception + + def test_east_asian_unicode_series(self): + # not aligned properly because of east asian width + + # unicode index + s = Series(["a", "bb", "CCC", "D"], index=["あ", "いい", "ううう", "ええええ"]) + expected = "あ a\nいい bb\nううう CCC\nええええ D\ndtype: object" + assert repr(s) == expected + + # unicode values + s = Series(["あ", "いい", "ううう", "ええええ"], index=["a", "bb", "c", "ddd"]) + expected = "a あ\nbb いい\nc ううう\nddd ええええ\ndtype: object" + assert repr(s) == expected + + # both + s = Series(["あ", "いい", "ううう", 
"ええええ"], index=["ああ", "いいいい", "う", "えええ"]) + expected = ( + "ああ あ\nいいいい いい\nう ううう\nえええ ええええ\ndtype: object" + ) + assert repr(s) == expected + + # unicode footer + s = Series( + ["あ", "いい", "ううう", "ええええ"], index=["ああ", "いいいい", "う", "えええ"], name="おおおおおおお" + ) + expected = ( + "ああ あ\nいいいい いい\nう ううう\n" + "えええ ええええ\nName: おおおおおおお, dtype: object" + ) + assert repr(s) == expected + + # MultiIndex + idx = MultiIndex.from_tuples( + [("あ", "いい"), ("う", "え"), ("おおお", "かかかか"), ("き", "くく")] + ) + s = Series([1, 22, 3333, 44444], index=idx) + expected = ( + "あ いい 1\n" + "う え 22\n" + "おおお かかかか 3333\n" + "き くく 44444\ndtype: int64" + ) + assert repr(s) == expected + + # object dtype, shorter than unicode repr + s = Series([1, 22, 3333, 44444], index=[1, "AB", np.nan, "あああ"]) + expected = ( + "1 1\nAB 22\nNaN 3333\nあああ 44444\ndtype: int64" + ) + assert repr(s) == expected + + # object dtype, longer than unicode repr + s = Series( + [1, 22, 3333, 44444], index=[1, "AB", Timestamp("2011-01-01"), "あああ"] + ) + expected = ( + "1 1\n" + "AB 22\n" + "2011-01-01 00:00:00 3333\n" + "あああ 44444\ndtype: int64" + ) + assert repr(s) == expected + + # truncate + with option_context("display.max_rows", 3): + s = Series(["あ", "いい", "ううう", "ええええ"], name="おおおおおおお") + + expected = ( + "0 あ\n ... \n" + "3 ええええ\n" + "Name: おおおおおおお, Length: 4, dtype: object" + ) + assert repr(s) == expected + + s.index = ["ああ", "いいいい", "う", "えええ"] + expected = ( + "ああ あ\n ... \n" + "えええ ええええ\n" + "Name: おおおおおおお, Length: 4, dtype: object" + ) + assert repr(s) == expected + + # Enable Unicode option ----------------------------------------- + with option_context("display.unicode.east_asian_width", True): + + # unicode index + s = Series(["a", "bb", "CCC", "D"], index=["あ", "いい", "ううう", "ええええ"]) + expected = ( + "あ a\nいい bb\nううう CCC\n" + "ええええ D\ndtype: object" + ) + assert repr(s) == expected + + # unicode values + s = Series(["あ", "いい", "ううう", "ええええ"], index=["a", "bb", "c", "ddd"]) + expected = ( + "a あ\nbb いい\nc ううう\n" + "ddd ええええ\ndtype: object" + ) + assert repr(s) == expected + + # both + s = Series(["あ", "いい", "ううう", "ええええ"], index=["ああ", "いいいい", "う", "えええ"]) + expected = ( + "ああ あ\n" + "いいいい いい\n" + "う ううう\n" + "えええ ええええ\ndtype: object" + ) + assert repr(s) == expected + + # unicode footer + s = Series( + ["あ", "いい", "ううう", "ええええ"], + index=["ああ", "いいいい", "う", "えええ"], + name="おおおおおおお", + ) + expected = ( + "ああ あ\n" + "いいいい いい\n" + "う ううう\n" + "えええ ええええ\n" + "Name: おおおおおおお, dtype: object" + ) + assert repr(s) == expected + + # MultiIndex + idx = MultiIndex.from_tuples( + [("あ", "いい"), ("う", "え"), ("おおお", "かかかか"), ("き", "くく")] + ) + s = Series([1, 22, 3333, 44444], index=idx) + expected = ( + "あ いい 1\n" + "う え 22\n" + "おおお かかかか 3333\n" + "き くく 44444\n" + "dtype: int64" + ) + assert repr(s) == expected + + # object dtype, shorter than unicode repr + s = Series([1, 22, 3333, 44444], index=[1, "AB", np.nan, "あああ"]) + expected = ( + "1 1\nAB 22\nNaN 3333\n" + "あああ 44444\ndtype: int64" + ) + assert repr(s) == expected + + # object dtype, longer than unicode repr + s = Series( + [1, 22, 3333, 44444], + index=[1, "AB", Timestamp("2011-01-01"), "あああ"], + ) + expected = ( + "1 1\n" + "AB 22\n" + "2011-01-01 00:00:00 3333\n" + "あああ 44444\ndtype: int64" + ) + assert repr(s) == expected + + # truncate + with option_context("display.max_rows", 3): + s = Series(["あ", "いい", "ううう", "ええええ"], name="おおおおおおお") + expected = ( + "0 あ\n ... 
\n" + "3 ええええ\n" + "Name: おおおおおおお, Length: 4, dtype: object" + ) + assert repr(s) == expected + + s.index = ["ああ", "いいいい", "う", "えええ"] + expected = ( + "ああ あ\n" + " ... \n" + "えええ ええええ\n" + "Name: おおおおおおお, Length: 4, dtype: object" + ) + assert repr(s) == expected + + # ambiguous unicode + s = Series( + ["¡¡", "い¡¡", "ううう", "ええええ"], index=["ああ", "¡¡¡¡いい", "¡¡", "えええ"] + ) + expected = ( + "ああ ¡¡\n" + "¡¡¡¡いい い¡¡\n" + "¡¡ ううう\n" + "えええ ええええ\ndtype: object" + ) + assert repr(s) == expected + + def test_float_trim_zeros(self): + vals = [ + 2.08430917305e10, + 3.52205017305e10, + 2.30674817305e10, + 2.03954217305e10, + 5.59897817305e10, + ] + for line in repr(Series(vals)).split("\n"): + if line.startswith("dtype:"): + continue + if _three_digit_exp(): + assert "+010" in line + else: + assert "+10" in line + + def test_datetimeindex(self): + + index = date_range("20130102", periods=6) + s = Series(1, index=index) + result = s.to_string() + assert "2013-01-02" in result + + # nat in index + s2 = Series(2, index=[Timestamp("20130111"), NaT]) + s = pd.concat([s2, s]) + result = s.to_string() + assert "NaT" in result + + # nat in summary + result = str(s2.index) + assert "NaT" in result + + @pytest.mark.parametrize( + "start_date", + [ + "2017-01-01 23:59:59.999999999", + "2017-01-01 23:59:59.99999999", + "2017-01-01 23:59:59.9999999", + "2017-01-01 23:59:59.999999", + "2017-01-01 23:59:59.99999", + "2017-01-01 23:59:59.9999", + ], + ) + def test_datetimeindex_highprecision(self, start_date): + # GH19030 + # Check that high-precision time values for the end of day are + # included in repr for DatetimeIndex + s1 = Series(date_range(start=start_date, freq="D", periods=5)) + result = str(s1) + assert start_date in result + + dti = date_range(start=start_date, freq="D", periods=5) + s2 = Series(3, index=dti) + result = str(s2.index) + assert start_date in result + + def test_timedelta64(self): + + from datetime import ( + datetime, + timedelta, + ) + + Series(np.array([1100, 20], dtype="timedelta64[ns]")).to_string() + + s = Series(date_range("2012-1-1", periods=3, freq="D")) + + # GH2146 + + # adding NaTs + y = s - s.shift(1) + result = y.to_string() + assert "1 days" in result + assert "00:00:00" not in result + assert "NaT" in result + + # with frac seconds + o = Series([datetime(2012, 1, 1, microsecond=150)] * 3) + y = s - o + result = y.to_string() + assert "-1 days +23:59:59.999850" in result + + # rounding? 
+ o = Series([datetime(2012, 1, 1, 1)] * 3) + y = s - o + result = y.to_string() + assert "-1 days +23:00:00" in result + assert "1 days 23:00:00" in result + + o = Series([datetime(2012, 1, 1, 1, 1)] * 3) + y = s - o + result = y.to_string() + assert "-1 days +22:59:00" in result + assert "1 days 22:59:00" in result + + o = Series([datetime(2012, 1, 1, 1, 1, microsecond=150)] * 3) + y = s - o + result = y.to_string() + assert "-1 days +22:58:59.999850" in result + assert "0 days 22:58:59.999850" in result + + # neg time + td = timedelta(minutes=5, seconds=3) + s2 = Series(date_range("2012-1-1", periods=3, freq="D")) + td + y = s - s2 + result = y.to_string() + assert "-1 days +23:54:57" in result + + td = timedelta(microseconds=550) + s2 = Series(date_range("2012-1-1", periods=3, freq="D")) + td + y = s - td + result = y.to_string() + assert "2012-01-01 23:59:59.999450" in result + + # no boxing of the actual elements + td = Series(pd.timedelta_range("1 days", periods=3)) + result = td.to_string() + assert result == "0 1 days\n1 2 days\n2 3 days" + + def test_mixed_datetime64(self): + df = DataFrame({"A": [1, 2], "B": ["2012-01-01", "2012-01-02"]}) + df["B"] = pd.to_datetime(df.B) + + result = repr(df.loc[0]) + assert "2012-01-01" in result + + def test_period(self): + # GH 12615 + index = pd.period_range("2013-01", periods=6, freq="M") + s = Series(np.arange(6, dtype="int64"), index=index) + exp = ( + "2013-01 0\n" + "2013-02 1\n" + "2013-03 2\n" + "2013-04 3\n" + "2013-05 4\n" + "2013-06 5\n" + "Freq: M, dtype: int64" + ) + assert str(s) == exp + + s = Series(index) + exp = ( + "0 2013-01\n" + "1 2013-02\n" + "2 2013-03\n" + "3 2013-04\n" + "4 2013-05\n" + "5 2013-06\n" + "dtype: period[M]" + ) + assert str(s) == exp + + # periods with mixed freq + s = Series( + [ + pd.Period("2011-01", freq="M"), + pd.Period("2011-02-01", freq="D"), + pd.Period("2011-03-01 09:00", freq="H"), + ] + ) + exp = ( + "0 2011-01\n1 2011-02-01\n" + "2 2011-03-01 09:00\ndtype: object" + ) + assert str(s) == exp + + def test_max_multi_index_display(self): + # GH 7101 + + # doc example (indexing.rst) + + # multi-index + arrays = [ + ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + tuples = list(zip(*arrays)) + index = MultiIndex.from_tuples(tuples, names=["first", "second"]) + s = Series(np.random.randn(8), index=index) + + with option_context("display.max_rows", 10): + assert len(str(s).split("\n")) == 10 + with option_context("display.max_rows", 3): + assert len(str(s).split("\n")) == 5 + with option_context("display.max_rows", 2): + assert len(str(s).split("\n")) == 5 + with option_context("display.max_rows", 1): + assert len(str(s).split("\n")) == 4 + with option_context("display.max_rows", 0): + assert len(str(s).split("\n")) == 10 + + # index + s = Series(np.random.randn(8), None) + + with option_context("display.max_rows", 10): + assert len(str(s).split("\n")) == 9 + with option_context("display.max_rows", 3): + assert len(str(s).split("\n")) == 4 + with option_context("display.max_rows", 2): + assert len(str(s).split("\n")) == 4 + with option_context("display.max_rows", 1): + assert len(str(s).split("\n")) == 3 + with option_context("display.max_rows", 0): + assert len(str(s).split("\n")) == 9 + + # Make sure #8532 is fixed + def test_consistent_format(self): + s = Series([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.9999, 1, 1] * 10) + with option_context("display.max_rows", 10, "display.show_dimensions", False): + res = repr(s) + exp = ( + "0 
1.0000\n1 1.0000\n2 1.0000\n3 " + "1.0000\n4 1.0000\n ... \n125 " + "1.0000\n126 1.0000\n127 0.9999\n128 " + "1.0000\n129 1.0000\ndtype: float64" + ) + assert res == exp + + def chck_ncols(self, s): + with option_context("display.max_rows", 10): + res = repr(s) + lines = res.split("\n") + lines = [ + line for line in repr(s).split("\n") if not re.match(r"[^\.]*\.+", line) + ][:-1] + ncolsizes = len({len(line.strip()) for line in lines}) + assert ncolsizes == 1 + + def test_format_explicit(self): + test_sers = gen_series_formatting() + with option_context("display.max_rows", 4, "display.show_dimensions", False): + res = repr(test_sers["onel"]) + exp = "0 a\n1 a\n ..\n98 a\n99 a\ndtype: object" + assert exp == res + res = repr(test_sers["twol"]) + exp = "0 ab\n1 ab\n ..\n98 ab\n99 ab\ndtype: object" + assert exp == res + res = repr(test_sers["asc"]) + exp = ( + "0 a\n1 ab\n ... \n4 abcde\n5 " + "abcdef\ndtype: object" + ) + assert exp == res + res = repr(test_sers["desc"]) + exp = ( + "5 abcdef\n4 abcde\n ... \n1 ab\n0 " + "a\ndtype: object" + ) + assert exp == res + + def test_ncols(self): + test_sers = gen_series_formatting() + for s in test_sers.values(): + self.chck_ncols(s) + + def test_max_rows_eq_one(self): + s = Series(range(10), dtype="int64") + with option_context("display.max_rows", 1): + strrepr = repr(s).split("\n") + exp1 = ["0", "0"] + res1 = strrepr[0].split() + assert exp1 == res1 + exp2 = [".."] + res2 = strrepr[1].split() + assert exp2 == res2 + + def test_truncate_ndots(self): + def getndots(s): + return len(re.match(r"[^\.]*(\.*)", s).groups()[0]) + + s = Series([0, 2, 3, 6]) + with option_context("display.max_rows", 2): + strrepr = repr(s).replace("\n", "") + assert getndots(strrepr) == 2 + + s = Series([0, 100, 200, 400]) + with option_context("display.max_rows", 2): + strrepr = repr(s).replace("\n", "") + assert getndots(strrepr) == 3 + + def test_show_dimensions(self): + # gh-7117 + s = Series(range(5)) + + assert "Length" not in repr(s) + + with option_context("display.max_rows", 4): + assert "Length" in repr(s) + + with option_context("display.show_dimensions", True): + assert "Length" in repr(s) + + with option_context("display.max_rows", 4, "display.show_dimensions", False): + assert "Length" not in repr(s) + + def test_repr_min_rows(self): + s = Series(range(20)) + + # default setting no truncation even if above min_rows + assert ".." not in repr(s) + + s = Series(range(61)) + + # default of max_rows 60 triggers truncation if above + assert ".." in repr(s) + + with option_context("display.max_rows", 10, "display.min_rows", 4): + # truncated after first two rows + assert ".." in repr(s) + assert "2 " not in repr(s) + + with option_context("display.max_rows", 12, "display.min_rows", None): + # when set to None, follow value of max_rows + assert "5 5" in repr(s) + + with option_context("display.max_rows", 10, "display.min_rows", 12): + # when set value higher as max_rows, use the minimum + assert "5 5" not in repr(s) + + with option_context("display.max_rows", None, "display.min_rows", 12): + # max_rows of None -> never truncate + assert ".." 
not in repr(s) + + def test_to_string_name(self): + s = Series(range(100), dtype="int64") + s.name = "myser" + res = s.to_string(max_rows=2, name=True) + exp = "0 0\n ..\n99 99\nName: myser" + assert res == exp + res = s.to_string(max_rows=2, name=False) + exp = "0 0\n ..\n99 99" + assert res == exp + + def test_to_string_dtype(self): + s = Series(range(100), dtype="int64") + res = s.to_string(max_rows=2, dtype=True) + exp = "0 0\n ..\n99 99\ndtype: int64" + assert res == exp + res = s.to_string(max_rows=2, dtype=False) + exp = "0 0\n ..\n99 99" + assert res == exp + + def test_to_string_length(self): + s = Series(range(100), dtype="int64") + res = s.to_string(max_rows=2, length=True) + exp = "0 0\n ..\n99 99\nLength: 100" + assert res == exp + + def test_to_string_na_rep(self): + s = Series(index=range(100), dtype=np.float64) + res = s.to_string(na_rep="foo", max_rows=2) + exp = "0 foo\n ..\n99 foo" + assert res == exp + + def test_to_string_float_format(self): + s = Series(range(10), dtype="float64") + res = s.to_string(float_format=lambda x: f"{x:2.1f}", max_rows=2) + exp = "0 0.0\n ..\n9 9.0" + assert res == exp + + def test_to_string_header(self): + s = Series(range(10), dtype="int64") + s.index.name = "foo" + res = s.to_string(header=True, max_rows=2) + exp = "foo\n0 0\n ..\n9 9" + assert res == exp + res = s.to_string(header=False, max_rows=2) + exp = "0 0\n ..\n9 9" + assert res == exp + + def test_to_string_multindex_header(self): + # GH 16718 + df = DataFrame({"a": [0], "b": [1], "c": [2], "d": [3]}).set_index(["a", "b"]) + res = df.to_string(header=["r1", "r2"]) + exp = " r1 r2\na b \n0 1 2 3" + assert res == exp + + def test_to_string_empty_col(self): + # GH 13653 + s = Series(["", "Hello", "World", "", "", "Mooooo", "", ""]) + res = s.to_string(index=False) + exp = " \n Hello\n World\n \n \nMooooo\n \n " + assert re.match(exp, res) + + +class TestGenericArrayFormatter: + def test_1d_array(self): + # GenericArrayFormatter is used on types for which there isn't a dedicated + # formatter. np.bool_ is one of those types. + obj = fmt.GenericArrayFormatter(np.array([True, False])) + res = obj.get_result() + assert len(res) == 2 + # Results should be right-justified. + assert res[0] == " True" + assert res[1] == " False" + + def test_2d_array(self): + obj = fmt.GenericArrayFormatter(np.array([[True, False], [False, True]])) + res = obj.get_result() + assert len(res) == 2 + assert res[0] == " [True, False]" + assert res[1] == " [False, True]" + + def test_3d_array(self): + obj = fmt.GenericArrayFormatter( + np.array([[[True, True], [False, False]], [[False, True], [True, False]]]) + ) + res = obj.get_result() + assert len(res) == 2 + assert res[0] == " [[True, True], [False, False]]" + assert res[1] == " [[False, True], [True, False]]" + + def test_2d_extension_type(self): + # GH 33770 + + # Define a stub extension type with just enough code to run Series.__repr__() + class DtypeStub(pd.api.extensions.ExtensionDtype): + @property + def type(self): + return np.ndarray + + @property + def name(self): + return "DtypeStub" + + class ExtTypeStub(pd.api.extensions.ExtensionArray): + def __len__(self): + return 2 + + def __getitem__(self, ix): + return [ix == 1, ix == 0] + + @property + def dtype(self): + return DtypeStub() + + series = Series(ExtTypeStub()) + res = repr(series) # This line crashed before #33770 was fixed. 
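+        # The stubs above implement only __len__, __getitem__ and dtype -- just
+        # enough of the ExtensionDtype/ExtensionArray surface for the Series
+        # repr code path to run; nothing more is exercised by this test.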
+ expected = "0 [False True]\n" + "1 [ True False]\n" + "dtype: DtypeStub" + assert res == expected + + +def _three_digit_exp(): + return f"{1.7e8:.4g}" == "1.7e+008" + + +class TestFloatArrayFormatter: + def test_misc(self): + obj = fmt.FloatArrayFormatter(np.array([], dtype=np.float64)) + result = obj.get_result() + assert len(result) == 0 + + def test_format(self): + obj = fmt.FloatArrayFormatter(np.array([12, 0], dtype=np.float64)) + result = obj.get_result() + assert result[0] == " 12.0" + assert result[1] == " 0.0" + + def test_output_display_precision_trailing_zeroes(self): + # Issue #20359: trimming zeros while there is no decimal point + + # Happens when display precision is set to zero + with option_context("display.precision", 0): + s = Series([840.0, 4200.0]) + expected_output = "0 840\n1 4200\ndtype: float64" + assert str(s) == expected_output + + def test_output_significant_digits(self): + # Issue #9764 + + # In case default display precision changes: + with option_context("display.precision", 6): + # DataFrame example from issue #9764 + d = DataFrame( + { + "col1": [ + 9.999e-8, + 1e-7, + 1.0001e-7, + 2e-7, + 4.999e-7, + 5e-7, + 5.0001e-7, + 6e-7, + 9.999e-7, + 1e-6, + 1.0001e-6, + 2e-6, + 4.999e-6, + 5e-6, + 5.0001e-6, + 6e-6, + ] + } + ) + + expected_output = { + (0, 6): " col1\n" + "0 9.999000e-08\n" + "1 1.000000e-07\n" + "2 1.000100e-07\n" + "3 2.000000e-07\n" + "4 4.999000e-07\n" + "5 5.000000e-07", + (1, 6): " col1\n" + "1 1.000000e-07\n" + "2 1.000100e-07\n" + "3 2.000000e-07\n" + "4 4.999000e-07\n" + "5 5.000000e-07", + (1, 8): " col1\n" + "1 1.000000e-07\n" + "2 1.000100e-07\n" + "3 2.000000e-07\n" + "4 4.999000e-07\n" + "5 5.000000e-07\n" + "6 5.000100e-07\n" + "7 6.000000e-07", + (8, 16): " col1\n" + "8 9.999000e-07\n" + "9 1.000000e-06\n" + "10 1.000100e-06\n" + "11 2.000000e-06\n" + "12 4.999000e-06\n" + "13 5.000000e-06\n" + "14 5.000100e-06\n" + "15 6.000000e-06", + (9, 16): " col1\n" + "9 0.000001\n" + "10 0.000001\n" + "11 0.000002\n" + "12 0.000005\n" + "13 0.000005\n" + "14 0.000005\n" + "15 0.000006", + } + + for (start, stop), v in expected_output.items(): + assert str(d[start:stop]) == v + + def test_too_long(self): + # GH 10451 + with option_context("display.precision", 4): + # need both a number > 1e6 and something that normally formats to + # having length > display.precision + 6 + df = DataFrame({"x": [12345.6789]}) + assert str(df) == " x\n0 12345.6789" + df = DataFrame({"x": [2e6]}) + assert str(df) == " x\n0 2000000.0" + df = DataFrame({"x": [12345.6789, 2e6]}) + assert str(df) == " x\n0 1.2346e+04\n1 2.0000e+06" + + +class TestRepr_timedelta64: + def test_none(self): + delta_1d = pd.to_timedelta(1, unit="D") + delta_0d = pd.to_timedelta(0, unit="D") + delta_1s = pd.to_timedelta(1, unit="s") + delta_500ms = pd.to_timedelta(500, unit="ms") + + drepr = lambda x: x._repr_base() + assert drepr(delta_1d) == "1 days" + assert drepr(-delta_1d) == "-1 days" + assert drepr(delta_0d) == "0 days" + assert drepr(delta_1s) == "0 days 00:00:01" + assert drepr(delta_500ms) == "0 days 00:00:00.500000" + assert drepr(delta_1d + delta_1s) == "1 days 00:00:01" + assert drepr(-delta_1d + delta_1s) == "-1 days +00:00:01" + assert drepr(delta_1d + delta_500ms) == "1 days 00:00:00.500000" + assert drepr(-delta_1d + delta_500ms) == "-1 days +00:00:00.500000" + + def test_sub_day(self): + delta_1d = pd.to_timedelta(1, unit="D") + delta_0d = pd.to_timedelta(0, unit="D") + delta_1s = pd.to_timedelta(1, unit="s") + delta_500ms = pd.to_timedelta(500, unit="ms") + + drepr = 
lambda x: x._repr_base(format="sub_day") + assert drepr(delta_1d) == "1 days" + assert drepr(-delta_1d) == "-1 days" + assert drepr(delta_0d) == "00:00:00" + assert drepr(delta_1s) == "00:00:01" + assert drepr(delta_500ms) == "00:00:00.500000" + assert drepr(delta_1d + delta_1s) == "1 days 00:00:01" + assert drepr(-delta_1d + delta_1s) == "-1 days +00:00:01" + assert drepr(delta_1d + delta_500ms) == "1 days 00:00:00.500000" + assert drepr(-delta_1d + delta_500ms) == "-1 days +00:00:00.500000" + + def test_long(self): + delta_1d = pd.to_timedelta(1, unit="D") + delta_0d = pd.to_timedelta(0, unit="D") + delta_1s = pd.to_timedelta(1, unit="s") + delta_500ms = pd.to_timedelta(500, unit="ms") + + drepr = lambda x: x._repr_base(format="long") + assert drepr(delta_1d) == "1 days 00:00:00" + assert drepr(-delta_1d) == "-1 days +00:00:00" + assert drepr(delta_0d) == "0 days 00:00:00" + assert drepr(delta_1s) == "0 days 00:00:01" + assert drepr(delta_500ms) == "0 days 00:00:00.500000" + assert drepr(delta_1d + delta_1s) == "1 days 00:00:01" + assert drepr(-delta_1d + delta_1s) == "-1 days +00:00:01" + assert drepr(delta_1d + delta_500ms) == "1 days 00:00:00.500000" + assert drepr(-delta_1d + delta_500ms) == "-1 days +00:00:00.500000" + + def test_all(self): + delta_1d = pd.to_timedelta(1, unit="D") + delta_0d = pd.to_timedelta(0, unit="D") + delta_1ns = pd.to_timedelta(1, unit="ns") + + drepr = lambda x: x._repr_base(format="all") + assert drepr(delta_1d) == "1 days 00:00:00.000000000" + assert drepr(-delta_1d) == "-1 days +00:00:00.000000000" + assert drepr(delta_0d) == "0 days 00:00:00.000000000" + assert drepr(delta_1ns) == "0 days 00:00:00.000000001" + assert drepr(-delta_1d + delta_1ns) == "-1 days +00:00:00.000000001" + + +class TestTimedelta64Formatter: + def test_days(self): + x = pd.to_timedelta(list(range(5)) + [NaT], unit="D") + result = fmt.Timedelta64Formatter(x, box=True).get_result() + assert result[0].strip() == "'0 days'" + assert result[1].strip() == "'1 days'" + + result = fmt.Timedelta64Formatter(x[1:2], box=True).get_result() + assert result[0].strip() == "'1 days'" + + result = fmt.Timedelta64Formatter(x, box=False).get_result() + assert result[0].strip() == "0 days" + assert result[1].strip() == "1 days" + + result = fmt.Timedelta64Formatter(x[1:2], box=False).get_result() + assert result[0].strip() == "1 days" + + def test_days_neg(self): + x = pd.to_timedelta(list(range(5)) + [NaT], unit="D") + result = fmt.Timedelta64Formatter(-x, box=True).get_result() + assert result[0].strip() == "'0 days'" + assert result[1].strip() == "'-1 days'" + + def test_subdays(self): + y = pd.to_timedelta(list(range(5)) + [NaT], unit="s") + result = fmt.Timedelta64Formatter(y, box=True).get_result() + assert result[0].strip() == "'0 days 00:00:00'" + assert result[1].strip() == "'0 days 00:00:01'" + + def test_subdays_neg(self): + y = pd.to_timedelta(list(range(5)) + [NaT], unit="s") + result = fmt.Timedelta64Formatter(-y, box=True).get_result() + assert result[0].strip() == "'0 days 00:00:00'" + assert result[1].strip() == "'-1 days +23:59:59'" + + def test_zero(self): + x = pd.to_timedelta(list(range(1)) + [NaT], unit="D") + result = fmt.Timedelta64Formatter(x, box=True).get_result() + assert result[0].strip() == "'0 days'" + + x = pd.to_timedelta(list(range(1)), unit="D") + result = fmt.Timedelta64Formatter(x, box=True).get_result() + assert result[0].strip() == "'0 days'" + + +class TestDatetime64Formatter: + def test_mixed(self): + x = Series([datetime(2013, 1, 1), datetime(2013, 1, 1, 12), 
NaT])
+        result = fmt.Datetime64Formatter(x).get_result()
+        assert result[0].strip() == "2013-01-01 00:00:00"
+        assert result[1].strip() == "2013-01-01 12:00:00"
+
+    def test_dates(self):
+        x = Series([datetime(2013, 1, 1), datetime(2013, 1, 2), NaT])
+        result = fmt.Datetime64Formatter(x).get_result()
+        assert result[0].strip() == "2013-01-01"
+        assert result[1].strip() == "2013-01-02"
+
+    def test_date_nanos(self):
+        x = Series([Timestamp(200)])
+        result = fmt.Datetime64Formatter(x).get_result()
+        assert result[0].strip() == "1970-01-01 00:00:00.000000200"
+
+    def test_dates_display(self):
+
+        # GH 10170
+        # make sure that we consistently display date formatting
+        x = Series(date_range("20130101 09:00:00", periods=5, freq="D"))
+        x.iloc[1] = np.nan
+        result = fmt.Datetime64Formatter(x).get_result()
+        assert result[0].strip() == "2013-01-01 09:00:00"
+        assert result[1].strip() == "NaT"
+        assert result[4].strip() == "2013-01-05 09:00:00"
+
+        x = Series(date_range("20130101 09:00:00", periods=5, freq="s"))
+        x.iloc[1] = np.nan
+        result = fmt.Datetime64Formatter(x).get_result()
+        assert result[0].strip() == "2013-01-01 09:00:00"
+        assert result[1].strip() == "NaT"
+        assert result[4].strip() == "2013-01-01 09:00:04"
+
+        x = Series(date_range("20130101 09:00:00", periods=5, freq="ms"))
+        x.iloc[1] = np.nan
+        result = fmt.Datetime64Formatter(x).get_result()
+        assert result[0].strip() == "2013-01-01 09:00:00.000"
+        assert result[1].strip() == "NaT"
+        assert result[4].strip() == "2013-01-01 09:00:00.004"
+
+        x = Series(date_range("20130101 09:00:00", periods=5, freq="us"))
+        x.iloc[1] = np.nan
+        result = fmt.Datetime64Formatter(x).get_result()
+        assert result[0].strip() == "2013-01-01 09:00:00.000000"
+        assert result[1].strip() == "NaT"
+        assert result[4].strip() == "2013-01-01 09:00:00.000004"
+
+        x = Series(date_range("20130101 09:00:00", periods=5, freq="N"))
+        x.iloc[1] = np.nan
+        result = fmt.Datetime64Formatter(x).get_result()
+        assert result[0].strip() == "2013-01-01 09:00:00.000000000"
+        assert result[1].strip() == "NaT"
+        assert result[4].strip() == "2013-01-01 09:00:00.000000004"
+
+    def test_datetime64formatter_yearmonth(self):
+        x = Series([datetime(2016, 1, 1), datetime(2016, 2, 2)])
+
+        def format_func(x):
+            return x.strftime("%Y-%m")
+
+        formatter = fmt.Datetime64Formatter(x, formatter=format_func)
+        result = formatter.get_result()
+        assert result == ["2016-01", "2016-02"]
+
+    def test_datetime64formatter_hoursecond(self):
+
+        x = Series(
+            pd.to_datetime(["10:10:10.100", "12:12:12.120"], format="%H:%M:%S.%f")
+        )
+
+        def format_func(x):
+            return x.strftime("%H:%M")
+
+        formatter = fmt.Datetime64Formatter(x, formatter=format_func)
+        result = formatter.get_result()
+        assert result == ["10:10", "12:12"]
+
+
+class TestNaTFormatting:
+    def test_repr(self):
+        assert repr(NaT) == "NaT"
+
+    def test_str(self):
+        assert str(NaT) == "NaT"
+
+
+class TestDatetimeIndexFormat:
+    def test_datetime(self):
+        formatted = pd.to_datetime([datetime(2003, 1, 1, 12), NaT]).format()
+        assert formatted[0] == "2003-01-01 12:00:00"
+        assert formatted[1] == "NaT"
+
+    def test_date(self):
+        formatted = pd.to_datetime([datetime(2003, 1, 1), NaT]).format()
+        assert formatted[0] == "2003-01-01"
+        assert formatted[1] == "NaT"
+
+    def test_date_tz(self):
+        formatted = pd.to_datetime([datetime(2013, 1, 1)], utc=True).format()
+        assert formatted[0] == "2013-01-01 00:00:00+00:00"
+
+        formatted = pd.to_datetime([datetime(2013, 1, 1), NaT], utc=True).format()
+        assert formatted[0] == "2013-01-01 00:00:00+00:00"
+
+    
def test_date_explicit_date_format(self): + formatted = pd.to_datetime([datetime(2003, 2, 1), NaT]).format( + date_format="%m-%d-%Y", na_rep="UT" + ) + assert formatted[0] == "02-01-2003" + assert formatted[1] == "UT" + + +class TestDatetimeIndexUnicode: + def test_dates(self): + text = str(pd.to_datetime([datetime(2013, 1, 1), datetime(2014, 1, 1)])) + assert "['2013-01-01'," in text + assert ", '2014-01-01']" in text + + def test_mixed(self): + text = str( + pd.to_datetime( + [datetime(2013, 1, 1), datetime(2014, 1, 1, 12), datetime(2014, 1, 1)] + ) + ) + assert "'2013-01-01 00:00:00'," in text + assert "'2014-01-01 00:00:00']" in text + + +class TestStringRepTimestamp: + def test_no_tz(self): + dt_date = datetime(2013, 1, 2) + assert str(dt_date) == str(Timestamp(dt_date)) + + dt_datetime = datetime(2013, 1, 2, 12, 1, 3) + assert str(dt_datetime) == str(Timestamp(dt_datetime)) + + dt_datetime_us = datetime(2013, 1, 2, 12, 1, 3, 45) + assert str(dt_datetime_us) == str(Timestamp(dt_datetime_us)) + + ts_nanos_only = Timestamp(200) + assert str(ts_nanos_only) == "1970-01-01 00:00:00.000000200" + + ts_nanos_micros = Timestamp(1200) + assert str(ts_nanos_micros) == "1970-01-01 00:00:00.000001200" + + def test_tz_pytz(self): + dt_date = datetime(2013, 1, 2, tzinfo=pytz.utc) + assert str(dt_date) == str(Timestamp(dt_date)) + + dt_datetime = datetime(2013, 1, 2, 12, 1, 3, tzinfo=pytz.utc) + assert str(dt_datetime) == str(Timestamp(dt_datetime)) + + dt_datetime_us = datetime(2013, 1, 2, 12, 1, 3, 45, tzinfo=pytz.utc) + assert str(dt_datetime_us) == str(Timestamp(dt_datetime_us)) + + def test_tz_dateutil(self): + utc = dateutil.tz.tzutc() + + dt_date = datetime(2013, 1, 2, tzinfo=utc) + assert str(dt_date) == str(Timestamp(dt_date)) + + dt_datetime = datetime(2013, 1, 2, 12, 1, 3, tzinfo=utc) + assert str(dt_datetime) == str(Timestamp(dt_datetime)) + + dt_datetime_us = datetime(2013, 1, 2, 12, 1, 3, 45, tzinfo=utc) + assert str(dt_datetime_us) == str(Timestamp(dt_datetime_us)) + + def test_nat_representations(self): + for f in (str, repr, methodcaller("isoformat")): + assert f(NaT) == "NaT" + + +def test_format_percentiles(): + result = fmt.format_percentiles([0.01999, 0.02001, 0.5, 0.666666, 0.9999]) + expected = ["1.999%", "2.001%", "50%", "66.667%", "99.99%"] + assert result == expected + + result = fmt.format_percentiles([0, 0.5, 0.02001, 0.5, 0.666666, 0.9999]) + expected = ["0%", "50%", "2.0%", "50%", "66.67%", "99.99%"] + assert result == expected + + msg = r"percentiles should all be in the interval \[0,1\]" + with pytest.raises(ValueError, match=msg): + fmt.format_percentiles([0.1, np.nan, 0.5]) + with pytest.raises(ValueError, match=msg): + fmt.format_percentiles([-0.001, 0.1, 0.5]) + with pytest.raises(ValueError, match=msg): + fmt.format_percentiles([2, 0.1, 0.5]) + with pytest.raises(ValueError, match=msg): + fmt.format_percentiles([0.1, 0.5, "a"]) + + +def test_format_percentiles_integer_idx(): + # Issue #26660 + result = fmt.format_percentiles(np.linspace(0, 1, 10 + 1)) + expected = [ + "0%", + "10%", + "20%", + "30%", + "40%", + "50%", + "60%", + "70%", + "80%", + "90%", + "100%", + ] + assert result == expected + + +@td.check_file_leaks +def test_repr_html_ipython_config(ip): + code = textwrap.dedent( + """\ + from pandas import DataFrame + df = DataFrame({"A": [1, 2]}) + df._repr_html_() + + cfg = get_ipython().config + cfg['IPKernelApp']['parent_appname'] + df._repr_html_() + """ + ) + result = ip.run_cell(code) + assert not result.error_in_exec + + 
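+# The parametrized test below exercises to_string/to_html/to_latex through the
+# filepath_or_buffer fixture (presumably conftest-provided: a file name, a
+# path-like object and writable buffers). An explicit encoding only makes
+# sense for file names, so the buffer variants are expected to raise.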
+@pytest.mark.filterwarnings("ignore:In future versions `DataFrame.to_latex`") +@pytest.mark.parametrize("method", ["to_string", "to_html", "to_latex"]) +@pytest.mark.parametrize( + "encoding, data", + [(None, "abc"), ("utf-8", "abc"), ("gbk", "造成输出中文显示乱码"), ("foo", "abc")], +) +def test_filepath_or_buffer_arg( + method, + filepath_or_buffer, + assert_filepath_or_buffer_equals, + encoding, + data, + filepath_or_buffer_id, +): + df = DataFrame([data]) + + if filepath_or_buffer_id not in ["string", "pathlike"] and encoding is not None: + with pytest.raises( + ValueError, match="buf is not a file name and encoding is specified." + ): + getattr(df, method)(buf=filepath_or_buffer, encoding=encoding) + elif encoding == "foo": + expected_warning = FutureWarning if method == "to_latex" else None + with tm.assert_produces_warning(expected_warning): + with pytest.raises(LookupError, match="unknown encoding"): + getattr(df, method)(buf=filepath_or_buffer, encoding=encoding) + else: + expected = getattr(df, method)() + getattr(df, method)(buf=filepath_or_buffer, encoding=encoding) + assert_filepath_or_buffer_equals(expected) + + +@pytest.mark.filterwarnings("ignore::FutureWarning") +@pytest.mark.parametrize("method", ["to_string", "to_html", "to_latex"]) +def test_filepath_or_buffer_bad_arg_raises(float_frame, method): + msg = "buf is not a file name and it has no write method" + with pytest.raises(TypeError, match=msg): + getattr(float_frame, method)(buf=object()) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/test_info.py b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/test_info.py new file mode 100644 index 0000000..5522631 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/test_info.py @@ -0,0 +1,492 @@ +from io import StringIO +import re +from string import ascii_uppercase as uppercase +import sys +import textwrap + +import numpy as np +import pytest + +from pandas.compat import ( + IS64, + PYPY, +) + +from pandas import ( + CategoricalIndex, + DataFrame, + MultiIndex, + Series, + date_range, + option_context, +) + + +@pytest.fixture +def duplicate_columns_frame(): + """Dataframe with duplicate column names.""" + return DataFrame(np.random.randn(1500, 4), columns=["a", "a", "b", "b"]) + + +def test_info_empty(): + df = DataFrame() + buf = StringIO() + df.info(buf=buf) + result = buf.getvalue() + expected = textwrap.dedent( + """\ + + Index: 0 entries + Empty DataFrame""" + ) + assert result == expected + + +def test_info_categorical_column_smoke_test(): + n = 2500 + df = DataFrame({"int64": np.random.randint(100, size=n)}) + df["category"] = Series( + np.array(list("abcdefghij")).take(np.random.randint(0, 10, size=n)) + ).astype("category") + df.isna() + buf = StringIO() + df.info(buf=buf) + + df2 = df[df["category"] == "d"] + buf = StringIO() + df2.info(buf=buf) + + +@pytest.mark.parametrize( + "fixture_func_name", + [ + "int_frame", + "float_frame", + "datetime_frame", + "duplicate_columns_frame", + ], +) +def test_info_smoke_test(fixture_func_name, request): + frame = request.getfixturevalue(fixture_func_name) + buf = StringIO() + frame.info(buf=buf) + result = buf.getvalue().splitlines() + assert len(result) > 10 + + +@pytest.mark.parametrize( + "num_columns, max_info_columns, verbose", + [ + (10, 100, True), + (10, 11, True), + (10, 10, True), + (10, 9, False), + (10, 1, False), + ], +) +def test_info_default_verbose_selection(num_columns, max_info_columns, verbose): + frame = DataFrame(np.random.randn(5, num_columns)) + with 
option_context("display.max_info_columns", max_info_columns): + io_default = StringIO() + frame.info(buf=io_default) + result = io_default.getvalue() + + io_explicit = StringIO() + frame.info(buf=io_explicit, verbose=verbose) + expected = io_explicit.getvalue() + + assert result == expected + + +def test_info_verbose_check_header_separator_body(): + buf = StringIO() + size = 1001 + start = 5 + frame = DataFrame(np.random.randn(3, size)) + frame.info(verbose=True, buf=buf) + + res = buf.getvalue() + header = " # Column Dtype \n--- ------ ----- " + assert header in res + + frame.info(verbose=True, buf=buf) + buf.seek(0) + lines = buf.readlines() + assert len(lines) > 0 + + for i, line in enumerate(lines): + if i >= start and i < start + size: + line_nr = f" {i - start} " + assert line.startswith(line_nr) + + +@pytest.mark.parametrize( + "size, header_exp, separator_exp, first_line_exp, last_line_exp", + [ + ( + 4, + " # Column Non-Null Count Dtype ", + "--- ------ -------------- ----- ", + " 0 0 3 non-null float64", + " 3 3 3 non-null float64", + ), + ( + 11, + " # Column Non-Null Count Dtype ", + "--- ------ -------------- ----- ", + " 0 0 3 non-null float64", + " 10 10 3 non-null float64", + ), + ( + 101, + " # Column Non-Null Count Dtype ", + "--- ------ -------------- ----- ", + " 0 0 3 non-null float64", + " 100 100 3 non-null float64", + ), + ( + 1001, + " # Column Non-Null Count Dtype ", + "--- ------ -------------- ----- ", + " 0 0 3 non-null float64", + " 1000 1000 3 non-null float64", + ), + ( + 10001, + " # Column Non-Null Count Dtype ", + "--- ------ -------------- ----- ", + " 0 0 3 non-null float64", + " 10000 10000 3 non-null float64", + ), + ], +) +def test_info_verbose_with_counts_spacing( + size, header_exp, separator_exp, first_line_exp, last_line_exp +): + """Test header column, spacer, first line and last line in verbose mode.""" + frame = DataFrame(np.random.randn(3, size)) + with StringIO() as buf: + frame.info(verbose=True, show_counts=True, buf=buf) + all_lines = buf.getvalue().splitlines() + # Here table would contain only header, separator and table lines + # dframe repr, index summary, memory usage and dtypes are excluded + table = all_lines[3:-2] + header, separator, first_line, *rest, last_line = table + assert header == header_exp + assert separator == separator_exp + assert first_line == first_line_exp + assert last_line == last_line_exp + + +def test_info_memory(): + # https://github.com/pandas-dev/pandas/issues/21056 + df = DataFrame({"a": Series([1, 2], dtype="i8")}) + buf = StringIO() + df.info(buf=buf) + result = buf.getvalue() + bytes = float(df.memory_usage().sum()) + expected = textwrap.dedent( + f"""\ + + RangeIndex: 2 entries, 0 to 1 + Data columns (total 1 columns): + # Column Non-Null Count Dtype + --- ------ -------------- ----- + 0 a 2 non-null int64 + dtypes: int64(1) + memory usage: {bytes} bytes + """ + ) + assert result == expected + + +def test_info_wide(): + io = StringIO() + df = DataFrame(np.random.randn(5, 101)) + df.info(buf=io) + + io = StringIO() + df.info(buf=io, max_cols=101) + result = io.getvalue() + assert len(result.splitlines()) > 100 + + expected = result + with option_context("display.max_info_columns", 101): + io = StringIO() + df.info(buf=io) + result = io.getvalue() + assert result == expected + + +def test_info_duplicate_columns_shows_correct_dtypes(): + # GH11761 + io = StringIO() + frame = DataFrame([[1, 2.0]], columns=["a", "a"]) + frame.info(buf=io) + lines = io.getvalue().splitlines(True) + assert " 0 a 1 non-null 
int64 \n" == lines[5] + assert " 1 a 1 non-null float64\n" == lines[6] + + +def test_info_shows_column_dtypes(): + dtypes = [ + "int64", + "float64", + "datetime64[ns]", + "timedelta64[ns]", + "complex128", + "object", + "bool", + ] + data = {} + n = 10 + for i, dtype in enumerate(dtypes): + data[i] = np.random.randint(2, size=n).astype(dtype) + df = DataFrame(data) + buf = StringIO() + df.info(buf=buf) + res = buf.getvalue() + header = ( + " # Column Non-Null Count Dtype \n" + "--- ------ -------------- ----- " + ) + assert header in res + for i, dtype in enumerate(dtypes): + name = f" {i:d} {i:d} {n:d} non-null {dtype}" + assert name in res + + +def test_info_max_cols(): + df = DataFrame(np.random.randn(10, 5)) + for len_, verbose in [(5, None), (5, False), (12, True)]: + # For verbose always ^ setting ^ summarize ^ full output + with option_context("max_info_columns", 4): + buf = StringIO() + df.info(buf=buf, verbose=verbose) + res = buf.getvalue() + assert len(res.strip().split("\n")) == len_ + + for len_, verbose in [(12, None), (5, False), (12, True)]: + # max_cols not exceeded + with option_context("max_info_columns", 5): + buf = StringIO() + df.info(buf=buf, verbose=verbose) + res = buf.getvalue() + assert len(res.strip().split("\n")) == len_ + + for len_, max_cols in [(12, 5), (5, 4)]: + # setting truncates + with option_context("max_info_columns", 4): + buf = StringIO() + df.info(buf=buf, max_cols=max_cols) + res = buf.getvalue() + assert len(res.strip().split("\n")) == len_ + + # setting wouldn't truncate + with option_context("max_info_columns", 5): + buf = StringIO() + df.info(buf=buf, max_cols=max_cols) + res = buf.getvalue() + assert len(res.strip().split("\n")) == len_ + + +def test_info_memory_usage(): + # Ensure memory usage is displayed, when asserted, on the last line + dtypes = [ + "int64", + "float64", + "datetime64[ns]", + "timedelta64[ns]", + "complex128", + "object", + "bool", + ] + data = {} + n = 10 + for i, dtype in enumerate(dtypes): + data[i] = np.random.randint(2, size=n).astype(dtype) + df = DataFrame(data) + buf = StringIO() + + # display memory usage case + df.info(buf=buf, memory_usage=True) + res = buf.getvalue().splitlines() + assert "memory usage: " in res[-1] + + # do not display memory usage case + df.info(buf=buf, memory_usage=False) + res = buf.getvalue().splitlines() + assert "memory usage: " not in res[-1] + + df.info(buf=buf, memory_usage=True) + res = buf.getvalue().splitlines() + + # memory usage is a lower bound, so print it as XYZ+ MB + assert re.match(r"memory usage: [^+]+\+", res[-1]) + + df.iloc[:, :5].info(buf=buf, memory_usage=True) + res = buf.getvalue().splitlines() + + # excluded column with object dtype, so estimate is accurate + assert not re.match(r"memory usage: [^+]+\+", res[-1]) + + # Test a DataFrame with duplicate columns + dtypes = ["int64", "int64", "int64", "float64"] + data = {} + n = 100 + for i, dtype in enumerate(dtypes): + data[i] = np.random.randint(2, size=n).astype(dtype) + df = DataFrame(data) + df.columns = dtypes + + df_with_object_index = DataFrame({"a": [1]}, index=["foo"]) + df_with_object_index.info(buf=buf, memory_usage=True) + res = buf.getvalue().splitlines() + assert re.match(r"memory usage: [^+]+\+", res[-1]) + + df_with_object_index.info(buf=buf, memory_usage="deep") + res = buf.getvalue().splitlines() + assert re.match(r"memory usage: [^+]+$", res[-1]) + + # Ensure df size is as expected + # (cols * rows * bytes) + index size + df_size = df.memory_usage().sum() + exp_size = len(dtypes) * n * 8 + 
df.index.nbytes + assert df_size == exp_size + + # Ensure number of cols in memory_usage is the same as df + size_df = np.size(df.columns.values) + 1 # index=True; default + assert size_df == np.size(df.memory_usage()) + + # assert deep works only on object + assert df.memory_usage().sum() == df.memory_usage(deep=True).sum() + + # test for validity + DataFrame(1, index=["a"], columns=["A"]).memory_usage(index=True) + DataFrame(1, index=["a"], columns=["A"]).index.nbytes + df = DataFrame( + data=1, index=MultiIndex.from_product([["a"], range(1000)]), columns=["A"] + ) + df.index.nbytes + df.memory_usage(index=True) + df.index.values.nbytes + + mem = df.memory_usage(deep=True).sum() + assert mem > 0 + + +@pytest.mark.skipif(PYPY, reason="on PyPy deep=True doesn't change result") +def test_info_memory_usage_deep_not_pypy(): + df_with_object_index = DataFrame({"a": [1]}, index=["foo"]) + assert ( + df_with_object_index.memory_usage(index=True, deep=True).sum() + > df_with_object_index.memory_usage(index=True).sum() + ) + + df_object = DataFrame({"a": ["a"]}) + assert df_object.memory_usage(deep=True).sum() > df_object.memory_usage().sum() + + +@pytest.mark.skipif(not PYPY, reason="on PyPy deep=True does not change result") +def test_info_memory_usage_deep_pypy(): + df_with_object_index = DataFrame({"a": [1]}, index=["foo"]) + assert ( + df_with_object_index.memory_usage(index=True, deep=True).sum() + == df_with_object_index.memory_usage(index=True).sum() + ) + + df_object = DataFrame({"a": ["a"]}) + assert df_object.memory_usage(deep=True).sum() == df_object.memory_usage().sum() + + +@pytest.mark.skipif(PYPY, reason="PyPy getsizeof() fails by design") +def test_usage_via_getsizeof(): + df = DataFrame( + data=1, index=MultiIndex.from_product([["a"], range(1000)]), columns=["A"] + ) + mem = df.memory_usage(deep=True).sum() + # sys.getsizeof will call the .memory_usage with + # deep=True, and add on some GC overhead + diff = mem - sys.getsizeof(df) + assert abs(diff) < 100 + + +def test_info_memory_usage_qualified(): + buf = StringIO() + df = DataFrame(1, columns=list("ab"), index=[1, 2, 3]) + df.info(buf=buf) + assert "+" not in buf.getvalue() + + buf = StringIO() + df = DataFrame(1, columns=list("ab"), index=list("ABC")) + df.info(buf=buf) + assert "+" in buf.getvalue() + + buf = StringIO() + df = DataFrame( + 1, columns=list("ab"), index=MultiIndex.from_product([range(3), range(3)]) + ) + df.info(buf=buf) + assert "+" not in buf.getvalue() + + buf = StringIO() + df = DataFrame( + 1, columns=list("ab"), index=MultiIndex.from_product([range(3), ["foo", "bar"]]) + ) + df.info(buf=buf) + assert "+" in buf.getvalue() + + +def test_info_memory_usage_bug_on_multiindex(): + # GH 14308 + # memory usage introspection should not materialize .values + + def memory_usage(f): + return f.memory_usage(deep=True).sum() + + N = 100 + M = len(uppercase) + index = MultiIndex.from_product( + [list(uppercase), date_range("20160101", periods=N)], + names=["id", "date"], + ) + df = DataFrame({"value": np.random.randn(N * M)}, index=index) + + unstacked = df.unstack("id") + assert df.values.nbytes == unstacked.values.nbytes + assert memory_usage(df) > memory_usage(unstacked) + + # high upper bound + assert memory_usage(unstacked) - memory_usage(df) < 2000 + + +def test_info_categorical(): + # GH14298 + idx = CategoricalIndex(["a", "b"]) + df = DataFrame(np.zeros((2, 2)), index=idx, columns=idx) + + buf = StringIO() + df.info(buf=buf) + + +@pytest.mark.xfail(not IS64, reason="GH 36579: fail on 32-bit system") +def 
test_info_int_columns():
+    # GH#37245
+    df = DataFrame({1: [1, 2], 2: [2, 3]}, index=["A", "B"])
+    buf = StringIO()
+    df.info(show_counts=True, buf=buf)
+    result = buf.getvalue()
+    expected = textwrap.dedent(
+        """\
+        <class 'pandas.core.frame.DataFrame'>
+        Index: 2 entries, A to B
+        Data columns (total 2 columns):
+         # Column Non-Null Count Dtype
+        --- ------ -------------- -----
+         0 1 2 non-null int64
+         1 2 2 non-null int64
+        dtypes: int64(2)
+        memory usage: 48.0+ bytes
+        """
+    )
+    assert result == expected
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/test_printing.py b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/test_printing.py
new file mode 100644
index 0000000..4fc8a46
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/test_printing.py
@@ -0,0 +1,200 @@
+import numpy as np
+import pytest
+
+import pandas._config.config as cf
+
+import pandas as pd
+
+import pandas.io.formats.format as fmt
+import pandas.io.formats.printing as printing
+
+
+def test_adjoin():
+    data = [["a", "b", "c"], ["dd", "ee", "ff"], ["ggg", "hhh", "iii"]]
+    expected = "a dd ggg\nb ee hhh\nc ff iii"
+
+    adjoined = printing.adjoin(2, *data)
+
+    assert adjoined == expected
+
+
+def test_repr_binary_type():
+    import string
+
+    letters = string.ascii_letters
+    try:
+        raw = bytes(letters, encoding=cf.get_option("display.encoding"))
+    except TypeError:
+        raw = bytes(letters)
+    b = str(raw.decode("utf-8"))
+    res = printing.pprint_thing(b, quote_strings=True)
+    assert res == repr(b)
+    res = printing.pprint_thing(b, quote_strings=False)
+    assert res == b
+
+
+class TestFormatBase:
+    def test_adjoin(self):
+        data = [["a", "b", "c"], ["dd", "ee", "ff"], ["ggg", "hhh", "iii"]]
+        expected = "a dd ggg\nb ee hhh\nc ff iii"
+
+        adjoined = printing.adjoin(2, *data)
+
+        assert adjoined == expected
+
+    def test_adjoin_unicode(self):
+        data = [["あ", "b", "c"], ["dd", "ええ", "ff"], ["ggg", "hhh", "いいい"]]
+        expected = "あ dd ggg\nb ええ hhh\nc ff いいい"
+        adjoined = printing.adjoin(2, *data)
+        assert adjoined == expected
+
+        adj = fmt.EastAsianTextAdjustment()
+
+        expected = """あ dd ggg
+b ええ hhh
+c ff いいい"""
+
+        adjoined = adj.adjoin(2, *data)
+        assert adjoined == expected
+        cols = adjoined.split("\n")
+        assert adj.len(cols[0]) == 13
+        assert adj.len(cols[1]) == 13
+        assert adj.len(cols[2]) == 16
+
+        expected = """あ dd ggg
+b ええ hhh
+c ff いいい"""
+
+        adjoined = adj.adjoin(7, *data)
+        assert adjoined == expected
+        cols = adjoined.split("\n")
+        assert adj.len(cols[0]) == 23
+        assert adj.len(cols[1]) == 23
+        assert adj.len(cols[2]) == 26
+
+    def test_justify(self):
+        adj = fmt.EastAsianTextAdjustment()
+
+        def just(x, *args, **kwargs):
+            # wrapper to test single str
+            return adj.justify([x], *args, **kwargs)[0]
+
+        assert just("abc", 5, mode="left") == "abc "
+        assert just("abc", 5, mode="center") == " abc "
+        assert just("abc", 5, mode="right") == " abc"
+        assert just("abc", 5, mode="left") == "abc "
+        assert just("abc", 5, mode="center") == " abc "
+        assert just("abc", 5, mode="right") == " abc"
+
+        assert just("パンダ", 5, mode="left") == "パンダ"
+        assert just("パンダ", 5, mode="center") == "パンダ"
+        assert just("パンダ", 5, mode="right") == "パンダ"
+
+        assert just("パンダ", 10, mode="left") == "パンダ "
+        assert just("パンダ", 10, mode="center") == " パンダ "
+        assert just("パンダ", 10, mode="right") == " パンダ"
+
+    def test_east_asian_len(self):
+        adj = fmt.EastAsianTextAdjustment()
+
+        assert adj.len("abc") == 3
+        assert adj.len("abc") == 3
+
+        assert adj.len("パンダ") == 6
+        assert adj.len("ﾊﾟﾝﾀﾞ") == 5
+        assert adj.len("パンダpanda") == 11
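+        # Full-width kana count as two display cells each; the half-width
+        # forms (ﾊﾟﾝﾀﾞ) count one cell per code point, hence the widths 5
+        # above and 10 below.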
assert adj.len("パンダpanda") == 10 + + def test_ambiguous_width(self): + adj = fmt.EastAsianTextAdjustment() + assert adj.len("¡¡ab") == 4 + + with cf.option_context("display.unicode.ambiguous_as_wide", True): + adj = fmt.EastAsianTextAdjustment() + assert adj.len("¡¡ab") == 6 + + data = [["あ", "b", "c"], ["dd", "ええ", "ff"], ["ggg", "¡¡ab", "いいい"]] + expected = "あ dd ggg \nb ええ ¡¡ab\nc ff いいい" + adjoined = adj.adjoin(2, *data) + assert adjoined == expected + + +class TestTableSchemaRepr: + @pytest.mark.filterwarnings( + "ignore:.*signature may therefore change.*:FutureWarning" + ) + def test_publishes(self, ip): + ipython = ip.instance(config=ip.config) + df = pd.DataFrame({"A": [1, 2]}) + objects = [df["A"], df, df] # dataframe / series + expected_keys = [ + {"text/plain", "application/vnd.dataresource+json"}, + {"text/plain", "text/html", "application/vnd.dataresource+json"}, + ] + + opt = pd.option_context("display.html.table_schema", True) + for obj, expected in zip(objects, expected_keys): + with opt: + formatted = ipython.display_formatter.format(obj) + assert set(formatted[0].keys()) == expected + + with_latex = pd.option_context("display.latex.repr", True) + + with opt, with_latex: + formatted = ipython.display_formatter.format(obj) + + expected = { + "text/plain", + "text/html", + "text/latex", + "application/vnd.dataresource+json", + } + assert set(formatted[0].keys()) == expected + + def test_publishes_not_implemented(self, ip): + # column MultiIndex + # GH 15996 + midx = pd.MultiIndex.from_product([["A", "B"], ["a", "b", "c"]]) + df = pd.DataFrame(np.random.randn(5, len(midx)), columns=midx) + + opt = pd.option_context("display.html.table_schema", True) + + with opt: + formatted = ip.instance(config=ip.config).display_formatter.format(df) + + expected = {"text/plain", "text/html"} + assert set(formatted[0].keys()) == expected + + def test_config_on(self): + df = pd.DataFrame({"A": [1, 2]}) + with pd.option_context("display.html.table_schema", True): + result = df._repr_data_resource_() + + assert result is not None + + def test_config_default_off(self): + df = pd.DataFrame({"A": [1, 2]}) + with pd.option_context("display.html.table_schema", False): + result = df._repr_data_resource_() + + assert result is None + + def test_enable_data_resource_formatter(self, ip): + # GH 10491 + formatters = ip.instance(config=ip.config).display_formatter.formatters + mimetype = "application/vnd.dataresource+json" + + with pd.option_context("display.html.table_schema", True): + assert "application/vnd.dataresource+json" in formatters + assert formatters[mimetype].enabled + + # still there, just disabled + assert "application/vnd.dataresource+json" in formatters + assert not formatters[mimetype].enabled + + # able to re-set + with pd.option_context("display.html.table_schema", True): + assert "application/vnd.dataresource+json" in formatters + assert formatters[mimetype].enabled + # smoke test that it works + ip.instance(config=ip.config).display_formatter.format(cf) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/test_series_info.py b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/test_series_info.py new file mode 100644 index 0000000..fc83b6c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/test_series_info.py @@ -0,0 +1,179 @@ +from io import StringIO +from string import ascii_uppercase as uppercase +import textwrap + +import numpy as np +import pytest + +from pandas.compat import PYPY + +from pandas import ( + CategoricalIndex, 
+    MultiIndex,
+    Series,
+    date_range,
+)
+
+
+def test_info_categorical_column_just_works():
+    n = 2500
+    data = np.array(list("abcdefghij")).take(np.random.randint(0, 10, size=n))
+    s = Series(data).astype("category")
+    s.isna()
+    buf = StringIO()
+    s.info(buf=buf)
+
+    s2 = s[s == "d"]
+    buf = StringIO()
+    s2.info(buf=buf)
+
+
+def test_info_categorical():
+    # GH14298
+    idx = CategoricalIndex(["a", "b"])
+    s = Series(np.zeros(2), index=idx)
+    buf = StringIO()
+    s.info(buf=buf)
+
+
+@pytest.mark.parametrize("verbose", [True, False])
+def test_info_series(lexsorted_two_level_string_multiindex, verbose):
+    index = lexsorted_two_level_string_multiindex
+    ser = Series(range(len(index)), index=index, name="sth")
+    buf = StringIO()
+    ser.info(verbose=verbose, buf=buf)
+    result = buf.getvalue()
+
+    expected = textwrap.dedent(
+        """\
+        <class 'pandas.core.series.Series'>
+        MultiIndex: 10 entries, ('foo', 'one') to ('qux', 'three')
+        """
+    )
+    if verbose:
+        expected += textwrap.dedent(
+            """\
+            Series name: sth
+            Non-Null Count Dtype
+            -------------- -----
+            10 non-null int64
+            """
+        )
+    expected += textwrap.dedent(
+        f"""\
+        dtypes: int64(1)
+        memory usage: {ser.memory_usage()}.0+ bytes
+        """
+    )
+    assert result == expected
+
+
+def test_info_memory():
+    s = Series([1, 2], dtype="i8")
+    buf = StringIO()
+    s.info(buf=buf)
+    result = buf.getvalue()
+    memory_bytes = float(s.memory_usage())
+    expected = textwrap.dedent(
+        f"""\
+        <class 'pandas.core.series.Series'>
+        RangeIndex: 2 entries, 0 to 1
+        Series name: None
+        Non-Null Count Dtype
+        -------------- -----
+        2 non-null int64
+        dtypes: int64(1)
+        memory usage: {memory_bytes} bytes
+        """
+    )
+    assert result == expected
+
+
+def test_info_wide():
+    s = Series(np.random.randn(101))
+    msg = "Argument `max_cols` can only be passed in DataFrame.info, not Series.info"
+    with pytest.raises(ValueError, match=msg):
+        s.info(max_cols=1)
+
+
+def test_info_shows_dtypes():
+    dtypes = [
+        "int64",
+        "float64",
+        "datetime64[ns]",
+        "timedelta64[ns]",
+        "complex128",
+        "object",
+        "bool",
+    ]
+    n = 10
+    for dtype in dtypes:
+        s = Series(np.random.randint(2, size=n).astype(dtype))
+        buf = StringIO()
+        s.info(buf=buf)
+        res = buf.getvalue()
+        name = f"{n:d} non-null {dtype}"
+        assert name in res
+
+
+@pytest.mark.skipif(PYPY, reason="on PyPy deep=True doesn't change result")
+def test_info_memory_usage_deep_not_pypy():
+    s_with_object_index = Series({"a": [1]}, index=["foo"])
+    assert s_with_object_index.memory_usage(
+        index=True, deep=True
+    ) > s_with_object_index.memory_usage(index=True)
+
+    s_object = Series({"a": ["a"]})
+    assert s_object.memory_usage(deep=True) > s_object.memory_usage()
+
+
+@pytest.mark.skipif(not PYPY, reason="on PyPy deep=True does not change result")
+def test_info_memory_usage_deep_pypy():
+    s_with_object_index = Series({"a": [1]}, index=["foo"])
+    assert s_with_object_index.memory_usage(
+        index=True, deep=True
+    ) == s_with_object_index.memory_usage(index=True)
+
+    s_object = Series({"a": ["a"]})
+    assert s_object.memory_usage(deep=True) == s_object.memory_usage()
+
+
+@pytest.mark.parametrize(
+    "series, plus",
+    [
+        (Series(1, index=[1, 2, 3]), False),
+        (Series(1, index=list("ABC")), True),
+        (Series(1, index=MultiIndex.from_product([range(3), range(3)])), False),
+        (
+            Series(1, index=MultiIndex.from_product([range(3), ["foo", "bar"]])),
+            True,
+        ),
+    ],
+)
+def test_info_memory_usage_qualified(series, plus):
+    buf = StringIO()
+    series.info(buf=buf)
+    if plus:
+        assert "+" in buf.getvalue()
+    else:
+        assert "+" not in buf.getvalue()
+
+
+def test_info_memory_usage_bug_on_multiindex():
+    # GH 14308
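+    # (Series counterpart of the same-named DataFrame test in test_info.py:
+    # the unstacked frame shares the underlying values, so only the index
+    # layout differs in the memory accounting below.)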
+    # memory usage introspection should not materialize .values
+    N = 100
+    M = len(uppercase)
+    index = MultiIndex.from_product(
+        [list(uppercase), date_range("20160101", periods=N)],
+        names=["id", "date"],
+    )
+    s = Series(np.random.randn(N * M), index=index)
+
+    unstacked = s.unstack("id")
+    assert s.values.nbytes == unstacked.values.nbytes
+    assert s.memory_usage(deep=True) > unstacked.memory_usage(deep=True).sum()
+
+    # high upper bound
+    diff = unstacked.memory_usage(deep=True).sum() - s.memory_usage(deep=True)
+    assert diff < 2000
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/test_to_csv.py b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/test_to_csv.py
new file mode 100644
index 0000000..e039ff2
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/test_to_csv.py
@@ -0,0 +1,717 @@
+import io
+import os
+from pathlib import Path
+import sys
+from zipfile import ZipFile
+
+import numpy as np
+import pytest
+
+import pandas as pd
+from pandas import (
+    DataFrame,
+    compat,
+)
+import pandas._testing as tm
+
+import pandas.io.common as icom
+
+
+class TestToCSV:
+    def test_to_csv_with_single_column(self):
+        # see gh-18676, https://bugs.python.org/issue32255
+        #
+        # Python's CSV library adds an extraneous '""'
+        # before the newline when the NaN-value is in
+        # the first row. Otherwise, only the newline
+        # character is added. This behavior is inconsistent
+        # and was patched in https://bugs.python.org/pull_request4672.
+        df1 = DataFrame([None, 1])
+        expected1 = """\
+""
+1.0
+"""
+        with tm.ensure_clean("test.csv") as path:
+            df1.to_csv(path, header=None, index=None)
+            with open(path) as f:
+                assert f.read() == expected1
+
+        df2 = DataFrame([1, None])
+        expected2 = """\
+1.0
+""
+"""
+        with tm.ensure_clean("test.csv") as path:
+            df2.to_csv(path, header=None, index=None)
+            with open(path) as f:
+                assert f.read() == expected2
+
+    def test_to_csv_default_encoding(self):
+        # GH17097
+        df = DataFrame({"col": ["AAAAA", "ÄÄÄÄÄ", "ßßßßß", "聞聞聞聞聞"]})
+
+        with tm.ensure_clean("test.csv") as path:
+            # the default to_csv encoding is utf-8.
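+            # Round-tripping without an explicit encoding should therefore
+            # preserve the non-ASCII data above unchanged.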
+ df.to_csv(path) + tm.assert_frame_equal(pd.read_csv(path, index_col=0), df) + + def test_to_csv_quotechar(self): + df = DataFrame({"col": [1, 2]}) + expected = """\ +"","col" +"0","1" +"1","2" +""" + + with tm.ensure_clean("test.csv") as path: + df.to_csv(path, quoting=1) # 1=QUOTE_ALL + with open(path) as f: + assert f.read() == expected + + expected = """\ +$$,$col$ +$0$,$1$ +$1$,$2$ +""" + + with tm.ensure_clean("test.csv") as path: + df.to_csv(path, quoting=1, quotechar="$") + with open(path) as f: + assert f.read() == expected + + with tm.ensure_clean("test.csv") as path: + with pytest.raises(TypeError, match="quotechar"): + df.to_csv(path, quoting=1, quotechar=None) + + def test_to_csv_doublequote(self): + df = DataFrame({"col": ['a"a', '"bb"']}) + expected = '''\ +"","col" +"0","a""a" +"1","""bb""" +''' + + with tm.ensure_clean("test.csv") as path: + df.to_csv(path, quoting=1, doublequote=True) # QUOTE_ALL + with open(path) as f: + assert f.read() == expected + + from _csv import Error + + with tm.ensure_clean("test.csv") as path: + with pytest.raises(Error, match="escapechar"): + df.to_csv(path, doublequote=False) # no escapechar set + + def test_to_csv_escapechar(self): + df = DataFrame({"col": ['a"a', '"bb"']}) + expected = """\ +"","col" +"0","a\\"a" +"1","\\"bb\\"" +""" + + with tm.ensure_clean("test.csv") as path: # QUOTE_ALL + df.to_csv(path, quoting=1, doublequote=False, escapechar="\\") + with open(path) as f: + assert f.read() == expected + + df = DataFrame({"col": ["a,a", ",bb,"]}) + expected = """\ +,col +0,a\\,a +1,\\,bb\\, +""" + + with tm.ensure_clean("test.csv") as path: + df.to_csv(path, quoting=3, escapechar="\\") # QUOTE_NONE + with open(path) as f: + assert f.read() == expected + + def test_csv_to_string(self): + df = DataFrame({"col": [1, 2]}) + expected_rows = [",col", "0,1", "1,2"] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + assert df.to_csv() == expected + + def test_to_csv_decimal(self): + # see gh-781 + df = DataFrame({"col1": [1], "col2": ["a"], "col3": [10.1]}) + + expected_rows = [",col1,col2,col3", "0,1,a,10.1"] + expected_default = tm.convert_rows_list_to_csv_str(expected_rows) + assert df.to_csv() == expected_default + + expected_rows = [";col1;col2;col3", "0;1;a;10,1"] + expected_european_excel = tm.convert_rows_list_to_csv_str(expected_rows) + assert df.to_csv(decimal=",", sep=";") == expected_european_excel + + expected_rows = [",col1,col2,col3", "0,1,a,10.10"] + expected_float_format_default = tm.convert_rows_list_to_csv_str(expected_rows) + assert df.to_csv(float_format="%.2f") == expected_float_format_default + + expected_rows = [";col1;col2;col3", "0;1;a;10,10"] + expected_float_format = tm.convert_rows_list_to_csv_str(expected_rows) + assert ( + df.to_csv(decimal=",", sep=";", float_format="%.2f") + == expected_float_format + ) + + # see gh-11553: testing if decimal is taken into account for '0.0' + df = DataFrame({"a": [0, 1.1], "b": [2.2, 3.3], "c": 1}) + + expected_rows = ["a,b,c", "0^0,2^2,1", "1^1,3^3,1"] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + assert df.to_csv(index=False, decimal="^") == expected + + # same but for an index + assert df.set_index("a").to_csv(decimal="^") == expected + + # same for a multi-index + assert df.set_index(["a", "b"]).to_csv(decimal="^") == expected + + def test_to_csv_float_format(self): + # testing if float_format is taken into account for the index + # GH 11553 + df = DataFrame({"a": [0, 1], "b": [2.2, 3.3], "c": 1}) + + expected_rows = ["a,b,c", "0,2.20,1", 
"1,3.30,1"] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + assert df.set_index("a").to_csv(float_format="%.2f") == expected + + # same for a multi-index + assert df.set_index(["a", "b"]).to_csv(float_format="%.2f") == expected + + def test_to_csv_na_rep(self): + # see gh-11553 + # + # Testing if NaN values are correctly represented in the index. + df = DataFrame({"a": [0, np.NaN], "b": [0, 1], "c": [2, 3]}) + expected_rows = ["a,b,c", "0.0,0,2", "_,1,3"] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + + assert df.set_index("a").to_csv(na_rep="_") == expected + assert df.set_index(["a", "b"]).to_csv(na_rep="_") == expected + + # now with an index containing only NaNs + df = DataFrame({"a": np.NaN, "b": [0, 1], "c": [2, 3]}) + expected_rows = ["a,b,c", "_,0,2", "_,1,3"] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + + assert df.set_index("a").to_csv(na_rep="_") == expected + assert df.set_index(["a", "b"]).to_csv(na_rep="_") == expected + + # check if na_rep parameter does not break anything when no NaN + df = DataFrame({"a": 0, "b": [0, 1], "c": [2, 3]}) + expected_rows = ["a,b,c", "0,0,2", "0,1,3"] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + + assert df.set_index("a").to_csv(na_rep="_") == expected + assert df.set_index(["a", "b"]).to_csv(na_rep="_") == expected + + csv = pd.Series(["a", pd.NA, "c"]).to_csv(na_rep="ZZZZZ") + expected = tm.convert_rows_list_to_csv_str([",0", "0,a", "1,ZZZZZ", "2,c"]) + assert expected == csv + + def test_to_csv_na_rep_nullable_string(self, nullable_string_dtype): + # GH 29975 + # Make sure full na_rep shows up when a dtype is provided + expected = tm.convert_rows_list_to_csv_str([",0", "0,a", "1,ZZZZZ", "2,c"]) + csv = pd.Series(["a", pd.NA, "c"], dtype=nullable_string_dtype).to_csv( + na_rep="ZZZZZ" + ) + assert expected == csv + + def test_to_csv_date_format(self): + # GH 10209 + df_sec = DataFrame({"A": pd.date_range("20130101", periods=5, freq="s")}) + df_day = DataFrame({"A": pd.date_range("20130101", periods=5, freq="d")}) + + expected_rows = [ + ",A", + "0,2013-01-01 00:00:00", + "1,2013-01-01 00:00:01", + "2,2013-01-01 00:00:02", + "3,2013-01-01 00:00:03", + "4,2013-01-01 00:00:04", + ] + expected_default_sec = tm.convert_rows_list_to_csv_str(expected_rows) + assert df_sec.to_csv() == expected_default_sec + + expected_rows = [ + ",A", + "0,2013-01-01 00:00:00", + "1,2013-01-02 00:00:00", + "2,2013-01-03 00:00:00", + "3,2013-01-04 00:00:00", + "4,2013-01-05 00:00:00", + ] + expected_ymdhms_day = tm.convert_rows_list_to_csv_str(expected_rows) + assert df_day.to_csv(date_format="%Y-%m-%d %H:%M:%S") == expected_ymdhms_day + + expected_rows = [ + ",A", + "0,2013-01-01", + "1,2013-01-01", + "2,2013-01-01", + "3,2013-01-01", + "4,2013-01-01", + ] + expected_ymd_sec = tm.convert_rows_list_to_csv_str(expected_rows) + assert df_sec.to_csv(date_format="%Y-%m-%d") == expected_ymd_sec + + expected_rows = [ + ",A", + "0,2013-01-01", + "1,2013-01-02", + "2,2013-01-03", + "3,2013-01-04", + "4,2013-01-05", + ] + expected_default_day = tm.convert_rows_list_to_csv_str(expected_rows) + assert df_day.to_csv() == expected_default_day + assert df_day.to_csv(date_format="%Y-%m-%d") == expected_default_day + + # see gh-7791 + # + # Testing if date_format parameter is taken into account + # for multi-indexed DataFrames. 
+ df_sec["B"] = 0 + df_sec["C"] = 1 + + expected_rows = ["A,B,C", "2013-01-01,0,1.0"] + expected_ymd_sec = tm.convert_rows_list_to_csv_str(expected_rows) + + df_sec_grouped = df_sec.groupby([pd.Grouper(key="A", freq="1h"), "B"]) + assert df_sec_grouped.mean().to_csv(date_format="%Y-%m-%d") == expected_ymd_sec + + def test_to_csv_different_datetime_formats(self): + # GH#21734 + df = DataFrame( + { + "date": pd.to_datetime("1970-01-01"), + "datetime": pd.date_range("1970-01-01", periods=2, freq="H"), + } + ) + expected_rows = [ + "date,datetime", + "1970-01-01,1970-01-01 00:00:00", + "1970-01-01,1970-01-01 01:00:00", + ] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + assert df.to_csv(index=False) == expected + + def test_to_csv_date_format_in_categorical(self): + # GH#40754 + ser = pd.Series(pd.to_datetime(["2021-03-27", pd.NaT], format="%Y-%m-%d")) + ser = ser.astype("category") + expected = tm.convert_rows_list_to_csv_str(["0", "2021-03-27", '""']) + assert ser.to_csv(index=False) == expected + + ser = pd.Series( + pd.date_range( + start="2021-03-27", freq="D", periods=1, tz="Europe/Berlin" + ).append(pd.DatetimeIndex([pd.NaT])) + ) + ser = ser.astype("category") + assert ser.to_csv(index=False, date_format="%Y-%m-%d") == expected + + def test_to_csv_multi_index(self): + # see gh-6618 + df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1], [2]])) + + exp_rows = [",1", ",2", "0,1"] + exp = tm.convert_rows_list_to_csv_str(exp_rows) + assert df.to_csv() == exp + + exp_rows = ["1", "2", "1"] + exp = tm.convert_rows_list_to_csv_str(exp_rows) + assert df.to_csv(index=False) == exp + + df = DataFrame( + [1], + columns=pd.MultiIndex.from_arrays([[1], [2]]), + index=pd.MultiIndex.from_arrays([[1], [2]]), + ) + + exp_rows = [",,1", ",,2", "1,2,1"] + exp = tm.convert_rows_list_to_csv_str(exp_rows) + assert df.to_csv() == exp + + exp_rows = ["1", "2", "1"] + exp = tm.convert_rows_list_to_csv_str(exp_rows) + assert df.to_csv(index=False) == exp + + df = DataFrame([1], columns=pd.MultiIndex.from_arrays([["foo"], ["bar"]])) + + exp_rows = [",foo", ",bar", "0,1"] + exp = tm.convert_rows_list_to_csv_str(exp_rows) + assert df.to_csv() == exp + + exp_rows = ["foo", "bar", "1"] + exp = tm.convert_rows_list_to_csv_str(exp_rows) + assert df.to_csv(index=False) == exp + + @pytest.mark.parametrize( + "ind,expected", + [ + ( + pd.MultiIndex(levels=[[1.0]], codes=[[0]], names=["x"]), + "x,data\n1.0,1\n", + ), + ( + pd.MultiIndex( + levels=[[1.0], [2.0]], codes=[[0], [0]], names=["x", "y"] + ), + "x,y,data\n1.0,2.0,1\n", + ), + ], + ) + @pytest.mark.parametrize("klass", [DataFrame, pd.Series]) + def test_to_csv_single_level_multi_index(self, ind, expected, klass): + # see gh-19589 + result = klass(pd.Series([1], ind, name="data")).to_csv( + line_terminator="\n", header=True + ) + assert result == expected + + def test_to_csv_string_array_ascii(self): + # GH 10813 + str_array = [{"names": ["foo", "bar"]}, {"names": ["baz", "qux"]}] + df = DataFrame(str_array) + expected_ascii = """\ +,names +0,"['foo', 'bar']" +1,"['baz', 'qux']" +""" + with tm.ensure_clean("str_test.csv") as path: + df.to_csv(path, encoding="ascii") + with open(path) as f: + assert f.read() == expected_ascii + + def test_to_csv_string_array_utf8(self): + # GH 10813 + str_array = [{"names": ["foo", "bar"]}, {"names": ["baz", "qux"]}] + df = DataFrame(str_array) + expected_utf8 = """\ +,names +0,"['foo', 'bar']" +1,"['baz', 'qux']" +""" + with tm.ensure_clean("unicode_test.csv") as path: + df.to_csv(path, encoding="utf-8") + with 
open(path) as f: + assert f.read() == expected_utf8 + + def test_to_csv_string_with_lf(self): + # GH 20353 + data = {"int": [1, 2, 3], "str_lf": ["abc", "d\nef", "g\nh\n\ni"]} + df = DataFrame(data) + with tm.ensure_clean("lf_test.csv") as path: + # case 1: The default line terminator(=os.linesep)(PR 21406) + os_linesep = os.linesep.encode("utf-8") + expected_noarg = ( + b"int,str_lf" + + os_linesep + + b"1,abc" + + os_linesep + + b'2,"d\nef"' + + os_linesep + + b'3,"g\nh\n\ni"' + + os_linesep + ) + df.to_csv(path, index=False) + with open(path, "rb") as f: + assert f.read() == expected_noarg + with tm.ensure_clean("lf_test.csv") as path: + # case 2: LF as line terminator + expected_lf = b'int,str_lf\n1,abc\n2,"d\nef"\n3,"g\nh\n\ni"\n' + df.to_csv(path, line_terminator="\n", index=False) + with open(path, "rb") as f: + assert f.read() == expected_lf + with tm.ensure_clean("lf_test.csv") as path: + # case 3: CRLF as line terminator + # 'line_terminator' should not change inner element + expected_crlf = b'int,str_lf\r\n1,abc\r\n2,"d\nef"\r\n3,"g\nh\n\ni"\r\n' + df.to_csv(path, line_terminator="\r\n", index=False) + with open(path, "rb") as f: + assert f.read() == expected_crlf + + def test_to_csv_string_with_crlf(self): + # GH 20353 + data = {"int": [1, 2, 3], "str_crlf": ["abc", "d\r\nef", "g\r\nh\r\n\r\ni"]} + df = DataFrame(data) + with tm.ensure_clean("crlf_test.csv") as path: + # case 1: The default line terminator(=os.linesep)(PR 21406) + os_linesep = os.linesep.encode("utf-8") + expected_noarg = ( + b"int,str_crlf" + + os_linesep + + b"1,abc" + + os_linesep + + b'2,"d\r\nef"' + + os_linesep + + b'3,"g\r\nh\r\n\r\ni"' + + os_linesep + ) + df.to_csv(path, index=False) + with open(path, "rb") as f: + assert f.read() == expected_noarg + with tm.ensure_clean("crlf_test.csv") as path: + # case 2: LF as line terminator + expected_lf = b'int,str_crlf\n1,abc\n2,"d\r\nef"\n3,"g\r\nh\r\n\r\ni"\n' + df.to_csv(path, line_terminator="\n", index=False) + with open(path, "rb") as f: + assert f.read() == expected_lf + with tm.ensure_clean("crlf_test.csv") as path: + # case 3: CRLF as line terminator + # 'line_terminator' should not change inner element + expected_crlf = ( + b"int,str_crlf\r\n" + b"1,abc\r\n" + b'2,"d\r\nef"\r\n' + b'3,"g\r\nh\r\n\r\ni"\r\n' + ) + df.to_csv(path, line_terminator="\r\n", index=False) + with open(path, "rb") as f: + assert f.read() == expected_crlf + + def test_to_csv_stdout_file(self, capsys): + # GH 21561 + df = DataFrame([["foo", "bar"], ["baz", "qux"]], columns=["name_1", "name_2"]) + expected_rows = [",name_1,name_2", "0,foo,bar", "1,baz,qux"] + expected_ascii = tm.convert_rows_list_to_csv_str(expected_rows) + + df.to_csv(sys.stdout, encoding="ascii") + captured = capsys.readouterr() + + assert captured.out == expected_ascii + assert not sys.stdout.closed + + @pytest.mark.xfail( + compat.is_platform_windows(), + reason=( + "Especially in Windows, file stream should not be passed" + "to csv writer without newline='' option." 
+ "(https://docs.python.org/3.6/library/csv.html#csv.writer)" + ), + ) + def test_to_csv_write_to_open_file(self): + # GH 21696 + df = DataFrame({"a": ["x", "y", "z"]}) + expected = """\ +manual header +x +y +z +""" + with tm.ensure_clean("test.txt") as path: + with open(path, "w") as f: + f.write("manual header\n") + df.to_csv(f, header=None, index=None) + with open(path) as f: + assert f.read() == expected + + def test_to_csv_write_to_open_file_with_newline_py3(self): + # see gh-21696 + # see gh-20353 + df = DataFrame({"a": ["x", "y", "z"]}) + expected_rows = ["x", "y", "z"] + expected = "manual header\n" + tm.convert_rows_list_to_csv_str(expected_rows) + with tm.ensure_clean("test.txt") as path: + with open(path, "w", newline="") as f: + f.write("manual header\n") + df.to_csv(f, header=None, index=None) + + with open(path, "rb") as f: + assert f.read() == bytes(expected, "utf-8") + + @pytest.mark.parametrize("to_infer", [True, False]) + @pytest.mark.parametrize("read_infer", [True, False]) + def test_to_csv_compression(self, compression_only, read_infer, to_infer): + # see gh-15008 + compression = compression_only + + # We'll complete file extension subsequently. + filename = "test." + filename += icom._compression_to_extension[compression] + + df = DataFrame({"A": [1]}) + + to_compression = "infer" if to_infer else compression + read_compression = "infer" if read_infer else compression + + with tm.ensure_clean(filename) as path: + df.to_csv(path, compression=to_compression) + result = pd.read_csv(path, index_col=0, compression=read_compression) + tm.assert_frame_equal(result, df) + + def test_to_csv_compression_dict(self, compression_only): + # GH 26023 + method = compression_only + df = DataFrame({"ABC": [1]}) + filename = "to_csv_compress_as_dict." 
+ extension = { + "gzip": "gz", + "zstd": "zst", + }.get(method, method) + filename += extension + with tm.ensure_clean(filename) as path: + df.to_csv(path, compression={"method": method}) + read_df = pd.read_csv(path, index_col=0) + tm.assert_frame_equal(read_df, df) + + def test_to_csv_compression_dict_no_method_raises(self): + # GH 26023 + df = DataFrame({"ABC": [1]}) + compression = {"some_option": True} + msg = "must have key 'method'" + + with tm.ensure_clean("out.zip") as path: + with pytest.raises(ValueError, match=msg): + df.to_csv(path, compression=compression) + + @pytest.mark.parametrize("compression", ["zip", "infer"]) + @pytest.mark.parametrize("archive_name", ["test_to_csv.csv", "test_to_csv.zip"]) + def test_to_csv_zip_arguments(self, compression, archive_name): + # GH 26023 + df = DataFrame({"ABC": [1]}) + with tm.ensure_clean("to_csv_archive_name.zip") as path: + df.to_csv( + path, compression={"method": compression, "archive_name": archive_name} + ) + with ZipFile(path) as zp: + assert len(zp.filelist) == 1 + archived_file = zp.filelist[0].filename + assert archived_file == archive_name + + @pytest.mark.parametrize( + "filename,expected_arcname", + [ + ("archive.csv", "archive.csv"), + ("archive.tsv", "archive.tsv"), + ("archive.csv.zip", "archive.csv"), + ("archive.tsv.zip", "archive.tsv"), + ("archive.zip", "archive"), + ], + ) + def test_to_csv_zip_infer_name(self, filename, expected_arcname): + # GH 39465 + df = DataFrame({"ABC": [1]}) + with tm.ensure_clean_dir() as dir: + path = Path(dir, filename) + df.to_csv(path, compression="zip") + with ZipFile(path) as zp: + assert len(zp.filelist) == 1 + archived_file = zp.filelist[0].filename + assert archived_file == expected_arcname + + @pytest.mark.parametrize("df_new_type", ["Int64"]) + def test_to_csv_na_rep_long_string(self, df_new_type): + # see gh-25099 + df = DataFrame({"c": [float("nan")] * 3}) + df = df.astype(df_new_type) + expected_rows = ["c", "mynull", "mynull", "mynull"] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + + result = df.to_csv(index=False, na_rep="mynull", encoding="ascii") + + assert expected == result + + def test_to_csv_timedelta_precision(self): + # GH 6783 + s = pd.Series([1, 1]).astype("timedelta64[ns]") + buf = io.StringIO() + s.to_csv(buf) + result = buf.getvalue() + expected_rows = [ + ",0", + "0,0 days 00:00:00.000000001", + "1,0 days 00:00:00.000000001", + ] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + assert result == expected + + def test_na_rep_truncated(self): + # https://github.com/pandas-dev/pandas/issues/31447 + result = pd.Series(range(8, 12)).to_csv(na_rep="-") + expected = tm.convert_rows_list_to_csv_str([",0", "0,8", "1,9", "2,10", "3,11"]) + assert result == expected + + result = pd.Series([True, False]).to_csv(na_rep="nan") + expected = tm.convert_rows_list_to_csv_str([",0", "0,True", "1,False"]) + assert result == expected + + result = pd.Series([1.1, 2.2]).to_csv(na_rep=".") + expected = tm.convert_rows_list_to_csv_str([",0", "0,1.1", "1,2.2"]) + assert result == expected + + @pytest.mark.parametrize("errors", ["surrogatepass", "ignore", "replace"]) + def test_to_csv_errors(self, errors): + # GH 22610 + data = ["\ud800foo"] + ser = pd.Series(data, index=pd.Index(data)) + with tm.ensure_clean("test.csv") as path: + ser.to_csv(path, errors=errors) + # No use in reading back the data as it is not the same anymore + # due to the error handling + + @pytest.mark.parametrize("mode", ["wb", "w"]) + def test_to_csv_binary_handle(self, mode): + """ + 
Binary file objects should work (if 'mode' contains a 'b') or even without + it in most cases. + + GH 35058 and GH 19827 + """ + df = tm.makeDataFrame() + with tm.ensure_clean() as path: + with open(path, mode="w+b") as handle: + df.to_csv(handle, mode=mode) + tm.assert_frame_equal(df, pd.read_csv(path, index_col=0)) + + @pytest.mark.parametrize("mode", ["wb", "w"]) + def test_to_csv_encoding_binary_handle(self, mode): + """ + Binary file objects should honor a specified encoding. + + GH 23854 and GH 13068 with binary handles + """ + # example from GH 23854 + content = "a, b, 🐟".encode("utf-8-sig") + buffer = io.BytesIO(content) + df = pd.read_csv(buffer, encoding="utf-8-sig") + + buffer = io.BytesIO() + df.to_csv(buffer, mode=mode, encoding="utf-8-sig", index=False) + buffer.seek(0) # tests whether file handle wasn't closed + assert buffer.getvalue().startswith(content) + + # example from GH 13068 + with tm.ensure_clean() as path: + with open(path, "w+b") as handle: + DataFrame().to_csv(handle, mode=mode, encoding="utf-8-sig") + + handle.seek(0) + assert handle.read().startswith(b'\xef\xbb\xbf""') + + +def test_to_csv_iterative_compression_name(compression): + # GH 38714 + df = tm.makeDataFrame() + with tm.ensure_clean() as path: + df.to_csv(path, compression=compression, chunksize=1) + tm.assert_frame_equal( + pd.read_csv(path, compression=compression, index_col=0), df + ) + + +def test_to_csv_iterative_compression_buffer(compression): + # GH 38714 + df = tm.makeDataFrame() + with io.BytesIO() as buffer: + df.to_csv(buffer, compression=compression, chunksize=1) + buffer.seek(0) + tm.assert_frame_equal( + pd.read_csv(buffer, compression=compression, index_col=0), df + ) + assert not buffer.closed diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/test_to_excel.py b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/test_to_excel.py new file mode 100644 index 0000000..968ad63 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/test_to_excel.py @@ -0,0 +1,333 @@ +"""Tests formatting as writer-agnostic ExcelCells + +ExcelFormatter is tested implicitly in pandas/tests/io/excel +""" +import string + +import pytest + +import pandas.util._test_decorators as td + +import pandas._testing as tm + +from pandas.io.formats.css import CSSWarning +from pandas.io.formats.excel import CSSToExcelConverter + + +@pytest.mark.parametrize( + "css,expected", + [ + # FONT + # - name + ("font-family: foo,bar", {"font": {"name": "foo"}}), + ('font-family: "foo bar",baz', {"font": {"name": "foo bar"}}), + ("font-family: foo,\nbar", {"font": {"name": "foo"}}), + ("font-family: foo, bar, baz", {"font": {"name": "foo"}}), + ("font-family: bar, foo", {"font": {"name": "bar"}}), + ("font-family: 'foo bar', baz", {"font": {"name": "foo bar"}}), + ("font-family: 'foo \\'bar', baz", {"font": {"name": "foo 'bar"}}), + ('font-family: "foo \\"bar", baz', {"font": {"name": 'foo "bar'}}), + ('font-family: "foo ,bar", baz', {"font": {"name": "foo ,bar"}}), + # - family + ("font-family: serif", {"font": {"name": "serif", "family": 1}}), + ("font-family: Serif", {"font": {"name": "serif", "family": 1}}), + ("font-family: roman, serif", {"font": {"name": "roman", "family": 1}}), + ("font-family: roman, sans-serif", {"font": {"name": "roman", "family": 2}}), + ("font-family: roman, sans serif", {"font": {"name": "roman"}}), + ("font-family: roman, sansserif", {"font": {"name": "roman"}}), + ("font-family: roman, cursive", {"font": {"name": "roman", "family": 4}}), + 
("font-family: roman, fantasy", {"font": {"name": "roman", "family": 5}}), + # - size + ("font-size: 1em", {"font": {"size": 12}}), + ("font-size: xx-small", {"font": {"size": 6}}), + ("font-size: x-small", {"font": {"size": 7.5}}), + ("font-size: small", {"font": {"size": 9.6}}), + ("font-size: medium", {"font": {"size": 12}}), + ("font-size: large", {"font": {"size": 13.5}}), + ("font-size: x-large", {"font": {"size": 18}}), + ("font-size: xx-large", {"font": {"size": 24}}), + ("font-size: 50%", {"font": {"size": 6}}), + # - bold + ("font-weight: 100", {"font": {"bold": False}}), + ("font-weight: 200", {"font": {"bold": False}}), + ("font-weight: 300", {"font": {"bold": False}}), + ("font-weight: 400", {"font": {"bold": False}}), + ("font-weight: normal", {"font": {"bold": False}}), + ("font-weight: lighter", {"font": {"bold": False}}), + ("font-weight: bold", {"font": {"bold": True}}), + ("font-weight: bolder", {"font": {"bold": True}}), + ("font-weight: 700", {"font": {"bold": True}}), + ("font-weight: 800", {"font": {"bold": True}}), + ("font-weight: 900", {"font": {"bold": True}}), + # - italic + ("font-style: italic", {"font": {"italic": True}}), + ("font-style: oblique", {"font": {"italic": True}}), + # - underline + ("text-decoration: underline", {"font": {"underline": "single"}}), + ("text-decoration: overline", {}), + ("text-decoration: none", {}), + # - strike + ("text-decoration: line-through", {"font": {"strike": True}}), + ( + "text-decoration: underline line-through", + {"font": {"strike": True, "underline": "single"}}, + ), + ( + "text-decoration: underline; text-decoration: line-through", + {"font": {"strike": True}}, + ), + # - color + ("color: red", {"font": {"color": "FF0000"}}), + ("color: #ff0000", {"font": {"color": "FF0000"}}), + ("color: #f0a", {"font": {"color": "FF00AA"}}), + # - shadow + ("text-shadow: none", {"font": {"shadow": False}}), + ("text-shadow: 0px -0em 0px #CCC", {"font": {"shadow": False}}), + ("text-shadow: 0px -0em 0px #999", {"font": {"shadow": False}}), + ("text-shadow: 0px -0em 0px", {"font": {"shadow": False}}), + ("text-shadow: 2px -0em 0px #CCC", {"font": {"shadow": True}}), + ("text-shadow: 0px -2em 0px #CCC", {"font": {"shadow": True}}), + ("text-shadow: 0px -0em 2px #CCC", {"font": {"shadow": True}}), + ("text-shadow: 0px -0em 2px", {"font": {"shadow": True}}), + ("text-shadow: 0px -2em", {"font": {"shadow": True}}), + # FILL + # - color, fillType + ( + "background-color: red", + {"fill": {"fgColor": "FF0000", "patternType": "solid"}}, + ), + ( + "background-color: #ff0000", + {"fill": {"fgColor": "FF0000", "patternType": "solid"}}, + ), + ( + "background-color: #f0a", + {"fill": {"fgColor": "FF00AA", "patternType": "solid"}}, + ), + # BORDER + # - style + ( + "border-style: solid", + { + "border": { + "top": {"style": "medium"}, + "bottom": {"style": "medium"}, + "left": {"style": "medium"}, + "right": {"style": "medium"}, + } + }, + ), + ( + "border-style: solid; border-width: thin", + { + "border": { + "top": {"style": "thin"}, + "bottom": {"style": "thin"}, + "left": {"style": "thin"}, + "right": {"style": "thin"}, + } + }, + ), + ( + "border-top-style: solid; border-top-width: thin", + {"border": {"top": {"style": "thin"}}}, + ), + ( + "border-top-style: solid; border-top-width: 1pt", + {"border": {"top": {"style": "thin"}}}, + ), + ("border-top-style: solid", {"border": {"top": {"style": "medium"}}}), + ( + "border-top-style: solid; border-top-width: medium", + {"border": {"top": {"style": "medium"}}}, + ), + ( + "border-top-style: 
solid; border-top-width: 2pt", + {"border": {"top": {"style": "medium"}}}, + ), + ( + "border-top-style: solid; border-top-width: thick", + {"border": {"top": {"style": "thick"}}}, + ), + ( + "border-top-style: solid; border-top-width: 4pt", + {"border": {"top": {"style": "thick"}}}, + ), + ( + "border-top-style: dotted", + {"border": {"top": {"style": "mediumDashDotDot"}}}, + ), + ( + "border-top-style: dotted; border-top-width: thin", + {"border": {"top": {"style": "dotted"}}}, + ), + ("border-top-style: dashed", {"border": {"top": {"style": "mediumDashed"}}}), + ( + "border-top-style: dashed; border-top-width: thin", + {"border": {"top": {"style": "dashed"}}}, + ), + ("border-top-style: double", {"border": {"top": {"style": "double"}}}), + # - color + ( + "border-style: solid; border-color: #0000ff", + { + "border": { + "top": {"style": "medium", "color": "0000FF"}, + "right": {"style": "medium", "color": "0000FF"}, + "bottom": {"style": "medium", "color": "0000FF"}, + "left": {"style": "medium", "color": "0000FF"}, + } + }, + ), + ( + "border-top-style: double; border-top-color: blue", + {"border": {"top": {"style": "double", "color": "0000FF"}}}, + ), + ( + "border-top-style: solid; border-top-color: #06c", + {"border": {"top": {"style": "medium", "color": "0066CC"}}}, + ), + # ALIGNMENT + # - horizontal + ("text-align: center", {"alignment": {"horizontal": "center"}}), + ("text-align: left", {"alignment": {"horizontal": "left"}}), + ("text-align: right", {"alignment": {"horizontal": "right"}}), + ("text-align: justify", {"alignment": {"horizontal": "justify"}}), + # - vertical + ("vertical-align: top", {"alignment": {"vertical": "top"}}), + ("vertical-align: text-top", {"alignment": {"vertical": "top"}}), + ("vertical-align: middle", {"alignment": {"vertical": "center"}}), + ("vertical-align: bottom", {"alignment": {"vertical": "bottom"}}), + ("vertical-align: text-bottom", {"alignment": {"vertical": "bottom"}}), + # - wrap_text + ("white-space: nowrap", {"alignment": {"wrap_text": False}}), + ("white-space: pre", {"alignment": {"wrap_text": False}}), + ("white-space: pre-line", {"alignment": {"wrap_text": False}}), + ("white-space: normal", {"alignment": {"wrap_text": True}}), + # NUMBER FORMAT + ("number-format: 0%", {"number_format": {"format_code": "0%"}}), + ], +) +def test_css_to_excel(css, expected): + convert = CSSToExcelConverter() + assert expected == convert(css) + + +def test_css_to_excel_multiple(): + convert = CSSToExcelConverter() + actual = convert( + """ + font-weight: bold; + text-decoration: underline; + color: red; + border-width: thin; + text-align: center; + vertical-align: top; + unused: something; + """ + ) + assert { + "font": {"bold": True, "underline": "single", "color": "FF0000"}, + "border": { + "top": {"style": "thin"}, + "right": {"style": "thin"}, + "bottom": {"style": "thin"}, + "left": {"style": "thin"}, + }, + "alignment": {"horizontal": "center", "vertical": "top"}, + } == actual + + +@pytest.mark.parametrize( + "css,inherited,expected", + [ + ("font-weight: bold", "", {"font": {"bold": True}}), + ("", "font-weight: bold", {"font": {"bold": True}}), + ( + "font-weight: bold", + "font-style: italic", + {"font": {"bold": True, "italic": True}}, + ), + ("font-style: normal", "font-style: italic", {"font": {"italic": False}}), + ("font-style: inherit", "", {}), + ( + "font-style: normal; font-style: inherit", + "font-style: italic", + {"font": {"italic": True}}, + ), + ], +) +def test_css_to_excel_inherited(css, inherited, expected): + convert = 
CSSToExcelConverter(inherited) + assert expected == convert(css) + + +@pytest.mark.parametrize( + "input_color,output_color", + ( + list(CSSToExcelConverter.NAMED_COLORS.items()) + + [("#" + rgb, rgb) for rgb in CSSToExcelConverter.NAMED_COLORS.values()] + + [("#F0F", "FF00FF"), ("#ABC", "AABBCC")] + ), +) +def test_css_to_excel_good_colors(input_color, output_color): + # see gh-18392 + css = ( + f"border-top-color: {input_color}; " + f"border-right-color: {input_color}; " + f"border-bottom-color: {input_color}; " + f"border-left-color: {input_color}; " + f"background-color: {input_color}; " + f"color: {input_color}" + ) + + expected = {} + + expected["fill"] = {"patternType": "solid", "fgColor": output_color} + + expected["font"] = {"color": output_color} + + expected["border"] = { + k: {"color": output_color} for k in ("top", "right", "bottom", "left") + } + + with tm.assert_produces_warning(None): + convert = CSSToExcelConverter() + assert expected == convert(css) + + +@pytest.mark.parametrize("input_color", [None, "not-a-color"]) +def test_css_to_excel_bad_colors(input_color): + # see gh-18392 + css = ( + f"border-top-color: {input_color}; " + f"border-right-color: {input_color}; " + f"border-bottom-color: {input_color}; " + f"border-left-color: {input_color}; " + f"background-color: {input_color}; " + f"color: {input_color}" + ) + + expected = {} + + if input_color is not None: + expected["fill"] = {"patternType": "solid"} + + with tm.assert_produces_warning(CSSWarning): + convert = CSSToExcelConverter() + assert expected == convert(css) + + +def tests_css_named_colors_valid(): + upper_hexs = set(map(str.upper, string.hexdigits)) + for color in CSSToExcelConverter.NAMED_COLORS.values(): + assert len(color) == 6 and all(c in upper_hexs for c in color) + + +@td.skip_if_no_mpl +def test_css_named_colors_from_mpl_present(): + from matplotlib.colors import CSS4_COLORS as mpl_colors + + pd_colors = CSSToExcelConverter.NAMED_COLORS + for name, color in mpl_colors.items(): + assert name in pd_colors and pd_colors[name] == color[1:] diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/test_to_html.py b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/test_to_html.py new file mode 100644 index 0000000..aa8508d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/test_to_html.py @@ -0,0 +1,890 @@ +from datetime import datetime +from io import StringIO +import re + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + option_context, +) +import pandas._testing as tm + +import pandas.io.formats.format as fmt + +lorem_ipsum = ( + "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod " + "tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim " + "veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex " + "ea commodo consequat. Duis aute irure dolor in reprehenderit in " + "voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur " + "sint occaecat cupidatat non proident, sunt in culpa qui officia " + "deserunt mollit anim id est laborum." +) + + +def expected_html(datapath, name): + """ + Read HTML file from formats data directory. + + Parameters + ---------- + datapath : pytest fixture + The datapath fixture injected into a test by pytest. + name : str + The name of the HTML file without the suffix. + + Returns + ------- + str : contents of HTML file. 
+ """ + filename = ".".join([name, "html"]) + filepath = datapath("io", "formats", "data", "html", filename) + with open(filepath, encoding="utf-8") as f: + html = f.read() + return html.rstrip() + + +@pytest.fixture(params=["mixed", "empty"]) +def biggie_df_fixture(request): + """Fixture for a big mixed Dataframe and an empty Dataframe""" + if request.param == "mixed": + df = DataFrame( + {"A": np.random.randn(200), "B": tm.makeStringIndex(200)}, + index=np.arange(200), + ) + df.loc[:20, "A"] = np.nan + df.loc[:20, "B"] = np.nan + return df + elif request.param == "empty": + df = DataFrame(index=np.arange(200)) + return df + + +@pytest.fixture(params=fmt._VALID_JUSTIFY_PARAMETERS) +def justify(request): + return request.param + + +@pytest.mark.parametrize("col_space", [30, 50]) +def test_to_html_with_col_space(col_space): + df = DataFrame(np.random.random(size=(1, 3))) + # check that col_space affects HTML generation + # and be very brittle about it. + result = df.to_html(col_space=col_space) + hdrs = [x for x in result.split(r"\n") if re.search(r"\s]", x)] + assert len(hdrs) > 0 + for h in hdrs: + assert "min-width" in h + assert str(col_space) in h + + +def test_to_html_with_column_specific_col_space_raises(): + df = DataFrame(np.random.random(size=(3, 3)), columns=["a", "b", "c"]) + + msg = ( + "Col_space length\\(\\d+\\) should match " + "DataFrame number of columns\\(\\d+\\)" + ) + with pytest.raises(ValueError, match=msg): + df.to_html(col_space=[30, 40]) + + with pytest.raises(ValueError, match=msg): + df.to_html(col_space=[30, 40, 50, 60]) + + msg = "unknown column" + with pytest.raises(ValueError, match=msg): + df.to_html(col_space={"a": "foo", "b": 23, "d": 34}) + + +def test_to_html_with_column_specific_col_space(): + df = DataFrame(np.random.random(size=(3, 3)), columns=["a", "b", "c"]) + + result = df.to_html(col_space={"a": "2em", "b": 23}) + hdrs = [x for x in result.split("\n") if re.search(r"\s]", x)] + assert 'min-width: 2em;">a' in hdrs[1] + assert 'min-width: 23px;">b' in hdrs[2] + assert "" in hdrs[3] + + result = df.to_html(col_space=["1em", 2, 3]) + hdrs = [x for x in result.split("\n") if re.search(r"\s]", x)] + assert 'min-width: 1em;">a' in hdrs[1] + assert 'min-width: 2px;">b' in hdrs[2] + assert 'min-width: 3px;">c' in hdrs[3] + + +def test_to_html_with_empty_string_label(): + # GH 3547, to_html regards empty string labels as repeated labels + data = {"c1": ["a", "b"], "c2": ["a", ""], "data": [1, 2]} + df = DataFrame(data).set_index(["c1", "c2"]) + result = df.to_html() + assert "rowspan" not in result + + +@pytest.mark.parametrize( + "df,expected", + [ + (DataFrame({"\u03c3": np.arange(10.0)}), "unicode_1"), + (DataFrame({"A": ["\u03c3"]}), "unicode_2"), + ], +) +def test_to_html_unicode(df, expected, datapath): + expected = expected_html(datapath, expected) + result = df.to_html() + assert result == expected + + +def test_to_html_encoding(float_frame, tmp_path): + # GH 28663 + path = tmp_path / "test.html" + float_frame.to_html(path, encoding="gbk") + with open(str(path), encoding="gbk") as f: + assert float_frame.to_html() == f.read() + + +def test_to_html_decimal(datapath): + # GH 12031 + df = DataFrame({"A": [6.0, 3.1, 2.2]}) + result = df.to_html(decimal=",") + expected = expected_html(datapath, "gh12031_expected_output") + assert result == expected + + +@pytest.mark.parametrize( + "kwargs,string,expected", + [ + ({}, "", "escaped"), + ({"escape": False}, "bold", "escape_disabled"), + ], +) +def test_to_html_escaped(kwargs, string, expected, datapath): + 
a = "strl2": {a: string, b: string}} + result = DataFrame(test_dict).to_html(**kwargs) + expected = expected_html(datapath, expected) + assert result == expected + + +@pytest.mark.parametrize("index_is_named", [True, False]) +def test_to_html_multiindex_index_false(index_is_named, datapath): + # GH 8452 + df = DataFrame( + {"a": range(2), "b": range(3, 5), "c": range(5, 7), "d": range(3, 5)} + ) + df.columns = MultiIndex.from_product([["a", "b"], ["c", "d"]]) + if index_is_named: + df.index = Index(df.index.values, name="idx") + result = df.to_html(index=False) + expected = expected_html(datapath, "gh8452_expected_output") + assert result == expected + + +@pytest.mark.parametrize( + "multi_sparse,expected", + [ + (False, "multiindex_sparsify_false_multi_sparse_1"), + (False, "multiindex_sparsify_false_multi_sparse_2"), + (True, "multiindex_sparsify_1"), + (True, "multiindex_sparsify_2"), + ], +) +def test_to_html_multiindex_sparsify(multi_sparse, expected, datapath): + index = MultiIndex.from_arrays([[0, 0, 1, 1], [0, 1, 0, 1]], names=["foo", None]) + df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], index=index) + if expected.endswith("2"): + df.columns = index[::2] + with option_context("display.multi_sparse", multi_sparse): + result = df.to_html() + expected = expected_html(datapath, expected) + assert result == expected + + +@pytest.mark.parametrize( + "max_rows,expected", + [ + (60, "gh14882_expected_output_1"), + # Test that ... appears in a middle level + (56, "gh14882_expected_output_2"), + ], +) +def test_to_html_multiindex_odd_even_truncate(max_rows, expected, datapath): + # GH 14882 - Issue on truncation with odd length DataFrame + index = MultiIndex.from_product( + [[100, 200, 300], [10, 20, 30], [1, 2, 3, 4, 5, 6, 7]], names=["a", "b", "c"] + ) + df = DataFrame({"n": range(len(index))}, index=index) + result = df.to_html(max_rows=max_rows) + expected = expected_html(datapath, expected) + assert result == expected + + +@pytest.mark.parametrize( + "df,formatters,expected", + [ + ( + DataFrame( + [[0, 1], [2, 3], [4, 5], [6, 7]], + columns=["foo", None], + index=np.arange(4), + ), + {"__index__": lambda x: "abcd"[x]}, + "index_formatter", + ), + ( + DataFrame({"months": [datetime(2016, 1, 1), datetime(2016, 2, 2)]}), + {"months": lambda x: x.strftime("%Y-%m")}, + "datetime64_monthformatter", + ), + ( + DataFrame( + { + "hod": pd.to_datetime( + ["10:10:10.100", "12:12:12.120"], format="%H:%M:%S.%f" + ) + } + ), + {"hod": lambda x: x.strftime("%H:%M")}, + "datetime64_hourformatter", + ), + ( + DataFrame( + { + "i": pd.Series([1, 2], dtype="int64"), + "f": pd.Series([1, 2], dtype="float64"), + "I": pd.Series([1, 2], dtype="Int64"), + "s": pd.Series([1, 2], dtype="string"), + "b": pd.Series([True, False], dtype="boolean"), + "c": pd.Series(["a", "b"], dtype=pd.CategoricalDtype(["a", "b"])), + "o": pd.Series([1, "2"], dtype=object), + } + ), + [lambda x: "formatted"] * 7, + "various_dtypes_formatted", + ), + ], +) +def test_to_html_formatters(df, formatters, expected, datapath): + expected = expected_html(datapath, expected) + result = df.to_html(formatters=formatters) + assert result == expected + + +def test_to_html_regression_GH6098(): + df = DataFrame( + { + "clé1": ["a", "a", "b", "b", "a"], + "clé2": ["1er", "2ème", "1er", "2ème", "1er"], + "données1": np.random.randn(5), + "données2": np.random.randn(5), + } + ) + + # it works + df.pivot_table(index=["clé1"], columns=["clé2"])._repr_html_() + + +def test_to_html_truncate(datapath): + index = pd.date_range(start="20010101", 
freq="D", periods=20) + df = DataFrame(index=index, columns=range(20)) + result = df.to_html(max_rows=8, max_cols=4) + expected = expected_html(datapath, "truncate") + assert result == expected + + +@pytest.mark.parametrize("size", [1, 5]) +def test_html_invalid_formatters_arg_raises(size): + # issue-28469 + df = DataFrame(columns=["a", "b", "c"]) + msg = "Formatters length({}) should match DataFrame number of columns(3)" + with pytest.raises(ValueError, match=re.escape(msg.format(size))): + df.to_html(formatters=["{}".format] * size) + + +def test_to_html_truncate_formatter(datapath): + # issue-25955 + data = [ + {"A": 1, "B": 2, "C": 3, "D": 4}, + {"A": 5, "B": 6, "C": 7, "D": 8}, + {"A": 9, "B": 10, "C": 11, "D": 12}, + {"A": 13, "B": 14, "C": 15, "D": 16}, + ] + + df = DataFrame(data) + fmt = lambda x: str(x) + "_mod" + formatters = [fmt, fmt, None, None] + result = df.to_html(formatters=formatters, max_cols=3) + expected = expected_html(datapath, "truncate_formatter") + assert result == expected + + +@pytest.mark.parametrize( + "sparsify,expected", + [(True, "truncate_multi_index"), (False, "truncate_multi_index_sparse_off")], +) +def test_to_html_truncate_multi_index(sparsify, expected, datapath): + arrays = [ + ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + df = DataFrame(index=arrays, columns=arrays) + result = df.to_html(max_rows=7, max_cols=7, sparsify=sparsify) + expected = expected_html(datapath, expected) + assert result == expected + + +@pytest.mark.parametrize( + "option,result,expected", + [ + (None, lambda df: df.to_html(), "1"), + (None, lambda df: df.to_html(border=0), "0"), + (0, lambda df: df.to_html(), "0"), + (0, lambda df: df._repr_html_(), "0"), + ], +) +def test_to_html_border(option, result, expected): + df = DataFrame({"A": [1, 2]}) + if option is None: + result = result(df) + else: + with option_context("display.html.border", option): + result = result(df) + expected = f'border="{expected}"' + assert expected in result + + +@pytest.mark.parametrize("biggie_df_fixture", ["mixed"], indirect=True) +def test_to_html(biggie_df_fixture): + # TODO: split this test + df = biggie_df_fixture + s = df.to_html() + + buf = StringIO() + retval = df.to_html(buf=buf) + assert retval is None + assert buf.getvalue() == s + + assert isinstance(s, str) + + df.to_html(columns=["B", "A"], col_space=17) + df.to_html(columns=["B", "A"], formatters={"A": lambda x: f"{x:.1f}"}) + + df.to_html(columns=["B", "A"], float_format=str) + df.to_html(columns=["B", "A"], col_space=12, float_format=str) + + +@pytest.mark.parametrize("biggie_df_fixture", ["empty"], indirect=True) +def test_to_html_empty_dataframe(biggie_df_fixture): + df = biggie_df_fixture + df.to_html() + + +def test_to_html_filename(biggie_df_fixture, tmpdir): + df = biggie_df_fixture + expected = df.to_html() + path = tmpdir.join("test.html") + df.to_html(path) + result = path.read() + assert result == expected + + +def test_to_html_with_no_bold(): + df = DataFrame({"x": np.random.randn(5)}) + html = df.to_html(bold_rows=False) + result = html[html.find("")] + assert "B" not in result + + +@pytest.mark.parametrize( + "columns,justify,expected", + [ + ( + MultiIndex.from_tuples( + list(zip(np.arange(2).repeat(2), np.mod(range(4), 2))), + names=["CL0", "CL1"], + ), + "left", + "multiindex_1", + ), + ( + MultiIndex.from_tuples(list(zip(range(4), np.mod(range(4), 2)))), + "right", + "multiindex_2", + ), + ], +) +def test_to_html_multiindex(columns, 
justify, expected, datapath): + df = DataFrame([list("abcd"), list("efgh")], columns=columns) + result = df.to_html(justify=justify) + expected = expected_html(datapath, expected) + assert result == expected + + +def test_to_html_justify(justify, datapath): + df = DataFrame( + {"A": [6, 30000, 2], "B": [1, 2, 70000], "C": [223442, 0, 1]}, + columns=["A", "B", "C"], + ) + result = df.to_html(justify=justify) + expected = expected_html(datapath, "justify").format(justify=justify) + assert result == expected + + +@pytest.mark.parametrize( + "justify", ["super-right", "small-left", "noinherit", "tiny", "pandas"] +) +def test_to_html_invalid_justify(justify): + # GH 17527 + df = DataFrame() + msg = "Invalid value for justify parameter" + + with pytest.raises(ValueError, match=msg): + df.to_html(justify=justify) + + +class TestHTMLIndex: + @pytest.fixture + def df(self): + index = ["foo", "bar", "baz"] + df = DataFrame( + {"A": [1, 2, 3], "B": [1.2, 3.4, 5.6], "C": ["one", "two", np.nan]}, + columns=["A", "B", "C"], + index=index, + ) + return df + + @pytest.fixture + def expected_without_index(self, datapath): + return expected_html(datapath, "index_2") + + def test_to_html_flat_index_without_name( + self, datapath, df, expected_without_index + ): + expected_with_index = expected_html(datapath, "index_1") + assert df.to_html() == expected_with_index + + result = df.to_html(index=False) + for i in df.index: + assert i not in result + assert result == expected_without_index + + def test_to_html_flat_index_with_name(self, datapath, df, expected_without_index): + df.index = Index(["foo", "bar", "baz"], name="idx") + expected_with_index = expected_html(datapath, "index_3") + assert df.to_html() == expected_with_index + assert df.to_html(index=False) == expected_without_index + + def test_to_html_multiindex_without_names( + self, datapath, df, expected_without_index + ): + tuples = [("foo", "car"), ("foo", "bike"), ("bar", "car")] + df.index = MultiIndex.from_tuples(tuples) + + expected_with_index = expected_html(datapath, "index_4") + assert df.to_html() == expected_with_index + + result = df.to_html(index=False) + for i in ["foo", "bar", "car", "bike"]: + assert i not in result + # must be the same result as normal index + assert result == expected_without_index + + def test_to_html_multiindex_with_names(self, datapath, df, expected_without_index): + tuples = [("foo", "car"), ("foo", "bike"), ("bar", "car")] + df.index = MultiIndex.from_tuples(tuples, names=["idx1", "idx2"]) + expected_with_index = expected_html(datapath, "index_5") + assert df.to_html() == expected_with_index + assert df.to_html(index=False) == expected_without_index + + +@pytest.mark.parametrize("classes", ["sortable draggable", ["sortable", "draggable"]]) +def test_to_html_with_classes(classes, datapath): + df = DataFrame() + expected = expected_html(datapath, "with_classes") + result = df.to_html(classes=classes) + assert result == expected + + +def test_to_html_no_index_max_rows(datapath): + # GH 14998 + df = DataFrame({"A": [1, 2, 3, 4]}) + result = df.to_html(index=False, max_rows=1) + expected = expected_html(datapath, "gh14998_expected_output") + assert result == expected + + +def test_to_html_multiindex_max_cols(datapath): + # GH 6131 + index = MultiIndex( + levels=[["ba", "bb", "bc"], ["ca", "cb", "cc"]], + codes=[[0, 1, 2], [0, 1, 2]], + names=["b", "c"], + ) + columns = MultiIndex( + levels=[["d"], ["aa", "ab", "ac"]], + codes=[[0, 0, 0], [0, 1, 2]], + names=[None, "a"], + ) + data = np.array( + [[1.0, np.nan, np.nan], 
[np.nan, 2.0, np.nan], [np.nan, np.nan, 3.0]] + ) + df = DataFrame(data, index, columns) + result = df.to_html(max_cols=2) + expected = expected_html(datapath, "gh6131_expected_output") + assert result == expected + + +def test_to_html_multi_indexes_index_false(datapath): + # GH 22579 + df = DataFrame( + {"a": range(10), "b": range(10, 20), "c": range(10, 20), "d": range(10, 20)} + ) + df.columns = MultiIndex.from_product([["a", "b"], ["c", "d"]]) + df.index = MultiIndex.from_product([["a", "b"], ["c", "d", "e", "f", "g"]]) + result = df.to_html(index=False) + expected = expected_html(datapath, "gh22579_expected_output") + assert result == expected + + +@pytest.mark.parametrize("index_names", [True, False]) +@pytest.mark.parametrize("header", [True, False]) +@pytest.mark.parametrize("index", [True, False]) +@pytest.mark.parametrize( + "column_index, column_type", + [ + (Index([0, 1]), "unnamed_standard"), + (Index([0, 1], name="columns.name"), "named_standard"), + (MultiIndex.from_product([["a"], ["b", "c"]]), "unnamed_multi"), + ( + MultiIndex.from_product( + [["a"], ["b", "c"]], names=["columns.name.0", "columns.name.1"] + ), + "named_multi", + ), + ], +) +@pytest.mark.parametrize( + "row_index, row_type", + [ + (Index([0, 1]), "unnamed_standard"), + (Index([0, 1], name="index.name"), "named_standard"), + (MultiIndex.from_product([["a"], ["b", "c"]]), "unnamed_multi"), + ( + MultiIndex.from_product( + [["a"], ["b", "c"]], names=["index.name.0", "index.name.1"] + ), + "named_multi", + ), + ], +) +def test_to_html_basic_alignment( + datapath, row_index, row_type, column_index, column_type, index, header, index_names +): + # GH 22747, GH 22579 + df = DataFrame(np.zeros((2, 2), dtype=int), index=row_index, columns=column_index) + result = df.to_html(index=index, header=header, index_names=index_names) + + if not index: + row_type = "none" + elif not index_names and row_type.startswith("named"): + row_type = "un" + row_type + + if not header: + column_type = "none" + elif not index_names and column_type.startswith("named"): + column_type = "un" + column_type + + filename = "index_" + row_type + "_columns_" + column_type + expected = expected_html(datapath, filename) + assert result == expected + + +@pytest.mark.parametrize("index_names", [True, False]) +@pytest.mark.parametrize("header", [True, False]) +@pytest.mark.parametrize("index", [True, False]) +@pytest.mark.parametrize( + "column_index, column_type", + [ + (Index(np.arange(8)), "unnamed_standard"), + (Index(np.arange(8), name="columns.name"), "named_standard"), + ( + MultiIndex.from_product([["a", "b"], ["c", "d"], ["e", "f"]]), + "unnamed_multi", + ), + ( + MultiIndex.from_product( + [["a", "b"], ["c", "d"], ["e", "f"]], names=["foo", None, "baz"] + ), + "named_multi", + ), + ], +) +@pytest.mark.parametrize( + "row_index, row_type", + [ + (Index(np.arange(8)), "unnamed_standard"), + (Index(np.arange(8), name="index.name"), "named_standard"), + ( + MultiIndex.from_product([["a", "b"], ["c", "d"], ["e", "f"]]), + "unnamed_multi", + ), + ( + MultiIndex.from_product( + [["a", "b"], ["c", "d"], ["e", "f"]], names=["foo", None, "baz"] + ), + "named_multi", + ), + ], +) +def test_to_html_alignment_with_truncation( + datapath, row_index, row_type, column_index, column_type, index, header, index_names +): + # GH 22747, GH 22579 + df = DataFrame(np.arange(64).reshape(8, 8), index=row_index, columns=column_index) + result = df.to_html( + max_rows=4, max_cols=4, index=index, header=header, index_names=index_names + ) + + if not index: + row_type 
= "none" + elif not index_names and row_type.startswith("named"): + row_type = "un" + row_type + + if not header: + column_type = "none" + elif not index_names and column_type.startswith("named"): + column_type = "un" + column_type + + filename = "trunc_df_index_" + row_type + "_columns_" + column_type + expected = expected_html(datapath, filename) + assert result == expected + + +@pytest.mark.parametrize("index", [False, 0]) +def test_to_html_truncation_index_false_max_rows(datapath, index): + # GH 15019 + data = [ + [1.764052, 0.400157], + [0.978738, 2.240893], + [1.867558, -0.977278], + [0.950088, -0.151357], + [-0.103219, 0.410599], + ] + df = DataFrame(data) + result = df.to_html(max_rows=4, index=index) + expected = expected_html(datapath, "gh15019_expected_output") + assert result == expected + + +@pytest.mark.parametrize("index", [False, 0]) +@pytest.mark.parametrize( + "col_index_named, expected_output", + [(False, "gh22783_expected_output"), (True, "gh22783_named_columns_index")], +) +def test_to_html_truncation_index_false_max_cols( + datapath, index, col_index_named, expected_output +): + # GH 22783 + data = [ + [1.764052, 0.400157, 0.978738, 2.240893, 1.867558], + [-0.977278, 0.950088, -0.151357, -0.103219, 0.410599], + ] + df = DataFrame(data) + if col_index_named: + df.columns.rename("columns.name", inplace=True) + result = df.to_html(max_cols=4, index=index) + expected = expected_html(datapath, expected_output) + assert result == expected + + +@pytest.mark.parametrize("notebook", [True, False]) +def test_to_html_notebook_has_style(notebook): + df = DataFrame({"A": [1, 2, 3]}) + result = df.to_html(notebook=notebook) + + if notebook: + assert "tbody tr th:only-of-type" in result + assert "vertical-align: middle;" in result + assert "thead th" in result + else: + assert "tbody tr th:only-of-type" not in result + assert "vertical-align: middle;" not in result + assert "thead th" not in result + + +def test_to_html_with_index_names_false(): + # GH 16493 + df = DataFrame({"A": [1, 2]}, index=Index(["a", "b"], name="myindexname")) + result = df.to_html(index_names=False) + assert "myindexname" not in result + + +def test_to_html_with_id(): + # GH 8496 + df = DataFrame({"A": [1, 2]}, index=Index(["a", "b"], name="myindexname")) + result = df.to_html(index_names=False, table_id="TEST_ID") + assert ' id="TEST_ID"' in result + + +@pytest.mark.parametrize( + "value,float_format,expected", + [ + (0.19999, "%.3f", "gh21625_expected_output"), + (100.0, "%.0f", "gh22270_expected_output"), + ], +) +def test_to_html_float_format_no_fixed_width(value, float_format, expected, datapath): + # GH 21625, GH 22270 + df = DataFrame({"x": [value]}) + expected = expected_html(datapath, expected) + result = df.to_html(float_format=float_format) + assert result == expected + + +@pytest.mark.parametrize( + "render_links,expected", + [(True, "render_links_true"), (False, "render_links_false")], +) +def test_to_html_render_links(render_links, expected, datapath): + # GH 2679 + data = [ + [0, "https://pandas.pydata.org/?q1=a&q2=b", "pydata.org"], + [0, "www.pydata.org", "pydata.org"], + ] + df = DataFrame(data, columns=["foo", "bar", None]) + + result = df.to_html(render_links=render_links) + expected = expected_html(datapath, expected) + assert result == expected + + +@pytest.mark.parametrize( + "method,expected", + [ + ("to_html", lambda x: lorem_ipsum), + ("_repr_html_", lambda x: lorem_ipsum[: x - 4] + "..."), # regression case + ], +) +@pytest.mark.parametrize("max_colwidth", [10, 20, 50, 100]) +def 
test_ignore_display_max_colwidth(method, expected, max_colwidth): + # see gh-17004 + df = DataFrame([lorem_ipsum]) + with option_context("display.max_colwidth", max_colwidth): + result = getattr(df, method)() + expected = expected(max_colwidth) + assert expected in result + + +@pytest.mark.parametrize("classes", [True, 0]) +def test_to_html_invalid_classes_type(classes): + # GH 25608 + df = DataFrame() + msg = "classes must be a string, list, or tuple" + + with pytest.raises(TypeError, match=msg): + df.to_html(classes=classes) + + +def test_to_html_round_column_headers(): + # GH 17280 + df = DataFrame([1], columns=[0.55555]) + with option_context("display.precision", 3): + html = df.to_html(notebook=False) + notebook = df.to_html(notebook=True) + assert "0.55555" in html + assert "0.556" in notebook + + +@pytest.mark.parametrize("unit", ["100px", "10%", "5em", 150]) +def test_to_html_with_col_space_units(unit): + # GH 25941 + df = DataFrame(np.random.random(size=(1, 3))) + result = df.to_html(col_space=unit) + result = result.split("tbody")[0] + hdrs = [x for x in result.split("\n") if re.search(r"<th[>\s]", x)] + if isinstance(unit, int): + unit = str(unit) + "px" + for h in hdrs: + expected = f'<th style="min-width: {unit};">' + assert expected in h + + def test_thead_without_tr(self): + """ + Ensure parser adds <tr> within <thead> on malformed HTML. + """ + result = self.read_html( + """
These string based tooltips are only applicable to ``<td>`` HTML elements, + and cannot be used for column or index headers. + + .. versionadded:: 1.3.0 + + Parameters + ---------- + ttips : DataFrame + DataFrame containing strings that will be translated to tooltips, mapped + by identical column and index values that must exist on the underlying + Styler data. None, NaN values, and empty strings will be ignored and + not affect the rendered HTML. + props : list-like or str, optional + List of (attr, value) tuples or a valid CSS string. If ``None`` adopts + the internal default values described in notes. + css_class : str, optional + Name of the tooltip class used in CSS, should conform to HTML standards. + Only useful if integrating tooltips with external CSS. If ``None`` uses the + internal default value 'pd-t'. + + Returns + ------- + self : Styler + + Notes + ----- + Tooltips are created by adding ``<span class="pd-t"></span>`` to each data cell + and then manipulating the table level CSS to attach pseudo hover and pseudo + after selectors to produce the required results. + + The default properties for the tooltip CSS class are: + + - visibility: hidden + - position: absolute + - z-index: 1 + - background-color: black + - color: white + - transform: translate(-20px, -20px) + + The property 'visibility: hidden;' is a key prerequisite to the hover + functionality, and should always be included in any manual properties + specification, using the ``props`` argument. + + Tooltips are not designed to be efficient, and can add large amounts of + additional HTML for larger tables, since they also require that ``cell_ids`` + is forced to `True`. + + Examples + -------- + Basic application + + >>> df = pd.DataFrame(data=[[0, 1], [2, 3]]) + >>> ttips = pd.DataFrame( + ... data=[["Min", ""], [np.nan, "Max"]], columns=df.columns, index=df.index + ... ) + >>> s = df.style.set_tooltips(ttips).to_html() + + Optionally controlling the tooltip visual display + + >>> df.style.set_tooltips(ttips, css_class='tt-add', props=[ + ... ('visibility', 'hidden'), + ... ('position', 'absolute'), + ... ('z-index', 1)]) # doctest: +SKIP + >>> df.style.set_tooltips(ttips, css_class='tt-add', + ... props='visibility:hidden; position:absolute; z-index:1;') + ... # doctest: +SKIP + """ + if not self.cell_ids: + # tooltips not optimised for individual cell check. requires reasonable + # redesign and more extensive code for a feature that might be rarely used. + raise NotImplementedError( + "Tooltips can only render when 'cell_ids' is True." + ) + if not ttips.index.is_unique or not ttips.columns.is_unique: + raise KeyError( + "Tooltips render only if `ttips` has unique index and columns."
+ ) + if self.tooltips is None: # create a default instance if necessary + self.tooltips = Tooltips() + self.tooltips.tt_data = ttips + if props: + self.tooltips.class_properties = props + if css_class: + self.tooltips.class_name = css_class + + return self + + @doc( + NDFrame.to_excel, + klass="Styler", + storage_options=_shared_docs["storage_options"], + ) + def to_excel( + self, + excel_writer, + sheet_name: str = "Sheet1", + na_rep: str = "", + float_format: str | None = None, + columns: Sequence[Hashable] | None = None, + header: Sequence[Hashable] | bool = True, + index: bool = True, + index_label: IndexLabel | None = None, + startrow: int = 0, + startcol: int = 0, + engine: str | None = None, + merge_cells: bool = True, + encoding: str | None = None, + inf_rep: str = "inf", + verbose: bool = True, + freeze_panes: tuple[int, int] | None = None, + ) -> None: + + from pandas.io.formats.excel import ExcelFormatter + + formatter = ExcelFormatter( + self, + na_rep=na_rep, + cols=columns, + header=header, + float_format=float_format, + index=index, + index_label=index_label, + merge_cells=merge_cells, + inf_rep=inf_rep, + ) + formatter.write( + excel_writer, + sheet_name=sheet_name, + startrow=startrow, + startcol=startcol, + freeze_panes=freeze_panes, + engine=engine, + ) + + def to_latex( + self, + buf: FilePath | WriteBuffer[str] | None = None, + *, + column_format: str | None = None, + position: str | None = None, + position_float: str | None = None, + hrules: bool | None = None, + clines: str | None = None, + label: str | None = None, + caption: str | tuple | None = None, + sparse_index: bool | None = None, + sparse_columns: bool | None = None, + multirow_align: str | None = None, + multicol_align: str | None = None, + siunitx: bool = False, + environment: str | None = None, + encoding: str | None = None, + convert_css: bool = False, + ): + r""" + Write Styler to a file, buffer or string in LaTeX format. + + .. versionadded:: 1.3.0 + + Parameters + ---------- + buf : str, path object, file-like object, or None, default None + String, path object (implementing ``os.PathLike[str]``), or file-like + object implementing a string ``write()`` function. If None, the result is + returned as a string. + column_format : str, optional + The LaTeX column specification placed in location: + + \\begin{tabular}{<column_format>} + + Defaults to 'l' for index and + non-numeric data columns, and, for numeric data columns, + to 'r' by default, or 'S' if ``siunitx`` is ``True``. + position : str, optional + The LaTeX positional argument (e.g. 'h!') for tables, placed in location: + + ``\\begin{table}[<position>]``. + position_float : {"centering", "raggedleft", "raggedright"}, optional + The LaTeX float command placed in location: + + \\begin{table}[<position>] + + \\<position_float> + + Cannot be used if ``environment`` is "longtable". + hrules : bool + Set to `True` to add \\toprule, \\midrule and \\bottomrule from the + {booktabs} LaTeX package. + Defaults to ``pandas.options.styler.latex.hrules``, which is `False`. + + .. versionchanged:: 1.4.0 + clines : str, optional + Use to control adding \\cline commands for the index labels separation. + Possible values are: + + - `None`: no cline commands are added (default). + - `"all;data"`: a cline is added for every index value extending the + width of the table, including data entries. + - `"all;index"`: as above with lines extending only the width of the + index entries.
+ - `"skip-last;data"`: a cline is added for each index value except the + last level (which is never sparsified), extending the widtn of the + table. + - `"skip-last;index"`: as above with lines extending only the width of the + index entries. + + .. versionadded:: 1.4.0 + label : str, optional + The LaTeX label included as: \\label{
If tuple, i.e ("full caption", "short caption"), the caption included + as: \\caption[<caption>]{<caption>}. + sparse_index : bool, optional + Whether to sparsify the display of a hierarchical index. Setting to False + will display each explicit level element in a hierarchical key for each row. + Defaults to ``pandas.options.styler.sparse.index``, which is `True`. + sparse_columns : bool, optional + Whether to sparsify the display of a hierarchical index. Setting to False + will display each explicit level element in a hierarchical key for each + column. Defaults to ``pandas.options.styler.sparse.columns``, which + is `True`. + multirow_align : {"c", "t", "b", "naive"}, optional + If sparsifying hierarchical MultiIndexes whether to align text centrally, + at the top or bottom using the multirow package. If not given defaults to + ``pandas.options.styler.latex.multirow_align``, which is `"c"`. + If "naive" is given renders without multirow. + + .. versionchanged:: 1.4.0 + multicol_align : {"r", "c", "l", "naive-l", "naive-r"}, optional + If sparsifying hierarchical MultiIndex columns whether to align text at + the left, centrally, or at the right. If not given defaults to + ``pandas.options.styler.latex.multicol_align``, which is "r". + If a naive option is given renders without multicol. + Pipe decorators can also be added to non-naive values to draw vertical + rules, e.g. "\|r" will draw a rule on the left side of right aligned merged + cells. + + .. versionchanged:: 1.4.0 + siunitx : bool, default False + Set to ``True`` to structure LaTeX compatible with the {siunitx} package. + environment : str, optional + If given, the environment that will replace 'table' in ``\\begin{table}``. + If 'longtable' is specified then a more suitable template is + rendered. If not given defaults to + ``pandas.options.styler.latex.environment``, which is `None`. + + .. versionadded:: 1.4.0 + encoding : str, optional + Character encoding setting. Defaults + to ``pandas.options.styler.render.encoding``, which is "utf-8". + convert_css : bool, default False + Convert simple cell-styles from CSS to LaTeX format. Any CSS not found in + conversion table is dropped. A style can be forced by adding option + `--latex`. See notes. + + Returns + ------- + str or None + If `buf` is None, returns the result as a string. Otherwise returns `None`. + + See Also + -------- + Styler.format: Format the text display value of cells.
+ + Notes + ----- + **Latex Packages** + + For the following features we recommend the following LaTeX inclusions: + + ===================== ========================================================== + Feature Inclusion + ===================== ========================================================== + sparse columns none: included within default {tabular} environment + sparse rows \\usepackage{multirow} + hrules \\usepackage{booktabs} + colors \\usepackage[table]{xcolor} + siunitx \\usepackage{siunitx} + bold (with siunitx) | \\usepackage{etoolbox} + | \\robustify\\bfseries + | \\sisetup{detect-all = true} *(within {document})* + italic (with siunitx) | \\usepackage{etoolbox} + | \\robustify\\itshape + | \\sisetup{detect-all = true} *(within {document})* + environment \\usepackage{longtable} if arg is "longtable" + | or any other relevant environment package + hyperlinks \\usepackage{hyperref} + ===================== ========================================================== + + **Cell Styles** + + LaTeX styling can only be rendered if the accompanying styling functions have + been constructed with appropriate LaTeX commands. All styling + functionality is built around the concept of a CSS ``(<attribute>, <value>)`` + pair (see `Table Visualization <../../user_guide/style.ipynb>`_), and this + should be replaced by a LaTeX + ``(<command>, <options>)`` approach. Each cell will be styled individually + using nested LaTeX commands with their accompanied options. + + For example the following code will highlight and bold a cell in HTML-CSS: + + >>> df = pd.DataFrame([[1,2], [3,4]]) + >>> s = df.style.highlight_max(axis=None, + ... props='background-color:red; font-weight:bold;') + >>> s.to_html() # doctest: +SKIP + + The equivalent using LaTeX only commands is the following: + + >>> s = df.style.highlight_max(axis=None, + ... props='cellcolor:{red}; bfseries: ;') + >>> s.to_latex() # doctest: +SKIP + + Internally these structured LaTeX ``(<command>, <options>)`` pairs + are translated to the + ``display_value`` with the default structure: + ``\<command><options> <display_value>``. + Where there are multiple commands the latter is nested recursively, so that + the above example highlighted cell is rendered as + ``\cellcolor{red} \bfseries 4``. + + Occasionally this format does not suit the applied command, or + combination of LaTeX packages that is in use, so additional flags can be + added to the ``<options>``, within the tuple, to result in different + positions of required braces (the **default** being the same as ``--nowrap``): + + =================================== ============================================ + Tuple Format Output Structure + =================================== ============================================ + (<command>,<options>) \\<command><options> <display_value> + (<command>,<options> ``--nowrap``) \\<command><options> <display_value> + (<command>,<options> ``--rwrap``) \\<command><options>{<display_value>} + (<command>,<options> ``--wrap``) {\\<command><options> <display_value>} + (<command>,<options> ``--lwrap``) {\\<command><options>} <display_value> + (<command>,<options> ``--dwrap``) {\\<command><options>}{<display_value>} + =================================== ============================================ + + For example the `textbf` command for font-weight + should always be used with `--rwrap` so ``('textbf', '--rwrap')`` will render a + working cell, wrapped with braces, as ``\textbf{<display_value>}``. + + A more comprehensive example is as follows: + + >>> df = pd.DataFrame([[1, 2.2, "dogs"], [3, 4.4, "cats"], [2, 6.6, "cows"]], + ... index=["ix1", "ix2", "ix3"], + ... columns=["Integers", "Floats", "Strings"]) + >>> s = df.style.highlight_max( + ... props='cellcolor:[HTML]{FFFF00}; color:{red};' + ... 'textit:--rwrap; textbf:--rwrap;' + ... ) + >>> s.to_latex() # doctest: +SKIP + + ..
figure:: ../../_static/style/latex_1.png + + **Table Styles** + + Internally Styler uses its ``table_styles`` object to parse the + ``column_format``, ``position``, ``position_float``, and ``label`` + input arguments. These arguments are added to table styles in the format: + + .. code-block:: python + + set_table_styles([ + {"selector": "column_format", "props": f":{column_format};"}, + {"selector": "position", "props": f":{position};"}, + {"selector": "position_float", "props": f":{position_float};"}, + {"selector": "label", "props": f":{{{label.replace(':','§')}}};"} + ], overwrite=False) + + Exception is made for the ``hrules`` argument which, in fact, controls all three + commands: ``toprule``, ``bottomrule`` and ``midrule`` simultaneously. Instead of + setting ``hrules`` to ``True``, it is also possible to set each + individual rule definition, by manually setting the ``table_styles``, + for example below we set a regular ``toprule``, set an ``hline`` for + ``bottomrule`` and exclude the ``midrule``: + + .. code-block:: python + + set_table_styles([ + {'selector': 'toprule', 'props': ':toprule;'}, + {'selector': 'bottomrule', 'props': ':hline;'}, + ], overwrite=False) + + If other ``commands`` are added to table styles they will be detected, and + positioned immediately above the '\\begin{tabular}' command. For example to + add odd and even row coloring, from the {colortbl} package, in format + ``\rowcolors{1}{pink}{red}``, use: + + .. code-block:: python + + set_table_styles([ + {'selector': 'rowcolors', 'props': ':{1}{pink}{red};'} + ], overwrite=False) + + A more comprehensive example using these arguments is as follows: + + >>> df.columns = pd.MultiIndex.from_tuples([ + ... ("Numeric", "Integers"), + ... ("Numeric", "Floats"), + ... ("Non-Numeric", "Strings") + ... ]) + >>> df.index = pd.MultiIndex.from_tuples([ + ... ("L0", "ix1"), ("L0", "ix2"), ("L1", "ix3") + ... ]) + >>> s = df.style.highlight_max( + ... props='cellcolor:[HTML]{FFFF00}; color:{red}; itshape:; bfseries:;' + ... ) + >>> s.to_latex( + ... column_format="rrrrr", position="h", position_float="centering", + ... hrules=True, label="table:5", caption="Styled LaTeX Table", + ... multirow_align="t", multicol_align="r" + ... ) # doctest: +SKIP + + .. figure:: ../../_static/style/latex_2.png + + **Formatting** + + To format values :meth:`Styler.format` should be used prior to calling + `Styler.to_latex`, as well as other methods such as :meth:`Styler.hide` + for example: + + >>> s.clear() + >>> s.table_styles = [] + >>> s.caption = None + >>> s.format({ + ... ("Numeric", "Integers"): '\${}', + ... ("Numeric", "Floats"): '{:.3f}', + ... ("Non-Numeric", "Strings"): str.upper + ... }) # doctest: +SKIP + Numeric Non-Numeric + Integers Floats Strings + L0 ix1 $1 2.200 DOGS + ix2 $3 4.400 CATS + L1 ix3 $2 6.600 COWS + + >>> s.to_latex() # doctest: +SKIP + \begin{tabular}{llrrl} + {} & {} & \multicolumn{2}{r}{Numeric} & {Non-Numeric} \\ + {} & {} & {Integers} & {Floats} & {Strings} \\ + \multirow[c]{2}{*}{L0} & ix1 & \$1 & 2.200 & DOGS \\ + & ix2 & \$3 & 4.400 & CATS \\ + L1 & ix3 & \$2 & 6.600 & COWS \\ + \end{tabular} + + **CSS Conversion** + + This method can convert a Styler constructed with HTML-CSS to LaTeX using + the following limited conversions.
+
+        ================== ==================== ============= ==========================
+        CSS Attribute      CSS value            LaTeX Command LaTeX Options
+        ================== ==================== ============= ==========================
+        font-weight        | bold               | bfseries
+                           | bolder             | bfseries
+        font-style         | italic             | itshape
+                           | oblique            | slshape
+        background-color   | red                cellcolor     | {red}--lwrap
+                           | #fe01ea                          | [HTML]{FE01EA}--lwrap
+                           | #f0e                             | [HTML]{FF00EE}--lwrap
+                           | rgb(128,255,0)                   | [rgb]{0.5,1,0}--lwrap
+                           | rgba(128,0,0,0.5)                | [rgb]{0.5,0,0}--lwrap
+                           | rgb(25%,255,50%)                 | [rgb]{0.25,1,0.5}--lwrap
+        color              | red                color         | {red}
+                           | #fe01ea                          | [HTML]{FE01EA}
+                           | #f0e                             | [HTML]{FF00EE}
+                           | rgb(128,255,0)                   | [rgb]{0.5,1,0}
+                           | rgba(128,0,0,0.5)                | [rgb]{0.5,0,0}
+                           | rgb(25%,255,50%)                 | [rgb]{0.25,1,0.5}
+        ================== ==================== ============= ==========================
+
+        It is also possible to add user-defined LaTeX only styles to a HTML-CSS Styler
+        using the ``--latex`` flag, and to add LaTeX parsing options that the
+        converter will detect within a CSS-comment.
+
+        >>> df = pd.DataFrame([[1]])
+        >>> df.style.set_properties(
+        ...     **{"font-weight": "bold /* --dwrap */", "Huge": "--latex--rwrap"}
+        ... ).to_latex(convert_css=True)  # doctest: +SKIP
+        \begin{tabular}{lr}
+        {} & {0} \\
+        0 & {\bfseries}{\Huge{1}} \\
+        \end{tabular}
+
+        Examples
+        --------
+        Below we give a complete step by step example adding some advanced features
+        and noting some common gotchas.
+
+        First we create the DataFrame and Styler as usual, including MultiIndex rows
+        and columns, which allow for more advanced formatting options:
+
+        >>> cidx = pd.MultiIndex.from_arrays([
+        ...     ["Equity", "Equity", "Equity", "Equity",
+        ...      "Stats", "Stats", "Stats", "Stats", "Rating"],
+        ...     ["Energy", "Energy", "Consumer", "Consumer", "", "", "", "", ""],
+        ...     ["BP", "Shell", "H&M", "Unilever",
+        ...      "Std Dev", "Variance", "52w High", "52w Low", ""]
+        ... ])
+        >>> iidx = pd.MultiIndex.from_arrays([
+        ...     ["Equity", "Equity", "Equity", "Equity"],
+        ...     ["Energy", "Energy", "Consumer", "Consumer"],
+        ...     ["BP", "Shell", "H&M", "Unilever"]
+        ... ])
+        >>> styler = pd.DataFrame([
+        ...     [1, 0.8, 0.66, 0.72, 32.1678, 32.1678**2, 335.12, 240.89, "Buy"],
+        ...     [0.8, 1.0, 0.69, 0.79, 1.876, 1.876**2, 14.12, 19.78, "Hold"],
+        ...     [0.66, 0.69, 1.0, 0.86, 7, 7**2, 210.9, 140.6, "Buy"],
+        ...     [0.72, 0.79, 0.86, 1.0, 213.76, 213.76**2, 2807, 3678, "Sell"],
+        ... ], columns=cidx, index=iidx).style
+
+        Second we will format the display and, since our table is quite wide, will
+        hide the repeated level-0 of the index:
+
+        >>> styler.format(subset="Equity", precision=2)
+        ...     .format(subset="Stats", precision=1, thousands=",")
+        ...     .format(subset="Rating", formatter=str.upper)
+        ...     .format_index(escape="latex", axis=1)
+        ...     .format_index(escape="latex", axis=0)
+        ...     .hide(level=0, axis=0)  # doctest: +SKIP
+
+        Note that one of the string entries of the index and column headers is "H&M".
+        Without applying the `escape="latex"` option to the `format_index` method the
+        resultant LaTeX will fail to render, and the error returned is quite
+        difficult to debug. Using the appropriate escape the "&" is converted to "\\&".
+
+        Thirdly we will apply some (CSS-HTML) styles to our object. We will use a
+        builtin method and also define our own method to highlight the stock
+        recommendation:
+
+        >>> def rating_color(v):
+        ...     if v == "Buy": color = "#33ff85"
+        ...     elif v == "Sell": color = "#ff5933"
+        ...     else: color = "#ffdd33"
+        ...     return f"color: {color}; font-weight: bold;"
+        >>> styler.background_gradient(cmap="inferno", subset="Equity", vmin=0, vmax=1)
+        ...     .applymap(rating_color, subset="Rating")  # doctest: +SKIP
+
+        All the above styles will work with HTML (see below) and LaTeX upon conversion:
+
+        .. figure:: ../../_static/style/latex_stocks_html.png
+
+        However, we finally want to add one LaTeX-only style
+        (from the {graphicx} package) that is not easy to convert from CSS and
+        that pandas does not support. Notice the `--latex` flag used here,
+        as well as `--rwrap` to ensure this is formatted correctly and
+        not ignored upon conversion.
+
+        >>> styler.applymap_index(
+        ...     lambda v: "rotatebox:{45}--rwrap--latex;", level=2, axis=1
+        ... )  # doctest: +SKIP
+
+        Finally we render our LaTeX adding in other options as required:
+
+        >>> styler.to_latex(
+        ...     caption="Selected stock correlation and simple statistics.",
+        ...     clines="skip-last;data",
+        ...     convert_css=True,
+        ...     position_float="centering",
+        ...     multicol_align="|c|",
+        ...     hrules=True,
+        ... )  # doctest: +SKIP
+        \begin{table}
+        \centering
+        \caption{Selected stock correlation and simple statistics.}
+        \begin{tabular}{llrrrrrrrrl}
+        \toprule
+         &  & \multicolumn{4}{|c|}{Equity} & \multicolumn{4}{|c|}{Stats} & Rating \\
+         &  & \multicolumn{2}{|c|}{Energy} & \multicolumn{2}{|c|}{Consumer} &
+        \multicolumn{4}{|c|}{} &  \\
+         &  & \rotatebox{45}{BP} & \rotatebox{45}{Shell} & \rotatebox{45}{H\&M} &
+        \rotatebox{45}{Unilever} & \rotatebox{45}{Std Dev} & \rotatebox{45}{Variance} &
+        \rotatebox{45}{52w High} & \rotatebox{45}{52w Low} & \rotatebox{45}{} \\
+        \midrule
+        \multirow[c]{2}{*}{Energy} & BP & {\cellcolor[HTML]{FCFFA4}}
+        \color[HTML]{000000} 1.00 & {\cellcolor[HTML]{FCA50A}} \color[HTML]{000000}
+        0.80 & {\cellcolor[HTML]{EB6628}} \color[HTML]{F1F1F1} 0.66 &
+        {\cellcolor[HTML]{F68013}} \color[HTML]{F1F1F1} 0.72 & 32.2 & 1,034.8 & 335.1
+        & 240.9 & \color[HTML]{33FF85} \bfseries BUY \\
+         & Shell & {\cellcolor[HTML]{FCA50A}} \color[HTML]{000000} 0.80 &
+        {\cellcolor[HTML]{FCFFA4}} \color[HTML]{000000} 1.00 &
+        {\cellcolor[HTML]{F1731D}} \color[HTML]{F1F1F1} 0.69 &
+        {\cellcolor[HTML]{FCA108}} \color[HTML]{000000} 0.79 & 1.9 & 3.5 & 14.1 &
+        19.8 & \color[HTML]{FFDD33} \bfseries HOLD \\
+        \cline{1-11}
+        \multirow[c]{2}{*}{Consumer} & H\&M & {\cellcolor[HTML]{EB6628}}
+        \color[HTML]{F1F1F1} 0.66 & {\cellcolor[HTML]{F1731D}} \color[HTML]{F1F1F1}
+        0.69 & {\cellcolor[HTML]{FCFFA4}} \color[HTML]{000000} 1.00 &
+        {\cellcolor[HTML]{FAC42A}} \color[HTML]{000000} 0.86 & 7.0 & 49.0 & 210.9 &
+        140.6 & \color[HTML]{33FF85} \bfseries BUY \\
+         & Unilever & {\cellcolor[HTML]{F68013}} \color[HTML]{F1F1F1} 0.72 &
+        {\cellcolor[HTML]{FCA108}} \color[HTML]{000000} 0.79 &
+        {\cellcolor[HTML]{FAC42A}} \color[HTML]{000000} 0.86 &
+        {\cellcolor[HTML]{FCFFA4}} \color[HTML]{000000} 1.00 & 213.8 & 45,693.3 &
+        2,807.0 & 3,678.0 & \color[HTML]{FF5933} \bfseries SELL \\
+        \cline{1-11}
+        \bottomrule
+        \end{tabular}
+        \end{table}
+
+        ..
figure:: ../../_static/style/latex_stocks.png + """ + obj = self._copy(deepcopy=True) # manipulate table_styles on obj, not self + + table_selectors = ( + [style["selector"] for style in self.table_styles] + if self.table_styles is not None + else [] + ) + + if column_format is not None: + # add more recent setting to table_styles + obj.set_table_styles( + [{"selector": "column_format", "props": f":{column_format}"}], + overwrite=False, + ) + elif "column_format" in table_selectors: + pass # adopt what has been previously set in table_styles + else: + # create a default: set float, complex, int cols to 'r' ('S'), index to 'l' + _original_columns = self.data.columns + self.data.columns = RangeIndex(stop=len(self.data.columns)) + numeric_cols = self.data._get_numeric_data().columns.to_list() + self.data.columns = _original_columns + column_format = "" + for level in range(self.index.nlevels): + column_format += "" if self.hide_index_[level] else "l" + for ci, _ in enumerate(self.data.columns): + if ci not in self.hidden_columns: + column_format += ( + ("r" if not siunitx else "S") if ci in numeric_cols else "l" + ) + obj.set_table_styles( + [{"selector": "column_format", "props": f":{column_format}"}], + overwrite=False, + ) + + if position: + obj.set_table_styles( + [{"selector": "position", "props": f":{position}"}], + overwrite=False, + ) + + if position_float: + if environment == "longtable": + raise ValueError( + "`position_float` cannot be used in 'longtable' `environment`" + ) + if position_float not in ["raggedright", "raggedleft", "centering"]: + raise ValueError( + f"`position_float` should be one of " + f"'raggedright', 'raggedleft', 'centering', " + f"got: '{position_float}'" + ) + obj.set_table_styles( + [{"selector": "position_float", "props": f":{position_float}"}], + overwrite=False, + ) + + hrules = get_option("styler.latex.hrules") if hrules is None else hrules + if hrules: + obj.set_table_styles( + [ + {"selector": "toprule", "props": ":toprule"}, + {"selector": "midrule", "props": ":midrule"}, + {"selector": "bottomrule", "props": ":bottomrule"}, + ], + overwrite=False, + ) + + if label: + obj.set_table_styles( + [{"selector": "label", "props": f":{{{label.replace(':', '§')}}}"}], + overwrite=False, + ) + + if caption: + obj.set_caption(caption) + + if sparse_index is None: + sparse_index = get_option("styler.sparse.index") + if sparse_columns is None: + sparse_columns = get_option("styler.sparse.columns") + environment = environment or get_option("styler.latex.environment") + multicol_align = multicol_align or get_option("styler.latex.multicol_align") + multirow_align = multirow_align or get_option("styler.latex.multirow_align") + latex = obj._render_latex( + sparse_index=sparse_index, + sparse_columns=sparse_columns, + multirow_align=multirow_align, + multicol_align=multicol_align, + environment=environment, + convert_css=convert_css, + siunitx=siunitx, + clines=clines, + ) + + encoding = encoding or get_option("styler.render.encoding") + return save_to_buffer( + latex, buf=buf, encoding=None if buf is None else encoding + ) + + def to_html( + self, + buf: FilePath | WriteBuffer[str] | None = None, + *, + table_uuid: str | None = None, + table_attributes: str | None = None, + sparse_index: bool | None = None, + sparse_columns: bool | None = None, + bold_headers: bool = False, + caption: str | None = None, + max_rows: int | None = None, + max_columns: int | None = None, + encoding: str | None = None, + doctype_html: bool = False, + exclude_styles: bool = False, + 
**kwargs,
+    ):
+        """
+        Write Styler to a file, buffer or string in HTML-CSS format.
+
+        .. versionadded:: 1.3.0
+
+        Parameters
+        ----------
+        buf : str, path object, file-like object, or None, default None
+            String, path object (implementing ``os.PathLike[str]``), or file-like
+            object implementing a string ``write()`` function. If None, the result is
+            returned as a string.
+        table_uuid : str, optional
+            Id attribute assigned to the <table> HTML element in the format:
+
+            ``<table id="T_<table_uuid>" ..>``
+
+            If not given uses Styler's initially assigned value.
+        table_attributes : str, optional
+            Attributes to assign within the `<table>` HTML element in the format:
+
+            ``<table .. <table_attributes> >``
+
+            If not given defaults to Styler's preexisting value.
+        sparse_index : bool, optional
+            Whether to sparsify the display of a hierarchical index. Setting to False
+            will display each explicit level element in a hierarchical key for each row.
+            Defaults to ``pandas.options.styler.sparse.index`` value.
+
+            .. versionadded:: 1.4.0
+        sparse_columns : bool, optional
+            Whether to sparsify the display of a hierarchical index. Setting to False
+            will display each explicit level element in a hierarchical key for each
+            column. Defaults to ``pandas.options.styler.sparse.columns`` value.
+
+            .. versionadded:: 1.4.0
+        bold_headers : bool, optional
+            Adds "font-weight: bold;" as a CSS property to table style header cells.
+
+            .. versionadded:: 1.4.0
+        caption : str, optional
+            Set, or overwrite, the caption on Styler before rendering.
+
+            .. versionadded:: 1.4.0
+        max_rows : int, optional
+            The maximum number of rows that will be rendered. Defaults to
+            ``pandas.options.styler.render.max_rows/max_columns``.
+
+            .. versionadded:: 1.4.0
+        max_columns : int, optional
+            The maximum number of columns that will be rendered. Defaults to
+            ``pandas.options.styler.render.max_columns``, which is None.
+
+            Rows and columns may be reduced if the number of total elements is
+            large. This value is set to ``pandas.options.styler.render.max_elements``,
+            which is 262144 (18 bit browser rendering).
+
+            .. versionadded:: 1.4.0
+        encoding : str, optional
+            Character encoding setting for file output, and HTML meta tags.
+            Defaults to ``pandas.options.styler.render.encoding`` value of "utf-8".
+        doctype_html : bool, default False
+            Whether to output a fully structured HTML file including all
+            HTML elements, or just the core ``<style>`` and ``<table>`` elements.
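+
+        A minimal usage sketch (the values here are hypothetical; the parameters are
+        as described above):
+
+        >>> df = pd.DataFrame([[1, 2]])
+        >>> html = df.style.to_html(table_uuid="demo", bold_headers=True)
+        >>> "T_demo" in html  # doctest: +SKIP
+        True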
+        Form of the output with new additional css classes,
+
+        >>> df = pd.DataFrame([[1]])
+        >>> css = pd.DataFrame([["other-class"]])
+        >>> s = Styler(df, uuid="_", cell_ids=False).set_td_classes(css)
+        >>> s.hide(axis=0).to_html()  # doctest: +SKIP
+        '<style type="text/css"></style>'
+        '<table id="T__">'
+        '  <thead>'
+        '    <tr><th class="col_heading level0 col0" >0</th></tr>'
+        '  </thead>'
+        '  <tbody>'
+        '    <tr><td class="data row0 col0 other-class" >1</td></tr>'
+        '  </tbody>'
+        '</table>'
+        """
+        if not classes.index.is_unique or not classes.columns.is_unique:
+            raise KeyError(
+                "Classes render only if `classes` has unique index and columns."
+            )
+        classes = classes.reindex_like(self.data)
+
+        for r, row_tup in enumerate(classes.itertuples()):
+            for c, value in enumerate(row_tup[1:]):
+                if not (pd.isna(value) or value == ""):
+                    self.cell_context[(r, c)] = str(value)
+
+        return self
+
+    def _update_ctx(self, attrs: DataFrame) -> None:
+        """
+        Update the state of the ``Styler`` for data cells.
+
+        Collects a mapping of {index_label: [('<property>', '<value>'), ..]}.
+
+        Parameters
+        ----------
+        attrs : DataFrame
+            should contain strings of '<property>: <value>;<prop2>: <val2>'
+            Whitespace shouldn't matter and the final trailing ';' shouldn't
+            matter.
+        """
+        if not self.index.is_unique or not self.columns.is_unique:
+            raise KeyError(
+                "`Styler.apply` and `.applymap` are not compatible "
+                "with non-unique index or columns."
+            )
+
+        for cn in attrs.columns:
+            j = self.columns.get_loc(cn)
+            ser = attrs[cn]
+            for rn, c in ser.items():
+                if not c or pd.isna(c):
+                    continue
+                css_list = maybe_convert_css_to_tuples(c)
+                i = self.index.get_loc(rn)
+                self.ctx[(i, j)].extend(css_list)
+
+    def _update_ctx_header(self, attrs: DataFrame, axis: int) -> None:
+        """
+        Update the state of the ``Styler`` for header cells.
+
+        Collects a mapping of {index_label: [('<property>', '<value>'), ..]}.
+
+        Parameters
+        ----------
+        attrs : Series
+            Should contain strings of '<property>: <value>;<prop2>: <val2>', and an
+            integer index.
+            Whitespace shouldn't matter and the final trailing ';' shouldn't
+            matter.
+        axis : int
+            Identifies whether the ctx object being updated is the index or columns
+        """
+        for j in attrs.columns:
+            ser = attrs[j]
+            for i, c in ser.items():
+                if not c:
+                    continue
+                css_list = maybe_convert_css_to_tuples(c)
+                if axis == 0:
+                    self.ctx_index[(i, j)].extend(css_list)
+                else:
+                    self.ctx_columns[(j, i)].extend(css_list)
+
+    def _copy(self, deepcopy: bool = False) -> Styler:
+        """
+        Copies a Styler, allowing for deepcopy or shallow copy
+
+        Copying a Styler aims to recreate a new Styler object which contains the same
+        data and styles as the original.
+ + Data dependent attributes [copied and NOT exported]: + - formatting (._display_funcs) + - hidden index values or column values (.hidden_rows, .hidden_columns) + - tooltips + - cell_context (cell css classes) + - ctx (cell css styles) + - caption + + Non-data dependent attributes [copied and exported]: + - css + - hidden index state and hidden columns state (.hide_index_, .hide_columns_) + - table_attributes + - table_styles + - applied styles (_todo) + + """ + # GH 40675 + styler = Styler( + self.data, # populates attributes 'data', 'columns', 'index' as shallow + ) + shallow = [ # simple string or boolean immutables + "hide_index_", + "hide_columns_", + "hide_column_names", + "hide_index_names", + "table_attributes", + "cell_ids", + "caption", + "uuid", + "uuid_len", + "template_latex", # also copy templates if these have been customised + "template_html_style", + "template_html_table", + "template_html", + ] + deep = [ # nested lists or dicts + "css", + "_display_funcs", + "_display_funcs_index", + "_display_funcs_columns", + "hidden_rows", + "hidden_columns", + "ctx", + "ctx_index", + "ctx_columns", + "cell_context", + "_todo", + "table_styles", + "tooltips", + ] + + for attr in shallow: + setattr(styler, attr, getattr(self, attr)) + + for attr in deep: + val = getattr(self, attr) + setattr(styler, attr, copy.deepcopy(val) if deepcopy else val) + + return styler + + def __copy__(self) -> Styler: + return self._copy(deepcopy=False) + + def __deepcopy__(self, memo) -> Styler: + return self._copy(deepcopy=True) + + def clear(self) -> None: + """ + Reset the ``Styler``, removing any previously applied styles. + + Returns None. + """ + # create default GH 40675 + clean_copy = Styler(self.data, uuid=self.uuid) + clean_attrs = [a for a in clean_copy.__dict__ if not callable(a)] + self_attrs = [a for a in self.__dict__ if not callable(a)] # maybe more attrs + for attr in clean_attrs: + setattr(self, attr, getattr(clean_copy, attr)) + for attr in set(self_attrs).difference(clean_attrs): + delattr(self, attr) + + def _apply( + self, + func: Callable, + axis: Axis | None = 0, + subset: Subset | None = None, + **kwargs, + ) -> Styler: + subset = slice(None) if subset is None else subset + subset = non_reducing_slice(subset) + data = self.data.loc[subset] + if axis is None: + result = func(data, **kwargs) + if not isinstance(result, DataFrame): + if not isinstance(result, np.ndarray): + raise TypeError( + f"Function {repr(func)} must return a DataFrame or ndarray " + f"when passed to `Styler.apply` with axis=None" + ) + if not (data.shape == result.shape): + raise ValueError( + f"Function {repr(func)} returned ndarray with wrong shape.\n" + f"Result has shape: {result.shape}\n" + f"Expected shape: {data.shape}" + ) + result = DataFrame(result, index=data.index, columns=data.columns) + else: + axis = self.data._get_axis_number(axis) + if axis == 0: + result = data.apply(func, axis=0, **kwargs) + else: + result = data.T.apply(func, axis=0, **kwargs).T # see GH 42005 + + if isinstance(result, Series): + raise ValueError( + f"Function {repr(func)} resulted in the apply method collapsing to a " + f"Series.\nUsually, this is the result of the function returning a " + f"single value, instead of list-like." 
+ ) + msg = ( + f"Function {repr(func)} created invalid {{0}} labels.\nUsually, this is " + f"the result of the function returning a " + f"{'Series' if axis is not None else 'DataFrame'} which contains invalid " + f"labels, or returning an incorrectly shaped, list-like object which " + f"cannot be mapped to labels, possibly due to applying the function along " + f"the wrong axis.\n" + f"Result {{0}} has shape: {{1}}\n" + f"Expected {{0}} shape: {{2}}" + ) + if not all(result.index.isin(data.index)): + raise ValueError(msg.format("index", result.index.shape, data.index.shape)) + if not all(result.columns.isin(data.columns)): + raise ValueError( + msg.format("columns", result.columns.shape, data.columns.shape) + ) + self._update_ctx(result) + return self + + @Substitution(subset=subset) + def apply( + self, + func: Callable, + axis: Axis | None = 0, + subset: Subset | None = None, + **kwargs, + ) -> Styler: + """ + Apply a CSS-styling function column-wise, row-wise, or table-wise. + + Updates the HTML representation with the result. + + Parameters + ---------- + func : function + ``func`` should take a Series if ``axis`` in [0,1] and return a list-like + object of same length, or a Series, not necessarily of same length, with + valid index labels considering ``subset``. + ``func`` should take a DataFrame if ``axis`` is ``None`` and return either + an ndarray with the same shape or a DataFrame, not necessarily of the same + shape, with valid index and columns labels considering ``subset``. + + .. versionchanged:: 1.3.0 + + .. versionchanged:: 1.4.0 + + axis : {0 or 'index', 1 or 'columns', None}, default 0 + Apply to each column (``axis=0`` or ``'index'``), to each row + (``axis=1`` or ``'columns'``), or to the entire DataFrame at once + with ``axis=None``. + %(subset)s + **kwargs : dict + Pass along to ``func``. + + Returns + ------- + self : Styler + + See Also + -------- + Styler.applymap_index: Apply a CSS-styling function to headers elementwise. + Styler.apply_index: Apply a CSS-styling function to headers level-wise. + Styler.applymap: Apply a CSS-styling function elementwise. + + Notes + ----- + The elements of the output of ``func`` should be CSS styles as strings, in the + format 'attribute: value; attribute2: value2; ...' or, + if nothing is to be applied to that element, an empty string or ``None``. + + This is similar to ``DataFrame.apply``, except that ``axis=None`` + applies the function to the entire DataFrame at once, + rather than column-wise or row-wise. + + Examples + -------- + >>> def highlight_max(x, color): + ... return np.where(x == np.nanmax(x.to_numpy()), f"color: {color};", None) + >>> df = pd.DataFrame(np.random.randn(5, 2), columns=["A", "B"]) + >>> df.style.apply(highlight_max, color='red') # doctest: +SKIP + >>> df.style.apply(highlight_max, color='blue', axis=1) # doctest: +SKIP + >>> df.style.apply(highlight_max, color='green', axis=None) # doctest: +SKIP + + Using ``subset`` to restrict application to a single column or multiple columns + + >>> df.style.apply(highlight_max, color='red', subset="A") + ... # doctest: +SKIP + >>> df.style.apply(highlight_max, color='red', subset=["A", "B"]) + ... # doctest: +SKIP + + Using a 2d input to ``subset`` to select rows in addition to columns + + >>> df.style.apply(highlight_max, color='red', subset=([0,1,2], slice(None))) + ... # doctest: +SKIP + >>> df.style.apply(highlight_max, color='red', subset=(slice(0,5,2), "A")) + ... 
# doctest: +SKIP + + Using a function which returns a Series / DataFrame of unequal length but + containing valid index labels + + >>> df = pd.DataFrame([[1, 2], [3, 4], [4, 6]], index=["A1", "A2", "Total"]) + >>> total_style = pd.Series("font-weight: bold;", index=["Total"]) + >>> df.style.apply(lambda s: total_style) # doctest: +SKIP + + See `Table Visualization <../../user_guide/style.ipynb>`_ user guide for + more details. + """ + self._todo.append( + (lambda instance: getattr(instance, "_apply"), (func, axis, subset), kwargs) + ) + return self + + def _apply_index( + self, + func: Callable, + axis: int | str = 0, + level: Level | list[Level] | None = None, + method: str = "apply", + **kwargs, + ) -> Styler: + axis = self.data._get_axis_number(axis) + obj = self.index if axis == 0 else self.columns + + levels_ = refactor_levels(level, obj) + data = DataFrame(obj.to_list()).loc[:, levels_] + + if method == "apply": + result = data.apply(func, axis=0, **kwargs) + elif method == "applymap": + result = data.applymap(func, **kwargs) + + self._update_ctx_header(result, axis) + return self + + @doc( + this="apply", + wise="level-wise", + alt="applymap", + altwise="elementwise", + func="take a Series and return a string array of the same length", + axis='{0, 1, "index", "columns"}', + input_note="the index as a Series, if an Index, or a level of a MultiIndex", + output_note="an identically sized array of CSS styles as strings", + var="s", + ret='np.where(s == "B", "background-color: yellow;", "")', + ret2='["background-color: yellow;" if "x" in v else "" for v in s]', + ) + def apply_index( + self, + func: Callable, + axis: int | str = 0, + level: Level | list[Level] | None = None, + **kwargs, + ) -> Styler: + """ + Apply a CSS-styling function to the index or column headers, {wise}. + + Updates the HTML representation with the result. + + .. versionadded:: 1.4.0 + + Parameters + ---------- + func : function + ``func`` should {func}. + axis : {axis} + The headers over which to apply the function. + level : int, str, list, optional + If index is MultiIndex the level(s) over which to apply the function. + **kwargs : dict + Pass along to ``func``. + + Returns + ------- + self : Styler + + See Also + -------- + Styler.{alt}_index: Apply a CSS-styling function to headers {altwise}. + Styler.apply: Apply a CSS-styling function column-wise, row-wise, or table-wise. + Styler.applymap: Apply a CSS-styling function elementwise. + + Notes + ----- + Each input to ``func`` will be {input_note}. The output of ``func`` should be + {output_note}, in the format 'attribute: value; attribute2: value2; ...' + or, if nothing is to be applied to that element, an empty string or ``None``. + + Examples + -------- + Basic usage to conditionally highlight values in the index. + + >>> df = pd.DataFrame([[1,2], [3,4]], index=["A", "B"]) + >>> def color_b(s): + ... return {ret} + >>> df.style.{this}_index(color_b) # doctest: +SKIP + + .. figure:: ../../_static/style/appmaphead1.png + + Selectively applying to specific levels of MultiIndex columns. + + >>> midx = pd.MultiIndex.from_product([['ix', 'jy'], [0, 1], ['x3', 'z4']]) + >>> df = pd.DataFrame([np.arange(8)], columns=midx) + >>> def highlight_x({var}): + ... return {ret2} + >>> df.style.{this}_index(highlight_x, axis="columns", level=[0, 2]) + ... # doctest: +SKIP + + .. 
figure:: ../../_static/style/appmaphead2.png + """ + self._todo.append( + ( + lambda instance: getattr(instance, "_apply_index"), + (func, axis, level, "apply"), + kwargs, + ) + ) + return self + + @doc( + apply_index, + this="applymap", + wise="elementwise", + alt="apply", + altwise="level-wise", + func="take a scalar and return a string", + axis='{0, 1, "index", "columns"}', + input_note="an index value, if an Index, or a level value of a MultiIndex", + output_note="CSS styles as a string", + var="v", + ret='"background-color: yellow;" if v == "B" else None', + ret2='"background-color: yellow;" if "x" in v else None', + ) + def applymap_index( + self, + func: Callable, + axis: int | str = 0, + level: Level | list[Level] | None = None, + **kwargs, + ) -> Styler: + self._todo.append( + ( + lambda instance: getattr(instance, "_apply_index"), + (func, axis, level, "applymap"), + kwargs, + ) + ) + return self + + def _applymap( + self, func: Callable, subset: Subset | None = None, **kwargs + ) -> Styler: + func = partial(func, **kwargs) # applymap doesn't take kwargs? + if subset is None: + subset = IndexSlice[:] + subset = non_reducing_slice(subset) + result = self.data.loc[subset].applymap(func) + self._update_ctx(result) + return self + + @Substitution(subset=subset) + def applymap( + self, func: Callable, subset: Subset | None = None, **kwargs + ) -> Styler: + """ + Apply a CSS-styling function elementwise. + + Updates the HTML representation with the result. + + Parameters + ---------- + func : function + ``func`` should take a scalar and return a string. + %(subset)s + **kwargs : dict + Pass along to ``func``. + + Returns + ------- + self : Styler + + See Also + -------- + Styler.applymap_index: Apply a CSS-styling function to headers elementwise. + Styler.apply_index: Apply a CSS-styling function to headers level-wise. + Styler.apply: Apply a CSS-styling function column-wise, row-wise, or table-wise. + + Notes + ----- + The elements of the output of ``func`` should be CSS styles as strings, in the + format 'attribute: value; attribute2: value2; ...' or, + if nothing is to be applied to that element, an empty string or ``None``. + + Examples + -------- + >>> def color_negative(v, color): + ... return f"color: {color};" if v < 0 else None + >>> df = pd.DataFrame(np.random.randn(5, 2), columns=["A", "B"]) + >>> df.style.applymap(color_negative, color='red') # doctest: +SKIP + + Using ``subset`` to restrict application to a single column or multiple columns + + >>> df.style.applymap(color_negative, color='red', subset="A") + ... # doctest: +SKIP + >>> df.style.applymap(color_negative, color='red', subset=["A", "B"]) + ... # doctest: +SKIP + + Using a 2d input to ``subset`` to select rows in addition to columns + + >>> df.style.applymap(color_negative, color='red', + ... subset=([0,1,2], slice(None))) # doctest: +SKIP + >>> df.style.applymap(color_negative, color='red', subset=(slice(0,5,2), "A")) + ... # doctest: +SKIP + + See `Table Visualization <../../user_guide/style.ipynb>`_ user guide for + more details. + """ + self._todo.append( + (lambda instance: getattr(instance, "_applymap"), (func, subset), kwargs) + ) + return self + + @Substitution(subset=subset) + def where( + self, + cond: Callable, + value: str, + other: str | None = None, + subset: Subset | None = None, + **kwargs, + ) -> Styler: + """ + Apply CSS-styles based on a conditional function elementwise. + + .. 
deprecated:: 1.3.0
+
+        Updates the HTML representation with a style which is
+        selected in accordance with the return value of a function.
+
+        Parameters
+        ----------
+        cond : callable
+            ``cond`` should take a scalar, and optional keyword arguments, and return
+            a boolean.
+        value : str
+            Applied when ``cond`` returns true.
+        other : str
+            Applied when ``cond`` returns false.
+        %(subset)s
+        **kwargs : dict
+            Pass along to ``cond``.
+
+        Returns
+        -------
+        self : Styler
+
+        See Also
+        --------
+        Styler.applymap: Apply a CSS-styling function elementwise.
+        Styler.apply: Apply a CSS-styling function column-wise, row-wise, or table-wise.
+
+        Notes
+        -----
+        This method is deprecated.
+
+        This method is a convenience wrapper for :meth:`Styler.applymap`, which we
+        recommend using instead.
+
+        The example:
+
+        >>> df = pd.DataFrame([[1, 2], [3, 4]])
+        >>> def cond(v, limit=4):
+        ...     return v > 1 and v != limit
+        >>> df.style.where(cond, value='color:green;', other='color:red;')
+        ...     # doctest: +SKIP
+
+        should be refactored to:
+
+        >>> def style_func(v, value, other, limit=4):
+        ...     cond = v > 1 and v != limit
+        ...     return value if cond else other
+        >>> df.style.applymap(style_func, value='color:green;', other='color:red;')
+        ...     # doctest: +SKIP
+        """
+        warnings.warn(
+            "this method is deprecated in favour of `Styler.applymap()`",
+            FutureWarning,
+            stacklevel=find_stack_level(),
+        )
+
+        if other is None:
+            other = ""
+
+        return self.applymap(
+            lambda val: value if cond(val, **kwargs) else other,
+            subset=subset,
+        )
+
+    def set_precision(self, precision: int) -> StylerRenderer:
+        """
+        Set the precision used to display values.
+
+        .. deprecated:: 1.3.0
+
+        Parameters
+        ----------
+        precision : int
+
+        Returns
+        -------
+        self : Styler
+
+        Notes
+        -----
+        This method is deprecated; see `Styler.format`.
+        """
+        warnings.warn(
+            "this method is deprecated in favour of `Styler.format(precision=..)`",
+            FutureWarning,
+            stacklevel=find_stack_level(),
+        )
+        self.precision = precision
+        return self.format(precision=precision, na_rep=self.na_rep)
+
+    def set_table_attributes(self, attributes: str) -> Styler:
+        """
+        Set the table attributes added to the ``<table>`` HTML element.
+
+        These are items in addition to automatic (by default) ``id`` attribute.
+
+        Parameters
+        ----------
+        attributes : str
+
+        Returns
+        -------
+        self : Styler
+
+        See Also
+        --------
+        Styler.set_table_styles: Set the table styles included within the ``<style>``
+            HTML element.
+
+    def _translate_body(self, idx_lengths: dict, max_rows: int, max_cols: int):
+        """
+        Build each <tr> within table <body> as a list
+
+        Also add elements to the cellstyle_map for more efficient grouped elements in
+        <style></style> block
+
+        Parameters
+        ----------
+        sparsify_index : bool
+            Whether index_headers section will add rowspan attributes (>1) to elements.
+
+        Returns
+        -------
+        body : list
+            The associated HTML elements needed for template rendering.
+        """
+        rlabels = self.data.index.tolist()
+        if not isinstance(self.data.index, MultiIndex):
+            rlabels = [[x] for x in rlabels]
+
+        body, row_count = [], 0
+        for r, row_tup in [
+            z for z in enumerate(self.data.itertuples()) if z[0] not in self.hidden_rows
+        ]:
+            row_count += 1
+            if row_count > max_rows:  # used only to add a '...' trimmed row:
+                trimmed_row = self._generate_trimmed_row(max_cols)
+                body.append(trimmed_row)
+                break
+            body_row = self._generate_body_row(
+                (r, row_tup, rlabels), max_cols, idx_lengths
+            )
+            body.append(body_row)
+        return body
+
+    def _generate_trimmed_row(self, max_cols: int) -> list:
+        """
+        When a render has too many rows we generate a trimming row containing "..."
+ + Parameters + ---------- + max_cols : int + Number of permissible columns + + Returns + ------- + list of elements + """ + index_headers = [ + _element( + "th", + ( + f"{self.css['row_heading']} {self.css['level']}{c} " + f"{self.css['row_trim']}" + ), + "...", + not self.hide_index_[c], + attributes="", + ) + for c in range(self.data.index.nlevels) + ] + + data, visible_col_count = [], 0 + for c, _ in enumerate(self.columns): + data_element_visible = c not in self.hidden_columns + if data_element_visible: + visible_col_count += 1 + if visible_col_count > max_cols: + data.append( + _element( + "td", + ( + f"{self.css['data']} {self.css['row_trim']} " + f"{self.css['col_trim']}" + ), + "...", + True, + attributes="", + ) + ) + break + + data.append( + _element( + "td", + f"{self.css['data']} {self.css['col']}{c} {self.css['row_trim']}", + "...", + data_element_visible, + attributes="", + ) + ) + + return index_headers + data + + def _generate_body_row( + self, + iter: tuple, + max_cols: int, + idx_lengths: dict, + ): + """ + Generate a regular row for the body section of appropriate format. + + +--------------------------------------------+---------------------------+ + | index_header_0 ... index_header_n | data_by_column ... | + +--------------------------------------------+---------------------------+ + + Parameters + ---------- + iter : tuple + Iterable from outer scope: row number, row data tuple, row index labels. + max_cols : int + Number of permissible columns. + idx_lengths : dict + A map of the sparsification structure of the index + + Returns + ------- + list of elements + """ + r, row_tup, rlabels = iter + + index_headers = [] + for c, value in enumerate(rlabels[r]): + header_element_visible = ( + _is_visible(r, c, idx_lengths) and not self.hide_index_[c] + ) + header_element = _element( + "th", + ( + f"{self.css['row_heading']} {self.css['level']}{c} " + f"{self.css['row']}{r}" + ), + value, + header_element_visible, + display_value=self._display_funcs_index[(r, c)](value), + attributes=( + f'rowspan="{idx_lengths.get((c, r), 0)}"' + if idx_lengths.get((c, r), 0) > 1 + else "" + ), + ) + + if self.cell_ids: + header_element[ + "id" + ] = f"{self.css['level']}{c}_{self.css['row']}{r}" # id is given + if ( + header_element_visible + and (r, c) in self.ctx_index + and self.ctx_index[r, c] + ): + # always add id if a style is specified + header_element["id"] = f"{self.css['level']}{c}_{self.css['row']}{r}" + self.cellstyle_map_index[tuple(self.ctx_index[r, c])].append( + f"{self.css['level']}{c}_{self.css['row']}{r}" + ) + + index_headers.append(header_element) + + data, visible_col_count = [], 0 + for c, value in enumerate(row_tup[1:]): + data_element_visible = ( + c not in self.hidden_columns and r not in self.hidden_rows + ) + if data_element_visible: + visible_col_count += 1 + if visible_col_count > max_cols: + data.append( + _element( + "td", + ( + f"{self.css['data']} {self.css['row']}{r} " + f"{self.css['col_trim']}" + ), + "...", + True, + attributes="", + ) + ) + break + + # add custom classes from cell context + cls = "" + if (r, c) in self.cell_context: + cls = " " + self.cell_context[r, c] + + data_element = _element( + "td", + ( + f"{self.css['data']} {self.css['row']}{r} " + f"{self.css['col']}{c}{cls}" + ), + value, + data_element_visible, + attributes="", + display_value=self._display_funcs[(r, c)](value), + ) + + if self.cell_ids: + data_element["id"] = f"{self.css['row']}{r}_{self.css['col']}{c}" + if data_element_visible and (r, c) in self.ctx and self.ctx[r, 
c]: + # always add id if needed due to specified style + data_element["id"] = f"{self.css['row']}{r}_{self.css['col']}{c}" + self.cellstyle_map[tuple(self.ctx[r, c])].append( + f"{self.css['row']}{r}_{self.css['col']}{c}" + ) + + data.append(data_element) + + return index_headers + data + + def _translate_latex(self, d: dict, clines: str | None) -> None: + r""" + Post-process the default render dict for the LaTeX template format. + + Processing items included are: + - Remove hidden columns from the non-headers part of the body. + - Place cellstyles directly in td cells rather than use cellstyle_map. + - Remove hidden indexes or reinsert missing th elements if part of multiindex + or multirow sparsification (so that \multirow and \multicol work correctly). + """ + index_levels = self.index.nlevels + visible_index_level_n = index_levels - sum(self.hide_index_) + d["head"] = [ + [ + {**col, "cellstyle": self.ctx_columns[r, c - visible_index_level_n]} + for c, col in enumerate(row) + if col["is_visible"] + ] + for r, row in enumerate(d["head"]) + ] + body = [] + for r, row in zip( + [r for r in range(len(self.data.index)) if r not in self.hidden_rows], + d["body"], + ): + # note: cannot enumerate d["body"] because rows were dropped if hidden + # during _translate_body so must zip to acquire the true r-index associated + # with the ctx obj which contains the cell styles. + if all(self.hide_index_): + row_body_headers = [] + else: + row_body_headers = [ + { + **col, + "display_value": col["display_value"] + if col["is_visible"] + else "", + "cellstyle": self.ctx_index[r, c], + } + for c, col in enumerate(row[:index_levels]) + if (col["type"] == "th" and not self.hide_index_[c]) + ] + + row_body_cells = [ + {**col, "cellstyle": self.ctx[r, c]} + for c, col in enumerate(row[index_levels:]) + if (col["is_visible"] and col["type"] == "td") + ] + + body.append(row_body_headers + row_body_cells) + d["body"] = body + + # clines are determined from info on index_lengths and hidden_rows and input + # to a dict defining which row clines should be added in the template. + if clines not in [ + None, + "all;data", + "all;index", + "skip-last;data", + "skip-last;index", + ]: + raise ValueError( + f"`clines` value of {clines} is invalid. Should either be None or one " + f"of 'all;data', 'all;index', 'skip-last;data', 'skip-last;index'." + ) + elif clines is not None: + data_len = len(row_body_cells) if "data" in clines else 0 + + d["clines"] = defaultdict(list) + visible_row_indexes: list[int] = [ + r for r in range(len(self.data.index)) if r not in self.hidden_rows + ] + visible_index_levels: list[int] = [ + i for i in range(index_levels) if not self.hide_index_[i] + ] + for rn, r in enumerate(visible_row_indexes): + for lvln, lvl in enumerate(visible_index_levels): + if lvl == index_levels - 1 and "skip-last" in clines: + continue + idx_len = d["index_lengths"].get((lvl, r), None) + if idx_len is not None: # i.e. not a sparsified entry + d["clines"][rn + idx_len].append( + f"\\cline{{{lvln+1}-{len(visible_index_levels)+data_len}}}" + ) + + def format( + self, + formatter: ExtFormatter | None = None, + subset: Subset | None = None, + na_rep: str | None = None, + precision: int | None = None, + decimal: str = ".", + thousands: str | None = None, + escape: str | None = None, + hyperlinks: str | None = None, + ) -> StylerRenderer: + r""" + Format the text display value of cells. + + Parameters + ---------- + formatter : str, callable, dict or None + Object to define how values are displayed. See notes. 
+        subset : label, array-like, IndexSlice, optional
+            A valid 2d input to `DataFrame.loc[<subset>]`, or, in the case of a 1d input
+            or single key, to `DataFrame.loc[:, <subset>]` where the columns are
+            prioritised, to limit ``data`` to *before* applying the function.
+        na_rep : str, optional
+            Representation for missing values.
+            If ``na_rep`` is None, no special formatting is applied.
+
+            .. versionadded:: 1.0.0
+
+        precision : int, optional
+            Floating point precision to use for display purposes, if not determined by
+            the specified ``formatter``.
+
+            .. versionadded:: 1.3.0
+
+        decimal : str, default "."
+            Character used as decimal separator for floats, complex and integers.
+
+            .. versionadded:: 1.3.0
+
+        thousands : str, optional, default None
+            Character used as thousands separator for floats, complex and integers.
+
+            .. versionadded:: 1.3.0
+
+        escape : str, optional
+            Use 'html' to replace the characters ``&``, ``<``, ``>``, ``'``, and ``"``
+            in cell display string with HTML-safe sequences.
+            Use 'latex' to replace the characters ``&``, ``%``, ``$``, ``#``, ``_``,
+            ``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with
+            LaTeX-safe sequences.
+            Escaping is done before ``formatter``.
+
+            .. versionadded:: 1.3.0
+
+        hyperlinks : {"html", "latex"}, optional
+            Convert string patterns containing https://, http://, ftp:// or www. to
+            HTML <a> tags as clickable URL hyperlinks if "html", or LaTeX \href
+            commands if "latex".
+
+            .. versionadded:: 1.4.0
+
+        Returns
+        -------
+        self : Styler
+
+        Notes
+        -----
+        This method assigns a formatting function, ``formatter``, to each cell in the
+        DataFrame. If ``formatter`` is ``None``, then the default formatter is used.
+        If a callable then that function should take a data value as input and return
+        a displayable representation, such as a string. If ``formatter`` is
+        given as a string this is assumed to be a valid Python format specification
+        and is wrapped to a callable as ``string.format(x)``. If a ``dict`` is given,
+        keys should correspond to column names, and values should be string or
+        callable, as above.
+
+        The default formatter currently expresses floats and complex numbers with the
+        pandas display precision unless using the ``precision`` argument here. The
+        default formatter does not adjust the representation of missing values unless
+        the ``na_rep`` argument is used.
+
+        The ``subset`` argument defines which region to apply the formatting function
+        to. If the ``formatter`` argument is given in dict form but does not include
+        all columns within the subset then these columns will have the default formatter
+        applied. Any columns in the formatter dict excluded from the subset will
+        be ignored.
+
+        When using a ``formatter`` string the dtypes must be compatible, otherwise a
+        `ValueError` will be raised.
+
+        When instantiating a Styler, default formatting can be applied by setting the
+        ``pandas.options``:
+
+        - ``styler.format.formatter``: default None.
+        - ``styler.format.na_rep``: default None.
+        - ``styler.format.precision``: default 6.
+        - ``styler.format.decimal``: default ".".
+        - ``styler.format.thousands``: default None.
+        - ``styler.format.escape``: default None.
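+
+        For example, the same defaults can be applied session-wide rather than per
+        call (a sketch using only the options listed above):
+
+        >>> with pd.option_context("styler.format.precision", 2,
+        ...                        "styler.format.thousands", ","):
+        ...     html = pd.DataFrame([[12345.6789]]).style.to_html()  # doctest: +SKIP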
+
+        Examples
+        --------
+        Using ``na_rep`` and ``precision`` with the default ``formatter``
+
+        >>> df = pd.DataFrame([[np.nan, 1.0, 'A'], [2.0, np.nan, 3.0]])
+        >>> df.style.format(na_rep='MISS', precision=3)  # doctest: +SKIP
+                0       1       2
+        0    MISS   1.000       A
+        1   2.000    MISS   3.000
+
+        Using a ``formatter`` specification on consistent column dtypes
+
+        >>> df.style.format('{:.2f}', na_rep='MISS', subset=[0,1])  # doctest: +SKIP
+                0      1          2
+        0    MISS   1.00          A
+        1    2.00   MISS   3.000000
+
+        Using the default ``formatter`` for unspecified columns
+
+        >>> df.style.format({0: '{:.2f}', 1: '£ {:.1f}'}, na_rep='MISS', precision=1)
+        ...  # doctest: +SKIP
+                0      1     2
+        0    MISS  £ 1.0     A
+        1    2.00   MISS   3.0
+
+        Multiple ``na_rep`` or ``precision`` specifications under the default
+        ``formatter``.
+
+        >>> df.style.format(na_rep='MISS', precision=1, subset=[0])
+        ...     .format(na_rep='PASS', precision=2, subset=[1, 2])  # doctest: +SKIP
+                0      1      2
+        0    MISS   1.00      A
+        1     2.0   PASS   3.00
+
+        Using a callable ``formatter`` function.
+
+        >>> func = lambda s: 'STRING' if isinstance(s, str) else 'FLOAT'
+        >>> df.style.format({0: '{:.1f}', 2: func}, precision=4, na_rep='MISS')
+        ...  # doctest: +SKIP
+                0        1        2
+        0    MISS   1.0000   STRING
+        1     2.0     MISS    FLOAT
+
+        Using a ``formatter`` with HTML ``escape`` and ``na_rep``.
+
+        >>> df = pd.DataFrame([['<div></div>', '"A&B"', None]])
+        >>> s = df.style.format(
+        ...     '<a href="a.com/{0}">{0}</a>', escape="html", na_rep="NA"
+        ...     )
+        >>> s.to_html()  # doctest: +SKIP
+        ...
+
+        Using a ``formatter`` with LaTeX ``escape``.
+
+        >>> df = pd.DataFrame([["123"], ["~ ^"], ["$%#"]])
+        >>> df.style.format("\\textbf{{{}}}", escape="latex").to_latex()
+        ...  # doctest: +SKIP
+        \begin{tabular}{ll}
+        {} & {0} \\
+        0 & \textbf{123} \\
+        1 & \textbf{\textasciitilde \space \textasciicircum } \\
+        2 & \textbf{\$\%\#} \\
+        \end{tabular}
+        """
+        if all(
+            (
+                formatter is None,
+                subset is None,
+                precision is None,
+                decimal == ".",
+                thousands is None,
+                na_rep is None,
+                escape is None,
+                hyperlinks is None,
+            )
+        ):
+            self._display_funcs.clear()
+            return self  # clear the formatter / revert to default and avoid looping
+
+        subset = slice(None) if subset is None else subset
+        subset = non_reducing_slice(subset)
+        data = self.data.loc[subset]
+
+        if not isinstance(formatter, dict):
+            formatter = {col: formatter for col in data.columns}
+
+        cis = self.columns.get_indexer_for(data.columns)
+        ris = self.index.get_indexer_for(data.index)
+        for ci in cis:
+            format_func = _maybe_wrap_formatter(
+                formatter.get(self.columns[ci]),
+                na_rep=na_rep,
+                precision=precision,
+                decimal=decimal,
+                thousands=thousands,
+                escape=escape,
+                hyperlinks=hyperlinks,
+            )
+            for ri in ris:
+                self._display_funcs[(ri, ci)] = format_func
+
+        return self
+
+    def format_index(
+        self,
+        formatter: ExtFormatter | None = None,
+        axis: int | str = 0,
+        level: Level | list[Level] | None = None,
+        na_rep: str | None = None,
+        precision: int | None = None,
+        decimal: str = ".",
+        thousands: str | None = None,
+        escape: str | None = None,
+        hyperlinks: str | None = None,
+    ) -> StylerRenderer:
+        r"""
+        Format the text display value of index labels or column headers.
+
+        .. versionadded:: 1.4.0
+
+        Parameters
+        ----------
+        formatter : str, callable, dict or None
+            Object to define how values are displayed. See notes.
+        axis : {0, "index", 1, "columns"}
+            Whether to apply the formatter to the index or column headers.
+        level : int, str, list
+            The level(s) over which to apply the generic formatter.
+        na_rep : str, optional
+            Representation for missing values.
+            If ``na_rep`` is None, no special formatting is applied.
+        precision : int, optional
+            Floating point precision to use for display purposes, if not determined by
+            the specified ``formatter``.
+        decimal : str, default "."
+            Character used as decimal separator for floats, complex and integers.
+        thousands : str, optional, default None
+            Character used as thousands separator for floats, complex and integers.
+        escape : str, optional
+            Use 'html' to replace the characters ``&``, ``<``, ``>``, ``'``, and ``"``
+            in cell display string with HTML-safe sequences.
+            Use 'latex' to replace the characters ``&``, ``%``, ``$``, ``#``, ``_``,
+            ``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with
+            LaTeX-safe sequences.
+            Escaping is done before ``formatter``.
+        hyperlinks : {"html", "latex"}, optional
+            Convert string patterns containing https://, http://, ftp:// or www. to
+            HTML <a> tags as clickable URL hyperlinks if "html", or LaTeX \href
+            commands if "latex".
+
+        Returns
+        -------
+        self : Styler
+
+        Notes
+        -----
+        This method assigns a formatting function, ``formatter``, to each level label
+        in the DataFrame's index or column headers. If ``formatter`` is ``None``,
+        then the default formatter is used.
+        If a callable then that function should take a label value as input and return
+        a displayable representation, such as a string. If ``formatter`` is
+        given as a string this is assumed to be a valid Python format specification
+        and is wrapped to a callable as ``string.format(x)``. If a ``dict`` is given,
+        keys should correspond to MultiIndex level numbers or names, and values should
+        be string or callable, as above.
+
+        The default formatter currently expresses floats and complex numbers with the
+        pandas display precision unless using the ``precision`` argument here. The
+        default formatter does not adjust the representation of missing values unless
+        the ``na_rep`` argument is used.
+
+        The ``level`` argument defines which levels of a MultiIndex to apply the
+        method to. If the ``formatter`` argument is given in dict form but does
+        not include all levels within the level argument then these unspecified levels
+        will have the default formatter applied. Any levels in the formatter dict
+        specifically excluded from the level argument will be ignored.
+
+        When using a ``formatter`` string the dtypes must be compatible, otherwise a
+        `ValueError` will be raised.
+
+        Examples
+        --------
+        Using ``na_rep`` and ``precision`` with the default ``formatter``
+
+        >>> df = pd.DataFrame([[1, 2, 3]], columns=[2.0, np.nan, 4.0])
+        >>> df.style.format_index(axis=1, na_rep='MISS', precision=3)  # doctest: +SKIP
+            2.000    MISS    4.000
+        0       1       2        3
+
+        Using a ``formatter`` specification on consistent dtypes in a level
+
+        >>> df.style.format_index('{:.2f}', axis=1, na_rep='MISS')  # doctest: +SKIP
+            2.00    MISS    4.00
+        0      1       2       3
+
+        Using the default ``formatter`` for unspecified levels
+
+        >>> df = pd.DataFrame([[1, 2, 3]],
+        ...     columns=pd.MultiIndex.from_arrays([["a", "a", "b"], [2, np.nan, 4]]))
+        >>> df.style.format_index({0: lambda v: v.upper()}, axis=1, precision=1)
+        ...  # doctest: +SKIP
+                  A       B
+            2.0  nan    4.0
+        0     1    2      3
+
+        Using a callable ``formatter`` function.
+
+        >>> func = lambda s: 'STRING' if isinstance(s, str) else 'FLOAT'
+        >>> df.style.format_index(func, axis=1, na_rep='MISS')
+        ...  # doctest: +SKIP
+            STRING   STRING
+             FLOAT     MISS    FLOAT
+        0        1        2        3
+
+        Using a ``formatter`` with HTML ``escape`` and ``na_rep``.
+
+        >>> df = pd.DataFrame([[1, 2, 3]], columns=['"A"', 'A&B', None])
+        >>> s = df.style.format_index('$ {0}', axis=1, escape="html", na_rep="NA")
+        ...  # doctest: +SKIP
+        <th .. >$ &#34;A&#34;</th>
+        <th .. >$ A&amp;B</th>
+        <th .. >NA</th>
+        ...
+
+
+def _element(
+    html_element: str,
+    html_class: str,
+    value: Any,
+    is_visible: bool,
+    **kwargs,
+) -> dict:
+    """
+    Template to return container with information for a <td></td> or <th></th> element.
+    """
+    if "display_value" not in kwargs:
+        kwargs["display_value"] = value
+    return {
+        "type": html_element,
+        "value": value,
+        "class": html_class,
+        "is_visible": is_visible,
+        **kwargs,
+    }
+
+
+def _get_trimming_maximums(
+    rn,
+    cn,
+    max_elements,
+    max_rows=None,
+    max_cols=None,
+    scaling_factor=0.8,
+) -> tuple[int, int]:
+    """
+    Recursively reduce the number of rows and columns to satisfy max elements.
+
+    Parameters
+    ----------
+    rn, cn : int
+        The number of input rows / columns
+    max_elements : int
+        The number of allowable elements
+    max_rows, max_cols : int, optional
+        Directly specify an initial maximum rows or columns before compression.
+    scaling_factor : float
+        Factor at which to reduce the number of rows / columns to fit.
+ + Returns + ------- + rn, cn : tuple + New rn and cn values that satisfy the max_elements constraint + """ + + def scale_down(rn, cn): + if cn >= rn: + return rn, int(cn * scaling_factor) + else: + return int(rn * scaling_factor), cn + + if max_rows: + rn = max_rows if rn > max_rows else rn + if max_cols: + cn = max_cols if cn > max_cols else cn + + while rn * cn > max_elements: + rn, cn = scale_down(rn, cn) + + return rn, cn + + +def _get_level_lengths( + index: Index, + sparsify: bool, + max_index: int, + hidden_elements: Sequence[int] | None = None, +): + """ + Given an index, find the level length for each element. + + Parameters + ---------- + index : Index + Index or columns to determine lengths of each element + sparsify : bool + Whether to hide or show each distinct element in a MultiIndex + max_index : int + The maximum number of elements to analyse along the index due to trimming + hidden_elements : sequence of int + Index positions of elements hidden from display in the index affecting + length + + Returns + ------- + Dict : + Result is a dictionary of (level, initial_position): span + """ + if isinstance(index, MultiIndex): + levels = index.format(sparsify=lib.no_default, adjoin=False) + else: + levels = index.format() + + if hidden_elements is None: + hidden_elements = [] + + lengths = {} + if not isinstance(index, MultiIndex): + for i, value in enumerate(levels): + if i not in hidden_elements: + lengths[(0, i)] = 1 + return lengths + + for i, lvl in enumerate(levels): + visible_row_count = 0 # used to break loop due to display trimming + for j, row in enumerate(lvl): + if visible_row_count > max_index: + break + if not sparsify: + # then lengths will always equal 1 since no aggregation. + if j not in hidden_elements: + lengths[(i, j)] = 1 + visible_row_count += 1 + elif (row is not lib.no_default) and (j not in hidden_elements): + # this element has not been sparsified so must be the start of section + last_label = j + lengths[(i, last_label)] = 1 + visible_row_count += 1 + elif row is not lib.no_default: + # even if the above is hidden, keep track of it in case length > 1 and + # later elements are visible + last_label = j + lengths[(i, last_label)] = 0 + elif j not in hidden_elements: + # then element must be part of sparsified section and is visible + visible_row_count += 1 + if visible_row_count > max_index: + break # do not add a length since the render trim limit reached + if lengths[(i, last_label)] == 0: + # if previous iteration was first-of-section but hidden then offset + last_label = j + lengths[(i, last_label)] = 1 + else: + # else add to previous iteration + lengths[(i, last_label)] += 1 + + non_zero_lengths = { + element: length for element, length in lengths.items() if length >= 1 + } + + return non_zero_lengths + + +def _is_visible(idx_row, idx_col, lengths) -> bool: + """ + Index -> {(idx_row, idx_col): bool}). 
+ """ + return (idx_col, idx_row) in lengths + + +def format_table_styles(styles: CSSStyles) -> CSSStyles: + """ + looks for multiple CSS selectors and separates them: + [{'selector': 'td, th', 'props': 'a:v;'}] + ---> [{'selector': 'td', 'props': 'a:v;'}, + {'selector': 'th', 'props': 'a:v;'}] + """ + return [ + {"selector": selector, "props": css_dict["props"]} + for css_dict in styles + for selector in css_dict["selector"].split(",") + ] + + +def _default_formatter(x: Any, precision: int, thousands: bool = False) -> Any: + """ + Format the display of a value + + Parameters + ---------- + x : Any + Input variable to be formatted + precision : Int + Floating point precision used if ``x`` is float or complex. + thousands : bool, default False + Whether to group digits with thousands separated with ",". + + Returns + ------- + value : Any + Matches input type, or string if input is float or complex or int with sep. + """ + if isinstance(x, (float, complex)): + return f"{x:,.{precision}f}" if thousands else f"{x:.{precision}f}" + elif isinstance(x, int): + return f"{x:,.0f}" if thousands else f"{x:.0f}" + return x + + +def _wrap_decimal_thousands( + formatter: Callable, decimal: str, thousands: str | None +) -> Callable: + """ + Takes a string formatting function and wraps logic to deal with thousands and + decimal parameters, in the case that they are non-standard and that the input + is a (float, complex, int). + """ + + def wrapper(x): + if isinstance(x, (float, complex, int)): + if decimal != "." and thousands is not None and thousands != ",": + return ( + formatter(x) + .replace(",", "§_§-") # rare string to avoid "," <-> "." clash. + .replace(".", decimal) + .replace("§_§-", thousands) + ) + elif decimal != "." and (thousands is None or thousands == ","): + return formatter(x).replace(".", decimal) + elif decimal == "." and thousands is not None and thousands != ",": + return formatter(x).replace(",", thousands) + return formatter(x) + + return wrapper + + +def _str_escape(x, escape): + """if escaping: only use on str, else return input""" + if isinstance(x, str): + if escape == "html": + return escape_html(x) + elif escape == "latex": + return _escape_latex(x) + else: + raise ValueError( + f"`escape` only permitted in {{'html', 'latex'}}, got {escape}" + ) + return x + + +def _render_href(x, format): + """uses regex to detect a common URL pattern and converts to href tag in format.""" + if isinstance(x, str): + if format == "html": + href = '{0}' + elif format == "latex": + href = r"\href{{{0}}}{{{0}}}" + else: + raise ValueError("``hyperlinks`` format can only be 'html' or 'latex'") + pat = r"(https?:\/\/|ftp:\/\/|www.)[\w/\-?=%.]+\.[\w/\-&?=%.]+" + return re.sub(pat, lambda m: href.format(m.group(0)), x) + return x + + +def _maybe_wrap_formatter( + formatter: BaseFormatter | None = None, + na_rep: str | None = None, + precision: int | None = None, + decimal: str = ".", + thousands: str | None = None, + escape: str | None = None, + hyperlinks: str | None = None, +) -> Callable: + """ + Allows formatters to be expressed as str, callable or None, where None returns + a default formatting function. wraps with na_rep, and precision where they are + available. 
+ """ + # Get initial func from input string, input callable, or from default factory + if isinstance(formatter, str): + func_0 = lambda x: formatter.format(x) + elif callable(formatter): + func_0 = formatter + elif formatter is None: + precision = ( + get_option("styler.format.precision") if precision is None else precision + ) + func_0 = partial( + _default_formatter, precision=precision, thousands=(thousands is not None) + ) + else: + raise TypeError(f"'formatter' expected str or callable, got {type(formatter)}") + + # Replace chars if escaping + if escape is not None: + func_1 = lambda x: func_0(_str_escape(x, escape=escape)) + else: + func_1 = func_0 + + # Replace decimals and thousands if non-standard inputs detected + if decimal != "." or (thousands is not None and thousands != ","): + func_2 = _wrap_decimal_thousands(func_1, decimal=decimal, thousands=thousands) + else: + func_2 = func_1 + + # Render links + if hyperlinks is not None: + func_3 = lambda x: func_2(_render_href(x, format=hyperlinks)) + else: + func_3 = func_2 + + # Replace missing values if na_rep + if na_rep is None: + return func_3 + else: + return lambda x: na_rep if isna(x) else func_3(x) + + +def non_reducing_slice(slice_: Subset): + """ + Ensure that a slice doesn't reduce to a Series or Scalar. + + Any user-passed `subset` should have this called on it + to make sure we're always working with DataFrames. + """ + # default to column slice, like DataFrame + # ['A', 'B'] -> IndexSlices[:, ['A', 'B']] + kinds = (ABCSeries, np.ndarray, Index, list, str) + if isinstance(slice_, kinds): + slice_ = IndexSlice[:, slice_] + + def pred(part) -> bool: + """ + Returns + ------- + bool + True if slice does *not* reduce, + False if `part` is a tuple. + """ + # true when slice does *not* reduce, False when part is a tuple, + # i.e. MultiIndex slice + if isinstance(part, tuple): + # GH#39421 check for sub-slice: + return any((isinstance(s, slice) or is_list_like(s)) for s in part) + else: + return isinstance(part, slice) or is_list_like(part) + + if not is_list_like(slice_): + if not isinstance(slice_, slice): + # a 1-d slice, like df.loc[1] + slice_ = [[slice_]] + else: + # slice(a, b, c) + slice_ = [slice_] # to tuplize later + else: + # error: Item "slice" of "Union[slice, Sequence[Any]]" has no attribute + # "__iter__" (not iterable) -> is specifically list_like in conditional + slice_ = [p if pred(p) else [p] for p in slice_] # type: ignore[union-attr] + return tuple(slice_) + + +def maybe_convert_css_to_tuples(style: CSSProperties) -> CSSList: + """ + Convert css-string to sequence of tuples format if needed. + 'color:red; border:1px solid black;' -> [('color', 'red'), + ('border','1px solid red')] + """ + if isinstance(style, str): + s = style.split(";") + try: + return [ + (x.split(":")[0].strip(), x.split(":")[1].strip()) + for x in s + if x.strip() != "" + ] + except IndexError: + raise ValueError( + "Styles supplied as string must follow CSS rule formats, " + f"for example 'attr: val;'. '{style}' was given." + ) + return style + + +def refactor_levels( + level: Level | list[Level] | None, + obj: Index, +) -> list[int]: + """ + Returns a consistent levels arg for use in ``hide_index`` or ``hide_columns``. + + Parameters + ---------- + level : int, str, list + Original ``level`` arg supplied to above methods. 
+ obj: + Either ``self.index`` or ``self.columns`` + + Returns + ------- + list : refactored arg with a list of levels to hide + """ + if level is None: + levels_: list[int] = list(range(obj.nlevels)) + elif isinstance(level, int): + levels_ = [level] + elif isinstance(level, str): + levels_ = [obj._get_level_number(level)] + elif isinstance(level, list): + levels_ = [ + obj._get_level_number(lev) if not isinstance(lev, int) else lev + for lev in level + ] + else: + raise ValueError("`level` must be of type `int`, `str` or list of such") + return levels_ + + +class Tooltips: + """ + An extension to ``Styler`` that allows for and manipulates tooltips on hover + of ``" in result + result = styler.to_html() + assert "" not in result + + +def test_block_names(tpl_style, tpl_table): + # catch accidental removal of a block + expected_style = { + "before_style", + "style", + "table_styles", + "before_cellstyle", + "cellstyle", + } + expected_table = { + "before_table", + "table", + "caption", + "thead", + "tbody", + "after_table", + "before_head_rows", + "head_tr", + "after_head_rows", + "before_rows", + "tr", + "after_rows", + } + result1 = set(tpl_style.blocks) + assert result1 == expected_style + + result2 = set(tpl_table.blocks) + assert result2 == expected_table + + +def test_from_custom_template_table(tmpdir): + p = tmpdir.mkdir("tpl").join("myhtml_table.tpl") + p.write( + dedent( + """\ + {% extends "html_table.tpl" %} + {% block table %} +
<h1>{{custom_title}}</h1>
+ {{ super() }} + {% endblock table %}""" + ) + ) + result = Styler.from_custom_template(str(tmpdir.join("tpl")), "myhtml_table.tpl") + assert issubclass(result, Styler) + assert result.env is not Styler.env + assert result.template_html_table is not Styler.template_html_table + styler = result(DataFrame({"A": [1, 2]})) + assert "
<h1>My Title</h1>" in styler.to_html(custom_title="My Title")
+
+
+def test_from_custom_template_style(tmpdir):
+    p = tmpdir.mkdir("tpl").join("myhtml_style.tpl")
+    p.write(
+        dedent(
+            """\
+            {% extends "html_style.tpl" %}
+            {% block style %}
+            <link rel="stylesheet" href="mystyle.css">
\n\n\n + {{ super() }} + {% endblock style %}""" + ) + ) + result = Styler.from_custom_template( + str(tmpdir.join("tpl")), html_style="myhtml_style.tpl" + ) + assert issubclass(result, Styler) + assert result.env is not Styler.env + assert result.template_html_style is not Styler.template_html_style + styler = result(DataFrame({"A": [1, 2]})) + assert '\n\nfull cap" in styler.to_html() + + +@pytest.mark.parametrize("index", [False, True]) +@pytest.mark.parametrize("columns", [False, True]) +@pytest.mark.parametrize("index_name", [True, False]) +def test_sticky_basic(styler, index, columns, index_name): + if index_name: + styler.index.name = "some text" + if index: + styler.set_sticky(axis=0) + if columns: + styler.set_sticky(axis=1) + + left_css = ( + "#T_ {0} {{\n position: sticky;\n background-color: white;\n" + " left: 0px;\n z-index: {1};\n}}" + ) + top_css = ( + "#T_ {0} {{\n position: sticky;\n background-color: white;\n" + " top: {1}px;\n z-index: {2};\n{3}}}" + ) + + res = styler.set_uuid("").to_html() + + # test index stickys over thead and tbody + assert (left_css.format("thead tr th:nth-child(1)", "3 !important") in res) is index + assert (left_css.format("tbody tr th:nth-child(1)", "1") in res) is index + + # test column stickys including if name row + assert ( + top_css.format("thead tr:nth-child(1) th", "0", "2", " height: 25px;\n") in res + ) is (columns and index_name) + assert ( + top_css.format("thead tr:nth-child(2) th", "25", "2", " height: 25px;\n") + in res + ) is (columns and index_name) + assert (top_css.format("thead tr:nth-child(1) th", "0", "2", "") in res) is ( + columns and not index_name + ) + + +@pytest.mark.parametrize("index", [False, True]) +@pytest.mark.parametrize("columns", [False, True]) +def test_sticky_mi(styler_mi, index, columns): + if index: + styler_mi.set_sticky(axis=0) + if columns: + styler_mi.set_sticky(axis=1) + + left_css = ( + "#T_ {0} {{\n position: sticky;\n background-color: white;\n" + " left: {1}px;\n min-width: 75px;\n max-width: 75px;\n z-index: {2};\n}}" + ) + top_css = ( + "#T_ {0} {{\n position: sticky;\n background-color: white;\n" + " top: {1}px;\n height: 25px;\n z-index: {2};\n}}" + ) + + res = styler_mi.set_uuid("").to_html() + + # test the index stickys for thead and tbody over both levels + assert ( + left_css.format("thead tr th:nth-child(1)", "0", "3 !important") in res + ) is index + assert (left_css.format("tbody tr th.level0", "0", "1") in res) is index + assert ( + left_css.format("thead tr th:nth-child(2)", "75", "3 !important") in res + ) is index + assert (left_css.format("tbody tr th.level1", "75", "1") in res) is index + + # test the column stickys for each level row + assert (top_css.format("thead tr:nth-child(1) th", "0", "2") in res) is columns + assert (top_css.format("thead tr:nth-child(2) th", "25", "2") in res) is columns + + +@pytest.mark.parametrize("index", [False, True]) +@pytest.mark.parametrize("columns", [False, True]) +@pytest.mark.parametrize("levels", [[1], ["one"], "one"]) +def test_sticky_levels(styler_mi, index, columns, levels): + styler_mi.index.names, styler_mi.columns.names = ["zero", "one"], ["zero", "one"] + if index: + styler_mi.set_sticky(axis=0, levels=levels) + if columns: + styler_mi.set_sticky(axis=1, levels=levels) + + left_css = ( + "#T_ {0} {{\n position: sticky;\n background-color: white;\n" + " left: {1}px;\n min-width: 75px;\n max-width: 75px;\n z-index: {2};\n}}" + ) + top_css = ( + "#T_ {0} {{\n position: sticky;\n background-color: white;\n" + " top: {1}px;\n height: 
25px;\n z-index: {2};\n}}" + ) + + res = styler_mi.set_uuid("").to_html() + + # test no sticking of level0 + assert "#T_ thead tr th:nth-child(1)" not in res + assert "#T_ tbody tr th.level0" not in res + assert "#T_ thead tr:nth-child(1) th" not in res + + # test sticking level1 + assert ( + left_css.format("thead tr th:nth-child(2)", "0", "3 !important") in res + ) is index + assert (left_css.format("tbody tr th.level1", "0", "1") in res) is index + assert (top_css.format("thead tr:nth-child(2) th", "0", "2") in res) is columns + + +def test_sticky_raises(styler): + with pytest.raises(ValueError, match="No axis named bad for object type DataFrame"): + styler.set_sticky(axis="bad") + + +@pytest.mark.parametrize( + "sparse_index, sparse_columns", + [(True, True), (True, False), (False, True), (False, False)], +) +def test_sparse_options(sparse_index, sparse_columns): + cidx = MultiIndex.from_tuples([("Z", "a"), ("Z", "b"), ("Y", "c")]) + ridx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")]) + df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=ridx, columns=cidx) + styler = df.style + + default_html = styler.to_html() # defaults under pd.options to (True , True) + + with option_context( + "styler.sparse.index", sparse_index, "styler.sparse.columns", sparse_columns + ): + html1 = styler.to_html() + assert (html1 == default_html) is (sparse_index and sparse_columns) + html2 = styler.to_html(sparse_index=sparse_index, sparse_columns=sparse_columns) + assert html1 == html2 + + +@pytest.mark.parametrize("index", [True, False]) +@pytest.mark.parametrize("columns", [True, False]) +def test_applymap_header_cell_ids(styler, index, columns): + # GH 41893 + func = lambda v: "attr: val;" + styler.uuid, styler.cell_ids = "", False + if index: + styler.applymap_index(func, axis="index") + if columns: + styler.applymap_index(func, axis="columns") + + result = styler.to_html() + + # test no data cell ids + assert '' in result + assert '' in result + + # test index header ids where needed and css styles + assert ( + '' in result + ) is index + assert ( + '' in result + ) is index + assert ("#T__level0_row0, #T__level0_row1 {\n attr: val;\n}" in result) is index + + # test column header ids where needed and css styles + assert ( + '' in result + ) is columns + assert ("#T__level0_col0 {\n attr: val;\n}" in result) is columns + + +@pytest.mark.parametrize("rows", [True, False]) +@pytest.mark.parametrize("cols", [True, False]) +def test_maximums(styler_mi, rows, cols): + result = styler_mi.to_html( + max_rows=2 if rows else None, + max_columns=2 if cols else None, + ) + + assert ">5" in result # [[0,1], [4,5]] always visible + assert (">8" in result) is not rows # first trimmed vertical element + assert (">2" in result) is not cols # first trimmed horizontal element + + +def test_replaced_css_class_names(styler_mi): + css = { + "row_heading": "ROWHEAD", + # "col_heading": "COLHEAD", + "index_name": "IDXNAME", + # "col": "COL", + "row": "ROW", + # "col_trim": "COLTRIM", + "row_trim": "ROWTRIM", + "level": "LEVEL", + "data": "DATA", + "blank": "BLANK", + } + midx = MultiIndex.from_product([["a", "b"], ["c", "d"]]) + styler_mi = Styler( + DataFrame(np.arange(16).reshape(4, 4), index=midx, columns=midx), + uuid_len=0, + ).set_table_styles(css_class_names=css) + styler_mi.index.names = ["n1", "n2"] + styler_mi.hide(styler_mi.index[1:], axis=0) + styler_mi.hide(styler_mi.columns[1:], axis=1) + styler_mi.applymap_index(lambda v: "color: red;", axis=0) + styler_mi.applymap_index(lambda v: "color: 
green;", axis=1) + styler_mi.applymap(lambda v: "color: blue;") + expected = dedent( + """\ + +
<div></div>"A&B"NA$ "A"$ A&BNA + ... + + Using a ``formatter`` with LaTeX ``escape``. + + >>> df = pd.DataFrame([[1, 2, 3]], columns=["123", "~", "$%#"]) + >>> df.style.format_index("\\textbf{{{}}}", escape="latex", axis=1).to_latex() + ... # doctest: +SKIP + \begin{tabular}{lrrr} + {} & {\textbf{123}} & {\textbf{\textasciitilde }} & {\textbf{\$\%\#}} \\ + 0 & 1 & 2 & 3 \\ + \end{tabular} + """ + axis = self.data._get_axis_number(axis) + if axis == 0: + display_funcs_, obj = self._display_funcs_index, self.index + else: + display_funcs_, obj = self._display_funcs_columns, self.columns + levels_ = refactor_levels(level, obj) + + if all( + ( + formatter is None, + level is None, + precision is None, + decimal == ".", + thousands is None, + na_rep is None, + escape is None, + hyperlinks is None, + ) + ): + display_funcs_.clear() + return self # clear the formatter / revert to default and avoid looping + + if not isinstance(formatter, dict): + formatter = {level: formatter for level in levels_} + else: + formatter = { + obj._get_level_number(level): formatter_ + for level, formatter_ in formatter.items() + } + + for lvl in levels_: + format_func = _maybe_wrap_formatter( + formatter.get(lvl), + na_rep=na_rep, + precision=precision, + decimal=decimal, + thousands=thousands, + escape=escape, + hyperlinks=hyperlinks, + ) + + for idx in [(i, lvl) if axis == 0 else (lvl, i) for i in range(len(obj))]: + display_funcs_[idx] = format_func + + return self + + +def _element( + html_element: str, + html_class: str, + value: Any, + is_visible: bool, + **kwargs, +) -> dict: + """ + Template to return container with information for a `` cells in the HTML result. + + Parameters + ---------- + css_name: str, default "pd-t" + Name of the CSS class that controls visualisation of tooltips. + css_props: list-like, default; see Notes + List of (attr, value) tuples defining properties of the CSS class. + tooltips: DataFrame, default empty + DataFrame of strings aligned with underlying Styler data for tooltip + display. + + Notes + ----- + The default properties for the tooltip CSS class are: + + - visibility: hidden + - position: absolute + - z-index: 1 + - background-color: black + - color: white + - transform: translate(-20px, -20px) + + Hidden visibility is a key prerequisite to the hover functionality, and should + always be included in any manual properties specification. + """ + + def __init__( + self, + css_props: CSSProperties = [ + ("visibility", "hidden"), + ("position", "absolute"), + ("z-index", 1), + ("background-color", "black"), + ("color", "white"), + ("transform", "translate(-20px, -20px)"), + ], + css_name: str = "pd-t", + tooltips: DataFrame = DataFrame(), + ): + self.class_name = css_name + self.class_properties = css_props + self.tt_data = tooltips + self.table_styles: CSSStyles = [] + + @property + def _class_styles(self): + """ + Combine the ``_Tooltips`` CSS class name and CSS properties to the format + required to extend the underlying ``Styler`` `table_styles` to allow + tooltips to render in HTML. 
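+
+    With the default construction this resolves to roughly the following
+    (illustrative):
+
+    >>> Tooltips()._class_styles  # doctest: +SKIP
+    [{'selector': '.pd-t', 'props': [('visibility', 'hidden'), ...]}]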
+ + Returns + ------- + styles : List + """ + return [ + { + "selector": f".{self.class_name}", + "props": maybe_convert_css_to_tuples(self.class_properties), + } + ] + + def _pseudo_css(self, uuid: str, name: str, row: int, col: int, text: str): + """ + For every table data-cell that has a valid tooltip (not None, NaN or + empty string) must create two pseudo CSS entries for the specific + element id which are added to overall table styles: + an on hover visibility change and a content change + dependent upon the user's chosen display string. + + For example: + [{"selector": "T__row1_col1:hover .pd-t", + "props": [("visibility", "visible")]}, + {"selector": "T__row1_col1 .pd-t::after", + "props": [("content", "Some Valid Text String")]}] + + Parameters + ---------- + uuid: str + The uuid of the Styler instance + name: str + The css-name of the class used for styling tooltips + row : int + The row index of the specified tooltip string data + col : int + The col index of the specified tooltip string data + text : str + The textual content of the tooltip to be displayed in HTML. + + Returns + ------- + pseudo_css : List + """ + selector_id = "#T_" + uuid + "_row" + str(row) + "_col" + str(col) + return [ + { + "selector": selector_id + f":hover .{name}", + "props": [("visibility", "visible")], + }, + { + "selector": selector_id + f" .{name}::after", + "props": [("content", f'"{text}"')], + }, + ] + + def _translate(self, styler: StylerRenderer, d: dict): + """ + Mutate the render dictionary to allow for tooltips: + + - Add ```` HTML element to each data cells ``display_value``. Ignores + headers. + - Add table level CSS styles to control pseudo classes. + + Parameters + ---------- + styler_data : DataFrame + Underlying ``Styler`` DataFrame used for reindexing. + uuid : str + The underlying ``Styler`` uuid for CSS id. + d : dict + The dictionary prior to final render + + Returns + ------- + render_dict : Dict + """ + self.tt_data = self.tt_data.reindex_like(styler.data) + if self.tt_data.empty: + return d + + name = self.class_name + mask = (self.tt_data.isna()) | (self.tt_data.eq("")) # empty string = no ttip + self.table_styles = [ + style + for sublist in [ + self._pseudo_css(styler.uuid, name, i, j, str(self.tt_data.iloc[i, j])) + for i in range(len(self.tt_data.index)) + for j in range(len(self.tt_data.columns)) + if not ( + mask.iloc[i, j] + or i in styler.hidden_rows + or j in styler.hidden_columns + ) + ] + for style in sublist + ] + + if self.table_styles: + # add span class to every cell only if at least 1 non-empty tooltip + for row in d["body"]: + for item in row: + if item["type"] == "td": + item["display_value"] = ( + str(item["display_value"]) + + f'' + ) + d["table_styles"].extend(self._class_styles) + d["table_styles"].extend(self.table_styles) + + return d + + +def _parse_latex_table_wrapping(table_styles: CSSStyles, caption: str | None) -> bool: + """ + Indicate whether LaTeX {tabular} should be wrapped with a {table} environment. + + Parses the `table_styles` and detects any selectors which must be included outside + of {tabular}, i.e. indicating that wrapping must occur, and therefore return True, + or if a caption exists and requires similar. 
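+
+    For example (illustrative):
+
+    >>> _parse_latex_table_wrapping(
+    ...     [{"selector": "position", "props": [("", "h!")]}], caption=None
+    ... )  # doctest: +SKIP
+    True
+    >>> _parse_latex_table_wrapping(
+    ...     [{"selector": "toprule", "props": [("", "hline")]}], caption=None
+    ... )  # doctest: +SKIP
+    False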
+ """ + IGNORED_WRAPPERS = ["toprule", "midrule", "bottomrule", "column_format"] + # ignored selectors are included with {tabular} so do not need wrapping + return ( + table_styles is not None + and any(d["selector"] not in IGNORED_WRAPPERS for d in table_styles) + ) or caption is not None + + +def _parse_latex_table_styles(table_styles: CSSStyles, selector: str) -> str | None: + """ + Return the first 'props' 'value' from ``tables_styles`` identified by ``selector``. + + Examples + -------- + >>> table_styles = [{'selector': 'foo', 'props': [('attr','value')]}, + ... {'selector': 'bar', 'props': [('attr', 'overwritten')]}, + ... {'selector': 'bar', 'props': [('a1', 'baz'), ('a2', 'ignore')]}] + >>> _parse_latex_table_styles(table_styles, selector='bar') + 'baz' + + Notes + ----- + The replacement of "§" with ":" is to avoid the CSS problem where ":" has structural + significance and cannot be used in LaTeX labels, but is often required by them. + """ + for style in table_styles[::-1]: # in reverse for most recently applied style + if style["selector"] == selector: + return str(style["props"][0][1]).replace("§", ":") + return None + + +def _parse_latex_cell_styles( + latex_styles: CSSList, display_value: str, convert_css: bool = False +) -> str: + r""" + Mutate the ``display_value`` string including LaTeX commands from ``latex_styles``. + + This method builds a recursive latex chain of commands based on the + CSSList input, nested around ``display_value``. + + If a CSS style is given as ('', '') this is translated to + '\{display_value}', and this value is treated as the + display value for the next iteration. + + The most recent style forms the inner component, for example for styles: + `[('c1', 'o1'), ('c2', 'o2')]` this returns: `\c1o1{\c2o2{display_value}}` + + Sometimes latex commands have to be wrapped with curly braces in different ways: + We create some parsing flags to identify the different behaviours: + + - `--rwrap` : `\{}` + - `--wrap` : `{\ }` + - `--nowrap` : `\ ` + - `--lwrap` : `{\} ` + - `--dwrap` : `{\}{}` + + For example for styles: + `[('c1', 'o1--wrap'), ('c2', 'o2')]` this returns: `{\c1o1 \c2o2{display_value}} + """ + if convert_css: + latex_styles = _parse_latex_css_conversion(latex_styles) + for (command, options) in latex_styles[::-1]: # in reverse for most recent style + formatter = { + "--wrap": f"{{\\{command}--to_parse {display_value}}}", + "--nowrap": f"\\{command}--to_parse {display_value}", + "--lwrap": f"{{\\{command}--to_parse}} {display_value}", + "--rwrap": f"\\{command}--to_parse{{{display_value}}}", + "--dwrap": f"{{\\{command}--to_parse}}{{{display_value}}}", + } + display_value = f"\\{command}{options} {display_value}" + for arg in ["--nowrap", "--wrap", "--lwrap", "--rwrap", "--dwrap"]: + if arg in str(options): + display_value = formatter[arg].replace( + "--to_parse", _parse_latex_options_strip(value=options, arg=arg) + ) + break # only ever one purposeful entry + return display_value + + +def _parse_latex_header_span( + cell: dict[str, Any], + multirow_align: str, + multicol_align: str, + wrap: bool = False, + convert_css: bool = False, +) -> str: + r""" + Refactor the cell `display_value` if a 'colspan' or 'rowspan' attribute is present. + + 'rowspan' and 'colspan' do not occur simultaneouly. If they are detected then + the `display_value` is altered to a LaTeX `multirow` or `multicol` command + respectively, with the appropriate cell-span. 
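+
+    A rowspan cell, for instance, is rewritten as follows (illustrative):
+
+    >>> cell = {'cellstyle': [], 'display_value': 'text', 'attributes': 'rowspan="2"'}
+    >>> _parse_latex_header_span(cell, 'c', 'r')  # doctest: +SKIP
+    '\\multirow[c]{2}{*}{text}'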
+ + ``wrap`` is used to enclose the `display_value` in braces which is needed for + column headers using an siunitx package. + + Requires the package {multirow}, whereas multicol support is usually built in + to the {tabular} environment. + + Examples + -------- + >>> cell = {'cellstyle': '', 'display_value':'text', 'attributes': 'colspan="3"'} + >>> _parse_latex_header_span(cell, 't', 'c') + '\\multicolumn{3}{c}{text}' + """ + display_val = _parse_latex_cell_styles( + cell["cellstyle"], cell["display_value"], convert_css + ) + if "attributes" in cell: + attrs = cell["attributes"] + if 'colspan="' in attrs: + colspan = attrs[attrs.find('colspan="') + 9 :] # len('colspan="') = 9 + colspan = int(colspan[: colspan.find('"')]) + if "naive-l" == multicol_align: + out = f"{{{display_val}}}" if wrap else f"{display_val}" + blanks = " & {}" if wrap else " &" + return out + blanks * (colspan - 1) + elif "naive-r" == multicol_align: + out = f"{{{display_val}}}" if wrap else f"{display_val}" + blanks = "{} & " if wrap else "& " + return blanks * (colspan - 1) + out + return f"\\multicolumn{{{colspan}}}{{{multicol_align}}}{{{display_val}}}" + elif 'rowspan="' in attrs: + if multirow_align == "naive": + return display_val + rowspan = attrs[attrs.find('rowspan="') + 9 :] + rowspan = int(rowspan[: rowspan.find('"')]) + return f"\\multirow[{multirow_align}]{{{rowspan}}}{{*}}{{{display_val}}}" + if wrap: + return f"{{{display_val}}}" + else: + return display_val + + +def _parse_latex_options_strip(value: str | int | float, arg: str) -> str: + """ + Strip a css_value which may have latex wrapping arguments, css comment identifiers, + and whitespaces, to a valid string for latex options parsing. + + For example: 'red /* --wrap */ ' --> 'red' + """ + return str(value).replace(arg, "").replace("/*", "").replace("*/", "").strip() + + +def _parse_latex_css_conversion(styles: CSSList) -> CSSList: + """ + Convert CSS (attribute,value) pairs to equivalent LaTeX (command,options) pairs. + + Ignore conversion if tagged with `--latex` option, skipped if no conversion found. + """ + + def font_weight(value, arg): + if value == "bold" or value == "bolder": + return "bfseries", f"{arg}" + return None + + def font_style(value, arg): + if value == "italic": + return "itshape", f"{arg}" + elif value == "oblique": + return "slshape", f"{arg}" + return None + + def color(value, user_arg, command, comm_arg): + """ + CSS colors have 5 formats to process: + + - 6 digit hex code: "#ff23ee" --> [HTML]{FF23EE} + - 3 digit hex code: "#f0e" --> [HTML]{FF00EE} + - rgba: rgba(128, 255, 0, 0.5) --> [rgb]{0.502, 1.000, 0.000} + - rgb: rgb(128, 255, 0,) --> [rbg]{0.502, 1.000, 0.000} + - string: red --> {red} + + Additionally rgb or rgba can be expressed in % which is also parsed. 
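+
+        For example (illustrative):
+
+        >>> color("rgb(128, 255, 0)", "", command="color", comm_arg="")  # doctest: +SKIP
+        ('color', '[rgb]{0.502, 1.000, 0.000}')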
+ """ + arg = user_arg if user_arg != "" else comm_arg + + if value[0] == "#" and len(value) == 7: # color is hex code + return command, f"[HTML]{{{value[1:].upper()}}}{arg}" + if value[0] == "#" and len(value) == 4: # color is short hex code + val = f"{value[1].upper()*2}{value[2].upper()*2}{value[3].upper()*2}" + return command, f"[HTML]{{{val}}}{arg}" + elif value[:3] == "rgb": # color is rgb or rgba + r = re.findall("(?<=\\()[0-9\\s%]+(?=,)", value)[0].strip() + r = float(r[:-1]) / 100 if "%" in r else int(r) / 255 + g = re.findall("(?<=,)[0-9\\s%]+(?=,)", value)[0].strip() + g = float(g[:-1]) / 100 if "%" in g else int(g) / 255 + if value[3] == "a": # color is rgba + b = re.findall("(?<=,)[0-9\\s%]+(?=,)", value)[1].strip() + else: # color is rgb + b = re.findall("(?<=,)[0-9\\s%]+(?=\\))", value)[0].strip() + b = float(b[:-1]) / 100 if "%" in b else int(b) / 255 + return command, f"[rgb]{{{r:.3f}, {g:.3f}, {b:.3f}}}{arg}" + else: + return command, f"{{{value}}}{arg}" # color is likely string-named + + CONVERTED_ATTRIBUTES: dict[str, Callable] = { + "font-weight": font_weight, + "background-color": partial(color, command="cellcolor", comm_arg="--lwrap"), + "color": partial(color, command="color", comm_arg=""), + "font-style": font_style, + } + + latex_styles: CSSList = [] + for (attribute, value) in styles: + if isinstance(value, str) and "--latex" in value: + # return the style without conversion but drop '--latex' + latex_styles.append((attribute, value.replace("--latex", ""))) + if attribute in CONVERTED_ATTRIBUTES.keys(): + arg = "" + for x in ["--wrap", "--nowrap", "--lwrap", "--dwrap", "--rwrap"]: + if x in str(value): + arg, value = x, _parse_latex_options_strip(value, x) + break + latex_style = CONVERTED_ATTRIBUTES[attribute](value, arg) + if latex_style is not None: + latex_styles.extend([latex_style]) + return latex_styles + + +def _escape_latex(s): + r""" + Replace the characters ``&``, ``%``, ``$``, ``#``, ``_``, ``{``, ``}``, + ``~``, ``^``, and ``\`` in the string with LaTeX-safe sequences. + + Use this if you need to display text that might contain such characters in LaTeX. 
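+
+    For instance (illustrative):
+
+    >>> _escape_latex("50% of $10 & more_")  # doctest: +SKIP
+    '50\\% of \\$10 \\& more\\_'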
+
+    Parameters
+    ----------
+    s : str
+        Input to be escaped
+
+    Return
+    ------
+    str :
+        Escaped string
+    """
+    return (
+        s.replace("\\", "ab2§=§8yz")  # rare string for final conversion: avoid \\ clash
+        .replace("ab2§=§8yz ", "ab2§=§8yz\\space ")  # since \backslash gobbles spaces
+        .replace("&", "\\&")
+        .replace("%", "\\%")
+        .replace("$", "\\$")
+        .replace("#", "\\#")
+        .replace("_", "\\_")
+        .replace("{", "\\{")
+        .replace("}", "\\}")
+        .replace("~ ", "~\\space ")  # since \textasciitilde gobbles spaces
+        .replace("~", "\\textasciitilde ")
+        .replace("^ ", "^\\space ")  # since \textasciicircum gobbles spaces
+        .replace("^", "\\textasciicircum ")
+        .replace("ab2§=§8yz", "\\textbackslash ")
+    )
diff --git a/.local/lib/python3.8/site-packages/pandas/io/formats/templates/html.tpl b/.local/lib/python3.8/site-packages/pandas/io/formats/templates/html.tpl
new file mode 100644
index 0000000..8c63be3
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/io/formats/templates/html.tpl
@@ -0,0 +1,16 @@
+{# Update the html_style/table_structure.html documentation too #}
+{% if doctype_html %}
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="{{encoding}}">
+{% if not exclude_styles %}{% include html_style_tpl %}{% endif %}
+</head>
+<body>
+{% include html_table_tpl %}
+</body>
+</html>
+{% elif not doctype_html %}
+{% if not exclude_styles %}{% include html_style_tpl %}{% endif %}
+{% include html_table_tpl %}
+{% endif %}
diff --git a/.local/lib/python3.8/site-packages/pandas/io/formats/templates/html_style.tpl b/.local/lib/python3.8/site-packages/pandas/io/formats/templates/html_style.tpl
new file mode 100644
index 0000000..5c3fcd9
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/io/formats/templates/html_style.tpl
@@ -0,0 +1,26 @@
+{%- block before_style -%}{%- endblock before_style -%}
+{% block style %}
+<style type="text/css">
+{% block table_styles %}
+{% for s in table_styles %}
+#T_{{uuid}} {{s.selector}} {
+{% for p,val in s.props %}
+  {{p}}: {{val}};
+{% endfor %}
+}
+{% endfor %}
+{% endblock table_styles %}
+{% block before_cellstyle %}{% endblock before_cellstyle %}
+{% block cellstyle %}
+{% for cs in [cellstyle, cellstyle_index, cellstyle_columns] %}
+{% for s in cs %}
+{% for selector in s.selectors %}{% if not loop.first %}, {% endif %}#T_{{uuid}}_{{selector}}{% endfor %} {
+{% for p,val in s.props %}
+  {{p}}: {{val}};
+{% endfor %}
+}
+{% endfor %}
+{% endfor %}
+{% endblock cellstyle %}
+</style>
+{% endblock style %}
diff --git a/.local/lib/python3.8/site-packages/pandas/io/formats/templates/html_table.tpl b/.local/lib/python3.8/site-packages/pandas/io/formats/templates/html_table.tpl
new file mode 100644
index 0000000..17118d2
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/io/formats/templates/html_table.tpl
@@ -0,0 +1,63 @@
+{% block before_table %}{% endblock before_table %}
+{% block table %}
+{% if exclude_styles %}
+<table>
+{% else %}
+<table id="T_{{uuid}}"{% if table_attributes %} {{table_attributes}}{% endif %}>
+{% endif %}
+{% block caption %}
+{% if caption and caption is string %}
+  <caption>{{caption}}</caption>
+{% elif caption and caption is sequence %}
+  <caption>{{caption[0]}}</caption>
+{% endif %}
+{% endblock caption %}
+{% block thead %}
+  <thead>
+{% block before_head_rows %}{% endblock %}
+{% for r in head %}
+{% block head_tr scoped %}
+    <tr>
+{% if exclude_styles %}
+{% for c in r %}
+{% if c.is_visible != False %}
+      <{{c.type}} {{c.attributes}}>{{c.display_value}}</{{c.type}}>
+{% endif %}
+{% endfor %}
+{% else %}
+{% for c in r %}
+{% if c.is_visible != False %}
+      <{{c.type}} {%- if c.id is defined %} id="T_{{uuid}}_{{c.id}}" {%- endif %} class="{{c.class}}" {{c.attributes}}>{{c.display_value}}</{{c.type}}>
+{% endif %}
+{% endfor %}
+{% endif %}
+    </tr>
+{% endblock head_tr %}
+{% endfor %}
+{% block after_head_rows %}{% endblock %}
+  </thead>
+{% endblock thead %}
+{% block tbody %}
+  <tbody>
+{% block before_rows %}{% endblock before_rows %}
+{% for r in body %}
+{% block tr scoped %}
+    <tr>
+{% if exclude_styles %}
+{% for c in r %}{% if c.is_visible != False %}
+      <{{c.type}} {{c.attributes}}>{{c.display_value}}</{{c.type}}>
+{% endif %}{% endfor %}
+{% else %}
+{% for c in r %}{% if c.is_visible != False %}
+      <{{c.type}} {%- if c.id is defined %} id="T_{{uuid}}_{{c.id}}" {%- endif %} class="{{c.class}}" {{c.attributes}}>{{c.display_value}}</{{c.type}}>
+{% endif %}{% endfor %}
+{% endif %}
+    </tr>
+{% endblock tr %}
+{% endfor %}
+{% block after_rows %}{% endblock after_rows %}
+  </tbody>
+{% endblock tbody %}
+</table>
+{% endblock table %} +{% block after_table %}{% endblock after_table %} diff --git a/.local/lib/python3.8/site-packages/pandas/io/formats/templates/latex.tpl b/.local/lib/python3.8/site-packages/pandas/io/formats/templates/latex.tpl new file mode 100644 index 0000000..ae341bb --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/formats/templates/latex.tpl @@ -0,0 +1,5 @@ +{% if environment == "longtable" %} +{% include "latex_longtable.tpl" %} +{% else %} +{% include "latex_table.tpl" %} +{% endif %} diff --git a/.local/lib/python3.8/site-packages/pandas/io/formats/templates/latex_longtable.tpl b/.local/lib/python3.8/site-packages/pandas/io/formats/templates/latex_longtable.tpl new file mode 100644 index 0000000..580f12a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/formats/templates/latex_longtable.tpl @@ -0,0 +1,82 @@ +\begin{longtable} +{%- set position = parse_table(table_styles, 'position') %} +{%- if position is not none %} +[{{position}}] +{%- endif %} +{%- set column_format = parse_table(table_styles, 'column_format') %} +{% raw %}{{% endraw %}{{column_format}}{% raw %}}{% endraw %} + +{% for style in table_styles %} +{% if style['selector'] not in ['position', 'position_float', 'caption', 'toprule', 'midrule', 'bottomrule', 'column_format', 'label'] %} +\{{style['selector']}}{{parse_table(table_styles, style['selector'])}} +{% endif %} +{% endfor %} +{% if caption and caption is string %} +\caption{% raw %}{{% endraw %}{{caption}}{% raw %}}{% endraw %} +{%- set label = parse_table(table_styles, 'label') %} +{%- if label is not none %} + \label{{label}} +{%- endif %} \\ +{% elif caption and caption is sequence %} +\caption[{{caption[1]}}]{% raw %}{{% endraw %}{{caption[0]}}{% raw %}}{% endraw %} +{%- set label = parse_table(table_styles, 'label') %} +{%- if label is not none %} + \label{{label}} +{%- endif %} \\ +{% else %} +{%- set label = parse_table(table_styles, 'label') %} +{%- if label is not none %} +\label{{label}} \\ +{% endif %} +{% endif %} +{% set toprule = parse_table(table_styles, 'toprule') %} +{% if toprule is not none %} +\{{toprule}} +{% endif %} +{% for row in head %} +{% for c in row %}{%- if not loop.first %} & {% endif %}{{parse_header(c, multirow_align, multicol_align, siunitx)}}{% endfor %} \\ +{% endfor %} +{% set midrule = parse_table(table_styles, 'midrule') %} +{% if midrule is not none %} +\{{midrule}} +{% endif %} +\endfirsthead +{% if caption and caption is string %} +\caption[]{% raw %}{{% endraw %}{{caption}}{% raw %}}{% endraw %} \\ +{% elif caption and caption is sequence %} +\caption[]{% raw %}{{% endraw %}{{caption[0]}}{% raw %}}{% endraw %} \\ +{% endif %} +{% if toprule is not none %} +\{{toprule}} +{% endif %} +{% for row in head %} +{% for c in row %}{%- if not loop.first %} & {% endif %}{{parse_header(c, multirow_align, multicol_align, siunitx)}}{% endfor %} \\ +{% endfor %} +{% if midrule is not none %} +\{{midrule}} +{% endif %} +\endhead +{% if midrule is not none %} +\{{midrule}} +{% endif %} +\multicolumn{% raw %}{{% endraw %}{{column_format|length}}{% raw %}}{% endraw %}{r}{Continued on next page} \\ +{% if midrule is not none %} +\{{midrule}} +{% endif %} +\endfoot +{% set bottomrule = parse_table(table_styles, 'bottomrule') %} +{% if bottomrule is not none %} +\{{bottomrule}} +{% endif %} +\endlastfoot +{% for row in body %} +{% for c in row %}{% if not loop.first %} & {% endif %} + {%- if c.type == 'th' %}{{parse_header(c, multirow_align, multicol_align)}}{% else %}{{parse_cell(c.cellstyle, 
c.display_value, convert_css)}}{% endif %} +{%- endfor %} \\ +{% if clines and clines[loop.index] | length > 0 %} + {%- for cline in clines[loop.index] %}{% if not loop.first %} {% endif %}{{ cline }}{% endfor %} + +{% endif %} +{% endfor %} +\end{longtable} +{% raw %}{% endraw %} diff --git a/.local/lib/python3.8/site-packages/pandas/io/formats/templates/latex_table.tpl b/.local/lib/python3.8/site-packages/pandas/io/formats/templates/latex_table.tpl new file mode 100644 index 0000000..7858cb4 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/formats/templates/latex_table.tpl @@ -0,0 +1,57 @@ +{% if environment or parse_wrap(table_styles, caption) %} +\begin{% raw %}{{% endraw %}{{environment if environment else "table"}}{% raw %}}{% endraw %} +{%- set position = parse_table(table_styles, 'position') %} +{%- if position is not none %} +[{{position}}] +{%- endif %} + +{% set position_float = parse_table(table_styles, 'position_float') %} +{% if position_float is not none%} +\{{position_float}} +{% endif %} +{% if caption and caption is string %} +\caption{% raw %}{{% endraw %}{{caption}}{% raw %}}{% endraw %} + +{% elif caption and caption is sequence %} +\caption[{{caption[1]}}]{% raw %}{{% endraw %}{{caption[0]}}{% raw %}}{% endraw %} + +{% endif %} +{% for style in table_styles %} +{% if style['selector'] not in ['position', 'position_float', 'caption', 'toprule', 'midrule', 'bottomrule', 'column_format'] %} +\{{style['selector']}}{{parse_table(table_styles, style['selector'])}} +{% endif %} +{% endfor %} +{% endif %} +\begin{tabular} +{%- set column_format = parse_table(table_styles, 'column_format') %} +{% raw %}{{% endraw %}{{column_format}}{% raw %}}{% endraw %} + +{% set toprule = parse_table(table_styles, 'toprule') %} +{% if toprule is not none %} +\{{toprule}} +{% endif %} +{% for row in head %} +{% for c in row %}{%- if not loop.first %} & {% endif %}{{parse_header(c, multirow_align, multicol_align, siunitx, convert_css)}}{% endfor %} \\ +{% endfor %} +{% set midrule = parse_table(table_styles, 'midrule') %} +{% if midrule is not none %} +\{{midrule}} +{% endif %} +{% for row in body %} +{% for c in row %}{% if not loop.first %} & {% endif %} + {%- if c.type == 'th' %}{{parse_header(c, multirow_align, multicol_align, False, convert_css)}}{% else %}{{parse_cell(c.cellstyle, c.display_value, convert_css)}}{% endif %} +{%- endfor %} \\ +{% if clines and clines[loop.index] | length > 0 %} + {%- for cline in clines[loop.index] %}{% if not loop.first %} {% endif %}{{ cline }}{% endfor %} + +{% endif %} +{% endfor %} +{% set bottomrule = parse_table(table_styles, 'bottomrule') %} +{% if bottomrule is not none %} +\{{bottomrule}} +{% endif %} +\end{tabular} +{% if environment or parse_wrap(table_styles, caption) %} +\end{% raw %}{{% endraw %}{{environment if environment else "table"}}{% raw %}}{% endraw %} + +{% endif %} diff --git a/.local/lib/python3.8/site-packages/pandas/io/formats/xml.py b/.local/lib/python3.8/site-packages/pandas/io/formats/xml.py new file mode 100644 index 0000000..aa69792 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/formats/xml.py @@ -0,0 +1,552 @@ +""" +:mod:`pandas.io.formats.xml` is a module for formatting data in XML. 
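+
+The public entry point is ``DataFrame.to_xml``, which dispatches to the
+formatter classes below. A small illustration (output shown for the default
+settings; values illustrative):
+
+>>> import pandas as pd
+>>> print(pd.DataFrame({"a": [1]}).to_xml())  # doctest: +SKIP
+<?xml version='1.0' encoding='utf-8'?>
+<data>
+  <row>
+    <index>0</index>
+    <a>1</a>
+  </row>
+</data>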
+""" +from __future__ import annotations + +import codecs +import io +from typing import Any + +from pandas._typing import ( + CompressionOptions, + FilePath, + ReadBuffer, + StorageOptions, + WriteBuffer, +) +from pandas.errors import AbstractMethodError +from pandas.util._decorators import doc + +from pandas.core.dtypes.common import is_list_like +from pandas.core.dtypes.missing import isna + +from pandas.core.frame import DataFrame +from pandas.core.shared_docs import _shared_docs + +from pandas.io.common import get_handle +from pandas.io.xml import ( + get_data_from_filepath, + preprocess_data, +) + + +@doc(compression_options=_shared_docs["compression_options"] % "path_or_buffer") +class BaseXMLFormatter: + """ + Subclass for formatting data in XML. + + Parameters + ---------- + path_or_buffer : str or file-like + This can be either a string of raw XML, a valid URL, + file or file-like object. + + index : bool + Whether to include index in xml document. + + row_name : str + Name for root of xml document. Default is 'data'. + + root_name : str + Name for row elements of xml document. Default is 'row'. + + na_rep : str + Missing data representation. + + attrs_cols : list + List of columns to write as attributes in row element. + + elem_cols : list + List of columns to write as children in row element. + + namespacess : dict + The namespaces to define in XML document as dicts with key + being namespace and value the URI. + + prefix : str + The prefix for each element in XML document including root. + + encoding : str + Encoding of xml object or document. + + xml_declaration : bool + Whether to include xml declaration at top line item in xml. + + pretty_print : bool + Whether to write xml document with line breaks and indentation. + + stylesheet : str or file-like + A URL, file, file-like object, or a raw string containing XSLT. + + {compression_options} + + .. versionchanged:: 1.4.0 Zstandard support. + + storage_options : dict, optional + Extra options that make sense for a particular storage connection, + e.g. 
host, port, username, password, etc., + + See also + -------- + pandas.io.formats.xml.EtreeXMLFormatter + pandas.io.formats.xml.LxmlXMLFormatter + + """ + + def __init__( + self, + frame: DataFrame, + path_or_buffer: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None, + index: bool = True, + root_name: str | None = "data", + row_name: str | None = "row", + na_rep: str | None = None, + attr_cols: list[str] | None = None, + elem_cols: list[str] | None = None, + namespaces: dict[str | None, str] | None = None, + prefix: str | None = None, + encoding: str = "utf-8", + xml_declaration: bool | None = True, + pretty_print: bool | None = True, + stylesheet: FilePath | ReadBuffer[str] | ReadBuffer[bytes] | None = None, + compression: CompressionOptions = "infer", + storage_options: StorageOptions = None, + ) -> None: + self.frame = frame + self.path_or_buffer = path_or_buffer + self.index = index + self.root_name = root_name + self.row_name = row_name + self.na_rep = na_rep + self.attr_cols = attr_cols + self.elem_cols = elem_cols + self.namespaces = namespaces + self.prefix = prefix + self.encoding = encoding + self.xml_declaration = xml_declaration + self.pretty_print = pretty_print + self.stylesheet = stylesheet + self.compression = compression + self.storage_options = storage_options + + self.orig_cols = self.frame.columns.tolist() + self.frame_dicts = self.process_dataframe() + + self.validate_columns() + self.validate_encoding() + self.prefix_uri = self.get_prefix_uri() + self.handle_indexes() + + def build_tree(self) -> bytes: + """ + Build tree from data. + + This method initializes the root and builds attributes and elements + with optional namespaces. + """ + raise AbstractMethodError(self) + + def validate_columns(self) -> None: + """ + Validate elems_cols and attrs_cols. + + This method will check if columns is list-like. + + Raises + ------ + ValueError + * If value is not a list and less then length of nodes. + """ + if self.attr_cols and not is_list_like(self.attr_cols): + raise TypeError( + f"{type(self.attr_cols).__name__} is not a valid type for attr_cols" + ) + + if self.elem_cols and not is_list_like(self.elem_cols): + raise TypeError( + f"{type(self.elem_cols).__name__} is not a valid type for elem_cols" + ) + + def validate_encoding(self) -> None: + """ + Validate encoding. + + This method will check if encoding is among listed under codecs. + + Raises + ------ + LookupError + * If encoding is not available in codecs. + """ + + codecs.lookup(self.encoding) + + def process_dataframe(self) -> dict[int | str, dict[str, Any]]: + """ + Adjust Data Frame to fit xml output. + + This method will adjust underlying data frame for xml output, + including optionally replacing missing values and including indexes. + """ + + df = self.frame + + if self.index: + df = df.reset_index() + + if self.na_rep is not None: + df = df.fillna(self.na_rep) + + return df.to_dict(orient="index") + + def handle_indexes(self) -> None: + """ + Handle indexes. + + This method will add indexes into attr_cols or elem_cols. + """ + + if not self.index: + return + + first_key = next(iter(self.frame_dicts)) + indexes: list[str] = [ + x for x in self.frame_dicts[first_key].keys() if x not in self.orig_cols + ] + + if self.attr_cols: + self.attr_cols = indexes + self.attr_cols + + if self.elem_cols: + self.elem_cols = indexes + self.elem_cols + + def get_prefix_uri(self) -> str: + """ + Get uri of namespace prefix. + + This method retrieves corresponding URI to prefix in namespaces. 
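+
+    For example (illustrative; ``df`` stands for any DataFrame, and the concrete
+    behaviour lives in the subclasses):
+
+    >>> fmt = LxmlXMLFormatter(df, namespaces={"ns": "http://example.com"},
+    ...                        prefix="ns")  # doctest: +SKIP
+    >>> fmt.prefix_uri  # doctest: +SKIP
+    '{http://example.com}'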
+ + Raises + ------ + KeyError + *If prefix is not included in namespace dict. + """ + + raise AbstractMethodError(self) + + def other_namespaces(self) -> dict: + """ + Define other namespaces. + + This method will build dictionary of namespaces attributes + for root element, conditionally with optional namespaces and + prefix. + """ + + nmsp_dict: dict[str, str] = {} + if self.namespaces and self.prefix is None: + nmsp_dict = {"xmlns": n for p, n in self.namespaces.items() if p != ""} + + if self.namespaces and self.prefix: + nmsp_dict = {"xmlns": n for p, n in self.namespaces.items() if p == ""} + + return nmsp_dict + + def build_attribs(self, d: dict[str, Any], elem_row: Any) -> Any: + """ + Create attributes of row. + + This method adds attributes using attr_cols to row element and + works with tuples for multindex or hierarchical columns. + """ + + if not self.attr_cols: + return elem_row + + for col in self.attr_cols: + attr_name = self._get_flat_col_name(col) + try: + if not isna(d[col]): + elem_row.attrib[attr_name] = str(d[col]) + except KeyError: + raise KeyError(f"no valid column, {col}") + return elem_row + + def _get_flat_col_name(self, col: str | tuple) -> str: + flat_col = col + if isinstance(col, tuple): + flat_col = ( + "".join([str(c) for c in col]).strip() + if "" in col + else "_".join([str(c) for c in col]).strip() + ) + return f"{self.prefix_uri}{flat_col}" + + def build_elems(self, d: dict[str, Any], elem_row: Any) -> None: + """ + Create child elements of row. + + This method adds child elements using elem_cols to row element and + works with tuples for multindex or hierarchical columns. + """ + + raise AbstractMethodError(self) + + def _build_elems(self, sub_element_cls, d: dict[str, Any], elem_row: Any) -> None: + + if not self.elem_cols: + return + + for col in self.elem_cols: + elem_name = self._get_flat_col_name(col) + try: + val = None if isna(d[col]) or d[col] == "" else str(d[col]) + sub_element_cls(elem_row, elem_name).text = val + except KeyError: + raise KeyError(f"no valid column, {col}") + + def write_output(self) -> str | None: + xml_doc = self.build_tree() + + if self.path_or_buffer is not None: + with get_handle( + self.path_or_buffer, + "wb", + compression=self.compression, + storage_options=self.storage_options, + is_text=False, + ) as handles: + handles.handle.write(xml_doc) + return None + + else: + return xml_doc.decode(self.encoding).rstrip() + + +class EtreeXMLFormatter(BaseXMLFormatter): + """ + Class for formatting data in xml using Python standard library + modules: `xml.etree.ElementTree` and `xml.dom.minidom`. + """ + + def build_tree(self) -> bytes: + from xml.etree.ElementTree import ( + Element, + SubElement, + tostring, + ) + + self.root = Element( + f"{self.prefix_uri}{self.root_name}", attrib=self.other_namespaces() + ) + + for d in self.frame_dicts.values(): + elem_row = SubElement(self.root, f"{self.prefix_uri}{self.row_name}") + + if not self.attr_cols and not self.elem_cols: + self.elem_cols = list(d.keys()) + self.build_elems(d, elem_row) + + else: + elem_row = self.build_attribs(d, elem_row) + self.build_elems(d, elem_row) + + self.out_xml = tostring(self.root, method="xml", encoding=self.encoding) + + if self.pretty_print: + self.out_xml = self.prettify_tree() + + if self.xml_declaration: + self.out_xml = self.add_declaration() + else: + self.out_xml = self.remove_declaration() + + if self.stylesheet is not None: + raise ValueError( + "To use stylesheet, you need lxml installed and selected as parser." 
+ ) + + return self.out_xml + + def get_prefix_uri(self) -> str: + from xml.etree.ElementTree import register_namespace + + uri = "" + if self.namespaces: + for p, n in self.namespaces.items(): + if isinstance(p, str) and isinstance(n, str): + register_namespace(p, n) + if self.prefix: + try: + uri = f"{{{self.namespaces[self.prefix]}}}" + except KeyError: + raise KeyError(f"{self.prefix} is not included in namespaces") + else: + uri = f'{{{self.namespaces[""]}}}' + + return uri + + def build_elems(self, d: dict[str, Any], elem_row: Any) -> None: + from xml.etree.ElementTree import SubElement + + self._build_elems(SubElement, d, elem_row) + + def prettify_tree(self) -> bytes: + """ + Output tree for pretty print format. + + This method will pretty print xml with line breaks and indentation. + """ + + from xml.dom.minidom import parseString + + dom = parseString(self.out_xml) + + return dom.toprettyxml(indent=" ", encoding=self.encoding) + + def add_declaration(self) -> bytes: + """ + Add xml declaration. + + This method will add xml declaration of working tree. Currently, + xml_declaration is supported in etree starting in Python 3.8. + """ + decl = f'\n' + + doc = ( + self.out_xml + if self.out_xml.startswith(b" bytes: + """ + Remove xml declaration. + + This method will remove xml declaration of working tree. Currently, + pretty_print is not supported in etree. + """ + + return self.out_xml.split(b"?>")[-1].strip() + + +class LxmlXMLFormatter(BaseXMLFormatter): + """ + Class for formatting data in xml using Python standard library + modules: `xml.etree.ElementTree` and `xml.dom.minidom`. + """ + + def __init__(self, *args, **kwargs) -> None: + super().__init__(*args, **kwargs) + + self.convert_empty_str_key() + + def build_tree(self) -> bytes: + """ + Build tree from data. + + This method initializes the root and builds attributes and elements + with optional namespaces. + """ + from lxml.etree import ( + Element, + SubElement, + tostring, + ) + + self.root = Element(f"{self.prefix_uri}{self.root_name}", nsmap=self.namespaces) + + for d in self.frame_dicts.values(): + elem_row = SubElement(self.root, f"{self.prefix_uri}{self.row_name}") + + if not self.attr_cols and not self.elem_cols: + self.elem_cols = list(d.keys()) + self.build_elems(d, elem_row) + + else: + elem_row = self.build_attribs(d, elem_row) + self.build_elems(d, elem_row) + + self.out_xml = tostring( + self.root, + pretty_print=self.pretty_print, + method="xml", + encoding=self.encoding, + xml_declaration=self.xml_declaration, + ) + + if self.stylesheet is not None: + self.out_xml = self.transform_doc() + + return self.out_xml + + def convert_empty_str_key(self) -> None: + """ + Replace zero-lengh string in `namespaces`. + + This method will replce '' with None to align to `lxml` + requirement that empty string prefixes are not allowed. + """ + + if self.namespaces and "" in self.namespaces.keys(): + self.namespaces[None] = self.namespaces.pop("", "default") + + def get_prefix_uri(self) -> str: + uri = "" + if self.namespaces: + if self.prefix: + try: + uri = f"{{{self.namespaces[self.prefix]}}}" + except KeyError: + raise KeyError(f"{self.prefix} is not included in namespaces") + else: + uri = f'{{{self.namespaces[""]}}}' + + return uri + + def build_elems(self, d: dict[str, Any], elem_row: Any) -> None: + from lxml.etree import SubElement + + self._build_elems(SubElement, d, elem_row) + + def transform_doc(self) -> bytes: + """ + Parse stylesheet from file or buffer and run it. 
+ + This method will parse stylesheet object into tree for parsing + conditionally by its specific object type, then transforms + original tree with XSLT script. + """ + from lxml.etree import ( + XSLT, + XMLParser, + fromstring, + parse, + ) + + style_doc = self.stylesheet + assert style_doc is not None # is ensured by caller + + handle_data = get_data_from_filepath( + filepath_or_buffer=style_doc, + encoding=self.encoding, + compression=self.compression, + storage_options=self.storage_options, + ) + + with preprocess_data(handle_data) as xml_data: + curr_parser = XMLParser(encoding=self.encoding) + + if isinstance(xml_data, io.StringIO): + xsl_doc = fromstring( + xml_data.getvalue().encode(self.encoding), parser=curr_parser + ) + else: + xsl_doc = parse(xml_data, parser=curr_parser) + + transformer = XSLT(xsl_doc) + new_doc = transformer(self.root) + + return bytes(new_doc) diff --git a/.local/lib/python3.8/site-packages/pandas/io/gbq.py b/.local/lib/python3.8/site-packages/pandas/io/gbq.py new file mode 100644 index 0000000..77ad40b --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/gbq.py @@ -0,0 +1,224 @@ +""" Google BigQuery support """ +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Any, +) + +from pandas.compat._optional import import_optional_dependency + +if TYPE_CHECKING: + from pandas import DataFrame + + +def _try_import(): + # since pandas is a dependency of pandas-gbq + # we need to import on first use + msg = ( + "pandas-gbq is required to load data from Google BigQuery. " + "See the docs: https://pandas-gbq.readthedocs.io." + ) + pandas_gbq = import_optional_dependency("pandas_gbq", extra=msg) + return pandas_gbq + + +def read_gbq( + query: str, + project_id: str | None = None, + index_col: str | None = None, + col_order: list[str] | None = None, + reauth: bool = False, + auth_local_webserver: bool = False, + dialect: str | None = None, + location: str | None = None, + configuration: dict[str, Any] | None = None, + credentials=None, + use_bqstorage_api: bool | None = None, + max_results: int | None = None, + progress_bar_type: str | None = None, +) -> DataFrame: + """ + Load data from Google BigQuery. + + This function requires the `pandas-gbq package + `__. + + See the `How to authenticate with Google BigQuery + `__ + guide for authentication instructions. + + Parameters + ---------- + query : str + SQL-Like Query to return data values. + project_id : str, optional + Google BigQuery Account project ID. Optional when available from + the environment. + index_col : str, optional + Name of result column to use for index in results DataFrame. + col_order : list(str), optional + List of BigQuery column names in the desired order for results + DataFrame. + reauth : bool, default False + Force Google BigQuery to re-authenticate the user. This is useful + if multiple accounts are used. + auth_local_webserver : bool, default False + Use the `local webserver flow`_ instead of the `console flow`_ + when getting user credentials. + + .. _local webserver flow: + https://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server + .. _console flow: + https://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console + + *New in version 0.2.0 of pandas-gbq*. + dialect : str, default 'legacy' + Note: The default value is changing to 'standard' in a future version. 
+ + SQL syntax dialect to use. Value can be one of: + + ``'legacy'`` + Use BigQuery's legacy SQL dialect. For more information see + `BigQuery Legacy SQL Reference + `__. + ``'standard'`` + Use BigQuery's standard SQL, which is + compliant with the SQL 2011 standard. For more information + see `BigQuery Standard SQL Reference + `__. + location : str, optional + Location where the query job should run. See the `BigQuery locations + documentation + `__ for a + list of available locations. The location must match that of any + datasets used in the query. + + *New in version 0.5.0 of pandas-gbq*. + configuration : dict, optional + Query config parameters for job processing. + For example: + + configuration = {'query': {'useQueryCache': False}} + + For more information see `BigQuery REST API Reference + `__. + credentials : google.auth.credentials.Credentials, optional + Credentials for accessing Google APIs. Use this parameter to override + default credentials, such as to use Compute Engine + :class:`google.auth.compute_engine.Credentials` or Service Account + :class:`google.oauth2.service_account.Credentials` directly. + + *New in version 0.8.0 of pandas-gbq*. + use_bqstorage_api : bool, default False + Use the `BigQuery Storage API + `__ to + download query results quickly, but at an increased cost. To use this + API, first `enable it in the Cloud Console + `__. + You must also have the `bigquery.readsessions.create + `__ + permission on the project you are billing queries to. + + This feature requires version 0.10.0 or later of the ``pandas-gbq`` + package. It also requires the ``google-cloud-bigquery-storage`` and + ``fastavro`` packages. + + .. versionadded:: 0.25.0 + max_results : int, optional + If set, limit the maximum number of rows to fetch from the query + results. + + *New in version 0.12.0 of pandas-gbq*. + + .. versionadded:: 1.1.0 + progress_bar_type : Optional, str + If set, use the `tqdm `__ library to + display a progress bar while the data downloads. Install the + ``tqdm`` package to use this feature. + + Possible values of ``progress_bar_type`` include: + + ``None`` + No progress bar. + ``'tqdm'`` + Use the :func:`tqdm.tqdm` function to print a progress bar + to :data:`sys.stderr`. + ``'tqdm_notebook'`` + Use the :func:`tqdm.tqdm_notebook` function to display a + progress bar as a Jupyter notebook widget. + ``'tqdm_gui'`` + Use the :func:`tqdm.tqdm_gui` function to display a + progress bar as a graphical dialog box. + + Note that this feature requires version 0.12.0 or later of the + ``pandas-gbq`` package. And it requires the ``tqdm`` package. Slightly + different than ``pandas-gbq``, here the default is ``None``. + + .. versionadded:: 1.0.0 + + Returns + ------- + df: DataFrame + DataFrame representing results of query. + + See Also + -------- + pandas_gbq.read_gbq : This function in the pandas-gbq library. + DataFrame.to_gbq : Write a DataFrame to Google BigQuery. + """ + pandas_gbq = _try_import() + + kwargs: dict[str, str | bool | int | None] = {} + + # START: new kwargs. Don't populate unless explicitly set. 
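+    # For example (illustrative): a caller passing use_bqstorage_api=True ends
+    # up forwarding {"use_bqstorage_api": True} to pandas_gbq.read_gbq, while
+    # leaving it as None defers entirely to the pandas-gbq default.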
+ if use_bqstorage_api is not None: + kwargs["use_bqstorage_api"] = use_bqstorage_api + if max_results is not None: + kwargs["max_results"] = max_results + + kwargs["progress_bar_type"] = progress_bar_type + # END: new kwargs + + return pandas_gbq.read_gbq( + query, + project_id=project_id, + index_col=index_col, + col_order=col_order, + reauth=reauth, + auth_local_webserver=auth_local_webserver, + dialect=dialect, + location=location, + configuration=configuration, + credentials=credentials, + **kwargs, + ) + + +def to_gbq( + dataframe: DataFrame, + destination_table: str, + project_id: str | None = None, + chunksize: int | None = None, + reauth: bool = False, + if_exists: str = "fail", + auth_local_webserver: bool = False, + table_schema: list[dict[str, str]] | None = None, + location: str | None = None, + progress_bar: bool = True, + credentials=None, +) -> None: + pandas_gbq = _try_import() + pandas_gbq.to_gbq( + dataframe, + destination_table, + project_id=project_id, + chunksize=chunksize, + reauth=reauth, + if_exists=if_exists, + auth_local_webserver=auth_local_webserver, + table_schema=table_schema, + location=location, + progress_bar=progress_bar, + credentials=credentials, + ) diff --git a/.local/lib/python3.8/site-packages/pandas/io/html.py b/.local/lib/python3.8/site-packages/pandas/io/html.py new file mode 100644 index 0000000..05d7c29 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/html.py @@ -0,0 +1,1129 @@ +""" +:mod:`pandas.io.html` is a module containing functionality for dealing with +HTML IO. + +""" + +from __future__ import annotations + +from collections import abc +import numbers +import re +from typing import ( + Pattern, + Sequence, + cast, +) + +from pandas._typing import ( + FilePath, + ReadBuffer, +) +from pandas.compat._optional import import_optional_dependency +from pandas.errors import ( + AbstractMethodError, + EmptyDataError, +) +from pandas.util._decorators import deprecate_nonkeyword_arguments + +from pandas.core.dtypes.common import is_list_like + +from pandas.core.construction import create_series_with_explicit_dtype +from pandas.core.frame import DataFrame + +from pandas.io.common import ( + file_exists, + get_handle, + is_url, + stringify_path, + urlopen, + validate_header_arg, +) +from pandas.io.formats.printing import pprint_thing +from pandas.io.parsers import TextParser + +_IMPORTS = False +_HAS_BS4 = False +_HAS_LXML = False +_HAS_HTML5LIB = False + + +def _importers() -> None: + # import things we need + # but make this done on a first use basis + + global _IMPORTS + if _IMPORTS: + return + + global _HAS_BS4, _HAS_LXML, _HAS_HTML5LIB + bs4 = import_optional_dependency("bs4", errors="ignore") + _HAS_BS4 = bs4 is not None + + lxml = import_optional_dependency("lxml.etree", errors="ignore") + _HAS_LXML = lxml is not None + + html5lib = import_optional_dependency("html5lib", errors="ignore") + _HAS_HTML5LIB = html5lib is not None + + _IMPORTS = True + + +############# +# READ HTML # +############# +_RE_WHITESPACE = re.compile(r"[\r\n]+|\s{2,}") + + +def _remove_whitespace(s: str, regex: Pattern = _RE_WHITESPACE) -> str: + """ + Replace extra whitespace inside of a string with a single space. + + Parameters + ---------- + s : str or unicode + The string from which to remove extra whitespace. + regex : re.Pattern + The regular expression to use to remove extra whitespace. + + Returns + ------- + subd : str or unicode + `s` with all extra whitespace replaced with a single space. 
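+
+    Examples
+    --------
+    >>> _remove_whitespace("  hello   world ")
+    'hello world'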
+ """ + return regex.sub(" ", s.strip()) + + +def _get_skiprows(skiprows: int | Sequence[int] | slice | None) -> int | Sequence[int]: + """ + Get an iterator given an integer, slice or container. + + Parameters + ---------- + skiprows : int, slice, container + The iterator to use to skip rows; can also be a slice. + + Raises + ------ + TypeError + * If `skiprows` is not a slice, integer, or Container + + Returns + ------- + it : iterable + A proper iterator to use to skip rows of a DataFrame. + """ + if isinstance(skiprows, slice): + start, step = skiprows.start or 0, skiprows.step or 1 + return list(range(start, skiprows.stop, step)) + elif isinstance(skiprows, numbers.Integral) or is_list_like(skiprows): + return cast("int | Sequence[int]", skiprows) + elif skiprows is None: + return 0 + raise TypeError(f"{type(skiprows).__name__} is not a valid type for skipping rows") + + +def _read( + obj: bytes | FilePath | ReadBuffer[str] | ReadBuffer[bytes], encoding: str | None +) -> str | bytes: + """ + Try to read from a url, file or string. + + Parameters + ---------- + obj : str, unicode, path object, or file-like object + + Returns + ------- + raw_text : str + """ + text: str | bytes + if ( + is_url(obj) + or hasattr(obj, "read") + or (isinstance(obj, str) and file_exists(obj)) + ): + # error: Argument 1 to "get_handle" has incompatible type "Union[str, bytes, + # Union[IO[Any], RawIOBase, BufferedIOBase, TextIOBase, TextIOWrapper, mmap]]"; + # expected "Union[PathLike[str], Union[str, Union[IO[Any], RawIOBase, + # BufferedIOBase, TextIOBase, TextIOWrapper, mmap]]]" + with get_handle( + obj, "r", encoding=encoding # type: ignore[arg-type] + ) as handles: + text = handles.handle.read() + elif isinstance(obj, (str, bytes)): + text = obj + else: + raise TypeError(f"Cannot read object of type '{type(obj).__name__}'") + return text + + +class _HtmlFrameParser: + """ + Base class for parsers that parse HTML into DataFrames. + + Parameters + ---------- + io : str or file-like + This can be either a string of raw HTML, a valid URL using the HTTP, + FTP, or FILE protocols or a file-like object. + + match : str or regex + The text to match in the document. + + attrs : dict + List of HTML element attributes to match. + + encoding : str + Encoding to be used by parser + + displayed_only : bool + Whether or not items with "display:none" should be ignored + + Attributes + ---------- + io : str or file-like + raw HTML, URL, or file-like object + + match : regex + The text to match in the raw HTML + + attrs : dict-like + A dictionary of valid table attributes to use to search for table + elements. + + encoding : str + Encoding to be used by parser + + displayed_only : bool + Whether or not items with "display:none" should be ignored + + Notes + ----- + To subclass this class effectively you must override the following methods: + * :func:`_build_doc` + * :func:`_attr_getter` + * :func:`_text_getter` + * :func:`_parse_td` + * :func:`_parse_thead_tr` + * :func:`_parse_tbody_tr` + * :func:`_parse_tfoot_tr` + * :func:`_parse_tables` + * :func:`_equals_tag` + See each method's respective documentation for details on their + functionality. 
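+
+    The public entry point wrapping these parsers is :func:`pandas.read_html`;
+    a minimal illustration:
+
+    >>> import pandas as pd
+    >>> html = "<table><tr><th>a</th></tr><tr><td>1</td></tr></table>"
+    >>> pd.read_html(html)[0]  # doctest: +SKIP
+       a
+    0  1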
+ """ + + def __init__( + self, + io: FilePath | ReadBuffer[str] | ReadBuffer[bytes], + match: str | Pattern, + attrs: dict[str, str] | None, + encoding: str, + displayed_only: bool, + ): + self.io = io + self.match = match + self.attrs = attrs + self.encoding = encoding + self.displayed_only = displayed_only + + def parse_tables(self): + """ + Parse and return all tables from the DOM. + + Returns + ------- + list of parsed (header, body, footer) tuples from tables. + """ + tables = self._parse_tables(self._build_doc(), self.match, self.attrs) + return (self._parse_thead_tbody_tfoot(table) for table in tables) + + def _attr_getter(self, obj, attr): + """ + Return the attribute value of an individual DOM node. + + Parameters + ---------- + obj : node-like + A DOM node. + + attr : str or unicode + The attribute, such as "colspan" + + Returns + ------- + str or unicode + The attribute value. + """ + # Both lxml and BeautifulSoup have the same implementation: + return obj.get(attr) + + def _text_getter(self, obj): + """ + Return the text of an individual DOM node. + + Parameters + ---------- + obj : node-like + A DOM node. + + Returns + ------- + text : str or unicode + The text from an individual DOM node. + """ + raise AbstractMethodError(self) + + def _parse_td(self, obj): + """ + Return the td elements from a row element. + + Parameters + ---------- + obj : node-like + A DOM node. + + Returns + ------- + list of node-like + These are the elements of each row, i.e., the columns. + """ + raise AbstractMethodError(self) + + def _parse_thead_tr(self, table): + """ + Return the list of thead row elements from the parsed table element. + + Parameters + ---------- + table : a table element that contains zero or more thead elements. + + Returns + ------- + list of node-like + These are the row elements of a table. + """ + raise AbstractMethodError(self) + + def _parse_tbody_tr(self, table): + """ + Return the list of tbody row elements from the parsed table element. + + HTML5 table bodies consist of either 0 or more elements (which + only contain elements) or 0 or more elements. This method + checks for both structures. + + Parameters + ---------- + table : a table element that contains row elements. + + Returns + ------- + list of node-like + These are the row elements of a table. + """ + raise AbstractMethodError(self) + + def _parse_tfoot_tr(self, table): + """ + Return the list of tfoot row elements from the parsed table element. + + Parameters + ---------- + table : a table element that contains row elements. + + Returns + ------- + list of node-like + These are the row elements of a table. + """ + raise AbstractMethodError(self) + + def _parse_tables(self, doc, match, attrs): + """ + Return all tables from the parsed DOM. + + Parameters + ---------- + doc : the DOM from which to parse the table element. + + match : str or regular expression + The text to search for in the DOM tree. + + attrs : dict + A dictionary of table attributes that can be used to disambiguate + multiple tables on a page. + + Raises + ------ + ValueError : `match` does not match any text in the document. + + Returns + ------- + list of node-like + HTML
elements to be parsed into raw data. + """ + raise AbstractMethodError(self) + + def _equals_tag(self, obj, tag): + """ + Return whether an individual DOM node matches a tag + + Parameters + ---------- + obj : node-like + A DOM node. + + tag : str + Tag name to be checked for equality. + + Returns + ------- + boolean + Whether `obj`'s tag name is `tag` + """ + raise AbstractMethodError(self) + + def _build_doc(self): + """ + Return a tree-like object that can be used to iterate over the DOM. + + Returns + ------- + node-like + The DOM from which to parse the table element. + """ + raise AbstractMethodError(self) + + def _parse_thead_tbody_tfoot(self, table_html): + """ + Given a table, return parsed header, body, and foot. + + Parameters + ---------- + table_html : node-like + + Returns + ------- + tuple of (header, body, footer), each a list of list-of-text rows. + + Notes + ----- + Header and body are lists-of-lists. Top level list is a list of + rows. Each row is a list of str text. + + Logic: Use , , elements to identify + header, body, and footer, otherwise: + - Put all rows into body + - Move rows from top of body to header only if + all elements inside row are . Move the top all- or + while body_rows and row_is_all_th(body_rows[0]): + header_rows.append(body_rows.pop(0)) + + header = self._expand_colspan_rowspan(header_rows) + body = self._expand_colspan_rowspan(body_rows) + footer = self._expand_colspan_rowspan(footer_rows) + + return header, body, footer + + def _expand_colspan_rowspan(self, rows): + """ + Given a list of s, return a list of text rows. + + Parameters + ---------- + rows : list of node-like + List of s + + Returns + ------- + list of list + Each returned row is a list of str text. + + Notes + ----- + Any cell with ``rowspan`` or ``colspan`` will have its contents copied + to subsequent cells. + """ + all_texts = [] # list of rows, each a list of str + remainder: list[tuple[int, str, int]] = [] # list of (index, text, nrows) + + for tr in rows: + texts = [] # the output for this row + next_remainder = [] + + index = 0 + tds = self._parse_td(tr) + for td in tds: + # Append texts from previous rows with rowspan>1 that come + # before this or (see _parse_thead_tr). + return row.xpath("./td|./th") + + def _parse_tables(self, doc, match, kwargs): + pattern = match.pattern + + # 1. check all descendants for the given pattern and only search tables + # 2. go up the tree until we find a table + xpath_expr = f"//table//*[re:test(text(), {repr(pattern)})]/ancestor::table" + + # if any table attributes were given build an xpath expression to + # search for them + if kwargs: + xpath_expr += _build_xpath_expr(kwargs) + + tables = doc.xpath(xpath_expr, namespaces=_re_namespace) + + tables = self._handle_hidden_tables(tables, "attrib") + if self.displayed_only: + for table in tables: + # lxml utilizes XPATH 1.0 which does not have regex + # support. As a result, we find all elements with a style + # attribute and iterate them to check for display:none + for elem in table.xpath(".//*[@style]"): + if "display:none" in elem.attrib.get("style", "").replace(" ", ""): + elem.getparent().remove(elem) + + if not tables: + raise ValueError(f"No tables found matching regex {repr(pattern)}") + return tables + + def _equals_tag(self, obj, tag): + return obj.tag == tag + + def _build_doc(self): + """ + Raises + ------ + ValueError + * If a URL that lxml cannot parse is passed. + + Exception + * Any other ``Exception`` thrown. 
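The lxml table search above relies on EXSLT regular expressions inside XPath. A standalone sketch of that mechanism, assuming lxml is installed (the sample markup is invented):

from lxml import html

doc = html.fromstring(
    "<html><body>"
    "<table><tr><td>alpha</td></tr></table>"
    "<table><tr><td>beta-42</td></tr></table>"
    "</body></html>"
)
ns = {"re": "http://exslt.org/regular-expressions"}
expr = r"//table//*[re:test(text(), 'beta-\d+')]/ancestor::table"
assert len(doc.xpath(expr, namespaces=ns)) == 1  # only the second table matches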
For example, trying to parse a + URL that is syntactically correct on a machine with no internet + connection will fail. + + See Also + -------- + pandas.io.html._HtmlFrameParser._build_doc + """ + from lxml.etree import XMLSyntaxError + from lxml.html import ( + HTMLParser, + fromstring, + parse, + ) + + parser = HTMLParser(recover=True, encoding=self.encoding) + + try: + if is_url(self.io): + with urlopen(self.io) as f: + r = parse(f, parser=parser) + else: + # try to parse the input in the simplest way + r = parse(self.io, parser=parser) + try: + r = r.getroot() + except AttributeError: + pass + except (UnicodeDecodeError, OSError) as e: + # if the input is a blob of html goop + if not is_url(self.io): + r = fromstring(self.io, parser=parser) + + try: + r = r.getroot() + except AttributeError: + pass + else: + raise e + else: + if not hasattr(r, "text_content"): + raise XMLSyntaxError("no text parsed from document", 0, 0, 0) + return r + + def _parse_thead_tr(self, table): + rows = [] + + for thead in table.xpath(".//thead"): + rows.extend(thead.xpath("./tr")) + + # HACK: lxml does not clean up the clearly-erroneous + # . (Missing ). Add + # the and _pretend_ it's a ; _parse_td() will find its + # children as though it's a . + # + # Better solution would be to use html5lib. + elements_at_root = thead.xpath("./td|./th") + if elements_at_root: + rows.append(thead) + + return rows + + def _parse_tbody_tr(self, table): + from_tbody = table.xpath(".//tbody//tr") + from_root = table.xpath("./tr") + # HTML spec: at most one of these lists has content + return from_tbody + from_root + + def _parse_tfoot_tr(self, table): + return table.xpath(".//tfoot//tr") + + +def _expand_elements(body): + data = [len(elem) for elem in body] + lens = create_series_with_explicit_dtype(data, dtype_if_empty=object) + lens_max = lens.max() + not_max = lens[lens != lens_max] + + empty = [""] + for ind, length in not_max.items(): + body[ind] += empty * (lens_max - length) + + +def _data_to_frame(**kwargs): + head, body, foot = kwargs.pop("data") + header = kwargs.pop("header") + kwargs["skiprows"] = _get_skiprows(kwargs["skiprows"]) + if head: + body = head + body + + # Infer header when there is a or top ' in styler.to_html() + + +def test_rowspan_w3(): + # GH 38533 + df = DataFrame(data=[[1, 2]], index=[["l0", "l0"], ["l1a", "l1b"]]) + styler = Styler(df, uuid="_", cell_ids=False) + assert '' in styler.to_html() + + +def test_styles(styler): + styler.set_uuid("abc") + styler.set_table_styles([{"selector": "td", "props": "color: red;"}]) + result = styler.to_html(doctype_html=True) + expected = dedent( + """\ + + + + + + + +
+               - Move rows from bottom of body to footer only if
+                 all elements inside row are <th>
+        """
+        header_rows = self._parse_thead_tr(table_html)
+        body_rows = self._parse_tbody_tr(table_html)
+        footer_rows = self._parse_tfoot_tr(table_html)
+
+        def row_is_all_th(row):
+            return all(self._equals_tag(t, "th") for t in self._parse_td(row))
+
+        if not header_rows:
+            # The table has no <thead>. Move the top all-<th>
rows from
+            # body_rows to header_rows. (This is a common case because many
+            # tables in the wild have no <thead> or <tfoot>
+ while remainder and remainder[0][0] <= index: + prev_i, prev_text, prev_rowspan = remainder.pop(0) + texts.append(prev_text) + if prev_rowspan > 1: + next_remainder.append((prev_i, prev_text, prev_rowspan - 1)) + index += 1 + + # Append the text from this , colspan times + text = _remove_whitespace(self._text_getter(td)) + rowspan = int(self._attr_getter(td, "rowspan") or 1) + colspan = int(self._attr_getter(td, "colspan") or 1) + + for _ in range(colspan): + texts.append(text) + if rowspan > 1: + next_remainder.append((index, text, rowspan - 1)) + index += 1 + + # Append texts from previous rows at the final position + for prev_i, prev_text, prev_rowspan in remainder: + texts.append(prev_text) + if prev_rowspan > 1: + next_remainder.append((prev_i, prev_text, prev_rowspan - 1)) + + all_texts.append(texts) + remainder = next_remainder + + # Append rows that only appear because the previous row had non-1 + # rowspan + while remainder: + next_remainder = [] + texts = [] + for prev_i, prev_text, prev_rowspan in remainder: + texts.append(prev_text) + if prev_rowspan > 1: + next_remainder.append((prev_i, prev_text, prev_rowspan - 1)) + all_texts.append(texts) + remainder = next_remainder + + return all_texts + + def _handle_hidden_tables(self, tbl_list, attr_name): + """ + Return list of tables, potentially removing hidden elements + + Parameters + ---------- + tbl_list : list of node-like + Type of list elements will vary depending upon parser used + attr_name : str + Name of the accessor for retrieving HTML attributes + + Returns + ------- + list of node-like + Return type matches `tbl_list` + """ + if not self.displayed_only: + return tbl_list + + return [ + x + for x in tbl_list + if "display:none" + not in getattr(x, attr_name).get("style", "").replace(" ", "") + ] + + +class _BeautifulSoupHtml5LibFrameParser(_HtmlFrameParser): + """ + HTML to DataFrame parser that uses BeautifulSoup under the hood. + + See Also + -------- + pandas.io.html._HtmlFrameParser + pandas.io.html._LxmlFrameParser + + Notes + ----- + Documentation strings for this class are in the base class + :class:`pandas.io.html._HtmlFrameParser`. 
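The net effect of `_expand_colspan_rowspan` is easiest to see through the public API: a cell carrying rowspan is copied into each row it spans. A small check, assuming lxml or bs4+html5lib is importable:

import pandas as pd

page = """
<table>
  <tr><th>a</th><th>b</th></tr>
  <tr><td rowspan="2">x</td><td>1</td></tr>
  <tr><td>2</td></tr>
</table>
"""
(df,) = pd.read_html(page)
assert df["a"].tolist() == ["x", "x"]  # the rowspan cell was duplicated
assert df["b"].tolist() == [1, 2]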
+ """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + from bs4 import SoupStrainer + + self._strainer = SoupStrainer("table") + + def _parse_tables(self, doc, match, attrs): + element_name = self._strainer.name + tables = doc.find_all(element_name, attrs=attrs) + + if not tables: + raise ValueError("No tables found") + + result = [] + unique_tables = set() + tables = self._handle_hidden_tables(tables, "attrs") + + for table in tables: + if self.displayed_only: + for elem in table.find_all(style=re.compile(r"display:\s*none")): + elem.decompose() + + if table not in unique_tables and table.find(text=match) is not None: + result.append(table) + unique_tables.add(table) + + if not result: + raise ValueError(f"No tables found matching pattern {repr(match.pattern)}") + return result + + def _text_getter(self, obj): + return obj.text + + def _equals_tag(self, obj, tag): + return obj.name == tag + + def _parse_td(self, row): + return row.find_all(("td", "th"), recursive=False) + + def _parse_thead_tr(self, table): + return table.select("thead tr") + + def _parse_tbody_tr(self, table): + from_tbody = table.select("tbody tr") + from_root = table.find_all("tr", recursive=False) + # HTML spec: at most one of these lists has content + return from_tbody + from_root + + def _parse_tfoot_tr(self, table): + return table.select("tfoot tr") + + def _setup_build_doc(self): + raw_text = _read(self.io, self.encoding) + if not raw_text: + raise ValueError(f"No text parsed from document: {self.io}") + return raw_text + + def _build_doc(self): + from bs4 import BeautifulSoup + + bdoc = self._setup_build_doc() + if isinstance(bdoc, bytes) and self.encoding is not None: + udoc = bdoc.decode(self.encoding) + from_encoding = None + else: + udoc = bdoc + from_encoding = self.encoding + return BeautifulSoup(udoc, features="html5lib", from_encoding=from_encoding) + + +def _build_xpath_expr(attrs) -> str: + """ + Build an xpath expression to simulate bs4's ability to pass in kwargs to + search for attributes when using the lxml parser. + + Parameters + ---------- + attrs : dict + A dict of HTML attributes. These are NOT checked for validity. + + Returns + ------- + expr : unicode + An XPath expression that checks for the given HTML attributes. + """ + # give class attribute as class_ because class is a python keyword + if "class_" in attrs: + attrs["class"] = attrs.pop("class_") + + s = " and ".join([f"@{k}={repr(v)}" for k, v in attrs.items()]) + return f"[{s}]" + + +_re_namespace = {"re": "http://exslt.org/regular-expressions"} + + +class _LxmlFrameParser(_HtmlFrameParser): + """ + HTML to DataFrame parser that uses lxml under the hood. + + Warning + ------- + This parser can only handle HTTP, FTP, and FILE urls. + + See Also + -------- + _HtmlFrameParser + _BeautifulSoupLxmlFrameParser + + Notes + ----- + Documentation strings for this class are in the base class + :class:`_HtmlFrameParser`. + """ + + def _text_getter(self, obj): + return obj.text_content() + + def _parse_td(self, row): + # Look for direct children only: the "row" element here may be a + #
<thead> or <tfoot> (see _parse_thead_tr).
+        return row.xpath("./td|./th")
+
+    # Infer header when there is a <thead> or top <th>
-only rows + if header is None: + if len(head) == 1: + header = 0 + else: + # ignore all-empty-text rows + header = [i for i, row in enumerate(head) if any(text for text in row)] + + if foot: + body += foot + + # fill out elements of body that are "ragged" + _expand_elements(body) + with TextParser(body, header=header, **kwargs) as tp: + return tp.read() + + +_valid_parsers = { + "lxml": _LxmlFrameParser, + None: _LxmlFrameParser, + "html5lib": _BeautifulSoupHtml5LibFrameParser, + "bs4": _BeautifulSoupHtml5LibFrameParser, +} + + +def _parser_dispatch(flavor: str | None) -> type[_HtmlFrameParser]: + """ + Choose the parser based on the input flavor. + + Parameters + ---------- + flavor : str + The type of parser to use. This must be a valid backend. + + Returns + ------- + cls : _HtmlFrameParser subclass + The parser class based on the requested input flavor. + + Raises + ------ + ValueError + * If `flavor` is not a valid backend. + ImportError + * If you do not have the requested `flavor` + """ + valid_parsers = list(_valid_parsers.keys()) + if flavor not in valid_parsers: + raise ValueError( + f"{repr(flavor)} is not a valid flavor, valid flavors are {valid_parsers}" + ) + + if flavor in ("bs4", "html5lib"): + if not _HAS_HTML5LIB: + raise ImportError("html5lib not found, please install it") + if not _HAS_BS4: + raise ImportError("BeautifulSoup4 (bs4) not found, please install it") + # Although we call this above, we want to raise here right before use. + bs4 = import_optional_dependency("bs4") # noqa:F841 + + else: + if not _HAS_LXML: + raise ImportError("lxml not found, please install it") + return _valid_parsers[flavor] + + +def _print_as_set(s) -> str: + arg = ", ".join([pprint_thing(el) for el in s]) + return f"{{{arg}}}" + + +def _validate_flavor(flavor): + if flavor is None: + flavor = "lxml", "bs4" + elif isinstance(flavor, str): + flavor = (flavor,) + elif isinstance(flavor, abc.Iterable): + if not all(isinstance(flav, str) for flav in flavor): + raise TypeError( + f"Object of type {repr(type(flavor).__name__)} " + f"is not an iterable of strings" + ) + else: + msg = repr(flavor) if isinstance(flavor, str) else str(flavor) + msg += " is not a valid flavor" + raise ValueError(msg) + + flavor = tuple(flavor) + valid_flavors = set(_valid_parsers) + flavor_set = set(flavor) + + if not flavor_set & valid_flavors: + raise ValueError( + f"{_print_as_set(flavor_set)} is not a valid set of flavors, valid " + f"flavors are {_print_as_set(valid_flavors)}" + ) + return flavor + + +def _parse(flavor, io, match, attrs, encoding, displayed_only, **kwargs): + flavor = _validate_flavor(flavor) + compiled_match = re.compile(match) # you can pass a compiled regex here + + retained = None + for flav in flavor: + parser = _parser_dispatch(flav) + p = parser(io, compiled_match, attrs, encoding, displayed_only) + + try: + tables = p.parse_tables() + except ValueError as caught: + # if `io` is an io-like object, check if it's seekable + # and try to rewind it before trying the next parser + if hasattr(io, "seekable") and io.seekable(): + io.seek(0) + elif hasattr(io, "seekable") and not io.seekable(): + # if we couldn't rewind it, let the user know + raise ValueError( + f"The flavor {flav} failed to parse your input. " + "Since you passed a non-rewindable file " + "object, we can't rewind it to try " + "another parser. Try read_html() with a different flavor." 
+ ) from caught + + retained = caught + else: + break + else: + assert retained is not None # for mypy + raise retained + + ret = [] + for table in tables: + try: + ret.append(_data_to_frame(data=table, **kwargs)) + except EmptyDataError: # empty table + continue + return ret + + +@deprecate_nonkeyword_arguments(version="2.0") +def read_html( + io: FilePath | ReadBuffer[str], + match: str | Pattern = ".+", + flavor: str | None = None, + header: int | Sequence[int] | None = None, + index_col: int | Sequence[int] | None = None, + skiprows: int | Sequence[int] | slice | None = None, + attrs: dict[str, str] | None = None, + parse_dates: bool = False, + thousands: str | None = ",", + encoding: str | None = None, + decimal: str = ".", + converters: dict | None = None, + na_values=None, + keep_default_na: bool = True, + displayed_only: bool = True, +) -> list[DataFrame]: + r""" + Read HTML tables into a ``list`` of ``DataFrame`` objects. + + Parameters + ---------- + io : str, path object, or file-like object + String, path object (implementing ``os.PathLike[str]``), or file-like + object implementing a string ``read()`` function. + The string can represent a URL or the HTML itself. Note that + lxml only accepts the http, ftp and file url protocols. If you have a + URL that starts with ``'https'`` you might try removing the ``'s'``. + + match : str or compiled regular expression, optional + The set of tables containing text matching this regex or string will be + returned. Unless the HTML is extremely simple you will probably need to + pass a non-empty string here. Defaults to '.+' (match any non-empty + string). The default value will return all tables contained on a page. + This value is converted to a regular expression so that there is + consistent behavior between Beautiful Soup and lxml. + + flavor : str, optional + The parsing engine to use. 'bs4' and 'html5lib' are synonymous with + each other, they are both there for backwards compatibility. The + default of ``None`` tries to use ``lxml`` to parse and if that fails it + falls back on ``bs4`` + ``html5lib``. + + header : int or list-like, optional + The row (or list of rows for a :class:`~pandas.MultiIndex`) to use to + make the columns headers. + + index_col : int or list-like, optional + The column (or list of columns) to use to create the index. + + skiprows : int, list-like or slice, optional + Number of rows to skip after parsing the column integer. 0-based. If a + sequence of integers or a slice is given, will skip the rows indexed by + that sequence. Note that a single element sequence means 'skip the nth + row' whereas an integer means 'skip n rows'. + + attrs : dict, optional + This is a dictionary of attributes that you can pass to use to identify + the table in the HTML. These are not checked for validity before being + passed to lxml or Beautiful Soup. However, these attributes must be + valid HTML table attributes to work correctly. For example, :: + + attrs = {'id': 'table'} + + is a valid attribute dictionary because the 'id' HTML tag attribute is + a valid HTML attribute for *any* HTML tag as per `this document + `__. :: + + attrs = {'asdf': 'table'} + + is *not* a valid attribute dictionary because 'asdf' is not a valid + HTML attribute even if it is a valid XML attribute. Valid HTML 4.01 + table attributes can be found `here + `__. A + working draft of the HTML 5 spec can be found `here + `__. It contains the + latest information on table attributes for the modern web. 
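A hedged usage sketch for read_html as documented above; the URL and match pattern are placeholders, and running it needs network access plus one of the parser backends:

import pandas as pd

tables = pd.read_html(
    "https://en.wikipedia.org/wiki/Python_(programming_language)",
    match="Release",               # keep only tables whose text matches this regex
    attrs={"class": "wikitable"},  # disambiguate via HTML attributes
    flavor="bs4",                  # force BeautifulSoup + html5lib instead of lxml
)
print(len(tables), tables[0].head())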
+ + parse_dates : bool, optional + See :func:`~read_csv` for more details. + + thousands : str, optional + Separator to use to parse thousands. Defaults to ``','``. + + encoding : str, optional + The encoding used to decode the web page. Defaults to ``None``.``None`` + preserves the previous encoding behavior, which depends on the + underlying parser library (e.g., the parser library will try to use + the encoding provided by the document). + + decimal : str, default '.' + Character to recognize as decimal point (e.g. use ',' for European + data). + + converters : dict, default None + Dict of functions for converting values in certain columns. Keys can + either be integers or column labels, values are functions that take one + input argument, the cell (not column) content, and return the + transformed content. + + na_values : iterable, default None + Custom NA values. + + keep_default_na : bool, default True + If na_values are specified and keep_default_na is False the default NaN + values are overridden, otherwise they're appended to. + + displayed_only : bool, default True + Whether elements with "display: none" should be parsed. + + Returns + ------- + dfs + A list of DataFrames. + + See Also + -------- + read_csv : Read a comma-separated values (csv) file into DataFrame. + + Notes + ----- + Before using this function you should read the :ref:`gotchas about the + HTML parsing libraries `. + + Expect to do some cleanup after you call this function. For example, you + might need to manually assign column names if the column names are + converted to NaN when you pass the `header=0` argument. We try to assume as + little as possible about the structure of the table and push the + idiosyncrasies of the HTML contained in the table to the user. + + This function searches for ```` elements and only for ```` + and ```` or ```` argument, it is used to construct + the header, otherwise the function attempts to find the header within + the body (by putting rows with only ``' + assert expected in s.to_html() + + # only the value should be escaped before passing to the formatter + s = Styler(df, uuid_len=0).format("&{0}&", escape=escape) + expected = f'' + assert expected in s.to_html() + + # also test format_index() + styler = Styler(DataFrame(columns=[chars]), uuid_len=0) + styler.format_index("&{0}&", escape=None, axis=1) + assert styler._translate(True, True)["head"][0][1]["display_value"] == f"&{chars}&" + styler.format_index("&{0}&", escape=escape, axis=1) + assert styler._translate(True, True)["head"][0][1]["display_value"] == f"&{exp}&" + + +def test_format_escape_na_rep(): + # tests the na_rep is not escaped + df = DataFrame([['<>&"', None]]) + s = Styler(df, uuid_len=0).format("X&{0}>X", escape="html", na_rep="&") + ex = '' + expected2 = '' + assert ex in s.to_html() + assert expected2 in s.to_html() + + # also test for format_index() + df = DataFrame(columns=['<>&"', None]) + styler = Styler(df, uuid_len=0) + styler.format_index("X&{0}>X", escape="html", na_rep="&", axis=1) + ctx = styler._translate(True, True) + assert ctx["head"][0][1]["display_value"] == "X&<>&">X" + assert ctx["head"][0][2]["display_value"] == "&" + + +def test_format_escape_floats(styler): + # test given formatter for number format is not impacted by escape + s = styler.format("{:.1f}", escape="html") + for expected in [">0.0<", ">1.0<", ">-1.2<", ">-0.6<"]: + assert expected in s.to_html() + # tests precision of floats is not impacted by escape + s = styler.format(precision=1, escape="html") + for expected in [">0<", 
">1<", ">-1.2<", ">-0.6<"]: + assert expected in s.to_html() + + +@pytest.mark.parametrize("formatter", [5, True, [2.0]]) +@pytest.mark.parametrize("func", ["format", "format_index"]) +def test_format_raises(styler, formatter, func): + with pytest.raises(TypeError, match="expected str or callable"): + getattr(styler, func)(formatter) + + +@pytest.mark.parametrize( + "precision, expected", + [ + (1, ["1.0", "2.0", "3.2", "4.6"]), + (2, ["1.00", "2.01", "3.21", "4.57"]), + (3, ["1.000", "2.009", "3.212", "4.566"]), + ], +) +def test_format_with_precision(precision, expected): + # Issue #13257 + df = DataFrame([[1.0, 2.0090, 3.2121, 4.566]], columns=[1.0, 2.0090, 3.2121, 4.566]) + styler = Styler(df) + styler.format(precision=precision) + styler.format_index(precision=precision, axis=1) + + ctx = styler._translate(True, True) + for col, exp in enumerate(expected): + assert ctx["body"][0][col + 1]["display_value"] == exp # format test + assert ctx["head"][0][col + 1]["display_value"] == exp # format_index test + + +@pytest.mark.parametrize("axis", [0, 1]) +@pytest.mark.parametrize( + "level, expected", + [ + (0, ["X", "X", "_", "_"]), # level int + ("zero", ["X", "X", "_", "_"]), # level name + (1, ["_", "_", "X", "X"]), # other level int + ("one", ["_", "_", "X", "X"]), # other level name + ([0, 1], ["X", "X", "X", "X"]), # both levels + ([0, "zero"], ["X", "X", "_", "_"]), # level int and name simultaneous + ([0, "one"], ["X", "X", "X", "X"]), # both levels as int and name + (["one", "zero"], ["X", "X", "X", "X"]), # both level names, reversed + ], +) +def test_format_index_level(axis, level, expected): + midx = MultiIndex.from_arrays([["_", "_"], ["_", "_"]], names=["zero", "one"]) + df = DataFrame([[1, 2], [3, 4]]) + if axis == 0: + df.index = midx + else: + df.columns = midx + + styler = df.style.format_index(lambda v: "X", level=level, axis=axis) + ctx = styler._translate(True, True) + + if axis == 0: # compare index + result = [ctx["body"][s][0]["display_value"] for s in range(2)] + result += [ctx["body"][s][1]["display_value"] for s in range(2)] + else: # compare columns + result = [ctx["head"][0][s + 1]["display_value"] for s in range(2)] + result += [ctx["head"][1][s + 1]["display_value"] for s in range(2)] + + assert expected == result + + +def test_format_subset(): + df = DataFrame([[0.1234, 0.1234], [1.1234, 1.1234]], columns=["a", "b"]) + ctx = df.style.format( + {"a": "{:0.1f}", "b": "{0:.2%}"}, subset=IndexSlice[0, :] + )._translate(True, True) + expected = "0.1" + raw_11 = "1.123400" + assert ctx["body"][0][1]["display_value"] == expected + assert ctx["body"][1][1]["display_value"] == raw_11 + assert ctx["body"][0][2]["display_value"] == "12.34%" + + ctx = df.style.format("{:0.1f}", subset=IndexSlice[0, :])._translate(True, True) + assert ctx["body"][0][1]["display_value"] == expected + assert ctx["body"][1][1]["display_value"] == raw_11 + + ctx = df.style.format("{:0.1f}", subset=IndexSlice["a"])._translate(True, True) + assert ctx["body"][0][1]["display_value"] == expected + assert ctx["body"][0][2]["display_value"] == "0.123400" + + ctx = df.style.format("{:0.1f}", subset=IndexSlice[0, "a"])._translate(True, True) + assert ctx["body"][0][1]["display_value"] == expected + assert ctx["body"][1][1]["display_value"] == raw_11 + + ctx = df.style.format("{:0.1f}", subset=IndexSlice[[0, 1], ["a"]])._translate( + True, True + ) + assert ctx["body"][0][1]["display_value"] == expected + assert ctx["body"][1][1]["display_value"] == "1.1" + assert ctx["body"][0][2]["display_value"] == 
"0.123400" + assert ctx["body"][1][2]["display_value"] == raw_11 + + +@pytest.mark.parametrize("formatter", [None, "{:,.1f}"]) +@pytest.mark.parametrize("decimal", [".", "*"]) +@pytest.mark.parametrize("precision", [None, 2]) +@pytest.mark.parametrize("func, col", [("format", 1), ("format_index", 0)]) +def test_format_thousands(formatter, decimal, precision, func, col): + styler = DataFrame([[1000000.123456789]], index=[1000000.123456789]).style + result = getattr(styler, func)( # testing float + thousands="_", formatter=formatter, decimal=decimal, precision=precision + )._translate(True, True) + assert "1_000_000" in result["body"][0][col]["display_value"] + + styler = DataFrame([[1000000]], index=[1000000]).style + result = getattr(styler, func)( # testing int + thousands="_", formatter=formatter, decimal=decimal, precision=precision + )._translate(True, True) + assert "1_000_000" in result["body"][0][col]["display_value"] + + styler = DataFrame([[1 + 1000000.123456789j]], index=[1 + 1000000.123456789j]).style + result = getattr(styler, func)( # testing complex + thousands="_", formatter=formatter, decimal=decimal, precision=precision + )._translate(True, True) + assert "1_000_000" in result["body"][0][col]["display_value"] + + +@pytest.mark.parametrize("formatter", [None, "{:,.4f}"]) +@pytest.mark.parametrize("thousands", [None, ",", "*"]) +@pytest.mark.parametrize("precision", [None, 4]) +@pytest.mark.parametrize("func, col", [("format", 1), ("format_index", 0)]) +def test_format_decimal(formatter, thousands, precision, func, col): + styler = DataFrame([[1000000.123456789]], index=[1000000.123456789]).style + result = getattr(styler, func)( # testing float + decimal="_", formatter=formatter, thousands=thousands, precision=precision + )._translate(True, True) + assert "000_123" in result["body"][0][col]["display_value"] + + styler = DataFrame([[1 + 1000000.123456789j]], index=[1 + 1000000.123456789j]).style + result = getattr(styler, func)( # testing complex + decimal="_", formatter=formatter, thousands=thousands, precision=precision + )._translate(True, True) + assert "000_123" in result["body"][0][col]["display_value"] + + +def test_str_escape_error(): + msg = "`escape` only permitted in {'html', 'latex'}, got " + with pytest.raises(ValueError, match=msg): + _str_escape("text", "bad_escape") + + with pytest.raises(ValueError, match=msg): + _str_escape("text", []) + + _str_escape(2.00, "bad_escape") # OK since dtype is float + + +def test_format_options(): + df = DataFrame({"int": [2000, 1], "float": [1.009, None], "str": ["&<", "&~"]}) + ctx = df.style._translate(True, True) + + # test option: na_rep + assert ctx["body"][1][2]["display_value"] == "nan" + with option_context("styler.format.na_rep", "MISSING"): + ctx_with_op = df.style._translate(True, True) + assert ctx_with_op["body"][1][2]["display_value"] == "MISSING" + + # test option: decimal and precision + assert ctx["body"][0][2]["display_value"] == "1.009000" + with option_context("styler.format.decimal", "_"): + ctx_with_op = df.style._translate(True, True) + assert ctx_with_op["body"][0][2]["display_value"] == "1_009000" + with option_context("styler.format.precision", 2): + ctx_with_op = df.style._translate(True, True) + assert ctx_with_op["body"][0][2]["display_value"] == "1.01" + + # test option: thousands + assert ctx["body"][0][1]["display_value"] == "2000" + with option_context("styler.format.thousands", "_"): + ctx_with_op = df.style._translate(True, True) + assert ctx_with_op["body"][0][1]["display_value"] == "2_000" 
+ + # test option: escape + assert ctx["body"][0][3]["display_value"] == "&<" + assert ctx["body"][1][3]["display_value"] == "&~" + with option_context("styler.format.escape", "html"): + ctx_with_op = df.style._translate(True, True) + assert ctx_with_op["body"][0][3]["display_value"] == "&<" + with option_context("styler.format.escape", "latex"): + ctx_with_op = df.style._translate(True, True) + assert ctx_with_op["body"][1][3]["display_value"] == "\\&\\textasciitilde " + + # test option: formatter + with option_context("styler.format.formatter", {"int": "{:,.2f}"}): + ctx_with_op = df.style._translate(True, True) + assert ctx_with_op["body"][0][1]["display_value"] == "2,000.00" + + +def test_precision_zero(df): + styler = Styler(df, precision=0) + ctx = styler._translate(True, True) + assert ctx["body"][0][2]["display_value"] == "-1" + assert ctx["body"][1][2]["display_value"] == "-1" + + +@pytest.mark.parametrize( + "formatter, exp", + [ + (lambda x: f"{x:.3f}", "9.000"), + ("{:.2f}", "9.00"), + ({0: "{:.1f}"}, "9.0"), + (None, "9"), + ], +) +def test_formatter_options_validator(formatter, exp): + df = DataFrame([[9]]) + with option_context("styler.format.formatter", formatter): + assert f" {exp} " in df.style.to_latex() + + +def test_formatter_options_raises(): + msg = "Value must be an instance of" + with pytest.raises(ValueError, match=msg): + with option_context("styler.format.formatter", ["bad", "type"]): + DataFrame().style.to_latex() + + +def test_1level_multiindex(): + # GH 43383 + midx = MultiIndex.from_product([[1, 2]], names=[""]) + df = DataFrame(-1, index=midx, columns=[0, 1]) + ctx = df.style._translate(True, True) + assert ctx["body"][0][0]["display_value"] == "1" + assert ctx["body"][0][0]["is_visible"] is True + assert ctx["body"][1][0]["display_value"] == "2" + assert ctx["body"][1][0]["is_visible"] is True diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/test_highlight.py b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/test_highlight.py new file mode 100644 index 0000000..89aa068 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/test_highlight.py @@ -0,0 +1,219 @@ +import numpy as np +import pytest + +from pandas import ( + NA, + DataFrame, + IndexSlice, +) + +pytest.importorskip("jinja2") + +from pandas.io.formats.style import Styler + + +@pytest.fixture(params=[(None, "float64"), (NA, "Int64")]) +def df(request): + # GH 45804 + return DataFrame( + {"A": [0, np.nan, 10], "B": [1, request.param[0], 2]}, dtype=request.param[1] + ) + + +@pytest.fixture +def styler(df): + return Styler(df, uuid_len=0) + + +def test_highlight_null(styler): + result = styler.highlight_null()._compute().ctx + expected = { + (1, 0): [("background-color", "red")], + (1, 1): [("background-color", "red")], + } + assert result == expected + + +def test_highlight_null_subset(styler): + # GH 31345 + result = ( + styler.highlight_null(null_color="red", subset=["A"]) + .highlight_null(null_color="green", subset=["B"]) + ._compute() + .ctx + ) + expected = { + (1, 0): [("background-color", "red")], + (1, 1): [("background-color", "green")], + } + assert result == expected + + +@pytest.mark.parametrize("f", ["highlight_min", "highlight_max"]) +def test_highlight_minmax_basic(df, f): + expected = { + (0, 1): [("background-color", "red")], + # ignores NaN row, + (2, 0): [("background-color", "red")], + } + if f == "highlight_min": + df = -df + result = getattr(df.style, f)(axis=1, color="red")._compute().ctx + assert 
result == expected + + +@pytest.mark.parametrize("f", ["highlight_min", "highlight_max"]) +@pytest.mark.parametrize( + "kwargs", + [ + {"axis": None, "color": "red"}, # test axis + {"axis": 0, "subset": ["A"], "color": "red"}, # test subset and ignores NaN + {"axis": None, "props": "background-color: red"}, # test props + ], +) +def test_highlight_minmax_ext(df, f, kwargs): + expected = {(2, 0): [("background-color", "red")]} + if f == "highlight_min": + df = -df + result = getattr(df.style, f)(**kwargs)._compute().ctx + assert result == expected + + +@pytest.mark.parametrize("f", ["highlight_min", "highlight_max"]) +@pytest.mark.parametrize("axis", [None, 0, 1]) +def test_highlight_minmax_nulls(f, axis): + # GH 42750 + expected = { + (1, 0): [("background-color", "yellow")], + (1, 1): [("background-color", "yellow")], + } + if axis == 1: + expected.update({(2, 1): [("background-color", "yellow")]}) + + if f == "highlight_max": + df = DataFrame({"a": [NA, 1, None], "b": [np.nan, 1, -1]}) + else: + df = DataFrame({"a": [NA, -1, None], "b": [np.nan, -1, 1]}) + + result = getattr(df.style, f)(axis=axis)._compute().ctx + assert result == expected + + +@pytest.mark.parametrize( + "kwargs", + [ + {"left": 0, "right": 1}, # test basic range + {"left": 0, "right": 1, "props": "background-color: yellow"}, # test props + {"left": -100, "right": 100, "subset": IndexSlice[[0, 1], :]}, # test subset + {"left": 0, "subset": IndexSlice[[0, 1], :]}, # test no right + {"right": 1}, # test no left + {"left": [0, 0, 11], "axis": 0}, # test left as sequence + {"left": DataFrame({"A": [0, 0, 11], "B": [1, 1, 11]}), "axis": None}, # axis + {"left": 0, "right": [0, 1], "axis": 1}, # test sequence right + ], +) +def test_highlight_between(styler, kwargs): + expected = { + (0, 0): [("background-color", "yellow")], + (0, 1): [("background-color", "yellow")], + } + result = styler.highlight_between(**kwargs)._compute().ctx + assert result == expected + + +@pytest.mark.parametrize( + "arg, map, axis", + [ + ("left", [1, 2], 0), # 0 axis has 3 elements not 2 + ("left", [1, 2, 3], 1), # 1 axis has 2 elements not 3 + ("left", np.array([[1, 2], [1, 2]]), None), # df is (2,3) not (2,2) + ("right", [1, 2], 0), # same tests as above for 'right' not 'left' + ("right", [1, 2, 3], 1), # .. + ("right", np.array([[1, 2], [1, 2]]), None), # .. 
+ ], +) +def test_highlight_between_raises(arg, styler, map, axis): + msg = f"supplied '{arg}' is not correct shape" + with pytest.raises(ValueError, match=msg): + styler.highlight_between(**{arg: map, "axis": axis})._compute() + + +def test_highlight_between_raises2(styler): + msg = "values can be 'both', 'left', 'right', or 'neither'" + with pytest.raises(ValueError, match=msg): + styler.highlight_between(inclusive="badstring")._compute() + + with pytest.raises(ValueError, match=msg): + styler.highlight_between(inclusive=1)._compute() + + +@pytest.mark.parametrize( + "inclusive, expected", + [ + ( + "both", + { + (0, 0): [("background-color", "yellow")], + (0, 1): [("background-color", "yellow")], + }, + ), + ("neither", {}), + ("left", {(0, 0): [("background-color", "yellow")]}), + ("right", {(0, 1): [("background-color", "yellow")]}), + ], +) +def test_highlight_between_inclusive(styler, inclusive, expected): + kwargs = {"left": 0, "right": 1, "subset": IndexSlice[[0, 1], :]} + result = styler.highlight_between(**kwargs, inclusive=inclusive)._compute() + assert result.ctx == expected + + +@pytest.mark.parametrize( + "kwargs", + [ + {"q_left": 0.5, "q_right": 1, "axis": 0}, # base case + {"q_left": 0.5, "q_right": 1, "axis": None}, # test axis + {"q_left": 0, "q_right": 1, "subset": IndexSlice[2, :]}, # test subset + {"q_left": 0.5, "axis": 0}, # test no high + {"q_right": 1, "subset": IndexSlice[2, :], "axis": 1}, # test no low + {"q_left": 0.5, "axis": 0, "props": "background-color: yellow"}, # tst prop + ], +) +def test_highlight_quantile(styler, kwargs): + expected = { + (2, 0): [("background-color", "yellow")], + (2, 1): [("background-color", "yellow")], + } + result = styler.highlight_quantile(**kwargs)._compute().ctx + assert result == expected + + +@pytest.mark.skipif(np.__version__[:4] in ["1.16", "1.17"], reason="Numpy Issue #14831") +@pytest.mark.parametrize( + "f,kwargs", + [ + ("highlight_min", {"axis": 1, "subset": IndexSlice[1, :]}), + ("highlight_max", {"axis": 0, "subset": [0]}), + ("highlight_quantile", {"axis": None, "q_left": 0.6, "q_right": 0.8}), + ("highlight_between", {"subset": [0]}), + ], +) +@pytest.mark.parametrize( + "df", + [ + DataFrame([[0, 10], [20, 30]], dtype=int), + DataFrame([[0, 10], [20, 30]], dtype=float), + DataFrame([[0, 10], [20, 30]], dtype="datetime64[ns]"), + DataFrame([[0, 10], [20, 30]], dtype=str), + DataFrame([[0, 10], [20, 30]], dtype="timedelta64[ns]"), + ], +) +def test_all_highlight_dtypes(f, kwargs, df): + if f == "highlight_quantile" and isinstance(df.iloc[0, 0], (str)): + return None # quantile incompatible with str + if f == "highlight_between": + kwargs["left"] = df.iloc[1, 0] # set the range low for testing + + expected = {(1, 0): [("background-color", "yellow")]} + result = getattr(df.style, f)(**kwargs)._compute().ctx + assert result == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/test_html.py b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/test_html.py new file mode 100644 index 0000000..fad289d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/test_html.py @@ -0,0 +1,806 @@ +from textwrap import dedent + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + MultiIndex, + option_context, +) + +jinja2 = pytest.importorskip("jinja2") +from pandas.io.formats.style import Styler + +loader = jinja2.PackageLoader("pandas", "io/formats/templates") +env = jinja2.Environment(loader=loader, trim_blocks=True) + + 
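Outside the test suite, the highlight helpers exercised above are plain Styler methods (jinja2 required); `_compute().ctx` is the same internal hook these tests use to inspect the resulting CSS:

from pandas import DataFrame

df = DataFrame({"A": [0, 5, 10], "B": [1, 6, 11]})
ctx = df.style.highlight_between(left=0, right=1, color="yellow")._compute().ctx
assert ctx == {
    (0, 0): [("background-color", "yellow")],  # A[0] == 0 lies inside [0, 1]
    (0, 1): [("background-color", "yellow")],  # B[0] == 1 lies inside [0, 1]
}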
+@pytest.fixture +def styler(): + return Styler(DataFrame([[2.61], [2.69]], index=["a", "b"], columns=["A"])) + + +@pytest.fixture +def styler_mi(): + midx = MultiIndex.from_product([["a", "b"], ["c", "d"]]) + return Styler(DataFrame(np.arange(16).reshape(4, 4), index=midx, columns=midx)) + + +@pytest.fixture +def tpl_style(): + return env.get_template("html_style.tpl") + + +@pytest.fixture +def tpl_table(): + return env.get_template("html_table.tpl") + + +def test_html_template_extends_options(): + # make sure if templates are edited tests are updated as are setup fixtures + # to understand the dependency + with open("pandas/io/formats/templates/html.tpl") as file: + result = file.read() + assert "{% include html_style_tpl %}" in result + assert "{% include html_table_tpl %}" in result + + +def test_exclude_styles(styler): + result = styler.to_html(exclude_styles=True, doctype_html=True) + expected = dedent( + """\ + + + + + + +
This function searches for ``<table>`` elements and only for ``<tr>``
+    and ``<th>`` rows and ``<td>`` elements within each ``<tr>`` or ``<th>
``
+    element in the table. ``<td>`` stands for "table data". This function
+    attempts to properly handle ``colspan`` and ``rowspan`` attributes.
+    If the function has a ``<thead>`` argument, it is used to construct
+    the header, otherwise the function attempts to find the header within
+    the body (by putting rows with only ``<th>
`` elements into the header). + + Similar to :func:`~read_csv` the `header` argument is applied + **after** `skiprows` is applied. + + This function will *always* return a list of :class:`DataFrame` *or* + it will fail, e.g., it will *not* return an empty list. + + Examples + -------- + See the :ref:`read_html documentation in the IO section of the docs + ` for some examples of reading in HTML tables. + """ + _importers() + + # Type check here. We don't want to parse only to fail because of an + # invalid value of an integer skiprows. + if isinstance(skiprows, numbers.Integral) and skiprows < 0: + raise ValueError( + "cannot skip rows starting from the end of the " + "data (you passed a negative value)" + ) + validate_header_arg(header) + + io = stringify_path(io) + + return _parse( + flavor=flavor, + io=io, + match=match, + header=header, + index_col=index_col, + skiprows=skiprows, + parse_dates=parse_dates, + thousands=thousands, + attrs=attrs, + encoding=encoding, + decimal=decimal, + converters=converters, + na_values=na_values, + keep_default_na=keep_default_na, + displayed_only=displayed_only, + ) diff --git a/.local/lib/python3.8/site-packages/pandas/io/json/__init__.py b/.local/lib/python3.8/site-packages/pandas/io/json/__init__.py new file mode 100644 index 0000000..1de1abc --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/json/__init__.py @@ -0,0 +1,21 @@ +from pandas.io.json._json import ( + dumps, + loads, + read_json, + to_json, +) +from pandas.io.json._normalize import ( + _json_normalize, + json_normalize, +) +from pandas.io.json._table_schema import build_table_schema + +__all__ = [ + "dumps", + "loads", + "read_json", + "to_json", + "_json_normalize", + "json_normalize", + "build_table_schema", +] diff --git a/.local/lib/python3.8/site-packages/pandas/io/json/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/json/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..2ba2a94 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/json/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/json/__pycache__/_json.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/json/__pycache__/_json.cpython-38.pyc new file mode 100644 index 0000000..b18d8c1 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/json/__pycache__/_json.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/json/__pycache__/_normalize.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/json/__pycache__/_normalize.cpython-38.pyc new file mode 100644 index 0000000..49759dd Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/json/__pycache__/_normalize.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/json/__pycache__/_table_schema.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/json/__pycache__/_table_schema.cpython-38.pyc new file mode 100644 index 0000000..19dbb5a Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/json/__pycache__/_table_schema.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/json/_json.py b/.local/lib/python3.8/site-packages/pandas/io/json/_json.py new file mode 100644 index 0000000..910ba63 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/json/_json.py @@ -0,0 +1,1226 @@ +from __future__ import annotations + +from abc import ( + ABC, + abstractmethod, +) +from 
collections import abc +import functools +from io import StringIO +from itertools import islice +from typing import ( + Any, + Callable, + Mapping, +) + +import numpy as np + +import pandas._libs.json as json +from pandas._libs.tslibs import iNaT +from pandas._typing import ( + CompressionOptions, + DtypeArg, + IndexLabel, + JSONSerializable, + StorageOptions, +) +from pandas.errors import AbstractMethodError +from pandas.util._decorators import ( + deprecate_kwarg, + deprecate_nonkeyword_arguments, + doc, +) + +from pandas.core.dtypes.common import ( + ensure_str, + is_period_dtype, +) + +from pandas import ( + DataFrame, + MultiIndex, + Series, + isna, + notna, + to_datetime, +) +from pandas.core.construction import create_series_with_explicit_dtype +from pandas.core.generic import NDFrame +from pandas.core.reshape.concat import concat +from pandas.core.shared_docs import _shared_docs + +from pandas.io.common import ( + IOHandles, + file_exists, + get_handle, + is_fsspec_url, + is_url, + stringify_path, +) +from pandas.io.json._normalize import convert_to_line_delimits +from pandas.io.json._table_schema import ( + build_table_schema, + parse_table_schema, +) +from pandas.io.parsers.readers import validate_integer + +loads = json.loads +dumps = json.dumps + + +# interface to/from +def to_json( + path_or_buf, + obj: NDFrame, + orient: str | None = None, + date_format: str = "epoch", + double_precision: int = 10, + force_ascii: bool = True, + date_unit: str = "ms", + default_handler: Callable[[Any], JSONSerializable] | None = None, + lines: bool = False, + compression: CompressionOptions = "infer", + index: bool = True, + indent: int = 0, + storage_options: StorageOptions = None, +): + + if not index and orient not in ["split", "table"]: + raise ValueError( + "'index=False' is only valid when 'orient' is 'split' or 'table'" + ) + + if lines and orient != "records": + raise ValueError("'lines' keyword only valid when 'orient' is records") + + if orient == "table" and isinstance(obj, Series): + obj = obj.to_frame(name=obj.name or "values") + + writer: type[Writer] + if orient == "table" and isinstance(obj, DataFrame): + writer = JSONTableWriter + elif isinstance(obj, Series): + writer = SeriesWriter + elif isinstance(obj, DataFrame): + writer = FrameWriter + else: + raise NotImplementedError("'obj' should be a Series or a DataFrame") + + s = writer( + obj, + orient=orient, + date_format=date_format, + double_precision=double_precision, + ensure_ascii=force_ascii, + date_unit=date_unit, + default_handler=default_handler, + index=index, + indent=indent, + ).write() + + if lines: + s = convert_to_line_delimits(s) + + if path_or_buf is not None: + # apply compression and byte/text conversion + with get_handle( + path_or_buf, "w", compression=compression, storage_options=storage_options + ) as handles: + handles.handle.write(s) + else: + return s + + +class Writer(ABC): + _default_orient: str + + def __init__( + self, + obj, + orient: str | None, + date_format: str, + double_precision: int, + ensure_ascii: bool, + date_unit: str, + index: bool, + default_handler: Callable[[Any], JSONSerializable] | None = None, + indent: int = 0, + ): + self.obj = obj + + if orient is None: + orient = self._default_orient + + self.orient = orient + self.date_format = date_format + self.double_precision = double_precision + self.ensure_ascii = ensure_ascii + self.date_unit = date_unit + self.default_handler = default_handler + self.index = index + self.indent = indent + + self.is_copy = None + self._format_axes() + 
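A quick sketch of the validation rules enforced at the top of to_json: lines=True demands orient="records", and index=False is only honored for "split" and "table":

import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": ["x", "y"]})

print(df.to_json(orient="records", lines=True))
# {"a":1,"b":"x"}
# {"a":2,"b":"y"}

print(df.to_json(orient="split", index=False))
# {"columns":["a","b"],"data":[[1,"x"],[2,"y"]]}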
+ def _format_axes(self): + raise AbstractMethodError(self) + + def write(self): + iso_dates = self.date_format == "iso" + return dumps( + self.obj_to_write, + orient=self.orient, + double_precision=self.double_precision, + ensure_ascii=self.ensure_ascii, + date_unit=self.date_unit, + iso_dates=iso_dates, + default_handler=self.default_handler, + indent=self.indent, + ) + + @property + @abstractmethod + def obj_to_write(self) -> NDFrame | Mapping[IndexLabel, Any]: + """Object to write in JSON format.""" + pass + + +class SeriesWriter(Writer): + _default_orient = "index" + + @property + def obj_to_write(self) -> NDFrame | Mapping[IndexLabel, Any]: + if not self.index and self.orient == "split": + return {"name": self.obj.name, "data": self.obj.values} + else: + return self.obj + + def _format_axes(self): + if not self.obj.index.is_unique and self.orient == "index": + raise ValueError(f"Series index must be unique for orient='{self.orient}'") + + +class FrameWriter(Writer): + _default_orient = "columns" + + @property + def obj_to_write(self) -> NDFrame | Mapping[IndexLabel, Any]: + if not self.index and self.orient == "split": + obj_to_write = self.obj.to_dict(orient="split") + del obj_to_write["index"] + else: + obj_to_write = self.obj + return obj_to_write + + def _format_axes(self): + """ + Try to format axes if they are datelike. + """ + if not self.obj.index.is_unique and self.orient in ("index", "columns"): + raise ValueError( + f"DataFrame index must be unique for orient='{self.orient}'." + ) + if not self.obj.columns.is_unique and self.orient in ( + "index", + "columns", + "records", + ): + raise ValueError( + f"DataFrame columns must be unique for orient='{self.orient}'." + ) + + +class JSONTableWriter(FrameWriter): + _default_orient = "records" + + def __init__( + self, + obj, + orient: str | None, + date_format: str, + double_precision: int, + ensure_ascii: bool, + date_unit: str, + index: bool, + default_handler: Callable[[Any], JSONSerializable] | None = None, + indent: int = 0, + ): + """ + Adds a `schema` attribute with the Table Schema, resets + the index (can't do in caller, because the schema inference needs + to know what the index is, forces orient to records, and forces + date_format to 'iso'. + """ + super().__init__( + obj, + orient, + date_format, + double_precision, + ensure_ascii, + date_unit, + index, + default_handler=default_handler, + indent=indent, + ) + + if date_format != "iso": + msg = ( + "Trying to write with `orient='table'` and " + f"`date_format='{date_format}'`. 
Table Schema requires dates " + "to be formatted with `date_format='iso'`" + ) + raise ValueError(msg) + + self.schema = build_table_schema(obj, index=self.index) + + # NotImplemented on a column MultiIndex + if obj.ndim == 2 and isinstance(obj.columns, MultiIndex): + raise NotImplementedError( + "orient='table' is not supported for MultiIndex columns" + ) + + # TODO: Do this timedelta properly in objToJSON.c See GH #15137 + if ( + (obj.ndim == 1) + and (obj.name in set(obj.index.names)) + or len(obj.columns.intersection(obj.index.names)) + ): + msg = "Overlapping names between the index and columns" + raise ValueError(msg) + + obj = obj.copy() + timedeltas = obj.select_dtypes(include=["timedelta"]).columns + if len(timedeltas): + obj[timedeltas] = obj[timedeltas].applymap(lambda x: x.isoformat()) + # Convert PeriodIndex to datetimes before serializing + if is_period_dtype(obj.index.dtype): + obj.index = obj.index.to_timestamp() + + # exclude index from obj if index=False + if not self.index: + self.obj = obj.reset_index(drop=True) + else: + self.obj = obj.reset_index(drop=False) + self.date_format = "iso" + self.orient = "records" + self.index = index + + @property + def obj_to_write(self) -> NDFrame | Mapping[IndexLabel, Any]: + return {"schema": self.schema, "data": self.obj} + + +@doc( + storage_options=_shared_docs["storage_options"], + decompression_options=_shared_docs["decompression_options"] % "path_or_buf", +) +@deprecate_kwarg(old_arg_name="numpy", new_arg_name=None) +@deprecate_nonkeyword_arguments( + version="2.0", allowed_args=["path_or_buf"], stacklevel=3 +) +def read_json( + path_or_buf=None, + orient=None, + typ="frame", + dtype: DtypeArg | None = None, + convert_axes=None, + convert_dates=True, + keep_default_dates: bool = True, + numpy: bool = False, + precise_float: bool = False, + date_unit=None, + encoding=None, + encoding_errors: str | None = "strict", + lines: bool = False, + chunksize: int | None = None, + compression: CompressionOptions = "infer", + nrows: int | None = None, + storage_options: StorageOptions = None, +): + """ + Convert a JSON string to pandas object. + + Parameters + ---------- + path_or_buf : a valid JSON str, path object or file-like object + Any valid string path is acceptable. The string could be a URL. Valid + URL schemes include http, ftp, s3, and file. For file URLs, a host is + expected. A local file could be: + ``file://localhost/path/to/table.json``. + + If you want to pass in a path object, pandas accepts any + ``os.PathLike``. + + By file-like object, we refer to objects with a ``read()`` method, + such as a file handle (e.g. via builtin ``open`` function) + or ``StringIO``. + orient : str + Indication of expected JSON string format. + Compatible JSON strings can be produced by ``to_json()`` with a + corresponding orient value. + The set of possible orients is: + + - ``'split'`` : dict like + ``{{index -> [index], columns -> [columns], data -> [values]}}`` + - ``'records'`` : list like + ``[{{column -> value}}, ... , {{column -> value}}]`` + - ``'index'`` : dict like ``{{index -> {{column -> value}}}}`` + - ``'columns'`` : dict like ``{{column -> {{index -> value}}}}`` + - ``'values'`` : just the values array + + The allowed and default values depend on the value + of the `typ` parameter. + + * when ``typ == 'series'``, + + - allowed orients are ``{{'split','records','index'}}`` + - default is ``'index'`` + - The Series index must be unique for orient ``'index'``. 
+ + * when ``typ == 'frame'``, + + - allowed orients are ``{{'split','records','index', + 'columns','values', 'table'}}`` + - default is ``'columns'`` + - The DataFrame index must be unique for orients ``'index'`` and + ``'columns'``. + - The DataFrame columns must be unique for orients ``'index'``, + ``'columns'``, and ``'records'``. + + typ : {{'frame', 'series'}}, default 'frame' + The type of object to recover. + + dtype : bool or dict, default None + If True, infer dtypes; if a dict of column to dtype, then use those; + if False, then don't infer dtypes at all, applies only to the data. + + For all ``orient`` values except ``'table'``, default is True. + + .. versionchanged:: 0.25.0 + + Not applicable for ``orient='table'``. + + convert_axes : bool, default None + Try to convert the axes to the proper dtypes. + + For all ``orient`` values except ``'table'``, default is True. + + .. versionchanged:: 0.25.0 + + Not applicable for ``orient='table'``. + + convert_dates : bool or list of str, default True + If True then default datelike columns may be converted (depending on + keep_default_dates). + If False, no dates will be converted. + If a list of column names, then those columns will be converted and + default datelike columns may also be converted (depending on + keep_default_dates). + + keep_default_dates : bool, default True + If parsing dates (convert_dates is not False), then try to parse the + default datelike columns. + A column label is datelike if + + * it ends with ``'_at'``, + + * it ends with ``'_time'``, + + * it begins with ``'timestamp'``, + + * it is ``'modified'``, or + + * it is ``'date'``. + + numpy : bool, default False + Direct decoding to numpy arrays. Supports numeric data only, but + non-numeric column and index labels are supported. Note also that the + JSON ordering MUST be the same for each term if numpy=True. + + .. deprecated:: 1.0.0 + + precise_float : bool, default False + Set to enable usage of higher precision (strtod) function when + decoding string to double values. Default (False) is to use fast but + less precise builtin functionality. + + date_unit : str, default None + The timestamp unit to detect if converting dates. The default behaviour + is to try and detect the correct precision, but if this is not desired + then pass one of 's', 'ms', 'us' or 'ns' to force parsing only seconds, + milliseconds, microseconds or nanoseconds respectively. + + encoding : str, default is 'utf-8' + The encoding to use to decode py3 bytes. + + encoding_errors : str, optional, default "strict" + How encoding errors are treated. `List of possible values + `_ . + + .. versionadded:: 1.3.0 + + lines : bool, default False + Read the file as a json object per line. + + chunksize : int, optional + Return JsonReader object for iteration. + See the `line-delimited json docs + `_ + for more information on ``chunksize``. + This can only be passed if `lines=True`. + If this is None, the file will be read into memory all at once. + + .. versionchanged:: 1.2 + + ``JsonReader`` is a context manager. + + {decompression_options} + + .. versionchanged:: 1.4.0 Zstandard support. + + nrows : int, optional + The number of lines from the line-delimited jsonfile that has to be read. + This can only be passed if `lines=True`. + If this is None, all the rows will be returned. + + .. versionadded:: 1.1 + + {storage_options} + + .. versionadded:: 1.2.0 + + Returns + ------- + Series or DataFrame + The type returned depends on the value of `typ`. 
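A table-schema round trip consistent with the constraints above (dtype and convert_axes must be left unset for orient='table', and the index name deliberately avoids the reserved "index"):

import pandas as pd

df = pd.DataFrame({"n": [1, 2]}, index=pd.Index(["a", "b"], name="key"))
payload = df.to_json(orient="table")
restored = pd.read_json(payload, orient="table")
assert restored.equals(df) and restored.index.name == "key"  # dtypes and index survive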
+ + See Also + -------- + DataFrame.to_json : Convert a DataFrame to a JSON string. + Series.to_json : Convert a Series to a JSON string. + json_normalize : Normalize semi-structured JSON data into a flat table. + + Notes + ----- + Specific to ``orient='table'``, if a :class:`DataFrame` with a literal + :class:`Index` name of `index` gets written with :func:`to_json`, the + subsequent read operation will incorrectly set the :class:`Index` name to + ``None``. This is because `index` is also used by :func:`DataFrame.to_json` + to denote a missing :class:`Index` name, and the subsequent + :func:`read_json` operation cannot distinguish between the two. The same + limitation is encountered with a :class:`MultiIndex` and any names + beginning with ``'level_'``. + + Examples + -------- + >>> df = pd.DataFrame([['a', 'b'], ['c', 'd']], + ... index=['row 1', 'row 2'], + ... columns=['col 1', 'col 2']) + + Encoding/decoding a Dataframe using ``'split'`` formatted JSON: + + >>> df.to_json(orient='split') + '\ +{{\ +"columns":["col 1","col 2"],\ +"index":["row 1","row 2"],\ +"data":[["a","b"],["c","d"]]\ +}}\ +' + >>> pd.read_json(_, orient='split') + col 1 col 2 + row 1 a b + row 2 c d + + Encoding/decoding a Dataframe using ``'index'`` formatted JSON: + + >>> df.to_json(orient='index') + '{{"row 1":{{"col 1":"a","col 2":"b"}},"row 2":{{"col 1":"c","col 2":"d"}}}}' + + >>> pd.read_json(_, orient='index') + col 1 col 2 + row 1 a b + row 2 c d + + Encoding/decoding a Dataframe using ``'records'`` formatted JSON. + Note that index labels are not preserved with this encoding. + + >>> df.to_json(orient='records') + '[{{"col 1":"a","col 2":"b"}},{{"col 1":"c","col 2":"d"}}]' + >>> pd.read_json(_, orient='records') + col 1 col 2 + 0 a b + 1 c d + + Encoding with Table Schema + + >>> df.to_json(orient='table') + '\ +{{"schema":{{"fields":[\ +{{"name":"index","type":"string"}},\ +{{"name":"col 1","type":"string"}},\ +{{"name":"col 2","type":"string"}}],\ +"primaryKey":["index"],\ +"pandas_version":"1.4.0"}},\ +"data":[\ +{{"index":"row 1","col 1":"a","col 2":"b"}},\ +{{"index":"row 2","col 1":"c","col 2":"d"}}]\ +}}\ +' + """ + if orient == "table" and dtype: + raise ValueError("cannot pass both dtype and orient='table'") + if orient == "table" and convert_axes: + raise ValueError("cannot pass both convert_axes and orient='table'") + + if dtype is None and orient != "table": + # error: Incompatible types in assignment (expression has type "bool", variable + # has type "Union[ExtensionDtype, str, dtype[Any], Type[str], Type[float], + # Type[int], Type[complex], Type[bool], Type[object], Dict[Hashable, + # Union[ExtensionDtype, Union[str, dtype[Any]], Type[str], Type[float], + # Type[int], Type[complex], Type[bool], Type[object]]], None]") + dtype = True # type: ignore[assignment] + if convert_axes is None and orient != "table": + convert_axes = True + + json_reader = JsonReader( + path_or_buf, + orient=orient, + typ=typ, + dtype=dtype, + convert_axes=convert_axes, + convert_dates=convert_dates, + keep_default_dates=keep_default_dates, + numpy=numpy, + precise_float=precise_float, + date_unit=date_unit, + encoding=encoding, + lines=lines, + chunksize=chunksize, + compression=compression, + nrows=nrows, + storage_options=storage_options, + encoding_errors=encoding_errors, + ) + + if chunksize: + return json_reader + + with json_reader: + return json_reader.read() + + +class JsonReader(abc.Iterator): + """ + JsonReader provides an interface for reading in a JSON file. 
+ + If initialized with ``lines=True`` and ``chunksize``, can be iterated over + ``chunksize`` lines at a time. Otherwise, calling ``read`` reads in the + whole document. + """ + + def __init__( + self, + filepath_or_buffer, + orient, + typ, + dtype, + convert_axes, + convert_dates, + keep_default_dates: bool, + numpy: bool, + precise_float: bool, + date_unit, + encoding, + lines: bool, + chunksize: int | None, + compression: CompressionOptions, + nrows: int | None, + storage_options: StorageOptions = None, + encoding_errors: str | None = "strict", + ): + + self.orient = orient + self.typ = typ + self.dtype = dtype + self.convert_axes = convert_axes + self.convert_dates = convert_dates + self.keep_default_dates = keep_default_dates + self.numpy = numpy + self.precise_float = precise_float + self.date_unit = date_unit + self.encoding = encoding + self.compression = compression + self.storage_options = storage_options + self.lines = lines + self.chunksize = chunksize + self.nrows_seen = 0 + self.nrows = nrows + self.encoding_errors = encoding_errors + self.handles: IOHandles[str] | None = None + + if self.chunksize is not None: + self.chunksize = validate_integer("chunksize", self.chunksize, 1) + if not self.lines: + raise ValueError("chunksize can only be passed if lines=True") + if self.nrows is not None: + self.nrows = validate_integer("nrows", self.nrows, 0) + if not self.lines: + raise ValueError("nrows can only be passed if lines=True") + + data = self._get_data_from_filepath(filepath_or_buffer) + self.data = self._preprocess_data(data) + + def _preprocess_data(self, data): + """ + At this point, the data either has a `read` attribute (e.g. a file + object or a StringIO) or is a string that is a JSON document. + + If self.chunksize, we prepare the data for the `__next__` method. + Otherwise, we read it into memory for the `read` method. + """ + if hasattr(data, "read") and not (self.chunksize or self.nrows): + with self: + data = data.read() + if not hasattr(data, "read") and (self.chunksize or self.nrows): + data = StringIO(data) + + return data + + def _get_data_from_filepath(self, filepath_or_buffer): + """ + The function read_json accepts three input types: + 1. filepath (string-like) + 2. file-like object (e.g. open file object, StringIO) + 3. JSON string + + This method turns (1) into (2) to simplify the rest of the processing. + It returns input types (2) and (3) unchanged. + """ + # if it is a string but the file does not exist, it might be a JSON string + filepath_or_buffer = stringify_path(filepath_or_buffer) + if ( + not isinstance(filepath_or_buffer, str) + or is_url(filepath_or_buffer) + or is_fsspec_url(filepath_or_buffer) + or file_exists(filepath_or_buffer) + ): + self.handles = get_handle( + filepath_or_buffer, + "r", + encoding=self.encoding, + compression=self.compression, + storage_options=self.storage_options, + errors=self.encoding_errors, + ) + filepath_or_buffer = self.handles.handle + + return filepath_or_buffer + + def _combine_lines(self, lines) -> str: + """ + Combines a list of JSON objects into one JSON object. + """ + return ( + f'[{",".join([line for line in (line.strip() for line in lines) if line])}]' + ) + + def read(self): + """ + Read the whole JSON input into a pandas object. 
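+
+        A minimal sketch of how this is reached through the public API
+        (``read_json`` delegates here when no ``chunksize`` is given):
+
+        >>> pd.read_json('[{"a": 1}, {"a": 2}]', orient="records")  # doctest: +SKIP
+           a
+        0  1
+        1  2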
+ """ + if self.lines: + if self.chunksize: + obj = concat(self) + elif self.nrows: + lines = list(islice(self.data, self.nrows)) + lines_json = self._combine_lines(lines) + obj = self._get_object_parser(lines_json) + else: + data = ensure_str(self.data) + data_lines = data.split("\n") + obj = self._get_object_parser(self._combine_lines(data_lines)) + else: + obj = self._get_object_parser(self.data) + self.close() + return obj + + def _get_object_parser(self, json): + """ + Parses a json document into a pandas object. + """ + typ = self.typ + dtype = self.dtype + kwargs = { + "orient": self.orient, + "dtype": self.dtype, + "convert_axes": self.convert_axes, + "convert_dates": self.convert_dates, + "keep_default_dates": self.keep_default_dates, + "numpy": self.numpy, + "precise_float": self.precise_float, + "date_unit": self.date_unit, + } + obj = None + if typ == "frame": + obj = FrameParser(json, **kwargs).parse() + + if typ == "series" or obj is None: + if not isinstance(dtype, bool): + kwargs["dtype"] = dtype + obj = SeriesParser(json, **kwargs).parse() + + return obj + + def close(self): + """ + If we opened a stream earlier, in _get_data_from_filepath, we should + close it. + + If an open stream or file was passed, we leave it open. + """ + if self.handles is not None: + self.handles.close() + + def __next__(self): + if self.nrows: + if self.nrows_seen >= self.nrows: + self.close() + raise StopIteration + + lines = list(islice(self.data, self.chunksize)) + if lines: + lines_json = self._combine_lines(lines) + obj = self._get_object_parser(lines_json) + + # Make sure that the returned objects have the right index. + obj.index = range(self.nrows_seen, self.nrows_seen + len(obj)) + self.nrows_seen += len(obj) + + return obj + + self.close() + raise StopIteration + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.close() + + +class Parser: + _split_keys: tuple[str, ...] + _default_orient: str + + _STAMP_UNITS = ("s", "ms", "us", "ns") + _MIN_STAMPS = { + "s": 31536000, + "ms": 31536000000, + "us": 31536000000000, + "ns": 31536000000000000, + } + + def __init__( + self, + json, + orient, + dtype: DtypeArg | None = None, + convert_axes=True, + convert_dates=True, + keep_default_dates=False, + numpy=False, + precise_float=False, + date_unit=None, + ): + self.json = json + + if orient is None: + orient = self._default_orient + + self.orient = orient + + self.dtype = dtype + + if orient == "split": + numpy = False + + if date_unit is not None: + date_unit = date_unit.lower() + if date_unit not in self._STAMP_UNITS: + raise ValueError(f"date_unit must be one of {self._STAMP_UNITS}") + self.min_stamp = self._MIN_STAMPS[date_unit] + else: + self.min_stamp = self._MIN_STAMPS["s"] + + self.numpy = numpy + self.precise_float = precise_float + self.convert_axes = convert_axes + self.convert_dates = convert_dates + self.date_unit = date_unit + self.keep_default_dates = keep_default_dates + self.obj: DataFrame | Series | None = None + + def check_keys_split(self, decoded): + """ + Checks that dict has only the appropriate keys for orient='split'. 
+ """ + bad_keys = set(decoded.keys()).difference(set(self._split_keys)) + if bad_keys: + bad_keys_joined = ", ".join(bad_keys) + raise ValueError(f"JSON data had unexpected key(s): {bad_keys_joined}") + + def parse(self): + + if self.numpy: + self._parse_numpy() + else: + self._parse_no_numpy() + + if self.obj is None: + return None + if self.convert_axes: + self._convert_axes() + self._try_convert_types() + return self.obj + + def _parse_numpy(self): + raise AbstractMethodError(self) + + def _parse_no_numpy(self): + raise AbstractMethodError(self) + + def _convert_axes(self): + """ + Try to convert axes. + """ + obj = self.obj + assert obj is not None # for mypy + for axis_name in obj._AXIS_ORDERS: + new_axis, result = self._try_convert_data( + name=axis_name, + data=obj._get_axis(axis_name), + use_dtypes=False, + convert_dates=True, + ) + if result: + setattr(self.obj, axis_name, new_axis) + + def _try_convert_types(self): + raise AbstractMethodError(self) + + def _try_convert_data( + self, name, data, use_dtypes: bool = True, convert_dates: bool = True + ): + """ + Try to parse a ndarray like into a column by inferring dtype. + """ + # don't try to coerce, unless a force conversion + if use_dtypes: + if not self.dtype: + if all(notna(data)): + return data, False + return data.fillna(np.nan), True + + # error: Non-overlapping identity check (left operand type: + # "Union[ExtensionDtype, str, dtype[Any], Type[object], + # Dict[Hashable, Union[ExtensionDtype, Union[str, dtype[Any]], + # Type[str], Type[float], Type[int], Type[complex], Type[bool], + # Type[object]]]]", right operand type: "Literal[True]") + elif self.dtype is True: # type: ignore[comparison-overlap] + pass + else: + # dtype to force + dtype = ( + self.dtype.get(name) if isinstance(self.dtype, dict) else self.dtype + ) + if dtype is not None: + try: + return data.astype(dtype), True + except (TypeError, ValueError): + return data, False + + if convert_dates: + new_data, result = self._try_convert_to_date(data) + if result: + return new_data, True + + if data.dtype == "object": + + # try float + try: + data = data.astype("float64") + except (TypeError, ValueError): + pass + + if data.dtype.kind == "f": + + if data.dtype != "float64": + + # coerce floats to 64 + try: + data = data.astype("float64") + except (TypeError, ValueError): + pass + + # don't coerce 0-len data + if len(data) and (data.dtype == "float" or data.dtype == "object"): + + # coerce ints if we can + try: + new_data = data.astype("int64") + if (new_data == data).all(): + data = new_data + except (TypeError, ValueError, OverflowError): + pass + + # coerce ints to 64 + if data.dtype == "int": + + # coerce floats to 64 + try: + data = data.astype("int64") + except (TypeError, ValueError): + pass + + # if we have an index, we want to preserve dtypes + if name == "index" and len(data): + if self.orient == "split": + return data, False + + return data, True + + def _try_convert_to_date(self, data): + """ + Try to parse a ndarray like into a date column. + + Try to coerce object in epoch/iso formats and integer/float in epoch + formats. Return a boolean if parsing was successful. 
+ """ + # no conversion on empty + if not len(data): + return data, False + + new_data = data + if new_data.dtype == "object": + try: + new_data = data.astype("int64") + except (TypeError, ValueError, OverflowError): + pass + + # ignore numbers that are out of range + if issubclass(new_data.dtype.type, np.number): + in_range = ( + isna(new_data._values) + | (new_data > self.min_stamp) + | (new_data._values == iNaT) + ) + if not in_range.all(): + return data, False + + date_units = (self.date_unit,) if self.date_unit else self._STAMP_UNITS + for date_unit in date_units: + try: + new_data = to_datetime(new_data, errors="raise", unit=date_unit) + except (ValueError, OverflowError, TypeError): + continue + return new_data, True + return data, False + + def _try_convert_dates(self): + raise AbstractMethodError(self) + + +class SeriesParser(Parser): + _default_orient = "index" + _split_keys = ("name", "index", "data") + + def _parse_no_numpy(self): + data = loads(self.json, precise_float=self.precise_float) + + if self.orient == "split": + decoded = {str(k): v for k, v in data.items()} + self.check_keys_split(decoded) + self.obj = create_series_with_explicit_dtype(**decoded) + else: + self.obj = create_series_with_explicit_dtype(data, dtype_if_empty=object) + + def _parse_numpy(self): + load_kwargs = { + "dtype": None, + "numpy": True, + "precise_float": self.precise_float, + } + if self.orient in ["columns", "index"]: + load_kwargs["labelled"] = True + loads_ = functools.partial(loads, **load_kwargs) + data = loads_(self.json) + + if self.orient == "split": + decoded = {str(k): v for k, v in data.items()} + self.check_keys_split(decoded) + self.obj = create_series_with_explicit_dtype(**decoded) + elif self.orient in ["columns", "index"]: + # error: "create_series_with_explicit_dtype" + # gets multiple values for keyword argument "dtype_if_empty + self.obj = create_series_with_explicit_dtype( + *data, dtype_if_empty=object + ) # type:ignore[misc] + else: + self.obj = create_series_with_explicit_dtype(data, dtype_if_empty=object) + + def _try_convert_types(self): + if self.obj is None: + return + obj, result = self._try_convert_data( + "data", self.obj, convert_dates=self.convert_dates + ) + if result: + self.obj = obj + + +class FrameParser(Parser): + _default_orient = "columns" + _split_keys = ("columns", "index", "data") + + def _parse_numpy(self): + + json = self.json + orient = self.orient + + if orient == "columns": + args = loads( + json, + dtype=None, + numpy=True, + labelled=True, + precise_float=self.precise_float, + ) + if len(args): + args = (args[0].T, args[2], args[1]) + self.obj = DataFrame(*args) + elif orient == "split": + decoded = loads( + json, dtype=None, numpy=True, precise_float=self.precise_float + ) + decoded = {str(k): v for k, v in decoded.items()} + self.check_keys_split(decoded) + self.obj = DataFrame(**decoded) + elif orient == "values": + self.obj = DataFrame( + loads(json, dtype=None, numpy=True, precise_float=self.precise_float) + ) + else: + self.obj = DataFrame( + *loads( + json, + dtype=None, + numpy=True, + labelled=True, + precise_float=self.precise_float, + ) + ) + + def _parse_no_numpy(self): + + json = self.json + orient = self.orient + + if orient == "columns": + self.obj = DataFrame( + loads(json, precise_float=self.precise_float), dtype=None + ) + elif orient == "split": + decoded = { + str(k): v + for k, v in loads(json, precise_float=self.precise_float).items() + } + self.check_keys_split(decoded) + self.obj = DataFrame(dtype=None, **decoded) + elif 
orient == "index": + self.obj = DataFrame.from_dict( + loads(json, precise_float=self.precise_float), + dtype=None, + orient="index", + ) + elif orient == "table": + self.obj = parse_table_schema(json, precise_float=self.precise_float) + else: + self.obj = DataFrame( + loads(json, precise_float=self.precise_float), dtype=None + ) + + def _process_converter(self, f, filt=None): + """ + Take a conversion function and possibly recreate the frame. + """ + if filt is None: + filt = lambda col, c: True + + obj = self.obj + assert obj is not None # for mypy + + needs_new_obj = False + new_obj = {} + for i, (col, c) in enumerate(obj.items()): + if filt(col, c): + new_data, result = f(col, c) + if result: + c = new_data + needs_new_obj = True + new_obj[i] = c + + if needs_new_obj: + + # possibly handle dup columns + new_frame = DataFrame(new_obj, index=obj.index) + new_frame.columns = obj.columns + self.obj = new_frame + + def _try_convert_types(self): + if self.obj is None: + return + if self.convert_dates: + self._try_convert_dates() + + self._process_converter( + lambda col, c: self._try_convert_data(col, c, convert_dates=False) + ) + + def _try_convert_dates(self): + if self.obj is None: + return + + # our columns to parse + convert_dates = self.convert_dates + if convert_dates is True: + convert_dates = [] + convert_dates = set(convert_dates) + + def is_ok(col) -> bool: + """ + Return if this col is ok to try for a date parse. + """ + if not isinstance(col, str): + return False + + col_lower = col.lower() + if ( + col_lower.endswith("_at") + or col_lower.endswith("_time") + or col_lower == "modified" + or col_lower == "date" + or col_lower == "datetime" + or col_lower.startswith("timestamp") + ): + return True + return False + + self._process_converter( + lambda col, c: self._try_convert_to_date(c), + lambda col, c: ( + (self.keep_default_dates and is_ok(col)) or col in convert_dates + ), + ) diff --git a/.local/lib/python3.8/site-packages/pandas/io/json/_normalize.py b/.local/lib/python3.8/site-packages/pandas/io/json/_normalize.py new file mode 100644 index 0000000..36a7949 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/json/_normalize.py @@ -0,0 +1,541 @@ +# --------------------------------------------------------------------- +# JSON normalization routines +from __future__ import annotations + +from collections import ( + abc, + defaultdict, +) +import copy +from typing import ( + Any, + DefaultDict, + Iterable, +) + +import numpy as np + +from pandas._libs.writers import convert_json_to_lines +from pandas._typing import Scalar +from pandas.util._decorators import deprecate + +import pandas as pd +from pandas import DataFrame + + +def convert_to_line_delimits(s: str) -> str: + """ + Helper function that converts JSON lists to line delimited JSON. + """ + # Determine we have a JSON list to turn to lines otherwise just return the + # json object, only lists can + if not s[0] == "[" and s[-1] == "]": + return s + s = s[1:-1] + + return convert_json_to_lines(s) + + +def nested_to_record( + ds, + prefix: str = "", + sep: str = ".", + level: int = 0, + max_level: int | None = None, +): + """ + A simplified json_normalize + + Converts a nested dict into a flat dict ("record"), unlike json_normalize, + it does not attempt to extract a subset of the data. + + Parameters + ---------- + ds : dict or list of dicts + prefix: the prefix, optional, default: "" + sep : str, default '.' 
+ Nested records will generate names separated by sep, + e.g., for sep='.', { 'foo' : { 'bar' : 0 } } -> foo.bar + level: int, optional, default: 0 + The number of levels in the json string. + + max_level: int, optional, default: None + The max depth to normalize. + + .. versionadded:: 0.25.0 + + Returns + ------- + d - dict or list of dicts, matching `ds` + + Examples + -------- + >>> nested_to_record( + ... dict(flat1=1, dict1=dict(c=1, d=2), nested=dict(e=dict(c=1, d=2), d=2)) + ... ) + {\ +'flat1': 1, \ +'dict1.c': 1, \ +'dict1.d': 2, \ +'nested.e.c': 1, \ +'nested.e.d': 2, \ +'nested.d': 2\ +} + """ + singleton = False + if isinstance(ds, dict): + ds = [ds] + singleton = True + new_ds = [] + for d in ds: + new_d = copy.deepcopy(d) + for k, v in d.items(): + # each key gets renamed with prefix + if not isinstance(k, str): + k = str(k) + if level == 0: + newkey = k + else: + newkey = prefix + sep + k + + # flatten if type is dict and + # current dict level < maximum level provided and + # only dicts gets recurse-flattened + # only at level>1 do we rename the rest of the keys + if not isinstance(v, dict) or ( + max_level is not None and level >= max_level + ): + if level != 0: # so we skip copying for top level, common case + v = new_d.pop(k) + new_d[newkey] = v + continue + else: + v = new_d.pop(k) + new_d.update(nested_to_record(v, newkey, sep, level + 1, max_level)) + new_ds.append(new_d) + + if singleton: + return new_ds[0] + return new_ds + + +def _normalise_json( + data: Any, + key_string: str, + normalized_dict: dict[str, Any], + separator: str, +) -> dict[str, Any]: + """ + Main recursive function + Designed for the most basic use case of pd.json_normalize(data) + intended as a performance improvement, see #15621 + + Parameters + ---------- + data : Any + Type dependent on types contained within nested Json + key_string : str + New key (with separator(s) in) for data + normalized_dict : dict + The new normalized/flattened Json dict + separator : str, default '.' + Nested records will generate names separated by sep, + e.g., for sep='.', { 'foo' : { 'bar' : 0 } } -> foo.bar + """ + if isinstance(data, dict): + for key, value in data.items(): + new_key = f"{key_string}{separator}{key}" + _normalise_json( + data=value, + # to avoid adding the separator to the start of every key + # GH#43831 avoid adding key if key_string blank + key_string=new_key + if new_key[: len(separator)] != separator + else new_key[len(separator) :], + normalized_dict=normalized_dict, + separator=separator, + ) + else: + normalized_dict[key_string] = data + return normalized_dict + + +def _normalise_json_ordered(data: dict[str, Any], separator: str) -> dict[str, Any]: + """ + Order the top level keys and then recursively go to depth + + Parameters + ---------- + data : dict or list of dicts + separator : str, default '.' 
+ Nested records will generate names separated by sep, + e.g., for sep='.', { 'foo' : { 'bar' : 0 } } -> foo.bar + + Returns + ------- + dict or list of dicts, matching `normalised_json_object` + """ + top_dict_ = {k: v for k, v in data.items() if not isinstance(v, dict)} + nested_dict_ = _normalise_json( + data={k: v for k, v in data.items() if isinstance(v, dict)}, + key_string="", + normalized_dict={}, + separator=separator, + ) + return {**top_dict_, **nested_dict_} + + +def _simple_json_normalize( + ds: dict | list[dict], + sep: str = ".", +) -> dict | list[dict] | Any: + """ + A optimized basic json_normalize + + Converts a nested dict into a flat dict ("record"), unlike + json_normalize and nested_to_record it doesn't do anything clever. + But for the most basic use cases it enhances performance. + E.g. pd.json_normalize(data) + + Parameters + ---------- + ds : dict or list of dicts + sep : str, default '.' + Nested records will generate names separated by sep, + e.g., for sep='.', { 'foo' : { 'bar' : 0 } } -> foo.bar + + Returns + ------- + frame : DataFrame + d - dict or list of dicts, matching `normalised_json_object` + + Examples + -------- + >>> _simple_json_normalize( + ... { + ... "flat1": 1, + ... "dict1": {"c": 1, "d": 2}, + ... "nested": {"e": {"c": 1, "d": 2}, "d": 2}, + ... } + ... ) + {\ +'flat1': 1, \ +'dict1.c': 1, \ +'dict1.d': 2, \ +'nested.e.c': 1, \ +'nested.e.d': 2, \ +'nested.d': 2\ +} + + """ + normalised_json_object = {} + # expect a dictionary, as most jsons are. However, lists are perfectly valid + if isinstance(ds, dict): + normalised_json_object = _normalise_json_ordered(data=ds, separator=sep) + elif isinstance(ds, list): + normalised_json_list = [_simple_json_normalize(row, sep=sep) for row in ds] + return normalised_json_list + return normalised_json_object + + +def _json_normalize( + data: dict | list[dict], + record_path: str | list | None = None, + meta: str | list[str | list[str]] | None = None, + meta_prefix: str | None = None, + record_prefix: str | None = None, + errors: str = "raise", + sep: str = ".", + max_level: int | None = None, +) -> DataFrame: + """ + Normalize semi-structured JSON data into a flat table. + + Parameters + ---------- + data : dict or list of dicts + Unserialized JSON objects. + record_path : str or list of str, default None + Path in each object to list of records. If not passed, data will be + assumed to be an array of records. + meta : list of paths (str or list of str), default None + Fields to use as metadata for each record in resulting table. + meta_prefix : str, default None + If True, prefix records with dotted (?) path, e.g. foo.bar.field if + meta is ['foo', 'bar']. + record_prefix : str, default None + If True, prefix records with dotted (?) path, e.g. foo.bar.field if + path to records is ['foo', 'bar']. + errors : {'raise', 'ignore'}, default 'raise' + Configures error handling. + + * 'ignore' : will ignore KeyError if keys listed in meta are not + always present. + * 'raise' : will raise KeyError if keys listed in meta are not + always present. + sep : str, default '.' + Nested records will generate names separated by sep. + e.g., for sep='.', {'foo': {'bar': 0}} -> foo.bar. + max_level : int, default None + Max number of levels(depth of dict) to normalize. + if None, normalizes all levels. + + .. versionadded:: 0.25.0 + + Returns + ------- + frame : DataFrame + Normalize semi-structured JSON data into a flat table. + + Examples + -------- + >>> data = [ + ... 
{"id": 1, "name": {"first": "Coleen", "last": "Volk"}}, + ... {"name": {"given": "Mark", "family": "Regner"}}, + ... {"id": 2, "name": "Faye Raker"}, + ... ] + >>> pd.json_normalize(data) + id name.first name.last name.given name.family name + 0 1.0 Coleen Volk NaN NaN NaN + 1 NaN NaN NaN Mark Regner NaN + 2 2.0 NaN NaN NaN NaN Faye Raker + + >>> data = [ + ... { + ... "id": 1, + ... "name": "Cole Volk", + ... "fitness": {"height": 130, "weight": 60}, + ... }, + ... {"name": "Mark Reg", "fitness": {"height": 130, "weight": 60}}, + ... { + ... "id": 2, + ... "name": "Faye Raker", + ... "fitness": {"height": 130, "weight": 60}, + ... }, + ... ] + >>> pd.json_normalize(data, max_level=0) + id name fitness + 0 1.0 Cole Volk {'height': 130, 'weight': 60} + 1 NaN Mark Reg {'height': 130, 'weight': 60} + 2 2.0 Faye Raker {'height': 130, 'weight': 60} + + Normalizes nested data up to level 1. + + >>> data = [ + ... { + ... "id": 1, + ... "name": "Cole Volk", + ... "fitness": {"height": 130, "weight": 60}, + ... }, + ... {"name": "Mark Reg", "fitness": {"height": 130, "weight": 60}}, + ... { + ... "id": 2, + ... "name": "Faye Raker", + ... "fitness": {"height": 130, "weight": 60}, + ... }, + ... ] + >>> pd.json_normalize(data, max_level=1) + id name fitness.height fitness.weight + 0 1.0 Cole Volk 130 60 + 1 NaN Mark Reg 130 60 + 2 2.0 Faye Raker 130 60 + + >>> data = [ + ... { + ... "state": "Florida", + ... "shortname": "FL", + ... "info": {"governor": "Rick Scott"}, + ... "counties": [ + ... {"name": "Dade", "population": 12345}, + ... {"name": "Broward", "population": 40000}, + ... {"name": "Palm Beach", "population": 60000}, + ... ], + ... }, + ... { + ... "state": "Ohio", + ... "shortname": "OH", + ... "info": {"governor": "John Kasich"}, + ... "counties": [ + ... {"name": "Summit", "population": 1234}, + ... {"name": "Cuyahoga", "population": 1337}, + ... ], + ... }, + ... ] + >>> result = pd.json_normalize( + ... data, "counties", ["state", "shortname", ["info", "governor"]] + ... ) + >>> result + name population state shortname info.governor + 0 Dade 12345 Florida FL Rick Scott + 1 Broward 40000 Florida FL Rick Scott + 2 Palm Beach 60000 Florida FL Rick Scott + 3 Summit 1234 Ohio OH John Kasich + 4 Cuyahoga 1337 Ohio OH John Kasich + + >>> data = {"A": [1, 2]} + >>> pd.json_normalize(data, "A", record_prefix="Prefix.") + Prefix.0 + 0 1 + 1 2 + + Returns normalized data with columns prefixed with the given string. + """ + + def _pull_field( + js: dict[str, Any], spec: list | str, extract_record: bool = False + ) -> Scalar | Iterable: + """Internal function to pull field""" + result = js + try: + if isinstance(spec, list): + for field in spec: + if result is None: + raise KeyError(field) + result = result[field] + else: + result = result[spec] + except KeyError as e: + if extract_record: + raise KeyError( + f"Key {e} not found. If specifying a record_path, all elements of " + f"data should have the path." + ) from e + elif errors == "ignore": + return np.nan + else: + raise KeyError( + f"Key {e} not found. To replace missing values of {e} with " + f"np.nan, pass in errors='ignore'" + ) from e + + return result + + def _pull_records(js: dict[str, Any], spec: list | str) -> list: + """ + Internal function to pull field for records, and similar to + _pull_field, but require to return list. And will raise error + if has non iterable value. 
+ """ + result = _pull_field(js, spec, extract_record=True) + + # GH 31507 GH 30145, GH 26284 if result is not list, raise TypeError if not + # null, otherwise return an empty list + if not isinstance(result, list): + if pd.isnull(result): + result = [] + else: + raise TypeError( + f"{js} has non list value {result} for path {spec}. " + "Must be list or null." + ) + return result + + if isinstance(data, list) and not data: + return DataFrame() + elif isinstance(data, dict): + # A bit of a hackjob + data = [data] + elif isinstance(data, abc.Iterable) and not isinstance(data, str): + # GH35923 Fix pd.json_normalize to not skip the first element of a + # generator input + data = list(data) + else: + raise NotImplementedError + + # check to see if a simple recursive function is possible to + # improve performance (see #15621) but only for cases such + # as pd.Dataframe(data) or pd.Dataframe(data, sep) + if ( + record_path is None + and meta is None + and meta_prefix is None + and record_prefix is None + and max_level is None + ): + return DataFrame(_simple_json_normalize(data, sep=sep)) + + if record_path is None: + if any([isinstance(x, dict) for x in y.values()] for y in data): + # naive normalization, this is idempotent for flat records + # and potentially will inflate the data considerably for + # deeply nested structures: + # {VeryLong: { b: 1,c:2}} -> {VeryLong.b:1 ,VeryLong.c:@} + # + # TODO: handle record value which are lists, at least error + # reasonably + data = nested_to_record(data, sep=sep, max_level=max_level) + return DataFrame(data) + elif not isinstance(record_path, list): + record_path = [record_path] + + if meta is None: + meta = [] + elif not isinstance(meta, list): + meta = [meta] + + _meta = [m if isinstance(m, list) else [m] for m in meta] + + # Disastrously inefficient for now + records: list = [] + lengths = [] + + meta_vals: DefaultDict = defaultdict(list) + meta_keys = [sep.join(val) for val in _meta] + + def _recursive_extract(data, path, seen_meta, level=0): + if isinstance(data, dict): + data = [data] + if len(path) > 1: + for obj in data: + for val, key in zip(_meta, meta_keys): + if level + 1 == len(val): + seen_meta[key] = _pull_field(obj, val[-1]) + + _recursive_extract(obj[path[0]], path[1:], seen_meta, level=level + 1) + else: + for obj in data: + recs = _pull_records(obj, path[0]) + recs = [ + nested_to_record(r, sep=sep, max_level=max_level) + if isinstance(r, dict) + else r + for r in recs + ] + + # For repeating the metadata later + lengths.append(len(recs)) + for val, key in zip(_meta, meta_keys): + if level + 1 > len(val): + meta_val = seen_meta[key] + else: + meta_val = _pull_field(obj, val[level:]) + meta_vals[key].append(meta_val) + records.extend(recs) + + _recursive_extract(data, record_path, {}, level=0) + + result = DataFrame(records) + + if record_prefix is not None: + # Incompatible types in assignment (expression has type "Optional[DataFrame]", + # variable has type "DataFrame") + result = result.rename( # type: ignore[assignment] + columns=lambda x: f"{record_prefix}{x}" + ) + + # Data types, a problem + for k, v in meta_vals.items(): + if meta_prefix is not None: + k = meta_prefix + k + + if k in result: + raise ValueError( + f"Conflicting metadata name {k}, need distinguishing prefix " + ) + result[k] = np.array(v, dtype=object).repeat(lengths) + return result + + +json_normalize = deprecate( + "pandas.io.json.json_normalize", _json_normalize, "1.0.0", "pandas.json_normalize" +) diff --git 
a/.local/lib/python3.8/site-packages/pandas/io/json/_table_schema.py b/.local/lib/python3.8/site-packages/pandas/io/json/_table_schema.py
new file mode 100644
index 0000000..c630f0d
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/io/json/_table_schema.py
@@ -0,0 +1,368 @@
+"""
+Table Schema builders
+
+https://specs.frictionlessdata.io/json-table-schema/
+"""
+from __future__ import annotations
+
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    cast,
+)
+import warnings
+
+import pandas._libs.json as json
+from pandas._typing import (
+    DtypeObj,
+    JSONSerializable,
+)
+
+from pandas.core.dtypes.base import _registry as registry
+from pandas.core.dtypes.common import (
+    is_bool_dtype,
+    is_categorical_dtype,
+    is_datetime64_dtype,
+    is_datetime64tz_dtype,
+    is_extension_array_dtype,
+    is_integer_dtype,
+    is_numeric_dtype,
+    is_period_dtype,
+    is_string_dtype,
+    is_timedelta64_dtype,
+)
+from pandas.core.dtypes.dtypes import CategoricalDtype
+
+from pandas import DataFrame
+import pandas.core.common as com
+
+if TYPE_CHECKING:
+    from pandas import Series
+    from pandas.core.indexes.multi import MultiIndex
+
+loads = json.loads
+
+TABLE_SCHEMA_VERSION = "1.4.0"
+
+
+def as_json_table_type(x: DtypeObj) -> str:
+    """
+    Convert a NumPy / pandas type to its corresponding json_table.
+
+    Parameters
+    ----------
+    x : np.dtype or ExtensionDtype
+
+    Returns
+    -------
+    str
+        the Table Schema data types
+
+    Notes
+    -----
+    This table shows the relationship between NumPy / pandas dtypes,
+    and Table Schema dtypes.
+
+    ===============  =================
+    Pandas type      Table Schema type
+    ===============  =================
+    int64            integer
+    float64          number
+    bool             boolean
+    datetime64[ns]   datetime
+    timedelta64[ns]  duration
+    object           string
+    categorical      any
+    ===============  =================
+    """
+    if is_integer_dtype(x):
+        return "integer"
+    elif is_bool_dtype(x):
+        return "boolean"
+    elif is_numeric_dtype(x):
+        return "number"
+    elif is_datetime64_dtype(x) or is_datetime64tz_dtype(x) or is_period_dtype(x):
+        return "datetime"
+    elif is_timedelta64_dtype(x):
+        return "duration"
+    elif is_categorical_dtype(x):
+        return "any"
+    elif is_extension_array_dtype(x):
+        return "any"
+    elif is_string_dtype(x):
+        return "string"
+    else:
+        return "any"
+
+
+def set_default_names(data):
+    """Sets index names to 'index' for regular, or 'level_x' for Multi"""
+    if com.all_not_none(*data.index.names):
+        nms = data.index.names
+        if len(nms) == 1 and data.index.name == "index":
+            warnings.warn("Index name of 'index' is not round-trippable.")
+        elif len(nms) > 1 and any(x.startswith("level_") for x in nms):
+            warnings.warn(
+                "Index names beginning with 'level_' are not round-trippable."
+            )
+        return data
+
+    data = data.copy()
+    if data.index.nlevels > 1:
+        data.index.names = com.fill_missing_names(data.index.names)
+    else:
+        data.index.name = data.index.name or "index"
+    return data
+
+
+def convert_pandas_type_to_json_field(arr):
+    dtype = arr.dtype
+    if arr.name is None:
+        name = "values"
+    else:
+        name = arr.name
+    field: dict[str, JSONSerializable] = {
+        "name": name,
+        "type": as_json_table_type(dtype),
+    }
+
+    if is_categorical_dtype(dtype):
+        cats = dtype.categories
+        ordered = dtype.ordered
+
+        field["constraints"] = {"enum": list(cats)}
+        field["ordered"] = ordered
+    elif is_period_dtype(dtype):
+        field["freq"] = dtype.freq.freqstr
+    elif is_datetime64tz_dtype(dtype):
+        field["tz"] = dtype.tz.zone
+    elif is_extension_array_dtype(dtype):
+        field["extDtype"] = dtype.name
+    return field
+
+
+def convert_json_field_to_pandas_type(field):
+    """
+    Converts a JSON field descriptor into its corresponding NumPy / pandas type
+
+    Parameters
+    ----------
+    field
+        A JSON field descriptor
+
+    Returns
+    -------
+    dtype
+
+    Raises
+    ------
+    ValueError
+        If the type of the provided field is unknown or currently unsupported
+
+    Examples
+    --------
+    >>> convert_json_field_to_pandas_type({"name": "an_int", "type": "integer"})
+    'int64'
+
+    >>> convert_json_field_to_pandas_type(
+    ...     {
+    ...         "name": "a_categorical",
+    ...         "type": "any",
+    ...         "constraints": {"enum": ["a", "b", "c"]},
+    ...         "ordered": True,
+    ...     }
+    ... )
+    CategoricalDtype(categories=['a', 'b', 'c'], ordered=True)
+
+    >>> convert_json_field_to_pandas_type({"name": "a_datetime", "type": "datetime"})
+    'datetime64[ns]'
+
+    >>> convert_json_field_to_pandas_type(
+    ...     {"name": "a_datetime_with_tz", "type": "datetime", "tz": "US/Central"}
+    ... )
+    'datetime64[ns, US/Central]'
+    """
+    typ = field["type"]
+    if typ == "string":
+        return "object"
+    elif typ == "integer":
+        return "int64"
+    elif typ == "number":
+        return "float64"
+    elif typ == "boolean":
+        return "bool"
+    elif typ == "duration":
+        return "timedelta64"
+    elif typ == "datetime":
+        if field.get("tz"):
+            return f"datetime64[ns, {field['tz']}]"
+        else:
+            return "datetime64[ns]"
+    elif typ == "any":
+        if "constraints" in field and "ordered" in field:
+            return CategoricalDtype(
+                categories=field["constraints"]["enum"], ordered=field["ordered"]
+            )
+        elif "extDtype" in field:
+            return registry.find(field["extDtype"])
+        else:
+            return "object"
+
+    raise ValueError(f"Unsupported or invalid field type: {typ}")
+
+
+def build_table_schema(
+    data: DataFrame | Series,
+    index: bool = True,
+    primary_key: bool | None = None,
+    version: bool = True,
+) -> dict[str, JSONSerializable]:
+    """
+    Create a Table schema from ``data``.
+
+    Parameters
+    ----------
+    data : Series, DataFrame
+    index : bool, default True
+        Whether to include ``data.index`` in the schema.
+    primary_key : bool or None, default None
+        Column names to designate as the primary key.
+        The default `None` will set `'primaryKey'` to the index
+        level or levels if the index is unique.
+    version : bool, default True
+        Whether to include a field `pandas_version` with the version
+        of pandas that last revised the table schema. This version
+        can be different from the installed pandas version.
+
+    Returns
+    -------
+    schema : dict
+
+    Notes
+    -----
+    See `Table Schema
+    <https://specs.frictionlessdata.io/json-table-schema/>`__ for
+    conversion types.
+    Timedeltas are converted to ISO8601 duration format with
+    9 decimal places after the seconds field for nanosecond precision.
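+    For illustration (a hedged sketch of the formatting, not an exhaustive
+    specification): ``pd.Timedelta(nanoseconds=1).isoformat()`` gives
+    ``'P0DT0H0M0.000000001S'``.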
+ + Categoricals are converted to the `any` dtype, and use the `enum` field + constraint to list the allowed values. The `ordered` attribute is included + in an `ordered` field. + + Examples + -------- + >>> df = pd.DataFrame( + ... {'A': [1, 2, 3], + ... 'B': ['a', 'b', 'c'], + ... 'C': pd.date_range('2016-01-01', freq='d', periods=3), + ... }, index=pd.Index(range(3), name='idx')) + >>> build_table_schema(df) + {'fields': \ +[{'name': 'idx', 'type': 'integer'}, \ +{'name': 'A', 'type': 'integer'}, \ +{'name': 'B', 'type': 'string'}, \ +{'name': 'C', 'type': 'datetime'}], \ +'primaryKey': ['idx'], \ +'pandas_version': '1.4.0'} + """ + if index is True: + data = set_default_names(data) + + schema: dict[str, Any] = {} + fields = [] + + if index: + if data.index.nlevels > 1: + data.index = cast("MultiIndex", data.index) + for level, name in zip(data.index.levels, data.index.names): + new_field = convert_pandas_type_to_json_field(level) + new_field["name"] = name + fields.append(new_field) + else: + fields.append(convert_pandas_type_to_json_field(data.index)) + + if data.ndim > 1: + for column, s in data.items(): + fields.append(convert_pandas_type_to_json_field(s)) + else: + fields.append(convert_pandas_type_to_json_field(data)) + + schema["fields"] = fields + if index and data.index.is_unique and primary_key is None: + if data.index.nlevels == 1: + schema["primaryKey"] = [data.index.name] + else: + schema["primaryKey"] = data.index.names + elif primary_key is not None: + schema["primaryKey"] = primary_key + + if version: + schema["pandas_version"] = TABLE_SCHEMA_VERSION + return schema + + +def parse_table_schema(json, precise_float): + """ + Builds a DataFrame from a given schema + + Parameters + ---------- + json : + A JSON table schema + precise_float : bool + Flag controlling precision when decoding string to double values, as + dictated by ``read_json`` + + Returns + ------- + df : DataFrame + + Raises + ------ + NotImplementedError + If the JSON table schema contains either timezone or timedelta data + + Notes + ----- + Because :func:`DataFrame.to_json` uses the string 'index' to denote a + name-less :class:`Index`, this function sets the name of the returned + :class:`DataFrame` to ``None`` when said string is encountered with a + normal :class:`Index`. For a :class:`MultiIndex`, the same limitation + applies to any strings beginning with 'level_'. Therefore, an + :class:`Index` name of 'index' and :class:`MultiIndex` names starting + with 'level_' are not supported. + + See Also + -------- + build_table_schema : Inverse function. 
+    pandas.read_json
+    """
+    table = loads(json, precise_float=precise_float)
+    col_order = [field["name"] for field in table["schema"]["fields"]]
+    df = DataFrame(table["data"], columns=col_order)[col_order]
+
+    dtypes = {
+        field["name"]: convert_json_field_to_pandas_type(field)
+        for field in table["schema"]["fields"]
+    }
+
+    # No ISO constructor for Timedelta as of yet, so need to raise
+    if "timedelta64" in dtypes.values():
+        raise NotImplementedError(
+            'table="orient" can not yet read ISO-formatted Timedelta data'
+        )
+
+    df = df.astype(dtypes)
+
+    if "primaryKey" in table["schema"]:
+        df = df.set_index(table["schema"]["primaryKey"])
+        if len(df.index.names) == 1:
+            if df.index.name == "index":
+                df.index.name = None
+        else:
+            df.index.names = [
+                None if x.startswith("level_") else x for x in df.index.names
+            ]
+
+    return df
diff --git a/.local/lib/python3.8/site-packages/pandas/io/orc.py b/.local/lib/python3.8/site-packages/pandas/io/orc.py
new file mode 100644
index 0000000..6dd4de5
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/io/orc.py
@@ -0,0 +1,54 @@
+""" orc compat """
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from pandas._typing import (
+    FilePath,
+    ReadBuffer,
+)
+from pandas.compat._optional import import_optional_dependency
+
+from pandas.io.common import get_handle
+
+if TYPE_CHECKING:
+    from pandas import DataFrame
+
+
+def read_orc(
+    path: FilePath | ReadBuffer[bytes], columns: list[str] | None = None, **kwargs
+) -> DataFrame:
+    """
+    Load an ORC object from the file path, returning a DataFrame.
+
+    .. versionadded:: 1.0.0
+
+    Parameters
+    ----------
+    path : str, path object, or file-like object
+        String, path object (implementing ``os.PathLike[str]``), or file-like
+        object implementing a binary ``read()`` function. The string could be a URL.
+        Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is
+        expected. A local file could be:
+        ``file://localhost/path/to/table.orc``.
+    columns : list, default None
+        If not None, only these columns will be read from the file.
+    **kwargs
+        Any additional kwargs are passed to pyarrow.
+
+    Returns
+    -------
+    DataFrame
+
+    Notes
+    -----
+    Before using this function you should read the :ref:`user guide about ORC <io.orc>`
+    and :ref:`install optional dependencies <install.optional_dependencies>`.
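+
+    Examples
+    --------
+    A minimal sketch; ``"data.orc"`` is a hypothetical file name:
+
+    >>> df = pd.read_orc("data.orc", columns=["a", "b"])  # doctest: +SKIP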
+ """ + # we require a newer version of pyarrow than we support for parquet + + orc = import_optional_dependency("pyarrow.orc") + + with get_handle(path, "rb", is_text=False) as handles: + orc_file = orc.ORCFile(handles.handle) + return orc_file.read(columns=columns, **kwargs).to_pandas() diff --git a/.local/lib/python3.8/site-packages/pandas/io/parquet.py b/.local/lib/python3.8/site-packages/pandas/io/parquet.py new file mode 100644 index 0000000..4880c77 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/parquet.py @@ -0,0 +1,499 @@ +""" parquet compat """ +from __future__ import annotations + +import io +import os +from typing import Any +from warnings import catch_warnings + +from pandas._typing import ( + FilePath, + ReadBuffer, + StorageOptions, + WriteBuffer, +) +from pandas.compat._optional import import_optional_dependency +from pandas.errors import AbstractMethodError +from pandas.util._decorators import doc + +from pandas import ( + DataFrame, + MultiIndex, + get_option, +) +from pandas.core.shared_docs import _shared_docs +from pandas.util.version import Version + +from pandas.io.common import ( + IOHandles, + get_handle, + is_fsspec_url, + is_url, + stringify_path, +) + + +def get_engine(engine: str) -> BaseImpl: + """return our implementation""" + if engine == "auto": + engine = get_option("io.parquet.engine") + + if engine == "auto": + # try engines in this order + engine_classes = [PyArrowImpl, FastParquetImpl] + + error_msgs = "" + for engine_class in engine_classes: + try: + return engine_class() + except ImportError as err: + error_msgs += "\n - " + str(err) + + raise ImportError( + "Unable to find a usable engine; " + "tried using: 'pyarrow', 'fastparquet'.\n" + "A suitable version of " + "pyarrow or fastparquet is required for parquet " + "support.\n" + "Trying to import the above resulted in these errors:" + f"{error_msgs}" + ) + + if engine == "pyarrow": + return PyArrowImpl() + elif engine == "fastparquet": + return FastParquetImpl() + + raise ValueError("engine must be one of 'pyarrow', 'fastparquet'") + + +def _get_path_or_handle( + path: FilePath | ReadBuffer[bytes] | WriteBuffer[bytes], + fs: Any, + storage_options: StorageOptions = None, + mode: str = "rb", + is_dir: bool = False, +) -> tuple[ + FilePath | ReadBuffer[bytes] | WriteBuffer[bytes], IOHandles[bytes] | None, Any +]: + """File handling for PyArrow.""" + path_or_handle = stringify_path(path) + if is_fsspec_url(path_or_handle) and fs is None: + fsspec = import_optional_dependency("fsspec") + + fs, path_or_handle = fsspec.core.url_to_fs( + path_or_handle, **(storage_options or {}) + ) + elif storage_options and (not is_url(path_or_handle) or mode != "rb"): + # can't write to a remote url + # without making use of fsspec at the moment + raise ValueError("storage_options passed with buffer, or non-supported URL") + + handles = None + if ( + not fs + and not is_dir + and isinstance(path_or_handle, str) + and not os.path.isdir(path_or_handle) + ): + # use get_handle only when we are very certain that it is not a directory + # fsspec resources can also point to directories + # this branch is used for example when reading from non-fsspec URLs + handles = get_handle( + path_or_handle, mode, is_text=False, storage_options=storage_options + ) + fs = None + path_or_handle = handles.handle + return path_or_handle, handles, fs + + +class BaseImpl: + @staticmethod + def validate_dataframe(df: DataFrame) -> None: + + if not isinstance(df, DataFrame): + raise ValueError("to_parquet only supports IO with 
DataFrames") + + # must have value column names for all index levels (strings only) + if isinstance(df.columns, MultiIndex): + if not all( + x.inferred_type in {"string", "empty"} for x in df.columns.levels + ): + raise ValueError( + """ + parquet must have string column names for all values in + each level of the MultiIndex + """ + ) + else: + if df.columns.inferred_type not in {"string", "empty"}: + raise ValueError("parquet must have string column names") + + # index level names must be strings + valid_names = all( + isinstance(name, str) for name in df.index.names if name is not None + ) + if not valid_names: + raise ValueError("Index level names must be strings") + + def write(self, df: DataFrame, path, compression, **kwargs): + raise AbstractMethodError(self) + + def read(self, path, columns=None, **kwargs) -> DataFrame: + raise AbstractMethodError(self) + + +class PyArrowImpl(BaseImpl): + def __init__(self): + import_optional_dependency( + "pyarrow", extra="pyarrow is required for parquet support." + ) + import pyarrow.parquet + + # import utils to register the pyarrow extension types + import pandas.core.arrays._arrow_utils # noqa:F401 + + self.api = pyarrow + + def write( + self, + df: DataFrame, + path: FilePath | WriteBuffer[bytes], + compression: str | None = "snappy", + index: bool | None = None, + storage_options: StorageOptions = None, + partition_cols: list[str] | None = None, + **kwargs, + ) -> None: + self.validate_dataframe(df) + + from_pandas_kwargs: dict[str, Any] = {"schema": kwargs.pop("schema", None)} + if index is not None: + from_pandas_kwargs["preserve_index"] = index + + table = self.api.Table.from_pandas(df, **from_pandas_kwargs) + + path_or_handle, handles, kwargs["filesystem"] = _get_path_or_handle( + path, + kwargs.pop("filesystem", None), + storage_options=storage_options, + mode="wb", + is_dir=partition_cols is not None, + ) + try: + if partition_cols is not None: + # writes to multiple files under the given path + self.api.parquet.write_to_dataset( + table, + path_or_handle, + compression=compression, + partition_cols=partition_cols, + **kwargs, + ) + else: + # write to single output file + self.api.parquet.write_table( + table, path_or_handle, compression=compression, **kwargs + ) + finally: + if handles is not None: + handles.close() + + def read( + self, + path, + columns=None, + use_nullable_dtypes=False, + storage_options: StorageOptions = None, + **kwargs, + ) -> DataFrame: + kwargs["use_pandas_metadata"] = True + + to_pandas_kwargs = {} + if use_nullable_dtypes: + import pandas as pd + + mapping = { + self.api.int8(): pd.Int8Dtype(), + self.api.int16(): pd.Int16Dtype(), + self.api.int32(): pd.Int32Dtype(), + self.api.int64(): pd.Int64Dtype(), + self.api.uint8(): pd.UInt8Dtype(), + self.api.uint16(): pd.UInt16Dtype(), + self.api.uint32(): pd.UInt32Dtype(), + self.api.uint64(): pd.UInt64Dtype(), + self.api.bool_(): pd.BooleanDtype(), + self.api.string(): pd.StringDtype(), + } + to_pandas_kwargs["types_mapper"] = mapping.get + manager = get_option("mode.data_manager") + if manager == "array": + to_pandas_kwargs["split_blocks"] = True # type: ignore[assignment] + + path_or_handle, handles, kwargs["filesystem"] = _get_path_or_handle( + path, + kwargs.pop("filesystem", None), + storage_options=storage_options, + mode="rb", + ) + try: + result = self.api.parquet.read_table( + path_or_handle, columns=columns, **kwargs + ).to_pandas(**to_pandas_kwargs) + if manager == "array": + result = result._as_manager("array", copy=False) + return result + finally: + 
if handles is not None: + handles.close() + + +class FastParquetImpl(BaseImpl): + def __init__(self): + # since pandas is a dependency of fastparquet + # we need to import on first use + fastparquet = import_optional_dependency( + "fastparquet", extra="fastparquet is required for parquet support." + ) + self.api = fastparquet + + def write( + self, + df: DataFrame, + path, + compression="snappy", + index=None, + partition_cols=None, + storage_options: StorageOptions = None, + **kwargs, + ) -> None: + self.validate_dataframe(df) + # thriftpy/protocol/compact.py:339: + # DeprecationWarning: tostring() is deprecated. + # Use tobytes() instead. + + if "partition_on" in kwargs and partition_cols is not None: + raise ValueError( + "Cannot use both partition_on and " + "partition_cols. Use partition_cols for partitioning data" + ) + elif "partition_on" in kwargs: + partition_cols = kwargs.pop("partition_on") + + if partition_cols is not None: + kwargs["file_scheme"] = "hive" + + # cannot use get_handle as write() does not accept file buffers + path = stringify_path(path) + if is_fsspec_url(path): + fsspec = import_optional_dependency("fsspec") + + # if filesystem is provided by fsspec, file must be opened in 'wb' mode. + kwargs["open_with"] = lambda path, _: fsspec.open( + path, "wb", **(storage_options or {}) + ).open() + elif storage_options: + raise ValueError( + "storage_options passed with file object or non-fsspec file path" + ) + + with catch_warnings(record=True): + self.api.write( + path, + df, + compression=compression, + write_index=index, + partition_on=partition_cols, + **kwargs, + ) + + def read( + self, path, columns=None, storage_options: StorageOptions = None, **kwargs + ) -> DataFrame: + parquet_kwargs: dict[str, Any] = {} + use_nullable_dtypes = kwargs.pop("use_nullable_dtypes", False) + if Version(self.api.__version__) >= Version("0.7.1"): + # We are disabling nullable dtypes for fastparquet pending discussion + parquet_kwargs["pandas_nulls"] = False + if use_nullable_dtypes: + raise ValueError( + "The 'use_nullable_dtypes' argument is not supported for the " + "fastparquet engine" + ) + path = stringify_path(path) + handles = None + if is_fsspec_url(path): + fsspec = import_optional_dependency("fsspec") + + if Version(self.api.__version__) > Version("0.6.1"): + parquet_kwargs["fs"] = fsspec.open( + path, "rb", **(storage_options or {}) + ).fs + else: + parquet_kwargs["open_with"] = lambda path, _: fsspec.open( + path, "rb", **(storage_options or {}) + ).open() + elif isinstance(path, str) and not os.path.isdir(path): + # use get_handle only when we are very certain that it is not a directory + # fsspec resources can also point to directories + # this branch is used for example when reading from non-fsspec URLs + handles = get_handle( + path, "rb", is_text=False, storage_options=storage_options + ) + path = handles.handle + + parquet_file = self.api.ParquetFile(path, **parquet_kwargs) + + result = parquet_file.to_pandas(columns=columns, **kwargs) + + if handles is not None: + handles.close() + return result + + +@doc(storage_options=_shared_docs["storage_options"]) +def to_parquet( + df: DataFrame, + path: FilePath | WriteBuffer[bytes] | None = None, + engine: str = "auto", + compression: str | None = "snappy", + index: bool | None = None, + storage_options: StorageOptions = None, + partition_cols: list[str] | None = None, + **kwargs, +) -> bytes | None: + """ + Write a DataFrame to the parquet format. 
+ + Parameters + ---------- + df : DataFrame + path : str, path object, file-like object, or None, default None + String, path object (implementing ``os.PathLike[str]``), or file-like + object implementing a binary ``write()`` function. If None, the result is + returned as bytes. If a string, it will be used as Root Directory path + when writing a partitioned dataset. The engine fastparquet does not + accept file-like objects. + + .. versionchanged:: 1.2.0 + + engine : {{'auto', 'pyarrow', 'fastparquet'}}, default 'auto' + Parquet library to use. If 'auto', then the option + ``io.parquet.engine`` is used. The default ``io.parquet.engine`` + behavior is to try 'pyarrow', falling back to 'fastparquet' if + 'pyarrow' is unavailable. + compression : {{'snappy', 'gzip', 'brotli', 'lz4', 'zstd', None}}, + default 'snappy'. Name of the compression to use. Use ``None`` + for no compression. The supported compression methods actually + depend on which engine is used. For 'pyarrow', 'snappy', 'gzip', + 'brotli', 'lz4', 'zstd' are all supported. For 'fastparquet', + only 'gzip' and 'snappy' are supported. + index : bool, default None + If ``True``, include the dataframe's index(es) in the file output. If + ``False``, they will not be written to the file. + If ``None``, similar to ``True`` the dataframe's index(es) + will be saved. However, instead of being saved as values, + the RangeIndex will be stored as a range in the metadata so it + doesn't require much space and is faster. Other indexes will + be included as columns in the file output. + partition_cols : str or list, optional, default None + Column names by which to partition the dataset. + Columns are partitioned in the order they are given. + Must be None if path is not a string. + {storage_options} + + .. versionadded:: 1.2.0 + + kwargs + Additional keyword arguments passed to the engine + + Returns + ------- + bytes if no path argument is provided else None + """ + if isinstance(partition_cols, str): + partition_cols = [partition_cols] + impl = get_engine(engine) + + path_or_buf: FilePath | WriteBuffer[bytes] = io.BytesIO() if path is None else path + + impl.write( + df, + path_or_buf, + compression=compression, + index=index, + partition_cols=partition_cols, + storage_options=storage_options, + **kwargs, + ) + + if path is None: + assert isinstance(path_or_buf, io.BytesIO) + return path_or_buf.getvalue() + else: + return None + + +@doc(storage_options=_shared_docs["storage_options"]) +def read_parquet( + path, + engine: str = "auto", + columns=None, + storage_options: StorageOptions = None, + use_nullable_dtypes: bool = False, + **kwargs, +) -> DataFrame: + """ + Load a parquet object from the file path, returning a DataFrame. + + Parameters + ---------- + path : str, path object or file-like object + String, path object (implementing ``os.PathLike[str]``), or file-like + object implementing a binary ``read()`` function. + The string could be a URL. Valid URL schemes include http, ftp, s3, + gs, and file. For file URLs, a host is expected. A local file could be: + ``file://localhost/path/to/table.parquet``. + A file URL can also be a path to a directory that contains multiple + partitioned parquet files. Both pyarrow and fastparquet support + paths to directories as well as file URLs. A directory path could be: + ``file://localhost/path/to/tables`` or ``s3://bucket/partition_dir``. + engine : {{'auto', 'pyarrow', 'fastparquet'}}, default 'auto' + Parquet library to use. If 'auto', then the option + ``io.parquet.engine`` is used. 
The default ``io.parquet.engine``
+        behavior is to try 'pyarrow', falling back to 'fastparquet' if
+        'pyarrow' is unavailable.
+    columns : list, default None
+        If not None, only these columns will be read from the file.
+
+    {storage_options}
+
+        .. versionadded:: 1.3.0
+
+    use_nullable_dtypes : bool, default False
+        If True, use dtypes that use ``pd.NA`` as missing value indicator
+        for the resulting DataFrame (only applicable for the ``pyarrow``
+        engine).
+        As new dtypes are added that support ``pd.NA`` in the future, the
+        output with this option will change to use those dtypes.
+        Note: this is an experimental option, and behaviour (e.g. additional
+        supported dtypes) may change without notice.
+
+        .. versionadded:: 1.2.0
+
+    **kwargs
+        Any additional kwargs are passed to the engine.
+
+    Returns
+    -------
+    DataFrame
+    """
+    impl = get_engine(engine)
+
+    return impl.read(
+        path,
+        columns=columns,
+        storage_options=storage_options,
+        use_nullable_dtypes=use_nullable_dtypes,
+        **kwargs,
+    )
diff --git a/.local/lib/python3.8/site-packages/pandas/io/parsers/__init__.py b/.local/lib/python3.8/site-packages/pandas/io/parsers/__init__.py
new file mode 100644
index 0000000..ff11968
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/io/parsers/__init__.py
@@ -0,0 +1,9 @@
+from pandas.io.parsers.readers import (
+    TextFileReader,
+    TextParser,
+    read_csv,
+    read_fwf,
+    read_table,
+)
+
+__all__ = ["TextFileReader", "TextParser", "read_csv", "read_fwf", "read_table"]
diff --git a/.local/lib/python3.8/site-packages/pandas/io/parsers/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/parsers/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000..b5e3840
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/parsers/__pycache__/__init__.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/pandas/io/parsers/__pycache__/arrow_parser_wrapper.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/parsers/__pycache__/arrow_parser_wrapper.cpython-38.pyc
new file mode 100644
index 0000000..184f4dd
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/parsers/__pycache__/arrow_parser_wrapper.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/pandas/io/parsers/__pycache__/base_parser.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/parsers/__pycache__/base_parser.cpython-38.pyc
new file mode 100644
index 0000000..ccbaba0
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/parsers/__pycache__/base_parser.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/pandas/io/parsers/__pycache__/c_parser_wrapper.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/parsers/__pycache__/c_parser_wrapper.cpython-38.pyc
new file mode 100644
index 0000000..084a999
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/parsers/__pycache__/c_parser_wrapper.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/pandas/io/parsers/__pycache__/python_parser.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/parsers/__pycache__/python_parser.cpython-38.pyc
new file mode 100644
index 0000000..b336c41
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/parsers/__pycache__/python_parser.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/pandas/io/parsers/__pycache__/readers.cpython-38.pyc
b/.local/lib/python3.8/site-packages/pandas/io/parsers/__pycache__/readers.cpython-38.pyc new file mode 100644 index 0000000..5b27f71 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/parsers/__pycache__/readers.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/parsers/arrow_parser_wrapper.py b/.local/lib/python3.8/site-packages/pandas/io/parsers/arrow_parser_wrapper.py new file mode 100644 index 0000000..618da9d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/parsers/arrow_parser_wrapper.py @@ -0,0 +1,157 @@ +from __future__ import annotations + +from pandas._typing import ReadBuffer +from pandas.compat._optional import import_optional_dependency + +from pandas.core.dtypes.inference import is_integer + +from pandas.core.frame import DataFrame + +from pandas.io.parsers.base_parser import ParserBase + + +class ArrowParserWrapper(ParserBase): + """ + Wrapper for the pyarrow engine for read_csv() + """ + + def __init__(self, src: ReadBuffer[bytes], **kwds): + super().__init__(kwds) + self.kwds = kwds + self.src = src + + self._parse_kwds() + + def _parse_kwds(self): + """ + Validates keywords before passing to pyarrow. + """ + encoding: str | None = self.kwds.get("encoding") + self.encoding = "utf-8" if encoding is None else encoding + + self.usecols, self.usecols_dtype = self._validate_usecols_arg( + self.kwds["usecols"] + ) + na_values = self.kwds["na_values"] + if isinstance(na_values, dict): + raise ValueError( + "The pyarrow engine doesn't support passing a dict for na_values" + ) + self.na_values = list(self.kwds["na_values"]) + + def _get_pyarrow_options(self): + """ + Rename some arguments to pass to pyarrow + """ + mapping = { + "usecols": "include_columns", + "na_values": "null_values", + "escapechar": "escape_char", + "skip_blank_lines": "ignore_empty_lines", + } + for pandas_name, pyarrow_name in mapping.items(): + if pandas_name in self.kwds and self.kwds.get(pandas_name) is not None: + self.kwds[pyarrow_name] = self.kwds.pop(pandas_name) + + self.parse_options = { + option_name: option_value + for option_name, option_value in self.kwds.items() + if option_value is not None + and option_name + in ("delimiter", "quote_char", "escape_char", "ignore_empty_lines") + } + self.convert_options = { + option_name: option_value + for option_name, option_value in self.kwds.items() + if option_value is not None + and option_name + in ("include_columns", "null_values", "true_values", "false_values") + } + self.read_options = { + "autogenerate_column_names": self.header is None, + "skip_rows": self.header + if self.header is not None + else self.kwds["skiprows"], + } + + def _finalize_output(self, frame: DataFrame) -> DataFrame: + """ + Processes data read in based on kwargs. + + Parameters + ---------- + frame: DataFrame + The DataFrame to process. + + Returns + ------- + DataFrame + The processed DataFrame. + """ + num_cols = len(frame.columns) + multi_index_named = True + if self.header is None: + if self.names is None: + if self.prefix is not None: + self.names = [f"{self.prefix}{i}" for i in range(num_cols)] + elif self.header is None: + self.names = range(num_cols) + if len(self.names) != num_cols: + # usecols is passed through to pyarrow, we only handle index col here + # The only way self.names is not the same length as number of cols is + # if we have int index_col. We should just pad the names(they will get + # removed anyways) to expected length then. 
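+                # For example (illustrative): with num_cols=3 and
+                # names=["a"], the padded result is [0, 1, "a"]; the integer
+                # placeholders become index columns below and are then
+                # dropped from the frame's columns.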
+ self.names = list(range(num_cols - len(self.names))) + self.names + multi_index_named = False + frame.columns = self.names + # we only need the frame not the names + # error: Incompatible types in assignment (expression has type + # "Union[List[Union[Union[str, int, float, bool], Union[Period, Timestamp, + # Timedelta, Any]]], Index]", variable has type "Index") [assignment] + frame.columns, frame = self._do_date_conversions( # type: ignore[assignment] + frame.columns, frame + ) + if self.index_col is not None: + for i, item in enumerate(self.index_col): + if is_integer(item): + self.index_col[i] = frame.columns[item] + else: + # String case + if item not in frame.columns: + raise ValueError(f"Index {item} invalid") + frame.set_index(self.index_col, drop=True, inplace=True) + # Clear names if headerless and no name given + if self.header is None and not multi_index_named: + frame.index.names = [None] * len(frame.index.names) + + if self.kwds.get("dtype") is not None: + try: + frame = frame.astype(self.kwds.get("dtype")) + except TypeError as e: + # GH#44901 reraise to keep api consistent + raise ValueError(e) + return frame + + def read(self) -> DataFrame: + """ + Reads the contents of a CSV file into a DataFrame and + processes it according to the kwargs passed in the + constructor. + + Returns + ------- + DataFrame + The DataFrame created from the CSV file. + """ + pyarrow_csv = import_optional_dependency("pyarrow.csv") + self._get_pyarrow_options() + + table = pyarrow_csv.read_csv( + self.src, + read_options=pyarrow_csv.ReadOptions(**self.read_options), + parse_options=pyarrow_csv.ParseOptions(**self.parse_options), + convert_options=pyarrow_csv.ConvertOptions(**self.convert_options), + ) + + frame = table.to_pandas() + return self._finalize_output(frame) diff --git a/.local/lib/python3.8/site-packages/pandas/io/parsers/base_parser.py b/.local/lib/python3.8/site-packages/pandas/io/parsers/base_parser.py new file mode 100644 index 0000000..2a3a2b0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/parsers/base_parser.py @@ -0,0 +1,1341 @@ +from __future__ import annotations + +from collections import defaultdict +from copy import copy +import csv +import datetime +from enum import Enum +import itertools +from typing import ( + Callable, + DefaultDict, + Hashable, + Iterable, + List, + Mapping, + Sequence, + Tuple, + cast, + final, + overload, +) +import warnings + +import numpy as np + +import pandas._libs.lib as lib +import pandas._libs.ops as libops +import pandas._libs.parsers as parsers +from pandas._libs.parsers import STR_NA_VALUES +from pandas._libs.tslibs import parsing +from pandas._typing import ( + ArrayLike, + DtypeArg, +) +from pandas.errors import ( + ParserError, + ParserWarning, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.cast import astype_nansafe +from pandas.core.dtypes.common import ( + ensure_object, + is_bool_dtype, + is_categorical_dtype, + is_dict_like, + is_dtype_equal, + is_extension_array_dtype, + is_integer, + is_integer_dtype, + is_list_like, + is_object_dtype, + is_scalar, + is_string_dtype, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import CategoricalDtype +from pandas.core.dtypes.missing import isna + +from pandas import DataFrame +from pandas.core import algorithms +from pandas.core.arrays import Categorical +from pandas.core.indexes.api import ( + Index, + MultiIndex, + ensure_index_from_sequences, +) +from pandas.core.series import Series +from pandas.core.tools import datetimes as tools + 
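+# Note: this module defines ParserBase, the shared base class for the
+# engine-specific wrappers in this package (CParserWrapper, PythonParser and
+# ArrowParserWrapper); it centralizes option validation and the date and
+# dtype conversion helpers that all engines reuse.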
+from pandas.io.date_converters import generic_parser + + +class ParserBase: + class BadLineHandleMethod(Enum): + ERROR = 0 + WARN = 1 + SKIP = 2 + + _implicit_index: bool = False + _first_chunk: bool + + def __init__(self, kwds): + + self.names = kwds.get("names") + self.orig_names: list | None = None + self.prefix = kwds.pop("prefix", None) + + self.index_col = kwds.get("index_col", None) + self.unnamed_cols: set = set() + self.index_names: list | None = None + self.col_names = None + + self.parse_dates = _validate_parse_dates_arg(kwds.pop("parse_dates", False)) + self._parse_date_cols: Iterable = [] + self.date_parser = kwds.pop("date_parser", None) + self.dayfirst = kwds.pop("dayfirst", False) + self.keep_date_col = kwds.pop("keep_date_col", False) + + self.na_values = kwds.get("na_values") + self.na_fvalues = kwds.get("na_fvalues") + self.na_filter = kwds.get("na_filter", False) + self.keep_default_na = kwds.get("keep_default_na", True) + + self.dtype = copy(kwds.get("dtype", None)) + + self.true_values = kwds.get("true_values") + self.false_values = kwds.get("false_values") + self.mangle_dupe_cols = kwds.get("mangle_dupe_cols", True) + self.infer_datetime_format = kwds.pop("infer_datetime_format", False) + self.cache_dates = kwds.pop("cache_dates", True) + + self._date_conv = _make_date_converter( + date_parser=self.date_parser, + dayfirst=self.dayfirst, + infer_datetime_format=self.infer_datetime_format, + cache_dates=self.cache_dates, + ) + + # validate header options for mi + self.header = kwds.get("header") + if isinstance(self.header, (list, tuple, np.ndarray)): + if not all(map(is_integer, self.header)): + raise ValueError("header must be integer or list of integers") + if any(i < 0 for i in self.header): + raise ValueError( + "cannot specify multi-index header with negative integers" + ) + if kwds.get("usecols"): + raise ValueError( + "cannot specify usecols when specifying a multi-index header" + ) + if kwds.get("names"): + raise ValueError( + "cannot specify names when specifying a multi-index header" + ) + + # validate index_col that only contains integers + if self.index_col is not None: + is_sequence = isinstance(self.index_col, (list, tuple, np.ndarray)) + if not ( + is_sequence + and all(map(is_integer, self.index_col)) + or is_integer(self.index_col) + ): + raise ValueError( + "index_col must only contain row numbers " + "when specifying a multi-index header" + ) + elif self.header is not None: + # GH 27394 + if self.prefix is not None: + raise ValueError( + "Argument prefix must be None if argument header is not None" + ) + # GH 16338 + elif not is_integer(self.header): + raise ValueError("header must be integer or list of integers") + # GH 27779 + elif self.header < 0: + raise ValueError( + "Passing negative integer to header is invalid. " + "For no header, use header=None instead" + ) + + self._name_processed = False + + self._first_chunk = True + + self.usecols, self.usecols_dtype = self._validate_usecols_arg(kwds["usecols"]) + + # Fallback to error to pass a sketchy test(test_override_set_noconvert_columns) + # Normally, this arg would get pre-processed earlier on + self.on_bad_lines = kwds.get("on_bad_lines", self.BadLineHandleMethod.ERROR) + + def _validate_parse_dates_presence(self, columns: Sequence[Hashable]) -> Iterable: + """ + Check if parse_dates are in columns. + + If user has provided names for parse_dates, check if those columns + are available. + + Parameters + ---------- + columns : list + List of names of the dataframe. 
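+            Only string entries of ``parse_dates`` are checked against this
+            list; integer entries are interpreted as positions and resolved
+            to column names afterwards.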
+ + Returns + ------- + The names of the columns which will get parsed later if a dict or list + is given as specification. + + Raises + ------ + ValueError + If column to parse_date is not in dataframe. + + """ + cols_needed: Iterable + if is_dict_like(self.parse_dates): + cols_needed = itertools.chain(*self.parse_dates.values()) + elif is_list_like(self.parse_dates): + # a column in parse_dates could be represented + # ColReference = Union[int, str] + # DateGroups = List[ColReference] + # ParseDates = Union[DateGroups, List[DateGroups], + # Dict[ColReference, DateGroups]] + cols_needed = itertools.chain.from_iterable( + col if is_list_like(col) and not isinstance(col, tuple) else [col] + for col in self.parse_dates + ) + else: + cols_needed = [] + + cols_needed = list(cols_needed) + + # get only columns that are references using names (str), not by index + missing_cols = ", ".join( + sorted( + { + col + for col in cols_needed + if isinstance(col, str) and col not in columns + } + ) + ) + if missing_cols: + raise ValueError( + f"Missing column provided to 'parse_dates': '{missing_cols}'" + ) + # Convert positions to actual column names + return [ + col if (isinstance(col, str) or col in columns) else columns[col] + for col in cols_needed + ] + + def close(self): + pass + + @final + @property + def _has_complex_date_col(self) -> bool: + return isinstance(self.parse_dates, dict) or ( + isinstance(self.parse_dates, list) + and len(self.parse_dates) > 0 + and isinstance(self.parse_dates[0], list) + ) + + @final + def _should_parse_dates(self, i: int) -> bool: + if isinstance(self.parse_dates, bool): + return self.parse_dates + else: + if self.index_names is not None: + name = self.index_names[i] + else: + name = None + j = i if self.index_col is None else self.index_col[i] + + if is_scalar(self.parse_dates): + return (j == self.parse_dates) or ( + name is not None and name == self.parse_dates + ) + else: + return (j in self.parse_dates) or ( + name is not None and name in self.parse_dates + ) + + @final + def _extract_multi_indexer_columns( + self, + header, + index_names: list | None, + passed_names: bool = False, + ): + """ + Extract and return the names, index_names, col_names if the column + names are a MultiIndex. + + Parameters + ---------- + header: list of lists + The header rows + index_names: list, optional + The names of the future index + passed_names: bool, default False + A flag specifying if names where passed + + """ + if len(header) < 2: + return header[0], index_names, None, passed_names + + # the names are the tuples of the header that are not the index cols + # 0 is the name of the index, assuming index_col is a list of column + # numbers + ic = self.index_col + if ic is None: + ic = [] + + if not isinstance(ic, (list, tuple, np.ndarray)): + ic = [ic] + sic = set(ic) + + # clean the index_names + index_names = header.pop(-1) + index_names, _, _ = self._clean_index_names( + index_names, self.index_col, self.unnamed_cols + ) + + # extract the columns + field_count = len(header[0]) + + # check if header lengths are equal + if not all(len(header_iter) == field_count for header_iter in header[1:]): + raise ParserError("Header rows must have an equal number of columns.") + + def extract(r): + return tuple(r[i] for i in range(field_count) if i not in sic) + + columns = list(zip(*(extract(r) for r in header))) + names = columns.copy() + for single_ic in sorted(ic): + names.insert(single_ic, single_ic) + + # Clean the column names (if we have an index_col). 
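+        # For example (illustrative): a header row ["year", "a", "b"] with
+        # index_col=[0] contributes "year" to col_names below, while labels
+        # from unnamed placeholder columns map to None.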
+ if len(ic): + col_names = [ + r[ic[0]] + if ((r[ic[0]] is not None) and r[ic[0]] not in self.unnamed_cols) + else None + for r in header + ] + else: + col_names = [None] * len(header) + + passed_names = True + + return names, index_names, col_names, passed_names + + @final + def _maybe_dedup_names(self, names: Sequence[Hashable]) -> Sequence[Hashable]: + # see gh-7160 and gh-9424: this helps to provide + # immediate alleviation of the duplicate names + # issue and appears to be satisfactory to users, + # but ultimately, not needing to butcher the names + # would be nice! + if self.mangle_dupe_cols: + names = list(names) # so we can index + counts: DefaultDict[Hashable, int] = defaultdict(int) + is_potential_mi = _is_potential_multi_index(names, self.index_col) + + for i, col in enumerate(names): + cur_count = counts[col] + + while cur_count > 0: + counts[col] = cur_count + 1 + + if is_potential_mi: + # for mypy + assert isinstance(col, tuple) + col = col[:-1] + (f"{col[-1]}.{cur_count}",) + else: + col = f"{col}.{cur_count}" + cur_count = counts[col] + + names[i] = col + counts[col] = cur_count + 1 + + return names + + @final + def _maybe_make_multi_index_columns( + self, + columns: Sequence[Hashable], + col_names: Sequence[Hashable] | None = None, + ) -> Sequence[Hashable] | MultiIndex: + # possibly create a column mi here + if _is_potential_multi_index(columns): + list_columns = cast(List[Tuple], columns) + return MultiIndex.from_tuples(list_columns, names=col_names) + return columns + + @final + def _make_index(self, data, alldata, columns, indexnamerow=False): + if not is_index_col(self.index_col) or not self.index_col: + index = None + + elif not self._has_complex_date_col: + index = self._get_simple_index(alldata, columns) + index = self._agg_index(index) + elif self._has_complex_date_col: + if not self._name_processed: + (self.index_names, _, self.index_col) = self._clean_index_names( + list(columns), self.index_col, self.unnamed_cols + ) + self._name_processed = True + index = self._get_complex_date_index(data, columns) + index = self._agg_index(index, try_parse_dates=False) + + # add names for the index + if indexnamerow: + coffset = len(indexnamerow) - len(columns) + assert index is not None + index = index.set_names(indexnamerow[:coffset]) + + # maybe create a mi on the columns + columns = self._maybe_make_multi_index_columns(columns, self.col_names) + + return index, columns + + @final + def _get_simple_index(self, data, columns): + def ix(col): + if not isinstance(col, str): + return col + raise ValueError(f"Index {col} invalid") + + to_remove = [] + index = [] + for idx in self.index_col: + i = ix(idx) + to_remove.append(i) + index.append(data[i]) + + # remove index items from content and columns, don't pop in + # loop + for i in sorted(to_remove, reverse=True): + data.pop(i) + if not self._implicit_index: + columns.pop(i) + + return index + + @final + def _get_complex_date_index(self, data, col_names): + def _get_name(icol): + if isinstance(icol, str): + return icol + + if col_names is None: + raise ValueError(f"Must supply column order to use {icol!s} as index") + + for i, c in enumerate(col_names): + if i == icol: + return c + + to_remove = [] + index = [] + for idx in self.index_col: + name = _get_name(idx) + to_remove.append(name) + index.append(data[name]) + + # remove index items from content and columns, don't pop in + # loop + for c in sorted(to_remove, reverse=True): + data.pop(c) + col_names.remove(c) + + return index + + def _clean_mapping(self, mapping): + 
"""converts col numbers to names""" + if not isinstance(mapping, dict): + return mapping + clean = {} + for col, v in mapping.items(): + # for mypy + assert self.orig_names is not None + if isinstance(col, int) and col not in self.orig_names: + col = self.orig_names[col] + clean[col] = v + return clean + + @final + def _agg_index(self, index, try_parse_dates: bool = True) -> Index: + arrays = [] + + for i, arr in enumerate(index): + + if try_parse_dates and self._should_parse_dates(i): + arr = self._date_conv(arr) + + if self.na_filter: + col_na_values = self.na_values + col_na_fvalues = self.na_fvalues + else: + col_na_values = set() + col_na_fvalues = set() + + if isinstance(self.na_values, dict): + assert self.index_names is not None + col_name = self.index_names[i] + if col_name is not None: + col_na_values, col_na_fvalues = _get_na_values( + col_name, self.na_values, self.na_fvalues, self.keep_default_na + ) + + clean_dtypes = self._clean_mapping(self.dtype) + + cast_type = None + if isinstance(clean_dtypes, dict) and self.index_names is not None: + cast_type = clean_dtypes.get(self.index_names[i], None) + + try_num_bool = not (cast_type and is_string_dtype(cast_type)) + + arr, _ = self._infer_types( + arr, col_na_values | col_na_fvalues, try_num_bool + ) + arrays.append(arr) + + names = self.index_names + index = ensure_index_from_sequences(arrays, names) + + return index + + @final + def _convert_to_ndarrays( + self, + dct: Mapping, + na_values, + na_fvalues, + verbose: bool = False, + converters=None, + dtypes=None, + ): + result = {} + for c, values in dct.items(): + conv_f = None if converters is None else converters.get(c, None) + if isinstance(dtypes, dict): + cast_type = dtypes.get(c, None) + else: + # single dtype or None + cast_type = dtypes + + if self.na_filter: + col_na_values, col_na_fvalues = _get_na_values( + c, na_values, na_fvalues, self.keep_default_na + ) + else: + col_na_values, col_na_fvalues = set(), set() + + if c in self._parse_date_cols: + # GH#26203 Do not convert columns which get converted to dates + # but replace nans to ensure to_datetime works + mask = algorithms.isin(values, set(col_na_values) | col_na_fvalues) + np.putmask(values, mask, np.nan) + result[c] = values + continue + + if conv_f is not None: + # conv_f applied to data before inference + if cast_type is not None: + warnings.warn( + ( + "Both a converter and dtype were specified " + f"for column {c} - only the converter will be used." 
+ ), + ParserWarning, + stacklevel=find_stack_level(), + ) + + try: + values = lib.map_infer(values, conv_f) + except ValueError: + # error: Argument 2 to "isin" has incompatible type "List[Any]"; + # expected "Union[Union[ExtensionArray, ndarray], Index, Series]" + mask = algorithms.isin( + values, list(na_values) # type: ignore[arg-type] + ).view(np.uint8) + values = lib.map_infer_mask(values, conv_f, mask) + + cvals, na_count = self._infer_types( + values, set(col_na_values) | col_na_fvalues, try_num_bool=False + ) + else: + is_ea = is_extension_array_dtype(cast_type) + is_str_or_ea_dtype = is_ea or is_string_dtype(cast_type) + # skip inference if specified dtype is object + # or casting to an EA + try_num_bool = not (cast_type and is_str_or_ea_dtype) + + # general type inference and conversion + cvals, na_count = self._infer_types( + values, set(col_na_values) | col_na_fvalues, try_num_bool + ) + + # type specified in dtype param or cast_type is an EA + if cast_type and ( + not is_dtype_equal(cvals, cast_type) + or is_extension_array_dtype(cast_type) + ): + if not is_ea and na_count > 0: + try: + if is_bool_dtype(cast_type): + raise ValueError( + f"Bool column has NA values in column {c}" + ) + except (AttributeError, TypeError): + # invalid input to is_bool_dtype + pass + cast_type = pandas_dtype(cast_type) + cvals = self._cast_types(cvals, cast_type, c) + + result[c] = cvals + if verbose and na_count: + print(f"Filled {na_count} NA values in column {c!s}") + return result + + @final + def _set_noconvert_dtype_columns( + self, col_indices: list[int], names: Sequence[Hashable] + ) -> set[int]: + """ + Set the columns that should not undergo dtype conversions. + + Currently, any column that is involved with date parsing will not + undergo such conversions. If usecols is specified, the positions of the columns + not to cast is relative to the usecols not to all columns. + + Parameters + ---------- + col_indices: The indices specifying order and positions of the columns + names: The column names which order is corresponding with the order + of col_indices + + Returns + ------- + A set of integers containing the positions of the columns not to convert. + """ + usecols: list[int] | list[str] | None + noconvert_columns = set() + if self.usecols_dtype == "integer": + # A set of integers will be converted to a list in + # the correct order every single time. + usecols = sorted(self.usecols) + elif callable(self.usecols) or self.usecols_dtype not in ("empty", None): + # The names attribute should have the correct columns + # in the proper order for indexing with parse_dates. + usecols = col_indices + else: + # Usecols is empty. 
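+            # (Illustrative note) With usecols left unset here, integer
+            # parse_dates entries are already absolute positions and pass
+            # through _set below unchanged; only string labels still need
+            # resolving via names.index(...).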
+            usecols = None
+
+        def _set(x) -> int:
+            if usecols is not None and is_integer(x):
+                x = usecols[x]
+
+            if not is_integer(x):
+                x = col_indices[names.index(x)]
+
+            return x
+
+        if isinstance(self.parse_dates, list):
+            for val in self.parse_dates:
+                if isinstance(val, list):
+                    for k in val:
+                        noconvert_columns.add(_set(k))
+                else:
+                    noconvert_columns.add(_set(val))
+
+        elif isinstance(self.parse_dates, dict):
+            for val in self.parse_dates.values():
+                if isinstance(val, list):
+                    for k in val:
+                        noconvert_columns.add(_set(k))
+                else:
+                    noconvert_columns.add(_set(val))
+
+        elif self.parse_dates:
+            if isinstance(self.index_col, list):
+                for k in self.index_col:
+                    noconvert_columns.add(_set(k))
+            elif self.index_col is not None:
+                noconvert_columns.add(_set(self.index_col))
+
+        return noconvert_columns
+
+    def _infer_types(self, values, na_values, try_num_bool=True):
+        """
+        Infer types of values, possibly casting
+
+        Parameters
+        ----------
+        values : ndarray
+        na_values : set
+        try_num_bool : bool, default True
+            try to cast values to numeric (first preference) or boolean
+
+        Returns
+        -------
+        converted : ndarray
+        na_count : int
+        """
+        na_count = 0
+        if issubclass(values.dtype.type, (np.number, np.bool_)):
+            # If our array has numeric dtype, we don't have to check for strings in isin
+            na_values = np.array([val for val in na_values if not isinstance(val, str)])
+            mask = algorithms.isin(values, na_values)
+            na_count = mask.astype("uint8", copy=False).sum()
+            if na_count > 0:
+                if is_integer_dtype(values):
+                    values = values.astype(np.float64)
+                np.putmask(values, mask, np.nan)
+            return values, na_count
+
+        if try_num_bool and is_object_dtype(values.dtype):
+            # exclude e.g. DatetimeIndex here
+            try:
+                result, _ = lib.maybe_convert_numeric(values, na_values, False)
+            except (ValueError, TypeError):
+                # e.g.
encountering datetime string gets ValueError + # TypeError can be raised in floatify + result = values + na_count = parsers.sanitize_objects(result, na_values) + else: + na_count = isna(result).sum() + else: + result = values + if values.dtype == np.object_: + na_count = parsers.sanitize_objects(values, na_values) + + if result.dtype == np.object_ and try_num_bool: + result, _ = libops.maybe_convert_bool( + np.asarray(values), + true_values=self.true_values, + false_values=self.false_values, + ) + + return result, na_count + + def _cast_types(self, values, cast_type, column): + """ + Cast values to specified type + + Parameters + ---------- + values : ndarray + cast_type : string or np.dtype + dtype to cast values to + column : string + column name - used only for error reporting + + Returns + ------- + converted : ndarray + """ + if is_categorical_dtype(cast_type): + known_cats = ( + isinstance(cast_type, CategoricalDtype) + and cast_type.categories is not None + ) + + if not is_object_dtype(values) and not known_cats: + # TODO: this is for consistency with + # c-parser which parses all categories + # as strings + + values = astype_nansafe(values, np.dtype(str)) + + cats = Index(values).unique().dropna() + values = Categorical._from_inferred_categories( + cats, cats.get_indexer(values), cast_type, true_values=self.true_values + ) + + # use the EA's implementation of casting + elif is_extension_array_dtype(cast_type): + # ensure cast_type is an actual dtype and not a string + cast_type = pandas_dtype(cast_type) + array_type = cast_type.construct_array_type() + try: + if is_bool_dtype(cast_type): + return array_type._from_sequence_of_strings( + values, + dtype=cast_type, + true_values=self.true_values, + false_values=self.false_values, + ) + else: + return array_type._from_sequence_of_strings(values, dtype=cast_type) + except NotImplementedError as err: + raise NotImplementedError( + f"Extension Array: {array_type} must implement " + "_from_sequence_of_strings in order to be used in parser methods" + ) from err + + else: + try: + values = astype_nansafe(values, cast_type, copy=True, skipna=True) + except ValueError as err: + raise ValueError( + f"Unable to convert column {column} to type {cast_type}" + ) from err + return values + + @overload + def _do_date_conversions( + self, + names: Index, + data: DataFrame, + ) -> tuple[Sequence[Hashable] | Index, DataFrame]: + ... + + @overload + def _do_date_conversions( + self, + names: Sequence[Hashable], + data: Mapping[Hashable, ArrayLike], + ) -> tuple[Sequence[Hashable], Mapping[Hashable, ArrayLike]]: + ... + + def _do_date_conversions( + self, + names: Sequence[Hashable] | Index, + data: Mapping[Hashable, ArrayLike] | DataFrame, + ) -> tuple[Sequence[Hashable] | Index, Mapping[Hashable, ArrayLike] | DataFrame]: + # returns data, columns + + if self.parse_dates is not None: + data, names = _process_date_conversion( + data, + self._date_conv, + self.parse_dates, + self.index_col, + self.index_names, + names, + keep_date_col=self.keep_date_col, + ) + + return names, data + + def _check_data_length( + self, + columns: Sequence[Hashable], + data: Sequence[ArrayLike], + ) -> None: + """Checks if length of data is equal to length of column names. + + One set of trailing commas is allowed. self.index_col not False + results in a ParserError previously when lengths do not match. + + Parameters + ---------- + columns: list of column names + data: list of array-likes containing the data column-wise. 
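+
+        For example, a row with one extra trailing delimiter yields one more
+        data column than there are header names; that case is tolerated
+        silently when the extra column is entirely empty.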
+ """ + if not self.index_col and len(columns) != len(data) and columns: + if len(columns) == len(data) - 1 and np.all( + (is_object_dtype(data[-1]) and data[-1] == "") | isna(data[-1]) + ): + return + warnings.warn( + "Length of header or names does not match length of data. This leads " + "to a loss of data with index_col=False.", + ParserWarning, + stacklevel=find_stack_level(), + ) + + @overload + def _evaluate_usecols( + self, + usecols: set[int] | Callable[[Hashable], object], + names: Sequence[Hashable], + ) -> set[int]: + ... + + @overload + def _evaluate_usecols( + self, usecols: set[str], names: Sequence[Hashable] + ) -> set[str]: + ... + + def _evaluate_usecols( + self, + usecols: Callable[[Hashable], object] | set[str] | set[int], + names: Sequence[Hashable], + ) -> set[str] | set[int]: + """ + Check whether or not the 'usecols' parameter + is a callable. If so, enumerates the 'names' + parameter and returns a set of indices for + each entry in 'names' that evaluates to True. + If not a callable, returns 'usecols'. + """ + if callable(usecols): + return {i for i, name in enumerate(names) if usecols(name)} + return usecols + + def _validate_usecols_names(self, usecols, names): + """ + Validates that all usecols are present in a given + list of names. If not, raise a ValueError that + shows what usecols are missing. + + Parameters + ---------- + usecols : iterable of usecols + The columns to validate are present in names. + names : iterable of names + The column names to check against. + + Returns + ------- + usecols : iterable of usecols + The `usecols` parameter if the validation succeeds. + + Raises + ------ + ValueError : Columns were missing. Error message will list them. + """ + missing = [c for c in usecols if c not in names] + if len(missing) > 0: + raise ValueError( + f"Usecols do not match columns, columns expected but not found: " + f"{missing}" + ) + + return usecols + + def _validate_usecols_arg(self, usecols): + """ + Validate the 'usecols' parameter. + + Checks whether or not the 'usecols' parameter contains all integers + (column selection by index), strings (column by name) or is a callable. + Raises a ValueError if that is not the case. + + Parameters + ---------- + usecols : list-like, callable, or None + List of columns to use when parsing or a callable that can be used + to filter a list of table columns. + + Returns + ------- + usecols_tuple : tuple + A tuple of (verified_usecols, usecols_dtype). + + 'verified_usecols' is either a set if an array-like is passed in or + 'usecols' if a callable or None is passed in. + + 'usecols_dtype` is the inferred dtype of 'usecols' if an array-like + is passed in or None if a callable or None is passed in. + """ + msg = ( + "'usecols' must either be list-like of all strings, all unicode, " + "all integers or a callable." + ) + if usecols is not None: + if callable(usecols): + return usecols, None + + if not is_list_like(usecols): + # see gh-20529 + # + # Ensure it is iterable container but not string. 
+ raise ValueError(msg) + + usecols_dtype = lib.infer_dtype(usecols, skipna=False) + + if usecols_dtype not in ("empty", "integer", "string"): + raise ValueError(msg) + + usecols = set(usecols) + + return usecols, usecols_dtype + return usecols, None + + def _clean_index_names(self, columns, index_col, unnamed_cols): + if not is_index_col(index_col): + return None, columns, index_col + + columns = list(columns) + + # In case of no rows and multiindex columns we have to set index_names to + # list of Nones GH#38292 + if not columns: + return [None] * len(index_col), columns, index_col + + cp_cols = list(columns) + index_names: list[str | int | None] = [] + + # don't mutate + index_col = list(index_col) + + for i, c in enumerate(index_col): + if isinstance(c, str): + index_names.append(c) + for j, name in enumerate(cp_cols): + if name == c: + index_col[i] = j + columns.remove(name) + break + else: + name = cp_cols[c] + columns.remove(name) + index_names.append(name) + + # Only clean index names that were placeholders. + for i, name in enumerate(index_names): + if isinstance(name, str) and name in unnamed_cols: + index_names[i] = None + + return index_names, columns, index_col + + def _get_empty_meta( + self, columns, index_col, index_names, dtype: DtypeArg | None = None + ): + columns = list(columns) + + # Convert `dtype` to a defaultdict of some kind. + # This will enable us to write `dtype[col_name]` + # without worrying about KeyError issues later on. + if not is_dict_like(dtype): + # if dtype == None, default will be object. + default_dtype = dtype or object + # error: Argument 1 to "defaultdict" has incompatible type "Callable[[], + # Union[ExtensionDtype, str, dtype[Any], Type[object], Dict[Hashable, + # Union[ExtensionDtype, Union[str, dtype[Any]], Type[str], Type[float], + # Type[int], Type[complex], Type[bool], Type[object]]]]]"; expected + # "Optional[Callable[[], Union[ExtensionDtype, str, dtype[Any], + # Type[object]]]]" + # error: Incompatible return value type (got "Union[ExtensionDtype, str, + # dtype[Any], Type[object], Dict[Hashable, Union[ExtensionDtype, Union[str, + # dtype[Any]], Type[str], Type[float], Type[int], Type[complex], Type[bool], + # Type[object]]]]", expected "Union[ExtensionDtype, str, dtype[Any], + # Type[object]]") + dtype = defaultdict( + lambda: default_dtype # type: ignore[arg-type, return-value] + ) + else: + dtype = cast(dict, dtype) + dtype = defaultdict( + lambda: object, + {columns[k] if is_integer(k) else k: v for k, v in dtype.items()}, + ) + + # Even though we have no data, the "index" of the empty DataFrame + # could for example still be an empty MultiIndex. Thus, we need to + # check whether we have any index columns specified, via either: + # + # 1) index_col (column indices) + # 2) index_names (column names) + # + # Both must be non-null to ensure a successful construction. Otherwise, + # we have to create a generic empty Index. 
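+        # For example (illustrative): index_col=[0, 1] with index_names
+        # ["a", "b"] produces an empty two-level MultiIndex below, while
+        # index_col=None falls through to the plain empty Index.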
+ if (index_col is None or index_col is False) or index_names is None: + index = Index([]) + else: + data = [Series([], dtype=dtype[name]) for name in index_names] + index = ensure_index_from_sequences(data, names=index_names) + index_col.sort() + + for i, n in enumerate(index_col): + columns.pop(n - i) + + col_dict = {col_name: Series([], dtype=dtype[col_name]) for col_name in columns} + + return index, columns, col_dict + + +def _make_date_converter( + date_parser=None, dayfirst=False, infer_datetime_format=False, cache_dates=True +): + def converter(*date_cols): + if date_parser is None: + strs = parsing.concat_date_cols(date_cols) + + try: + return tools.to_datetime( + ensure_object(strs), + utc=None, + dayfirst=dayfirst, + errors="ignore", + infer_datetime_format=infer_datetime_format, + cache=cache_dates, + ).to_numpy() + + except ValueError: + return tools.to_datetime( + parsing.try_parse_dates(strs, dayfirst=dayfirst), cache=cache_dates + ) + else: + try: + result = tools.to_datetime( + date_parser(*date_cols), errors="ignore", cache=cache_dates + ) + if isinstance(result, datetime.datetime): + raise Exception("scalar parser") + return result + except Exception: + try: + return tools.to_datetime( + parsing.try_parse_dates( + parsing.concat_date_cols(date_cols), + parser=date_parser, + dayfirst=dayfirst, + ), + errors="ignore", + ) + except Exception: + return generic_parser(date_parser, *date_cols) + + return converter + + +parser_defaults = { + "delimiter": None, + "escapechar": None, + "quotechar": '"', + "quoting": csv.QUOTE_MINIMAL, + "doublequote": True, + "skipinitialspace": False, + "lineterminator": None, + "header": "infer", + "index_col": None, + "names": None, + "prefix": None, + "skiprows": None, + "skipfooter": 0, + "nrows": None, + "na_values": None, + "keep_default_na": True, + "true_values": None, + "false_values": None, + "converters": None, + "dtype": None, + "cache_dates": True, + "thousands": None, + "comment": None, + "decimal": ".", + # 'engine': 'c', + "parse_dates": False, + "keep_date_col": False, + "dayfirst": False, + "date_parser": None, + "usecols": None, + # 'iterator': False, + "chunksize": None, + "verbose": False, + "encoding": None, + "squeeze": None, + "compression": None, + "mangle_dupe_cols": True, + "infer_datetime_format": False, + "skip_blank_lines": True, + "encoding_errors": "strict", + "on_bad_lines": ParserBase.BadLineHandleMethod.ERROR, + "error_bad_lines": None, + "warn_bad_lines": None, +} + + +def _process_date_conversion( + data_dict, + converter: Callable, + parse_spec, + index_col, + index_names, + columns, + keep_date_col: bool = False, +): + def _isindex(colspec): + return (isinstance(index_col, list) and colspec in index_col) or ( + isinstance(index_names, list) and colspec in index_names + ) + + new_cols = [] + new_data = {} + + orig_names = columns + columns = list(columns) + + date_cols = set() + + if parse_spec is None or isinstance(parse_spec, bool): + return data_dict, columns + + if isinstance(parse_spec, list): + # list of column lists + for colspec in parse_spec: + if is_scalar(colspec) or isinstance(colspec, tuple): + if isinstance(colspec, int) and colspec not in data_dict: + colspec = orig_names[colspec] + if _isindex(colspec): + continue + # Pyarrow engine returns Series which we need to convert to + # numpy array before converter, its a no-op for other parsers + data_dict[colspec] = converter(np.asarray(data_dict[colspec])) + else: + new_name, col, old_names = _try_convert_dates( + converter, colspec, data_dict, 
orig_names + ) + if new_name in data_dict: + raise ValueError(f"New date column already in dict {new_name}") + new_data[new_name] = col + new_cols.append(new_name) + date_cols.update(old_names) + + elif isinstance(parse_spec, dict): + # dict of new name to column list + for new_name, colspec in parse_spec.items(): + if new_name in data_dict: + raise ValueError(f"Date column {new_name} already in dict") + + _, col, old_names = _try_convert_dates( + converter, colspec, data_dict, orig_names + ) + + new_data[new_name] = col + + # If original column can be converted to date we keep the converted values + # This can only happen if values are from single column + if len(colspec) == 1: + new_data[colspec[0]] = col + + new_cols.append(new_name) + date_cols.update(old_names) + + data_dict.update(new_data) + new_cols.extend(columns) + + if not keep_date_col: + for c in list(date_cols): + data_dict.pop(c) + new_cols.remove(c) + + return data_dict, new_cols + + +def _try_convert_dates(parser: Callable, colspec, data_dict, columns): + colset = set(columns) + colnames = [] + + for c in colspec: + if c in colset: + colnames.append(c) + elif isinstance(c, int) and c not in columns: + colnames.append(columns[c]) + else: + colnames.append(c) + + new_name: tuple | str + if all(isinstance(x, tuple) for x in colnames): + new_name = tuple(map("_".join, zip(*colnames))) + else: + new_name = "_".join([str(x) for x in colnames]) + to_parse = [np.asarray(data_dict[c]) for c in colnames if c in data_dict] + + new_col = parser(*to_parse) + return new_name, new_col, colnames + + +def _get_na_values(col, na_values, na_fvalues, keep_default_na): + """ + Get the NaN values for a given column. + + Parameters + ---------- + col : str + The name of the column. + na_values : array-like, dict + The object listing the NaN values as strings. + na_fvalues : array-like, dict + The object listing the NaN values as floats. + keep_default_na : bool + If `na_values` is a dict, and the column is not mapped in the + dictionary, whether to return the default NaN values or the empty set. + + Returns + ------- + nan_tuple : A length-two tuple composed of + + 1) na_values : the string NaN values for that column. + 2) na_fvalues : the float NaN values for that column. + """ + if isinstance(na_values, dict): + if col in na_values: + return na_values[col], na_fvalues[col] + else: + if keep_default_na: + return STR_NA_VALUES, set() + + return set(), set() + else: + return na_values, na_fvalues + + +def _is_potential_multi_index( + columns: Sequence[Hashable] | MultiIndex, + index_col: bool | Sequence[int] | None = None, +) -> bool: + """ + Check whether or not the `columns` parameter + could be converted into a MultiIndex. + + Parameters + ---------- + columns : array-like + Object which may or may not be convertible into a MultiIndex + index_col : None, bool or list, optional + Column or columns to use as the (possibly hierarchical) index + + Returns + ------- + bool : Whether or not columns could become a MultiIndex + """ + if index_col is None or isinstance(index_col, bool): + index_col = [] + + return bool( + len(columns) + and not isinstance(columns, MultiIndex) + and all(isinstance(c, tuple) for c in columns if c not in list(index_col)) + ) + + +def _validate_parse_dates_arg(parse_dates): + """ + Check whether or not the 'parse_dates' parameter + is a non-boolean scalar. Raises a ValueError if + that is the case. 
+ """ + msg = ( + "Only booleans, lists, and dictionaries are accepted " + "for the 'parse_dates' parameter" + ) + + if parse_dates is not None: + if is_scalar(parse_dates): + if not lib.is_bool(parse_dates): + raise TypeError(msg) + + elif not isinstance(parse_dates, (list, dict)): + raise TypeError(msg) + + return parse_dates + + +def is_index_col(col) -> bool: + return col is not None and col is not False diff --git a/.local/lib/python3.8/site-packages/pandas/io/parsers/c_parser_wrapper.py b/.local/lib/python3.8/site-packages/pandas/io/parsers/c_parser_wrapper.py new file mode 100644 index 0000000..fc0f572 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/parsers/c_parser_wrapper.py @@ -0,0 +1,419 @@ +from __future__ import annotations + +from typing import ( + Hashable, + Mapping, + Sequence, +) +import warnings + +import numpy as np + +import pandas._libs.parsers as parsers +from pandas._typing import ( + ArrayLike, + DtypeArg, + DtypeObj, + ReadCsvBuffer, +) +from pandas.errors import DtypeWarning +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + is_categorical_dtype, + pandas_dtype, +) +from pandas.core.dtypes.concat import union_categoricals +from pandas.core.dtypes.dtypes import ExtensionDtype + +from pandas import ( + Index, + MultiIndex, +) +from pandas.core.indexes.api import ensure_index_from_sequences + +from pandas.io.parsers.base_parser import ( + ParserBase, + is_index_col, +) + + +class CParserWrapper(ParserBase): + low_memory: bool + _reader: parsers.TextReader + + def __init__(self, src: ReadCsvBuffer[str], **kwds): + super().__init__(kwds) + self.kwds = kwds + kwds = kwds.copy() + + self.low_memory = kwds.pop("low_memory", False) + + # #2442 + # error: Cannot determine type of 'index_col' + kwds["allow_leading_cols"] = ( + self.index_col is not False # type: ignore[has-type] + ) + + # GH20529, validate usecol arg before TextReader + kwds["usecols"] = self.usecols + + # Have to pass int, would break tests using TextReader directly otherwise :( + kwds["on_bad_lines"] = self.on_bad_lines.value + + for key in ( + "storage_options", + "encoding", + "memory_map", + "compression", + "error_bad_lines", + "warn_bad_lines", + ): + kwds.pop(key, None) + + kwds["dtype"] = ensure_dtype_objs(kwds.get("dtype", None)) + self._reader = parsers.TextReader(src, **kwds) + + self.unnamed_cols = self._reader.unnamed_cols + + # error: Cannot determine type of 'names' + passed_names = self.names is None # type: ignore[has-type] + + if self._reader.header is None: + self.names = None + else: + # error: Cannot determine type of 'names' + # error: Cannot determine type of 'index_names' + ( + self.names, # type: ignore[has-type] + self.index_names, + self.col_names, + passed_names, + ) = self._extract_multi_indexer_columns( + self._reader.header, + self.index_names, # type: ignore[has-type] + passed_names, + ) + + # error: Cannot determine type of 'names' + if self.names is None: # type: ignore[has-type] + if self.prefix: + # error: Cannot determine type of 'names' + self.names = [ # type: ignore[has-type] + f"{self.prefix}{i}" for i in range(self._reader.table_width) + ] + else: + # error: Cannot determine type of 'names' + self.names = list( # type: ignore[has-type] + range(self._reader.table_width) + ) + + # gh-9755 + # + # need to set orig_names here first + # so that proper indexing can be done + # with _set_noconvert_columns + # + # once names has been filtered, we will + # then set orig_names again to names + # error: Cannot 
determine type of 'names' + self.orig_names = self.names[:] # type: ignore[has-type] + + if self.usecols: + usecols = self._evaluate_usecols(self.usecols, self.orig_names) + + # GH 14671 + # assert for mypy, orig_names is List or None, None would error in issubset + assert self.orig_names is not None + if self.usecols_dtype == "string" and not set(usecols).issubset( + self.orig_names + ): + self._validate_usecols_names(usecols, self.orig_names) + + # error: Cannot determine type of 'names' + if len(self.names) > len(usecols): # type: ignore[has-type] + # error: Cannot determine type of 'names' + self.names = [ # type: ignore[has-type] + n + # error: Cannot determine type of 'names' + for i, n in enumerate(self.names) # type: ignore[has-type] + if (i in usecols or n in usecols) + ] + + # error: Cannot determine type of 'names' + if len(self.names) < len(usecols): # type: ignore[has-type] + # error: Cannot determine type of 'names' + self._validate_usecols_names( + usecols, + self.names, # type: ignore[has-type] + ) + + # error: Cannot determine type of 'names' + self._validate_parse_dates_presence(self.names) # type: ignore[has-type] + self._set_noconvert_columns() + + # error: Cannot determine type of 'names' + self.orig_names = self.names # type: ignore[has-type] + + if not self._has_complex_date_col: + # error: Cannot determine type of 'index_col' + if self._reader.leading_cols == 0 and is_index_col( + self.index_col # type: ignore[has-type] + ): + + self._name_processed = True + ( + index_names, + # error: Cannot determine type of 'names' + self.names, # type: ignore[has-type] + self.index_col, + ) = self._clean_index_names( + # error: Cannot determine type of 'names' + self.names, # type: ignore[has-type] + # error: Cannot determine type of 'index_col' + self.index_col, # type: ignore[has-type] + self.unnamed_cols, + ) + + if self.index_names is None: + self.index_names = index_names + + if self._reader.header is None and not passed_names: + assert self.index_names is not None + self.index_names = [None] * len(self.index_names) + + self._implicit_index = self._reader.leading_cols > 0 + + def close(self) -> None: + # close handles opened by C parser + try: + self._reader.close() + except ValueError: + pass + + def _set_noconvert_columns(self) -> None: + """ + Set the columns that should not undergo dtype conversions. + + Currently, any column that is involved with date parsing will not + undergo such conversions. 
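+
+        The resolved positions are forwarded to the C reader via
+        ``TextReader.set_noconvert``.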
+ """ + assert self.orig_names is not None + # error: Cannot determine type of 'names' + + # much faster than using orig_names.index(x) xref GH#44106 + names_dict = {x: i for i, x in enumerate(self.orig_names)} + col_indices = [names_dict[x] for x in self.names] # type: ignore[has-type] + # error: Cannot determine type of 'names' + noconvert_columns = self._set_noconvert_dtype_columns( + col_indices, + self.names, # type: ignore[has-type] + ) + for col in noconvert_columns: + self._reader.set_noconvert(col) + + def read( + self, + nrows: int | None = None, + ) -> tuple[ + Index | MultiIndex | None, + Sequence[Hashable] | MultiIndex, + Mapping[Hashable, ArrayLike], + ]: + try: + if self.low_memory: + chunks = self._reader.read_low_memory(nrows) + # destructive to chunks + data = _concatenate_chunks(chunks) + + else: + data = self._reader.read(nrows) + except StopIteration: + if self._first_chunk: + self._first_chunk = False + names = self._maybe_dedup_names(self.orig_names) + index, columns, col_dict = self._get_empty_meta( + names, + self.index_col, + self.index_names, + dtype=self.kwds.get("dtype"), + ) + columns = self._maybe_make_multi_index_columns(columns, self.col_names) + + if self.usecols is not None: + columns = self._filter_usecols(columns) + + col_dict = {k: v for k, v in col_dict.items() if k in columns} + + return index, columns, col_dict + + else: + self.close() + raise + + # Done with first read, next time raise StopIteration + self._first_chunk = False + + # error: Cannot determine type of 'names' + names = self.names # type: ignore[has-type] + + if self._reader.leading_cols: + if self._has_complex_date_col: + raise NotImplementedError("file structure not yet supported") + + # implicit index, no index names + arrays = [] + + for i in range(self._reader.leading_cols): + if self.index_col is None: + values = data.pop(i) + else: + values = data.pop(self.index_col[i]) + + values = self._maybe_parse_dates(values, i, try_parse_dates=True) + arrays.append(values) + + index = ensure_index_from_sequences(arrays) + + if self.usecols is not None: + names = self._filter_usecols(names) + + names = self._maybe_dedup_names(names) + + # rename dict keys + data_tups = sorted(data.items()) + data = {k: v for k, (i, v) in zip(names, data_tups)} + + names, date_data = self._do_date_conversions(names, data) + + else: + # rename dict keys + data_tups = sorted(data.items()) + + # ugh, mutation + + # assert for mypy, orig_names is List or None, None would error in list(...) 
+ assert self.orig_names is not None + names = list(self.orig_names) + names = self._maybe_dedup_names(names) + + if self.usecols is not None: + names = self._filter_usecols(names) + + # columns as list + alldata = [x[1] for x in data_tups] + if self.usecols is None: + self._check_data_length(names, alldata) + + data = {k: v for k, (i, v) in zip(names, data_tups)} + + names, date_data = self._do_date_conversions(names, data) + index, names = self._make_index(date_data, alldata, names) + + # maybe create a mi on the columns + conv_names = self._maybe_make_multi_index_columns(names, self.col_names) + + return index, conv_names, date_data + + def _filter_usecols(self, names: Sequence[Hashable]) -> Sequence[Hashable]: + # hackish + usecols = self._evaluate_usecols(self.usecols, names) + if usecols is not None and len(names) != len(usecols): + names = [ + name for i, name in enumerate(names) if i in usecols or name in usecols + ] + return names + + def _get_index_names(self): + names = list(self._reader.header[0]) + idx_names = None + + if self._reader.leading_cols == 0 and self.index_col is not None: + (idx_names, names, self.index_col) = self._clean_index_names( + names, self.index_col, self.unnamed_cols + ) + + return names, idx_names + + def _maybe_parse_dates(self, values, index: int, try_parse_dates: bool = True): + if try_parse_dates and self._should_parse_dates(index): + values = self._date_conv(values) + return values + + +def _concatenate_chunks(chunks: list[dict[int, ArrayLike]]) -> dict: + """ + Concatenate chunks of data read with low_memory=True. + + The tricky part is handling Categoricals, where different chunks + may have different inferred categories. + """ + names = list(chunks[0].keys()) + warning_columns = [] + + result = {} + for name in names: + arrs = [chunk.pop(name) for chunk in chunks] + # Check each arr for consistent types. + dtypes = {a.dtype for a in arrs} + # TODO: shouldn't we exclude all EA dtypes here? + numpy_dtypes = {x for x in dtypes if not is_categorical_dtype(x)} + if len(numpy_dtypes) > 1: + # error: Argument 1 to "find_common_type" has incompatible type + # "Set[Any]"; expected "Sequence[Union[dtype[Any], None, type, + # _SupportsDType, str, Union[Tuple[Any, int], Tuple[Any, + # Union[int, Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]]]" + common_type = np.find_common_type( + numpy_dtypes, # type: ignore[arg-type] + [], + ) + if common_type == object: + warning_columns.append(str(name)) + + dtype = dtypes.pop() + if is_categorical_dtype(dtype): + result[name] = union_categoricals(arrs, sort_categories=False) + else: + if isinstance(dtype, ExtensionDtype): + # TODO: concat_compat? 
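+                # (Illustrative note) e.g. chunks of a nullable "Int64"
+                # column are rejoined through the dtype's own array type via
+                # _concat_same_type rather than np.concatenate.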
+ array_type = dtype.construct_array_type() + # error: Argument 1 to "_concat_same_type" of "ExtensionArray" + # has incompatible type "List[Union[ExtensionArray, ndarray]]"; + # expected "Sequence[ExtensionArray]" + result[name] = array_type._concat_same_type( + arrs # type: ignore[arg-type] + ) + else: + # Argument 1 to "concatenate" has incompatible type + # "List[Union[ExtensionArray, ndarray[Any, Any]]]"; expected + # "Union[_SupportsArray[dtype[Any]], + # Sequence[_SupportsArray[dtype[Any]]], + # Sequence[Sequence[_SupportsArray[dtype[Any]]]], + # Sequence[Sequence[Sequence[_SupportsArray[dtype[Any]]]]], + # Sequence[Sequence[Sequence[Sequence[_SupportsArray[dtype[Any]]]]]]]" + result[name] = np.concatenate(arrs) # type: ignore[arg-type] + + if warning_columns: + warning_names = ",".join(warning_columns) + warning_message = " ".join( + [ + f"Columns ({warning_names}) have mixed types. " + f"Specify dtype option on import or set low_memory=False." + ] + ) + warnings.warn(warning_message, DtypeWarning, stacklevel=find_stack_level()) + return result + + +def ensure_dtype_objs( + dtype: DtypeArg | dict[Hashable, DtypeArg] | None +) -> DtypeObj | dict[Hashable, DtypeObj] | None: + """ + Ensure we have either None, a dtype object, or a dictionary mapping to + dtype objects. + """ + if isinstance(dtype, dict): + return {k: pandas_dtype(dtype[k]) for k in dtype} + elif dtype is not None: + return pandas_dtype(dtype) + return dtype diff --git a/.local/lib/python3.8/site-packages/pandas/io/parsers/python_parser.py b/.local/lib/python3.8/site-packages/pandas/io/parsers/python_parser.py new file mode 100644 index 0000000..52fa3be --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/parsers/python_parser.py @@ -0,0 +1,1310 @@ +from __future__ import annotations + +from collections import ( + abc, + defaultdict, +) +import csv +from io import StringIO +import re +import sys +from typing import ( + IO, + DefaultDict, + Hashable, + Iterator, + Literal, + Mapping, + Sequence, + cast, +) +import warnings + +import numpy as np + +import pandas._libs.lib as lib +from pandas._typing import ( + ArrayLike, + ReadCsvBuffer, + Scalar, +) +from pandas.errors import ( + EmptyDataError, + ParserError, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import is_integer +from pandas.core.dtypes.inference import is_dict_like + +from pandas.io.parsers.base_parser import ( + ParserBase, + parser_defaults, +) + +# BOM character (byte order mark) +# This exists at the beginning of a file to indicate endianness +# of a file (stream). Unfortunately, this marker screws up parsing, +# so we need to remove it if we see it. 
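+# For example, a file saved as UTF-8 with a BOM starts with the bytes
+# EF BB BF, which decode to the single "\ufeff" code point below and would
+# otherwise end up glued onto the first column name.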
+_BOM = "\ufeff" + + +class PythonParser(ParserBase): + def __init__(self, f: ReadCsvBuffer[str] | list, **kwds): + """ + Workhorse function for processing nested list into DataFrame + """ + super().__init__(kwds) + + self.data: Iterator[str] | None = None + self.buf: list = [] + self.pos = 0 + self.line_pos = 0 + + self.skiprows = kwds["skiprows"] + + if callable(self.skiprows): + self.skipfunc = self.skiprows + else: + self.skipfunc = lambda x: x in self.skiprows + + self.skipfooter = _validate_skipfooter_arg(kwds["skipfooter"]) + self.delimiter = kwds["delimiter"] + + self.quotechar = kwds["quotechar"] + if isinstance(self.quotechar, str): + self.quotechar = str(self.quotechar) + + self.escapechar = kwds["escapechar"] + self.doublequote = kwds["doublequote"] + self.skipinitialspace = kwds["skipinitialspace"] + self.lineterminator = kwds["lineterminator"] + self.quoting = kwds["quoting"] + self.skip_blank_lines = kwds["skip_blank_lines"] + + self.names_passed = kwds["names"] or None + + self.has_index_names = False + if "has_index_names" in kwds: + self.has_index_names = kwds["has_index_names"] + + self.verbose = kwds["verbose"] + self.converters = kwds["converters"] + + self.thousands = kwds["thousands"] + self.decimal = kwds["decimal"] + + self.comment = kwds["comment"] + + # Set self.data to something that can read lines. + if isinstance(f, list): + # read_excel: f is a list + self.data = cast(Iterator[str], f) + else: + assert hasattr(f, "readline") + self._make_reader(f) + + # Get columns in two steps: infer from data, then + # infer column indices from self.usecols if it is specified. + self._col_indices: list[int] | None = None + columns: list[list[Scalar | None]] + ( + columns, + self.num_original_columns, + self.unnamed_cols, + ) = self._infer_columns() + + # Now self.columns has the set of columns that we will process. + # The original set is stored in self.original_columns. + # error: Cannot determine type of 'index_names' + self.columns: list[Hashable] + ( + self.columns, + self.index_names, + self.col_names, + _, + ) = self._extract_multi_indexer_columns( + columns, + self.index_names, # type: ignore[has-type] + ) + + # get popped off for index + self.orig_names: list[Hashable] = list(self.columns) + + # needs to be cleaned/refactored + # multiple date column thing turning into a real spaghetti factory + + if not self._has_complex_date_col: + (index_names, self.orig_names, self.columns) = self._get_index_name( + self.columns + ) + self._name_processed = True + if self.index_names is None: + self.index_names = index_names + + if self._col_indices is None: + self._col_indices = list(range(len(self.columns))) + + self._parse_date_cols = self._validate_parse_dates_presence(self.columns) + no_thousands_columns: set[int] | None = None + if self.parse_dates: + no_thousands_columns = self._set_noconvert_dtype_columns( + self._col_indices, self.columns + ) + self._no_thousands_columns = no_thousands_columns + + if len(self.decimal) != 1: + raise ValueError("Only length-1 decimal markers supported") + + decimal = re.escape(self.decimal) + if self.thousands is None: + regex = fr"^[\-\+]?[0-9]*({decimal}[0-9]*)?([0-9]?(E|e)\-?[0-9]+)?$" + else: + thousands = re.escape(self.thousands) + regex = ( + fr"^[\-\+]?([0-9]+{thousands}|[0-9])*({decimal}[0-9]*)?" 
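+                # Illustrative: with thousands="," and decimal=".", the full
+                # pattern matches fields like "1,234.56" or "2e-4"; only such
+                # numeric-looking fields have their separators stripped.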
+ fr"([0-9]?(E|e)\-?[0-9]+)?$" + ) + self.num = re.compile(regex) + + def _make_reader(self, f) -> None: + sep = self.delimiter + + if sep is None or len(sep) == 1: + if self.lineterminator: + raise ValueError( + "Custom line terminators not supported in python parser (yet)" + ) + + class MyDialect(csv.Dialect): + delimiter = self.delimiter + quotechar = self.quotechar + escapechar = self.escapechar + doublequote = self.doublequote + skipinitialspace = self.skipinitialspace + quoting = self.quoting + lineterminator = "\n" + + dia = MyDialect + + if sep is not None: + dia.delimiter = sep + else: + # attempt to sniff the delimiter from the first valid line, + # i.e. no comment line and not in skiprows + line = f.readline() + lines = self._check_comments([[line]])[0] + while self.skipfunc(self.pos) or not lines: + self.pos += 1 + line = f.readline() + lines = self._check_comments([[line]])[0] + + # since `line` was a string, lines will be a list containing + # only a single string + line = lines[0] + + self.pos += 1 + self.line_pos += 1 + sniffed = csv.Sniffer().sniff(line) + dia.delimiter = sniffed.delimiter + + # Note: encoding is irrelevant here + line_rdr = csv.reader(StringIO(line), dialect=dia) + self.buf.extend(list(line_rdr)) + + # Note: encoding is irrelevant here + reader = csv.reader(f, dialect=dia, strict=True) + + else: + + def _read(): + line = f.readline() + pat = re.compile(sep) + + yield pat.split(line.strip()) + + for line in f: + yield pat.split(line.strip()) + + reader = _read() + + # error: Incompatible types in assignment (expression has type "_reader", + # variable has type "Union[IO[Any], RawIOBase, BufferedIOBase, TextIOBase, + # TextIOWrapper, mmap, None]") + self.data = reader # type: ignore[assignment] + + def read(self, rows: int | None = None): + try: + content = self._get_lines(rows) + except StopIteration: + if self._first_chunk: + content = [] + else: + self.close() + raise + + # done with first read, next time raise StopIteration + self._first_chunk = False + + columns: Sequence[Hashable] = list(self.orig_names) + if not len(content): # pragma: no cover + # DataFrame with the right metadata, even though it's length 0 + names = self._maybe_dedup_names(self.orig_names) + # error: Cannot determine type of 'index_col' + index, columns, col_dict = self._get_empty_meta( + names, + self.index_col, # type: ignore[has-type] + self.index_names, + self.dtype, + ) + conv_columns = self._maybe_make_multi_index_columns(columns, self.col_names) + return index, conv_columns, col_dict + + # handle new style for names in index + count_empty_content_vals = count_empty_vals(content[0]) + indexnamerow = None + if self.has_index_names and count_empty_content_vals == len(columns): + indexnamerow = content[0] + content = content[1:] + + alldata = self._rows_to_cols(content) + data, columns = self._exclude_implicit_index(alldata) + + conv_data = self._convert_data(data) + columns, conv_data = self._do_date_conversions(columns, conv_data) + + index, columns = self._make_index(conv_data, alldata, columns, indexnamerow) + + return index, columns, conv_data + + def _exclude_implicit_index( + self, + alldata: list[np.ndarray], + ) -> tuple[Mapping[Hashable, np.ndarray], Sequence[Hashable]]: + names = self._maybe_dedup_names(self.orig_names) + + offset = 0 + if self._implicit_index: + # error: Cannot determine type of 'index_col' + offset = len(self.index_col) # type: ignore[has-type] + + len_alldata = len(alldata) + self._check_data_length(names, alldata) + + return { + name: alldata[i + 
offset] for i, name in enumerate(names) if i < len_alldata + }, names + + # legacy + def get_chunk(self, size=None): + if size is None: + # error: "PythonParser" has no attribute "chunksize" + size = self.chunksize # type: ignore[attr-defined] + return self.read(rows=size) + + def _convert_data( + self, + data: Mapping[Hashable, np.ndarray], + ) -> Mapping[Hashable, ArrayLike]: + # apply converters + clean_conv = self._clean_mapping(self.converters) + clean_dtypes = self._clean_mapping(self.dtype) + + # Apply NA values. + clean_na_values = {} + clean_na_fvalues = {} + + if isinstance(self.na_values, dict): + for col in self.na_values: + na_value = self.na_values[col] + na_fvalue = self.na_fvalues[col] + + if isinstance(col, int) and col not in self.orig_names: + col = self.orig_names[col] + + clean_na_values[col] = na_value + clean_na_fvalues[col] = na_fvalue + else: + clean_na_values = self.na_values + clean_na_fvalues = self.na_fvalues + + return self._convert_to_ndarrays( + data, + clean_na_values, + clean_na_fvalues, + self.verbose, + clean_conv, + clean_dtypes, + ) + + def _infer_columns( + self, + ) -> tuple[list[list[Scalar | None]], int, set[Scalar | None]]: + names = self.names + num_original_columns = 0 + clear_buffer = True + unnamed_cols: set[Scalar | None] = set() + self._header_line = None + + if self.header is not None: + header = self.header + + if isinstance(header, (list, tuple, np.ndarray)): + have_mi_columns = len(header) > 1 + # we have a mi columns, so read an extra line + if have_mi_columns: + header = list(header) + [header[-1] + 1] + else: + have_mi_columns = False + header = [header] + + columns: list[list[Scalar | None]] = [] + for level, hr in enumerate(header): + try: + line = self._buffered_line() + + while self.line_pos <= hr: + line = self._next_line() + + except StopIteration as err: + if self.line_pos < hr: + raise ValueError( + f"Passed header={hr} but only {self.line_pos + 1} lines in " + "file" + ) from err + + # We have an empty file, so check + # if columns are provided. 
That will + # serve as the 'line' for parsing + if have_mi_columns and hr > 0: + if clear_buffer: + self._clear_buffer() + columns.append([None] * len(columns[-1])) + return columns, num_original_columns, unnamed_cols + + if not self.names: + raise EmptyDataError("No columns to parse from file") from err + + line = self.names[:] + + this_columns: list[Scalar | None] = [] + this_unnamed_cols = [] + + for i, c in enumerate(line): + if c == "": + if have_mi_columns: + col_name = f"Unnamed: {i}_level_{level}" + else: + col_name = f"Unnamed: {i}" + + this_unnamed_cols.append(i) + this_columns.append(col_name) + else: + this_columns.append(c) + + if not have_mi_columns and self.mangle_dupe_cols: + counts: DefaultDict = defaultdict(int) + # Ensure that regular columns are used before unnamed ones + # to keep given names and mangle unnamed columns + col_loop_order = [ + i + for i in range(len(this_columns)) + if i not in this_unnamed_cols + ] + this_unnamed_cols + + for i in col_loop_order: + col = this_columns[i] + old_col = col + cur_count = counts[col] + + if cur_count > 0: + while cur_count > 0: + counts[old_col] = cur_count + 1 + col = f"{old_col}.{cur_count}" + if col in this_columns: + cur_count += 1 + else: + cur_count = counts[col] + + if ( + self.dtype is not None + and is_dict_like(self.dtype) + and self.dtype.get(old_col) is not None + and self.dtype.get(col) is None + ): + self.dtype.update({col: self.dtype.get(old_col)}) + this_columns[i] = col + counts[col] = cur_count + 1 + elif have_mi_columns: + + # if we have grabbed an extra line, but its not in our + # format so save in the buffer, and create an blank extra + # line for the rest of the parsing code + if hr == header[-1]: + lc = len(this_columns) + # error: Cannot determine type of 'index_col' + sic = self.index_col # type: ignore[has-type] + ic = len(sic) if sic is not None else 0 + unnamed_count = len(this_unnamed_cols) + + # if wrong number of blanks or no index, not our format + if (lc != unnamed_count and lc - ic > unnamed_count) or ic == 0: + clear_buffer = False + this_columns = [None] * lc + self.buf = [self.buf[-1]] + + columns.append(this_columns) + unnamed_cols.update({this_columns[i] for i in this_unnamed_cols}) + + if len(columns) == 1: + num_original_columns = len(this_columns) + + if clear_buffer: + self._clear_buffer() + + first_line: list[Scalar] | None + if names is not None: + # Read first row after header to check if data are longer + try: + first_line = self._next_line() + except StopIteration: + first_line = None + + len_first_data_row = 0 if first_line is None else len(first_line) + + if len(names) > len(columns[0]) and len(names) > len_first_data_row: + raise ValueError( + "Number of passed names did not match " + "number of header fields in the file" + ) + if len(columns) > 1: + raise TypeError("Cannot pass names with multi-index columns") + + if self.usecols is not None: + # Set _use_cols. We don't store columns because they are + # overwritten. 
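+                    # _handle_usecols records the selected column indices on
+                    # self._col_indices as a side effect; the return value is
+                    # not needed here.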
+ self._handle_usecols(columns, names, num_original_columns) + else: + num_original_columns = len(names) + if self._col_indices is not None and len(names) != len( + self._col_indices + ): + columns = [[names[i] for i in sorted(self._col_indices)]] + else: + columns = [names] + else: + columns = self._handle_usecols( + columns, columns[0], num_original_columns + ) + else: + try: + line = self._buffered_line() + + except StopIteration as err: + if not names: + raise EmptyDataError("No columns to parse from file") from err + + line = names[:] + + # Store line, otherwise it is lost for guessing the index + self._header_line = line + ncols = len(line) + num_original_columns = ncols + + if not names: + if self.prefix: + columns = [[f"{self.prefix}{i}" for i in range(ncols)]] + else: + columns = [list(range(ncols))] + columns = self._handle_usecols( + columns, columns[0], num_original_columns + ) + else: + if self.usecols is None or len(names) >= num_original_columns: + columns = self._handle_usecols([names], names, num_original_columns) + num_original_columns = len(names) + else: + if not callable(self.usecols) and len(names) != len(self.usecols): + raise ValueError( + "Number of passed names did not match number of " + "header fields in the file" + ) + # Ignore output but set used columns. + self._handle_usecols([names], names, ncols) + columns = [names] + num_original_columns = ncols + + return columns, num_original_columns, unnamed_cols + + def _handle_usecols( + self, + columns: list[list[Scalar | None]], + usecols_key: list[Scalar | None], + num_original_columns: int, + ) -> list[list[Scalar | None]]: + """ + Sets self._col_indices + + usecols_key is used if there are string usecols. + """ + col_indices: set[int] | list[int] + if self.usecols is not None: + if callable(self.usecols): + col_indices = self._evaluate_usecols(self.usecols, usecols_key) + elif any(isinstance(u, str) for u in self.usecols): + if len(columns) > 1: + raise ValueError( + "If using multiple headers, usecols must be integers." + ) + col_indices = [] + + for col in self.usecols: + if isinstance(col, str): + try: + col_indices.append(usecols_key.index(col)) + except ValueError: + self._validate_usecols_names(self.usecols, usecols_key) + else: + col_indices.append(col) + else: + missing_usecols = [ + col for col in self.usecols if col >= num_original_columns + ] + if missing_usecols: + warnings.warn( + "Defining usecols with out of bounds indices is deprecated " + "and will raise a ParserError in a future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + col_indices = self.usecols + + columns = [ + [n for i, n in enumerate(column) if i in col_indices] + for column in columns + ] + self._col_indices = sorted(col_indices) + return columns + + def _buffered_line(self): + """ + Return a line from buffer, filling buffer if required. + """ + if len(self.buf) > 0: + return self.buf[0] + else: + return self._next_line() + + def _check_for_bom(self, first_row: list[Scalar]) -> list[Scalar]: + """ + Checks whether the file begins with the BOM character. + If it does, remove it. In addition, if there is quoting + in the field subsequent to the BOM, remove it as well + because it technically takes place at the beginning of + the name, not the middle of it. + """ + # first_row will be a list, so we need to check + # that that list is not empty before proceeding. + if not first_row: + return first_row + + # The first element of this row is the one that could have the + # BOM that we want to remove. 
Check that the first element is a + # string before proceeding. + if not isinstance(first_row[0], str): + return first_row + + # Check that the string is not empty, as that would + # obviously not have a BOM at the start of it. + if not first_row[0]: + return first_row + + # Since the string is non-empty, check that it does + # in fact begin with a BOM. + first_elt = first_row[0][0] + if first_elt != _BOM: + return first_row + + first_row_bom = first_row[0] + new_row: str + + if len(first_row_bom) > 1 and first_row_bom[1] == self.quotechar: + start = 2 + quote = first_row_bom[1] + end = first_row_bom[2:].index(quote) + 2 + + # Extract the data between the quotation marks + new_row = first_row_bom[start:end] + + # Extract any remaining data after the second + # quotation mark. + if len(first_row_bom) > end + 1: + new_row += first_row_bom[end + 1 :] + + else: + + # No quotation so just remove BOM from first element + new_row = first_row_bom[1:] + + new_row_list: list[Scalar] = [new_row] + return new_row_list + first_row[1:] + + def _is_line_empty(self, line: list[Scalar]) -> bool: + """ + Check if a line is empty or not. + + Parameters + ---------- + line : str, array-like + The line of data to check. + + Returns + ------- + boolean : Whether or not the line is empty. + """ + return not line or all(not x for x in line) + + def _next_line(self) -> list[Scalar]: + if isinstance(self.data, list): + while self.skipfunc(self.pos): + self.pos += 1 + + while True: + try: + line = self._check_comments([self.data[self.pos]])[0] + self.pos += 1 + # either uncommented or blank to begin with + if not self.skip_blank_lines and ( + self._is_line_empty(self.data[self.pos - 1]) or line + ): + break + elif self.skip_blank_lines: + ret = self._remove_empty_lines([line]) + if ret: + line = ret[0] + break + except IndexError: + raise StopIteration + else: + while self.skipfunc(self.pos): + self.pos += 1 + # assert for mypy, data is Iterator[str] or None, would error in next + assert self.data is not None + next(self.data) + + while True: + orig_line = self._next_iter_line(row_num=self.pos + 1) + self.pos += 1 + + if orig_line is not None: + line = self._check_comments([orig_line])[0] + + if self.skip_blank_lines: + ret = self._remove_empty_lines([line]) + + if ret: + line = ret[0] + break + elif self._is_line_empty(orig_line) or line: + break + + # This was the first line of the file, + # which could contain the BOM at the + # beginning of it. + if self.pos == 1: + line = self._check_for_bom(line) + + self.line_pos += 1 + self.buf.append(line) + return line + + def _alert_malformed(self, msg: str, row_num: int) -> None: + """ + Alert a user about a malformed row, depending on value of + `self.on_bad_lines` enum. + + If `self.on_bad_lines` is ERROR, the alert will be `ParserError`. + If `self.on_bad_lines` is WARN, the alert will be printed out. + + Parameters + ---------- + msg: str + The error message to display. + row_num: int + The row number where the parsing error occurred. + Because this row number is displayed, we 1-index, + even though we 0-index internally. + """ + if self.on_bad_lines == self.BadLineHandleMethod.ERROR: + raise ParserError(msg) + elif self.on_bad_lines == self.BadLineHandleMethod.WARN: + base = f"Skipping line {row_num}: " + sys.stderr.write(base + msg + "\n") + + def _next_iter_line(self, row_num: int) -> list[Scalar] | None: + """ + Wrapper around iterating through `self.data` (CSV source). 
+ + When a CSV error is raised, we check for specific + error messages that allow us to customize the + error message displayed to the user. + + Parameters + ---------- + row_num: int + The row number of the line being parsed. + """ + try: + # assert for mypy, data is Iterator[str] or None, would error in next + assert self.data is not None + line = next(self.data) + # for mypy + assert isinstance(line, list) + return line + except csv.Error as e: + if ( + self.on_bad_lines == self.BadLineHandleMethod.ERROR + or self.on_bad_lines == self.BadLineHandleMethod.WARN + ): + msg = str(e) + + if "NULL byte" in msg or "line contains NUL" in msg: + msg = ( + "NULL byte detected. This byte " + "cannot be processed in Python's " + "native csv library at the moment, " + "so please pass in engine='c' instead" + ) + + if self.skipfooter > 0: + reason = ( + "Error could possibly be due to " + "parsing errors in the skipped footer rows " + "(the skipfooter keyword is only applied " + "after Python's csv library has parsed " + "all rows)." + ) + msg += ". " + reason + + self._alert_malformed(msg, row_num) + return None + + def _check_comments(self, lines: list[list[Scalar]]) -> list[list[Scalar]]: + if self.comment is None: + return lines + ret = [] + for line in lines: + rl = [] + for x in line: + if ( + not isinstance(x, str) + or self.comment not in x + or x in self.na_values + ): + rl.append(x) + else: + x = x[: x.find(self.comment)] + if len(x) > 0: + rl.append(x) + break + ret.append(rl) + return ret + + def _remove_empty_lines(self, lines: list[list[Scalar]]) -> list[list[Scalar]]: + """ + Iterate through the lines and remove any that are + either empty or contain only one whitespace value + + Parameters + ---------- + lines : list of list of Scalars + The array of lines that we are to filter. + + Returns + ------- + filtered_lines : list of list of Scalars + The same array of lines with the "empty" ones removed. + """ + ret = [] + for line in lines: + # Remove empty lines and lines with only one whitespace value + if ( + len(line) > 1 + or len(line) == 1 + and (not isinstance(line[0], str) or line[0].strip()) + ): + ret.append(line) + return ret + + def _check_thousands(self, lines: list[list[Scalar]]) -> list[list[Scalar]]: + if self.thousands is None: + return lines + + return self._search_replace_num_columns( + lines=lines, search=self.thousands, replace="" + ) + + def _search_replace_num_columns( + self, lines: list[list[Scalar]], search: str, replace: str + ) -> list[list[Scalar]]: + ret = [] + for line in lines: + rl = [] + for i, x in enumerate(line): + if ( + not isinstance(x, str) + or search not in x + or (self._no_thousands_columns and i in self._no_thousands_columns) + or not self.num.search(x.strip()) + ): + rl.append(x) + else: + rl.append(x.replace(search, replace)) + ret.append(rl) + return ret + + def _check_decimal(self, lines: list[list[Scalar]]) -> list[list[Scalar]]: + if self.decimal == parser_defaults["decimal"]: + return lines + + return self._search_replace_num_columns( + lines=lines, search=self.decimal, replace="." + ) + + def _clear_buffer(self) -> None: + self.buf = [] + + _implicit_index = False + + def _get_index_name(self, columns: list[Hashable]): + """ + Try several cases to get lines: + + 0) There are headers on row 0 and row 1 and their + total summed lengths equals the length of the next line. + Treat row 0 as columns and row 1 as indices + 1) Look for implicit index: there are more columns + on row 1 than row 0. 
If this is true, assume that row + 1 lists index columns and row 0 lists normal columns. + 2) Get index from the columns if it was listed. + """ + orig_names = list(columns) + columns = list(columns) + + line: list[Scalar] | None + if self._header_line is not None: + line = self._header_line + else: + try: + line = self._next_line() + except StopIteration: + line = None + + next_line: list[Scalar] | None + try: + next_line = self._next_line() + except StopIteration: + next_line = None + + # implicitly index_col=0 b/c 1 fewer column names + implicit_first_cols = 0 + if line is not None: + # leave it 0, #2442 + # Case 1 + # error: Cannot determine type of 'index_col' + index_col = self.index_col # type: ignore[has-type] + if index_col is not False: + implicit_first_cols = len(line) - self.num_original_columns + + # Case 0 + if next_line is not None: + if len(next_line) == len(line) + self.num_original_columns: + # column and index names on diff rows + self.index_col = list(range(len(line))) + self.buf = self.buf[1:] + + for c in reversed(line): + columns.insert(0, c) + + # Update list of original names to include all indices. + orig_names = list(columns) + self.num_original_columns = len(columns) + return line, orig_names, columns + + if implicit_first_cols > 0: + # Case 1 + self._implicit_index = True + if self.index_col is None: + self.index_col = list(range(implicit_first_cols)) + + index_name = None + + else: + # Case 2 + (index_name, columns_, self.index_col) = self._clean_index_names( + columns, self.index_col, self.unnamed_cols + ) + + return index_name, orig_names, columns + + def _rows_to_cols(self, content: list[list[Scalar]]) -> list[np.ndarray]: + col_len = self.num_original_columns + + if self._implicit_index: + col_len += len(self.index_col) + + max_len = max(len(row) for row in content) + + # Check that there are no rows with too many + # elements in their row (rows with too few + # elements are padded with NaN). + # error: Non-overlapping identity check (left operand type: "List[int]", + # right operand type: "Literal[False]") + if ( + max_len > col_len + and self.index_col is not False # type: ignore[comparison-overlap] + and self.usecols is None + ): + + footers = self.skipfooter if self.skipfooter else 0 + bad_lines = [] + + iter_content = enumerate(content) + content_len = len(content) + content = [] + + for (i, l) in iter_content: + actual_len = len(l) + + if actual_len > col_len: + if callable(self.on_bad_lines): + new_l = self.on_bad_lines(l) + if new_l is not None: + content.append(new_l) + elif ( + self.on_bad_lines == self.BadLineHandleMethod.ERROR + or self.on_bad_lines == self.BadLineHandleMethod.WARN + ): + row_num = self.pos - (content_len - i + footers) + bad_lines.append((row_num, actual_len)) + + if self.on_bad_lines == self.BadLineHandleMethod.ERROR: + break + else: + content.append(l) + + for row_num, actual_len in bad_lines: + msg = ( + f"Expected {col_len} fields in line {row_num + 1}, saw " + f"{actual_len}" + ) + if ( + self.delimiter + and len(self.delimiter) > 1 + and self.quoting != csv.QUOTE_NONE + ): + # see gh-13374 + reason = ( + "Error could possibly be due to quotes being " + "ignored when a multi-char delimiter is used." + ) + msg += ". 
" + reason + + self._alert_malformed(msg, row_num + 1) + + # see gh-13320 + zipped_content = list(lib.to_object_array(content, min_width=col_len).T) + + if self.usecols: + assert self._col_indices is not None + col_indices = self._col_indices + + if self._implicit_index: + zipped_content = [ + a + for i, a in enumerate(zipped_content) + if ( + i < len(self.index_col) + or i - len(self.index_col) in col_indices + ) + ] + else: + zipped_content = [ + a for i, a in enumerate(zipped_content) if i in col_indices + ] + return zipped_content + + def _get_lines(self, rows: int | None = None): + lines = self.buf + new_rows = None + + # already fetched some number + if rows is not None: + # we already have the lines in the buffer + if len(self.buf) >= rows: + new_rows, self.buf = self.buf[:rows], self.buf[rows:] + + # need some lines + else: + rows -= len(self.buf) + + if new_rows is None: + if isinstance(self.data, list): + if self.pos > len(self.data): + raise StopIteration + if rows is None: + new_rows = self.data[self.pos :] + new_pos = len(self.data) + else: + new_rows = self.data[self.pos : self.pos + rows] + new_pos = self.pos + rows + + new_rows = self._remove_skipped_rows(new_rows) + lines.extend(new_rows) + self.pos = new_pos + + else: + new_rows = [] + try: + if rows is not None: + + rows_to_skip = 0 + if self.skiprows is not None and self.pos is not None: + # Only read additional rows if pos is in skiprows + rows_to_skip = len( + set(self.skiprows) - set(range(self.pos)) + ) + + for _ in range(rows + rows_to_skip): + # assert for mypy, data is Iterator[str] or None, would + # error in next + assert self.data is not None + new_rows.append(next(self.data)) + + len_new_rows = len(new_rows) + new_rows = self._remove_skipped_rows(new_rows) + lines.extend(new_rows) + else: + rows = 0 + + while True: + new_row = self._next_iter_line(row_num=self.pos + rows + 1) + rows += 1 + + if new_row is not None: + new_rows.append(new_row) + len_new_rows = len(new_rows) + + except StopIteration: + len_new_rows = len(new_rows) + new_rows = self._remove_skipped_rows(new_rows) + lines.extend(new_rows) + if len(lines) == 0: + raise + self.pos += len_new_rows + + self.buf = [] + else: + lines = new_rows + + if self.skipfooter: + lines = lines[: -self.skipfooter] + + lines = self._check_comments(lines) + if self.skip_blank_lines: + lines = self._remove_empty_lines(lines) + lines = self._check_thousands(lines) + return self._check_decimal(lines) + + def _remove_skipped_rows(self, new_rows: list[list[Scalar]]) -> list[list[Scalar]]: + if self.skiprows: + return [ + row for i, row in enumerate(new_rows) if not self.skipfunc(i + self.pos) + ] + return new_rows + + +class FixedWidthReader(abc.Iterator): + """ + A reader of fixed-width lines. 
+ """ + + def __init__( + self, + f: IO[str], + colspecs: list[tuple[int, int]] | Literal["infer"], + delimiter: str | None, + comment: str | None, + skiprows: set[int] | None = None, + infer_nrows: int = 100, + ) -> None: + self.f = f + self.buffer: Iterator | None = None + self.delimiter = "\r\n" + delimiter if delimiter else "\n\r\t " + self.comment = comment + if colspecs == "infer": + self.colspecs = self.detect_colspecs( + infer_nrows=infer_nrows, skiprows=skiprows + ) + else: + self.colspecs = colspecs + + if not isinstance(self.colspecs, (tuple, list)): + raise TypeError( + "column specifications must be a list or tuple, " + f"input was a {type(colspecs).__name__}" + ) + + for colspec in self.colspecs: + if not ( + isinstance(colspec, (tuple, list)) + and len(colspec) == 2 + and isinstance(colspec[0], (int, np.integer, type(None))) + and isinstance(colspec[1], (int, np.integer, type(None))) + ): + raise TypeError( + "Each column specification must be " + "2 element tuple or list of integers" + ) + + def get_rows(self, infer_nrows: int, skiprows: set[int] | None = None) -> list[str]: + """ + Read rows from self.f, skipping as specified. + + We distinguish buffer_rows (the first <= infer_nrows + lines) from the rows returned to detect_colspecs + because it's simpler to leave the other locations + with skiprows logic alone than to modify them to + deal with the fact we skipped some rows here as + well. + + Parameters + ---------- + infer_nrows : int + Number of rows to read from self.f, not counting + rows that are skipped. + skiprows: set, optional + Indices of rows to skip. + + Returns + ------- + detect_rows : list of str + A list containing the rows to read. + + """ + if skiprows is None: + skiprows = set() + buffer_rows = [] + detect_rows = [] + for i, row in enumerate(self.f): + if i not in skiprows: + detect_rows.append(row) + buffer_rows.append(row) + if len(detect_rows) >= infer_nrows: + break + self.buffer = iter(buffer_rows) + return detect_rows + + def detect_colspecs( + self, infer_nrows: int = 100, skiprows: set[int] | None = None + ) -> list[tuple[int, int]]: + # Regex escape the delimiters + delimiters = "".join([fr"\{x}" for x in self.delimiter]) + pattern = re.compile(f"([^{delimiters}]+)") + rows = self.get_rows(infer_nrows, skiprows) + if not rows: + raise EmptyDataError("No rows from which to infer column width") + max_len = max(map(len, rows)) + mask = np.zeros(max_len + 1, dtype=int) + if self.comment is not None: + rows = [row.partition(self.comment)[0] for row in rows] + for row in rows: + for m in pattern.finditer(row): + mask[m.start() : m.end()] = 1 + shifted = np.roll(mask, 1) + shifted[0] = 0 + edges = np.where((mask ^ shifted) == 1)[0] + edge_pairs = list(zip(edges[::2], edges[1::2])) + return edge_pairs + + def __next__(self) -> list[str]: + if self.buffer is not None: + try: + line = next(self.buffer) + except StopIteration: + self.buffer = None + line = next(self.f) + else: + line = next(self.f) + # Note: 'colspecs' is a sequence of half-open intervals. + return [line[fromm:to].strip(self.delimiter) for (fromm, to) in self.colspecs] + + +class FixedWidthFieldParser(PythonParser): + """ + Specialization that Converts fixed-width fields into DataFrames. + See PythonParser for details. + """ + + def __init__(self, f: ReadCsvBuffer[str], **kwds) -> None: + # Support iterators, convert to a list. 
+ self.colspecs = kwds.pop("colspecs") + self.infer_nrows = kwds.pop("infer_nrows") + PythonParser.__init__(self, f, **kwds) + + def _make_reader(self, f: IO[str]) -> None: + self.data = FixedWidthReader( + f, + self.colspecs, + self.delimiter, + self.comment, + self.skiprows, + self.infer_nrows, + ) + + def _remove_empty_lines(self, lines: list[list[Scalar]]) -> list[list[Scalar]]: + """ + Returns the list of lines without the empty ones. With fixed-width + fields, empty lines become arrays of empty strings. + + See PythonParser._remove_empty_lines. + """ + return [ + line + for line in lines + if any(not isinstance(e, str) or e.strip() for e in line) + ] + + +def count_empty_vals(vals) -> int: + return sum(1 for v in vals if v == "" or v is None) + + +def _validate_skipfooter_arg(skipfooter: int) -> int: + """ + Validate the 'skipfooter' parameter. + + Checks whether 'skipfooter' is a non-negative integer. + Raises a ValueError if that is not the case. + + Parameters + ---------- + skipfooter : non-negative integer + The number of rows to skip at the end of the file. + + Returns + ------- + validated_skipfooter : non-negative integer + The original input if the validation succeeds. + + Raises + ------ + ValueError : 'skipfooter' was not a non-negative integer. + """ + if not is_integer(skipfooter): + raise ValueError("skipfooter must be an integer") + + if skipfooter < 0: + raise ValueError("skipfooter cannot be negative") + + return skipfooter diff --git a/.local/lib/python3.8/site-packages/pandas/io/parsers/readers.py b/.local/lib/python3.8/site-packages/pandas/io/parsers/readers.py new file mode 100644 index 0000000..577c38d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/parsers/readers.py @@ -0,0 +1,1718 @@ +""" +Module contains tools for processing files into DataFrames or other objects +""" +from __future__ import annotations + +from collections import abc +import csv +import sys +from textwrap import fill +from typing import ( + IO, + Any, + Callable, + NamedTuple, +) +import warnings + +import numpy as np + +import pandas._libs.lib as lib +from pandas._libs.parsers import STR_NA_VALUES +from pandas._typing import ( + ArrayLike, + CompressionOptions, + CSVEngine, + DtypeArg, + FilePath, + ReadCsvBuffer, + StorageOptions, +) +from pandas.errors import ( + AbstractMethodError, + ParserWarning, +) +from pandas.util._decorators import ( + Appender, + deprecate_nonkeyword_arguments, +) +from pandas.util._exceptions import find_stack_level +from pandas.util._validators import validate_bool_kwarg + +from pandas.core.dtypes.common import ( + is_file_like, + is_float, + is_integer, + is_list_like, +) + +from pandas.core.frame import DataFrame +from pandas.core.indexes.api import RangeIndex +from pandas.core.shared_docs import _shared_docs + +from pandas.io.common import ( + IOHandles, + get_handle, + validate_header_arg, +) +from pandas.io.parsers.arrow_parser_wrapper import ArrowParserWrapper +from pandas.io.parsers.base_parser import ( + ParserBase, + is_index_col, + parser_defaults, +) +from pandas.io.parsers.c_parser_wrapper import CParserWrapper +from pandas.io.parsers.python_parser import ( + FixedWidthFieldParser, + PythonParser, +) + +_doc_read_csv_and_table = ( + r""" +{summary} + +Also supports optionally iterating or breaking of the file +into chunks. + +Additional help can be found in the online docs for +`IO Tools `_. + +Parameters +---------- +filepath_or_buffer : str, path object or file-like object + Any valid string path is acceptable. 
The string could be a URL. Valid + URL schemes include http, ftp, s3, gs, and file. For file URLs, a host is + expected. A local file could be: file://localhost/path/to/table.csv. + + If you want to pass in a path object, pandas accepts any ``os.PathLike``. + + By file-like object, we refer to objects with a ``read()`` method, such as + a file handle (e.g. via builtin ``open`` function) or ``StringIO``. +sep : str, default {_default_sep} + Delimiter to use. If sep is None, the C engine cannot automatically detect + the separator, but the Python parsing engine can, meaning the latter will + be used and automatically detect the separator by Python's builtin sniffer + tool, ``csv.Sniffer``. In addition, separators longer than 1 character and + different from ``'\s+'`` will be interpreted as regular expressions and + will also force the use of the Python parsing engine. Note that regex + delimiters are prone to ignoring quoted data. Regex example: ``'\r\t'``. +delimiter : str, default ``None`` + Alias for sep. +header : int, list of int, None, default 'infer' + Row number(s) to use as the column names, and the start of the + data. Default behavior is to infer the column names: if no names + are passed the behavior is identical to ``header=0`` and column + names are inferred from the first line of the file, if column + names are passed explicitly then the behavior is identical to + ``header=None``. Explicitly pass ``header=0`` to be able to + replace existing names. The header can be a list of integers that + specify row locations for a multi-index on the columns + e.g. [0,1,3]. Intervening rows that are not specified will be + skipped (e.g. 2 in this example is skipped). Note that this + parameter ignores commented lines and empty lines if + ``skip_blank_lines=True``, so ``header=0`` denotes the first line of + data rather than the first line of the file. +names : array-like, optional + List of column names to use. If the file contains a header row, + then you should explicitly pass ``header=0`` to override the column names. + Duplicates in this list are not allowed. +index_col : int, str, sequence of int / str, or False, optional, default ``None`` + Column(s) to use as the row labels of the ``DataFrame``, either given as + string name or column index. If a sequence of int / str is given, a + MultiIndex is used. + + Note: ``index_col=False`` can be used to force pandas to *not* use the first + column as the index, e.g. when you have a malformed file with delimiters at + the end of each line. +usecols : list-like or callable, optional + Return a subset of the columns. If list-like, all elements must either + be positional (i.e. integer indices into the document columns) or strings + that correspond to column names provided either by the user in `names` or + inferred from the document header row(s). If ``names`` are given, the document + header row(s) are not taken into account. For example, a valid list-like + `usecols` parameter would be ``[0, 1, 2]`` or ``['foo', 'bar', 'baz']``. + Element order is ignored, so ``usecols=[0, 1]`` is the same as ``[1, 0]``. + To instantiate a DataFrame from ``data`` with element order preserved use + ``pd.read_csv(data, usecols=['foo', 'bar'])[['foo', 'bar']]`` for columns + in ``['foo', 'bar']`` order or + ``pd.read_csv(data, usecols=['foo', 'bar'])[['bar', 'foo']]`` + for ``['bar', 'foo']`` order. + + If callable, the callable function will be evaluated against the column + names, returning names where the callable function evaluates to True. 
An + example of a valid callable argument would be ``lambda x: x.upper() in + ['AAA', 'BBB', 'DDD']``. Using this parameter results in much faster + parsing time and lower memory usage. +squeeze : bool, default False + If the parsed data only contains one column then return a Series. + + .. deprecated:: 1.4.0 + Append ``.squeeze("columns")`` to the call to ``{func_name}`` to squeeze + the data. +prefix : str, optional + Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ... + + .. deprecated:: 1.4.0 + Use a list comprehension on the DataFrame's columns after calling ``read_csv``. +mangle_dupe_cols : bool, default True + Duplicate columns will be specified as 'X', 'X.1', ...'X.N', rather than + 'X'...'X'. Passing in False will cause data to be overwritten if there + are duplicate names in the columns. +dtype : Type name or dict of column -> type, optional + Data type for data or columns. E.g. {{'a': np.float64, 'b': np.int32, + 'c': 'Int64'}} + Use `str` or `object` together with suitable `na_values` settings + to preserve and not interpret dtype. + If converters are specified, they will be applied INSTEAD + of dtype conversion. +engine : {{'c', 'python', 'pyarrow'}}, optional + Parser engine to use. The C and pyarrow engines are faster, while the python engine + is currently more feature-complete. Multithreading is currently only supported by + the pyarrow engine. + + .. versionadded:: 1.4.0 + + The "pyarrow" engine was added as an *experimental* engine, and some features + are unsupported, or may not work correctly, with this engine. +converters : dict, optional + Dict of functions for converting values in certain columns. Keys can either + be integers or column labels. +true_values : list, optional + Values to consider as True. +false_values : list, optional + Values to consider as False. +skipinitialspace : bool, default False + Skip spaces after delimiter. +skiprows : list-like, int or callable, optional + Line numbers to skip (0-indexed) or number of lines to skip (int) + at the start of the file. + + If callable, the callable function will be evaluated against the row + indices, returning True if the row should be skipped and False otherwise. + An example of a valid callable argument would be ``lambda x: x in [0, 2]``. +skipfooter : int, default 0 + Number of lines at bottom of file to skip (Unsupported with engine='c'). +nrows : int, optional + Number of rows of file to read. Useful for reading pieces of large files. +na_values : scalar, str, list-like, or dict, optional + Additional strings to recognize as NA/NaN. If dict passed, specific + per-column NA values. By default the following values are interpreted as + NaN: '""" + + fill("', '".join(sorted(STR_NA_VALUES)), 70, subsequent_indent=" ") + + """'. +keep_default_na : bool, default True + Whether or not to include the default NaN values when parsing the data. + Depending on whether `na_values` is passed in, the behavior is as follows: + + * If `keep_default_na` is True, and `na_values` are specified, `na_values` + is appended to the default NaN values used for parsing. + * If `keep_default_na` is True, and `na_values` are not specified, only + the default NaN values are used for parsing. + * If `keep_default_na` is False, and `na_values` are specified, only + the NaN values specified `na_values` are used for parsing. + * If `keep_default_na` is False, and `na_values` are not specified, no + strings will be parsed as NaN. 
+ + Note that if `na_filter` is passed in as False, the `keep_default_na` and + `na_values` parameters will be ignored. +na_filter : bool, default True + Detect missing value markers (empty strings and the value of na_values). In + data without any NAs, passing na_filter=False can improve the performance + of reading a large file. +verbose : bool, default False + Indicate number of NA values placed in non-numeric columns. +skip_blank_lines : bool, default True + If True, skip over blank lines rather than interpreting as NaN values. +parse_dates : bool or list of int or names or list of lists or dict, \ +default False + The behavior is as follows: + + * boolean. If True -> try parsing the index. + * list of int or names. e.g. If [1, 2, 3] -> try parsing columns 1, 2, 3 + each as a separate date column. + * list of lists. e.g. If [[1, 3]] -> combine columns 1 and 3 and parse as + a single date column. + * dict, e.g. {{'foo' : [1, 3]}} -> parse columns 1, 3 as date and call + result 'foo' + + If a column or index cannot be represented as an array of datetimes, + say because of an unparsable value or a mixture of timezones, the column + or index will be returned unaltered as an object data type. For + non-standard datetime parsing, use ``pd.to_datetime`` after + ``pd.read_csv``. To parse an index or column with a mixture of timezones, + specify ``date_parser`` to be a partially-applied + :func:`pandas.to_datetime` with ``utc=True``. See + :ref:`io.csv.mixed_timezones` for more. + + Note: A fast-path exists for iso8601-formatted dates. +infer_datetime_format : bool, default False + If True and `parse_dates` is enabled, pandas will attempt to infer the + format of the datetime strings in the columns, and if it can be inferred, + switch to a faster method of parsing them. In some cases this can increase + the parsing speed by 5-10x. +keep_date_col : bool, default False + If True and `parse_dates` specifies combining multiple columns then + keep the original columns. +date_parser : function, optional + Function to use for converting a sequence of string columns to an array of + datetime instances. The default uses ``dateutil.parser.parser`` to do the + conversion. Pandas will try to call `date_parser` in three different ways, + advancing to the next if an exception occurs: 1) Pass one or more arrays + (as defined by `parse_dates`) as arguments; 2) concatenate (row-wise) the + string values from the columns defined by `parse_dates` into a single array + and pass that; and 3) call `date_parser` once for each row using one or + more strings (corresponding to the columns defined by `parse_dates`) as + arguments. +dayfirst : bool, default False + DD/MM format dates, international and European format. +cache_dates : bool, default True + If True, use a cache of unique, converted dates to apply the datetime + conversion. May produce significant speed-up when parsing duplicate + date strings, especially ones with timezone offsets. + + .. versionadded:: 0.25.0 +iterator : bool, default False + Return TextFileReader object for iteration or getting chunks with + ``get_chunk()``. + + .. versionchanged:: 1.2 + + ``TextFileReader`` is a context manager. +chunksize : int, optional + Return TextFileReader object for iteration. + See the `IO Tools docs + `_ + for more information on ``iterator`` and ``chunksize``. + + .. versionchanged:: 1.2 + + ``TextFileReader`` is a context manager. +{decompression_options} + + .. versionchanged:: 1.4.0 Zstandard support. + +thousands : str, optional + Thousands separator. 
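+    For example, ``thousands=','`` allows numbers written like ``1,000,000``
+    to be parsed as ``1000000``.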
+decimal : str, default '.'
+    Character to recognize as decimal point (e.g. use ',' for European data).
+lineterminator : str (length 1), optional
+    Character to break file into lines. Only valid with C parser.
+quotechar : str (length 1), optional
+    The character used to denote the start and end of a quoted item. Quoted
+    items can include the delimiter and it will be ignored.
+quoting : int or csv.QUOTE_* instance, default 0
+    Control field quoting behavior per ``csv.QUOTE_*`` constants. Use one of
+    QUOTE_MINIMAL (0), QUOTE_ALL (1), QUOTE_NONNUMERIC (2) or QUOTE_NONE (3).
+doublequote : bool, default ``True``
+    When quotechar is specified and quoting is not ``QUOTE_NONE``, indicate
+    whether or not to interpret two consecutive quotechar elements INSIDE a
+    field as a single ``quotechar`` element.
+escapechar : str (length 1), optional
+    One-character string used to escape other characters.
+comment : str, optional
+    Indicates remainder of line should not be parsed. If found at the beginning
+    of a line, the line will be ignored altogether. This parameter must be a
+    single character. Like empty lines (as long as ``skip_blank_lines=True``),
+    fully commented lines are ignored by the parameter `header` but not by
+    `skiprows`. For example, if ``comment='#'``, parsing
+    ``#empty\\na,b,c\\n1,2,3`` with ``header=0`` will result in 'a,b,c' being
+    treated as the header.
+encoding : str, optional
+    Encoding to use for UTF when reading/writing (ex. 'utf-8'). `List of Python
+    standard encodings
+    <https://docs.python.org/3/library/codecs.html#standard-encodings>`_ .
+
+    .. versionchanged:: 1.2
+
+       When ``encoding`` is ``None``, ``errors="replace"`` is passed to
+       ``open()``. Otherwise, ``errors="strict"`` is passed to ``open()``.
+       This behavior was previously only the case for ``engine="python"``.
+
+    .. versionchanged:: 1.3.0
+
+       ``encoding_errors`` is a new argument. ``encoding`` no longer has any
+       influence on how encoding errors are handled.
+
+encoding_errors : str, optional, default "strict"
+    How encoding errors are treated. `List of possible values
+    <https://docs.python.org/3/library/codecs.html#error-handlers>`_ .
+
+    .. versionadded:: 1.3.0
+
+dialect : str or csv.Dialect, optional
+    If provided, this parameter will override values (default or not) for the
+    following parameters: `delimiter`, `doublequote`, `escapechar`,
+    `skipinitialspace`, `quotechar`, and `quoting`. If it is necessary to
+    override values, a ParserWarning will be issued. See csv.Dialect
+    documentation for more details.
+error_bad_lines : bool, optional, default ``None``
+    Lines with too many fields (e.g. a csv line with too many commas) will by
+    default cause an exception to be raised, and no DataFrame will be returned.
+    If False, then these "bad lines" will be dropped from the DataFrame that is
+    returned.
+
+    .. deprecated:: 1.3.0
+       The ``on_bad_lines`` parameter should be used instead to specify
+       behavior upon encountering a bad line.
+warn_bad_lines : bool, optional, default ``None``
+    If error_bad_lines is False, and warn_bad_lines is True, a warning for each
+    "bad line" will be output.
+
+    .. deprecated:: 1.3.0
+       The ``on_bad_lines`` parameter should be used instead to specify
+       behavior upon encountering a bad line.
+on_bad_lines : {{'error', 'warn', 'skip'}} or callable, default 'error'
+    Specifies what to do upon encountering a bad line (a line with too many fields).
+    Allowed values are:
+
+    - 'error', raise an Exception when a bad line is encountered.
+    - 'warn', raise a warning when a bad line is encountered and skip that line.
+ - 'skip', skip bad lines without raising or warning when they are encountered. + + .. versionadded:: 1.3.0 + + - callable, function with signature + ``(bad_line: list[str]) -> list[str] | None`` that will process a single + bad line. ``bad_line`` is a list of strings split by the ``sep``. + If the function returns ``None``, the bad line will be ignored. + If the function returns a new list of strings with more elements than + expected, a ``ParserWarning`` will be emitted while dropping extra elements. + Only supported when ``engine="python"`` + + .. versionadded:: 1.4.0 + +delim_whitespace : bool, default False + Specifies whether or not whitespace (e.g. ``' '`` or ``'\t'``) will be + used as the sep. Equivalent to setting ``sep='\\s+'``. If this option + is set to True, nothing should be passed in for the ``delimiter`` + parameter. +low_memory : bool, default True + Internally process the file in chunks, resulting in lower memory use + while parsing, but possibly mixed type inference. To ensure no mixed + types either set False, or specify the type with the `dtype` parameter. + Note that the entire file is read into a single DataFrame regardless, + use the `chunksize` or `iterator` parameter to return the data in chunks. + (Only valid with C parser). +memory_map : bool, default False + If a filepath is provided for `filepath_or_buffer`, map the file object + directly onto memory and access the data directly from there. Using this + option can improve performance because there is no longer any I/O overhead. +float_precision : str, optional + Specifies which converter the C engine should use for floating-point + values. The options are ``None`` or 'high' for the ordinary converter, + 'legacy' for the original lower precision pandas converter, and + 'round_trip' for the round-trip converter. + + .. versionchanged:: 1.2 + +{storage_options} + + .. versionadded:: 1.2 + +Returns +------- +DataFrame or TextParser + A comma-separated values (csv) file is returned as two-dimensional + data structure with labeled axes. + +See Also +-------- +DataFrame.to_csv : Write DataFrame to a comma-separated values (csv) file. +read_csv : Read a comma-separated values (csv) file into DataFrame. +read_fwf : Read a table of fixed-width formatted lines into DataFrame. + +Examples +-------- +>>> pd.{func_name}('data.csv') # doctest: +SKIP +""" +) + + +_c_parser_defaults = { + "delim_whitespace": False, + "na_filter": True, + "low_memory": True, + "memory_map": False, + "float_precision": None, +} + +_fwf_defaults = {"colspecs": "infer", "infer_nrows": 100, "widths": None} + +_c_unsupported = {"skipfooter"} +_python_unsupported = {"low_memory", "float_precision"} +_pyarrow_unsupported = { + "skipfooter", + "float_precision", + "chunksize", + "comment", + "nrows", + "thousands", + "memory_map", + "dialect", + "warn_bad_lines", + "error_bad_lines", + "on_bad_lines", + "delim_whitespace", + "quoting", + "lineterminator", + "converters", + "decimal", + "iterator", + "dayfirst", + "infer_datetime_format", + "verbose", + "skipinitialspace", + "low_memory", +} + + +class _DeprecationConfig(NamedTuple): + default_value: Any + msg: str | None + + +_deprecated_defaults: dict[str, _DeprecationConfig] = { + "error_bad_lines": _DeprecationConfig(None, "Use on_bad_lines in the future."), + "warn_bad_lines": _DeprecationConfig(None, "Use on_bad_lines in the future."), + "squeeze": _DeprecationConfig( + None, 'Append .squeeze("columns") to the call to squeeze.' 
+ ), + "prefix": _DeprecationConfig( + None, "Use a list comprehension on the column names in the future." + ), +} + + +def validate_integer(name, val, min_val=0): + """ + Checks whether the 'name' parameter for parsing is either + an integer OR float that can SAFELY be cast to an integer + without losing accuracy. Raises a ValueError if that is + not the case. + + Parameters + ---------- + name : str + Parameter name (used for error reporting) + val : int or float + The value to check + min_val : int + Minimum allowed value (val < min_val will result in a ValueError) + """ + msg = f"'{name:s}' must be an integer >={min_val:d}" + + if val is not None: + if is_float(val): + if int(val) != val: + raise ValueError(msg) + val = int(val) + elif not (is_integer(val) and val >= min_val): + raise ValueError(msg) + + return val + + +def _validate_names(names): + """ + Raise ValueError if the `names` parameter contains duplicates or has an + invalid data type. + + Parameters + ---------- + names : array-like or None + An array containing a list of the names used for the output DataFrame. + + Raises + ------ + ValueError + If names are not unique or are not ordered (e.g. set). + """ + if names is not None: + if len(names) != len(set(names)): + raise ValueError("Duplicate names are not allowed.") + if not ( + is_list_like(names, allow_sets=False) or isinstance(names, abc.KeysView) + ): + raise ValueError("Names should be an ordered collection.") + + +def _read( + filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], kwds +): + """Generic reader of line files.""" + # if we pass a date_parser and parse_dates=False, we should not parse the + # dates GH#44366 + if ( + kwds.get("date_parser", None) is not None + and kwds.get("parse_dates", None) is None + ): + kwds["parse_dates"] = True + elif kwds.get("parse_dates", None) is None: + kwds["parse_dates"] = False + + # Extract some of the arguments (pass chunksize on). + iterator = kwds.get("iterator", False) + chunksize = kwds.get("chunksize", None) + if kwds.get("engine") == "pyarrow": + if iterator: + raise ValueError( + "The 'iterator' option is not supported with the 'pyarrow' engine" + ) + + if chunksize is not None: + raise ValueError( + "The 'chunksize' option is not supported with the 'pyarrow' engine" + ) + else: + chunksize = validate_integer("chunksize", kwds.get("chunksize", None), 1) + + nrows = kwds.get("nrows", None) + + # Check for duplicates in names. + _validate_names(kwds.get("names", None)) + + # Create the parser. 
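+    # TextFileReader dispatches to the engine-specific parser (C, python, or
+    # pyarrow) and is also the object handed back for iteration/chunking.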
+ parser = TextFileReader(filepath_or_buffer, **kwds) + + if chunksize or iterator: + return parser + + with parser: + return parser.read(nrows) + + +@deprecate_nonkeyword_arguments( + version=None, allowed_args=["filepath_or_buffer"], stacklevel=3 +) +@Appender( + _doc_read_csv_and_table.format( + func_name="read_csv", + summary="Read a comma-separated values (csv) file into DataFrame.", + _default_sep="','", + storage_options=_shared_docs["storage_options"], + decompression_options=_shared_docs["decompression_options"], + ) +) +def read_csv( + filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], + sep=lib.no_default, + delimiter=None, + # Column and Index Locations and Names + header="infer", + names=lib.no_default, + index_col=None, + usecols=None, + squeeze=None, + prefix=lib.no_default, + mangle_dupe_cols=True, + # General Parsing Configuration + dtype: DtypeArg | None = None, + engine: CSVEngine | None = None, + converters=None, + true_values=None, + false_values=None, + skipinitialspace=False, + skiprows=None, + skipfooter=0, + nrows=None, + # NA and Missing Data Handling + na_values=None, + keep_default_na=True, + na_filter=True, + verbose=False, + skip_blank_lines=True, + # Datetime Handling + parse_dates=None, + infer_datetime_format=False, + keep_date_col=False, + date_parser=None, + dayfirst=False, + cache_dates=True, + # Iteration + iterator=False, + chunksize=None, + # Quoting, Compression, and File Format + compression: CompressionOptions = "infer", + thousands=None, + decimal: str = ".", + lineterminator=None, + quotechar='"', + quoting=csv.QUOTE_MINIMAL, + doublequote=True, + escapechar=None, + comment=None, + encoding=None, + encoding_errors: str | None = "strict", + dialect=None, + # Error Handling + error_bad_lines=None, + warn_bad_lines=None, + # TODO(2.0): set on_bad_lines to "error". + # See _refine_defaults_read comment for why we do this. 
+ on_bad_lines=None, + # Internal + delim_whitespace=False, + low_memory=_c_parser_defaults["low_memory"], + memory_map=False, + float_precision=None, + storage_options: StorageOptions = None, +): + # locals() should never be modified + kwds = locals().copy() + del kwds["filepath_or_buffer"] + del kwds["sep"] + + kwds_defaults = _refine_defaults_read( + dialect, + delimiter, + delim_whitespace, + engine, + sep, + error_bad_lines, + warn_bad_lines, + on_bad_lines, + names, + prefix, + defaults={"delimiter": ","}, + ) + kwds.update(kwds_defaults) + + return _read(filepath_or_buffer, kwds) + + +@deprecate_nonkeyword_arguments( + version=None, allowed_args=["filepath_or_buffer"], stacklevel=3 +) +@Appender( + _doc_read_csv_and_table.format( + func_name="read_table", + summary="Read general delimited file into DataFrame.", + _default_sep=r"'\\t' (tab-stop)", + storage_options=_shared_docs["storage_options"], + decompression_options=_shared_docs["decompression_options"], + ) +) +def read_table( + filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], + sep=lib.no_default, + delimiter=None, + # Column and Index Locations and Names + header="infer", + names=lib.no_default, + index_col=None, + usecols=None, + squeeze=None, + prefix=lib.no_default, + mangle_dupe_cols=True, + # General Parsing Configuration + dtype: DtypeArg | None = None, + engine: CSVEngine | None = None, + converters=None, + true_values=None, + false_values=None, + skipinitialspace=False, + skiprows=None, + skipfooter=0, + nrows=None, + # NA and Missing Data Handling + na_values=None, + keep_default_na=True, + na_filter=True, + verbose=False, + skip_blank_lines=True, + # Datetime Handling + parse_dates=False, + infer_datetime_format=False, + keep_date_col=False, + date_parser=None, + dayfirst=False, + cache_dates=True, + # Iteration + iterator=False, + chunksize=None, + # Quoting, Compression, and File Format + compression: CompressionOptions = "infer", + thousands=None, + decimal: str = ".", + lineterminator=None, + quotechar='"', + quoting=csv.QUOTE_MINIMAL, + doublequote=True, + escapechar=None, + comment=None, + encoding=None, + encoding_errors: str | None = "strict", + dialect=None, + # Error Handling + error_bad_lines=None, + warn_bad_lines=None, + # TODO(2.0): set on_bad_lines to "error". + # See _refine_defaults_read comment for why we do this. + on_bad_lines=None, + # Internal + delim_whitespace=False, + low_memory=_c_parser_defaults["low_memory"], + memory_map=False, + float_precision=None, + storage_options: StorageOptions = None, +): + # locals() should never be modified + kwds = locals().copy() + del kwds["filepath_or_buffer"] + del kwds["sep"] + + kwds_defaults = _refine_defaults_read( + dialect, + delimiter, + delim_whitespace, + engine, + sep, + error_bad_lines, + warn_bad_lines, + on_bad_lines, + names, + prefix, + defaults={"delimiter": "\t"}, + ) + kwds.update(kwds_defaults) + + return _read(filepath_or_buffer, kwds) + + +@deprecate_nonkeyword_arguments( + version=None, allowed_args=["filepath_or_buffer"], stacklevel=2 +) +def read_fwf( + filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], + colspecs: list[tuple[int, int]] | str | None = "infer", + widths: list[int] | None = None, + infer_nrows: int = 100, + **kwds, +) -> DataFrame | TextFileReader: + r""" + Read a table of fixed-width formatted lines into DataFrame. + + Also supports optionally iterating or breaking of the file + into chunks. + + Additional help can be found in the `online docs for IO Tools + `_. 
+
+    Parameters
+    ----------
+    filepath_or_buffer : str, path object, or file-like object
+        String, path object (implementing ``os.PathLike[str]``), or file-like
+        object implementing a text ``read()`` function. The string could be a URL.
+        Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is
+        expected. A local file could be:
+        ``file://localhost/path/to/table.csv``.
+    colspecs : list of tuple (int, int) or 'infer', optional
+        A list of tuples giving the extents of the fixed-width
+        fields of each line as half-open intervals (i.e., [from, to[ ).
+        String value 'infer' can be used to instruct the parser to try
+        detecting the column specifications from the first 100 rows of
+        the data which are not being skipped via skiprows (default='infer').
+    widths : list of int, optional
+        A list of field widths which can be used instead of 'colspecs' if
+        the intervals are contiguous.
+    infer_nrows : int, default 100
+        The number of rows to consider when letting the parser determine the
+        `colspecs`.
+    **kwds : optional
+        Optional keyword arguments can be passed to ``TextFileReader``.
+
+    Returns
+    -------
+    DataFrame or TextFileReader
+        A fixed-width formatted file is returned as a two-dimensional
+        data structure with labeled axes.
+
+    See Also
+    --------
+    DataFrame.to_csv : Write DataFrame to a comma-separated values (csv) file.
+    read_csv : Read a comma-separated values (csv) file into DataFrame.
+
+    Examples
+    --------
+    >>> pd.read_fwf('data.csv')  # doctest: +SKIP
+    """
+    # Check input arguments.
+    if colspecs is None and widths is None:
+        raise ValueError("Must specify either colspecs or widths")
+    elif colspecs not in (None, "infer") and widths is not None:
+        raise ValueError("You must specify only one of 'widths' and 'colspecs'")
+
+    # Compute 'colspecs' from 'widths', if specified.
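+    # For instance (illustrative values), widths=[3, 5, 4] expands to
+    # colspecs=[(0, 3), (3, 8), (8, 12)].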
+ if widths is not None: + colspecs, col = [], 0 + for w in widths: + colspecs.append((col, col + w)) + col += w + + # for mypy + assert colspecs is not None + + # GH#40830 + # Ensure length of `colspecs` matches length of `names` + names = kwds.get("names") + if names is not None: + if len(names) != len(colspecs) and colspecs != "infer": + # need to check len(index_col) as it might contain + # unnamed indices, in which case it's name is not required + len_index = 0 + if kwds.get("index_col") is not None: + index_col: Any = kwds.get("index_col") + if index_col is not False: + if not is_list_like(index_col): + len_index = 1 + else: + len_index = len(index_col) + if len(names) + len_index != len(colspecs): + raise ValueError("Length of colspecs must match length of names") + + kwds["colspecs"] = colspecs + kwds["infer_nrows"] = infer_nrows + kwds["engine"] = "python-fwf" + return _read(filepath_or_buffer, kwds) + + +class TextFileReader(abc.Iterator): + """ + + Passed dialect overrides any of the related parser options + + """ + + def __init__( + self, + f: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str] | list, + engine: CSVEngine | None = None, + **kwds, + ): + if engine is not None: + engine_specified = True + else: + engine = "python" + engine_specified = False + self.engine = engine + self._engine_specified = kwds.get("engine_specified", engine_specified) + + _validate_skipfooter(kwds) + + dialect = _extract_dialect(kwds) + if dialect is not None: + if engine == "pyarrow": + raise ValueError( + "The 'dialect' option is not supported with the 'pyarrow' engine" + ) + kwds = _merge_with_dialect_properties(dialect, kwds) + + if kwds.get("header", "infer") == "infer": + kwds["header"] = 0 if kwds.get("names") is None else None + + self.orig_options = kwds + + # miscellanea + self._currow = 0 + + options = self._get_options_with_defaults(engine) + options["storage_options"] = kwds.get("storage_options", None) + + self.chunksize = options.pop("chunksize", None) + self.nrows = options.pop("nrows", None) + + self._check_file_or_buffer(f, engine) + self.options, self.engine = self._clean_options(options, engine) + + self.squeeze = self.options.pop("squeeze", False) + + if "has_index_names" in kwds: + self.options["has_index_names"] = kwds["has_index_names"] + + self.handles: IOHandles | None = None + self._engine = self._make_engine(f, self.engine) + + def close(self): + if self.handles is not None: + self.handles.close() + self._engine.close() + + def _get_options_with_defaults(self, engine): + kwds = self.orig_options + + options = {} + default: object | None + + for argname, default in parser_defaults.items(): + value = kwds.get(argname, default) + + # see gh-12935 + if ( + engine == "pyarrow" + and argname in _pyarrow_unsupported + and value != default + and value != getattr(value, "value", default) + ): + if ( + argname == "on_bad_lines" + and kwds.get("error_bad_lines") is not None + ): + argname = "error_bad_lines" + elif ( + argname == "on_bad_lines" and kwds.get("warn_bad_lines") is not None + ): + argname = "warn_bad_lines" + + raise ValueError( + f"The {repr(argname)} option is not supported with the " + f"'pyarrow' engine" + ) + elif argname == "mangle_dupe_cols" and value is False: + # GH12935 + raise ValueError("Setting mangle_dupe_cols=False is not supported yet") + else: + options[argname] = value + + for argname, default in _c_parser_defaults.items(): + if argname in kwds: + value = kwds[argname] + + if engine != "c" and value != default: + if "python" in engine and argname 
not in _python_unsupported: + pass + elif ( + value + == _deprecated_defaults.get( + argname, _DeprecationConfig(default, None) + ).default_value + ): + pass + else: + raise ValueError( + f"The {repr(argname)} option is not supported with the " + f"{repr(engine)} engine" + ) + else: + value = _deprecated_defaults.get( + argname, _DeprecationConfig(default, None) + ).default_value + options[argname] = value + + if engine == "python-fwf": + for argname, default in _fwf_defaults.items(): + options[argname] = kwds.get(argname, default) + + return options + + def _check_file_or_buffer(self, f, engine): + # see gh-16530 + if is_file_like(f) and engine != "c" and not hasattr(f, "__iter__"): + # The C engine doesn't need the file-like to have the "__iter__" + # attribute. However, the Python engine needs "__iter__(...)" + # when iterating through such an object, meaning it + # needs to have that attribute + raise ValueError( + "The 'python' engine cannot iterate through this file buffer." + ) + + def _clean_options(self, options, engine): + result = options.copy() + + fallback_reason = None + + # C engine not supported yet + if engine == "c": + if options["skipfooter"] > 0: + fallback_reason = "the 'c' engine does not support skipfooter" + engine = "python" + + sep = options["delimiter"] + delim_whitespace = options["delim_whitespace"] + + if sep is None and not delim_whitespace: + if engine in ("c", "pyarrow"): + fallback_reason = ( + f"the '{engine}' engine does not support " + "sep=None with delim_whitespace=False" + ) + engine = "python" + elif sep is not None and len(sep) > 1: + if engine == "c" and sep == r"\s+": + result["delim_whitespace"] = True + del result["delimiter"] + elif engine not in ("python", "python-fwf"): + # wait until regex engine integrated + fallback_reason = ( + f"the '{engine}' engine does not support " + "regex separators (separators > 1 char and " + r"different from '\s+' are interpreted as regex)" + ) + engine = "python" + elif delim_whitespace: + if "python" in engine: + result["delimiter"] = r"\s+" + elif sep is not None: + encodeable = True + encoding = sys.getfilesystemencoding() or "utf-8" + try: + if len(sep.encode(encoding)) > 1: + encodeable = False + except UnicodeDecodeError: + encodeable = False + if not encodeable and engine not in ("python", "python-fwf"): + fallback_reason = ( + f"the separator encoded in {encoding} " + f"is > 1 char long, and the '{engine}' engine " + "does not support such separators" + ) + engine = "python" + + quotechar = options["quotechar"] + if quotechar is not None and isinstance(quotechar, (str, bytes)): + if ( + len(quotechar) == 1 + and ord(quotechar) > 127 + and engine not in ("python", "python-fwf") + ): + fallback_reason = ( + "ord(quotechar) > 127, meaning the " + "quotechar is larger than one byte, " + f"and the '{engine}' engine does not support such quotechars" + ) + engine = "python" + + if fallback_reason and self._engine_specified: + raise ValueError(fallback_reason) + + if engine == "c": + for arg in _c_unsupported: + del result[arg] + + if "python" in engine: + for arg in _python_unsupported: + if fallback_reason and result[arg] != _c_parser_defaults[arg]: + raise ValueError( + "Falling back to the 'python' engine because " + f"{fallback_reason}, but this causes {repr(arg)} to be " + "ignored as it is not supported by the 'python' engine." 
+ ) + del result[arg] + + if fallback_reason: + warnings.warn( + ( + "Falling back to the 'python' engine because " + f"{fallback_reason}; you can avoid this warning by specifying " + "engine='python'." + ), + ParserWarning, + stacklevel=find_stack_level(), + ) + + index_col = options["index_col"] + names = options["names"] + converters = options["converters"] + na_values = options["na_values"] + skiprows = options["skiprows"] + + validate_header_arg(options["header"]) + + for arg in _deprecated_defaults.keys(): + parser_default = _c_parser_defaults.get(arg, parser_defaults[arg]) + depr_default = _deprecated_defaults[arg] + if result.get(arg, depr_default) != depr_default.default_value: + msg = ( + f"The {arg} argument has been deprecated and will be " + f"removed in a future version. {depr_default.msg}\n\n" + ) + warnings.warn(msg, FutureWarning, stacklevel=find_stack_level()) + else: + result[arg] = parser_default + + if index_col is True: + raise ValueError("The value of index_col couldn't be 'True'") + if is_index_col(index_col): + if not isinstance(index_col, (list, tuple, np.ndarray)): + index_col = [index_col] + result["index_col"] = index_col + + names = list(names) if names is not None else names + + # type conversion-related + if converters is not None: + if not isinstance(converters, dict): + raise TypeError( + "Type converters must be a dict or subclass, " + f"input was a {type(converters).__name__}" + ) + else: + converters = {} + + # Converting values to NA + keep_default_na = options["keep_default_na"] + na_values, na_fvalues = _clean_na_values(na_values, keep_default_na) + + # handle skiprows; this is internally handled by the + # c-engine, so only need for python and pyarrow parsers + if engine == "pyarrow": + if not is_integer(skiprows) and skiprows is not None: + # pyarrow expects skiprows to be passed as an integer + raise ValueError( + "skiprows argument must be an integer when using " + "engine='pyarrow'" + ) + else: + if is_integer(skiprows): + skiprows = list(range(skiprows)) + if skiprows is None: + skiprows = set() + elif not callable(skiprows): + skiprows = set(skiprows) + + # put stuff back + result["names"] = names + result["converters"] = converters + result["na_values"] = na_values + result["na_fvalues"] = na_fvalues + result["skiprows"] = skiprows + # Default for squeeze is none since we need to check + # if user sets it. We then set to False to preserve + # previous behavior. 
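+        # Sketch (hypothetical file): read_csv("one_col.csv", squeeze=True)
+        # returns a Series rather than a single-column DataFrame.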
+ result["squeeze"] = False if options["squeeze"] is None else options["squeeze"] + + return result, engine + + def __next__(self): + try: + return self.get_chunk() + except StopIteration: + self.close() + raise + + def _make_engine( + self, + f: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str] | list | IO, + engine: CSVEngine = "c", + ): + mapping: dict[str, type[ParserBase]] = { + "c": CParserWrapper, + "python": PythonParser, + "pyarrow": ArrowParserWrapper, + "python-fwf": FixedWidthFieldParser, + } + if engine not in mapping: + raise ValueError( + f"Unknown engine: {engine} (valid options are {mapping.keys()})" + ) + if not isinstance(f, list): + # open file here + is_text = True + mode = "r" + if engine == "pyarrow": + is_text = False + mode = "rb" + # error: No overload variant of "get_handle" matches argument types + # "Union[str, PathLike[str], ReadCsvBuffer[bytes], ReadCsvBuffer[str]]" + # , "str", "bool", "Any", "Any", "Any", "Any", "Any" + self.handles = get_handle( # type: ignore[call-overload] + f, + mode, + encoding=self.options.get("encoding", None), + compression=self.options.get("compression", None), + memory_map=self.options.get("memory_map", False), + is_text=is_text, + errors=self.options.get("encoding_errors", "strict"), + storage_options=self.options.get("storage_options", None), + ) + assert self.handles is not None + f = self.handles.handle + + try: + return mapping[engine](f, **self.options) + except Exception: + if self.handles is not None: + self.handles.close() + raise + + def _failover_to_python(self): + raise AbstractMethodError(self) + + def read(self, nrows=None): + if self.engine == "pyarrow": + try: + df = self._engine.read() + except Exception: + self.close() + raise + else: + nrows = validate_integer("nrows", nrows) + try: + index, columns, col_dict = self._engine.read(nrows) + except Exception: + self.close() + raise + + if index is None: + if col_dict: + # Any column is actually fine: + new_rows = len(next(iter(col_dict.values()))) + index = RangeIndex(self._currow, self._currow + new_rows) + else: + new_rows = 0 + else: + new_rows = len(index) + + df = DataFrame(col_dict, columns=columns, index=index) + + self._currow += new_rows + + if self.squeeze and len(df.columns) == 1: + return df.squeeze("columns").copy() + return df + + def get_chunk(self, size=None): + if size is None: + size = self.chunksize + if self.nrows is not None: + if self._currow >= self.nrows: + raise StopIteration + size = min(size, self.nrows - self._currow) + return self.read(nrows=size) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.close() + + +def TextParser(*args, **kwds): + """ + Converts lists of lists/tuples into DataFrames with proper type inference + and optional (e.g. string to datetime) conversion. Also enables iterating + lazily over chunks of large files + + Parameters + ---------- + data : file-like object or list + delimiter : separator character to use + dialect : str or csv.Dialect instance, optional + Ignored if delimiter is longer than 1 character + names : sequence, default + header : int, default 0 + Row to use to parse column labels. Defaults to the first row. Prior + rows will be discarded + index_col : int or list, optional + Column or columns to use as the (possibly hierarchical) index + has_index_names: bool, default False + True if the cols defined in index_col have an index name and are + not in the header. 
+    na_values : scalar, str, list-like, or dict, optional
+        Additional strings to recognize as NA/NaN.
+    keep_default_na : bool, default True
+    thousands : str, optional
+        Thousands separator
+    comment : str, optional
+        Comment out remainder of line
+    parse_dates : bool, default False
+    keep_date_col : bool, default False
+    date_parser : function, optional
+    skiprows : list of integers
+        Row numbers to skip
+    skipfooter : int
+        Number of lines at the bottom of the file to skip
+    converters : dict, optional
+        Dict of functions for converting values in certain columns. Keys can
+        either be integers or column labels, values are functions that take one
+        input argument, the cell (not column) content, and return the
+        transformed content.
+    encoding : str, optional
+        Encoding to use for UTF when reading/writing (ex. 'utf-8')
+    squeeze : bool, default False
+        Return a Series if there is only one column.
+    infer_datetime_format : bool, default False
+        If True and `parse_dates` is True for a column, try to infer the
+        datetime format based on the first datetime string. If the format
+        can be inferred, there often will be a large parsing speed-up.
+    float_precision : str, optional
+        Specifies which converter the C engine should use for floating-point
+        values. The options are `None` or `high` for the ordinary converter,
+        `legacy` for the original lower precision pandas converter, and
+        `round_trip` for the round-trip converter.
+
+        .. versionchanged:: 1.2
+    """
+    kwds["engine"] = "python"
+    return TextFileReader(*args, **kwds)
+
+
+def _clean_na_values(na_values, keep_default_na=True):
+    na_fvalues: set | dict
+    if na_values is None:
+        if keep_default_na:
+            na_values = STR_NA_VALUES
+        else:
+            na_values = set()
+        na_fvalues = set()
+    elif isinstance(na_values, dict):
+        old_na_values = na_values.copy()
+        na_values = {}  # Prevent aliasing.
+
+        # Convert the values in the na_values dictionary
+        # into array-likes for further use. This is also
+        # where we append the default NaN values, provided
+        # that `keep_default_na=True`.
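+        # Sketch of the conversion below, assuming keep_default_na=True:
+        #   {"a": "missing"}  ->  {"a": {"missing"} | STR_NA_VALUES}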
+ for k, v in old_na_values.items(): + if not is_list_like(v): + v = [v] + + if keep_default_na: + v = set(v) | STR_NA_VALUES + + na_values[k] = v + na_fvalues = {k: _floatify_na_values(v) for k, v in na_values.items()} + else: + if not is_list_like(na_values): + na_values = [na_values] + na_values = _stringify_na_values(na_values) + if keep_default_na: + na_values = na_values | STR_NA_VALUES + + na_fvalues = _floatify_na_values(na_values) + + return na_values, na_fvalues + + +def _floatify_na_values(na_values): + # create float versions of the na_values + result = set() + for v in na_values: + try: + v = float(v) + if not np.isnan(v): + result.add(v) + except (TypeError, ValueError, OverflowError): + pass + return result + + +def _stringify_na_values(na_values): + """return a stringified and numeric for these values""" + result: list[int | str | float] = [] + for x in na_values: + result.append(str(x)) + result.append(x) + try: + v = float(x) + + # we are like 999 here + if v == int(v): + v = int(v) + result.append(f"{v}.0") + result.append(str(v)) + + result.append(v) + except (TypeError, ValueError, OverflowError): + pass + try: + result.append(int(x)) + except (TypeError, ValueError, OverflowError): + pass + return set(result) + + +def _refine_defaults_read( + dialect: str | csv.Dialect, + delimiter: str | object, + delim_whitespace: bool, + engine: CSVEngine | None, + sep: str | object, + error_bad_lines: bool | None, + warn_bad_lines: bool | None, + on_bad_lines: str | Callable | None, + names: ArrayLike | None | object, + prefix: str | None | object, + defaults: dict[str, Any], +): + """Validate/refine default values of input parameters of read_csv, read_table. + + Parameters + ---------- + dialect : str or csv.Dialect + If provided, this parameter will override values (default or not) for the + following parameters: `delimiter`, `doublequote`, `escapechar`, + `skipinitialspace`, `quotechar`, and `quoting`. If it is necessary to + override values, a ParserWarning will be issued. See csv.Dialect + documentation for more details. + delimiter : str or object + Alias for sep. + delim_whitespace : bool + Specifies whether or not whitespace (e.g. ``' '`` or ``'\t'``) will be + used as the sep. Equivalent to setting ``sep='\\s+'``. If this option + is set to True, nothing should be passed in for the ``delimiter`` + parameter. + engine : {{'c', 'python'}} + Parser engine to use. The C engine is faster while the python engine is + currently more feature-complete. + sep : str or object + A delimiter provided by the user (str) or a sentinel value, i.e. + pandas._libs.lib.no_default. + error_bad_lines : str or None + Whether to error on a bad line or not. + warn_bad_lines : str or None + Whether to warn on a bad line or not. + on_bad_lines : str, callable or None + An option for handling bad lines or a sentinel value(None). + names : array-like, optional + List of column names to use. If the file contains a header row, + then you should explicitly pass ``header=0`` to override the column names. + Duplicates in this list are not allowed. + prefix : str, optional + Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ... + defaults: dict + Default values of input parameters. + + Returns + ------- + kwds : dict + Input parameters with correct values. + + Raises + ------ + ValueError : + If a delimiter was specified with ``sep`` (or ``delimiter``) and + ``delim_whitespace=True``. + If on_bad_lines is specified(not ``None``) and ``error_bad_lines``/ + ``warn_bad_lines`` is True. 
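+
+    Examples
+    --------
+    A sketch of the sep/delimiter aliasing (output abbreviated; shown for
+    illustration only):
+
+    >>> _refine_defaults_read(
+    ...     None, ";", False, None, lib.no_default, None, None, None,
+    ...     lib.no_default, lib.no_default, defaults={"delimiter": ","},
+    ... )  # doctest: +SKIP
+    {'names': None, 'prefix': None, 'delimiter': ';', 'engine': 'c', ...}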
+ """ + # fix types for sep, delimiter to Union(str, Any) + delim_default = defaults["delimiter"] + kwds: dict[str, Any] = {} + # gh-23761 + # + # When a dialect is passed, it overrides any of the overlapping + # parameters passed in directly. We don't want to warn if the + # default parameters were passed in (since it probably means + # that the user didn't pass them in explicitly in the first place). + # + # "delimiter" is the annoying corner case because we alias it to + # "sep" before doing comparison to the dialect values later on. + # Thus, we need a flag to indicate that we need to "override" + # the comparison to dialect values by checking if default values + # for BOTH "delimiter" and "sep" were provided. + if dialect is not None: + kwds["sep_override"] = delimiter is None and ( + sep is lib.no_default or sep == delim_default + ) + + if delimiter and (sep is not lib.no_default): + raise ValueError("Specified a sep and a delimiter; you can only specify one.") + + if ( + names is not None + and names is not lib.no_default + and prefix is not None + and prefix is not lib.no_default + ): + raise ValueError("Specified named and prefix; you can only specify one.") + + kwds["names"] = None if names is lib.no_default else names + kwds["prefix"] = None if prefix is lib.no_default else prefix + + # Alias sep -> delimiter. + if delimiter is None: + delimiter = sep + + if delim_whitespace and (delimiter is not lib.no_default): + raise ValueError( + "Specified a delimiter with both sep and " + "delim_whitespace=True; you can only specify one." + ) + + if delimiter == "\n": + raise ValueError( + r"Specified \n as separator or delimiter. This forces the python engine " + "which does not accept a line terminator. Hence it is not allowed to use " + "the line terminator as separator.", + ) + + if delimiter is lib.no_default: + # assign default separator value + kwds["delimiter"] = delim_default + else: + kwds["delimiter"] = delimiter + + if engine is not None: + kwds["engine_specified"] = True + else: + kwds["engine"] = "c" + kwds["engine_specified"] = False + + # Ensure that on_bad_lines and error_bad_lines/warn_bad_lines + # aren't specified at the same time. If so, raise. Otherwise, + # alias on_bad_lines to "error" if error/warn_bad_lines not set + # and on_bad_lines is not set. on_bad_lines is defaulted to None + # so we can tell if it is set (this is why this hack exists). + if on_bad_lines is not None: + if error_bad_lines is not None or warn_bad_lines is not None: + raise ValueError( + "Both on_bad_lines and error_bad_lines/warn_bad_lines are set. " + "Please only set on_bad_lines." + ) + if on_bad_lines == "error": + kwds["on_bad_lines"] = ParserBase.BadLineHandleMethod.ERROR + elif on_bad_lines == "warn": + kwds["on_bad_lines"] = ParserBase.BadLineHandleMethod.WARN + elif on_bad_lines == "skip": + kwds["on_bad_lines"] = ParserBase.BadLineHandleMethod.SKIP + elif callable(on_bad_lines): + if engine != "python": + raise ValueError( + "on_bad_line can only be a callable function if engine='python'" + ) + kwds["on_bad_lines"] = on_bad_lines + else: + raise ValueError(f"Argument {on_bad_lines} is invalid for on_bad_lines") + else: + if error_bad_lines is not None: + # Must check is_bool, because other stuff(e.g. 
non-empty lists) eval to true + validate_bool_kwarg(error_bad_lines, "error_bad_lines") + if error_bad_lines: + kwds["on_bad_lines"] = ParserBase.BadLineHandleMethod.ERROR + else: + if warn_bad_lines is not None: + # This is the case where error_bad_lines is False + # We can only warn/skip if error_bad_lines is False + # None doesn't work because backwards-compatibility reasons + validate_bool_kwarg(warn_bad_lines, "warn_bad_lines") + if warn_bad_lines: + kwds["on_bad_lines"] = ParserBase.BadLineHandleMethod.WARN + else: + kwds["on_bad_lines"] = ParserBase.BadLineHandleMethod.SKIP + else: + # Backwards compat, when only error_bad_lines = false, we warn + kwds["on_bad_lines"] = ParserBase.BadLineHandleMethod.WARN + else: + # Everything None -> Error + kwds["on_bad_lines"] = ParserBase.BadLineHandleMethod.ERROR + + return kwds + + +def _extract_dialect(kwds: dict[str, Any]) -> csv.Dialect | None: + """ + Extract concrete csv dialect instance. + + Returns + ------- + csv.Dialect or None + """ + if kwds.get("dialect") is None: + return None + + dialect = kwds["dialect"] + if dialect in csv.list_dialects(): + dialect = csv.get_dialect(dialect) + + _validate_dialect(dialect) + + return dialect + + +MANDATORY_DIALECT_ATTRS = ( + "delimiter", + "doublequote", + "escapechar", + "skipinitialspace", + "quotechar", + "quoting", +) + + +def _validate_dialect(dialect: csv.Dialect) -> None: + """ + Validate csv dialect instance. + + Raises + ------ + ValueError + If incorrect dialect is provided. + """ + for param in MANDATORY_DIALECT_ATTRS: + if not hasattr(dialect, param): + raise ValueError(f"Invalid dialect {dialect} provided") + + +def _merge_with_dialect_properties( + dialect: csv.Dialect, + defaults: dict[str, Any], +) -> dict[str, Any]: + """ + Merge default kwargs in TextFileReader with dialect parameters. + + Parameters + ---------- + dialect : csv.Dialect + Concrete csv dialect. See csv.Dialect documentation for more details. + defaults : dict + Keyword arguments passed to TextFileReader. + + Returns + ------- + kwds : dict + Updated keyword arguments, merged with dialect parameters. + """ + kwds = defaults.copy() + + for param in MANDATORY_DIALECT_ATTRS: + dialect_val = getattr(dialect, param) + + parser_default = parser_defaults[param] + provided = kwds.get(param, parser_default) + + # Messages for conflicting values between the dialect + # instance and the actual parameters provided. + conflict_msgs = [] + + # Don't warn if the default parameter was passed in, + # even if it conflicts with the dialect (gh-23761). + if provided != parser_default and provided != dialect_val: + msg = ( + f"Conflicting values for '{param}': '{provided}' was " + f"provided, but the dialect specifies '{dialect_val}'. " + "Using the dialect-specified value." + ) + + # Annoying corner case for not warning about + # conflicts between dialect and delimiter parameter. + # Refer to the outer "_read_" function for more info. + if not (param == "delimiter" and kwds.pop("sep_override", False)): + conflict_msgs.append(msg) + + if conflict_msgs: + warnings.warn( + "\n\n".join(conflict_msgs), ParserWarning, stacklevel=find_stack_level() + ) + kwds[param] = dialect_val + return kwds + + +def _validate_skipfooter(kwds: dict[str, Any]) -> None: + """ + Check whether skipfooter is compatible with other kwargs in TextFileReader. + + Parameters + ---------- + kwds : dict + Keyword arguments passed to TextFileReader. + + Raises + ------ + ValueError + If skipfooter is not compatible with other parameters. 
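+
+    Examples
+    --------
+    A sketch of the check on conflicting keywords:
+
+    >>> _validate_skipfooter({"skipfooter": 1, "iterator": True})
+    Traceback (most recent call last):
+        ...
+    ValueError: 'skipfooter' not supported for iteration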
+ """ + if kwds.get("skipfooter"): + if kwds.get("iterator") or kwds.get("chunksize"): + raise ValueError("'skipfooter' not supported for iteration") + if kwds.get("nrows"): + raise ValueError("'skipfooter' not supported with 'nrows'") diff --git a/.local/lib/python3.8/site-packages/pandas/io/pickle.py b/.local/lib/python3.8/site-packages/pandas/io/pickle.py new file mode 100644 index 0000000..2928d8c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/pickle.py @@ -0,0 +1,213 @@ +""" pickle compat """ +from __future__ import annotations + +import pickle +from typing import Any +import warnings + +from pandas._typing import ( + CompressionOptions, + FilePath, + ReadPickleBuffer, + StorageOptions, + WriteBuffer, +) +from pandas.compat import pickle_compat as pc +from pandas.util._decorators import doc + +from pandas.core.shared_docs import _shared_docs + +from pandas.io.common import get_handle + + +@doc( + storage_options=_shared_docs["storage_options"], + compression_options=_shared_docs["compression_options"] % "filepath_or_buffer", +) +def to_pickle( + obj: Any, + filepath_or_buffer: FilePath | WriteBuffer[bytes], + compression: CompressionOptions = "infer", + protocol: int = pickle.HIGHEST_PROTOCOL, + storage_options: StorageOptions = None, +) -> None: + """ + Pickle (serialize) object to file. + + Parameters + ---------- + obj : any object + Any python object. + filepath_or_buffer : str, path object, or file-like object + String, path object (implementing ``os.PathLike[str]``), or file-like + object implementing a binary ``write()`` function. + + .. versionchanged:: 1.0.0 + Accept URL. URL has to be of S3 or GCS. + {compression_options} + + .. versionchanged:: 1.4.0 Zstandard support. + + protocol : int + Int which indicates which protocol should be used by the pickler, + default HIGHEST_PROTOCOL (see [1], paragraph 12.1.2). The possible + values for this parameter depend on the version of Python. For Python + 2.x, possible values are 0, 1, 2. For Python>=3.0, 3 is a valid value. + For Python >= 3.4, 4 is a valid value. A negative value for the + protocol parameter is equivalent to setting its value to + HIGHEST_PROTOCOL. + + {storage_options} + + .. versionadded:: 1.2.0 + + .. [1] https://docs.python.org/3/library/pickle.html + + See Also + -------- + read_pickle : Load pickled pandas object (or any object) from file. + DataFrame.to_hdf : Write DataFrame to an HDF5 file. + DataFrame.to_sql : Write DataFrame to a SQL database. + DataFrame.to_parquet : Write a DataFrame to the binary parquet format. + + Examples + -------- + >>> original_df = pd.DataFrame({{"foo": range(5), "bar": range(5, 10)}}) # doctest: +SKIP + >>> original_df # doctest: +SKIP + foo bar + 0 0 5 + 1 1 6 + 2 2 7 + 3 3 8 + 4 4 9 + >>> pd.to_pickle(original_df, "./dummy.pkl") # doctest: +SKIP + + >>> unpickled_df = pd.read_pickle("./dummy.pkl") # doctest: +SKIP + >>> unpickled_df # doctest: +SKIP + foo bar + 0 0 5 + 1 1 6 + 2 2 7 + 3 3 8 + 4 4 9 + """ # noqa: E501 + if protocol < 0: + protocol = pickle.HIGHEST_PROTOCOL + + with get_handle( + filepath_or_buffer, + "wb", + compression=compression, + is_text=False, + storage_options=storage_options, + ) as handles: + if handles.compression["method"] in ("bz2", "xz") and protocol >= 5: + # some weird TypeError GH#39002 with pickle 5: fallback to letting + # pickle create the entire object and then write it to the buffer. 
+ # "zip" would also be here if pandas.io.common._BytesZipFile + # wouldn't buffer write calls + handles.handle.write(pickle.dumps(obj, protocol=protocol)) + else: + # letting pickle write directly to the buffer is more memory-efficient + pickle.dump(obj, handles.handle, protocol=protocol) + + +@doc( + storage_options=_shared_docs["storage_options"], + decompression_options=_shared_docs["decompression_options"] % "filepath_or_buffer", +) +def read_pickle( + filepath_or_buffer: FilePath | ReadPickleBuffer, + compression: CompressionOptions = "infer", + storage_options: StorageOptions = None, +): + """ + Load pickled pandas object (or any object) from file. + + .. warning:: + + Loading pickled data received from untrusted sources can be + unsafe. See `here `__. + + Parameters + ---------- + filepath_or_buffer : str, path object, or file-like object + String, path object (implementing ``os.PathLike[str]``), or file-like + object implementing a binary ``readlines()`` function. + + .. versionchanged:: 1.0.0 + Accept URL. URL is not limited to S3 and GCS. + + {decompression_options} + + .. versionchanged:: 1.4.0 Zstandard support. + + {storage_options} + + .. versionadded:: 1.2.0 + + Returns + ------- + unpickled : same type as object stored in file + + See Also + -------- + DataFrame.to_pickle : Pickle (serialize) DataFrame object to file. + Series.to_pickle : Pickle (serialize) Series object to file. + read_hdf : Read HDF5 file into a DataFrame. + read_sql : Read SQL query or database table into a DataFrame. + read_parquet : Load a parquet object, returning a DataFrame. + + Notes + ----- + read_pickle is only guaranteed to be backwards compatible to pandas 0.20.3. + + Examples + -------- + >>> original_df = pd.DataFrame({{"foo": range(5), "bar": range(5, 10)}}) # doctest: +SKIP + >>> original_df # doctest: +SKIP + foo bar + 0 0 5 + 1 1 6 + 2 2 7 + 3 3 8 + 4 4 9 + >>> pd.to_pickle(original_df, "./dummy.pkl") # doctest: +SKIP + + >>> unpickled_df = pd.read_pickle("./dummy.pkl") # doctest: +SKIP + >>> unpickled_df # doctest: +SKIP + foo bar + 0 0 5 + 1 1 6 + 2 2 7 + 3 3 8 + 4 4 9 + """ # noqa: E501 + excs_to_catch = (AttributeError, ImportError, ModuleNotFoundError, TypeError) + with get_handle( + filepath_or_buffer, + "rb", + compression=compression, + is_text=False, + storage_options=storage_options, + ) as handles: + + # 1) try standard library Pickle + # 2) try pickle_compat (older pandas version) to handle subclass changes + # 3) try pickle_compat with latin-1 encoding upon a UnicodeDecodeError + + try: + # TypeError for Cython complaints about object.__new__ vs Tick.__new__ + try: + with warnings.catch_warnings(record=True): + # We want to silence any warnings about, e.g. moved modules. + warnings.simplefilter("ignore", Warning) + return pickle.load(handles.handle) + except excs_to_catch: + # e.g. 
+ # "No module named 'pandas.core.sparse.series'" + # "Can't get attribute '__nat_unpickle' on %s,key->%s] [items->%s] +""" + +# formats +_FORMAT_MAP = {"f": "fixed", "fixed": "fixed", "t": "table", "table": "table"} + +# axes map +_AXES_MAP = {DataFrame: [0]} + +# register our configuration options +dropna_doc = """ +: boolean + drop ALL nan rows when appending to a table +""" +format_doc = """ +: format + default format writing format, if None, then + put will default to 'fixed' and append will default to 'table' +""" + +with config.config_prefix("io.hdf"): + config.register_option("dropna_table", False, dropna_doc, validator=config.is_bool) + config.register_option( + "default_format", + None, + format_doc, + validator=config.is_one_of_factory(["fixed", "table", None]), + ) + +# oh the troubles to reduce import time +_table_mod = None +_table_file_open_policy_is_strict = False + + +def _tables(): + global _table_mod + global _table_file_open_policy_is_strict + if _table_mod is None: + import tables + + _table_mod = tables + + # set the file open policy + # return the file open policy; this changes as of pytables 3.1 + # depending on the HDF5 version + with suppress(AttributeError): + _table_file_open_policy_is_strict = ( + tables.file._FILE_OPEN_POLICY == "strict" + ) + + return _table_mod + + +# interface to/from ### + + +def to_hdf( + path_or_buf, + key: str, + value: DataFrame | Series, + mode: str = "a", + complevel: int | None = None, + complib: str | None = None, + append: bool = False, + format: str | None = None, + index: bool = True, + min_itemsize: int | dict[str, int] | None = None, + nan_rep=None, + dropna: bool | None = None, + data_columns: Literal[True] | list[str] | None = None, + errors: str = "strict", + encoding: str = "UTF-8", +) -> None: + """store this object, close it if we opened it""" + if append: + f = lambda store: store.append( + key, + value, + format=format, + index=index, + min_itemsize=min_itemsize, + nan_rep=nan_rep, + dropna=dropna, + data_columns=data_columns, + errors=errors, + encoding=encoding, + ) + else: + # NB: dropna is not passed to `put` + f = lambda store: store.put( + key, + value, + format=format, + index=index, + min_itemsize=min_itemsize, + nan_rep=nan_rep, + data_columns=data_columns, + errors=errors, + encoding=encoding, + dropna=dropna, + ) + + path_or_buf = stringify_path(path_or_buf) + if isinstance(path_or_buf, str): + with HDFStore( + path_or_buf, mode=mode, complevel=complevel, complib=complib + ) as store: + f(store) + else: + f(path_or_buf) + + +def read_hdf( + path_or_buf, + key=None, + mode: str = "r", + errors: str = "strict", + where=None, + start: int | None = None, + stop: int | None = None, + columns=None, + iterator=False, + chunksize: int | None = None, + **kwargs, +): + """ + Read from the store, close it if we opened it. + + Retrieve pandas object stored in file, optionally based on where + criteria. + + .. warning:: + + Pandas uses PyTables for reading and writing HDF5 files, which allows + serializing object-dtype data with pickle when using the "fixed" format. + Loading pickled data received from untrusted sources can be unsafe. + + See: https://docs.python.org/3/library/pickle.html for more. + + Parameters + ---------- + path_or_buf : str, path object, pandas.HDFStore + Any valid string path is acceptable. Only supports the local file system, + remote URLs and file-like objects are not supported. + + If you want to pass in a path object, pandas accepts any + ``os.PathLike``. 
+ + Alternatively, pandas accepts an open :class:`pandas.HDFStore` object. + + key : object, optional + The group identifier in the store. Can be omitted if the HDF file + contains a single pandas object. + mode : {'r', 'r+', 'a'}, default 'r' + Mode to use when opening the file. Ignored if path_or_buf is a + :class:`pandas.HDFStore`. Default is 'r'. + errors : str, default 'strict' + Specifies how encoding and decoding errors are to be handled. + See the errors argument for :func:`open` for a full list + of options. + where : list, optional + A list of Term (or convertible) objects. + start : int, optional + Row number to start selection. + stop : int, optional + Row number to stop selection. + columns : list, optional + A list of columns names to return. + iterator : bool, optional + Return an iterator object. + chunksize : int, optional + Number of rows to include in an iteration when using an iterator. + **kwargs + Additional keyword arguments passed to HDFStore. + + Returns + ------- + item : object + The selected object. Return type depends on the object stored. + + See Also + -------- + DataFrame.to_hdf : Write a HDF file from a DataFrame. + HDFStore : Low-level access to HDF files. + + Examples + -------- + >>> df = pd.DataFrame([[1, 1.0, 'a']], columns=['x', 'y', 'z']) # doctest: +SKIP + >>> df.to_hdf('./store.h5', 'data') # doctest: +SKIP + >>> reread = pd.read_hdf('./store.h5') # doctest: +SKIP + """ + if mode not in ["r", "r+", "a"]: + raise ValueError( + f"mode {mode} is not allowed while performing a read. " + f"Allowed modes are r, r+ and a." + ) + # grab the scope + if where is not None: + where = _ensure_term(where, scope_level=1) + + if isinstance(path_or_buf, HDFStore): + if not path_or_buf.is_open: + raise OSError("The HDFStore must be open for reading.") + + store = path_or_buf + auto_close = False + else: + path_or_buf = stringify_path(path_or_buf) + if not isinstance(path_or_buf, str): + raise NotImplementedError( + "Support for generic buffers has not been implemented." + ) + try: + exists = os.path.exists(path_or_buf) + + # if filepath is too long + except (TypeError, ValueError): + exists = False + + if not exists: + raise FileNotFoundError(f"File {path_or_buf} does not exist") + + store = HDFStore(path_or_buf, mode=mode, errors=errors, **kwargs) + # can't auto open/close if we are using an iterator + # so delegate to the iterator + auto_close = True + + try: + if key is None: + groups = store.groups() + if len(groups) == 0: + raise ValueError( + "Dataset(s) incompatible with Pandas data types, " + "not table, or no datasets found in HDF5 file." + ) + candidate_only_group = groups[0] + + # For the HDF file to have only one dataset, all other groups + # should then be metadata groups for that candidate group. (This + # assumes that the groups() method enumerates parent groups + # before their children.) + for group_to_check in groups[1:]: + if not _is_metadata_of(group_to_check, candidate_only_group): + raise ValueError( + "key must be provided when HDF5 " + "file contains multiple datasets." + ) + key = candidate_only_group._v_pathname + return store.select( + key, + where=where, + start=start, + stop=stop, + columns=columns, + iterator=iterator, + chunksize=chunksize, + auto_close=auto_close, + ) + except (ValueError, TypeError, KeyError): + if not isinstance(path_or_buf, HDFStore): + # if there is an error, close the store if we opened it. 
+ with suppress(AttributeError): + store.close() + + raise + + +def _is_metadata_of(group: Node, parent_group: Node) -> bool: + """Check if a given group is a metadata group for a given parent_group.""" + if group._v_depth <= parent_group._v_depth: + return False + + current = group + while current._v_depth > 1: + parent = current._v_parent + if parent == parent_group and current._v_name == "meta": + return True + current = current._v_parent + return False + + +class HDFStore: + """ + Dict-like IO interface for storing pandas objects in PyTables. + + Either Fixed or Table format. + + .. warning:: + + Pandas uses PyTables for reading and writing HDF5 files, which allows + serializing object-dtype data with pickle when using the "fixed" format. + Loading pickled data received from untrusted sources can be unsafe. + + See: https://docs.python.org/3/library/pickle.html for more. + + Parameters + ---------- + path : str + File path to HDF5 file. + mode : {'a', 'w', 'r', 'r+'}, default 'a' + + ``'r'`` + Read-only; no data can be modified. + ``'w'`` + Write; a new file is created (an existing file with the same + name would be deleted). + ``'a'`` + Append; an existing file is opened for reading and writing, + and if the file does not exist it is created. + ``'r+'`` + It is similar to ``'a'``, but the file must already exist. + complevel : int, 0-9, default None + Specifies a compression level for data. + A value of 0 or None disables compression. + complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default 'zlib' + Specifies the compression library to be used. + As of v0.20.2 these additional compressors for Blosc are supported + (default if no compressor specified: 'blosc:blosclz'): + {'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', + 'blosc:zlib', 'blosc:zstd'}. + Specifying a compression library which is not available issues + a ValueError. + fletcher32 : bool, default False + If applying compression use the fletcher32 checksum. + **kwargs + These parameters will be passed to the PyTables open_file method. + + Examples + -------- + >>> bar = pd.DataFrame(np.random.randn(10, 4)) + >>> store = pd.HDFStore('test.h5') + >>> store['foo'] = bar # write to HDF5 + >>> bar = store['foo'] # retrieve + >>> store.close() + + **Create or load HDF5 file in-memory** + + When passing the `driver` option to the PyTables open_file method through + **kwargs, the HDF5 file is loaded or created in-memory and will only be + written when closed: + + >>> bar = pd.DataFrame(np.random.randn(10, 4)) + >>> store = pd.HDFStore('test.h5', driver='H5FD_CORE') + >>> store['foo'] = bar + >>> store.close() # only now, data is written to disk + """ + + _handle: File | None + _mode: str + _complevel: int + _fletcher32: bool + + def __init__( + self, + path, + mode: str = "a", + complevel: int | None = None, + complib=None, + fletcher32: bool = False, + **kwargs, + ): + + if "format" in kwargs: + raise ValueError("format is not a defined argument for HDFStore") + + tables = import_optional_dependency("tables") + + if complib is not None and complib not in tables.filters.all_complibs: + raise ValueError( + f"complib only supports {tables.filters.all_complibs} compression." 
+ ) + + if complib is None and complevel is not None: + complib = tables.filters.default_complib + + self._path = stringify_path(path) + if mode is None: + mode = "a" + self._mode = mode + self._handle = None + self._complevel = complevel if complevel else 0 + self._complib = complib + self._fletcher32 = fletcher32 + self._filters = None + self.open(mode=mode, **kwargs) + + def __fspath__(self): + return self._path + + @property + def root(self): + """return the root node""" + self._check_if_open() + assert self._handle is not None # for mypy + return self._handle.root + + @property + def filename(self): + return self._path + + def __getitem__(self, key: str): + return self.get(key) + + def __setitem__(self, key: str, value): + self.put(key, value) + + def __delitem__(self, key: str): + return self.remove(key) + + def __getattr__(self, name: str): + """allow attribute access to get stores""" + try: + return self.get(name) + except (KeyError, ClosedFileError): + pass + raise AttributeError( + f"'{type(self).__name__}' object has no attribute '{name}'" + ) + + def __contains__(self, key: str) -> bool: + """ + check for existence of this key + can match the exact pathname or the pathnm w/o the leading '/' + """ + node = self.get_node(key) + if node is not None: + name = node._v_pathname + if name == key or name[1:] == key: + return True + return False + + def __len__(self) -> int: + return len(self.groups()) + + def __repr__(self) -> str: + pstr = pprint_thing(self._path) + return f"{type(self)}\nFile path: {pstr}\n" + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.close() + + def keys(self, include: str = "pandas") -> list[str]: + """ + Return a list of keys corresponding to objects stored in HDFStore. + + Parameters + ---------- + + include : str, default 'pandas' + When kind equals 'pandas' return pandas objects. + When kind equals 'native' return native HDF5 Table objects. + + .. versionadded:: 1.1.0 + + Returns + ------- + list + List of ABSOLUTE path-names (e.g. have the leading '/'). + + Raises + ------ + raises ValueError if kind has an illegal value + """ + if include == "pandas": + return [n._v_pathname for n in self.groups()] + + elif include == "native": + assert self._handle is not None # mypy + return [ + n._v_pathname for n in self._handle.walk_nodes("/", classname="Table") + ] + raise ValueError( + f"`include` should be either 'pandas' or 'native' but is '{include}'" + ) + + def __iter__(self): + return iter(self.keys()) + + def items(self): + """ + iterate on key->group + """ + for g in self.groups(): + yield g._v_pathname, g + + iteritems = items + + def open(self, mode: str = "a", **kwargs): + """ + Open the file in the specified mode + + Parameters + ---------- + mode : {'a', 'w', 'r', 'r+'}, default 'a' + See HDFStore docstring or tables.open_file for info about modes + **kwargs + These parameters will be passed to the PyTables open_file method. + """ + tables = _tables() + + if self._mode != mode: + # if we are changing a write mode to read, ok + if self._mode in ["a", "w"] and mode in ["r", "r+"]: + pass + elif mode in ["w"]: + # this would truncate, raise here + if self.is_open: + raise PossibleDataLossError( + f"Re-opening the file [{self._path}] with mode [{self._mode}] " + "will delete the current file!" 
+ ) + + self._mode = mode + + # close and reopen the handle + if self.is_open: + self.close() + + if self._complevel and self._complevel > 0: + self._filters = _tables().Filters( + self._complevel, self._complib, fletcher32=self._fletcher32 + ) + + if _table_file_open_policy_is_strict and self.is_open: + msg = ( + "Cannot open HDF5 file, which is already opened, " + "even in read-only mode." + ) + raise ValueError(msg) + + self._handle = tables.open_file(self._path, self._mode, **kwargs) + + def close(self): + """ + Close the PyTables file handle + """ + if self._handle is not None: + self._handle.close() + self._handle = None + + @property + def is_open(self) -> bool: + """ + return a boolean indicating whether the file is open + """ + if self._handle is None: + return False + return bool(self._handle.isopen) + + def flush(self, fsync: bool = False): + """ + Force all buffered modifications to be written to disk. + + Parameters + ---------- + fsync : bool (default False) + call ``os.fsync()`` on the file handle to force writing to disk. + + Notes + ----- + Without ``fsync=True``, flushing may not guarantee that the OS writes + to disk. With fsync, the operation will block until the OS claims the + file has been written; however, other caching layers may still + interfere. + """ + if self._handle is not None: + self._handle.flush() + if fsync: + with suppress(OSError): + os.fsync(self._handle.fileno()) + + def get(self, key: str): + """ + Retrieve pandas object stored in file. + + Parameters + ---------- + key : str + + Returns + ------- + object + Same type as object stored in file. + """ + with patch_pickle(): + # GH#31167 Without this patch, pickle doesn't know how to unpickle + # old DateOffset objects now that they are cdef classes. + group = self.get_node(key) + if group is None: + raise KeyError(f"No object named {key} in the file") + return self._read_group(group) + + def select( + self, + key: str, + where=None, + start=None, + stop=None, + columns=None, + iterator=False, + chunksize=None, + auto_close: bool = False, + ): + """ + Retrieve pandas object stored in file, optionally based on where criteria. + + .. warning:: + + Pandas uses PyTables for reading and writing HDF5 files, which allows + serializing object-dtype data with pickle when using the "fixed" format. + Loading pickled data received from untrusted sources can be unsafe. + + See: https://docs.python.org/3/library/pickle.html for more. + + Parameters + ---------- + key : str + Object being retrieved from file. + where : list or None + List of Term (or convertible) objects, optional. + start : int or None + Row number to start selection. + stop : int, default None + Row number to stop selection. + columns : list or None + A list of columns that if not None, will limit the return columns. + iterator : bool or False + Returns an iterator. + chunksize : int or None + Number or rows to include in iteration, return an iterator. + auto_close : bool or False + Should automatically close the store when finished. + + Returns + ------- + object + Retrieved object from file. 
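+
+        Examples
+        --------
+        A sketch, assuming a table-format object was stored under 'df'
+        (file name hypothetical):
+
+        >>> store = pd.HDFStore('store.h5')  # doctest: +SKIP
+        >>> store.select('df', where='index > 5')  # doctest: +SKIP
+        >>> store.close()  # doctest: +SKIP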
+ """ + group = self.get_node(key) + if group is None: + raise KeyError(f"No object named {key} in the file") + + # create the storer and axes + where = _ensure_term(where, scope_level=1) + s = self._create_storer(group) + s.infer_axes() + + # function to call on iteration + def func(_start, _stop, _where): + return s.read(start=_start, stop=_stop, where=_where, columns=columns) + + # create the iterator + it = TableIterator( + self, + s, + func, + where=where, + nrows=s.nrows, + start=start, + stop=stop, + iterator=iterator, + chunksize=chunksize, + auto_close=auto_close, + ) + + return it.get_result() + + def select_as_coordinates( + self, + key: str, + where=None, + start: int | None = None, + stop: int | None = None, + ): + """ + return the selection as an Index + + .. warning:: + + Pandas uses PyTables for reading and writing HDF5 files, which allows + serializing object-dtype data with pickle when using the "fixed" format. + Loading pickled data received from untrusted sources can be unsafe. + + See: https://docs.python.org/3/library/pickle.html for more. + + + Parameters + ---------- + key : str + where : list of Term (or convertible) objects, optional + start : integer (defaults to None), row number to start selection + stop : integer (defaults to None), row number to stop selection + """ + where = _ensure_term(where, scope_level=1) + tbl = self.get_storer(key) + if not isinstance(tbl, Table): + raise TypeError("can only read_coordinates with a table") + return tbl.read_coordinates(where=where, start=start, stop=stop) + + def select_column( + self, + key: str, + column: str, + start: int | None = None, + stop: int | None = None, + ): + """ + return a single column from the table. This is generally only useful to + select an indexable + + .. warning:: + + Pandas uses PyTables for reading and writing HDF5 files, which allows + serializing object-dtype data with pickle when using the "fixed" format. + Loading pickled data received from untrusted sources can be unsafe. + + See: https://docs.python.org/3/library/pickle.html for more. + + Parameters + ---------- + key : str + column : str + The column of interest. + start : int or None, default None + stop : int or None, default None + + Raises + ------ + raises KeyError if the column is not found (or key is not a valid + store) + raises ValueError if the column can not be extracted individually (it + is part of a data block) + + """ + tbl = self.get_storer(key) + if not isinstance(tbl, Table): + raise TypeError("can only read_column with a table") + return tbl.read_column(column=column, start=start, stop=stop) + + def select_as_multiple( + self, + keys, + where=None, + selector=None, + columns=None, + start=None, + stop=None, + iterator=False, + chunksize=None, + auto_close: bool = False, + ): + """ + Retrieve pandas objects from multiple tables. + + .. warning:: + + Pandas uses PyTables for reading and writing HDF5 files, which allows + serializing object-dtype data with pickle when using the "fixed" format. + Loading pickled data received from untrusted sources can be unsafe. + + See: https://docs.python.org/3/library/pickle.html for more. 
+ + Parameters + ---------- + keys : a list of the tables + selector : the table to apply the where criteria (defaults to keys[0] + if not supplied) + columns : the columns I want back + start : integer (defaults to None), row number to start selection + stop : integer (defaults to None), row number to stop selection + iterator : bool, return an iterator, default False + chunksize : nrows to include in iteration, return an iterator + auto_close : bool, default False + Should automatically close the store when finished. + + Raises + ------ + raises KeyError if keys or selector is not found or keys is empty + raises TypeError if keys is not a list or tuple + raises ValueError if the tables are not ALL THE SAME DIMENSIONS + """ + # default to single select + where = _ensure_term(where, scope_level=1) + if isinstance(keys, (list, tuple)) and len(keys) == 1: + keys = keys[0] + if isinstance(keys, str): + return self.select( + key=keys, + where=where, + columns=columns, + start=start, + stop=stop, + iterator=iterator, + chunksize=chunksize, + auto_close=auto_close, + ) + + if not isinstance(keys, (list, tuple)): + raise TypeError("keys must be a list/tuple") + + if not len(keys): + raise ValueError("keys must have a non-zero length") + + if selector is None: + selector = keys[0] + + # collect the tables + tbls = [self.get_storer(k) for k in keys] + s = self.get_storer(selector) + + # validate rows + nrows = None + for t, k in itertools.chain([(s, selector)], zip(tbls, keys)): + if t is None: + raise KeyError(f"Invalid table [{k}]") + if not t.is_table: + raise TypeError( + f"object [{t.pathname}] is not a table, and cannot be used in all " + "select as multiple" + ) + + if nrows is None: + nrows = t.nrows + elif t.nrows != nrows: + raise ValueError("all tables must have exactly the same nrows!") + + # The isinstance checks here are redundant with the check above, + # but necessary for mypy; see GH#29757 + _tbls = [x for x in tbls if isinstance(x, Table)] + + # axis is the concentration axes + axis = list({t.non_index_axes[0][0] for t in _tbls})[0] + + def func(_start, _stop, _where): + + # retrieve the objs, _where is always passed as a set of + # coordinates here + objs = [ + t.read(where=_where, columns=columns, start=_start, stop=_stop) + for t in tbls + ] + + # concat and return + return concat(objs, axis=axis, verify_integrity=False)._consolidate() + + # create the iterator + it = TableIterator( + self, + s, + func, + where=where, + nrows=nrows, + start=start, + stop=stop, + iterator=iterator, + chunksize=chunksize, + auto_close=auto_close, + ) + + return it.get_result(coordinates=True) + + def put( + self, + key: str, + value: DataFrame | Series, + format=None, + index=True, + append=False, + complib=None, + complevel: int | None = None, + min_itemsize: int | dict[str, int] | None = None, + nan_rep=None, + data_columns: Literal[True] | list[str] | None = None, + encoding=None, + errors: str = "strict", + track_times: bool = True, + dropna: bool = False, + ): + """ + Store object in HDFStore. + + Parameters + ---------- + key : str + value : {Series, DataFrame} + format : 'fixed(f)|table(t)', default is 'fixed' + Format to use when storing object in HDFStore. Value can be one of: + + ``'fixed'`` + Fixed format. Fast writing/reading. Not-appendable, nor searchable. + ``'table'`` + Table format. Write as a PyTables Table structure which may perform + worse but allow more flexible operations like searching / selecting + subsets of the data. 
+        append : bool, default False
+            This will force Table format and append the input data to the
+            existing table.
+        data_columns : list of columns or True, default None
+            List of columns to create as data columns, or True to use all columns.
+            See `here
+            `__.
+        encoding : str, default None
+            Provide an encoding for strings.
+        track_times : bool, default True
+            Parameter is propagated to the 'create_table' method of 'PyTables'.
+            If set to False it makes it possible to produce identical h5 files
+            (same hashes) independent of creation time.
+
+            .. versionadded:: 1.1.0
+        """
+        if format is None:
+            format = get_option("io.hdf.default_format") or "fixed"
+        format = self._validate_format(format)
+        self._write_to_group(
+            key,
+            value,
+            format=format,
+            index=index,
+            append=append,
+            complib=complib,
+            complevel=complevel,
+            min_itemsize=min_itemsize,
+            nan_rep=nan_rep,
+            data_columns=data_columns,
+            encoding=encoding,
+            errors=errors,
+            track_times=track_times,
+            dropna=dropna,
+        )
+
+    def remove(self, key: str, where=None, start=None, stop=None):
+        """
+        Remove a pandas object, or rows from it, by specifying the where
+        condition.
+
+        Parameters
+        ----------
+        key : str
+            Node to remove or delete rows from
+        where : list of Term (or convertible) objects, optional
+        start : integer (defaults to None), row number to start selection
+        stop : integer (defaults to None), row number to stop selection
+
+        Returns
+        -------
+        number of rows removed (or None if not a Table)
+
+        Raises
+        ------
+        raises KeyError if key is not a valid store
+
+        """
+        where = _ensure_term(where, scope_level=1)
+        try:
+            s = self.get_storer(key)
+        except KeyError:
+            # the key is not a valid store, re-raising KeyError
+            raise
+        except AssertionError:
+            # surface any assertion errors for e.g. debugging
+            raise
+        except Exception as err:
+            # In tests we get here with ClosedFileError, TypeError, and
+            # _table_mod.NoSuchNodeError.  TODO: Catch only these?
+
+            if where is not None:
+                raise ValueError(
+                    "trying to remove a node with a non-None where clause!"
+                ) from err
+
+            # we are actually trying to remove a node (with children)
+            node = self.get_node(key)
+            if node is not None:
+                node._f_remove(recursive=True)
+                return None
+
+        # remove the whole node
+        if com.all_none(where, start, stop):
+            s.group._f_remove(recursive=True)
+
+        # delete rows from the table
+        else:
+            if not s.is_table:
+                raise ValueError(
+                    "can only remove with where on objects written as tables"
+                )
+            return s.delete(where=where, start=start, stop=stop)
+
+    def append(
+        self,
+        key: str,
+        value: DataFrame | Series,
+        format=None,
+        axes=None,
+        index=True,
+        append=True,
+        complib=None,
+        complevel: int | None = None,
+        columns=None,
+        min_itemsize: int | dict[str, int] | None = None,
+        nan_rep=None,
+        chunksize=None,
+        expectedrows=None,
+        dropna: bool | None = None,
+        data_columns: Literal[True] | list[str] | None = None,
+        encoding=None,
+        errors: str = "strict",
+    ):
+        """
+        Append to Table in file. Node must already exist and be Table
+        format.
+
+        Parameters
+        ----------
+        key : str
+        value : {Series, DataFrame}
+        format : 'table' is the default
+            Format to use when storing object in HDFStore. Value can be one of:
+
+            ``'table'``
+                Table format. Write as a PyTables Table structure, which may
+                perform worse but allows more flexible operations like
+                searching / selecting subsets of the data.
+        append : bool, default True
+            Append the input data to the existing table.
+        data_columns : list of columns, or True, default None
+            List of columns to create as indexed data columns for on-disk
+            queries, or True to use all columns. By default only the axes
+            of the object are indexed. See `here
+            `__.
+        min_itemsize : dict mapping column names to minimum str sizes
+        nan_rep : str to use as the str nan representation
+        chunksize : number of rows to write per chunk
+        expectedrows : expected TOTAL number of rows of this table
+        encoding : default None, provide an encoding for str
+        dropna : bool, default False
+            Do not write an all-NaN row to the store; settable
+            by the option 'io.hdf.dropna_table'.
+
+        Notes
+        -----
+        Does *not* check if the data being appended overlaps with existing
+        data in the table, so be careful.
+        """
+        if columns is not None:
+            raise TypeError(
+                "columns is not a supported keyword in append, try data_columns"
+            )
+
+        if dropna is None:
+            dropna = get_option("io.hdf.dropna_table")
+        if format is None:
+            format = get_option("io.hdf.default_format") or "table"
+        format = self._validate_format(format)
+        self._write_to_group(
+            key,
+            value,
+            format=format,
+            axes=axes,
+            index=index,
+            append=append,
+            complib=complib,
+            complevel=complevel,
+            min_itemsize=min_itemsize,
+            nan_rep=nan_rep,
+            chunksize=chunksize,
+            expectedrows=expectedrows,
+            dropna=dropna,
+            data_columns=data_columns,
+            encoding=encoding,
+            errors=errors,
+        )
+
+    def append_to_multiple(
+        self,
+        d: dict,
+        value,
+        selector,
+        data_columns=None,
+        axes=None,
+        dropna=False,
+        **kwargs,
+    ):
+        """
+        Append to multiple tables.
+
+        Parameters
+        ----------
+        d : a dict mapping table_name to table_columns; None is acceptable as
+            the value of one node (that table will get all the remaining
+            columns)
+        value : a pandas object
+        selector : a string that designates the indexable table; all of its
+            columns will be designated as data_columns, unless data_columns is
+            passed, in which case these are used
+        data_columns : list of columns to create as data columns, or True to
+            use all columns
+        dropna : if it evaluates to True, drop rows from all tables if any
+            single row in each table has all NaN. Default False.
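+
+        A minimal sketch (table names are hypothetical; ``df`` is a DataFrame
+        whose columns are split across the two tables, with ``index_table``
+        driving selection)::
+
+            store.append_to_multiple(
+                {"index_table": ["A", "B"], "storage_table": None},
+                df,
+                selector="index_table",
+            )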
+ + Notes + ----- + axes parameter is currently not accepted + + """ + if axes is not None: + raise TypeError( + "axes is currently not accepted as a parameter to append_to_multiple; " + "you can create the tables independently instead" + ) + + if not isinstance(d, dict): + raise ValueError( + "append_to_multiple must have a dictionary specified as the " + "way to split the value" + ) + + if selector not in d: + raise ValueError( + "append_to_multiple requires a selector that is in passed dict" + ) + + # figure out the splitting axis (the non_index_axis) + axis = list(set(range(value.ndim)) - set(_AXES_MAP[type(value)]))[0] + + # figure out how to split the value + remain_key = None + remain_values: list = [] + for k, v in d.items(): + if v is None: + if remain_key is not None: + raise ValueError( + "append_to_multiple can only have one value in d that is None" + ) + remain_key = k + else: + remain_values.extend(v) + if remain_key is not None: + ordered = value.axes[axis] + ordd = ordered.difference(Index(remain_values)) + ordd = sorted(ordered.get_indexer(ordd)) + d[remain_key] = ordered.take(ordd) + + # data_columns + if data_columns is None: + data_columns = d[selector] + + # ensure rows are synchronized across the tables + if dropna: + idxs = (value[cols].dropna(how="all").index for cols in d.values()) + valid_index = next(idxs) + for index in idxs: + valid_index = valid_index.intersection(index) + value = value.loc[valid_index] + + min_itemsize = kwargs.pop("min_itemsize", None) + + # append + for k, v in d.items(): + dc = data_columns if k == selector else None + + # compute the val + val = value.reindex(v, axis=axis) + + filtered = ( + {key: value for (key, value) in min_itemsize.items() if key in v} + if min_itemsize is not None + else None + ) + self.append(k, val, data_columns=dc, min_itemsize=filtered, **kwargs) + + def create_table_index( + self, + key: str, + columns=None, + optlevel: int | None = None, + kind: str | None = None, + ): + """ + Create a pytables index on the table. + + Parameters + ---------- + key : str + columns : None, bool, or listlike[str] + Indicate which columns to create an index on. + + * False : Do not create any indexes. + * True : Create indexes on all columns. + * None : Create indexes on all columns. + * listlike : Create indexes on the given columns. + + optlevel : int or None, default None + Optimization level, if None, pytables defaults to 6. + kind : str or None, default None + Kind of index, if None, pytables defaults to "medium". + + Raises + ------ + TypeError: raises if the node is not a table + """ + # version requirements + _tables() + s = self.get_storer(key) + if s is None: + return + + if not isinstance(s, Table): + raise TypeError("cannot create table index on a Fixed format store") + s.create_index(columns=columns, optlevel=optlevel, kind=kind) + + def groups(self): + """ + Return a list of all the top-level nodes. + + Each node returned is not a pandas storage object. + + Returns + ------- + list + List of objects. + """ + _tables() + self._check_if_open() + assert self._handle is not None # for mypy + assert _table_mod is not None # for mypy + return [ + g + for g in self._handle.walk_groups() + if ( + not isinstance(g, _table_mod.link.Link) + and ( + getattr(g._v_attrs, "pandas_type", None) + or getattr(g, "table", None) + or (isinstance(g, _table_mod.table.Table) and g._v_name != "table") + ) + ) + ] + + def walk(self, where="/"): + """ + Walk the pytables group hierarchy for pandas objects. 
+ + This generator will yield the group path, subgroups and pandas object + names for each group. + + Any non-pandas PyTables objects that are not a group will be ignored. + + The `where` group itself is listed first (preorder), then each of its + child groups (following an alphanumerical order) is also traversed, + following the same procedure. + + Parameters + ---------- + where : str, default "/" + Group where to start walking. + + Yields + ------ + path : str + Full path to a group (without trailing '/'). + groups : list + Names (strings) of the groups contained in `path`. + leaves : list + Names (strings) of the pandas objects contained in `path`. + """ + _tables() + self._check_if_open() + assert self._handle is not None # for mypy + assert _table_mod is not None # for mypy + + for g in self._handle.walk_groups(where): + if getattr(g._v_attrs, "pandas_type", None) is not None: + continue + + groups = [] + leaves = [] + for child in g._v_children.values(): + pandas_type = getattr(child._v_attrs, "pandas_type", None) + if pandas_type is None: + if isinstance(child, _table_mod.group.Group): + groups.append(child._v_name) + else: + leaves.append(child._v_name) + + yield (g._v_pathname.rstrip("/"), groups, leaves) + + def get_node(self, key: str) -> Node | None: + """return the node with the key or None if it does not exist""" + self._check_if_open() + if not key.startswith("/"): + key = "/" + key + + assert self._handle is not None + assert _table_mod is not None # for mypy + try: + node = self._handle.get_node(self.root, key) + except _table_mod.exceptions.NoSuchNodeError: + return None + + assert isinstance(node, _table_mod.Node), type(node) + return node + + def get_storer(self, key: str) -> GenericFixed | Table: + """return the storer object for a key, raise if not in the file""" + group = self.get_node(key) + if group is None: + raise KeyError(f"No object named {key} in the file") + + s = self._create_storer(group) + s.infer_axes() + return s + + def copy( + self, + file, + mode="w", + propindexes: bool = True, + keys=None, + complib=None, + complevel: int | None = None, + fletcher32: bool = False, + overwrite=True, + ): + """ + Copy the existing store to a new file, updating in place. + + Parameters + ---------- + propindexes : bool, default True + Restore indexes in copied file. + keys : list, optional + List of keys to include in the copy (defaults to all). + overwrite : bool, default True + Whether to overwrite (remove and replace) existing nodes in the new store. + mode, complib, complevel, fletcher32 same as in HDFStore.__init__ + + Returns + ------- + open file handle of the new store + """ + new_store = HDFStore( + file, mode=mode, complib=complib, complevel=complevel, fletcher32=fletcher32 + ) + if keys is None: + keys = list(self.keys()) + if not isinstance(keys, (tuple, list)): + keys = [keys] + for k in keys: + s = self.get_storer(k) + if s is not None: + + if k in new_store: + if overwrite: + new_store.remove(k) + + data = self.select(k) + if isinstance(s, Table): + + index: bool | list[str] = False + if propindexes: + index = [a.name for a in s.axes if a.is_indexed] + new_store.append( + k, + data, + index=index, + data_columns=getattr(s, "data_columns", None), + encoding=s.encoding, + ) + else: + new_store.put(k, data, encoding=s.encoding) + + return new_store + + def info(self) -> str: + """ + Print detailed information on the store. 
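+
+        A sketch of typical use; the report lists each stored key alongside
+        its storer description, or notes that the file is closed::
+
+            print(store.info())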
+ + Returns + ------- + str + """ + path = pprint_thing(self._path) + output = f"{type(self)}\nFile path: {path}\n" + + if self.is_open: + lkeys = sorted(self.keys()) + if len(lkeys): + keys = [] + values = [] + + for k in lkeys: + try: + s = self.get_storer(k) + if s is not None: + keys.append(pprint_thing(s.pathname or k)) + values.append(pprint_thing(s or "invalid_HDFStore node")) + except AssertionError: + # surface any assertion errors for e.g. debugging + raise + except Exception as detail: + keys.append(k) + dstr = pprint_thing(detail) + values.append(f"[invalid_HDFStore node: {dstr}]") + + output += adjoin(12, keys, values) + else: + output += "Empty" + else: + output += "File is CLOSED" + + return output + + # ------------------------------------------------------------------------ + # private methods + + def _check_if_open(self): + if not self.is_open: + raise ClosedFileError(f"{self._path} file is not open!") + + def _validate_format(self, format: str) -> str: + """validate / deprecate formats""" + # validate + try: + format = _FORMAT_MAP[format.lower()] + except KeyError as err: + raise TypeError(f"invalid HDFStore format specified [{format}]") from err + + return format + + def _create_storer( + self, + group, + format=None, + value: DataFrame | Series | None = None, + encoding: str = "UTF-8", + errors: str = "strict", + ) -> GenericFixed | Table: + """return a suitable class to operate""" + cls: type[GenericFixed] | type[Table] + + if value is not None and not isinstance(value, (Series, DataFrame)): + raise TypeError("value must be None, Series, or DataFrame") + + def error(t): + # return instead of raising so mypy can tell where we are raising + return TypeError( + f"cannot properly create the storer for: [{t}] [group->" + f"{group},value->{type(value)},format->{format}" + ) + + pt = _ensure_decoded(getattr(group._v_attrs, "pandas_type", None)) + tt = _ensure_decoded(getattr(group._v_attrs, "table_type", None)) + + # infer the pt from the passed value + if pt is None: + if value is None: + _tables() + assert _table_mod is not None # for mypy + if getattr(group, "table", None) or isinstance( + group, _table_mod.table.Table + ): + pt = "frame_table" + tt = "generic_table" + else: + raise TypeError( + "cannot create a storer if the object is not existing " + "nor a value are passed" + ) + else: + if isinstance(value, Series): + pt = "series" + else: + pt = "frame" + + # we are actually a table + if format == "table": + pt += "_table" + + # a storer node + if "table" not in pt: + _STORER_MAP = {"series": SeriesFixed, "frame": FrameFixed} + try: + cls = _STORER_MAP[pt] + except KeyError as err: + raise error("_STORER_MAP") from err + return cls(self, group, encoding=encoding, errors=errors) + + # existing node (and must be a table) + if tt is None: + # if we are a writer, determine the tt + if value is not None: + if pt == "series_table": + index = getattr(value, "index", None) + if index is not None: + if index.nlevels == 1: + tt = "appendable_series" + elif index.nlevels > 1: + tt = "appendable_multiseries" + elif pt == "frame_table": + index = getattr(value, "index", None) + if index is not None: + if index.nlevels == 1: + tt = "appendable_frame" + elif index.nlevels > 1: + tt = "appendable_multiframe" + + _TABLE_MAP = { + "generic_table": GenericTable, + "appendable_series": AppendableSeriesTable, + "appendable_multiseries": AppendableMultiSeriesTable, + "appendable_frame": AppendableFrameTable, + "appendable_multiframe": AppendableMultiFrameTable, + "worm": WORMTable, + } + 
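+        # map the stored/inferred table_type to its concrete Table subclass;
+        # an unrecognized table_type surfaces as the TypeError built by error()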
try: + cls = _TABLE_MAP[tt] + except KeyError as err: + raise error("_TABLE_MAP") from err + + return cls(self, group, encoding=encoding, errors=errors) + + def _write_to_group( + self, + key: str, + value: DataFrame | Series, + format, + axes=None, + index=True, + append=False, + complib=None, + complevel: int | None = None, + fletcher32=None, + min_itemsize: int | dict[str, int] | None = None, + chunksize=None, + expectedrows=None, + dropna=False, + nan_rep=None, + data_columns=None, + encoding=None, + errors: str = "strict", + track_times: bool = True, + ) -> None: + # we don't want to store a table node at all if our object is 0-len + # as there are not dtypes + if getattr(value, "empty", None) and (format == "table" or append): + return + + group = self._identify_group(key, append) + + s = self._create_storer(group, format, value, encoding=encoding, errors=errors) + if append: + # raise if we are trying to append to a Fixed format, + # or a table that exists (and we are putting) + if not s.is_table or (s.is_table and format == "fixed" and s.is_exists): + raise ValueError("Can only append to Tables") + if not s.is_exists: + s.set_object_info() + else: + s.set_object_info() + + if not s.is_table and complib: + raise ValueError("Compression not supported on Fixed format stores") + + # write the object + s.write( + obj=value, + axes=axes, + append=append, + complib=complib, + complevel=complevel, + fletcher32=fletcher32, + min_itemsize=min_itemsize, + chunksize=chunksize, + expectedrows=expectedrows, + dropna=dropna, + nan_rep=nan_rep, + data_columns=data_columns, + track_times=track_times, + ) + + if isinstance(s, Table) and index: + s.create_index(columns=index) + + def _read_group(self, group: Node): + s = self._create_storer(group) + s.infer_axes() + return s.read() + + def _identify_group(self, key: str, append: bool) -> Node: + """Identify HDF5 group based on key, delete/create group if needed.""" + group = self.get_node(key) + + # we make this assertion for mypy; the get_node call will already + # have raised if this is incorrect + assert self._handle is not None + + # remove the node if we are not appending + if group is not None and not append: + self._handle.remove_node(group, recursive=True) + group = None + + if group is None: + group = self._create_nodes_and_group(key) + + return group + + def _create_nodes_and_group(self, key: str) -> Node: + """Create nodes from key and return group name.""" + # assertion for mypy + assert self._handle is not None + + paths = key.split("/") + # recursively create the groups + path = "/" + for p in paths: + if not len(p): + continue + new_path = path + if not path.endswith("/"): + new_path += "/" + new_path += p + group = self.get_node(new_path) + if group is None: + group = self._handle.create_group(path, p) + path = new_path + return group + + +class TableIterator: + """ + Define the iteration interface on a table + + Parameters + ---------- + store : HDFStore + s : the referred storer + func : the function to execute the query + where : the where of the query + nrows : the rows to iterate on + start : the passed start value (default is None) + stop : the passed stop value (default is None) + iterator : bool, default False + Whether to use the default iterator. + chunksize : the passed chunking value (default is 100000) + auto_close : bool, default False + Whether to automatically close the store at the end of iteration. 
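+
+    Notes
+    -----
+    A minimal usage sketch (assuming ``store.h5`` holds a table under the
+    key ``"df"``); iterating yields chunk-sized pandas objects::
+
+        with HDFStore("store.h5") as store:
+            for chunk in store.select("df", chunksize=50000):
+                ...  # process each chunk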
+ """ + + chunksize: int | None + store: HDFStore + s: GenericFixed | Table + + def __init__( + self, + store: HDFStore, + s: GenericFixed | Table, + func, + where, + nrows, + start=None, + stop=None, + iterator: bool = False, + chunksize: int | None = None, + auto_close: bool = False, + ): + self.store = store + self.s = s + self.func = func + self.where = where + + # set start/stop if they are not set if we are a table + if self.s.is_table: + if nrows is None: + nrows = 0 + if start is None: + start = 0 + if stop is None: + stop = nrows + stop = min(nrows, stop) + + self.nrows = nrows + self.start = start + self.stop = stop + + self.coordinates = None + if iterator or chunksize is not None: + if chunksize is None: + chunksize = 100000 + self.chunksize = int(chunksize) + else: + self.chunksize = None + + self.auto_close = auto_close + + def __iter__(self): + # iterate + current = self.start + if self.coordinates is None: + raise ValueError("Cannot iterate until get_result is called.") + while current < self.stop: + stop = min(current + self.chunksize, self.stop) + value = self.func(None, None, self.coordinates[current:stop]) + current = stop + if value is None or not len(value): + continue + + yield value + + self.close() + + def close(self): + if self.auto_close: + self.store.close() + + def get_result(self, coordinates: bool = False): + # return the actual iterator + if self.chunksize is not None: + if not isinstance(self.s, Table): + raise TypeError("can only use an iterator or chunksize on a table") + + self.coordinates = self.s.read_coordinates(where=self.where) + + return self + + # if specified read via coordinates (necessary for multiple selections + if coordinates: + if not isinstance(self.s, Table): + raise TypeError("can only read_coordinates on a table") + where = self.s.read_coordinates( + where=self.where, start=self.start, stop=self.stop + ) + else: + where = self.where + + # directly return the result + results = self.func(self.start, self.stop, where) + self.close() + return results + + +class IndexCol: + """ + an index column description class + + Parameters + ---------- + axis : axis which I reference + values : the ndarray like converted values + kind : a string description of this type + typ : the pytables type + pos : the position in the pytables + + """ + + is_an_indexable = True + is_data_indexable = True + _info_fields = ["freq", "tz", "index_name"] + + name: str + cname: str + + def __init__( + self, + name: str, + values=None, + kind=None, + typ=None, + cname: str | None = None, + axis=None, + pos=None, + freq=None, + tz=None, + index_name=None, + ordered=None, + table=None, + meta=None, + metadata=None, + ): + + if not isinstance(name, str): + raise ValueError("`name` must be a str.") + + self.values = values + self.kind = kind + self.typ = typ + self.name = name + self.cname = cname or name + self.axis = axis + self.pos = pos + self.freq = freq + self.tz = tz + self.index_name = index_name + self.ordered = ordered + self.table = table + self.meta = meta + self.metadata = metadata + + if pos is not None: + self.set_pos(pos) + + # These are ensured as long as the passed arguments match the + # constructor annotations. 
+ assert isinstance(self.name, str) + assert isinstance(self.cname, str) + + @property + def itemsize(self) -> int: + # Assumes self.typ has already been initialized + return self.typ.itemsize + + @property + def kind_attr(self) -> str: + return f"{self.name}_kind" + + def set_pos(self, pos: int): + """set the position of this column in the Table""" + self.pos = pos + if pos is not None and self.typ is not None: + self.typ._v_pos = pos + + def __repr__(self) -> str: + temp = tuple( + map(pprint_thing, (self.name, self.cname, self.axis, self.pos, self.kind)) + ) + return ",".join( + [ + f"{key}->{value}" + for key, value in zip(["name", "cname", "axis", "pos", "kind"], temp) + ] + ) + + def __eq__(self, other: Any) -> bool: + """compare 2 col items""" + return all( + getattr(self, a, None) == getattr(other, a, None) + for a in ["name", "cname", "axis", "pos"] + ) + + def __ne__(self, other) -> bool: + return not self.__eq__(other) + + @property + def is_indexed(self) -> bool: + """return whether I am an indexed column""" + if not hasattr(self.table, "cols"): + # e.g. if infer hasn't been called yet, self.table will be None. + return False + return getattr(self.table.cols, self.cname).is_indexed + + def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str): + """ + Convert the data from this selection to the appropriate pandas type. + """ + assert isinstance(values, np.ndarray), type(values) + + # values is a recarray + if values.dtype.fields is not None: + values = values[self.cname] + + val_kind = _ensure_decoded(self.kind) + values = _maybe_convert(values, val_kind, encoding, errors) + + kwargs = {} + kwargs["name"] = _ensure_decoded(self.index_name) + + if self.freq is not None: + kwargs["freq"] = _ensure_decoded(self.freq) + + factory: type[Index] | type[DatetimeIndex] = Index + if is_datetime64_dtype(values.dtype) or is_datetime64tz_dtype(values.dtype): + factory = DatetimeIndex + elif values.dtype == "i8" and "freq" in kwargs: + # PeriodIndex data is stored as i8 + # error: Incompatible types in assignment (expression has type + # "Callable[[Any, KwArg(Any)], PeriodIndex]", variable has type + # "Union[Type[Index], Type[DatetimeIndex]]") + factory = lambda x, **kwds: PeriodIndex( # type: ignore[assignment] + ordinal=x, **kwds + ) + + # making an Index instance could throw a number of different errors + try: + new_pd_index = factory(values, **kwargs) + except ValueError: + # if the output freq is different that what we recorded, + # it should be None (see also 'doc example part 2') + if "freq" in kwargs: + kwargs["freq"] = None + new_pd_index = factory(values, **kwargs) + final_pd_index = _set_tz(new_pd_index, self.tz) + return final_pd_index, final_pd_index + + def take_data(self): + """return the values""" + return self.values + + @property + def attrs(self): + return self.table._v_attrs + + @property + def description(self): + return self.table.description + + @property + def col(self): + """return my current col description""" + return getattr(self.description, self.cname, None) + + @property + def cvalues(self): + """return my cython values""" + return self.values + + def __iter__(self): + return iter(self.values) + + def maybe_set_size(self, min_itemsize=None): + """ + maybe set a string col itemsize: + min_itemsize can be an integer or a dict with this columns name + with an integer size + """ + if _ensure_decoded(self.kind) == "string": + if isinstance(min_itemsize, dict): + min_itemsize = min_itemsize.get(self.name) + + if min_itemsize is not None and 
self.typ.itemsize < min_itemsize: + self.typ = _tables().StringCol(itemsize=min_itemsize, pos=self.pos) + + def validate_names(self): + pass + + def validate_and_set(self, handler: AppendableTable, append: bool): + self.table = handler.table + self.validate_col() + self.validate_attr(append) + self.validate_metadata(handler) + self.write_metadata(handler) + self.set_attr() + + def validate_col(self, itemsize=None): + """validate this column: return the compared against itemsize""" + # validate this column for string truncation (or reset to the max size) + if _ensure_decoded(self.kind) == "string": + c = self.col + if c is not None: + if itemsize is None: + itemsize = self.itemsize + if c.itemsize < itemsize: + raise ValueError( + f"Trying to store a string with len [{itemsize}] in " + f"[{self.cname}] column but\nthis column has a limit of " + f"[{c.itemsize}]!\nConsider using min_itemsize to " + "preset the sizes on these columns" + ) + return c.itemsize + + return None + + def validate_attr(self, append: bool): + # check for backwards incompatibility + if append: + existing_kind = getattr(self.attrs, self.kind_attr, None) + if existing_kind is not None and existing_kind != self.kind: + raise TypeError( + f"incompatible kind in col [{existing_kind} - {self.kind}]" + ) + + def update_info(self, info): + """ + set/update the info for this indexable with the key/value + if there is a conflict raise/warn as needed + """ + for key in self._info_fields: + + value = getattr(self, key, None) + idx = info.setdefault(self.name, {}) + + existing_value = idx.get(key) + if key in idx and value is not None and existing_value != value: + # frequency/name just warn + if key in ["freq", "index_name"]: + ws = attribute_conflict_doc % (key, existing_value, value) + warnings.warn( + ws, AttributeConflictWarning, stacklevel=find_stack_level() + ) + + # reset + idx[key] = None + setattr(self, key, None) + + else: + raise ValueError( + f"invalid info for [{self.name}] for [{key}], " + f"existing_value [{existing_value}] conflicts with " + f"new value [{value}]" + ) + else: + if value is not None or existing_value is not None: + idx[key] = value + + def set_info(self, info): + """set my state from the passed info""" + idx = info.get(self.name) + if idx is not None: + self.__dict__.update(idx) + + def set_attr(self): + """set the kind for this column""" + setattr(self.attrs, self.kind_attr, self.kind) + + def validate_metadata(self, handler: AppendableTable): + """validate that kind=category does not change the categories""" + if self.meta == "category": + new_metadata = self.metadata + cur_metadata = handler.read_metadata(self.cname) + if ( + new_metadata is not None + and cur_metadata is not None + and not array_equivalent(new_metadata, cur_metadata) + ): + raise ValueError( + "cannot append a categorical with " + "different categories to the existing" + ) + + def write_metadata(self, handler: AppendableTable): + """set the meta data""" + if self.metadata is not None: + handler.write_metadata(self.cname, self.metadata) + + +class GenericIndexCol(IndexCol): + """an index which is not represented in the data of the table""" + + @property + def is_indexed(self) -> bool: + return False + + def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str): + """ + Convert the data from this selection to the appropriate pandas type. 
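+
+        The values are used only for their length: a fresh
+        ``Int64Index(np.arange(len(values)))`` is returned, since this
+        index has no stored representation in the table data.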
+ + Parameters + ---------- + values : np.ndarray + nan_rep : str + encoding : str + errors : str + """ + assert isinstance(values, np.ndarray), type(values) + + # error: Incompatible types in assignment (expression has type + # "Int64Index", variable has type "ndarray") + values = Int64Index(np.arange(len(values))) # type: ignore[assignment] + return values, values + + def set_attr(self): + pass + + +class DataCol(IndexCol): + """ + a data holding column, by definition this is not indexable + + Parameters + ---------- + data : the actual data + cname : the column name in the table to hold the data (typically + values) + meta : a string description of the metadata + metadata : the actual metadata + """ + + is_an_indexable = False + is_data_indexable = False + _info_fields = ["tz", "ordered"] + + def __init__( + self, + name: str, + values=None, + kind=None, + typ=None, + cname=None, + pos=None, + tz=None, + ordered=None, + table=None, + meta=None, + metadata=None, + dtype: DtypeArg | None = None, + data=None, + ): + super().__init__( + name=name, + values=values, + kind=kind, + typ=typ, + pos=pos, + cname=cname, + tz=tz, + ordered=ordered, + table=table, + meta=meta, + metadata=metadata, + ) + self.dtype = dtype + self.data = data + + @property + def dtype_attr(self) -> str: + return f"{self.name}_dtype" + + @property + def meta_attr(self) -> str: + return f"{self.name}_meta" + + def __repr__(self) -> str: + temp = tuple( + map( + pprint_thing, (self.name, self.cname, self.dtype, self.kind, self.shape) + ) + ) + return ",".join( + [ + f"{key}->{value}" + for key, value in zip(["name", "cname", "dtype", "kind", "shape"], temp) + ] + ) + + def __eq__(self, other: Any) -> bool: + """compare 2 col items""" + return all( + getattr(self, a, None) == getattr(other, a, None) + for a in ["name", "cname", "dtype", "pos"] + ) + + def set_data(self, data: ArrayLike): + assert data is not None + assert self.dtype is None + + data, dtype_name = _get_data_and_dtype_name(data) + + self.data = data + self.dtype = dtype_name + self.kind = _dtype_to_kind(dtype_name) + + def take_data(self): + """return the data""" + return self.data + + @classmethod + def _get_atom(cls, values: ArrayLike) -> Col: + """ + Get an appropriately typed and shaped pytables.Col object for values. 
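+
+        Illustrative mapping (a sketch, not exhaustive): float64 ->
+        ``Float64Col``, datetime64/timedelta64 -> ``Int64Col`` (stored as
+        i8), strings -> ``StringCol`` sized by ``itemsize``.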
+ """ + dtype = values.dtype + # error: Item "ExtensionDtype" of "Union[ExtensionDtype, dtype[Any]]" has no + # attribute "itemsize" + itemsize = dtype.itemsize # type: ignore[union-attr] + + shape = values.shape + if values.ndim == 1: + # EA, use block shape pretending it is 2D + # TODO(EA2D): not necessary with 2D EAs + shape = (1, values.size) + + if isinstance(values, Categorical): + codes = values.codes + atom = cls.get_atom_data(shape, kind=codes.dtype.name) + elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): + atom = cls.get_atom_datetime64(shape) + elif is_timedelta64_dtype(dtype): + atom = cls.get_atom_timedelta64(shape) + elif is_complex_dtype(dtype): + atom = _tables().ComplexCol(itemsize=itemsize, shape=shape[0]) + elif is_string_dtype(dtype): + atom = cls.get_atom_string(shape, itemsize) + else: + atom = cls.get_atom_data(shape, kind=dtype.name) + + return atom + + @classmethod + def get_atom_string(cls, shape, itemsize): + return _tables().StringCol(itemsize=itemsize, shape=shape[0]) + + @classmethod + def get_atom_coltype(cls, kind: str) -> type[Col]: + """return the PyTables column class for this column""" + if kind.startswith("uint"): + k4 = kind[4:] + col_name = f"UInt{k4}Col" + elif kind.startswith("period"): + # we store as integer + col_name = "Int64Col" + else: + kcap = kind.capitalize() + col_name = f"{kcap}Col" + + return getattr(_tables(), col_name) + + @classmethod + def get_atom_data(cls, shape, kind: str) -> Col: + return cls.get_atom_coltype(kind=kind)(shape=shape[0]) + + @classmethod + def get_atom_datetime64(cls, shape): + return _tables().Int64Col(shape=shape[0]) + + @classmethod + def get_atom_timedelta64(cls, shape): + return _tables().Int64Col(shape=shape[0]) + + @property + def shape(self): + return getattr(self.data, "shape", None) + + @property + def cvalues(self): + """return my cython values""" + return self.data + + def validate_attr(self, append): + """validate that we have the same order as the existing & same dtype""" + if append: + existing_fields = getattr(self.attrs, self.kind_attr, None) + if existing_fields is not None and existing_fields != list(self.values): + raise ValueError("appended items do not match existing items in table!") + + existing_dtype = getattr(self.attrs, self.dtype_attr, None) + if existing_dtype is not None and existing_dtype != self.dtype: + raise ValueError( + "appended items dtype do not match existing items dtype in table!" + ) + + def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str): + """ + Convert the data from this selection to the appropriate pandas type. 
+ + Parameters + ---------- + values : np.ndarray + nan_rep : + encoding : str + errors : str + + Returns + ------- + index : listlike to become an Index + data : ndarraylike to become a column + """ + assert isinstance(values, np.ndarray), type(values) + + # values is a recarray + if values.dtype.fields is not None: + values = values[self.cname] + + assert self.typ is not None + if self.dtype is None: + # Note: in tests we never have timedelta64 or datetime64, + # so the _get_data_and_dtype_name may be unnecessary + converted, dtype_name = _get_data_and_dtype_name(values) + kind = _dtype_to_kind(dtype_name) + else: + converted = values + dtype_name = self.dtype + kind = self.kind + + assert isinstance(converted, np.ndarray) # for mypy + + # use the meta if needed + meta = _ensure_decoded(self.meta) + metadata = self.metadata + ordered = self.ordered + tz = self.tz + + assert dtype_name is not None + # convert to the correct dtype + dtype = _ensure_decoded(dtype_name) + + # reverse converts + if dtype == "datetime64": + # recreate with tz if indicated + converted = _set_tz(converted, tz, coerce=True) + + elif dtype == "timedelta64": + converted = np.asarray(converted, dtype="m8[ns]") + elif dtype == "date": + try: + converted = np.asarray( + [date.fromordinal(v) for v in converted], dtype=object + ) + except ValueError: + converted = np.asarray( + [date.fromtimestamp(v) for v in converted], dtype=object + ) + + elif meta == "category": + # we have a categorical + categories = metadata + codes = converted.ravel() + + # if we have stored a NaN in the categories + # then strip it; in theory we could have BOTH + # -1s in the codes and nulls :< + if categories is None: + # Handle case of NaN-only categorical columns in which case + # the categories are an empty array; when this is stored, + # pytables cannot write a zero-len array, so on readback + # the categories would be None and `read_hdf()` would fail. + categories = Index([], dtype=np.float64) + else: + mask = isna(categories) + if mask.any(): + categories = categories[~mask] + codes[codes != -1] -= mask.astype(int).cumsum()._values + + converted = Categorical.from_codes( + codes, categories=categories, ordered=ordered + ) + + else: + + try: + converted = converted.astype(dtype, copy=False) + except TypeError: + converted = converted.astype("O", copy=False) + + # convert nans / decode + if _ensure_decoded(kind) == "string": + converted = _unconvert_string_array( + converted, nan_rep=nan_rep, encoding=encoding, errors=errors + ) + + return self.values, converted + + def set_attr(self): + """set the data for this column""" + setattr(self.attrs, self.kind_attr, self.values) + setattr(self.attrs, self.meta_attr, self.meta) + assert self.dtype is not None + setattr(self.attrs, self.dtype_attr, self.dtype) + + +class DataIndexableCol(DataCol): + """represent a data column that can be indexed""" + + is_data_indexable = True + + def validate_names(self): + if not Index(self.values).is_object(): + # TODO: should the message here be more specifically non-str? 
+ raise ValueError("cannot have non-object label DataIndexableCol") + + @classmethod + def get_atom_string(cls, shape, itemsize): + return _tables().StringCol(itemsize=itemsize) + + @classmethod + def get_atom_data(cls, shape, kind: str) -> Col: + return cls.get_atom_coltype(kind=kind)() + + @classmethod + def get_atom_datetime64(cls, shape): + return _tables().Int64Col() + + @classmethod + def get_atom_timedelta64(cls, shape): + return _tables().Int64Col() + + +class GenericDataIndexableCol(DataIndexableCol): + """represent a generic pytables data column""" + + pass + + +class Fixed: + """ + represent an object in my store + facilitate read/write of various types of objects + this is an abstract base class + + Parameters + ---------- + parent : HDFStore + group : Node + The group node where the table resides. + """ + + pandas_kind: str + format_type: str = "fixed" # GH#30962 needed by dask + obj_type: type[DataFrame | Series] + ndim: int + encoding: str + parent: HDFStore + group: Node + errors: str + is_table = False + + def __init__( + self, + parent: HDFStore, + group: Node, + encoding: str = "UTF-8", + errors: str = "strict", + ): + assert isinstance(parent, HDFStore), type(parent) + assert _table_mod is not None # needed for mypy + assert isinstance(group, _table_mod.Node), type(group) + self.parent = parent + self.group = group + self.encoding = _ensure_encoding(encoding) + self.errors = errors + + @property + def is_old_version(self) -> bool: + return self.version[0] <= 0 and self.version[1] <= 10 and self.version[2] < 1 + + @property + def version(self) -> tuple[int, int, int]: + """compute and set our version""" + version = _ensure_decoded(getattr(self.group._v_attrs, "pandas_version", None)) + try: + version = tuple(int(x) for x in version.split(".")) + if len(version) == 2: + version = version + (0,) + except AttributeError: + version = (0, 0, 0) + return version + + @property + def pandas_type(self): + return _ensure_decoded(getattr(self.group._v_attrs, "pandas_type", None)) + + def __repr__(self) -> str: + """return a pretty representation of myself""" + self.infer_axes() + s = self.shape + if s is not None: + if isinstance(s, (list, tuple)): + jshape = ",".join([pprint_thing(x) for x in s]) + s = f"[{jshape}]" + return f"{self.pandas_type:12.12} (shape->{s})" + return self.pandas_type + + def set_object_info(self): + """set my pandas type & version""" + self.attrs.pandas_type = str(self.pandas_kind) + self.attrs.pandas_version = str(_version) + + def copy(self): + new_self = copy.copy(self) + return new_self + + @property + def shape(self): + return self.nrows + + @property + def pathname(self): + return self.group._v_pathname + + @property + def _handle(self): + return self.parent._handle + + @property + def _filters(self): + return self.parent._filters + + @property + def _complevel(self) -> int: + return self.parent._complevel + + @property + def _fletcher32(self) -> bool: + return self.parent._fletcher32 + + @property + def attrs(self): + return self.group._v_attrs + + def set_attrs(self): + """set our object attributes""" + pass + + def get_attrs(self): + """get our object attributes""" + pass + + @property + def storable(self): + """return my storable""" + return self.group + + @property + def is_exists(self) -> bool: + return False + + @property + def nrows(self): + return getattr(self.storable, "nrows", None) + + def validate(self, other): + """validate against an existing storable""" + if other is None: + return + return True + + def validate_version(self, 
where=None): + """are we trying to operate on an old version?""" + return True + + def infer_axes(self): + """ + infer the axes of my storer + return a boolean indicating if we have a valid storer or not + """ + s = self.storable + if s is None: + return False + self.get_attrs() + return True + + def read( + self, + where=None, + columns=None, + start: int | None = None, + stop: int | None = None, + ): + raise NotImplementedError( + "cannot read on an abstract storer: subclasses should implement" + ) + + def write(self, **kwargs): + raise NotImplementedError( + "cannot write on an abstract storer: subclasses should implement" + ) + + def delete(self, where=None, start: int | None = None, stop: int | None = None): + """ + support fully deleting the node in its entirety (only) - where + specification must be None + """ + if com.all_none(where, start, stop): + self._handle.remove_node(self.group, recursive=True) + return None + + raise TypeError("cannot delete on an abstract storer") + + +class GenericFixed(Fixed): + """a generified fixed version""" + + _index_type_map = {DatetimeIndex: "datetime", PeriodIndex: "period"} + _reverse_index_map = {v: k for k, v in _index_type_map.items()} + attributes: list[str] = [] + + # indexer helpers + def _class_to_alias(self, cls) -> str: + return self._index_type_map.get(cls, "") + + def _alias_to_class(self, alias): + if isinstance(alias, type): # pragma: no cover + # compat: for a short period of time master stored types + return alias + return self._reverse_index_map.get(alias, Index) + + def _get_index_factory(self, attrs): + index_class = self._alias_to_class( + _ensure_decoded(getattr(attrs, "index_class", "")) + ) + + factory: Callable + + if index_class == DatetimeIndex: + + def f(values, freq=None, tz=None): + # data are already in UTC, localize and convert if tz present + dta = DatetimeArray._simple_new(values.values, freq=freq) + result = DatetimeIndex._simple_new(dta, name=None) + if tz is not None: + result = result.tz_localize("UTC").tz_convert(tz) + return result + + factory = f + elif index_class == PeriodIndex: + + def f(values, freq=None, tz=None): + parr = PeriodArray._simple_new(values, freq=freq) + return PeriodIndex._simple_new(parr, name=None) + + factory = f + else: + factory = index_class + + kwargs = {} + if "freq" in attrs: + kwargs["freq"] = attrs["freq"] + if index_class is Index: + # DTI/PI would be gotten by _alias_to_class + factory = TimedeltaIndex + + if "tz" in attrs: + if isinstance(attrs["tz"], bytes): + # created by python2 + kwargs["tz"] = attrs["tz"].decode("utf-8") + else: + # created by python3 + kwargs["tz"] = attrs["tz"] + assert index_class is DatetimeIndex # just checking + + return factory, kwargs + + def validate_read(self, columns, where): + """ + raise if any keywords are passed which are not-None + """ + if columns is not None: + raise TypeError( + "cannot pass a column specification when reading " + "a Fixed format store. this store must be selected in its entirety" + ) + if where is not None: + raise TypeError( + "cannot pass a where specification when reading " + "from a Fixed format store. 
this store must be selected in its entirety" + ) + + @property + def is_exists(self) -> bool: + return True + + def set_attrs(self): + """set our object attributes""" + self.attrs.encoding = self.encoding + self.attrs.errors = self.errors + + def get_attrs(self): + """retrieve our attributes""" + self.encoding = _ensure_encoding(getattr(self.attrs, "encoding", None)) + self.errors = _ensure_decoded(getattr(self.attrs, "errors", "strict")) + for n in self.attributes: + setattr(self, n, _ensure_decoded(getattr(self.attrs, n, None))) + + def write(self, obj, **kwargs): + self.set_attrs() + + def read_array(self, key: str, start: int | None = None, stop: int | None = None): + """read an array for the specified node (off of group""" + import tables + + node = getattr(self.group, key) + attrs = node._v_attrs + + transposed = getattr(attrs, "transposed", False) + + if isinstance(node, tables.VLArray): + ret = node[0][start:stop] + else: + dtype = _ensure_decoded(getattr(attrs, "value_type", None)) + shape = getattr(attrs, "shape", None) + + if shape is not None: + # length 0 axis + ret = np.empty(shape, dtype=dtype) + else: + ret = node[start:stop] + + if dtype == "datetime64": + # reconstruct a timezone if indicated + tz = getattr(attrs, "tz", None) + ret = _set_tz(ret, tz, coerce=True) + + elif dtype == "timedelta64": + ret = np.asarray(ret, dtype="m8[ns]") + + if transposed: + return ret.T + else: + return ret + + def read_index( + self, key: str, start: int | None = None, stop: int | None = None + ) -> Index: + variety = _ensure_decoded(getattr(self.attrs, f"{key}_variety")) + + if variety == "multi": + return self.read_multi_index(key, start=start, stop=stop) + elif variety == "regular": + node = getattr(self.group, key) + index = self.read_index_node(node, start=start, stop=stop) + return index + else: # pragma: no cover + raise TypeError(f"unrecognized index variety: {variety}") + + def write_index(self, key: str, index: Index): + if isinstance(index, MultiIndex): + setattr(self.attrs, f"{key}_variety", "multi") + self.write_multi_index(key, index) + else: + setattr(self.attrs, f"{key}_variety", "regular") + converted = _convert_index("index", index, self.encoding, self.errors) + + self.write_array(key, converted.values) + + node = getattr(self.group, key) + node._v_attrs.kind = converted.kind + node._v_attrs.name = index.name + + if isinstance(index, (DatetimeIndex, PeriodIndex)): + node._v_attrs.index_class = self._class_to_alias(type(index)) + + if isinstance(index, (DatetimeIndex, PeriodIndex, TimedeltaIndex)): + node._v_attrs.freq = index.freq + + if isinstance(index, DatetimeIndex) and index.tz is not None: + node._v_attrs.tz = _get_tz(index.tz) + + def write_multi_index(self, key: str, index: MultiIndex): + setattr(self.attrs, f"{key}_nlevels", index.nlevels) + + for i, (lev, level_codes, name) in enumerate( + zip(index.levels, index.codes, index.names) + ): + # write the level + if is_extension_array_dtype(lev): + raise NotImplementedError( + "Saving a MultiIndex with an extension dtype is not supported." 
+ ) + level_key = f"{key}_level{i}" + conv_level = _convert_index(level_key, lev, self.encoding, self.errors) + self.write_array(level_key, conv_level.values) + node = getattr(self.group, level_key) + node._v_attrs.kind = conv_level.kind + node._v_attrs.name = name + + # write the name + setattr(node._v_attrs, f"{key}_name{name}", name) + + # write the labels + label_key = f"{key}_label{i}" + self.write_array(label_key, level_codes) + + def read_multi_index( + self, key: str, start: int | None = None, stop: int | None = None + ) -> MultiIndex: + nlevels = getattr(self.attrs, f"{key}_nlevels") + + levels = [] + codes = [] + names: list[Hashable] = [] + for i in range(nlevels): + level_key = f"{key}_level{i}" + node = getattr(self.group, level_key) + lev = self.read_index_node(node, start=start, stop=stop) + levels.append(lev) + names.append(lev.name) + + label_key = f"{key}_label{i}" + level_codes = self.read_array(label_key, start=start, stop=stop) + codes.append(level_codes) + + return MultiIndex( + levels=levels, codes=codes, names=names, verify_integrity=True + ) + + def read_index_node( + self, node: Node, start: int | None = None, stop: int | None = None + ) -> Index: + data = node[start:stop] + # If the index was an empty array write_array_empty() will + # have written a sentinel. Here we replace it with the original. + if "shape" in node._v_attrs and np.prod(node._v_attrs.shape) == 0: + data = np.empty(node._v_attrs.shape, dtype=node._v_attrs.value_type) + kind = _ensure_decoded(node._v_attrs.kind) + name = None + + if "name" in node._v_attrs: + name = _ensure_str(node._v_attrs.name) + name = _ensure_decoded(name) + + attrs = node._v_attrs + factory, kwargs = self._get_index_factory(attrs) + + if kind == "date": + index = factory( + _unconvert_index( + data, kind, encoding=self.encoding, errors=self.errors + ), + dtype=object, + **kwargs, + ) + else: + index = factory( + _unconvert_index( + data, kind, encoding=self.encoding, errors=self.errors + ), + **kwargs, + ) + + index.name = name + + return index + + def write_array_empty(self, key: str, value: ArrayLike): + """write a 0-len array""" + # ugly hack for length 0 axes + arr = np.empty((1,) * value.ndim) + self._handle.create_array(self.group, key, arr) + node = getattr(self.group, key) + node._v_attrs.value_type = str(value.dtype) + node._v_attrs.shape = value.shape + + def write_array( + self, key: str, obj: DataFrame | Series, items: Index | None = None + ) -> None: + # TODO: we only have a few tests that get here, the only EA + # that gets passed is DatetimeArray, and we never have + # both self._filters and EA + + value = extract_array(obj, extract_numpy=True) + + if key in self.group: + self._handle.remove_node(self.group, key) + + # Transform needed to interface with pytables row/col notation + empty_array = value.size == 0 + transposed = False + + if is_categorical_dtype(value.dtype): + raise NotImplementedError( + "Cannot store a category dtype in a HDF5 dataset that uses format=" + '"fixed". Use format="table".' + ) + if not empty_array: + if hasattr(value, "T"): + # ExtensionArrays (1d) may not have transpose. 
+ value = value.T + transposed = True + + atom = None + if self._filters is not None: + with suppress(ValueError): + # get the atom for this datatype + atom = _tables().Atom.from_dtype(value.dtype) + + if atom is not None: + # We only get here if self._filters is non-None and + # the Atom.from_dtype call succeeded + + # create an empty chunked array and fill it from value + if not empty_array: + ca = self._handle.create_carray( + self.group, key, atom, value.shape, filters=self._filters + ) + ca[:] = value + + else: + self.write_array_empty(key, value) + + elif value.dtype.type == np.object_: + # infer the type, warn if we have a non-string type here (for + # performance) + inferred_type = lib.infer_dtype(value, skipna=False) + if empty_array: + pass + elif inferred_type == "string": + pass + else: + ws = performance_doc % (inferred_type, key, items) + warnings.warn(ws, PerformanceWarning, stacklevel=find_stack_level()) + + vlarr = self._handle.create_vlarray(self.group, key, _tables().ObjectAtom()) + vlarr.append(value) + + elif is_datetime64_dtype(value.dtype): + self._handle.create_array(self.group, key, value.view("i8")) + getattr(self.group, key)._v_attrs.value_type = "datetime64" + elif is_datetime64tz_dtype(value.dtype): + # store as UTC + # with a zone + + # error: Item "ExtensionArray" of "Union[Any, ExtensionArray]" has no + # attribute "asi8" + self._handle.create_array( + self.group, key, value.asi8 # type: ignore[union-attr] + ) + + node = getattr(self.group, key) + # error: Item "ExtensionArray" of "Union[Any, ExtensionArray]" has no + # attribute "tz" + node._v_attrs.tz = _get_tz(value.tz) # type: ignore[union-attr] + node._v_attrs.value_type = "datetime64" + elif is_timedelta64_dtype(value.dtype): + self._handle.create_array(self.group, key, value.view("i8")) + getattr(self.group, key)._v_attrs.value_type = "timedelta64" + elif empty_array: + self.write_array_empty(key, value) + else: + self._handle.create_array(self.group, key, value) + + getattr(self.group, key)._v_attrs.transposed = transposed + + +class SeriesFixed(GenericFixed): + pandas_kind = "series" + attributes = ["name"] + + name: Hashable + + @property + def shape(self): + try: + return (len(self.group.values),) + except (TypeError, AttributeError): + return None + + def read( + self, + where=None, + columns=None, + start: int | None = None, + stop: int | None = None, + ): + self.validate_read(columns, where) + index = self.read_index("index", start=start, stop=stop) + values = self.read_array("values", start=start, stop=stop) + return Series(values, index=index, name=self.name) + + def write(self, obj, **kwargs): + super().write(obj, **kwargs) + self.write_index("index", obj.index) + self.write_array("values", obj) + self.attrs.name = obj.name + + +class BlockManagerFixed(GenericFixed): + attributes = ["ndim", "nblocks"] + + nblocks: int + + @property + def shape(self) -> Shape | None: + try: + ndim = self.ndim + + # items + items = 0 + for i in range(self.nblocks): + node = getattr(self.group, f"block{i}_items") + shape = getattr(node, "shape", None) + if shape is not None: + items += shape[0] + + # data shape + node = self.group.block0_values + shape = getattr(node, "shape", None) + if shape is not None: + shape = list(shape[0 : (ndim - 1)]) + else: + shape = [] + + shape.append(items) + + return shape + except AttributeError: + return None + + def read( + self, + where=None, + columns=None, + start: int | None = None, + stop: int | None = None, + ): + # start, stop applied to rows, so 0th axis only + 
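+        # blocks are read back per-dtype and reassembled below; the columns
+        # are then reordered to match the original `items` ordering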
self.validate_read(columns, where) + select_axis = self.obj_type()._get_block_manager_axis(0) + + axes = [] + for i in range(self.ndim): + + _start, _stop = (start, stop) if i == select_axis else (None, None) + ax = self.read_index(f"axis{i}", start=_start, stop=_stop) + axes.append(ax) + + items = axes[0] + dfs = [] + + for i in range(self.nblocks): + + blk_items = self.read_index(f"block{i}_items") + values = self.read_array(f"block{i}_values", start=_start, stop=_stop) + + columns = items[items.get_indexer(blk_items)] + df = DataFrame(values.T, columns=columns, index=axes[1]) + dfs.append(df) + + if len(dfs) > 0: + out = concat(dfs, axis=1) + out = out.reindex(columns=items, copy=False) + return out + + return DataFrame(columns=axes[0], index=axes[1]) + + def write(self, obj, **kwargs): + super().write(obj, **kwargs) + + # TODO(ArrayManager) HDFStore relies on accessing the blocks + if isinstance(obj._mgr, ArrayManager): + obj = obj._as_manager("block") + + data = obj._mgr + if not data.is_consolidated(): + data = data.consolidate() + + self.attrs.ndim = data.ndim + for i, ax in enumerate(data.axes): + if i == 0 and (not ax.is_unique): + raise ValueError("Columns index has to be unique for fixed format") + self.write_index(f"axis{i}", ax) + + # Supporting mixed-type DataFrame objects...nontrivial + self.attrs.nblocks = len(data.blocks) + for i, blk in enumerate(data.blocks): + # I have no idea why, but writing values before items fixed #2299 + blk_items = data.items.take(blk.mgr_locs) + self.write_array(f"block{i}_values", blk.values, items=blk_items) + self.write_index(f"block{i}_items", blk_items) + + +class FrameFixed(BlockManagerFixed): + pandas_kind = "frame" + obj_type = DataFrame + + +class Table(Fixed): + """ + represent a table: + facilitate read/write of various types of tables + + Attrs in Table Node + ------------------- + These are attributes that are store in the main table node, they are + necessary to recreate these tables when read back in. 
+ + index_axes : a list of tuples of the (original indexing axis and + index column) + non_index_axes: a list of tuples of the (original index axis and + columns on a non-indexing axis) + values_axes : a list of the columns which comprise the data of this + table + data_columns : a list of the columns that we are allowing indexing + (these become single columns in values_axes) + nan_rep : the string to use for nan representations for string + objects + levels : the names of levels + metadata : the names of the metadata columns + """ + + pandas_kind = "wide_table" + format_type: str = "table" # GH#30962 needed by dask + table_type: str + levels: int | list[Hashable] = 1 + is_table = True + + index_axes: list[IndexCol] + non_index_axes: list[tuple[int, Any]] + values_axes: list[DataCol] + data_columns: list + metadata: list + info: dict + + def __init__( + self, + parent: HDFStore, + group: Node, + encoding=None, + errors: str = "strict", + index_axes=None, + non_index_axes=None, + values_axes=None, + data_columns=None, + info=None, + nan_rep=None, + ): + super().__init__(parent, group, encoding=encoding, errors=errors) + self.index_axes = index_axes or [] + self.non_index_axes = non_index_axes or [] + self.values_axes = values_axes or [] + self.data_columns = data_columns or [] + self.info = info or {} + self.nan_rep = nan_rep + + @property + def table_type_short(self) -> str: + return self.table_type.split("_")[0] + + def __repr__(self) -> str: + """return a pretty representation of myself""" + self.infer_axes() + jdc = ",".join(self.data_columns) if len(self.data_columns) else "" + dc = f",dc->[{jdc}]" + + ver = "" + if self.is_old_version: + jver = ".".join([str(x) for x in self.version]) + ver = f"[{jver}]" + + jindex_axes = ",".join([a.name for a in self.index_axes]) + return ( + f"{self.pandas_type:12.12}{ver} " + f"(typ->{self.table_type_short},nrows->{self.nrows}," + f"ncols->{self.ncols},indexers->[{jindex_axes}]{dc})" + ) + + def __getitem__(self, c: str): + """return the axis for c""" + for a in self.axes: + if c == a.name: + return a + return None + + def validate(self, other): + """validate against an existing table""" + if other is None: + return + + if other.table_type != self.table_type: + raise TypeError( + "incompatible table_type with existing " + f"[{other.table_type} - {self.table_type}]" + ) + + for c in ["index_axes", "non_index_axes", "values_axes"]: + sv = getattr(self, c, None) + ov = getattr(other, c, None) + if sv != ov: + + # show the error for the specific axes + # Argument 1 to "enumerate" has incompatible type + # "Optional[Any]"; expected "Iterable[Any]" [arg-type] + for i, sax in enumerate(sv): # type: ignore[arg-type] + # Value of type "Optional[Any]" is not indexable [index] + oax = ov[i] # type: ignore[index] + if sax != oax: + raise ValueError( + f"invalid combination of [{c}] on appending data " + f"[{sax}] vs current table [{oax}]" + ) + + # should never get here + raise Exception( + f"invalid combination of [{c}] on appending data [{sv}] vs " + f"current table [{ov}]" + ) + + @property + def is_multi_index(self) -> bool: + """the levels attribute is 1 or a list in the case of a multi-index""" + return isinstance(self.levels, list) + + def validate_multiindex( + self, obj: DataFrame | Series + ) -> tuple[DataFrame, list[Hashable]]: + """ + validate that we can store the multi-index; reset and return the + new object + """ + levels = com.fill_missing_names(obj.index.names) + try: + reset_obj = obj.reset_index() + except ValueError as err: + raise 
ValueError( + "duplicate names/columns in the multi-index when storing as a table" + ) from err + assert isinstance(reset_obj, DataFrame) # for mypy + return reset_obj, levels + + @property + def nrows_expected(self) -> int: + """based on our axes, compute the expected nrows""" + return np.prod([i.cvalues.shape[0] for i in self.index_axes]) + + @property + def is_exists(self) -> bool: + """has this table been created""" + return "table" in self.group + + @property + def storable(self): + return getattr(self.group, "table", None) + + @property + def table(self): + """return the table group (this is my storable)""" + return self.storable + + @property + def dtype(self): + return self.table.dtype + + @property + def description(self): + return self.table.description + + @property + def axes(self): + return itertools.chain(self.index_axes, self.values_axes) + + @property + def ncols(self) -> int: + """the number of total columns in the values axes""" + return sum(len(a.values) for a in self.values_axes) + + @property + def is_transposed(self) -> bool: + return False + + @property + def data_orientation(self): + """return a tuple of my permutated axes, non_indexable at the front""" + return tuple( + itertools.chain( + [int(a[0]) for a in self.non_index_axes], + [int(a.axis) for a in self.index_axes], + ) + ) + + def queryables(self) -> dict[str, Any]: + """return a dict of the kinds allowable columns for this object""" + # mypy doesn't recognize DataFrame._AXIS_NAMES, so we re-write it here + axis_names = {0: "index", 1: "columns"} + + # compute the values_axes queryables + d1 = [(a.cname, a) for a in self.index_axes] + d2 = [(axis_names[axis], None) for axis, values in self.non_index_axes] + d3 = [ + (v.cname, v) for v in self.values_axes if v.name in set(self.data_columns) + ] + + # error: Unsupported operand types for + ("List[Tuple[str, IndexCol]]" and + # "List[Tuple[str, None]]") + return dict(d1 + d2 + d3) # type: ignore[operator] + + def index_cols(self): + """return a list of my index cols""" + # Note: each `i.cname` below is assured to be a str. + return [(i.axis, i.cname) for i in self.index_axes] + + def values_cols(self) -> list[str]: + """return a list of my values cols""" + return [i.cname for i in self.values_axes] + + def _get_metadata_path(self, key: str) -> str: + """return the metadata pathname for this key""" + group = self.group._v_pathname + return f"{group}/meta/{key}/meta" + + def write_metadata(self, key: str, values: np.ndarray): + """ + Write out a metadata array to the key as a fixed-format Series. 
+ + Parameters + ---------- + key : str + values : ndarray + """ + self.parent.put( + self._get_metadata_path(key), + Series(values), + format="table", + encoding=self.encoding, + errors=self.errors, + nan_rep=self.nan_rep, + ) + + def read_metadata(self, key: str): + """return the meta data array for this key""" + if getattr(getattr(self.group, "meta", None), key, None) is not None: + return self.parent.select(self._get_metadata_path(key)) + return None + + def set_attrs(self): + """set our table type & indexables""" + self.attrs.table_type = str(self.table_type) + self.attrs.index_cols = self.index_cols() + self.attrs.values_cols = self.values_cols() + self.attrs.non_index_axes = self.non_index_axes + self.attrs.data_columns = self.data_columns + self.attrs.nan_rep = self.nan_rep + self.attrs.encoding = self.encoding + self.attrs.errors = self.errors + self.attrs.levels = self.levels + self.attrs.info = self.info + + def get_attrs(self): + """retrieve our attributes""" + self.non_index_axes = getattr(self.attrs, "non_index_axes", None) or [] + self.data_columns = getattr(self.attrs, "data_columns", None) or [] + self.info = getattr(self.attrs, "info", None) or {} + self.nan_rep = getattr(self.attrs, "nan_rep", None) + self.encoding = _ensure_encoding(getattr(self.attrs, "encoding", None)) + self.errors = _ensure_decoded(getattr(self.attrs, "errors", "strict")) + self.levels: list[Hashable] = getattr(self.attrs, "levels", None) or [] + self.index_axes = [a for a in self.indexables if a.is_an_indexable] + self.values_axes = [a for a in self.indexables if not a.is_an_indexable] + + def validate_version(self, where=None): + """are we trying to operate on an old version?""" + if where is not None: + if self.version[0] <= 0 and self.version[1] <= 10 and self.version[2] < 1: + ws = incompatibility_doc % ".".join([str(x) for x in self.version]) + warnings.warn(ws, IncompatibilityWarning) + + def validate_min_itemsize(self, min_itemsize): + """ + validate the min_itemsize doesn't contain items that are not in the + axes this needs data_columns to be defined + """ + if min_itemsize is None: + return + if not isinstance(min_itemsize, dict): + return + + q = self.queryables() + for k in min_itemsize: + + # ok, apply generally + if k == "values": + continue + if k not in q: + raise ValueError( + f"min_itemsize has the key [{k}] which is not an axis or " + "data_column" + ) + + @cache_readonly + def indexables(self): + """create/cache the indexables if they don't exist""" + _indexables = [] + + desc = self.description + table_attrs = self.table.attrs + + # Note: each of the `name` kwargs below are str, ensured + # by the definition in index_cols. + # index columns + for i, (axis, name) in enumerate(self.attrs.index_cols): + atom = getattr(desc, name) + md = self.read_metadata(name) + meta = "category" if md is not None else None + + kind_attr = f"{name}_kind" + kind = getattr(table_attrs, kind_attr, None) + + index_col = IndexCol( + name=name, + axis=axis, + pos=i, + kind=kind, + typ=atom, + table=self.table, + meta=meta, + metadata=md, + ) + _indexables.append(index_col) + + # values columns + dc = set(self.data_columns) + base_pos = len(_indexables) + + def f(i, c): + assert isinstance(c, str) + klass = DataCol + if c in dc: + klass = DataIndexableCol + + atom = getattr(desc, c) + adj_name = _maybe_adjust_name(c, self.version) + + # TODO: why kind_attr here? 
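+            # Illustrative sketch (not part of pandas): for a values block
+            # stored as "values_block_0" holding float64 data, the attrs read
+            # back below look roughly like
+            #
+            #     table_attrs.values_block_0_kind   -> list of column names
+            #     table_attrs.values_block_0_dtype  -> "float64"
+            #     _dtype_to_kind("float64")         -> "float"
+            #
+            # (the attr names are assumed from the kind_attr convention used
+            # when writing columns elsewhere in this file)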
+ values = getattr(table_attrs, f"{adj_name}_kind", None) + dtype = getattr(table_attrs, f"{adj_name}_dtype", None) + # Argument 1 to "_dtype_to_kind" has incompatible type + # "Optional[Any]"; expected "str" [arg-type] + kind = _dtype_to_kind(dtype) # type: ignore[arg-type] + + md = self.read_metadata(c) + # TODO: figure out why these two versions of `meta` dont always match. + # meta = "category" if md is not None else None + meta = getattr(table_attrs, f"{adj_name}_meta", None) + + obj = klass( + name=adj_name, + cname=c, + values=values, + kind=kind, + pos=base_pos + i, + typ=atom, + table=self.table, + meta=meta, + metadata=md, + dtype=dtype, + ) + return obj + + # Note: the definition of `values_cols` ensures that each + # `c` below is a str. + _indexables.extend([f(i, c) for i, c in enumerate(self.attrs.values_cols)]) + + return _indexables + + def create_index(self, columns=None, optlevel=None, kind: str | None = None): + """ + Create a pytables index on the specified columns. + + Parameters + ---------- + columns : None, bool, or listlike[str] + Indicate which columns to create an index on. + + * False : Do not create any indexes. + * True : Create indexes on all columns. + * None : Create indexes on all columns. + * listlike : Create indexes on the given columns. + + optlevel : int or None, default None + Optimization level, if None, pytables defaults to 6. + kind : str or None, default None + Kind of index, if None, pytables defaults to "medium". + + Raises + ------ + TypeError if trying to create an index on a complex-type column. + + Notes + ----- + Cannot index Time64Col or ComplexCol. + Pytables must be >= 3.0. + """ + if not self.infer_axes(): + return + if columns is False: + return + + # index all indexables and data_columns + if columns is None or columns is True: + columns = [a.cname for a in self.axes if a.is_data_indexable] + if not isinstance(columns, (tuple, list)): + columns = [columns] + + kw = {} + if optlevel is not None: + kw["optlevel"] = optlevel + if kind is not None: + kw["kind"] = kind + + table = self.table + for c in columns: + v = getattr(table.cols, c, None) + if v is not None: + # remove the index if the kind/optlevel have changed + if v.is_indexed: + index = v.index + cur_optlevel = index.optlevel + cur_kind = index.kind + + if kind is not None and cur_kind != kind: + v.remove_index() + else: + kw["kind"] = cur_kind + + if optlevel is not None and cur_optlevel != optlevel: + v.remove_index() + else: + kw["optlevel"] = cur_optlevel + + # create the index + if not v.is_indexed: + if v.type.startswith("complex"): + raise TypeError( + "Columns containing complex values can be stored but " + "cannot be indexed when using table format. Either use " + "fixed format, set index=False, or do not include " + "the columns containing complex values to " + "data_columns when initializing the table." + ) + v.create_index(**kw) + elif c in self.non_index_axes[0][1]: + # GH 28156 + raise AttributeError( + f"column {c} is not a data_column.\n" + f"In order to read column {c} you must reload the dataframe \n" + f"into HDFStore and include {c} with the data_columns argument." + ) + + def _read_axes( + self, where, start: int | None = None, stop: int | None = None + ) -> list[tuple[ArrayLike, ArrayLike]]: + """ + Create the axes sniffed from the table. + + Parameters + ---------- + where : ??? 
+ start : int or None, default None + stop : int or None, default None + + Returns + ------- + List[Tuple[index_values, column_values]] + """ + # create the selection + selection = Selection(self, where=where, start=start, stop=stop) + values = selection.select() + + results = [] + # convert the data + for a in self.axes: + a.set_info(self.info) + res = a.convert( + values, + nan_rep=self.nan_rep, + encoding=self.encoding, + errors=self.errors, + ) + results.append(res) + + return results + + @classmethod + def get_object(cls, obj, transposed: bool): + """return the data for this obj""" + return obj + + def validate_data_columns(self, data_columns, min_itemsize, non_index_axes): + """ + take the input data_columns and min_itemize and create a data + columns spec + """ + if not len(non_index_axes): + return [] + + axis, axis_labels = non_index_axes[0] + info = self.info.get(axis, {}) + if info.get("type") == "MultiIndex" and data_columns: + raise ValueError( + f"cannot use a multi-index on axis [{axis}] with " + f"data_columns {data_columns}" + ) + + # evaluate the passed data_columns, True == use all columns + # take only valid axis labels + if data_columns is True: + data_columns = list(axis_labels) + elif data_columns is None: + data_columns = [] + + # if min_itemsize is a dict, add the keys (exclude 'values') + if isinstance(min_itemsize, dict): + existing_data_columns = set(data_columns) + data_columns = list(data_columns) # ensure we do not modify + data_columns.extend( + [ + k + for k in min_itemsize.keys() + if k != "values" and k not in existing_data_columns + ] + ) + + # return valid columns in the order of our axis + return [c for c in data_columns if c in axis_labels] + + def _create_axes( + self, + axes, + obj: DataFrame, + validate: bool = True, + nan_rep=None, + data_columns=None, + min_itemsize=None, + ): + """ + Create and return the axes. + + Parameters + ---------- + axes: list or None + The names or numbers of the axes to create. + obj : DataFrame + The object to create axes on. + validate: bool, default True + Whether to validate the obj against an existing object already written. + nan_rep : + A value to use for string column nan_rep. + data_columns : List[str], True, or None, default None + Specify the columns that we want to create to allow indexing on. + + * True : Use all available columns. + * None : Use no columns. + * List[str] : Use the specified columns. + + min_itemsize: Dict[str, int] or None, default None + The min itemsize for a column in bytes. + """ + if not isinstance(obj, DataFrame): + group = self.group._v_name + raise TypeError( + f"cannot properly create the storer for: [group->{group}," + f"value->{type(obj)}]" + ) + + # set the default axes if needed + if axes is None: + axes = [0] + + # map axes to numbers + axes = [obj._get_axis_number(a) for a in axes] + + # do we have an existing table (if so, use its axes & data_columns) + if self.infer_axes(): + table_exists = True + axes = [a.axis for a in self.index_axes] + data_columns = list(self.data_columns) + nan_rep = self.nan_rep + # TODO: do we always have validate=True here? 
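+            # Illustrative sketch (not part of pandas; hypothetical key "k"):
+            # on append, the caller's axes/data_columns are replaced by the
+            # schema stored with the first write, e.g.
+            #
+            #     store.put("k", df1, format="table", data_columns=["a"])
+            #     store.append("k", df2)  # reuses axes, data_columns=["a"],
+            #                             # and nan_rep from the existing table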
+ else: + table_exists = False + + new_info = self.info + + assert self.ndim == 2 # with next check, we must have len(axes) == 1 + # currently support on ndim-1 axes + if len(axes) != self.ndim - 1: + raise ValueError( + "currently only support ndim-1 indexers in an AppendableTable" + ) + + # create according to the new data + new_non_index_axes: list = [] + + # nan_representation + if nan_rep is None: + nan_rep = "nan" + + # We construct the non-index-axis first, since that alters new_info + idx = [x for x in [0, 1] if x not in axes][0] + + a = obj.axes[idx] + # we might be able to change the axes on the appending data if necessary + append_axis = list(a) + if table_exists: + indexer = len(new_non_index_axes) # i.e. 0 + exist_axis = self.non_index_axes[indexer][1] + if not array_equivalent(np.array(append_axis), np.array(exist_axis)): + + # ahah! -> reindex + if array_equivalent( + np.array(sorted(append_axis)), np.array(sorted(exist_axis)) + ): + append_axis = exist_axis + + # the non_index_axes info + info = new_info.setdefault(idx, {}) + info["names"] = list(a.names) + info["type"] = type(a).__name__ + + new_non_index_axes.append((idx, append_axis)) + + # Now we can construct our new index axis + idx = axes[0] + a = obj.axes[idx] + axis_name = obj._get_axis_name(idx) + new_index = _convert_index(axis_name, a, self.encoding, self.errors) + new_index.axis = idx + + # Because we are always 2D, there is only one new_index, so + # we know it will have pos=0 + new_index.set_pos(0) + new_index.update_info(new_info) + new_index.maybe_set_size(min_itemsize) # check for column conflicts + + new_index_axes = [new_index] + j = len(new_index_axes) # i.e. 1 + assert j == 1 + + # reindex by our non_index_axes & compute data_columns + assert len(new_non_index_axes) == 1 + for a in new_non_index_axes: + obj = _reindex_axis(obj, a[0], a[1]) + + transposed = new_index.axis == 1 + + # figure out data_columns and get out blocks + data_columns = self.validate_data_columns( + data_columns, min_itemsize, new_non_index_axes + ) + + frame = self.get_object(obj, transposed)._consolidate() + + blocks, blk_items = self._get_blocks_and_items( + frame, table_exists, new_non_index_axes, self.values_axes, data_columns + ) + + # add my values + vaxes = [] + for i, (blk, b_items) in enumerate(zip(blocks, blk_items)): + + # shape of the data column are the indexable axes + klass = DataCol + name = None + + # we have a data_column + if data_columns and len(b_items) == 1 and b_items[0] in data_columns: + klass = DataIndexableCol + name = b_items[0] + if not (name is None or isinstance(name, str)): + # TODO: should the message here be more specifically non-str? 
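+                    # Illustrative sketch (not part of pandas): this path is
+                    # hit when a data_column has a non-str label, e.g. with a
+                    # hypothetical key "k":
+                    #
+                    #     df = DataFrame({0: [1, 2]})
+                    #     store.append("k", df, data_columns=[0])
+                    #
+                    # fixed-width table columns need str names, hence the check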
+ raise ValueError("cannot have non-object label DataIndexableCol") + + # make sure that we match up the existing columns + # if we have an existing table + existing_col: DataCol | None + + if table_exists and validate: + try: + existing_col = self.values_axes[i] + except (IndexError, KeyError) as err: + raise ValueError( + f"Incompatible appended table [{blocks}]" + f"with existing table [{self.values_axes}]" + ) from err + else: + existing_col = None + + new_name = name or f"values_block_{i}" + data_converted = _maybe_convert_for_string_atom( + new_name, + blk.values, + existing_col=existing_col, + min_itemsize=min_itemsize, + nan_rep=nan_rep, + encoding=self.encoding, + errors=self.errors, + columns=b_items, + ) + adj_name = _maybe_adjust_name(new_name, self.version) + + typ = klass._get_atom(data_converted) + kind = _dtype_to_kind(data_converted.dtype.name) + tz = None + if getattr(data_converted, "tz", None) is not None: + tz = _get_tz(data_converted.tz) + + meta = metadata = ordered = None + if is_categorical_dtype(data_converted.dtype): + ordered = data_converted.ordered + meta = "category" + metadata = np.array(data_converted.categories, copy=False).ravel() + + data, dtype_name = _get_data_and_dtype_name(data_converted) + + col = klass( + name=adj_name, + cname=new_name, + values=list(b_items), + typ=typ, + pos=j, + kind=kind, + tz=tz, + ordered=ordered, + meta=meta, + metadata=metadata, + dtype=dtype_name, + data=data, + ) + col.update_info(new_info) + + vaxes.append(col) + + j += 1 + + dcs = [col.name for col in vaxes if col.is_data_indexable] + + new_table = type(self)( + parent=self.parent, + group=self.group, + encoding=self.encoding, + errors=self.errors, + index_axes=new_index_axes, + non_index_axes=new_non_index_axes, + values_axes=vaxes, + data_columns=dcs, + info=new_info, + nan_rep=nan_rep, + ) + if hasattr(self, "levels"): + # TODO: get this into constructor, only for appropriate subclass + new_table.levels = self.levels + + new_table.validate_min_itemsize(min_itemsize) + + if validate and table_exists: + new_table.validate(self) + + return new_table + + @staticmethod + def _get_blocks_and_items( + frame: DataFrame, + table_exists: bool, + new_non_index_axes, + values_axes, + data_columns, + ): + # Helper to clarify non-state-altering parts of _create_axes + + # TODO(ArrayManager) HDFStore relies on accessing the blocks + if isinstance(frame._mgr, ArrayManager): + frame = frame._as_manager("block") + + def get_blk_items(mgr): + return [mgr.items.take(blk.mgr_locs) for blk in mgr.blocks] + + mgr = frame._mgr + mgr = cast(BlockManager, mgr) + blocks: list[Block] = list(mgr.blocks) + blk_items: list[Index] = get_blk_items(mgr) + + if len(data_columns): + axis, axis_labels = new_non_index_axes[0] + new_labels = Index(axis_labels).difference(Index(data_columns)) + mgr = frame.reindex(new_labels, axis=axis)._mgr + + # error: Item "ArrayManager" of "Union[ArrayManager, BlockManager]" has no + # attribute "blocks" + blocks = list(mgr.blocks) # type: ignore[union-attr] + blk_items = get_blk_items(mgr) + for c in data_columns: + mgr = frame.reindex([c], axis=axis)._mgr + # error: Item "ArrayManager" of "Union[ArrayManager, BlockManager]" has + # no attribute "blocks" + blocks.extend(mgr.blocks) # type: ignore[union-attr] + blk_items.extend(get_blk_items(mgr)) + + # reorder the blocks in the same order as the existing table if we can + if table_exists: + by_items = { + tuple(b_items.tolist()): (b, b_items) + for b, b_items in zip(blocks, blk_items) + } + new_blocks: list[Block] = 
[] + new_blk_items = [] + for ea in values_axes: + items = tuple(ea.values) + try: + b, b_items = by_items.pop(items) + new_blocks.append(b) + new_blk_items.append(b_items) + except (IndexError, KeyError) as err: + jitems = ",".join([pprint_thing(item) for item in items]) + raise ValueError( + f"cannot match existing table structure for [{jitems}] " + "on appending data" + ) from err + blocks = new_blocks + blk_items = new_blk_items + + return blocks, blk_items + + def process_axes(self, obj, selection: Selection, columns=None): + """process axes filters""" + # make a copy to avoid side effects + if columns is not None: + columns = list(columns) + + # make sure to include levels if we have them + if columns is not None and self.is_multi_index: + assert isinstance(self.levels, list) # assured by is_multi_index + for n in self.levels: + if n not in columns: + columns.insert(0, n) + + # reorder by any non_index_axes & limit to the select columns + for axis, labels in self.non_index_axes: + obj = _reindex_axis(obj, axis, labels, columns) + + # apply the selection filters (but keep in the same order) + if selection.filter is not None: + for field, op, filt in selection.filter.format(): + + def process_filter(field, filt): + + for axis_name in obj._AXIS_ORDERS: + axis_number = obj._get_axis_number(axis_name) + axis_values = obj._get_axis(axis_name) + assert axis_number is not None + + # see if the field is the name of an axis + if field == axis_name: + + # if we have a multi-index, then need to include + # the levels + if self.is_multi_index: + filt = filt.union(Index(self.levels)) + + takers = op(axis_values, filt) + return obj.loc(axis=axis_number)[takers] + + # this might be the name of a file IN an axis + elif field in axis_values: + + # we need to filter on this dimension + values = ensure_index(getattr(obj, field).values) + filt = ensure_index(filt) + + # hack until we support reversed dim flags + if isinstance(obj, DataFrame): + axis_number = 1 - axis_number + takers = op(values, filt) + return obj.loc(axis=axis_number)[takers] + + raise ValueError(f"cannot find the field [{field}] for filtering!") + + obj = process_filter(field, filt) + + return obj + + def create_description( + self, + complib, + complevel: int | None, + fletcher32: bool, + expectedrows: int | None, + ) -> dict[str, Any]: + """create the description of the table from the axes & values""" + # provided expected rows if its passed + if expectedrows is None: + expectedrows = max(self.nrows_expected, 10000) + + d = {"name": "table", "expectedrows": expectedrows} + + # description from the axes & values + d["description"] = {a.cname: a.typ for a in self.axes} + + if complib: + if complevel is None: + complevel = self._complevel or 9 + filters = _tables().Filters( + complevel=complevel, + complib=complib, + fletcher32=fletcher32 or self._fletcher32, + ) + d["filters"] = filters + elif self._filters is not None: + d["filters"] = self._filters + + return d + + def read_coordinates( + self, where=None, start: int | None = None, stop: int | None = None + ): + """ + select coordinates (row numbers) from a table; return the + coordinates object + """ + # validate the version + self.validate_version(where) + + # infer the data kind + if not self.infer_axes(): + return False + + # create the selection + selection = Selection(self, where=where, start=start, stop=stop) + coords = selection.select_coords() + if selection.filter is not None: + for field, op, filt in selection.filter.format(): + data = self.read_column( + field, 
start=coords.min(), stop=coords.max() + 1
+                )
+                coords = coords[op(data.iloc[coords - coords.min()], filt).values]
+
+        return Index(coords)
+
+    def read_column(
+        self,
+        column: str,
+        where=None,
+        start: int | None = None,
+        stop: int | None = None,
+    ):
+        """
+        return a single column from the table; generally only indexables
+        are interesting
+        """
+        # validate the version
+        self.validate_version()
+
+        # infer the data kind
+        if not self.infer_axes():
+            return False
+
+        if where is not None:
+            raise TypeError("read_column does not currently accept a where clause")
+
+        # find the axes
+        for a in self.axes:
+            if column == a.name:
+                if not a.is_data_indexable:
+                    raise ValueError(
+                        f"column [{column}] cannot be extracted individually; "
+                        "it is not data indexable"
+                    )
+
+                # column must be an indexable or a data column
+                c = getattr(self.table.cols, column)
+                a.set_info(self.info)
+                col_values = a.convert(
+                    c[start:stop],
+                    nan_rep=self.nan_rep,
+                    encoding=self.encoding,
+                    errors=self.errors,
+                )
+                return Series(_set_tz(col_values[1], a.tz), name=column)
+
+        raise KeyError(f"column [{column}] not found in the table")
+
+
+class WORMTable(Table):
+    """
+    a write-once read-many table: this format DOES NOT ALLOW appending to a
+    table; writing is a one-time operation, and the data are stored in a
+    format that allows for searching the data on disk
+    """
+
+    table_type = "worm"
+
+    def read(
+        self,
+        where=None,
+        columns=None,
+        start: int | None = None,
+        stop: int | None = None,
+    ):
+        """
+        read the indices and the indexing array, calculate offset rows and return
+        """
+        raise NotImplementedError("WORMTable needs to implement read")
+
+    def write(self, **kwargs):
+        """
+        write in a format that we can search later on (but cannot append
+        to): write out the indices and the values using _write_array
+        (e.g.
a CArray) create an indexing table so that we can search + """ + raise NotImplementedError("WORMTable needs to implement write") + + +class AppendableTable(Table): + """support the new appendable table formats""" + + table_type = "appendable" + + def write( + self, + obj, + axes=None, + append=False, + complib=None, + complevel=None, + fletcher32=None, + min_itemsize=None, + chunksize=None, + expectedrows=None, + dropna=False, + nan_rep=None, + data_columns=None, + track_times=True, + ): + if not append and self.is_exists: + self._handle.remove_node(self.group, "table") + + # create the axes + table = self._create_axes( + axes=axes, + obj=obj, + validate=append, + min_itemsize=min_itemsize, + nan_rep=nan_rep, + data_columns=data_columns, + ) + + for a in table.axes: + a.validate_names() + + if not table.is_exists: + + # create the table + options = table.create_description( + complib=complib, + complevel=complevel, + fletcher32=fletcher32, + expectedrows=expectedrows, + ) + + # set the table attributes + table.set_attrs() + + options["track_times"] = track_times + + # create the table + table._handle.create_table(table.group, **options) + + # update my info + table.attrs.info = table.info + + # validate the axes and set the kinds + for a in table.axes: + a.validate_and_set(table, append) + + # add the rows + table.write_data(chunksize, dropna=dropna) + + def write_data(self, chunksize: int | None, dropna: bool = False): + """ + we form the data into a 2-d including indexes,values,mask write chunk-by-chunk + """ + names = self.dtype.names + nrows = self.nrows_expected + + # if dropna==True, then drop ALL nan rows + masks = [] + if dropna: + for a in self.values_axes: + # figure the mask: only do if we can successfully process this + # column, otherwise ignore the mask + mask = isna(a.data).all(axis=0) + if isinstance(mask, np.ndarray): + masks.append(mask.astype("u1", copy=False)) + + # consolidate masks + if len(masks): + mask = masks[0] + for m in masks[1:]: + mask = mask & m + mask = mask.ravel() + else: + mask = None + + # broadcast the indexes if needed + indexes = [a.cvalues for a in self.index_axes] + nindexes = len(indexes) + assert nindexes == 1, nindexes # ensures we dont need to broadcast + + # transpose the values so first dimension is last + # reshape the values if needed + values = [a.take_data() for a in self.values_axes] + values = [v.transpose(np.roll(np.arange(v.ndim), v.ndim - 1)) for v in values] + bvalues = [] + for i, v in enumerate(values): + new_shape = (nrows,) + self.dtype[names[nindexes + i]].shape + bvalues.append(values[i].reshape(new_shape)) + + # write the chunks + if chunksize is None: + chunksize = 100000 + + rows = np.empty(min(chunksize, nrows), dtype=self.dtype) + chunks = nrows // chunksize + 1 + for i in range(chunks): + start_i = i * chunksize + end_i = min((i + 1) * chunksize, nrows) + if start_i >= end_i: + break + + self.write_data_chunk( + rows, + indexes=[a[start_i:end_i] for a in indexes], + mask=mask[start_i:end_i] if mask is not None else None, + values=[v[start_i:end_i] for v in bvalues], + ) + + def write_data_chunk( + self, + rows: np.ndarray, + indexes: list[np.ndarray], + mask: np.ndarray | None, + values: list[np.ndarray], + ): + """ + Parameters + ---------- + rows : an empty memory space where we are putting the chunk + indexes : an array of the indexes + mask : an array of the masks + values : an array of the values + """ + # 0 len + for v in values: + if not np.prod(v.shape): + return + + nrows = indexes[0].shape[0] + if nrows != 
len(rows): + rows = np.empty(nrows, dtype=self.dtype) + names = self.dtype.names + nindexes = len(indexes) + + # indexes + for i, idx in enumerate(indexes): + rows[names[i]] = idx + + # values + for i, v in enumerate(values): + rows[names[i + nindexes]] = v + + # mask + if mask is not None: + m = ~mask.ravel().astype(bool, copy=False) + if not m.all(): + rows = rows[m] + + if len(rows): + self.table.append(rows) + self.table.flush() + + def delete(self, where=None, start: int | None = None, stop: int | None = None): + + # delete all rows (and return the nrows) + if where is None or not len(where): + if start is None and stop is None: + nrows = self.nrows + self._handle.remove_node(self.group, recursive=True) + else: + # pytables<3.0 would remove a single row with stop=None + if stop is None: + stop = self.nrows + nrows = self.table.remove_rows(start=start, stop=stop) + self.table.flush() + return nrows + + # infer the data kind + if not self.infer_axes(): + return None + + # create the selection + table = self.table + selection = Selection(self, where, start=start, stop=stop) + values = selection.select_coords() + + # delete the rows in reverse order + sorted_series = Series(values).sort_values() + ln = len(sorted_series) + + if ln: + + # construct groups of consecutive rows + diff = sorted_series.diff() + groups = list(diff[diff > 1].index) + + # 1 group + if not len(groups): + groups = [0] + + # final element + if groups[-1] != ln: + groups.append(ln) + + # initial element + if groups[0] != 0: + groups.insert(0, 0) + + # we must remove in reverse order! + pg = groups.pop() + for g in reversed(groups): + rows = sorted_series.take(range(g, pg)) + table.remove_rows( + start=rows[rows.index[0]], stop=rows[rows.index[-1]] + 1 + ) + pg = g + + self.table.flush() + + # return the number of rows removed + return ln + + +class AppendableFrameTable(AppendableTable): + """support the new appendable table formats""" + + pandas_kind = "frame_table" + table_type = "appendable_frame" + ndim = 2 + obj_type: type[DataFrame | Series] = DataFrame + + @property + def is_transposed(self) -> bool: + return self.index_axes[0].axis == 1 + + @classmethod + def get_object(cls, obj, transposed: bool): + """these are written transposed""" + if transposed: + obj = obj.T + return obj + + def read( + self, + where=None, + columns=None, + start: int | None = None, + stop: int | None = None, + ): + + # validate the version + self.validate_version(where) + + # infer the data kind + if not self.infer_axes(): + return None + + result = self._read_axes(where=where, start=start, stop=stop) + + info = ( + self.info.get(self.non_index_axes[0][0], {}) + if len(self.non_index_axes) + else {} + ) + + inds = [i for i, ax in enumerate(self.axes) if ax is self.index_axes[0]] + assert len(inds) == 1 + ind = inds[0] + + index = result[ind][0] + + frames = [] + for i, a in enumerate(self.axes): + if a not in self.values_axes: + continue + index_vals, cvalues = result[i] + + # we could have a multi-index constructor here + # ensure_index doesn't recognized our list-of-tuples here + if info.get("type") != "MultiIndex": + cols = Index(index_vals) + else: + cols = MultiIndex.from_tuples(index_vals) + + names = info.get("names") + if names is not None: + cols.set_names(names, inplace=True) + + if self.is_transposed: + values = cvalues + index_ = cols + cols_ = Index(index, name=getattr(index, "name", None)) + else: + values = cvalues.T + index_ = Index(index, name=getattr(index, "name", None)) + cols_ = cols + + # if we have a 
DataIndexableCol, its shape will only be 1 dim + if values.ndim == 1 and isinstance(values, np.ndarray): + values = values.reshape((1, values.shape[0])) + + if isinstance(values, np.ndarray): + df = DataFrame(values.T, columns=cols_, index=index_) + elif isinstance(values, Index): + df = DataFrame(values, columns=cols_, index=index_) + else: + # Categorical + df = DataFrame._from_arrays([values], columns=cols_, index=index_) + assert (df.dtypes == values.dtype).all(), (df.dtypes, values.dtype) + frames.append(df) + + if len(frames) == 1: + df = frames[0] + else: + df = concat(frames, axis=1) + + selection = Selection(self, where=where, start=start, stop=stop) + # apply the selection filters & axis orderings + df = self.process_axes(df, selection=selection, columns=columns) + + return df + + +class AppendableSeriesTable(AppendableFrameTable): + """support the new appendable table formats""" + + pandas_kind = "series_table" + table_type = "appendable_series" + ndim = 2 + obj_type = Series + + @property + def is_transposed(self) -> bool: + return False + + @classmethod + def get_object(cls, obj, transposed: bool): + return obj + + def write(self, obj, data_columns=None, **kwargs): + """we are going to write this as a frame table""" + if not isinstance(obj, DataFrame): + name = obj.name or "values" + obj = obj.to_frame(name) + return super().write(obj=obj, data_columns=obj.columns.tolist(), **kwargs) + + def read( + self, + where=None, + columns=None, + start: int | None = None, + stop: int | None = None, + ) -> Series: + + is_multi_index = self.is_multi_index + if columns is not None and is_multi_index: + assert isinstance(self.levels, list) # needed for mypy + for n in self.levels: + if n not in columns: + columns.insert(0, n) + s = super().read(where=where, columns=columns, start=start, stop=stop) + if is_multi_index: + s.set_index(self.levels, inplace=True) + + s = s.iloc[:, 0] + + # remove the default name + if s.name == "values": + s.name = None + return s + + +class AppendableMultiSeriesTable(AppendableSeriesTable): + """support the new appendable table formats""" + + pandas_kind = "series_table" + table_type = "appendable_multiseries" + + def write(self, obj, **kwargs): + """we are going to write this as a frame table""" + name = obj.name or "values" + newobj, self.levels = self.validate_multiindex(obj) + assert isinstance(self.levels, list) # for mypy + cols = list(self.levels) + cols.append(name) + newobj.columns = Index(cols) + return super().write(obj=newobj, **kwargs) + + +class GenericTable(AppendableFrameTable): + """a table that read/writes the generic pytables table format""" + + pandas_kind = "frame_table" + table_type = "generic_table" + ndim = 2 + obj_type = DataFrame + levels: list[Hashable] + + @property + def pandas_type(self) -> str: + return self.pandas_kind + + @property + def storable(self): + return getattr(self.group, "table", None) or self.group + + def get_attrs(self): + """retrieve our attributes""" + self.non_index_axes = [] + self.nan_rep = None + self.levels = [] + + self.index_axes = [a for a in self.indexables if a.is_an_indexable] + self.values_axes = [a for a in self.indexables if not a.is_an_indexable] + self.data_columns = [a.name for a in self.values_axes] + + @cache_readonly + def indexables(self): + """create the indexables from the table description""" + d = self.description + + # TODO: can we get a typ for this? 
AFAICT it is the only place + # where we aren't passing one + # the index columns is just a simple index + md = self.read_metadata("index") + meta = "category" if md is not None else None + index_col = GenericIndexCol( + name="index", axis=0, table=self.table, meta=meta, metadata=md + ) + + _indexables: list[GenericIndexCol | GenericDataIndexableCol] = [index_col] + + for i, n in enumerate(d._v_names): + assert isinstance(n, str) + + atom = getattr(d, n) + md = self.read_metadata(n) + meta = "category" if md is not None else None + dc = GenericDataIndexableCol( + name=n, + pos=i, + values=[n], + typ=atom, + table=self.table, + meta=meta, + metadata=md, + ) + _indexables.append(dc) + + return _indexables + + def write(self, **kwargs): + raise NotImplementedError("cannot write on an generic table") + + +class AppendableMultiFrameTable(AppendableFrameTable): + """a frame with a multi-index""" + + table_type = "appendable_multiframe" + obj_type = DataFrame + ndim = 2 + _re_levels = re.compile(r"^level_\d+$") + + @property + def table_type_short(self) -> str: + return "appendable_multi" + + def write(self, obj, data_columns=None, **kwargs): + if data_columns is None: + data_columns = [] + elif data_columns is True: + data_columns = obj.columns.tolist() + obj, self.levels = self.validate_multiindex(obj) + assert isinstance(self.levels, list) # for mypy + for n in self.levels: + if n not in data_columns: + data_columns.insert(0, n) + return super().write(obj=obj, data_columns=data_columns, **kwargs) + + def read( + self, + where=None, + columns=None, + start: int | None = None, + stop: int | None = None, + ): + + df = super().read(where=where, columns=columns, start=start, stop=stop) + df = df.set_index(self.levels) + + # remove names for 'level_%d' + df.index = df.index.set_names( + [None if self._re_levels.search(name) else name for name in df.index.names] + ) + + return df + + +def _reindex_axis(obj: DataFrame, axis: int, labels: Index, other=None) -> DataFrame: + ax = obj._get_axis(axis) + labels = ensure_index(labels) + + # try not to reindex even if other is provided + # if it equals our current index + if other is not None: + other = ensure_index(other) + if (other is None or labels.equals(other)) and labels.equals(ax): + return obj + + labels = ensure_index(labels.unique()) + if other is not None: + labels = ensure_index(other.unique()).intersection(labels, sort=False) + if not labels.equals(ax): + slicer: list[slice | Index] = [slice(None, None)] * obj.ndim + slicer[axis] = labels + obj = obj.loc[tuple(slicer)] + return obj + + +# tz to/from coercion + + +def _get_tz(tz: tzinfo) -> str | tzinfo: + """for a tz-aware type, return an encoded zone""" + zone = timezones.get_timezone(tz) + return zone + + +def _set_tz( + values: np.ndarray | Index, + tz: str | tzinfo | None, + coerce: bool = False, +) -> np.ndarray | DatetimeIndex: + """ + coerce the values to a DatetimeIndex if tz is set + preserve the input shape if possible + + Parameters + ---------- + values : ndarray or Index + tz : str or tzinfo + coerce : if we do not have a passed timezone, coerce to M8[ns] ndarray + """ + if isinstance(values, DatetimeIndex): + # If values is tzaware, the tz gets dropped in the values.ravel() + # call below (which returns an ndarray). So we are only non-lossy + # if `tz` matches `values.tz`. 
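+        # Illustrative sketch (not part of pandas): starting from the stored
+        # i8 values, the localize/convert round-trip below is essentially
+        #
+        #     vals = np.array([1577836800000000000])  # ns since the epoch
+        #     DatetimeIndex(vals).tz_localize("UTC").tz_convert("Europe/Warsaw")
+        #     # -> DatetimeIndex(['2020-01-01 01:00:00+01:00'], ...)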
+        assert values.tz is None or values.tz == tz
+
+    if tz is not None:
+        if isinstance(values, DatetimeIndex):
+            name = values.name
+            values = values.asi8
+        else:
+            name = None
+            values = values.ravel()
+
+        tz = _ensure_decoded(tz)
+        values = DatetimeIndex(values, name=name)
+        values = values.tz_localize("UTC").tz_convert(tz)
+    elif coerce:
+        values = np.asarray(values, dtype="M8[ns]")
+
+    # error: Incompatible return value type (got "Union[ndarray, Index]",
+    # expected "Union[ndarray, DatetimeIndex]")
+    return values  # type: ignore[return-value]
+
+
+def _convert_index(name: str, index: Index, encoding: str, errors: str) -> IndexCol:
+    assert isinstance(name, str)
+
+    index_name = index.name
+    # error: Argument 1 to "_get_data_and_dtype_name" has incompatible type "Index";
+    # expected "Union[ExtensionArray, ndarray]"
+    converted, dtype_name = _get_data_and_dtype_name(index)  # type: ignore[arg-type]
+    kind = _dtype_to_kind(dtype_name)
+    atom = DataIndexableCol._get_atom(converted)
+
+    if isinstance(index, Int64Index) or needs_i8_conversion(index.dtype):
+        # Includes Int64Index, RangeIndex, DatetimeIndex, TimedeltaIndex, PeriodIndex,
+        # in which case "kind" is "integer", "integer", "datetime64",
+        # "timedelta64", and "integer", respectively.
+        return IndexCol(
+            name,
+            values=converted,
+            kind=kind,
+            typ=atom,
+            freq=getattr(index, "freq", None),
+            tz=getattr(index, "tz", None),
+            index_name=index_name,
+        )
+
+    if isinstance(index, MultiIndex):
+        raise TypeError("MultiIndex not supported here!")
+
+    inferred_type = lib.infer_dtype(index, skipna=False)
+    # we won't get inferred_type of "datetime64" or "timedelta64" as these
+    # would go through the DatetimeIndex/TimedeltaIndex paths above
+
+    values = np.asarray(index)
+
+    if inferred_type == "date":
+        converted = np.asarray([v.toordinal() for v in values], dtype=np.int32)
+        return IndexCol(
+            name, converted, "date", _tables().Time32Col(), index_name=index_name
+        )
+    elif inferred_type == "string":
+
+        converted = _convert_string_array(values, encoding, errors)
+        itemsize = converted.dtype.itemsize
+        return IndexCol(
+            name,
+            converted,
+            "string",
+            _tables().StringCol(itemsize),
+            index_name=index_name,
+        )
+
+    elif inferred_type in ["integer", "floating"]:
+        return IndexCol(
+            name, values=converted, kind=kind, typ=atom, index_name=index_name
+        )
+    else:
+        assert isinstance(converted, np.ndarray) and converted.dtype == object
+        assert kind == "object", kind
+        atom = _tables().ObjectAtom()
+        return IndexCol(name, converted, kind, atom, index_name=index_name)
+
+
+def _unconvert_index(data, kind: str, encoding: str, errors: str) -> np.ndarray | Index:
+    index: Index | np.ndarray
+
+    if kind == "datetime64":
+        index = DatetimeIndex(data)
+    elif kind == "timedelta64":
+        index = TimedeltaIndex(data)
+    elif kind == "date":
+        try:
+            index = np.asarray([date.fromordinal(v) for v in data], dtype=object)
+        except ValueError:
+            index = np.asarray([date.fromtimestamp(v) for v in data], dtype=object)
+    elif kind in ("integer", "float"):
+        index = np.asarray(data)
+    elif kind == "string":
+        index = _unconvert_string_array(
+            data, nan_rep=None, encoding=encoding, errors=errors
+        )
+    elif kind == "object":
+        index = np.asarray(data[0])
+    else:  # pragma: no cover
+        raise ValueError(f"unrecognized index type {kind}")
+    return index
+
+
+def _maybe_convert_for_string_atom(
+    name: str,
+    bvalues: ArrayLike,
+    existing_col,
+    min_itemsize,
+    nan_rep,
+    encoding,
+    errors,
+    columns: list[str],
+):
+
+    if bvalues.dtype != object:
+        return bvalues
+
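+    # Illustrative sketch (not part of pandas): object-dtype string data is
+    # coerced to fixed-width bytes further below, e.g.
+    #
+    #     _convert_string_array(np.array(["a", "bbb"], dtype=object),
+    #                           "UTF-8", "strict")
+    #     # -> array([b'a', b'bbb'], dtype='|S3')
+    #
+    # while mixed-type object columns fail the infer_dtype check and raise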
+ bvalues = cast(np.ndarray, bvalues) + + dtype_name = bvalues.dtype.name + inferred_type = lib.infer_dtype(bvalues, skipna=False) + + if inferred_type == "date": + raise TypeError("[date] is not implemented as a table column") + elif inferred_type == "datetime": + # after GH#8260 + # this only would be hit for a multi-timezone dtype which is an error + raise TypeError( + "too many timezones in this block, create separate data columns" + ) + + elif not (inferred_type == "string" or dtype_name == "object"): + return bvalues + + mask = isna(bvalues) + data = bvalues.copy() + data[mask] = nan_rep + + # see if we have a valid string type + inferred_type = lib.infer_dtype(data, skipna=False) + if inferred_type != "string": + + # we cannot serialize this data, so report an exception on a column + # by column basis + + # expected behaviour: + # search block for a non-string object column by column + for i in range(data.shape[0]): + col = data[i] + inferred_type = lib.infer_dtype(col, skipna=False) + if inferred_type != "string": + error_column_label = columns[i] if len(columns) > i else f"No.{i}" + raise TypeError( + f"Cannot serialize the column [{error_column_label}]\n" + f"because its data contents are not [string] but " + f"[{inferred_type}] object dtype" + ) + + # itemsize is the maximum length of a string (along any dimension) + + data_converted = _convert_string_array(data, encoding, errors).reshape(data.shape) + itemsize = data_converted.itemsize + + # specified min_itemsize? + if isinstance(min_itemsize, dict): + min_itemsize = int(min_itemsize.get(name) or min_itemsize.get("values") or 0) + itemsize = max(min_itemsize or 0, itemsize) + + # check for column in the values conflicts + if existing_col is not None: + eci = existing_col.validate_col(itemsize) + if eci is not None and eci > itemsize: + itemsize = eci + + data_converted = data_converted.astype(f"|S{itemsize}", copy=False) + return data_converted + + +def _convert_string_array(data: np.ndarray, encoding: str, errors: str) -> np.ndarray: + """ + Take a string-like that is object dtype and coerce to a fixed size string type. + + Parameters + ---------- + data : np.ndarray[object] + encoding : str + errors : str + Handler for encoding errors. + + Returns + ------- + np.ndarray[fixed-length-string] + """ + # encode if needed + if len(data): + data = ( + Series(data.ravel()) + .str.encode(encoding, errors) + ._values.reshape(data.shape) + ) + + # create the sized dtype + ensured = ensure_object(data.ravel()) + itemsize = max(1, libwriters.max_len_string_array(ensured)) + + data = np.asarray(data, dtype=f"S{itemsize}") + return data + + +def _unconvert_string_array( + data: np.ndarray, nan_rep, encoding: str, errors: str +) -> np.ndarray: + """ + Inverse of _convert_string_array. + + Parameters + ---------- + data : np.ndarray[fixed-length-string] + nan_rep : the storage repr of NaN + encoding : str + errors : str + Handler for encoding errors. + + Returns + ------- + np.ndarray[object] + Decoded data. 
+ """ + shape = data.shape + data = np.asarray(data.ravel(), dtype=object) + + if len(data): + + itemsize = libwriters.max_len_string_array(ensure_object(data)) + dtype = f"U{itemsize}" + + if isinstance(data[0], bytes): + data = Series(data).str.decode(encoding, errors=errors)._values + else: + data = data.astype(dtype, copy=False).astype(object, copy=False) + + if nan_rep is None: + nan_rep = "nan" + + libwriters.string_array_replace_from_nan_rep(data, nan_rep) + return data.reshape(shape) + + +def _maybe_convert(values: np.ndarray, val_kind: str, encoding: str, errors: str): + assert isinstance(val_kind, str), type(val_kind) + if _need_convert(val_kind): + conv = _get_converter(val_kind, encoding, errors) + values = conv(values) + return values + + +def _get_converter(kind: str, encoding: str, errors: str): + if kind == "datetime64": + return lambda x: np.asarray(x, dtype="M8[ns]") + elif kind == "string": + return lambda x: _unconvert_string_array( + x, nan_rep=None, encoding=encoding, errors=errors + ) + else: # pragma: no cover + raise ValueError(f"invalid kind {kind}") + + +def _need_convert(kind: str) -> bool: + if kind in ("datetime64", "string"): + return True + return False + + +def _maybe_adjust_name(name: str, version: Sequence[int]) -> str: + """ + Prior to 0.10.1, we named values blocks like: values_block_0 an the + name values_0, adjust the given name if necessary. + + Parameters + ---------- + name : str + version : Tuple[int, int, int] + + Returns + ------- + str + """ + if isinstance(version, str) or len(version) < 3: + raise ValueError("Version is incorrect, expected sequence of 3 integers.") + + if version[0] == 0 and version[1] <= 10 and version[2] == 0: + m = re.search(r"values_block_(\d+)", name) + if m: + grp = m.groups()[0] + name = f"values_{grp}" + return name + + +def _dtype_to_kind(dtype_str: str) -> str: + """ + Find the "kind" string describing the given dtype name. + """ + dtype_str = _ensure_decoded(dtype_str) + + if dtype_str.startswith("string") or dtype_str.startswith("bytes"): + kind = "string" + elif dtype_str.startswith("float"): + kind = "float" + elif dtype_str.startswith("complex"): + kind = "complex" + elif dtype_str.startswith("int") or dtype_str.startswith("uint"): + kind = "integer" + elif dtype_str.startswith("datetime64"): + kind = "datetime64" + elif dtype_str.startswith("timedelta"): + kind = "timedelta64" + elif dtype_str.startswith("bool"): + kind = "bool" + elif dtype_str.startswith("category"): + kind = "category" + elif dtype_str.startswith("period"): + # We store the `freq` attr so we can restore from integers + kind = "integer" + elif dtype_str == "object": + kind = "object" + else: + raise ValueError(f"cannot interpret dtype of [{dtype_str}]") + + return kind + + +def _get_data_and_dtype_name(data: ArrayLike): + """ + Convert the passed data into a storable form and a dtype string. + """ + if isinstance(data, Categorical): + data = data.codes + + # For datetime64tz we need to drop the TZ in tests TODO: why? + dtype_name = data.dtype.name.split("[")[0] + + if data.dtype.kind in ["m", "M"]: + data = np.asarray(data.view("i8")) + # TODO: we used to reshape for the dt64tz case, but no longer + # doing that doesn't seem to break anything. why? + + elif isinstance(data, PeriodIndex): + data = data.asi8 + + data = np.asarray(data) + return data, dtype_name + + +class Selection: + """ + Carries out a selection operation on a tables.Table object. 
+ + Parameters + ---------- + table : a Table object + where : list of Terms (or convertible to) + start, stop: indices to start and/or stop selection + + """ + + def __init__( + self, + table: Table, + where=None, + start: int | None = None, + stop: int | None = None, + ): + self.table = table + self.where = where + self.start = start + self.stop = stop + self.condition = None + self.filter = None + self.terms = None + self.coordinates = None + + if is_list_like(where): + + # see if we have a passed coordinate like + with suppress(ValueError): + inferred = lib.infer_dtype(where, skipna=False) + if inferred == "integer" or inferred == "boolean": + where = np.asarray(where) + if where.dtype == np.bool_: + start, stop = self.start, self.stop + if start is None: + start = 0 + if stop is None: + stop = self.table.nrows + self.coordinates = np.arange(start, stop)[where] + elif issubclass(where.dtype.type, np.integer): + if (self.start is not None and (where < self.start).any()) or ( + self.stop is not None and (where >= self.stop).any() + ): + raise ValueError( + "where must have index locations >= start and < stop" + ) + self.coordinates = where + + if self.coordinates is None: + + self.terms = self.generate(where) + + # create the numexpr & the filter + if self.terms is not None: + self.condition, self.filter = self.terms.evaluate() + + def generate(self, where): + """where can be a : dict,list,tuple,string""" + if where is None: + return None + + q = self.table.queryables() + try: + return PyTablesExpr(where, queryables=q, encoding=self.table.encoding) + except NameError as err: + # raise a nice message, suggesting that the user should use + # data_columns + qkeys = ",".join(q.keys()) + msg = dedent( + f"""\ + The passed where expression: {where} + contains an invalid variable reference + all of the variable references must be a reference to + an axis (e.g. 
'index' or 'columns'), or a data_column + The currently defined references are: {qkeys} + """ + ) + raise ValueError(msg) from err + + def select(self): + """ + generate the selection + """ + if self.condition is not None: + return self.table.table.read_where( + self.condition.format(), start=self.start, stop=self.stop + ) + elif self.coordinates is not None: + return self.table.table.read_coordinates(self.coordinates) + return self.table.table.read(start=self.start, stop=self.stop) + + def select_coords(self): + """ + generate the selection + """ + start, stop = self.start, self.stop + nrows = self.table.nrows + if start is None: + start = 0 + elif start < 0: + start += nrows + if stop is None: + stop = nrows + elif stop < 0: + stop += nrows + + if self.condition is not None: + return self.table.table.get_where_list( + self.condition.format(), start=start, stop=stop, sort=True + ) + elif self.coordinates is not None: + return self.coordinates + + return np.arange(start, stop) diff --git a/.local/lib/python3.8/site-packages/pandas/io/sas/__init__.py b/.local/lib/python3.8/site-packages/pandas/io/sas/__init__.py new file mode 100644 index 0000000..71027fd --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/sas/__init__.py @@ -0,0 +1 @@ +from pandas.io.sas.sasreader import read_sas # noqa:F401 diff --git a/.local/lib/python3.8/site-packages/pandas/io/sas/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/sas/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..9c353df Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/sas/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/sas/__pycache__/sas7bdat.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/sas/__pycache__/sas7bdat.cpython-38.pyc new file mode 100644 index 0000000..4edf1d4 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/sas/__pycache__/sas7bdat.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/sas/__pycache__/sas_constants.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/sas/__pycache__/sas_constants.cpython-38.pyc new file mode 100644 index 0000000..afc98c4 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/sas/__pycache__/sas_constants.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/sas/__pycache__/sas_xport.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/sas/__pycache__/sas_xport.cpython-38.pyc new file mode 100644 index 0000000..de77396 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/sas/__pycache__/sas_xport.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/sas/__pycache__/sasreader.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/io/sas/__pycache__/sasreader.cpython-38.pyc new file mode 100644 index 0000000..aa4dc22 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/sas/__pycache__/sasreader.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/sas/_sas.cpython-38-x86_64-linux-gnu.so b/.local/lib/python3.8/site-packages/pandas/io/sas/_sas.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..228ce26 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/io/sas/_sas.cpython-38-x86_64-linux-gnu.so differ diff --git a/.local/lib/python3.8/site-packages/pandas/io/sas/sas.pyx 
b/.local/lib/python3.8/site-packages/pandas/io/sas/sas.pyx new file mode 100644 index 0000000..17b41fd --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/sas/sas.pyx @@ -0,0 +1,439 @@ +# cython: profile=False +# cython: boundscheck=False, initializedcheck=False +from cython import Py_ssize_t +import numpy as np + +import pandas.io.sas.sas_constants as const + +ctypedef signed long long int64_t +ctypedef unsigned char uint8_t +ctypedef unsigned short uint16_t + +# rle_decompress decompresses data using a Run Length Encoding +# algorithm. It is partially documented here: +# +# https://cran.r-project.org/package=sas7bdat/vignettes/sas7bdat.pdf +cdef const uint8_t[:] rle_decompress(int result_length, const uint8_t[:] inbuff): + + cdef: + uint8_t control_byte, x + uint8_t[:] result = np.zeros(result_length, np.uint8) + int rpos = 0 + int i, nbytes, end_of_first_byte + Py_ssize_t ipos = 0, length = len(inbuff) + + while ipos < length: + control_byte = inbuff[ipos] & 0xF0 + end_of_first_byte = (inbuff[ipos] & 0x0F) + ipos += 1 + + if control_byte == 0x00: + if end_of_first_byte != 0: + raise ValueError("Unexpected non-zero end_of_first_byte") + nbytes = (inbuff[ipos]) + 64 + ipos += 1 + for _ in range(nbytes): + result[rpos] = inbuff[ipos] + rpos += 1 + ipos += 1 + elif control_byte == 0x40: + # not documented + nbytes = end_of_first_byte * 16 + nbytes += (inbuff[ipos]) + ipos += 1 + for _ in range(nbytes): + result[rpos] = inbuff[ipos] + rpos += 1 + ipos += 1 + elif control_byte == 0x60: + nbytes = end_of_first_byte * 256 + (inbuff[ipos]) + 17 + ipos += 1 + for _ in range(nbytes): + result[rpos] = 0x20 + rpos += 1 + elif control_byte == 0x70: + nbytes = end_of_first_byte * 256 + (inbuff[ipos]) + 17 + ipos += 1 + for _ in range(nbytes): + result[rpos] = 0x00 + rpos += 1 + elif control_byte == 0x80: + nbytes = end_of_first_byte + 1 + for i in range(nbytes): + result[rpos] = inbuff[ipos + i] + rpos += 1 + ipos += nbytes + elif control_byte == 0x90: + nbytes = end_of_first_byte + 17 + for i in range(nbytes): + result[rpos] = inbuff[ipos + i] + rpos += 1 + ipos += nbytes + elif control_byte == 0xA0: + nbytes = end_of_first_byte + 33 + for i in range(nbytes): + result[rpos] = inbuff[ipos + i] + rpos += 1 + ipos += nbytes + elif control_byte == 0xB0: + nbytes = end_of_first_byte + 49 + for i in range(nbytes): + result[rpos] = inbuff[ipos + i] + rpos += 1 + ipos += nbytes + elif control_byte == 0xC0: + nbytes = end_of_first_byte + 3 + x = inbuff[ipos] + ipos += 1 + for _ in range(nbytes): + result[rpos] = x + rpos += 1 + elif control_byte == 0xD0: + nbytes = end_of_first_byte + 2 + for _ in range(nbytes): + result[rpos] = 0x40 + rpos += 1 + elif control_byte == 0xE0: + nbytes = end_of_first_byte + 2 + for _ in range(nbytes): + result[rpos] = 0x20 + rpos += 1 + elif control_byte == 0xF0: + nbytes = end_of_first_byte + 2 + for _ in range(nbytes): + result[rpos] = 0x00 + rpos += 1 + else: + raise ValueError(f"unknown control byte: {control_byte}") + + # In py37 cython/clang sees `len(outbuff)` as size_t and not Py_ssize_t + if len(result) != result_length: + raise ValueError(f"RLE: {len(result)} != {result_length}") + + return np.asarray(result) + + +# rdc_decompress decompresses data using the Ross Data Compression algorithm: +# +# http://collaboration.cmc.ec.gc.ca/science/rpn/biblio/ddj/Website/articles/CUJ/1992/9210/ross/ross.htm +cdef const uint8_t[:] rdc_decompress(int result_length, const uint8_t[:] inbuff): + + cdef: + uint8_t cmd + uint16_t ctrl_bits = 0, ctrl_mask = 0, ofs, cnt + int 
rpos = 0, k + uint8_t[:] outbuff = np.zeros(result_length, dtype=np.uint8) + Py_ssize_t ipos = 0, length = len(inbuff) + + ii = -1 + + while ipos < length: + ii += 1 + ctrl_mask = ctrl_mask >> 1 + if ctrl_mask == 0: + ctrl_bits = ((inbuff[ipos] << 8) + + inbuff[ipos + 1]) + ipos += 2 + ctrl_mask = 0x8000 + + if ctrl_bits & ctrl_mask == 0: + outbuff[rpos] = inbuff[ipos] + ipos += 1 + rpos += 1 + continue + + cmd = (inbuff[ipos] >> 4) & 0x0F + cnt = (inbuff[ipos] & 0x0F) + ipos += 1 + + # short RLE + if cmd == 0: + cnt += 3 + for k in range(cnt): + outbuff[rpos + k] = inbuff[ipos] + rpos += cnt + ipos += 1 + + # long RLE + elif cmd == 1: + cnt += inbuff[ipos] << 4 + cnt += 19 + ipos += 1 + for k in range(cnt): + outbuff[rpos + k] = inbuff[ipos] + rpos += cnt + ipos += 1 + + # long pattern + elif cmd == 2: + ofs = cnt + 3 + ofs += inbuff[ipos] << 4 + ipos += 1 + cnt = inbuff[ipos] + ipos += 1 + cnt += 16 + for k in range(cnt): + outbuff[rpos + k] = outbuff[rpos - ofs + k] + rpos += cnt + + # short pattern + elif (cmd >= 3) & (cmd <= 15): + ofs = cnt + 3 + ofs += inbuff[ipos] << 4 + ipos += 1 + for k in range(cmd): + outbuff[rpos + k] = outbuff[rpos - ofs + k] + rpos += cmd + + else: + raise ValueError("unknown RDC command") + + # In py37 cython/clang sees `len(outbuff)` as size_t and not Py_ssize_t + if len(outbuff) != result_length: + raise ValueError(f"RDC: {len(outbuff)} != {result_length}\n") + + return np.asarray(outbuff) + + +cdef enum ColumnTypes: + column_type_decimal = 1 + column_type_string = 2 + + +# type the page_data types +cdef: + int page_meta_type = const.page_meta_type + int page_mix_types_0 = const.page_mix_types[0] + int page_mix_types_1 = const.page_mix_types[1] + int page_data_type = const.page_data_type + int subheader_pointers_offset = const.subheader_pointers_offset + + +cdef class Parser: + + cdef: + int column_count + int64_t[:] lengths + int64_t[:] offsets + int64_t[:] column_types + uint8_t[:, :] byte_chunk + object[:, :] string_chunk + char *cached_page + int current_row_on_page_index + int current_page_block_count + int current_page_data_subheader_pointers_len + int current_page_subheaders_count + int current_row_in_chunk_index + int current_row_in_file_index + int header_length + int row_length + int bit_offset + int subheader_pointer_length + int current_page_type + bint is_little_endian + const uint8_t[:] (*decompress)(int result_length, const uint8_t[:] inbuff) + object parser + + def __init__(self, object parser): + cdef: + int j + char[:] column_types + + self.parser = parser + self.header_length = self.parser.header_length + self.column_count = parser.column_count + self.lengths = parser.column_data_lengths() + self.offsets = parser.column_data_offsets() + self.byte_chunk = parser._byte_chunk + self.string_chunk = parser._string_chunk + self.row_length = parser.row_length + self.bit_offset = self.parser._page_bit_offset + self.subheader_pointer_length = self.parser._subheader_pointer_length + self.is_little_endian = parser.byte_order == "<" + self.column_types = np.empty(self.column_count, dtype='int64') + + # page indicators + self.update_next_page() + + column_types = parser.column_types() + + # map column types + for j in range(self.column_count): + if column_types[j] == b'd': + self.column_types[j] = column_type_decimal + elif column_types[j] == b's': + self.column_types[j] = column_type_string + else: + raise ValueError(f"unknown column type: {self.parser.columns[j].ctype}") + + # compression + if parser.compression == const.rle_compression: + 
self.decompress = rle_decompress + elif parser.compression == const.rdc_compression: + self.decompress = rdc_decompress + else: + self.decompress = NULL + + # update to current state of the parser + self.current_row_in_chunk_index = parser._current_row_in_chunk_index + self.current_row_in_file_index = parser._current_row_in_file_index + self.current_row_on_page_index = parser._current_row_on_page_index + + def read(self, int nrows): + cdef: + bint done + int i + + for _ in range(nrows): + done = self.readline() + if done: + break + + # update the parser + self.parser._current_row_on_page_index = self.current_row_on_page_index + self.parser._current_row_in_chunk_index = self.current_row_in_chunk_index + self.parser._current_row_in_file_index = self.current_row_in_file_index + + cdef bint read_next_page(self): + cdef done + + done = self.parser._read_next_page() + if done: + self.cached_page = NULL + else: + self.update_next_page() + return done + + cdef update_next_page(self): + # update data for the current page + + self.cached_page = self.parser._cached_page + self.current_row_on_page_index = 0 + self.current_page_type = self.parser._current_page_type + self.current_page_block_count = self.parser._current_page_block_count + self.current_page_data_subheader_pointers_len = len( + self.parser._current_page_data_subheader_pointers + ) + self.current_page_subheaders_count = self.parser._current_page_subheaders_count + + cdef readline(self): + + cdef: + int offset, bit_offset, align_correction + int subheader_pointer_length, mn + bint done, flag + + bit_offset = self.bit_offset + subheader_pointer_length = self.subheader_pointer_length + + # If there is no page, go to the end of the header and read a page. + if self.cached_page == NULL: + self.parser._path_or_buf.seek(self.header_length) + done = self.read_next_page() + if done: + return True + + # Loop until a data row is read + while True: + if self.current_page_type == page_meta_type: + flag = self.current_row_on_page_index >=\ + self.current_page_data_subheader_pointers_len + if flag: + done = self.read_next_page() + if done: + return True + continue + current_subheader_pointer = ( + self.parser._current_page_data_subheader_pointers[ + self.current_row_on_page_index]) + self.process_byte_array_with_data( + current_subheader_pointer.offset, + current_subheader_pointer.length) + return False + elif (self.current_page_type == page_mix_types_0 or + self.current_page_type == page_mix_types_1): + align_correction = ( + bit_offset + + subheader_pointers_offset + + self.current_page_subheaders_count * subheader_pointer_length + ) + align_correction = align_correction % 8 + offset = bit_offset + align_correction + offset += subheader_pointers_offset + offset += self.current_page_subheaders_count * subheader_pointer_length + offset += self.current_row_on_page_index * self.row_length + self.process_byte_array_with_data(offset, self.row_length) + mn = min(self.parser.row_count, self.parser._mix_page_row_count) + if self.current_row_on_page_index == mn: + done = self.read_next_page() + if done: + return True + return False + elif self.current_page_type & page_data_type == page_data_type: + self.process_byte_array_with_data( + bit_offset + + subheader_pointers_offset + + self.current_row_on_page_index * self.row_length, + self.row_length, + ) + flag = self.current_row_on_page_index == self.current_page_block_count + if flag: + done = self.read_next_page() + if done: + return True + return False + else: + raise ValueError(f"unknown page type: 
{self.current_page_type}") + + cdef void process_byte_array_with_data(self, int offset, int length): + + cdef: + Py_ssize_t j + int s, k, m, jb, js, current_row + int64_t lngt, start, ct + const uint8_t[:] source + int64_t[:] column_types + int64_t[:] lengths + int64_t[:] offsets + uint8_t[:, :] byte_chunk + object[:, :] string_chunk + + source = np.frombuffer( + self.cached_page[offset:offset + length], dtype=np.uint8) + + if self.decompress != NULL and (length < self.row_length): + source = self.decompress(self.row_length, source) + + current_row = self.current_row_in_chunk_index + column_types = self.column_types + lengths = self.lengths + offsets = self.offsets + byte_chunk = self.byte_chunk + string_chunk = self.string_chunk + s = 8 * self.current_row_in_chunk_index + js = 0 + jb = 0 + for j in range(self.column_count): + lngt = lengths[j] + if lngt == 0: + break + start = offsets[j] + ct = column_types[j] + if ct == column_type_decimal: + # decimal + if self.is_little_endian: + m = s + 8 - lngt + else: + m = s + for k in range(lngt): + byte_chunk[jb, m + k] = source[start + k] + jb += 1 + elif column_types[j] == column_type_string: + # string + string_chunk[js, current_row] = np.array(source[start:( + start + lngt)]).tobytes().rstrip(b"\x00 ") + js += 1 + + self.current_row_on_page_index += 1 + self.current_row_in_chunk_index += 1 + self.current_row_in_file_index += 1 diff --git a/.local/lib/python3.8/site-packages/pandas/io/sas/sas7bdat.py b/.local/lib/python3.8/site-packages/pandas/io/sas/sas7bdat.py new file mode 100644 index 0000000..ef77ff2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/sas/sas7bdat.py @@ -0,0 +1,824 @@ +""" +Read SAS7BDAT files + +Based on code written by Jared Hobbs: + https://bitbucket.org/jaredhobbs/sas7bdat + +See also: + https://github.com/BioStatMatt/sas7bdat + +Partial documentation of the file format: + https://cran.r-project.org/package=sas7bdat/vignettes/sas7bdat.pdf + +Reference for binary data compression: + http://collaboration.cmc.ec.gc.ca/science/rpn/biblio/ddj/Website/articles/CUJ/1992/9210/ross/ross.htm +""" +from __future__ import annotations + +from collections import abc +from datetime import ( + datetime, + timedelta, +) +import struct +from typing import cast + +import numpy as np + +from pandas._typing import ( + FilePath, + ReadBuffer, +) +from pandas.errors import ( + EmptyDataError, + OutOfBoundsDatetime, +) + +import pandas as pd +from pandas import ( + DataFrame, + isna, +) + +from pandas.io.common import get_handle +from pandas.io.sas._sas import Parser +import pandas.io.sas.sas_constants as const +from pandas.io.sas.sasreader import ReaderBase + + +def _parse_datetime(sas_datetime: float, unit: str): + if isna(sas_datetime): + return pd.NaT + + if unit == "s": + return datetime(1960, 1, 1) + timedelta(seconds=sas_datetime) + + elif unit == "d": + return datetime(1960, 1, 1) + timedelta(days=sas_datetime) + + else: + raise ValueError("unit must be 'd' or 's'") + + +def _convert_datetimes(sas_datetimes: pd.Series, unit: str) -> pd.Series: + """ + Convert to Timestamp if possible, otherwise to datetime.datetime. + SAS float64 lacks precision for more than ms resolution so the fit + to datetime.datetime is ok. + + Parameters + ---------- + sas_datetimes : {Series, Sequence[float]} + Dates or datetimes in SAS + unit : {str} + "d" if the floats represent dates, "s" for datetimes + + Returns + ------- + Series + Series of datetime64 dtype or datetime.datetime. 
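A minimal standalone sketch of the conversion `_convert_datetimes` performs, using made-up sample values; everything rests on the 1960-01-01 SAS epoch:

    import pandas as pd

    # SAS stores dates as float days (unit="d") and datetimes as float
    # seconds (unit="s") counted from the 1960-01-01 epoch; NaN maps to NaT.
    sas_days = pd.Series([0.0, 365.0, float("nan")])
    print(pd.to_datetime(sas_days, unit="d", origin="1960-01-01"))
    # 1960-01-01, 1960-12-31 (1960 is a leap year), NaT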
+ """ + try: + return pd.to_datetime(sas_datetimes, unit=unit, origin="1960-01-01") + except OutOfBoundsDatetime: + s_series = sas_datetimes.apply(_parse_datetime, unit=unit) + s_series = cast(pd.Series, s_series) + return s_series + + +class _SubheaderPointer: + offset: int + length: int + compression: int + ptype: int + + def __init__(self, offset: int, length: int, compression: int, ptype: int): + self.offset = offset + self.length = length + self.compression = compression + self.ptype = ptype + + +class _Column: + col_id: int + name: str | bytes + label: str | bytes + format: str | bytes + ctype: bytes + length: int + + def __init__( + self, + col_id: int, + # These can be bytes when convert_header_text is False + name: str | bytes, + label: str | bytes, + format: str | bytes, + ctype: bytes, + length: int, + ): + self.col_id = col_id + self.name = name + self.label = label + self.format = format + self.ctype = ctype + self.length = length + + +# SAS7BDAT represents a SAS data file in SAS7BDAT format. +class SAS7BDATReader(ReaderBase, abc.Iterator): + """ + Read SAS files in SAS7BDAT format. + + Parameters + ---------- + path_or_buf : path name or buffer + Name of SAS file or file-like object pointing to SAS file + contents. + index : column identifier, defaults to None + Column to use as index. + convert_dates : bool, defaults to True + Attempt to convert dates to Pandas datetime values. Note that + some rarely used SAS date formats may be unsupported. + blank_missing : bool, defaults to True + Convert empty strings to missing values (SAS uses blanks to + indicate missing character variables). + chunksize : int, defaults to None + Return SAS7BDATReader object for iterations, returns chunks + with given number of lines. + encoding : string, defaults to None + String encoding. + convert_text : bool, defaults to True + If False, text variables are left as raw bytes. + convert_header_text : bool, defaults to True + If False, header text, including column names, are left as raw + bytes. 
+ """ + + _int_length: int + _cached_page: bytes | None + + def __init__( + self, + path_or_buf: FilePath | ReadBuffer[bytes], + index=None, + convert_dates=True, + blank_missing=True, + chunksize=None, + encoding=None, + convert_text=True, + convert_header_text=True, + ): + + self.index = index + self.convert_dates = convert_dates + self.blank_missing = blank_missing + self.chunksize = chunksize + self.encoding = encoding + self.convert_text = convert_text + self.convert_header_text = convert_header_text + + self.default_encoding = "latin-1" + self.compression = b"" + self.column_names_strings: list[str] = [] + self.column_names: list[str] = [] + self.column_formats: list[str] = [] + self.columns: list[_Column] = [] + + self._current_page_data_subheader_pointers: list[_SubheaderPointer] = [] + self._cached_page = None + self._column_data_lengths: list[int] = [] + self._column_data_offsets: list[int] = [] + self._column_types: list[bytes] = [] + + self._current_row_in_file_index = 0 + self._current_row_on_page_index = 0 + self._current_row_in_file_index = 0 + + self.handles = get_handle(path_or_buf, "rb", is_text=False) + + self._path_or_buf = self.handles.handle + + try: + self._get_properties() + self._parse_metadata() + except Exception: + self.close() + raise + + def column_data_lengths(self) -> np.ndarray: + """Return a numpy int64 array of the column data lengths""" + return np.asarray(self._column_data_lengths, dtype=np.int64) + + def column_data_offsets(self) -> np.ndarray: + """Return a numpy int64 array of the column offsets""" + return np.asarray(self._column_data_offsets, dtype=np.int64) + + def column_types(self) -> np.ndarray: + """ + Returns a numpy character array of the column types: + s (string) or d (double) + """ + return np.asarray(self._column_types, dtype=np.dtype("S1")) + + def close(self) -> None: + self.handles.close() + + def _get_properties(self) -> None: + + # Check magic number + self._path_or_buf.seek(0) + self._cached_page = self._path_or_buf.read(288) + if self._cached_page[0 : len(const.magic)] != const.magic: + raise ValueError("magic number mismatch (not a SAS file?)") + + # Get alignment information + align1, align2 = 0, 0 + buf = self._read_bytes(const.align_1_offset, const.align_1_length) + if buf == const.u64_byte_checker_value: + align2 = const.align_2_value + self.U64 = True + self._int_length = 8 + self._page_bit_offset = const.page_bit_offset_x64 + self._subheader_pointer_length = const.subheader_pointer_length_x64 + else: + self.U64 = False + self._page_bit_offset = const.page_bit_offset_x86 + self._subheader_pointer_length = const.subheader_pointer_length_x86 + self._int_length = 4 + buf = self._read_bytes(const.align_2_offset, const.align_2_length) + if buf == const.align_1_checker_value: + align1 = const.align_2_value + total_align = align1 + align2 + + # Get endianness information + buf = self._read_bytes(const.endianness_offset, const.endianness_length) + if buf == b"\x01": + self.byte_order = "<" + else: + self.byte_order = ">" + + # Get encoding information + buf = self._read_bytes(const.encoding_offset, const.encoding_length)[0] + if buf in const.encoding_names: + self.file_encoding = const.encoding_names[buf] + else: + self.file_encoding = f"unknown (code={buf})" + + # Get platform information + buf = self._read_bytes(const.platform_offset, const.platform_length) + if buf == b"1": + self.platform = "unix" + elif buf == b"2": + self.platform = "windows" + else: + self.platform = "unknown" + + buf = self._read_bytes(const.dataset_offset, 
const.dataset_length) + self.name = buf.rstrip(b"\x00 ") + if self.convert_header_text: + self.name = self.name.decode(self.encoding or self.default_encoding) + + buf = self._read_bytes(const.file_type_offset, const.file_type_length) + self.file_type = buf.rstrip(b"\x00 ") + if self.convert_header_text: + self.file_type = self.file_type.decode( + self.encoding or self.default_encoding + ) + + # Timestamp is epoch 01/01/1960 + epoch = datetime(1960, 1, 1) + x = self._read_float( + const.date_created_offset + align1, const.date_created_length + ) + self.date_created = epoch + pd.to_timedelta(x, unit="s") + x = self._read_float( + const.date_modified_offset + align1, const.date_modified_length + ) + self.date_modified = epoch + pd.to_timedelta(x, unit="s") + + self.header_length = self._read_int( + const.header_size_offset + align1, const.header_size_length + ) + + # Read the rest of the header into cached_page. + buf = self._path_or_buf.read(self.header_length - 288) + self._cached_page += buf + # error: Argument 1 to "len" has incompatible type "Optional[bytes]"; + # expected "Sized" + if len(self._cached_page) != self.header_length: # type: ignore[arg-type] + raise ValueError("The SAS7BDAT file appears to be truncated.") + + self._page_length = self._read_int( + const.page_size_offset + align1, const.page_size_length + ) + self._page_count = self._read_int( + const.page_count_offset + align1, const.page_count_length + ) + + buf = self._read_bytes( + const.sas_release_offset + total_align, const.sas_release_length + ) + self.sas_release = buf.rstrip(b"\x00 ") + if self.convert_header_text: + self.sas_release = self.sas_release.decode( + self.encoding or self.default_encoding + ) + + buf = self._read_bytes( + const.sas_server_type_offset + total_align, const.sas_server_type_length + ) + self.server_type = buf.rstrip(b"\x00 ") + if self.convert_header_text: + self.server_type = self.server_type.decode( + self.encoding or self.default_encoding + ) + + buf = self._read_bytes( + const.os_version_number_offset + total_align, const.os_version_number_length + ) + self.os_version = buf.rstrip(b"\x00 ") + if self.convert_header_text: + self.os_version = self.os_version.decode( + self.encoding or self.default_encoding + ) + + buf = self._read_bytes(const.os_name_offset + total_align, const.os_name_length) + buf = buf.rstrip(b"\x00 ") + if len(buf) > 0: + self.os_name = buf.decode(self.encoding or self.default_encoding) + else: + buf = self._read_bytes( + const.os_maker_offset + total_align, const.os_maker_length + ) + self.os_name = buf.rstrip(b"\x00 ") + if self.convert_header_text: + self.os_name = self.os_name.decode( + self.encoding or self.default_encoding + ) + + def __next__(self): + da = self.read(nrows=self.chunksize or 1) + if da is None: + self.close() + raise StopIteration + return da + + # Read a single float of the given width (4 or 8). + def _read_float(self, offset: int, width: int): + if width not in (4, 8): + self.close() + raise ValueError("invalid float width") + buf = self._read_bytes(offset, width) + fd = "f" if width == 4 else "d" + return struct.unpack(self.byte_order + fd, buf)[0] + + # Read a single signed integer of the given width (1, 2, 4 or 8). 
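Both `_read_float` above and `_read_int` below reduce to `struct.unpack` with the header's byte-order prefix; a self-contained illustration with made-up values:

    import struct

    # The byte-order prefix "<" (little-endian) or ">" comes from the file
    # header; floats use {4: "f", 8: "d"}, signed ints {1: "b", 2: "h", 4: "l", 8: "q"}.
    buf = struct.pack("<l", 65536) + struct.pack("<d", 1960.0)
    page_size, = struct.unpack("<l", buf[:4])
    stamp, = struct.unpack("<d", buf[4:12])
    assert (page_size, stamp) == (65536, 1960.0)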
+ def _read_int(self, offset: int, width: int) -> int: + if width not in (1, 2, 4, 8): + self.close() + raise ValueError("invalid int width") + buf = self._read_bytes(offset, width) + it = {1: "b", 2: "h", 4: "l", 8: "q"}[width] + iv = struct.unpack(self.byte_order + it, buf)[0] + return iv + + def _read_bytes(self, offset: int, length: int): + if self._cached_page is None: + self._path_or_buf.seek(offset) + buf = self._path_or_buf.read(length) + if len(buf) < length: + self.close() + msg = f"Unable to read {length:d} bytes from file position {offset:d}." + raise ValueError(msg) + return buf + else: + if offset + length > len(self._cached_page): + self.close() + raise ValueError("The cached page is too small.") + return self._cached_page[offset : offset + length] + + def _parse_metadata(self) -> None: + done = False + while not done: + self._cached_page = self._path_or_buf.read(self._page_length) + if len(self._cached_page) <= 0: + break + if len(self._cached_page) != self._page_length: + raise ValueError("Failed to read a meta data page from the SAS file.") + done = self._process_page_meta() + + def _process_page_meta(self) -> bool: + self._read_page_header() + pt = [const.page_meta_type, const.page_amd_type] + const.page_mix_types + if self._current_page_type in pt: + self._process_page_metadata() + is_data_page = self._current_page_type & const.page_data_type + is_mix_page = self._current_page_type in const.page_mix_types + return bool( + is_data_page + or is_mix_page + or self._current_page_data_subheader_pointers != [] + ) + + def _read_page_header(self): + bit_offset = self._page_bit_offset + tx = const.page_type_offset + bit_offset + self._current_page_type = self._read_int(tx, const.page_type_length) + tx = const.block_count_offset + bit_offset + self._current_page_block_count = self._read_int(tx, const.block_count_length) + tx = const.subheader_count_offset + bit_offset + self._current_page_subheaders_count = self._read_int( + tx, const.subheader_count_length + ) + + def _process_page_metadata(self) -> None: + bit_offset = self._page_bit_offset + + for i in range(self._current_page_subheaders_count): + pointer = self._process_subheader_pointers( + const.subheader_pointers_offset + bit_offset, i + ) + if pointer.length == 0: + continue + if pointer.compression == const.truncated_subheader_id: + continue + subheader_signature = self._read_subheader_signature(pointer.offset) + subheader_index = self._get_subheader_index( + subheader_signature, pointer.compression, pointer.ptype + ) + self._process_subheader(subheader_index, pointer) + + def _get_subheader_index(self, signature: bytes, compression, ptype) -> int: + # TODO: return here could be made an enum + index = const.subheader_signature_to_index.get(signature) + if index is None: + f1 = (compression == const.compressed_subheader_id) or (compression == 0) + f2 = ptype == const.compressed_subheader_type + if (self.compression != b"") and f1 and f2: + index = const.SASIndex.data_subheader_index + else: + self.close() + raise ValueError("Unknown subheader signature") + return index + + def _process_subheader_pointers( + self, offset: int, subheader_pointer_index: int + ) -> _SubheaderPointer: + + subheader_pointer_length = self._subheader_pointer_length + total_offset = offset + subheader_pointer_length * subheader_pointer_index + + subheader_offset = self._read_int(total_offset, self._int_length) + total_offset += self._int_length + + subheader_length = self._read_int(total_offset, self._int_length) + total_offset += 
self._int_length + + subheader_compression = self._read_int(total_offset, 1) + total_offset += 1 + + subheader_type = self._read_int(total_offset, 1) + + x = _SubheaderPointer( + subheader_offset, subheader_length, subheader_compression, subheader_type + ) + + return x + + def _read_subheader_signature(self, offset: int) -> bytes: + subheader_signature = self._read_bytes(offset, self._int_length) + return subheader_signature + + def _process_subheader( + self, subheader_index: int, pointer: _SubheaderPointer + ) -> None: + offset = pointer.offset + length = pointer.length + + if subheader_index == const.SASIndex.row_size_index: + processor = self._process_rowsize_subheader + elif subheader_index == const.SASIndex.column_size_index: + processor = self._process_columnsize_subheader + elif subheader_index == const.SASIndex.column_text_index: + processor = self._process_columntext_subheader + elif subheader_index == const.SASIndex.column_name_index: + processor = self._process_columnname_subheader + elif subheader_index == const.SASIndex.column_attributes_index: + processor = self._process_columnattributes_subheader + elif subheader_index == const.SASIndex.format_and_label_index: + processor = self._process_format_subheader + elif subheader_index == const.SASIndex.column_list_index: + processor = self._process_columnlist_subheader + elif subheader_index == const.SASIndex.subheader_counts_index: + processor = self._process_subheader_counts + elif subheader_index == const.SASIndex.data_subheader_index: + self._current_page_data_subheader_pointers.append(pointer) + return + else: + raise ValueError("unknown subheader index") + + processor(offset, length) + + def _process_rowsize_subheader(self, offset: int, length: int) -> None: + + int_len = self._int_length + lcs_offset = offset + lcp_offset = offset + if self.U64: + lcs_offset += 682 + lcp_offset += 706 + else: + lcs_offset += 354 + lcp_offset += 378 + + self.row_length = self._read_int( + offset + const.row_length_offset_multiplier * int_len, int_len + ) + self.row_count = self._read_int( + offset + const.row_count_offset_multiplier * int_len, int_len + ) + self.col_count_p1 = self._read_int( + offset + const.col_count_p1_multiplier * int_len, int_len + ) + self.col_count_p2 = self._read_int( + offset + const.col_count_p2_multiplier * int_len, int_len + ) + mx = const.row_count_on_mix_page_offset_multiplier * int_len + self._mix_page_row_count = self._read_int(offset + mx, int_len) + self._lcs = self._read_int(lcs_offset, 2) + self._lcp = self._read_int(lcp_offset, 2) + + def _process_columnsize_subheader(self, offset: int, length: int) -> None: + int_len = self._int_length + offset += int_len + self.column_count = self._read_int(offset, int_len) + if self.col_count_p1 + self.col_count_p2 != self.column_count: + print( + f"Warning: column count mismatch ({self.col_count_p1} + " + f"{self.col_count_p2} != {self.column_count})\n" + ) + + # Unknown purpose + def _process_subheader_counts(self, offset: int, length: int) -> None: + pass + + def _process_columntext_subheader(self, offset: int, length: int) -> None: + + offset += self._int_length + text_block_size = self._read_int(offset, const.text_block_size_length) + + buf = self._read_bytes(offset, text_block_size) + cname_raw = buf[0:text_block_size].rstrip(b"\x00 ") + cname = cname_raw + if self.convert_header_text: + cname = cname.decode(self.encoding or self.default_encoding) + self.column_names_strings.append(cname) + + if len(self.column_names_strings) == 1: + compression_literal = b"" + 
for cl in const.compression_literals: + if cl in cname_raw: + compression_literal = cl + self.compression = compression_literal + offset -= self._int_length + + offset1 = offset + 16 + if self.U64: + offset1 += 4 + + buf = self._read_bytes(offset1, self._lcp) + compression_literal = buf.rstrip(b"\x00") + if compression_literal == b"": + self._lcs = 0 + offset1 = offset + 32 + if self.U64: + offset1 += 4 + buf = self._read_bytes(offset1, self._lcp) + self.creator_proc = buf[0 : self._lcp] + elif compression_literal == const.rle_compression: + offset1 = offset + 40 + if self.U64: + offset1 += 4 + buf = self._read_bytes(offset1, self._lcp) + self.creator_proc = buf[0 : self._lcp] + elif self._lcs > 0: + self._lcp = 0 + offset1 = offset + 16 + if self.U64: + offset1 += 4 + buf = self._read_bytes(offset1, self._lcs) + self.creator_proc = buf[0 : self._lcp] + if self.convert_header_text: + if hasattr(self, "creator_proc"): + self.creator_proc = self.creator_proc.decode( + self.encoding or self.default_encoding + ) + + def _process_columnname_subheader(self, offset: int, length: int) -> None: + int_len = self._int_length + offset += int_len + column_name_pointers_count = (length - 2 * int_len - 12) // 8 + for i in range(column_name_pointers_count): + text_subheader = ( + offset + + const.column_name_pointer_length * (i + 1) + + const.column_name_text_subheader_offset + ) + col_name_offset = ( + offset + + const.column_name_pointer_length * (i + 1) + + const.column_name_offset_offset + ) + col_name_length = ( + offset + + const.column_name_pointer_length * (i + 1) + + const.column_name_length_offset + ) + + idx = self._read_int( + text_subheader, const.column_name_text_subheader_length + ) + col_offset = self._read_int( + col_name_offset, const.column_name_offset_length + ) + col_len = self._read_int(col_name_length, const.column_name_length_length) + + name_str = self.column_names_strings[idx] + self.column_names.append(name_str[col_offset : col_offset + col_len]) + + def _process_columnattributes_subheader(self, offset: int, length: int) -> None: + int_len = self._int_length + column_attributes_vectors_count = (length - 2 * int_len - 12) // (int_len + 8) + for i in range(column_attributes_vectors_count): + col_data_offset = ( + offset + int_len + const.column_data_offset_offset + i * (int_len + 8) + ) + col_data_len = ( + offset + + 2 * int_len + + const.column_data_length_offset + + i * (int_len + 8) + ) + col_types = ( + offset + 2 * int_len + const.column_type_offset + i * (int_len + 8) + ) + + x = self._read_int(col_data_offset, int_len) + self._column_data_offsets.append(x) + + x = self._read_int(col_data_len, const.column_data_length_length) + self._column_data_lengths.append(x) + + x = self._read_int(col_types, const.column_type_length) + self._column_types.append(b"d" if x == 1 else b"s") + + def _process_columnlist_subheader(self, offset: int, length: int) -> None: + # unknown purpose + pass + + def _process_format_subheader(self, offset: int, length: int) -> None: + int_len = self._int_length + text_subheader_format = ( + offset + const.column_format_text_subheader_index_offset + 3 * int_len + ) + col_format_offset = offset + const.column_format_offset_offset + 3 * int_len + col_format_len = offset + const.column_format_length_offset + 3 * int_len + text_subheader_label = ( + offset + const.column_label_text_subheader_index_offset + 3 * int_len + ) + col_label_offset = offset + const.column_label_offset_offset + 3 * int_len + col_label_len = offset + const.column_label_length_offset + 
3 * int_len + + x = self._read_int( + text_subheader_format, const.column_format_text_subheader_index_length + ) + format_idx = min(x, len(self.column_names_strings) - 1) + + format_start = self._read_int( + col_format_offset, const.column_format_offset_length + ) + format_len = self._read_int(col_format_len, const.column_format_length_length) + + label_idx = self._read_int( + text_subheader_label, const.column_label_text_subheader_index_length + ) + label_idx = min(label_idx, len(self.column_names_strings) - 1) + + label_start = self._read_int(col_label_offset, const.column_label_offset_length) + label_len = self._read_int(col_label_len, const.column_label_length_length) + + label_names = self.column_names_strings[label_idx] + column_label = label_names[label_start : label_start + label_len] + format_names = self.column_names_strings[format_idx] + column_format = format_names[format_start : format_start + format_len] + current_column_number = len(self.columns) + + col = _Column( + current_column_number, + self.column_names[current_column_number], + column_label, + column_format, + self._column_types[current_column_number], + self._column_data_lengths[current_column_number], + ) + + self.column_formats.append(column_format) + self.columns.append(col) + + def read(self, nrows: int | None = None) -> DataFrame | None: + + if (nrows is None) and (self.chunksize is not None): + nrows = self.chunksize + elif nrows is None: + nrows = self.row_count + + if len(self._column_types) == 0: + self.close() + raise EmptyDataError("No columns to parse from file") + + if self._current_row_in_file_index >= self.row_count: + return None + + m = self.row_count - self._current_row_in_file_index + if nrows > m: + nrows = m + + nd = self._column_types.count(b"d") + ns = self._column_types.count(b"s") + + self._string_chunk = np.empty((ns, nrows), dtype=object) + self._byte_chunk = np.zeros((nd, 8 * nrows), dtype=np.uint8) + + self._current_row_in_chunk_index = 0 + p = Parser(self) + p.read(nrows) + + rslt = self._chunk_to_dataframe() + if self.index is not None: + rslt = rslt.set_index(self.index) + + return rslt + + def _read_next_page(self): + self._current_page_data_subheader_pointers = [] + self._cached_page = self._path_or_buf.read(self._page_length) + if len(self._cached_page) <= 0: + return True + elif len(self._cached_page) != self._page_length: + self.close() + msg = ( + "failed to read complete page from file (read " + f"{len(self._cached_page):d} of {self._page_length:d} bytes)" + ) + raise ValueError(msg) + + self._read_page_header() + page_type = self._current_page_type + if page_type == const.page_meta_type: + self._process_page_metadata() + + is_data_page = page_type & const.page_data_type + pt = [const.page_meta_type] + const.page_mix_types + if not is_data_page and self._current_page_type not in pt: + return self._read_next_page() + + return False + + def _chunk_to_dataframe(self) -> DataFrame: + + n = self._current_row_in_chunk_index + m = self._current_row_in_file_index + ix = range(m - n, m) + rslt = {} + + js, jb = 0, 0 + for j in range(self.column_count): + + name = self.column_names[j] + + if self._column_types[j] == b"d": + col_arr = self._byte_chunk[jb, :].view(dtype=self.byte_order + "d") + rslt[name] = pd.Series(col_arr, dtype=np.float64, index=ix) + if self.convert_dates: + if self.column_formats[j] in const.sas_date_formats: + rslt[name] = _convert_datetimes(rslt[name], "d") + elif self.column_formats[j] in const.sas_datetime_formats: + rslt[name] = _convert_datetimes(rslt[name], 
"s") + jb += 1 + elif self._column_types[j] == b"s": + rslt[name] = pd.Series(self._string_chunk[js, :], index=ix) + if self.convert_text and (self.encoding is not None): + rslt[name] = rslt[name].str.decode( + self.encoding or self.default_encoding + ) + if self.blank_missing: + ii = rslt[name].str.len() == 0 + rslt[name][ii] = np.nan + js += 1 + else: + self.close() + raise ValueError(f"unknown column type {repr(self._column_types[j])}") + + df = DataFrame(rslt, columns=self.column_names, index=ix, copy=False) + return df diff --git a/.local/lib/python3.8/site-packages/pandas/io/sas/sas_constants.py b/.local/lib/python3.8/site-packages/pandas/io/sas/sas_constants.py new file mode 100644 index 0000000..23b23a1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/sas/sas_constants.py @@ -0,0 +1,253 @@ +magic = ( + b"\x00\x00\x00\x00\x00\x00\x00\x00" + + b"\x00\x00\x00\x00\xc2\xea\x81\x60" + + b"\xb3\x14\x11\xcf\xbd\x92\x08\x00" + + b"\x09\xc7\x31\x8c\x18\x1f\x10\x11" +) + +align_1_checker_value = b"3" +align_1_offset = 32 +align_1_length = 1 +align_1_value = 4 +u64_byte_checker_value = b"3" +align_2_offset = 35 +align_2_length = 1 +align_2_value = 4 +endianness_offset = 37 +endianness_length = 1 +platform_offset = 39 +platform_length = 1 +encoding_offset = 70 +encoding_length = 1 +dataset_offset = 92 +dataset_length = 64 +file_type_offset = 156 +file_type_length = 8 +date_created_offset = 164 +date_created_length = 8 +date_modified_offset = 172 +date_modified_length = 8 +header_size_offset = 196 +header_size_length = 4 +page_size_offset = 200 +page_size_length = 4 +page_count_offset = 204 +page_count_length = 4 +sas_release_offset = 216 +sas_release_length = 8 +sas_server_type_offset = 224 +sas_server_type_length = 16 +os_version_number_offset = 240 +os_version_number_length = 16 +os_maker_offset = 256 +os_maker_length = 16 +os_name_offset = 272 +os_name_length = 16 +page_bit_offset_x86 = 16 +page_bit_offset_x64 = 32 +subheader_pointer_length_x86 = 12 +subheader_pointer_length_x64 = 24 +page_type_offset = 0 +page_type_length = 2 +block_count_offset = 2 +block_count_length = 2 +subheader_count_offset = 4 +subheader_count_length = 2 +page_meta_type = 0 +page_data_type = 256 +page_amd_type = 1024 +page_metc_type = 16384 +page_comp_type = -28672 +page_mix_types = [512, 640] +subheader_pointers_offset = 8 +truncated_subheader_id = 1 +compressed_subheader_id = 4 +compressed_subheader_type = 1 +text_block_size_length = 2 +row_length_offset_multiplier = 5 +row_count_offset_multiplier = 6 +col_count_p1_multiplier = 9 +col_count_p2_multiplier = 10 +row_count_on_mix_page_offset_multiplier = 15 +column_name_pointer_length = 8 +column_name_text_subheader_offset = 0 +column_name_text_subheader_length = 2 +column_name_offset_offset = 2 +column_name_offset_length = 2 +column_name_length_offset = 4 +column_name_length_length = 2 +column_data_offset_offset = 8 +column_data_length_offset = 8 +column_data_length_length = 4 +column_type_offset = 14 +column_type_length = 1 +column_format_text_subheader_index_offset = 22 +column_format_text_subheader_index_length = 2 +column_format_offset_offset = 24 +column_format_offset_length = 2 +column_format_length_offset = 26 +column_format_length_length = 2 +column_label_text_subheader_index_offset = 28 +column_label_text_subheader_index_length = 2 +column_label_offset_offset = 30 +column_label_offset_length = 2 +column_label_length_offset = 32 +column_label_length_length = 2 +rle_compression = b"SASYZCRL" +rdc_compression = b"SASYZCR2" + 
+compression_literals = [rle_compression, rdc_compression] + +# Incomplete list of encodings, using SAS nomenclature: +# http://support.sas.com/documentation/cdl/en/nlsref/61893/HTML/default/viewer.htm#a002607278.htm +encoding_names = { + 29: "latin1", + 20: "utf-8", + 33: "cyrillic", + 60: "wlatin2", + 61: "wcyrillic", + 62: "wlatin1", + 90: "ebcdic870", +} + + +class SASIndex: + row_size_index = 0 + column_size_index = 1 + subheader_counts_index = 2 + column_text_index = 3 + column_name_index = 4 + column_attributes_index = 5 + format_and_label_index = 6 + column_list_index = 7 + data_subheader_index = 8 + + +subheader_signature_to_index = { + b"\xF7\xF7\xF7\xF7": SASIndex.row_size_index, + b"\x00\x00\x00\x00\xF7\xF7\xF7\xF7": SASIndex.row_size_index, + b"\xF7\xF7\xF7\xF7\x00\x00\x00\x00": SASIndex.row_size_index, + b"\xF7\xF7\xF7\xF7\xFF\xFF\xFB\xFE": SASIndex.row_size_index, + b"\xF6\xF6\xF6\xF6": SASIndex.column_size_index, + b"\x00\x00\x00\x00\xF6\xF6\xF6\xF6": SASIndex.column_size_index, + b"\xF6\xF6\xF6\xF6\x00\x00\x00\x00": SASIndex.column_size_index, + b"\xF6\xF6\xF6\xF6\xFF\xFF\xFB\xFE": SASIndex.column_size_index, + b"\x00\xFC\xFF\xFF": SASIndex.subheader_counts_index, + b"\xFF\xFF\xFC\x00": SASIndex.subheader_counts_index, + b"\x00\xFC\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.subheader_counts_index, + b"\xFF\xFF\xFF\xFF\xFF\xFF\xFC\x00": SASIndex.subheader_counts_index, + b"\xFD\xFF\xFF\xFF": SASIndex.column_text_index, + b"\xFF\xFF\xFF\xFD": SASIndex.column_text_index, + b"\xFD\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_text_index, + b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD": SASIndex.column_text_index, + b"\xFF\xFF\xFF\xFF": SASIndex.column_name_index, + b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_name_index, + b"\xFC\xFF\xFF\xFF": SASIndex.column_attributes_index, + b"\xFF\xFF\xFF\xFC": SASIndex.column_attributes_index, + b"\xFC\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_attributes_index, + b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC": SASIndex.column_attributes_index, + b"\xFE\xFB\xFF\xFF": SASIndex.format_and_label_index, + b"\xFF\xFF\xFB\xFE": SASIndex.format_and_label_index, + b"\xFE\xFB\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.format_and_label_index, + b"\xFF\xFF\xFF\xFF\xFF\xFF\xFB\xFE": SASIndex.format_and_label_index, + b"\xFE\xFF\xFF\xFF": SASIndex.column_list_index, + b"\xFF\xFF\xFF\xFE": SASIndex.column_list_index, + b"\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_list_index, + b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFE": SASIndex.column_list_index, +} + + +# List of frequently used SAS date and datetime formats +# http://support.sas.com/documentation/cdl/en/etsug/60372/HTML/default/viewer.htm#etsug_intervals_sect009.htm +# https://github.com/epam/parso/blob/master/src/main/java/com/epam/parso/impl/SasFileConstants.java +sas_date_formats = ( + "DATE", + "DAY", + "DDMMYY", + "DOWNAME", + "JULDAY", + "JULIAN", + "MMDDYY", + "MMYY", + "MMYYC", + "MMYYD", + "MMYYP", + "MMYYS", + "MMYYN", + "MONNAME", + "MONTH", + "MONYY", + "QTR", + "QTRR", + "NENGO", + "WEEKDATE", + "WEEKDATX", + "WEEKDAY", + "WEEKV", + "WORDDATE", + "WORDDATX", + "YEAR", + "YYMM", + "YYMMC", + "YYMMD", + "YYMMP", + "YYMMS", + "YYMMN", + "YYMON", + "YYMMDD", + "YYQ", + "YYQC", + "YYQD", + "YYQP", + "YYQS", + "YYQN", + "YYQR", + "YYQRC", + "YYQRD", + "YYQRP", + "YYQRS", + "YYQRN", + "YYMMDDP", + "YYMMDDC", + "E8601DA", + "YYMMDDN", + "MMDDYYC", + "MMDDYYS", + "MMDDYYD", + "YYMMDDS", + "B8601DA", + "DDMMYYN", + "YYMMDDD", + "DDMMYYB", + "DDMMYYP", + "MMDDYYP", + "YYMMDDB", + "MMDDYYN", + "DDMMYYC", + "DDMMYYD", + 
"DDMMYYS", + "MINGUO", +) + +sas_datetime_formats = ( + "DATETIME", + "DTWKDATX", + "B8601DN", + "B8601DT", + "B8601DX", + "B8601DZ", + "B8601LX", + "E8601DN", + "E8601DT", + "E8601DX", + "E8601DZ", + "E8601LX", + "DATEAMPM", + "DTDATE", + "DTMONYY", + "DTMONYY", + "DTWKDATX", + "DTYEAR", + "TOD", + "MDYAMPM", +) diff --git a/.local/lib/python3.8/site-packages/pandas/io/sas/sas_xport.py b/.local/lib/python3.8/site-packages/pandas/io/sas/sas_xport.py new file mode 100644 index 0000000..eefb619 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/sas/sas_xport.py @@ -0,0 +1,496 @@ +""" +Read a SAS XPort format file into a Pandas DataFrame. + +Based on code from Jack Cushman (github.com/jcushman/xport). + +The file format is defined here: + +https://support.sas.com/content/dam/SAS/support/en/technical-papers/record-layout-of-a-sas-version-5-or-6-data-set-in-sas-transport-xport-format.pdf +""" +from __future__ import annotations + +from collections import abc +from datetime import datetime +import struct +import warnings + +import numpy as np + +from pandas._typing import ( + FilePath, + ReadBuffer, +) +from pandas.util._decorators import Appender + +import pandas as pd + +from pandas.io.common import get_handle +from pandas.io.sas.sasreader import ReaderBase + +_correct_line1 = ( + "HEADER RECORD*******LIBRARY HEADER RECORD!!!!!!!" + "000000000000000000000000000000 " +) +_correct_header1 = ( + "HEADER RECORD*******MEMBER HEADER RECORD!!!!!!!000000000000000001600000000" +) +_correct_header2 = ( + "HEADER RECORD*******DSCRPTR HEADER RECORD!!!!!!!" + "000000000000000000000000000000 " +) +_correct_obs_header = ( + "HEADER RECORD*******OBS HEADER RECORD!!!!!!!" + "000000000000000000000000000000 " +) +_fieldkeys = [ + "ntype", + "nhfun", + "field_length", + "nvar0", + "name", + "label", + "nform", + "nfl", + "num_decimals", + "nfj", + "nfill", + "niform", + "nifl", + "nifd", + "npos", + "_", +] + + +_base_params_doc = """\ +Parameters +---------- +filepath_or_buffer : str or file-like object + Path to SAS file or object implementing binary read method.""" + +_params2_doc = """\ +index : identifier of index column + Identifier of column that should be used as index of the DataFrame. +encoding : str + Encoding for text data. +chunksize : int + Read file `chunksize` lines at a time, returns iterator.""" + +_format_params_doc = """\ +format : str + File format, only `xport` is currently supported.""" + +_iterator_doc = """\ +iterator : bool, default False + Return XportReader object for reading file incrementally.""" + + +_read_sas_doc = f"""Read a SAS file into a DataFrame. + +{_base_params_doc} +{_format_params_doc} +{_params2_doc} +{_iterator_doc} + +Returns +------- +DataFrame or XportReader + +Examples +-------- +Read a SAS Xport file: + +>>> df = pd.read_sas('filename.XPT') + +Read a Xport file in 10,000 line chunks: + +>>> itr = pd.read_sas('filename.XPT', chunksize=10000) +>>> for chunk in itr: +>>> do_something(chunk) + +""" + +_xport_reader_doc = f"""\ +Class for reading SAS Xport files. + +{_base_params_doc} +{_params2_doc} + +Attributes +---------- +member_info : list + Contains information about the file +fields : list + Contains information about the variables in the file +""" + +_read_method_doc = """\ +Read observations from SAS Xport file, returning as data frame. + +Parameters +---------- +nrows : int + Number of rows to read from data file; if None, read whole + file. + +Returns +------- +A DataFrame. 
+""" + + +def _parse_date(datestr: str) -> datetime: + """Given a date in xport format, return Python date.""" + try: + # e.g. "16FEB11:10:07:55" + return datetime.strptime(datestr, "%d%b%y:%H:%M:%S") + except ValueError: + return pd.NaT + + +def _split_line(s: str, parts): + """ + Parameters + ---------- + s: str + Fixed-length string to split + parts: list of (name, length) pairs + Used to break up string, name '_' will be filtered from output. + + Returns + ------- + Dict of name:contents of string at given location. + """ + out = {} + start = 0 + for name, length in parts: + out[name] = s[start : start + length].strip() + start += length + del out["_"] + return out + + +def _handle_truncated_float_vec(vec, nbytes): + # This feature is not well documented, but some SAS XPORT files + # have 2-7 byte "truncated" floats. To read these truncated + # floats, pad them with zeros on the right to make 8 byte floats. + # + # References: + # https://github.com/jcushman/xport/pull/3 + # The R "foreign" library + + if nbytes != 8: + vec1 = np.zeros(len(vec), np.dtype("S8")) + dtype = np.dtype(f"S{nbytes},S{8 - nbytes}") + vec2 = vec1.view(dtype=dtype) + vec2["f0"] = vec + return vec2 + + return vec + + +def _parse_float_vec(vec): + """ + Parse a vector of float values representing IBM 8 byte floats into + native 8 byte floats. + """ + dtype = np.dtype(">u4,>u4") + vec1 = vec.view(dtype=dtype) + xport1 = vec1["f0"] + xport2 = vec1["f1"] + + # Start by setting first half of ieee number to first half of IBM + # number sans exponent + ieee1 = xport1 & 0x00FFFFFF + + # The fraction bit to the left of the binary point in the ieee + # format was set and the number was shifted 0, 1, 2, or 3 + # places. This will tell us how to adjust the ibm exponent to be a + # power of 2 ieee exponent and how to shift the fraction bits to + # restore the correct magnitude. + shift = np.zeros(len(vec), dtype=np.uint8) + shift[np.where(xport1 & 0x00200000)] = 1 + shift[np.where(xport1 & 0x00400000)] = 2 + shift[np.where(xport1 & 0x00800000)] = 3 + + # shift the ieee number down the correct number of places then + # set the second half of the ieee number to be the second half + # of the ibm number shifted appropriately, ored with the bits + # from the first half that would have been shifted in if we + # could shift a double. All we are worried about are the low + # order 3 bits of the first half since we're only shifting by + # 1, 2, or 3. + ieee1 >>= shift + ieee2 = (xport2 >> shift) | ((xport1 & 0x00000007) << (29 + (3 - shift))) + + # clear the 1 bit to the left of the binary point + ieee1 &= 0xFFEFFFFF + + # set the exponent of the ieee number to be the actual exponent + # plus the shift count + 1023. Or this into the first half of the + # ieee number. The ibm exponent is excess 64 but is adjusted by 65 + # since during conversion to ibm format the exponent is + # incremented by 1 and the fraction bits left 4 positions to the + # right of the radix point. 
(had to add >> 24 because C treats & + # 0x7f as 0x7f000000 and Python doesn't) + ieee1 |= ((((((xport1 >> 24) & 0x7F) - 65) << 2) + shift + 1023) << 20) | ( + xport1 & 0x80000000 + ) + + ieee = np.empty((len(ieee1),), dtype=">u4,>u4") + ieee["f0"] = ieee1 + ieee["f1"] = ieee2 + ieee = ieee.view(dtype=">f8") + ieee = ieee.astype("f8") + + return ieee + + +class XportReader(ReaderBase, abc.Iterator): + __doc__ = _xport_reader_doc + + def __init__( + self, + filepath_or_buffer: FilePath | ReadBuffer[bytes], + index=None, + encoding: str | None = "ISO-8859-1", + chunksize=None, + ): + + self._encoding = encoding + self._lines_read = 0 + self._index = index + self._chunksize = chunksize + + self.handles = get_handle( + filepath_or_buffer, "rb", encoding=encoding, is_text=False + ) + self.filepath_or_buffer = self.handles.handle + + try: + self._read_header() + except Exception: + self.close() + raise + + def close(self): + self.handles.close() + + def _get_row(self): + return self.filepath_or_buffer.read(80).decode() + + def _read_header(self): + self.filepath_or_buffer.seek(0) + + # read file header + line1 = self._get_row() + if line1 != _correct_line1: + if "**COMPRESSED**" in line1: + # this was created with the PROC CPORT method and can't be read + # https://documentation.sas.com/doc/en/pgmsascdc/9.4_3.5/movefile/p1bm6aqp3fw4uin1hucwh718f6kp.htm + raise ValueError( + "Header record indicates a CPORT file, which is not readable." + ) + raise ValueError("Header record is not an XPORT file.") + + line2 = self._get_row() + fif = [["prefix", 24], ["version", 8], ["OS", 8], ["_", 24], ["created", 16]] + file_info = _split_line(line2, fif) + if file_info["prefix"] != "SAS SAS SASLIB": + raise ValueError("Header record has invalid prefix.") + file_info["created"] = _parse_date(file_info["created"]) + self.file_info = file_info + + line3 = self._get_row() + file_info["modified"] = _parse_date(line3[:16]) + + # read member header + header1 = self._get_row() + header2 = self._get_row() + headflag1 = header1.startswith(_correct_header1) + headflag2 = header2 == _correct_header2 + if not (headflag1 and headflag2): + raise ValueError("Member header not found") + # usually 140, could be 135 + fieldnamelength = int(header1[-5:-2]) + + # member info + mem = [ + ["prefix", 8], + ["set_name", 8], + ["sasdata", 8], + ["version", 8], + ["OS", 8], + ["_", 24], + ["created", 16], + ] + member_info = _split_line(self._get_row(), mem) + mem = [["modified", 16], ["_", 16], ["label", 40], ["type", 8]] + member_info.update(_split_line(self._get_row(), mem)) + member_info["modified"] = _parse_date(member_info["modified"]) + member_info["created"] = _parse_date(member_info["created"]) + self.member_info = member_info + + # read field names + types = {1: "numeric", 2: "char"} + fieldcount = int(self._get_row()[54:58]) + datalength = fieldnamelength * fieldcount + # round up to nearest 80 + if datalength % 80: + datalength += 80 - datalength % 80 + fielddata = self.filepath_or_buffer.read(datalength) + fields = [] + obs_length = 0 + while len(fielddata) >= fieldnamelength: + # pull data for one field + fieldbytes, fielddata = ( + fielddata[:fieldnamelength], + fielddata[fieldnamelength:], + ) + + # rest at end gets ignored, so if field is short, pad out + # to match struct pattern below + fieldbytes = fieldbytes.ljust(140) + + fieldstruct = struct.unpack(">hhhh8s40s8shhh2s8shhl52s", fieldbytes) + field = dict(zip(_fieldkeys, fieldstruct)) + del field["_"] + field["ntype"] = types[field["ntype"]] + fl = 
field["field_length"] + if field["ntype"] == "numeric" and ((fl < 2) or (fl > 8)): + msg = f"Floating field width {fl} is not between 2 and 8." + raise TypeError(msg) + + for k, v in field.items(): + try: + field[k] = v.strip() + except AttributeError: + pass + + obs_length += field["field_length"] + fields += [field] + + header = self._get_row() + if not header == _correct_obs_header: + raise ValueError("Observation header not found.") + + self.fields = fields + self.record_length = obs_length + self.record_start = self.filepath_or_buffer.tell() + + self.nobs = self._record_count() + self.columns = [x["name"].decode() for x in self.fields] + + # Setup the dtype. + dtypel = [ + ("s" + str(i), "S" + str(field["field_length"])) + for i, field in enumerate(self.fields) + ] + dtype = np.dtype(dtypel) + self._dtype = dtype + + def __next__(self): + return self.read(nrows=self._chunksize or 1) + + def _record_count(self) -> int: + """ + Get number of records in file. + + This is maybe suboptimal because we have to seek to the end of + the file. + + Side effect: returns file position to record_start. + """ + self.filepath_or_buffer.seek(0, 2) + total_records_length = self.filepath_or_buffer.tell() - self.record_start + + if total_records_length % 80 != 0: + warnings.warn("xport file may be corrupted.") + + if self.record_length > 80: + self.filepath_or_buffer.seek(self.record_start) + return total_records_length // self.record_length + + self.filepath_or_buffer.seek(-80, 2) + last_card_bytes = self.filepath_or_buffer.read(80) + last_card = np.frombuffer(last_card_bytes, dtype=np.uint64) + + # 8 byte blank + ix = np.flatnonzero(last_card == 2314885530818453536) + + if len(ix) == 0: + tail_pad = 0 + else: + tail_pad = 8 * len(ix) + + self.filepath_or_buffer.seek(self.record_start) + + return (total_records_length - tail_pad) // self.record_length + + def get_chunk(self, size=None): + """ + Reads lines from Xport file and returns as dataframe + + Parameters + ---------- + size : int, defaults to None + Number of lines to read. If None, reads whole file. 
+ + Returns + ------- + DataFrame + """ + if size is None: + size = self._chunksize + return self.read(nrows=size) + + def _missing_double(self, vec): + v = vec.view(dtype="u1,u1,u2,u4") + miss = (v["f1"] == 0) & (v["f2"] == 0) & (v["f3"] == 0) + miss1 = ( + ((v["f0"] >= 0x41) & (v["f0"] <= 0x5A)) + | (v["f0"] == 0x5F) + | (v["f0"] == 0x2E) + ) + miss &= miss1 + return miss + + @Appender(_read_method_doc) + def read(self, nrows=None): + + if nrows is None: + nrows = self.nobs + + read_lines = min(nrows, self.nobs - self._lines_read) + read_len = read_lines * self.record_length + if read_len <= 0: + self.close() + raise StopIteration + raw = self.filepath_or_buffer.read(read_len) + data = np.frombuffer(raw, dtype=self._dtype, count=read_lines) + + df = pd.DataFrame(index=range(read_lines)) + for j, x in enumerate(self.columns): + vec = data["s" + str(j)] + ntype = self.fields[j]["ntype"] + if ntype == "numeric": + vec = _handle_truncated_float_vec(vec, self.fields[j]["field_length"]) + miss = self._missing_double(vec) + v = _parse_float_vec(vec) + v[miss] = np.nan + elif self.fields[j]["ntype"] == "char": + v = [y.rstrip() for y in vec] + + if self._encoding is not None: + v = [y.decode(self._encoding) for y in v] + + df[x] = v + + if self._index is None: + df.index = pd.Index(range(self._lines_read, self._lines_read + read_lines)) + else: + df = df.set_index(self._index) + + self._lines_read += read_lines + + return df diff --git a/.local/lib/python3.8/site-packages/pandas/io/sas/sasreader.py b/.local/lib/python3.8/site-packages/pandas/io/sas/sasreader.py new file mode 100644 index 0000000..f50fc77 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/sas/sasreader.py @@ -0,0 +1,158 @@ +""" +Read SAS sas7bdat or xport files. +""" +from __future__ import annotations + +from abc import ( + ABCMeta, + abstractmethod, +) +from typing import ( + TYPE_CHECKING, + Hashable, + overload, +) + +from pandas._typing import ( + FilePath, + ReadBuffer, +) + +from pandas.io.common import stringify_path + +if TYPE_CHECKING: + from pandas import DataFrame + + +# TODO(PY38): replace with Protocol in Python 3.8 +class ReaderBase(metaclass=ABCMeta): + """ + Protocol for XportReader and SAS7BDATReader classes. + """ + + @abstractmethod + def read(self, nrows=None): + pass + + @abstractmethod + def close(self): + pass + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.close() + + +@overload +def read_sas( + filepath_or_buffer: FilePath | ReadBuffer[bytes], + format: str | None = ..., + index: Hashable | None = ..., + encoding: str | None = ..., + chunksize: int = ..., + iterator: bool = ..., +) -> ReaderBase: + ... + + +@overload +def read_sas( + filepath_or_buffer: FilePath | ReadBuffer[bytes], + format: str | None = ..., + index: Hashable | None = ..., + encoding: str | None = ..., + chunksize: None = ..., + iterator: bool = ..., +) -> DataFrame | ReaderBase: + ... + + +def read_sas( + filepath_or_buffer: FilePath | ReadBuffer[bytes], + format: str | None = None, + index: Hashable | None = None, + encoding: str | None = None, + chunksize: int | None = None, + iterator: bool = False, +) -> DataFrame | ReaderBase: + """ + Read SAS files stored as either XPORT or SAS7BDAT format files. + + Parameters + ---------- + filepath_or_buffer : str, path object, or file-like object + String, path object (implementing ``os.PathLike[str]``), or file-like + object implementing a binary ``read()`` function. The string could be a URL. 
+ Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is + expected. A local file could be: + ``file://localhost/path/to/table.sas``. + format : str {'xport', 'sas7bdat'} or None + If None, file format is inferred from file extension. If 'xport' or + 'sas7bdat', uses the corresponding format. + index : identifier of index column, defaults to None + Identifier of column that should be used as index of the DataFrame. + encoding : str, default is None + Encoding for text data. If None, text data are stored as raw bytes. + chunksize : int + Read file `chunksize` lines at a time, returns iterator. + + .. versionchanged:: 1.2 + + ``TextFileReader`` is a context manager. + iterator : bool, defaults to False + If True, returns an iterator for reading the file incrementally. + + .. versionchanged:: 1.2 + + ``TextFileReader`` is a context manager. + + Returns + ------- + DataFrame if iterator=False and chunksize=None, else SAS7BDATReader + or XportReader + """ + if format is None: + buffer_error_msg = ( + "If this is a buffer object rather " + "than a string name, you must specify a format string" + ) + filepath_or_buffer = stringify_path(filepath_or_buffer) + if not isinstance(filepath_or_buffer, str): + raise ValueError(buffer_error_msg) + fname = filepath_or_buffer.lower() + if fname.endswith(".xpt"): + format = "xport" + elif fname.endswith(".sas7bdat"): + format = "sas7bdat" + else: + raise ValueError("unable to infer format of SAS file") + + reader: ReaderBase + if format.lower() == "xport": + from pandas.io.sas.sas_xport import XportReader + + reader = XportReader( + filepath_or_buffer, + index=index, + encoding=encoding, + chunksize=chunksize, + ) + elif format.lower() == "sas7bdat": + from pandas.io.sas.sas7bdat import SAS7BDATReader + + reader = SAS7BDATReader( + filepath_or_buffer, + index=index, + encoding=encoding, + chunksize=chunksize, + ) + else: + raise ValueError("unknown SAS format") + + if iterator or chunksize: + return reader + + with reader: + return reader.read() diff --git a/.local/lib/python3.8/site-packages/pandas/io/spss.py b/.local/lib/python3.8/site-packages/pandas/io/spss.py new file mode 100644 index 0000000..533cf7a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/spss.py @@ -0,0 +1,49 @@ +from __future__ import annotations + +from pathlib import Path +from typing import Sequence + +from pandas.compat._optional import import_optional_dependency + +from pandas.core.dtypes.inference import is_list_like + +from pandas.core.api import DataFrame + +from pandas.io.common import stringify_path + + +def read_spss( + path: str | Path, + usecols: Sequence[str] | None = None, + convert_categoricals: bool = True, +) -> DataFrame: + """ + Load an SPSS file from the file path, returning a DataFrame. + + .. versionadded:: 0.25.0 + + Parameters + ---------- + path : str or Path + File path. + usecols : list-like, optional + Return a subset of the columns. If None, return all columns. + convert_categoricals : bool, default is True + Convert categorical columns into pd.Categorical. 
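A usage sketch for `read_spss`; the file and column names are placeholders, and the optional `pyreadstat` dependency must be installed:

    import pandas as pd

    df = pd.read_spss("survey.sav", usecols=["age", "region"],
                      convert_categoricals=True)
    print(df.dtypes)  # "region" comes back as Categorical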
+ + Returns + ------- + DataFrame + """ + pyreadstat = import_optional_dependency("pyreadstat") + + if usecols is not None: + if not is_list_like(usecols): + raise TypeError("usecols must be list-like.") + else: + usecols = list(usecols) # pyreadstat requires a list + + df, _ = pyreadstat.read_sav( + stringify_path(path), usecols=usecols, apply_value_formats=convert_categoricals + ) + return df diff --git a/.local/lib/python3.8/site-packages/pandas/io/sql.py b/.local/lib/python3.8/site-packages/pandas/io/sql.py new file mode 100644 index 0000000..cf0769a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/sql.py @@ -0,0 +1,2259 @@ +""" +Collection of query wrappers / abstractions to both facilitate data +retrieval and to reduce dependency on DB-specific API. +""" + +from __future__ import annotations + +from contextlib import contextmanager +from datetime import ( + date, + datetime, + time, +) +from functools import partial +import re +from typing import ( + Any, + Iterator, + Sequence, + cast, + overload, +) +import warnings + +import numpy as np + +import pandas._libs.lib as lib +from pandas._typing import DtypeArg +from pandas.compat._optional import import_optional_dependency +from pandas.errors import AbstractMethodError +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + is_datetime64tz_dtype, + is_dict_like, + is_list_like, +) +from pandas.core.dtypes.dtypes import DatetimeTZDtype +from pandas.core.dtypes.missing import isna + +from pandas import get_option +from pandas.core.api import ( + DataFrame, + Series, +) +from pandas.core.base import PandasObject +import pandas.core.common as com +from pandas.core.tools.datetimes import to_datetime +from pandas.util.version import Version + + +class DatabaseError(OSError): + pass + + +# ----------------------------------------------------------------------------- +# -- Helper functions + + +def _gt14() -> bool: + """ + Check if sqlalchemy.__version__ is at least 1.4.0, when several + deprecations were made. + """ + import sqlalchemy + + return Version(sqlalchemy.__version__) >= Version("1.4.0") + + +def _convert_params(sql, params): + """Convert SQL and params args to DBAPI2.0 compliant format.""" + args = [sql] + if params is not None: + if hasattr(params, "keys"): # test if params is a mapping + args += [params] + else: + args += [list(params)] + return args + + +def _process_parse_dates_argument(parse_dates): + """Process parse_dates argument for read_sql functions""" + # handle non-list entries for parse_dates gracefully + if parse_dates is True or parse_dates is None or parse_dates is False: + parse_dates = [] + + elif not hasattr(parse_dates, "__iter__"): + parse_dates = [parse_dates] + return parse_dates + + +def _handle_date_column( + col, utc: bool | None = None, format: str | dict[str, Any] | None = None +): + if isinstance(format, dict): + # GH35185 Allow custom error values in parse_dates argument of + # read_sql like functions. 
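# A minimal sketch of ``read_spss`` above; "survey.sav" is a hypothetical
# file, and the optional pyreadstat dependency must be installed.
import pandas as pd

# Load only two columns and keep raw coded values instead of Categoricals.
df = pd.read_spss(
    "survey.sav", usecols=["age", "income"], convert_categoricals=False
)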
+ # Format can take on custom to_datetime argument values such as + # {"errors": "coerce"} or {"dayfirst": True} + error = format.pop("errors", None) or "ignore" + return to_datetime(col, errors=error, **format) + else: + # Allow passing of formatting string for integers + # GH17855 + if format is None and ( + issubclass(col.dtype.type, np.floating) + or issubclass(col.dtype.type, np.integer) + ): + format = "s" + if format in ["D", "d", "h", "m", "s", "ms", "us", "ns"]: + return to_datetime(col, errors="coerce", unit=format, utc=utc) + elif is_datetime64tz_dtype(col.dtype): + # coerce to UTC timezone + # GH11216 + return to_datetime(col, utc=True) + else: + return to_datetime(col, errors="coerce", format=format, utc=utc) + + +def _parse_date_columns(data_frame, parse_dates): + """ + Force non-datetime columns to be read as such. + Supports both string formatted and integer timestamp columns. + """ + parse_dates = _process_parse_dates_argument(parse_dates) + + # we want to coerce datetime64_tz dtypes for now to UTC + # we could in theory do a 'nice' conversion from a FixedOffset tz + # GH11216 + for col_name, df_col in data_frame.items(): + if is_datetime64tz_dtype(df_col.dtype) or col_name in parse_dates: + try: + fmt = parse_dates[col_name] + except TypeError: + fmt = None + data_frame[col_name] = _handle_date_column(df_col, format=fmt) + + return data_frame + + +def _wrap_result( + data, + columns, + index_col=None, + coerce_float: bool = True, + parse_dates=None, + dtype: DtypeArg | None = None, +): + """Wrap result set of query in a DataFrame.""" + frame = DataFrame.from_records(data, columns=columns, coerce_float=coerce_float) + + if dtype: + frame = frame.astype(dtype) + + frame = _parse_date_columns(frame, parse_dates) + + if index_col is not None: + frame.set_index(index_col, inplace=True) + + return frame + + +def execute(sql, con, params=None): + """ + Execute the given SQL query using the provided connection object. + + Parameters + ---------- + sql : string + SQL query to be executed. + con : SQLAlchemy connectable(engine/connection) or sqlite3 connection + Using SQLAlchemy makes it possible to use any DB supported by the + library. + If a DBAPI2 object, only sqlite3 is supported. + params : list or tuple, optional, default: None + List of parameters to pass to execute method. + + Returns + ------- + Results Iterable + """ + pandas_sql = pandasSQL_builder(con) + args = _convert_params(sql, params) + return pandas_sql.execute(*args) + + +# ----------------------------------------------------------------------------- +# -- Read and write to DataFrames + + +@overload +def read_sql_table( + table_name, + con, + schema=..., + index_col=..., + coerce_float=..., + parse_dates=..., + columns=..., + chunksize: None = ..., +) -> DataFrame: + ... + + +@overload +def read_sql_table( + table_name, + con, + schema=..., + index_col=..., + coerce_float=..., + parse_dates=..., + columns=..., + chunksize: int = ..., +) -> Iterator[DataFrame]: + ... + + +def read_sql_table( + table_name: str, + con, + schema: str | None = None, + index_col: str | Sequence[str] | None = None, + coerce_float: bool = True, + parse_dates=None, + columns=None, + chunksize: int | None = None, +) -> DataFrame | Iterator[DataFrame]: + """ + Read SQL database table into a DataFrame. + + Given a table name and a SQLAlchemy connectable, returns a DataFrame. + This function does not support DBAPI connections. + + Parameters + ---------- + table_name : str + Name of SQL table in database. 
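# Illustration of the three parse_dates forms dispatched by
# _handle_date_column above, driven through read_sql_query; the in-memory
# sqlite3 table is an assumption for the example.
import sqlite3
import pandas as pd

con = sqlite3.connect(":memory:")
con.execute("CREATE TABLE t (stamp TEXT, epoch INTEGER)")
con.execute("INSERT INTO t VALUES ('2021-01-02', 1609545600)")

# 1. list form: columns parsed with default to_datetime settings
df1 = pd.read_sql_query("SELECT * FROM t", con, parse_dates=["stamp"])
# 2. format-string form: integer epoch seconds, i.e. unit="s"
df2 = pd.read_sql_query("SELECT * FROM t", con, parse_dates={"epoch": "s"})
# 3. kwargs-dict form: forwarded to pandas.to_datetime
df3 = pd.read_sql_query(
    "SELECT * FROM t", con, parse_dates={"stamp": {"errors": "coerce"}}
)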
+ con : SQLAlchemy connectable or str
+ A database URI could be provided as str.
+ SQLite DBAPI connection mode not supported.
+ schema : str, default None
+ Name of SQL schema in database to query (if database flavor
+ supports this). Uses default schema if None (default).
+ index_col : str or list of str, optional, default: None
+ Column(s) to set as index (MultiIndex).
+ coerce_float : bool, default True
+ Attempts to convert values of non-string, non-numeric objects (like
+ decimal.Decimal) to floating point. Can result in loss of precision.
+ parse_dates : list or dict, default None
+ - List of column names to parse as dates.
+ - Dict of ``{column_name: format string}`` where format string is
+ strftime compatible in case of parsing string times or is one of
+ (D, s, ns, ms, us) in case of parsing integer timestamps.
+ - Dict of ``{column_name: arg dict}``, where the arg dict corresponds
+ to the keyword arguments of :func:`pandas.to_datetime`.
+ Especially useful with databases without native Datetime support,
+ such as SQLite.
+ columns : list, default None
+ List of column names to select from SQL table.
+ chunksize : int, default None
+ If specified, returns an iterator where `chunksize` is the number of
+ rows to include in each chunk.
+
+ Returns
+ -------
+ DataFrame or Iterator[DataFrame]
+ A SQL table is returned as two-dimensional data structure with labeled
+ axes.
+
+ See Also
+ --------
+ read_sql_query : Read SQL query into a DataFrame.
+ read_sql : Read SQL query or database table into a DataFrame.
+
+ Notes
+ -----
+ Any datetime values with time zone information will be converted to UTC.
+
+ Examples
+ --------
+ >>> pd.read_sql_table('table_name', 'postgres:///db_name') # doctest:+SKIP
+ """
+ pandas_sql = pandasSQL_builder(con, schema=schema)
+ if not pandas_sql.has_table(table_name):
+ raise ValueError(f"Table {table_name} not found")
+
+ table = pandas_sql.read_table(
+ table_name,
+ index_col=index_col,
+ coerce_float=coerce_float,
+ parse_dates=parse_dates,
+ columns=columns,
+ chunksize=chunksize,
+ )
+
+ if table is not None:
+ return table
+ else:
+ raise ValueError(f"Table {table_name} not found", con)
+
+
+@overload
+def read_sql_query(
+ sql,
+ con,
+ index_col=...,
+ coerce_float=...,
+ params=...,
+ parse_dates=...,
+ chunksize: None = ...,
+ dtype: DtypeArg | None = ...,
+) -> DataFrame:
+ ...
+
+
+@overload
+def read_sql_query(
+ sql,
+ con,
+ index_col=...,
+ coerce_float=...,
+ params=...,
+ parse_dates=...,
+ chunksize: int = ...,
+ dtype: DtypeArg | None = ...,
+) -> Iterator[DataFrame]:
+ ...
+
+
+def read_sql_query(
+ sql,
+ con,
+ index_col=None,
+ coerce_float: bool = True,
+ params=None,
+ parse_dates=None,
+ chunksize: int | None = None,
+ dtype: DtypeArg | None = None,
+) -> DataFrame | Iterator[DataFrame]:
+ """
+ Read SQL query into a DataFrame.
+
+ Returns a DataFrame corresponding to the result set of the query
+ string. Optionally provide an `index_col` parameter to use one of the
+ columns as the index, otherwise default integer index will be used.
+
+ Parameters
+ ----------
+ sql : str SQL query or SQLAlchemy Selectable (select or text object)
+ SQL query to be executed.
+ con : SQLAlchemy connectable, str, or sqlite3 connection
+ Using SQLAlchemy makes it possible to use any DB supported by that
+ library. If a DBAPI2 object, only sqlite3 is supported.
+ index_col : str or list of str, optional, default: None
+ Column(s) to set as index (MultiIndex).
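# Sketch of ``read_sql_table`` above with an in-memory SQLAlchemy engine;
# the table name and contents are assumptions for illustration.
import pandas as pd
from sqlalchemy import create_engine

engine = create_engine("sqlite://")
pd.DataFrame({"a": [1, 2], "b": ["x", "y"]}).to_sql("demo", engine, index=False)

# Read it back, selecting column "b" and using column "a" as the index.
df = pd.read_sql_table("demo", engine, index_col="a", columns=["b"])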
+ coerce_float : bool, default True
+ Attempts to convert values of non-string, non-numeric objects (like
+ decimal.Decimal) to floating point. Useful for SQL result sets.
+ params : list, tuple or dict, optional, default: None
+ List of parameters to pass to execute method. The syntax used
+ to pass parameters is database driver dependent. Check your
+ database driver documentation for which of the five syntax styles,
+ described in PEP 249's paramstyle, is supported.
+ Eg. for psycopg2, uses %(name)s so use params={'name' : 'value'}.
+ parse_dates : list or dict, default: None
+ - List of column names to parse as dates.
+ - Dict of ``{column_name: format string}`` where format string is
+ strftime compatible in case of parsing string times, or is one of
+ (D, s, ns, ms, us) in case of parsing integer timestamps.
+ - Dict of ``{column_name: arg dict}``, where the arg dict corresponds
+ to the keyword arguments of :func:`pandas.to_datetime`.
+ Especially useful with databases without native Datetime support,
+ such as SQLite.
+ chunksize : int, default None
+ If specified, return an iterator where `chunksize` is the number of
+ rows to include in each chunk.
+ dtype : Type name or dict of columns
+ Data type for data or columns. E.g. np.float64 or
+ {'a': np.float64, 'b': np.int32, 'c': 'Int64'}.
+
+ .. versionadded:: 1.3.0
+
+ Returns
+ -------
+ DataFrame or Iterator[DataFrame]
+
+ See Also
+ --------
+ read_sql_table : Read SQL database table into a DataFrame.
+ read_sql : Read SQL query or database table into a DataFrame.
+
+ Notes
+ -----
+ Any datetime values with time zone information parsed via the `parse_dates`
+ parameter will be converted to UTC.
+ """
+ pandas_sql = pandasSQL_builder(con)
+ return pandas_sql.read_query(
+ sql,
+ index_col=index_col,
+ params=params,
+ coerce_float=coerce_float,
+ parse_dates=parse_dates,
+ chunksize=chunksize,
+ dtype=dtype,
+ )
+
+
+@overload
+def read_sql(
+ sql,
+ con,
+ index_col=...,
+ coerce_float=...,
+ params=...,
+ parse_dates=...,
+ columns=...,
+ chunksize: None = ...,
+) -> DataFrame:
+ ...
+
+
+@overload
+def read_sql(
+ sql,
+ con,
+ index_col=...,
+ coerce_float=...,
+ params=...,
+ parse_dates=...,
+ columns=...,
+ chunksize: int = ...,
+) -> Iterator[DataFrame]:
+ ...
+
+
+def read_sql(
+ sql,
+ con,
+ index_col: str | Sequence[str] | None = None,
+ coerce_float: bool = True,
+ params=None,
+ parse_dates=None,
+ columns=None,
+ chunksize: int | None = None,
+) -> DataFrame | Iterator[DataFrame]:
+ """
+ Read SQL query or database table into a DataFrame.
+
+ This function is a convenience wrapper around ``read_sql_table`` and
+ ``read_sql_query`` (for backward compatibility). It will delegate
+ to the specific function depending on the provided input. A SQL query
+ will be routed to ``read_sql_query``, while a database table name will
+ be routed to ``read_sql_table``. Note that the delegated function might
+ have more specific notes about their functionality not listed here.
+
+ Parameters
+ ----------
+ sql : str or SQLAlchemy Selectable (select or text object)
+ SQL query to be executed or a table name.
+ con : SQLAlchemy connectable, str, or sqlite3 connection
+ Using SQLAlchemy makes it possible to use any DB supported by that
+ library. If a DBAPI2 object, only sqlite3 is supported. The user is responsible
+ for engine disposal and connection closure for the SQLAlchemy connectable; str
+ connections are closed automatically. See
+ `here <https://docs.sqlalchemy.org/en/13/core/connections.html>`_.
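# Parameterized-query sketch for ``read_sql_query`` above; the paramstyle
# is driver dependent, and sqlite3 uses qmark ("?") placeholders.
import sqlite3
import pandas as pd

con = sqlite3.connect(":memory:")
con.execute("CREATE TABLE users (name TEXT, age INTEGER)")
con.executemany("INSERT INTO users VALUES (?, ?)", [("ann", 30), ("bob", 41)])

df = pd.read_sql_query("SELECT name FROM users WHERE age > ?", con, params=(30,))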
+ index_col : str or list of str, optional, default: None + Column(s) to set as index(MultiIndex). + coerce_float : bool, default True + Attempts to convert values of non-string, non-numeric objects (like + decimal.Decimal) to floating point, useful for SQL result sets. + params : list, tuple or dict, optional, default: None + List of parameters to pass to execute method. The syntax used + to pass parameters is database driver dependent. Check your + database driver documentation for which of the five syntax styles, + described in PEP 249's paramstyle, is supported. + Eg. for psycopg2, uses %(name)s so use params={'name' : 'value'}. + parse_dates : list or dict, default: None + - List of column names to parse as dates. + - Dict of ``{column_name: format string}`` where format string is + strftime compatible in case of parsing string times, or is one of + (D, s, ns, ms, us) in case of parsing integer timestamps. + - Dict of ``{column_name: arg dict}``, where the arg dict corresponds + to the keyword arguments of :func:`pandas.to_datetime` + Especially useful with databases without native Datetime support, + such as SQLite. + columns : list, default: None + List of column names to select from SQL table (only used when reading + a table). + chunksize : int, default None + If specified, return an iterator where `chunksize` is the + number of rows to include in each chunk. + + Returns + ------- + DataFrame or Iterator[DataFrame] + + See Also + -------- + read_sql_table : Read SQL database table into a DataFrame. + read_sql_query : Read SQL query into a DataFrame. + + Examples + -------- + Read data from SQL via either a SQL query or a SQL tablename. + When using a SQLite database only SQL queries are accepted, + providing only the SQL tablename will result in an error. + + >>> from sqlite3 import connect + >>> conn = connect(':memory:') + >>> df = pd.DataFrame(data=[[0, '10/11/12'], [1, '12/11/10']], + ... columns=['int_column', 'date_column']) + >>> df.to_sql('test_data', conn) + 2 + + >>> pd.read_sql('SELECT int_column, date_column FROM test_data', conn) + int_column date_column + 0 0 10/11/12 + 1 1 12/11/10 + + >>> pd.read_sql('test_data', 'postgres:///db_name') # doctest:+SKIP + + Apply date parsing to columns through the ``parse_dates`` argument + + >>> pd.read_sql('SELECT int_column, date_column FROM test_data', + ... conn, + ... parse_dates=["date_column"]) + int_column date_column + 0 0 2012-10-11 + 1 1 2010-12-11 + + The ``parse_dates`` argument calls ``pd.to_datetime`` on the provided columns. + Custom argument values for applying ``pd.to_datetime`` on a column are specified + via a dictionary format: + 1. Ignore errors while parsing the values of "date_column" + + >>> pd.read_sql('SELECT int_column, date_column FROM test_data', + ... conn, + ... parse_dates={"date_column": {"errors": "ignore"}}) + int_column date_column + 0 0 2012-10-11 + 1 1 2010-12-11 + + 2. Apply a dayfirst date parsing order on the values of "date_column" + + >>> pd.read_sql('SELECT int_column, date_column FROM test_data', + ... conn, + ... parse_dates={"date_column": {"dayfirst": True}}) + int_column date_column + 0 0 2012-11-10 + 1 1 2010-11-12 + + 3. Apply custom formatting when date parsing the values of "date_column" + + >>> pd.read_sql('SELECT int_column, date_column FROM test_data', + ... conn, + ... 
parse_dates={"date_column": {"format": "%d/%m/%y"}}) + int_column date_column + 0 0 2012-11-10 + 1 1 2010-11-12 + """ + pandas_sql = pandasSQL_builder(con) + + if isinstance(pandas_sql, SQLiteDatabase): + return pandas_sql.read_query( + sql, + index_col=index_col, + params=params, + coerce_float=coerce_float, + parse_dates=parse_dates, + chunksize=chunksize, + ) + + try: + _is_table_name = pandas_sql.has_table(sql) + except Exception: + # using generic exception to catch errors from sql drivers (GH24988) + _is_table_name = False + + if _is_table_name: + pandas_sql.meta.reflect(bind=pandas_sql.connectable, only=[sql]) + return pandas_sql.read_table( + sql, + index_col=index_col, + coerce_float=coerce_float, + parse_dates=parse_dates, + columns=columns, + chunksize=chunksize, + ) + else: + return pandas_sql.read_query( + sql, + index_col=index_col, + params=params, + coerce_float=coerce_float, + parse_dates=parse_dates, + chunksize=chunksize, + ) + + +def to_sql( + frame, + name: str, + con, + schema: str | None = None, + if_exists: str = "fail", + index: bool = True, + index_label=None, + chunksize: int | None = None, + dtype: DtypeArg | None = None, + method: str | None = None, + engine: str = "auto", + **engine_kwargs, +) -> int | None: + """ + Write records stored in a DataFrame to a SQL database. + + Parameters + ---------- + frame : DataFrame, Series + name : str + Name of SQL table. + con : SQLAlchemy connectable(engine/connection) or database string URI + or sqlite3 DBAPI2 connection + Using SQLAlchemy makes it possible to use any DB supported by that + library. + If a DBAPI2 object, only sqlite3 is supported. + schema : str, optional + Name of SQL schema in database to write to (if database flavor + supports this). If None, use default schema (default). + if_exists : {'fail', 'replace', 'append'}, default 'fail' + - fail: If table exists, do nothing. + - replace: If table exists, drop it, recreate it, and insert data. + - append: If table exists, insert data. Create if does not exist. + index : bool, default True + Write DataFrame index as a column. + index_label : str or sequence, optional + Column label for index column(s). If None is given (default) and + `index` is True, then the index names are used. + A sequence should be given if the DataFrame uses MultiIndex. + chunksize : int, optional + Specify the number of rows in each batch to be written at a time. + By default, all rows will be written at once. + dtype : dict or scalar, optional + Specifying the datatype for columns. If a dictionary is used, the + keys should be the column names and the values should be the + SQLAlchemy types or strings for the sqlite3 fallback mode. If a + scalar is provided, it will be applied to all columns. + method : {None, 'multi', callable}, optional + Controls the SQL insertion clause used: + + - None : Uses standard SQL ``INSERT`` clause (one per row). + - ``'multi'``: Pass multiple values in a single ``INSERT`` clause. + - callable with signature ``(pd_table, conn, keys, data_iter) -> int | None``. + + Details and a sample callable implementation can be found in the + section :ref:`insert method `. + engine : {'auto', 'sqlalchemy'}, default 'auto' + SQL engine library to use. If 'auto', then the option + ``io.sql.engine`` is used. The default ``io.sql.engine`` + behavior is 'sqlalchemy' + + .. versionadded:: 1.3.0 + + **engine_kwargs + Any additional kwargs are passed to the engine. + + Returns + ------- + None or int + Number of rows affected by to_sql. 
None is returned if the callable
+ passed into ``method`` does not return the number of rows.
+
+ .. versionadded:: 1.4.0
+
+ Notes
+ -----
+ The returned count of rows affected is the sum of the ``rowcount`` attribute of
+ ``sqlite3.Cursor`` or the SQLAlchemy connectable. It may not reflect the exact
+ number of written rows as stipulated in the ``rowcount`` documentation of
+ sqlite3 or SQLAlchemy.
+ """ # noqa:E501
+ if if_exists not in ("fail", "replace", "append"):
+ raise ValueError(f"'{if_exists}' is not valid for if_exists")
+
+ pandas_sql = pandasSQL_builder(con, schema=schema)
+
+ if isinstance(frame, Series):
+ frame = frame.to_frame()
+ elif not isinstance(frame, DataFrame):
+ raise NotImplementedError(
+ "'frame' argument should be either a Series or a DataFrame"
+ )
+
+ return pandas_sql.to_sql(
+ frame,
+ name,
+ if_exists=if_exists,
+ index=index,
+ index_label=index_label,
+ schema=schema,
+ chunksize=chunksize,
+ dtype=dtype,
+ method=method,
+ engine=engine,
+ **engine_kwargs,
+ )
+
+
+def has_table(table_name: str, con, schema: str | None = None):
+ """
+ Check if database has named table.
+
+ Parameters
+ ----------
+ table_name: string
+ Name of SQL table.
+ con: SQLAlchemy connectable(engine/connection) or sqlite3 DBAPI2 connection
+ Using SQLAlchemy makes it possible to use any DB supported by that
+ library.
+ If a DBAPI2 object, only sqlite3 is supported.
+ schema : string, default None
+ Name of SQL schema in database to write to (if database flavor supports
+ this). If None, use default schema (default).
+
+ Returns
+ -------
+ boolean
+ """
+ pandas_sql = pandasSQL_builder(con, schema=schema)
+ return pandas_sql.has_table(table_name)
+
+
+table_exists = has_table
+
+
+def pandasSQL_builder(con, schema: str | None = None):
+ """
+ Convenience function to return the correct PandasSQL subclass based on the
+ provided parameters.
+ """
+ import sqlite3
+
+ if isinstance(con, sqlite3.Connection) or con is None:
+ return SQLiteDatabase(con)
+
+ sqlalchemy = import_optional_dependency("sqlalchemy", errors="ignore")
+
+ if isinstance(con, str):
+ if sqlalchemy is None:
+ raise ImportError("Using URI string without sqlalchemy installed.")
+ else:
+ con = sqlalchemy.create_engine(con)
+
+ if sqlalchemy is not None and isinstance(con, sqlalchemy.engine.Connectable):
+ return SQLDatabase(con, schema=schema)
+
+ warnings.warn(
+ "pandas only supports SQLAlchemy connectable (engine/connection) or "
+ "database string URI or sqlite3 DBAPI2 connection; "
+ "other DBAPI2 objects are not tested, please consider using SQLAlchemy",
+ UserWarning,
+ )
+ return SQLiteDatabase(con)
+
+
+class SQLTable(PandasObject):
+ """
+ For mapping Pandas tables to SQL tables.
+ Uses the fact that the table is reflected by SQLAlchemy to
+ do better type conversions.
+ Also holds various flags needed to avoid having to
+ pass them between functions all the time.
+ """
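# Sketch of the callable form of ``method`` accepted by ``to_sql`` above.
# The callable receives (pd_table, conn, keys, data_iter) and may return a
# row count; this variant simply delegates to executemany on the sqlite3
# fallback, where ``conn`` is a cursor.
import sqlite3
import pandas as pd

def insert_via_executemany(pd_table, conn, keys, data_iter) -> int:
    rows = list(data_iter)
    placeholders = ", ".join(["?"] * len(keys))
    cols = ", ".join(f'"{k}"' for k in keys)
    conn.executemany(
        f'INSERT INTO "{pd_table.name}" ({cols}) VALUES ({placeholders})', rows
    )
    return len(rows)

con = sqlite3.connect(":memory:")
rows = pd.DataFrame({"x": [1, 2, 3]}).to_sql(
    "t", con, index=False, method=insert_via_executemany
)
assert rows == 3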
+
+ # TODO: support for multiIndex
+
+ def __init__(
+ self,
+ name: str,
+ pandas_sql_engine,
+ frame=None,
+ index=True,
+ if_exists="fail",
+ prefix="pandas",
+ index_label=None,
+ schema=None,
+ keys=None,
+ dtype: DtypeArg | None = None,
+ ):
+ self.name = name
+ self.pd_sql = pandas_sql_engine
+ self.prefix = prefix
+ self.frame = frame
+ self.index = self._index_name(index, index_label)
+ self.schema = schema
+ self.if_exists = if_exists
+ self.keys = keys
+ self.dtype = dtype
+
+ if frame is not None:
+ # We want to initialize based on a dataframe
+ self.table = self._create_table_setup()
+ else:
+ # no data provided, read-only mode
+ self.table = self.pd_sql.get_table(self.name, self.schema)
+
+ if self.table is None:
+ raise ValueError(f"Could not init table '{name}'")
+
+ def exists(self):
+ return self.pd_sql.has_table(self.name, self.schema)
+
+ def sql_schema(self):
+ from sqlalchemy.schema import CreateTable
+
+ return str(CreateTable(self.table).compile(self.pd_sql.connectable))
+
+ def _execute_create(self):
+ # Inserting table into database, add to MetaData object
+ if _gt14():
+ self.table = self.table.to_metadata(self.pd_sql.meta)
+ else:
+ self.table = self.table.tometadata(self.pd_sql.meta)
+ self.table.create(bind=self.pd_sql.connectable)
+
+ def create(self):
+ if self.exists():
+ if self.if_exists == "fail":
+ raise ValueError(f"Table '{self.name}' already exists.")
+ elif self.if_exists == "replace":
+ self.pd_sql.drop_table(self.name, self.schema)
+ self._execute_create()
+ elif self.if_exists == "append":
+ pass
+ else:
+ raise ValueError(f"'{self.if_exists}' is not valid for if_exists")
+ else:
+ self._execute_create()
+
+ def _execute_insert(self, conn, keys: list[str], data_iter) -> int:
+ """
+ Execute SQL statement inserting data
+
+ Parameters
+ ----------
+ conn : sqlalchemy.engine.Engine or sqlalchemy.engine.Connection
+ keys : list of str
+ Column names
+ data_iter : generator of list
+ Each item contains a list of values to be inserted
+ """
+ data = [dict(zip(keys, row)) for row in data_iter]
+ result = conn.execute(self.table.insert(), data)
+ return result.rowcount
+
+ def _execute_insert_multi(self, conn, keys: list[str], data_iter) -> int:
+ """
+ Alternative to _execute_insert for DBs that support multivalue INSERT.
+
+ Note: multi-value insert is usually faster for analytics DBs
+ and tables containing a few columns,
+ but performance degrades quickly as the number of columns increases.
+ """
+
+ from sqlalchemy import insert
+
+ data = [dict(zip(keys, row)) for row in data_iter]
+ stmt = insert(self.table).values(data)
+ result = conn.execute(stmt)
+ return result.rowcount
+
+ def insert_data(self):
+ if self.index is not None:
+ temp = self.frame.copy()
+ temp.index.names = self.index
+ try:
+ temp.reset_index(inplace=True)
+ except ValueError as err:
+ raise ValueError(f"duplicate name in index/columns: {err}") from err
+ else:
+ temp = self.frame
+
+ column_names = list(map(str, temp.columns))
+ ncols = len(column_names)
+ data_list = [None] * ncols
+
+ for i, (_, ser) in enumerate(temp.items()):
+ vals = ser._values
+ if vals.dtype.kind == "M":
+ d = vals.to_pydatetime()
+ elif vals.dtype.kind == "m":
+ # store as integers, see GH#6921, GH#7076
+ d = vals.view("i8").astype(object)
+ else:
+ d = vals.astype(object)
+
+ assert isinstance(d, np.ndarray), type(d)
+
+ if ser._can_hold_na:
+ # Note: this will miss timedeltas since they are converted to int
+ mask = isna(d)
+ d[mask] = None
+
+ # error: No overload variant of "__setitem__" of "list" matches
+ # argument types "int", "ndarray"
+ data_list[i] = d # type: ignore[call-overload]
+
+ return column_names, data_list
+
+ def insert(
+ self, chunksize: int | None = None, method: str | None = None
+ ) -> int | None:
+
+ # set insert method
+ if method is None:
+ exec_insert = self._execute_insert
+ elif method == "multi":
+ exec_insert = self._execute_insert_multi
+ elif callable(method):
+ exec_insert = partial(method, self)
+ else:
+ raise ValueError(f"Invalid parameter `method`: {method}")
+
+ keys, data_list = self.insert_data()
+
+ nrows = len(self.frame)
+
+ if nrows == 0:
+ return 0
+
+ if chunksize is None:
+ chunksize = nrows
+ elif chunksize == 0:
+ raise ValueError("chunksize argument should be non-zero")
+
+ # Always compute one extra chunk; when chunksize divides nrows evenly,
+ # the trailing empty slice is skipped by the start_i >= end_i guard below.
+ chunks = (nrows // chunksize) + 1
+ total_inserted = 0
+ with self.pd_sql.run_transaction() as conn:
+ for i in range(chunks):
+ start_i = i * chunksize
+ end_i = min((i + 1) * chunksize, nrows)
+ if start_i >= end_i:
+ break
+
+ chunk_iter = zip(*(arr[start_i:end_i] for arr in data_list))
+ num_inserted = exec_insert(conn, keys, chunk_iter)
+ if num_inserted is None:
+ total_inserted = None
+ else:
+ total_inserted += num_inserted
+ return total_inserted
+
+ def _query_iterator(
+ self,
+ result,
+ chunksize: int | None,
+ columns,
+ coerce_float: bool = True,
+ parse_dates=None,
+ ):
+ """Return generator through chunked result set."""
+ has_read_data = False
+ while True:
+ data = result.fetchmany(chunksize)
+ if not data:
+ if not has_read_data:
+ yield DataFrame.from_records(
+ [], columns=columns, coerce_float=coerce_float
+ )
+ break
+ else:
+ has_read_data = True
+ self.frame = DataFrame.from_records(
+ data, columns=columns, coerce_float=coerce_float
+ )
+
+ self._harmonize_columns(parse_dates=parse_dates)
+
+ if self.index is not None:
+ self.frame.set_index(self.index, inplace=True)
+
+ yield self.frame
+
+ def read(self, coerce_float=True, parse_dates=None, columns=None, chunksize=None):
+ from sqlalchemy import select
+
+ if columns is not None and len(columns) > 0:
+ cols = [self.table.c[n] for n in columns]
+ if self.index is not None:
+ for idx in self.index[::-1]:
+ cols.insert(0, self.table.c[idx])
+ sql_select = select(*cols) if _gt14() else select(cols)
+ else:
+ sql_select = select(self.table) if _gt14() else self.table.select()
+
+ result = self.pd_sql.execute(sql_select)
+ column_names = result.keys()
+
+ if chunksize is not None:
+ return self._query_iterator(
+ result,
+
chunksize, + column_names, + coerce_float=coerce_float, + parse_dates=parse_dates, + ) + else: + data = result.fetchall() + self.frame = DataFrame.from_records( + data, columns=column_names, coerce_float=coerce_float + ) + + self._harmonize_columns(parse_dates=parse_dates) + + if self.index is not None: + self.frame.set_index(self.index, inplace=True) + + return self.frame + + def _index_name(self, index, index_label): + # for writing: index=True to include index in sql table + if index is True: + nlevels = self.frame.index.nlevels + # if index_label is specified, set this as index name(s) + if index_label is not None: + if not isinstance(index_label, list): + index_label = [index_label] + if len(index_label) != nlevels: + raise ValueError( + "Length of 'index_label' should match number of " + f"levels, which is {nlevels}" + ) + else: + return index_label + # return the used column labels for the index columns + if ( + nlevels == 1 + and "index" not in self.frame.columns + and self.frame.index.name is None + ): + return ["index"] + else: + return com.fill_missing_names(self.frame.index.names) + + # for reading: index=(list of) string to specify column to set as index + elif isinstance(index, str): + return [index] + elif isinstance(index, list): + return index + else: + return None + + def _get_column_names_and_types(self, dtype_mapper): + column_names_and_types = [] + if self.index is not None: + for i, idx_label in enumerate(self.index): + idx_type = dtype_mapper(self.frame.index._get_level_values(i)) + column_names_and_types.append((str(idx_label), idx_type, True)) + + column_names_and_types += [ + (str(self.frame.columns[i]), dtype_mapper(self.frame.iloc[:, i]), False) + for i in range(len(self.frame.columns)) + ] + + return column_names_and_types + + def _create_table_setup(self): + from sqlalchemy import ( + Column, + PrimaryKeyConstraint, + Table, + ) + from sqlalchemy.schema import MetaData + + column_names_and_types = self._get_column_names_and_types(self._sqlalchemy_type) + + columns = [ + Column(name, typ, index=is_index) + for name, typ, is_index in column_names_and_types + ] + + if self.keys is not None: + if not is_list_like(self.keys): + keys = [self.keys] + else: + keys = self.keys + pkc = PrimaryKeyConstraint(*keys, name=self.name + "_pk") + columns.append(pkc) + + schema = self.schema or self.pd_sql.meta.schema + + # At this point, attach to new metadata, only attach to self.meta + # once table is created. + meta = MetaData() + return Table(self.name, meta, *columns, schema=schema) + + def _harmonize_columns(self, parse_dates=None): + """ + Make the DataFrame's column types align with the SQL table + column types. + Need to work around limited NA value support. Floats are always + fine, ints must always be floats if there are Null values. + Booleans are hard because converting bool column with None replaces + all Nones with false. Therefore only convert bool if there are no + NA values. + Datetimes should already be converted to np.datetime64 if supported, + but here we also force conversion if required. 
+ """ + parse_dates = _process_parse_dates_argument(parse_dates) + + for sql_col in self.table.columns: + col_name = sql_col.name + try: + df_col = self.frame[col_name] + + # Handle date parsing upfront; don't try to convert columns + # twice + if col_name in parse_dates: + try: + fmt = parse_dates[col_name] + except TypeError: + fmt = None + self.frame[col_name] = _handle_date_column(df_col, format=fmt) + continue + + # the type the dataframe column should have + col_type = self._get_dtype(sql_col.type) + + if ( + col_type is datetime + or col_type is date + or col_type is DatetimeTZDtype + ): + # Convert tz-aware Datetime SQL columns to UTC + utc = col_type is DatetimeTZDtype + self.frame[col_name] = _handle_date_column(df_col, utc=utc) + elif col_type is float: + # floats support NA, can always convert! + self.frame[col_name] = df_col.astype(col_type, copy=False) + + elif len(df_col) == df_col.count(): + # No NA values, can convert ints and bools + if col_type is np.dtype("int64") or col_type is bool: + self.frame[col_name] = df_col.astype(col_type, copy=False) + except KeyError: + pass # this column not in results + + def _sqlalchemy_type(self, col): + + dtype: DtypeArg = self.dtype or {} + if is_dict_like(dtype): + dtype = cast(dict, dtype) + if col.name in dtype: + return dtype[col.name] + + # Infer type of column, while ignoring missing values. + # Needed for inserting typed data containing NULLs, GH 8778. + col_type = lib.infer_dtype(col, skipna=True) + + from sqlalchemy.types import ( + TIMESTAMP, + BigInteger, + Boolean, + Date, + DateTime, + Float, + Integer, + SmallInteger, + Text, + Time, + ) + + if col_type == "datetime64" or col_type == "datetime": + # GH 9086: TIMESTAMP is the suggested type if the column contains + # timezone information + try: + if col.dt.tz is not None: + return TIMESTAMP(timezone=True) + except AttributeError: + # The column is actually a DatetimeIndex + # GH 26761 or an Index with date-like data e.g. 9999-01-01 + if getattr(col, "tz", None) is not None: + return TIMESTAMP(timezone=True) + return DateTime + if col_type == "timedelta64": + warnings.warn( + "the 'timedelta' type is not supported, and will be " + "written as integer values (ns frequency) to the database.", + UserWarning, + stacklevel=find_stack_level(), + ) + return BigInteger + elif col_type == "floating": + if col.dtype == "float32": + return Float(precision=23) + else: + return Float(precision=53) + elif col_type == "integer": + # GH35076 Map pandas integer to optimal SQLAlchemy integer type + if col.dtype.name.lower() in ("int8", "uint8", "int16"): + return SmallInteger + elif col.dtype.name.lower() in ("uint16", "int32"): + return Integer + elif col.dtype.name.lower() == "uint64": + raise ValueError("Unsigned 64 bit integer datatype is not supported") + else: + return BigInteger + elif col_type == "boolean": + return Boolean + elif col_type == "date": + return Date + elif col_type == "time": + return Time + elif col_type == "complex": + raise ValueError("Complex datatypes not supported") + + return Text + + def _get_dtype(self, sqltype): + from sqlalchemy.types import ( + TIMESTAMP, + Boolean, + Date, + DateTime, + Float, + Integer, + ) + + if isinstance(sqltype, Float): + return float + elif isinstance(sqltype, Integer): + # TODO: Refine integer size. 
+
+ return np.dtype("int64")
+ elif isinstance(sqltype, TIMESTAMP):
+ # we have a timezone capable type
+ if not sqltype.timezone:
+ return datetime
+ return DatetimeTZDtype
+ elif isinstance(sqltype, DateTime):
+ # Caution: np.datetime64 is also a subclass of np.number.
+ return datetime
+ elif isinstance(sqltype, Date):
+ return date
+ elif isinstance(sqltype, Boolean):
+ return bool
+ return object
+
+
+class PandasSQL(PandasObject):
+ """
+ Subclasses should define read_sql and to_sql.
+ """
+
+ def read_sql(self, *args, **kwargs):
+ raise ValueError(
+ "PandasSQL must be created with an SQLAlchemy "
+ "connectable or sqlite connection"
+ )
+
+ def to_sql(
+ self,
+ frame,
+ name,
+ if_exists="fail",
+ index=True,
+ index_label=None,
+ schema=None,
+ chunksize=None,
+ dtype: DtypeArg | None = None,
+ method=None,
+ ) -> int | None:
+ raise ValueError(
+ "PandasSQL must be created with an SQLAlchemy "
+ "connectable or sqlite connection"
+ )
+
+
+class BaseEngine:
+ def insert_records(
+ self,
+ table: SQLTable,
+ con,
+ frame,
+ name,
+ index=True,
+ schema=None,
+ chunksize=None,
+ method=None,
+ **engine_kwargs,
+ ) -> int | None:
+ """
+ Inserts data into an already-prepared table.
+ """
+ raise AbstractMethodError(self)
+
+
+class SQLAlchemyEngine(BaseEngine):
+ def __init__(self):
+ import_optional_dependency(
+ "sqlalchemy", extra="sqlalchemy is required for SQL support."
+ )
+
+ def insert_records(
+ self,
+ table: SQLTable,
+ con,
+ frame,
+ name,
+ index=True,
+ schema=None,
+ chunksize=None,
+ method=None,
+ **engine_kwargs,
+ ) -> int | None:
+ from sqlalchemy import exc
+
+ try:
+ return table.insert(chunksize=chunksize, method=method)
+ except exc.SQLAlchemyError as err:
+ # GH34431
+ # https://stackoverflow.com/a/67358288/6067848
+ msg = r"""(\(1054, "Unknown column 'inf(e0)?' in 'field list'"\))(?#
+ )|inf can not be used with MySQL"""
+ err_text = str(err.orig)
+ if re.search(msg, err_text):
+ raise ValueError("inf cannot be used with MySQL") from err
+ else:
+ raise err
+
+
+def get_engine(engine: str) -> BaseEngine:
+ """return our implementation"""
+ if engine == "auto":
+ engine = get_option("io.sql.engine")
+
+ if engine == "auto":
+ # try engines in this order
+ engine_classes = [SQLAlchemyEngine]
+
+ error_msgs = ""
+ for engine_class in engine_classes:
+ try:
+ return engine_class()
+ except ImportError as err:
+ error_msgs += "\n - " + str(err)
+
+ raise ImportError(
+ "Unable to find a usable engine; "
+ "tried using: 'sqlalchemy'.\n"
+ "A suitable version of "
+ "sqlalchemy is required for sql I/O "
+ "support.\n"
+ "Trying to import the above resulted in these errors:"
+ f"{error_msgs}"
+ )
+
+ elif engine == "sqlalchemy":
+ return SQLAlchemyEngine()
+
+ raise ValueError("engine must be one of 'auto', 'sqlalchemy'")
+
+
+class SQLDatabase(PandasSQL):
+ """
+ This class enables conversion between DataFrame and SQL databases
+ using SQLAlchemy to handle database abstraction.
+
+ Parameters
+ ----------
+ engine : SQLAlchemy connectable
+ Connectable to connect with the database. Using SQLAlchemy makes it
+ possible to use any DB supported by that library.
+ schema : string, default None
+ Name of SQL schema in database to write to (if database flavor
+ supports this). If None, use default schema (default).
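# ``get_engine`` above resolves "auto" through the ``io.sql.engine``
# option; a small sketch of inspecting and pinning it ("sqlalchemy" is
# currently the only engine shipped, so both settings behave alike).
import pandas as pd

print(pd.get_option("io.sql.engine"))        # "auto" by default
pd.set_option("io.sql.engine", "sqlalchemy")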
+ + """ + + def __init__(self, engine, schema: str | None = None): + from sqlalchemy.schema import MetaData + + self.connectable = engine + self.meta = MetaData(schema=schema) + + @contextmanager + def run_transaction(self): + from sqlalchemy.engine import Engine + + if isinstance(self.connectable, Engine): + with self.connectable.connect() as conn: + with conn.begin(): + yield conn + else: + yield self.connectable + + def execute(self, *args, **kwargs): + """Simple passthrough to SQLAlchemy connectable""" + return self.connectable.execution_options().execute(*args, **kwargs) + + def read_table( + self, + table_name: str, + index_col: str | Sequence[str] | None = None, + coerce_float: bool = True, + parse_dates=None, + columns=None, + schema: str | None = None, + chunksize: int | None = None, + ): + """ + Read SQL database table into a DataFrame. + + Parameters + ---------- + table_name : str + Name of SQL table in database. + index_col : string, optional, default: None + Column to set as index. + coerce_float : bool, default True + Attempts to convert values of non-string, non-numeric objects + (like decimal.Decimal) to floating point. This can result in + loss of precision. + parse_dates : list or dict, default: None + - List of column names to parse as dates. + - Dict of ``{column_name: format string}`` where format string is + strftime compatible in case of parsing string times, or is one of + (D, s, ns, ms, us) in case of parsing integer timestamps. + - Dict of ``{column_name: arg}``, where the arg corresponds + to the keyword arguments of :func:`pandas.to_datetime`. + Especially useful with databases without native Datetime support, + such as SQLite. + columns : list, default: None + List of column names to select from SQL table. + schema : string, default None + Name of SQL schema in database to query (if database flavor + supports this). If specified, this overwrites the default + schema of the SQL database object. + chunksize : int, default None + If specified, return an iterator where `chunksize` is the number + of rows to include in each chunk. + + Returns + ------- + DataFrame + + See Also + -------- + pandas.read_sql_table + SQLDatabase.read_query + + """ + table = SQLTable(table_name, self, index=index_col, schema=schema) + return table.read( + coerce_float=coerce_float, + parse_dates=parse_dates, + columns=columns, + chunksize=chunksize, + ) + + @staticmethod + def _query_iterator( + result, + chunksize: int, + columns, + index_col=None, + coerce_float=True, + parse_dates=None, + dtype: DtypeArg | None = None, + ): + """Return generator through chunked result set""" + has_read_data = False + while True: + data = result.fetchmany(chunksize) + if not data: + if not has_read_data: + yield _wrap_result( + [], + columns, + index_col=index_col, + coerce_float=coerce_float, + parse_dates=parse_dates, + ) + break + else: + has_read_data = True + yield _wrap_result( + data, + columns, + index_col=index_col, + coerce_float=coerce_float, + parse_dates=parse_dates, + dtype=dtype, + ) + + def read_query( + self, + sql: str, + index_col: str | None = None, + coerce_float: bool = True, + parse_dates=None, + params=None, + chunksize: int | None = None, + dtype: DtypeArg | None = None, + ): + """ + Read SQL query into a DataFrame. + + Parameters + ---------- + sql : str + SQL query to be executed. + index_col : string, optional, default: None + Column name to use as index for the returned DataFrame object. 
+ coerce_float : bool, default True
+ Attempt to convert values of non-string, non-numeric objects (like
+ decimal.Decimal) to floating point, useful for SQL result sets.
+ params : list, tuple or dict, optional, default: None
+ List of parameters to pass to execute method. The syntax used
+ to pass parameters is database driver dependent. Check your
+ database driver documentation for which of the five syntax styles,
+ described in PEP 249's paramstyle, is supported.
+ Eg. for psycopg2, uses %(name)s so use params={'name' : 'value'}
+ parse_dates : list or dict, default: None
+ - List of column names to parse as dates.
+ - Dict of ``{column_name: format string}`` where format string is
+ strftime compatible in case of parsing string times, or is one of
+ (D, s, ns, ms, us) in case of parsing integer timestamps.
+ - Dict of ``{column_name: arg dict}``, where the arg dict
+ corresponds to the keyword arguments of
+ :func:`pandas.to_datetime`. Especially useful with databases
+ without native Datetime support, such as SQLite.
+ chunksize : int, default None
+ If specified, return an iterator where `chunksize` is the number
+ of rows to include in each chunk.
+ dtype : Type name or dict of columns
+ Data type for data or columns. E.g. np.float64 or
+ {'a': np.float64, 'b': np.int32, 'c': 'Int64'}
+
+ .. versionadded:: 1.3.0
+
+ Returns
+ -------
+ DataFrame
+
+ See Also
+ --------
+ read_sql_table : Read SQL database table into a DataFrame.
+ read_sql
+
+ """
+ args = _convert_params(sql, params)
+
+ result = self.execute(*args)
+ columns = result.keys()
+
+ if chunksize is not None:
+ return self._query_iterator(
+ result,
+ chunksize,
+ columns,
+ index_col=index_col,
+ coerce_float=coerce_float,
+ parse_dates=parse_dates,
+ dtype=dtype,
+ )
+ else:
+ data = result.fetchall()
+ frame = _wrap_result(
+ data,
+ columns,
+ index_col=index_col,
+ coerce_float=coerce_float,
+ parse_dates=parse_dates,
+ dtype=dtype,
+ )
+ return frame
+
+ read_sql = read_query
+
+ def prep_table(
+ self,
+ frame,
+ name,
+ if_exists="fail",
+ index=True,
+ index_label=None,
+ schema=None,
+ dtype: DtypeArg | None = None,
+ ) -> SQLTable:
+ """
+ Prepares table in the database for data insertion. Creates it if needed, etc.
+ """
+ if dtype:
+ if not is_dict_like(dtype):
+ # error: Value expression in dictionary comprehension has incompatible
+ # type "Union[ExtensionDtype, str, dtype[Any], Type[object],
+ # Dict[Hashable, Union[ExtensionDtype, Union[str, dtype[Any]],
+ # Type[str], Type[float], Type[int], Type[complex], Type[bool],
+ # Type[object]]]]"; expected type "Union[ExtensionDtype, str,
+ # dtype[Any], Type[object]]"
+ dtype = {col_name: dtype for col_name in frame} # type: ignore[misc]
+ else:
+ dtype = cast(dict, dtype)
+
+ from sqlalchemy.types import (
+ TypeEngine,
+ to_instance,
+ )
+
+ for col, my_type in dtype.items():
+ if not isinstance(to_instance(my_type), TypeEngine):
+ raise ValueError(f"The type of {col} is not a SQLAlchemy type")
+
+ table = SQLTable(
+ name,
+ self,
+ frame=frame,
+ index=index,
+ if_exists=if_exists,
+ index_label=index_label,
+ schema=schema,
+ dtype=dtype,
+ )
+ table.create()
+ return table
+
+ def check_case_sensitive(
+ self,
+ name,
+ schema,
+ ):
+ """
+ Checks table name for issues with case-sensitivity.
+ Method is called after data is inserted.
+
+ """
+ if not name.isdigit() and not name.islower():
+ # check for potential case sensitivity issues (GH7815)
+ # Only check when name is not a number and name is not lower case
+ engine = self.connectable.engine
+ with self.connectable.connect() as conn:
+ if _gt14():
+ from sqlalchemy import inspect
+
+ insp = inspect(conn)
+ table_names = insp.get_table_names(
+ schema=schema or self.meta.schema
+ )
+ else:
+ table_names = engine.table_names(
+ schema=schema or self.meta.schema, connection=conn
+ )
+ if name not in table_names:
+ msg = (
+ f"The provided table name '{name}' is not found exactly as "
+ "such in the database after writing the table, possibly "
+ "due to case sensitivity issues. Consider using lower "
+ "case table names."
+ )
+ warnings.warn(msg, UserWarning)
+
+ def to_sql(
+ self,
+ frame,
+ name,
+ if_exists="fail",
+ index=True,
+ index_label=None,
+ schema=None,
+ chunksize=None,
+ dtype: DtypeArg | None = None,
+ method=None,
+ engine="auto",
+ **engine_kwargs,
+ ) -> int | None:
+ """
+ Write records stored in a DataFrame to a SQL database.
+
+ Parameters
+ ----------
+ frame : DataFrame
+ name : string
+ Name of SQL table.
+ if_exists : {'fail', 'replace', 'append'}, default 'fail'
+ - fail: If table exists, do nothing.
+ - replace: If table exists, drop it, recreate it, and insert data.
+ - append: If table exists, insert data. Create if does not exist.
+ index : boolean, default True
+ Write DataFrame index as a column.
+ index_label : string or sequence, default None
+ Column label for index column(s). If None is given (default) and
+ `index` is True, then the index names are used.
+ A sequence should be given if the DataFrame uses MultiIndex.
+ schema : string, default None
+ Name of SQL schema in database to write to (if database flavor
+ supports this). If specified, this overwrites the default
+ schema of the SQLDatabase object.
+ chunksize : int, default None
+ If not None, then rows will be written in batches of this size at a
+ time. If None, all rows will be written at once.
+ dtype : single type or dict of column name to SQL type, default None
+ Optionally specify the datatype for columns. The SQL type should
+ be a SQLAlchemy type. If all columns are of the same type, one
+ single value can be used.
+ method : {None, 'multi', callable}, default None
+ Controls the SQL insertion clause used:
+
+ * None : Uses standard SQL ``INSERT`` clause (one per row).
+ * 'multi': Pass multiple values in a single ``INSERT`` clause.
+ * callable with signature ``(pd_table, conn, keys, data_iter)``.
+
+ Details and a sample callable implementation can be found in the
+ section :ref:`insert method <io.sql.method>`.
+ engine : {'auto', 'sqlalchemy'}, default 'auto'
+ SQL engine library to use. If 'auto', then the option
+ ``io.sql.engine`` is used. The default ``io.sql.engine``
+ behavior is 'sqlalchemy'.
+
+ .. versionadded:: 1.3.0
+
+ **engine_kwargs
+ Any additional kwargs are passed to the engine.
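# The ``dtype`` argument documented above overrides the per-column type
# inference of _sqlalchemy_type; a hedged sketch with an in-memory engine.
import pandas as pd
from sqlalchemy import String, create_engine

engine = create_engine("sqlite://")
df = pd.DataFrame({"code": ["007", "042"]})

# Force a length-bounded VARCHAR instead of the inferred TEXT; mostly a
# no-op on SQLite but meaningful on stricter backends.
df.to_sql("codes", engine, index=False, dtype={"code": String(8)})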
+ """ + sql_engine = get_engine(engine) + + table = self.prep_table( + frame=frame, + name=name, + if_exists=if_exists, + index=index, + index_label=index_label, + schema=schema, + dtype=dtype, + ) + + total_inserted = sql_engine.insert_records( + table=table, + con=self.connectable, + frame=frame, + name=name, + index=index, + schema=schema, + chunksize=chunksize, + method=method, + **engine_kwargs, + ) + + self.check_case_sensitive(name=name, schema=schema) + return total_inserted + + @property + def tables(self): + return self.meta.tables + + def has_table(self, name: str, schema: str | None = None): + if _gt14(): + from sqlalchemy import inspect + + insp = inspect(self.connectable) + return insp.has_table(name, schema or self.meta.schema) + else: + return self.connectable.run_callable( + self.connectable.dialect.has_table, name, schema or self.meta.schema + ) + + def get_table(self, table_name: str, schema: str | None = None): + from sqlalchemy import ( + Numeric, + Table, + ) + + schema = schema or self.meta.schema + tbl = Table( + table_name, self.meta, autoload_with=self.connectable, schema=schema + ) + for column in tbl.columns: + if isinstance(column.type, Numeric): + column.type.asdecimal = False + return tbl + + def drop_table(self, table_name: str, schema: str | None = None): + schema = schema or self.meta.schema + if self.has_table(table_name, schema): + self.meta.reflect(bind=self.connectable, only=[table_name], schema=schema) + self.get_table(table_name, schema).drop(bind=self.connectable) + self.meta.clear() + + def _create_sql_schema( + self, + frame: DataFrame, + table_name: str, + keys: list[str] | None = None, + dtype: DtypeArg | None = None, + schema: str | None = None, + ): + table = SQLTable( + table_name, + self, + frame=frame, + index=False, + keys=keys, + dtype=dtype, + schema=schema, + ) + return str(table.sql_schema()) + + +# ---- SQL without SQLAlchemy --- +# sqlite-specific sql strings and handler class +# dictionary used for readability purposes +_SQL_TYPES = { + "string": "TEXT", + "floating": "REAL", + "integer": "INTEGER", + "datetime": "TIMESTAMP", + "date": "DATE", + "time": "TIME", + "boolean": "INTEGER", +} + + +def _get_unicode_name(name): + try: + uname = str(name).encode("utf-8", "strict").decode("utf-8") + except UnicodeError as err: + raise ValueError(f"Cannot convert identifier to UTF-8: '{name}'") from err + return uname + + +def _get_valid_sqlite_name(name): + # See https://stackoverflow.com/questions/6514274/how-do-you-escape-strings\ + # -for-sqlite-table-column-names-in-python + # Ensure the string can be encoded as UTF-8. + # Ensure the string does not include any NUL characters. + # Replace all " with "". + # Wrap the entire thing in double quotes. + + uname = _get_unicode_name(name) + if not len(uname): + raise ValueError("Empty table or column name specified") + + nul_index = uname.find("\x00") + if nul_index >= 0: + raise ValueError("SQLite identifier cannot contain NULs") + return '"' + uname.replace('"', '""') + '"' + + +class SQLiteTable(SQLTable): + """ + Patch the SQLTable for fallback support. + Instead of a table variable just use the Create Table statement. 
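# Behaviour sketch of the sqlite identifier escaping above: embedded double
# quotes are doubled and the result is wrapped in double quotes.
# _get_valid_sqlite_name is private; importing it here is for illustration.
from pandas.io.sql import _get_valid_sqlite_name

assert _get_valid_sqlite_name("plain") == '"plain"'
assert _get_valid_sqlite_name('with"quote') == '"with""quote"'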
+ """
+
+ def __init__(self, *args, **kwargs):
+ # GH 8341
+ # register an adapter callable for datetime.time object
+ import sqlite3
+
+ # this will transform time(12,34,56,789) into '12:34:56.000789'
+ # (this is what sqlalchemy does)
+ sqlite3.register_adapter(time, lambda _: _.strftime("%H:%M:%S.%f"))
+ super().__init__(*args, **kwargs)
+
+ def sql_schema(self):
+ return str(";\n".join(self.table))
+
+ def _execute_create(self):
+ with self.pd_sql.run_transaction() as conn:
+ for stmt in self.table:
+ conn.execute(stmt)
+
+ def insert_statement(self, *, num_rows: int):
+ names = list(map(str, self.frame.columns))
+ wld = "?" # wildcard char
+ escape = _get_valid_sqlite_name
+
+ if self.index is not None:
+ for idx in self.index[::-1]:
+ names.insert(0, idx)
+
+ bracketed_names = [escape(column) for column in names]
+ col_names = ",".join(bracketed_names)
+
+ row_wildcards = ",".join([wld] * len(names))
+ wildcards = ",".join([f"({row_wildcards})" for _ in range(num_rows)])
+ insert_statement = (
+ f"INSERT INTO {escape(self.name)} ({col_names}) VALUES {wildcards}"
+ )
+ return insert_statement
+
+ def _execute_insert(self, conn, keys, data_iter) -> int:
+ data_list = list(data_iter)
+ conn.executemany(self.insert_statement(num_rows=1), data_list)
+ return conn.rowcount
+
+ def _execute_insert_multi(self, conn, keys, data_iter) -> int:
+ data_list = list(data_iter)
+ flattened_data = [x for row in data_list for x in row]
+ conn.execute(self.insert_statement(num_rows=len(data_list)), flattened_data)
+ return conn.rowcount
+
+ def _create_table_setup(self):
+ """
+ Return a list of SQL statements that creates a table reflecting the
+ structure of a DataFrame. The first entry will be a CREATE TABLE
+ statement while the rest will be CREATE INDEX statements.
+ """
+ column_names_and_types = self._get_column_names_and_types(self._sql_type_name)
+ escape = _get_valid_sqlite_name
+
+ create_tbl_stmts = [
+ escape(cname) + " " + ctype for cname, ctype, _ in column_names_and_types
+ ]
+
+ if self.keys is not None and len(self.keys):
+ if not is_list_like(self.keys):
+ keys = [self.keys]
+ else:
+ keys = self.keys
+ cnames_br = ", ".join([escape(c) for c in keys])
+ create_tbl_stmts.append(
+ f"CONSTRAINT {self.name}_pk PRIMARY KEY ({cnames_br})"
+ )
+ if self.schema:
+ schema_name = self.schema + "."
+ else:
+ schema_name = ""
+ create_stmts = [
+ "CREATE TABLE "
+ + schema_name
+ + escape(self.name)
+ + " (\n"
+ + ",\n ".join(create_tbl_stmts)
+ + "\n)"
+ ]
+
+ ix_cols = [cname for cname, _, is_index in column_names_and_types if is_index]
+ if len(ix_cols):
+ cnames = "_".join(ix_cols)
+ cnames_br = ",".join([escape(c) for c in ix_cols])
+ create_stmts.append(
+ "CREATE INDEX "
+ + escape("ix_" + self.name + "_" + cnames)
+ + " ON "
+ + escape(self.name)
+ + " ("
+ + cnames_br
+ + ")"
+ )
+
+ return create_stmts
+
+ def _sql_type_name(self, col):
+ dtype: DtypeArg = self.dtype or {}
+ if is_dict_like(dtype):
+ dtype = cast(dict, dtype)
+ if col.name in dtype:
+ return dtype[col.name]
+
+ # Infer type of column, while ignoring missing values.
+ # Needed for inserting typed data containing NULLs, GH 8778.
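# Standalone sketch of the SQL produced by ``insert_statement`` above;
# constructing SQLiteTable directly is purely for demonstration.
import sqlite3
import pandas as pd
from pandas.io.sql import SQLiteDatabase, SQLiteTable

con = sqlite3.connect(":memory:")
table = SQLiteTable(
    "t", SQLiteDatabase(con), frame=pd.DataFrame({"x": [1]}), index=True
)
print(table.insert_statement(num_rows=2))
# INSERT INTO "t" ("index","x") VALUES (?,?),(?,?)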
+ col_type = lib.infer_dtype(col, skipna=True) + + if col_type == "timedelta64": + warnings.warn( + "the 'timedelta' type is not supported, and will be " + "written as integer values (ns frequency) to the database.", + UserWarning, + stacklevel=find_stack_level(), + ) + col_type = "integer" + + elif col_type == "datetime64": + col_type = "datetime" + + elif col_type == "empty": + col_type = "string" + + elif col_type == "complex": + raise ValueError("Complex datatypes not supported") + + if col_type not in _SQL_TYPES: + col_type = "string" + + return _SQL_TYPES[col_type] + + +class SQLiteDatabase(PandasSQL): + """ + Version of SQLDatabase to support SQLite connections (fallback without + SQLAlchemy). This should only be used internally. + + Parameters + ---------- + con : sqlite connection object + + """ + + def __init__(self, con): + self.con = con + + @contextmanager + def run_transaction(self): + cur = self.con.cursor() + try: + yield cur + self.con.commit() + except Exception: + self.con.rollback() + raise + finally: + cur.close() + + def execute(self, *args, **kwargs): + cur = self.con.cursor() + try: + cur.execute(*args, **kwargs) + return cur + except Exception as exc: + try: + self.con.rollback() + except Exception as inner_exc: # pragma: no cover + ex = DatabaseError( + f"Execution failed on sql: {args[0]}\n{exc}\nunable to rollback" + ) + raise ex from inner_exc + + ex = DatabaseError(f"Execution failed on sql '{args[0]}': {exc}") + raise ex from exc + + @staticmethod + def _query_iterator( + cursor, + chunksize: int, + columns, + index_col=None, + coerce_float: bool = True, + parse_dates=None, + dtype: DtypeArg | None = None, + ): + """Return generator through chunked result set""" + has_read_data = False + while True: + data = cursor.fetchmany(chunksize) + if type(data) == tuple: + data = list(data) + if not data: + cursor.close() + if not has_read_data: + yield DataFrame.from_records( + [], columns=columns, coerce_float=coerce_float + ) + break + else: + has_read_data = True + yield _wrap_result( + data, + columns, + index_col=index_col, + coerce_float=coerce_float, + parse_dates=parse_dates, + dtype=dtype, + ) + + def read_query( + self, + sql, + index_col=None, + coerce_float: bool = True, + params=None, + parse_dates=None, + chunksize: int | None = None, + dtype: DtypeArg | None = None, + ): + + args = _convert_params(sql, params) + cursor = self.execute(*args) + columns = [col_desc[0] for col_desc in cursor.description] + + if chunksize is not None: + return self._query_iterator( + cursor, + chunksize, + columns, + index_col=index_col, + coerce_float=coerce_float, + parse_dates=parse_dates, + dtype=dtype, + ) + else: + data = self._fetchall_as_list(cursor) + cursor.close() + + frame = _wrap_result( + data, + columns, + index_col=index_col, + coerce_float=coerce_float, + parse_dates=parse_dates, + dtype=dtype, + ) + return frame + + def _fetchall_as_list(self, cur): + result = cur.fetchall() + if not isinstance(result, list): + result = list(result) + return result + + def to_sql( + self, + frame, + name, + if_exists="fail", + index=True, + index_label=None, + schema=None, + chunksize=None, + dtype: DtypeArg | None = None, + method=None, + **kwargs, + ) -> int | None: + """ + Write records stored in a DataFrame to a SQL database. + + Parameters + ---------- + frame: DataFrame + name: string + Name of SQL table. + if_exists: {'fail', 'replace', 'append'}, default 'fail' + fail: If table exists, do nothing. + replace: If table exists, drop it, recreate it, and insert data. 
+ append: If table exists, insert data. Create if it does not exist. + index : bool, default True + Write DataFrame index as a column + index_label : string or sequence, default None + Column label for index column(s). If None is given (default) and + `index` is True, then the index names are used. + A sequence should be given if the DataFrame uses MultiIndex. + schema : string, default None + Ignored parameter included for compatibility with SQLAlchemy + version of ``to_sql``. + chunksize : int, default None + If not None, then rows will be written in batches of this + size at a time. If None, all rows will be written at once. + dtype : single type or dict of column name to SQL type, default None + Optional specifying the datatype for columns. The SQL type should + be a string. If all columns are of the same type, one single value + can be used. + method : {None, 'multi', callable}, default None + Controls the SQL insertion clause used: + + * None : Uses standard SQL ``INSERT`` clause (one per row). + * 'multi': Pass multiple values in a single ``INSERT`` clause. + * callable with signature ``(pd_table, conn, keys, data_iter)``. + + Details and a sample callable implementation can be found in the + section :ref:`insert method `. + """ + if dtype: + if not is_dict_like(dtype): + # error: Value expression in dictionary comprehension has incompatible + # type "Union[ExtensionDtype, str, dtype[Any], Type[object], + # Dict[Hashable, Union[ExtensionDtype, Union[str, dtype[Any]], + # Type[str], Type[float], Type[int], Type[complex], Type[bool], + # Type[object]]]]"; expected type "Union[ExtensionDtype, str, + # dtype[Any], Type[object]]" + dtype = {col_name: dtype for col_name in frame} # type: ignore[misc] + else: + dtype = cast(dict, dtype) + + for col, my_type in dtype.items(): + if not isinstance(my_type, str): + raise ValueError(f"{col} ({my_type}) not a string") + + table = SQLiteTable( + name, + self, + frame=frame, + index=index, + if_exists=if_exists, + index_label=index_label, + dtype=dtype, + ) + table.create() + return table.insert(chunksize, method) + + def has_table(self, name: str, schema: str | None = None): + + wld = "?" + query = f"SELECT name FROM sqlite_master WHERE type='table' AND name={wld};" + + return len(self.execute(query, [name]).fetchall()) > 0 + + def get_table(self, table_name: str, schema: str | None = None): + return None # not supported in fallback mode + + def drop_table(self, name: str, schema: str | None = None): + drop_sql = f"DROP TABLE {_get_valid_sqlite_name(name)}" + self.execute(drop_sql) + + def _create_sql_schema( + self, + frame, + table_name: str, + keys=None, + dtype: DtypeArg | None = None, + schema: str | None = None, + ): + table = SQLiteTable( + table_name, + self, + frame=frame, + index=False, + keys=keys, + dtype=dtype, + schema=schema, + ) + return str(table.sql_schema()) + + +def get_schema( + frame, + name: str, + keys=None, + con=None, + dtype: DtypeArg | None = None, + schema: str | None = None, +): + """ + Get the SQL db table schema for the given frame. + + Parameters + ---------- + frame : DataFrame + name : str + name of SQL table + keys : string or sequence, default: None + columns to use a primary key + con: an open SQL database connection object or a SQLAlchemy connectable + Using SQLAlchemy makes it possible to use any DB supported by that + library, default: None + If a DBAPI2 object, only sqlite3 is supported. + dtype : dict of column name to SQL type, default None + Optional specifying the datatype for columns. 
The SQL type should + be a SQLAlchemy type, or a string for sqlite3 fallback connection. + schema: str, default: None + Optional specifying the schema to be used in creating the table. + + .. versionadded:: 1.2.0 + """ + pandas_sql = pandasSQL_builder(con=con) + return pandas_sql._create_sql_schema( + frame, name, keys=keys, dtype=dtype, schema=schema + ) diff --git a/.local/lib/python3.8/site-packages/pandas/io/stata.py b/.local/lib/python3.8/site-packages/pandas/io/stata.py new file mode 100644 index 0000000..4a50a3d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/stata.py @@ -0,0 +1,3652 @@ +""" +Module contains tools for processing Stata files into DataFrames + +The StataReader below was originally written by Joe Presbrey as part of PyDTA. +It has been extended and improved by Skipper Seabold from the Statsmodels +project who also developed the StataWriter and was finally added to pandas in +a once again improved version. + +You can find more information on http://presbrey.mit.edu/PyDTA and +https://www.statsmodels.org/devel/ +""" +from __future__ import annotations + +from collections import abc +import datetime +from io import BytesIO +import os +import struct +import sys +from typing import ( + IO, + TYPE_CHECKING, + Any, + AnyStr, + Hashable, + Sequence, + cast, +) +import warnings + +from dateutil.relativedelta import relativedelta +import numpy as np + +from pandas._libs.lib import infer_dtype +from pandas._libs.writers import max_len_string_array +from pandas._typing import ( + CompressionOptions, + FilePath, + ReadBuffer, + StorageOptions, + WriteBuffer, +) +from pandas.util._decorators import ( + Appender, + doc, +) + +from pandas.core.dtypes.common import ( + ensure_object, + is_categorical_dtype, + is_datetime64_dtype, + is_numeric_dtype, +) + +from pandas import ( + Categorical, + DatetimeIndex, + NaT, + Timestamp, + isna, + to_datetime, + to_timedelta, +) +from pandas.core.arrays.boolean import BooleanDtype +from pandas.core.arrays.integer import _IntegerDtype +from pandas.core.frame import DataFrame +from pandas.core.indexes.base import Index +from pandas.core.series import Series +from pandas.core.shared_docs import _shared_docs + +from pandas.io.common import get_handle + +if TYPE_CHECKING: + from typing import Literal + +_version_error = ( + "Version of given Stata file is {version}. pandas supports importing " + "versions 105, 108, 111 (Stata 7SE), 113 (Stata 8/9), " + "114 (Stata 10/11), 115 (Stata 12), 117 (Stata 13), 118 (Stata 14/15/16)," + "and 119 (Stata 15/16, over 32,767 variables)." +) + +_statafile_processing_params1 = """\ +convert_dates : bool, default True + Convert date variables to DataFrame time values. +convert_categoricals : bool, default True + Read value labels and convert columns to Categorical/Factor variables.""" + +_statafile_processing_params2 = """\ +index_col : str, optional + Column to set as index. +convert_missing : bool, default False + Flag indicating whether to convert missing values to their Stata + representations. If False, missing values are replaced with nan. + If True, columns containing missing values are returned with + object data types and missing values are represented by + StataMissingValue objects. +preserve_dtypes : bool, default True + Preserve Stata datatypes. If False, numeric data are upcast to pandas + default types for foreign data (float64 or int64). +columns : list or None + Columns to retain. Columns will be returned in the given order. None + returns all columns. 
+order_categoricals : bool, default True + Flag indicating whether converted categorical data are ordered.""" + +_chunksize_params = """\ +chunksize : int, default None + Return StataReader object for iterations, returns chunks with + given number of lines.""" + +_iterator_params = """\ +iterator : bool, default False + Return StataReader object.""" + +_reader_notes = """\ +Notes +----- +Categorical variables read through an iterator may not have the same +categories and dtype. This occurs when a variable stored in a DTA +file is associated to an incomplete set of value labels that only +label a strict subset of the values.""" + +_read_stata_doc = f""" +Read Stata file into DataFrame. + +Parameters +---------- +filepath_or_buffer : str, path object or file-like object + Any valid string path is acceptable. The string could be a URL. Valid + URL schemes include http, ftp, s3, and file. For file URLs, a host is + expected. A local file could be: ``file://localhost/path/to/table.dta``. + + If you want to pass in a path object, pandas accepts any ``os.PathLike``. + + By file-like object, we refer to objects with a ``read()`` method, + such as a file handle (e.g. via builtin ``open`` function) + or ``StringIO``. +{_statafile_processing_params1} +{_statafile_processing_params2} +{_chunksize_params} +{_iterator_params} +{_shared_docs["decompression_options"]} +{_shared_docs["storage_options"]} + +Returns +------- +DataFrame or StataReader + +See Also +-------- +io.stata.StataReader : Low-level reader for Stata data files. +DataFrame.to_stata: Export Stata data files. + +{_reader_notes} + +Examples +-------- + +Creating a dummy stata for this example +>>> df = pd.DataFrame({{'animal': ['falcon', 'parrot', 'falcon', +... 'parrot'], +... 'speed': [350, 18, 361, 15]}}) # doctest: +SKIP +>>> df.to_stata('animals.dta') # doctest: +SKIP + +Read a Stata dta file: + +>>> df = pd.read_stata('animals.dta') # doctest: +SKIP + +Read a Stata dta file in 10,000 line chunks: +>>> values = np.random.randint(0, 10, size=(20_000, 1), dtype="uint8") # doctest: +SKIP +>>> df = pd.DataFrame(values, columns=["i"]) # doctest: +SKIP +>>> df.to_stata('filename.dta') # doctest: +SKIP + +>>> itr = pd.read_stata('filename.dta', chunksize=10000) # doctest: +SKIP +>>> for chunk in itr: +... # Operate on a single chunk, e.g., chunk.mean() +... pass # doctest: +SKIP +""" + +_read_method_doc = f"""\ +Reads observations from Stata file, converting them into a dataframe + +Parameters +---------- +nrows : int + Number of lines to read from data file, if None read whole file. +{_statafile_processing_params1} +{_statafile_processing_params2} + +Returns +------- +DataFrame +""" + +_stata_reader_doc = f"""\ +Class for reading Stata dta files. + +Parameters +---------- +path_or_buf : path (string), buffer or path object + string, path object (pathlib.Path or py._path.local.LocalPath) or object + implementing a binary read() functions. +{_statafile_processing_params1} +{_statafile_processing_params2} +{_chunksize_params} +{_shared_docs["decompression_options"]} +{_shared_docs["storage_options"]} + +{_reader_notes} +""" + + +_date_formats = ["%tc", "%tC", "%td", "%d", "%tw", "%tm", "%tq", "%th", "%ty"] + + +stata_epoch = datetime.datetime(1960, 1, 1) + + +# TODO: Add typing. As of January 2020 it is not possible to type this function since +# mypy doesn't understand that a Series and an int can be combined using mathematical +# operations. (+, -). 
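+# ---------------------------------------------------------------------
+# [Editor's note] Standalone sketch (not pandas code) of the %tw branch of
+# the vectorized converter below: Stata stores weeks since 1960w1, assumes
+# 52 weeks per year, and gives each remainder week 7 days, so SIF value 52
+# is 1961-01-01 (the example in the docstring below).
+import datetime
+
+def _weekly_sif_to_datetime(weeks_since_1960: int) -> datetime.datetime:
+    year = 1960 + weeks_since_1960 // 52
+    days = (weeks_since_1960 % 52) * 7
+    return datetime.datetime(year, 1, 1) + datetime.timedelta(days=days)
+
+assert _weekly_sif_to_datetime(52) == datetime.datetime(1961, 1, 1)
+# ---------------------------------------------------------------------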
+def _stata_elapsed_date_to_datetime_vec(dates, fmt) -> Series: + """ + Convert from SIF to datetime. https://www.stata.com/help.cgi?datetime + + Parameters + ---------- + dates : Series + The Stata Internal Format date to convert to datetime according to fmt + fmt : str + The format to convert to. Can be, tc, td, tw, tm, tq, th, ty + Returns + + Returns + ------- + converted : Series + The converted dates + + Examples + -------- + >>> dates = pd.Series([52]) + >>> _stata_elapsed_date_to_datetime_vec(dates , "%tw") + 0 1961-01-01 + dtype: datetime64[ns] + + Notes + ----- + datetime/c - tc + milliseconds since 01jan1960 00:00:00.000, assuming 86,400 s/day + datetime/C - tC - NOT IMPLEMENTED + milliseconds since 01jan1960 00:00:00.000, adjusted for leap seconds + date - td + days since 01jan1960 (01jan1960 = 0) + weekly date - tw + weeks since 1960w1 + This assumes 52 weeks in a year, then adds 7 * remainder of the weeks. + The datetime value is the start of the week in terms of days in the + year, not ISO calendar weeks. + monthly date - tm + months since 1960m1 + quarterly date - tq + quarters since 1960q1 + half-yearly date - th + half-years since 1960h1 yearly + date - ty + years since 0000 + """ + MIN_YEAR, MAX_YEAR = Timestamp.min.year, Timestamp.max.year + MAX_DAY_DELTA = (Timestamp.max - datetime.datetime(1960, 1, 1)).days + MIN_DAY_DELTA = (Timestamp.min - datetime.datetime(1960, 1, 1)).days + MIN_MS_DELTA = MIN_DAY_DELTA * 24 * 3600 * 1000 + MAX_MS_DELTA = MAX_DAY_DELTA * 24 * 3600 * 1000 + + def convert_year_month_safe(year, month) -> Series: + """ + Convert year and month to datetimes, using pandas vectorized versions + when the date range falls within the range supported by pandas. + Otherwise it falls back to a slower but more robust method + using datetime. + """ + if year.max() < MAX_YEAR and year.min() > MIN_YEAR: + return to_datetime(100 * year + month, format="%Y%m") + else: + index = getattr(year, "index", None) + return Series( + [datetime.datetime(y, m, 1) for y, m in zip(year, month)], index=index + ) + + def convert_year_days_safe(year, days) -> Series: + """ + Converts year (e.g. 1999) and days since the start of the year to a + datetime or datetime64 Series + """ + if year.max() < (MAX_YEAR - 1) and year.min() > MIN_YEAR: + return to_datetime(year, format="%Y") + to_timedelta(days, unit="d") + else: + index = getattr(year, "index", None) + value = [ + datetime.datetime(y, 1, 1) + relativedelta(days=int(d)) + for y, d in zip(year, days) + ] + return Series(value, index=index) + + def convert_delta_safe(base, deltas, unit) -> Series: + """ + Convert base dates and deltas to datetimes, using pandas vectorized + versions if the deltas satisfy restrictions required to be expressed + as dates in pandas. + """ + index = getattr(deltas, "index", None) + if unit == "d": + if deltas.max() > MAX_DAY_DELTA or deltas.min() < MIN_DAY_DELTA: + values = [base + relativedelta(days=int(d)) for d in deltas] + return Series(values, index=index) + elif unit == "ms": + if deltas.max() > MAX_MS_DELTA or deltas.min() < MIN_MS_DELTA: + values = [ + base + relativedelta(microseconds=(int(d) * 1000)) for d in deltas + ] + return Series(values, index=index) + else: + raise ValueError("format not understood") + base = to_datetime(base) + deltas = to_timedelta(deltas, unit=unit) + return base + deltas + + # TODO(non-nano): If/when pandas supports more than datetime64[ns], this + # should be improved to use correct range, e.g. 
datetime[Y] for yearly + bad_locs = np.isnan(dates) + has_bad_values = False + if bad_locs.any(): + has_bad_values = True + data_col = Series(dates) + data_col[bad_locs] = 1.0 # Replace with NaT + dates = dates.astype(np.int64) + + if fmt.startswith(("%tc", "tc")): # Delta ms relative to base + base = stata_epoch + ms = dates + conv_dates = convert_delta_safe(base, ms, "ms") + elif fmt.startswith(("%tC", "tC")): + + warnings.warn("Encountered %tC format. Leaving in Stata Internal Format.") + conv_dates = Series(dates, dtype=object) + if has_bad_values: + conv_dates[bad_locs] = NaT + return conv_dates + # Delta days relative to base + elif fmt.startswith(("%td", "td", "%d", "d")): + base = stata_epoch + days = dates + conv_dates = convert_delta_safe(base, days, "d") + # does not count leap days - 7 days is a week. + # 52nd week may have more than 7 days + elif fmt.startswith(("%tw", "tw")): + year = stata_epoch.year + dates // 52 + days = (dates % 52) * 7 + conv_dates = convert_year_days_safe(year, days) + elif fmt.startswith(("%tm", "tm")): # Delta months relative to base + year = stata_epoch.year + dates // 12 + month = (dates % 12) + 1 + conv_dates = convert_year_month_safe(year, month) + elif fmt.startswith(("%tq", "tq")): # Delta quarters relative to base + year = stata_epoch.year + dates // 4 + quarter_month = (dates % 4) * 3 + 1 + conv_dates = convert_year_month_safe(year, quarter_month) + elif fmt.startswith(("%th", "th")): # Delta half-years relative to base + year = stata_epoch.year + dates // 2 + month = (dates % 2) * 6 + 1 + conv_dates = convert_year_month_safe(year, month) + elif fmt.startswith(("%ty", "ty")): # Years -- not delta + year = dates + first_month = np.ones_like(dates) + conv_dates = convert_year_month_safe(year, first_month) + else: + raise ValueError(f"Date fmt {fmt} not understood") + + if has_bad_values: # Restore NaT for bad values + conv_dates[bad_locs] = NaT + + return conv_dates + + +def _datetime_to_stata_elapsed_vec(dates: Series, fmt: str) -> Series: + """ + Convert from datetime to SIF. https://www.stata.com/help.cgi?datetime + + Parameters + ---------- + dates : Series + Series or array containing datetime.datetime or datetime64[ns] to + convert to the Stata Internal Format given by fmt + fmt : str + The format to convert to. 
Can be, tc, td, tw, tm, tq, th, ty + """ + index = dates.index + NS_PER_DAY = 24 * 3600 * 1000 * 1000 * 1000 + US_PER_DAY = NS_PER_DAY / 1000 + + def parse_dates_safe(dates, delta=False, year=False, days=False): + d = {} + if is_datetime64_dtype(dates.dtype): + if delta: + time_delta = dates - stata_epoch + d["delta"] = time_delta._values.view(np.int64) // 1000 # microseconds + if days or year: + date_index = DatetimeIndex(dates) + d["year"] = date_index._data.year + d["month"] = date_index._data.month + if days: + days_in_ns = dates.view(np.int64) - to_datetime( + d["year"], format="%Y" + ).view(np.int64) + d["days"] = days_in_ns // NS_PER_DAY + + elif infer_dtype(dates, skipna=False) == "datetime": + if delta: + delta = dates._values - stata_epoch + + def f(x: datetime.timedelta) -> float: + return US_PER_DAY * x.days + 1000000 * x.seconds + x.microseconds + + v = np.vectorize(f) + d["delta"] = v(delta) + if year: + year_month = dates.apply(lambda x: 100 * x.year + x.month) + d["year"] = year_month._values // 100 + d["month"] = year_month._values - d["year"] * 100 + if days: + + def g(x: datetime.datetime) -> int: + return (x - datetime.datetime(x.year, 1, 1)).days + + v = np.vectorize(g) + d["days"] = v(dates) + else: + raise ValueError( + "Columns containing dates must contain either " + "datetime64, datetime.datetime or null values." + ) + + return DataFrame(d, index=index) + + bad_loc = isna(dates) + index = dates.index + if bad_loc.any(): + dates = Series(dates) + if is_datetime64_dtype(dates): + dates[bad_loc] = to_datetime(stata_epoch) + else: + dates[bad_loc] = stata_epoch + + if fmt in ["%tc", "tc"]: + d = parse_dates_safe(dates, delta=True) + conv_dates = d.delta / 1000 + elif fmt in ["%tC", "tC"]: + warnings.warn("Stata Internal Format tC not supported.") + conv_dates = dates + elif fmt in ["%td", "td"]: + d = parse_dates_safe(dates, delta=True) + conv_dates = d.delta // US_PER_DAY + elif fmt in ["%tw", "tw"]: + d = parse_dates_safe(dates, year=True, days=True) + conv_dates = 52 * (d.year - stata_epoch.year) + d.days // 7 + elif fmt in ["%tm", "tm"]: + d = parse_dates_safe(dates, year=True) + conv_dates = 12 * (d.year - stata_epoch.year) + d.month - 1 + elif fmt in ["%tq", "tq"]: + d = parse_dates_safe(dates, year=True) + conv_dates = 4 * (d.year - stata_epoch.year) + (d.month - 1) // 3 + elif fmt in ["%th", "th"]: + d = parse_dates_safe(dates, year=True) + conv_dates = 2 * (d.year - stata_epoch.year) + (d.month > 6).astype(int) + elif fmt in ["%ty", "ty"]: + d = parse_dates_safe(dates, year=True) + conv_dates = d.year + else: + raise ValueError(f"Format {fmt} is not a known Stata date format") + + conv_dates = Series(conv_dates, dtype=np.float64) + missing_value = struct.unpack(" DataFrame: + """ + Checks the dtypes of the columns of a pandas DataFrame for + compatibility with the data types and ranges supported by Stata, and + converts if necessary. + + Parameters + ---------- + data : DataFrame + The DataFrame to check and convert + + Notes + ----- + Numeric columns in Stata must be one of int8, int16, int32, float32 or + float64, with some additional value restrictions. int8 and int16 columns + are checked for violations of the value restrictions and upcast if needed. + int64 data is not usable in Stata, and so it is downcast to int32 whenever + the value are in the int32 range, and sidecast to float64 when larger than + this range. If the int64 values are outside of the range of those + perfectly representable as float64 values, a warning is raised. 
+ + bool columns are cast to int8. uint columns are converted to int of the + same size if there is no loss in precision, otherwise are upcast to a + larger type. uint64 is currently not supported since it is concerted to + object in a DataFrame. + """ + ws = "" + # original, if small, if large + conversion_data = ( + (np.bool_, np.int8, np.int8), + (np.uint8, np.int8, np.int16), + (np.uint16, np.int16, np.int32), + (np.uint32, np.int32, np.int64), + (np.uint64, np.int64, np.float64), + ) + + float32_max = struct.unpack("= 2 ** 53: + ws = precision_loss_doc.format("uint64", "float64") + + data[col] = data[col].astype(dtype) + + # Check values and upcast if necessary + if dtype == np.int8: + if data[col].max() > 100 or data[col].min() < -127: + data[col] = data[col].astype(np.int16) + elif dtype == np.int16: + if data[col].max() > 32740 or data[col].min() < -32767: + data[col] = data[col].astype(np.int32) + elif dtype == np.int64: + if data[col].max() <= 2147483620 and data[col].min() >= -2147483647: + data[col] = data[col].astype(np.int32) + else: + data[col] = data[col].astype(np.float64) + if data[col].max() >= 2 ** 53 or data[col].min() <= -(2 ** 53): + ws = precision_loss_doc.format("int64", "float64") + elif dtype in (np.float32, np.float64): + value = data[col].max() + if np.isinf(value): + raise ValueError( + f"Column {col} has a maximum value of infinity which is outside " + "the range supported by Stata." + ) + if dtype == np.float32 and value > float32_max: + data[col] = data[col].astype(np.float64) + elif dtype == np.float64: + if value > float64_max: + raise ValueError( + f"Column {col} has a maximum value ({value}) outside the range " + f"supported by Stata ({float64_max})" + ) + if is_nullable_int: + if orig_missing.any(): + # Replace missing by Stata sentinel value + sentinel = StataMissingValue.BASE_MISSING_VALUES[data[col].dtype.name] + data.loc[orig_missing, col] = sentinel + if ws: + warnings.warn(ws, PossiblePrecisionLoss) + + return data + + +class StataValueLabel: + """ + Parse a categorical column and prepare formatted output + + Parameters + ---------- + catarray : Series + Categorical Series to encode + encoding : {"latin-1", "utf-8"} + Encoding to use for value labels. 
+ """ + + def __init__(self, catarray: Series, encoding: str = "latin-1"): + + if encoding not in ("latin-1", "utf-8"): + raise ValueError("Only latin-1 and utf-8 are supported.") + self.labname = catarray.name + self._encoding = encoding + categories = catarray.cat.categories + self.value_labels: list[tuple[int | float, str]] = list( + zip(np.arange(len(categories)), categories) + ) + self.value_labels.sort(key=lambda x: x[0]) + + self._prepare_value_labels() + + def _prepare_value_labels(self): + """Encode value labels.""" + + self.text_len = 0 + self.txt: list[bytes] = [] + self.n = 0 + # Offsets (length of categories), converted to int32 + self.off = np.array([], dtype=np.int32) + # Values, converted to int32 + self.val = np.array([], dtype=np.int32) + self.len = 0 + + # Compute lengths and setup lists of offsets and labels + offsets: list[int] = [] + values: list[int | float] = [] + for vl in self.value_labels: + category: str | bytes = vl[1] + if not isinstance(category, str): + category = str(category) + warnings.warn( + value_label_mismatch_doc.format(self.labname), + ValueLabelTypeMismatch, + ) + category = category.encode(self._encoding) + offsets.append(self.text_len) + self.text_len += len(category) + 1 # +1 for the padding + values.append(vl[0]) + self.txt.append(category) + self.n += 1 + + if self.text_len > 32000: + raise ValueError( + "Stata value labels for a single variable must " + "have a combined length less than 32,000 characters." + ) + + # Ensure int32 + self.off = np.array(offsets, dtype=np.int32) + self.val = np.array(values, dtype=np.int32) + + # Total length + self.len = 4 + 4 + 4 * self.n + 4 * self.n + self.text_len + + def generate_value_label(self, byteorder: str) -> bytes: + """ + Generate the binary representation of the value labels. + + Parameters + ---------- + byteorder : str + Byte order of the output + + Returns + ------- + value_label : bytes + Bytes containing the formatted value label + """ + encoding = self._encoding + bio = BytesIO() + null_byte = b"\x00" + + # len + bio.write(struct.pack(byteorder + "i", self.len)) + + # labname + labname = str(self.labname)[:32].encode(encoding) + lab_len = 32 if encoding not in ("utf-8", "utf8") else 128 + labname = _pad_bytes(labname, lab_len + 1) + bio.write(labname) + + # padding - 3 bytes + for i in range(3): + bio.write(struct.pack("c", null_byte)) + + # value_label_table + # n - int32 + bio.write(struct.pack(byteorder + "i", self.n)) + + # textlen - int32 + bio.write(struct.pack(byteorder + "i", self.text_len)) + + # off - int32 array (n elements) + for offset in self.off: + bio.write(struct.pack(byteorder + "i", offset)) + + # val - int32 array (n elements) + for value in self.val: + bio.write(struct.pack(byteorder + "i", value)) + + # txt - Text labels, null terminated + for text in self.txt: + bio.write(text + null_byte) + + return bio.getvalue() + + +class StataNonCatValueLabel(StataValueLabel): + """ + Prepare formatted version of value labels + + Parameters + ---------- + labname : str + Value label name + value_labels: Dictionary + Mapping of values to labels + encoding : {"latin-1", "utf-8"} + Encoding to use for value labels. 
+ """ + + def __init__( + self, + labname: str, + value_labels: dict[float | int, str], + encoding: Literal["latin-1", "utf-8"] = "latin-1", + ): + + if encoding not in ("latin-1", "utf-8"): + raise ValueError("Only latin-1 and utf-8 are supported.") + + self.labname = labname + self._encoding = encoding + self.value_labels: list[tuple[int | float, str]] = sorted( + value_labels.items(), key=lambda x: x[0] + ) + self._prepare_value_labels() + + +class StataMissingValue: + """ + An observation's missing value. + + Parameters + ---------- + value : {int, float} + The Stata missing value code + + Notes + ----- + More information: + + Integer missing values make the code '.', '.a', ..., '.z' to the ranges + 101 ... 127 (for int8), 32741 ... 32767 (for int16) and 2147483621 ... + 2147483647 (for int32). Missing values for floating point data types are + more complex but the pattern is simple to discern from the following table. + + np.float32 missing values (float in Stata) + 0000007f . + 0008007f .a + 0010007f .b + ... + 00c0007f .x + 00c8007f .y + 00d0007f .z + + np.float64 missing values (double in Stata) + 000000000000e07f . + 000000000001e07f .a + 000000000002e07f .b + ... + 000000000018e07f .x + 000000000019e07f .y + 00000000001ae07f .z + """ + + # Construct a dictionary of missing values + MISSING_VALUES: dict[float, str] = {} + bases = (101, 32741, 2147483621) + for b in bases: + # Conversion to long to avoid hash issues on 32 bit platforms #8968 + MISSING_VALUES[b] = "." + for i in range(1, 27): + MISSING_VALUES[i + b] = "." + chr(96 + i) + + float32_base = b"\x00\x00\x00\x7f" + increment = struct.unpack(" 0: + MISSING_VALUES[key] += chr(96 + i) + int_value = struct.unpack(" 0: + MISSING_VALUES[key] += chr(96 + i) + int_value = struct.unpack("q", struct.pack(" str: + """ + The Stata representation of the missing value: '.', '.a'..'.z' + + Returns + ------- + str + The representation of the missing value. + """ + return self._str + + @property + def value(self) -> int | float: + """ + The binary representation of the missing value. + + Returns + ------- + {int, float} + The binary representation of the missing value. + """ + return self._value + + def __str__(self) -> str: + return self.string + + def __repr__(self) -> str: + return f"{type(self)}({self})" + + def __eq__(self, other: Any) -> bool: + return ( + isinstance(other, type(self)) + and self.string == other.string + and self.value == other.value + ) + + @classmethod + def get_base_missing_value(cls, dtype: np.dtype) -> int | float: + if dtype.type is np.int8: + value = cls.BASE_MISSING_VALUES["int8"] + elif dtype.type is np.int16: + value = cls.BASE_MISSING_VALUES["int16"] + elif dtype.type is np.int32: + value = cls.BASE_MISSING_VALUES["int32"] + elif dtype.type is np.float32: + value = cls.BASE_MISSING_VALUES["float32"] + elif dtype.type is np.float64: + value = cls.BASE_MISSING_VALUES["float64"] + else: + raise ValueError("Unsupported dtype") + return value + + +class StataParser: + def __init__(self): + + # type code. + # -------------------- + # str1 1 = 0x01 + # str2 2 = 0x02 + # ... 
+ # str244 244 = 0xf4 + # byte 251 = 0xfb (sic) + # int 252 = 0xfc + # long 253 = 0xfd + # float 254 = 0xfe + # double 255 = 0xff + # -------------------- + # NOTE: the byte type seems to be reserved for categorical variables + # with a label, but the underlying variable is -127 to 100 + # we're going to drop the label and cast to int + self.DTYPE_MAP = dict( + list(zip(range(1, 245), [np.dtype("a" + str(i)) for i in range(1, 245)])) + + [ + (251, np.dtype(np.int8)), + (252, np.dtype(np.int16)), + (253, np.dtype(np.int32)), + (254, np.dtype(np.float32)), + (255, np.dtype(np.float64)), + ] + ) + self.DTYPE_MAP_XML = { + 32768: np.dtype(np.uint8), # Keys to GSO + 65526: np.dtype(np.float64), + 65527: np.dtype(np.float32), + 65528: np.dtype(np.int32), + 65529: np.dtype(np.int16), + 65530: np.dtype(np.int8), + } + # error: Argument 1 to "list" has incompatible type "str"; + # expected "Iterable[int]" [arg-type] + self.TYPE_MAP = list(range(251)) + list("bhlfd") # type: ignore[arg-type] + self.TYPE_MAP_XML = { + # Not really a Q, unclear how to handle byteswap + 32768: "Q", + 65526: "d", + 65527: "f", + 65528: "l", + 65529: "h", + 65530: "b", + } + # NOTE: technically, some of these are wrong. there are more numbers + # that can be represented. it's the 27 ABOVE and BELOW the max listed + # numeric data type in [U] 12.2.2 of the 11.2 manual + float32_min = b"\xff\xff\xff\xfe" + float32_max = b"\xff\xff\xff\x7e" + float64_min = b"\xff\xff\xff\xff\xff\xff\xef\xff" + float64_max = b"\xff\xff\xff\xff\xff\xff\xdf\x7f" + self.VALID_RANGE = { + "b": (-127, 100), + "h": (-32767, 32740), + "l": (-2147483647, 2147483620), + "f": ( + np.float32(struct.unpack(" StataReader: + """enter context manager""" + return self + + def __exit__(self, exc_type, exc_value, traceback) -> None: + """exit context manager""" + self.close() + + def close(self) -> None: + """close the handle if its open""" + self.path_or_buf.close() + + def _set_encoding(self) -> None: + """ + Set string encoding which depends on file version + """ + if self.format_version < 118: + self._encoding = "latin-1" + else: + self._encoding = "utf-8" + + def _read_header(self) -> None: + first_char = self.path_or_buf.read(1) + if struct.unpack("c", first_char)[0] == b"<": + self._read_new_header() + else: + self._read_old_header(first_char) + + self.has_string_data = len([x for x in self.typlist if type(x) is int]) > 0 + + # calculate size of a data record + self.col_sizes = [self._calcsize(typ) for typ in self.typlist] + + def _read_new_header(self) -> None: + # The first part of the header is common to 117 - 119. + self.path_or_buf.read(27) # stata_dta>
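+# ---------------------------------------------------------------------
+# [Editor's note] Standalone sketch (not pandas code) of the next two
+# header fields, ignoring the tag bytes a real file puts between them:
+# the release number is three ASCII digits, and the byte-order marker is
+# b"MSF" (big-endian) or b"LSF" (little-endian).
+import struct
+from io import BytesIO
+
+_hdr = BytesIO(b"118" + b"LSF" + struct.pack("<H", 3))
+_version = int(_hdr.read(3))                         # release -> 118
+_byteorder = ">" if _hdr.read(3) == b"MSF" else "<"  # LSF -> little-endian
+_nvar = struct.unpack(_byteorder + "H", _hdr.read(2))[0]  # K -> 3 variables
+assert (_version, _byteorder, _nvar) == (118, "<", 3)
+# ---------------------------------------------------------------------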
+        self.format_version = int(self.path_or_buf.read(3))
+        if self.format_version not in [117, 118, 119]:
+            raise ValueError(_version_error.format(version=self.format_version))
+        self._set_encoding()
+        self.path_or_buf.read(21)  # </release><byteorder>
+        self.byteorder = self.path_or_buf.read(3) == b"MSF" and ">" or "<"
+        self.path_or_buf.read(15)  # </byteorder><K>
+        nvar_type = "H" if self.format_version <= 118 else "I"
+        nvar_size = 2 if self.format_version <= 118 else 4
+        self.nvar = struct.unpack(
+            self.byteorder + nvar_type, self.path_or_buf.read(nvar_size)
+        )[0]
+        self.path_or_buf.read(7)  # </K><N>
+
+        self.nobs = self._get_nobs()
+        self.path_or_buf.read(11)  # </N><label>
+        self._data_label = self._get_data_label()
+        self.path_or_buf.read(19)  # </label><timestamp>
+        self.time_stamp = self._get_time_stamp()
+        self.path_or_buf.read(26)  # </timestamp></header><map>
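+# ---------------------------------------------------------------------
+# [Editor's note] Standalone sketch (not pandas code): the <map> section
+# consumed next is a run of int64 file offsets, and the small "+ 16"-style
+# corrections applied below skip the length of the tag that opens each
+# section. The offsets here are invented for the example.
+import struct
+from io import BytesIO
+
+_map = BytesIO(struct.pack("<3q", 0, 109, 147))
+_offsets = [struct.unpack("<q", _map.read(8))[0] for _ in range(3)]
+assert _offsets == [0, 109, 147]
+# ---------------------------------------------------------------------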
+ self.path_or_buf.read(8) # 0x0000000000000000 + self.path_or_buf.read(8) # position of + + self._seek_vartypes = ( + struct.unpack(self.byteorder + "q", self.path_or_buf.read(8))[0] + 16 + ) + self._seek_varnames = ( + struct.unpack(self.byteorder + "q", self.path_or_buf.read(8))[0] + 10 + ) + self._seek_sortlist = ( + struct.unpack(self.byteorder + "q", self.path_or_buf.read(8))[0] + 10 + ) + self._seek_formats = ( + struct.unpack(self.byteorder + "q", self.path_or_buf.read(8))[0] + 9 + ) + self._seek_value_label_names = ( + struct.unpack(self.byteorder + "q", self.path_or_buf.read(8))[0] + 19 + ) + + # Requires version-specific treatment + self._seek_variable_labels = self._get_seek_variable_labels() + + self.path_or_buf.read(8) # + self.data_location = ( + struct.unpack(self.byteorder + "q", self.path_or_buf.read(8))[0] + 6 + ) + self.seek_strls = ( + struct.unpack(self.byteorder + "q", self.path_or_buf.read(8))[0] + 7 + ) + self.seek_value_labels = ( + struct.unpack(self.byteorder + "q", self.path_or_buf.read(8))[0] + 14 + ) + + self.typlist, self.dtyplist = self._get_dtypes(self._seek_vartypes) + + self.path_or_buf.seek(self._seek_varnames) + self.varlist = self._get_varlist() + + self.path_or_buf.seek(self._seek_sortlist) + self.srtlist = struct.unpack( + self.byteorder + ("h" * (self.nvar + 1)), + self.path_or_buf.read(2 * (self.nvar + 1)), + )[:-1] + + self.path_or_buf.seek(self._seek_formats) + self.fmtlist = self._get_fmtlist() + + self.path_or_buf.seek(self._seek_value_label_names) + self.lbllist = self._get_lbllist() + + self.path_or_buf.seek(self._seek_variable_labels) + self._variable_labels = self._get_variable_labels() + + # Get data type information, works for versions 117-119. + def _get_dtypes( + self, seek_vartypes: int + ) -> tuple[list[int | str], list[str | np.dtype]]: + + self.path_or_buf.seek(seek_vartypes) + raw_typlist = [ + struct.unpack(self.byteorder + "H", self.path_or_buf.read(2))[0] + for _ in range(self.nvar) + ] + + def f(typ: int) -> int | str: + if typ <= 2045: + return typ + try: + return self.TYPE_MAP_XML[typ] + except KeyError as err: + raise ValueError(f"cannot convert stata types [{typ}]") from err + + typlist = [f(x) for x in raw_typlist] + + def g(typ: int) -> str | np.dtype: + if typ <= 2045: + return str(typ) + try: + # error: Incompatible return value type (got "Type[number]", expected + # "Union[str, dtype]") + return self.DTYPE_MAP_XML[typ] # type: ignore[return-value] + except KeyError as err: + raise ValueError(f"cannot convert stata dtype [{typ}]") from err + + dtyplist = [g(x) for x in raw_typlist] + + return typlist, dtyplist + + def _get_varlist(self) -> list[str]: + # 33 in order formats, 129 in formats 118 and 119 + b = 33 if self.format_version < 118 else 129 + return [self._decode(self.path_or_buf.read(b)) for _ in range(self.nvar)] + + # Returns the format list + def _get_fmtlist(self) -> list[str]: + if self.format_version >= 118: + b = 57 + elif self.format_version > 113: + b = 49 + elif self.format_version > 104: + b = 12 + else: + b = 7 + + return [self._decode(self.path_or_buf.read(b)) for _ in range(self.nvar)] + + # Returns the label list + def _get_lbllist(self) -> list[str]: + if self.format_version >= 118: + b = 129 + elif self.format_version > 108: + b = 33 + else: + b = 9 + return [self._decode(self.path_or_buf.read(b)) for _ in range(self.nvar)] + + def _get_variable_labels(self) -> list[str]: + if self.format_version >= 118: + vlblist = [ + self._decode(self.path_or_buf.read(321)) for _ in range(self.nvar) + ] + 
elif self.format_version > 105: + vlblist = [ + self._decode(self.path_or_buf.read(81)) for _ in range(self.nvar) + ] + else: + vlblist = [ + self._decode(self.path_or_buf.read(32)) for _ in range(self.nvar) + ] + return vlblist + + def _get_nobs(self) -> int: + if self.format_version >= 118: + return struct.unpack(self.byteorder + "Q", self.path_or_buf.read(8))[0] + else: + return struct.unpack(self.byteorder + "I", self.path_or_buf.read(4))[0] + + def _get_data_label(self) -> str: + if self.format_version >= 118: + strlen = struct.unpack(self.byteorder + "H", self.path_or_buf.read(2))[0] + return self._decode(self.path_or_buf.read(strlen)) + elif self.format_version == 117: + strlen = struct.unpack("b", self.path_or_buf.read(1))[0] + return self._decode(self.path_or_buf.read(strlen)) + elif self.format_version > 105: + return self._decode(self.path_or_buf.read(81)) + else: + return self._decode(self.path_or_buf.read(32)) + + def _get_time_stamp(self) -> str: + if self.format_version >= 118: + strlen = struct.unpack("b", self.path_or_buf.read(1))[0] + return self.path_or_buf.read(strlen).decode("utf-8") + elif self.format_version == 117: + strlen = struct.unpack("b", self.path_or_buf.read(1))[0] + return self._decode(self.path_or_buf.read(strlen)) + elif self.format_version > 104: + return self._decode(self.path_or_buf.read(18)) + else: + raise ValueError() + + def _get_seek_variable_labels(self) -> int: + if self.format_version == 117: + self.path_or_buf.read(8) # , throw away + # Stata 117 data files do not follow the described format. This is + # a work around that uses the previous label, 33 bytes for each + # variable, 20 for the closing tag and 17 for the opening tag + return self._seek_value_label_names + (33 * self.nvar) + 20 + 17 + elif self.format_version >= 118: + return struct.unpack(self.byteorder + "q", self.path_or_buf.read(8))[0] + 17 + else: + raise ValueError() + + def _read_old_header(self, first_char: bytes) -> None: + self.format_version = struct.unpack("b", first_char)[0] + if self.format_version not in [104, 105, 108, 111, 113, 114, 115]: + raise ValueError(_version_error.format(version=self.format_version)) + self._set_encoding() + self.byteorder = ( + struct.unpack("b", self.path_or_buf.read(1))[0] == 0x1 and ">" or "<" + ) + self.filetype = struct.unpack("b", self.path_or_buf.read(1))[0] + self.path_or_buf.read(1) # unused + + self.nvar = struct.unpack(self.byteorder + "H", self.path_or_buf.read(2))[0] + self.nobs = self._get_nobs() + + self._data_label = self._get_data_label() + + self.time_stamp = self._get_time_stamp() + + # descriptors + if self.format_version > 108: + typlist = [ord(self.path_or_buf.read(1)) for _ in range(self.nvar)] + else: + buf = self.path_or_buf.read(self.nvar) + typlistb = np.frombuffer(buf, dtype=np.uint8) + typlist = [] + for tp in typlistb: + if tp in self.OLD_TYPE_MAPPING: + typlist.append(self.OLD_TYPE_MAPPING[tp]) + else: + typlist.append(tp - 127) # bytes + + try: + self.typlist = [self.TYPE_MAP[typ] for typ in typlist] + except ValueError as err: + invalid_types = ",".join([str(x) for x in typlist]) + raise ValueError(f"cannot convert stata types [{invalid_types}]") from err + try: + self.dtyplist = [self.DTYPE_MAP[typ] for typ in typlist] + except ValueError as err: + invalid_dtypes = ",".join([str(x) for x in typlist]) + raise ValueError(f"cannot convert stata dtypes [{invalid_dtypes}]") from err + + if self.format_version > 108: + self.varlist = [ + self._decode(self.path_or_buf.read(33)) for _ in range(self.nvar) + ] + 
else: + self.varlist = [ + self._decode(self.path_or_buf.read(9)) for _ in range(self.nvar) + ] + self.srtlist = struct.unpack( + self.byteorder + ("h" * (self.nvar + 1)), + self.path_or_buf.read(2 * (self.nvar + 1)), + )[:-1] + + self.fmtlist = self._get_fmtlist() + + self.lbllist = self._get_lbllist() + + self._variable_labels = self._get_variable_labels() + + # ignore expansion fields (Format 105 and later) + # When reading, read five bytes; the last four bytes now tell you + # the size of the next read, which you discard. You then continue + # like this until you read 5 bytes of zeros. + + if self.format_version > 104: + while True: + data_type = struct.unpack( + self.byteorder + "b", self.path_or_buf.read(1) + )[0] + if self.format_version > 108: + data_len = struct.unpack( + self.byteorder + "i", self.path_or_buf.read(4) + )[0] + else: + data_len = struct.unpack( + self.byteorder + "h", self.path_or_buf.read(2) + )[0] + if data_type == 0: + break + self.path_or_buf.read(data_len) + + # necessary data to continue parsing + self.data_location = self.path_or_buf.tell() + + def _setup_dtype(self) -> np.dtype: + """Map between numpy and state dtypes""" + if self._dtype is not None: + return self._dtype + + dtypes = [] # Convert struct data types to numpy data type + for i, typ in enumerate(self.typlist): + if typ in self.NUMPY_TYPE_MAP: + typ = cast(str, typ) # only strs in NUMPY_TYPE_MAP + dtypes.append(("s" + str(i), self.byteorder + self.NUMPY_TYPE_MAP[typ])) + else: + dtypes.append(("s" + str(i), "S" + str(typ))) + self._dtype = np.dtype(dtypes) + + return self._dtype + + def _calcsize(self, fmt: int | str) -> int: + if isinstance(fmt, int): + return fmt + return struct.calcsize(self.byteorder + fmt) + + def _decode(self, s: bytes) -> str: + # have bytes not strings, so must decode + s = s.partition(b"\0")[0] + try: + return s.decode(self._encoding) + except UnicodeDecodeError: + # GH 25960, fallback to handle incorrect format produced when 117 + # files are converted to 118 files in Stata + encoding = self._encoding + msg = f""" +One or more strings in the dta file could not be decoded using {encoding}, and +so the fallback encoding of latin-1 is being used. This can happen when a file +has been incorrectly encoded by Stata or some other software. You should verify +the string values returned are correct.""" + warnings.warn(msg, UnicodeWarning) + return s.decode("latin-1") + + def _read_value_labels(self) -> None: + if self._value_labels_read: + # Don't read twice + return + if self.format_version <= 108: + # Value labels are not supported in version 108 and earlier. 
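+# ---------------------------------------------------------------------
+# [Editor's note] Standalone sketch (not pandas code) of the value-label
+# record parsed by the loop below: int32 n, int32 txtlen, then n int32
+# text offsets, n int32 coded values, and txtlen bytes of null-terminated
+# label text. The two labels here are invented for the example.
+import struct
+from io import BytesIO
+
+_txt = b"low\x00high\x00"
+_blob = (struct.pack("<ii", 2, len(_txt))   # n, txtlen
+         + struct.pack("<2i", 0, 4)         # offsets into _txt
+         + struct.pack("<2i", 1, 2)         # coded values
+         + _txt)
+_b = BytesIO(_blob)
+_n, _txtlen = struct.unpack("<ii", _b.read(8))
+_off = struct.unpack(f"<{_n}i", _b.read(4 * _n))
+_val = struct.unpack(f"<{_n}i", _b.read(4 * _n))
+_text = _b.read(_txtlen)
+_labels = {v: _text[o:].split(b"\x00", 1)[0].decode()
+           for o, v in zip(_off, _val)}
+assert _labels == {1: "low", 2: "high"}
+# ---------------------------------------------------------------------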
+ self._value_labels_read = True + self.value_label_dict: dict[str, dict[float | int, str]] = {} + return + + if self.format_version >= 117: + self.path_or_buf.seek(self.seek_value_labels) + else: + assert self._dtype is not None + offset = self.nobs * self._dtype.itemsize + self.path_or_buf.seek(self.data_location + offset) + + self._value_labels_read = True + self.value_label_dict = {} + + while True: + if self.format_version >= 117: + if self.path_or_buf.read(5) == b" + break # end of value label table + + slength = self.path_or_buf.read(4) + if not slength: + break # end of value label table (format < 117) + if self.format_version <= 117: + labname = self._decode(self.path_or_buf.read(33)) + else: + labname = self._decode(self.path_or_buf.read(129)) + self.path_or_buf.read(3) # padding + + n = struct.unpack(self.byteorder + "I", self.path_or_buf.read(4))[0] + txtlen = struct.unpack(self.byteorder + "I", self.path_or_buf.read(4))[0] + off = np.frombuffer( + self.path_or_buf.read(4 * n), dtype=self.byteorder + "i4", count=n + ) + val = np.frombuffer( + self.path_or_buf.read(4 * n), dtype=self.byteorder + "i4", count=n + ) + ii = np.argsort(off) + off = off[ii] + val = val[ii] + txt = self.path_or_buf.read(txtlen) + self.value_label_dict[labname] = {} + for i in range(n): + end = off[i + 1] if i < n - 1 else txtlen + self.value_label_dict[labname][val[i]] = self._decode(txt[off[i] : end]) + if self.format_version >= 117: + self.path_or_buf.read(6) # + self._value_labels_read = True + + def _read_strls(self) -> None: + self.path_or_buf.seek(self.seek_strls) + # Wrap v_o in a string to allow uint64 values as keys on 32bit OS + self.GSO = {"0": ""} + while True: + if self.path_or_buf.read(3) != b"GSO": + break + + if self.format_version == 117: + v_o = struct.unpack(self.byteorder + "Q", self.path_or_buf.read(8))[0] + else: + buf = self.path_or_buf.read(12) + # Only tested on little endian file on little endian machine. + v_size = 2 if self.format_version == 118 else 3 + if self.byteorder == "<": + buf = buf[0:v_size] + buf[4 : (12 - v_size)] + else: + # This path may not be correct, impossible to test + buf = buf[0:v_size] + buf[(4 + v_size) :] + v_o = struct.unpack("Q", buf)[0] + typ = struct.unpack("B", self.path_or_buf.read(1))[0] + length = struct.unpack(self.byteorder + "I", self.path_or_buf.read(4))[0] + va = self.path_or_buf.read(length) + if typ == 130: + decoded_va = va[0:-1].decode(self._encoding) + else: + # Stata says typ 129 can be binary, so use str + decoded_va = str(va) + # Wrap v_o in a string to allow uint64 values as keys on 32bit OS + self.GSO[str(v_o)] = decoded_va + + def __next__(self) -> DataFrame: + self._using_iterator = True + return self.read(nrows=self._chunksize) + + def get_chunk(self, size: int | None = None) -> DataFrame: + """ + Reads lines from Stata file and returns as dataframe + + Parameters + ---------- + size : int, defaults to None + Number of lines to read. If None, reads whole file. + + Returns + ------- + DataFrame + """ + if size is None: + size = self._chunksize + return self.read(nrows=size) + + @Appender(_read_method_doc) + def read( + self, + nrows: int | None = None, + convert_dates: bool | None = None, + convert_categoricals: bool | None = None, + index_col: str | None = None, + convert_missing: bool | None = None, + preserve_dtypes: bool | None = None, + columns: Sequence[str] | None = None, + order_categoricals: bool | None = None, + ) -> DataFrame: + # Handle empty file or chunk. If reading incrementally raise + # StopIteration. 
If reading the whole thing return an empty + # data frame. + if (self.nobs == 0) and (nrows is None): + self._can_read_value_labels = True + self._data_read = True + self.close() + return DataFrame(columns=self.varlist) + + # Handle options + if convert_dates is None: + convert_dates = self._convert_dates + if convert_categoricals is None: + convert_categoricals = self._convert_categoricals + if convert_missing is None: + convert_missing = self._convert_missing + if preserve_dtypes is None: + preserve_dtypes = self._preserve_dtypes + if columns is None: + columns = self._columns + if order_categoricals is None: + order_categoricals = self._order_categoricals + if index_col is None: + index_col = self._index_col + + if nrows is None: + nrows = self.nobs + + if (self.format_version >= 117) and (not self._value_labels_read): + self._can_read_value_labels = True + self._read_strls() + + # Read data + assert self._dtype is not None + dtype = self._dtype + max_read_len = (self.nobs - self._lines_read) * dtype.itemsize + read_len = nrows * dtype.itemsize + read_len = min(read_len, max_read_len) + if read_len <= 0: + # Iterator has finished, should never be here unless + # we are reading the file incrementally + if convert_categoricals: + self._read_value_labels() + self.close() + raise StopIteration + offset = self._lines_read * dtype.itemsize + self.path_or_buf.seek(self.data_location + offset) + read_lines = min(nrows, self.nobs - self._lines_read) + raw_data = np.frombuffer( + self.path_or_buf.read(read_len), dtype=dtype, count=read_lines + ) + + self._lines_read += read_lines + if self._lines_read == self.nobs: + self._can_read_value_labels = True + self._data_read = True + # if necessary, swap the byte order to native here + if self.byteorder != self._native_byteorder: + raw_data = raw_data.byteswap().newbyteorder() + + if convert_categoricals: + self._read_value_labels() + + if len(raw_data) == 0: + data = DataFrame(columns=self.varlist) + else: + data = DataFrame.from_records(raw_data) + data.columns = Index(self.varlist) + + # If index is not specified, use actual row number rather than + # restarting at 0 for each chunk. 
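+# ---------------------------------------------------------------------
+# [Editor's note] Standalone illustration (not pandas code) of the
+# byteswap().newbyteorder() call above: swap the raw bytes, then flip the
+# dtype's byte-order flag, leaving the logical values untouched.
+import numpy as np
+
+_big = np.array([1, 256], dtype=">i2")      # big-endian int16
+_native = _big.byteswap().newbyteorder()    # dtype becomes "<i2"
+assert _native.dtype == np.dtype("<i2") and (_big == _native).all()
+# ---------------------------------------------------------------------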
+ if index_col is None: + rng = np.arange(self._lines_read - read_lines, self._lines_read) + data.index = Index(rng) # set attr instead of set_index to avoid copy + + if columns is not None: + try: + data = self._do_select_columns(data, columns) + except ValueError: + self.close() + raise + + # Decode strings + for col, typ in zip(data, self.typlist): + if type(typ) is int: + data[col] = data[col].apply(self._decode, convert_dtype=True) + + data = self._insert_strls(data) + + cols_ = np.where([dtyp is not None for dtyp in self.dtyplist])[0] + # Convert columns (if needed) to match input type + ix = data.index + requires_type_conversion = False + data_formatted = [] + for i in cols_: + if self.dtyplist[i] is not None: + col = data.columns[i] + dtype = data[col].dtype + if dtype != np.dtype(object) and dtype != self.dtyplist[i]: + requires_type_conversion = True + data_formatted.append( + (col, Series(data[col], ix, self.dtyplist[i])) + ) + else: + data_formatted.append((col, data[col])) + if requires_type_conversion: + data = DataFrame.from_dict(dict(data_formatted)) + del data_formatted + + data = self._do_convert_missing(data, convert_missing) + + if convert_dates: + + def any_startswith(x: str) -> bool: + return any(x.startswith(fmt) for fmt in _date_formats) + + cols = np.where([any_startswith(x) for x in self.fmtlist])[0] + for i in cols: + col = data.columns[i] + try: + data[col] = _stata_elapsed_date_to_datetime_vec( + data[col], self.fmtlist[i] + ) + except ValueError: + self.close() + raise + + if convert_categoricals and self.format_version > 108: + data = self._do_convert_categoricals( + data, self.value_label_dict, self.lbllist, order_categoricals + ) + + if not preserve_dtypes: + retyped_data = [] + convert = False + for col in data: + dtype = data[col].dtype + if dtype in (np.dtype(np.float16), np.dtype(np.float32)): + dtype = np.dtype(np.float64) + convert = True + elif dtype in ( + np.dtype(np.int8), + np.dtype(np.int16), + np.dtype(np.int32), + ): + dtype = np.dtype(np.int64) + convert = True + retyped_data.append((col, data[col].astype(dtype))) + if convert: + data = DataFrame.from_dict(dict(retyped_data)) + + if index_col is not None: + data = data.set_index(data.pop(index_col)) + + return data + + def _do_convert_missing(self, data: DataFrame, convert_missing: bool) -> DataFrame: + # Check for missing values, and replace if found + replacements = {} + for i, colname in enumerate(data): + fmt = self.typlist[i] + if fmt not in self.VALID_RANGE: + continue + + fmt = cast(str, fmt) # only strs in VALID_RANGE + nmin, nmax = self.VALID_RANGE[fmt] + series = data[colname] + + # appreciably faster to do this with ndarray instead of Series + svals = series._values + missing = (svals < nmin) | (svals > nmax) + + if not missing.any(): + continue + + if convert_missing: # Replacement follows Stata notation + missing_loc = np.nonzero(np.asarray(missing))[0] + umissing, umissing_loc = np.unique(series[missing], return_inverse=True) + replacement = Series(series, dtype=object) + for j, um in enumerate(umissing): + missing_value = StataMissingValue(um) + + loc = missing_loc[umissing_loc == j] + replacement.iloc[loc] = missing_value + else: # All replacements are identical + dtype = series.dtype + if dtype not in (np.float32, np.float64): + dtype = np.float64 + replacement = Series(series, dtype=dtype) + if not replacement._values.flags["WRITEABLE"]: + # only relevant for ArrayManager; construction + # path for BlockManager ensures writeability + replacement = replacement.copy() + # 
Note: operating on ._values is much faster than directly + # TODO: can we fix that? + replacement._values[missing] = np.nan + replacements[colname] = replacement + + if replacements: + for col in replacements: + data[col] = replacements[col] + return data + + def _insert_strls(self, data: DataFrame) -> DataFrame: + if not hasattr(self, "GSO") or len(self.GSO) == 0: + return data + for i, typ in enumerate(self.typlist): + if typ != "Q": + continue + # Wrap v_o in a string to allow uint64 values as keys on 32bit OS + data.iloc[:, i] = [self.GSO[str(k)] for k in data.iloc[:, i]] + return data + + def _do_select_columns(self, data: DataFrame, columns: Sequence[str]) -> DataFrame: + + if not self._column_selector_set: + column_set = set(columns) + if len(column_set) != len(columns): + raise ValueError("columns contains duplicate entries") + unmatched = column_set.difference(data.columns) + if unmatched: + joined = ", ".join(list(unmatched)) + raise ValueError( + "The following columns were not " + f"found in the Stata data set: {joined}" + ) + # Copy information for retained columns for later processing + dtyplist = [] + typlist = [] + fmtlist = [] + lbllist = [] + for col in columns: + i = data.columns.get_loc(col) + dtyplist.append(self.dtyplist[i]) + typlist.append(self.typlist[i]) + fmtlist.append(self.fmtlist[i]) + lbllist.append(self.lbllist[i]) + + self.dtyplist = dtyplist + self.typlist = typlist + self.fmtlist = fmtlist + self.lbllist = lbllist + self._column_selector_set = True + + return data[columns] + + def _do_convert_categoricals( + self, + data: DataFrame, + value_label_dict: dict[str, dict[float | int, str]], + lbllist: Sequence[str], + order_categoricals: bool, + ) -> DataFrame: + """ + Converts categorical columns to Categorical type. + """ + value_labels = list(value_label_dict.keys()) + cat_converted_data = [] + for col, label in zip(data, lbllist): + if label in value_labels: + # Explicit call with ordered=True + vl = value_label_dict[label] + keys = np.array(list(vl.keys())) + column = data[col] + key_matches = column.isin(keys) + if self._using_iterator and key_matches.all(): + initial_categories: np.ndarray | None = keys + # If all categories are in the keys and we are iterating, + # use the same keys for all chunks. If some are missing + # value labels, then we will fall back to the categories + # varying across chunks. + else: + if self._using_iterator: + # warn is using an iterator + warnings.warn( + categorical_conversion_warning, CategoricalConversionWarning + ) + initial_categories = None + cat_data = Categorical( + column, categories=initial_categories, ordered=order_categoricals + ) + if initial_categories is None: + # If None here, then we need to match the cats in the Categorical + categories = [] + for category in cat_data.categories: + if category in vl: + categories.append(vl[category]) + else: + categories.append(category) + else: + # If all cats are matched, we can use the values + categories = list(vl.values()) + try: + # Try to catch duplicate categories + cat_data.categories = categories + except ValueError as err: + vc = Series(categories).value_counts() + repeated_cats = list(vc.index[vc > 1]) + repeats = "-" * 80 + "\n" + "\n".join(repeated_cats) + # GH 25772 + msg = f""" +Value labels for column {col} are not unique. These cannot be converted to +pandas categoricals. + +Either read the file with `convert_categoricals` set to False or use the +low level interface in `StataReader` to separately read the values and the +value_labels. 
+ +The repeated labels are: +{repeats} +""" + raise ValueError(msg) from err + # TODO: is the next line needed above in the data(...) method? + cat_series = Series(cat_data, index=data.index) + cat_converted_data.append((col, cat_series)) + else: + cat_converted_data.append((col, data[col])) + data = DataFrame(dict(cat_converted_data), copy=False) + return data + + @property + def data_label(self) -> str: + """ + Return data label of Stata file. + """ + return self._data_label + + def variable_labels(self) -> dict[str, str]: + """ + Return variable labels as a dict, associating each variable name + with corresponding label. + + Returns + ------- + dict + """ + return dict(zip(self.varlist, self._variable_labels)) + + def value_labels(self) -> dict[str, dict[float | int, str]]: + """ + Return a dict, associating each variable name a dict, associating + each value its corresponding label. + + Returns + ------- + dict + """ + if not self._value_labels_read: + self._read_value_labels() + + return self.value_label_dict + + +@Appender(_read_stata_doc) +def read_stata( + filepath_or_buffer: FilePath | ReadBuffer[bytes], + convert_dates: bool = True, + convert_categoricals: bool = True, + index_col: str | None = None, + convert_missing: bool = False, + preserve_dtypes: bool = True, + columns: Sequence[str] | None = None, + order_categoricals: bool = True, + chunksize: int | None = None, + iterator: bool = False, + compression: CompressionOptions = "infer", + storage_options: StorageOptions = None, +) -> DataFrame | StataReader: + + reader = StataReader( + filepath_or_buffer, + convert_dates=convert_dates, + convert_categoricals=convert_categoricals, + index_col=index_col, + convert_missing=convert_missing, + preserve_dtypes=preserve_dtypes, + columns=columns, + order_categoricals=order_categoricals, + chunksize=chunksize, + storage_options=storage_options, + compression=compression, + ) + + if iterator or chunksize: + return reader + + with reader: + return reader.read() + + +def _set_endianness(endianness: str) -> str: + if endianness.lower() in ["<", "little"]: + return "<" + elif endianness.lower() in [">", "big"]: + return ">" + else: # pragma : no cover + raise ValueError(f"Endianness {endianness} not understood") + + +def _pad_bytes(name: AnyStr, length: int) -> AnyStr: + """ + Take a char string and pads it with null bytes until it's length chars. + """ + if isinstance(name, bytes): + return name + b"\x00" * (length - len(name)) + return name + "\x00" * (length - len(name)) + + +def _convert_datetime_to_stata_type(fmt: str) -> np.dtype: + """ + Convert from one of the stata date formats to a type in TYPE_MAP. + """ + if fmt in [ + "tc", + "%tc", + "td", + "%td", + "tw", + "%tw", + "tm", + "%tm", + "tq", + "%tq", + "th", + "%th", + "ty", + "%ty", + ]: + return np.dtype(np.float64) # Stata expects doubles for SIFs + else: + raise NotImplementedError(f"Format {fmt} not implemented") + + +def _maybe_convert_to_int_keys(convert_dates: dict, varlist: list[Hashable]) -> dict: + new_dict = {} + for key in convert_dates: + if not convert_dates[key].startswith("%"): # make sure proper fmts + convert_dates[key] = "%" + convert_dates[key] + if key in varlist: + new_dict.update({varlist.index(key): convert_dates[key]}) + else: + if not isinstance(key, int): + raise ValueError("convert_dates key must be a column or an integer") + new_dict.update({key: convert_dates[key]}) + return new_dict + + +def _dtype_to_stata_type(dtype: np.dtype, column: Series) -> int: + """ + Convert dtype types to stata types. 
Returns the byte of the given ordinal. + See TYPE_MAP and comments for an explanation. This is also explained in + the dta spec. + 1 - 244 are strings of this length + Pandas Stata + 251 - for int8 byte + 252 - for int16 int + 253 - for int32 long + 254 - for float32 float + 255 - for double double + + If there are dates to convert, then dtype will already have the correct + type inserted. + """ + # TODO: expand to handle datetime to integer conversion + if dtype.type is np.object_: # try to coerce it to the biggest string + # not memory efficient, what else could we + # do? + itemsize = max_len_string_array(ensure_object(column._values)) + return max(itemsize, 1) + elif dtype.type is np.float64: + return 255 + elif dtype.type is np.float32: + return 254 + elif dtype.type is np.int32: + return 253 + elif dtype.type is np.int16: + return 252 + elif dtype.type is np.int8: + return 251 + else: # pragma : no cover + raise NotImplementedError(f"Data type {dtype} not supported.") + + +def _dtype_to_default_stata_fmt( + dtype, column: Series, dta_version: int = 114, force_strl: bool = False +) -> str: + """ + Map numpy dtype to stata's default format for this type. Not terribly + important since users can change this in Stata. Semantics are + + object -> "%DDs" where DD is the length of the string. If not a string, + raise ValueError + float64 -> "%10.0g" + float32 -> "%9.0g" + int64 -> "%9.0g" + int32 -> "%12.0g" + int16 -> "%8.0g" + int8 -> "%8.0g" + strl -> "%9s" + """ + # TODO: Refactor to combine type with format + # TODO: expand this to handle a default datetime format? + if dta_version < 117: + max_str_len = 244 + else: + max_str_len = 2045 + if force_strl: + return "%9s" + if dtype.type is np.object_: + itemsize = max_len_string_array(ensure_object(column._values)) + if itemsize > max_str_len: + if dta_version >= 117: + return "%9s" + else: + raise ValueError(excessive_string_length_error.format(column.name)) + return "%" + str(max(itemsize, 1)) + "s" + elif dtype == np.float64: + return "%10.0g" + elif dtype == np.float32: + return "%9.0g" + elif dtype == np.int32: + return "%12.0g" + elif dtype == np.int8 or dtype == np.int16: + return "%8.0g" + else: # pragma : no cover + raise NotImplementedError(f"Data type {dtype} not supported.") + + +@doc( + storage_options=_shared_docs["storage_options"], + compression_options=_shared_docs["compression_options"] % "fname", +) +class StataWriter(StataParser): + """ + A class for writing Stata binary dta files + + Parameters + ---------- + fname : path (string), buffer or path object + string, path object (pathlib.Path or py._path.local.LocalPath) or + object implementing a binary write() functions. If using a buffer + then the buffer will not be automatically closed after the file + is written. + data : DataFrame + Input to save + convert_dates : dict + Dictionary mapping columns containing datetime types to stata internal + format to use when writing the dates. Options are 'tc', 'td', 'tm', + 'tw', 'th', 'tq', 'ty'. Column can be either an integer or a name. + Datetime columns that do not have a conversion type specified will be + converted to 'tc'. Raises NotImplementedError if a datetime column has + timezone information + write_index : bool + Write the index to Stata dataset. + byteorder : str + Can be ">", "<", "little", or "big". default is `sys.byteorder` + time_stamp : datetime + A datetime to use as file creation date. Default is the current time + data_label : str + A label for the data set. Must be 80 characters or smaller. 
+ variable_labels : dict + Dictionary containing columns as keys and variable labels as values. + Each label must be 80 characters or smaller. + {compression_options} + + .. versionadded:: 1.1.0 + + .. versionchanged:: 1.4.0 Zstandard support. + + {storage_options} + + .. versionadded:: 1.2.0 + + value_labels : dict of dicts + Dictionary containing columns as keys and dictionaries of column value + to labels as values. The combined length of all labels for a single + variable must be 32,000 characters or smaller. + + .. versionadded:: 1.4.0 + + Returns + ------- + writer : StataWriter instance + The StataWriter instance has a write_file method, which will + write the file to the given `fname`. + + Raises + ------ + NotImplementedError + * If datetimes contain timezone information + ValueError + * Columns listed in convert_dates are neither datetime64[ns] + or datetime.datetime + * Column dtype is not representable in Stata + * Column listed in convert_dates is not in DataFrame + * Categorical label contains more than 32,000 characters + + Examples + -------- + >>> data = pd.DataFrame([[1.0, 1]], columns=['a', 'b']) + >>> writer = StataWriter('./data_file.dta', data) + >>> writer.write_file() + + Directly write a zip file + >>> compression = {{"method": "zip", "archive_name": "data_file.dta"}} + >>> writer = StataWriter('./data_file.zip', data, compression=compression) + >>> writer.write_file() + + Save a DataFrame with dates + >>> from datetime import datetime + >>> data = pd.DataFrame([[datetime(2000,1,1)]], columns=['date']) + >>> writer = StataWriter('./date_data_file.dta', data, {{'date' : 'tw'}}) + >>> writer.write_file() + """ + + _max_string_length = 244 + _encoding = "latin-1" + + def __init__( + self, + fname: FilePath | WriteBuffer[bytes], + data: DataFrame, + convert_dates: dict[Hashable, str] | None = None, + write_index: bool = True, + byteorder: str | None = None, + time_stamp: datetime.datetime | None = None, + data_label: str | None = None, + variable_labels: dict[Hashable, str] | None = None, + compression: CompressionOptions = "infer", + storage_options: StorageOptions = None, + *, + value_labels: dict[Hashable, dict[float | int, str]] | None = None, + ): + super().__init__() + self.data = data + self._convert_dates = {} if convert_dates is None else convert_dates + self._write_index = write_index + self._time_stamp = time_stamp + self._data_label = data_label + self._variable_labels = variable_labels + self._non_cat_value_labels = value_labels + self._value_labels: list[StataValueLabel] = [] + self._has_value_labels = np.array([], dtype=bool) + self._compression = compression + self._output_file: IO[bytes] | None = None + self._converted_names: dict[Hashable, str] = {} + # attach nobs, nvars, data, varlist, typlist + self._prepare_pandas(data) + self.storage_options = storage_options + + if byteorder is None: + byteorder = sys.byteorder + self._byteorder = _set_endianness(byteorder) + self._fname = fname + self.type_converters = {253: np.int32, 252: np.int16, 251: np.int8} + + def _write(self, to_write: str) -> None: + """ + Helper to call encode before writing to file for Python 3 compat. + """ + self.handles.handle.write(to_write.encode(self._encoding)) + + def _write_bytes(self, value: bytes) -> None: + """ + Helper to assert file is open before writing. + """ + self.handles.handle.write(value) + + def _prepare_non_cat_value_labels( + self, data: DataFrame + ) -> list[StataNonCatValueLabel]: + """ + Check for value labels provided for non-categorical columns. 
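+
+        For example (file and column names here are illustrative), a writer
+        built as ``StataWriter("f.dta", df, value_labels={"rating": {1: "poor",
+        2: "good"}})`` reaches this method with ``rating`` expected to be a
+        plain numeric column; a non-numeric column raises ValueError below.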
Value + labels + """ + non_cat_value_labels: list[StataNonCatValueLabel] = [] + if self._non_cat_value_labels is None: + return non_cat_value_labels + + for labname, labels in self._non_cat_value_labels.items(): + if labname in self._converted_names: + colname = self._converted_names[labname] + elif labname in data.columns: + colname = str(labname) + else: + raise KeyError( + f"Can't create value labels for {labname}, it wasn't " + "found in the dataset." + ) + + if not is_numeric_dtype(data[colname].dtype): + # Labels should not be passed explicitly for categorical + # columns that will be converted to int + raise ValueError( + f"Can't create value labels for {labname}, value labels " + "can only be applied to numeric columns." + ) + svl = StataNonCatValueLabel(colname, labels) + non_cat_value_labels.append(svl) + return non_cat_value_labels + + def _prepare_categoricals(self, data: DataFrame) -> DataFrame: + """ + Check for categorical columns, retain categorical information for + Stata file and convert categorical data to int + """ + is_cat = [is_categorical_dtype(data[col].dtype) for col in data] + if not any(is_cat): + return data + + self._has_value_labels |= np.array(is_cat) + + get_base_missing_value = StataMissingValue.get_base_missing_value + data_formatted = [] + for col, col_is_cat in zip(data, is_cat): + if col_is_cat: + svl = StataValueLabel(data[col], encoding=self._encoding) + self._value_labels.append(svl) + dtype = data[col].cat.codes.dtype + if dtype == np.int64: + raise ValueError( + "It is not possible to export " + "int64-based categorical data to Stata." + ) + values = data[col].cat.codes._values.copy() + + # Upcast if needed so that correct missing values can be set + if values.max() >= get_base_missing_value(dtype): + if dtype == np.int8: + dtype = np.dtype(np.int16) + elif dtype == np.int16: + dtype = np.dtype(np.int32) + else: + dtype = np.dtype(np.float64) + values = np.array(values, dtype=dtype) + + # Replace missing values with Stata missing value for type + values[values == -1] = get_base_missing_value(dtype) + data_formatted.append((col, values)) + else: + data_formatted.append((col, data[col])) + return DataFrame.from_dict(dict(data_formatted)) + + def _replace_nans(self, data: DataFrame) -> DataFrame: + # return data + """ + Checks floating point data columns for nans, and replaces these with + the generic Stata for missing value (.) + """ + for c in data: + dtype = data[c].dtype + if dtype in (np.float32, np.float64): + if dtype == np.float32: + replacement = self.MISSING_VALUES["f"] + else: + replacement = self.MISSING_VALUES["d"] + data[c] = data[c].fillna(replacement) + + return data + + def _update_strl_names(self) -> None: + """No-op, forward compatibility""" + pass + + def _validate_variable_name(self, name: str) -> str: + """ + Validate variable names for Stata export. + + Parameters + ---------- + name : str + Variable name + + Returns + ------- + str + The validated name with invalid characters replaced with + underscores. + + Notes + ----- + Stata 114 and 117 support ascii characters in a-z, A-Z, 0-9 + and _. + """ + for c in name: + if ( + (c < "A" or c > "Z") + and (c < "a" or c > "z") + and (c < "0" or c > "9") + and c != "_" + ): + name = name.replace(c, "_") + return name + + def _check_column_names(self, data: DataFrame) -> DataFrame: + """ + Checks column names to ensure that they are valid Stata column names. 
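+
+        For example (an illustrative name), a column called ``gdp per-capita
+        (%)`` is written out as ``gdp_per_capita____``, and a clash with an
+        existing column is resolved by prepending ``_0``, ``_1``, ... to the
+        converted name.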
+ This includes checks for: + * Non-string names + * Stata keywords + * Variables that start with numbers + * Variables with names that are too long + + When an illegal variable name is detected, it is converted, and if + dates are exported, the variable name is propagated to the date + conversion dictionary + """ + converted_names: dict[Hashable, str] = {} + columns = list(data.columns) + original_columns = columns[:] + + duplicate_var_id = 0 + for j, name in enumerate(columns): + orig_name = name + if not isinstance(name, str): + name = str(name) + + name = self._validate_variable_name(name) + + # Variable name must not be a reserved word + if name in self.RESERVED_WORDS: + name = "_" + name + + # Variable name may not start with a number + if "0" <= name[0] <= "9": + name = "_" + name + + name = name[: min(len(name), 32)] + + if not name == orig_name: + # check for duplicates + while columns.count(name) > 0: + # prepend ascending number to avoid duplicates + name = "_" + str(duplicate_var_id) + name + name = name[: min(len(name), 32)] + duplicate_var_id += 1 + converted_names[orig_name] = name + + columns[j] = name + + data.columns = Index(columns) + + # Check date conversion, and fix key if needed + if self._convert_dates: + for c, o in zip(columns, original_columns): + if c != o: + self._convert_dates[c] = self._convert_dates[o] + del self._convert_dates[o] + + if converted_names: + conversion_warning = [] + for orig_name, name in converted_names.items(): + msg = f"{orig_name} -> {name}" + conversion_warning.append(msg) + + ws = invalid_name_doc.format("\n ".join(conversion_warning)) + warnings.warn(ws, InvalidColumnName) + + self._converted_names = converted_names + self._update_strl_names() + + return data + + def _set_formats_and_types(self, dtypes: Series) -> None: + self.fmtlist: list[str] = [] + self.typlist: list[int] = [] + for col, dtype in dtypes.items(): + self.fmtlist.append(_dtype_to_default_stata_fmt(dtype, self.data[col])) + self.typlist.append(_dtype_to_stata_type(dtype, self.data[col])) + + def _prepare_pandas(self, data: DataFrame) -> None: + # NOTE: we might need a different API / class for pandas objects so + # we can set different semantics - handle this with a PR to pandas.io + + data = data.copy() + + if self._write_index: + temp = data.reset_index() + if isinstance(temp, DataFrame): + data = temp + + # Ensure column names are strings + data = self._check_column_names(data) + + # Check columns for compatibility with stata, upcast if necessary + # Raise if outside the supported range + data = _cast_to_stata_types(data) + + # Replace NaNs with Stata missing values + data = self._replace_nans(data) + + # Set all columns to initially unlabelled + self._has_value_labels = np.repeat(False, data.shape[1]) + + # Create value labels for non-categorical data + non_cat_value_labels = self._prepare_non_cat_value_labels(data) + + non_cat_columns = [svl.labname for svl in non_cat_value_labels] + has_non_cat_val_labels = data.columns.isin(non_cat_columns) + self._has_value_labels |= has_non_cat_val_labels + self._value_labels.extend(non_cat_value_labels) + + # Convert categoricals to int data, and strip labels + data = self._prepare_categoricals(data) + + self.nobs, self.nvar = data.shape + self.data = data + self.varlist = data.columns.tolist() + + dtypes = data.dtypes + + # Ensure all date columns are converted + for col in data: + if col in self._convert_dates: + continue + if is_datetime64_dtype(data[col]): + self._convert_dates[col] = "tc" + + self._convert_dates = 
_maybe_convert_to_int_keys( + self._convert_dates, self.varlist + ) + for key in self._convert_dates: + new_type = _convert_datetime_to_stata_type(self._convert_dates[key]) + dtypes[key] = np.dtype(new_type) + + # Verify object arrays are strings and encode to bytes + self._encode_strings() + + self._set_formats_and_types(dtypes) + + # set the given format for the datetime cols + if self._convert_dates is not None: + for key in self._convert_dates: + if isinstance(key, int): + self.fmtlist[key] = self._convert_dates[key] + + def _encode_strings(self) -> None: + """ + Encode strings in dta-specific encoding + + Do not encode columns marked for date conversion or for strL + conversion. The strL converter independently handles conversion and + also accepts empty string arrays. + """ + convert_dates = self._convert_dates + # _convert_strl is not available in dta 114 + convert_strl = getattr(self, "_convert_strl", []) + for i, col in enumerate(self.data): + # Skip columns marked for date conversion or strl conversion + if i in convert_dates or col in convert_strl: + continue + column = self.data[col] + dtype = column.dtype + if dtype.type is np.object_: + inferred_dtype = infer_dtype(column, skipna=True) + if not ((inferred_dtype == "string") or len(column) == 0): + col = column.name + raise ValueError( + f"""\ +Column `{col}` cannot be exported.\n\nOnly string-like object arrays +containing all strings or a mix of strings and None can be exported. +Object arrays containing only null values are prohibited. Other object +types cannot be exported and must first be converted to one of the +supported types.""" + ) + encoded = self.data[col].str.encode(self._encoding) + # If larger than _max_string_length do nothing + if ( + max_len_string_array(ensure_object(encoded._values)) + <= self._max_string_length + ): + self.data[col] = encoded + + def write_file(self) -> None: + """ + Export DataFrame object to Stata dta format. + """ + with get_handle( + self._fname, + "wb", + compression=self._compression, + is_text=False, + storage_options=self.storage_options, + ) as self.handles: + + if self.handles.compression["method"] is not None: + # ZipFile creates a file (with the same name) for each write call. + # Write it first into a buffer and then write the buffer to the ZipFile. + self._output_file, self.handles.handle = self.handles.handle, BytesIO() + self.handles.created_handles.append(self.handles.handle) + + try: + self._write_header( + data_label=self._data_label, time_stamp=self._time_stamp + ) + self._write_map() + self._write_variable_types() + self._write_varnames() + self._write_sortlist() + self._write_formats() + self._write_value_label_names() + self._write_variable_labels() + self._write_expansion_fields() + self._write_characteristics() + records = self._prepare_data() + self._write_data(records) + self._write_strls() + self._write_value_labels() + self._write_file_close_tag() + self._write_map() + self._close() + except Exception as exc: + self.handles.close() + if isinstance(self._fname, (str, os.PathLike)) and os.path.isfile( + self._fname + ): + try: + os.unlink(self._fname) + except OSError: + warnings.warn( + f"This save was not successful but {self._fname} could not " + "be deleted. This file is not valid.", + ResourceWarning, + ) + raise exc + + def _close(self) -> None: + """ + Close the file if it was created by the writer. + + If a buffer or file-like object was passed in, for example a GzipFile, + then leave this file open for the caller to close. 
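+
+        For example, when write_file runs with ``compression={"method": "zip",
+        "archive_name": "data_file.dta"}`` (as in the class docstring), the
+        dta payload is staged in a ``BytesIO`` and this method copies it into
+        the final zip handle in one step.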
+ """ + # write compression + if self._output_file is not None: + assert isinstance(self.handles.handle, BytesIO) + bio, self.handles.handle = self.handles.handle, self._output_file + self.handles.handle.write(bio.getvalue()) + + def _write_map(self) -> None: + """No-op, future compatibility""" + pass + + def _write_file_close_tag(self) -> None: + """No-op, future compatibility""" + pass + + def _write_characteristics(self) -> None: + """No-op, future compatibility""" + pass + + def _write_strls(self) -> None: + """No-op, future compatibility""" + pass + + def _write_expansion_fields(self) -> None: + """Write 5 zeros for expansion fields""" + self._write(_pad_bytes("", 5)) + + def _write_value_labels(self) -> None: + for vl in self._value_labels: + self._write_bytes(vl.generate_value_label(self._byteorder)) + + def _write_header( + self, + data_label: str | None = None, + time_stamp: datetime.datetime | None = None, + ) -> None: + byteorder = self._byteorder + # ds_format - just use 114 + self._write_bytes(struct.pack("b", 114)) + # byteorder + self._write(byteorder == ">" and "\x01" or "\x02") + # filetype + self._write("\x01") + # unused + self._write("\x00") + # number of vars, 2 bytes + self._write_bytes(struct.pack(byteorder + "h", self.nvar)[:2]) + # number of obs, 4 bytes + self._write_bytes(struct.pack(byteorder + "i", self.nobs)[:4]) + # data label 81 bytes, char, null terminated + if data_label is None: + self._write_bytes(self._null_terminate_bytes(_pad_bytes("", 80))) + else: + self._write_bytes( + self._null_terminate_bytes(_pad_bytes(data_label[:80], 80)) + ) + # time stamp, 18 bytes, char, null terminated + # format dd Mon yyyy hh:mm + if time_stamp is None: + time_stamp = datetime.datetime.now() + elif not isinstance(time_stamp, datetime.datetime): + raise ValueError("time_stamp should be datetime type") + # GH #13856 + # Avoid locale-specific month conversion + months = [ + "Jan", + "Feb", + "Mar", + "Apr", + "May", + "Jun", + "Jul", + "Aug", + "Sep", + "Oct", + "Nov", + "Dec", + ] + month_lookup = {i + 1: month for i, month in enumerate(months)} + ts = ( + time_stamp.strftime("%d ") + + month_lookup[time_stamp.month] + + time_stamp.strftime(" %Y %H:%M") + ) + self._write_bytes(self._null_terminate_bytes(ts)) + + def _write_variable_types(self) -> None: + for typ in self.typlist: + self._write_bytes(struct.pack("B", typ)) + + def _write_varnames(self) -> None: + # varlist names are checked by _check_column_names + # varlist, requires null terminated + for name in self.varlist: + name = self._null_terminate_str(name) + name = _pad_bytes(name[:32], 33) + self._write(name) + + def _write_sortlist(self) -> None: + # srtlist, 2*(nvar+1), int array, encoded by byteorder + srtlist = _pad_bytes("", 2 * (self.nvar + 1)) + self._write(srtlist) + + def _write_formats(self) -> None: + # fmtlist, 49*nvar, char array + for fmt in self.fmtlist: + self._write(_pad_bytes(fmt, 49)) + + def _write_value_label_names(self) -> None: + # lbllist, 33*nvar, char array + for i in range(self.nvar): + # Use variable name when categorical + if self._has_value_labels[i]: + name = self.varlist[i] + name = self._null_terminate_str(name) + name = _pad_bytes(name[:32], 33) + self._write(name) + else: # Default is empty label + self._write(_pad_bytes("", 33)) + + def _write_variable_labels(self) -> None: + # Missing labels are 80 blank characters plus null termination + blank = _pad_bytes("", 81) + + if self._variable_labels is None: + for i in range(self.nvar): + self._write(blank) + return + + for col in 
self.data:
+            if col in self._variable_labels:
+                label = self._variable_labels[col]
+                if len(label) > 80:
+                    raise ValueError("Variable labels must be 80 characters or fewer")
+                is_latin1 = all(ord(c) < 256 for c in label)
+                if not is_latin1:
+                    raise ValueError(
+                        "Variable labels must contain only characters that "
+                        "can be encoded in Latin-1"
+                    )
+                self._write(_pad_bytes(label, 81))
+            else:
+                self._write(blank)
+
+    def _convert_strls(self, data: DataFrame) -> DataFrame:
+        """No-op, future compatibility"""
+        return data
+
+    def _prepare_data(self) -> np.recarray:
+        data = self.data
+        typlist = self.typlist
+        convert_dates = self._convert_dates
+        # 1. Convert dates
+        if self._convert_dates is not None:
+            for i, col in enumerate(data):
+                if i in convert_dates:
+                    data[col] = _datetime_to_stata_elapsed_vec(
+                        data[col], self.fmtlist[i]
+                    )
+        # 2. Convert strls
+        data = self._convert_strls(data)
+
+        # 3. Convert bad string data to '' and pad to correct length
+        dtypes = {}
+        native_byteorder = self._byteorder == _set_endianness(sys.byteorder)
+        for i, col in enumerate(data):
+            typ = typlist[i]
+            if typ <= self._max_string_length:
+                data[col] = data[col].fillna("").apply(_pad_bytes, args=(typ,))
+                stype = f"S{typ}"
+                dtypes[col] = stype
+                data[col] = data[col].astype(stype)
+            else:
+                dtype = data[col].dtype
+                if not native_byteorder:
+                    dtype = dtype.newbyteorder(self._byteorder)
+                dtypes[col] = dtype
+
+        return data.to_records(index=False, column_dtypes=dtypes)
+
+    def _write_data(self, records: np.recarray) -> None:
+        self._write_bytes(records.tobytes())
+
+    @staticmethod
+    def _null_terminate_str(s: str) -> str:
+        s += "\x00"
+        return s
+
+    def _null_terminate_bytes(self, s: str) -> bytes:
+        return self._null_terminate_str(s).encode(self._encoding)
+
+
+def _dtype_to_stata_type_117(dtype: np.dtype, column: Series, force_strl: bool) -> int:
+    """
+    Converts dtype types to stata types. Returns the byte of the given ordinal.
+    See TYPE_MAP and comments for an explanation. This is also explained in
+    the dta spec.
+    1 - 2045 are strings of this length
+                Pandas    Stata
+    32768 - for object    strL
+    65526 - for float64   double
+    65527 - for float32   float
+    65528 - for int32     long
+    65529 - for int16     int
+    65530 - for int8      byte
+
+    If there are dates to convert, then dtype will already have the correct
+    type inserted.
+    """
+    # TODO: expand to handle datetime to integer conversion
+    if force_strl:
+        return 32768
+    if dtype.type is np.object_:  # try to coerce it to the biggest string
+        # not memory efficient, what else could we
+        # do?
+        itemsize = max_len_string_array(ensure_object(column._values))
+        itemsize = max(itemsize, 1)
+        if itemsize <= 2045:
+            return itemsize
+        return 32768
+    elif dtype.type is np.float64:
+        return 65526
+    elif dtype.type is np.float32:
+        return 65527
+    elif dtype.type is np.int32:
+        return 65528
+    elif dtype.type is np.int16:
+        return 65529
+    elif dtype.type is np.int8:
+        return 65530
+    else:  # pragma : no cover
+        raise NotImplementedError(f"Data type {dtype} not supported.")
+
+
+def _pad_bytes_new(name: str | bytes, length: int) -> bytes:
+    """
+    Takes a str or bytes instance and pads it with null bytes until it is
+    `length` bytes long.
+    """
+    if isinstance(name, str):
+        name = bytes(name, "utf-8")
+    return name + b"\x00" * (length - len(name))
+
+
+class StataStrLWriter:
+    """
+    Converter for Stata StrLs
+
+    Stata StrLs map 8 byte values to strings which are stored using a
+    dictionary-like format where strings are keyed to two values.
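+
+    For example, the first unique string in the first observation of the
+    first strL column is keyed as ``(v, o) == (1, 1)`` (the keys are 1-based),
+    while the empty string is pre-assigned the reserved key ``(0, 0)``.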
+
+    Parameters
+    ----------
+    df : DataFrame
+        DataFrame to convert
+    columns : Sequence[str]
+        List of columns names to convert to StrL
+    version : int, optional
+        dta version. Currently supports 117, 118 and 119
+    byteorder : str, optional
+        Can be ">", "<", "little", or "big". default is `sys.byteorder`
+
+    Notes
+    -----
+    Supports creation of the StrL block of a dta file for dta versions
+    117, 118 and 119. These differ in how the GSO is stored. 118 and
+    119 store the GSO lookup value as a uint32 and a uint64, while 117
+    uses two uint32s. 118 and 119 also encode all strings as unicode,
+    which is required by the format. 117 uses 'latin-1', a fixed width
+    encoding that extends the 7-bit ascii table with an additional 128
+    characters.
+    """
+
+    def __init__(
+        self,
+        df: DataFrame,
+        columns: Sequence[str],
+        version: int = 117,
+        byteorder: str | None = None,
+    ):
+        if version not in (117, 118, 119):
+            raise ValueError("Only dta versions 117, 118 and 119 supported")
+        self._dta_ver = version
+
+        self.df = df
+        self.columns = columns
+        self._gso_table = {"": (0, 0)}
+        if byteorder is None:
+            byteorder = sys.byteorder
+        self._byteorder = _set_endianness(byteorder)
+
+        gso_v_type = "I"  # uint32
+        gso_o_type = "Q"  # uint64
+        self._encoding = "utf-8"
+        if version == 117:
+            o_size = 4
+            gso_o_type = "I"  # 117 used uint32
+            self._encoding = "latin-1"
+        elif version == 118:
+            o_size = 6
+        else:  # version == 119
+            o_size = 5
+        self._o_offset = 2 ** (8 * (8 - o_size))
+        self._gso_o_type = gso_o_type
+        self._gso_v_type = gso_v_type
+
+    def _convert_key(self, key: tuple[int, int]) -> int:
+        v, o = key
+        return v + self._o_offset * o
+
+    def generate_table(self) -> tuple[dict[str, tuple[int, int]], DataFrame]:
+        """
+        Generates the GSO lookup table for the DataFrame
+
+        Returns
+        -------
+        gso_table : dict
+            Ordered dictionary using the string found as keys
+            and their lookup position (v,o) as values
+        gso_df : DataFrame
+            DataFrame where strl columns have been converted to
+            (v,o) values
+
+        Notes
+        -----
+        Modifies the DataFrame in-place.
+
+        The DataFrame returned encodes the (v,o) values as uint64s. The
+        encoding depends on the dta version, and can be expressed as
+
+        enc = v + o * 2 ** (8 * (8 - o_size))
+
+        so that v is stored in the low 8 - o_size bytes and o in the upper
+        o_size bytes. o_size, the number of bytes reserved for o, is
+
+        * 117: 4
+        * 118: 6
+        * 119: 5
+        """
+        gso_table = self._gso_table
+        gso_df = self.df
+        columns = list(gso_df.columns)
+        selected = gso_df[self.columns]
+        col_index = [(col, columns.index(col)) for col in self.columns]
+        keys = np.empty(selected.shape, dtype=np.uint64)
+        for o, (idx, row) in enumerate(selected.iterrows()):
+            for j, (col, v) in enumerate(col_index):
+                val = row[col]
+                # Allow columns with mixed str and None (GH 23633)
+                val = "" if val is None else val
+                key = gso_table.get(val, None)
+                if key is None:
+                    # Stata prefers human numbers
+                    key = (v + 1, o + 1)
+                    gso_table[val] = key
+                keys[o, j] = self._convert_key(key)
+        for i, col in enumerate(self.columns):
+            gso_df[col] = keys[:, i]
+
+        return gso_table, gso_df
+
+    def generate_blob(self, gso_table: dict[str, tuple[int, int]]) -> bytes:
+        """
+        Generates the binary blob of GSOs that is written to the dta file.
+
+        Parameters
+        ----------
+        gso_table : dict
+            Ordered dictionary (str, vo)
+
+        Returns
+        -------
+        gso : bytes
+            Binary content of dta file to be placed between strl tags
+
+        Notes
+        -----
+        Output format depends on dta version. 117 uses two uint32s to
+        express v and o while 118+ uses a uint32 for v and a uint64 for o.
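+
+        For example, ``(v, o) == (2, 3)`` is keyed in the data section as
+        ``2 + 3 * 2 ** 32 == 0x300000002`` under 117 (v in the low 4 bytes)
+        and as ``2 + 3 * 2 ** 16 == 0x30002`` under 118 (v in the low 2
+        bytes), matching ``_convert_key`` above.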
+ """ + # Format information + # Length includes null term + # 117 + # GSOvvvvooootllllxxxxxxxxxxxxxxx...x + # 3 u4 u4 u1 u4 string + null term + # + # 118, 119 + # GSOvvvvooooooootllllxxxxxxxxxxxxxxx...x + # 3 u4 u8 u1 u4 string + null term + + bio = BytesIO() + gso = bytes("GSO", "ascii") + gso_type = struct.pack(self._byteorder + "B", 130) + null = struct.pack(self._byteorder + "B", 0) + v_type = self._byteorder + self._gso_v_type + o_type = self._byteorder + self._gso_o_type + len_type = self._byteorder + "I" + for strl, vo in gso_table.items(): + if vo == (0, 0): + continue + v, o = vo + + # GSO + bio.write(gso) + + # vvvv + bio.write(struct.pack(v_type, v)) + + # oooo / oooooooo + bio.write(struct.pack(o_type, o)) + + # t + bio.write(gso_type) + + # llll + utf8_string = bytes(strl, "utf-8") + bio.write(struct.pack(len_type, len(utf8_string) + 1)) + + # xxx...xxx + bio.write(utf8_string) + bio.write(null) + + return bio.getvalue() + + +class StataWriter117(StataWriter): + """ + A class for writing Stata binary dta files in Stata 13 format (117) + + Parameters + ---------- + fname : path (string), buffer or path object + string, path object (pathlib.Path or py._path.local.LocalPath) or + object implementing a binary write() functions. If using a buffer + then the buffer will not be automatically closed after the file + is written. + data : DataFrame + Input to save + convert_dates : dict + Dictionary mapping columns containing datetime types to stata internal + format to use when writing the dates. Options are 'tc', 'td', 'tm', + 'tw', 'th', 'tq', 'ty'. Column can be either an integer or a name. + Datetime columns that do not have a conversion type specified will be + converted to 'tc'. Raises NotImplementedError if a datetime column has + timezone information + write_index : bool + Write the index to Stata dataset. + byteorder : str + Can be ">", "<", "little", or "big". default is `sys.byteorder` + time_stamp : datetime + A datetime to use as file creation date. Default is the current time + data_label : str + A label for the data set. Must be 80 characters or smaller. + variable_labels : dict + Dictionary containing columns as keys and variable labels as values. + Each label must be 80 characters or smaller. + convert_strl : list + List of columns names to convert to Stata StrL format. Columns with + more than 2045 characters are automatically written as StrL. + Smaller columns can be converted by including the column name. Using + StrLs can reduce output file size when strings are longer than 8 + characters, and either frequently repeated or sparse. + {compression_options} + + .. versionadded:: 1.1.0 + + .. versionchanged:: 1.4.0 Zstandard support. + + value_labels : dict of dicts + Dictionary containing columns as keys and dictionaries of column value + to labels as values. The combined length of all labels for a single + variable must be 32,000 characters or smaller. + + .. versionadded:: 1.4.0 + + Returns + ------- + writer : StataWriter117 instance + The StataWriter117 instance has a write_file method, which will + write the file to the given `fname`. 
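+
+    A sketch of the strL trade-off described above, with an illustrative file
+    name and a hypothetical column ``notes``:
+    ``StataWriter117("f.dta", df, convert_strl=["notes"])`` stores each unique
+    string of ``df["notes"]`` once in the GSO table instead of padding every
+    row to the longest value.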
+ + Raises + ------ + NotImplementedError + * If datetimes contain timezone information + ValueError + * Columns listed in convert_dates are neither datetime64[ns] + or datetime.datetime + * Column dtype is not representable in Stata + * Column listed in convert_dates is not in DataFrame + * Categorical label contains more than 32,000 characters + + Examples + -------- + >>> from pandas.io.stata import StataWriter117 + >>> data = pd.DataFrame([[1.0, 1, 'a']], columns=['a', 'b', 'c']) + >>> writer = StataWriter117('./data_file.dta', data) + >>> writer.write_file() + + Directly write a zip file + >>> compression = {"method": "zip", "archive_name": "data_file.dta"} + >>> writer = StataWriter117('./data_file.zip', data, compression=compression) + >>> writer.write_file() + + Or with long strings stored in strl format + >>> data = pd.DataFrame([['A relatively long string'], [''], ['']], + ... columns=['strls']) + >>> writer = StataWriter117('./data_file_with_long_strings.dta', data, + ... convert_strl=['strls']) + >>> writer.write_file() + """ + + _max_string_length = 2045 + _dta_version = 117 + + def __init__( + self, + fname: FilePath | WriteBuffer[bytes], + data: DataFrame, + convert_dates: dict[Hashable, str] | None = None, + write_index: bool = True, + byteorder: str | None = None, + time_stamp: datetime.datetime | None = None, + data_label: str | None = None, + variable_labels: dict[Hashable, str] | None = None, + convert_strl: Sequence[Hashable] | None = None, + compression: CompressionOptions = "infer", + storage_options: StorageOptions = None, + *, + value_labels: dict[Hashable, dict[float | int, str]] | None = None, + ): + # Copy to new list since convert_strl might be modified later + self._convert_strl: list[Hashable] = [] + if convert_strl is not None: + self._convert_strl.extend(convert_strl) + + super().__init__( + fname, + data, + convert_dates, + write_index, + byteorder=byteorder, + time_stamp=time_stamp, + data_label=data_label, + variable_labels=variable_labels, + value_labels=value_labels, + compression=compression, + storage_options=storage_options, + ) + self._map: dict[str, int] = {} + self._strl_blob = b"" + + @staticmethod + def _tag(val: str | bytes, tag: str) -> bytes: + """Surround val with """ + if isinstance(val, str): + val = bytes(val, "utf-8") + return bytes("<" + tag + ">", "utf-8") + val + bytes("", "utf-8") + + def _update_map(self, tag: str) -> None: + """Update map location for tag with file position""" + assert self.handles.handle is not None + self._map[tag] = self.handles.handle.tell() + + def _write_header( + self, + data_label: str | None = None, + time_stamp: datetime.datetime | None = None, + ) -> None: + """Write the file header""" + byteorder = self._byteorder + self._write_bytes(bytes("", "utf-8")) + bio = BytesIO() + # ds_format - 117 + bio.write(self._tag(bytes(str(self._dta_version), "utf-8"), "release")) + # byteorder + bio.write(self._tag(byteorder == ">" and "MSF" or "LSF", "byteorder")) + # number of vars, 2 bytes in 117 and 118, 4 byte in 119 + nvar_type = "H" if self._dta_version <= 118 else "I" + bio.write(self._tag(struct.pack(byteorder + nvar_type, self.nvar), "K")) + # 117 uses 4 bytes, 118 uses 8 + nobs_size = "I" if self._dta_version == 117 else "Q" + bio.write(self._tag(struct.pack(byteorder + nobs_size, self.nobs), "N")) + # data label 81 bytes, char, null terminated + label = data_label[:80] if data_label is not None else "" + encoded_label = label.encode(self._encoding) + label_size = "B" if self._dta_version == 117 else "H" + 
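+        # Illustration with hypothetical values: a data label "GDP data"
+        # (8 characters) gets the one-byte length prefix
+        # struct.pack("<B", 8) == b"\x08" under 117, while 118+ uses the
+        # two-byte "H" prefix b"\x08\x00" (little-endian) so longer UTF-8
+        # labels fit.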
label_len = struct.pack(byteorder + label_size, len(encoded_label)) + encoded_label = label_len + encoded_label + bio.write(self._tag(encoded_label, "label")) + # time stamp, 18 bytes, char, null terminated + # format dd Mon yyyy hh:mm + if time_stamp is None: + time_stamp = datetime.datetime.now() + elif not isinstance(time_stamp, datetime.datetime): + raise ValueError("time_stamp should be datetime type") + # Avoid locale-specific month conversion + months = [ + "Jan", + "Feb", + "Mar", + "Apr", + "May", + "Jun", + "Jul", + "Aug", + "Sep", + "Oct", + "Nov", + "Dec", + ] + month_lookup = {i + 1: month for i, month in enumerate(months)} + ts = ( + time_stamp.strftime("%d ") + + month_lookup[time_stamp.month] + + time_stamp.strftime(" %Y %H:%M") + ) + # '\x11' added due to inspection of Stata file + stata_ts = b"\x11" + bytes(ts, "utf-8") + bio.write(self._tag(stata_ts, "timestamp")) + self._write_bytes(self._tag(bio.getvalue(), "header")) + + def _write_map(self) -> None: + """ + Called twice during file write. The first populates the values in + the map with 0s. The second call writes the final map locations when + all blocks have been written. + """ + if not self._map: + self._map = { + "stata_data": 0, + "map": self.handles.handle.tell(), + "variable_types": 0, + "varnames": 0, + "sortlist": 0, + "formats": 0, + "value_label_names": 0, + "variable_labels": 0, + "characteristics": 0, + "data": 0, + "strls": 0, + "value_labels": 0, + "stata_data_close": 0, + "end-of-file": 0, + } + # Move to start of map + self.handles.handle.seek(self._map["map"]) + bio = BytesIO() + for val in self._map.values(): + bio.write(struct.pack(self._byteorder + "Q", val)) + self._write_bytes(self._tag(bio.getvalue(), "map")) + + def _write_variable_types(self) -> None: + self._update_map("variable_types") + bio = BytesIO() + for typ in self.typlist: + bio.write(struct.pack(self._byteorder + "H", typ)) + self._write_bytes(self._tag(bio.getvalue(), "variable_types")) + + def _write_varnames(self) -> None: + self._update_map("varnames") + bio = BytesIO() + # 118 scales by 4 to accommodate utf-8 data worst case encoding + vn_len = 32 if self._dta_version == 117 else 128 + for name in self.varlist: + name = self._null_terminate_str(name) + name = _pad_bytes_new(name[:32].encode(self._encoding), vn_len + 1) + bio.write(name) + self._write_bytes(self._tag(bio.getvalue(), "varnames")) + + def _write_sortlist(self) -> None: + self._update_map("sortlist") + sort_size = 2 if self._dta_version < 119 else 4 + self._write_bytes(self._tag(b"\x00" * sort_size * (self.nvar + 1), "sortlist")) + + def _write_formats(self) -> None: + self._update_map("formats") + bio = BytesIO() + fmt_len = 49 if self._dta_version == 117 else 57 + for fmt in self.fmtlist: + bio.write(_pad_bytes_new(fmt.encode(self._encoding), fmt_len)) + self._write_bytes(self._tag(bio.getvalue(), "formats")) + + def _write_value_label_names(self) -> None: + self._update_map("value_label_names") + bio = BytesIO() + # 118 scales by 4 to accommodate utf-8 data worst case encoding + vl_len = 32 if self._dta_version == 117 else 128 + for i in range(self.nvar): + # Use variable name when categorical + name = "" # default name + if self._has_value_labels[i]: + name = self.varlist[i] + name = self._null_terminate_str(name) + encoded_name = _pad_bytes_new(name[:32].encode(self._encoding), vl_len + 1) + bio.write(encoded_name) + self._write_bytes(self._tag(bio.getvalue(), "value_label_names")) + + def _write_variable_labels(self) -> None: + # Missing labels are 80 blank 
characters plus null termination + self._update_map("variable_labels") + bio = BytesIO() + # 118 scales by 4 to accommodate utf-8 data worst case encoding + vl_len = 80 if self._dta_version == 117 else 320 + blank = _pad_bytes_new("", vl_len + 1) + + if self._variable_labels is None: + for _ in range(self.nvar): + bio.write(blank) + self._write_bytes(self._tag(bio.getvalue(), "variable_labels")) + return + + for col in self.data: + if col in self._variable_labels: + label = self._variable_labels[col] + if len(label) > 80: + raise ValueError("Variable labels must be 80 characters or fewer") + try: + encoded = label.encode(self._encoding) + except UnicodeEncodeError as err: + raise ValueError( + "Variable labels must contain only characters that " + f"can be encoded in {self._encoding}" + ) from err + + bio.write(_pad_bytes_new(encoded, vl_len + 1)) + else: + bio.write(blank) + self._write_bytes(self._tag(bio.getvalue(), "variable_labels")) + + def _write_characteristics(self) -> None: + self._update_map("characteristics") + self._write_bytes(self._tag(b"", "characteristics")) + + def _write_data(self, records) -> None: + self._update_map("data") + self._write_bytes(b"") + self._write_bytes(records.tobytes()) + self._write_bytes(b"") + + def _write_strls(self) -> None: + self._update_map("strls") + self._write_bytes(self._tag(self._strl_blob, "strls")) + + def _write_expansion_fields(self) -> None: + """No-op in dta 117+""" + pass + + def _write_value_labels(self) -> None: + self._update_map("value_labels") + bio = BytesIO() + for vl in self._value_labels: + lab = vl.generate_value_label(self._byteorder) + lab = self._tag(lab, "lbl") + bio.write(lab) + self._write_bytes(self._tag(bio.getvalue(), "value_labels")) + + def _write_file_close_tag(self) -> None: + self._update_map("stata_data_close") + self._write_bytes(bytes("", "utf-8")) + self._update_map("end-of-file") + + def _update_strl_names(self) -> None: + """ + Update column names for conversion to strl if they might have been + changed to comply with Stata naming rules + """ + # Update convert_strl if names changed + for orig, new in self._converted_names.items(): + if orig in self._convert_strl: + idx = self._convert_strl.index(orig) + self._convert_strl[idx] = new + + def _convert_strls(self, data: DataFrame) -> DataFrame: + """ + Convert columns to StrLs if either very large or in the + convert_strl variable + """ + convert_cols = [ + col + for i, col in enumerate(data) + if self.typlist[i] == 32768 or col in self._convert_strl + ] + + if convert_cols: + ssw = StataStrLWriter(data, convert_cols, version=self._dta_version) + tab, new_data = ssw.generate_table() + data = new_data + self._strl_blob = ssw.generate_blob(tab) + return data + + def _set_formats_and_types(self, dtypes: Series) -> None: + self.typlist = [] + self.fmtlist = [] + for col, dtype in dtypes.items(): + force_strl = col in self._convert_strl + fmt = _dtype_to_default_stata_fmt( + dtype, + self.data[col], + dta_version=self._dta_version, + force_strl=force_strl, + ) + self.fmtlist.append(fmt) + self.typlist.append( + _dtype_to_stata_type_117(dtype, self.data[col], force_strl) + ) + + +class StataWriterUTF8(StataWriter117): + """ + Stata binary dta file writing in Stata 15 (118) and 16 (119) formats + + DTA 118 and 119 format files support unicode string data (both fixed + and strL) format. Unicode is also supported in value labels, variable + labels and the dataset label. Format 119 is automatically used if the + file contains more than 32,767 variables. + + .. 
versionadded:: 1.0.0 + + Parameters + ---------- + fname : path (string), buffer or path object + string, path object (pathlib.Path or py._path.local.LocalPath) or + object implementing a binary write() functions. If using a buffer + then the buffer will not be automatically closed after the file + is written. + data : DataFrame + Input to save + convert_dates : dict, default None + Dictionary mapping columns containing datetime types to stata internal + format to use when writing the dates. Options are 'tc', 'td', 'tm', + 'tw', 'th', 'tq', 'ty'. Column can be either an integer or a name. + Datetime columns that do not have a conversion type specified will be + converted to 'tc'. Raises NotImplementedError if a datetime column has + timezone information + write_index : bool, default True + Write the index to Stata dataset. + byteorder : str, default None + Can be ">", "<", "little", or "big". default is `sys.byteorder` + time_stamp : datetime, default None + A datetime to use as file creation date. Default is the current time + data_label : str, default None + A label for the data set. Must be 80 characters or smaller. + variable_labels : dict, default None + Dictionary containing columns as keys and variable labels as values. + Each label must be 80 characters or smaller. + convert_strl : list, default None + List of columns names to convert to Stata StrL format. Columns with + more than 2045 characters are automatically written as StrL. + Smaller columns can be converted by including the column name. Using + StrLs can reduce output file size when strings are longer than 8 + characters, and either frequently repeated or sparse. + version : int, default None + The dta version to use. By default, uses the size of data to determine + the version. 118 is used if data.shape[1] <= 32767, and 119 is used + for storing larger DataFrames. + {compression_options} + + .. versionadded:: 1.1.0 + + .. versionchanged:: 1.4.0 Zstandard support. + + value_labels : dict of dicts + Dictionary containing columns as keys and dictionaries of column value + to labels as values. The combined length of all labels for a single + variable must be 32,000 characters or smaller. + + .. versionadded:: 1.4.0 + + Returns + ------- + StataWriterUTF8 + The instance has a write_file method, which will write the file to the + given `fname`. + + Raises + ------ + NotImplementedError + * If datetimes contain timezone information + ValueError + * Columns listed in convert_dates are neither datetime64[ns] + or datetime.datetime + * Column dtype is not representable in Stata + * Column listed in convert_dates is not in DataFrame + * Categorical label contains more than 32,000 characters + + Examples + -------- + Using Unicode data and column names + + >>> from pandas.io.stata import StataWriterUTF8 + >>> data = pd.DataFrame([[1.0, 1, 'ᴬ']], columns=['a', 'β', 'ĉ']) + >>> writer = StataWriterUTF8('./data_file.dta', data) + >>> writer.write_file() + + Directly write a zip file + >>> compression = {"method": "zip", "archive_name": "data_file.dta"} + >>> writer = StataWriterUTF8('./data_file.zip', data, compression=compression) + >>> writer.write_file() + + Or with long strings stored in strl format + + >>> data = pd.DataFrame([['ᴀ relatively long ŝtring'], [''], ['']], + ... columns=['strls']) + >>> writer = StataWriterUTF8('./data_file_with_long_strings.dta', data, + ... 
convert_strl=['strls']) + >>> writer.write_file() + """ + + _encoding = "utf-8" + + def __init__( + self, + fname: FilePath | WriteBuffer[bytes], + data: DataFrame, + convert_dates: dict[Hashable, str] | None = None, + write_index: bool = True, + byteorder: str | None = None, + time_stamp: datetime.datetime | None = None, + data_label: str | None = None, + variable_labels: dict[Hashable, str] | None = None, + convert_strl: Sequence[Hashable] | None = None, + version: int | None = None, + compression: CompressionOptions = "infer", + storage_options: StorageOptions = None, + *, + value_labels: dict[Hashable, dict[float | int, str]] | None = None, + ): + if version is None: + version = 118 if data.shape[1] <= 32767 else 119 + elif version not in (118, 119): + raise ValueError("version must be either 118 or 119.") + elif version == 118 and data.shape[1] > 32767: + raise ValueError( + "You must use version 119 for data sets containing more than" + "32,767 variables" + ) + + super().__init__( + fname, + data, + convert_dates=convert_dates, + write_index=write_index, + byteorder=byteorder, + time_stamp=time_stamp, + data_label=data_label, + variable_labels=variable_labels, + value_labels=value_labels, + convert_strl=convert_strl, + compression=compression, + storage_options=storage_options, + ) + # Override version set in StataWriter117 init + self._dta_version = version + + def _validate_variable_name(self, name: str) -> str: + """ + Validate variable names for Stata export. + + Parameters + ---------- + name : str + Variable name + + Returns + ------- + str + The validated name with invalid characters replaced with + underscores. + + Notes + ----- + Stata 118+ support most unicode characters. The only limitation is in + the ascii range where the characters supported are a-z, A-Z, 0-9 and _. + """ + # High code points appear to be acceptable + for c in name: + if ( + ord(c) < 128 + and (c < "A" or c > "Z") + and (c < "a" or c > "z") + and (c < "0" or c > "9") + and c != "_" + ) or 128 <= ord(c) < 256: + name = name.replace(c, "_") + + return name diff --git a/.local/lib/python3.8/site-packages/pandas/io/xml.py b/.local/lib/python3.8/site-packages/pandas/io/xml.py new file mode 100644 index 0000000..ad87b18 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/io/xml.py @@ -0,0 +1,950 @@ +""" +:mod:`pandas.io.xml` is a module for reading XML. +""" + +from __future__ import annotations + +import io +from typing import Sequence + +from pandas._typing import ( + CompressionOptions, + FilePath, + ReadBuffer, + StorageOptions, + XMLParsers, +) +from pandas.compat._optional import import_optional_dependency +from pandas.errors import ( + AbstractMethodError, + ParserError, +) +from pandas.util._decorators import ( + deprecate_nonkeyword_arguments, + doc, +) + +from pandas.core.dtypes.common import is_list_like + +from pandas.core.frame import DataFrame +from pandas.core.shared_docs import _shared_docs + +from pandas.io.common import ( + file_exists, + get_handle, + is_fsspec_url, + is_url, + stringify_path, +) +from pandas.io.parsers import TextParser + + +@doc(decompression_options=_shared_docs["decompression_options"] % "path_or_buffer") +class _XMLFrameParser: + """ + Internal subclass to parse XML into DataFrames. + + Parameters + ---------- + path_or_buffer : a valid JSON str, path object or file-like object + Any valid string path is acceptable. The string could be a URL. Valid + URL schemes include http, ftp, s3, and file. 
+
+    xpath : str or regex
+        The XPath expression to parse required set of nodes for
+        migration to `Data Frame`. `etree` supports limited XPath.
+
+    namespaces : dict
+        The namespaces defined in XML document (`xmlns:namespace='URI'`)
+        as dicts with key being namespace and value the URI.
+
+    elems_only : bool
+        Parse only the child elements at the specified `xpath`.
+
+    attrs_only : bool
+        Parse only the attributes at the specified `xpath`.
+
+    names : list
+        Column names for Data Frame of parsed XML data.
+
+    encoding : str
+        Encoding of xml object or document.
+
+    stylesheet : str or file-like
+        URL, file, file-like object, or a raw string containing XSLT.
+        `etree` does not support XSLT but retained for consistency.
+
+    {decompression_options}
+
+        .. versionchanged:: 1.4.0 Zstandard support.
+
+    storage_options : dict, optional
+        Extra options that make sense for a particular storage connection,
+        e.g. host, port, username, password, etc.
+
+    See also
+    --------
+    pandas.io.xml._EtreeFrameParser
+    pandas.io.xml._LxmlFrameParser
+
+    Notes
+    -----
+    To subclass this class effectively you must override the following methods:
+    * :func:`parse_data`
+    * :func:`_parse_nodes`
+    * :func:`_parse_doc`
+    * :func:`_validate_names`
+    * :func:`_validate_path`
+
+    See each method's respective documentation for details on their
+    functionality.
+    """
+
+    def __init__(
+        self,
+        path_or_buffer: FilePath | ReadBuffer[bytes] | ReadBuffer[str],
+        xpath: str,
+        namespaces: dict[str, str] | None,
+        elems_only: bool,
+        attrs_only: bool,
+        names: Sequence[str] | None,
+        encoding: str | None,
+        stylesheet: FilePath | ReadBuffer[bytes] | ReadBuffer[str] | None,
+        compression: CompressionOptions,
+        storage_options: StorageOptions,
+    ):
+        self.path_or_buffer = path_or_buffer
+        self.xpath = xpath
+        self.namespaces = namespaces
+        self.elems_only = elems_only
+        self.attrs_only = attrs_only
+        self.names = names
+        self.encoding = encoding
+        self.stylesheet = stylesheet
+        self.is_style = None
+        self.compression = compression
+        self.storage_options = storage_options
+
+    def parse_data(self) -> list[dict[str, str | None]]:
+        """
+        Parse xml data.
+
+        This method will call the other internal methods to
+        validate xpath, names, parse and return specific nodes.
+        """
+
+        raise AbstractMethodError(self)
+
+    def _parse_nodes(self) -> list[dict[str, str | None]]:
+        """
+        Parse xml nodes.
+
+        This method will parse the children and attributes of elements
+        in xpath, conditionally for only elements, only attributes
+        or both while optionally renaming node names.
+
+        Raises
+        ------
+        ValueError
+            * If only elements and only attributes are specified.
+
+        Notes
+        -----
+        Namespace URIs will be removed from return node values. Also,
+        elements with missing children or attributes compared to siblings
+        will have optional keys filled with None values.
+        """
+
+        raise AbstractMethodError(self)
+
+    def _validate_path(self) -> None:
+        """
+        Validate xpath.
+
+        This method checks for syntax, evaluation, or empty nodes return.
+
+        Raises
+        ------
+        SyntaxError
+            * If xpath is not supported or issues with namespaces.
+
+        ValueError
+            * If xpath does not return any nodes.
+        """
+
+        raise AbstractMethodError(self)
+
+    def _validate_names(self) -> None:
+        """
+        Validate names.
+
+        This method will check if names is a list-like and aligns
+        with length of parsed nodes.
+
+        Raises
+        ------
+        ValueError
+            * If value is not a list and is less than the length of nodes.
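+
+        For example, ``names=["a"]`` fails validation in the concrete parsers
+        when each node at the xpath has two child elements, since one name
+        cannot cover two columns.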
+ """ + raise AbstractMethodError(self) + + def _parse_doc(self, raw_doc) -> bytes: + """ + Build tree from path_or_buffer. + + This method will parse XML object into tree + either from string/bytes or file location. + """ + raise AbstractMethodError(self) + + +class _EtreeFrameParser(_XMLFrameParser): + """ + Internal class to parse XML into DataFrames with the Python + standard library XML module: `xml.etree.ElementTree`. + """ + + def parse_data(self) -> list[dict[str, str | None]]: + from xml.etree.ElementTree import XML + + if self.stylesheet is not None: + raise ValueError( + "To use stylesheet, you need lxml installed and selected as parser." + ) + + self.xml_doc = XML(self._parse_doc(self.path_or_buffer)) + + self._validate_path() + self._validate_names() + + return self._parse_nodes() + + def _parse_nodes(self) -> list[dict[str, str | None]]: + elems = self.xml_doc.findall(self.xpath, namespaces=self.namespaces) + dicts: list[dict[str, str | None]] + + if self.elems_only and self.attrs_only: + raise ValueError("Either element or attributes can be parsed not both.") + elif self.elems_only: + if self.names: + dicts = [ + { + **( + {el.tag: el.text.strip()} + if el.text and not el.text.isspace() + else {} + ), + **{ + nm: ch.text.strip() if ch.text else None + for nm, ch in zip(self.names, el.findall("*")) + }, + } + for el in elems + ] + else: + dicts = [ + { + ch.tag: ch.text.strip() if ch.text else None + for ch in el.findall("*") + } + for el in elems + ] + + elif self.attrs_only: + dicts = [ + {k: v.strip() if v else None for k, v in el.attrib.items()} + for el in elems + ] + + else: + if self.names: + dicts = [ + { + **el.attrib, + **( + {el.tag: el.text.strip()} + if el.text and not el.text.isspace() + else {} + ), + **{ + nm: ch.text.strip() if ch.text else None + for nm, ch in zip(self.names, el.findall("*")) + }, + } + for el in elems + ] + + else: + dicts = [ + { + **el.attrib, + **( + {el.tag: el.text.strip()} + if el.text and not el.text.isspace() + else {} + ), + **{ + ch.tag: ch.text.strip() if ch.text else None + for ch in el.findall("*") + }, + } + for el in elems + ] + + dicts = [ + {k.split("}")[1] if "}" in k else k: v for k, v in d.items()} for d in dicts + ] + + keys = list(dict.fromkeys([k for d in dicts for k in d.keys()])) + dicts = [{k: d[k] if k in d.keys() else None for k in keys} for d in dicts] + + if self.names: + dicts = [{nm: v for nm, v in zip(self.names, d.values())} for d in dicts] + + return dicts + + def _validate_path(self) -> None: + """ + Notes + ----- + `etree` supports limited XPath. If user attempts a more complex + expression syntax error will raise. + """ + + msg = ( + "xpath does not return any nodes. " + "If document uses namespaces denoted with " + "xmlns, be sure to define namespaces and " + "use them in xpath." + ) + try: + elems = self.xml_doc.find(self.xpath, namespaces=self.namespaces) + if elems is None: + raise ValueError(msg) + + if elems is not None and elems.find("*") is None and elems.attrib is None: + raise ValueError(msg) + + except (KeyError, SyntaxError): + raise SyntaxError( + "You have used an incorrect or unsupported XPath " + "expression for etree library or you used an " + "undeclared namespace prefix." 
+            )
+
+    def _validate_names(self) -> None:
+        if self.names:
+            parent = self.xml_doc.find(self.xpath, namespaces=self.namespaces)
+            children = parent.findall("*") if parent else []
+
+            if is_list_like(self.names):
+                if len(self.names) < len(children):
+                    raise ValueError(
+                        "names does not match length of child elements in xpath."
+                    )
+            else:
+                raise TypeError(
+                    f"{type(self.names).__name__} is not a valid type for names"
+                )
+
+    def _parse_doc(self, raw_doc) -> bytes:
+        from xml.etree.ElementTree import (
+            XMLParser,
+            parse,
+            tostring,
+        )
+
+        handle_data = get_data_from_filepath(
+            filepath_or_buffer=raw_doc,
+            encoding=self.encoding,
+            compression=self.compression,
+            storage_options=self.storage_options,
+        )
+
+        with preprocess_data(handle_data) as xml_data:
+            curr_parser = XMLParser(encoding=self.encoding)
+            r = parse(xml_data, parser=curr_parser)
+
+        return tostring(r.getroot())
+
+
+class _LxmlFrameParser(_XMLFrameParser):
+    """
+    Internal class to parse XML into DataFrames with third-party
+    full-featured XML library, `lxml`, that supports
+    XPath 1.0 and XSLT 1.0.
+    """
+
+    def parse_data(self) -> list[dict[str, str | None]]:
+        """
+        Parse xml data.
+
+        This method will call the other internal methods to
+        validate xpath, names, optionally parse and run XSLT,
+        and parse original or transformed XML and return specific nodes.
+        """
+        from lxml.etree import XML
+
+        self.xml_doc = XML(self._parse_doc(self.path_or_buffer))
+
+        if self.stylesheet is not None:
+            self.xsl_doc = XML(self._parse_doc(self.stylesheet))
+            self.xml_doc = XML(self._transform_doc())
+
+        self._validate_path()
+        self._validate_names()
+
+        return self._parse_nodes()
+
+    def _parse_nodes(self) -> list[dict[str, str | None]]:
+        elems = self.xml_doc.xpath(self.xpath, namespaces=self.namespaces)
+        dicts: list[dict[str, str | None]]
+
+        if self.elems_only and self.attrs_only:
+            raise ValueError("Either element or attributes can be parsed not both.")
+
+        elif self.elems_only:
+            if self.names:
+                dicts = [
+                    {
+                        **(
+                            {el.tag: el.text.strip()}
+                            if el.text and not el.text.isspace()
+                            else {}
+                        ),
+                        **{
+                            nm: ch.text.strip() if ch.text else None
+                            for nm, ch in zip(self.names, el.xpath("*"))
+                        },
+                    }
+                    for el in elems
+                ]
+            else:
+                dicts = [
+                    {
+                        ch.tag: ch.text.strip() if ch.text else None
+                        for ch in el.xpath("*")
+                    }
+                    for el in elems
+                ]
+
+        elif self.attrs_only:
+            dicts = [el.attrib for el in elems]
+
+        else:
+            if self.names:
+                dicts = [
+                    {
+                        **el.attrib,
+                        **(
+                            {el.tag: el.text.strip()}
+                            if el.text and not el.text.isspace()
+                            else {}
+                        ),
+                        **{
+                            nm: ch.text.strip() if ch.text else None
+                            for nm, ch in zip(self.names, el.xpath("*"))
+                        },
+                    }
+                    for el in elems
+                ]
+            else:
+                dicts = [
+                    {
+                        **el.attrib,
+                        **(
+                            {el.tag: el.text.strip()}
+                            if el.text and not el.text.isspace()
+                            else {}
+                        ),
+                        **{
+                            ch.tag: ch.text.strip() if ch.text else None
+                            for ch in el.xpath("*")
+                        },
+                    }
+                    for el in elems
+                ]
+
+        if self.namespaces or "}" in list(dicts[0].keys())[0]:
+            dicts = [
+                {k.split("}")[1] if "}" in k else k: v for k, v in d.items()}
+                for d in dicts
+            ]
+
+        keys = list(dict.fromkeys([k for d in dicts for k in d.keys()]))
+        dicts = [{k: d[k] if k in d.keys() else None for k in keys} for d in dicts]
+
+        if self.names:
+            dicts = [{nm: v for nm, v in zip(self.names, d.values())} for d in dicts]
+
+        return dicts
+
+    def _validate_path(self) -> None:
+
+        msg = (
+            "xpath does not return any nodes. "
+            "Be sure row level nodes are in xpath. "
+            "If document uses namespaces denoted with "
+            "xmlns, be sure to define namespaces and "
+            "use them in xpath."
+        )
+
+        elems = self.xml_doc.xpath(self.xpath, namespaces=self.namespaces)
+        children = self.xml_doc.xpath(self.xpath + "/*", namespaces=self.namespaces)
+        attrs = self.xml_doc.xpath(self.xpath + "/@*", namespaces=self.namespaces)
+
+        if elems == []:
+            raise ValueError(msg)
+
+        if elems != [] and attrs == [] and children == []:
+            raise ValueError(msg)
+
+    def _validate_names(self) -> None:
+        """
+        Validate names.
+
+        This method will check if names is a list and aligns with
+        the length of parsed nodes.
+
+        Raises
+        ------
+        ValueError
+            * If value is not a list and is less than the length of nodes.
+        """
+        if self.names:
+            children = self.xml_doc.xpath(
+                self.xpath + "[1]/*", namespaces=self.namespaces
+            )
+
+            if is_list_like(self.names):
+                if len(self.names) < len(children):
+                    raise ValueError(
+                        "names does not match length of child elements in xpath."
+                    )
+            else:
+                raise TypeError(
+                    f"{type(self.names).__name__} is not a valid type for names"
+                )
+
+    def _parse_doc(self, raw_doc) -> bytes:
+        from lxml.etree import (
+            XMLParser,
+            fromstring,
+            parse,
+            tostring,
+        )
+
+        handle_data = get_data_from_filepath(
+            filepath_or_buffer=raw_doc,
+            encoding=self.encoding,
+            compression=self.compression,
+            storage_options=self.storage_options,
+        )
+
+        with preprocess_data(handle_data) as xml_data:
+            curr_parser = XMLParser(encoding=self.encoding)
+
+            if isinstance(xml_data, io.StringIO):
+                if self.encoding is None:
+                    raise TypeError(
+                        "Can not pass encoding None when input is StringIO."
+                    )
+
+                doc = fromstring(
+                    xml_data.getvalue().encode(self.encoding), parser=curr_parser
+                )
+            else:
+                doc = parse(xml_data, parser=curr_parser)
+
+        return tostring(doc)
+
+    def _transform_doc(self) -> bytes:
+        """
+        Transform original tree using stylesheet.
+
+        This method will transform original xml using XSLT script into
+        an ideally flatter xml document for easier parsing and migration
+        to Data Frame.
+        """
+        from lxml.etree import XSLT
+
+        transformer = XSLT(self.xsl_doc)
+        new_doc = transformer(self.xml_doc)
+
+        return bytes(new_doc)
+
+
+def get_data_from_filepath(
+    filepath_or_buffer: FilePath | bytes | ReadBuffer[bytes] | ReadBuffer[str],
+    encoding: str | None,
+    compression: CompressionOptions,
+    storage_options: StorageOptions,
+) -> str | bytes | ReadBuffer[bytes] | ReadBuffer[str]:
+    """
+    Extract raw XML data.
+
+    The method accepts three input types:
+        1. filepath (string-like)
+        2. file-like object (e.g. open file object, StringIO)
+        3. XML string or bytes
+
+    This method turns (1) into (2) to simplify the rest of the processing.
+    It returns input types (2) and (3) unchanged.
+    """
+    if not isinstance(filepath_or_buffer, bytes):
+        filepath_or_buffer = stringify_path(filepath_or_buffer)
+
+    if (
+        isinstance(filepath_or_buffer, str)
+        and not filepath_or_buffer.startswith(("<?xml", "<"))
+    ) and (
+        is_url(filepath_or_buffer)
+        or is_fsspec_url(filepath_or_buffer)
+        or file_exists(filepath_or_buffer)
+    ):
+        with get_handle(
+            filepath_or_buffer,
+            "r",
+            encoding=encoding,
+            compression=compression,
+            storage_options=storage_options,
+        ) as handle_obj:
+            filepath_or_buffer = (
+                handle_obj.handle.read()
+                if hasattr(handle_obj.handle, "read")
+                else handle_obj.handle
+            )
+
+    return filepath_or_buffer
+
+
+def preprocess_data(data) -> io.StringIO | io.BytesIO:
+    """
+    Convert extracted raw data.
+
+    This method will return underlying data of extracted XML content.
+    The data either has a `read` attribute (e.g. a file object or a
+    StringIO/BytesIO) or is a string or bytes that is an XML document.
+    """
+
+    if isinstance(data, str):
+        data = io.StringIO(data)
+
+    elif isinstance(data, bytes):
+        data = io.BytesIO(data)
+
+    return data
+
+
+def _data_to_frame(data, **kwargs) -> DataFrame:
+    """
+    Convert parsed data to Data Frame.
+ + This method will bind xml dictionary data of keys and values + into named columns of Data Frame using the built-in TextParser + class that build Data Frame and infers specific dtypes. + """ + + tags = next(iter(data)) + nodes = [list(d.values()) for d in data] + + try: + with TextParser(nodes, names=tags, **kwargs) as tp: + return tp.read() + except ParserError: + raise ParserError( + "XML document may be too complex for import. " + "Try to flatten document and use distinct " + "element and attribute names." + ) + + +def _parse( + path_or_buffer: FilePath | ReadBuffer[bytes] | ReadBuffer[str], + xpath: str, + namespaces: dict[str, str] | None, + elems_only: bool, + attrs_only: bool, + names: Sequence[str] | None, + encoding: str | None, + parser: XMLParsers, + stylesheet: FilePath | ReadBuffer[bytes] | ReadBuffer[str] | None, + compression: CompressionOptions, + storage_options: StorageOptions, + **kwargs, +) -> DataFrame: + """ + Call internal parsers. + + This method will conditionally call internal parsers: + LxmlFrameParser and/or EtreeParser. + + Raises + ------ + ImportError + * If lxml is not installed if selected as parser. + + ValueError + * If parser is not lxml or etree. + """ + + p: _EtreeFrameParser | _LxmlFrameParser + + if parser == "lxml": + lxml = import_optional_dependency("lxml.etree", errors="ignore") + + if lxml is not None: + p = _LxmlFrameParser( + path_or_buffer, + xpath, + namespaces, + elems_only, + attrs_only, + names, + encoding, + stylesheet, + compression, + storage_options, + ) + else: + raise ImportError("lxml not found, please install or use the etree parser.") + + elif parser == "etree": + p = _EtreeFrameParser( + path_or_buffer, + xpath, + namespaces, + elems_only, + attrs_only, + names, + encoding, + stylesheet, + compression, + storage_options, + ) + else: + raise ValueError("Values for parser can only be lxml or etree.") + + data_dicts = p.parse_data() + + return _data_to_frame(data=data_dicts, **kwargs) + + +@deprecate_nonkeyword_arguments( + version=None, allowed_args=["path_or_buffer"], stacklevel=2 +) +@doc( + storage_options=_shared_docs["storage_options"], + decompression_options=_shared_docs["decompression_options"] % "path_or_buffer", +) +def read_xml( + path_or_buffer: FilePath | ReadBuffer[bytes] | ReadBuffer[str], + xpath: str = "./*", + namespaces: dict[str, str] | None = None, + elems_only: bool = False, + attrs_only: bool = False, + names: Sequence[str] | None = None, + # encoding can not be None for lxml and StringIO input + encoding: str | None = "utf-8", + parser: XMLParsers = "lxml", + stylesheet: FilePath | ReadBuffer[bytes] | ReadBuffer[str] | None = None, + compression: CompressionOptions = "infer", + storage_options: StorageOptions = None, +) -> DataFrame: + r""" + Read XML document into a ``DataFrame`` object. + + .. versionadded:: 1.3.0 + + Parameters + ---------- + path_or_buffer : str, path object, or file-like object + String, path object (implementing ``os.PathLike[str]``), or file-like + object implementing a ``read()`` function. The string can be any valid XML + string or a path. The string can further be a URL. Valid URL schemes + include http, ftp, s3, and file. + + xpath : str, optional, default './\*' + The XPath to parse required set of nodes for migration to DataFrame. + XPath should return a collection of elements and not a single + element. Note: The ``etree`` parser supports limited XPath + expressions. For more complex XPath, use ``lxml`` which requires + installation. 
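+        For example, ``xpath=".//row"`` selects every ``row`` element in the
+        document; expressions that rely on XPath functions such as
+        ``contains()`` work only with the ``lxml`` parser.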
+
+    namespaces : dict, optional
+        The namespaces defined in XML document as dicts with key being
+        namespace prefix and value the URI. There is no need to include all
+        namespaces in XML, only the ones used in ``xpath`` expression.
+        Note: if XML document uses default namespace denoted as
+        `xmlns='<URI>'` without a prefix, you must assign any temporary
+        namespace prefix such as 'doc' to the URI in order to parse
+        underlying nodes and/or attributes. For example, ::
+
+            namespaces = {{"doc": "https://example.com"}}
+
+    elems_only : bool, optional, default False
+        Parse only the child elements at the specified ``xpath``. By default,
+        all child elements and non-empty text nodes are returned.
+
+    attrs_only : bool, optional, default False
+        Parse only the attributes at the specified ``xpath``.
+        By default, all attributes are returned.
+
+    names : list-like, optional
+        Column names for DataFrame of parsed XML data. Use this parameter to
+        rename original element names and distinguish same named elements.
+
+    encoding : str, optional, default 'utf-8'
+        Encoding of XML document.
+
+    parser : {{'lxml','etree'}}, default 'lxml'
+        Parser module to use for retrieval of data. Only 'lxml' and
+        'etree' are supported. With 'lxml' more complex XPath searches
+        and ability to use XSLT stylesheet are supported.
+
+    stylesheet : str, path object or file-like object
+        A URL, file-like object, or a raw string containing an XSLT script.
+        This stylesheet should flatten complex, deeply nested XML documents
+        for easier parsing. To use this feature you must have ``lxml`` module
+        installed and specify 'lxml' as ``parser``. The ``xpath`` must
+        reference nodes of transformed XML document generated after XSLT
+        transformation and not the original XML document. Only XSLT 1.0
+        scripts and not later versions are currently supported.
+
+    {decompression_options}
+
+        .. versionchanged:: 1.4.0 Zstandard support.
+
+    {storage_options}
+
+    Returns
+    -------
+    df
+        A DataFrame.
+
+    See Also
+    --------
+    read_json : Convert a JSON string to pandas object.
+    read_html : Read HTML tables into a list of DataFrame objects.
+
+    Notes
+    -----
+    This method is best designed to import shallow XML documents in
+    following format which is the ideal fit for the two-dimensions of a
+    ``DataFrame`` (row by column). ::
+
+            <root>
+                <row>
+                   <column1>data</column1>
+                   <column2>data</column2>
+                   <column3>data</column3>
+                   ...
+               </row>
+               <row>
+                  ...
+               </row>
+               ...
+            </root>
+
+    As a file format, XML documents can be designed any way including
+    layout of elements and attributes as long as it conforms to W3C
+    specifications. Therefore, this method is a convenience handler for
+    a specific flatter design and not all possible XML structures.
+
+    However, for more complex XML documents, ``stylesheet`` allows you to
+    temporarily redesign original document with XSLT (a special purpose
+    language) for a flatter version for migration to a DataFrame.
+
+    This function will *always* return a single :class:`DataFrame` or raise
+    exceptions due to issues with XML document, ``xpath``, or other
+    parameters.
+
+    Examples
+    --------
+    >>> xml = '''<?xml version='1.0' encoding='utf-8'?>
+    ... <data xmlns="http://example.com">
+    ...  <row>
+    ...    <shape>square</shape>
+    ...    <degrees>360</degrees>
+    ...    <sides>4.0</sides>
+    ...  </row>
+    ...  <row>
+    ...    <shape>circle</shape>
+    ...    <degrees>360</degrees>
+    ...    <sides/>
+    ...  </row>
+    ...  <row>
+    ...    <shape>triangle</shape>
+    ...    <degrees>180</degrees>
+    ...    <sides>3.0</sides>
+    ...  </row>
+    ... </data>'''
+
+    >>> df = pd.read_xml(xml)
+    >>> df
+          shape  degrees  sides
+    0    square      360    4.0
+    1    circle      360    NaN
+    2  triangle      180    3.0
+
+    >>> xml = '''<?xml version='1.0' encoding='utf-8'?>
+    ... <data>
+    ...   <row shape="square" degrees="360" sides="4.0"/>
+    ...   <row shape="circle" degrees="360"/>
+    ...   <row shape="triangle" degrees="180" sides="3.0"/>
+    ... </data>'''
+
+    >>> df = pd.read_xml(xml, xpath=".//row")
+    >>> df
+          shape  degrees  sides
+    0    square      360    4.0
+    1    circle      360    NaN
+    2  triangle      180    3.0
+
+    >>> xml = '''<?xml version='1.0' encoding='utf-8'?>
+    ... <doc:data xmlns:doc="https://example.com">
+    ...   <doc:row>
+    ...     <doc:shape>square</doc:shape>
+    ...     <doc:degrees>360</doc:degrees>
+    ...     <doc:sides>4.0</doc:sides>
+    ...   </doc:row>
+    ...   <doc:row>
+    ...     <doc:shape>circle</doc:shape>
+    ...     <doc:degrees>360</doc:degrees>
+    ...     <doc:sides/>
+    ...   </doc:row>
+    ...   <doc:row>
+    ...     <doc:shape>triangle</doc:shape>
+    ...     <doc:degrees>180</doc:degrees>
+    ...     <doc:sides>3.0</doc:sides>
+    ...   </doc:row>
+    ... </doc:data>'''
+
+    >>> df = pd.read_xml(xml,
+    ...                  xpath="//doc:row",
+    ...                  namespaces={{"doc": "https://example.com"}})
+    >>> df
+          shape  degrees  sides
+    0    square      360    4.0
+    1    circle      360    NaN
+    2  triangle      180    3.0
+    """
+
+    return _parse(
+        path_or_buffer=path_or_buffer,
+        xpath=xpath,
+        namespaces=namespaces,
+        elems_only=elems_only,
+        attrs_only=attrs_only,
+        names=names,
+        encoding=encoding,
+        parser=parser,
+        stylesheet=stylesheet,
+        compression=compression,
+        storage_options=storage_options,
+    )
diff --git a/.local/lib/python3.8/site-packages/pandas/plotting/__init__.py b/.local/lib/python3.8/site-packages/pandas/plotting/__init__.py
new file mode 100644
index 0000000..55c861e
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/plotting/__init__.py
@@ -0,0 +1,98 @@
+"""
+Plotting public API.
+
+Authors of third-party plotting backends should implement a module with a
+public ``plot(data, kind, **kwargs)``. The parameter `data` will contain
+the data structure and can be a `Series` or a `DataFrame`. For example,
+for ``df.plot()`` the parameter `data` will contain the DataFrame `df`.
+In some cases, the data structure is transformed before being sent to
+the backend (see PlotAccessor.__call__ in pandas/plotting/_core.py for
+the exact transformations).
+
+The parameter `kind` will be one of:
+
+- line
+- bar
+- barh
+- box
+- hist
+- kde
+- area
+- pie
+- scatter
+- hexbin
+
+See the pandas API reference for documentation on each kind of plot.
+
+Any other keyword argument is currently assumed to be backend specific,
+but some parameters may be unified and added to the signature in the
+future (e.g. `title` which should be useful for any backend).
+
+Currently, all the Matplotlib functions in pandas are accessed through
+the selected backend. For example, `pandas.plotting.boxplot` (equivalent
+to `DataFrame.boxplot`) is also accessed in the selected backend. This
+is expected to change, and the exact API is under discussion. But with
+the current version, backends are expected to implement the following
+functions:
+
+- plot (described above, used for `Series.plot` and `DataFrame.plot`)
+- hist_series and hist_frame (for `Series.hist` and `DataFrame.hist`)
+- boxplot (`pandas.plotting.boxplot(df)` equivalent to `DataFrame.boxplot`)
+- boxplot_frame and boxplot_frame_groupby
+- register and deregister (register converters for the tick formats)
+- Plots not called as `Series` and `DataFrame` methods:
+  - table
+  - andrews_curves
+  - autocorrelation_plot
+  - bootstrap_plot
+  - lag_plot
+  - parallel_coordinates
+  - radviz
+  - scatter_matrix
+
+Use the code in pandas/plotting/_matplotlib.py and
+https://github.com/pyviz/hvplot as a reference on how to write a backend.
+
+For the discussion about the API see
+https://github.com/pandas-dev/pandas/issues/26747.
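+
+As a minimal, illustrative sketch (the module name ``my_backend`` and its
+body are hypothetical; only the ``plot(data, kind, **kwargs)`` signature
+described above is assumed), a backend module can start as::
+
+    # my_backend.py
+    def plot(data, kind, **kwargs):
+        # `data` is a Series or DataFrame; `kind` is one of the kinds above
+        raise NotImplementedError(f"plot kind {kind!r} is not supported yet")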
+""" +from pandas.plotting._core import ( + PlotAccessor, + boxplot, + boxplot_frame, + boxplot_frame_groupby, + hist_frame, + hist_series, +) +from pandas.plotting._misc import ( + andrews_curves, + autocorrelation_plot, + bootstrap_plot, + deregister as deregister_matplotlib_converters, + lag_plot, + parallel_coordinates, + plot_params, + radviz, + register as register_matplotlib_converters, + scatter_matrix, + table, +) + +__all__ = [ + "PlotAccessor", + "boxplot", + "boxplot_frame", + "boxplot_frame_groupby", + "hist_frame", + "hist_series", + "scatter_matrix", + "radviz", + "andrews_curves", + "bootstrap_plot", + "parallel_coordinates", + "lag_plot", + "autocorrelation_plot", + "table", + "plot_params", + "register_matplotlib_converters", + "deregister_matplotlib_converters", +] diff --git a/.local/lib/python3.8/site-packages/pandas/plotting/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/plotting/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..8594d4e Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/plotting/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/plotting/__pycache__/_core.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/plotting/__pycache__/_core.cpython-38.pyc new file mode 100644 index 0000000..086a640 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/plotting/__pycache__/_core.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/plotting/__pycache__/_misc.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/plotting/__pycache__/_misc.cpython-38.pyc new file mode 100644 index 0000000..de82e56 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/plotting/__pycache__/_misc.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/plotting/_core.py b/.local/lib/python3.8/site-packages/pandas/plotting/_core.py new file mode 100644 index 0000000..216ccdd --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/plotting/_core.py @@ -0,0 +1,1851 @@ +from __future__ import annotations + +import importlib +import types +from typing import ( + TYPE_CHECKING, + Sequence, +) + +from pandas._config import get_option + +from pandas._typing import IndexLabel +from pandas.util._decorators import ( + Appender, + Substitution, +) + +from pandas.core.dtypes.common import ( + is_integer, + is_list_like, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) + +from pandas.core.base import PandasObject + +if TYPE_CHECKING: + from pandas import DataFrame + + +def hist_series( + self, + by=None, + ax=None, + grid: bool = True, + xlabelsize: int | None = None, + xrot: float | None = None, + ylabelsize: int | None = None, + yrot: float | None = None, + figsize: tuple[int, int] | None = None, + bins: int | Sequence[int] = 10, + backend: str | None = None, + legend: bool = False, + **kwargs, +): + """ + Draw histogram of the input series using matplotlib. + + Parameters + ---------- + by : object, optional + If passed, then used to form histograms for separate groups. + ax : matplotlib axis object + If not passed, uses gca(). + grid : bool, default True + Whether to show axis grid lines. + xlabelsize : int, default None + If specified changes the x-axis label size. + xrot : float, default None + Rotation of x axis labels. + ylabelsize : int, default None + If specified changes the y-axis label size. 
+ yrot : float, default None + Rotation of y axis labels. + figsize : tuple, default None + Figure size in inches by default. + bins : int or sequence, default 10 + Number of histogram bins to be used. If an integer is given, bins + 1 + bin edges are calculated and returned. If bins is a sequence, gives + bin edges, including left edge of first bin and right edge of last + bin. In this case, bins is returned unmodified. + backend : str, default None + Backend to use instead of the backend specified in the option + ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to + specify the ``plotting.backend`` for the whole session, set + ``pd.options.plotting.backend``. + + .. versionadded:: 1.0.0 + + legend : bool, default False + Whether to show the legend. + + .. versionadded:: 1.1.0 + + **kwargs + To be passed to the actual plotting function. + + Returns + ------- + matplotlib.AxesSubplot + A histogram plot. + + See Also + -------- + matplotlib.axes.Axes.hist : Plot a histogram using matplotlib. + """ + plot_backend = _get_plot_backend(backend) + return plot_backend.hist_series( + self, + by=by, + ax=ax, + grid=grid, + xlabelsize=xlabelsize, + xrot=xrot, + ylabelsize=ylabelsize, + yrot=yrot, + figsize=figsize, + bins=bins, + legend=legend, + **kwargs, + ) + + +def hist_frame( + data: DataFrame, + column: IndexLabel = None, + by=None, + grid: bool = True, + xlabelsize: int | None = None, + xrot: float | None = None, + ylabelsize: int | None = None, + yrot: float | None = None, + ax=None, + sharex: bool = False, + sharey: bool = False, + figsize: tuple[int, int] | None = None, + layout: tuple[int, int] | None = None, + bins: int | Sequence[int] = 10, + backend: str | None = None, + legend: bool = False, + **kwargs, +): + """ + Make a histogram of the DataFrame's columns. + + A `histogram`_ is a representation of the distribution of data. + This function calls :meth:`matplotlib.pyplot.hist`, on each series in + the DataFrame, resulting in one histogram per column. + + .. _histogram: https://en.wikipedia.org/wiki/Histogram + + Parameters + ---------- + data : DataFrame + The pandas object holding the data. + column : str or sequence, optional + If passed, will be used to limit data to a subset of columns. + by : object, optional + If passed, then used to form histograms for separate groups. + grid : bool, default True + Whether to show axis grid lines. + xlabelsize : int, default None + If specified changes the x-axis label size. + xrot : float, default None + Rotation of x axis labels. For example, a value of 90 displays the + x labels rotated 90 degrees clockwise. + ylabelsize : int, default None + If specified changes the y-axis label size. + yrot : float, default None + Rotation of y axis labels. For example, a value of 90 displays the + y labels rotated 90 degrees clockwise. + ax : Matplotlib axes object, default None + The axes to plot the histogram on. + sharex : bool, default True if ax is None else False + In case subplots=True, share x axis and set some x axis labels to + invisible; defaults to True if ax is None otherwise False if an ax + is passed in. + Note that passing in both an ax and sharex=True will alter all x axis + labels for all subplots in a figure. + sharey : bool, default False + In case subplots=True, share y axis and set some y axis labels to + invisible. + figsize : tuple, optional + The size in inches of the figure to create. Uses the value in + `matplotlib.rcParams` by default. 
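+        For example, ``figsize=(8, 6)`` creates a figure eight inches wide
+        and six inches tall.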
+    layout : tuple, optional
+        Tuple of (rows, columns) for the layout of the histograms.
+    bins : int or sequence, default 10
+        Number of histogram bins to be used. If an integer is given, bins + 1
+        bin edges are calculated and returned. If bins is a sequence, gives
+        bin edges, including left edge of first bin and right edge of last
+        bin. In this case, bins is returned unmodified.
+
+    backend : str, default None
+        Backend to use instead of the backend specified in the option
+        ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
+        specify the ``plotting.backend`` for the whole session, set
+        ``pd.options.plotting.backend``.
+
+        .. versionadded:: 1.0.0
+
+    legend : bool, default False
+        Whether to show the legend.
+
+        .. versionadded:: 1.1.0
+
+    **kwargs
+        All other plotting keyword arguments to be passed to
+        :meth:`matplotlib.pyplot.hist`.
+
+    Returns
+    -------
+    matplotlib.AxesSubplot or numpy.ndarray of them
+
+    See Also
+    --------
+    matplotlib.pyplot.hist : Plot a histogram using matplotlib.
+
+    Examples
+    --------
+    This example draws a histogram based on the length and width of
+    some animals, displayed in three bins
+
+    .. plot::
+        :context: close-figs
+
+        >>> df = pd.DataFrame({
+        ...     'length': [1.5, 0.5, 1.2, 0.9, 3],
+        ...     'width': [0.7, 0.2, 0.15, 0.2, 1.1]
+        ... }, index=['pig', 'rabbit', 'duck', 'chicken', 'horse'])
+        >>> hist = df.hist(bins=3)
+    """
+    plot_backend = _get_plot_backend(backend)
+    return plot_backend.hist_frame(
+        data,
+        column=column,
+        by=by,
+        grid=grid,
+        xlabelsize=xlabelsize,
+        xrot=xrot,
+        ylabelsize=ylabelsize,
+        yrot=yrot,
+        ax=ax,
+        sharex=sharex,
+        sharey=sharey,
+        figsize=figsize,
+        layout=layout,
+        legend=legend,
+        bins=bins,
+        **kwargs,
+    )
+
+
+_boxplot_doc = """
+Make a box plot from DataFrame columns.
+
+Make a box-and-whisker plot from DataFrame columns, optionally grouped
+by some other columns. A box plot is a method for graphically depicting
+groups of numerical data through their quartiles.
+The box extends from the Q1 to Q3 quartile values of the data,
+with a line at the median (Q2). The whiskers extend from the edges
+of box to show the range of the data. By default, they extend no more than
+`1.5 * IQR (IQR = Q3 - Q1)` from the edges of the box, ending at the farthest
+data point within that interval. Outliers are plotted as separate dots.
+
+For further details see
+Wikipedia's entry for `boxplot <https://en.wikipedia.org/wiki/Box_plot>`_.
+
+Parameters
+----------
+column : str or list of str, optional
+    Column name or list of names, or vector.
+    Can be any valid input to :meth:`pandas.DataFrame.groupby`.
+by : str or array-like, optional
+    Column in the DataFrame to :meth:`pandas.DataFrame.groupby`.
+    One box-plot will be done per value of columns in `by`.
+ax : object of class matplotlib.axes.Axes, optional
+    The matplotlib axes to be used by boxplot.
+fontsize : float or str
+    Tick label font size in points or as a string (e.g., `large`).
+rot : int or float, default 0
+    The rotation angle of labels (in degrees)
+    with respect to the screen coordinate system.
+grid : bool, default True
+    Setting this to True will show the grid.
+figsize : A tuple (width, height) in inches
+    The size of the figure to create in matplotlib.
+layout : tuple (rows, columns), optional
+    For example, (3, 5) will display the subplots
+    using 3 rows and 5 columns, starting from the top-left.
+return_type : {'axes', 'dict', 'both'} or None, default 'axes'
+    The kind of object to return. The default is ``axes``.
+
+    * 'axes' returns the matplotlib axes the boxplot is drawn on.
+    * 'dict' returns a dictionary whose values are the matplotlib
+      Lines of the boxplot.
+    * 'both' returns a namedtuple with the axes and dict.
+    * when grouping with ``by``, a Series mapping columns to
+      ``return_type`` is returned.
+
+      If ``return_type`` is `None`, a NumPy array
+      of axes with the same shape as ``layout`` is returned.
+%(backend)s\
+
+**kwargs
+    All other plotting keyword arguments to be passed to
+    :func:`matplotlib.pyplot.boxplot`.
+
+Returns
+-------
+result
+    See Notes.
+
+See Also
+--------
+Series.plot.hist: Make a histogram.
+matplotlib.pyplot.boxplot : Matplotlib equivalent plot.
+
+Notes
+-----
+The return type depends on the `return_type` parameter:
+
+* 'axes' : object of class matplotlib.axes.Axes
+* 'dict' : dict of matplotlib.lines.Line2D objects
+* 'both' : a namedtuple with structure (ax, lines)
+
+For data grouped with ``by``, return a Series of the above or a numpy
+array:
+
+* :class:`~pandas.Series`
+* :class:`~numpy.array` (for ``return_type = None``)
+
+Use ``return_type='dict'`` when you want to tweak the appearance
+of the lines after plotting. In this case a dict containing the Lines
+making up the boxes, caps, fliers, medians, and whiskers is returned.
+
+Examples
+--------
+
+Boxplots can be created for every column in the dataframe
+by ``df.boxplot()`` or indicating the columns to be used:
+
+.. plot::
+    :context: close-figs
+
+    >>> np.random.seed(1234)
+    >>> df = pd.DataFrame(np.random.randn(10, 4),
+    ...                   columns=['Col1', 'Col2', 'Col3', 'Col4'])
+    >>> boxplot = df.boxplot(column=['Col1', 'Col2', 'Col3'])  # doctest: +SKIP
+
+Boxplots of variable distributions grouped by the values of a third
+variable can be created using the option ``by``. For instance:
+
+.. plot::
+    :context: close-figs
+
+    >>> df = pd.DataFrame(np.random.randn(10, 2),
+    ...                   columns=['Col1', 'Col2'])
+    >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A',
+    ...                      'B', 'B', 'B', 'B', 'B'])
+    >>> boxplot = df.boxplot(by='X')
+
+A list of strings (i.e. ``['X', 'Y']``) can be passed to boxplot
+in order to group the data by combination of the variables in the x-axis:
+
+.. plot::
+    :context: close-figs
+
+    >>> df = pd.DataFrame(np.random.randn(10, 3),
+    ...                   columns=['Col1', 'Col2', 'Col3'])
+    >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A',
+    ...                      'B', 'B', 'B', 'B', 'B'])
+    >>> df['Y'] = pd.Series(['A', 'B', 'A', 'B', 'A',
+    ...                      'B', 'A', 'B', 'A', 'B'])
+    >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by=['X', 'Y'])
+
+The layout of boxplot can be adjusted giving a tuple to ``layout``:
+
+.. plot::
+    :context: close-figs
+
+    >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',
+    ...                      layout=(2, 1))
+
+Additional formatting can be done to the boxplot, like suppressing the grid
+(``grid=False``), rotating the labels in the x-axis (i.e. ``rot=45``)
+or changing the fontsize (i.e. ``fontsize=15``):
+
+.. plot::
+    :context: close-figs
+
+    >>> boxplot = df.boxplot(grid=False, rot=45, fontsize=15)  # doctest: +SKIP
+
+The parameter ``return_type`` can be used to select the type of element
+returned by `boxplot`. When ``return_type='axes'`` is selected,
+the matplotlib axes on which the boxplot is drawn are returned:
+
+    >>> boxplot = df.boxplot(column=['Col1', 'Col2'], return_type='axes')
+    >>> type(boxplot)
+    <class 'matplotlib.axes._subplots.AxesSubplot'>
+
+When grouping with ``by``, a Series mapping columns to ``return_type``
+is returned:
+
+    >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',
+    ...                      return_type='axes')
+    >>> type(boxplot)
+    <class 'pandas.core.series.Series'>
+
+If ``return_type`` is `None`, a NumPy array of axes with the same shape
+as ``layout`` is returned:
+
+    >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',
+    ...                      return_type=None)
+    >>> type(boxplot)
+    <class 'numpy.ndarray'>
+"""
+
+_backend_doc = """\
+backend : str, default None
+    Backend to use instead of the backend specified in the option
+    ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
+    specify the ``plotting.backend`` for the whole session, set
+    ``pd.options.plotting.backend``.
+
+    .. versionadded:: 1.0.0
+"""
+
+
+_bar_or_line_doc = """
+        Parameters
+        ----------
+        x : label or position, optional
+            Allows plotting of one column versus another. If not specified,
+            the index of the DataFrame is used.
+        y : label or position, optional
+            Allows plotting of one column versus another. If not specified,
+            all numerical columns are used.
+        color : str, array-like, or dict, optional
+            The color for each of the DataFrame's columns. Possible values are:
+
+            - A single color string referred to by name, RGB or RGBA code,
+              for instance 'red' or '#a98d19'.
+
+            - A sequence of color strings referred to by name, RGB or RGBA
+              code, which will be used for each column recursively. For
+              instance ['green','yellow'] each column's %(kind)s will be filled in
+              green or yellow, alternately. If there is only a single column to
+              be plotted, then only the first color from the color list will be
+              used.
+
+            - A dict of the form {column name : color}, so that each column will be
+              colored accordingly. For example, if your columns are called `a` and
+              `b`, then passing {'a': 'green', 'b': 'red'} will color %(kind)ss for
+              column `a` in green and %(kind)ss for column `b` in red.
+
+            .. versionadded:: 1.1.0
+
+        **kwargs
+            Additional keyword arguments are documented in
+            :meth:`DataFrame.plot`.
+
+        Returns
+        -------
+        matplotlib.axes.Axes or np.ndarray of them
+            An ndarray is returned with one :class:`matplotlib.axes.Axes`
+            per column when ``subplots=True``.
+"""
+
+
+@Substitution(backend="")
+@Appender(_boxplot_doc)
+def boxplot(
+    data,
+    column=None,
+    by=None,
+    ax=None,
+    fontsize=None,
+    rot=0,
+    grid=True,
+    figsize=None,
+    layout=None,
+    return_type=None,
+    **kwargs,
+):
+    plot_backend = _get_plot_backend("matplotlib")
+    return plot_backend.boxplot(
+        data,
+        column=column,
+        by=by,
+        ax=ax,
+        fontsize=fontsize,
+        rot=rot,
+        grid=grid,
+        figsize=figsize,
+        layout=layout,
+        return_type=return_type,
+        **kwargs,
+    )
+
+
+@Substitution(backend=_backend_doc)
+@Appender(_boxplot_doc)
+def boxplot_frame(
+    self,
+    column=None,
+    by=None,
+    ax=None,
+    fontsize=None,
+    rot=0,
+    grid=True,
+    figsize=None,
+    layout=None,
+    return_type=None,
+    backend=None,
+    **kwargs,
+):
+    plot_backend = _get_plot_backend(backend)
+    return plot_backend.boxplot_frame(
+        self,
+        column=column,
+        by=by,
+        ax=ax,
+        fontsize=fontsize,
+        rot=rot,
+        grid=grid,
+        figsize=figsize,
+        layout=layout,
+        return_type=return_type,
+        **kwargs,
+    )
+
+
+def boxplot_frame_groupby(
+    grouped,
+    subplots=True,
+    column=None,
+    fontsize=None,
+    rot=0,
+    grid=True,
+    ax=None,
+    figsize=None,
+    layout=None,
+    sharex=False,
+    sharey=True,
+    backend=None,
+    **kwargs,
+):
+    """
+    Make box plots from DataFrameGroupBy data.
+
+    Parameters
+    ----------
+    grouped : Grouped DataFrame
+    subplots : bool
+        * ``False`` - no subplots will be used
+        * ``True`` - create a subplot for each group.
+
+    column : column name or list of names, or vector
+        Can be any valid input to groupby.
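+        For example, ``column=['Col1', 'Col2']`` restricts the boxplots to
+        those two columns.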
+ fontsize : int or str + rot : label rotation angle + grid : Setting this to True will show the grid + ax : Matplotlib axis object, default None + figsize : A tuple (width, height) in inches + layout : tuple (optional) + The layout of the plot: (rows, columns). + sharex : bool, default False + Whether x-axes will be shared among subplots. + sharey : bool, default True + Whether y-axes will be shared among subplots. + backend : str, default None + Backend to use instead of the backend specified in the option + ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to + specify the ``plotting.backend`` for the whole session, set + ``pd.options.plotting.backend``. + + .. versionadded:: 1.0.0 + + **kwargs + All other plotting keyword arguments to be passed to + matplotlib's boxplot function. + + Returns + ------- + dict of key/value = group key/DataFrame.boxplot return value + or DataFrame.boxplot return value in case subplots=figures=False + + Examples + -------- + You can create boxplots for grouped data and show them as separate subplots: + + .. plot:: + :context: close-figs + + >>> import itertools + >>> tuples = [t for t in itertools.product(range(1000), range(4))] + >>> index = pd.MultiIndex.from_tuples(tuples, names=['lvl0', 'lvl1']) + >>> data = np.random.randn(len(index),4) + >>> df = pd.DataFrame(data, columns=list('ABCD'), index=index) + >>> grouped = df.groupby(level='lvl1') + >>> grouped.boxplot(rot=45, fontsize=12, figsize=(8,10)) # doctest: +SKIP + + The ``subplots=False`` option shows the boxplots in a single figure. + + .. plot:: + :context: close-figs + + >>> grouped.boxplot(subplots=False, rot=45, fontsize=12) # doctest: +SKIP + """ + plot_backend = _get_plot_backend(backend) + return plot_backend.boxplot_frame_groupby( + grouped, + subplots=subplots, + column=column, + fontsize=fontsize, + rot=rot, + grid=grid, + ax=ax, + figsize=figsize, + layout=layout, + sharex=sharex, + sharey=sharey, + **kwargs, + ) + + +class PlotAccessor(PandasObject): + """ + Make plots of Series or DataFrame. + + Uses the backend specified by the + option ``plotting.backend``. By default, matplotlib is used. + + Parameters + ---------- + data : Series or DataFrame + The object for which the method is called. + x : label or position, default None + Only used if data is a DataFrame. + y : label, position or list of label, positions, default None + Allows plotting of one column versus another. Only used if data is a + DataFrame. + kind : str + The kind of plot to produce: + + - 'line' : line plot (default) + - 'bar' : vertical bar plot + - 'barh' : horizontal bar plot + - 'hist' : histogram + - 'box' : boxplot + - 'kde' : Kernel Density Estimation plot + - 'density' : same as 'kde' + - 'area' : area plot + - 'pie' : pie plot + - 'scatter' : scatter plot (DataFrame only) + - 'hexbin' : hexbin plot (DataFrame only) + ax : matplotlib axes object, default None + An axes of the current figure. + subplots : bool, default False + Make separate subplots for each column. + sharex : bool, default True if ax is None else False + In case ``subplots=True``, share x axis and set some x axis labels + to invisible; defaults to True if ax is None otherwise False if + an ax is passed in; Be aware, that passing in both an ax and + ``sharex=True`` will alter all x axis labels for all axis in a figure. + sharey : bool, default False + In case ``subplots=True``, share y axis and set some y axis labels to invisible. + layout : tuple, optional + (rows, columns) for the layout of subplots. 
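+        For example, ``layout=(2, 3)`` arranges the subplots in two rows and
+        three columns.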
+ figsize : a tuple (width, height) in inches + Size of a figure object. + use_index : bool, default True + Use index as ticks for x axis. + title : str or list + Title to use for the plot. If a string is passed, print the string + at the top of the figure. If a list is passed and `subplots` is + True, print each item in the list above the corresponding subplot. + grid : bool, default None (matlab style default) + Axis grid lines. + legend : bool or {'reverse'} + Place legend on axis subplots. + style : list or dict + The matplotlib line style per column. + logx : bool or 'sym', default False + Use log scaling or symlog scaling on x axis. + .. versionchanged:: 0.25.0 + + logy : bool or 'sym' default False + Use log scaling or symlog scaling on y axis. + .. versionchanged:: 0.25.0 + + loglog : bool or 'sym', default False + Use log scaling or symlog scaling on both x and y axes. + .. versionchanged:: 0.25.0 + + xticks : sequence + Values to use for the xticks. + yticks : sequence + Values to use for the yticks. + xlim : 2-tuple/list + Set the x limits of the current axes. + ylim : 2-tuple/list + Set the y limits of the current axes. + xlabel : label, optional + Name to use for the xlabel on x-axis. Default uses index name as xlabel, or the + x-column name for planar plots. + + .. versionadded:: 1.1.0 + + .. versionchanged:: 1.2.0 + + Now applicable to planar plots (`scatter`, `hexbin`). + + ylabel : label, optional + Name to use for the ylabel on y-axis. Default will show no ylabel, or the + y-column name for planar plots. + + .. versionadded:: 1.1.0 + + .. versionchanged:: 1.2.0 + + Now applicable to planar plots (`scatter`, `hexbin`). + + rot : int, default None + Rotation for ticks (xticks for vertical, yticks for horizontal + plots). + fontsize : int, default None + Font size for xticks and yticks. + colormap : str or matplotlib colormap object, default None + Colormap to select colors from. If string, load colormap with that + name from matplotlib. + colorbar : bool, optional + If True, plot colorbar (only relevant for 'scatter' and 'hexbin' + plots). + position : float + Specify relative alignments for bar plot layout. + From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 + (center). + table : bool, Series or DataFrame, default False + If True, draw a table using the data in the DataFrame and the data + will be transposed to meet matplotlib's default layout. + If a Series or DataFrame is passed, use passed data to draw a + table. + yerr : DataFrame, Series, array-like, dict and str + See :ref:`Plotting with Error Bars ` for + detail. + xerr : DataFrame, Series, array-like, dict and str + Equivalent to yerr. + stacked : bool, default False in line and bar plots, and True in area plot + If True, create stacked plot. + sort_columns : bool, default False + Sort column names to determine plot ordering. + secondary_y : bool or sequence, default False + Whether to plot on the secondary y-axis if a list/tuple, which + columns to plot on secondary y-axis. + mark_right : bool, default True + When using a secondary_y axis, automatically mark the column + labels with "(right)" in the legend. + include_bool : bool, default is False + If True, boolean values can be plotted. + backend : str, default None + Backend to use instead of the backend specified in the option + ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to + specify the ``plotting.backend`` for the whole session, set + ``pd.options.plotting.backend``. + + .. 
versionadded:: 1.0.0 + + **kwargs + Options to pass to matplotlib plotting method. + + Returns + ------- + :class:`matplotlib.axes.Axes` or numpy.ndarray of them + If the backend is not the default matplotlib one, the return value + will be the object returned by the backend. + + Notes + ----- + - See matplotlib documentation online for more on this subject + - If `kind` = 'bar' or 'barh', you can specify relative alignments + for bar plot layout by `position` keyword. + From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 + (center) + """ + + _common_kinds = ("line", "bar", "barh", "kde", "density", "area", "hist", "box") + _series_kinds = ("pie",) + _dataframe_kinds = ("scatter", "hexbin") + _kind_aliases = {"density": "kde"} + _all_kinds = _common_kinds + _series_kinds + _dataframe_kinds + + def __init__(self, data): + self._parent = data + + @staticmethod + def _get_call_args(backend_name, data, args, kwargs): + """ + This function makes calls to this accessor `__call__` method compatible + with the previous `SeriesPlotMethods.__call__` and + `DataFramePlotMethods.__call__`. Those had slightly different + signatures, since `DataFramePlotMethods` accepted `x` and `y` + parameters. + """ + if isinstance(data, ABCSeries): + arg_def = [ + ("kind", "line"), + ("ax", None), + ("figsize", None), + ("use_index", True), + ("title", None), + ("grid", None), + ("legend", False), + ("style", None), + ("logx", False), + ("logy", False), + ("loglog", False), + ("xticks", None), + ("yticks", None), + ("xlim", None), + ("ylim", None), + ("rot", None), + ("fontsize", None), + ("colormap", None), + ("table", False), + ("yerr", None), + ("xerr", None), + ("label", None), + ("secondary_y", False), + ("xlabel", None), + ("ylabel", None), + ] + elif isinstance(data, ABCDataFrame): + arg_def = [ + ("x", None), + ("y", None), + ("kind", "line"), + ("ax", None), + ("subplots", False), + ("sharex", None), + ("sharey", False), + ("layout", None), + ("figsize", None), + ("use_index", True), + ("title", None), + ("grid", None), + ("legend", True), + ("style", None), + ("logx", False), + ("logy", False), + ("loglog", False), + ("xticks", None), + ("yticks", None), + ("xlim", None), + ("ylim", None), + ("rot", None), + ("fontsize", None), + ("colormap", None), + ("table", False), + ("yerr", None), + ("xerr", None), + ("secondary_y", False), + ("sort_columns", False), + ("xlabel", None), + ("ylabel", None), + ] + else: + raise TypeError( + f"Called plot accessor for type {type(data).__name__}, " + "expected Series or DataFrame" + ) + + if args and isinstance(data, ABCSeries): + positional_args = str(args)[1:-1] + keyword_args = ", ".join( + [f"{name}={repr(value)}" for (name, _), value in zip(arg_def, args)] + ) + msg = ( + "`Series.plot()` should not be called with positional " + "arguments, only keyword arguments. The order of " + "positional arguments will change in the future. " + f"Use `Series.plot({keyword_args})` instead of " + f"`Series.plot({positional_args})`." 
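+                # e.g. for Series.plot("bar", None) the hint rendered above is
+                # `Series.plot(kind='bar', ax=None)` (illustrative values)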
+ ) + raise TypeError(msg) + + pos_args = {name: value for (name, _), value in zip(arg_def, args)} + if backend_name == "pandas.plotting._matplotlib": + kwargs = dict(arg_def, **pos_args, **kwargs) + else: + kwargs = dict(pos_args, **kwargs) + + x = kwargs.pop("x", None) + y = kwargs.pop("y", None) + kind = kwargs.pop("kind", "line") + return x, y, kind, kwargs + + def __call__(self, *args, **kwargs): + plot_backend = _get_plot_backend(kwargs.pop("backend", None)) + + x, y, kind, kwargs = self._get_call_args( + plot_backend.__name__, self._parent, args, kwargs + ) + + kind = self._kind_aliases.get(kind, kind) + + # when using another backend, get out of the way + if plot_backend.__name__ != "pandas.plotting._matplotlib": + return plot_backend.plot(self._parent, x=x, y=y, kind=kind, **kwargs) + + if kind not in self._all_kinds: + raise ValueError(f"{kind} is not a valid plot kind") + + # The original data structured can be transformed before passed to the + # backend. For example, for DataFrame is common to set the index as the + # `x` parameter, and return a Series with the parameter `y` as values. + data = self._parent.copy() + + if isinstance(data, ABCSeries): + kwargs["reuse_plot"] = True + + if kind in self._dataframe_kinds: + if isinstance(data, ABCDataFrame): + return plot_backend.plot(data, x=x, y=y, kind=kind, **kwargs) + else: + raise ValueError(f"plot kind {kind} can only be used for data frames") + elif kind in self._series_kinds: + if isinstance(data, ABCDataFrame): + if y is None and kwargs.get("subplots") is False: + raise ValueError( + f"{kind} requires either y column or 'subplots=True'" + ) + elif y is not None: + if is_integer(y) and not data.columns.holds_integer(): + y = data.columns[y] + # converted to series actually. copy to not modify + data = data[y].copy() + data.index.name = y + elif isinstance(data, ABCDataFrame): + data_cols = data.columns + if x is not None: + if is_integer(x) and not data.columns.holds_integer(): + x = data_cols[x] + elif not isinstance(data[x], ABCSeries): + raise ValueError("x must be a label or position") + data = data.set_index(x) + if y is not None: + # check if we have y as int or list of ints + int_ylist = is_list_like(y) and all(is_integer(c) for c in y) + int_y_arg = is_integer(y) or int_ylist + if int_y_arg and not data.columns.holds_integer(): + y = data_cols[y] + + label_kw = kwargs["label"] if "label" in kwargs else False + for kw in ["xerr", "yerr"]: + if kw in kwargs and ( + isinstance(kwargs[kw], str) or is_integer(kwargs[kw]) + ): + try: + kwargs[kw] = data[kwargs[kw]] + except (IndexError, KeyError, TypeError): + pass + + # don't overwrite + data = data[y].copy() + + if isinstance(data, ABCSeries): + label_name = label_kw or y + data.name = label_name + else: + match = is_list_like(label_kw) and len(label_kw) == len(y) + if label_kw and not match: + raise ValueError( + "label should be list-like and same length as y" + ) + label_name = label_kw or data.columns + data.columns = label_name + + return plot_backend.plot(data, kind=kind, **kwargs) + + __call__.__doc__ = __doc__ + + @Appender( + """ + See Also + -------- + matplotlib.pyplot.plot : Plot y versus x as lines and/or markers. + + Examples + -------- + + .. plot:: + :context: close-figs + + >>> s = pd.Series([1, 3, 2]) + >>> s.plot.line() + + + .. plot:: + :context: close-figs + + The following example shows the populations for some animals + over the years. + + >>> df = pd.DataFrame({ + ... 'pig': [20, 18, 489, 675, 1776], + ... 'horse': [4, 25, 281, 600, 1900] + ... 
}, index=[1990, 1997, 2003, 2009, 2014])
+        >>> lines = df.plot.line()
+
+    .. plot::
+        :context: close-figs
+
+        An example with subplots, so an array of axes is returned.
+
+        >>> axes = df.plot.line(subplots=True)
+        >>> type(axes)
+        <class 'numpy.ndarray'>
+
+    .. plot::
+        :context: close-figs
+
+        Let's repeat the same example, but specifying colors for
+        each column (in this case, for each animal).
+
+        >>> axes = df.plot.line(
+        ...     subplots=True, color={"pig": "pink", "horse": "#742802"}
+        ... )
+
+    .. plot::
+        :context: close-figs
+
+        The following example shows the relationship between both
+        populations.
+
+        >>> lines = df.plot.line(x='pig', y='horse')
+    """
+    )
+    @Substitution(kind="line")
+    @Appender(_bar_or_line_doc)
+    def line(self, x=None, y=None, **kwargs):
+        """
+        Plot Series or DataFrame as lines.
+
+        This function is useful to plot lines using DataFrame's values
+        as coordinates.
+        """
+        return self(kind="line", x=x, y=y, **kwargs)
+
+    @Appender(
+        """
+        See Also
+        --------
+        DataFrame.plot.barh : Horizontal bar plot.
+        DataFrame.plot : Make plots of a DataFrame.
+        matplotlib.pyplot.bar : Make a bar plot with matplotlib.
+
+        Examples
+        --------
+        Basic plot.
+
+        .. plot::
+            :context: close-figs
+
+            >>> df = pd.DataFrame({'lab':['A', 'B', 'C'], 'val':[10, 30, 20]})
+            >>> ax = df.plot.bar(x='lab', y='val', rot=0)
+
+        Plot a whole dataframe to a bar plot. Each column is assigned a
+        distinct color, and each row is nested in a group along the
+        horizontal axis.
+
+        .. plot::
+            :context: close-figs
+
+            >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
+            >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
+            >>> index = ['snail', 'pig', 'elephant',
+            ...          'rabbit', 'giraffe', 'coyote', 'horse']
+            >>> df = pd.DataFrame({'speed': speed,
+            ...                    'lifespan': lifespan}, index=index)
+            >>> ax = df.plot.bar(rot=0)
+
+        Plot stacked bar charts for the DataFrame
+
+        .. plot::
+            :context: close-figs
+
+            >>> ax = df.plot.bar(stacked=True)
+
+        Instead of nesting, the figure can be split by column with
+        ``subplots=True``. In this case, a :class:`numpy.ndarray` of
+        :class:`matplotlib.axes.Axes` are returned.
+
+        .. plot::
+            :context: close-figs
+
+            >>> axes = df.plot.bar(rot=0, subplots=True)
+            >>> axes[1].legend(loc=2)  # doctest: +SKIP
+
+        If you don't like the default colours, you can specify how you'd
+        like each column to be colored.
+
+        .. plot::
+            :context: close-figs
+
+            >>> axes = df.plot.bar(
+            ...     rot=0, subplots=True, color={"speed": "red", "lifespan": "green"}
+            ... )
+            >>> axes[1].legend(loc=2)  # doctest: +SKIP
+
+        Plot a single column.
+
+        .. plot::
+            :context: close-figs
+
+            >>> ax = df.plot.bar(y='speed', rot=0)
+
+        Plot only selected categories for the DataFrame.
+
+        .. plot::
+            :context: close-figs
+
+            >>> ax = df.plot.bar(x='lifespan', rot=0)
+    """
+    )
+    @Substitution(kind="bar")
+    @Appender(_bar_or_line_doc)
+    def bar(self, x=None, y=None, **kwargs):
+        """
+        Vertical bar plot.
+
+        A bar plot is a plot that presents categorical data with
+        rectangular bars with lengths proportional to the values that they
+        represent. A bar plot shows comparisons among discrete categories. One
+        axis of the plot shows the specific categories being compared, and the
+        other axis represents a measured value.
+        """
+        return self(kind="bar", x=x, y=y, **kwargs)
+
+    @Appender(
+        """
+        See Also
+        --------
+        DataFrame.plot.bar: Vertical bar plot.
+        DataFrame.plot : Make plots of DataFrame using matplotlib.
+        matplotlib.axes.Axes.bar : Plot a vertical bar plot using matplotlib.
+
+        Examples
+        --------
+        Basic example
+
+        .. plot::
+            :context: close-figs
+
+            >>> df = pd.DataFrame({'lab': ['A', 'B', 'C'], 'val': [10, 30, 20]})
+            >>> ax = df.plot.barh(x='lab', y='val')
+
+        Plot a whole DataFrame to a horizontal bar plot
+
+        .. plot::
+            :context: close-figs
+
+            >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
+            >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
+            >>> index = ['snail', 'pig', 'elephant',
+            ...          'rabbit', 'giraffe', 'coyote', 'horse']
+            >>> df = pd.DataFrame({'speed': speed,
+            ...                    'lifespan': lifespan}, index=index)
+            >>> ax = df.plot.barh()
+
+        Plot stacked barh charts for the DataFrame
+
+        .. plot::
+            :context: close-figs
+
+            >>> ax = df.plot.barh(stacked=True)
+
+        We can specify colors for each column
+
+        .. plot::
+            :context: close-figs
+
+            >>> ax = df.plot.barh(color={"speed": "red", "lifespan": "green"})
+
+        Plot a column of the DataFrame to a horizontal bar plot
+
+        .. plot::
+            :context: close-figs
+
+            >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
+            >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
+            >>> index = ['snail', 'pig', 'elephant',
+            ...          'rabbit', 'giraffe', 'coyote', 'horse']
+            >>> df = pd.DataFrame({'speed': speed,
+            ...                    'lifespan': lifespan}, index=index)
+            >>> ax = df.plot.barh(y='speed')
+
+        Plot DataFrame versus the desired column
+
+        .. plot::
+            :context: close-figs
+
+            >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
+            >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
+            >>> index = ['snail', 'pig', 'elephant',
+            ...          'rabbit', 'giraffe', 'coyote', 'horse']
+            >>> df = pd.DataFrame({'speed': speed,
+            ...                    'lifespan': lifespan}, index=index)
+            >>> ax = df.plot.barh(x='lifespan')
+    """
+    )
+    @Substitution(kind="bar")
+    @Appender(_bar_or_line_doc)
+    def barh(self, x=None, y=None, **kwargs):
+        """
+        Make a horizontal bar plot.
+
+        A horizontal bar plot is a plot that presents quantitative data with
+        rectangular bars with lengths proportional to the values that they
+        represent. A bar plot shows comparisons among discrete categories. One
+        axis of the plot shows the specific categories being compared, and the
+        other axis represents a measured value.
+        """
+        return self(kind="barh", x=x, y=y, **kwargs)
+
+    def box(self, by=None, **kwargs):
+        r"""
+        Make a box plot of the DataFrame columns.
+
+        A box plot is a method for graphically depicting groups of numerical
+        data through their quartiles.
+        The box extends from the Q1 to Q3 quartile values of the data,
+        with a line at the median (Q2). The whiskers extend from the edges
+        of box to show the range of the data. The position of the whiskers
+        is set by default to 1.5*IQR (IQR = Q3 - Q1) from the edges of the
+        box. Outlier points are those past the end of the whiskers.
+
+        For further details see Wikipedia's
+        entry for `boxplot <https://en.wikipedia.org/wiki/Box_plot>`__.
+
+        A consideration when using this chart is that the box and the whiskers
+        can overlap, which is very common when plotting small sets of data.
+
+        Parameters
+        ----------
+        by : str or sequence
+            Column in the DataFrame to group by.
+
+            .. versionchanged:: 1.4.0
+
+               Previously, ``by`` was silently ignored and made no groupings.
+
+        **kwargs
+            Additional keywords are documented in
+            :meth:`DataFrame.plot`.
+
+        Returns
+        -------
+        :class:`matplotlib.axes.Axes` or numpy.ndarray of them
+
+        See Also
+        --------
+        DataFrame.boxplot: Another method to draw a box plot.
+        Series.plot.box: Draw a box plot from a Series object.
+        matplotlib.pyplot.boxplot: Draw a box plot in matplotlib.
+
+        Examples
+        --------
+        Draw a box plot from a DataFrame with four columns of randomly
+        generated data.
plot:: + :context: close-figs + + >>> data = np.random.randn(25, 4) + >>> df = pd.DataFrame(data, columns=list('ABCD')) + >>> ax = df.plot.box() + + You can also generate groupings if you specify the `by` parameter (which + can take a column name, or a list or tuple of column names): + + .. versionchanged:: 1.4.0 + + .. plot:: + :context: close-figs + + >>> age_list = [8, 10, 12, 14, 72, 74, 76, 78, 20, 25, 30, 35, 60, 85] + >>> df = pd.DataFrame({"gender": list("MMMMMMMMFFFFFF"), "age": age_list}) + >>> ax = df.plot.box(column="age", by="gender", figsize=(10, 8)) + """ + return self(kind="box", by=by, **kwargs) + + def hist(self, by=None, bins=10, **kwargs): + """ + Draw one histogram of the DataFrame's columns. + + A histogram is a representation of the distribution of data. + This function groups the values of all given Series in the DataFrame + into bins and draws all bins in one :class:`matplotlib.axes.Axes`. + This is useful when the DataFrame's Series are in a similar scale. + + Parameters + ---------- + by : str or sequence, optional + Column in the DataFrame to group by. + + .. versionchanged:: 1.4.0 + + Previously, `by` is silently ignore and makes no groupings + + bins : int, default 10 + Number of histogram bins to be used. + **kwargs + Additional keyword arguments are documented in + :meth:`DataFrame.plot`. + + Returns + ------- + class:`matplotlib.AxesSubplot` + Return a histogram plot. + + See Also + -------- + DataFrame.hist : Draw histograms per DataFrame's Series. + Series.hist : Draw a histogram with Series' data. + + Examples + -------- + When we roll a die 6000 times, we expect to get each value around 1000 + times. But when we roll two dice and sum the result, the distribution + is going to be quite different. A histogram illustrates those + distributions. + + .. plot:: + :context: close-figs + + >>> df = pd.DataFrame( + ... np.random.randint(1, 7, 6000), + ... columns = ['one']) + >>> df['two'] = df['one'] + np.random.randint(1, 7, 6000) + >>> ax = df.plot.hist(bins=12, alpha=0.5) + + A grouped histogram can be generated by providing the parameter `by` (which + can be a column name, or a list of column names): + + .. plot:: + :context: close-figs + + >>> age_list = [8, 10, 12, 14, 72, 74, 76, 78, 20, 25, 30, 35, 60, 85] + >>> df = pd.DataFrame({"gender": list("MMMMMMMMFFFFFF"), "age": age_list}) + >>> ax = df.plot.hist(column=["age"], by="gender", figsize=(10, 8)) + """ + return self(kind="hist", by=by, bins=bins, **kwargs) + + def kde(self, bw_method=None, ind=None, **kwargs): + """ + Generate Kernel Density Estimate plot using Gaussian kernels. + + In statistics, `kernel density estimation`_ (KDE) is a non-parametric + way to estimate the probability density function (PDF) of a random + variable. This function uses Gaussian kernels and includes automatic + bandwidth determination. + + .. _kernel density estimation: + https://en.wikipedia.org/wiki/Kernel_density_estimation + + Parameters + ---------- + bw_method : str, scalar or callable, optional + The method used to calculate the estimator bandwidth. This can be + 'scott', 'silverman', a scalar constant or a callable. + If None (default), 'scott' is used. + See :class:`scipy.stats.gaussian_kde` for more information. + ind : NumPy array or int, optional + Evaluation points for the estimated PDF. If None (default), + 1000 equally spaced points are used. If `ind` is a NumPy array, the + KDE is evaluated at the points passed. If `ind` is an integer, + `ind` number of equally spaced points are used. 
+ **kwargs + Additional keyword arguments are documented in + :meth:`DataFrame.plot`. + + Returns + ------- + matplotlib.axes.Axes or numpy.ndarray of them + + See Also + -------- + scipy.stats.gaussian_kde : Representation of a kernel-density + estimate using Gaussian kernels. This is the function used + internally to estimate the PDF. + + Examples + -------- + Given a Series of points randomly sampled from an unknown + distribution, estimate its PDF using KDE with automatic + bandwidth determination and plot the results, evaluating them at + 1000 equally spaced points (default): + + .. plot:: + :context: close-figs + + >>> s = pd.Series([1, 2, 2.5, 3, 3.5, 4, 5]) + >>> ax = s.plot.kde() + + A scalar bandwidth can be specified. Using a small bandwidth value can + lead to over-fitting, while using a large bandwidth value may result + in under-fitting: + + .. plot:: + :context: close-figs + + >>> ax = s.plot.kde(bw_method=0.3) + + .. plot:: + :context: close-figs + + >>> ax = s.plot.kde(bw_method=3) + + Finally, the `ind` parameter determines the evaluation points for the + plot of the estimated PDF: + + .. plot:: + :context: close-figs + + >>> ax = s.plot.kde(ind=[1, 2, 3, 4, 5]) + + For DataFrame, it works in the same way: + + .. plot:: + :context: close-figs + + >>> df = pd.DataFrame({ + ... 'x': [1, 2, 2.5, 3, 3.5, 4, 5], + ... 'y': [4, 4, 4.5, 5, 5.5, 6, 6], + ... }) + >>> ax = df.plot.kde() + + A scalar bandwidth can be specified. Using a small bandwidth value can + lead to over-fitting, while using a large bandwidth value may result + in under-fitting: + + .. plot:: + :context: close-figs + + >>> ax = df.plot.kde(bw_method=0.3) + + .. plot:: + :context: close-figs + + >>> ax = df.plot.kde(bw_method=3) + + Finally, the `ind` parameter determines the evaluation points for the + plot of the estimated PDF: + + .. plot:: + :context: close-figs + + >>> ax = df.plot.kde(ind=[1, 2, 3, 4, 5, 6]) + """ + return self(kind="kde", bw_method=bw_method, ind=ind, **kwargs) + + density = kde + + def area(self, x=None, y=None, **kwargs): + """ + Draw a stacked area plot. + + An area plot displays quantitative data visually. + This function wraps the matplotlib area function. + + Parameters + ---------- + x : label or position, optional + Coordinates for the X axis. By default uses the index. + y : label or position, optional + Column to plot. By default uses all columns. + stacked : bool, default True + Area plots are stacked by default. Set to False to create a + unstacked plot. + **kwargs + Additional keyword arguments are documented in + :meth:`DataFrame.plot`. + + Returns + ------- + matplotlib.axes.Axes or numpy.ndarray + Area plot, or array of area plots if subplots is True. + + See Also + -------- + DataFrame.plot : Make plots of DataFrame using matplotlib / pylab. + + Examples + -------- + Draw an area plot based on basic business metrics: + + .. plot:: + :context: close-figs + + >>> df = pd.DataFrame({ + ... 'sales': [3, 2, 3, 9, 10, 6], + ... 'signups': [5, 5, 6, 12, 14, 13], + ... 'visits': [20, 42, 28, 62, 81, 50], + ... }, index=pd.date_range(start='2018/01/01', end='2018/07/01', + ... freq='M')) + >>> ax = df.plot.area() + + Area plots are stacked by default. To produce an unstacked plot, + pass ``stacked=False``: + + .. plot:: + :context: close-figs + + >>> ax = df.plot.area(stacked=False) + + Draw an area plot for a single column: + + .. plot:: + :context: close-figs + + >>> ax = df.plot.area(y='sales') + + Draw with a different `x`: + + .. 
plot:: + :context: close-figs + + >>> df = pd.DataFrame({ + ... 'sales': [3, 2, 3], + ... 'visits': [20, 42, 28], + ... 'day': [1, 2, 3], + ... }) + >>> ax = df.plot.area(x='day') + """ + return self(kind="area", x=x, y=y, **kwargs) + + def pie(self, **kwargs): + """ + Generate a pie plot. + + A pie plot is a proportional representation of the numerical data in a + column. This function wraps :meth:`matplotlib.pyplot.pie` for the + specified column. If no column reference is passed and + ``subplots=True`` a pie plot is drawn for each numerical column + independently. + + Parameters + ---------- + y : int or label, optional + Label or position of the column to plot. + If not provided, ``subplots=True`` argument must be passed. + **kwargs + Keyword arguments to pass on to :meth:`DataFrame.plot`. + + Returns + ------- + matplotlib.axes.Axes or np.ndarray of them + A NumPy array is returned when `subplots` is True. + + See Also + -------- + Series.plot.pie : Generate a pie plot for a Series. + DataFrame.plot : Make plots of a DataFrame. + + Examples + -------- + In the example below we have a DataFrame with information about the + planets' mass and radius. We pass the 'mass' column to the + pie function to get a pie plot. + + .. plot:: + :context: close-figs + + >>> df = pd.DataFrame({'mass': [0.330, 4.87 , 5.97], + ... 'radius': [2439.7, 6051.8, 6378.1]}, + ... index=['Mercury', 'Venus', 'Earth']) + >>> plot = df.plot.pie(y='mass', figsize=(5, 5)) + + .. plot:: + :context: close-figs + + >>> plot = df.plot.pie(subplots=True, figsize=(11, 6)) + """ + if ( + isinstance(self._parent, ABCDataFrame) + and kwargs.get("y", None) is None + and not kwargs.get("subplots", False) + ): + raise ValueError("pie requires either y column or 'subplots=True'") + return self(kind="pie", **kwargs) + + def scatter(self, x, y, s=None, c=None, **kwargs): + """ + Create a scatter plot with varying marker point size and color. + + The coordinates of each point are defined by two dataframe columns and + filled circles are used to represent each point. This kind of plot is + useful to see complex correlations between two variables. Points could, + for instance, be natural 2D coordinates like longitude and latitude in + a map or, in general, any pair of metrics that can be plotted against + each other. + + Parameters + ---------- + x : int or str + The column name or column position to be used as horizontal + coordinates for each point. + y : int or str + The column name or column position to be used as vertical + coordinates for each point. + s : str, scalar or array-like, optional + The size of each point. Possible values are: + + - A string with the name of the column to be used for marker's size. + + - A single scalar so all points have the same size. + + - A sequence of scalars, which will be recycled across the points. + For instance, when passing [2, 14], the marker sizes will + alternate between 2 and 14. + + .. versionchanged:: 1.1.0 + + c : str, int or array-like, optional + The color of each point. Possible values are: + + - A single color string referred to by name, RGB or RGBA code, + for instance 'red' or '#a98d19'. + + - A sequence of color strings referred to by name, RGB or RGBA + code, which will be recycled across the points. For instance, + with ['green', 'yellow'] the points will be filled with green + and yellow alternately. + + - A column name or position whose values will be used to color the + marker points according to a colormap.
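Because the ``s`` bullet above (new in 1.1.0) also accepts a column name, marker size and color can both be driven by data; a minimal sketch, with an invented ``count`` column and assuming matplotlib is installed:

    import pandas as pd

    df = pd.DataFrame({
        "length": [5.1, 4.9, 7.0, 6.4],
        "width": [3.5, 3.0, 3.2, 3.2],
        "count": [10, 40, 25, 55],  # hypothetical column driving size and color
    })
    ax = df.plot.scatter(x="length", y="width", s="count", c="count",
                         colormap="viridis")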
+ + **kwargs + Keyword arguments to pass on to :meth:`DataFrame.plot`. + + Returns + ------- + :class:`matplotlib.axes.Axes` or numpy.ndarray of them + + See Also + -------- + matplotlib.pyplot.scatter : Scatter plot using multiple input data + formats. + + Examples + -------- + Let's see how to draw a scatter plot using coordinates from the values + in a DataFrame's columns. + + .. plot:: + :context: close-figs + + >>> df = pd.DataFrame([[5.1, 3.5, 0], [4.9, 3.0, 0], [7.0, 3.2, 1], + ... [6.4, 3.2, 1], [5.9, 3.0, 2]], + ... columns=['length', 'width', 'species']) + >>> ax1 = df.plot.scatter(x='length', + ... y='width', + ... c='DarkBlue') + + And now with the color determined by a column as well. + + .. plot:: + :context: close-figs + + >>> ax2 = df.plot.scatter(x='length', + ... y='width', + ... c='species', + ... colormap='viridis') + """ + return self(kind="scatter", x=x, y=y, s=s, c=c, **kwargs) + + def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, **kwargs): + """ + Generate a hexagonal binning plot. + + Generate a hexagonal binning plot of `x` versus `y`. If `C` is `None` + (the default), this is a histogram of the number of occurrences + of the observations at ``(x[i], y[i])``. + + If `C` is specified, it specifies the values at the given coordinates + ``(x[i], y[i])``. These values are accumulated for each hexagonal + bin and then reduced according to `reduce_C_function`, + defaulting to NumPy's mean function (:meth:`numpy.mean`). + (If `C` is specified, it must also be a 1-D sequence + of the same length as `x` and `y`, or a column label.) + + Parameters + ---------- + x : int or str + The column label or position for x points. + y : int or str + The column label or position for y points. + C : int or str, optional + The column label or position for the value of `(x, y)` point. + reduce_C_function : callable, default `np.mean` + Function of one argument that reduces all the values in a bin to + a single number (e.g. `np.mean`, `np.max`, `np.sum`, `np.std`). + gridsize : int or tuple of (int, int), default 100 + The number of hexagons in the x-direction. + The corresponding number of hexagons in the y-direction is + chosen in a way that the hexagons are approximately regular. + Alternatively, gridsize can be a tuple with two elements + specifying the number of hexagons in the x-direction and the + y-direction. + **kwargs + Additional keyword arguments are documented in + :meth:`DataFrame.plot`. + + Returns + ------- + matplotlib.AxesSubplot + The matplotlib ``Axes`` on which the hexbin is plotted. + + See Also + -------- + DataFrame.plot : Make plots of a DataFrame. + matplotlib.pyplot.hexbin : Hexagonal binning plot using matplotlib, + the matplotlib function that is used under the hood. + + Examples + -------- + The following examples are generated with random data from + a normal distribution. + + .. plot:: + :context: close-figs + + >>> n = 10000 + >>> df = pd.DataFrame({'x': np.random.randn(n), + ... 'y': np.random.randn(n)}) + >>> ax = df.plot.hexbin(x='x', y='y', gridsize=20) + + The next example uses `C` and `np.sum` as `reduce_C_function`. + Note that the `'observations'` values range from 1 to 5, but the + resulting plot shows values above 25. This is because of the + `reduce_C_function`. + + .. plot:: + :context: close-figs + + >>> n = 500 + >>> df = pd.DataFrame({ + ... 'coord_x': np.random.uniform(-3, 3, size=n), + ... 'coord_y': np.random.uniform(30, 50, size=n), + ... 'observations': np.random.randint(1,5, size=n) + ...
}) + >>> ax = df.plot.hexbin(x='coord_x', + ... y='coord_y', + ... C='observations', + ... reduce_C_function=np.sum, + ... gridsize=10, + ... cmap="viridis") + """ + if reduce_C_function is not None: + kwargs["reduce_C_function"] = reduce_C_function + if gridsize is not None: + kwargs["gridsize"] = gridsize + + return self(kind="hexbin", x=x, y=y, C=C, **kwargs) + + +_backends: dict[str, types.ModuleType] = {} + + +def _load_backend(backend: str) -> types.ModuleType: + """ + Load a pandas plotting backend. + + Parameters + ---------- + backend : str + The identifier for the backend. Either an entrypoint item registered + under the "pandas_plotting_backends" group via importlib.metadata, + "matplotlib", or a module name. + + Returns + ------- + types.ModuleType + The imported backend. + """ + from importlib.metadata import entry_points + + if backend == "matplotlib": + # Because matplotlib is an optional dependency and first-party backend, + # we need to attempt an import here to raise an ImportError if needed. + try: + module = importlib.import_module("pandas.plotting._matplotlib") + except ImportError: + raise ImportError( + "matplotlib is required for plotting when the " + 'default backend "matplotlib" is selected.' + ) from None + return module + + found_backend = False + + eps = entry_points() + if "pandas_plotting_backends" in eps: + for entry_point in eps["pandas_plotting_backends"]: + found_backend = entry_point.name == backend + if found_backend: + module = entry_point.load() + break + + if not found_backend: + # Fall back to unregistered, module name approach. + try: + module = importlib.import_module(backend) + found_backend = True + except ImportError: + # We re-raise later on. + pass + + if found_backend: + if hasattr(module, "plot"): + # Validate that the interface is implemented when the option is set, + # rather than at plot time. + return module + + raise ValueError( + f"Could not find plotting backend '{backend}'. Ensure that you've " + f"installed the package providing the '{backend}' entrypoint, or that " + "the package has a top-level `.plot` method." + ) + + +def _get_plot_backend(backend: str | None = None): + """ + Return the plotting backend to use (e.g. `pandas.plotting._matplotlib`). + + The plotting system of pandas uses matplotlib by default, but the idea here + is that it can also work with other third-party backends. This function + returns the module which provides a top-level `.plot` method that will + actually do the plotting. The backend is specified from a string, which + either comes from the keyword argument `backend`, or, if not specified, from + the option `pandas.options.plotting.backend`. All the rest of the code in + this file uses the backend specified there for the plotting. + + The backend is imported lazily, as matplotlib is a soft dependency, and + pandas can be used without it being installed. + + Notes + ----- + Modifies `_backends` with imported backend as a side effect.
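In practice the backend resolved here comes either from the ``backend=`` keyword of a plotting call or from the global option; a minimal sketch (the third-party backend name is illustrative only and assumes such a package is installed):

    import pandas as pd

    pd.set_option("plotting.backend", "matplotlib")  # the first-party default
    df = pd.DataFrame({"a": [1, 2, 3]})
    df.plot()                      # resolved through the global option
    df.plot(backend="matplotlib")  # or overridden per call
    # A module registered under the "pandas_plotting_backends" entry point
    # (or simply importable by name) can be selected the same way, e.g.:
    # pd.set_option("plotting.backend", "plotly")  # assumes plotly is installed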
+ """ + backend = backend or get_option("plotting.backend") + + if backend in _backends: + return _backends[backend] + + module = _load_backend(backend) + _backends[backend] = module + return module diff --git a/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/__init__.py b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/__init__.py new file mode 100644 index 0000000..75c61da --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/__init__.py @@ -0,0 +1,93 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from pandas.plotting._matplotlib.boxplot import ( + BoxPlot, + boxplot, + boxplot_frame, + boxplot_frame_groupby, +) +from pandas.plotting._matplotlib.converter import ( + deregister, + register, +) +from pandas.plotting._matplotlib.core import ( + AreaPlot, + BarhPlot, + BarPlot, + HexBinPlot, + LinePlot, + PiePlot, + ScatterPlot, +) +from pandas.plotting._matplotlib.hist import ( + HistPlot, + KdePlot, + hist_frame, + hist_series, +) +from pandas.plotting._matplotlib.misc import ( + andrews_curves, + autocorrelation_plot, + bootstrap_plot, + lag_plot, + parallel_coordinates, + radviz, + scatter_matrix, +) +from pandas.plotting._matplotlib.tools import table + +if TYPE_CHECKING: + from pandas.plotting._matplotlib.core import MPLPlot + +PLOT_CLASSES: dict[str, type[MPLPlot]] = { + "line": LinePlot, + "bar": BarPlot, + "barh": BarhPlot, + "box": BoxPlot, + "hist": HistPlot, + "kde": KdePlot, + "area": AreaPlot, + "pie": PiePlot, + "scatter": ScatterPlot, + "hexbin": HexBinPlot, +} + + +def plot(data, kind, **kwargs): + # Importing pyplot at the top of the file (before the converters are + # registered) causes problems in matplotlib 2 (converters seem to not + # work) + import matplotlib.pyplot as plt + + if kwargs.pop("reuse_plot", False): + ax = kwargs.get("ax") + if ax is None and len(plt.get_fignums()) > 0: + with plt.rc_context(): + ax = plt.gca() + kwargs["ax"] = getattr(ax, "left_ax", ax) + plot_obj = PLOT_CLASSES[kind](data, **kwargs) + plot_obj.generate() + plot_obj.draw() + return plot_obj.result + + +__all__ = [ + "plot", + "hist_series", + "hist_frame", + "boxplot", + "boxplot_frame", + "boxplot_frame_groupby", + "table", + "andrews_curves", + "autocorrelation_plot", + "bootstrap_plot", + "lag_plot", + "parallel_coordinates", + "radviz", + "scatter_matrix", + "register", + "deregister", +] diff --git a/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..48ffe00 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/__pycache__/boxplot.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/__pycache__/boxplot.cpython-38.pyc new file mode 100644 index 0000000..b3b5142 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/__pycache__/boxplot.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/__pycache__/compat.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/__pycache__/compat.cpython-38.pyc new file mode 100644 index 0000000..49d30b4 Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/__pycache__/compat.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/__pycache__/converter.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/__pycache__/converter.cpython-38.pyc new file mode 100644 index 0000000..3c2a70d Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/__pycache__/converter.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/__pycache__/core.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/__pycache__/core.cpython-38.pyc new file mode 100644 index 0000000..e05616a Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/__pycache__/core.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/__pycache__/groupby.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/__pycache__/groupby.cpython-38.pyc new file mode 100644 index 0000000..7acd63f Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/__pycache__/groupby.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/__pycache__/hist.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/__pycache__/hist.cpython-38.pyc new file mode 100644 index 0000000..b8dddcf Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/__pycache__/hist.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/__pycache__/misc.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/__pycache__/misc.cpython-38.pyc new file mode 100644 index 0000000..b286fbc Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/__pycache__/misc.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/__pycache__/style.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/__pycache__/style.cpython-38.pyc new file mode 100644 index 0000000..9b270cc Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/__pycache__/style.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/__pycache__/timeseries.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/__pycache__/timeseries.cpython-38.pyc new file mode 100644 index 0000000..31f10d4 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/__pycache__/timeseries.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/__pycache__/tools.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/__pycache__/tools.cpython-38.pyc new file mode 100644 index 0000000..48f63c2 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/__pycache__/tools.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/boxplot.py b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/boxplot.py new file mode 100644 index 0000000..a2089de --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/boxplot.py @@ -0,0 +1,506 @@ +from __future__ import annotations + 
+from typing import ( + TYPE_CHECKING, + NamedTuple, +) +import warnings + +from matplotlib.artist import setp +import numpy as np + +from pandas.core.dtypes.common import is_dict_like +from pandas.core.dtypes.missing import remove_na_arraylike + +import pandas as pd +import pandas.core.common as com + +from pandas.io.formats.printing import pprint_thing +from pandas.plotting._matplotlib.core import ( + LinePlot, + MPLPlot, +) +from pandas.plotting._matplotlib.groupby import create_iter_data_given_by +from pandas.plotting._matplotlib.style import get_standard_colors +from pandas.plotting._matplotlib.tools import ( + create_subplots, + flatten_axes, + maybe_adjust_figure, +) + +if TYPE_CHECKING: + from matplotlib.axes import Axes + from matplotlib.lines import Line2D + + +class BoxPlot(LinePlot): + _kind = "box" + _layout_type = "horizontal" + + _valid_return_types = (None, "axes", "dict", "both") + + class BP(NamedTuple): + # namedtuple to hold results + ax: Axes + lines: dict[str, list[Line2D]] + + def __init__(self, data, return_type="axes", **kwargs): + # Do not call LinePlot.__init__ which may fill nan + if return_type not in self._valid_return_types: + raise ValueError("return_type must be {None, 'axes', 'dict', 'both'}") + + self.return_type = return_type + MPLPlot.__init__(self, data, **kwargs) + + def _args_adjust(self): + if self.subplots: + # Disable label ax sharing. Otherwise, all subplots shows last + # column label + if self.orientation == "vertical": + self.sharex = False + else: + self.sharey = False + + @classmethod + def _plot(cls, ax, y, column_num=None, return_type="axes", **kwds): + if y.ndim == 2: + y = [remove_na_arraylike(v) for v in y] + # Boxplot fails with empty arrays, so need to add a NaN + # if any cols are empty + # GH 8181 + y = [v if v.size > 0 else np.array([np.nan]) for v in y] + else: + y = remove_na_arraylike(y) + bp = ax.boxplot(y, **kwds) + + if return_type == "dict": + return bp, bp + elif return_type == "both": + return cls.BP(ax=ax, lines=bp), bp + else: + return ax, bp + + def _validate_color_args(self): + if "color" in self.kwds: + if self.colormap is not None: + warnings.warn( + "'color' and 'colormap' cannot be used " + "simultaneously. Using 'color'" + ) + self.color = self.kwds.pop("color") + + if isinstance(self.color, dict): + valid_keys = ["boxes", "whiskers", "medians", "caps"] + for key in self.color: + if key not in valid_keys: + raise ValueError( + f"color dict contains invalid key '{key}'. 
" + f"The key must be either {valid_keys}" + ) + else: + self.color = None + + # get standard colors for default + colors = get_standard_colors(num_colors=3, colormap=self.colormap, color=None) + # use 2 colors by default, for box/whisker and median + # flier colors isn't needed here + # because it can be specified by ``sym`` kw + self._boxes_c = colors[0] + self._whiskers_c = colors[0] + self._medians_c = colors[2] + self._caps_c = colors[0] + + def _get_colors(self, num_colors=None, color_kwds="color"): + pass + + def maybe_color_bp(self, bp): + if isinstance(self.color, dict): + boxes = self.color.get("boxes", self._boxes_c) + whiskers = self.color.get("whiskers", self._whiskers_c) + medians = self.color.get("medians", self._medians_c) + caps = self.color.get("caps", self._caps_c) + else: + # Other types are forwarded to matplotlib + # If None, use default colors + boxes = self.color or self._boxes_c + whiskers = self.color or self._whiskers_c + medians = self.color or self._medians_c + caps = self.color or self._caps_c + + # GH 30346, when users specifying those arguments explicitly, our defaults + # for these four kwargs should be overridden; if not, use Pandas settings + if not self.kwds.get("boxprops"): + setp(bp["boxes"], color=boxes, alpha=1) + if not self.kwds.get("whiskerprops"): + setp(bp["whiskers"], color=whiskers, alpha=1) + if not self.kwds.get("medianprops"): + setp(bp["medians"], color=medians, alpha=1) + if not self.kwds.get("capprops"): + setp(bp["caps"], color=caps, alpha=1) + + def _make_plot(self): + if self.subplots: + self._return_obj = pd.Series(dtype=object) + + # Re-create iterated data if `by` is assigned by users + data = ( + create_iter_data_given_by(self.data, self._kind) + if self.by is not None + else self.data + ) + + for i, (label, y) in enumerate(self._iter_data(data=data)): + ax = self._get_ax(i) + kwds = self.kwds.copy() + + # When by is applied, show title for subplots to know which group it is + # just like df.boxplot, and need to apply T on y to provide right input + if self.by is not None: + y = y.T + ax.set_title(pprint_thing(label)) + + # When `by` is assigned, the ticklabels will become unique grouped + # values, instead of label which is used as subtitle in this case. 
+ ticklabels = [ + pprint_thing(col) for col in self.data.columns.levels[0] + ] + else: + ticklabels = [pprint_thing(label)] + + ret, bp = self._plot( + ax, y, column_num=i, return_type=self.return_type, **kwds + ) + self.maybe_color_bp(bp) + self._return_obj[label] = ret + self._set_ticklabels(ax, ticklabels) + else: + y = self.data.values.T + ax = self._get_ax(0) + kwds = self.kwds.copy() + + ret, bp = self._plot( + ax, y, column_num=0, return_type=self.return_type, **kwds + ) + self.maybe_color_bp(bp) + self._return_obj = ret + + labels = [left for left, _ in self._iter_data()] + labels = [pprint_thing(left) for left in labels] + if not self.use_index: + labels = [pprint_thing(key) for key in range(len(labels))] + self._set_ticklabels(ax, labels) + + def _set_ticklabels(self, ax: Axes, labels): + if self.orientation == "vertical": + ax.set_xticklabels(labels) + else: + ax.set_yticklabels(labels) + + def _make_legend(self): + pass + + def _post_plot_logic(self, ax, data): + pass + + @property + def orientation(self): + if self.kwds.get("vert", True): + return "vertical" + else: + return "horizontal" + + @property + def result(self): + if self.return_type is None: + return super().result + else: + return self._return_obj + + +def _grouped_plot_by_column( + plotf, + data, + columns=None, + by=None, + numeric_only=True, + grid=False, + figsize=None, + ax=None, + layout=None, + return_type=None, + **kwargs, +): + grouped = data.groupby(by) + if columns is None: + if not isinstance(by, (list, tuple)): + by = [by] + columns = data._get_numeric_data().columns.difference(by) + naxes = len(columns) + fig, axes = create_subplots( + naxes=naxes, sharex=True, sharey=True, figsize=figsize, ax=ax, layout=layout + ) + + _axes = flatten_axes(axes) + + ax_values = [] + + for i, col in enumerate(columns): + ax = _axes[i] + gp_col = grouped[col] + keys, values = zip(*gp_col) + re_plotf = plotf(keys, values, ax, **kwargs) + ax.set_title(col) + ax.set_xlabel(pprint_thing(by)) + ax_values.append(re_plotf) + ax.grid(grid) + + result = pd.Series(ax_values, index=columns) + + # Return axes in multiplot case, maybe revisit later # 985 + if return_type is None: + result = axes + + byline = by[0] if len(by) == 1 else by + fig.suptitle(f"Boxplot grouped by {byline}") + maybe_adjust_figure(fig, bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2) + + return result + + +def boxplot( + data, + column=None, + by=None, + ax=None, + fontsize=None, + rot=0, + grid=True, + figsize=None, + layout=None, + return_type=None, + **kwds, +): + + import matplotlib.pyplot as plt + + # validate return_type: + if return_type not in BoxPlot._valid_return_types: + raise ValueError("return_type must be {'axes', 'dict', 'both'}") + + if isinstance(data, pd.Series): + data = data.to_frame("x") + column = "x" + + def _get_colors(): + # num_colors=3 is required as method maybe_color_bp takes the colors + # in positions 0 and 2. + # if colors not provided, use same defaults as DataFrame.plot.box + result = get_standard_colors(num_colors=3) + result = np.take(result, [0, 0, 2]) + result = np.append(result, "k") + + colors = kwds.pop("color", None) + if colors: + if is_dict_like(colors): + # replace colors in result array with user-specified colors + # taken from the colors dict parameter + # "boxes" value placed in position 0, "whiskers" in 1, etc. 
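# For reference, the ``return_type`` validated earlier shapes what the
# top-level ``boxplot`` below hands back (a sketch, assuming matplotlib
# is installed):
#
#     df.boxplot(return_type="axes")  # the Axes; the default when by is None
#     df.boxplot(return_type="dict")  # matplotlib's dict of Line2D lists
#     df.boxplot(return_type="both")  # BoxPlot.BP namedtuple with .ax and .lines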
+ valid_keys = ["boxes", "whiskers", "medians", "caps"] + key_to_index = dict(zip(valid_keys, range(4))) + for key, value in colors.items(): + if key in valid_keys: + result[key_to_index[key]] = value + else: + raise ValueError( + f"color dict contains invalid key '{key}'. " + f"The key must be either {valid_keys}" + ) + else: + result.fill(colors) + + return result + + def maybe_color_bp(bp, **kwds): + # GH 30346, when users specifying those arguments explicitly, our defaults + # for these four kwargs should be overridden; if not, use Pandas settings + if not kwds.get("boxprops"): + setp(bp["boxes"], color=colors[0], alpha=1) + if not kwds.get("whiskerprops"): + setp(bp["whiskers"], color=colors[1], alpha=1) + if not kwds.get("medianprops"): + setp(bp["medians"], color=colors[2], alpha=1) + if not kwds.get("capprops"): + setp(bp["caps"], color=colors[3], alpha=1) + + def plot_group(keys, values, ax: Axes): + keys = [pprint_thing(x) for x in keys] + values = [np.asarray(remove_na_arraylike(v), dtype=object) for v in values] + bp = ax.boxplot(values, **kwds) + if fontsize is not None: + ax.tick_params(axis="both", labelsize=fontsize) + if kwds.get("vert", 1): + ticks = ax.get_xticks() + if len(ticks) != len(keys): + i, remainder = divmod(len(ticks), len(keys)) + assert remainder == 0, remainder + keys *= i + ax.set_xticklabels(keys, rotation=rot) + else: + ax.set_yticklabels(keys, rotation=rot) + maybe_color_bp(bp, **kwds) + + # Return axes in multiplot case, maybe revisit later # 985 + if return_type == "dict": + return bp + elif return_type == "both": + return BoxPlot.BP(ax=ax, lines=bp) + else: + return ax + + colors = _get_colors() + if column is None: + columns = None + else: + if isinstance(column, (list, tuple)): + columns = column + else: + columns = [column] + + if by is not None: + # Prefer array return type for 2-D plots to match the subplot layout + # https://github.com/pandas-dev/pandas/pull/12216#issuecomment-241175580 + result = _grouped_plot_by_column( + plot_group, + data, + columns=columns, + by=by, + grid=grid, + figsize=figsize, + ax=ax, + layout=layout, + return_type=return_type, + ) + else: + if return_type is None: + return_type = "axes" + if layout is not None: + raise ValueError("The 'layout' keyword is not supported when 'by' is None") + + if ax is None: + rc = {"figure.figsize": figsize} if figsize is not None else {} + with plt.rc_context(rc): + ax = plt.gca() + data = data._get_numeric_data() + naxes = len(data.columns) + if naxes == 0: + raise ValueError( + "boxplot method requires numerical columns, nothing to plot." 
+ ) + if columns is None: + columns = data.columns + else: + data = data[columns] + + result = plot_group(columns, data.values.T, ax) + ax.grid(grid) + + return result + + +def boxplot_frame( + self, + column=None, + by=None, + ax=None, + fontsize=None, + rot=0, + grid=True, + figsize=None, + layout=None, + return_type=None, + **kwds, +): + import matplotlib.pyplot as plt + + ax = boxplot( + self, + column=column, + by=by, + ax=ax, + fontsize=fontsize, + grid=grid, + rot=rot, + figsize=figsize, + layout=layout, + return_type=return_type, + **kwds, + ) + plt.draw_if_interactive() + return ax + + +def boxplot_frame_groupby( + grouped, + subplots=True, + column=None, + fontsize=None, + rot=0, + grid=True, + ax=None, + figsize=None, + layout=None, + sharex=False, + sharey=True, + **kwds, +): + if subplots is True: + naxes = len(grouped) + fig, axes = create_subplots( + naxes=naxes, + squeeze=False, + ax=ax, + sharex=sharex, + sharey=sharey, + figsize=figsize, + layout=layout, + ) + axes = flatten_axes(axes) + + ret = pd.Series(dtype=object) + + for (key, group), ax in zip(grouped, axes): + d = group.boxplot( + ax=ax, column=column, fontsize=fontsize, rot=rot, grid=grid, **kwds + ) + ax.set_title(pprint_thing(key)) + ret.loc[key] = d + maybe_adjust_figure(fig, bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2) + else: + keys, frames = zip(*grouped) + if grouped.axis == 0: + df = pd.concat(frames, keys=keys, axis=1) + else: + if len(frames) > 1: + df = frames[0].join(frames[1::]) + else: + df = frames[0] + + # GH 16748, DataFrameGroupby fails when subplots=False and `column` argument + # is assigned, and in this case, since `df` here becomes MI after groupby, + # so we need to couple the keys (grouped values) and column (original df + # column) together to search for subset to plot + if column is not None: + column = com.convert_to_list_like(column) + multi_key = pd.MultiIndex.from_product([keys, column]) + column = list(multi_key.values) + ret = df.boxplot( + column=column, + fontsize=fontsize, + rot=rot, + grid=grid, + ax=ax, + figsize=figsize, + layout=layout, + **kwds, + ) + return ret diff --git a/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/compat.py b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/compat.py new file mode 100644 index 0000000..5569b1f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/compat.py @@ -0,0 +1,27 @@ +# being a bit too dynamic +import operator + +from pandas.util.version import Version + + +def _mpl_version(version, op): + def inner(): + try: + import matplotlib as mpl + except ImportError: + return False + return ( + op(Version(mpl.__version__), Version(version)) + and str(mpl.__version__)[0] != "0" + ) + + return inner + + +mpl_ge_2_2_3 = _mpl_version("2.2.3", operator.ge) +mpl_ge_3_0_0 = _mpl_version("3.0.0", operator.ge) +mpl_ge_3_1_0 = _mpl_version("3.1.0", operator.ge) +mpl_ge_3_2_0 = _mpl_version("3.2.0", operator.ge) +mpl_ge_3_3_0 = _mpl_version("3.3.0", operator.ge) +mpl_ge_3_4_0 = _mpl_version("3.4.0", operator.ge) +mpl_ge_3_5_0 = _mpl_version("3.5.0", operator.ge) diff --git a/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/converter.py b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/converter.py new file mode 100644 index 0000000..90d3f8d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/converter.py @@ -0,0 +1,1104 @@ +from __future__ import annotations + +import contextlib +import datetime as pydt +from datetime 
import ( + datetime, + timedelta, + tzinfo, +) +import functools +from typing import ( + Any, + cast, +) + +from dateutil.relativedelta import relativedelta +import matplotlib.dates as dates +from matplotlib.ticker import ( + AutoLocator, + Formatter, + Locator, +) +from matplotlib.transforms import nonsingular +import matplotlib.units as units +import numpy as np + +from pandas._libs import lib +from pandas._libs.tslibs import ( + Timestamp, + to_offset, +) +from pandas._libs.tslibs.dtypes import FreqGroup +from pandas._libs.tslibs.offsets import BaseOffset +from pandas._typing import F + +from pandas.core.dtypes.common import ( + is_float, + is_float_dtype, + is_integer, + is_integer_dtype, + is_nested_list_like, +) + +from pandas import ( + Index, + Series, + get_option, +) +import pandas.core.common as com +from pandas.core.indexes.datetimes import date_range +from pandas.core.indexes.period import ( + Period, + PeriodIndex, + period_range, +) +import pandas.core.tools.datetimes as tools + +# constants +HOURS_PER_DAY = 24.0 +MIN_PER_HOUR = 60.0 +SEC_PER_MIN = 60.0 + +SEC_PER_HOUR = SEC_PER_MIN * MIN_PER_HOUR +SEC_PER_DAY = SEC_PER_HOUR * HOURS_PER_DAY + +MUSEC_PER_DAY = 10 ** 6 * SEC_PER_DAY + +_mpl_units = {} # Cache for units overwritten by us + + +def get_pairs(): + pairs = [ + (Timestamp, DatetimeConverter), + (Period, PeriodConverter), + (pydt.datetime, DatetimeConverter), + (pydt.date, DatetimeConverter), + (pydt.time, TimeConverter), + (np.datetime64, DatetimeConverter), + ] + return pairs + + +def register_pandas_matplotlib_converters(func: F) -> F: + """ + Decorator applying pandas_converters. + """ + + @functools.wraps(func) + def wrapper(*args, **kwargs): + with pandas_converters(): + return func(*args, **kwargs) + + return cast(F, wrapper) + + +@contextlib.contextmanager +def pandas_converters(): + """ + Context manager registering pandas' converters for a plot. + + See Also + -------- + register_pandas_matplotlib_converters : Decorator that applies this. + """ + value = get_option("plotting.matplotlib.register_converters") + + if value: + # register for True or "auto" + register() + try: + yield + finally: + if value == "auto": + # only deregister for "auto" + deregister() + + +def register(): + pairs = get_pairs() + for type_, cls in pairs: + # Cache previous converter if present + if type_ in units.registry and not isinstance(units.registry[type_], cls): + previous = units.registry[type_] + _mpl_units[type_] = previous + # Replace with pandas converter + units.registry[type_] = cls() + + +def deregister(): + # Renamed in pandas.plotting.__init__ + for type_, cls in get_pairs(): + # We use type to catch our classes directly, no inheritance + if type(units.registry.get(type_)) is cls: + units.registry.pop(type_) + + # restore the old keys + for unit, formatter in _mpl_units.items(): + if type(formatter) not in {DatetimeConverter, PeriodConverter, TimeConverter}: + # make it idempotent by excluding ours. 
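# register() and deregister() back the public pandas.plotting API; typical
# use goes through the exported wrappers (a sketch, assuming matplotlib is
# installed):
#
#     from pandas.plotting import (
#         register_matplotlib_converters,    # calls register() above
#         deregister_matplotlib_converters,  # calls deregister() below
#     )
#     register_matplotlib_converters()
#     # ... plot pandas datetime data with plain matplotlib ...
#     deregister_matplotlib_converters()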
+ units.registry[unit] = formatter + + +def _to_ordinalf(tm: pydt.time) -> float: + tot_sec = tm.hour * 3600 + tm.minute * 60 + tm.second + tm.microsecond / 10 ** 6 + return tot_sec + + +def time2num(d): + if isinstance(d, str): + parsed = tools.to_datetime(d) + if not isinstance(parsed, datetime): + raise ValueError(f"Could not parse time {d}") + return _to_ordinalf(parsed.time()) + if isinstance(d, pydt.time): + return _to_ordinalf(d) + return d + + +class TimeConverter(units.ConversionInterface): + @staticmethod + def convert(value, unit, axis): + valid_types = (str, pydt.time) + if isinstance(value, valid_types) or is_integer(value) or is_float(value): + return time2num(value) + if isinstance(value, Index): + return value.map(time2num) + if isinstance(value, (list, tuple, np.ndarray, Index)): + return [time2num(x) for x in value] + return value + + @staticmethod + def axisinfo(unit, axis) -> units.AxisInfo | None: + if unit != "time": + return None + + majloc = AutoLocator() + majfmt = TimeFormatter(majloc) + return units.AxisInfo(majloc=majloc, majfmt=majfmt, label="time") + + @staticmethod + def default_units(x, axis) -> str: + return "time" + + +# time formatter +class TimeFormatter(Formatter): + def __init__(self, locs): + self.locs = locs + + def __call__(self, x, pos=0) -> str: + """ + Return the time of day as a formatted string. + + Parameters + ---------- + x : float + The time of day specified as seconds since 00:00 (midnight), + with up to microsecond precision. + pos + Unused + + Returns + ------- + str + A string in HH:MM:SS.mmmuuu format. Microseconds, + milliseconds and seconds are only displayed if non-zero. + """ + fmt = "%H:%M:%S.%f" + s = int(x) + msus = round((x - s) * 10 ** 6) + ms = msus // 1000 + us = msus % 1000 + m, s = divmod(s, 60) + h, m = divmod(m, 60) + _, h = divmod(h, 24) + if us != 0: + return pydt.time(h, m, s, msus).strftime(fmt) + elif ms != 0: + return pydt.time(h, m, s, msus).strftime(fmt)[:-3] + elif s != 0: + return pydt.time(h, m, s).strftime("%H:%M:%S") + + return pydt.time(h, m).strftime("%H:%M") + + +# Period Conversion + + +class PeriodConverter(dates.DateConverter): + @staticmethod + def convert(values, units, axis): + if is_nested_list_like(values): + values = [PeriodConverter._convert_1d(v, units, axis) for v in values] + else: + values = PeriodConverter._convert_1d(values, units, axis) + return values + + @staticmethod + def _convert_1d(values, units, axis): + if not hasattr(axis, "freq"): + raise TypeError("Axis must have `freq` set to convert to Periods") + valid_types = (str, datetime, Period, pydt.date, pydt.time, np.datetime64) + if isinstance(values, valid_types) or is_integer(values) or is_float(values): + return get_datevalue(values, axis.freq) + elif isinstance(values, PeriodIndex): + return values.asfreq(axis.freq).asi8 + elif isinstance(values, Index): + return values.map(lambda x: get_datevalue(x, axis.freq)) + elif lib.infer_dtype(values, skipna=False) == "period": + # https://github.com/pandas-dev/pandas/issues/24304 + # convert ndarray[period] -> PeriodIndex + return PeriodIndex(values, freq=axis.freq).asi8 + elif isinstance(values, (list, tuple, np.ndarray, Index)): + return [get_datevalue(x, axis.freq) for x in values] + return values + + +def get_datevalue(date, freq): + if isinstance(date, Period): + return date.asfreq(freq).ordinal + elif isinstance(date, (str, datetime, pydt.date, pydt.time, np.datetime64)): + return Period(date, freq).ordinal + elif ( + is_integer(date) + or is_float(date) + or (isinstance(date, 
(np.ndarray, Index)) and (date.size == 1)) + ): + return date + elif date is None: + return None + raise ValueError(f"Unrecognizable date '{date}'") + + +# Datetime Conversion +class DatetimeConverter(dates.DateConverter): + @staticmethod + def convert(values, unit, axis): + # values might be a 1-d array, or a list-like of arrays. + if is_nested_list_like(values): + values = [DatetimeConverter._convert_1d(v, unit, axis) for v in values] + else: + values = DatetimeConverter._convert_1d(values, unit, axis) + return values + + @staticmethod + def _convert_1d(values, unit, axis): + def try_parse(values): + try: + return dates.date2num(tools.to_datetime(values)) + except Exception: + return values + + if isinstance(values, (datetime, pydt.date, np.datetime64, pydt.time)): + return dates.date2num(values) + elif is_integer(values) or is_float(values): + return values + elif isinstance(values, str): + return try_parse(values) + elif isinstance(values, (list, tuple, np.ndarray, Index, Series)): + if isinstance(values, Series): + # https://github.com/matplotlib/matplotlib/issues/11391 + # Series was skipped. Convert to DatetimeIndex to get asi8 + values = Index(values) + if isinstance(values, Index): + values = values.values + if not isinstance(values, np.ndarray): + values = com.asarray_tuplesafe(values) + + if is_integer_dtype(values) or is_float_dtype(values): + return values + + try: + values = tools.to_datetime(values) + except Exception: + pass + + values = dates.date2num(values) + + return values + + @staticmethod + def axisinfo(unit: tzinfo | None, axis) -> units.AxisInfo: + """ + Return the :class:`~matplotlib.units.AxisInfo` for *unit*. + + *unit* is a tzinfo instance or None. + The *axis* argument is required but not used. + """ + tz = unit + + majloc = PandasAutoDateLocator(tz=tz) + majfmt = PandasAutoDateFormatter(majloc, tz=tz) + datemin = pydt.date(2000, 1, 1) + datemax = pydt.date(2010, 1, 1) + + return units.AxisInfo( + majloc=majloc, majfmt=majfmt, label="", default_limits=(datemin, datemax) + ) + + +class PandasAutoDateFormatter(dates.AutoDateFormatter): + def __init__(self, locator, tz=None, defaultfmt="%Y-%m-%d"): + dates.AutoDateFormatter.__init__(self, locator, tz, defaultfmt) + + +class PandasAutoDateLocator(dates.AutoDateLocator): + def get_locator(self, dmin, dmax): + """Pick the best locator based on a distance.""" + delta = relativedelta(dmax, dmin) + + num_days = (delta.years * 12.0 + delta.months) * 31.0 + delta.days + num_sec = (delta.hours * 60.0 + delta.minutes) * 60.0 + delta.seconds + tot_sec = num_days * 86400.0 + num_sec + + if abs(tot_sec) < self.minticks: + self._freq = -1 + locator = MilliSecondLocator(self.tz) + locator.set_axis(self.axis) + + locator.axis.set_view_interval(*self.axis.get_view_interval()) + locator.axis.set_data_interval(*self.axis.get_data_interval()) + return locator + + return dates.AutoDateLocator.get_locator(self, dmin, dmax) + + def _get_unit(self): + return MilliSecondLocator.get_unit_generic(self._freq) + + +class MilliSecondLocator(dates.DateLocator): + + UNIT = 1.0 / (24 * 3600 * 1000) + + def __init__(self, tz): + dates.DateLocator.__init__(self, tz) + self._interval = 1.0 + + def _get_unit(self): + return self.get_unit_generic(-1) + + @staticmethod + def get_unit_generic(freq): + unit = dates.RRuleLocator.get_unit_generic(freq) + if unit < 0: + return MilliSecondLocator.UNIT + return unit + + def __call__(self): + # if no data have been set, this will tank with a ValueError + try: + dmin, dmax = self.viewlim_to_dt() + except 
ValueError: + return [] + + # We need to cap at the endpoints of valid datetime + nmax, nmin = dates.date2num((dmax, dmin)) + + num = (nmax - nmin) * 86400 * 1000 + max_millis_ticks = 6 + for interval in [1, 10, 50, 100, 200, 500]: + if num <= interval * (max_millis_ticks - 1): + self._interval = interval + break + else: + # We went through the whole loop without breaking, default to 1 + self._interval = 1000.0 + + estimate = (nmax - nmin) / (self._get_unit() * self._get_interval()) + + if estimate > self.MAXTICKS * 2: + raise RuntimeError( + "MillisecondLocator estimated to generate " + f"{estimate:d} ticks from {dmin} to {dmax}: exceeds Locator.MAXTICKS" + f"* 2 ({self.MAXTICKS * 2:d}) " + ) + + interval = self._get_interval() + freq = f"{interval}L" + tz = self.tz.tzname(None) + st = dmin.replace(tzinfo=None) + ed = dmax.replace(tzinfo=None) + all_dates = date_range(start=st, end=ed, freq=freq, tz=tz).astype(object) + + try: + if len(all_dates) > 0: + locs = self.raise_if_exceeds(dates.date2num(all_dates)) + return locs + except Exception: # pragma: no cover + pass + + lims = dates.date2num([dmin, dmax]) + return lims + + def _get_interval(self): + return self._interval + + def autoscale(self): + """ + Set the view limits to include the data range. + """ + # We need to cap at the endpoints of valid datetime + dmin, dmax = self.datalim_to_dt() + + vmin = dates.date2num(dmin) + vmax = dates.date2num(dmax) + + return self.nonsingular(vmin, vmax) + + +def _from_ordinal(x, tz: tzinfo | None = None) -> datetime: + ix = int(x) + dt = datetime.fromordinal(ix) + remainder = float(x) - ix + hour, remainder = divmod(24 * remainder, 1) + minute, remainder = divmod(60 * remainder, 1) + second, remainder = divmod(60 * remainder, 1) + microsecond = int(1_000_000 * remainder) + if microsecond < 10: + microsecond = 0 # compensate for rounding errors + dt = datetime( + dt.year, dt.month, dt.day, int(hour), int(minute), int(second), microsecond + ) + if tz is not None: + dt = dt.astimezone(tz) + + if microsecond > 999990: # compensate for rounding errors + dt += timedelta(microseconds=1_000_000 - microsecond) + + return dt + + +# Fixed frequency dynamic tick locators and formatters + +# ------------------------------------------------------------------------- +# --- Locators --- +# ------------------------------------------------------------------------- + + +def _get_default_annual_spacing(nyears) -> tuple[int, int]: + """ + Returns a default spacing between consecutive ticks for annual data. + """ + if nyears < 11: + (min_spacing, maj_spacing) = (1, 1) + elif nyears < 20: + (min_spacing, maj_spacing) = (1, 2) + elif nyears < 50: + (min_spacing, maj_spacing) = (1, 5) + elif nyears < 100: + (min_spacing, maj_spacing) = (5, 10) + elif nyears < 200: + (min_spacing, maj_spacing) = (5, 25) + elif nyears < 600: + (min_spacing, maj_spacing) = (10, 50) + else: + factor = nyears // 1000 + 1 + (min_spacing, maj_spacing) = (factor * 20, factor * 100) + return (min_spacing, maj_spacing) + + +def period_break(dates: PeriodIndex, period: str) -> np.ndarray: + """ + Returns the indices where the given period changes. + + Parameters + ---------- + dates : PeriodIndex + Array of intervals to monitor. + period : str + Name of the period to monitor.
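Examples
--------
With monthly periods, the year boundary is where the ``year`` attribute
changes between consecutive periods (a small sketch; the helper is
module-private and shown for illustration only):

>>> dates = pd.period_range("2000-11", "2001-02", freq="M")
>>> period_break(dates, "year")
array([2])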
+ """ + current = getattr(dates, period) + previous = getattr(dates - 1 * dates.freq, period) + return np.nonzero(current - previous)[0] + + +def has_level_label(label_flags: np.ndarray, vmin: float) -> bool: + """ + Returns true if the ``label_flags`` indicate there is at least one label + for this level. + + if the minimum view limit is not an exact integer, then the first tick + label won't be shown, so we must adjust for that. + """ + if label_flags.size == 0 or ( + label_flags.size == 1 and label_flags[0] == 0 and vmin % 1 > 0.0 + ): + return False + else: + return True + + +def _daily_finder(vmin, vmax, freq: BaseOffset): + dtype_code = freq._period_dtype_code + + periodsperday = -1 + + if dtype_code >= FreqGroup.FR_HR.value: + if dtype_code == FreqGroup.FR_NS.value: + periodsperday = 24 * 60 * 60 * 1000000000 + elif dtype_code == FreqGroup.FR_US.value: + periodsperday = 24 * 60 * 60 * 1000000 + elif dtype_code == FreqGroup.FR_MS.value: + periodsperday = 24 * 60 * 60 * 1000 + elif dtype_code == FreqGroup.FR_SEC.value: + periodsperday = 24 * 60 * 60 + elif dtype_code == FreqGroup.FR_MIN.value: + periodsperday = 24 * 60 + elif dtype_code == FreqGroup.FR_HR.value: + periodsperday = 24 + else: # pragma: no cover + raise ValueError(f"unexpected frequency: {dtype_code}") + periodsperyear = 365 * periodsperday + periodspermonth = 28 * periodsperday + + elif dtype_code == FreqGroup.FR_BUS.value: + periodsperyear = 261 + periodspermonth = 19 + elif dtype_code == FreqGroup.FR_DAY.value: + periodsperyear = 365 + periodspermonth = 28 + elif FreqGroup.get_freq_group(dtype_code) == FreqGroup.FR_WK: + periodsperyear = 52 + periodspermonth = 3 + else: # pragma: no cover + raise ValueError("unexpected frequency") + + # save this for later usage + vmin_orig = vmin + + (vmin, vmax) = ( + Period(ordinal=int(vmin), freq=freq), + Period(ordinal=int(vmax), freq=freq), + ) + span = vmax.ordinal - vmin.ordinal + 1 + dates_ = period_range(start=vmin, end=vmax, freq=freq) + # Initialize the output + info = np.zeros( + span, dtype=[("val", np.int64), ("maj", bool), ("min", bool), ("fmt", "|S20")] + ) + info["val"][:] = dates_.asi8 + info["fmt"][:] = "" + info["maj"][[0, -1]] = True + # .. and set some shortcuts + info_maj = info["maj"] + info_min = info["min"] + info_fmt = info["fmt"] + + def first_label(label_flags): + if (label_flags[0] == 0) and (label_flags.size > 1) and ((vmin_orig % 1) > 0.0): + return label_flags[1] + else: + return label_flags[0] + + # Case 1. 
Less than a month + if span <= periodspermonth: + day_start = period_break(dates_, "day") + month_start = period_break(dates_, "month") + + def _hour_finder(label_interval, force_year_start): + _hour = dates_.hour + _prev_hour = (dates_ - 1 * dates_.freq).hour + hour_start = (_hour - _prev_hour) != 0 + info_maj[day_start] = True + info_min[hour_start & (_hour % label_interval == 0)] = True + year_start = period_break(dates_, "year") + info_fmt[hour_start & (_hour % label_interval == 0)] = "%H:%M" + info_fmt[day_start] = "%H:%M\n%d-%b" + info_fmt[year_start] = "%H:%M\n%d-%b\n%Y" + if force_year_start and not has_level_label(year_start, vmin_orig): + info_fmt[first_label(day_start)] = "%H:%M\n%d-%b\n%Y" + + def _minute_finder(label_interval): + hour_start = period_break(dates_, "hour") + _minute = dates_.minute + _prev_minute = (dates_ - 1 * dates_.freq).minute + minute_start = (_minute - _prev_minute) != 0 + info_maj[hour_start] = True + info_min[minute_start & (_minute % label_interval == 0)] = True + year_start = period_break(dates_, "year") + info_fmt = info["fmt"] + info_fmt[minute_start & (_minute % label_interval == 0)] = "%H:%M" + info_fmt[day_start] = "%H:%M\n%d-%b" + info_fmt[year_start] = "%H:%M\n%d-%b\n%Y" + + def _second_finder(label_interval): + minute_start = period_break(dates_, "minute") + _second = dates_.second + _prev_second = (dates_ - 1 * dates_.freq).second + second_start = (_second - _prev_second) != 0 + info["maj"][minute_start] = True + info["min"][second_start & (_second % label_interval == 0)] = True + year_start = period_break(dates_, "year") + info_fmt = info["fmt"] + info_fmt[second_start & (_second % label_interval == 0)] = "%H:%M:%S" + info_fmt[day_start] = "%H:%M:%S\n%d-%b" + info_fmt[year_start] = "%H:%M:%S\n%d-%b\n%Y" + + if span < periodsperday / 12000: + _second_finder(1) + elif span < periodsperday / 6000: + _second_finder(2) + elif span < periodsperday / 2400: + _second_finder(5) + elif span < periodsperday / 1200: + _second_finder(10) + elif span < periodsperday / 800: + _second_finder(15) + elif span < periodsperday / 400: + _second_finder(30) + elif span < periodsperday / 150: + _minute_finder(1) + elif span < periodsperday / 70: + _minute_finder(2) + elif span < periodsperday / 24: + _minute_finder(5) + elif span < periodsperday / 12: + _minute_finder(15) + elif span < periodsperday / 6: + _minute_finder(30) + elif span < periodsperday / 2.5: + _hour_finder(1, False) + elif span < periodsperday / 1.5: + _hour_finder(2, False) + elif span < periodsperday * 1.25: + _hour_finder(3, False) + elif span < periodsperday * 2.5: + _hour_finder(6, True) + elif span < periodsperday * 4: + _hour_finder(12, True) + else: + info_maj[month_start] = True + info_min[day_start] = True + year_start = period_break(dates_, "year") + info_fmt = info["fmt"] + info_fmt[day_start] = "%d" + info_fmt[month_start] = "%d\n%b" + info_fmt[year_start] = "%d\n%b\n%Y" + if not has_level_label(year_start, vmin_orig): + if not has_level_label(month_start, vmin_orig): + info_fmt[first_label(day_start)] = "%d\n%b\n%Y" + else: + info_fmt[first_label(month_start)] = "%d\n%b\n%Y" + + # Case 2. 
Less than three months + elif span <= periodsperyear // 4: + month_start = period_break(dates_, "month") + info_maj[month_start] = True + if dtype_code < FreqGroup.FR_HR.value: + info["min"] = True + else: + day_start = period_break(dates_, "day") + info["min"][day_start] = True + week_start = period_break(dates_, "week") + year_start = period_break(dates_, "year") + info_fmt[week_start] = "%d" + info_fmt[month_start] = "\n\n%b" + info_fmt[year_start] = "\n\n%b\n%Y" + if not has_level_label(year_start, vmin_orig): + if not has_level_label(month_start, vmin_orig): + info_fmt[first_label(week_start)] = "\n\n%b\n%Y" + else: + info_fmt[first_label(month_start)] = "\n\n%b\n%Y" + # Case 3. Less than 14 months ............... + elif span <= 1.15 * periodsperyear: + year_start = period_break(dates_, "year") + month_start = period_break(dates_, "month") + week_start = period_break(dates_, "week") + info_maj[month_start] = True + info_min[week_start] = True + info_min[year_start] = False + info_min[month_start] = False + info_fmt[month_start] = "%b" + info_fmt[year_start] = "%b\n%Y" + if not has_level_label(year_start, vmin_orig): + info_fmt[first_label(month_start)] = "%b\n%Y" + # Case 4. Less than 2.5 years ............... + elif span <= 2.5 * periodsperyear: + year_start = period_break(dates_, "year") + quarter_start = period_break(dates_, "quarter") + month_start = period_break(dates_, "month") + info_maj[quarter_start] = True + info_min[month_start] = True + info_fmt[quarter_start] = "%b" + info_fmt[year_start] = "%b\n%Y" + # Case 5. Less than 4 years ................. + elif span <= 4 * periodsperyear: + year_start = period_break(dates_, "year") + month_start = period_break(dates_, "month") + info_maj[year_start] = True + info_min[month_start] = True + info_min[year_start] = False + + month_break = dates_[month_start].month + jan_or_jul = month_start[(month_break == 1) | (month_break == 7)] + info_fmt[jan_or_jul] = "%b" + info_fmt[year_start] = "%b\n%Y" + # Case 6. Less than 11 years ................ + elif span <= 11 * periodsperyear: + year_start = period_break(dates_, "year") + quarter_start = period_break(dates_, "quarter") + info_maj[year_start] = True + info_min[quarter_start] = True + info_min[year_start] = False + info_fmt[year_start] = "%Y" + # Case 7. More than 11 years ................
+ else: + year_start = period_break(dates_, "year") + year_break = dates_[year_start].year + nyears = span / periodsperyear + (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears) + major_idx = year_start[(year_break % maj_anndef == 0)] + info_maj[major_idx] = True + minor_idx = year_start[(year_break % min_anndef == 0)] + info_min[minor_idx] = True + info_fmt[major_idx] = "%Y" + + return info + + +def _monthly_finder(vmin, vmax, freq): + periodsperyear = 12 + + vmin_orig = vmin + (vmin, vmax) = (int(vmin), int(vmax)) + span = vmax - vmin + 1 + + # Initialize the output + info = np.zeros( + span, dtype=[("val", int), ("maj", bool), ("min", bool), ("fmt", "|S8")] + ) + info["val"] = np.arange(vmin, vmax + 1) + dates_ = info["val"] + info["fmt"] = "" + year_start = (dates_ % 12 == 0).nonzero()[0] + info_maj = info["maj"] + info_fmt = info["fmt"] + + if span <= 1.15 * periodsperyear: + info_maj[year_start] = True + info["min"] = True + + info_fmt[:] = "%b" + info_fmt[year_start] = "%b\n%Y" + + if not has_level_label(year_start, vmin_orig): + if dates_.size > 1: + idx = 1 + else: + idx = 0 + info_fmt[idx] = "%b\n%Y" + + elif span <= 2.5 * periodsperyear: + quarter_start = (dates_ % 3 == 0).nonzero() + info_maj[year_start] = True + # TODO: Check the following : is it really info['fmt'] ? + info["fmt"][quarter_start] = True + info["min"] = True + + info_fmt[quarter_start] = "%b" + info_fmt[year_start] = "%b\n%Y" + + elif span <= 4 * periodsperyear: + info_maj[year_start] = True + info["min"] = True + + jan_or_jul = (dates_ % 12 == 0) | (dates_ % 12 == 6) + info_fmt[jan_or_jul] = "%b" + info_fmt[year_start] = "%b\n%Y" + + elif span <= 11 * periodsperyear: + quarter_start = (dates_ % 3 == 0).nonzero() + info_maj[year_start] = True + info["min"][quarter_start] = True + + info_fmt[year_start] = "%Y" + + else: + nyears = span / periodsperyear + (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears) + years = dates_[year_start] // 12 + 1 + major_idx = year_start[(years % maj_anndef == 0)] + info_maj[major_idx] = True + info["min"][year_start[(years % min_anndef == 0)]] = True + + info_fmt[major_idx] = "%Y" + + return info + + +def _quarterly_finder(vmin, vmax, freq): + periodsperyear = 4 + vmin_orig = vmin + (vmin, vmax) = (int(vmin), int(vmax)) + span = vmax - vmin + 1 + + info = np.zeros( + span, dtype=[("val", int), ("maj", bool), ("min", bool), ("fmt", "|S8")] + ) + info["val"] = np.arange(vmin, vmax + 1) + info["fmt"] = "" + dates_ = info["val"] + info_maj = info["maj"] + info_fmt = info["fmt"] + year_start = (dates_ % 4 == 0).nonzero()[0] + + if span <= 3.5 * periodsperyear: + info_maj[year_start] = True + info["min"] = True + + info_fmt[:] = "Q%q" + info_fmt[year_start] = "Q%q\n%F" + if not has_level_label(year_start, vmin_orig): + if dates_.size > 1: + idx = 1 + else: + idx = 0 + info_fmt[idx] = "Q%q\n%F" + + elif span <= 11 * periodsperyear: + info_maj[year_start] = True + info["min"] = True + info_fmt[year_start] = "%F" + + else: + years = dates_[year_start] // 4 + 1 + nyears = span / periodsperyear + (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears) + major_idx = year_start[(years % maj_anndef == 0)] + info_maj[major_idx] = True + info["min"][year_start[(years % min_anndef == 0)]] = True + info_fmt[major_idx] = "%F" + + return info + + +def _annual_finder(vmin, vmax, freq): + (vmin, vmax) = (int(vmin), int(vmax + 1)) + span = vmax - vmin + 1 + + info = np.zeros( + span, dtype=[("val", int), ("maj", bool), ("min", bool), ("fmt", "|S8")] + ) + info["val"] = 
np.arange(vmin, vmax + 1) + info["fmt"] = "" + dates_ = info["val"] + + (min_anndef, maj_anndef) = _get_default_annual_spacing(span) + major_idx = dates_ % maj_anndef == 0 + info["maj"][major_idx] = True + info["min"][(dates_ % min_anndef == 0)] = True + info["fmt"][major_idx] = "%Y" + + return info + + +def get_finder(freq: BaseOffset): + dtype_code = freq._period_dtype_code + fgroup = (dtype_code // 1000) * 1000 + fgroup = FreqGroup(fgroup) + + if fgroup == FreqGroup.FR_ANN: + return _annual_finder + elif fgroup == FreqGroup.FR_QTR: + return _quarterly_finder + elif dtype_code == FreqGroup.FR_MTH.value: + return _monthly_finder + elif (dtype_code >= FreqGroup.FR_BUS.value) or fgroup == FreqGroup.FR_WK: + return _daily_finder + else: # pragma: no cover + raise NotImplementedError(f"Unsupported frequency: {dtype_code}") + + +class TimeSeries_DateLocator(Locator): + """ + Locates the ticks along an axis controlled by a :class:`Series`. + + Parameters + ---------- + freq : {var} + Valid frequency specifier. + minor_locator : {False, True}, optional + Whether the locator is for minor ticks (True) or not. + dynamic_mode : {True, False}, optional + Whether the locator should work in dynamic mode. + base : {int}, optional + quarter : {int}, optional + month : {int}, optional + day : {int}, optional + """ + + def __init__( + self, + freq, + minor_locator=False, + dynamic_mode=True, + base=1, + quarter=1, + month=1, + day=1, + plot_obj=None, + ): + freq = to_offset(freq) + self.freq = freq + self.base = base + (self.quarter, self.month, self.day) = (quarter, month, day) + self.isminor = minor_locator + self.isdynamic = dynamic_mode + self.offset = 0 + self.plot_obj = plot_obj + self.finder = get_finder(freq) + + def _get_default_locs(self, vmin, vmax): + """Returns the default locations of ticks.""" + if self.plot_obj.date_axis_info is None: + self.plot_obj.date_axis_info = self.finder(vmin, vmax, self.freq) + + locator = self.plot_obj.date_axis_info + + if self.isminor: + return np.compress(locator["min"], locator["val"]) + return np.compress(locator["maj"], locator["val"]) + + def __call__(self): + """Return the locations of the ticks.""" + # axis calls Locator.set_axis inside set_m_formatter + + vi = tuple(self.axis.get_view_interval()) + if vi != self.plot_obj.view_interval: + self.plot_obj.date_axis_info = None + self.plot_obj.view_interval = vi + vmin, vmax = vi + if vmax < vmin: + vmin, vmax = vmax, vmin + if self.isdynamic: + locs = self._get_default_locs(vmin, vmax) + else: # pragma: no cover + base = self.base + (d, m) = divmod(vmin, base) + vmin = (d + 1) * base + locs = list(range(vmin, vmax + 1, base)) + return locs + + def autoscale(self): + """ + Sets the view limits to the nearest multiples of base that contain the + data. + """ + # requires matplotlib >= 0.98.0 + (vmin, vmax) = self.axis.get_data_interval() + + locs = self._get_default_locs(vmin, vmax) + (vmin, vmax) = locs[[0, -1]] + if vmin == vmax: + vmin -= 1 + vmax += 1 + return nonsingular(vmin, vmax) + + +# ------------------------------------------------------------------------- +# --- Formatter --- +# ------------------------------------------------------------------------- + + +class TimeSeries_DateFormatter(Formatter): + """ + Formats the ticks along an axis controlled by a :class:`PeriodIndex`. + + Parameters + ---------- + freq : {int, string} + Valid frequency specifier. + minor_locator : bool, default False + Whether the current formatter should apply to minor ticks (True) or + major ticks (False). 
+ dynamic_mode : bool, default True + Whether the formatter works in dynamic mode or not. + """ + + def __init__( + self, + freq, + minor_locator: bool = False, + dynamic_mode: bool = True, + plot_obj=None, + ): + freq = to_offset(freq) + self.format = None + self.freq = freq + self.locs: list[Any] = [] # unused, for matplotlib compat + self.formatdict: dict[Any, Any] | None = None + self.isminor = minor_locator + self.isdynamic = dynamic_mode + self.offset = 0 + self.plot_obj = plot_obj + self.finder = get_finder(freq) + + def _set_default_format(self, vmin, vmax): + """Computes and returns the default tick-format dictionary.""" + if self.plot_obj.date_axis_info is None: + self.plot_obj.date_axis_info = self.finder(vmin, vmax, self.freq) + info = self.plot_obj.date_axis_info + + if self.isminor: + format = np.compress(info["min"] & np.logical_not(info["maj"]), info) + else: + format = np.compress(info["maj"], info) + self.formatdict = {x: f for (x, _, _, f) in format} + return self.formatdict + + def set_locs(self, locs): + """Sets the locations of the ticks""" + # don't actually use the locs. This is just needed to work with + # matplotlib. Force to use vmin, vmax + + self.locs = locs + + (vmin, vmax) = vi = tuple(self.axis.get_view_interval()) + if vi != self.plot_obj.view_interval: + self.plot_obj.date_axis_info = None + self.plot_obj.view_interval = vi + if vmax < vmin: + (vmin, vmax) = (vmax, vmin) + self._set_default_format(vmin, vmax) + + def __call__(self, x, pos=0) -> str: + + if self.formatdict is None: + return "" + else: + fmt = self.formatdict.pop(x, "") + if isinstance(fmt, np.bytes_): + fmt = fmt.decode("utf-8") + return Period(ordinal=int(x), freq=self.freq).strftime(fmt) + + +class TimeSeries_TimedeltaFormatter(Formatter): + """ + Formats the ticks along an axis controlled by a :class:`TimedeltaIndex`.
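For context on the formatter machinery above: TimeSeries_DateFormatter resolves each tick by looking up the tick's period ordinal in formatdict and rendering it with Period.strftime. A minimal sketch of that final step, independent of matplotlib (the ordinal and the format string are illustrative values of the kind _monthly_finder produces):

    from pandas import Period

    # Under freq="M", ordinal 0 is 1970-01, so 617 = 51 * 12 + 5 is 2021-06.
    ordinal = 617
    fmt = "%b\n%Y"  # the two-line label used at year boundaries

    label = Period(ordinal=int(ordinal), freq="M").strftime(fmt)
    print(repr(label))  # 'Jun\n2021'

Because __call__ pops each entry out of formatdict, a given tick position is formatted at most once per set_locs pass.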
+ """ + + @staticmethod + def format_timedelta_ticks(x, pos, n_decimals: int) -> str: + """ + Convert seconds to 'D days HH:MM:SS.F' + """ + s, ns = divmod(x, 10 ** 9) + m, s = divmod(s, 60) + h, m = divmod(m, 60) + d, h = divmod(h, 24) + decimals = int(ns * 10 ** (n_decimals - 9)) + s = f"{int(h):02d}:{int(m):02d}:{int(s):02d}" + if n_decimals > 0: + s += f".{decimals:0{n_decimals}d}" + if d != 0: + s = f"{int(d):d} days {s}" + return s + + def __call__(self, x, pos=0) -> str: + (vmin, vmax) = tuple(self.axis.get_view_interval()) + n_decimals = int(np.ceil(np.log10(100 * 10 ** 9 / abs(vmax - vmin)))) + if n_decimals > 9: + n_decimals = 9 + return self.format_timedelta_ticks(x, pos, n_decimals) diff --git a/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/core.py b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/core.py new file mode 100644 index 0000000..ca3eb75 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/core.py @@ -0,0 +1,1681 @@ +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Hashable, +) +import warnings + +from matplotlib.artist import Artist +import numpy as np + +from pandas._typing import IndexLabel +from pandas.errors import AbstractMethodError +from pandas.util._decorators import cache_readonly + +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_extension_array_dtype, + is_float, + is_float_dtype, + is_hashable, + is_integer, + is_integer_dtype, + is_iterator, + is_list_like, + is_number, + is_numeric_dtype, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCIndex, + ABCMultiIndex, + ABCPeriodIndex, + ABCSeries, +) +from pandas.core.dtypes.missing import ( + isna, + notna, +) + +import pandas.core.common as com +from pandas.core.frame import DataFrame + +from pandas.io.formats.printing import pprint_thing +from pandas.plotting._matplotlib.compat import mpl_ge_3_0_0 +from pandas.plotting._matplotlib.converter import register_pandas_matplotlib_converters +from pandas.plotting._matplotlib.groupby import reconstruct_data_with_by +from pandas.plotting._matplotlib.style import get_standard_colors +from pandas.plotting._matplotlib.timeseries import ( + decorate_axes, + format_dateaxis, + maybe_convert_index, + maybe_resample, + use_dynamic_x, +) +from pandas.plotting._matplotlib.tools import ( + create_subplots, + flatten_axes, + format_date_labels, + get_all_lines, + get_xlim, + handle_shared_axes, + table, +) + +if TYPE_CHECKING: + from matplotlib.axes import Axes + from matplotlib.axis import Axis + + +def _color_in_style(style: str) -> bool: + """ + Check if there is a color letter in the style string. + """ + from matplotlib.colors import BASE_COLORS + + return not set(BASE_COLORS).isdisjoint(style) + + +class MPLPlot: + """ + Base class for assembling a pandas plot using matplotlib + + Parameters + ---------- + data : + + """ + + @property + def _kind(self): + """Specify kind str. 
Must be overridden in child class""" + raise NotImplementedError + + _layout_type = "vertical" + _default_rot = 0 + orientation: str | None = None + + axes: np.ndarray # of Axes objects + + def __init__( + self, + data, + kind=None, + by: IndexLabel | None = None, + subplots=False, + sharex=None, + sharey=False, + use_index=True, + figsize=None, + grid=None, + legend=True, + rot=None, + ax=None, + fig=None, + title=None, + xlim=None, + ylim=None, + xticks=None, + yticks=None, + xlabel: Hashable | None = None, + ylabel: Hashable | None = None, + sort_columns=False, + fontsize=None, + secondary_y=False, + colormap=None, + table=False, + layout=None, + include_bool=False, + column: IndexLabel | None = None, + **kwds, + ): + + import matplotlib.pyplot as plt + + self.data = data + + # if users assign an empty list or tuple, raise `ValueError` + # similar to current `df.box` and `df.hist` APIs. + if by in ([], ()): + raise ValueError("No group keys passed!") + self.by = com.maybe_make_list(by) + + # Assign the rest of columns into self.columns if by is explicitly defined + # while column is not, only need `columns` in hist/box plot when it's DF + # TODO: Might deprecate `column` argument in future PR (#28373) + if isinstance(data, DataFrame): + if column: + self.columns = com.maybe_make_list(column) + else: + if self.by is None: + self.columns = [ + col for col in data.columns if is_numeric_dtype(data[col]) + ] + else: + self.columns = [ + col + for col in data.columns + if col not in self.by and is_numeric_dtype(data[col]) + ] + + # For `hist` plot, need to get grouped original data before `self.data` is + # updated later + if self.by is not None and self._kind == "hist": + self._grouped = data.groupby(self.by) + + self.kind = kind + + self.sort_columns = sort_columns + + self.subplots = subplots + + if sharex is None: + + # if by is defined, subplots are used and sharex should be False + if ax is None and by is None: + self.sharex = True + else: + # if we get an axis, the users should do the visibility + # setting... 
+ self.sharex = False + else: + self.sharex = sharex + + self.sharey = sharey + self.figsize = figsize + self.layout = layout + + self.xticks = xticks + self.yticks = yticks + self.xlim = xlim + self.ylim = ylim + self.title = title + self.use_index = use_index + self.xlabel = xlabel + self.ylabel = ylabel + + self.fontsize = fontsize + + if rot is not None: + self.rot = rot + # need to know for format_date_labels since it's rotated to 30 by + # default + self._rot_set = True + else: + self._rot_set = False + self.rot = self._default_rot + + if grid is None: + grid = False if secondary_y else plt.rcParams["axes.grid"] + + self.grid = grid + self.legend = legend + self.legend_handles: list[Artist] = [] + self.legend_labels: list[Hashable] = [] + + self.logx = kwds.pop("logx", False) + self.logy = kwds.pop("logy", False) + self.loglog = kwds.pop("loglog", False) + self.label = kwds.pop("label", None) + self.style = kwds.pop("style", None) + self.mark_right = kwds.pop("mark_right", True) + self.stacked = kwds.pop("stacked", False) + + self.ax = ax + self.fig = fig + self.axes = np.array([], dtype=object) # "real" version get set in `generate` + + # parse errorbar input if given + xerr = kwds.pop("xerr", None) + yerr = kwds.pop("yerr", None) + self.errors = { + kw: self._parse_errorbars(kw, err) + for kw, err in zip(["xerr", "yerr"], [xerr, yerr]) + } + + if not isinstance(secondary_y, (bool, tuple, list, np.ndarray, ABCIndex)): + secondary_y = [secondary_y] + self.secondary_y = secondary_y + + # ugly TypeError if user passes matplotlib's `cmap` name. + # Probably better to accept either. + if "cmap" in kwds and colormap: + raise TypeError("Only specify one of `cmap` and `colormap`.") + elif "cmap" in kwds: + self.colormap = kwds.pop("cmap") + else: + self.colormap = colormap + + self.table = table + self.include_bool = include_bool + + self.kwds = kwds + + self._validate_color_args() + + def _validate_color_args(self): + if ( + "color" in self.kwds + and self.nseries == 1 + and not is_list_like(self.kwds["color"]) + ): + # support series.plot(color='green') + self.kwds["color"] = [self.kwds["color"]] + + if ( + "color" in self.kwds + and isinstance(self.kwds["color"], tuple) + and self.nseries == 1 + and len(self.kwds["color"]) in (3, 4) + ): + # support RGB and RGBA tuples in series plot + self.kwds["color"] = [self.kwds["color"]] + + if ( + "color" in self.kwds or "colors" in self.kwds + ) and self.colormap is not None: + warnings.warn( + "'color' and 'colormap' cannot be used simultaneously. Using 'color'" + ) + + if "color" in self.kwds and self.style is not None: + if is_list_like(self.style): + styles = self.style + else: + styles = [self.style] + # need only a single match + for s in styles: + if _color_in_style(s): + raise ValueError( + "Cannot pass 'style' string with a color symbol and " + "'color' keyword argument. 
Please use one or the " + "other or pass 'style' without a color symbol" + ) + + def _iter_data(self, data=None, keep_index=False, fillna=None): + if data is None: + data = self.data + if fillna is not None: + data = data.fillna(fillna) + + for col, values in data.items(): + if keep_index is True: + yield col, values + else: + yield col, values.values + + @property + def nseries(self) -> int: + + # When `by` is explicitly assigned, grouped data size will be defined, and + # this will determine number of subplots to have, aka `self.nseries` + if self.data.ndim == 1: + return 1 + elif self.by is not None and self._kind == "hist": + return len(self._grouped) + elif self.by is not None and self._kind == "box": + return len(self.columns) + else: + return self.data.shape[1] + + def draw(self): + self.plt.draw_if_interactive() + + def generate(self): + self._args_adjust() + self._compute_plot_data() + self._setup_subplots() + self._make_plot() + self._add_table() + self._make_legend() + self._adorn_subplots() + + for ax in self.axes: + self._post_plot_logic_common(ax, self.data) + self._post_plot_logic(ax, self.data) + + def _args_adjust(self): + pass + + def _has_plotted_object(self, ax: Axes) -> bool: + """check whether ax has data""" + return len(ax.lines) != 0 or len(ax.artists) != 0 or len(ax.containers) != 0 + + def _maybe_right_yaxis(self, ax: Axes, axes_num): + if not self.on_right(axes_num): + # secondary axes may be passed via ax kw + return self._get_ax_layer(ax) + + if hasattr(ax, "right_ax"): + # if it has right_ax property, ``ax`` must be left axes + return ax.right_ax + elif hasattr(ax, "left_ax"): + # if it has left_ax property, ``ax`` must be right axes + return ax + else: + # otherwise, create twin axes + orig_ax, new_ax = ax, ax.twinx() + # TODO: use Matplotlib public API when available + new_ax._get_lines = orig_ax._get_lines + new_ax._get_patches_for_fill = orig_ax._get_patches_for_fill + orig_ax.right_ax, new_ax.left_ax = new_ax, orig_ax + + if not self._has_plotted_object(orig_ax): # no data on left y + orig_ax.get_yaxis().set_visible(False) + + if self.logy is True or self.loglog is True: + new_ax.set_yscale("log") + elif self.logy == "sym" or self.loglog == "sym": + new_ax.set_yscale("symlog") + return new_ax + + def _setup_subplots(self): + if self.subplots: + fig, axes = create_subplots( + naxes=self.nseries, + sharex=self.sharex, + sharey=self.sharey, + figsize=self.figsize, + ax=self.ax, + layout=self.layout, + layout_type=self._layout_type, + ) + else: + if self.ax is None: + fig = self.plt.figure(figsize=self.figsize) + axes = fig.add_subplot(111) + else: + fig = self.ax.get_figure() + if self.figsize is not None: + fig.set_size_inches(self.figsize) + axes = self.ax + + axes = flatten_axes(axes) + + valid_log = {False, True, "sym", None} + input_log = {self.logx, self.logy, self.loglog} + if input_log - valid_log: + invalid_log = next(iter(input_log - valid_log)) + raise ValueError( + f"Boolean, None and 'sym' are valid options, '{invalid_log}' is given." 
+ ) + + if self.logx is True or self.loglog is True: + [a.set_xscale("log") for a in axes] + elif self.logx == "sym" or self.loglog == "sym": + [a.set_xscale("symlog") for a in axes] + + if self.logy is True or self.loglog is True: + [a.set_yscale("log") for a in axes] + elif self.logy == "sym" or self.loglog == "sym": + [a.set_yscale("symlog") for a in axes] + + self.fig = fig + self.axes = axes + + @property + def result(self): + """ + Return result axes + """ + if self.subplots: + if self.layout is not None and not is_list_like(self.ax): + return self.axes.reshape(*self.layout) + else: + return self.axes + else: + sec_true = isinstance(self.secondary_y, bool) and self.secondary_y + all_sec = ( + is_list_like(self.secondary_y) and len(self.secondary_y) == self.nseries + ) + if sec_true or all_sec: + # if all data is plotted on secondary, return right axes + return self._get_ax_layer(self.axes[0], primary=False) + else: + return self.axes[0] + + def _convert_to_ndarray(self, data): + # GH31357: categorical columns are processed separately + if is_categorical_dtype(data): + return data + + # GH32073: cast to float if values contain nulled integers + if ( + is_integer_dtype(data.dtype) or is_float_dtype(data.dtype) + ) and is_extension_array_dtype(data.dtype): + return data.to_numpy(dtype="float", na_value=np.nan) + + # GH25587: cast ExtensionArray of pandas (IntegerArray, etc.) to + # np.ndarray before plot. + if len(data) > 0: + return np.asarray(data) + + return data + + def _compute_plot_data(self): + data = self.data + + if isinstance(data, ABCSeries): + label = self.label + if label is None and data.name is None: + label = "None" + if label is None: + # We'll end up with columns of [0] instead of [None] + data = data.to_frame() + else: + data = data.to_frame(name=label) + elif self._kind in ("hist", "box"): + cols = self.columns if self.by is None else self.columns + self.by + data = data.loc[:, cols] + + # GH15079 reconstruct data if by is defined + if self.by is not None: + self.subplots = True + data = reconstruct_data_with_by(self.data, by=self.by, cols=self.columns) + + # GH16953, _convert is needed as fallback, for ``Series`` + # with ``dtype == object`` + data = data._convert(datetime=True, timedelta=True) + include_type = [np.number, "datetime", "datetimetz", "timedelta"] + + # GH23719, allow plotting boolean + if self.include_bool is True: + include_type.append(np.bool_) + + # GH22799, exclude datetime-like type for boxplot + exclude_type = None + if self._kind == "box": + # TODO: change after solving issue 27881 + include_type = [np.number] + exclude_type = ["timedelta"] + + # GH 18755, include object and category type for scatter plot + if self._kind == "scatter": + include_type.extend(["object", "category"]) + + numeric_data = data.select_dtypes(include=include_type, exclude=exclude_type) + + try: + is_empty = numeric_data.columns.empty + except AttributeError: + is_empty = not len(numeric_data) + + # no non-numeric frames or series allowed + if is_empty: + raise TypeError("no numeric data to plot") + + self.data = numeric_data.apply(self._convert_to_ndarray) + + def _make_plot(self): + raise AbstractMethodError(self) + + def _add_table(self): + if self.table is False: + return + elif self.table is True: + data = self.data.transpose() + else: + data = self.table + ax = self._get_ax(0) + table(ax, data) + + def _post_plot_logic_common(self, ax, data): + """Common post process for each axes""" + if self.orientation == "vertical" or self.orientation is None: + 
self._apply_axis_properties(ax.xaxis, rot=self.rot, fontsize=self.fontsize) + self._apply_axis_properties(ax.yaxis, fontsize=self.fontsize) + + if hasattr(ax, "right_ax"): + self._apply_axis_properties(ax.right_ax.yaxis, fontsize=self.fontsize) + + elif self.orientation == "horizontal": + self._apply_axis_properties(ax.yaxis, rot=self.rot, fontsize=self.fontsize) + self._apply_axis_properties(ax.xaxis, fontsize=self.fontsize) + + if hasattr(ax, "right_ax"): + self._apply_axis_properties(ax.right_ax.yaxis, fontsize=self.fontsize) + else: # pragma no cover + raise ValueError + + def _post_plot_logic(self, ax, data): + """Post process for each axes. Overridden in child classes""" + pass + + def _adorn_subplots(self): + """Common post process unrelated to data""" + if len(self.axes) > 0: + all_axes = self._get_subplots() + nrows, ncols = self._get_axes_layout() + handle_shared_axes( + axarr=all_axes, + nplots=len(all_axes), + naxes=nrows * ncols, + nrows=nrows, + ncols=ncols, + sharex=self.sharex, + sharey=self.sharey, + ) + + for ax in self.axes: + if self.yticks is not None: + ax.set_yticks(self.yticks) + + if self.xticks is not None: + ax.set_xticks(self.xticks) + + if self.ylim is not None: + ax.set_ylim(self.ylim) + + if self.xlim is not None: + ax.set_xlim(self.xlim) + + # GH9093, currently Pandas does not show ylabel, so if users provide + # ylabel will set it as ylabel in the plot. + if self.ylabel is not None: + ax.set_ylabel(pprint_thing(self.ylabel)) + + ax.grid(self.grid) + + if self.title: + if self.subplots: + if is_list_like(self.title): + if len(self.title) != self.nseries: + raise ValueError( + "The length of `title` must equal the number " + "of columns if using `title` of type `list` " + "and `subplots=True`.\n" + f"length of title = {len(self.title)}\n" + f"number of columns = {self.nseries}" + ) + + for (ax, title) in zip(self.axes, self.title): + ax.set_title(title) + else: + self.fig.suptitle(self.title) + else: + if is_list_like(self.title): + msg = ( + "Using `title` of type `list` is not supported " + "unless `subplots=True` is passed" + ) + raise ValueError(msg) + self.axes[0].set_title(self.title) + + def _apply_axis_properties(self, axis: Axis, rot=None, fontsize=None): + """ + Tick creation within matplotlib is reasonably expensive and is + internally deferred until accessed as Ticks are created/destroyed + multiple times per draw. It's therefore beneficial for us to avoid + accessing unless we will act on the Tick. + """ + if rot is not None or fontsize is not None: + # rot=0 is a valid setting, hence the explicit None check + labels = axis.get_majorticklabels() + axis.get_minorticklabels() + for label in labels: + if rot is not None: + label.set_rotation(rot) + if fontsize is not None: + label.set_fontsize(fontsize) + + @property + def legend_title(self) -> str | None: + if not isinstance(self.data.columns, ABCMultiIndex): + name = self.data.columns.name + if name is not None: + name = pprint_thing(name) + return name + else: + stringified = map(pprint_thing, self.data.columns.names) + return ",".join(stringified) + + def _mark_right_label(self, label: str, index: int) -> str: + """ + Append ``(right)`` to the label of a line if it's plotted on the right axis. + + Note that ``(right)`` is only appended when ``subplots=False``. 
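The "(right)" suffix described above is easiest to see from plain user code. A small usage sketch (the frame and column names are invented for illustration):

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2, 3], "b": [10, 20, 30]})
    # "b" is drawn on a twinx axis; with the default mark_right=True its
    # legend entry is rendered as "b (right)".
    ax = df.plot(secondary_y=["b"])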
+ """ + if not self.subplots and self.mark_right and self.on_right(index): + label += " (right)" + return label + + def _append_legend_handles_labels(self, handle: Artist, label: str) -> None: + """ + Append current handle and label to ``legend_handles`` and ``legend_labels``. + + These will be used to make the legend. + """ + self.legend_handles.append(handle) + self.legend_labels.append(label) + + def _make_legend(self) -> None: + ax, leg = self._get_ax_legend(self.axes[0]) + + handles = [] + labels = [] + title = "" + + if not self.subplots: + if leg is not None: + title = leg.get_title().get_text() + # Replace leg.LegendHandles because it misses marker info + handles = leg.legendHandles + labels = [x.get_text() for x in leg.get_texts()] + + if self.legend: + if self.legend == "reverse": + handles += reversed(self.legend_handles) + labels += reversed(self.legend_labels) + else: + handles += self.legend_handles + labels += self.legend_labels + + if self.legend_title is not None: + title = self.legend_title + + if len(handles) > 0: + ax.legend(handles, labels, loc="best", title=title) + + elif self.subplots and self.legend: + for ax in self.axes: + if ax.get_visible(): + ax.legend(loc="best") + + def _get_ax_legend(self, ax: Axes): + """ + Take in axes and return ax and legend under different scenarios + """ + leg = ax.get_legend() + + other_ax = getattr(ax, "left_ax", None) or getattr(ax, "right_ax", None) + other_leg = None + if other_ax is not None: + other_leg = other_ax.get_legend() + if leg is None and other_leg is not None: + leg = other_leg + ax = other_ax + return ax, leg + + @cache_readonly + def plt(self): + import matplotlib.pyplot as plt + + return plt + + _need_to_set_index = False + + def _get_xticks(self, convert_period: bool = False): + index = self.data.index + is_datetype = index.inferred_type in ("datetime", "date", "datetime64", "time") + + if self.use_index: + if convert_period and isinstance(index, ABCPeriodIndex): + self.data = self.data.reindex(index=index.sort_values()) + x = self.data.index.to_timestamp()._mpl_repr() + elif index.is_numeric(): + """ + Matplotlib supports numeric values or datetime objects as + xaxis values. Taking LBYL approach here, by the time + matplotlib raises exception when using non numeric/datetime + values for xaxis, several actions are already taken by plt. 
+ """ + x = index._mpl_repr() + elif is_datetype: + self.data = self.data[notna(self.data.index)] + self.data = self.data.sort_index() + x = self.data.index._mpl_repr() + else: + self._need_to_set_index = True + x = list(range(len(index))) + else: + x = list(range(len(index))) + + return x + + @classmethod + @register_pandas_matplotlib_converters + def _plot(cls, ax: Axes, x, y, style=None, is_errorbar: bool = False, **kwds): + mask = isna(y) + if mask.any(): + y = np.ma.array(y) + y = np.ma.masked_where(mask, y) + + if isinstance(x, ABCIndex): + x = x._mpl_repr() + + if is_errorbar: + if "xerr" in kwds: + kwds["xerr"] = np.array(kwds.get("xerr")) + if "yerr" in kwds: + kwds["yerr"] = np.array(kwds.get("yerr")) + return ax.errorbar(x, y, **kwds) + else: + # prevent style kwarg from going to errorbar, where it is unsupported + args = (x, y, style) if style is not None else (x, y) + return ax.plot(*args, **kwds) + + def _get_index_name(self) -> str | None: + if isinstance(self.data.index, ABCMultiIndex): + name = self.data.index.names + if com.any_not_none(*name): + name = ",".join([pprint_thing(x) for x in name]) + else: + name = None + else: + name = self.data.index.name + if name is not None: + name = pprint_thing(name) + + # GH 9093, override the default xlabel if xlabel is provided. + if self.xlabel is not None: + name = pprint_thing(self.xlabel) + + return name + + @classmethod + def _get_ax_layer(cls, ax, primary=True): + """get left (primary) or right (secondary) axes""" + if primary: + return getattr(ax, "left_ax", ax) + else: + return getattr(ax, "right_ax", ax) + + def _get_ax(self, i: int): + # get the twinx ax if appropriate + if self.subplots: + ax = self.axes[i] + ax = self._maybe_right_yaxis(ax, i) + self.axes[i] = ax + else: + ax = self.axes[0] + ax = self._maybe_right_yaxis(ax, i) + + ax.get_yaxis().set_visible(True) + return ax + + @classmethod + def get_default_ax(cls, ax): + import matplotlib.pyplot as plt + + if ax is None and len(plt.get_fignums()) > 0: + with plt.rc_context(): + ax = plt.gca() + ax = cls._get_ax_layer(ax) + + def on_right(self, i): + if isinstance(self.secondary_y, bool): + return self.secondary_y + + if isinstance(self.secondary_y, (tuple, list, np.ndarray, ABCIndex)): + return self.data.columns[i] in self.secondary_y + + def _apply_style_colors(self, colors, kwds, col_num, label): + """ + Manage style and color based on column number and its label. + Returns tuple of appropriate style and kwds which "color" may be added. 
+ """ + style = None + if self.style is not None: + if isinstance(self.style, list): + try: + style = self.style[col_num] + except IndexError: + pass + elif isinstance(self.style, dict): + style = self.style.get(label, style) + else: + style = self.style + + has_color = "color" in kwds or self.colormap is not None + nocolor_style = style is None or not _color_in_style(style) + if (has_color or self.subplots) and nocolor_style: + if isinstance(colors, dict): + kwds["color"] = colors[label] + else: + kwds["color"] = colors[col_num % len(colors)] + return style, kwds + + def _get_colors(self, num_colors=None, color_kwds="color"): + if num_colors is None: + num_colors = self.nseries + + return get_standard_colors( + num_colors=num_colors, + colormap=self.colormap, + color=self.kwds.get(color_kwds), + ) + + def _parse_errorbars(self, label, err): + """ + Look for error keyword arguments and return the actual errorbar data + or return the error DataFrame/dict + + Error bars can be specified in several ways: + Series: the user provides a pandas.Series object of the same + length as the data + ndarray: provides a np.ndarray of the same length as the data + DataFrame/dict: error values are paired with keys matching the + key in the plotted DataFrame + str: the name of the column within the plotted DataFrame + + Asymmetrical error bars are also supported, however raw error values + must be provided in this case. For a ``N`` length :class:`Series`, a + ``2xN`` array should be provided indicating lower and upper (or left + and right) errors. For a ``MxN`` :class:`DataFrame`, asymmetrical errors + should be in a ``Mx2xN`` array. + """ + if err is None: + return None + + def match_labels(data, e): + e = e.reindex(data.index) + return e + + # key-matched DataFrame + if isinstance(err, ABCDataFrame): + + err = match_labels(self.data, err) + # key-matched dict + elif isinstance(err, dict): + pass + + # Series of error values + elif isinstance(err, ABCSeries): + # broadcast error series across data + err = match_labels(self.data, err) + err = np.atleast_2d(err) + err = np.tile(err, (self.nseries, 1)) + + # errors are a column in the dataframe + elif isinstance(err, str): + evalues = self.data[err].values + self.data = self.data[self.data.columns.drop(err)] + err = np.atleast_2d(evalues) + err = np.tile(err, (self.nseries, 1)) + + elif is_list_like(err): + if is_iterator(err): + err = np.atleast_2d(list(err)) + else: + # raw error values + err = np.atleast_2d(err) + + err_shape = err.shape + + # asymmetrical error bars + if isinstance(self.data, ABCSeries) and err_shape[0] == 2: + err = np.expand_dims(err, 0) + err_shape = err.shape + if err_shape[2] != len(self.data): + raise ValueError( + "Asymmetrical error bars should be provided " + f"with the shape (2, {len(self.data)})" + ) + elif isinstance(self.data, ABCDataFrame) and err.ndim == 3: + if ( + (err_shape[0] != self.nseries) + or (err_shape[1] != 2) + or (err_shape[2] != len(self.data)) + ): + raise ValueError( + "Asymmetrical error bars should be provided " + f"with the shape ({self.nseries}, 2, {len(self.data)})" + ) + + # broadcast errors to each data series + if len(err) == 1: + err = np.tile(err, (self.nseries, 1)) + + elif is_number(err): + err = np.tile([err], (self.nseries, len(self.data))) + + else: + msg = f"No valid {label} detected" + raise ValueError(msg) + + return err + + def _get_errorbars(self, label=None, index=None, xerr=True, yerr=True): + errors = {} + + for kw, flag in zip(["xerr", "yerr"], [xerr, yerr]): + if flag: + err = 
self.errors[kw] + # user provided label-matched dataframe of errors + if isinstance(err, (ABCDataFrame, dict)): + if label is not None and label in err.keys(): + err = err[label] + else: + err = None + elif index is not None and err is not None: + err = err[index] + + if err is not None: + errors[kw] = err + return errors + + def _get_subplots(self): + from matplotlib.axes import Subplot + + return [ + ax for ax in self.axes[0].get_figure().get_axes() if isinstance(ax, Subplot) + ] + + def _get_axes_layout(self) -> tuple[int, int]: + axes = self._get_subplots() + x_set = set() + y_set = set() + for ax in axes: + # check axes coordinates to estimate layout + points = ax.get_position().get_points() + x_set.add(points[0][0]) + y_set.add(points[0][1]) + return (len(y_set), len(x_set)) + + +class PlanePlot(MPLPlot): + """ + Abstract class for plotting on plane, currently scatter and hexbin. + """ + + _layout_type = "single" + + def __init__(self, data, x, y, **kwargs): + MPLPlot.__init__(self, data, **kwargs) + if x is None or y is None: + raise ValueError(self._kind + " requires an x and y column") + if is_integer(x) and not self.data.columns.holds_integer(): + x = self.data.columns[x] + if is_integer(y) and not self.data.columns.holds_integer(): + y = self.data.columns[y] + + # Scatter plot allows to plot objects data + if self._kind == "hexbin": + if len(self.data[x]._get_numeric_data()) == 0: + raise ValueError(self._kind + " requires x column to be numeric") + if len(self.data[y]._get_numeric_data()) == 0: + raise ValueError(self._kind + " requires y column to be numeric") + + self.x = x + self.y = y + + @property + def nseries(self) -> int: + return 1 + + def _post_plot_logic(self, ax: Axes, data): + x, y = self.x, self.y + xlabel = self.xlabel if self.xlabel is not None else pprint_thing(x) + ylabel = self.ylabel if self.ylabel is not None else pprint_thing(y) + ax.set_xlabel(xlabel) + ax.set_ylabel(ylabel) + + def _plot_colorbar(self, ax: Axes, **kwds): + # Addresses issues #10611 and #10678: + # When plotting scatterplots and hexbinplots in IPython + # inline backend the colorbar axis height tends not to + # exactly match the parent axis height. + # The difference is due to small fractional differences + # in floating points with similar representation. + # To deal with this, this method forces the colorbar + # height to take the height of the parent axes. + # For a more detailed description of the issue + # see the following link: + # https://github.com/ipython/ipython/issues/11215 + + # GH33389, if ax is used multiple times, we should always + # use the last one which contains the latest information + # about the ax + img = ax.collections[-1] + cbar = self.fig.colorbar(img, ax=ax, **kwds) + + if mpl_ge_3_0_0(): + # The workaround below is no longer necessary. 
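Taken together, _parse_errorbars (above) normalizes the many accepted error-bar spellings and _get_errorbars slices the normalized result per column. A usage sketch of the accepted forms (frame and values invented for illustration):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})

    df.plot(kind="bar", yerr=0.5)                      # scalar, broadcast to all series
    df.plot(kind="bar", yerr={"a": [0.1, 0.2, 0.3],    # dict keyed by column name
                              "b": [0.2, 0.2, 0.2]})
    df.plot(kind="bar", yerr=np.full((2, 2, 3), 0.1))  # asymmetric: (nseries, 2, N)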
+ return cbar + + points = ax.get_position().get_points() + cbar_points = cbar.ax.get_position().get_points() + + cbar.ax.set_position( + [ + cbar_points[0, 0], + points[0, 1], + cbar_points[1, 0] - cbar_points[0, 0], + points[1, 1] - points[0, 1], + ] + ) + # To see the discrepancy in axis heights uncomment + # the following two lines: + # print(points[1, 1] - points[0, 1]) + # print(cbar_points[1, 1] - cbar_points[0, 1]) + + return cbar + + +class ScatterPlot(PlanePlot): + _kind = "scatter" + + def __init__(self, data, x, y, s=None, c=None, **kwargs): + if s is None: + # hide the matplotlib default for size, in case we want to change + # the handling of this argument later + s = 20 + elif is_hashable(s) and s in data.columns: + s = data[s] + super().__init__(data, x, y, s=s, **kwargs) + if is_integer(c) and not self.data.columns.holds_integer(): + c = self.data.columns[c] + self.c = c + + def _make_plot(self): + x, y, c, data = self.x, self.y, self.c, self.data + ax = self.axes[0] + + c_is_column = is_hashable(c) and c in self.data.columns + + color_by_categorical = c_is_column and is_categorical_dtype(self.data[c]) + + # pandas uses colormap, matplotlib uses cmap. + cmap = self.colormap or "Greys" + cmap = self.plt.cm.get_cmap(cmap) + color = self.kwds.pop("color", None) + if c is not None and color is not None: + raise TypeError("Specify exactly one of `c` and `color`") + elif c is None and color is None: + c_values = self.plt.rcParams["patch.facecolor"] + elif color is not None: + c_values = color + elif color_by_categorical: + c_values = self.data[c].cat.codes + elif c_is_column: + c_values = self.data[c].values + else: + c_values = c + + if color_by_categorical: + from matplotlib import colors + + n_cats = len(self.data[c].cat.categories) + cmap = colors.ListedColormap([cmap(i) for i in range(cmap.N)]) + bounds = np.linspace(0, n_cats, n_cats + 1) + norm = colors.BoundaryNorm(bounds, cmap.N) + else: + norm = None + # plot colorbar if + # 1. colormap is assigned, and + # 2.`c` is a column containing only numeric values + plot_colorbar = self.colormap or c_is_column + cb = self.kwds.pop("colorbar", is_numeric_dtype(c_values) and plot_colorbar) + + if self.legend and hasattr(self, "label"): + label = self.label + else: + label = None + scatter = ax.scatter( + data[x].values, + data[y].values, + c=c_values, + label=label, + cmap=cmap, + norm=norm, + **self.kwds, + ) + if cb: + cbar_label = c if c_is_column else "" + cbar = self._plot_colorbar(ax, label=cbar_label) + if color_by_categorical: + cbar.set_ticks(np.linspace(0.5, n_cats - 0.5, n_cats)) + cbar.ax.set_yticklabels(self.data[c].cat.categories) + + if label is not None: + self._append_legend_handles_labels(scatter, label) + else: + self.legend = False + + errors_x = self._get_errorbars(label=x, index=0, yerr=False) + errors_y = self._get_errorbars(label=y, index=0, xerr=False) + if len(errors_x) > 0 or len(errors_y) > 0: + err_kwds = dict(errors_x, **errors_y) + err_kwds["ecolor"] = scatter.get_facecolor()[0] + ax.errorbar(data[x].values, data[y].values, linestyle="none", **err_kwds) + + +class HexBinPlot(PlanePlot): + _kind = "hexbin" + + def __init__(self, data, x, y, C=None, **kwargs): + super().__init__(data, x, y, **kwargs) + if is_integer(C) and not self.data.columns.holds_integer(): + C = self.data.columns[C] + self.C = C + + def _make_plot(self): + x, y, data, C = self.x, self.y, self.data, self.C + ax = self.axes[0] + # pandas uses colormap, matplotlib uses cmap. 
+ cmap = self.colormap or "BuGn" + cmap = self.plt.cm.get_cmap(cmap) + cb = self.kwds.pop("colorbar", True) + + if C is None: + c_values = None + else: + c_values = data[C].values + + ax.hexbin(data[x].values, data[y].values, C=c_values, cmap=cmap, **self.kwds) + if cb: + self._plot_colorbar(ax) + + def _make_legend(self): + pass + + +class LinePlot(MPLPlot): + _kind = "line" + _default_rot = 0 + orientation = "vertical" + + def __init__(self, data, **kwargs): + from pandas.plotting import plot_params + + MPLPlot.__init__(self, data, **kwargs) + if self.stacked: + self.data = self.data.fillna(value=0) + self.x_compat = plot_params["x_compat"] + if "x_compat" in self.kwds: + self.x_compat = bool(self.kwds.pop("x_compat")) + + def _is_ts_plot(self) -> bool: + # this is slightly deceptive + return not self.x_compat and self.use_index and self._use_dynamic_x() + + def _use_dynamic_x(self): + return use_dynamic_x(self._get_ax(0), self.data) + + def _make_plot(self): + if self._is_ts_plot(): + data = maybe_convert_index(self._get_ax(0), self.data) + + x = data.index # dummy, not used + plotf = self._ts_plot + it = self._iter_data(data=data, keep_index=True) + else: + x = self._get_xticks(convert_period=True) + # error: Incompatible types in assignment (expression has type + # "Callable[[Any, Any, Any, Any, Any, Any, KwArg(Any)], Any]", variable has + # type "Callable[[Any, Any, Any, Any, KwArg(Any)], Any]") + plotf = self._plot # type: ignore[assignment] + it = self._iter_data() + + stacking_id = self._get_stacking_id() + is_errorbar = com.any_not_none(*self.errors.values()) + + colors = self._get_colors() + for i, (label, y) in enumerate(it): + ax = self._get_ax(i) + kwds = self.kwds.copy() + style, kwds = self._apply_style_colors(colors, kwds, i, label) + + errors = self._get_errorbars(label=label, index=i) + kwds = dict(kwds, **errors) + + label = pprint_thing(label) # .encode('utf-8') + label = self._mark_right_label(label, index=i) + kwds["label"] = label + + newlines = plotf( + ax, + x, + y, + style=style, + column_num=i, + stacking_id=stacking_id, + is_errorbar=is_errorbar, + **kwds, + ) + self._append_legend_handles_labels(newlines[0], label) + + if self._is_ts_plot(): + + # reset of xlim should be used for ts data + # TODO: GH28021, should find a way to change view limit on xaxis + lines = get_all_lines(ax) + left, right = get_xlim(lines) + ax.set_xlim(left, right) + + # error: Signature of "_plot" incompatible with supertype "MPLPlot" + @classmethod + def _plot( # type: ignore[override] + cls, ax: Axes, x, y, style=None, column_num=None, stacking_id=None, **kwds + ): + # column_num is used to get the target column from plotf in line and + # area plots + if column_num == 0: + cls._initialize_stacker(ax, stacking_id, len(y)) + y_values = cls._get_stacked_values(ax, stacking_id, y, kwds["label"]) + lines = MPLPlot._plot(ax, x, y_values, style=style, **kwds) + cls._update_stacker(ax, stacking_id, y) + return lines + + @classmethod + def _ts_plot(cls, ax: Axes, x, data, style=None, **kwds): + # accept x to be consistent with normal plot func, + # x is not passed to tsplot as it uses data.index as x coordinate + # column_num must be in kwds for stacking purpose + freq, data = maybe_resample(data, ax, kwds) + + # Set ax with freq info + decorate_axes(ax, freq, kwds) + # digging deeper + if hasattr(ax, "left_ax"): + decorate_axes(ax.left_ax, freq, kwds) + if hasattr(ax, "right_ax"): + decorate_axes(ax.right_ax, freq, kwds) + ax._plot_data.append((data, cls._kind, kwds)) + + lines = 
cls._plot(ax, data.index, data.values, style=style, **kwds) + # set date formatter, locators and rescale limits + format_dateaxis(ax, ax.freq, data.index) + return lines + + def _get_stacking_id(self): + if self.stacked: + return id(self.data) + else: + return None + + @classmethod + def _initialize_stacker(cls, ax: Axes, stacking_id, n: int): + if stacking_id is None: + return + if not hasattr(ax, "_stacker_pos_prior"): + ax._stacker_pos_prior = {} + if not hasattr(ax, "_stacker_neg_prior"): + ax._stacker_neg_prior = {} + ax._stacker_pos_prior[stacking_id] = np.zeros(n) + ax._stacker_neg_prior[stacking_id] = np.zeros(n) + + @classmethod + def _get_stacked_values(cls, ax: Axes, stacking_id, values, label): + if stacking_id is None: + return values + if not hasattr(ax, "_stacker_pos_prior"): + # stacker may not be initialized for subplots + cls._initialize_stacker(ax, stacking_id, len(values)) + + if (values >= 0).all(): + return ax._stacker_pos_prior[stacking_id] + values + elif (values <= 0).all(): + return ax._stacker_neg_prior[stacking_id] + values + + raise ValueError( + "When stacked is True, each column must be either " + "all positive or all negative. " + f"Column '{label}' contains both positive and negative values" + ) + + @classmethod + def _update_stacker(cls, ax: Axes, stacking_id, values): + if stacking_id is None: + return + if (values >= 0).all(): + ax._stacker_pos_prior[stacking_id] += values + elif (values <= 0).all(): + ax._stacker_neg_prior[stacking_id] += values + + def _post_plot_logic(self, ax: Axes, data): + from matplotlib.ticker import FixedLocator + + def get_label(i): + if is_float(i) and i.is_integer(): + i = int(i) + try: + return pprint_thing(data.index[i]) + except Exception: + return "" + + if self._need_to_set_index: + xticks = ax.get_xticks() + xticklabels = [get_label(x) for x in xticks] + ax.xaxis.set_major_locator(FixedLocator(xticks)) + ax.set_xticklabels(xticklabels) + + # If the index is an irregular time series, then by default + # we rotate the tick labels. The exception is if there are + # subplots which don't share their x-axes, in which we case + # we don't rotate the ticklabels as by default the subplots + # would be too close together. + condition = ( + not self._use_dynamic_x() + and (data.index._is_all_dates and self.use_index) + and (not self.subplots or (self.subplots and self.sharex)) + ) + + index_name = self._get_index_name() + + if condition: + # irregular TS rotated 30 deg. by default + # probably a better place to check / set this. 
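The _stacker_pos_prior / _stacker_neg_prior bookkeeping above keeps separate running totals for all-positive and all-negative columns, so stacked lines and areas grow away from zero on both sides of the axis. The arithmetic in isolation (a sketch, not the class methods themselves):

    import numpy as np

    cols = {"up1": np.array([1.0, 2.0]),
            "up2": np.array([3.0, 1.0]),
            "down": np.array([-2.0, -1.0])}

    pos_prior = np.zeros(2)
    neg_prior = np.zeros(2)
    for name, values in cols.items():
        if (values >= 0).all():
            print(name, "drawn at", pos_prior + values)
            pos_prior += values
        elif (values <= 0).all():
            print(name, "drawn at", neg_prior + values)
            neg_prior += values
        else:
            raise ValueError("each column must be all positive or all negative")
    # up1 at [1. 2.], up2 at [4. 3.], down at [-2. -1.]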
+ if not self._rot_set: + self.rot = 30 + format_date_labels(ax, rot=self.rot) + + if index_name is not None and self.use_index: + ax.set_xlabel(index_name) + + +class AreaPlot(LinePlot): + _kind = "area" + + def __init__(self, data, **kwargs): + kwargs.setdefault("stacked", True) + data = data.fillna(value=0) + LinePlot.__init__(self, data, **kwargs) + + if not self.stacked: + # use smaller alpha to distinguish overlap + self.kwds.setdefault("alpha", 0.5) + + if self.logy or self.loglog: + raise ValueError("Log-y scales are not supported in area plot") + + # error: Signature of "_plot" incompatible with supertype "MPLPlot" + @classmethod + def _plot( # type: ignore[override] + cls, + ax: Axes, + x, + y, + style=None, + column_num=None, + stacking_id=None, + is_errorbar: bool = False, + **kwds, + ): + + if column_num == 0: + cls._initialize_stacker(ax, stacking_id, len(y)) + y_values = cls._get_stacked_values(ax, stacking_id, y, kwds["label"]) + + # need to remove label, because subplots uses mpl legend as it is + line_kwds = kwds.copy() + line_kwds.pop("label") + lines = MPLPlot._plot(ax, x, y_values, style=style, **line_kwds) + + # get data from the line to get coordinates for fill_between + xdata, y_values = lines[0].get_data(orig=False) + + # unable to use ``_get_stacked_values`` here to get starting point + if stacking_id is None: + start = np.zeros(len(y)) + elif (y >= 0).all(): + start = ax._stacker_pos_prior[stacking_id] + elif (y <= 0).all(): + start = ax._stacker_neg_prior[stacking_id] + else: + start = np.zeros(len(y)) + + if "color" not in kwds: + kwds["color"] = lines[0].get_color() + + rect = ax.fill_between(xdata, start, y_values, **kwds) + cls._update_stacker(ax, stacking_id, y) + + # LinePlot expects list of artists + res = [rect] + return res + + def _post_plot_logic(self, ax: Axes, data): + LinePlot._post_plot_logic(self, ax, data) + + is_shared_y = len(list(ax.get_shared_y_axes())) > 0 + # do not override the default axis behaviour in case of shared y axes + if self.ylim is None and not is_shared_y: + if (data >= 0).all().all(): + ax.set_ylim(0, None) + elif (data <= 0).all().all(): + ax.set_ylim(None, 0) + + +class BarPlot(MPLPlot): + _kind = "bar" + _default_rot = 90 + orientation = "vertical" + + def __init__(self, data, **kwargs): + # we have to treat a series differently than a + # 1-column DataFrame w.r.t. 
color handling + self._is_series = isinstance(data, ABCSeries) + self.bar_width = kwargs.pop("width", 0.5) + pos = kwargs.pop("position", 0.5) + kwargs.setdefault("align", "center") + self.tick_pos = np.arange(len(data)) + + self.bottom = kwargs.pop("bottom", 0) + self.left = kwargs.pop("left", 0) + + self.log = kwargs.pop("log", False) + MPLPlot.__init__(self, data, **kwargs) + + if self.stacked or self.subplots: + self.tickoffset = self.bar_width * pos + if kwargs["align"] == "edge": + self.lim_offset = self.bar_width / 2 + else: + self.lim_offset = 0 + else: + if kwargs["align"] == "edge": + w = self.bar_width / self.nseries + self.tickoffset = self.bar_width * (pos - 0.5) + w * 0.5 + self.lim_offset = w * 0.5 + else: + self.tickoffset = self.bar_width * pos + self.lim_offset = 0 + + self.ax_pos = self.tick_pos - self.tickoffset + + def _args_adjust(self): + if is_list_like(self.bottom): + self.bottom = np.array(self.bottom) + if is_list_like(self.left): + self.left = np.array(self.left) + + # error: Signature of "_plot" incompatible with supertype "MPLPlot" + @classmethod + def _plot( # type: ignore[override] + cls, ax: Axes, x, y, w, start=0, log=False, **kwds + ): + return ax.bar(x, y, w, bottom=start, log=log, **kwds) + + @property + def _start_base(self): + return self.bottom + + def _make_plot(self): + import matplotlib as mpl + + colors = self._get_colors() + ncolors = len(colors) + + pos_prior = neg_prior = np.zeros(len(self.data)) + K = self.nseries + + for i, (label, y) in enumerate(self._iter_data(fillna=0)): + ax = self._get_ax(i) + kwds = self.kwds.copy() + if self._is_series: + kwds["color"] = colors + elif isinstance(colors, dict): + kwds["color"] = colors[label] + else: + kwds["color"] = colors[i % ncolors] + + errors = self._get_errorbars(label=label, index=i) + kwds = dict(kwds, **errors) + + label = pprint_thing(label) + label = self._mark_right_label(label, index=i) + + if (("yerr" in kwds) or ("xerr" in kwds)) and (kwds.get("ecolor") is None): + kwds["ecolor"] = mpl.rcParams["xtick.color"] + + start = 0 + if self.log and (y >= 1).all(): + start = 1 + start = start + self._start_base + + if self.subplots: + w = self.bar_width / 2 + rect = self._plot( + ax, + self.ax_pos + w, + y, + self.bar_width, + start=start, + label=label, + log=self.log, + **kwds, + ) + ax.set_title(label) + elif self.stacked: + mask = y > 0 + start = np.where(mask, pos_prior, neg_prior) + self._start_base + w = self.bar_width / 2 + rect = self._plot( + ax, + self.ax_pos + w, + y, + self.bar_width, + start=start, + label=label, + log=self.log, + **kwds, + ) + pos_prior = pos_prior + np.where(mask, y, 0) + neg_prior = neg_prior + np.where(mask, 0, y) + else: + w = self.bar_width / K + rect = self._plot( + ax, + self.ax_pos + (i + 0.5) * w, + y, + w, + start=start, + label=label, + log=self.log, + **kwds, + ) + self._append_legend_handles_labels(rect, label) + + def _post_plot_logic(self, ax: Axes, data): + if self.use_index: + str_index = [pprint_thing(key) for key in data.index] + else: + str_index = [pprint_thing(key) for key in range(data.shape[0])] + name = self._get_index_name() + + s_edge = self.ax_pos[0] - 0.25 + self.lim_offset + e_edge = self.ax_pos[-1] + 0.25 + self.bar_width + self.lim_offset + + self._decorate_ticks(ax, name, str_index, s_edge, e_edge) + + def _decorate_ticks(self, ax: Axes, name, ticklabels, start_edge, end_edge): + ax.set_xlim((start_edge, end_edge)) + + if self.xticks is not None: + ax.set_xticks(np.array(self.xticks)) + else: + ax.set_xticks(self.tick_pos) + 
ax.set_xticklabels(ticklabels) + + if name is not None and self.use_index: + ax.set_xlabel(name) + + +class BarhPlot(BarPlot): + _kind = "barh" + _default_rot = 0 + orientation = "horizontal" + + @property + def _start_base(self): + return self.left + + # error: Signature of "_plot" incompatible with supertype "MPLPlot" + @classmethod + def _plot( # type: ignore[override] + cls, ax: Axes, x, y, w, start=0, log=False, **kwds + ): + return ax.barh(x, y, w, left=start, log=log, **kwds) + + def _decorate_ticks(self, ax: Axes, name, ticklabels, start_edge, end_edge): + # horizontal bars + ax.set_ylim((start_edge, end_edge)) + ax.set_yticks(self.tick_pos) + ax.set_yticklabels(ticklabels) + if name is not None and self.use_index: + ax.set_ylabel(name) + + +class PiePlot(MPLPlot): + _kind = "pie" + _layout_type = "horizontal" + + def __init__(self, data, kind=None, **kwargs): + data = data.fillna(value=0) + if (data < 0).any().any(): + raise ValueError(f"{self._kind} plot doesn't allow negative values") + MPLPlot.__init__(self, data, kind=kind, **kwargs) + + def _args_adjust(self): + self.grid = False + self.logy = False + self.logx = False + self.loglog = False + + def _validate_color_args(self): + pass + + def _make_plot(self): + colors = self._get_colors(num_colors=len(self.data), color_kwds="colors") + self.kwds.setdefault("colors", colors) + + for i, (label, y) in enumerate(self._iter_data()): + ax = self._get_ax(i) + if label is not None: + label = pprint_thing(label) + ax.set_ylabel(label) + + kwds = self.kwds.copy() + + def blank_labeler(label, value): + if value == 0: + return "" + else: + return label + + idx = [pprint_thing(v) for v in self.data.index] + labels = kwds.pop("labels", idx) + # labels is used for each wedge's labels + # Blank out labels for values of 0 so they don't overlap + # with nonzero wedges + if labels is not None: + blabels = [blank_labeler(left, value) for left, value in zip(labels, y)] + else: + blabels = None + results = ax.pie(y, labels=blabels, **kwds) + + if kwds.get("autopct", None) is not None: + patches, texts, autotexts = results + else: + patches, texts = results + autotexts = [] + + if self.fontsize is not None: + for t in texts + autotexts: + t.set_fontsize(self.fontsize) + + # leglabels is used for legend labels + leglabels = labels if labels is not None else idx + for p, l in zip(patches, leglabels): + self._append_legend_handles_labels(p, l) diff --git a/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/groupby.py b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/groupby.py new file mode 100644 index 0000000..1b16eef --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/groupby.py @@ -0,0 +1,134 @@ +from __future__ import annotations + +import numpy as np + +from pandas._typing import ( + Dict, + IndexLabel, +) + +from pandas.core.dtypes.missing import remove_na_arraylike + +from pandas import ( + DataFrame, + MultiIndex, + Series, + concat, +) + + +def create_iter_data_given_by( + data: DataFrame, kind: str = "hist" +) -> Dict[str, DataFrame | Series]: + """ + Create data for iteration given `by` is assigned or not, and it is only + used in both hist and boxplot. + + If `by` is assigned, return a dictionary of DataFrames in which the key of + dictionary is the values in groups. + If `by` is not assigned, return input as is, and this preserves current + status of iter_data. + + Parameters + ---------- + data : reformatted grouped data from `_compute_plot_data` method. + kind : str, plot kind. 
This function is only used for `hist` and `box` plots. + + Returns + ------- + iter_data : DataFrame or Dictionary of DataFrames + + Examples + -------- + If `by` is assigned: + + >>> import numpy as np + >>> tuples = [('h1', 'a'), ('h1', 'b'), ('h2', 'a'), ('h2', 'b')] + >>> mi = MultiIndex.from_tuples(tuples) + >>> value = [[1, 3, np.nan, np.nan], + ... [3, 4, np.nan, np.nan], [np.nan, np.nan, 5, 6]] + >>> data = DataFrame(value, columns=mi) + >>> create_iter_data_given_by(data) + {'h1': h1 + a b + 0 1.0 3.0 + 1 3.0 4.0 + 2 NaN NaN, 'h2': h2 + a b + 0 NaN NaN + 1 NaN NaN + 2 5.0 6.0} + """ + + # For `hist` plot, before transformation, the values in level 0 are values + # in groups and subplot titles, and later used for column subselection and + # iteration; For `box` plot, values in level 1 are column names to show, + # and are used for iteration and as subplots titles. + if kind == "hist": + level = 0 + else: + level = 1 + + # Select sub-columns based on the value of level of MI, and if `by` is + # assigned, data must be a MI DataFrame + assert isinstance(data.columns, MultiIndex) + return { + col: data.loc[:, data.columns.get_level_values(level) == col] + for col in data.columns.levels[level] + } + + +def reconstruct_data_with_by( + data: DataFrame, by: IndexLabel, cols: IndexLabel +) -> DataFrame: + """ + Internal function to group data, and reassign multiindex column names onto the + result in order to let grouped data be used in _compute_plot_data method. + + Parameters + ---------- + data : Original DataFrame to plot + by : grouped `by` parameter selected by users + cols : columns of data set (excluding columns used in `by`) + + Returns + ------- + Output is the reconstructed DataFrame with MultiIndex columns. The first level + of MI is unique values of groups, and second level of MI is the columns + selected by users. + + Examples + -------- + >>> d = {'h': ['h1', 'h1', 'h2'], 'a': [1, 3, 5], 'b': [3, 4, 6]} + >>> df = DataFrame(d) + >>> reconstruct_data_with_by(df, by='h', cols=['a', 'b']) + h1 h2 + a b a b + 0 1.0 3.0 NaN NaN + 1 3.0 4.0 NaN NaN + 2 NaN NaN 5.0 6.0 + """ + grouped = data.groupby(by) + + data_list = [] + for key, group in grouped: + columns = MultiIndex.from_product([[key], cols]) + sub_group = group[cols] + sub_group.columns = columns + data_list.append(sub_group) + + data = concat(data_list, axis=1) + return data + + +def reformat_hist_y_given_by( + y: Series | np.ndarray, by: IndexLabel | None +) -> Series | np.ndarray: + """Internal function to reformat y given `by` is applied or not for hist plot. + + If by is None, input y is 1-d with NaN removed; and if by is not None, groupby + will take place and input y is multi-dimensional array. 
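The per-group NaN handling described above is the key difference from the ungrouped path: without `by` the series is cleaned once, while with `by` each grouped column is cleaned separately. A tiny sketch of the grouped case (values made up; remove_na_arraylike is approximated with a plain NaN filter):

    import numpy as np

    # Two grouped columns with group-specific NaNs, shaped like the
    # output of reconstruct_data_with_by above.
    y = np.array([[1.0, np.nan],
                  [3.0, np.nan],
                  [np.nan, 5.0]])

    cleaned = [col[~np.isnan(col)] for col in y.T]
    print(cleaned)  # [array([1., 3.]), array([5.])]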
+ """ + if by is not None and len(y.shape) > 1: + return np.array([remove_na_arraylike(col) for col in y.T]).T + return remove_na_arraylike(y) diff --git a/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/hist.py b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/hist.py new file mode 100644 index 0000000..08cffbf --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/hist.py @@ -0,0 +1,512 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import numpy as np + +from pandas.core.dtypes.common import ( + is_integer, + is_list_like, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCIndex, +) +from pandas.core.dtypes.missing import ( + isna, + remove_na_arraylike, +) + +from pandas.core.frame import DataFrame + +from pandas.io.formats.printing import pprint_thing +from pandas.plotting._matplotlib.core import ( + LinePlot, + MPLPlot, +) +from pandas.plotting._matplotlib.groupby import ( + create_iter_data_given_by, + reformat_hist_y_given_by, +) +from pandas.plotting._matplotlib.tools import ( + create_subplots, + flatten_axes, + maybe_adjust_figure, + set_ticks_props, +) + +if TYPE_CHECKING: + from matplotlib.axes import Axes + + +class HistPlot(LinePlot): + _kind = "hist" + + def __init__(self, data, bins=10, bottom=0, **kwargs): + self.bins = bins # use mpl default + self.bottom = bottom + # Do not call LinePlot.__init__ which may fill nan + MPLPlot.__init__(self, data, **kwargs) + + def _args_adjust(self): + + # calculate bin number separately in different subplots + # where subplots are created based on by argument + if is_integer(self.bins): + if self.by is not None: + grouped = self.data.groupby(self.by)[self.columns] + self.bins = [self._calculate_bins(group) for key, group in grouped] + else: + self.bins = self._calculate_bins(self.data) + + if is_list_like(self.bottom): + self.bottom = np.array(self.bottom) + + def _calculate_bins(self, data: DataFrame) -> np.ndarray: + """Calculate bins given data""" + values = data._convert(datetime=True)._get_numeric_data() + values = np.ravel(values) + values = values[~isna(values)] + + hist, bins = np.histogram( + values, bins=self.bins, range=self.kwds.get("range", None) + ) + return bins + + @classmethod + def _plot( + cls, + ax, + y, + style=None, + bins=None, + bottom=0, + column_num=0, + stacking_id=None, + **kwds, + ): + if column_num == 0: + cls._initialize_stacker(ax, stacking_id, len(bins) - 1) + + base = np.zeros(len(bins) - 1) + bottom = bottom + cls._get_stacked_values(ax, stacking_id, base, kwds["label"]) + # ignore style + n, bins, patches = ax.hist(y, bins=bins, bottom=bottom, **kwds) + cls._update_stacker(ax, stacking_id, n) + return patches + + def _make_plot(self): + colors = self._get_colors() + stacking_id = self._get_stacking_id() + + # Re-create iterated data if `by` is assigned by users + data = ( + create_iter_data_given_by(self.data, self._kind) + if self.by is not None + else self.data + ) + + for i, (label, y) in enumerate(self._iter_data(data=data)): + ax = self._get_ax(i) + + kwds = self.kwds.copy() + + label = pprint_thing(label) + label = self._mark_right_label(label, index=i) + kwds["label"] = label + + style, kwds = self._apply_style_colors(colors, kwds, i, label) + if style is not None: + kwds["style"] = style + + kwds = self._make_plot_keywords(kwds, y) + + # the bins is multi-dimension array now and each plot need only 1-d and + # when by is applied, label should be columns that are grouped + if self.by is not 
None: + kwds["bins"] = kwds["bins"][i] + kwds["label"] = self.columns + kwds.pop("color") + + y = reformat_hist_y_given_by(y, self.by) + + # We allow weights to be a multi-dimensional array, e.g. a (10, 2) array, + # and each sub-array (10,) will be called in each iteration. If users only + # provide 1D array, we assume the same weights is used for all iterations + weights = kwds.get("weights", None) + if weights is not None and np.ndim(weights) != 1: + kwds["weights"] = weights[:, i] + + artists = self._plot(ax, y, column_num=i, stacking_id=stacking_id, **kwds) + + # when by is applied, show title for subplots to know which group it is + if self.by is not None: + ax.set_title(pprint_thing(label)) + + self._append_legend_handles_labels(artists[0], label) + + def _make_plot_keywords(self, kwds, y): + """merge BoxPlot/KdePlot properties to passed kwds""" + # y is required for KdePlot + kwds["bottom"] = self.bottom + kwds["bins"] = self.bins + return kwds + + def _post_plot_logic(self, ax: Axes, data): + if self.orientation == "horizontal": + ax.set_xlabel("Frequency") + else: + ax.set_ylabel("Frequency") + + @property + def orientation(self): + if self.kwds.get("orientation", None) == "horizontal": + return "horizontal" + else: + return "vertical" + + +class KdePlot(HistPlot): + _kind = "kde" + orientation = "vertical" + + def __init__(self, data, bw_method=None, ind=None, **kwargs): + MPLPlot.__init__(self, data, **kwargs) + self.bw_method = bw_method + self.ind = ind + + def _args_adjust(self): + pass + + def _get_ind(self, y): + if self.ind is None: + # np.nanmax() and np.nanmin() ignores the missing values + sample_range = np.nanmax(y) - np.nanmin(y) + ind = np.linspace( + np.nanmin(y) - 0.5 * sample_range, + np.nanmax(y) + 0.5 * sample_range, + 1000, + ) + elif is_integer(self.ind): + sample_range = np.nanmax(y) - np.nanmin(y) + ind = np.linspace( + np.nanmin(y) - 0.5 * sample_range, + np.nanmax(y) + 0.5 * sample_range, + self.ind, + ) + else: + ind = self.ind + return ind + + @classmethod + def _plot( + cls, + ax, + y, + style=None, + bw_method=None, + ind=None, + column_num=None, + stacking_id=None, + **kwds, + ): + from scipy.stats import gaussian_kde + + y = remove_na_arraylike(y) + gkde = gaussian_kde(y, bw_method=bw_method) + + y = gkde.evaluate(ind) + lines = MPLPlot._plot(ax, ind, y, style=style, **kwds) + return lines + + def _make_plot_keywords(self, kwds, y): + kwds["bw_method"] = self.bw_method + kwds["ind"] = self._get_ind(y) + return kwds + + def _post_plot_logic(self, ax, data): + ax.set_ylabel("Density") + + +def _grouped_plot( + plotf, + data, + column=None, + by=None, + numeric_only=True, + figsize=None, + sharex=True, + sharey=True, + layout=None, + rot=0, + ax=None, + **kwargs, +): + + if figsize == "default": + # allowed to specify mpl default with 'default' + raise ValueError( + "figsize='default' is no longer supported. 
" + "Specify figure size by tuple instead" + ) + + grouped = data.groupby(by) + if column is not None: + grouped = grouped[column] + + naxes = len(grouped) + fig, axes = create_subplots( + naxes=naxes, figsize=figsize, sharex=sharex, sharey=sharey, ax=ax, layout=layout + ) + + _axes = flatten_axes(axes) + + for i, (key, group) in enumerate(grouped): + ax = _axes[i] + if numeric_only and isinstance(group, ABCDataFrame): + group = group._get_numeric_data() + plotf(group, ax, **kwargs) + ax.set_title(pprint_thing(key)) + + return fig, axes + + +def _grouped_hist( + data, + column=None, + by=None, + ax=None, + bins=50, + figsize=None, + layout=None, + sharex=False, + sharey=False, + rot=90, + grid=True, + xlabelsize=None, + xrot=None, + ylabelsize=None, + yrot=None, + legend=False, + **kwargs, +): + """ + Grouped histogram + + Parameters + ---------- + data : Series/DataFrame + column : object, optional + by : object, optional + ax : axes, optional + bins : int, default 50 + figsize : tuple, optional + layout : optional + sharex : bool, default False + sharey : bool, default False + rot : int, default 90 + grid : bool, default True + legend: : bool, default False + kwargs : dict, keyword arguments passed to matplotlib.Axes.hist + + Returns + ------- + collection of Matplotlib Axes + """ + if legend: + assert "label" not in kwargs + if data.ndim == 1: + kwargs["label"] = data.name + elif column is None: + kwargs["label"] = data.columns + else: + kwargs["label"] = column + + def plot_group(group, ax): + ax.hist(group.dropna().values, bins=bins, **kwargs) + if legend: + ax.legend() + + if xrot is None: + xrot = rot + + fig, axes = _grouped_plot( + plot_group, + data, + column=column, + by=by, + sharex=sharex, + sharey=sharey, + ax=ax, + figsize=figsize, + layout=layout, + rot=rot, + ) + + set_ticks_props( + axes, xlabelsize=xlabelsize, xrot=xrot, ylabelsize=ylabelsize, yrot=yrot + ) + + maybe_adjust_figure( + fig, bottom=0.15, top=0.9, left=0.1, right=0.9, hspace=0.5, wspace=0.3 + ) + return axes + + +def hist_series( + self, + by=None, + ax=None, + grid=True, + xlabelsize=None, + xrot=None, + ylabelsize=None, + yrot=None, + figsize=None, + bins=10, + legend: bool = False, + **kwds, +): + import matplotlib.pyplot as plt + + if legend and "label" in kwds: + raise ValueError("Cannot use both legend and label") + + if by is None: + if kwds.get("layout", None) is not None: + raise ValueError("The 'layout' keyword is not supported when 'by' is None") + # hack until the plotting interface is a bit more unified + fig = kwds.pop( + "figure", plt.gcf() if plt.get_fignums() else plt.figure(figsize=figsize) + ) + if figsize is not None and tuple(figsize) != tuple(fig.get_size_inches()): + fig.set_size_inches(*figsize, forward=True) + if ax is None: + ax = fig.gca() + elif ax.get_figure() != fig: + raise AssertionError("passed axis not bound to passed figure") + values = self.dropna().values + if legend: + kwds["label"] = self.name + ax.hist(values, bins=bins, **kwds) + if legend: + ax.legend() + ax.grid(grid) + axes = np.array([ax]) + + set_ticks_props( + axes, xlabelsize=xlabelsize, xrot=xrot, ylabelsize=ylabelsize, yrot=yrot + ) + + else: + if "figure" in kwds: + raise ValueError( + "Cannot pass 'figure' when using the " + "'by' argument, since a new 'Figure' instance will be created" + ) + axes = _grouped_hist( + self, + by=by, + ax=ax, + grid=grid, + figsize=figsize, + bins=bins, + xlabelsize=xlabelsize, + xrot=xrot, + ylabelsize=ylabelsize, + yrot=yrot, + legend=legend, + **kwds, + ) + + if 
hasattr(axes, "ndim"): + if axes.ndim == 1 and len(axes) == 1: + return axes[0] + return axes + + +def hist_frame( + data, + column=None, + by=None, + grid=True, + xlabelsize=None, + xrot=None, + ylabelsize=None, + yrot=None, + ax=None, + sharex=False, + sharey=False, + figsize=None, + layout=None, + bins=10, + legend: bool = False, + **kwds, +): + if legend and "label" in kwds: + raise ValueError("Cannot use both legend and label") + if by is not None: + axes = _grouped_hist( + data, + column=column, + by=by, + ax=ax, + grid=grid, + figsize=figsize, + sharex=sharex, + sharey=sharey, + layout=layout, + bins=bins, + xlabelsize=xlabelsize, + xrot=xrot, + ylabelsize=ylabelsize, + yrot=yrot, + legend=legend, + **kwds, + ) + return axes + + if column is not None: + if not isinstance(column, (list, np.ndarray, ABCIndex)): + column = [column] + data = data[column] + # GH32590 + data = data.select_dtypes( + include=(np.number, "datetime64", "datetimetz"), exclude="timedelta" + ) + naxes = len(data.columns) + + if naxes == 0: + raise ValueError( + "hist method requires numerical or datetime columns, nothing to plot." + ) + + fig, axes = create_subplots( + naxes=naxes, + ax=ax, + squeeze=False, + sharex=sharex, + sharey=sharey, + figsize=figsize, + layout=layout, + ) + _axes = flatten_axes(axes) + + can_set_label = "label" not in kwds + + for i, col in enumerate(data.columns): + ax = _axes[i] + if legend and can_set_label: + kwds["label"] = col + ax.hist(data[col].dropna().values, bins=bins, **kwds) + ax.set_title(col) + ax.grid(grid) + if legend: + ax.legend() + + set_ticks_props( + axes, xlabelsize=xlabelsize, xrot=xrot, ylabelsize=ylabelsize, yrot=yrot + ) + maybe_adjust_figure(fig, wspace=0.3, hspace=0.3) + + return axes diff --git a/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/misc.py b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/misc.py new file mode 100644 index 0000000..6583328 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/misc.py @@ -0,0 +1,475 @@ +from __future__ import annotations + +import random +from typing import ( + TYPE_CHECKING, + Hashable, +) + +import matplotlib.lines as mlines +import matplotlib.patches as patches +import numpy as np + +from pandas.core.dtypes.missing import notna + +from pandas.io.formats.printing import pprint_thing +from pandas.plotting._matplotlib.style import get_standard_colors +from pandas.plotting._matplotlib.tools import ( + create_subplots, + do_adjust_figure, + maybe_adjust_figure, + set_ticks_props, +) + +if TYPE_CHECKING: + from matplotlib.axes import Axes + from matplotlib.figure import Figure + + from pandas import ( + DataFrame, + Series, + ) + + +def scatter_matrix( + frame: DataFrame, + alpha=0.5, + figsize=None, + ax=None, + grid=False, + diagonal="hist", + marker=".", + density_kwds=None, + hist_kwds=None, + range_padding=0.05, + **kwds, +): + df = frame._get_numeric_data() + n = df.columns.size + naxes = n * n + fig, axes = create_subplots(naxes=naxes, figsize=figsize, ax=ax, squeeze=False) + + # no gaps between subplots + maybe_adjust_figure(fig, wspace=0, hspace=0) + + mask = notna(df) + + marker = _get_marker_compat(marker) + + hist_kwds = hist_kwds or {} + density_kwds = density_kwds or {} + + # GH 14855 + kwds.setdefault("edgecolors", "none") + + boundaries_list = [] + for a in df.columns: + values = df[a].values[mask[a].values] + rmin_, rmax_ = np.min(values), np.max(values) + rdelta_ext = (rmax_ - rmin_) * range_padding / 2 + boundaries_list.append((rmin_ - 
rdelta_ext, rmax_ + rdelta_ext)) + + for i, a in enumerate(df.columns): + for j, b in enumerate(df.columns): + ax = axes[i, j] + + if i == j: + values = df[a].values[mask[a].values] + + # Deal with the diagonal by drawing a histogram there. + if diagonal == "hist": + ax.hist(values, **hist_kwds) + + elif diagonal in ("kde", "density"): + from scipy.stats import gaussian_kde + + y = values + gkde = gaussian_kde(y) + ind = np.linspace(y.min(), y.max(), 1000) + ax.plot(ind, gkde.evaluate(ind), **density_kwds) + + ax.set_xlim(boundaries_list[i]) + + else: + common = (mask[a] & mask[b]).values + + ax.scatter( + df[b][common], df[a][common], marker=marker, alpha=alpha, **kwds + ) + + ax.set_xlim(boundaries_list[j]) + ax.set_ylim(boundaries_list[i]) + + ax.set_xlabel(b) + ax.set_ylabel(a) + + if j != 0: + ax.yaxis.set_visible(False) + if i != n - 1: + ax.xaxis.set_visible(False) + + if len(df.columns) > 1: + lim1 = boundaries_list[0] + locs = axes[0][1].yaxis.get_majorticklocs() + locs = locs[(lim1[0] <= locs) & (locs <= lim1[1])] + adj = (locs - lim1[0]) / (lim1[1] - lim1[0]) + + lim0 = axes[0][0].get_ylim() + adj = adj * (lim0[1] - lim0[0]) + lim0[0] + axes[0][0].yaxis.set_ticks(adj) + + if np.all(locs == locs.astype(int)): + # if all ticks are int + locs = locs.astype(int) + axes[0][0].yaxis.set_ticklabels(locs) + + set_ticks_props(axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0) + + return axes + + +def _get_marker_compat(marker): + if marker not in mlines.lineMarkers: + return "o" + return marker + + +def radviz( + frame: DataFrame, + class_column, + ax: Axes | None = None, + color=None, + colormap=None, + **kwds, +) -> Axes: + import matplotlib.pyplot as plt + + def normalize(series): + a = min(series) + b = max(series) + return (series - a) / (b - a) + + n = len(frame) + classes = frame[class_column].drop_duplicates() + class_col = frame[class_column] + df = frame.drop(class_column, axis=1).apply(normalize) + + if ax is None: + ax = plt.gca() + ax.set_xlim(-1, 1) + ax.set_ylim(-1, 1) + + to_plot: dict[Hashable, list[list]] = {} + colors = get_standard_colors( + num_colors=len(classes), colormap=colormap, color_type="random", color=color + ) + + for kls in classes: + to_plot[kls] = [[], []] + + m = len(frame.columns) - 1 + s = np.array( + [(np.cos(t), np.sin(t)) for t in [2 * np.pi * (i / m) for i in range(m)]] + ) + + for i in range(n): + row = df.iloc[i].values + row_ = np.repeat(np.expand_dims(row, axis=1), 2, axis=1) + y = (s * row_).sum(axis=0) / row.sum() + kls = class_col.iat[i] + to_plot[kls][0].append(y[0]) + to_plot[kls][1].append(y[1]) + + for i, kls in enumerate(classes): + ax.scatter( + to_plot[kls][0], + to_plot[kls][1], + color=colors[i], + label=pprint_thing(kls), + **kwds, + ) + ax.legend() + + ax.add_patch(patches.Circle((0.0, 0.0), radius=1.0, facecolor="none")) + + for xy, name in zip(s, df.columns): + + ax.add_patch(patches.Circle(xy, radius=0.025, facecolor="gray")) + + if xy[0] < 0.0 and xy[1] < 0.0: + ax.text( + xy[0] - 0.025, xy[1] - 0.025, name, ha="right", va="top", size="small" + ) + elif xy[0] < 0.0 and xy[1] >= 0.0: + ax.text( + xy[0] - 0.025, + xy[1] + 0.025, + name, + ha="right", + va="bottom", + size="small", + ) + elif xy[0] >= 0.0 and xy[1] < 0.0: + ax.text( + xy[0] + 0.025, xy[1] - 0.025, name, ha="left", va="top", size="small" + ) + elif xy[0] >= 0.0 and xy[1] >= 0.0: + ax.text( + xy[0] + 0.025, xy[1] + 0.025, name, ha="left", va="bottom", size="small" + ) + + ax.axis("equal") + return ax + + +def andrews_curves( + frame: DataFrame, + class_column, + 
ax: Axes | None = None, + samples: int = 200, + color=None, + colormap=None, + **kwds, +) -> Axes: + import matplotlib.pyplot as plt + + def function(amplitudes): + def f(t): + x1 = amplitudes[0] + result = x1 / np.sqrt(2.0) + + # Take the rest of the coefficients and resize them + # appropriately. Take a copy of amplitudes as otherwise numpy + # deletes the element from amplitudes itself. + coeffs = np.delete(np.copy(amplitudes), 0) + coeffs = np.resize(coeffs, (int((coeffs.size + 1) / 2), 2)) + + # Generate the harmonics and arguments for the sin and cos + # functions. + harmonics = np.arange(0, coeffs.shape[0]) + 1 + trig_args = np.outer(harmonics, t) + + result += np.sum( + coeffs[:, 0, np.newaxis] * np.sin(trig_args) + + coeffs[:, 1, np.newaxis] * np.cos(trig_args), + axis=0, + ) + return result + + return f + + n = len(frame) + class_col = frame[class_column] + classes = frame[class_column].drop_duplicates() + df = frame.drop(class_column, axis=1) + t = np.linspace(-np.pi, np.pi, samples) + used_legends: set[str] = set() + + color_values = get_standard_colors( + num_colors=len(classes), colormap=colormap, color_type="random", color=color + ) + colors = dict(zip(classes, color_values)) + if ax is None: + ax = plt.gca() + ax.set_xlim(-np.pi, np.pi) + for i in range(n): + row = df.iloc[i].values + f = function(row) + y = f(t) + kls = class_col.iat[i] + label = pprint_thing(kls) + if label not in used_legends: + used_legends.add(label) + ax.plot(t, y, color=colors[kls], label=label, **kwds) + else: + ax.plot(t, y, color=colors[kls], **kwds) + + ax.legend(loc="upper right") + ax.grid() + return ax + + +def bootstrap_plot( + series: Series, + fig: Figure | None = None, + size: int = 50, + samples: int = 500, + **kwds, +) -> Figure: + + import matplotlib.pyplot as plt + + # TODO: is the failure mentioned below still relevant? 
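+    # random.sample draws WITHOUT replacement, so each sampling below is a
+    # size-`size` subsample of the series (a textbook bootstrap would
+    # resample with replacement)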
+ # random.sample(ndarray, int) fails on python 3.3, sigh + data = list(series.values) + samplings = [random.sample(data, size) for _ in range(samples)] + + means = np.array([np.mean(sampling) for sampling in samplings]) + medians = np.array([np.median(sampling) for sampling in samplings]) + midranges = np.array( + [(min(sampling) + max(sampling)) * 0.5 for sampling in samplings] + ) + if fig is None: + fig = plt.figure() + x = list(range(samples)) + axes = [] + ax1 = fig.add_subplot(2, 3, 1) + ax1.set_xlabel("Sample") + axes.append(ax1) + ax1.plot(x, means, **kwds) + ax2 = fig.add_subplot(2, 3, 2) + ax2.set_xlabel("Sample") + axes.append(ax2) + ax2.plot(x, medians, **kwds) + ax3 = fig.add_subplot(2, 3, 3) + ax3.set_xlabel("Sample") + axes.append(ax3) + ax3.plot(x, midranges, **kwds) + ax4 = fig.add_subplot(2, 3, 4) + ax4.set_xlabel("Mean") + axes.append(ax4) + ax4.hist(means, **kwds) + ax5 = fig.add_subplot(2, 3, 5) + ax5.set_xlabel("Median") + axes.append(ax5) + ax5.hist(medians, **kwds) + ax6 = fig.add_subplot(2, 3, 6) + ax6.set_xlabel("Midrange") + axes.append(ax6) + ax6.hist(midranges, **kwds) + for axis in axes: + plt.setp(axis.get_xticklabels(), fontsize=8) + plt.setp(axis.get_yticklabels(), fontsize=8) + if do_adjust_figure(fig): + plt.tight_layout() + return fig + + +def parallel_coordinates( + frame: DataFrame, + class_column, + cols=None, + ax: Axes | None = None, + color=None, + use_columns=False, + xticks=None, + colormap=None, + axvlines: bool = True, + axvlines_kwds=None, + sort_labels: bool = False, + **kwds, +) -> Axes: + import matplotlib.pyplot as plt + + if axvlines_kwds is None: + axvlines_kwds = {"linewidth": 1, "color": "black"} + + n = len(frame) + classes = frame[class_column].drop_duplicates() + class_col = frame[class_column] + + if cols is None: + df = frame.drop(class_column, axis=1) + else: + df = frame[cols] + + used_legends: set[str] = set() + + ncols = len(df.columns) + + # determine values to use for xticks + if use_columns is True: + if not np.all(np.isreal(list(df.columns))): + raise ValueError("Columns must be numeric to be used as xticks") + x = df.columns + elif xticks is not None: + if not np.all(np.isreal(xticks)): + raise ValueError("xticks specified must be numeric") + elif len(xticks) != ncols: + raise ValueError("Length of xticks must match number of columns") + x = xticks + else: + x = list(range(ncols)) + + if ax is None: + ax = plt.gca() + + color_values = get_standard_colors( + num_colors=len(classes), colormap=colormap, color_type="random", color=color + ) + + if sort_labels: + classes = sorted(classes) + color_values = sorted(color_values) + colors = dict(zip(classes, color_values)) + + for i in range(n): + y = df.iloc[i].values + kls = class_col.iat[i] + label = pprint_thing(kls) + if label not in used_legends: + used_legends.add(label) + ax.plot(x, y, color=colors[kls], label=label, **kwds) + else: + ax.plot(x, y, color=colors[kls], **kwds) + + if axvlines: + for i in x: + ax.axvline(i, **axvlines_kwds) + + ax.set_xticks(x) + ax.set_xticklabels(df.columns) + ax.set_xlim(x[0], x[-1]) + ax.legend(loc="upper right") + ax.grid() + return ax + + +def lag_plot(series: Series, lag: int = 1, ax: Axes | None = None, **kwds) -> Axes: + # workaround because `c='b'` is hardcoded in matplotlib's scatter method + import matplotlib.pyplot as plt + + kwds.setdefault("c", plt.rcParams["patch.facecolor"]) + + data = series.values + y1 = data[:-lag] + y2 = data[lag:] + if ax is None: + ax = plt.gca() + ax.set_xlabel("y(t)") + ax.set_ylabel(f"y(t + {lag})") 
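+    # scatter y(t) (= data[:-lag]) against y(t + lag) (= data[lag:]); points
+    # lining up along a diagonal indicate autocorrelation at this lag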
+ ax.scatter(y1, y2, **kwds) + return ax + + +def autocorrelation_plot(series: Series, ax: Axes | None = None, **kwds) -> Axes: + import matplotlib.pyplot as plt + + n = len(series) + data = np.asarray(series) + if ax is None: + ax = plt.gca() + ax.set_xlim(1, n) + ax.set_ylim(-1.0, 1.0) + mean = np.mean(data) + c0 = np.sum((data - mean) ** 2) / n + + def r(h): + return ((data[: n - h] - mean) * (data[h:] - mean)).sum() / n / c0 + + x = np.arange(n) + 1 + y = [r(loc) for loc in x] + z95 = 1.959963984540054 + z99 = 2.5758293035489004 + ax.axhline(y=z99 / np.sqrt(n), linestyle="--", color="grey") + ax.axhline(y=z95 / np.sqrt(n), color="grey") + ax.axhline(y=0.0, color="black") + ax.axhline(y=-z95 / np.sqrt(n), color="grey") + ax.axhline(y=-z99 / np.sqrt(n), linestyle="--", color="grey") + ax.set_xlabel("Lag") + ax.set_ylabel("Autocorrelation") + ax.plot(x, y, **kwds) + if "label" in kwds: + ax.legend() + ax.grid() + return ax diff --git a/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/style.py b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/style.py new file mode 100644 index 0000000..597c0da --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/style.py @@ -0,0 +1,275 @@ +from __future__ import annotations + +import itertools +from typing import ( + TYPE_CHECKING, + Collection, + Iterator, + Sequence, + Union, + cast, +) +import warnings + +import matplotlib.cm as cm +import matplotlib.colors +import numpy as np + +from pandas.core.dtypes.common import is_list_like + +import pandas.core.common as com + +if TYPE_CHECKING: + from matplotlib.colors import Colormap + + +Color = Union[str, Sequence[float]] + + +def get_standard_colors( + num_colors: int, + colormap: Colormap | None = None, + color_type: str = "default", + color: dict[str, Color] | Color | Collection[Color] | None = None, +): + """ + Get standard colors based on `colormap`, `color_type` or `color` inputs. + + Parameters + ---------- + num_colors : int + Minimum number of colors to be returned. + Ignored if `color` is a dictionary. + colormap : :py:class:`matplotlib.colors.Colormap`, optional + Matplotlib colormap. + When provided, the resulting colors will be derived from the colormap. + color_type : {"default", "random"}, optional + Type of colors to derive. Used if provided `color` and `colormap` are None. + Ignored if either `color` or `colormap` are not None. + color : dict or str or sequence, optional + Color(s) to be used for deriving sequence of colors. + Can be either be a dictionary, or a single color (single color string, + or sequence of floats representing a single color), + or a sequence of colors. + + Returns + ------- + dict or list + Standard colors. Can either be a mapping if `color` was a dictionary, + or a list of colors with a length of `num_colors` or more. + + Warns + ----- + UserWarning + If both `colormap` and `color` are provided. + Parameter `color` will override. + """ + if isinstance(color, dict): + return color + + colors = _derive_colors( + color=color, + colormap=colormap, + color_type=color_type, + num_colors=num_colors, + ) + + return list(_cycle_colors(colors, num_colors=num_colors)) + + +def _derive_colors( + *, + color: Color | Collection[Color] | None, + colormap: str | Colormap | None, + color_type: str, + num_colors: int, +) -> list[Color]: + """ + Derive colors from either `colormap`, `color_type` or `color` inputs. 
+ + Get a list of colors either from `colormap`, or from `color`, + or from `color_type` (if both `colormap` and `color` are None). + + Parameters + ---------- + color : str or sequence, optional + Color(s) to be used for deriving sequence of colors. + Can be either be a single color (single color string, or sequence of floats + representing a single color), or a sequence of colors. + colormap : :py:class:`matplotlib.colors.Colormap`, optional + Matplotlib colormap. + When provided, the resulting colors will be derived from the colormap. + color_type : {"default", "random"}, optional + Type of colors to derive. Used if provided `color` and `colormap` are None. + Ignored if either `color` or `colormap`` are not None. + num_colors : int + Number of colors to be extracted. + + Returns + ------- + list + List of colors extracted. + + Warns + ----- + UserWarning + If both `colormap` and `color` are provided. + Parameter `color` will override. + """ + if color is None and colormap is not None: + return _get_colors_from_colormap(colormap, num_colors=num_colors) + elif color is not None: + if colormap is not None: + warnings.warn( + "'color' and 'colormap' cannot be used simultaneously. Using 'color'" + ) + return _get_colors_from_color(color) + else: + return _get_colors_from_color_type(color_type, num_colors=num_colors) + + +def _cycle_colors(colors: list[Color], num_colors: int) -> Iterator[Color]: + """Cycle colors until achieving max of `num_colors` or length of `colors`. + + Extra colors will be ignored by matplotlib if there are more colors + than needed and nothing needs to be done here. + """ + max_colors = max(num_colors, len(colors)) + yield from itertools.islice(itertools.cycle(colors), max_colors) + + +def _get_colors_from_colormap( + colormap: str | Colormap, + num_colors: int, +) -> list[Color]: + """Get colors from colormap.""" + colormap = _get_cmap_instance(colormap) + return [colormap(num) for num in np.linspace(0, 1, num=num_colors)] + + +def _get_cmap_instance(colormap: str | Colormap) -> Colormap: + """Get instance of matplotlib colormap.""" + if isinstance(colormap, str): + cmap = colormap + colormap = cm.get_cmap(colormap) + if colormap is None: + raise ValueError(f"Colormap {cmap} is not recognized") + return colormap + + +def _get_colors_from_color( + color: Color | Collection[Color], +) -> list[Color]: + """Get colors from user input color.""" + if len(color) == 0: + raise ValueError(f"Invalid color argument: {color}") + + if _is_single_color(color): + color = cast(Color, color) + return [color] + + color = cast(Collection[Color], color) + return list(_gen_list_of_colors_from_iterable(color)) + + +def _is_single_color(color: Color | Collection[Color]) -> bool: + """Check if `color` is a single color, not a sequence of colors. + + Single color is of these kinds: + - Named color "red", "C0", "firebrick" + - Alias "g" + - Sequence of floats, such as (0.1, 0.2, 0.3) or (0.1, 0.2, 0.3, 0.4). + + See Also + -------- + _is_single_string_color + """ + if isinstance(color, str) and _is_single_string_color(color): + # GH #36972 + return True + + if _is_floats_color(color): + return True + + return False + + +def _gen_list_of_colors_from_iterable(color: Collection[Color]) -> Iterator[Color]: + """ + Yield colors from string of several letters or from collection of colors. 
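+
+    Examples
+    --------
+    >>> list(_gen_list_of_colors_from_iterable("bgr"))
+    ['b', 'g', 'r']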
+ """ + for x in color: + if _is_single_color(x): + yield x + else: + raise ValueError(f"Invalid color {x}") + + +def _is_floats_color(color: Color | Collection[Color]) -> bool: + """Check if color comprises a sequence of floats representing color.""" + return bool( + is_list_like(color) + and (len(color) == 3 or len(color) == 4) + and all(isinstance(x, (int, float)) for x in color) + ) + + +def _get_colors_from_color_type(color_type: str, num_colors: int) -> list[Color]: + """Get colors from user input color type.""" + if color_type == "default": + return _get_default_colors(num_colors) + elif color_type == "random": + return _get_random_colors(num_colors) + else: + raise ValueError("color_type must be either 'default' or 'random'") + + +def _get_default_colors(num_colors: int) -> list[Color]: + """Get `num_colors` of default colors from matplotlib rc params.""" + import matplotlib.pyplot as plt + + colors = [c["color"] for c in plt.rcParams["axes.prop_cycle"]] + return colors[0:num_colors] + + +def _get_random_colors(num_colors: int) -> list[Color]: + """Get `num_colors` of random colors.""" + return [_random_color(num) for num in range(num_colors)] + + +def _random_color(column: int) -> list[float]: + """Get a random color represented as a list of length 3""" + # GH17525 use common._random_state to avoid resetting the seed + rs = com.random_state(column) + return rs.rand(3).tolist() + + +def _is_single_string_color(color: Color) -> bool: + """Check if `color` is a single string color. + + Examples of single string colors: + - 'r' + - 'g' + - 'red' + - 'green' + - 'C3' + - 'firebrick' + + Parameters + ---------- + color : Color + Color string or sequence of floats. + + Returns + ------- + bool + True if `color` looks like a valid color. + False otherwise. 
+ """ + conv = matplotlib.colors.ColorConverter() + try: + conv.to_rgba(color) + except ValueError: + return False + else: + return True diff --git a/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/timeseries.py b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/timeseries.py new file mode 100644 index 0000000..3cd312b --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/timeseries.py @@ -0,0 +1,333 @@ +# TODO: Use the fact that axis can have units to simplify the process + +from __future__ import annotations + +import functools +from typing import ( + TYPE_CHECKING, + cast, +) + +import numpy as np + +from pandas._libs.tslibs import ( + BaseOffset, + Period, + to_offset, +) +from pandas._libs.tslibs.dtypes import FreqGroup + +from pandas.core.dtypes.generic import ( + ABCDatetimeIndex, + ABCPeriodIndex, + ABCTimedeltaIndex, +) + +from pandas.io.formats.printing import pprint_thing +from pandas.plotting._matplotlib.converter import ( + TimeSeries_DateFormatter, + TimeSeries_DateLocator, + TimeSeries_TimedeltaFormatter, +) +from pandas.tseries.frequencies import ( + get_period_alias, + is_subperiod, + is_superperiod, +) + +if TYPE_CHECKING: + from matplotlib.axes import Axes + + from pandas import ( + DataFrame, + DatetimeIndex, + Index, + Series, + ) + +# --------------------------------------------------------------------- +# Plotting functions and monkey patches + + +def maybe_resample(series: Series, ax: Axes, kwargs): + # resample against axes freq if necessary + freq, ax_freq = _get_freq(ax, series) + + if freq is None: # pragma: no cover + raise ValueError("Cannot use dynamic axis without frequency info") + + # Convert DatetimeIndex to PeriodIndex + if isinstance(series.index, ABCDatetimeIndex): + series = series.to_period(freq=freq) + + if ax_freq is not None and freq != ax_freq: + if is_superperiod(freq, ax_freq): # upsample input + series = series.copy() + # error: "Index" has no attribute "asfreq" + series.index = series.index.asfreq( # type: ignore[attr-defined] + ax_freq, how="s" + ) + freq = ax_freq + elif _is_sup(freq, ax_freq): # one is weekly + how = kwargs.pop("how", "last") + series = getattr(series.resample("D"), how)().dropna() + series = getattr(series.resample(ax_freq), how)().dropna() + freq = ax_freq + elif is_subperiod(freq, ax_freq) or _is_sub(freq, ax_freq): + _upsample_others(ax, freq, kwargs) + else: # pragma: no cover + raise ValueError("Incompatible frequency conversion") + return freq, series + + +def _is_sub(f1: str, f2: str) -> bool: + return (f1.startswith("W") and is_subperiod("D", f2)) or ( + f2.startswith("W") and is_subperiod(f1, "D") + ) + + +def _is_sup(f1: str, f2: str) -> bool: + return (f1.startswith("W") and is_superperiod("D", f2)) or ( + f2.startswith("W") and is_superperiod(f1, "D") + ) + + +def _upsample_others(ax: Axes, freq, kwargs): + legend = ax.get_legend() + lines, labels = _replot_ax(ax, freq, kwargs) + _replot_ax(ax, freq, kwargs) + + other_ax = None + if hasattr(ax, "left_ax"): + other_ax = ax.left_ax + if hasattr(ax, "right_ax"): + other_ax = ax.right_ax + + if other_ax is not None: + rlines, rlabels = _replot_ax(other_ax, freq, kwargs) + lines.extend(rlines) + labels.extend(rlabels) + + if legend is not None and kwargs.get("legend", True) and len(lines) > 0: + title = legend.get_title().get_text() + if title == "None": + title = None + ax.legend(lines, labels, loc="best", title=title) + + +def _replot_ax(ax: Axes, freq, kwargs): + data = getattr(ax, "_plot_data", None) + + 
# clear current axes and data + ax._plot_data = [] + ax.clear() + + decorate_axes(ax, freq, kwargs) + + lines = [] + labels = [] + if data is not None: + for series, plotf, kwds in data: + series = series.copy() + idx = series.index.asfreq(freq, how="S") + series.index = idx + ax._plot_data.append((series, plotf, kwds)) + + # for tsplot + if isinstance(plotf, str): + from pandas.plotting._matplotlib import PLOT_CLASSES + + plotf = PLOT_CLASSES[plotf]._plot + + lines.append(plotf(ax, series.index._mpl_repr(), series.values, **kwds)[0]) + labels.append(pprint_thing(series.name)) + + return lines, labels + + +def decorate_axes(ax: Axes, freq, kwargs): + """Initialize axes for time-series plotting""" + if not hasattr(ax, "_plot_data"): + ax._plot_data = [] + + ax.freq = freq + xaxis = ax.get_xaxis() + xaxis.freq = freq + if not hasattr(ax, "legendlabels"): + ax.legendlabels = [kwargs.get("label", None)] + else: + ax.legendlabels.append(kwargs.get("label", None)) + ax.view_interval = None + ax.date_axis_info = None + + +def _get_ax_freq(ax: Axes): + """ + Get the freq attribute of the ax object if set. + Also checks shared axes (eg when using secondary yaxis, sharex=True + or twinx) + """ + ax_freq = getattr(ax, "freq", None) + if ax_freq is None: + # check for left/right ax in case of secondary yaxis + if hasattr(ax, "left_ax"): + ax_freq = getattr(ax.left_ax, "freq", None) + elif hasattr(ax, "right_ax"): + ax_freq = getattr(ax.right_ax, "freq", None) + if ax_freq is None: + # check if a shared ax (sharex/twinx) has already freq set + shared_axes = ax.get_shared_x_axes().get_siblings(ax) + if len(shared_axes) > 1: + for shared_ax in shared_axes: + ax_freq = getattr(shared_ax, "freq", None) + if ax_freq is not None: + break + return ax_freq + + +def _get_period_alias(freq) -> str | None: + freqstr = to_offset(freq).rule_code + + freq = get_period_alias(freqstr) + return freq + + +def _get_freq(ax: Axes, series: Series): + # get frequency from data + freq = getattr(series.index, "freq", None) + if freq is None: + freq = getattr(series.index, "inferred_freq", None) + freq = to_offset(freq) + + ax_freq = _get_ax_freq(ax) + + # use axes freq if no data freq + if freq is None: + freq = ax_freq + + # get the period frequency + freq = _get_period_alias(freq) + return freq, ax_freq + + +def use_dynamic_x(ax: Axes, data: DataFrame | Series) -> bool: + freq = _get_index_freq(data.index) + ax_freq = _get_ax_freq(ax) + + if freq is None: # convert irregular if axes has freq info + freq = ax_freq + else: # do not use tsplot if irregular was plotted first + if (ax_freq is None) and (len(ax.get_lines()) > 0): + return False + + if freq is None: + return False + + freq = _get_period_alias(freq) + + if freq is None: + return False + + # FIXME: hack this for 0.10.1, creating more technical debt...sigh + if isinstance(data.index, ABCDatetimeIndex): + base = to_offset(freq)._period_dtype_code + x = data.index + if base <= FreqGroup.FR_DAY.value: + return x[:1].is_normalized + return Period(x[0], freq).to_timestamp().tz_localize(x.tz) == x[0] + return True + + +def _get_index_freq(index: Index) -> BaseOffset | None: + freq = getattr(index, "freq", None) + if freq is None: + freq = getattr(index, "inferred_freq", None) + if freq == "B": + # error: "Index" has no attribute "dayofweek" + weekdays = np.unique(index.dayofweek) # type: ignore[attr-defined] + if (5 in weekdays) or (6 in weekdays): + freq = None + + freq = to_offset(freq) + return freq + + +def maybe_convert_index(ax: Axes, data): + # tsplot converts 
automatically, but don't want to convert index + # over and over for DataFrames + if isinstance(data.index, (ABCDatetimeIndex, ABCPeriodIndex)): + freq = data.index.freq + + if freq is None: + # We only get here for DatetimeIndex + data.index = cast("DatetimeIndex", data.index) + freq = data.index.inferred_freq + freq = to_offset(freq) + + if freq is None: + freq = _get_ax_freq(ax) + + if freq is None: + raise ValueError("Could not get frequency alias for plotting") + + freq = _get_period_alias(freq) + + if isinstance(data.index, ABCDatetimeIndex): + data = data.tz_localize(None).to_period(freq=freq) + elif isinstance(data.index, ABCPeriodIndex): + data.index = data.index.asfreq(freq=freq) + return data + + +# Patch methods for subplot. Only format_dateaxis is currently used. +# Do we need the rest for convenience? + + +def _format_coord(freq, t, y) -> str: + time_period = Period(ordinal=int(t), freq=freq) + return f"t = {time_period} y = {y:8f}" + + +def format_dateaxis(subplot, freq, index): + """ + Pretty-formats the date axis (x-axis). + + Major and minor ticks are automatically set for the frequency of the + current underlying series. As the dynamic mode is activated by + default, changing the limits of the x axis will intelligently change + the positions of the ticks. + """ + from matplotlib import pylab + + # handle index specific formatting + # Note: DatetimeIndex does not use this + # interface. DatetimeIndex uses matplotlib.date directly + if isinstance(index, ABCPeriodIndex): + + majlocator = TimeSeries_DateLocator( + freq, dynamic_mode=True, minor_locator=False, plot_obj=subplot + ) + minlocator = TimeSeries_DateLocator( + freq, dynamic_mode=True, minor_locator=True, plot_obj=subplot + ) + subplot.xaxis.set_major_locator(majlocator) + subplot.xaxis.set_minor_locator(minlocator) + + majformatter = TimeSeries_DateFormatter( + freq, dynamic_mode=True, minor_locator=False, plot_obj=subplot + ) + minformatter = TimeSeries_DateFormatter( + freq, dynamic_mode=True, minor_locator=True, plot_obj=subplot + ) + subplot.xaxis.set_major_formatter(majformatter) + subplot.xaxis.set_minor_formatter(minformatter) + + # x and y coord info + subplot.format_coord = functools.partial(_format_coord, freq) + + elif isinstance(index, ABCTimedeltaIndex): + subplot.xaxis.set_major_formatter(TimeSeries_TimedeltaFormatter()) + else: + raise TypeError("index type not supported") + + pylab.draw_if_interactive() diff --git a/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/tools.py b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/tools.py new file mode 100644 index 0000000..5314a61 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/plotting/_matplotlib/tools.py @@ -0,0 +1,491 @@ +# being a bit too dynamic +from __future__ import annotations + +from math import ceil +from typing import ( + TYPE_CHECKING, + Iterable, + Sequence, +) +import warnings + +import matplotlib.table +import matplotlib.ticker as ticker +import numpy as np + +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import is_list_like +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCIndex, + ABCSeries, +) + +from pandas.plotting._matplotlib import compat + +if TYPE_CHECKING: + from matplotlib.axes import Axes + from matplotlib.axis import Axis + from matplotlib.figure import Figure + from matplotlib.lines import Line2D + from matplotlib.table import Table + + from pandas import ( + DataFrame, + Series, + ) + + +def do_adjust_figure(fig: Figure): + 
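+    # NOTE: returns True when the figure does *not* use constrained_layout,
+    # i.e. when it is safe for maybe_adjust_figure to call subplots_adjust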
"""Whether fig has constrained_layout enabled.""" + if not hasattr(fig, "get_constrained_layout"): + return False + return not fig.get_constrained_layout() + + +def maybe_adjust_figure(fig: Figure, *args, **kwargs): + """Call fig.subplots_adjust unless fig has constrained_layout enabled.""" + if do_adjust_figure(fig): + fig.subplots_adjust(*args, **kwargs) + + +def format_date_labels(ax: Axes, rot): + # mini version of autofmt_xdate + for label in ax.get_xticklabels(): + label.set_ha("right") + label.set_rotation(rot) + fig = ax.get_figure() + maybe_adjust_figure(fig, bottom=0.2) + + +def table( + ax, data: DataFrame | Series, rowLabels=None, colLabels=None, **kwargs +) -> Table: + if isinstance(data, ABCSeries): + data = data.to_frame() + elif isinstance(data, ABCDataFrame): + pass + else: + raise ValueError("Input data must be DataFrame or Series") + + if rowLabels is None: + rowLabels = data.index + + if colLabels is None: + colLabels = data.columns + + cellText = data.values + + table = matplotlib.table.table( + ax, cellText=cellText, rowLabels=rowLabels, colLabels=colLabels, **kwargs + ) + return table + + +def _get_layout(nplots: int, layout=None, layout_type: str = "box") -> tuple[int, int]: + if layout is not None: + if not isinstance(layout, (tuple, list)) or len(layout) != 2: + raise ValueError("Layout must be a tuple of (rows, columns)") + + nrows, ncols = layout + + if nrows == -1 and ncols > 0: + layout = nrows, ncols = (ceil(nplots / ncols), ncols) + elif ncols == -1 and nrows > 0: + layout = nrows, ncols = (nrows, ceil(nplots / nrows)) + elif ncols <= 0 and nrows <= 0: + msg = "At least one dimension of layout must be positive" + raise ValueError(msg) + + if nrows * ncols < nplots: + raise ValueError( + f"Layout of {nrows}x{ncols} must be larger than required size {nplots}" + ) + + return layout + + if layout_type == "single": + return (1, 1) + elif layout_type == "horizontal": + return (1, nplots) + elif layout_type == "vertical": + return (nplots, 1) + + layouts = {1: (1, 1), 2: (1, 2), 3: (2, 2), 4: (2, 2)} + try: + return layouts[nplots] + except KeyError: + k = 1 + while k ** 2 < nplots: + k += 1 + + if (k - 1) * k >= nplots: + return k, (k - 1) + else: + return k, k + + +# copied from matplotlib/pyplot.py and modified for pandas.plotting + + +def create_subplots( + naxes: int, + sharex: bool = False, + sharey: bool = False, + squeeze: bool = True, + subplot_kw=None, + ax=None, + layout=None, + layout_type: str = "box", + **fig_kw, +): + """ + Create a figure with a set of subplots already made. + + This utility wrapper makes it convenient to create common layouts of + subplots, including the enclosing figure object, in a single call. + + Parameters + ---------- + naxes : int + Number of required axes. Exceeded axes are set invisible. Default is + nrows * ncols. + + sharex : bool + If True, the X axis will be shared amongst all subplots. + + sharey : bool + If True, the Y axis will be shared amongst all subplots. + + squeeze : bool + + If True, extra dimensions are squeezed out from the returned axis object: + - if only one subplot is constructed (nrows=ncols=1), the resulting + single Axis object is returned as a scalar. + - for Nx1 or 1xN subplots, the returned object is a 1-d numpy object + array of Axis objects are returned as numpy 1-d arrays. + - for NxM subplots with N>1 and M>1 are returned as a 2d array. + + If False, no squeezing is done: the returned axis object is always + a 2-d array containing Axis instances, even if it ends up being 1x1. 
+ + subplot_kw : dict + Dict with keywords passed to the add_subplot() call used to create each + subplots. + + ax : Matplotlib axis object, optional + + layout : tuple + Number of rows and columns of the subplot grid. + If not specified, calculated from naxes and layout_type + + layout_type : {'box', 'horizontal', 'vertical'}, default 'box' + Specify how to layout the subplot grid. + + fig_kw : Other keyword arguments to be passed to the figure() call. + Note that all keywords not recognized above will be + automatically included here. + + Returns + ------- + fig, ax : tuple + - fig is the Matplotlib Figure object + - ax can be either a single axis object or an array of axis objects if + more than one subplot was created. The dimensions of the resulting array + can be controlled with the squeeze keyword, see above. + + Examples + -------- + x = np.linspace(0, 2*np.pi, 400) + y = np.sin(x**2) + + # Just a figure and one subplot + f, ax = plt.subplots() + ax.plot(x, y) + ax.set_title('Simple plot') + + # Two subplots, unpack the output array immediately + f, (ax1, ax2) = plt.subplots(1, 2, sharey=True) + ax1.plot(x, y) + ax1.set_title('Sharing Y axis') + ax2.scatter(x, y) + + # Four polar axes + plt.subplots(2, 2, subplot_kw=dict(polar=True)) + """ + import matplotlib.pyplot as plt + + if subplot_kw is None: + subplot_kw = {} + + if ax is None: + fig = plt.figure(**fig_kw) + else: + if is_list_like(ax): + if squeeze: + ax = flatten_axes(ax) + if layout is not None: + warnings.warn( + "When passing multiple axes, layout keyword is ignored.", + UserWarning, + ) + if sharex or sharey: + warnings.warn( + "When passing multiple axes, sharex and sharey " + "are ignored. These settings must be specified when creating axes.", + UserWarning, + stacklevel=find_stack_level(), + ) + if ax.size == naxes: + fig = ax.flat[0].get_figure() + return fig, ax + else: + raise ValueError( + f"The number of passed axes must be {naxes}, the " + "same as the output plot" + ) + + fig = ax.get_figure() + # if ax is passed and a number of subplots is 1, return ax as it is + if naxes == 1: + if squeeze: + return fig, ax + else: + return fig, flatten_axes(ax) + else: + warnings.warn( + "To output multiple subplots, the figure containing " + "the passed axes is being cleared.", + UserWarning, + stacklevel=find_stack_level(), + ) + fig.clear() + + nrows, ncols = _get_layout(naxes, layout=layout, layout_type=layout_type) + nplots = nrows * ncols + + # Create empty object array to hold all axes. It's easiest to make it 1-d + # so we can just append subplots upon creation, and then + axarr = np.empty(nplots, dtype=object) + + # Create first subplot separately, so we can share it if requested + ax0 = fig.add_subplot(nrows, ncols, 1, **subplot_kw) + + if sharex: + subplot_kw["sharex"] = ax0 + if sharey: + subplot_kw["sharey"] = ax0 + axarr[0] = ax0 + + # Note off-by-one counting because add_subplot uses the MATLAB 1-based + # convention. + for i in range(1, nplots): + kwds = subplot_kw.copy() + # Set sharex and sharey to None for blank/dummy axes, these can + # interfere with proper axis limits on the visible axes if + # they share axes e.g. 
issue #7528 + if i >= naxes: + kwds["sharex"] = None + kwds["sharey"] = None + ax = fig.add_subplot(nrows, ncols, i + 1, **kwds) + axarr[i] = ax + + if naxes != nplots: + for ax in axarr[naxes:]: + ax.set_visible(False) + + handle_shared_axes(axarr, nplots, naxes, nrows, ncols, sharex, sharey) + + if squeeze: + # Reshape the array to have the final desired dimension (nrow,ncol), + # though discarding unneeded dimensions that equal 1. If we only have + # one subplot, just return it instead of a 1-element array. + if nplots == 1: + axes = axarr[0] + else: + axes = axarr.reshape(nrows, ncols).squeeze() + else: + # returned axis array will be always 2-d, even if nrows=ncols=1 + axes = axarr.reshape(nrows, ncols) + + return fig, axes + + +def _remove_labels_from_axis(axis: Axis): + for t in axis.get_majorticklabels(): + t.set_visible(False) + + # set_visible will not be effective if + # minor axis has NullLocator and NullFormatter (default) + if isinstance(axis.get_minor_locator(), ticker.NullLocator): + axis.set_minor_locator(ticker.AutoLocator()) + if isinstance(axis.get_minor_formatter(), ticker.NullFormatter): + axis.set_minor_formatter(ticker.FormatStrFormatter("")) + for t in axis.get_minorticklabels(): + t.set_visible(False) + + axis.get_label().set_visible(False) + + +def _has_externally_shared_axis(ax1: matplotlib.axes, compare_axis: str) -> bool: + """ + Return whether an axis is externally shared. + + Parameters + ---------- + ax1 : matplotlib.axes + Axis to query. + compare_axis : str + `"x"` or `"y"` according to whether the X-axis or Y-axis is being + compared. + + Returns + ------- + bool + `True` if the axis is externally shared. Otherwise `False`. + + Notes + ----- + If two axes with different positions are sharing an axis, they can be + referred to as *externally* sharing the common axis. + + If two axes sharing an axis also have the same position, they can be + referred to as *internally* sharing the common axis (a.k.a twinning). + + _handle_shared_axes() is only interested in axes externally sharing an + axis, regardless of whether either of the axes is also internally sharing + with a third axis. 
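+
+    For example, two vertically stacked subplots created with ``sharex=True``
+    share their X-axis externally, whereas an axis and its ``twinx()`` partner
+    occupy the same position and therefore share only internally.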
+ """ + if compare_axis == "x": + axes = ax1.get_shared_x_axes() + elif compare_axis == "y": + axes = ax1.get_shared_y_axes() + else: + raise ValueError( + "_has_externally_shared_axis() needs 'x' or 'y' as a second parameter" + ) + + axes = axes.get_siblings(ax1) + + # Retain ax1 and any of its siblings which aren't in the same position as it + ax1_points = ax1.get_position().get_points() + + for ax2 in axes: + if not np.array_equal(ax1_points, ax2.get_position().get_points()): + return True + + return False + + +def handle_shared_axes( + axarr: Iterable[Axes], + nplots: int, + naxes: int, + nrows: int, + ncols: int, + sharex: bool, + sharey: bool, +): + if nplots > 1: + if compat.mpl_ge_3_2_0(): + row_num = lambda x: x.get_subplotspec().rowspan.start + col_num = lambda x: x.get_subplotspec().colspan.start + else: + row_num = lambda x: x.rowNum + col_num = lambda x: x.colNum + + if compat.mpl_ge_3_4_0(): + is_first_col = lambda x: x.get_subplotspec().is_first_col() + else: + is_first_col = lambda x: x.is_first_col() + + if nrows > 1: + try: + # first find out the ax layout, + # so that we can correctly handle 'gaps" + layout = np.zeros((nrows + 1, ncols + 1), dtype=np.bool_) + for ax in axarr: + layout[row_num(ax), col_num(ax)] = ax.get_visible() + + for ax in axarr: + # only the last row of subplots should get x labels -> all + # other off layout handles the case that the subplot is + # the last in the column, because below is no subplot/gap. + if not layout[row_num(ax) + 1, col_num(ax)]: + continue + if sharex or _has_externally_shared_axis(ax, "x"): + _remove_labels_from_axis(ax.xaxis) + + except IndexError: + # if gridspec is used, ax.rowNum and ax.colNum may different + # from layout shape. in this case, use last_row logic + if compat.mpl_ge_3_4_0(): + is_last_row = lambda x: x.get_subplotspec().is_last_row() + else: + is_last_row = lambda x: x.is_last_row() + for ax in axarr: + if is_last_row(ax): + continue + if sharex or _has_externally_shared_axis(ax, "x"): + _remove_labels_from_axis(ax.xaxis) + + if ncols > 1: + for ax in axarr: + # only the first column should get y labels -> set all other to + # off as we only have labels in the first column and we always + # have a subplot there, we can skip the layout test + if is_first_col(ax): + continue + if sharey or _has_externally_shared_axis(ax, "y"): + _remove_labels_from_axis(ax.yaxis) + + +def flatten_axes(axes: Axes | Sequence[Axes]) -> np.ndarray: + if not is_list_like(axes): + return np.array([axes]) + elif isinstance(axes, (np.ndarray, ABCIndex)): + return np.asarray(axes).ravel() + return np.array(axes) + + +def set_ticks_props( + axes: Axes | Sequence[Axes], + xlabelsize=None, + xrot=None, + ylabelsize=None, + yrot=None, +): + import matplotlib.pyplot as plt + + for ax in flatten_axes(axes): + if xlabelsize is not None: + plt.setp(ax.get_xticklabels(), fontsize=xlabelsize) + if xrot is not None: + plt.setp(ax.get_xticklabels(), rotation=xrot) + if ylabelsize is not None: + plt.setp(ax.get_yticklabels(), fontsize=ylabelsize) + if yrot is not None: + plt.setp(ax.get_yticklabels(), rotation=yrot) + return axes + + +def get_all_lines(ax: Axes) -> list[Line2D]: + lines = ax.get_lines() + + if hasattr(ax, "right_ax"): + lines += ax.right_ax.get_lines() + + if hasattr(ax, "left_ax"): + lines += ax.left_ax.get_lines() + + return lines + + +def get_xlim(lines: Iterable[Line2D]) -> tuple[float, float]: + left, right = np.inf, -np.inf + for line in lines: + x = line.get_xdata(orig=False) + left = min(np.nanmin(x), left) + right = 
max(np.nanmax(x), right)
+    return left, right
diff --git a/.local/lib/python3.8/site-packages/pandas/plotting/_misc.py b/.local/lib/python3.8/site-packages/pandas/plotting/_misc.py
new file mode 100644
index 0000000..49cd7de
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/plotting/_misc.py
@@ -0,0 +1,576 @@
+from contextlib import contextmanager
+
+from pandas.plotting._core import _get_plot_backend
+
+
+def table(ax, data, rowLabels=None, colLabels=None, **kwargs):
+    """
+    Helper function to convert DataFrame and Series to matplotlib.table.
+
+    Parameters
+    ----------
+    ax : Matplotlib axes object
+    data : DataFrame or Series
+        Data for table contents.
+    rowLabels : sequence, optional
+        Row labels; if not specified, the data index is used.
+    colLabels : sequence, optional
+        Column labels; if not specified, the data columns are used.
+    **kwargs
+        Keyword arguments to be passed to matplotlib.table.table.
+
+    Returns
+    -------
+    matplotlib table object
+    """
+    plot_backend = _get_plot_backend("matplotlib")
+    # forward rowLabels/colLabels to the backend; hardcoding None here would
+    # silently discard the caller's labels, contradicting the docstring above
+    return plot_backend.table(
+        ax=ax, data=data, rowLabels=rowLabels, colLabels=colLabels, **kwargs
+    )
+
+
+def register():
+    """
+    Register pandas formatters and converters with matplotlib.
+
+    This function modifies the global ``matplotlib.units.registry``
+    dictionary. pandas adds custom converters for
+
+    * pd.Timestamp
+    * pd.Period
+    * np.datetime64
+    * datetime.datetime
+    * datetime.date
+    * datetime.time
+
+    See Also
+    --------
+    deregister_matplotlib_converters : Remove pandas formatters and converters.
+    """
+    plot_backend = _get_plot_backend("matplotlib")
+    plot_backend.register()
+
+
+def deregister():
+    """
+    Remove pandas formatters and converters.
+
+    Removes the custom converters added by :func:`register`. This
+    attempts to set the state of the registry back to the state before
+    pandas registered its own units. Converters for pandas' own types like
+    Timestamp and Period are removed completely. Converters for types
+    pandas overwrites, like ``datetime.datetime``, are restored to their
+    original value.
+
+    See Also
+    --------
+    register_matplotlib_converters : Register pandas formatters and converters
+        with matplotlib.
+    """
+    plot_backend = _get_plot_backend("matplotlib")
+    plot_backend.deregister()
+
+
+def scatter_matrix(
+    frame,
+    alpha=0.5,
+    figsize=None,
+    ax=None,
+    grid=False,
+    diagonal="hist",
+    marker=".",
+    density_kwds=None,
+    hist_kwds=None,
+    range_padding=0.05,
+    **kwargs,
+):
+    """
+    Draw a matrix of scatter plots.
+
+    Parameters
+    ----------
+    frame : DataFrame
+    alpha : float, optional
+        Amount of transparency applied.
+    figsize : (float, float), optional
+        A tuple (width, height) in inches.
+    ax : Matplotlib axis object, optional
+    grid : bool, optional
+        Setting this to True will show the grid.
+    diagonal : {'hist', 'kde'}
+        Pick between 'kde' and 'hist' for either Kernel Density Estimation or
+        Histogram plot in the diagonal.
+    marker : str, optional
+        Matplotlib marker type, default '.'.
+    density_kwds : keywords
+        Keyword arguments to be passed to kernel density estimate plot.
+    hist_kwds : keywords
+        Keyword arguments to be passed to hist function.
+    range_padding : float, default 0.05
+        Relative extension of axis range in x and y with respect to
+        (x_max - x_min) or (y_max - y_min).
+    **kwargs
+        Keyword arguments to be passed to scatter function.
+
+    Returns
+    -------
+    numpy.ndarray
+        A matrix of scatter plots.
+
+    Examples
+    --------
+
+    .. plot::
+        :context: close-figs
+
+        >>> df = pd.DataFrame(np.random.randn(1000, 4), columns=['A','B','C','D'])
+        >>> pd.plotting.scatter_matrix(df, alpha=0.2)
+        array([[<AxesSubplot:xlabel='A', ylabel='A'>,
+                <AxesSubplot:xlabel='B', ylabel='A'>,
+                <AxesSubplot:xlabel='C', ylabel='A'>,
+                <AxesSubplot:xlabel='D', ylabel='A'>],
+               [<AxesSubplot:xlabel='A', ylabel='B'>,
+                <AxesSubplot:xlabel='B', ylabel='B'>,
+                <AxesSubplot:xlabel='C', ylabel='B'>,
+                <AxesSubplot:xlabel='D', ylabel='B'>],
+               [<AxesSubplot:xlabel='A', ylabel='C'>,
+                <AxesSubplot:xlabel='B', ylabel='C'>,
+                <AxesSubplot:xlabel='C', ylabel='C'>,
+                <AxesSubplot:xlabel='D', ylabel='C'>],
+               [<AxesSubplot:xlabel='A', ylabel='D'>,
+                <AxesSubplot:xlabel='B', ylabel='D'>,
+                <AxesSubplot:xlabel='C', ylabel='D'>,
+                <AxesSubplot:xlabel='D', ylabel='D'>]], dtype=object)
+    """
+    plot_backend = _get_plot_backend("matplotlib")
+    return plot_backend.scatter_matrix(
+        frame=frame,
+        alpha=alpha,
+        figsize=figsize,
+        ax=ax,
+        grid=grid,
+        diagonal=diagonal,
+        marker=marker,
+        density_kwds=density_kwds,
+        hist_kwds=hist_kwds,
+        range_padding=range_padding,
+        **kwargs,
+    )
+
+
+def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds):
+    """
+    Plot a multidimensional dataset in 2D.
+
+    Each Series in the DataFrame is represented as an evenly distributed
+    slice on a circle. Each data point is rendered in the circle according to
+    the value on each Series. Highly correlated `Series` in the `DataFrame`
+    are placed closer on the unit circle.
+
+    RadViz allows projecting an N-dimensional data set into a 2D space where
+    the influence of each dimension can be interpreted as a balance between
+    the influence of all dimensions.
+
+    More info available at the `original article
+    `_
+    describing RadViz.
+
+    Parameters
+    ----------
+    frame : `DataFrame`
+        Object holding the data.
+    class_column : str
+        Column name containing the name of the data point category.
+    ax : :class:`matplotlib.axes.Axes`, optional
+        A plot instance to which to add the information.
+    color : list[str] or tuple[str], optional
+        Assign a color to each category. Example: ['blue', 'green'].
+    colormap : str or :class:`matplotlib.colors.Colormap`, default None
+        Colormap to select colors from. If string, load colormap with that
+        name from matplotlib.
+    **kwds
+        Options to pass to matplotlib scatter plotting method.
+
+    Returns
+    -------
+    :class:`matplotlib.axes.Axes`
+
+    See Also
+    --------
+    plotting.andrews_curves : Plot clustering visualization.
+
+    Examples
+    --------
+
+    .. plot::
+        :context: close-figs
+
+        >>> df = pd.DataFrame(
+        ...     {
+        ...         'SepalLength': [6.5, 7.7, 5.1, 5.8, 7.6, 5.0, 5.4, 4.6, 6.7, 4.6],
+        ...         'SepalWidth': [3.0, 3.8, 3.8, 2.7, 3.0, 2.3, 3.0, 3.2, 3.3, 3.6],
+        ...         'PetalLength': [5.5, 6.7, 1.9, 5.1, 6.6, 3.3, 4.5, 1.4, 5.7, 1.0],
+        ...         'PetalWidth': [1.8, 2.2, 0.4, 1.9, 2.1, 1.0, 1.5, 0.2, 2.1, 0.2],
+        ...         'Category': [
+        ...             'virginica',
+        ...             'virginica',
+        ...             'setosa',
+        ...             'virginica',
+        ...             'virginica',
+        ...             'versicolor',
+        ...             'versicolor',
+        ...             'setosa',
+        ...             'virginica',
+        ...             'setosa'
+        ...         ]
+        ...     }
+        ... )
+        >>> pd.plotting.radviz(df, 'Category')
+
+    """
+    plot_backend = _get_plot_backend("matplotlib")
+    return plot_backend.radviz(
+        frame=frame,
+        class_column=class_column,
+        ax=ax,
+        color=color,
+        colormap=colormap,
+        **kwds,
+    )
+
+
+def andrews_curves(
+    frame, class_column, ax=None, samples=200, color=None, colormap=None, **kwargs
+):
+    """
+    Generate a matplotlib plot of Andrews curves, for visualising clusters of
+    multivariate data.
+
+    Andrews curves have the functional form:
+
+    f(t) = x_1/sqrt(2) + x_2 sin(t) + x_3 cos(t) +
+           x_4 sin(2t) + x_5 cos(2t) + ...
+
+    Where x coefficients correspond to the values of each dimension and t is
+    linearly spaced between -pi and +pi. Each row of frame then corresponds to
+    a single curve.
+
+    Parameters
+    ----------
+    frame : DataFrame
+        Data to be plotted, preferably normalized to (0.0, 1.0).
+    class_column : str
+        Name of the column containing the class names.
+    ax : matplotlib axes object, default None
+    samples : int, default 200
+        Number of points to plot in each curve.
+    color : list or tuple, optional
+        Colors to use for the different classes.
+    colormap : str or matplotlib colormap object, default None
+        Colormap to select colors from. If string, load colormap with that name
+        from matplotlib.
+    **kwargs
+        Options to pass to matplotlib plotting method.
+
+    Returns
+    -------
+    :class:`matplotlib.axes.Axes`
+
+    Examples
+    --------
+
+    .. plot::
+        :context: close-figs
+
+        >>> df = pd.read_csv(
+        ...     'https://raw.github.com/pandas-dev/'
+        ...     'pandas/main/pandas/tests/io/data/csv/iris.csv'
+        ... )
+        >>> pd.plotting.andrews_curves(df, 'Name')
+
+    """
+    plot_backend = _get_plot_backend("matplotlib")
+    return plot_backend.andrews_curves(
+        frame=frame,
+        class_column=class_column,
+        ax=ax,
+        samples=samples,
+        color=color,
+        colormap=colormap,
+        **kwargs,
+    )
+
+
+def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds):
+    """
+    Bootstrap plot on mean, median and mid-range statistics.
+
+    The bootstrap plot is used to estimate the uncertainty of a statistic
+    by relying on random sampling with replacement [1]_. This function will
+    generate bootstrapping plots for mean, median and mid-range statistics
+    for the given number of samples of the given size.
+
+    .. [1] "Bootstrapping (statistics)" in \
+    https://en.wikipedia.org/wiki/Bootstrapping_%28statistics%29
+
+    Parameters
+    ----------
+    series : pandas.Series
+        Series from where to get the samplings for the bootstrapping.
+    fig : matplotlib.figure.Figure, default None
+        If given, it will use the `fig` reference for plotting instead of
+        creating a new one with default parameters.
+    size : int, default 50
+        Number of data points to consider during each sampling. It must be
+        less than or equal to the length of the `series`.
+    samples : int, default 500
+        Number of times the bootstrap procedure is performed.
+    **kwds
+        Options to pass to matplotlib plotting method.
+
+    Returns
+    -------
+    matplotlib.figure.Figure
+        Matplotlib figure.
+
+    See Also
+    --------
+    DataFrame.plot : Basic plotting for DataFrame objects.
+    Series.plot : Basic plotting for Series objects.
+
+    Examples
+    --------
+    This example draws a basic bootstrap plot for a Series.
+
+    .. plot::
+        :context: close-figs
+
+        >>> s = pd.Series(np.random.uniform(size=100))
+        >>> pd.plotting.bootstrap_plot(s)
+        <Figure size 640x480 with 6 Axes>
+ """ + plot_backend = _get_plot_backend("matplotlib") + return plot_backend.bootstrap_plot( + series=series, fig=fig, size=size, samples=samples, **kwds + ) + + +def parallel_coordinates( + frame, + class_column, + cols=None, + ax=None, + color=None, + use_columns=False, + xticks=None, + colormap=None, + axvlines=True, + axvlines_kwds=None, + sort_labels=False, + **kwargs, +): + """ + Parallel coordinates plotting. + + Parameters + ---------- + frame : DataFrame + class_column : str + Column name containing class names. + cols : list, optional + A list of column names to use. + ax : matplotlib.axis, optional + Matplotlib axis object. + color : list or tuple, optional + Colors to use for the different classes. + use_columns : bool, optional + If true, columns will be used as xticks. + xticks : list or tuple, optional + A list of values to use for xticks. + colormap : str or matplotlib colormap, default None + Colormap to use for line colors. + axvlines : bool, optional + If true, vertical lines will be added at each xtick. + axvlines_kwds : keywords, optional + Options to be passed to axvline method for vertical lines. + sort_labels : bool, default False + Sort class_column labels, useful when assigning colors. + **kwargs + Options to pass to matplotlib plotting method. + + Returns + ------- + class:`matplotlib.axis.Axes` + + Examples + -------- + + .. plot:: + :context: close-figs + + >>> df = pd.read_csv( + ... 'https://raw.github.com/pandas-dev/' + ... 'pandas/main/pandas/tests/io/data/csv/iris.csv' + ... ) + >>> pd.plotting.parallel_coordinates( + ... df, 'Name', color=('#556270', '#4ECDC4', '#C7F464') + ... ) + + """ + plot_backend = _get_plot_backend("matplotlib") + return plot_backend.parallel_coordinates( + frame=frame, + class_column=class_column, + cols=cols, + ax=ax, + color=color, + use_columns=use_columns, + xticks=xticks, + colormap=colormap, + axvlines=axvlines, + axvlines_kwds=axvlines_kwds, + sort_labels=sort_labels, + **kwargs, + ) + + +def lag_plot(series, lag=1, ax=None, **kwds): + """ + Lag plot for time series. + + Parameters + ---------- + series : Time series + lag : lag of the scatter plot, default 1 + ax : Matplotlib axis object, optional + **kwds + Matplotlib scatter method keyword arguments. + + Returns + ------- + class:`matplotlib.axis.Axes` + + Examples + -------- + + Lag plots are most commonly used to look for patterns in time series data. + + Given the following time series + + .. plot:: + :context: close-figs + + >>> np.random.seed(5) + >>> x = np.cumsum(np.random.normal(loc=1, scale=5, size=50)) + >>> s = pd.Series(x) + >>> s.plot() + + + A lag plot with ``lag=1`` returns + + .. plot:: + :context: close-figs + + >>> pd.plotting.lag_plot(s, lag=1) + + """ + plot_backend = _get_plot_backend("matplotlib") + return plot_backend.lag_plot(series=series, lag=lag, ax=ax, **kwds) + + +def autocorrelation_plot(series, ax=None, **kwargs): + """ + Autocorrelation plot for time series. + + Parameters + ---------- + series : Time series + ax : Matplotlib axis object, optional + **kwargs + Options to pass to matplotlib plotting method. + + Returns + ------- + class:`matplotlib.axis.Axes` + + Examples + -------- + + The horizontal lines in the plot correspond to 95% and 99% confidence bands. + + The dashed line is 99% confidence band. + + .. 
+
+    """
+    plot_backend = _get_plot_backend("matplotlib")
+    return plot_backend.lag_plot(series=series, lag=lag, ax=ax, **kwds)
+
+
+def autocorrelation_plot(series, ax=None, **kwargs):
+    """
+    Autocorrelation plot for time series.
+
+    Parameters
+    ----------
+    series : Series
+        The time series to visualize.
+    ax : matplotlib.axes.Axes, optional
+        The matplotlib axes object to use.
+    **kwargs
+        Options to pass to matplotlib plotting method.
+
+    Returns
+    -------
+    :class:`matplotlib.axes.Axes`
+
+    Examples
+    --------
+    The horizontal lines in the plot correspond to the 95% and 99% confidence
+    bands; the dashed line is the 99% confidence band.
+
+    .. plot::
+        :context: close-figs
+
+        >>> spacing = np.linspace(-9 * np.pi, 9 * np.pi, num=1000)
+        >>> s = pd.Series(0.7 * np.random.rand(1000) + 0.3 * np.sin(spacing))
+        >>> pd.plotting.autocorrelation_plot(s)
+
+    """
+    plot_backend = _get_plot_backend("matplotlib")
+    return plot_backend.autocorrelation_plot(series=series, ax=ax, **kwargs)
+
+
+class _Options(dict):
+    """
+    Stores pandas plotting options.
+
+    Allows for parameter aliasing, so you can just use parameter names that
+    are the same as the plot function parameters, while options are stored in
+    a canonical format that makes it easy to break them down into groups
+    later.
+    """
+
+    # alias so the names are same as plotting method parameter names
+    _ALIASES = {"x_compat": "xaxis.compat"}
+    _DEFAULT_KEYS = ["xaxis.compat"]
+
+    def __init__(self, deprecated=False):
+        self._deprecated = deprecated
+        super().__setitem__("xaxis.compat", False)
+
+    def __getitem__(self, key):
+        key = self._get_canonical_key(key)
+        if key not in self:
+            raise ValueError(f"{key} is not a valid pandas plotting option")
+        return super().__getitem__(key)
+
+    def __setitem__(self, key, value):
+        key = self._get_canonical_key(key)
+        return super().__setitem__(key, value)
+
+    def __delitem__(self, key):
+        key = self._get_canonical_key(key)
+        if key in self._DEFAULT_KEYS:
+            raise ValueError(f"Cannot remove default parameter {key}")
+        return super().__delitem__(key)
+
+    def __contains__(self, key) -> bool:
+        key = self._get_canonical_key(key)
+        return super().__contains__(key)
+
+    def reset(self):
+        """
+        Reset the option store to its initial state.
+
+        Returns
+        -------
+        None
+        """
+        # error: Cannot access "__init__" directly
+        self.__init__()  # type: ignore[misc]
+
+    def _get_canonical_key(self, key):
+        return self._ALIASES.get(key, key)
+
+    @contextmanager
+    def use(self, key, value):
+        """
+        Temporarily set a parameter value using the with statement.
+        Aliasing allowed.
+        """
+        old_value = self[key]
+        try:
+            self[key] = value
+            yield self
+        finally:
+            self[key] = old_value
+
+
+plot_params = _Options()
diff --git a/.local/lib/python3.8/site-packages/pandas/testing.py b/.local/lib/python3.8/site-packages/pandas/testing.py
new file mode 100644
index 0000000..841b55d
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/testing.py
@@ -0,0 +1,18 @@
+"""
+Public testing utility functions.
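+
+Example usage (an illustrative sketch, not from the upstream docstring):
+
+>>> import pandas as pd
+>>> from pandas.testing import assert_frame_equal
+>>> assert_frame_equal(pd.DataFrame({"a": [1]}), pd.DataFrame({"a": [1]}))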
+""" + + +from pandas._testing import ( + assert_extension_array_equal, + assert_frame_equal, + assert_index_equal, + assert_series_equal, +) + +__all__ = [ + "assert_extension_array_equal", + "assert_frame_equal", + "assert_series_equal", + "assert_index_equal", +] diff --git a/.local/lib/python3.8/site-packages/pandas/tests/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..11414d5 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/test_aggregation.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/test_aggregation.cpython-38.pyc new file mode 100644 index 0000000..02616d0 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/test_aggregation.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/test_algos.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/test_algos.cpython-38.pyc new file mode 100644 index 0000000..d2a3f67 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/test_algos.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/test_common.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/test_common.cpython-38.pyc new file mode 100644 index 0000000..59e4c20 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/test_common.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/test_downstream.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/test_downstream.cpython-38.pyc new file mode 100644 index 0000000..7eca577 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/test_downstream.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/test_errors.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/test_errors.cpython-38.pyc new file mode 100644 index 0000000..ea05bfd Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/test_errors.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/test_expressions.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/test_expressions.cpython-38.pyc new file mode 100644 index 0000000..e0ba6a0 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/test_expressions.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/test_flags.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/test_flags.cpython-38.pyc new file mode 100644 index 0000000..5cf4672 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/test_flags.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/test_multilevel.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/test_multilevel.cpython-38.pyc new file mode 100644 index 
0000000..86cd037 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/test_multilevel.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/test_nanops.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/test_nanops.cpython-38.pyc new file mode 100644 index 0000000..6ca963c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/test_nanops.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/test_optional_dependency.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/test_optional_dependency.cpython-38.pyc new file mode 100644 index 0000000..c686a3b Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/test_optional_dependency.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/test_register_accessor.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/test_register_accessor.cpython-38.pyc new file mode 100644 index 0000000..3c8c627 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/test_register_accessor.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/test_sorting.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/test_sorting.cpython-38.pyc new file mode 100644 index 0000000..cf40f9f Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/test_sorting.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/test_take.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/test_take.cpython-38.pyc new file mode 100644 index 0000000..d1fd923 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/__pycache__/test_take.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/api/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/api/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/api/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..e8b6c2b Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/api/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/api/__pycache__/test_api.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/api/__pycache__/test_api.cpython-38.pyc new file mode 100644 index 0000000..7a5637c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/api/__pycache__/test_api.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/api/__pycache__/test_types.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/api/__pycache__/test_types.cpython-38.pyc new file mode 100644 index 0000000..b2bbe50 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/api/__pycache__/test_types.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/api/test_api.py b/.local/lib/python3.8/site-packages/pandas/tests/api/test_api.py new file mode 100644 index 0000000..2e306c7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/api/test_api.py @@ -0,0 +1,328 @@ 
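+# The tests below pin down the public pandas namespace: Base.check()
+# enumerates the non-dunder names in a namespace via dir(), optionally
+# subtracts an ignore-list, and compares the sorted result against an
+# explicit whitelist, so any accidental addition to (or removal from)
+# the top-level API fails these tests.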
+from __future__ import annotations + +import subprocess +import sys + +import pytest + +import pandas as pd +from pandas import api +import pandas._testing as tm + + +class Base: + def check(self, namespace, expected, ignored=None): + # see which names are in the namespace, minus optional + # ignored ones + # compare vs the expected + + result = sorted(f for f in dir(namespace) if not f.startswith("__")) + if ignored is not None: + result = sorted(set(result) - set(ignored)) + + expected = sorted(expected) + tm.assert_almost_equal(result, expected) + + +class TestPDApi(Base): + # these are optionally imported based on testing + # & need to be ignored + ignored = ["tests", "locale", "conftest"] + + # top-level sub-packages + public_lib = [ + "api", + "arrays", + "options", + "test", + "testing", + "errors", + "plotting", + "io", + "tseries", + ] + private_lib = ["compat", "core", "pandas", "util"] + + # these are already deprecated; awaiting removal + deprecated_modules: list[str] = ["np", "datetime"] + + # misc + misc = ["IndexSlice", "NaT", "NA"] + + # top-level classes + classes = [ + "Categorical", + "CategoricalIndex", + "DataFrame", + "DateOffset", + "DatetimeIndex", + "ExcelFile", + "ExcelWriter", + "Float64Index", + "Flags", + "Grouper", + "HDFStore", + "Index", + "Int64Index", + "MultiIndex", + "Period", + "PeriodIndex", + "RangeIndex", + "UInt64Index", + "Series", + "SparseDtype", + "StringDtype", + "Timedelta", + "TimedeltaIndex", + "Timestamp", + "Interval", + "IntervalIndex", + "CategoricalDtype", + "PeriodDtype", + "IntervalDtype", + "DatetimeTZDtype", + "BooleanDtype", + "Int8Dtype", + "Int16Dtype", + "Int32Dtype", + "Int64Dtype", + "UInt8Dtype", + "UInt16Dtype", + "UInt32Dtype", + "UInt64Dtype", + "Float32Dtype", + "Float64Dtype", + "NamedAgg", + ] + + # these are already deprecated; awaiting removal + deprecated_classes: list[str] = ["Float64Index", "Int64Index", "UInt64Index"] + + # these should be deprecated in the future + deprecated_classes_in_future: list[str] = ["SparseArray"] + + # external modules exposed in pandas namespace + modules: list[str] = [] + + # top-level functions + funcs = [ + "array", + "bdate_range", + "concat", + "crosstab", + "cut", + "date_range", + "interval_range", + "eval", + "factorize", + "get_dummies", + "infer_freq", + "isna", + "isnull", + "lreshape", + "melt", + "notna", + "notnull", + "offsets", + "merge", + "merge_ordered", + "merge_asof", + "period_range", + "pivot", + "pivot_table", + "qcut", + "show_versions", + "timedelta_range", + "unique", + "value_counts", + "wide_to_long", + ] + + # top-level option funcs + funcs_option = [ + "reset_option", + "describe_option", + "get_option", + "option_context", + "set_option", + "set_eng_float_format", + ] + + # top-level read_* funcs + funcs_read = [ + "read_clipboard", + "read_csv", + "read_excel", + "read_fwf", + "read_gbq", + "read_hdf", + "read_html", + "read_xml", + "read_json", + "read_pickle", + "read_sas", + "read_sql", + "read_sql_query", + "read_sql_table", + "read_stata", + "read_table", + "read_feather", + "read_parquet", + "read_orc", + "read_spss", + ] + + # top-level json funcs + funcs_json = ["json_normalize"] + + # top-level to_* funcs + funcs_to = ["to_datetime", "to_numeric", "to_pickle", "to_timedelta"] + + # top-level to deprecate in the future + deprecated_funcs_in_future: list[str] = [] + + # these are already deprecated; awaiting removal + deprecated_funcs: list[str] = [] + + # private modules in pandas namespace + private_modules = [ + "_config", + "_libs", + 
"_is_numpy_dev", + "_testing", + "_typing", + "_version", + ] + + def test_api(self): + + checkthese = ( + self.public_lib + + self.private_lib + + self.misc + + self.modules + + self.classes + + self.funcs + + self.funcs_option + + self.funcs_read + + self.funcs_json + + self.funcs_to + + self.private_modules + ) + self.check(namespace=pd, expected=checkthese, ignored=self.ignored) + + def test_api_all(self): + expected = set( + self.public_lib + + self.misc + + self.modules + + self.classes + + self.funcs + + self.funcs_option + + self.funcs_read + + self.funcs_json + + self.funcs_to + ) - set(self.deprecated_classes) + actual = set(pd.__all__) + + extraneous = actual - expected + assert not extraneous + + missing = expected - actual + assert not missing + + def test_depr(self): + deprecated_list = ( + self.deprecated_modules + + self.deprecated_classes + + self.deprecated_classes_in_future + + self.deprecated_funcs + + self.deprecated_funcs_in_future + ) + for depr in deprecated_list: + with tm.assert_produces_warning(FutureWarning): + _ = getattr(pd, depr) + + +def test_datetime(): + from datetime import datetime + import warnings + + with warnings.catch_warnings(): + warnings.simplefilter("ignore", FutureWarning) + assert datetime(2015, 1, 2, 0, 0) == datetime(2015, 1, 2, 0, 0) + + assert isinstance(datetime(2015, 1, 2, 0, 0), datetime) + + +def test_sparsearray(): + import warnings + + with warnings.catch_warnings(): + warnings.simplefilter("ignore", FutureWarning) + assert isinstance(pd.array([1, 2, 3], dtype="Sparse"), pd.SparseArray) + + +def test_np(): + import warnings + + import numpy as np + + with warnings.catch_warnings(): + warnings.simplefilter("ignore", FutureWarning) + assert (pd.np.arange(0, 10) == np.arange(0, 10)).all() + + +class TestApi(Base): + allowed = ["types", "extensions", "indexers"] + + def test_api(self): + self.check(api, self.allowed) + + +class TestTesting(Base): + funcs = [ + "assert_frame_equal", + "assert_series_equal", + "assert_index_equal", + "assert_extension_array_equal", + ] + + def test_testing(self): + from pandas import testing # noqa: PDF015 + + self.check(testing, self.funcs) + + def test_util_testing_deprecated(self): + # avoid cache state affecting the test + sys.modules.pop("pandas.util.testing", None) + + with tm.assert_produces_warning(FutureWarning) as m: + import pandas.util.testing # noqa: F401 + + assert "pandas.util.testing is deprecated" in str(m[0].message) + assert "pandas.testing instead" in str(m[0].message) + + def test_util_testing_deprecated_direct(self): + # avoid cache state affecting the test + sys.modules.pop("pandas.util.testing", None) + with tm.assert_produces_warning(FutureWarning) as m: + from pandas.util.testing import assert_series_equal # noqa: F401 + + assert "pandas.util.testing is deprecated" in str(m[0].message) + assert "pandas.testing instead" in str(m[0].message) + + def test_util_in_top_level(self): + # in a subprocess to avoid import caching issues + out = subprocess.check_output( + [ + sys.executable, + "-c", + "import pandas; pandas.util.testing.assert_series_equal", + ], + stderr=subprocess.STDOUT, + ).decode() + assert "pandas.util.testing is deprecated" in out + + with pytest.raises(AttributeError, match="foo"): + pd.util.foo diff --git a/.local/lib/python3.8/site-packages/pandas/tests/api/test_types.py b/.local/lib/python3.8/site-packages/pandas/tests/api/test_types.py new file mode 100644 index 0000000..7b6cc94 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/api/test_types.py 
@@ -0,0 +1,63 @@ +import pandas._testing as tm +from pandas.api import types +from pandas.tests.api.test_api import Base + + +class TestTypes(Base): + + allowed = [ + "is_bool", + "is_bool_dtype", + "is_categorical", + "is_categorical_dtype", + "is_complex", + "is_complex_dtype", + "is_datetime64_any_dtype", + "is_datetime64_dtype", + "is_datetime64_ns_dtype", + "is_datetime64tz_dtype", + "is_dtype_equal", + "is_float", + "is_float_dtype", + "is_int64_dtype", + "is_integer", + "is_integer_dtype", + "is_number", + "is_numeric_dtype", + "is_object_dtype", + "is_scalar", + "is_sparse", + "is_string_dtype", + "is_signed_integer_dtype", + "is_timedelta64_dtype", + "is_timedelta64_ns_dtype", + "is_unsigned_integer_dtype", + "is_period_dtype", + "is_interval", + "is_interval_dtype", + "is_re", + "is_re_compilable", + "is_dict_like", + "is_iterator", + "is_file_like", + "is_list_like", + "is_hashable", + "is_array_like", + "is_named_tuple", + "pandas_dtype", + "union_categoricals", + "infer_dtype", + "is_extension_array_dtype", + ] + deprecated = ["is_extension_type"] + dtypes = ["CategoricalDtype", "DatetimeTZDtype", "PeriodDtype", "IntervalDtype"] + + def test_types(self): + + self.check(types, self.allowed + self.dtypes + self.deprecated) + + def test_deprecated_from_api_types(self): + + for t in self.deprecated: + with tm.assert_produces_warning(FutureWarning): + getattr(types, t)(1) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/apply/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/apply/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/apply/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/apply/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..2d8be66 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/apply/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/apply/__pycache__/common.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/apply/__pycache__/common.cpython-38.pyc new file mode 100644 index 0000000..56744cf Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/apply/__pycache__/common.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/apply/__pycache__/conftest.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/apply/__pycache__/conftest.cpython-38.pyc new file mode 100644 index 0000000..0cefa0d Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/apply/__pycache__/conftest.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/apply/__pycache__/test_frame_apply.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/apply/__pycache__/test_frame_apply.cpython-38.pyc new file mode 100644 index 0000000..a37c3f0 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/apply/__pycache__/test_frame_apply.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/apply/__pycache__/test_frame_apply_relabeling.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/apply/__pycache__/test_frame_apply_relabeling.cpython-38.pyc new file mode 100644 index 0000000..e0800d7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/apply/__pycache__/test_frame_apply_relabeling.cpython-38.pyc differ diff --git 
a/.local/lib/python3.8/site-packages/pandas/tests/apply/__pycache__/test_frame_transform.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/apply/__pycache__/test_frame_transform.cpython-38.pyc new file mode 100644 index 0000000..bdcef62 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/apply/__pycache__/test_frame_transform.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/apply/__pycache__/test_invalid_arg.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/apply/__pycache__/test_invalid_arg.cpython-38.pyc new file mode 100644 index 0000000..53242ae Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/apply/__pycache__/test_invalid_arg.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/apply/__pycache__/test_series_apply.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/apply/__pycache__/test_series_apply.cpython-38.pyc new file mode 100644 index 0000000..702fd27 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/apply/__pycache__/test_series_apply.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/apply/__pycache__/test_series_apply_relabeling.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/apply/__pycache__/test_series_apply_relabeling.cpython-38.pyc new file mode 100644 index 0000000..099ddab Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/apply/__pycache__/test_series_apply_relabeling.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/apply/__pycache__/test_series_transform.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/apply/__pycache__/test_series_transform.cpython-38.pyc new file mode 100644 index 0000000..89b17c9 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/apply/__pycache__/test_series_transform.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/apply/__pycache__/test_str.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/apply/__pycache__/test_str.cpython-38.pyc new file mode 100644 index 0000000..0ce4314 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/apply/__pycache__/test_str.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/apply/common.py b/.local/lib/python3.8/site-packages/pandas/tests/apply/common.py new file mode 100644 index 0000000..91b831b --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/apply/common.py @@ -0,0 +1,10 @@ +from pandas.core.groupby.base import transformation_kernels + +# tshift only works on time index and is deprecated +# There is no Series.cumcount or DataFrame.cumcount +series_transform_kernels = [ + x for x in sorted(transformation_kernels) if x not in ["tshift", "cumcount"] +] +frame_transform_kernels = [ + x for x in sorted(transformation_kernels) if x not in ["tshift", "cumcount"] +] diff --git a/.local/lib/python3.8/site-packages/pandas/tests/apply/conftest.py b/.local/lib/python3.8/site-packages/pandas/tests/apply/conftest.py new file mode 100644 index 0000000..b68c623 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/apply/conftest.py @@ -0,0 +1,18 @@ +import numpy as np +import pytest + +from pandas import DataFrame + + +@pytest.fixture +def int_frame_const_col(): + """ + Fixture for DataFrame of ints which are constant per column + + Columns are ['A', 'B', 
'C'], with values (per column): [1, 2, 3] + """ + df = DataFrame( + np.tile(np.arange(3, dtype="int64"), 6).reshape(6, -1) + 1, + columns=["A", "B", "C"], + ) + return df diff --git a/.local/lib/python3.8/site-packages/pandas/tests/apply/test_frame_apply.py b/.local/lib/python3.8/site-packages/pandas/tests/apply/test_frame_apply.py new file mode 100644 index 0000000..9887257 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/apply/test_frame_apply.py @@ -0,0 +1,1553 @@ +from datetime import datetime +import warnings + +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import CategoricalDtype + +import pandas as pd +from pandas import ( + DataFrame, + MultiIndex, + Series, + Timestamp, + date_range, +) +import pandas._testing as tm +from pandas.tests.frame.common import zip_frames + + +def test_apply(float_frame): + with np.errstate(all="ignore"): + # ufunc + result = np.sqrt(float_frame["A"]) + expected = float_frame.apply(np.sqrt)["A"] + tm.assert_series_equal(result, expected) + + # aggregator + result = float_frame.apply(np.mean)["A"] + expected = np.mean(float_frame["A"]) + assert result == expected + + d = float_frame.index[0] + result = float_frame.apply(np.mean, axis=1) + expected = np.mean(float_frame.xs(d)) + assert result[d] == expected + assert result.index is float_frame.index + + +def test_apply_categorical_func(): + # GH 9573 + df = DataFrame({"c0": ["A", "A", "B", "B"], "c1": ["C", "C", "D", "D"]}) + result = df.apply(lambda ts: ts.astype("category")) + + assert result.shape == (4, 2) + assert isinstance(result["c0"].dtype, CategoricalDtype) + assert isinstance(result["c1"].dtype, CategoricalDtype) + + +def test_apply_axis1_with_ea(): + # GH#36785 + expected = DataFrame({"A": [Timestamp("2013-01-01", tz="UTC")]}) + result = expected.apply(lambda x: x, axis=1) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "data, dtype", + [(1, None), (1, CategoricalDtype([1])), (Timestamp("2013-01-01", tz="UTC"), None)], +) +def test_agg_axis1_duplicate_index(data, dtype): + # GH 42380 + expected = DataFrame([[data], [data]], index=["a", "a"], dtype=dtype) + result = expected.agg(lambda x: x, axis=1) + tm.assert_frame_equal(result, expected) + + +def test_apply_mixed_datetimelike(): + # mixed datetimelike + # GH 7778 + expected = DataFrame( + { + "A": date_range("20130101", periods=3), + "B": pd.to_timedelta(np.arange(3), unit="s"), + } + ) + result = expected.apply(lambda x: x, axis=1) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("func", [np.sqrt, np.mean]) +def test_apply_empty(func): + # empty + empty_frame = DataFrame() + + result = empty_frame.apply(func) + assert result.empty + + +def test_apply_float_frame(float_frame): + no_rows = float_frame[:0] + result = no_rows.apply(lambda x: x.mean()) + expected = Series(np.nan, index=float_frame.columns) + tm.assert_series_equal(result, expected) + + no_cols = float_frame.loc[:, []] + result = no_cols.apply(lambda x: x.mean(), axis=1) + expected = Series(np.nan, index=float_frame.index) + tm.assert_series_equal(result, expected) + + +def test_apply_empty_except_index(): + # GH 2476 + expected = DataFrame(index=["a"]) + result = expected.apply(lambda x: x["a"], axis=1) + tm.assert_frame_equal(result, expected) + + +def test_apply_with_reduce_empty(): + # reduce with an empty DataFrame + empty_frame = DataFrame() + + x = [] + result = empty_frame.apply(x.append, axis=1, result_type="expand") + tm.assert_frame_equal(result, empty_frame) + result = 
empty_frame.apply(x.append, axis=1, result_type="reduce") + expected = Series([], index=pd.Index([], dtype=object), dtype=np.float64) + tm.assert_series_equal(result, expected) + + empty_with_cols = DataFrame(columns=["a", "b", "c"]) + result = empty_with_cols.apply(x.append, axis=1, result_type="expand") + tm.assert_frame_equal(result, empty_with_cols) + result = empty_with_cols.apply(x.append, axis=1, result_type="reduce") + expected = Series([], index=pd.Index([], dtype=object), dtype=np.float64) + tm.assert_series_equal(result, expected) + + # Ensure that x.append hasn't been called + assert x == [] + + +@pytest.mark.parametrize("func", ["sum", "prod", "any", "all"]) +def test_apply_funcs_over_empty(func): + # GH 28213 + df = DataFrame(columns=["a", "b", "c"]) + + result = df.apply(getattr(np, func)) + expected = getattr(df, func)() + tm.assert_series_equal(result, expected) + + +def test_nunique_empty(): + # GH 28213 + df = DataFrame(columns=["a", "b", "c"]) + + result = df.nunique() + expected = Series(0, index=df.columns) + tm.assert_series_equal(result, expected) + + result = df.T.nunique() + expected = Series([], index=pd.Index([]), dtype=np.float64) + tm.assert_series_equal(result, expected) + + +def test_apply_standard_nonunique(): + df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=["a", "a", "c"]) + + result = df.apply(lambda s: s[0], axis=1) + expected = Series([1, 4, 7], ["a", "a", "c"]) + tm.assert_series_equal(result, expected) + + result = df.T.apply(lambda s: s[0], axis=0) + tm.assert_series_equal(result, expected) + + +def test_apply_broadcast_scalars(float_frame): + # scalars + result = float_frame.apply(np.mean, result_type="broadcast") + expected = DataFrame([float_frame.mean()], index=float_frame.index) + tm.assert_frame_equal(result, expected) + + +def test_apply_broadcast_scalars_axis1(float_frame): + result = float_frame.apply(np.mean, axis=1, result_type="broadcast") + m = float_frame.mean(axis=1) + expected = DataFrame({c: m for c in float_frame.columns}) + tm.assert_frame_equal(result, expected) + + +def test_apply_broadcast_lists_columns(float_frame): + # lists + result = float_frame.apply( + lambda x: list(range(len(float_frame.columns))), + axis=1, + result_type="broadcast", + ) + m = list(range(len(float_frame.columns))) + expected = DataFrame( + [m] * len(float_frame.index), + dtype="float64", + index=float_frame.index, + columns=float_frame.columns, + ) + tm.assert_frame_equal(result, expected) + + +def test_apply_broadcast_lists_index(float_frame): + result = float_frame.apply( + lambda x: list(range(len(float_frame.index))), result_type="broadcast" + ) + m = list(range(len(float_frame.index))) + expected = DataFrame( + {c: m for c in float_frame.columns}, + dtype="float64", + index=float_frame.index, + ) + tm.assert_frame_equal(result, expected) + + +def test_apply_broadcast_list_lambda_func(int_frame_const_col): + # preserve columns + df = int_frame_const_col + result = df.apply(lambda x: [1, 2, 3], axis=1, result_type="broadcast") + tm.assert_frame_equal(result, df) + + +def test_apply_broadcast_series_lambda_func(int_frame_const_col): + df = int_frame_const_col + result = df.apply( + lambda x: Series([1, 2, 3], index=list("abc")), + axis=1, + result_type="broadcast", + ) + expected = df.copy() + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("axis", [0, 1]) +def test_apply_raw_float_frame(float_frame, axis): + def _assert_raw(x): + assert isinstance(x, np.ndarray) + assert x.ndim == 1 + + float_frame.apply(_assert_raw, 
axis=axis, raw=True) + + +@pytest.mark.parametrize("axis", [0, 1]) +def test_apply_raw_float_frame_lambda(float_frame, axis): + result = float_frame.apply(np.mean, axis=axis, raw=True) + expected = float_frame.apply(lambda x: x.values.mean(), axis=axis) + tm.assert_series_equal(result, expected) + + +def test_apply_raw_float_frame_no_reduction(float_frame): + # no reduction + result = float_frame.apply(lambda x: x * 2, raw=True) + expected = float_frame * 2 + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("axis", [0, 1]) +def test_apply_raw_mixed_type_frame(mixed_type_frame, axis): + def _assert_raw(x): + assert isinstance(x, np.ndarray) + assert x.ndim == 1 + + # Mixed dtype (GH-32423) + mixed_type_frame.apply(_assert_raw, axis=axis, raw=True) + + +def test_apply_axis1(float_frame): + d = float_frame.index[0] + result = float_frame.apply(np.mean, axis=1)[d] + expected = np.mean(float_frame.xs(d)) + assert result == expected + + +def test_apply_mixed_dtype_corner(): + df = DataFrame({"A": ["foo"], "B": [1.0]}) + result = df[:0].apply(np.mean, axis=1) + # the result here is actually kind of ambiguous, should it be a Series + # or a DataFrame? + expected = Series(np.nan, index=pd.Index([], dtype="int64")) + tm.assert_series_equal(result, expected) + + +def test_apply_mixed_dtype_corner_indexing(): + df = DataFrame({"A": ["foo"], "B": [1.0]}) + result = df.apply(lambda x: x["A"], axis=1) + expected = Series(["foo"], index=[0]) + tm.assert_series_equal(result, expected) + + result = df.apply(lambda x: x["B"], axis=1) + expected = Series([1.0], index=[0]) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("ax", ["index", "columns"]) +@pytest.mark.parametrize( + "func", [lambda x: x, lambda x: x.mean()], ids=["identity", "mean"] +) +@pytest.mark.parametrize("raw", [True, False]) +@pytest.mark.parametrize("axis", [0, 1]) +def test_apply_empty_infer_type(ax, func, raw, axis): + df = DataFrame(**{ax: ["a", "b", "c"]}) + + with np.errstate(all="ignore"): + with warnings.catch_warnings(record=True): + warnings.simplefilter("ignore", RuntimeWarning) + test_res = func(np.array([], dtype="f8")) + is_reduction = not isinstance(test_res, np.ndarray) + + result = df.apply(func, axis=axis, raw=raw) + if is_reduction: + agg_axis = df._get_agg_axis(axis) + assert isinstance(result, Series) + assert result.index is agg_axis + else: + assert isinstance(result, DataFrame) + + +def test_apply_empty_infer_type_broadcast(): + no_cols = DataFrame(index=["a", "b", "c"]) + result = no_cols.apply(lambda x: x.mean(), result_type="broadcast") + assert isinstance(result, DataFrame) + + +def test_apply_with_args_kwds_add_some(float_frame): + def add_some(x, howmuch=0): + return x + howmuch + + result = float_frame.apply(add_some, howmuch=2) + expected = float_frame.apply(lambda x: x + 2) + tm.assert_frame_equal(result, expected) + + +def test_apply_with_args_kwds_agg_and_add(float_frame): + def agg_and_add(x, howmuch=0): + return x.mean() + howmuch + + result = float_frame.apply(agg_and_add, howmuch=2) + expected = float_frame.apply(lambda x: x.mean() + 2) + tm.assert_series_equal(result, expected) + + +def test_apply_with_args_kwds_subtract_and_divide(float_frame): + def subtract_and_divide(x, sub, divide=1): + return (x - sub) / divide + + result = float_frame.apply(subtract_and_divide, args=(2,), divide=2) + expected = float_frame.apply(lambda x: (x - 2.0) / 2.0) + tm.assert_frame_equal(result, expected) + + +def test_apply_yield_list(float_frame): + result = 
float_frame.apply(list) + tm.assert_frame_equal(result, float_frame) + + +def test_apply_reduce_Series(float_frame): + float_frame["A"].iloc[::2] = np.nan + expected = float_frame.mean(1) + result = float_frame.apply(np.mean, axis=1) + tm.assert_series_equal(result, expected) + + +def test_apply_reduce_to_dict(): + # GH 25196 37544 + data = DataFrame([[1, 2], [3, 4]], columns=["c0", "c1"], index=["i0", "i1"]) + + result = data.apply(dict, axis=0) + expected = Series([{"i0": 1, "i1": 3}, {"i0": 2, "i1": 4}], index=data.columns) + tm.assert_series_equal(result, expected) + + result = data.apply(dict, axis=1) + expected = Series([{"c0": 1, "c1": 2}, {"c0": 3, "c1": 4}], index=data.index) + tm.assert_series_equal(result, expected) + + +def test_apply_differently_indexed(): + df = DataFrame(np.random.randn(20, 10)) + + result = df.apply(Series.describe, axis=0) + expected = DataFrame({i: v.describe() for i, v in df.items()}, columns=df.columns) + tm.assert_frame_equal(result, expected) + + result = df.apply(Series.describe, axis=1) + expected = DataFrame({i: v.describe() for i, v in df.T.items()}, columns=df.index).T + tm.assert_frame_equal(result, expected) + + +def test_apply_bug(): + + # GH 6125 + positions = DataFrame( + [ + [1, "ABC0", 50], + [1, "YUM0", 20], + [1, "DEF0", 20], + [2, "ABC1", 50], + [2, "YUM1", 20], + [2, "DEF1", 20], + ], + columns=["a", "market", "position"], + ) + + def f(r): + return r["market"] + + expected = positions.apply(f, axis=1) + + positions = DataFrame( + [ + [datetime(2013, 1, 1), "ABC0", 50], + [datetime(2013, 1, 2), "YUM0", 20], + [datetime(2013, 1, 3), "DEF0", 20], + [datetime(2013, 1, 4), "ABC1", 50], + [datetime(2013, 1, 5), "YUM1", 20], + [datetime(2013, 1, 6), "DEF1", 20], + ], + columns=["a", "market", "position"], + ) + result = positions.apply(f, axis=1) + tm.assert_series_equal(result, expected) + + +def test_apply_convert_objects(): + expected = DataFrame( + { + "A": [ + "foo", + "foo", + "foo", + "foo", + "bar", + "bar", + "bar", + "bar", + "foo", + "foo", + "foo", + ], + "B": [ + "one", + "one", + "one", + "two", + "one", + "one", + "one", + "two", + "two", + "two", + "one", + ], + "C": [ + "dull", + "dull", + "shiny", + "dull", + "dull", + "shiny", + "shiny", + "dull", + "shiny", + "shiny", + "shiny", + ], + "D": np.random.randn(11), + "E": np.random.randn(11), + "F": np.random.randn(11), + } + ) + + result = expected.apply(lambda x: x, axis=1)._convert(datetime=True) + tm.assert_frame_equal(result, expected) + + +def test_apply_attach_name(float_frame): + result = float_frame.apply(lambda x: x.name) + expected = Series(float_frame.columns, index=float_frame.columns) + tm.assert_series_equal(result, expected) + + +def test_apply_attach_name_axis1(float_frame): + result = float_frame.apply(lambda x: x.name, axis=1) + expected = Series(float_frame.index, index=float_frame.index) + tm.assert_series_equal(result, expected) + + +def test_apply_attach_name_non_reduction(float_frame): + # non-reductions + result = float_frame.apply(lambda x: np.repeat(x.name, len(x))) + expected = DataFrame( + np.tile(float_frame.columns, (len(float_frame.index), 1)), + index=float_frame.index, + columns=float_frame.columns, + ) + tm.assert_frame_equal(result, expected) + + +def test_apply_attach_name_non_reduction_axis1(float_frame): + result = float_frame.apply(lambda x: np.repeat(x.name, len(x)), axis=1) + expected = Series( + np.repeat(t[0], len(float_frame.columns)) for t in float_frame.itertuples() + ) + expected.index = float_frame.index + 
tm.assert_series_equal(result, expected) + + +def test_apply_multi_index(): + index = MultiIndex.from_arrays([["a", "a", "b"], ["c", "d", "d"]]) + s = DataFrame([[1, 2], [3, 4], [5, 6]], index=index, columns=["col1", "col2"]) + result = s.apply(lambda x: Series({"min": min(x), "max": max(x)}), 1) + expected = DataFrame([[1, 2], [3, 4], [5, 6]], index=index, columns=["min", "max"]) + tm.assert_frame_equal(result, expected, check_like=True) + + +@pytest.mark.parametrize( + "df, dicts", + [ + [ + DataFrame([["foo", "bar"], ["spam", "eggs"]]), + Series([{0: "foo", 1: "spam"}, {0: "bar", 1: "eggs"}]), + ], + [DataFrame([[0, 1], [2, 3]]), Series([{0: 0, 1: 2}, {0: 1, 1: 3}])], + ], +) +def test_apply_dict(df, dicts): + # GH 8735 + fn = lambda x: x.to_dict() + reduce_true = df.apply(fn, result_type="reduce") + reduce_false = df.apply(fn, result_type="expand") + reduce_none = df.apply(fn) + + tm.assert_series_equal(reduce_true, dicts) + tm.assert_frame_equal(reduce_false, df) + tm.assert_series_equal(reduce_none, dicts) + + +def test_applymap(float_frame): + applied = float_frame.applymap(lambda x: x * 2) + tm.assert_frame_equal(applied, float_frame * 2) + float_frame.applymap(type) + + # GH 465: function returning tuples + result = float_frame.applymap(lambda x: (x, x))["A"][0] + assert isinstance(result, tuple) + + +@pytest.mark.parametrize("val", [1, 1.0]) +def test_applymap_float_object_conversion(val): + # GH 2909: object conversion to float in constructor? + df = DataFrame(data=[val, "a"]) + result = df.applymap(lambda x: x).dtypes[0] + assert result == object + + +def test_applymap_str(): + # GH 2786 + df = DataFrame(np.random.random((3, 4))) + df2 = df.copy() + cols = ["a", "a", "a", "a"] + df.columns = cols + + expected = df2.applymap(str) + expected.columns = cols + result = df.applymap(str) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "col, val", + [["datetime", Timestamp("20130101")], ["timedelta", pd.Timedelta("1 min")]], +) +def test_applymap_datetimelike(col, val): + # datetime/timedelta + df = DataFrame(np.random.random((3, 4))) + df[col] = val + result = df.applymap(str) + assert result.loc[0, col] == str(df.loc[0, col]) + + +@pytest.mark.parametrize( + "expected", + [ + DataFrame(), + DataFrame(columns=list("ABC")), + DataFrame(index=list("ABC")), + DataFrame({"A": [], "B": [], "C": []}), + ], +) +@pytest.mark.parametrize("func", [round, lambda x: x]) +def test_applymap_empty(expected, func): + # GH 8222 + result = expected.applymap(func) + tm.assert_frame_equal(result, expected) + + +def test_applymap_kwargs(): + # GH 40652 + result = DataFrame([[1, 2], [3, 4]]).applymap(lambda x, y: x + y, y=2) + expected = DataFrame([[3, 4], [5, 6]]) + tm.assert_frame_equal(result, expected) + + +def test_applymap_na_ignore(float_frame): + # GH 23803 + strlen_frame = float_frame.applymap(lambda x: len(str(x))) + float_frame_with_na = float_frame.copy() + mask = np.random.randint(0, 2, size=float_frame.shape, dtype=bool) + float_frame_with_na[mask] = pd.NA + strlen_frame_na_ignore = float_frame_with_na.applymap( + lambda x: len(str(x)), na_action="ignore" + ) + strlen_frame_with_na = strlen_frame.copy() + strlen_frame_with_na[mask] = pd.NA + tm.assert_frame_equal(strlen_frame_na_ignore, strlen_frame_with_na) + + +def test_applymap_box_timestamps(): + # GH 2689, GH 2627 + ser = Series(date_range("1/1/2000", periods=10)) + + def func(x): + return (x.hour, x.day, x.month) + + # it works! 
+ DataFrame(ser).applymap(func) + + +def test_applymap_box(): + # ufunc will not be boxed. Same test cases as the test_map_box + df = DataFrame( + { + "a": [Timestamp("2011-01-01"), Timestamp("2011-01-02")], + "b": [ + Timestamp("2011-01-01", tz="US/Eastern"), + Timestamp("2011-01-02", tz="US/Eastern"), + ], + "c": [pd.Timedelta("1 days"), pd.Timedelta("2 days")], + "d": [ + pd.Period("2011-01-01", freq="M"), + pd.Period("2011-01-02", freq="M"), + ], + } + ) + + result = df.applymap(lambda x: type(x).__name__) + expected = DataFrame( + { + "a": ["Timestamp", "Timestamp"], + "b": ["Timestamp", "Timestamp"], + "c": ["Timedelta", "Timedelta"], + "d": ["Period", "Period"], + } + ) + tm.assert_frame_equal(result, expected) + + +def test_frame_apply_dont_convert_datetime64(): + from pandas.tseries.offsets import BDay + + df = DataFrame({"x1": [datetime(1996, 1, 1)]}) + + df = df.applymap(lambda x: x + BDay()) + df = df.applymap(lambda x: x + BDay()) + + result = df.x1.dtype + assert result == "M8[ns]" + + +def test_apply_non_numpy_dtype(): + # GH 12244 + df = DataFrame({"dt": date_range("2015-01-01", periods=3, tz="Europe/Brussels")}) + result = df.apply(lambda x: x) + tm.assert_frame_equal(result, df) + + result = df.apply(lambda x: x + pd.Timedelta("1day")) + expected = DataFrame( + {"dt": date_range("2015-01-02", periods=3, tz="Europe/Brussels")} + ) + tm.assert_frame_equal(result, expected) + + +def test_apply_non_numpy_dtype_category(): + df = DataFrame({"dt": ["a", "b", "c", "a"]}, dtype="category") + result = df.apply(lambda x: x) + tm.assert_frame_equal(result, df) + + +def test_apply_dup_names_multi_agg(): + # GH 21063 + df = DataFrame([[0, 1], [2, 3]], columns=["a", "a"]) + expected = DataFrame([[0, 1]], columns=["a", "a"], index=["min"]) + result = df.agg(["min"]) + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("op", ["apply", "agg"]) +def test_apply_nested_result_axis_1(op): + # GH 13820 + def apply_list(row): + return [2 * row["A"], 2 * row["C"], 2 * row["B"]] + + df = DataFrame(np.zeros((4, 4)), columns=list("ABCD")) + result = getattr(df, op)(apply_list, axis=1) + expected = Series( + [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]] + ) + tm.assert_series_equal(result, expected) + + +def test_apply_noreduction_tzaware_object(): + # https://github.com/pandas-dev/pandas/issues/31505 + expected = DataFrame( + {"foo": [Timestamp("2020", tz="UTC")]}, dtype="datetime64[ns, UTC]" + ) + result = expected.apply(lambda x: x) + tm.assert_frame_equal(result, expected) + result = expected.apply(lambda x: x.copy()) + tm.assert_frame_equal(result, expected) + + +def test_apply_function_runs_once(): + # https://github.com/pandas-dev/pandas/issues/30815 + + df = DataFrame({"a": [1, 2, 3]}) + names = [] # Save row names function is applied to + + def reducing_function(row): + names.append(row.name) + + def non_reducing_function(row): + names.append(row.name) + return row + + for func in [reducing_function, non_reducing_function]: + del names[:] + + df.apply(func, axis=1) + assert names == list(df.index) + + +def test_apply_raw_function_runs_once(): + # https://github.com/pandas-dev/pandas/issues/34506 + + df = DataFrame({"a": [1, 2, 3]}) + values = [] # Save row values function is applied to + + def reducing_function(row): + values.extend(row) + + def non_reducing_function(row): + values.extend(row) + return row + + for func in [reducing_function, non_reducing_function]: + del values[:] + + df.apply(func, raw=True, axis=1) + assert values == 
list(df.a.to_list()) + + +def test_applymap_function_runs_once(): + + df = DataFrame({"a": [1, 2, 3]}) + values = [] # Save values function is applied to + + def reducing_function(val): + values.append(val) + + def non_reducing_function(val): + values.append(val) + return val + + for func in [reducing_function, non_reducing_function]: + del values[:] + + df.applymap(func) + assert values == df.a.to_list() + + +def test_apply_with_byte_string(): + # GH 34529 + df = DataFrame(np.array([b"abcd", b"efgh"]), columns=["col"]) + expected = DataFrame(np.array([b"abcd", b"efgh"]), columns=["col"], dtype=object) + # After we make the apply we expect a dataframe just + # like the original but with the object datatype + result = df.apply(lambda x: x.astype("object")) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("val", ["asd", 12, None, np.NaN]) +def test_apply_category_equalness(val): + # Check if categorical comparisons on apply, GH 21239 + df_values = ["asd", None, 12, "asd", "cde", np.NaN] + df = DataFrame({"a": df_values}, dtype="category") + + result = df.a.apply(lambda x: x == val) + expected = Series( + [np.NaN if pd.isnull(x) else x == val for x in df_values], name="a" + ) + tm.assert_series_equal(result, expected) + + +# the user has supplied an opaque UDF where +# they are transforming the input that requires +# us to infer the output + + +def test_infer_row_shape(): + # GH 17437 + # if row shape is changing, infer it + df = DataFrame(np.random.rand(10, 2)) + result = df.apply(np.fft.fft, axis=0).shape + assert result == (10, 2) + + result = df.apply(np.fft.rfft, axis=0).shape + assert result == (6, 2) + + +def test_with_dictlike_columns(): + # GH 17602 + df = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) + result = df.apply(lambda x: {"s": x["a"] + x["b"]}, axis=1) + expected = Series([{"s": 3} for t in df.itertuples()]) + tm.assert_series_equal(result, expected) + + df["tm"] = [ + Timestamp("2017-05-01 00:00:00"), + Timestamp("2017-05-02 00:00:00"), + ] + result = df.apply(lambda x: {"s": x["a"] + x["b"]}, axis=1) + tm.assert_series_equal(result, expected) + + # compose a series + result = (df["a"] + df["b"]).apply(lambda x: {"s": x}) + expected = Series([{"s": 3}, {"s": 3}]) + tm.assert_series_equal(result, expected) + + +def test_with_dictlike_columns_with_datetime(): + # GH 18775 + df = DataFrame() + df["author"] = ["X", "Y", "Z"] + df["publisher"] = ["BBC", "NBC", "N24"] + df["date"] = pd.to_datetime( + ["17-10-2010 07:15:30", "13-05-2011 08:20:35", "15-01-2013 09:09:09"] + ) + result = df.apply(lambda x: {}, axis=1) + expected = Series([{}, {}, {}]) + tm.assert_series_equal(result, expected) + + +def test_with_dictlike_columns_with_infer(): + # GH 17602 + df = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) + result = df.apply(lambda x: {"s": x["a"] + x["b"]}, axis=1, result_type="expand") + expected = DataFrame({"s": [3, 3]}) + tm.assert_frame_equal(result, expected) + + df["tm"] = [ + Timestamp("2017-05-01 00:00:00"), + Timestamp("2017-05-02 00:00:00"), + ] + result = df.apply(lambda x: {"s": x["a"] + x["b"]}, axis=1, result_type="expand") + tm.assert_frame_equal(result, expected) + + +def test_with_listlike_columns(): + # GH 17348 + df = DataFrame( + { + "a": Series(np.random.randn(4)), + "b": ["a", "list", "of", "words"], + "ts": date_range("2016-10-01", periods=4, freq="H"), + } + ) + + result = df[["a", "b"]].apply(tuple, axis=1) + expected = Series([t[1:] for t in df[["a", "b"]].itertuples()]) + tm.assert_series_equal(result, expected) + + result = 
df[["a", "ts"]].apply(tuple, axis=1) + expected = Series([t[1:] for t in df[["a", "ts"]].itertuples()]) + tm.assert_series_equal(result, expected) + + +def test_with_listlike_columns_returning_list(): + # GH 18919 + df = DataFrame({"x": Series([["a", "b"], ["q"]]), "y": Series([["z"], ["q", "t"]])}) + df.index = MultiIndex.from_tuples([("i0", "j0"), ("i1", "j1")]) + + result = df.apply(lambda row: [el for el in row["x"] if el in row["y"]], axis=1) + expected = Series([[], ["q"]], index=df.index) + tm.assert_series_equal(result, expected) + + +def test_infer_output_shape_columns(): + # GH 18573 + + df = DataFrame( + { + "number": [1.0, 2.0], + "string": ["foo", "bar"], + "datetime": [ + Timestamp("2017-11-29 03:30:00"), + Timestamp("2017-11-29 03:45:00"), + ], + } + ) + result = df.apply(lambda row: (row.number, row.string), axis=1) + expected = Series([(t.number, t.string) for t in df.itertuples()]) + tm.assert_series_equal(result, expected) + + +def test_infer_output_shape_listlike_columns(): + # GH 16353 + + df = DataFrame(np.random.randn(6, 3), columns=["A", "B", "C"]) + + result = df.apply(lambda x: [1, 2, 3], axis=1) + expected = Series([[1, 2, 3] for t in df.itertuples()]) + tm.assert_series_equal(result, expected) + + result = df.apply(lambda x: [1, 2], axis=1) + expected = Series([[1, 2] for t in df.itertuples()]) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("val", [1, 2]) +def test_infer_output_shape_listlike_columns_np_func(val): + # GH 17970 + df = DataFrame({"a": [1, 2, 3]}, index=list("abc")) + + result = df.apply(lambda row: np.ones(val), axis=1) + expected = Series([np.ones(val) for t in df.itertuples()], index=df.index) + tm.assert_series_equal(result, expected) + + +def test_infer_output_shape_listlike_columns_with_timestamp(): + # GH 17892 + df = DataFrame( + { + "a": [ + Timestamp("2010-02-01"), + Timestamp("2010-02-04"), + Timestamp("2010-02-05"), + Timestamp("2010-02-06"), + ], + "b": [9, 5, 4, 3], + "c": [5, 3, 4, 2], + "d": [1, 2, 3, 4], + } + ) + + def fun(x): + return (1, 2) + + result = df.apply(fun, axis=1) + expected = Series([(1, 2) for t in df.itertuples()]) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("lst", [[1, 2, 3], [1, 2]]) +def test_consistent_coerce_for_shapes(lst): + # we want column names to NOT be propagated + # just because the shape matches the input shape + df = DataFrame(np.random.randn(4, 3), columns=["A", "B", "C"]) + + result = df.apply(lambda x: lst, axis=1) + expected = Series([lst for t in df.itertuples()]) + tm.assert_series_equal(result, expected) + + +def test_consistent_names(int_frame_const_col): + # if a Series is returned, we should use the resulting index names + df = int_frame_const_col + + result = df.apply( + lambda x: Series([1, 2, 3], index=["test", "other", "cols"]), axis=1 + ) + expected = int_frame_const_col.rename( + columns={"A": "test", "B": "other", "C": "cols"} + ) + tm.assert_frame_equal(result, expected) + + result = df.apply(lambda x: Series([1, 2], index=["test", "other"]), axis=1) + expected = expected[["test", "other"]] + tm.assert_frame_equal(result, expected) + + +def test_result_type(int_frame_const_col): + # result_type should be consistent no matter which + # path we take in the code + df = int_frame_const_col + + result = df.apply(lambda x: [1, 2, 3], axis=1, result_type="expand") + expected = df.copy() + expected.columns = [0, 1, 2] + tm.assert_frame_equal(result, expected) + + +def test_result_type_shorter_list(int_frame_const_col): + # result_type 
should be consistent no matter which + # path we take in the code + df = int_frame_const_col + result = df.apply(lambda x: [1, 2], axis=1, result_type="expand") + expected = df[["A", "B"]].copy() + expected.columns = [0, 1] + tm.assert_frame_equal(result, expected) + + +def test_result_type_broadcast(int_frame_const_col): + # result_type should be consistent no matter which + # path we take in the code + df = int_frame_const_col + # broadcast result + result = df.apply(lambda x: [1, 2, 3], axis=1, result_type="broadcast") + expected = df.copy() + tm.assert_frame_equal(result, expected) + + +def test_result_type_broadcast_series_func(int_frame_const_col): + # result_type should be consistent no matter which + # path we take in the code + df = int_frame_const_col + columns = ["other", "col", "names"] + result = df.apply( + lambda x: Series([1, 2, 3], index=columns), axis=1, result_type="broadcast" + ) + expected = df.copy() + tm.assert_frame_equal(result, expected) + + +def test_result_type_series_result(int_frame_const_col): + # result_type should be consistent no matter which + # path we take in the code + df = int_frame_const_col + # series result + result = df.apply(lambda x: Series([1, 2, 3], index=x.index), axis=1) + expected = df.copy() + tm.assert_frame_equal(result, expected) + + +def test_result_type_series_result_other_index(int_frame_const_col): + # result_type should be consistent no matter which + # path we take in the code + df = int_frame_const_col + # series result with other index + columns = ["other", "col", "names"] + result = df.apply(lambda x: Series([1, 2, 3], index=columns), axis=1) + expected = df.copy() + expected.columns = columns + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "box", + [lambda x: list(x), lambda x: tuple(x), lambda x: np.array(x, dtype="int64")], + ids=["list", "tuple", "array"], +) +def test_consistency_for_boxed(box, int_frame_const_col): + # passing an array or list should not affect the output shape + df = int_frame_const_col + + result = df.apply(lambda x: box([1, 2]), axis=1) + expected = Series([box([1, 2]) for t in df.itertuples()]) + tm.assert_series_equal(result, expected) + + result = df.apply(lambda x: box([1, 2]), axis=1, result_type="expand") + expected = int_frame_const_col[["A", "B"]].rename(columns={"A": 0, "B": 1}) + tm.assert_frame_equal(result, expected) + + +def test_agg_transform(axis, float_frame): + other_axis = 1 if axis in {0, "index"} else 0 + + with np.errstate(all="ignore"): + + f_abs = np.abs(float_frame) + f_sqrt = np.sqrt(float_frame) + + # ufunc + expected = f_sqrt.copy() + result = float_frame.apply(np.sqrt, axis=axis) + tm.assert_frame_equal(result, expected) + + # list-like + result = float_frame.apply([np.sqrt], axis=axis) + expected = f_sqrt.copy() + if axis in {0, "index"}: + expected.columns = MultiIndex.from_product([float_frame.columns, ["sqrt"]]) + else: + expected.index = MultiIndex.from_product([float_frame.index, ["sqrt"]]) + tm.assert_frame_equal(result, expected) + + # multiple items in list + # these are in the order as if we are applying both + # functions per series and then concatting + result = float_frame.apply([np.abs, np.sqrt], axis=axis) + expected = zip_frames([f_abs, f_sqrt], axis=other_axis) + if axis in {0, "index"}: + expected.columns = MultiIndex.from_product( + [float_frame.columns, ["absolute", "sqrt"]] + ) + else: + expected.index = MultiIndex.from_product( + [float_frame.index, ["absolute", "sqrt"]] + ) + tm.assert_frame_equal(result, expected) + + +def 
test_demo(): + # demonstration tests + df = DataFrame({"A": range(5), "B": 5}) + + result = df.agg(["min", "max"]) + expected = DataFrame( + {"A": [0, 4], "B": [5, 5]}, columns=["A", "B"], index=["min", "max"] + ) + tm.assert_frame_equal(result, expected) + + +def test_demo_dict_agg(): + # demonstration tests + df = DataFrame({"A": range(5), "B": 5}) + result = df.agg({"A": ["min", "max"], "B": ["sum", "max"]}) + expected = DataFrame( + {"A": [4.0, 0.0, np.nan], "B": [5.0, np.nan, 25.0]}, + columns=["A", "B"], + index=["max", "min", "sum"], + ) + tm.assert_frame_equal(result.reindex_like(expected), expected) + + +def test_agg_with_name_as_column_name(): + # GH 36212 - Column name is "name" + data = {"name": ["foo", "bar"]} + df = DataFrame(data) + + # result's name should be None + result = df.agg({"name": "count"}) + expected = Series({"name": 2}) + tm.assert_series_equal(result, expected) + + # Check if name is still preserved when aggregating series instead + result = df["name"].agg({"name": "count"}) + expected = Series({"name": 2}, name="name") + tm.assert_series_equal(result, expected) + + +def test_agg_multiple_mixed_no_warning(): + # GH 20909 + mdf = DataFrame( + { + "A": [1, 2, 3], + "B": [1.0, 2.0, 3.0], + "C": ["foo", "bar", "baz"], + "D": date_range("20130101", periods=3), + } + ) + expected = DataFrame( + { + "A": [1, 6], + "B": [1.0, 6.0], + "C": ["bar", "foobarbaz"], + "D": [Timestamp("2013-01-01"), pd.NaT], + }, + index=["min", "sum"], + ) + # sorted index + with tm.assert_produces_warning( + FutureWarning, match=r"\['D'\] did not aggregate successfully" + ): + result = mdf.agg(["min", "sum"]) + + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning( + FutureWarning, match=r"\['D'\] did not aggregate successfully" + ): + result = mdf[["D", "C", "B", "A"]].agg(["sum", "min"]) + + # GH40420: the result of .agg should have an index that is sorted + # according to the arguments provided to agg. 
+ expected = expected[["D", "C", "B", "A"]].reindex(["sum", "min"]) + tm.assert_frame_equal(result, expected) + + +def test_agg_reduce(axis, float_frame): + other_axis = 1 if axis in {0, "index"} else 0 + name1, name2 = float_frame.axes[other_axis].unique()[:2].sort_values() + + # all reducers + expected = pd.concat( + [ + float_frame.mean(axis=axis), + float_frame.max(axis=axis), + float_frame.sum(axis=axis), + ], + axis=1, + ) + expected.columns = ["mean", "max", "sum"] + expected = expected.T if axis in {0, "index"} else expected + + result = float_frame.agg(["mean", "max", "sum"], axis=axis) + tm.assert_frame_equal(result, expected) + + # dict input with scalars + func = {name1: "mean", name2: "sum"} + result = float_frame.agg(func, axis=axis) + expected = Series( + [ + float_frame.loc(other_axis)[name1].mean(), + float_frame.loc(other_axis)[name2].sum(), + ], + index=[name1, name2], + ) + tm.assert_series_equal(result, expected) + + # dict input with lists + func = {name1: ["mean"], name2: ["sum"]} + result = float_frame.agg(func, axis=axis) + expected = DataFrame( + { + name1: Series([float_frame.loc(other_axis)[name1].mean()], index=["mean"]), + name2: Series([float_frame.loc(other_axis)[name2].sum()], index=["sum"]), + } + ) + expected = expected.T if axis in {1, "columns"} else expected + tm.assert_frame_equal(result, expected) + + # dict input with lists with multiple + func = {name1: ["mean", "sum"], name2: ["sum", "max"]} + result = float_frame.agg(func, axis=axis) + expected = pd.concat( + { + name1: Series( + [ + float_frame.loc(other_axis)[name1].mean(), + float_frame.loc(other_axis)[name1].sum(), + ], + index=["mean", "sum"], + ), + name2: Series( + [ + float_frame.loc(other_axis)[name2].sum(), + float_frame.loc(other_axis)[name2].max(), + ], + index=["sum", "max"], + ), + }, + axis=1, + ) + expected = expected.T if axis in {1, "columns"} else expected + tm.assert_frame_equal(result, expected) + + +def test_nuiscance_columns(): + + # GH 15015 + df = DataFrame( + { + "A": [1, 2, 3], + "B": [1.0, 2.0, 3.0], + "C": ["foo", "bar", "baz"], + "D": date_range("20130101", periods=3), + } + ) + + result = df.agg("min") + expected = Series([1, 1.0, "bar", Timestamp("20130101")], index=df.columns) + tm.assert_series_equal(result, expected) + + result = df.agg(["min"]) + expected = DataFrame( + [[1, 1.0, "bar", Timestamp("20130101")]], + index=["min"], + columns=df.columns, + ) + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning, match="Select only valid"): + result = df.agg("sum") + expected = Series([6, 6.0, "foobarbaz"], index=["A", "B", "C"]) + tm.assert_series_equal(result, expected) + + with tm.assert_produces_warning( + FutureWarning, match=r"\['D'\] did not aggregate successfully" + ): + result = df.agg(["sum"]) + expected = DataFrame( + [[6, 6.0, "foobarbaz"]], index=["sum"], columns=["A", "B", "C"] + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("how", ["agg", "apply"]) +def test_non_callable_aggregates(how): + + # GH 16405 + # 'size' is a property of frame/series + # validate that this is working + # GH 39116 - expand to apply + df = DataFrame( + {"A": [None, 2, 3], "B": [1.0, np.nan, 3.0], "C": ["foo", None, "bar"]} + ) + + # Function aggregate + result = getattr(df, how)({"A": "count"}) + expected = Series({"A": 2}) + + tm.assert_series_equal(result, expected) + + # Non-function aggregate + result = getattr(df, how)({"A": "size"}) + expected = Series({"A": 3}) + + tm.assert_series_equal(result, expected) + 
+ # Mix function and non-function aggs + result1 = getattr(df, how)(["count", "size"]) + result2 = getattr(df, how)( + {"A": ["count", "size"], "B": ["count", "size"], "C": ["count", "size"]} + ) + expected = DataFrame( + { + "A": {"count": 2, "size": 3}, + "B": {"count": 2, "size": 3}, + "C": {"count": 2, "size": 3}, + } + ) + + tm.assert_frame_equal(result1, result2, check_like=True) + tm.assert_frame_equal(result2, expected, check_like=True) + + # Just functional string arg is same as calling df.arg() + result = getattr(df, how)("count") + expected = df.count() + + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("how", ["agg", "apply"]) +def test_size_as_str(how, axis): + # GH 39934 + df = DataFrame( + {"A": [None, 2, 3], "B": [1.0, np.nan, 3.0], "C": ["foo", None, "bar"]} + ) + # Just a string attribute arg same as calling df.arg + # on the columns + result = getattr(df, how)("size", axis=axis) + if axis == 0 or axis == "index": + expected = Series(df.shape[0], index=df.columns) + else: + expected = Series(df.shape[1], index=df.index) + tm.assert_series_equal(result, expected) + + +def test_agg_listlike_result(): + # GH-29587 user defined function returning list-likes + df = DataFrame({"A": [2, 2, 3], "B": [1.5, np.nan, 1.5], "C": ["foo", None, "bar"]}) + + def func(group_col): + return list(group_col.dropna().unique()) + + result = df.agg(func) + expected = Series([[2, 3], [1.5], ["foo", "bar"]], index=["A", "B", "C"]) + tm.assert_series_equal(result, expected) + + result = df.agg([func]) + expected = expected.to_frame("func").T + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("axis", [0, 1]) +@pytest.mark.parametrize( + "args, kwargs", + [ + ((1, 2, 3), {}), + ((8, 7, 15), {}), + ((1, 2), {}), + ((1,), {"b": 2}), + ((), {"a": 1, "b": 2}), + ((), {"a": 2, "b": 1}), + ((), {"a": 1, "b": 2, "c": 3}), + ], +) +def test_agg_args_kwargs(axis, args, kwargs): + def f(x, a, b, c=3): + return x.sum() + (a + b) / c + + df = DataFrame([[1, 2], [3, 4]]) + + if axis == 0: + expected = Series([5.0, 7.0]) + else: + expected = Series([4.0, 8.0]) + + result = df.agg(f, axis, *args, **kwargs) + + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("num_cols", [2, 3, 5]) +def test_frequency_is_original(num_cols): + # GH 22150 + index = pd.DatetimeIndex(["1950-06-30", "1952-10-24", "1953-05-29"]) + original = index.copy() + df = DataFrame(1, index=index, columns=range(num_cols)) + df.apply(lambda x: x) + assert index.freq == original.freq + + +def test_apply_datetime_tz_issue(): + # GH 29052 + + timestamps = [ + Timestamp("2019-03-15 12:34:31.909000+0000", tz="UTC"), + Timestamp("2019-03-15 12:34:34.359000+0000", tz="UTC"), + Timestamp("2019-03-15 12:34:34.660000+0000", tz="UTC"), + ] + df = DataFrame(data=[0, 1, 2], index=timestamps) + result = df.apply(lambda x: x.name, axis=1) + expected = Series(index=timestamps, data=timestamps) + + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("df", [DataFrame({"A": ["a", None], "B": ["c", "d"]})]) +@pytest.mark.parametrize("method", ["min", "max", "sum"]) +def test_consistency_of_aggregates_of_columns_with_missing_values(df, method): + # GH 16832 + with tm.assert_produces_warning(FutureWarning, match="Select only valid"): + none_in_first_column_result = getattr(df[["A", "B"]], method)() + none_in_second_column_result = getattr(df[["B", "A"]], method)() + + tm.assert_series_equal(none_in_first_column_result, none_in_second_column_result) + + +@pytest.mark.parametrize("col", 
[1, 1.0, True, "a", np.nan]) +def test_apply_dtype(col): + # GH 31466 + df = DataFrame([[1.0, col]], columns=["a", "b"]) + result = df.apply(lambda x: x.dtype) + expected = df.dtypes + + tm.assert_series_equal(result, expected) + + +def test_apply_mutating(using_array_manager): + # GH#35462 case where applied func pins a new BlockManager to a row + df = DataFrame({"a": range(100), "b": range(100, 200)}) + df_orig = df.copy() + + def func(row): + mgr = row._mgr + row.loc["a"] += 1 + assert row._mgr is not mgr + return row + + expected = df.copy() + expected["a"] += 1 + + result = df.apply(func, axis=1) + + tm.assert_frame_equal(result, expected) + if not using_array_manager: + # INFO(ArrayManager) With BlockManager, the row is a view and mutated in place, + # with ArrayManager the row is not a view, and thus not mutated in place + tm.assert_frame_equal(df, result) + else: + tm.assert_frame_equal(df, df_orig) + + +def test_apply_empty_list_reduce(): + # GH#35683 get columns correct + df = DataFrame([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]], columns=["a", "b"]) + + result = df.apply(lambda x: [], result_type="reduce") + expected = Series({"a": [], "b": []}, dtype=object) + tm.assert_series_equal(result, expected) + + +def test_apply_no_suffix_index(): + # GH36189 + pdf = DataFrame([[4, 9]] * 3, columns=["A", "B"]) + result = pdf.apply(["sum", lambda x: x.sum(), lambda x: x.sum()]) + expected = DataFrame( + {"A": [12, 12, 12], "B": [27, 27, 27]}, index=["sum", "", ""] + ) + + tm.assert_frame_equal(result, expected) + + +def test_apply_raw_returns_string(): + # https://github.com/pandas-dev/pandas/issues/35940 + df = DataFrame({"A": ["aa", "bbb"]}) + result = df.apply(lambda x: x[0], axis=1, raw=True) + expected = Series(["aa", "bbb"]) + tm.assert_series_equal(result, expected) + + +def test_aggregation_func_column_order(): + # GH40420: the result of .agg should have an index that is sorted + # according to the arguments provided to agg. + df = DataFrame( + [ + ("1", 1, 0, 0), + ("2", 2, 0, 0), + ("3", 3, 0, 0), + ("4", 4, 5, 4), + ("5", 5, 6, 6), + ("6", 6, 7, 7), + ], + columns=("item", "att1", "att2", "att3"), + ) + + def foo(s): + return s.sum() / 2 + + aggs = ["sum", foo, "count", "min"] + with tm.assert_produces_warning( + FutureWarning, match=r"\['item'\] did not aggregate successfully" + ): + result = df.agg(aggs) + expected = DataFrame( + { + "item": ["123456", np.nan, 6, "1"], + "att1": [21.0, 10.5, 6.0, 1.0], + "att2": [18.0, 9.0, 6.0, 0.0], + "att3": [17.0, 8.5, 6.0, 0.0], + }, + index=["sum", "foo", "count", "min"], + ) + tm.assert_frame_equal(result, expected) + + +def test_apply_getitem_axis_1(): + # GH 13427 + df = DataFrame({"a": [0, 1, 2], "b": [1, 2, 3]}) + result = df[["a", "a"]].apply(lambda x: x[0] + x[1], axis=1) + expected = Series([0, 2, 4]) + tm.assert_series_equal(result, expected) + + +def test_nuisance_depr_passes_through_warnings(): + # GH 43740 + # DataFrame.agg with list-likes may emit warnings for both individual + # args and for entire columns, but we only want to emit once. We + # catch and suppress the warnings for individual args, but need to make + # sure if some other warnings were raised, they get passed through to + # the user. 
+ + def foo(x): + warnings.warn("Hello, World!") + return x.sum() + + df = DataFrame({"a": [1, 2, 3]}) + with tm.assert_produces_warning(UserWarning, match="Hello, World!"): + df.agg([foo]) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/apply/test_frame_apply_relabeling.py b/.local/lib/python3.8/site-packages/pandas/tests/apply/test_frame_apply_relabeling.py new file mode 100644 index 0000000..2da4a78 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/apply/test_frame_apply_relabeling.py @@ -0,0 +1,97 @@ +import numpy as np + +import pandas as pd +import pandas._testing as tm + + +def test_agg_relabel(): + # GH 26513 + df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]}) + + # simplest case with one column, one func + result = df.agg(foo=("B", "sum")) + expected = pd.DataFrame({"B": [10]}, index=pd.Index(["foo"])) + tm.assert_frame_equal(result, expected) + + # test on same column with different methods + result = df.agg(foo=("B", "sum"), bar=("B", "min")) + expected = pd.DataFrame({"B": [10, 1]}, index=pd.Index(["foo", "bar"])) + + tm.assert_frame_equal(result, expected) + + +def test_agg_relabel_multi_columns_multi_methods(): + # GH 26513, test on multiple columns with multiple methods + df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]}) + result = df.agg( + foo=("A", "sum"), + bar=("B", "mean"), + cat=("A", "min"), + dat=("B", "max"), + f=("A", "max"), + g=("C", "min"), + ) + expected = pd.DataFrame( + { + "A": [6.0, np.nan, 1.0, np.nan, 2.0, np.nan], + "B": [np.nan, 2.5, np.nan, 4.0, np.nan, np.nan], + "C": [np.nan, np.nan, np.nan, np.nan, np.nan, 3.0], + }, + index=pd.Index(["foo", "bar", "cat", "dat", "f", "g"]), + ) + tm.assert_frame_equal(result, expected) + + +def test_agg_relabel_partial_functions(): + # GH 26513, test on partial, functools or more complex cases + df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]}) + result = df.agg(foo=("A", np.mean), bar=("A", "mean"), cat=("A", min)) + expected = pd.DataFrame( + {"A": [1.5, 1.5, 1.0]}, index=pd.Index(["foo", "bar", "cat"]) + ) + tm.assert_frame_equal(result, expected) + + result = df.agg( + foo=("A", min), + bar=("A", np.min), + cat=("B", max), + dat=("C", "min"), + f=("B", np.sum), + kk=("B", lambda x: min(x)), + ) + expected = pd.DataFrame( + { + "A": [1.0, 1.0, np.nan, np.nan, np.nan, np.nan], + "B": [np.nan, np.nan, 4.0, np.nan, 10.0, 1.0], + "C": [np.nan, np.nan, np.nan, 3.0, np.nan, np.nan], + }, + index=pd.Index(["foo", "bar", "cat", "dat", "f", "kk"]), + ) + tm.assert_frame_equal(result, expected) + + +def test_agg_namedtuple(): + # GH 26513 + df = pd.DataFrame({"A": [0, 1], "B": [1, 2]}) + result = df.agg( + foo=pd.NamedAgg("B", "sum"), + bar=pd.NamedAgg("B", min), + cat=pd.NamedAgg(column="B", aggfunc="count"), + fft=pd.NamedAgg("B", aggfunc="max"), + ) + + expected = pd.DataFrame( + {"B": [3, 1, 2, 2]}, index=pd.Index(["foo", "bar", "cat", "fft"]) + ) + tm.assert_frame_equal(result, expected) + + result = df.agg( + foo=pd.NamedAgg("A", "min"), + bar=pd.NamedAgg(column="B", aggfunc="max"), + cat=pd.NamedAgg(column="A", aggfunc="max"), + ) + expected = pd.DataFrame( + {"A": [0.0, np.nan, 1.0], "B": [np.nan, 2.0, np.nan]}, + index=pd.Index(["foo", "bar", "cat"]), + ) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/apply/test_frame_transform.py b/.local/lib/python3.8/site-packages/pandas/tests/apply/test_frame_transform.py new file mode 100644 index 
0000000..3384f9d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/apply/test_frame_transform.py @@ -0,0 +1,249 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + MultiIndex, + Series, +) +import pandas._testing as tm +from pandas.tests.apply.common import frame_transform_kernels +from pandas.tests.frame.common import zip_frames + + +def unpack_obj(obj, klass, axis): + """ + Helper to ensure we have the right type of object for a test parametrized + over frame_or_series. + """ + if klass is not DataFrame: + obj = obj["A"] + if axis != 0: + pytest.skip(f"Test is only for DataFrame with axis={axis}") + return obj + + +def test_transform_ufunc(axis, float_frame, frame_or_series): + # GH 35964 + obj = unpack_obj(float_frame, frame_or_series, axis) + + with np.errstate(all="ignore"): + f_sqrt = np.sqrt(obj) + + # ufunc + result = obj.transform(np.sqrt, axis=axis) + expected = f_sqrt + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize( + "ops, names", + [ + ([np.sqrt], ["sqrt"]), + ([np.abs, np.sqrt], ["absolute", "sqrt"]), + (np.array([np.sqrt]), ["sqrt"]), + (np.array([np.abs, np.sqrt]), ["absolute", "sqrt"]), + ], +) +def test_transform_listlike(axis, float_frame, ops, names): + # GH 35964 + other_axis = 1 if axis in {0, "index"} else 0 + with np.errstate(all="ignore"): + expected = zip_frames([op(float_frame) for op in ops], axis=other_axis) + if axis in {0, "index"}: + expected.columns = MultiIndex.from_product([float_frame.columns, names]) + else: + expected.index = MultiIndex.from_product([float_frame.index, names]) + result = float_frame.transform(ops, axis=axis) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("ops", [[], np.array([])]) +def test_transform_empty_listlike(float_frame, ops, frame_or_series): + obj = unpack_obj(float_frame, frame_or_series, 0) + + with pytest.raises(ValueError, match="No transform functions were provided"): + obj.transform(ops) + + +@pytest.mark.parametrize("box", [dict, Series]) +def test_transform_dictlike(axis, float_frame, box): + # GH 35964 + if axis == 0 or axis == "index": + e = float_frame.columns[0] + expected = float_frame[[e]].transform(np.abs) + else: + e = float_frame.index[0] + expected = float_frame.iloc[[0]].transform(np.abs) + result = float_frame.transform(box({e: np.abs}), axis=axis) + tm.assert_frame_equal(result, expected) + + +def test_transform_dictlike_mixed(): + # GH 40018 - mix of lists and non-lists in values of a dictionary + df = DataFrame({"a": [1, 2], "b": [1, 4], "c": [1, 4]}) + result = df.transform({"b": ["sqrt", "abs"], "c": "sqrt"}) + expected = DataFrame( + [[1.0, 1, 1.0], [2.0, 4, 2.0]], + columns=MultiIndex([("b", "c"), ("sqrt", "abs")], [(0, 0, 1), (0, 1, 0)]), + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "ops", + [ + {}, + {"A": []}, + {"A": [], "B": "cumsum"}, + {"A": "cumsum", "B": []}, + {"A": [], "B": ["cumsum"]}, + {"A": ["cumsum"], "B": []}, + ], +) +def test_transform_empty_dictlike(float_frame, ops, frame_or_series): + obj = unpack_obj(float_frame, frame_or_series, 0) + + with pytest.raises(ValueError, match="No transform functions were provided"): + obj.transform(ops) + + +@pytest.mark.parametrize("use_apply", [True, False]) +def test_transform_udf(axis, float_frame, use_apply, frame_or_series): + # GH 35964 + obj = unpack_obj(float_frame, frame_or_series, axis) + + # transform uses UDF either via apply or passing the entire DataFrame + def func(x): + # transform is using apply iff x is not a 
DataFrame + if use_apply == isinstance(x, frame_or_series): + # Force transform to fallback + raise ValueError + return x + 1 + + result = obj.transform(func, axis=axis) + expected = obj + 1 + tm.assert_equal(result, expected) + + +wont_fail = ["ffill", "bfill", "fillna", "pad", "backfill", "shift"] +frame_kernels_raise = [x for x in frame_transform_kernels if x not in wont_fail] + + +@pytest.mark.parametrize("op", [*frame_kernels_raise, lambda x: x + 1]) +def test_transform_bad_dtype(op, frame_or_series, request): + # GH 35964 + if op == "rank": + request.node.add_marker( + pytest.mark.xfail( + raises=ValueError, reason="GH 40418: rank does not raise a TypeError" + ) + ) + + obj = DataFrame({"A": 3 * [object]}) # DataFrame that will fail on most transforms + obj = tm.get_obj(obj, frame_or_series) + + # tshift is deprecated + warn = None if op != "tshift" else FutureWarning + with tm.assert_produces_warning(warn): + with pytest.raises(TypeError, match="unsupported operand|not supported"): + obj.transform(op) + with pytest.raises(TypeError, match="Transform function failed"): + obj.transform([op]) + with pytest.raises(TypeError, match="Transform function failed"): + obj.transform({"A": op}) + with pytest.raises(TypeError, match="Transform function failed"): + obj.transform({"A": [op]}) + + +@pytest.mark.parametrize("op", frame_kernels_raise) +def test_transform_partial_failure_typeerror(op): + # GH 35964 + + # Using object makes most transform kernels fail + df = DataFrame({"A": 3 * [object], "B": [1, 2, 3]}) + + expected = df[["B"]].transform([op]) + match = r"\['A'\] did not transform successfully" + with tm.assert_produces_warning(FutureWarning, match=match): + result = df.transform([op]) + tm.assert_equal(result, expected) + + expected = df[["B"]].transform({"B": op}) + match = r"\['A'\] did not transform successfully" + with tm.assert_produces_warning(FutureWarning, match=match): + result = df.transform({"A": op, "B": op}) + tm.assert_equal(result, expected) + + expected = df[["B"]].transform({"B": [op]}) + match = r"\['A'\] did not transform successfully" + with tm.assert_produces_warning(FutureWarning, match=match): + result = df.transform({"A": [op], "B": [op]}) + tm.assert_equal(result, expected) + + expected = df.transform({"A": ["shift"], "B": [op]}) + match = rf"\['{op}'\] did not transform successfully" + with tm.assert_produces_warning(FutureWarning, match=match): + result = df.transform({"A": [op, "shift"], "B": [op]}) + tm.assert_equal(result, expected) + + +def test_transform_partial_failure_valueerror(): + # GH 40211 + match = ".*did not transform successfully" + + def op(x): + if np.sum(np.sum(x)) < 10: + raise ValueError + return x + + df = DataFrame({"A": [1, 2, 3], "B": [400, 500, 600]}) + + expected = df[["B"]].transform([op]) + with tm.assert_produces_warning(FutureWarning, match=match): + result = df.transform([op]) + tm.assert_equal(result, expected) + + expected = df[["B"]].transform({"B": op}) + with tm.assert_produces_warning(FutureWarning, match=match): + result = df.transform({"A": op, "B": op}) + tm.assert_equal(result, expected) + + expected = df[["B"]].transform({"B": [op]}) + with tm.assert_produces_warning(FutureWarning, match=match): + result = df.transform({"A": [op], "B": [op]}) + tm.assert_equal(result, expected) + + expected = df.transform({"A": ["shift"], "B": [op]}) + with tm.assert_produces_warning(FutureWarning, match=match): + result = df.transform({"A": [op, "shift"], "B": [op]}) + tm.assert_equal(result, expected) + + 
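+# A minimal, self-contained sketch of the mechanism the UDF tests here rely
+# on (an illustration, not part of the upstream module; the helper name is
+# made up): transform tries a callable both by handing it the entire object
+# and by applying it column-wise, keeping whichever succeeds, so a function
+# that raises for one input shape still transforms via the other path.
+def _demo_transform_fallback():
+    import pandas as pd
+
+    def add_one(x):
+        # Reject the whole-DataFrame call so only the per-column path works.
+        if isinstance(x, pd.DataFrame):
+            raise ValueError("force the column-wise path")
+        return x + 1
+
+    df = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
+    result = df.transform(add_one)  # each column arrives as a Series
+    assert result.equals(df + 1)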
+@pytest.mark.parametrize("use_apply", [True, False]) +def test_transform_passes_args(use_apply, frame_or_series): + # GH 35964 + # transform uses UDF either via apply or passing the entire DataFrame + expected_args = [1, 2] + expected_kwargs = {"c": 3} + + def f(x, a, b, c): + # transform is using apply iff x is not a DataFrame + if use_apply == isinstance(x, frame_or_series): + # Force transform to fallback + raise ValueError + assert [a, b] == expected_args + assert c == expected_kwargs["c"] + return x + + frame_or_series([1]).transform(f, 0, *expected_args, **expected_kwargs) + + +def test_transform_empty_dataframe(): + # https://github.com/pandas-dev/pandas/issues/39636 + df = DataFrame([], columns=["col1", "col2"]) + result = df.transform(lambda x: x + 10) + tm.assert_frame_equal(result, df) + + result = df["col1"].transform(lambda x: x + 10) + tm.assert_series_equal(result, df["col1"]) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/apply/test_invalid_arg.py b/.local/lib/python3.8/site-packages/pandas/tests/apply/test_invalid_arg.py new file mode 100644 index 0000000..410a8f6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/apply/test_invalid_arg.py @@ -0,0 +1,359 @@ +# Tests specifically aimed at detecting bad arguments. +# This file is organized by reason for exception. +# 1. always invalid argument values +# 2. missing column(s) +# 3. incompatible ops/dtype/args/kwargs +# 4. invalid result shape/type +# If your test does not fit into one of these categories, add to this list. + +from itertools import chain +import re + +import numpy as np +import pytest + +from pandas import ( + Categorical, + DataFrame, + Series, + date_range, + notna, +) +import pandas._testing as tm +from pandas.core.base import SpecificationError + + +@pytest.mark.parametrize("result_type", ["foo", 1]) +def test_result_type_error(result_type, int_frame_const_col): + # allowed result_type + df = int_frame_const_col + + msg = ( + "invalid value for result_type, must be one of " + "{None, 'reduce', 'broadcast', 'expand'}" + ) + with pytest.raises(ValueError, match=msg): + df.apply(lambda x: [1, 2, 3], axis=1, result_type=result_type) + + +def test_apply_invalid_axis_value(): + df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=["a", "a", "c"]) + msg = "No axis named 2 for object type DataFrame" + with pytest.raises(ValueError, match=msg): + df.apply(lambda x: x, 2) + + +def test_applymap_invalid_na_action(float_frame): + # GH 23803 + with pytest.raises(ValueError, match="na_action must be .*Got 'abc'"): + float_frame.applymap(lambda x: len(str(x)), na_action="abc") + + +def test_agg_raises(): + # GH 26513 + df = DataFrame({"A": [0, 1], "B": [1, 2]}) + msg = "Must provide" + + with pytest.raises(TypeError, match=msg): + df.agg() + + +def test_map_with_invalid_na_action_raises(): + # https://github.com/pandas-dev/pandas/issues/32815 + s = Series([1, 2, 3]) + msg = "na_action must either be 'ignore' or None" + with pytest.raises(ValueError, match=msg): + s.map(lambda x: x, na_action="____") + + +def test_map_categorical_na_action(): + values = Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True) + s = Series(values, name="XX", index=list("abcdefg")) + with pytest.raises(NotImplementedError, match=tm.EMPTY_STRING_PATTERN): + s.map(lambda x: x, na_action="ignore") + + +def test_map_datetimetz_na_action(): + values = date_range("2011-01-01", "2011-01-02", freq="H").tz_localize("Asia/Tokyo") + s = Series(values, name="XX") + with pytest.raises(NotImplementedError, 
match=tm.EMPTY_STRING_PATTERN): + s.map(lambda x: x, na_action="ignore") + + +@pytest.mark.parametrize("box", [DataFrame, Series]) +@pytest.mark.parametrize("method", ["apply", "agg", "transform"]) +@pytest.mark.parametrize("func", [{"A": {"B": "sum"}}, {"A": {"B": ["sum"]}}]) +def test_nested_renamer(box, method, func): + # GH 35964 + obj = box({"A": [1]}) + match = "nested renamer is not supported" + with pytest.raises(SpecificationError, match=match): + getattr(obj, method)(func) + + +@pytest.mark.parametrize( + "renamer", + [{"foo": ["min", "max"]}, {"foo": ["min", "max"], "bar": ["sum", "mean"]}], +) +def test_series_nested_renamer(renamer): + s = Series(range(6), dtype="int64", name="series") + msg = "nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + s.agg(renamer) + + +def test_apply_dict_depr(): + + tsdf = DataFrame( + np.random.randn(10, 3), + columns=["A", "B", "C"], + index=date_range("1/1/2000", periods=10), + ) + msg = "nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + tsdf.A.agg({"foo": ["sum", "mean"]}) + + +@pytest.mark.parametrize("method", ["agg", "transform"]) +def test_dict_nested_renaming_depr(method): + + df = DataFrame({"A": range(5), "B": 5}) + + # nested renaming + msg = r"nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + getattr(df, method)({"A": {"foo": "min"}, "B": {"bar": "max"}}) + + +@pytest.mark.parametrize("method", ["apply", "agg", "transform"]) +@pytest.mark.parametrize("func", [{"B": "sum"}, {"B": ["sum"]}]) +def test_missing_column(method, func): + # GH 40004 + obj = DataFrame({"A": [1]}) + match = re.escape("Column(s) ['B'] do not exist") + with pytest.raises(KeyError, match=match): + getattr(obj, method)(func) + + +def test_transform_mixed_column_name_dtypes(): + # GH39025 + df = DataFrame({"a": ["1"]}) + msg = r"Column\(s\) \[1, 'b'\] do not exist" + with pytest.raises(KeyError, match=msg): + df.transform({"a": int, 1: str, "b": int}) + + +@pytest.mark.parametrize( + "how, args", [("pct_change", ()), ("nsmallest", (1, ["a", "b"])), ("tail", 1)] +) +def test_apply_str_axis_1_raises(how, args): + # GH 39211 - some ops don't support axis=1 + df = DataFrame({"a": [1, 2], "b": [3, 4]}) + msg = f"Operation {how} does not support axis=1" + with pytest.raises(ValueError, match=msg): + df.apply(how, axis=1, args=args) + + +def test_transform_axis_1_raises(): + # GH 35964 + msg = "No axis named 1 for object type Series" + with pytest.raises(ValueError, match=msg): + Series([1]).transform("sum", axis=1) + + +def test_apply_modify_traceback(): + data = DataFrame( + { + "A": [ + "foo", + "foo", + "foo", + "foo", + "bar", + "bar", + "bar", + "bar", + "foo", + "foo", + "foo", + ], + "B": [ + "one", + "one", + "one", + "two", + "one", + "one", + "one", + "two", + "two", + "two", + "one", + ], + "C": [ + "dull", + "dull", + "shiny", + "dull", + "dull", + "shiny", + "shiny", + "dull", + "shiny", + "shiny", + "shiny", + ], + "D": np.random.randn(11), + "E": np.random.randn(11), + "F": np.random.randn(11), + } + ) + + data.loc[4, "C"] = np.nan + + def transform(row): + if row["C"].startswith("shin") and row["A"] == "foo": + row["D"] = 7 + return row + + def transform2(row): + if notna(row["C"]) and row["C"].startswith("shin") and row["A"] == "foo": + row["D"] = 7 + return row + + msg = "'float' object has no attribute 'startswith'" + with pytest.raises(AttributeError, match=msg): + data.apply(transform, axis=1) + + +@pytest.mark.parametrize( + "df, func, 
expected", + tm.get_cython_table_params( + DataFrame([["a", "b"], ["b", "a"]]), [["cumprod", TypeError]] + ), +) +def test_agg_cython_table_raises_frame(df, func, expected, axis): + # GH 21224 + msg = "can't multiply sequence by non-int of type 'str'" + with pytest.raises(expected, match=msg): + df.agg(func, axis=axis) + + +@pytest.mark.parametrize( + "series, func, expected", + chain( + tm.get_cython_table_params( + Series("a b c".split()), + [ + ("mean", TypeError), # mean raises TypeError + ("prod", TypeError), + ("std", TypeError), + ("var", TypeError), + ("median", TypeError), + ("cumprod", TypeError), + ], + ) + ), +) +def test_agg_cython_table_raises_series(series, func, expected): + # GH21224 + msg = r"[Cc]ould not convert|can't multiply sequence by non-int of type" + with pytest.raises(expected, match=msg): + # e.g. Series('a b'.split()).cumprod() will raise + series.agg(func) + + +def test_agg_none_to_type(): + # GH 40543 + df = DataFrame({"a": [None]}) + msg = re.escape("int() argument must be a string") + with pytest.raises(TypeError, match=msg): + df.agg({"a": int}) + + +def test_transform_none_to_type(): + # GH#34377 + df = DataFrame({"a": [None]}) + msg = "Transform function failed" + with pytest.raises(TypeError, match=msg): + df.transform({"a": int}) + + +@pytest.mark.parametrize( + "func", + [ + lambda x: np.array([1, 2]).reshape(-1, 2), + lambda x: [1, 2], + lambda x: Series([1, 2]), + ], +) +def test_apply_broadcast_error(int_frame_const_col, func): + df = int_frame_const_col + + # > 1 ndim + msg = "too many dims to broadcast|cannot broadcast result" + with pytest.raises(ValueError, match=msg): + df.apply(func, axis=1, result_type="broadcast") + + +def test_transform_and_agg_err_agg(axis, float_frame): + # cannot both transform and agg + msg = "cannot combine transform and aggregation operations" + with pytest.raises(ValueError, match=msg): + with np.errstate(all="ignore"): + float_frame.agg(["max", "sqrt"], axis=axis) + + +@pytest.mark.parametrize( + "func, msg", + [ + (["sqrt", "max"], "cannot combine transform and aggregation"), + ( + {"foo": np.sqrt, "bar": "sum"}, + "cannot perform both aggregation and transformation", + ), + ], +) +def test_transform_and_agg_err_series(string_series, func, msg): + # we are trying to transform with an aggregator + with pytest.raises(ValueError, match=msg): + with np.errstate(all="ignore"): + string_series.agg(func) + + +@pytest.mark.parametrize("func", [["max", "min"], ["max", "sqrt"]]) +def test_transform_wont_agg_frame(axis, float_frame, func): + # GH 35964 + # cannot both transform and agg + msg = "Function did not transform" + with pytest.raises(ValueError, match=msg): + float_frame.transform(func, axis=axis) + + +@pytest.mark.parametrize("func", [["min", "max"], ["sqrt", "max"]]) +def test_transform_wont_agg_series(string_series, func): + # GH 35964 + # we are trying to transform with an aggregator + msg = "Function did not transform" + + warn = RuntimeWarning if func[0] == "sqrt" else None + warn_msg = "invalid value encountered in sqrt" + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(warn, match=warn_msg): + string_series.transform(func) + + +@pytest.mark.parametrize( + "op_wrapper", [lambda x: x, lambda x: [x], lambda x: {"A": x}, lambda x: {"A": [x]}] +) +@pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning") +def test_transform_reducer_raises(all_reductions, frame_or_series, op_wrapper): + # GH 35964 + op = op_wrapper(all_reductions) + + obj = DataFrame({"A": [1, 2, 3]}) + 
obj = tm.get_obj(obj, frame_or_series) + + msg = "Function did not transform" + with pytest.raises(ValueError, match=msg): + obj.transform(op) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/apply/test_series_apply.py b/.local/lib/python3.8/site-packages/pandas/tests/apply/test_series_apply.py new file mode 100644 index 0000000..b7084e2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/apply/test_series_apply.py @@ -0,0 +1,889 @@ +from collections import ( + Counter, + defaultdict, +) + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + concat, + isna, + timedelta_range, +) +import pandas._testing as tm +from pandas.tests.apply.common import series_transform_kernels + + +def test_series_map_box_timedelta(): + # GH#11349 + ser = Series(timedelta_range("1 day 1 s", periods=5, freq="h")) + + def f(x): + return x.total_seconds() + + ser.map(f) + ser.apply(f) + DataFrame(ser).applymap(f) + + +def test_apply(datetime_series): + with np.errstate(all="ignore"): + tm.assert_series_equal(datetime_series.apply(np.sqrt), np.sqrt(datetime_series)) + + # element-wise apply + import math + + tm.assert_series_equal(datetime_series.apply(math.exp), np.exp(datetime_series)) + + # empty series + s = Series(dtype=object, name="foo", index=Index([], name="bar")) + rs = s.apply(lambda x: x) + tm.assert_series_equal(s, rs) + + # check all metadata (GH 9322) + assert s is not rs + assert s.index is rs.index + assert s.dtype == rs.dtype + assert s.name == rs.name + + # index but no data + s = Series(index=[1, 2, 3], dtype=np.float64) + rs = s.apply(lambda x: x) + tm.assert_series_equal(s, rs) + + +def test_apply_same_length_inference_bug(): + s = Series([1, 2]) + + def f(x): + return (x, x + 1) + + result = s.apply(f) + expected = s.map(f) + tm.assert_series_equal(result, expected) + + s = Series([1, 2, 3]) + result = s.apply(f) + expected = s.map(f) + tm.assert_series_equal(result, expected) + + +def test_apply_dont_convert_dtype(): + s = Series(np.random.randn(10)) + + def f(x): + return x if x > 0 else np.nan + + result = s.apply(f, convert_dtype=False) + assert result.dtype == object + + +def test_apply_args(): + s = Series(["foo,bar"]) + + result = s.apply(str.split, args=(",",)) + assert result[0] == ["foo", "bar"] + assert isinstance(result[0], list) + + +@pytest.mark.parametrize( + "args, kwargs, increment", + [((), {}, 0), ((), {"a": 1}, 1), ((2, 3), {}, 32), ((1,), {"c": 2}, 201)], +) +def test_agg_args(args, kwargs, increment): + # GH 43357 + def f(x, a=0, b=0, c=0): + return x + a + 10 * b + 100 * c + + s = Series([1, 2]) + result = s.agg(f, 0, *args, **kwargs) + expected = s + increment + tm.assert_series_equal(result, expected) + + +def test_series_map_box_timestamps(): + # GH#2689, GH#2627 + ser = Series(pd.date_range("1/1/2000", periods=10)) + + def func(x): + return (x.hour, x.day, x.month) + + # it works! + ser.map(func) + ser.apply(func) + + +def test_series_map_stringdtype(any_string_dtype): + # map test on StringDType, GH#40823 + ser1 = Series( + data=["cat", "dog", "rabbit"], + index=["id1", "id2", "id3"], + dtype=any_string_dtype, + ) + ser2 = Series(data=["id3", "id2", "id1", "id7000"], dtype=any_string_dtype) + result = ser2.map(ser1) + expected = Series(data=["rabbit", "dog", "cat", pd.NA], dtype=any_string_dtype) + + tm.assert_series_equal(result, expected) + + +def test_apply_box(): + # ufunc will not be boxed. 
Same test cases as the test_map_box + vals = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")] + s = Series(vals) + assert s.dtype == "datetime64[ns]" + # boxed value must be Timestamp instance + res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}") + exp = Series(["Timestamp_1_None", "Timestamp_2_None"]) + tm.assert_series_equal(res, exp) + + vals = [ + pd.Timestamp("2011-01-01", tz="US/Eastern"), + pd.Timestamp("2011-01-02", tz="US/Eastern"), + ] + s = Series(vals) + assert s.dtype == "datetime64[ns, US/Eastern]" + res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}") + exp = Series(["Timestamp_1_US/Eastern", "Timestamp_2_US/Eastern"]) + tm.assert_series_equal(res, exp) + + # timedelta + vals = [pd.Timedelta("1 days"), pd.Timedelta("2 days")] + s = Series(vals) + assert s.dtype == "timedelta64[ns]" + res = s.apply(lambda x: f"{type(x).__name__}_{x.days}") + exp = Series(["Timedelta_1", "Timedelta_2"]) + tm.assert_series_equal(res, exp) + + # period + vals = [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")] + s = Series(vals) + assert s.dtype == "Period[M]" + res = s.apply(lambda x: f"{type(x).__name__}_{x.freqstr}") + exp = Series(["Period_M", "Period_M"]) + tm.assert_series_equal(res, exp) + + +def test_apply_datetimetz(): + values = pd.date_range("2011-01-01", "2011-01-02", freq="H").tz_localize( + "Asia/Tokyo" + ) + s = Series(values, name="XX") + + result = s.apply(lambda x: x + pd.offsets.Day()) + exp_values = pd.date_range("2011-01-02", "2011-01-03", freq="H").tz_localize( + "Asia/Tokyo" + ) + exp = Series(exp_values, name="XX") + tm.assert_series_equal(result, exp) + + # change dtype + # GH 14506 : Returned dtype changed from int32 to int64 + result = s.apply(lambda x: x.hour) + exp = Series(list(range(24)) + [0], name="XX", dtype=np.int64) + tm.assert_series_equal(result, exp) + + # not vectorized + def f(x): + if not isinstance(x, pd.Timestamp): + raise ValueError + return str(x.tz) + + result = s.map(f) + exp = Series(["Asia/Tokyo"] * 25, name="XX") + tm.assert_series_equal(result, exp) + + +def test_apply_categorical(): + values = pd.Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True) + ser = Series(values, name="XX", index=list("abcdefg")) + result = ser.apply(lambda x: x.lower()) + + # should be categorical dtype when the number of categories are + # the same + values = pd.Categorical(list("abbabcd"), categories=list("dcba"), ordered=True) + exp = Series(values, name="XX", index=list("abcdefg")) + tm.assert_series_equal(result, exp) + tm.assert_categorical_equal(result.values, exp.values) + + result = ser.apply(lambda x: "A") + exp = Series(["A"] * 7, name="XX", index=list("abcdefg")) + tm.assert_series_equal(result, exp) + assert result.dtype == object + + +@pytest.mark.parametrize("series", [["1-1", "1-1", np.NaN], ["1-1", "1-2", np.NaN]]) +def test_apply_categorical_with_nan_values(series): + # GH 20714 bug fixed in: GH 24275 + s = Series(series, dtype="category") + result = s.apply(lambda x: x.split("-")[0]) + result = result.astype(object) + expected = Series(["1", "1", np.NaN], dtype="category") + expected = expected.astype(object) + tm.assert_series_equal(result, expected) + + +def test_apply_empty_integer_series_with_datetime_index(): + # GH 21245 + s = Series([], index=pd.date_range(start="2018-01-01", periods=0), dtype=int) + result = s.apply(lambda x: x) + tm.assert_series_equal(result, s) + + +def test_transform(string_series): + # transforming functions + + with np.errstate(all="ignore"): + + f_sqrt = 
np.sqrt(string_series) + f_abs = np.abs(string_series) + + # ufunc + result = string_series.apply(np.sqrt) + expected = f_sqrt.copy() + tm.assert_series_equal(result, expected) + + # list-like + result = string_series.apply([np.sqrt]) + expected = f_sqrt.to_frame().copy() + expected.columns = ["sqrt"] + tm.assert_frame_equal(result, expected) + + result = string_series.apply(["sqrt"]) + tm.assert_frame_equal(result, expected) + + # multiple items in list + # these are in the order as if we are applying both functions per + # series and then concatting + expected = concat([f_sqrt, f_abs], axis=1) + expected.columns = ["sqrt", "absolute"] + result = string_series.apply([np.sqrt, np.abs]) + tm.assert_frame_equal(result, expected) + + # dict, provide renaming + expected = concat([f_sqrt, f_abs], axis=1) + expected.columns = ["foo", "bar"] + expected = expected.unstack().rename("series") + + result = string_series.apply({"foo": np.sqrt, "bar": np.abs}) + tm.assert_series_equal(result.reindex_like(expected), expected) + + +@pytest.mark.parametrize("op", series_transform_kernels) +def test_transform_partial_failure(op, request): + # GH 35964 + if op in ("ffill", "bfill", "pad", "backfill", "shift"): + request.node.add_marker( + pytest.mark.xfail( + raises=AssertionError, reason=f"{op} is successful on any dtype" + ) + ) + if op in ("rank", "fillna"): + pytest.skip(f"{op} doesn't raise TypeError on object") + + # Using object makes most transform kernels fail + ser = Series(3 * [object]) + + expected = ser.transform(["shift"]) + match = rf"\['{op}'\] did not transform successfully" + with tm.assert_produces_warning(FutureWarning, match=match): + result = ser.transform([op, "shift"]) + tm.assert_equal(result, expected) + + expected = ser.transform({"B": "shift"}) + match = r"\['A'\] did not transform successfully" + with tm.assert_produces_warning(FutureWarning, match=match): + result = ser.transform({"A": op, "B": "shift"}) + tm.assert_equal(result, expected) + + expected = ser.transform({"B": ["shift"]}) + match = r"\['A'\] did not transform successfully" + with tm.assert_produces_warning(FutureWarning, match=match): + result = ser.transform({"A": [op], "B": ["shift"]}) + tm.assert_equal(result, expected) + + match = r"\['B'\] did not transform successfully" + with tm.assert_produces_warning(FutureWarning, match=match): + expected = ser.transform({"A": ["shift"], "B": [op]}) + match = rf"\['{op}'\] did not transform successfully" + with tm.assert_produces_warning(FutureWarning, match=match): + result = ser.transform({"A": [op, "shift"], "B": [op]}) + tm.assert_equal(result, expected) + + +def test_transform_partial_failure_valueerror(): + # GH 40211 + match = ".*did not transform successfully" + + def noop(x): + return x + + def raising_op(_): + raise ValueError + + ser = Series(3 * [object]) + + expected = ser.transform([noop]) + with tm.assert_produces_warning(FutureWarning, match=match): + result = ser.transform([noop, raising_op]) + tm.assert_equal(result, expected) + + expected = ser.transform({"B": noop}) + with tm.assert_produces_warning(FutureWarning, match=match): + result = ser.transform({"A": raising_op, "B": noop}) + tm.assert_equal(result, expected) + + expected = ser.transform({"B": [noop]}) + with tm.assert_produces_warning(FutureWarning, match=match): + result = ser.transform({"A": [raising_op], "B": [noop]}) + tm.assert_equal(result, expected) + + expected = ser.transform({"A": [noop], "B": [noop]}) + with tm.assert_produces_warning(FutureWarning, match=match): + result = 
ser.transform({"A": [noop, raising_op], "B": [noop]}) + tm.assert_equal(result, expected) + + +def test_demo(): + # demonstration tests + s = Series(range(6), dtype="int64", name="series") + + result = s.agg(["min", "max"]) + expected = Series([0, 5], index=["min", "max"], name="series") + tm.assert_series_equal(result, expected) + + result = s.agg({"foo": "min"}) + expected = Series([0], index=["foo"], name="series") + tm.assert_series_equal(result, expected) + + +def test_agg_apply_evaluate_lambdas_the_same(string_series): + # test that we are evaluating row-by-row first + # before vectorized evaluation + result = string_series.apply(lambda x: str(x)) + expected = string_series.agg(lambda x: str(x)) + tm.assert_series_equal(result, expected) + + result = string_series.apply(str) + expected = string_series.agg(str) + tm.assert_series_equal(result, expected) + + +def test_with_nested_series(datetime_series): + # GH 2316 + # .agg with a reducer and a transform, what to do + result = datetime_series.apply(lambda x: Series([x, x ** 2], index=["x", "x^2"])) + expected = DataFrame({"x": datetime_series, "x^2": datetime_series ** 2}) + tm.assert_frame_equal(result, expected) + + result = datetime_series.agg(lambda x: Series([x, x ** 2], index=["x", "x^2"])) + tm.assert_frame_equal(result, expected) + + +def test_replicate_describe(string_series): + # this also tests a result set that is all scalars + expected = string_series.describe() + result = string_series.apply( + { + "count": "count", + "mean": "mean", + "std": "std", + "min": "min", + "25%": lambda x: x.quantile(0.25), + "50%": "median", + "75%": lambda x: x.quantile(0.75), + "max": "max", + } + ) + tm.assert_series_equal(result, expected) + + +def test_reduce(string_series): + # reductions with named functions + result = string_series.agg(["sum", "mean"]) + expected = Series( + [string_series.sum(), string_series.mean()], + ["sum", "mean"], + name=string_series.name, + ) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("how", ["agg", "apply"]) +def test_non_callable_aggregates(how): + # test agg using non-callable series attributes + # GH 39116 - expand to apply + s = Series([1, 2, None]) + + # Calling agg w/ just a string arg same as calling s.arg + result = getattr(s, how)("size") + expected = s.size + assert result == expected + + # test when mixed w/ callable reducers + result = getattr(s, how)(["size", "count", "mean"]) + expected = Series({"size": 3.0, "count": 2.0, "mean": 1.5}) + tm.assert_series_equal(result, expected) + + +def test_series_apply_no_suffix_index(): + # GH36189 + s = Series([4] * 3) + result = s.apply(["sum", lambda x: x.sum(), lambda x: x.sum()]) + expected = Series([12, 12, 12], index=["sum", "", ""]) + + tm.assert_series_equal(result, expected) + + +def test_map(datetime_series): + index, data = tm.getMixedTypeDict() + + source = Series(data["B"], index=data["C"]) + target = Series(data["C"][:4], index=data["D"][:4]) + + merged = target.map(source) + + for k, v in merged.items(): + assert v == source[target[k]] + + # input could be a dict + merged = target.map(source.to_dict()) + + for k, v in merged.items(): + assert v == source[target[k]] + + # function + result = datetime_series.map(lambda x: x * 2) + tm.assert_series_equal(result, datetime_series * 2) + + # GH 10324 + a = Series([1, 2, 3, 4]) + b = Series(["even", "odd", "even", "odd"], dtype="category") + c = Series(["even", "odd", "even", "odd"]) + + exp = Series(["odd", "even", "odd", np.nan], dtype="category") + 
tm.assert_series_equal(a.map(b), exp) + exp = Series(["odd", "even", "odd", np.nan]) + tm.assert_series_equal(a.map(c), exp) + + a = Series(["a", "b", "c", "d"]) + b = Series([1, 2, 3, 4], index=pd.CategoricalIndex(["b", "c", "d", "e"])) + c = Series([1, 2, 3, 4], index=Index(["b", "c", "d", "e"])) + + exp = Series([np.nan, 1, 2, 3]) + tm.assert_series_equal(a.map(b), exp) + exp = Series([np.nan, 1, 2, 3]) + tm.assert_series_equal(a.map(c), exp) + + a = Series(["a", "b", "c", "d"]) + b = Series( + ["B", "C", "D", "E"], + dtype="category", + index=pd.CategoricalIndex(["b", "c", "d", "e"]), + ) + c = Series(["B", "C", "D", "E"], index=Index(["b", "c", "d", "e"])) + + exp = Series( + pd.Categorical([np.nan, "B", "C", "D"], categories=["B", "C", "D", "E"]) + ) + tm.assert_series_equal(a.map(b), exp) + exp = Series([np.nan, "B", "C", "D"]) + tm.assert_series_equal(a.map(c), exp) + + +def test_map_empty(index): + if isinstance(index, MultiIndex): + pytest.skip("Initializing a Series from a MultiIndex is not supported") + + s = Series(index) + result = s.map({}) + + expected = Series(np.nan, index=s.index) + tm.assert_series_equal(result, expected) + + +def test_map_compat(): + # related GH 8024 + s = Series([True, True, False], index=[1, 2, 3]) + result = s.map({True: "foo", False: "bar"}) + expected = Series(["foo", "foo", "bar"], index=[1, 2, 3]) + tm.assert_series_equal(result, expected) + + +def test_map_int(): + left = Series({"a": 1.0, "b": 2.0, "c": 3.0, "d": 4}) + right = Series({1: 11, 2: 22, 3: 33}) + + assert left.dtype == np.float_ + assert issubclass(right.dtype.type, np.integer) + + merged = left.map(right) + assert merged.dtype == np.float_ + assert isna(merged["d"]) + assert not isna(merged["c"]) + + +def test_map_type_inference(): + s = Series(range(3)) + s2 = s.map(lambda x: np.where(x == 0, 0, 1)) + assert issubclass(s2.dtype.type, np.integer) + + +def test_map_decimal(string_series): + from decimal import Decimal + + result = string_series.map(lambda x: Decimal(str(x))) + assert result.dtype == np.object_ + assert isinstance(result[0], Decimal) + + +def test_map_na_exclusion(): + s = Series([1.5, np.nan, 3, np.nan, 5]) + + result = s.map(lambda x: x * 2, na_action="ignore") + exp = s * 2 + tm.assert_series_equal(result, exp) + + +def test_map_dict_with_tuple_keys(): + """ + Due to new MultiIndex-ing behaviour in v0.14.0, + dicts with tuple keys passed to map were being + converted to a multi-index, preventing tuple values + from being mapped properly. 
+ """ + # GH 18496 + df = DataFrame({"a": [(1,), (2,), (3, 4), (5, 6)]}) + label_mappings = {(1,): "A", (2,): "B", (3, 4): "A", (5, 6): "B"} + + df["labels"] = df["a"].map(label_mappings) + df["expected_labels"] = Series(["A", "B", "A", "B"], index=df.index) + # All labels should be filled now + tm.assert_series_equal(df["labels"], df["expected_labels"], check_names=False) + + +def test_map_counter(): + s = Series(["a", "b", "c"], index=[1, 2, 3]) + counter = Counter() + counter["b"] = 5 + counter["c"] += 1 + result = s.map(counter) + expected = Series([0, 5, 1], index=[1, 2, 3]) + tm.assert_series_equal(result, expected) + + +def test_map_defaultdict(): + s = Series([1, 2, 3], index=["a", "b", "c"]) + default_dict = defaultdict(lambda: "blank") + default_dict[1] = "stuff" + result = s.map(default_dict) + expected = Series(["stuff", "blank", "blank"], index=["a", "b", "c"]) + tm.assert_series_equal(result, expected) + + +def test_map_dict_na_key(): + # https://github.com/pandas-dev/pandas/issues/17648 + # Checks that np.nan key is appropriately mapped + s = Series([1, 2, np.nan]) + expected = Series(["a", "b", "c"]) + result = s.map({1: "a", 2: "b", np.nan: "c"}) + tm.assert_series_equal(result, expected) + + +def test_map_dict_subclass_with_missing(): + """ + Test Series.map with a dictionary subclass that defines __missing__, + i.e. sets a default value (GH #15999). + """ + + class DictWithMissing(dict): + def __missing__(self, key): + return "missing" + + s = Series([1, 2, 3]) + dictionary = DictWithMissing({3: "three"}) + result = s.map(dictionary) + expected = Series(["missing", "missing", "three"]) + tm.assert_series_equal(result, expected) + + +def test_map_dict_subclass_without_missing(): + class DictWithoutMissing(dict): + pass + + s = Series([1, 2, 3]) + dictionary = DictWithoutMissing({3: "three"}) + result = s.map(dictionary) + expected = Series([np.nan, np.nan, "three"]) + tm.assert_series_equal(result, expected) + + +def test_map_abc_mapping(non_dict_mapping_subclass): + # https://github.com/pandas-dev/pandas/issues/29733 + # Check collections.abc.Mapping support as mapper for Series.map + s = Series([1, 2, 3]) + not_a_dictionary = non_dict_mapping_subclass({3: "three"}) + result = s.map(not_a_dictionary) + expected = Series([np.nan, np.nan, "three"]) + tm.assert_series_equal(result, expected) + + +def test_map_abc_mapping_with_missing(non_dict_mapping_subclass): + # https://github.com/pandas-dev/pandas/issues/29733 + # Check collections.abc.Mapping support as mapper for Series.map + class NonDictMappingWithMissing(non_dict_mapping_subclass): + def __missing__(key): + return "missing" + + s = Series([1, 2, 3]) + not_a_dictionary = NonDictMappingWithMissing({3: "three"}) + result = s.map(not_a_dictionary) + # __missing__ is a dict concept, not a Mapping concept, + # so it should not change the result! 
+ expected = Series([np.nan, np.nan, "three"]) + tm.assert_series_equal(result, expected) + + +def test_map_box(): + vals = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")] + s = Series(vals) + assert s.dtype == "datetime64[ns]" + # boxed value must be Timestamp instance + res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}") + exp = Series(["Timestamp_1_None", "Timestamp_2_None"]) + tm.assert_series_equal(res, exp) + + vals = [ + pd.Timestamp("2011-01-01", tz="US/Eastern"), + pd.Timestamp("2011-01-02", tz="US/Eastern"), + ] + s = Series(vals) + assert s.dtype == "datetime64[ns, US/Eastern]" + res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}") + exp = Series(["Timestamp_1_US/Eastern", "Timestamp_2_US/Eastern"]) + tm.assert_series_equal(res, exp) + + # timedelta + vals = [pd.Timedelta("1 days"), pd.Timedelta("2 days")] + s = Series(vals) + assert s.dtype == "timedelta64[ns]" + res = s.apply(lambda x: f"{type(x).__name__}_{x.days}") + exp = Series(["Timedelta_1", "Timedelta_2"]) + tm.assert_series_equal(res, exp) + + # period + vals = [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")] + s = Series(vals) + assert s.dtype == "Period[M]" + res = s.apply(lambda x: f"{type(x).__name__}_{x.freqstr}") + exp = Series(["Period_M", "Period_M"]) + tm.assert_series_equal(res, exp) + + +def test_map_categorical(): + values = pd.Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True) + s = Series(values, name="XX", index=list("abcdefg")) + + result = s.map(lambda x: x.lower()) + exp_values = pd.Categorical(list("abbabcd"), categories=list("dcba"), ordered=True) + exp = Series(exp_values, name="XX", index=list("abcdefg")) + tm.assert_series_equal(result, exp) + tm.assert_categorical_equal(result.values, exp_values) + + result = s.map(lambda x: "A") + exp = Series(["A"] * 7, name="XX", index=list("abcdefg")) + tm.assert_series_equal(result, exp) + assert result.dtype == object + + +def test_map_datetimetz(): + values = pd.date_range("2011-01-01", "2011-01-02", freq="H").tz_localize( + "Asia/Tokyo" + ) + s = Series(values, name="XX") + + # keep tz + result = s.map(lambda x: x + pd.offsets.Day()) + exp_values = pd.date_range("2011-01-02", "2011-01-03", freq="H").tz_localize( + "Asia/Tokyo" + ) + exp = Series(exp_values, name="XX") + tm.assert_series_equal(result, exp) + + # change dtype + # GH 14506 : Returned dtype changed from int32 to int64 + result = s.map(lambda x: x.hour) + exp = Series(list(range(24)) + [0], name="XX", dtype=np.int64) + tm.assert_series_equal(result, exp) + + # not vectorized + def f(x): + if not isinstance(x, pd.Timestamp): + raise ValueError + return str(x.tz) + + result = s.map(f) + exp = Series(["Asia/Tokyo"] * 25, name="XX") + tm.assert_series_equal(result, exp) + + +@pytest.mark.parametrize( + "vals,mapping,exp", + [ + (list("abc"), {np.nan: "not NaN"}, [np.nan] * 3 + ["not NaN"]), + (list("abc"), {"a": "a letter"}, ["a letter"] + [np.nan] * 3), + (list(range(3)), {0: 42}, [42] + [np.nan] * 3), + ], +) +def test_map_missing_mixed(vals, mapping, exp): + # GH20495 + s = Series(vals + [np.nan]) + result = s.map(mapping) + + tm.assert_series_equal(result, Series(exp)) + + +@pytest.mark.parametrize( + "dti,exp", + [ + ( + Series([1, 2], index=pd.DatetimeIndex([0, 31536000000])), + DataFrame(np.repeat([[1, 2]], 2, axis=0), dtype="int64"), + ), + ( + tm.makeTimeSeries(nper=30), + DataFrame(np.repeat([[1, 2]], 30, axis=0), dtype="int64"), + ), + ], +) +@pytest.mark.parametrize("aware", [True, False]) +def 
test_apply_series_on_date_time_index_aware_series(dti, exp, aware): + # GH 25959 + # Calling apply on a localized time series should not cause an error + if aware: + index = dti.tz_localize("UTC").index + else: + index = dti.index + result = Series(index).apply(lambda x: Series([1, 2])) + tm.assert_frame_equal(result, exp) + + +def test_apply_scalar_on_date_time_index_aware_series(): + # GH 25959 + # Calling apply on a localized time series should not cause an error + series = tm.makeTimeSeries(nper=30).tz_localize("UTC") + result = Series(series.index).apply(lambda x: 1) + tm.assert_series_equal(result, Series(np.ones(30), dtype="int64")) + + +def test_map_float_to_string_precision(): + # GH 13228 + ser = Series(1 / 3) + result = ser.map(lambda val: str(val)).to_dict() + expected = {0: "0.3333333333333333"} + assert result == expected + + +def test_apply_to_timedelta(): + list_of_valid_strings = ["00:00:01", "00:00:02"] + a = pd.to_timedelta(list_of_valid_strings) + b = Series(list_of_valid_strings).apply(pd.to_timedelta) + tm.assert_series_equal(Series(a), b) + + list_of_strings = ["00:00:01", np.nan, pd.NaT, pd.NaT] + + a = pd.to_timedelta(list_of_strings) + with tm.assert_produces_warning(FutureWarning, match="Inferring timedelta64"): + ser = Series(list_of_strings) + b = ser.apply(pd.to_timedelta) + tm.assert_series_equal(Series(a), b) + + +@pytest.mark.parametrize( + "ops, names", + [ + ([np.sum], ["sum"]), + ([np.sum, np.mean], ["sum", "mean"]), + (np.array([np.sum]), ["sum"]), + (np.array([np.sum, np.mean]), ["sum", "mean"]), + ], +) +@pytest.mark.parametrize("how", ["agg", "apply"]) +def test_apply_listlike_reducer(string_series, ops, names, how): + # GH 39140 + expected = Series({name: op(string_series) for name, op in zip(names, ops)}) + expected.name = "series" + result = getattr(string_series, how)(ops) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "ops", + [ + {"A": np.sum}, + {"A": np.sum, "B": np.mean}, + Series({"A": np.sum}), + Series({"A": np.sum, "B": np.mean}), + ], +) +@pytest.mark.parametrize("how", ["agg", "apply"]) +def test_apply_dictlike_reducer(string_series, ops, how): + # GH 39140 + expected = Series({name: op(string_series) for name, op in ops.items()}) + expected.name = string_series.name + result = getattr(string_series, how)(ops) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "ops, names", + [ + ([np.sqrt], ["sqrt"]), + ([np.abs, np.sqrt], ["absolute", "sqrt"]), + (np.array([np.sqrt]), ["sqrt"]), + (np.array([np.abs, np.sqrt]), ["absolute", "sqrt"]), + ], +) +def test_apply_listlike_transformer(string_series, ops, names): + # GH 39140 + with np.errstate(all="ignore"): + expected = concat([op(string_series) for op in ops], axis=1) + expected.columns = names + result = string_series.apply(ops) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "ops", + [ + {"A": np.sqrt}, + {"A": np.sqrt, "B": np.exp}, + Series({"A": np.sqrt}), + Series({"A": np.sqrt, "B": np.exp}), + ], +) +def test_apply_dictlike_transformer(string_series, ops): + # GH 39140 + with np.errstate(all="ignore"): + expected = concat({name: op(string_series) for name, op in ops.items()}) + expected.name = string_series.name + result = string_series.apply(ops) + tm.assert_series_equal(result, expected) + + +def test_apply_retains_column_name(): + # GH 16380 + df = DataFrame({"x": range(3)}, Index(range(3), name="x")) + result = df.x.apply(lambda x: Series(range(x + 1), Index(range(x + 1), name="y"))) + expected = 
DataFrame( + [[0.0, np.nan, np.nan], [0.0, 1.0, np.nan], [0.0, 1.0, 2.0]], + columns=Index(range(3), name="y"), + index=Index(range(3), name="x"), + ) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/apply/test_series_apply_relabeling.py b/.local/lib/python3.8/site-packages/pandas/tests/apply/test_series_apply_relabeling.py new file mode 100644 index 0000000..c0a285e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/apply/test_series_apply_relabeling.py @@ -0,0 +1,33 @@ +import pandas as pd +import pandas._testing as tm + + +def test_relabel_no_duplicated_method(): + # this is to test there is no duplicated method used in agg + df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4]}) + + result = df["A"].agg(foo="sum") + expected = df["A"].agg({"foo": "sum"}) + tm.assert_series_equal(result, expected) + + result = df["B"].agg(foo="min", bar="max") + expected = df["B"].agg({"foo": "min", "bar": "max"}) + tm.assert_series_equal(result, expected) + + result = df["B"].agg(foo=sum, bar=min, cat="max") + expected = df["B"].agg({"foo": sum, "bar": min, "cat": "max"}) + tm.assert_series_equal(result, expected) + + +def test_relabel_duplicated_method(): + # this is to test with nested renaming, duplicated method can be used + # if they are assigned with different new names + df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4]}) + + result = df["A"].agg(foo="sum", bar="sum") + expected = pd.Series([6, 6], index=["foo", "bar"], name="A") + tm.assert_series_equal(result, expected) + + result = df["B"].agg(foo=min, bar="min") + expected = pd.Series([1, 1], index=["foo", "bar"], name="B") + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/apply/test_series_transform.py b/.local/lib/python3.8/site-packages/pandas/tests/apply/test_series_transform.py new file mode 100644 index 0000000..b10af13 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/apply/test_series_transform.py @@ -0,0 +1,49 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + MultiIndex, + Series, + concat, +) +import pandas._testing as tm + + +@pytest.mark.parametrize( + "ops, names", + [ + ([np.sqrt], ["sqrt"]), + ([np.abs, np.sqrt], ["absolute", "sqrt"]), + (np.array([np.sqrt]), ["sqrt"]), + (np.array([np.abs, np.sqrt]), ["absolute", "sqrt"]), + ], +) +def test_transform_listlike(string_series, ops, names): + # GH 35964 + with np.errstate(all="ignore"): + expected = concat([op(string_series) for op in ops], axis=1) + expected.columns = names + result = string_series.transform(ops) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("box", [dict, Series]) +def test_transform_dictlike(string_series, box): + # GH 35964 + with np.errstate(all="ignore"): + expected = concat([np.sqrt(string_series), np.abs(string_series)], axis=1) + expected.columns = ["foo", "bar"] + result = string_series.transform(box({"foo": np.sqrt, "bar": np.abs})) + tm.assert_frame_equal(result, expected) + + +def test_transform_dictlike_mixed(): + # GH 40018 - mix of lists and non-lists in values of a dictionary + df = Series([1, 4]) + result = df.transform({"b": ["sqrt", "abs"], "c": "sqrt"}) + expected = DataFrame( + [[1.0, 1, 1.0], [2.0, 4, 2.0]], + columns=MultiIndex([("b", "c"), ("sqrt", "abs")], [(0, 0, 1), (0, 1, 0)]), + ) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/apply/test_str.py 
b/.local/lib/python3.8/site-packages/pandas/tests/apply/test_str.py new file mode 100644 index 0000000..8299732 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/apply/test_str.py @@ -0,0 +1,302 @@ +from itertools import chain +import operator + +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_number + +from pandas import ( + DataFrame, + Index, + Series, +) +import pandas._testing as tm +from pandas.core.groupby.base import maybe_normalize_deprecated_kernels +from pandas.tests.apply.common import ( + frame_transform_kernels, + series_transform_kernels, +) + + +@pytest.mark.parametrize("func", ["sum", "mean", "min", "max", "std"]) +@pytest.mark.parametrize( + "args,kwds", + [ + pytest.param([], {}, id="no_args_or_kwds"), + pytest.param([1], {}, id="axis_from_args"), + pytest.param([], {"axis": 1}, id="axis_from_kwds"), + pytest.param([], {"numeric_only": True}, id="optional_kwds"), + pytest.param([1, True], {"numeric_only": True}, id="args_and_kwds"), + ], +) +@pytest.mark.parametrize("how", ["agg", "apply"]) +def test_apply_with_string_funcs(request, float_frame, func, args, kwds, how): + if len(args) > 1 and how == "agg": + request.node.add_marker( + pytest.mark.xfail( + raises=TypeError, + reason="agg/apply signature mismatch - agg passes 2nd " + "argument to func", + ) + ) + result = getattr(float_frame, how)(func, *args, **kwds) + expected = getattr(float_frame, func)(*args, **kwds) + tm.assert_series_equal(result, expected) + + +def test_with_string_args(datetime_series): + + for arg in ["sum", "mean", "min", "max", "std"]: + result = datetime_series.apply(arg) + expected = getattr(datetime_series, arg)() + assert result == expected + + +@pytest.mark.parametrize("op", ["mean", "median", "std", "var"]) +@pytest.mark.parametrize("how", ["agg", "apply"]) +def test_apply_np_reducer(float_frame, op, how): + # GH 39116 + float_frame = DataFrame({"a": [1, 2], "b": [3, 4]}) + result = getattr(float_frame, how)(op) + # pandas ddof defaults to 1, numpy to 0 + kwargs = {"ddof": 1} if op in ("std", "var") else {} + expected = Series( + getattr(np, op)(float_frame, axis=0, **kwargs), index=float_frame.columns + ) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "op", ["abs", "ceil", "cos", "cumsum", "exp", "log", "sqrt", "square"] +) +@pytest.mark.parametrize("how", ["transform", "apply"]) +def test_apply_np_transformer(float_frame, op, how): + # GH 39116 + + # float_frame will _usually_ have negative values, which will + # trigger the warning here, but let's put one in just to be sure + float_frame.iloc[0, 0] = -1.0 + warn = None + if op in ["log", "sqrt"]: + warn = RuntimeWarning + + with tm.assert_produces_warning(warn): + result = getattr(float_frame, how)(op) + expected = getattr(np, op)(float_frame) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "series, func, expected", + chain( + tm.get_cython_table_params( + Series(dtype=np.float64), + [ + ("sum", 0), + ("max", np.nan), + ("min", np.nan), + ("all", True), + ("any", False), + ("mean", np.nan), + ("prod", 1), + ("std", np.nan), + ("var", np.nan), + ("median", np.nan), + ], + ), + tm.get_cython_table_params( + Series([np.nan, 1, 2, 3]), + [ + ("sum", 6), + ("max", 3), + ("min", 1), + ("all", True), + ("any", True), + ("mean", 2), + ("prod", 6), + ("std", 1), + ("var", 1), + ("median", 2), + ], + ), + tm.get_cython_table_params( + Series("a b c".split()), + [ + ("sum", "abc"), + ("max", "c"), + ("min", "a"), + ("all", True), + ("any", 
True), + ], + ), + ), +) +def test_agg_cython_table_series(series, func, expected): + # GH21224 + # test reducing functions in + # pandas.core.base.SelectionMixin._cython_table + result = series.agg(func) + if is_number(expected): + assert np.isclose(result, expected, equal_nan=True) + else: + assert result == expected + + +@pytest.mark.parametrize( + "series, func, expected", + chain( + tm.get_cython_table_params( + Series(dtype=np.float64), + [ + ("cumprod", Series([], Index([]), dtype=np.float64)), + ("cumsum", Series([], Index([]), dtype=np.float64)), + ], + ), + tm.get_cython_table_params( + Series([np.nan, 1, 2, 3]), + [ + ("cumprod", Series([np.nan, 1, 2, 6])), + ("cumsum", Series([np.nan, 1, 3, 6])), + ], + ), + tm.get_cython_table_params( + Series("a b c".split()), [("cumsum", Series(["a", "ab", "abc"]))] + ), + ), +) +def test_agg_cython_table_transform_series(series, func, expected): + # GH21224 + # test transforming functions in + # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum) + result = series.agg(func) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "df, func, expected", + chain( + tm.get_cython_table_params( + DataFrame(), + [ + ("sum", Series(dtype="float64")), + ("max", Series(dtype="float64")), + ("min", Series(dtype="float64")), + ("all", Series(dtype=bool)), + ("any", Series(dtype=bool)), + ("mean", Series(dtype="float64")), + ("prod", Series(dtype="float64")), + ("std", Series(dtype="float64")), + ("var", Series(dtype="float64")), + ("median", Series(dtype="float64")), + ], + ), + tm.get_cython_table_params( + DataFrame([[np.nan, 1], [1, 2]]), + [ + ("sum", Series([1.0, 3])), + ("max", Series([1.0, 2])), + ("min", Series([1.0, 1])), + ("all", Series([True, True])), + ("any", Series([True, True])), + ("mean", Series([1, 1.5])), + ("prod", Series([1.0, 2])), + ("std", Series([np.nan, 0.707107])), + ("var", Series([np.nan, 0.5])), + ("median", Series([1, 1.5])), + ], + ), + ), +) +def test_agg_cython_table_frame(df, func, expected, axis): + # GH 21224 + # test reducing functions in + # pandas.core.base.SelectionMixin._cython_table + result = df.agg(func, axis=axis) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "df, func, expected", + chain( + tm.get_cython_table_params( + DataFrame(), [("cumprod", DataFrame()), ("cumsum", DataFrame())] + ), + tm.get_cython_table_params( + DataFrame([[np.nan, 1], [1, 2]]), + [ + ("cumprod", DataFrame([[np.nan, 1], [1, 2]])), + ("cumsum", DataFrame([[np.nan, 1], [1, 3]])), + ], + ), + ), +) +def test_agg_cython_table_transform_frame(df, func, expected, axis): + # GH 21224 + # test transforming functions in + # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum) + if axis == "columns" or axis == 1: + # operating blockwise doesn't let us preserve dtypes + expected = expected.astype("float64") + + result = df.agg(func, axis=axis) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("op", series_transform_kernels) +def test_transform_groupby_kernel_series(string_series, op): + # GH 35964 + # TODO(2.0) Remove after pad/backfill deprecation enforced + op = maybe_normalize_deprecated_kernels(op) + args = [0.0] if op == "fillna" else [] + ones = np.ones(string_series.shape[0]) + expected = string_series.groupby(ones).transform(op, *args) + result = string_series.transform(op, 0, *args) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("op", frame_transform_kernels) +def test_transform_groupby_kernel_frame( + axis, 
float_frame, op, using_array_manager, request +): + # TODO(2.0) Remove after pad/backfill deprecation enforced + op = maybe_normalize_deprecated_kernels(op) + # GH 35964 + if using_array_manager and op == "pct_change" and axis in (1, "columns"): + # TODO(ArrayManager) shift with axis=1 + request.node.add_marker( + pytest.mark.xfail( + reason="shift axis=1 not yet implemented for ArrayManager" + ) + ) + + args = [0.0] if op == "fillna" else [] + if axis == 0 or axis == "index": + ones = np.ones(float_frame.shape[0]) + else: + ones = np.ones(float_frame.shape[1]) + expected = float_frame.groupby(ones, axis=axis).transform(op, *args) + result = float_frame.transform(op, axis, *args) + tm.assert_frame_equal(result, expected) + + # same thing, but ensuring we have multiple blocks + assert "E" not in float_frame.columns + float_frame["E"] = float_frame["A"].copy() + assert len(float_frame._mgr.arrays) > 1 + + if axis == 0 or axis == "index": + ones = np.ones(float_frame.shape[0]) + else: + ones = np.ones(float_frame.shape[1]) + expected2 = float_frame.groupby(ones, axis=axis).transform(op, *args) + result2 = float_frame.transform(op, axis, *args) + tm.assert_frame_equal(result2, expected2) + + +@pytest.mark.parametrize("method", ["abs", "shift", "pct_change", "cumsum", "rank"]) +def test_transform_method_name(method): + # GH 19760 + df = DataFrame({"A": [-1, 2]}) + result = df.transform(method) + expected = operator.methodcaller(method)(df) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..9ff9ac8 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/__pycache__/common.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/__pycache__/common.cpython-38.pyc new file mode 100644 index 0000000..86129fd Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/__pycache__/common.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/__pycache__/conftest.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/__pycache__/conftest.cpython-38.pyc new file mode 100644 index 0000000..bc3f839 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/__pycache__/conftest.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/__pycache__/test_array_ops.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/__pycache__/test_array_ops.cpython-38.pyc new file mode 100644 index 0000000..d13344a Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/__pycache__/test_array_ops.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/__pycache__/test_categorical.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/__pycache__/test_categorical.cpython-38.pyc new file mode 100644 index 0000000..1eb6755 Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/__pycache__/test_categorical.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/__pycache__/test_datetime64.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/__pycache__/test_datetime64.cpython-38.pyc new file mode 100644 index 0000000..b3f5558 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/__pycache__/test_datetime64.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/__pycache__/test_interval.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/__pycache__/test_interval.cpython-38.pyc new file mode 100644 index 0000000..8121083 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/__pycache__/test_interval.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/__pycache__/test_numeric.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/__pycache__/test_numeric.cpython-38.pyc new file mode 100644 index 0000000..f74374c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/__pycache__/test_numeric.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/__pycache__/test_object.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/__pycache__/test_object.cpython-38.pyc new file mode 100644 index 0000000..321f5f3 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/__pycache__/test_object.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/__pycache__/test_period.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/__pycache__/test_period.cpython-38.pyc new file mode 100644 index 0000000..115faf0 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/__pycache__/test_period.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/__pycache__/test_timedelta64.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/__pycache__/test_timedelta64.cpython-38.pyc new file mode 100644 index 0000000..847d3ca Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/__pycache__/test_timedelta64.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/common.py b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/common.py new file mode 100644 index 0000000..f3173e8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/common.py @@ -0,0 +1,155 @@ +""" +Assertion helpers for arithmetic tests. +""" +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + Series, + array, +) +import pandas._testing as tm +from pandas.core.arrays import ( + BooleanArray, + PandasArray, +) + + +def assert_cannot_add(left, right, msg="cannot add"): + """ + Helper to assert that left and right cannot be added. + + Parameters + ---------- + left : object + right : object + msg : str, default "cannot add" + """ + with pytest.raises(TypeError, match=msg): + left + right + with pytest.raises(TypeError, match=msg): + right + left + + +def assert_invalid_addsub_type(left, right, msg=None): + """ + Helper to assert that left and right can be neither added nor subtracted. 
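+    Both argument orders are exercised for both operators
+    (left + right, right + left, left - right, right - left).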
+
+    Parameters
+    ----------
+    left : object
+    right : object
+    msg : str or None, default None
+    """
+    with pytest.raises(TypeError, match=msg):
+        left + right
+    with pytest.raises(TypeError, match=msg):
+        right + left
+    with pytest.raises(TypeError, match=msg):
+        left - right
+    with pytest.raises(TypeError, match=msg):
+        right - left
+
+
+def get_upcast_box(left, right, is_cmp: bool = False):
+    """
+    Get the box to use for 'expected' in an arithmetic or comparison operation.
+
+    Parameters
+    ----------
+    left : Any
+    right : Any
+    is_cmp : bool, default False
+        Whether the operation is a comparison method.
+    """
+
+    if isinstance(left, DataFrame) or isinstance(right, DataFrame):
+        return DataFrame
+    if isinstance(left, Series) or isinstance(right, Series):
+        if is_cmp and isinstance(left, Index):
+            # Index does not defer for comparisons
+            return np.array
+        return Series
+    if isinstance(left, Index) or isinstance(right, Index):
+        if is_cmp:
+            return np.array
+        return Index
+    return tm.to_array
+
+
+def assert_invalid_comparison(left, right, box):
+    """
+    Assert that comparison operations with mismatched types behave correctly.
+
+    Parameters
+    ----------
+    left : np.ndarray, ExtensionArray, Index, or Series
+    right : object
+    box : {pd.DataFrame, pd.Series, pd.Index, pd.array, tm.to_array}
+    """
+    # Not for tznaive-tzaware comparison
+
+    # Note: not quite the same as how we do this for tm.box_expected
+    xbox = box if box not in [Index, array] else np.array
+
+    def xbox2(x):
+        # Eventually we'd like this to be tighter, but for now we'll
+        # just exclude PandasArray[bool]
+        if isinstance(x, PandasArray):
+            return x._ndarray
+        if isinstance(x, BooleanArray):
+            # NB: we are assuming no pd.NAs for now
+            return x.astype(bool)
+        return x
+
+    # rev_box: box to use for reversed comparisons
+    rev_box = xbox
+    if isinstance(right, Index) and isinstance(left, Series):
+        rev_box = np.array
+
+    result = xbox2(left == right)
+    expected = xbox(np.zeros(result.shape, dtype=np.bool_))
+
+    tm.assert_equal(result, expected)
+
+    result = xbox2(right == left)
+    tm.assert_equal(result, rev_box(expected))
+
+    result = xbox2(left != right)
+    tm.assert_equal(result, ~expected)
+
+    result = xbox2(right != left)
+    tm.assert_equal(result, rev_box(~expected))
+
+    msg = "|".join(
+        [
+            "Invalid comparison between",
+            "Cannot compare type",
+            "not supported between",
+            "invalid type promotion",
+            (
+                # GH#36706 npdev 1.20.0 2020-09-28
+                r"The DTypes and "
+                r" do not have a common DType. "
+                "For example they cannot be stored in a single array unless the "
+                "dtype is `object`."
+ ), + ] + ) + with pytest.raises(TypeError, match=msg): + left < right + with pytest.raises(TypeError, match=msg): + left <= right + with pytest.raises(TypeError, match=msg): + left > right + with pytest.raises(TypeError, match=msg): + left >= right + with pytest.raises(TypeError, match=msg): + right < left + with pytest.raises(TypeError, match=msg): + right <= left + with pytest.raises(TypeError, match=msg): + right > left + with pytest.raises(TypeError, match=msg): + right >= left diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/conftest.py b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/conftest.py new file mode 100644 index 0000000..e847f31 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/conftest.py @@ -0,0 +1,232 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import RangeIndex +import pandas._testing as tm +from pandas.core.api import ( + Float64Index, + Int64Index, + UInt64Index, +) +from pandas.core.computation import expressions as expr + + +@pytest.fixture( + autouse=True, scope="module", params=[0, 1000000], ids=["numexpr", "python"] +) +def switch_numexpr_min_elements(request): + _MIN_ELEMENTS = expr._MIN_ELEMENTS + expr._MIN_ELEMENTS = request.param + yield request.param + expr._MIN_ELEMENTS = _MIN_ELEMENTS + + +# ------------------------------------------------------------------ + +# doctest with +SKIP for one fixture fails during setup with +# 'DoctestItem' object has no attribute 'callspec' +# due to switch_numexpr_min_elements fixture +@pytest.fixture(params=[1, np.array(1, dtype=np.int64)]) +def one(request): + """ + Several variants of integer value 1. The zero-dim integer array + behaves like an integer. + + This fixture can be used to check that datetimelike indexes handle + addition and subtraction of integers and zero-dimensional arrays + of integers. + + Examples + -------- + dti = pd.date_range('2016-01-01', periods=2, freq='H') + dti + DatetimeIndex(['2016-01-01 00:00:00', '2016-01-01 01:00:00'], + dtype='datetime64[ns]', freq='H') + dti + one + DatetimeIndex(['2016-01-01 01:00:00', '2016-01-01 02:00:00'], + dtype='datetime64[ns]', freq='H') + """ + return request.param + + +zeros = [ + box_cls([0] * 5, dtype=dtype) + for box_cls in [pd.Index, np.array, pd.array] + for dtype in [np.int64, np.uint64, np.float64] +] +zeros.extend( + [box_cls([-0.0] * 5, dtype=np.float64) for box_cls in [pd.Index, np.array]] +) +zeros.extend([np.array(0, dtype=dtype) for dtype in [np.int64, np.uint64, np.float64]]) +zeros.extend([np.array(-0.0, dtype=np.float64)]) +zeros.extend([0, 0.0, -0.0]) + + +# doctest with +SKIP for zero fixture fails during setup with +# 'DoctestItem' object has no attribute 'callspec' +# due to switch_numexpr_min_elements fixture +@pytest.fixture(params=zeros) +def zero(request): + """ + Several types of scalar zeros and length 5 vectors of zeros. + + This fixture can be used to check that numeric-dtype indexes handle + division by any zero numeric-dtype. + + Uses vector of length 5 for broadcasting with `numeric_idx` fixture, + which creates numeric-dtype vectors also of length 5. 
+ + Examples + -------- + arr = RangeIndex(5) + arr / zeros + Float64Index([nan, inf, inf, inf, inf], dtype='float64') + """ + return request.param + + +# ------------------------------------------------------------------ +# Vector Fixtures + + +@pytest.fixture( + params=[ + Float64Index(np.arange(5, dtype="float64")), + Int64Index(np.arange(5, dtype="int64")), + UInt64Index(np.arange(5, dtype="uint64")), + RangeIndex(5), + ], + ids=lambda x: type(x).__name__, +) +def numeric_idx(request): + """ + Several types of numeric-dtypes Index objects + """ + return request.param + + +# ------------------------------------------------------------------ +# Scalar Fixtures + + +@pytest.fixture( + params=[ + pd.Timedelta("10m7s").to_pytimedelta(), + pd.Timedelta("10m7s"), + pd.Timedelta("10m7s").to_timedelta64(), + ], + ids=lambda x: type(x).__name__, +) +def scalar_td(request): + """ + Several variants of Timedelta scalars representing 10 minutes and 7 seconds. + """ + return request.param + + +@pytest.fixture( + params=[ + pd.offsets.Day(3), + pd.offsets.Hour(72), + pd.Timedelta(days=3).to_pytimedelta(), + pd.Timedelta("72:00:00"), + np.timedelta64(3, "D"), + np.timedelta64(72, "h"), + ], + ids=lambda x: type(x).__name__, +) +def three_days(request): + """ + Several timedelta-like and DateOffset objects that each represent + a 3-day timedelta + """ + return request.param + + +@pytest.fixture( + params=[ + pd.offsets.Hour(2), + pd.offsets.Minute(120), + pd.Timedelta(hours=2).to_pytimedelta(), + pd.Timedelta(seconds=2 * 3600), + np.timedelta64(2, "h"), + np.timedelta64(120, "m"), + ], + ids=lambda x: type(x).__name__, +) +def two_hours(request): + """ + Several timedelta-like and DateOffset objects that each represent + a 2-hour timedelta + """ + return request.param + + +_common_mismatch = [ + pd.offsets.YearBegin(2), + pd.offsets.MonthBegin(1), + pd.offsets.Minute(), +] + + +@pytest.fixture( + params=[ + pd.Timedelta(minutes=30).to_pytimedelta(), + np.timedelta64(30, "s"), + pd.Timedelta(seconds=30), + ] + + _common_mismatch +) +def not_hourly(request): + """ + Several timedelta-like and DateOffset instances that are _not_ + compatible with Hourly frequencies. + """ + return request.param + + +@pytest.fixture( + params=[ + np.timedelta64(4, "h"), + pd.Timedelta(hours=23).to_pytimedelta(), + pd.Timedelta("23:00:00"), + ] + + _common_mismatch +) +def not_daily(request): + """ + Several timedelta-like and DateOffset instances that are _not_ + compatible with Daily frequencies. + """ + return request.param + + +@pytest.fixture( + params=[ + np.timedelta64(365, "D"), + pd.Timedelta(days=365).to_pytimedelta(), + pd.Timedelta(days=365), + ] + + _common_mismatch +) +def mismatched_freq(request): + """ + Several timedelta-like and DateOffset instances that are _not_ + compatible with Monthly or Annual frequencies. 
+ """ + return request.param + + +# ------------------------------------------------------------------ + + +@pytest.fixture( + params=[pd.Index, pd.Series, tm.to_array, np.array, list], ids=lambda x: x.__name__ +) +def box_1d_array(request): + """ + Fixture to test behavior for Index, Series, tm.to_array, numpy Array and list + classes + """ + return request.param diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/test_array_ops.py b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/test_array_ops.py new file mode 100644 index 0000000..2c347d9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/test_array_ops.py @@ -0,0 +1,39 @@ +import operator + +import numpy as np +import pytest + +import pandas._testing as tm +from pandas.core.ops.array_ops import ( + comparison_op, + na_logical_op, +) + + +def test_na_logical_op_2d(): + left = np.arange(8).reshape(4, 2) + right = left.astype(object) + right[0, 0] = np.nan + + # Check that we fall back to the vec_binop branch + with pytest.raises(TypeError, match="unsupported operand type"): + operator.or_(left, right) + + result = na_logical_op(left, right, operator.or_) + expected = right + tm.assert_numpy_array_equal(result, expected) + + +def test_object_comparison_2d(): + left = np.arange(9).reshape(3, 3).astype(object) + right = left.T + + result = comparison_op(left, right, operator.eq) + expected = np.eye(3).astype(bool) + tm.assert_numpy_array_equal(result, expected) + + # Ensure that cython doesn't raise on non-writeable arg, which + # we can get from np.broadcast_to + right.flags.writeable = False + result = comparison_op(left, right, operator.ne) + tm.assert_numpy_array_equal(result, ~expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/test_categorical.py b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/test_categorical.py new file mode 100644 index 0000000..d6f3a13 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/test_categorical.py @@ -0,0 +1,25 @@ +import numpy as np + +from pandas import ( + Categorical, + Series, +) +import pandas._testing as tm + + +class TestCategoricalComparisons: + def test_categorical_nan_equality(self): + cat = Series(Categorical(["a", "b", "c", np.nan])) + expected = Series([True, True, True, False]) + result = cat == cat + tm.assert_series_equal(result, expected) + + def test_categorical_tuple_equality(self): + # GH 18050 + ser = Series([(0, 0), (0, 1), (0, 0), (1, 0), (1, 1)]) + expected = Series([True, False, True, False, False]) + result = ser == (0, 0) + tm.assert_series_equal(result, expected) + + result = ser.astype("category") == (0, 0) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/test_datetime64.py b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/test_datetime64.py new file mode 100644 index 0000000..212cfc2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/test_datetime64.py @@ -0,0 +1,2432 @@ +# Arithmetic tests for DataFrame/Series/Index/Array classes that should +# behave identically. 
+# Specifically for datetime64 and datetime64tz dtypes +from datetime import ( + datetime, + time, + timedelta, +) +from itertools import ( + product, + starmap, +) +import operator +import warnings + +import numpy as np +import pytest +import pytz + +from pandas._libs.tslibs.conversion import localize_pydatetime +from pandas._libs.tslibs.offsets import shift_months +from pandas.errors import PerformanceWarning + +import pandas as pd +from pandas import ( + DateOffset, + DatetimeIndex, + NaT, + Period, + Series, + Timedelta, + TimedeltaIndex, + Timestamp, + date_range, +) +import pandas._testing as tm +from pandas.core.arrays import ( + DatetimeArray, + TimedeltaArray, +) +from pandas.core.ops import roperator +from pandas.tests.arithmetic.common import ( + assert_cannot_add, + assert_invalid_addsub_type, + assert_invalid_comparison, + get_upcast_box, +) + +# ------------------------------------------------------------------ +# Comparisons + + +class TestDatetime64ArrayLikeComparisons: + # Comparison tests for datetime64 vectors fully parametrized over + # DataFrame/Series/DatetimeIndex/DatetimeArray. Ideally all comparison + # tests will eventually end up here. + + def test_compare_zerodim(self, tz_naive_fixture, box_with_array): + # Test comparison with zero-dimensional array is unboxed + tz = tz_naive_fixture + box = box_with_array + dti = date_range("20130101", periods=3, tz=tz) + + other = np.array(dti.to_numpy()[0]) + + dtarr = tm.box_expected(dti, box) + xbox = get_upcast_box(dtarr, other, True) + result = dtarr <= other + expected = np.array([True, False, False]) + expected = tm.box_expected(expected, xbox) + tm.assert_equal(result, expected) + + @pytest.mark.parametrize( + "other", + [ + "foo", + -1, + 99, + 4.0, + object(), + timedelta(days=2), + # GH#19800, GH#19301 datetime.date comparison raises to + # match DatetimeIndex/Timestamp. 
This also matches the behavior + # of stdlib datetime.datetime + datetime(2001, 1, 1).date(), + # GH#19301 None and NaN are *not* cast to NaT for comparisons + None, + np.nan, + ], + ) + def test_dt64arr_cmp_scalar_invalid(self, other, tz_naive_fixture, box_with_array): + # GH#22074, GH#15966 + tz = tz_naive_fixture + + rng = date_range("1/1/2000", periods=10, tz=tz) + dtarr = tm.box_expected(rng, box_with_array) + assert_invalid_comparison(dtarr, other, box_with_array) + + @pytest.mark.parametrize( + "other", + [ + # GH#4968 invalid date/int comparisons + list(range(10)), + np.arange(10), + np.arange(10).astype(np.float32), + np.arange(10).astype(object), + pd.timedelta_range("1ns", periods=10).array, + np.array(pd.timedelta_range("1ns", periods=10)), + list(pd.timedelta_range("1ns", periods=10)), + pd.timedelta_range("1 Day", periods=10).astype(object), + pd.period_range("1971-01-01", freq="D", periods=10).array, + pd.period_range("1971-01-01", freq="D", periods=10).astype(object), + ], + ) + def test_dt64arr_cmp_arraylike_invalid( + self, other, tz_naive_fixture, box_with_array + ): + tz = tz_naive_fixture + + dta = date_range("1970-01-01", freq="ns", periods=10, tz=tz)._data + obj = tm.box_expected(dta, box_with_array) + assert_invalid_comparison(obj, other, box_with_array) + + def test_dt64arr_cmp_mixed_invalid(self, tz_naive_fixture): + tz = tz_naive_fixture + + dta = date_range("1970-01-01", freq="h", periods=5, tz=tz)._data + + other = np.array([0, 1, 2, dta[3], Timedelta(days=1)]) + result = dta == other + expected = np.array([False, False, False, True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = dta != other + tm.assert_numpy_array_equal(result, ~expected) + + msg = "Invalid comparison between|Cannot compare type|not supported between" + with pytest.raises(TypeError, match=msg): + dta < other + with pytest.raises(TypeError, match=msg): + dta > other + with pytest.raises(TypeError, match=msg): + dta <= other + with pytest.raises(TypeError, match=msg): + dta >= other + + def test_dt64arr_nat_comparison(self, tz_naive_fixture, box_with_array): + # GH#22242, GH#22163 DataFrame considered NaT == ts incorrectly + tz = tz_naive_fixture + box = box_with_array + + ts = Timestamp("2021-01-01", tz=tz) + ser = Series([ts, NaT]) + + obj = tm.box_expected(ser, box) + xbox = get_upcast_box(obj, ts, True) + + expected = Series([True, False], dtype=np.bool_) + expected = tm.box_expected(expected, xbox) + + result = obj == ts + tm.assert_equal(result, expected) + + +class TestDatetime64SeriesComparison: + # TODO: moved from tests.series.test_operators; needs cleanup + + @pytest.mark.parametrize( + "pair", + [ + ( + [Timestamp("2011-01-01"), NaT, Timestamp("2011-01-03")], + [NaT, NaT, Timestamp("2011-01-03")], + ), + ( + [Timedelta("1 days"), NaT, Timedelta("3 days")], + [NaT, NaT, Timedelta("3 days")], + ), + ( + [Period("2011-01", freq="M"), NaT, Period("2011-03", freq="M")], + [NaT, NaT, Period("2011-03", freq="M")], + ), + ], + ) + @pytest.mark.parametrize("reverse", [True, False]) + @pytest.mark.parametrize("dtype", [None, object]) + @pytest.mark.parametrize( + "op, expected", + [ + (operator.eq, Series([False, False, True])), + (operator.ne, Series([True, True, False])), + (operator.lt, Series([False, False, False])), + (operator.gt, Series([False, False, False])), + (operator.ge, Series([False, False, True])), + (operator.le, Series([False, False, True])), + ], + ) + def test_nat_comparisons( + self, + dtype, + index_or_series, + reverse, + pair, + op, + expected, + 
): + box = index_or_series + l, r = pair + if reverse: + # add lhs / rhs switched data + l, r = r, l + + left = Series(l, dtype=dtype) + right = box(r, dtype=dtype) + + result = op(left, right) + + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "data", + [ + [Timestamp("2011-01-01"), NaT, Timestamp("2011-01-03")], + [Timedelta("1 days"), NaT, Timedelta("3 days")], + [Period("2011-01", freq="M"), NaT, Period("2011-03", freq="M")], + ], + ) + @pytest.mark.parametrize("dtype", [None, object]) + def test_nat_comparisons_scalar(self, dtype, data, box_with_array): + box = box_with_array + + left = Series(data, dtype=dtype) + left = tm.box_expected(left, box) + xbox = get_upcast_box(left, NaT, True) + + expected = [False, False, False] + expected = tm.box_expected(expected, xbox) + if box is pd.array and dtype is object: + expected = pd.array(expected, dtype="bool") + + tm.assert_equal(left == NaT, expected) + tm.assert_equal(NaT == left, expected) + + expected = [True, True, True] + expected = tm.box_expected(expected, xbox) + if box is pd.array and dtype is object: + expected = pd.array(expected, dtype="bool") + tm.assert_equal(left != NaT, expected) + tm.assert_equal(NaT != left, expected) + + expected = [False, False, False] + expected = tm.box_expected(expected, xbox) + if box is pd.array and dtype is object: + expected = pd.array(expected, dtype="bool") + tm.assert_equal(left < NaT, expected) + tm.assert_equal(NaT > left, expected) + tm.assert_equal(left <= NaT, expected) + tm.assert_equal(NaT >= left, expected) + + tm.assert_equal(left > NaT, expected) + tm.assert_equal(NaT < left, expected) + tm.assert_equal(left >= NaT, expected) + tm.assert_equal(NaT <= left, expected) + + @pytest.mark.parametrize("val", [datetime(2000, 1, 4), datetime(2000, 1, 5)]) + def test_series_comparison_scalars(self, val): + series = Series(date_range("1/1/2000", periods=10)) + + result = series > val + expected = Series([x > val for x in series]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "left,right", [("lt", "gt"), ("le", "ge"), ("eq", "eq"), ("ne", "ne")] + ) + def test_timestamp_compare_series(self, left, right): + # see gh-4982 + # Make sure we can compare Timestamps on the right AND left hand side. 
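+        # e.g. for the ("lt", "gt") pair the two orderings must agree
+        # elementwise: (ser < Timestamp("20010109")) is the same mask as
+        # (Timestamp("20010109") > ser), which the paired left_f/right_f verify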
+ ser = Series(date_range("20010101", periods=10), name="dates") + s_nat = ser.copy(deep=True) + + ser[0] = Timestamp("nat") + ser[3] = Timestamp("nat") + + left_f = getattr(operator, left) + right_f = getattr(operator, right) + + # No NaT + expected = left_f(ser, Timestamp("20010109")) + result = right_f(Timestamp("20010109"), ser) + tm.assert_series_equal(result, expected) + + # NaT + expected = left_f(ser, Timestamp("nat")) + result = right_f(Timestamp("nat"), ser) + tm.assert_series_equal(result, expected) + + # Compare to Timestamp with series containing NaT + expected = left_f(s_nat, Timestamp("20010109")) + result = right_f(Timestamp("20010109"), s_nat) + tm.assert_series_equal(result, expected) + + # Compare to NaT with series containing NaT + expected = left_f(s_nat, NaT) + result = right_f(NaT, s_nat) + tm.assert_series_equal(result, expected) + + def test_dt64arr_timestamp_equality(self, box_with_array): + # GH#11034 + + ser = Series([Timestamp("2000-01-29 01:59:00"), Timestamp("2000-01-30"), NaT]) + ser = tm.box_expected(ser, box_with_array) + xbox = get_upcast_box(ser, ser, True) + + result = ser != ser + expected = tm.box_expected([False, False, True], xbox) + tm.assert_equal(result, expected) + + warn = FutureWarning if box_with_array is pd.DataFrame else None + with tm.assert_produces_warning(warn): + # alignment for frame vs series comparisons deprecated + result = ser != ser[0] + expected = tm.box_expected([False, True, True], xbox) + tm.assert_equal(result, expected) + + with tm.assert_produces_warning(warn): + # alignment for frame vs series comparisons deprecated + result = ser != ser[2] + expected = tm.box_expected([True, True, True], xbox) + tm.assert_equal(result, expected) + + result = ser == ser + expected = tm.box_expected([True, True, False], xbox) + tm.assert_equal(result, expected) + + with tm.assert_produces_warning(warn): + # alignment for frame vs series comparisons deprecated + result = ser == ser[0] + expected = tm.box_expected([True, False, False], xbox) + tm.assert_equal(result, expected) + + with tm.assert_produces_warning(warn): + # alignment for frame vs series comparisons deprecated + result = ser == ser[2] + expected = tm.box_expected([False, False, False], xbox) + tm.assert_equal(result, expected) + + @pytest.mark.parametrize( + "datetimelike", + [ + Timestamp("20130101"), + datetime(2013, 1, 1), + np.datetime64("2013-01-01T00:00", "ns"), + ], + ) + @pytest.mark.parametrize( + "op,expected", + [ + (operator.lt, [True, False, False, False]), + (operator.le, [True, True, False, False]), + (operator.eq, [False, True, False, False]), + (operator.gt, [False, False, False, True]), + ], + ) + def test_dt64_compare_datetime_scalar(self, datetimelike, op, expected): + # GH#17965, test for ability to compare datetime64[ns] columns + # to datetimelike + ser = Series( + [ + Timestamp("20120101"), + Timestamp("20130101"), + np.nan, + Timestamp("20130103"), + ], + name="A", + ) + result = op(ser, datetimelike) + expected = Series(expected, name="A") + tm.assert_series_equal(result, expected) + + +class TestDatetimeIndexComparisons: + + # TODO: moved from tests.indexes.test_base; parametrize and de-duplicate + def test_comparators(self, comparison_op): + index = tm.makeDateIndex(100) + element = index[len(index) // 2] + element = Timestamp(element).to_datetime64() + + arr = np.array(index) + arr_result = comparison_op(arr, element) + index_result = comparison_op(index, element) + + assert isinstance(index_result, np.ndarray) + 
+        tm.assert_numpy_array_equal(arr_result, index_result)
+
+    @pytest.mark.parametrize(
+        "other",
+        [datetime(2016, 1, 1), Timestamp("2016-01-01"), np.datetime64("2016-01-01")],
+    )
+    def test_dti_cmp_datetimelike(self, other, tz_naive_fixture):
+        tz = tz_naive_fixture
+        dti = date_range("2016-01-01", periods=2, tz=tz)
+        if tz is not None:
+            if isinstance(other, np.datetime64):
+                # no tzaware version available
+                return
+            other = localize_pydatetime(other, dti.tzinfo)
+
+        result = dti == other
+        expected = np.array([True, False])
+        tm.assert_numpy_array_equal(result, expected)
+
+        result = dti > other
+        expected = np.array([False, True])
+        tm.assert_numpy_array_equal(result, expected)
+
+        result = dti >= other
+        expected = np.array([True, True])
+        tm.assert_numpy_array_equal(result, expected)
+
+        result = dti < other
+        expected = np.array([False, False])
+        tm.assert_numpy_array_equal(result, expected)
+
+        result = dti <= other
+        expected = np.array([True, False])
+        tm.assert_numpy_array_equal(result, expected)
+
+    @pytest.mark.parametrize("dtype", [None, object])
+    def test_dti_cmp_nat(self, dtype, box_with_array):
+
+        left = DatetimeIndex([Timestamp("2011-01-01"), NaT, Timestamp("2011-01-03")])
+        right = DatetimeIndex([NaT, NaT, Timestamp("2011-01-03")])
+
+        left = tm.box_expected(left, box_with_array)
+        right = tm.box_expected(right, box_with_array)
+        xbox = get_upcast_box(left, right, True)
+
+        lhs, rhs = left, right
+        if dtype is object:
+            lhs, rhs = left.astype(object), right.astype(object)
+
+        result = rhs == lhs
+        expected = np.array([False, False, True])
+        expected = tm.box_expected(expected, xbox)
+        tm.assert_equal(result, expected)
+
+        result = lhs != rhs
+        expected = np.array([True, True, False])
+        expected = tm.box_expected(expected, xbox)
+        tm.assert_equal(result, expected)
+
+        expected = np.array([False, False, False])
+        expected = tm.box_expected(expected, xbox)
+        tm.assert_equal(lhs == NaT, expected)
+        tm.assert_equal(NaT == rhs, expected)
+
+        expected = np.array([True, True, True])
+        expected = tm.box_expected(expected, xbox)
+        tm.assert_equal(lhs != NaT, expected)
+        tm.assert_equal(NaT != lhs, expected)
+
+        expected = np.array([False, False, False])
+        expected = tm.box_expected(expected, xbox)
+        tm.assert_equal(lhs < NaT, expected)
+        tm.assert_equal(NaT > lhs, expected)
+
+    def test_dti_cmp_nat_behaves_like_float_cmp_nan(self):
+        fidx1 = pd.Index([1.0, np.nan, 3.0, np.nan, 5.0, 7.0])
+        fidx2 = pd.Index([2.0, 3.0, np.nan, np.nan, 6.0, 7.0])
+
+        didx1 = DatetimeIndex(
+            ["2014-01-01", NaT, "2014-03-01", NaT, "2014-05-01", "2014-07-01"]
+        )
+        didx2 = DatetimeIndex(
+            ["2014-02-01", "2014-03-01", NaT, NaT, "2014-06-01", "2014-07-01"]
+        )
+        darr = np.array(
+            [
+                np.datetime64("2014-02-01 00:00"),
+                np.datetime64("2014-03-01 00:00"),
+                np.datetime64("nat"),
+                np.datetime64("nat"),
+                np.datetime64("2014-06-01 00:00"),
+                np.datetime64("2014-07-01 00:00"),
+            ]
+        )
+
+        cases = [(fidx1, fidx2), (didx1, didx2), (didx1, darr)]
+
+        # Check pd.NaT is handled the same as np.nan
+        with tm.assert_produces_warning(None):
+            for idx1, idx2 in cases:
+
+                result = idx1 < idx2
+                expected = np.array([True, False, False, False, True, False])
+                tm.assert_numpy_array_equal(result, expected)
+
+                result = idx2 > idx1
+                expected = np.array([True, False, False, False, True, False])
+                tm.assert_numpy_array_equal(result, expected)
+
+                result = idx1 <= idx2
+                expected = np.array([True, False, False, False, True, True])
+                tm.assert_numpy_array_equal(result, expected)
+
+                result = idx2 >= idx1
+                expected = np.array([True, False, False, False, True, True])
+                tm.assert_numpy_array_equal(result, expected)
+
+                result = idx1 == idx2
+                expected = np.array([False, False, False, False, False, True])
+                tm.assert_numpy_array_equal(result, expected)
+
+                result = idx1 != idx2
+                expected = np.array([True, True, True, True, True, False])
+                tm.assert_numpy_array_equal(result, expected)
+
+        with tm.assert_produces_warning(None):
+            for idx1, val in [(fidx1, np.nan), (didx1, NaT)]:
+                result = idx1 < val
+                expected = np.array([False, False, False, False, False, False])
+                tm.assert_numpy_array_equal(result, expected)
+                result = idx1 > val
+                tm.assert_numpy_array_equal(result, expected)
+
+                result = idx1 <= val
+                tm.assert_numpy_array_equal(result, expected)
+                result = idx1 >= val
+                tm.assert_numpy_array_equal(result, expected)
+
+                result = idx1 == val
+                tm.assert_numpy_array_equal(result, expected)
+
+                result = idx1 != val
+                expected = np.array([True, True, True, True, True, True])
+                tm.assert_numpy_array_equal(result, expected)
+
+        # Check pd.NaT is handled the same as np.nan
+        with tm.assert_produces_warning(None):
+            for idx1, val in [(fidx1, 3), (didx1, datetime(2014, 3, 1))]:
+                result = idx1 < val
+                expected = np.array([True, False, False, False, False, False])
+                tm.assert_numpy_array_equal(result, expected)
+                result = idx1 > val
+                expected = np.array([False, False, False, False, True, True])
+                tm.assert_numpy_array_equal(result, expected)
+
+                result = idx1 <= val
+                expected = np.array([True, False, True, False, False, False])
+                tm.assert_numpy_array_equal(result, expected)
+                result = idx1 >= val
+                expected = np.array([False, False, True, False, True, True])
+                tm.assert_numpy_array_equal(result, expected)
+
+                result = idx1 == val
+                expected = np.array([False, False, True, False, False, False])
+                tm.assert_numpy_array_equal(result, expected)
+
+                result = idx1 != val
+                expected = np.array([True, True, False, True, True, True])
+                tm.assert_numpy_array_equal(result, expected)
+
+    def test_comparison_tzawareness_compat(self, comparison_op, box_with_array):
+        # GH#18162
+        op = comparison_op
+        box = box_with_array
+
+        dr = date_range("2016-01-01", periods=6)
+        dz = dr.tz_localize("US/Pacific")
+
+        dr = tm.box_expected(dr, box)
+        dz = tm.box_expected(dz, box)
+
+        if box is pd.DataFrame:
+            tolist = lambda x: x.astype(object).values.tolist()[0]
+        else:
+            tolist = list
+
+        if op not in [operator.eq, operator.ne]:
+            msg = (
+                r"Invalid comparison between dtype=datetime64\[ns.*\] "
+                "and (Timestamp|DatetimeArray|list|ndarray)"
+            )
+            with pytest.raises(TypeError, match=msg):
+                op(dr, dz)
+
+            with pytest.raises(TypeError, match=msg):
+                op(dr, tolist(dz))
+            with pytest.raises(TypeError, match=msg):
+                op(dr, np.array(tolist(dz), dtype=object))
+            with pytest.raises(TypeError, match=msg):
+                op(dz, dr)
+
+            with pytest.raises(TypeError, match=msg):
+                op(dz, tolist(dr))
+            with pytest.raises(TypeError, match=msg):
+                op(dz, np.array(tolist(dr), dtype=object))
+
+        # The aware==aware and naive==naive comparisons should *not* raise
+        assert np.all(dr == dr)
+        assert np.all(dr == tolist(dr))
+        assert np.all(tolist(dr) == dr)
+        assert np.all(np.array(tolist(dr), dtype=object) == dr)
+        assert np.all(dr == np.array(tolist(dr), dtype=object))
+
+        assert np.all(dz == dz)
+        assert np.all(dz == tolist(dz))
+        assert np.all(tolist(dz) == dz)
+        assert np.all(np.array(tolist(dz), dtype=object) == dz)
+        assert np.all(dz == np.array(tolist(dz), dtype=object))
+
+    def test_comparison_tzawareness_compat_scalars(self, comparison_op,
box_with_array): + # GH#18162 + op = comparison_op + + dr = date_range("2016-01-01", periods=6) + dz = dr.tz_localize("US/Pacific") + + dr = tm.box_expected(dr, box_with_array) + dz = tm.box_expected(dz, box_with_array) + + # Check comparisons against scalar Timestamps + ts = Timestamp("2000-03-14 01:59") + ts_tz = Timestamp("2000-03-14 01:59", tz="Europe/Amsterdam") + + assert np.all(dr > ts) + msg = r"Invalid comparison between dtype=datetime64\[ns.*\] and Timestamp" + if op not in [operator.eq, operator.ne]: + with pytest.raises(TypeError, match=msg): + op(dr, ts_tz) + + assert np.all(dz > ts_tz) + if op not in [operator.eq, operator.ne]: + with pytest.raises(TypeError, match=msg): + op(dz, ts) + + if op not in [operator.eq, operator.ne]: + # GH#12601: Check comparison against Timestamps and DatetimeIndex + with pytest.raises(TypeError, match=msg): + op(ts, dz) + + @pytest.mark.parametrize( + "other", + [datetime(2016, 1, 1), Timestamp("2016-01-01"), np.datetime64("2016-01-01")], + ) + # Bug in NumPy? https://github.com/numpy/numpy/issues/13841 + # Raising in __eq__ will fallback to NumPy, which warns, fails, + # then re-raises the original exception. So we just need to ignore. + @pytest.mark.filterwarnings("ignore:elementwise comp:DeprecationWarning") + @pytest.mark.filterwarnings("ignore:Converting timezone-aware:FutureWarning") + def test_scalar_comparison_tzawareness( + self, comparison_op, other, tz_aware_fixture, box_with_array + ): + op = comparison_op + tz = tz_aware_fixture + dti = date_range("2016-01-01", periods=2, tz=tz) + + dtarr = tm.box_expected(dti, box_with_array) + xbox = get_upcast_box(dtarr, other, True) + if op in [operator.eq, operator.ne]: + exbool = op is operator.ne + expected = np.array([exbool, exbool], dtype=bool) + expected = tm.box_expected(expected, xbox) + + result = op(dtarr, other) + tm.assert_equal(result, expected) + + result = op(other, dtarr) + tm.assert_equal(result, expected) + else: + msg = ( + r"Invalid comparison between dtype=datetime64\[ns, .*\] " + f"and {type(other).__name__}" + ) + with pytest.raises(TypeError, match=msg): + op(dtarr, other) + with pytest.raises(TypeError, match=msg): + op(other, dtarr) + + def test_nat_comparison_tzawareness(self, comparison_op): + # GH#19276 + # tzaware DatetimeIndex should not raise when compared to NaT + op = comparison_op + + dti = DatetimeIndex( + ["2014-01-01", NaT, "2014-03-01", NaT, "2014-05-01", "2014-07-01"] + ) + expected = np.array([op == operator.ne] * len(dti)) + result = op(dti, NaT) + tm.assert_numpy_array_equal(result, expected) + + result = op(dti.tz_localize("US/Pacific"), NaT) + tm.assert_numpy_array_equal(result, expected) + + def test_dti_cmp_str(self, tz_naive_fixture): + # GH#22074 + # regardless of tz, we expect these comparisons are valid + tz = tz_naive_fixture + rng = date_range("1/1/2000", periods=10, tz=tz) + other = "1/1/2000" + + result = rng == other + expected = np.array([True] + [False] * 9) + tm.assert_numpy_array_equal(result, expected) + + result = rng != other + expected = np.array([False] + [True] * 9) + tm.assert_numpy_array_equal(result, expected) + + result = rng < other + expected = np.array([False] * 10) + tm.assert_numpy_array_equal(result, expected) + + result = rng <= other + expected = np.array([True] + [False] * 9) + tm.assert_numpy_array_equal(result, expected) + + result = rng > other + expected = np.array([False] + [True] * 9) + tm.assert_numpy_array_equal(result, expected) + + result = rng >= other + expected = np.array([True] * 10) + 
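+        # rng starts at "1/1/2000", so every element is >= the string date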
tm.assert_numpy_array_equal(result, expected) + + def test_dti_cmp_list(self): + rng = date_range("1/1/2000", periods=10) + + result = rng == list(rng) + expected = rng == rng + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "other", + [ + pd.timedelta_range("1D", periods=10), + pd.timedelta_range("1D", periods=10).to_series(), + pd.timedelta_range("1D", periods=10).asi8.view("m8[ns]"), + ], + ids=lambda x: type(x).__name__, + ) + def test_dti_cmp_tdi_tzawareness(self, other): + # GH#22074 + # reversion test that we _don't_ call _assert_tzawareness_compat + # when comparing against TimedeltaIndex + dti = date_range("2000-01-01", periods=10, tz="Asia/Tokyo") + + result = dti == other + expected = np.array([False] * 10) + tm.assert_numpy_array_equal(result, expected) + + result = dti != other + expected = np.array([True] * 10) + tm.assert_numpy_array_equal(result, expected) + msg = "Invalid comparison between" + with pytest.raises(TypeError, match=msg): + dti < other + with pytest.raises(TypeError, match=msg): + dti <= other + with pytest.raises(TypeError, match=msg): + dti > other + with pytest.raises(TypeError, match=msg): + dti >= other + + def test_dti_cmp_object_dtype(self): + # GH#22074 + dti = date_range("2000-01-01", periods=10, tz="Asia/Tokyo") + + other = dti.astype("O") + + result = dti == other + expected = np.array([True] * 10) + tm.assert_numpy_array_equal(result, expected) + + other = dti.tz_localize(None) + result = dti != other + tm.assert_numpy_array_equal(result, expected) + + other = np.array(list(dti[:5]) + [Timedelta(days=1)] * 5) + result = dti == other + expected = np.array([True] * 5 + [False] * 5) + tm.assert_numpy_array_equal(result, expected) + msg = ">=' not supported between instances of 'Timestamp' and 'Timedelta'" + with pytest.raises(TypeError, match=msg): + dti >= other + + +# ------------------------------------------------------------------ +# Arithmetic + + +class TestDatetime64Arithmetic: + # This class is intended for "finished" tests that are fully parametrized + # over DataFrame/Series/Index/DatetimeArray + + # ------------------------------------------------------------- + # Addition/Subtraction of timedelta-like + + @pytest.mark.arm_slow + def test_dt64arr_add_timedeltalike_scalar( + self, tz_naive_fixture, two_hours, box_with_array + ): + # GH#22005, GH#22163 check DataFrame doesn't raise TypeError + tz = tz_naive_fixture + + rng = date_range("2000-01-01", "2000-02-01", tz=tz) + expected = date_range("2000-01-01 02:00", "2000-02-01 02:00", tz=tz) + + rng = tm.box_expected(rng, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = rng + two_hours + tm.assert_equal(result, expected) + + result = two_hours + rng + tm.assert_equal(result, expected) + + rng += two_hours + tm.assert_equal(rng, expected) + + def test_dt64arr_sub_timedeltalike_scalar( + self, tz_naive_fixture, two_hours, box_with_array + ): + tz = tz_naive_fixture + + rng = date_range("2000-01-01", "2000-02-01", tz=tz) + expected = date_range("1999-12-31 22:00", "2000-01-31 22:00", tz=tz) + + rng = tm.box_expected(rng, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = rng - two_hours + tm.assert_equal(result, expected) + + rng -= two_hours + tm.assert_equal(rng, expected) + + def test_dt64_array_sub_dt_with_different_timezone(self, box_with_array): + t1 = date_range("20130101", periods=3).tz_localize("US/Eastern") + t1 = tm.box_expected(t1, box_with_array) + t2 = 
Timestamp("20130101").tz_localize("CET") + tnaive = Timestamp(20130101) + + result = t1 - t2 + expected = TimedeltaIndex( + ["0 days 06:00:00", "1 days 06:00:00", "2 days 06:00:00"] + ) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(result, expected) + + result = t2 - t1 + expected = TimedeltaIndex( + ["-1 days +18:00:00", "-2 days +18:00:00", "-3 days +18:00:00"] + ) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(result, expected) + + msg = "Cannot subtract tz-naive and tz-aware datetime-like objects" + with pytest.raises(TypeError, match=msg): + t1 - tnaive + + with pytest.raises(TypeError, match=msg): + tnaive - t1 + + def test_dt64_array_sub_dt64_array_with_different_timezone(self, box_with_array): + t1 = date_range("20130101", periods=3).tz_localize("US/Eastern") + t1 = tm.box_expected(t1, box_with_array) + t2 = date_range("20130101", periods=3).tz_localize("CET") + t2 = tm.box_expected(t2, box_with_array) + tnaive = date_range("20130101", periods=3) + + result = t1 - t2 + expected = TimedeltaIndex( + ["0 days 06:00:00", "0 days 06:00:00", "0 days 06:00:00"] + ) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(result, expected) + + result = t2 - t1 + expected = TimedeltaIndex( + ["-1 days +18:00:00", "-1 days +18:00:00", "-1 days +18:00:00"] + ) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(result, expected) + + msg = "Cannot subtract tz-naive and tz-aware datetime-like objects" + with pytest.raises(TypeError, match=msg): + t1 - tnaive + + with pytest.raises(TypeError, match=msg): + tnaive - t1 + + def test_dt64arr_add_sub_td64_nat(self, box_with_array, tz_naive_fixture): + # GH#23320 special handling for timedelta64("NaT") + tz = tz_naive_fixture + + dti = date_range("1994-04-01", periods=9, tz=tz, freq="QS") + other = np.timedelta64("NaT") + expected = DatetimeIndex(["NaT"] * 9, tz=tz) + + obj = tm.box_expected(dti, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = obj + other + tm.assert_equal(result, expected) + result = other + obj + tm.assert_equal(result, expected) + result = obj - other + tm.assert_equal(result, expected) + msg = "cannot subtract" + with pytest.raises(TypeError, match=msg): + other - obj + + def test_dt64arr_add_sub_td64ndarray(self, tz_naive_fixture, box_with_array): + + tz = tz_naive_fixture + dti = date_range("2016-01-01", periods=3, tz=tz) + tdi = TimedeltaIndex(["-1 Day", "-1 Day", "-1 Day"]) + tdarr = tdi.values + + expected = date_range("2015-12-31", "2016-01-02", periods=3, tz=tz) + + dtarr = tm.box_expected(dti, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = dtarr + tdarr + tm.assert_equal(result, expected) + result = tdarr + dtarr + tm.assert_equal(result, expected) + + expected = date_range("2016-01-02", "2016-01-04", periods=3, tz=tz) + expected = tm.box_expected(expected, box_with_array) + + result = dtarr - tdarr + tm.assert_equal(result, expected) + msg = "cannot subtract|(bad|unsupported) operand type for unary" + with pytest.raises(TypeError, match=msg): + tdarr - dtarr + + # ----------------------------------------------------------------- + # Subtraction of datetime-like scalars + + @pytest.mark.parametrize( + "ts", + [ + Timestamp("2013-01-01"), + Timestamp("2013-01-01").to_pydatetime(), + Timestamp("2013-01-01").to_datetime64(), + # GH#7996, GH#22163 ensure non-nano datetime64 is converted to nano + # for DataFrame operation + np.datetime64("2013-01-01", "D"), + ], + ) 
+ def test_dt64arr_sub_dtscalar(self, box_with_array, ts): + # GH#8554, GH#22163 DataFrame op should _not_ return dt64 dtype + idx = date_range("2013-01-01", periods=3)._with_freq(None) + idx = tm.box_expected(idx, box_with_array) + + expected = TimedeltaIndex(["0 Days", "1 Day", "2 Days"]) + expected = tm.box_expected(expected, box_with_array) + + result = idx - ts + tm.assert_equal(result, expected) + + result = ts - idx + tm.assert_equal(result, -expected) + + def test_dt64arr_sub_timestamp_tzaware(self, box_with_array): + ser = date_range("2014-03-17", periods=2, freq="D", tz="US/Eastern") + ser = ser._with_freq(None) + ts = ser[0] + + ser = tm.box_expected(ser, box_with_array) + + delta_series = Series([np.timedelta64(0, "D"), np.timedelta64(1, "D")]) + expected = tm.box_expected(delta_series, box_with_array) + + tm.assert_equal(ser - ts, expected) + tm.assert_equal(ts - ser, -expected) + + def test_dt64arr_sub_NaT(self, box_with_array): + # GH#18808 + dti = DatetimeIndex([NaT, Timestamp("19900315")]) + ser = tm.box_expected(dti, box_with_array) + + result = ser - NaT + expected = Series([NaT, NaT], dtype="timedelta64[ns]") + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(result, expected) + + dti_tz = dti.tz_localize("Asia/Tokyo") + ser_tz = tm.box_expected(dti_tz, box_with_array) + + result = ser_tz - NaT + expected = Series([NaT, NaT], dtype="timedelta64[ns]") + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(result, expected) + + # ------------------------------------------------------------- + # Subtraction of datetime-like array-like + + def test_dt64arr_sub_dt64object_array(self, box_with_array, tz_naive_fixture): + dti = date_range("2016-01-01", periods=3, tz=tz_naive_fixture) + expected = dti - dti + + obj = tm.box_expected(dti, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + with tm.assert_produces_warning(PerformanceWarning): + result = obj - obj.astype(object) + tm.assert_equal(result, expected) + + def test_dt64arr_naive_sub_dt64ndarray(self, box_with_array): + dti = date_range("2016-01-01", periods=3, tz=None) + dt64vals = dti.values + + dtarr = tm.box_expected(dti, box_with_array) + + expected = dtarr - dtarr + result = dtarr - dt64vals + tm.assert_equal(result, expected) + result = dt64vals - dtarr + tm.assert_equal(result, expected) + + def test_dt64arr_aware_sub_dt64ndarray_raises( + self, tz_aware_fixture, box_with_array + ): + + tz = tz_aware_fixture + dti = date_range("2016-01-01", periods=3, tz=tz) + dt64vals = dti.values + + dtarr = tm.box_expected(dti, box_with_array) + msg = "Cannot subtract tz-naive and tz-aware datetime" + with pytest.raises(TypeError, match=msg): + dtarr - dt64vals + with pytest.raises(TypeError, match=msg): + dt64vals - dtarr + + # ------------------------------------------------------------- + # Addition of datetime-like others (invalid) + + def test_dt64arr_add_dtlike_raises(self, tz_naive_fixture, box_with_array): + # GH#22163 ensure DataFrame doesn't cast Timestamp to i8 + # GH#9631 + tz = tz_naive_fixture + + dti = date_range("2016-01-01", periods=3, tz=tz) + if tz is None: + dti2 = dti.tz_localize("US/Eastern") + else: + dti2 = dti.tz_localize(None) + dtarr = tm.box_expected(dti, box_with_array) + + assert_cannot_add(dtarr, dti.values) + assert_cannot_add(dtarr, dti) + assert_cannot_add(dtarr, dtarr) + assert_cannot_add(dtarr, dti[0]) + assert_cannot_add(dtarr, dti[0].to_pydatetime()) + assert_cannot_add(dtarr,
dti[0].to_datetime64()) + assert_cannot_add(dtarr, dti2[0]) + assert_cannot_add(dtarr, dti2[0].to_pydatetime()) + assert_cannot_add(dtarr, np.datetime64("2011-01-01", "D")) + + # ------------------------------------------------------------- + # Other Invalid Addition/Subtraction + + # Note: freq here includes both Tick and non-Tick offsets; this is + # relevant because historically integer-addition was allowed if we had + # a freq. + @pytest.mark.parametrize("freq", ["H", "D", "W", "M", "MS", "Q", "B", None]) + @pytest.mark.parametrize("dtype", [None, "uint8"]) + def test_dt64arr_addsub_intlike( + self, dtype, box_with_array, freq, tz_naive_fixture + ): + # GH#19959, GH#19123, GH#19012 + tz = tz_naive_fixture + if box_with_array is pd.DataFrame: + # alignment headaches + return + + if freq is None: + dti = DatetimeIndex(["NaT", "2017-04-05 06:07:08"], tz=tz) + else: + dti = date_range("2016-01-01", periods=2, freq=freq, tz=tz) + + obj = box_with_array(dti) + other = np.array([4, -1], dtype=dtype) + + msg = "|".join( + [ + "Addition/subtraction of integers", + "cannot subtract DatetimeArray from", + # IntegerArray + "can only perform ops with numeric values", + "unsupported operand type.*Categorical", + ] + ) + assert_invalid_addsub_type(obj, 1, msg) + assert_invalid_addsub_type(obj, np.int64(2), msg) + assert_invalid_addsub_type(obj, np.array(3, dtype=np.int64), msg) + assert_invalid_addsub_type(obj, other, msg) + assert_invalid_addsub_type(obj, np.array(other), msg) + assert_invalid_addsub_type(obj, pd.array(other), msg) + assert_invalid_addsub_type(obj, pd.Categorical(other), msg) + assert_invalid_addsub_type(obj, pd.Index(other), msg) + assert_invalid_addsub_type(obj, pd.core.indexes.api.NumericIndex(other), msg) + assert_invalid_addsub_type(obj, Series(other), msg) + + @pytest.mark.parametrize( + "other", + [ + 3.14, + np.array([2.0, 3.0]), + # GH#13078 datetime +/- Period is invalid + Period("2011-01-01", freq="D"), + # https://github.com/pandas-dev/pandas/issues/10329 + time(1, 2, 3), + ], + ) + @pytest.mark.parametrize("dti_freq", [None, "D"]) + def test_dt64arr_add_sub_invalid(self, dti_freq, other, box_with_array): + dti = DatetimeIndex(["2011-01-01", "2011-01-02"], freq=dti_freq) + dtarr = tm.box_expected(dti, box_with_array) + msg = "|".join( + [ + "unsupported operand type", + "cannot (add|subtract)", + "cannot use operands with types", + "ufunc '?(add|subtract)'? 
cannot use operands with types", + "Concatenation operation is not implemented for NumPy arrays", + ] + ) + assert_invalid_addsub_type(dtarr, other, msg) + + @pytest.mark.parametrize("pi_freq", ["D", "W", "Q", "H"]) + @pytest.mark.parametrize("dti_freq", [None, "D"]) + def test_dt64arr_add_sub_parr( + self, dti_freq, pi_freq, box_with_array, box_with_array2 + ): + # GH#20049 subtracting PeriodIndex should raise TypeError + dti = DatetimeIndex(["2011-01-01", "2011-01-02"], freq=dti_freq) + pi = dti.to_period(pi_freq) + + dtarr = tm.box_expected(dti, box_with_array) + parr = tm.box_expected(pi, box_with_array2) + msg = "|".join( + [ + "cannot (add|subtract)", + "unsupported operand", + "descriptor.*requires", + "ufunc.*cannot use operands", + ] + ) + assert_invalid_addsub_type(dtarr, parr, msg) + + def test_dt64arr_addsub_time_objects_raises(self, box_with_array, tz_naive_fixture): + # https://github.com/pandas-dev/pandas/issues/10329 + + tz = tz_naive_fixture + + obj1 = date_range("2012-01-01", periods=3, tz=tz) + obj2 = [time(i, i, i) for i in range(3)] + + obj1 = tm.box_expected(obj1, box_with_array) + obj2 = tm.box_expected(obj2, box_with_array) + + msg = "|".join( + [ + "unsupported operand", + "cannot subtract DatetimeArray from ndarray", + ] + ) + + with warnings.catch_warnings(record=True): + # pandas.errors.PerformanceWarning: Non-vectorized DateOffset being + # applied to Series or DatetimeIndex + # we aren't testing that here, so ignore. + warnings.simplefilter("ignore", PerformanceWarning) + + assert_invalid_addsub_type(obj1, obj2, msg=msg) + + # ------------------------------------------------------------- + # Other invalid operations + + @pytest.mark.parametrize( + "dt64_series", + [ + Series([Timestamp("19900315"), Timestamp("19900315")]), + Series([NaT, Timestamp("19900315")]), + Series([NaT, NaT], dtype="datetime64[ns]"), + ], + ) + @pytest.mark.parametrize("one", [1, 1.0, np.array(1)]) + def test_dt64_mul_div_numeric_invalid(self, one, dt64_series, box_with_array): + obj = tm.box_expected(dt64_series, box_with_array) + + msg = "cannot perform .* with this index type" + + # multiplication + with pytest.raises(TypeError, match=msg): + obj * one + with pytest.raises(TypeError, match=msg): + one * obj + + # division + with pytest.raises(TypeError, match=msg): + obj / one + with pytest.raises(TypeError, match=msg): + one / obj + + +class TestDatetime64DateOffsetArithmetic: + + # ------------------------------------------------------------- + # Tick DateOffsets + + # TODO: parametrize over timezone? 
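+    # Editorial note, not part of the original suite: Tick offsets (Day,
+    # Hour, ..., Nano) are fixed-length spans, so adding one is equivalent
+    # to adding the corresponding Timedelta, e.g.
+    #   Timestamp("20130101 9:01") + pd.offsets.Second(5)
+    # gives Timestamp('2013-01-01 09:01:05').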
+ def test_dt64arr_series_add_tick_DateOffset(self, box_with_array): + # GH#4532 + # operate with pd.offsets + ser = Series([Timestamp("20130101 9:01"), Timestamp("20130101 9:02")]) + expected = Series( + [Timestamp("20130101 9:01:05"), Timestamp("20130101 9:02:05")] + ) + + ser = tm.box_expected(ser, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = ser + pd.offsets.Second(5) + tm.assert_equal(result, expected) + + result2 = pd.offsets.Second(5) + ser + tm.assert_equal(result2, expected) + + def test_dt64arr_series_sub_tick_DateOffset(self, box_with_array): + # GH#4532 + # operate with pd.offsets + ser = Series([Timestamp("20130101 9:01"), Timestamp("20130101 9:02")]) + expected = Series( + [Timestamp("20130101 9:00:55"), Timestamp("20130101 9:01:55")] + ) + + ser = tm.box_expected(ser, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = ser - pd.offsets.Second(5) + tm.assert_equal(result, expected) + + result2 = -pd.offsets.Second(5) + ser + tm.assert_equal(result2, expected) + msg = "(bad|unsupported) operand type for unary" + with pytest.raises(TypeError, match=msg): + pd.offsets.Second(5) - ser + + @pytest.mark.parametrize( + "cls_name", ["Day", "Hour", "Minute", "Second", "Milli", "Micro", "Nano"] + ) + def test_dt64arr_add_sub_tick_DateOffset_smoke(self, cls_name, box_with_array): + # GH#4532 + # smoke tests for valid DateOffsets + ser = Series([Timestamp("20130101 9:01"), Timestamp("20130101 9:02")]) + ser = tm.box_expected(ser, box_with_array) + + offset_cls = getattr(pd.offsets, cls_name) + ser + offset_cls(5) + offset_cls(5) + ser + ser - offset_cls(5) + + def test_dti_add_tick_tzaware(self, tz_aware_fixture, box_with_array): + # GH#21610, GH#22163 ensure DataFrame doesn't return object-dtype + tz = tz_aware_fixture + if tz == "US/Pacific": + dates = date_range("2012-11-01", periods=3, tz=tz) + offset = dates + pd.offsets.Hour(5) + assert dates[0] + pd.offsets.Hour(5) == offset[0] + + dates = date_range("2010-11-01 00:00", periods=3, tz=tz, freq="H") + expected = DatetimeIndex( + ["2010-11-01 05:00", "2010-11-01 06:00", "2010-11-01 07:00"], + freq="H", + tz=tz, + ) + + dates = tm.box_expected(dates, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + for scalar in [pd.offsets.Hour(5), np.timedelta64(5, "h"), timedelta(hours=5)]: + offset = dates + scalar + tm.assert_equal(offset, expected) + offset = scalar + dates + tm.assert_equal(offset, expected) + + roundtrip = offset - scalar + tm.assert_equal(roundtrip, dates) + + msg = "|".join( + ["bad operand type for unary -", "cannot subtract DatetimeArray"] + ) + with pytest.raises(TypeError, match=msg): + scalar - dates + + # ------------------------------------------------------------- + # RelativeDelta DateOffsets + + def test_dt64arr_add_sub_relativedelta_offsets(self, box_with_array): + # GH#10699 + vec = DatetimeIndex( + [ + Timestamp("2000-01-05 00:15:00"), + Timestamp("2000-01-31 00:23:00"), + Timestamp("2000-01-01"), + Timestamp("2000-03-31"), + Timestamp("2000-02-29"), + Timestamp("2000-12-31"), + Timestamp("2000-05-15"), + Timestamp("2001-06-15"), + ] + ) + vec = tm.box_expected(vec, box_with_array) + vec_items = vec.iloc[0] if box_with_array is pd.DataFrame else vec + + # DateOffset relativedelta fastpath + relative_kwargs = [ + ("years", 2), + ("months", 5), + ("days", 3), + ("hours", 5), + ("minutes", 10), + ("seconds", 2), + ("microseconds", 5), + ] + for i, (unit, value) in enumerate(relative_kwargs): + off = DateOffset(**{unit: 
value}) + + expected = DatetimeIndex([x + off for x in vec_items]) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(expected, vec + off) + + expected = DatetimeIndex([x - off for x in vec_items]) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(expected, vec - off) + + off = DateOffset(**dict(relative_kwargs[: i + 1])) + + expected = DatetimeIndex([x + off for x in vec_items]) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(expected, vec + off) + + expected = DatetimeIndex([x - off for x in vec_items]) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(expected, vec - off) + msg = "(bad|unsupported) operand type for unary" + with pytest.raises(TypeError, match=msg): + off - vec + + # ------------------------------------------------------------- + # Non-Tick, Non-RelativeDelta DateOffsets + + # TODO: redundant with test_dt64arr_add_sub_DateOffset? that includes + # tz-aware cases which this does not + @pytest.mark.parametrize( + "cls_and_kwargs", + [ + "YearBegin", + ("YearBegin", {"month": 5}), + "YearEnd", + ("YearEnd", {"month": 5}), + "MonthBegin", + "MonthEnd", + "SemiMonthEnd", + "SemiMonthBegin", + "Week", + ("Week", {"weekday": 3}), + "Week", + ("Week", {"weekday": 6}), + "BusinessDay", + "BDay", + "QuarterEnd", + "QuarterBegin", + "CustomBusinessDay", + "CDay", + "CBMonthEnd", + "CBMonthBegin", + "BMonthBegin", + "BMonthEnd", + "BusinessHour", + "BYearBegin", + "BYearEnd", + "BQuarterBegin", + ("LastWeekOfMonth", {"weekday": 2}), + ( + "FY5253Quarter", + { + "qtr_with_extra_week": 1, + "startingMonth": 1, + "weekday": 2, + "variation": "nearest", + }, + ), + ("FY5253", {"weekday": 0, "startingMonth": 2, "variation": "nearest"}), + ("WeekOfMonth", {"weekday": 2, "week": 2}), + "Easter", + ("DateOffset", {"day": 4}), + ("DateOffset", {"month": 5}), + ], + ) + @pytest.mark.parametrize("normalize", [True, False]) + @pytest.mark.parametrize("n", [0, 5]) + def test_dt64arr_add_sub_DateOffsets( + self, box_with_array, n, normalize, cls_and_kwargs + ): + # GH#10699 + # assert vectorized operation matches pointwise operations + + if isinstance(cls_and_kwargs, tuple): + # If cls_name param is a tuple, then 2nd entry is kwargs for + # the offset constructor + cls_name, kwargs = cls_and_kwargs + else: + cls_name = cls_and_kwargs + kwargs = {} + + if n == 0 and cls_name in [ + "WeekOfMonth", + "LastWeekOfMonth", + "FY5253Quarter", + "FY5253", + ]: + # passing n = 0 is invalid for these offset classes + return + + vec = DatetimeIndex( + [ + Timestamp("2000-01-05 00:15:00"), + Timestamp("2000-01-31 00:23:00"), + Timestamp("2000-01-01"), + Timestamp("2000-03-31"), + Timestamp("2000-02-29"), + Timestamp("2000-12-31"), + Timestamp("2000-05-15"), + Timestamp("2001-06-15"), + ] + ) + vec = tm.box_expected(vec, box_with_array) + vec_items = vec.iloc[0] if box_with_array is pd.DataFrame else vec + + offset_cls = getattr(pd.offsets, cls_name) + + with warnings.catch_warnings(record=True): + # pandas.errors.PerformanceWarning: Non-vectorized DateOffset being + # applied to Series or DatetimeIndex + # we aren't testing that here, so ignore. 
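+            # (Editorial note: the warning fires because these offsets have
+            # no vectorized path, so pandas applies them element-wise in
+            # Python; that slowness is not what this test is about.)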
+ warnings.simplefilter("ignore", PerformanceWarning) + + offset = offset_cls(n, normalize=normalize, **kwargs) + + expected = DatetimeIndex([x + offset for x in vec_items]) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(expected, vec + offset) + + expected = DatetimeIndex([x - offset for x in vec_items]) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(expected, vec - offset) + + expected = DatetimeIndex([offset + x for x in vec_items]) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(expected, offset + vec) + msg = "(bad|unsupported) operand type for unary" + with pytest.raises(TypeError, match=msg): + offset - vec + + def test_dt64arr_add_sub_DateOffset(self, box_with_array): + # GH#10699 + s = date_range("2000-01-01", "2000-01-31", name="a") + s = tm.box_expected(s, box_with_array) + result = s + DateOffset(years=1) + result2 = DateOffset(years=1) + s + exp = date_range("2001-01-01", "2001-01-31", name="a")._with_freq(None) + exp = tm.box_expected(exp, box_with_array) + tm.assert_equal(result, exp) + tm.assert_equal(result2, exp) + + result = s - DateOffset(years=1) + exp = date_range("1999-01-01", "1999-01-31", name="a")._with_freq(None) + exp = tm.box_expected(exp, box_with_array) + tm.assert_equal(result, exp) + + s = DatetimeIndex( + [ + Timestamp("2000-01-15 00:15:00", tz="US/Central"), + Timestamp("2000-02-15", tz="US/Central"), + ], + name="a", + ) + s = tm.box_expected(s, box_with_array) + result = s + pd.offsets.Day() + result2 = pd.offsets.Day() + s + exp = DatetimeIndex( + [ + Timestamp("2000-01-16 00:15:00", tz="US/Central"), + Timestamp("2000-02-16", tz="US/Central"), + ], + name="a", + ) + exp = tm.box_expected(exp, box_with_array) + tm.assert_equal(result, exp) + tm.assert_equal(result2, exp) + + s = DatetimeIndex( + [ + Timestamp("2000-01-15 00:15:00", tz="US/Central"), + Timestamp("2000-02-15", tz="US/Central"), + ], + name="a", + ) + s = tm.box_expected(s, box_with_array) + result = s + pd.offsets.MonthEnd() + result2 = pd.offsets.MonthEnd() + s + exp = DatetimeIndex( + [ + Timestamp("2000-01-31 00:15:00", tz="US/Central"), + Timestamp("2000-02-29", tz="US/Central"), + ], + name="a", + ) + exp = tm.box_expected(exp, box_with_array) + tm.assert_equal(result, exp) + tm.assert_equal(result2, exp) + + @pytest.mark.parametrize( + "other", + [ + np.array([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)]), + np.array([pd.offsets.DateOffset(years=1), pd.offsets.MonthEnd()]), + np.array( # matching offsets + [pd.offsets.DateOffset(years=1), pd.offsets.DateOffset(years=1)] + ), + ], + ) + @pytest.mark.parametrize("op", [operator.add, roperator.radd, operator.sub]) + @pytest.mark.parametrize("box_other", [True, False]) + def test_dt64arr_add_sub_offset_array( + self, tz_naive_fixture, box_with_array, box_other, op, other + ): + # GH#18849 + # GH#10699 array of offsets + + tz = tz_naive_fixture + dti = date_range("2017-01-01", periods=2, tz=tz) + dtarr = tm.box_expected(dti, box_with_array) + + other = np.array([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)]) + expected = DatetimeIndex([op(dti[n], other[n]) for n in range(len(dti))]) + expected = tm.box_expected(expected, box_with_array) + + if box_other: + other = tm.box_expected(other, box_with_array) + + with tm.assert_produces_warning(PerformanceWarning): + res = op(dtarr, other) + + tm.assert_equal(res, expected) + + @pytest.mark.parametrize( + "op, offset, exp, exp_freq", + [ + ( + "__add__", + DateOffset(months=3, days=10), + [ + Timestamp("2014-04-11"), + 
Timestamp("2015-04-11"), + Timestamp("2016-04-11"), + Timestamp("2017-04-11"), + ], + None, + ), + ( + "__add__", + DateOffset(months=3), + [ + Timestamp("2014-04-01"), + Timestamp("2015-04-01"), + Timestamp("2016-04-01"), + Timestamp("2017-04-01"), + ], + "AS-APR", + ), + ( + "__sub__", + DateOffset(months=3, days=10), + [ + Timestamp("2013-09-21"), + Timestamp("2014-09-21"), + Timestamp("2015-09-21"), + Timestamp("2016-09-21"), + ], + None, + ), + ( + "__sub__", + DateOffset(months=3), + [ + Timestamp("2013-10-01"), + Timestamp("2014-10-01"), + Timestamp("2015-10-01"), + Timestamp("2016-10-01"), + ], + "AS-OCT", + ), + ], + ) + def test_dti_add_sub_nonzero_mth_offset( + self, op, offset, exp, exp_freq, tz_aware_fixture, box_with_array + ): + # GH 26258 + tz = tz_aware_fixture + date = date_range(start="01 Jan 2014", end="01 Jan 2017", freq="AS", tz=tz) + date = tm.box_expected(date, box_with_array, False) + mth = getattr(date, op) + result = mth(offset) + + expected = DatetimeIndex(exp, tz=tz) + expected = tm.box_expected(expected, box_with_array, False) + tm.assert_equal(result, expected) + + +class TestDatetime64OverflowHandling: + # TODO: box + de-duplicate + + def test_dt64_overflow_masking(self, box_with_array): + # GH#25317 + left = Series([Timestamp("1969-12-31")]) + right = Series([NaT]) + + left = tm.box_expected(left, box_with_array) + right = tm.box_expected(right, box_with_array) + + expected = TimedeltaIndex([NaT]) + expected = tm.box_expected(expected, box_with_array) + + result = left - right + tm.assert_equal(result, expected) + + def test_dt64_series_arith_overflow(self): + # GH#12534, fixed by GH#19024 + dt = Timestamp("1700-01-31") + td = Timedelta("20000 Days") + dti = date_range("1949-09-30", freq="100Y", periods=4) + ser = Series(dti) + msg = "Overflow in int64 addition" + with pytest.raises(OverflowError, match=msg): + ser - dt + with pytest.raises(OverflowError, match=msg): + dt - ser + with pytest.raises(OverflowError, match=msg): + ser + td + with pytest.raises(OverflowError, match=msg): + td + ser + + ser.iloc[-1] = NaT + expected = Series( + ["2004-10-03", "2104-10-04", "2204-10-04", "NaT"], dtype="datetime64[ns]" + ) + res = ser + td + tm.assert_series_equal(res, expected) + res = td + ser + tm.assert_series_equal(res, expected) + + ser.iloc[1:] = NaT + expected = Series(["91279 Days", "NaT", "NaT", "NaT"], dtype="timedelta64[ns]") + res = ser - dt + tm.assert_series_equal(res, expected) + res = dt - ser + tm.assert_series_equal(res, -expected) + + def test_datetimeindex_sub_timestamp_overflow(self): + dtimax = pd.to_datetime(["2021-12-28 17:19", Timestamp.max]) + dtimin = pd.to_datetime(["2021-12-28 17:19", Timestamp.min]) + + tsneg = Timestamp("1950-01-01") + ts_neg_variants = [ + tsneg, + tsneg.to_pydatetime(), + tsneg.to_datetime64().astype("datetime64[ns]"), + tsneg.to_datetime64().astype("datetime64[D]"), + ] + + tspos = Timestamp("1980-01-01") + ts_pos_variants = [ + tspos, + tspos.to_pydatetime(), + tspos.to_datetime64().astype("datetime64[ns]"), + tspos.to_datetime64().astype("datetime64[D]"), + ] + msg = "Overflow in int64 addition" + for variant in ts_neg_variants: + with pytest.raises(OverflowError, match=msg): + dtimax - variant + + expected = Timestamp.max.value - tspos.value + for variant in ts_pos_variants: + res = dtimax - variant + assert res[1].value == expected + + expected = Timestamp.min.value - tsneg.value + for variant in ts_neg_variants: + res = dtimin - variant + assert res[1].value == expected + + for variant in ts_pos_variants: + 
with pytest.raises(OverflowError, match=msg): + dtimin - variant + + def test_datetimeindex_sub_datetimeindex_overflow(self): + # GH#22492, GH#22508 + dtimax = pd.to_datetime(["2021-12-28 17:19", Timestamp.max]) + dtimin = pd.to_datetime(["2021-12-28 17:19", Timestamp.min]) + + ts_neg = pd.to_datetime(["1950-01-01", "1950-01-01"]) + ts_pos = pd.to_datetime(["1980-01-01", "1980-01-01"]) + + # General tests + expected = Timestamp.max.value - ts_pos[1].value + result = dtimax - ts_pos + assert result[1].value == expected + + expected = Timestamp.min.value - ts_neg[1].value + result = dtimin - ts_neg + assert result[1].value == expected + msg = "Overflow in int64 addition" + with pytest.raises(OverflowError, match=msg): + dtimax - ts_neg + + with pytest.raises(OverflowError, match=msg): + dtimin - ts_pos + + # Edge cases + tmin = pd.to_datetime([Timestamp.min]) + t1 = tmin + Timedelta.max + Timedelta("1us") + with pytest.raises(OverflowError, match=msg): + t1 - tmin + + tmax = pd.to_datetime([Timestamp.max]) + t2 = tmax + Timedelta.min - Timedelta("1us") + with pytest.raises(OverflowError, match=msg): + tmax - t2 + + +class TestTimestampSeriesArithmetic: + def test_empty_series_add_sub(self, box_with_array): + # GH#13844 + a = Series(dtype="M8[ns]") + b = Series(dtype="m8[ns]") + a = box_with_array(a) + b = box_with_array(b) + tm.assert_equal(a, a + b) + tm.assert_equal(a, a - b) + tm.assert_equal(a, b + a) + msg = "cannot subtract" + with pytest.raises(TypeError, match=msg): + b - a + + def test_operators_datetimelike(self): + + # ## timedelta64 ### + td1 = Series([timedelta(minutes=5, seconds=3)] * 3) + td1.iloc[2] = np.nan + + # ## datetime64 ### + dt1 = Series( + [ + Timestamp("20111230"), + Timestamp("20120101"), + Timestamp("20120103"), + ] + ) + dt1.iloc[2] = np.nan + dt2 = Series( + [ + Timestamp("20111231"), + Timestamp("20120102"), + Timestamp("20120104"), + ] + ) + dt1 - dt2 + dt2 - dt1 + + # datetime64 with timedelta + dt1 + td1 + td1 + dt1 + dt1 - td1 + + # timedelta with datetime64 + td1 + dt1 + dt1 + td1 + + def test_dt64ser_sub_datetime_dtype(self): + ts = Timestamp(datetime(1993, 1, 7, 13, 30, 00)) + dt = datetime(1993, 6, 22, 13, 30) + ser = Series([ts]) + result = pd.to_timedelta(np.abs(ser - dt)) + assert result.dtype == "timedelta64[ns]" + + # ------------------------------------------------------------- + # TODO: This next block of tests came from tests.series.test_operators, + # needs to be de-duplicated and parametrized over `box` classes + + @pytest.mark.parametrize( + "left, right, op_fail", + [ + [ + [Timestamp("20111230"), Timestamp("20120101"), NaT], + [Timestamp("20111231"), Timestamp("20120102"), Timestamp("20120104")], + ["__sub__", "__rsub__"], + ], + [ + [Timestamp("20111230"), Timestamp("20120101"), NaT], + [timedelta(minutes=5, seconds=3), timedelta(minutes=5, seconds=3), NaT], + ["__add__", "__radd__", "__sub__"], + ], + [ + [ + Timestamp("20111230", tz="US/Eastern"), + Timestamp("20111230", tz="US/Eastern"), + NaT, + ], + [timedelta(minutes=5, seconds=3), NaT, timedelta(minutes=5, seconds=3)], + ["__add__", "__radd__", "__sub__"], + ], + ], + ) + def test_operators_datetimelike_invalid( + self, left, right, op_fail, all_arithmetic_operators + ): + # these are all TypeError ops + op_str = all_arithmetic_operators + arg1 = Series(left) + arg2 = Series(right) + # check that we are getting a TypeError + # with 'operate' (from core/ops.py) for the ops that are not + # defined + op = getattr(arg1, op_str, None) + # Previously, _validate_for_numeric_binop
in core/indexes/base.py + # did this for us. + if op_str not in op_fail: + with pytest.raises( + TypeError, match="operate|[cC]annot|unsupported operand" + ): + op(arg2) + else: + # Smoke test + op(arg2) + + def test_sub_single_tz(self): + # GH#12290 + s1 = Series([Timestamp("2016-02-10", tz="America/Sao_Paulo")]) + s2 = Series([Timestamp("2016-02-08", tz="America/Sao_Paulo")]) + result = s1 - s2 + expected = Series([Timedelta("2days")]) + tm.assert_series_equal(result, expected) + result = s2 - s1 + expected = Series([Timedelta("-2days")]) + tm.assert_series_equal(result, expected) + + def test_dt64tz_series_sub_dtitz(self): + # GH#19071 subtracting tzaware DatetimeIndex from tzaware Series + # (with same tz) raises, fixed by #19024 + dti = date_range("1999-09-30", periods=10, tz="US/Pacific") + ser = Series(dti) + expected = Series(TimedeltaIndex(["0days"] * 10)) + + res = dti - ser + tm.assert_series_equal(res, expected) + res = ser - dti + tm.assert_series_equal(res, expected) + + def test_sub_datetime_compat(self): + # see GH#14088 + s = Series([datetime(2016, 8, 23, 12, tzinfo=pytz.utc), NaT]) + dt = datetime(2016, 8, 22, 12, tzinfo=pytz.utc) + exp = Series([Timedelta("1 days"), NaT]) + tm.assert_series_equal(s - dt, exp) + tm.assert_series_equal(s - Timestamp(dt), exp) + + def test_dt64_series_add_mixed_tick_DateOffset(self): + # GH#4532 + # operate with pd.offsets + s = Series([Timestamp("20130101 9:01"), Timestamp("20130101 9:02")]) + + result = s + pd.offsets.Milli(5) + result2 = pd.offsets.Milli(5) + s + expected = Series( + [Timestamp("20130101 9:01:00.005"), Timestamp("20130101 9:02:00.005")] + ) + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result2, expected) + + result = s + pd.offsets.Minute(5) + pd.offsets.Milli(5) + expected = Series( + [Timestamp("20130101 9:06:00.005"), Timestamp("20130101 9:07:00.005")] + ) + tm.assert_series_equal(result, expected) + + def test_datetime64_ops_nat(self): + # GH#11349 + datetime_series = Series([NaT, Timestamp("19900315")]) + nat_series_dtype_timestamp = Series([NaT, NaT], dtype="datetime64[ns]") + single_nat_dtype_datetime = Series([NaT], dtype="datetime64[ns]") + + # subtraction + tm.assert_series_equal(-NaT + datetime_series, nat_series_dtype_timestamp) + msg = "bad operand type for unary -: 'DatetimeArray'" + with pytest.raises(TypeError, match=msg): + -single_nat_dtype_datetime + datetime_series + + tm.assert_series_equal( + -NaT + nat_series_dtype_timestamp, nat_series_dtype_timestamp + ) + with pytest.raises(TypeError, match=msg): + -single_nat_dtype_datetime + nat_series_dtype_timestamp + + # addition + tm.assert_series_equal( + nat_series_dtype_timestamp + NaT, nat_series_dtype_timestamp + ) + tm.assert_series_equal( + NaT + nat_series_dtype_timestamp, nat_series_dtype_timestamp + ) + + tm.assert_series_equal( + nat_series_dtype_timestamp + NaT, nat_series_dtype_timestamp + ) + tm.assert_series_equal( + NaT + nat_series_dtype_timestamp, nat_series_dtype_timestamp + ) + + # ------------------------------------------------------------- + # Timezone-Centric Tests + + def test_operators_datetimelike_with_timezones(self): + tz = "US/Eastern" + dt1 = Series(date_range("2000-01-01 09:00:00", periods=5, tz=tz), name="foo") + dt2 = dt1.copy() + dt2.iloc[2] = np.nan + + td1 = Series(pd.timedelta_range("1 days 1 min", periods=5, freq="H")) + td2 = td1.copy() + td2.iloc[1] = np.nan + assert td2._values.freq is None + + result = dt1 + td1[0] + exp = (dt1.dt.tz_localize(None) + td1[0]).dt.tz_localize(tz) + 
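+        # (Editorial note: the expected value strips the tz, adds the
+        # timedelta in naive wall time, then re-localizes; this matches
+        # tz-aware arithmetic here because no DST transition is crossed.)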
tm.assert_series_equal(result, exp) + + result = dt2 + td2[0] + exp = (dt2.dt.tz_localize(None) + td2[0]).dt.tz_localize(tz) + tm.assert_series_equal(result, exp) + + # odd numpy behavior with scalar timedeltas + result = td1[0] + dt1 + exp = (dt1.dt.tz_localize(None) + td1[0]).dt.tz_localize(tz) + tm.assert_series_equal(result, exp) + + result = td2[0] + dt2 + exp = (dt2.dt.tz_localize(None) + td2[0]).dt.tz_localize(tz) + tm.assert_series_equal(result, exp) + + result = dt1 - td1[0] + exp = (dt1.dt.tz_localize(None) - td1[0]).dt.tz_localize(tz) + tm.assert_series_equal(result, exp) + msg = "(bad|unsupported) operand type for unary" + with pytest.raises(TypeError, match=msg): + td1[0] - dt1 + + result = dt2 - td2[0] + exp = (dt2.dt.tz_localize(None) - td2[0]).dt.tz_localize(tz) + tm.assert_series_equal(result, exp) + with pytest.raises(TypeError, match=msg): + td2[0] - dt2 + + result = dt1 + td1 + exp = (dt1.dt.tz_localize(None) + td1).dt.tz_localize(tz) + tm.assert_series_equal(result, exp) + + result = dt2 + td2 + exp = (dt2.dt.tz_localize(None) + td2).dt.tz_localize(tz) + tm.assert_series_equal(result, exp) + + result = dt1 - td1 + exp = (dt1.dt.tz_localize(None) - td1).dt.tz_localize(tz) + tm.assert_series_equal(result, exp) + + result = dt2 - td2 + exp = (dt2.dt.tz_localize(None) - td2).dt.tz_localize(tz) + tm.assert_series_equal(result, exp) + msg = "cannot (add|subtract)" + with pytest.raises(TypeError, match=msg): + td1 - dt1 + with pytest.raises(TypeError, match=msg): + td2 - dt2 + + +class TestDatetimeIndexArithmetic: + # ------------------------------------------------------------- + # Binary operations DatetimeIndex and TimedeltaIndex/array + + def test_dti_add_tdi(self, tz_naive_fixture): + # GH#17558 + tz = tz_naive_fixture + dti = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10) + tdi = pd.timedelta_range("0 days", periods=10) + expected = date_range("2017-01-01", periods=10, tz=tz) + expected = expected._with_freq(None) + + # add with TimedeltaIndex + result = dti + tdi + tm.assert_index_equal(result, expected) + + result = tdi + dti + tm.assert_index_equal(result, expected) + + # add with timedelta64 array + result = dti + tdi.values + tm.assert_index_equal(result, expected) + + result = tdi.values + dti + tm.assert_index_equal(result, expected) + + def test_dti_iadd_tdi(self, tz_naive_fixture): + # GH#17558 + tz = tz_naive_fixture + dti = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10) + tdi = pd.timedelta_range("0 days", periods=10) + expected = date_range("2017-01-01", periods=10, tz=tz) + expected = expected._with_freq(None) + + # iadd with TimedeltaIndex + result = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10) + result += tdi + tm.assert_index_equal(result, expected) + + result = pd.timedelta_range("0 days", periods=10) + result += dti + tm.assert_index_equal(result, expected) + + # iadd with timedelta64 array + result = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10) + result += tdi.values + tm.assert_index_equal(result, expected) + + result = pd.timedelta_range("0 days", periods=10) + result += dti + tm.assert_index_equal(result, expected) + + def test_dti_sub_tdi(self, tz_naive_fixture): + # GH#17558 + tz = tz_naive_fixture + dti = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10) + tdi = pd.timedelta_range("0 days", periods=10) + expected = date_range("2017-01-01", periods=10, tz=tz, freq="-1D") + expected = expected._with_freq(None) + + # sub with TimedeltaIndex + result = dti - tdi + tm.assert_index_equal(result, expected) + + msg = "cannot
subtract .*TimedeltaArray" + with pytest.raises(TypeError, match=msg): + tdi - dti + + # sub with timedelta64 array + result = dti - tdi.values + tm.assert_index_equal(result, expected) + + msg = "cannot subtract a datelike from a TimedeltaArray" + with pytest.raises(TypeError, match=msg): + tdi.values - dti + + def test_dti_isub_tdi(self, tz_naive_fixture): + # GH#17558 + tz = tz_naive_fixture + dti = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10) + tdi = pd.timedelta_range("0 days", periods=10) + expected = date_range("2017-01-01", periods=10, tz=tz, freq="-1D") + expected = expected._with_freq(None) + + # isub with TimedeltaIndex + result = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10) + result -= tdi + tm.assert_index_equal(result, expected) + + # DTA.__isub__ GH#43904 + dta = dti._data.copy() + dta -= tdi + tm.assert_datetime_array_equal(dta, expected._data) + + out = dti._data.copy() + np.subtract(out, tdi, out=out) + tm.assert_datetime_array_equal(out, expected._data) + + msg = "cannot subtract a datelike from a TimedeltaArray" + with pytest.raises(TypeError, match=msg): + tdi -= dti + + # isub with timedelta64 array + result = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10) + result -= tdi.values + tm.assert_index_equal(result, expected) + + with pytest.raises(TypeError, match=msg): + tdi.values -= dti + + with pytest.raises(TypeError, match=msg): + tdi._values -= dti + + # ------------------------------------------------------------- + # Binary Operations DatetimeIndex and datetime-like + # TODO: A couple other tests belong in this section. Move them in + # A PR where there isn't already a giant diff. + + # ------------------------------------------------------------- + + def test_dta_add_sub_index(self, tz_naive_fixture): + # Check that DatetimeArray defers to Index classes + dti = date_range("20130101", periods=3, tz=tz_naive_fixture) + dta = dti.array + result = dta - dti + expected = dti - dti + tm.assert_index_equal(result, expected) + + tdi = result + result = dta + tdi + expected = dti + tdi + tm.assert_index_equal(result, expected) + + result = dta - tdi + expected = dti - tdi + tm.assert_index_equal(result, expected) + + def test_sub_dti_dti(self): + # previously performed setop (deprecated in 0.16.0), now changed to + # return subtraction -> TimeDeltaIndex (GH ...) 
+ + dti = date_range("20130101", periods=3) + dti_tz = date_range("20130101", periods=3).tz_localize("US/Eastern") + expected = TimedeltaIndex([0, 0, 0]) + + result = dti - dti + tm.assert_index_equal(result, expected) + + result = dti_tz - dti_tz + tm.assert_index_equal(result, expected) + msg = "Cannot subtract tz-naive and tz-aware datetime-like objects" + with pytest.raises(TypeError, match=msg): + dti_tz - dti + + with pytest.raises(TypeError, match=msg): + dti - dti_tz + + # isub + dti -= dti + tm.assert_index_equal(dti, expected) + + # different length raises ValueError + dti1 = date_range("20130101", periods=3) + dti2 = date_range("20130101", periods=4) + msg = "cannot add indices of unequal length" + with pytest.raises(ValueError, match=msg): + dti1 - dti2 + + # NaN propagation + dti1 = DatetimeIndex(["2012-01-01", np.nan, "2012-01-03"]) + dti2 = DatetimeIndex(["2012-01-02", "2012-01-03", np.nan]) + expected = TimedeltaIndex(["1 days", np.nan, np.nan]) + result = dti2 - dti1 + tm.assert_index_equal(result, expected) + + # ------------------------------------------------------------------- + # TODO: Most of this block is moved from series or frame tests, needs + # cleanup, box-parametrization, and de-duplication + + @pytest.mark.parametrize("op", [operator.add, operator.sub]) + def test_timedelta64_equal_timedelta_supported_ops(self, op, box_with_array): + ser = Series( + [ + Timestamp("20130301"), + Timestamp("20130228 23:00:00"), + Timestamp("20130228 22:00:00"), + Timestamp("20130228 21:00:00"), + ] + ) + obj = box_with_array(ser) + + intervals = ["D", "h", "m", "s", "us"] + + def timedelta64(*args): + # see casting notes in NumPy gh-12927 + return np.sum(list(starmap(np.timedelta64, zip(args, intervals)))) + + for d, h, m, s, us in product(*([range(2)] * 5)): + nptd = timedelta64(d, h, m, s, us) + pytd = timedelta(days=d, hours=h, minutes=m, seconds=s, microseconds=us) + lhs = op(obj, nptd) + rhs = op(obj, pytd) + + tm.assert_equal(lhs, rhs) + + def test_ops_nat_mixed_datetime64_timedelta64(self): + # GH#11349 + timedelta_series = Series([NaT, Timedelta("1s")]) + datetime_series = Series([NaT, Timestamp("19900315")]) + nat_series_dtype_timedelta = Series([NaT, NaT], dtype="timedelta64[ns]") + nat_series_dtype_timestamp = Series([NaT, NaT], dtype="datetime64[ns]") + single_nat_dtype_datetime = Series([NaT], dtype="datetime64[ns]") + single_nat_dtype_timedelta = Series([NaT], dtype="timedelta64[ns]") + + # subtraction + tm.assert_series_equal( + datetime_series - single_nat_dtype_datetime, nat_series_dtype_timedelta + ) + + tm.assert_series_equal( + datetime_series - single_nat_dtype_timedelta, nat_series_dtype_timestamp + ) + tm.assert_series_equal( + -single_nat_dtype_timedelta + datetime_series, nat_series_dtype_timestamp + ) + + # without a Series wrapping the NaT, it is ambiguous + # whether it is a datetime64 or timedelta64 + # defaults to interpreting it as timedelta64 + tm.assert_series_equal( + nat_series_dtype_timestamp - single_nat_dtype_datetime, + nat_series_dtype_timedelta, + ) + + tm.assert_series_equal( + nat_series_dtype_timestamp - single_nat_dtype_timedelta, + nat_series_dtype_timestamp, + ) + tm.assert_series_equal( + -single_nat_dtype_timedelta + nat_series_dtype_timestamp, + nat_series_dtype_timestamp, + ) + msg = "cannot subtract a datelike" + with pytest.raises(TypeError, match=msg): + timedelta_series - single_nat_dtype_datetime + + # addition + tm.assert_series_equal( + nat_series_dtype_timestamp + single_nat_dtype_timedelta, + 
nat_series_dtype_timestamp, + ) + tm.assert_series_equal( + single_nat_dtype_timedelta + nat_series_dtype_timestamp, + nat_series_dtype_timestamp, + ) + + tm.assert_series_equal( + nat_series_dtype_timestamp + single_nat_dtype_timedelta, + nat_series_dtype_timestamp, + ) + tm.assert_series_equal( + single_nat_dtype_timedelta + nat_series_dtype_timestamp, + nat_series_dtype_timestamp, + ) + + tm.assert_series_equal( + nat_series_dtype_timedelta + single_nat_dtype_datetime, + nat_series_dtype_timestamp, + ) + tm.assert_series_equal( + single_nat_dtype_datetime + nat_series_dtype_timedelta, + nat_series_dtype_timestamp, + ) + + def test_ufunc_coercions(self): + idx = date_range("2011-01-01", periods=3, freq="2D", name="x") + + delta = np.timedelta64(1, "D") + exp = date_range("2011-01-02", periods=3, freq="2D", name="x") + for result in [idx + delta, np.add(idx, delta)]: + assert isinstance(result, DatetimeIndex) + tm.assert_index_equal(result, exp) + assert result.freq == "2D" + + exp = date_range("2010-12-31", periods=3, freq="2D", name="x") + + for result in [idx - delta, np.subtract(idx, delta)]: + assert isinstance(result, DatetimeIndex) + tm.assert_index_equal(result, exp) + assert result.freq == "2D" + + # When adding/subtracting an ndarray (which has no .freq), the result + # does not infer freq + idx = idx._with_freq(None) + delta = np.array( + [np.timedelta64(1, "D"), np.timedelta64(2, "D"), np.timedelta64(3, "D")] + ) + exp = DatetimeIndex(["2011-01-02", "2011-01-05", "2011-01-08"], name="x") + + for result in [idx + delta, np.add(idx, delta)]: + tm.assert_index_equal(result, exp) + assert result.freq == exp.freq + + exp = DatetimeIndex(["2010-12-31", "2011-01-01", "2011-01-02"], name="x") + for result in [idx - delta, np.subtract(idx, delta)]: + assert isinstance(result, DatetimeIndex) + tm.assert_index_equal(result, exp) + assert result.freq == exp.freq + + def test_dti_add_series(self, tz_naive_fixture, names): + # GH#13905 + tz = tz_naive_fixture + index = DatetimeIndex( + ["2016-06-28 05:30", "2016-06-28 05:31"], tz=tz, name=names[0] + ) + ser = Series([Timedelta(seconds=5)] * 2, index=index, name=names[1]) + expected = Series(index + Timedelta(seconds=5), index=index, name=names[2]) + + # passing name arg isn't enough when names[2] is None + expected.name = names[2] + assert expected.dtype == index.dtype + result = ser + index + tm.assert_series_equal(result, expected) + result2 = index + ser + tm.assert_series_equal(result2, expected) + + expected = index + Timedelta(seconds=5) + result3 = ser.values + index + tm.assert_index_equal(result3, expected) + result4 = index + ser.values + tm.assert_index_equal(result4, expected) + + @pytest.mark.parametrize("op", [operator.add, roperator.radd, operator.sub]) + def test_dti_addsub_offset_arraylike( + self, tz_naive_fixture, names, op, index_or_series + ): + # GH#18849, GH#19744 + other_box = index_or_series + + tz = tz_naive_fixture + dti = date_range("2017-01-01", periods=2, tz=tz, name=names[0]) + other = other_box([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)], name=names[1]) + + xbox = get_upcast_box(dti, other) + + with tm.assert_produces_warning(PerformanceWarning): + res = op(dti, other) + + expected = DatetimeIndex( + [op(dti[n], other[n]) for n in range(len(dti))], name=names[2], freq="infer" + ) + expected = tm.box_expected(expected, xbox) + tm.assert_equal(res, expected) + + @pytest.mark.parametrize("other_box", [pd.Index, np.array]) + def test_dti_addsub_object_arraylike( + self, tz_naive_fixture, box_with_array, 
other_box + ): + tz = tz_naive_fixture + + dti = date_range("2017-01-01", periods=2, tz=tz) + dtarr = tm.box_expected(dti, box_with_array) + other = other_box([pd.offsets.MonthEnd(), Timedelta(days=4)]) + xbox = get_upcast_box(dtarr, other) + + expected = DatetimeIndex(["2017-01-31", "2017-01-06"], tz=tz_naive_fixture) + expected = tm.box_expected(expected, xbox) + + with tm.assert_produces_warning(PerformanceWarning): + result = dtarr + other + tm.assert_equal(result, expected) + + expected = DatetimeIndex(["2016-12-31", "2016-12-29"], tz=tz_naive_fixture) + expected = tm.box_expected(expected, xbox) + + with tm.assert_produces_warning(PerformanceWarning): + result = dtarr - other + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize("years", [-1, 0, 1]) +@pytest.mark.parametrize("months", [-2, 0, 2]) +def test_shift_months(years, months): + dti = DatetimeIndex( + [ + Timestamp("2000-01-05 00:15:00"), + Timestamp("2000-01-31 00:23:00"), + Timestamp("2000-01-01"), + Timestamp("2000-02-29"), + Timestamp("2000-12-31"), + ] + ) + actual = DatetimeIndex(shift_months(dti.asi8, years * 12 + months)) + + raw = [x + pd.offsets.DateOffset(years=years, months=months) for x in dti] + expected = DatetimeIndex(raw) + tm.assert_index_equal(actual, expected) + + +def test_dt64arr_addsub_object_dtype_2d(): + # block-wise DataFrame operations will require operating on 2D + # DatetimeArray/TimedeltaArray, so check that specifically. + dti = date_range("1994-02-13", freq="2W", periods=4) + dta = dti._data.reshape((4, 1)) + + other = np.array([[pd.offsets.Day(n)] for n in range(4)]) + assert other.shape == dta.shape + + with tm.assert_produces_warning(PerformanceWarning): + result = dta + other + with tm.assert_produces_warning(PerformanceWarning): + expected = (dta[:, 0] + other[:, 0]).reshape(-1, 1) + + assert isinstance(result, DatetimeArray) + assert result.freq is None + tm.assert_numpy_array_equal(result._data, expected._data) + + with tm.assert_produces_warning(PerformanceWarning): + # Case where we expect to get a TimedeltaArray back + result2 = dta - dta.astype(object) + + assert isinstance(result2, TimedeltaArray) + assert result2.shape == (4, 1) + assert result2.freq is None + assert (result2.asi8 == 0).all() diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/test_interval.py b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/test_interval.py new file mode 100644 index 0000000..88e3dca --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/test_interval.py @@ -0,0 +1,316 @@ +import operator + +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_list_like + +import pandas as pd +from pandas import ( + Categorical, + Index, + Interval, + IntervalIndex, + Period, + Series, + Timedelta, + Timestamp, + date_range, + period_range, + timedelta_range, +) +import pandas._testing as tm +from pandas.core.arrays import ( + BooleanArray, + IntervalArray, +) +from pandas.tests.arithmetic.common import get_upcast_box + + +@pytest.fixture( + params=[ + (Index([0, 2, 4, 4]), Index([1, 3, 5, 8])), + (Index([0.0, 1.0, 2.0, np.nan]), Index([1.0, 2.0, 3.0, np.nan])), + ( + timedelta_range("0 days", periods=3).insert(3, pd.NaT), + timedelta_range("1 day", periods=3).insert(3, pd.NaT), + ), + ( + date_range("20170101", periods=3).insert(3, pd.NaT), + date_range("20170102", periods=3).insert(3, pd.NaT), + ), + ( + date_range("20170101", periods=3, tz="US/Eastern").insert(3, pd.NaT), + date_range("20170102", periods=3, 
tz="US/Eastern").insert(3, pd.NaT), + ), + ], + ids=lambda x: str(x[0].dtype), +) +def left_right_dtypes(request): + """ + Fixture for building an IntervalArray from various dtypes + """ + return request.param + + +@pytest.fixture +def interval_array(left_right_dtypes): + """ + Fixture to generate an IntervalArray of various dtypes containing NA if possible + """ + left, right = left_right_dtypes + return IntervalArray.from_arrays(left, right) + + +def create_categorical_intervals(left, right, closed="right"): + return Categorical(IntervalIndex.from_arrays(left, right, closed)) + + +def create_series_intervals(left, right, closed="right"): + return Series(IntervalArray.from_arrays(left, right, closed)) + + +def create_series_categorical_intervals(left, right, closed="right"): + return Series(Categorical(IntervalIndex.from_arrays(left, right, closed))) + + +class TestComparison: + @pytest.fixture(params=[operator.eq, operator.ne]) + def op(self, request): + return request.param + + @pytest.fixture( + params=[ + IntervalArray.from_arrays, + IntervalIndex.from_arrays, + create_categorical_intervals, + create_series_intervals, + create_series_categorical_intervals, + ], + ids=[ + "IntervalArray", + "IntervalIndex", + "Categorical[Interval]", + "Series[Interval]", + "Series[Categorical[Interval]]", + ], + ) + def interval_constructor(self, request): + """ + Fixture for all pandas native interval constructors. + To be used as the LHS of IntervalArray comparisons. + """ + return request.param + + def elementwise_comparison(self, op, interval_array, other): + """ + Helper that performs elementwise comparisons between `array` and `other` + """ + other = other if is_list_like(other) else [other] * len(interval_array) + expected = np.array([op(x, y) for x, y in zip(interval_array, other)]) + if isinstance(other, Series): + return Series(expected, index=other.index) + return expected + + def test_compare_scalar_interval(self, op, interval_array): + # matches first interval + other = interval_array[0] + result = op(interval_array, other) + expected = self.elementwise_comparison(op, interval_array, other) + tm.assert_numpy_array_equal(result, expected) + + # matches on a single endpoint but not both + other = Interval(interval_array.left[0], interval_array.right[1]) + result = op(interval_array, other) + expected = self.elementwise_comparison(op, interval_array, other) + tm.assert_numpy_array_equal(result, expected) + + def test_compare_scalar_interval_mixed_closed(self, op, closed, other_closed): + interval_array = IntervalArray.from_arrays(range(2), range(1, 3), closed=closed) + other = Interval(0, 1, closed=other_closed) + + result = op(interval_array, other) + expected = self.elementwise_comparison(op, interval_array, other) + tm.assert_numpy_array_equal(result, expected) + + def test_compare_scalar_na( + self, op, interval_array, nulls_fixture, box_with_array, request + ): + box = box_with_array + + if box is pd.DataFrame: + if interval_array.dtype.subtype.kind not in "iuf": + mark = pytest.mark.xfail( + reason="raises on DataFrame.transpose (would be fixed by EA2D)" + ) + request.node.add_marker(mark) + + obj = tm.box_expected(interval_array, box) + result = op(obj, nulls_fixture) + + if nulls_fixture is pd.NA: + # GH#31882 + exp = np.ones(interval_array.shape, dtype=bool) + expected = BooleanArray(exp, exp) + else: + expected = self.elementwise_comparison(op, interval_array, nulls_fixture) + + if not (box is Index and nulls_fixture is pd.NA): + # don't cast expected from BooleanArray to 
ndarray[object] + xbox = get_upcast_box(obj, nulls_fixture, True) + expected = tm.box_expected(expected, xbox) + + tm.assert_equal(result, expected) + + rev = op(nulls_fixture, obj) + tm.assert_equal(rev, expected) + + @pytest.mark.parametrize( + "other", + [ + 0, + 1.0, + True, + "foo", + Timestamp("2017-01-01"), + Timestamp("2017-01-01", tz="US/Eastern"), + Timedelta("0 days"), + Period("2017-01-01", "D"), + ], + ) + def test_compare_scalar_other(self, op, interval_array, other): + result = op(interval_array, other) + expected = self.elementwise_comparison(op, interval_array, other) + tm.assert_numpy_array_equal(result, expected) + + def test_compare_list_like_interval(self, op, interval_array, interval_constructor): + # same endpoints + other = interval_constructor(interval_array.left, interval_array.right) + result = op(interval_array, other) + expected = self.elementwise_comparison(op, interval_array, other) + tm.assert_equal(result, expected) + + # different endpoints + other = interval_constructor( + interval_array.left[::-1], interval_array.right[::-1] + ) + result = op(interval_array, other) + expected = self.elementwise_comparison(op, interval_array, other) + tm.assert_equal(result, expected) + + # all nan endpoints + other = interval_constructor([np.nan] * 4, [np.nan] * 4) + result = op(interval_array, other) + expected = self.elementwise_comparison(op, interval_array, other) + tm.assert_equal(result, expected) + + def test_compare_list_like_interval_mixed_closed( + self, op, interval_constructor, closed, other_closed + ): + interval_array = IntervalArray.from_arrays(range(2), range(1, 3), closed=closed) + other = interval_constructor(range(2), range(1, 3), closed=other_closed) + + result = op(interval_array, other) + expected = self.elementwise_comparison(op, interval_array, other) + tm.assert_equal(result, expected) + + @pytest.mark.parametrize( + "other", + [ + ( + Interval(0, 1), + Interval(Timedelta("1 day"), Timedelta("2 days")), + Interval(4, 5, "both"), + Interval(10, 20, "neither"), + ), + (0, 1.5, Timestamp("20170103"), np.nan), + ( + Timestamp("20170102", tz="US/Eastern"), + Timedelta("2 days"), + "baz", + pd.NaT, + ), + ], + ) + def test_compare_list_like_object(self, op, interval_array, other): + result = op(interval_array, other) + expected = self.elementwise_comparison(op, interval_array, other) + tm.assert_numpy_array_equal(result, expected) + + def test_compare_list_like_nan(self, op, interval_array, nulls_fixture): + other = [nulls_fixture] * 4 + result = op(interval_array, other) + expected = self.elementwise_comparison(op, interval_array, other) + + tm.assert_equal(result, expected) + + @pytest.mark.parametrize( + "other", + [ + np.arange(4, dtype="int64"), + np.arange(4, dtype="float64"), + date_range("2017-01-01", periods=4), + date_range("2017-01-01", periods=4, tz="US/Eastern"), + timedelta_range("0 days", periods=4), + period_range("2017-01-01", periods=4, freq="D"), + Categorical(list("abab")), + Categorical(date_range("2017-01-01", periods=4)), + pd.array(list("abcd")), + pd.array(["foo", 3.14, None, object()], dtype=object), + ], + ids=lambda x: str(x.dtype), + ) + def test_compare_list_like_other(self, op, interval_array, other): + result = op(interval_array, other) + expected = self.elementwise_comparison(op, interval_array, other) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("length", [1, 3, 5]) + @pytest.mark.parametrize("other_constructor", [IntervalArray, list]) + def test_compare_length_mismatch_errors(self, op, 
other_constructor, length): + interval_array = IntervalArray.from_arrays(range(4), range(1, 5)) + other = other_constructor([Interval(0, 1)] * length) + with pytest.raises(ValueError, match="Lengths must match to compare"): + op(interval_array, other) + + @pytest.mark.parametrize( + "constructor, expected_type, assert_func", + [ + (IntervalIndex, np.array, tm.assert_numpy_array_equal), + (Series, Series, tm.assert_series_equal), + ], + ) + def test_index_series_compat(self, op, constructor, expected_type, assert_func): + # IntervalIndex/Series that rely on IntervalArray for comparisons + breaks = range(4) + index = constructor(IntervalIndex.from_breaks(breaks)) + + # scalar comparisons + other = index[0] + result = op(index, other) + expected = expected_type(self.elementwise_comparison(op, index, other)) + assert_func(result, expected) + + other = breaks[0] + result = op(index, other) + expected = expected_type(self.elementwise_comparison(op, index, other)) + assert_func(result, expected) + + # list-like comparisons + other = IntervalArray.from_breaks(breaks) + result = op(index, other) + expected = expected_type(self.elementwise_comparison(op, index, other)) + assert_func(result, expected) + + other = [index[0], breaks[0], "foo"] + result = op(index, other) + expected = expected_type(self.elementwise_comparison(op, index, other)) + assert_func(result, expected) + + @pytest.mark.parametrize("scalars", ["a", False, 1, 1.0, None]) + def test_comparison_operations(self, scalars): + # GH #28981 + expected = Series([False, False]) + s = Series([Interval(0, 1), Interval(1, 2)], dtype="interval") + result = s == scalars + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/test_numeric.py b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/test_numeric.py new file mode 100644 index 0000000..bc7a929 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/test_numeric.py @@ -0,0 +1,1402 @@ +# Arithmetic tests for DataFrame/Series/Index/Array classes that should +# behave identically. +# Specifically for numeric dtypes +from __future__ import annotations + +from collections import abc +from decimal import Decimal +from itertools import combinations +import operator +from typing import Any + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Index, + RangeIndex, + Series, + Timedelta, + TimedeltaIndex, + array, +) +import pandas._testing as tm +from pandas.core import ops +from pandas.core.api import ( + Float64Index, + Int64Index, + UInt64Index, +) +from pandas.core.computation import expressions as expr +from pandas.tests.arithmetic.common import ( + assert_invalid_addsub_type, + assert_invalid_comparison, +) + + +@pytest.fixture(params=[Index, Series, tm.to_array]) +def box_pandas_1d_array(request): + """ + Fixture to test behavior for Index, Series and tm.to_array classes + """ + return request.param + + +def adjust_negative_zero(zero, expected): + """ + Helper to adjust the expected result if we are dividing by -0.0 + as opposed to 0.0 + """ + if np.signbit(np.array(zero)).any(): + # All entries in the `zero` fixture should be either + # all-negative or no-negative. 
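+        # (Editorial note: per IEEE-754, a positive number divided by -0.0
+        # is -inf, so a divisor of all negative zeros flips the sign of
+        # each expected inf; hence the single sign flip below.)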
+ assert np.signbit(np.array(zero)).all() + + expected *= -1 + + return expected + + +def compare_op(series, other, op): + left = np.abs(series) if op in (ops.rpow, operator.pow) else series + right = np.abs(other) if op in (ops.rpow, operator.pow) else other + + cython_or_numpy = op(left, right) + python = left.combine(right, op) + if isinstance(other, Series) and not other.index.equals(series.index): + python.index = python.index._with_freq(None) + tm.assert_series_equal(cython_or_numpy, python) + + +# TODO: remove this kludge once mypy stops giving false positives here +# List comprehension has incompatible type List[PandasObject]; expected List[RangeIndex] +# See GH#29725 +ser_or_index: list[Any] = [Series, Index] +lefts: list[Any] = [RangeIndex(10, 40, 10)] +lefts.extend( + [ + cls([10, 20, 30], dtype=dtype) + for dtype in ["i1", "i2", "i4", "i8", "u1", "u2", "u4", "u8", "f2", "f4", "f8"] + for cls in ser_or_index + ] +) + +# ------------------------------------------------------------------ +# Comparisons + + +class TestNumericComparisons: + def test_operator_series_comparison_zerorank(self): + # GH#13006 + result = np.float64(0) > Series([1, 2, 3]) + expected = 0.0 > Series([1, 2, 3]) + tm.assert_series_equal(result, expected) + result = Series([1, 2, 3]) < np.float64(0) + expected = Series([1, 2, 3]) < 0.0 + tm.assert_series_equal(result, expected) + result = np.array([0, 1, 2])[0] > Series([0, 1, 2]) + expected = 0.0 > Series([1, 2, 3]) + tm.assert_series_equal(result, expected) + + def test_df_numeric_cmp_dt64_raises(self, box_with_array, fixed_now_ts): + # GH#8932, GH#22163 + ts = fixed_now_ts + obj = np.array(range(5)) + obj = tm.box_expected(obj, box_with_array) + + assert_invalid_comparison(obj, ts, box_with_array) + + def test_compare_invalid(self): + # GH#8058 + # ops testing + a = Series(np.random.randn(5), name=0) + b = Series(np.random.randn(5)) + b.name = pd.Timestamp("2000-01-01") + tm.assert_series_equal(a / b, 1 / (b / a)) + + def test_numeric_cmp_string_numexpr_path(self, box_with_array): + # GH#36377, GH#35700 + box = box_with_array + xbox = box if box is not Index else np.ndarray + + obj = Series(np.random.randn(10 ** 5)) + obj = tm.box_expected(obj, box, transpose=False) + + result = obj == "a" + + expected = Series(np.zeros(10 ** 5, dtype=bool)) + expected = tm.box_expected(expected, xbox, transpose=False) + tm.assert_equal(result, expected) + + result = obj != "a" + tm.assert_equal(result, ~expected) + + msg = "Invalid comparison between dtype=float64 and str" + with pytest.raises(TypeError, match=msg): + obj < "a" + + +# ------------------------------------------------------------------ +# Numeric dtypes Arithmetic with Datetime/Timedelta Scalar + + +class TestNumericArraylikeArithmeticWithDatetimeLike: + @pytest.mark.parametrize("box_cls", [np.array, Index, Series]) + @pytest.mark.parametrize( + "left", lefts, ids=lambda x: type(x).__name__ + str(x.dtype) + ) + def test_mul_td64arr(self, left, box_cls): + # GH#22390 + right = np.array([1, 2, 3], dtype="m8[s]") + right = box_cls(right) + + expected = TimedeltaIndex(["10s", "40s", "90s"]) + if isinstance(left, Series) or box_cls is Series: + expected = Series(expected) + + result = left * right + tm.assert_equal(result, expected) + + result = right * left + tm.assert_equal(result, expected) + + @pytest.mark.parametrize("box_cls", [np.array, Index, Series]) + @pytest.mark.parametrize( + "left", lefts, ids=lambda x: type(x).__name__ + str(x.dtype) + ) + def test_div_td64arr(self, left, box_cls): + # GH#22390 + 
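+        # Editorial sketch, not part of the original suite: dividing a
+        # timedelta64 array by a numeric array stays timedelta-valued, e.g.
+        #   np.array([10, 40, 90], dtype="m8[s]") / np.array([10, 20, 30])
+        # gives array([1, 2, 3], dtype='timedelta64[s]').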
right = np.array([10, 40, 90], dtype="m8[s]") + right = box_cls(right) + + expected = TimedeltaIndex(["1s", "2s", "3s"]) + if isinstance(left, Series) or box_cls is Series: + expected = Series(expected) + + result = right / left + tm.assert_equal(result, expected) + + result = right // left + tm.assert_equal(result, expected) + + msg = "Cannot divide" + with pytest.raises(TypeError, match=msg): + left / right + + with pytest.raises(TypeError, match=msg): + left // right + + # TODO: also test Tick objects; + # see test_numeric_arr_rdiv_tdscalar for note on these failing + @pytest.mark.parametrize( + "scalar_td", + [ + Timedelta(days=1), + Timedelta(days=1).to_timedelta64(), + Timedelta(days=1).to_pytimedelta(), + Timedelta(days=1).to_timedelta64().astype("timedelta64[s]"), + Timedelta(days=1).to_timedelta64().astype("timedelta64[ms]"), + ], + ids=lambda x: type(x).__name__, + ) + def test_numeric_arr_mul_tdscalar(self, scalar_td, numeric_idx, box_with_array): + # GH#19333 + box = box_with_array + index = numeric_idx + expected = TimedeltaIndex([Timedelta(days=n) for n in range(len(index))]) + + index = tm.box_expected(index, box) + expected = tm.box_expected(expected, box) + + result = index * scalar_td + tm.assert_equal(result, expected) + + commute = scalar_td * index + tm.assert_equal(commute, expected) + + @pytest.mark.parametrize( + "scalar_td", + [ + Timedelta(days=1), + Timedelta(days=1).to_timedelta64(), + Timedelta(days=1).to_pytimedelta(), + ], + ids=lambda x: type(x).__name__, + ) + @pytest.mark.parametrize("dtype", [np.int64, np.float64]) + def test_numeric_arr_mul_tdscalar_numexpr_path( + self, dtype, scalar_td, box_with_array + ): + # GH#44772 for the float64 case + box = box_with_array + + arr_i8 = np.arange(2 * 10 ** 4).astype(np.int64, copy=False) + arr = arr_i8.astype(dtype, copy=False) + obj = tm.box_expected(arr, box, transpose=False) + + expected = arr_i8.view("timedelta64[D]").astype("timedelta64[ns]") + expected = tm.box_expected(expected, box, transpose=False) + + result = obj * scalar_td + tm.assert_equal(result, expected) + + result = scalar_td * obj + tm.assert_equal(result, expected) + + def test_numeric_arr_rdiv_tdscalar(self, three_days, numeric_idx, box_with_array): + box = box_with_array + + index = numeric_idx[1:3] + + expected = TimedeltaIndex(["3 Days", "36 Hours"]) + + index = tm.box_expected(index, box) + expected = tm.box_expected(expected, box) + + result = three_days / index + tm.assert_equal(result, expected) + + msg = "cannot use operands with types dtype" + with pytest.raises(TypeError, match=msg): + index / three_days + + @pytest.mark.parametrize( + "other", + [ + Timedelta(hours=31), + Timedelta(hours=31).to_pytimedelta(), + Timedelta(hours=31).to_timedelta64(), + Timedelta(hours=31).to_timedelta64().astype("m8[h]"), + np.timedelta64("NaT"), + np.timedelta64("NaT", "D"), + pd.offsets.Minute(3), + pd.offsets.Second(0), + # GH#28080 numeric+datetimelike should raise; Timestamp used + # to raise NullFrequencyError but that behavior was removed in 1.0 + pd.Timestamp("2021-01-01", tz="Asia/Tokyo"), + pd.Timestamp("2021-01-01"), + pd.Timestamp("2021-01-01").to_pydatetime(), + pd.Timestamp("2021-01-01", tz="UTC").to_pydatetime(), + pd.Timestamp("2021-01-01").to_datetime64(), + np.datetime64("NaT", "ns"), + pd.NaT, + ], + ) + def test_add_sub_datetimedeltalike_invalid( + self, numeric_idx, other, box_with_array + ): + box = box_with_array + + left = tm.box_expected(numeric_idx, box) + msg = "|".join( + [ + "unsupported operand type", + 
"Addition/subtraction of integers and integer-arrays", + "Instead of adding/subtracting", + "cannot use operands with types dtype", + "Concatenation operation is not implemented for NumPy arrays", + "Cannot (add|subtract) NaT (to|from) ndarray", + # pd.array vs np.datetime64 case + r"operand type\(s\) all returned NotImplemented from __array_ufunc__", + "can only perform ops with numeric values", + "cannot subtract DatetimeArray from ndarray", + ] + ) + assert_invalid_addsub_type(left, other, msg) + + +# ------------------------------------------------------------------ +# Arithmetic + + +class TestDivisionByZero: + def test_div_zero(self, zero, numeric_idx): + idx = numeric_idx + + expected = Index([np.nan, np.inf, np.inf, np.inf, np.inf], dtype=np.float64) + # We only adjust for Index, because Series does not yet apply + # the adjustment correctly. + expected2 = adjust_negative_zero(zero, expected) + + result = idx / zero + tm.assert_index_equal(result, expected2) + ser_compat = Series(idx).astype("i8") / np.array(zero).astype("i8") + tm.assert_series_equal(ser_compat, Series(expected)) + + def test_floordiv_zero(self, zero, numeric_idx): + idx = numeric_idx + + expected = Index([np.nan, np.inf, np.inf, np.inf, np.inf], dtype=np.float64) + # We only adjust for Index, because Series does not yet apply + # the adjustment correctly. + expected2 = adjust_negative_zero(zero, expected) + + result = idx // zero + tm.assert_index_equal(result, expected2) + ser_compat = Series(idx).astype("i8") // np.array(zero).astype("i8") + tm.assert_series_equal(ser_compat, Series(expected)) + + def test_mod_zero(self, zero, numeric_idx): + idx = numeric_idx + + expected = Index([np.nan, np.nan, np.nan, np.nan, np.nan], dtype=np.float64) + result = idx % zero + tm.assert_index_equal(result, expected) + ser_compat = Series(idx).astype("i8") % np.array(zero).astype("i8") + tm.assert_series_equal(ser_compat, Series(result)) + + def test_divmod_zero(self, zero, numeric_idx): + idx = numeric_idx + + exleft = Index([np.nan, np.inf, np.inf, np.inf, np.inf], dtype=np.float64) + exright = Index([np.nan, np.nan, np.nan, np.nan, np.nan], dtype=np.float64) + exleft = adjust_negative_zero(zero, exleft) + + result = divmod(idx, zero) + tm.assert_index_equal(result[0], exleft) + tm.assert_index_equal(result[1], exright) + + @pytest.mark.parametrize("op", [operator.truediv, operator.floordiv]) + def test_div_negative_zero(self, zero, numeric_idx, op): + # Check that -1 / -0.0 returns np.inf, not -np.inf + if isinstance(numeric_idx, UInt64Index): + return + idx = numeric_idx - 3 + + expected = Index([-np.inf, -np.inf, -np.inf, np.nan, np.inf], dtype=np.float64) + expected = adjust_negative_zero(zero, expected) + + result = op(idx, zero) + tm.assert_index_equal(result, expected) + + # ------------------------------------------------------------------ + + @pytest.mark.parametrize("dtype1", [np.int64, np.float64, np.uint64]) + def test_ser_div_ser( + self, + switch_numexpr_min_elements, + dtype1, + any_real_numpy_dtype, + ): + # no longer do integer div for any ops, but deal with the 0's + dtype2 = any_real_numpy_dtype + + first = Series([3, 4, 5, 8], name="first").astype(dtype1) + second = Series([0, 0, 0, 3], name="second").astype(dtype2) + + with np.errstate(all="ignore"): + expected = Series( + first.values.astype(np.float64) / second.values, + dtype="float64", + name=None, + ) + expected.iloc[0:3] = np.inf + if first.dtype == "int64" and second.dtype == "float32": + # when using numexpr, the casting rules are slightly 
different + # and int64/float32 combo results in float32 instead of float64 + if expr.USE_NUMEXPR and switch_numexpr_min_elements == 0: + expected = expected.astype("float32") + + result = first / second + tm.assert_series_equal(result, expected) + assert not result.equals(second / first) + + @pytest.mark.parametrize("dtype1", [np.int64, np.float64, np.uint64]) + def test_ser_divmod_zero(self, dtype1, any_real_numpy_dtype): + # GH#26987 + dtype2 = any_real_numpy_dtype + left = Series([1, 1]).astype(dtype1) + right = Series([0, 2]).astype(dtype2) + + # GH#27321 pandas convention is to set 1 // 0 to np.inf, as opposed + # to numpy which sets to np.nan; patch `expected[0]` below + expected = left // right, left % right + expected = list(expected) + expected[0] = expected[0].astype(np.float64) + expected[0][0] = np.inf + result = divmod(left, right) + + tm.assert_series_equal(result[0], expected[0]) + tm.assert_series_equal(result[1], expected[1]) + + # rdivmod case + result = divmod(left.values, right) + tm.assert_series_equal(result[0], expected[0]) + tm.assert_series_equal(result[1], expected[1]) + + def test_ser_divmod_inf(self): + left = Series([np.inf, 1.0]) + right = Series([np.inf, 2.0]) + + expected = left // right, left % right + result = divmod(left, right) + + tm.assert_series_equal(result[0], expected[0]) + tm.assert_series_equal(result[1], expected[1]) + + # rdivmod case + result = divmod(left.values, right) + tm.assert_series_equal(result[0], expected[0]) + tm.assert_series_equal(result[1], expected[1]) + + def test_rdiv_zero_compat(self): + # GH#8674 + zero_array = np.array([0] * 5) + data = np.random.randn(5) + expected = Series([0.0] * 5) + + result = zero_array / Series(data) + tm.assert_series_equal(result, expected) + + result = Series(zero_array) / data + tm.assert_series_equal(result, expected) + + result = Series(zero_array) / Series(data) + tm.assert_series_equal(result, expected) + + def test_div_zero_inf_signs(self): + # GH#9144, inf signing + ser = Series([-1, 0, 1], name="first") + expected = Series([-np.inf, np.nan, np.inf], name="first") + + result = ser / 0 + tm.assert_series_equal(result, expected) + + def test_rdiv_zero(self): + # GH#9144 + ser = Series([-1, 0, 1], name="first") + expected = Series([0.0, np.nan, 0.0], name="first") + + result = 0 / ser + tm.assert_series_equal(result, expected) + + def test_floordiv_div(self): + # GH#9144 + ser = Series([-1, 0, 1], name="first") + + result = ser // 0 + expected = Series([-np.inf, np.nan, np.inf], name="first") + tm.assert_series_equal(result, expected) + + def test_df_div_zero_df(self): + # integer div, but deal with the 0's (GH#9144) + df = pd.DataFrame({"first": [3, 4, 5, 8], "second": [0, 0, 0, 3]}) + result = df / df + + first = Series([1.0, 1.0, 1.0, 1.0]) + second = Series([np.nan, np.nan, np.nan, 1]) + expected = pd.DataFrame({"first": first, "second": second}) + tm.assert_frame_equal(result, expected) + + def test_df_div_zero_array(self): + # integer div, but deal with the 0's (GH#9144) + df = pd.DataFrame({"first": [3, 4, 5, 8], "second": [0, 0, 0, 3]}) + + first = Series([1.0, 1.0, 1.0, 1.0]) + second = Series([np.nan, np.nan, np.nan, 1]) + expected = pd.DataFrame({"first": first, "second": second}) + + with np.errstate(all="ignore"): + arr = df.values.astype("float") / df.values + result = pd.DataFrame(arr, index=df.index, columns=df.columns) + tm.assert_frame_equal(result, expected) + + def test_df_div_zero_int(self): + # integer div, but deal with the 0's (GH#9144) + df = pd.DataFrame({"first": 
[3, 4, 5, 8], "second": [0, 0, 0, 3]})
+
+        result = df / 0
+        expected = pd.DataFrame(np.inf, index=df.index, columns=df.columns)
+        expected.iloc[0:3, 1] = np.nan
+        tm.assert_frame_equal(result, expected)
+
+        # numpy has a slightly different (wrong) treatment
+        with np.errstate(all="ignore"):
+            arr = df.values.astype("float64") / 0
+        result2 = pd.DataFrame(arr, index=df.index, columns=df.columns)
+        tm.assert_frame_equal(result2, expected)
+
+    def test_df_div_zero_series_does_not_commute(self):
+        # integer div, but deal with the 0's (GH#9144)
+        df = pd.DataFrame(np.random.randn(10, 5))
+        ser = df[0]
+        res = ser / df
+        res2 = df / ser
+        assert not res.fillna(0).equals(res2.fillna(0))
+
+    # ------------------------------------------------------------------
+    # Mod By Zero
+
+    def test_df_mod_zero_df(self, using_array_manager):
+        # GH#3590, modulo as ints
+        df = pd.DataFrame({"first": [3, 4, 5, 8], "second": [0, 0, 0, 3]})
+        # this is technically wrong, as the integer portion is coerced to float
+        first = Series([0, 0, 0, 0])
+        if not using_array_manager:
+            # INFO(ArrayManager) BlockManager doesn't preserve dtype per column
+            # while ArrayManager performs the op column-wise and thus preserves
+            # dtype if possible
+            first = first.astype("float64")
+        second = Series([np.nan, np.nan, np.nan, 0])
+        expected = pd.DataFrame({"first": first, "second": second})
+        result = df % df
+        tm.assert_frame_equal(result, expected)
+
+        # GH#38939 If we don't pass copy=False, df is consolidated and
+        # result["first"] is float64 instead of int64
+        df = pd.DataFrame({"first": [3, 4, 5, 8], "second": [0, 0, 0, 3]}, copy=False)
+        first = Series([0, 0, 0, 0], dtype="int64")
+        second = Series([np.nan, np.nan, np.nan, 0])
+        expected = pd.DataFrame({"first": first, "second": second})
+        result = df % df
+        tm.assert_frame_equal(result, expected)
+
+    def test_df_mod_zero_array(self):
+        # GH#3590, modulo as ints
+        df = pd.DataFrame({"first": [3, 4, 5, 8], "second": [0, 0, 0, 3]})
+
+        # this is technically wrong, as the integer portion is coerced to float
+        # ###
+        first = Series([0, 0, 0, 0], dtype="float64")
+        second = Series([np.nan, np.nan, np.nan, 0])
+        expected = pd.DataFrame({"first": first, "second": second})
+
+        # numpy has a slightly different (wrong) treatment
+        with np.errstate(all="ignore"):
+            arr = df.values % df.values
+        result2 = pd.DataFrame(arr, index=df.index, columns=df.columns, dtype="float64")
+        result2.iloc[0:3, 1] = np.nan
+        tm.assert_frame_equal(result2, expected)
+
+    def test_df_mod_zero_int(self):
+        # GH#3590, modulo as ints
+        df = pd.DataFrame({"first": [3, 4, 5, 8], "second": [0, 0, 0, 3]})
+
+        result = df % 0
+        expected = pd.DataFrame(np.nan, index=df.index, columns=df.columns)
+        tm.assert_frame_equal(result, expected)
+
+        # numpy has a slightly different (wrong) treatment
+        with np.errstate(all="ignore"):
+            arr = df.values.astype("float64") % 0
+        result2 = pd.DataFrame(arr, index=df.index, columns=df.columns)
+        tm.assert_frame_equal(result2, expected)
+
+    def test_df_mod_zero_series_does_not_commute(self):
+        # GH#3590, modulo as ints
+        # not commutative with series
+        df = pd.DataFrame(np.random.randn(10, 5))
+        ser = df[0]
+        res = ser % df
+        res2 = df % ser
+        assert not res.fillna(0).equals(res2.fillna(0))
+
+
+class TestMultiplicationDivision:
+    # __mul__, __rmul__, __div__, __rdiv__, __floordiv__, __rfloordiv__
+    # for non-timestamp/timedelta/period dtypes
+
+    def test_divide_decimal(self, box_with_array):
+        # resolves issue GH#9787
+        box = box_with_array
+        ser = Series([Decimal(10)])
+
expected = Series([Decimal(5)]) + + ser = tm.box_expected(ser, box) + expected = tm.box_expected(expected, box) + + result = ser / Decimal(2) + + tm.assert_equal(result, expected) + + result = ser // Decimal(2) + tm.assert_equal(result, expected) + + def test_div_equiv_binop(self): + # Test Series.div as well as Series.__div__ + # float/integer issue + # GH#7785 + first = Series([1, 0], name="first") + second = Series([-0.01, -0.02], name="second") + expected = Series([-0.01, -np.inf]) + + result = second.div(first) + tm.assert_series_equal(result, expected, check_names=False) + + result = second / first + tm.assert_series_equal(result, expected) + + def test_div_int(self, numeric_idx): + idx = numeric_idx + result = idx / 1 + expected = idx.astype("float64") + tm.assert_index_equal(result, expected) + + result = idx / 2 + expected = Index(idx.values / 2) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("op", [operator.mul, ops.rmul, operator.floordiv]) + def test_mul_int_identity(self, op, numeric_idx, box_with_array): + idx = numeric_idx + idx = tm.box_expected(idx, box_with_array) + + result = op(idx, 1) + tm.assert_equal(result, idx) + + def test_mul_int_array(self, numeric_idx): + idx = numeric_idx + didx = idx * idx + + result = idx * np.array(5, dtype="int64") + tm.assert_index_equal(result, idx * 5) + + arr_dtype = "uint64" if isinstance(idx, UInt64Index) else "int64" + result = idx * np.arange(5, dtype=arr_dtype) + tm.assert_index_equal(result, didx) + + def test_mul_int_series(self, numeric_idx): + idx = numeric_idx + didx = idx * idx + + arr_dtype = "uint64" if isinstance(idx, UInt64Index) else "int64" + result = idx * Series(np.arange(5, dtype=arr_dtype)) + tm.assert_series_equal(result, Series(didx)) + + def test_mul_float_series(self, numeric_idx): + idx = numeric_idx + rng5 = np.arange(5, dtype="float64") + + result = idx * Series(rng5 + 0.1) + expected = Series(rng5 * (rng5 + 0.1)) + tm.assert_series_equal(result, expected) + + def test_mul_index(self, numeric_idx): + idx = numeric_idx + + result = idx * idx + tm.assert_index_equal(result, idx ** 2) + + def test_mul_datelike_raises(self, numeric_idx): + idx = numeric_idx + msg = "cannot perform __rmul__ with this index type" + with pytest.raises(TypeError, match=msg): + idx * pd.date_range("20130101", periods=5) + + def test_mul_size_mismatch_raises(self, numeric_idx): + idx = numeric_idx + msg = "operands could not be broadcast together" + with pytest.raises(ValueError, match=msg): + idx * idx[0:3] + with pytest.raises(ValueError, match=msg): + idx * np.array([1, 2]) + + @pytest.mark.parametrize("op", [operator.pow, ops.rpow]) + def test_pow_float(self, op, numeric_idx, box_with_array): + # test power calculations both ways, GH#14973 + box = box_with_array + idx = numeric_idx + expected = Float64Index(op(idx.values, 2.0)) + + idx = tm.box_expected(idx, box) + expected = tm.box_expected(expected, box) + + result = op(idx, 2.0) + tm.assert_equal(result, expected) + + def test_modulo(self, numeric_idx, box_with_array): + # GH#9244 + box = box_with_array + idx = numeric_idx + expected = Index(idx.values % 2) + + idx = tm.box_expected(idx, box) + expected = tm.box_expected(expected, box) + + result = idx % 2 + tm.assert_equal(result, expected) + + def test_divmod_scalar(self, numeric_idx): + idx = numeric_idx + + result = divmod(idx, 2) + with np.errstate(all="ignore"): + div, mod = divmod(idx.values, 2) + + expected = Index(div), Index(mod) + for r, e in zip(result, expected): + tm.assert_index_equal(r, 
e) + + def test_divmod_ndarray(self, numeric_idx): + idx = numeric_idx + other = np.ones(idx.values.shape, dtype=idx.values.dtype) * 2 + + result = divmod(idx, other) + with np.errstate(all="ignore"): + div, mod = divmod(idx.values, other) + + expected = Index(div), Index(mod) + for r, e in zip(result, expected): + tm.assert_index_equal(r, e) + + def test_divmod_series(self, numeric_idx): + idx = numeric_idx + other = np.ones(idx.values.shape, dtype=idx.values.dtype) * 2 + + result = divmod(idx, Series(other)) + with np.errstate(all="ignore"): + div, mod = divmod(idx.values, other) + + expected = Series(div), Series(mod) + for r, e in zip(result, expected): + tm.assert_series_equal(r, e) + + @pytest.mark.parametrize("other", [np.nan, 7, -23, 2.718, -3.14, np.inf]) + def test_ops_np_scalar(self, other): + vals = np.random.randn(5, 3) + f = lambda x: pd.DataFrame( + x, index=list("ABCDE"), columns=["jim", "joe", "jolie"] + ) + + df = f(vals) + + tm.assert_frame_equal(df / np.array(other), f(vals / other)) + tm.assert_frame_equal(np.array(other) * df, f(vals * other)) + tm.assert_frame_equal(df + np.array(other), f(vals + other)) + tm.assert_frame_equal(np.array(other) - df, f(other - vals)) + + # TODO: This came from series.test.test_operators, needs cleanup + def test_operators_frame(self): + # rpow does not work with DataFrame + ts = tm.makeTimeSeries() + ts.name = "ts" + + df = pd.DataFrame({"A": ts}) + + tm.assert_series_equal(ts + ts, ts + df["A"], check_names=False) + tm.assert_series_equal(ts ** ts, ts ** df["A"], check_names=False) + tm.assert_series_equal(ts < ts, ts < df["A"], check_names=False) + tm.assert_series_equal(ts / ts, ts / df["A"], check_names=False) + + # TODO: this came from tests.series.test_analytics, needs cleanup and + # de-duplication with test_modulo above + def test_modulo2(self): + with np.errstate(all="ignore"): + + # GH#3590, modulo as ints + p = pd.DataFrame({"first": [3, 4, 5, 8], "second": [0, 0, 0, 3]}) + result = p["first"] % p["second"] + expected = Series(p["first"].values % p["second"].values, dtype="float64") + expected.iloc[0:3] = np.nan + tm.assert_series_equal(result, expected) + + result = p["first"] % 0 + expected = Series(np.nan, index=p.index, name="first") + tm.assert_series_equal(result, expected) + + p = p.astype("float64") + result = p["first"] % p["second"] + expected = Series(p["first"].values % p["second"].values) + tm.assert_series_equal(result, expected) + + p = p.astype("float64") + result = p["first"] % p["second"] + result2 = p["second"] % p["first"] + assert not result.equals(result2) + + def test_modulo_zero_int(self): + # GH#9144 + with np.errstate(all="ignore"): + s = Series([0, 1]) + + result = s % 0 + expected = Series([np.nan, np.nan]) + tm.assert_series_equal(result, expected) + + result = 0 % s + expected = Series([np.nan, 0.0]) + tm.assert_series_equal(result, expected) + + +class TestAdditionSubtraction: + # __add__, __sub__, __radd__, __rsub__, __iadd__, __isub__ + # for non-timestamp/timedelta/period dtypes + + @pytest.mark.parametrize( + "first, second, expected", + [ + ( + Series([1, 2, 3], index=list("ABC"), name="x"), + Series([2, 2, 2], index=list("ABD"), name="x"), + Series([3.0, 4.0, np.nan, np.nan], index=list("ABCD"), name="x"), + ), + ( + Series([1, 2, 3], index=list("ABC"), name="x"), + Series([2, 2, 2, 2], index=list("ABCD"), name="x"), + Series([3, 4, 5, np.nan], index=list("ABCD"), name="x"), + ), + ], + ) + def test_add_series(self, first, second, expected): + # GH#1134 + tm.assert_series_equal(first + 
second, expected) + tm.assert_series_equal(second + first, expected) + + @pytest.mark.parametrize( + "first, second, expected", + [ + ( + pd.DataFrame({"x": [1, 2, 3]}, index=list("ABC")), + pd.DataFrame({"x": [2, 2, 2]}, index=list("ABD")), + pd.DataFrame({"x": [3.0, 4.0, np.nan, np.nan]}, index=list("ABCD")), + ), + ( + pd.DataFrame({"x": [1, 2, 3]}, index=list("ABC")), + pd.DataFrame({"x": [2, 2, 2, 2]}, index=list("ABCD")), + pd.DataFrame({"x": [3, 4, 5, np.nan]}, index=list("ABCD")), + ), + ], + ) + def test_add_frames(self, first, second, expected): + # GH#1134 + tm.assert_frame_equal(first + second, expected) + tm.assert_frame_equal(second + first, expected) + + # TODO: This came from series.test.test_operators, needs cleanup + def test_series_frame_radd_bug(self, fixed_now_ts): + # GH#353 + vals = Series(tm.rands_array(5, 10)) + result = "foo_" + vals + expected = vals.map(lambda x: "foo_" + x) + tm.assert_series_equal(result, expected) + + frame = pd.DataFrame({"vals": vals}) + result = "foo_" + frame + expected = pd.DataFrame({"vals": vals.map(lambda x: "foo_" + x)}) + tm.assert_frame_equal(result, expected) + + ts = tm.makeTimeSeries() + ts.name = "ts" + + # really raise this time + fix_now = fixed_now_ts.to_pydatetime() + msg = "|".join( + [ + "unsupported operand type", + # wrong error message, see https://github.com/numpy/numpy/issues/18832 + "Concatenation operation", + ] + ) + with pytest.raises(TypeError, match=msg): + fix_now + ts + + with pytest.raises(TypeError, match=msg): + ts + fix_now + + # TODO: This came from series.test.test_operators, needs cleanup + def test_datetime64_with_index(self): + # arithmetic integer ops with an index + ser = Series(np.random.randn(5)) + expected = ser - ser.index.to_series() + result = ser - ser.index + tm.assert_series_equal(result, expected) + + # GH#4629 + # arithmetic datetime64 ops with an index + ser = Series( + pd.date_range("20130101", periods=5), + index=pd.date_range("20130101", periods=5), + ) + expected = ser - ser.index.to_series() + result = ser - ser.index + tm.assert_series_equal(result, expected) + + msg = "cannot subtract period" + with pytest.raises(TypeError, match=msg): + # GH#18850 + result = ser - ser.index.to_period() + + df = pd.DataFrame( + np.random.randn(5, 2), index=pd.date_range("20130101", periods=5) + ) + df["date"] = pd.Timestamp("20130102") + df["expected"] = df["date"] - df.index.to_series() + df["result"] = df["date"] - df.index + tm.assert_series_equal(df["result"], df["expected"], check_names=False) + + # TODO: taken from tests.frame.test_operators, needs cleanup + def test_frame_operators(self, float_frame): + frame = float_frame + frame2 = pd.DataFrame(float_frame, columns=["D", "C", "B", "A"]) + + garbage = np.random.random(4) + colSeries = Series(garbage, index=np.array(frame.columns)) + + idSum = frame + frame + seriesSum = frame + colSeries + + for col, series in idSum.items(): + for idx, val in series.items(): + origVal = frame[col][idx] * 2 + if not np.isnan(val): + assert val == origVal + else: + assert np.isnan(origVal) + + for col, series in seriesSum.items(): + for idx, val in series.items(): + origVal = frame[col][idx] + colSeries[col] + if not np.isnan(val): + assert val == origVal + else: + assert np.isnan(origVal) + + added = frame2 + frame2 + expected = frame2 * 2 + tm.assert_frame_equal(added, expected) + + df = pd.DataFrame({"a": ["a", None, "b"]}) + tm.assert_frame_equal(df + df, pd.DataFrame({"a": ["aa", np.nan, "bb"]})) + + # Test for issue #10181 + for dtype in ("float", 
"int64"): + frames = [ + pd.DataFrame(dtype=dtype), + pd.DataFrame(columns=["A"], dtype=dtype), + pd.DataFrame(index=[0], dtype=dtype), + ] + for df in frames: + assert (df + df).equals(df) + tm.assert_frame_equal(df + df, df) + + @pytest.mark.parametrize( + "func", + [lambda x: x * 2, lambda x: x[::2], lambda x: 5], + ids=["multiply", "slice", "constant"], + ) + def test_series_operators_arithmetic(self, all_arithmetic_functions, func): + op = all_arithmetic_functions + series = tm.makeTimeSeries().rename("ts") + other = func(series) + compare_op(series, other, op) + + @pytest.mark.parametrize( + "func", [lambda x: x + 1, lambda x: 5], ids=["add", "constant"] + ) + def test_series_operators_compare(self, comparison_op, func): + op = comparison_op + series = tm.makeTimeSeries().rename("ts") + other = func(series) + compare_op(series, other, op) + + @pytest.mark.parametrize( + "func", + [lambda x: x * 2, lambda x: x[::2], lambda x: 5], + ids=["multiply", "slice", "constant"], + ) + def test_divmod(self, func): + series = tm.makeTimeSeries().rename("ts") + other = func(series) + results = divmod(series, other) + if isinstance(other, abc.Iterable) and len(series) != len(other): + # if the lengths don't match, this is the test where we use + # `tser[::2]`. Pad every other value in `other_np` with nan. + other_np = [] + for n in other: + other_np.append(n) + other_np.append(np.nan) + else: + other_np = other + other_np = np.asarray(other_np) + with np.errstate(all="ignore"): + expecteds = divmod(series.values, np.asarray(other_np)) + + for result, expected in zip(results, expecteds): + # check the values, name, and index separately + tm.assert_almost_equal(np.asarray(result), expected) + + assert result.name == series.name + tm.assert_index_equal(result.index, series.index._with_freq(None)) + + def test_series_divmod_zero(self): + # Check that divmod uses pandas convention for division by zero, + # which does not match numpy. 
+ # pandas convention has + # 1/0 == np.inf + # -1/0 == -np.inf + # 1/-0.0 == -np.inf + # -1/-0.0 == np.inf + tser = tm.makeTimeSeries().rename("ts") + other = tser * 0 + + result = divmod(tser, other) + exp1 = Series([np.inf] * len(tser), index=tser.index, name="ts") + exp2 = Series([np.nan] * len(tser), index=tser.index, name="ts") + tm.assert_series_equal(result[0], exp1) + tm.assert_series_equal(result[1], exp2) + + +class TestUFuncCompat: + @pytest.mark.parametrize( + "holder", + [Int64Index, UInt64Index, Float64Index, RangeIndex, Series], + ) + def test_ufunc_compat(self, holder): + box = Series if holder is Series else Index + + if holder is RangeIndex: + idx = RangeIndex(0, 5, name="foo") + else: + idx = holder(np.arange(5, dtype="int64"), name="foo") + result = np.sin(idx) + expected = box(np.sin(np.arange(5, dtype="int64")), name="foo") + tm.assert_equal(result, expected) + + @pytest.mark.parametrize("holder", [Int64Index, UInt64Index, Float64Index, Series]) + def test_ufunc_coercions(self, holder): + idx = holder([1, 2, 3, 4, 5], name="x") + box = Series if holder is Series else Index + + result = np.sqrt(idx) + assert result.dtype == "f8" and isinstance(result, box) + exp = Float64Index(np.sqrt(np.array([1, 2, 3, 4, 5])), name="x") + exp = tm.box_expected(exp, box) + tm.assert_equal(result, exp) + + result = np.divide(idx, 2.0) + assert result.dtype == "f8" and isinstance(result, box) + exp = Float64Index([0.5, 1.0, 1.5, 2.0, 2.5], name="x") + exp = tm.box_expected(exp, box) + tm.assert_equal(result, exp) + + # _evaluate_numeric_binop + result = idx + 2.0 + assert result.dtype == "f8" and isinstance(result, box) + exp = Float64Index([3.0, 4.0, 5.0, 6.0, 7.0], name="x") + exp = tm.box_expected(exp, box) + tm.assert_equal(result, exp) + + result = idx - 2.0 + assert result.dtype == "f8" and isinstance(result, box) + exp = Float64Index([-1.0, 0.0, 1.0, 2.0, 3.0], name="x") + exp = tm.box_expected(exp, box) + tm.assert_equal(result, exp) + + result = idx * 1.0 + assert result.dtype == "f8" and isinstance(result, box) + exp = Float64Index([1.0, 2.0, 3.0, 4.0, 5.0], name="x") + exp = tm.box_expected(exp, box) + tm.assert_equal(result, exp) + + result = idx / 2.0 + assert result.dtype == "f8" and isinstance(result, box) + exp = Float64Index([0.5, 1.0, 1.5, 2.0, 2.5], name="x") + exp = tm.box_expected(exp, box) + tm.assert_equal(result, exp) + + @pytest.mark.parametrize("holder", [Int64Index, UInt64Index, Float64Index, Series]) + def test_ufunc_multiple_return_values(self, holder): + obj = holder([1, 2, 3], name="x") + box = Series if holder is Series else Index + + result = np.modf(obj) + assert isinstance(result, tuple) + exp1 = Float64Index([0.0, 0.0, 0.0], name="x") + exp2 = Float64Index([1.0, 2.0, 3.0], name="x") + tm.assert_equal(result[0], tm.box_expected(exp1, box)) + tm.assert_equal(result[1], tm.box_expected(exp2, box)) + + def test_ufunc_at(self): + s = Series([0, 1, 2], index=[1, 2, 3], name="x") + np.add.at(s, [0, 2], 10) + expected = Series([10, 1, 12], index=[1, 2, 3], name="x") + tm.assert_series_equal(s, expected) + + +class TestObjectDtypeEquivalence: + # Tests that arithmetic operations match operations executed elementwise + + @pytest.mark.parametrize("dtype", [None, object]) + def test_numarr_with_dtype_add_nan(self, dtype, box_with_array): + box = box_with_array + ser = Series([1, 2, 3], dtype=dtype) + expected = Series([np.nan, np.nan, np.nan], dtype=dtype) + + ser = tm.box_expected(ser, box) + expected = tm.box_expected(expected, box) + + result = np.nan + ser + 
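The division-by-zero convention spelled out in the comment above can be checked directly: pandas masks zero divisors itself rather than deferring to numpy's integer behavior. A sketch of the contrast (pandas 1.x, matching what the surrounding tests assert):

```python
import numpy as np
import pandas as pd

ser = pd.Series([-1, 0, 1])

# pandas: signed infinities for n/0, NaN for 0/0 and for any n % 0 ...
print(ser / 0)    # [-inf, NaN, inf]
print(ser % 0)    # [NaN, NaN, NaN]

# ... and a *negative* zero divisor flips the sign of the result.
print(pd.Series([1.0, -1.0]) / -0.0)   # [-inf, inf]

# numpy integer floor-division by zero instead yields 0 (with a warning).
with np.errstate(all="ignore"):
    print(np.array([-1, 0, 1]) // 0)   # [0 0 0]
```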
tm.assert_equal(result, expected) + + result = ser + np.nan + tm.assert_equal(result, expected) + + @pytest.mark.parametrize("dtype", [None, object]) + def test_numarr_with_dtype_add_int(self, dtype, box_with_array): + box = box_with_array + ser = Series([1, 2, 3], dtype=dtype) + expected = Series([2, 3, 4], dtype=dtype) + + ser = tm.box_expected(ser, box) + expected = tm.box_expected(expected, box) + + result = 1 + ser + tm.assert_equal(result, expected) + + result = ser + 1 + tm.assert_equal(result, expected) + + # TODO: moved from tests.series.test_operators; needs cleanup + @pytest.mark.parametrize( + "op", + [operator.add, operator.sub, operator.mul, operator.truediv, operator.floordiv], + ) + def test_operators_reverse_object(self, op): + # GH#56 + arr = Series(np.random.randn(10), index=np.arange(10), dtype=object) + + result = op(1.0, arr) + expected = op(1.0, arr.astype(float)) + tm.assert_series_equal(result.astype(float), expected) + + +class TestNumericArithmeticUnsorted: + # Tests in this class have been moved from type-specific test modules + # but not yet sorted, parametrized, and de-duplicated + + def check_binop(self, ops, scalars, idxs): + for op in ops: + for a, b in combinations(idxs, 2): + a = a._rename("foo") + b = b._rename("bar") + result = op(a, b) + expected = op(Int64Index(a), Int64Index(b)) + tm.assert_index_equal(result, expected, exact="equiv") + for idx in idxs: + for scalar in scalars: + result = op(idx, scalar) + expected = op(Int64Index(idx), scalar) + tm.assert_index_equal(result, expected, exact="equiv") + + def test_binops(self): + ops = [ + operator.add, + operator.sub, + operator.mul, + operator.floordiv, + operator.truediv, + ] + scalars = [-1, 1, 2] + idxs = [ + RangeIndex(0, 10, 1), + RangeIndex(0, 20, 2), + RangeIndex(-10, 10, 2), + RangeIndex(5, -5, -1), + ] + self.check_binop(ops, scalars, idxs) + + def test_binops_pow(self): + # numpy does not allow powers of negative integers so test separately + # https://github.com/numpy/numpy/pull/8127 + ops = [pow] + scalars = [1, 2] + idxs = [RangeIndex(0, 10, 1), RangeIndex(0, 20, 2)] + self.check_binop(ops, scalars, idxs) + + # TODO: divmod? 
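The RangeIndex overrides validated by `check_binop` keep the result lazy whenever the output is still an arithmetic progression, and only materialize values otherwise. Roughly (on the pandas 1.x line, where the materialized fallback prints as `Int64Index`):

```python
import pandas as pd

idx = pd.RangeIndex(0, 10, 2)

# Results that are still an arithmetic progression stay a RangeIndex ...
print(idx * 2)     # RangeIndex(start=0, stop=20, step=4)
print(idx + 3)     # RangeIndex(start=3, stop=13, step=2)

# ... anything else materializes into a concrete integer index.
print(idx * idx)   # Int64Index([0, 4, 16, 36, 64], dtype='int64')
```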
+ @pytest.mark.parametrize( + "op", + [ + operator.add, + operator.sub, + operator.mul, + operator.floordiv, + operator.truediv, + operator.pow, + operator.mod, + ], + ) + def test_arithmetic_with_frame_or_series(self, op): + # check that we return NotImplemented when operating with Series + # or DataFrame + index = RangeIndex(5) + other = Series(np.random.randn(5)) + + expected = op(Series(index), other) + result = op(index, other) + tm.assert_series_equal(result, expected) + + other = pd.DataFrame(np.random.randn(2, 5)) + expected = op(pd.DataFrame([index, index]), other) + result = op(index, other) + tm.assert_frame_equal(result, expected) + + def test_numeric_compat2(self): + # validate that we are handling the RangeIndex overrides to numeric ops + # and returning RangeIndex where possible + + idx = RangeIndex(0, 10, 2) + + result = idx * 2 + expected = RangeIndex(0, 20, 4) + tm.assert_index_equal(result, expected, exact=True) + + result = idx + 2 + expected = RangeIndex(2, 12, 2) + tm.assert_index_equal(result, expected, exact=True) + + result = idx - 2 + expected = RangeIndex(-2, 8, 2) + tm.assert_index_equal(result, expected, exact=True) + + result = idx / 2 + expected = RangeIndex(0, 5, 1).astype("float64") + tm.assert_index_equal(result, expected, exact=True) + + result = idx / 4 + expected = RangeIndex(0, 10, 2) / 4 + tm.assert_index_equal(result, expected, exact=True) + + result = idx // 1 + expected = idx + tm.assert_index_equal(result, expected, exact=True) + + # __mul__ + result = idx * idx + expected = Index(idx.values * idx.values) + tm.assert_index_equal(result, expected, exact=True) + + # __pow__ + idx = RangeIndex(0, 1000, 2) + result = idx ** 2 + expected = Int64Index(idx._values) ** 2 + tm.assert_index_equal(Index(result.values), expected, exact=True) + + # __floordiv__ + cases_exact = [ + (RangeIndex(0, 1000, 2), 2, RangeIndex(0, 500, 1)), + (RangeIndex(-99, -201, -3), -3, RangeIndex(33, 67, 1)), + ( + RangeIndex(0, 1000, 1), + 2, + Int64Index(RangeIndex(0, 1000, 1)._values) // 2, + ), + ( + RangeIndex(0, 100, 1), + 2.0, + Int64Index(RangeIndex(0, 100, 1)._values) // 2.0, + ), + (RangeIndex(0), 50, RangeIndex(0)), + (RangeIndex(2, 4, 2), 3, RangeIndex(0, 1, 1)), + (RangeIndex(-5, -10, -6), 4, RangeIndex(-2, -1, 1)), + (RangeIndex(-100, -200, 3), 2, RangeIndex(0)), + ] + for idx, div, expected in cases_exact: + tm.assert_index_equal(idx // div, expected, exact=True) + + @pytest.mark.parametrize("dtype", [np.int64, np.float64]) + @pytest.mark.parametrize("delta", [1, 0, -1]) + def test_addsub_arithmetic(self, dtype, delta): + # GH#8142 + delta = dtype(delta) + index = Index([10, 11, 12], dtype=dtype) + result = index + delta + expected = Index(index.values + delta, dtype=dtype) + tm.assert_index_equal(result, expected) + + # this subtraction used to fail + result = index - delta + expected = Index(index.values - delta, dtype=dtype) + tm.assert_index_equal(result, expected) + + tm.assert_index_equal(index + index, 2 * index) + tm.assert_index_equal(index - index, 0 * index) + assert not (index - index).empty + + +def test_fill_value_inf_masking(): + # GH #27464 make sure we mask 0/1 with Inf and not NaN + df = pd.DataFrame({"A": [0, 1, 2], "B": [1.1, None, 1.1]}) + + other = pd.DataFrame({"A": [1.1, 1.2, 1.3]}, index=[0, 2, 3]) + + result = df.rfloordiv(other, fill_value=1) + + expected = pd.DataFrame( + {"A": [np.inf, 1.0, 0.0, 1.0], "B": [0.0, np.nan, 0.0, np.nan]} + ) + tm.assert_frame_equal(result, expected) + + +def test_dataframe_div_silenced(): + # GH#26793 + pdf1 
= pd.DataFrame( + { + "A": np.arange(10), + "B": [np.nan, 1, 2, 3, 4] * 2, + "C": [np.nan] * 10, + "D": np.arange(10), + }, + index=list("abcdefghij"), + columns=list("ABCD"), + ) + pdf2 = pd.DataFrame( + np.random.randn(10, 4), index=list("abcdefghjk"), columns=list("ABCX") + ) + with tm.assert_produces_warning(None): + pdf1.div(pdf2, fill_value=0) + + +@pytest.mark.parametrize( + "data, expected_data", + [([0, 1, 2], [0, 2, 4])], +) +def test_integer_array_add_list_like( + box_pandas_1d_array, box_1d_array, data, expected_data +): + # GH22606 Verify operators with IntegerArray and list-likes + arr = array(data, dtype="Int64") + container = box_pandas_1d_array(arr) + left = container + box_1d_array(data) + right = box_1d_array(data) + container + + if Series in [box_1d_array, box_pandas_1d_array]: + cls = Series + elif Index in [box_1d_array, box_pandas_1d_array]: + cls = Index + else: + cls = array + + expected = cls(expected_data, dtype="Int64") + + tm.assert_equal(left, expected) + tm.assert_equal(right, expected) + + +def test_sub_multiindex_swapped_levels(): + # GH 9952 + df = pd.DataFrame( + {"a": np.random.randn(6)}, + index=pd.MultiIndex.from_product( + [["a", "b"], [0, 1, 2]], names=["levA", "levB"] + ), + ) + df2 = df.copy() + df2.index = df2.index.swaplevel(0, 1) + result = df - df2 + expected = pd.DataFrame([0.0] * 6, columns=["a"], index=df.index) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("power", [1, 2, 5]) +@pytest.mark.parametrize("string_size", [0, 1, 2, 5]) +def test_empty_str_comparison(power, string_size): + # GH 37348 + a = np.array(range(10 ** power)) + right = pd.DataFrame(a, dtype=np.int64) + left = " " * string_size + + result = right == left + expected = pd.DataFrame(np.zeros(right.shape, dtype=bool)) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/test_object.py b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/test_object.py new file mode 100644 index 0000000..c96d7c0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/test_object.py @@ -0,0 +1,379 @@ +# Arithmetic tests for DataFrame/Series/Index/Array classes that should +# behave identically. 
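`test_sub_multiindex_swapped_levels` above relies on alignment matching MultiIndex levels by *name* rather than by position (GH 9952), which is why the swapped frame subtracts to exactly zero. A standalone sketch:

```python
import numpy as np
import pandas as pd

idx = pd.MultiIndex.from_product([["a", "b"], [0, 1, 2]], names=["levA", "levB"])
df = pd.DataFrame({"a": np.random.randn(6)}, index=idx)

swapped = df.copy()
swapped.index = swapped.index.swaplevel(0, 1)

# Levels are matched by name, so every row still lines up with itself.
print((df - swapped).eq(0.0).all(axis=None))   # True
```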
+# Specifically for object dtype +import datetime +from decimal import Decimal +import operator + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Series, + Timestamp, +) +import pandas._testing as tm +from pandas.core import ops + +# ------------------------------------------------------------------ +# Comparisons + + +class TestObjectComparisons: + def test_comparison_object_numeric_nas(self, comparison_op): + ser = Series(np.random.randn(10), dtype=object) + shifted = ser.shift(2) + + func = comparison_op + + result = func(ser, shifted) + expected = func(ser.astype(float), shifted.astype(float)) + tm.assert_series_equal(result, expected) + + def test_object_comparisons(self): + ser = Series(["a", "b", np.nan, "c", "a"]) + + result = ser == "a" + expected = Series([True, False, False, False, True]) + tm.assert_series_equal(result, expected) + + result = ser < "a" + expected = Series([False, False, False, False, False]) + tm.assert_series_equal(result, expected) + + result = ser != "a" + expected = -(ser == "a") + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("dtype", [None, object]) + def test_more_na_comparisons(self, dtype): + left = Series(["a", np.nan, "c"], dtype=dtype) + right = Series(["a", np.nan, "d"], dtype=dtype) + + result = left == right + expected = Series([True, False, False]) + tm.assert_series_equal(result, expected) + + result = left != right + expected = Series([False, True, True]) + tm.assert_series_equal(result, expected) + + result = left == np.nan + expected = Series([False, False, False]) + tm.assert_series_equal(result, expected) + + result = left != np.nan + expected = Series([True, True, True]) + tm.assert_series_equal(result, expected) + + +# ------------------------------------------------------------------ +# Arithmetic + + +class TestArithmetic: + + # TODO: parametrize + def test_pow_ops_object(self): + # GH#22922 + # pow is weird with masking & 1, so testing here + a = Series([1, np.nan, 1, np.nan], dtype=object) + b = Series([1, np.nan, np.nan, 1], dtype=object) + result = a ** b + expected = Series(a.values ** b.values, dtype=object) + tm.assert_series_equal(result, expected) + + result = b ** a + expected = Series(b.values ** a.values, dtype=object) + + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("op", [operator.add, ops.radd]) + @pytest.mark.parametrize("other", ["category", "Int64"]) + def test_add_extension_scalar(self, other, box_with_array, op): + # GH#22378 + # Check that scalars satisfying is_extension_array_dtype(obj) + # do not incorrectly try to dispatch to an ExtensionArray operation + + arr = Series(["a", "b", "c"]) + expected = Series([op(x, other) for x in arr]) + + arr = tm.box_expected(arr, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = op(arr, other) + tm.assert_equal(result, expected) + + def test_objarr_add_str(self, box_with_array): + ser = Series(["x", np.nan, "x"]) + expected = Series(["xa", np.nan, "xa"]) + + ser = tm.box_expected(ser, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = ser + "a" + tm.assert_equal(result, expected) + + def test_objarr_radd_str(self, box_with_array): + ser = Series(["x", np.nan, "x"]) + expected = Series(["ax", np.nan, "ax"]) + + ser = tm.box_expected(ser, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = "a" + ser + tm.assert_equal(result, expected) + + @pytest.mark.parametrize( + "data", + [ + [1, 2, 3], + [1.1, 
2.2, 3.3], + [Timestamp("2011-01-01"), Timestamp("2011-01-02"), pd.NaT], + ["x", "y", 1], + ], + ) + @pytest.mark.parametrize("dtype", [None, object]) + def test_objarr_radd_str_invalid(self, dtype, data, box_with_array): + ser = Series(data, dtype=dtype) + + ser = tm.box_expected(ser, box_with_array) + msg = "|".join( + [ + "can only concatenate str", + "did not contain a loop with signature matching types", + "unsupported operand type", + "must be str", + ] + ) + with pytest.raises(TypeError, match=msg): + "foo_" + ser + + @pytest.mark.parametrize("op", [operator.add, ops.radd, operator.sub, ops.rsub]) + def test_objarr_add_invalid(self, op, box_with_array): + # invalid ops + box = box_with_array + + obj_ser = tm.makeObjectSeries() + obj_ser.name = "objects" + + obj_ser = tm.box_expected(obj_ser, box) + msg = "|".join( + ["can only concatenate str", "unsupported operand type", "must be str"] + ) + with pytest.raises(Exception, match=msg): + op(obj_ser, 1) + with pytest.raises(Exception, match=msg): + op(obj_ser, np.array(1, dtype=np.int64)) + + # TODO: Moved from tests.series.test_operators; needs cleanup + def test_operators_na_handling(self): + ser = Series(["foo", "bar", "baz", np.nan]) + result = "prefix_" + ser + expected = Series(["prefix_foo", "prefix_bar", "prefix_baz", np.nan]) + tm.assert_series_equal(result, expected) + + result = ser + "_suffix" + expected = Series(["foo_suffix", "bar_suffix", "baz_suffix", np.nan]) + tm.assert_series_equal(result, expected) + + # TODO: parametrize over box + @pytest.mark.parametrize("dtype", [None, object]) + def test_series_with_dtype_radd_timedelta(self, dtype): + # note this test is _not_ aimed at timedelta64-dtyped Series + ser = Series( + [pd.Timedelta("1 days"), pd.Timedelta("2 days"), pd.Timedelta("3 days")], + dtype=dtype, + ) + expected = Series( + [pd.Timedelta("4 days"), pd.Timedelta("5 days"), pd.Timedelta("6 days")] + ) + + result = pd.Timedelta("3 days") + ser + tm.assert_series_equal(result, expected) + + result = ser + pd.Timedelta("3 days") + tm.assert_series_equal(result, expected) + + # TODO: cleanup & parametrize over box + def test_mixed_timezone_series_ops_object(self): + # GH#13043 + ser = Series( + [ + Timestamp("2015-01-01", tz="US/Eastern"), + Timestamp("2015-01-01", tz="Asia/Tokyo"), + ], + name="xxx", + ) + assert ser.dtype == object + + exp = Series( + [ + Timestamp("2015-01-02", tz="US/Eastern"), + Timestamp("2015-01-02", tz="Asia/Tokyo"), + ], + name="xxx", + ) + tm.assert_series_equal(ser + pd.Timedelta("1 days"), exp) + tm.assert_series_equal(pd.Timedelta("1 days") + ser, exp) + + # object series & object series + ser2 = Series( + [ + Timestamp("2015-01-03", tz="US/Eastern"), + Timestamp("2015-01-05", tz="Asia/Tokyo"), + ], + name="xxx", + ) + assert ser2.dtype == object + exp = Series([pd.Timedelta("2 days"), pd.Timedelta("4 days")], name="xxx") + tm.assert_series_equal(ser2 - ser, exp) + tm.assert_series_equal(ser - ser2, -exp) + + ser = Series( + [pd.Timedelta("01:00:00"), pd.Timedelta("02:00:00")], + name="xxx", + dtype=object, + ) + assert ser.dtype == object + + exp = Series([pd.Timedelta("01:30:00"), pd.Timedelta("02:30:00")], name="xxx") + tm.assert_series_equal(ser + pd.Timedelta("00:30:00"), exp) + tm.assert_series_equal(pd.Timedelta("00:30:00") + ser, exp) + + # TODO: cleanup & parametrize over box + def test_iadd_preserves_name(self): + # GH#17067, GH#19723 __iadd__ and __isub__ should preserve index name + ser = Series([1, 2, 3]) + ser.index.name = "foo" + + ser.index += 1 + assert ser.index.name 
== "foo" + + ser.index -= 1 + assert ser.index.name == "foo" + + def test_add_string(self): + # from bug report + index = pd.Index(["a", "b", "c"]) + index2 = index + "foo" + + assert "a" not in index2 + assert "afoo" in index2 + + def test_iadd_string(self): + index = pd.Index(["a", "b", "c"]) + # doesn't fail test unless there is a check before `+=` + assert "a" in index + + index += "_x" + assert "a_x" in index + + def test_add(self): + index = tm.makeStringIndex(100) + expected = pd.Index(index.values * 2) + tm.assert_index_equal(index + index, expected) + tm.assert_index_equal(index + index.tolist(), expected) + tm.assert_index_equal(index.tolist() + index, expected) + + # test add and radd + index = pd.Index(list("abc")) + expected = pd.Index(["a1", "b1", "c1"]) + tm.assert_index_equal(index + "1", expected) + expected = pd.Index(["1a", "1b", "1c"]) + tm.assert_index_equal("1" + index, expected) + + def test_sub_fail(self): + index = tm.makeStringIndex(100) + + msg = "unsupported operand type|Cannot broadcast" + with pytest.raises(TypeError, match=msg): + index - "a" + with pytest.raises(TypeError, match=msg): + index - index + with pytest.raises(TypeError, match=msg): + index - index.tolist() + with pytest.raises(TypeError, match=msg): + index.tolist() - index + + def test_sub_object(self): + # GH#19369 + index = pd.Index([Decimal(1), Decimal(2)]) + expected = pd.Index([Decimal(0), Decimal(1)]) + + result = index - Decimal(1) + tm.assert_index_equal(result, expected) + + result = index - pd.Index([Decimal(1), Decimal(1)]) + tm.assert_index_equal(result, expected) + + msg = "unsupported operand type" + with pytest.raises(TypeError, match=msg): + index - "foo" + + with pytest.raises(TypeError, match=msg): + index - np.array([2, "foo"], dtype=object) + + def test_rsub_object(self, fixed_now_ts): + # GH#19369 + index = pd.Index([Decimal(1), Decimal(2)]) + expected = pd.Index([Decimal(1), Decimal(0)]) + + result = Decimal(2) - index + tm.assert_index_equal(result, expected) + + result = np.array([Decimal(2), Decimal(2)]) - index + tm.assert_index_equal(result, expected) + + msg = "unsupported operand type" + with pytest.raises(TypeError, match=msg): + "foo" - index + + with pytest.raises(TypeError, match=msg): + np.array([True, fixed_now_ts]) - index + + +class MyIndex(pd.Index): + # Simple index subclass that tracks ops calls. 
+ + _calls: int + + @classmethod + def _simple_new(cls, values, name=None, dtype=None): + result = object.__new__(cls) + result._data = values + result._name = name + result._calls = 0 + result._reset_identity() + + return result + + def __add__(self, other): + self._calls += 1 + return self._simple_new(self._data) + + def __radd__(self, other): + return self.__add__(other) + + +@pytest.mark.parametrize( + "other", + [ + [datetime.timedelta(1), datetime.timedelta(2)], + [datetime.datetime(2000, 1, 1), datetime.datetime(2000, 1, 2)], + [pd.Period("2000"), pd.Period("2001")], + ["a", "b"], + ], + ids=["timedelta", "datetime", "period", "object"], +) +def test_index_ops_defer_to_unknown_subclasses(other): + # https://github.com/pandas-dev/pandas/issues/31109 + values = np.array( + [datetime.date(2000, 1, 1), datetime.date(2000, 1, 2)], dtype=object + ) + a = MyIndex._simple_new(values) + other = pd.Index(other) + result = other + a + assert isinstance(result, MyIndex) + assert a._calls == 1 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/test_period.py b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/test_period.py new file mode 100644 index 0000000..c315560 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/test_period.py @@ -0,0 +1,1553 @@ +# Arithmetic tests for DataFrame/Series/Index/Array classes that should +# behave identically. +# Specifically for Period dtype +import operator + +import numpy as np +import pytest + +from pandas._libs.tslibs import ( + IncompatibleFrequency, + Period, + Timestamp, + to_offset, +) +from pandas.errors import PerformanceWarning + +import pandas as pd +from pandas import ( + PeriodIndex, + Series, + Timedelta, + TimedeltaIndex, + period_range, +) +import pandas._testing as tm +from pandas.core import ops +from pandas.core.arrays import TimedeltaArray +from pandas.tests.arithmetic.common import ( + assert_invalid_addsub_type, + assert_invalid_comparison, + get_upcast_box, +) + +# ------------------------------------------------------------------ +# Comparisons + + +class TestPeriodArrayLikeComparisons: + # Comparison tests for PeriodDtype vectors fully parametrized over + # DataFrame/Series/PeriodIndex/PeriodArray. Ideally all comparison + # tests will eventually end up here. 
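The `MyIndex` fixture just above relies on Python's binary-operator protocol: when the right operand's type is a proper subclass of the left's and overrides the reflected method, Python tries the reflected method first. That is how `other + a` lands in `MyIndex.__radd__` even though `other` is a plain `pd.Index` (GH#31109). A minimal sketch in plain Python, with made-up class names:

```python
class Base:
    def __add__(self, other):
        return "Base.__add__"


class Tracker(Base):
    # Overriding the reflected method in a subclass gives it priority.
    def __radd__(self, other):
        return "Tracker.__radd__"


# Python tries Tracker.__radd__ *before* Base.__add__ because Tracker
# is a subclass of Base and overrides the reflected operation.
print(Base() + Tracker())   # Tracker.__radd__
```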
+ + @pytest.mark.parametrize("other", ["2017", Period("2017", freq="D")]) + def test_eq_scalar(self, other, box_with_array): + + idx = PeriodIndex(["2017", "2017", "2018"], freq="D") + idx = tm.box_expected(idx, box_with_array) + xbox = get_upcast_box(idx, other, True) + + expected = np.array([True, True, False]) + expected = tm.box_expected(expected, xbox) + + result = idx == other + + tm.assert_equal(result, expected) + + def test_compare_zerodim(self, box_with_array): + # GH#26689 make sure we unbox zero-dimensional arrays + + pi = period_range("2000", periods=4) + other = np.array(pi.to_numpy()[0]) + + pi = tm.box_expected(pi, box_with_array) + xbox = get_upcast_box(pi, other, True) + + result = pi <= other + expected = np.array([True, False, False, False]) + expected = tm.box_expected(expected, xbox) + tm.assert_equal(result, expected) + + @pytest.mark.parametrize( + "scalar", + [ + "foo", + Timestamp("2021-01-01"), + Timedelta(days=4), + 9, + 9.5, + 2000, # specifically don't consider 2000 to match Period("2000", "D") + False, + None, + ], + ) + def test_compare_invalid_scalar(self, box_with_array, scalar): + # GH#28980 + # comparison with scalar that cannot be interpreted as a Period + pi = period_range("2000", periods=4) + parr = tm.box_expected(pi, box_with_array) + assert_invalid_comparison(parr, scalar, box_with_array) + + @pytest.mark.parametrize( + "other", + [ + pd.date_range("2000", periods=4).array, + pd.timedelta_range("1D", periods=4).array, + np.arange(4), + np.arange(4).astype(np.float64), + list(range(4)), + # match Period semantics by not treating integers as Periods + [2000, 2001, 2002, 2003], + np.arange(2000, 2004), + np.arange(2000, 2004).astype(object), + pd.Index([2000, 2001, 2002, 2003]), + ], + ) + def test_compare_invalid_listlike(self, box_with_array, other): + pi = period_range("2000", periods=4) + parr = tm.box_expected(pi, box_with_array) + assert_invalid_comparison(parr, other, box_with_array) + + @pytest.mark.parametrize("other_box", [list, np.array, lambda x: x.astype(object)]) + def test_compare_object_dtype(self, box_with_array, other_box): + pi = period_range("2000", periods=5) + parr = tm.box_expected(pi, box_with_array) + + other = other_box(pi) + xbox = get_upcast_box(parr, other, True) + + expected = np.array([True, True, True, True, True]) + expected = tm.box_expected(expected, xbox) + + result = parr == other + tm.assert_equal(result, expected) + result = parr <= other + tm.assert_equal(result, expected) + result = parr >= other + tm.assert_equal(result, expected) + + result = parr != other + tm.assert_equal(result, ~expected) + result = parr < other + tm.assert_equal(result, ~expected) + result = parr > other + tm.assert_equal(result, ~expected) + + other = other_box(pi[::-1]) + + expected = np.array([False, False, True, False, False]) + expected = tm.box_expected(expected, xbox) + result = parr == other + tm.assert_equal(result, expected) + + expected = np.array([True, True, True, False, False]) + expected = tm.box_expected(expected, xbox) + result = parr <= other + tm.assert_equal(result, expected) + + expected = np.array([False, False, True, True, True]) + expected = tm.box_expected(expected, xbox) + result = parr >= other + tm.assert_equal(result, expected) + + expected = np.array([True, True, False, True, True]) + expected = tm.box_expected(expected, xbox) + result = parr != other + tm.assert_equal(result, expected) + + expected = np.array([True, True, False, False, False]) + expected = tm.box_expected(expected, xbox) + result = parr < 
other + tm.assert_equal(result, expected) + + expected = np.array([False, False, False, True, True]) + expected = tm.box_expected(expected, xbox) + result = parr > other + tm.assert_equal(result, expected) + + +class TestPeriodIndexComparisons: + # TODO: parameterize over boxes + + def test_pi_cmp_period(self): + idx = period_range("2007-01", periods=20, freq="M") + per = idx[10] + + result = idx < per + exp = idx.values < idx.values[10] + tm.assert_numpy_array_equal(result, exp) + + # Tests Period.__richcmp__ against ndarray[object, ndim=2] + result = idx.values.reshape(10, 2) < per + tm.assert_numpy_array_equal(result, exp.reshape(10, 2)) + + # Tests Period.__richcmp__ against ndarray[object, ndim=0] + result = idx < np.array(per) + tm.assert_numpy_array_equal(result, exp) + + # TODO: moved from test_datetime64; de-duplicate with version below + def test_parr_cmp_period_scalar2(self, box_with_array): + pi = period_range("2000-01-01", periods=10, freq="D") + + val = pi[3] + expected = [x > val for x in pi] + + ser = tm.box_expected(pi, box_with_array) + xbox = get_upcast_box(ser, val, True) + + expected = tm.box_expected(expected, xbox) + result = ser > val + tm.assert_equal(result, expected) + + val = pi[5] + result = ser > val + expected = [x > val for x in pi] + expected = tm.box_expected(expected, xbox) + tm.assert_equal(result, expected) + + @pytest.mark.parametrize("freq", ["M", "2M", "3M"]) + def test_parr_cmp_period_scalar(self, freq, box_with_array): + # GH#13200 + base = PeriodIndex(["2011-01", "2011-02", "2011-03", "2011-04"], freq=freq) + base = tm.box_expected(base, box_with_array) + per = Period("2011-02", freq=freq) + xbox = get_upcast_box(base, per, True) + + exp = np.array([False, True, False, False]) + exp = tm.box_expected(exp, xbox) + tm.assert_equal(base == per, exp) + tm.assert_equal(per == base, exp) + + exp = np.array([True, False, True, True]) + exp = tm.box_expected(exp, xbox) + tm.assert_equal(base != per, exp) + tm.assert_equal(per != base, exp) + + exp = np.array([False, False, True, True]) + exp = tm.box_expected(exp, xbox) + tm.assert_equal(base > per, exp) + tm.assert_equal(per < base, exp) + + exp = np.array([True, False, False, False]) + exp = tm.box_expected(exp, xbox) + tm.assert_equal(base < per, exp) + tm.assert_equal(per > base, exp) + + exp = np.array([False, True, True, True]) + exp = tm.box_expected(exp, xbox) + tm.assert_equal(base >= per, exp) + tm.assert_equal(per <= base, exp) + + exp = np.array([True, True, False, False]) + exp = tm.box_expected(exp, xbox) + tm.assert_equal(base <= per, exp) + tm.assert_equal(per >= base, exp) + + @pytest.mark.parametrize("freq", ["M", "2M", "3M"]) + def test_parr_cmp_pi(self, freq, box_with_array): + # GH#13200 + base = PeriodIndex(["2011-01", "2011-02", "2011-03", "2011-04"], freq=freq) + base = tm.box_expected(base, box_with_array) + + # TODO: could also box idx? 
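The scalar-comparison tests above encode two rules: same-frequency Period comparisons are ordinary elementwise comparisons, and a mismatched frequency is an invalid comparison. A sketch using the same `"M"`/`"A"` aliases as the tests (pandas 1.x):

```python
import pandas as pd
from pandas import Period, PeriodIndex

pi = PeriodIndex(["2011-01", "2011-02", "2011-03", "2011-04"], freq="M")

# Same frequency: plain elementwise ordering.
print(pi > Period("2011-02", freq="M"))   # [False, False, True, True]

# Different frequency: not comparable at all.
try:
    pi > Period("2011", freq="A")
except TypeError as err:
    print(err)   # Invalid comparison between dtype=period[M] and Period
```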
+ idx = PeriodIndex(["2011-02", "2011-01", "2011-03", "2011-05"], freq=freq) + + xbox = get_upcast_box(base, idx, True) + + exp = np.array([False, False, True, False]) + exp = tm.box_expected(exp, xbox) + tm.assert_equal(base == idx, exp) + + exp = np.array([True, True, False, True]) + exp = tm.box_expected(exp, xbox) + tm.assert_equal(base != idx, exp) + + exp = np.array([False, True, False, False]) + exp = tm.box_expected(exp, xbox) + tm.assert_equal(base > idx, exp) + + exp = np.array([True, False, False, True]) + exp = tm.box_expected(exp, xbox) + tm.assert_equal(base < idx, exp) + + exp = np.array([False, True, True, False]) + exp = tm.box_expected(exp, xbox) + tm.assert_equal(base >= idx, exp) + + exp = np.array([True, False, True, True]) + exp = tm.box_expected(exp, xbox) + tm.assert_equal(base <= idx, exp) + + @pytest.mark.parametrize("freq", ["M", "2M", "3M"]) + def test_parr_cmp_pi_mismatched_freq(self, freq, box_with_array): + # GH#13200 + # different base freq + base = PeriodIndex(["2011-01", "2011-02", "2011-03", "2011-04"], freq=freq) + base = tm.box_expected(base, box_with_array) + + msg = rf"Invalid comparison between dtype=period\[{freq}\] and Period" + with pytest.raises(TypeError, match=msg): + base <= Period("2011", freq="A") + + with pytest.raises(TypeError, match=msg): + Period("2011", freq="A") >= base + + # TODO: Could parametrize over boxes for idx? + idx = PeriodIndex(["2011", "2012", "2013", "2014"], freq="A") + rev_msg = r"Invalid comparison between dtype=period\[A-DEC\] and PeriodArray" + idx_msg = rev_msg if box_with_array in [tm.to_array, pd.array] else msg + with pytest.raises(TypeError, match=idx_msg): + base <= idx + + # Different frequency + msg = rf"Invalid comparison between dtype=period\[{freq}\] and Period" + with pytest.raises(TypeError, match=msg): + base <= Period("2011", freq="4M") + + with pytest.raises(TypeError, match=msg): + Period("2011", freq="4M") >= base + + idx = PeriodIndex(["2011", "2012", "2013", "2014"], freq="4M") + rev_msg = r"Invalid comparison between dtype=period\[4M\] and PeriodArray" + idx_msg = rev_msg if box_with_array in [tm.to_array, pd.array] else msg + with pytest.raises(TypeError, match=idx_msg): + base <= idx + + @pytest.mark.parametrize("freq", ["M", "2M", "3M"]) + def test_pi_cmp_nat(self, freq): + idx1 = PeriodIndex(["2011-01", "2011-02", "NaT", "2011-05"], freq=freq) + per = idx1[1] + + result = idx1 > per + exp = np.array([False, False, False, True]) + tm.assert_numpy_array_equal(result, exp) + result = per < idx1 + tm.assert_numpy_array_equal(result, exp) + + result = idx1 == pd.NaT + exp = np.array([False, False, False, False]) + tm.assert_numpy_array_equal(result, exp) + result = pd.NaT == idx1 + tm.assert_numpy_array_equal(result, exp) + + result = idx1 != pd.NaT + exp = np.array([True, True, True, True]) + tm.assert_numpy_array_equal(result, exp) + result = pd.NaT != idx1 + tm.assert_numpy_array_equal(result, exp) + + idx2 = PeriodIndex(["2011-02", "2011-01", "2011-04", "NaT"], freq=freq) + result = idx1 < idx2 + exp = np.array([True, False, False, False]) + tm.assert_numpy_array_equal(result, exp) + + result = idx1 == idx2 + exp = np.array([False, False, False, False]) + tm.assert_numpy_array_equal(result, exp) + + result = idx1 != idx2 + exp = np.array([True, True, True, True]) + tm.assert_numpy_array_equal(result, exp) + + result = idx1 == idx1 + exp = np.array([True, True, False, True]) + tm.assert_numpy_array_equal(result, exp) + + result = idx1 != idx1 + exp = np.array([False, False, True, False]) + 
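The NaT cases being exercised here follow the NaN pattern: `==` against NaT is always False, `!=` always True, and ordering comparisons are False. In isolation:

```python
import pandas as pd

pi = pd.PeriodIndex(["2011-01", "NaT", "2011-03"], freq="M")

# NaT never compares equal, not even to itself.
print(pi == pd.NaT)   # [False, False, False]
print(pi != pd.NaT)   # [True, True, True]
print(pi < pd.NaT)    # [False, False, False]
```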
tm.assert_numpy_array_equal(result, exp) + + @pytest.mark.parametrize("freq", ["M", "2M", "3M"]) + def test_pi_cmp_nat_mismatched_freq_raises(self, freq): + idx1 = PeriodIndex(["2011-01", "2011-02", "NaT", "2011-05"], freq=freq) + + diff = PeriodIndex(["2011-02", "2011-01", "2011-04", "NaT"], freq="4M") + msg = rf"Invalid comparison between dtype=period\[{freq}\] and PeriodArray" + with pytest.raises(TypeError, match=msg): + idx1 > diff + + result = idx1 == diff + expected = np.array([False, False, False, False], dtype=bool) + tm.assert_numpy_array_equal(result, expected) + + # TODO: De-duplicate with test_pi_cmp_nat + @pytest.mark.parametrize("dtype", [object, None]) + def test_comp_nat(self, dtype): + left = PeriodIndex([Period("2011-01-01"), pd.NaT, Period("2011-01-03")]) + right = PeriodIndex([pd.NaT, pd.NaT, Period("2011-01-03")]) + + if dtype is not None: + left = left.astype(dtype) + right = right.astype(dtype) + + result = left == right + expected = np.array([False, False, True]) + tm.assert_numpy_array_equal(result, expected) + + result = left != right + expected = np.array([True, True, False]) + tm.assert_numpy_array_equal(result, expected) + + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(left == pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT == right, expected) + + expected = np.array([True, True, True]) + tm.assert_numpy_array_equal(left != pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT != left, expected) + + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(left < pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT > left, expected) + + +class TestPeriodSeriesComparisons: + def test_cmp_series_period_series_mixed_freq(self): + # GH#13200 + base = Series( + [ + Period("2011", freq="A"), + Period("2011-02", freq="M"), + Period("2013", freq="A"), + Period("2011-04", freq="M"), + ] + ) + + ser = Series( + [ + Period("2012", freq="A"), + Period("2011-01", freq="M"), + Period("2013", freq="A"), + Period("2011-05", freq="M"), + ] + ) + + exp = Series([False, False, True, False]) + tm.assert_series_equal(base == ser, exp) + + exp = Series([True, True, False, True]) + tm.assert_series_equal(base != ser, exp) + + exp = Series([False, True, False, False]) + tm.assert_series_equal(base > ser, exp) + + exp = Series([True, False, False, True]) + tm.assert_series_equal(base < ser, exp) + + exp = Series([False, True, True, False]) + tm.assert_series_equal(base >= ser, exp) + + exp = Series([True, False, True, True]) + tm.assert_series_equal(base <= ser, exp) + + +class TestPeriodIndexSeriesComparisonConsistency: + """Test PeriodIndex and Period Series Ops consistency""" + + # TODO: needs parametrization+de-duplication + + def _check(self, values, func, expected): + # Test PeriodIndex and Period Series Ops consistency + + idx = PeriodIndex(values) + result = func(idx) + + # check that we don't pass an unwanted type to tm.assert_equal + assert isinstance(expected, (pd.Index, np.ndarray)) + tm.assert_equal(result, expected) + + s = Series(values) + result = func(s) + + exp = Series(expected, name=values.name) + tm.assert_series_equal(result, exp) + + def test_pi_comp_period(self): + idx = PeriodIndex( + ["2011-01", "2011-02", "2011-03", "2011-04"], freq="M", name="idx" + ) + per = idx[2] + + f = lambda x: x == per + exp = np.array([False, False, True, False], dtype=np.bool_) + self._check(idx, f, exp) + f = lambda x: per == x + self._check(idx, f, exp) + + f = lambda x: x != per + exp = np.array([True, True, False, True], 
dtype=np.bool_)
+        self._check(idx, f, exp)
+        f = lambda x: per != x
+        self._check(idx, f, exp)
+
+        f = lambda x: per >= x
+        exp = np.array([True, True, True, False], dtype=np.bool_)
+        self._check(idx, f, exp)
+
+        f = lambda x: x > per
+        exp = np.array([False, False, False, True], dtype=np.bool_)
+        self._check(idx, f, exp)
+
+    def test_pi_comp_period_nat(self):
+        idx = PeriodIndex(
+            ["2011-01", "NaT", "2011-03", "2011-04"], freq="M", name="idx"
+        )
+        per = idx[2]
+
+        f = lambda x: x == per
+        exp = np.array([False, False, True, False], dtype=np.bool_)
+        self._check(idx, f, exp)
+        f = lambda x: per == x
+        self._check(idx, f, exp)
+
+        f = lambda x: x == pd.NaT
+        exp = np.array([False, False, False, False], dtype=np.bool_)
+        self._check(idx, f, exp)
+        f = lambda x: pd.NaT == x
+        self._check(idx, f, exp)
+
+        f = lambda x: x != per
+        exp = np.array([True, True, False, True], dtype=np.bool_)
+        self._check(idx, f, exp)
+        f = lambda x: per != x
+        self._check(idx, f, exp)
+
+        f = lambda x: x != pd.NaT
+        exp = np.array([True, True, True, True], dtype=np.bool_)
+        self._check(idx, f, exp)
+        f = lambda x: pd.NaT != x
+        self._check(idx, f, exp)
+
+        f = lambda x: per >= x
+        exp = np.array([True, False, True, False], dtype=np.bool_)
+        self._check(idx, f, exp)
+
+        f = lambda x: x < per
+        exp = np.array([True, False, False, False], dtype=np.bool_)
+        self._check(idx, f, exp)
+
+        f = lambda x: x > pd.NaT
+        exp = np.array([False, False, False, False], dtype=np.bool_)
+        self._check(idx, f, exp)
+
+        f = lambda x: pd.NaT >= x
+        exp = np.array([False, False, False, False], dtype=np.bool_)
+        self._check(idx, f, exp)
+
+
+# ------------------------------------------------------------------
+# Arithmetic
+
+
+class TestPeriodFrameArithmetic:
+    def test_ops_frame_period(self):
+        # GH#13043
+        df = pd.DataFrame(
+            {
+                "A": [Period("2015-01", freq="M"), Period("2015-02", freq="M")],
+                "B": [Period("2014-01", freq="M"), Period("2014-02", freq="M")],
+            }
+        )
+        assert df["A"].dtype == "Period[M]"
+        assert df["B"].dtype == "Period[M]"
+
+        p = Period("2015-03", freq="M")
+        off = p.freq
+        # dtype will be object because of original dtype
+        exp = pd.DataFrame(
+            {
+                "A": np.array([2 * off, 1 * off], dtype=object),
+                "B": np.array([14 * off, 13 * off], dtype=object),
+            }
+        )
+        tm.assert_frame_equal(p - df, exp)
+        tm.assert_frame_equal(df - p, -1 * exp)
+
+        df2 = pd.DataFrame(
+            {
+                "A": [Period("2015-05", freq="M"), Period("2015-06", freq="M")],
+                "B": [Period("2015-05", freq="M"), Period("2015-06", freq="M")],
+            }
+        )
+        assert df2["A"].dtype == "Period[M]"
+        assert df2["B"].dtype == "Period[M]"
+
+        exp = pd.DataFrame(
+            {
+                "A": np.array([4 * off, 4 * off], dtype=object),
+                "B": np.array([16 * off, 16 * off], dtype=object),
+            }
+        )
+        tm.assert_frame_equal(df2 - df, exp)
+        tm.assert_frame_equal(df - df2, -1 * exp)
+
+
+class TestPeriodIndexArithmetic:
+    # ---------------------------------------------------------------
+    # __add__/__sub__ with PeriodIndex
+    # PeriodIndex + other is defined for integers and timedelta-like others
+    # PeriodIndex - other is defined for integers, timedelta-like others,
+    # and PeriodIndex (with matching freq)
+
+    def test_parr_add_iadd_parr_raises(self, box_with_array):
+        rng = period_range("1/1/2000", freq="D", periods=5)
+        other = period_range("1/6/2000", freq="D", periods=5)
+        # TODO: parametrize over boxes for other?
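+        # Editor's illustrative sketch (added, not from the original suite):
+        # the prohibition already holds for the underlying scalars; the boxed
+        # checks below are the array-level analogue.
+        with pytest.raises(TypeError):
+            rng[0] + other[0]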
+ + rng = tm.box_expected(rng, box_with_array) + # An earlier implementation of PeriodIndex addition performed + # a set operation (union). This has since been changed to + # raise a TypeError. See GH#14164 and GH#13077 for historical + # reference. + msg = r"unsupported operand type\(s\) for \+: .* and .*" + with pytest.raises(TypeError, match=msg): + rng + other + + with pytest.raises(TypeError, match=msg): + rng += other + + def test_pi_sub_isub_pi(self): + # GH#20049 + # For historical reference see GH#14164, GH#13077. + # PeriodIndex subtraction originally performed set difference, + # then changed to raise TypeError before being implemented in GH#20049 + rng = period_range("1/1/2000", freq="D", periods=5) + other = period_range("1/6/2000", freq="D", periods=5) + + off = rng.freq + expected = pd.Index([-5 * off] * 5) + result = rng - other + tm.assert_index_equal(result, expected) + + rng -= other + tm.assert_index_equal(rng, expected) + + def test_pi_sub_pi_with_nat(self): + rng = period_range("1/1/2000", freq="D", periods=5) + other = rng[1:].insert(0, pd.NaT) + assert other[1:].equals(rng[1:]) + + result = rng - other + off = rng.freq + expected = pd.Index([pd.NaT, 0 * off, 0 * off, 0 * off, 0 * off]) + tm.assert_index_equal(result, expected) + + def test_parr_sub_pi_mismatched_freq(self, box_with_array, box_with_array2): + rng = period_range("1/1/2000", freq="D", periods=5) + other = period_range("1/6/2000", freq="H", periods=5) + + rng = tm.box_expected(rng, box_with_array) + other = tm.box_expected(other, box_with_array2) + msg = r"Input has different freq=[HD] from PeriodArray\(freq=[DH]\)" + with pytest.raises(IncompatibleFrequency, match=msg): + rng - other + + @pytest.mark.parametrize("n", [1, 2, 3, 4]) + def test_sub_n_gt_1_ticks(self, tick_classes, n): + # GH 23878 + p1_d = "19910905" + p2_d = "19920406" + p1 = PeriodIndex([p1_d], freq=tick_classes(n)) + p2 = PeriodIndex([p2_d], freq=tick_classes(n)) + + expected = PeriodIndex([p2_d], freq=p2.freq.base) - PeriodIndex( + [p1_d], freq=p1.freq.base + ) + + tm.assert_index_equal((p2 - p1), expected) + + @pytest.mark.parametrize("n", [1, 2, 3, 4]) + @pytest.mark.parametrize( + "offset, kwd_name", + [ + (pd.offsets.YearEnd, "month"), + (pd.offsets.QuarterEnd, "startingMonth"), + (pd.offsets.MonthEnd, None), + (pd.offsets.Week, "weekday"), + ], + ) + def test_sub_n_gt_1_offsets(self, offset, kwd_name, n): + # GH 23878 + kwds = {kwd_name: 3} if kwd_name is not None else {} + p1_d = "19910905" + p2_d = "19920406" + freq = offset(n, normalize=False, **kwds) + p1 = PeriodIndex([p1_d], freq=freq) + p2 = PeriodIndex([p2_d], freq=freq) + + result = p2 - p1 + expected = PeriodIndex([p2_d], freq=freq.base) - PeriodIndex( + [p1_d], freq=freq.base + ) + + tm.assert_index_equal(result, expected) + + # ------------------------------------------------------------- + # Invalid Operations + + @pytest.mark.parametrize( + "other", + [ + # datetime scalars + Timestamp("2016-01-01"), + Timestamp("2016-01-01").to_pydatetime(), + Timestamp("2016-01-01").to_datetime64(), + # datetime-like arrays + pd.date_range("2016-01-01", periods=3, freq="H"), + pd.date_range("2016-01-01", periods=3, tz="Europe/Brussels"), + pd.date_range("2016-01-01", periods=3, freq="S")._data, + pd.date_range("2016-01-01", periods=3, tz="Asia/Tokyo")._data, + # Miscellaneous invalid types + 3.14, + np.array([2.0, 3.0, 4.0]), + ], + ) + def test_parr_add_sub_invalid(self, other, box_with_array): + # GH#23215 + rng = period_range("1/1/2000", freq="D", periods=3) + rng = 
tm.box_expected(rng, box_with_array)
+
+        msg = "|".join(
+            [
+                r"(?:cannot add PeriodArray and .*)",
+                r"(?:cannot subtract .* from (?:a\s)?.*)",
+                r"(?:unsupported operand type\(s\) for \+: .* and .*)",
+                r"unsupported operand type\(s\) for [+-]: .* and .*",
+            ]
+        )
+        assert_invalid_addsub_type(rng, other, msg)
+        with pytest.raises(TypeError, match=msg):
+            rng + other
+        with pytest.raises(TypeError, match=msg):
+            other + rng
+        with pytest.raises(TypeError, match=msg):
+            rng - other
+        with pytest.raises(TypeError, match=msg):
+            other - rng
+
+    # -----------------------------------------------------------------
+    # __add__/__sub__ with ndarray[datetime64] and ndarray[timedelta64]
+
+    def test_pi_add_sub_td64_array_non_tick_raises(self):
+        rng = period_range("1/1/2000", freq="Q", periods=3)
+        tdi = TimedeltaIndex(["-1 Day", "-1 Day", "-1 Day"])
+        tdarr = tdi.values
+
+        msg = r"Cannot add or subtract timedelta64\[ns\] dtype from period\[Q-DEC\]"
+        with pytest.raises(TypeError, match=msg):
+            rng + tdarr
+        with pytest.raises(TypeError, match=msg):
+            tdarr + rng
+
+        with pytest.raises(TypeError, match=msg):
+            rng - tdarr
+        msg = r"cannot subtract period\[Q-DEC\]-dtype from TimedeltaArray"
+        with pytest.raises(TypeError, match=msg):
+            tdarr - rng
+
+    def test_pi_add_sub_td64_array_tick(self):
+        # PeriodIndex + Timedelta-like is allowed only with
+        # tick-like frequencies
+        rng = period_range("1/1/2000", freq="90D", periods=3)
+        tdi = TimedeltaIndex(["-1 Day", "-1 Day", "-1 Day"])
+        tdarr = tdi.values
+
+        expected = period_range("12/31/1999", freq="90D", periods=3)
+        result = rng + tdi
+        tm.assert_index_equal(result, expected)
+        result = rng + tdarr
+        tm.assert_index_equal(result, expected)
+        result = tdi + rng
+        tm.assert_index_equal(result, expected)
+        result = tdarr + rng
+        tm.assert_index_equal(result, expected)
+
+        expected = period_range("1/2/2000", freq="90D", periods=3)
+
+        result = rng - tdi
+        tm.assert_index_equal(result, expected)
+        result = rng - tdarr
+        tm.assert_index_equal(result, expected)
+
+        msg = r"cannot subtract .* from .*"
+        with pytest.raises(TypeError, match=msg):
+            tdarr - rng
+
+        with pytest.raises(TypeError, match=msg):
+            tdi - rng
+
+    @pytest.mark.parametrize("pi_freq", ["D", "W", "Q", "H"])
+    @pytest.mark.parametrize("tdi_freq", [None, "H"])
+    def test_parr_sub_td64array(self, box_with_array, tdi_freq, pi_freq):
+        box = box_with_array
+        xbox = box if box not in [pd.array, tm.to_array] else pd.Index
+
+        tdi = TimedeltaIndex(["1 hours", "2 hours"], freq=tdi_freq)
+        dti = Timestamp("2018-03-07 17:16:40") + tdi
+        pi = dti.to_period(pi_freq)
+
+        # TODO: parametrize over box for pi?
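+        # Editor's illustrative sketch (not part of the original test): for a
+        # tick-like frequency such as "H", scalar Period - Timedelta is well
+        # defined and shifts by whole periods; the branches below check the
+        # boxed equivalents for each pi_freq.
+        assert Period("2018-03-07 18:00", freq="H") - Timedelta(hours=1) == Period(
+            "2018-03-07 17:00", freq="H"
+        )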
+        td64obj = tm.box_expected(tdi, box)
+
+        if pi_freq == "H":
+            result = pi - td64obj
+            expected = (pi.to_timestamp("S") - tdi).to_period(pi_freq)
+            expected = tm.box_expected(expected, xbox)
+            tm.assert_equal(result, expected)
+
+            # Subtract from scalar
+            result = pi[0] - td64obj
+            expected = (pi[0].to_timestamp("S") - tdi).to_period(pi_freq)
+            expected = tm.box_expected(expected, box)
+            tm.assert_equal(result, expected)
+
+        elif pi_freq == "D":
+            # Tick, but non-compatible
+            msg = "Input has different freq=None from PeriodArray"
+            with pytest.raises(IncompatibleFrequency, match=msg):
+                pi - td64obj
+            with pytest.raises(IncompatibleFrequency, match=msg):
+                pi[0] - td64obj
+
+        else:
+            # With a non-Tick freq we cannot add or subtract a timedelta64
+            # array, regardless of its resolution
+            msg = "Cannot add or subtract timedelta64"
+            with pytest.raises(TypeError, match=msg):
+                pi - td64obj
+            with pytest.raises(TypeError, match=msg):
+                pi[0] - td64obj
+
+    # -----------------------------------------------------------------
+    # operations with array/Index of DateOffset objects
+
+    @pytest.mark.parametrize("box", [np.array, pd.Index])
+    def test_pi_add_offset_array(self, box):
+        # GH#18849
+        pi = PeriodIndex([Period("2015Q1"), Period("2016Q2")])
+        offs = box(
+            [
+                pd.offsets.QuarterEnd(n=1, startingMonth=12),
+                pd.offsets.QuarterEnd(n=-2, startingMonth=12),
+            ]
+        )
+        expected = PeriodIndex([Period("2015Q2"), Period("2015Q4")])
+
+        with tm.assert_produces_warning(PerformanceWarning):
+            res = pi + offs
+        tm.assert_index_equal(res, expected)
+
+        with tm.assert_produces_warning(PerformanceWarning):
+            res2 = offs + pi
+        tm.assert_index_equal(res2, expected)
+
+        unanchored = np.array([pd.offsets.Hour(n=1), pd.offsets.Minute(n=-2)])
+        # addition/subtraction ops with incompatible offsets should issue
+        # a PerformanceWarning and _then_ raise an IncompatibleFrequency error.
+        msg = r"Input cannot be converted to Period\(freq=Q-DEC\)"
+        with pytest.raises(IncompatibleFrequency, match=msg):
+            with tm.assert_produces_warning(PerformanceWarning):
+                pi + unanchored
+        with pytest.raises(IncompatibleFrequency, match=msg):
+            with tm.assert_produces_warning(PerformanceWarning):
+                unanchored + pi
+
+    @pytest.mark.parametrize("box", [np.array, pd.Index])
+    def test_pi_sub_offset_array(self, box):
+        # GH#18824
+        pi = PeriodIndex([Period("2015Q1"), Period("2016Q2")])
+        other = box(
+            [
+                pd.offsets.QuarterEnd(n=1, startingMonth=12),
+                pd.offsets.QuarterEnd(n=-2, startingMonth=12),
+            ]
+        )
+
+        expected = PeriodIndex([pi[n] - other[n] for n in range(len(pi))])
+
+        with tm.assert_produces_warning(PerformanceWarning):
+            res = pi - other
+        tm.assert_index_equal(res, expected)
+
+        anchored = box([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)])
+
+        # addition/subtraction ops with anchored offsets should issue
+        # a PerformanceWarning and _then_ raise an IncompatibleFrequency error.
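+        # (Editor's note, illustrative: subtraction negates the offset, so the
+        # elementwise op tries e.g. Period("2015Q1") + (-1) * pd.offsets.MonthEnd(),
+        # whose freq -1M cannot be absorbed into Q-DEC; hence the message below.)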
+ msg = r"Input has different freq=-1M from Period\(freq=Q-DEC\)" + with pytest.raises(IncompatibleFrequency, match=msg): + with tm.assert_produces_warning(PerformanceWarning): + pi - anchored + with pytest.raises(IncompatibleFrequency, match=msg): + with tm.assert_produces_warning(PerformanceWarning): + anchored - pi + + def test_pi_add_iadd_int(self, one): + # Variants of `one` for #19012 + rng = period_range("2000-01-01 09:00", freq="H", periods=10) + result = rng + one + expected = period_range("2000-01-01 10:00", freq="H", periods=10) + tm.assert_index_equal(result, expected) + rng += one + tm.assert_index_equal(rng, expected) + + def test_pi_sub_isub_int(self, one): + """ + PeriodIndex.__sub__ and __isub__ with several representations of + the integer 1, e.g. int, np.int64, np.uint8, ... + """ + rng = period_range("2000-01-01 09:00", freq="H", periods=10) + result = rng - one + expected = period_range("2000-01-01 08:00", freq="H", periods=10) + tm.assert_index_equal(result, expected) + rng -= one + tm.assert_index_equal(rng, expected) + + @pytest.mark.parametrize("five", [5, np.array(5, dtype=np.int64)]) + def test_pi_sub_intlike(self, five): + rng = period_range("2007-01", periods=50) + + result = rng - five + exp = rng + (-five) + tm.assert_index_equal(result, exp) + + def test_pi_sub_isub_offset(self): + # offset + # DateOffset + rng = period_range("2014", "2024", freq="A") + result = rng - pd.offsets.YearEnd(5) + expected = period_range("2009", "2019", freq="A") + tm.assert_index_equal(result, expected) + rng -= pd.offsets.YearEnd(5) + tm.assert_index_equal(rng, expected) + + rng = period_range("2014-01", "2016-12", freq="M") + result = rng - pd.offsets.MonthEnd(5) + expected = period_range("2013-08", "2016-07", freq="M") + tm.assert_index_equal(result, expected) + + rng -= pd.offsets.MonthEnd(5) + tm.assert_index_equal(rng, expected) + + @pytest.mark.parametrize("transpose", [True, False]) + def test_pi_add_offset_n_gt1(self, box_with_array, transpose): + # GH#23215 + # add offset to PeriodIndex with freq.n > 1 + + per = Period("2016-01", freq="2M") + pi = PeriodIndex([per]) + + expected = PeriodIndex(["2016-03"], freq="2M") + + pi = tm.box_expected(pi, box_with_array, transpose=transpose) + expected = tm.box_expected(expected, box_with_array, transpose=transpose) + + result = pi + per.freq + tm.assert_equal(result, expected) + + result = per.freq + pi + tm.assert_equal(result, expected) + + def test_pi_add_offset_n_gt1_not_divisible(self, box_with_array): + # GH#23215 + # PeriodIndex with freq.n > 1 add offset with offset.n % freq.n != 0 + pi = PeriodIndex(["2016-01"], freq="2M") + expected = PeriodIndex(["2016-04"], freq="2M") + + pi = tm.box_expected(pi, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = pi + to_offset("3M") + tm.assert_equal(result, expected) + + result = to_offset("3M") + pi + tm.assert_equal(result, expected) + + # --------------------------------------------------------------- + # __add__/__sub__ with integer arrays + + @pytest.mark.parametrize("int_holder", [np.array, pd.Index]) + @pytest.mark.parametrize("op", [operator.add, ops.radd]) + def test_pi_add_intarray(self, int_holder, op): + # GH#19959 + pi = PeriodIndex([Period("2015Q1"), Period("NaT")]) + other = int_holder([4, -1]) + + result = op(pi, other) + expected = PeriodIndex([Period("2016Q1"), Period("NaT")]) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("int_holder", [np.array, pd.Index]) + def test_pi_sub_intarray(self, int_holder): + # 
GH#19959
+        pi = PeriodIndex([Period("2015Q1"), Period("NaT")])
+        other = int_holder([4, -1])
+
+        result = pi - other
+        expected = PeriodIndex([Period("2014Q1"), Period("NaT")])
+        tm.assert_index_equal(result, expected)
+
+        msg = r"bad operand type for unary -: 'PeriodArray'"
+        with pytest.raises(TypeError, match=msg):
+            other - pi
+
+    # ---------------------------------------------------------------
+    # Timedelta-like (timedelta, timedelta64, Timedelta, Tick)
+    # TODO: Some of these are misnomers because of non-Tick DateOffsets
+
+    def test_parr_add_timedeltalike_minute_gt1(self, three_days, box_with_array):
+        # GH#23031 adding a time-delta-like offset to a PeriodArray that has
+        # daily frequency with n != 1. A more general case is tested below
+        # in test_parr_add_timedeltalike_tick_gt1, but here we write out the
+        # expected result more explicitly.
+        other = three_days
+        rng = period_range("2014-05-01", periods=3, freq="2D")
+        rng = tm.box_expected(rng, box_with_array)
+
+        expected = PeriodIndex(["2014-05-04", "2014-05-06", "2014-05-08"], freq="2D")
+        expected = tm.box_expected(expected, box_with_array)
+
+        result = rng + other
+        tm.assert_equal(result, expected)
+
+        result = other + rng
+        tm.assert_equal(result, expected)
+
+        # subtraction
+        expected = PeriodIndex(["2014-04-28", "2014-04-30", "2014-05-02"], freq="2D")
+        expected = tm.box_expected(expected, box_with_array)
+        result = rng - other
+        tm.assert_equal(result, expected)
+
+        msg = "|".join(
+            [
+                r"bad operand type for unary -: 'PeriodArray'",
+                r"cannot subtract PeriodArray from timedelta64\[[hD]\]",
+            ]
+        )
+        with pytest.raises(TypeError, match=msg):
+            other - rng
+
+    @pytest.mark.parametrize("freqstr", ["5ns", "5us", "5ms", "5s", "5T", "5h", "5d"])
+    def test_parr_add_timedeltalike_tick_gt1(self, three_days, freqstr, box_with_array):
+        # GH#23031 adding a time-delta-like offset to a PeriodArray that has
+        # tick-like frequency with n != 1
+        other = three_days
+        rng = period_range("2014-05-01", periods=6, freq=freqstr)
+        first = rng[0]
+        rng = tm.box_expected(rng, box_with_array)
+
+        expected = period_range(first + other, periods=6, freq=freqstr)
+        expected = tm.box_expected(expected, box_with_array)
+
+        result = rng + other
+        tm.assert_equal(result, expected)
+
+        result = other + rng
+        tm.assert_equal(result, expected)
+
+        # subtraction
+        expected = period_range(first - other, periods=6, freq=freqstr)
+        expected = tm.box_expected(expected, box_with_array)
+        result = rng - other
+        tm.assert_equal(result, expected)
+        msg = "|".join(
+            [
+                r"bad operand type for unary -: 'PeriodArray'",
+                r"cannot subtract PeriodArray from timedelta64\[[hD]\]",
+            ]
+        )
+        with pytest.raises(TypeError, match=msg):
+            other - rng
+
+    def test_pi_add_iadd_timedeltalike_daily(self, three_days):
+        # Tick
+        other = three_days
+        rng = period_range("2014-05-01", "2014-05-15", freq="D")
+        expected = period_range("2014-05-04", "2014-05-18", freq="D")
+
+        result = rng + other
+        tm.assert_index_equal(result, expected)
+
+        rng += other
+        tm.assert_index_equal(rng, expected)
+
+    def test_pi_sub_isub_timedeltalike_daily(self, three_days):
+        # Tick-like 3 Days
+        other = three_days
+        rng = period_range("2014-05-01", "2014-05-15", freq="D")
+        expected = period_range("2014-04-28", "2014-05-12", freq="D")
+
+        result = rng - other
+        tm.assert_index_equal(result, expected)
+
+        rng -= other
+        tm.assert_index_equal(rng, expected)
+
+    def test_parr_add_sub_timedeltalike_freq_mismatch_daily(
+        self, not_daily, box_with_array
+    ):
+        other = not_daily
+        rng = 
period_range("2014-05-01", "2014-05-15", freq="D") + rng = tm.box_expected(rng, box_with_array) + + msg = "Input has different freq(=.+)? from Period.*?\\(freq=D\\)" + with pytest.raises(IncompatibleFrequency, match=msg): + rng + other + with pytest.raises(IncompatibleFrequency, match=msg): + rng += other + with pytest.raises(IncompatibleFrequency, match=msg): + rng - other + with pytest.raises(IncompatibleFrequency, match=msg): + rng -= other + + def test_pi_add_iadd_timedeltalike_hourly(self, two_hours): + other = two_hours + rng = period_range("2014-01-01 10:00", "2014-01-05 10:00", freq="H") + expected = period_range("2014-01-01 12:00", "2014-01-05 12:00", freq="H") + + result = rng + other + tm.assert_index_equal(result, expected) + + rng += other + tm.assert_index_equal(rng, expected) + + def test_parr_add_timedeltalike_mismatched_freq_hourly( + self, not_hourly, box_with_array + ): + other = not_hourly + rng = period_range("2014-01-01 10:00", "2014-01-05 10:00", freq="H") + rng = tm.box_expected(rng, box_with_array) + msg = "Input has different freq(=.+)? from Period.*?\\(freq=H\\)" + + with pytest.raises(IncompatibleFrequency, match=msg): + rng + other + + with pytest.raises(IncompatibleFrequency, match=msg): + rng += other + + def test_pi_sub_isub_timedeltalike_hourly(self, two_hours): + other = two_hours + rng = period_range("2014-01-01 10:00", "2014-01-05 10:00", freq="H") + expected = period_range("2014-01-01 08:00", "2014-01-05 08:00", freq="H") + + result = rng - other + tm.assert_index_equal(result, expected) + + rng -= other + tm.assert_index_equal(rng, expected) + + def test_add_iadd_timedeltalike_annual(self): + # offset + # DateOffset + rng = period_range("2014", "2024", freq="A") + result = rng + pd.offsets.YearEnd(5) + expected = period_range("2019", "2029", freq="A") + tm.assert_index_equal(result, expected) + rng += pd.offsets.YearEnd(5) + tm.assert_index_equal(rng, expected) + + def test_pi_add_sub_timedeltalike_freq_mismatch_annual(self, mismatched_freq): + other = mismatched_freq + rng = period_range("2014", "2024", freq="A") + msg = "Input has different freq(=.+)? from Period.*?\\(freq=A-DEC\\)" + with pytest.raises(IncompatibleFrequency, match=msg): + rng + other + with pytest.raises(IncompatibleFrequency, match=msg): + rng += other + with pytest.raises(IncompatibleFrequency, match=msg): + rng - other + with pytest.raises(IncompatibleFrequency, match=msg): + rng -= other + + def test_pi_add_iadd_timedeltalike_M(self): + rng = period_range("2014-01", "2016-12", freq="M") + expected = period_range("2014-06", "2017-05", freq="M") + + result = rng + pd.offsets.MonthEnd(5) + tm.assert_index_equal(result, expected) + + rng += pd.offsets.MonthEnd(5) + tm.assert_index_equal(rng, expected) + + def test_pi_add_sub_timedeltalike_freq_mismatch_monthly(self, mismatched_freq): + other = mismatched_freq + rng = period_range("2014-01", "2016-12", freq="M") + msg = "Input has different freq(=.+)? 
from Period.*?\\(freq=M\\)" + with pytest.raises(IncompatibleFrequency, match=msg): + rng + other + with pytest.raises(IncompatibleFrequency, match=msg): + rng += other + with pytest.raises(IncompatibleFrequency, match=msg): + rng - other + with pytest.raises(IncompatibleFrequency, match=msg): + rng -= other + + @pytest.mark.parametrize("transpose", [True, False]) + def test_parr_add_sub_td64_nat(self, box_with_array, transpose): + # GH#23320 special handling for timedelta64("NaT") + pi = period_range("1994-04-01", periods=9, freq="19D") + other = np.timedelta64("NaT") + expected = PeriodIndex(["NaT"] * 9, freq="19D") + + obj = tm.box_expected(pi, box_with_array, transpose=transpose) + expected = tm.box_expected(expected, box_with_array, transpose=transpose) + + result = obj + other + tm.assert_equal(result, expected) + result = other + obj + tm.assert_equal(result, expected) + result = obj - other + tm.assert_equal(result, expected) + msg = r"cannot subtract .* from .*" + with pytest.raises(TypeError, match=msg): + other - obj + + @pytest.mark.parametrize( + "other", + [ + np.array(["NaT"] * 9, dtype="m8[ns]"), + TimedeltaArray._from_sequence(["NaT"] * 9), + ], + ) + def test_parr_add_sub_tdt64_nat_array(self, box_with_array, other): + pi = period_range("1994-04-01", periods=9, freq="19D") + expected = PeriodIndex(["NaT"] * 9, freq="19D") + + obj = tm.box_expected(pi, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = obj + other + tm.assert_equal(result, expected) + result = other + obj + tm.assert_equal(result, expected) + result = obj - other + tm.assert_equal(result, expected) + msg = r"cannot subtract .* from .*" + with pytest.raises(TypeError, match=msg): + other - obj + + # --------------------------------------------------------------- + # Unsorted + + def test_parr_add_sub_index(self): + # Check that PeriodArray defers to Index on arithmetic ops + pi = period_range("2000-12-31", periods=3) + parr = pi.array + + result = parr - pi + expected = pi - pi + tm.assert_index_equal(result, expected) + + def test_parr_add_sub_object_array(self): + pi = period_range("2000-12-31", periods=3, freq="D") + parr = pi.array + + other = np.array([Timedelta(days=1), pd.offsets.Day(2), 3]) + + with tm.assert_produces_warning(PerformanceWarning): + result = parr + other + + expected = PeriodIndex( + ["2001-01-01", "2001-01-03", "2001-01-05"], freq="D" + ).array + tm.assert_equal(result, expected) + + with tm.assert_produces_warning(PerformanceWarning): + result = parr - other + + expected = PeriodIndex(["2000-12-30"] * 3, freq="D").array + tm.assert_equal(result, expected) + + +class TestPeriodSeriesArithmetic: + def test_parr_add_timedeltalike_scalar(self, three_days, box_with_array): + # GH#13043 + ser = Series( + [Period("2015-01-01", freq="D"), Period("2015-01-02", freq="D")], + name="xxx", + ) + assert ser.dtype == "Period[D]" + + expected = Series( + [Period("2015-01-04", freq="D"), Period("2015-01-05", freq="D")], + name="xxx", + ) + + obj = tm.box_expected(ser, box_with_array) + if box_with_array is pd.DataFrame: + assert (obj.dtypes == "Period[D]").all() + + expected = tm.box_expected(expected, box_with_array) + + result = obj + three_days + tm.assert_equal(result, expected) + + result = three_days + obj + tm.assert_equal(result, expected) + + def test_ops_series_period(self): + # GH#13043 + ser = Series( + [Period("2015-01-01", freq="D"), Period("2015-01-02", freq="D")], + name="xxx", + ) + assert ser.dtype == "Period[D]" + + per = Period("2015-01-10", 
freq="D") + off = per.freq + # dtype will be object because of original dtype + expected = Series([9 * off, 8 * off], name="xxx", dtype=object) + tm.assert_series_equal(per - ser, expected) + tm.assert_series_equal(ser - per, -1 * expected) + + s2 = Series( + [Period("2015-01-05", freq="D"), Period("2015-01-04", freq="D")], + name="xxx", + ) + assert s2.dtype == "Period[D]" + + expected = Series([4 * off, 2 * off], name="xxx", dtype=object) + tm.assert_series_equal(s2 - ser, expected) + tm.assert_series_equal(ser - s2, -1 * expected) + + +class TestPeriodIndexSeriesMethods: + """Test PeriodIndex and Period Series Ops consistency""" + + def _check(self, values, func, expected): + idx = PeriodIndex(values) + result = func(idx) + tm.assert_equal(result, expected) + + ser = Series(values) + result = func(ser) + + exp = Series(expected, name=values.name) + tm.assert_series_equal(result, exp) + + def test_pi_ops(self): + idx = PeriodIndex( + ["2011-01", "2011-02", "2011-03", "2011-04"], freq="M", name="idx" + ) + + expected = PeriodIndex( + ["2011-03", "2011-04", "2011-05", "2011-06"], freq="M", name="idx" + ) + + self._check(idx, lambda x: x + 2, expected) + self._check(idx, lambda x: 2 + x, expected) + + self._check(idx + 2, lambda x: x - 2, idx) + + result = idx - Period("2011-01", freq="M") + off = idx.freq + exp = pd.Index([0 * off, 1 * off, 2 * off, 3 * off], name="idx") + tm.assert_index_equal(result, exp) + + result = Period("2011-01", freq="M") - idx + exp = pd.Index([0 * off, -1 * off, -2 * off, -3 * off], name="idx") + tm.assert_index_equal(result, exp) + + @pytest.mark.parametrize("ng", ["str", 1.5]) + @pytest.mark.parametrize( + "func", + [ + lambda obj, ng: obj + ng, + lambda obj, ng: ng + obj, + lambda obj, ng: obj - ng, + lambda obj, ng: ng - obj, + lambda obj, ng: np.add(obj, ng), + lambda obj, ng: np.add(ng, obj), + lambda obj, ng: np.subtract(obj, ng), + lambda obj, ng: np.subtract(ng, obj), + ], + ) + def test_parr_ops_errors(self, ng, func, box_with_array): + idx = PeriodIndex( + ["2011-01", "2011-02", "2011-03", "2011-04"], freq="M", name="idx" + ) + obj = tm.box_expected(idx, box_with_array) + msg = "|".join( + [ + r"unsupported operand type\(s\)", + "can only concatenate", + r"must be str", + "object to str implicitly", + ] + ) + + with pytest.raises(TypeError, match=msg): + func(obj, ng) + + def test_pi_ops_nat(self): + idx = PeriodIndex( + ["2011-01", "2011-02", "NaT", "2011-04"], freq="M", name="idx" + ) + expected = PeriodIndex( + ["2011-03", "2011-04", "NaT", "2011-06"], freq="M", name="idx" + ) + + self._check(idx, lambda x: x + 2, expected) + self._check(idx, lambda x: 2 + x, expected) + self._check(idx, lambda x: np.add(x, 2), expected) + + self._check(idx + 2, lambda x: x - 2, idx) + self._check(idx + 2, lambda x: np.subtract(x, 2), idx) + + # freq with mult + idx = PeriodIndex( + ["2011-01", "2011-02", "NaT", "2011-04"], freq="2M", name="idx" + ) + expected = PeriodIndex( + ["2011-07", "2011-08", "NaT", "2011-10"], freq="2M", name="idx" + ) + + self._check(idx, lambda x: x + 3, expected) + self._check(idx, lambda x: 3 + x, expected) + self._check(idx, lambda x: np.add(x, 3), expected) + + self._check(idx + 3, lambda x: x - 3, idx) + self._check(idx + 3, lambda x: np.subtract(x, 3), idx) + + def test_pi_ops_array_int(self): + + idx = PeriodIndex( + ["2011-01", "2011-02", "NaT", "2011-04"], freq="M", name="idx" + ) + f = lambda x: x + np.array([1, 2, 3, 4]) + exp = PeriodIndex( + ["2011-02", "2011-04", "NaT", "2011-08"], freq="M", name="idx" + ) + self._check(idx, 
f, exp) + + f = lambda x: np.add(x, np.array([4, -1, 1, 2])) + exp = PeriodIndex( + ["2011-05", "2011-01", "NaT", "2011-06"], freq="M", name="idx" + ) + self._check(idx, f, exp) + + f = lambda x: x - np.array([1, 2, 3, 4]) + exp = PeriodIndex( + ["2010-12", "2010-12", "NaT", "2010-12"], freq="M", name="idx" + ) + self._check(idx, f, exp) + + f = lambda x: np.subtract(x, np.array([3, 2, 3, -2])) + exp = PeriodIndex( + ["2010-10", "2010-12", "NaT", "2011-06"], freq="M", name="idx" + ) + self._check(idx, f, exp) + + def test_pi_ops_offset(self): + idx = PeriodIndex( + ["2011-01-01", "2011-02-01", "2011-03-01", "2011-04-01"], + freq="D", + name="idx", + ) + f = lambda x: x + pd.offsets.Day() + exp = PeriodIndex( + ["2011-01-02", "2011-02-02", "2011-03-02", "2011-04-02"], + freq="D", + name="idx", + ) + self._check(idx, f, exp) + + f = lambda x: x + pd.offsets.Day(2) + exp = PeriodIndex( + ["2011-01-03", "2011-02-03", "2011-03-03", "2011-04-03"], + freq="D", + name="idx", + ) + self._check(idx, f, exp) + + f = lambda x: x - pd.offsets.Day(2) + exp = PeriodIndex( + ["2010-12-30", "2011-01-30", "2011-02-27", "2011-03-30"], + freq="D", + name="idx", + ) + self._check(idx, f, exp) + + def test_pi_offset_errors(self): + idx = PeriodIndex( + ["2011-01-01", "2011-02-01", "2011-03-01", "2011-04-01"], + freq="D", + name="idx", + ) + ser = Series(idx) + + # Series op is applied per Period instance, thus error is raised + # from Period + for obj in [idx, ser]: + msg = r"Input has different freq=2H from Period.*?\(freq=D\)" + with pytest.raises(IncompatibleFrequency, match=msg): + obj + pd.offsets.Hour(2) + + with pytest.raises(IncompatibleFrequency, match=msg): + pd.offsets.Hour(2) + obj + + msg = r"Input has different freq=-2H from Period.*?\(freq=D\)" + with pytest.raises(IncompatibleFrequency, match=msg): + obj - pd.offsets.Hour(2) + + def test_pi_sub_period(self): + # GH#13071 + idx = PeriodIndex( + ["2011-01", "2011-02", "2011-03", "2011-04"], freq="M", name="idx" + ) + + result = idx - Period("2012-01", freq="M") + off = idx.freq + exp = pd.Index([-12 * off, -11 * off, -10 * off, -9 * off], name="idx") + tm.assert_index_equal(result, exp) + + result = np.subtract(idx, Period("2012-01", freq="M")) + tm.assert_index_equal(result, exp) + + result = Period("2012-01", freq="M") - idx + exp = pd.Index([12 * off, 11 * off, 10 * off, 9 * off], name="idx") + tm.assert_index_equal(result, exp) + + result = np.subtract(Period("2012-01", freq="M"), idx) + tm.assert_index_equal(result, exp) + + exp = TimedeltaIndex([np.nan, np.nan, np.nan, np.nan], name="idx") + result = idx - Period("NaT", freq="M") + tm.assert_index_equal(result, exp) + assert result.freq == exp.freq + + result = Period("NaT", freq="M") - idx + tm.assert_index_equal(result, exp) + assert result.freq == exp.freq + + def test_pi_sub_pdnat(self): + # GH#13071 + idx = PeriodIndex( + ["2011-01", "2011-02", "NaT", "2011-04"], freq="M", name="idx" + ) + exp = TimedeltaIndex([pd.NaT] * 4, name="idx") + tm.assert_index_equal(pd.NaT - idx, exp) + tm.assert_index_equal(idx - pd.NaT, exp) + + def test_pi_sub_period_nat(self): + # GH#13071 + idx = PeriodIndex( + ["2011-01", "NaT", "2011-03", "2011-04"], freq="M", name="idx" + ) + + result = idx - Period("2012-01", freq="M") + off = idx.freq + exp = pd.Index([-12 * off, pd.NaT, -10 * off, -9 * off], name="idx") + tm.assert_index_equal(result, exp) + + result = Period("2012-01", freq="M") - idx + exp = pd.Index([12 * off, pd.NaT, 10 * off, 9 * off], name="idx") + tm.assert_index_equal(result, exp) + + exp = 
TimedeltaIndex([np.nan, np.nan, np.nan, np.nan], name="idx") + tm.assert_index_equal(idx - Period("NaT", freq="M"), exp) + tm.assert_index_equal(Period("NaT", freq="M") - idx, exp) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/test_timedelta64.py b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/test_timedelta64.py new file mode 100644 index 0000000..5435318 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arithmetic/test_timedelta64.py @@ -0,0 +1,2112 @@ +# Arithmetic tests for DataFrame/Series/Index/Array classes that should +# behave identically. +from datetime import ( + datetime, + timedelta, +) + +import numpy as np +import pytest + +from pandas.errors import ( + OutOfBoundsDatetime, + PerformanceWarning, +) + +import pandas as pd +from pandas import ( + DataFrame, + DatetimeIndex, + NaT, + Series, + Timedelta, + TimedeltaIndex, + Timestamp, + offsets, + timedelta_range, +) +import pandas._testing as tm +from pandas.core.api import ( + Float64Index, + Int64Index, + UInt64Index, +) +from pandas.tests.arithmetic.common import ( + assert_invalid_addsub_type, + assert_invalid_comparison, + get_upcast_box, +) + + +def assert_dtype(obj, expected_dtype): + """ + Helper to check the dtype for a Series, Index, or single-column DataFrame. + """ + dtype = tm.get_dtype(obj) + + assert dtype == expected_dtype + + +def get_expected_name(box, names): + if box is DataFrame: + # Since we are operating with a DataFrame and a non-DataFrame, + # the non-DataFrame is cast to Series and its name ignored. + exname = names[0] + elif box in [tm.to_array, pd.array]: + exname = names[1] + else: + exname = names[2] + return exname + + +# ------------------------------------------------------------------ +# Timedelta64[ns] dtype Comparisons + + +class TestTimedelta64ArrayLikeComparisons: + # Comparison tests for timedelta64[ns] vectors fully parametrized over + # DataFrame/Series/TimedeltaIndex/TimedeltaArray. Ideally all comparison + # tests will eventually end up here. 
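+
+    def test_compare_timedelta_scalar_sketch(self):
+        # Editor's illustrative sketch (hypothetical test, not from the
+        # original file): the boxed comparisons below reduce elementwise to
+        # plain Timedelta comparisons like this one.
+        assert Timedelta("2H") < Timedelta("1D")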
+ + def test_compare_timedelta64_zerodim(self, box_with_array): + # GH#26689 should unbox when comparing with zerodim array + box = box_with_array + xbox = ( + box_with_array if box_with_array not in [pd.Index, pd.array] else np.ndarray + ) + + tdi = timedelta_range("2H", periods=4) + other = np.array(tdi.to_numpy()[0]) + + tdi = tm.box_expected(tdi, box) + res = tdi <= other + expected = np.array([True, False, False, False]) + expected = tm.box_expected(expected, xbox) + tm.assert_equal(res, expected) + + @pytest.mark.parametrize( + "td_scalar", + [ + timedelta(days=1), + Timedelta(days=1), + Timedelta(days=1).to_timedelta64(), + offsets.Hour(24), + ], + ) + def test_compare_timedeltalike_scalar(self, box_with_array, td_scalar): + # regression test for GH#5963 + box = box_with_array + xbox = box if box not in [pd.Index, pd.array] else np.ndarray + + ser = Series([timedelta(days=1), timedelta(days=2)]) + ser = tm.box_expected(ser, box) + actual = ser > td_scalar + expected = Series([False, True]) + expected = tm.box_expected(expected, xbox) + tm.assert_equal(actual, expected) + + @pytest.mark.parametrize( + "invalid", + [ + 345600000000000, + "a", + Timestamp("2021-01-01"), + Timestamp("2021-01-01").now("UTC"), + Timestamp("2021-01-01").now().to_datetime64(), + Timestamp("2021-01-01").now().to_pydatetime(), + Timestamp("2021-01-01").date(), + np.array(4), # zero-dim mismatched dtype + ], + ) + def test_td64_comparisons_invalid(self, box_with_array, invalid): + # GH#13624 for str + box = box_with_array + + rng = timedelta_range("1 days", periods=10) + obj = tm.box_expected(rng, box) + + assert_invalid_comparison(obj, invalid, box) + + @pytest.mark.parametrize( + "other", + [ + list(range(10)), + np.arange(10), + np.arange(10).astype(np.float32), + np.arange(10).astype(object), + pd.date_range("1970-01-01", periods=10, tz="UTC").array, + np.array(pd.date_range("1970-01-01", periods=10)), + list(pd.date_range("1970-01-01", periods=10)), + pd.date_range("1970-01-01", periods=10).astype(object), + pd.period_range("1971-01-01", freq="D", periods=10).array, + pd.period_range("1971-01-01", freq="D", periods=10).astype(object), + ], + ) + def test_td64arr_cmp_arraylike_invalid(self, other, box_with_array): + # We don't parametrize this over box_with_array because listlike + # other plays poorly with assert_invalid_comparison reversed checks + + rng = timedelta_range("1 days", periods=10)._data + rng = tm.box_expected(rng, box_with_array) + assert_invalid_comparison(rng, other, box_with_array) + + def test_td64arr_cmp_mixed_invalid(self): + rng = timedelta_range("1 days", periods=5)._data + other = np.array([0, 1, 2, rng[3], Timestamp("2021-01-01")]) + + result = rng == other + expected = np.array([False, False, False, True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = rng != other + tm.assert_numpy_array_equal(result, ~expected) + + msg = "Invalid comparison between|Cannot compare type|not supported between" + with pytest.raises(TypeError, match=msg): + rng < other + with pytest.raises(TypeError, match=msg): + rng > other + with pytest.raises(TypeError, match=msg): + rng <= other + with pytest.raises(TypeError, match=msg): + rng >= other + + +class TestTimedelta64ArrayComparisons: + # TODO: All of these need to be parametrized over box + + @pytest.mark.parametrize("dtype", [None, object]) + def test_comp_nat(self, dtype): + left = TimedeltaIndex([Timedelta("1 days"), NaT, Timedelta("3 days")]) + right = TimedeltaIndex([NaT, NaT, Timedelta("3 days")]) + + lhs, rhs = left, 
right
+        if dtype is object:
+            lhs, rhs = left.astype(object), right.astype(object)
+
+        result = rhs == lhs
+        expected = np.array([False, False, True])
+        tm.assert_numpy_array_equal(result, expected)
+
+        result = rhs != lhs
+        expected = np.array([True, True, False])
+        tm.assert_numpy_array_equal(result, expected)
+
+        expected = np.array([False, False, False])
+        tm.assert_numpy_array_equal(lhs == NaT, expected)
+        tm.assert_numpy_array_equal(NaT == rhs, expected)
+
+        expected = np.array([True, True, True])
+        tm.assert_numpy_array_equal(lhs != NaT, expected)
+        tm.assert_numpy_array_equal(NaT != lhs, expected)
+
+        expected = np.array([False, False, False])
+        tm.assert_numpy_array_equal(lhs < NaT, expected)
+        tm.assert_numpy_array_equal(NaT > lhs, expected)
+
+    def test_comparisons_nat(self):
+        tdidx1 = TimedeltaIndex(
+            [
+                "1 day",
+                NaT,
+                "1 day 00:00:01",
+                NaT,
+                "1 day 00:00:01",
+                "5 day 00:00:03",
+            ]
+        )
+        tdidx2 = TimedeltaIndex(
+            ["2 day", "2 day", NaT, NaT, "1 day 00:00:02", "5 days 00:00:03"]
+        )
+        tdarr = np.array(
+            [
+                np.timedelta64(2, "D"),
+                np.timedelta64(2, "D"),
+                np.timedelta64("nat"),
+                np.timedelta64("nat"),
+                np.timedelta64(1, "D") + np.timedelta64(2, "s"),
+                np.timedelta64(5, "D") + np.timedelta64(3, "s"),
+            ]
+        )
+
+        cases = [(tdidx1, tdidx2), (tdidx1, tdarr)]
+
+        # Check pd.NaT is handled the same as np.nan
+        for idx1, idx2 in cases:
+
+            result = idx1 < idx2
+            expected = np.array([True, False, False, False, True, False])
+            tm.assert_numpy_array_equal(result, expected)
+
+            result = idx2 > idx1
+            expected = np.array([True, False, False, False, True, False])
+            tm.assert_numpy_array_equal(result, expected)
+
+            result = idx1 <= idx2
+            expected = np.array([True, False, False, False, True, True])
+            tm.assert_numpy_array_equal(result, expected)
+
+            result = idx2 >= idx1
+            expected = np.array([True, False, False, False, True, True])
+            tm.assert_numpy_array_equal(result, expected)
+
+            result = idx1 == idx2
+            expected = np.array([False, False, False, False, False, True])
+            tm.assert_numpy_array_equal(result, expected)
+
+            result = idx1 != idx2
+            expected = np.array([True, True, True, True, True, False])
+            tm.assert_numpy_array_equal(result, expected)
+
+    # TODO: better name
+    def test_comparisons_coverage(self):
+        rng = timedelta_range("1 days", periods=10)
+
+        result = rng < rng[3]
+        expected = np.array([True, True, True] + [False] * 7)
+        tm.assert_numpy_array_equal(result, expected)
+
+        result = rng == list(rng)
+        exp = rng == rng
+        tm.assert_numpy_array_equal(result, exp)
+
+
+# ------------------------------------------------------------------
+# Timedelta64[ns] dtype Arithmetic Operations
+
+
+class TestTimedelta64ArithmeticUnsorted:
+    # Tests moved from type-specific test files but not
+    # yet sorted/parametrized/de-duplicated
+
+    def test_ufunc_coercions(self):
+        # normal ops are also tested in tseries/test_timedeltas.py
+        idx = TimedeltaIndex(["2H", "4H", "6H", "8H", "10H"], freq="2H", name="x")
+
+        for result in [idx * 2, np.multiply(idx, 2)]:
+            assert isinstance(result, TimedeltaIndex)
+            exp = TimedeltaIndex(["4H", "8H", "12H", "16H", "20H"], freq="4H", name="x")
+            tm.assert_index_equal(result, exp)
+            assert result.freq == "4H"
+
+        for result in [idx / 2, np.divide(idx, 2)]:
+            assert isinstance(result, TimedeltaIndex)
+            exp = TimedeltaIndex(["1H", "2H", "3H", "4H", "5H"], freq="H", name="x")
+            tm.assert_index_equal(result, exp)
+            assert result.freq == "H"
+
+        for result in [-idx, np.negative(idx)]:
+            assert isinstance(result, TimedeltaIndex)
+            exp = 
TimedeltaIndex( + ["-2H", "-4H", "-6H", "-8H", "-10H"], freq="-2H", name="x" + ) + tm.assert_index_equal(result, exp) + assert result.freq == "-2H" + + idx = TimedeltaIndex(["-2H", "-1H", "0H", "1H", "2H"], freq="H", name="x") + for result in [abs(idx), np.absolute(idx)]: + assert isinstance(result, TimedeltaIndex) + exp = TimedeltaIndex(["2H", "1H", "0H", "1H", "2H"], freq=None, name="x") + tm.assert_index_equal(result, exp) + assert result.freq is None + + def test_subtraction_ops(self): + # with datetimes/timedelta and tdi/dti + tdi = TimedeltaIndex(["1 days", NaT, "2 days"], name="foo") + dti = pd.date_range("20130101", periods=3, name="bar") + td = Timedelta("1 days") + dt = Timestamp("20130101") + + msg = "cannot subtract a datelike from a TimedeltaArray" + with pytest.raises(TypeError, match=msg): + tdi - dt + with pytest.raises(TypeError, match=msg): + tdi - dti + + msg = r"unsupported operand type\(s\) for -" + with pytest.raises(TypeError, match=msg): + td - dt + + msg = "(bad|unsupported) operand type for unary" + with pytest.raises(TypeError, match=msg): + td - dti + + result = dt - dti + expected = TimedeltaIndex(["0 days", "-1 days", "-2 days"], name="bar") + tm.assert_index_equal(result, expected) + + result = dti - dt + expected = TimedeltaIndex(["0 days", "1 days", "2 days"], name="bar") + tm.assert_index_equal(result, expected) + + result = tdi - td + expected = TimedeltaIndex(["0 days", NaT, "1 days"], name="foo") + tm.assert_index_equal(result, expected, check_names=False) + + result = td - tdi + expected = TimedeltaIndex(["0 days", NaT, "-1 days"], name="foo") + tm.assert_index_equal(result, expected, check_names=False) + + result = dti - td + expected = DatetimeIndex( + ["20121231", "20130101", "20130102"], freq="D", name="bar" + ) + tm.assert_index_equal(result, expected, check_names=False) + + result = dt - tdi + expected = DatetimeIndex(["20121231", NaT, "20121230"], name="foo") + tm.assert_index_equal(result, expected) + + def test_subtraction_ops_with_tz(self, box_with_array): + + # check that dt/dti subtraction ops with tz are validated + dti = pd.date_range("20130101", periods=3) + dti = tm.box_expected(dti, box_with_array) + ts = Timestamp("20130101") + dt = ts.to_pydatetime() + dti_tz = pd.date_range("20130101", periods=3).tz_localize("US/Eastern") + dti_tz = tm.box_expected(dti_tz, box_with_array) + ts_tz = Timestamp("20130101").tz_localize("US/Eastern") + ts_tz2 = Timestamp("20130101").tz_localize("CET") + dt_tz = ts_tz.to_pydatetime() + td = Timedelta("1 days") + + def _check(result, expected): + assert result == expected + assert isinstance(result, Timedelta) + + # scalars + result = ts - ts + expected = Timedelta("0 days") + _check(result, expected) + + result = dt_tz - ts_tz + expected = Timedelta("0 days") + _check(result, expected) + + result = ts_tz - dt_tz + expected = Timedelta("0 days") + _check(result, expected) + + # tz mismatches + msg = "Cannot subtract tz-naive and tz-aware datetime-like objects." + with pytest.raises(TypeError, match=msg): + dt_tz - ts + msg = "can't subtract offset-naive and offset-aware datetimes" + with pytest.raises(TypeError, match=msg): + dt_tz - dt + msg = "can't subtract offset-naive and offset-aware datetimes" + with pytest.raises(TypeError, match=msg): + dt - dt_tz + msg = "Cannot subtract tz-naive and tz-aware datetime-like objects." 
+ with pytest.raises(TypeError, match=msg): + ts - dt_tz + with pytest.raises(TypeError, match=msg): + ts_tz2 - ts + with pytest.raises(TypeError, match=msg): + ts_tz2 - dt + + msg = "Cannot subtract tz-naive and tz-aware" + # with dti + with pytest.raises(TypeError, match=msg): + dti - ts_tz + with pytest.raises(TypeError, match=msg): + dti_tz - ts + + result = dti_tz - dt_tz + expected = TimedeltaIndex(["0 days", "1 days", "2 days"]) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(result, expected) + + result = dt_tz - dti_tz + expected = TimedeltaIndex(["0 days", "-1 days", "-2 days"]) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(result, expected) + + result = dti_tz - ts_tz + expected = TimedeltaIndex(["0 days", "1 days", "2 days"]) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(result, expected) + + result = ts_tz - dti_tz + expected = TimedeltaIndex(["0 days", "-1 days", "-2 days"]) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(result, expected) + + result = td - td + expected = Timedelta("0 days") + _check(result, expected) + + result = dti_tz - td + expected = DatetimeIndex(["20121231", "20130101", "20130102"], tz="US/Eastern") + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(result, expected) + + def test_dti_tdi_numeric_ops(self): + # These are normally union/diff set-like ops + tdi = TimedeltaIndex(["1 days", NaT, "2 days"], name="foo") + dti = pd.date_range("20130101", periods=3, name="bar") + + result = tdi - tdi + expected = TimedeltaIndex(["0 days", NaT, "0 days"], name="foo") + tm.assert_index_equal(result, expected) + + result = tdi + tdi + expected = TimedeltaIndex(["2 days", NaT, "4 days"], name="foo") + tm.assert_index_equal(result, expected) + + result = dti - tdi # name will be reset + expected = DatetimeIndex(["20121231", NaT, "20130101"]) + tm.assert_index_equal(result, expected) + + def test_addition_ops(self): + # with datetimes/timedelta and tdi/dti + tdi = TimedeltaIndex(["1 days", NaT, "2 days"], name="foo") + dti = pd.date_range("20130101", periods=3, name="bar") + td = Timedelta("1 days") + dt = Timestamp("20130101") + + result = tdi + dt + expected = DatetimeIndex(["20130102", NaT, "20130103"], name="foo") + tm.assert_index_equal(result, expected) + + result = dt + tdi + expected = DatetimeIndex(["20130102", NaT, "20130103"], name="foo") + tm.assert_index_equal(result, expected) + + result = td + tdi + expected = TimedeltaIndex(["2 days", NaT, "3 days"], name="foo") + tm.assert_index_equal(result, expected) + + result = tdi + td + expected = TimedeltaIndex(["2 days", NaT, "3 days"], name="foo") + tm.assert_index_equal(result, expected) + + # unequal length + msg = "cannot add indices of unequal length" + with pytest.raises(ValueError, match=msg): + tdi + dti[0:1] + with pytest.raises(ValueError, match=msg): + tdi[0:1] + dti + + # random indexes + msg = "Addition/subtraction of integers and integer-arrays" + with pytest.raises(TypeError, match=msg): + tdi + Int64Index([1, 2, 3]) + + # this is a union! 
+ # pytest.raises(TypeError, lambda : Int64Index([1,2,3]) + tdi) + + result = tdi + dti # name will be reset + expected = DatetimeIndex(["20130102", NaT, "20130105"]) + tm.assert_index_equal(result, expected) + + result = dti + tdi # name will be reset + expected = DatetimeIndex(["20130102", NaT, "20130105"]) + tm.assert_index_equal(result, expected) + + result = dt + td + expected = Timestamp("20130102") + assert result == expected + + result = td + dt + expected = Timestamp("20130102") + assert result == expected + + # TODO: Needs more informative name, probably split up into + # more targeted tests + @pytest.mark.parametrize("freq", ["D", "B"]) + def test_timedelta(self, freq): + index = pd.date_range("1/1/2000", periods=50, freq=freq) + + shifted = index + timedelta(1) + back = shifted + timedelta(-1) + back = back._with_freq("infer") + tm.assert_index_equal(index, back) + + if freq == "D": + expected = pd.tseries.offsets.Day(1) + assert index.freq == expected + assert shifted.freq == expected + assert back.freq == expected + else: # freq == 'B' + assert index.freq == pd.tseries.offsets.BusinessDay(1) + assert shifted.freq is None + assert back.freq == pd.tseries.offsets.BusinessDay(1) + + result = index - timedelta(1) + expected = index + timedelta(-1) + tm.assert_index_equal(result, expected) + + # GH#4134, buggy with timedeltas + rng = pd.date_range("2013", "2014") + s = Series(rng) + result1 = rng - offsets.Hour(1) + result2 = DatetimeIndex(s - np.timedelta64(100000000)) + result3 = rng - np.timedelta64(100000000) + result4 = DatetimeIndex(s - offsets.Hour(1)) + + assert result1.freq == rng.freq + result1 = result1._with_freq(None) + tm.assert_index_equal(result1, result4) + + assert result3.freq == rng.freq + result3 = result3._with_freq(None) + tm.assert_index_equal(result2, result3) + + def test_tda_add_sub_index(self): + # Check that TimedeltaArray defers to Index on arithmetic ops + tdi = TimedeltaIndex(["1 days", NaT, "2 days"]) + tda = tdi.array + + dti = pd.date_range("1999-12-31", periods=3, freq="D") + + result = tda + dti + expected = tdi + dti + tm.assert_index_equal(result, expected) + + result = tda + tdi + expected = tdi + tdi + tm.assert_index_equal(result, expected) + + result = tda - tdi + expected = tdi - tdi + tm.assert_index_equal(result, expected) + + def test_tda_add_dt64_object_array(self, box_with_array, tz_naive_fixture): + # Result should be cast back to DatetimeArray + box = box_with_array + + dti = pd.date_range("2016-01-01", periods=3, tz=tz_naive_fixture) + dti = dti._with_freq(None) + tdi = dti - dti + + obj = tm.box_expected(tdi, box) + other = tm.box_expected(dti, box) + + with tm.assert_produces_warning(PerformanceWarning): + result = obj + other.astype(object) + tm.assert_equal(result, other) + + # ------------------------------------------------------------- + # Binary operations TimedeltaIndex and timedelta-like + + def test_tdi_iadd_timedeltalike(self, two_hours, box_with_array): + # only test adding/sub offsets as + is now numeric + rng = timedelta_range("1 days", "10 days") + expected = timedelta_range("1 days 02:00:00", "10 days 02:00:00", freq="D") + + rng = tm.box_expected(rng, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + orig_rng = rng + rng += two_hours + tm.assert_equal(rng, expected) + if box_with_array is not pd.Index: + # Check that operation is actually inplace + tm.assert_equal(orig_rng, expected) + + def test_tdi_isub_timedeltalike(self, two_hours, box_with_array): + # only test adding/sub offsets as 
- is now numeric + rng = timedelta_range("1 days", "10 days") + expected = timedelta_range("0 days 22:00:00", "9 days 22:00:00") + + rng = tm.box_expected(rng, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + orig_rng = rng + rng -= two_hours + tm.assert_equal(rng, expected) + if box_with_array is not pd.Index: + # Check that operation is actually inplace + tm.assert_equal(orig_rng, expected) + + # ------------------------------------------------------------- + + def test_tdi_ops_attributes(self): + rng = timedelta_range("2 days", periods=5, freq="2D", name="x") + + result = rng + 1 * rng.freq + exp = timedelta_range("4 days", periods=5, freq="2D", name="x") + tm.assert_index_equal(result, exp) + assert result.freq == "2D" + + result = rng - 2 * rng.freq + exp = timedelta_range("-2 days", periods=5, freq="2D", name="x") + tm.assert_index_equal(result, exp) + assert result.freq == "2D" + + result = rng * 2 + exp = timedelta_range("4 days", periods=5, freq="4D", name="x") + tm.assert_index_equal(result, exp) + assert result.freq == "4D" + + result = rng / 2 + exp = timedelta_range("1 days", periods=5, freq="D", name="x") + tm.assert_index_equal(result, exp) + assert result.freq == "D" + + result = -rng + exp = timedelta_range("-2 days", periods=5, freq="-2D", name="x") + tm.assert_index_equal(result, exp) + assert result.freq == "-2D" + + rng = timedelta_range("-2 days", periods=5, freq="D", name="x") + + result = abs(rng) + exp = TimedeltaIndex( + ["2 days", "1 days", "0 days", "1 days", "2 days"], name="x" + ) + tm.assert_index_equal(result, exp) + assert result.freq is None + + +class TestAddSubNaTMasking: + # TODO: parametrize over boxes + + def test_tdarr_add_timestamp_nat_masking(self, box_with_array): + # GH#17991 checking for overflow-masking with NaT + tdinat = pd.to_timedelta(["24658 days 11:15:00", "NaT"]) + tdobj = tm.box_expected(tdinat, box_with_array) + + tsneg = Timestamp("1950-01-01") + ts_neg_variants = [ + tsneg, + tsneg.to_pydatetime(), + tsneg.to_datetime64().astype("datetime64[ns]"), + tsneg.to_datetime64().astype("datetime64[D]"), + ] + + tspos = Timestamp("1980-01-01") + ts_pos_variants = [ + tspos, + tspos.to_pydatetime(), + tspos.to_datetime64().astype("datetime64[ns]"), + tspos.to_datetime64().astype("datetime64[D]"), + ] + + for variant in ts_neg_variants + ts_pos_variants: + res = tdobj + variant + if box_with_array is DataFrame: + assert res.iloc[1, 1] is NaT + else: + assert res[1] is NaT + + def test_tdi_add_overflow(self): + # See GH#14068 + # preliminary test scalar analogue of vectorized tests below + # TODO: Make raised error message more informative and test + with pytest.raises(OutOfBoundsDatetime, match="10155196800000000000"): + pd.to_timedelta(106580, "D") + Timestamp("2000") + with pytest.raises(OutOfBoundsDatetime, match="10155196800000000000"): + Timestamp("2000") + pd.to_timedelta(106580, "D") + + _NaT = NaT.value + 1 + msg = "Overflow in int64 addition" + with pytest.raises(OverflowError, match=msg): + pd.to_timedelta([106580], "D") + Timestamp("2000") + with pytest.raises(OverflowError, match=msg): + Timestamp("2000") + pd.to_timedelta([106580], "D") + with pytest.raises(OverflowError, match=msg): + pd.to_timedelta([_NaT]) - Timedelta("1 days") + with pytest.raises(OverflowError, match=msg): + pd.to_timedelta(["5 days", _NaT]) - Timedelta("1 days") + with pytest.raises(OverflowError, match=msg): + ( + pd.to_timedelta([_NaT, "5 days", "1 hours"]) + - pd.to_timedelta(["7 seconds", _NaT, "4 hours"]) + ) + + # These should 
not overflow! + exp = TimedeltaIndex([NaT]) + result = pd.to_timedelta([NaT]) - Timedelta("1 days") + tm.assert_index_equal(result, exp) + + exp = TimedeltaIndex(["4 days", NaT]) + result = pd.to_timedelta(["5 days", NaT]) - Timedelta("1 days") + tm.assert_index_equal(result, exp) + + exp = TimedeltaIndex([NaT, NaT, "5 hours"]) + result = pd.to_timedelta([NaT, "5 days", "1 hours"]) + pd.to_timedelta( + ["7 seconds", NaT, "4 hours"] + ) + tm.assert_index_equal(result, exp) + + +class TestTimedeltaArraylikeAddSubOps: + # Tests for timedelta64[ns] __add__, __sub__, __radd__, __rsub__ + + # TODO: moved from tests.indexes.timedeltas.test_arithmetic; needs + # parametrization+de-duplication + def test_timedelta_ops_with_missing_values(self): + # setup + s1 = pd.to_timedelta(Series(["00:00:01"])) + s2 = pd.to_timedelta(Series(["00:00:02"])) + + msg = r"dtype datetime64\[ns\] cannot be converted to timedelta64\[ns\]" + with pytest.raises(TypeError, match=msg): + # Passing datetime64-dtype data to TimedeltaIndex is no longer + # supported GH#29794 + pd.to_timedelta(Series([NaT])) # TODO: belongs elsewhere? + + sn = pd.to_timedelta(Series([NaT], dtype="m8[ns]")) + + df1 = DataFrame(["00:00:01"]).apply(pd.to_timedelta) + df2 = DataFrame(["00:00:02"]).apply(pd.to_timedelta) + with pytest.raises(TypeError, match=msg): + # Passing datetime64-dtype data to TimedeltaIndex is no longer + # supported GH#29794 + DataFrame([NaT]).apply(pd.to_timedelta) # TODO: belongs elsewhere? + + dfn = DataFrame([NaT.value]).apply(pd.to_timedelta) + + scalar1 = pd.to_timedelta("00:00:01") + scalar2 = pd.to_timedelta("00:00:02") + timedelta_NaT = pd.to_timedelta("NaT") + + actual = scalar1 + scalar1 + assert actual == scalar2 + actual = scalar2 - scalar1 + assert actual == scalar1 + + actual = s1 + s1 + tm.assert_series_equal(actual, s2) + actual = s2 - s1 + tm.assert_series_equal(actual, s1) + + actual = s1 + scalar1 + tm.assert_series_equal(actual, s2) + actual = scalar1 + s1 + tm.assert_series_equal(actual, s2) + actual = s2 - scalar1 + tm.assert_series_equal(actual, s1) + actual = -scalar1 + s2 + tm.assert_series_equal(actual, s1) + + actual = s1 + timedelta_NaT + tm.assert_series_equal(actual, sn) + actual = timedelta_NaT + s1 + tm.assert_series_equal(actual, sn) + actual = s1 - timedelta_NaT + tm.assert_series_equal(actual, sn) + actual = -timedelta_NaT + s1 + tm.assert_series_equal(actual, sn) + + msg = "unsupported operand type" + with pytest.raises(TypeError, match=msg): + s1 + np.nan + with pytest.raises(TypeError, match=msg): + np.nan + s1 + with pytest.raises(TypeError, match=msg): + s1 - np.nan + with pytest.raises(TypeError, match=msg): + -np.nan + s1 + + actual = s1 + NaT + tm.assert_series_equal(actual, sn) + actual = s2 - NaT + tm.assert_series_equal(actual, sn) + + actual = s1 + df1 + tm.assert_frame_equal(actual, df2) + actual = s2 - df1 + tm.assert_frame_equal(actual, df1) + actual = df1 + s1 + tm.assert_frame_equal(actual, df2) + actual = df2 - s1 + tm.assert_frame_equal(actual, df1) + + actual = df1 + df1 + tm.assert_frame_equal(actual, df2) + actual = df2 - df1 + tm.assert_frame_equal(actual, df1) + + actual = df1 + scalar1 + tm.assert_frame_equal(actual, df2) + actual = df2 - scalar1 + tm.assert_frame_equal(actual, df1) + + actual = df1 + timedelta_NaT + tm.assert_frame_equal(actual, dfn) + actual = df1 - timedelta_NaT + tm.assert_frame_equal(actual, dfn) + + msg = "cannot subtract a datelike from|unsupported operand type" + with pytest.raises(TypeError, match=msg): + df1 + np.nan + with 
pytest.raises(TypeError, match=msg): + df1 - np.nan + + actual = df1 + NaT # NaT is datetime, not timedelta + tm.assert_frame_equal(actual, dfn) + actual = df1 - NaT + tm.assert_frame_equal(actual, dfn) + + # TODO: moved from tests.series.test_operators, needs splitting, cleanup, + # de-duplication, box-parametrization... + def test_operators_timedelta64(self): + # series ops + v1 = pd.date_range("2012-1-1", periods=3, freq="D") + v2 = pd.date_range("2012-1-2", periods=3, freq="D") + rs = Series(v2) - Series(v1) + xp = Series(1e9 * 3600 * 24, rs.index).astype("int64").astype("timedelta64[ns]") + tm.assert_series_equal(rs, xp) + assert rs.dtype == "timedelta64[ns]" + + df = DataFrame({"A": v1}) + td = Series([timedelta(days=i) for i in range(3)]) + assert td.dtype == "timedelta64[ns]" + + # series on the rhs + result = df["A"] - df["A"].shift() + assert result.dtype == "timedelta64[ns]" + + result = df["A"] + td + assert result.dtype == "M8[ns]" + + # scalar Timestamp on rhs + maxa = df["A"].max() + assert isinstance(maxa, Timestamp) + + resultb = df["A"] - df["A"].max() + assert resultb.dtype == "timedelta64[ns]" + + # timestamp on lhs + result = resultb + df["A"] + values = [Timestamp("20111230"), Timestamp("20120101"), Timestamp("20120103")] + expected = Series(values, name="A") + tm.assert_series_equal(result, expected) + + # datetimes on rhs + result = df["A"] - datetime(2001, 1, 1) + expected = Series([timedelta(days=4017 + i) for i in range(3)], name="A") + tm.assert_series_equal(result, expected) + assert result.dtype == "m8[ns]" + + d = datetime(2001, 1, 1, 3, 4) + resulta = df["A"] - d + assert resulta.dtype == "m8[ns]" + + # roundtrip + resultb = resulta + d + tm.assert_series_equal(df["A"], resultb) + + # timedeltas on rhs + td = timedelta(days=1) + resulta = df["A"] + td + resultb = resulta - td + tm.assert_series_equal(resultb, df["A"]) + assert resultb.dtype == "M8[ns]" + + # roundtrip + td = timedelta(minutes=5, seconds=3) + resulta = df["A"] + td + resultb = resulta - td + tm.assert_series_equal(df["A"], resultb) + assert resultb.dtype == "M8[ns]" + + # inplace + value = rs[2] + np.timedelta64(timedelta(minutes=5, seconds=1)) + rs[2] += np.timedelta64(timedelta(minutes=5, seconds=1)) + assert rs[2] == value + + def test_timedelta64_ops_nat(self): + # GH 11349 + timedelta_series = Series([NaT, Timedelta("1s")]) + nat_series_dtype_timedelta = Series([NaT, NaT], dtype="timedelta64[ns]") + single_nat_dtype_timedelta = Series([NaT], dtype="timedelta64[ns]") + + # subtraction + tm.assert_series_equal(timedelta_series - NaT, nat_series_dtype_timedelta) + tm.assert_series_equal(-NaT + timedelta_series, nat_series_dtype_timedelta) + + tm.assert_series_equal( + timedelta_series - single_nat_dtype_timedelta, nat_series_dtype_timedelta + ) + tm.assert_series_equal( + -single_nat_dtype_timedelta + timedelta_series, nat_series_dtype_timedelta + ) + + # addition + tm.assert_series_equal( + nat_series_dtype_timedelta + NaT, nat_series_dtype_timedelta + ) + tm.assert_series_equal( + NaT + nat_series_dtype_timedelta, nat_series_dtype_timedelta + ) + + tm.assert_series_equal( + nat_series_dtype_timedelta + single_nat_dtype_timedelta, + nat_series_dtype_timedelta, + ) + tm.assert_series_equal( + single_nat_dtype_timedelta + nat_series_dtype_timedelta, + nat_series_dtype_timedelta, + ) + + tm.assert_series_equal(timedelta_series + NaT, nat_series_dtype_timedelta) + tm.assert_series_equal(NaT + timedelta_series, nat_series_dtype_timedelta) + + tm.assert_series_equal( + timedelta_series + 
single_nat_dtype_timedelta, nat_series_dtype_timedelta + ) + tm.assert_series_equal( + single_nat_dtype_timedelta + timedelta_series, nat_series_dtype_timedelta + ) + + tm.assert_series_equal( + nat_series_dtype_timedelta + NaT, nat_series_dtype_timedelta + ) + tm.assert_series_equal( + NaT + nat_series_dtype_timedelta, nat_series_dtype_timedelta + ) + + tm.assert_series_equal( + nat_series_dtype_timedelta + single_nat_dtype_timedelta, + nat_series_dtype_timedelta, + ) + tm.assert_series_equal( + single_nat_dtype_timedelta + nat_series_dtype_timedelta, + nat_series_dtype_timedelta, + ) + + # multiplication + tm.assert_series_equal( + nat_series_dtype_timedelta * 1.0, nat_series_dtype_timedelta + ) + tm.assert_series_equal( + 1.0 * nat_series_dtype_timedelta, nat_series_dtype_timedelta + ) + + tm.assert_series_equal(timedelta_series * 1, timedelta_series) + tm.assert_series_equal(1 * timedelta_series, timedelta_series) + + tm.assert_series_equal(timedelta_series * 1.5, Series([NaT, Timedelta("1.5s")])) + tm.assert_series_equal(1.5 * timedelta_series, Series([NaT, Timedelta("1.5s")])) + + tm.assert_series_equal(timedelta_series * np.nan, nat_series_dtype_timedelta) + tm.assert_series_equal(np.nan * timedelta_series, nat_series_dtype_timedelta) + + # division + tm.assert_series_equal(timedelta_series / 2, Series([NaT, Timedelta("0.5s")])) + tm.assert_series_equal(timedelta_series / 2.0, Series([NaT, Timedelta("0.5s")])) + tm.assert_series_equal(timedelta_series / np.nan, nat_series_dtype_timedelta) + + # ------------------------------------------------------------- + # Binary operations td64 arraylike and datetime-like + + @pytest.mark.parametrize("cls", [Timestamp, datetime, np.datetime64]) + def test_td64arr_add_sub_datetimelike_scalar( + self, cls, box_with_array, tz_naive_fixture + ): + # GH#11925, GH#29558, GH#23215 + tz = tz_naive_fixture + + dt_scalar = Timestamp("2012-01-01", tz=tz) + if cls is datetime: + ts = dt_scalar.to_pydatetime() + elif cls is np.datetime64: + if tz_naive_fixture is not None: + return + ts = dt_scalar.to_datetime64() + else: + ts = dt_scalar + + tdi = timedelta_range("1 day", periods=3) + expected = pd.date_range("2012-01-02", periods=3, tz=tz) + + tdarr = tm.box_expected(tdi, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + tm.assert_equal(ts + tdarr, expected) + tm.assert_equal(tdarr + ts, expected) + + expected2 = pd.date_range("2011-12-31", periods=3, freq="-1D", tz=tz) + expected2 = tm.box_expected(expected2, box_with_array) + + tm.assert_equal(ts - tdarr, expected2) + tm.assert_equal(ts + (-tdarr), expected2) + + msg = "cannot subtract a datelike" + with pytest.raises(TypeError, match=msg): + tdarr - ts + + def test_td64arr_add_datetime64_nat(self, box_with_array): + # GH#23215 + other = np.datetime64("NaT") + + tdi = timedelta_range("1 day", periods=3) + expected = DatetimeIndex(["NaT", "NaT", "NaT"]) + + tdser = tm.box_expected(tdi, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + tm.assert_equal(tdser + other, expected) + tm.assert_equal(other + tdser, expected) + + def test_td64arr_sub_dt64_array(self, box_with_array): + dti = pd.date_range("2016-01-01", periods=3) + tdi = TimedeltaIndex(["-1 Day"] * 3) + dtarr = dti.values + expected = DatetimeIndex(dtarr) - tdi + + tdi = tm.box_expected(tdi, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + msg = "cannot subtract a datelike from" + with pytest.raises(TypeError, match=msg): + tdi - dtarr + + # TimedeltaIndex.__rsub__ + 
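+        # (a rough sketch of the dispatch, assuming this module's imports:
+        # numpy sees a pandas object on the right and yields to it, so the
+        # reflected subtraction runs in pandas and returns datetimes)
+        # >>> pd.date_range("2016-01-01", periods=3).values - TimedeltaIndex(["-1 Day"] * 3)
+        # DatetimeIndex(['2016-01-02', '2016-01-03', '2016-01-04'], ...)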
result = dtarr - tdi + tm.assert_equal(result, expected) + + def test_td64arr_add_dt64_array(self, box_with_array): + dti = pd.date_range("2016-01-01", periods=3) + tdi = TimedeltaIndex(["-1 Day"] * 3) + dtarr = dti.values + expected = DatetimeIndex(dtarr) + tdi + + tdi = tm.box_expected(tdi, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = tdi + dtarr + tm.assert_equal(result, expected) + result = dtarr + tdi + tm.assert_equal(result, expected) + + # ------------------------------------------------------------------ + # Invalid __add__/__sub__ operations + + @pytest.mark.parametrize("pi_freq", ["D", "W", "Q", "H"]) + @pytest.mark.parametrize("tdi_freq", [None, "H"]) + def test_td64arr_sub_periodlike( + self, box_with_array, box_with_array2, tdi_freq, pi_freq + ): + # GH#20049 subtracting PeriodIndex should raise TypeError + tdi = TimedeltaIndex(["1 hours", "2 hours"], freq=tdi_freq) + dti = Timestamp("2018-03-07 17:16:40") + tdi + pi = dti.to_period(pi_freq) + per = pi[0] + + tdi = tm.box_expected(tdi, box_with_array) + pi = tm.box_expected(pi, box_with_array2) + msg = "cannot subtract|unsupported operand type" + with pytest.raises(TypeError, match=msg): + tdi - pi + + # GH#13078 subtraction of Period scalar not supported + with pytest.raises(TypeError, match=msg): + tdi - per + + @pytest.mark.parametrize( + "other", + [ + # GH#12624 for str case + "a", + # GH#19123 + 1, + 1.5, + np.array(2), + ], + ) + def test_td64arr_addsub_numeric_scalar_invalid(self, box_with_array, other): + # vector-like others are tested in test_td64arr_add_sub_numeric_arr_invalid + tdser = Series(["59 Days", "59 Days", "NaT"], dtype="m8[ns]") + tdarr = tm.box_expected(tdser, box_with_array) + + assert_invalid_addsub_type(tdarr, other) + + @pytest.mark.parametrize( + "vec", + [ + np.array([1, 2, 3]), + pd.Index([1, 2, 3]), + Series([1, 2, 3]), + DataFrame([[1, 2, 3]]), + ], + ids=lambda x: type(x).__name__, + ) + def test_td64arr_addsub_numeric_arr_invalid( + self, box_with_array, vec, any_real_numpy_dtype + ): + tdser = Series(["59 Days", "59 Days", "NaT"], dtype="m8[ns]") + tdarr = tm.box_expected(tdser, box_with_array) + + vector = vec.astype(any_real_numpy_dtype) + assert_invalid_addsub_type(tdarr, vector) + + def test_td64arr_add_sub_int(self, box_with_array, one): + # Variants of `one` for #19012, deprecated GH#22535 + rng = timedelta_range("1 days 09:00:00", freq="H", periods=10) + tdarr = tm.box_expected(rng, box_with_array) + + msg = "Addition/subtraction of integers" + assert_invalid_addsub_type(tdarr, one, msg) + + # TODO: get inplace ops into assert_invalid_addsub_type + with pytest.raises(TypeError, match=msg): + tdarr += one + with pytest.raises(TypeError, match=msg): + tdarr -= one + + def test_td64arr_add_sub_integer_array(self, box_with_array): + # GH#19959, deprecated GH#22535 + # GH#22696 for DataFrame case, check that we don't dispatch to numpy + # implementation, which treats int64 as m8[ns] + box = box_with_array + xbox = np.ndarray if box is pd.array else box + + rng = timedelta_range("1 days 09:00:00", freq="H", periods=3) + tdarr = tm.box_expected(rng, box) + other = tm.box_expected([4, 3, 2], xbox) + + msg = "Addition/subtraction of integers and integer-arrays" + assert_invalid_addsub_type(tdarr, other, msg) + + def test_td64arr_addsub_integer_array_no_freq(self, box_with_array): + # GH#19959 + box = box_with_array + xbox = np.ndarray if box is pd.array else box + + tdi = TimedeltaIndex(["1 Day", "NaT", "3 Hours"]) + tdarr = tm.box_expected(tdi, box) + 
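+        # integers carry no time unit, so pandas rejects them rather than
+        # guess one; a rough sketch of the failure mode, assuming this
+        # module's imports:
+        # >>> TimedeltaIndex(["1 Day", "NaT", "3 Hours"]) + np.array([14, -1, 16])
+        # TypeError: Addition/subtraction of integers and integer-arrays ...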
other = tm.box_expected([14, -1, 16], xbox) + + msg = "Addition/subtraction of integers" + assert_invalid_addsub_type(tdarr, other, msg) + + # ------------------------------------------------------------------ + # Operations with timedelta-like others + + def test_td64arr_add_sub_td64_array(self, box_with_array): + box = box_with_array + dti = pd.date_range("2016-01-01", periods=3) + tdi = dti - dti.shift(1) + tdarr = tdi.values + + expected = 2 * tdi + tdi = tm.box_expected(tdi, box) + expected = tm.box_expected(expected, box) + + result = tdi + tdarr + tm.assert_equal(result, expected) + result = tdarr + tdi + tm.assert_equal(result, expected) + + expected_sub = 0 * tdi + result = tdi - tdarr + tm.assert_equal(result, expected_sub) + result = tdarr - tdi + tm.assert_equal(result, expected_sub) + + def test_td64arr_add_sub_tdi(self, box_with_array, names): + # GH#17250 make sure result dtype is correct + # GH#19043 make sure names are propagated correctly + box = box_with_array + exname = get_expected_name(box, names) + + tdi = TimedeltaIndex(["0 days", "1 day"], name=names[1]) + tdi = np.array(tdi) if box in [tm.to_array, pd.array] else tdi + ser = Series([Timedelta(hours=3), Timedelta(hours=4)], name=names[0]) + expected = Series([Timedelta(hours=3), Timedelta(days=1, hours=4)], name=exname) + + ser = tm.box_expected(ser, box) + expected = tm.box_expected(expected, box) + + result = tdi + ser + tm.assert_equal(result, expected) + assert_dtype(result, "timedelta64[ns]") + + result = ser + tdi + tm.assert_equal(result, expected) + assert_dtype(result, "timedelta64[ns]") + + expected = Series( + [Timedelta(hours=-3), Timedelta(days=1, hours=-4)], name=exname + ) + expected = tm.box_expected(expected, box) + + result = tdi - ser + tm.assert_equal(result, expected) + assert_dtype(result, "timedelta64[ns]") + + result = ser - tdi + tm.assert_equal(result, -expected) + assert_dtype(result, "timedelta64[ns]") + + @pytest.mark.parametrize("tdnat", [np.timedelta64("NaT"), NaT]) + def test_td64arr_add_sub_td64_nat(self, box_with_array, tdnat): + # GH#18808, GH#23320 special handling for timedelta64("NaT") + box = box_with_array + tdi = TimedeltaIndex([NaT, Timedelta("1s")]) + expected = TimedeltaIndex(["NaT"] * 2) + + obj = tm.box_expected(tdi, box) + expected = tm.box_expected(expected, box) + + result = obj + tdnat + tm.assert_equal(result, expected) + result = tdnat + obj + tm.assert_equal(result, expected) + result = obj - tdnat + tm.assert_equal(result, expected) + result = tdnat - obj + tm.assert_equal(result, expected) + + def test_td64arr_add_timedeltalike(self, two_hours, box_with_array): + # only test adding/sub offsets as + is now numeric + # GH#10699 for Tick cases + box = box_with_array + rng = timedelta_range("1 days", "10 days") + expected = timedelta_range("1 days 02:00:00", "10 days 02:00:00", freq="D") + rng = tm.box_expected(rng, box) + expected = tm.box_expected(expected, box) + + result = rng + two_hours + tm.assert_equal(result, expected) + + result = two_hours + rng + tm.assert_equal(result, expected) + + def test_td64arr_sub_timedeltalike(self, two_hours, box_with_array): + # only test adding/sub offsets as - is now numeric + # GH#10699 for Tick cases + box = box_with_array + rng = timedelta_range("1 days", "10 days") + expected = timedelta_range("0 days 22:00:00", "9 days 22:00:00") + + rng = tm.box_expected(rng, box) + expected = tm.box_expected(expected, box) + + result = rng - two_hours + tm.assert_equal(result, expected) + + result = two_hours - rng + 
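+        # the reflected op is just the negation of rng - two_hours; at the
+        # scalar level (a rough sketch):
+        # >>> pd.Timedelta("2 hours") - pd.Timedelta("1 days")
+        # Timedelta('-1 days +02:00:00')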
tm.assert_equal(result, -expected) + + # ------------------------------------------------------------------ + # __add__/__sub__ with DateOffsets and arrays of DateOffsets + + def test_td64arr_add_sub_offset_index(self, names, box_with_array): + # GH#18849, GH#19744 + box = box_with_array + exname = get_expected_name(box, names) + + tdi = TimedeltaIndex(["1 days 00:00:00", "3 days 04:00:00"], name=names[0]) + other = pd.Index([offsets.Hour(n=1), offsets.Minute(n=-2)], name=names[1]) + other = np.array(other) if box in [tm.to_array, pd.array] else other + + expected = TimedeltaIndex( + [tdi[n] + other[n] for n in range(len(tdi))], freq="infer", name=exname + ) + expected_sub = TimedeltaIndex( + [tdi[n] - other[n] for n in range(len(tdi))], freq="infer", name=exname + ) + + tdi = tm.box_expected(tdi, box) + expected = tm.box_expected(expected, box) + expected_sub = tm.box_expected(expected_sub, box) + + with tm.assert_produces_warning(PerformanceWarning): + res = tdi + other + tm.assert_equal(res, expected) + + with tm.assert_produces_warning(PerformanceWarning): + res2 = other + tdi + tm.assert_equal(res2, expected) + + with tm.assert_produces_warning(PerformanceWarning): + res_sub = tdi - other + tm.assert_equal(res_sub, expected_sub) + + def test_td64arr_add_sub_offset_array(self, box_with_array): + # GH#18849, GH#18824 + box = box_with_array + tdi = TimedeltaIndex(["1 days 00:00:00", "3 days 04:00:00"]) + other = np.array([offsets.Hour(n=1), offsets.Minute(n=-2)]) + + expected = TimedeltaIndex( + [tdi[n] + other[n] for n in range(len(tdi))], freq="infer" + ) + expected_sub = TimedeltaIndex( + [tdi[n] - other[n] for n in range(len(tdi))], freq="infer" + ) + + tdi = tm.box_expected(tdi, box) + expected = tm.box_expected(expected, box) + + with tm.assert_produces_warning(PerformanceWarning): + res = tdi + other + tm.assert_equal(res, expected) + + with tm.assert_produces_warning(PerformanceWarning): + res2 = other + tdi + tm.assert_equal(res2, expected) + + expected_sub = tm.box_expected(expected_sub, box_with_array) + with tm.assert_produces_warning(PerformanceWarning): + res_sub = tdi - other + tm.assert_equal(res_sub, expected_sub) + + def test_td64arr_with_offset_series(self, names, box_with_array): + # GH#18849 + box = box_with_array + box2 = Series if box in [pd.Index, tm.to_array, pd.array] else box + exname = get_expected_name(box, names) + + tdi = TimedeltaIndex(["1 days 00:00:00", "3 days 04:00:00"], name=names[0]) + other = Series([offsets.Hour(n=1), offsets.Minute(n=-2)], name=names[1]) + + expected_add = Series([tdi[n] + other[n] for n in range(len(tdi))], name=exname) + obj = tm.box_expected(tdi, box) + expected_add = tm.box_expected(expected_add, box2) + + with tm.assert_produces_warning(PerformanceWarning): + res = obj + other + tm.assert_equal(res, expected_add) + + with tm.assert_produces_warning(PerformanceWarning): + res2 = other + obj + tm.assert_equal(res2, expected_add) + + expected_sub = Series([tdi[n] - other[n] for n in range(len(tdi))], name=exname) + expected_sub = tm.box_expected(expected_sub, box2) + + with tm.assert_produces_warning(PerformanceWarning): + res3 = obj - other + tm.assert_equal(res3, expected_sub) + + @pytest.mark.parametrize("obox", [np.array, pd.Index, Series]) + def test_td64arr_addsub_anchored_offset_arraylike(self, obox, box_with_array): + # GH#18824 + tdi = TimedeltaIndex(["1 days 00:00:00", "3 days 04:00:00"]) + tdi = tm.box_expected(tdi, box_with_array) + + anchored = obox([offsets.MonthEnd(), offsets.Day(n=2)]) + + # addition/subtraction 
ops with anchored offsets should issue + # a PerformanceWarning and _then_ raise a TypeError. + msg = "has incorrect type|cannot add the type MonthEnd" + with pytest.raises(TypeError, match=msg): + with tm.assert_produces_warning(PerformanceWarning): + tdi + anchored + with pytest.raises(TypeError, match=msg): + with tm.assert_produces_warning(PerformanceWarning): + anchored + tdi + with pytest.raises(TypeError, match=msg): + with tm.assert_produces_warning(PerformanceWarning): + tdi - anchored + with pytest.raises(TypeError, match=msg): + with tm.assert_produces_warning(PerformanceWarning): + anchored - tdi + + # ------------------------------------------------------------------ + # Unsorted + + def test_td64arr_add_sub_object_array(self, box_with_array): + box = box_with_array + xbox = np.ndarray if box is pd.array else box + + tdi = timedelta_range("1 day", periods=3, freq="D") + tdarr = tm.box_expected(tdi, box) + + other = np.array([Timedelta(days=1), offsets.Day(2), Timestamp("2000-01-04")]) + + with tm.assert_produces_warning(PerformanceWarning): + result = tdarr + other + + expected = pd.Index( + [Timedelta(days=2), Timedelta(days=4), Timestamp("2000-01-07")] + ) + expected = tm.box_expected(expected, xbox) + tm.assert_equal(result, expected) + + msg = "unsupported operand type|cannot subtract a datelike" + with pytest.raises(TypeError, match=msg): + with tm.assert_produces_warning(PerformanceWarning): + tdarr - other + + with tm.assert_produces_warning(PerformanceWarning): + result = other - tdarr + + expected = pd.Index([Timedelta(0), Timedelta(0), Timestamp("2000-01-01")]) + expected = tm.box_expected(expected, xbox) + tm.assert_equal(result, expected) + + +class TestTimedeltaArraylikeMulDivOps: + # Tests for timedelta64[ns] + # __mul__, __rmul__, __div__, __rdiv__, __floordiv__, __rfloordiv__ + + # ------------------------------------------------------------------ + # Multiplication + # organized with scalar others first, then array-like + + def test_td64arr_mul_int(self, box_with_array): + idx = TimedeltaIndex(np.arange(5, dtype="int64")) + idx = tm.box_expected(idx, box_with_array) + + result = idx * 1 + tm.assert_equal(result, idx) + + result = 1 * idx + tm.assert_equal(result, idx) + + def test_td64arr_mul_tdlike_scalar_raises(self, two_hours, box_with_array): + rng = timedelta_range("1 days", "10 days", name="foo") + rng = tm.box_expected(rng, box_with_array) + msg = "argument must be an integer|cannot use operands with types dtype" + with pytest.raises(TypeError, match=msg): + rng * two_hours + + def test_tdi_mul_int_array_zerodim(self, box_with_array): + rng5 = np.arange(5, dtype="int64") + idx = TimedeltaIndex(rng5) + expected = TimedeltaIndex(rng5 * 5) + + idx = tm.box_expected(idx, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = idx * np.array(5, dtype="int64") + tm.assert_equal(result, expected) + + def test_tdi_mul_int_array(self, box_with_array): + rng5 = np.arange(5, dtype="int64") + idx = TimedeltaIndex(rng5) + expected = TimedeltaIndex(rng5 ** 2) + + idx = tm.box_expected(idx, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = idx * rng5 + tm.assert_equal(result, expected) + + def test_tdi_mul_int_series(self, box_with_array): + box = box_with_array + xbox = Series if box in [pd.Index, tm.to_array, pd.array] else box + + idx = TimedeltaIndex(np.arange(5, dtype="int64")) + expected = TimedeltaIndex(np.arange(5, dtype="int64") ** 2) + + idx = tm.box_expected(idx, box) + expected = 
tm.box_expected(expected, xbox) + + result = idx * Series(np.arange(5, dtype="int64")) + tm.assert_equal(result, expected) + + def test_tdi_mul_float_series(self, box_with_array): + box = box_with_array + xbox = Series if box in [pd.Index, tm.to_array, pd.array] else box + + idx = TimedeltaIndex(np.arange(5, dtype="int64")) + idx = tm.box_expected(idx, box) + + rng5f = np.arange(5, dtype="float64") + expected = TimedeltaIndex(rng5f * (rng5f + 1.0)) + expected = tm.box_expected(expected, xbox) + + result = idx * Series(rng5f + 1.0) + tm.assert_equal(result, expected) + + # TODO: Put Series/DataFrame in others? + @pytest.mark.parametrize( + "other", + [ + np.arange(1, 11), + Int64Index(range(1, 11)), + UInt64Index(range(1, 11)), + Float64Index(range(1, 11)), + pd.RangeIndex(1, 11), + ], + ids=lambda x: type(x).__name__, + ) + def test_tdi_rmul_arraylike(self, other, box_with_array): + box = box_with_array + + tdi = TimedeltaIndex(["1 Day"] * 10) + expected = timedelta_range("1 days", "10 days")._with_freq(None) + + tdi = tm.box_expected(tdi, box) + xbox = get_upcast_box(tdi, other) + + expected = tm.box_expected(expected, xbox) + + result = other * tdi + tm.assert_equal(result, expected) + commute = tdi * other + tm.assert_equal(commute, expected) + + # ------------------------------------------------------------------ + # __div__, __rdiv__ + + def test_td64arr_div_nat_invalid(self, box_with_array): + # don't allow division by NaT (maybe could in the future) + rng = timedelta_range("1 days", "10 days", name="foo") + rng = tm.box_expected(rng, box_with_array) + + with pytest.raises(TypeError, match="unsupported operand type"): + rng / NaT + with pytest.raises(TypeError, match="Cannot divide NaTType by"): + NaT / rng + + dt64nat = np.datetime64("NaT", "ns") + msg = "|".join( + [ + # 'divide' on npdev as of 2021-12-18 + "ufunc '(true_divide|divide)' cannot use operands", + "cannot perform __r?truediv__", + "Cannot divide datetime64 by TimedeltaArray", + ] + ) + with pytest.raises(TypeError, match=msg): + rng / dt64nat + with pytest.raises(TypeError, match=msg): + dt64nat / rng + + def test_td64arr_div_td64nat(self, box_with_array): + # GH#23829 + box = box_with_array + xbox = np.ndarray if box is pd.array else box + + rng = timedelta_range("1 days", "10 days") + rng = tm.box_expected(rng, box) + + other = np.timedelta64("NaT") + + expected = np.array([np.nan] * 10) + expected = tm.box_expected(expected, xbox) + + result = rng / other + tm.assert_equal(result, expected) + + result = other / rng + tm.assert_equal(result, expected) + + def test_td64arr_div_int(self, box_with_array): + idx = TimedeltaIndex(np.arange(5, dtype="int64")) + idx = tm.box_expected(idx, box_with_array) + + result = idx / 1 + tm.assert_equal(result, idx) + + with pytest.raises(TypeError, match="Cannot divide"): + # GH#23829 + 1 / idx + + def test_td64arr_div_tdlike_scalar(self, two_hours, box_with_array): + # GH#20088, GH#22163 ensure DataFrame returns correct dtype + box = box_with_array + xbox = np.ndarray if box is pd.array else box + + rng = timedelta_range("1 days", "10 days", name="foo") + expected = Float64Index((np.arange(10) + 1) * 12, name="foo") + + rng = tm.box_expected(rng, box) + expected = tm.box_expected(expected, xbox) + + result = rng / two_hours + tm.assert_equal(result, expected) + + result = two_hours / rng + expected = 1 / expected + tm.assert_equal(result, expected) + + @pytest.mark.parametrize("m", [1, 3, 10]) + @pytest.mark.parametrize("unit", ["D", "h", "m", "s", "ms", "us", "ns"]) + def 
test_td64arr_div_td64_scalar(self, m, unit, box_with_array): + box = box_with_array + xbox = np.ndarray if box is pd.array else box + + ser = Series([Timedelta(days=59)] * 3) + ser[2] = np.nan + flat = ser + ser = tm.box_expected(ser, box) + + # op + expected = Series([x / np.timedelta64(m, unit) for x in flat]) + expected = tm.box_expected(expected, xbox) + result = ser / np.timedelta64(m, unit) + tm.assert_equal(result, expected) + + # reverse op + expected = Series([Timedelta(np.timedelta64(m, unit)) / x for x in flat]) + expected = tm.box_expected(expected, xbox) + result = np.timedelta64(m, unit) / ser + tm.assert_equal(result, expected) + + def test_td64arr_div_tdlike_scalar_with_nat(self, two_hours, box_with_array): + box = box_with_array + xbox = np.ndarray if box is pd.array else box + + rng = TimedeltaIndex(["1 days", NaT, "2 days"], name="foo") + expected = Float64Index([12, np.nan, 24], name="foo") + + rng = tm.box_expected(rng, box) + expected = tm.box_expected(expected, xbox) + + result = rng / two_hours + tm.assert_equal(result, expected) + + result = two_hours / rng + expected = 1 / expected + tm.assert_equal(result, expected) + + def test_td64arr_div_td64_ndarray(self, box_with_array): + # GH#22631 + box = box_with_array + xbox = np.ndarray if box is pd.array else box + + rng = TimedeltaIndex(["1 days", NaT, "2 days"]) + expected = Float64Index([12, np.nan, 24]) + + rng = tm.box_expected(rng, box) + expected = tm.box_expected(expected, xbox) + + other = np.array([2, 4, 2], dtype="m8[h]") + result = rng / other + tm.assert_equal(result, expected) + + result = rng / tm.box_expected(other, box) + tm.assert_equal(result, expected) + + result = rng / other.astype(object) + tm.assert_equal(result, expected) + + result = rng / list(other) + tm.assert_equal(result, expected) + + # reversed op + expected = 1 / expected + result = other / rng + tm.assert_equal(result, expected) + + result = tm.box_expected(other, box) / rng + tm.assert_equal(result, expected) + + result = other.astype(object) / rng + tm.assert_equal(result, expected) + + result = list(other) / rng + tm.assert_equal(result, expected) + + def test_tdarr_div_length_mismatch(self, box_with_array): + rng = TimedeltaIndex(["1 days", NaT, "2 days"]) + mismatched = [1, 2, 3, 4] + + rng = tm.box_expected(rng, box_with_array) + msg = "Cannot divide vectors|Unable to coerce to Series" + for obj in [mismatched, mismatched[:2]]: + # one shorter, one longer + for other in [obj, np.array(obj), pd.Index(obj)]: + with pytest.raises(ValueError, match=msg): + rng / other + with pytest.raises(ValueError, match=msg): + other / rng + + # ------------------------------------------------------------------ + # __floordiv__, __rfloordiv__ + + def test_td64arr_floordiv_td64arr_with_nat( + self, box_with_array, using_array_manager + ): + # GH#35529 + box = box_with_array + xbox = np.ndarray if box is pd.array else box + + left = Series([1000, 222330, 30], dtype="timedelta64[ns]") + right = Series([1000, 222330, None], dtype="timedelta64[ns]") + + left = tm.box_expected(left, box) + right = tm.box_expected(right, box) + + expected = np.array([1.0, 1.0, np.nan], dtype=np.float64) + expected = tm.box_expected(expected, xbox) + if box is DataFrame and using_array_manager: + # INFO(ArrayManager) floorfiv returns integer, and ArrayManager + # performs ops column-wise and thus preserves int64 dtype for + # columns without missing values + expected[[0, 1]] = expected[[0, 1]].astype("int64") + + result = left // right + + tm.assert_equal(result, 
expected) + + # case that goes through __rfloordiv__ with arraylike + result = np.asarray(left) // right + tm.assert_equal(result, expected) + + def test_td64arr_floordiv_tdscalar(self, box_with_array, scalar_td): + # GH#18831, GH#19125 + box = box_with_array + xbox = np.ndarray if box is pd.array else box + td = Timedelta("5m3s") # i.e. (scalar_td - 1sec) / 2 + + td1 = Series([td, td, NaT], dtype="m8[ns]") + td1 = tm.box_expected(td1, box, transpose=False) + + expected = Series([0, 0, np.nan]) + expected = tm.box_expected(expected, xbox, transpose=False) + + result = td1 // scalar_td + tm.assert_equal(result, expected) + + # Reversed op + expected = Series([2, 2, np.nan]) + expected = tm.box_expected(expected, xbox, transpose=False) + + result = scalar_td // td1 + tm.assert_equal(result, expected) + + # same thing buts let's be explicit about calling __rfloordiv__ + result = td1.__rfloordiv__(scalar_td) + tm.assert_equal(result, expected) + + def test_td64arr_floordiv_int(self, box_with_array): + idx = TimedeltaIndex(np.arange(5, dtype="int64")) + idx = tm.box_expected(idx, box_with_array) + result = idx // 1 + tm.assert_equal(result, idx) + + pattern = "floor_divide cannot use operands|Cannot divide int by Timedelta*" + with pytest.raises(TypeError, match=pattern): + 1 // idx + + # ------------------------------------------------------------------ + # mod, divmod + # TODO: operations with timedelta-like arrays, numeric arrays, + # reversed ops + + def test_td64arr_mod_tdscalar(self, box_with_array, three_days): + tdi = timedelta_range("1 Day", "9 days") + tdarr = tm.box_expected(tdi, box_with_array) + + expected = TimedeltaIndex(["1 Day", "2 Days", "0 Days"] * 3) + expected = tm.box_expected(expected, box_with_array) + + result = tdarr % three_days + tm.assert_equal(result, expected) + + warn = None + if box_with_array is DataFrame and isinstance(three_days, pd.DateOffset): + warn = PerformanceWarning + + with tm.assert_produces_warning(warn): + result = divmod(tdarr, three_days) + + tm.assert_equal(result[1], expected) + tm.assert_equal(result[0], tdarr // three_days) + + def test_td64arr_mod_int(self, box_with_array): + tdi = timedelta_range("1 ns", "10 ns", periods=10) + tdarr = tm.box_expected(tdi, box_with_array) + + expected = TimedeltaIndex(["1 ns", "0 ns"] * 5) + expected = tm.box_expected(expected, box_with_array) + + result = tdarr % 2 + tm.assert_equal(result, expected) + + msg = "Cannot divide int by" + with pytest.raises(TypeError, match=msg): + 2 % tdarr + + result = divmod(tdarr, 2) + tm.assert_equal(result[1], expected) + tm.assert_equal(result[0], tdarr // 2) + + def test_td64arr_rmod_tdscalar(self, box_with_array, three_days): + tdi = timedelta_range("1 Day", "9 days") + tdarr = tm.box_expected(tdi, box_with_array) + + expected = ["0 Days", "1 Day", "0 Days"] + ["3 Days"] * 6 + expected = TimedeltaIndex(expected) + expected = tm.box_expected(expected, box_with_array) + + result = three_days % tdarr + tm.assert_equal(result, expected) + + result = divmod(three_days, tdarr) + tm.assert_equal(result[1], expected) + tm.assert_equal(result[0], three_days // tdarr) + + # ------------------------------------------------------------------ + # Operations with invalid others + + def test_td64arr_mul_tdscalar_invalid(self, box_with_array, scalar_td): + td1 = Series([timedelta(minutes=5, seconds=3)] * 3) + td1.iloc[2] = np.nan + + td1 = tm.box_expected(td1, box_with_array) + + # check that we are getting a TypeError + # with 'operate' (from core/ops.py) for the ops that are not + # 
defined + pattern = "operate|unsupported|cannot|not supported" + with pytest.raises(TypeError, match=pattern): + td1 * scalar_td + with pytest.raises(TypeError, match=pattern): + scalar_td * td1 + + def test_td64arr_mul_too_short_raises(self, box_with_array): + idx = TimedeltaIndex(np.arange(5, dtype="int64")) + idx = tm.box_expected(idx, box_with_array) + msg = "|".join( + [ + "cannot use operands with types dtype", + "Cannot multiply with unequal lengths", + "Unable to coerce to Series", + ] + ) + with pytest.raises(TypeError, match=msg): + # length check before dtype check + idx * idx[:3] + with pytest.raises(ValueError, match=msg): + idx * np.array([1, 2]) + + def test_td64arr_mul_td64arr_raises(self, box_with_array): + idx = TimedeltaIndex(np.arange(5, dtype="int64")) + idx = tm.box_expected(idx, box_with_array) + msg = "cannot use operands with types dtype" + with pytest.raises(TypeError, match=msg): + idx * idx + + # ------------------------------------------------------------------ + # Operations with numeric others + + def test_td64arr_mul_numeric_scalar(self, box_with_array, one): + # GH#4521 + # divide/multiply by integers + tdser = Series(["59 Days", "59 Days", "NaT"], dtype="m8[ns]") + expected = Series(["-59 Days", "-59 Days", "NaT"], dtype="timedelta64[ns]") + + tdser = tm.box_expected(tdser, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = tdser * (-one) + tm.assert_equal(result, expected) + result = (-one) * tdser + tm.assert_equal(result, expected) + + expected = Series(["118 Days", "118 Days", "NaT"], dtype="timedelta64[ns]") + expected = tm.box_expected(expected, box_with_array) + + result = tdser * (2 * one) + tm.assert_equal(result, expected) + result = (2 * one) * tdser + tm.assert_equal(result, expected) + + @pytest.mark.parametrize("two", [2, 2.0, np.array(2), np.array(2.0)]) + def test_td64arr_div_numeric_scalar(self, box_with_array, two): + # GH#4521 + # divide/multiply by integers + tdser = Series(["59 Days", "59 Days", "NaT"], dtype="m8[ns]") + expected = Series(["29.5D", "29.5D", "NaT"], dtype="timedelta64[ns]") + + tdser = tm.box_expected(tdser, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = tdser / two + tm.assert_equal(result, expected) + + with pytest.raises(TypeError, match="Cannot divide"): + two / tdser + + @pytest.mark.parametrize("two", [2, 2.0, np.array(2), np.array(2.0)]) + def test_td64arr_floordiv_numeric_scalar(self, box_with_array, two): + tdser = Series(["59 Days", "59 Days", "NaT"], dtype="m8[ns]") + expected = Series(["29.5D", "29.5D", "NaT"], dtype="timedelta64[ns]") + + tdser = tm.box_expected(tdser, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = tdser // two + tm.assert_equal(result, expected) + + with pytest.raises(TypeError, match="Cannot divide"): + two // tdser + + @pytest.mark.parametrize( + "vector", + [np.array([20, 30, 40]), pd.Index([20, 30, 40]), Series([20, 30, 40])], + ids=lambda x: type(x).__name__, + ) + def test_td64arr_rmul_numeric_array( + self, + box_with_array, + vector, + any_real_numpy_dtype, + ): + # GH#4521 + # divide/multiply by integers + + tdser = Series(["59 Days", "59 Days", "NaT"], dtype="m8[ns]") + vector = vector.astype(any_real_numpy_dtype) + + expected = Series(["1180 Days", "1770 Days", "NaT"], dtype="timedelta64[ns]") + + tdser = tm.box_expected(tdser, box_with_array) + xbox = get_upcast_box(tdser, vector) + + expected = tm.box_expected(expected, xbox) + + result = tdser * vector + 
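+        # get_upcast_box returns the higher-priority container of the two
+        # operands (DataFrame over Series over Index/array), hence expected
+        # is boxed with xbox, not box_with_array; the element-wise math is
+        # plain multiplication (a rough sketch):
+        # >>> pd.Timedelta("59 Days") * 20
+        # Timedelta('1180 days 00:00:00')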
tm.assert_equal(result, expected) + + result = vector * tdser + tm.assert_equal(result, expected) + + @pytest.mark.parametrize( + "vector", + [np.array([20, 30, 40]), pd.Index([20, 30, 40]), Series([20, 30, 40])], + ids=lambda x: type(x).__name__, + ) + def test_td64arr_div_numeric_array( + self, box_with_array, vector, any_real_numpy_dtype + ): + # GH#4521 + # divide/multiply by integers + + tdser = Series(["59 Days", "59 Days", "NaT"], dtype="m8[ns]") + vector = vector.astype(any_real_numpy_dtype) + + expected = Series(["2.95D", "1D 23H 12m", "NaT"], dtype="timedelta64[ns]") + + tdser = tm.box_expected(tdser, box_with_array) + xbox = get_upcast_box(tdser, vector) + expected = tm.box_expected(expected, xbox) + + result = tdser / vector + tm.assert_equal(result, expected) + + pattern = "|".join( + [ + "true_divide'? cannot use operands", + "cannot perform __div__", + "cannot perform __truediv__", + "unsupported operand", + "Cannot divide", + ] + ) + with pytest.raises(TypeError, match=pattern): + vector / tdser + + result = tdser / vector.astype(object) + if box_with_array is DataFrame: + expected = [tdser.iloc[0, n] / vector[n] for n in range(len(vector))] + else: + expected = [tdser[n] / vector[n] for n in range(len(tdser))] + expected = pd.Index(expected) # do dtype inference + expected = tm.box_expected(expected, xbox) + assert tm.get_dtype(expected) == "m8[ns]" + + tm.assert_equal(result, expected) + + with pytest.raises(TypeError, match=pattern): + vector.astype(object) / tdser + + def test_td64arr_mul_int_series(self, box_with_array, names): + # GH#19042 test for correct name attachment + box = box_with_array + exname = get_expected_name(box, names) + + tdi = TimedeltaIndex( + ["0days", "1day", "2days", "3days", "4days"], name=names[0] + ) + # TODO: Should we be parametrizing over types for `ser` too? + ser = Series([0, 1, 2, 3, 4], dtype=np.int64, name=names[1]) + + expected = Series( + ["0days", "1day", "4days", "9days", "16days"], + dtype="timedelta64[ns]", + name=exname, + ) + + tdi = tm.box_expected(tdi, box) + xbox = get_upcast_box(tdi, ser) + + expected = tm.box_expected(expected, xbox) + + result = ser * tdi + tm.assert_equal(result, expected) + + result = tdi * ser + tm.assert_equal(result, expected) + + # TODO: Should we be parametrizing over types for `ser` too? 
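+
+    # The test below goes through Series.__rtruediv__ explicitly; note that
+    # timedelta / float stays a timedelta (a rough sketch):
+    # >>> pd.Timedelta("1 day") / 1.5
+    # Timedelta('0 days 16:00:00')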
+    def test_float_series_rdiv_td64arr(self, box_with_array, names):
+        # GH#19042 test for correct name attachment
+        box = box_with_array
+        tdi = TimedeltaIndex(
+            ["0days", "1day", "2days", "3days", "4days"], name=names[0]
+        )
+        ser = Series([1.5, 3, 4.5, 6, 7.5], dtype=np.float64, name=names[1])
+
+        xname = names[2] if box not in [tm.to_array, pd.array] else names[1]
+        expected = Series(
+            [tdi[n] / ser[n] for n in range(len(ser))],
+            dtype="timedelta64[ns]",
+            name=xname,
+        )
+
+        tdi = tm.box_expected(tdi, box)
+        xbox = get_upcast_box(tdi, ser)
+        expected = tm.box_expected(expected, xbox)
+
+        result = ser.__rtruediv__(tdi)
+        if box is DataFrame:
+            assert result is NotImplemented
+        else:
+            tm.assert_equal(result, expected)
+
+    def test_td64arr_all_nat_div_object_dtype_numeric(self, box_with_array):
+        # GH#39750 make sure we infer the result as td64
+        tdi = TimedeltaIndex([NaT, NaT])
+
+        left = tm.box_expected(tdi, box_with_array)
+        right = np.array([2, 2.0], dtype=object)
+
+        result = left / right
+        tm.assert_equal(result, left)
+
+        result = left // right
+        tm.assert_equal(result, left)
+
+
+class TestTimedelta64ArrayLikeArithmetic:
+    # Arithmetic tests for timedelta64[ns] vectors fully parametrized over
+    # DataFrame/Series/TimedeltaIndex/TimedeltaArray. Ideally all arithmetic
+    # tests will eventually end up here.
+
+    def test_td64arr_pow_invalid(self, scalar_td, box_with_array):
+        td1 = Series([timedelta(minutes=5, seconds=3)] * 3)
+        td1.iloc[2] = np.nan
+
+        td1 = tm.box_expected(td1, box_with_array)
+
+        # check that we are getting a TypeError
+        # with 'operate' (from core/ops.py) for the ops that are not
+        # defined
+        pattern = "operate|unsupported|cannot|not supported"
+        with pytest.raises(TypeError, match=pattern):
+            scalar_td ** td1
+
+        with pytest.raises(TypeError, match=pattern):
+            td1 ** scalar_td
+
+
+def test_add_timestamp_to_timedelta():
+    # GH: 35897
+    timestamp = Timestamp("2021-01-01")
+    result = timestamp + timedelta_range("0s", "1s", periods=31)
+    expected = DatetimeIndex(
+        [
+            timestamp
+            + (
+                pd.to_timedelta("0.033333333s") * i
+                + pd.to_timedelta("0.000000001s") * divmod(i, 3)[0]
+            )
+            for i in range(31)
+        ]
+    )
+    tm.assert_index_equal(result, expected)
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/boolean/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/boolean/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/boolean/test_arithmetic.py
b/.local/lib/python3.8/site-packages/pandas/tests/arrays/boolean/test_arithmetic.py new file mode 100644 index 0000000..f8f1af4 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/boolean/test_arithmetic.py @@ -0,0 +1,121 @@ +import operator + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.arrays import FloatingArray + + +@pytest.fixture +def data(): + return pd.array( + [True, False] * 4 + [np.nan] + [True, False] * 44 + [np.nan] + [True, False], + dtype="boolean", + ) + + +@pytest.fixture +def left_array(): + return pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + + +@pytest.fixture +def right_array(): + return pd.array([True, False, None] * 3, dtype="boolean") + + +# Basic test for the arithmetic array ops +# ----------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "opname, exp", + [ + ("add", [True, True, None, True, False, None, None, None, None]), + ("mul", [True, False, None, False, False, None, None, None, None]), + ], + ids=["add", "mul"], +) +def test_add_mul(left_array, right_array, opname, exp): + op = getattr(operator, opname) + result = op(left_array, right_array) + expected = pd.array(exp, dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + +def test_sub(left_array, right_array): + msg = ( + r"numpy boolean subtract, the `-` operator, is (?:deprecated|not supported), " + r"use the bitwise_xor, the `\^` operator, or the logical_xor function instead\." + ) + with pytest.raises(TypeError, match=msg): + left_array - right_array + + +def test_div(left_array, right_array): + result = left_array / right_array + expected = FloatingArray( + np.array( + [1.0, np.inf, np.nan, 0.0, np.nan, np.nan, np.nan, np.nan, np.nan], + dtype="float64", + ), + np.array([False, False, True, False, False, True, True, True, True]), + ) + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize( + "opname", + [ + "floordiv", + "mod", + pytest.param( + "pow", marks=pytest.mark.xfail(reason="TODO follow int8 behaviour? 
GH34686") + ), + ], +) +def test_op_int8(left_array, right_array, opname): + op = getattr(operator, opname) + result = op(left_array, right_array) + expected = op(left_array.astype("Int8"), right_array.astype("Int8")) + tm.assert_extension_array_equal(result, expected) + + +# Test generic characteristics / errors +# ----------------------------------------------------------------------------- + + +def test_error_invalid_values(data, all_arithmetic_operators): + # invalid ops + + op = all_arithmetic_operators + s = pd.Series(data) + ops = getattr(s, op) + + # invalid scalars + msg = ( + "did not contain a loop with signature matching types|" + "BooleanArray cannot perform the operation|" + "not supported for the input types, and the inputs could not be safely coerced " + "to any supported types according to the casting rule ''safe''" + ) + with pytest.raises(TypeError, match=msg): + ops("foo") + msg = ( + r"unsupported operand type\(s\) for|" + "Concatenation operation is not implemented for NumPy arrays" + ) + with pytest.raises(TypeError, match=msg): + ops(pd.Timestamp("20180101")) + + # invalid array-likes + if op not in ("__mul__", "__rmul__"): + # TODO(extension) numpy's mul with object array sees booleans as numbers + msg = ( + r"unsupported operand type\(s\) for|can only concatenate str|" + "not all arguments converted during string formatting" + ) + with pytest.raises(TypeError, match=msg): + ops(pd.Series("foo", index=s.index)) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/boolean/test_astype.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/boolean/test_astype.py new file mode 100644 index 0000000..57cec70 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/boolean/test_astype.py @@ -0,0 +1,53 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +def test_astype(): + # with missing values + arr = pd.array([True, False, None], dtype="boolean") + + with pytest.raises(ValueError, match="cannot convert NA to integer"): + arr.astype("int64") + + with pytest.raises(ValueError, match="cannot convert float NaN to"): + arr.astype("bool") + + result = arr.astype("float64") + expected = np.array([1, 0, np.nan], dtype="float64") + tm.assert_numpy_array_equal(result, expected) + + result = arr.astype("str") + expected = np.array(["True", "False", ""], dtype=" object dtype + arr = pd.array([True, False, None], dtype="boolean") + result = np.array(arr) + expected = np.array([True, False, pd.NA], dtype="object") + tm.assert_numpy_array_equal(result, expected) + + # also with no missing values -> object dtype + arr = pd.array([True, False, True], dtype="boolean") + result = np.array(arr) + expected = np.array([True, False, True], dtype="object") + tm.assert_numpy_array_equal(result, expected) + + # force bool dtype + result = np.array(arr, dtype="bool") + expected = np.array([True, False, True], dtype="bool") + tm.assert_numpy_array_equal(result, expected) + # with missing values will raise error + arr = pd.array([True, False, None], dtype="boolean") + msg = ( + "cannot convert to 'bool'-dtype NumPy array with missing values. " + "Specify an appropriate 'na_value' for this dtype." 
+ ) + with pytest.raises(ValueError, match=msg): + np.array(arr, dtype="bool") + + +def test_to_boolean_array_from_strings(): + result = BooleanArray._from_sequence_of_strings( + np.array(["True", "False", "1", "1.0", "0", "0.0", np.nan], dtype=object) + ) + expected = BooleanArray( + np.array([True, False, True, True, False, False, False]), + np.array([False, False, False, False, False, False, True]), + ) + + tm.assert_extension_array_equal(result, expected) + + +def test_to_boolean_array_from_strings_invalid_string(): + with pytest.raises(ValueError, match="cannot be cast"): + BooleanArray._from_sequence_of_strings(["donkey"]) + + +@pytest.mark.parametrize("box", [True, False], ids=["series", "array"]) +def test_to_numpy(box): + con = pd.Series if box else pd.array + # default (with or without missing values) -> object dtype + arr = con([True, False, True], dtype="boolean") + result = arr.to_numpy() + expected = np.array([True, False, True], dtype="object") + tm.assert_numpy_array_equal(result, expected) + + arr = con([True, False, None], dtype="boolean") + result = arr.to_numpy() + expected = np.array([True, False, pd.NA], dtype="object") + tm.assert_numpy_array_equal(result, expected) + + arr = con([True, False, None], dtype="boolean") + result = arr.to_numpy(dtype="str") + expected = np.array([True, False, pd.NA], dtype="<U5") + tm.assert_numpy_array_equal(result, expected) + + # no missing values -> can convert to bool, otherwise raises + arr = con([True, False, True], dtype="boolean") + result = arr.to_numpy(dtype="bool") + expected = np.array([True, False, True], dtype="bool") + tm.assert_numpy_array_equal(result, expected) + + arr = con([True, False, None], dtype="boolean") + with pytest.raises(ValueError, match="cannot convert to 'bool'-dtype"): + result = arr.to_numpy(dtype="bool") + + # specify dtype and na_value + arr = con([True, False, None], dtype="boolean") + result = arr.to_numpy(dtype=object, na_value=None) + expected = np.array([True, False, None], dtype="object") + tm.assert_numpy_array_equal(result, expected) + + result = arr.to_numpy(dtype=bool, na_value=False) + expected = np.array([True, False, False], dtype="bool") + tm.assert_numpy_array_equal(result, expected) + + result = arr.to_numpy(dtype="int64", na_value=-99) + expected = np.array([1, 0, -99], dtype="int64") + tm.assert_numpy_array_equal(result, expected) + + result = arr.to_numpy(dtype="float64", na_value=np.nan) + expected = np.array([1, 0, np.nan], dtype="float64") + tm.assert_numpy_array_equal(result, expected) + + # converting to int or float without specifying na_value raises + with pytest.raises(ValueError, match="cannot convert to 'int64'-dtype"): + arr.to_numpy(dtype="int64") + with pytest.raises(ValueError, match="cannot convert to 'float64'-dtype"): + arr.to_numpy(dtype="float64") + + +def test_to_numpy_copy(): + # to_numpy can be zero-copy if no missing values + arr = pd.array([True, False, True], dtype="boolean") + result = arr.to_numpy(dtype=bool) + result[0] = False + tm.assert_extension_array_equal( + arr, pd.array([False, False, True], dtype="boolean") + ) + + arr = pd.array([True, False, True], dtype="boolean") + result = arr.to_numpy(dtype=bool, copy=True) + result[0] = False + tm.assert_extension_array_equal(arr, pd.array([True, False, True], dtype="boolean")) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/boolean/test_function.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/boolean/test_function.py new file mode 100644 index 0000000..8e9112b --- /dev/null +++
b/.local/lib/python3.8/site-packages/pandas/tests/arrays/boolean/test_function.py @@ -0,0 +1,126 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +@pytest.mark.parametrize( + "ufunc", [np.add, np.logical_or, np.logical_and, np.logical_xor] +) +def test_ufuncs_binary(ufunc): + # two BooleanArrays + a = pd.array([True, False, None], dtype="boolean") + result = ufunc(a, a) + expected = pd.array(ufunc(a._data, a._data), dtype="boolean") + expected[a._mask] = np.nan + tm.assert_extension_array_equal(result, expected) + + s = pd.Series(a) + result = ufunc(s, a) + expected = pd.Series(ufunc(a._data, a._data), dtype="boolean") + expected[a._mask] = np.nan + tm.assert_series_equal(result, expected) + + # Boolean with numpy array + arr = np.array([True, True, False]) + result = ufunc(a, arr) + expected = pd.array(ufunc(a._data, arr), dtype="boolean") + expected[a._mask] = np.nan + tm.assert_extension_array_equal(result, expected) + + result = ufunc(arr, a) + expected = pd.array(ufunc(arr, a._data), dtype="boolean") + expected[a._mask] = np.nan + tm.assert_extension_array_equal(result, expected) + + # BooleanArray with scalar + result = ufunc(a, True) + expected = pd.array(ufunc(a._data, True), dtype="boolean") + expected[a._mask] = np.nan + tm.assert_extension_array_equal(result, expected) + + result = ufunc(True, a) + expected = pd.array(ufunc(True, a._data), dtype="boolean") + expected[a._mask] = np.nan + tm.assert_extension_array_equal(result, expected) + + # not handled types + msg = r"operand type\(s\) all returned NotImplemented from __array_ufunc__" + with pytest.raises(TypeError, match=msg): + ufunc(a, "test") + + +@pytest.mark.parametrize("ufunc", [np.logical_not]) +def test_ufuncs_unary(ufunc): + a = pd.array([True, False, None], dtype="boolean") + result = ufunc(a) + expected = pd.array(ufunc(a._data), dtype="boolean") + expected[a._mask] = np.nan + tm.assert_extension_array_equal(result, expected) + + ser = pd.Series(a) + result = ufunc(ser) + expected = pd.Series(ufunc(a._data), dtype="boolean") + expected[a._mask] = np.nan + tm.assert_series_equal(result, expected) + + +def test_ufunc_numeric(): + # np.sqrt on np.bool returns float16, which we upcast to Float32 + # bc we do not have Float16 + arr = pd.array([True, False, None], dtype="boolean") + + res = np.sqrt(arr) + + expected = pd.array([1, 0, None], dtype="Float32") + tm.assert_extension_array_equal(res, expected) + + +@pytest.mark.parametrize("values", [[True, False], [True, None]]) +def test_ufunc_reduce_raises(values): + arr = pd.array(values, dtype="boolean") + + res = np.add.reduce(arr) + if arr[-1] is pd.NA: + expected = pd.NA + else: + expected = arr._data.sum() + tm.assert_almost_equal(res, expected) + + +def test_value_counts_na(): + arr = pd.array([True, False, pd.NA], dtype="boolean") + result = arr.value_counts(dropna=False) + expected = pd.Series([1, 1, 1], index=arr, dtype="Int64") + assert expected.index.dtype == arr.dtype + tm.assert_series_equal(result, expected) + + result = arr.value_counts(dropna=True) + expected = pd.Series([1, 1], index=arr[:-1], dtype="Int64") + assert expected.index.dtype == arr.dtype + tm.assert_series_equal(result, expected) + + +def test_value_counts_with_normalize(): + ser = pd.Series([True, False, pd.NA], dtype="boolean") + result = ser.value_counts(normalize=True) + expected = pd.Series([1, 1], index=ser[:-1], dtype="Float64") / 2 + assert expected.index.dtype == "boolean" + tm.assert_series_equal(result, expected) + + +def test_diff(): + 
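# [editorial sketch, not part of the original file] diff() on a masked boolean + # array compares each element with its predecessor under Kleene logic, so any + # pair that touches a missing slot yields pd.NA; e.g., hypothetically, + # pd.Series([True, None], dtype="boolean").diff() comes back as [<NA>, <NA>]. +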
a = pd.array( + [True, True, False, False, True, None, True, None, False], dtype="boolean" + ) + result = pd.core.algorithms.diff(a, 1) + expected = pd.array( + [None, False, True, False, True, None, None, None, None], dtype="boolean" + ) + tm.assert_extension_array_equal(result, expected) + + ser = pd.Series(a) + result = ser.diff() + expected = pd.Series(expected) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/boolean/test_indexing.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/boolean/test_indexing.py new file mode 100644 index 0000000..6a7daea --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/boolean/test_indexing.py @@ -0,0 +1,13 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +@pytest.mark.parametrize("na", [None, np.nan, pd.NA]) +def test_setitem_missing_values(na): + arr = pd.array([True, False, None], dtype="boolean") + expected = pd.array([True, None, None], dtype="boolean") + arr[1] = na + tm.assert_extension_array_equal(arr, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/boolean/test_logical.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/boolean/test_logical.py new file mode 100644 index 0000000..b4cca63 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/boolean/test_logical.py @@ -0,0 +1,254 @@ +import operator + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.arrays import BooleanArray +from pandas.core.ops.mask_ops import ( + kleene_and, + kleene_or, + kleene_xor, +) +from pandas.tests.extension.base import BaseOpsUtil + + +class TestLogicalOps(BaseOpsUtil): + def test_numpy_scalars_ok(self, all_logical_operators): + a = pd.array([True, False, None], dtype="boolean") + op = getattr(a, all_logical_operators) + + tm.assert_extension_array_equal(op(True), op(np.bool_(True))) + tm.assert_extension_array_equal(op(False), op(np.bool_(False))) + + def get_op_from_name(self, op_name): + short_opname = op_name.strip("_") + short_opname = short_opname if "xor" in short_opname else short_opname + "_" + try: + op = getattr(operator, short_opname) + except AttributeError: + # Assume it is the reverse operator + rop = getattr(operator, short_opname[1:]) + op = lambda x, y: rop(y, x) + + return op + + def test_empty_ok(self, all_logical_operators): + a = pd.array([], dtype="boolean") + op_name = all_logical_operators + result = getattr(a, op_name)(True) + tm.assert_extension_array_equal(a, result) + + result = getattr(a, op_name)(False) + tm.assert_extension_array_equal(a, result) + + result = getattr(a, op_name)(pd.NA) + tm.assert_extension_array_equal(a, result) + + @pytest.mark.parametrize( + "other", ["a", pd.Timestamp(2017, 1, 1, 12), np.timedelta64(4)] + ) + def test_eq_mismatched_type(self, other): + # GH-44499 + arr = pd.array([True, False]) + result = arr == other + expected = pd.array([False, False]) + tm.assert_extension_array_equal(result, expected) + + result = arr != other + expected = pd.array([True, True]) + tm.assert_extension_array_equal(result, expected) + + def test_logical_length_mismatch_raises(self, all_logical_operators): + op_name = all_logical_operators + a = pd.array([True, False, None], dtype="boolean") + msg = "Lengths must match to compare" + + with pytest.raises(ValueError, match=msg): + getattr(a, op_name)([True, False]) + + with pytest.raises(ValueError, match=msg): + getattr(a, 
op_name)(np.array([True, False])) + + with pytest.raises(ValueError, match=msg): + getattr(a, op_name)(pd.array([True, False], dtype="boolean")) + + def test_logical_nan_raises(self, all_logical_operators): + op_name = all_logical_operators + a = pd.array([True, False, None], dtype="boolean") + msg = "Got float instead" + + with pytest.raises(TypeError, match=msg): + getattr(a, op_name)(np.nan) + + @pytest.mark.parametrize("other", ["a", 1]) + def test_non_bool_or_na_other_raises(self, other, all_logical_operators): + a = pd.array([True, False], dtype="boolean") + with pytest.raises(TypeError, match=str(type(other).__name__)): + getattr(a, all_logical_operators)(other) + + def test_kleene_or(self): + # A clear test of behavior. + a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + b = pd.array([True, False, None] * 3, dtype="boolean") + result = a | b + expected = pd.array( + [True, True, True, True, False, None, True, None, None], dtype="boolean" + ) + tm.assert_extension_array_equal(result, expected) + + result = b | a + tm.assert_extension_array_equal(result, expected) + + # ensure we haven't mutated anything inplace + tm.assert_extension_array_equal( + a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + ) + tm.assert_extension_array_equal( + b, pd.array([True, False, None] * 3, dtype="boolean") + ) + + @pytest.mark.parametrize( + "other, expected", + [ + (pd.NA, [True, None, None]), + (True, [True, True, True]), + (np.bool_(True), [True, True, True]), + (False, [True, False, None]), + (np.bool_(False), [True, False, None]), + ], + ) + def test_kleene_or_scalar(self, other, expected): + # TODO: test True & False + a = pd.array([True, False, None], dtype="boolean") + result = a | other + expected = pd.array(expected, dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + result = other | a + tm.assert_extension_array_equal(result, expected) + + # ensure we haven't mutated anything inplace + tm.assert_extension_array_equal( + a, pd.array([True, False, None], dtype="boolean") + ) + + def test_kleene_and(self): + # A clear test of behavior. 
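+ # [editorial sketch, not part of the original test] Kleene AND treats NA as + # "unknown": False & NA == False, since False alone already decides the + # result, while True & NA == NA and NA & NA == NA; that is exactly the + # pattern asserted in `expected` below.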
+ a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + b = pd.array([True, False, None] * 3, dtype="boolean") + result = a & b + expected = pd.array( + [True, False, None, False, False, False, None, False, None], dtype="boolean" + ) + tm.assert_extension_array_equal(result, expected) + + result = b & a + tm.assert_extension_array_equal(result, expected) + + # ensure we haven't mutated anything inplace + tm.assert_extension_array_equal( + a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + ) + tm.assert_extension_array_equal( + b, pd.array([True, False, None] * 3, dtype="boolean") + ) + + @pytest.mark.parametrize( + "other, expected", + [ + (pd.NA, [None, False, None]), + (True, [True, False, None]), + (False, [False, False, False]), + (np.bool_(True), [True, False, None]), + (np.bool_(False), [False, False, False]), + ], + ) + def test_kleene_and_scalar(self, other, expected): + a = pd.array([True, False, None], dtype="boolean") + result = a & other + expected = pd.array(expected, dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + result = other & a + tm.assert_extension_array_equal(result, expected) + + # ensure we haven't mutated anything inplace + tm.assert_extension_array_equal( + a, pd.array([True, False, None], dtype="boolean") + ) + + def test_kleene_xor(self): + a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + b = pd.array([True, False, None] * 3, dtype="boolean") + result = a ^ b + expected = pd.array( + [False, True, None, True, False, None, None, None, None], dtype="boolean" + ) + tm.assert_extension_array_equal(result, expected) + + result = b ^ a + tm.assert_extension_array_equal(result, expected) + + # ensure we haven't mutated anything inplace + tm.assert_extension_array_equal( + a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + ) + tm.assert_extension_array_equal( + b, pd.array([True, False, None] * 3, dtype="boolean") + ) + + @pytest.mark.parametrize( + "other, expected", + [ + (pd.NA, [None, None, None]), + (True, [False, True, None]), + (np.bool_(True), [False, True, None]), + (np.bool_(False), [True, False, None]), + ], + ) + def test_kleene_xor_scalar(self, other, expected): + a = pd.array([True, False, None], dtype="boolean") + result = a ^ other + expected = pd.array(expected, dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + result = other ^ a + tm.assert_extension_array_equal(result, expected) + + # ensure we haven't mutated anything inplace + tm.assert_extension_array_equal( + a, pd.array([True, False, None], dtype="boolean") + ) + + @pytest.mark.parametrize("other", [True, False, pd.NA, [True, False, None] * 3]) + def test_no_masked_assumptions(self, other, all_logical_operators): + # The logical operations should not assume that masked values are False! 
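+ # [editorial note, not part of the original test] the array `a` below is + # built directly from a data array plus a mask: positions 6-8 are masked, + # and their underlying _data values are deliberately a mix of True and + # False, so an implementation that peeked behind the mask would disagree + # with `b`, where the same positions are plain None.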
+ a = pd.arrays.BooleanArray( + np.array([True, True, True, False, False, False, True, False, True]), + np.array([False] * 6 + [True, True, True]), + ) + b = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + if isinstance(other, list): + other = pd.array(other, dtype="boolean") + + result = getattr(a, all_logical_operators)(other) + expected = getattr(b, all_logical_operators)(other) + tm.assert_extension_array_equal(result, expected) + + if isinstance(other, BooleanArray): + other._data[other._mask] = True + a._data[a._mask] = False + + result = getattr(a, all_logical_operators)(other) + expected = getattr(b, all_logical_operators)(other) + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize("operation", [kleene_or, kleene_xor, kleene_and]) +def test_error_both_scalar(operation): + msg = r"Either `left` or `right` need to be a np\.ndarray." + with pytest.raises(TypeError, match=msg): + # masks need to be non-None, otherwise it ends up in an infinite recursion + operation(True, True, np.zeros(1), np.zeros(1)) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/boolean/test_ops.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/boolean/test_ops.py new file mode 100644 index 0000000..95ebe85 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/boolean/test_ops.py @@ -0,0 +1,27 @@ +import pandas as pd +import pandas._testing as tm + + +class TestUnaryOps: + def test_invert(self): + a = pd.array([True, False, None], dtype="boolean") + expected = pd.array([False, True, None], dtype="boolean") + tm.assert_extension_array_equal(~a, expected) + + expected = pd.Series(expected, index=["a", "b", "c"], name="name") + result = ~pd.Series(a, index=["a", "b", "c"], name="name") + tm.assert_series_equal(result, expected) + + df = pd.DataFrame({"A": a, "B": [True, False, False]}, index=["a", "b", "c"]) + result = ~df + expected = pd.DataFrame( + {"A": expected, "B": [False, True, True]}, index=["a", "b", "c"] + ) + tm.assert_frame_equal(result, expected) + + def test_abs(self): + # matching numpy behavior, abs is the identity function + arr = pd.array([True, False, None], dtype="boolean") + result = abs(arr) + + tm.assert_extension_array_equal(result, arr) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/boolean/test_reduction.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/boolean/test_reduction.py new file mode 100644 index 0000000..a5c18a2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/boolean/test_reduction.py @@ -0,0 +1,60 @@ +import numpy as np +import pytest + +import pandas as pd + + +@pytest.fixture +def data(): + return pd.array( + [True, False] * 4 + [np.nan] + [True, False] * 44 + [np.nan] + [True, False], + dtype="boolean", + ) + + +@pytest.mark.parametrize( + "values, exp_any, exp_all, exp_any_noskip, exp_all_noskip", + [ + ([True, pd.NA], True, True, True, pd.NA), + ([False, pd.NA], False, False, pd.NA, False), + ([pd.NA], False, True, pd.NA, pd.NA), + ([], False, True, False, True), + # GH-33253: all True / all False values buggy with skipna=False + ([True, True], True, True, True, True), + ([False, False], False, False, False, False), + ], +) +def test_any_all(values, exp_any, exp_all, exp_any_noskip, exp_all_noskip): + # the methods return numpy scalars + exp_any = pd.NA if exp_any is pd.NA else np.bool_(exp_any) + exp_all = pd.NA if exp_all is pd.NA else np.bool_(exp_all) + exp_any_noskip = pd.NA if exp_any_noskip is pd.NA else 
np.bool_(exp_any_noskip) + exp_all_noskip = pd.NA if exp_all_noskip is pd.NA else np.bool_(exp_all_noskip) + + for con in [pd.array, pd.Series]: + a = con(values, dtype="boolean") + assert a.any() is exp_any + assert a.all() is exp_all + assert a.any(skipna=False) is exp_any_noskip + assert a.all(skipna=False) is exp_all_noskip + + assert np.any(a.any()) is exp_any + assert np.all(a.all()) is exp_all + + +@pytest.mark.parametrize("dropna", [True, False]) +def test_reductions_return_types(dropna, data, all_numeric_reductions): + op = all_numeric_reductions + s = pd.Series(data) + if dropna: + s = s.dropna() + + if op == "sum": + assert isinstance(getattr(s, op)(), np.int_) + elif op == "prod": + assert isinstance(getattr(s, op)(), np.int_) + elif op in ("min", "max"): + assert isinstance(getattr(s, op)(), np.bool_) + else: + # "mean", "std", "var", "median", "kurt", "skew" + assert isinstance(getattr(s, op)(), np.float64) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/boolean/test_repr.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/boolean/test_repr.py new file mode 100644 index 0000000..0ee904b --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/boolean/test_repr.py @@ -0,0 +1,13 @@ +import pandas as pd + + +def test_repr(): + df = pd.DataFrame({"A": pd.array([True, False, None], dtype="boolean")}) + expected = "       A\n0   True\n1  False\n2   <NA>" + assert repr(df) == expected + + expected = "0     True\n1    False\n2     <NA>\nName: A, dtype: boolean" + assert repr(df.A) == expected + + expected = "<BooleanArray>\n[True, False, <NA>]\nLength: 3, dtype: boolean" + assert repr(df.A.array) == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..15d41fd Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/common.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/common.cpython-38.pyc new file mode 100644 index 0000000..8df3848 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/common.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/conftest.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/conftest.cpython-38.pyc new file mode 100644 index 0000000..8fd6bc2 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/conftest.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_algos.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_algos.cpython-38.pyc new file mode 100644 index 0000000..176b18e Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_algos.cpython-38.pyc differ diff --git 
a/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_analytics.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_analytics.cpython-38.pyc new file mode 100644 index 0000000..637972c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_analytics.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_api.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_api.cpython-38.pyc new file mode 100644 index 0000000..c242882 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_api.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_astype.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_astype.cpython-38.pyc new file mode 100644 index 0000000..e084356 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_astype.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_constructors.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_constructors.cpython-38.pyc new file mode 100644 index 0000000..cc0ec68 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_constructors.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_dtypes.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_dtypes.cpython-38.pyc new file mode 100644 index 0000000..85001f6 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_dtypes.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_indexing.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_indexing.cpython-38.pyc new file mode 100644 index 0000000..e1b72bc Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_indexing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_missing.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_missing.cpython-38.pyc new file mode 100644 index 0000000..31d16e2 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_missing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_operators.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_operators.cpython-38.pyc new file mode 100644 index 0000000..3c80898 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_operators.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_replace.cpython-38.pyc 
b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_replace.cpython-38.pyc new file mode 100644 index 0000000..1ad41ee Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_replace.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_repr.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_repr.cpython-38.pyc new file mode 100644 index 0000000..c69200e Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_repr.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_sorting.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_sorting.cpython-38.pyc new file mode 100644 index 0000000..0726c60 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_sorting.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_subclass.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_subclass.cpython-38.pyc new file mode 100644 index 0000000..b090cc1 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_subclass.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_take.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_take.cpython-38.pyc new file mode 100644 index 0000000..f6f8a16 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_take.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_warnings.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_warnings.cpython-38.pyc new file mode 100644 index 0000000..ea1b1b8 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/__pycache__/test_warnings.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/common.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/common.py new file mode 100644 index 0000000..4ef9390 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/common.py @@ -0,0 +1,8 @@ +from pandas import Categorical + + +class TestCategorical: + def setup_method(self, method): + self.factor = Categorical( + ["a", "b", "b", "a", "a", "c", "c", "c"], ordered=True + ) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/conftest.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/conftest.py new file mode 100644 index 0000000..640f5df --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/conftest.py @@ -0,0 +1,7 @@ +import pytest + + +@pytest.fixture(params=[True, False]) +def allow_fill(request): + """Boolean 'allow_fill' parameter for Categorical.take""" + return request.param diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_algos.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_algos.py new 
file mode 100644 index 0000000..5b0004a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_algos.py @@ -0,0 +1,83 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +@pytest.mark.parametrize("ordered", [True, False]) +@pytest.mark.parametrize("categories", [["b", "a", "c"], ["a", "b", "c", "d"]]) +def test_factorize(categories, ordered): + cat = pd.Categorical( + ["b", "b", "a", "c", None], categories=categories, ordered=ordered + ) + codes, uniques = pd.factorize(cat) + expected_codes = np.array([0, 0, 1, 2, -1], dtype=np.intp) + expected_uniques = pd.Categorical( + ["b", "a", "c"], categories=categories, ordered=ordered + ) + + tm.assert_numpy_array_equal(codes, expected_codes) + tm.assert_categorical_equal(uniques, expected_uniques) + + +def test_factorized_sort(): + cat = pd.Categorical(["b", "b", None, "a"]) + codes, uniques = pd.factorize(cat, sort=True) + expected_codes = np.array([1, 1, -1, 0], dtype=np.intp) + expected_uniques = pd.Categorical(["a", "b"]) + + tm.assert_numpy_array_equal(codes, expected_codes) + tm.assert_categorical_equal(uniques, expected_uniques) + + +def test_factorized_sort_ordered(): + cat = pd.Categorical( + ["b", "b", None, "a"], categories=["c", "b", "a"], ordered=True + ) + + codes, uniques = pd.factorize(cat, sort=True) + expected_codes = np.array([0, 0, -1, 1], dtype=np.intp) + expected_uniques = pd.Categorical( + ["b", "a"], categories=["c", "b", "a"], ordered=True + ) + + tm.assert_numpy_array_equal(codes, expected_codes) + tm.assert_categorical_equal(uniques, expected_uniques) + + +def test_isin_cats(): + # GH2003 + cat = pd.Categorical(["a", "b", np.nan]) + + result = cat.isin(["a", np.nan]) + expected = np.array([True, False, True], dtype=bool) + tm.assert_numpy_array_equal(expected, result) + + result = cat.isin(["a", "c"]) + expected = np.array([True, False, False], dtype=bool) + tm.assert_numpy_array_equal(expected, result) + + +@pytest.mark.parametrize("empty", [[], pd.Series(dtype=object), np.array([])]) +def test_isin_empty(empty): + s = pd.Categorical(["a", "b"]) + expected = np.array([False, False], dtype=bool) + + result = s.isin(empty) + tm.assert_numpy_array_equal(expected, result) + + +def test_diff(): + s = pd.Series([1, 2, 3], dtype="category") + with tm.assert_produces_warning(FutureWarning): + result = s.diff() + expected = pd.Series([np.nan, 1, 1]) + tm.assert_series_equal(result, expected) + + expected = expected.to_frame(name="A") + df = s.to_frame(name="A") + with tm.assert_produces_warning(FutureWarning): + result = df.diff() + + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_analytics.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_analytics.py new file mode 100644 index 0000000..59bb34f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_analytics.py @@ -0,0 +1,365 @@ +import re +import sys + +import numpy as np +import pytest + +from pandas.compat import PYPY + +from pandas import ( + Categorical, + CategoricalDtype, + Index, + NaT, + Series, + date_range, +) +import pandas._testing as tm +from pandas.api.types import is_scalar + + +class TestCategoricalAnalytics: + @pytest.mark.parametrize("aggregation", ["min", "max"]) + def test_min_max_not_ordered_raises(self, aggregation): + # unordered cats have no min/max + cat = Categorical(["a", "b", "c", "d"], ordered=False) + msg = f"Categorical is not 
ordered for operation {aggregation}" + agg_func = getattr(cat, aggregation) + + with pytest.raises(TypeError, match=msg): + agg_func() + + ufunc = np.minimum if aggregation == "min" else np.maximum + with pytest.raises(TypeError, match=msg): + ufunc.reduce(cat) + + def test_min_max_ordered(self, index_or_series_or_array): + cat = Categorical(["a", "b", "c", "d"], ordered=True) + obj = index_or_series_or_array(cat) + _min = obj.min() + _max = obj.max() + assert _min == "a" + assert _max == "d" + + assert np.minimum.reduce(obj) == "a" + assert np.maximum.reduce(obj) == "d" + # TODO: raises if we pass axis=0 (on Index and Categorical, not Series) + + cat = Categorical( + ["a", "b", "c", "d"], categories=["d", "c", "b", "a"], ordered=True + ) + obj = index_or_series_or_array(cat) + _min = obj.min() + _max = obj.max() + assert _min == "d" + assert _max == "a" + assert np.minimum.reduce(obj) == "d" + assert np.maximum.reduce(obj) == "a" + + @pytest.mark.parametrize( + "categories,expected", + [ + (list("ABC"), np.NaN), + ([1, 2, 3], np.NaN), + pytest.param( + Series(date_range("2020-01-01", periods=3), dtype="category"), + NaT, + marks=pytest.mark.xfail( + reason="https://github.com/pandas-dev/pandas/issues/29962" + ), + ), + ], + ) + @pytest.mark.parametrize("aggregation", ["min", "max"]) + def test_min_max_ordered_empty(self, categories, expected, aggregation): + # GH 30227 + cat = Categorical([], categories=categories, ordered=True) + + agg_func = getattr(cat, aggregation) + result = agg_func() + assert result is expected + + @pytest.mark.parametrize( + "values, categories", + [(["a", "b", "c", np.nan], list("cba")), ([1, 2, 3, np.nan], [3, 2, 1])], + ) + @pytest.mark.parametrize("skipna", [True, False]) + @pytest.mark.parametrize("function", ["min", "max"]) + def test_min_max_with_nan(self, values, categories, function, skipna): + # GH 25303 + cat = Categorical(values, categories=categories, ordered=True) + result = getattr(cat, function)(skipna=skipna) + + if skipna is False: + assert result is np.nan + else: + expected = categories[0] if function == "min" else categories[2] + assert result == expected + + @pytest.mark.parametrize("function", ["min", "max"]) + @pytest.mark.parametrize("skipna", [True, False]) + def test_min_max_only_nan(self, function, skipna): + # https://github.com/pandas-dev/pandas/issues/33450 + cat = Categorical([np.nan], categories=[1, 2], ordered=True) + result = getattr(cat, function)(skipna=skipna) + assert result is np.nan + + @pytest.mark.parametrize("method", ["min", "max"]) + def test_deprecate_numeric_only_min_max(self, method): + # GH 25303 + cat = Categorical( + [np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1], ordered=True + ) + with tm.assert_produces_warning(expected_warning=FutureWarning): + getattr(cat, method)(numeric_only=True) + + @pytest.mark.parametrize("method", ["min", "max"]) + def test_numpy_min_max_raises(self, method): + cat = Categorical(["a", "b", "c", "b"], ordered=False) + msg = ( + f"Categorical is not ordered for operation {method}\n" + "you can use .as_ordered() to change the Categorical to an ordered one" + ) + method = getattr(np, method) + with pytest.raises(TypeError, match=re.escape(msg)): + method(cat) + + @pytest.mark.parametrize("kwarg", ["axis", "out", "keepdims"]) + @pytest.mark.parametrize("method", ["min", "max"]) + def test_numpy_min_max_unsupported_kwargs_raises(self, method, kwarg): + cat = Categorical(["a", "b", "c", "b"], ordered=True) + msg = ( + f"the '{kwarg}' parameter is not supported in the pandas 
implementation " + f"of {method}" + ) + if kwarg == "axis": + msg = r"`axis` must be fewer than the number of dimensions \(1\)" + kwargs = {kwarg: 42} + method = getattr(np, method) + with pytest.raises(ValueError, match=msg): + method(cat, **kwargs) + + @pytest.mark.parametrize("method, expected", [("min", "a"), ("max", "c")]) + def test_numpy_min_max_axis_equals_none(self, method, expected): + cat = Categorical(["a", "b", "c", "b"], ordered=True) + method = getattr(np, method) + result = method(cat, axis=None) + assert result == expected + + @pytest.mark.parametrize( + "values,categories,exp_mode", + [ + ([1, 1, 2, 4, 5, 5, 5], [5, 4, 3, 2, 1], [5]), + ([1, 1, 1, 4, 5, 5, 5], [5, 4, 3, 2, 1], [5, 1]), + ([1, 2, 3, 4, 5], [5, 4, 3, 2, 1], [5, 4, 3, 2, 1]), + ([np.nan, np.nan, np.nan, 4, 5], [5, 4, 3, 2, 1], [5, 4]), + ([np.nan, np.nan, np.nan, 4, 5, 4], [5, 4, 3, 2, 1], [4]), + ([np.nan, np.nan, 4, 5, 4], [5, 4, 3, 2, 1], [4]), + ], + ) + def test_mode(self, values, categories, exp_mode): + s = Categorical(values, categories=categories, ordered=True) + msg = "Use Series.mode instead" + with tm.assert_produces_warning(FutureWarning, match=msg): + res = s.mode() + exp = Categorical(exp_mode, categories=categories, ordered=True) + tm.assert_categorical_equal(res, exp) + + def test_searchsorted(self, ordered): + # https://github.com/pandas-dev/pandas/issues/8420 + # https://github.com/pandas-dev/pandas/issues/14522 + + cat = Categorical( + ["cheese", "milk", "apple", "bread", "bread"], + categories=["cheese", "milk", "apple", "bread"], + ordered=ordered, + ) + ser = Series(cat) + + # Searching for single item argument, side='left' (default) + res_cat = cat.searchsorted("apple") + assert res_cat == 2 + assert is_scalar(res_cat) + + res_ser = ser.searchsorted("apple") + assert res_ser == 2 + assert is_scalar(res_ser) + + # Searching for single item array, side='left' (default) + res_cat = cat.searchsorted(["bread"]) + res_ser = ser.searchsorted(["bread"]) + exp = np.array([3], dtype=np.intp) + tm.assert_numpy_array_equal(res_cat, exp) + tm.assert_numpy_array_equal(res_ser, exp) + + # Searching for several items array, side='right' + res_cat = cat.searchsorted(["apple", "bread"], side="right") + res_ser = ser.searchsorted(["apple", "bread"], side="right") + exp = np.array([3, 5], dtype=np.intp) + tm.assert_numpy_array_equal(res_cat, exp) + tm.assert_numpy_array_equal(res_ser, exp) + + # Searching for a single value that is not from the Categorical + with pytest.raises(TypeError, match="cucumber"): + cat.searchsorted("cucumber") + with pytest.raises(TypeError, match="cucumber"): + ser.searchsorted("cucumber") + + # Searching for multiple values one of each is not from the Categorical + msg = ( + "Cannot setitem on a Categorical with a new category, " + "set the categories first" + ) + with pytest.raises(TypeError, match=msg): + cat.searchsorted(["bread", "cucumber"]) + with pytest.raises(TypeError, match=msg): + ser.searchsorted(["bread", "cucumber"]) + + def test_unique(self, ordered): + # GH38140 + dtype = CategoricalDtype(["a", "b", "c"], ordered=ordered) + + # categories are reordered based on value when ordered=False + cat = Categorical(["a", "b", "c"], dtype=dtype) + res = cat.unique() + tm.assert_categorical_equal(res, cat) + + cat = Categorical(["a", "b", "a", "a"], dtype=dtype) + res = cat.unique() + tm.assert_categorical_equal(res, Categorical(["a", "b"], dtype=dtype)) + + cat = Categorical(["c", "a", "b", "a", "a"], dtype=dtype) + res = cat.unique() + exp_cat = Categorical(["c", "a", 
"b"], dtype=dtype) + tm.assert_categorical_equal(res, exp_cat) + + # nan must be removed + cat = Categorical(["b", np.nan, "b", np.nan, "a"], dtype=dtype) + res = cat.unique() + exp_cat = Categorical(["b", np.nan, "a"], dtype=dtype) + tm.assert_categorical_equal(res, exp_cat) + + def test_unique_index_series(self, ordered): + # GH38140 + dtype = CategoricalDtype([3, 2, 1], ordered=ordered) + + c = Categorical([3, 1, 2, 2, 1], dtype=dtype) + # Categorical.unique sorts categories by appearance order + # if ordered=False + exp = Categorical([3, 1, 2], dtype=dtype) + tm.assert_categorical_equal(c.unique(), exp) + + tm.assert_index_equal(Index(c).unique(), Index(exp)) + tm.assert_categorical_equal(Series(c).unique(), exp) + + c = Categorical([1, 1, 2, 2], dtype=dtype) + exp = Categorical([1, 2], dtype=dtype) + tm.assert_categorical_equal(c.unique(), exp) + tm.assert_index_equal(Index(c).unique(), Index(exp)) + tm.assert_categorical_equal(Series(c).unique(), exp) + + def test_shift(self): + # GH 9416 + cat = Categorical(["a", "b", "c", "d", "a"]) + + # shift forward + sp1 = cat.shift(1) + xp1 = Categorical([np.nan, "a", "b", "c", "d"]) + tm.assert_categorical_equal(sp1, xp1) + tm.assert_categorical_equal(cat[:-1], sp1[1:]) + + # shift back + sn2 = cat.shift(-2) + xp2 = Categorical( + ["c", "d", "a", np.nan, np.nan], categories=["a", "b", "c", "d"] + ) + tm.assert_categorical_equal(sn2, xp2) + tm.assert_categorical_equal(cat[2:], sn2[:-2]) + + # shift by zero + tm.assert_categorical_equal(cat, cat.shift(0)) + + def test_nbytes(self): + cat = Categorical([1, 2, 3]) + exp = 3 + 3 * 8 # 3 int8s for values + 3 int64s for categories + assert cat.nbytes == exp + + def test_memory_usage(self): + cat = Categorical([1, 2, 3]) + + # .categories is an index, so we include the hashtable + assert 0 < cat.nbytes <= cat.memory_usage() + assert 0 < cat.nbytes <= cat.memory_usage(deep=True) + + cat = Categorical(["foo", "foo", "bar"]) + assert cat.memory_usage(deep=True) > cat.nbytes + + if not PYPY: + # sys.getsizeof will call the .memory_usage with + # deep=True, and add on some GC overhead + diff = cat.memory_usage(deep=True) - sys.getsizeof(cat) + assert abs(diff) < 100 + + def test_map(self): + c = Categorical(list("ABABC"), categories=list("CBA"), ordered=True) + result = c.map(lambda x: x.lower()) + exp = Categorical(list("ababc"), categories=list("cba"), ordered=True) + tm.assert_categorical_equal(result, exp) + + c = Categorical(list("ABABC"), categories=list("ABC"), ordered=False) + result = c.map(lambda x: x.lower()) + exp = Categorical(list("ababc"), categories=list("abc"), ordered=False) + tm.assert_categorical_equal(result, exp) + + result = c.map(lambda x: 1) + # GH 12766: Return an index not an array + tm.assert_index_equal(result, Index(np.array([1] * 5, dtype=np.int64))) + + @pytest.mark.parametrize("value", [1, "True", [1, 2, 3], 5.0]) + def test_validate_inplace_raises(self, value): + cat = Categorical(["A", "B", "B", "C", "A"]) + msg = ( + 'For argument "inplace" expected type bool, ' + f"received type {type(value).__name__}" + ) + with pytest.raises(ValueError, match=msg): + cat.set_ordered(value=True, inplace=value) + + with pytest.raises(ValueError, match=msg): + cat.as_ordered(inplace=value) + + with pytest.raises(ValueError, match=msg): + cat.as_unordered(inplace=value) + + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(FutureWarning): + # issue #37643 inplace kwarg deprecated + cat.set_categories(["X", "Y", "Z"], rename=True, inplace=value) + + with 
pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(FutureWarning): + # issue #37643 inplace kwarg deprecated + cat.rename_categories(["X", "Y", "Z"], inplace=value) + + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(FutureWarning): + # issue #37643 inplace kwarg deprecated + cat.reorder_categories(["X", "Y", "Z"], ordered=True, inplace=value) + + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(FutureWarning): + # issue #37643 inplace kwarg deprecated + cat.add_categories(new_categories=["D", "E", "F"], inplace=value) + + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(FutureWarning): + # issue #37643 inplace kwarg deprecated + cat.remove_categories(removals=["D", "E", "F"], inplace=value) + + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(FutureWarning): + # issue #37643 inplace kwarg deprecated + cat.remove_unused_categories(inplace=value) + + with pytest.raises(ValueError, match=msg): + cat.sort_values(inplace=value) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_api.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_api.py new file mode 100644 index 0000000..bde7505 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_api.py @@ -0,0 +1,561 @@ +import re + +import numpy as np +import pytest + +from pandas import ( + Categorical, + CategoricalIndex, + DataFrame, + Index, + Series, +) +import pandas._testing as tm +from pandas.core.arrays.categorical import recode_for_categories +from pandas.tests.arrays.categorical.common import TestCategorical + + +class TestCategoricalAPI: + def test_ordered_api(self): + # GH 9347 + cat1 = Categorical(list("acb"), ordered=False) + tm.assert_index_equal(cat1.categories, Index(["a", "b", "c"])) + assert not cat1.ordered + + cat2 = Categorical(list("acb"), categories=list("bca"), ordered=False) + tm.assert_index_equal(cat2.categories, Index(["b", "c", "a"])) + assert not cat2.ordered + + cat3 = Categorical(list("acb"), ordered=True) + tm.assert_index_equal(cat3.categories, Index(["a", "b", "c"])) + assert cat3.ordered + + cat4 = Categorical(list("acb"), categories=list("bca"), ordered=True) + tm.assert_index_equal(cat4.categories, Index(["b", "c", "a"])) + assert cat4.ordered + + def test_set_ordered(self): + + cat = Categorical(["a", "b", "c", "a"], ordered=True) + cat2 = cat.as_unordered() + assert not cat2.ordered + cat2 = cat.as_ordered() + assert cat2.ordered + cat2.as_unordered(inplace=True) + assert not cat2.ordered + cat2.as_ordered(inplace=True) + assert cat2.ordered + + assert cat2.set_ordered(True).ordered + assert not cat2.set_ordered(False).ordered + cat2.set_ordered(True, inplace=True) + assert cat2.ordered + cat2.set_ordered(False, inplace=True) + assert not cat2.ordered + + # removed in 0.19.0 + msg = "can't set attribute" + with pytest.raises(AttributeError, match=msg): + cat.ordered = True + with pytest.raises(AttributeError, match=msg): + cat.ordered = False + + def test_rename_categories(self): + cat = Categorical(["a", "b", "c", "a"]) + + # inplace=False: the old one must not be changed + res = cat.rename_categories([1, 2, 3]) + tm.assert_numpy_array_equal( + res.__array__(), np.array([1, 2, 3, 1], dtype=np.int64) + ) + tm.assert_index_equal(res.categories, Index([1, 2, 3])) + + exp_cat = np.array(["a", "b", "c", "a"], dtype=np.object_) + tm.assert_numpy_array_equal(cat.__array__(), exp_cat) + + 
exp_cat = Index(["a", "b", "c"]) + tm.assert_index_equal(cat.categories, exp_cat) + + # GH18862 (let rename_categories take callables) + result = cat.rename_categories(lambda x: x.upper()) + expected = Categorical(["A", "B", "C", "A"]) + tm.assert_categorical_equal(result, expected) + + # and now inplace + with tm.assert_produces_warning(FutureWarning): + # issue #37643 inplace kwarg deprecated + res = cat.rename_categories([1, 2, 3], inplace=True) + + assert res is None + tm.assert_numpy_array_equal( + cat.__array__(), np.array([1, 2, 3, 1], dtype=np.int64) + ) + tm.assert_index_equal(cat.categories, Index([1, 2, 3])) + + @pytest.mark.parametrize("new_categories", [[1, 2, 3, 4], [1, 2]]) + def test_rename_categories_wrong_length_raises(self, new_categories): + cat = Categorical(["a", "b", "c", "a"]) + msg = ( + "new categories need to have the same number of items as the " + "old categories!" + ) + with pytest.raises(ValueError, match=msg): + cat.rename_categories(new_categories) + + def test_rename_categories_series(self): + # https://github.com/pandas-dev/pandas/issues/17981 + c = Categorical(["a", "b"]) + result = c.rename_categories(Series([0, 1], index=["a", "b"])) + expected = Categorical([0, 1]) + tm.assert_categorical_equal(result, expected) + + def test_rename_categories_dict(self): + # GH 17336 + cat = Categorical(["a", "b", "c", "d"]) + res = cat.rename_categories({"a": 4, "b": 3, "c": 2, "d": 1}) + expected = Index([4, 3, 2, 1]) + tm.assert_index_equal(res.categories, expected) + + # Test for inplace + with tm.assert_produces_warning(FutureWarning): + # issue #37643 inplace kwarg deprecated + res = cat.rename_categories({"a": 4, "b": 3, "c": 2, "d": 1}, inplace=True) + + assert res is None + tm.assert_index_equal(cat.categories, expected) + + # Test for dicts of smaller length + cat = Categorical(["a", "b", "c", "d"]) + res = cat.rename_categories({"a": 1, "c": 3}) + + expected = Index([1, "b", 3, "d"]) + tm.assert_index_equal(res.categories, expected) + + # Test for dicts with bigger length + cat = Categorical(["a", "b", "c", "d"]) + res = cat.rename_categories({"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6}) + expected = Index([1, 2, 3, 4]) + tm.assert_index_equal(res.categories, expected) + + # Test for dicts with no items from old categories + cat = Categorical(["a", "b", "c", "d"]) + res = cat.rename_categories({"f": 1, "g": 3}) + + expected = Index(["a", "b", "c", "d"]) + tm.assert_index_equal(res.categories, expected) + + def test_reorder_categories(self): + cat = Categorical(["a", "b", "c", "a"], ordered=True) + old = cat.copy() + new = Categorical( + ["a", "b", "c", "a"], categories=["c", "b", "a"], ordered=True + ) + + # first inplace == False + res = cat.reorder_categories(["c", "b", "a"]) + # cat must be the same as before + tm.assert_categorical_equal(cat, old) + # only res is changed + tm.assert_categorical_equal(res, new) + + # inplace == True + with tm.assert_produces_warning(FutureWarning): + # issue #37643 inplace kwarg deprecated + res = cat.reorder_categories(["c", "b", "a"], inplace=True) + + assert res is None + tm.assert_categorical_equal(cat, new) + + @pytest.mark.parametrize( + "new_categories", + [ + ["a"], # not all "old" included in "new" + ["a", "b", "d"], # still not all "old" in "new" + ["a", "b", "c", "d"], # all "old" included in "new", but too long + ], + ) + def test_reorder_categories_raises(self, new_categories): + cat = Categorical(["a", "b", "c", "a"], ordered=True) + msg = "items in new_categories are not the same as in old categories" 
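+ # [editorial note, not part of the original test] reorder_categories only + # permutes the existing category set; unlike set_categories it may neither + # drop nor add members, so every parametrized case above raises.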
+ with pytest.raises(ValueError, match=msg): + cat.reorder_categories(new_categories) + + def test_add_categories(self): + cat = Categorical(["a", "b", "c", "a"], ordered=True) + old = cat.copy() + new = Categorical( + ["a", "b", "c", "a"], categories=["a", "b", "c", "d"], ordered=True + ) + + # first inplace == False + res = cat.add_categories("d") + tm.assert_categorical_equal(cat, old) + tm.assert_categorical_equal(res, new) + + res = cat.add_categories(["d"]) + tm.assert_categorical_equal(cat, old) + tm.assert_categorical_equal(res, new) + + # inplace == True + with tm.assert_produces_warning(FutureWarning): + # issue #37643 inplace kwarg deprecated + res = cat.add_categories("d", inplace=True) + + tm.assert_categorical_equal(cat, new) + assert res is None + + # GH 9927 + cat = Categorical(list("abc"), ordered=True) + expected = Categorical(list("abc"), categories=list("abcde"), ordered=True) + # test with Series, np.array, index, list + res = cat.add_categories(Series(["d", "e"])) + tm.assert_categorical_equal(res, expected) + res = cat.add_categories(np.array(["d", "e"])) + tm.assert_categorical_equal(res, expected) + res = cat.add_categories(Index(["d", "e"])) + tm.assert_categorical_equal(res, expected) + res = cat.add_categories(["d", "e"]) + tm.assert_categorical_equal(res, expected) + + def test_add_categories_existing_raises(self): + # new is in old categories + cat = Categorical(["a", "b", "c", "d"], ordered=True) + msg = re.escape("new categories must not include old categories: {'d'}") + with pytest.raises(ValueError, match=msg): + cat.add_categories(["d"]) + + def test_set_categories(self): + cat = Categorical(["a", "b", "c", "a"], ordered=True) + exp_categories = Index(["c", "b", "a"]) + exp_values = np.array(["a", "b", "c", "a"], dtype=np.object_) + + with tm.assert_produces_warning(FutureWarning): + # issue #37643 inplace kwarg deprecated + res = cat.set_categories(["c", "b", "a"], inplace=True) + + tm.assert_index_equal(cat.categories, exp_categories) + tm.assert_numpy_array_equal(cat.__array__(), exp_values) + assert res is None + + res = cat.set_categories(["a", "b", "c"]) + # cat must be the same as before + tm.assert_index_equal(cat.categories, exp_categories) + tm.assert_numpy_array_equal(cat.__array__(), exp_values) + # only res is changed + exp_categories_back = Index(["a", "b", "c"]) + tm.assert_index_equal(res.categories, exp_categories_back) + tm.assert_numpy_array_equal(res.__array__(), exp_values) + + # not all "old" included in "new" -> all not included ones are now + # np.nan + cat = Categorical(["a", "b", "c", "a"], ordered=True) + res = cat.set_categories(["a"]) + tm.assert_numpy_array_equal(res.codes, np.array([0, -1, -1, 0], dtype=np.int8)) + + # still not all "old" in "new" + res = cat.set_categories(["a", "b", "d"]) + tm.assert_numpy_array_equal(res.codes, np.array([0, 1, -1, 0], dtype=np.int8)) + tm.assert_index_equal(res.categories, Index(["a", "b", "d"])) + + # all "old" included in "new" + cat = cat.set_categories(["a", "b", "c", "d"]) + exp_categories = Index(["a", "b", "c", "d"]) + tm.assert_index_equal(cat.categories, exp_categories) + + # internals... + c = Categorical([1, 2, 3, 4, 1], categories=[1, 2, 3, 4], ordered=True) + tm.assert_numpy_array_equal(c._codes, np.array([0, 1, 2, 3, 0], dtype=np.int8)) + tm.assert_index_equal(c.categories, Index([1, 2, 3, 4])) + + exp = np.array([1, 2, 3, 4, 1], dtype=np.int64) + tm.assert_numpy_array_equal(np.asarray(c), exp) + + # all "pointers" to '4' must be changed from 3 to 0,... 
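+ # [editorial sketch, not part of the original test] set_categories recodes + # the stored int8 codes against the new category order: the values survive + # unchanged while code i now points at new_categories[i], so the value 4, + # code 3 under [1, 2, 3, 4], becomes code 0 under [4, 3, 2, 1].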
+ c = c.set_categories([4, 3, 2, 1]) + + # positions are changed + tm.assert_numpy_array_equal(c._codes, np.array([3, 2, 1, 0, 3], dtype=np.int8)) + + # categories are now in new order + tm.assert_index_equal(c.categories, Index([4, 3, 2, 1])) + + # output is the same + exp = np.array([1, 2, 3, 4, 1], dtype=np.int64) + tm.assert_numpy_array_equal(np.asarray(c), exp) + assert c.min() == 4 + assert c.max() == 1 + + # set_categories should set the ordering if specified + c2 = c.set_categories([4, 3, 2, 1], ordered=False) + assert not c2.ordered + + tm.assert_numpy_array_equal(np.asarray(c), np.asarray(c2)) + + # set_categories should pass thru the ordering + c2 = c.set_ordered(False).set_categories([4, 3, 2, 1]) + assert not c2.ordered + + tm.assert_numpy_array_equal(np.asarray(c), np.asarray(c2)) + + def test_to_dense_deprecated(self): + cat = Categorical(["a", "b", "c", "a"], ordered=True) + + with tm.assert_produces_warning(FutureWarning): + cat.to_dense() + + @pytest.mark.parametrize( + "values, categories, new_categories", + [ + # No NaNs, same cats, same order + (["a", "b", "a"], ["a", "b"], ["a", "b"]), + # No NaNs, same cats, different order + (["a", "b", "a"], ["a", "b"], ["b", "a"]), + # Same, unsorted + (["b", "a", "a"], ["a", "b"], ["a", "b"]), + # No NaNs, same cats, different order + (["b", "a", "a"], ["a", "b"], ["b", "a"]), + # NaNs + (["a", "b", "c"], ["a", "b"], ["a", "b"]), + (["a", "b", "c"], ["a", "b"], ["b", "a"]), + (["b", "a", "c"], ["a", "b"], ["a", "b"]), + (["b", "a", "c"], ["a", "b"], ["a", "b"]), + # Introduce NaNs + (["a", "b", "c"], ["a", "b"], ["a"]), + (["a", "b", "c"], ["a", "b"], ["b"]), + (["b", "a", "c"], ["a", "b"], ["a"]), + (["b", "a", "c"], ["a", "b"], ["a"]), + # No overlap + (["a", "b", "c"], ["a", "b"], ["d", "e"]), + ], + ) + @pytest.mark.parametrize("ordered", [True, False]) + def test_set_categories_many(self, values, categories, new_categories, ordered): + c = Categorical(values, categories) + expected = Categorical(values, new_categories, ordered) + result = c.set_categories(new_categories, ordered=ordered) + tm.assert_categorical_equal(result, expected) + + def test_set_categories_rename_less(self): + # GH 24675 + cat = Categorical(["A", "B"]) + result = cat.set_categories(["A"], rename=True) + expected = Categorical(["A", np.nan]) + tm.assert_categorical_equal(result, expected) + + def test_set_categories_private(self): + cat = Categorical(["a", "b", "c"], categories=["a", "b", "c", "d"]) + cat._set_categories(["a", "c", "d", "e"]) + expected = Categorical(["a", "c", "d"], categories=list("acde")) + tm.assert_categorical_equal(cat, expected) + + # fastpath + cat = Categorical(["a", "b", "c"], categories=["a", "b", "c", "d"]) + cat._set_categories(["a", "c", "d", "e"], fastpath=True) + expected = Categorical(["a", "c", "d"], categories=list("acde")) + tm.assert_categorical_equal(cat, expected) + + def test_remove_categories(self): + cat = Categorical(["a", "b", "c", "a"], ordered=True) + old = cat.copy() + new = Categorical(["a", "b", np.nan, "a"], categories=["a", "b"], ordered=True) + + # first inplace == False + res = cat.remove_categories("c") + tm.assert_categorical_equal(cat, old) + tm.assert_categorical_equal(res, new) + + res = cat.remove_categories(["c"]) + tm.assert_categorical_equal(cat, old) + tm.assert_categorical_equal(res, new) + + # inplace == True + with tm.assert_produces_warning(FutureWarning): + # issue #37643 inplace kwarg deprecated + res = cat.remove_categories("c", inplace=True) + + tm.assert_categorical_equal(cat, new) 
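+ # [editorial note, not part of the original test] with inplace=True the + # deprecated API mutates `cat` itself and returns None, which is what the + # surrounding two assertions check.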
+ assert res is None + + @pytest.mark.parametrize("removals", [["c"], ["c", np.nan], "c", ["c", "c"]]) + def test_remove_categories_raises(self, removals): + cat = Categorical(["a", "b", "a"]) + message = re.escape("removals must all be in old categories: {'c'}") + + with pytest.raises(ValueError, match=message): + cat.remove_categories(removals) + + def test_remove_unused_categories(self): + c = Categorical(["a", "b", "c", "d", "a"], categories=["a", "b", "c", "d", "e"]) + exp_categories_all = Index(["a", "b", "c", "d", "e"]) + exp_categories_dropped = Index(["a", "b", "c", "d"]) + + tm.assert_index_equal(c.categories, exp_categories_all) + + res = c.remove_unused_categories() + tm.assert_index_equal(res.categories, exp_categories_dropped) + tm.assert_index_equal(c.categories, exp_categories_all) + + with tm.assert_produces_warning(FutureWarning): + # issue #37643 inplace kwarg deprecated + res = c.remove_unused_categories(inplace=True) + + tm.assert_index_equal(c.categories, exp_categories_dropped) + assert res is None + + # with NaN values (GH11599) + c = Categorical(["a", "b", "c", np.nan], categories=["a", "b", "c", "d", "e"]) + res = c.remove_unused_categories() + tm.assert_index_equal(res.categories, Index(np.array(["a", "b", "c"]))) + exp_codes = np.array([0, 1, 2, -1], dtype=np.int8) + tm.assert_numpy_array_equal(res.codes, exp_codes) + tm.assert_index_equal(c.categories, exp_categories_all) + + val = ["F", np.nan, "D", "B", "D", "F", np.nan] + cat = Categorical(values=val, categories=list("ABCDEFG")) + out = cat.remove_unused_categories() + tm.assert_index_equal(out.categories, Index(["B", "D", "F"])) + exp_codes = np.array([2, -1, 1, 0, 1, 2, -1], dtype=np.int8) + tm.assert_numpy_array_equal(out.codes, exp_codes) + assert out.tolist() == val + + alpha = list("abcdefghijklmnopqrstuvwxyz") + val = np.random.choice(alpha[::2], 10000).astype("object") + val[np.random.choice(len(val), 100)] = np.nan + + cat = Categorical(values=val, categories=alpha) + out = cat.remove_unused_categories() + assert out.tolist() == val.tolist() + + +class TestCategoricalAPIWithFactor(TestCategorical): + def test_describe(self): + # string type + desc = self.factor.describe() + assert self.factor.ordered + exp_index = CategoricalIndex( + ["a", "b", "c"], name="categories", ordered=self.factor.ordered + ) + expected = DataFrame( + {"counts": [3, 2, 3], "freqs": [3 / 8.0, 2 / 8.0, 3 / 8.0]}, index=exp_index + ) + tm.assert_frame_equal(desc, expected) + + # check unused categories + cat = self.factor.copy() + + with tm.assert_produces_warning(FutureWarning): + # issue #37643 inplace kwarg deprecated + cat.set_categories(["a", "b", "c", "d"], inplace=True) + + desc = cat.describe() + + exp_index = CategoricalIndex( + list("abcd"), ordered=self.factor.ordered, name="categories" + ) + expected = DataFrame( + {"counts": [3, 2, 3, 0], "freqs": [3 / 8.0, 2 / 8.0, 3 / 8.0, 0]}, + index=exp_index, + ) + tm.assert_frame_equal(desc, expected) + + # check an integer one + cat = Categorical([1, 2, 3, 1, 2, 3, 3, 2, 1, 1, 1]) + desc = cat.describe() + exp_index = CategoricalIndex([1, 2, 3], ordered=cat.ordered, name="categories") + expected = DataFrame( + {"counts": [5, 3, 3], "freqs": [5 / 11.0, 3 / 11.0, 3 / 11.0]}, + index=exp_index, + ) + tm.assert_frame_equal(desc, expected) + + # https://github.com/pandas-dev/pandas/issues/3678 + # describe should work with NaN + cat = Categorical([np.nan, 1, 2, 2]) + desc = cat.describe() + expected = DataFrame( + {"counts": [1, 2, 1], "freqs": [1 / 4.0, 2 / 4.0, 1 / 4.0]}, + 
index=CategoricalIndex( + [1, 2, np.nan], categories=[1, 2], name="categories" + ), + ) + tm.assert_frame_equal(desc, expected) + + def test_set_categories_inplace(self): + cat = self.factor.copy() + + with tm.assert_produces_warning(FutureWarning): + # issue #37643 inplace kwarg deprecated + cat.set_categories(["a", "b", "c", "d"], inplace=True) + + tm.assert_index_equal(cat.categories, Index(["a", "b", "c", "d"])) + + def test_codes_setter_deprecated(self): + cat = Categorical([1, 2, 3, 1, 2, 3, 3, 2, 1, 1, 1]) + new_codes = cat._codes + 1 + with tm.assert_produces_warning(FutureWarning): + # GH#40606 + cat._codes = new_codes + + assert cat._codes is new_codes + + +class TestPrivateCategoricalAPI: + def test_codes_immutable(self): + + # Codes should be read only + c = Categorical(["a", "b", "c", "a", np.nan]) + exp = np.array([0, 1, 2, 0, -1], dtype="int8") + tm.assert_numpy_array_equal(c.codes, exp) + + # Assignments to codes should raise + with pytest.raises(AttributeError, match="can't set attribute"): + c.codes = np.array([0, 1, 2, 0, 1], dtype="int8") + + # changes in the codes array should raise + codes = c.codes + + with pytest.raises(ValueError, match="assignment destination is read-only"): + codes[4] = 1 + + # But even after getting the codes, the original array should still be + # writeable! + c[4] = "a" + exp = np.array([0, 1, 2, 0, 0], dtype="int8") + tm.assert_numpy_array_equal(c.codes, exp) + c._codes[4] = 2 + exp = np.array([0, 1, 2, 0, 2], dtype="int8") + tm.assert_numpy_array_equal(c.codes, exp) + + @pytest.mark.parametrize( + "codes, old, new, expected", + [ + ([0, 1], ["a", "b"], ["a", "b"], [0, 1]), + ([0, 1], ["b", "a"], ["b", "a"], [0, 1]), + ([0, 1], ["a", "b"], ["b", "a"], [1, 0]), + ([0, 1], ["b", "a"], ["a", "b"], [1, 0]), + ([0, 1, 0, 1], ["a", "b"], ["a", "b", "c"], [0, 1, 0, 1]), + ([0, 1, 2, 2], ["a", "b", "c"], ["a", "b"], [0, 1, -1, -1]), + ([0, 1, -1], ["a", "b", "c"], ["a", "b", "c"], [0, 1, -1]), + ([0, 1, -1], ["a", "b", "c"], ["b"], [-1, 0, -1]), + ([0, 1, -1], ["a", "b", "c"], ["d"], [-1, -1, -1]), + ([0, 1, -1], ["a", "b", "c"], [], [-1, -1, -1]), + ([-1, -1], [], ["a", "b"], [-1, -1]), + ([1, 0], ["b", "a"], ["a", "b"], [0, 1]), + ], + ) + def test_recode_to_categories(self, codes, old, new, expected): + codes = np.asanyarray(codes, dtype=np.int8) + expected = np.asanyarray(expected, dtype=np.int8) + old = Index(old) + new = Index(new) + result = recode_for_categories(codes, old, new) + tm.assert_numpy_array_equal(result, expected) + + def test_recode_to_categories_large(self): + N = 1000 + codes = np.arange(N) + old = Index(codes) + expected = np.arange(N - 1, -1, -1, dtype=np.int16) + new = Index(expected) + result = recode_for_categories(codes, old, new) + tm.assert_numpy_array_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_astype.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_astype.py new file mode 100644 index 0000000..7c4b4d0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_astype.py @@ -0,0 +1,89 @@ +import numpy as np +import pytest + +from pandas import ( + Categorical, + CategoricalDtype, + NaT, + Timestamp, + array, + to_datetime, +) +import pandas._testing as tm + + +class TestAstype: + def test_astype_str_int_categories_to_nullable_int(self): + # GH#39616 + dtype = CategoricalDtype([str(i) for i in range(5)]) + codes = np.random.randint(5, size=20) + arr = Categorical.from_codes(codes, dtype=dtype) + + 
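+ # The categories here are the strings "0".."4", so the cast below parses
+ # each category back to an integer; the result should equal `codes` under
+ # the nullable Int64 dtype.
+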
res = arr.astype("Int64") + expected = array(codes, dtype="Int64") + tm.assert_extension_array_equal(res, expected) + + @pytest.mark.parametrize("ordered", [True, False]) + def test_astype(self, ordered): + # string + cat = Categorical(list("abbaaccc"), ordered=ordered) + result = cat.astype(object) + expected = np.array(cat) + tm.assert_numpy_array_equal(result, expected) + + msg = r"Cannot cast object dtype to float64" + with pytest.raises(ValueError, match=msg): + cat.astype(float) + + # numeric + cat = Categorical([0, 1, 2, 2, 1, 0, 1, 0, 2], ordered=ordered) + result = cat.astype(object) + expected = np.array(cat, dtype=object) + tm.assert_numpy_array_equal(result, expected) + + result = cat.astype(int) + expected = np.array(cat, dtype="int") + tm.assert_numpy_array_equal(result, expected) + + result = cat.astype(float) + expected = np.array(cat, dtype=float) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("dtype_ordered", [True, False]) + @pytest.mark.parametrize("cat_ordered", [True, False]) + def test_astype_category(self, dtype_ordered, cat_ordered): + # GH#10696/GH#18593 + data = list("abcaacbab") + cat = Categorical(data, categories=list("bac"), ordered=cat_ordered) + + # standard categories + dtype = CategoricalDtype(ordered=dtype_ordered) + result = cat.astype(dtype) + expected = Categorical(data, categories=cat.categories, ordered=dtype_ordered) + tm.assert_categorical_equal(result, expected) + + # non-standard categories + dtype = CategoricalDtype(list("adc"), dtype_ordered) + result = cat.astype(dtype) + expected = Categorical(data, dtype=dtype) + tm.assert_categorical_equal(result, expected) + + if dtype_ordered is False: + # dtype='category' can't specify ordered, so only test once + result = cat.astype("category") + expected = cat + tm.assert_categorical_equal(result, expected) + + def test_astype_object_datetime_categories(self): + # GH#40754 + cat = Categorical(to_datetime(["2021-03-27", NaT])) + result = cat.astype(object) + expected = np.array([Timestamp("2021-03-27 00:00:00"), NaT], dtype="object") + tm.assert_numpy_array_equal(result, expected) + + def test_astype_object_timestamp_categories(self): + # GH#18024 + cat = Categorical([Timestamp("2014-01-01")]) + result = cat.astype(object) + expected = np.array([Timestamp("2014-01-01 00:00:00")], dtype="object") + tm.assert_numpy_array_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_constructors.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_constructors.py new file mode 100644 index 0000000..716ef68 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_constructors.py @@ -0,0 +1,762 @@ +from datetime import ( + date, + datetime, +) + +import numpy as np +import pytest + +from pandas.compat import ( + IS64, + is_platform_windows, +) + +from pandas.core.dtypes.common import ( + is_float_dtype, + is_integer_dtype, +) +from pandas.core.dtypes.dtypes import CategoricalDtype + +import pandas as pd +from pandas import ( + Categorical, + CategoricalIndex, + DatetimeIndex, + Index, + Interval, + IntervalIndex, + MultiIndex, + NaT, + Series, + Timestamp, + date_range, + period_range, + timedelta_range, +) +import pandas._testing as tm +from pandas.core.api import Int64Index + + +class TestCategoricalConstructors: + def test_categorical_scalar_deprecated(self): + # GH#38433 + with tm.assert_produces_warning(FutureWarning): + Categorical("A", categories=["A", "B"]) + + def 
test_categorical_1d_only(self): + # ndim > 1 + msg = "> 1 ndim Categorical are not supported at this time" + with pytest.raises(NotImplementedError, match=msg): + Categorical(np.array([list("abcd")])) + + def test_validate_ordered(self): + # see gh-14058 + exp_msg = "'ordered' must either be 'True' or 'False'" + exp_err = TypeError + + # This should be a boolean. + ordered = np.array([0, 1, 2]) + + with pytest.raises(exp_err, match=exp_msg): + Categorical([1, 2, 3], ordered=ordered) + + with pytest.raises(exp_err, match=exp_msg): + Categorical.from_codes( + [0, 0, 1], categories=["a", "b", "c"], ordered=ordered + ) + + def test_constructor_empty(self): + # GH 17248 + c = Categorical([]) + expected = Index([]) + tm.assert_index_equal(c.categories, expected) + + c = Categorical([], categories=[1, 2, 3]) + expected = Int64Index([1, 2, 3]) + tm.assert_index_equal(c.categories, expected) + + def test_constructor_empty_boolean(self): + # see gh-22702 + cat = Categorical([], categories=[True, False]) + categories = sorted(cat.categories.tolist()) + assert categories == [False, True] + + def test_constructor_tuples(self): + values = np.array([(1,), (1, 2), (1,), (1, 2)], dtype=object) + result = Categorical(values) + expected = Index([(1,), (1, 2)], tupleize_cols=False) + tm.assert_index_equal(result.categories, expected) + assert result.ordered is False + + def test_constructor_tuples_datetimes(self): + # numpy will auto reshape when all of the tuples are the + # same len, so add an extra one with 2 items and slice it off + values = np.array( + [ + (Timestamp("2010-01-01"),), + (Timestamp("2010-01-02"),), + (Timestamp("2010-01-01"),), + (Timestamp("2010-01-02"),), + ("a", "b"), + ], + dtype=object, + )[:-1] + result = Categorical(values) + expected = Index( + [(Timestamp("2010-01-01"),), (Timestamp("2010-01-02"),)], + tupleize_cols=False, + ) + tm.assert_index_equal(result.categories, expected) + + def test_constructor_unsortable(self): + + # it works! + arr = np.array([1, 2, 3, datetime.now()], dtype="O") + factor = Categorical(arr, ordered=False) + assert not factor.ordered + + # this however will raise as cannot be sorted + msg = ( + "'values' is not ordered, please explicitly specify the " + "categories order by passing in a categories argument." 
+ ) + with pytest.raises(TypeError, match=msg): + Categorical(arr, ordered=True) + + def test_constructor_interval(self): + result = Categorical( + [Interval(1, 2), Interval(2, 3), Interval(3, 6)], ordered=True + ) + ii = IntervalIndex([Interval(1, 2), Interval(2, 3), Interval(3, 6)]) + exp = Categorical(ii, ordered=True) + tm.assert_categorical_equal(result, exp) + tm.assert_index_equal(result.categories, ii) + + def test_constructor(self): + + exp_arr = np.array(["a", "b", "c", "a", "b", "c"], dtype=np.object_) + c1 = Categorical(exp_arr) + tm.assert_numpy_array_equal(c1.__array__(), exp_arr) + c2 = Categorical(exp_arr, categories=["a", "b", "c"]) + tm.assert_numpy_array_equal(c2.__array__(), exp_arr) + c2 = Categorical(exp_arr, categories=["c", "b", "a"]) + tm.assert_numpy_array_equal(c2.__array__(), exp_arr) + + # categories must be unique + msg = "Categorical categories must be unique" + with pytest.raises(ValueError, match=msg): + Categorical([1, 2], [1, 2, 2]) + + with pytest.raises(ValueError, match=msg): + Categorical(["a", "b"], ["a", "b", "b"]) + + # The default should be unordered + c1 = Categorical(["a", "b", "c", "a"]) + assert not c1.ordered + + # Categorical as input + c1 = Categorical(["a", "b", "c", "a"]) + c2 = Categorical(c1) + tm.assert_categorical_equal(c1, c2) + + c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) + c2 = Categorical(c1) + tm.assert_categorical_equal(c1, c2) + + c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"]) + c2 = Categorical(c1) + tm.assert_categorical_equal(c1, c2) + + c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"]) + c2 = Categorical(c1, categories=["a", "b", "c"]) + tm.assert_numpy_array_equal(c1.__array__(), c2.__array__()) + tm.assert_index_equal(c2.categories, Index(["a", "b", "c"])) + + # Series of dtype category + c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) + c2 = Categorical(Series(c1)) + tm.assert_categorical_equal(c1, c2) + + c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"]) + c2 = Categorical(Series(c1)) + tm.assert_categorical_equal(c1, c2) + + # Series + c1 = Categorical(["a", "b", "c", "a"]) + c2 = Categorical(Series(["a", "b", "c", "a"])) + tm.assert_categorical_equal(c1, c2) + + c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) + c2 = Categorical(Series(["a", "b", "c", "a"]), categories=["a", "b", "c", "d"]) + tm.assert_categorical_equal(c1, c2) + + # This should result in integer categories, not float! + cat = Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) + assert is_integer_dtype(cat.categories) + + # https://github.com/pandas-dev/pandas/issues/3678 + cat = Categorical([np.nan, 1, 2, 3]) + assert is_integer_dtype(cat.categories) + + # this should result in floats + cat = Categorical([np.nan, 1, 2.0, 3]) + assert is_float_dtype(cat.categories) + + cat = Categorical([np.nan, 1.0, 2.0, 3.0]) + assert is_float_dtype(cat.categories) + + # This doesn't work -> this would probably need some kind of "remember + # the original type" feature to try to cast the array interface result + # to... 
+ + # vals = np.asarray(cat[cat.notna()]) + # assert is_integer_dtype(vals) + + # corner cases + cat = Categorical([1]) + assert len(cat.categories) == 1 + assert cat.categories[0] == 1 + assert len(cat.codes) == 1 + assert cat.codes[0] == 0 + + cat = Categorical(["a"]) + assert len(cat.categories) == 1 + assert cat.categories[0] == "a" + assert len(cat.codes) == 1 + assert cat.codes[0] == 0 + + with tm.assert_produces_warning(FutureWarning): + # GH#38433 + cat = Categorical(1) + assert len(cat.categories) == 1 + assert cat.categories[0] == 1 + assert len(cat.codes) == 1 + assert cat.codes[0] == 0 + # two arrays + # - when the first is an integer dtype and the second is not + # - when the resulting codes are all -1/NaN + with tm.assert_produces_warning(None): + Categorical([0, 1, 2, 0, 1, 2], categories=["a", "b", "c"]) + + with tm.assert_produces_warning(None): + Categorical([0, 1, 2, 0, 1, 2], categories=[3, 4, 5]) + + # the next one are from the old docs + with tm.assert_produces_warning(None): + Categorical([0, 1, 2, 0, 1, 2], [1, 2, 3]) + cat = Categorical([1, 2], categories=[1, 2, 3]) + + # this is a legitimate constructor + with tm.assert_produces_warning(None): + Categorical(np.array([], dtype="int64"), categories=[3, 2, 1], ordered=True) + + def test_constructor_with_existing_categories(self): + # GH25318: constructing with pd.Series used to bogusly skip recoding + # categories + c0 = Categorical(["a", "b", "c", "a"]) + c1 = Categorical(["a", "b", "c", "a"], categories=["b", "c"]) + + c2 = Categorical(c0, categories=c1.categories) + tm.assert_categorical_equal(c1, c2) + + c3 = Categorical(Series(c0), categories=c1.categories) + tm.assert_categorical_equal(c1, c3) + + def test_constructor_not_sequence(self): + # https://github.com/pandas-dev/pandas/issues/16022 + msg = r"^Parameter 'categories' must be list-like, was" + with pytest.raises(TypeError, match=msg): + Categorical(["a", "b"], categories="a") + + def test_constructor_with_null(self): + + # Cannot have NaN in categories + msg = "Categorical categories cannot be null" + with pytest.raises(ValueError, match=msg): + Categorical([np.nan, "a", "b", "c"], categories=[np.nan, "a", "b", "c"]) + + with pytest.raises(ValueError, match=msg): + Categorical([None, "a", "b", "c"], categories=[None, "a", "b", "c"]) + + with pytest.raises(ValueError, match=msg): + Categorical( + DatetimeIndex(["nat", "20160101"]), + categories=[NaT, Timestamp("20160101")], + ) + + def test_constructor_with_index(self): + ci = CategoricalIndex(list("aabbca"), categories=list("cab")) + tm.assert_categorical_equal(ci.values, Categorical(ci)) + + ci = CategoricalIndex(list("aabbca"), categories=list("cab")) + tm.assert_categorical_equal( + ci.values, Categorical(ci.astype(object), categories=ci.categories) + ) + + def test_constructor_with_generator(self): + # This was raising an Error in isna(single_val).any() because isna + # returned a scalar for a generator + + exp = Categorical([0, 1, 2]) + cat = Categorical(x for x in [0, 1, 2]) + tm.assert_categorical_equal(cat, exp) + cat = Categorical(range(3)) + tm.assert_categorical_equal(cat, exp) + + MultiIndex.from_product([range(5), ["a", "b", "c"]]) + + # check that categories accept generators and sequences + cat = Categorical([0, 1, 2], categories=(x for x in [0, 1, 2])) + tm.assert_categorical_equal(cat, exp) + cat = Categorical([0, 1, 2], categories=range(3)) + tm.assert_categorical_equal(cat, exp) + + def test_constructor_with_rangeindex(self): + # RangeIndex is preserved in Categories + rng = 
Index(range(3)) + + cat = Categorical(rng) + tm.assert_index_equal(cat.categories, rng, exact=True) + + cat = Categorical([1, 2, 0], categories=rng) + tm.assert_index_equal(cat.categories, rng, exact=True) + + @pytest.mark.parametrize( + "dtl", + [ + date_range("1995-01-01 00:00:00", periods=5, freq="s"), + date_range("1995-01-01 00:00:00", periods=5, freq="s", tz="US/Eastern"), + timedelta_range("1 day", periods=5, freq="s"), + ], + ) + def test_constructor_with_datetimelike(self, dtl): + # see gh-12077 + # constructor with a datetimelike and NaT + + s = Series(dtl) + c = Categorical(s) + + expected = type(dtl)(s) + expected._data.freq = None + + tm.assert_index_equal(c.categories, expected) + tm.assert_numpy_array_equal(c.codes, np.arange(5, dtype="int8")) + + # with NaT + s2 = s.copy() + s2.iloc[-1] = NaT + c = Categorical(s2) + + expected = type(dtl)(s2.dropna()) + expected._data.freq = None + + tm.assert_index_equal(c.categories, expected) + + exp = np.array([0, 1, 2, 3, -1], dtype=np.int8) + tm.assert_numpy_array_equal(c.codes, exp) + + result = repr(c) + assert "NaT" in result + + def test_constructor_from_index_series_datetimetz(self): + idx = date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern") + idx = idx._with_freq(None) # freq not preserved in result.categories + result = Categorical(idx) + tm.assert_index_equal(result.categories, idx) + + result = Categorical(Series(idx)) + tm.assert_index_equal(result.categories, idx) + + def test_constructor_date_objects(self): + # we dont cast date objects to timestamps, matching Index constructor + v = date.today() + + cat = Categorical([v, v]) + assert cat.categories.dtype == object + assert type(cat.categories[0]) is date + + def test_constructor_from_index_series_timedelta(self): + idx = timedelta_range("1 days", freq="D", periods=3) + idx = idx._with_freq(None) # freq not preserved in result.categories + result = Categorical(idx) + tm.assert_index_equal(result.categories, idx) + + result = Categorical(Series(idx)) + tm.assert_index_equal(result.categories, idx) + + def test_constructor_from_index_series_period(self): + idx = period_range("2015-01-01", freq="D", periods=3) + result = Categorical(idx) + tm.assert_index_equal(result.categories, idx) + + result = Categorical(Series(idx)) + tm.assert_index_equal(result.categories, idx) + + @pytest.mark.parametrize( + "values", + [ + np.array([1.0, 1.2, 1.8, np.nan]), + np.array([1, 2, 3], dtype="int64"), + ["a", "b", "c", np.nan], + [pd.Period("2014-01"), pd.Period("2014-02"), NaT], + [Timestamp("2014-01-01"), Timestamp("2014-01-02"), NaT], + [ + Timestamp("2014-01-01", tz="US/Eastern"), + Timestamp("2014-01-02", tz="US/Eastern"), + NaT, + ], + ], + ) + def test_constructor_invariant(self, values): + # GH 14190 + c = Categorical(values) + c2 = Categorical(c) + tm.assert_categorical_equal(c, c2) + + @pytest.mark.parametrize("ordered", [True, False]) + def test_constructor_with_dtype(self, ordered): + categories = ["b", "a", "c"] + dtype = CategoricalDtype(categories, ordered=ordered) + result = Categorical(["a", "b", "a", "c"], dtype=dtype) + expected = Categorical( + ["a", "b", "a", "c"], categories=categories, ordered=ordered + ) + tm.assert_categorical_equal(result, expected) + assert result.ordered is ordered + + def test_constructor_dtype_and_others_raises(self): + dtype = CategoricalDtype(["a", "b"], ordered=True) + msg = "Cannot specify `categories` or `ordered` together with `dtype`." 
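+ # A CategoricalDtype already carries both the categories and the ordered
+ # flag, so combining it with either keyword would be ambiguous.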
+ with pytest.raises(ValueError, match=msg): + Categorical(["a", "b"], categories=["a", "b"], dtype=dtype) + + with pytest.raises(ValueError, match=msg): + Categorical(["a", "b"], ordered=True, dtype=dtype) + + with pytest.raises(ValueError, match=msg): + Categorical(["a", "b"], ordered=False, dtype=dtype) + + @pytest.mark.parametrize("categories", [None, ["a", "b"], ["a", "c"]]) + @pytest.mark.parametrize("ordered", [True, False]) + def test_constructor_str_category(self, categories, ordered): + result = Categorical( + ["a", "b"], categories=categories, ordered=ordered, dtype="category" + ) + expected = Categorical(["a", "b"], categories=categories, ordered=ordered) + tm.assert_categorical_equal(result, expected) + + def test_constructor_str_unknown(self): + with pytest.raises(ValueError, match="Unknown dtype"): + Categorical([1, 2], dtype="foo") + + def test_constructor_np_strs(self): + # GH#31499 Hastable.map_locations needs to work on np.str_ objects + cat = Categorical(["1", "0", "1"], [np.str_("0"), np.str_("1")]) + assert all(isinstance(x, np.str_) for x in cat.categories) + + def test_constructor_from_categorical_with_dtype(self): + dtype = CategoricalDtype(["a", "b", "c"], ordered=True) + values = Categorical(["a", "b", "d"]) + result = Categorical(values, dtype=dtype) + # We use dtype.categories, not values.categories + expected = Categorical( + ["a", "b", "d"], categories=["a", "b", "c"], ordered=True + ) + tm.assert_categorical_equal(result, expected) + + def test_constructor_from_categorical_with_unknown_dtype(self): + dtype = CategoricalDtype(None, ordered=True) + values = Categorical(["a", "b", "d"]) + result = Categorical(values, dtype=dtype) + # We use values.categories, not dtype.categories + expected = Categorical( + ["a", "b", "d"], categories=["a", "b", "d"], ordered=True + ) + tm.assert_categorical_equal(result, expected) + + def test_constructor_from_categorical_string(self): + values = Categorical(["a", "b", "d"]) + # use categories, ordered + result = Categorical( + values, categories=["a", "b", "c"], ordered=True, dtype="category" + ) + expected = Categorical( + ["a", "b", "d"], categories=["a", "b", "c"], ordered=True + ) + tm.assert_categorical_equal(result, expected) + + # No string + result = Categorical(values, categories=["a", "b", "c"], ordered=True) + tm.assert_categorical_equal(result, expected) + + def test_constructor_with_categorical_categories(self): + # GH17884 + expected = Categorical(["a", "b"], categories=["a", "b", "c"]) + + result = Categorical(["a", "b"], categories=Categorical(["a", "b", "c"])) + tm.assert_categorical_equal(result, expected) + + result = Categorical(["a", "b"], categories=CategoricalIndex(["a", "b", "c"])) + tm.assert_categorical_equal(result, expected) + + @pytest.mark.parametrize("klass", [lambda x: np.array(x, dtype=object), list]) + def test_construction_with_null(self, klass, nulls_fixture): + # https://github.com/pandas-dev/pandas/issues/31927 + values = klass(["a", nulls_fixture, "b"]) + result = Categorical(values) + + dtype = CategoricalDtype(["a", "b"]) + codes = [0, -1, 1] + expected = Categorical.from_codes(codes=codes, dtype=dtype) + + tm.assert_categorical_equal(result, expected) + + def test_from_codes_nullable_int_categories(self, any_numeric_ea_dtype): + # GH#39649 + cats = pd.array(range(5), dtype=any_numeric_ea_dtype) + codes = np.random.randint(5, size=3) + dtype = CategoricalDtype(cats) + arr = Categorical.from_codes(codes, dtype=dtype) + assert arr.categories.dtype == cats.dtype + 
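+ # i.e. the nullable categories dtype (such as "Int64") survives from_codes
+ # rather than being downcast to a plain NumPy dtype:
+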
tm.assert_index_equal(arr.categories, Index(cats)) + + def test_from_codes_empty(self): + cat = ["a", "b", "c"] + result = Categorical.from_codes([], categories=cat) + expected = Categorical([], categories=cat) + + tm.assert_categorical_equal(result, expected) + + def test_from_codes_too_few_categories(self): + dtype = CategoricalDtype(categories=[1, 2]) + msg = "codes need to be between " + with pytest.raises(ValueError, match=msg): + Categorical.from_codes([1, 2], categories=dtype.categories) + with pytest.raises(ValueError, match=msg): + Categorical.from_codes([1, 2], dtype=dtype) + + def test_from_codes_non_int_codes(self): + dtype = CategoricalDtype(categories=[1, 2]) + msg = "codes need to be array-like integers" + with pytest.raises(ValueError, match=msg): + Categorical.from_codes(["a"], categories=dtype.categories) + with pytest.raises(ValueError, match=msg): + Categorical.from_codes(["a"], dtype=dtype) + + def test_from_codes_non_unique_categories(self): + with pytest.raises(ValueError, match="Categorical categories must be unique"): + Categorical.from_codes([0, 1, 2], categories=["a", "a", "b"]) + + def test_from_codes_nan_cat_included(self): + with pytest.raises(ValueError, match="Categorical categories cannot be null"): + Categorical.from_codes([0, 1, 2], categories=["a", "b", np.nan]) + + def test_from_codes_too_negative(self): + dtype = CategoricalDtype(categories=["a", "b", "c"]) + msg = r"codes need to be between -1 and len\(categories\)-1" + with pytest.raises(ValueError, match=msg): + Categorical.from_codes([-2, 1, 2], categories=dtype.categories) + with pytest.raises(ValueError, match=msg): + Categorical.from_codes([-2, 1, 2], dtype=dtype) + + def test_from_codes(self): + dtype = CategoricalDtype(categories=["a", "b", "c"]) + exp = Categorical(["a", "b", "c"], ordered=False) + res = Categorical.from_codes([0, 1, 2], categories=dtype.categories) + tm.assert_categorical_equal(exp, res) + + res = Categorical.from_codes([0, 1, 2], dtype=dtype) + tm.assert_categorical_equal(exp, res) + + @pytest.mark.parametrize("klass", [Categorical, CategoricalIndex]) + def test_from_codes_with_categorical_categories(self, klass): + # GH17884 + expected = Categorical(["a", "b"], categories=["a", "b", "c"]) + + result = Categorical.from_codes([0, 1], categories=klass(["a", "b", "c"])) + tm.assert_categorical_equal(result, expected) + + @pytest.mark.parametrize("klass", [Categorical, CategoricalIndex]) + def test_from_codes_with_non_unique_categorical_categories(self, klass): + with pytest.raises(ValueError, match="Categorical categories must be unique"): + Categorical.from_codes([0, 1], klass(["a", "b", "a"])) + + def test_from_codes_with_nan_code(self): + # GH21767 + codes = [1, 2, np.nan] + dtype = CategoricalDtype(categories=["a", "b", "c"]) + with pytest.raises(ValueError, match="codes need to be array-like integers"): + Categorical.from_codes(codes, categories=dtype.categories) + with pytest.raises(ValueError, match="codes need to be array-like integers"): + Categorical.from_codes(codes, dtype=dtype) + + @pytest.mark.parametrize("codes", [[1.0, 2.0, 0], [1.1, 2.0, 0]]) + def test_from_codes_with_float(self, codes): + # GH21767 + # float codes should raise even if values are equal to integers + dtype = CategoricalDtype(categories=["a", "b", "c"]) + + msg = "codes need to be array-like integers" + with pytest.raises(ValueError, match=msg): + Categorical.from_codes(codes, dtype.categories) + with pytest.raises(ValueError, match=msg): + Categorical.from_codes(codes, dtype=dtype) + + def 
test_from_codes_with_dtype_raises(self): + msg = "Cannot specify" + with pytest.raises(ValueError, match=msg): + Categorical.from_codes( + [0, 1], categories=["a", "b"], dtype=CategoricalDtype(["a", "b"]) + ) + + with pytest.raises(ValueError, match=msg): + Categorical.from_codes( + [0, 1], ordered=True, dtype=CategoricalDtype(["a", "b"]) + ) + + def test_from_codes_neither(self): + msg = "Both were None" + with pytest.raises(ValueError, match=msg): + Categorical.from_codes([0, 1]) + + def test_from_codes_with_nullable_int(self): + codes = pd.array([0, 1], dtype="Int64") + categories = ["a", "b"] + + result = Categorical.from_codes(codes, categories=categories) + expected = Categorical.from_codes(codes.to_numpy(int), categories=categories) + + tm.assert_categorical_equal(result, expected) + + def test_from_codes_with_nullable_int_na_raises(self): + codes = pd.array([0, None], dtype="Int64") + categories = ["a", "b"] + + msg = "codes cannot contain NA values" + with pytest.raises(ValueError, match=msg): + Categorical.from_codes(codes, categories=categories) + + @pytest.mark.parametrize("dtype", [None, "category"]) + def test_from_inferred_categories(self, dtype): + cats = ["a", "b"] + codes = np.array([0, 0, 1, 1], dtype="i8") + result = Categorical._from_inferred_categories(cats, codes, dtype) + expected = Categorical.from_codes(codes, cats) + tm.assert_categorical_equal(result, expected) + + @pytest.mark.parametrize("dtype", [None, "category"]) + def test_from_inferred_categories_sorts(self, dtype): + cats = ["b", "a"] + codes = np.array([0, 1, 1, 1], dtype="i8") + result = Categorical._from_inferred_categories(cats, codes, dtype) + expected = Categorical.from_codes([1, 0, 0, 0], ["a", "b"]) + tm.assert_categorical_equal(result, expected) + + def test_from_inferred_categories_dtype(self): + cats = ["a", "b", "d"] + codes = np.array([0, 1, 0, 2], dtype="i8") + dtype = CategoricalDtype(["c", "b", "a"], ordered=True) + result = Categorical._from_inferred_categories(cats, codes, dtype) + expected = Categorical( + ["a", "b", "a", "d"], categories=["c", "b", "a"], ordered=True + ) + tm.assert_categorical_equal(result, expected) + + def test_from_inferred_categories_coerces(self): + cats = ["1", "2", "bad"] + codes = np.array([0, 0, 1, 2], dtype="i8") + dtype = CategoricalDtype([1, 2]) + result = Categorical._from_inferred_categories(cats, codes, dtype) + expected = Categorical([1, 1, 2, np.nan]) + tm.assert_categorical_equal(result, expected) + + @pytest.mark.parametrize("ordered", [None, True, False]) + def test_construction_with_ordered(self, ordered): + # GH 9347, 9190 + cat = Categorical([0, 1, 2], ordered=ordered) + assert cat.ordered == bool(ordered) + + @pytest.mark.xfail(reason="Imaginary values not supported in Categorical") + def test_constructor_imaginary(self): + values = [1, 2, 3 + 1j] + c1 = Categorical(values) + tm.assert_index_equal(c1.categories, Index(values)) + tm.assert_numpy_array_equal(np.array(c1), np.array(values)) + + def test_constructor_string_and_tuples(self): + # GH 21416 + c = Categorical(np.array(["c", ("a", "b"), ("b", "a"), "c"], dtype=object)) + expected_index = Index([("a", "b"), ("b", "a"), "c"]) + assert c.categories.equals(expected_index) + + def test_interval(self): + idx = pd.interval_range(0, 10, periods=10) + cat = Categorical(idx, categories=idx) + expected_codes = np.arange(10, dtype="int8") + tm.assert_numpy_array_equal(cat.codes, expected_codes) + tm.assert_index_equal(cat.categories, idx) + + # infer categories + cat = Categorical(idx) + 
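+ # Categories are inferred as the sorted unique intervals, which here is
+ # idx itself, so the codes below match the explicit-categories case.
+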
tm.assert_numpy_array_equal(cat.codes, expected_codes) + tm.assert_index_equal(cat.categories, idx) + + # list values + cat = Categorical(list(idx)) + tm.assert_numpy_array_equal(cat.codes, expected_codes) + tm.assert_index_equal(cat.categories, idx) + + # list values, categories + cat = Categorical(list(idx), categories=list(idx)) + tm.assert_numpy_array_equal(cat.codes, expected_codes) + tm.assert_index_equal(cat.categories, idx) + + # shuffled + values = idx.take([1, 2, 0]) + cat = Categorical(values, categories=idx) + tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0], dtype="int8")) + tm.assert_index_equal(cat.categories, idx) + + # extra + values = pd.interval_range(8, 11, periods=3) + cat = Categorical(values, categories=idx) + expected_codes = np.array([8, 9, -1], dtype="int8") + tm.assert_numpy_array_equal(cat.codes, expected_codes) + tm.assert_index_equal(cat.categories, idx) + + # overlapping + idx = IntervalIndex([Interval(0, 2), Interval(0, 1)]) + cat = Categorical(idx, categories=idx) + expected_codes = np.array([0, 1], dtype="int8") + tm.assert_numpy_array_equal(cat.codes, expected_codes) + tm.assert_index_equal(cat.categories, idx) + + def test_categorical_extension_array_nullable(self, nulls_fixture): + # GH: + arr = pd.arrays.StringArray._from_sequence([nulls_fixture] * 2) + result = Categorical(arr) + assert arr.dtype == result.categories.dtype + expected = Categorical(Series([pd.NA, pd.NA], dtype=arr.dtype)) + tm.assert_categorical_equal(result, expected) + + def test_from_sequence_copy(self): + cat = Categorical(np.arange(5).repeat(2)) + result = Categorical._from_sequence(cat, dtype=None, copy=False) + + # more generally, we'd be OK with a view + assert result._codes is cat._codes + + result = Categorical._from_sequence(cat, dtype=None, copy=True) + + assert not tm.shares_memory(result, cat) + + @pytest.mark.xfail( + not IS64 or is_platform_windows(), + reason="Incorrectly raising in ensure_datetime64ns", + ) + def test_constructor_datetime64_non_nano(self): + categories = np.arange(10).view("M8[D]") + values = categories[::2].copy() + + cat = Categorical(values, categories=categories) + assert (cat == values).all() diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_dtypes.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_dtypes.py new file mode 100644 index 0000000..5acb62c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_dtypes.py @@ -0,0 +1,136 @@ +import pytest + +from pandas.core.dtypes.dtypes import CategoricalDtype + +from pandas import ( + Categorical, + CategoricalIndex, + Index, + Series, + Timestamp, +) +import pandas._testing as tm + + +class TestCategoricalDtypes: + def test_is_dtype_equal_deprecated(self): + # GH#37545 + c1 = Categorical(list("aabca"), categories=list("abc"), ordered=False) + + with tm.assert_produces_warning(FutureWarning): + c1.is_dtype_equal(c1) + + def test_categories_match_up_to_permutation(self): + + # test dtype comparisons between cats + + c1 = Categorical(list("aabca"), categories=list("abc"), ordered=False) + c2 = Categorical(list("aabca"), categories=list("cab"), ordered=False) + c3 = Categorical(list("aabca"), categories=list("cab"), ordered=True) + assert c1._categories_match_up_to_permutation(c1) + assert c2._categories_match_up_to_permutation(c2) + assert c3._categories_match_up_to_permutation(c3) + assert c1._categories_match_up_to_permutation(c2) + assert not c1._categories_match_up_to_permutation(c3) + assert 
not c1._categories_match_up_to_permutation(Index(list("aabca"))) + assert not c1._categories_match_up_to_permutation(c1.astype(object)) + assert c1._categories_match_up_to_permutation(CategoricalIndex(c1)) + assert c1._categories_match_up_to_permutation( + CategoricalIndex(c1, categories=list("cab")) + ) + assert not c1._categories_match_up_to_permutation( + CategoricalIndex(c1, ordered=True) + ) + + # GH 16659 + s1 = Series(c1) + s2 = Series(c2) + s3 = Series(c3) + assert c1._categories_match_up_to_permutation(s1) + assert c2._categories_match_up_to_permutation(s2) + assert c3._categories_match_up_to_permutation(s3) + assert c1._categories_match_up_to_permutation(s2) + assert not c1._categories_match_up_to_permutation(s3) + assert not c1._categories_match_up_to_permutation(s1.astype(object)) + + def test_set_dtype_same(self): + c = Categorical(["a", "b", "c"]) + result = c._set_dtype(CategoricalDtype(["a", "b", "c"])) + tm.assert_categorical_equal(result, c) + + def test_set_dtype_new_categories(self): + c = Categorical(["a", "b", "c"]) + result = c._set_dtype(CategoricalDtype(list("abcd"))) + tm.assert_numpy_array_equal(result.codes, c.codes) + tm.assert_index_equal(result.dtype.categories, Index(list("abcd"))) + + @pytest.mark.parametrize( + "values, categories, new_categories", + [ + # No NaNs, same cats, same order + (["a", "b", "a"], ["a", "b"], ["a", "b"]), + # No NaNs, same cats, different order + (["a", "b", "a"], ["a", "b"], ["b", "a"]), + # Same, unsorted + (["b", "a", "a"], ["a", "b"], ["a", "b"]), + # No NaNs, same cats, different order + (["b", "a", "a"], ["a", "b"], ["b", "a"]), + # NaNs + (["a", "b", "c"], ["a", "b"], ["a", "b"]), + (["a", "b", "c"], ["a", "b"], ["b", "a"]), + (["b", "a", "c"], ["a", "b"], ["a", "b"]), + (["b", "a", "c"], ["a", "b"], ["a", "b"]), + # Introduce NaNs + (["a", "b", "c"], ["a", "b"], ["a"]), + (["a", "b", "c"], ["a", "b"], ["b"]), + (["b", "a", "c"], ["a", "b"], ["a"]), + (["b", "a", "c"], ["a", "b"], ["a"]), + # No overlap + (["a", "b", "c"], ["a", "b"], ["d", "e"]), + ], + ) + @pytest.mark.parametrize("ordered", [True, False]) + def test_set_dtype_many(self, values, categories, new_categories, ordered): + c = Categorical(values, categories) + expected = Categorical(values, new_categories, ordered) + result = c._set_dtype(expected.dtype) + tm.assert_categorical_equal(result, expected) + + def test_set_dtype_no_overlap(self): + c = Categorical(["a", "b", "c"], ["d", "e"]) + result = c._set_dtype(CategoricalDtype(["a", "b"])) + expected = Categorical([None, None, None], categories=["a", "b"]) + tm.assert_categorical_equal(result, expected) + + def test_codes_dtypes(self): + + # GH 8453 + result = Categorical(["foo", "bar", "baz"]) + assert result.codes.dtype == "int8" + + result = Categorical([f"foo{i:05d}" for i in range(400)]) + assert result.codes.dtype == "int16" + + result = Categorical([f"foo{i:05d}" for i in range(40000)]) + assert result.codes.dtype == "int32" + + # adding cats + result = Categorical(["foo", "bar", "baz"]) + assert result.codes.dtype == "int8" + result = result.add_categories([f"foo{i:05d}" for i in range(400)]) + assert result.codes.dtype == "int16" + + # removing cats + result = result.remove_categories([f"foo{i:05d}" for i in range(300)]) + assert result.codes.dtype == "int8" + + def test_iter_python_types(self): + # GH-19909 + cat = Categorical([1, 2]) + assert isinstance(list(cat)[0], int) + assert isinstance(cat.tolist()[0], int) + + def test_iter_python_types_datetime(self): + cat = 
Categorical([Timestamp("2017-01-01"), Timestamp("2017-01-02")]) + assert isinstance(list(cat)[0], Timestamp) + assert isinstance(cat.tolist()[0], Timestamp) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_indexing.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_indexing.py new file mode 100644 index 0000000..617d186 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_indexing.py @@ -0,0 +1,377 @@ +import numpy as np +import pytest + +from pandas import ( + Categorical, + CategoricalIndex, + Index, + Interval, + IntervalIndex, + PeriodIndex, + Series, + Timedelta, + Timestamp, +) +import pandas._testing as tm +import pandas.core.common as com +from pandas.tests.arrays.categorical.common import TestCategorical + + +class TestCategoricalIndexingWithFactor(TestCategorical): + def test_getitem(self): + assert self.factor[0] == "a" + assert self.factor[-1] == "c" + + subf = self.factor[[0, 1, 2]] + tm.assert_numpy_array_equal(subf._codes, np.array([0, 1, 1], dtype=np.int8)) + + subf = self.factor[np.asarray(self.factor) == "c"] + tm.assert_numpy_array_equal(subf._codes, np.array([2, 2, 2], dtype=np.int8)) + + def test_setitem(self): + + # int/positional + c = self.factor.copy() + c[0] = "b" + assert c[0] == "b" + c[-1] = "a" + assert c[-1] == "a" + + # boolean + c = self.factor.copy() + indexer = np.zeros(len(c), dtype="bool") + indexer[0] = True + indexer[-1] = True + c[indexer] = "c" + expected = Categorical(["c", "b", "b", "a", "a", "c", "c", "c"], ordered=True) + + tm.assert_categorical_equal(c, expected) + + @pytest.mark.parametrize( + "other", + [Categorical(["b", "a"]), Categorical(["b", "a"], categories=["b", "a"])], + ) + def test_setitem_same_but_unordered(self, other): + # GH-24142 + target = Categorical(["a", "b"], categories=["a", "b"]) + mask = np.array([True, False]) + target[mask] = other[mask] + expected = Categorical(["b", "b"], categories=["a", "b"]) + tm.assert_categorical_equal(target, expected) + + @pytest.mark.parametrize( + "other", + [ + Categorical(["b", "a"], categories=["b", "a", "c"]), + Categorical(["b", "a"], categories=["a", "b", "c"]), + Categorical(["a", "a"], categories=["a"]), + Categorical(["b", "b"], categories=["b"]), + ], + ) + def test_setitem_different_unordered_raises(self, other): + # GH-24142 + target = Categorical(["a", "b"], categories=["a", "b"]) + mask = np.array([True, False]) + msg = "Cannot set a Categorical with another, without identical categories" + with pytest.raises(TypeError, match=msg): + target[mask] = other[mask] + + @pytest.mark.parametrize( + "other", + [ + Categorical(["b", "a"]), + Categorical(["b", "a"], categories=["b", "a"], ordered=True), + Categorical(["b", "a"], categories=["a", "b", "c"], ordered=True), + ], + ) + def test_setitem_same_ordered_raises(self, other): + # Gh-24142 + target = Categorical(["a", "b"], categories=["a", "b"], ordered=True) + mask = np.array([True, False]) + msg = "Cannot set a Categorical with another, without identical categories" + with pytest.raises(TypeError, match=msg): + target[mask] = other[mask] + + def test_setitem_tuple(self): + # GH#20439 + cat = Categorical([(0, 1), (0, 2), (0, 1)]) + + # This should not raise + cat[1] = cat[0] + assert cat[1] == (0, 1) + + def test_setitem_listlike(self): + + # GH#9469 + # properly coerce the input indexers + np.random.seed(1) + cat = Categorical( + np.random.randint(0, 5, size=150000).astype(np.int8) + ).add_categories([-1000]) + indexer = 
np.array([100000]).astype(np.int64) + cat[indexer] = -1000 + + # we are asserting the code result here + # which maps to the -1000 category + result = cat.codes[np.array([100000]).astype(np.int64)] + tm.assert_numpy_array_equal(result, np.array([5], dtype="int8")) + + +class TestCategoricalIndexing: + def test_getitem_slice(self): + cat = Categorical(["a", "b", "c", "d", "a", "b", "c"]) + sliced = cat[3] + assert sliced == "d" + + sliced = cat[3:5] + expected = Categorical(["d", "a"], categories=["a", "b", "c", "d"]) + tm.assert_categorical_equal(sliced, expected) + + def test_getitem_listlike(self): + + # GH 9469 + # properly coerce the input indexers + np.random.seed(1) + c = Categorical(np.random.randint(0, 5, size=150000).astype(np.int8)) + result = c.codes[np.array([100000]).astype(np.int64)] + expected = c[np.array([100000]).astype(np.int64)].codes + tm.assert_numpy_array_equal(result, expected) + + def test_periodindex(self): + idx1 = PeriodIndex( + ["2014-01", "2014-01", "2014-02", "2014-02", "2014-03", "2014-03"], freq="M" + ) + + cat1 = Categorical(idx1) + str(cat1) + exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.int8) + exp_idx = PeriodIndex(["2014-01", "2014-02", "2014-03"], freq="M") + tm.assert_numpy_array_equal(cat1._codes, exp_arr) + tm.assert_index_equal(cat1.categories, exp_idx) + + idx2 = PeriodIndex( + ["2014-03", "2014-03", "2014-02", "2014-01", "2014-03", "2014-01"], freq="M" + ) + cat2 = Categorical(idx2, ordered=True) + str(cat2) + exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.int8) + exp_idx2 = PeriodIndex(["2014-01", "2014-02", "2014-03"], freq="M") + tm.assert_numpy_array_equal(cat2._codes, exp_arr) + tm.assert_index_equal(cat2.categories, exp_idx2) + + idx3 = PeriodIndex( + [ + "2013-12", + "2013-11", + "2013-10", + "2013-09", + "2013-08", + "2013-07", + "2013-05", + ], + freq="M", + ) + cat3 = Categorical(idx3, ordered=True) + exp_arr = np.array([6, 5, 4, 3, 2, 1, 0], dtype=np.int8) + exp_idx = PeriodIndex( + [ + "2013-05", + "2013-07", + "2013-08", + "2013-09", + "2013-10", + "2013-11", + "2013-12", + ], + freq="M", + ) + tm.assert_numpy_array_equal(cat3._codes, exp_arr) + tm.assert_index_equal(cat3.categories, exp_idx) + + def test_categories_assignments(self): + cat = Categorical(["a", "b", "c", "a"]) + exp = np.array([1, 2, 3, 1], dtype=np.int64) + cat.categories = [1, 2, 3] + tm.assert_numpy_array_equal(cat.__array__(), exp) + tm.assert_index_equal(cat.categories, Index([1, 2, 3])) + + @pytest.mark.parametrize("new_categories", [[1, 2, 3, 4], [1, 2]]) + def test_categories_assignments_wrong_length_raises(self, new_categories): + cat = Categorical(["a", "b", "c", "a"]) + msg = ( + "new categories need to have the same number of items " + "as the old categories!" 
+ )
+ with pytest.raises(ValueError, match=msg):
+ cat.categories = new_categories
+
+ # Combinations of sorted/unique:
+ @pytest.mark.parametrize(
+ "idx_values", [[1, 2, 3, 4], [1, 3, 2, 4], [1, 3, 3, 4], [1, 2, 2, 4]]
+ )
+ # Combinations of missing/unique
+ @pytest.mark.parametrize("key_values", [[1, 2], [1, 5], [1, 1], [5, 5]])
+ @pytest.mark.parametrize("key_class", [Categorical, CategoricalIndex])
+ @pytest.mark.parametrize("dtype", [None, "category", "key"])
+ def test_get_indexer_non_unique(self, idx_values, key_values, key_class, dtype):
+ # GH 21448
+ key = key_class(key_values, categories=range(1, 5))
+
+ if dtype == "key":
+ dtype = key.dtype
+
+ # Test for flat index and CategoricalIndex with same/different cats:
+ idx = Index(idx_values, dtype=dtype)
+ expected, exp_miss = idx.get_indexer_non_unique(key_values)
+ result, res_miss = idx.get_indexer_non_unique(key)
+
+ tm.assert_numpy_array_equal(expected, result)
+ tm.assert_numpy_array_equal(exp_miss, res_miss)
+
+ exp_unique = idx.unique().get_indexer(key_values)
+ res_unique = idx.unique().get_indexer(key)
+ tm.assert_numpy_array_equal(res_unique, exp_unique)
+
+ def test_where_unobserved_nan(self):
+ ser = Series(Categorical(["a", "b"]))
+ result = ser.where([True, False])
+ expected = Series(Categorical(["a", None], categories=["a", "b"]))
+ tm.assert_series_equal(result, expected)
+
+ # all NA
+ ser = Series(Categorical(["a", "b"]))
+ result = ser.where([False, False])
+ expected = Series(Categorical([None, None], categories=["a", "b"]))
+ tm.assert_series_equal(result, expected)
+
+ def test_where_unobserved_categories(self):
+ ser = Series(Categorical(["a", "b", "c"], categories=["d", "c", "b", "a"]))
+ result = ser.where([True, True, False], other="b")
+ expected = Series(Categorical(["a", "b", "b"], categories=ser.cat.categories))
+ tm.assert_series_equal(result, expected)
+
+ def test_where_other_categorical(self):
+ ser = Series(Categorical(["a", "b", "c"], categories=["d", "c", "b", "a"]))
+ other = Categorical(["b", "c", "a"], categories=["a", "c", "b", "d"])
+ result = ser.where([True, False, True], other)
+ expected = Series(Categorical(["a", "c", "c"], dtype=ser.dtype))
+ tm.assert_series_equal(result, expected)
+
+ def test_where_new_category_raises(self):
+ ser = Series(Categorical(["a", "b", "c"]))
+ msg = "Cannot setitem on a Categorical with a new category"
+ with pytest.raises(TypeError, match=msg):
+ ser.where([True, False, True], "d")
+
+ def test_where_ordered_differs_raises(self):
+ ser = Series(
+ Categorical(["a", "b", "c"], categories=["d", "c", "b", "a"], ordered=True)
+ )
+ other = Categorical(
+ ["b", "c", "a"], categories=["a", "c", "b", "d"], ordered=True
+ )
+ with pytest.raises(TypeError, match="without identical categories"):
+ ser.where([True, False, True], other)
+
+
+class TestContains:
+ def test_contains(self):
+ # GH#21508
+ cat = Categorical(list("aabbca"), categories=list("cab"))
+
+ assert "b" in cat
+ assert "z" not in cat
+ assert np.nan not in cat
+ with pytest.raises(TypeError, match="unhashable type: 'list'"):
+ assert [1] in cat
+
+ # assert codes NOT in index
+ assert 0 not in cat
+ assert 1 not in cat
+
+ cat = Categorical(list("aabbca") + [np.nan], categories=list("cab"))
+ assert np.nan in cat
+
+ @pytest.mark.parametrize(
+ "item, expected",
+ [
+ (Interval(0, 1), True),
+ (1.5, True),
+ (Interval(0.5, 1.5), False),
+ ("a", False),
+ (Timestamp(1), False),
+ (Timedelta(1), False),
+ ],
+ ids=str,
+ )
+ def test_contains_interval(self, item, expected):
+ # GH#23705
+
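+ # Containment delegates to categories.get_loc, so the scalar 1.5 counts
+ # as "in" because it falls inside the category Interval(1, 2], while an
+ # interval not literally among the categories, e.g. Interval(0.5, 1.5),
+ # does not.
+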
cat = Categorical(IntervalIndex.from_breaks(range(3))) + result = item in cat + assert result is expected + + def test_contains_list(self): + # GH#21729 + cat = Categorical([1, 2, 3]) + + assert "a" not in cat + + with pytest.raises(TypeError, match="unhashable type"): + ["a"] in cat + + with pytest.raises(TypeError, match="unhashable type"): + ["a", "b"] in cat + + +@pytest.mark.parametrize("index", [True, False]) +def test_mask_with_boolean(index): + ser = Series(range(3)) + idx = Categorical([True, False, True]) + if index: + idx = CategoricalIndex(idx) + + assert com.is_bool_indexer(idx) + result = ser[idx] + expected = ser[idx.astype("object")] + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("index", [True, False]) +def test_mask_with_boolean_na_treated_as_false(index): + # https://github.com/pandas-dev/pandas/issues/31503 + ser = Series(range(3)) + idx = Categorical([True, False, None]) + if index: + idx = CategoricalIndex(idx) + + result = ser[idx] + expected = ser[idx.fillna(False)] + + tm.assert_series_equal(result, expected) + + +@pytest.fixture +def non_coercible_categorical(monkeypatch): + """ + Monkeypatch Categorical.__array__ to ensure no implicit conversion. + + Raises + ------ + ValueError + When Categorical.__array__ is called. + """ + # TODO(Categorical): identify other places where this may be + # useful and move to a conftest.py + def array(self, dtype=None): + raise ValueError("I cannot be converted.") + + with monkeypatch.context() as m: + m.setattr(Categorical, "__array__", array) + yield + + +def test_series_at(non_coercible_categorical): + arr = Categorical(["a", "b", "c"]) + ser = Series(arr) + result = ser.at[0] + assert result == "a" diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_missing.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_missing.py new file mode 100644 index 0000000..fb5330a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_missing.py @@ -0,0 +1,213 @@ +import collections + +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import CategoricalDtype + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + Index, + Series, + isna, +) +import pandas._testing as tm + + +class TestCategoricalMissing: + def test_isna(self): + exp = np.array([False, False, True]) + cat = Categorical(["a", "b", np.nan]) + res = cat.isna() + + tm.assert_numpy_array_equal(res, exp) + + def test_na_flags_int_categories(self): + # #1457 + + categories = list(range(10)) + labels = np.random.randint(0, 10, 20) + labels[::5] = -1 + + cat = Categorical(labels, categories, fastpath=True) + repr(cat) + + tm.assert_numpy_array_equal(isna(cat), labels == -1) + + def test_nan_handling(self): + + # Nans are represented as -1 in codes + c = Categorical(["a", "b", np.nan, "a"]) + tm.assert_index_equal(c.categories, Index(["a", "b"])) + tm.assert_numpy_array_equal(c._codes, np.array([0, 1, -1, 0], dtype=np.int8)) + c[1] = np.nan + tm.assert_index_equal(c.categories, Index(["a", "b"])) + tm.assert_numpy_array_equal(c._codes, np.array([0, -1, -1, 0], dtype=np.int8)) + + # Adding nan to categories should make assigned nan point to the + # category! 
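+ # (NaN itself can no longer be a category; missing values are carried
+ # only in the codes as the sentinel -1, as re-checked below.)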
+ c = Categorical(["a", "b", np.nan, "a"]) + tm.assert_index_equal(c.categories, Index(["a", "b"])) + tm.assert_numpy_array_equal(c._codes, np.array([0, 1, -1, 0], dtype=np.int8)) + + def test_set_dtype_nans(self): + c = Categorical(["a", "b", np.nan]) + result = c._set_dtype(CategoricalDtype(["a", "c"])) + tm.assert_numpy_array_equal(result.codes, np.array([0, -1, -1], dtype="int8")) + + def test_set_item_nan(self): + cat = Categorical([1, 2, 3]) + cat[1] = np.nan + + exp = Categorical([1, np.nan, 3], categories=[1, 2, 3]) + tm.assert_categorical_equal(cat, exp) + + @pytest.mark.parametrize( + "fillna_kwargs, msg", + [ + ( + {"value": 1, "method": "ffill"}, + "Cannot specify both 'value' and 'method'.", + ), + ({}, "Must specify a fill 'value' or 'method'."), + ({"method": "bad"}, "Invalid fill method. Expecting .* bad"), + ( + {"value": Series([1, 2, 3, 4, "a"])}, + "Cannot setitem on a Categorical with a new category", + ), + ], + ) + def test_fillna_raises(self, fillna_kwargs, msg): + # https://github.com/pandas-dev/pandas/issues/19682 + # https://github.com/pandas-dev/pandas/issues/13628 + cat = Categorical([1, 2, 3, None, None]) + + if len(fillna_kwargs) == 1 and "value" in fillna_kwargs: + err = TypeError + else: + err = ValueError + + with pytest.raises(err, match=msg): + cat.fillna(**fillna_kwargs) + + @pytest.mark.parametrize("named", [True, False]) + def test_fillna_iterable_category(self, named): + # https://github.com/pandas-dev/pandas/issues/21097 + if named: + Point = collections.namedtuple("Point", "x y") + else: + Point = lambda *args: args # tuple + cat = Categorical(np.array([Point(0, 0), Point(0, 1), None], dtype=object)) + result = cat.fillna(Point(0, 0)) + expected = Categorical([Point(0, 0), Point(0, 1), Point(0, 0)]) + + tm.assert_categorical_equal(result, expected) + + # Case where the Point is not among our categories; we want ValueError, + # not NotImplementedError GH#41914 + cat = Categorical(np.array([Point(1, 0), Point(0, 1), None], dtype=object)) + msg = "Cannot setitem on a Categorical with a new category" + with pytest.raises(TypeError, match=msg): + cat.fillna(Point(0, 0)) + + def test_fillna_array(self): + # accept Categorical or ndarray value if it holds appropriate values + cat = Categorical(["A", "B", "C", None, None]) + + other = cat.fillna("C") + result = cat.fillna(other) + tm.assert_categorical_equal(result, other) + assert isna(cat[-1]) # didn't modify original inplace + + other = np.array(["A", "B", "C", "B", "A"]) + result = cat.fillna(other) + expected = Categorical(["A", "B", "C", "B", "A"], dtype=cat.dtype) + tm.assert_categorical_equal(result, expected) + assert isna(cat[-1]) # didn't modify original inplace + + @pytest.mark.parametrize( + "values, expected", + [ + ([1, 2, 3], np.array([False, False, False])), + ([1, 2, np.nan], np.array([False, False, True])), + ([1, 2, np.inf], np.array([False, False, True])), + ([1, 2, pd.NA], np.array([False, False, True])), + ], + ) + def test_use_inf_as_na(self, values, expected): + # https://github.com/pandas-dev/pandas/issues/33594 + with pd.option_context("mode.use_inf_as_na", True): + cat = Categorical(values) + result = cat.isna() + tm.assert_numpy_array_equal(result, expected) + + result = Series(cat).isna() + expected = Series(expected) + tm.assert_series_equal(result, expected) + + result = DataFrame(cat).isna() + expected = DataFrame(expected) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "values, expected", + [ + ([1, 2, 3], np.array([False, False, False])), + ([1, 
2, np.nan], np.array([False, False, True])), + ([1, 2, np.inf], np.array([False, False, True])), + ([1, 2, pd.NA], np.array([False, False, True])), + ], + ) + def test_use_inf_as_na_outside_context(self, values, expected): + # https://github.com/pandas-dev/pandas/issues/33594 + # Using isna directly for Categorical will fail in general here + cat = Categorical(values) + + with pd.option_context("mode.use_inf_as_na", True): + result = isna(cat) + tm.assert_numpy_array_equal(result, expected) + + result = isna(Series(cat)) + expected = Series(expected) + tm.assert_series_equal(result, expected) + + result = isna(DataFrame(cat)) + expected = DataFrame(expected) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "a1, a2, categories", + [ + (["a", "b", "c"], [np.nan, "a", "b"], ["a", "b", "c"]), + ([1, 2, 3], [np.nan, 1, 2], [1, 2, 3]), + ], + ) + def test_compare_categorical_with_missing(self, a1, a2, categories): + # GH 28384 + cat_type = CategoricalDtype(categories) + + # != + result = Series(a1, dtype=cat_type) != Series(a2, dtype=cat_type) + expected = Series(a1) != Series(a2) + tm.assert_series_equal(result, expected) + + # == + result = Series(a1, dtype=cat_type) == Series(a2, dtype=cat_type) + expected = Series(a1) == Series(a2) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "na_value, dtype", + [ + (pd.NaT, "datetime64[ns]"), + (None, "float64"), + (np.nan, "float64"), + (pd.NA, "float64"), + ], + ) + def test_categorical_only_missing_values_no_cast(self, na_value, dtype): + # GH#44900 + result = Categorical([na_value, na_value]) + tm.assert_index_equal(result.categories, Index([], dtype=dtype)) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_operators.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_operators.py new file mode 100644 index 0000000..9f19e83 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_operators.py @@ -0,0 +1,399 @@ +import warnings + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + Series, + date_range, +) +import pandas._testing as tm +from pandas.tests.arrays.categorical.common import TestCategorical + + +class TestCategoricalOpsWithFactor(TestCategorical): + def test_categories_none_comparisons(self): + factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"], ordered=True) + tm.assert_categorical_equal(factor, self.factor) + + def test_comparisons(self): + result = self.factor[self.factor == "a"] + expected = self.factor[np.asarray(self.factor) == "a"] + tm.assert_categorical_equal(result, expected) + + result = self.factor[self.factor != "a"] + expected = self.factor[np.asarray(self.factor) != "a"] + tm.assert_categorical_equal(result, expected) + + result = self.factor[self.factor < "c"] + expected = self.factor[np.asarray(self.factor) < "c"] + tm.assert_categorical_equal(result, expected) + + result = self.factor[self.factor > "a"] + expected = self.factor[np.asarray(self.factor) > "a"] + tm.assert_categorical_equal(result, expected) + + result = self.factor[self.factor >= "b"] + expected = self.factor[np.asarray(self.factor) >= "b"] + tm.assert_categorical_equal(result, expected) + + result = self.factor[self.factor <= "b"] + expected = self.factor[np.asarray(self.factor) <= "b"] + tm.assert_categorical_equal(result, expected) + + n = len(self.factor) + + other = self.factor[np.random.permutation(n)] + result = self.factor == other + 
expected = np.asarray(self.factor) == np.asarray(other) + tm.assert_numpy_array_equal(result, expected) + + result = self.factor == "d" + expected = np.zeros(len(self.factor), dtype=bool) + tm.assert_numpy_array_equal(result, expected) + + # comparisons with categoricals + cat_rev = Categorical(["a", "b", "c"], categories=["c", "b", "a"], ordered=True) + cat_rev_base = Categorical( + ["b", "b", "b"], categories=["c", "b", "a"], ordered=True + ) + cat = Categorical(["a", "b", "c"], ordered=True) + cat_base = Categorical(["b", "b", "b"], categories=cat.categories, ordered=True) + + # comparisons need to take categories ordering into account + res_rev = cat_rev > cat_rev_base + exp_rev = np.array([True, False, False]) + tm.assert_numpy_array_equal(res_rev, exp_rev) + + res_rev = cat_rev < cat_rev_base + exp_rev = np.array([False, False, True]) + tm.assert_numpy_array_equal(res_rev, exp_rev) + + res = cat > cat_base + exp = np.array([False, False, True]) + tm.assert_numpy_array_equal(res, exp) + + # Only Categoricals with the same categories can be compared + msg = "Categoricals can only be compared if 'categories' are the same" + with pytest.raises(TypeError, match=msg): + cat > cat_rev + + cat_rev_base2 = Categorical(["b", "b", "b"], categories=["c", "b", "a", "d"]) + + with pytest.raises(TypeError, match=msg): + cat_rev > cat_rev_base2 + + # Only categories with same ordering information can be compared + cat_unordered = cat.set_ordered(False) + assert not (cat > cat).any() + + with pytest.raises(TypeError, match=msg): + cat > cat_unordered + + # comparison (in both directions) with Series will raise + s = Series(["b", "b", "b"]) + msg = ( + "Cannot compare a Categorical for op __gt__ with type " + r"<class 'numpy\.ndarray'>" + ) + with pytest.raises(TypeError, match=msg): + cat > s + with pytest.raises(TypeError, match=msg): + cat_rev > s + with pytest.raises(TypeError, match=msg): + s < cat + with pytest.raises(TypeError, match=msg): + s < cat_rev + + # comparison with numpy.array will raise in both directions, but only on + # newer numpy versions + a = np.array(["b", "b", "b"]) + with pytest.raises(TypeError, match=msg): + cat > a + with pytest.raises(TypeError, match=msg): + cat_rev > a + + # Make sure that unequal comparisons take the categories order into + # account + cat_rev = Categorical(list("abc"), categories=list("cba"), ordered=True) + exp = np.array([True, False, False]) + res = cat_rev > "b" + tm.assert_numpy_array_equal(res, exp) + + # check that zero-dim array gets unboxed + res = cat_rev > np.array("b") + tm.assert_numpy_array_equal(res, exp) + + +class TestCategoricalOps: + def test_compare_frame(self): + # GH#24282 check that Categorical.__cmp__(DataFrame) defers to frame + data = ["a", "b", 2, "a"] + cat = Categorical(data) + + df = DataFrame(cat) + + result = cat == df.T + expected = DataFrame([[True, True, True, True]]) + tm.assert_frame_equal(result, expected) + + result = cat[::-1] != df.T + expected = DataFrame([[False, True, True, False]]) + tm.assert_frame_equal(result, expected) + + def test_compare_frame_raises(self, comparison_op): + # alignment raises unless we transpose + op = comparison_op + cat = Categorical(["a", "b", 2, "a"]) + df = DataFrame(cat) + msg = "Unable to coerce to Series, length must be 1: given 4" + with pytest.raises(ValueError, match=msg): + op(cat, df) + + def test_datetime_categorical_comparison(self): + dt_cat = Categorical(date_range("2014-01-01", periods=3), ordered=True) + tm.assert_numpy_array_equal(dt_cat > dt_cat[0], np.array([False, True, True])) + 
tm.assert_numpy_array_equal(dt_cat[0] < dt_cat, np.array([False, True, True])) + + def test_reflected_comparison_with_scalars(self): + # GH8658 + cat = Categorical([1, 2, 3], ordered=True) + tm.assert_numpy_array_equal(cat > cat[0], np.array([False, True, True])) + tm.assert_numpy_array_equal(cat[0] < cat, np.array([False, True, True])) + + def test_comparison_with_unknown_scalars(self): + # https://github.com/pandas-dev/pandas/issues/9836#issuecomment-92123057 + # and following comparisons with scalars not in categories should raise + # for unequal comps, but not for equal/not equal + cat = Categorical([1, 2, 3], ordered=True) + + msg = "Invalid comparison between dtype=category and int" + with pytest.raises(TypeError, match=msg): + cat < 4 + with pytest.raises(TypeError, match=msg): + cat > 4 + with pytest.raises(TypeError, match=msg): + 4 < cat + with pytest.raises(TypeError, match=msg): + 4 > cat + + tm.assert_numpy_array_equal(cat == 4, np.array([False, False, False])) + tm.assert_numpy_array_equal(cat != 4, np.array([True, True, True])) + + def test_comparison_with_tuple(self): + cat = Categorical(np.array(["foo", (0, 1), 3, (0, 1)], dtype=object)) + + result = cat == "foo" + expected = np.array([True, False, False, False], dtype=bool) + tm.assert_numpy_array_equal(result, expected) + + result = cat == (0, 1) + expected = np.array([False, True, False, True], dtype=bool) + tm.assert_numpy_array_equal(result, expected) + + result = cat != (0, 1) + tm.assert_numpy_array_equal(result, ~expected) + + def test_comparison_of_ordered_categorical_with_nan_to_scalar( + self, compare_operators_no_eq_ne + ): + # https://github.com/pandas-dev/pandas/issues/26504 + # BUG: fix ordered categorical comparison with missing values (#26504 ) + # and following comparisons with scalars in categories with missing + # values should be evaluated as False + + cat = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True) + scalar = 2 + with warnings.catch_warnings(): + warnings.simplefilter("ignore", RuntimeWarning) + expected = getattr(np.array(cat), compare_operators_no_eq_ne)(scalar) + actual = getattr(cat, compare_operators_no_eq_ne)(scalar) + tm.assert_numpy_array_equal(actual, expected) + + def test_comparison_of_ordered_categorical_with_nan_to_listlike( + self, compare_operators_no_eq_ne + ): + # https://github.com/pandas-dev/pandas/issues/26504 + # and following comparisons of missing values in ordered Categorical + # with listlike should be evaluated as False + + cat = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True) + other = Categorical([2, 2, 2, 2], categories=[1, 2, 3], ordered=True) + with warnings.catch_warnings(): + warnings.simplefilter("ignore", RuntimeWarning) + expected = getattr(np.array(cat), compare_operators_no_eq_ne)(2) + actual = getattr(cat, compare_operators_no_eq_ne)(other) + tm.assert_numpy_array_equal(actual, expected) + + @pytest.mark.parametrize( + "data,reverse,base", + [(list("abc"), list("cba"), list("bbb")), ([1, 2, 3], [3, 2, 1], [2, 2, 2])], + ) + def test_comparisons(self, data, reverse, base): + cat_rev = Series(Categorical(data, categories=reverse, ordered=True)) + cat_rev_base = Series(Categorical(base, categories=reverse, ordered=True)) + cat = Series(Categorical(data, ordered=True)) + cat_base = Series( + Categorical(base, categories=cat.cat.categories, ordered=True) + ) + s = Series(base) + a = np.array(base) + + # comparisons need to take categories ordering into account + res_rev = cat_rev > cat_rev_base + exp_rev = Series([True, False, 
False]) + tm.assert_series_equal(res_rev, exp_rev) + + res_rev = cat_rev < cat_rev_base + exp_rev = Series([False, False, True]) + tm.assert_series_equal(res_rev, exp_rev) + + res = cat > cat_base + exp = Series([False, False, True]) + tm.assert_series_equal(res, exp) + + scalar = base[1] + res = cat > scalar + exp = Series([False, False, True]) + exp2 = cat.values > scalar + tm.assert_series_equal(res, exp) + tm.assert_numpy_array_equal(res.values, exp2) + res_rev = cat_rev > scalar + exp_rev = Series([True, False, False]) + exp_rev2 = cat_rev.values > scalar + tm.assert_series_equal(res_rev, exp_rev) + tm.assert_numpy_array_equal(res_rev.values, exp_rev2) + + # Only Categoricals with the same categories can be compared + msg = "Categoricals can only be compared if 'categories' are the same" + with pytest.raises(TypeError, match=msg): + cat > cat_rev + + # categorical cannot be compared to Series or numpy array, and also + # not the other way around + msg = ( + "Cannot compare a Categorical for op __gt__ with type " + r"<class 'numpy\.ndarray'>" + ) + with pytest.raises(TypeError, match=msg): + cat > s + with pytest.raises(TypeError, match=msg): + cat_rev > s + with pytest.raises(TypeError, match=msg): + cat > a + with pytest.raises(TypeError, match=msg): + cat_rev > a + + with pytest.raises(TypeError, match=msg): + s < cat + with pytest.raises(TypeError, match=msg): + s < cat_rev + + with pytest.raises(TypeError, match=msg): + a < cat + with pytest.raises(TypeError, match=msg): + a < cat_rev + + @pytest.mark.parametrize( + "ctor", + [ + lambda *args, **kwargs: Categorical(*args, **kwargs), + lambda *args, **kwargs: Series(Categorical(*args, **kwargs)), + ], + ) + def test_unordered_different_order_equal(self, ctor): + # https://github.com/pandas-dev/pandas/issues/16014 + c1 = ctor(["a", "b"], categories=["a", "b"], ordered=False) + c2 = ctor(["a", "b"], categories=["b", "a"], ordered=False) + assert (c1 == c2).all() + + c1 = ctor(["a", "b"], categories=["a", "b"], ordered=False) + c2 = ctor(["b", "a"], categories=["b", "a"], ordered=False) + assert (c1 != c2).all() + + c1 = ctor(["a", "a"], categories=["a", "b"], ordered=False) + c2 = ctor(["b", "b"], categories=["b", "a"], ordered=False) + assert (c1 != c2).all() + + c1 = ctor(["a", "a"], categories=["a", "b"], ordered=False) + c2 = ctor(["a", "b"], categories=["b", "a"], ordered=False) + result = c1 == c2 + tm.assert_numpy_array_equal(np.array(result), np.array([True, False])) + + def test_unordered_different_categories_raises(self): + c1 = Categorical(["a", "b"], categories=["a", "b"], ordered=False) + c2 = Categorical(["a", "c"], categories=["c", "a"], ordered=False) + + with pytest.raises(TypeError, match=("Categoricals can only be compared")): + c1 == c2 + + def test_compare_different_lengths(self): + c1 = Categorical([], categories=["a", "b"]) + c2 = Categorical([], categories=["a"]) + + msg = "Categoricals can only be compared if 'categories' are the same." 
+ with pytest.raises(TypeError, match=msg): + c1 == c2 + + def test_compare_unordered_different_order(self): + # https://github.com/pandas-dev/pandas/issues/16603#issuecomment- + # 349290078 + a = Categorical(["a"], categories=["a", "b"]) + b = Categorical(["b"], categories=["b", "a"]) + assert not a.equals(b) + + def test_numeric_like_ops(self): + + df = DataFrame({"value": np.random.randint(0, 10000, 100)}) + labels = [f"{i} - {i + 499}" for i in range(0, 10000, 500)] + cat_labels = Categorical(labels, labels) + + df = df.sort_values(by=["value"], ascending=True) + df["value_group"] = pd.cut( + df.value, range(0, 10500, 500), right=False, labels=cat_labels + ) + + # numeric ops should not succeed + for op, str_rep in [ + ("__add__", r"\+"), + ("__sub__", "-"), + ("__mul__", r"\*"), + ("__truediv__", "/"), + ]: + msg = f"Series cannot perform the operation {str_rep}|unsupported operand" + with pytest.raises(TypeError, match=msg): + getattr(df, op)(df) + + # reduction ops should not succeed (unless specifically defined, e.g. + # min/max) + s = df["value_group"] + for op in ["kurt", "skew", "var", "std", "mean", "sum", "median"]: + msg = f"does not support reduction '{op}'" + with pytest.raises(TypeError, match=msg): + getattr(s, op)(numeric_only=False) + + # mad technically works because it takes always the numeric data + + # numpy ops + s = Series(Categorical([1, 2, 3, 4])) + with pytest.raises(TypeError, match="does not support reduction 'sum'"): + np.sum(s) + + # numeric ops on a Series + for op, str_rep in [ + ("__add__", r"\+"), + ("__sub__", "-"), + ("__mul__", r"\*"), + ("__truediv__", "/"), + ]: + msg = f"Series cannot perform the operation {str_rep}|unsupported operand" + with pytest.raises(TypeError, match=msg): + getattr(s, op)(2) + + # invalid ufunc + msg = "Object with dtype category cannot perform the numpy op log" + with pytest.raises(TypeError, match=msg): + np.log(s) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_replace.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_replace.py new file mode 100644 index 0000000..a50b1ed --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_replace.py @@ -0,0 +1,72 @@ +import pytest + +import pandas as pd +from pandas import Categorical +import pandas._testing as tm + + +@pytest.mark.parametrize( + "to_replace,value,expected,flip_categories", + [ + # one-to-one + (1, 2, [2, 2, 3], False), + (1, 4, [4, 2, 3], False), + (4, 1, [1, 2, 3], False), + (5, 6, [1, 2, 3], False), + # many-to-one + ([1], 2, [2, 2, 3], False), + ([1, 2], 3, [3, 3, 3], False), + ([1, 2], 4, [4, 4, 3], False), + ((1, 2, 4), 5, [5, 5, 3], False), + ((5, 6), 2, [1, 2, 3], False), + ([1], [2], [2, 2, 3], False), + ([1, 4], [5, 2], [5, 2, 3], False), + # check_categorical sorts categories, which crashes on mixed dtypes + (3, "4", [1, 2, "4"], False), + ([1, 2, "3"], "5", ["5", "5", 3], True), + ], +) +def test_replace_categorical_series(to_replace, value, expected, flip_categories): + # GH 31720 + + ser = pd.Series([1, 2, 3], dtype="category") + result = ser.replace(to_replace, value) + expected = pd.Series(expected, dtype="category") + ser.replace(to_replace, value, inplace=True) + + if flip_categories: + expected = expected.cat.set_categories(expected.cat.categories[::-1]) + + tm.assert_series_equal(expected, result, check_category_order=False) + tm.assert_series_equal(expected, ser, check_category_order=False) + + +@pytest.mark.parametrize( + "to_replace, value, 
result, expected_error_msg", + [ + ("b", "c", ["a", "c"], "Categorical.categories are different"), + ("c", "d", ["a", "b"], None), + # https://github.com/pandas-dev/pandas/issues/33288 + ("a", "a", ["a", "b"], None), + ("b", None, ["a", None], "Categorical.categories length are different"), + ], +) +def test_replace_categorical(to_replace, value, result, expected_error_msg): + # GH#26988 + cat = Categorical(["a", "b"]) + expected = Categorical(result) + with tm.assert_produces_warning(FutureWarning, match="Series.replace"): + # GH#44929 replace->_replace + result = cat.replace(to_replace, value) + + tm.assert_categorical_equal(result, expected) + if to_replace == "b": # the "c" test is supposed to be unchanged + with pytest.raises(AssertionError, match=expected_error_msg): + # ensure non-inplace call does not affect original + tm.assert_categorical_equal(cat, expected) + + with tm.assert_produces_warning(FutureWarning, match="Series.replace"): + # GH#44929 replace->_replace + cat.replace(to_replace, value, inplace=True) + + tm.assert_categorical_equal(cat, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_repr.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_repr.py new file mode 100644 index 0000000..678109b --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_repr.py @@ -0,0 +1,534 @@ +import numpy as np + +from pandas import ( + Categorical, + CategoricalIndex, + Series, + date_range, + option_context, + period_range, + timedelta_range, +) +from pandas.tests.arrays.categorical.common import TestCategorical + + +class TestCategoricalReprWithFactor(TestCategorical): + def test_print(self): + expected = [ + "['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']", + "Categories (3, object): ['a' < 'b' < 'c']", + ] + expected = "\n".join(expected) + actual = repr(self.factor) + assert actual == expected + + +class TestCategoricalRepr: + def test_big_print(self): + factor = Categorical([0, 1, 2, 0, 1, 2] * 100, ["a", "b", "c"], fastpath=True) + expected = [ + "['a', 'b', 'c', 'a', 'b', ..., 'b', 'c', 'a', 'b', 'c']", + "Length: 600", + "Categories (3, object): ['a', 'b', 'c']", + ] + expected = "\n".join(expected) + + actual = repr(factor) + + assert actual == expected + + def test_empty_print(self): + factor = Categorical([], ["a", "b", "c"]) + expected = "[], Categories (3, object): ['a', 'b', 'c']" + actual = repr(factor) + assert actual == expected + + assert expected == actual + factor = Categorical([], ["a", "b", "c"], ordered=True) + expected = "[], Categories (3, object): ['a' < 'b' < 'c']" + actual = repr(factor) + assert expected == actual + + factor = Categorical([], []) + expected = "[], Categories (0, object): []" + assert expected == repr(factor) + + def test_print_none_width(self): + # GH10087 + a = Series(Categorical([1, 2, 3, 4])) + exp = ( + "0 1\n1 2\n2 3\n3 4\n" + "dtype: category\nCategories (4, int64): [1, 2, 3, 4]" + ) + + with option_context("display.width", None): + assert exp == repr(a) + + def test_unicode_print(self): + c = Categorical(["aaaaa", "bb", "cccc"] * 20) + expected = """\ +['aaaaa', 'bb', 'cccc', 'aaaaa', 'bb', ..., 'bb', 'cccc', 'aaaaa', 'bb', 'cccc'] +Length: 60 +Categories (3, object): ['aaaaa', 'bb', 'cccc']""" + + assert repr(c) == expected + + c = Categorical(["ああああ", "いいいいい", "ううううううう"] * 20) + expected = """\ +['ああああ', 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', ..., 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', 'ううううううう'] +Length: 60 +Categories (3, object): 
['ああああ', 'いいいいい', 'ううううううう']""" # noqa:E501 + + assert repr(c) == expected + + # unicode option should not affect to Categorical, as it doesn't care + # the repr width + with option_context("display.unicode.east_asian_width", True): + + c = Categorical(["ああああ", "いいいいい", "ううううううう"] * 20) + expected = """['ああああ', 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', ..., 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', 'ううううううう'] +Length: 60 +Categories (3, object): ['ああああ', 'いいいいい', 'ううううううう']""" # noqa:E501 + + assert repr(c) == expected + + def test_categorical_repr(self): + c = Categorical([1, 2, 3]) + exp = """[1, 2, 3] +Categories (3, int64): [1, 2, 3]""" + + assert repr(c) == exp + + c = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3]) + exp = """[1, 2, 3, 1, 2, 3] +Categories (3, int64): [1, 2, 3]""" + + assert repr(c) == exp + + c = Categorical([1, 2, 3, 4, 5] * 10) + exp = """[1, 2, 3, 4, 5, ..., 1, 2, 3, 4, 5] +Length: 50 +Categories (5, int64): [1, 2, 3, 4, 5]""" + + assert repr(c) == exp + + c = Categorical(np.arange(20)) + exp = """[0, 1, 2, 3, 4, ..., 15, 16, 17, 18, 19] +Length: 20 +Categories (20, int64): [0, 1, 2, 3, ..., 16, 17, 18, 19]""" + + assert repr(c) == exp + + def test_categorical_repr_ordered(self): + c = Categorical([1, 2, 3], ordered=True) + exp = """[1, 2, 3] +Categories (3, int64): [1 < 2 < 3]""" + + assert repr(c) == exp + + c = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3], ordered=True) + exp = """[1, 2, 3, 1, 2, 3] +Categories (3, int64): [1 < 2 < 3]""" + + assert repr(c) == exp + + c = Categorical([1, 2, 3, 4, 5] * 10, ordered=True) + exp = """[1, 2, 3, 4, 5, ..., 1, 2, 3, 4, 5] +Length: 50 +Categories (5, int64): [1 < 2 < 3 < 4 < 5]""" + + assert repr(c) == exp + + c = Categorical(np.arange(20), ordered=True) + exp = """[0, 1, 2, 3, 4, ..., 15, 16, 17, 18, 19] +Length: 20 +Categories (20, int64): [0 < 1 < 2 < 3 ... 
16 < 17 < 18 < 19]""" + + assert repr(c) == exp + + def test_categorical_repr_datetime(self): + idx = date_range("2011-01-01 09:00", freq="H", periods=5) + c = Categorical(idx) + + exp = ( + "[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, " + "2011-01-01 12:00:00, 2011-01-01 13:00:00]\n" + "Categories (5, datetime64[ns]): [2011-01-01 09:00:00, " + "2011-01-01 10:00:00, 2011-01-01 11:00:00,\n" + " 2011-01-01 12:00:00, " + "2011-01-01 13:00:00]" + "" + ) + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx) + exp = ( + "[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, " + "2011-01-01 12:00:00, 2011-01-01 13:00:00, 2011-01-01 09:00:00, " + "2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, " + "2011-01-01 13:00:00]\n" + "Categories (5, datetime64[ns]): [2011-01-01 09:00:00, " + "2011-01-01 10:00:00, 2011-01-01 11:00:00,\n" + " 2011-01-01 12:00:00, " + "2011-01-01 13:00:00]" + ) + + assert repr(c) == exp + + idx = date_range("2011-01-01 09:00", freq="H", periods=5, tz="US/Eastern") + c = Categorical(idx) + exp = ( + "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, " + "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, " + "2011-01-01 13:00:00-05:00]\n" + "Categories (5, datetime64[ns, US/Eastern]): " + "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,\n" + " " + "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,\n" + " " + "2011-01-01 13:00:00-05:00]" + ) + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx) + exp = ( + "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, " + "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, " + "2011-01-01 13:00:00-05:00, 2011-01-01 09:00:00-05:00, " + "2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, " + "2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00]\n" + "Categories (5, datetime64[ns, US/Eastern]): " + "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,\n" + " " + "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,\n" + " " + "2011-01-01 13:00:00-05:00]" + ) + + assert repr(c) == exp + + def test_categorical_repr_datetime_ordered(self): + idx = date_range("2011-01-01 09:00", freq="H", periods=5) + c = Categorical(idx, ordered=True) + exp = """[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00] +Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 < + 2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" # noqa:E501 + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx, ordered=True) + exp = """[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00, 2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00] +Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 < + 2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" # noqa:E501 + + assert repr(c) == exp + + idx = date_range("2011-01-01 09:00", freq="H", periods=5, tz="US/Eastern") + c = Categorical(idx, ordered=True) + exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00] +Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 < + 2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 < + 2011-01-01 13:00:00-05:00]""" # noqa:E501 + + assert repr(c) == exp + + c = Categorical(idx.append(idx), 
categories=idx, ordered=True) + exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00, 2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00] +Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 < + 2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 < + 2011-01-01 13:00:00-05:00]""" # noqa:E501 + + assert repr(c) == exp + + def test_categorical_repr_int_with_nan(self): + c = Categorical([1, 2, np.nan]) + c_exp = """[1, 2, NaN]\nCategories (2, int64): [1, 2]""" + assert repr(c) == c_exp + + s = Series([1, 2, np.nan], dtype="object").astype("category") + s_exp = """0 1\n1 2\n2 NaN +dtype: category +Categories (2, int64): [1, 2]""" + assert repr(s) == s_exp + + def test_categorical_repr_period(self): + idx = period_range("2011-01-01 09:00", freq="H", periods=5) + c = Categorical(idx) + exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] +Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, + 2011-01-01 13:00]""" # noqa:E501 + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx) + exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] +Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, + 2011-01-01 13:00]""" # noqa:E501 + + assert repr(c) == exp + + idx = period_range("2011-01", freq="M", periods=5) + c = Categorical(idx) + exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05] +Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx) + exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05] +Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" # noqa:E501 + + assert repr(c) == exp + + def test_categorical_repr_period_ordered(self): + idx = period_range("2011-01-01 09:00", freq="H", periods=5) + c = Categorical(idx, ordered=True) + exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] +Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < + 2011-01-01 13:00]""" # noqa:E501 + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx, ordered=True) + exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] +Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < + 2011-01-01 13:00]""" # noqa:E501 + + assert repr(c) == exp + + idx = period_range("2011-01", freq="M", periods=5) + c = Categorical(idx, ordered=True) + exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05] +Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx, ordered=True) + exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05] +Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" # noqa:E501 + 
+ assert repr(c) == exp + + def test_categorical_repr_timedelta(self): + idx = timedelta_range("1 days", periods=5) + c = Categorical(idx) + exp = """[1 days, 2 days, 3 days, 4 days, 5 days] +Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]""" + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx) + exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days] +Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]""" + + assert repr(c) == exp + + idx = timedelta_range("1 hours", periods=20) + c = Categorical(idx) + exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] +Length: 20 +Categories (20, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, + 3 days 01:00:00, ..., 16 days 01:00:00, 17 days 01:00:00, + 18 days 01:00:00, 19 days 01:00:00]""" # noqa:E501 + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx) + exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] +Length: 40 +Categories (20, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, + 3 days 01:00:00, ..., 16 days 01:00:00, 17 days 01:00:00, + 18 days 01:00:00, 19 days 01:00:00]""" # noqa:E501 + + assert repr(c) == exp + + def test_categorical_repr_timedelta_ordered(self): + idx = timedelta_range("1 days", periods=5) + c = Categorical(idx, ordered=True) + exp = """[1 days, 2 days, 3 days, 4 days, 5 days] +Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx, ordered=True) + exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days] +Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" + + assert repr(c) == exp + + idx = timedelta_range("1 hours", periods=20) + c = Categorical(idx, ordered=True) + exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] +Length: 20 +Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 < + 3 days 01:00:00 ... 16 days 01:00:00 < 17 days 01:00:00 < + 18 days 01:00:00 < 19 days 01:00:00]""" # noqa:E501 + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx, ordered=True) + exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] +Length: 40 +Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 < + 3 days 01:00:00 ... 
16 days 01:00:00 < 17 days 01:00:00 < + 18 days 01:00:00 < 19 days 01:00:00]""" # noqa:E501 + + assert repr(c) == exp + + def test_categorical_index_repr(self): + idx = CategoricalIndex(Categorical([1, 2, 3])) + exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=False, dtype='category')""" # noqa:E501 + assert repr(idx) == exp + + i = CategoricalIndex(Categorical(np.arange(10))) + exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, 4, 5, 6, 7, ...], ordered=False, dtype='category')""" # noqa:E501 + assert repr(i) == exp + + def test_categorical_index_repr_ordered(self): + i = CategoricalIndex(Categorical([1, 2, 3], ordered=True)) + exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=True, dtype='category')""" # noqa:E501 + assert repr(i) == exp + + i = CategoricalIndex(Categorical(np.arange(10), ordered=True)) + exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, 4, 5, 6, 7, ...], ordered=True, dtype='category')""" # noqa:E501 + assert repr(i) == exp + + def test_categorical_index_repr_datetime(self): + idx = date_range("2011-01-01 09:00", freq="H", periods=5) + i = CategoricalIndex(Categorical(idx)) + exp = """CategoricalIndex(['2011-01-01 09:00:00', '2011-01-01 10:00:00', + '2011-01-01 11:00:00', '2011-01-01 12:00:00', + '2011-01-01 13:00:00'], + categories=[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00], ordered=False, dtype='category')""" # noqa:E501 + + assert repr(i) == exp + + idx = date_range("2011-01-01 09:00", freq="H", periods=5, tz="US/Eastern") + i = CategoricalIndex(Categorical(idx)) + exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00', + '2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00', + '2011-01-01 13:00:00-05:00'], + categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=False, dtype='category')""" # noqa:E501 + + assert repr(i) == exp + + def test_categorical_index_repr_datetime_ordered(self): + idx = date_range("2011-01-01 09:00", freq="H", periods=5) + i = CategoricalIndex(Categorical(idx, ordered=True)) + exp = """CategoricalIndex(['2011-01-01 09:00:00', '2011-01-01 10:00:00', + '2011-01-01 11:00:00', '2011-01-01 12:00:00', + '2011-01-01 13:00:00'], + categories=[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00], ordered=True, dtype='category')""" # noqa:E501 + + assert repr(i) == exp + + idx = date_range("2011-01-01 09:00", freq="H", periods=5, tz="US/Eastern") + i = CategoricalIndex(Categorical(idx, ordered=True)) + exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00', + '2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00', + '2011-01-01 13:00:00-05:00'], + categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')""" # noqa:E501 + + assert repr(i) == exp + + i = CategoricalIndex(Categorical(idx.append(idx), ordered=True)) + exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00', + '2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00', + '2011-01-01 13:00:00-05:00', '2011-01-01 09:00:00-05:00', + '2011-01-01 10:00:00-05:00', '2011-01-01 11:00:00-05:00', + '2011-01-01 12:00:00-05:00', '2011-01-01 13:00:00-05:00'], + categories=[2011-01-01 
09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')""" # noqa:E501 + + assert repr(i) == exp + + def test_categorical_index_repr_period(self): + # test all length + idx = period_range("2011-01-01 09:00", freq="H", periods=1) + i = CategoricalIndex(Categorical(idx)) + exp = """CategoricalIndex(['2011-01-01 09:00'], categories=[2011-01-01 09:00], ordered=False, dtype='category')""" # noqa:E501 + assert repr(i) == exp + + idx = period_range("2011-01-01 09:00", freq="H", periods=2) + i = CategoricalIndex(Categorical(idx)) + exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00], ordered=False, dtype='category')""" # noqa:E501 + assert repr(i) == exp + + idx = period_range("2011-01-01 09:00", freq="H", periods=3) + i = CategoricalIndex(Categorical(idx)) + exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00], ordered=False, dtype='category')""" # noqa:E501 + assert repr(i) == exp + + idx = period_range("2011-01-01 09:00", freq="H", periods=5) + i = CategoricalIndex(Categorical(idx)) + exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00', + '2011-01-01 12:00', '2011-01-01 13:00'], + categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=False, dtype='category')""" # noqa:E501 + + assert repr(i) == exp + + i = CategoricalIndex(Categorical(idx.append(idx))) + exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00', + '2011-01-01 12:00', '2011-01-01 13:00', '2011-01-01 09:00', + '2011-01-01 10:00', '2011-01-01 11:00', '2011-01-01 12:00', + '2011-01-01 13:00'], + categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=False, dtype='category')""" # noqa:E501 + + assert repr(i) == exp + + idx = period_range("2011-01", freq="M", periods=5) + i = CategoricalIndex(Categorical(idx)) + exp = """CategoricalIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05'], categories=[2011-01, 2011-02, 2011-03, 2011-04, 2011-05], ordered=False, dtype='category')""" # noqa:E501 + assert repr(i) == exp + + def test_categorical_index_repr_period_ordered(self): + idx = period_range("2011-01-01 09:00", freq="H", periods=5) + i = CategoricalIndex(Categorical(idx, ordered=True)) + exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00', + '2011-01-01 12:00', '2011-01-01 13:00'], + categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=True, dtype='category')""" # noqa:E501 + + assert repr(i) == exp + + idx = period_range("2011-01", freq="M", periods=5) + i = CategoricalIndex(Categorical(idx, ordered=True)) + exp = """CategoricalIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05'], categories=[2011-01, 2011-02, 2011-03, 2011-04, 2011-05], ordered=True, dtype='category')""" # noqa:E501 + assert repr(i) == exp + + def test_categorical_index_repr_timedelta(self): + idx = timedelta_range("1 days", periods=5) + i = CategoricalIndex(Categorical(idx)) + exp = """CategoricalIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], categories=[1 days 00:00:00, 2 days 00:00:00, 3 days 00:00:00, 4 days 00:00:00, 5 days 00:00:00], ordered=False, dtype='category')""" # noqa:E501 + assert repr(i) == exp + + 
idx = timedelta_range("1 hours", periods=10) + i = CategoricalIndex(Categorical(idx)) + exp = """CategoricalIndex(['0 days 01:00:00', '1 days 01:00:00', '2 days 01:00:00', + '3 days 01:00:00', '4 days 01:00:00', '5 days 01:00:00', + '6 days 01:00:00', '7 days 01:00:00', '8 days 01:00:00', + '9 days 01:00:00'], + categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, 5 days 01:00:00, 6 days 01:00:00, 7 days 01:00:00, ...], ordered=False, dtype='category')""" # noqa:E501 + + assert repr(i) == exp + + def test_categorical_index_repr_timedelta_ordered(self): + idx = timedelta_range("1 days", periods=5) + i = CategoricalIndex(Categorical(idx, ordered=True)) + exp = """CategoricalIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], categories=[1 days 00:00:00, 2 days 00:00:00, 3 days 00:00:00, 4 days 00:00:00, 5 days 00:00:00], ordered=True, dtype='category')""" # noqa:E501 + assert repr(i) == exp + + idx = timedelta_range("1 hours", periods=10) + i = CategoricalIndex(Categorical(idx, ordered=True)) + exp = """CategoricalIndex(['0 days 01:00:00', '1 days 01:00:00', '2 days 01:00:00', + '3 days 01:00:00', '4 days 01:00:00', '5 days 01:00:00', + '6 days 01:00:00', '7 days 01:00:00', '8 days 01:00:00', + '9 days 01:00:00'], + categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, 5 days 01:00:00, 6 days 01:00:00, 7 days 01:00:00, ...], ordered=True, dtype='category')""" # noqa:E501 + + assert repr(i) == exp + + def test_categorical_str_repr(self): + # GH 33676 + result = repr(Categorical([1, "2", 3, 4])) + expected = "[1, '2', 3, 4]\nCategories (4, object): [1, 3, 4, '2']" + assert result == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_sorting.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_sorting.py new file mode 100644 index 0000000..4f65c8d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_sorting.py @@ -0,0 +1,129 @@ +import numpy as np +import pytest + +from pandas import ( + Categorical, + Index, +) +import pandas._testing as tm + + +class TestCategoricalSort: + def test_argsort(self): + c = Categorical([5, 3, 1, 4, 2], ordered=True) + + expected = np.array([2, 4, 1, 3, 0]) + tm.assert_numpy_array_equal( + c.argsort(ascending=True), expected, check_dtype=False + ) + + expected = expected[::-1] + tm.assert_numpy_array_equal( + c.argsort(ascending=False), expected, check_dtype=False + ) + + def test_numpy_argsort(self): + c = Categorical([5, 3, 1, 4, 2], ordered=True) + + expected = np.array([2, 4, 1, 3, 0]) + tm.assert_numpy_array_equal(np.argsort(c), expected, check_dtype=False) + + tm.assert_numpy_array_equal( + np.argsort(c, kind="mergesort"), expected, check_dtype=False + ) + + msg = "the 'axis' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.argsort(c, axis=0) + + msg = "the 'order' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.argsort(c, order="C") + + def test_sort_values(self): + + # unordered cats are sortable + cat = Categorical(["a", "b", "b", "a"], ordered=False) + cat.sort_values() + + cat = Categorical(["a", "c", "b", "d"], ordered=True) + + # sort_values + res = cat.sort_values() + exp = np.array(["a", "b", "c", "d"], dtype=object) + tm.assert_numpy_array_equal(res.__array__(), exp) + tm.assert_index_equal(res.categories, cat.categories) + + cat = Categorical( + ["a", "c", "b", "d"], categories=["a", "b", "c", 
"d"], ordered=True + ) + res = cat.sort_values() + exp = np.array(["a", "b", "c", "d"], dtype=object) + tm.assert_numpy_array_equal(res.__array__(), exp) + tm.assert_index_equal(res.categories, cat.categories) + + res = cat.sort_values(ascending=False) + exp = np.array(["d", "c", "b", "a"], dtype=object) + tm.assert_numpy_array_equal(res.__array__(), exp) + tm.assert_index_equal(res.categories, cat.categories) + + # sort (inplace order) + cat1 = cat.copy() + orig_codes = cat1._codes + cat1.sort_values(inplace=True) + assert cat1._codes is orig_codes + exp = np.array(["a", "b", "c", "d"], dtype=object) + tm.assert_numpy_array_equal(cat1.__array__(), exp) + tm.assert_index_equal(res.categories, cat.categories) + + # reverse + cat = Categorical(["a", "c", "c", "b", "d"], ordered=True) + res = cat.sort_values(ascending=False) + exp_val = np.array(["d", "c", "c", "b", "a"], dtype=object) + exp_categories = Index(["a", "b", "c", "d"]) + tm.assert_numpy_array_equal(res.__array__(), exp_val) + tm.assert_index_equal(res.categories, exp_categories) + + def test_sort_values_na_position(self): + # see gh-12882 + cat = Categorical([5, 2, np.nan, 2, np.nan], ordered=True) + exp_categories = Index([2, 5]) + + exp = np.array([2.0, 2.0, 5.0, np.nan, np.nan]) + res = cat.sort_values() # default arguments + tm.assert_numpy_array_equal(res.__array__(), exp) + tm.assert_index_equal(res.categories, exp_categories) + + exp = np.array([np.nan, np.nan, 2.0, 2.0, 5.0]) + res = cat.sort_values(ascending=True, na_position="first") + tm.assert_numpy_array_equal(res.__array__(), exp) + tm.assert_index_equal(res.categories, exp_categories) + + exp = np.array([np.nan, np.nan, 5.0, 2.0, 2.0]) + res = cat.sort_values(ascending=False, na_position="first") + tm.assert_numpy_array_equal(res.__array__(), exp) + tm.assert_index_equal(res.categories, exp_categories) + + exp = np.array([2.0, 2.0, 5.0, np.nan, np.nan]) + res = cat.sort_values(ascending=True, na_position="last") + tm.assert_numpy_array_equal(res.__array__(), exp) + tm.assert_index_equal(res.categories, exp_categories) + + exp = np.array([5.0, 2.0, 2.0, np.nan, np.nan]) + res = cat.sort_values(ascending=False, na_position="last") + tm.assert_numpy_array_equal(res.__array__(), exp) + tm.assert_index_equal(res.categories, exp_categories) + + cat = Categorical(["a", "c", "b", "d", np.nan], ordered=True) + res = cat.sort_values(ascending=False, na_position="last") + exp_val = np.array(["d", "c", "b", "a", np.nan], dtype=object) + exp_categories = Index(["a", "b", "c", "d"]) + tm.assert_numpy_array_equal(res.__array__(), exp_val) + tm.assert_index_equal(res.categories, exp_categories) + + cat = Categorical(["a", "c", "b", "d", np.nan], ordered=True) + res = cat.sort_values(ascending=False, na_position="first") + exp_val = np.array([np.nan, "d", "c", "b", "a"], dtype=object) + exp_categories = Index(["a", "b", "c", "d"]) + tm.assert_numpy_array_equal(res.__array__(), exp_val) + tm.assert_index_equal(res.categories, exp_categories) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_subclass.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_subclass.py new file mode 100644 index 0000000..b80d0ff --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_subclass.py @@ -0,0 +1,22 @@ +from pandas import Categorical +import pandas._testing as tm + + +class TestCategoricalSubclassing: + def test_constructor(self): + sc = tm.SubclassedCategorical(["a", "b", "c"]) + assert isinstance(sc, 
tm.SubclassedCategorical) + tm.assert_categorical_equal(sc, Categorical(["a", "b", "c"])) + + def test_from_codes(self): + sc = tm.SubclassedCategorical.from_codes([1, 0, 2], ["a", "b", "c"]) + assert isinstance(sc, tm.SubclassedCategorical) + exp = Categorical.from_codes([1, 0, 2], ["a", "b", "c"]) + tm.assert_categorical_equal(sc, exp) + + def test_map(self): + sc = tm.SubclassedCategorical(["a", "b", "c"]) + res = sc.map(lambda x: x.upper()) + assert isinstance(res, tm.SubclassedCategorical) + exp = Categorical(["A", "B", "C"]) + tm.assert_categorical_equal(res, exp) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_take.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_take.py new file mode 100644 index 0000000..fbdbea1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_take.py @@ -0,0 +1,95 @@ +import numpy as np +import pytest + +from pandas import ( + Categorical, + Index, +) +import pandas._testing as tm + + +class TestTake: + # https://github.com/pandas-dev/pandas/issues/20664 + + def test_take_default_allow_fill(self): + cat = Categorical(["a", "b"]) + with tm.assert_produces_warning(None): + result = cat.take([0, -1]) + + assert result.equals(cat) + + def test_take_positive_no_warning(self): + cat = Categorical(["a", "b"]) + with tm.assert_produces_warning(None): + cat.take([0, 0]) + + def test_take_bounds(self, allow_fill): + # https://github.com/pandas-dev/pandas/issues/20664 + cat = Categorical(["a", "b", "a"]) + if allow_fill: + msg = "indices are out-of-bounds" + else: + msg = "index 4 is out of bounds for( axis 0 with)? size 3" + with pytest.raises(IndexError, match=msg): + cat.take([4, 5], allow_fill=allow_fill) + + def test_take_empty(self, allow_fill): + # https://github.com/pandas-dev/pandas/issues/20664 + cat = Categorical([], categories=["a", "b"]) + if allow_fill: + msg = "indices are out-of-bounds" + else: + msg = "cannot do a non-empty take from an empty axes" + with pytest.raises(IndexError, match=msg): + cat.take([0], allow_fill=allow_fill) + + def test_positional_take(self, ordered): + cat = Categorical(["a", "a", "b", "b"], categories=["b", "a"], ordered=ordered) + result = cat.take([0, 1, 2], allow_fill=False) + expected = Categorical( + ["a", "a", "b"], categories=cat.categories, ordered=ordered + ) + tm.assert_categorical_equal(result, expected) + + def test_positional_take_unobserved(self, ordered): + cat = Categorical(["a", "b"], categories=["a", "b", "c"], ordered=ordered) + result = cat.take([1, 0], allow_fill=False) + expected = Categorical(["b", "a"], categories=cat.categories, ordered=ordered) + tm.assert_categorical_equal(result, expected) + + def test_take_allow_fill(self): + # https://github.com/pandas-dev/pandas/issues/23296 + cat = Categorical(["a", "a", "b"]) + result = cat.take([0, -1, -1], allow_fill=True) + expected = Categorical(["a", np.nan, np.nan], categories=["a", "b"]) + tm.assert_categorical_equal(result, expected) + + def test_take_fill_with_negative_one(self): + # -1 was a category + cat = Categorical([-1, 0, 1]) + result = cat.take([0, -1, 1], allow_fill=True, fill_value=-1) + expected = Categorical([-1, -1, 0], categories=[-1, 0, 1]) + tm.assert_categorical_equal(result, expected) + + def test_take_fill_value(self): + # https://github.com/pandas-dev/pandas/issues/23296 + cat = Categorical(["a", "b", "c"]) + result = cat.take([0, 1, -1], fill_value="a", allow_fill=True) + expected = Categorical(["a", "b", "a"], categories=["a", 
"b", "c"]) + tm.assert_categorical_equal(result, expected) + + def test_take_fill_value_new_raises(self): + # https://github.com/pandas-dev/pandas/issues/23296 + cat = Categorical(["a", "b", "c"]) + xpr = r"Cannot setitem on a Categorical with a new category \(d\)" + with pytest.raises(TypeError, match=xpr): + cat.take([0, 1, -1], fill_value="d", allow_fill=True) + + def test_take_nd_deprecated(self): + cat = Categorical(["a", "b", "c"]) + with tm.assert_produces_warning(FutureWarning): + cat.take_nd([0, 1]) + + ci = Index(cat) + with tm.assert_produces_warning(FutureWarning): + ci.take_nd([0, 1]) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_warnings.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_warnings.py new file mode 100644 index 0000000..6ba3347 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/categorical/test_warnings.py @@ -0,0 +1,22 @@ +import pytest + +from pandas.util._test_decorators import async_mark + +import pandas._testing as tm + + +class TestCategoricalWarnings: + @async_mark() + async def test_tab_complete_warning(self, ip): + # https://github.com/pandas-dev/pandas/issues/16409 + pytest.importorskip("IPython", minversion="6.0.0") + from IPython.core.completer import provisionalcompleter + + code = "import pandas as pd; c = Categorical([])" + await ip.run_code(code) + + # GH 31324 newer jedi version raises Deprecation warning; + # appears resolved 2021-02-02 + with tm.assert_produces_warning(None): + with provisionalcompleter("ignore"): + list(ip.Completer.completions("c.", 1)) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/datetimes/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/datetimes/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/datetimes/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/datetimes/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..4710edc Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/datetimes/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/datetimes/__pycache__/test_constructors.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/datetimes/__pycache__/test_constructors.cpython-38.pyc new file mode 100644 index 0000000..21d3f6d Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/datetimes/__pycache__/test_constructors.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/datetimes/__pycache__/test_reductions.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/datetimes/__pycache__/test_reductions.cpython-38.pyc new file mode 100644 index 0000000..6477521 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/datetimes/__pycache__/test_reductions.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/datetimes/test_constructors.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/datetimes/test_constructors.py new file mode 100644 index 0000000..e6c6549 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/datetimes/test_constructors.py @@ -0,0 +1,156 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import DatetimeTZDtype + +import pandas as pd +import 
pandas._testing as tm +from pandas.core.arrays import DatetimeArray +from pandas.core.arrays.datetimes import _sequence_to_dt64ns + + +class TestDatetimeArrayConstructor: + def test_from_sequence_invalid_type(self): + mi = pd.MultiIndex.from_product([np.arange(5), np.arange(5)]) + with pytest.raises(TypeError, match="Cannot create a DatetimeArray"): + DatetimeArray._from_sequence(mi) + + def test_only_1dim_accepted(self): + arr = np.array([0, 1, 2, 3], dtype="M8[h]").astype("M8[ns]") + + with pytest.raises(ValueError, match="Only 1-dimensional"): + # 3-dim, we allow 2D to sneak in for ops purposes GH#29853 + DatetimeArray(arr.reshape(2, 2, 1)) + + with pytest.raises(ValueError, match="Only 1-dimensional"): + # 0-dim + DatetimeArray(arr[[0]].squeeze()) + + def test_freq_validation(self): + # GH#24623 check that invalid instances cannot be created with the + # public constructor + arr = np.arange(5, dtype=np.int64) * 3600 * 10 ** 9 + + msg = ( + "Inferred frequency H from passed values does not " + "conform to passed frequency W-SUN" + ) + with pytest.raises(ValueError, match=msg): + DatetimeArray(arr, freq="W") + + @pytest.mark.parametrize( + "meth", + [ + DatetimeArray._from_sequence, + _sequence_to_dt64ns, + pd.to_datetime, + pd.DatetimeIndex, + ], + ) + def test_mixing_naive_tzaware_raises(self, meth): + # GH#24569 + arr = np.array([pd.Timestamp("2000"), pd.Timestamp("2000", tz="CET")]) + + msg = ( + "Cannot mix tz-aware with tz-naive values|" + "Tz-aware datetime.datetime cannot be converted " + "to datetime64 unless utc=True" + ) + + for obj in [arr, arr[::-1]]: + # check that we raise regardless of whether naive is found + # before aware or vice-versa + with pytest.raises(ValueError, match=msg): + meth(obj) + + def test_from_pandas_array(self): + arr = pd.array(np.arange(5, dtype=np.int64)) * 3600 * 10 ** 9 + + result = DatetimeArray._from_sequence(arr)._with_freq("infer") + + expected = pd.date_range("1970-01-01", periods=5, freq="H")._data + tm.assert_datetime_array_equal(result, expected) + + def test_mismatched_timezone_raises(self): + arr = DatetimeArray( + np.array(["2000-01-01T06:00:00"], dtype="M8[ns]"), + dtype=DatetimeTZDtype(tz="US/Central"), + ) + dtype = DatetimeTZDtype(tz="US/Eastern") + with pytest.raises(TypeError, match="Timezone of the array"): + DatetimeArray(arr, dtype=dtype) + + def test_non_array_raises(self): + with pytest.raises(ValueError, match="list"): + DatetimeArray([1, 2, 3]) + + def test_bool_dtype_raises(self): + arr = np.array([1, 2, 3], dtype="bool") + + with pytest.raises( + ValueError, match="The dtype of 'values' is incorrect.*bool" + ): + DatetimeArray(arr) + + msg = r"dtype bool cannot be converted to datetime64\[ns\]" + with pytest.raises(TypeError, match=msg): + DatetimeArray._from_sequence(arr) + + with pytest.raises(TypeError, match=msg): + _sequence_to_dt64ns(arr) + + with pytest.raises(TypeError, match=msg): + pd.DatetimeIndex(arr) + + with pytest.raises(TypeError, match=msg): + pd.to_datetime(arr) + + def test_incorrect_dtype_raises(self): + with pytest.raises(ValueError, match="Unexpected value for 'dtype'."): + DatetimeArray(np.array([1, 2, 3], dtype="i8"), dtype="category") + + def test_freq_infer_raises(self): + with pytest.raises(ValueError, match="Frequency inference"): + DatetimeArray(np.array([1, 2, 3], dtype="i8"), freq="infer") + + def test_copy(self): + data = np.array([1, 2, 3], dtype="M8[ns]") + arr = DatetimeArray(data, copy=False) + assert arr._data is data + + arr = DatetimeArray(data, copy=True) + assert arr._data is not 
data + + +class TestSequenceToDT64NS: + def test_tz_dtype_mismatch_raises(self): + arr = DatetimeArray._from_sequence( + ["2000"], dtype=DatetimeTZDtype(tz="US/Central") + ) + with pytest.raises(TypeError, match="data is already tz-aware"): + _sequence_to_dt64ns(arr, dtype=DatetimeTZDtype(tz="UTC")) + + def test_tz_dtype_matches(self): + arr = DatetimeArray._from_sequence( + ["2000"], dtype=DatetimeTZDtype(tz="US/Central") + ) + result, _, _ = _sequence_to_dt64ns(arr, dtype=DatetimeTZDtype(tz="US/Central")) + tm.assert_numpy_array_equal(arr._data, result) + + @pytest.mark.parametrize("order", ["F", "C"]) + def test_2d(self, order): + dti = pd.date_range("2016-01-01", periods=6, tz="US/Pacific") + arr = np.array(dti, dtype=object).reshape(3, 2) + if order == "F": + arr = arr.T + + res = _sequence_to_dt64ns(arr) + expected = _sequence_to_dt64ns(arr.ravel()) + + tm.assert_numpy_array_equal(res[0].ravel(), expected[0]) + assert res[1] == expected[1] + assert res[2] == expected[2] + + res = DatetimeArray._from_sequence(arr) + expected = DatetimeArray._from_sequence(arr.ravel()).reshape(arr.shape) + tm.assert_datetime_array_equal(res, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/datetimes/test_reductions.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/datetimes/test_reductions.py new file mode 100644 index 0000000..6e9c8f7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/datetimes/test_reductions.py @@ -0,0 +1,175 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import DatetimeTZDtype + +import pandas as pd +from pandas import NaT +import pandas._testing as tm +from pandas.core.arrays import DatetimeArray + + +class TestReductions: + @pytest.fixture + def arr1d(self, tz_naive_fixture): + tz = tz_naive_fixture + dtype = DatetimeTZDtype(tz=tz) if tz is not None else np.dtype("M8[ns]") + arr = DatetimeArray._from_sequence( + [ + "2000-01-03", + "2000-01-03", + "NaT", + "2000-01-02", + "2000-01-05", + "2000-01-04", + ], + dtype=dtype, + ) + return arr + + def test_min_max(self, arr1d): + arr = arr1d + tz = arr.tz + + result = arr.min() + expected = pd.Timestamp("2000-01-02", tz=tz) + assert result == expected + + result = arr.max() + expected = pd.Timestamp("2000-01-05", tz=tz) + assert result == expected + + result = arr.min(skipna=False) + assert result is NaT + + result = arr.max(skipna=False) + assert result is NaT + + @pytest.mark.parametrize("tz", [None, "US/Central"]) + @pytest.mark.parametrize("skipna", [True, False]) + def test_min_max_empty(self, skipna, tz): + dtype = DatetimeTZDtype(tz=tz) if tz is not None else np.dtype("M8[ns]") + arr = DatetimeArray._from_sequence([], dtype=dtype) + result = arr.min(skipna=skipna) + assert result is NaT + + result = arr.max(skipna=skipna) + assert result is NaT + + @pytest.mark.parametrize("tz", [None, "US/Central"]) + @pytest.mark.parametrize("skipna", [True, False]) + def test_median_empty(self, skipna, tz): + dtype = DatetimeTZDtype(tz=tz) if tz is not None else np.dtype("M8[ns]") + arr = DatetimeArray._from_sequence([], dtype=dtype) + result = arr.median(skipna=skipna) + assert result is NaT + + arr = arr.reshape(0, 3) + result = arr.median(axis=0, skipna=skipna) + expected = type(arr)._from_sequence([NaT, NaT, NaT], dtype=arr.dtype) + tm.assert_equal(result, expected) + + result = arr.median(axis=1, skipna=skipna) + expected = type(arr)._from_sequence([], dtype=arr.dtype) + tm.assert_equal(result, expected) + + def test_median(self, arr1d): + arr = arr1d 
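+ # note (descriptive comment, added): the arr1d fixture above contains one + # NaT, so the skipna=False calls below are expected to propagate NaT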
+ + result = arr.median() + assert result == arr[0] + result = arr.median(skipna=False) + assert result is NaT + + result = arr.dropna().median(skipna=False) + assert result == arr[0] + + result = arr.median(axis=0) + assert result == arr[0] + + def test_median_axis(self, arr1d): + arr = arr1d + assert arr.median(axis=0) == arr.median() + assert arr.median(axis=0, skipna=False) is NaT + + msg = r"abs\(axis\) must be less than ndim" + with pytest.raises(ValueError, match=msg): + arr.median(axis=1) + + @pytest.mark.filterwarnings("ignore:All-NaN slice encountered:RuntimeWarning") + def test_median_2d(self, arr1d): + arr = arr1d.reshape(1, -1) + + # axis = None + assert arr.median() == arr1d.median() + assert arr.median(skipna=False) is NaT + + # axis = 0 + result = arr.median(axis=0) + expected = arr1d + tm.assert_equal(result, expected) + + # Since column 3 is all-NaT, we get NaT there with or without skipna + result = arr.median(axis=0, skipna=False) + expected = arr1d + tm.assert_equal(result, expected) + + # axis = 1 + result = arr.median(axis=1) + expected = type(arr)._from_sequence([arr1d.median()]) + tm.assert_equal(result, expected) + + result = arr.median(axis=1, skipna=False) + expected = type(arr)._from_sequence([NaT], dtype=arr.dtype) + tm.assert_equal(result, expected) + + def test_mean(self, arr1d): + arr = arr1d + + # manually verified result + expected = arr[0] + 0.4 * pd.Timedelta(days=1) + + result = arr.mean() + assert result == expected + result = arr.mean(skipna=False) + assert result is NaT + + result = arr.dropna().mean(skipna=False) + assert result == expected + + result = arr.mean(axis=0) + assert result == expected + + def test_mean_2d(self): + dti = pd.date_range("2016-01-01", periods=6, tz="US/Pacific") + dta = dti._data.reshape(3, 2) + + result = dta.mean(axis=0) + expected = dta[1] + tm.assert_datetime_array_equal(result, expected) + + result = dta.mean(axis=1) + expected = dta[:, 0] + pd.Timedelta(hours=12) + tm.assert_datetime_array_equal(result, expected) + + result = dta.mean(axis=None) + expected = dti.mean() + assert result == expected + + @pytest.mark.parametrize("skipna", [True, False]) + def test_mean_empty(self, arr1d, skipna): + arr = arr1d[:0] + + assert arr.mean(skipna=skipna) is NaT + + arr2d = arr.reshape(0, 3) + result = arr2d.mean(axis=0, skipna=skipna) + expected = DatetimeArray._from_sequence([NaT, NaT, NaT], dtype=arr.dtype) + tm.assert_datetime_array_equal(result, expected) + + result = arr2d.mean(axis=1, skipna=skipna) + expected = arr # i.e. 
1D, empty + tm.assert_datetime_array_equal(result, expected) + + result = arr2d.mean(axis=None, skipna=skipna) + assert result is NaT diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..f36ae1b Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/__pycache__/conftest.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/__pycache__/conftest.cpython-38.pyc new file mode 100644 index 0000000..6160939 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/__pycache__/conftest.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/__pycache__/test_arithmetic.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/__pycache__/test_arithmetic.cpython-38.pyc new file mode 100644 index 0000000..6e1ec3b Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/__pycache__/test_arithmetic.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/__pycache__/test_astype.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/__pycache__/test_astype.cpython-38.pyc new file mode 100644 index 0000000..97cfe95 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/__pycache__/test_astype.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/__pycache__/test_comparison.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/__pycache__/test_comparison.cpython-38.pyc new file mode 100644 index 0000000..4d2ddec Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/__pycache__/test_comparison.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/__pycache__/test_concat.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/__pycache__/test_concat.cpython-38.pyc new file mode 100644 index 0000000..51971df Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/__pycache__/test_concat.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/__pycache__/test_construction.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/__pycache__/test_construction.cpython-38.pyc new file mode 100644 index 0000000..089d51d Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/__pycache__/test_construction.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/__pycache__/test_function.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/__pycache__/test_function.cpython-38.pyc new file mode 100644 index 0000000..4e1da9c Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/__pycache__/test_function.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/__pycache__/test_repr.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/__pycache__/test_repr.cpython-38.pyc new file mode 100644 index 0000000..d07b7ec Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/__pycache__/test_repr.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/__pycache__/test_to_numpy.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/__pycache__/test_to_numpy.cpython-38.pyc new file mode 100644 index 0000000..a59e9a2 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/__pycache__/test_to_numpy.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/conftest.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/conftest.py new file mode 100644 index 0000000..9eab115 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/conftest.py @@ -0,0 +1,39 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas.core.arrays.floating import ( + Float32Dtype, + Float64Dtype, +) + + +@pytest.fixture(params=[Float32Dtype, Float64Dtype]) +def dtype(request): + return request.param() + + +@pytest.fixture +def data(dtype): + return pd.array( + list(np.arange(0.1, 0.9, 0.1)) + + [pd.NA] + + list(np.arange(1, 9.8, 0.1)) + + [pd.NA] + + [9.9, 10.0], + dtype=dtype, + ) + + +@pytest.fixture +def data_missing(dtype): + return pd.array([np.nan, 0.1], dtype=dtype) + + +@pytest.fixture(params=["data", "data_missing"]) +def all_data(request, data, data_missing): + """Parametrized fixture giving 'data' and 'data_missing'""" + if request.param == "data": + return data + elif request.param == "data_missing": + return data_missing diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/test_arithmetic.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/test_arithmetic.py new file mode 100644 index 0000000..e5f67a2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/test_arithmetic.py @@ -0,0 +1,203 @@ +import operator + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays import FloatingArray + +# Basic test for the arithmetic array ops +# ----------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "opname, exp", + [ + ("add", [1.1, 2.2, None, None, 5.5]), + ("mul", [0.1, 0.4, None, None, 2.5]), + ("sub", [0.9, 1.8, None, None, 4.5]), + ("truediv", [10.0, 10.0, None, None, 10.0]), + ("floordiv", [9.0, 9.0, None, None, 10.0]), + ("mod", [0.1, 0.2, None, None, 0.0]), + ], + ids=["add", "mul", "sub", "div", "floordiv", "mod"], +) +def test_array_op(dtype, opname, exp): + a = pd.array([1.0, 2.0, None, 4.0, 5.0], dtype=dtype) + b = pd.array([0.1, 0.2, 0.3, None, 0.5], dtype=dtype) + + op = getattr(operator, opname) + + result = op(a, b) + expected = pd.array(exp, dtype=dtype) + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize("zero, negative", [(0, False), (0.0, False), (-0.0, True)]) +def test_divide_by_zero(dtype, zero, negative): + # TODO pending NA/NaN discussion + # https://github.com/pandas-dev/pandas/issues/32265/ + 
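# a sketch of the semantics this test pins down: 0/0 yields an unmasked NaN, + # 1/0 and -1/0 yield +/-inf, and only the input NA position keeps its mask +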
a = pd.array([0, 1, -1, None], dtype=dtype) + result = a / zero + expected = FloatingArray( + np.array([np.nan, np.inf, -np.inf, np.nan], dtype=dtype.numpy_dtype), + np.array([False, False, False, True]), + ) + if negative: + expected *= -1 + tm.assert_extension_array_equal(result, expected) + + +def test_pow_scalar(dtype): + a = pd.array([-1, 0, 1, None, 2], dtype=dtype) + result = a ** 0 + expected = pd.array([1, 1, 1, 1, 1], dtype=dtype) + tm.assert_extension_array_equal(result, expected) + + result = a ** 1 + expected = pd.array([-1, 0, 1, None, 2], dtype=dtype) + tm.assert_extension_array_equal(result, expected) + + result = a ** pd.NA + expected = pd.array([None, None, 1, None, None], dtype=dtype) + tm.assert_extension_array_equal(result, expected) + + result = a ** np.nan + # TODO np.nan should be converted to pd.NA / missing before operation? + expected = FloatingArray( + np.array([np.nan, np.nan, 1, np.nan, np.nan], dtype=dtype.numpy_dtype), + mask=a._mask, + ) + tm.assert_extension_array_equal(result, expected) + + # reversed + a = a[1:] # Can't raise integers to negative powers. + + result = 0 ** a + expected = pd.array([1, 0, None, 0], dtype=dtype) + tm.assert_extension_array_equal(result, expected) + + result = 1 ** a + expected = pd.array([1, 1, 1, 1], dtype=dtype) + tm.assert_extension_array_equal(result, expected) + + result = pd.NA ** a + expected = pd.array([1, None, None, None], dtype=dtype) + tm.assert_extension_array_equal(result, expected) + + result = np.nan ** a + expected = FloatingArray( + np.array([1, np.nan, np.nan, np.nan], dtype=dtype.numpy_dtype), mask=a._mask + ) + tm.assert_extension_array_equal(result, expected) + + +def test_pow_array(dtype): + a = pd.array([0, 0, 0, 1, 1, 1, None, None, None], dtype=dtype) + b = pd.array([0, 1, None, 0, 1, None, 0, 1, None], dtype=dtype) + result = a ** b + expected = pd.array([1, 0, None, 1, 1, 1, 1, None, None], dtype=dtype) + tm.assert_extension_array_equal(result, expected) + + +def test_rpow_one_to_na(): + # https://github.com/pandas-dev/pandas/issues/22022 + # https://github.com/pandas-dev/pandas/issues/29997 + arr = pd.array([np.nan, np.nan], dtype="Float64") + result = np.array([1.0, 2.0]) ** arr + expected = pd.array([1.0, np.nan], dtype="Float64") + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize("other", [0, 0.5]) +def test_arith_zero_dim_ndarray(other): + arr = pd.array([1, None, 2], dtype="Float64") + result = arr + np.array(other) + expected = arr + other + tm.assert_equal(result, expected) + + +# Test generic characteristics / errors +# ----------------------------------------------------------------------------- + + +def test_error_invalid_values(data, all_arithmetic_operators): + + op = all_arithmetic_operators + s = pd.Series(data) + ops = getattr(s, op) + + # invalid scalars + msg = ( + r"(:?can only perform ops with numeric values)" + r"|(:?FloatingArray cannot perform the operation mod)" + ) + with pytest.raises(TypeError, match=msg): + ops("foo") + with pytest.raises(TypeError, match=msg): + ops(pd.Timestamp("20180101")) + + # invalid array-likes + with pytest.raises(TypeError, match=msg): + ops(pd.Series("foo", index=s.index)) + + msg = "|".join( + [ + "can only perform ops with numeric values", + "cannot perform .* with this index type: DatetimeArray", + "Addition/subtraction of integers and integer-arrays " + "with DatetimeArray is no longer supported. 
*", + ] + ) + with pytest.raises(TypeError, match=msg): + ops(pd.Series(pd.date_range("20180101", periods=len(s)))) + + +# Various +# ----------------------------------------------------------------------------- + + +def test_cross_type_arithmetic(): + + df = pd.DataFrame( + { + "A": pd.array([1, 2, np.nan], dtype="Float64"), + "B": pd.array([1, np.nan, 3], dtype="Float32"), + "C": np.array([1, 2, 3], dtype="float64"), + } + ) + + result = df.A + df.C + expected = pd.Series([2, 4, np.nan], dtype="Float64") + tm.assert_series_equal(result, expected) + + result = (df.A + df.C) * 3 == 12 + expected = pd.Series([False, True, None], dtype="boolean") + tm.assert_series_equal(result, expected) + + result = df.A + df.B + expected = pd.Series([2, np.nan, np.nan], dtype="Float64") + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "source, neg_target, abs_target", + [ + ([1.1, 2.2, 3.3], [-1.1, -2.2, -3.3], [1.1, 2.2, 3.3]), + ([1.1, 2.2, None], [-1.1, -2.2, None], [1.1, 2.2, None]), + ([-1.1, 0.0, 1.1], [1.1, 0.0, -1.1], [1.1, 0.0, 1.1]), + ], +) +def test_unary_float_operators(float_ea_dtype, source, neg_target, abs_target): + # GH38794 + dtype = float_ea_dtype + arr = pd.array(source, dtype=dtype) + neg_result, pos_result, abs_result = -arr, +arr, abs(arr) + neg_target = pd.array(neg_target, dtype=dtype) + abs_target = pd.array(abs_target, dtype=dtype) + + tm.assert_extension_array_equal(neg_result, neg_target) + tm.assert_extension_array_equal(pos_result, arr) + assert not tm.shares_memory(pos_result, arr) + tm.assert_extension_array_equal(abs_result, abs_target) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/test_astype.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/test_astype.py new file mode 100644 index 0000000..f0e91cd --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/test_astype.py @@ -0,0 +1,118 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +def test_astype(): + # with missing values + arr = pd.array([0.1, 0.2, None], dtype="Float64") + + with pytest.raises(ValueError, match="cannot convert to 'int64'-dtype NumPy"): + arr.astype("int64") + + with pytest.raises(ValueError, match="cannot convert to 'bool'-dtype NumPy"): + arr.astype("bool") + + result = arr.astype("float64") + expected = np.array([0.1, 0.2, np.nan], dtype="float64") + tm.assert_numpy_array_equal(result, expected) + + # no missing values + arr = pd.array([0.0, 1.0, 0.5], dtype="Float64") + result = arr.astype("int64") + expected = np.array([0, 1, 0], dtype="int64") + tm.assert_numpy_array_equal(result, expected) + + result = arr.astype("bool") + expected = np.array([False, True, True], dtype="bool") + tm.assert_numpy_array_equal(result, expected) + + +def test_astype_to_floating_array(): + # astype to FloatingArray + arr = pd.array([0.0, 1.0, None], dtype="Float64") + + result = arr.astype("Float64") + tm.assert_extension_array_equal(result, arr) + result = arr.astype(pd.Float64Dtype()) + tm.assert_extension_array_equal(result, arr) + result = arr.astype("Float32") + expected = pd.array([0.0, 1.0, None], dtype="Float32") + tm.assert_extension_array_equal(result, expected) + + +def test_astype_to_boolean_array(): + # astype to BooleanArray + arr = pd.array([0.0, 1.0, None], dtype="Float64") + + result = arr.astype("boolean") + expected = pd.array([False, True, None], dtype="boolean") + tm.assert_extension_array_equal(result, expected) + result = 
arr.astype(pd.BooleanDtype()) + tm.assert_extension_array_equal(result, expected) + + +def test_astype_to_integer_array(): + # astype to IntegerArray + arr = pd.array([0.0, 1.5, None], dtype="Float64") + + result = arr.astype("Int64") + expected = pd.array([0, 1, None], dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + +def test_astype_str(): + a = pd.array([0.1, 0.2, None], dtype="Float64") + expected = np.array(["0.1", "0.2", "<NA>"], dtype=object) + + tm.assert_numpy_array_equal(a.astype(str), expected) + tm.assert_numpy_array_equal(a.astype("str"), expected) + + +def test_astype_copy(): + arr = pd.array([0.1, 0.2, None], dtype="Float64") + orig = pd.array([0.1, 0.2, None], dtype="Float64") + + # copy=True -> ensure both data and mask are actual copies + result = arr.astype("Float64", copy=True) + assert result is not arr + assert not tm.shares_memory(result, arr) + result[0] = 10 + tm.assert_extension_array_equal(arr, orig) + result[0] = pd.NA + tm.assert_extension_array_equal(arr, orig) + + # copy=False + result = arr.astype("Float64", copy=False) + assert result is arr + assert np.shares_memory(result._data, arr._data) + assert np.shares_memory(result._mask, arr._mask) + result[0] = 10 + assert arr[0] == 10 + result[0] = pd.NA + assert arr[0] is pd.NA + + # astype to different dtype -> always needs a copy -> even with copy=False + # we need to ensure that also the mask is actually copied + arr = pd.array([0.1, 0.2, None], dtype="Float64") + orig = pd.array([0.1, 0.2, None], dtype="Float64") + + result = arr.astype("Float32", copy=False) + assert not tm.shares_memory(result, arr) + result[0] = 10 + tm.assert_extension_array_equal(arr, orig) + result[0] = pd.NA + tm.assert_extension_array_equal(arr, orig) + + +def test_astype_object(dtype): + arr = pd.array([1.0, pd.NA], dtype=dtype) + + result = arr.astype(object) + expected = np.array([1.0, pd.NA], dtype=object) + tm.assert_numpy_array_equal(result, expected) + # check exact element types + assert isinstance(result[0], float) + assert result[1] is pd.NA diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/test_comparison.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/test_comparison.py new file mode 100644 index 0000000..a429649 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/test_comparison.py @@ -0,0 +1,65 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays import FloatingArray +from pandas.tests.arrays.masked_shared import ( + ComparisonOps, + NumericOps, +) + + +class TestComparisonOps(NumericOps, ComparisonOps): + @pytest.mark.parametrize("other", [True, False, pd.NA, -1.0, 0.0, 1]) + def test_scalar(self, other, comparison_op, dtype): + ComparisonOps.test_scalar(self, other, comparison_op, dtype) + + def test_compare_with_integerarray(self, comparison_op): + op = comparison_op + a = pd.array([0, 1, None] * 3, dtype="Int64") + b = pd.array([0] * 3 + [1] * 3 + [None] * 3, dtype="Float64") + other = b.astype("Int64") + expected = op(a, other) + result = op(a, b) + tm.assert_extension_array_equal(result, expected) + expected = op(other, a) + result = op(b, a) + tm.assert_extension_array_equal(result, expected) + + +def test_equals(): + # GH-30652 + # equals is generally tested in /tests/extension/base/methods, but this + # specifically tests that two arrays of the same class but different dtype + # do not evaluate equal + a1 = pd.array([1, 2, None], dtype="Float64") + a2 
= pd.array([1, 2, None], dtype="Float32") + assert a1.equals(a2) is False + + +def test_equals_nan_vs_na(): + # GH#44382 + + mask = np.zeros(3, dtype=bool) + data = np.array([1.0, np.nan, 3.0], dtype=np.float64) + + left = FloatingArray(data, mask) + assert left.equals(left) + tm.assert_extension_array_equal(left, left) + + assert left.equals(left.copy()) + assert left.equals(FloatingArray(data.copy(), mask.copy())) + + mask2 = np.array([False, True, False], dtype=bool) + data2 = np.array([1.0, 2.0, 3.0], dtype=np.float64) + right = FloatingArray(data2, mask2) + assert right.equals(right) + tm.assert_extension_array_equal(right, right) + + assert not left.equals(right) + + # with mask[1] = True, the only difference is data[1], which should + # not matter for equals + mask[1] = True + assert left.equals(right) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/test_concat.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/test_concat.py new file mode 100644 index 0000000..dcb0210 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/test_concat.py @@ -0,0 +1,21 @@ +import pytest + +import pandas as pd +import pandas._testing as tm + + +@pytest.mark.parametrize( + "to_concat_dtypes, result_dtype", + [ + (["Float64", "Float64"], "Float64"), + (["Float32", "Float64"], "Float64"), + (["Float32", "Float32"], "Float32"), + ], +) +def test_concat_series(to_concat_dtypes, result_dtype): + + result = pd.concat([pd.Series([1, 2, pd.NA], dtype=t) for t in to_concat_dtypes]) + expected = pd.concat([pd.Series([1, 2, pd.NA], dtype=object)] * 2).astype( + result_dtype + ) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/test_construction.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/test_construction.py new file mode 100644 index 0000000..f2b31b8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/test_construction.py @@ -0,0 +1,197 @@ +import numpy as np +import pytest + +from pandas.compat import np_version_under1p20 + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays import FloatingArray +from pandas.core.arrays.floating import ( + Float32Dtype, + Float64Dtype, +) + + +def test_uses_pandas_na(): + a = pd.array([1, None], dtype=Float64Dtype()) + assert a[1] is pd.NA + + +def test_floating_array_constructor(): + values = np.array([1, 2, 3, 4], dtype="float64") + mask = np.array([False, False, False, True], dtype="bool") + + result = FloatingArray(values, mask) + expected = pd.array([1, 2, 3, np.nan], dtype="Float64") + tm.assert_extension_array_equal(result, expected) + tm.assert_numpy_array_equal(result._data, values) + tm.assert_numpy_array_equal(result._mask, mask) + + msg = r".* should be .* numpy array. 
Use the 'pd.array' function instead" + with pytest.raises(TypeError, match=msg): + FloatingArray(values.tolist(), mask) + + with pytest.raises(TypeError, match=msg): + FloatingArray(values, mask.tolist()) + + with pytest.raises(TypeError, match=msg): + FloatingArray(values.astype(int), mask) + + msg = r"__init__\(\) missing 1 required positional argument: 'mask'" + with pytest.raises(TypeError, match=msg): + FloatingArray(values) + + +def test_floating_array_disallows_float16(): + # GH#44715 + arr = np.array([1, 2], dtype=np.float16) + mask = np.array([False, False]) + + msg = "FloatingArray does not support np.float16 dtype" + with pytest.raises(TypeError, match=msg): + FloatingArray(arr, mask) + + +def test_floating_array_disallows_Float16_dtype(request): + # GH#44715 + if np_version_under1p20: + # https://github.com/numpy/numpy/issues/20512 + mark = pytest.mark.xfail(reason="numpy does not raise on np.dtype('Float16')") + request.node.add_marker(mark) + + with pytest.raises(TypeError, match="data type 'Float16' not understood"): + pd.array([1.0, 2.0], dtype="Float16") + + +def test_floating_array_constructor_copy(): + values = np.array([1, 2, 3, 4], dtype="float64") + mask = np.array([False, False, False, True], dtype="bool") + + result = FloatingArray(values, mask) + assert result._data is values + assert result._mask is mask + + result = FloatingArray(values, mask, copy=True) + assert result._data is not values + assert result._mask is not mask + + +def test_to_array(): + result = pd.array([0.1, 0.2, 0.3, 0.4]) + expected = pd.array([0.1, 0.2, 0.3, 0.4], dtype="Float64") + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize( + "a, b", + [ + ([1, None], [1, pd.NA]), + ([None], [pd.NA]), + ([None, np.nan], [pd.NA, pd.NA]), + ([1, np.nan], [1, pd.NA]), + ([np.nan], [pd.NA]), + ], +) +def test_to_array_none_is_nan(a, b): + result = pd.array(a, dtype="Float64") + expected = pd.array(b, dtype="Float64") + tm.assert_extension_array_equal(result, expected) + + +def test_to_array_mixed_integer_float(): + result = pd.array([1, 2.0]) + expected = pd.array([1.0, 2.0], dtype="Float64") + tm.assert_extension_array_equal(result, expected) + + result = pd.array([1, None, 2.0]) + expected = pd.array([1.0, None, 2.0], dtype="Float64") + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize( + "values", + [ + ["foo", "bar"], + ["1", "2"], + "foo", + 1, + 1.0, + pd.date_range("20130101", periods=2), + np.array(["foo"]), + [[1, 2], [3, 4]], + [np.nan, {"a": 1}], + # GH#44514 all-NA case used to get quietly swapped out before checking ndim + np.array([pd.NA] * 6, dtype=object).reshape(3, 2), + ], +) +def test_to_array_error(values): + # error in converting existing arrays to FloatingArray + msg = "|".join( + [ + "cannot be converted to a FloatingDtype", + "values must be a 1D list-like", + "Cannot pass scalar", + ] + ) + with pytest.raises((TypeError, ValueError), match=msg): + pd.array(values, dtype="Float64") + + +def test_to_array_inferred_dtype(): + # if values has dtype -> respect it + result = pd.array(np.array([1, 2], dtype="float32")) + assert result.dtype == Float32Dtype() + + # if values have no dtype -> always float64 + result = pd.array([1.0, 2.0]) + assert result.dtype == Float64Dtype() + + +def test_to_array_dtype_keyword(): + result = pd.array([1, 2], dtype="Float32") + assert result.dtype == Float32Dtype() + + # if values has dtype -> override it + result = pd.array(np.array([1, 2], dtype="float32"), dtype="Float64") + assert 
result.dtype == Float64Dtype() + + +def test_to_array_integer(): + result = pd.array([1, 2], dtype="Float64") + expected = pd.array([1.0, 2.0], dtype="Float64") + tm.assert_extension_array_equal(result, expected) + + # for integer dtypes, the itemsize is not preserved + # TODO can we specify "floating" in general? + result = pd.array(np.array([1, 2], dtype="int32"), dtype="Float64") + assert result.dtype == Float64Dtype() + + +@pytest.mark.parametrize( + "bool_values, values, target_dtype, expected_dtype", + [ + ([False, True], [0, 1], Float64Dtype(), Float64Dtype()), + ([False, True], [0, 1], "Float64", Float64Dtype()), + ([False, True, np.nan], [0, 1, np.nan], Float64Dtype(), Float64Dtype()), + ], +) +def test_to_array_bool(bool_values, values, target_dtype, expected_dtype): + result = pd.array(bool_values, dtype=target_dtype) + assert result.dtype == expected_dtype + expected = pd.array(values, dtype=target_dtype) + tm.assert_extension_array_equal(result, expected) + + +def test_series_from_float(data): + # construct from our dtype & string dtype + dtype = data.dtype + + # from float + expected = pd.Series(data) + result = pd.Series(data.to_numpy(na_value=np.nan, dtype="float"), dtype=str(dtype)) + tm.assert_series_equal(result, expected) + + # from list + expected = pd.Series(data) + result = pd.Series(np.array(data).tolist(), dtype=str(dtype)) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/test_function.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/test_function.py new file mode 100644 index 0000000..fbdf419 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/test_function.py @@ -0,0 +1,192 @@ +import numpy as np +import pytest + +from pandas.compat import IS64 + +import pandas as pd +import pandas._testing as tm + + +@pytest.mark.parametrize("ufunc", [np.abs, np.sign]) +# np.sign emits a warning with nans, +@pytest.mark.filterwarnings("ignore:invalid value encountered in sign") +def test_ufuncs_single(ufunc): + a = pd.array([1, 2, -3, np.nan], dtype="Float64") + result = ufunc(a) + expected = pd.array(ufunc(a.astype(float)), dtype="Float64") + tm.assert_extension_array_equal(result, expected) + + s = pd.Series(a) + result = ufunc(s) + expected = pd.Series(expected) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("ufunc", [np.log, np.exp, np.sin, np.cos, np.sqrt]) +def test_ufuncs_single_float(ufunc): + a = pd.array([1.0, 0.2, 3.0, np.nan], dtype="Float64") + with np.errstate(invalid="ignore"): + result = ufunc(a) + expected = pd.array(ufunc(a.astype(float)), dtype="Float64") + tm.assert_extension_array_equal(result, expected) + + s = pd.Series(a) + with np.errstate(invalid="ignore"): + result = ufunc(s) + expected = pd.Series(ufunc(s.astype(float)), dtype="Float64") + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("ufunc", [np.add, np.subtract]) +def test_ufuncs_binary_float(ufunc): + # two FloatingArrays + a = pd.array([1, 0.2, -3, np.nan], dtype="Float64") + result = ufunc(a, a) + expected = pd.array(ufunc(a.astype(float), a.astype(float)), dtype="Float64") + tm.assert_extension_array_equal(result, expected) + + # FloatingArray with numpy array + arr = np.array([1, 2, 3, 4]) + result = ufunc(a, arr) + expected = pd.array(ufunc(a.astype(float), arr), dtype="Float64") + tm.assert_extension_array_equal(result, expected) + + result = ufunc(arr, a) + expected = pd.array(ufunc(arr, a.astype(float)), 
dtype="Float64") + tm.assert_extension_array_equal(result, expected) + + # FloatingArray with scalar + result = ufunc(a, 1) + expected = pd.array(ufunc(a.astype(float), 1), dtype="Float64") + tm.assert_extension_array_equal(result, expected) + + result = ufunc(1, a) + expected = pd.array(ufunc(1, a.astype(float)), dtype="Float64") + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize("values", [[0, 1], [0, None]]) +def test_ufunc_reduce_raises(values): + arr = pd.array(values, dtype="Float64") + + res = np.add.reduce(arr) + expected = arr.sum(skipna=False) + tm.assert_almost_equal(res, expected) + + +@pytest.mark.skipif(not IS64, reason="GH 36579: fail on 32-bit system") +@pytest.mark.parametrize( + "pandasmethname, kwargs", + [ + ("var", {"ddof": 0}), + ("var", {"ddof": 1}), + ("kurtosis", {}), + ("skew", {}), + ("sem", {}), + ], +) +def test_stat_method(pandasmethname, kwargs): + s = pd.Series(data=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, np.nan, np.nan], dtype="Float64") + pandasmeth = getattr(s, pandasmethname) + result = pandasmeth(**kwargs) + s2 = pd.Series(data=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6], dtype="float64") + pandasmeth = getattr(s2, pandasmethname) + expected = pandasmeth(**kwargs) + assert expected == result + + +def test_value_counts_na(): + arr = pd.array([0.1, 0.2, 0.1, pd.NA], dtype="Float64") + result = arr.value_counts(dropna=False) + idx = pd.Index([0.1, 0.2, pd.NA], dtype=arr.dtype) + assert idx.dtype == arr.dtype + expected = pd.Series([2, 1, 1], index=idx, dtype="Int64") + tm.assert_series_equal(result, expected) + + result = arr.value_counts(dropna=True) + expected = pd.Series([2, 1], index=idx[:-1], dtype="Int64") + tm.assert_series_equal(result, expected) + + +def test_value_counts_empty(): + ser = pd.Series([], dtype="Float64") + result = ser.value_counts() + idx = pd.Index([], dtype="Float64") + assert idx.dtype == "Float64" + expected = pd.Series([], index=idx, dtype="Int64") + tm.assert_series_equal(result, expected) + + +def test_value_counts_with_normalize(): + ser = pd.Series([0.1, 0.2, 0.1, pd.NA], dtype="Float64") + result = ser.value_counts(normalize=True) + expected = pd.Series([2, 1], index=ser[:2], dtype="Float64") / 3 + assert expected.index.dtype == ser.dtype + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("skipna", [True, False]) +@pytest.mark.parametrize("min_count", [0, 4]) +def test_floating_array_sum(skipna, min_count, dtype): + arr = pd.array([1, 2, 3, None], dtype=dtype) + result = arr.sum(skipna=skipna, min_count=min_count) + if skipna and min_count == 0: + assert result == 6.0 + else: + assert result is pd.NA + + +@pytest.mark.parametrize( + "values, expected", [([1, 2, 3], 6.0), ([1, 2, 3, None], 6.0), ([None], 0.0)] +) +def test_floating_array_numpy_sum(values, expected): + arr = pd.array(values, dtype="Float64") + result = np.sum(arr) + assert result == expected + + +@pytest.mark.parametrize("op", ["sum", "min", "max", "prod"]) +def test_preserve_dtypes(op): + df = pd.DataFrame( + { + "A": ["a", "b", "b"], + "B": [1, None, 3], + "C": pd.array([0.1, None, 3.0], dtype="Float64"), + } + ) + + # op + result = getattr(df.C, op)() + assert isinstance(result, np.float64) + + # groupby + result = getattr(df.groupby("A"), op)() + + expected = pd.DataFrame( + {"B": np.array([1.0, 3.0]), "C": pd.array([0.1, 3], dtype="Float64")}, + index=pd.Index(["a", "b"], name="A"), + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("skipna", [True, False]) +@pytest.mark.parametrize("method", 
["min", "max"]) +def test_floating_array_min_max(skipna, method, dtype): + arr = pd.array([0.0, 1.0, None], dtype=dtype) + func = getattr(arr, method) + result = func(skipna=skipna) + if skipna: + assert result == (0 if method == "min" else 1) + else: + assert result is pd.NA + + +@pytest.mark.parametrize("skipna", [True, False]) +@pytest.mark.parametrize("min_count", [0, 9]) +def test_floating_array_prod(skipna, min_count, dtype): + arr = pd.array([1.0, 2.0, None], dtype=dtype) + result = arr.prod(skipna=skipna, min_count=min_count) + if skipna and min_count == 0: + assert result == 2 + else: + assert result is pd.NA diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/test_repr.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/test_repr.py new file mode 100644 index 0000000..a8868fd --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/test_repr.py @@ -0,0 +1,48 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas.core.arrays.floating import ( + Float32Dtype, + Float64Dtype, +) + + +def test_dtypes(dtype): + # smoke tests on auto dtype construction + + np.dtype(dtype.type).kind == "f" + assert dtype.name is not None + + +@pytest.mark.parametrize( + "dtype, expected", + [(Float32Dtype(), "Float32Dtype()"), (Float64Dtype(), "Float64Dtype()")], +) +def test_repr_dtype(dtype, expected): + assert repr(dtype) == expected + + +def test_repr_array(): + result = repr(pd.array([1.0, None, 3.0])) + expected = "\n[1.0, , 3.0]\nLength: 3, dtype: Float64" + assert result == expected + + +def test_repr_array_long(): + data = pd.array([1.0, 2.0, None] * 1000) + expected = """ +[ 1.0, 2.0, , 1.0, 2.0, , 1.0, 2.0, , 1.0, + ... + , 1.0, 2.0, , 1.0, 2.0, , 1.0, 2.0, ] +Length: 3000, dtype: Float64""" + result = repr(data) + assert result == expected + + +def test_frame_repr(data_missing): + + df = pd.DataFrame({"A": data_missing}) + result = repr(df) + expected = " A\n0 \n1 0.1" + assert result == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/test_to_numpy.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/test_to_numpy.py new file mode 100644 index 0000000..26e5687 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/floating/test_to_numpy.py @@ -0,0 +1,132 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays import FloatingArray + + +@pytest.mark.parametrize("box", [True, False], ids=["series", "array"]) +def test_to_numpy(box): + con = pd.Series if box else pd.array + + # default (with or without missing values) -> object dtype + arr = con([0.1, 0.2, 0.3], dtype="Float64") + result = arr.to_numpy() + expected = np.array([0.1, 0.2, 0.3], dtype="object") + tm.assert_numpy_array_equal(result, expected) + + arr = con([0.1, 0.2, None], dtype="Float64") + result = arr.to_numpy() + expected = np.array([0.1, 0.2, pd.NA], dtype="object") + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize("box", [True, False], ids=["series", "array"]) +def test_to_numpy_float(box): + con = pd.Series if box else pd.array + + # no missing values -> can convert to float, otherwise raises + arr = con([0.1, 0.2, 0.3], dtype="Float64") + result = arr.to_numpy(dtype="float64") + expected = np.array([0.1, 0.2, 0.3], dtype="float64") + tm.assert_numpy_array_equal(result, expected) + + arr = con([0.1, 0.2, None], dtype="Float64") + with pytest.raises(ValueError, match="cannot 
convert to 'float64'-dtype"): + result = arr.to_numpy(dtype="float64") + + # need to explicitly specify na_value + result = arr.to_numpy(dtype="float64", na_value=np.nan) + expected = np.array([0.1, 0.2, np.nan], dtype="float64") + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize("box", [True, False], ids=["series", "array"]) +def test_to_numpy_int(box): + con = pd.Series if box else pd.array + + # no missing values -> can convert to int, otherwise raises + arr = con([1.0, 2.0, 3.0], dtype="Float64") + result = arr.to_numpy(dtype="int64") + expected = np.array([1, 2, 3], dtype="int64") + tm.assert_numpy_array_equal(result, expected) + + arr = con([1.0, 2.0, None], dtype="Float64") + with pytest.raises(ValueError, match="cannot convert to 'int64'-dtype"): + result = arr.to_numpy(dtype="int64") + + # automatic casting (floors the values) + arr = con([0.1, 0.9, 1.1], dtype="Float64") + result = arr.to_numpy(dtype="int64") + expected = np.array([0, 0, 1], dtype="int64") + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize("box", [True, False], ids=["series", "array"]) +def test_to_numpy_na_value(box): + con = pd.Series if box else pd.array + + arr = con([0.0, 1.0, None], dtype="Float64") + result = arr.to_numpy(dtype=object, na_value=None) + expected = np.array([0.0, 1.0, None], dtype="object") + tm.assert_numpy_array_equal(result, expected) + + result = arr.to_numpy(dtype=bool, na_value=False) + expected = np.array([False, True, False], dtype="bool") + tm.assert_numpy_array_equal(result, expected) + + result = arr.to_numpy(dtype="int64", na_value=-99) + expected = np.array([0, 1, -99], dtype="int64") + tm.assert_numpy_array_equal(result, expected) + + +def test_to_numpy_na_value_with_nan(): + # array with both NaN and NA -> only fill NA with `na_value` + arr = FloatingArray(np.array([0.0, np.nan, 0.0]), np.array([False, False, True])) + result = arr.to_numpy(dtype="float64", na_value=-1) + expected = np.array([0.0, np.nan, -1.0], dtype="float64") + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize("dtype", ["float64", "float32", "int32", "int64", "bool"]) +@pytest.mark.parametrize("box", [True, False], ids=["series", "array"]) +def test_to_numpy_dtype(box, dtype): + con = pd.Series if box else pd.array + arr = con([0.0, 1.0], dtype="Float64") + + result = arr.to_numpy(dtype=dtype) + expected = np.array([0, 1], dtype=dtype) + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize("dtype", ["float64", "float32", "int32", "int64", "bool"]) +@pytest.mark.parametrize("box", [True, False], ids=["series", "array"]) +def test_to_numpy_na_raises(box, dtype): + con = pd.Series if box else pd.array + arr = con([0.0, 1.0, None], dtype="Float64") + with pytest.raises(ValueError, match=dtype): + arr.to_numpy(dtype=dtype) + + +@pytest.mark.parametrize("box", [True, False], ids=["series", "array"]) +def test_to_numpy_string(box, dtype): + con = pd.Series if box else pd.array + arr = con([0.0, 1.0, None], dtype="Float64") + + result = arr.to_numpy(dtype="str") + expected = np.array([0.0, 1.0, pd.NA], dtype="<U32") + tm.assert_numpy_array_equal(result, expected) [...] + # ... -> unmask it + elif all_arithmetic_operators == "__rmod__": + mask = (s == 0).fillna(False).to_numpy(bool) + expected.array._mask[mask] = False + + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("other", [1.0, np.array(1.0)]) +def test_arithmetic_conversion(all_arithmetic_operators, other): + # if we have a float operand we should have a float result + # if that is equal to an integer + 
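# e.g. adding 1.0 to pd.Series([1, 2, 3], dtype="Int64") should come back + # as Float64, even though each value round-trips to an integer +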
op = tm.get_op_from_name(all_arithmetic_operators) + + s = pd.Series([1, 2, 3], dtype="Int64") + result = op(s, other) + assert result.dtype == "Float64" + + +def test_cross_type_arithmetic(): + + df = pd.DataFrame( + { + "A": pd.Series([1, 2, np.nan], dtype="Int64"), + "B": pd.Series([1, np.nan, 3], dtype="UInt8"), + "C": [1, 2, 3], + } + ) + + result = df.A + df.C + expected = pd.Series([2, 4, np.nan], dtype="Int64") + tm.assert_series_equal(result, expected) + + result = (df.A + df.C) * 3 == 12 + expected = pd.Series([False, True, None], dtype="boolean") + tm.assert_series_equal(result, expected) + + result = df.A + df.B + expected = pd.Series([2, np.nan, np.nan], dtype="Int64") + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("op", ["mean"]) +def test_reduce_to_float(op): + # some reduce ops always return float, even if the result + # is a rounded number + df = pd.DataFrame( + { + "A": ["a", "b", "b"], + "B": [1, None, 3], + "C": pd.array([1, None, 3], dtype="Int64"), + } + ) + + # op + result = getattr(df.C, op)() + assert isinstance(result, float) + + # groupby + result = getattr(df.groupby("A"), op)() + + expected = pd.DataFrame( + {"B": np.array([1.0, 3.0]), "C": pd.array([1, 3], dtype="Float64")}, + index=pd.Index(["a", "b"], name="A"), + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "source, neg_target, abs_target", + [ + ([1, 2, 3], [-1, -2, -3], [1, 2, 3]), + ([1, 2, None], [-1, -2, None], [1, 2, None]), + ([-1, 0, 1], [1, 0, -1], [1, 0, 1]), + ], +) +def test_unary_int_operators(any_signed_int_ea_dtype, source, neg_target, abs_target): + dtype = any_signed_int_ea_dtype + arr = pd.array(source, dtype=dtype) + neg_result, pos_result, abs_result = -arr, +arr, abs(arr) + neg_target = pd.array(neg_target, dtype=dtype) + abs_target = pd.array(abs_target, dtype=dtype) + + tm.assert_extension_array_equal(neg_result, neg_target) + tm.assert_extension_array_equal(pos_result, arr) + assert not tm.shares_memory(pos_result, arr) + tm.assert_extension_array_equal(abs_result, abs_target) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/integer/test_comparison.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/integer/test_comparison.py new file mode 100644 index 0000000..3bbf686 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/integer/test_comparison.py @@ -0,0 +1,38 @@ +import pytest + +import pandas as pd +from pandas.tests.arrays.masked_shared import ( + ComparisonOps, + NumericOps, +) + + +class TestComparisonOps(NumericOps, ComparisonOps): + @pytest.mark.parametrize("other", [True, False, pd.NA, -1, 0, 1]) + def test_scalar(self, other, comparison_op, dtype): + ComparisonOps.test_scalar(self, other, comparison_op, dtype) + + def test_compare_to_int(self, dtype, comparison_op): + # GH 28930 + op_name = f"__{comparison_op.__name__}__" + s1 = pd.Series([1, None, 3], dtype=dtype) + s2 = pd.Series([1, None, 3], dtype="float") + + method = getattr(s1, op_name) + result = method(2) + + method = getattr(s2, op_name) + expected = method(2).astype("boolean") + expected[s2.isna()] = pd.NA + + self.assert_series_equal(result, expected) + + +def test_equals(): + # GH-30652 + # equals is generally tested in /tests/extension/base/methods, but this + # specifically tests that two arrays of the same class but different dtype + # do not evaluate equal + a1 = pd.array([1, 2, None], dtype="Int64") + a2 = pd.array([1, 2, None], dtype="Int32") + assert a1.equals(a2) is False diff --git 
a/.local/lib/python3.8/site-packages/pandas/tests/arrays/integer/test_concat.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/integer/test_concat.py new file mode 100644 index 0000000..2e8ef50 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/integer/test_concat.py @@ -0,0 +1,65 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +@pytest.mark.parametrize( + "to_concat_dtypes, result_dtype", + [ + (["Int64", "Int64"], "Int64"), + (["UInt64", "UInt64"], "UInt64"), + (["Int8", "Int8"], "Int8"), + (["Int8", "Int16"], "Int16"), + (["UInt8", "Int8"], "Int16"), + (["Int32", "UInt32"], "Int64"), + (["Int64", "UInt64"], "Float64"), + (["Int64", "boolean"], "Int64"), + (["UInt8", "boolean"], "UInt8"), + ], +) +def test_concat_series(to_concat_dtypes, result_dtype): + + result = pd.concat([pd.Series([0, 1, pd.NA], dtype=t) for t in to_concat_dtypes]) + expected = pd.concat([pd.Series([0, 1, pd.NA], dtype=object)] * 2).astype( + result_dtype + ) + tm.assert_series_equal(result, expected) + + # order doesn't matter for result + result = pd.concat( + [pd.Series([0, 1, pd.NA], dtype=t) for t in to_concat_dtypes[::-1]] + ) + expected = pd.concat([pd.Series([0, 1, pd.NA], dtype=object)] * 2).astype( + result_dtype + ) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "to_concat_dtypes, result_dtype", + [ + (["Int64", "int64"], "Int64"), + (["UInt64", "uint64"], "UInt64"), + (["Int8", "int8"], "Int8"), + (["Int8", "int16"], "Int16"), + (["UInt8", "int8"], "Int16"), + (["Int32", "uint32"], "Int64"), + (["Int64", "uint64"], "Float64"), + (["Int64", "bool"], "Int64"), + (["UInt8", "bool"], "UInt8"), + ], +) +def test_concat_series_with_numpy(to_concat_dtypes, result_dtype): + + s1 = pd.Series([0, 1, pd.NA], dtype=to_concat_dtypes[0]) + s2 = pd.Series(np.array([0, 1], dtype=to_concat_dtypes[1])) + result = pd.concat([s1, s2], ignore_index=True) + expected = pd.Series([0, 1, pd.NA, 0, 1], dtype=object).astype(result_dtype) + tm.assert_series_equal(result, expected) + + # order doesn't matter for result + result = pd.concat([s2, s1], ignore_index=True) + expected = pd.Series([0, 1, 0, 1, pd.NA], dtype=object).astype(result_dtype) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/integer/test_construction.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/integer/test_construction.py new file mode 100644 index 0000000..e5fd497 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/integer/test_construction.py @@ -0,0 +1,231 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.api.types import is_integer +from pandas.core.arrays import IntegerArray +from pandas.core.arrays.integer import ( + Int8Dtype, + Int32Dtype, + Int64Dtype, +) + + +@pytest.fixture(params=[pd.array, IntegerArray._from_sequence]) +def constructor(request): + return request.param + + +def test_uses_pandas_na(): + a = pd.array([1, None], dtype=Int64Dtype()) + assert a[1] is pd.NA + + +def test_from_dtype_from_float(data): + # construct from our dtype & string dtype + dtype = data.dtype + + # from float + expected = pd.Series(data) + result = pd.Series(data.to_numpy(na_value=np.nan, dtype="float"), dtype=str(dtype)) + tm.assert_series_equal(result, expected) + + # from int / list + expected = pd.Series(data) + result = pd.Series(np.array(data).tolist(), dtype=str(dtype)) + tm.assert_series_equal(result, 
expected) + + # from int / array + expected = pd.Series(data).dropna().reset_index(drop=True) + dropped = np.array(data.dropna()).astype(np.dtype(dtype.type)) + result = pd.Series(dropped, dtype=str(dtype)) + tm.assert_series_equal(result, expected) + + +def test_conversions(data_missing): + # astype to object series + df = pd.DataFrame({"A": data_missing}) + result = df["A"].astype("object") + expected = pd.Series(np.array([np.nan, 1], dtype=object), name="A") + tm.assert_series_equal(result, expected) + + # convert to object ndarray + # we assert that we are exactly equal + # including type conversions of scalars + result = df["A"].astype("object").values + expected = np.array([pd.NA, 1], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + for r, e in zip(result, expected): + if pd.isnull(r): + assert pd.isnull(e) + elif is_integer(r): + assert r == e + assert is_integer(e) + else: + assert r == e + assert type(r) == type(e) + + +def test_integer_array_constructor(): + values = np.array([1, 2, 3, 4], dtype="int64") + mask = np.array([False, False, False, True], dtype="bool") + + result = IntegerArray(values, mask) + expected = pd.array([1, 2, 3, np.nan], dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + msg = r".* should be .* numpy array. Use the 'pd.array' function instead" + with pytest.raises(TypeError, match=msg): + IntegerArray(values.tolist(), mask) + + with pytest.raises(TypeError, match=msg): + IntegerArray(values, mask.tolist()) + + with pytest.raises(TypeError, match=msg): + IntegerArray(values.astype(float), mask) + msg = r"__init__\(\) missing 1 required positional argument: 'mask'" + with pytest.raises(TypeError, match=msg): + IntegerArray(values) + + +def test_integer_array_constructor_copy(): + values = np.array([1, 2, 3, 4], dtype="int64") + mask = np.array([False, False, False, True], dtype="bool") + + result = IntegerArray(values, mask) + assert result._data is values + assert result._mask is mask + + result = IntegerArray(values, mask, copy=True) + assert result._data is not values + assert result._mask is not mask + + +@pytest.mark.parametrize( + "a, b", + [ + ([1, None], [1, np.nan]), + ([None], [np.nan]), + ([None, np.nan], [np.nan, np.nan]), + ([np.nan, np.nan], [np.nan, np.nan]), + ], +) +def test_to_integer_array_none_is_nan(a, b): + result = pd.array(a, dtype="Int64") + expected = pd.array(b, dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize( + "values", + [ + ["foo", "bar"], + "foo", + 1, + 1.0, + pd.date_range("20130101", periods=2), + np.array(["foo"]), + [[1, 2], [3, 4]], + [np.nan, {"a": 1}], + ], +) +def test_to_integer_array_error(values): + # error in converting existing arrays to IntegerArrays + msg = "|".join( + [ + r"cannot be converted to an IntegerDtype", + r"invalid literal for int\(\) with base 10:", + r"values must be a 1D list-like", + r"Cannot pass scalar", + ] + ) + with pytest.raises((ValueError, TypeError), match=msg): + pd.array(values, dtype="Int64") + + with pytest.raises((ValueError, TypeError), match=msg): + IntegerArray._from_sequence(values) + + +def test_to_integer_array_inferred_dtype(constructor): + # if values has dtype -> respect it + result = constructor(np.array([1, 2], dtype="int8")) + assert result.dtype == Int8Dtype() + result = constructor(np.array([1, 2], dtype="int32")) + assert result.dtype == Int32Dtype() + + # if values have no dtype -> always int64 + result = constructor([1, 2]) + assert result.dtype == Int64Dtype() + + +def 
test_to_integer_array_dtype_keyword(constructor): + result = constructor([1, 2], dtype="Int8") + assert result.dtype == Int8Dtype() + + # if values has dtype -> override it + result = constructor(np.array([1, 2], dtype="int8"), dtype="Int32") + assert result.dtype == Int32Dtype() + + +def test_to_integer_array_float(): + result = IntegerArray._from_sequence([1.0, 2.0]) + expected = pd.array([1, 2], dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + with pytest.raises(TypeError, match="cannot safely cast non-equivalent"): + IntegerArray._from_sequence([1.5, 2.0]) + + # for float dtypes, the itemsize is not preserved + result = IntegerArray._from_sequence(np.array([1.0, 2.0], dtype="float32")) + assert result.dtype == Int64Dtype() + + +def test_to_integer_array_str(): + result = IntegerArray._from_sequence(["1", "2", None]) + expected = pd.array([1, 2, np.nan], dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + with pytest.raises( + ValueError, match=r"invalid literal for int\(\) with base 10: .*" + ): + IntegerArray._from_sequence(["1", "2", ""]) + + with pytest.raises( + ValueError, match=r"invalid literal for int\(\) with base 10: .*" + ): + IntegerArray._from_sequence(["1.5", "2.0"]) + + +@pytest.mark.parametrize( + "bool_values, int_values, target_dtype, expected_dtype", + [ + ([False, True], [0, 1], Int64Dtype(), Int64Dtype()), + ([False, True], [0, 1], "Int64", Int64Dtype()), + ([False, True, np.nan], [0, 1, np.nan], Int64Dtype(), Int64Dtype()), + ], +) +def test_to_integer_array_bool( + constructor, bool_values, int_values, target_dtype, expected_dtype +): + result = constructor(bool_values, dtype=target_dtype) + assert result.dtype == expected_dtype + expected = pd.array(int_values, dtype=target_dtype) + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize( + "values, to_dtype, result_dtype", + [ + (np.array([1], dtype="int64"), None, Int64Dtype), + (np.array([1, np.nan]), None, Int64Dtype), + (np.array([1, np.nan]), "int8", Int8Dtype), + ], +) +def test_to_integer_array(values, to_dtype, result_dtype): + # convert existing arrays to IntegerArrays + result = IntegerArray._from_sequence(values, dtype=to_dtype) + assert result.dtype == result_dtype() + expected = pd.array(values, dtype=result_dtype()) + tm.assert_extension_array_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/integer/test_dtypes.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/integer/test_dtypes.py new file mode 100644 index 0000000..3911b7f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/integer/test_dtypes.py @@ -0,0 +1,297 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.generic import ABCIndex + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays.integer import ( + Int8Dtype, + UInt32Dtype, +) + + +def test_dtypes(dtype): + # smoke tests on auto dtype construction + + if dtype.is_signed_integer: + assert np.dtype(dtype.type).kind == "i" + else: + assert np.dtype(dtype.type).kind == "u" + assert dtype.name is not None + + +@pytest.mark.parametrize("op", ["sum", "min", "max", "prod"]) +def test_preserve_dtypes(op): + # TODO(#22346): preserve Int64 dtype + # for ops that enable (mean would actually work here + # but generally it is a float return value) + df = pd.DataFrame( + { + "A": ["a", "b", "b"], + "B": [1, None, 3], + "C": pd.array([1, None, 3], dtype="Int64"), + } + ) + + # op + result = getattr(df.C, op)() + if op in 
{"sum", "prod", "min", "max"}: + assert isinstance(result, np.int64) + else: + assert isinstance(result, int) + + # groupby + result = getattr(df.groupby("A"), op)() + + expected = pd.DataFrame( + {"B": np.array([1.0, 3.0]), "C": pd.array([1, 3], dtype="Int64")}, + index=pd.Index(["a", "b"], name="A"), + ) + tm.assert_frame_equal(result, expected) + + +def test_astype_nansafe(): + # see gh-22343 + arr = pd.array([np.nan, 1, 2], dtype="Int8") + msg = "cannot convert to 'uint32'-dtype NumPy array with missing values." + + with pytest.raises(ValueError, match=msg): + arr.astype("uint32") + + +@pytest.mark.parametrize("dropna", [True, False]) +def test_construct_index(all_data, dropna): + # ensure that we do not coerce to Float64Index, rather + # keep as Index + + all_data = all_data[:10] + if dropna: + other = np.array(all_data[~all_data.isna()]) + else: + other = all_data + + result = pd.Index(pd.array(other, dtype=all_data.dtype)) + expected = pd.Index(other, dtype=all_data.dtype) + assert all_data.dtype == expected.dtype # dont coerce to object + + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize("dropna", [True, False]) +def test_astype_index(all_data, dropna): + # as an int/uint index to Index + + all_data = all_data[:10] + if dropna: + other = all_data[~all_data.isna()] + else: + other = all_data + + dtype = all_data.dtype + idx = pd.Index._with_infer(np.array(other)) + assert isinstance(idx, ABCIndex) + + result = idx.astype(dtype) + expected = idx.astype(object).astype(dtype) + tm.assert_index_equal(result, expected) + + +def test_astype(all_data): + all_data = all_data[:10] + + ints = all_data[~all_data.isna()] + mixed = all_data + dtype = Int8Dtype() + + # coerce to same type - ints + s = pd.Series(ints) + result = s.astype(all_data.dtype) + expected = pd.Series(ints) + tm.assert_series_equal(result, expected) + + # coerce to same other - ints + s = pd.Series(ints) + result = s.astype(dtype) + expected = pd.Series(ints, dtype=dtype) + tm.assert_series_equal(result, expected) + + # coerce to same numpy_dtype - ints + s = pd.Series(ints) + result = s.astype(all_data.dtype.numpy_dtype) + expected = pd.Series(ints._data.astype(all_data.dtype.numpy_dtype)) + tm.assert_series_equal(result, expected) + + # coerce to same type - mixed + s = pd.Series(mixed) + result = s.astype(all_data.dtype) + expected = pd.Series(mixed) + tm.assert_series_equal(result, expected) + + # coerce to same other - mixed + s = pd.Series(mixed) + result = s.astype(dtype) + expected = pd.Series(mixed, dtype=dtype) + tm.assert_series_equal(result, expected) + + # coerce to same numpy_dtype - mixed + s = pd.Series(mixed) + msg = r"cannot convert to .*-dtype NumPy array with missing values.*" + with pytest.raises(ValueError, match=msg): + s.astype(all_data.dtype.numpy_dtype) + + # coerce to object + s = pd.Series(mixed) + result = s.astype("object") + expected = pd.Series(np.asarray(mixed)) + tm.assert_series_equal(result, expected) + + +def test_astype_copy(): + arr = pd.array([1, 2, 3, None], dtype="Int64") + orig = pd.array([1, 2, 3, None], dtype="Int64") + + # copy=True -> ensure both data and mask are actual copies + result = arr.astype("Int64", copy=True) + assert result is not arr + assert not tm.shares_memory(result, arr) + result[0] = 10 + tm.assert_extension_array_equal(arr, orig) + result[0] = pd.NA + tm.assert_extension_array_equal(arr, orig) + + # copy=False + result = arr.astype("Int64", copy=False) + assert result is arr + assert np.shares_memory(result._data, arr._data) + assert 
np.shares_memory(result._mask, arr._mask) + result[0] = 10 + assert arr[0] == 10 + result[0] = pd.NA + assert arr[0] is pd.NA + + # astype to different dtype -> always needs a copy -> even with copy=False + # we need to ensure that also the mask is actually copied + arr = pd.array([1, 2, 3, None], dtype="Int64") + orig = pd.array([1, 2, 3, None], dtype="Int64") + + result = arr.astype("Int32", copy=False) + assert not tm.shares_memory(result, arr) + result[0] = 10 + tm.assert_extension_array_equal(arr, orig) + result[0] = pd.NA + tm.assert_extension_array_equal(arr, orig) + + +def test_astype_to_larger_numpy(): + a = pd.array([1, 2], dtype="Int32") + result = a.astype("int64") + expected = np.array([1, 2], dtype="int64") + tm.assert_numpy_array_equal(result, expected) + + a = pd.array([1, 2], dtype="UInt32") + result = a.astype("uint64") + expected = np.array([1, 2], dtype="uint64") + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize("dtype", [Int8Dtype(), "Int8", UInt32Dtype(), "UInt32"]) +def test_astype_specific_casting(dtype): + s = pd.Series([1, 2, 3], dtype="Int64") + result = s.astype(dtype) + expected = pd.Series([1, 2, 3], dtype=dtype) + tm.assert_series_equal(result, expected) + + s = pd.Series([1, 2, 3, None], dtype="Int64") + result = s.astype(dtype) + expected = pd.Series([1, 2, 3, None], dtype=dtype) + tm.assert_series_equal(result, expected) + + +def test_astype_floating(): + arr = pd.array([1, 2, None], dtype="Int64") + result = arr.astype("Float64") + expected = pd.array([1.0, 2.0, None], dtype="Float64") + tm.assert_extension_array_equal(result, expected) + + +def test_astype_dt64(): + # GH#32435 + arr = pd.array([1, 2, 3, pd.NA]) * 10 ** 9 + + result = arr.astype("datetime64[ns]") + + expected = np.array([1, 2, 3, "NaT"], dtype="M8[s]").astype("M8[ns]") + tm.assert_numpy_array_equal(result, expected) + + +def test_construct_cast_invalid(dtype): + + msg = "cannot safely" + arr = [1.2, 2.3, 3.7] + with pytest.raises(TypeError, match=msg): + pd.array(arr, dtype=dtype) + + with pytest.raises(TypeError, match=msg): + pd.Series(arr).astype(dtype) + + arr = [1.2, 2.3, 3.7, np.nan] + with pytest.raises(TypeError, match=msg): + pd.array(arr, dtype=dtype) + + with pytest.raises(TypeError, match=msg): + pd.Series(arr).astype(dtype) + + +@pytest.mark.parametrize("in_series", [True, False]) +def test_to_numpy_na_nan(in_series): + a = pd.array([0, 1, None], dtype="Int64") + if in_series: + a = pd.Series(a) + + result = a.to_numpy(dtype="float64", na_value=np.nan) + expected = np.array([0.0, 1.0, np.nan], dtype="float64") + tm.assert_numpy_array_equal(result, expected) + + result = a.to_numpy(dtype="int64", na_value=-1) + expected = np.array([0, 1, -1], dtype="int64") + tm.assert_numpy_array_equal(result, expected) + + result = a.to_numpy(dtype="bool", na_value=False) + expected = np.array([False, True, False], dtype="bool") + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize("in_series", [True, False]) +@pytest.mark.parametrize("dtype", ["int32", "int64", "bool"]) +def test_to_numpy_dtype(dtype, in_series): + a = pd.array([0, 1], dtype="Int64") + if in_series: + a = pd.Series(a) + + result = a.to_numpy(dtype=dtype) + expected = np.array([0, 1], dtype=dtype) + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize("dtype", ["float64", "int64", "bool"]) +def test_to_numpy_na_raises(dtype): + a = pd.array([0, 1, None], dtype="Int64") + with pytest.raises(ValueError, match=dtype): + a.to_numpy(dtype=dtype) + + +def 
test_astype_str(): + a = pd.array([1, 2, None], dtype="Int64") + expected = np.array(["1", "2", "<NA>"], dtype="<U21") + + tm.assert_numpy_array_equal(a.astype(str), expected) + tm.assert_numpy_array_equal(a.astype("str"), expected) + + +def test_astype_boolean(): + # https://github.com/pandas-dev/pandas/issues/31102 + a = pd.array([1, 0, -1, 2, None], dtype="Int64") + result = a.astype("boolean") + expected = pd.array([True, False, True, True, None], dtype="boolean") + tm.assert_extension_array_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/integer/test_function.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/integer/test_function.py new file mode 100644 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/integer/test_function.py +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays import FloatingArray + + +@pytest.mark.parametrize("ufunc", [np.abs, np.sign]) +# np.sign emits a warning with nans, <https://github.com/numpy/numpy/issues/15127> +@pytest.mark.filterwarnings("ignore:invalid value encountered in sign") +def test_ufuncs_single_int(ufunc): + a = pd.array([1, 2, -3, np.nan]) + result = ufunc(a) + expected = pd.array(ufunc(a.astype(float)), dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + s = pd.Series(a) + result = ufunc(s) + expected = pd.Series(pd.array(ufunc(a.astype(float)), dtype="Int64")) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("ufunc", [np.log, np.exp, np.sin, np.cos, np.sqrt]) +def test_ufuncs_single_float(ufunc): + a = pd.array([1, 2, -3, np.nan]) + with np.errstate(invalid="ignore"): + result = ufunc(a) + expected = FloatingArray(ufunc(a.astype(float)), mask=a._mask) + tm.assert_extension_array_equal(result, expected) + + s = pd.Series(a) + with np.errstate(invalid="ignore"): + result = ufunc(s) + expected = pd.Series(expected) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("ufunc", [np.add, np.subtract]) +def test_ufuncs_binary_int(ufunc): + # two IntegerArrays + a = pd.array([1, 2, -3, np.nan]) + result = ufunc(a, a) + expected = pd.array(ufunc(a.astype(float), a.astype(float)), dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + # IntegerArray with numpy array + arr = np.array([1, 2, 3, 4]) + result = ufunc(a, arr) + expected = pd.array(ufunc(a.astype(float), arr), dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + result = ufunc(arr, a) + expected = pd.array(ufunc(arr, a.astype(float)), dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + # IntegerArray with scalar + result = ufunc(a, 1) + expected = pd.array(ufunc(a.astype(float), 1), dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + result = ufunc(1, a) + expected = pd.array(ufunc(1, a.astype(float)), dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + +def test_ufunc_binary_output(): + a = pd.array([1, 2, np.nan]) + result = np.modf(a) + expected = np.modf(a.to_numpy(na_value=np.nan, dtype="float")) + expected = (pd.array(expected[0]), pd.array(expected[1])) + + assert isinstance(result, tuple) + assert len(result) == 2 + + for x, y in zip(result, expected): + tm.assert_extension_array_equal(x, y) + + +@pytest.mark.parametrize("values", [[0, 1], [0, None]]) +def test_ufunc_reduce(values): + # np.add.reduce is supported and matches sum(skipna=False), so the old + # "_raises" suffix no longer described this test + arr = pd.array(values) + + res = np.add.reduce(arr) + expected = arr.sum(skipna=False) + tm.assert_almost_equal(res, expected) + + +@pytest.mark.parametrize( + "pandasmethname, kwargs", + [ + ("var", {"ddof": 0}), + ("var", {"ddof": 1}), + ("kurtosis", {}), + ("skew", {}), + ("sem", {}), + ], +) +def test_stat_method(pandasmethname, kwargs): + s = pd.Series(data=[1, 2, 3, 4, 5, 6, np.nan, np.nan], dtype="Int64") + pandasmeth = getattr(s, pandasmethname) + result = pandasmeth(**kwargs) + s2 = pd.Series(data=[1, 2, 3, 4, 5, 6], dtype="Int64") + pandasmeth = getattr(s2, pandasmethname) + expected = pandasmeth(**kwargs) + assert expected == result + + +def test_value_counts_na(): + arr = pd.array([1, 2, 1, pd.NA], dtype="Int64") + result = arr.value_counts(dropna=False) + ex_index = pd.Index([1, 2, pd.NA], dtype="Int64") + assert ex_index.dtype == "Int64" + expected = pd.Series([2, 1, 1], index=ex_index, dtype="Int64") + tm.assert_series_equal(result, expected) + + result = arr.value_counts(dropna=True) + expected = pd.Series([2, 1], index=arr[:2], dtype="Int64") + assert 
expected.index.dtype == arr.dtype + tm.assert_series_equal(result, expected) + + +def test_value_counts_empty(): + # https://github.com/pandas-dev/pandas/issues/33317 + ser = pd.Series([], dtype="Int64") + result = ser.value_counts() + idx = pd.Index([], dtype=ser.dtype) + assert idx.dtype == ser.dtype + expected = pd.Series([], index=idx, dtype="Int64") + tm.assert_series_equal(result, expected) + + +def test_value_counts_with_normalize(): + # GH 33172 + ser = pd.Series([1, 2, 1, pd.NA], dtype="Int64") + result = ser.value_counts(normalize=True) + expected = pd.Series([2, 1], index=ser[:2], dtype="Float64") / 3 + assert expected.index.dtype == ser.dtype + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("skipna", [True, False]) +@pytest.mark.parametrize("min_count", [0, 4]) +def test_integer_array_sum(skipna, min_count, any_int_ea_dtype): + dtype = any_int_ea_dtype + arr = pd.array([1, 2, 3, None], dtype=dtype) + result = arr.sum(skipna=skipna, min_count=min_count) + if skipna and min_count == 0: + assert result == 6 + else: + assert result is pd.NA + + +@pytest.mark.parametrize("skipna", [True, False]) +@pytest.mark.parametrize("method", ["min", "max"]) +def test_integer_array_min_max(skipna, method, any_int_ea_dtype): + dtype = any_int_ea_dtype + arr = pd.array([0, 1, None], dtype=dtype) + func = getattr(arr, method) + result = func(skipna=skipna) + if skipna: + assert result == (0 if method == "min" else 1) + else: + assert result is pd.NA + + +@pytest.mark.parametrize("skipna", [True, False]) +@pytest.mark.parametrize("min_count", [0, 9]) +def test_integer_array_prod(skipna, min_count, any_int_ea_dtype): + dtype = any_int_ea_dtype + arr = pd.array([1, 2, None], dtype=dtype) + result = arr.prod(skipna=skipna, min_count=min_count) + if skipna and min_count == 0: + assert result == 2 + else: + assert result is pd.NA + + +@pytest.mark.parametrize( + "values, expected", [([1, 2, 3], 6), ([1, 2, 3, None], 6), ([None], 0)] +) +def test_integer_array_numpy_sum(values, expected): + arr = pd.array(values, dtype="Int64") + result = np.sum(arr) + assert result == expected + + +@pytest.mark.parametrize("op", ["sum", "prod", "min", "max"]) +def test_dataframe_reductions(op): + # https://github.com/pandas-dev/pandas/pull/32867 + # ensure the integers are not cast to float during reductions + df = pd.DataFrame({"a": pd.array([1, 2], dtype="Int64")}) + result = getattr(df, op)() + assert isinstance(result["a"], np.int64) + + +# TODO(jreback) - these need testing / are broken + +# shift + +# set_index (destroys type) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/integer/test_indexing.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/integer/test_indexing.py new file mode 100644 index 0000000..4b953d6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/integer/test_indexing.py @@ -0,0 +1,19 @@ +import pandas as pd +import pandas._testing as tm + + +def test_array_setitem_nullable_boolean_mask(): + # GH 31446 + ser = pd.Series([1, 2], dtype="Int64") + result = ser.where(ser > 1) + expected = pd.Series([pd.NA, 2], dtype="Int64") + tm.assert_series_equal(result, expected) + + +def test_array_setitem(): + # GH 31446 + arr = pd.Series([1, 2], dtype="Int64").array + arr[arr > 1] = 1 + + expected = pd.array([1, 1], dtype="Int64") + tm.assert_extension_array_equal(arr, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/integer/test_repr.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/integer/test_repr.py 
new file mode 100644 index 0000000..35d07bd --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/integer/test_repr.py @@ -0,0 +1,68 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas.core.arrays.integer import ( + Int8Dtype, + Int16Dtype, + Int32Dtype, + Int64Dtype, + UInt8Dtype, + UInt16Dtype, + UInt32Dtype, + UInt64Dtype, +) + + +def test_dtypes(dtype): + # smoke tests on auto dtype construction + + if dtype.is_signed_integer: + assert np.dtype(dtype.type).kind == "i" + else: + assert np.dtype(dtype.type).kind == "u" + assert dtype.name is not None + + +@pytest.mark.parametrize( + "dtype, expected", + [ + (Int8Dtype(), "Int8Dtype()"), + (Int16Dtype(), "Int16Dtype()"), + (Int32Dtype(), "Int32Dtype()"), + (Int64Dtype(), "Int64Dtype()"), + (UInt8Dtype(), "UInt8Dtype()"), + (UInt16Dtype(), "UInt16Dtype()"), + (UInt32Dtype(), "UInt32Dtype()"), + (UInt64Dtype(), "UInt64Dtype()"), + ], +) +def test_repr_dtype(dtype, expected): + assert repr(dtype) == expected + + +def test_repr_array(): + result = repr(pd.array([1, None, 3])) + expected = "<IntegerArray>\n[1, <NA>, 3]\nLength: 3, dtype: Int64" + assert result == expected + + +def test_repr_array_long(): + data = pd.array([1, 2, None] * 1000) + expected = ( + "<IntegerArray>\n" + "[   1,    2, <NA>,    1,    2, <NA>,    1,    2, <NA>,    1,\n" + " ...\n" + " <NA>,    1,    2, <NA>,    1,    2, <NA>,    1,    2, <NA>]\n" + "Length: 3000, dtype: Int64" + ) + result = repr(data) + assert result == expected + + +def test_frame_repr(data_missing): + + df = pd.DataFrame({"A": data_missing}) + result = repr(df) + expected = "      A\n0  <NA>\n1     1" + assert result == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/interval/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/interval/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/interval/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/interval/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..ecdfd4d Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/interval/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/interval/__pycache__/test_astype.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/interval/__pycache__/test_astype.cpython-38.pyc new file mode 100644 index 0000000..55a4139 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/interval/__pycache__/test_astype.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/interval/__pycache__/test_interval.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/interval/__pycache__/test_interval.cpython-38.pyc new file mode 100644 index 0000000..0d821f3 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/interval/__pycache__/test_interval.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/interval/__pycache__/test_ops.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/interval/__pycache__/test_ops.cpython-38.pyc new file mode 100644 index 0000000..b5dbe4b Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/interval/__pycache__/test_ops.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/interval/test_astype.py 
b/.local/lib/python3.8/site-packages/pandas/tests/arrays/interval/test_astype.py new file mode 100644 index 0000000..d7a2140 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/interval/test_astype.py @@ -0,0 +1,28 @@ +import pytest + +from pandas import ( + Categorical, + CategoricalDtype, + Index, + IntervalIndex, +) +import pandas._testing as tm + + +class TestAstype: + @pytest.mark.parametrize("ordered", [True, False]) + def test_astype_categorical_retains_ordered(self, ordered): + index = IntervalIndex.from_breaks(range(5)) + arr = index._data + + dtype = CategoricalDtype(None, ordered=ordered) + + expected = Categorical(list(arr), ordered=ordered) + result = arr.astype(dtype) + assert result.ordered is ordered + tm.assert_categorical_equal(result, expected) + + # test IntervalIndex.astype while we're at it. + result = index.astype(dtype) + expected = Index(expected) + tm.assert_index_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/interval/test_interval.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/interval/test_interval.py new file mode 100644 index 0000000..400846c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/interval/test_interval.py @@ -0,0 +1,369 @@ +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + Index, + Interval, + IntervalIndex, + Timedelta, + Timestamp, + date_range, + timedelta_range, +) +import pandas._testing as tm +from pandas.core.arrays import IntervalArray + + +@pytest.fixture( + params=[ + (Index([0, 2, 4]), Index([1, 3, 5])), + (Index([0.0, 1.0, 2.0]), Index([1.0, 2.0, 3.0])), + (timedelta_range("0 days", periods=3), timedelta_range("1 day", periods=3)), + (date_range("20170101", periods=3), date_range("20170102", periods=3)), + ( + date_range("20170101", periods=3, tz="US/Eastern"), + date_range("20170102", periods=3, tz="US/Eastern"), + ), + ], + ids=lambda x: str(x[0].dtype), +) +def left_right_dtypes(request): + """ + Fixture for building an IntervalArray from various dtypes + """ + return request.param + + +class TestAttributes: + @pytest.mark.parametrize( + "left, right", + [ + (0, 1), + (Timedelta("0 days"), Timedelta("1 day")), + (Timestamp("2018-01-01"), Timestamp("2018-01-02")), + ( + Timestamp("2018-01-01", tz="US/Eastern"), + Timestamp("2018-01-02", tz="US/Eastern"), + ), + ], + ) + @pytest.mark.parametrize("constructor", [IntervalArray, IntervalIndex]) + def test_is_empty(self, constructor, left, right, closed): + # GH27219 + tuples = [(left, left), (left, right), np.nan] + expected = np.array([closed != "both", False, False]) + result = constructor.from_tuples(tuples, closed=closed).is_empty + tm.assert_numpy_array_equal(result, expected) + + +class TestMethods: + @pytest.mark.parametrize("new_closed", ["left", "right", "both", "neither"]) + def test_set_closed(self, closed, new_closed): + # GH 21670 + array = IntervalArray.from_breaks(range(10), closed=closed) + result = array.set_closed(new_closed) + expected = IntervalArray.from_breaks(range(10), closed=new_closed) + tm.assert_extension_array_equal(result, expected) + + @pytest.mark.parametrize( + "other", + [ + Interval(0, 1, closed="right"), + IntervalArray.from_breaks([1, 2, 3, 4], closed="right"), + ], + ) + def test_where_raises(self, other): + ser = pd.Series(IntervalArray.from_breaks([1, 2, 3, 4], closed="left")) + match = "'value.closed' is 'right', expected 'left'." 
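+ # Editor's aside (illustrative sketch, not part of the upstream suite): an IntervalArray carries a single "closed" side for all of its intervals, which is why a value closed on the other side is rejected in the check below. + assert Interval(0, 1, closed="right").closed == "right" + assert IntervalArray.from_breaks([1, 2, 3, 4], closed="left").closed == "left"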
+ with pytest.raises(ValueError, match=match): + ser.where([True, False, True], other=other) + + def test_shift(self): + # https://github.com/pandas-dev/pandas/issues/31495 + a = IntervalArray.from_breaks([1, 2, 3]) + result = a.shift() + # int -> float + expected = IntervalArray.from_tuples([(np.nan, np.nan), (1.0, 2.0)]) + tm.assert_interval_array_equal(result, expected) + + def test_shift_datetime(self): + a = IntervalArray.from_breaks(date_range("2000", periods=4)) + result = a.shift(2) + expected = a.take([-1, -1, 0], allow_fill=True) + tm.assert_interval_array_equal(result, expected) + + result = a.shift(-1) + expected = a.take([1, 2, -1], allow_fill=True) + tm.assert_interval_array_equal(result, expected) + + +class TestSetitem: + def test_set_na(self, left_right_dtypes): + left, right = left_right_dtypes + left = left.copy(deep=True) + right = right.copy(deep=True) + result = IntervalArray.from_arrays(left, right) + + if result.dtype.subtype.kind not in ["m", "M"]: + msg = "'value' should be an interval type, got <.*NaTType'> instead." + with pytest.raises(TypeError, match=msg): + result[0] = pd.NaT + if result.dtype.subtype.kind in ["i", "u"]: + msg = "Cannot set float NaN to integer-backed IntervalArray" + with pytest.raises(ValueError, match=msg): + result[0] = np.NaN + return + + result[0] = np.nan + + expected_left = Index([left._na_value] + list(left[1:])) + expected_right = Index([right._na_value] + list(right[1:])) + expected = IntervalArray.from_arrays(expected_left, expected_right) + + tm.assert_extension_array_equal(result, expected) + + def test_setitem_mismatched_closed(self): + arr = IntervalArray.from_breaks(range(4)) + orig = arr.copy() + other = arr.set_closed("both") + + msg = "'value.closed' is 'both', expected 'right'" + with pytest.raises(ValueError, match=msg): + arr[0] = other[0] + with pytest.raises(ValueError, match=msg): + arr[:1] = other[:1] + with pytest.raises(ValueError, match=msg): + arr[:0] = other[:0] + with pytest.raises(ValueError, match=msg): + arr[:] = other[::-1] + with pytest.raises(ValueError, match=msg): + arr[:] = list(other[::-1]) + with pytest.raises(ValueError, match=msg): + arr[:] = other[::-1].astype(object) + with pytest.raises(ValueError, match=msg): + arr[:] = other[::-1].astype("category") + + # empty list should be no-op + arr[:0] = [] + tm.assert_interval_array_equal(arr, orig) + + +def test_repr(): + # GH 25022 + arr = IntervalArray.from_tuples([(0, 1), (1, 2)]) + result = repr(arr) + expected = ( + "\n" + "[(0, 1], (1, 2]]\n" + "Length: 2, dtype: interval[int64, right]" + ) + assert result == expected + + +class TestReductions: + def test_min_max_invalid_axis(self, left_right_dtypes): + left, right = left_right_dtypes + left = left.copy(deep=True) + right = right.copy(deep=True) + arr = IntervalArray.from_arrays(left, right) + + msg = "`axis` must be fewer than the number of dimensions" + for axis in [-2, 1]: + with pytest.raises(ValueError, match=msg): + arr.min(axis=axis) + with pytest.raises(ValueError, match=msg): + arr.max(axis=axis) + + msg = "'>=' not supported between" + with pytest.raises(TypeError, match=msg): + arr.min(axis="foo") + with pytest.raises(TypeError, match=msg): + arr.max(axis="foo") + + def test_min_max(self, left_right_dtypes, index_or_series_or_array): + # GH#44746 + left, right = left_right_dtypes + left = left.copy(deep=True) + right = right.copy(deep=True) + arr = IntervalArray.from_arrays(left, right) + + # The expected results below are only valid if monotonic + assert left.is_monotonic_increasing 
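+ # Editor's note (illustrative sketch, not part of the upstream suite): with monotonically increasing breaks the smallest interval is the first and the largest is the last, so min()/max() of the shuffled array below must recover the original endpoints, e.g.: + assert IntervalArray.from_breaks(range(3)).min() == Interval(0, 1) + assert IntervalArray.from_breaks(range(3)).max() == Interval(1, 2)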
+ assert Index(arr).is_monotonic_increasing + + MIN = arr[0] + MAX = arr[-1] + + indexer = np.arange(len(arr)) + np.random.shuffle(indexer) + arr = arr.take(indexer) + + arr_na = arr.insert(2, np.nan) + + arr = index_or_series_or_array(arr) + arr_na = index_or_series_or_array(arr_na) + + for skipna in [True, False]: + res = arr.min(skipna=skipna) + assert res == MIN + assert type(res) == type(MIN) + + res = arr.max(skipna=skipna) + assert res == MAX + assert type(res) == type(MAX) + + res = arr_na.min(skipna=False) + assert np.isnan(res) + res = arr_na.max(skipna=False) + assert np.isnan(res) + + res = arr_na.min(skipna=True) + assert res == MIN + assert type(res) == type(MIN) + res = arr_na.max(skipna=True) + assert res == MAX + assert type(res) == type(MAX) + + +# ---------------------------------------------------------------------------- +# Arrow interaction + + +pyarrow_skip = td.skip_if_no("pyarrow") + + +@pyarrow_skip +def test_arrow_extension_type(): + import pyarrow as pa + + from pandas.core.arrays._arrow_utils import ArrowIntervalType + + p1 = ArrowIntervalType(pa.int64(), "left") + p2 = ArrowIntervalType(pa.int64(), "left") + p3 = ArrowIntervalType(pa.int64(), "right") + + assert p1.closed == "left" + assert p1 == p2 + assert not p1 == p3 + assert hash(p1) == hash(p2) + assert not hash(p1) == hash(p3) + + +@pyarrow_skip +def test_arrow_array(): + import pyarrow as pa + + from pandas.core.arrays._arrow_utils import ArrowIntervalType + + intervals = pd.interval_range(1, 5, freq=1).array + + result = pa.array(intervals) + assert isinstance(result.type, ArrowIntervalType) + assert result.type.closed == intervals.closed + assert result.type.subtype == pa.int64() + assert result.storage.field("left").equals(pa.array([1, 2, 3, 4], type="int64")) + assert result.storage.field("right").equals(pa.array([2, 3, 4, 5], type="int64")) + + expected = pa.array([{"left": i, "right": i + 1} for i in range(1, 5)]) + assert result.storage.equals(expected) + + # convert to its storage type + result = pa.array(intervals, type=expected.type) + assert result.equals(expected) + + # unsupported conversions + with pytest.raises(TypeError, match="Not supported to convert IntervalArray"): + pa.array(intervals, type="float64") + + with pytest.raises(TypeError, match="different 'subtype'"): + pa.array(intervals, type=ArrowIntervalType(pa.float64(), "left")) + + +@pyarrow_skip +def test_arrow_array_missing(): + import pyarrow as pa + + from pandas.core.arrays._arrow_utils import ArrowIntervalType + + arr = IntervalArray.from_breaks([0.0, 1.0, 2.0, 3.0]) + arr[1] = None + + result = pa.array(arr) + assert isinstance(result.type, ArrowIntervalType) + assert result.type.closed == arr.closed + assert result.type.subtype == pa.float64() + + # fields have missing values (not NaN) + left = pa.array([0.0, None, 2.0], type="float64") + right = pa.array([1.0, None, 3.0], type="float64") + assert result.storage.field("left").equals(left) + assert result.storage.field("right").equals(right) + + # structarray itself also has missing values on the array level + vals = [ + {"left": 0.0, "right": 1.0}, + {"left": None, "right": None}, + {"left": 2.0, "right": 3.0}, + ] + expected = pa.StructArray.from_pandas(vals, mask=np.array([False, True, False])) + assert result.storage.equals(expected) + + +@pyarrow_skip +@pytest.mark.parametrize( + "breaks", + [[0.0, 1.0, 2.0, 3.0], date_range("2017", periods=4, freq="D")], + ids=["float", "datetime64[ns]"], +) +def test_arrow_table_roundtrip(breaks): + import pyarrow as pa + + from 
pandas.core.arrays._arrow_utils import ArrowIntervalType + + arr = IntervalArray.from_breaks(breaks) + arr[1] = None + df = pd.DataFrame({"a": arr}) + + table = pa.table(df) + assert isinstance(table.field("a").type, ArrowIntervalType) + result = table.to_pandas() + assert isinstance(result["a"].dtype, pd.IntervalDtype) + tm.assert_frame_equal(result, df) + + table2 = pa.concat_tables([table, table]) + result = table2.to_pandas() + expected = pd.concat([df, df], ignore_index=True) + tm.assert_frame_equal(result, expected) + + # GH-41040 + table = pa.table( + [pa.chunked_array([], type=table.column(0).type)], schema=table.schema + ) + result = table.to_pandas() + tm.assert_frame_equal(result, expected[0:0]) + + +@pyarrow_skip +@pytest.mark.parametrize( + "breaks", + [[0.0, 1.0, 2.0, 3.0], date_range("2017", periods=4, freq="D")], + ids=["float", "datetime64[ns]"], +) +def test_arrow_table_roundtrip_without_metadata(breaks): + import pyarrow as pa + + arr = IntervalArray.from_breaks(breaks) + arr[1] = None + df = pd.DataFrame({"a": arr}) + + table = pa.table(df) + # remove the metadata + table = table.replace_schema_metadata() + assert table.schema.metadata is None + + result = table.to_pandas() + assert isinstance(result["a"].dtype, pd.IntervalDtype) + tm.assert_frame_equal(result, df) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/interval/test_ops.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/interval/test_ops.py new file mode 100644 index 0000000..4853bec --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/interval/test_ops.py @@ -0,0 +1,93 @@ +"""Tests for Interval-Interval operations, such as overlaps, contains, etc.""" +import numpy as np +import pytest + +from pandas import ( + Interval, + IntervalIndex, + Timedelta, + Timestamp, +) +import pandas._testing as tm +from pandas.core.arrays import IntervalArray + + +@pytest.fixture(params=[IntervalArray, IntervalIndex]) +def constructor(request): + """ + Fixture for testing both interval container classes. + """ + return request.param + + +@pytest.fixture( + params=[ + (Timedelta("0 days"), Timedelta("1 day")), + (Timestamp("2018-01-01"), Timedelta("1 day")), + (0, 1), + ], + ids=lambda x: type(x[0]).__name__, +) +def start_shift(request): + """ + Fixture for generating intervals of different types from a start value + and a shift value that can be added to start to generate an endpoint. 
+ """ + return request.param + + +class TestOverlaps: + def test_overlaps_interval(self, constructor, start_shift, closed, other_closed): + start, shift = start_shift + interval = Interval(start, start + 3 * shift, other_closed) + + # intervals: identical, nested, spanning, partial, adjacent, disjoint + tuples = [ + (start, start + 3 * shift), + (start + shift, start + 2 * shift), + (start - shift, start + 4 * shift), + (start + 2 * shift, start + 4 * shift), + (start + 3 * shift, start + 4 * shift), + (start + 4 * shift, start + 5 * shift), + ] + interval_container = constructor.from_tuples(tuples, closed) + + adjacent = interval.closed_right and interval_container.closed_left + expected = np.array([True, True, True, True, adjacent, False]) + result = interval_container.overlaps(interval) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("other_constructor", [IntervalArray, IntervalIndex]) + def test_overlaps_interval_container(self, constructor, other_constructor): + # TODO: modify this test when implemented + interval_container = constructor.from_breaks(range(5)) + other_container = other_constructor.from_breaks(range(5)) + with pytest.raises(NotImplementedError, match="^$"): + interval_container.overlaps(other_container) + + def test_overlaps_na(self, constructor, start_shift): + """NA values are marked as False""" + start, shift = start_shift + interval = Interval(start, start + shift) + + tuples = [ + (start, start + shift), + np.nan, + (start + 2 * shift, start + 3 * shift), + ] + interval_container = constructor.from_tuples(tuples) + + expected = np.array([True, False, False]) + result = interval_container.overlaps(interval) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "other", + [10, True, "foo", Timedelta("1 day"), Timestamp("2018-01-01")], + ids=lambda x: type(x).__name__, + ) + def test_overlaps_invalid_type(self, constructor, other): + interval_container = constructor.from_breaks(range(5)) + msg = f"`other` must be Interval-like, got {type(other).__name__}" + with pytest.raises(TypeError, match=msg): + interval_container.overlaps(other) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/masked/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/masked/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/masked/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/masked/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..d63f9ae Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/masked/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/masked/__pycache__/test_arithmetic.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/masked/__pycache__/test_arithmetic.cpython-38.pyc new file mode 100644 index 0000000..9fe83af Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/masked/__pycache__/test_arithmetic.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/masked/__pycache__/test_arrow_compat.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/masked/__pycache__/test_arrow_compat.cpython-38.pyc new file mode 100644 index 0000000..58cad17 Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/pandas/tests/arrays/masked/__pycache__/test_arrow_compat.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/masked/__pycache__/test_function.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/masked/__pycache__/test_function.cpython-38.pyc new file mode 100644 index 0000000..6a91154 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/masked/__pycache__/test_function.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/masked/test_arithmetic.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/masked/test_arithmetic.py new file mode 100644 index 0000000..ab6e511 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/masked/test_arithmetic.py @@ -0,0 +1,181 @@ +from __future__ import annotations + +from typing import Any + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + +# integer dtypes +arrays = [pd.array([1, 2, 3, None], dtype=dtype) for dtype in tm.ALL_INT_EA_DTYPES] +scalars: list[Any] = [2] * len(arrays) +# floating dtypes +arrays += [pd.array([0.1, 0.2, 0.3, None], dtype=dtype) for dtype in tm.FLOAT_EA_DTYPES] +scalars += [0.2, 0.2] +# boolean +arrays += [pd.array([True, False, True, None], dtype="boolean")] +scalars += [False] + + +@pytest.fixture(params=zip(arrays, scalars), ids=[a.dtype.name for a in arrays]) +def data(request): + return request.param + + +def check_skip(data, op_name): + if isinstance(data.dtype, pd.BooleanDtype) and "sub" in op_name: + pytest.skip("subtract not implemented for boolean") + + +# Test equivalence of scalars, numpy arrays with array ops +# ----------------------------------------------------------------------------- + + +def test_array_scalar_like_equivalence(data, all_arithmetic_operators): + data, scalar = data + op = tm.get_op_from_name(all_arithmetic_operators) + check_skip(data, all_arithmetic_operators) + + scalar_array = pd.array([scalar] * len(data), dtype=data.dtype) + + # TODO also add len-1 array (np.array([scalar], dtype=data.dtype.numpy_dtype)) + for scalar in [scalar, data.dtype.type(scalar)]: + result = op(data, scalar) + expected = op(data, scalar_array) + tm.assert_extension_array_equal(result, expected) + + +def test_array_NA(data, all_arithmetic_operators, request): + data, _ = data + op = tm.get_op_from_name(all_arithmetic_operators) + check_skip(data, all_arithmetic_operators) + + scalar = pd.NA + scalar_array = pd.array([pd.NA] * len(data), dtype=data.dtype) + + result = op(data, scalar) + expected = op(data, scalar_array) + tm.assert_extension_array_equal(result, expected) + + +def test_numpy_array_equivalence(data, all_arithmetic_operators): + data, scalar = data + op = tm.get_op_from_name(all_arithmetic_operators) + check_skip(data, all_arithmetic_operators) + + numpy_array = np.array([scalar] * len(data), dtype=data.dtype.numpy_dtype) + pd_array = pd.array(numpy_array, dtype=data.dtype) + + result = op(data, numpy_array) + expected = op(data, pd_array) + tm.assert_extension_array_equal(result, expected) + + +# Test equivalence with Series and DataFrame ops +# ----------------------------------------------------------------------------- + + +def test_frame(data, all_arithmetic_operators): + data, scalar = data + op = tm.get_op_from_name(all_arithmetic_operators) + check_skip(data, all_arithmetic_operators) + + # DataFrame with scalar + df = pd.DataFrame({"A": data}) + + result = op(df, scalar) + 
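+ # Editor's aside (illustrative sketch; "_demo" is a hypothetical name and Int64 is assumed for concreteness): the equivalence exercised throughout this module is that a scalar operand behaves like a same-dtype array filled with that scalar, with the NA mask propagated. + _demo = pd.array([1, 2, None], dtype="Int64") + 2 + tm.assert_extension_array_equal(_demo, pd.array([3, 4, None], dtype="Int64"))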
expected = pd.DataFrame({"A": op(data, scalar)}) + tm.assert_frame_equal(result, expected) + + +def test_series(data, all_arithmetic_operators): + data, scalar = data + op = tm.get_op_from_name(all_arithmetic_operators) + check_skip(data, all_arithmetic_operators) + + s = pd.Series(data) + + # Series with scalar + result = op(s, scalar) + expected = pd.Series(op(data, scalar)) + tm.assert_series_equal(result, expected) + + # Series with np.ndarray + other = np.array([scalar] * len(data), dtype=data.dtype.numpy_dtype) + result = op(s, other) + expected = pd.Series(op(data, other)) + tm.assert_series_equal(result, expected) + + # Series with pd.array + other = pd.array([scalar] * len(data), dtype=data.dtype) + result = op(s, other) + expected = pd.Series(op(data, other)) + tm.assert_series_equal(result, expected) + + # Series with Series + other = pd.Series([scalar] * len(data), dtype=data.dtype) + result = op(s, other) + expected = pd.Series(op(data, other.array)) + tm.assert_series_equal(result, expected) + + +# Test generic characteristics / errors +# ----------------------------------------------------------------------------- + + +def test_error_invalid_object(data, all_arithmetic_operators): + data, _ = data + + op = all_arithmetic_operators + opa = getattr(data, op) + + # 2d -> return NotImplemented + result = opa(pd.DataFrame({"A": data})) + assert result is NotImplemented + + msg = r"can only perform ops with 1-d structures" + with pytest.raises(NotImplementedError, match=msg): + opa(np.arange(len(data)).reshape(-1, len(data))) + + +def test_error_len_mismatch(data, all_arithmetic_operators): + # operating with a list-like with non-matching length raises + data, scalar = data + op = tm.get_op_from_name(all_arithmetic_operators) + + other = [scalar] * (len(data) - 1) + + for other in [other, np.array(other)]: + with pytest.raises(ValueError, match="Lengths must match"): + op(data, other) + + s = pd.Series(data) + with pytest.raises(ValueError, match="Lengths must match"): + op(s, other) + + +@pytest.mark.parametrize("op", ["__neg__", "__abs__", "__invert__"]) +def test_unary_op_does_not_propagate_mask(data, op, request): + # https://github.com/pandas-dev/pandas/issues/39943 + data, _ = data + ser = pd.Series(data) + + if op == "__invert__" and data.dtype.kind == "f": + # we follow numpy in raising + msg = "ufunc 'invert' not supported for the input types" + with pytest.raises(TypeError, match=msg): + getattr(ser, op)() + with pytest.raises(TypeError, match=msg): + getattr(data, op)() + with pytest.raises(TypeError, match=msg): + # Check that this is still the numpy behavior + getattr(data._data, op)() + + return + + result = getattr(ser, op)() + expected = result.copy(deep=True) + ser[0] = None + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/masked/test_arrow_compat.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/masked/test_arrow_compat.py new file mode 100644 index 0000000..0517625 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/masked/test_arrow_compat.py @@ -0,0 +1,187 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + +pa = pytest.importorskip("pyarrow", minversion="1.0.1") + +from pandas.core.arrays._arrow_utils import pyarrow_array_to_numpy_and_mask + +arrays = [pd.array([1, 2, 3, None], dtype=dtype) for dtype in tm.ALL_INT_EA_DTYPES] +arrays += [pd.array([0.1, 0.2, 0.3, None], dtype=dtype) for dtype in tm.FLOAT_EA_DTYPES] +arrays += 
[pd.array([True, False, True, None], dtype="boolean")] + + +@pytest.fixture(params=arrays, ids=[a.dtype.name for a in arrays]) +def data(request): + return request.param + + +def test_arrow_array(data): + arr = pa.array(data) + expected = pa.array( + data.to_numpy(object, na_value=None), + type=pa.from_numpy_dtype(data.dtype.numpy_dtype), + ) + assert arr.equals(expected) + + +def test_arrow_roundtrip(data): + df = pd.DataFrame({"a": data}) + table = pa.table(df) + assert table.field("a").type == str(data.dtype.numpy_dtype) + result = table.to_pandas() + assert result["a"].dtype == data.dtype + tm.assert_frame_equal(result, df) + + +def test_dataframe_from_arrow_types_mapper(): + def types_mapper(arrow_type): + if pa.types.is_boolean(arrow_type): + return pd.BooleanDtype() + elif pa.types.is_integer(arrow_type): + return pd.Int64Dtype() + + bools_array = pa.array([True, None, False], type=pa.bool_()) + ints_array = pa.array([1, None, 2], type=pa.int64()) + small_ints_array = pa.array([-1, 0, 7], type=pa.int8()) + record_batch = pa.RecordBatch.from_arrays( + [bools_array, ints_array, small_ints_array], ["bools", "ints", "small_ints"] + ) + result = record_batch.to_pandas(types_mapper=types_mapper) + bools = pd.Series([True, None, False], dtype="boolean") + ints = pd.Series([1, None, 2], dtype="Int64") + small_ints = pd.Series([-1, 0, 7], dtype="Int64") + expected = pd.DataFrame({"bools": bools, "ints": ints, "small_ints": small_ints}) + tm.assert_frame_equal(result, expected) + + +def test_arrow_load_from_zero_chunks(data): + # GH-41040 + + df = pd.DataFrame({"a": data[0:0]}) + table = pa.table(df) + assert table.field("a").type == str(data.dtype.numpy_dtype) + table = pa.table( + [pa.chunked_array([], type=table.field("a").type)], schema=table.schema + ) + result = table.to_pandas() + assert result["a"].dtype == data.dtype + tm.assert_frame_equal(result, df) + + +def test_arrow_from_arrow_uint(): + # https://github.com/pandas-dev/pandas/issues/31896 + # possible mismatch in types + + dtype = pd.UInt32Dtype() + result = dtype.__from_arrow__(pa.array([1, 2, 3, 4, None], type="int64")) + expected = pd.array([1, 2, 3, 4, None], dtype="UInt32") + + tm.assert_extension_array_equal(result, expected) + + +def test_arrow_sliced(data): + # https://github.com/pandas-dev/pandas/issues/38525 + + df = pd.DataFrame({"a": data}) + table = pa.table(df) + result = table.slice(2, None).to_pandas() + expected = df.iloc[2:].reset_index(drop=True) + tm.assert_frame_equal(result, expected) + + # no missing values + df2 = df.fillna(data[0]) + table = pa.table(df2) + result = table.slice(2, None).to_pandas() + expected = df2.iloc[2:].reset_index(drop=True) + tm.assert_frame_equal(result, expected) + + +@pytest.fixture +def np_dtype_to_arrays(any_real_numpy_dtype): + np_dtype = np.dtype(any_real_numpy_dtype) + pa_type = pa.from_numpy_dtype(np_dtype) + + # None ensures the creation of a bitmask buffer. + pa_array = pa.array([0, 1, 2, None], type=pa_type) + # Since masked Arrow buffer slots are not required to contain a specific + # value, assert only the first three values of the created np.array + np_expected = np.array([0, 1, 2], dtype=np_dtype) + mask_expected = np.array([True, True, True, False]) + return np_dtype, pa_array, np_expected, mask_expected + + +def test_pyarrow_array_to_numpy_and_mask(np_dtype_to_arrays): + """ + Test conversion from pyarrow array to numpy array. + + Modifies the pyarrow buffer to contain padding and offset, which are + considered valid buffers by pyarrow. 
+ + Also tests empty pyarrow arrays with non empty buffers. + See https://github.com/pandas-dev/pandas/issues/40896 + """ + np_dtype, pa_array, np_expected, mask_expected = np_dtype_to_arrays + data, mask = pyarrow_array_to_numpy_and_mask(pa_array, np_dtype) + tm.assert_numpy_array_equal(data[:3], np_expected) + tm.assert_numpy_array_equal(mask, mask_expected) + + mask_buffer = pa_array.buffers()[0] + data_buffer = pa_array.buffers()[1] + data_buffer_bytes = pa_array.buffers()[1].to_pybytes() + + # Add trailing padding to the buffer. + data_buffer_trail = pa.py_buffer(data_buffer_bytes + b"\x00") + pa_array_trail = pa.Array.from_buffers( + type=pa_array.type, + length=len(pa_array), + buffers=[mask_buffer, data_buffer_trail], + offset=pa_array.offset, + ) + pa_array_trail.validate() + data, mask = pyarrow_array_to_numpy_and_mask(pa_array_trail, np_dtype) + tm.assert_numpy_array_equal(data[:3], np_expected) + tm.assert_numpy_array_equal(mask, mask_expected) + + # Add offset to the buffer. + offset = b"\x00" * (pa_array.type.bit_width // 8) + data_buffer_offset = pa.py_buffer(offset + data_buffer_bytes) + mask_buffer_offset = pa.py_buffer(b"\x0E") + pa_array_offset = pa.Array.from_buffers( + type=pa_array.type, + length=len(pa_array), + buffers=[mask_buffer_offset, data_buffer_offset], + offset=pa_array.offset + 1, + ) + pa_array_offset.validate() + data, mask = pyarrow_array_to_numpy_and_mask(pa_array_offset, np_dtype) + tm.assert_numpy_array_equal(data[:3], np_expected) + tm.assert_numpy_array_equal(mask, mask_expected) + + # Empty array + np_expected_empty = np.array([], dtype=np_dtype) + mask_expected_empty = np.array([], dtype=np.bool_) + + pa_array_offset = pa.Array.from_buffers( + type=pa_array.type, + length=0, + buffers=[mask_buffer, data_buffer], + offset=pa_array.offset, + ) + pa_array_offset.validate() + data, mask = pyarrow_array_to_numpy_and_mask(pa_array_offset, np_dtype) + tm.assert_numpy_array_equal(data[:3], np_expected_empty) + tm.assert_numpy_array_equal(mask, mask_expected_empty) + + +def test_from_arrow_type_error(data): + # ensure that __from_arrow__ returns a TypeError when getting a wrong + # array type + + arr = pa.array(data).cast("string") + with pytest.raises(TypeError, match=None): + # we don't test the exact error message, only the fact that it raises + # a TypeError is relevant + data.dtype.__from_arrow__(arr) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/masked/test_function.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/masked/test_function.py new file mode 100644 index 0000000..bf310c7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/masked/test_function.py @@ -0,0 +1,44 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_integer_dtype + +import pandas as pd +import pandas._testing as tm + +arrays = [pd.array([1, 2, 3, None], dtype=dtype) for dtype in tm.ALL_INT_EA_DTYPES] +arrays += [ + pd.array([0.141, -0.268, 5.895, None], dtype=dtype) for dtype in tm.FLOAT_EA_DTYPES +] + + +@pytest.fixture(params=arrays, ids=[a.dtype.name for a in arrays]) +def data(request): + return request.param + + +@pytest.fixture() +def numpy_dtype(data): + # For integer dtype, the numpy conversion must be done to float + if is_integer_dtype(data): + numpy_dtype = float + else: + numpy_dtype = data.dtype.type + return numpy_dtype + + +def test_round(data, numpy_dtype): + # No arguments + result = data.round() + expected = pd.array( + np.round(data.to_numpy(dtype=numpy_dtype, na_value=None)), 
dtype=data.dtype + ) + tm.assert_extension_array_equal(result, expected) + + # Decimals argument + result = data.round(decimals=2) + expected = pd.array( + np.round(data.to_numpy(dtype=numpy_dtype, na_value=None), decimals=2), + dtype=data.dtype, + ) + tm.assert_extension_array_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/masked_shared.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/masked_shared.py new file mode 100644 index 0000000..454be70 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/masked_shared.py @@ -0,0 +1,138 @@ +""" +Tests shared by MaskedArray subclasses. +""" +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.tests.extension.base import BaseOpsUtil + + +class ComparisonOps(BaseOpsUtil): + def _compare_other(self, data, op, other): + + # array + result = pd.Series(op(data, other)) + expected = pd.Series(op(data._data, other), dtype="boolean") + + # fill the nan locations + expected[data._mask] = pd.NA + + tm.assert_series_equal(result, expected) + + # series + ser = pd.Series(data) + result = op(ser, other) + + expected = op(pd.Series(data._data), other) + + # fill the nan locations + expected[data._mask] = pd.NA + expected = expected.astype("boolean") + + tm.assert_series_equal(result, expected) + + # subclass will override to parametrize 'other' + def test_scalar(self, other, comparison_op, dtype): + op = comparison_op + left = pd.array([1, 0, None], dtype=dtype) + + result = op(left, other) + + if other is pd.NA: + expected = pd.array([None, None, None], dtype="boolean") + else: + values = op(left._data, other) + expected = pd.arrays.BooleanArray(values, left._mask, copy=True) + tm.assert_extension_array_equal(result, expected) + + # ensure we haven't mutated anything inplace + result[0] = pd.NA + tm.assert_extension_array_equal(left, pd.array([1, 0, None], dtype=dtype)) + + +class NumericOps: + # Shared by IntegerArray and FloatingArray, not BooleanArray + + def test_no_shared_mask(self, data): + result = data + 1 + assert not tm.shares_memory(result, data) + + def test_array(self, comparison_op, dtype): + op = comparison_op + + left = pd.array([0, 1, 2, None, None, None], dtype=dtype) + right = pd.array([0, 1, None, 0, 1, None], dtype=dtype) + + result = op(left, right) + values = op(left._data, right._data) + mask = left._mask | right._mask + + expected = pd.arrays.BooleanArray(values, mask) + tm.assert_extension_array_equal(result, expected) + + # ensure we haven't mutated anything inplace + result[0] = pd.NA + tm.assert_extension_array_equal( + left, pd.array([0, 1, 2, None, None, None], dtype=dtype) + ) + tm.assert_extension_array_equal( + right, pd.array([0, 1, None, 0, 1, None], dtype=dtype) + ) + + def test_compare_with_booleanarray(self, comparison_op, dtype): + op = comparison_op + + left = pd.array([True, False, None] * 3, dtype="boolean") + right = pd.array([0] * 3 + [1] * 3 + [None] * 3, dtype=dtype) + other = pd.array([False] * 3 + [True] * 3 + [None] * 3, dtype="boolean") + + expected = op(left, other) + result = op(left, right) + tm.assert_extension_array_equal(result, expected) + + # reversed op + expected = op(other, left) + result = op(right, left) + tm.assert_extension_array_equal(result, expected) + + def test_compare_to_string(self, dtype): + # GH#28930 + ser = pd.Series([1, None], dtype=dtype) + result = ser == "a" + expected = pd.Series([False, pd.NA], dtype="boolean") + + self.assert_series_equal(result, expected) 
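+ # Editor's note (illustrative, kept in comment form since this sits at class-body level): NumPy routes ufunc calls on masked arrays through MaskedArray.__array_ufunc__, which also honors "out="; e.g., assuming Int64, + #   arr = pd.array([1, 2, 3], dtype="Int64") + #   np.add(arr, pd.array([1, 2, pd.NA], dtype="Int64"), out=arr) + # would leave arr equal to [2, 4, <NA>], which test_ufunc_with_out below exercises.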
+ + def test_ufunc_with_out(self, dtype): + arr = pd.array([1, 2, 3], dtype=dtype) + arr2 = pd.array([1, 2, pd.NA], dtype=dtype) + + mask = arr == arr + mask2 = arr2 == arr2 + + result = np.zeros(3, dtype=bool) + result |= mask + # If MaskedArray.__array_ufunc__ handled "out" appropriately, + # `result` should still be an ndarray. + assert isinstance(result, np.ndarray) + assert result.all() + + # result |= mask worked because mask could be cast losslessly to + # boolean ndarray. mask2 can't, so this raises + result = np.zeros(3, dtype=bool) + msg = "Specify an appropriate 'na_value' for this dtype" + with pytest.raises(ValueError, match=msg): + result |= mask2 + + # addition + res = np.add(arr, arr2) + expected = pd.array([2, 4, pd.NA], dtype=dtype) + tm.assert_extension_array_equal(res, expected) + + # when passing out=arr, we will modify 'arr' inplace. + res = np.add(arr, arr2, out=arr) + assert res is arr + tm.assert_extension_array_equal(res, expected) + tm.assert_extension_array_equal(arr, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/period/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/period/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/period/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/period/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..285a021 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/period/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/period/__pycache__/test_arrow_compat.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/period/__pycache__/test_arrow_compat.cpython-38.pyc new file mode 100644 index 0000000..d3e3828 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/period/__pycache__/test_arrow_compat.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/period/__pycache__/test_astype.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/period/__pycache__/test_astype.cpython-38.pyc new file mode 100644 index 0000000..19ef00c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/period/__pycache__/test_astype.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/period/__pycache__/test_constructors.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/period/__pycache__/test_constructors.cpython-38.pyc new file mode 100644 index 0000000..febcb8d Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/period/__pycache__/test_constructors.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/period/__pycache__/test_reductions.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/period/__pycache__/test_reductions.cpython-38.pyc new file mode 100644 index 0000000..23c8b34 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/period/__pycache__/test_reductions.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/period/test_arrow_compat.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/period/test_arrow_compat.py new file mode 100644 index 0000000..560299a --- /dev/null +++ 
b/.local/lib/python3.8/site-packages/pandas/tests/arrays/period/test_arrow_compat.py @@ -0,0 +1,121 @@ +import pytest + +from pandas.core.dtypes.dtypes import PeriodDtype + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays import ( + PeriodArray, + period_array, +) + +pa = pytest.importorskip("pyarrow", minversion="1.0.1") + + +def test_arrow_extension_type(): + from pandas.core.arrays._arrow_utils import ArrowPeriodType + + p1 = ArrowPeriodType("D") + p2 = ArrowPeriodType("D") + p3 = ArrowPeriodType("M") + + assert p1.freq == "D" + assert p1 == p2 + assert not p1 == p3 + assert hash(p1) == hash(p2) + assert not hash(p1) == hash(p3) + + +@pytest.mark.parametrize( + "data, freq", + [ + (pd.date_range("2017", periods=3), "D"), + (pd.date_range("2017", periods=3, freq="A"), "A-DEC"), + ], +) +def test_arrow_array(data, freq): + from pandas.core.arrays._arrow_utils import ArrowPeriodType + + periods = period_array(data, freq=freq) + result = pa.array(periods) + assert isinstance(result.type, ArrowPeriodType) + assert result.type.freq == freq + expected = pa.array(periods.asi8, type="int64") + assert result.storage.equals(expected) + + # convert to its storage type + result = pa.array(periods, type=pa.int64()) + assert result.equals(expected) + + # unsupported conversions + msg = "Not supported to convert PeriodArray to 'double' type" + with pytest.raises(TypeError, match=msg): + pa.array(periods, type="float64") + + with pytest.raises(TypeError, match="different 'freq'"): + pa.array(periods, type=ArrowPeriodType("T")) + + +def test_arrow_array_missing(): + from pandas.core.arrays._arrow_utils import ArrowPeriodType + + arr = PeriodArray([1, 2, 3], freq="D") + arr[1] = pd.NaT + + result = pa.array(arr) + assert isinstance(result.type, ArrowPeriodType) + assert result.type.freq == "D" + expected = pa.array([1, None, 3], type="int64") + assert result.storage.equals(expected) + + +def test_arrow_table_roundtrip(): + from pandas.core.arrays._arrow_utils import ArrowPeriodType + + arr = PeriodArray([1, 2, 3], freq="D") + arr[1] = pd.NaT + df = pd.DataFrame({"a": arr}) + + table = pa.table(df) + assert isinstance(table.field("a").type, ArrowPeriodType) + result = table.to_pandas() + assert isinstance(result["a"].dtype, PeriodDtype) + tm.assert_frame_equal(result, df) + + table2 = pa.concat_tables([table, table]) + result = table2.to_pandas() + expected = pd.concat([df, df], ignore_index=True) + tm.assert_frame_equal(result, expected) + + +def test_arrow_load_from_zero_chunks(): + # GH-41040 + + from pandas.core.arrays._arrow_utils import ArrowPeriodType + + arr = PeriodArray([], freq="D") + df = pd.DataFrame({"a": arr}) + + table = pa.table(df) + assert isinstance(table.field("a").type, ArrowPeriodType) + table = pa.table( + [pa.chunked_array([], type=table.column(0).type)], schema=table.schema + ) + result = table.to_pandas() + assert isinstance(result["a"].dtype, PeriodDtype) + tm.assert_frame_equal(result, df) + + +def test_arrow_table_roundtrip_without_metadata(): + arr = PeriodArray([1, 2, 3], freq="H") + arr[1] = pd.NaT + df = pd.DataFrame({"a": arr}) + + table = pa.table(df) + # remove the metadata + table = table.replace_schema_metadata() + assert table.schema.metadata is None + + result = table.to_pandas() + assert isinstance(result["a"].dtype, PeriodDtype) + tm.assert_frame_equal(result, df) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/period/test_astype.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/period/test_astype.py new 
file mode 100644 index 0000000..f05265c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/period/test_astype.py @@ -0,0 +1,69 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import PeriodDtype + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays import period_array + + +@pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"]) +def test_astype(dtype): + # We choose to ignore the sign and size of integers for + # Period/Datetime/Timedelta astype + arr = period_array(["2000", "2001", None], freq="D") + result = arr.astype(dtype) + + if np.dtype(dtype).kind == "u": + expected_dtype = np.dtype("uint64") + else: + expected_dtype = np.dtype("int64") + + expected = arr.astype(expected_dtype) + + assert result.dtype == expected_dtype + tm.assert_numpy_array_equal(result, expected) + + +def test_astype_copies(): + arr = period_array(["2000", "2001", None], freq="D") + result = arr.astype(np.int64, copy=False) + + # Add the `.base`, since we now use `.asi8` which returns a view. + # We could maybe override it in PeriodArray to return ._data directly. + assert result.base is arr._data + + result = arr.astype(np.int64, copy=True) + assert result is not arr._data + tm.assert_numpy_array_equal(result, arr._data.view("i8")) + + +def test_astype_categorical(): + arr = period_array(["2000", "2001", "2001", None], freq="D") + result = arr.astype("category") + categories = pd.PeriodIndex(["2000", "2001"], freq="D") + expected = pd.Categorical.from_codes([0, 1, 1, -1], categories=categories) + tm.assert_categorical_equal(result, expected) + + +def test_astype_period(): + arr = period_array(["2000", "2001", None], freq="D") + result = arr.astype(PeriodDtype("M")) + expected = period_array(["2000", "2001", None], freq="M") + tm.assert_period_array_equal(result, expected) + + +@pytest.mark.parametrize("other", ["datetime64[ns]", "timedelta64[ns]"]) +def test_astype_datetime(other): + arr = period_array(["2000", "2001", None], freq="D") + # slice off the [ns] so that the regex matches. 
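+ # (In a pytest.raises "match" pattern, the brackets of "timedelta64[ns]"
+ # would be read as the regex character class [ns], so the full dtype
+ # string would not match the message literally; re.escape(other) would
+ # be an equivalent fix.)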
+ if other == "timedelta64[ns]": + with pytest.raises(TypeError, match=other[:-4]): + arr.astype(other) + + else: + # GH#45038 allow period->dt64 because we allow dt64->period + result = arr.astype(other) + expected = pd.DatetimeIndex(["2000", "2001", pd.NaT])._data + tm.assert_datetime_array_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/period/test_constructors.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/period/test_constructors.py new file mode 100644 index 0000000..cf97490 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/period/test_constructors.py @@ -0,0 +1,123 @@ +import numpy as np +import pytest + +from pandas._libs.tslibs import iNaT +from pandas._libs.tslibs.period import IncompatibleFrequency + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays import ( + PeriodArray, + period_array, +) + + +@pytest.mark.parametrize( + "data, freq, expected", + [ + ([pd.Period("2017", "D")], None, [17167]), + ([pd.Period("2017", "D")], "D", [17167]), + ([2017], "D", [17167]), + (["2017"], "D", [17167]), + ([pd.Period("2017", "D")], pd.tseries.offsets.Day(), [17167]), + ([pd.Period("2017", "D"), None], None, [17167, iNaT]), + (pd.Series(pd.date_range("2017", periods=3)), None, [17167, 17168, 17169]), + (pd.date_range("2017", periods=3), None, [17167, 17168, 17169]), + (pd.period_range("2017", periods=4, freq="Q"), None, [188, 189, 190, 191]), + ], +) +def test_period_array_ok(data, freq, expected): + result = period_array(data, freq=freq).asi8 + expected = np.asarray(expected, dtype=np.int64) + tm.assert_numpy_array_equal(result, expected) + + +def test_period_array_readonly_object(): + # https://github.com/pandas-dev/pandas/issues/25403 + pa = period_array([pd.Period("2019-01-01")]) + arr = np.asarray(pa, dtype="object") + arr.setflags(write=False) + + result = period_array(arr) + tm.assert_period_array_equal(result, pa) + + result = pd.Series(arr) + tm.assert_series_equal(result, pd.Series(pa)) + + result = pd.DataFrame({"A": arr}) + tm.assert_frame_equal(result, pd.DataFrame({"A": pa})) + + +def test_from_datetime64_freq_changes(): + # https://github.com/pandas-dev/pandas/issues/23438 + arr = pd.date_range("2017", periods=3, freq="D") + result = PeriodArray._from_datetime64(arr, freq="M") + expected = period_array(["2017-01-01", "2017-01-01", "2017-01-01"], freq="M") + tm.assert_period_array_equal(result, expected) + + +@pytest.mark.parametrize( + "data, freq, msg", + [ + ( + [pd.Period("2017", "D"), pd.Period("2017", "A")], + None, + "Input has different freq", + ), + ([pd.Period("2017", "D")], "A", "Input has different freq"), + ], +) +def test_period_array_raises(data, freq, msg): + with pytest.raises(IncompatibleFrequency, match=msg): + period_array(data, freq) + + +def test_period_array_non_period_series_raises(): + ser = pd.Series([1, 2, 3]) + with pytest.raises(TypeError, match="dtype"): + PeriodArray(ser, freq="D") + + +def test_period_array_freq_mismatch(): + arr = period_array(["2000", "2001"], freq="D") + with pytest.raises(IncompatibleFrequency, match="freq"): + PeriodArray(arr, freq="M") + + with pytest.raises(IncompatibleFrequency, match="freq"): + PeriodArray(arr, freq=pd.tseries.offsets.MonthEnd()) + + +def test_from_sequence_disallows_i8(): + arr = period_array(["2000", "2001"], freq="D") + + msg = str(arr[0].ordinal) + with pytest.raises(TypeError, match=msg): + PeriodArray._from_sequence(arr.asi8, dtype=arr.dtype) + + with pytest.raises(TypeError, match=msg): +
PeriodArray._from_sequence(list(arr.asi8), dtype=arr.dtype) + + +def test_from_td64nat_sequence_raises(): + # GH#44507 + td = pd.NaT.to_numpy("m8[ns]") + + dtype = pd.period_range("2005-01-01", periods=3, freq="D").dtype + + arr = np.array([None], dtype=object) + arr[0] = td + + msg = "Value must be Period, string, integer, or datetime" + with pytest.raises(ValueError, match=msg): + PeriodArray._from_sequence(arr, dtype=dtype) + + with pytest.raises(ValueError, match=msg): + pd.PeriodIndex(arr, dtype=dtype) + with pytest.raises(ValueError, match=msg): + pd.Index(arr, dtype=dtype) + with pytest.raises(ValueError, match=msg): + pd.array(arr, dtype=dtype) + with pytest.raises(ValueError, match=msg): + pd.Series(arr, dtype=dtype) + with pytest.raises(ValueError, match=msg): + pd.DataFrame(arr, dtype=dtype) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/period/test_reductions.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/period/test_reductions.py new file mode 100644 index 0000000..2889cc7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/period/test_reductions.py @@ -0,0 +1,42 @@ +import pytest + +import pandas as pd +from pandas.core.arrays import period_array + + +class TestReductions: + def test_min_max(self): + arr = period_array( + [ + "2000-01-03", + "2000-01-03", + "NaT", + "2000-01-02", + "2000-01-05", + "2000-01-04", + ], + freq="D", + ) + + result = arr.min() + expected = pd.Period("2000-01-02", freq="D") + assert result == expected + + result = arr.max() + expected = pd.Period("2000-01-05", freq="D") + assert result == expected + + result = arr.min(skipna=False) + assert result is pd.NaT + + result = arr.max(skipna=False) + assert result is pd.NaT + + @pytest.mark.parametrize("skipna", [True, False]) + def test_min_max_empty(self, skipna): + arr = period_array([], freq="D") + result = arr.min(skipna=skipna) + assert result is pd.NaT + + result = arr.max(skipna=skipna) + assert result is pd.NaT diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/sparse/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/sparse/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/sparse/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/sparse/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..cd76e39 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/sparse/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/sparse/__pycache__/test_accessor.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/sparse/__pycache__/test_accessor.cpython-38.pyc new file mode 100644 index 0000000..cc8284f Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/sparse/__pycache__/test_accessor.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/sparse/__pycache__/test_arithmetics.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/sparse/__pycache__/test_arithmetics.cpython-38.pyc new file mode 100644 index 0000000..0e4b731 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/sparse/__pycache__/test_arithmetics.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/sparse/__pycache__/test_array.cpython-38.pyc 
b/.local/lib/python3.8/site-packages/pandas/tests/arrays/sparse/__pycache__/test_array.cpython-38.pyc new file mode 100644 index 0000000..61acc9a Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/sparse/__pycache__/test_array.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/sparse/__pycache__/test_combine_concat.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/sparse/__pycache__/test_combine_concat.cpython-38.pyc new file mode 100644 index 0000000..9032fe2 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/sparse/__pycache__/test_combine_concat.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/sparse/__pycache__/test_dtype.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/sparse/__pycache__/test_dtype.cpython-38.pyc new file mode 100644 index 0000000..19a2aca Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/sparse/__pycache__/test_dtype.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/sparse/__pycache__/test_libsparse.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/sparse/__pycache__/test_libsparse.cpython-38.pyc new file mode 100644 index 0000000..7428b89 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/sparse/__pycache__/test_libsparse.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/sparse/test_accessor.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/sparse/test_accessor.py new file mode 100644 index 0000000..e45dbb3 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/sparse/test_accessor.py @@ -0,0 +1,159 @@ +import string + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays.sparse import ( + SparseArray, + SparseDtype, +) + + +class TestSeriesAccessor: + # TODO: collect other Series accessor tests + def test_to_dense(self): + s = pd.Series([0, 1, 0, 10], dtype="Sparse[int64]") + result = s.sparse.to_dense() + expected = pd.Series([0, 1, 0, 10]) + tm.assert_series_equal(result, expected) + + +class TestFrameAccessor: + def test_accessor_raises(self): + df = pd.DataFrame({"A": [0, 1]}) + with pytest.raises(AttributeError, match="sparse"): + df.sparse + + @pytest.mark.parametrize("format", ["csc", "csr", "coo"]) + @pytest.mark.parametrize("labels", [None, list(string.ascii_letters[:10])]) + @pytest.mark.parametrize("dtype", ["float64", "int64"]) + @td.skip_if_no_scipy + def test_from_spmatrix(self, format, labels, dtype): + import scipy.sparse + + sp_dtype = SparseDtype(dtype, np.array(0, dtype=dtype).item()) + + mat = scipy.sparse.eye(10, format=format, dtype=dtype) + result = pd.DataFrame.sparse.from_spmatrix(mat, index=labels, columns=labels) + expected = pd.DataFrame( + np.eye(10, dtype=dtype), index=labels, columns=labels + ).astype(sp_dtype) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("format", ["csc", "csr", "coo"]) + @td.skip_if_no_scipy + def test_from_spmatrix_including_explicit_zero(self, format): + import scipy.sparse + + mat = scipy.sparse.random(10, 2, density=0.5, format=format) + mat.data[0] = 0 + result = pd.DataFrame.sparse.from_spmatrix(mat) + dtype = SparseDtype("float64", 0.0) + expected = pd.DataFrame(mat.todense()).astype(dtype) + 
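# (mat.data[0] = 0 above stores an explicitly-encoded zero in the scipy
+ # matrix; todense() renders it identically to a structural zero, so the
+ # sparse and dense construction paths should agree.) +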
tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "columns", + [["a", "b"], pd.MultiIndex.from_product([["A"], ["a", "b"]]), ["a", "a"]], + ) + @td.skip_if_no_scipy + def test_from_spmatrix_columns(self, columns): + import scipy.sparse + + dtype = SparseDtype("float64", 0.0) + + mat = scipy.sparse.random(10, 2, density=0.5) + result = pd.DataFrame.sparse.from_spmatrix(mat, columns=columns) + expected = pd.DataFrame(mat.toarray(), columns=columns).astype(dtype) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "colnames", [("A", "B"), (1, 2), (1, pd.NA), (0.1, 0.2), ("x", "x"), (0, 0)] + ) + @td.skip_if_no_scipy + def test_to_coo(self, colnames): + import scipy.sparse + + df = pd.DataFrame( + {colnames[0]: [0, 1, 0], colnames[1]: [1, 0, 0]}, dtype="Sparse[int64, 0]" + ) + result = df.sparse.to_coo() + expected = scipy.sparse.coo_matrix(np.asarray(df)) + assert (result != expected).nnz == 0 + + @pytest.mark.parametrize("fill_value", [1, np.nan]) + @td.skip_if_no_scipy + def test_to_coo_nonzero_fill_val_raises(self, fill_value): + df = pd.DataFrame( + { + "A": SparseArray( + [fill_value, fill_value, fill_value, 2], fill_value=fill_value + ), + "B": SparseArray( + [fill_value, 2, fill_value, fill_value], fill_value=fill_value + ), + } + ) + with pytest.raises(ValueError, match="fill value must be 0"): + df.sparse.to_coo() + + def test_to_dense(self): + df = pd.DataFrame( + { + "A": SparseArray([1, 0], dtype=SparseDtype("int64", 0)), + "B": SparseArray([1, 0], dtype=SparseDtype("int64", 1)), + "C": SparseArray([1.0, 0.0], dtype=SparseDtype("float64", 0.0)), + }, + index=["b", "a"], + ) + result = df.sparse.to_dense() + expected = pd.DataFrame( + {"A": [1, 0], "B": [1, 0], "C": [1.0, 0.0]}, index=["b", "a"] + ) + tm.assert_frame_equal(result, expected) + + def test_density(self): + df = pd.DataFrame( + { + "A": SparseArray([1, 0, 2, 1], fill_value=0), + "B": SparseArray([0, 1, 1, 1], fill_value=0), + } + ) + res = df.sparse.density + expected = 0.75 + assert res == expected + + @pytest.mark.parametrize("dtype", ["int64", "float64"]) + @pytest.mark.parametrize("dense_index", [True, False]) + @td.skip_if_no_scipy + def test_series_from_coo(self, dtype, dense_index): + import scipy.sparse + + A = scipy.sparse.eye(3, format="coo", dtype=dtype) + result = pd.Series.sparse.from_coo(A, dense_index=dense_index) + index = pd.MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)]) + expected = pd.Series(SparseArray(np.array([1, 1, 1], dtype=dtype)), index=index) + if dense_index: + expected = expected.reindex(pd.MultiIndex.from_product(index.levels)) + + tm.assert_series_equal(result, expected) + + @td.skip_if_no_scipy + def test_series_from_coo_incorrect_format_raises(self): + # gh-26554 + import scipy.sparse + + m = scipy.sparse.csr_matrix(np.array([[0, 1], [0, 0]])) + with pytest.raises( + TypeError, match="Expected coo_matrix. Got csr_matrix instead." 
+ ): + pd.Series.sparse.from_coo(m) + + def test_with_column_named_sparse(self): + # https://github.com/pandas-dev/pandas/issues/30758 + df = pd.DataFrame({"sparse": pd.arrays.SparseArray([1, 2])}) + assert isinstance(df.sparse, pd.core.arrays.sparse.accessor.SparseFrameAccessor) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/sparse/test_arithmetics.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/sparse/test_arithmetics.py new file mode 100644 index 0000000..3db1ee9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/sparse/test_arithmetics.py @@ -0,0 +1,527 @@ +import operator + +import numpy as np +import pytest + +from pandas.compat import np_version_under1p20 + +import pandas as pd +import pandas._testing as tm +from pandas.core import ops +from pandas.core.arrays.sparse import ( + SparseArray, + SparseDtype, +) + + +@pytest.fixture(params=["integer", "block"]) +def kind(request): + """kind kwarg to pass to SparseArray/SparseSeries""" + return request.param + + +@pytest.fixture(params=[True, False]) +def mix(request): + # whether to operate op(sparse, dense) instead of op(sparse, sparse) + return request.param + + +class TestSparseArrayArithmetics: + + _base = np.array + _klass = SparseArray + + def _assert(self, a, b): + # We have to use tm.assert_sp_array_equal. See GH #45126 + tm.assert_numpy_array_equal(a, b) + + def _check_numeric_ops(self, a, b, a_dense, b_dense, mix: bool, op): + # Check that arithmetic behavior matches non-Sparse Series arithmetic + + if isinstance(a_dense, np.ndarray): + expected = op(pd.Series(a_dense), b_dense).values + elif isinstance(b_dense, np.ndarray): + expected = op(a_dense, pd.Series(b_dense)).values + else: + raise NotImplementedError + + with np.errstate(invalid="ignore", divide="ignore"): + if mix: + result = op(a, b_dense).to_dense() + else: + result = op(a, b).to_dense() + + self._assert(result, expected) + + def _check_bool_result(self, res): + assert isinstance(res, self._klass) + assert isinstance(res.dtype, SparseDtype) + assert res.dtype.subtype == np.bool_ + assert isinstance(res.fill_value, bool) + + def _check_comparison_ops(self, a, b, a_dense, b_dense): + with np.errstate(invalid="ignore"): + # Unfortunately, trying to wrap the computation of each expected + # value with np.errstate() is too tedious.
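+ # A single wrapped check would read, roughly:
+ #
+ #     with np.errstate(invalid="ignore"):
+ #         expected = a_dense == b_dense
+ #     self._assert((a == b).to_dense(), expected)
+ #
+ # and so on for each operator exercised below, hence one enclosing block.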
+ # + # sparse & sparse + self._check_bool_result(a == b) + self._assert((a == b).to_dense(), a_dense == b_dense) + + self._check_bool_result(a != b) + self._assert((a != b).to_dense(), a_dense != b_dense) + + self._check_bool_result(a >= b) + self._assert((a >= b).to_dense(), a_dense >= b_dense) + + self._check_bool_result(a <= b) + self._assert((a <= b).to_dense(), a_dense <= b_dense) + + self._check_bool_result(a > b) + self._assert((a > b).to_dense(), a_dense > b_dense) + + self._check_bool_result(a < b) + self._assert((a < b).to_dense(), a_dense < b_dense) + + # sparse & dense + self._check_bool_result(a == b_dense) + self._assert((a == b_dense).to_dense(), a_dense == b_dense) + + self._check_bool_result(a != b_dense) + self._assert((a != b_dense).to_dense(), a_dense != b_dense) + + self._check_bool_result(a >= b_dense) + self._assert((a >= b_dense).to_dense(), a_dense >= b_dense) + + self._check_bool_result(a <= b_dense) + self._assert((a <= b_dense).to_dense(), a_dense <= b_dense) + + self._check_bool_result(a > b_dense) + self._assert((a > b_dense).to_dense(), a_dense > b_dense) + + self._check_bool_result(a < b_dense) + self._assert((a < b_dense).to_dense(), a_dense < b_dense) + + def _check_logical_ops(self, a, b, a_dense, b_dense): + # sparse & sparse + self._check_bool_result(a & b) + self._assert((a & b).to_dense(), a_dense & b_dense) + + self._check_bool_result(a | b) + self._assert((a | b).to_dense(), a_dense | b_dense) + # sparse & dense + self._check_bool_result(a & b_dense) + self._assert((a & b_dense).to_dense(), a_dense & b_dense) + + self._check_bool_result(a | b_dense) + self._assert((a | b_dense).to_dense(), a_dense | b_dense) + + @pytest.mark.parametrize("scalar", [0, 1, 3]) + @pytest.mark.parametrize("fill_value", [None, 0, 2]) + def test_float_scalar( + self, kind, mix, all_arithmetic_functions, fill_value, scalar, request + ): + op = all_arithmetic_functions + + if np_version_under1p20: + if op in [operator.floordiv, ops.rfloordiv]: + if op is operator.floordiv and scalar != 0: + pass + elif op is ops.rfloordiv and scalar == 0: + pass + else: + mark = pytest.mark.xfail(raises=AssertionError, reason="GH#38172") + request.node.add_marker(mark) + + values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) + + a = self._klass(values, kind=kind, fill_value=fill_value) + self._check_numeric_ops(a, scalar, values, scalar, mix, op) + + def test_float_scalar_comparison(self, kind): + values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) + + a = self._klass(values, kind=kind) + self._check_comparison_ops(a, 1, values, 1) + self._check_comparison_ops(a, 0, values, 0) + self._check_comparison_ops(a, 3, values, 3) + + a = self._klass(values, kind=kind, fill_value=0) + self._check_comparison_ops(a, 1, values, 1) + self._check_comparison_ops(a, 0, values, 0) + self._check_comparison_ops(a, 3, values, 3) + + a = self._klass(values, kind=kind, fill_value=2) + self._check_comparison_ops(a, 1, values, 1) + self._check_comparison_ops(a, 0, values, 0) + self._check_comparison_ops(a, 3, values, 3) + + def test_float_same_index_without_nans(self, kind, mix, all_arithmetic_functions): + # when sp_index are the same + op = all_arithmetic_functions + + values = self._base([0.0, 1.0, 2.0, 6.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0]) + rvalues = self._base([0.0, 2.0, 3.0, 4.0, 0.0, 0.0, 1.0, 3.0, 2.0, 0.0]) + + a = self._klass(values, kind=kind, fill_value=0) + b = self._klass(rvalues, kind=kind, fill_value=0) + self._check_numeric_ops(a, b, values, rvalues, mix, op) + + def 
test_float_same_index_with_nans( + self, kind, mix, all_arithmetic_functions, request + ): + # when sp_index are the same + op = all_arithmetic_functions + + if ( + np_version_under1p20 + and op is ops.rfloordiv + and not (mix and kind == "block") + ): + mark = pytest.mark.xfail(raises=AssertionError, reason="GH#38172") + request.node.add_marker(mark) + values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) + rvalues = self._base([np.nan, 2, 3, 4, np.nan, 0, 1, 3, 2, np.nan]) + + a = self._klass(values, kind=kind) + b = self._klass(rvalues, kind=kind) + self._check_numeric_ops(a, b, values, rvalues, mix, op) + + def test_float_same_index_comparison(self, kind): + # when sp_index are the same + values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) + rvalues = self._base([np.nan, 2, 3, 4, np.nan, 0, 1, 3, 2, np.nan]) + + a = self._klass(values, kind=kind) + b = self._klass(rvalues, kind=kind) + self._check_comparison_ops(a, b, values, rvalues) + + values = self._base([0.0, 1.0, 2.0, 6.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0]) + rvalues = self._base([0.0, 2.0, 3.0, 4.0, 0.0, 0.0, 1.0, 3.0, 2.0, 0.0]) + + a = self._klass(values, kind=kind, fill_value=0) + b = self._klass(rvalues, kind=kind, fill_value=0) + self._check_comparison_ops(a, b, values, rvalues) + + def test_float_array(self, kind, mix, all_arithmetic_functions): + op = all_arithmetic_functions + + values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) + rvalues = self._base([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan]) + + a = self._klass(values, kind=kind) + b = self._klass(rvalues, kind=kind) + self._check_numeric_ops(a, b, values, rvalues, mix, op) + self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op) + + a = self._klass(values, kind=kind, fill_value=0) + b = self._klass(rvalues, kind=kind) + self._check_numeric_ops(a, b, values, rvalues, mix, op) + + a = self._klass(values, kind=kind, fill_value=0) + b = self._klass(rvalues, kind=kind, fill_value=0) + self._check_numeric_ops(a, b, values, rvalues, mix, op) + + a = self._klass(values, kind=kind, fill_value=1) + b = self._klass(rvalues, kind=kind, fill_value=2) + self._check_numeric_ops(a, b, values, rvalues, mix, op) + + def test_float_array_different_kind(self, mix, all_arithmetic_functions): + op = all_arithmetic_functions + + values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) + rvalues = self._base([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan]) + + a = self._klass(values, kind="integer") + b = self._klass(rvalues, kind="block") + self._check_numeric_ops(a, b, values, rvalues, mix, op) + self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op) + + a = self._klass(values, kind="integer", fill_value=0) + b = self._klass(rvalues, kind="block") + self._check_numeric_ops(a, b, values, rvalues, mix, op) + + a = self._klass(values, kind="integer", fill_value=0) + b = self._klass(rvalues, kind="block", fill_value=0) + self._check_numeric_ops(a, b, values, rvalues, mix, op) + + a = self._klass(values, kind="integer", fill_value=1) + b = self._klass(rvalues, kind="block", fill_value=2) + self._check_numeric_ops(a, b, values, rvalues, mix, op) + + def test_float_array_comparison(self, kind): + values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) + rvalues = self._base([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan]) + + a = self._klass(values, kind=kind) + b = self._klass(rvalues, kind=kind) + self._check_comparison_ops(a, b, values, rvalues) + self._check_comparison_ops(a, b * 0, values, rvalues * 0) + + a = 
self._klass(values, kind=kind, fill_value=0) + b = self._klass(rvalues, kind=kind) + self._check_comparison_ops(a, b, values, rvalues) + + a = self._klass(values, kind=kind, fill_value=0) + b = self._klass(rvalues, kind=kind, fill_value=0) + self._check_comparison_ops(a, b, values, rvalues) + + a = self._klass(values, kind=kind, fill_value=1) + b = self._klass(rvalues, kind=kind, fill_value=2) + self._check_comparison_ops(a, b, values, rvalues) + + def test_int_array(self, kind, mix, all_arithmetic_functions): + op = all_arithmetic_functions + + # have to specify dtype explicitly until fixing GH 667 + dtype = np.int64 + + values = self._base([0, 1, 2, 0, 0, 0, 1, 2, 1, 0], dtype=dtype) + rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=dtype) + + a = self._klass(values, dtype=dtype, kind=kind) + assert a.dtype == SparseDtype(dtype) + b = self._klass(rvalues, dtype=dtype, kind=kind) + assert b.dtype == SparseDtype(dtype) + + self._check_numeric_ops(a, b, values, rvalues, mix, op) + self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op) + + a = self._klass(values, fill_value=0, dtype=dtype, kind=kind) + assert a.dtype == SparseDtype(dtype) + b = self._klass(rvalues, dtype=dtype, kind=kind) + assert b.dtype == SparseDtype(dtype) + + self._check_numeric_ops(a, b, values, rvalues, mix, op) + + a = self._klass(values, fill_value=0, dtype=dtype, kind=kind) + assert a.dtype == SparseDtype(dtype) + b = self._klass(rvalues, fill_value=0, dtype=dtype, kind=kind) + assert b.dtype == SparseDtype(dtype) + self._check_numeric_ops(a, b, values, rvalues, mix, op) + + a = self._klass(values, fill_value=1, dtype=dtype, kind=kind) + assert a.dtype == SparseDtype(dtype, fill_value=1) + b = self._klass(rvalues, fill_value=2, dtype=dtype, kind=kind) + assert b.dtype == SparseDtype(dtype, fill_value=2) + self._check_numeric_ops(a, b, values, rvalues, mix, op) + + def test_int_array_comparison(self, kind): + dtype = "int64" + # int32 NI ATM + + values = self._base([0, 1, 2, 0, 0, 0, 1, 2, 1, 0], dtype=dtype) + rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=dtype) + + a = self._klass(values, dtype=dtype, kind=kind) + b = self._klass(rvalues, dtype=dtype, kind=kind) + self._check_comparison_ops(a, b, values, rvalues) + self._check_comparison_ops(a, b * 0, values, rvalues * 0) + + a = self._klass(values, dtype=dtype, kind=kind, fill_value=0) + b = self._klass(rvalues, dtype=dtype, kind=kind) + self._check_comparison_ops(a, b, values, rvalues) + + a = self._klass(values, dtype=dtype, kind=kind, fill_value=0) + b = self._klass(rvalues, dtype=dtype, kind=kind, fill_value=0) + self._check_comparison_ops(a, b, values, rvalues) + + a = self._klass(values, dtype=dtype, kind=kind, fill_value=1) + b = self._klass(rvalues, dtype=dtype, kind=kind, fill_value=2) + self._check_comparison_ops(a, b, values, rvalues) + + @pytest.mark.parametrize("fill_value", [True, False, np.nan]) + def test_bool_same_index(self, kind, fill_value): + # GH 14000 + # when sp_index are the same + values = self._base([True, False, True, True], dtype=np.bool_) + rvalues = self._base([True, False, True, True], dtype=np.bool_) + + a = self._klass(values, kind=kind, dtype=np.bool_, fill_value=fill_value) + b = self._klass(rvalues, kind=kind, dtype=np.bool_, fill_value=fill_value) + self._check_logical_ops(a, b, values, rvalues) + + @pytest.mark.parametrize("fill_value", [True, False, np.nan]) + def test_bool_array_logical(self, kind, fill_value): + # GH 14000 + # when sp_index are the same + values = self._base([True, False, 
True, False, True, True], dtype=np.bool_) + rvalues = self._base([True, False, False, True, False, True], dtype=np.bool_) + + a = self._klass(values, kind=kind, dtype=np.bool_, fill_value=fill_value) + b = self._klass(rvalues, kind=kind, dtype=np.bool_, fill_value=fill_value) + self._check_logical_ops(a, b, values, rvalues) + + def test_mixed_array_float_int(self, kind, mix, all_arithmetic_functions, request): + op = all_arithmetic_functions + + if np_version_under1p20 and op in [operator.floordiv, ops.rfloordiv] and mix: + mark = pytest.mark.xfail(raises=AssertionError, reason="GH#38172") + request.node.add_marker(mark) + + rdtype = "int64" + + values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) + rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=rdtype) + + a = self._klass(values, kind=kind) + b = self._klass(rvalues, kind=kind) + assert b.dtype == SparseDtype(rdtype) + + self._check_numeric_ops(a, b, values, rvalues, mix, op) + self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op) + + a = self._klass(values, kind=kind, fill_value=0) + b = self._klass(rvalues, kind=kind) + assert b.dtype == SparseDtype(rdtype) + self._check_numeric_ops(a, b, values, rvalues, mix, op) + + a = self._klass(values, kind=kind, fill_value=0) + b = self._klass(rvalues, kind=kind, fill_value=0) + assert b.dtype == SparseDtype(rdtype) + self._check_numeric_ops(a, b, values, rvalues, mix, op) + + a = self._klass(values, kind=kind, fill_value=1) + b = self._klass(rvalues, kind=kind, fill_value=2) + assert b.dtype == SparseDtype(rdtype, fill_value=2) + self._check_numeric_ops(a, b, values, rvalues, mix, op) + + def test_mixed_array_comparison(self, kind): + rdtype = "int64" + # int32 NI ATM + + values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) + rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=rdtype) + + a = self._klass(values, kind=kind) + b = self._klass(rvalues, kind=kind) + assert b.dtype == SparseDtype(rdtype) + + self._check_comparison_ops(a, b, values, rvalues) + self._check_comparison_ops(a, b * 0, values, rvalues * 0) + + a = self._klass(values, kind=kind, fill_value=0) + b = self._klass(rvalues, kind=kind) + assert b.dtype == SparseDtype(rdtype) + self._check_comparison_ops(a, b, values, rvalues) + + a = self._klass(values, kind=kind, fill_value=0) + b = self._klass(rvalues, kind=kind, fill_value=0) + assert b.dtype == SparseDtype(rdtype) + self._check_comparison_ops(a, b, values, rvalues) + + a = self._klass(values, kind=kind, fill_value=1) + b = self._klass(rvalues, kind=kind, fill_value=2) + assert b.dtype == SparseDtype(rdtype, fill_value=2) + self._check_comparison_ops(a, b, values, rvalues) + + def test_xor(self): + s = SparseArray([True, True, False, False]) + t = SparseArray([True, False, True, False]) + result = s ^ t + sp_index = pd.core.arrays.sparse.IntIndex(4, np.array([0, 1, 2], dtype="int32")) + expected = SparseArray([False, True, True], sparse_index=sp_index) + tm.assert_sp_array_equal(result, expected) + + +@pytest.mark.parametrize("op", [operator.eq, operator.add]) +def test_with_list(op): + arr = SparseArray([0, 1], fill_value=0) + result = op(arr, [0, 1]) + expected = op(arr, SparseArray([0, 1])) + tm.assert_sp_array_equal(result, expected) + + +def test_with_dataframe(): + # GH#27910 + arr = SparseArray([0, 1], fill_value=0) + df = pd.DataFrame([[1, 2], [3, 4]]) + result = arr.__add__(df) + assert result is NotImplemented + + +def test_with_zerodim_ndarray(): + # GH#27910 + arr = SparseArray([0, 1], fill_value=0) + + result 
= arr * np.array(2) + expected = arr * 2 + tm.assert_sp_array_equal(result, expected) + + +@pytest.mark.parametrize("ufunc", [np.abs, np.exp]) +@pytest.mark.parametrize( + "arr", [SparseArray([0, 0, -1, 1]), SparseArray([None, None, -1, 1])] +) +def test_ufuncs(ufunc, arr): + result = ufunc(arr) + fill_value = ufunc(arr.fill_value) + expected = SparseArray(ufunc(np.asarray(arr)), fill_value=fill_value) + tm.assert_sp_array_equal(result, expected) + + +@pytest.mark.parametrize( + "a, b", + [ + (SparseArray([0, 0, 0]), np.array([0, 1, 2])), + (SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])), + ], +) +@pytest.mark.parametrize("ufunc", [np.add, np.greater]) +def test_binary_ufuncs(ufunc, a, b): + # can't say anything about fill value here. + result = ufunc(a, b) + expected = ufunc(np.asarray(a), np.asarray(b)) + assert isinstance(result, SparseArray) + tm.assert_numpy_array_equal(np.asarray(result), expected) + + +def test_ndarray_inplace(): + sparray = SparseArray([0, 2, 0, 0]) + ndarray = np.array([0, 1, 2, 3]) + ndarray += sparray + expected = np.array([0, 3, 2, 3]) + tm.assert_numpy_array_equal(ndarray, expected) + + +def test_sparray_inplace(): + sparray = SparseArray([0, 2, 0, 0]) + ndarray = np.array([0, 1, 2, 3]) + sparray += ndarray + expected = SparseArray([0, 3, 2, 3], fill_value=0) + tm.assert_sp_array_equal(sparray, expected) + + +@pytest.mark.parametrize("fill_value", [True, False]) +def test_invert(fill_value): + arr = np.array([True, False, False, True]) + sparray = SparseArray(arr, fill_value=fill_value) + result = ~sparray + expected = SparseArray(~arr, fill_value=not fill_value) + tm.assert_sp_array_equal(result, expected) + + result = ~pd.Series(sparray) + expected = pd.Series(expected) + tm.assert_series_equal(result, expected) + + result = ~pd.DataFrame({"A": sparray}) + expected = pd.DataFrame({"A": expected}) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("fill_value", [0, np.nan]) +@pytest.mark.parametrize("op", [operator.pos, operator.neg]) +def test_unary_op(op, fill_value): + arr = np.array([0, 1, np.nan, 2]) + sparray = SparseArray(arr, fill_value=fill_value) + result = op(sparray) + expected = SparseArray(op(arr), fill_value=op(fill_value)) + tm.assert_sp_array_equal(result, expected) + + +@pytest.mark.parametrize("cons", [list, np.array, SparseArray]) +def test_mismatched_length_cmp_op(cons): + left = SparseArray([True, True]) + right = cons([True, True, True]) + with pytest.raises(ValueError, match="operands have mismatched length"): + left & right diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/sparse/test_array.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/sparse/test_array.py new file mode 100644 index 0000000..0ebe03d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/sparse/test_array.py @@ -0,0 +1,1518 @@ +import operator +import re +import warnings + +import numpy as np +import pytest + +from pandas._libs.sparse import IntIndex +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import isna +import pandas._testing as tm +from pandas.core.api import Int64Index +from pandas.core.arrays.sparse import ( + SparseArray, + SparseDtype, +) + + +class TestSparseArray: + def setup_method(self, method): + self.arr_data = np.array([np.nan, np.nan, 1, 2, 3,
np.nan, 4, 5, np.nan, 6]) + self.arr = SparseArray(self.arr_data) + self.zarr = SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0) + + def test_constructor_dtype(self): + arr = SparseArray([np.nan, 1, 2, np.nan]) + assert arr.dtype == SparseDtype(np.float64, np.nan) + assert arr.dtype.subtype == np.float64 + assert np.isnan(arr.fill_value) + + arr = SparseArray([np.nan, 1, 2, np.nan], fill_value=0) + assert arr.dtype == SparseDtype(np.float64, 0) + assert arr.fill_value == 0 + + arr = SparseArray([0, 1, 2, 4], dtype=np.float64) + assert arr.dtype == SparseDtype(np.float64, np.nan) + assert np.isnan(arr.fill_value) + + arr = SparseArray([0, 1, 2, 4], dtype=np.int64) + assert arr.dtype == SparseDtype(np.int64, 0) + assert arr.fill_value == 0 + + arr = SparseArray([0, 1, 2, 4], fill_value=0, dtype=np.int64) + assert arr.dtype == SparseDtype(np.int64, 0) + assert arr.fill_value == 0 + + arr = SparseArray([0, 1, 2, 4], dtype=None) + assert arr.dtype == SparseDtype(np.int64, 0) + assert arr.fill_value == 0 + + arr = SparseArray([0, 1, 2, 4], fill_value=0, dtype=None) + assert arr.dtype == SparseDtype(np.int64, 0) + assert arr.fill_value == 0 + + def test_constructor_dtype_str(self): + result = SparseArray([1, 2, 3], dtype="int") + expected = SparseArray([1, 2, 3], dtype=int) + tm.assert_sp_array_equal(result, expected) + + def test_constructor_sparse_dtype(self): + result = SparseArray([1, 0, 0, 1], dtype=SparseDtype("int64", -1)) + expected = SparseArray([1, 0, 0, 1], fill_value=-1, dtype=np.int64) + tm.assert_sp_array_equal(result, expected) + assert result.sp_values.dtype == np.dtype("int64") + + def test_constructor_sparse_dtype_str(self): + result = SparseArray([1, 0, 0, 1], dtype="Sparse[int32]") + expected = SparseArray([1, 0, 0, 1], dtype=np.int32) + tm.assert_sp_array_equal(result, expected) + assert result.sp_values.dtype == np.dtype("int32") + + def test_constructor_object_dtype(self): + # GH 11856 + arr = SparseArray(["A", "A", np.nan, "B"], dtype=object) + assert arr.dtype == SparseDtype(object) + assert np.isnan(arr.fill_value) + + arr = SparseArray(["A", "A", np.nan, "B"], dtype=object, fill_value="A") + assert arr.dtype == SparseDtype(object, "A") + assert arr.fill_value == "A" + + # GH 17574 + data = [False, 0, 100.0, 0.0] + arr = SparseArray(data, dtype=object, fill_value=False) + assert arr.dtype == SparseDtype(object, False) + assert arr.fill_value is False + arr_expected = np.array(data, dtype=object) + it = (type(x) == type(y) and x == y for x, y in zip(arr, arr_expected)) + assert np.fromiter(it, dtype=np.bool_).all() + + @pytest.mark.parametrize("dtype", [SparseDtype(int, 0), int]) + def test_constructor_na_dtype(self, dtype): + with pytest.raises(ValueError, match="Cannot convert"): + SparseArray([0, 1, np.nan], dtype=dtype) + + def test_constructor_warns_when_losing_timezone(self): + # GH#32501 warn when losing timezone information + dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific") + + expected = SparseArray(np.asarray(dti, dtype="datetime64[ns]")) + + with tm.assert_produces_warning(UserWarning): + result = SparseArray(dti) + + tm.assert_sp_array_equal(result, expected) + + with tm.assert_produces_warning(UserWarning): + result = SparseArray(pd.Series(dti)) + + tm.assert_sp_array_equal(result, expected) + + def test_constructor_spindex_dtype(self): + arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2])) + # XXX: Behavior change: specifying SparseIndex no longer changes the + # fill_value + expected = SparseArray([0, 1, 2, 0], 
kind="integer") + tm.assert_sp_array_equal(arr, expected) + assert arr.dtype == SparseDtype(np.int64) + assert arr.fill_value == 0 + + arr = SparseArray( + data=[1, 2, 3], + sparse_index=IntIndex(4, [1, 2, 3]), + dtype=np.int64, + fill_value=0, + ) + exp = SparseArray([0, 1, 2, 3], dtype=np.int64, fill_value=0) + tm.assert_sp_array_equal(arr, exp) + assert arr.dtype == SparseDtype(np.int64) + assert arr.fill_value == 0 + + arr = SparseArray( + data=[1, 2], sparse_index=IntIndex(4, [1, 2]), fill_value=0, dtype=np.int64 + ) + exp = SparseArray([0, 1, 2, 0], fill_value=0, dtype=np.int64) + tm.assert_sp_array_equal(arr, exp) + assert arr.dtype == SparseDtype(np.int64) + assert arr.fill_value == 0 + + arr = SparseArray( + data=[1, 2, 3], + sparse_index=IntIndex(4, [1, 2, 3]), + dtype=None, + fill_value=0, + ) + exp = SparseArray([0, 1, 2, 3], dtype=None) + tm.assert_sp_array_equal(arr, exp) + assert arr.dtype == SparseDtype(np.int64) + assert arr.fill_value == 0 + + @pytest.mark.parametrize("sparse_index", [None, IntIndex(1, [0])]) + def test_constructor_spindex_dtype_scalar(self, sparse_index): + # scalar input + arr = SparseArray(data=1, sparse_index=sparse_index, dtype=None) + exp = SparseArray([1], dtype=None) + tm.assert_sp_array_equal(arr, exp) + assert arr.dtype == SparseDtype(np.int64) + assert arr.fill_value == 0 + + arr = SparseArray(data=1, sparse_index=IntIndex(1, [0]), dtype=None) + exp = SparseArray([1], dtype=None) + tm.assert_sp_array_equal(arr, exp) + assert arr.dtype == SparseDtype(np.int64) + assert arr.fill_value == 0 + + def test_constructor_spindex_dtype_scalar_broadcasts(self): + arr = SparseArray( + data=[1, 2], sparse_index=IntIndex(4, [1, 2]), fill_value=0, dtype=None + ) + exp = SparseArray([0, 1, 2, 0], fill_value=0, dtype=None) + tm.assert_sp_array_equal(arr, exp) + assert arr.dtype == SparseDtype(np.int64) + assert arr.fill_value == 0 + + @pytest.mark.parametrize( + "data, fill_value", + [ + (np.array([1, 2]), 0), + (np.array([1.0, 2.0]), np.nan), + ([True, False], False), + ([pd.Timestamp("2017-01-01")], pd.NaT), + ], + ) + def test_constructor_inferred_fill_value(self, data, fill_value): + result = SparseArray(data).fill_value + + if isna(fill_value): + assert isna(result) + else: + assert result == fill_value + + @pytest.mark.parametrize("format", ["coo", "csc", "csr"]) + @pytest.mark.parametrize("size", [0, 10]) + @td.skip_if_no_scipy + def test_from_spmatrix(self, size, format): + import scipy.sparse + + mat = scipy.sparse.random(size, 1, density=0.5, format=format) + result = SparseArray.from_spmatrix(mat) + + result = np.asarray(result) + expected = mat.toarray().ravel() + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("format", ["coo", "csc", "csr"]) + @td.skip_if_no_scipy + def test_from_spmatrix_including_explicit_zero(self, format): + import scipy.sparse + + mat = scipy.sparse.random(10, 1, density=0.5, format=format) + mat.data[0] = 0 + result = SparseArray.from_spmatrix(mat) + + result = np.asarray(result) + expected = mat.toarray().ravel() + tm.assert_numpy_array_equal(result, expected) + + @td.skip_if_no_scipy + def test_from_spmatrix_raises(self): + import scipy.sparse + + mat = scipy.sparse.eye(5, 4, format="csc") + + with pytest.raises(ValueError, match="not '4'"): + SparseArray.from_spmatrix(mat) + + @pytest.mark.parametrize( + "scalar,dtype", + [ + (False, SparseDtype(bool, False)), + (0.0, SparseDtype("float64", 0)), + (1, SparseDtype("int64", 1)), + ("z", SparseDtype("object", "z")), + ], + ) + def 
test_scalar_with_index_infer_dtype(self, scalar, dtype): + # GH 19163 + with tm.assert_produces_warning( + FutureWarning, match="The index argument has been deprecated" + ): + arr = SparseArray(scalar, index=[1, 2, 3], fill_value=scalar) + exp = SparseArray([scalar, scalar, scalar], fill_value=scalar) + + tm.assert_sp_array_equal(arr, exp) + + assert arr.dtype == dtype + assert exp.dtype == dtype + + def test_getitem_bool_sparse_array(self): + # GH 23122 + spar_bool = SparseArray([False, True] * 5, dtype=np.bool8, fill_value=True) + exp = SparseArray([np.nan, 2, np.nan, 5, 6]) + tm.assert_sp_array_equal(self.arr[spar_bool], exp) + + spar_bool = ~spar_bool + res = self.arr[spar_bool] + exp = SparseArray([np.nan, 1, 3, 4, np.nan]) + tm.assert_sp_array_equal(res, exp) + + spar_bool = SparseArray( + [False, True, np.nan] * 3, dtype=np.bool8, fill_value=np.nan + ) + res = self.arr[spar_bool] + exp = SparseArray([np.nan, 3, 5]) + tm.assert_sp_array_equal(res, exp) + + def test_getitem_bool_sparse_array_as_comparison(self): + # GH 45110 + arr = SparseArray([1, 2, 3, 4, np.nan, np.nan], fill_value=np.nan) + res = arr[arr > 2] + exp = SparseArray([3.0, 4.0], fill_value=np.nan) + tm.assert_sp_array_equal(res, exp) + + def test_get_item(self): + + assert np.isnan(self.arr[1]) + assert self.arr[2] == 1 + assert self.arr[7] == 5 + + assert self.zarr[0] == 0 + assert self.zarr[2] == 1 + assert self.zarr[7] == 5 + + errmsg = "must be an integer between -10 and 10" + + with pytest.raises(IndexError, match=errmsg): + self.arr[11] + + with pytest.raises(IndexError, match=errmsg): + self.arr[-11] + + assert self.arr[-1] == self.arr[len(self.arr) - 1] + + def test_take_scalar_raises(self): + msg = "'indices' must be an array, not a scalar '2'." + with pytest.raises(ValueError, match=msg): + self.arr.take(2) + + def test_take(self): + exp = SparseArray(np.take(self.arr_data, [2, 3])) + tm.assert_sp_array_equal(self.arr.take([2, 3]), exp) + + exp = SparseArray(np.take(self.arr_data, [0, 1, 2])) + tm.assert_sp_array_equal(self.arr.take([0, 1, 2]), exp) + + def test_take_all_empty(self): + a = pd.array([0, 0], dtype=SparseDtype("int64")) + result = a.take([0, 1], allow_fill=True, fill_value=np.nan) + tm.assert_sp_array_equal(a, result) + + def test_take_fill_value(self): + data = np.array([1, np.nan, 0, 3, 0]) + sparse = SparseArray(data, fill_value=0) + + exp = SparseArray(np.take(data, [0]), fill_value=0) + tm.assert_sp_array_equal(sparse.take([0]), exp) + + exp = SparseArray(np.take(data, [1, 3, 4]), fill_value=0) + tm.assert_sp_array_equal(sparse.take([1, 3, 4]), exp) + + def test_take_negative(self): + exp = SparseArray(np.take(self.arr_data, [-1])) + tm.assert_sp_array_equal(self.arr.take([-1]), exp) + + exp = SparseArray(np.take(self.arr_data, [-4, -3, -2])) + tm.assert_sp_array_equal(self.arr.take([-4, -3, -2]), exp) + + @pytest.mark.parametrize("fill_value", [0, None, np.nan]) + def test_shift_fill_value(self, fill_value): + # GH #24128 + sparse = SparseArray(np.array([1, 0, 0, 3, 0]), fill_value=8.0) + res = sparse.shift(1, fill_value=fill_value) + if isna(fill_value): + fill_value = res.dtype.na_value + exp = SparseArray(np.array([fill_value, 1, 0, 0, 3]), fill_value=8.0) + tm.assert_sp_array_equal(res, exp) + + def test_bad_take(self): + with pytest.raises(IndexError, match="bounds"): + self.arr.take([11]) + + def test_take_filling(self): + # similar tests as GH 12631 + sparse = SparseArray([np.nan, np.nan, 1, np.nan, 4]) + result = sparse.take(np.array([1, 0, -1])) + expected = SparseArray([np.nan, 
np.nan, 4]) + tm.assert_sp_array_equal(result, expected) + + # XXX: test change: fill_value=True -> allow_fill=True + result = sparse.take(np.array([1, 0, -1]), allow_fill=True) + expected = SparseArray([np.nan, np.nan, np.nan]) + tm.assert_sp_array_equal(result, expected) + + # allow_fill=False + result = sparse.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = SparseArray([np.nan, np.nan, 4]) + tm.assert_sp_array_equal(result, expected) + + msg = "Invalid value in 'indices'" + with pytest.raises(ValueError, match=msg): + sparse.take(np.array([1, 0, -2]), allow_fill=True) + + with pytest.raises(ValueError, match=msg): + sparse.take(np.array([1, 0, -5]), allow_fill=True) + + msg = "out of bounds value in 'indices'" + with pytest.raises(IndexError, match=msg): + sparse.take(np.array([1, -6])) + with pytest.raises(IndexError, match=msg): + sparse.take(np.array([1, 5])) + with pytest.raises(IndexError, match=msg): + sparse.take(np.array([1, 5]), allow_fill=True) + + def test_take_filling_fill_value(self): + # same tests as GH 12631 + sparse = SparseArray([np.nan, 0, 1, 0, 4], fill_value=0) + result = sparse.take(np.array([1, 0, -1])) + expected = SparseArray([0, np.nan, 4], fill_value=0) + tm.assert_sp_array_equal(result, expected) + + # fill_value + result = sparse.take(np.array([1, 0, -1]), allow_fill=True) + # XXX: behavior change. + # the old way of filling self.fill_value doesn't follow EA rules. + # It's supposed to be self.dtype.na_value (nan in this case) + expected = SparseArray([0, np.nan, np.nan], fill_value=0) + tm.assert_sp_array_equal(result, expected) + + # allow_fill=False + result = sparse.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = SparseArray([0, np.nan, 4], fill_value=0) + tm.assert_sp_array_equal(result, expected) + + msg = "Invalid value in 'indices'." 
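+ # (under the ExtensionArray.take contract, -1 is the only negative index
+ # permitted once allow_fill=True -- it marks a slot to fill with the NA
+ # value -- so the -2 and -5 below must raise.)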
+ with pytest.raises(ValueError, match=msg): + sparse.take(np.array([1, 0, -2]), allow_fill=True) + with pytest.raises(ValueError, match=msg): + sparse.take(np.array([1, 0, -5]), allow_fill=True) + + msg = "out of bounds value in 'indices'" + with pytest.raises(IndexError, match=msg): + sparse.take(np.array([1, -6])) + with pytest.raises(IndexError, match=msg): + sparse.take(np.array([1, 5])) + with pytest.raises(IndexError, match=msg): + sparse.take(np.array([1, 5]), fill_value=True) + + @pytest.mark.parametrize("kind", ["block", "integer"]) + def test_take_filling_all_nan(self, kind): + sparse = SparseArray([np.nan, np.nan, np.nan, np.nan, np.nan], kind=kind) + result = sparse.take(np.array([1, 0, -1])) + expected = SparseArray([np.nan, np.nan, np.nan], kind=kind) + tm.assert_sp_array_equal(result, expected) + + result = sparse.take(np.array([1, 0, -1]), fill_value=True) + expected = SparseArray([np.nan, np.nan, np.nan], kind=kind) + tm.assert_sp_array_equal(result, expected) + + msg = "out of bounds value in 'indices'" + with pytest.raises(IndexError, match=msg): + sparse.take(np.array([1, -6])) + with pytest.raises(IndexError, match=msg): + sparse.take(np.array([1, 5])) + with pytest.raises(IndexError, match=msg): + sparse.take(np.array([1, 5]), fill_value=True) + + def test_set_item(self): + def setitem(): + self.arr[5] = 3 + + def setslice(): + self.arr[1:5] = 2 + + with pytest.raises(TypeError, match="assignment via setitem"): + setitem() + + with pytest.raises(TypeError, match="assignment via setitem"): + setslice() + + def test_constructor_from_too_large_array(self): + with pytest.raises(TypeError, match="expected dimension <= 1 data"): + SparseArray(np.arange(10).reshape((2, 5))) + + def test_constructor_from_sparse(self): + res = SparseArray(self.zarr) + assert res.fill_value == 0 + tm.assert_almost_equal(res.sp_values, self.zarr.sp_values) + + def test_constructor_copy(self): + cp = SparseArray(self.arr, copy=True) + cp.sp_values[:3] = 0 + assert not (self.arr.sp_values[:3] == 0).any() + + not_copy = SparseArray(self.arr) + not_copy.sp_values[:3] = 0 + assert (self.arr.sp_values[:3] == 0).all() + + def test_constructor_bool(self): + # GH 10648 + data = np.array([False, False, True, True, False, False]) + arr = SparseArray(data, fill_value=False, dtype=bool) + + assert arr.dtype == SparseDtype(bool) + tm.assert_numpy_array_equal(arr.sp_values, np.array([True, True])) + # Behavior change: np.asarray densifies. + # tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr)) + tm.assert_numpy_array_equal(arr.sp_index.indices, np.array([2, 3], np.int32)) + + dense = arr.to_dense() + assert dense.dtype == bool + tm.assert_numpy_array_equal(dense, data) + + def test_constructor_bool_fill_value(self): + arr = SparseArray([True, False, True], dtype=None) + assert arr.dtype == SparseDtype(np.bool_) + assert not arr.fill_value + + arr = SparseArray([True, False, True], dtype=np.bool_) + assert arr.dtype == SparseDtype(np.bool_) + assert not arr.fill_value + + arr = SparseArray([True, False, True], dtype=np.bool_, fill_value=True) + assert arr.dtype == SparseDtype(np.bool_, True) + assert arr.fill_value + + def test_constructor_float32(self): + # GH 10648 + data = np.array([1.0, np.nan, 3], dtype=np.float32) + arr = SparseArray(data, dtype=np.float32) + + assert arr.dtype == SparseDtype(np.float32) + tm.assert_numpy_array_equal(arr.sp_values, np.array([1, 3], dtype=np.float32)) + # Behavior change: np.asarray densifies. 
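+ # (np.asarray(arr) now returns the full dense ndarray, fill values
+ # included, so it no longer matches arr.sp_values and the old check
+ # below stays disabled:)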
+ # tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr)) + tm.assert_numpy_array_equal( + arr.sp_index.indices, np.array([0, 2], dtype=np.int32) + ) + + dense = arr.to_dense() + assert dense.dtype == np.float32 + tm.assert_numpy_array_equal(dense, data) + + def test_astype(self): + # float -> float + arr = SparseArray([None, None, 0, 2]) + result = arr.astype("Sparse[float32]") + expected = SparseArray([None, None, 0, 2], dtype=np.dtype("float32")) + tm.assert_sp_array_equal(result, expected) + + dtype = SparseDtype("float64", fill_value=0) + result = arr.astype(dtype) + expected = SparseArray._simple_new( + np.array([0.0, 2.0], dtype=dtype.subtype), IntIndex(4, [2, 3]), dtype + ) + tm.assert_sp_array_equal(result, expected) + + dtype = SparseDtype("int64", 0) + result = arr.astype(dtype) + expected = SparseArray._simple_new( + np.array([0, 2], dtype=np.int64), IntIndex(4, [2, 3]), dtype + ) + tm.assert_sp_array_equal(result, expected) + + arr = SparseArray([0, np.nan, 0, 1], fill_value=0) + with pytest.raises(ValueError, match="NA"): + arr.astype("Sparse[i8]") + + def test_astype_bool(self): + a = SparseArray([1, 0, 0, 1], dtype=SparseDtype(int, 0)) + result = a.astype(bool) + expected = SparseArray( + [True, False, False, True], dtype=SparseDtype(bool, False) + ) + tm.assert_sp_array_equal(result, expected) + + # update fill value + result = a.astype(SparseDtype(bool, False)) + expected = SparseArray( + [True, False, False, True], dtype=SparseDtype(bool, False) + ) + tm.assert_sp_array_equal(result, expected) + + def test_astype_all(self, any_real_numpy_dtype): + vals = np.array([1, 2, 3]) + arr = SparseArray(vals, fill_value=1) + typ = np.dtype(any_real_numpy_dtype) + res = arr.astype(typ) + assert res.dtype == SparseDtype(typ, 1) + assert res.sp_values.dtype == typ + + tm.assert_numpy_array_equal(np.asarray(res.to_dense()), vals.astype(typ)) + + @pytest.mark.parametrize( + "arr, dtype, expected", + [ + ( + SparseArray([0, 1]), + "float", + SparseArray([0.0, 1.0], dtype=SparseDtype(float, 0.0)), + ), + (SparseArray([0, 1]), bool, SparseArray([False, True])), + ( + SparseArray([0, 1], fill_value=1), + bool, + SparseArray([False, True], dtype=SparseDtype(bool, True)), + ), + pytest.param( + SparseArray([0, 1]), + "datetime64[ns]", + SparseArray( + np.array([0, 1], dtype="datetime64[ns]"), + dtype=SparseDtype("datetime64[ns]", pd.Timestamp("1970")), + ), + marks=[pytest.mark.xfail(reason="NumPy-7619")], + ), + ( + SparseArray([0, 1, 10]), + str, + SparseArray(["0", "1", "10"], dtype=SparseDtype(str, "0")), + ), + (SparseArray(["10", "20"]), float, SparseArray([10.0, 20.0])), + ( + SparseArray([0, 1, 0]), + object, + SparseArray([0, 1, 0], dtype=SparseDtype(object, 0)), + ), + ], + ) + def test_astype_more(self, arr, dtype, expected): + result = arr.astype(dtype) + tm.assert_sp_array_equal(result, expected) + + def test_astype_nan_raises(self): + arr = SparseArray([1.0, np.nan]) + with pytest.raises(ValueError, match="Cannot convert non-finite"): + arr.astype(int) + + def test_astype_copy_false(self): + # GH#34456 bug caused by using .view instead of .astype in astype_nansafe + arr = SparseArray([1, 2, 3]) + + result = arr.astype(float, copy=False) + expected = SparseArray([1.0, 2.0, 3.0], fill_value=0.0) + tm.assert_sp_array_equal(result, expected) + + def test_set_fill_value(self): + arr = SparseArray([1.0, np.nan, 2.0], fill_value=np.nan) + arr.fill_value = 2 + assert arr.fill_value == 2 + + arr = SparseArray([1, 0, 2], fill_value=0, dtype=np.int64) + arr.fill_value = 2 + assert 
arr.fill_value == 2 + + # XXX: this seems fine? You can construct an integer + # sparsearray with NaN fill value, why not update one? + # coerces to int + # msg = "unable to set fill_value 3\\.1 to int64 dtype" + # with pytest.raises(ValueError, match=msg): + arr.fill_value = 3.1 + assert arr.fill_value == 3.1 + + # msg = "unable to set fill_value nan to int64 dtype" + # with pytest.raises(ValueError, match=msg): + arr.fill_value = np.nan + assert np.isnan(arr.fill_value) + + arr = SparseArray([True, False, True], fill_value=False, dtype=np.bool_) + arr.fill_value = True + assert arr.fill_value + + # coerces to bool + # XXX: we can construct an sparse array of bool + # type and use as fill_value any value + # msg = "fill_value must be True, False or nan" + # with pytest.raises(ValueError, match=msg): + # arr.fill_value = 0 + + # msg = "unable to set fill_value nan to bool dtype" + # with pytest.raises(ValueError, match=msg): + arr.fill_value = np.nan + assert np.isnan(arr.fill_value) + + @pytest.mark.parametrize("val", [[1, 2, 3], np.array([1, 2]), (1, 2, 3)]) + def test_set_fill_invalid_non_scalar(self, val): + arr = SparseArray([True, False, True], fill_value=False, dtype=np.bool_) + msg = "fill_value must be a scalar" + + with pytest.raises(ValueError, match=msg): + arr.fill_value = val + + def test_copy(self): + arr2 = self.arr.copy() + assert arr2.sp_values is not self.arr.sp_values + assert arr2.sp_index is self.arr.sp_index + + def test_values_asarray(self): + tm.assert_almost_equal(self.arr.to_dense(), self.arr_data) + + @pytest.mark.parametrize( + "data,shape,dtype", + [ + ([0, 0, 0, 0, 0], (5,), None), + ([], (0,), None), + ([0], (1,), None), + (["A", "A", np.nan, "B"], (4,), object), + ], + ) + def test_shape(self, data, shape, dtype): + # GH 21126 + out = SparseArray(data, dtype=dtype) + assert out.shape == shape + + @pytest.mark.parametrize( + "vals", + [ + [np.nan, np.nan, np.nan, np.nan, np.nan], + [1, np.nan, np.nan, 3, np.nan], + [1, np.nan, 0, 3, 0], + ], + ) + @pytest.mark.parametrize("fill_value", [None, 0]) + def test_dense_repr(self, vals, fill_value): + vals = np.array(vals) + arr = SparseArray(vals, fill_value=fill_value) + + res = arr.to_dense() + tm.assert_numpy_array_equal(res, vals) + + res2 = arr._internal_get_values() + + tm.assert_numpy_array_equal(res2, vals) + + def test_getitem(self): + def _checkit(i): + tm.assert_almost_equal(self.arr[i], self.arr.to_dense()[i]) + + for i in range(len(self.arr)): + _checkit(i) + _checkit(-i) + + def test_getitem_arraylike_mask(self): + arr = SparseArray([0, 1, 2]) + result = arr[[True, False, True]] + expected = SparseArray([0, 2]) + tm.assert_sp_array_equal(result, expected) + + @pytest.mark.parametrize( + "slc", + [ + np.s_[:], + np.s_[1:10], + np.s_[1:100], + np.s_[10:1], + np.s_[:-3], + np.s_[-5:-4], + np.s_[:-12], + np.s_[-12:], + np.s_[2:], + np.s_[2::3], + np.s_[::2], + np.s_[::-1], + np.s_[::-2], + np.s_[1:6:2], + np.s_[:-6:-2], + ], + ) + @pytest.mark.parametrize( + "as_dense", [[np.nan] * 10, [1] * 10, [np.nan] * 5 + [1] * 5, []] + ) + def test_getslice(self, slc, as_dense): + as_dense = np.array(as_dense) + arr = SparseArray(as_dense) + + result = arr[slc] + expected = SparseArray(as_dense[slc]) + + tm.assert_sp_array_equal(result, expected) + + def test_getslice_tuple(self): + dense = np.array([np.nan, 0, 3, 4, 0, 5, np.nan, np.nan, 0]) + + sparse = SparseArray(dense) + res = sparse[(slice(4, None),)] + exp = SparseArray(dense[4:]) + tm.assert_sp_array_equal(res, exp) + + sparse = SparseArray(dense, 
fill_value=0)
+        res = sparse[(slice(4, None),)]
+        exp = SparseArray(dense[4:], fill_value=0)
+        tm.assert_sp_array_equal(res, exp)
+
+        msg = "too many indices for array"
+        with pytest.raises(IndexError, match=msg):
+            sparse[4:, :]
+
+        with pytest.raises(IndexError, match=msg):
+            # check numpy compat
+            dense[4:, :]
+
+    def test_boolean_slice_empty(self):
+        arr = SparseArray([0, 1, 2])
+        res = arr[[False, False, False]]
+        assert res.dtype == arr.dtype
+
+    def test_neg_operator(self):
+        arr = SparseArray([-1, -2, np.nan, 3], fill_value=np.nan, dtype=np.int8)
+        res = -arr
+        exp = SparseArray([1, 2, np.nan, -3], fill_value=np.nan, dtype=np.int8)
+        tm.assert_sp_array_equal(exp, res)
+
+        arr = SparseArray([-1, -2, 1, 3], fill_value=-1, dtype=np.int8)
+        res = -arr
+        exp = SparseArray([1, 2, -1, -3], fill_value=1, dtype=np.int8)
+        tm.assert_sp_array_equal(exp, res)
+
+    def test_abs_operator(self):
+        arr = SparseArray([-1, -2, np.nan, 3], fill_value=np.nan, dtype=np.int8)
+        res = abs(arr)
+        exp = SparseArray([1, 2, np.nan, 3], fill_value=np.nan, dtype=np.int8)
+        tm.assert_sp_array_equal(exp, res)
+
+        arr = SparseArray([-1, -2, 1, 3], fill_value=-1, dtype=np.int8)
+        res = abs(arr)
+        exp = SparseArray([1, 2, 1, 3], fill_value=1, dtype=np.int8)
+        tm.assert_sp_array_equal(exp, res)
+
+    def test_invert_operator(self):
+        arr = SparseArray([False, True, False, True], fill_value=False, dtype=np.bool8)
+        exp = SparseArray(
+            np.invert([False, True, False, True]), fill_value=True, dtype=np.bool8
+        )
+        res = ~arr
+        tm.assert_sp_array_equal(exp, res)
+
+        arr = SparseArray([0, 1, 0, 2, 3, 0], fill_value=0, dtype=np.int32)
+        res = ~arr
+        exp = SparseArray([-1, -2, -1, -3, -4, -1], fill_value=-1, dtype=np.int32)
+        tm.assert_sp_array_equal(exp, res)
+
+    @pytest.mark.parametrize("op", ["add", "sub", "mul", "truediv", "floordiv", "pow"])
+    def test_binary_operators(self, op):
+        op = getattr(operator, op)
+        data1 = np.random.randn(20)
+        data2 = np.random.randn(20)
+
+        data1[::2] = np.nan
+        data2[::3] = np.nan
+
+        arr1 = SparseArray(data1)
+        arr2 = SparseArray(data2)
+
+        data1[::2] = 3
+        data2[::3] = 3
+        farr1 = SparseArray(data1, fill_value=3)
+        farr2 = SparseArray(data2, fill_value=3)
+
+        def _check_op(op, first, second):
+            res = op(first, second)
+            exp = SparseArray(
+                op(first.to_dense(), second.to_dense()), fill_value=first.fill_value
+            )
+            assert isinstance(res, SparseArray)
+            tm.assert_almost_equal(res.to_dense(), exp.to_dense())
+
+            res2 = op(first, second.to_dense())
+            assert isinstance(res2, SparseArray)
+            tm.assert_sp_array_equal(res, res2)
+
+            res3 = op(first.to_dense(), second)
+            assert isinstance(res3, SparseArray)
+            tm.assert_sp_array_equal(res, res3)
+
+            res4 = op(first, 4)
+            assert isinstance(res4, SparseArray)
+
+            # Ignore this if the actual op raises (e.g. pow).
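+            # For a scalar op the fill_value itself goes through the op too,
+            # e.g. SparseArray([3.0, 1.0], fill_value=3.0) + 4 should yield
+            # fill_value 7.0; the exp_fv check below verifies exactly that.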
+ try: + exp = op(first.to_dense(), 4) + exp_fv = op(first.fill_value, 4) + except ValueError: + pass + else: + tm.assert_almost_equal(res4.fill_value, exp_fv) + tm.assert_almost_equal(res4.to_dense(), exp) + + with np.errstate(all="ignore"): + for first_arr, second_arr in [(arr1, arr2), (farr1, farr2)]: + _check_op(op, first_arr, second_arr) + + def test_pickle(self): + def _check_roundtrip(obj): + unpickled = tm.round_trip_pickle(obj) + tm.assert_sp_array_equal(unpickled, obj) + + _check_roundtrip(self.arr) + _check_roundtrip(self.zarr) + + def test_generator_warnings(self): + sp_arr = SparseArray([1, 2, 3]) + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings(action="always", category=DeprecationWarning) + warnings.filterwarnings(action="always", category=PendingDeprecationWarning) + for _ in sp_arr: + pass + assert len(w) == 0 + + def test_fillna(self): + s = SparseArray([1, np.nan, np.nan, 3, np.nan]) + res = s.fillna(-1) + exp = SparseArray([1, -1, -1, 3, -1], fill_value=-1, dtype=np.float64) + tm.assert_sp_array_equal(res, exp) + + s = SparseArray([1, np.nan, np.nan, 3, np.nan], fill_value=0) + res = s.fillna(-1) + exp = SparseArray([1, -1, -1, 3, -1], fill_value=0, dtype=np.float64) + tm.assert_sp_array_equal(res, exp) + + s = SparseArray([1, np.nan, 0, 3, 0]) + res = s.fillna(-1) + exp = SparseArray([1, -1, 0, 3, 0], fill_value=-1, dtype=np.float64) + tm.assert_sp_array_equal(res, exp) + + s = SparseArray([1, np.nan, 0, 3, 0], fill_value=0) + res = s.fillna(-1) + exp = SparseArray([1, -1, 0, 3, 0], fill_value=0, dtype=np.float64) + tm.assert_sp_array_equal(res, exp) + + s = SparseArray([np.nan, np.nan, np.nan, np.nan]) + res = s.fillna(-1) + exp = SparseArray([-1, -1, -1, -1], fill_value=-1, dtype=np.float64) + tm.assert_sp_array_equal(res, exp) + + s = SparseArray([np.nan, np.nan, np.nan, np.nan], fill_value=0) + res = s.fillna(-1) + exp = SparseArray([-1, -1, -1, -1], fill_value=0, dtype=np.float64) + tm.assert_sp_array_equal(res, exp) + + # float dtype's fill_value is np.nan, replaced by -1 + s = SparseArray([0.0, 0.0, 0.0, 0.0]) + res = s.fillna(-1) + exp = SparseArray([0.0, 0.0, 0.0, 0.0], fill_value=-1) + tm.assert_sp_array_equal(res, exp) + + # int dtype shouldn't have missing. No changes. + s = SparseArray([0, 0, 0, 0]) + assert s.dtype == SparseDtype(np.int64) + assert s.fill_value == 0 + res = s.fillna(-1) + tm.assert_sp_array_equal(res, s) + + s = SparseArray([0, 0, 0, 0], fill_value=0) + assert s.dtype == SparseDtype(np.int64) + assert s.fill_value == 0 + res = s.fillna(-1) + exp = SparseArray([0, 0, 0, 0], fill_value=0) + tm.assert_sp_array_equal(res, exp) + + # fill_value can be nan if there is no missing hole. + # only fill_value will be changed + s = SparseArray([0, 0, 0, 0], fill_value=np.nan) + assert s.dtype == SparseDtype(np.int64, fill_value=np.nan) + assert np.isnan(s.fill_value) + res = s.fillna(-1) + exp = SparseArray([0, 0, 0, 0], fill_value=-1) + tm.assert_sp_array_equal(res, exp) + + def test_fillna_overlap(self): + s = SparseArray([1, np.nan, np.nan, 3, np.nan]) + # filling with existing value doesn't replace existing value with + # fill_value, i.e. 
existing 3 remains in sp_values + res = s.fillna(3) + exp = np.array([1, 3, 3, 3, 3], dtype=np.float64) + tm.assert_numpy_array_equal(res.to_dense(), exp) + + s = SparseArray([1, np.nan, np.nan, 3, np.nan], fill_value=0) + res = s.fillna(3) + exp = SparseArray([1, 3, 3, 3, 3], fill_value=0, dtype=np.float64) + tm.assert_sp_array_equal(res, exp) + + def test_nonzero(self): + # Tests regression #21172. + sa = SparseArray([float("nan"), float("nan"), 1, 0, 0, 2, 0, 0, 0, 3, 0, 0]) + expected = np.array([2, 5, 9], dtype=np.int32) + (result,) = sa.nonzero() + tm.assert_numpy_array_equal(expected, result) + + sa = SparseArray([0, 0, 1, 0, 0, 2, 0, 0, 0, 3, 0, 0]) + (result,) = sa.nonzero() + tm.assert_numpy_array_equal(expected, result) + + +class TestSparseArrayAnalytics: + @pytest.mark.parametrize( + "data,pos,neg", + [ + ([True, True, True], True, False), + ([1, 2, 1], 1, 0), + ([1.0, 2.0, 1.0], 1.0, 0.0), + ], + ) + def test_all(self, data, pos, neg): + # GH 17570 + out = SparseArray(data).all() + assert out + + out = SparseArray(data, fill_value=pos).all() + assert out + + data[1] = neg + out = SparseArray(data).all() + assert not out + + out = SparseArray(data, fill_value=pos).all() + assert not out + + @pytest.mark.parametrize( + "data,pos,neg", + [ + ([True, True, True], True, False), + ([1, 2, 1], 1, 0), + ([1.0, 2.0, 1.0], 1.0, 0.0), + ], + ) + def test_numpy_all(self, data, pos, neg): + # GH 17570 + out = np.all(SparseArray(data)) + assert out + + out = np.all(SparseArray(data, fill_value=pos)) + assert out + + data[1] = neg + out = np.all(SparseArray(data)) + assert not out + + out = np.all(SparseArray(data, fill_value=pos)) + assert not out + + # raises with a different message on py2. + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.all(SparseArray(data), out=np.array([])) + + @pytest.mark.parametrize( + "data,pos,neg", + [ + ([False, True, False], True, False), + ([0, 2, 0], 2, 0), + ([0.0, 2.0, 0.0], 2.0, 0.0), + ], + ) + def test_any(self, data, pos, neg): + # GH 17570 + out = SparseArray(data).any() + assert out + + out = SparseArray(data, fill_value=pos).any() + assert out + + data[1] = neg + out = SparseArray(data).any() + assert not out + + out = SparseArray(data, fill_value=pos).any() + assert not out + + @pytest.mark.parametrize( + "data,pos,neg", + [ + ([False, True, False], True, False), + ([0, 2, 0], 2, 0), + ([0.0, 2.0, 0.0], 2.0, 0.0), + ], + ) + def test_numpy_any(self, data, pos, neg): + # GH 17570 + out = np.any(SparseArray(data)) + assert out + + out = np.any(SparseArray(data, fill_value=pos)) + assert out + + data[1] = neg + out = np.any(SparseArray(data)) + assert not out + + out = np.any(SparseArray(data, fill_value=pos)) + assert not out + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.any(SparseArray(data), out=out) + + def test_sum(self): + data = np.arange(10).astype(float) + out = SparseArray(data).sum() + assert out == 45.0 + + data[5] = np.nan + out = SparseArray(data, fill_value=2).sum() + assert out == 40.0 + + out = SparseArray(data, fill_value=np.nan).sum() + assert out == 40.0 + + @pytest.mark.parametrize( + "arr", + [np.array([0, 1, np.nan, 1]), np.array([0, 1, 1])], + ) + @pytest.mark.parametrize("fill_value", [0, 1, np.nan]) + @pytest.mark.parametrize("min_count, expected", [(3, 2), (4, np.nan)]) + def test_sum_min_count(self, arr, fill_value, min_count, expected): + # https://github.com/pandas-dev/pandas/issues/25777 + sparray = SparseArray(arr, 
fill_value=fill_value) + result = sparray.sum(min_count=min_count) + if np.isnan(expected): + assert np.isnan(result) + else: + assert result == expected + + def test_bool_sum_min_count(self): + spar_bool = pd.arrays.SparseArray( + [False, True] * 5, dtype=np.bool8, fill_value=True + ) + res = spar_bool.sum(min_count=1) + assert res == 5 + res = spar_bool.sum(min_count=11) + assert isna(res) + + def test_numpy_sum(self): + data = np.arange(10).astype(float) + out = np.sum(SparseArray(data)) + assert out == 45.0 + + data[5] = np.nan + out = np.sum(SparseArray(data, fill_value=2)) + assert out == 40.0 + + out = np.sum(SparseArray(data, fill_value=np.nan)) + assert out == 40.0 + + msg = "the 'dtype' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.sum(SparseArray(data), dtype=np.int64) + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.sum(SparseArray(data), out=out) + + @pytest.mark.parametrize( + "data,expected", + [ + ( + np.array([1, 2, 3, 4, 5], dtype=float), # non-null data + SparseArray(np.array([1.0, 3.0, 6.0, 10.0, 15.0])), + ), + ( + np.array([1, 2, np.nan, 4, 5], dtype=float), # null data + SparseArray(np.array([1.0, 3.0, np.nan, 7.0, 12.0])), + ), + ], + ) + @pytest.mark.parametrize("numpy", [True, False]) + def test_cumsum(self, data, expected, numpy): + cumsum = np.cumsum if numpy else lambda s: s.cumsum() + + out = cumsum(SparseArray(data)) + tm.assert_sp_array_equal(out, expected) + + out = cumsum(SparseArray(data, fill_value=np.nan)) + tm.assert_sp_array_equal(out, expected) + + out = cumsum(SparseArray(data, fill_value=2)) + tm.assert_sp_array_equal(out, expected) + + if numpy: # numpy compatibility checks. + msg = "the 'dtype' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.cumsum(SparseArray(data), dtype=np.int64) + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.cumsum(SparseArray(data), out=out) + else: + axis = 1 # SparseArray currently 1-D, so only axis = 0 is valid. 
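+            # e.g. SparseArray(data).cumsum(axis=1) raises
+            # "ValueError: axis(=1) out of bounds", asserted below.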
+ msg = re.escape(f"axis(={axis}) out of bounds") + with pytest.raises(ValueError, match=msg): + SparseArray(data).cumsum(axis=axis) + + def test_mean(self): + data = np.arange(10).astype(float) + out = SparseArray(data).mean() + assert out == 4.5 + + data[5] = np.nan + out = SparseArray(data).mean() + assert out == 40.0 / 9 + + def test_numpy_mean(self): + data = np.arange(10).astype(float) + out = np.mean(SparseArray(data)) + assert out == 4.5 + + data[5] = np.nan + out = np.mean(SparseArray(data)) + assert out == 40.0 / 9 + + msg = "the 'dtype' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.mean(SparseArray(data), dtype=np.int64) + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.mean(SparseArray(data), out=out) + + def test_ufunc(self): + # GH 13853 make sure ufunc is applied to fill_value + sparse = SparseArray([1, np.nan, 2, np.nan, -2]) + result = SparseArray([1, np.nan, 2, np.nan, 2]) + tm.assert_sp_array_equal(abs(sparse), result) + tm.assert_sp_array_equal(np.abs(sparse), result) + + sparse = SparseArray([1, -1, 2, -2], fill_value=1) + result = SparseArray([1, 2, 2], sparse_index=sparse.sp_index, fill_value=1) + tm.assert_sp_array_equal(abs(sparse), result) + tm.assert_sp_array_equal(np.abs(sparse), result) + + sparse = SparseArray([1, -1, 2, -2], fill_value=-1) + exp = SparseArray([1, 1, 2, 2], fill_value=1) + tm.assert_sp_array_equal(abs(sparse), exp) + tm.assert_sp_array_equal(np.abs(sparse), exp) + + sparse = SparseArray([1, np.nan, 2, np.nan, -2]) + result = SparseArray(np.sin([1, np.nan, 2, np.nan, -2])) + tm.assert_sp_array_equal(np.sin(sparse), result) + + sparse = SparseArray([1, -1, 2, -2], fill_value=1) + result = SparseArray(np.sin([1, -1, 2, -2]), fill_value=np.sin(1)) + tm.assert_sp_array_equal(np.sin(sparse), result) + + sparse = SparseArray([1, -1, 0, -2], fill_value=0) + result = SparseArray(np.sin([1, -1, 0, -2]), fill_value=np.sin(0)) + tm.assert_sp_array_equal(np.sin(sparse), result) + + def test_ufunc_args(self): + # GH 13853 make sure ufunc is applied to fill_value, including its arg + sparse = SparseArray([1, np.nan, 2, np.nan, -2]) + result = SparseArray([2, np.nan, 3, np.nan, -1]) + tm.assert_sp_array_equal(np.add(sparse, 1), result) + + sparse = SparseArray([1, -1, 2, -2], fill_value=1) + result = SparseArray([2, 0, 3, -1], fill_value=2) + tm.assert_sp_array_equal(np.add(sparse, 1), result) + + sparse = SparseArray([1, -1, 0, -2], fill_value=0) + result = SparseArray([2, 0, 1, -1], fill_value=1) + tm.assert_sp_array_equal(np.add(sparse, 1), result) + + @pytest.mark.parametrize("fill_value", [0.0, np.nan]) + def test_modf(self, fill_value): + # https://github.com/pandas-dev/pandas/issues/26946 + sparse = SparseArray([fill_value] * 10 + [1.1, 2.2], fill_value=fill_value) + r1, r2 = np.modf(sparse) + e1, e2 = np.modf(np.asarray(sparse)) + tm.assert_sp_array_equal(r1, SparseArray(e1, fill_value=fill_value)) + tm.assert_sp_array_equal(r2, SparseArray(e2, fill_value=fill_value)) + + def test_nbytes_integer(self): + arr = SparseArray([1, 0, 0, 0, 2], kind="integer") + result = arr.nbytes + # (2 * 8) + 2 * 4 + assert result == 24 + + def test_nbytes_block(self): + arr = SparseArray([1, 2, 0, 0, 0], kind="block") + result = arr.nbytes + # (2 * 8) + 4 + 4 + # sp_values, blocs, blengths + assert result == 24 + + def test_asarray_datetime64(self): + s = SparseArray(pd.to_datetime(["2012", None, None, "2013"])) + np.asarray(s) + + def test_density(self): + arr = SparseArray([0, 1]) + 
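+        # density = npoints / length: one stored point (the 1) out of two
+        # values, hence 0.5 below.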
assert arr.density == 0.5 + + def test_npoints(self): + arr = SparseArray([0, 1]) + assert arr.npoints == 1 + + +class TestAccessor: + @pytest.mark.parametrize("attr", ["npoints", "density", "fill_value", "sp_values"]) + def test_get_attributes(self, attr): + arr = SparseArray([0, 1]) + ser = pd.Series(arr) + + result = getattr(ser.sparse, attr) + expected = getattr(arr, attr) + assert result == expected + + @td.skip_if_no_scipy + def test_from_coo(self): + import scipy.sparse + + row = [0, 3, 1, 0] + col = [0, 3, 1, 2] + data = [4, 5, 7, 9] + # TODO(scipy#13585): Remove dtype when scipy is fixed + # https://github.com/scipy/scipy/issues/13585 + sp_array = scipy.sparse.coo_matrix((data, (row, col)), dtype="int") + result = pd.Series.sparse.from_coo(sp_array) + + index = pd.MultiIndex.from_arrays([[0, 0, 1, 3], [0, 2, 1, 3]]) + expected = pd.Series([4, 9, 7, 5], index=index, dtype="Sparse[int]") + tm.assert_series_equal(result, expected) + + @td.skip_if_no_scipy + @pytest.mark.parametrize( + "sort_labels, expected_rows, expected_cols, expected_values_pos", + [ + ( + False, + [("b", 2), ("a", 2), ("b", 1), ("a", 1)], + [("z", 1), ("z", 2), ("x", 2), ("z", 0)], + {1: (1, 0), 3: (3, 3)}, + ), + ( + True, + [("a", 1), ("a", 2), ("b", 1), ("b", 2)], + [("x", 2), ("z", 0), ("z", 1), ("z", 2)], + {1: (1, 2), 3: (0, 1)}, + ), + ], + ) + def test_to_coo( + self, sort_labels, expected_rows, expected_cols, expected_values_pos + ): + import scipy.sparse + + values = SparseArray([0, np.nan, 1, 0, None, 3], fill_value=0) + index = pd.MultiIndex.from_tuples( + [ + ("b", 2, "z", 1), + ("a", 2, "z", 2), + ("a", 2, "z", 1), + ("a", 2, "x", 2), + ("b", 1, "z", 1), + ("a", 1, "z", 0), + ] + ) + ss = pd.Series(values, index=index) + + expected_A = np.zeros((4, 4)) + for value, (row, col) in expected_values_pos.items(): + expected_A[row, col] = value + + A, rows, cols = ss.sparse.to_coo( + row_levels=(0, 1), column_levels=(2, 3), sort_labels=sort_labels + ) + assert isinstance(A, scipy.sparse.coo_matrix) + tm.assert_numpy_array_equal(A.toarray(), expected_A) + assert rows == expected_rows + assert cols == expected_cols + + def test_non_sparse_raises(self): + ser = pd.Series([1, 2, 3]) + with pytest.raises(AttributeError, match=".sparse"): + ser.sparse.density + + +def test_setting_fill_value_fillna_still_works(): + # This is why letting users update fill_value / dtype is bad + # astype has the same problem. + arr = SparseArray([1.0, np.nan, 1.0], fill_value=0.0) + arr.fill_value = np.nan + result = arr.isna() + # Can't do direct comparison, since the sp_index will be different + # So let's convert to ndarray and check there. + result = np.asarray(result) + + expected = np.array([False, True, False]) + tm.assert_numpy_array_equal(result, expected) + + +def test_setting_fill_value_updates(): + arr = SparseArray([0.0, np.nan], fill_value=0) + arr.fill_value = np.nan + # use private constructor to get the index right + # otherwise both nans would be un-stored. 
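+    # (Reconstructing from the dense values [nan, nan] with a nan fill_value
+    # would store nothing at all, so its sp_index could not match arr's.)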
+    expected = SparseArray._simple_new(
+        sparse_array=np.array([np.nan]),
+        sparse_index=IntIndex(2, [1]),
+        dtype=SparseDtype(float, np.nan),
+    )
+    tm.assert_sp_array_equal(arr, expected)
+
+
+@pytest.mark.parametrize(
+    "arr, loc",
+    [
+        ([None, 1, 2], 0),
+        ([0, None, 2], 1),
+        ([0, 1, None], 2),
+        ([0, 1, 1, None, None], 3),
+        ([1, 1, 1, 2], -1),
+        ([], -1),
+    ],
+)
+def test_first_fill_value_loc(arr, loc):
+    result = SparseArray(arr)._first_fill_value_loc()
+    assert result == loc
+
+
+@pytest.mark.parametrize(
+    "arr", [[1, 2, np.nan, np.nan], [1, np.nan, 2, np.nan], [1, 2, np.nan]]
+)
+@pytest.mark.parametrize("fill_value", [np.nan, 0, 1])
+def test_unique_na_fill(arr, fill_value):
+    a = SparseArray(arr, fill_value=fill_value).unique()
+    b = pd.Series(arr).unique()
+    assert isinstance(a, SparseArray)
+    a = np.asarray(a)
+    tm.assert_numpy_array_equal(a, b)
+
+
+def test_unique_all_sparse():
+    # https://github.com/pandas-dev/pandas/issues/23168
+    arr = SparseArray([0, 0])
+    result = arr.unique()
+    expected = SparseArray([0])
+    tm.assert_sp_array_equal(result, expected)
+
+
+def test_map():
+    arr = SparseArray([0, 1, 2])
+    expected = SparseArray([10, 11, 12], fill_value=10)
+
+    # dict
+    result = arr.map({0: 10, 1: 11, 2: 12})
+    tm.assert_sp_array_equal(result, expected)
+
+    # series
+    result = arr.map(pd.Series({0: 10, 1: 11, 2: 12}))
+    tm.assert_sp_array_equal(result, expected)
+
+    # function
+    result = arr.map(lambda x: x + 10)
+    expected = SparseArray([10, 11, 12], fill_value=10)
+    tm.assert_sp_array_equal(result, expected)
+
+
+def test_map_missing():
+    arr = SparseArray([0, 1, 2])
+    expected = SparseArray([10, 11, None], fill_value=10)
+
+    result = arr.map({0: 10, 1: 11})
+    tm.assert_sp_array_equal(result, expected)
+
+
+@pytest.mark.parametrize("fill_value", [np.nan, 1])
+def test_dropna(fill_value):
+    # GH-28287
+    arr = SparseArray([np.nan, 1], fill_value=fill_value)
+    exp = SparseArray([1.0], fill_value=fill_value)
+    tm.assert_sp_array_equal(arr.dropna(), exp)
+
+    df = pd.DataFrame({"a": [0, 1], "b": arr})
+    expected_df = pd.DataFrame({"a": [1], "b": exp}, index=Int64Index([1]))
+    tm.assert_equal(df.dropna(), expected_df)
+
+
+def test_drop_duplicates_fill_value():
+    # GH 11726
+    df = pd.DataFrame(np.zeros((5, 5))).apply(lambda x: SparseArray(x, fill_value=0))
+    result = df.drop_duplicates()
+    expected = pd.DataFrame({i: SparseArray([0.0], fill_value=0) for i in range(5)})
+    tm.assert_frame_equal(result, expected)
+
+
+class TestMinMax:
+    @pytest.mark.parametrize(
+        "raw_data,max_expected,min_expected",
+        [
+            (np.arange(5.0), [4], [0]),
+            (-np.arange(5.0), [0], [-4]),
+            (np.array([0, 1, 2, np.nan, 4]), [4], [0]),
+            (np.array([np.nan] * 5), [np.nan], [np.nan]),
+            (np.array([]), [np.nan], [np.nan]),
+        ],
+    )
+    def test_nan_fill_value(self, raw_data, max_expected, min_expected):
+        arr = SparseArray(raw_data)
+        max_result = arr.max()
+        min_result = arr.min()
+        assert max_result in max_expected
+        assert min_result in min_expected
+
+        max_result = arr.max(skipna=False)
+        min_result = arr.min(skipna=False)
+        if np.isnan(raw_data).any():
+            assert np.isnan(max_result)
+            assert np.isnan(min_result)
+        else:
+            assert max_result in max_expected
+            assert min_result in min_expected
+
+    @pytest.mark.parametrize(
+        "fill_value,max_expected,min_expected",
+        [
+            (100, 100, 0),
+            (-100, 1, -100),
+        ],
+    )
+    def test_fill_value(self, fill_value, max_expected, min_expected):
+        arr = SparseArray(
+            np.array([fill_value, 0, 1]), dtype=SparseDtype("int", fill_value)
+        )
+        max_result = 
arr.max()
+        assert max_result == max_expected
+
+        min_result = arr.min()
+        assert min_result == min_expected
+
+    def test_only_fill_value(self):
+        fv = 100
+        arr = SparseArray(np.array([fv, fv, fv]), dtype=SparseDtype("int", fv))
+        assert len(arr._valid_sp_values) == 0
+
+        assert arr.max() == fv
+        assert arr.min() == fv
+        assert arr.max(skipna=False) == fv
+        assert arr.min(skipna=False) == fv
+
+    @pytest.mark.parametrize("func", ["min", "max"])
+    @pytest.mark.parametrize("data", [np.array([]), np.array([np.nan, np.nan])])
+    @pytest.mark.parametrize(
+        "dtype,expected",
+        [
+            (SparseDtype(np.float64, np.nan), np.nan),
+            (SparseDtype(np.float64, 5.0), np.nan),
+            (SparseDtype("datetime64[ns]", pd.NaT), pd.NaT),
+            (SparseDtype("datetime64[ns]", pd.to_datetime("2018-05-05")), pd.NaT),
+        ],
+    )
+    def test_na_value_if_no_valid_values(self, func, data, dtype, expected):
+        arr = SparseArray(data, dtype=dtype)
+        result = getattr(arr, func)()
+        if expected is pd.NaT:
+            # TODO: pin down whether we wrap datetime64("NaT")
+            assert result is pd.NaT or np.isnat(result)
+        else:
+            assert np.isnan(result)
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/sparse/test_combine_concat.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/sparse/test_combine_concat.py
new file mode 100644
index 0000000..0f09af2
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/sparse/test_combine_concat.py
@@ -0,0 +1,62 @@
+import numpy as np
+import pytest
+
+import pandas as pd
+import pandas._testing as tm
+from pandas.core.arrays.sparse import SparseArray
+
+
+class TestSparseArrayConcat:
+    @pytest.mark.parametrize("kind", ["integer", "block"])
+    def test_basic(self, kind):
+        a = SparseArray([1, 0, 0, 2], kind=kind)
+        b = SparseArray([1, 0, 2, 2], kind=kind)
+
+        result = SparseArray._concat_same_type([a, b])
+        # Can't make any assertions about the sparse index itself
+        # since we don't merge sparse blocs across arrays
+        # in to_concat
+        expected = np.array([1, 2, 1, 2, 2], dtype="int64")
+        tm.assert_numpy_array_equal(result.sp_values, expected)
+        assert result.kind == kind
+
+    @pytest.mark.parametrize("kind", ["integer", "block"])
+    def test_uses_first_kind(self, kind):
+        other = "integer" if kind == "block" else "block"
+        a = SparseArray([1, 0, 0, 2], kind=kind)
+        b = SparseArray([1, 0, 2, 2], kind=other)
+
+        result = SparseArray._concat_same_type([a, b])
+        expected = np.array([1, 2, 1, 2, 2], dtype="int64")
+        tm.assert_numpy_array_equal(result.sp_values, expected)
+        assert result.kind == kind
+
+
+@pytest.mark.parametrize(
+    "other, expected_dtype",
+    [
+        # compatible dtype -> preserve sparse
+        (pd.Series([3, 4, 5], dtype="int64"), pd.SparseDtype("int64", 0)),
+        # (pd.Series([3, 4, 5], dtype="Int64"), pd.SparseDtype("int64", 0)),
+        # incompatible dtype -> Sparse[common dtype]
+        (pd.Series([1.5, 2.5, 3.5], dtype="float64"), pd.SparseDtype("float64", 0)),
+        # incompatible dtype -> Sparse[object] dtype
+        (pd.Series(["a", "b", "c"], dtype=object), pd.SparseDtype(object, 0)),
+        # categorical with compatible categories -> dtype of the categories
+        (pd.Series([3, 4, 5], dtype="category"), np.dtype("int64")),
+        (pd.Series([1.5, 2.5, 3.5], dtype="category"), np.dtype("float64")),
+        # categorical with incompatible categories -> object dtype
+        (pd.Series(["a", "b", "c"], dtype="category"), np.dtype(object)),
+    ],
+)
+def test_concat_with_non_sparse(other, expected_dtype):
+    # https://github.com/pandas-dev/pandas/issues/34336
+    s_sparse = pd.Series([1, 0, 2], 
dtype=pd.SparseDtype("int64", 0)) + + result = pd.concat([s_sparse, other], ignore_index=True) + expected = pd.Series(list(s_sparse) + list(other)).astype(expected_dtype) + tm.assert_series_equal(result, expected) + + result = pd.concat([other, s_sparse], ignore_index=True) + expected = pd.Series(list(other) + list(s_sparse)).astype(expected_dtype) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/sparse/test_dtype.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/sparse/test_dtype.py new file mode 100644 index 0000000..58fedbd --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/sparse/test_dtype.py @@ -0,0 +1,209 @@ +import re + +import numpy as np +import pytest + +import pandas as pd +from pandas.core.arrays.sparse import SparseDtype + + +@pytest.mark.parametrize( + "dtype, fill_value", + [ + ("int", 0), + ("float", np.nan), + ("bool", False), + ("object", np.nan), + ("datetime64[ns]", np.datetime64("NaT", "ns")), + ("timedelta64[ns]", np.timedelta64("NaT", "ns")), + ], +) +def test_inferred_dtype(dtype, fill_value): + sparse_dtype = SparseDtype(dtype) + result = sparse_dtype.fill_value + if pd.isna(fill_value): + assert pd.isna(result) and type(result) == type(fill_value) + else: + assert result == fill_value + + +def test_from_sparse_dtype(): + dtype = SparseDtype("float", 0) + result = SparseDtype(dtype) + assert result.fill_value == 0 + + +def test_from_sparse_dtype_fill_value(): + dtype = SparseDtype("int", 1) + result = SparseDtype(dtype, fill_value=2) + expected = SparseDtype("int", 2) + assert result == expected + + +@pytest.mark.parametrize( + "dtype, fill_value", + [ + ("int", None), + ("float", None), + ("bool", None), + ("object", None), + ("datetime64[ns]", None), + ("timedelta64[ns]", None), + ("int", np.nan), + ("float", 0), + ], +) +def test_equal(dtype, fill_value): + a = SparseDtype(dtype, fill_value) + b = SparseDtype(dtype, fill_value) + assert a == b + assert b == a + + +def test_nans_equal(): + a = SparseDtype(float, float("nan")) + b = SparseDtype(float, np.nan) + assert a == b + assert b == a + + +@pytest.mark.parametrize( + "a, b", + [ + (SparseDtype("float64"), SparseDtype("float32")), + (SparseDtype("float64"), SparseDtype("float64", 0)), + (SparseDtype("float64"), SparseDtype("datetime64[ns]", np.nan)), + (SparseDtype(int, pd.NaT), SparseDtype(float, pd.NaT)), + (SparseDtype("float64"), np.dtype("float64")), + ], +) +def test_not_equal(a, b): + assert a != b + + +def test_construct_from_string_raises(): + with pytest.raises( + TypeError, match="Cannot construct a 'SparseDtype' from 'not a dtype'" + ): + SparseDtype.construct_from_string("not a dtype") + + +@pytest.mark.parametrize( + "dtype, expected", + [ + (SparseDtype(int), True), + (SparseDtype(float), True), + (SparseDtype(bool), True), + (SparseDtype(object), False), + (SparseDtype(str), False), + ], +) +def test_is_numeric(dtype, expected): + assert dtype._is_numeric is expected + + +def test_str_uses_object(): + result = SparseDtype(str).subtype + assert result == np.dtype("object") + + +@pytest.mark.parametrize( + "string, expected", + [ + ("Sparse[float64]", SparseDtype(np.dtype("float64"))), + ("Sparse[float32]", SparseDtype(np.dtype("float32"))), + ("Sparse[int]", SparseDtype(np.dtype("int"))), + ("Sparse[str]", SparseDtype(np.dtype("str"))), + ("Sparse[datetime64[ns]]", SparseDtype(np.dtype("datetime64[ns]"))), + ("Sparse", SparseDtype(np.dtype("float"), np.nan)), + ], +) +def 
test_construct_from_string(string, expected): + result = SparseDtype.construct_from_string(string) + assert result == expected + + +@pytest.mark.parametrize( + "a, b, expected", + [ + (SparseDtype(float, 0.0), SparseDtype(np.dtype("float"), 0.0), True), + (SparseDtype(int, 0), SparseDtype(int, 0), True), + (SparseDtype(float, float("nan")), SparseDtype(float, np.nan), True), + (SparseDtype(float, 0), SparseDtype(float, np.nan), False), + (SparseDtype(int, 0.0), SparseDtype(float, 0.0), False), + ], +) +def test_hash_equal(a, b, expected): + result = a == b + assert result is expected + + result = hash(a) == hash(b) + assert result is expected + + +@pytest.mark.parametrize( + "string, expected", + [ + ("Sparse[int]", "int"), + ("Sparse[int, 0]", "int"), + ("Sparse[int64]", "int64"), + ("Sparse[int64, 0]", "int64"), + ("Sparse[datetime64[ns], 0]", "datetime64[ns]"), + ], +) +def test_parse_subtype(string, expected): + subtype, _ = SparseDtype._parse_subtype(string) + assert subtype == expected + + +@pytest.mark.parametrize( + "string", ["Sparse[int, 1]", "Sparse[float, 0.0]", "Sparse[bool, True]"] +) +def test_construct_from_string_fill_value_raises(string): + with pytest.raises(TypeError, match="fill_value in the string is not"): + SparseDtype.construct_from_string(string) + + +@pytest.mark.parametrize( + "original, dtype, expected", + [ + (SparseDtype(int, 0), float, SparseDtype(float, 0.0)), + (SparseDtype(int, 1), float, SparseDtype(float, 1.0)), + (SparseDtype(int, 1), str, SparseDtype(object, "1")), + (SparseDtype(float, 1.5), int, SparseDtype(int, 1)), + ], +) +def test_update_dtype(original, dtype, expected): + result = original.update_dtype(dtype) + assert result == expected + + +@pytest.mark.parametrize( + "original, dtype, expected_error_msg", + [ + ( + SparseDtype(float, np.nan), + int, + re.escape("Cannot convert non-finite values (NA or inf) to integer"), + ), + ( + SparseDtype(str, "abc"), + int, + re.escape("invalid literal for int() with base 10: 'abc'"), + ), + ], +) +def test_update_dtype_raises(original, dtype, expected_error_msg): + with pytest.raises(ValueError, match=expected_error_msg): + original.update_dtype(dtype) + + +def test_repr(): + # GH-34352 + result = str(SparseDtype("int64", fill_value=0)) + expected = "Sparse[int64, 0]" + assert result == expected + + result = str(SparseDtype(object, fill_value="0")) + expected = "Sparse[object, '0']" + assert result == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/sparse/test_libsparse.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/sparse/test_libsparse.py new file mode 100644 index 0000000..527ae3c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/sparse/test_libsparse.py @@ -0,0 +1,618 @@ +import operator + +import numpy as np +import pytest + +import pandas._libs.sparse as splib +import pandas.util._test_decorators as td + +from pandas import Series +import pandas._testing as tm +from pandas.core.arrays.sparse import ( + BlockIndex, + IntIndex, + make_sparse_index, +) + +TEST_LENGTH = 20 + +plain_case = { + "xloc": [0, 7, 15], + "xlen": [3, 5, 5], + "yloc": [2, 9, 14], + "ylen": [2, 3, 5], + "intersect_loc": [2, 9, 15], + "intersect_len": [1, 3, 4], +} +delete_blocks = { + "xloc": [0, 5], + "xlen": [4, 4], + "yloc": [1], + "ylen": [4], + "intersect_loc": [1], + "intersect_len": [3], +} +split_blocks = { + "xloc": [0], + "xlen": [10], + "yloc": [0, 5], + "ylen": [3, 7], + "intersect_loc": [0, 5], + "intersect_len": [3, 5], +} +skip_block = { + "xloc": 
[10], + "xlen": [5], + "yloc": [0, 12], + "ylen": [5, 3], + "intersect_loc": [12], + "intersect_len": [3], +} + +no_intersect = { + "xloc": [0, 10], + "xlen": [4, 6], + "yloc": [5, 17], + "ylen": [4, 2], + "intersect_loc": [], + "intersect_len": [], +} + + +def check_cases(_check_case): + def _check_case_dict(case): + _check_case( + case["xloc"], + case["xlen"], + case["yloc"], + case["ylen"], + case["intersect_loc"], + case["intersect_len"], + ) + + _check_case_dict(plain_case) + _check_case_dict(delete_blocks) + _check_case_dict(split_blocks) + _check_case_dict(skip_block) + _check_case_dict(no_intersect) + + # one or both is empty + _check_case([0], [5], [], [], [], []) + _check_case([], [], [], [], [], []) + + +class TestSparseIndexUnion: + def test_index_make_union(self): + def _check_case(xloc, xlen, yloc, ylen, eloc, elen): + xindex = BlockIndex(TEST_LENGTH, xloc, xlen) + yindex = BlockIndex(TEST_LENGTH, yloc, ylen) + bresult = xindex.make_union(yindex) + assert isinstance(bresult, BlockIndex) + tm.assert_numpy_array_equal(bresult.blocs, np.array(eloc, dtype=np.int32)) + tm.assert_numpy_array_equal( + bresult.blengths, np.array(elen, dtype=np.int32) + ) + + ixindex = xindex.to_int_index() + iyindex = yindex.to_int_index() + iresult = ixindex.make_union(iyindex) + assert isinstance(iresult, IntIndex) + tm.assert_numpy_array_equal(iresult.indices, bresult.to_int_index().indices) + + """ + x: ---- + y: ---- + r: -------- + """ + xloc = [0] + xlen = [5] + yloc = [5] + ylen = [4] + eloc = [0] + elen = [9] + _check_case(xloc, xlen, yloc, ylen, eloc, elen) + """ + x: ----- ----- + y: ----- -- + """ + xloc = [0, 10] + xlen = [5, 5] + yloc = [2, 17] + ylen = [5, 2] + eloc = [0, 10, 17] + elen = [7, 5, 2] + _check_case(xloc, xlen, yloc, ylen, eloc, elen) + """ + x: ------ + y: ------- + r: ---------- + """ + xloc = [1] + xlen = [5] + yloc = [3] + ylen = [5] + eloc = [1] + elen = [7] + _check_case(xloc, xlen, yloc, ylen, eloc, elen) + """ + x: ------ ----- + y: ------- + r: ------------- + """ + xloc = [2, 10] + xlen = [4, 4] + yloc = [4] + ylen = [8] + eloc = [2] + elen = [12] + _check_case(xloc, xlen, yloc, ylen, eloc, elen) + """ + x: --- ----- + y: ------- + r: ------------- + """ + xloc = [0, 5] + xlen = [3, 5] + yloc = [0] + ylen = [7] + eloc = [0] + elen = [10] + _check_case(xloc, xlen, yloc, ylen, eloc, elen) + """ + x: ------ ----- + y: ------- --- + r: ------------- + """ + xloc = [2, 10] + xlen = [4, 4] + yloc = [4, 13] + ylen = [8, 4] + eloc = [2] + elen = [15] + _check_case(xloc, xlen, yloc, ylen, eloc, elen) + """ + x: ---------------------- + y: ---- ---- --- + r: ---------------------- + """ + xloc = [2] + xlen = [15] + yloc = [4, 9, 14] + ylen = [3, 2, 2] + eloc = [2] + elen = [15] + _check_case(xloc, xlen, yloc, ylen, eloc, elen) + """ + x: ---- --- + y: --- --- + """ + xloc = [0, 10] + xlen = [3, 3] + yloc = [5, 15] + ylen = [2, 2] + eloc = [0, 5, 10, 15] + elen = [3, 2, 3, 2] + _check_case(xloc, xlen, yloc, ylen, eloc, elen) + + def test_int_index_make_union(self): + a = IntIndex(5, np.array([0, 3, 4], dtype=np.int32)) + b = IntIndex(5, np.array([0, 2], dtype=np.int32)) + res = a.make_union(b) + exp = IntIndex(5, np.array([0, 2, 3, 4], np.int32)) + assert res.equals(exp) + + a = IntIndex(5, np.array([], dtype=np.int32)) + b = IntIndex(5, np.array([0, 2], dtype=np.int32)) + res = a.make_union(b) + exp = IntIndex(5, np.array([0, 2], np.int32)) + assert res.equals(exp) + + a = IntIndex(5, np.array([], dtype=np.int32)) + b = IntIndex(5, np.array([], dtype=np.int32)) + res = 
a.make_union(b) + exp = IntIndex(5, np.array([], np.int32)) + assert res.equals(exp) + + a = IntIndex(5, np.array([0, 1, 2, 3, 4], dtype=np.int32)) + b = IntIndex(5, np.array([0, 1, 2, 3, 4], dtype=np.int32)) + res = a.make_union(b) + exp = IntIndex(5, np.array([0, 1, 2, 3, 4], np.int32)) + assert res.equals(exp) + + a = IntIndex(5, np.array([0, 1], dtype=np.int32)) + b = IntIndex(4, np.array([0, 1], dtype=np.int32)) + + msg = "Indices must reference same underlying length" + with pytest.raises(ValueError, match=msg): + a.make_union(b) + + +class TestSparseIndexIntersect: + @td.skip_if_windows + def test_intersect(self): + def _check_correct(a, b, expected): + result = a.intersect(b) + assert result.equals(expected) + + def _check_length_exc(a, longer): + msg = "Indices must reference same underlying length" + with pytest.raises(Exception, match=msg): + a.intersect(longer) + + def _check_case(xloc, xlen, yloc, ylen, eloc, elen): + xindex = BlockIndex(TEST_LENGTH, xloc, xlen) + yindex = BlockIndex(TEST_LENGTH, yloc, ylen) + expected = BlockIndex(TEST_LENGTH, eloc, elen) + longer_index = BlockIndex(TEST_LENGTH + 1, yloc, ylen) + + _check_correct(xindex, yindex, expected) + _check_correct( + xindex.to_int_index(), yindex.to_int_index(), expected.to_int_index() + ) + + _check_length_exc(xindex, longer_index) + _check_length_exc(xindex.to_int_index(), longer_index.to_int_index()) + + check_cases(_check_case) + + def test_intersect_empty(self): + xindex = IntIndex(4, np.array([], dtype=np.int32)) + yindex = IntIndex(4, np.array([2, 3], dtype=np.int32)) + assert xindex.intersect(yindex).equals(xindex) + assert yindex.intersect(xindex).equals(xindex) + + xindex = xindex.to_block_index() + yindex = yindex.to_block_index() + assert xindex.intersect(yindex).equals(xindex) + assert yindex.intersect(xindex).equals(xindex) + + def test_intersect_identical(self): + cases = [ + IntIndex(5, np.array([1, 2], dtype=np.int32)), + IntIndex(5, np.array([0, 2, 4], dtype=np.int32)), + IntIndex(0, np.array([], dtype=np.int32)), + IntIndex(5, np.array([], dtype=np.int32)), + ] + + for case in cases: + assert case.intersect(case).equals(case) + case = case.to_block_index() + assert case.intersect(case).equals(case) + + +class TestSparseIndexCommon: + def test_int_internal(self): + idx = make_sparse_index(4, np.array([2, 3], dtype=np.int32), kind="integer") + assert isinstance(idx, IntIndex) + assert idx.npoints == 2 + tm.assert_numpy_array_equal(idx.indices, np.array([2, 3], dtype=np.int32)) + + idx = make_sparse_index(4, np.array([], dtype=np.int32), kind="integer") + assert isinstance(idx, IntIndex) + assert idx.npoints == 0 + tm.assert_numpy_array_equal(idx.indices, np.array([], dtype=np.int32)) + + idx = make_sparse_index( + 4, np.array([0, 1, 2, 3], dtype=np.int32), kind="integer" + ) + assert isinstance(idx, IntIndex) + assert idx.npoints == 4 + tm.assert_numpy_array_equal(idx.indices, np.array([0, 1, 2, 3], dtype=np.int32)) + + def test_block_internal(self): + idx = make_sparse_index(4, np.array([2, 3], dtype=np.int32), kind="block") + assert isinstance(idx, BlockIndex) + assert idx.npoints == 2 + tm.assert_numpy_array_equal(idx.blocs, np.array([2], dtype=np.int32)) + tm.assert_numpy_array_equal(idx.blengths, np.array([2], dtype=np.int32)) + + idx = make_sparse_index(4, np.array([], dtype=np.int32), kind="block") + assert isinstance(idx, BlockIndex) + assert idx.npoints == 0 + tm.assert_numpy_array_equal(idx.blocs, np.array([], dtype=np.int32)) + tm.assert_numpy_array_equal(idx.blengths, np.array([], 
dtype=np.int32))
+
+        idx = make_sparse_index(4, np.array([0, 1, 2, 3], dtype=np.int32), kind="block")
+        assert isinstance(idx, BlockIndex)
+        assert idx.npoints == 4
+        tm.assert_numpy_array_equal(idx.blocs, np.array([0], dtype=np.int32))
+        tm.assert_numpy_array_equal(idx.blengths, np.array([4], dtype=np.int32))
+
+        idx = make_sparse_index(4, np.array([0, 2, 3], dtype=np.int32), kind="block")
+        assert isinstance(idx, BlockIndex)
+        assert idx.npoints == 3
+        tm.assert_numpy_array_equal(idx.blocs, np.array([0, 2], dtype=np.int32))
+        tm.assert_numpy_array_equal(idx.blengths, np.array([1, 2], dtype=np.int32))
+
+    def test_lookup(self):
+        for kind in ["integer", "block"]:
+            idx = make_sparse_index(4, np.array([2, 3], dtype=np.int32), kind=kind)
+            assert idx.lookup(-1) == -1
+            assert idx.lookup(0) == -1
+            assert idx.lookup(1) == -1
+            assert idx.lookup(2) == 0
+            assert idx.lookup(3) == 1
+            assert idx.lookup(4) == -1
+
+            idx = make_sparse_index(4, np.array([], dtype=np.int32), kind=kind)
+
+            for i in range(-1, 5):
+                assert idx.lookup(i) == -1
+
+            idx = make_sparse_index(
+                4, np.array([0, 1, 2, 3], dtype=np.int32), kind=kind
+            )
+            assert idx.lookup(-1) == -1
+            assert idx.lookup(0) == 0
+            assert idx.lookup(1) == 1
+            assert idx.lookup(2) == 2
+            assert idx.lookup(3) == 3
+            assert idx.lookup(4) == -1
+
+            idx = make_sparse_index(4, np.array([0, 2, 3], dtype=np.int32), kind=kind)
+            assert idx.lookup(-1) == -1
+            assert idx.lookup(0) == 0
+            assert idx.lookup(1) == -1
+            assert idx.lookup(2) == 1
+            assert idx.lookup(3) == 2
+            assert idx.lookup(4) == -1
+
+    def test_lookup_array(self):
+        for kind in ["integer", "block"]:
+            idx = make_sparse_index(4, np.array([2, 3], dtype=np.int32), kind=kind)
+
+            res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32))
+            exp = np.array([-1, -1, 0], dtype=np.int32)
+            tm.assert_numpy_array_equal(res, exp)
+
+            res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32))
+            exp = np.array([-1, 0, -1, 1], dtype=np.int32)
+            tm.assert_numpy_array_equal(res, exp)
+
+            idx = make_sparse_index(4, np.array([], dtype=np.int32), kind=kind)
+            res = idx.lookup_array(np.array([-1, 0, 2, 4], dtype=np.int32))
+            exp = np.array([-1, -1, -1, -1], dtype=np.int32)
+            tm.assert_numpy_array_equal(res, exp)
+
+            idx = make_sparse_index(
+                4, np.array([0, 1, 2, 3], dtype=np.int32), kind=kind
+            )
+            res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32))
+            exp = np.array([-1, 0, 2], dtype=np.int32)
+            tm.assert_numpy_array_equal(res, exp)
+
+            res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32))
+            exp = np.array([-1, 2, 1, 3], dtype=np.int32)
+            tm.assert_numpy_array_equal(res, exp)
+
+            idx = make_sparse_index(4, np.array([0, 2, 3], dtype=np.int32), kind=kind)
+            res = idx.lookup_array(np.array([2, 1, 3, 0], dtype=np.int32))
+            exp = np.array([1, -1, 2, 0], dtype=np.int32)
+            tm.assert_numpy_array_equal(res, exp)
+
+            res = idx.lookup_array(np.array([1, 4, 2, 5], dtype=np.int32))
+            exp = np.array([-1, -1, 1, -1], dtype=np.int32)
+            tm.assert_numpy_array_equal(res, exp)
+
+    @pytest.mark.parametrize(
+        "idx, expected",
+        [
+            [0, -1],
+            [5, 0],
+            [7, 2],
+            [8, -1],
+            [9, -1],
+            [10, -1],
+            [11, -1],
+            [12, 3],
+            [17, 8],
+            [18, -1],
+        ],
+    )
+    def test_lookup_basics(self, idx, expected):
+        bindex = BlockIndex(20, [5, 12], [3, 6])
+        assert bindex.lookup(idx) == expected
+
+        iindex = bindex.to_int_index()
+        assert iindex.lookup(idx) == expected
+
+
+class TestBlockIndex:
+    def test_block_internal(self):
+        idx = make_sparse_index(4, np.array([2, 3], dtype=np.int32), kind="block")
+        assert isinstance(idx, BlockIndex)
+        assert idx.npoints == 2
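+        # "block" kind stores runs of consecutive points as (bloc, blength)
+        # pairs: points [2, 3] compress to one block at position 2, length 2.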
+ tm.assert_numpy_array_equal(idx.blocs, np.array([2], dtype=np.int32)) + tm.assert_numpy_array_equal(idx.blengths, np.array([2], dtype=np.int32)) + + idx = make_sparse_index(4, np.array([], dtype=np.int32), kind="block") + assert isinstance(idx, BlockIndex) + assert idx.npoints == 0 + tm.assert_numpy_array_equal(idx.blocs, np.array([], dtype=np.int32)) + tm.assert_numpy_array_equal(idx.blengths, np.array([], dtype=np.int32)) + + idx = make_sparse_index(4, np.array([0, 1, 2, 3], dtype=np.int32), kind="block") + assert isinstance(idx, BlockIndex) + assert idx.npoints == 4 + tm.assert_numpy_array_equal(idx.blocs, np.array([0], dtype=np.int32)) + tm.assert_numpy_array_equal(idx.blengths, np.array([4], dtype=np.int32)) + + idx = make_sparse_index(4, np.array([0, 2, 3], dtype=np.int32), kind="block") + assert isinstance(idx, BlockIndex) + assert idx.npoints == 3 + tm.assert_numpy_array_equal(idx.blocs, np.array([0, 2], dtype=np.int32)) + tm.assert_numpy_array_equal(idx.blengths, np.array([1, 2], dtype=np.int32)) + + def test_make_block_boundary(self): + for i in [5, 10, 100, 101]: + idx = make_sparse_index(i, np.arange(0, i, 2, dtype=np.int32), kind="block") + + exp = np.arange(0, i, 2, dtype=np.int32) + tm.assert_numpy_array_equal(idx.blocs, exp) + tm.assert_numpy_array_equal(idx.blengths, np.ones(len(exp), dtype=np.int32)) + + def test_equals(self): + index = BlockIndex(10, [0, 4], [2, 5]) + + assert index.equals(index) + assert not index.equals(BlockIndex(10, [0, 4], [2, 6])) + + def test_check_integrity(self): + locs = [] + lengths = [] + + # 0-length OK + BlockIndex(0, locs, lengths) + + # also OK even though empty + BlockIndex(1, locs, lengths) + + msg = "Block 0 extends beyond end" + with pytest.raises(ValueError, match=msg): + BlockIndex(10, [5], [10]) + + msg = "Block 0 overlaps" + with pytest.raises(ValueError, match=msg): + BlockIndex(10, [2, 5], [5, 3]) + + def test_to_int_index(self): + locs = [0, 10] + lengths = [4, 6] + exp_inds = [0, 1, 2, 3, 10, 11, 12, 13, 14, 15] + + block = BlockIndex(20, locs, lengths) + dense = block.to_int_index() + + tm.assert_numpy_array_equal(dense.indices, np.array(exp_inds, dtype=np.int32)) + + def test_to_block_index(self): + index = BlockIndex(10, [0, 5], [4, 5]) + assert index.to_block_index() is index + + +class TestIntIndex: + def test_check_integrity(self): + + # Too many indices than specified in self.length + msg = "Too many indices" + + with pytest.raises(ValueError, match=msg): + IntIndex(length=1, indices=[1, 2, 3]) + + # No index can be negative. + msg = "No index can be less than zero" + + with pytest.raises(ValueError, match=msg): + IntIndex(length=5, indices=[1, -2, 3]) + + # No index can be negative. + msg = "No index can be less than zero" + + with pytest.raises(ValueError, match=msg): + IntIndex(length=5, indices=[1, -2, 3]) + + # All indices must be less than the length. + msg = "All indices must be less than the length" + + with pytest.raises(ValueError, match=msg): + IntIndex(length=5, indices=[1, 2, 5]) + + with pytest.raises(ValueError, match=msg): + IntIndex(length=5, indices=[1, 2, 6]) + + # Indices must be strictly ascending. 
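+        # e.g. [1, 3, 2] (not increasing) and [1, 3, 3] (duplicate) both fail.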
+ msg = "Indices must be strictly increasing" + + with pytest.raises(ValueError, match=msg): + IntIndex(length=5, indices=[1, 3, 2]) + + with pytest.raises(ValueError, match=msg): + IntIndex(length=5, indices=[1, 3, 3]) + + def test_int_internal(self): + idx = make_sparse_index(4, np.array([2, 3], dtype=np.int32), kind="integer") + assert isinstance(idx, IntIndex) + assert idx.npoints == 2 + tm.assert_numpy_array_equal(idx.indices, np.array([2, 3], dtype=np.int32)) + + idx = make_sparse_index(4, np.array([], dtype=np.int32), kind="integer") + assert isinstance(idx, IntIndex) + assert idx.npoints == 0 + tm.assert_numpy_array_equal(idx.indices, np.array([], dtype=np.int32)) + + idx = make_sparse_index( + 4, np.array([0, 1, 2, 3], dtype=np.int32), kind="integer" + ) + assert isinstance(idx, IntIndex) + assert idx.npoints == 4 + tm.assert_numpy_array_equal(idx.indices, np.array([0, 1, 2, 3], dtype=np.int32)) + + def test_equals(self): + index = IntIndex(10, [0, 1, 2, 3, 4]) + assert index.equals(index) + assert not index.equals(IntIndex(10, [0, 1, 2, 3])) + + def test_to_block_index(self): + def _check_case(xloc, xlen, yloc, ylen, eloc, elen): + xindex = BlockIndex(TEST_LENGTH, xloc, xlen) + yindex = BlockIndex(TEST_LENGTH, yloc, ylen) + + # see if survive the round trip + xbindex = xindex.to_int_index().to_block_index() + ybindex = yindex.to_int_index().to_block_index() + assert isinstance(xbindex, BlockIndex) + assert xbindex.equals(xindex) + assert ybindex.equals(yindex) + + check_cases(_check_case) + + def test_to_int_index(self): + index = IntIndex(10, [2, 3, 4, 5, 6]) + assert index.to_int_index() is index + + +class TestSparseOperators: + def _op_tests(self, sparse_op, python_op): + def _check_case(xloc, xlen, yloc, ylen, eloc, elen): + xindex = BlockIndex(TEST_LENGTH, xloc, xlen) + yindex = BlockIndex(TEST_LENGTH, yloc, ylen) + + xdindex = xindex.to_int_index() + ydindex = yindex.to_int_index() + + x = np.arange(xindex.npoints) * 10.0 + 1 + y = np.arange(yindex.npoints) * 100.0 + 1 + + xfill = 0 + yfill = 2 + + result_block_vals, rb_index, bfill = sparse_op( + x, xindex, xfill, y, yindex, yfill + ) + result_int_vals, ri_index, ifill = sparse_op( + x, xdindex, xfill, y, ydindex, yfill + ) + + assert rb_index.to_int_index().equals(ri_index) + tm.assert_numpy_array_equal(result_block_vals, result_int_vals) + assert bfill == ifill + + # check versus Series... 
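+            # rebuild dense Series from each operand (values at their indices,
+            # reindexed to TEST_LENGTH and filled with the fill value), apply
+            # the Python op, then compare at the result's indices.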
+ xseries = Series(x, xdindex.indices) + xseries = xseries.reindex(np.arange(TEST_LENGTH)).fillna(xfill) + + yseries = Series(y, ydindex.indices) + yseries = yseries.reindex(np.arange(TEST_LENGTH)).fillna(yfill) + + series_result = python_op(xseries, yseries) + series_result = series_result.reindex(ri_index.indices) + + tm.assert_numpy_array_equal(result_block_vals, series_result.values) + tm.assert_numpy_array_equal(result_int_vals, series_result.values) + + check_cases(_check_case) + + @pytest.mark.parametrize("opname", ["add", "sub", "mul", "truediv", "floordiv"]) + def test_op(self, opname): + sparse_op = getattr(splib, f"sparse_{opname}_float64") + python_op = getattr(operator, opname) + self._op_tests(sparse_op, python_op) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/string_/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/string_/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/string_/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/string_/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..ff68a47 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/string_/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/string_/__pycache__/test_string.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/string_/__pycache__/test_string.cpython-38.pyc new file mode 100644 index 0000000..5ac5ebe Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/string_/__pycache__/test_string.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/string_/__pycache__/test_string_arrow.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/string_/__pycache__/test_string_arrow.cpython-38.pyc new file mode 100644 index 0000000..cb315e3 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/string_/__pycache__/test_string_arrow.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/string_/test_string.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/string_/test_string.py new file mode 100644 index 0000000..22fe7bb --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/string_/test_string.py @@ -0,0 +1,555 @@ +""" +This module tests the functionality of StringArray and ArrowStringArray. 
+Tests for the str accessors are in pandas/tests/strings/test_string_array.py
+"""
+import numpy as np
+import pytest
+
+import pandas.util._test_decorators as td
+
+from pandas.core.dtypes.common import is_dtype_equal
+
+import pandas as pd
+import pandas._testing as tm
+from pandas.core.arrays.string_arrow import ArrowStringArray
+
+
+@pytest.fixture
+def dtype(string_storage):
+    return pd.StringDtype(storage=string_storage)
+
+
+@pytest.fixture
+def cls(dtype):
+    return dtype.construct_array_type()
+
+
+def test_repr(dtype):
+    df = pd.DataFrame({"A": pd.array(["a", pd.NA, "b"], dtype=dtype)})
+    expected = "      A\n0     a\n1  <NA>\n2     b"
+    assert repr(df) == expected
+
+    expected = "0       a\n1    <NA>\n2       b\nName: A, dtype: string"
+    assert repr(df.A) == expected
+
+    arr_name = "ArrowStringArray" if dtype.storage == "pyarrow" else "StringArray"
+    expected = f"<{arr_name}>\n['a', <NA>, 'b']\nLength: 3, dtype: string"
+    assert repr(df.A.array) == expected
+
+
+def test_none_to_nan(cls):
+    a = cls._from_sequence(["a", None, "b"])
+    assert a[1] is not None
+    assert a[1] is pd.NA
+
+
+def test_setitem_validates(cls):
+    arr = cls._from_sequence(["a", "b"])
+
+    if cls is pd.arrays.StringArray:
+        msg = "Cannot set non-string value '10' into a StringArray."
+    else:
+        msg = "Scalar must be NA or str"
+    with pytest.raises(ValueError, match=msg):
+        arr[0] = 10
+
+    if cls is pd.arrays.StringArray:
+        msg = "Must provide strings."
+    else:
+        msg = "Scalar must be NA or str"
+    with pytest.raises(ValueError, match=msg):
+        arr[:] = np.array([1, 2])
+
+
+def test_setitem_with_scalar_string(dtype):
+    # is_float_dtype considers some strings, like 'd', to be floats
+    # which can cause issues.
+    arr = pd.array(["a", "c"], dtype=dtype)
+    arr[0] = "d"
+    expected = pd.array(["d", "c"], dtype=dtype)
+    tm.assert_extension_array_equal(arr, expected)
+
+
+def test_astype_roundtrip(dtype, request):
+    if dtype.storage == "pyarrow":
+        reason = "ValueError: Could not convert object to NumPy datetime"
+        mark = pytest.mark.xfail(reason=reason, raises=ValueError)
+        request.node.add_marker(mark)
+    else:
+        mark = pytest.mark.xfail(
+            reason="GH#36153 casting from StringArray to dt64 fails", raises=ValueError
+        )
+        request.node.add_marker(mark)
+
+    ser = pd.Series(pd.date_range("2000", periods=12))
+    ser[0] = None
+
+    casted = ser.astype(dtype)
+    assert is_dtype_equal(casted.dtype, dtype)
+
+    result = casted.astype("datetime64[ns]")
+    tm.assert_series_equal(result, ser)
+
+
+def test_add(dtype, request):
+    if dtype.storage == "pyarrow":
+        reason = (
+            "unsupported operand type(s) for +: 'ArrowStringArray' and "
+            "'ArrowStringArray'"
+        )
+        mark = pytest.mark.xfail(raises=TypeError, reason=reason)
+        request.node.add_marker(mark)
+
+    a = pd.Series(["a", "b", "c", None, None], dtype=dtype)
+    b = pd.Series(["x", "y", None, "z", None], dtype=dtype)
+
+    result = a + b
+    expected = pd.Series(["ax", "by", None, None, None], dtype=dtype)
+    tm.assert_series_equal(result, expected)
+
+    result = a.add(b)
+    tm.assert_series_equal(result, expected)
+
+    result = a.radd(b)
+    expected = pd.Series(["xa", "yb", None, None, None], dtype=dtype)
+    tm.assert_series_equal(result, expected)
+
+    result = a.add(b, fill_value="-")
+    expected = pd.Series(["ax", "by", "c-", "-z", None], dtype=dtype)
+    tm.assert_series_equal(result, expected)
+
+
+def test_add_2d(dtype, request):
+    if dtype.storage == "pyarrow":
+        reason = "Failed: DID NOT RAISE <class 'ValueError'>"
+        mark = pytest.mark.xfail(raises=None, reason=reason)
+        request.node.add_marker(mark)
+
+    a = pd.array(["a", "b", "c"], dtype=dtype)
+    b = 
np.array([["a", "b", "c"]], dtype=object) + with pytest.raises(ValueError, match="3 != 1"): + a + b + + s = pd.Series(a) + with pytest.raises(ValueError, match="3 != 1"): + s + b + + +def test_add_sequence(dtype, request): + if dtype.storage == "pyarrow": + reason = "unsupported operand type(s) for +: 'ArrowStringArray' and 'list'" + mark = pytest.mark.xfail(raises=TypeError, reason=reason) + request.node.add_marker(mark) + + a = pd.array(["a", "b", None, None], dtype=dtype) + other = ["x", None, "y", None] + + result = a + other + expected = pd.array(["ax", None, None, None], dtype=dtype) + tm.assert_extension_array_equal(result, expected) + + result = other + a + expected = pd.array(["xa", None, None, None], dtype=dtype) + tm.assert_extension_array_equal(result, expected) + + +def test_mul(dtype, request): + if dtype.storage == "pyarrow": + reason = "unsupported operand type(s) for *: 'ArrowStringArray' and 'int'" + mark = pytest.mark.xfail(raises=TypeError, reason=reason) + request.node.add_marker(mark) + + a = pd.array(["a", "b", None], dtype=dtype) + result = a * 2 + expected = pd.array(["aa", "bb", None], dtype=dtype) + tm.assert_extension_array_equal(result, expected) + + result = 2 * a + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.xfail(reason="GH-28527") +def test_add_strings(dtype): + arr = pd.array(["a", "b", "c", "d"], dtype=dtype) + df = pd.DataFrame([["t", "u", "v", "w"]]) + assert arr.__add__(df) is NotImplemented + + result = arr + df + expected = pd.DataFrame([["at", "bu", "cv", "dw"]]).astype(dtype) + tm.assert_frame_equal(result, expected) + + result = df + arr + expected = pd.DataFrame([["ta", "ub", "vc", "wd"]]).astype(dtype) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.xfail(reason="GH-28527") +def test_add_frame(dtype): + arr = pd.array(["a", "b", np.nan, np.nan], dtype=dtype) + df = pd.DataFrame([["x", np.nan, "y", np.nan]]) + + assert arr.__add__(df) is NotImplemented + + result = arr + df + expected = pd.DataFrame([["ax", np.nan, np.nan, np.nan]]).astype(dtype) + tm.assert_frame_equal(result, expected) + + result = df + arr + expected = pd.DataFrame([["xa", np.nan, np.nan, np.nan]]).astype(dtype) + tm.assert_frame_equal(result, expected) + + +def test_comparison_methods_scalar(comparison_op, dtype): + op_name = f"__{comparison_op.__name__}__" + a = pd.array(["a", None, "c"], dtype=dtype) + other = "a" + result = getattr(a, op_name)(other) + expected = np.array([getattr(item, op_name)(other) for item in a], dtype=object) + expected = pd.array(expected, dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + +def test_comparison_methods_scalar_pd_na(comparison_op, dtype): + op_name = f"__{comparison_op.__name__}__" + a = pd.array(["a", None, "c"], dtype=dtype) + result = getattr(a, op_name)(pd.NA) + expected = pd.array([None, None, None], dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + +def test_comparison_methods_scalar_not_string(comparison_op, dtype): + op_name = f"__{comparison_op.__name__}__" + + a = pd.array(["a", None, "c"], dtype=dtype) + other = 42 + + if op_name not in ["__eq__", "__ne__"]: + with pytest.raises(TypeError, match="not supported between"): + getattr(a, op_name)(other) + + return + + result = getattr(a, op_name)(other) + expected_data = {"__eq__": [False, None, False], "__ne__": [True, None, True]}[ + op_name + ] + expected = pd.array(expected_data, dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + +def 
test_comparison_methods_array(comparison_op, dtype): + + op_name = f"__{comparison_op.__name__}__" + + a = pd.array(["a", None, "c"], dtype=dtype) + other = [None, None, "c"] + result = getattr(a, op_name)(other) + expected = np.empty_like(a, dtype="object") + expected[-1] = getattr(other[-1], op_name)(a[-1]) + expected = pd.array(expected, dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + result = getattr(a, op_name)(pd.NA) + expected = pd.array([None, None, None], dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + +def test_constructor_raises(cls): + if cls is pd.arrays.StringArray: + msg = "StringArray requires a sequence of strings or pandas.NA" + else: + msg = "Unsupported type '<class 'numpy.ndarray'>' for ArrowStringArray" + + with pytest.raises(ValueError, match=msg): + cls(np.array(["a", "b"], dtype="S1")) + + with pytest.raises(ValueError, match=msg): + cls(np.array([])) + + with pytest.raises(ValueError, match=msg): + cls(np.array(["a", np.nan], dtype=object)) + + with pytest.raises(ValueError, match=msg): + cls(np.array(["a", None], dtype=object)) + + with pytest.raises(ValueError, match=msg): + cls(np.array(["a", pd.NaT], dtype=object)) + + +@pytest.mark.parametrize("copy", [True, False]) +def test_from_sequence_no_mutate(copy, cls, request): + if cls is ArrowStringArray and copy is False: + mark = pytest.mark.xfail( + raises=AssertionError, reason="numpy array are different" + ) + request.node.add_marker(mark) + + nan_arr = np.array(["a", np.nan], dtype=object) + na_arr = np.array(["a", pd.NA], dtype=object) + + result = cls._from_sequence(nan_arr, copy=copy) + + if cls is ArrowStringArray: + import pyarrow as pa + + expected = cls(pa.array(na_arr, type=pa.string(), from_pandas=True)) + else: + expected = cls(na_arr) + + tm.assert_extension_array_equal(result, expected) + + expected = nan_arr if copy else na_arr + tm.assert_numpy_array_equal(nan_arr, expected) + + +def test_astype_int(dtype): + arr = pd.array(["1", "2", "3"], dtype=dtype) + result = arr.astype("int64") + expected = np.array([1, 2, 3], dtype="int64") + tm.assert_numpy_array_equal(result, expected) + + arr = pd.array(["1", pd.NA, "3"], dtype=dtype) + msg = r"int\(\) argument must be a string, a bytes-like object or a( real)? 
number" + with pytest.raises(TypeError, match=msg): + arr.astype("int64") + + +def test_astype_nullable_int(dtype): + arr = pd.array(["1", pd.NA, "3"], dtype=dtype) + + result = arr.astype("Int64") + expected = pd.array([1, pd.NA, 3], dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + +def test_astype_float(dtype, any_float_dtype): + # Don't compare arrays (37974) + ser = pd.Series(["1.1", pd.NA, "3.3"], dtype=dtype) + result = ser.astype(any_float_dtype) + expected = pd.Series([1.1, np.nan, 3.3], dtype=any_float_dtype) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("skipna", [True, False]) +@pytest.mark.xfail(reason="Not implemented StringArray.sum") +def test_reduce(skipna, dtype): + arr = pd.Series(["a", "b", "c"], dtype=dtype) + result = arr.sum(skipna=skipna) + assert result == "abc" + + +@pytest.mark.parametrize("skipna", [True, False]) +@pytest.mark.xfail(reason="Not implemented StringArray.sum") +def test_reduce_missing(skipna, dtype): + arr = pd.Series([None, "a", None, "b", "c", None], dtype=dtype) + result = arr.sum(skipna=skipna) + if skipna: + assert result == "abc" + else: + assert pd.isna(result) + + +@pytest.mark.parametrize("method", ["min", "max"]) +@pytest.mark.parametrize("skipna", [True, False]) +def test_min_max(method, skipna, dtype, request): + if dtype.storage == "pyarrow": + reason = "'ArrowStringArray' object has no attribute 'max'" + mark = pytest.mark.xfail(raises=TypeError, reason=reason) + request.node.add_marker(mark) + + arr = pd.Series(["a", "b", "c", None], dtype=dtype) + result = getattr(arr, method)(skipna=skipna) + if skipna: + expected = "a" if method == "min" else "c" + assert result == expected + else: + assert result is pd.NA + + +@pytest.mark.parametrize("method", ["min", "max"]) +@pytest.mark.parametrize("box", [pd.Series, pd.array]) +def test_min_max_numpy(method, box, dtype, request): + if dtype.storage == "pyarrow": + if box is pd.array: + reason = "'<=' not supported between instances of 'str' and 'NoneType'" + else: + reason = "'ArrowStringArray' object has no attribute 'max'" + mark = pytest.mark.xfail(raises=TypeError, reason=reason) + request.node.add_marker(mark) + + arr = box(["a", "b", "c", None], dtype=dtype) + result = getattr(np, method)(arr) + expected = "a" if method == "min" else "c" + assert result == expected + + +def test_fillna_args(dtype, request): + # GH 37987 + + if dtype.storage == "pyarrow": + reason = ( + "Regex pattern \"Cannot set non-string value '1' into " + "a StringArray.\" does not match 'Scalar must be NA or str'" + ) + mark = pytest.mark.xfail(raises=AssertionError, reason=reason) + request.node.add_marker(mark) + + arr = pd.array(["a", pd.NA], dtype=dtype) + + res = arr.fillna(value="b") + expected = pd.array(["a", "b"], dtype=dtype) + tm.assert_extension_array_equal(res, expected) + + res = arr.fillna(value=np.str_("b")) + expected = pd.array(["a", "b"], dtype=dtype) + tm.assert_extension_array_equal(res, expected) + + msg = "Cannot set non-string value '1' into a StringArray." 
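+ # (for pyarrow storage the message is "Scalar must be NA or str" instead; the xfail above accounts for that mismatch)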
+ with pytest.raises(ValueError, match=msg): + arr.fillna(value=1) + + +@td.skip_if_no("pyarrow") +def test_arrow_array(dtype): + # protocol added in 0.15.0 + import pyarrow as pa + + data = pd.array(["a", "b", "c"], dtype=dtype) + arr = pa.array(data) + expected = pa.array(list(data), type=pa.string(), from_pandas=True) + if dtype.storage == "pyarrow": + expected = pa.chunked_array(expected) + + assert arr.equals(expected) + + +@td.skip_if_no("pyarrow") +def test_arrow_roundtrip(dtype, string_storage2): + # roundtrip possible from arrow 1.0.0 + import pyarrow as pa + + data = pd.array(["a", "b", None], dtype=dtype) + df = pd.DataFrame({"a": data}) + table = pa.table(df) + assert table.field("a").type == "string" + with pd.option_context("string_storage", string_storage2): + result = table.to_pandas() + assert isinstance(result["a"].dtype, pd.StringDtype) + expected = df.astype(f"string[{string_storage2}]") + tm.assert_frame_equal(result, expected) + # ensure the missing value is represented by NA and not np.nan or None + assert result.loc[2, "a"] is pd.NA + + +@td.skip_if_no("pyarrow") +def test_arrow_load_from_zero_chunks(dtype, string_storage2): + # GH-41040 + import pyarrow as pa + + data = pd.array([], dtype=dtype) + df = pd.DataFrame({"a": data}) + table = pa.table(df) + assert table.field("a").type == "string" + # Instantiate the same table with no chunks at all + table = pa.table([pa.chunked_array([], type=pa.string())], schema=table.schema) + with pd.option_context("string_storage", string_storage2): + result = table.to_pandas() + assert isinstance(result["a"].dtype, pd.StringDtype) + expected = df.astype(f"string[{string_storage2}]") + tm.assert_frame_equal(result, expected) + + +def test_value_counts_na(dtype): + arr = pd.array(["a", "b", "a", pd.NA], dtype=dtype) + result = arr.value_counts(dropna=False) + expected = pd.Series([2, 1, 1], index=arr[[0, 1, 3]], dtype="Int64") + tm.assert_series_equal(result, expected) + + result = arr.value_counts(dropna=True) + expected = pd.Series([2, 1], index=arr[:2], dtype="Int64") + tm.assert_series_equal(result, expected) + + +def test_value_counts_with_normalize(dtype): + ser = pd.Series(["a", "b", "a", pd.NA], dtype=dtype) + result = ser.value_counts(normalize=True) + expected = pd.Series([2, 1], index=ser[:2], dtype="Float64") / 3 + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "values, expected", + [ + (["a", "b", "c"], np.array([False, False, False])), + (["a", "b", None], np.array([False, False, True])), + ], +) +def test_use_inf_as_na(values, expected, dtype): + # https://github.com/pandas-dev/pandas/issues/33655 + values = pd.array(values, dtype=dtype) + with pd.option_context("mode.use_inf_as_na", True): + result = values.isna() + tm.assert_numpy_array_equal(result, expected) + + result = pd.Series(values).isna() + expected = pd.Series(expected) + tm.assert_series_equal(result, expected) + + result = pd.DataFrame(values).isna() + expected = pd.DataFrame(expected) + tm.assert_frame_equal(result, expected) + + +def test_memory_usage(dtype): + # GH 33963 + + if dtype.storage == "pyarrow": + pytest.skip("not applicable") + + series = pd.Series(["a", "b", "c"], dtype=dtype) + + assert 0 < series.nbytes <= series.memory_usage() < series.memory_usage(deep=True) + + +@pytest.mark.parametrize("float_dtype", [np.float16, np.float32, np.float64]) +def test_astype_from_float_dtype(float_dtype, dtype): + # https://github.com/pandas-dev/pandas/issues/36451 + ser = pd.Series([0.1], dtype=float_dtype) + result = 
ser.astype(dtype) + expected = pd.Series(["0.1"], dtype=dtype) + tm.assert_series_equal(result, expected) + + +def test_to_numpy_returns_pdna_default(dtype): + arr = pd.array(["a", pd.NA, "b"], dtype=dtype) + result = np.array(arr) + expected = np.array(["a", pd.NA, "b"], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + +def test_to_numpy_na_value(dtype, nulls_fixture): + na_value = nulls_fixture + arr = pd.array(["a", pd.NA, "b"], dtype=dtype) + result = arr.to_numpy(na_value=na_value) + expected = np.array(["a", na_value, "b"], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + +def test_isin(dtype, request, fixed_now_ts): + s = pd.Series(["a", "b", None], dtype=dtype) + + result = s.isin(["a", "c"]) + expected = pd.Series([True, False, False]) + tm.assert_series_equal(result, expected) + + result = s.isin(["a", pd.NA]) + expected = pd.Series([True, False, True]) + tm.assert_series_equal(result, expected) + + result = s.isin([]) + expected = pd.Series([False, False, False]) + tm.assert_series_equal(result, expected) + + result = s.isin(["a", fixed_now_ts]) + expected = pd.Series([True, False, False]) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/string_/test_string_arrow.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/string_/test_string_arrow.py new file mode 100644 index 0000000..265afa8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/string_/test_string_arrow.py @@ -0,0 +1,134 @@ +import re + +import numpy as np +import pytest + +from pandas.compat import pa_version_under1p01 + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays.string_ import ( + StringArray, + StringDtype, +) +from pandas.core.arrays.string_arrow import ArrowStringArray + +skip_if_no_pyarrow = pytest.mark.skipif( + pa_version_under1p01, + reason="pyarrow>=1.0.0 is required for PyArrow backed StringArray", +) + + +@skip_if_no_pyarrow +def test_eq_all_na(): + a = pd.array([pd.NA, pd.NA], dtype=StringDtype("pyarrow")) + result = a == a + expected = pd.array([pd.NA, pd.NA], dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + +def test_config(string_storage): + with pd.option_context("string_storage", string_storage): + assert StringDtype().storage == string_storage + result = pd.array(["a", "b"]) + assert result.dtype.storage == string_storage + + expected = ( + StringDtype(string_storage).construct_array_type()._from_sequence(["a", "b"]) + ) + tm.assert_equal(result, expected) + + +def test_config_bad_storage_raises(): + msg = re.escape("Value must be one of python|pyarrow") + with pytest.raises(ValueError, match=msg): + pd.options.mode.string_storage = "foo" + + +@skip_if_no_pyarrow +@pytest.mark.parametrize("chunked", [True, False]) +@pytest.mark.parametrize("array", ["numpy", "pyarrow"]) +def test_constructor_not_string_type_raises(array, chunked): + import pyarrow as pa + + array = pa if array == "pyarrow" else np + + arr = array.array([1, 2, 3]) + if chunked: + if array is np: + pytest.skip("chunked not applicable to numpy array") + arr = pa.chunked_array(arr) + if array is np: + msg = "Unsupported type '<class 'numpy.ndarray'>' for ArrowStringArray" + else: + msg = re.escape( + "ArrowStringArray requires a PyArrow (chunked) array of string type" + ) + with pytest.raises(ValueError, match=msg): + ArrowStringArray(arr) + + +@skip_if_no_pyarrow +def test_from_sequence_wrong_dtype_raises(): + with pd.option_context("string_storage", "python"): + 
ArrowStringArray._from_sequence(["a", None, "c"], dtype="string") + + with pd.option_context("string_storage", "pyarrow"): + ArrowStringArray._from_sequence(["a", None, "c"], dtype="string") + + with pytest.raises(AssertionError, match=None): + ArrowStringArray._from_sequence(["a", None, "c"], dtype="string[python]") + + ArrowStringArray._from_sequence(["a", None, "c"], dtype="string[pyarrow]") + + with pytest.raises(AssertionError, match=None): + with pd.option_context("string_storage", "python"): + ArrowStringArray._from_sequence(["a", None, "c"], dtype=StringDtype()) + + with pd.option_context("string_storage", "pyarrow"): + ArrowStringArray._from_sequence(["a", None, "c"], dtype=StringDtype()) + + with pytest.raises(AssertionError, match=None): + ArrowStringArray._from_sequence(["a", None, "c"], dtype=StringDtype("python")) + + ArrowStringArray._from_sequence(["a", None, "c"], dtype=StringDtype("pyarrow")) + + with pd.option_context("string_storage", "python"): + StringArray._from_sequence(["a", None, "c"], dtype="string") + + with pd.option_context("string_storage", "pyarrow"): + StringArray._from_sequence(["a", None, "c"], dtype="string") + + StringArray._from_sequence(["a", None, "c"], dtype="string[python]") + + with pytest.raises(AssertionError, match=None): + StringArray._from_sequence(["a", None, "c"], dtype="string[pyarrow]") + + with pd.option_context("string_storage", "python"): + StringArray._from_sequence(["a", None, "c"], dtype=StringDtype()) + + with pytest.raises(AssertionError, match=None): + with pd.option_context("string_storage", "pyarrow"): + StringArray._from_sequence(["a", None, "c"], dtype=StringDtype()) + + StringArray._from_sequence(["a", None, "c"], dtype=StringDtype("python")) + + with pytest.raises(AssertionError, match=None): + StringArray._from_sequence(["a", None, "c"], dtype=StringDtype("pyarrow")) + + +@pytest.mark.skipif( + not pa_version_under1p01, + reason="pyarrow is installed", +) +def test_pyarrow_not_installed_raises(): + msg = re.escape("pyarrow>=1.0.0 is required for PyArrow backed StringArray") + + with pytest.raises(ImportError, match=msg): + StringDtype(storage="pyarrow") + + with pytest.raises(ImportError, match=msg): + ArrowStringArray([]) + + with pytest.raises(ImportError, match=msg): + ArrowStringArray._from_sequence(["a", None, "b"]) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/test_array.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/test_array.py new file mode 100644 index 0000000..329d28c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/test_array.py @@ -0,0 +1,451 @@ +import datetime +import decimal + +import numpy as np +import pytest +import pytz + +from pandas.core.dtypes.base import _registry as registry + +import pandas as pd +import pandas._testing as tm +from pandas.api.extensions import register_extension_dtype +from pandas.api.types import is_scalar +from pandas.arrays import ( + BooleanArray, + DatetimeArray, + FloatingArray, + IntegerArray, + IntervalArray, + SparseArray, + TimedeltaArray, +) +from pandas.core.arrays import ( + PandasArray, + period_array, +) +from pandas.tests.extension.decimal import ( + DecimalArray, + DecimalDtype, + to_decimal, +) + + +@pytest.mark.parametrize( + "data, dtype, expected", + [ + # Basic NumPy defaults. 
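+ # (the dtype=None cases below exercise pd.array's inference, e.g. plain Python ints land in IntegerArray)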
+ ([1, 2], None, IntegerArray._from_sequence([1, 2])), + ([1, 2], object, PandasArray(np.array([1, 2], dtype=object))), + ( + [1, 2], + np.dtype("float32"), + PandasArray(np.array([1.0, 2.0], dtype=np.dtype("float32"))), + ), + (np.array([1, 2], dtype="int64"), None, IntegerArray._from_sequence([1, 2])), + ( + np.array([1.0, 2.0], dtype="float64"), + None, + FloatingArray._from_sequence([1.0, 2.0]), + ), + # String alias passes through to NumPy + ([1, 2], "float32", PandasArray(np.array([1, 2], dtype="float32"))), + ([1, 2], "int64", PandasArray(np.array([1, 2], dtype=np.int64))), + # GH#44715 FloatingArray does not support float16, so fall back to PandasArray + ( + np.array([1, 2], dtype=np.float16), + None, + PandasArray(np.array([1, 2], dtype=np.float16)), + ), + # idempotency with e.g. pd.array(pd.array([1, 2], dtype="int64")) + ( + PandasArray(np.array([1, 2], dtype=np.int32)), + None, + PandasArray(np.array([1, 2], dtype=np.int32)), + ), + # Period alias + ( + [pd.Period("2000", "D"), pd.Period("2001", "D")], + "Period[D]", + period_array(["2000", "2001"], freq="D"), + ), + # Period dtype + ( + [pd.Period("2000", "D")], + pd.PeriodDtype("D"), + period_array(["2000"], freq="D"), + ), + # Datetime (naive) + ( + [1, 2], + np.dtype("datetime64[ns]"), + DatetimeArray._from_sequence(np.array([1, 2], dtype="datetime64[ns]")), + ), + ( + np.array([1, 2], dtype="datetime64[ns]"), + None, + DatetimeArray._from_sequence(np.array([1, 2], dtype="datetime64[ns]")), + ), + ( + pd.DatetimeIndex(["2000", "2001"]), + np.dtype("datetime64[ns]"), + DatetimeArray._from_sequence(["2000", "2001"]), + ), + ( + pd.DatetimeIndex(["2000", "2001"]), + None, + DatetimeArray._from_sequence(["2000", "2001"]), + ), + ( + ["2000", "2001"], + np.dtype("datetime64[ns]"), + DatetimeArray._from_sequence(["2000", "2001"]), + ), + # Datetime (tz-aware) + ( + ["2000", "2001"], + pd.DatetimeTZDtype(tz="CET"), + DatetimeArray._from_sequence( + ["2000", "2001"], dtype=pd.DatetimeTZDtype(tz="CET") + ), + ), + # Timedelta + ( + ["1H", "2H"], + np.dtype("timedelta64[ns]"), + TimedeltaArray._from_sequence(["1H", "2H"]), + ), + ( + pd.TimedeltaIndex(["1H", "2H"]), + np.dtype("timedelta64[ns]"), + TimedeltaArray._from_sequence(["1H", "2H"]), + ), + ( + pd.TimedeltaIndex(["1H", "2H"]), + None, + TimedeltaArray._from_sequence(["1H", "2H"]), + ), + # Category + (["a", "b"], "category", pd.Categorical(["a", "b"])), + ( + ["a", "b"], + pd.CategoricalDtype(None, ordered=True), + pd.Categorical(["a", "b"], ordered=True), + ), + # Interval + ( + [pd.Interval(1, 2), pd.Interval(3, 4)], + "interval", + IntervalArray.from_tuples([(1, 2), (3, 4)]), + ), + # Sparse + ([0, 1], "Sparse[int64]", SparseArray([0, 1], dtype="int64")), + # IntegerNA + ([1, None], "Int16", pd.array([1, None], dtype="Int16")), + (pd.Series([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))), + # String + ( + ["a", None], + "string", + pd.StringDtype().construct_array_type()._from_sequence(["a", None]), + ), + ( + ["a", None], + pd.StringDtype(), + pd.StringDtype().construct_array_type()._from_sequence(["a", None]), + ), + # Boolean + ([True, None], "boolean", BooleanArray._from_sequence([True, None])), + ([True, None], pd.BooleanDtype(), BooleanArray._from_sequence([True, None])), + # Index + (pd.Index([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))), + # Series[EA] returns the EA + ( + pd.Series(pd.Categorical(["a", "b"], categories=["a", "b", "c"])), + None, + pd.Categorical(["a", "b"], categories=["a", "b", "c"]), + ), + # "3rd party" EAs work 
+ ([decimal.Decimal(0), decimal.Decimal(1)], "decimal", to_decimal([0, 1])), + # pass an ExtensionArray, but a different dtype + ( + period_array(["2000", "2001"], freq="D"), + "category", + pd.Categorical([pd.Period("2000", "D"), pd.Period("2001", "D")]), + ), + ], +) +def test_array(data, dtype, expected): + result = pd.array(data, dtype=dtype) + tm.assert_equal(result, expected) + + +def test_array_copy(): + a = np.array([1, 2]) + # default is to copy + b = pd.array(a, dtype=a.dtype) + assert not tm.shares_memory(a, b) + + # copy=True + b = pd.array(a, dtype=a.dtype, copy=True) + assert not tm.shares_memory(a, b) + + # copy=False + b = pd.array(a, dtype=a.dtype, copy=False) + assert tm.shares_memory(a, b) + + +cet = pytz.timezone("CET") + + +@pytest.mark.parametrize( + "data, expected", + [ + # period + ( + [pd.Period("2000", "D"), pd.Period("2001", "D")], + period_array(["2000", "2001"], freq="D"), + ), + # interval + ([pd.Interval(0, 1), pd.Interval(1, 2)], IntervalArray.from_breaks([0, 1, 2])), + # datetime + ( + [pd.Timestamp("2000"), pd.Timestamp("2001")], + DatetimeArray._from_sequence(["2000", "2001"]), + ), + ( + [datetime.datetime(2000, 1, 1), datetime.datetime(2001, 1, 1)], + DatetimeArray._from_sequence(["2000", "2001"]), + ), + ( + np.array([1, 2], dtype="M8[ns]"), + DatetimeArray(np.array([1, 2], dtype="M8[ns]")), + ), + ( + np.array([1, 2], dtype="M8[us]"), + DatetimeArray(np.array([1000, 2000], dtype="M8[ns]")), + ), + # datetimetz + ( + [pd.Timestamp("2000", tz="CET"), pd.Timestamp("2001", tz="CET")], + DatetimeArray._from_sequence( + ["2000", "2001"], dtype=pd.DatetimeTZDtype(tz="CET") + ), + ), + ( + [ + datetime.datetime(2000, 1, 1, tzinfo=cet), + datetime.datetime(2001, 1, 1, tzinfo=cet), + ], + DatetimeArray._from_sequence( + ["2000", "2001"], dtype=pd.DatetimeTZDtype(tz=cet) + ), + ), + # timedelta + ( + [pd.Timedelta("1H"), pd.Timedelta("2H")], + TimedeltaArray._from_sequence(["1H", "2H"]), + ), + ( + np.array([1, 2], dtype="m8[ns]"), + TimedeltaArray(np.array([1, 2], dtype="m8[ns]")), + ), + ( + np.array([1, 2], dtype="m8[us]"), + TimedeltaArray(np.array([1000, 2000], dtype="m8[ns]")), + ), + # integer + ([1, 2], IntegerArray._from_sequence([1, 2])), + ([1, None], IntegerArray._from_sequence([1, None])), + ([1, pd.NA], IntegerArray._from_sequence([1, pd.NA])), + ([1, np.nan], IntegerArray._from_sequence([1, np.nan])), + # float + ([0.1, 0.2], FloatingArray._from_sequence([0.1, 0.2])), + ([0.1, None], FloatingArray._from_sequence([0.1, pd.NA])), + ([0.1, np.nan], FloatingArray._from_sequence([0.1, pd.NA])), + ([0.1, pd.NA], FloatingArray._from_sequence([0.1, pd.NA])), + # integer-like float + ([1.0, 2.0], FloatingArray._from_sequence([1.0, 2.0])), + ([1.0, None], FloatingArray._from_sequence([1.0, pd.NA])), + ([1.0, np.nan], FloatingArray._from_sequence([1.0, pd.NA])), + ([1.0, pd.NA], FloatingArray._from_sequence([1.0, pd.NA])), + # mixed-integer-float + ([1, 2.0], FloatingArray._from_sequence([1.0, 2.0])), + ([1, np.nan, 2.0], FloatingArray._from_sequence([1.0, None, 2.0])), + # string + ( + ["a", "b"], + pd.StringDtype().construct_array_type()._from_sequence(["a", "b"]), + ), + ( + ["a", None], + pd.StringDtype().construct_array_type()._from_sequence(["a", None]), + ), + # Boolean + ([True, False], BooleanArray._from_sequence([True, False])), + ([True, None], BooleanArray._from_sequence([True, None])), + ], +) +def test_array_inference(data, expected): + result = pd.array(data) + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize( + "data", + [ + # 
mix of frequencies + [pd.Period("2000", "D"), pd.Period("2001", "A")], + # mix of closed + [pd.Interval(0, 1, closed="left"), pd.Interval(1, 2, closed="right")], + # Mix of timezones + [pd.Timestamp("2000", tz="CET"), pd.Timestamp("2000", tz="UTC")], + # Mix of tz-aware and tz-naive + [pd.Timestamp("2000", tz="CET"), pd.Timestamp("2000")], + np.array([pd.Timestamp("2000"), pd.Timestamp("2000", tz="CET")]), + ], +) +def test_array_inference_fails(data): + result = pd.array(data) + expected = PandasArray(np.array(data, dtype=object)) + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize("data", [np.array(0)]) +def test_nd_raises(data): + with pytest.raises(ValueError, match="PandasArray must be 1-dimensional"): + pd.array(data, dtype="int64") + + +def test_scalar_raises(): + with pytest.raises(ValueError, match="Cannot pass scalar '1'"): + pd.array(1) + + +def test_bounds_check(): + # GH21796 + with pytest.raises( + TypeError, match=r"cannot safely cast non-equivalent int(32|64) to uint16" + ): + pd.array([-1, 2, 3], dtype="UInt16") + + +# --------------------------------------------------------------------------- +# A couple dummy classes to ensure that Series and Indexes are unboxed before +# getting to the EA classes. + + +@register_extension_dtype +class DecimalDtype2(DecimalDtype): + name = "decimal2" + + @classmethod + def construct_array_type(cls): + """ + Return the array type associated with this dtype. + + Returns + ------- + type + """ + return DecimalArray2 + + +class DecimalArray2(DecimalArray): + @classmethod + def _from_sequence(cls, scalars, dtype=None, copy=False): + if isinstance(scalars, (pd.Series, pd.Index)): + raise TypeError("scalars should not be of type pd.Series or pd.Index") + + return super()._from_sequence(scalars, dtype=dtype, copy=copy) + + +def test_array_unboxes(index_or_series): + box = index_or_series + + data = box([decimal.Decimal("1"), decimal.Decimal("2")]) + # make sure it works + with pytest.raises( + TypeError, match="scalars should not be of type pd.Series or pd.Index" + ): + DecimalArray2._from_sequence(data) + + result = pd.array(data, dtype="decimal2") + expected = DecimalArray2._from_sequence(data.values) + tm.assert_equal(result, expected) + + +@pytest.fixture +def registry_without_decimal(): + idx = registry.dtypes.index(DecimalDtype) + registry.dtypes.pop(idx) + yield + registry.dtypes.append(DecimalDtype) + + +def test_array_not_registered(registry_without_decimal): + # check we aren't on it + assert registry.find("decimal") is None + data = [decimal.Decimal("1"), decimal.Decimal("2")] + + result = pd.array(data, dtype=DecimalDtype) + expected = DecimalArray._from_sequence(data) + tm.assert_equal(result, expected) + + +class TestArrayAnalytics: + def test_searchsorted(self, string_dtype): + arr = pd.array(["a", "b", "c"], dtype=string_dtype) + + result = arr.searchsorted("a", side="left") + assert is_scalar(result) + assert result == 0 + + result = arr.searchsorted("a", side="right") + assert is_scalar(result) + assert result == 1 + + def test_searchsorted_numeric_dtypes_scalar(self, any_real_numpy_dtype): + arr = pd.array([1, 3, 90], dtype=any_real_numpy_dtype) + result = arr.searchsorted(30) + assert is_scalar(result) + assert result == 2 + + result = arr.searchsorted([30]) + expected = np.array([2], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + def test_searchsorted_numeric_dtypes_vector(self, any_real_numpy_dtype): + arr = pd.array([1, 3, 90], dtype=any_real_numpy_dtype) + result = 
arr.searchsorted([2, 30]) + expected = np.array([1, 2], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "arr, val", + [ + [ + pd.date_range("20120101", periods=10, freq="2D"), + pd.Timestamp("20120102"), + ], + [ + pd.date_range("20120101", periods=10, freq="2D", tz="Asia/Hong_Kong"), + pd.Timestamp("20120102", tz="Asia/Hong_Kong"), + ], + [ + pd.timedelta_range(start="1 day", end="10 days", periods=10), + pd.Timedelta("2 days"), + ], + ], + ) + def test_search_sorted_datetime64_scalar(self, arr, val): + arr = pd.array(arr) + result = arr.searchsorted(val) + assert is_scalar(result) + assert result == 1 + + def test_searchsorted_sorter(self, any_real_numpy_dtype): + arr = pd.array([3, 1, 2], dtype=any_real_numpy_dtype) + result = arr.searchsorted([0, 3], sorter=np.argsort(arr)) + expected = np.array([0, 2], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/test_datetimelike.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/test_datetimelike.py new file mode 100644 index 0000000..7484fdc --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/test_datetimelike.py @@ -0,0 +1,1446 @@ +from __future__ import annotations + +import re + +import numpy as np +import pytest + +from pandas._libs import ( + NaT, + OutOfBoundsDatetime, + Timestamp, +) +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + DatetimeIndex, + Period, + PeriodIndex, + TimedeltaIndex, +) +import pandas._testing as tm +from pandas.core.arrays import ( + DatetimeArray, + PandasArray, + PeriodArray, + TimedeltaArray, +) +from pandas.core.arrays.datetimes import _sequence_to_dt64ns +from pandas.core.arrays.timedeltas import sequence_to_td64ns + + +# TODO: more freq variants +@pytest.fixture(params=["D", "B", "W", "M", "Q", "Y"]) +def freqstr(request): + return request.param + + +@pytest.fixture +def period_index(freqstr): + """ + A fixture to provide PeriodIndex objects with different frequencies. + + Most PeriodArray behavior is already tested in PeriodIndex tests, + so here we just test that the PeriodArray behavior matches + the PeriodIndex behavior. + """ + # TODO: non-monotone indexes; NaTs, different start dates + pi = pd.period_range(start=Timestamp("2000-01-01"), periods=100, freq=freqstr) + return pi + + +@pytest.fixture +def datetime_index(freqstr): + """ + A fixture to provide DatetimeIndex objects with different frequencies. + + Most DatetimeArray behavior is already tested in DatetimeIndex tests, + so here we just test that the DatetimeArray behavior matches + the DatetimeIndex behavior. + """ + # TODO: non-monotone indexes; NaTs, different start dates, timezones + dti = pd.date_range(start=Timestamp("2000-01-01"), periods=100, freq=freqstr) + return dti + + +@pytest.fixture +def timedelta_index(): + """ + A fixture to provide TimedeltaIndex objects with different frequencies. + Most TimedeltaArray behavior is already tested in TimedeltaIndex tests, + so here we just test that the TimedeltaArray behavior matches + the TimedeltaIndex behavior. 
+ """ + # TODO: flesh this out + return TimedeltaIndex(["1 Day", "3 Hours", "NaT"]) + + +class SharedTests: + index_cls: type[DatetimeIndex | PeriodIndex | TimedeltaIndex] + + @pytest.fixture + def arr1d(self): + data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 + arr = self.array_cls(data, freq="D") + return arr + + def test_compare_len1_raises(self, arr1d): + # make sure we raise when comparing with different lengths, specific + # to the case where one has length-1, which numpy would broadcast + arr = arr1d + idx = self.index_cls(arr) + + with pytest.raises(ValueError, match="Lengths must match"): + arr == arr[:1] + + # test the index classes while we're at it, GH#23078 + with pytest.raises(ValueError, match="Lengths must match"): + idx <= idx[[0]] + + @pytest.mark.parametrize( + "result", + [ + pd.date_range("2020", periods=3), + pd.date_range("2020", periods=3, tz="UTC"), + pd.timedelta_range("0 days", periods=3), + pd.period_range("2020Q1", periods=3, freq="Q"), + ], + ) + def test_compare_with_Categorical(self, result): + expected = pd.Categorical(result) + assert all(result == expected) + assert not any(result != expected) + + @pytest.mark.parametrize("reverse", [True, False]) + @pytest.mark.parametrize("as_index", [True, False]) + def test_compare_categorical_dtype(self, arr1d, as_index, reverse, ordered): + other = pd.Categorical(arr1d, ordered=ordered) + if as_index: + other = pd.CategoricalIndex(other) + + left, right = arr1d, other + if reverse: + left, right = right, left + + ones = np.ones(arr1d.shape, dtype=bool) + zeros = ~ones + + result = left == right + tm.assert_numpy_array_equal(result, ones) + + result = left != right + tm.assert_numpy_array_equal(result, zeros) + + if not reverse and not as_index: + # Otherwise Categorical raises TypeError bc it is not ordered + # TODO: we should probably get the same behavior regardless? + result = left < right + tm.assert_numpy_array_equal(result, zeros) + + result = left <= right + tm.assert_numpy_array_equal(result, ones) + + result = left > right + tm.assert_numpy_array_equal(result, zeros) + + result = left >= right + tm.assert_numpy_array_equal(result, ones) + + def test_take(self): + data = np.arange(100, dtype="i8") * 24 * 3600 * 10 ** 9 + np.random.shuffle(data) + + freq = None if self.array_cls is not PeriodArray else "D" + + arr = self.array_cls(data, freq=freq) + idx = self.index_cls._simple_new(arr) + + takers = [1, 4, 94] + result = arr.take(takers) + expected = idx.take(takers) + + tm.assert_index_equal(self.index_cls(result), expected) + + takers = np.array([1, 4, 94]) + result = arr.take(takers) + expected = idx.take(takers) + + tm.assert_index_equal(self.index_cls(result), expected) + + @pytest.mark.parametrize("fill_value", [2, 2.0, Timestamp(2021, 1, 1, 12).time]) + def test_take_fill_raises(self, fill_value): + data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 + + arr = self.array_cls(data, freq="D") + + msg = f"value should be a '{arr._scalar_type.__name__}' or 'NaT'. 
Got" + with pytest.raises(TypeError, match=msg): + arr.take([0, 1], allow_fill=True, fill_value=fill_value) + + def test_take_fill(self): + data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 + + arr = self.array_cls(data, freq="D") + + result = arr.take([-1, 1], allow_fill=True, fill_value=None) + assert result[0] is NaT + + result = arr.take([-1, 1], allow_fill=True, fill_value=np.nan) + assert result[0] is NaT + + result = arr.take([-1, 1], allow_fill=True, fill_value=NaT) + assert result[0] is NaT + + def test_take_fill_str(self, arr1d): + # Cast str fill_value matching other fill_value-taking methods + result = arr1d.take([-1, 1], allow_fill=True, fill_value=str(arr1d[-1])) + expected = arr1d[[-1, 1]] + tm.assert_equal(result, expected) + + msg = f"value should be a '{arr1d._scalar_type.__name__}' or 'NaT'. Got" + with pytest.raises(TypeError, match=msg): + arr1d.take([-1, 1], allow_fill=True, fill_value="foo") + + def test_concat_same_type(self, arr1d): + arr = arr1d + idx = self.index_cls(arr) + idx = idx.insert(0, NaT) + arr = self.array_cls(idx) + + result = arr._concat_same_type([arr[:-1], arr[1:], arr]) + arr2 = arr.astype(object) + expected = self.index_cls(np.concatenate([arr2[:-1], arr2[1:], arr2]), None) + + tm.assert_index_equal(self.index_cls(result), expected) + + def test_unbox_scalar(self): + data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 + arr = self.array_cls(data, freq="D") + result = arr._unbox_scalar(arr[0]) + expected = arr._data.dtype.type + assert isinstance(result, expected) + + result = arr._unbox_scalar(NaT) + assert isinstance(result, expected) + + msg = f"'value' should be a {self.scalar_type.__name__}." + with pytest.raises(ValueError, match=msg): + arr._unbox_scalar("foo") + + def test_check_compatible_with(self): + data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 + arr = self.array_cls(data, freq="D") + + arr._check_compatible_with(arr[0]) + arr._check_compatible_with(arr[:1]) + arr._check_compatible_with(NaT) + + def test_scalar_from_string(self): + data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 + arr = self.array_cls(data, freq="D") + result = arr._scalar_from_string(str(arr[0])) + assert result == arr[0] + + def test_reduce_invalid(self): + data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 + arr = self.array_cls(data, freq="D") + + msg = "does not support reduction 'not a method'" + with pytest.raises(TypeError, match=msg): + arr._reduce("not a method") + + @pytest.mark.parametrize("method", ["pad", "backfill"]) + def test_fillna_method_doesnt_change_orig(self, method): + data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 + arr = self.array_cls(data, freq="D") + arr[4] = NaT + + fill_value = arr[3] if method == "pad" else arr[5] + + result = arr.fillna(method=method) + assert result[4] == fill_value + + # check that the original was not changed + assert arr[4] is NaT + + def test_searchsorted(self): + data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 + arr = self.array_cls(data, freq="D") + + # scalar + result = arr.searchsorted(arr[1]) + assert result == 1 + + result = arr.searchsorted(arr[2], side="right") + assert result == 3 + + # own-type + result = arr.searchsorted(arr[1:3]) + expected = np.array([1, 2], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + result = arr.searchsorted(arr[1:3], side="right") + expected = np.array([2, 3], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + # GH#29884 match numpy convention on whether NaT goes + # at the end or the beginning + result 
= arr.searchsorted(NaT) + assert result == 10 + + @pytest.mark.parametrize("box", [None, "index", "series"]) + def test_searchsorted_castable_strings(self, arr1d, box, request, string_storage): + if isinstance(arr1d, DatetimeArray): + tz = arr1d.tz + ts1, ts2 = arr1d[1:3] + if tz is not None and ts1.tz.tzname(ts1) != ts2.tz.tzname(ts2): + # If we have e.g. tzutc(), when we cast to string and parse + # back we get pytz.UTC, and then consider them different timezones + # so incorrectly raise. + mark = pytest.mark.xfail( + raises=TypeError, reason="timezone comparisons inconsistent" + ) + request.node.add_marker(mark) + + arr = arr1d + if box is None: + pass + elif box == "index": + # Test the equivalent Index.searchsorted method while we're here + arr = self.index_cls(arr) + else: + # Test the equivalent Series.searchsorted method while we're here + arr = pd.Series(arr) + + # scalar + result = arr.searchsorted(str(arr[1])) + assert result == 1 + + result = arr.searchsorted(str(arr[2]), side="right") + assert result == 3 + + result = arr.searchsorted([str(x) for x in arr[1:3]]) + expected = np.array([1, 2], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + with pytest.raises( + TypeError, + match=re.escape( + f"value should be a '{arr1d._scalar_type.__name__}', 'NaT', " + "or array of those. Got 'str' instead." + ), + ): + arr.searchsorted("foo") + + arr_type = "StringArray" if string_storage == "python" else "ArrowStringArray" + + with pd.option_context("string_storage", string_storage): + with pytest.raises( + TypeError, + match=re.escape( + f"value should be a '{arr1d._scalar_type.__name__}', 'NaT', " + f"or array of those. Got '{arr_type}' instead." + ), + ): + arr.searchsorted([str(arr[1]), "baz"]) + + def test_getitem_near_implementation_bounds(self): + # We only check tz-naive for DTA bc the bounds are slightly different + # for other tzs + i8vals = np.asarray([NaT.value + n for n in range(1, 5)], dtype="i8") + arr = self.array_cls(i8vals, freq="ns") + arr[0] # should not raise OutOfBoundsDatetime + + index = pd.Index(arr) + index[0] # should not raise OutOfBoundsDatetime + + ser = pd.Series(arr) + ser[0] # should not raise OutOfBoundsDatetime + + def test_getitem_2d(self, arr1d): + # 2d slicing on a 1D array + expected = type(arr1d)(arr1d._data[:, np.newaxis], dtype=arr1d.dtype) + result = arr1d[:, np.newaxis] + tm.assert_equal(result, expected) + + # Lookup on a 2D array + arr2d = expected + expected = type(arr2d)(arr2d._data[:3, 0], dtype=arr2d.dtype) + result = arr2d[:3, 0] + tm.assert_equal(result, expected) + + # Scalar lookup + result = arr2d[-1, 0] + expected = arr1d[-1] + assert result == expected + + def test_iter_2d(self, arr1d): + data2d = arr1d._data[:3, np.newaxis] + arr2d = type(arr1d)._simple_new(data2d, dtype=arr1d.dtype) + result = list(arr2d) + assert len(result) == 3 + for x in result: + assert isinstance(x, type(arr1d)) + assert x.ndim == 1 + assert x.dtype == arr1d.dtype + + def test_repr_2d(self, arr1d): + data2d = arr1d._data[:3, np.newaxis] + arr2d = type(arr1d)._simple_new(data2d, dtype=arr1d.dtype) + + result = repr(arr2d) + + if isinstance(arr2d, TimedeltaArray): + expected = ( + f"<{type(arr2d).__name__}>\n" + "[\n" + f"['{arr1d[0]._repr_base()}'],\n" + f"['{arr1d[1]._repr_base()}'],\n" + f"['{arr1d[2]._repr_base()}']\n" + "]\n" + f"Shape: (3, 1), dtype: {arr1d.dtype}" + ) + else: + expected = ( + f"<{type(arr2d).__name__}>\n" + "[\n" + f"['{arr1d[0]}'],\n" + f"['{arr1d[1]}'],\n" + f"['{arr1d[2]}']\n" + "]\n" + f"Shape: (3, 1), dtype: 
{arr1d.dtype}" + ) + + assert result == expected + + def test_setitem(self): + data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 + arr = self.array_cls(data, freq="D") + + arr[0] = arr[1] + expected = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 + expected[0] = expected[1] + + tm.assert_numpy_array_equal(arr.asi8, expected) + + arr[:2] = arr[-2:] + expected[:2] = expected[-2:] + tm.assert_numpy_array_equal(arr.asi8, expected) + + @pytest.mark.parametrize( + "box", + [ + pd.Index, + pd.Series, + np.array, + list, + PandasArray, + ], + ) + def test_setitem_object_dtype(self, box, arr1d): + + expected = arr1d.copy()[::-1] + if expected.dtype.kind in ["m", "M"]: + expected = expected._with_freq(None) + + vals = expected + if box is list: + vals = list(vals) + elif box is np.array: + # if we do np.array(x).astype(object) then dt64 and td64 cast to ints + vals = np.array(vals.astype(object)) + elif box is PandasArray: + vals = box(np.asarray(vals, dtype=object)) + else: + vals = box(vals).astype(object) + + arr1d[:] = vals + + tm.assert_equal(arr1d, expected) + + def test_setitem_strs(self, arr1d, request): + # Check that we parse strs in both scalar and listlike + if isinstance(arr1d, DatetimeArray): + tz = arr1d.tz + ts1, ts2 = arr1d[-2:] + if tz is not None and ts1.tz.tzname(ts1) != ts2.tz.tzname(ts2): + # If we have e.g. tzutc(), when we cast to string and parse + # back we get pytz.UTC, and then consider them different timezones + # so incorrectly raise. + mark = pytest.mark.xfail( + raises=TypeError, reason="timezone comparisons inconsistent" + ) + request.node.add_marker(mark) + + # Setting list-like of strs + expected = arr1d.copy() + expected[[0, 1]] = arr1d[-2:] + + result = arr1d.copy() + result[:2] = [str(x) for x in arr1d[-2:]] + tm.assert_equal(result, expected) + + # Same thing but now for just a scalar str + expected = arr1d.copy() + expected[0] = arr1d[-1] + + result = arr1d.copy() + result[0] = str(arr1d[-1]) + tm.assert_equal(result, expected) + + @pytest.mark.parametrize("as_index", [True, False]) + def test_setitem_categorical(self, arr1d, as_index): + expected = arr1d.copy()[::-1] + if not isinstance(expected, PeriodArray): + expected = expected._with_freq(None) + + cat = pd.Categorical(arr1d) + if as_index: + cat = pd.CategoricalIndex(cat) + + arr1d[:] = cat[::-1] + + tm.assert_equal(arr1d, expected) + + def test_setitem_raises(self): + data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 + arr = self.array_cls(data, freq="D") + val = arr[0] + + with pytest.raises(IndexError, match="index 12 is out of bounds"): + arr[12] = val + + with pytest.raises(TypeError, match="value should be a.* 'object'"): + arr[0] = object() + + msg = "cannot set using a list-like indexer with a different length" + with pytest.raises(ValueError, match=msg): + # GH#36339 + arr[[]] = [arr[1]] + + msg = "cannot set using a slice indexer with a different length than" + with pytest.raises(ValueError, match=msg): + # GH#36339 + arr[1:1] = arr[:3] + + @pytest.mark.parametrize("box", [list, np.array, pd.Index, pd.Series]) + def test_setitem_numeric_raises(self, arr1d, box): + # We dont case e.g. int64 to our own dtype for setitem + + msg = ( + f"value should be a '{arr1d._scalar_type.__name__}', " + "'NaT', or array of those. 
Got" + ) + with pytest.raises(TypeError, match=msg): + arr1d[:2] = box([0, 1]) + + with pytest.raises(TypeError, match=msg): + arr1d[:2] = box([0.0, 1.0]) + + def test_inplace_arithmetic(self): + # GH#24115 check that iadd and isub are actually in-place + data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 + arr = self.array_cls(data, freq="D") + + expected = arr + pd.Timedelta(days=1) + arr += pd.Timedelta(days=1) + tm.assert_equal(arr, expected) + + expected = arr - pd.Timedelta(days=1) + arr -= pd.Timedelta(days=1) + tm.assert_equal(arr, expected) + + def test_shift_fill_int_deprecated(self): + # GH#31971 + data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 + arr = self.array_cls(data, freq="D") + + msg = "Passing to shift" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = arr.shift(1, fill_value=1) + + expected = arr.copy() + if self.array_cls is PeriodArray: + fill_val = PeriodArray._scalar_type._from_ordinal(1, freq=arr.freq) + else: + fill_val = arr._scalar_type(1) + expected[0] = fill_val + expected[1:] = arr[:-1] + tm.assert_equal(result, expected) + + def test_median(self, arr1d): + arr = arr1d + if len(arr) % 2 == 0: + # make it easier to define `expected` + arr = arr[:-1] + + expected = arr[len(arr) // 2] + + result = arr.median() + assert type(result) is type(expected) + assert result == expected + + arr[len(arr) // 2] = NaT + if not isinstance(expected, Period): + expected = arr[len(arr) // 2 - 1 : len(arr) // 2 + 2].mean() + + assert arr.median(skipna=False) is NaT + + result = arr.median() + assert type(result) is type(expected) + assert result == expected + + assert arr[:0].median() is NaT + assert arr[:0].median(skipna=False) is NaT + + # 2d Case + arr2 = arr.reshape(-1, 1) + + result = arr2.median(axis=None) + assert type(result) is type(expected) + assert result == expected + + assert arr2.median(axis=None, skipna=False) is NaT + + result = arr2.median(axis=0) + expected2 = type(arr)._from_sequence([expected], dtype=arr.dtype) + tm.assert_equal(result, expected2) + + result = arr2.median(axis=0, skipna=False) + expected2 = type(arr)._from_sequence([NaT], dtype=arr.dtype) + tm.assert_equal(result, expected2) + + result = arr2.median(axis=1) + tm.assert_equal(result, arr) + + result = arr2.median(axis=1, skipna=False) + tm.assert_equal(result, arr) + + def test_from_integer_array(self): + arr = np.array([1, 2, 3], dtype=np.int64) + expected = self.array_cls(arr, dtype=self.example_dtype) + + data = pd.array(arr, dtype="Int64") + result = self.array_cls(data, dtype=self.example_dtype) + + tm.assert_extension_array_equal(result, expected) + + +class TestDatetimeArray(SharedTests): + index_cls = DatetimeIndex + array_cls = DatetimeArray + scalar_type = Timestamp + example_dtype = "M8[ns]" + + @pytest.fixture + def arr1d(self, tz_naive_fixture, freqstr): + tz = tz_naive_fixture + dti = pd.date_range("2016-01-01 01:01:00", periods=5, freq=freqstr, tz=tz) + dta = dti._data + return dta + + def test_round(self, arr1d): + # GH#24064 + dti = self.index_cls(arr1d) + + result = dti.round(freq="2T") + expected = dti - pd.Timedelta(minutes=1) + expected = expected._with_freq(None) + tm.assert_index_equal(result, expected) + + dta = dti._data + result = dta.round(freq="2T") + expected = expected._data._with_freq(None) + tm.assert_datetime_array_equal(result, expected) + + def test_array_interface(self, datetime_index): + arr = DatetimeArray(datetime_index) + + # default asarray gives the same underlying data (for tz naive) + result = np.asarray(arr) + 
expected = arr._data + assert result is expected + tm.assert_numpy_array_equal(result, expected) + result = np.array(arr, copy=False) + assert result is expected + tm.assert_numpy_array_equal(result, expected) + + # specifying M8[ns] gives the same result as default + result = np.asarray(arr, dtype="datetime64[ns]") + expected = arr._data + assert result is expected + tm.assert_numpy_array_equal(result, expected) + result = np.array(arr, dtype="datetime64[ns]", copy=False) + assert result is expected + tm.assert_numpy_array_equal(result, expected) + result = np.array(arr, dtype="datetime64[ns]") + assert result is not expected + tm.assert_numpy_array_equal(result, expected) + + # to object dtype + result = np.asarray(arr, dtype=object) + expected = np.array(list(arr), dtype=object) + tm.assert_numpy_array_equal(result, expected) + + # to other dtype always copies + result = np.asarray(arr, dtype="int64") + assert result is not arr.asi8 + assert not np.may_share_memory(arr, result) + expected = arr.asi8.copy() + tm.assert_numpy_array_equal(result, expected) + + # other dtypes handled by numpy + for dtype in ["float64", str]: + result = np.asarray(arr, dtype=dtype) + expected = np.asarray(arr).astype(dtype) + tm.assert_numpy_array_equal(result, expected) + + def test_array_object_dtype(self, arr1d): + # GH#23524 + arr = arr1d + dti = self.index_cls(arr1d) + + expected = np.array(list(dti)) + + result = np.array(arr, dtype=object) + tm.assert_numpy_array_equal(result, expected) + + # also test the DatetimeIndex method while we're at it + result = np.array(dti, dtype=object) + tm.assert_numpy_array_equal(result, expected) + + def test_array_tz(self, arr1d): + # GH#23524 + arr = arr1d + dti = self.index_cls(arr1d) + + expected = dti.asi8.view("M8[ns]") + result = np.array(arr, dtype="M8[ns]") + tm.assert_numpy_array_equal(result, expected) + + result = np.array(arr, dtype="datetime64[ns]") + tm.assert_numpy_array_equal(result, expected) + + # check that we are not making copies when setting copy=False + result = np.array(arr, dtype="M8[ns]", copy=False) + assert result.base is expected.base + assert result.base is not None + result = np.array(arr, dtype="datetime64[ns]", copy=False) + assert result.base is expected.base + assert result.base is not None + + def test_array_i8_dtype(self, arr1d): + arr = arr1d + dti = self.index_cls(arr1d) + + expected = dti.asi8 + result = np.array(arr, dtype="i8") + tm.assert_numpy_array_equal(result, expected) + + result = np.array(arr, dtype=np.int64) + tm.assert_numpy_array_equal(result, expected) + + # check that we are still making copies when setting copy=False + result = np.array(arr, dtype="i8", copy=False) + assert result.base is not expected.base + assert result.base is None + + def test_from_array_keeps_base(self): + # Ensure that DatetimeArray._data.base isn't lost. 
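+ # (slicing a numpy array yields a view whose .base is the original buffer; the asserts below rely on DatetimeArray preserving that reference)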
+ arr = np.array(["2000-01-01", "2000-01-02"], dtype="M8[ns]") + dta = DatetimeArray(arr) + + assert dta._data is arr + dta = DatetimeArray(arr[:0]) + assert dta._data.base is arr + + def test_from_dti(self, arr1d): + arr = arr1d + dti = self.index_cls(arr1d) + assert list(dti) == list(arr) + + # Check that Index.__new__ knows what to do with DatetimeArray + dti2 = pd.Index(arr) + assert isinstance(dti2, DatetimeIndex) + assert list(dti2) == list(arr) + + def test_astype_object(self, arr1d): + arr = arr1d + dti = self.index_cls(arr1d) + + asobj = arr.astype("O") + assert isinstance(asobj, np.ndarray) + assert asobj.dtype == "O" + assert list(asobj) == list(dti) + + def test_to_perioddelta(self, datetime_index, freqstr): + # GH#23113 + dti = datetime_index + arr = DatetimeArray(dti) + + msg = "to_perioddelta is deprecated and will be removed" + with tm.assert_produces_warning(FutureWarning, match=msg): + # Deprecation GH#34853 + expected = dti.to_perioddelta(freq=freqstr) + with tm.assert_produces_warning(FutureWarning, match=msg): + # stacklevel is chosen to be "correct" for DatetimeIndex, not + # DatetimeArray + result = arr.to_perioddelta(freq=freqstr) + assert isinstance(result, TimedeltaArray) + + # placeholder until these become actual EA subclasses and we can use + # an EA-specific tm.assert_ function + tm.assert_index_equal(pd.Index(result), pd.Index(expected)) + + def test_to_period(self, datetime_index, freqstr): + dti = datetime_index + arr = DatetimeArray(dti) + + expected = dti.to_period(freq=freqstr) + result = arr.to_period(freq=freqstr) + assert isinstance(result, PeriodArray) + + # placeholder until these become actual EA subclasses and we can use + # an EA-specific tm.assert_ function + tm.assert_index_equal(pd.Index(result), pd.Index(expected)) + + def test_to_period_2d(self, arr1d): + arr2d = arr1d.reshape(1, -1) + + warn = None if arr1d.tz is None else UserWarning + with tm.assert_produces_warning(warn): + result = arr2d.to_period("D") + expected = arr1d.to_period("D").reshape(1, -1) + tm.assert_period_array_equal(result, expected) + + @pytest.mark.parametrize("propname", DatetimeArray._bool_ops) + def test_bool_properties(self, arr1d, propname): + # in this case _bool_ops is just `is_leap_year` + dti = self.index_cls(arr1d) + arr = arr1d + assert dti.freq == arr.freq + + result = getattr(arr, propname) + expected = np.array(getattr(dti, propname), dtype=result.dtype) + + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("propname", DatetimeArray._field_ops) + def test_int_properties(self, arr1d, propname): + warn = None + msg = "weekofyear and week have been deprecated, please use" + if propname in ["week", "weekofyear"]: + # GH#33595 Deprecate week and weekofyear + warn = FutureWarning + + dti = self.index_cls(arr1d) + arr = arr1d + + with tm.assert_produces_warning(warn, match=msg): + result = getattr(arr, propname) + expected = np.array(getattr(dti, propname), dtype=result.dtype) + + tm.assert_numpy_array_equal(result, expected) + + def test_take_fill_valid(self, arr1d, fixed_now_ts): + arr = arr1d + dti = self.index_cls(arr1d) + + now = fixed_now_ts.tz_localize(dti.tz) + result = arr.take([-1, 1], allow_fill=True, fill_value=now) + assert result[0] == now + + msg = f"value should be a '{arr1d._scalar_type.__name__}' or 'NaT'. 
Got" + with pytest.raises(TypeError, match=msg): + # fill_value Timedelta invalid + arr.take([-1, 1], allow_fill=True, fill_value=now - now) + + with pytest.raises(TypeError, match=msg): + # fill_value Period invalid + arr.take([-1, 1], allow_fill=True, fill_value=Period("2014Q1")) + + tz = None if dti.tz is not None else "US/Eastern" + now = fixed_now_ts.tz_localize(tz) + msg = "Cannot compare tz-naive and tz-aware datetime-like objects" + with pytest.raises(TypeError, match=msg): + # Timestamp with mismatched tz-awareness + arr.take([-1, 1], allow_fill=True, fill_value=now) + + value = NaT.value + msg = f"value should be a '{arr1d._scalar_type.__name__}' or 'NaT'. Got" + with pytest.raises(TypeError, match=msg): + # require NaT, not iNaT, as it could be confused with an integer + arr.take([-1, 1], allow_fill=True, fill_value=value) + + value = np.timedelta64("NaT", "ns") + with pytest.raises(TypeError, match=msg): + # require appropriate-dtype if we have a NA value + arr.take([-1, 1], allow_fill=True, fill_value=value) + + if arr.tz is not None: + # GH#37356 + # Assuming here that arr1d fixture does not include Australia/Melbourne + value = fixed_now_ts.tz_localize("Australia/Melbourne") + msg = "Timezones don't match. .* != 'Australia/Melbourne'" + with pytest.raises(ValueError, match=msg): + # require tz match, not just tzawareness match + with tm.assert_produces_warning( + FutureWarning, match="mismatched timezone" + ): + result = arr.take([-1, 1], allow_fill=True, fill_value=value) + + # once deprecation is enforced + # expected = arr.take([-1, 1], allow_fill=True, + # fill_value=value.tz_convert(arr.dtype.tz)) + # tm.assert_equal(result, expected) + + def test_concat_same_type_invalid(self, arr1d): + # different timezones + arr = arr1d + + if arr.tz is None: + other = arr.tz_localize("UTC") + else: + other = arr.tz_localize(None) + + with pytest.raises(ValueError, match="to_concat must have the same"): + arr._concat_same_type([arr, other]) + + def test_concat_same_type_different_freq(self): + # we *can* concatenate DTI with different freqs. 
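+ # (the mixed-freq result cannot keep a single freq, so expected is built from plain timestamps with freq=None)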
+ a = DatetimeArray(pd.date_range("2000", periods=2, freq="D", tz="US/Central")) + b = DatetimeArray(pd.date_range("2000", periods=2, freq="H", tz="US/Central")) + result = DatetimeArray._concat_same_type([a, b]) + expected = DatetimeArray( + pd.to_datetime( + [ + "2000-01-01 00:00:00", + "2000-01-02 00:00:00", + "2000-01-01 00:00:00", + "2000-01-01 01:00:00", + ] + ).tz_localize("US/Central") + ) + + tm.assert_datetime_array_equal(result, expected) + + def test_strftime(self, arr1d): + arr = arr1d + + result = arr.strftime("%Y %b") + expected = np.array([ts.strftime("%Y %b") for ts in arr], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + def test_strftime_nat(self): + # GH 29578 + arr = DatetimeArray(DatetimeIndex(["2019-01-01", NaT])) + + result = arr.strftime("%Y-%m-%d") + expected = np.array(["2019-01-01", np.nan], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + +class TestTimedeltaArray(SharedTests): + index_cls = TimedeltaIndex + array_cls = TimedeltaArray + scalar_type = pd.Timedelta + example_dtype = "m8[ns]" + + def test_from_tdi(self): + tdi = TimedeltaIndex(["1 Day", "3 Hours"]) + arr = TimedeltaArray(tdi) + assert list(arr) == list(tdi) + + # Check that Index.__new__ knows what to do with TimedeltaArray + tdi2 = pd.Index(arr) + assert isinstance(tdi2, TimedeltaIndex) + assert list(tdi2) == list(arr) + + def test_astype_object(self): + tdi = TimedeltaIndex(["1 Day", "3 Hours"]) + arr = TimedeltaArray(tdi) + asobj = arr.astype("O") + assert isinstance(asobj, np.ndarray) + assert asobj.dtype == "O" + assert list(asobj) == list(tdi) + + def test_to_pytimedelta(self, timedelta_index): + tdi = timedelta_index + arr = TimedeltaArray(tdi) + + expected = tdi.to_pytimedelta() + result = arr.to_pytimedelta() + + tm.assert_numpy_array_equal(result, expected) + + def test_total_seconds(self, timedelta_index): + tdi = timedelta_index + arr = TimedeltaArray(tdi) + + expected = tdi.total_seconds() + result = arr.total_seconds() + + tm.assert_numpy_array_equal(result, expected.values) + + @pytest.mark.parametrize("propname", TimedeltaArray._field_ops) + def test_int_properties(self, timedelta_index, propname): + tdi = timedelta_index + arr = TimedeltaArray(tdi) + + result = getattr(arr, propname) + expected = np.array(getattr(tdi, propname), dtype=result.dtype) + + tm.assert_numpy_array_equal(result, expected) + + def test_array_interface(self, timedelta_index): + arr = TimedeltaArray(timedelta_index) + + # default asarray gives the same underlying data + result = np.asarray(arr) + expected = arr._data + assert result is expected + tm.assert_numpy_array_equal(result, expected) + result = np.array(arr, copy=False) + assert result is expected + tm.assert_numpy_array_equal(result, expected) + + # specifying m8[ns] gives the same result as default + result = np.asarray(arr, dtype="timedelta64[ns]") + expected = arr._data + assert result is expected + tm.assert_numpy_array_equal(result, expected) + result = np.array(arr, dtype="timedelta64[ns]", copy=False) + assert result is expected + tm.assert_numpy_array_equal(result, expected) + result = np.array(arr, dtype="timedelta64[ns]") + assert result is not expected + tm.assert_numpy_array_equal(result, expected) + + # to object dtype + result = np.asarray(arr, dtype=object) + expected = np.array(list(arr), dtype=object) + tm.assert_numpy_array_equal(result, expected) + + # to other dtype always copies + result = np.asarray(arr, dtype="int64") + assert result is not arr.asi8 + assert not np.may_share_memory(arr, 
result) + expected = arr.asi8.copy() + tm.assert_numpy_array_equal(result, expected) + + # other dtypes handled by numpy + for dtype in ["float64", str]: + result = np.asarray(arr, dtype=dtype) + expected = np.asarray(arr).astype(dtype) + tm.assert_numpy_array_equal(result, expected) + + def test_take_fill_valid(self, timedelta_index, fixed_now_ts): + tdi = timedelta_index + arr = TimedeltaArray(tdi) + + td1 = pd.Timedelta(days=1) + result = arr.take([-1, 1], allow_fill=True, fill_value=td1) + assert result[0] == td1 + + value = fixed_now_ts + msg = f"value should be a '{arr._scalar_type.__name__}' or 'NaT'. Got" + with pytest.raises(TypeError, match=msg): + # fill_value Timestamp invalid + arr.take([0, 1], allow_fill=True, fill_value=value) + + value = fixed_now_ts.to_period("D") + with pytest.raises(TypeError, match=msg): + # fill_value Period invalid + arr.take([0, 1], allow_fill=True, fill_value=value) + + value = np.datetime64("NaT", "ns") + with pytest.raises(TypeError, match=msg): + # require appropriate-dtype if we have a NA value + arr.take([-1, 1], allow_fill=True, fill_value=value) + + +class TestPeriodArray(SharedTests): + index_cls = PeriodIndex + array_cls = PeriodArray + scalar_type = Period + example_dtype = PeriodIndex([], freq="W").dtype + + @pytest.fixture + def arr1d(self, period_index): + return period_index._data + + def test_from_pi(self, arr1d): + pi = self.index_cls(arr1d) + arr = arr1d + assert list(arr) == list(pi) + + # Check that Index.__new__ knows what to do with PeriodArray + pi2 = pd.Index(arr) + assert isinstance(pi2, PeriodIndex) + assert list(pi2) == list(arr) + + def test_astype_object(self, arr1d): + pi = self.index_cls(arr1d) + arr = arr1d + asobj = arr.astype("O") + assert isinstance(asobj, np.ndarray) + assert asobj.dtype == "O" + assert list(asobj) == list(pi) + + def test_take_fill_valid(self, arr1d): + arr = arr1d + + value = NaT.value + msg = f"value should be a '{arr1d._scalar_type.__name__}' or 'NaT'. 
Got" + with pytest.raises(TypeError, match=msg): + # require NaT, not iNaT, as it could be confused with an integer + arr.take([-1, 1], allow_fill=True, fill_value=value) + + value = np.timedelta64("NaT", "ns") + with pytest.raises(TypeError, match=msg): + # require appropriate-dtype if we have a NA value + arr.take([-1, 1], allow_fill=True, fill_value=value) + + @pytest.mark.parametrize("how", ["S", "E"]) + def test_to_timestamp(self, how, arr1d): + pi = self.index_cls(arr1d) + arr = arr1d + + expected = DatetimeArray(pi.to_timestamp(how=how)) + result = arr.to_timestamp(how=how) + assert isinstance(result, DatetimeArray) + + # placeholder until these become actual EA subclasses and we can use + # an EA-specific tm.assert_ function + tm.assert_index_equal(pd.Index(result), pd.Index(expected)) + + def test_to_timestamp_roundtrip_bday(self): + # Case where infer_freq inside would choose "D" instead of "B" + dta = pd.date_range("2021-10-18", periods=3, freq="B")._data + parr = dta.to_period() + result = parr.to_timestamp() + assert result.freq == "B" + tm.assert_extension_array_equal(result, dta) + + dta2 = dta[::2] + parr2 = dta2.to_period() + result2 = parr2.to_timestamp() + assert result2.freq == "2B" + tm.assert_extension_array_equal(result2, dta2) + + parr3 = dta.to_period("2B") + result3 = parr3.to_timestamp() + assert result3.freq == "B" + tm.assert_extension_array_equal(result3, dta) + + def test_to_timestamp_out_of_bounds(self): + # GH#19643 previously overflowed silently + pi = pd.period_range("1500", freq="Y", periods=3) + msg = "Out of bounds nanosecond timestamp: 1500-01-01 00:00:00" + with pytest.raises(OutOfBoundsDatetime, match=msg): + pi.to_timestamp() + + with pytest.raises(OutOfBoundsDatetime, match=msg): + pi._data.to_timestamp() + + @pytest.mark.parametrize("propname", PeriodArray._bool_ops) + def test_bool_properties(self, arr1d, propname): + # in this case _bool_ops is just `is_leap_year` + pi = self.index_cls(arr1d) + arr = arr1d + + result = getattr(arr, propname) + expected = np.array(getattr(pi, propname)) + + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("propname", PeriodArray._field_ops) + def test_int_properties(self, arr1d, propname): + pi = self.index_cls(arr1d) + arr = arr1d + + result = getattr(arr, propname) + expected = np.array(getattr(pi, propname)) + + tm.assert_numpy_array_equal(result, expected) + + def test_array_interface(self, arr1d): + arr = arr1d + + # default asarray gives objects + result = np.asarray(arr) + expected = np.array(list(arr), dtype=object) + tm.assert_numpy_array_equal(result, expected) + + # to object dtype (same as default) + result = np.asarray(arr, dtype=object) + tm.assert_numpy_array_equal(result, expected) + + result = np.asarray(arr, dtype="int64") + tm.assert_numpy_array_equal(result, arr.asi8) + + # to other dtypes + msg = r"float\(\) argument must be a string or a( real)? 
number, not 'Period'" + with pytest.raises(TypeError, match=msg): + np.asarray(arr, dtype="float64") + + result = np.asarray(arr, dtype="S20") + expected = np.asarray(arr).astype("S20") + tm.assert_numpy_array_equal(result, expected) + + def test_strftime(self, arr1d): + arr = arr1d + + result = arr.strftime("%Y") + expected = np.array([per.strftime("%Y") for per in arr], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + def test_strftime_nat(self): + # GH 29578 + arr = PeriodArray(PeriodIndex(["2019-01-01", NaT], dtype="period[D]")) + + result = arr.strftime("%Y-%m-%d") + expected = np.array(["2019-01-01", np.nan], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize( + "arr,casting_nats", + [ + ( + TimedeltaIndex(["1 Day", "3 Hours", "NaT"])._data, + (NaT, np.timedelta64("NaT", "ns")), + ), + ( + pd.date_range("2000-01-01", periods=3, freq="D")._data, + (NaT, np.datetime64("NaT", "ns")), + ), + (pd.period_range("2000-01-01", periods=3, freq="D")._data, (NaT,)), + ], + ids=lambda x: type(x).__name__, +) +def test_casting_nat_setitem_array(arr, casting_nats): + expected = type(arr)._from_sequence([NaT, arr[1], arr[2]]) + + for nat in casting_nats: + arr = arr.copy() + arr[0] = nat + tm.assert_equal(arr, expected) + + +@pytest.mark.parametrize( + "arr,non_casting_nats", + [ + ( + TimedeltaIndex(["1 Day", "3 Hours", "NaT"])._data, + (np.datetime64("NaT", "ns"), NaT.value), + ), + ( + pd.date_range("2000-01-01", periods=3, freq="D")._data, + (np.timedelta64("NaT", "ns"), NaT.value), + ), + ( + pd.period_range("2000-01-01", periods=3, freq="D")._data, + (np.datetime64("NaT", "ns"), np.timedelta64("NaT", "ns"), NaT.value), + ), + ], + ids=lambda x: type(x).__name__, +) +def test_invalid_nat_setitem_array(arr, non_casting_nats): + msg = ( + "value should be a '(Timestamp|Timedelta|Period)', 'NaT', or array of those. " + "Got '(timedelta64|datetime64|int)' instead." 
+ ) + + for nat in non_casting_nats: + with pytest.raises(TypeError, match=msg): + arr[0] = nat + + +@pytest.mark.parametrize( + "arr", + [ + pd.date_range("2000", periods=4).array, + pd.timedelta_range("2000", periods=4).array, + ], +) +def test_to_numpy_extra(arr): + arr[0] = NaT + original = arr.copy() + + result = arr.to_numpy() + assert np.isnan(result[0]) + + result = arr.to_numpy(dtype="int64") + assert result[0] == -9223372036854775808 + + result = arr.to_numpy(dtype="int64", na_value=0) + assert result[0] == 0 + + result = arr.to_numpy(na_value=arr[1].to_numpy()) + assert result[0] == result[1] + + result = arr.to_numpy(na_value=arr[1].to_numpy(copy=False)) + assert result[0] == result[1] + + tm.assert_equal(arr, original) + + +@pytest.mark.parametrize("as_index", [True, False]) +@pytest.mark.parametrize( + "values", + [ + pd.to_datetime(["2020-01-01", "2020-02-01"]), + TimedeltaIndex([1, 2], unit="D"), + PeriodIndex(["2020-01-01", "2020-02-01"], freq="D"), + ], +) +@pytest.mark.parametrize( + "klass", + [ + list, + np.array, + pd.array, + pd.Series, + pd.Index, + pd.Categorical, + pd.CategoricalIndex, + ], +) +def test_searchsorted_datetimelike_with_listlike(values, klass, as_index): + # https://github.com/pandas-dev/pandas/issues/32762 + if not as_index: + values = values._data + + result = values.searchsorted(klass(values)) + expected = np.array([0, 1], dtype=result.dtype) + + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize( + "values", + [ + pd.to_datetime(["2020-01-01", "2020-02-01"]), + TimedeltaIndex([1, 2], unit="D"), + PeriodIndex(["2020-01-01", "2020-02-01"], freq="D"), + ], +) +@pytest.mark.parametrize( + "arg", [[1, 2], ["a", "b"], [Timestamp("2020-01-01", tz="Europe/London")] * 2] +) +def test_searchsorted_datetimelike_with_listlike_invalid_dtype(values, arg): + # https://github.com/pandas-dev/pandas/issues/32762 + msg = "[Unexpected type|Cannot compare]" + with pytest.raises(TypeError, match=msg): + values.searchsorted(arg) + + +@pytest.mark.parametrize("klass", [list, tuple, np.array, pd.Series]) +def test_period_index_construction_from_strings(klass): + # https://github.com/pandas-dev/pandas/issues/26109 + strings = ["2020Q1", "2020Q2"] * 2 + data = klass(strings) + result = PeriodIndex(data, freq="Q") + expected = PeriodIndex([Period(s) for s in strings]) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"]) +def test_from_pandas_array(dtype): + # GH#24615 + data = np.array([1, 2, 3], dtype=dtype) + arr = PandasArray(data) + + cls = {"M8[ns]": DatetimeArray, "m8[ns]": TimedeltaArray}[dtype] + + result = cls(arr) + expected = cls(data) + tm.assert_extension_array_equal(result, expected) + + result = cls._from_sequence(arr) + expected = cls._from_sequence(data) + tm.assert_extension_array_equal(result, expected) + + func = {"M8[ns]": _sequence_to_dt64ns, "m8[ns]": sequence_to_td64ns}[dtype] + result = func(arr)[0] + expected = func(data)[0] + tm.assert_equal(result, expected) + + func = {"M8[ns]": pd.to_datetime, "m8[ns]": pd.to_timedelta}[dtype] + result = func(arr).array + expected = func(data).array + tm.assert_equal(result, expected) + + # Let's check the Indexes while we're here + idx_cls = {"M8[ns]": DatetimeIndex, "m8[ns]": TimedeltaIndex}[dtype] + result = idx_cls(arr) + expected = idx_cls(data) + tm.assert_index_equal(result, expected) + + +@pytest.fixture( + params=[ + "memoryview", + "array", + pytest.param("dask", marks=td.skip_if_no("dask.array")), + pytest.param("xarray", 
marks=td.skip_if_no("xarray")), + ] +) +def array_likes(request): + # GH#24539 recognize e.g xarray, dask, ... + arr = np.array([1, 2, 3], dtype=np.int64) + + name = request.param + if name == "memoryview": + data = memoryview(arr) + elif name == "array": + # stdlib array + import array + + data = array.array("i", arr) + elif name == "dask": + import dask.array + + data = dask.array.array(arr) + elif name == "xarray": + import xarray as xr + + data = xr.DataArray(arr) + + return arr, data + + +@pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"]) +def test_from_obscure_array(dtype, array_likes): + # GH#24539 recognize e.g xarray, dask, ... + # Note: we dont do this for PeriodArray bc _from_sequence won't accept + # an array of integers + # TODO: could check with arraylike of Period objects + arr, data = array_likes + + cls = {"M8[ns]": DatetimeArray, "m8[ns]": TimedeltaArray}[dtype] + + expected = cls(arr) + result = cls._from_sequence(data) + tm.assert_extension_array_equal(result, expected) + + func = {"M8[ns]": _sequence_to_dt64ns, "m8[ns]": sequence_to_td64ns}[dtype] + result = func(arr)[0] + expected = func(data)[0] + tm.assert_equal(result, expected) + + if not isinstance(data, memoryview): + # FIXME(GH#44431) these raise on memoryview and attempted fix + # fails on py3.10 + func = {"M8[ns]": pd.to_datetime, "m8[ns]": pd.to_timedelta}[dtype] + result = func(arr).array + expected = func(data).array + tm.assert_equal(result, expected) + + # Let's check the Indexes while we're here + idx_cls = {"M8[ns]": DatetimeIndex, "m8[ns]": TimedeltaIndex}[dtype] + result = idx_cls(arr) + expected = idx_cls(data) + tm.assert_index_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/test_datetimes.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/test_datetimes.py new file mode 100644 index 0000000..73b4e7c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/test_datetimes.py @@ -0,0 +1,408 @@ +""" +Tests for DatetimeArray +""" +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import DatetimeTZDtype + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays import DatetimeArray + + +class TestDatetimeArrayComparisons: + # TODO: merge this into tests/arithmetic/test_datetime64 once it is + # sufficiently robust + + def test_cmp_dt64_arraylike_tznaive(self, comparison_op): + # arbitrary tz-naive DatetimeIndex + op = comparison_op + + dti = pd.date_range("2016-01-1", freq="MS", periods=9, tz=None) + arr = DatetimeArray(dti) + assert arr.freq == dti.freq + assert arr.tz == dti.tz + + right = dti + + expected = np.ones(len(arr), dtype=bool) + if comparison_op.__name__ in ["ne", "gt", "lt"]: + # for these the comparisons should be all-False + expected = ~expected + + result = op(arr, arr) + tm.assert_numpy_array_equal(result, expected) + for other in [ + right, + np.array(right), + list(right), + tuple(right), + right.astype(object), + ]: + result = op(arr, other) + tm.assert_numpy_array_equal(result, expected) + + result = op(other, arr) + tm.assert_numpy_array_equal(result, expected) + + +class TestDatetimeArray: + def test_astype_to_same(self): + arr = DatetimeArray._from_sequence( + ["2000"], dtype=DatetimeTZDtype(tz="US/Central") + ) + result = arr.astype(DatetimeTZDtype(tz="US/Central"), copy=False) + assert result is arr + + @pytest.mark.parametrize("dtype", ["datetime64[ns]", "datetime64[ns, UTC]"]) + @pytest.mark.parametrize( + "other", ["datetime64[ns]", "datetime64[ns, UTC]", 
"datetime64[ns, CET]"] + ) + def test_astype_copies(self, dtype, other): + # https://github.com/pandas-dev/pandas/pull/32490 + ser = pd.Series([1, 2], dtype=dtype) + orig = ser.copy() + + warn = None + if (dtype == "datetime64[ns]") ^ (other == "datetime64[ns]"): + # deprecated in favor of tz_localize + warn = FutureWarning + + with tm.assert_produces_warning(warn): + t = ser.astype(other) + t[:] = pd.NaT + tm.assert_series_equal(ser, orig) + + @pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"]) + def test_astype_int(self, dtype): + arr = DatetimeArray._from_sequence([pd.Timestamp("2000"), pd.Timestamp("2001")]) + result = arr.astype(dtype) + + if np.dtype(dtype).kind == "u": + expected_dtype = np.dtype("uint64") + else: + expected_dtype = np.dtype("int64") + expected = arr.astype(expected_dtype) + + assert result.dtype == expected_dtype + tm.assert_numpy_array_equal(result, expected) + + def test_tz_setter_raises(self): + arr = DatetimeArray._from_sequence( + ["2000"], dtype=DatetimeTZDtype(tz="US/Central") + ) + with pytest.raises(AttributeError, match="tz_localize"): + arr.tz = "UTC" + + def test_setitem_str_impute_tz(self, tz_naive_fixture): + # Like for getitem, if we are passed a naive-like string, we impute + # our own timezone. + tz = tz_naive_fixture + + data = np.array([1, 2, 3], dtype="M8[ns]") + dtype = data.dtype if tz is None else DatetimeTZDtype(tz=tz) + arr = DatetimeArray(data, dtype=dtype) + expected = arr.copy() + + ts = pd.Timestamp("2020-09-08 16:50").tz_localize(tz) + setter = str(ts.tz_localize(None)) + + # Setting a scalar tznaive string + expected[0] = ts + arr[0] = setter + tm.assert_equal(arr, expected) + + # Setting a listlike of tznaive strings + expected[1] = ts + arr[:2] = [setter, setter] + tm.assert_equal(arr, expected) + + def test_setitem_different_tz_raises(self): + data = np.array([1, 2, 3], dtype="M8[ns]") + arr = DatetimeArray(data, copy=False, dtype=DatetimeTZDtype(tz="US/Central")) + with pytest.raises(TypeError, match="Cannot compare tz-naive and tz-aware"): + arr[0] = pd.Timestamp("2000") + + ts = pd.Timestamp("2000", tz="US/Eastern") + with pytest.raises(ValueError, match="US/Central"): + with tm.assert_produces_warning( + FutureWarning, match="mismatched timezones" + ): + arr[0] = ts + # once deprecation is enforced + # assert arr[0] == ts.tz_convert("US/Central") + + def test_setitem_clears_freq(self): + a = DatetimeArray(pd.date_range("2000", periods=2, freq="D", tz="US/Central")) + a[0] = pd.Timestamp("2000", tz="US/Central") + assert a.freq is None + + @pytest.mark.parametrize( + "obj", + [ + pd.Timestamp("2021-01-01"), + pd.Timestamp("2021-01-01").to_datetime64(), + pd.Timestamp("2021-01-01").to_pydatetime(), + ], + ) + def test_setitem_objects(self, obj): + # make sure we accept datetime64 and datetime in addition to Timestamp + dti = pd.date_range("2000", periods=2, freq="D") + arr = dti._data + + arr[0] = obj + assert arr[0] == obj + + def test_repeat_preserves_tz(self): + dti = pd.date_range("2000", periods=2, freq="D", tz="US/Central") + arr = DatetimeArray(dti) + + repeated = arr.repeat([1, 1]) + + # preserves tz and values, but not freq + expected = DatetimeArray(arr.asi8, freq=None, dtype=arr.dtype) + tm.assert_equal(repeated, expected) + + def test_value_counts_preserves_tz(self): + dti = pd.date_range("2000", periods=2, freq="D", tz="US/Central") + arr = DatetimeArray(dti).repeat([4, 3]) + + result = arr.value_counts() + + # Note: not tm.assert_index_equal, since `freq`s do not match + assert 
result.index.equals(dti)
+
+        arr[-2] = pd.NaT
+        result = arr.value_counts(dropna=False)
+        expected = pd.Series([4, 2, 1], index=[dti[0], dti[1], pd.NaT])
+        tm.assert_series_equal(result, expected)
+
+    @pytest.mark.parametrize("method", ["pad", "backfill"])
+    def test_fillna_preserves_tz(self, method):
+        dti = pd.date_range("2000-01-01", periods=5, freq="D", tz="US/Central")
+        arr = DatetimeArray(dti, copy=True)
+        arr[2] = pd.NaT
+
+        fill_val = dti[1] if method == "pad" else dti[3]
+        expected = DatetimeArray._from_sequence(
+            [dti[0], dti[1], fill_val, dti[3], dti[4]],
+            dtype=DatetimeTZDtype(tz="US/Central"),
+        )
+
+        result = arr.fillna(method=method)
+        tm.assert_extension_array_equal(result, expected)
+
+        # assert that arr and dti were not modified in-place
+        assert arr[2] is pd.NaT
+        assert dti[2] == pd.Timestamp("2000-01-03", tz="US/Central")
+
+    def test_fillna_2d(self):
+        dti = pd.date_range("2016-01-01", periods=6, tz="US/Pacific")
+        dta = dti._data.reshape(3, 2).copy()
+        dta[0, 1] = pd.NaT
+        dta[1, 0] = pd.NaT
+
+        res1 = dta.fillna(method="pad")
+        expected1 = dta.copy()
+        expected1[1, 0] = dta[0, 0]
+        tm.assert_extension_array_equal(res1, expected1)
+
+        res2 = dta.fillna(method="backfill")
+        expected2 = dta.copy()
+        expected2[1, 0] = dta[2, 0]
+        expected2[0, 1] = dta[1, 1]
+        tm.assert_extension_array_equal(res2, expected2)
+
+        # with different ordering for underlying ndarray; behavior should
+        # be unchanged
+        dta2 = dta._from_backing_data(dta._ndarray.copy(order="F"))
+        assert dta2._ndarray.flags["F_CONTIGUOUS"]
+        assert not dta2._ndarray.flags["C_CONTIGUOUS"]
+        tm.assert_extension_array_equal(dta, dta2)
+
+        res3 = dta2.fillna(method="pad")
+        tm.assert_extension_array_equal(res3, expected1)
+
+        res4 = dta2.fillna(method="backfill")
+        tm.assert_extension_array_equal(res4, expected2)
+
+        # test the DataFrame method while we're here
+        df = pd.DataFrame(dta)
+        res = df.fillna(method="pad")
+        expected = pd.DataFrame(expected1)
+        tm.assert_frame_equal(res, expected)
+
+        res = df.fillna(method="backfill")
+        expected = pd.DataFrame(expected2)
+        tm.assert_frame_equal(res, expected)
+
+    def test_array_interface_tz(self):
+        tz = "US/Central"
+        data = DatetimeArray(pd.date_range("2017", periods=2, tz=tz))
+        result = np.asarray(data)
+
+        expected = np.array(
+            [
+                pd.Timestamp("2017-01-01T00:00:00", tz=tz),
+                pd.Timestamp("2017-01-02T00:00:00", tz=tz),
+            ],
+            dtype=object,
+        )
+        tm.assert_numpy_array_equal(result, expected)
+
+        result = np.asarray(data, dtype=object)
+        tm.assert_numpy_array_equal(result, expected)
+
+        result = np.asarray(data, dtype="M8[ns]")
+
+        expected = np.array(
+            ["2017-01-01T06:00:00", "2017-01-02T06:00:00"], dtype="M8[ns]"
+        )
+        tm.assert_numpy_array_equal(result, expected)
+
+    def test_array_interface(self):
+        data = DatetimeArray(pd.date_range("2017", periods=2))
+        expected = np.array(
+            ["2017-01-01T00:00:00", "2017-01-02T00:00:00"], dtype="datetime64[ns]"
+        )
+
+        result = np.asarray(data)
+        tm.assert_numpy_array_equal(result, expected)
+
+        result = np.asarray(data, dtype=object)
+        expected = np.array(
+            [pd.Timestamp("2017-01-01T00:00:00"), pd.Timestamp("2017-01-02T00:00:00")],
+            dtype=object,
+        )
+        tm.assert_numpy_array_equal(result, expected)
+
+    @pytest.mark.parametrize("index", [True, False])
+    def test_searchsorted_different_tz(self, index):
+        data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9
+        arr = DatetimeArray(data, freq="D").tz_localize("Asia/Tokyo")
+        if index:
+            arr = pd.Index(arr)
+
+        expected = arr.searchsorted(arr[2])
+        result = arr.searchsorted(arr[2].tz_convert("UTC"))
+        assert result == expected
+
+        expected = arr.searchsorted(arr[2:6])
+        result = arr.searchsorted(arr[2:6].tz_convert("UTC"))
+        tm.assert_equal(result, expected)
+
+    @pytest.mark.parametrize("index", [True, False])
+    def test_searchsorted_tzawareness_compat(self, index):
+        data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9
+        arr = DatetimeArray(data, freq="D")
+        if index:
+            arr = pd.Index(arr)
+
+        mismatch = arr.tz_localize("Asia/Tokyo")
+
+        msg = "Cannot compare tz-naive and tz-aware datetime-like objects"
+        with pytest.raises(TypeError, match=msg):
+            arr.searchsorted(mismatch[0])
+        with pytest.raises(TypeError, match=msg):
+            arr.searchsorted(mismatch)
+
+        with pytest.raises(TypeError, match=msg):
+            mismatch.searchsorted(arr[0])
+        with pytest.raises(TypeError, match=msg):
+            mismatch.searchsorted(arr)
+
+    @pytest.mark.parametrize(
+        "other",
+        [
+            1,
+            np.int64(1),
+            1.0,
+            np.timedelta64("NaT"),
+            pd.Timedelta(days=2),
+            "invalid",
+            np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9,
+            np.arange(10).view("timedelta64[ns]") * 24 * 3600 * 10 ** 9,
+            pd.Timestamp("2021-01-01").to_period("D"),
+        ],
+    )
+    @pytest.mark.parametrize("index", [True, False])
+    def test_searchsorted_invalid_types(self, other, index):
+        data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9
+        arr = DatetimeArray(data, freq="D")
+        if index:
+            arr = pd.Index(arr)
+
+        msg = "|".join(
+            [
+                "searchsorted requires compatible dtype or scalar",
+                "value should be a 'Timestamp', 'NaT', or array of those. Got",
+            ]
+        )
+        with pytest.raises(TypeError, match=msg):
+            arr.searchsorted(other)
+
+    def test_shift_fill_value(self):
+        dti = pd.date_range("2016-01-01", periods=3)
+
+        dta = dti._data
+        expected = DatetimeArray(np.roll(dta._data, 1))
+
+        fv = dta[-1]
+        for fill_value in [fv, fv.to_pydatetime(), fv.to_datetime64()]:
+            result = dta.shift(1, fill_value=fill_value)
+            tm.assert_datetime_array_equal(result, expected)
+
+        dta = dta.tz_localize("UTC")
+        expected = expected.tz_localize("UTC")
+        fv = dta[-1]
+        for fill_value in [fv, fv.to_pydatetime()]:
+            result = dta.shift(1, fill_value=fill_value)
+            tm.assert_datetime_array_equal(result, expected)
+
+    def test_shift_value_tzawareness_mismatch(self):
+        dti = pd.date_range("2016-01-01", periods=3)
+
+        dta = dti._data
+
+        fv = dta[-1].tz_localize("UTC")
+        for invalid in [fv, fv.to_pydatetime()]:
+            with pytest.raises(TypeError, match="Cannot compare"):
+                dta.shift(1, fill_value=invalid)
+
+        dta = dta.tz_localize("UTC")
+        fv = dta[-1].tz_localize(None)
+        for invalid in [fv, fv.to_pydatetime(), fv.to_datetime64()]:
+            with pytest.raises(TypeError, match="Cannot compare"):
+                dta.shift(1, fill_value=invalid)
+
+    def test_shift_requires_tzmatch(self):
+        # since filling is setitem-like, we require a matching timezone,
+        # not just matching tzawareness
+        dti = pd.date_range("2016-01-01", periods=3, tz="UTC")
+        dta = dti._data
+
+        fill_value = pd.Timestamp("2020-10-18 18:44", tz="US/Pacific")
+
+        msg = "Timezones don't match. 
'UTC' != 'US/Pacific'" + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning( + FutureWarning, match="mismatched timezones" + ): + dta.shift(1, fill_value=fill_value) + + # once deprecation is enforced + # expected = dta.shift(1, fill_value=fill_value.tz_convert("UTC")) + # tm.assert_equal(result, expected) + + def test_tz_localize_t2d(self): + dti = pd.date_range("1994-05-12", periods=12, tz="US/Pacific") + dta = dti._data.reshape(3, 4) + result = dta.tz_localize(None) + + expected = dta.ravel().tz_localize(None).reshape(dta.shape) + tm.assert_datetime_array_equal(result, expected) + + roundtrip = expected.tz_localize("US/Pacific") + tm.assert_datetime_array_equal(roundtrip, dta) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/test_ndarray_backed.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/test_ndarray_backed.py new file mode 100644 index 0000000..c48fb7e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/test_ndarray_backed.py @@ -0,0 +1,75 @@ +""" +Tests for subclasses of NDArrayBackedExtensionArray +""" +import numpy as np + +from pandas import ( + CategoricalIndex, + date_range, +) +from pandas.core.arrays import ( + Categorical, + DatetimeArray, + PandasArray, + TimedeltaArray, +) + + +class TestEmpty: + def test_empty_categorical(self): + ci = CategoricalIndex(["a", "b", "c"], ordered=True) + dtype = ci.dtype + + # case with int8 codes + shape = (4,) + result = Categorical._empty(shape, dtype=dtype) + assert isinstance(result, Categorical) + assert result.shape == shape + assert result._ndarray.dtype == np.int8 + + # case where repr would segfault if we didn't override base implementation + result = Categorical._empty((4096,), dtype=dtype) + assert isinstance(result, Categorical) + assert result.shape == (4096,) + assert result._ndarray.dtype == np.int8 + repr(result) + + # case with int16 codes + ci = CategoricalIndex(list(range(512)) * 4, ordered=False) + dtype = ci.dtype + result = Categorical._empty(shape, dtype=dtype) + assert isinstance(result, Categorical) + assert result.shape == shape + assert result._ndarray.dtype == np.int16 + + def test_empty_dt64tz(self): + dti = date_range("2016-01-01", periods=2, tz="Asia/Tokyo") + dtype = dti.dtype + + shape = (0,) + result = DatetimeArray._empty(shape, dtype=dtype) + assert result.dtype == dtype + assert isinstance(result, DatetimeArray) + assert result.shape == shape + + def test_empty_dt64(self): + shape = (3, 9) + result = DatetimeArray._empty(shape, dtype="datetime64[ns]") + assert isinstance(result, DatetimeArray) + assert result.shape == shape + + def test_empty_td64(self): + shape = (3, 9) + result = TimedeltaArray._empty(shape, dtype="m8[ns]") + assert isinstance(result, TimedeltaArray) + assert result.shape == shape + + def test_empty_pandas_array(self): + arr = PandasArray(np.array([1, 2])) + dtype = arr.dtype + + shape = (3, 9) + result = PandasArray._empty(shape, dtype=dtype) + assert isinstance(result, PandasArray) + assert result.dtype == dtype + assert result.shape == shape diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/test_numpy.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/test_numpy.py new file mode 100644 index 0000000..66f7bf1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/test_numpy.py @@ -0,0 +1,295 @@ +""" +Additional tests for PandasArray that aren't covered by +the interface tests. 
+""" +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import PandasDtype + +import pandas as pd +import pandas._testing as tm +from pandas.arrays import PandasArray + + +@pytest.fixture( + params=[ + np.array(["a", "b"], dtype=object), + np.array([0, 1], dtype=float), + np.array([0, 1], dtype=int), + np.array([0, 1 + 2j], dtype=complex), + np.array([True, False], dtype=bool), + np.array([0, 1], dtype="datetime64[ns]"), + np.array([0, 1], dtype="timedelta64[ns]"), + ] +) +def any_numpy_array(request): + """ + Parametrized fixture for NumPy arrays with different dtypes. + + This excludes string and bytes. + """ + return request.param + + +# ---------------------------------------------------------------------------- +# PandasDtype + + +@pytest.mark.parametrize( + "dtype, expected", + [ + ("bool", True), + ("int", True), + ("uint", True), + ("float", True), + ("complex", True), + ("str", False), + ("bytes", False), + ("datetime64[ns]", False), + ("object", False), + ("void", False), + ], +) +def test_is_numeric(dtype, expected): + dtype = PandasDtype(dtype) + assert dtype._is_numeric is expected + + +@pytest.mark.parametrize( + "dtype, expected", + [ + ("bool", True), + ("int", False), + ("uint", False), + ("float", False), + ("complex", False), + ("str", False), + ("bytes", False), + ("datetime64[ns]", False), + ("object", False), + ("void", False), + ], +) +def test_is_boolean(dtype, expected): + dtype = PandasDtype(dtype) + assert dtype._is_boolean is expected + + +def test_repr(): + dtype = PandasDtype(np.dtype("int64")) + assert repr(dtype) == "PandasDtype('int64')" + + +def test_constructor_from_string(): + result = PandasDtype.construct_from_string("int64") + expected = PandasDtype(np.dtype("int64")) + assert result == expected + + +def test_dtype_univalent(any_numpy_dtype): + dtype = PandasDtype(any_numpy_dtype) + + result = PandasDtype(dtype) + assert result == dtype + + +# ---------------------------------------------------------------------------- +# Construction + + +def test_constructor_no_coercion(): + with pytest.raises(ValueError, match="NumPy array"): + PandasArray([1, 2, 3]) + + +def test_series_constructor_with_copy(): + ndarray = np.array([1, 2, 3]) + ser = pd.Series(PandasArray(ndarray), copy=True) + + assert ser.values is not ndarray + + +def test_series_constructor_with_astype(): + ndarray = np.array([1, 2, 3]) + result = pd.Series(PandasArray(ndarray), dtype="float64") + expected = pd.Series([1.0, 2.0, 3.0], dtype="float64") + tm.assert_series_equal(result, expected) + + +def test_from_sequence_dtype(): + arr = np.array([1, 2, 3], dtype="int64") + result = PandasArray._from_sequence(arr, dtype="uint64") + expected = PandasArray(np.array([1, 2, 3], dtype="uint64")) + tm.assert_extension_array_equal(result, expected) + + +def test_constructor_copy(): + arr = np.array([0, 1]) + result = PandasArray(arr, copy=True) + + assert not tm.shares_memory(result, arr) + + +def test_constructor_with_data(any_numpy_array): + nparr = any_numpy_array + arr = PandasArray(nparr) + assert arr.dtype.numpy_dtype == nparr.dtype + + +# ---------------------------------------------------------------------------- +# Conversion + + +def test_to_numpy(): + arr = PandasArray(np.array([1, 2, 3])) + result = arr.to_numpy() + assert result is arr._ndarray + + result = arr.to_numpy(copy=True) + assert result is not arr._ndarray + + result = arr.to_numpy(dtype="f8") + expected = np.array([1, 2, 3], dtype="f8") + tm.assert_numpy_array_equal(result, expected) + + +# 
---------------------------------------------------------------------------- +# Setitem + + +def test_setitem_series(): + ser = pd.Series([1, 2, 3]) + ser.array[0] = 10 + expected = pd.Series([10, 2, 3]) + tm.assert_series_equal(ser, expected) + + +def test_setitem(any_numpy_array): + nparr = any_numpy_array + arr = PandasArray(nparr, copy=True) + + arr[0] = arr[1] + nparr[0] = nparr[1] + + tm.assert_numpy_array_equal(arr.to_numpy(), nparr) + + +# ---------------------------------------------------------------------------- +# Reductions + + +def test_bad_reduce_raises(): + arr = np.array([1, 2, 3], dtype="int64") + arr = PandasArray(arr) + msg = "cannot perform not_a_method with type int" + with pytest.raises(TypeError, match=msg): + arr._reduce(msg) + + +def test_validate_reduction_keyword_args(): + arr = PandasArray(np.array([1, 2, 3])) + msg = "the 'keepdims' parameter is not supported .*all" + with pytest.raises(ValueError, match=msg): + arr.all(keepdims=True) + + +def test_np_max_nested_tuples(): + # case where checking in ufunc.nout works while checking for tuples + # does not + vals = [ + (("j", "k"), ("l", "m")), + (("l", "m"), ("o", "p")), + (("o", "p"), ("j", "k")), + ] + ser = pd.Series(vals) + arr = ser.array + + assert arr.max() is arr[2] + assert ser.max() is arr[2] + + result = np.maximum.reduce(arr) + assert result == arr[2] + + result = np.maximum.reduce(ser) + assert result == arr[2] + + +def test_np_reduce_2d(): + raw = np.arange(12).reshape(4, 3) + arr = PandasArray(raw) + + res = np.maximum.reduce(arr, axis=0) + tm.assert_extension_array_equal(res, arr[-1]) + + alt = arr.max(axis=0) + tm.assert_extension_array_equal(alt, arr[-1]) + + +# ---------------------------------------------------------------------------- +# Ops + + +@pytest.mark.parametrize("ufunc", [np.abs, np.negative, np.positive]) +def test_ufunc_unary(ufunc): + arr = PandasArray(np.array([-1.0, 0.0, 1.0])) + result = ufunc(arr) + expected = PandasArray(ufunc(arr._ndarray)) + tm.assert_extension_array_equal(result, expected) + + +def test_ufunc(): + arr = PandasArray(np.array([-1.0, 0.0, 1.0])) + + r1, r2 = np.divmod(arr, np.add(arr, 2)) + e1, e2 = np.divmod(arr._ndarray, np.add(arr._ndarray, 2)) + e1 = PandasArray(e1) + e2 = PandasArray(e2) + tm.assert_extension_array_equal(r1, e1) + tm.assert_extension_array_equal(r2, e2) + + +def test_basic_binop(): + # Just a basic smoke test. The EA interface tests exercise this + # more thoroughly. + x = PandasArray(np.array([1, 2, 3])) + result = x + x + expected = PandasArray(np.array([2, 4, 6])) + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize("dtype", [None, object]) +def test_setitem_object_typecode(dtype): + arr = PandasArray(np.array(["a", "b", "c"], dtype=dtype)) + arr[0] = "t" + expected = PandasArray(np.array(["t", "b", "c"], dtype=dtype)) + tm.assert_extension_array_equal(arr, expected) + + +def test_setitem_no_coercion(): + # https://github.com/pandas-dev/pandas/issues/28150 + arr = PandasArray(np.array([1, 2, 3])) + with pytest.raises(ValueError, match="int"): + arr[0] = "a" + + # With a value that we do coerce, check that we coerce the value + # and not the underlying array. 
+ arr[0] = 2.5 + assert isinstance(arr[0], (int, np.integer)), type(arr[0]) + + +def test_setitem_preserves_views(): + # GH#28150, see also extension test of the same name + arr = PandasArray(np.array([1, 2, 3])) + view1 = arr.view() + view2 = arr[:] + view3 = np.asarray(arr) + + arr[0] = 9 + assert view1[0] == 9 + assert view2[0] == 9 + assert view3[0] == 9 + + arr[-1] = 2.5 + view1[-1] = 5 + assert arr[-1] == 5 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/test_period.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/test_period.py new file mode 100644 index 0000000..2592a02 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/test_period.py @@ -0,0 +1,162 @@ +import numpy as np +import pytest + +from pandas._libs.tslibs import iNaT +from pandas._libs.tslibs.period import IncompatibleFrequency + +from pandas.core.dtypes.base import _registry as registry +from pandas.core.dtypes.dtypes import PeriodDtype + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays import ( + PeriodArray, + period_array, +) + +# ---------------------------------------------------------------------------- +# Dtype + + +def test_registered(): + assert PeriodDtype in registry.dtypes + result = registry.find("Period[D]") + expected = PeriodDtype("D") + assert result == expected + + +# ---------------------------------------------------------------------------- +# period_array + + +def test_asi8(): + result = period_array(["2000", "2001", None], freq="D").asi8 + expected = np.array([10957, 11323, iNaT]) + tm.assert_numpy_array_equal(result, expected) + + +def test_take_raises(): + arr = period_array(["2000", "2001"], freq="D") + with pytest.raises(IncompatibleFrequency, match="freq"): + arr.take([0, -1], allow_fill=True, fill_value=pd.Period("2000", freq="W")) + + msg = "value should be a 'Period' or 'NaT'. 
Got 'str' instead"
+    with pytest.raises(TypeError, match=msg):
+        arr.take([0, -1], allow_fill=True, fill_value="foo")
+
+
+def test_fillna_raises():
+    arr = period_array(["2000", "2001", "2002"], freq="D")
+    with pytest.raises(ValueError, match="Length"):
+        arr.fillna(arr[:2])
+
+
+def test_fillna_copies():
+    arr = period_array(["2000", "2001", "2002"], freq="D")
+    result = arr.fillna(pd.Period("2000", "D"))
+    assert result is not arr
+
+
+# ----------------------------------------------------------------------------
+# setitem
+
+
+@pytest.mark.parametrize(
+    "key, value, expected",
+    [
+        ([0], pd.Period("2000", "D"), [10957, 1, 2]),
+        ([0], None, [iNaT, 1, 2]),
+        ([0], np.nan, [iNaT, 1, 2]),
+        ([0, 1, 2], pd.Period("2000", "D"), [10957] * 3),
+        (
+            [0, 1, 2],
+            [pd.Period("2000", "D"), pd.Period("2001", "D"), pd.Period("2002", "D")],
+            [10957, 11323, 11688],
+        ),
+    ],
+)
+def test_setitem(key, value, expected):
+    arr = PeriodArray(np.arange(3), freq="D")
+    expected = PeriodArray(expected, freq="D")
+    arr[key] = value
+    tm.assert_period_array_equal(arr, expected)
+
+
+def test_setitem_raises_incompatible_freq():
+    arr = PeriodArray(np.arange(3), freq="D")
+    with pytest.raises(IncompatibleFrequency, match="freq"):
+        arr[0] = pd.Period("2000", freq="A")
+
+    other = period_array(["2000", "2001"], freq="A")
+    with pytest.raises(IncompatibleFrequency, match="freq"):
+        arr[[0, 1]] = other
+
+
+def test_setitem_raises_length():
+    arr = PeriodArray(np.arange(3), freq="D")
+    with pytest.raises(ValueError, match="length"):
+        arr[[0, 1]] = [pd.Period("2000", freq="D")]
+
+
+def test_setitem_raises_type():
+    arr = PeriodArray(np.arange(3), freq="D")
+    with pytest.raises(TypeError, match="int"):
+        arr[0] = 1
+
+
+# ----------------------------------------------------------------------------
+# Ops
+
+
+def test_sub_period():
+    arr = period_array(["2000", "2001"], freq="D")
+    other = pd.Period("2000", freq="M")
+    with pytest.raises(IncompatibleFrequency, match="freq"):
+        arr - other
+
+
+# ----------------------------------------------------------------------------
+# Methods
+
+
+@pytest.mark.parametrize(
+    "other",
+    [pd.Period("2000", freq="H"), period_array(["2000", "2001", "2000"], freq="H")],
+)
+def test_where_different_freq_raises(other):
+    ser = pd.Series(period_array(["2000", "2001", "2002"], freq="D"))
+    cond = np.array([True, False, True])
+    with pytest.raises(IncompatibleFrequency, match="freq"):
+        ser.where(cond, other)
+
+
+# ----------------------------------------------------------------------------
+# Printing
+
+
+def test_repr_small():
+    arr = period_array(["2000", "2001"], freq="D")
+    result = str(arr)
+    expected = (
+        "<PeriodArray>\n['2000-01-01', '2001-01-01']\nLength: 2, dtype: period[D]"
+    )
+    assert result == expected
+
+
+def test_repr_large():
+    arr = period_array(["2000", "2001"] * 500, freq="D")
+    result = str(arr)
+    expected = (
+        "<PeriodArray>\n"
+        "['2000-01-01', '2001-01-01', '2000-01-01', '2001-01-01', "
+        "'2000-01-01',\n"
+        " '2001-01-01', '2000-01-01', '2001-01-01', '2000-01-01', "
+        "'2001-01-01',\n"
+        " ...\n"
+        " '2000-01-01', '2001-01-01', '2000-01-01', '2001-01-01', "
+        "'2000-01-01',\n"
+        " '2001-01-01', '2000-01-01', '2001-01-01', '2000-01-01', "
+        "'2001-01-01']\n"
+        "Length: 1000, dtype: period[D]"
+    )
+    assert result == expected
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/test_timedeltas.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/test_timedeltas.py
new file mode 100644
index 0000000..1db0f6a
--- /dev/null
+++ 
b/.local/lib/python3.8/site-packages/pandas/tests/arrays/test_timedeltas.py @@ -0,0 +1,126 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import Timedelta +import pandas._testing as tm +from pandas.core.arrays import TimedeltaArray + + +class TestTimedeltaArray: + @pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"]) + def test_astype_int(self, dtype): + arr = TimedeltaArray._from_sequence([Timedelta("1H"), Timedelta("2H")]) + result = arr.astype(dtype) + + if np.dtype(dtype).kind == "u": + expected_dtype = np.dtype("uint64") + else: + expected_dtype = np.dtype("int64") + expected = arr.astype(expected_dtype) + + assert result.dtype == expected_dtype + tm.assert_numpy_array_equal(result, expected) + + def test_setitem_clears_freq(self): + a = TimedeltaArray(pd.timedelta_range("1H", periods=2, freq="H")) + a[0] = Timedelta("1H") + assert a.freq is None + + @pytest.mark.parametrize( + "obj", + [ + Timedelta(seconds=1), + Timedelta(seconds=1).to_timedelta64(), + Timedelta(seconds=1).to_pytimedelta(), + ], + ) + def test_setitem_objects(self, obj): + # make sure we accept timedelta64 and timedelta in addition to Timedelta + tdi = pd.timedelta_range("2 Days", periods=4, freq="H") + arr = TimedeltaArray(tdi, freq=tdi.freq) + + arr[0] = obj + assert arr[0] == Timedelta(seconds=1) + + @pytest.mark.parametrize( + "other", + [ + 1, + np.int64(1), + 1.0, + np.datetime64("NaT"), + pd.Timestamp("2021-01-01"), + "invalid", + np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9, + (np.arange(10) * 24 * 3600 * 10 ** 9).view("datetime64[ns]"), + pd.Timestamp("2021-01-01").to_period("D"), + ], + ) + @pytest.mark.parametrize("index", [True, False]) + def test_searchsorted_invalid_types(self, other, index): + data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 + arr = TimedeltaArray(data, freq="D") + if index: + arr = pd.Index(arr) + + msg = "|".join( + [ + "searchsorted requires compatible dtype or scalar", + "value should be a 'Timedelta', 'NaT', or array of those. 
Got", + ] + ) + with pytest.raises(TypeError, match=msg): + arr.searchsorted(other) + + +class TestUnaryOps: + def test_abs(self): + vals = np.array([-3600 * 10 ** 9, "NaT", 7200 * 10 ** 9], dtype="m8[ns]") + arr = TimedeltaArray(vals) + + evals = np.array([3600 * 10 ** 9, "NaT", 7200 * 10 ** 9], dtype="m8[ns]") + expected = TimedeltaArray(evals) + + result = abs(arr) + tm.assert_timedelta_array_equal(result, expected) + + result2 = np.abs(arr) + tm.assert_timedelta_array_equal(result2, expected) + + def test_pos(self): + vals = np.array([-3600 * 10 ** 9, "NaT", 7200 * 10 ** 9], dtype="m8[ns]") + arr = TimedeltaArray(vals) + + result = +arr + tm.assert_timedelta_array_equal(result, arr) + assert not tm.shares_memory(result, arr) + + result2 = np.positive(arr) + tm.assert_timedelta_array_equal(result2, arr) + assert not tm.shares_memory(result2, arr) + + def test_neg(self): + vals = np.array([-3600 * 10 ** 9, "NaT", 7200 * 10 ** 9], dtype="m8[ns]") + arr = TimedeltaArray(vals) + + evals = np.array([3600 * 10 ** 9, "NaT", -7200 * 10 ** 9], dtype="m8[ns]") + expected = TimedeltaArray(evals) + + result = -arr + tm.assert_timedelta_array_equal(result, expected) + + result2 = np.negative(arr) + tm.assert_timedelta_array_equal(result2, expected) + + def test_neg_freq(self): + tdi = pd.timedelta_range("2 Days", periods=4, freq="H") + arr = TimedeltaArray(tdi, freq=tdi.freq) + + expected = TimedeltaArray(-tdi._data, freq=-tdi.freq) + + result = -arr + tm.assert_timedelta_array_equal(result, expected) + + result2 = np.negative(arr) + tm.assert_timedelta_array_equal(result2, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/timedeltas/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/timedeltas/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/timedeltas/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/timedeltas/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..becafbf Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/timedeltas/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/timedeltas/__pycache__/test_constructors.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/timedeltas/__pycache__/test_constructors.cpython-38.pyc new file mode 100644 index 0000000..332a1dc Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/timedeltas/__pycache__/test_constructors.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/timedeltas/__pycache__/test_reductions.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/arrays/timedeltas/__pycache__/test_reductions.cpython-38.pyc new file mode 100644 index 0000000..81c7cc7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/arrays/timedeltas/__pycache__/test_reductions.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/timedeltas/test_constructors.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/timedeltas/test_constructors.py new file mode 100644 index 0000000..d297e74 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/timedeltas/test_constructors.py @@ -0,0 +1,63 @@ +import numpy as np +import pytest + +from pandas.core.arrays import TimedeltaArray + + +class TestTimedeltaArrayConstructor: + 
def test_only_1dim_accepted(self):
+        # GH#25282
+        arr = np.array([0, 1, 2, 3], dtype="m8[h]").astype("m8[ns]")
+
+        with pytest.raises(ValueError, match="Only 1-dimensional"):
+            # 3-dim, we allow 2D to sneak in for ops purposes GH#29853
+            TimedeltaArray(arr.reshape(2, 2, 1))
+
+        with pytest.raises(ValueError, match="Only 1-dimensional"):
+            # 0-dim
+            TimedeltaArray(arr[[0]].squeeze())
+
+    def test_freq_validation(self):
+        # ensure that the public constructor cannot create an invalid instance
+        arr = np.array([0, 0, 1], dtype=np.int64) * 3600 * 10 ** 9
+
+        msg = (
+            "Inferred frequency None from passed values does not "
+            "conform to passed frequency D"
+        )
+        with pytest.raises(ValueError, match=msg):
+            TimedeltaArray(arr.view("timedelta64[ns]"), freq="D")
+
+    def test_non_array_raises(self):
+        with pytest.raises(ValueError, match="list"):
+            TimedeltaArray([1, 2, 3])
+
+    def test_other_type_raises(self):
+        with pytest.raises(ValueError, match="dtype bool cannot be converted"):
+            TimedeltaArray(np.array([1, 2, 3], dtype="bool"))
+
+    def test_incorrect_dtype_raises(self):
+        # TODO: why TypeError for 'category' but ValueError for i8?
+        with pytest.raises(
+            ValueError, match=r"category cannot be converted to timedelta64\[ns\]"
+        ):
+            TimedeltaArray(np.array([1, 2, 3], dtype="i8"), dtype="category")
+
+        with pytest.raises(
+            ValueError, match=r"dtype int64 cannot be converted to timedelta64\[ns\]"
+        ):
+            TimedeltaArray(np.array([1, 2, 3], dtype="i8"), dtype=np.dtype("int64"))
+
+    def test_copy(self):
+        data = np.array([1, 2, 3], dtype="m8[ns]")
+        arr = TimedeltaArray(data, copy=False)
+        assert arr._data is data
+
+        arr = TimedeltaArray(data, copy=True)
+        assert arr._data is not data
+        assert arr._data.base is not data
+
+    def test_from_sequence_dtype(self):
+        msg = "dtype .*object.* cannot be converted to timedelta64"
+        with pytest.raises(ValueError, match=msg):
+            TimedeltaArray._from_sequence([], dtype=object)
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/arrays/timedeltas/test_reductions.py b/.local/lib/python3.8/site-packages/pandas/tests/arrays/timedeltas/test_reductions.py
new file mode 100644
index 0000000..586a918
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/tests/arrays/timedeltas/test_reductions.py
@@ -0,0 +1,214 @@
+import numpy as np
+import pytest
+
+import pandas as pd
+from pandas import Timedelta
+import pandas._testing as tm
+from pandas.core import nanops
+from pandas.core.arrays import TimedeltaArray
+
+
+class TestReductions:
+    @pytest.mark.parametrize("name", ["std", "min", "max", "median", "mean"])
+    @pytest.mark.parametrize("skipna", [True, False])
+    def test_reductions_empty(self, name, skipna):
+        tdi = pd.TimedeltaIndex([])
+        arr = tdi.array
+
+        result = getattr(tdi, name)(skipna=skipna)
+        assert result is pd.NaT
+
+        result = getattr(arr, name)(skipna=skipna)
+        assert result is pd.NaT
+
+    @pytest.mark.parametrize("skipna", [True, False])
+    def test_sum_empty(self, skipna):
+        tdi = pd.TimedeltaIndex([])
+        arr = tdi.array
+
+        result = tdi.sum(skipna=skipna)
+        assert isinstance(result, Timedelta)
+        assert result == Timedelta(0)
+
+        result = arr.sum(skipna=skipna)
+        assert isinstance(result, Timedelta)
+        assert result == Timedelta(0)
+
+    def test_min_max(self):
+        arr = TimedeltaArray._from_sequence(["3H", "3H", "NaT", "2H", "5H", "4H"])
+
+        result = arr.min()
+        expected = Timedelta("2H")
+        assert result == expected
+
+        result = arr.max()
+        expected = Timedelta("5H")
+        assert result == expected
+
+        result = arr.min(skipna=False)
+        assert result 
is pd.NaT + + result = arr.max(skipna=False) + assert result is pd.NaT + + def test_sum(self): + tdi = pd.TimedeltaIndex(["3H", "3H", "NaT", "2H", "5H", "4H"]) + arr = tdi.array + + result = arr.sum(skipna=True) + expected = Timedelta(hours=17) + assert isinstance(result, Timedelta) + assert result == expected + + result = tdi.sum(skipna=True) + assert isinstance(result, Timedelta) + assert result == expected + + result = arr.sum(skipna=False) + assert result is pd.NaT + + result = tdi.sum(skipna=False) + assert result is pd.NaT + + result = arr.sum(min_count=9) + assert result is pd.NaT + + result = tdi.sum(min_count=9) + assert result is pd.NaT + + result = arr.sum(min_count=1) + assert isinstance(result, Timedelta) + assert result == expected + + result = tdi.sum(min_count=1) + assert isinstance(result, Timedelta) + assert result == expected + + def test_npsum(self): + # GH#25282, GH#25335 np.sum should return a Timedelta, not timedelta64 + tdi = pd.TimedeltaIndex(["3H", "3H", "2H", "5H", "4H"]) + arr = tdi.array + + result = np.sum(tdi) + expected = Timedelta(hours=17) + assert isinstance(result, Timedelta) + assert result == expected + + result = np.sum(arr) + assert isinstance(result, Timedelta) + assert result == expected + + def test_sum_2d_skipna_false(self): + arr = np.arange(8).astype(np.int64).view("m8[s]").astype("m8[ns]").reshape(4, 2) + arr[-1, -1] = "Nat" + + tda = TimedeltaArray(arr) + + result = tda.sum(skipna=False) + assert result is pd.NaT + + result = tda.sum(axis=0, skipna=False) + expected = pd.TimedeltaIndex([Timedelta(seconds=12), pd.NaT])._values + tm.assert_timedelta_array_equal(result, expected) + + result = tda.sum(axis=1, skipna=False) + expected = pd.TimedeltaIndex( + [ + Timedelta(seconds=1), + Timedelta(seconds=5), + Timedelta(seconds=9), + pd.NaT, + ] + )._values + tm.assert_timedelta_array_equal(result, expected) + + # Adding a Timestamp makes this a test for DatetimeArray.std + @pytest.mark.parametrize( + "add", + [ + Timedelta(0), + pd.Timestamp("2021-01-01"), + pd.Timestamp("2021-01-01", tz="UTC"), + pd.Timestamp("2021-01-01", tz="Asia/Tokyo"), + ], + ) + def test_std(self, add): + tdi = pd.TimedeltaIndex(["0H", "4H", "NaT", "4H", "0H", "2H"]) + add + arr = tdi.array + + result = arr.std(skipna=True) + expected = Timedelta(hours=2) + assert isinstance(result, Timedelta) + assert result == expected + + result = tdi.std(skipna=True) + assert isinstance(result, Timedelta) + assert result == expected + + if getattr(arr, "tz", None) is None: + result = nanops.nanstd(np.asarray(arr), skipna=True) + assert isinstance(result, Timedelta) + assert result == expected + + result = arr.std(skipna=False) + assert result is pd.NaT + + result = tdi.std(skipna=False) + assert result is pd.NaT + + if getattr(arr, "tz", None) is None: + result = nanops.nanstd(np.asarray(arr), skipna=False) + assert result is pd.NaT + + def test_median(self): + tdi = pd.TimedeltaIndex(["0H", "3H", "NaT", "5H06m", "0H", "2H"]) + arr = tdi.array + + result = arr.median(skipna=True) + expected = Timedelta(hours=2) + assert isinstance(result, Timedelta) + assert result == expected + + result = tdi.median(skipna=True) + assert isinstance(result, Timedelta) + assert result == expected + + result = arr.median(skipna=False) + assert result is pd.NaT + + result = tdi.median(skipna=False) + assert result is pd.NaT + + def test_mean(self): + tdi = pd.TimedeltaIndex(["0H", "3H", "NaT", "5H06m", "0H", "2H"]) + arr = tdi._data + + # manually verified result + expected = 
Timedelta(arr.dropna()._ndarray.mean()) + + result = arr.mean() + assert result == expected + result = arr.mean(skipna=False) + assert result is pd.NaT + + result = arr.dropna().mean(skipna=False) + assert result == expected + + result = arr.mean(axis=0) + assert result == expected + + def test_mean_2d(self): + tdi = pd.timedelta_range("14 days", periods=6) + tda = tdi._data.reshape(3, 2) + + result = tda.mean(axis=0) + expected = tda[1] + tm.assert_timedelta_array_equal(result, expected) + + result = tda.mean(axis=1) + expected = tda[:, 0] + Timedelta(hours=12) + tm.assert_timedelta_array_equal(result, expected) + + result = tda.mean(axis=None) + expected = tdi.mean() + assert result == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/base/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/base/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/base/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/base/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..657f78b Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/base/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/base/__pycache__/common.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/base/__pycache__/common.cpython-38.pyc new file mode 100644 index 0000000..6fe1d56 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/base/__pycache__/common.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/base/__pycache__/test_constructors.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/base/__pycache__/test_constructors.cpython-38.pyc new file mode 100644 index 0000000..b59402c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/base/__pycache__/test_constructors.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/base/__pycache__/test_conversion.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/base/__pycache__/test_conversion.cpython-38.pyc new file mode 100644 index 0000000..2789ea7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/base/__pycache__/test_conversion.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/base/__pycache__/test_fillna.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/base/__pycache__/test_fillna.cpython-38.pyc new file mode 100644 index 0000000..5c38e64 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/base/__pycache__/test_fillna.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/base/__pycache__/test_misc.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/base/__pycache__/test_misc.cpython-38.pyc new file mode 100644 index 0000000..6b9beb3 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/base/__pycache__/test_misc.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/base/__pycache__/test_transpose.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/base/__pycache__/test_transpose.cpython-38.pyc new file mode 100644 index 0000000..fd2f7a7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/base/__pycache__/test_transpose.cpython-38.pyc differ diff --git 
a/.local/lib/python3.8/site-packages/pandas/tests/base/__pycache__/test_unique.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/base/__pycache__/test_unique.cpython-38.pyc new file mode 100644 index 0000000..50041bb Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/base/__pycache__/test_unique.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/base/__pycache__/test_value_counts.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/base/__pycache__/test_value_counts.cpython-38.pyc new file mode 100644 index 0000000..2d53b51 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/base/__pycache__/test_value_counts.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/base/common.py b/.local/lib/python3.8/site-packages/pandas/tests/base/common.py new file mode 100644 index 0000000..b09710a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/base/common.py @@ -0,0 +1,9 @@ +from typing import Any + +from pandas import Index + + +def allow_na_ops(obj: Any) -> bool: + """Whether to skip test cases including NaN""" + is_bool_index = isinstance(obj, Index) and obj.is_boolean() + return not is_bool_index and obj._can_hold_na diff --git a/.local/lib/python3.8/site-packages/pandas/tests/base/test_constructors.py b/.local/lib/python3.8/site-packages/pandas/tests/base/test_constructors.py new file mode 100644 index 0000000..16ce709 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/base/test_constructors.py @@ -0,0 +1,176 @@ +from datetime import datetime +import sys + +import numpy as np +import pytest + +from pandas.compat import PYPY + +import pandas as pd +from pandas import ( + DataFrame, + Index, + Series, +) +import pandas._testing as tm +from pandas.core.accessor import PandasDelegate +from pandas.core.base import ( + NoNewAttributesMixin, + PandasObject, +) + + +@pytest.fixture( + params=[ + Series, + lambda x, **kwargs: DataFrame({"a": x}, **kwargs)["a"], + lambda x, **kwargs: DataFrame(x, **kwargs)[0], + Index, + ], + ids=["Series", "DataFrame-dict", "DataFrame-array", "Index"], +) +def constructor(request): + return request.param + + +class TestPandasDelegate: + class Delegator: + _properties = ["foo"] + _methods = ["bar"] + + def _set_foo(self, value): + self.foo = value + + def _get_foo(self): + return self.foo + + foo = property(_get_foo, _set_foo, doc="foo property") + + def bar(self, *args, **kwargs): + """a test bar method""" + pass + + class Delegate(PandasDelegate, PandasObject): + def __init__(self, obj): + self.obj = obj + + def setup_method(self, method): + pass + + def test_invalid_delegation(self): + # these show that in order for the delegation to work + # the _delegate_* methods need to be overridden to not raise + # a TypeError + + self.Delegate._add_delegate_accessors( + delegate=self.Delegator, + accessors=self.Delegator._properties, + typ="property", + ) + self.Delegate._add_delegate_accessors( + delegate=self.Delegator, accessors=self.Delegator._methods, typ="method" + ) + + delegate = self.Delegate(self.Delegator()) + + msg = "You cannot access the property foo" + with pytest.raises(TypeError, match=msg): + delegate.foo + + msg = "The property foo cannot be set" + with pytest.raises(TypeError, match=msg): + delegate.foo = 5 + + msg = "You cannot access the property foo" + with pytest.raises(TypeError, match=msg): + delegate.foo() + + @pytest.mark.skipif(PYPY, reason="not relevant for PyPy") + def 
test_memory_usage(self): + # Delegate does not implement memory_usage. + # Check that we fall back to in-built `__sizeof__` + # GH 12924 + delegate = self.Delegate(self.Delegator()) + sys.getsizeof(delegate) + + +class TestNoNewAttributesMixin: + def test_mixin(self): + class T(NoNewAttributesMixin): + pass + + t = T() + assert not hasattr(t, "__frozen") + + t.a = "test" + assert t.a == "test" + + t._freeze() + assert "__frozen" in dir(t) + assert getattr(t, "__frozen") + msg = "You cannot add any new attribute" + with pytest.raises(AttributeError, match=msg): + t.b = "test" + + assert not hasattr(t, "b") + + +class TestConstruction: + # test certain constructor behaviours on dtype inference across Series, + # Index and DataFrame + + @pytest.mark.parametrize( + "klass", + [ + Series, + lambda x, **kwargs: DataFrame({"a": x}, **kwargs)["a"], + lambda x, **kwargs: DataFrame(x, **kwargs)[0], + Index, + ], + ) + @pytest.mark.parametrize( + "a", + [ + np.array(["2263-01-01"], dtype="datetime64[D]"), + np.array([datetime(2263, 1, 1)], dtype=object), + np.array([np.datetime64("2263-01-01", "D")], dtype=object), + np.array(["2263-01-01"], dtype=object), + ], + ids=[ + "datetime64[D]", + "object-datetime.datetime", + "object-numpy-scalar", + "object-string", + ], + ) + def test_constructor_datetime_outofbound(self, a, klass): + # GH-26853 (+ bug GH-26206 out of bound non-ns unit) + + # No dtype specified (dtype inference) + # datetime64[non-ns] raise error, other cases result in object dtype + # and preserve original data + if a.dtype.kind == "M": + msg = "Out of bounds" + with pytest.raises(pd.errors.OutOfBoundsDatetime, match=msg): + klass(a) + else: + result = klass(a) + assert result.dtype == "object" + tm.assert_numpy_array_equal(result.to_numpy(), a) + + # Explicit dtype specified + # Forced conversion fails for all -> all cases raise error + msg = "Out of bounds" + with pytest.raises(pd.errors.OutOfBoundsDatetime, match=msg): + klass(a, dtype="datetime64[ns]") + + def test_constructor_datetime_nonns(self, constructor): + arr = np.array(["2020-01-01T00:00:00.000000"], dtype="datetime64[us]") + expected = constructor(pd.to_datetime(["2020-01-01"])) + result = constructor(arr) + tm.assert_equal(result, expected) + + # https://github.com/pandas-dev/pandas/issues/34843 + arr.flags.writeable = False + result = constructor(arr) + tm.assert_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/base/test_conversion.py b/.local/lib/python3.8/site-packages/pandas/tests/base/test_conversion.py new file mode 100644 index 0000000..84e4992 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/base/test_conversion.py @@ -0,0 +1,509 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.common import ( + is_datetime64_dtype, + is_timedelta64_dtype, +) +from pandas.core.dtypes.dtypes import DatetimeTZDtype + +import pandas as pd +from pandas import ( + CategoricalIndex, + Series, + Timedelta, + Timestamp, + date_range, +) +import pandas._testing as tm +from pandas.core.arrays import ( + DatetimeArray, + IntervalArray, + PandasArray, + PeriodArray, + SparseArray, + TimedeltaArray, +) + + +class TestToIterable: + # test that we convert an iterable to python types + + dtypes = [ + ("int8", int), + ("int16", int), + ("int32", int), + ("int64", int), + ("uint8", int), + ("uint16", int), + ("uint32", int), + ("uint64", int), + ("float16", float), + ("float32", float), + ("float64", float), + ("datetime64[ns]", Timestamp), + ("datetime64[ns, US/Eastern]", 
Timestamp), + ("timedelta64[ns]", Timedelta), + ] + + @pytest.mark.parametrize("dtype, rdtype", dtypes) + @pytest.mark.parametrize( + "method", + [ + lambda x: x.tolist(), + lambda x: x.to_list(), + lambda x: list(x), + lambda x: list(x.__iter__()), + ], + ids=["tolist", "to_list", "list", "iter"], + ) + def test_iterable(self, index_or_series, method, dtype, rdtype): + # gh-10904 + # gh-13258 + # coerce iteration to underlying python / pandas types + typ = index_or_series + s = typ([1], dtype=dtype) + result = method(s)[0] + assert isinstance(result, rdtype) + + @pytest.mark.parametrize( + "dtype, rdtype, obj", + [ + ("object", object, "a"), + ("object", int, 1), + ("category", object, "a"), + ("category", int, 1), + ], + ) + @pytest.mark.parametrize( + "method", + [ + lambda x: x.tolist(), + lambda x: x.to_list(), + lambda x: list(x), + lambda x: list(x.__iter__()), + ], + ids=["tolist", "to_list", "list", "iter"], + ) + def test_iterable_object_and_category( + self, index_or_series, method, dtype, rdtype, obj + ): + # gh-10904 + # gh-13258 + # coerce iteration to underlying python / pandas types + typ = index_or_series + s = typ([obj], dtype=dtype) + result = method(s)[0] + assert isinstance(result, rdtype) + + @pytest.mark.parametrize("dtype, rdtype", dtypes) + def test_iterable_items(self, dtype, rdtype): + # gh-13258 + # test if items yields the correct boxed scalars + # this only applies to series + s = Series([1], dtype=dtype) + _, result = list(s.items())[0] + assert isinstance(result, rdtype) + + _, result = list(s.items())[0] + assert isinstance(result, rdtype) + + @pytest.mark.parametrize( + "dtype, rdtype", dtypes + [("object", int), ("category", int)] + ) + def test_iterable_map(self, index_or_series, dtype, rdtype): + # gh-13236 + # coerce iteration to underlying python / pandas types + typ = index_or_series + s = typ([1], dtype=dtype) + result = s.map(type)[0] + if not isinstance(rdtype, tuple): + rdtype = (rdtype,) + assert result in rdtype + + @pytest.mark.parametrize( + "method", + [ + lambda x: x.tolist(), + lambda x: x.to_list(), + lambda x: list(x), + lambda x: list(x.__iter__()), + ], + ids=["tolist", "to_list", "list", "iter"], + ) + def test_categorial_datetimelike(self, method): + i = CategoricalIndex([Timestamp("1999-12-31"), Timestamp("2000-12-31")]) + + result = method(i)[0] + assert isinstance(result, Timestamp) + + def test_iter_box(self): + vals = [Timestamp("2011-01-01"), Timestamp("2011-01-02")] + s = Series(vals) + assert s.dtype == "datetime64[ns]" + for res, exp in zip(s, vals): + assert isinstance(res, Timestamp) + assert res.tz is None + assert res == exp + + vals = [ + Timestamp("2011-01-01", tz="US/Eastern"), + Timestamp("2011-01-02", tz="US/Eastern"), + ] + s = Series(vals) + + assert s.dtype == "datetime64[ns, US/Eastern]" + for res, exp in zip(s, vals): + assert isinstance(res, Timestamp) + assert res.tz == exp.tz + assert res == exp + + # timedelta + vals = [Timedelta("1 days"), Timedelta("2 days")] + s = Series(vals) + assert s.dtype == "timedelta64[ns]" + for res, exp in zip(s, vals): + assert isinstance(res, Timedelta) + assert res == exp + + # period + vals = [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")] + s = Series(vals) + assert s.dtype == "Period[M]" + for res, exp in zip(s, vals): + assert isinstance(res, pd.Period) + assert res.freq == "M" + assert res == exp + + +@pytest.mark.parametrize( + "arr, expected_type, dtype", + [ + (np.array([0, 1], dtype=np.int64), np.ndarray, "int64"), + (np.array(["a", "b"]), 
np.ndarray, "object"), + (pd.Categorical(["a", "b"]), pd.Categorical, "category"), + ( + pd.DatetimeIndex(["2017", "2018"], tz="US/Central"), + DatetimeArray, + "datetime64[ns, US/Central]", + ), + ( + pd.PeriodIndex([2018, 2019], freq="A"), + PeriodArray, + pd.core.dtypes.dtypes.PeriodDtype("A-DEC"), + ), + (pd.IntervalIndex.from_breaks([0, 1, 2]), IntervalArray, "interval"), + ( + pd.DatetimeIndex(["2017", "2018"]), + DatetimeArray, + "datetime64[ns]", + ), + ( + pd.TimedeltaIndex([10 ** 10]), + TimedeltaArray, + "m8[ns]", + ), + ], +) +def test_values_consistent(arr, expected_type, dtype): + l_values = Series(arr)._values + r_values = pd.Index(arr)._values + assert type(l_values) is expected_type + assert type(l_values) is type(r_values) + + tm.assert_equal(l_values, r_values) + + +@pytest.mark.parametrize("arr", [np.array([1, 2, 3])]) +def test_numpy_array(arr): + ser = Series(arr) + result = ser.array + expected = PandasArray(arr) + tm.assert_extension_array_equal(result, expected) + + +def test_numpy_array_all_dtypes(any_numpy_dtype): + ser = Series(dtype=any_numpy_dtype) + result = ser.array + if is_datetime64_dtype(any_numpy_dtype): + assert isinstance(result, DatetimeArray) + elif is_timedelta64_dtype(any_numpy_dtype): + assert isinstance(result, TimedeltaArray) + else: + assert isinstance(result, PandasArray) + + +@pytest.mark.parametrize( + "arr, attr", + [ + (pd.Categorical(["a", "b"]), "_codes"), + (pd.core.arrays.period_array(["2000", "2001"], freq="D"), "_data"), + (pd.array([0, np.nan], dtype="Int64"), "_data"), + (IntervalArray.from_breaks([0, 1]), "_left"), + (SparseArray([0, 1]), "_sparse_values"), + (DatetimeArray(np.array([1, 2], dtype="datetime64[ns]")), "_data"), + # tz-aware Datetime + ( + DatetimeArray( + np.array( + ["2000-01-01T12:00:00", "2000-01-02T12:00:00"], dtype="M8[ns]" + ), + dtype=DatetimeTZDtype(tz="US/Central"), + ), + "_data", + ), + ], +) +def test_array(arr, attr, index_or_series, request): + box = index_or_series + warn = None + if arr.dtype.name in ("Sparse[int64, 0]") and box is pd.Index: + mark = pytest.mark.xfail(reason="Index cannot yet store sparse dtype") + request.node.add_marker(mark) + warn = FutureWarning + + with tm.assert_produces_warning(warn): + result = box(arr, copy=False).array + + if attr: + arr = getattr(arr, attr) + result = getattr(result, attr) + + assert result is arr + + +def test_array_multiindex_raises(): + idx = pd.MultiIndex.from_product([["A"], ["a", "b"]]) + msg = "MultiIndex has no single backing array" + with pytest.raises(ValueError, match=msg): + idx.array + + +@pytest.mark.parametrize( + "arr, expected", + [ + (np.array([1, 2], dtype=np.int64), np.array([1, 2], dtype=np.int64)), + (pd.Categorical(["a", "b"]), np.array(["a", "b"], dtype=object)), + ( + pd.core.arrays.period_array(["2000", "2001"], freq="D"), + np.array([pd.Period("2000", freq="D"), pd.Period("2001", freq="D")]), + ), + (pd.array([0, np.nan], dtype="Int64"), np.array([0, pd.NA], dtype=object)), + ( + IntervalArray.from_breaks([0, 1, 2]), + np.array([pd.Interval(0, 1), pd.Interval(1, 2)], dtype=object), + ), + (SparseArray([0, 1]), np.array([0, 1], dtype=np.int64)), + # tz-naive datetime + ( + DatetimeArray(np.array(["2000", "2001"], dtype="M8[ns]")), + np.array(["2000", "2001"], dtype="M8[ns]"), + ), + # tz-aware stays tz`-aware + ( + DatetimeArray( + np.array( + ["2000-01-01T06:00:00", "2000-01-02T06:00:00"], dtype="M8[ns]" + ), + dtype=DatetimeTZDtype(tz="US/Central"), + ), + np.array( + [ + Timestamp("2000-01-01", tz="US/Central"), + 
Timestamp("2000-01-02", tz="US/Central"), + ] + ), + ), + # Timedelta + ( + TimedeltaArray(np.array([0, 3600000000000], dtype="i8"), freq="H"), + np.array([0, 3600000000000], dtype="m8[ns]"), + ), + # GH#26406 tz is preserved in Categorical[dt64tz] + ( + pd.Categorical(date_range("2016-01-01", periods=2, tz="US/Pacific")), + np.array( + [ + Timestamp("2016-01-01", tz="US/Pacific"), + Timestamp("2016-01-02", tz="US/Pacific"), + ] + ), + ), + ], +) +def test_to_numpy(arr, expected, index_or_series_or_array, request): + box = index_or_series_or_array + + warn = None + if index_or_series_or_array is pd.Index and isinstance(arr, SparseArray): + warn = FutureWarning + with tm.assert_produces_warning(warn): + thing = box(arr) + + if arr.dtype.name == "int64" and box is pd.array: + mark = pytest.mark.xfail(reason="thing is Int64 and to_numpy() returns object") + request.node.add_marker(mark) + + result = thing.to_numpy() + tm.assert_numpy_array_equal(result, expected) + + result = np.asarray(thing) + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize("as_series", [True, False]) +@pytest.mark.parametrize( + "arr", [np.array([1, 2, 3], dtype="int64"), np.array(["a", "b", "c"], dtype=object)] +) +def test_to_numpy_copy(arr, as_series): + obj = pd.Index(arr, copy=False) + if as_series: + obj = Series(obj.values, copy=False) + + # no copy by default + result = obj.to_numpy() + assert np.shares_memory(arr, result) is True + + result = obj.to_numpy(copy=False) + assert np.shares_memory(arr, result) is True + + # copy=True + result = obj.to_numpy(copy=True) + assert np.shares_memory(arr, result) is False + + +@pytest.mark.parametrize("as_series", [True, False]) +def test_to_numpy_dtype(as_series): + tz = "US/Eastern" + obj = pd.DatetimeIndex(["2000", "2001"], tz=tz) + if as_series: + obj = Series(obj) + + # preserve tz by default + result = obj.to_numpy() + expected = np.array( + [Timestamp("2000", tz=tz), Timestamp("2001", tz=tz)], dtype=object + ) + tm.assert_numpy_array_equal(result, expected) + + result = obj.to_numpy(dtype="object") + tm.assert_numpy_array_equal(result, expected) + + result = obj.to_numpy(dtype="M8[ns]") + expected = np.array(["2000-01-01T05", "2001-01-01T05"], dtype="M8[ns]") + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize( + "values, dtype, na_value, expected", + [ + ([1, 2, None], "float64", 0, [1.0, 2.0, 0.0]), + ( + [Timestamp("2000"), Timestamp("2000"), pd.NaT], + None, + Timestamp("2000"), + [np.datetime64("2000-01-01T00:00:00.000000000")] * 3, + ), + ], +) +def test_to_numpy_na_value_numpy_dtype( + index_or_series, values, dtype, na_value, expected +): + obj = index_or_series(values) + result = obj.to_numpy(dtype=dtype, na_value=na_value) + expected = np.array(expected) + tm.assert_numpy_array_equal(result, expected) + + +def test_to_numpy_kwargs_raises(): + # numpy + s = Series([1, 2, 3]) + msg = r"to_numpy\(\) got an unexpected keyword argument 'foo'" + with pytest.raises(TypeError, match=msg): + s.to_numpy(foo=True) + + # extension + s = Series([1, 2, 3], dtype="Int64") + with pytest.raises(TypeError, match=msg): + s.to_numpy(foo=True) + + +@pytest.mark.parametrize( + "data", + [ + {"a": [1, 2, 3], "b": [1, 2, None]}, + {"a": np.array([1, 2, 3]), "b": np.array([1, 2, np.nan])}, + {"a": pd.array([1, 2, 3]), "b": pd.array([1, 2, None])}, + ], +) +@pytest.mark.parametrize("dtype, na_value", [(float, np.nan), (object, None)]) +def test_to_numpy_dataframe_na_value(data, dtype, na_value): + # 
https://github.com/pandas-dev/pandas/issues/33820 + df = pd.DataFrame(data) + result = df.to_numpy(dtype=dtype, na_value=na_value) + expected = np.array([[1, 1], [2, 2], [3, na_value]], dtype=dtype) + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize( + "data, expected", + [ + ( + {"a": pd.array([1, 2, None])}, + np.array([[1.0], [2.0], [np.nan]], dtype=float), + ), + ( + {"a": [1, 2, 3], "b": [1, 2, 3]}, + np.array([[1, 1], [2, 2], [3, 3]], dtype=float), + ), + ], +) +def test_to_numpy_dataframe_single_block(data, expected): + # https://github.com/pandas-dev/pandas/issues/33820 + df = pd.DataFrame(data) + result = df.to_numpy(dtype=float, na_value=np.nan) + tm.assert_numpy_array_equal(result, expected) + + +def test_to_numpy_dataframe_single_block_no_mutate(): + # https://github.com/pandas-dev/pandas/issues/33820 + result = pd.DataFrame(np.array([1.0, 2.0, np.nan])) + expected = pd.DataFrame(np.array([1.0, 2.0, np.nan])) + result.to_numpy(na_value=0.0) + tm.assert_frame_equal(result, expected) + + +class TestAsArray: + @pytest.mark.parametrize("tz", [None, "US/Central"]) + def test_asarray_object_dt64(self, tz): + ser = Series(date_range("2000", periods=2, tz=tz)) + + with tm.assert_produces_warning(None): + # Future behavior (for tzaware case) with no warning + result = np.asarray(ser, dtype=object) + + expected = np.array( + [Timestamp("2000-01-01", tz=tz), Timestamp("2000-01-02", tz=tz)] + ) + tm.assert_numpy_array_equal(result, expected) + + def test_asarray_tz_naive(self): + # This shouldn't produce a warning. + ser = Series(date_range("2000", periods=2)) + expected = np.array(["2000-01-01", "2000-01-02"], dtype="M8[ns]") + result = np.asarray(ser) + + tm.assert_numpy_array_equal(result, expected) + + def test_asarray_tz_aware(self): + tz = "US/Central" + ser = Series(date_range("2000", periods=2, tz=tz)) + expected = np.array(["2000-01-01T06", "2000-01-02T06"], dtype="M8[ns]") + result = np.asarray(ser, dtype="datetime64[ns]") + + tm.assert_numpy_array_equal(result, expected) + + # Old behavior with no warning + result = np.asarray(ser, dtype="M8[ns]") + + tm.assert_numpy_array_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/base/test_fillna.py b/.local/lib/python3.8/site-packages/pandas/tests/base/test_fillna.py new file mode 100644 index 0000000..7300d30 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/base/test_fillna.py @@ -0,0 +1,60 @@ +""" +Though Index.fillna and Series.fillna has separate impl, +test here to confirm these works as the same +""" + +import numpy as np +import pytest + +from pandas import MultiIndex +import pandas._testing as tm +from pandas.tests.base.common import allow_na_ops + + +def test_fillna(index_or_series_obj): + # GH 11343 + obj = index_or_series_obj + + if isinstance(obj, MultiIndex): + msg = "isna is not defined for MultiIndex" + with pytest.raises(NotImplementedError, match=msg): + obj.fillna(0) + return + + # values will not be changed + fill_value = obj.values[0] if len(obj) > 0 else 0 + result = obj.fillna(fill_value) + + tm.assert_equal(obj, result) + + # check shallow_copied + assert obj is not result + + +@pytest.mark.parametrize("null_obj", [np.nan, None]) +def test_fillna_null(null_obj, index_or_series_obj): + # GH 11343 + obj = index_or_series_obj + klass = type(obj) + + if not allow_na_ops(obj): + pytest.skip(f"{klass} doesn't allow for NA operations") + elif len(obj) < 1: + pytest.skip("Test doesn't make sense on empty data") + elif isinstance(obj, 
MultiIndex): + pytest.skip(f"MultiIndex can't hold '{null_obj}'") + + values = obj._values + fill_value = values[0] + expected = values.copy() + values[0:2] = null_obj + expected[0:2] = fill_value + + expected = klass(expected) + obj = klass(values) + + result = obj.fillna(fill_value) + tm.assert_equal(result, expected) + + # check shallow_copied + assert obj is not result diff --git a/.local/lib/python3.8/site-packages/pandas/tests/base/test_misc.py b/.local/lib/python3.8/site-packages/pandas/tests/base/test_misc.py new file mode 100644 index 0000000..bcab890 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/base/test_misc.py @@ -0,0 +1,188 @@ +import sys + +import numpy as np +import pytest + +from pandas.compat import ( + IS64, + PYPY, +) + +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_dtype_equal, + is_object_dtype, +) + +import pandas as pd +from pandas import ( + Index, + Series, +) +import pandas._testing as tm + + +def test_isnull_notnull_docstrings(): + # GH#41855 make sure its clear these are aliases + doc = pd.DataFrame.notnull.__doc__ + assert doc.startswith("\nDataFrame.notnull is an alias for DataFrame.notna.\n") + doc = pd.DataFrame.isnull.__doc__ + assert doc.startswith("\nDataFrame.isnull is an alias for DataFrame.isna.\n") + + doc = Series.notnull.__doc__ + assert doc.startswith("\nSeries.notnull is an alias for Series.notna.\n") + doc = Series.isnull.__doc__ + assert doc.startswith("\nSeries.isnull is an alias for Series.isna.\n") + + +@pytest.mark.parametrize( + "op_name, op", + [ + ("add", "+"), + ("sub", "-"), + ("mul", "*"), + ("mod", "%"), + ("pow", "**"), + ("truediv", "/"), + ("floordiv", "//"), + ], +) +def test_binary_ops_docstring(frame_or_series, op_name, op): + # not using the all_arithmetic_functions fixture with _get_opstr + # as _get_opstr is used internally in the dynamic implementation of the docstring + klass = frame_or_series + + operand1 = klass.__name__.lower() + operand2 = "other" + expected_str = " ".join([operand1, op, operand2]) + assert expected_str in getattr(klass, op_name).__doc__ + + # reverse version of the binary ops + expected_str = " ".join([operand2, op, operand1]) + assert expected_str in getattr(klass, "r" + op_name).__doc__ + + +def test_ndarray_compat_properties(index_or_series_obj): + obj = index_or_series_obj + + # Check that we work. + for p in ["shape", "dtype", "T", "nbytes"]: + assert getattr(obj, p, None) is not None + + # deprecated properties + for p in ["strides", "itemsize", "base", "data"]: + assert not hasattr(obj, p) + + msg = "can only convert an array of size 1 to a Python scalar" + with pytest.raises(ValueError, match=msg): + obj.item() # len > 1 + + assert obj.ndim == 1 + assert obj.size == len(obj) + + assert Index([1]).item() == 1 + assert Series([1]).item() == 1 + + +def test_array_wrap_compat(): + # Note: at time of dask 2022.01.0, this is still used by eg dask + # (https://github.com/dask/dask/issues/8580). 
+ # This test is a small dummy ensuring coverage + orig = Series([1, 2, 3], dtype="int64", index=["a", "b", "c"]) + result = orig.__array_wrap__(np.array([2, 4, 6], dtype="int64")) + expected = orig * 2 + tm.assert_series_equal(result, expected) + + +@pytest.mark.skipif(PYPY, reason="not relevant for PyPy") +def test_memory_usage(index_or_series_obj): + obj = index_or_series_obj + + res = obj.memory_usage() + res_deep = obj.memory_usage(deep=True) + + is_ser = isinstance(obj, Series) + is_object = is_object_dtype(obj) or ( + isinstance(obj, Series) and is_object_dtype(obj.index) + ) + is_categorical = is_categorical_dtype(obj.dtype) or ( + isinstance(obj, Series) and is_categorical_dtype(obj.index.dtype) + ) + is_object_string = is_dtype_equal(obj, "string[python]") or ( + is_ser and is_dtype_equal(obj.index.dtype, "string[python]") + ) + + if len(obj) == 0: + if isinstance(obj, Index): + expected = 0 + else: + expected = 108 if IS64 else 64 + assert res_deep == res == expected + elif is_object or is_categorical or is_object_string: + # only deep will pick them up + assert res_deep > res + else: + assert res == res_deep + + # sys.getsizeof will call the .memory_usage with + # deep=True, and add on some GC overhead + diff = res_deep - sys.getsizeof(obj) + assert abs(diff) < 100 + + +def test_memory_usage_components_series(series_with_simple_index): + series = series_with_simple_index + total_usage = series.memory_usage(index=True) + non_index_usage = series.memory_usage(index=False) + index_usage = series.index.memory_usage() + assert total_usage == non_index_usage + index_usage + + +@pytest.mark.parametrize("dtype", tm.NARROW_NP_DTYPES) +def test_memory_usage_components_narrow_series(dtype): + series = tm.makeFloatSeries(name="a").astype(dtype) + total_usage = series.memory_usage(index=True) + non_index_usage = series.memory_usage(index=False) + index_usage = series.index.memory_usage() + assert total_usage == non_index_usage + index_usage + + +def test_searchsorted(index_or_series_obj): + # numpy.searchsorted calls obj.searchsorted under the hood. 
+ # See gh-12238 + obj = index_or_series_obj + + if isinstance(obj, pd.MultiIndex): + # See gh-14833 + pytest.skip("np.searchsorted doesn't work on pd.MultiIndex") + + max_obj = max(obj, default=0) + index = np.searchsorted(obj, max_obj) + assert 0 <= index <= len(obj) + + index = np.searchsorted(obj, max_obj, sorter=range(len(obj))) + assert 0 <= index <= len(obj) + + +def test_access_by_position(index_flat): + index = index_flat + + if len(index) == 0: + pytest.skip("Test doesn't make sense on empty data") + + series = Series(index) + assert index[0] == series.iloc[0] + assert index[5] == series.iloc[5] + assert index[-1] == series.iloc[-1] + + size = len(index) + assert index[-1] == index[size - 1] + + msg = f"index {size} is out of bounds for axis 0 with size {size}" + if is_dtype_equal(index.dtype, "string[pyarrow]"): + msg = "index out of bounds" + with pytest.raises(IndexError, match=msg): + index[size] + msg = "single positional indexer is out-of-bounds" + with pytest.raises(IndexError, match=msg): + series.iloc[size] diff --git a/.local/lib/python3.8/site-packages/pandas/tests/base/test_transpose.py b/.local/lib/python3.8/site-packages/pandas/tests/base/test_transpose.py new file mode 100644 index 0000000..246f33d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/base/test_transpose.py @@ -0,0 +1,56 @@ +import numpy as np +import pytest + +from pandas import ( + CategoricalDtype, + DataFrame, +) +import pandas._testing as tm + + +def test_transpose(index_or_series_obj): + obj = index_or_series_obj + tm.assert_equal(obj.transpose(), obj) + + +def test_transpose_non_default_axes(index_or_series_obj): + msg = "the 'axes' parameter is not supported" + obj = index_or_series_obj + with pytest.raises(ValueError, match=msg): + obj.transpose(1) + with pytest.raises(ValueError, match=msg): + obj.transpose(axes=1) + + +def test_numpy_transpose(index_or_series_obj): + msg = "the 'axes' parameter is not supported" + obj = index_or_series_obj + tm.assert_equal(np.transpose(obj), obj) + + with pytest.raises(ValueError, match=msg): + np.transpose(obj, axes=1) + + +@pytest.mark.parametrize( + "data, transposed_data, index, columns, dtype", + [ + ([[1], [2]], [[1, 2]], ["a", "a"], ["b"], int), + ([[1], [2]], [[1, 2]], ["a", "a"], ["b"], CategoricalDtype([1, 2])), + ([[1, 2]], [[1], [2]], ["b"], ["a", "a"], int), + ([[1, 2]], [[1], [2]], ["b"], ["a", "a"], CategoricalDtype([1, 2])), + ([[1, 2], [3, 4]], [[1, 3], [2, 4]], ["a", "a"], ["b", "b"], int), + ( + [[1, 2], [3, 4]], + [[1, 3], [2, 4]], + ["a", "a"], + ["b", "b"], + CategoricalDtype([1, 2, 3, 4]), + ), + ], +) +def test_duplicate_labels(data, transposed_data, index, columns, dtype): + # GH 42380 + df = DataFrame(data, index=index, columns=columns, dtype=dtype) + result = df.T + expected = DataFrame(transposed_data, index=columns, columns=index, dtype=dtype) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/base/test_unique.py b/.local/lib/python3.8/site-packages/pandas/tests/base/test_unique.py new file mode 100644 index 0000000..e399ae4 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/base/test_unique.py @@ -0,0 +1,131 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_datetime64tz_dtype + +import pandas as pd +import pandas._testing as tm +from pandas.core.api import NumericIndex +from pandas.tests.base.common import allow_na_ops + + +def test_unique(index_or_series_obj): + obj = index_or_series_obj + obj = np.repeat(obj, 
range(1, len(obj) + 1)) + result = obj.unique() + + # dict.fromkeys preserves the order + unique_values = list(dict.fromkeys(obj.values)) + if isinstance(obj, pd.MultiIndex): + expected = pd.MultiIndex.from_tuples(unique_values) + expected.names = obj.names + tm.assert_index_equal(result, expected, exact=True) + elif isinstance(obj, pd.Index) and obj._is_backward_compat_public_numeric_index: + expected = NumericIndex(unique_values, dtype=obj.dtype) + tm.assert_index_equal(result, expected, exact=True) + elif isinstance(obj, pd.Index): + expected = pd.Index(unique_values, dtype=obj.dtype) + if is_datetime64tz_dtype(obj.dtype): + expected = expected.normalize() + tm.assert_index_equal(result, expected, exact=True) + else: + expected = np.array(unique_values) + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize("null_obj", [np.nan, None]) +def test_unique_null(null_obj, index_or_series_obj): + obj = index_or_series_obj + + if not allow_na_ops(obj): + pytest.skip("type doesn't allow for NA operations") + elif len(obj) < 1: + pytest.skip("Test doesn't make sense on empty data") + elif isinstance(obj, pd.MultiIndex): + pytest.skip(f"MultiIndex can't hold '{null_obj}'") + + values = obj._values + values[0:2] = null_obj + + klass = type(obj) + repeated_values = np.repeat(values, range(1, len(values) + 1)) + obj = klass(repeated_values, dtype=obj.dtype) + result = obj.unique() + + unique_values_raw = dict.fromkeys(obj.values) + # because np.nan == np.nan is False, but None == None is True + # np.nan would be duplicated, whereas None wouldn't + unique_values_not_null = [val for val in unique_values_raw if not pd.isnull(val)] + unique_values = [null_obj] + unique_values_not_null + + if isinstance(obj, pd.Index) and obj._is_backward_compat_public_numeric_index: + expected = NumericIndex(unique_values, dtype=obj.dtype) + tm.assert_index_equal(result, expected, exact=True) + elif isinstance(obj, pd.Index): + expected = pd.Index(unique_values, dtype=obj.dtype) + if is_datetime64tz_dtype(obj.dtype): + result = result.normalize() + expected = expected.normalize() + tm.assert_index_equal(result, expected, exact=True) + else: + expected = np.array(unique_values, dtype=obj.dtype) + tm.assert_numpy_array_equal(result, expected) + + +def test_nunique(index_or_series_obj): + obj = index_or_series_obj + obj = np.repeat(obj, range(1, len(obj) + 1)) + expected = len(obj.unique()) + assert obj.nunique(dropna=False) == expected + + +@pytest.mark.parametrize("null_obj", [np.nan, None]) +def test_nunique_null(null_obj, index_or_series_obj): + obj = index_or_series_obj + + if not allow_na_ops(obj): + pytest.skip("type doesn't allow for NA operations") + elif isinstance(obj, pd.MultiIndex): + pytest.skip(f"MultiIndex can't hold '{null_obj}'") + + values = obj._values + values[0:2] = null_obj + + klass = type(obj) + repeated_values = np.repeat(values, range(1, len(values) + 1)) + obj = klass(repeated_values, dtype=obj.dtype) + + if isinstance(obj, pd.CategoricalIndex): + assert obj.nunique() == len(obj.categories) + assert obj.nunique(dropna=False) == len(obj.categories) + 1 + else: + num_unique_values = len(obj.unique()) + assert obj.nunique() == max(0, num_unique_values - 1) + assert obj.nunique(dropna=False) == max(0, num_unique_values) + + +@pytest.mark.single +@pytest.mark.xfail( + reason="Flaky in the CI. 
Remove once CI has a single build: GH 44584", strict=False +) +def test_unique_bad_unicode(index_or_series): + # regression test for #34550 + uval = "\ud83d" # smiley emoji + + obj = index_or_series([uval] * 2) + result = obj.unique() + + if isinstance(obj, pd.Index): + expected = pd.Index(["\ud83d"], dtype=object) + tm.assert_index_equal(result, expected, exact=True) + else: + expected = np.array(["\ud83d"], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize("dropna", [True, False]) +def test_nunique_dropna(dropna): + # GH37566 + ser = pd.Series(["yes", "yes", pd.NA, np.nan, None, pd.NaT]) + res = ser.nunique(dropna) + assert res == 1 if dropna else 5 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/base/test_value_counts.py b/.local/lib/python3.8/site-packages/pandas/tests/base/test_value_counts.py new file mode 100644 index 0000000..13bf096 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/base/test_value_counts.py @@ -0,0 +1,290 @@ +import collections +from datetime import timedelta + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DatetimeIndex, + Index, + Interval, + IntervalIndex, + Series, + Timedelta, + TimedeltaIndex, +) +import pandas._testing as tm +from pandas.tests.base.common import allow_na_ops + + +def test_value_counts(index_or_series_obj): + obj = index_or_series_obj + obj = np.repeat(obj, range(1, len(obj) + 1)) + result = obj.value_counts() + + counter = collections.Counter(obj) + expected = Series(dict(counter.most_common()), dtype=np.int64, name=obj.name) + expected.index = expected.index.astype(obj.dtype) + if isinstance(obj, pd.MultiIndex): + expected.index = Index(expected.index) + + if not isinstance(result.dtype, np.dtype): + # i.e IntegerDtype + expected = expected.astype("Int64") + + # TODO(GH#32514): Order of entries with the same count is inconsistent + # on CI (gh-32449) + if obj.duplicated().any(): + result = result.sort_index() + expected = expected.sort_index() + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("null_obj", [np.nan, None]) +def test_value_counts_null(null_obj, index_or_series_obj): + orig = index_or_series_obj + obj = orig.copy() + + if not allow_na_ops(obj): + pytest.skip("type doesn't allow for NA operations") + elif len(obj) < 1: + pytest.skip("Test doesn't make sense on empty data") + elif isinstance(orig, pd.MultiIndex): + pytest.skip(f"MultiIndex can't hold '{null_obj}'") + + values = obj._values + values[0:2] = null_obj + + klass = type(obj) + repeated_values = np.repeat(values, range(1, len(values) + 1)) + obj = klass(repeated_values, dtype=obj.dtype) + + # because np.nan == np.nan is False, but None == None is True + # np.nan would be duplicated, whereas None wouldn't + counter = collections.Counter(obj.dropna()) + expected = Series(dict(counter.most_common()), dtype=np.int64) + expected.index = expected.index.astype(obj.dtype) + + result = obj.value_counts() + if obj.duplicated().any(): + # TODO(GH#32514): + # Order of entries with the same count is inconsistent on CI (gh-32449) + expected = expected.sort_index() + result = result.sort_index() + + if not isinstance(result.dtype, np.dtype): + # i.e IntegerDtype + expected = expected.astype("Int64") + tm.assert_series_equal(result, expected) + + expected[null_obj] = 3 + + result = obj.value_counts(dropna=False) + if obj.duplicated().any(): + # TODO(GH#32514): + # Order of entries with the same count is inconsistent on CI (gh-32449) + expected = 
expected.sort_index() + result = result.sort_index() + tm.assert_series_equal(result, expected) + + +def test_value_counts_inferred(index_or_series): + klass = index_or_series + s_values = ["a", "b", "b", "b", "b", "c", "d", "d", "a", "a"] + s = klass(s_values) + expected = Series([4, 3, 2, 1], index=["b", "a", "d", "c"]) + tm.assert_series_equal(s.value_counts(), expected) + + if isinstance(s, Index): + exp = Index(np.unique(np.array(s_values, dtype=np.object_))) + tm.assert_index_equal(s.unique(), exp) + else: + exp = np.unique(np.array(s_values, dtype=np.object_)) + tm.assert_numpy_array_equal(s.unique(), exp) + + assert s.nunique() == 4 + # don't sort, have to sort after the fact as not sorting is + # platform-dep + hist = s.value_counts(sort=False).sort_values() + expected = Series([3, 1, 4, 2], index=list("acbd")).sort_values() + tm.assert_series_equal(hist, expected) + + # sort ascending + hist = s.value_counts(ascending=True) + expected = Series([1, 2, 3, 4], index=list("cdab")) + tm.assert_series_equal(hist, expected) + + # relative histogram. + hist = s.value_counts(normalize=True) + expected = Series([0.4, 0.3, 0.2, 0.1], index=["b", "a", "d", "c"]) + tm.assert_series_equal(hist, expected) + + +def test_value_counts_bins(index_or_series): + klass = index_or_series + s_values = ["a", "b", "b", "b", "b", "c", "d", "d", "a", "a"] + s = klass(s_values) + + # bins + msg = "bins argument only works with numeric data" + with pytest.raises(TypeError, match=msg): + s.value_counts(bins=1) + + s1 = Series([1, 1, 2, 3]) + res1 = s1.value_counts(bins=1) + exp1 = Series({Interval(0.997, 3.0): 4}) + tm.assert_series_equal(res1, exp1) + res1n = s1.value_counts(bins=1, normalize=True) + exp1n = Series({Interval(0.997, 3.0): 1.0}) + tm.assert_series_equal(res1n, exp1n) + + if isinstance(s1, Index): + tm.assert_index_equal(s1.unique(), Index([1, 2, 3])) + else: + exp = np.array([1, 2, 3], dtype=np.int64) + tm.assert_numpy_array_equal(s1.unique(), exp) + + assert s1.nunique() == 3 + + # these return the same + res4 = s1.value_counts(bins=4, dropna=True) + intervals = IntervalIndex.from_breaks([0.997, 1.5, 2.0, 2.5, 3.0]) + exp4 = Series([2, 1, 1, 0], index=intervals.take([0, 1, 3, 2])) + tm.assert_series_equal(res4, exp4) + + res4 = s1.value_counts(bins=4, dropna=False) + intervals = IntervalIndex.from_breaks([0.997, 1.5, 2.0, 2.5, 3.0]) + exp4 = Series([2, 1, 1, 0], index=intervals.take([0, 1, 3, 2])) + tm.assert_series_equal(res4, exp4) + + res4n = s1.value_counts(bins=4, normalize=True) + exp4n = Series([0.5, 0.25, 0.25, 0], index=intervals.take([0, 1, 3, 2])) + tm.assert_series_equal(res4n, exp4n) + + # handle NA's properly + s_values = ["a", "b", "b", "b", np.nan, np.nan, "d", "d", "a", "a", "b"] + s = klass(s_values) + expected = Series([4, 3, 2], index=["b", "a", "d"]) + tm.assert_series_equal(s.value_counts(), expected) + + if isinstance(s, Index): + exp = Index(["a", "b", np.nan, "d"]) + tm.assert_index_equal(s.unique(), exp) + else: + exp = np.array(["a", "b", np.nan, "d"], dtype=object) + tm.assert_numpy_array_equal(s.unique(), exp) + assert s.nunique() == 3 + + s = klass({}) if klass is dict else klass({}, dtype=object) + expected = Series([], dtype=np.int64) + tm.assert_series_equal(s.value_counts(), expected, check_index_type=False) + # returned dtype differs depending on original + if isinstance(s, Index): + tm.assert_index_equal(s.unique(), Index([]), exact=False) + else: + tm.assert_numpy_array_equal(s.unique(), np.array([]), check_dtype=False) + + assert s.nunique() == 0 + + +def 
test_value_counts_datetime64(index_or_series): + klass = index_or_series + + # GH 3002, datetime64[ns] + # don't test names though + df = pd.DataFrame( + { + "person_id": ["xxyyzz", "xxyyzz", "xxyyzz", "xxyyww", "foofoo", "foofoo"], + "dt": pd.to_datetime( + [ + "2010-01-01", + "2010-01-01", + "2010-01-01", + "2009-01-01", + "2008-09-09", + "2008-09-09", + ] + ), + "food": ["PIE", "GUM", "EGG", "EGG", "PIE", "GUM"], + } + ) + + s = klass(df["dt"].copy()) + s.name = None + idx = pd.to_datetime( + ["2010-01-01 00:00:00", "2008-09-09 00:00:00", "2009-01-01 00:00:00"] + ) + expected_s = Series([3, 2, 1], index=idx) + tm.assert_series_equal(s.value_counts(), expected_s) + + expected = np.array( + ["2010-01-01 00:00:00", "2009-01-01 00:00:00", "2008-09-09 00:00:00"], + dtype="datetime64[ns]", + ) + if isinstance(s, Index): + tm.assert_index_equal(s.unique(), DatetimeIndex(expected)) + else: + tm.assert_numpy_array_equal(s.unique(), expected) + + assert s.nunique() == 3 + + # with NaT + s = df["dt"].copy() + s = klass(list(s.values) + [pd.NaT] * 4) + + result = s.value_counts() + assert result.index.dtype == "datetime64[ns]" + tm.assert_series_equal(result, expected_s) + + result = s.value_counts(dropna=False) + expected_s = pd.concat([Series([4], index=DatetimeIndex([pd.NaT])), expected_s]) + tm.assert_series_equal(result, expected_s) + + assert s.dtype == "datetime64[ns]" + unique = s.unique() + assert unique.dtype == "datetime64[ns]" + + # numpy_array_equal cannot compare pd.NaT + if isinstance(s, Index): + exp_idx = DatetimeIndex(expected.tolist() + [pd.NaT]) + tm.assert_index_equal(unique, exp_idx) + else: + tm.assert_numpy_array_equal(unique[:3], expected) + assert pd.isna(unique[3]) + + assert s.nunique() == 3 + assert s.nunique(dropna=False) == 4 + + # timedelta64[ns] + td = df.dt - df.dt + timedelta(1) + td = klass(td, name="dt") + + result = td.value_counts() + expected_s = Series([6], index=[Timedelta("1day")], name="dt") + tm.assert_series_equal(result, expected_s) + + expected = TimedeltaIndex(["1 days"], name="dt") + if isinstance(td, Index): + tm.assert_index_equal(td.unique(), expected) + else: + tm.assert_numpy_array_equal(td.unique(), expected.values) + + td2 = timedelta(1) + (df.dt - df.dt) + td2 = klass(td2, name="dt") + result2 = td2.value_counts() + tm.assert_series_equal(result2, expected_s) + + +@pytest.mark.parametrize("dropna", [True, False]) +def test_value_counts_with_nan(dropna, index_or_series): + # GH31944 + klass = index_or_series + values = [True, pd.NA, np.nan] + obj = klass(values) + res = obj.value_counts(dropna=dropna) + if dropna is True: + expected = Series([1], index=[True]) + else: + expected = Series([1, 1, 1], index=[True, pd.NA, np.nan]) + tm.assert_series_equal(res, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/computation/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/computation/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/computation/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/computation/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..eda635d Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/computation/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/computation/__pycache__/test_compat.cpython-38.pyc 
b/.local/lib/python3.8/site-packages/pandas/tests/computation/__pycache__/test_compat.cpython-38.pyc new file mode 100644 index 0000000..f7635dc Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/computation/__pycache__/test_compat.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/computation/__pycache__/test_eval.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/computation/__pycache__/test_eval.cpython-38.pyc new file mode 100644 index 0000000..8ab1ab9 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/computation/__pycache__/test_eval.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/computation/test_compat.py b/.local/lib/python3.8/site-packages/pandas/tests/computation/test_compat.py new file mode 100644 index 0000000..cfc0842 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/computation/test_compat.py @@ -0,0 +1,32 @@ +import pytest + +from pandas.compat._optional import VERSIONS + +import pandas as pd +from pandas.core.computation.engines import ENGINES +import pandas.core.computation.expr as expr +from pandas.util.version import Version + + +def test_compat(): + # test we have compat with our version of numexpr + + from pandas.core.computation.check import NUMEXPR_INSTALLED + + ne = pytest.importorskip("numexpr") + + ver = ne.__version__ + if Version(ver) < Version(VERSIONS["numexpr"]): + assert not NUMEXPR_INSTALLED + else: + assert NUMEXPR_INSTALLED + + +@pytest.mark.parametrize("engine", ENGINES) +@pytest.mark.parametrize("parser", expr.PARSERS) +def test_invalid_numexpr_version(engine, parser): + if engine == "numexpr": + pytest.importorskip("numexpr") + a, b = 1, 2 # noqa:F841 + res = pd.eval("a + b", engine=engine, parser=parser) + assert res == 3 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/computation/test_eval.py b/.local/lib/python3.8/site-packages/pandas/tests/computation/test_eval.py new file mode 100644 index 0000000..8fa2830 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/computation/test_eval.py @@ -0,0 +1,1960 @@ +from __future__ import annotations + +from functools import reduce +from itertools import product +import operator +import warnings + +import numpy as np +import pytest + +from pandas.errors import PerformanceWarning +import pandas.util._test_decorators as td + +from pandas.core.dtypes.common import ( + is_bool, + is_float, + is_list_like, + is_scalar, +) + +import pandas as pd +from pandas import ( + DataFrame, + Series, + date_range, +) +import pandas._testing as tm +from pandas.core.computation import pytables +from pandas.core.computation.engines import ( + ENGINES, + NumExprClobberingError, +) +import pandas.core.computation.expr as expr +from pandas.core.computation.expr import ( + BaseExprVisitor, + PandasExprVisitor, + PythonExprVisitor, +) +from pandas.core.computation.expressions import ( + NUMEXPR_INSTALLED, + USE_NUMEXPR, +) +from pandas.core.computation.ops import ( + ARITH_OPS_SYMS, + SPECIAL_CASE_ARITH_OPS_SYMS, + _binary_math_ops, + _binary_ops_dict, + _unary_math_ops, +) + + +@pytest.fixture( + params=( + pytest.param( + engine, + marks=[ + pytest.mark.skipif( + engine == "numexpr" and not USE_NUMEXPR, + reason=f"numexpr enabled->{USE_NUMEXPR}, " + f"installed->{NUMEXPR_INSTALLED}", + ), + td.skip_if_no_ne, + ], + ) + for engine in ENGINES + ) +) +def engine(request): + return request.param + + +@pytest.fixture(params=expr.PARSERS) +def parser(request): + 
return request.param + + +def _get_unary_fns_for_ne(): + return list(_unary_math_ops) if NUMEXPR_INSTALLED else [] + + +@pytest.fixture(params=_get_unary_fns_for_ne()) +def unary_fns_for_ne(request): + return request.param + + +def engine_has_neg_frac(engine): + return ENGINES[engine].has_neg_frac + + +def _eval_single_bin(lhs, cmp1, rhs, engine): + c = _binary_ops_dict[cmp1] + if engine_has_neg_frac(engine): + try: + return c(lhs, rhs) + except ValueError as e: + if str(e).startswith( + "negative number cannot be raised to a fractional power" + ): + return np.nan + raise + return c(lhs, rhs) + + +def _series_and_2d_ndarray(lhs, rhs): + return ( + isinstance(lhs, Series) and isinstance(rhs, np.ndarray) and rhs.ndim > 1 + ) or (isinstance(rhs, Series) and isinstance(lhs, np.ndarray) and lhs.ndim > 1) + + +def _series_and_frame(lhs, rhs): + return (isinstance(lhs, Series) and isinstance(rhs, DataFrame)) or ( + isinstance(rhs, Series) and isinstance(lhs, DataFrame) + ) + + +def _bool_and_frame(lhs, rhs): + return isinstance(lhs, bool) and isinstance(rhs, pd.core.generic.NDFrame) + + +def _is_py3_complex_incompat(result, expected): + return isinstance(expected, (complex, np.complexfloating)) and np.isnan(result) + + +_good_arith_ops = sorted(set(ARITH_OPS_SYMS).difference(SPECIAL_CASE_ARITH_OPS_SYMS)) + + +# TODO: using range(5) here is a kludge +@pytest.fixture( + params=list(range(5)), + ids=["DataFrame", "Series", "SeriesNaN", "DataFrameNaN", "float"], +) +def lhs(request): + + nan_df1 = DataFrame(np.random.rand(10, 5)) + nan_df1[nan_df1 > 0.5] = np.nan + + opts = ( + DataFrame(np.random.randn(10, 5)), + Series(np.random.randn(5)), + Series([1, 2, np.nan, np.nan, 5]), + nan_df1, + np.random.randn(), + ) + return opts[request.param] + + +rhs = lhs +midhs = lhs + + +class TestEval: + @pytest.fixture(autouse=True) + def set_engine_parser_attrs(self, engine, parser): + # Older tests look for these as attributes, so we set them here. 
+ self.engine = engine + self.parser = parser + + @classmethod + def setup_class(cls): + import numexpr as ne + + cls.ne = ne + + @property + def current_engines(self): + return (engine for engine in ENGINES if engine != self.engine) + + @pytest.mark.parametrize( + "cmp1", + ["!=", "==", "<=", ">=", "<", ">"], + ids=["ne", "eq", "le", "ge", "lt", "gt"], + ) + @pytest.mark.parametrize("cmp2", [">", "<"], ids=["gt", "lt"]) + @pytest.mark.parametrize("binop", expr.BOOL_OPS_SYMS) + def test_complex_cmp_ops(self, cmp1, cmp2, binop, lhs, rhs): + if self.parser == "python" and binop in ["and", "or"]: + msg = "'BoolOp' nodes are not implemented" + with pytest.raises(NotImplementedError, match=msg): + ex = f"(lhs {cmp1} rhs) {binop} (lhs {cmp2} rhs)" + result = pd.eval(ex, engine=self.engine, parser=self.parser) + return + + lhs_new = _eval_single_bin(lhs, cmp1, rhs, self.engine) + rhs_new = _eval_single_bin(lhs, cmp2, rhs, self.engine) + expected = _eval_single_bin(lhs_new, binop, rhs_new, self.engine) + + ex = f"(lhs {cmp1} rhs) {binop} (lhs {cmp2} rhs)" + result = pd.eval(ex, engine=self.engine, parser=self.parser) + self.check_equal(result, expected) + + @pytest.mark.parametrize("cmp_op", expr.CMP_OPS_SYMS) + def test_simple_cmp_ops(self, cmp_op, lhs, rhs): + lhs = lhs < 0 + rhs = rhs < 0 + + if self.parser == "python" and cmp_op in ["in", "not in"]: + msg = "'(In|NotIn)' nodes are not implemented" + + with pytest.raises(NotImplementedError, match=msg): + self.check_simple_cmp_op(lhs, cmp_op, rhs) + return + + self.check_simple_cmp_op(lhs, cmp_op, rhs) + + @pytest.mark.parametrize("op", expr.CMP_OPS_SYMS) + def test_compound_invert_op(self, op, lhs, rhs, request): + if self.parser == "python" and op in ["in", "not in"]: + + msg = "'(In|NotIn)' nodes are not implemented" + with pytest.raises(NotImplementedError, match=msg): + self.check_compound_invert_op(lhs, op, rhs) + return + + if ( + is_float(lhs) + and not is_float(rhs) + and op in ["in", "not in"] + and self.engine == "python" + and self.parser == "pandas" + ): + mark = pytest.mark.xfail( + reason="Looks like expected is negative, unclear whether " + "expected is incorrect or result is incorrect" + ) + request.node.add_marker(mark) + + self.check_compound_invert_op(lhs, op, rhs) + + @pytest.mark.parametrize("cmp1", ["<", ">"]) + @pytest.mark.parametrize("cmp2", ["<", ">"]) + def test_chained_cmp_op(self, cmp1, cmp2, lhs, midhs, rhs): + self.check_chained_cmp_op(lhs, cmp1, midhs, cmp2, rhs) + + def check_equal(self, result, expected): + if isinstance(result, DataFrame): + tm.assert_frame_equal(result, expected) + elif isinstance(result, Series): + tm.assert_series_equal(result, expected) + elif isinstance(result, np.ndarray): + tm.assert_numpy_array_equal(result, expected) + else: + assert result == expected + + def check_chained_cmp_op(self, lhs, cmp1, mid, cmp2, rhs): + + if self.parser == "python": + ex1 = f"lhs {cmp1} mid {cmp2} rhs" + msg = "'BoolOp' nodes are not implemented" + with pytest.raises(NotImplementedError, match=msg): + pd.eval(ex1, engine=self.engine, parser=self.parser) + return + + lhs_new = _eval_single_bin(lhs, cmp1, mid, self.engine) + rhs_new = _eval_single_bin(mid, cmp2, rhs, self.engine) + + if lhs_new is not None and rhs_new is not None: + ex1 = f"lhs {cmp1} mid {cmp2} rhs" + ex2 = f"lhs {cmp1} mid and mid {cmp2} rhs" + ex3 = f"(lhs {cmp1} mid) & (mid {cmp2} rhs)" + expected = _eval_single_bin(lhs_new, "&", rhs_new, self.engine) + + for ex in (ex1, ex2, ex3): + result = pd.eval(ex, engine=self.engine, 
parser=self.parser) + + tm.assert_almost_equal(result, expected) + + def check_simple_cmp_op(self, lhs, cmp1, rhs): + ex = f"lhs {cmp1} rhs" + msg = "|".join( + [ + r"only list-like( or dict-like)? objects are allowed to be " + r"passed to (DataFrame\.)?isin\(\), you passed a " + r"(\[|')bool(\]|')", + "argument of type 'bool' is not iterable", + ] + ) + if cmp1 in ("in", "not in") and not is_list_like(rhs): + with pytest.raises(TypeError, match=msg): + pd.eval( + ex, + engine=self.engine, + parser=self.parser, + local_dict={"lhs": lhs, "rhs": rhs}, + ) + else: + expected = _eval_single_bin(lhs, cmp1, rhs, self.engine) + result = pd.eval(ex, engine=self.engine, parser=self.parser) + self.check_equal(result, expected) + + @pytest.mark.parametrize("arith1", _good_arith_ops) + def test_binary_arith_ops(self, arith1, lhs, rhs): + ex = f"lhs {arith1} rhs" + result = pd.eval(ex, engine=self.engine, parser=self.parser) + expected = _eval_single_bin(lhs, arith1, rhs, self.engine) + + tm.assert_almost_equal(result, expected) + ex = f"lhs {arith1} rhs {arith1} rhs" + result = pd.eval(ex, engine=self.engine, parser=self.parser) + nlhs = _eval_single_bin(lhs, arith1, rhs, self.engine) + self.check_alignment(result, nlhs, rhs, arith1) + + def check_alignment(self, result, nlhs, ghs, op): + try: + nlhs, ghs = nlhs.align(ghs) + except (ValueError, TypeError, AttributeError): + # ValueError: series frame or frame series align + # TypeError, AttributeError: series or frame with scalar align + pass + else: + if self.engine == "numexpr": + # direct numpy comparison + expected = self.ne.evaluate(f"nlhs {op} ghs") + # Update assert statement due to unreliable numerical + # precision component (GH37328) + # TODO: update testing code so that assert_almost_equal statement + # can be replaced again by the assert_numpy_array_equal statement + tm.assert_almost_equal(result.values, expected) + else: + expected = eval(f"nlhs {op} ghs") + tm.assert_almost_equal(result, expected) + + # modulus, pow, and floor division require special casing + + def test_modulus(self, lhs, rhs): + ex = r"lhs % rhs" + result = pd.eval(ex, engine=self.engine, parser=self.parser) + expected = lhs % rhs + tm.assert_almost_equal(result, expected) + + if self.engine == "numexpr": + expected = self.ne.evaluate(r"expected % rhs") + if isinstance(result, (DataFrame, Series)): + tm.assert_almost_equal(result.values, expected) + else: + tm.assert_almost_equal(result, expected.item()) + else: + expected = _eval_single_bin(expected, "%", rhs, self.engine) + tm.assert_almost_equal(result, expected) + + def test_floor_division(self, lhs, rhs): + ex = "lhs // rhs" + + if self.engine == "python": + res = pd.eval(ex, engine=self.engine, parser=self.parser) + expected = lhs // rhs + self.check_equal(res, expected) + else: + msg = ( + r"unsupported operand type\(s\) for //: 'VariableNode' and " + "'VariableNode'" + ) + with pytest.raises(TypeError, match=msg): + pd.eval( + ex, + local_dict={"lhs": lhs, "rhs": rhs}, + engine=self.engine, + parser=self.parser, + ) + + @td.skip_if_windows + def test_pow(self, lhs, rhs): + # odd failure on win32 platform, so skip + ex = "lhs ** rhs" + expected = _eval_single_bin(lhs, "**", rhs, self.engine) + result = pd.eval(ex, engine=self.engine, parser=self.parser) + + if ( + is_scalar(lhs) + and is_scalar(rhs) + and _is_py3_complex_incompat(result, expected) + ): + msg = "(DataFrame.columns|numpy array) are different" + with pytest.raises(AssertionError, match=msg): + tm.assert_numpy_array_equal(result, expected) + else: + 
tm.assert_almost_equal(result, expected) + + ex = "(lhs ** rhs) ** rhs" + result = pd.eval(ex, engine=self.engine, parser=self.parser) + + middle = _eval_single_bin(lhs, "**", rhs, self.engine) + expected = _eval_single_bin(middle, "**", rhs, self.engine) + tm.assert_almost_equal(result, expected) + + def check_single_invert_op(self, lhs): + # simple + try: + elb = lhs.astype(bool) + except AttributeError: + elb = np.array([bool(lhs)]) + expected = ~elb + result = pd.eval("~elb", engine=self.engine, parser=self.parser) + tm.assert_almost_equal(expected, result) + + for engine in self.current_engines: + tm.assert_almost_equal( + result, pd.eval("~elb", engine=engine, parser=self.parser) + ) + + def check_compound_invert_op(self, lhs, cmp1, rhs): + skip_these = ["in", "not in"] + ex = f"~(lhs {cmp1} rhs)" + + msg = "|".join( + [ + r"only list-like( or dict-like)? objects are allowed to be " + r"passed to (DataFrame\.)?isin\(\), you passed a " + r"(\[|')float(\]|')", + "argument of type 'float' is not iterable", + ] + ) + if is_scalar(rhs) and cmp1 in skip_these: + with pytest.raises(TypeError, match=msg): + pd.eval( + ex, + engine=self.engine, + parser=self.parser, + local_dict={"lhs": lhs, "rhs": rhs}, + ) + else: + # compound + if is_scalar(lhs) and is_scalar(rhs): + lhs, rhs = map(lambda x: np.array([x]), (lhs, rhs)) + expected = _eval_single_bin(lhs, cmp1, rhs, self.engine) + if is_scalar(expected): + expected = not expected + else: + expected = ~expected + result = pd.eval(ex, engine=self.engine, parser=self.parser) + tm.assert_almost_equal(expected, result) + + # make sure the other engines work the same as this one + for engine in self.current_engines: + ev = pd.eval(ex, engine=self.engine, parser=self.parser) + tm.assert_almost_equal(ev, result) + + def test_frame_invert(self): + expr = "~lhs" + + # ~ ## + # frame + # float always raises + lhs = DataFrame(np.random.randn(5, 2)) + if self.engine == "numexpr": + msg = "couldn't find matching opcode for 'invert_dd'" + with pytest.raises(NotImplementedError, match=msg): + result = pd.eval(expr, engine=self.engine, parser=self.parser) + else: + msg = "ufunc 'invert' not supported for the input types" + with pytest.raises(TypeError, match=msg): + result = pd.eval(expr, engine=self.engine, parser=self.parser) + + # int raises on numexpr + lhs = DataFrame(np.random.randint(5, size=(5, 2))) + if self.engine == "numexpr": + msg = "couldn't find matching opcode for 'invert" + with pytest.raises(NotImplementedError, match=msg): + result = pd.eval(expr, engine=self.engine, parser=self.parser) + else: + expect = ~lhs + result = pd.eval(expr, engine=self.engine, parser=self.parser) + tm.assert_frame_equal(expect, result) + + # bool always works + lhs = DataFrame(np.random.rand(5, 2) > 0.5) + expect = ~lhs + result = pd.eval(expr, engine=self.engine, parser=self.parser) + tm.assert_frame_equal(expect, result) + + # object raises + lhs = DataFrame({"b": ["a", 1, 2.0], "c": np.random.rand(3) > 0.5}) + if self.engine == "numexpr": + with pytest.raises(ValueError, match="unknown type object"): + result = pd.eval(expr, engine=self.engine, parser=self.parser) + else: + msg = "bad operand type for unary ~: 'str'" + with pytest.raises(TypeError, match=msg): + result = pd.eval(expr, engine=self.engine, parser=self.parser) + + def test_series_invert(self): + # ~ #### + expr = "~lhs" + + # series + # float raises + lhs = Series(np.random.randn(5)) + if self.engine == "numexpr": + msg = "couldn't find matching opcode for 'invert_dd'" + with 
pytest.raises(NotImplementedError, match=msg): + result = pd.eval(expr, engine=self.engine, parser=self.parser) + else: + msg = "ufunc 'invert' not supported for the input types" + with pytest.raises(TypeError, match=msg): + result = pd.eval(expr, engine=self.engine, parser=self.parser) + + # int raises on numexpr + lhs = Series(np.random.randint(5, size=5)) + if self.engine == "numexpr": + msg = "couldn't find matching opcode for 'invert" + with pytest.raises(NotImplementedError, match=msg): + result = pd.eval(expr, engine=self.engine, parser=self.parser) + else: + expect = ~lhs + result = pd.eval(expr, engine=self.engine, parser=self.parser) + tm.assert_series_equal(expect, result) + + # bool + lhs = Series(np.random.rand(5) > 0.5) + expect = ~lhs + result = pd.eval(expr, engine=self.engine, parser=self.parser) + tm.assert_series_equal(expect, result) + + # float + # int + # bool + + # object + lhs = Series(["a", 1, 2.0]) + if self.engine == "numexpr": + with pytest.raises(ValueError, match="unknown type object"): + result = pd.eval(expr, engine=self.engine, parser=self.parser) + else: + msg = "bad operand type for unary ~: 'str'" + with pytest.raises(TypeError, match=msg): + result = pd.eval(expr, engine=self.engine, parser=self.parser) + + def test_frame_negate(self): + expr = "-lhs" + + # float + lhs = DataFrame(np.random.randn(5, 2)) + expect = -lhs + result = pd.eval(expr, engine=self.engine, parser=self.parser) + tm.assert_frame_equal(expect, result) + + # int + lhs = DataFrame(np.random.randint(5, size=(5, 2))) + expect = -lhs + result = pd.eval(expr, engine=self.engine, parser=self.parser) + tm.assert_frame_equal(expect, result) + + # bool doesn't work with numexpr but works elsewhere + lhs = DataFrame(np.random.rand(5, 2) > 0.5) + if self.engine == "numexpr": + msg = "couldn't find matching opcode for 'neg_bb'" + with pytest.raises(NotImplementedError, match=msg): + result = pd.eval(expr, engine=self.engine, parser=self.parser) + else: + expect = -lhs + result = pd.eval(expr, engine=self.engine, parser=self.parser) + tm.assert_frame_equal(expect, result) + + def test_series_negate(self): + expr = "-lhs" + + # float + lhs = Series(np.random.randn(5)) + expect = -lhs + result = pd.eval(expr, engine=self.engine, parser=self.parser) + tm.assert_series_equal(expect, result) + + # int + lhs = Series(np.random.randint(5, size=5)) + expect = -lhs + result = pd.eval(expr, engine=self.engine, parser=self.parser) + tm.assert_series_equal(expect, result) + + # bool doesn't work with numexpr but works elsewhere + lhs = Series(np.random.rand(5) > 0.5) + if self.engine == "numexpr": + msg = "couldn't find matching opcode for 'neg_bb'" + with pytest.raises(NotImplementedError, match=msg): + result = pd.eval(expr, engine=self.engine, parser=self.parser) + else: + expect = -lhs + result = pd.eval(expr, engine=self.engine, parser=self.parser) + tm.assert_series_equal(expect, result) + + @pytest.mark.parametrize( + "lhs", + [ + # Float + DataFrame(np.random.randn(5, 2)), + # Int + DataFrame(np.random.randint(5, size=(5, 2))), + # bool doesn't work with numexpr but works elsewhere + DataFrame(np.random.rand(5, 2) > 0.5), + ], + ) + def test_frame_pos(self, lhs): + expr = "+lhs" + expect = lhs + + result = pd.eval(expr, engine=self.engine, parser=self.parser) + tm.assert_frame_equal(expect, result) + + @pytest.mark.parametrize( + "lhs", + [ + # Float + Series(np.random.randn(5)), + # Int + Series(np.random.randint(5, size=5)), + # bool doesn't work with numexpr but works elsewhere + 
Series(np.random.rand(5) > 0.5), + ], + ) + def test_series_pos(self, lhs): + expr = "+lhs" + expect = lhs + + result = pd.eval(expr, engine=self.engine, parser=self.parser) + tm.assert_series_equal(expect, result) + + def test_scalar_unary(self): + msg = "bad operand type for unary ~: 'float'" + with pytest.raises(TypeError, match=msg): + pd.eval("~1.0", engine=self.engine, parser=self.parser) + + assert pd.eval("-1.0", parser=self.parser, engine=self.engine) == -1.0 + assert pd.eval("+1.0", parser=self.parser, engine=self.engine) == +1.0 + assert pd.eval("~1", parser=self.parser, engine=self.engine) == ~1 + assert pd.eval("-1", parser=self.parser, engine=self.engine) == -1 + assert pd.eval("+1", parser=self.parser, engine=self.engine) == +1 + assert pd.eval("~True", parser=self.parser, engine=self.engine) == ~True + assert pd.eval("~False", parser=self.parser, engine=self.engine) == ~False + assert pd.eval("-True", parser=self.parser, engine=self.engine) == -True + assert pd.eval("-False", parser=self.parser, engine=self.engine) == -False + assert pd.eval("+True", parser=self.parser, engine=self.engine) == +True + assert pd.eval("+False", parser=self.parser, engine=self.engine) == +False + + def test_unary_in_array(self): + # GH 11235 + # TODO: 2022-01-29: result return list with numexpr 2.7.3 in CI + # but cannot reproduce locally + result = np.array( + pd.eval( + "[-True, True, ~True, +True," + "-False, False, ~False, +False," + "-37, 37, ~37, +37]" + ), + dtype=np.object_, + ) + expected = np.array( + [ + -True, + True, + ~True, + +True, + -False, + False, + ~False, + +False, + -37, + 37, + ~37, + +37, + ], + dtype=np.object_, + ) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("dtype", [np.float32, np.float64]) + def test_float_comparison_bin_op(self, dtype): + # GH 16363 + df = DataFrame({"x": np.array([0], dtype=dtype)}) + res = df.eval("x < -0.1") + assert res.values == np.array([False]) + + res = df.eval("-5 > x") + assert res.values == np.array([False]) + + def test_disallow_scalar_bool_ops(self): + exprs = "1 or 2", "1 and 2" + exprs += "a and b", "a or b" + exprs += ("1 or 2 and (3 + 2) > 3",) + exprs += ("2 * x > 2 or 1 and 2",) + exprs += ("2 * df > 3 and 1 or a",) + + x, a, b = np.random.randn(3), 1, 2 # noqa:F841 + df = DataFrame(np.random.randn(3, 2)) # noqa:F841 + + for ex in exprs: + msg = "cannot evaluate scalar only bool ops|'BoolOp' nodes are not" + with pytest.raises(NotImplementedError, match=msg): + pd.eval(ex, engine=self.engine, parser=self.parser) + + def test_identical(self): + # see gh-10546 + x = 1 + result = pd.eval("x", engine=self.engine, parser=self.parser) + assert result == 1 + assert is_scalar(result) + + x = 1.5 + result = pd.eval("x", engine=self.engine, parser=self.parser) + assert result == 1.5 + assert is_scalar(result) + + x = False + result = pd.eval("x", engine=self.engine, parser=self.parser) + assert not result + assert is_bool(result) + assert is_scalar(result) + + x = np.array([1]) + result = pd.eval("x", engine=self.engine, parser=self.parser) + tm.assert_numpy_array_equal(result, np.array([1])) + assert result.shape == (1,) + + x = np.array([1.5]) + result = pd.eval("x", engine=self.engine, parser=self.parser) + tm.assert_numpy_array_equal(result, np.array([1.5])) + assert result.shape == (1,) + + x = np.array([False]) # noqa:F841 + result = pd.eval("x", engine=self.engine, parser=self.parser) + tm.assert_numpy_array_equal(result, np.array([False])) + assert result.shape == (1,) + + def 
test_line_continuation(self): + # GH 11149 + exp = """1 + 2 * \ + 5 - 1 + 2 """ + result = pd.eval(exp, engine=self.engine, parser=self.parser) + assert result == 12 + + def test_float_truncation(self): + # GH 14241 + exp = "1000000000.006" + result = pd.eval(exp, engine=self.engine, parser=self.parser) + expected = np.float64(exp) + assert result == expected + + df = DataFrame({"A": [1000000000.0009, 1000000000.0011, 1000000000.0015]}) + cutoff = 1000000000.0006 + result = df.query(f"A < {cutoff:.4f}") + assert result.empty + + cutoff = 1000000000.0010 + result = df.query(f"A > {cutoff:.4f}") + expected = df.loc[[1, 2], :] + tm.assert_frame_equal(expected, result) + + exact = 1000000000.0011 + result = df.query(f"A == {exact:.4f}") + expected = df.loc[[1], :] + tm.assert_frame_equal(expected, result) + + def test_disallow_python_keywords(self): + # GH 18221 + df = DataFrame([[0, 0, 0]], columns=["foo", "bar", "class"]) + msg = "Python keyword not valid identifier in numexpr query" + with pytest.raises(SyntaxError, match=msg): + df.query("class == 0") + + df = DataFrame() + df.index.name = "lambda" + with pytest.raises(SyntaxError, match=msg): + df.query("lambda == 0") + + +f = lambda *args, **kwargs: np.random.randn() + + +# ------------------------------------- +# gh-12388: Typecasting rules consistency with python + + +class TestTypeCasting: + @pytest.mark.parametrize("op", ["+", "-", "*", "**", "/"]) + # maybe someday... numexpr has too many upcasting rules now + # chain(*(np.sctypes[x] for x in ['uint', 'int', 'float'])) + @pytest.mark.parametrize("dt", [np.float32, np.float64]) + def test_binop_typecasting(self, engine, parser, op, dt): + df = tm.makeCustomDataframe(5, 3, data_gen_f=f, dtype=dt) + s = f"df {op} 3" + res = pd.eval(s, engine=engine, parser=parser) + assert df.values.dtype == dt + assert res.values.dtype == dt + tm.assert_frame_equal(res, eval(s)) + + s = f"3 {op} df" + res = pd.eval(s, engine=engine, parser=parser) + assert df.values.dtype == dt + assert res.values.dtype == dt + tm.assert_frame_equal(res, eval(s)) + + +# ------------------------------------- +# Basic and complex alignment + + +def _is_datetime(x): + return issubclass(x.dtype.type, np.datetime64) + + +def should_warn(*args): + not_mono = not any(map(operator.attrgetter("is_monotonic"), args)) + only_one_dt = reduce(operator.xor, map(_is_datetime, args)) + return not_mono and only_one_dt + + +class TestAlignment: + + index_types = ["i", "u", "dt"] + lhs_index_types = index_types + ["s"] # 'p' + + def test_align_nested_unary_op(self, engine, parser): + s = "df * ~2" + df = tm.makeCustomDataframe(5, 3, data_gen_f=f) + res = pd.eval(s, engine=engine, parser=parser) + tm.assert_frame_equal(res, df * ~2) + + @pytest.mark.parametrize("lr_idx_type", lhs_index_types) + @pytest.mark.parametrize("rr_idx_type", index_types) + @pytest.mark.parametrize("c_idx_type", index_types) + def test_basic_frame_alignment( + self, engine, parser, lr_idx_type, rr_idx_type, c_idx_type + ): + with warnings.catch_warnings(record=True): + warnings.simplefilter("always", RuntimeWarning) + + df = tm.makeCustomDataframe( + 10, 10, data_gen_f=f, r_idx_type=lr_idx_type, c_idx_type=c_idx_type + ) + df2 = tm.makeCustomDataframe( + 20, 10, data_gen_f=f, r_idx_type=rr_idx_type, c_idx_type=c_idx_type + ) + # only warns if not monotonic and not sortable + if should_warn(df.index, df2.index): + with tm.assert_produces_warning(RuntimeWarning): + res = pd.eval("df + df2", engine=engine, parser=parser) + else: + res = pd.eval("df + df2", 
engine=engine, parser=parser) + tm.assert_frame_equal(res, df + df2) + + @pytest.mark.parametrize("r_idx_type", lhs_index_types) + @pytest.mark.parametrize("c_idx_type", lhs_index_types) + def test_frame_comparison(self, engine, parser, r_idx_type, c_idx_type): + df = tm.makeCustomDataframe( + 10, 10, data_gen_f=f, r_idx_type=r_idx_type, c_idx_type=c_idx_type + ) + res = pd.eval("df < 2", engine=engine, parser=parser) + tm.assert_frame_equal(res, df < 2) + + df3 = DataFrame(np.random.randn(*df.shape), index=df.index, columns=df.columns) + res = pd.eval("df < df3", engine=engine, parser=parser) + tm.assert_frame_equal(res, df < df3) + + @pytest.mark.parametrize("r1", lhs_index_types) + @pytest.mark.parametrize("c1", index_types) + @pytest.mark.parametrize("r2", index_types) + @pytest.mark.parametrize("c2", index_types) + def test_medium_complex_frame_alignment(self, engine, parser, r1, c1, r2, c2): + + with warnings.catch_warnings(record=True): + warnings.simplefilter("always", RuntimeWarning) + + df = tm.makeCustomDataframe( + 3, 2, data_gen_f=f, r_idx_type=r1, c_idx_type=c1 + ) + df2 = tm.makeCustomDataframe( + 4, 2, data_gen_f=f, r_idx_type=r2, c_idx_type=c2 + ) + df3 = tm.makeCustomDataframe( + 5, 2, data_gen_f=f, r_idx_type=r2, c_idx_type=c2 + ) + if should_warn(df.index, df2.index, df3.index): + with tm.assert_produces_warning(RuntimeWarning): + res = pd.eval("df + df2 + df3", engine=engine, parser=parser) + else: + res = pd.eval("df + df2 + df3", engine=engine, parser=parser) + tm.assert_frame_equal(res, df + df2 + df3) + + @pytest.mark.parametrize("index_name", ["index", "columns"]) + @pytest.mark.parametrize("c_idx_type", index_types) + @pytest.mark.parametrize("r_idx_type", lhs_index_types) + def test_basic_frame_series_alignment( + self, engine, parser, index_name, r_idx_type, c_idx_type + ): + with warnings.catch_warnings(record=True): + warnings.simplefilter("always", RuntimeWarning) + df = tm.makeCustomDataframe( + 10, 10, data_gen_f=f, r_idx_type=r_idx_type, c_idx_type=c_idx_type + ) + index = getattr(df, index_name) + s = Series(np.random.randn(5), index[:5]) + + if should_warn(df.index, s.index): + with tm.assert_produces_warning(RuntimeWarning): + res = pd.eval("df + s", engine=engine, parser=parser) + else: + res = pd.eval("df + s", engine=engine, parser=parser) + + if r_idx_type == "dt" or c_idx_type == "dt": + expected = df.add(s) if engine == "numexpr" else df + s + else: + expected = df + s + tm.assert_frame_equal(res, expected) + + @pytest.mark.parametrize("index_name", ["index", "columns"]) + @pytest.mark.parametrize( + "r_idx_type, c_idx_type", + list(product(["i", "u", "s"], ["i", "u", "s"])) + [("dt", "dt")], + ) + @pytest.mark.filterwarnings("ignore::RuntimeWarning") + def test_basic_series_frame_alignment( + self, engine, parser, index_name, r_idx_type, c_idx_type + ): + df = tm.makeCustomDataframe( + 10, 7, data_gen_f=f, r_idx_type=r_idx_type, c_idx_type=c_idx_type + ) + index = getattr(df, index_name) + s = Series(np.random.randn(5), index[:5]) + if should_warn(s.index, df.index): + with tm.assert_produces_warning(RuntimeWarning): + res = pd.eval("s + df", engine=engine, parser=parser) + else: + res = pd.eval("s + df", engine=engine, parser=parser) + + if r_idx_type == "dt" or c_idx_type == "dt": + expected = df.add(s) if engine == "numexpr" else s + df + else: + expected = s + df + tm.assert_frame_equal(res, expected) + + @pytest.mark.parametrize("c_idx_type", index_types) + @pytest.mark.parametrize("r_idx_type", lhs_index_types) + 
@pytest.mark.parametrize("index_name", ["index", "columns"]) + @pytest.mark.parametrize("op", ["+", "*"]) + def test_series_frame_commutativity( + self, engine, parser, index_name, op, r_idx_type, c_idx_type + ): + + with warnings.catch_warnings(record=True): + warnings.simplefilter("always", RuntimeWarning) + + df = tm.makeCustomDataframe( + 10, 10, data_gen_f=f, r_idx_type=r_idx_type, c_idx_type=c_idx_type + ) + index = getattr(df, index_name) + s = Series(np.random.randn(5), index[:5]) + + lhs = f"s {op} df" + rhs = f"df {op} s" + if should_warn(df.index, s.index): + with tm.assert_produces_warning(RuntimeWarning): + a = pd.eval(lhs, engine=engine, parser=parser) + with tm.assert_produces_warning(RuntimeWarning): + b = pd.eval(rhs, engine=engine, parser=parser) + else: + a = pd.eval(lhs, engine=engine, parser=parser) + b = pd.eval(rhs, engine=engine, parser=parser) + + if r_idx_type != "dt" and c_idx_type != "dt": + if engine == "numexpr": + tm.assert_frame_equal(a, b) + + @pytest.mark.parametrize("r1", lhs_index_types) + @pytest.mark.parametrize("c1", index_types) + @pytest.mark.parametrize("r2", index_types) + @pytest.mark.parametrize("c2", index_types) + def test_complex_series_frame_alignment(self, engine, parser, r1, c1, r2, c2): + import random + + n = 3 + m1 = 5 + m2 = 2 * m1 + + with warnings.catch_warnings(record=True): + warnings.simplefilter("always", RuntimeWarning) + + index_name = random.choice(["index", "columns"]) + obj_name = random.choice(["df", "df2"]) + + df = tm.makeCustomDataframe( + m1, n, data_gen_f=f, r_idx_type=r1, c_idx_type=c1 + ) + df2 = tm.makeCustomDataframe( + m2, n, data_gen_f=f, r_idx_type=r2, c_idx_type=c2 + ) + index = getattr(locals().get(obj_name), index_name) + ser = Series(np.random.randn(n), index[:n]) + + if r2 == "dt" or c2 == "dt": + if engine == "numexpr": + expected2 = df2.add(ser) + else: + expected2 = df2 + ser + else: + expected2 = df2 + ser + + if r1 == "dt" or c1 == "dt": + if engine == "numexpr": + expected = expected2.add(df) + else: + expected = expected2 + df + else: + expected = expected2 + df + + if should_warn(df2.index, ser.index, df.index): + with tm.assert_produces_warning(RuntimeWarning): + res = pd.eval("df2 + ser + df", engine=engine, parser=parser) + else: + res = pd.eval("df2 + ser + df", engine=engine, parser=parser) + assert res.shape == expected.shape + tm.assert_frame_equal(res, expected) + + def test_performance_warning_for_poor_alignment(self, engine, parser): + df = DataFrame(np.random.randn(1000, 10)) + s = Series(np.random.randn(10000)) + if engine == "numexpr": + seen = PerformanceWarning + else: + seen = False + + with tm.assert_produces_warning(seen): + pd.eval("df + s", engine=engine, parser=parser) + + s = Series(np.random.randn(1000)) + with tm.assert_produces_warning(False): + pd.eval("df + s", engine=engine, parser=parser) + + df = DataFrame(np.random.randn(10, 10000)) + s = Series(np.random.randn(10000)) + with tm.assert_produces_warning(False): + pd.eval("df + s", engine=engine, parser=parser) + + df = DataFrame(np.random.randn(10, 10)) + s = Series(np.random.randn(10000)) + + is_python_engine = engine == "python" + + if not is_python_engine: + wrn = PerformanceWarning + else: + wrn = False + + with tm.assert_produces_warning(wrn) as w: + pd.eval("df + s", engine=engine, parser=parser) + + if not is_python_engine: + assert len(w) == 1 + msg = str(w[0].message) + logged = np.log10(s.size - df.shape[1]) + expected = ( + f"Alignment difference on axis 1 is larger " + f"than an order of magnitude on term 
'df', " + f"by more than {logged:.4g}; performance may suffer." + ) + assert msg == expected + + +# ------------------------------------ +# Slightly more complex ops + + +class TestOperations: + @pytest.fixture(autouse=True) + def set_engine_parser_attrs(self, engine, parser): + # Older tests look for these as attributes, so we set them here. + self.engine = engine + self.parser = parser + + def eval(self, *args, **kwargs): + kwargs["engine"] = self.engine + kwargs["parser"] = self.parser + kwargs["level"] = kwargs.pop("level", 0) + 1 + return pd.eval(*args, **kwargs) + + def test_simple_arith_ops(self): + exclude_arith = [] + if self.parser == "python": + exclude_arith = ["in", "not in"] + + arith_ops = [ + op + for op in expr.ARITH_OPS_SYMS + expr.CMP_OPS_SYMS + if op not in exclude_arith + ] + + ops = (op for op in arith_ops if op != "//") + + for op in ops: + ex = f"1 {op} 1" + ex2 = f"x {op} 1" + ex3 = f"1 {op} (x + 1)" + + if op in ("in", "not in"): + msg = "argument of type 'int' is not iterable" + with pytest.raises(TypeError, match=msg): + pd.eval(ex, engine=self.engine, parser=self.parser) + else: + expec = _eval_single_bin(1, op, 1, self.engine) + x = self.eval(ex, engine=self.engine, parser=self.parser) + assert x == expec + + expec = _eval_single_bin(x, op, 1, self.engine) + y = self.eval( + ex2, local_dict={"x": x}, engine=self.engine, parser=self.parser + ) + assert y == expec + + expec = _eval_single_bin(1, op, x + 1, self.engine) + y = self.eval( + ex3, local_dict={"x": x}, engine=self.engine, parser=self.parser + ) + assert y == expec + + @pytest.mark.parametrize("rhs", [True, False]) + @pytest.mark.parametrize("lhs", [True, False]) + @pytest.mark.parametrize("op", expr.BOOL_OPS_SYMS) + def test_simple_bool_ops(self, rhs, lhs, op): + ex = f"{lhs} {op} {rhs}" + + if self.parser == "python" and op in ["and", "or"]: + msg = "'BoolOp' nodes are not implemented" + with pytest.raises(NotImplementedError, match=msg): + self.eval(ex) + return + + res = self.eval(ex) + exp = eval(ex) + assert res == exp + + @pytest.mark.parametrize("rhs", [True, False]) + @pytest.mark.parametrize("lhs", [True, False]) + @pytest.mark.parametrize("op", expr.BOOL_OPS_SYMS) + def test_bool_ops_with_constants(self, rhs, lhs, op): + ex = f"{lhs} {op} {rhs}" + + if self.parser == "python" and op in ["and", "or"]: + msg = "'BoolOp' nodes are not implemented" + with pytest.raises(NotImplementedError, match=msg): + self.eval(ex) + return + + res = self.eval(ex) + exp = eval(ex) + assert res == exp + + def test_4d_ndarray_fails(self): + x = np.random.randn(3, 4, 5, 6) + y = Series(np.random.randn(10)) + msg = "N-dimensional objects, where N > 2, are not supported with eval" + with pytest.raises(NotImplementedError, match=msg): + self.eval("x + y", local_dict={"x": x, "y": y}) + + def test_constant(self): + x = self.eval("1") + assert x == 1 + + def test_single_variable(self): + df = DataFrame(np.random.randn(10, 2)) + df2 = self.eval("df", local_dict={"df": df}) + tm.assert_frame_equal(df, df2) + + def test_truediv(self): + s = np.array([1]) # noqa:F841 + ex = "s / 1" + + # FutureWarning: The `truediv` parameter in pd.eval is deprecated and will be + # removed in a future version. 
+ with tm.assert_produces_warning(FutureWarning): + res = self.eval(ex, truediv=False) + tm.assert_numpy_array_equal(res, np.array([1.0])) + + with tm.assert_produces_warning(FutureWarning): + res = self.eval(ex, truediv=True) + tm.assert_numpy_array_equal(res, np.array([1.0])) + + with tm.assert_produces_warning(FutureWarning): + res = self.eval("1 / 2", truediv=True) + expec = 0.5 + assert res == expec + + with tm.assert_produces_warning(FutureWarning): + res = self.eval("1 / 2", truediv=False) + expec = 0.5 + assert res == expec + + with tm.assert_produces_warning(FutureWarning): + res = self.eval("s / 2", truediv=False) + expec = 0.5 + assert res == expec + + with tm.assert_produces_warning(FutureWarning): + res = self.eval("s / 2", truediv=True) + expec = 0.5 + assert res == expec + + def test_failing_subscript_with_name_error(self): + df = DataFrame(np.random.randn(5, 3)) # noqa:F841 + with pytest.raises(NameError, match="name 'x' is not defined"): + self.eval("df[x > 2] > 2") + + def test_lhs_expression_subscript(self): + df = DataFrame(np.random.randn(5, 3)) + result = self.eval("(df + 1)[df > 2]", local_dict={"df": df}) + expected = (df + 1)[df > 2] + tm.assert_frame_equal(result, expected) + + def test_attr_expression(self): + df = DataFrame(np.random.randn(5, 3), columns=list("abc")) + expr1 = "df.a < df.b" + expec1 = df.a < df.b + expr2 = "df.a + df.b + df.c" + expec2 = df.a + df.b + df.c + expr3 = "df.a + df.b + df.c[df.b < 0]" + expec3 = df.a + df.b + df.c[df.b < 0] + exprs = expr1, expr2, expr3 + expecs = expec1, expec2, expec3 + for e, expec in zip(exprs, expecs): + tm.assert_series_equal(expec, self.eval(e, local_dict={"df": df})) + + def test_assignment_fails(self): + df = DataFrame(np.random.randn(5, 3), columns=list("abc")) + df2 = DataFrame(np.random.randn(5, 3)) + expr1 = "df = df2" + msg = "cannot assign without a target object" + with pytest.raises(ValueError, match=msg): + self.eval(expr1, local_dict={"df": df, "df2": df2}) + + def test_assignment_column(self): + df = DataFrame(np.random.randn(5, 2), columns=list("ab")) + orig_df = df.copy() + + # multiple assignees + with pytest.raises(SyntaxError, match="invalid syntax"): + df.eval("d c = a + b") + + # invalid assignees + msg = "left hand side of an assignment must be a single name" + with pytest.raises(SyntaxError, match=msg): + df.eval("d,c = a + b") + + msg = "cannot assign to function call" + with pytest.raises(SyntaxError, match=msg): + df.eval('Timestamp("20131001") = a + b') + + # single assignment - existing variable + expected = orig_df.copy() + expected["a"] = expected["a"] + expected["b"] + df = orig_df.copy() + df.eval("a = a + b", inplace=True) + tm.assert_frame_equal(df, expected) + + # single assignment - new variable + expected = orig_df.copy() + expected["c"] = expected["a"] + expected["b"] + df = orig_df.copy() + df.eval("c = a + b", inplace=True) + tm.assert_frame_equal(df, expected) + + # with a local name overlap + def f(): + df = orig_df.copy() + a = 1 # noqa:F841 + df.eval("a = 1 + b", inplace=True) + return df + + df = f() + expected = orig_df.copy() + expected["a"] = 1 + expected["b"] + tm.assert_frame_equal(df, expected) + + df = orig_df.copy() + + def f(): + a = 1 # noqa:F841 + old_a = df.a.copy() + df.eval("a = a + b", inplace=True) + result = old_a + df.b + tm.assert_series_equal(result, df.a, check_names=False) + assert result.name is None + + f() + + # multiple assignment + df = orig_df.copy() + df.eval("c = a + b", inplace=True) + msg = "can only assign a single expression" + with 
pytest.raises(SyntaxError, match=msg): + df.eval("c = a = b") + + # explicit targets + df = orig_df.copy() + self.eval("c = df.a + df.b", local_dict={"df": df}, target=df, inplace=True) + expected = orig_df.copy() + expected["c"] = expected["a"] + expected["b"] + tm.assert_frame_equal(df, expected) + + def test_column_in(self): + # GH 11235 + df = DataFrame({"a": [11], "b": [-32]}) + result = df.eval("a in [11, -32]") + expected = Series([True]) + # TODO: 2022-01-29: Name check failed with numexpr 2.7.3 in CI + # but cannot reproduce locally + tm.assert_series_equal(result, expected, check_names=False) + + @pytest.mark.xfail(reason="Unknown: Omitted test_ in name prior.") + def test_assignment_not_inplace(self): + # see gh-9297 + df = DataFrame(np.random.randn(5, 2), columns=list("ab")) + + actual = df.eval("c = a + b", inplace=False) + assert actual is not None + + expected = df.copy() + expected["c"] = expected["a"] + expected["b"] + tm.assert_frame_equal(df, expected) + + def test_multi_line_expression(self): + # GH 11149 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + expected = df.copy() + + expected["c"] = expected["a"] + expected["b"] + expected["d"] = expected["c"] + expected["b"] + answer = df.eval( + """ + c = a + b + d = c + b""", + inplace=True, + ) + tm.assert_frame_equal(expected, df) + assert answer is None + + expected["a"] = expected["a"] - 1 + expected["e"] = expected["a"] + 2 + answer = df.eval( + """ + a = a - 1 + e = a + 2""", + inplace=True, + ) + tm.assert_frame_equal(expected, df) + assert answer is None + + # multi-line not valid if not all assignments + msg = "Multi-line expressions are only valid if all expressions contain" + with pytest.raises(ValueError, match=msg): + df.eval( + """ + a = b + 2 + b - 2""", + inplace=False, + ) + + def test_multi_line_expression_not_inplace(self): + # GH 11149 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + expected = df.copy() + + expected["c"] = expected["a"] + expected["b"] + expected["d"] = expected["c"] + expected["b"] + df = df.eval( + """ + c = a + b + d = c + b""", + inplace=False, + ) + tm.assert_frame_equal(expected, df) + + expected["a"] = expected["a"] - 1 + expected["e"] = expected["a"] + 2 + df = df.eval( + """ + a = a - 1 + e = a + 2""", + inplace=False, + ) + tm.assert_frame_equal(expected, df) + + def test_multi_line_expression_local_variable(self): + # GH 15342 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + expected = df.copy() + + local_var = 7 + expected["c"] = expected["a"] * local_var + expected["d"] = expected["c"] + local_var + answer = df.eval( + """ + c = a * @local_var + d = c + @local_var + """, + inplace=True, + ) + tm.assert_frame_equal(expected, df) + assert answer is None + + def test_multi_line_expression_callable_local_variable(self): + # 26426 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + + def local_func(a, b): + return b + + expected = df.copy() + expected["c"] = expected["a"] * local_func(1, 7) + expected["d"] = expected["c"] + local_func(1, 7) + answer = df.eval( + """ + c = a * @local_func(1, 7) + d = c + @local_func(1, 7) + """, + inplace=True, + ) + tm.assert_frame_equal(expected, df) + assert answer is None + + def test_multi_line_expression_callable_local_variable_with_kwargs(self): + # 26426 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + + def local_func(a, b): + return b + + expected = df.copy() + expected["c"] = expected["a"] * local_func(b=7, a=1) + expected["d"] = expected["c"] + local_func(b=7, a=1) + answer = df.eval( + """ + c = a * @local_func(b=7, 
a=1) + d = c + @local_func(b=7, a=1) + """, + inplace=True, + ) + tm.assert_frame_equal(expected, df) + assert answer is None + + def test_assignment_in_query(self): + # GH 8664 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + df_orig = df.copy() + msg = "cannot assign without a target object" + with pytest.raises(ValueError, match=msg): + df.query("a = 1") + tm.assert_frame_equal(df, df_orig) + + def test_query_inplace(self): + # see gh-11149 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + expected = df.copy() + expected = expected[expected["a"] == 2] + df.query("a == 2", inplace=True) + tm.assert_frame_equal(expected, df) + + df = {} + expected = {"a": 3} + + self.eval("a = 1 + 2", target=df, inplace=True) + tm.assert_dict_equal(df, expected) + + @pytest.mark.parametrize("invalid_target", [1, "cat", [1, 2], np.array([]), (1, 3)]) + @pytest.mark.filterwarnings("ignore::FutureWarning") + def test_cannot_item_assign(self, invalid_target): + msg = "Cannot assign expression output to target" + expression = "a = 1 + 2" + + with pytest.raises(ValueError, match=msg): + self.eval(expression, target=invalid_target, inplace=True) + + if hasattr(invalid_target, "copy"): + with pytest.raises(ValueError, match=msg): + self.eval(expression, target=invalid_target, inplace=False) + + @pytest.mark.parametrize("invalid_target", [1, "cat", (1, 3)]) + def test_cannot_copy_item(self, invalid_target): + msg = "Cannot return a copy of the target" + expression = "a = 1 + 2" + + with pytest.raises(ValueError, match=msg): + self.eval(expression, target=invalid_target, inplace=False) + + @pytest.mark.parametrize("target", [1, "cat", [1, 2], np.array([]), (1, 3), {1: 2}]) + def test_inplace_no_assignment(self, target): + expression = "1 + 2" + + assert self.eval(expression, target=target, inplace=False) == 3 + + msg = "Cannot operate inplace if there is no assignment" + with pytest.raises(ValueError, match=msg): + self.eval(expression, target=target, inplace=True) + + def test_basic_period_index_boolean_expression(self): + df = tm.makeCustomDataframe(2, 2, data_gen_f=f, c_idx_type="p", r_idx_type="i") + + e = df < 2 + r = self.eval("df < 2", local_dict={"df": df}) + x = df < 2 + + tm.assert_frame_equal(r, e) + tm.assert_frame_equal(x, e) + + def test_basic_period_index_subscript_expression(self): + df = tm.makeCustomDataframe(2, 2, data_gen_f=f, c_idx_type="p", r_idx_type="i") + r = self.eval("df[df < 2 + 3]", local_dict={"df": df}) + e = df[df < 2 + 3] + tm.assert_frame_equal(r, e) + + def test_nested_period_index_subscript_expression(self): + df = tm.makeCustomDataframe(2, 2, data_gen_f=f, c_idx_type="p", r_idx_type="i") + r = self.eval("df[df[df < 2] < 2] + df * 2", local_dict={"df": df}) + e = df[df[df < 2] < 2] + df * 2 + tm.assert_frame_equal(r, e) + + def test_date_boolean(self): + df = DataFrame(np.random.randn(5, 3)) + df["dates1"] = date_range("1/1/2012", periods=5) + res = self.eval( + "df.dates1 < 20130101", + local_dict={"df": df}, + engine=self.engine, + parser=self.parser, + ) + expec = df.dates1 < "20130101" + tm.assert_series_equal(res, expec, check_names=False) + + def test_simple_in_ops(self): + if self.parser != "python": + res = pd.eval("1 in [1, 2]", engine=self.engine, parser=self.parser) + assert res + + res = pd.eval("2 in (1, 2)", engine=self.engine, parser=self.parser) + assert res + + res = pd.eval("3 in (1, 2)", engine=self.engine, parser=self.parser) + assert not res + + res = pd.eval("3 not in (1, 2)", engine=self.engine, parser=self.parser) + assert res + + res = pd.eval("[3] 
not in (1, 2)", engine=self.engine, parser=self.parser) + assert res + + res = pd.eval("[3] in ([3], 2)", engine=self.engine, parser=self.parser) + assert res + + res = pd.eval("[[3]] in [[[3]], 2]", engine=self.engine, parser=self.parser) + assert res + + res = pd.eval("(3,) in [(3,), 2]", engine=self.engine, parser=self.parser) + assert res + + res = pd.eval( + "(3,) not in [(3,), 2]", engine=self.engine, parser=self.parser + ) + assert not res + + res = pd.eval( + "[(3,)] in [[(3,)], 2]", engine=self.engine, parser=self.parser + ) + assert res + else: + msg = "'In' nodes are not implemented" + with pytest.raises(NotImplementedError, match=msg): + pd.eval("1 in [1, 2]", engine=self.engine, parser=self.parser) + with pytest.raises(NotImplementedError, match=msg): + pd.eval("2 in (1, 2)", engine=self.engine, parser=self.parser) + with pytest.raises(NotImplementedError, match=msg): + pd.eval("3 in (1, 2)", engine=self.engine, parser=self.parser) + with pytest.raises(NotImplementedError, match=msg): + pd.eval( + "[(3,)] in (1, 2, [(3,)])", engine=self.engine, parser=self.parser + ) + msg = "'NotIn' nodes are not implemented" + with pytest.raises(NotImplementedError, match=msg): + pd.eval("3 not in (1, 2)", engine=self.engine, parser=self.parser) + with pytest.raises(NotImplementedError, match=msg): + pd.eval( + "[3] not in (1, 2, [[3]])", engine=self.engine, parser=self.parser + ) + + def test_check_many_exprs(self): + a = 1 # noqa:F841 + expr = " * ".join("a" * 33) + expected = 1 + res = pd.eval(expr, engine=self.engine, parser=self.parser) + assert res == expected + + @pytest.mark.parametrize( + "expr", + [ + "df > 2 and df > 3", + "df > 2 or df > 3", + "not df > 2", + ], + ) + def test_fails_and_or_not(self, expr): + df = DataFrame(np.random.randn(5, 3)) + if self.parser == "python": + msg = "'BoolOp' nodes are not implemented" + if "not" in expr: + msg = "'Not' nodes are not implemented" + + with pytest.raises(NotImplementedError, match=msg): + pd.eval( + expr, + local_dict={"df": df}, + parser=self.parser, + engine=self.engine, + ) + else: + # smoke-test, should not raise + pd.eval( + expr, + local_dict={"df": df}, + parser=self.parser, + engine=self.engine, + ) + + @pytest.mark.parametrize("char", ["|", "&"]) + def test_fails_ampersand_pipe(self, char): + df = DataFrame(np.random.randn(5, 3)) # noqa:F841 + ex = f"(df + 2)[df > 1] > 0 {char} (df > 0)" + if self.parser == "python": + msg = "cannot evaluate scalar only bool ops" + with pytest.raises(NotImplementedError, match=msg): + pd.eval(ex, parser=self.parser, engine=self.engine) + else: + # smoke-test, should not raise + pd.eval(ex, parser=self.parser, engine=self.engine) + + +class TestMath: + @pytest.fixture(autouse=True) + def set_engine_parser_attrs(self, engine, parser): + # Older tests look for these as attributes, so we set them here. 
+ self.engine = engine + self.parser = parser + + def eval(self, *args, **kwargs): + kwargs["engine"] = self.engine + kwargs["parser"] = self.parser + kwargs["level"] = kwargs.pop("level", 0) + 1 + return pd.eval(*args, **kwargs) + + def test_unary_functions(self, unary_fns_for_ne): + df = DataFrame({"a": np.random.randn(10)}) + a = df.a + + fn = unary_fns_for_ne + + expr = f"{fn}(a)" + got = self.eval(expr) + with np.errstate(all="ignore"): + expect = getattr(np, fn)(a) + tm.assert_series_equal(got, expect, check_names=False) + + @pytest.mark.parametrize("fn", _binary_math_ops) + def test_binary_functions(self, fn): + df = DataFrame({"a": np.random.randn(10), "b": np.random.randn(10)}) + a = df.a + b = df.b + + expr = f"{fn}(a, b)" + got = self.eval(expr) + with np.errstate(all="ignore"): + expect = getattr(np, fn)(a, b) + tm.assert_almost_equal(got, expect, check_names=False) + + def test_df_use_case(self): + df = DataFrame({"a": np.random.randn(10), "b": np.random.randn(10)}) + df.eval( + "e = arctan2(sin(a), b)", + engine=self.engine, + parser=self.parser, + inplace=True, + ) + got = df.e + expect = np.arctan2(np.sin(df.a), df.b) + tm.assert_series_equal(got, expect, check_names=False) + + def test_df_arithmetic_subexpression(self): + df = DataFrame({"a": np.random.randn(10), "b": np.random.randn(10)}) + df.eval("e = sin(a + b)", engine=self.engine, parser=self.parser, inplace=True) + got = df.e + expect = np.sin(df.a + df.b) + tm.assert_series_equal(got, expect, check_names=False) + + def check_result_type(self, dtype, expect_dtype): + df = DataFrame({"a": np.random.randn(10).astype(dtype)}) + assert df.a.dtype == dtype + df.eval("b = sin(a)", engine=self.engine, parser=self.parser, inplace=True) + got = df.b + expect = np.sin(df.a) + assert expect.dtype == got.dtype + assert expect_dtype == got.dtype + tm.assert_series_equal(got, expect, check_names=False) + + def test_result_types(self): + self.check_result_type(np.int32, np.float64) + self.check_result_type(np.int64, np.float64) + self.check_result_type(np.float32, np.float32) + self.check_result_type(np.float64, np.float64) + + @td.skip_if_windows + def test_result_complex128(self): + # xref https://github.com/pandas-dev/pandas/issues/12293 + # this fails on Windows, apparently a floating point precision issue + + # Did not test complex64 because DataFrame is converting it to + # complex128. 
Due to https://github.com/pandas-dev/pandas/issues/10952 + self.check_result_type(np.complex128, np.complex128) + + def test_undefined_func(self): + df = DataFrame({"a": np.random.randn(10)}) + msg = '"mysin" is not a supported function' + + with pytest.raises(ValueError, match=msg): + df.eval("mysin(a)", engine=self.engine, parser=self.parser) + + def test_keyword_arg(self): + df = DataFrame({"a": np.random.randn(10)}) + msg = 'Function "sin" does not support keyword arguments' + + with pytest.raises(TypeError, match=msg): + df.eval("sin(x=a)", engine=self.engine, parser=self.parser) + + +_var_s = np.random.randn(10) + + +class TestScope: + def test_global_scope(self, engine, parser): + e = "_var_s * 2" + tm.assert_numpy_array_equal( + _var_s * 2, pd.eval(e, engine=engine, parser=parser) + ) + + def test_no_new_locals(self, engine, parser): + x = 1 + lcls = locals().copy() + pd.eval("x + 1", local_dict=lcls, engine=engine, parser=parser) + lcls2 = locals().copy() + lcls2.pop("lcls") + assert lcls == lcls2 + + def test_no_new_globals(self, engine, parser): + x = 1 # noqa:F841 + gbls = globals().copy() + pd.eval("x + 1", engine=engine, parser=parser) + gbls2 = globals().copy() + assert gbls == gbls2 + + +@td.skip_if_no_ne +def test_invalid_engine(): + msg = "Invalid engine 'asdf' passed" + with pytest.raises(KeyError, match=msg): + pd.eval("x + y", local_dict={"x": 1, "y": 2}, engine="asdf") + + +@td.skip_if_no_ne +@pytest.mark.parametrize( + ("use_numexpr", "expected"), + ( + (True, "numexpr"), + (False, "python"), + ), +) +def test_numexpr_option_respected(use_numexpr, expected): + # GH 32556 + from pandas.core.computation.eval import _check_engine + + with pd.option_context("compute.use_numexpr", use_numexpr): + result = _check_engine(None) + assert result == expected + + +@td.skip_if_no_ne +def test_numexpr_option_incompatible_op(): + # GH 32556 + with pd.option_context("compute.use_numexpr", False): + df = DataFrame( + {"A": [True, False, True, False, None, None], "B": [1, 2, 3, 4, 5, 6]} + ) + result = df.query("A.isnull()") + expected = DataFrame({"A": [None, None], "B": [5, 6]}, index=[4, 5]) + tm.assert_frame_equal(result, expected) + + +@td.skip_if_no_ne +def test_invalid_parser(): + msg = "Invalid parser 'asdf' passed" + with pytest.raises(KeyError, match=msg): + pd.eval("x + y", local_dict={"x": 1, "y": 2}, parser="asdf") + + +_parsers: dict[str, type[BaseExprVisitor]] = { + "python": PythonExprVisitor, + "pytables": pytables.PyTablesExprVisitor, + "pandas": PandasExprVisitor, +} + + +@pytest.mark.parametrize("engine", ENGINES) +@pytest.mark.parametrize("parser", _parsers) +def test_disallowed_nodes(engine, parser): + VisitorClass = _parsers[parser] + inst = VisitorClass("x + 1", engine, parser) + + for ops in VisitorClass.unsupported_nodes: + + msg = "nodes are not implemented" + with pytest.raises(NotImplementedError, match=msg): + getattr(inst, ops)() + + +def test_syntax_error_exprs(engine, parser): + e = "s +" + with pytest.raises(SyntaxError, match="invalid syntax"): + pd.eval(e, engine=engine, parser=parser) + + +def test_name_error_exprs(engine, parser): + e = "s + t" + msg = "name 's' is not defined" + with pytest.raises(NameError, match=msg): + pd.eval(e, engine=engine, parser=parser) + + +@pytest.mark.parametrize("express", ["a + @b", "@a + b", "@a + @b"]) +def test_invalid_local_variable_reference(engine, parser, express): + a, b = 1, 2 # noqa:F841 + + if parser != "pandas": + with pytest.raises(SyntaxError, match="The '@' prefix is only"): + pd.eval(express, 
engine=engine, parser=parser) + else: + with pytest.raises(SyntaxError, match="The '@' prefix is not"): + pd.eval(express, engine=engine, parser=parser) + + +def test_numexpr_builtin_raises(engine, parser): + sin, dotted_line = 1, 2 + if engine == "numexpr": + msg = "Variables in expression .+" + with pytest.raises(NumExprClobberingError, match=msg): + pd.eval("sin + dotted_line", engine=engine, parser=parser) + else: + res = pd.eval("sin + dotted_line", engine=engine, parser=parser) + assert res == sin + dotted_line + + +def test_bad_resolver_raises(engine, parser): + cannot_resolve = 42, 3.0 + with pytest.raises(TypeError, match="Resolver of type .+"): + pd.eval("1 + 2", resolvers=cannot_resolve, engine=engine, parser=parser) + + +def test_empty_string_raises(engine, parser): + # GH 13139 + with pytest.raises(ValueError, match="expr cannot be an empty string"): + pd.eval("", engine=engine, parser=parser) + + +def test_more_than_one_expression_raises(engine, parser): + with pytest.raises(SyntaxError, match=("only a single expression is allowed")): + pd.eval("1 + 1; 2 + 2", engine=engine, parser=parser) + + +@pytest.mark.parametrize("cmp", ("and", "or")) +@pytest.mark.parametrize("lhs", (int, float)) +@pytest.mark.parametrize("rhs", (int, float)) +def test_bool_ops_fails_on_scalars(lhs, cmp, rhs, engine, parser): + gen = {int: lambda: np.random.randint(10), float: np.random.randn} + + mid = gen[lhs]() # noqa:F841 + lhs = gen[lhs]() + rhs = gen[rhs]() + + ex1 = f"lhs {cmp} mid {cmp} rhs" + ex2 = f"lhs {cmp} mid and mid {cmp} rhs" + ex3 = f"(lhs {cmp} mid) & (mid {cmp} rhs)" + for ex in (ex1, ex2, ex3): + msg = "cannot evaluate scalar only bool ops|'BoolOp' nodes are not" + with pytest.raises(NotImplementedError, match=msg): + pd.eval(ex, engine=engine, parser=parser) + + +@pytest.mark.parametrize( + "other", + [ + "'x'", + "...", + ], +) +def test_equals_various(other): + df = DataFrame({"A": ["a", "b", "c"]}) + result = df.eval(f"A == {other}") + expected = Series([False, False, False], name="A") + if USE_NUMEXPR: + # https://github.com/pandas-dev/pandas/issues/10239 + # lose name with numexpr engine. Remove when that's fixed. 
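+ # drop the name so the comparison also passes when numexpr is in use + # (see GH 10239 referenced above)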
+ expected.name = None + tm.assert_series_equal(result, expected) + + +def test_inf(engine, parser): + s = "inf + 1" + expected = np.inf + result = pd.eval(s, engine=engine, parser=parser) + assert result == expected + + +def test_truediv_deprecated(engine, parser): + # GH#29182 + match = "The `truediv` parameter in pd.eval is deprecated" + + with tm.assert_produces_warning(FutureWarning) as m: + pd.eval("1+1", engine=engine, parser=parser, truediv=True) + + assert len(m) == 1 + assert match in str(m[0].message) + + with tm.assert_produces_warning(FutureWarning) as m: + pd.eval("1+1", engine=engine, parser=parser, truediv=False) + + assert len(m) == 1 + assert match in str(m[0].message) + + +@pytest.mark.parametrize("column", ["Temp(°C)", "Capacitance(μF)"]) +def test_query_token(engine, column): + # See: https://github.com/pandas-dev/pandas/pull/42826 + df = DataFrame(np.random.randn(5, 2), columns=[column, "b"]) + expected = df[df[column] > 5] + query_string = f"`{column}` > 5" + result = df.query(query_string, engine=engine) + tm.assert_frame_equal(result, expected) + + +def test_negate_lt_eq_le(engine, parser): + df = DataFrame([[0, 10], [1, 20]], columns=["cat", "count"]) + expected = df[~(df.cat > 0)] + + result = df.query("~(cat > 0)", engine=engine, parser=parser) + tm.assert_frame_equal(result, expected) + + if parser == "python": + msg = "'Not' nodes are not implemented" + with pytest.raises(NotImplementedError, match=msg): + df.query("not (cat > 0)", engine=engine, parser=parser) + else: + result = df.query("not (cat > 0)", engine=engine, parser=parser) + tm.assert_frame_equal(result, expected) + + +class TestValidate: + @pytest.mark.parametrize("value", [1, "True", [1, 2, 3], 5.0]) + def test_validate_bool_args(self, value): + + msg = 'For argument "inplace" expected type bool, received type' + with pytest.raises(ValueError, match=msg): + pd.eval("2+2", inplace=value) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/config/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/config/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/config/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/config/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..9f79eb9 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/config/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/config/__pycache__/test_config.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/config/__pycache__/test_config.cpython-38.pyc new file mode 100644 index 0000000..129afcf Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/config/__pycache__/test_config.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/config/__pycache__/test_localization.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/config/__pycache__/test_localization.cpython-38.pyc new file mode 100644 index 0000000..3854d93 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/config/__pycache__/test_localization.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/config/test_config.py b/.local/lib/python3.8/site-packages/pandas/tests/config/test_config.py new file mode 100644 index 0000000..761c853 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/config/test_config.py @@ -0,0 +1,477 @@ 
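+# Tests for pandas' option machinery (pandas._config.config): registering, +# getting/setting and validating options, deprecation with rerouting, +# reset_option, describe_option and option_context scoping.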
+import warnings + +import pytest + +from pandas._config import config as cf +from pandas._config.config import OptionError + +import pandas as pd + + +class TestConfig: + @classmethod + def setup_class(cls): + from copy import deepcopy + + cls.cf = cf + cls.gc = deepcopy(getattr(cls.cf, "_global_config")) + cls.do = deepcopy(getattr(cls.cf, "_deprecated_options")) + cls.ro = deepcopy(getattr(cls.cf, "_registered_options")) + + def setup_method(self, method): + setattr(self.cf, "_global_config", {}) + setattr(self.cf, "options", self.cf.DictWrapper(self.cf._global_config)) + setattr(self.cf, "_deprecated_options", {}) + setattr(self.cf, "_registered_options", {}) + + # Our test fixture in conftest.py sets "chained_assignment" + # to "raise" only after all test methods have been setup. + # However, after this setup, there is no longer any + # "chained_assignment" option, so re-register it. + self.cf.register_option("chained_assignment", "raise") + + def teardown_method(self, method): + setattr(self.cf, "_global_config", self.gc) + setattr(self.cf, "_deprecated_options", self.do) + setattr(self.cf, "_registered_options", self.ro) + + def test_api(self): + + # the pandas object exposes the user API + assert hasattr(pd, "get_option") + assert hasattr(pd, "set_option") + assert hasattr(pd, "reset_option") + assert hasattr(pd, "describe_option") + + def test_is_one_of_factory(self): + v = self.cf.is_one_of_factory([None, 12]) + + v(12) + v(None) + msg = r"Value must be one of None\|12" + with pytest.raises(ValueError, match=msg): + v(1.1) + + def test_register_option(self): + self.cf.register_option("a", 1, "doc") + + # can't register an already registered option + msg = "Option 'a' has already been registered" + with pytest.raises(OptionError, match=msg): + self.cf.register_option("a", 1, "doc") + + # can't register an already registered option + msg = "Path prefix to option 'a' is already an option" + with pytest.raises(OptionError, match=msg): + self.cf.register_option("a.b.c.d1", 1, "doc") + with pytest.raises(OptionError, match=msg): + self.cf.register_option("a.b.c.d2", 1, "doc") + + # no python keywords + msg = "for is a python keyword" + with pytest.raises(ValueError, match=msg): + self.cf.register_option("for", 0) + with pytest.raises(ValueError, match=msg): + self.cf.register_option("a.for.b", 0) + # must be valid identifier (ensure attribute access works) + msg = "oh my goddess! 
is not a valid identifier" + with pytest.raises(ValueError, match=msg): + self.cf.register_option("Oh my Goddess!", 0) + + # we can register options several levels deep + # without predefining the intermediate steps + # and we can define differently named options + # in the same namespace + self.cf.register_option("k.b.c.d1", 1, "doc") + self.cf.register_option("k.b.c.d2", 1, "doc") + + def test_describe_option(self): + self.cf.register_option("a", 1, "doc") + self.cf.register_option("b", 1, "doc2") + self.cf.deprecate_option("b") + + self.cf.register_option("c.d.e1", 1, "doc3") + self.cf.register_option("c.d.e2", 1, "doc4") + self.cf.register_option("f", 1) + self.cf.register_option("g.h", 1) + self.cf.register_option("k", 2) + self.cf.deprecate_option("g.h", rkey="k") + self.cf.register_option("l", "foo") + + # non-existent keys raise KeyError + msg = r"No such keys\(s\)" + with pytest.raises(OptionError, match=msg): + self.cf.describe_option("no.such.key") + + # we can get the description for any key we registered + assert "doc" in self.cf.describe_option("a", _print_desc=False) + assert "doc2" in self.cf.describe_option("b", _print_desc=False) + assert "precated" in self.cf.describe_option("b", _print_desc=False) + assert "doc3" in self.cf.describe_option("c.d.e1", _print_desc=False) + assert "doc4" in self.cf.describe_option("c.d.e2", _print_desc=False) + + # if no doc is specified we get a default message + # saying "description not available" + assert "available" in self.cf.describe_option("f", _print_desc=False) + assert "available" in self.cf.describe_option("g.h", _print_desc=False) + assert "precated" in self.cf.describe_option("g.h", _print_desc=False) + assert "k" in self.cf.describe_option("g.h", _print_desc=False) + + # default is reported + assert "foo" in self.cf.describe_option("l", _print_desc=False) + # current value is reported + assert "bar" not in self.cf.describe_option("l", _print_desc=False) + self.cf.set_option("l", "bar") + assert "bar" in self.cf.describe_option("l", _print_desc=False) + + def test_case_insensitive(self): + self.cf.register_option("KanBAN", 1, "doc") + + assert "doc" in self.cf.describe_option("kanbaN", _print_desc=False) + assert self.cf.get_option("kanBaN") == 1 + self.cf.set_option("KanBan", 2) + assert self.cf.get_option("kAnBaN") == 2 + + # gets of non-existent keys fail + msg = r"No such keys\(s\): 'no_such_option'" + with pytest.raises(OptionError, match=msg): + self.cf.get_option("no_such_option") + self.cf.deprecate_option("KanBan") + + assert self.cf._is_deprecated("kAnBaN") + + def test_get_option(self): + self.cf.register_option("a", 1, "doc") + self.cf.register_option("b.c", "hullo", "doc2") + self.cf.register_option("b.b", None, "doc2") + + # gets of existing keys succeed + assert self.cf.get_option("a") == 1 + assert self.cf.get_option("b.c") == "hullo" + assert self.cf.get_option("b.b") is None + + # gets of non-existent keys fail + msg = r"No such keys\(s\): 'no_such_option'" + with pytest.raises(OptionError, match=msg): + self.cf.get_option("no_such_option") + + def test_set_option(self): + self.cf.register_option("a", 1, "doc") + self.cf.register_option("b.c", "hullo", "doc2") + self.cf.register_option("b.b", None, "doc2") + + assert self.cf.get_option("a") == 1 + assert self.cf.get_option("b.c") == "hullo" + assert self.cf.get_option("b.b") is None + + self.cf.set_option("a", 2) + self.cf.set_option("b.c", "wurld") + self.cf.set_option("b.b", 1.1) + + assert self.cf.get_option("a") == 2 + assert self.cf.get_option("b.c") == 
"wurld" + assert self.cf.get_option("b.b") == 1.1 + + msg = r"No such keys\(s\): 'no.such.key'" + with pytest.raises(OptionError, match=msg): + self.cf.set_option("no.such.key", None) + + def test_set_option_empty_args(self): + msg = "Must provide an even number of non-keyword arguments" + with pytest.raises(ValueError, match=msg): + self.cf.set_option() + + def test_set_option_uneven_args(self): + msg = "Must provide an even number of non-keyword arguments" + with pytest.raises(ValueError, match=msg): + self.cf.set_option("a.b", 2, "b.c") + + def test_set_option_invalid_single_argument_type(self): + msg = "Must provide an even number of non-keyword arguments" + with pytest.raises(ValueError, match=msg): + self.cf.set_option(2) + + def test_set_option_multiple(self): + self.cf.register_option("a", 1, "doc") + self.cf.register_option("b.c", "hullo", "doc2") + self.cf.register_option("b.b", None, "doc2") + + assert self.cf.get_option("a") == 1 + assert self.cf.get_option("b.c") == "hullo" + assert self.cf.get_option("b.b") is None + + self.cf.set_option("a", "2", "b.c", None, "b.b", 10.0) + + assert self.cf.get_option("a") == "2" + assert self.cf.get_option("b.c") is None + assert self.cf.get_option("b.b") == 10.0 + + def test_validation(self): + self.cf.register_option("a", 1, "doc", validator=self.cf.is_int) + self.cf.register_option("d", 1, "doc", validator=self.cf.is_nonnegative_int) + self.cf.register_option("b.c", "hullo", "doc2", validator=self.cf.is_text) + + msg = "Value must have type ''" + with pytest.raises(ValueError, match=msg): + self.cf.register_option("a.b.c.d2", "NO", "doc", validator=self.cf.is_int) + + self.cf.set_option("a", 2) # int is_int + self.cf.set_option("b.c", "wurld") # str is_str + self.cf.set_option("d", 2) + self.cf.set_option("d", None) # non-negative int can be None + + # None not is_int + with pytest.raises(ValueError, match=msg): + self.cf.set_option("a", None) + with pytest.raises(ValueError, match=msg): + self.cf.set_option("a", "ab") + + msg = "Value must be a nonnegative integer or None" + with pytest.raises(ValueError, match=msg): + self.cf.register_option( + "a.b.c.d3", "NO", "doc", validator=self.cf.is_nonnegative_int + ) + with pytest.raises(ValueError, match=msg): + self.cf.register_option( + "a.b.c.d3", -2, "doc", validator=self.cf.is_nonnegative_int + ) + + msg = r"Value must be an instance of \|" + with pytest.raises(ValueError, match=msg): + self.cf.set_option("b.c", 1) + + validator = self.cf.is_one_of_factory([None, self.cf.is_callable]) + self.cf.register_option("b", lambda: None, "doc", validator=validator) + self.cf.set_option("b", "%.1f".format) # Formatter is callable + self.cf.set_option("b", None) # Formatter is none (default) + with pytest.raises(ValueError, match="Value must be a callable"): + self.cf.set_option("b", "%.1f") + + def test_reset_option(self): + self.cf.register_option("a", 1, "doc", validator=self.cf.is_int) + self.cf.register_option("b.c", "hullo", "doc2", validator=self.cf.is_str) + assert self.cf.get_option("a") == 1 + assert self.cf.get_option("b.c") == "hullo" + + self.cf.set_option("a", 2) + self.cf.set_option("b.c", "wurld") + assert self.cf.get_option("a") == 2 + assert self.cf.get_option("b.c") == "wurld" + + self.cf.reset_option("a") + assert self.cf.get_option("a") == 1 + assert self.cf.get_option("b.c") == "wurld" + self.cf.reset_option("b.c") + assert self.cf.get_option("a") == 1 + assert self.cf.get_option("b.c") == "hullo" + + def test_reset_option_all(self): + self.cf.register_option("a", 1, "doc", 
validator=self.cf.is_int) + self.cf.register_option("b.c", "hullo", "doc2", validator=self.cf.is_str) + assert self.cf.get_option("a") == 1 + assert self.cf.get_option("b.c") == "hullo" + + self.cf.set_option("a", 2) + self.cf.set_option("b.c", "wurld") + assert self.cf.get_option("a") == 2 + assert self.cf.get_option("b.c") == "wurld" + + self.cf.reset_option("all") + assert self.cf.get_option("a") == 1 + assert self.cf.get_option("b.c") == "hullo" + + def test_deprecate_option(self): + # we can deprecate non-existent options + self.cf.deprecate_option("foo") + + assert self.cf._is_deprecated("foo") + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + with pytest.raises(KeyError, match="No such keys.s.: 'foo'"): + self.cf.get_option("foo") + assert len(w) == 1 # should have raised one warning + assert "deprecated" in str(w[-1]) # we get the default message + + self.cf.register_option("a", 1, "doc", validator=self.cf.is_int) + self.cf.register_option("b.c", "hullo", "doc2") + self.cf.register_option("foo", "hullo", "doc2") + + self.cf.deprecate_option("a", removal_ver="nifty_ver") + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + self.cf.get_option("a") + + assert len(w) == 1 # should have raised one warning + assert "eprecated" in str(w[-1]) # we get the default message + assert "nifty_ver" in str(w[-1]) # with the removal_ver quoted + + msg = "Option 'a' has already been defined as deprecated" + with pytest.raises(OptionError, match=msg): + self.cf.deprecate_option("a") + + self.cf.deprecate_option("b.c", "zounds!") + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + self.cf.get_option("b.c") + + assert len(w) == 1 # should have raised one warning + assert "zounds!" 
in str(w[-1]) # we get the custom message + + # test rerouting keys + self.cf.register_option("d.a", "foo", "doc2") + self.cf.register_option("d.dep", "bar", "doc2") + assert self.cf.get_option("d.a") == "foo" + assert self.cf.get_option("d.dep") == "bar" + + self.cf.deprecate_option("d.dep", rkey="d.a") # reroute d.dep to d.a + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + assert self.cf.get_option("d.dep") == "foo" + + assert len(w) == 1 # should have raised one warning + assert "eprecated" in str(w[-1]) # we get the custom message + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + self.cf.set_option("d.dep", "baz") # should overwrite "d.a" + + assert len(w) == 1 # should have raised one warning + assert "eprecated" in str(w[-1]) # we get the custom message + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + assert self.cf.get_option("d.dep") == "baz" + + assert len(w) == 1 # should have raised one warning + assert "eprecated" in str(w[-1]) # we get the custom message + + def test_config_prefix(self): + with self.cf.config_prefix("base"): + self.cf.register_option("a", 1, "doc1") + self.cf.register_option("b", 2, "doc2") + assert self.cf.get_option("a") == 1 + assert self.cf.get_option("b") == 2 + + self.cf.set_option("a", 3) + self.cf.set_option("b", 4) + assert self.cf.get_option("a") == 3 + assert self.cf.get_option("b") == 4 + + assert self.cf.get_option("base.a") == 3 + assert self.cf.get_option("base.b") == 4 + assert "doc1" in self.cf.describe_option("base.a", _print_desc=False) + assert "doc2" in self.cf.describe_option("base.b", _print_desc=False) + + self.cf.reset_option("base.a") + self.cf.reset_option("base.b") + + with self.cf.config_prefix("base"): + assert self.cf.get_option("a") == 1 + assert self.cf.get_option("b") == 2 + + def test_callback(self): + k = [None] + v = [None] + + def callback(key): + k.append(key) + v.append(self.cf.get_option(key)) + + self.cf.register_option("d.a", "foo", cb=callback) + self.cf.register_option("d.b", "foo", cb=callback) + + del k[-1], v[-1] + self.cf.set_option("d.a", "fooz") + assert k[-1] == "d.a" + assert v[-1] == "fooz" + + del k[-1], v[-1] + self.cf.set_option("d.b", "boo") + assert k[-1] == "d.b" + assert v[-1] == "boo" + + del k[-1], v[-1] + self.cf.reset_option("d.b") + assert k[-1] == "d.b" + + def test_set_ContextManager(self): + def eq(val): + assert self.cf.get_option("a") == val + + self.cf.register_option("a", 0) + eq(0) + with self.cf.option_context("a", 15): + eq(15) + with self.cf.option_context("a", 25): + eq(25) + eq(15) + eq(0) + + self.cf.set_option("a", 17) + eq(17) + + # Test that option_context can be used as a decorator too (#34253). 
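+ # applied as a decorator, the context manager sets "a" to 123 only for + # the duration of each call to f() and restores it afterwards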
+ @self.cf.option_context("a", 123) + def f(): + eq(123) + + f() + + def test_attribute_access(self): + holder = [] + + def f3(key): + holder.append(True) + + self.cf.register_option("a", 0) + self.cf.register_option("c", 0, cb=f3) + options = self.cf.options + + assert options.a == 0 + with self.cf.option_context("a", 15): + assert options.a == 15 + + options.a = 500 + assert self.cf.get_option("a") == 500 + + self.cf.reset_option("a") + assert options.a == self.cf.get_option("a", 0) + + msg = "You can only set the value of existing options" + with pytest.raises(OptionError, match=msg): + options.b = 1 + with pytest.raises(OptionError, match=msg): + options.display = 1 + + # make sure callback kicks when using this form of setting + options.c = 1 + assert len(holder) == 1 + + def test_option_context_scope(self): + # Ensure that creating a context does not affect the existing + # environment as it is supposed to be used with the `with` statement. + # See https://github.com/pandas-dev/pandas/issues/8514 + + original_value = 60 + context_value = 10 + option_name = "a" + + self.cf.register_option(option_name, original_value) + + # Ensure creating contexts didn't affect the current context. + ctx = self.cf.option_context(option_name, context_value) + assert self.cf.get_option(option_name) == original_value + + # Ensure the correct value is available inside the context. + with ctx: + assert self.cf.get_option(option_name) == context_value + + # Ensure the current context is reset + assert self.cf.get_option(option_name) == original_value + + def test_dictwrapper_getattr(self): + options = self.cf.options + # GH 19789 + with pytest.raises(OptionError, match="No such option"): + options.bananas + assert not hasattr(options, "bananas") diff --git a/.local/lib/python3.8/site-packages/pandas/tests/config/test_localization.py b/.local/lib/python3.8/site-packages/pandas/tests/config/test_localization.py new file mode 100644 index 0000000..21b1b7e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/config/test_localization.py @@ -0,0 +1,109 @@ +import codecs +import locale +import os + +import pytest + +from pandas._config.localization import ( + can_set_locale, + get_locales, + set_locale, +) + +from pandas.compat import is_platform_windows + +import pandas as pd + +_all_locales = get_locales() or [] +_current_locale = locale.getlocale() + +# Don't run any of these tests if we are on Windows or have no locales. +pytestmark = pytest.mark.skipif( + is_platform_windows() or not _all_locales, reason="Need non-Windows and locales" +) + +_skip_if_only_one_locale = pytest.mark.skipif( + len(_all_locales) <= 1, reason="Need multiple locales for meaningful test" +) + + +def test_can_set_locale_valid_set(): + # Can set the default locale. + assert can_set_locale("") + + +def test_can_set_locale_invalid_set(): + # Cannot set an invalid locale. + assert not can_set_locale("non-existent_locale") + + +def test_can_set_locale_invalid_get(monkeypatch): + # see GH#22129 + # In some cases, an invalid locale can be set, + # but a subsequent getlocale() raises a ValueError. 
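+ # stand-in for locale.getlocale that always raises ValueError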
+ + def mock_get_locale(): + raise ValueError() + + with monkeypatch.context() as m: + m.setattr(locale, "getlocale", mock_get_locale) + assert not can_set_locale("") + + +def test_get_locales_at_least_one(): + # see GH#9744 + assert len(_all_locales) > 0 + + +@_skip_if_only_one_locale +def test_get_locales_prefix(): + first_locale = _all_locales[0] + assert len(get_locales(prefix=first_locale[:2])) > 0 + + +@_skip_if_only_one_locale +@pytest.mark.parametrize( + "lang,enc", + [ + ("it_CH", "UTF-8"), + ("en_US", "ascii"), + ("zh_CN", "GB2312"), + ("it_IT", "ISO-8859-1"), + ], +) +def test_set_locale(lang, enc): + if all(x is None for x in _current_locale): + # Not sure why, but on some Travis runs with pytest, + # getlocale() returned (None, None). + pytest.skip("Current locale is not set.") + + enc = codecs.lookup(enc).name + new_locale = lang, enc + + if not can_set_locale(new_locale): + msg = "unsupported locale setting" + + with pytest.raises(locale.Error, match=msg): + with set_locale(new_locale): + pass + else: + with set_locale(new_locale) as normalized_locale: + new_lang, new_enc = normalized_locale.split(".") + new_enc = codecs.lookup(enc).name + + normalized_locale = new_lang, new_enc + assert normalized_locale == new_locale + + # Once we exit the "with" statement, locale should be back to what it was. + current_locale = locale.getlocale() + assert current_locale == _current_locale + + +def test_encoding_detected(): + system_locale = os.environ.get("LC_ALL") + system_encoding = system_locale.split(".")[-1] if system_locale else "utf-8" + + assert ( + codecs.lookup(pd.options.display.encoding).name + == codecs.lookup(system_encoding).name + ) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/construction/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/construction/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/construction/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/construction/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..f40ef6e Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/construction/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/construction/__pycache__/test_extract_array.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/construction/__pycache__/test_extract_array.cpython-38.pyc new file mode 100644 index 0000000..b24717e Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/construction/__pycache__/test_extract_array.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/construction/test_extract_array.py b/.local/lib/python3.8/site-packages/pandas/tests/construction/test_extract_array.py new file mode 100644 index 0000000..4dd3eda --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/construction/test_extract_array.py @@ -0,0 +1,18 @@ +from pandas import Index +import pandas._testing as tm +from pandas.core.construction import extract_array + + +def test_extract_array_rangeindex(): + ri = Index(range(5)) + + expected = ri._values + res = extract_array(ri, extract_numpy=True, extract_range=True) + tm.assert_numpy_array_equal(res, expected) + res = extract_array(ri, extract_numpy=False, extract_range=True) + tm.assert_numpy_array_equal(res, expected) + + res = extract_array(ri, extract_numpy=True, extract_range=False) + tm.assert_index_equal(res, ri) 
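+ # (added note) together with the call below, the four extract_array calls
+ # cover the full 2x2 flag grid: extract_range=True materializes the
+ # RangeIndex into its underlying ndarray, while extract_range=False hands
+ # the Index back untouched, regardless of extract_numpy.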
+ res = extract_array(ri, extract_numpy=False, extract_range=False) + tm.assert_index_equal(res, ri) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/dtypes/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/dtypes/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..36e3afe Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/dtypes/__pycache__/test_common.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/__pycache__/test_common.cpython-38.pyc new file mode 100644 index 0000000..c7b6912 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/__pycache__/test_common.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/dtypes/__pycache__/test_concat.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/__pycache__/test_concat.cpython-38.pyc new file mode 100644 index 0000000..debd706 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/__pycache__/test_concat.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/dtypes/__pycache__/test_dtypes.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/__pycache__/test_dtypes.cpython-38.pyc new file mode 100644 index 0000000..fc2f5ed Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/__pycache__/test_dtypes.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/dtypes/__pycache__/test_generic.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/__pycache__/test_generic.cpython-38.pyc new file mode 100644 index 0000000..ea9d809 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/__pycache__/test_generic.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/dtypes/__pycache__/test_inference.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/__pycache__/test_inference.cpython-38.pyc new file mode 100644 index 0000000..64d1710 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/__pycache__/test_inference.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/dtypes/__pycache__/test_missing.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/__pycache__/test_missing.cpython-38.pyc new file mode 100644 index 0000000..f4db86e Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/__pycache__/test_missing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..2a251b2 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/__pycache__/__init__.cpython-38.pyc differ diff --git 
a/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/__pycache__/test_can_hold_element.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/__pycache__/test_can_hold_element.cpython-38.pyc new file mode 100644 index 0000000..d5efc01 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/__pycache__/test_can_hold_element.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/__pycache__/test_construct_from_scalar.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/__pycache__/test_construct_from_scalar.cpython-38.pyc new file mode 100644 index 0000000..2f0a69e Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/__pycache__/test_construct_from_scalar.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/__pycache__/test_construct_ndarray.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/__pycache__/test_construct_ndarray.cpython-38.pyc new file mode 100644 index 0000000..d01747a Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/__pycache__/test_construct_ndarray.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/__pycache__/test_construct_object_arr.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/__pycache__/test_construct_object_arr.cpython-38.pyc new file mode 100644 index 0000000..e4723c3 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/__pycache__/test_construct_object_arr.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/__pycache__/test_dict_compat.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/__pycache__/test_dict_compat.cpython-38.pyc new file mode 100644 index 0000000..119107c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/__pycache__/test_dict_compat.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/__pycache__/test_downcast.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/__pycache__/test_downcast.cpython-38.pyc new file mode 100644 index 0000000..a81f7a6 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/__pycache__/test_downcast.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/__pycache__/test_find_common_type.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/__pycache__/test_find_common_type.cpython-38.pyc new file mode 100644 index 0000000..c73620d Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/__pycache__/test_find_common_type.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/__pycache__/test_infer_datetimelike.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/__pycache__/test_infer_datetimelike.cpython-38.pyc new file mode 100644 index 0000000..f453e82 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/__pycache__/test_infer_datetimelike.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/__pycache__/test_infer_dtype.cpython-38.pyc 
b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/__pycache__/test_infer_dtype.cpython-38.pyc new file mode 100644 index 0000000..772a6ee Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/__pycache__/test_infer_dtype.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/__pycache__/test_maybe_box_native.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/__pycache__/test_maybe_box_native.cpython-38.pyc new file mode 100644 index 0000000..1f6548d Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/__pycache__/test_maybe_box_native.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/__pycache__/test_promote.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/__pycache__/test_promote.cpython-38.pyc new file mode 100644 index 0000000..ac8e765 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/__pycache__/test_promote.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/test_can_hold_element.py b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/test_can_hold_element.py new file mode 100644 index 0000000..906123b --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/test_can_hold_element.py @@ -0,0 +1,70 @@ +import numpy as np + +from pandas.core.dtypes.cast import can_hold_element + + +def test_can_hold_element_range(any_int_numpy_dtype): + # GH#44261 + dtype = np.dtype(any_int_numpy_dtype) + arr = np.array([], dtype=dtype) + + rng = range(2, 127) + assert can_hold_element(arr, rng) + + # negatives -> can't be held by uint dtypes + rng = range(-2, 127) + if dtype.kind == "i": + assert can_hold_element(arr, rng) + else: + assert not can_hold_element(arr, rng) + + rng = range(2, 255) + if dtype == "int8": + assert not can_hold_element(arr, rng) + else: + assert can_hold_element(arr, rng) + + rng = range(-255, 65537) + if dtype.kind == "u": + assert not can_hold_element(arr, rng) + elif dtype.itemsize < 4: + assert not can_hold_element(arr, rng) + else: + assert can_hold_element(arr, rng) + + # empty + rng = range(-(10 ** 10), -(10 ** 10)) + assert len(rng) == 0 + # assert can_hold_element(arr, rng) + + rng = range(10 ** 10, 10 ** 10) + assert len(rng) == 0 + assert can_hold_element(arr, rng) + + +def test_can_hold_element_int_values_float_ndarray(): + arr = np.array([], dtype=np.int64) + + element = np.array([1.0, 2.0]) + assert can_hold_element(arr, element) + + assert not can_hold_element(arr, element + 0.5) + + # integer but not losslessly castable to int64 + element = np.array([3, 2 ** 65], dtype=np.float64) + assert not can_hold_element(arr, element) + + +def test_can_hold_element_int8_int(): + arr = np.array([], dtype=np.int8) + + element = 2 + assert can_hold_element(arr, element) + assert can_hold_element(arr, np.int8(element)) + assert can_hold_element(arr, np.uint8(element)) + assert can_hold_element(arr, np.int16(element)) + assert can_hold_element(arr, np.uint16(element)) + assert can_hold_element(arr, np.int32(element)) + assert can_hold_element(arr, np.uint32(element)) + assert can_hold_element(arr, np.int64(element)) + assert can_hold_element(arr, np.uint64(element)) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/test_construct_from_scalar.py 
b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/test_construct_from_scalar.py new file mode 100644 index 0000000..0ce04ce --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/test_construct_from_scalar.py @@ -0,0 +1,55 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.cast import construct_1d_arraylike_from_scalar +from pandas.core.dtypes.dtypes import CategoricalDtype + +from pandas import ( + Categorical, + Timedelta, +) +import pandas._testing as tm + + +def test_cast_1d_array_like_from_scalar_categorical(): + # see gh-19565 + # + # Categorical result from scalar did not maintain + # categories and ordering of the passed dtype. + cats = ["a", "b", "c"] + cat_type = CategoricalDtype(categories=cats, ordered=False) + expected = Categorical(["a", "a"], categories=cats) + + result = construct_1d_arraylike_from_scalar("a", len(expected), cat_type) + tm.assert_categorical_equal(result, expected) + + +def test_cast_1d_array_like_from_timestamp(fixed_now_ts): + # check we dont lose nanoseconds + ts = fixed_now_ts + Timedelta(1) + res = construct_1d_arraylike_from_scalar(ts, 2, np.dtype("M8[ns]")) + assert res[0] == ts + + +def test_cast_1d_array_like_from_timedelta(): + # check we dont lose nanoseconds + td = Timedelta(1) + res = construct_1d_arraylike_from_scalar(td, 2, np.dtype("m8[ns]")) + assert res[0] == td + + +def test_cast_1d_array_like_mismatched_datetimelike(): + td = np.timedelta64("NaT", "ns") + dt = np.datetime64("NaT", "ns") + + with pytest.raises(TypeError, match="Cannot cast"): + construct_1d_arraylike_from_scalar(td, 2, dt.dtype) + + with pytest.raises(TypeError, match="Cannot cast"): + construct_1d_arraylike_from_scalar(np.timedelta64(4, "ns"), 2, dt.dtype) + + with pytest.raises(TypeError, match="Cannot cast"): + construct_1d_arraylike_from_scalar(dt, 2, td.dtype) + + with pytest.raises(TypeError, match="Cannot cast"): + construct_1d_arraylike_from_scalar(np.datetime64(4, "ns"), 2, td.dtype) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/test_construct_ndarray.py b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/test_construct_ndarray.py new file mode 100644 index 0000000..10085dd --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/test_construct_ndarray.py @@ -0,0 +1,30 @@ +import numpy as np +import pytest + +import pandas._testing as tm +from pandas.core.construction import sanitize_array + + +@pytest.mark.parametrize( + "values, dtype, expected", + [ + ([1, 2, 3], None, np.array([1, 2, 3], dtype=np.int64)), + (np.array([1, 2, 3]), None, np.array([1, 2, 3])), + (["1", "2", None], None, np.array(["1", "2", None])), + (["1", "2", None], np.dtype("str"), np.array(["1", "2", None])), + ([1, 2, None], np.dtype("str"), np.array(["1", "2", None])), + ], +) +def test_construct_1d_ndarray_preserving_na(values, dtype, expected): + result = sanitize_array(values, index=None, dtype=dtype) + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize("dtype", ["m8[ns]", "M8[ns]"]) +def test_construct_1d_ndarray_preserving_na_datetimelike(dtype): + arr = np.arange(5, dtype=np.int64).view(dtype) + expected = np.array(list(arr), dtype=object) + assert all(isinstance(x, type(arr[0])) for x in expected) + + result = sanitize_array(arr, index=None, dtype=np.dtype(object)) + tm.assert_numpy_array_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/test_construct_object_arr.py 
b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/test_construct_object_arr.py new file mode 100644 index 0000000..cb44f91 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/test_construct_object_arr.py @@ -0,0 +1,20 @@ +import pytest + +from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike + + +@pytest.mark.parametrize("datum1", [1, 2.0, "3", (4, 5), [6, 7], None]) +@pytest.mark.parametrize("datum2", [8, 9.0, "10", (11, 12), [13, 14], None]) +def test_cast_1d_array(datum1, datum2): + data = [datum1, datum2] + result = construct_1d_object_array_from_listlike(data) + + # Direct comparison fails: https://github.com/numpy/numpy/issues/10218 + assert result.dtype == "object" + assert list(result) == data + + +@pytest.mark.parametrize("val", [1, 2.0, None]) +def test_cast_1d_array_invalid_scalar(val): + with pytest.raises(TypeError, match="has no len()"): + construct_1d_object_array_from_listlike(val) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/test_dict_compat.py b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/test_dict_compat.py new file mode 100644 index 0000000..13dc82d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/test_dict_compat.py @@ -0,0 +1,14 @@ +import numpy as np + +from pandas.core.dtypes.cast import dict_compat + +from pandas import Timestamp + + +def test_dict_compat(): + data_datetime64 = {np.datetime64("1990-03-15"): 1, np.datetime64("2015-03-15"): 2} + data_unchanged = {1: 2, 3: 4, 5: 6} + expected = {Timestamp("1990-3-15"): 1, Timestamp("2015-03-15"): 2} + assert dict_compat(data_datetime64) == expected + assert dict_compat(expected) == expected + assert dict_compat(data_unchanged) == data_unchanged diff --git a/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/test_downcast.py b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/test_downcast.py new file mode 100644 index 0000000..b1efc1e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/test_downcast.py @@ -0,0 +1,87 @@ +import decimal + +import numpy as np +import pytest + +from pandas.core.dtypes.cast import maybe_downcast_to_dtype + +from pandas import Series +import pandas._testing as tm + + +@pytest.mark.parametrize( + "arr,dtype,expected", + [ + ( + np.array([8.5, 8.6, 8.7, 8.8, 8.9999999999995]), + "infer", + np.array([8.5, 8.6, 8.7, 8.8, 8.9999999999995]), + ), + ( + np.array([8.0, 8.0, 8.0, 8.0, 8.9999999999995]), + "infer", + np.array([8, 8, 8, 8, 9], dtype=np.int64), + ), + ( + np.array([8.0, 8.0, 8.0, 8.0, 9.0000000000005]), + "infer", + np.array([8, 8, 8, 8, 9], dtype=np.int64), + ), + ( + # This is a judgement call, but we do _not_ downcast Decimal + # objects + np.array([decimal.Decimal(0.0)]), + "int64", + np.array([decimal.Decimal(0.0)]), + ), + ], +) +def test_downcast(arr, expected, dtype): + result = maybe_downcast_to_dtype(arr, dtype) + tm.assert_numpy_array_equal(result, expected) + + +def test_downcast_booleans(): + # see gh-16875: coercing of booleans. 
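+ # (added note) the regression in gh-16875 coerced a boolean Series to
+ # float, silently turning True/False into 1.0/0.0; the fixed behaviour
+ # asserted below is that maybe_downcast_to_dtype returns it unchanged.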
+ ser = Series([True, True, False]) + result = maybe_downcast_to_dtype(ser, np.dtype(np.float64)) + + expected = ser + tm.assert_series_equal(result, expected) + + +def test_downcast_conversion_no_nan(any_real_numpy_dtype): + dtype = any_real_numpy_dtype + expected = np.array([1, 2]) + arr = np.array([1.0, 2.0], dtype=dtype) + + result = maybe_downcast_to_dtype(arr, "infer") + tm.assert_almost_equal(result, expected, check_dtype=False) + + +def test_downcast_conversion_nan(float_numpy_dtype): + dtype = float_numpy_dtype + data = [1.0, 2.0, np.nan] + + expected = np.array(data, dtype=dtype) + arr = np.array(data, dtype=dtype) + + result = maybe_downcast_to_dtype(arr, "infer") + tm.assert_almost_equal(result, expected) + + +def test_downcast_conversion_empty(any_real_numpy_dtype): + dtype = any_real_numpy_dtype + arr = np.array([], dtype=dtype) + result = maybe_downcast_to_dtype(arr, np.dtype("int64")) + tm.assert_numpy_array_equal(result, np.array([], dtype=np.int64)) + + +@pytest.mark.parametrize("klass", [np.datetime64, np.timedelta64]) +def test_datetime_likes_nan(klass): + dtype = klass.__name__ + "[ns]" + arr = np.array([1, 2, np.nan]) + + exp = np.array([1, 2, klass("NaT")], dtype) + res = maybe_downcast_to_dtype(arr, dtype) + tm.assert_numpy_array_equal(res, exp) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/test_find_common_type.py b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/test_find_common_type.py new file mode 100644 index 0000000..8484b55 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/test_find_common_type.py @@ -0,0 +1,173 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.cast import find_common_type +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + DatetimeTZDtype, + IntervalDtype, + PeriodDtype, +) + +from pandas import ( + Categorical, + Index, +) + + +@pytest.mark.parametrize( + "source_dtypes,expected_common_dtype", + [ + ((np.int64,), np.int64), + ((np.uint64,), np.uint64), + ((np.float32,), np.float32), + ((object,), object), + # Into ints. + ((np.int16, np.int64), np.int64), + ((np.int32, np.uint32), np.int64), + ((np.uint16, np.uint64), np.uint64), + # Into floats. + ((np.float16, np.float32), np.float32), + ((np.float16, np.int16), np.float32), + ((np.float32, np.int16), np.float32), + ((np.uint64, np.int64), np.float64), + ((np.int16, np.float64), np.float64), + ((np.float16, np.int64), np.float64), + # Into others. + ((np.complex128, np.int32), np.complex128), + ((object, np.float32), object), + ((object, np.int16), object), + # Bool with int. + ((np.dtype("bool"), np.int64), object), + ((np.dtype("bool"), np.int32), object), + ((np.dtype("bool"), np.int16), object), + ((np.dtype("bool"), np.int8), object), + ((np.dtype("bool"), np.uint64), object), + ((np.dtype("bool"), np.uint32), object), + ((np.dtype("bool"), np.uint16), object), + ((np.dtype("bool"), np.uint8), object), + # Bool with float. 
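+ # (added note, covering the int cases above and the float cases below)
+ # bool never unifies with a numeric dtype: find_common_type falls back to
+ # object rather than reinterpreting True/False as 0/1.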
+ ((np.dtype("bool"), np.float64), object), + ((np.dtype("bool"), np.float32), object), + ( + (np.dtype("datetime64[ns]"), np.dtype("datetime64[ns]")), + np.dtype("datetime64[ns]"), + ), + ( + (np.dtype("timedelta64[ns]"), np.dtype("timedelta64[ns]")), + np.dtype("timedelta64[ns]"), + ), + ( + (np.dtype("datetime64[ns]"), np.dtype("datetime64[ms]")), + np.dtype("datetime64[ns]"), + ), + ( + (np.dtype("timedelta64[ms]"), np.dtype("timedelta64[ns]")), + np.dtype("timedelta64[ns]"), + ), + ((np.dtype("datetime64[ns]"), np.dtype("timedelta64[ns]")), object), + ((np.dtype("datetime64[ns]"), np.int64), object), + ], +) +def test_numpy_dtypes(source_dtypes, expected_common_dtype): + assert find_common_type(source_dtypes) == expected_common_dtype + + +def test_raises_empty_input(): + with pytest.raises(ValueError, match="no types given"): + find_common_type([]) + + +@pytest.mark.parametrize( + "dtypes,exp_type", + [ + ([CategoricalDtype()], "category"), + ([object, CategoricalDtype()], object), + ([CategoricalDtype(), CategoricalDtype()], "category"), + ], +) +def test_categorical_dtype(dtypes, exp_type): + assert find_common_type(dtypes) == exp_type + + +def test_datetimetz_dtype_match(): + dtype = DatetimeTZDtype(unit="ns", tz="US/Eastern") + assert find_common_type([dtype, dtype]) == "datetime64[ns, US/Eastern]" + + +@pytest.mark.parametrize( + "dtype2", + [ + DatetimeTZDtype(unit="ns", tz="Asia/Tokyo"), + np.dtype("datetime64[ns]"), + object, + np.int64, + ], +) +def test_datetimetz_dtype_mismatch(dtype2): + dtype = DatetimeTZDtype(unit="ns", tz="US/Eastern") + assert find_common_type([dtype, dtype2]) == object + assert find_common_type([dtype2, dtype]) == object + + +def test_period_dtype_match(): + dtype = PeriodDtype(freq="D") + assert find_common_type([dtype, dtype]) == "period[D]" + + +@pytest.mark.parametrize( + "dtype2", + [ + DatetimeTZDtype(unit="ns", tz="Asia/Tokyo"), + PeriodDtype(freq="2D"), + PeriodDtype(freq="H"), + np.dtype("datetime64[ns]"), + object, + np.int64, + ], +) +def test_period_dtype_mismatch(dtype2): + dtype = PeriodDtype(freq="D") + assert find_common_type([dtype, dtype2]) == object + assert find_common_type([dtype2, dtype]) == object + + +interval_dtypes = [ + IntervalDtype(np.int64, "right"), + IntervalDtype(np.float64, "right"), + IntervalDtype(np.uint64, "right"), + IntervalDtype(DatetimeTZDtype(unit="ns", tz="US/Eastern"), "right"), + IntervalDtype("M8[ns]", "right"), + IntervalDtype("m8[ns]", "right"), +] + + +@pytest.mark.parametrize("left", interval_dtypes) +@pytest.mark.parametrize("right", interval_dtypes) +def test_interval_dtype(left, right): + result = find_common_type([left, right]) + + if left is right: + assert result is left + + elif left.subtype.kind in ["i", "u", "f"]: + # i.e. 
numeric + if right.subtype.kind in ["i", "u", "f"]: + # both numeric -> common numeric subtype + expected = IntervalDtype(np.float64, "right") + assert result == expected + else: + assert result == object + + else: + assert result == object + + +@pytest.mark.parametrize("dtype", interval_dtypes) +def test_interval_dtype_with_categorical(dtype): + obj = Index([], dtype=dtype) + + cat = Categorical([], categories=obj) + + result = find_common_type([dtype, cat.dtype]) + assert result == dtype diff --git a/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/test_infer_datetimelike.py b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/test_infer_datetimelike.py new file mode 100644 index 0000000..3c3844e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/test_infer_datetimelike.py @@ -0,0 +1,28 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + NaT, + Series, + Timestamp, +) + + +@pytest.mark.parametrize( + "data,exp_size", + [ + # see gh-16362. + ([[NaT, "a", "b", 0], [NaT, "b", "c", 1]], 8), + ([[NaT, "a", 0], [NaT, "b", 1]], 6), + ], +) +def test_maybe_infer_to_datetimelike_df_construct(data, exp_size): + result = DataFrame(np.array(data)) + assert result.size == exp_size + + +def test_maybe_infer_to_datetimelike_ser_construct(): + # see gh-19671. + result = Series(["M1701", Timestamp("20130101")]) + assert result.dtype.kind == "O" diff --git a/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/test_infer_dtype.py b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/test_infer_dtype.py new file mode 100644 index 0000000..902130b --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/test_infer_dtype.py @@ -0,0 +1,208 @@ +from datetime import ( + date, + datetime, + timedelta, +) + +import numpy as np +import pytest + +from pandas.core.dtypes.cast import ( + infer_dtype_from, + infer_dtype_from_array, + infer_dtype_from_scalar, +) +from pandas.core.dtypes.common import is_dtype_equal + +from pandas import ( + Categorical, + Interval, + Period, + Series, + Timedelta, + Timestamp, + date_range, +) + + +@pytest.fixture(params=[True, False]) +def pandas_dtype(request): + return request.param + + +def test_infer_dtype_from_int_scalar(any_int_numpy_dtype): + # Test that infer_dtype_from_scalar is + # returning correct dtype for int and float. 
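+ # (added note) sketch of the expected behaviour: for example,
+ # infer_dtype_from_scalar(np.int32(12)) should return (np.int32, 12) --
+ # the scalar keeps its own width instead of being widened to int64.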
+ data = np.dtype(any_int_numpy_dtype).type(12) + dtype, val = infer_dtype_from_scalar(data) + assert dtype == type(data) + + +def test_infer_dtype_from_float_scalar(float_numpy_dtype): + float_numpy_dtype = np.dtype(float_numpy_dtype).type + data = float_numpy_dtype(12) + + dtype, val = infer_dtype_from_scalar(data) + assert dtype == float_numpy_dtype + + +@pytest.mark.parametrize( + "data,exp_dtype", [(12, np.int64), (np.float_(12), np.float64)] +) +def test_infer_dtype_from_python_scalar(data, exp_dtype): + dtype, val = infer_dtype_from_scalar(data) + assert dtype == exp_dtype + + +@pytest.mark.parametrize("bool_val", [True, False]) +def test_infer_dtype_from_boolean(bool_val): + dtype, val = infer_dtype_from_scalar(bool_val) + assert dtype == np.bool_ + + +def test_infer_dtype_from_complex(complex_dtype): + data = np.dtype(complex_dtype).type(1) + dtype, val = infer_dtype_from_scalar(data) + assert dtype == np.complex_ + + +@pytest.mark.parametrize( + "data", [np.datetime64(1, "ns"), Timestamp(1), datetime(2000, 1, 1, 0, 0)] +) +def test_infer_dtype_from_datetime(data): + dtype, val = infer_dtype_from_scalar(data) + assert dtype == "M8[ns]" + + +@pytest.mark.parametrize("data", [np.timedelta64(1, "ns"), Timedelta(1), timedelta(1)]) +def test_infer_dtype_from_timedelta(data): + dtype, val = infer_dtype_from_scalar(data) + assert dtype == "m8[ns]" + + +@pytest.mark.parametrize("freq", ["M", "D"]) +def test_infer_dtype_from_period(freq, pandas_dtype): + p = Period("2011-01-01", freq=freq) + dtype, val = infer_dtype_from_scalar(p, pandas_dtype=pandas_dtype) + + if pandas_dtype: + exp_dtype = f"period[{freq}]" + else: + exp_dtype = np.object_ + + assert dtype == exp_dtype + assert val == p + + +@pytest.mark.parametrize( + "data", [date(2000, 1, 1), "foo", Timestamp(1, tz="US/Eastern")] +) +def test_infer_dtype_misc(data): + dtype, val = infer_dtype_from_scalar(data) + assert dtype == np.object_ + + +@pytest.mark.parametrize("tz", ["UTC", "US/Eastern", "Asia/Tokyo"]) +def test_infer_from_scalar_tz(tz, pandas_dtype): + dt = Timestamp(1, tz=tz) + dtype, val = infer_dtype_from_scalar(dt, pandas_dtype=pandas_dtype) + + if pandas_dtype: + exp_dtype = f"datetime64[ns, {tz}]" + else: + exp_dtype = np.object_ + + assert dtype == exp_dtype + assert val == dt + + +@pytest.mark.parametrize( + "left, right, subtype", + [ + (0, 1, "int64"), + (0.0, 1.0, "float64"), + (Timestamp(0), Timestamp(1), "datetime64[ns]"), + (Timestamp(0, tz="UTC"), Timestamp(1, tz="UTC"), "datetime64[ns, UTC]"), + (Timedelta(0), Timedelta(1), "timedelta64[ns]"), + ], +) +def test_infer_from_interval(left, right, subtype, closed, pandas_dtype): + # GH 30337 + interval = Interval(left, right, closed) + result_dtype, result_value = infer_dtype_from_scalar(interval, pandas_dtype) + expected_dtype = f"interval[{subtype}, {closed}]" if pandas_dtype else np.object_ + assert result_dtype == expected_dtype + assert result_value == interval + + +def test_infer_dtype_from_scalar_errors(): + msg = "invalid ndarray passed to infer_dtype_from_scalar" + + with pytest.raises(ValueError, match=msg): + infer_dtype_from_scalar(np.array([1])) + + +@pytest.mark.parametrize( + "value, expected, pandas_dtype", + [ + ("foo", np.object_, False), + (b"foo", np.object_, False), + (1, np.int64, False), + (1.5, np.float_, False), + (np.datetime64("2016-01-01"), np.dtype("M8[ns]"), False), + (Timestamp("20160101"), np.dtype("M8[ns]"), False), + (Timestamp("20160101", tz="UTC"), np.object_, False), + (Timestamp("20160101", tz="UTC"), "datetime64[ns, UTC]", 
True), + ], +) +def test_infer_dtype_from_scalar(value, expected, pandas_dtype): + dtype, _ = infer_dtype_from_scalar(value, pandas_dtype=pandas_dtype) + assert is_dtype_equal(dtype, expected) + + with pytest.raises(TypeError, match="must be list-like"): + infer_dtype_from_array(value, pandas_dtype=pandas_dtype) + + +@pytest.mark.parametrize( + "arr, expected, pandas_dtype", + [ + ([1], np.int_, False), + (np.array([1], dtype=np.int64), np.int64, False), + ([np.nan, 1, ""], np.object_, False), + (np.array([[1.0, 2.0]]), np.float_, False), + (Categorical(list("aabc")), np.object_, False), + (Categorical([1, 2, 3]), np.int64, False), + (Categorical(list("aabc")), "category", True), + (Categorical([1, 2, 3]), "category", True), + (date_range("20160101", periods=3), np.dtype("=M8[ns]"), False), + ( + date_range("20160101", periods=3, tz="US/Eastern"), + "datetime64[ns, US/Eastern]", + True, + ), + (Series([1.0, 2, 3]), np.float64, False), + (Series(list("abc")), np.object_, False), + ( + Series(date_range("20160101", periods=3, tz="US/Eastern")), + "datetime64[ns, US/Eastern]", + True, + ), + ], +) +def test_infer_dtype_from_array(arr, expected, pandas_dtype): + dtype, _ = infer_dtype_from_array(arr, pandas_dtype=pandas_dtype) + assert is_dtype_equal(dtype, expected) + + +@pytest.mark.parametrize("cls", [np.datetime64, np.timedelta64]) +def test_infer_dtype_from_scalar_zerodim_datetimelike(cls): + # ndarray.item() can incorrectly return int instead of td64/dt64 + val = cls(1234, "ns") + arr = np.array(val) + + dtype, res = infer_dtype_from_scalar(arr) + assert dtype.type is cls + assert isinstance(res, cls) + + dtype, res = infer_dtype_from(arr) + assert dtype.type is cls diff --git a/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/test_maybe_box_native.py b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/test_maybe_box_native.py new file mode 100644 index 0000000..3f62f31 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/test_maybe_box_native.py @@ -0,0 +1,40 @@ +from datetime import datetime + +import numpy as np +import pytest + +from pandas.core.dtypes.cast import maybe_box_native + +from pandas import ( + Interval, + Period, + Timedelta, + Timestamp, +) + + +@pytest.mark.parametrize( + "obj,expected_dtype", + [ + (b"\x00\x10", bytes), + (int(4), int), + (np.uint(4), int), + (np.int32(-4), int), + (np.uint8(4), int), + (float(454.98), float), + (np.float16(0.4), float), + (np.float64(1.4), float), + (np.bool_(False), bool), + (datetime(2005, 2, 25), datetime), + (np.datetime64("2005-02-25"), Timestamp), + (Timestamp("2005-02-25"), Timestamp), + (np.timedelta64(1, "D"), Timedelta), + (Timedelta(1, "D"), Timedelta), + (Interval(0, 1), Interval), + (Period("4Q2005"), Period), + ], +) +def test_maybe_box_native(obj, expected_dtype): + boxed_obj = maybe_box_native(obj) + result_dtype = type(boxed_obj) + assert result_dtype is expected_dtype diff --git a/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/test_promote.py b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/test_promote.py new file mode 100644 index 0000000..a514a9c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/cast/test_promote.py @@ -0,0 +1,586 @@ +""" +These test the method maybe_promote from core/dtypes/cast.py +""" + +import datetime +from decimal import Decimal + +import numpy as np +import pytest + +from pandas._libs.tslibs import NaT + +from pandas.core.dtypes.cast import maybe_promote +from pandas.core.dtypes.common import 
( + is_complex_dtype, + is_datetime64_dtype, + is_datetime_or_timedelta_dtype, + is_float_dtype, + is_integer_dtype, + is_object_dtype, + is_scalar, + is_timedelta64_dtype, +) +from pandas.core.dtypes.dtypes import DatetimeTZDtype +from pandas.core.dtypes.missing import isna + +import pandas as pd +import pandas._testing as tm + + +@pytest.fixture( + params=[ + bool, + "uint8", + "int32", + "uint64", + "float32", + "float64", + "complex64", + "complex128", + "M8[ns]", + "m8[ns]", + str, + bytes, + object, + ] +) +def any_numpy_dtype_reduced(request): + """ + Parameterized fixture for numpy dtypes, reduced from any_numpy_dtype. + + * bool + * 'uint8' + * 'int32' + * 'uint64' + * 'float32' + * 'float64' + * 'complex64' + * 'complex128' + * 'M8[ns]' + * 'm8[ns]' + * str + * bytes + * object + """ + return request.param + + +def _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar=None): + """ + Auxiliary function to unify testing of scalar/array promotion. + + Parameters + ---------- + dtype : dtype + The value to pass on as the first argument to maybe_promote. + fill_value : scalar + The value to pass on as the second argument to maybe_promote as + a scalar. + expected_dtype : dtype + The expected dtype returned by maybe_promote (by design this is the + same regardless of whether fill_value was passed as a scalar or in an + array!). + exp_val_for_scalar : scalar + The expected value for the (potentially upcast) fill_value returned by + maybe_promote. + """ + assert is_scalar(fill_value) + + # here, we pass on fill_value as a scalar directly; the expected value + # returned from maybe_promote is fill_value, potentially upcast to the + # returned dtype. + result_dtype, result_fill_value = maybe_promote(dtype, fill_value) + expected_fill_value = exp_val_for_scalar + + assert result_dtype == expected_dtype + _assert_match(result_fill_value, expected_fill_value) + + +def _assert_match(result_fill_value, expected_fill_value): + # GH#23982/25425 require the same type in addition to equality/NA-ness + res_type = type(result_fill_value) + ex_type = type(expected_fill_value) + + if hasattr(result_fill_value, "dtype"): + # Compare types in a way that is robust to platform-specific + # idiosyncrasies where e.g. sometimes we get "ulonglong" as an alias + # for "uint64" or "intc" as an alias for "int32" + assert result_fill_value.dtype.kind == expected_fill_value.dtype.kind + assert result_fill_value.dtype.itemsize == expected_fill_value.dtype.itemsize + else: + # On some builds, type comparison fails, e.g.
np.int32 != np.int32 + assert res_type == ex_type or res_type.__name__ == ex_type.__name__ + + match_value = result_fill_value == expected_fill_value + if match_value is pd.NA: + match_value = False + + # Note: type check above ensures that we have the _same_ NA value + # for missing values, None == None (which is checked + # through match_value above), but np.nan != np.nan and pd.NaT != pd.NaT + match_missing = isna(result_fill_value) and isna(expected_fill_value) + + assert match_value or match_missing + + +@pytest.mark.parametrize( + "dtype, fill_value, expected_dtype", + [ + # size 8 + ("int8", 1, "int8"), + ("int8", np.iinfo("int8").max + 1, "int16"), + ("int8", np.iinfo("int16").max + 1, "int32"), + ("int8", np.iinfo("int32").max + 1, "int64"), + ("int8", np.iinfo("int64").max + 1, "object"), + ("int8", -1, "int8"), + ("int8", np.iinfo("int8").min - 1, "int16"), + ("int8", np.iinfo("int16").min - 1, "int32"), + ("int8", np.iinfo("int32").min - 1, "int64"), + ("int8", np.iinfo("int64").min - 1, "object"), + # keep signed-ness as long as possible + ("uint8", 1, "uint8"), + ("uint8", np.iinfo("int8").max + 1, "uint8"), + ("uint8", np.iinfo("uint8").max + 1, "uint16"), + ("uint8", np.iinfo("int16").max + 1, "uint16"), + ("uint8", np.iinfo("uint16").max + 1, "uint32"), + ("uint8", np.iinfo("int32").max + 1, "uint32"), + ("uint8", np.iinfo("uint32").max + 1, "uint64"), + ("uint8", np.iinfo("int64").max + 1, "uint64"), + ("uint8", np.iinfo("uint64").max + 1, "object"), + # max of uint8 cannot be contained in int8 + ("uint8", -1, "int16"), + ("uint8", np.iinfo("int8").min - 1, "int16"), + ("uint8", np.iinfo("int16").min - 1, "int32"), + ("uint8", np.iinfo("int32").min - 1, "int64"), + ("uint8", np.iinfo("int64").min - 1, "object"), + # size 16 + ("int16", 1, "int16"), + ("int16", np.iinfo("int8").max + 1, "int16"), + ("int16", np.iinfo("int16").max + 1, "int32"), + ("int16", np.iinfo("int32").max + 1, "int64"), + ("int16", np.iinfo("int64").max + 1, "object"), + ("int16", -1, "int16"), + ("int16", np.iinfo("int8").min - 1, "int16"), + ("int16", np.iinfo("int16").min - 1, "int32"), + ("int16", np.iinfo("int32").min - 1, "int64"), + ("int16", np.iinfo("int64").min - 1, "object"), + ("uint16", 1, "uint16"), + ("uint16", np.iinfo("int8").max + 1, "uint16"), + ("uint16", np.iinfo("uint8").max + 1, "uint16"), + ("uint16", np.iinfo("int16").max + 1, "uint16"), + ("uint16", np.iinfo("uint16").max + 1, "uint32"), + ("uint16", np.iinfo("int32").max + 1, "uint32"), + ("uint16", np.iinfo("uint32").max + 1, "uint64"), + ("uint16", np.iinfo("int64").max + 1, "uint64"), + ("uint16", np.iinfo("uint64").max + 1, "object"), + ("uint16", -1, "int32"), + ("uint16", np.iinfo("int8").min - 1, "int32"), + ("uint16", np.iinfo("int16").min - 1, "int32"), + ("uint16", np.iinfo("int32").min - 1, "int64"), + ("uint16", np.iinfo("int64").min - 1, "object"), + # size 32 + ("int32", 1, "int32"), + ("int32", np.iinfo("int8").max + 1, "int32"), + ("int32", np.iinfo("int16").max + 1, "int32"), + ("int32", np.iinfo("int32").max + 1, "int64"), + ("int32", np.iinfo("int64").max + 1, "object"), + ("int32", -1, "int32"), + ("int32", np.iinfo("int8").min - 1, "int32"), + ("int32", np.iinfo("int16").min - 1, "int32"), + ("int32", np.iinfo("int32").min - 1, "int64"), + ("int32", np.iinfo("int64").min - 1, "object"), + ("uint32", 1, "uint32"), + ("uint32", np.iinfo("int8").max + 1, "uint32"), + ("uint32", np.iinfo("uint8").max + 1, "uint32"), + ("uint32", np.iinfo("int16").max + 1, "uint32"), + ("uint32", np.iinfo("uint16").max + 1, 
"uint32"), + ("uint32", np.iinfo("int32").max + 1, "uint32"), + ("uint32", np.iinfo("uint32").max + 1, "uint64"), + ("uint32", np.iinfo("int64").max + 1, "uint64"), + ("uint32", np.iinfo("uint64").max + 1, "object"), + ("uint32", -1, "int64"), + ("uint32", np.iinfo("int8").min - 1, "int64"), + ("uint32", np.iinfo("int16").min - 1, "int64"), + ("uint32", np.iinfo("int32").min - 1, "int64"), + ("uint32", np.iinfo("int64").min - 1, "object"), + # size 64 + ("int64", 1, "int64"), + ("int64", np.iinfo("int8").max + 1, "int64"), + ("int64", np.iinfo("int16").max + 1, "int64"), + ("int64", np.iinfo("int32").max + 1, "int64"), + ("int64", np.iinfo("int64").max + 1, "object"), + ("int64", -1, "int64"), + ("int64", np.iinfo("int8").min - 1, "int64"), + ("int64", np.iinfo("int16").min - 1, "int64"), + ("int64", np.iinfo("int32").min - 1, "int64"), + ("int64", np.iinfo("int64").min - 1, "object"), + ("uint64", 1, "uint64"), + ("uint64", np.iinfo("int8").max + 1, "uint64"), + ("uint64", np.iinfo("uint8").max + 1, "uint64"), + ("uint64", np.iinfo("int16").max + 1, "uint64"), + ("uint64", np.iinfo("uint16").max + 1, "uint64"), + ("uint64", np.iinfo("int32").max + 1, "uint64"), + ("uint64", np.iinfo("uint32").max + 1, "uint64"), + ("uint64", np.iinfo("int64").max + 1, "uint64"), + ("uint64", np.iinfo("uint64").max + 1, "object"), + ("uint64", -1, "object"), + ("uint64", np.iinfo("int8").min - 1, "object"), + ("uint64", np.iinfo("int16").min - 1, "object"), + ("uint64", np.iinfo("int32").min - 1, "object"), + ("uint64", np.iinfo("int64").min - 1, "object"), + ], +) +def test_maybe_promote_int_with_int(dtype, fill_value, expected_dtype): + dtype = np.dtype(dtype) + expected_dtype = np.dtype(expected_dtype) + + # output is not a generic int, but corresponds to expected_dtype + exp_val_for_scalar = np.array([fill_value], dtype=expected_dtype)[0] + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +def test_maybe_promote_int_with_float(any_int_numpy_dtype, float_numpy_dtype): + dtype = np.dtype(any_int_numpy_dtype) + fill_dtype = np.dtype(float_numpy_dtype) + + # create array of given dtype; casts "1" to correct dtype + fill_value = np.array([1], dtype=fill_dtype)[0] + + # filling int with float always upcasts to float64 + expected_dtype = np.float64 + # fill_value can be different float type + exp_val_for_scalar = np.float64(fill_value) + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +def test_maybe_promote_float_with_int(float_numpy_dtype, any_int_numpy_dtype): + + dtype = np.dtype(float_numpy_dtype) + fill_dtype = np.dtype(any_int_numpy_dtype) + + # create array of given dtype; casts "1" to correct dtype + fill_value = np.array([1], dtype=fill_dtype)[0] + + # filling float with int always keeps float dtype + # because: np.finfo('float32').max > np.iinfo('uint64').max + expected_dtype = dtype + # output is not a generic float, but corresponds to expected_dtype + exp_val_for_scalar = np.array([fill_value], dtype=expected_dtype)[0] + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +@pytest.mark.parametrize( + "dtype, fill_value, expected_dtype", + [ + # float filled with float + ("float32", 1, "float32"), + ("float32", np.finfo("float32").max * 1.1, "float64"), + ("float64", 1, "float64"), + ("float64", np.finfo("float32").max * 1.1, "float64"), + # complex filled with float + ("complex64", 1, "complex64"), + ("complex64", np.finfo("float32").max * 1.1, "complex128"), + ("complex128", 1, "complex128"), + ("complex128", 
np.finfo("float32").max * 1.1, "complex128"), + # float filled with complex + ("float32", 1 + 1j, "complex64"), + ("float32", np.finfo("float32").max * (1.1 + 1j), "complex128"), + ("float64", 1 + 1j, "complex128"), + ("float64", np.finfo("float32").max * (1.1 + 1j), "complex128"), + # complex filled with complex + ("complex64", 1 + 1j, "complex64"), + ("complex64", np.finfo("float32").max * (1.1 + 1j), "complex128"), + ("complex128", 1 + 1j, "complex128"), + ("complex128", np.finfo("float32").max * (1.1 + 1j), "complex128"), + ], +) +def test_maybe_promote_float_with_float(dtype, fill_value, expected_dtype): + + dtype = np.dtype(dtype) + expected_dtype = np.dtype(expected_dtype) + + # output is not a generic float, but corresponds to expected_dtype + exp_val_for_scalar = np.array([fill_value], dtype=expected_dtype)[0] + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +def test_maybe_promote_bool_with_any(any_numpy_dtype_reduced): + dtype = np.dtype(bool) + fill_dtype = np.dtype(any_numpy_dtype_reduced) + + # create array of given dtype; casts "1" to correct dtype + fill_value = np.array([1], dtype=fill_dtype)[0] + + # filling bool with anything but bool casts to object + expected_dtype = np.dtype(object) if fill_dtype != bool else fill_dtype + exp_val_for_scalar = fill_value + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +def test_maybe_promote_any_with_bool(any_numpy_dtype_reduced): + dtype = np.dtype(any_numpy_dtype_reduced) + fill_value = True + + # filling anything but bool with bool casts to object + expected_dtype = np.dtype(object) if dtype != bool else dtype + # output is not a generic bool, but corresponds to expected_dtype + exp_val_for_scalar = np.array([fill_value], dtype=expected_dtype)[0] + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +def test_maybe_promote_bytes_with_any(bytes_dtype, any_numpy_dtype_reduced): + dtype = np.dtype(bytes_dtype) + fill_dtype = np.dtype(any_numpy_dtype_reduced) + + # create array of given dtype; casts "1" to correct dtype + fill_value = np.array([1], dtype=fill_dtype)[0] + + # we never use bytes dtype internally, always promote to object + expected_dtype = np.dtype(np.object_) + exp_val_for_scalar = fill_value + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +def test_maybe_promote_any_with_bytes(any_numpy_dtype_reduced, bytes_dtype): + dtype = np.dtype(any_numpy_dtype_reduced) + + # create array of given dtype + fill_value = b"abc" + + # we never use bytes dtype internally, always promote to object + expected_dtype = np.dtype(np.object_) + # output is not a generic bytes, but corresponds to expected_dtype + exp_val_for_scalar = np.array([fill_value], dtype=expected_dtype)[0] + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +def test_maybe_promote_datetime64_with_any(datetime64_dtype, any_numpy_dtype_reduced): + dtype = np.dtype(datetime64_dtype) + fill_dtype = np.dtype(any_numpy_dtype_reduced) + + # create array of given dtype; casts "1" to correct dtype + fill_value = np.array([1], dtype=fill_dtype)[0] + + # filling datetime with anything but datetime casts to object + if is_datetime64_dtype(fill_dtype): + expected_dtype = dtype + # for datetime dtypes, scalar values get cast to to_datetime64 + exp_val_for_scalar = pd.Timestamp(fill_value).to_datetime64() + else: + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + + _check_promote(dtype, fill_value, expected_dtype, 
exp_val_for_scalar) + + +@pytest.mark.parametrize( + "fill_value", + [ + pd.Timestamp("now"), + np.datetime64("now"), + datetime.datetime.now(), + datetime.date.today(), + ], + ids=["pd.Timestamp", "np.datetime64", "datetime.datetime", "datetime.date"], +) +def test_maybe_promote_any_with_datetime64( + any_numpy_dtype_reduced, datetime64_dtype, fill_value +): + dtype = np.dtype(any_numpy_dtype_reduced) + + # filling datetime with anything but datetime casts to object + if is_datetime64_dtype(dtype): + expected_dtype = dtype + # for datetime dtypes, scalar values get cast to pd.Timestamp.value + exp_val_for_scalar = pd.Timestamp(fill_value).to_datetime64() + else: + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + + warn = None + msg = "Using a `date` object for fill_value" + if type(fill_value) is datetime.date and dtype.kind == "M": + # Casting date to dt64 is deprecated + warn = FutureWarning + + with tm.assert_produces_warning(warn, match=msg): + # stacklevel is chosen to make sense when called from higher-level functions + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +@pytest.mark.parametrize( + "fill_value", + [ + pd.Timestamp("now"), + np.datetime64("now"), + datetime.datetime.now(), + datetime.date.today(), + ], + ids=["pd.Timestamp", "np.datetime64", "datetime.datetime", "datetime.date"], +) +def test_maybe_promote_any_numpy_dtype_with_datetimetz( + any_numpy_dtype_reduced, tz_aware_fixture, fill_value +): + dtype = np.dtype(any_numpy_dtype_reduced) + fill_dtype = DatetimeTZDtype(tz=tz_aware_fixture) + + fill_value = pd.Series([fill_value], dtype=fill_dtype)[0] + + # filling any numpy dtype with datetimetz casts to object + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +def test_maybe_promote_timedelta64_with_any(timedelta64_dtype, any_numpy_dtype_reduced): + dtype = np.dtype(timedelta64_dtype) + fill_dtype = np.dtype(any_numpy_dtype_reduced) + + # create array of given dtype; casts "1" to correct dtype + fill_value = np.array([1], dtype=fill_dtype)[0] + + # filling timedelta with anything but timedelta casts to object + if is_timedelta64_dtype(fill_dtype): + expected_dtype = dtype + # for timedelta dtypes, scalar values get cast to pd.Timedelta.value + exp_val_for_scalar = pd.Timedelta(fill_value).to_timedelta64() + else: + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +@pytest.mark.parametrize( + "fill_value", + [pd.Timedelta(days=1), np.timedelta64(24, "h"), datetime.timedelta(1)], + ids=["pd.Timedelta", "np.timedelta64", "datetime.timedelta"], +) +def test_maybe_promote_any_with_timedelta64( + any_numpy_dtype_reduced, timedelta64_dtype, fill_value +): + dtype = np.dtype(any_numpy_dtype_reduced) + + # filling anything but timedelta with timedelta casts to object + if is_timedelta64_dtype(dtype): + expected_dtype = dtype + # for timedelta dtypes, scalar values get cast to pd.Timedelta.value + exp_val_for_scalar = pd.Timedelta(fill_value).to_timedelta64() + else: + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +def test_maybe_promote_string_with_any(string_dtype, any_numpy_dtype_reduced): + dtype = np.dtype(string_dtype) + fill_dtype = np.dtype(any_numpy_dtype_reduced) + + # create array of given dtype; casts "1" to correct dtype + 
fill_value = np.array([1], dtype=fill_dtype)[0] + + # filling string with anything casts to object + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +def test_maybe_promote_any_with_string(any_numpy_dtype_reduced, string_dtype): + dtype = np.dtype(any_numpy_dtype_reduced) + + # create array of given dtype + fill_value = "abc" + + # filling anything with a string casts to object + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +def test_maybe_promote_object_with_any(object_dtype, any_numpy_dtype_reduced): + dtype = np.dtype(object_dtype) + fill_dtype = np.dtype(any_numpy_dtype_reduced) + + # create array of given dtype; casts "1" to correct dtype + fill_value = np.array([1], dtype=fill_dtype)[0] + + # filling object with anything stays object + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +def test_maybe_promote_any_with_object(any_numpy_dtype_reduced, object_dtype): + dtype = np.dtype(any_numpy_dtype_reduced) + + # create array of object dtype from a scalar value (i.e. passing + # dtypes.common.is_scalar), which can however not be cast to int/float etc. + fill_value = pd.DateOffset(1) + + # filling object with anything stays object + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +def test_maybe_promote_any_numpy_dtype_with_na(any_numpy_dtype_reduced, nulls_fixture): + fill_value = nulls_fixture + dtype = np.dtype(any_numpy_dtype_reduced) + + if isinstance(fill_value, Decimal): + # Subject to change, but ATM (When Decimal(NAN) is being added to nulls_fixture) + # this is the existing behavior in maybe_promote, + # hinges on is_valid_na_for_dtype + if dtype.kind in ["i", "u", "f", "c"]: + if dtype.kind in ["i", "u"]: + expected_dtype = np.dtype(np.float64) + else: + expected_dtype = dtype + exp_val_for_scalar = np.nan + else: + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + elif is_integer_dtype(dtype) and fill_value is not NaT: + # integer + other missing value (np.nan / None) casts to float + expected_dtype = np.float64 + exp_val_for_scalar = np.nan + elif is_object_dtype(dtype) and fill_value is NaT: + # inserting into object does not cast the value + # but *does* cast None to np.nan + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + elif is_datetime_or_timedelta_dtype(dtype): + # datetime / timedelta cast all missing values to dtyped-NaT + expected_dtype = dtype + exp_val_for_scalar = dtype.type("NaT", "ns") + elif fill_value is NaT: + # NaT upcasts everything that's not datetime/timedelta to object + expected_dtype = np.dtype(object) + exp_val_for_scalar = NaT + elif is_float_dtype(dtype) or is_complex_dtype(dtype): + # float / complex + missing value (!= NaT) stays the same + expected_dtype = dtype + exp_val_for_scalar = np.nan + else: + # all other cases cast to object, and use np.nan as missing value + expected_dtype = np.dtype(object) + if fill_value is pd.NA: + exp_val_for_scalar = pd.NA + else: + exp_val_for_scalar = np.nan + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/dtypes/test_common.py 
b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/test_common.py new file mode 100644 index 0000000..b59ee47 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/test_common.py @@ -0,0 +1,800 @@ +from __future__ import annotations + +from datetime import datetime + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas.core.dtypes.cast import astype_nansafe +import pandas.core.dtypes.common as com +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + CategoricalDtypeType, + DatetimeTZDtype, + IntervalDtype, + PeriodDtype, +) +from pandas.core.dtypes.missing import isna + +import pandas as pd +import pandas._testing as tm +from pandas.api.types import pandas_dtype +from pandas.arrays import SparseArray + + +# EA & Actual Dtypes +def to_ea_dtypes(dtypes): + """convert list of string dtypes to EA dtype""" + return [getattr(pd, dt + "Dtype") for dt in dtypes] + + +def to_numpy_dtypes(dtypes): + """convert list of string dtypes to numpy dtype""" + return [getattr(np, dt) for dt in dtypes if isinstance(dt, str)] + + +class TestPandasDtype: + + # Passing invalid dtype, both as a string or object, must raise TypeError + # Per issue GH15520 + @pytest.mark.parametrize("box", [pd.Timestamp, "pd.Timestamp", list]) + def test_invalid_dtype_error(self, box): + with pytest.raises(TypeError, match="not understood"): + com.pandas_dtype(box) + + @pytest.mark.parametrize( + "dtype", + [ + object, + "float64", + np.object_, + np.dtype("object"), + "O", + np.float64, + float, + np.dtype("float64"), + ], + ) + def test_pandas_dtype_valid(self, dtype): + assert com.pandas_dtype(dtype) == dtype + + @pytest.mark.parametrize( + "dtype", ["M8[ns]", "m8[ns]", "object", "float64", "int64"] + ) + def test_numpy_dtype(self, dtype): + assert com.pandas_dtype(dtype) == np.dtype(dtype) + + def test_numpy_string_dtype(self): + # do not parse freq-like string as period dtype + assert com.pandas_dtype("U") == np.dtype("U") + assert com.pandas_dtype("S") == np.dtype("S") + + @pytest.mark.parametrize( + "dtype", + [ + "datetime64[ns, US/Eastern]", + "datetime64[ns, Asia/Tokyo]", + "datetime64[ns, UTC]", + # GH#33885 check that the M8 alias is understood + "M8[ns, US/Eastern]", + "M8[ns, Asia/Tokyo]", + "M8[ns, UTC]", + ], + ) + def test_datetimetz_dtype(self, dtype): + assert com.pandas_dtype(dtype) == DatetimeTZDtype.construct_from_string(dtype) + assert com.pandas_dtype(dtype) == dtype + + def test_categorical_dtype(self): + assert com.pandas_dtype("category") == CategoricalDtype() + + @pytest.mark.parametrize( + "dtype", + [ + "period[D]", + "period[3M]", + "period[U]", + "Period[D]", + "Period[3M]", + "Period[U]", + ], + ) + def test_period_dtype(self, dtype): + assert com.pandas_dtype(dtype) is PeriodDtype(dtype) + assert com.pandas_dtype(dtype) == PeriodDtype(dtype) + assert com.pandas_dtype(dtype) == dtype + + +dtypes = { + "datetime_tz": com.pandas_dtype("datetime64[ns, US/Eastern]"), + "datetime": com.pandas_dtype("datetime64[ns]"), + "timedelta": com.pandas_dtype("timedelta64[ns]"), + "period": PeriodDtype("D"), + "integer": np.dtype(np.int64), + "float": np.dtype(np.float64), + "object": np.dtype(object), + "category": com.pandas_dtype("category"), + "string": pd.StringDtype(), +} + + +@pytest.mark.parametrize("name1,dtype1", list(dtypes.items()), ids=lambda x: str(x)) +@pytest.mark.parametrize("name2,dtype2", list(dtypes.items()), ids=lambda x: str(x)) +def test_dtype_equal(name1, dtype1, name2, dtype2): + + # match equal to self, but not 
equal to other + assert com.is_dtype_equal(dtype1, dtype1) + if name1 != name2: + assert not com.is_dtype_equal(dtype1, dtype2) + + +@pytest.mark.parametrize("name,dtype", list(dtypes.items()), ids=lambda x: str(x)) +def test_pyarrow_string_import_error(name, dtype): + # GH-44276 + assert not com.is_dtype_equal(dtype, "string[pyarrow]") + + +@pytest.mark.parametrize( + "dtype1,dtype2", + [ + (np.int8, np.int64), + (np.int16, np.int64), + (np.int32, np.int64), + (np.float32, np.float64), + (PeriodDtype("D"), PeriodDtype("2D")), # PeriodType + ( + com.pandas_dtype("datetime64[ns, US/Eastern]"), + com.pandas_dtype("datetime64[ns, CET]"), + ), # Datetime + (None, None), # gh-15941: no exception should be raised. + ], +) +def test_dtype_equal_strict(dtype1, dtype2): + assert not com.is_dtype_equal(dtype1, dtype2) + + +def get_is_dtype_funcs(): + """ + Get all functions in pandas.core.dtypes.common that + begin with 'is_' and end with 'dtype' + + """ + fnames = [f for f in dir(com) if (f.startswith("is_") and f.endswith("dtype"))] + fnames.remove("is_string_or_object_np_dtype") # fastpath requires np.dtype obj + return [getattr(com, fname) for fname in fnames] + + +@pytest.mark.parametrize("func", get_is_dtype_funcs(), ids=lambda x: x.__name__) +def test_get_dtype_error_catch(func): + # see gh-15941 + # + # No exception should be raised. + + assert not func(None) + + +def test_is_object(): + assert com.is_object_dtype(object) + assert com.is_object_dtype(np.array([], dtype=object)) + + assert not com.is_object_dtype(int) + assert not com.is_object_dtype(np.array([], dtype=int)) + assert not com.is_object_dtype([1, 2, 3]) + + +@pytest.mark.parametrize( + "check_scipy", [False, pytest.param(True, marks=td.skip_if_no_scipy)] +) +def test_is_sparse(check_scipy): + assert com.is_sparse(SparseArray([1, 2, 3])) + + assert not com.is_sparse(np.array([1, 2, 3])) + + if check_scipy: + import scipy.sparse + + assert not com.is_sparse(scipy.sparse.bsr_matrix([1, 2, 3])) + + +@td.skip_if_no_scipy +def test_is_scipy_sparse(): + from scipy.sparse import bsr_matrix + + assert com.is_scipy_sparse(bsr_matrix([1, 2, 3])) + + assert not com.is_scipy_sparse(SparseArray([1, 2, 3])) + + +def test_is_categorical(): + cat = pd.Categorical([1, 2, 3]) + with tm.assert_produces_warning(FutureWarning): + assert com.is_categorical(cat) + assert com.is_categorical(pd.Series(cat)) + assert com.is_categorical(pd.CategoricalIndex([1, 2, 3])) + + assert not com.is_categorical([1, 2, 3]) + + +def test_is_categorical_deprecation(): + # GH#33385 + with tm.assert_produces_warning(FutureWarning): + com.is_categorical([1, 2, 3]) + + +def test_is_datetime64_dtype(): + assert not com.is_datetime64_dtype(object) + assert not com.is_datetime64_dtype([1, 2, 3]) + assert not com.is_datetime64_dtype(np.array([], dtype=int)) + + assert com.is_datetime64_dtype(np.datetime64) + assert com.is_datetime64_dtype(np.array([], dtype=np.datetime64)) + + +def test_is_datetime64tz_dtype(): + assert not com.is_datetime64tz_dtype(object) + assert not com.is_datetime64tz_dtype([1, 2, 3]) + assert not com.is_datetime64tz_dtype(pd.DatetimeIndex([1, 2, 3])) + assert com.is_datetime64tz_dtype(pd.DatetimeIndex(["2000"], tz="US/Eastern")) + + +def test_is_timedelta64_dtype(): + assert not com.is_timedelta64_dtype(object) + assert not com.is_timedelta64_dtype(None) + assert not com.is_timedelta64_dtype([1, 2, 3]) + assert not com.is_timedelta64_dtype(np.array([], dtype=np.datetime64)) + assert not com.is_timedelta64_dtype("0 days") + assert not 
com.is_timedelta64_dtype("0 days 00:00:00") + assert not com.is_timedelta64_dtype(["0 days 00:00:00"]) + assert not com.is_timedelta64_dtype("NO DATE") + + assert com.is_timedelta64_dtype(np.timedelta64) + assert com.is_timedelta64_dtype(pd.Series([], dtype="timedelta64[ns]")) + assert com.is_timedelta64_dtype(pd.to_timedelta(["0 days", "1 days"])) + + +def test_is_period_dtype(): + assert not com.is_period_dtype(object) + assert not com.is_period_dtype([1, 2, 3]) + assert not com.is_period_dtype(pd.Period("2017-01-01")) + + assert com.is_period_dtype(PeriodDtype(freq="D")) + assert com.is_period_dtype(pd.PeriodIndex([], freq="A")) + + +def test_is_interval_dtype(): + assert not com.is_interval_dtype(object) + assert not com.is_interval_dtype([1, 2, 3]) + + assert com.is_interval_dtype(IntervalDtype()) + + interval = pd.Interval(1, 2, closed="right") + assert not com.is_interval_dtype(interval) + assert com.is_interval_dtype(pd.IntervalIndex([interval])) + + +def test_is_categorical_dtype(): + assert not com.is_categorical_dtype(object) + assert not com.is_categorical_dtype([1, 2, 3]) + + assert com.is_categorical_dtype(CategoricalDtype()) + assert com.is_categorical_dtype(pd.Categorical([1, 2, 3])) + assert com.is_categorical_dtype(pd.CategoricalIndex([1, 2, 3])) + + +def test_is_string_dtype(): + assert not com.is_string_dtype(int) + assert not com.is_string_dtype(pd.Series([1, 2])) + + assert com.is_string_dtype(str) + assert com.is_string_dtype(object) + assert com.is_string_dtype(np.array(["a", "b"])) + assert com.is_string_dtype(pd.StringDtype()) + + +def test_is_string_dtype_nullable(nullable_string_dtype): + assert com.is_string_dtype(pd.array(["a", "b"], dtype=nullable_string_dtype)) + + +integer_dtypes: list = [] + + +@pytest.mark.parametrize( + "dtype", + integer_dtypes + + [pd.Series([1, 2])] + + tm.ALL_INT_NUMPY_DTYPES + + to_numpy_dtypes(tm.ALL_INT_NUMPY_DTYPES) + + tm.ALL_INT_EA_DTYPES + + to_ea_dtypes(tm.ALL_INT_EA_DTYPES), +) +def test_is_integer_dtype(dtype): + assert com.is_integer_dtype(dtype) + + +@pytest.mark.parametrize( + "dtype", + [ + str, + float, + np.datetime64, + np.timedelta64, + pd.Index([1, 2.0]), + np.array(["a", "b"]), + np.array([], dtype=np.timedelta64), + ], +) +def test_is_not_integer_dtype(dtype): + assert not com.is_integer_dtype(dtype) + + +signed_integer_dtypes: list = [] + + +@pytest.mark.parametrize( + "dtype", + signed_integer_dtypes + + [pd.Series([1, 2])] + + tm.SIGNED_INT_NUMPY_DTYPES + + to_numpy_dtypes(tm.SIGNED_INT_NUMPY_DTYPES) + + tm.SIGNED_INT_EA_DTYPES + + to_ea_dtypes(tm.SIGNED_INT_EA_DTYPES), +) +def test_is_signed_integer_dtype(dtype): + assert com.is_integer_dtype(dtype) + + +@pytest.mark.parametrize( + "dtype", + [ + str, + float, + np.datetime64, + np.timedelta64, + pd.Index([1, 2.0]), + np.array(["a", "b"]), + np.array([], dtype=np.timedelta64), + ] + + tm.UNSIGNED_INT_NUMPY_DTYPES + + to_numpy_dtypes(tm.UNSIGNED_INT_NUMPY_DTYPES) + + tm.UNSIGNED_INT_EA_DTYPES + + to_ea_dtypes(tm.UNSIGNED_INT_EA_DTYPES), +) +def test_is_not_signed_integer_dtype(dtype): + assert not com.is_signed_integer_dtype(dtype) + + +unsigned_integer_dtypes: list = [] + + +@pytest.mark.parametrize( + "dtype", + unsigned_integer_dtypes + + [pd.Series([1, 2], dtype=np.uint32)] + + tm.UNSIGNED_INT_NUMPY_DTYPES + + to_numpy_dtypes(tm.UNSIGNED_INT_NUMPY_DTYPES) + + tm.UNSIGNED_INT_EA_DTYPES + + to_ea_dtypes(tm.UNSIGNED_INT_EA_DTYPES), +) +def test_is_unsigned_integer_dtype(dtype): + assert com.is_unsigned_integer_dtype(dtype) + + +@pytest.mark.parametrize( + 
"dtype", + [ + str, + float, + np.datetime64, + np.timedelta64, + pd.Index([1, 2.0]), + np.array(["a", "b"]), + np.array([], dtype=np.timedelta64), + ] + + tm.SIGNED_INT_NUMPY_DTYPES + + to_numpy_dtypes(tm.SIGNED_INT_NUMPY_DTYPES) + + tm.SIGNED_INT_EA_DTYPES + + to_ea_dtypes(tm.SIGNED_INT_EA_DTYPES), +) +def test_is_not_unsigned_integer_dtype(dtype): + assert not com.is_unsigned_integer_dtype(dtype) + + +@pytest.mark.parametrize( + "dtype", [np.int64, np.array([1, 2], dtype=np.int64), "Int64", pd.Int64Dtype] +) +def test_is_int64_dtype(dtype): + assert com.is_int64_dtype(dtype) + + +def test_type_comparison_with_numeric_ea_dtype(any_numeric_ea_dtype): + # GH#43038 + assert pandas_dtype(any_numeric_ea_dtype) == any_numeric_ea_dtype + + +def test_type_comparison_with_real_numpy_dtype(any_real_numpy_dtype): + # GH#43038 + assert pandas_dtype(any_real_numpy_dtype) == any_real_numpy_dtype + + +def test_type_comparison_with_signed_int_ea_dtype_and_signed_int_numpy_dtype( + any_signed_int_ea_dtype, any_signed_int_numpy_dtype +): + # GH#43038 + assert not pandas_dtype(any_signed_int_ea_dtype) == any_signed_int_numpy_dtype + + +@pytest.mark.parametrize( + "dtype", + [ + str, + float, + np.int32, + np.uint64, + pd.Index([1, 2.0]), + np.array(["a", "b"]), + np.array([1, 2], dtype=np.uint32), + "int8", + "Int8", + pd.Int8Dtype, + ], +) +def test_is_not_int64_dtype(dtype): + assert not com.is_int64_dtype(dtype) + + +def test_is_datetime64_any_dtype(): + assert not com.is_datetime64_any_dtype(int) + assert not com.is_datetime64_any_dtype(str) + assert not com.is_datetime64_any_dtype(np.array([1, 2])) + assert not com.is_datetime64_any_dtype(np.array(["a", "b"])) + + assert com.is_datetime64_any_dtype(np.datetime64) + assert com.is_datetime64_any_dtype(np.array([], dtype=np.datetime64)) + assert com.is_datetime64_any_dtype(DatetimeTZDtype("ns", "US/Eastern")) + assert com.is_datetime64_any_dtype( + pd.DatetimeIndex([1, 2, 3], dtype="datetime64[ns]") + ) + + +def test_is_datetime64_ns_dtype(): + assert not com.is_datetime64_ns_dtype(int) + assert not com.is_datetime64_ns_dtype(str) + assert not com.is_datetime64_ns_dtype(np.datetime64) + assert not com.is_datetime64_ns_dtype(np.array([1, 2])) + assert not com.is_datetime64_ns_dtype(np.array(["a", "b"])) + assert not com.is_datetime64_ns_dtype(np.array([], dtype=np.datetime64)) + + # This datetime array has the wrong unit (ps instead of ns) + assert not com.is_datetime64_ns_dtype(np.array([], dtype="datetime64[ps]")) + + assert com.is_datetime64_ns_dtype(DatetimeTZDtype("ns", "US/Eastern")) + assert com.is_datetime64_ns_dtype( + pd.DatetimeIndex([1, 2, 3], dtype=np.dtype("datetime64[ns]")) + ) + + +def test_is_timedelta64_ns_dtype(): + assert not com.is_timedelta64_ns_dtype(np.dtype("m8[ps]")) + assert not com.is_timedelta64_ns_dtype(np.array([1, 2], dtype=np.timedelta64)) + + assert com.is_timedelta64_ns_dtype(np.dtype("m8[ns]")) + assert com.is_timedelta64_ns_dtype(np.array([1, 2], dtype="m8[ns]")) + + +def test_is_datetime_or_timedelta_dtype(): + assert not com.is_datetime_or_timedelta_dtype(int) + assert not com.is_datetime_or_timedelta_dtype(str) + assert not com.is_datetime_or_timedelta_dtype(pd.Series([1, 2])) + assert not com.is_datetime_or_timedelta_dtype(np.array(["a", "b"])) + + # TODO(jreback), this is slightly suspect + assert not com.is_datetime_or_timedelta_dtype(DatetimeTZDtype("ns", "US/Eastern")) + + assert com.is_datetime_or_timedelta_dtype(np.datetime64) + assert com.is_datetime_or_timedelta_dtype(np.timedelta64) + assert 
com.is_datetime_or_timedelta_dtype(np.array([], dtype=np.timedelta64)) + assert com.is_datetime_or_timedelta_dtype(np.array([], dtype=np.datetime64)) + + +def test_is_numeric_v_string_like(): + assert not com.is_numeric_v_string_like(np.array([1]), 1) + assert not com.is_numeric_v_string_like(np.array([1]), np.array([2])) + assert not com.is_numeric_v_string_like(np.array(["foo"]), np.array(["foo"])) + + assert com.is_numeric_v_string_like(np.array([1]), "foo") + assert com.is_numeric_v_string_like(np.array([1, 2]), np.array(["foo"])) + assert com.is_numeric_v_string_like(np.array(["foo"]), np.array([1, 2])) + + +def test_is_datetimelike_v_numeric(): + dt = np.datetime64(datetime(2017, 1, 1)) + + assert not com.is_datetimelike_v_numeric(1, 1) + assert not com.is_datetimelike_v_numeric(dt, dt) + assert not com.is_datetimelike_v_numeric(np.array([1]), np.array([2])) + assert not com.is_datetimelike_v_numeric(np.array([dt]), np.array([dt])) + + assert com.is_datetimelike_v_numeric(1, dt) + assert com.is_datetimelike_v_numeric(dt, 1) # the check is symmetric in its arguments + assert com.is_datetimelike_v_numeric(np.array([dt]), 1) + assert com.is_datetimelike_v_numeric(np.array([1]), dt) + assert com.is_datetimelike_v_numeric(np.array([dt]), np.array([1])) + + +def test_needs_i8_conversion(): + assert not com.needs_i8_conversion(str) + assert not com.needs_i8_conversion(np.int64) + assert not com.needs_i8_conversion(pd.Series([1, 2])) + assert not com.needs_i8_conversion(np.array(["a", "b"])) + + assert com.needs_i8_conversion(np.datetime64) + assert com.needs_i8_conversion(pd.Series([], dtype="timedelta64[ns]")) + assert com.needs_i8_conversion(pd.DatetimeIndex(["2000"], tz="US/Eastern")) + + +def test_is_numeric_dtype(): + assert not com.is_numeric_dtype(str) + assert not com.is_numeric_dtype(np.datetime64) + assert not com.is_numeric_dtype(np.timedelta64) + assert not com.is_numeric_dtype(np.array(["a", "b"])) + assert not com.is_numeric_dtype(np.array([], dtype=np.timedelta64)) + + assert com.is_numeric_dtype(int) + assert com.is_numeric_dtype(float) + assert com.is_numeric_dtype(np.uint64) + assert com.is_numeric_dtype(pd.Series([1, 2])) + assert com.is_numeric_dtype(pd.Index([1, 2.0])) + + +def test_is_float_dtype(): + assert not com.is_float_dtype(str) + assert not com.is_float_dtype(int) + assert not com.is_float_dtype(pd.Series([1, 2])) + assert not com.is_float_dtype(np.array(["a", "b"])) + + assert com.is_float_dtype(float) + assert com.is_float_dtype(pd.Index([1, 2.0])) + + +def test_is_bool_dtype(): + assert not com.is_bool_dtype(int) + assert not com.is_bool_dtype(str) + assert not com.is_bool_dtype(pd.Series([1, 2])) + assert not com.is_bool_dtype(pd.Series(["a", "b"], dtype="category")) + assert not com.is_bool_dtype(np.array(["a", "b"])) + assert not com.is_bool_dtype(pd.Index(["a", "b"])) + assert not com.is_bool_dtype("Int64") + + assert com.is_bool_dtype(bool) + assert com.is_bool_dtype(np.bool_) + assert com.is_bool_dtype(pd.Series([True, False], dtype="category")) + assert com.is_bool_dtype(np.array([True, False])) + assert com.is_bool_dtype(pd.Index([True, False])) + + assert com.is_bool_dtype(pd.BooleanDtype()) + assert com.is_bool_dtype(pd.array([True, False, None], dtype="boolean")) + assert com.is_bool_dtype("boolean") + + +def test_is_bool_dtype_numpy_error(): + # GH39010 + assert not com.is_bool_dtype("0 - Name") + + +@pytest.mark.filterwarnings("ignore:'is_extension_type' is deprecated:FutureWarning") +@pytest.mark.parametrize( + "check_scipy", [False, pytest.param(True, marks=td.skip_if_no_scipy)] +) 
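+# NOTE: the parametrization above runs the test body twice: check_scipy=False +# always runs, while pytest.param(True, marks=td.skip_if_no_scipy) is skipped +# when scipy is not installed, so the scipy.sparse assertions below execute +# only in environments that provide scipy. 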
+def test_is_extension_type(check_scipy): + assert not com.is_extension_type([1, 2, 3]) + assert not com.is_extension_type(np.array([1, 2, 3])) + assert not com.is_extension_type(pd.DatetimeIndex([1, 2, 3])) + + cat = pd.Categorical([1, 2, 3]) + assert com.is_extension_type(cat) + assert com.is_extension_type(pd.Series(cat)) + assert com.is_extension_type(SparseArray([1, 2, 3])) + assert com.is_extension_type(pd.DatetimeIndex(["2000"], tz="US/Eastern")) + + dtype = DatetimeTZDtype("ns", tz="US/Eastern") + s = pd.Series([], dtype=dtype) + assert com.is_extension_type(s) + + if check_scipy: + import scipy.sparse + + assert not com.is_extension_type(scipy.sparse.bsr_matrix([1, 2, 3])) + + +def test_is_extension_type_deprecation(): + with tm.assert_produces_warning(FutureWarning): + com.is_extension_type([1, 2, 3]) + + +@pytest.mark.parametrize( + "check_scipy", [False, pytest.param(True, marks=td.skip_if_no_scipy)] +) +def test_is_extension_array_dtype(check_scipy): + assert not com.is_extension_array_dtype([1, 2, 3]) + assert not com.is_extension_array_dtype(np.array([1, 2, 3])) + assert not com.is_extension_array_dtype(pd.DatetimeIndex([1, 2, 3])) + + cat = pd.Categorical([1, 2, 3]) + assert com.is_extension_array_dtype(cat) + assert com.is_extension_array_dtype(pd.Series(cat)) + assert com.is_extension_array_dtype(SparseArray([1, 2, 3])) + assert com.is_extension_array_dtype(pd.DatetimeIndex(["2000"], tz="US/Eastern")) + + dtype = DatetimeTZDtype("ns", tz="US/Eastern") + s = pd.Series([], dtype=dtype) + assert com.is_extension_array_dtype(s) + + if check_scipy: + import scipy.sparse + + assert not com.is_extension_array_dtype(scipy.sparse.bsr_matrix([1, 2, 3])) + + +def test_is_complex_dtype(): + assert not com.is_complex_dtype(int) + assert not com.is_complex_dtype(str) + assert not com.is_complex_dtype(pd.Series([1, 2])) + assert not com.is_complex_dtype(np.array(["a", "b"])) + + assert com.is_complex_dtype(np.complex_) + assert com.is_complex_dtype(complex) + assert com.is_complex_dtype(np.array([1 + 1j, 5])) + + +@pytest.mark.parametrize( + "input_param,result", + [ + (int, np.dtype(int)), + ("int32", np.dtype("int32")), + (float, np.dtype(float)), + ("float64", np.dtype("float64")), + (np.dtype("float64"), np.dtype("float64")), + (str, np.dtype(str)), + (pd.Series([1, 2], dtype=np.dtype("int16")), np.dtype("int16")), + (pd.Series(["a", "b"]), np.dtype(object)), + (pd.Index([1, 2]), np.dtype("int64")), + (pd.Index(["a", "b"]), np.dtype(object)), + ("category", "category"), + (pd.Categorical(["a", "b"]).dtype, CategoricalDtype(["a", "b"])), + (pd.Categorical(["a", "b"]), CategoricalDtype(["a", "b"])), + (pd.CategoricalIndex(["a", "b"]).dtype, CategoricalDtype(["a", "b"])), + (pd.CategoricalIndex(["a", "b"]), CategoricalDtype(["a", "b"])), + (CategoricalDtype(), CategoricalDtype()), + (pd.DatetimeIndex([1, 2]), np.dtype("=M8[ns]")), + (pd.DatetimeIndex([1, 2]).dtype, np.dtype("=M8[ns]")), + ("<M8[ns]", np.dtype("<M8[ns]")), + ], +) +def test_get_dtype(input_param, result): + assert com.get_dtype(input_param) == result diff --git 
a/.local/lib/python3.8/site-packages/pandas/tests/dtypes/test_inference.py b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/test_inference.py new file mode 100644 index 0000000..7953d65 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/test_inference.py @@ -0,0 +1,1954 @@ +""" +These test the public routines exposed in types/common.py +related to inference and not otherwise tested in types/test_common.py + +""" +import collections +from collections import namedtuple +from datetime import ( + date, + datetime, + time, + timedelta, +) +from decimal import Decimal +from fractions import Fraction +from io import StringIO +import itertools +from numbers import Number +import re + +import numpy as np +import pytest +import pytz + +from pandas._libs import ( + lib, + missing as libmissing, + ops as libops, +) +import pandas.util._test_decorators as td + +from pandas.core.dtypes import inference +from pandas.core.dtypes.common import ( + ensure_int32, + is_bool, + is_complex, + is_datetime64_any_dtype, + is_datetime64_dtype, + is_datetime64_ns_dtype, + is_datetime64tz_dtype, + is_float, + is_integer, + is_number, + is_scalar, + is_scipy_sparse, + is_timedelta64_dtype, + is_timedelta64_ns_dtype, +) + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + DateOffset, + DatetimeIndex, + Index, + Interval, + Period, + PeriodIndex, + Series, + Timedelta, + TimedeltaIndex, + Timestamp, +) +import pandas._testing as tm +from pandas.core.arrays import ( + BooleanArray, + FloatingArray, + IntegerArray, +) + + +@pytest.fixture(params=[True, False], ids=str) +def coerce(request): + return request.param + + +class MockNumpyLikeArray: + """ + A class which is numpy-like (e.g. Pint's Quantity) but not actually numpy. + + The key is that it is not actually a numpy array so + ``util.is_array(mock_numpy_like_array_instance)`` returns ``False``. Other + important properties are that the class defines a :meth:`__iter__` method + (so that ``isinstance(obj, abc.Iterable)`` returns ``True``) and has a + :meth:`ndim` property, as pandas special-cases 0-dimensional arrays in some + cases. + + We expect pandas to behave with respect to such duck arrays exactly as + with real numpy arrays. In particular, a 0-dimensional duck array is *NOT* + a scalar (`is_scalar(np.array(1)) == False`), but it is not list-like either. 
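+ + A minimal doctest-style sketch (expected values mirror the ``ll_params`` + expectations defined below, e.g. ``duck-ndarray-1d`` is list-like while + ``duck-ndarray-0d`` is not): + + >>> inference.is_list_like(MockNumpyLikeArray(np.array([1, 2]))) + True + >>> inference.is_list_like(MockNumpyLikeArray(np.array(2))) + False 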
+ """ + + def __init__(self, values): + self._values = values + + def __iter__(self): + iter_values = iter(self._values) + + def it_outer(): + yield from iter_values + + return it_outer() + + def __len__(self): + return len(self._values) + + def __array__(self, t=None): + return np.asarray(self._values, dtype=t) + + @property + def ndim(self): + return self._values.ndim + + @property + def dtype(self): + return self._values.dtype + + @property + def size(self): + return self._values.size + + @property + def shape(self): + return self._values.shape + + +# collect all objects to be tested for list-like-ness; use tuples of objects, +# whether they are list-like or not (special casing for sets), and their ID +ll_params = [ + ([1], True, "list"), + ([], True, "list-empty"), + ((1,), True, "tuple"), + ((), True, "tuple-empty"), + ({"a": 1}, True, "dict"), + ({}, True, "dict-empty"), + ({"a", 1}, "set", "set"), + (set(), "set", "set-empty"), + (frozenset({"a", 1}), "set", "frozenset"), + (frozenset(), "set", "frozenset-empty"), + (iter([1, 2]), True, "iterator"), + (iter([]), True, "iterator-empty"), + ((x for x in [1, 2]), True, "generator"), + ((_ for _ in []), True, "generator-empty"), + (Series([1]), True, "Series"), + (Series([], dtype=object), True, "Series-empty"), + (Series(["a"]).str, True, "StringMethods"), + (Series([], dtype="O").str, True, "StringMethods-empty"), + (Index([1]), True, "Index"), + (Index([]), True, "Index-empty"), + (DataFrame([[1]]), True, "DataFrame"), + (DataFrame(), True, "DataFrame-empty"), + (np.ndarray((2,) * 1), True, "ndarray-1d"), + (np.array([]), True, "ndarray-1d-empty"), + (np.ndarray((2,) * 2), True, "ndarray-2d"), + (np.array([[]]), True, "ndarray-2d-empty"), + (np.ndarray((2,) * 3), True, "ndarray-3d"), + (np.array([[[]]]), True, "ndarray-3d-empty"), + (np.ndarray((2,) * 4), True, "ndarray-4d"), + (np.array([[[[]]]]), True, "ndarray-4d-empty"), + (np.array(2), False, "ndarray-0d"), + (MockNumpyLikeArray(np.ndarray((2,) * 1)), True, "duck-ndarray-1d"), + (MockNumpyLikeArray(np.array([])), True, "duck-ndarray-1d-empty"), + (MockNumpyLikeArray(np.ndarray((2,) * 2)), True, "duck-ndarray-2d"), + (MockNumpyLikeArray(np.array([[]])), True, "duck-ndarray-2d-empty"), + (MockNumpyLikeArray(np.ndarray((2,) * 3)), True, "duck-ndarray-3d"), + (MockNumpyLikeArray(np.array([[[]]])), True, "duck-ndarray-3d-empty"), + (MockNumpyLikeArray(np.ndarray((2,) * 4)), True, "duck-ndarray-4d"), + (MockNumpyLikeArray(np.array([[[[]]]])), True, "duck-ndarray-4d-empty"), + (MockNumpyLikeArray(np.array(2)), False, "duck-ndarray-0d"), + (1, False, "int"), + (b"123", False, "bytes"), + (b"", False, "bytes-empty"), + ("123", False, "string"), + ("", False, "string-empty"), + (str, False, "string-type"), + (object(), False, "object"), + (np.nan, False, "NaN"), + (None, False, "None"), +] +objs, expected, ids = zip(*ll_params) + + +@pytest.fixture(params=zip(objs, expected), ids=ids) +def maybe_list_like(request): + return request.param + + +def test_is_list_like(maybe_list_like): + obj, expected = maybe_list_like + expected = True if expected == "set" else expected + assert inference.is_list_like(obj) == expected + + +def test_is_list_like_disallow_sets(maybe_list_like): + obj, expected = maybe_list_like + expected = False if expected == "set" else expected + assert inference.is_list_like(obj, allow_sets=False) == expected + + +def test_is_list_like_recursion(): + # GH 33721 + # interpreter would crash with SIGABRT + def foo(): + inference.is_list_like([]) + foo() + + with 
tm.external_error_raised(RecursionError): + foo() + + +def test_is_list_like_iter_is_none(): + # GH 43373 + # is_list_like was yielding false positives with __iter__ == None + class NotListLike: + def __getitem__(self, item): + return self + + __iter__ = None + + assert not inference.is_list_like(NotListLike()) + + +def test_is_sequence(): + is_seq = inference.is_sequence + assert is_seq((1, 2)) + assert is_seq([1, 2]) + assert not is_seq("abcd") + assert not is_seq(np.int64) + + class A: + def __getitem__(self): + return 1 + + assert not is_seq(A()) + + +def test_is_array_like(): + assert inference.is_array_like(Series([], dtype=object)) + assert inference.is_array_like(Series([1, 2])) + assert inference.is_array_like(np.array(["a", "b"])) + assert inference.is_array_like(Index(["2016-01-01"])) + assert inference.is_array_like(np.array([2, 3])) + assert inference.is_array_like(MockNumpyLikeArray(np.array([2, 3]))) + + class DtypeList(list): + dtype = "special" + + assert inference.is_array_like(DtypeList()) + + assert not inference.is_array_like([1, 2, 3]) + assert not inference.is_array_like(()) + assert not inference.is_array_like("foo") + assert not inference.is_array_like(123) + + +@pytest.mark.parametrize( + "inner", + [ + [], + [1], + (1,), + (1, 2), + {"a": 1}, + {1, "a"}, + Series([1]), + Series([], dtype=object), + Series(["a"]).str, + (x for x in range(5)), + ], +) +@pytest.mark.parametrize("outer", [list, Series, np.array, tuple]) +def test_is_nested_list_like_passes(inner, outer): + result = outer([inner for _ in range(5)]) + assert inference.is_list_like(result) + + +@pytest.mark.parametrize( + "obj", + [ + "abc", + [], + [1], + (1,), + ["a"], + "a", + {"a"}, + [1, 2, 3], + Series([1]), + DataFrame({"A": [1]}), + ([1, 2] for _ in range(5)), + ], +) +def test_is_nested_list_like_fails(obj): + assert not inference.is_nested_list_like(obj) + + +@pytest.mark.parametrize("ll", [{}, {"A": 1}, Series([1]), collections.defaultdict()]) +def test_is_dict_like_passes(ll): + assert inference.is_dict_like(ll) + + +@pytest.mark.parametrize( + "ll", + [ + "1", + 1, + [1, 2], + (1, 2), + range(2), + Index([1]), + dict, + collections.defaultdict, + Series, + ], +) +def test_is_dict_like_fails(ll): + assert not inference.is_dict_like(ll) + + +@pytest.mark.parametrize("has_keys", [True, False]) +@pytest.mark.parametrize("has_getitem", [True, False]) +@pytest.mark.parametrize("has_contains", [True, False]) +def test_is_dict_like_duck_type(has_keys, has_getitem, has_contains): + class DictLike: + def __init__(self, d): + self.d = d + + if has_keys: + + def keys(self): + return self.d.keys() + + if has_getitem: + + def __getitem__(self, key): + return self.d.__getitem__(key) + + if has_contains: + + def __contains__(self, key) -> bool: + return self.d.__contains__(key) + + d = DictLike({1: 2}) + result = inference.is_dict_like(d) + expected = has_keys and has_getitem and has_contains + + assert result is expected + + +def test_is_file_like(): + class MockFile: + pass + + is_file = inference.is_file_like + + data = StringIO("data") + assert is_file(data) + + # No read / write attributes + # No iterator attributes + m = MockFile() + assert not is_file(m) + + MockFile.write = lambda self: 0 + + # Write attribute but not an iterator + m = MockFile() + assert not is_file(m) + + # gh-16530: Valid iterator just means we have the + # __iter__ attribute for our purposes. 
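+ # (Concretely: the StringIO("data") object checked above has read/write + # and __iter__, so it counts as file-like, while the plain [1, 2, 3] list + # checked at the end of this test has __iter__ but no read/write methods + # and therefore does not.) 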
+ MockFile.__iter__ = lambda self: self + + # Valid write-only file + m = MockFile() + assert is_file(m) + + del MockFile.write + MockFile.read = lambda self: 0 + + # Valid read-only file + m = MockFile() + assert is_file(m) + + # Iterator but no read / write attributes + data = [1, 2, 3] + assert not is_file(data) + + +test_tuple = collections.namedtuple("test_tuple", ["a", "b", "c"]) + + +@pytest.mark.parametrize("ll", [test_tuple(1, 2, 3)]) +def test_is_names_tuple_passes(ll): + assert inference.is_named_tuple(ll) + + +@pytest.mark.parametrize("ll", [(1, 2, 3), "a", Series({"pi": 3.14})]) +def test_is_names_tuple_fails(ll): + assert not inference.is_named_tuple(ll) + + +def test_is_hashable(): + + # all new-style classes are hashable by default + class HashableClass: + pass + + class UnhashableClass1: + __hash__ = None + + class UnhashableClass2: + def __hash__(self): + raise TypeError("Not hashable") + + hashable = (1, 3.14, np.float64(3.14), "a", (), (1,), HashableClass()) + not_hashable = ([], UnhashableClass1()) + abc_hashable_not_really_hashable = (([],), UnhashableClass2()) + + for i in hashable: + assert inference.is_hashable(i) + for i in not_hashable: + assert not inference.is_hashable(i) + for i in abc_hashable_not_really_hashable: + assert not inference.is_hashable(i) + + # numpy.array is no longer collections.abc.Hashable as of + # https://github.com/numpy/numpy/pull/5326, just test + # is_hashable() + assert not inference.is_hashable(np.array([])) + + +@pytest.mark.parametrize("ll", [re.compile("ad")]) +def test_is_re_passes(ll): + assert inference.is_re(ll) + + +@pytest.mark.parametrize("ll", ["x", 2, 3, object()]) +def test_is_re_fails(ll): + assert not inference.is_re(ll) + + +@pytest.mark.parametrize( + "ll", [r"a", "x", r"asdf", re.compile("adsf"), r"\u2233\s*", re.compile(r"")] +) +def test_is_recompilable_passes(ll): + assert inference.is_re_compilable(ll) + + +@pytest.mark.parametrize("ll", [1, [], object()]) +def test_is_recompilable_fails(ll): + assert not inference.is_re_compilable(ll) + + +class TestInference: + @pytest.mark.parametrize( + "arr", + [ + np.array(list("abc"), dtype="S1"), + np.array(list("abc"), dtype="S1").astype(object), + [b"a", np.nan, b"c"], + ], + ) + def test_infer_dtype_bytes(self, arr): + result = lib.infer_dtype(arr, skipna=True) + assert result == "bytes" + + @pytest.mark.parametrize( + "value, expected", + [ + (float("inf"), True), + (np.inf, True), + (-np.inf, False), + (1, False), + ("a", False), + ], + ) + def test_isposinf_scalar(self, value, expected): + # GH 11352 + result = libmissing.isposinf_scalar(value) + assert result is expected + + @pytest.mark.parametrize( + "value, expected", + [ + (float("-inf"), True), + (-np.inf, True), + (np.inf, False), + (1, False), + ("a", False), + ], + ) + def test_isneginf_scalar(self, value, expected): + result = libmissing.isneginf_scalar(value) + assert result is expected + + @pytest.mark.parametrize( + "convert_to_masked_nullable, exp", + [ + ( + True, + BooleanArray( + np.array([True, False], dtype="bool"), np.array([False, True]) + ), + ), + (False, np.array([True, np.nan], dtype="object")), + ], + ) + def test_maybe_convert_nullable_boolean(self, convert_to_masked_nullable, exp): + # GH 40687 + arr = np.array([True, np.NaN], dtype=object) + result = libops.maybe_convert_bool( + arr, set(), convert_to_masked_nullable=convert_to_masked_nullable + ) + if convert_to_masked_nullable: + tm.assert_extension_array_equal(BooleanArray(*result), exp) + else: + result = result[0] + 
tm.assert_numpy_array_equal(result, exp) + + @pytest.mark.parametrize("convert_to_masked_nullable", [True, False]) + @pytest.mark.parametrize("coerce_numeric", [True, False]) + @pytest.mark.parametrize( + "infinity", ["inf", "inF", "iNf", "Inf", "iNF", "InF", "INf", "INF"] + ) + @pytest.mark.parametrize("prefix", ["", "-", "+"]) + def test_maybe_convert_numeric_infinities( + self, coerce_numeric, infinity, prefix, convert_to_masked_nullable + ): + # see gh-13274 + result, _ = lib.maybe_convert_numeric( + np.array([prefix + infinity], dtype=object), + na_values={"", "NULL", "nan"}, + coerce_numeric=coerce_numeric, + convert_to_masked_nullable=convert_to_masked_nullable, + ) + expected = np.array([np.inf if prefix in ["", "+"] else -np.inf]) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("convert_to_masked_nullable", [True, False]) + def test_maybe_convert_numeric_infinities_raises(self, convert_to_masked_nullable): + msg = "Unable to parse string" + with pytest.raises(ValueError, match=msg): + lib.maybe_convert_numeric( + np.array(["foo_inf"], dtype=object), + na_values={"", "NULL", "nan"}, + coerce_numeric=False, + convert_to_masked_nullable=convert_to_masked_nullable, + ) + + @pytest.mark.parametrize("convert_to_masked_nullable", [True, False]) + def test_maybe_convert_numeric_post_floatify_nan( + self, coerce, convert_to_masked_nullable + ): + # see gh-13314 + data = np.array(["1.200", "-999.000", "4.500"], dtype=object) + expected = np.array([1.2, np.nan, 4.5], dtype=np.float64) + nan_values = {-999, -999.0} + + out = lib.maybe_convert_numeric( + data, + nan_values, + coerce, + convert_to_masked_nullable=convert_to_masked_nullable, + ) + if convert_to_masked_nullable: + expected = FloatingArray(expected, np.isnan(expected)) + tm.assert_extension_array_equal(expected, FloatingArray(*out)) + else: + out = out[0] + tm.assert_numpy_array_equal(out, expected) + + def test_convert_infs(self): + arr = np.array(["inf", "inf", "inf"], dtype="O") + result, _ = lib.maybe_convert_numeric(arr, set(), False) + assert result.dtype == np.float64 + + arr = np.array(["-inf", "-inf", "-inf"], dtype="O") + result, _ = lib.maybe_convert_numeric(arr, set(), False) + assert result.dtype == np.float64 + + def test_scientific_no_exponent(self): + # See PR 12215 + arr = np.array(["42E", "2E", "99e", "6e"], dtype="O") + result, _ = lib.maybe_convert_numeric(arr, set(), False, True) + assert np.all(np.isnan(result)) + + def test_convert_non_hashable(self): + # GH13324 + # make sure that we are handing non-hashables + arr = np.array([[10.0, 2], 1.0, "apple"], dtype=object) + result, _ = lib.maybe_convert_numeric(arr, set(), False, True) + tm.assert_numpy_array_equal(result, np.array([np.nan, 1.0, np.nan])) + + def test_convert_numeric_uint64(self): + arr = np.array([2 ** 63], dtype=object) + exp = np.array([2 ** 63], dtype=np.uint64) + tm.assert_numpy_array_equal(lib.maybe_convert_numeric(arr, set())[0], exp) + + arr = np.array([str(2 ** 63)], dtype=object) + exp = np.array([2 ** 63], dtype=np.uint64) + tm.assert_numpy_array_equal(lib.maybe_convert_numeric(arr, set())[0], exp) + + arr = np.array([np.uint64(2 ** 63)], dtype=object) + exp = np.array([2 ** 63], dtype=np.uint64) + tm.assert_numpy_array_equal(lib.maybe_convert_numeric(arr, set())[0], exp) + + @pytest.mark.parametrize( + "arr", + [ + np.array([2 ** 63, np.nan], dtype=object), + np.array([str(2 ** 63), np.nan], dtype=object), + np.array([np.nan, 2 ** 63], dtype=object), + np.array([np.nan, str(2 ** 63)], dtype=object), + ], + 
) + def test_convert_numeric_uint64_nan(self, coerce, arr): + expected = arr.astype(float) if coerce else arr.copy() + result, _ = lib.maybe_convert_numeric(arr, set(), coerce_numeric=coerce) + tm.assert_almost_equal(result, expected) + + @pytest.mark.parametrize("convert_to_masked_nullable", [True, False]) + def test_convert_numeric_uint64_nan_values( + self, coerce, convert_to_masked_nullable + ): + arr = np.array([2 ** 63, 2 ** 63 + 1], dtype=object) + na_values = {2 ** 63} + + expected = ( + np.array([np.nan, 2 ** 63 + 1], dtype=float) if coerce else arr.copy() + ) + result = lib.maybe_convert_numeric( + arr, + na_values, + coerce_numeric=coerce, + convert_to_masked_nullable=convert_to_masked_nullable, + ) + if convert_to_masked_nullable and coerce: + expected = IntegerArray( + np.array([0, 2 ** 63 + 1], dtype="u8"), + np.array([True, False], dtype="bool"), + ) + result = IntegerArray(*result) + else: + result = result[0] # discard mask + tm.assert_almost_equal(result, expected) + + @pytest.mark.parametrize( + "case", + [ + np.array([2 ** 63, -1], dtype=object), + np.array([str(2 ** 63), -1], dtype=object), + np.array([str(2 ** 63), str(-1)], dtype=object), + np.array([-1, 2 ** 63], dtype=object), + np.array([-1, str(2 ** 63)], dtype=object), + np.array([str(-1), str(2 ** 63)], dtype=object), + ], + ) + @pytest.mark.parametrize("convert_to_masked_nullable", [True, False]) + def test_convert_numeric_int64_uint64( + self, case, coerce, convert_to_masked_nullable + ): + expected = case.astype(float) if coerce else case.copy() + result, _ = lib.maybe_convert_numeric( + case, + set(), + coerce_numeric=coerce, + convert_to_masked_nullable=convert_to_masked_nullable, + ) + + tm.assert_almost_equal(result, expected) + + @pytest.mark.parametrize("convert_to_masked_nullable", [True, False]) + def test_convert_numeric_string_uint64(self, convert_to_masked_nullable): + # GH32394 + result = lib.maybe_convert_numeric( + np.array(["uint64"], dtype=object), + set(), + coerce_numeric=True, + convert_to_masked_nullable=convert_to_masked_nullable, + ) + if convert_to_masked_nullable: + result = FloatingArray(*result) + else: + result = result[0] + assert np.isnan(result) + + @pytest.mark.parametrize("value", [-(2 ** 63) - 1, 2 ** 64]) + def test_convert_int_overflow(self, value): + # see gh-18584 + arr = np.array([value], dtype=object) + result = lib.maybe_convert_objects(arr) + tm.assert_numpy_array_equal(arr, result) + + def test_maybe_convert_objects_uint64(self): + # see gh-4471 + arr = np.array([2 ** 63], dtype=object) + exp = np.array([2 ** 63], dtype=np.uint64) + tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp) + + # NumPy bug: can't compare uint64 to int64, as that + # results in both casting to float64, so we should + # make sure that this function is robust against it + arr = np.array([np.uint64(2 ** 63)], dtype=object) + exp = np.array([2 ** 63], dtype=np.uint64) + tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp) + + arr = np.array([2, -1], dtype=object) + exp = np.array([2, -1], dtype=np.int64) + tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp) + + arr = np.array([2 ** 63, -1], dtype=object) + exp = np.array([2 ** 63, -1], dtype=object) + tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp) + + def test_maybe_convert_objects_datetime(self): + # GH27438 + arr = np.array( + [np.datetime64("2000-01-01"), np.timedelta64(1, "s")], dtype=object + ) + exp = arr.copy() + out = lib.maybe_convert_objects( + arr, convert_datetime=True, 
convert_timedelta=True + ) + tm.assert_numpy_array_equal(out, exp) + + arr = np.array([pd.NaT, np.timedelta64(1, "s")], dtype=object) + exp = np.array([np.timedelta64("NaT"), np.timedelta64(1, "s")], dtype="m8[ns]") + out = lib.maybe_convert_objects( + arr, convert_datetime=True, convert_timedelta=True + ) + tm.assert_numpy_array_equal(out, exp) + + # with convert_timedelta=True, the nan is a valid NA value for td64 + arr = np.array([np.timedelta64(1, "s"), np.nan], dtype=object) + exp = exp[::-1] + out = lib.maybe_convert_objects( + arr, convert_datetime=True, convert_timedelta=True + ) + tm.assert_numpy_array_equal(out, exp) + + def test_maybe_convert_objects_dtype_if_all_nat(self): + arr = np.array([pd.NaT, pd.NaT], dtype=object) + out = lib.maybe_convert_objects( + arr, convert_datetime=True, convert_timedelta=True + ) + # no dtype_if_all_nat passed -> we dont guess + tm.assert_numpy_array_equal(out, arr) + + out = lib.maybe_convert_objects( + arr, + convert_datetime=True, + convert_timedelta=True, + dtype_if_all_nat=np.dtype("timedelta64[ns]"), + ) + exp = np.array(["NaT", "NaT"], dtype="timedelta64[ns]") + tm.assert_numpy_array_equal(out, exp) + + out = lib.maybe_convert_objects( + arr, + convert_datetime=True, + convert_timedelta=True, + dtype_if_all_nat=np.dtype("datetime64[ns]"), + ) + exp = np.array(["NaT", "NaT"], dtype="datetime64[ns]") + tm.assert_numpy_array_equal(out, exp) + + def test_maybe_convert_objects_dtype_if_all_nat_invalid(self): + # we accept datetime64[ns], timedelta64[ns], and EADtype + arr = np.array([pd.NaT, pd.NaT], dtype=object) + + with pytest.raises(ValueError, match="int64"): + lib.maybe_convert_objects( + arr, + convert_datetime=True, + convert_timedelta=True, + dtype_if_all_nat=np.dtype("int64"), + ) + + @pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"]) + def test_maybe_convert_objects_datetime_overflow_safe(self, dtype): + stamp = datetime(2363, 10, 4) # Enterprise-D launch date + if dtype == "timedelta64[ns]": + stamp = stamp - datetime(1970, 1, 1) + arr = np.array([stamp], dtype=object) + + out = lib.maybe_convert_objects( + arr, convert_datetime=True, convert_timedelta=True + ) + # no OutOfBoundsDatetime/OutOfBoundsTimedeltas + tm.assert_numpy_array_equal(out, arr) + + def test_maybe_convert_objects_mixed_datetimes(self): + ts = Timestamp("now") + vals = [ts, ts.to_pydatetime(), ts.to_datetime64(), pd.NaT, np.nan, None] + + for data in itertools.permutations(vals): + data = np.array(list(data), dtype=object) + expected = DatetimeIndex(data)._data._ndarray + result = lib.maybe_convert_objects(data, convert_datetime=True) + tm.assert_numpy_array_equal(result, expected) + + def test_maybe_convert_objects_timedelta64_nat(self): + obj = np.timedelta64("NaT", "ns") + arr = np.array([obj], dtype=object) + assert arr[0] is obj + + result = lib.maybe_convert_objects(arr, convert_timedelta=True) + + expected = np.array([obj], dtype="m8[ns]") + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "exp", + [ + IntegerArray(np.array([2, 0], dtype="i8"), np.array([False, True])), + IntegerArray(np.array([2, 0], dtype="int64"), np.array([False, True])), + ], + ) + def test_maybe_convert_objects_nullable_integer(self, exp): + # GH27335 + arr = np.array([2, np.NaN], dtype=object) + result = lib.maybe_convert_objects(arr, convert_to_nullable_integer=True) + + tm.assert_extension_array_equal(result, exp) + + @pytest.mark.parametrize( + "convert_to_masked_nullable, exp", + [ + (True, IntegerArray(np.array([2, 0], 
dtype="i8"), np.array([False, True]))), + (False, np.array([2, np.nan], dtype="float64")), + ], + ) + def test_maybe_convert_numeric_nullable_integer( + self, convert_to_masked_nullable, exp + ): + # GH 40687 + arr = np.array([2, np.NaN], dtype=object) + result = lib.maybe_convert_numeric( + arr, set(), convert_to_masked_nullable=convert_to_masked_nullable + ) + if convert_to_masked_nullable: + result = IntegerArray(*result) + tm.assert_extension_array_equal(result, exp) + else: + result = result[0] + tm.assert_numpy_array_equal(result, exp) + + @pytest.mark.parametrize( + "convert_to_masked_nullable, exp", + [ + ( + True, + FloatingArray( + np.array([2.0, 0.0], dtype="float64"), np.array([False, True]) + ), + ), + (False, np.array([2.0, np.nan], dtype="float64")), + ], + ) + def test_maybe_convert_numeric_floating_array( + self, convert_to_masked_nullable, exp + ): + # GH 40687 + arr = np.array([2.0, np.nan], dtype=object) + result = lib.maybe_convert_numeric( + arr, set(), convert_to_masked_nullable=convert_to_masked_nullable + ) + if convert_to_masked_nullable: + tm.assert_extension_array_equal(FloatingArray(*result), exp) + else: + result = result[0] + tm.assert_numpy_array_equal(result, exp) + + def test_maybe_convert_objects_bool_nan(self): + # GH32146 + ind = Index([True, False, np.nan], dtype=object) + exp = np.array([True, False, np.nan], dtype=object) + out = lib.maybe_convert_objects(ind.values, safe=1) + tm.assert_numpy_array_equal(out, exp) + + @pytest.mark.parametrize( + "data0", + [ + True, + 1, + 1.0, + 1.0 + 1.0j, + np.int8(1), + np.int16(1), + np.int32(1), + np.int64(1), + np.float16(1), + np.float32(1), + np.float64(1), + np.complex64(1), + np.complex128(1), + ], + ) + @pytest.mark.parametrize( + "data1", + [ + True, + 1, + 1.0, + 1.0 + 1.0j, + np.int8(1), + np.int16(1), + np.int32(1), + np.int64(1), + np.float16(1), + np.float32(1), + np.float64(1), + np.complex64(1), + np.complex128(1), + ], + ) + def test_maybe_convert_objects_itemsize(self, data0, data1): + # GH 40908 + data = [data0, data1] + arr = np.array(data, dtype="object") + + common_kind = np.find_common_type( + [type(data0), type(data1)], scalar_types=[] + ).kind + kind0 = "python" if not hasattr(data0, "dtype") else data0.dtype.kind + kind1 = "python" if not hasattr(data1, "dtype") else data1.dtype.kind + if kind0 != "python" and kind1 != "python": + kind = common_kind + itemsize = max(data0.dtype.itemsize, data1.dtype.itemsize) + elif is_bool(data0) or is_bool(data1): + kind = "bool" if (is_bool(data0) and is_bool(data1)) else "object" + itemsize = "" + elif is_complex(data0) or is_complex(data1): + kind = common_kind + itemsize = 16 + else: + kind = common_kind + itemsize = 8 + + expected = np.array(data, dtype=f"{kind}{itemsize}") + result = lib.maybe_convert_objects(arr) + tm.assert_numpy_array_equal(result, expected) + + def test_mixed_dtypes_remain_object_array(self): + # GH14956 + arr = np.array([datetime(2015, 1, 1, tzinfo=pytz.utc), 1], dtype=object) + result = lib.maybe_convert_objects(arr, convert_datetime=True) + tm.assert_numpy_array_equal(result, arr) + + @pytest.mark.parametrize( + "idx", + [ + pd.IntervalIndex.from_breaks(range(5), closed="both"), + pd.period_range("2016-01-01", periods=3, freq="D"), + ], + ) + def test_maybe_convert_objects_ea(self, idx): + + result = lib.maybe_convert_objects( + np.array(idx, dtype=object), + convert_period=True, + convert_interval=True, + ) + tm.assert_extension_array_equal(result, idx._data) + + +class TestTypeInference: + + # Dummy class used for 
testing with Python objects + class Dummy: + pass + + def test_inferred_dtype_fixture(self, any_skipna_inferred_dtype): + # see pandas/conftest.py + inferred_dtype, values = any_skipna_inferred_dtype + + # make sure the inferred dtype of the fixture is as requested + assert inferred_dtype == lib.infer_dtype(values, skipna=True) + + @pytest.mark.parametrize("skipna", [True, False]) + def test_length_zero(self, skipna): + result = lib.infer_dtype(np.array([], dtype="i4"), skipna=skipna) + assert result == "integer" + + result = lib.infer_dtype([], skipna=skipna) + assert result == "empty" + + # GH 18004 + arr = np.array([np.array([], dtype=object), np.array([], dtype=object)]) + result = lib.infer_dtype(arr, skipna=skipna) + assert result == "empty" + + def test_integers(self): + arr = np.array([1, 2, 3, np.int64(4), np.int32(5)], dtype="O") + result = lib.infer_dtype(arr, skipna=True) + assert result == "integer" + + arr = np.array([1, 2, 3, np.int64(4), np.int32(5), "foo"], dtype="O") + result = lib.infer_dtype(arr, skipna=True) + assert result == "mixed-integer" + + arr = np.array([1, 2, 3, 4, 5], dtype="i4") + result = lib.infer_dtype(arr, skipna=True) + assert result == "integer" + + @pytest.mark.parametrize( + "arr, skipna", + [ + (np.array([1, 2, np.nan, np.nan, 3], dtype="O"), False), + (np.array([1, 2, np.nan, np.nan, 3], dtype="O"), True), + (np.array([1, 2, 3, np.int64(4), np.int32(5), np.nan], dtype="O"), False), + (np.array([1, 2, 3, np.int64(4), np.int32(5), np.nan], dtype="O"), True), + ], + ) + def test_integer_na(self, arr, skipna): + # GH 27392 + result = lib.infer_dtype(arr, skipna=skipna) + expected = "integer" if skipna else "integer-na" + assert result == expected + + def test_infer_dtype_skipna_default(self): + # infer_dtype `skipna` default deprecated in GH#24050, + # changed to True in GH#29876 + arr = np.array([1, 2, 3, np.nan], dtype=object) + + result = lib.infer_dtype(arr) + assert result == "integer" + + def test_bools(self): + arr = np.array([True, False, True, True, True], dtype="O") + result = lib.infer_dtype(arr, skipna=True) + assert result == "boolean" + + arr = np.array([np.bool_(True), np.bool_(False)], dtype="O") + result = lib.infer_dtype(arr, skipna=True) + assert result == "boolean" + + arr = np.array([True, False, True, "foo"], dtype="O") + result = lib.infer_dtype(arr, skipna=True) + assert result == "mixed" + + arr = np.array([True, False, True], dtype=bool) + result = lib.infer_dtype(arr, skipna=True) + assert result == "boolean" + + arr = np.array([True, np.nan, False], dtype="O") + result = lib.infer_dtype(arr, skipna=True) + assert result == "boolean" + + result = lib.infer_dtype(arr, skipna=False) + assert result == "mixed" + + def test_floats(self): + arr = np.array([1.0, 2.0, 3.0, np.float64(4), np.float32(5)], dtype="O") + result = lib.infer_dtype(arr, skipna=True) + assert result == "floating" + + arr = np.array([1, 2, 3, np.float64(4), np.float32(5), "foo"], dtype="O") + result = lib.infer_dtype(arr, skipna=True) + assert result == "mixed-integer" + + arr = np.array([1, 2, 3, 4, 5], dtype="f4") + result = lib.infer_dtype(arr, skipna=True) + assert result == "floating" + + arr = np.array([1, 2, 3, 4, 5], dtype="f8") + result = lib.infer_dtype(arr, skipna=True) + assert result == "floating" + + def test_decimals(self): + # GH15690 + arr = np.array([Decimal(1), Decimal(2), Decimal(3)]) + result = lib.infer_dtype(arr, skipna=True) + assert result == "decimal" + + arr = np.array([1.0, 2.0, Decimal(3)]) + result = lib.infer_dtype(arr, 
skipna=True) + assert result == "mixed" + + result = lib.infer_dtype(arr[::-1], skipna=True) + assert result == "mixed" + + arr = np.array([Decimal(1), Decimal("NaN"), Decimal(3)]) + result = lib.infer_dtype(arr, skipna=True) + assert result == "decimal" + + arr = np.array([Decimal(1), np.nan, Decimal(3)], dtype="O") + result = lib.infer_dtype(arr, skipna=True) + assert result == "decimal" + + # complex is compatible with nan, so skipna has no effect + @pytest.mark.parametrize("skipna", [True, False]) + def test_complex(self, skipna): + # gets cast to complex on array construction + arr = np.array([1.0, 2.0, 1 + 1j]) + result = lib.infer_dtype(arr, skipna=skipna) + assert result == "complex" + + arr = np.array([1.0, 2.0, 1 + 1j], dtype="O") + result = lib.infer_dtype(arr, skipna=skipna) + assert result == "mixed" + + result = lib.infer_dtype(arr[::-1], skipna=skipna) + assert result == "mixed" + + # gets cast to complex on array construction + arr = np.array([1, np.nan, 1 + 1j]) + result = lib.infer_dtype(arr, skipna=skipna) + assert result == "complex" + + arr = np.array([1.0, np.nan, 1 + 1j], dtype="O") + result = lib.infer_dtype(arr, skipna=skipna) + assert result == "mixed" + + # complex with nans stays complex + arr = np.array([1 + 1j, np.nan, 3 + 3j], dtype="O") + result = lib.infer_dtype(arr, skipna=skipna) + assert result == "complex" + + # test smaller complex dtype; will pass through _try_infer_map fastpath + arr = np.array([1 + 1j, np.nan, 3 + 3j], dtype=np.complex64) + result = lib.infer_dtype(arr, skipna=skipna) + assert result == "complex" + + def test_string(self): + pass + + def test_unicode(self): + arr = ["a", np.nan, "c"] + result = lib.infer_dtype(arr, skipna=False) + # This currently returns "mixed", but it's not clear that's optimal. 
+ # This could also return "string" or "mixed-string" + assert result == "mixed" + + arr = ["a", np.nan, "c"] + result = lib.infer_dtype(arr, skipna=True) + assert result == "string" + + arr = ["a", "c"] + result = lib.infer_dtype(arr, skipna=False) + assert result == "string" + + @pytest.mark.parametrize( + "dtype, missing, skipna, expected", + [ + (float, np.nan, False, "floating"), + (float, np.nan, True, "floating"), + (object, np.nan, False, "floating"), + (object, np.nan, True, "empty"), + (object, None, False, "mixed"), + (object, None, True, "empty"), + ], + ) + @pytest.mark.parametrize("box", [Series, np.array]) + def test_object_empty(self, box, missing, dtype, skipna, expected): + # GH 23421 + arr = box([missing, missing], dtype=dtype) + + result = lib.infer_dtype(arr, skipna=skipna) + assert result == expected + + def test_datetime(self): + + dates = [datetime(2012, 1, x) for x in range(1, 20)] + index = Index(dates) + assert index.inferred_type == "datetime64" + + def test_infer_dtype_datetime64(self): + arr = np.array( + [np.datetime64("2011-01-01"), np.datetime64("2011-01-01")], dtype=object + ) + assert lib.infer_dtype(arr, skipna=True) == "datetime64" + + @pytest.mark.parametrize("na_value", [pd.NaT, np.nan]) + def test_infer_dtype_datetime64_with_na(self, na_value): + # starts with nan + arr = np.array([na_value, np.datetime64("2011-01-02")]) + assert lib.infer_dtype(arr, skipna=True) == "datetime64" + + arr = np.array([na_value, np.datetime64("2011-01-02"), na_value]) + assert lib.infer_dtype(arr, skipna=True) == "datetime64" + + @pytest.mark.parametrize( + "arr", + [ + np.array( + [np.timedelta64("nat"), np.datetime64("2011-01-02")], dtype=object + ), + np.array( + [np.datetime64("2011-01-02"), np.timedelta64("nat")], dtype=object + ), + np.array([np.datetime64("2011-01-01"), Timestamp("2011-01-02")]), + np.array([Timestamp("2011-01-02"), np.datetime64("2011-01-01")]), + np.array([np.nan, Timestamp("2011-01-02"), 1.1]), + np.array([np.nan, "2011-01-01", Timestamp("2011-01-02")], dtype=object), + np.array([np.datetime64("nat"), np.timedelta64(1, "D")], dtype=object), + np.array([np.timedelta64(1, "D"), np.datetime64("nat")], dtype=object), + ], + ) + def test_infer_datetimelike_dtype_mixed(self, arr): + assert lib.infer_dtype(arr, skipna=False) == "mixed" + + def test_infer_dtype_mixed_integer(self): + arr = np.array([np.nan, Timestamp("2011-01-02"), 1]) + assert lib.infer_dtype(arr, skipna=True) == "mixed-integer" + + @pytest.mark.parametrize( + "arr", + [ + np.array([Timestamp("2011-01-01"), Timestamp("2011-01-02")]), + np.array([datetime(2011, 1, 1), datetime(2012, 2, 1)]), + np.array([datetime(2011, 1, 1), Timestamp("2011-01-02")]), + ], + ) + def test_infer_dtype_datetime(self, arr): + assert lib.infer_dtype(arr, skipna=True) == "datetime" + + @pytest.mark.parametrize("na_value", [pd.NaT, np.nan]) + @pytest.mark.parametrize( + "time_stamp", [Timestamp("2011-01-01"), datetime(2011, 1, 1)] + ) + def test_infer_dtype_datetime_with_na(self, na_value, time_stamp): + # starts with nan + arr = np.array([na_value, time_stamp]) + assert lib.infer_dtype(arr, skipna=True) == "datetime" + + arr = np.array([na_value, time_stamp, na_value]) + assert lib.infer_dtype(arr, skipna=True) == "datetime" + + @pytest.mark.parametrize( + "arr", + [ + np.array([Timedelta("1 days"), Timedelta("2 days")]), + np.array([np.timedelta64(1, "D"), np.timedelta64(2, "D")], dtype=object), + np.array([timedelta(1), timedelta(2)]), + ], + ) + def test_infer_dtype_timedelta(self, arr): + assert 
lib.infer_dtype(arr, skipna=True) == "timedelta" + + @pytest.mark.parametrize("na_value", [pd.NaT, np.nan]) + @pytest.mark.parametrize( + "delta", [Timedelta("1 days"), np.timedelta64(1, "D"), timedelta(1)] + ) + def test_infer_dtype_timedelta_with_na(self, na_value, delta): + # starts with nan + arr = np.array([na_value, delta]) + assert lib.infer_dtype(arr, skipna=True) == "timedelta" + + arr = np.array([na_value, delta, na_value]) + assert lib.infer_dtype(arr, skipna=True) == "timedelta" + + def test_infer_dtype_period(self): + # GH 13664 + arr = np.array([Period("2011-01", freq="D"), Period("2011-02", freq="D")]) + assert lib.infer_dtype(arr, skipna=True) == "period" + + # non-homogeneous freqs -> mixed + arr = np.array([Period("2011-01", freq="D"), Period("2011-02", freq="M")]) + assert lib.infer_dtype(arr, skipna=True) == "mixed" + + @pytest.mark.parametrize("klass", [pd.array, Series, Index]) + @pytest.mark.parametrize("skipna", [True, False]) + def test_infer_dtype_period_array(self, klass, skipna): + # https://github.com/pandas-dev/pandas/issues/23553 + values = klass( + [ + Period("2011-01-01", freq="D"), + Period("2011-01-02", freq="D"), + pd.NaT, + ] + ) + assert lib.infer_dtype(values, skipna=skipna) == "period" + + # periods but mixed freq + values = klass( + [ + Period("2011-01-01", freq="D"), + Period("2011-01-02", freq="M"), + pd.NaT, + ] + ) + # with pd.array this becomes PandasArray which ends up as "unknown-array" + exp = "unknown-array" if klass is pd.array else "mixed" + assert lib.infer_dtype(values, skipna=skipna) == exp + + def test_infer_dtype_period_mixed(self): + arr = np.array( + [Period("2011-01", freq="M"), np.datetime64("nat")], dtype=object + ) + assert lib.infer_dtype(arr, skipna=False) == "mixed" + + arr = np.array( + [np.datetime64("nat"), Period("2011-01", freq="M")], dtype=object + ) + assert lib.infer_dtype(arr, skipna=False) == "mixed" + + @pytest.mark.parametrize("na_value", [pd.NaT, np.nan]) + def test_infer_dtype_period_with_na(self, na_value): + # starts with nan + arr = np.array([na_value, Period("2011-01", freq="D")]) + assert lib.infer_dtype(arr, skipna=True) == "period" + + arr = np.array([na_value, Period("2011-01", freq="D"), na_value]) + assert lib.infer_dtype(arr, skipna=True) == "period" + + @pytest.mark.parametrize( + "data", + [ + [datetime(2017, 6, 12, 19, 30), datetime(2017, 3, 11, 1, 15)], + [Timestamp("20170612"), Timestamp("20170311")], + [ + Timestamp("20170612", tz="US/Eastern"), + Timestamp("20170311", tz="US/Eastern"), + ], + [date(2017, 6, 12), Timestamp("20170311", tz="US/Eastern")], + [np.datetime64("2017-06-12"), np.datetime64("2017-03-11")], + [np.datetime64("2017-06-12"), datetime(2017, 3, 11, 1, 15)], + ], + ) + def test_infer_datetimelike_array_datetime(self, data): + assert lib.infer_datetimelike_array(data) == ("datetime", False) + + @pytest.mark.parametrize( + "data", + [ + [timedelta(2017, 6, 12), timedelta(2017, 3, 11)], + [timedelta(2017, 6, 12), date(2017, 3, 11)], + [np.timedelta64(2017, "D"), np.timedelta64(6, "s")], + [np.timedelta64(2017, "D"), timedelta(2017, 3, 11)], + ], + ) + def test_infer_datetimelike_array_timedelta(self, data): + assert lib.infer_datetimelike_array(data) == ("timedelta", False) + + def test_infer_datetimelike_array_date(self): + arr = [date(2017, 6, 12), date(2017, 3, 11)] + assert lib.infer_datetimelike_array(arr) == ("date", False) + + @pytest.mark.parametrize( + "data", + [ + ["2017-06-12", "2017-03-11"], + [20170612, 20170311], + [20170612.5, 20170311.8], + [Dummy(), Dummy()], + 
[Timestamp("20170612"), Timestamp("20170311", tz="US/Eastern")], + [Timestamp("20170612"), 20170311], + [timedelta(2017, 6, 12), Timestamp("20170311", tz="US/Eastern")], + ], + ) + def test_infer_datetimelike_array_mixed(self, data): + assert lib.infer_datetimelike_array(data)[0] == "mixed" + + @pytest.mark.parametrize( + "first, expected", + [ + [[None], "mixed"], + [[np.nan], "mixed"], + [[pd.NaT], "nat"], + [[datetime(2017, 6, 12, 19, 30), pd.NaT], "datetime"], + [[np.datetime64("2017-06-12"), pd.NaT], "datetime"], + [[date(2017, 6, 12), pd.NaT], "date"], + [[timedelta(2017, 6, 12), pd.NaT], "timedelta"], + [[np.timedelta64(2017, "D"), pd.NaT], "timedelta"], + ], + ) + @pytest.mark.parametrize("second", [None, np.nan]) + def test_infer_datetimelike_array_nan_nat_like(self, first, second, expected): + first.append(second) + assert lib.infer_datetimelike_array(first) == (expected, False) + + def test_infer_dtype_all_nan_nat_like(self): + arr = np.array([np.nan, np.nan]) + assert lib.infer_dtype(arr, skipna=True) == "floating" + + # nan and None mix are result in mixed + arr = np.array([np.nan, np.nan, None]) + assert lib.infer_dtype(arr, skipna=True) == "empty" + assert lib.infer_dtype(arr, skipna=False) == "mixed" + + arr = np.array([None, np.nan, np.nan]) + assert lib.infer_dtype(arr, skipna=True) == "empty" + assert lib.infer_dtype(arr, skipna=False) == "mixed" + + # pd.NaT + arr = np.array([pd.NaT]) + assert lib.infer_dtype(arr, skipna=False) == "datetime" + + arr = np.array([pd.NaT, np.nan]) + assert lib.infer_dtype(arr, skipna=False) == "datetime" + + arr = np.array([np.nan, pd.NaT]) + assert lib.infer_dtype(arr, skipna=False) == "datetime" + + arr = np.array([np.nan, pd.NaT, np.nan]) + assert lib.infer_dtype(arr, skipna=False) == "datetime" + + arr = np.array([None, pd.NaT, None]) + assert lib.infer_dtype(arr, skipna=False) == "datetime" + + # np.datetime64(nat) + arr = np.array([np.datetime64("nat")]) + assert lib.infer_dtype(arr, skipna=False) == "datetime64" + + for n in [np.nan, pd.NaT, None]: + arr = np.array([n, np.datetime64("nat"), n]) + assert lib.infer_dtype(arr, skipna=False) == "datetime64" + + arr = np.array([pd.NaT, n, np.datetime64("nat"), n]) + assert lib.infer_dtype(arr, skipna=False) == "datetime64" + + arr = np.array([np.timedelta64("nat")], dtype=object) + assert lib.infer_dtype(arr, skipna=False) == "timedelta" + + for n in [np.nan, pd.NaT, None]: + arr = np.array([n, np.timedelta64("nat"), n]) + assert lib.infer_dtype(arr, skipna=False) == "timedelta" + + arr = np.array([pd.NaT, n, np.timedelta64("nat"), n]) + assert lib.infer_dtype(arr, skipna=False) == "timedelta" + + # datetime / timedelta mixed + arr = np.array([pd.NaT, np.datetime64("nat"), np.timedelta64("nat"), np.nan]) + assert lib.infer_dtype(arr, skipna=False) == "mixed" + + arr = np.array([np.timedelta64("nat"), np.datetime64("nat")], dtype=object) + assert lib.infer_dtype(arr, skipna=False) == "mixed" + + def test_is_datetimelike_array_all_nan_nat_like(self): + arr = np.array([np.nan, pd.NaT, np.datetime64("nat")]) + assert lib.is_datetime_array(arr) + assert lib.is_datetime64_array(arr) + assert not lib.is_timedelta_or_timedelta64_array(arr) + + arr = np.array([np.nan, pd.NaT, np.timedelta64("nat")]) + assert not lib.is_datetime_array(arr) + assert not lib.is_datetime64_array(arr) + assert lib.is_timedelta_or_timedelta64_array(arr) + + arr = np.array([np.nan, pd.NaT, np.datetime64("nat"), np.timedelta64("nat")]) + assert not lib.is_datetime_array(arr) + assert not lib.is_datetime64_array(arr) + 
assert not lib.is_timedelta_or_timedelta64_array(arr) + + arr = np.array([np.nan, pd.NaT]) + assert lib.is_datetime_array(arr) + assert lib.is_datetime64_array(arr) + assert lib.is_timedelta_or_timedelta64_array(arr) + + arr = np.array([np.nan, np.nan], dtype=object) + assert not lib.is_datetime_array(arr) + assert not lib.is_datetime64_array(arr) + assert not lib.is_timedelta_or_timedelta64_array(arr) + + assert lib.is_datetime_with_singletz_array( + np.array( + [ + Timestamp("20130101", tz="US/Eastern"), + Timestamp("20130102", tz="US/Eastern"), + ], + dtype=object, + ) + ) + assert not lib.is_datetime_with_singletz_array( + np.array( + [ + Timestamp("20130101", tz="US/Eastern"), + Timestamp("20130102", tz="CET"), + ], + dtype=object, + ) + ) + + @pytest.mark.parametrize( + "func", + [ + "is_datetime_array", + "is_datetime64_array", + "is_bool_array", + "is_timedelta_or_timedelta64_array", + "is_date_array", + "is_time_array", + "is_interval_array", + ], + ) + def test_other_dtypes_for_array(self, func): + func = getattr(lib, func) + arr = np.array(["foo", "bar"]) + assert not func(arr) + assert not func(arr.reshape(2, 1)) + + arr = np.array([1, 2]) + assert not func(arr) + assert not func(arr.reshape(2, 1)) + + def test_date(self): + + dates = [date(2012, 1, day) for day in range(1, 20)] + index = Index(dates) + assert index.inferred_type == "date" + + dates = [date(2012, 1, day) for day in range(1, 20)] + [np.nan] + result = lib.infer_dtype(dates, skipna=False) + assert result == "mixed" + + result = lib.infer_dtype(dates, skipna=True) + assert result == "date" + + @pytest.mark.parametrize( + "values", + [ + [date(2020, 1, 1), Timestamp("2020-01-01")], + [Timestamp("2020-01-01"), date(2020, 1, 1)], + [date(2020, 1, 1), pd.NaT], + [pd.NaT, date(2020, 1, 1)], + ], + ) + @pytest.mark.parametrize("skipna", [True, False]) + def test_infer_dtype_date_order_invariant(self, values, skipna): + # https://github.com/pandas-dev/pandas/issues/33741 + result = lib.infer_dtype(values, skipna=skipna) + assert result == "date" + + def test_is_numeric_array(self): + + assert lib.is_float_array(np.array([1, 2.0])) + assert lib.is_float_array(np.array([1, 2.0, np.nan])) + assert not lib.is_float_array(np.array([1, 2])) + + assert lib.is_integer_array(np.array([1, 2])) + assert not lib.is_integer_array(np.array([1, 2.0])) + + def test_is_string_array(self): + + assert lib.is_string_array(np.array(["foo", "bar"])) + assert not lib.is_string_array( + np.array(["foo", "bar", pd.NA], dtype=object), skipna=False + ) + assert lib.is_string_array( + np.array(["foo", "bar", pd.NA], dtype=object), skipna=True + ) + # NaN is not valid for string array, just NA + assert not lib.is_string_array( + np.array(["foo", "bar", np.nan], dtype=object), skipna=True + ) + + assert not lib.is_string_array(np.array([1, 2])) + + def test_to_object_array_tuples(self): + r = (5, 6) + values = [r] + lib.to_object_array_tuples(values) + + # make sure record array works + record = namedtuple("record", "x y") + r = record(5, 6) + values = [r] + lib.to_object_array_tuples(values) + + def test_object(self): + + # GH 7431 + # cannot infer more than this as only a single element + arr = np.array([None], dtype="O") + result = lib.infer_dtype(arr, skipna=False) + assert result == "mixed" + result = lib.infer_dtype(arr, skipna=True) + assert result == "empty" + + def test_to_object_array_width(self): + # see gh-13320 + rows = [[1, 2, 3], [4, 5, 6]] + + expected = np.array(rows, dtype=object) + out = lib.to_object_array(rows) + 
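# with no min_width, the result simply mirrors the input rows + 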
tm.assert_numpy_array_equal(out, expected) + + expected = np.array(rows, dtype=object) + out = lib.to_object_array(rows, min_width=1) + tm.assert_numpy_array_equal(out, expected) + + expected = np.array( + [[1, 2, 3, None, None], [4, 5, 6, None, None]], dtype=object + ) + out = lib.to_object_array(rows, min_width=5) + tm.assert_numpy_array_equal(out, expected) + + def test_is_period(self): + assert lib.is_period(Period("2011-01", freq="M")) + assert not lib.is_period(PeriodIndex(["2011-01"], freq="M")) + assert not lib.is_period(Timestamp("2011-01")) + assert not lib.is_period(1) + assert not lib.is_period(np.nan) + + def test_categorical(self): + + # GH 8974 + arr = Categorical(list("abc")) + result = lib.infer_dtype(arr, skipna=True) + assert result == "categorical" + + result = lib.infer_dtype(Series(arr), skipna=True) + assert result == "categorical" + + arr = Categorical(list("abc"), categories=["cegfab"], ordered=True) + result = lib.infer_dtype(arr, skipna=True) + assert result == "categorical" + + result = lib.infer_dtype(Series(arr), skipna=True) + assert result == "categorical" + + @pytest.mark.parametrize("asobject", [True, False]) + def test_interval(self, asobject): + idx = pd.IntervalIndex.from_breaks(range(5), closed="both") + if asobject: + idx = idx.astype(object) + + inferred = lib.infer_dtype(idx, skipna=False) + assert inferred == "interval" + + inferred = lib.infer_dtype(idx._data, skipna=False) + assert inferred == "interval" + + inferred = lib.infer_dtype(Series(idx, dtype=idx.dtype), skipna=False) + assert inferred == "interval" + + @pytest.mark.parametrize("value", [Timestamp(0), Timedelta(0), 0, 0.0]) + def test_interval_mismatched_closed(self, value): + + first = Interval(value, value, closed="left") + second = Interval(value, value, closed="right") + + # if closed match, we should infer "interval" + arr = np.array([first, first], dtype=object) + assert lib.infer_dtype(arr, skipna=False) == "interval" + + # if closed don't match, we should _not_ get "interval" + arr2 = np.array([first, second], dtype=object) + assert lib.infer_dtype(arr2, skipna=False) == "mixed" + + def test_interval_mismatched_subtype(self): + first = Interval(0, 1, closed="left") + second = Interval(Timestamp(0), Timestamp(1), closed="left") + third = Interval(Timedelta(0), Timedelta(1), closed="left") + + arr = np.array([first, second]) + assert lib.infer_dtype(arr, skipna=False) == "mixed" + + arr = np.array([second, third]) + assert lib.infer_dtype(arr, skipna=False) == "mixed" + + arr = np.array([first, third]) + assert lib.infer_dtype(arr, skipna=False) == "mixed" + + # float vs int subdtypes are compatible + flt_interval = Interval(1.5, 2.5, closed="left") + arr = np.array([first, flt_interval], dtype=object) + assert lib.infer_dtype(arr, skipna=False) == "interval" + + @pytest.mark.parametrize("klass", [pd.array, Series]) + @pytest.mark.parametrize("skipna", [True, False]) + @pytest.mark.parametrize("data", [["a", "b", "c"], ["a", "b", pd.NA]]) + def test_string_dtype(self, data, skipna, klass, nullable_string_dtype): + # StringArray + val = klass(data, dtype=nullable_string_dtype) + inferred = lib.infer_dtype(val, skipna=skipna) + assert inferred == "string" + + @pytest.mark.parametrize("klass", [pd.array, Series]) + @pytest.mark.parametrize("skipna", [True, False]) + @pytest.mark.parametrize("data", [[True, False, True], [True, False, pd.NA]]) + def test_boolean_dtype(self, data, skipna, klass): + # BooleanArray + val = klass(data, dtype="boolean") + inferred = lib.infer_dtype(val, 
skipna=skipna) + assert inferred == "boolean" + + +class TestNumberScalar: + def test_is_number(self): + + assert is_number(True) + assert is_number(1) + assert is_number(1.1) + assert is_number(1 + 3j) + assert is_number(np.int64(1)) + assert is_number(np.float64(1.1)) + assert is_number(np.complex128(1 + 3j)) + assert is_number(np.nan) + + assert not is_number(None) + assert not is_number("x") + assert not is_number(datetime(2011, 1, 1)) + assert not is_number(np.datetime64("2011-01-01")) + assert not is_number(Timestamp("2011-01-01")) + assert not is_number(Timestamp("2011-01-01", tz="US/Eastern")) + assert not is_number(timedelta(1000)) + assert not is_number(Timedelta("1 days")) + + # questionable + assert not is_number(np.bool_(False)) + assert is_number(np.timedelta64(1, "D")) + + def test_is_bool(self): + assert is_bool(True) + assert is_bool(False) + assert is_bool(np.bool_(False)) + + assert not is_bool(1) + assert not is_bool(1.1) + assert not is_bool(1 + 3j) + assert not is_bool(np.int64(1)) + assert not is_bool(np.float64(1.1)) + assert not is_bool(np.complex128(1 + 3j)) + assert not is_bool(np.nan) + assert not is_bool(None) + assert not is_bool("x") + assert not is_bool(datetime(2011, 1, 1)) + assert not is_bool(np.datetime64("2011-01-01")) + assert not is_bool(Timestamp("2011-01-01")) + assert not is_bool(Timestamp("2011-01-01", tz="US/Eastern")) + assert not is_bool(timedelta(1000)) + assert not is_bool(np.timedelta64(1, "D")) + assert not is_bool(Timedelta("1 days")) + + def test_is_integer(self): + assert is_integer(1) + assert is_integer(np.int64(1)) + + assert not is_integer(True) + assert not is_integer(1.1) + assert not is_integer(1 + 3j) + assert not is_integer(False) + assert not is_integer(np.bool_(False)) + assert not is_integer(np.float64(1.1)) + assert not is_integer(np.complex128(1 + 3j)) + assert not is_integer(np.nan) + assert not is_integer(None) + assert not is_integer("x") + assert not is_integer(datetime(2011, 1, 1)) + assert not is_integer(np.datetime64("2011-01-01")) + assert not is_integer(Timestamp("2011-01-01")) + assert not is_integer(Timestamp("2011-01-01", tz="US/Eastern")) + assert not is_integer(timedelta(1000)) + assert not is_integer(Timedelta("1 days")) + assert not is_integer(np.timedelta64(1, "D")) + + def test_is_float(self): + assert is_float(1.1) + assert is_float(np.float64(1.1)) + assert is_float(np.nan) + + assert not is_float(True) + assert not is_float(1) + assert not is_float(1 + 3j) + assert not is_float(False) + assert not is_float(np.bool_(False)) + assert not is_float(np.int64(1)) + assert not is_float(np.complex128(1 + 3j)) + assert not is_float(None) + assert not is_float("x") + assert not is_float(datetime(2011, 1, 1)) + assert not is_float(np.datetime64("2011-01-01")) + assert not is_float(Timestamp("2011-01-01")) + assert not is_float(Timestamp("2011-01-01", tz="US/Eastern")) + assert not is_float(timedelta(1000)) + assert not is_float(np.timedelta64(1, "D")) + assert not is_float(Timedelta("1 days")) + + def test_is_datetime_dtypes(self): + + ts = pd.date_range("20130101", periods=3) + tsa = pd.date_range("20130101", periods=3, tz="US/Eastern") + + assert is_datetime64_dtype("datetime64") + assert is_datetime64_dtype("datetime64[ns]") + assert is_datetime64_dtype(ts) + assert not is_datetime64_dtype(tsa) + + assert not is_datetime64_ns_dtype("datetime64") + assert is_datetime64_ns_dtype("datetime64[ns]") + assert is_datetime64_ns_dtype(ts) + assert is_datetime64_ns_dtype(tsa) + + assert 
is_datetime64_any_dtype("datetime64") + assert is_datetime64_any_dtype("datetime64[ns]") + assert is_datetime64_any_dtype(ts) + assert is_datetime64_any_dtype(tsa) + + assert not is_datetime64tz_dtype("datetime64") + assert not is_datetime64tz_dtype("datetime64[ns]") + assert not is_datetime64tz_dtype(ts) + assert is_datetime64tz_dtype(tsa) + + for tz in ["US/Eastern", "UTC"]: + dtype = f"datetime64[ns, {tz}]" + assert not is_datetime64_dtype(dtype) + assert is_datetime64tz_dtype(dtype) + assert is_datetime64_ns_dtype(dtype) + assert is_datetime64_any_dtype(dtype) + + def test_is_timedelta(self): + assert is_timedelta64_dtype("timedelta64") + assert is_timedelta64_dtype("timedelta64[ns]") + assert not is_timedelta64_ns_dtype("timedelta64") + assert is_timedelta64_ns_dtype("timedelta64[ns]") + + tdi = TimedeltaIndex([1e14, 2e14], dtype="timedelta64[ns]") + assert is_timedelta64_dtype(tdi) + assert is_timedelta64_ns_dtype(tdi) + assert is_timedelta64_ns_dtype(tdi.astype("timedelta64[ns]")) + + # Conversion to Int64Index: + assert not is_timedelta64_ns_dtype(tdi.astype("timedelta64")) + assert not is_timedelta64_ns_dtype(tdi.astype("timedelta64[h]")) + + +class TestIsScalar: + def test_is_scalar_builtin_scalars(self): + assert is_scalar(None) + assert is_scalar(True) + assert is_scalar(False) + assert is_scalar(Fraction()) + assert is_scalar(0.0) + assert is_scalar(1) + assert is_scalar(complex(2)) + assert is_scalar(float("NaN")) + assert is_scalar(np.nan) + assert is_scalar("foobar") + assert is_scalar(b"foobar") + assert is_scalar(datetime(2014, 1, 1)) + assert is_scalar(date(2014, 1, 1)) + assert is_scalar(time(12, 0)) + assert is_scalar(timedelta(hours=1)) + assert is_scalar(pd.NaT) + assert is_scalar(pd.NA) + + def test_is_scalar_builtin_nonscalars(self): + assert not is_scalar({}) + assert not is_scalar([]) + assert not is_scalar([1]) + assert not is_scalar(()) + assert not is_scalar((1,)) + assert not is_scalar(slice(None)) + assert not is_scalar(Ellipsis) + + def test_is_scalar_numpy_array_scalars(self): + assert is_scalar(np.int64(1)) + assert is_scalar(np.float64(1.0)) + assert is_scalar(np.int32(1)) + assert is_scalar(np.complex64(2)) + assert is_scalar(np.object_("foobar")) + assert is_scalar(np.str_("foobar")) + assert is_scalar(np.unicode_("foobar")) + assert is_scalar(np.bytes_(b"foobar")) + assert is_scalar(np.datetime64("2014-01-01")) + assert is_scalar(np.timedelta64(1, "h")) + + def test_is_scalar_numpy_zerodim_arrays(self): + for zerodim in [ + np.array(1), + np.array("foobar"), + np.array(np.datetime64("2014-01-01")), + np.array(np.timedelta64(1, "h")), + np.array(np.datetime64("NaT")), + ]: + assert not is_scalar(zerodim) + assert is_scalar(lib.item_from_zerodim(zerodim)) + + @pytest.mark.filterwarnings("ignore::PendingDeprecationWarning") + def test_is_scalar_numpy_arrays(self): + for a in [ + np.array([]), + np.array([[]]), + np.matrix("1; 2"), + ]: + assert not is_scalar(a) + assert not is_scalar(MockNumpyLikeArray(a)) + + def test_is_scalar_pandas_scalars(self): + assert is_scalar(Timestamp("2014-01-01")) + assert is_scalar(Timedelta(hours=1)) + assert is_scalar(Period("2014-01-01")) + assert is_scalar(Interval(left=0, right=1)) + assert is_scalar(DateOffset(days=1)) + assert is_scalar(pd.offsets.Minute(3)) + + def test_is_scalar_pandas_containers(self): + assert not is_scalar(Series(dtype=object)) + assert not is_scalar(Series([1])) + assert not is_scalar(DataFrame()) + assert not is_scalar(DataFrame([[1]])) + assert not is_scalar(Index([])) + assert not 
is_scalar(Index([1])) + assert not is_scalar(Categorical([])) + assert not is_scalar(DatetimeIndex([])._data) + assert not is_scalar(TimedeltaIndex([])._data) + assert not is_scalar(DatetimeIndex([])._data.to_period("D")) + assert not is_scalar(pd.array([1, 2, 3])) + + def test_is_scalar_number(self): + # Number() is not recognized by PyNumber_Check, so by extension + # is not recognized by is_scalar, but instances of non-abstract + # subclasses are. + + class Numeric(Number): + def __init__(self, value): + self.value = value + + def __int__(self): + return self.value + + num = Numeric(1) + assert is_scalar(num) + + +def test_datetimeindex_from_empty_datetime64_array(): + for unit in ["ms", "us", "ns"]: + idx = DatetimeIndex(np.array([], dtype=f"datetime64[{unit}]")) + assert len(idx) == 0 + + +def test_nan_to_nat_conversions(): + + df = DataFrame( + {"A": np.asarray(range(10), dtype="float64"), "B": Timestamp("20010101")} + ) + df.iloc[3:6, :] = np.nan + result = df.loc[4, "B"] + assert result is pd.NaT + + s = df["B"].copy() + s[8:9] = np.nan + assert s[8] is pd.NaT + + +@td.skip_if_no_scipy +@pytest.mark.filterwarnings("ignore::PendingDeprecationWarning") +def test_is_scipy_sparse(spmatrix): + assert is_scipy_sparse(spmatrix([[0, 1]])) + assert not is_scipy_sparse(np.array([1])) + + +def test_ensure_int32(): + values = np.arange(10, dtype=np.int32) + result = ensure_int32(values) + assert result.dtype == np.int32 + + values = np.arange(10, dtype=np.int64) + result = ensure_int32(values) + assert result.dtype == np.int32 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/dtypes/test_missing.py b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/test_missing.py new file mode 100644 index 0000000..1917fc6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/dtypes/test_missing.py @@ -0,0 +1,725 @@ +from contextlib import nullcontext +from datetime import datetime +from decimal import Decimal + +import numpy as np +import pytest + +from pandas._config import config as cf + +from pandas._libs import missing as libmissing +from pandas._libs.tslibs import iNaT + +from pandas.core.dtypes.common import ( + is_float, + is_scalar, +) +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + DatetimeTZDtype, + IntervalDtype, + PeriodDtype, +) +from pandas.core.dtypes.missing import ( + array_equivalent, + is_valid_na_for_dtype, + isna, + isnull, + na_value_for_dtype, + notna, + notnull, +) + +import pandas as pd +from pandas import ( + DatetimeIndex, + NaT, + Series, + TimedeltaIndex, + date_range, +) +import pandas._testing as tm +from pandas.core.api import Float64Index + +fix_now = pd.Timestamp("2021-01-01") +fix_utcnow = pd.Timestamp("2021-01-01", tz="UTC") + + +@pytest.mark.parametrize("notna_f", [notna, notnull]) +def test_notna_notnull(notna_f): + assert notna_f(1.0) + assert not notna_f(None) + assert not notna_f(np.NaN) + + with cf.option_context("mode.use_inf_as_na", False): + assert notna_f(np.inf) + assert notna_f(-np.inf) + + arr = np.array([1.5, np.inf, 3.5, -np.inf]) + result = notna_f(arr) + assert result.all() + + with cf.option_context("mode.use_inf_as_na", True): + assert not notna_f(np.inf) + assert not notna_f(-np.inf) + + arr = np.array([1.5, np.inf, 3.5, -np.inf]) + result = notna_f(arr) + assert result.sum() == 2 + + with cf.option_context("mode.use_inf_as_na", False): + for s in [ + tm.makeFloatSeries(), + tm.makeStringSeries(), + tm.makeObjectSeries(), + tm.makeTimeSeries(), + tm.makePeriodSeries(), + ]: + assert isinstance(notna_f(s), 
Series) + + +class TestIsNA: + def test_0d_array(self): + assert isna(np.array(np.nan)) + assert not isna(np.array(0.0)) + assert not isna(np.array(0)) + # test object dtype + assert isna(np.array(np.nan, dtype=object)) + assert not isna(np.array(0.0, dtype=object)) + assert not isna(np.array(0, dtype=object)) + + def test_empty_object(self): + + for shape in [(4, 0), (4,)]: + arr = np.empty(shape=shape, dtype=object) + result = isna(arr) + expected = np.ones(shape=shape, dtype=bool) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("isna_f", [isna, isnull]) + def test_isna_isnull(self, isna_f): + assert not isna_f(1.0) + assert isna_f(None) + assert isna_f(np.NaN) + assert isna_f(float("nan")) + assert not isna_f(np.inf) + assert not isna_f(-np.inf) + + # type + assert not isna_f(type(Series(dtype=object))) + assert not isna_f(type(Series(dtype=np.float64))) + assert not isna_f(type(pd.DataFrame())) + + # series + for s in [ + tm.makeFloatSeries(), + tm.makeStringSeries(), + tm.makeObjectSeries(), + tm.makeTimeSeries(), + tm.makePeriodSeries(), + ]: + assert isinstance(isna_f(s), Series) + + # frame + for df in [ + tm.makeTimeDataFrame(), + tm.makePeriodFrame(), + tm.makeMixedDataFrame(), + ]: + result = isna_f(df) + expected = df.apply(isna_f) + tm.assert_frame_equal(result, expected) + + def test_isna_lists(self): + result = isna([[False]]) + exp = np.array([[False]]) + tm.assert_numpy_array_equal(result, exp) + + result = isna([[1], [2]]) + exp = np.array([[False], [False]]) + tm.assert_numpy_array_equal(result, exp) + + # list of strings / unicode + result = isna(["foo", "bar"]) + exp = np.array([False, False]) + tm.assert_numpy_array_equal(result, exp) + + result = isna(["foo", "bar"]) + exp = np.array([False, False]) + tm.assert_numpy_array_equal(result, exp) + + # GH20675 + result = isna([np.NaN, "world"]) + exp = np.array([True, False]) + tm.assert_numpy_array_equal(result, exp) + + def test_isna_nat(self): + result = isna([NaT]) + exp = np.array([True]) + tm.assert_numpy_array_equal(result, exp) + + result = isna(np.array([NaT], dtype=object)) + exp = np.array([True]) + tm.assert_numpy_array_equal(result, exp) + + def test_isna_numpy_nat(self): + arr = np.array( + [ + NaT, + np.datetime64("NaT"), + np.timedelta64("NaT"), + np.datetime64("NaT", "s"), + ] + ) + result = isna(arr) + expected = np.array([True] * 4) + tm.assert_numpy_array_equal(result, expected) + + def test_isna_datetime(self): + assert not isna(datetime.now()) + assert notna(datetime.now()) + + idx = date_range("1/1/1990", periods=20) + exp = np.ones(len(idx), dtype=bool) + tm.assert_numpy_array_equal(notna(idx), exp) + + idx = np.asarray(idx) + idx[0] = iNaT + idx = DatetimeIndex(idx) + mask = isna(idx) + assert mask[0] + exp = np.array([True] + [False] * (len(idx) - 1), dtype=bool) + tm.assert_numpy_array_equal(mask, exp) + + # GH 9129 + pidx = idx.to_period(freq="M") + mask = isna(pidx) + assert mask[0] + exp = np.array([True] + [False] * (len(idx) - 1), dtype=bool) + tm.assert_numpy_array_equal(mask, exp) + + mask = isna(pidx[1:]) + exp = np.zeros(len(mask), dtype=bool) + tm.assert_numpy_array_equal(mask, exp) + + def test_isna_old_datetimelike(self): + # isna_old should work for dt64tz, td64, and period, not just tznaive + dti = date_range("2016-01-01", periods=3) + dta = dti._data + dta[-1] = NaT + expected = np.array([False, False, True], dtype=bool) + + objs = [dta, dta.tz_localize("US/Eastern"), dta - dta, dta.to_period("D")] + + for obj in objs: + with 
cf.option_context("mode.use_inf_as_na", True): + result = isna(obj) + + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "value, expected", + [ + (np.complex128(np.nan), True), + (np.float64(1), False), + (np.array([1, 1 + 0j, np.nan, 3]), np.array([False, False, True, False])), + ( + np.array([1, 1 + 0j, np.nan, 3], dtype=object), + np.array([False, False, True, False]), + ), + ( + np.array([1, 1 + 0j, np.nan, 3]).astype(object), + np.array([False, False, True, False]), + ), + ], + ) + def test_complex(self, value, expected): + result = isna(value) + if is_scalar(result): + assert result is expected + else: + tm.assert_numpy_array_equal(result, expected) + + def test_datetime_other_units(self): + idx = DatetimeIndex(["2011-01-01", "NaT", "2011-01-02"]) + exp = np.array([False, True, False]) + tm.assert_numpy_array_equal(isna(idx), exp) + tm.assert_numpy_array_equal(notna(idx), ~exp) + tm.assert_numpy_array_equal(isna(idx.values), exp) + tm.assert_numpy_array_equal(notna(idx.values), ~exp) + + for dtype in [ + "datetime64[D]", + "datetime64[h]", + "datetime64[m]", + "datetime64[s]", + "datetime64[ms]", + "datetime64[us]", + "datetime64[ns]", + ]: + values = idx.values.astype(dtype) + + exp = np.array([False, True, False]) + tm.assert_numpy_array_equal(isna(values), exp) + tm.assert_numpy_array_equal(notna(values), ~exp) + + exp = Series([False, True, False]) + s = Series(values) + tm.assert_series_equal(isna(s), exp) + tm.assert_series_equal(notna(s), ~exp) + s = Series(values, dtype=object) + tm.assert_series_equal(isna(s), exp) + tm.assert_series_equal(notna(s), ~exp) + + def test_timedelta_other_units(self): + idx = TimedeltaIndex(["1 days", "NaT", "2 days"]) + exp = np.array([False, True, False]) + tm.assert_numpy_array_equal(isna(idx), exp) + tm.assert_numpy_array_equal(notna(idx), ~exp) + tm.assert_numpy_array_equal(isna(idx.values), exp) + tm.assert_numpy_array_equal(notna(idx.values), ~exp) + + for dtype in [ + "timedelta64[D]", + "timedelta64[h]", + "timedelta64[m]", + "timedelta64[s]", + "timedelta64[ms]", + "timedelta64[us]", + "timedelta64[ns]", + ]: + values = idx.values.astype(dtype) + + exp = np.array([False, True, False]) + tm.assert_numpy_array_equal(isna(values), exp) + tm.assert_numpy_array_equal(notna(values), ~exp) + + exp = Series([False, True, False]) + s = Series(values) + tm.assert_series_equal(isna(s), exp) + tm.assert_series_equal(notna(s), ~exp) + s = Series(values, dtype=object) + tm.assert_series_equal(isna(s), exp) + tm.assert_series_equal(notna(s), ~exp) + + def test_period(self): + idx = pd.PeriodIndex(["2011-01", "NaT", "2012-01"], freq="M") + exp = np.array([False, True, False]) + tm.assert_numpy_array_equal(isna(idx), exp) + tm.assert_numpy_array_equal(notna(idx), ~exp) + + exp = Series([False, True, False]) + s = Series(idx) + tm.assert_series_equal(isna(s), exp) + tm.assert_series_equal(notna(s), ~exp) + s = Series(idx, dtype=object) + tm.assert_series_equal(isna(s), exp) + tm.assert_series_equal(notna(s), ~exp) + + def test_decimal(self): + # scalars GH#23530 + a = Decimal(1.0) + assert isna(a) is False + assert notna(a) is True + + b = Decimal("NaN") + assert isna(b) is True + assert notna(b) is False + + # array + arr = np.array([a, b]) + expected = np.array([False, True]) + result = isna(arr) + tm.assert_numpy_array_equal(result, expected) + + result = notna(arr) + tm.assert_numpy_array_equal(result, ~expected) + + # series + ser = Series(arr) + expected = Series(expected) + result = isna(ser) + 
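# the object-dtype Series detects Decimal("NaN") elementwise + 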
tm.assert_series_equal(result, expected) + + result = notna(ser) + tm.assert_series_equal(result, ~expected) + + # index + idx = pd.Index(arr) + expected = np.array([False, True]) + result = isna(idx) + tm.assert_numpy_array_equal(result, expected) + + result = notna(idx) + tm.assert_numpy_array_equal(result, ~expected) + + +@pytest.mark.parametrize("dtype_equal", [True, False]) +def test_array_equivalent(dtype_equal): + assert array_equivalent( + np.array([np.nan, np.nan]), np.array([np.nan, np.nan]), dtype_equal=dtype_equal + ) + assert array_equivalent( + np.array([np.nan, 1, np.nan]), + np.array([np.nan, 1, np.nan]), + dtype_equal=dtype_equal, + ) + assert array_equivalent( + np.array([np.nan, None], dtype="object"), + np.array([np.nan, None], dtype="object"), + dtype_equal=dtype_equal, + ) + # Check the handling of nested arrays in array_equivalent_object + assert array_equivalent( + np.array([np.array([np.nan, None], dtype="object"), None], dtype="object"), + np.array([np.array([np.nan, None], dtype="object"), None], dtype="object"), + dtype_equal=dtype_equal, + ) + assert array_equivalent( + np.array([np.nan, 1 + 1j], dtype="complex"), + np.array([np.nan, 1 + 1j], dtype="complex"), + dtype_equal=dtype_equal, + ) + assert not array_equivalent( + np.array([np.nan, 1 + 1j], dtype="complex"), + np.array([np.nan, 1 + 2j], dtype="complex"), + dtype_equal=dtype_equal, + ) + assert not array_equivalent( + np.array([np.nan, 1, np.nan]), + np.array([np.nan, 2, np.nan]), + dtype_equal=dtype_equal, + ) + assert not array_equivalent( + np.array(["a", "b", "c", "d"]), np.array(["e", "e"]), dtype_equal=dtype_equal + ) + assert array_equivalent( + Float64Index([0, np.nan]), Float64Index([0, np.nan]), dtype_equal=dtype_equal + ) + assert not array_equivalent( + Float64Index([0, np.nan]), Float64Index([1, np.nan]), dtype_equal=dtype_equal + ) + assert array_equivalent( + DatetimeIndex([0, np.nan]), DatetimeIndex([0, np.nan]), dtype_equal=dtype_equal + ) + assert not array_equivalent( + DatetimeIndex([0, np.nan]), DatetimeIndex([1, np.nan]), dtype_equal=dtype_equal + ) + assert array_equivalent( + TimedeltaIndex([0, np.nan]), + TimedeltaIndex([0, np.nan]), + dtype_equal=dtype_equal, + ) + assert not array_equivalent( + TimedeltaIndex([0, np.nan]), + TimedeltaIndex([1, np.nan]), + dtype_equal=dtype_equal, + ) + assert array_equivalent( + DatetimeIndex([0, np.nan], tz="US/Eastern"), + DatetimeIndex([0, np.nan], tz="US/Eastern"), + dtype_equal=dtype_equal, + ) + assert not array_equivalent( + DatetimeIndex([0, np.nan], tz="US/Eastern"), + DatetimeIndex([1, np.nan], tz="US/Eastern"), + dtype_equal=dtype_equal, + ) + # The rest are not dtype_equal + assert not array_equivalent( + DatetimeIndex([0, np.nan]), DatetimeIndex([0, np.nan], tz="US/Eastern") + ) + assert not array_equivalent( + DatetimeIndex([0, np.nan], tz="CET"), + DatetimeIndex([0, np.nan], tz="US/Eastern"), + ) + + assert not array_equivalent(DatetimeIndex([0, np.nan]), TimedeltaIndex([0, np.nan])) + + +@pytest.mark.parametrize( + "val", [1, 1.1, 1 + 1j, True, "abc", [1, 2], (1, 2), {1, 2}, {"a": 1}, None] +) +def test_array_equivalent_series(val): + arr = np.array([1, 2]) + msg = "elementwise comparison failed" + cm = ( + # stacklevel is chosen to make sense when called from .equals + tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False) + if isinstance(val, str) + else nullcontext() + ) + with cm: + assert not array_equivalent(Series([arr, arr]), Series([arr, val])) + + +def 
test_array_equivalent_different_dtype_but_equal(): + # Unclear if this is exposed anywhere in the public-facing API + assert array_equivalent(np.array([1, 2]), np.array([1.0, 2.0])) + + +@pytest.mark.parametrize( + "lvalue, rvalue", + [ + # There are 3 variants for each of lvalue and rvalue. We include all + # three for the tz-naive `now` and exclude the datetime64 variant + # for utcnow because it drops tzinfo. + (fix_now, fix_utcnow), + (fix_now.to_datetime64(), fix_utcnow), + (fix_now.to_pydatetime(), fix_utcnow), + (fix_now, fix_utcnow.to_pydatetime()), + (fix_now.to_datetime64(), fix_utcnow.to_pydatetime()), + (fix_now.to_pydatetime(), fix_utcnow.to_pydatetime()), + ], +) +def test_array_equivalent_tzawareness(lvalue, rvalue): + # we shouldn't raise if comparing tzaware and tznaive datetimes + left = np.array([lvalue], dtype=object) + right = np.array([rvalue], dtype=object) + + assert not array_equivalent(left, right, strict_nan=True) + assert not array_equivalent(left, right, strict_nan=False) + + +def test_array_equivalent_compat(): + # see gh-13388 + m = np.array([(1, 2), (3, 4)], dtype=[("a", int), ("b", float)]) + n = np.array([(1, 2), (3, 4)], dtype=[("a", int), ("b", float)]) + assert array_equivalent(m, n, strict_nan=True) + assert array_equivalent(m, n, strict_nan=False) + + m = np.array([(1, 2), (3, 4)], dtype=[("a", int), ("b", float)]) + n = np.array([(1, 2), (4, 3)], dtype=[("a", int), ("b", float)]) + assert not array_equivalent(m, n, strict_nan=True) + assert not array_equivalent(m, n, strict_nan=False) + + m = np.array([(1, 2), (3, 4)], dtype=[("a", int), ("b", float)]) + n = np.array([(1, 2), (3, 4)], dtype=[("b", int), ("a", float)]) + assert not array_equivalent(m, n, strict_nan=True) + assert not array_equivalent(m, n, strict_nan=False) + + +def test_array_equivalent_str(): + for dtype in ["O", "S", "U"]: + assert array_equivalent( + np.array(["A", "B"], dtype=dtype), np.array(["A", "B"], dtype=dtype) + ) + assert not array_equivalent( + np.array(["A", "B"], dtype=dtype), np.array(["A", "X"], dtype=dtype) + ) + + +def test_array_equivalent_nested(): + # reached in groupby aggregations, make sure we use np.any when checking + # if the comparison is truthy + left = np.array([np.array([50, 70, 90]), np.array([20, 30, 40])], dtype=object) + right = np.array([np.array([50, 70, 90]), np.array([20, 30, 40])], dtype=object) + + assert array_equivalent(left, right, strict_nan=True) + assert not array_equivalent(left, right[::-1], strict_nan=True) + + left = np.array([np.array([50, 50, 50]), np.array([40, 40, 40])], dtype=object) + right = np.array([50, 40]) + assert not array_equivalent(left, right, strict_nan=True) + + +@pytest.mark.parametrize( + "dtype, na_value", + [ + # Datetime-like + (np.dtype("M8[ns]"), np.datetime64("NaT", "ns")), + (np.dtype("m8[ns]"), np.timedelta64("NaT", "ns")), + (DatetimeTZDtype.construct_from_string("datetime64[ns, US/Eastern]"), NaT), + (PeriodDtype("M"), NaT), + # Integer + ("u1", 0), + ("u2", 0), + ("u4", 0), + ("u8", 0), + ("i1", 0), + ("i2", 0), + ("i4", 0), + ("i8", 0), + # Bool + ("bool", False), + # Float + ("f2", np.nan), + ("f4", np.nan), + ("f8", np.nan), + # Object + ("O", np.nan), + # Interval + (IntervalDtype(), np.nan), + ], +) +def test_na_value_for_dtype(dtype, na_value): + result = na_value_for_dtype(dtype) + # identity check doesn't work for datetime64/timedelta64("NaT") bc they + # are not singletons + assert result is na_value or ( + isna(result) and isna(na_value) and type(result) is type(na_value) + ) + + +class TestNAObj: + def 
_check_behavior(self, arr, expected): + result = libmissing.isnaobj(arr) + tm.assert_numpy_array_equal(result, expected) + result = libmissing.isnaobj(arr, inf_as_na=True) + tm.assert_numpy_array_equal(result, expected) + + arr = np.atleast_2d(arr) + expected = np.atleast_2d(expected) + + result = libmissing.isnaobj2d(arr) + tm.assert_numpy_array_equal(result, expected) + result = libmissing.isnaobj2d(arr, inf_as_na=True) + tm.assert_numpy_array_equal(result, expected) + + def test_basic(self): + arr = np.array([1, None, "foo", -5.1, NaT, np.nan]) + expected = np.array([False, True, False, False, True, True]) + + self._check_behavior(arr, expected) + + def test_non_obj_dtype(self): + arr = np.array([1, 3, np.nan, 5], dtype=float) + expected = np.array([False, False, True, False]) + + self._check_behavior(arr, expected) + + def test_empty_arr(self): + arr = np.array([]) + expected = np.array([], dtype=bool) + + self._check_behavior(arr, expected) + + def test_empty_str_inp(self): + arr = np.array([""]) # empty but not na + expected = np.array([False]) + + self._check_behavior(arr, expected) + + def test_empty_like(self): + # see gh-13717: no segfaults! + arr = np.empty_like([None]) + expected = np.array([True]) + + self._check_behavior(arr, expected) + + +m8_units = ["as", "ps", "ns", "us", "ms", "s", "m", "h", "D", "W", "M", "Y"] + +na_vals = ( + [ + None, + NaT, + float("NaN"), + complex("NaN"), + np.nan, + np.float64("NaN"), + np.float32("NaN"), + np.complex64(np.nan), + np.complex128(np.nan), + np.datetime64("NaT"), + np.timedelta64("NaT"), + ] + + [np.datetime64("NaT", unit) for unit in m8_units] + + [np.timedelta64("NaT", unit) for unit in m8_units] +) + +inf_vals = [ + float("inf"), + float("-inf"), + complex("inf"), + complex("-inf"), + np.inf, + np.NINF, +] + +int_na_vals = [ + # Values that match iNaT, which we treat as null in specific cases + np.int64(NaT.value), + int(NaT.value), +] + +sometimes_na_vals = [Decimal("NaN")] + +never_na_vals = [ + # float/complex values that when viewed as int64 match iNaT + -0.0, + np.float64("-0.0"), + -0j, + np.complex64(-0j), +] + + +class TestLibMissing: + @pytest.mark.parametrize("func", [libmissing.checknull, isna]) + def test_checknull(self, func): + for value in na_vals + sometimes_na_vals: + assert func(value) + + for value in inf_vals: + assert not func(value) + + for value in int_na_vals: + assert not func(value) + + for value in never_na_vals: + assert not func(value) + + def test_checknull_old(self): + for value in na_vals + sometimes_na_vals: + assert libmissing.checknull(value, inf_as_na=True) + + for value in inf_vals: + assert libmissing.checknull(value, inf_as_na=True) + + for value in int_na_vals: + assert not libmissing.checknull(value, inf_as_na=True) + + for value in never_na_vals: + assert not libmissing.checknull(value, inf_as_na=True) + + def test_is_matching_na(self, nulls_fixture, nulls_fixture2): + left = nulls_fixture + right = nulls_fixture2 + + assert libmissing.is_matching_na(left, left) + + if left is right: + assert libmissing.is_matching_na(left, right) + elif is_float(left) and is_float(right): + # np.nan vs float("NaN") we consider as matching + assert libmissing.is_matching_na(left, right) + elif type(left) is type(right): + # e.g. 
both Decimal("NaN") + assert libmissing.is_matching_na(left, right) + else: + assert not libmissing.is_matching_na(left, right) + + def test_is_matching_na_nan_matches_none(self): + + assert not libmissing.is_matching_na(None, np.nan) + assert not libmissing.is_matching_na(np.nan, None) + + assert libmissing.is_matching_na(None, np.nan, nan_matches_none=True) + assert libmissing.is_matching_na(np.nan, None, nan_matches_none=True) + + +class TestIsValidNAForDtype: + def test_is_valid_na_for_dtype_interval(self): + dtype = IntervalDtype("int64", "left") + assert not is_valid_na_for_dtype(NaT, dtype) + + dtype = IntervalDtype("datetime64[ns]", "both") + assert not is_valid_na_for_dtype(NaT, dtype) + + def test_is_valid_na_for_dtype_categorical(self): + dtype = CategoricalDtype(categories=[0, 1, 2]) + assert is_valid_na_for_dtype(np.nan, dtype) + + assert not is_valid_na_for_dtype(NaT, dtype) + assert not is_valid_na_for_dtype(np.datetime64("NaT", "ns"), dtype) + assert not is_valid_na_for_dtype(np.timedelta64("NaT", "ns"), dtype) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..ac4cbd7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/conftest.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/conftest.cpython-38.pyc new file mode 100644 index 0000000..30030f1 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/conftest.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/test_boolean.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/test_boolean.cpython-38.pyc new file mode 100644 index 0000000..e9fe110 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/test_boolean.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/test_categorical.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/test_categorical.cpython-38.pyc new file mode 100644 index 0000000..cd59a94 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/test_categorical.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/test_common.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/test_common.cpython-38.pyc new file mode 100644 index 0000000..7401b1f Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/test_common.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/test_datetime.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/test_datetime.cpython-38.pyc new file mode 100644 index 0000000..6dc223a Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/test_datetime.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/test_extension.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/test_extension.cpython-38.pyc new file mode 100644 index 0000000..545d2a0 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/test_extension.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/test_external_block.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/test_external_block.cpython-38.pyc new file mode 100644 index 0000000..3786c2d Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/test_external_block.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/test_floating.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/test_floating.cpython-38.pyc new file mode 100644 index 0000000..dbf8e80 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/test_floating.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/test_integer.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/test_integer.cpython-38.pyc new file mode 100644 index 0000000..41d8e4e Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/test_integer.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/test_interval.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/test_interval.cpython-38.pyc new file mode 100644 index 0000000..a5bfdd7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/test_interval.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/test_numpy.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/test_numpy.cpython-38.pyc new file mode 100644 index 0000000..01021c8 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/test_numpy.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/test_period.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/test_period.cpython-38.pyc new file mode 100644 index 0000000..e7780bb Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/test_period.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/test_sparse.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/test_sparse.cpython-38.pyc new file mode 100644 index 0000000..1257c55 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/test_sparse.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/test_string.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/test_string.cpython-38.pyc new file mode 100644 index 0000000..85a1346 Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/pandas/tests/extension/__pycache__/test_string.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/arrow/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/arrow/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/arrow/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/arrow/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..4c2c8d6 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/arrow/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/arrow/__pycache__/arrays.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/arrow/__pycache__/arrays.cpython-38.pyc new file mode 100644 index 0000000..a32dde2 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/arrow/__pycache__/arrays.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/arrow/__pycache__/test_bool.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/arrow/__pycache__/test_bool.cpython-38.pyc new file mode 100644 index 0000000..302b4b5 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/arrow/__pycache__/test_bool.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/arrow/__pycache__/test_string.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/arrow/__pycache__/test_string.cpython-38.pyc new file mode 100644 index 0000000..f05eb26 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/arrow/__pycache__/test_string.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/arrow/__pycache__/test_timestamp.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/arrow/__pycache__/test_timestamp.cpython-38.pyc new file mode 100644 index 0000000..dd0134c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/arrow/__pycache__/test_timestamp.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/arrow/arrays.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/arrow/arrays.py new file mode 100644 index 0000000..fad28c1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/arrow/arrays.py @@ -0,0 +1,225 @@ +""" +Rudimentary Apache Arrow-backed ExtensionArray. + +At the moment, just a boolean array / type is implemented. +Eventually, we'll want to parametrize the type and support +multiple dtypes. Not all methods are implemented yet, and the +current implementation is not efficient. 
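+
+A minimal usage sketch (``ArrowBoolArray`` and ``from_scalars`` are defined
+further down in this module; pyarrow must be installed):
+
+.. code-block:: python
+
+    import pandas as pd
+
+    from pandas.tests.extension.arrow.arrays import ArrowBoolArray
+
+    arr = ArrowBoolArray.from_scalars([True, False, None])
+    ser = pd.Series(arr)  # a Series backed by a pyarrow ChunkedArray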
+""" +from __future__ import annotations + +import copy +import itertools +import operator + +import numpy as np +import pyarrow as pa + +from pandas._typing import type_t + +import pandas as pd +from pandas.api.extensions import ( + ExtensionArray, + ExtensionDtype, + register_extension_dtype, + take, +) +from pandas.api.types import is_scalar +from pandas.core.arraylike import OpsMixin +from pandas.core.construction import extract_array + + +@register_extension_dtype +class ArrowBoolDtype(ExtensionDtype): + + type = np.bool_ + kind = "b" + name = "arrow_bool" + na_value = pa.NULL + + @classmethod + def construct_array_type(cls) -> type_t[ArrowBoolArray]: + """ + Return the array type associated with this dtype. + + Returns + ------- + type + """ + return ArrowBoolArray + + @property + def _is_boolean(self) -> bool: + return True + + +@register_extension_dtype +class ArrowStringDtype(ExtensionDtype): + + type = str + kind = "U" + name = "arrow_string" + na_value = pa.NULL + + @classmethod + def construct_array_type(cls) -> type_t[ArrowStringArray]: + """ + Return the array type associated with this dtype. + + Returns + ------- + type + """ + return ArrowStringArray + + +class ArrowExtensionArray(OpsMixin, ExtensionArray): + _data: pa.ChunkedArray + + @classmethod + def from_scalars(cls, values): + if isinstance(values, cls): + # in particular for empty cases the pa.array(np.asarray(...)) + # does not round-trip + return cls(values._data) + + elif not len(values): + if isinstance(values, list): + dtype = bool if cls is ArrowBoolArray else str + values = np.array([], dtype=dtype) + + arr = pa.chunked_array([pa.array(np.asarray(values))]) + return cls(arr) + + @classmethod + def from_array(cls, arr): + assert isinstance(arr, pa.Array) + return cls(pa.chunked_array([arr])) + + @classmethod + def _from_sequence(cls, scalars, dtype=None, copy=False): + return cls.from_scalars(scalars) + + def __repr__(self): + return f"{type(self).__name__}({repr(self._data)})" + + def __contains__(self, obj) -> bool: + if obj is None or obj is self.dtype.na_value: + # None -> EA.__contains__ only checks for self._dtype.na_value, not + # any compatible NA value. + # self.dtype.na_value -> isn't recognized by pd.isna + return bool(self.isna().any()) + return bool(super().__contains__(obj)) + + def __getitem__(self, item): + if is_scalar(item): + return self._data.to_pandas()[item] + else: + vals = self._data.to_pandas()[item] + return type(self).from_scalars(vals) + + def __len__(self): + return len(self._data) + + def astype(self, dtype, copy=True): + # needed to fix this astype for the Series constructor. + if isinstance(dtype, type(self.dtype)) and dtype == self.dtype: + if copy: + return self.copy() + return self + return super().astype(dtype, copy) + + @property + def dtype(self): + return self._dtype + + def _logical_method(self, other, op): + if not isinstance(other, type(self)): + raise NotImplementedError() + + result = op(np.array(self._data), np.array(other._data)) + return ArrowBoolArray( + pa.chunked_array([pa.array(result, mask=pd.isna(self._data.to_pandas()))]) + ) + + def __eq__(self, other): + if not isinstance(other, type(self)): + # TODO: use some pyarrow function here? 
+ return np.asarray(self).__eq__(other) + + return self._logical_method(other, operator.eq) + + @property + def nbytes(self) -> int: + return sum( + x.size + for chunk in self._data.chunks + for x in chunk.buffers() + if x is not None + ) + + def isna(self): + nas = pd.isna(self._data.to_pandas()) + return type(self).from_scalars(nas) + + def take(self, indices, allow_fill=False, fill_value=None): + data = self._data.to_pandas() + data = extract_array(data, extract_numpy=True) + + if allow_fill and fill_value is None: + fill_value = self.dtype.na_value + + result = take(data, indices, fill_value=fill_value, allow_fill=allow_fill) + return self._from_sequence(result, dtype=self.dtype) + + def copy(self): + return type(self)(copy.copy(self._data)) + + @classmethod + def _concat_same_type(cls, to_concat): + chunks = list(itertools.chain.from_iterable(x._data.chunks for x in to_concat)) + arr = pa.chunked_array(chunks) + return cls(arr) + + def __invert__(self): + return type(self).from_scalars(~self._data.to_pandas()) + + def _reduce(self, name: str, *, skipna: bool = True, **kwargs): + if skipna: + arr = self[~self.isna()] + else: + arr = self + + try: + op = getattr(arr, name) + except AttributeError as err: + raise TypeError from err + return op(**kwargs) + + def any(self, axis=0, out=None): + # Explicitly return a plain bool to reproduce GH-34660 + return bool(self._data.to_pandas().any()) + + def all(self, axis=0, out=None): + # Explicitly return a plain bool to reproduce GH-34660 + return bool(self._data.to_pandas().all()) + + +class ArrowBoolArray(ArrowExtensionArray): + def __init__(self, values): + if not isinstance(values, pa.ChunkedArray): + raise ValueError + + assert values.type == pa.bool_() + self._data = values + self._dtype = ArrowBoolDtype() + + +class ArrowStringArray(ArrowExtensionArray): + def __init__(self, values): + if not isinstance(values, pa.ChunkedArray): + raise ValueError + + assert values.type == pa.string() + self._data = values + self._dtype = ArrowStringDtype() diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/arrow/test_bool.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/arrow/test_bool.py new file mode 100644 index 0000000..a736848 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/arrow/test_bool.py @@ -0,0 +1,104 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.api.types import is_bool_dtype +from pandas.tests.extension import base + +pytest.importorskip("pyarrow", minversion="1.0.1") + +from pandas.tests.extension.arrow.arrays import ( # isort:skip + ArrowBoolArray, + ArrowBoolDtype, +) + + +@pytest.fixture +def dtype(): + return ArrowBoolDtype() + + +@pytest.fixture +def data(): + values = np.random.randint(0, 2, size=100, dtype=bool) + values[1] = ~values[0] + return ArrowBoolArray.from_scalars(values) + + +@pytest.fixture +def data_missing(): + return ArrowBoolArray.from_scalars([None, True]) + + +def test_basic_equals(data): + # https://github.com/pandas-dev/pandas/issues/34660 + assert pd.Series(data).equals(pd.Series(data)) + + +class BaseArrowTests: + pass + + +class TestDtype(BaseArrowTests, base.BaseDtypeTests): + pass + + +class TestInterface(BaseArrowTests, base.BaseInterfaceTests): + def test_copy(self, data): + # __setitem__ does not work, so we only have a smoke-test + data.copy() + + def test_view(self, data): + # __setitem__ does not work, so we only have a smoke-test + data.view() + + @pytest.mark.xfail( + 
raises=AssertionError, + reason="Doesn't recognize data._na_value as NA", + ) + def test_contains(self, data, data_missing): + super().test_contains(data, data_missing) + + +class TestConstructors(BaseArrowTests, base.BaseConstructorsTests): + # seems like some bug in isna on empty BoolArray returning floats. + @pytest.mark.xfail(reason="bad is-na for empty data") + def test_from_sequence_from_cls(self, data): + super().test_from_sequence_from_cls(data) + + @pytest.mark.xfail(reason="pa.NULL is not recognised as scalar, GH-33899") + def test_series_constructor_no_data_with_index(self, dtype, na_value): + # pyarrow.lib.ArrowInvalid: only handle 1-dimensional arrays + super().test_series_constructor_no_data_with_index(dtype, na_value) + + @pytest.mark.xfail(reason="pa.NULL is not recognised as scalar, GH-33899") + def test_series_constructor_scalar_na_with_index(self, dtype, na_value): + # pyarrow.lib.ArrowInvalid: only handle 1-dimensional arrays + super().test_series_constructor_scalar_na_with_index(dtype, na_value) + + @pytest.mark.xfail(reason="ufunc 'invert' not supported for the input types") + def test_construct_empty_dataframe(self, dtype): + super().test_construct_empty_dataframe(dtype) + + @pytest.mark.xfail(reason="_from_sequence ignores dtype keyword") + def test_empty(self, dtype): + super().test_empty(dtype) + + +class TestReduce(base.BaseNoReduceTests): + def test_reduce_series_boolean(self): + pass + + +class TestReduceBoolean(base.BaseBooleanReduceTests): + pass + + +def test_is_bool_dtype(data): + assert is_bool_dtype(data) + assert pd.core.common.is_bool_indexer(data) + s = pd.Series(range(len(data))) + result = s[data] + expected = s[np.asarray(data)] + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/arrow/test_string.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/arrow/test_string.py new file mode 100644 index 0000000..67a6297 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/arrow/test_string.py @@ -0,0 +1,12 @@ +import pytest + +import pandas as pd + +pytest.importorskip("pyarrow", minversion="1.0.0") + + +def test_constructor_from_list(): + # GH 27673 + result = pd.Series(["E"], dtype=pd.StringDtype(storage="pyarrow")) + assert isinstance(result.dtype, pd.StringDtype) + assert result.dtype.storage == "pyarrow" diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/arrow/test_timestamp.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/arrow/test_timestamp.py new file mode 100644 index 0000000..fe2c484 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/arrow/test_timestamp.py @@ -0,0 +1,60 @@ +from __future__ import annotations + +import datetime + +import pytest + +from pandas._typing import type_t + +import pandas as pd +from pandas.api.extensions import ( + ExtensionDtype, + register_extension_dtype, +) + +pytest.importorskip("pyarrow", minversion="1.0.1") + +import pyarrow as pa # isort:skip + +from pandas.tests.extension.arrow.arrays import ArrowExtensionArray # isort:skip + + +@register_extension_dtype +class ArrowTimestampUSDtype(ExtensionDtype): + + type = datetime.datetime + kind = "M" + name = "arrow_timestamp_us" + na_value = pa.NULL + + @classmethod + def construct_array_type(cls) -> type_t[ArrowTimestampUSArray]: + """ + Return the array type associated with this dtype. 
+
+        Returns
+        -------
+        type
+        """
+        return ArrowTimestampUSArray
+
+
+class ArrowTimestampUSArray(ArrowExtensionArray):
+    def __init__(self, values):
+        if not isinstance(values, pa.ChunkedArray):
+            raise ValueError
+
+        assert values.type == pa.timestamp("us")
+        self._data = values
+        self._dtype = ArrowTimestampUSDtype()
+
+
+def test_constructor_extensionblock():
+    # GH 34986
+    pd.DataFrame(
+        {
+            "timestamp": ArrowTimestampUSArray.from_scalars(
+                [None, datetime.datetime(2010, 9, 8, 7, 6, 5, 4)]
+            )
+        }
+    )
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__init__.py
new file mode 100644
index 0000000..571ab3d
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__init__.py
@@ -0,0 +1,71 @@
+"""
+Base test suite for extension arrays.
+
+These tests are intended for third-party libraries to subclass to validate
+that their extension arrays and dtypes satisfy the interface. Moving or
+renaming the tests should not be done lightly.
+
+Libraries are expected to implement a few pytest fixtures to provide data
+for the tests. The fixtures may be located in either
+
+* The same module as your test class.
+* A ``conftest.py`` in the same directory as your test class.
+
+The full list of fixtures may be found in the ``conftest.py`` next to this
+file.
+
+.. code-block:: python
+
+    import pytest
+    from pandas.tests.extension.base import BaseDtypeTests
+
+
+    @pytest.fixture
+    def dtype():
+        return MyDtype()
+
+
+    class TestMyDtype(BaseDtypeTests):
+        pass
+
+
+Your class ``TestMyDtype`` will inherit all the tests defined on
+``BaseDtypeTests``. pytest's fixture discovery will supply your ``dtype``
+wherever the test requires it. You're free to implement additional tests.
+
+All the tests in these modules use ``self.assert_frame_equal`` or
+``self.assert_series_equal`` for dataframe or series comparisons. By default,
+they use the usual ``pandas.testing.assert_frame_equal`` and
+``pandas.testing.assert_series_equal``. You can override the checks used
+by defining the staticmethods ``assert_frame_equal`` and
+``assert_series_equal`` on your base test class.
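+
+For example, to route all comparisons through a custom checker (using a
+hypothetical ``my_assert_series_equal`` helper; a sketch, not something
+this module provides):
+
+.. code-block:: python
+
+    class TestMyDtype(BaseDtypeTests):
+        assert_series_equal = staticmethod(my_assert_series_equal)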
+ +""" +from pandas.tests.extension.base.casting import BaseCastingTests # noqa +from pandas.tests.extension.base.constructors import BaseConstructorsTests # noqa +from pandas.tests.extension.base.dim2 import ( # noqa + Dim2CompatTests, + NDArrayBacked2DTests, +) +from pandas.tests.extension.base.dtype import BaseDtypeTests # noqa +from pandas.tests.extension.base.getitem import BaseGetitemTests # noqa +from pandas.tests.extension.base.groupby import BaseGroupbyTests # noqa +from pandas.tests.extension.base.index import BaseIndexTests # noqa +from pandas.tests.extension.base.interface import BaseInterfaceTests # noqa +from pandas.tests.extension.base.io import BaseParsingTests # noqa +from pandas.tests.extension.base.methods import BaseMethodsTests # noqa +from pandas.tests.extension.base.missing import BaseMissingTests # noqa +from pandas.tests.extension.base.ops import ( # noqa + BaseArithmeticOpsTests, + BaseComparisonOpsTests, + BaseOpsUtil, + BaseUnaryOpsTests, +) +from pandas.tests.extension.base.printing import BasePrintingTests # noqa +from pandas.tests.extension.base.reduce import ( # noqa + BaseBooleanReduceTests, + BaseNoReduceTests, + BaseNumericReduceTests, +) +from pandas.tests.extension.base.reshaping import BaseReshapingTests # noqa +from pandas.tests.extension.base.setitem import BaseSetitemTests # noqa diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..1d783b9 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/base.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/base.cpython-38.pyc new file mode 100644 index 0000000..8147e9e Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/base.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/casting.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/casting.cpython-38.pyc new file mode 100644 index 0000000..16f3c60 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/casting.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/constructors.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/constructors.cpython-38.pyc new file mode 100644 index 0000000..61b7b10 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/constructors.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/dim2.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/dim2.cpython-38.pyc new file mode 100644 index 0000000..6437204 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/dim2.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/dtype.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/dtype.cpython-38.pyc new file mode 100644 index 0000000..3eeda11 Binary files 
/dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/dtype.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/getitem.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/getitem.cpython-38.pyc new file mode 100644 index 0000000..b262c57 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/getitem.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/groupby.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/groupby.cpython-38.pyc new file mode 100644 index 0000000..e55c9eb Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/groupby.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/index.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/index.cpython-38.pyc new file mode 100644 index 0000000..4760ac6 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/index.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/interface.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/interface.cpython-38.pyc new file mode 100644 index 0000000..4cf92d6 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/interface.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/io.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/io.cpython-38.pyc new file mode 100644 index 0000000..6bfb325 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/io.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/methods.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/methods.cpython-38.pyc new file mode 100644 index 0000000..6474c72 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/methods.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/missing.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/missing.cpython-38.pyc new file mode 100644 index 0000000..fb0014f Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/missing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/ops.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/ops.cpython-38.pyc new file mode 100644 index 0000000..d2c5a26 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/ops.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/printing.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/printing.cpython-38.pyc new file mode 100644 index 0000000..3c45096 Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/printing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/reduce.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/reduce.cpython-38.pyc new file mode 100644 index 0000000..2ebde8b Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/reduce.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/reshaping.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/reshaping.cpython-38.pyc new file mode 100644 index 0000000..82cca6b Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/reshaping.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/setitem.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/setitem.cpython-38.pyc new file mode 100644 index 0000000..724beb4 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/__pycache__/setitem.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/base/base.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/base.py new file mode 100644 index 0000000..97d8e7c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/base.py @@ -0,0 +1,21 @@ +import pandas._testing as tm + + +class BaseExtensionTests: + # classmethod and different signature is needed + # to make inheritance compliant with mypy + @classmethod + def assert_equal(cls, left, right, **kwargs): + return tm.assert_equal(left, right, **kwargs) + + @classmethod + def assert_series_equal(cls, left, right, *args, **kwargs): + return tm.assert_series_equal(left, right, *args, **kwargs) + + @classmethod + def assert_frame_equal(cls, left, right, *args, **kwargs): + return tm.assert_frame_equal(left, right, *args, **kwargs) + + @classmethod + def assert_extension_array_equal(cls, left, right, *args, **kwargs): + return tm.assert_extension_array_equal(left, right, *args, **kwargs) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/base/casting.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/casting.py new file mode 100644 index 0000000..4987751 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/casting.py @@ -0,0 +1,87 @@ +import numpy as np +import pytest + +from pandas.compat import np_version_under1p20 +import pandas.util._test_decorators as td + +import pandas as pd +from pandas.core.internals import ObjectBlock +from pandas.tests.extension.base.base import BaseExtensionTests + + +class BaseCastingTests(BaseExtensionTests): + """Casting to and from ExtensionDtypes""" + + def test_astype_object_series(self, all_data): + ser = pd.Series(all_data, name="A") + result = ser.astype(object) + assert result.dtype == np.dtype(object) + if hasattr(result._mgr, "blocks"): + assert isinstance(result._mgr.blocks[0], ObjectBlock) + assert isinstance(result._mgr.array, np.ndarray) + assert result._mgr.array.dtype == np.dtype(object) + + def test_astype_object_frame(self, all_data): + df = pd.DataFrame({"A": all_data}) + + result = df.astype(object) + if hasattr(result._mgr, "blocks"): + blk = result._data.blocks[0] + assert isinstance(blk, ObjectBlock), 
type(blk) + assert isinstance(result._mgr.arrays[0], np.ndarray) + assert result._mgr.arrays[0].dtype == np.dtype(object) + + # earlier numpy raises TypeError on e.g. np.dtype(np.int64) == "Int64" + # instead of returning False + if not np_version_under1p20: + # check that we can compare the dtypes + comp = result.dtypes == df.dtypes + assert not comp.any() + + def test_tolist(self, data): + result = pd.Series(data).tolist() + expected = list(data) + assert result == expected + + def test_astype_str(self, data): + result = pd.Series(data[:5]).astype(str) + expected = pd.Series([str(x) for x in data[:5]], dtype=str) + self.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "nullable_string_dtype", + [ + "string[python]", + pytest.param( + "string[pyarrow]", marks=td.skip_if_no("pyarrow", min_version="1.0.0") + ), + ], + ) + def test_astype_string(self, data, nullable_string_dtype): + # GH-33465 + result = pd.Series(data[:5]).astype(nullable_string_dtype) + expected = pd.Series([str(x) for x in data[:5]], dtype=nullable_string_dtype) + self.assert_series_equal(result, expected) + + def test_to_numpy(self, data): + expected = np.asarray(data) + + result = data.to_numpy() + self.assert_equal(result, expected) + + result = pd.Series(data).to_numpy() + self.assert_equal(result, expected) + + def test_astype_empty_dataframe(self, dtype): + # https://github.com/pandas-dev/pandas/issues/33113 + df = pd.DataFrame() + result = df.astype(dtype) + self.assert_frame_equal(result, df) + + @pytest.mark.parametrize("copy", [True, False]) + def test_astype_own_type(self, data, copy): + # ensure that astype returns the original object for equal dtype and copy=False + # https://github.com/pandas-dev/pandas/issues/28488 + result = data.astype(data.dtype, copy=copy) + assert (result is data) is (not copy) + self.assert_extension_array_equal(result, data) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/base/constructors.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/constructors.py new file mode 100644 index 0000000..b9f8f85 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/constructors.py @@ -0,0 +1,142 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas.api.extensions import ExtensionArray +from pandas.core.internals.blocks import EABackedBlock +from pandas.tests.extension.base.base import BaseExtensionTests + + +class BaseConstructorsTests(BaseExtensionTests): + def test_from_sequence_from_cls(self, data): + result = type(data)._from_sequence(data, dtype=data.dtype) + self.assert_extension_array_equal(result, data) + + data = data[:0] + result = type(data)._from_sequence(data, dtype=data.dtype) + self.assert_extension_array_equal(result, data) + + def test_array_from_scalars(self, data): + scalars = [data[0], data[1], data[2]] + result = data._from_sequence(scalars) + assert isinstance(result, type(data)) + + def test_series_constructor(self, data): + result = pd.Series(data) + assert result.dtype == data.dtype + assert len(result) == len(data) + if hasattr(result._mgr, "blocks"): + assert isinstance(result._mgr.blocks[0], EABackedBlock) + assert result._mgr.array is data + + # Series[EA] is unboxed / boxed correctly + result2 = pd.Series(result) + assert result2.dtype == data.dtype + if hasattr(result._mgr, "blocks"): + assert isinstance(result2._mgr.blocks[0], EABackedBlock) + + def test_series_constructor_no_data_with_index(self, dtype, na_value): + result = pd.Series(index=[1, 2, 3], 
dtype=dtype) + expected = pd.Series([na_value] * 3, index=[1, 2, 3], dtype=dtype) + self.assert_series_equal(result, expected) + + # GH 33559 - empty index + result = pd.Series(index=[], dtype=dtype) + expected = pd.Series([], index=pd.Index([], dtype="object"), dtype=dtype) + self.assert_series_equal(result, expected) + + def test_series_constructor_scalar_na_with_index(self, dtype, na_value): + result = pd.Series(na_value, index=[1, 2, 3], dtype=dtype) + expected = pd.Series([na_value] * 3, index=[1, 2, 3], dtype=dtype) + self.assert_series_equal(result, expected) + + def test_series_constructor_scalar_with_index(self, data, dtype): + scalar = data[0] + result = pd.Series(scalar, index=[1, 2, 3], dtype=dtype) + expected = pd.Series([scalar] * 3, index=[1, 2, 3], dtype=dtype) + self.assert_series_equal(result, expected) + + result = pd.Series(scalar, index=["foo"], dtype=dtype) + expected = pd.Series([scalar], index=["foo"], dtype=dtype) + self.assert_series_equal(result, expected) + + @pytest.mark.parametrize("from_series", [True, False]) + def test_dataframe_constructor_from_dict(self, data, from_series): + if from_series: + data = pd.Series(data) + result = pd.DataFrame({"A": data}) + assert result.dtypes["A"] == data.dtype + assert result.shape == (len(data), 1) + if hasattr(result._mgr, "blocks"): + assert isinstance(result._mgr.blocks[0], EABackedBlock) + assert isinstance(result._mgr.arrays[0], ExtensionArray) + + def test_dataframe_from_series(self, data): + result = pd.DataFrame(pd.Series(data)) + assert result.dtypes[0] == data.dtype + assert result.shape == (len(data), 1) + if hasattr(result._mgr, "blocks"): + assert isinstance(result._mgr.blocks[0], EABackedBlock) + assert isinstance(result._mgr.arrays[0], ExtensionArray) + + def test_series_given_mismatched_index_raises(self, data): + msg = r"Length of values \(3\) does not match length of index \(5\)" + with pytest.raises(ValueError, match=msg): + pd.Series(data[:3], index=[0, 1, 2, 3, 4]) + + def test_from_dtype(self, data): + # construct from our dtype & string dtype + dtype = data.dtype + + expected = pd.Series(data) + result = pd.Series(list(data), dtype=dtype) + self.assert_series_equal(result, expected) + + result = pd.Series(list(data), dtype=str(dtype)) + self.assert_series_equal(result, expected) + + # gh-30280 + + expected = pd.DataFrame(data).astype(dtype) + result = pd.DataFrame(list(data), dtype=dtype) + self.assert_frame_equal(result, expected) + + result = pd.DataFrame(list(data), dtype=str(dtype)) + self.assert_frame_equal(result, expected) + + def test_pandas_array(self, data): + # pd.array(extension_array) should be idempotent... + result = pd.array(data) + self.assert_extension_array_equal(result, data) + + def test_pandas_array_dtype(self, data): + # ... 
but specifying dtype will override idempotency + result = pd.array(data, dtype=np.dtype(object)) + expected = pd.arrays.PandasArray(np.asarray(data, dtype=object)) + self.assert_equal(result, expected) + + def test_construct_empty_dataframe(self, dtype): + # GH 33623 + result = pd.DataFrame(columns=["a"], dtype=dtype) + expected = pd.DataFrame( + {"a": pd.array([], dtype=dtype)}, index=pd.Index([], dtype="object") + ) + self.assert_frame_equal(result, expected) + + def test_empty(self, dtype): + cls = dtype.construct_array_type() + result = cls._empty((4,), dtype=dtype) + assert isinstance(result, cls) + assert result.dtype == dtype + assert result.shape == (4,) + + # GH#19600 method on ExtensionDtype + result2 = dtype.empty((4,)) + assert isinstance(result2, cls) + assert result2.dtype == dtype + assert result2.shape == (4,) + + result2 = dtype.empty(4) + assert isinstance(result2, cls) + assert result2.dtype == dtype + assert result2.shape == (4,) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/base/dim2.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/dim2.py new file mode 100644 index 0000000..f71f3cf --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/dim2.py @@ -0,0 +1,301 @@ +""" +Tests for 2D compatibility. +""" +import numpy as np +import pytest + +from pandas._libs.missing import is_matching_na + +import pandas as pd +from pandas.core.arrays.integer import INT_STR_TO_DTYPE +from pandas.tests.extension.base.base import BaseExtensionTests + + +class Dim2CompatTests(BaseExtensionTests): + def test_transpose(self, data): + arr2d = data.repeat(2).reshape(-1, 2) + shape = arr2d.shape + assert shape[0] != shape[-1] # otherwise the rest of the test is useless + + assert arr2d.T.shape == shape[::-1] + + def test_frame_from_2d_array(self, data): + arr2d = data.repeat(2).reshape(-1, 2) + + df = pd.DataFrame(arr2d) + expected = pd.DataFrame({0: arr2d[:, 0], 1: arr2d[:, 1]}) + self.assert_frame_equal(df, expected) + + def test_swapaxes(self, data): + arr2d = data.repeat(2).reshape(-1, 2) + + result = arr2d.swapaxes(0, 1) + expected = arr2d.T + self.assert_extension_array_equal(result, expected) + + def test_delete_2d(self, data): + arr2d = data.repeat(3).reshape(-1, 3) + + # axis = 0 + result = arr2d.delete(1, axis=0) + expected = data.delete(1).repeat(3).reshape(-1, 3) + self.assert_extension_array_equal(result, expected) + + # axis = 1 + result = arr2d.delete(1, axis=1) + expected = data.repeat(2).reshape(-1, 2) + self.assert_extension_array_equal(result, expected) + + def test_take_2d(self, data): + arr2d = data.reshape(-1, 1) + + result = arr2d.take([0, 0, -1], axis=0) + + expected = data.take([0, 0, -1]).reshape(-1, 1) + self.assert_extension_array_equal(result, expected) + + def test_repr_2d(self, data): + # this could fail in a corner case where an element contained the name + res = repr(data.reshape(1, -1)) + assert res.count(f"<{type(data).__name__}") == 1 + + res = repr(data.reshape(-1, 1)) + assert res.count(f"<{type(data).__name__}") == 1 + + def test_reshape(self, data): + arr2d = data.reshape(-1, 1) + assert arr2d.shape == (data.size, 1) + assert len(arr2d) == len(data) + + arr2d = data.reshape((-1, 1)) + assert arr2d.shape == (data.size, 1) + assert len(arr2d) == len(data) + + with pytest.raises(ValueError): + data.reshape((data.size, 2)) + with pytest.raises(ValueError): + data.reshape(data.size, 2) + + def test_getitem_2d(self, data): + arr2d = data.reshape(1, -1) + + result = arr2d[0] + 
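        # a row sliced out of a 2D EA comes back as a 1D EA of the same dtype
+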
self.assert_extension_array_equal(result, data) + + with pytest.raises(IndexError): + arr2d[1] + + with pytest.raises(IndexError): + arr2d[-2] + + result = arr2d[:] + self.assert_extension_array_equal(result, arr2d) + + result = arr2d[:, :] + self.assert_extension_array_equal(result, arr2d) + + result = arr2d[:, 0] + expected = data[[0]] + self.assert_extension_array_equal(result, expected) + + # dimension-expanding getitem on 1D + result = data[:, np.newaxis] + self.assert_extension_array_equal(result, arr2d.T) + + def test_iter_2d(self, data): + arr2d = data.reshape(1, -1) + + objs = list(iter(arr2d)) + assert len(objs) == arr2d.shape[0] + + for obj in objs: + assert isinstance(obj, type(data)) + assert obj.dtype == data.dtype + assert obj.ndim == 1 + assert len(obj) == arr2d.shape[1] + + def test_tolist_2d(self, data): + arr2d = data.reshape(1, -1) + + result = arr2d.tolist() + expected = [data.tolist()] + + assert isinstance(result, list) + assert all(isinstance(x, list) for x in result) + + assert result == expected + + def test_concat_2d(self, data): + left = type(data)._concat_same_type([data, data]).reshape(-1, 2) + right = left.copy() + + # axis=0 + result = left._concat_same_type([left, right], axis=0) + expected = data._concat_same_type([data] * 4).reshape(-1, 2) + self.assert_extension_array_equal(result, expected) + + # axis=1 + result = left._concat_same_type([left, right], axis=1) + assert result.shape == (len(data), 4) + self.assert_extension_array_equal(result[:, :2], left) + self.assert_extension_array_equal(result[:, 2:], right) + + # axis > 1 -> invalid + msg = "axis 2 is out of bounds for array of dimension 2" + with pytest.raises(ValueError, match=msg): + left._concat_same_type([left, right], axis=2) + + @pytest.mark.parametrize("method", ["backfill", "pad"]) + def test_fillna_2d_method(self, data_missing, method): + arr = data_missing.repeat(2).reshape(2, 2) + assert arr[0].isna().all() + assert not arr[1].isna().any() + + result = arr.fillna(method=method) + + expected = data_missing.fillna(method=method).repeat(2).reshape(2, 2) + self.assert_extension_array_equal(result, expected) + + @pytest.mark.parametrize("method", ["mean", "median", "var", "std", "sum", "prod"]) + def test_reductions_2d_axis_none(self, data, method): + arr2d = data.reshape(1, -1) + + err_expected = None + err_result = None + try: + expected = getattr(data, method)() + except Exception as err: + # if the 1D reduction is invalid, the 2D reduction should be as well + err_expected = err + try: + result = getattr(arr2d, method)(axis=None) + except Exception as err2: + err_result = err2 + + else: + result = getattr(arr2d, method)(axis=None) + + if err_result is not None or err_expected is not None: + assert type(err_result) == type(err_expected) + return + + assert is_matching_na(result, expected) or result == expected + + @pytest.mark.parametrize("method", ["mean", "median", "var", "std", "sum", "prod"]) + def test_reductions_2d_axis0(self, data, method): + arr2d = data.reshape(1, -1) + + kwargs = {} + if method == "std": + # pass ddof=0 so we get all-zero std instead of all-NA std + kwargs["ddof"] = 0 + + try: + result = getattr(arr2d, method)(axis=0, **kwargs) + except Exception as err: + try: + getattr(data, method)() + except Exception as err2: + assert type(err) == type(err2) + return + else: + raise AssertionError("Both reductions should raise or neither") + + def get_reduction_result_dtype(dtype): + # windows and 32bit builds will in some cases have int32/uint32 + # where other builds will 
have int64/uint64. + if dtype.itemsize == 8: + return dtype + elif dtype.kind in "ib": + return INT_STR_TO_DTYPE[np.dtype(int).name] + else: + # i.e. dtype.kind == "u" + return INT_STR_TO_DTYPE[np.dtype(np.uint).name] + + if method in ["mean", "median", "sum", "prod"]: + # std and var are not dtype-preserving + expected = data + if method in ["sum", "prod"] and data.dtype.kind in "iub": + dtype = get_reduction_result_dtype(data.dtype) + + expected = data.astype(dtype) + if data.dtype.kind == "b" and method in ["sum", "prod"]: + # We get IntegerArray instead of BooleanArray + pass + else: + assert type(expected) == type(data), type(expected) + assert dtype == expected.dtype + + self.assert_extension_array_equal(result, expected) + elif method == "std": + self.assert_extension_array_equal(result, data - data) + # punt on method == "var" + + @pytest.mark.parametrize("method", ["mean", "median", "var", "std", "sum", "prod"]) + def test_reductions_2d_axis1(self, data, method): + arr2d = data.reshape(1, -1) + + try: + result = getattr(arr2d, method)(axis=1) + except Exception as err: + try: + getattr(data, method)() + except Exception as err2: + assert type(err) == type(err2) + return + else: + raise AssertionError("Both reductions should raise or neither") + + # not necessarily type/dtype-preserving, so weaker assertions + assert result.shape == (1,) + expected_scalar = getattr(data, method)() + res = result[0] + assert is_matching_na(res, expected_scalar) or res == expected_scalar + + +class NDArrayBacked2DTests(Dim2CompatTests): + # More specific tests for NDArrayBackedExtensionArray subclasses + + def test_copy_order(self, data): + # We should be matching numpy semantics for the "order" keyword in 'copy' + arr2d = data.repeat(2).reshape(-1, 2) + assert arr2d._ndarray.flags["C_CONTIGUOUS"] + + res = arr2d.copy() + assert res._ndarray.flags["C_CONTIGUOUS"] + + res = arr2d[::2, ::2].copy() + assert res._ndarray.flags["C_CONTIGUOUS"] + + res = arr2d.copy("F") + assert not res._ndarray.flags["C_CONTIGUOUS"] + assert res._ndarray.flags["F_CONTIGUOUS"] + + res = arr2d.copy("K") + assert res._ndarray.flags["C_CONTIGUOUS"] + + res = arr2d.T.copy("K") + assert not res._ndarray.flags["C_CONTIGUOUS"] + assert res._ndarray.flags["F_CONTIGUOUS"] + + # order not accepted by numpy + msg = r"order must be one of 'C', 'F', 'A', or 'K' \(got 'Q'\)" + with pytest.raises(ValueError, match=msg): + arr2d.copy("Q") + + # neither contiguity + arr_nc = arr2d[::2] + assert not arr_nc._ndarray.flags["C_CONTIGUOUS"] + assert not arr_nc._ndarray.flags["F_CONTIGUOUS"] + + assert arr_nc.copy()._ndarray.flags["C_CONTIGUOUS"] + assert not arr_nc.copy()._ndarray.flags["F_CONTIGUOUS"] + + assert arr_nc.copy("C")._ndarray.flags["C_CONTIGUOUS"] + assert not arr_nc.copy("C")._ndarray.flags["F_CONTIGUOUS"] + + assert not arr_nc.copy("F")._ndarray.flags["C_CONTIGUOUS"] + assert arr_nc.copy("F")._ndarray.flags["F_CONTIGUOUS"] + + assert arr_nc.copy("K")._ndarray.flags["C_CONTIGUOUS"] + assert not arr_nc.copy("K")._ndarray.flags["F_CONTIGUOUS"] diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/base/dtype.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/dtype.py new file mode 100644 index 0000000..ea44430 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/dtype.py @@ -0,0 +1,137 @@ +import warnings + +import numpy as np +import pytest + +import pandas as pd +from pandas.api.types import ( + infer_dtype, + is_object_dtype, + is_string_dtype, +) +from 
pandas.tests.extension.base.base import BaseExtensionTests + + +class BaseDtypeTests(BaseExtensionTests): + """Base class for ExtensionDtype classes""" + + def test_name(self, dtype): + assert isinstance(dtype.name, str) + + def test_kind(self, dtype): + valid = set("biufcmMOSUV") + assert dtype.kind in valid + + def test_construct_from_string_own_name(self, dtype): + result = dtype.construct_from_string(dtype.name) + assert type(result) is type(dtype) + + # check OK as classmethod + result = type(dtype).construct_from_string(dtype.name) + assert type(result) is type(dtype) + + def test_is_dtype_from_name(self, dtype): + result = type(dtype).is_dtype(dtype.name) + assert result is True + + def test_is_dtype_unboxes_dtype(self, data, dtype): + assert dtype.is_dtype(data) is True + + def test_is_dtype_from_self(self, dtype): + result = type(dtype).is_dtype(dtype) + assert result is True + + def test_is_dtype_other_input(self, dtype): + assert dtype.is_dtype([1, 2, 3]) is False + + def test_is_not_string_type(self, dtype): + return not is_string_dtype(dtype) + + def test_is_not_object_type(self, dtype): + return not is_object_dtype(dtype) + + def test_eq_with_str(self, dtype): + assert dtype == dtype.name + assert dtype != dtype.name + "-suffix" + + def test_eq_with_numpy_object(self, dtype): + assert dtype != np.dtype("object") + + def test_eq_with_self(self, dtype): + assert dtype == dtype + assert dtype != object() + + def test_array_type(self, data, dtype): + assert dtype.construct_array_type() is type(data) + + def test_check_dtype(self, data): + dtype = data.dtype + + # check equivalency for using .dtypes + df = pd.DataFrame( + {"A": pd.Series(data, dtype=dtype), "B": data, "C": "foo", "D": 1} + ) + + # TODO(numpy-1.20): This warnings filter and if block can be removed + # once we require numpy>=1.20 + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + result = df.dtypes == str(dtype) + # NumPy>=1.20.0, but not pandas.compat.numpy till there + # is a wheel available with this change. 
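+        # Probe the installed numpy directly: on NumPy >= 1.20 comparing a
+        # numpy dtype to an unrecognized string returns False instead of
+        # raising TypeError.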
+        try:
+            new_numpy_behavior = np.dtype("int64") != "Int64"
+        except TypeError:
+            new_numpy_behavior = True
+
+        if dtype.name == "Int64" and not new_numpy_behavior:
+            expected = pd.Series([True, True, False, True], index=list("ABCD"))
+        else:
+            expected = pd.Series([True, True, False, False], index=list("ABCD"))
+
+        self.assert_series_equal(result, expected)
+
+        expected = pd.Series([True, True, False, False], index=list("ABCD"))
+        result = df.dtypes.apply(str) == str(dtype)
+        self.assert_series_equal(result, expected)
+
+    def test_hashable(self, dtype):
+        hash(dtype)  # no error
+
+    def test_str(self, dtype):
+        assert str(dtype) == dtype.name
+
+    def test_eq(self, dtype):
+        assert dtype == dtype.name
+        assert dtype != "another_type"
+
+    def test_construct_from_string(self, dtype):
+        dtype_instance = type(dtype).construct_from_string(dtype.name)
+        assert isinstance(dtype_instance, type(dtype))
+
+    def test_construct_from_string_another_type_raises(self, dtype):
+        msg = f"Cannot construct a '{type(dtype).__name__}' from 'another_type'"
+        with pytest.raises(TypeError, match=msg):
+            type(dtype).construct_from_string("another_type")
+
+    def test_construct_from_string_wrong_type_raises(self, dtype):
+        with pytest.raises(
+            TypeError,
+            match="'construct_from_string' expects a string, got ",
+        ):
+            type(dtype).construct_from_string(0)
+
+    def test_get_common_dtype(self, dtype):
+        # in practice we will not typically call this with a 1-length list
+        # (we shortcut to just use that dtype as the common dtype), but
+        # still testing as good practice to have this working (and it is the
+        # only case we can test in general)
+        assert dtype._get_common_dtype([dtype]) == dtype
+
+    @pytest.mark.parametrize("skipna", [True, False])
+    def test_infer_dtype(self, data, data_missing, skipna):
+        # only testing that this works without raising an error
+        res = infer_dtype(data, skipna=skipna)
+        assert isinstance(res, str)
+        res = infer_dtype(data_missing, skipna=skipna)
+        assert isinstance(res, str)
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/base/getitem.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/getitem.py
new file mode 100644
index 0000000..71e1e61
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/getitem.py
@@ -0,0 +1,486 @@
+import numpy as np
+import pytest
+
+import pandas as pd
+import pandas._testing as tm
+from pandas.tests.extension.base.base import BaseExtensionTests
+
+
+class BaseGetitemTests(BaseExtensionTests):
+    """Tests for ExtensionArray.__getitem__."""
+
+    def test_iloc_series(self, data):
+        ser = pd.Series(data)
+        result = ser.iloc[:4]
+        expected = pd.Series(data[:4])
+        self.assert_series_equal(result, expected)
+
+        result = ser.iloc[[0, 1, 2, 3]]
+        self.assert_series_equal(result, expected)
+
+    def test_iloc_frame(self, data):
+        df = pd.DataFrame({"A": data, "B": np.arange(len(data), dtype="int64")})
+        expected = pd.DataFrame({"A": data[:4]})
+
+        # slice -> frame
+        result = df.iloc[:4, [0]]
+        self.assert_frame_equal(result, expected)
+
+        # sequence -> frame
+        result = df.iloc[[0, 1, 2, 3], [0]]
+        self.assert_frame_equal(result, expected)
+
+        expected = pd.Series(data[:4], name="A")
+
+        # slice -> series
+        result = df.iloc[:4, 0]
+        self.assert_series_equal(result, expected)
+
+        # sequence -> series
+        result = df.iloc[[0, 1, 2, 3], 0]
+        self.assert_series_equal(result, expected)
+
+        # GH#32959 slice columns with step
+        result = df.iloc[:, ::2]
+        self.assert_frame_equal(result, df[["A"]])
+        result = df[["B", "A"]].iloc[:,
::2]
+        self.assert_frame_equal(result, df[["B"]])
+
+    def test_iloc_frame_single_block(self, data):
+        # GH#32959 null slice along index, slice along columns with single-block
+        df = pd.DataFrame({"A": data})
+
+        result = df.iloc[:, :]
+        self.assert_frame_equal(result, df)
+
+        result = df.iloc[:, :1]
+        self.assert_frame_equal(result, df)
+
+        result = df.iloc[:, :2]
+        self.assert_frame_equal(result, df)
+
+        result = df.iloc[:, ::2]
+        self.assert_frame_equal(result, df)
+
+        result = df.iloc[:, 1:2]
+        self.assert_frame_equal(result, df.iloc[:, :0])
+
+        result = df.iloc[:, -1:]
+        self.assert_frame_equal(result, df)
+
+    def test_loc_series(self, data):
+        ser = pd.Series(data)
+        result = ser.loc[:3]
+        expected = pd.Series(data[:4])
+        self.assert_series_equal(result, expected)
+
+        result = ser.loc[[0, 1, 2, 3]]
+        self.assert_series_equal(result, expected)
+
+    def test_loc_frame(self, data):
+        df = pd.DataFrame({"A": data, "B": np.arange(len(data), dtype="int64")})
+        expected = pd.DataFrame({"A": data[:4]})
+
+        # slice -> frame
+        result = df.loc[:3, ["A"]]
+        self.assert_frame_equal(result, expected)
+
+        # sequence -> frame
+        result = df.loc[[0, 1, 2, 3], ["A"]]
+        self.assert_frame_equal(result, expected)
+
+        expected = pd.Series(data[:4], name="A")
+
+        # slice -> series
+        result = df.loc[:3, "A"]
+        self.assert_series_equal(result, expected)
+
+        # sequence -> series
+        result = df.loc[[0, 1, 2, 3], "A"]
+        self.assert_series_equal(result, expected)
+
+    def test_loc_iloc_frame_single_dtype(self, data):
+        # GH#27110 bug in ExtensionBlock.iget caused df.iloc[n] to incorrectly
+        # return a scalar
+        df = pd.DataFrame({"A": data})
+        expected = pd.Series([data[2]], index=["A"], name=2, dtype=data.dtype)
+
+        result = df.loc[2]
+        self.assert_series_equal(result, expected)
+
+        expected = pd.Series(
+            [data[-1]], index=["A"], name=len(data) - 1, dtype=data.dtype
+        )
+        result = df.iloc[-1]
+        self.assert_series_equal(result, expected)
+
+    def test_getitem_scalar(self, data):
+        result = data[0]
+        assert isinstance(result, data.dtype.type)
+
+        result = pd.Series(data)[0]
+        assert isinstance(result, data.dtype.type)
+
+    def test_getitem_invalid(self, data):
+        # TODO: box over scalar, [scalar], (scalar,)?
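+        # e.g. data["foo"] and data[2.5] below must raise IndexError with
+        # numpy's message for invalid indices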
+ + msg = ( + r"only integers, slices \(`:`\), ellipsis \(`...`\), numpy.newaxis " + r"\(`None`\) and integer or boolean arrays are valid indices" + ) + with pytest.raises(IndexError, match=msg): + data["foo"] + with pytest.raises(IndexError, match=msg): + data[2.5] + + ub = len(data) + msg = "|".join( + [ + "list index out of range", # json + "index out of bounds", # pyarrow + "Out of bounds access", # Sparse + f"loc must be an integer between -{ub} and {ub}", # Sparse + f"index {ub+1} is out of bounds for axis 0 with size {ub}", + f"index -{ub+1} is out of bounds for axis 0 with size {ub}", + ] + ) + with pytest.raises(IndexError, match=msg): + data[ub + 1] + with pytest.raises(IndexError, match=msg): + data[-ub - 1] + + def test_getitem_scalar_na(self, data_missing, na_cmp, na_value): + result = data_missing[0] + assert na_cmp(result, na_value) + + def test_getitem_empty(self, data): + # Indexing with empty list + result = data[[]] + assert len(result) == 0 + assert isinstance(result, type(data)) + + expected = data[np.array([], dtype="int64")] + self.assert_extension_array_equal(result, expected) + + def test_getitem_mask(self, data): + # Empty mask, raw array + mask = np.zeros(len(data), dtype=bool) + result = data[mask] + assert len(result) == 0 + assert isinstance(result, type(data)) + + # Empty mask, in series + mask = np.zeros(len(data), dtype=bool) + result = pd.Series(data)[mask] + assert len(result) == 0 + assert result.dtype == data.dtype + + # non-empty mask, raw array + mask[0] = True + result = data[mask] + assert len(result) == 1 + assert isinstance(result, type(data)) + + # non-empty mask, in series + result = pd.Series(data)[mask] + assert len(result) == 1 + assert result.dtype == data.dtype + + def test_getitem_mask_raises(self, data): + mask = np.array([True, False]) + msg = f"Boolean index has wrong length: 2 instead of {len(data)}" + with pytest.raises(IndexError, match=msg): + data[mask] + + mask = pd.array(mask, dtype="boolean") + with pytest.raises(IndexError, match=msg): + data[mask] + + def test_getitem_boolean_array_mask(self, data): + mask = pd.array(np.zeros(data.shape, dtype="bool"), dtype="boolean") + result = data[mask] + assert len(result) == 0 + assert isinstance(result, type(data)) + + result = pd.Series(data)[mask] + assert len(result) == 0 + assert result.dtype == data.dtype + + mask[:5] = True + expected = data.take([0, 1, 2, 3, 4]) + result = data[mask] + self.assert_extension_array_equal(result, expected) + + expected = pd.Series(expected) + result = pd.Series(data)[mask] + self.assert_series_equal(result, expected) + + def test_getitem_boolean_na_treated_as_false(self, data): + # https://github.com/pandas-dev/pandas/issues/31503 + mask = pd.array(np.zeros(data.shape, dtype="bool"), dtype="boolean") + mask[:2] = pd.NA + mask[2:4] = True + + result = data[mask] + expected = data[mask.fillna(False)] + + self.assert_extension_array_equal(result, expected) + + s = pd.Series(data) + + result = s[mask] + expected = s[mask.fillna(False)] + + self.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "idx", + [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])], + ids=["list", "integer-array", "numpy-array"], + ) + def test_getitem_integer_array(self, data, idx): + result = data[idx] + assert len(result) == 3 + assert isinstance(result, type(data)) + expected = data.take([0, 1, 2]) + self.assert_extension_array_equal(result, expected) + + expected = pd.Series(expected) + result = pd.Series(data)[idx] + 
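        # the boxed Series must give the same result as the raw array take
+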
self.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "idx", + [[0, 1, 2, pd.NA], pd.array([0, 1, 2, pd.NA], dtype="Int64")], + ids=["list", "integer-array"], + ) + def test_getitem_integer_with_missing_raises(self, data, idx): + msg = "Cannot index with an integer indexer containing NA values" + with pytest.raises(ValueError, match=msg): + data[idx] + + @pytest.mark.xfail( + reason="Tries label-based and raises KeyError; " + "in some cases raises when calling np.asarray" + ) + @pytest.mark.parametrize( + "idx", + [[0, 1, 2, pd.NA], pd.array([0, 1, 2, pd.NA], dtype="Int64")], + ids=["list", "integer-array"], + ) + def test_getitem_series_integer_with_missing_raises(self, data, idx): + msg = "Cannot index with an integer indexer containing NA values" + # TODO: this raises KeyError about labels not found (it tries label-based) + + ser = pd.Series(data, index=[tm.rands(4) for _ in range(len(data))]) + with pytest.raises(ValueError, match=msg): + ser[idx] + + def test_getitem_slice(self, data): + # getitem[slice] should return an array + result = data[slice(0)] # empty + assert isinstance(result, type(data)) + + result = data[slice(1)] # scalar + assert isinstance(result, type(data)) + + def test_getitem_ellipsis_and_slice(self, data): + # GH#40353 this is called from getitem_block_index + result = data[..., :] + self.assert_extension_array_equal(result, data) + + result = data[:, ...] + self.assert_extension_array_equal(result, data) + + result = data[..., :3] + self.assert_extension_array_equal(result, data[:3]) + + result = data[:3, ...] + self.assert_extension_array_equal(result, data[:3]) + + result = data[..., ::2] + self.assert_extension_array_equal(result, data[::2]) + + result = data[::2, ...] + self.assert_extension_array_equal(result, data[::2]) + + def test_get(self, data): + # GH 20882 + s = pd.Series(data, index=[2 * i for i in range(len(data))]) + assert s.get(4) == s.iloc[2] + + result = s.get([4, 6]) + expected = s.iloc[[2, 3]] + self.assert_series_equal(result, expected) + + result = s.get(slice(2)) + expected = s.iloc[[0, 1]] + self.assert_series_equal(result, expected) + + assert s.get(-1) is None + assert s.get(s.index.max() + 1) is None + + s = pd.Series(data[:6], index=list("abcdef")) + assert s.get("c") == s.iloc[2] + + result = s.get(slice("b", "d")) + expected = s.iloc[[1, 2, 3]] + self.assert_series_equal(result, expected) + + result = s.get("Z") + assert result is None + + assert s.get(4) == s.iloc[4] + assert s.get(-1) == s.iloc[-1] + assert s.get(len(s)) is None + + # GH 21257 + s = pd.Series(data) + s2 = s[::2] + assert s2.get(1) is None + + def test_take_sequence(self, data): + result = pd.Series(data)[[0, 1, 3]] + assert result.iloc[0] == data[0] + assert result.iloc[1] == data[1] + assert result.iloc[2] == data[3] + + def test_take(self, data, na_value, na_cmp): + result = data.take([0, -1]) + assert result.dtype == data.dtype + assert result[0] == data[0] + assert result[1] == data[-1] + + result = data.take([0, -1], allow_fill=True, fill_value=na_value) + assert result[0] == data[0] + assert na_cmp(result[1], na_value) + + with pytest.raises(IndexError, match="out of bounds"): + data.take([len(data) + 1]) + + def test_take_empty(self, data, na_value, na_cmp): + empty = data[:0] + + result = empty.take([-1], allow_fill=True) + assert na_cmp(result[0], na_value) + + msg = "cannot do a non-empty take from an empty axes|out of bounds" + + with pytest.raises(IndexError, match=msg): + empty.take([-1]) + + with pytest.raises(IndexError, 
match="cannot do a non-empty take"): + empty.take([0, 1]) + + def test_take_negative(self, data): + # https://github.com/pandas-dev/pandas/issues/20640 + n = len(data) + result = data.take([0, -n, n - 1, -1]) + expected = data.take([0, 0, n - 1, n - 1]) + self.assert_extension_array_equal(result, expected) + + def test_take_non_na_fill_value(self, data_missing): + fill_value = data_missing[1] # valid + na = data_missing[0] + + arr = data_missing._from_sequence( + [na, fill_value, na], dtype=data_missing.dtype + ) + result = arr.take([-1, 1], fill_value=fill_value, allow_fill=True) + expected = arr.take([1, 1]) + self.assert_extension_array_equal(result, expected) + + def test_take_pandas_style_negative_raises(self, data, na_value): + with pytest.raises(ValueError, match=""): + data.take([0, -2], fill_value=na_value, allow_fill=True) + + @pytest.mark.parametrize("allow_fill", [True, False]) + def test_take_out_of_bounds_raises(self, data, allow_fill): + arr = data[:3] + + with pytest.raises(IndexError, match="out of bounds|out-of-bounds"): + arr.take(np.asarray([0, 3]), allow_fill=allow_fill) + + def test_take_series(self, data): + s = pd.Series(data) + result = s.take([0, -1]) + expected = pd.Series( + data._from_sequence([data[0], data[len(data) - 1]], dtype=s.dtype), + index=[0, len(data) - 1], + ) + self.assert_series_equal(result, expected) + + def test_reindex(self, data, na_value): + s = pd.Series(data) + result = s.reindex([0, 1, 3]) + expected = pd.Series(data.take([0, 1, 3]), index=[0, 1, 3]) + self.assert_series_equal(result, expected) + + n = len(data) + result = s.reindex([-1, 0, n]) + expected = pd.Series( + data._from_sequence([na_value, data[0], na_value], dtype=s.dtype), + index=[-1, 0, n], + ) + self.assert_series_equal(result, expected) + + result = s.reindex([n, n + 1]) + expected = pd.Series( + data._from_sequence([na_value, na_value], dtype=s.dtype), index=[n, n + 1] + ) + self.assert_series_equal(result, expected) + + def test_reindex_non_na_fill_value(self, data_missing): + valid = data_missing[1] + na = data_missing[0] + + arr = data_missing._from_sequence([na, valid], dtype=data_missing.dtype) + ser = pd.Series(arr) + result = ser.reindex([0, 1, 2], fill_value=valid) + expected = pd.Series( + data_missing._from_sequence([na, valid, valid], dtype=data_missing.dtype) + ) + + self.assert_series_equal(result, expected) + + def test_loc_len1(self, data): + # see GH-27785 take_nd with indexer of len 1 resulting in wrong ndim + df = pd.DataFrame({"A": data}) + res = df.loc[[0], "A"] + assert res.ndim == 1 + assert res._mgr.arrays[0].ndim == 1 + if hasattr(res._mgr, "blocks"): + assert res._mgr._block.ndim == 1 + + def test_item(self, data): + # https://github.com/pandas-dev/pandas/pull/30175 + s = pd.Series(data) + result = s[:1].item() + assert result == data[0] + + msg = "can only convert an array of size 1 to a Python scalar" + with pytest.raises(ValueError, match=msg): + s[:0].item() + + with pytest.raises(ValueError, match=msg): + s.item() + + def test_ellipsis_index(self): + # GH42430 1D slices over extension types turn into N-dimensional slices over + # ExtensionArrays + class CapturingStringArray(pd.arrays.StringArray): + """Extend StringArray to capture arguments to __getitem__""" + + def __getitem__(self, item): + self.last_item_arg = item + return super().__getitem__(item) + + df = pd.DataFrame( + {"col1": CapturingStringArray(np.array(["hello", "world"], dtype=object))} + ) + _ = df.iloc[:1] + + # String comparison because there's no native way to compare 
slices. + # Before the fix for GH42430, last_item_arg would get set to the 2D slice + # (Ellipsis, slice(None, 1, None)) + self.assert_equal(str(df["col1"].array.last_item_arg), "slice(None, 1, None)") diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/base/groupby.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/groupby.py new file mode 100644 index 0000000..db0190d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/groupby.py @@ -0,0 +1,106 @@ +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.tests.extension.base.base import BaseExtensionTests + + +class BaseGroupbyTests(BaseExtensionTests): + """Groupby-specific tests.""" + + def test_grouping_grouper(self, data_for_grouping): + df = pd.DataFrame( + {"A": ["B", "B", None, None, "A", "A", "B", "C"], "B": data_for_grouping} + ) + gr1 = df.groupby("A").grouper.groupings[0] + gr2 = df.groupby("B").grouper.groupings[0] + + tm.assert_numpy_array_equal(gr1.grouping_vector, df.A.values) + tm.assert_extension_array_equal(gr2.grouping_vector, data_for_grouping) + + @pytest.mark.parametrize("as_index", [True, False]) + def test_groupby_extension_agg(self, as_index, data_for_grouping): + df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping}) + result = df.groupby("B", as_index=as_index).A.mean() + _, uniques = pd.factorize(data_for_grouping, sort=True) + + if as_index: + index = pd.Index._with_infer(uniques, name="B") + expected = pd.Series([3.0, 1.0, 4.0], index=index, name="A") + self.assert_series_equal(result, expected) + else: + expected = pd.DataFrame({"B": uniques, "A": [3.0, 1.0, 4.0]}) + self.assert_frame_equal(result, expected) + + def test_groupby_agg_extension(self, data_for_grouping): + # GH#38980 groupby agg on extension type fails for non-numeric types + df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping}) + + expected = df.iloc[[0, 2, 4, 7]] + expected = expected.set_index("A") + + result = df.groupby("A").agg({"B": "first"}) + self.assert_frame_equal(result, expected) + + result = df.groupby("A").agg("first") + self.assert_frame_equal(result, expected) + + result = df.groupby("A").first() + self.assert_frame_equal(result, expected) + + def test_groupby_extension_no_sort(self, data_for_grouping): + df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping}) + result = df.groupby("B", sort=False).A.mean() + _, index = pd.factorize(data_for_grouping, sort=False) + + index = pd.Index._with_infer(index, name="B") + expected = pd.Series([1.0, 3.0, 4.0], index=index, name="A") + self.assert_series_equal(result, expected) + + def test_groupby_extension_transform(self, data_for_grouping): + valid = data_for_grouping[~data_for_grouping.isna()] + df = pd.DataFrame({"A": [1, 1, 3, 3, 1, 4], "B": valid}) + + result = df.groupby("B").A.transform(len) + expected = pd.Series([3, 3, 2, 2, 3, 1], name="A") + + self.assert_series_equal(result, expected) + + def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op): + df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping}) + df.groupby("B").apply(groupby_apply_op) + df.groupby("B").A.apply(groupby_apply_op) + df.groupby("A").apply(groupby_apply_op) + df.groupby("A").B.apply(groupby_apply_op) + + def test_groupby_apply_identity(self, data_for_grouping): + df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping}) + result = df.groupby("A").B.apply(lambda x: x.array) + expected = pd.Series( + [ 
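+                # one entry per group key (1, 2, 3, 4), in first-appearance order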
+ df.B.iloc[[0, 1, 6]].array, + df.B.iloc[[2, 3]].array, + df.B.iloc[[4, 5]].array, + df.B.iloc[[7]].array, + ], + index=pd.Index([1, 2, 3, 4], name="A"), + name="B", + ) + self.assert_series_equal(result, expected) + + def test_in_numeric_groupby(self, data_for_grouping): + df = pd.DataFrame( + { + "A": [1, 1, 2, 2, 3, 3, 1, 4], + "B": data_for_grouping, + "C": [1, 1, 1, 1, 1, 1, 1, 1], + } + ) + result = df.groupby("A").sum().columns + + if data_for_grouping.dtype._is_numeric: + expected = pd.Index(["B", "C"]) + else: + expected = pd.Index(["C"]) + + tm.assert_index_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/base/index.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/index.py new file mode 100644 index 0000000..2539c38 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/index.py @@ -0,0 +1,20 @@ +""" +Tests for Indexes backed by arbitrary ExtensionArrays. +""" +import pandas as pd +from pandas.tests.extension.base.base import BaseExtensionTests + + +class BaseIndexTests(BaseExtensionTests): + """Tests for Index object backed by an ExtensionArray""" + + def test_index_from_array(self, data): + idx = pd.Index(data) + assert data.dtype == idx.dtype + + def test_index_from_listlike_with_dtype(self, data): + idx = pd.Index(data, dtype=data.dtype) + assert idx.dtype == data.dtype + + idx = pd.Index(list(data), dtype=data.dtype) + assert idx.dtype == data.dtype diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/base/interface.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/interface.py new file mode 100644 index 0000000..3e8a754 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/interface.py @@ -0,0 +1,127 @@ +import numpy as np + +from pandas.core.dtypes.common import is_extension_array_dtype +from pandas.core.dtypes.dtypes import ExtensionDtype + +import pandas as pd +import pandas._testing as tm +from pandas.tests.extension.base.base import BaseExtensionTests + + +class BaseInterfaceTests(BaseExtensionTests): + """Tests that the basic interface is satisfied.""" + + # ------------------------------------------------------------------------ + # Interface + # ------------------------------------------------------------------------ + + def test_len(self, data): + assert len(data) == 100 + + def test_size(self, data): + assert data.size == 100 + + def test_ndim(self, data): + assert data.ndim == 1 + + def test_can_hold_na_valid(self, data): + # GH-20761 + assert data._can_hold_na is True + + def test_contains(self, data, data_missing): + # GH-37867 + # Tests for membership checks. Membership checks for nan-likes is tricky and + # the settled on rule is: `nan_like in arr` is True if nan_like is + # arr.dtype.na_value and arr.isna().any() is True. Else the check returns False. + + na_value = data.dtype.na_value + # ensure data without missing values + data = data[~data.isna()] + + # first elements are non-missing + assert data[0] in data + assert data_missing[0] in data_missing + + # check the presence of na_value + assert na_value in data_missing + assert na_value not in data + + # the data can never contain other nan-likes than na_value + for na_value_obj in tm.NULL_OBJECTS: + if na_value_obj is na_value or type(na_value_obj) == type(na_value): + # type check for e.g. 
two instances of Decimal("NAN") + continue + assert na_value_obj not in data + assert na_value_obj not in data_missing + + def test_memory_usage(self, data): + s = pd.Series(data) + result = s.memory_usage(index=False) + assert result == s.nbytes + + def test_array_interface(self, data): + result = np.array(data) + assert result[0] == data[0] + + result = np.array(data, dtype=object) + expected = np.array(list(data), dtype=object) + tm.assert_numpy_array_equal(result, expected) + + def test_is_extension_array_dtype(self, data): + assert is_extension_array_dtype(data) + assert is_extension_array_dtype(data.dtype) + assert is_extension_array_dtype(pd.Series(data)) + assert isinstance(data.dtype, ExtensionDtype) + + def test_no_values_attribute(self, data): + # GH-20735: EA's with .values attribute give problems with internal + # code, disallowing this for now until solved + assert not hasattr(data, "values") + assert not hasattr(data, "_values") + + def test_is_numeric_honored(self, data): + result = pd.Series(data) + if hasattr(result._mgr, "blocks"): + assert result._mgr.blocks[0].is_numeric is data.dtype._is_numeric + + def test_isna_extension_array(self, data_missing): + # If your `isna` returns an ExtensionArray, you must also implement + # _reduce. At the *very* least, you must implement any and all + na = data_missing.isna() + if is_extension_array_dtype(na): + assert na._reduce("any") + assert na.any() + + assert not na._reduce("all") + assert not na.all() + + assert na.dtype._is_boolean + + def test_copy(self, data): + # GH#27083 removing deep keyword from EA.copy + assert data[0] != data[1] + result = data.copy() + + data[1] = data[0] + assert result[1] != result[0] + + def test_view(self, data): + # view with no dtype should return a shallow copy, *not* the same + # object + assert data[1] != data[0] + + result = data.view() + assert result is not data + assert type(result) == type(data) + + result[1] = result[0] + assert data[1] == data[0] + + # check specifically that the `dtype` kwarg is accepted + data.view(dtype=None) + + def test_tolist(self, data): + result = data.tolist() + expected = list(data) + assert isinstance(result, list) + assert result == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/base/io.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/io.py new file mode 100644 index 0000000..a8c25db --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/io.py @@ -0,0 +1,19 @@ +from io import StringIO + +import numpy as np +import pytest + +import pandas as pd +from pandas.tests.extension.base.base import BaseExtensionTests + + +class BaseParsingTests(BaseExtensionTests): + @pytest.mark.parametrize("engine", ["c", "python"]) + def test_EA_types(self, engine, data): + df = pd.DataFrame({"with_dtype": pd.Series(data, dtype=str(data.dtype))}) + csv_output = df.to_csv(index=False, na_rep=np.nan) + result = pd.read_csv( + StringIO(csv_output), dtype={"with_dtype": str(data.dtype)}, engine=engine + ) + expected = df + self.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/base/methods.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/methods.py new file mode 100644 index 0000000..c96e2fb --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/methods.py @@ -0,0 +1,583 @@ +import inspect +import operator + +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_bool_dtype + 
+import pandas as pd +import pandas._testing as tm +from pandas.core.sorting import nargsort +from pandas.tests.extension.base.base import BaseExtensionTests + + +class BaseMethodsTests(BaseExtensionTests): + """Various Series and DataFrame methods.""" + + def test_value_counts_default_dropna(self, data): + # make sure we have consistent default dropna kwarg + if not hasattr(data, "value_counts"): + pytest.skip("value_counts is not implemented") + sig = inspect.signature(data.value_counts) + kwarg = sig.parameters["dropna"] + assert kwarg.default is True + + @pytest.mark.parametrize("dropna", [True, False]) + def test_value_counts(self, all_data, dropna): + all_data = all_data[:10] + if dropna: + other = np.array(all_data[~all_data.isna()]) + else: + other = all_data + + result = pd.Series(all_data).value_counts(dropna=dropna).sort_index() + expected = pd.Series(other).value_counts(dropna=dropna).sort_index() + + self.assert_series_equal(result, expected) + + def test_value_counts_with_normalize(self, data): + # GH 33172 + data = data[:10].unique() + values = np.array(data[~data.isna()]) + ser = pd.Series(data, dtype=data.dtype) + + result = ser.value_counts(normalize=True).sort_index() + + if not isinstance(data, pd.Categorical): + expected = pd.Series([1 / len(values)] * len(values), index=result.index) + else: + expected = pd.Series(0.0, index=result.index) + expected[result > 0] = 1 / len(values) + + self.assert_series_equal(result, expected) + + def test_count(self, data_missing): + df = pd.DataFrame({"A": data_missing}) + result = df.count(axis="columns") + expected = pd.Series([0, 1]) + self.assert_series_equal(result, expected) + + def test_series_count(self, data_missing): + # GH#26835 + ser = pd.Series(data_missing) + result = ser.count() + expected = 1 + assert result == expected + + def test_apply_simple_series(self, data): + result = pd.Series(data).apply(id) + assert isinstance(result, pd.Series) + + def test_argsort(self, data_for_sorting): + result = pd.Series(data_for_sorting).argsort() + # argsort result gets passed to take, so should be np.intp + expected = pd.Series(np.array([2, 0, 1], dtype=np.intp)) + self.assert_series_equal(result, expected) + + def test_argsort_missing_array(self, data_missing_for_sorting): + result = data_missing_for_sorting.argsort() + # argsort result gets passed to take, so should be np.intp + expected = np.array([2, 0, 1], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + def test_argsort_missing(self, data_missing_for_sorting): + result = pd.Series(data_missing_for_sorting).argsort() + expected = pd.Series(np.array([1, -1, 0], dtype=np.intp)) + self.assert_series_equal(result, expected) + + def test_argmin_argmax(self, data_for_sorting, data_missing_for_sorting, na_value): + # GH 24382 + + # data_for_sorting -> [B, C, A] with A < B < C + assert data_for_sorting.argmax() == 1 + assert data_for_sorting.argmin() == 2 + + # with repeated values -> first occurrence + data = data_for_sorting.take([2, 0, 0, 1, 1, 2]) + assert data.argmax() == 3 + assert data.argmin() == 0 + + # with missing values + # data_missing_for_sorting -> [B, NA, A] with A < B and NA missing. 
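+        # For example, with an integer-backed array this fixture could be
+        # [2, NA, 1]: argmax() skips the NA and returns position 0 (the 2),
+        # while argmin() returns position 2 (the 1) -- positions, not values.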
+ assert data_missing_for_sorting.argmax() == 0 + assert data_missing_for_sorting.argmin() == 2 + + @pytest.mark.parametrize("method", ["argmax", "argmin"]) + def test_argmin_argmax_empty_array(self, method, data): + # GH 24382 + err_msg = "attempt to get" + with pytest.raises(ValueError, match=err_msg): + getattr(data[:0], method)() + + @pytest.mark.parametrize("method", ["argmax", "argmin"]) + def test_argmin_argmax_all_na(self, method, data, na_value): + # all missing with skipna=True is the same as empty + err_msg = "attempt to get" + data_na = type(data)._from_sequence([na_value, na_value], dtype=data.dtype) + with pytest.raises(ValueError, match=err_msg): + getattr(data_na, method)() + + @pytest.mark.parametrize( + "op_name, skipna, expected", + [ + ("idxmax", True, 0), + ("idxmin", True, 2), + ("argmax", True, 0), + ("argmin", True, 2), + ("idxmax", False, np.nan), + ("idxmin", False, np.nan), + ("argmax", False, -1), + ("argmin", False, -1), + ], + ) + def test_argreduce_series( + self, data_missing_for_sorting, op_name, skipna, expected + ): + # data_missing_for_sorting -> [B, NA, A] with A < B and NA missing. + ser = pd.Series(data_missing_for_sorting) + result = getattr(ser, op_name)(skipna=skipna) + tm.assert_almost_equal(result, expected) + + def test_argmax_argmin_no_skipna_notimplemented(self, data_missing_for_sorting): + # GH#38733 + data = data_missing_for_sorting + + with pytest.raises(NotImplementedError, match=""): + data.argmin(skipna=False) + + with pytest.raises(NotImplementedError, match=""): + data.argmax(skipna=False) + + @pytest.mark.parametrize( + "na_position, expected", + [ + ("last", np.array([2, 0, 1], dtype=np.dtype("intp"))), + ("first", np.array([1, 2, 0], dtype=np.dtype("intp"))), + ], + ) + def test_nargsort(self, data_missing_for_sorting, na_position, expected): + # GH 25439 + result = nargsort(data_missing_for_sorting, na_position=na_position) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("ascending", [True, False]) + def test_sort_values(self, data_for_sorting, ascending, sort_by_key): + ser = pd.Series(data_for_sorting) + result = ser.sort_values(ascending=ascending, key=sort_by_key) + expected = ser.iloc[[2, 0, 1]] + if not ascending: + # GH 35922. 
Expect stable sort + if ser.nunique() == 2: + expected = ser.iloc[[0, 1, 2]] + else: + expected = ser.iloc[[1, 0, 2]] + + self.assert_series_equal(result, expected) + + @pytest.mark.parametrize("ascending", [True, False]) + def test_sort_values_missing( + self, data_missing_for_sorting, ascending, sort_by_key + ): + ser = pd.Series(data_missing_for_sorting) + result = ser.sort_values(ascending=ascending, key=sort_by_key) + if ascending: + expected = ser.iloc[[2, 0, 1]] + else: + expected = ser.iloc[[0, 2, 1]] + self.assert_series_equal(result, expected) + + @pytest.mark.parametrize("ascending", [True, False]) + def test_sort_values_frame(self, data_for_sorting, ascending): + df = pd.DataFrame({"A": [1, 2, 1], "B": data_for_sorting}) + result = df.sort_values(["A", "B"]) + expected = pd.DataFrame( + {"A": [1, 1, 2], "B": data_for_sorting.take([2, 0, 1])}, index=[2, 0, 1] + ) + self.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("box", [pd.Series, lambda x: x]) + @pytest.mark.parametrize("method", [lambda x: x.unique(), pd.unique]) + def test_unique(self, data, box, method): + duplicated = box(data._from_sequence([data[0], data[0]])) + + result = method(duplicated) + + assert len(result) == 1 + assert isinstance(result, type(data)) + assert result[0] == duplicated[0] + + @pytest.mark.parametrize("na_sentinel", [-1, -2]) + def test_factorize(self, data_for_grouping, na_sentinel): + codes, uniques = pd.factorize(data_for_grouping, na_sentinel=na_sentinel) + expected_codes = np.array( + [0, 0, na_sentinel, na_sentinel, 1, 1, 0, 2], dtype=np.intp + ) + expected_uniques = data_for_grouping.take([0, 4, 7]) + + tm.assert_numpy_array_equal(codes, expected_codes) + self.assert_extension_array_equal(uniques, expected_uniques) + + @pytest.mark.parametrize("na_sentinel", [-1, -2]) + def test_factorize_equivalence(self, data_for_grouping, na_sentinel): + codes_1, uniques_1 = pd.factorize(data_for_grouping, na_sentinel=na_sentinel) + codes_2, uniques_2 = data_for_grouping.factorize(na_sentinel=na_sentinel) + + tm.assert_numpy_array_equal(codes_1, codes_2) + self.assert_extension_array_equal(uniques_1, uniques_2) + assert len(uniques_1) == len(pd.unique(uniques_1)) + assert uniques_1.dtype == data_for_grouping.dtype + + def test_factorize_empty(self, data): + codes, uniques = pd.factorize(data[:0]) + expected_codes = np.array([], dtype=np.intp) + expected_uniques = type(data)._from_sequence([], dtype=data[:0].dtype) + + tm.assert_numpy_array_equal(codes, expected_codes) + self.assert_extension_array_equal(uniques, expected_uniques) + + def test_fillna_copy_frame(self, data_missing): + arr = data_missing.take([1, 1]) + df = pd.DataFrame({"A": arr}) + + filled_val = df.iloc[0, 0] + result = df.fillna(filled_val) + + assert df.A.values is not result.A.values + + def test_fillna_copy_series(self, data_missing): + arr = data_missing.take([1, 1]) + ser = pd.Series(arr) + + filled_val = ser[0] + result = ser.fillna(filled_val) + + assert ser._values is not result._values + assert ser._values is arr + + def test_fillna_length_mismatch(self, data_missing): + msg = "Length of 'value' does not match." 
+ with pytest.raises(ValueError, match=msg): + data_missing.fillna(data_missing.take([1])) + + def test_combine_le(self, data_repeated): + # GH 20825 + # Test that combine works when doing a <= (le) comparison + orig_data1, orig_data2 = data_repeated(2) + s1 = pd.Series(orig_data1) + s2 = pd.Series(orig_data2) + result = s1.combine(s2, lambda x1, x2: x1 <= x2) + expected = pd.Series( + [a <= b for (a, b) in zip(list(orig_data1), list(orig_data2))] + ) + self.assert_series_equal(result, expected) + + val = s1.iloc[0] + result = s1.combine(val, lambda x1, x2: x1 <= x2) + expected = pd.Series([a <= val for a in list(orig_data1)]) + self.assert_series_equal(result, expected) + + def test_combine_add(self, data_repeated): + # GH 20825 + orig_data1, orig_data2 = data_repeated(2) + s1 = pd.Series(orig_data1) + s2 = pd.Series(orig_data2) + result = s1.combine(s2, lambda x1, x2: x1 + x2) + with np.errstate(over="ignore"): + expected = pd.Series( + orig_data1._from_sequence( + [a + b for (a, b) in zip(list(orig_data1), list(orig_data2))] + ) + ) + self.assert_series_equal(result, expected) + + val = s1.iloc[0] + result = s1.combine(val, lambda x1, x2: x1 + x2) + expected = pd.Series( + orig_data1._from_sequence([a + val for a in list(orig_data1)]) + ) + self.assert_series_equal(result, expected) + + def test_combine_first(self, data): + # https://github.com/pandas-dev/pandas/issues/24147 + a = pd.Series(data[:3]) + b = pd.Series(data[2:5], index=[2, 3, 4]) + result = a.combine_first(b) + expected = pd.Series(data[:5]) + self.assert_series_equal(result, expected) + + @pytest.mark.parametrize("frame", [True, False]) + @pytest.mark.parametrize( + "periods, indices", + [(-2, [2, 3, 4, -1, -1]), (0, [0, 1, 2, 3, 4]), (2, [-1, -1, 0, 1, 2])], + ) + def test_container_shift(self, data, frame, periods, indices): + # https://github.com/pandas-dev/pandas/issues/22386 + subset = data[:5] + data = pd.Series(subset, name="A") + expected = pd.Series(subset.take(indices, allow_fill=True), name="A") + + if frame: + result = data.to_frame(name="A").assign(B=1).shift(periods) + expected = pd.concat( + [expected, pd.Series([1] * 5, name="B").shift(periods)], axis=1 + ) + compare = self.assert_frame_equal + else: + result = data.shift(periods) + compare = self.assert_series_equal + + compare(result, expected) + + def test_shift_0_periods(self, data): + # GH#33856 shifting with periods=0 should return a copy, not same obj + result = data.shift(0) + assert data[0] != data[1] # otherwise below is invalid + data[0] = data[1] + assert result[0] != result[1] # i.e. not the same object/view + + @pytest.mark.parametrize("periods", [1, -2]) + def test_diff(self, data, periods): + data = data[:5] + if is_bool_dtype(data.dtype): + op = operator.xor + else: + op = operator.sub + try: + # does this array implement ops? 
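+            # Probing the op once here, outside the assertions, means dtypes
+            # without arithmetic support skip the test rather than fail it;
+            # xor stands in for subtraction above because bool - bool is not
+            # defined for numpy boolean arrays.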
+ op(data, data) + except Exception: + pytest.skip(f"{type(data)} does not support diff") + s = pd.Series(data) + result = s.diff(periods) + expected = pd.Series(op(data, data.shift(periods))) + self.assert_series_equal(result, expected) + + df = pd.DataFrame({"A": data, "B": [1.0] * 5}) + result = df.diff(periods) + if periods == 1: + b = [np.nan, 0, 0, 0, 0] + else: + b = [0, 0, 0, np.nan, np.nan] + expected = pd.DataFrame({"A": expected, "B": b}) + self.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "periods, indices", + [[-4, [-1, -1]], [-1, [1, -1]], [0, [0, 1]], [1, [-1, 0]], [4, [-1, -1]]], + ) + def test_shift_non_empty_array(self, data, periods, indices): + # https://github.com/pandas-dev/pandas/issues/23911 + subset = data[:2] + result = subset.shift(periods) + expected = subset.take(indices, allow_fill=True) + self.assert_extension_array_equal(result, expected) + + @pytest.mark.parametrize("periods", [-4, -1, 0, 1, 4]) + def test_shift_empty_array(self, data, periods): + # https://github.com/pandas-dev/pandas/issues/23911 + empty = data[:0] + result = empty.shift(periods) + expected = empty + self.assert_extension_array_equal(result, expected) + + def test_shift_zero_copies(self, data): + result = data.shift(0) + assert result is not data + + result = data[:0].shift(2) + assert result is not data + + def test_shift_fill_value(self, data): + arr = data[:4] + fill_value = data[0] + result = arr.shift(1, fill_value=fill_value) + expected = data.take([0, 0, 1, 2]) + self.assert_extension_array_equal(result, expected) + + result = arr.shift(-2, fill_value=fill_value) + expected = data.take([2, 3, 0, 0]) + self.assert_extension_array_equal(result, expected) + + def test_not_hashable(self, data): + # We are in general mutable, so not hashable + with pytest.raises(TypeError, match="unhashable type"): + hash(data) + + def test_hash_pandas_object_works(self, data, as_frame): + # https://github.com/pandas-dev/pandas/issues/23066 + data = pd.Series(data) + if as_frame: + data = data.to_frame() + a = pd.util.hash_pandas_object(data) + b = pd.util.hash_pandas_object(data) + self.assert_equal(a, b) + + def test_searchsorted(self, data_for_sorting, as_series): + b, c, a = data_for_sorting + arr = data_for_sorting.take([2, 0, 1]) # to get [a, b, c] + + if as_series: + arr = pd.Series(arr) + assert arr.searchsorted(a) == 0 + assert arr.searchsorted(a, side="right") == 1 + + assert arr.searchsorted(b) == 1 + assert arr.searchsorted(b, side="right") == 2 + + assert arr.searchsorted(c) == 2 + assert arr.searchsorted(c, side="right") == 3 + + result = arr.searchsorted(arr.take([0, 2])) + expected = np.array([0, 2], dtype=np.intp) + + tm.assert_numpy_array_equal(result, expected) + + # sorter + sorter = np.array([1, 2, 0]) + assert data_for_sorting.searchsorted(a, sorter=sorter) == 0 + + def test_where_series(self, data, na_value, as_frame): + assert data[0] != data[1] + cls = type(data) + a, b = data[:2] + + ser = pd.Series(cls._from_sequence([a, a, b, b], dtype=data.dtype)) + cond = np.array([True, True, False, False]) + + if as_frame: + ser = ser.to_frame(name="a") + cond = cond.reshape(-1, 1) + + result = ser.where(cond) + expected = pd.Series( + cls._from_sequence([a, a, na_value, na_value], dtype=data.dtype) + ) + + if as_frame: + expected = expected.to_frame(name="a") + self.assert_equal(result, expected) + + # array other + cond = np.array([True, False, True, True]) + other = cls._from_sequence([a, b, a, b], dtype=data.dtype) + if as_frame: + other = pd.DataFrame({"a": 
other}) + cond = pd.DataFrame({"a": cond}) + result = ser.where(cond, other) + expected = pd.Series(cls._from_sequence([a, b, b, b], dtype=data.dtype)) + if as_frame: + expected = expected.to_frame(name="a") + self.assert_equal(result, expected) + + @pytest.mark.parametrize("repeats", [0, 1, 2, [1, 2, 3]]) + def test_repeat(self, data, repeats, as_series, use_numpy): + arr = type(data)._from_sequence(data[:3], dtype=data.dtype) + if as_series: + arr = pd.Series(arr) + + result = np.repeat(arr, repeats) if use_numpy else arr.repeat(repeats) + + repeats = [repeats] * 3 if isinstance(repeats, int) else repeats + expected = [x for x, n in zip(arr, repeats) for _ in range(n)] + expected = type(data)._from_sequence(expected, dtype=data.dtype) + if as_series: + expected = pd.Series(expected, index=arr.index.repeat(repeats)) + + self.assert_equal(result, expected) + + @pytest.mark.parametrize( + "repeats, kwargs, error, msg", + [ + (2, {"axis": 1}, ValueError, "axis"), + (-1, {}, ValueError, "negative"), + ([1, 2], {}, ValueError, "shape"), + (2, {"foo": "bar"}, TypeError, "'foo'"), + ], + ) + def test_repeat_raises(self, data, repeats, kwargs, error, msg, use_numpy): + with pytest.raises(error, match=msg): + if use_numpy: + np.repeat(data, repeats, **kwargs) + else: + data.repeat(repeats, **kwargs) + + def test_delete(self, data): + result = data.delete(0) + expected = data[1:] + self.assert_extension_array_equal(result, expected) + + result = data.delete([1, 3]) + expected = data._concat_same_type([data[[0]], data[[2]], data[4:]]) + self.assert_extension_array_equal(result, expected) + + def test_insert(self, data): + # insert at the beginning + result = data[1:].insert(0, data[0]) + self.assert_extension_array_equal(result, data) + + result = data[1:].insert(-len(data[1:]), data[0]) + self.assert_extension_array_equal(result, data) + + # insert at the middle + result = data[:-1].insert(4, data[-1]) + + taker = np.arange(len(data)) + taker[5:] = taker[4:-1] + taker[4] = len(data) - 1 + expected = data.take(taker) + self.assert_extension_array_equal(result, expected) + + def test_insert_invalid(self, data, invalid_scalar): + item = invalid_scalar + + with pytest.raises((TypeError, ValueError)): + data.insert(0, item) + + with pytest.raises((TypeError, ValueError)): + data.insert(4, item) + + with pytest.raises((TypeError, ValueError)): + data.insert(len(data) - 1, item) + + def test_insert_invalid_loc(self, data): + ub = len(data) + + with pytest.raises(IndexError): + data.insert(ub + 1, data[0]) + + with pytest.raises(IndexError): + data.insert(-ub - 1, data[0]) + + with pytest.raises(TypeError): + # we expect TypeError here instead of IndexError to match np.insert + data.insert(1.5, data[0]) + + @pytest.mark.parametrize("box", [pd.array, pd.Series, pd.DataFrame]) + def test_equals(self, data, na_value, as_series, box): + data2 = type(data)._from_sequence([data[0]] * len(data), dtype=data.dtype) + data_na = type(data)._from_sequence([na_value] * len(data), dtype=data.dtype) + + data = tm.box_expected(data, box, transpose=False) + data2 = tm.box_expected(data2, box, transpose=False) + data_na = tm.box_expected(data_na, box, transpose=False) + + # we are asserting with `is True/False` explicitly, to test that the + # result is an actual Python bool, and not something "truthy" + + assert data.equals(data) is True + assert data.equals(data.copy()) is True + + # unequal other data + assert data.equals(data2) is False + assert data.equals(data_na) is False + + # different length + assert 
data[:2].equals(data[:3]) is False + + # empty are equal + assert data[:0].equals(data[:0]) is True + + # other types + assert data.equals(None) is False + assert data[[0]].equals(data[0]) is False diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/base/missing.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/missing.py new file mode 100644 index 0000000..3d43dc4 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/missing.py @@ -0,0 +1,160 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.api.types import is_sparse +from pandas.tests.extension.base.base import BaseExtensionTests + + +class BaseMissingTests(BaseExtensionTests): + def test_isna(self, data_missing): + expected = np.array([True, False]) + + result = pd.isna(data_missing) + tm.assert_numpy_array_equal(result, expected) + + result = pd.Series(data_missing).isna() + expected = pd.Series(expected) + self.assert_series_equal(result, expected) + + # GH 21189 + result = pd.Series(data_missing).drop([0, 1]).isna() + expected = pd.Series([], dtype=bool) + self.assert_series_equal(result, expected) + + @pytest.mark.parametrize("na_func", ["isna", "notna"]) + def test_isna_returns_copy(self, data_missing, na_func): + result = pd.Series(data_missing) + expected = result.copy() + mask = getattr(result, na_func)() + if is_sparse(mask): + mask = np.array(mask) + + mask[:] = True + self.assert_series_equal(result, expected) + + def test_dropna_array(self, data_missing): + result = data_missing.dropna() + expected = data_missing[[1]] + self.assert_extension_array_equal(result, expected) + + def test_dropna_series(self, data_missing): + ser = pd.Series(data_missing) + result = ser.dropna() + expected = ser.iloc[[1]] + self.assert_series_equal(result, expected) + + def test_dropna_frame(self, data_missing): + df = pd.DataFrame({"A": data_missing}) + + # defaults + result = df.dropna() + expected = df.iloc[[1]] + self.assert_frame_equal(result, expected) + + # axis = 1 + result = df.dropna(axis="columns") + expected = pd.DataFrame(index=[0, 1]) + self.assert_frame_equal(result, expected) + + # multiple + df = pd.DataFrame({"A": data_missing, "B": [1, np.nan]}) + result = df.dropna() + expected = df.iloc[:0] + self.assert_frame_equal(result, expected) + + def test_fillna_scalar(self, data_missing): + valid = data_missing[1] + result = data_missing.fillna(valid) + expected = data_missing.fillna(valid) + self.assert_extension_array_equal(result, expected) + + def test_fillna_limit_pad(self, data_missing): + arr = data_missing.take([1, 0, 0, 0, 1]) + result = pd.Series(arr).fillna(method="ffill", limit=2) + expected = pd.Series(data_missing.take([1, 1, 1, 0, 1])) + self.assert_series_equal(result, expected) + + def test_fillna_limit_backfill(self, data_missing): + arr = data_missing.take([1, 0, 0, 0, 1]) + result = pd.Series(arr).fillna(method="backfill", limit=2) + expected = pd.Series(data_missing.take([1, 0, 1, 1, 1])) + self.assert_series_equal(result, expected) + + def test_fillna_no_op_returns_copy(self, data): + data = data[~data.isna()] + + valid = data[0] + result = data.fillna(valid) + assert result is not data + self.assert_extension_array_equal(result, data) + + result = data.fillna(method="backfill") + assert result is not data + self.assert_extension_array_equal(result, data) + + def test_fillna_series(self, data_missing): + fill_value = data_missing[1] + ser = pd.Series(data_missing) + + result = 
ser.fillna(fill_value) + expected = pd.Series( + data_missing._from_sequence( + [fill_value, fill_value], dtype=data_missing.dtype + ) + ) + self.assert_series_equal(result, expected) + + # Fill with a series + result = ser.fillna(expected) + self.assert_series_equal(result, expected) + + # Fill with a series not affecting the missing values + result = ser.fillna(ser) + self.assert_series_equal(result, ser) + + def test_fillna_series_method(self, data_missing, fillna_method): + fill_value = data_missing[1] + + if fillna_method == "ffill": + data_missing = data_missing[::-1] + + result = pd.Series(data_missing).fillna(method=fillna_method) + expected = pd.Series( + data_missing._from_sequence( + [fill_value, fill_value], dtype=data_missing.dtype + ) + ) + + self.assert_series_equal(result, expected) + + def test_fillna_frame(self, data_missing): + fill_value = data_missing[1] + + result = pd.DataFrame({"A": data_missing, "B": [1, 2]}).fillna(fill_value) + + expected = pd.DataFrame( + { + "A": data_missing._from_sequence( + [fill_value, fill_value], dtype=data_missing.dtype + ), + "B": [1, 2], + } + ) + + self.assert_frame_equal(result, expected) + + def test_fillna_fill_other(self, data): + result = pd.DataFrame({"A": data, "B": [np.nan] * len(data)}).fillna({"B": 0.0}) + + expected = pd.DataFrame({"A": data, "B": [0.0] * len(result)}) + + self.assert_frame_equal(result, expected) + + def test_use_inf_as_na_no_effect(self, data_missing): + ser = pd.Series(data_missing) + expected = ser.isna() + with pd.option_context("mode.use_inf_as_na", True): + result = ser.isna() + self.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/base/ops.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/ops.py new file mode 100644 index 0000000..1d3d736 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/ops.py @@ -0,0 +1,212 @@ +from __future__ import annotations + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.core import ops +from pandas.tests.extension.base.base import BaseExtensionTests + + +class BaseOpsUtil(BaseExtensionTests): + def get_op_from_name(self, op_name: str): + return tm.get_op_from_name(op_name) + + def check_opname(self, ser: pd.Series, op_name: str, other, exc=Exception): + op = self.get_op_from_name(op_name) + + self._check_op(ser, op, other, op_name, exc) + + def _combine(self, obj, other, op): + if isinstance(obj, pd.DataFrame): + if len(obj.columns) != 1: + raise NotImplementedError + expected = obj.iloc[:, 0].combine(other, op).to_frame() + else: + expected = obj.combine(other, op) + return expected + + def _check_op( + self, ser: pd.Series, op, other, op_name: str, exc=NotImplementedError + ): + if exc is None: + result = op(ser, other) + expected = self._combine(ser, other, op) + assert isinstance(result, type(ser)) + self.assert_equal(result, expected) + else: + with pytest.raises(exc): + op(ser, other) + + def _check_divmod_op(self, ser: pd.Series, op, other, exc=Exception): + # divmod has multiple return values, so check separately + if exc is None: + result_div, result_mod = op(ser, other) + if op is divmod: + expected_div, expected_mod = ser // other, ser % other + else: + expected_div, expected_mod = other // ser, other % ser + self.assert_series_equal(result_div, expected_div) + self.assert_series_equal(result_mod, expected_mod) + else: + with pytest.raises(exc): + divmod(ser, other) + + +class 
BaseArithmeticOpsTests(BaseOpsUtil): + """ + Various Series and DataFrame arithmetic ops methods. + + Subclasses supporting various ops should set the class variables + to indicate that they support ops of that kind + + * series_scalar_exc = TypeError + * frame_scalar_exc = TypeError + * series_array_exc = TypeError + * divmod_exc = TypeError + """ + + series_scalar_exc: type[TypeError] | None = TypeError + frame_scalar_exc: type[TypeError] | None = TypeError + series_array_exc: type[TypeError] | None = TypeError + divmod_exc: type[TypeError] | None = TypeError + + def test_arith_series_with_scalar(self, data, all_arithmetic_operators): + # series & scalar + op_name = all_arithmetic_operators + ser = pd.Series(data) + self.check_opname(ser, op_name, ser.iloc[0], exc=self.series_scalar_exc) + + def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): + # frame & scalar + op_name = all_arithmetic_operators + df = pd.DataFrame({"A": data}) + self.check_opname(df, op_name, data[0], exc=self.frame_scalar_exc) + + def test_arith_series_with_array(self, data, all_arithmetic_operators): + # ndarray & other series + op_name = all_arithmetic_operators + ser = pd.Series(data) + self.check_opname( + ser, op_name, pd.Series([ser.iloc[0]] * len(ser)), exc=self.series_array_exc + ) + + def test_divmod(self, data): + ser = pd.Series(data) + self._check_divmod_op(ser, divmod, 1, exc=self.divmod_exc) + self._check_divmod_op(1, ops.rdivmod, ser, exc=self.divmod_exc) + + def test_divmod_series_array(self, data, data_for_twos): + ser = pd.Series(data) + self._check_divmod_op(ser, divmod, data) + + other = data_for_twos + self._check_divmod_op(other, ops.rdivmod, ser) + + other = pd.Series(other) + self._check_divmod_op(other, ops.rdivmod, ser) + + def test_add_series_with_extension_array(self, data): + ser = pd.Series(data) + result = ser + data + expected = pd.Series(data + data) + self.assert_series_equal(result, expected) + + @pytest.mark.parametrize("box", [pd.Series, pd.DataFrame]) + def test_direct_arith_with_ndframe_returns_not_implemented(self, data, box): + # EAs should return NotImplemented for ops with Series/DataFrame + # Pandas takes care of unboxing the series and calling the EA's op. 
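+        # Returning the NotImplemented singleton (rather than raising) is what
+        # lets Python fall back to the reflected op on the Series/DataFrame,
+        # so alignment and boxing happen in pandas, not in each EA.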
+ other = pd.Series(data) + if box is pd.DataFrame: + other = other.to_frame() + if hasattr(data, "__add__"): + result = data.__add__(other) + assert result is NotImplemented + else: + raise pytest.skip(f"{type(data).__name__} does not implement add") + + +class BaseComparisonOpsTests(BaseOpsUtil): + """Various Series and DataFrame comparison ops methods.""" + + def _compare_other(self, ser: pd.Series, data, op, other): + + if op.__name__ in ["eq", "ne"]: + # comparison should match point-wise comparisons + result = op(ser, other) + expected = ser.combine(other, op) + self.assert_series_equal(result, expected) + + else: + exc = None + try: + result = op(ser, other) + except Exception as err: + exc = err + + if exc is None: + # Didn't error, then should match pointwise behavior + expected = ser.combine(other, op) + self.assert_series_equal(result, expected) + else: + with pytest.raises(type(exc)): + ser.combine(other, op) + + def test_compare_scalar(self, data, comparison_op): + ser = pd.Series(data) + self._compare_other(ser, data, comparison_op, 0) + + def test_compare_array(self, data, comparison_op): + ser = pd.Series(data) + other = pd.Series([data[0]] * len(data)) + self._compare_other(ser, data, comparison_op, other) + + @pytest.mark.parametrize("box", [pd.Series, pd.DataFrame]) + def test_direct_arith_with_ndframe_returns_not_implemented(self, data, box): + # EAs should return NotImplemented for ops with Series/DataFrame + # Pandas takes care of unboxing the series and calling the EA's op. + other = pd.Series(data) + if box is pd.DataFrame: + other = other.to_frame() + + if hasattr(data, "__eq__"): + result = data.__eq__(other) + assert result is NotImplemented + else: + raise pytest.skip(f"{type(data).__name__} does not implement __eq__") + + if hasattr(data, "__ne__"): + result = data.__ne__(other) + assert result is NotImplemented + else: + raise pytest.skip(f"{type(data).__name__} does not implement __ne__") + + +class BaseUnaryOpsTests(BaseOpsUtil): + def test_invert(self, data): + ser = pd.Series(data, name="name") + result = ~ser + expected = pd.Series(~data, name="name") + self.assert_series_equal(result, expected) + + @pytest.mark.parametrize("ufunc", [np.positive, np.negative, np.abs]) + def test_unary_ufunc_dunder_equivalence(self, data, ufunc): + # the dunder __pos__ works if and only if np.positive works, + # same for __neg__/np.negative and __abs__/np.abs + attr = {np.positive: "__pos__", np.negative: "__neg__", np.abs: "__abs__"}[ + ufunc + ] + + exc = None + try: + result = getattr(data, attr)() + except Exception as err: + exc = err + + # if __pos__ raised, then so should the ufunc + with pytest.raises((type(exc), TypeError)): + ufunc(data) + else: + alt = ufunc(data) + self.assert_extension_array_equal(result, alt) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/base/printing.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/printing.py new file mode 100644 index 0000000..eab75be --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/printing.py @@ -0,0 +1,42 @@ +import io + +import pytest + +import pandas as pd +from pandas.tests.extension.base.base import BaseExtensionTests + + +class BasePrintingTests(BaseExtensionTests): + """Tests checking the formatting of your EA when printed.""" + + @pytest.mark.parametrize("size", ["big", "small"]) + def test_array_repr(self, data, size): + if size == "small": + data = data[:5] + else: + data = type(data)._concat_same_type([data] * 5) + + result = 
repr(data) + assert type(data).__name__ in result + assert f"Length: {len(data)}" in result + assert str(data.dtype) in result + if size == "big": + assert "..." in result + + def test_array_repr_unicode(self, data): + result = str(data) + assert isinstance(result, str) + + def test_series_repr(self, data): + ser = pd.Series(data) + assert data.dtype.name in repr(ser) + + def test_dataframe_repr(self, data): + df = pd.DataFrame({"A": data}) + repr(df) + + def test_dtype_name_in_info(self, data): + buf = io.StringIO() + pd.DataFrame({"A": data}).info(buf=buf) + result = buf.getvalue() + assert data.dtype.name in result diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/base/reduce.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/reduce.py new file mode 100644 index 0000000..e363fda --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/reduce.py @@ -0,0 +1,69 @@ +import warnings + +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.tests.extension.base.base import BaseExtensionTests + + +class BaseReduceTests(BaseExtensionTests): + """ + Reduction specific tests. Generally these only + make sense for numeric/boolean operations. + """ + + def check_reduce(self, s, op_name, skipna): + result = getattr(s, op_name)(skipna=skipna) + expected = getattr(s.astype("float64"), op_name)(skipna=skipna) + tm.assert_almost_equal(result, expected) + + +class BaseNoReduceTests(BaseReduceTests): + """we don't define any reductions""" + + @pytest.mark.parametrize("skipna", [True, False]) + def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna): + op_name = all_numeric_reductions + s = pd.Series(data) + + msg = ( + "[Cc]annot perform|Categorical is not ordered for operation|" + "does not support reduction|" + ) + + with pytest.raises(TypeError, match=msg): + getattr(s, op_name)(skipna=skipna) + + @pytest.mark.parametrize("skipna", [True, False]) + def test_reduce_series_boolean(self, data, all_boolean_reductions, skipna): + op_name = all_boolean_reductions + s = pd.Series(data) + + msg = ( + "[Cc]annot perform|Categorical is not ordered for operation|" + "does not support reduction|" + ) + + with pytest.raises(TypeError, match=msg): + getattr(s, op_name)(skipna=skipna) + + +class BaseNumericReduceTests(BaseReduceTests): + @pytest.mark.parametrize("skipna", [True, False]) + def test_reduce_series(self, data, all_numeric_reductions, skipna): + op_name = all_numeric_reductions + s = pd.Series(data) + + # min/max with empty produce numpy warnings + with warnings.catch_warnings(): + warnings.simplefilter("ignore", RuntimeWarning) + self.check_reduce(s, op_name, skipna) + + +class BaseBooleanReduceTests(BaseReduceTests): + @pytest.mark.parametrize("skipna", [True, False]) + def test_reduce_series(self, data, all_boolean_reductions, skipna): + op_name = all_boolean_reductions + s = pd.Series(data) + self.check_reduce(s, op_name, skipna) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/base/reshaping.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/reshaping.py new file mode 100644 index 0000000..45bc5b7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/reshaping.py @@ -0,0 +1,370 @@ +import itertools + +import numpy as np +import pytest + +import pandas as pd +from pandas.api.extensions import ExtensionArray +from pandas.core.internals.blocks import EABackedBlock +from pandas.tests.extension.base.base import BaseExtensionTests + 
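+# A minimal sketch of how these base classes are consumed: a dtype-specific
+# test module defines the fixtures declared in
+# pandas/tests/extension/conftest.py and mixes in the Base*Tests classes.
+# Only the `data` fixture is shown here, and "Int64" is an arbitrary
+# stand-in dtype:
+#
+#     import pandas as pd
+#     import pytest
+#     from pandas.tests.extension import base
+#
+#     @pytest.fixture
+#     def data():
+#         # length-100, first two elements distinct and non-missing
+#         return pd.array(range(100), dtype="Int64")
+#
+#     class TestReshaping(base.BaseReshapingTests):
+#         pass  # inherits every reshaping test, run against `data`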
+ +class BaseReshapingTests(BaseExtensionTests): + """Tests for reshaping and concatenation.""" + + @pytest.mark.parametrize("in_frame", [True, False]) + def test_concat(self, data, in_frame): + wrapped = pd.Series(data) + if in_frame: + wrapped = pd.DataFrame(wrapped) + result = pd.concat([wrapped, wrapped], ignore_index=True) + + assert len(result) == len(data) * 2 + + if in_frame: + dtype = result.dtypes[0] + else: + dtype = result.dtype + + assert dtype == data.dtype + if hasattr(result._mgr, "blocks"): + assert isinstance(result._mgr.blocks[0], EABackedBlock) + assert isinstance(result._mgr.arrays[0], ExtensionArray) + + @pytest.mark.parametrize("in_frame", [True, False]) + def test_concat_all_na_block(self, data_missing, in_frame): + valid_block = pd.Series(data_missing.take([1, 1]), index=[0, 1]) + na_block = pd.Series(data_missing.take([0, 0]), index=[2, 3]) + if in_frame: + valid_block = pd.DataFrame({"a": valid_block}) + na_block = pd.DataFrame({"a": na_block}) + result = pd.concat([valid_block, na_block]) + if in_frame: + expected = pd.DataFrame({"a": data_missing.take([1, 1, 0, 0])}) + self.assert_frame_equal(result, expected) + else: + expected = pd.Series(data_missing.take([1, 1, 0, 0])) + self.assert_series_equal(result, expected) + + def test_concat_mixed_dtypes(self, data): + # https://github.com/pandas-dev/pandas/issues/20762 + df1 = pd.DataFrame({"A": data[:3]}) + df2 = pd.DataFrame({"A": [1, 2, 3]}) + df3 = pd.DataFrame({"A": ["a", "b", "c"]}).astype("category") + dfs = [df1, df2, df3] + + # dataframes + result = pd.concat(dfs) + expected = pd.concat([x.astype(object) for x in dfs]) + self.assert_frame_equal(result, expected) + + # series + result = pd.concat([x["A"] for x in dfs]) + expected = pd.concat([x["A"].astype(object) for x in dfs]) + self.assert_series_equal(result, expected) + + # simple test for just EA and one other + result = pd.concat([df1, df2.astype(object)]) + expected = pd.concat([df1.astype("object"), df2.astype("object")]) + self.assert_frame_equal(result, expected) + + result = pd.concat([df1["A"], df2["A"].astype(object)]) + expected = pd.concat([df1["A"].astype("object"), df2["A"].astype("object")]) + self.assert_series_equal(result, expected) + + def test_concat_columns(self, data, na_value): + df1 = pd.DataFrame({"A": data[:3]}) + df2 = pd.DataFrame({"B": [1, 2, 3]}) + + expected = pd.DataFrame({"A": data[:3], "B": [1, 2, 3]}) + result = pd.concat([df1, df2], axis=1) + self.assert_frame_equal(result, expected) + result = pd.concat([df1["A"], df2["B"]], axis=1) + self.assert_frame_equal(result, expected) + + # non-aligned + df2 = pd.DataFrame({"B": [1, 2, 3]}, index=[1, 2, 3]) + expected = pd.DataFrame( + { + "A": data._from_sequence(list(data[:3]) + [na_value], dtype=data.dtype), + "B": [np.nan, 1, 2, 3], + } + ) + + result = pd.concat([df1, df2], axis=1) + self.assert_frame_equal(result, expected) + result = pd.concat([df1["A"], df2["B"]], axis=1) + self.assert_frame_equal(result, expected) + + def test_concat_extension_arrays_copy_false(self, data, na_value): + # GH 20756 + df1 = pd.DataFrame({"A": data[:3]}) + df2 = pd.DataFrame({"B": data[3:7]}) + expected = pd.DataFrame( + { + "A": data._from_sequence(list(data[:3]) + [na_value], dtype=data.dtype), + "B": data[3:7], + } + ) + result = pd.concat([df1, df2], axis=1, copy=False) + self.assert_frame_equal(result, expected) + + def test_concat_with_reindex(self, data): + # GH-33027 + a = pd.DataFrame({"a": data[:5]}) + b = pd.DataFrame({"b": data[:5]}) + result = pd.concat([a, b], 
ignore_index=True) + expected = pd.DataFrame( + { + "a": data.take(list(range(5)) + ([-1] * 5), allow_fill=True), + "b": data.take(([-1] * 5) + list(range(5)), allow_fill=True), + } + ) + self.assert_frame_equal(result, expected) + + def test_align(self, data, na_value): + a = data[:3] + b = data[2:5] + r1, r2 = pd.Series(a).align(pd.Series(b, index=[1, 2, 3])) + + # Assumes that the ctor can take a list of scalars of the type + e1 = pd.Series(data._from_sequence(list(a) + [na_value], dtype=data.dtype)) + e2 = pd.Series(data._from_sequence([na_value] + list(b), dtype=data.dtype)) + self.assert_series_equal(r1, e1) + self.assert_series_equal(r2, e2) + + def test_align_frame(self, data, na_value): + a = data[:3] + b = data[2:5] + r1, r2 = pd.DataFrame({"A": a}).align(pd.DataFrame({"A": b}, index=[1, 2, 3])) + + # Assumes that the ctor can take a list of scalars of the type + e1 = pd.DataFrame( + {"A": data._from_sequence(list(a) + [na_value], dtype=data.dtype)} + ) + e2 = pd.DataFrame( + {"A": data._from_sequence([na_value] + list(b), dtype=data.dtype)} + ) + self.assert_frame_equal(r1, e1) + self.assert_frame_equal(r2, e2) + + def test_align_series_frame(self, data, na_value): + # https://github.com/pandas-dev/pandas/issues/20576 + ser = pd.Series(data, name="a") + df = pd.DataFrame({"col": np.arange(len(ser) + 1)}) + r1, r2 = ser.align(df) + + e1 = pd.Series( + data._from_sequence(list(data) + [na_value], dtype=data.dtype), + name=ser.name, + ) + + self.assert_series_equal(r1, e1) + self.assert_frame_equal(r2, df) + + def test_set_frame_expand_regular_with_extension(self, data): + df = pd.DataFrame({"A": [1] * len(data)}) + df["B"] = data + expected = pd.DataFrame({"A": [1] * len(data), "B": data}) + self.assert_frame_equal(df, expected) + + def test_set_frame_expand_extension_with_regular(self, data): + df = pd.DataFrame({"A": data}) + df["B"] = [1] * len(data) + expected = pd.DataFrame({"A": data, "B": [1] * len(data)}) + self.assert_frame_equal(df, expected) + + def test_set_frame_overwrite_object(self, data): + # https://github.com/pandas-dev/pandas/issues/20555 + df = pd.DataFrame({"A": [1] * len(data)}, dtype=object) + df["A"] = data + assert df.dtypes["A"] == data.dtype + + def test_merge(self, data, na_value): + # GH-20743 + df1 = pd.DataFrame({"ext": data[:3], "int1": [1, 2, 3], "key": [0, 1, 2]}) + df2 = pd.DataFrame({"int2": [1, 2, 3, 4], "key": [0, 0, 1, 3]}) + + res = pd.merge(df1, df2) + exp = pd.DataFrame( + { + "int1": [1, 1, 2], + "int2": [1, 2, 3], + "key": [0, 0, 1], + "ext": data._from_sequence( + [data[0], data[0], data[1]], dtype=data.dtype + ), + } + ) + self.assert_frame_equal(res, exp[["ext", "int1", "key", "int2"]]) + + res = pd.merge(df1, df2, how="outer") + exp = pd.DataFrame( + { + "int1": [1, 1, 2, 3, np.nan], + "int2": [1, 2, 3, np.nan, 4], + "key": [0, 0, 1, 2, 3], + "ext": data._from_sequence( + [data[0], data[0], data[1], data[2], na_value], dtype=data.dtype + ), + } + ) + self.assert_frame_equal(res, exp[["ext", "int1", "key", "int2"]]) + + def test_merge_on_extension_array(self, data): + # GH 23020 + a, b = data[:2] + key = type(data)._from_sequence([a, b], dtype=data.dtype) + + df = pd.DataFrame({"key": key, "val": [1, 2]}) + result = pd.merge(df, df, on="key") + expected = pd.DataFrame({"key": key, "val_x": [1, 2], "val_y": [1, 2]}) + self.assert_frame_equal(result, expected) + + # order + result = pd.merge(df.iloc[[1, 0]], df, on="key") + expected = expected.iloc[[1, 0]].reset_index(drop=True) + self.assert_frame_equal(result, expected) + + def 
test_merge_on_extension_array_duplicates(self, data): + # GH 23020 + a, b = data[:2] + key = type(data)._from_sequence([a, b, a], dtype=data.dtype) + df1 = pd.DataFrame({"key": key, "val": [1, 2, 3]}) + df2 = pd.DataFrame({"key": key, "val": [1, 2, 3]}) + + result = pd.merge(df1, df2, on="key") + expected = pd.DataFrame( + { + "key": key.take([0, 0, 0, 0, 1]), + "val_x": [1, 1, 3, 3, 2], + "val_y": [1, 3, 1, 3, 2], + } + ) + self.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "columns", + [ + ["A", "B"], + pd.MultiIndex.from_tuples( + [("A", "a"), ("A", "b")], names=["outer", "inner"] + ), + ], + ) + def test_stack(self, data, columns): + df = pd.DataFrame({"A": data[:5], "B": data[:5]}) + df.columns = columns + result = df.stack() + expected = df.astype(object).stack() + # we need a second astype(object), in case the constructor inferred + # object -> specialized, as is done for period. + expected = expected.astype(object) + + if isinstance(expected, pd.Series): + assert result.dtype == df.iloc[:, 0].dtype + else: + assert all(result.dtypes == df.iloc[:, 0].dtype) + + result = result.astype(object) + self.assert_equal(result, expected) + + @pytest.mark.parametrize( + "index", + [ + # Two levels, uniform. + pd.MultiIndex.from_product(([["A", "B"], ["a", "b"]]), names=["a", "b"]), + # non-uniform + pd.MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "b")]), + # three levels, non-uniform + pd.MultiIndex.from_product([("A", "B"), ("a", "b", "c"), (0, 1, 2)]), + pd.MultiIndex.from_tuples( + [ + ("A", "a", 1), + ("A", "b", 0), + ("A", "a", 0), + ("B", "a", 0), + ("B", "c", 1), + ] + ), + ], + ) + @pytest.mark.parametrize("obj", ["series", "frame"]) + def test_unstack(self, data, index, obj): + data = data[: len(index)] + if obj == "series": + ser = pd.Series(data, index=index) + else: + ser = pd.DataFrame({"A": data, "B": data}, index=index) + + n = index.nlevels + levels = list(range(n)) + # [0, 1, 2] + # [(0,), (1,), (2,), (0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)] + combinations = itertools.chain.from_iterable( + itertools.permutations(levels, i) for i in range(1, n) + ) + + for level in combinations: + result = ser.unstack(level=level) + assert all( + isinstance(result[col].array, type(data)) for col in result.columns + ) + + if obj == "series": + # We should get the same result with to_frame+unstack+droplevel + df = ser.to_frame() + + alt = df.unstack(level=level).droplevel(0, axis=1) + self.assert_frame_equal(result, alt) + + expected = ser.astype(object).unstack( + level=level, fill_value=data.dtype.na_value + ) + if obj == "series" and not isinstance(ser.dtype, pd.SparseDtype): + # GH#34457 SparseArray.astype(object) gives Sparse[object] + # instead of np.dtype(object) + assert (expected.dtypes == object).all() + + result = result.astype(object) + + self.assert_frame_equal(result, expected) + + def test_ravel(self, data): + # as long as EA is 1D-only, ravel is a no-op + result = data.ravel() + assert type(result) == type(data) + + # Check that we have a view, not a copy + result[0] = result[1] + assert data[0] == data[1] + + def test_transpose(self, data): + result = data.transpose() + assert type(result) == type(data) + + # check we get a new object + assert result is not data + + # If we ever _did_ support 2D, shape should be reversed + assert result.shape == data.shape[::-1] + + # Check that we have a view, not a copy + result[0] = result[1] + assert data[0] == data[1] + + def test_transpose_frame(self, data): + df = pd.DataFrame({"A": data[:4], "B": 
data[:4]}, index=["a", "b", "c", "d"]) + result = df.T + expected = pd.DataFrame( + { + "a": type(data)._from_sequence([data[0]] * 2, dtype=data.dtype), + "b": type(data)._from_sequence([data[1]] * 2, dtype=data.dtype), + "c": type(data)._from_sequence([data[2]] * 2, dtype=data.dtype), + "d": type(data)._from_sequence([data[3]] * 2, dtype=data.dtype), + }, + index=["A", "B"], + ) + self.assert_frame_equal(result, expected) + self.assert_frame_equal(np.transpose(np.transpose(df)), df) + self.assert_frame_equal(np.transpose(np.transpose(df[["A"]])), df[["A"]]) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/base/setitem.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/setitem.py new file mode 100644 index 0000000..208a1a1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/base/setitem.py @@ -0,0 +1,414 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import ( + DatetimeTZDtype, + IntervalDtype, + PandasDtype, + PeriodDtype, +) + +import pandas as pd +import pandas._testing as tm +from pandas.tests.extension.base.base import BaseExtensionTests + + +class BaseSetitemTests(BaseExtensionTests): + @pytest.fixture( + params=[ + lambda x: x.index, + lambda x: list(x.index), + lambda x: slice(None), + lambda x: slice(0, len(x)), + lambda x: range(len(x)), + lambda x: list(range(len(x))), + lambda x: np.ones(len(x), dtype=bool), + ], + ids=[ + "index", + "list[index]", + "null_slice", + "full_slice", + "range", + "list(range)", + "mask", + ], + ) + def full_indexer(self, request): + """ + Fixture for an indexer to pass to obj.loc to get/set the full length of the + object. + + In some cases, assumes that obj.index is the default RangeIndex. + """ + return request.param + + def test_setitem_scalar_series(self, data, box_in_series): + if box_in_series: + data = pd.Series(data) + data[0] = data[1] + assert data[0] == data[1] + + def test_setitem_sequence(self, data, box_in_series): + if box_in_series: + data = pd.Series(data) + original = data.copy() + + data[[0, 1]] = [data[1], data[0]] + assert data[0] == original[1] + assert data[1] == original[0] + + def test_setitem_sequence_mismatched_length_raises(self, data, as_array): + ser = pd.Series(data) + original = ser.copy() + value = [data[0]] + if as_array: + value = data._from_sequence(value) + + xpr = "cannot set using a {} indexer with a different length" + with pytest.raises(ValueError, match=xpr.format("list-like")): + ser[[0, 1]] = value + # Ensure no modifications made before the exception + self.assert_series_equal(ser, original) + + with pytest.raises(ValueError, match=xpr.format("slice")): + ser[slice(3)] = value + self.assert_series_equal(ser, original) + + def test_setitem_empty_indexer(self, data, box_in_series): + if box_in_series: + data = pd.Series(data) + original = data.copy() + data[np.array([], dtype=int)] = [] + self.assert_equal(data, original) + + def test_setitem_sequence_broadcasts(self, data, box_in_series): + if box_in_series: + data = pd.Series(data) + data[[0, 1]] = data[2] + assert data[0] == data[2] + assert data[1] == data[2] + + @pytest.mark.parametrize("setter", ["loc", "iloc"]) + def test_setitem_scalar(self, data, setter): + arr = pd.Series(data) + setter = getattr(arr, setter) + setter[0] = data[1] + assert arr[0] == data[1] + + def test_setitem_loc_scalar_mixed(self, data): + df = pd.DataFrame({"A": np.arange(len(data)), "B": data}) + df.loc[0, "B"] = data[1] + assert df.loc[0, "B"] == data[1] + + def 
test_setitem_loc_scalar_single(self, data): + df = pd.DataFrame({"B": data}) + df.loc[10, "B"] = data[1] + assert df.loc[10, "B"] == data[1] + + def test_setitem_loc_scalar_multiple_homogoneous(self, data): + df = pd.DataFrame({"A": data, "B": data}) + df.loc[10, "B"] = data[1] + assert df.loc[10, "B"] == data[1] + + def test_setitem_iloc_scalar_mixed(self, data): + df = pd.DataFrame({"A": np.arange(len(data)), "B": data}) + df.iloc[0, 1] = data[1] + assert df.loc[0, "B"] == data[1] + + def test_setitem_iloc_scalar_single(self, data): + df = pd.DataFrame({"B": data}) + df.iloc[10, 0] = data[1] + assert df.loc[10, "B"] == data[1] + + def test_setitem_iloc_scalar_multiple_homogoneous(self, data): + df = pd.DataFrame({"A": data, "B": data}) + df.iloc[10, 1] = data[1] + assert df.loc[10, "B"] == data[1] + + @pytest.mark.parametrize( + "mask", + [ + np.array([True, True, True, False, False]), + pd.array([True, True, True, False, False], dtype="boolean"), + pd.array([True, True, True, pd.NA, pd.NA], dtype="boolean"), + ], + ids=["numpy-array", "boolean-array", "boolean-array-na"], + ) + def test_setitem_mask(self, data, mask, box_in_series): + arr = data[:5].copy() + expected = arr.take([0, 0, 0, 3, 4]) + if box_in_series: + arr = pd.Series(arr) + expected = pd.Series(expected) + arr[mask] = data[0] + self.assert_equal(expected, arr) + + def test_setitem_mask_raises(self, data, box_in_series): + # wrong length + mask = np.array([True, False]) + + if box_in_series: + data = pd.Series(data) + + with pytest.raises(IndexError, match="wrong length"): + data[mask] = data[0] + + mask = pd.array(mask, dtype="boolean") + with pytest.raises(IndexError, match="wrong length"): + data[mask] = data[0] + + def test_setitem_mask_boolean_array_with_na(self, data, box_in_series): + mask = pd.array(np.zeros(data.shape, dtype="bool"), dtype="boolean") + mask[:3] = True + mask[3:5] = pd.NA + + if box_in_series: + data = pd.Series(data) + + data[mask] = data[0] + + assert (data[:3] == data[0]).all() + + @pytest.mark.parametrize( + "idx", + [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])], + ids=["list", "integer-array", "numpy-array"], + ) + def test_setitem_integer_array(self, data, idx, box_in_series): + arr = data[:5].copy() + expected = data.take([0, 0, 0, 3, 4]) + + if box_in_series: + arr = pd.Series(arr) + expected = pd.Series(expected) + + arr[idx] = arr[0] + self.assert_equal(arr, expected) + + @pytest.mark.parametrize( + "idx, box_in_series", + [ + ([0, 1, 2, pd.NA], False), + pytest.param( + [0, 1, 2, pd.NA], True, marks=pytest.mark.xfail(reason="GH-31948") + ), + (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False), + (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False), + ], + ids=["list-False", "list-True", "integer-array-False", "integer-array-True"], + ) + def test_setitem_integer_with_missing_raises(self, data, idx, box_in_series): + arr = data.copy() + + # TODO(xfail) this raises KeyError about labels not found (it tries label-based) + # for list of labels with Series + if box_in_series: + arr = pd.Series(data, index=[tm.rands(4) for _ in range(len(data))]) + + msg = "Cannot index with an integer indexer containing NA values" + with pytest.raises(ValueError, match=msg): + arr[idx] = arr[0] + + @pytest.mark.parametrize("as_callable", [True, False]) + @pytest.mark.parametrize("setter", ["loc", None]) + def test_setitem_mask_aligned(self, data, as_callable, setter): + ser = pd.Series(data) + mask = np.zeros(len(data), dtype=bool) + mask[:2] = True + + if as_callable: + mask2 = lambda x: 
mask + else: + mask2 = mask + + if setter: + # loc + target = getattr(ser, setter) + else: + # Series.__setitem__ + target = ser + + target[mask2] = data[5:7] + + ser[mask2] = data[5:7] + assert ser[0] == data[5] + assert ser[1] == data[6] + + @pytest.mark.parametrize("setter", ["loc", None]) + def test_setitem_mask_broadcast(self, data, setter): + ser = pd.Series(data) + mask = np.zeros(len(data), dtype=bool) + mask[:2] = True + + if setter: # loc + target = getattr(ser, setter) + else: # __setitem__ + target = ser + + target[mask] = data[10] + assert ser[0] == data[10] + assert ser[1] == data[10] + + def test_setitem_expand_columns(self, data): + df = pd.DataFrame({"A": data}) + result = df.copy() + result["B"] = 1 + expected = pd.DataFrame({"A": data, "B": [1] * len(data)}) + self.assert_frame_equal(result, expected) + + result = df.copy() + result.loc[:, "B"] = 1 + self.assert_frame_equal(result, expected) + + # overwrite with new type + result["B"] = data + expected = pd.DataFrame({"A": data, "B": data}) + self.assert_frame_equal(result, expected) + + def test_setitem_expand_with_extension(self, data): + df = pd.DataFrame({"A": [1] * len(data)}) + result = df.copy() + result["B"] = data + expected = pd.DataFrame({"A": [1] * len(data), "B": data}) + self.assert_frame_equal(result, expected) + + result = df.copy() + result.loc[:, "B"] = data + self.assert_frame_equal(result, expected) + + def test_setitem_frame_invalid_length(self, data): + df = pd.DataFrame({"A": [1] * len(data)}) + xpr = ( + rf"Length of values \({len(data[:5])}\) " + rf"does not match length of index \({len(df)}\)" + ) + with pytest.raises(ValueError, match=xpr): + df["B"] = data[:5] + + def test_setitem_tuple_index(self, data): + ser = pd.Series(data[:2], index=[(0, 0), (0, 1)]) + expected = pd.Series(data.take([1, 1]), index=ser.index) + ser[(0, 0)] = data[1] + self.assert_series_equal(ser, expected) + + def test_setitem_slice(self, data, box_in_series): + arr = data[:5].copy() + expected = data.take([0, 0, 0, 3, 4]) + if box_in_series: + arr = pd.Series(arr) + expected = pd.Series(expected) + + arr[:3] = data[0] + self.assert_equal(arr, expected) + + def test_setitem_loc_iloc_slice(self, data): + arr = data[:5].copy() + s = pd.Series(arr, index=["a", "b", "c", "d", "e"]) + expected = pd.Series(data.take([0, 0, 0, 3, 4]), index=s.index) + + result = s.copy() + result.iloc[:3] = data[0] + self.assert_equal(result, expected) + + result = s.copy() + result.loc[:"c"] = data[0] + self.assert_equal(result, expected) + + def test_setitem_slice_mismatch_length_raises(self, data): + arr = data[:5] + with pytest.raises(ValueError): + arr[:1] = arr[:2] + + def test_setitem_slice_array(self, data): + arr = data[:5].copy() + arr[:5] = data[-5:] + self.assert_extension_array_equal(arr, data[-5:]) + + def test_setitem_scalar_key_sequence_raise(self, data): + arr = data[:5].copy() + with pytest.raises(ValueError): + arr[0] = arr[[0, 1]] + + def test_setitem_preserves_views(self, data): + # GH#28150 setitem shouldn't swap the underlying data + view1 = data.view() + view2 = data[:] + + data[0] = data[1] + assert view1[0] == data[1] + assert view2[0] == data[1] + + def test_setitem_with_expansion_dataframe_column(self, data, full_indexer): + # https://github.com/pandas-dev/pandas/issues/32395 + df = expected = pd.DataFrame({"data": pd.Series(data)}) + result = pd.DataFrame(index=df.index) + + key = full_indexer(df) + result.loc[key, "data"] = df["data"] + + self.assert_frame_equal(result, expected) + + def test_setitem_series(self, 
data, full_indexer): + # https://github.com/pandas-dev/pandas/issues/32395 + ser = pd.Series(data, name="data") + result = pd.Series(index=ser.index, dtype=object, name="data") + + # because result has object dtype, the attempt to do setting inplace + # is successful, and object dtype is retained + key = full_indexer(ser) + result.loc[key] = ser + + expected = pd.Series( + data.astype(object), index=ser.index, name="data", dtype=object + ) + self.assert_series_equal(result, expected) + + def test_setitem_frame_2d_values(self, data, request): + # GH#44514 + df = pd.DataFrame({"A": data}) + + # Avoiding using_array_manager fixture + # https://github.com/pandas-dev/pandas/pull/44514#discussion_r754002410 + using_array_manager = isinstance(df._mgr, pd.core.internals.ArrayManager) + if using_array_manager: + if not isinstance( + data.dtype, (PandasDtype, PeriodDtype, IntervalDtype, DatetimeTZDtype) + ): + # These dtypes have non-broken implementations of _can_hold_element + mark = pytest.mark.xfail(reason="Goes through split path, loses dtype") + request.node.add_marker(mark) + + df = pd.DataFrame({"A": data}) + orig = df.copy() + + df.iloc[:] = df + self.assert_frame_equal(df, orig) + + df.iloc[:-1] = df.iloc[:-1] + self.assert_frame_equal(df, orig) + + df.iloc[:] = df.values + self.assert_frame_equal(df, orig) + + df.iloc[:-1] = df.values[:-1] + self.assert_frame_equal(df, orig) + + def test_delitem_series(self, data): + # GH#40763 + ser = pd.Series(data, name="data") + + taker = np.arange(len(ser)) + taker = np.delete(taker, 1) + + expected = ser[taker] + del ser[1] + self.assert_series_equal(ser, expected) + + def test_setitem_invalid(self, data, invalid_scalar): + msg = "" # messages vary by subclass, so we do not test it + with pytest.raises((ValueError, TypeError), match=msg): + data[0] = invalid_scalar + + with pytest.raises((ValueError, TypeError), match=msg): + data[:] = invalid_scalar diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/conftest.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/conftest.py new file mode 100644 index 0000000..3827ba2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/conftest.py @@ -0,0 +1,195 @@ +import operator + +import pytest + +from pandas import Series + + +@pytest.fixture +def dtype(): + """A fixture providing the ExtensionDtype to validate.""" + raise NotImplementedError + + +@pytest.fixture +def data(): + """ + Length-100 array for this type. + + * data[0] and data[1] should both be non missing + * data[0] and data[1] should not be equal + """ + raise NotImplementedError + + +@pytest.fixture +def data_for_twos(): + """Length-100 array in which all the elements are two.""" + raise NotImplementedError + + +@pytest.fixture +def data_missing(): + """Length-2 array with [NA, Valid]""" + raise NotImplementedError + + +@pytest.fixture(params=["data", "data_missing"]) +def all_data(request, data, data_missing): + """Parametrized fixture giving 'data' and 'data_missing'""" + if request.param == "data": + return data + elif request.param == "data_missing": + return data_missing + + +@pytest.fixture +def data_repeated(data): + """ + Generate many datasets. + + Parameters + ---------- + data : fixture implementing `data` + + Returns + ------- + Callable[[int], Generator]: + A callable that takes a `count` argument and + returns a generator yielding `count` datasets. 
+ """ + + def gen(count): + for _ in range(count): + yield data + + return gen + + +@pytest.fixture +def data_for_sorting(): + """ + Length-3 array with a known sort order. + + This should be three items [B, C, A] with + A < B < C + """ + raise NotImplementedError + + +@pytest.fixture +def data_missing_for_sorting(): + """ + Length-3 array with a known sort order. + + This should be three items [B, NA, A] with + A < B and NA missing. + """ + raise NotImplementedError + + +@pytest.fixture +def na_cmp(): + """ + Binary operator for comparing NA values. + + Should return a function of two arguments that returns + True if both arguments are (scalar) NA for your type. + + By default, uses ``operator.is_`` + """ + return operator.is_ + + +@pytest.fixture +def na_value(): + """The scalar missing value for this type. Default 'None'""" + return None + + +@pytest.fixture +def data_for_grouping(): + """ + Data for factorization, grouping, and unique tests. + + Expected to be like [B, B, NA, NA, A, A, B, C] + + Where A < B < C and NA is missing + """ + raise NotImplementedError + + +@pytest.fixture(params=[True, False]) +def box_in_series(request): + """Whether to box the data in a Series""" + return request.param + + +@pytest.fixture( + params=[ + lambda x: 1, + lambda x: [1] * len(x), + lambda x: Series([1] * len(x)), + lambda x: x, + ], + ids=["scalar", "list", "series", "object"], +) +def groupby_apply_op(request): + """ + Functions to test groupby.apply(). + """ + return request.param + + +@pytest.fixture(params=[True, False]) +def as_frame(request): + """ + Boolean fixture to support Series and Series.to_frame() comparison testing. + """ + return request.param + + +@pytest.fixture(params=[True, False]) +def as_series(request): + """ + Boolean fixture to support arr and Series(arr) comparison testing. + """ + return request.param + + +@pytest.fixture(params=[True, False]) +def use_numpy(request): + """ + Boolean fixture to support comparison testing of ExtensionDtype array + and numpy array. + """ + return request.param + + +@pytest.fixture(params=["ffill", "bfill"]) +def fillna_method(request): + """ + Parametrized fixture giving method parameters 'ffill' and 'bfill' for + Series.fillna(method=) testing. + """ + return request.param + + +@pytest.fixture(params=[True, False]) +def as_array(request): + """ + Boolean fixture to support ExtensionDtype _from_sequence method testing. + """ + return request.param + + +@pytest.fixture +def invalid_scalar(data): + """ + A scalar that *cannot* be held by this ExtensionArray. + + The default should work for most subclasses, but is not guaranteed. + + If the array can hold any item (i.e. object dtype), then use pytest.skip. 
+ """ + return object.__new__(object) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/date/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/date/__init__.py new file mode 100644 index 0000000..2a8c7e9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/date/__init__.py @@ -0,0 +1,6 @@ +from pandas.tests.extension.date.array import ( + DateArray, + DateDtype, +) + +__all__ = ["DateArray", "DateDtype"] diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/date/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/date/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..10f144b Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/date/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/date/__pycache__/array.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/date/__pycache__/array.cpython-38.pyc new file mode 100644 index 0000000..f566fc7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/date/__pycache__/array.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/date/array.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/date/array.py new file mode 100644 index 0000000..b14b992 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/date/array.py @@ -0,0 +1,183 @@ +import datetime as dt +from typing import ( + Any, + Optional, + Sequence, + Tuple, + Union, + cast, +) + +import numpy as np + +from pandas._typing import ( + Dtype, + PositionalIndexer, +) + +from pandas.core.dtypes.dtypes import register_extension_dtype + +from pandas.api.extensions import ( + ExtensionArray, + ExtensionDtype, +) +from pandas.api.types import pandas_dtype + + +@register_extension_dtype +class DateDtype(ExtensionDtype): + @property + def type(self): + return dt.date + + @property + def name(self): + return "DateDtype" + + @classmethod + def construct_from_string(cls, string: str): + if not isinstance(string, str): + raise TypeError( + f"'construct_from_string' expects a string, got {type(string)}" + ) + + if string == cls.__name__: + return cls() + else: + raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'") + + @classmethod + def construct_array_type(cls): + return DateArray + + @property + def na_value(self): + return dt.date.min + + def __repr__(self) -> str: + return self.name + + +class DateArray(ExtensionArray): + def __init__( + self, + dates: Union[ + dt.date, + Sequence[dt.date], + Tuple[np.ndarray, np.ndarray, np.ndarray], + np.ndarray, + ], + ) -> None: + if isinstance(dates, dt.date): + self._year = np.array([dates.year]) + self._month = np.array([dates.month]) + self._day = np.array([dates.year]) + return + + ldates = len(dates) + if isinstance(dates, list): + # pre-allocate the arrays since we know the size before hand + self._year = np.zeros(ldates, dtype=np.uint16) # 65535 (0, 9999) + self._month = np.zeros(ldates, dtype=np.uint8) # 255 (1, 31) + self._day = np.zeros(ldates, dtype=np.uint8) # 255 (1, 12) + # populate them + for i, (y, m, d) in enumerate( + map(lambda date: (date.year, date.month, date.day), dates) + ): + self._year[i] = y + self._month[i] = m + self._day[i] = d + + elif isinstance(dates, tuple): + # only support triples + if ldates != 3: + raise ValueError("only triples are valid") + # 
check if all elements have the same type + if any(map(lambda x: not isinstance(x, np.ndarray), dates)): + raise TypeError("invalid type") + ly, lm, ld = (len(cast(np.ndarray, d)) for d in dates) + if not ly == lm == ld: + raise ValueError( + f"tuple members must have the same length: {(ly, lm, ld)}" + ) + self._year = dates[0].astype(np.uint16) + self._month = dates[1].astype(np.uint8) + self._day = dates[2].astype(np.uint8) + + elif isinstance(dates, np.ndarray) and dates.dtype == "U10": + self._year = np.zeros(ldates, dtype=np.uint16) # 65535 (0, 9999) + self._month = np.zeros(ldates, dtype=np.uint8) # 255 (1, 12) + self._day = np.zeros(ldates, dtype=np.uint8) # 255 (1, 31) + + # "object_" object is not iterable [misc] + for (i,), (y, m, d) in np.ndenumerate( # type: ignore[misc] + np.char.split(dates, sep="-") + ): + self._year[i] = int(y) + self._month[i] = int(m) + self._day[i] = int(d) + + else: + raise TypeError(f"{type(dates)} is not supported") + + @property + def dtype(self) -> ExtensionDtype: + return DateDtype() + + def astype(self, dtype, copy=True): + dtype = pandas_dtype(dtype) + + if isinstance(dtype, DateDtype): + data = self.copy() if copy else self + else: + data = self.to_numpy(dtype=dtype, copy=copy, na_value=dt.date.min) + + return data + + @property + def nbytes(self) -> int: + return self._year.nbytes + self._month.nbytes + self._day.nbytes + + def __len__(self) -> int: + return len(self._year) # all 3 arrays are enforced to have the same length + + def __getitem__(self, item: PositionalIndexer): + if isinstance(item, int): + return dt.date(self._year[item], self._month[item], self._day[item]) + else: + raise NotImplementedError("only ints are supported as indexes") + + def __setitem__(self, key: Union[int, slice, np.ndarray], value: Any): + if not isinstance(key, int): + raise NotImplementedError("only ints are supported as indexes") + + if not isinstance(value, dt.date): + raise TypeError("you can only set datetime.date types") + + self._year[key] = value.year + self._month[key] = value.month + self._day[key] = value.day + + def __repr__(self) -> str: + return f"DateArray{list(zip(self._year, self._month, self._day))}" + + def copy(self) -> "DateArray": + return DateArray((self._year.copy(), self._month.copy(), self._day.copy())) + + def isna(self) -> np.ndarray: + return np.logical_and( + np.logical_and( + self._year == dt.date.min.year, self._month == dt.date.min.month + ), + self._day == dt.date.min.day, + ) + + @classmethod + def _from_sequence(cls, scalars, *, dtype: Optional[Dtype] = None, copy=False): + if isinstance(scalars, dt.date): + pass + elif isinstance(scalars, DateArray): + pass + elif isinstance(scalars, np.ndarray): + scalars = scalars.astype("U10") # 10 chars for yyyy-mm-dd + return DateArray(scalars) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/decimal/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/decimal/__init__.py new file mode 100644 index 0000000..34727b4 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/decimal/__init__.py @@ -0,0 +1,8 @@ +from pandas.tests.extension.decimal.array import ( + DecimalArray, + DecimalDtype, + make_data, + to_decimal, +) + +__all__ = ["DecimalArray", "DecimalDtype", "to_decimal", "make_data"] diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/decimal/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/decimal/__pycache__/__init__.cpython-38.pyc new file mode 100644
index 0000000..fc65d22 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/decimal/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/decimal/__pycache__/array.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/decimal/__pycache__/array.cpython-38.pyc new file mode 100644 index 0000000..29eaad9 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/decimal/__pycache__/array.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/decimal/__pycache__/test_decimal.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/decimal/__pycache__/test_decimal.cpython-38.pyc new file mode 100644 index 0000000..1c6af7e Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/decimal/__pycache__/test_decimal.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/decimal/array.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/decimal/array.py new file mode 100644 index 0000000..1dbafb2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/decimal/array.py @@ -0,0 +1,272 @@ +from __future__ import annotations + +import decimal +import numbers +import random +import sys + +import numpy as np + +from pandas._typing import type_t + +from pandas.core.dtypes.base import ExtensionDtype +from pandas.core.dtypes.common import ( + is_dtype_equal, + is_float, + pandas_dtype, +) + +import pandas as pd +from pandas.api.extensions import ( + no_default, + register_extension_dtype, +) +from pandas.api.types import ( + is_list_like, + is_scalar, +) +from pandas.core import arraylike +from pandas.core.arraylike import OpsMixin +from pandas.core.arrays import ( + ExtensionArray, + ExtensionScalarOpsMixin, +) +from pandas.core.indexers import check_array_indexer + + +@register_extension_dtype +class DecimalDtype(ExtensionDtype): + type = decimal.Decimal + name = "decimal" + na_value = decimal.Decimal("NaN") + _metadata = ("context",) + + def __init__(self, context=None): + self.context = context or decimal.getcontext() + + def __repr__(self) -> str: + return f"DecimalDtype(context={self.context})" + + @classmethod + def construct_array_type(cls) -> type_t[DecimalArray]: + """ + Return the array type associated with this dtype. 
+ + Returns + ------- + type + """ + return DecimalArray + + @property + def _is_numeric(self) -> bool: + return True + + +class DecimalArray(OpsMixin, ExtensionScalarOpsMixin, ExtensionArray): + __array_priority__ = 1000 + + def __init__(self, values, dtype=None, copy=False, context=None): + for i, val in enumerate(values): + if is_float(val): + if np.isnan(val): + values[i] = DecimalDtype.na_value + else: + values[i] = DecimalDtype.type(val) + elif not isinstance(val, decimal.Decimal): + raise TypeError("All values must be of type " + str(decimal.Decimal)) + values = np.asarray(values, dtype=object) + + self._data = values + # Some aliases for common attribute names to ensure pandas supports + # these + self._items = self.data = self._data + # those aliases are currently not working due to assumptions + # in internal code (GH-20735) + # self._values = self.values = self.data + self._dtype = DecimalDtype(context) + + @property + def dtype(self): + return self._dtype + + @classmethod + def _from_sequence(cls, scalars, dtype=None, copy=False): + return cls(scalars) + + @classmethod + def _from_sequence_of_strings(cls, strings, dtype=None, copy=False): + return cls._from_sequence([decimal.Decimal(x) for x in strings], dtype, copy) + + @classmethod + def _from_factorized(cls, values, original): + return cls(values) + + _HANDLED_TYPES = (decimal.Decimal, numbers.Number, np.ndarray) + + def to_numpy( + self, dtype=None, copy: bool = False, na_value=no_default, decimals=None + ) -> np.ndarray: + result = np.asarray(self, dtype=dtype) + if decimals is not None: + result = np.asarray([round(x, decimals) for x in result]) + return result + + def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): + # + if not all( + isinstance(t, self._HANDLED_TYPES + (DecimalArray,)) for t in inputs + ): + return NotImplemented + + inputs = tuple(x._data if isinstance(x, DecimalArray) else x for x in inputs) + result = getattr(ufunc, method)(*inputs, **kwargs) + + if method == "reduce": + result = arraylike.dispatch_reduction_ufunc( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + return result + + def reconstruct(x): + if isinstance(x, (decimal.Decimal, numbers.Number)): + return x + else: + return DecimalArray._from_sequence(x) + + if ufunc.nout > 1: + return tuple(reconstruct(x) for x in result) + else: + return reconstruct(result) + + def __getitem__(self, item): + if isinstance(item, numbers.Integral): + return self._data[item] + else: + # array, slice. 
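+ # (Editorial aside, not part of the upstream file: a hedged sketch of this branch — check_array_indexer on the next line validates a list-like key and normalizes it to an ndarray, so for a length-3 DecimalArray arr, both arr[[0, 2]] and arr[[True, False, True]] land here and come back as a new two-element DecimalArray.)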
+ item = pd.api.indexers.check_array_indexer(self, item) + return type(self)(self._data[item]) + + def take(self, indexer, allow_fill=False, fill_value=None): + from pandas.api.extensions import take + + data = self._data + if allow_fill and fill_value is None: + fill_value = self.dtype.na_value + + result = take(data, indexer, fill_value=fill_value, allow_fill=allow_fill) + return self._from_sequence(result) + + def copy(self): + return type(self)(self._data.copy(), dtype=self.dtype) + + def astype(self, dtype, copy=True): + if is_dtype_equal(dtype, self._dtype): + if not copy: + return self + dtype = pandas_dtype(dtype) + if isinstance(dtype, type(self.dtype)): + return type(self)(self._data, copy=copy, context=dtype.context) + + return super().astype(dtype, copy=copy) + + def __setitem__(self, key, value): + if is_list_like(value): + if is_scalar(key): + raise ValueError("setting an array element with a sequence.") + value = [decimal.Decimal(v) for v in value] + else: + value = decimal.Decimal(value) + + key = check_array_indexer(self, key) + self._data[key] = value + + def __len__(self) -> int: + return len(self._data) + + def __contains__(self, item) -> bool | np.bool_: + if not isinstance(item, decimal.Decimal): + return False + elif item.is_nan(): + return self.isna().any() + else: + return super().__contains__(item) + + @property + def nbytes(self) -> int: + n = len(self) + if n: + return n * sys.getsizeof(self[0]) + return 0 + + def isna(self): + return np.array([x.is_nan() for x in self._data], dtype=bool) + + @property + def _na_value(self): + return decimal.Decimal("NaN") + + def _formatter(self, boxed=False): + if boxed: + return "Decimal: {}".format + return repr + + @classmethod + def _concat_same_type(cls, to_concat): + return cls(np.concatenate([x._data for x in to_concat])) + + def _reduce(self, name: str, *, skipna: bool = True, **kwargs): + + if skipna: + # If we don't have any NAs, we can ignore skipna + if self.isna().any(): + other = self[~self.isna()] + return other._reduce(name, **kwargs) + + if name == "sum" and len(self) == 0: + # GH#29630 avoid returning int 0 or np.bool_(False) on old numpy + return decimal.Decimal(0) + + try: + op = getattr(self.data, name) + except AttributeError as err: + raise NotImplementedError( + f"decimal does not support the {name} operation" + ) from err + return op(axis=0) + + def _cmp_method(self, other, op): + # For use with OpsMixin + def convert_values(param): + if isinstance(param, ExtensionArray) or is_list_like(param): + ovalues = param + else: + # Assume it's an object + ovalues = [param] * len(self) + return ovalues + + lvalues = self + rvalues = convert_values(other) + + # If the operator is not defined for the underlying objects, + # a TypeError should be raised + res = [op(a, b) for (a, b) in zip(lvalues, rvalues)] + + return np.asarray(res, dtype=bool) + + def value_counts(self, dropna: bool = True): + from pandas.core.algorithms import value_counts + + return value_counts(self.to_numpy(), dropna=dropna) + + +def to_decimal(values, context=None): + return DecimalArray([decimal.Decimal(x) for x in values], context=context) + + +def make_data(): + return [decimal.Decimal(random.random()) for _ in range(100)] + + +DecimalArray._add_arithmetic_ops() diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/decimal/test_decimal.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/decimal/test_decimal.py new file mode 100644 index 0000000..c2e42ae --- /dev/null +++ 
b/.local/lib/python3.8/site-packages/pandas/tests/extension/decimal/test_decimal.py @@ -0,0 +1,481 @@ +import decimal +import operator + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.api.types import infer_dtype +from pandas.tests.extension import base +from pandas.tests.extension.decimal.array import ( + DecimalArray, + DecimalDtype, + make_data, + to_decimal, +) + + +@pytest.fixture +def dtype(): + return DecimalDtype() + + +@pytest.fixture +def data(): + return DecimalArray(make_data()) + + +@pytest.fixture +def data_for_twos(): + return DecimalArray([decimal.Decimal(2) for _ in range(100)]) + + +@pytest.fixture +def data_missing(): + return DecimalArray([decimal.Decimal("NaN"), decimal.Decimal(1)]) + + +@pytest.fixture +def data_for_sorting(): + return DecimalArray( + [decimal.Decimal("1"), decimal.Decimal("2"), decimal.Decimal("0")] + ) + + +@pytest.fixture +def data_missing_for_sorting(): + return DecimalArray( + [decimal.Decimal("1"), decimal.Decimal("NaN"), decimal.Decimal("0")] + ) + + +@pytest.fixture +def na_cmp(): + return lambda x, y: x.is_nan() and y.is_nan() + + +@pytest.fixture +def na_value(): + return decimal.Decimal("NaN") + + +@pytest.fixture +def data_for_grouping(): + b = decimal.Decimal("1.0") + a = decimal.Decimal("0.0") + c = decimal.Decimal("2.0") + na = decimal.Decimal("NaN") + return DecimalArray([b, b, na, na, a, a, b, c]) + + +class TestDtype(base.BaseDtypeTests): + def test_hashable(self, dtype): + pass + + @pytest.mark.parametrize("skipna", [True, False]) + def test_infer_dtype(self, data, data_missing, skipna): + # here overriding base test to ensure we fall back to return + # "unknown-array" for an EA pandas doesn't know + assert infer_dtype(data, skipna=skipna) == "unknown-array" + assert infer_dtype(data_missing, skipna=skipna) == "unknown-array" + + +class TestInterface(base.BaseInterfaceTests): + pass + + +class TestConstructors(base.BaseConstructorsTests): + pass + + +class TestReshaping(base.BaseReshapingTests): + pass + + +class TestGetitem(base.BaseGetitemTests): + def test_take_na_value_other_decimal(self): + arr = DecimalArray([decimal.Decimal("1.0"), decimal.Decimal("2.0")]) + result = arr.take([0, -1], allow_fill=True, fill_value=decimal.Decimal("-1.0")) + expected = DecimalArray([decimal.Decimal("1.0"), decimal.Decimal("-1.0")]) + self.assert_extension_array_equal(result, expected) + + +class TestIndex(base.BaseIndexTests): + pass + + +class TestMissing(base.BaseMissingTests): + pass + + +class Reduce: + def check_reduce(self, s, op_name, skipna): + + if op_name in ["median", "skew", "kurt"]: + msg = r"decimal does not support the .* operation" + with pytest.raises(NotImplementedError, match=msg): + getattr(s, op_name)(skipna=skipna) + + else: + result = getattr(s, op_name)(skipna=skipna) + expected = getattr(np.asarray(s), op_name)() + tm.assert_almost_equal(result, expected) + + +class TestNumericReduce(Reduce, base.BaseNumericReduceTests): + pass + + +class TestBooleanReduce(Reduce, base.BaseBooleanReduceTests): + pass + + +class TestMethods(base.BaseMethodsTests): + @pytest.mark.parametrize("dropna", [True, False]) + def test_value_counts(self, all_data, dropna, request): + all_data = all_data[:10] + if dropna: + other = np.array(all_data[~all_data.isna()]) + else: + other = all_data + + vcs = pd.Series(all_data).value_counts(dropna=dropna) + vcs_ex = pd.Series(other).value_counts(dropna=dropna) + + with decimal.localcontext() as ctx: + # avoid raising when comparing Decimal("NAN") < 
Decimal(2) + ctx.traps[decimal.InvalidOperation] = False + + result = vcs.sort_index() + expected = vcs_ex.sort_index() + + tm.assert_series_equal(result, expected) + + +class TestCasting(base.BaseCastingTests): + pass + + +class TestGroupby(base.BaseGroupbyTests): + def test_groupby_agg_extension(self, data_for_grouping): + super().test_groupby_agg_extension(data_for_grouping) + + +class TestSetitem(base.BaseSetitemTests): + pass + + +class TestPrinting(base.BasePrintingTests): + def test_series_repr(self, data): + # Overriding this base test to explicitly test that + # the custom _formatter is used + ser = pd.Series(data) + assert data.dtype.name in repr(ser) + assert "Decimal: " in repr(ser) + + +@pytest.mark.xfail( + reason=( + "DecimalArray constructor raises bc _from_sequence wants Decimals, not ints." + "Easy to fix, just need to do it." + ), + raises=TypeError, +) +def test_series_constructor_coerce_data_to_extension_dtype_raises(): + xpr = ( + "Cannot cast data to extension dtype 'decimal'. Pass the " + "extension array directly." + ) + with pytest.raises(ValueError, match=xpr): + pd.Series([0, 1, 2], dtype=DecimalDtype()) + + +def test_series_constructor_with_dtype(): + arr = DecimalArray([decimal.Decimal("10.0")]) + result = pd.Series(arr, dtype=DecimalDtype()) + expected = pd.Series(arr) + tm.assert_series_equal(result, expected) + + result = pd.Series(arr, dtype="int64") + expected = pd.Series([10]) + tm.assert_series_equal(result, expected) + + +def test_dataframe_constructor_with_dtype(): + arr = DecimalArray([decimal.Decimal("10.0")]) + + result = pd.DataFrame({"A": arr}, dtype=DecimalDtype()) + expected = pd.DataFrame({"A": arr}) + tm.assert_frame_equal(result, expected) + + arr = DecimalArray([decimal.Decimal("10.0")]) + result = pd.DataFrame({"A": arr}, dtype="int64") + expected = pd.DataFrame({"A": [10]}) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("frame", [True, False]) +def test_astype_dispatches(frame): + # This is a dtype-specific test that ensures Series[decimal].astype + # gets all the way through to ExtensionArray.astype + # Designing a reliable smoke test that works for arbitrary data types + # is difficult. 
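+ # (Editorial aside, not part of the upstream file: a hedged sketch of why the final assertion works — DecimalDtype stores the decimal.Context it was constructed with, so if astype dispatches through DecimalArray.astype the result keeps dtype.context.prec == 5, metadata a plain numpy cast would drop.)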
+ data = pd.Series(DecimalArray([decimal.Decimal(2)]), name="a") + ctx = decimal.Context() + ctx.prec = 5 + + if frame: + data = data.to_frame() + + result = data.astype(DecimalDtype(ctx)) + + if frame: + result = result["a"] + + assert result.dtype.context.prec == ctx.prec + + +class TestArithmeticOps(base.BaseArithmeticOpsTests): + def check_opname(self, s, op_name, other, exc=None): + super().check_opname(s, op_name, other, exc=None) + + def test_arith_series_with_array(self, data, all_arithmetic_operators): + op_name = all_arithmetic_operators + s = pd.Series(data) + + context = decimal.getcontext() + divbyzerotrap = context.traps[decimal.DivisionByZero] + invalidoptrap = context.traps[decimal.InvalidOperation] + context.traps[decimal.DivisionByZero] = 0 + context.traps[decimal.InvalidOperation] = 0 + + # Decimal supports ops with int, but not float + other = pd.Series([int(d * 100) for d in data]) + self.check_opname(s, op_name, other) + + if "mod" not in op_name: + self.check_opname(s, op_name, s * 2) + + self.check_opname(s, op_name, 0) + self.check_opname(s, op_name, 5) + context.traps[decimal.DivisionByZero] = divbyzerotrap + context.traps[decimal.InvalidOperation] = invalidoptrap + + def _check_divmod_op(self, s, op, other, exc=NotImplementedError): + # We implement divmod + super()._check_divmod_op(s, op, other, exc=None) + + +class TestComparisonOps(base.BaseComparisonOpsTests): + def test_compare_scalar(self, data, comparison_op): + s = pd.Series(data) + self._compare_other(s, data, comparison_op, 0.5) + + def test_compare_array(self, data, comparison_op): + s = pd.Series(data) + + alter = np.random.choice([-1, 0, 1], len(data)) + # Randomly double, halve or keep same value + other = pd.Series(data) * [decimal.Decimal(pow(2.0, i)) for i in alter] + self._compare_other(s, data, comparison_op, other) + + +class DecimalArrayWithoutFromSequence(DecimalArray): + """Helper class for testing error handling in _from_sequence.""" + + def _from_sequence(cls, scalars, dtype=None, copy=False): + raise KeyError("For the test") + + +class DecimalArrayWithoutCoercion(DecimalArrayWithoutFromSequence): + @classmethod + def _create_arithmetic_method(cls, op): + return cls._create_method(op, coerce_to_dtype=False) + + +DecimalArrayWithoutCoercion._add_arithmetic_ops() + + +def test_combine_from_sequence_raises(monkeypatch): + # https://github.com/pandas-dev/pandas/issues/22850 + cls = DecimalArrayWithoutFromSequence + + @classmethod + def construct_array_type(cls): + return DecimalArrayWithoutFromSequence + + monkeypatch.setattr(DecimalDtype, "construct_array_type", construct_array_type) + + arr = cls([decimal.Decimal("1.0"), decimal.Decimal("2.0")]) + ser = pd.Series(arr) + result = ser.combine(ser, operator.add) + + # note: object dtype + expected = pd.Series( + [decimal.Decimal("2.0"), decimal.Decimal("4.0")], dtype="object" + ) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "class_", [DecimalArrayWithoutFromSequence, DecimalArrayWithoutCoercion] +) +def test_scalar_ops_from_sequence_raises(class_): + # op(EA, EA) should return an EA, or an ndarray if it's not possible + # to return an EA with the return values. 
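+ # (Editorial aside, not part of the upstream file: a hedged sketch of the expected fallback — because _from_sequence raises, or coercion is disabled, for these helper classes, arr + arr cannot be re-wrapped as an extension array, so the op falls back to a plain object-dtype ndarray, i.e. isinstance(arr + arr, np.ndarray) holds below.)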
+ arr = class_([decimal.Decimal("1.0"), decimal.Decimal("2.0")]) + result = arr + arr + expected = np.array( + [decimal.Decimal("2.0"), decimal.Decimal("4.0")], dtype="object" + ) + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize( + "reverse, expected_div, expected_mod", + [(False, [0, 1, 1, 2], [1, 0, 1, 0]), (True, [2, 1, 0, 0], [0, 0, 2, 2])], +) +def test_divmod_array(reverse, expected_div, expected_mod): + # https://github.com/pandas-dev/pandas/issues/22930 + arr = to_decimal([1, 2, 3, 4]) + if reverse: + div, mod = divmod(2, arr) + else: + div, mod = divmod(arr, 2) + expected_div = to_decimal(expected_div) + expected_mod = to_decimal(expected_mod) + + tm.assert_extension_array_equal(div, expected_div) + tm.assert_extension_array_equal(mod, expected_mod) + + +def test_ufunc_fallback(data): + a = data[:5] + s = pd.Series(a, index=range(3, 8)) + result = np.abs(s) + expected = pd.Series(np.abs(a), index=range(3, 8)) + tm.assert_series_equal(result, expected) + + +def test_array_ufunc(): + a = to_decimal([1, 2, 3]) + result = np.exp(a) + expected = to_decimal(np.exp(a._data)) + tm.assert_extension_array_equal(result, expected) + + +def test_array_ufunc_series(): + a = to_decimal([1, 2, 3]) + s = pd.Series(a) + result = np.exp(s) + expected = pd.Series(to_decimal(np.exp(a._data))) + tm.assert_series_equal(result, expected) + + +def test_array_ufunc_series_scalar_other(): + # check _HANDLED_TYPES + a = to_decimal([1, 2, 3]) + s = pd.Series(a) + result = np.add(s, decimal.Decimal(1)) + expected = pd.Series(np.add(a, decimal.Decimal(1))) + tm.assert_series_equal(result, expected) + + +def test_array_ufunc_series_defer(): + a = to_decimal([1, 2, 3]) + s = pd.Series(a) + + expected = pd.Series(to_decimal([2, 4, 6])) + r1 = np.add(s, a) + r2 = np.add(a, s) + + tm.assert_series_equal(r1, expected) + tm.assert_series_equal(r2, expected) + + +def test_groupby_agg(): + # Ensure that the result of agg is inferred to be decimal dtype + # https://github.com/pandas-dev/pandas/issues/29141 + + data = make_data()[:5] + df = pd.DataFrame( + {"id1": [0, 0, 0, 1, 1], "id2": [0, 1, 0, 1, 1], "decimals": DecimalArray(data)} + ) + + # single key, selected column + expected = pd.Series(to_decimal([data[0], data[3]])) + result = df.groupby("id1")["decimals"].agg(lambda x: x.iloc[0]) + tm.assert_series_equal(result, expected, check_names=False) + result = df["decimals"].groupby(df["id1"]).agg(lambda x: x.iloc[0]) + tm.assert_series_equal(result, expected, check_names=False) + + # multiple keys, selected column + expected = pd.Series( + to_decimal([data[0], data[1], data[3]]), + index=pd.MultiIndex.from_tuples([(0, 0), (0, 1), (1, 1)]), + ) + result = df.groupby(["id1", "id2"])["decimals"].agg(lambda x: x.iloc[0]) + tm.assert_series_equal(result, expected, check_names=False) + result = df["decimals"].groupby([df["id1"], df["id2"]]).agg(lambda x: x.iloc[0]) + tm.assert_series_equal(result, expected, check_names=False) + + # multiple columns + expected = pd.DataFrame({"id2": [0, 1], "decimals": to_decimal([data[0], data[3]])}) + result = df.groupby("id1").agg(lambda x: x.iloc[0]) + tm.assert_frame_equal(result, expected, check_names=False) + + +def test_groupby_agg_ea_method(monkeypatch): + # Ensure that the result of agg is inferred to be decimal dtype + # https://github.com/pandas-dev/pandas/issues/29141 + + def DecimalArray__my_sum(self): + return np.sum(np.array(self)) + + monkeypatch.setattr(DecimalArray, "my_sum", DecimalArray__my_sum, raising=False) + + data = make_data()[:5] + df 
= pd.DataFrame({"id": [0, 0, 0, 1, 1], "decimals": DecimalArray(data)}) + expected = pd.Series(to_decimal([data[0] + data[1] + data[2], data[3] + data[4]])) + + result = df.groupby("id")["decimals"].agg(lambda x: x.values.my_sum()) + tm.assert_series_equal(result, expected, check_names=False) + s = pd.Series(DecimalArray(data)) + result = s.groupby(np.array([0, 0, 0, 1, 1])).agg(lambda x: x.values.my_sum()) + tm.assert_series_equal(result, expected, check_names=False) + + +def test_indexing_no_materialize(monkeypatch): + # See https://github.com/pandas-dev/pandas/issues/29708 + # Ensure that indexing operations do not materialize (convert to a numpy + # array) the ExtensionArray unnecessary + + def DecimalArray__array__(self, dtype=None): + raise Exception("tried to convert a DecimalArray to a numpy array") + + monkeypatch.setattr(DecimalArray, "__array__", DecimalArray__array__, raising=False) + + data = make_data() + s = pd.Series(DecimalArray(data)) + df = pd.DataFrame({"a": s, "b": range(len(s))}) + + # ensure the following operations do not raise an error + s[s > 0.5] + df[s > 0.5] + s.at[0] + df.at[0, "a"] + + +def test_to_numpy_keyword(): + # test the extra keyword + values = [decimal.Decimal("1.1111"), decimal.Decimal("2.2222")] + expected = np.array( + [decimal.Decimal("1.11"), decimal.Decimal("2.22")], dtype="object" + ) + a = pd.array(values, dtype="decimal") + result = a.to_numpy(decimals=2) + tm.assert_numpy_array_equal(result, expected) + + result = pd.Series(a).to_numpy(decimals=2) + tm.assert_numpy_array_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/json/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/json/__init__.py new file mode 100644 index 0000000..7ebfd54 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/json/__init__.py @@ -0,0 +1,7 @@ +from pandas.tests.extension.json.array import ( + JSONArray, + JSONDtype, + make_data, +) + +__all__ = ["JSONArray", "JSONDtype", "make_data"] diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/json/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/json/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..cd149df Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/json/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/json/__pycache__/array.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/json/__pycache__/array.cpython-38.pyc new file mode 100644 index 0000000..ad7e452 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/json/__pycache__/array.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/json/__pycache__/test_json.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/json/__pycache__/test_json.cpython-38.pyc new file mode 100644 index 0000000..c7d35fa Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/json/__pycache__/test_json.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/json/array.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/json/array.py new file mode 100644 index 0000000..538ca83 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/json/array.py @@ -0,0 +1,241 @@ +""" +Test extension array for 
storing nested data in a pandas container. + +The JSONArray stores lists of dictionaries. The storage mechanism is a list, +not an ndarray. + +Note +---- +We currently store lists of UserDicts. Pandas has a few places +internally that specifically check for dicts, and does non-scalar things +in that case. We *want* the dictionaries to be treated as scalars, so we +hack around pandas by using UserDicts. +""" +from __future__ import annotations + +from collections import ( + UserDict, + abc, +) +import itertools +import numbers +import random +import string +import sys +from typing import ( + Any, + Mapping, +) + +import numpy as np + +from pandas._typing import type_t + +from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike +from pandas.core.dtypes.common import ( + is_bool_dtype, + is_list_like, + pandas_dtype, +) + +import pandas as pd +from pandas.api.extensions import ( + ExtensionArray, + ExtensionDtype, +) +from pandas.core.indexers import unpack_tuple_and_ellipses + + +class JSONDtype(ExtensionDtype): + type = abc.Mapping + name = "json" + na_value: Mapping[str, Any] = UserDict() + + @classmethod + def construct_array_type(cls) -> type_t[JSONArray]: + """ + Return the array type associated with this dtype. + + Returns + ------- + type + """ + return JSONArray + + +class JSONArray(ExtensionArray): + dtype = JSONDtype() + __array_priority__ = 1000 + + def __init__(self, values, dtype=None, copy=False): + for val in values: + if not isinstance(val, self.dtype.type): + raise TypeError("All values must be of type " + str(self.dtype.type)) + self.data = values + + # Some aliases for common attribute names to ensure pandas supports + # these + self._items = self._data = self.data + # those aliases are currently not working due to assumptions + # in internal code (GH-20735) + # self._values = self.values = self.data + + @classmethod + def _from_sequence(cls, scalars, dtype=None, copy=False): + return cls(scalars) + + @classmethod + def _from_factorized(cls, values, original): + return cls([UserDict(x) for x in values if x != ()]) + + def __getitem__(self, item): + if isinstance(item, tuple): + item = unpack_tuple_and_ellipses(item) + + if isinstance(item, numbers.Integral): + return self.data[item] + elif isinstance(item, slice) and item == slice(None): + # Make sure we get a view + return type(self)(self.data) + elif isinstance(item, slice): + # slice + return type(self)(self.data[item]) + elif not is_list_like(item): + # e.g. 
"foo" or 2.5 + # exception message copied from numpy + raise IndexError( + r"only integers, slices (`:`), ellipsis (`...`), numpy.newaxis " + r"(`None`) and integer or boolean arrays are valid indices" + ) + else: + item = pd.api.indexers.check_array_indexer(self, item) + if is_bool_dtype(item.dtype): + return self._from_sequence([x for x, m in zip(self, item) if m]) + # integer + return type(self)([self.data[i] for i in item]) + + def __setitem__(self, key, value): + if isinstance(key, numbers.Integral): + self.data[key] = value + else: + if not isinstance(value, (type(self), abc.Sequence)): + # broadcast value + value = itertools.cycle([value]) + + if isinstance(key, np.ndarray) and key.dtype == "bool": + # masking + for i, (k, v) in enumerate(zip(key, value)): + if k: + assert isinstance(v, self.dtype.type) + self.data[i] = v + else: + for k, v in zip(key, value): + assert isinstance(v, self.dtype.type) + self.data[k] = v + + def __len__(self) -> int: + return len(self.data) + + def __eq__(self, other): + return NotImplemented + + def __ne__(self, other): + return NotImplemented + + def __array__(self, dtype=None): + if dtype is None: + dtype = object + return np.asarray(self.data, dtype=dtype) + + @property + def nbytes(self) -> int: + return sys.getsizeof(self.data) + + def isna(self): + return np.array([x == self.dtype.na_value for x in self.data], dtype=bool) + + def take(self, indexer, allow_fill=False, fill_value=None): + # re-implement here, since NumPy has trouble setting + # sized objects like UserDicts into scalar slots of + # an ndarary. + indexer = np.asarray(indexer) + msg = ( + "Index is out of bounds or cannot do a " + "non-empty take from an empty array." + ) + + if allow_fill: + if fill_value is None: + fill_value = self.dtype.na_value + # bounds check + if (indexer < -1).any(): + raise ValueError + try: + output = [ + self.data[loc] if loc != -1 else fill_value for loc in indexer + ] + except IndexError as err: + raise IndexError(msg) from err + else: + try: + output = [self.data[loc] for loc in indexer] + except IndexError as err: + raise IndexError(msg) from err + + return self._from_sequence(output) + + def copy(self): + return type(self)(self.data[:]) + + def astype(self, dtype, copy=True): + # NumPy has issues when all the dicts are the same length. + # np.array([UserDict(...), UserDict(...)]) fails, + # but np.array([{...}, {...}]) works, so cast. + from pandas.core.arrays.string_ import StringDtype + + dtype = pandas_dtype(dtype) + # needed to add this check for the Series constructor + if isinstance(dtype, type(self.dtype)) and dtype == self.dtype: + if copy: + return self.copy() + return self + elif isinstance(dtype, StringDtype): + value = self.astype(str) # numpy doesn'y like nested dicts + return dtype.construct_array_type()._from_sequence(value, copy=False) + + return np.array([dict(x) for x in self], dtype=dtype, copy=copy) + + def unique(self): + # Parent method doesn't work since np.array will try to infer + # a 2-dim object. + return type(self)([dict(x) for x in {tuple(d.items()) for d in self.data}]) + + @classmethod + def _concat_same_type(cls, to_concat): + data = list(itertools.chain.from_iterable(x.data for x in to_concat)) + return cls(data) + + def _values_for_factorize(self): + frozen = self._values_for_argsort() + if len(frozen) == 0: + # factorize_array expects 1-d array, this is a len-0 2-d array. + frozen = frozen.ravel() + return frozen, () + + def _values_for_argsort(self): + # Bypass NumPy's shape inference to get a (N,) array of tuples. 
+ frozen = [tuple(x.items()) for x in self] + return construct_1d_object_array_from_listlike(frozen) + + +def make_data(): + # TODO: Use a regular dict. See _NDFrameIndexer._setitem_with_indexer + return [ + UserDict( + [ + (random.choice(string.ascii_letters), random.randint(0, 100)) + for _ in range(random.randint(0, 10)) + ] + ) + for _ in range(100) + ] diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/json/test_json.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/json/test_json.py new file mode 100644 index 0000000..d530a75 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/json/test_json.py @@ -0,0 +1,358 @@ +import collections +import operator + +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.tests.extension import base +from pandas.tests.extension.json.array import ( + JSONArray, + JSONDtype, + make_data, +) + + +@pytest.fixture +def dtype(): + return JSONDtype() + + +@pytest.fixture +def data(): + """Length-100 JSONArray for semantics test.""" + data = make_data() + + # Why the while loop? NumPy is unable to construct an ndarray from + # equal-length ndarrays. Many of our operations involve coercing the + # EA to an ndarray of objects. To avoid random test failures, we ensure + # that our data is coercible to an ndarray. Several tests deal with only + # the first two elements, so that's what we'll check. + + while len(data[0]) == len(data[1]): + data = make_data() + + return JSONArray(data) + + +@pytest.fixture +def data_missing(): + """Length 2 array with [NA, Valid]""" + return JSONArray([{}, {"a": 10}]) + + +@pytest.fixture +def data_for_sorting(): + return JSONArray([{"b": 1}, {"c": 4}, {"a": 2, "c": 3}]) + + +@pytest.fixture +def data_missing_for_sorting(): + return JSONArray([{"b": 1}, {}, {"a": 4}]) + + +@pytest.fixture +def na_value(dtype): + return dtype.na_value + + +@pytest.fixture +def na_cmp(): + return operator.eq + + +@pytest.fixture +def data_for_grouping(): + return JSONArray( + [ + {"b": 1}, + {"b": 1}, + {}, + {}, + {"a": 0, "c": 2}, + {"a": 0, "c": 2}, + {"b": 1}, + {"c": 2}, + ] + ) + + +class BaseJSON: + # NumPy doesn't handle an array of equal-length UserDicts. + # The default assert_series_equal eventually does a + # Series.values, which raises. We work around it by + # converting the UserDicts to dicts. 
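+ # (Editorial aside, not part of the upstream file: a hedged sketch of the workaround below — JSONArray.astype(object) converts each UserDict to a plain dict, so JSONArray(left.values.astype(object)) rebuilds the column with dicts before handing it to tm.assert_series_equal.)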
+ @classmethod + def assert_series_equal(cls, left, right, *args, **kwargs): + if left.dtype.name == "json": + assert left.dtype == right.dtype + left = pd.Series( + JSONArray(left.values.astype(object)), index=left.index, name=left.name + ) + right = pd.Series( + JSONArray(right.values.astype(object)), + index=right.index, + name=right.name, + ) + tm.assert_series_equal(left, right, *args, **kwargs) + + @classmethod + def assert_frame_equal(cls, left, right, *args, **kwargs): + obj_type = kwargs.get("obj", "DataFrame") + tm.assert_index_equal( + left.columns, + right.columns, + exact=kwargs.get("check_column_type", "equiv"), + check_names=kwargs.get("check_names", True), + check_exact=kwargs.get("check_exact", False), + check_categorical=kwargs.get("check_categorical", True), + obj=f"{obj_type}.columns", + ) + + jsons = (left.dtypes == "json").index + + for col in jsons: + cls.assert_series_equal(left[col], right[col], *args, **kwargs) + + left = left.drop(columns=jsons) + right = right.drop(columns=jsons) + tm.assert_frame_equal(left, right, *args, **kwargs) + + +class TestDtype(BaseJSON, base.BaseDtypeTests): + pass + + +class TestInterface(BaseJSON, base.BaseInterfaceTests): + def test_custom_asserts(self): + # This would always trigger the KeyError from trying to put + # an array of equal-length UserDicts inside an ndarray. + data = JSONArray( + [ + collections.UserDict({"a": 1}), + collections.UserDict({"b": 2}), + collections.UserDict({"c": 3}), + ] + ) + a = pd.Series(data) + self.assert_series_equal(a, a) + self.assert_frame_equal(a.to_frame(), a.to_frame()) + + b = pd.Series(data.take([0, 0, 1])) + msg = r"ExtensionArray are different" + with pytest.raises(AssertionError, match=msg): + self.assert_series_equal(a, b) + + with pytest.raises(AssertionError, match=msg): + self.assert_frame_equal(a.to_frame(), b.to_frame()) + + @pytest.mark.xfail( + reason="comparison method not implemented for JSONArray (GH-37867)" + ) + def test_contains(self, data): + # GH-37867 + super().test_contains(data) + + +class TestConstructors(BaseJSON, base.BaseConstructorsTests): + @pytest.mark.skip(reason="not implemented constructor from dtype") + def test_from_dtype(self, data): + # construct from our dtype & string dtype + pass + + @pytest.mark.xfail(reason="RecursionError, GH-33900") + def test_series_constructor_no_data_with_index(self, dtype, na_value): + # RecursionError: maximum recursion depth exceeded in comparison + super().test_series_constructor_no_data_with_index(dtype, na_value) + + @pytest.mark.xfail(reason="RecursionError, GH-33900") + def test_series_constructor_scalar_na_with_index(self, dtype, na_value): + # RecursionError: maximum recursion depth exceeded in comparison + super().test_series_constructor_scalar_na_with_index(dtype, na_value) + + @pytest.mark.xfail(reason="collection as scalar, GH-33901") + def test_series_constructor_scalar_with_index(self, data, dtype): + # TypeError: All values must be of type + super().test_series_constructor_scalar_with_index(data, dtype) + + +class TestReshaping(BaseJSON, base.BaseReshapingTests): + @pytest.mark.skip(reason="Different definitions of NA") + def test_stack(self): + """ + The test does .astype(object).stack(). If we happen to have + any missing values in `data`, then we'll end up with different + rows since we consider `{}` NA, but `.astype(object)` doesn't. + """ + + @pytest.mark.xfail(reason="dict for NA") + def test_unstack(self, data, index): + # The base test has NaN for the expected NA value. 
+ # this matches otherwise + return super().test_unstack(data, index) + + +class TestGetitem(BaseJSON, base.BaseGetitemTests): + pass + + +class TestIndex(BaseJSON, base.BaseIndexTests): + pass + + +class TestMissing(BaseJSON, base.BaseMissingTests): + @pytest.mark.skip(reason="Setting a dict as a scalar") + def test_fillna_series(self): + """We treat dictionaries as a mapping in fillna, not a scalar.""" + + @pytest.mark.skip(reason="Setting a dict as a scalar") + def test_fillna_frame(self): + """We treat dictionaries as a mapping in fillna, not a scalar.""" + + +unhashable = pytest.mark.skip(reason="Unhashable") + + +class TestReduce(base.BaseNoReduceTests): + pass + + +class TestMethods(BaseJSON, base.BaseMethodsTests): + @unhashable + def test_value_counts(self, all_data, dropna): + pass + + @unhashable + def test_value_counts_with_normalize(self, data): + pass + + @unhashable + def test_sort_values_frame(self): + # TODO (EA.factorize): see if _values_for_factorize allows this. + pass + + @pytest.mark.parametrize("ascending", [True, False]) + def test_sort_values(self, data_for_sorting, ascending, sort_by_key): + super().test_sort_values(data_for_sorting, ascending, sort_by_key) + + @pytest.mark.parametrize("ascending", [True, False]) + def test_sort_values_missing( + self, data_missing_for_sorting, ascending, sort_by_key + ): + super().test_sort_values_missing( + data_missing_for_sorting, ascending, sort_by_key + ) + + @pytest.mark.skip(reason="combine for JSONArray not supported") + def test_combine_le(self, data_repeated): + pass + + @pytest.mark.skip(reason="combine for JSONArray not supported") + def test_combine_add(self, data_repeated): + pass + + @pytest.mark.skip(reason="combine for JSONArray not supported") + def test_combine_first(self, data): + pass + + @unhashable + def test_hash_pandas_object_works(self, data, kind): + super().test_hash_pandas_object_works(data, kind) + + @pytest.mark.skip(reason="broadcasting error") + def test_where_series(self, data, na_value): + # Fails with + # *** ValueError: operands could not be broadcast together + # with shapes (4,) (4,) (0,) + super().test_where_series(data, na_value) + + @pytest.mark.skip(reason="Can't compare dicts.") + def test_searchsorted(self, data_for_sorting): + super().test_searchsorted(data_for_sorting) + + @pytest.mark.skip(reason="Can't compare dicts.") + def test_equals(self, data, na_value, as_series): + pass + + +class TestCasting(BaseJSON, base.BaseCastingTests): + @pytest.mark.skip(reason="failing on np.array(self, dtype=str)") + def test_astype_str(self): + """This currently fails in NumPy on np.array(self, dtype=str) with + + *** ValueError: setting an array element with a sequence + """ + + +# We intentionally don't run base.BaseSetitemTests because pandas' +# internals has trouble setting sequences of values into scalar positions. + + +class TestGroupby(BaseJSON, base.BaseGroupbyTests): + @unhashable + def test_groupby_extension_transform(self): + """ + This currently fails in Series.name.setter, since the + name must be hashable, but the value is a dictionary. + I think this is what we want, i.e. `.name` should be the original + values, and not the values for factorization. + """ + + @unhashable + def test_groupby_extension_apply(self): + """ + This fails in Index._do_unique_check with + + > hash(val) + E TypeError: unhashable type: 'UserDict' with + + I suspect that once we support Index[ExtensionArray], + we'll be able to dispatch unique. 
+ """ + + @unhashable + def test_groupby_extension_agg(self): + """ + This fails when we get to tm.assert_series_equal when left.index + contains dictionaries, which are not hashable. + """ + + @unhashable + def test_groupby_extension_no_sort(self): + """ + This fails when we get to tm.assert_series_equal when left.index + contains dictionaries, which are not hashable. + """ + + @pytest.mark.xfail(reason="GH#39098: Converts agg result to object") + def test_groupby_agg_extension(self, data_for_grouping): + super().test_groupby_agg_extension(data_for_grouping) + + +class TestArithmeticOps(BaseJSON, base.BaseArithmeticOpsTests): + def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): + if len(data[0]) != 1: + mark = pytest.mark.xfail(reason="raises in coercing to Series") + request.node.add_marker(mark) + super().test_arith_frame_with_scalar(data, all_arithmetic_operators) + + def test_add_series_with_extension_array(self, data): + ser = pd.Series(data) + with pytest.raises(TypeError, match="unsupported"): + ser + data + + def test_divmod_series_array(self): + # GH 23287 + # skipping because it is not implemented + pass + + def _check_divmod_op(self, s, op, other, exc=NotImplementedError): + return super()._check_divmod_op(s, op, other, exc=TypeError) + + +class TestComparisonOps(BaseJSON, base.BaseComparisonOpsTests): + pass + + +class TestPrinting(BaseJSON, base.BasePrintingTests): + pass diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/list/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/list/__init__.py new file mode 100644 index 0000000..0f3f2f3 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/list/__init__.py @@ -0,0 +1,7 @@ +from pandas.tests.extension.list.array import ( + ListArray, + ListDtype, + make_data, +) + +__all__ = ["ListArray", "ListDtype", "make_data"] diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/list/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/list/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..4de263c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/list/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/list/__pycache__/array.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/list/__pycache__/array.cpython-38.pyc new file mode 100644 index 0000000..beb7f25 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/list/__pycache__/array.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/list/__pycache__/test_list.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/extension/list/__pycache__/test_list.cpython-38.pyc new file mode 100644 index 0000000..0d0a788 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/extension/list/__pycache__/test_list.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/list/array.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/list/array.py new file mode 100644 index 0000000..47015ed --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/list/array.py @@ -0,0 +1,132 @@ +""" +Test extension array for storing nested data in a pandas container. + +The ListArray stores an ndarray of lists. 
+""" +from __future__ import annotations + +import numbers +import random +import string + +import numpy as np + +from pandas._typing import type_t + +from pandas.core.dtypes.base import ExtensionDtype + +import pandas as pd +from pandas.api.types import ( + is_object_dtype, + is_string_dtype, +) +from pandas.core.arrays import ExtensionArray + + +class ListDtype(ExtensionDtype): + type = list + name = "list" + na_value = np.nan + + @classmethod + def construct_array_type(cls) -> type_t[ListArray]: + """ + Return the array type associated with this dtype. + + Returns + ------- + type + """ + return ListArray + + +class ListArray(ExtensionArray): + dtype = ListDtype() + __array_priority__ = 1000 + + def __init__(self, values, dtype=None, copy=False): + if not isinstance(values, np.ndarray): + raise TypeError("Need to pass a numpy array as values") + for val in values: + if not isinstance(val, self.dtype.type) and not pd.isna(val): + raise TypeError("All values must be of type " + str(self.dtype.type)) + self.data = values + + @classmethod + def _from_sequence(cls, scalars, dtype=None, copy=False): + data = np.empty(len(scalars), dtype=object) + data[:] = scalars + return cls(data) + + def __getitem__(self, item): + if isinstance(item, numbers.Integral): + return self.data[item] + else: + # slice, list-like, mask + return type(self)(self.data[item]) + + def __len__(self) -> int: + return len(self.data) + + def isna(self): + return np.array( + [not isinstance(x, list) and np.isnan(x) for x in self.data], dtype=bool + ) + + def take(self, indexer, allow_fill=False, fill_value=None): + # re-implement here, since NumPy has trouble setting + # sized objects like UserDicts into scalar slots of + # an ndarary. + indexer = np.asarray(indexer) + msg = ( + "Index is out of bounds or cannot do a " + "non-empty take from an empty array." + ) + + if allow_fill: + if fill_value is None: + fill_value = self.dtype.na_value + # bounds check + if (indexer < -1).any(): + raise ValueError + try: + output = [ + self.data[loc] if loc != -1 else fill_value for loc in indexer + ] + except IndexError as err: + raise IndexError(msg) from err + else: + try: + output = [self.data[loc] for loc in indexer] + except IndexError as err: + raise IndexError(msg) from err + + return self._from_sequence(output) + + def copy(self): + return type(self)(self.data[:]) + + def astype(self, dtype, copy=True): + if isinstance(dtype, type(self.dtype)) and dtype == self.dtype: + if copy: + return self.copy() + return self + elif is_string_dtype(dtype) and not is_object_dtype(dtype): + # numpy has problems with astype(str) for nested elements + return np.array([str(x) for x in self.data], dtype=dtype) + return np.array(self.data, dtype=dtype, copy=copy) + + @classmethod + def _concat_same_type(cls, to_concat): + data = np.concatenate([x.data for x in to_concat]) + return cls(data) + + +def make_data(): + # TODO: Use a regular dict. 
See _NDFrameIndexer._setitem_with_indexer + data = np.empty(100, dtype=object) + data[:] = [ + [random.choice(string.ascii_letters) for _ in range(random.randint(0, 10))] + for _ in range(100) + ] + return data diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/list/test_list.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/list/test_list.py new file mode 100644 index 0000000..295f086 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/list/test_list.py @@ -0,0 +1,33 @@ +import pytest + +import pandas as pd +from pandas.tests.extension.list.array import ( + ListArray, + ListDtype, + make_data, +) + + +@pytest.fixture +def dtype(): + return ListDtype() + + +@pytest.fixture +def data(): + """Length-100 ListArray for semantics test.""" + data = make_data() + + while len(data[0]) == len(data[1]): + data = make_data() + + return ListArray(data) + + +def test_to_csv(data): + # https://github.com/pandas-dev/pandas/issues/28840 + # array with list-likes fail when doing astype(str) on the numpy array + # which was done in to_native_types + df = pd.DataFrame({"a": data}) + res = df.to_csv() + assert str(data[0]) in res diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/test_boolean.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/test_boolean.py new file mode 100644 index 0000000..1f44889 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/test_boolean.py @@ -0,0 +1,394 @@ +""" +This file contains a minimal set of tests for compliance with the extension +array interface test suite, and should contain no other tests. +The test suite for the full functionality of the array is located in +`pandas/tests/arrays/`. + +The tests in this file are inherited from the BaseExtensionTests, and only +minimal tweaks should be applied to get the tests passing (by overwriting a +parent method). + +Additional tests should either be added to one of the BaseExtensionTests +classes (if they are relevant for the extension interface for all dtypes), or +be added to the array-specific tests in `pandas/tests/arrays/`. 
+ +""" +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays.boolean import BooleanDtype +from pandas.tests.extension import base + + +def make_data(): + return [True, False] * 4 + [np.nan] + [True, False] * 44 + [np.nan] + [True, False] + + +@pytest.fixture +def dtype(): + return BooleanDtype() + + +@pytest.fixture +def data(dtype): + return pd.array(make_data(), dtype=dtype) + + +@pytest.fixture +def data_for_twos(dtype): + return pd.array(np.ones(100), dtype=dtype) + + +@pytest.fixture +def data_missing(dtype): + return pd.array([np.nan, True], dtype=dtype) + + +@pytest.fixture +def data_for_sorting(dtype): + return pd.array([True, True, False], dtype=dtype) + + +@pytest.fixture +def data_missing_for_sorting(dtype): + return pd.array([True, np.nan, False], dtype=dtype) + + +@pytest.fixture +def na_cmp(): + # we are pd.NA + return lambda x, y: x is pd.NA and y is pd.NA + + +@pytest.fixture +def na_value(): + return pd.NA + + +@pytest.fixture +def data_for_grouping(dtype): + b = True + a = False + na = np.nan + return pd.array([b, b, na, na, a, a, b], dtype=dtype) + + +class TestDtype(base.BaseDtypeTests): + pass + + +class TestInterface(base.BaseInterfaceTests): + pass + + +class TestConstructors(base.BaseConstructorsTests): + pass + + +class TestGetitem(base.BaseGetitemTests): + pass + + +class TestSetitem(base.BaseSetitemTests): + pass + + +class TestIndex(base.BaseIndexTests): + pass + + +class TestMissing(base.BaseMissingTests): + pass + + +class TestArithmeticOps(base.BaseArithmeticOpsTests): + implements = {"__sub__", "__rsub__"} + + def check_opname(self, s, op_name, other, exc=None): + # overwriting to indicate ops don't raise an error + super().check_opname(s, op_name, other, exc=None) + + def _check_op(self, obj, op, other, op_name, exc=NotImplementedError): + if exc is None: + if op_name in self.implements: + msg = r"numpy boolean subtract" + with pytest.raises(TypeError, match=msg): + op(obj, other) + return + + result = op(obj, other) + expected = self._combine(obj, other, op) + + if op_name in ( + "__floordiv__", + "__rfloordiv__", + "__pow__", + "__rpow__", + "__mod__", + "__rmod__", + ): + # combine keeps boolean type + expected = expected.astype("Int8") + elif op_name in ("__truediv__", "__rtruediv__"): + # combine with bools does not generate the correct result + # (numpy behaviour for div is to regard the bools as numeric) + expected = self._combine(obj.astype(float), other, op) + expected = expected.astype("Float64") + if op_name == "__rpow__": + # for rpow, combine does not propagate NaN + expected[result.isna()] = np.nan + self.assert_equal(result, expected) + else: + with pytest.raises(exc): + op(obj, other) + + def _check_divmod_op(self, s, op, other, exc=None): + # override to not raise an error + super()._check_divmod_op(s, op, other, None) + + +class TestComparisonOps(base.BaseComparisonOpsTests): + def check_opname(self, s, op_name, other, exc=None): + # overwriting to indicate ops don't raise an error + super().check_opname(s, op_name, other, exc=None) + + +class TestReshaping(base.BaseReshapingTests): + pass + + +class TestMethods(base.BaseMethodsTests): + @pytest.mark.parametrize("na_sentinel", [-1, -2]) + def test_factorize(self, data_for_grouping, na_sentinel): + # override because we only have 2 unique values + labels, uniques = pd.factorize(data_for_grouping, na_sentinel=na_sentinel) + expected_labels = np.array( + [0, 0, na_sentinel, na_sentinel, 1, 1, 0], dtype=np.intp + ) + expected_uniques = 
data_for_grouping.take([0, 4])
+
+        tm.assert_numpy_array_equal(labels, expected_labels)
+        self.assert_extension_array_equal(uniques, expected_uniques)
+
+    def test_combine_le(self, data_repeated):
+        # override because expected needs to be boolean instead of bool dtype
+        orig_data1, orig_data2 = data_repeated(2)
+        s1 = pd.Series(orig_data1)
+        s2 = pd.Series(orig_data2)
+        result = s1.combine(s2, lambda x1, x2: x1 <= x2)
+        expected = pd.Series(
+            [a <= b for (a, b) in zip(list(orig_data1), list(orig_data2))],
+            dtype="boolean",
+        )
+        self.assert_series_equal(result, expected)
+
+        val = s1.iloc[0]
+        result = s1.combine(val, lambda x1, x2: x1 <= x2)
+        expected = pd.Series([a <= val for a in list(orig_data1)], dtype="boolean")
+        self.assert_series_equal(result, expected)
+
+    def test_searchsorted(self, data_for_sorting, as_series):
+        # override because we only have 2 unique values
+        data_for_sorting = pd.array([True, False], dtype="boolean")
+        b, a = data_for_sorting
+        arr = type(data_for_sorting)._from_sequence([a, b])
+
+        if as_series:
+            arr = pd.Series(arr)
+        assert arr.searchsorted(a) == 0
+        assert arr.searchsorted(a, side="right") == 1
+
+        assert arr.searchsorted(b) == 1
+        assert arr.searchsorted(b, side="right") == 2
+
+        result = arr.searchsorted(arr.take([0, 1]))
+        expected = np.array([0, 1], dtype=np.intp)
+
+        tm.assert_numpy_array_equal(result, expected)
+
+        # sorter
+        sorter = np.array([1, 0])
+        assert data_for_sorting.searchsorted(a, sorter=sorter) == 0
+
+    @pytest.mark.xfail(reason="uses nullable integer")
+    def test_value_counts(self, all_data, dropna):
+        return super().test_value_counts(all_data, dropna)
+
+    @pytest.mark.xfail(reason="uses nullable integer")
+    def test_value_counts_with_normalize(self, data):
+        super().test_value_counts_with_normalize(data)
+
+    def test_argmin_argmax(self, data_for_sorting, data_missing_for_sorting):
+        # override because there are only 2 unique values
+
+        # data_for_sorting -> [B, C, A] with A < B < C -> here True, True, False
+        assert data_for_sorting.argmax() == 0
+        assert data_for_sorting.argmin() == 2
+
+        # with repeated values -> first occurrence
+        data = data_for_sorting.take([2, 0, 0, 1, 1, 2])
+        assert data.argmax() == 1
+        assert data.argmin() == 0
+
+        # with missing values
+        # data_missing_for_sorting -> [B, NA, A] with A < B and NA missing.
+        assert data_missing_for_sorting.argmax() == 0
+        assert data_missing_for_sorting.argmin() == 2
+
+
+class TestCasting(base.BaseCastingTests):
+    pass
+
+
+class TestGroupby(base.BaseGroupbyTests):
+    """
+    Groupby-specific tests are overridden because boolean only has 2
+    unique values, base tests use 3 groups.
+ """ + + def test_grouping_grouper(self, data_for_grouping): + df = pd.DataFrame( + {"A": ["B", "B", None, None, "A", "A", "B"], "B": data_for_grouping} + ) + gr1 = df.groupby("A").grouper.groupings[0] + gr2 = df.groupby("B").grouper.groupings[0] + + tm.assert_numpy_array_equal(gr1.grouping_vector, df.A.values) + tm.assert_extension_array_equal(gr2.grouping_vector, data_for_grouping) + + @pytest.mark.parametrize("as_index", [True, False]) + def test_groupby_extension_agg(self, as_index, data_for_grouping): + df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1], "B": data_for_grouping}) + result = df.groupby("B", as_index=as_index).A.mean() + _, uniques = pd.factorize(data_for_grouping, sort=True) + + if as_index: + index = pd.Index(uniques, name="B") + expected = pd.Series([3.0, 1.0], index=index, name="A") + self.assert_series_equal(result, expected) + else: + expected = pd.DataFrame({"B": uniques, "A": [3.0, 1.0]}) + self.assert_frame_equal(result, expected) + + def test_groupby_agg_extension(self, data_for_grouping): + # GH#38980 groupby agg on extension type fails for non-numeric types + df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1], "B": data_for_grouping}) + + expected = df.iloc[[0, 2, 4]] + expected = expected.set_index("A") + + result = df.groupby("A").agg({"B": "first"}) + self.assert_frame_equal(result, expected) + + result = df.groupby("A").agg("first") + self.assert_frame_equal(result, expected) + + result = df.groupby("A").first() + self.assert_frame_equal(result, expected) + + def test_groupby_extension_no_sort(self, data_for_grouping): + df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1], "B": data_for_grouping}) + result = df.groupby("B", sort=False).A.mean() + _, index = pd.factorize(data_for_grouping, sort=False) + + index = pd.Index(index, name="B") + expected = pd.Series([1.0, 3.0], index=index, name="A") + self.assert_series_equal(result, expected) + + def test_groupby_extension_transform(self, data_for_grouping): + valid = data_for_grouping[~data_for_grouping.isna()] + df = pd.DataFrame({"A": [1, 1, 3, 3, 1], "B": valid}) + + result = df.groupby("B").A.transform(len) + expected = pd.Series([3, 3, 2, 2, 3], name="A") + + self.assert_series_equal(result, expected) + + def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op): + df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1], "B": data_for_grouping}) + df.groupby("B").apply(groupby_apply_op) + df.groupby("B").A.apply(groupby_apply_op) + df.groupby("A").apply(groupby_apply_op) + df.groupby("A").B.apply(groupby_apply_op) + + def test_groupby_apply_identity(self, data_for_grouping): + df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1], "B": data_for_grouping}) + result = df.groupby("A").B.apply(lambda x: x.array) + expected = pd.Series( + [ + df.B.iloc[[0, 1, 6]].array, + df.B.iloc[[2, 3]].array, + df.B.iloc[[4, 5]].array, + ], + index=pd.Index([1, 2, 3], name="A"), + name="B", + ) + self.assert_series_equal(result, expected) + + def test_in_numeric_groupby(self, data_for_grouping): + df = pd.DataFrame( + { + "A": [1, 1, 2, 2, 3, 3, 1], + "B": data_for_grouping, + "C": [1, 1, 1, 1, 1, 1, 1], + } + ) + result = df.groupby("A").sum().columns + + if data_for_grouping.dtype._is_numeric: + expected = pd.Index(["B", "C"]) + else: + expected = pd.Index(["C"]) + + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("min_count", [0, 10]) + def test_groupby_sum_mincount(self, data_for_grouping, min_count): + df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1], "B": data_for_grouping}) + result = 
df.groupby("A").sum(min_count=min_count) + if min_count == 0: + expected = pd.DataFrame( + {"B": pd.array([3, 0, 0], dtype="Int64")}, + index=pd.Index([1, 2, 3], name="A"), + ) + tm.assert_frame_equal(result, expected) + else: + expected = pd.DataFrame( + {"B": pd.array([pd.NA] * 3, dtype="Int64")}, + index=pd.Index([1, 2, 3], name="A"), + ) + tm.assert_frame_equal(result, expected) + + +class TestNumericReduce(base.BaseNumericReduceTests): + def check_reduce(self, s, op_name, skipna): + result = getattr(s, op_name)(skipna=skipna) + expected = getattr(s.astype("float64"), op_name)(skipna=skipna) + # override parent function to cast to bool for min/max + if np.isnan(expected): + expected = pd.NA + elif op_name in ("min", "max"): + expected = bool(expected) + tm.assert_almost_equal(result, expected) + + +class TestBooleanReduce(base.BaseBooleanReduceTests): + pass + + +class TestPrinting(base.BasePrintingTests): + pass + + +class TestUnaryOps(base.BaseUnaryOpsTests): + pass + + +class TestParsing(base.BaseParsingTests): + pass + + +class Test2DCompat(base.Dim2CompatTests): + pass diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/test_categorical.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/test_categorical.py new file mode 100644 index 0000000..d21110e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/test_categorical.py @@ -0,0 +1,320 @@ +""" +This file contains a minimal set of tests for compliance with the extension +array interface test suite, and should contain no other tests. +The test suite for the full functionality of the array is located in +`pandas/tests/arrays/`. + +The tests in this file are inherited from the BaseExtensionTests, and only +minimal tweaks should be applied to get the tests passing (by overwriting a +parent method). + +Additional tests should either be added to one of the BaseExtensionTests +classes (if they are relevant for the extension interface for all dtypes), or +be added to the array-specific tests in `pandas/tests/arrays/`. + +""" +import string + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Categorical, + CategoricalIndex, + Timestamp, +) +import pandas._testing as tm +from pandas.api.types import CategoricalDtype +from pandas.tests.extension import base + + +def make_data(): + while True: + values = np.random.choice(list(string.ascii_letters), size=100) + # ensure we meet the requirements + # 1. first two not null + # 2. first and second are different + if values[0] != values[1]: + break + return values + + +@pytest.fixture +def dtype(): + return CategoricalDtype() + + +@pytest.fixture +def data(): + """Length-100 array for this type. 
+
+    * data[0] and data[1] should both be non-missing
+    * data[0] and data[1] should not be equal
+    """
+    return Categorical(make_data())
+
+
+@pytest.fixture
+def data_missing():
+    """Length 2 array with [NA, Valid]"""
+    return Categorical([np.nan, "A"])
+
+
+@pytest.fixture
+def data_for_sorting():
+    return Categorical(["A", "B", "C"], categories=["C", "A", "B"], ordered=True)
+
+
+@pytest.fixture
+def data_missing_for_sorting():
+    return Categorical(["A", None, "B"], categories=["B", "A"], ordered=True)
+
+
+@pytest.fixture
+def na_value():
+    return np.nan
+
+
+@pytest.fixture
+def data_for_grouping():
+    return Categorical(["a", "a", None, None, "b", "b", "a", "c"])
+
+
+class TestDtype(base.BaseDtypeTests):
+    pass
+
+
+class TestInterface(base.BaseInterfaceTests):
+    @pytest.mark.skip(reason="Memory usage doesn't match")
+    def test_memory_usage(self, data):
+        # Is this deliberate?
+        super().test_memory_usage(data)
+
+    def test_contains(self, data, data_missing):
+        # GH-37867
+        # na value handling in Categorical.__contains__ is deprecated.
+        # See base.BaseInterfaceTests.test_contains for more details.
+
+        na_value = data.dtype.na_value
+        # ensure data without missing values
+        data = data[~data.isna()]
+
+        # first elements are non-missing
+        assert data[0] in data
+        assert data_missing[0] in data_missing
+
+        # check the presence of na_value
+        assert na_value in data_missing
+        assert na_value not in data
+
+        # Categoricals can contain other nan-likes than na_value
+        for na_value_obj in tm.NULL_OBJECTS:
+            if na_value_obj is na_value:
+                continue
+            assert na_value_obj not in data
+            assert na_value_obj in data_missing  # this line differs from super method
+
+
+class TestConstructors(base.BaseConstructorsTests):
+    def test_empty(self, dtype):
+        cls = dtype.construct_array_type()
+        result = cls._empty((4,), dtype=dtype)
+
+        assert isinstance(result, cls)
+        # the dtype we passed is not initialized, so will not match the
+        # dtype on our result.
+        assert result.dtype == CategoricalDtype([])
+
+
+class TestReshaping(base.BaseReshapingTests):
+    pass
+
+
+class TestGetitem(base.BaseGetitemTests):
+    @pytest.mark.skip(reason="Backwards compatibility")
+    def test_getitem_scalar(self, data):
+        # CategoricalDtype.type isn't "correct" since it should
+        # be a parent of the elements (object). But don't want
+        # to break things by changing.
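+        # Illustrative sketch of the mismatch (an added note, not upstream
+        # pandas code): with cat = Categorical(["x", "y"]), cat[0] is the
+        # plain Python str "x", while cat.dtype.type is CategoricalDtypeType,
+        # so the base check isinstance(cat[0], cat.dtype.type) cannot pass.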
+ super().test_getitem_scalar(data) + + +class TestSetitem(base.BaseSetitemTests): + pass + + +class TestIndex(base.BaseIndexTests): + pass + + +class TestMissing(base.BaseMissingTests): + @pytest.mark.skip(reason="Not implemented") + def test_fillna_limit_pad(self, data_missing): + super().test_fillna_limit_pad(data_missing) + + @pytest.mark.skip(reason="Not implemented") + def test_fillna_limit_backfill(self, data_missing): + super().test_fillna_limit_backfill(data_missing) + + +class TestReduce(base.BaseNoReduceTests): + pass + + +class TestMethods(base.BaseMethodsTests): + @pytest.mark.skip(reason="Unobserved categories included") + def test_value_counts(self, all_data, dropna): + return super().test_value_counts(all_data, dropna) + + def test_combine_add(self, data_repeated): + # GH 20825 + # When adding categoricals in combine, result is a string + orig_data1, orig_data2 = data_repeated(2) + s1 = pd.Series(orig_data1) + s2 = pd.Series(orig_data2) + result = s1.combine(s2, lambda x1, x2: x1 + x2) + expected = pd.Series( + [a + b for (a, b) in zip(list(orig_data1), list(orig_data2))] + ) + self.assert_series_equal(result, expected) + + val = s1.iloc[0] + result = s1.combine(val, lambda x1, x2: x1 + x2) + expected = pd.Series([a + val for a in list(orig_data1)]) + self.assert_series_equal(result, expected) + + @pytest.mark.skip(reason="Not Applicable") + def test_fillna_length_mismatch(self, data_missing): + super().test_fillna_length_mismatch(data_missing) + + +class TestCasting(base.BaseCastingTests): + @pytest.mark.parametrize("cls", [Categorical, CategoricalIndex]) + @pytest.mark.parametrize("values", [[1, np.nan], [Timestamp("2000"), pd.NaT]]) + def test_cast_nan_to_int(self, cls, values): + # GH 28406 + s = cls(values) + + msg = "Cannot (cast|convert)" + with pytest.raises((ValueError, TypeError), match=msg): + s.astype(int) + + @pytest.mark.parametrize( + "expected", + [ + pd.Series(["2019", "2020"], dtype="datetime64[ns, UTC]"), + pd.Series([0, 0], dtype="timedelta64[ns]"), + pd.Series([pd.Period("2019"), pd.Period("2020")], dtype="period[A-DEC]"), + pd.Series([pd.Interval(0, 1), pd.Interval(1, 2)], dtype="interval"), + pd.Series([1, np.nan], dtype="Int64"), + ], + ) + def test_cast_category_to_extension_dtype(self, expected): + # GH 28668 + result = expected.astype("category").astype(expected.dtype) + + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "dtype, expected", + [ + ( + "datetime64[ns]", + np.array(["2015-01-01T00:00:00.000000000"], dtype="datetime64[ns]"), + ), + ( + "datetime64[ns, MET]", + pd.DatetimeIndex( + [Timestamp("2015-01-01 00:00:00+0100", tz="MET")] + ).array, + ), + ], + ) + def test_consistent_casting(self, dtype, expected): + # GH 28448 + result = Categorical(["2015-01-01"]).astype(dtype) + assert result == expected + + +class TestArithmeticOps(base.BaseArithmeticOpsTests): + def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): + # frame & scalar + op_name = all_arithmetic_operators + if op_name == "__rmod__": + request.node.add_marker( + pytest.mark.xfail( + reason="rmod never called when string is first argument" + ) + ) + super().test_arith_frame_with_scalar(data, op_name) + + def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request): + op_name = all_arithmetic_operators + if op_name == "__rmod__": + request.node.add_marker( + pytest.mark.xfail( + reason="rmod never called when string is first argument" + ) + ) + super().test_arith_series_with_scalar(data, op_name) + + def 
test_add_series_with_extension_array(self, data): + ser = pd.Series(data) + with pytest.raises(TypeError, match="cannot perform|unsupported operand"): + ser + data + + def test_divmod_series_array(self): + # GH 23287 + # skipping because it is not implemented + pass + + def _check_divmod_op(self, s, op, other, exc=NotImplementedError): + return super()._check_divmod_op(s, op, other, exc=TypeError) + + +class TestComparisonOps(base.BaseComparisonOpsTests): + def _compare_other(self, s, data, op, other): + op_name = f"__{op.__name__}__" + if op_name == "__eq__": + result = op(s, other) + expected = s.combine(other, lambda x, y: x == y) + assert (result == expected).all() + + elif op_name == "__ne__": + result = op(s, other) + expected = s.combine(other, lambda x, y: x != y) + assert (result == expected).all() + + else: + msg = "Unordered Categoricals can only compare equality or not" + with pytest.raises(TypeError, match=msg): + op(data, other) + + @pytest.mark.parametrize( + "categories", + [["a", "b"], [0, 1], [Timestamp("2019"), Timestamp("2020")]], + ) + def test_not_equal_with_na(self, categories): + # https://github.com/pandas-dev/pandas/issues/32276 + c1 = Categorical.from_codes([-1, 0], categories=categories) + c2 = Categorical.from_codes([0, 1], categories=categories) + + result = c1 != c2 + + assert result.all() + + +class TestParsing(base.BaseParsingTests): + pass + + +class Test2DCompat(base.NDArrayBacked2DTests): + def test_repr_2d(self, data): + # Categorical __repr__ doesn't include "Categorical", so we need + # to special-case + res = repr(data.reshape(1, -1)) + assert res.count("\nCategories") == 1 + + res = repr(data.reshape(-1, 1)) + assert res.count("\nCategories") == 1 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/test_common.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/test_common.py new file mode 100644 index 0000000..e43650c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/test_common.py @@ -0,0 +1,81 @@ +import numpy as np +import pytest + +from pandas.core.dtypes import dtypes +from pandas.core.dtypes.common import is_extension_array_dtype + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays import ExtensionArray + + +class DummyDtype(dtypes.ExtensionDtype): + pass + + +class DummyArray(ExtensionArray): + def __init__(self, data): + self.data = data + + def __array__(self, dtype): + return self.data + + @property + def dtype(self): + return DummyDtype() + + def astype(self, dtype, copy=True): + # we don't support anything but a single dtype + if isinstance(dtype, DummyDtype): + if copy: + return type(self)(self.data) + return self + + return np.array(self, dtype=dtype, copy=copy) + + +class TestExtensionArrayDtype: + @pytest.mark.parametrize( + "values", + [ + pd.Categorical([]), + pd.Categorical([]).dtype, + pd.Series(pd.Categorical([])), + DummyDtype(), + DummyArray(np.array([1, 2])), + ], + ) + def test_is_extension_array_dtype(self, values): + assert is_extension_array_dtype(values) + + @pytest.mark.parametrize("values", [np.array([]), pd.Series(np.array([]))]) + def test_is_not_extension_array_dtype(self, values): + assert not is_extension_array_dtype(values) + + +def test_astype(): + + arr = DummyArray(np.array([1, 2, 3])) + expected = np.array([1, 2, 3], dtype=object) + + result = arr.astype(object) + tm.assert_numpy_array_equal(result, expected) + + result = arr.astype("object") + tm.assert_numpy_array_equal(result, expected) + + +def test_astype_no_copy(): + arr 
= DummyArray(np.array([1, 2, 3], dtype=np.int64)) + result = arr.astype(arr.dtype, copy=False) + + assert arr is result + + result = arr.astype(arr.dtype) + assert arr is not result + + +@pytest.mark.parametrize("dtype", [dtypes.CategoricalDtype(), dtypes.IntervalDtype()]) +def test_is_extension_array_dtype(dtype): + assert isinstance(dtype, dtypes.ExtensionDtype) + assert is_extension_array_dtype(dtype) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/test_datetime.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/test_datetime.py new file mode 100644 index 0000000..a64b42f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/test_datetime.py @@ -0,0 +1,196 @@ +""" +This file contains a minimal set of tests for compliance with the extension +array interface test suite, and should contain no other tests. +The test suite for the full functionality of the array is located in +`pandas/tests/arrays/`. + +The tests in this file are inherited from the BaseExtensionTests, and only +minimal tweaks should be applied to get the tests passing (by overwriting a +parent method). + +Additional tests should either be added to one of the BaseExtensionTests +classes (if they are relevant for the extension interface for all dtypes), or +be added to the array-specific tests in `pandas/tests/arrays/`. + +""" +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import DatetimeTZDtype + +import pandas as pd +from pandas.core.arrays import DatetimeArray +from pandas.tests.extension import base + + +@pytest.fixture(params=["US/Central"]) +def dtype(request): + return DatetimeTZDtype(unit="ns", tz=request.param) + + +@pytest.fixture +def data(dtype): + data = DatetimeArray(pd.date_range("2000", periods=100, tz=dtype.tz), dtype=dtype) + return data + + +@pytest.fixture +def data_missing(dtype): + return DatetimeArray( + np.array(["NaT", "2000-01-01"], dtype="datetime64[ns]"), dtype=dtype + ) + + +@pytest.fixture +def data_for_sorting(dtype): + a = pd.Timestamp("2000-01-01") + b = pd.Timestamp("2000-01-02") + c = pd.Timestamp("2000-01-03") + return DatetimeArray(np.array([b, c, a], dtype="datetime64[ns]"), dtype=dtype) + + +@pytest.fixture +def data_missing_for_sorting(dtype): + a = pd.Timestamp("2000-01-01") + b = pd.Timestamp("2000-01-02") + return DatetimeArray(np.array([b, "NaT", a], dtype="datetime64[ns]"), dtype=dtype) + + +@pytest.fixture +def data_for_grouping(dtype): + """ + Expected to be like [B, B, NA, NA, A, A, B, C] + + Where A < B < C and NA is missing + """ + a = pd.Timestamp("2000-01-01") + b = pd.Timestamp("2000-01-02") + c = pd.Timestamp("2000-01-03") + na = "NaT" + return DatetimeArray( + np.array([b, b, na, na, a, a, b, c], dtype="datetime64[ns]"), dtype=dtype + ) + + +@pytest.fixture +def na_cmp(): + def cmp(a, b): + return a is pd.NaT and a is b + + return cmp + + +@pytest.fixture +def na_value(): + return pd.NaT + + +# ---------------------------------------------------------------------------- +class BaseDatetimeTests: + pass + + +# ---------------------------------------------------------------------------- +# Tests +class TestDatetimeDtype(BaseDatetimeTests, base.BaseDtypeTests): + pass + + +class TestConstructors(BaseDatetimeTests, base.BaseConstructorsTests): + def test_series_constructor(self, data): + # Series construction drops any .freq attr + data = data._with_freq(None) + super().test_series_constructor(data) + + +class TestGetitem(BaseDatetimeTests, base.BaseGetitemTests): + pass + + +class 
TestIndex(base.BaseIndexTests): + pass + + +class TestMethods(BaseDatetimeTests, base.BaseMethodsTests): + def test_combine_add(self, data_repeated): + # Timestamp.__add__(Timestamp) not defined + pass + + +class TestInterface(BaseDatetimeTests, base.BaseInterfaceTests): + pass + + +class TestArithmeticOps(BaseDatetimeTests, base.BaseArithmeticOpsTests): + implements = {"__sub__", "__rsub__"} + + def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): + # frame & scalar + if all_arithmetic_operators in self.implements: + df = pd.DataFrame({"A": data}) + self.check_opname(df, all_arithmetic_operators, data[0], exc=None) + else: + # ... but not the rest. + super().test_arith_frame_with_scalar(data, all_arithmetic_operators) + + def test_arith_series_with_scalar(self, data, all_arithmetic_operators): + if all_arithmetic_operators in self.implements: + ser = pd.Series(data) + self.check_opname(ser, all_arithmetic_operators, ser.iloc[0], exc=None) + else: + # ... but not the rest. + super().test_arith_series_with_scalar(data, all_arithmetic_operators) + + def test_add_series_with_extension_array(self, data): + # Datetime + Datetime not implemented + ser = pd.Series(data) + msg = "cannot add DatetimeArray and DatetimeArray" + with pytest.raises(TypeError, match=msg): + ser + data + + def test_arith_series_with_array(self, data, all_arithmetic_operators): + if all_arithmetic_operators in self.implements: + ser = pd.Series(data) + self.check_opname(ser, all_arithmetic_operators, ser.iloc[0], exc=None) + else: + # ... but not the rest. + super().test_arith_series_with_scalar(data, all_arithmetic_operators) + + def test_divmod_series_array(self): + # GH 23287 + # skipping because it is not implemented + pass + + +class TestCasting(BaseDatetimeTests, base.BaseCastingTests): + pass + + +class TestComparisonOps(BaseDatetimeTests, base.BaseComparisonOpsTests): + pass + + +class TestMissing(BaseDatetimeTests, base.BaseMissingTests): + pass + + +class TestReshaping(BaseDatetimeTests, base.BaseReshapingTests): + @pytest.mark.skip(reason="We have DatetimeTZBlock") + def test_concat(self, data, in_frame): + pass + + +class TestSetitem(BaseDatetimeTests, base.BaseSetitemTests): + pass + + +class TestGroupby(BaseDatetimeTests, base.BaseGroupbyTests): + pass + + +class TestPrinting(BaseDatetimeTests, base.BasePrintingTests): + pass + + +class Test2DCompat(BaseDatetimeTests, base.NDArrayBacked2DTests): + pass diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/test_extension.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/test_extension.py new file mode 100644 index 0000000..939b836 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/test_extension.py @@ -0,0 +1,26 @@ +""" +Tests for behavior if an author does *not* implement EA methods. 
+""" +import numpy as np +import pytest + +from pandas.core.arrays import ExtensionArray + + +class MyEA(ExtensionArray): + def __init__(self, values): + self._values = values + + +@pytest.fixture +def data(): + arr = np.arange(10) + return MyEA(arr) + + +class TestExtensionArray: + def test_errors(self, data, all_arithmetic_operators): + # invalid ops + op_name = all_arithmetic_operators + with pytest.raises(AttributeError): + getattr(data, op_name) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/test_external_block.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/test_external_block.py new file mode 100644 index 0000000..13dec96 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/test_external_block.py @@ -0,0 +1,40 @@ +import numpy as np +import pytest + +from pandas._libs.internals import BlockPlacement +import pandas.util._test_decorators as td + +import pandas as pd +from pandas.core.internals import BlockManager +from pandas.core.internals.blocks import ExtensionBlock + +pytestmark = td.skip_array_manager_invalid_test + + +class CustomBlock(ExtensionBlock): + + _holder = np.ndarray + + # Cannot override final attribute "_can_hold_na" + @property # type: ignore[misc] + def _can_hold_na(self) -> bool: + return False + + +@pytest.fixture +def df(): + df1 = pd.DataFrame({"a": [1, 2, 3]}) + blocks = df1._mgr.blocks + values = np.arange(3, dtype="int64") + bp = BlockPlacement(slice(1, 2)) + custom_block = CustomBlock(values, placement=bp, ndim=2) + blocks = blocks + (custom_block,) + block_manager = BlockManager(blocks, [pd.Index(["a", "b"]), df1.index]) + return pd.DataFrame(block_manager) + + +def test_concat_axis1(df): + # GH17954 + df2 = pd.DataFrame({"c": [0.1, 0.2, 0.3]}) + res = pd.concat([df, df2], axis=1) + assert isinstance(res._mgr.blocks[1], CustomBlock) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/test_floating.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/test_floating.py new file mode 100644 index 0000000..4f8c46c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/test_floating.py @@ -0,0 +1,232 @@ +""" +This file contains a minimal set of tests for compliance with the extension +array interface test suite, and should contain no other tests. +The test suite for the full functionality of the array is located in +`pandas/tests/arrays/`. + +The tests in this file are inherited from the BaseExtensionTests, and only +minimal tweaks should be applied to get the tests passing (by overwriting a +parent method). + +Additional tests should either be added to one of the BaseExtensionTests +classes (if they are relevant for the extension interface for all dtypes), or +be added to the array-specific tests in `pandas/tests/arrays/`. 
+ +""" +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_extension_array_dtype + +import pandas as pd +import pandas._testing as tm +from pandas.api.types import is_float_dtype +from pandas.core.arrays.floating import ( + Float32Dtype, + Float64Dtype, +) +from pandas.tests.extension import base + + +def make_data(): + return ( + list(np.arange(0.1, 0.9, 0.1)) + + [pd.NA] + + list(np.arange(1, 9.8, 0.1)) + + [pd.NA] + + [9.9, 10.0] + ) + + +@pytest.fixture(params=[Float32Dtype, Float64Dtype]) +def dtype(request): + return request.param() + + +@pytest.fixture +def data(dtype): + return pd.array(make_data(), dtype=dtype) + + +@pytest.fixture +def data_for_twos(dtype): + return pd.array(np.ones(100) * 2, dtype=dtype) + + +@pytest.fixture +def data_missing(dtype): + return pd.array([pd.NA, 0.1], dtype=dtype) + + +@pytest.fixture +def data_for_sorting(dtype): + return pd.array([0.1, 0.2, 0.0], dtype=dtype) + + +@pytest.fixture +def data_missing_for_sorting(dtype): + return pd.array([0.1, pd.NA, 0.0], dtype=dtype) + + +@pytest.fixture +def na_cmp(): + # we are pd.NA + return lambda x, y: x is pd.NA and y is pd.NA + + +@pytest.fixture +def na_value(): + return pd.NA + + +@pytest.fixture +def data_for_grouping(dtype): + b = 0.1 + a = 0.0 + c = 0.2 + na = pd.NA + return pd.array([b, b, na, na, a, a, b, c], dtype=dtype) + + +class TestDtype(base.BaseDtypeTests): + pass + + +class TestArithmeticOps(base.BaseArithmeticOpsTests): + def check_opname(self, s, op_name, other, exc=None): + # overwriting to indicate ops don't raise an error + super().check_opname(s, op_name, other, exc=None) + + def _check_op(self, s, op, other, op_name, exc=NotImplementedError): + if exc is None: + sdtype = tm.get_dtype(s) + if ( + hasattr(other, "dtype") + and not is_extension_array_dtype(other.dtype) + and is_float_dtype(other.dtype) + ): + # other is np.float64 and would therefore always result in + # upcasting, so keeping other as same numpy_dtype + other = other.astype(sdtype.numpy_dtype) + + result = op(s, other) + expected = self._combine(s, other, op) + + # combine method result in 'biggest' (float64) dtype + expected = expected.astype(sdtype) + + self.assert_equal(result, expected) + else: + with pytest.raises(exc): + op(s, other) + + def _check_divmod_op(self, s, op, other, exc=None): + super()._check_divmod_op(s, op, other, None) + + +class TestComparisonOps(base.BaseComparisonOpsTests): + # TODO: share with IntegerArray? 
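+    # Why the cast below: comparisons on the nullable float dtypes propagate
+    # pd.NA and come back as the masked "boolean" dtype rather than a plain
+    # bool ndarray, e.g. (an added sketch, not upstream pandas code):
+    #   pd.array([0.1, pd.NA], dtype="Float64") == 0.1
+    #   -> <BooleanArray> [True, <NA>]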
+ def _check_op(self, s, op, other, op_name, exc=NotImplementedError): + if exc is None: + result = op(s, other) + # Override to do the astype to boolean + expected = s.combine(other, op).astype("boolean") + self.assert_series_equal(result, expected) + else: + with pytest.raises(exc): + op(s, other) + + def check_opname(self, s, op_name, other, exc=None): + super().check_opname(s, op_name, other, exc=None) + + def _compare_other(self, s, data, op, other): + op_name = f"__{op.__name__}__" + self.check_opname(s, op_name, other) + + +class TestInterface(base.BaseInterfaceTests): + pass + + +class TestConstructors(base.BaseConstructorsTests): + pass + + +class TestReshaping(base.BaseReshapingTests): + pass + + +class TestGetitem(base.BaseGetitemTests): + pass + + +class TestSetitem(base.BaseSetitemTests): + pass + + +class TestIndex(base.BaseIndexTests): + pass + + +class TestMissing(base.BaseMissingTests): + pass + + +class TestMethods(base.BaseMethodsTests): + @pytest.mark.parametrize("dropna", [True, False]) + def test_value_counts(self, all_data, dropna): + all_data = all_data[:10] + if dropna: + other = np.array(all_data[~all_data.isna()]) + else: + other = all_data + + result = pd.Series(all_data).value_counts(dropna=dropna).sort_index() + expected = pd.Series(other).value_counts(dropna=dropna).sort_index() + expected = expected.astype("Int64") + expected.index = expected.index.astype(all_data.dtype) + + self.assert_series_equal(result, expected) + + @pytest.mark.xfail(reason="uses nullable integer") + def test_value_counts_with_normalize(self, data): + super().test_value_counts_with_normalize(data) + + +class TestCasting(base.BaseCastingTests): + pass + + +class TestGroupby(base.BaseGroupbyTests): + pass + + +class TestNumericReduce(base.BaseNumericReduceTests): + def check_reduce(self, s, op_name, skipna): + # overwrite to ensure pd.NA is tested instead of np.nan + # https://github.com/pandas-dev/pandas/issues/30958 + result = getattr(s, op_name)(skipna=skipna) + if not skipna and s.isna().any(): + expected = pd.NA + else: + expected = getattr(s.dropna().astype(s.dtype.numpy_dtype), op_name)( + skipna=skipna + ) + tm.assert_almost_equal(result, expected) + + +@pytest.mark.skip(reason="Tested in tests/reductions/test_reductions.py") +class TestBooleanReduce(base.BaseBooleanReduceTests): + pass + + +class TestPrinting(base.BasePrintingTests): + pass + + +class TestParsing(base.BaseParsingTests): + pass + + +class Test2DCompat(base.Dim2CompatTests): + pass diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/test_integer.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/test_integer.py new file mode 100644 index 0000000..8c8202e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/test_integer.py @@ -0,0 +1,253 @@ +""" +This file contains a minimal set of tests for compliance with the extension +array interface test suite, and should contain no other tests. +The test suite for the full functionality of the array is located in +`pandas/tests/arrays/`. + +The tests in this file are inherited from the BaseExtensionTests, and only +minimal tweaks should be applied to get the tests passing (by overwriting a +parent method). + +Additional tests should either be added to one of the BaseExtensionTests +classes (if they are relevant for the extension interface for all dtypes), or +be added to the array-specific tests in `pandas/tests/arrays/`. 
+ +""" +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.api.types import ( + is_extension_array_dtype, + is_integer_dtype, +) +from pandas.core.arrays.integer import ( + Int8Dtype, + Int16Dtype, + Int32Dtype, + Int64Dtype, + UInt8Dtype, + UInt16Dtype, + UInt32Dtype, + UInt64Dtype, +) +from pandas.tests.extension import base + + +def make_data(): + return list(range(1, 9)) + [pd.NA] + list(range(10, 98)) + [pd.NA] + [99, 100] + + +@pytest.fixture( + params=[ + Int8Dtype, + Int16Dtype, + Int32Dtype, + Int64Dtype, + UInt8Dtype, + UInt16Dtype, + UInt32Dtype, + UInt64Dtype, + ] +) +def dtype(request): + return request.param() + + +@pytest.fixture +def data(dtype): + return pd.array(make_data(), dtype=dtype) + + +@pytest.fixture +def data_for_twos(dtype): + return pd.array(np.ones(100) * 2, dtype=dtype) + + +@pytest.fixture +def data_missing(dtype): + return pd.array([pd.NA, 1], dtype=dtype) + + +@pytest.fixture +def data_for_sorting(dtype): + return pd.array([1, 2, 0], dtype=dtype) + + +@pytest.fixture +def data_missing_for_sorting(dtype): + return pd.array([1, pd.NA, 0], dtype=dtype) + + +@pytest.fixture +def na_cmp(): + # we are pd.NA + return lambda x, y: x is pd.NA and y is pd.NA + + +@pytest.fixture +def na_value(): + return pd.NA + + +@pytest.fixture +def data_for_grouping(dtype): + b = 1 + a = 0 + c = 2 + na = pd.NA + return pd.array([b, b, na, na, a, a, b, c], dtype=dtype) + + +class TestDtype(base.BaseDtypeTests): + pass + + +class TestArithmeticOps(base.BaseArithmeticOpsTests): + def check_opname(self, s, op_name, other, exc=None): + # overwriting to indicate ops don't raise an error + super().check_opname(s, op_name, other, exc=None) + + def _check_op(self, s, op, other, op_name, exc=NotImplementedError): + if exc is None: + sdtype = tm.get_dtype(s) + + if ( + hasattr(other, "dtype") + and not is_extension_array_dtype(other.dtype) + and is_integer_dtype(other.dtype) + and sdtype.is_unsigned_integer + ): + # TODO: comment below is inaccurate; other can be int8, int16, ... + # and the trouble is that e.g. 
if s is UInt8 and other is int8, + # then result is UInt16 + # other is np.int64 and would therefore always result in + # upcasting, so keeping other as same numpy_dtype + other = other.astype(sdtype.numpy_dtype) + + result = op(s, other) + expected = self._combine(s, other, op) + + if op_name in ("__rtruediv__", "__truediv__", "__div__"): + expected = expected.fillna(np.nan).astype("Float64") + else: + # combine method result in 'biggest' (int64) dtype + expected = expected.astype(sdtype) + + self.assert_equal(result, expected) + else: + with pytest.raises(exc): + op(s, other) + + def _check_divmod_op(self, s, op, other, exc=None): + super()._check_divmod_op(s, op, other, None) + + +class TestComparisonOps(base.BaseComparisonOpsTests): + def _check_op(self, s, op, other, op_name, exc=NotImplementedError): + if exc is None: + result = op(s, other) + # Override to do the astype to boolean + expected = s.combine(other, op).astype("boolean") + self.assert_series_equal(result, expected) + else: + with pytest.raises(exc): + op(s, other) + + def check_opname(self, s, op_name, other, exc=None): + super().check_opname(s, op_name, other, exc=None) + + def _compare_other(self, s, data, op, other): + op_name = f"__{op.__name__}__" + self.check_opname(s, op_name, other) + + +class TestInterface(base.BaseInterfaceTests): + pass + + +class TestConstructors(base.BaseConstructorsTests): + pass + + +class TestReshaping(base.BaseReshapingTests): + pass + + # for test_concat_mixed_dtypes test + # concat of an Integer and Int coerces to object dtype + # TODO(jreback) once integrated this would + + +class TestGetitem(base.BaseGetitemTests): + pass + + +class TestSetitem(base.BaseSetitemTests): + pass + + +class TestIndex(base.BaseIndexTests): + pass + + +class TestMissing(base.BaseMissingTests): + pass + + +class TestMethods(base.BaseMethodsTests): + @pytest.mark.parametrize("dropna", [True, False]) + def test_value_counts(self, all_data, dropna): + all_data = all_data[:10] + if dropna: + other = np.array(all_data[~all_data.isna()]) + else: + other = all_data + + result = pd.Series(all_data).value_counts(dropna=dropna).sort_index() + expected = pd.Series(other).value_counts(dropna=dropna).sort_index() + expected = expected.astype("Int64") + expected.index = expected.index.astype(all_data.dtype) + + self.assert_series_equal(result, expected) + + @pytest.mark.xfail(reason="uses nullable integer") + def test_value_counts_with_normalize(self, data): + super().test_value_counts_with_normalize(data) + + +class TestCasting(base.BaseCastingTests): + pass + + +class TestGroupby(base.BaseGroupbyTests): + pass + + +class TestNumericReduce(base.BaseNumericReduceTests): + def check_reduce(self, s, op_name, skipna): + # overwrite to ensure pd.NA is tested instead of np.nan + # https://github.com/pandas-dev/pandas/issues/30958 + result = getattr(s, op_name)(skipna=skipna) + if not skipna and s.isna().any(): + expected = pd.NA + else: + expected = getattr(s.dropna().astype("int64"), op_name)(skipna=skipna) + tm.assert_almost_equal(result, expected) + + +@pytest.mark.skip(reason="Tested in tests/reductions/test_reductions.py") +class TestBooleanReduce(base.BaseBooleanReduceTests): + pass + + +class TestPrinting(base.BasePrintingTests): + pass + + +class TestParsing(base.BaseParsingTests): + pass + + +class Test2DCompat(base.Dim2CompatTests): + pass diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/test_interval.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/test_interval.py new file mode 
100644 index 0000000..e2f4d69 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/test_interval.py @@ -0,0 +1,186 @@ +""" +This file contains a minimal set of tests for compliance with the extension +array interface test suite, and should contain no other tests. +The test suite for the full functionality of the array is located in +`pandas/tests/arrays/`. + +The tests in this file are inherited from the BaseExtensionTests, and only +minimal tweaks should be applied to get the tests passing (by overwriting a +parent method). + +Additional tests should either be added to one of the BaseExtensionTests +classes (if they are relevant for the extension interface for all dtypes), or +be added to the array-specific tests in `pandas/tests/arrays/`. + +""" +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import IntervalDtype + +from pandas import ( + Interval, + Series, +) +from pandas.core.arrays import IntervalArray +from pandas.tests.extension import base + + +def make_data(): + N = 100 + left_array = np.random.uniform(size=N).cumsum() + right_array = left_array + np.random.uniform(size=N) + return [Interval(left, right) for left, right in zip(left_array, right_array)] + + +@pytest.fixture +def dtype(): + return IntervalDtype() + + +@pytest.fixture +def data(): + """Length-100 PeriodArray for semantics test.""" + return IntervalArray(make_data()) + + +@pytest.fixture +def data_missing(): + """Length 2 array with [NA, Valid]""" + return IntervalArray.from_tuples([None, (0, 1)]) + + +@pytest.fixture +def data_for_sorting(): + return IntervalArray.from_tuples([(1, 2), (2, 3), (0, 1)]) + + +@pytest.fixture +def data_missing_for_sorting(): + return IntervalArray.from_tuples([(1, 2), None, (0, 1)]) + + +@pytest.fixture +def na_value(): + return np.nan + + +@pytest.fixture +def data_for_grouping(): + a = (0, 1) + b = (1, 2) + c = (2, 3) + return IntervalArray.from_tuples([b, b, None, None, a, a, b, c]) + + +class BaseInterval: + pass + + +class TestDtype(BaseInterval, base.BaseDtypeTests): + pass + + +class TestCasting(BaseInterval, base.BaseCastingTests): + pass + + +class TestConstructors(BaseInterval, base.BaseConstructorsTests): + pass + + +class TestGetitem(BaseInterval, base.BaseGetitemTests): + pass + + +class TestIndex(base.BaseIndexTests): + pass + + +class TestGrouping(BaseInterval, base.BaseGroupbyTests): + pass + + +class TestInterface(BaseInterval, base.BaseInterfaceTests): + pass + + +class TestReduce(base.BaseNoReduceTests): + @pytest.mark.parametrize("skipna", [True, False]) + def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna): + op_name = all_numeric_reductions + ser = Series(data) + + if op_name in ["min", "max"]: + # IntervalArray *does* implement these + assert getattr(ser, op_name)(skipna=skipna) in data + assert getattr(data, op_name)(skipna=skipna) in data + return + + super().test_reduce_series_numeric(data, all_numeric_reductions, skipna) + + +class TestMethods(BaseInterval, base.BaseMethodsTests): + @pytest.mark.skip(reason="addition is not defined for intervals") + def test_combine_add(self, data_repeated): + pass + + @pytest.mark.xfail( + reason="Raises with incorrect message bc it disallows *all* listlikes " + "instead of just wrong-length listlikes" + ) + def test_fillna_length_mismatch(self, data_missing): + super().test_fillna_length_mismatch(data_missing) + + +class TestMissing(BaseInterval, base.BaseMissingTests): + # Index.fillna only accepts scalar `value`, so we have to skip all + # non-scalar fill 
tests. + unsupported_fill = pytest.mark.skip("Unsupported fillna option.") + + @unsupported_fill + def test_fillna_limit_pad(self): + pass + + @unsupported_fill + def test_fillna_series_method(self): + pass + + @unsupported_fill + def test_fillna_limit_backfill(self): + pass + + @unsupported_fill + def test_fillna_no_op_returns_copy(self): + pass + + @unsupported_fill + def test_fillna_series(self): + pass + + def test_fillna_non_scalar_raises(self, data_missing): + msg = "can only insert Interval objects and NA into an IntervalArray" + with pytest.raises(TypeError, match=msg): + data_missing.fillna([1, 1]) + + +class TestReshaping(BaseInterval, base.BaseReshapingTests): + pass + + +class TestSetitem(BaseInterval, base.BaseSetitemTests): + pass + + +class TestPrinting(BaseInterval, base.BasePrintingTests): + @pytest.mark.skip(reason="custom repr") + def test_array_repr(self, data, size): + pass + + +class TestParsing(BaseInterval, base.BaseParsingTests): + @pytest.mark.parametrize("engine", ["c", "python"]) + def test_EA_types(self, engine, data): + expected_msg = r".*must implement _from_sequence_of_strings.*" + with pytest.raises(NotImplementedError, match=expected_msg): + super().test_EA_types(engine, data) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/test_numpy.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/test_numpy.py new file mode 100644 index 0000000..2e1112c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/test_numpy.py @@ -0,0 +1,463 @@ +""" +This file contains a minimal set of tests for compliance with the extension +array interface test suite, and should contain no other tests. +The test suite for the full functionality of the array is located in +`pandas/tests/arrays/`. + +The tests in this file are inherited from the BaseExtensionTests, and only +minimal tweaks should be applied to get the tests passing (by overwriting a +parent method). + +Additional tests should either be added to one of the BaseExtensionTests +classes (if they are relevant for the extension interface for all dtypes), or +be added to the array-specific tests in `pandas/tests/arrays/`. + +Note: we do not bother with base.BaseIndexTests because PandasArray +will never be held in an Index. 
+""" +import numpy as np +import pytest + +from pandas.core.dtypes.cast import can_hold_element +from pandas.core.dtypes.dtypes import ( + ExtensionDtype, + PandasDtype, +) + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays.numpy_ import PandasArray +from pandas.core.internals import blocks +from pandas.tests.extension import base + + +def _can_hold_element_patched(obj, element) -> bool: + if isinstance(element, PandasArray): + element = element.to_numpy() + return can_hold_element(obj, element) + + +orig_assert_attr_equal = tm.assert_attr_equal + + +def _assert_attr_equal(attr: str, left, right, obj: str = "Attributes"): + """ + patch tm.assert_attr_equal so PandasDtype("object") is closed enough to + np.dtype("object") + """ + if attr == "dtype": + lattr = getattr(left, "dtype", None) + rattr = getattr(right, "dtype", None) + if isinstance(lattr, PandasDtype) and not isinstance(rattr, PandasDtype): + left = left.astype(lattr.numpy_dtype) + elif isinstance(rattr, PandasDtype) and not isinstance(lattr, PandasDtype): + right = right.astype(rattr.numpy_dtype) + + orig_assert_attr_equal(attr, left, right, obj) + + +@pytest.fixture(params=["float", "object"]) +def dtype(request): + return PandasDtype(np.dtype(request.param)) + + +@pytest.fixture +def allow_in_pandas(monkeypatch): + """ + A monkeypatch to tells pandas to let us in. + + By default, passing a PandasArray to an index / series / frame + constructor will unbox that PandasArray to an ndarray, and treat + it as a non-EA column. We don't want people using EAs without + reason. + + The mechanism for this is a check against ABCPandasArray + in each constructor. + + But, for testing, we need to allow them in pandas. So we patch + the _typ of PandasArray, so that we evade the ABCPandasArray + check. + """ + with monkeypatch.context() as m: + m.setattr(PandasArray, "_typ", "extension") + m.setattr(blocks, "can_hold_element", _can_hold_element_patched) + m.setattr(tm.asserters, "assert_attr_equal", _assert_attr_equal) + yield + + +@pytest.fixture +def data(allow_in_pandas, dtype): + if dtype.numpy_dtype == "object": + return pd.Series([(i,) for i in range(100)]).array + return PandasArray(np.arange(1, 101, dtype=dtype._dtype)) + + +@pytest.fixture +def data_missing(allow_in_pandas, dtype): + if dtype.numpy_dtype == "object": + return PandasArray(np.array([np.nan, (1,)], dtype=object)) + return PandasArray(np.array([np.nan, 1.0])) + + +@pytest.fixture +def na_value(): + return np.nan + + +@pytest.fixture +def na_cmp(): + def cmp(a, b): + return np.isnan(a) and np.isnan(b) + + return cmp + + +@pytest.fixture +def data_for_sorting(allow_in_pandas, dtype): + """Length-3 array with a known sort order. + + This should be three items [B, C, A] with + A < B < C + """ + if dtype.numpy_dtype == "object": + # Use an empty tuple for first element, then remove, + # to disable np.array's shape inference. + return PandasArray(np.array([(), (2,), (3,), (1,)], dtype=object)[1:]) + return PandasArray(np.array([1, 2, 0])) + + +@pytest.fixture +def data_missing_for_sorting(allow_in_pandas, dtype): + """Length-3 array with a known sort order. + + This should be three items [B, NA, A] with + A < B and NA missing. + """ + if dtype.numpy_dtype == "object": + return PandasArray(np.array([(1,), np.nan, (0,)], dtype=object)) + return PandasArray(np.array([1, np.nan, 0])) + + +@pytest.fixture +def data_for_grouping(allow_in_pandas, dtype): + """Data for factorization, grouping, and unique tests. 
+ + Expected to be like [B, B, NA, NA, A, A, B, C] + + Where A < B < C and NA is missing + """ + if dtype.numpy_dtype == "object": + a, b, c = (1,), (2,), (3,) + else: + a, b, c = np.arange(3) + return PandasArray( + np.array([b, b, np.nan, np.nan, a, a, b, c], dtype=dtype.numpy_dtype) + ) + + +@pytest.fixture +def skip_numpy_object(dtype, request): + """ + Tests for PandasArray with nested data. Users typically won't create + these objects via `pd.array`, but they can show up through `.array` + on a Series with nested data. Many of the base tests fail, as they aren't + appropriate for nested data. + + This fixture allows these tests to be skipped when used as a usefixtures + marker to either an individual test or a test class. + """ + if dtype == "object": + mark = pytest.mark.xfail(reason="Fails for object dtype") + request.node.add_marker(mark) + + +skip_nested = pytest.mark.usefixtures("skip_numpy_object") + + +class BaseNumPyTests: + @classmethod + def assert_series_equal(cls, left, right, *args, **kwargs): + # base class tests hard-code expected values with numpy dtypes, + # whereas we generally want the corresponding PandasDtype + if ( + isinstance(right, pd.Series) + and not isinstance(right.dtype, ExtensionDtype) + and isinstance(left.dtype, PandasDtype) + ): + right = right.astype(PandasDtype(right.dtype)) + return tm.assert_series_equal(left, right, *args, **kwargs) + + +class TestCasting(BaseNumPyTests, base.BaseCastingTests): + @skip_nested + def test_astype_str(self, data): + # ValueError: setting an array element with a sequence + super().test_astype_str(data) + + +class TestConstructors(BaseNumPyTests, base.BaseConstructorsTests): + @pytest.mark.skip(reason="We don't register our dtype") + # We don't want to register. This test should probably be split in two. + def test_from_dtype(self, data): + pass + + @skip_nested + def test_series_constructor_scalar_with_index(self, data, dtype): + # ValueError: Length of passed values is 1, index implies 3. + super().test_series_constructor_scalar_with_index(data, dtype) + + +class TestDtype(BaseNumPyTests, base.BaseDtypeTests): + @pytest.mark.skip(reason="Incorrect expected.") + # we unsurprisingly clash with a NumPy name. + def test_check_dtype(self, data): + pass + + +class TestGetitem(BaseNumPyTests, base.BaseGetitemTests): + @skip_nested + def test_getitem_scalar(self, data): + # AssertionError + super().test_getitem_scalar(data) + + +class TestGroupby(BaseNumPyTests, base.BaseGroupbyTests): + def test_groupby_extension_apply( + self, data_for_grouping, groupby_apply_op, request + ): + dummy = groupby_apply_op([None]) + if ( + isinstance(dummy, pd.Series) + and data_for_grouping.dtype.numpy_dtype == object + ): + mark = pytest.mark.xfail(reason="raises in MultiIndex construction") + request.node.add_marker(mark) + super().test_groupby_extension_apply(data_for_grouping, groupby_apply_op) + + +class TestInterface(BaseNumPyTests, base.BaseInterfaceTests): + @skip_nested + def test_array_interface(self, data): + # NumPy array shape inference + super().test_array_interface(data) + + +class TestMethods(BaseNumPyTests, base.BaseMethodsTests): + @skip_nested + def test_shift_fill_value(self, data): + # np.array shape inference. Shift implementation fails. + super().test_shift_fill_value(data) + + @skip_nested + def test_fillna_copy_frame(self, data_missing): + # The "scalar" for this array isn't a scalar. 
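+        # Here the object-dtype "scalar" drawn from data_missing is the
+        # tuple (1,), which DataFrame.fillna rejects as a list-like value,
+        # hence the skip_nested marker above (an added explanatory note,
+        # not upstream pandas code).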
+ super().test_fillna_copy_frame(data_missing) + + @skip_nested + def test_fillna_copy_series(self, data_missing): + # The "scalar" for this array isn't a scalar. + super().test_fillna_copy_series(data_missing) + + @skip_nested + def test_searchsorted(self, data_for_sorting, as_series): + # Test setup fails. + super().test_searchsorted(data_for_sorting, as_series) + + @pytest.mark.xfail(reason="PandasArray.diff may fail on dtype") + def test_diff(self, data, periods): + return super().test_diff(data, periods) + + def test_insert(self, data, request): + if data.dtype.numpy_dtype == object: + mark = pytest.mark.xfail(reason="Dimension mismatch in np.concatenate") + request.node.add_marker(mark) + + super().test_insert(data) + + @skip_nested + def test_insert_invalid(self, data, invalid_scalar): + # PandasArray[object] can hold anything, so skip + super().test_insert_invalid(data, invalid_scalar) + + +class TestArithmetics(BaseNumPyTests, base.BaseArithmeticOpsTests): + divmod_exc = None + series_scalar_exc = None + frame_scalar_exc = None + series_array_exc = None + + @skip_nested + def test_divmod(self, data): + super().test_divmod(data) + + @skip_nested + def test_divmod_series_array(self, data): + ser = pd.Series(data) + self._check_divmod_op(ser, divmod, data, exc=None) + + @skip_nested + def test_arith_series_with_scalar(self, data, all_arithmetic_operators): + super().test_arith_series_with_scalar(data, all_arithmetic_operators) + + def test_arith_series_with_array(self, data, all_arithmetic_operators, request): + opname = all_arithmetic_operators + if data.dtype.numpy_dtype == object and opname not in ["__add__", "__radd__"]: + mark = pytest.mark.xfail(reason="Fails for object dtype") + request.node.add_marker(mark) + super().test_arith_series_with_array(data, all_arithmetic_operators) + + @skip_nested + def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): + super().test_arith_frame_with_scalar(data, all_arithmetic_operators) + + +class TestPrinting(BaseNumPyTests, base.BasePrintingTests): + pass + + +class TestNumericReduce(BaseNumPyTests, base.BaseNumericReduceTests): + def check_reduce(self, s, op_name, skipna): + result = getattr(s, op_name)(skipna=skipna) + # avoid coercing int -> float. Just cast to the actual numpy type. + expected = getattr(s.astype(s.dtype._dtype), op_name)(skipna=skipna) + tm.assert_almost_equal(result, expected) + + @pytest.mark.parametrize("skipna", [True, False]) + def test_reduce_series(self, data, all_boolean_reductions, skipna): + super().test_reduce_series(data, all_boolean_reductions, skipna) + + +@skip_nested +class TestBooleanReduce(BaseNumPyTests, base.BaseBooleanReduceTests): + pass + + +class TestMissing(BaseNumPyTests, base.BaseMissingTests): + @skip_nested + def test_fillna_series(self, data_missing): + # Non-scalar "scalar" values. + super().test_fillna_series(data_missing) + + @skip_nested + def test_fillna_frame(self, data_missing): + # Non-scalar "scalar" values. 
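+        # Same non-scalar fill-value limitation as test_fillna_series above
+        # (an added explanatory note, not upstream pandas code).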
+ super().test_fillna_frame(data_missing) + + +class TestReshaping(BaseNumPyTests, base.BaseReshapingTests): + @pytest.mark.skip(reason="Incorrect expected.") + def test_merge(self, data, na_value): + # Fails creating expected (key column becomes a PandasDtype because) + super().test_merge(data, na_value) + + @pytest.mark.parametrize( + "in_frame", + [ + True, + pytest.param( + False, + marks=pytest.mark.xfail(reason="PandasArray inconsistently extracted"), + ), + ], + ) + def test_concat(self, data, in_frame): + super().test_concat(data, in_frame) + + +class TestSetitem(BaseNumPyTests, base.BaseSetitemTests): + @skip_nested + def test_setitem_invalid(self, data, invalid_scalar): + # object dtype can hold anything, so doesn't raise + super().test_setitem_invalid(data, invalid_scalar) + + @skip_nested + def test_setitem_sequence_broadcasts(self, data, box_in_series): + # ValueError: cannot set using a list-like indexer with a different + # length than the value + super().test_setitem_sequence_broadcasts(data, box_in_series) + + @skip_nested + @pytest.mark.parametrize("setter", ["loc", None]) + def test_setitem_mask_broadcast(self, data, setter): + # ValueError: cannot set using a list-like indexer with a different + # length than the value + super().test_setitem_mask_broadcast(data, setter) + + @skip_nested + def test_setitem_scalar_key_sequence_raise(self, data): + # Failed: DID NOT RAISE + super().test_setitem_scalar_key_sequence_raise(data) + + # TODO: there is some issue with PandasArray, therefore, + # skip the setitem test for now, and fix it later (GH 31446) + + @skip_nested + @pytest.mark.parametrize( + "mask", + [ + np.array([True, True, True, False, False]), + pd.array([True, True, True, False, False], dtype="boolean"), + ], + ids=["numpy-array", "boolean-array"], + ) + def test_setitem_mask(self, data, mask, box_in_series): + super().test_setitem_mask(data, mask, box_in_series) + + def test_setitem_mask_raises(self, data, box_in_series): + super().test_setitem_mask_raises(data, box_in_series) + + @skip_nested + @pytest.mark.parametrize( + "idx", + [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])], + ids=["list", "integer-array", "numpy-array"], + ) + def test_setitem_integer_array(self, data, idx, box_in_series): + super().test_setitem_integer_array(data, idx, box_in_series) + + @pytest.mark.parametrize( + "idx, box_in_series", + [ + ([0, 1, 2, pd.NA], False), + pytest.param([0, 1, 2, pd.NA], True, marks=pytest.mark.xfail), + (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False), + (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False), + ], + ids=["list-False", "list-True", "integer-array-False", "integer-array-True"], + ) + def test_setitem_integer_with_missing_raises(self, data, idx, box_in_series): + super().test_setitem_integer_with_missing_raises(data, idx, box_in_series) + + @skip_nested + def test_setitem_slice(self, data, box_in_series): + super().test_setitem_slice(data, box_in_series) + + @skip_nested + def test_setitem_loc_iloc_slice(self, data): + super().test_setitem_loc_iloc_slice(data) + + def test_setitem_with_expansion_dataframe_column(self, data, full_indexer): + # https://github.com/pandas-dev/pandas/issues/32395 + df = expected = pd.DataFrame({"data": pd.Series(data)}) + result = pd.DataFrame(index=df.index) + + # because result has object dtype, the attempt to do setting inplace + # is successful, and object dtype is retained + key = full_indexer(df) + result.loc[key, "data"] = df["data"] + + # base class method has expected = df; PandasArray 
behaves oddly because + # we patch _typ for these tests. + if data.dtype.numpy_dtype != object: + if not isinstance(key, slice) or key != slice(None): + expected = pd.DataFrame({"data": data.to_numpy()}) + self.assert_frame_equal(result, expected) + + +@skip_nested +class TestParsing(BaseNumPyTests, base.BaseParsingTests): + pass + + +class Test2DCompat(BaseNumPyTests, base.NDArrayBacked2DTests): + pass diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/test_period.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/test_period.py new file mode 100644 index 0000000..bbb464c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/test_period.py @@ -0,0 +1,191 @@ +""" +This file contains a minimal set of tests for compliance with the extension +array interface test suite, and should contain no other tests. +The test suite for the full functionality of the array is located in +`pandas/tests/arrays/`. + +The tests in this file are inherited from the BaseExtensionTests, and only +minimal tweaks should be applied to get the tests passing (by overwriting a +parent method). + +Additional tests should either be added to one of the BaseExtensionTests +classes (if they are relevant for the extension interface for all dtypes), or +be added to the array-specific tests in `pandas/tests/arrays/`. + +""" +import numpy as np +import pytest + +from pandas._libs import iNaT + +from pandas.core.dtypes.dtypes import PeriodDtype + +import pandas as pd +from pandas.core.arrays import PeriodArray +from pandas.tests.extension import base + + +@pytest.fixture +def dtype(): + return PeriodDtype(freq="D") + + +@pytest.fixture +def data(dtype): + return PeriodArray(np.arange(1970, 2070), freq=dtype.freq) + + +@pytest.fixture +def data_for_twos(dtype): + return PeriodArray(np.ones(100) * 2, freq=dtype.freq) + + +@pytest.fixture +def data_for_sorting(dtype): + return PeriodArray([2018, 2019, 2017], freq=dtype.freq) + + +@pytest.fixture +def data_missing(dtype): + return PeriodArray([iNaT, 2017], freq=dtype.freq) + + +@pytest.fixture +def data_missing_for_sorting(dtype): + return PeriodArray([2018, iNaT, 2017], freq=dtype.freq) + + +@pytest.fixture +def data_for_grouping(dtype): + B = 2018 + NA = iNaT + A = 2017 + C = 2019 + return PeriodArray([B, B, NA, NA, A, A, B, C], freq=dtype.freq) + + +@pytest.fixture +def na_value(): + return pd.NaT + + +class BasePeriodTests: + pass + + +class TestPeriodDtype(BasePeriodTests, base.BaseDtypeTests): + pass + + +class TestConstructors(BasePeriodTests, base.BaseConstructorsTests): + pass + + +class TestGetitem(BasePeriodTests, base.BaseGetitemTests): + pass + + +class TestIndex(base.BaseIndexTests): + pass + + +class TestMethods(BasePeriodTests, base.BaseMethodsTests): + def test_combine_add(self, data_repeated): + # Period + Period is not defined. + pass + + +class TestInterface(BasePeriodTests, base.BaseInterfaceTests): + + pass + + +class TestArithmeticOps(BasePeriodTests, base.BaseArithmeticOpsTests): + implements = {"__sub__", "__rsub__"} + + def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): + # frame & scalar + if all_arithmetic_operators in self.implements: + df = pd.DataFrame({"A": data}) + self.check_opname(df, all_arithmetic_operators, data[0], exc=None) + else: + # ... but not the rest. + super().test_arith_frame_with_scalar(data, all_arithmetic_operators) + + def test_arith_series_with_scalar(self, data, all_arithmetic_operators): + # we implement substitution... 
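+        # Editor's note: "substitution" above is upstream shorthand for
+        # subtraction. Period defines only __sub__/__rsub__ among the
+        # arithmetic ops: e.g. pd.Period("2019", freq="A") minus
+        # pd.Period("2017", freq="A") gives a two-year offset, while
+        # Period + Period raises TypeError (see
+        # test_add_series_with_extension_array below).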
+ if all_arithmetic_operators in self.implements: + s = pd.Series(data) + self.check_opname(s, all_arithmetic_operators, s.iloc[0], exc=None) + else: + # ... but not the rest. + super().test_arith_series_with_scalar(data, all_arithmetic_operators) + + def test_arith_series_with_array(self, data, all_arithmetic_operators): + if all_arithmetic_operators in self.implements: + s = pd.Series(data) + self.check_opname(s, all_arithmetic_operators, s.iloc[0], exc=None) + else: + # ... but not the rest. + super().test_arith_series_with_scalar(data, all_arithmetic_operators) + + def _check_divmod_op(self, s, op, other, exc=NotImplementedError): + super()._check_divmod_op(s, op, other, exc=TypeError) + + def test_add_series_with_extension_array(self, data): + # we don't implement + for Period + s = pd.Series(data) + msg = ( + r"unsupported operand type\(s\) for \+: " + r"\'PeriodArray\' and \'PeriodArray\'" + ) + with pytest.raises(TypeError, match=msg): + s + data + + @pytest.mark.parametrize("box", [pd.Series, pd.DataFrame]) + def test_direct_arith_with_ndframe_returns_not_implemented(self, data, box): + # Override to use __sub__ instead of __add__ + other = pd.Series(data) + if box is pd.DataFrame: + other = other.to_frame() + + result = data.__sub__(other) + assert result is NotImplemented + + +class TestCasting(BasePeriodTests, base.BaseCastingTests): + pass + + +class TestComparisonOps(BasePeriodTests, base.BaseComparisonOpsTests): + pass + + +class TestMissing(BasePeriodTests, base.BaseMissingTests): + pass + + +class TestReshaping(BasePeriodTests, base.BaseReshapingTests): + pass + + +class TestSetitem(BasePeriodTests, base.BaseSetitemTests): + pass + + +class TestGroupby(BasePeriodTests, base.BaseGroupbyTests): + pass + + +class TestPrinting(BasePeriodTests, base.BasePrintingTests): + pass + + +class TestParsing(BasePeriodTests, base.BaseParsingTests): + @pytest.mark.parametrize("engine", ["c", "python"]) + def test_EA_types(self, engine, data): + super().test_EA_types(engine, data) + + +class Test2DCompat(BasePeriodTests, base.NDArrayBacked2DTests): + pass diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/test_sparse.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/test_sparse.py new file mode 100644 index 0000000..5890b69 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/test_sparse.py @@ -0,0 +1,513 @@ +""" +This file contains a minimal set of tests for compliance with the extension +array interface test suite, and should contain no other tests. +The test suite for the full functionality of the array is located in +`pandas/tests/arrays/`. + +The tests in this file are inherited from the BaseExtensionTests, and only +minimal tweaks should be applied to get the tests passing (by overwriting a +parent method). + +Additional tests should either be added to one of the BaseExtensionTests +classes (if they are relevant for the extension interface for all dtypes), or +be added to the array-specific tests in `pandas/tests/arrays/`. 
+ +""" + +import numpy as np +import pytest + +from pandas.compat import np_version_under1p20 +from pandas.errors import PerformanceWarning + +from pandas.core.dtypes.common import is_object_dtype + +import pandas as pd +from pandas import SparseDtype +import pandas._testing as tm +from pandas.arrays import SparseArray +from pandas.tests.extension import base + + +def make_data(fill_value): + if np.isnan(fill_value): + data = np.random.uniform(size=100) + else: + data = np.random.randint(1, 100, size=100) + if data[0] == data[1]: + data[0] += 1 + + data[2::3] = fill_value + return data + + +@pytest.fixture +def dtype(): + return SparseDtype() + + +@pytest.fixture(params=[0, np.nan]) +def data(request): + """Length-100 PeriodArray for semantics test.""" + res = SparseArray(make_data(request.param), fill_value=request.param) + return res + + +@pytest.fixture +def data_for_twos(request): + return SparseArray(np.ones(100) * 2) + + +@pytest.fixture(params=[0, np.nan]) +def data_missing(request): + """Length 2 array with [NA, Valid]""" + return SparseArray([np.nan, 1], fill_value=request.param) + + +@pytest.fixture(params=[0, np.nan]) +def data_repeated(request): + """Return different versions of data for count times""" + + def gen(count): + for _ in range(count): + yield SparseArray(make_data(request.param), fill_value=request.param) + + yield gen + + +@pytest.fixture(params=[0, np.nan]) +def data_for_sorting(request): + return SparseArray([2, 3, 1], fill_value=request.param) + + +@pytest.fixture(params=[0, np.nan]) +def data_missing_for_sorting(request): + return SparseArray([2, np.nan, 1], fill_value=request.param) + + +@pytest.fixture +def na_value(): + return np.nan + + +@pytest.fixture +def na_cmp(): + return lambda left, right: pd.isna(left) and pd.isna(right) + + +@pytest.fixture(params=[0, np.nan]) +def data_for_grouping(request): + return SparseArray([1, 1, np.nan, np.nan, 2, 2, 1, 3], fill_value=request.param) + + +@pytest.fixture(params=[0, np.nan]) +def data_for_compare(request): + return SparseArray([0, 0, np.nan, -2, -1, 4, 2, 3, 0, 0], fill_value=request.param) + + +class BaseSparseTests: + def _check_unsupported(self, data): + if data.dtype == SparseDtype(int, 0): + pytest.skip("Can't store nan in int array.") + + @pytest.mark.xfail(reason="SparseArray does not support setitem") + def test_ravel(self, data): + super().test_ravel(data) + + +class TestDtype(BaseSparseTests, base.BaseDtypeTests): + def test_array_type_with_arg(self, data, dtype): + assert dtype.construct_array_type() is SparseArray + + +class TestInterface(BaseSparseTests, base.BaseInterfaceTests): + def test_copy(self, data): + # __setitem__ does not work, so we only have a smoke-test + data.copy() + + def test_view(self, data): + # __setitem__ does not work, so we only have a smoke-test + data.view() + + +class TestConstructors(BaseSparseTests, base.BaseConstructorsTests): + pass + + +class TestReshaping(BaseSparseTests, base.BaseReshapingTests): + def test_concat_mixed_dtypes(self, data): + # https://github.com/pandas-dev/pandas/issues/20762 + # This should be the same, aside from concat([sparse, float]) + df1 = pd.DataFrame({"A": data[:3]}) + df2 = pd.DataFrame({"A": [1, 2, 3]}) + df3 = pd.DataFrame({"A": ["a", "b", "c"]}).astype("category") + dfs = [df1, df2, df3] + + # dataframes + result = pd.concat(dfs) + expected = pd.concat( + [x.apply(lambda s: np.asarray(s).astype(object)) for x in dfs] + ) + self.assert_frame_equal(result, expected) + + def test_concat_columns(self, data, na_value): + 
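+        # _check_unsupported (defined on BaseSparseTests above) skips the
+        # reshape tests for SparseDtype(int, 0): an int-backed sparse array
+        # cannot hold the NaN values these alignment tests introduce.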
self._check_unsupported(data) + super().test_concat_columns(data, na_value) + + def test_concat_extension_arrays_copy_false(self, data, na_value): + self._check_unsupported(data) + super().test_concat_extension_arrays_copy_false(data, na_value) + + def test_align(self, data, na_value): + self._check_unsupported(data) + super().test_align(data, na_value) + + def test_align_frame(self, data, na_value): + self._check_unsupported(data) + super().test_align_frame(data, na_value) + + def test_align_series_frame(self, data, na_value): + self._check_unsupported(data) + super().test_align_series_frame(data, na_value) + + def test_merge(self, data, na_value): + self._check_unsupported(data) + super().test_merge(data, na_value) + + @pytest.mark.xfail(reason="SparseArray does not support setitem") + def test_transpose(self, data): + super().test_transpose(data) + + +class TestGetitem(BaseSparseTests, base.BaseGetitemTests): + def test_get(self, data): + ser = pd.Series(data, index=[2 * i for i in range(len(data))]) + if np.isnan(ser.values.fill_value): + assert np.isnan(ser.get(4)) and np.isnan(ser.iloc[2]) + else: + assert ser.get(4) == ser.iloc[2] + assert ser.get(2) == ser.iloc[1] + + def test_reindex(self, data, na_value): + self._check_unsupported(data) + super().test_reindex(data, na_value) + + +# Skipping TestSetitem, since we don't implement it. + + +class TestIndex(base.BaseIndexTests): + def test_index_from_array(self, data): + msg = "will store that array directly" + with tm.assert_produces_warning(FutureWarning, match=msg): + idx = pd.Index(data) + + if data.dtype.subtype == "f": + assert idx.dtype == np.float64 + elif data.dtype.subtype == "i": + assert idx.dtype == np.int64 + else: + assert idx.dtype == data.dtype.subtype + + # TODO(2.0): should pass once SparseArray is stored directly in Index. 
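+    # Editor's note (illustrative): as asserted in test_index_from_array
+    # above, pd.Index(SparseArray([1, 2, 3])) currently densifies to an
+    # int64 Index and emits a FutureWarning; once Index stores the sparse
+    # dtype directly, the xfail below should begin to pass.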
+ @pytest.mark.xfail(reason="Index cannot yet store sparse dtype") + def test_index_from_listlike_with_dtype(self, data): + msg = "passing a SparseArray to pd.Index" + with tm.assert_produces_warning(FutureWarning, match=msg): + super().test_index_from_listlike_with_dtype(data) + + +class TestMissing(BaseSparseTests, base.BaseMissingTests): + def test_isna(self, data_missing): + sarr = SparseArray(data_missing) + expected_dtype = SparseDtype(bool, pd.isna(data_missing.dtype.fill_value)) + expected = SparseArray([True, False], dtype=expected_dtype) + result = sarr.isna() + tm.assert_sp_array_equal(result, expected) + + # test isna for arr without na + sarr = sarr.fillna(0) + expected_dtype = SparseDtype(bool, pd.isna(data_missing.dtype.fill_value)) + expected = SparseArray([False, False], fill_value=False, dtype=expected_dtype) + self.assert_equal(sarr.isna(), expected) + + def test_fillna_limit_pad(self, data_missing): + with tm.assert_produces_warning(PerformanceWarning): + super().test_fillna_limit_pad(data_missing) + + def test_fillna_limit_backfill(self, data_missing): + with tm.assert_produces_warning(PerformanceWarning): + super().test_fillna_limit_backfill(data_missing) + + def test_fillna_no_op_returns_copy(self, data, request): + if np.isnan(data.fill_value): + request.node.add_marker( + pytest.mark.xfail(reason="returns array with different fill value") + ) + with tm.assert_produces_warning(PerformanceWarning): + super().test_fillna_no_op_returns_copy(data) + + def test_fillna_series_method(self, data_missing): + with tm.assert_produces_warning(PerformanceWarning): + super().test_fillna_limit_backfill(data_missing) + + @pytest.mark.skip(reason="Unsupported") + def test_fillna_series(self): + # this one looks doable. + pass + + def test_fillna_frame(self, data_missing): + # Have to override to specify that fill_value will change. 
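+        # For example, a Sparse[float64, nan] column filled with 1.0 comes
+        # back as Sparse[float64, 1.0] (the filled value becomes the new
+        # fill_value), while a column whose fill_value is already 0 keeps
+        # its dtype, as the branch below encodes.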
+ fill_value = data_missing[1] + + result = pd.DataFrame({"A": data_missing, "B": [1, 2]}).fillna(fill_value) + + if pd.isna(data_missing.fill_value): + dtype = SparseDtype(data_missing.dtype, fill_value) + else: + dtype = data_missing.dtype + + expected = pd.DataFrame( + { + "A": data_missing._from_sequence([fill_value, fill_value], dtype=dtype), + "B": [1, 2], + } + ) + + self.assert_frame_equal(result, expected) + + +class TestMethods(BaseSparseTests, base.BaseMethodsTests): + def test_combine_le(self, data_repeated): + # We return a Series[SparseArray].__le__ returns a + # Series[Sparse[bool]] + # rather than Series[bool] + orig_data1, orig_data2 = data_repeated(2) + s1 = pd.Series(orig_data1) + s2 = pd.Series(orig_data2) + result = s1.combine(s2, lambda x1, x2: x1 <= x2) + expected = pd.Series( + SparseArray( + [a <= b for (a, b) in zip(list(orig_data1), list(orig_data2))], + fill_value=False, + ) + ) + self.assert_series_equal(result, expected) + + val = s1.iloc[0] + result = s1.combine(val, lambda x1, x2: x1 <= x2) + expected = pd.Series( + SparseArray([a <= val for a in list(orig_data1)], fill_value=False) + ) + self.assert_series_equal(result, expected) + + def test_fillna_copy_frame(self, data_missing): + arr = data_missing.take([1, 1]) + df = pd.DataFrame({"A": arr}, copy=False) + + filled_val = df.iloc[0, 0] + result = df.fillna(filled_val) + + if hasattr(df._mgr, "blocks"): + assert df.values.base is not result.values.base + assert df.A._values.to_dense() is arr.to_dense() + + def test_fillna_copy_series(self, data_missing): + arr = data_missing.take([1, 1]) + ser = pd.Series(arr) + + filled_val = ser[0] + result = ser.fillna(filled_val) + + assert ser._values is not result._values + assert ser._values.to_dense() is arr.to_dense() + + @pytest.mark.skip(reason="Not Applicable") + def test_fillna_length_mismatch(self, data_missing): + pass + + def test_where_series(self, data, na_value): + assert data[0] != data[1] + cls = type(data) + a, b = data[:2] + + ser = pd.Series(cls._from_sequence([a, a, b, b], dtype=data.dtype)) + + cond = np.array([True, True, False, False]) + result = ser.where(cond) + + new_dtype = SparseDtype("float", 0.0) + expected = pd.Series( + cls._from_sequence([a, a, na_value, na_value], dtype=new_dtype) + ) + self.assert_series_equal(result, expected) + + other = cls._from_sequence([a, b, a, b], dtype=data.dtype) + cond = np.array([True, False, True, True]) + result = ser.where(cond, other) + expected = pd.Series(cls._from_sequence([a, b, b, b], dtype=data.dtype)) + self.assert_series_equal(result, expected) + + def test_combine_first(self, data, request): + if data.dtype.subtype == "int": + # Right now this is upcasted to float, just like combine_first + # for Series[int] + mark = pytest.mark.xfail( + reason="TODO(SparseArray.__setitem__) will preserve dtype." 
+ ) + request.node.add_marker(mark) + super().test_combine_first(data) + + def test_searchsorted(self, data_for_sorting, as_series): + with tm.assert_produces_warning(PerformanceWarning): + super().test_searchsorted(data_for_sorting, as_series) + + def test_shift_0_periods(self, data): + # GH#33856 shifting with periods=0 should return a copy, not same obj + result = data.shift(0) + + data._sparse_values[0] = data._sparse_values[1] + assert result._sparse_values[0] != result._sparse_values[1] + + @pytest.mark.parametrize("method", ["argmax", "argmin"]) + def test_argmin_argmax_all_na(self, method, data, na_value): + # overriding because Sparse[int64, 0] cannot handle na_value + self._check_unsupported(data) + super().test_argmin_argmax_all_na(method, data, na_value) + + @pytest.mark.parametrize("box", [pd.array, pd.Series, pd.DataFrame]) + def test_equals(self, data, na_value, as_series, box): + self._check_unsupported(data) + super().test_equals(data, na_value, as_series, box) + + +class TestCasting(BaseSparseTests, base.BaseCastingTests): + def test_astype_object_series(self, all_data): + # Unlike the base class, we do not expect the resulting Block + # to be ObjectBlock / resulting array to be np.dtype("object") + ser = pd.Series(all_data, name="A") + result = ser.astype(object) + assert is_object_dtype(result.dtype) + assert is_object_dtype(result._mgr.array.dtype) + + def test_astype_object_frame(self, all_data): + # Unlike the base class, we do not expect the resulting Block + # to be ObjectBlock / resulting array to be np.dtype("object") + df = pd.DataFrame({"A": all_data}) + + result = df.astype(object) + assert is_object_dtype(result._mgr.arrays[0].dtype) + + # earlier numpy raises TypeError on e.g. np.dtype(np.int64) == "Int64" + # instead of returning False + if not np_version_under1p20: + # check that we can compare the dtypes + comp = result.dtypes == df.dtypes + assert not comp.any() + + def test_astype_str(self, data): + result = pd.Series(data[:5]).astype(str) + expected_dtype = SparseDtype(str, str(data.fill_value)) + expected = pd.Series([str(x) for x in data[:5]], dtype=expected_dtype) + self.assert_series_equal(result, expected) + + @pytest.mark.xfail(raises=TypeError, reason="no sparse StringDtype") + def test_astype_string(self, data): + super().test_astype_string(data) + + +class TestArithmeticOps(BaseSparseTests, base.BaseArithmeticOpsTests): + series_scalar_exc = None + frame_scalar_exc = None + divmod_exc = None + series_array_exc = None + + def _skip_if_different_combine(self, data): + if data.fill_value == 0: + # arith ops call on dtype.fill_value so that the sparsity + # is maintained. Combine can't be called on a dtype in + # general, so we can't make the expected. 
This is tested elsewhere + raise pytest.skip("Incorrected expected from Series.combine") + + def test_arith_series_with_scalar(self, data, all_arithmetic_operators): + self._skip_if_different_combine(data) + super().test_arith_series_with_scalar(data, all_arithmetic_operators) + + def test_arith_series_with_array(self, data, all_arithmetic_operators): + self._skip_if_different_combine(data) + super().test_arith_series_with_array(data, all_arithmetic_operators) + + def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): + if data.dtype.fill_value != 0: + pass + elif all_arithmetic_operators.strip("_") not in [ + "mul", + "rmul", + "floordiv", + "rfloordiv", + "pow", + "mod", + "rmod", + ]: + mark = pytest.mark.xfail(reason="result dtype.fill_value mismatch") + request.node.add_marker(mark) + super().test_arith_frame_with_scalar(data, all_arithmetic_operators) + + def _check_divmod_op(self, ser, op, other, exc=NotImplementedError): + # We implement divmod + super()._check_divmod_op(ser, op, other, exc=None) + + +class TestComparisonOps(BaseSparseTests): + def _compare_other(self, data_for_compare: SparseArray, comparison_op, other): + op = comparison_op + + result = op(data_for_compare, other) + assert isinstance(result, SparseArray) + assert result.dtype.subtype == np.bool_ + + if isinstance(other, SparseArray): + fill_value = op(data_for_compare.fill_value, other.fill_value) + else: + fill_value = np.all( + op(np.asarray(data_for_compare.fill_value), np.asarray(other)) + ) + + expected = SparseArray( + op(data_for_compare.to_dense(), np.asarray(other)), + fill_value=fill_value, + dtype=np.bool_, + ) + tm.assert_sp_array_equal(result, expected) + + def test_scalar(self, data_for_compare: SparseArray, comparison_op): + self._compare_other(data_for_compare, comparison_op, 0) + self._compare_other(data_for_compare, comparison_op, 1) + self._compare_other(data_for_compare, comparison_op, -1) + self._compare_other(data_for_compare, comparison_op, np.nan) + + @pytest.mark.xfail(reason="Wrong indices") + def test_array(self, data_for_compare: SparseArray, comparison_op): + arr = np.linspace(-4, 5, 10) + self._compare_other(data_for_compare, comparison_op, arr) + + @pytest.mark.xfail(reason="Wrong indices") + def test_sparse_array(self, data_for_compare: SparseArray, comparison_op): + arr = data_for_compare + 1 + self._compare_other(data_for_compare, comparison_op, arr) + arr = data_for_compare * 2 + self._compare_other(data_for_compare, comparison_op, arr) + + +class TestPrinting(BaseSparseTests, base.BasePrintingTests): + @pytest.mark.xfail(reason="Different repr") + def test_array_repr(self, data, size): + super().test_array_repr(data, size) + + +class TestParsing(BaseSparseTests, base.BaseParsingTests): + @pytest.mark.parametrize("engine", ["c", "python"]) + def test_EA_types(self, engine, data): + expected_msg = r".*must implement _from_sequence_of_strings.*" + with pytest.raises(NotImplementedError, match=expected_msg): + super().test_EA_types(engine, data) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/extension/test_string.py b/.local/lib/python3.8/site-packages/pandas/tests/extension/test_string.py new file mode 100644 index 0000000..4256142 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/extension/test_string.py @@ -0,0 +1,210 @@ +""" +This file contains a minimal set of tests for compliance with the extension +array interface test suite, and should contain no other tests. 
+The test suite for the full functionality of the array is located in +`pandas/tests/arrays/`. + +The tests in this file are inherited from the BaseExtensionTests, and only +minimal tweaks should be applied to get the tests passing (by overwriting a +parent method). + +Additional tests should either be added to one of the BaseExtensionTests +classes (if they are relevant for the extension interface for all dtypes), or +be added to the array-specific tests in `pandas/tests/arrays/`. + +""" +import string + +import numpy as np +import pytest + +from pandas.compat import pa_version_under2p0 + +import pandas as pd +from pandas.core.arrays import ArrowStringArray +from pandas.core.arrays.string_ import StringDtype +from pandas.tests.extension import base + + +def split_array(arr): + if arr.dtype.storage != "pyarrow": + pytest.skip("chunked array n/a") + + def _split_array(arr): + import pyarrow as pa + + arrow_array = arr._data + split = len(arrow_array) // 2 + arrow_array = pa.chunked_array( + [*arrow_array[:split].chunks, *arrow_array[split:].chunks] + ) + assert arrow_array.num_chunks == 2 + return type(arr)(arrow_array) + + return _split_array(arr) + + +@pytest.fixture(params=[True, False]) +def chunked(request): + return request.param + + +@pytest.fixture +def dtype(string_storage): + return StringDtype(storage=string_storage) + + +@pytest.fixture +def data(dtype, chunked): + strings = np.random.choice(list(string.ascii_letters), size=100) + while strings[0] == strings[1]: + strings = np.random.choice(list(string.ascii_letters), size=100) + + arr = dtype.construct_array_type()._from_sequence(strings) + return split_array(arr) if chunked else arr + + +@pytest.fixture +def data_missing(dtype, chunked): + """Length 2 array with [NA, Valid]""" + arr = dtype.construct_array_type()._from_sequence([pd.NA, "A"]) + return split_array(arr) if chunked else arr + + +@pytest.fixture +def data_for_sorting(dtype, chunked): + arr = dtype.construct_array_type()._from_sequence(["B", "C", "A"]) + return split_array(arr) if chunked else arr + + +@pytest.fixture +def data_missing_for_sorting(dtype, chunked): + arr = dtype.construct_array_type()._from_sequence(["B", pd.NA, "A"]) + return split_array(arr) if chunked else arr + + +@pytest.fixture +def na_value(): + return pd.NA + + +@pytest.fixture +def data_for_grouping(dtype, chunked): + arr = dtype.construct_array_type()._from_sequence( + ["B", "B", pd.NA, pd.NA, "A", "A", "B", "C"] + ) + return split_array(arr) if chunked else arr + + +class TestDtype(base.BaseDtypeTests): + def test_eq_with_str(self, dtype): + assert dtype == f"string[{dtype.storage}]" + super().test_eq_with_str(dtype) + + +class TestInterface(base.BaseInterfaceTests): + def test_view(self, data, request): + if data.dtype.storage == "pyarrow": + mark = pytest.mark.xfail(reason="not implemented") + request.node.add_marker(mark) + super().test_view(data) + + +class TestConstructors(base.BaseConstructorsTests): + def test_from_dtype(self, data): + # base test uses string representation of dtype + pass + + +class TestReshaping(base.BaseReshapingTests): + def test_transpose(self, data, request): + if data.dtype.storage == "pyarrow": + mark = pytest.mark.xfail(reason="not implemented") + request.node.add_marker(mark) + super().test_transpose(data) + + +class TestGetitem(base.BaseGetitemTests): + pass + + +class TestSetitem(base.BaseSetitemTests): + def test_setitem_preserves_views(self, data, request): + if data.dtype.storage == "pyarrow": + mark = pytest.mark.xfail(reason="not implemented") + 
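+            # Preserving views requires mutating the existing buffer in
+            # place, which the pyarrow-backed storage does not currently
+            # support (hence the "not implemented" xfail).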
request.node.add_marker(mark) + super().test_setitem_preserves_views(data) + + +class TestIndex(base.BaseIndexTests): + pass + + +class TestMissing(base.BaseMissingTests): + pass + + +class TestNoReduce(base.BaseNoReduceTests): + @pytest.mark.parametrize("skipna", [True, False]) + def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna): + op_name = all_numeric_reductions + + if op_name in ["min", "max"]: + return None + + ser = pd.Series(data) + with pytest.raises(TypeError): + getattr(ser, op_name)(skipna=skipna) + + +class TestMethods(base.BaseMethodsTests): + @pytest.mark.skip(reason="returns nullable") + def test_value_counts(self, all_data, dropna): + return super().test_value_counts(all_data, dropna) + + @pytest.mark.skip(reason="returns nullable") + def test_value_counts_with_normalize(self, data): + pass + + +class TestCasting(base.BaseCastingTests): + pass + + +class TestComparisonOps(base.BaseComparisonOpsTests): + def _compare_other(self, ser, data, op, other): + op_name = f"__{op.__name__}__" + result = getattr(ser, op_name)(other) + expected = getattr(ser.astype(object), op_name)(other).astype("boolean") + self.assert_series_equal(result, expected) + + def test_compare_scalar(self, data, comparison_op): + ser = pd.Series(data) + self._compare_other(ser, data, comparison_op, "abc") + + +class TestParsing(base.BaseParsingTests): + pass + + +class TestPrinting(base.BasePrintingTests): + pass + + +class TestGroupBy(base.BaseGroupbyTests): + def test_groupby_extension_transform(self, data_for_grouping, request): + if data_for_grouping.dtype.storage == "pyarrow" and pa_version_under2p0: + # failure observed in 1.0.1, not in 2.0 or later + mark = pytest.mark.xfail(reason="pyarrow raises in self._data[item]") + request.node.add_marker(mark) + super().test_groupby_extension_transform(data_for_grouping) + + +class Test2DCompat(base.Dim2CompatTests): + @pytest.fixture(autouse=True) + def arrow_not_supported(self, data, request): + if isinstance(data, ArrowStringArray): + mark = pytest.mark.xfail( + reason="2D support not implemented for ArrowStringArray" + ) + request.node.add_marker(mark) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..72c052c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/common.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/common.cpython-38.pyc new file mode 100644 index 0000000..003780a Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/common.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/conftest.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/conftest.cpython-38.pyc new file mode 100644 index 0000000..67138f6 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/conftest.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_alter_axes.cpython-38.pyc 
b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_alter_axes.cpython-38.pyc new file mode 100644 index 0000000..063ac2e Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_alter_axes.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_api.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_api.cpython-38.pyc new file mode 100644 index 0000000..a1783c9 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_api.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_arithmetic.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_arithmetic.cpython-38.pyc new file mode 100644 index 0000000..1042885 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_arithmetic.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_block_internals.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_block_internals.cpython-38.pyc new file mode 100644 index 0000000..1752411 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_block_internals.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_constructors.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_constructors.cpython-38.pyc new file mode 100644 index 0000000..2b4d617 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_constructors.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_cumulative.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_cumulative.cpython-38.pyc new file mode 100644 index 0000000..17bdb16 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_cumulative.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_iteration.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_iteration.cpython-38.pyc new file mode 100644 index 0000000..73b0c44 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_iteration.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_logical_ops.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_logical_ops.cpython-38.pyc new file mode 100644 index 0000000..9af3139 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_logical_ops.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_nonunique_indexes.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_nonunique_indexes.cpython-38.pyc new file mode 100644 index 0000000..0d69c3d Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_nonunique_indexes.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_npfuncs.cpython-38.pyc 
b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_npfuncs.cpython-38.pyc new file mode 100644 index 0000000..6fc5ca2 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_npfuncs.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_query_eval.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_query_eval.cpython-38.pyc new file mode 100644 index 0000000..6b604a2 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_query_eval.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_reductions.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_reductions.cpython-38.pyc new file mode 100644 index 0000000..6b175fb Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_reductions.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_repr_info.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_repr_info.cpython-38.pyc new file mode 100644 index 0000000..432c817 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_repr_info.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_stack_unstack.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_stack_unstack.cpython-38.pyc new file mode 100644 index 0000000..8d4a466 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_stack_unstack.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_subclass.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_subclass.cpython-38.pyc new file mode 100644 index 0000000..59b3992 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_subclass.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_ufunc.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_ufunc.cpython-38.pyc new file mode 100644 index 0000000..460a76c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_ufunc.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_unary.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_unary.cpython-38.pyc new file mode 100644 index 0000000..9f4f949 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_unary.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_validate.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_validate.cpython-38.pyc new file mode 100644 index 0000000..bd07709 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/frame/__pycache__/test_validate.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/common.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/common.py new file mode 100644 index 0000000..a1603ea --- /dev/null +++ 
b/.local/lib/python3.8/site-packages/pandas/tests/frame/common.py @@ -0,0 +1,58 @@ +from __future__ import annotations + +from pandas import ( + DataFrame, + concat, +) + + +def _check_mixed_float(df, dtype=None): + # float16 are most likely to be upcasted to float32 + dtypes = {"A": "float32", "B": "float32", "C": "float16", "D": "float64"} + if isinstance(dtype, str): + dtypes = {k: dtype for k, v in dtypes.items()} + elif isinstance(dtype, dict): + dtypes.update(dtype) + if dtypes.get("A"): + assert df.dtypes["A"] == dtypes["A"] + if dtypes.get("B"): + assert df.dtypes["B"] == dtypes["B"] + if dtypes.get("C"): + assert df.dtypes["C"] == dtypes["C"] + if dtypes.get("D"): + assert df.dtypes["D"] == dtypes["D"] + + +def _check_mixed_int(df, dtype=None): + dtypes = {"A": "int32", "B": "uint64", "C": "uint8", "D": "int64"} + if isinstance(dtype, str): + dtypes = {k: dtype for k, v in dtypes.items()} + elif isinstance(dtype, dict): + dtypes.update(dtype) + if dtypes.get("A"): + assert df.dtypes["A"] == dtypes["A"] + if dtypes.get("B"): + assert df.dtypes["B"] == dtypes["B"] + if dtypes.get("C"): + assert df.dtypes["C"] == dtypes["C"] + if dtypes.get("D"): + assert df.dtypes["D"] == dtypes["D"] + + +def zip_frames(frames: list[DataFrame], axis: int = 1) -> DataFrame: + """ + take a list of frames, zip them together under the + assumption that these all have the first frames' index/columns. + + Returns + ------- + new_frame : DataFrame + """ + if axis == 1: + columns = frames[0].columns + zipped = [f.loc[:, c] for c in columns for f in frames] + return concat(zipped, axis=1) + else: + index = frames[0].index + zipped = [f.loc[i, :] for i in index for f in frames] + return DataFrame(zipped) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/conftest.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/conftest.py new file mode 100644 index 0000000..b512664 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/conftest.py @@ -0,0 +1,284 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + NaT, + date_range, +) +import pandas._testing as tm + + +@pytest.fixture +def float_frame_with_na(): + """ + Fixture for DataFrame of floats with index of unique strings + + Columns are ['A', 'B', 'C', 'D']; some entries are missing + + A B C D + ABwBzA0ljw -1.128865 -0.897161 0.046603 0.274997 + DJiRzmbyQF 0.728869 0.233502 0.722431 -0.890872 + neMgPD5UBF 0.486072 -1.027393 -0.031553 1.449522 + 0yWA4n8VeX -1.937191 -1.142531 0.805215 -0.462018 + 3slYUbbqU1 0.153260 1.164691 1.489795 -0.545826 + soujjZ0A08 NaN NaN NaN NaN + 7W6NLGsjB9 NaN NaN NaN NaN + ... ... ... ... ... 
+ uhfeaNkCR1 -0.231210 -0.340472 0.244717 -0.901590 + n6p7GYuBIV -0.419052 1.922721 -0.125361 -0.727717 + ZhzAeY6p1y 1.234374 -1.425359 -0.827038 -0.633189 + uWdPsORyUh 0.046738 -0.980445 -1.102965 0.605503 + 3DJA6aN590 -0.091018 -1.684734 -1.100900 0.215947 + 2GBPAzdbMk -2.883405 -1.021071 1.209877 1.633083 + sHadBoyVHw -2.223032 -0.326384 0.258931 0.245517 + + [30 rows x 4 columns] + """ + df = DataFrame(tm.getSeriesData()) + # set some NAs + df.iloc[5:10] = np.nan + df.iloc[15:20, -2:] = np.nan + return df + + +@pytest.fixture +def bool_frame_with_na(): + """ + Fixture for DataFrame of booleans with index of unique strings + + Columns are ['A', 'B', 'C', 'D']; some entries are missing + + A B C D + zBZxY2IDGd False False False False + IhBWBMWllt False True True True + ctjdvZSR6R True False True True + AVTujptmxb False True False True + G9lrImrSWq False False False True + sFFwdIUfz2 NaN NaN NaN NaN + s15ptEJnRb NaN NaN NaN NaN + ... ... ... ... ... + UW41KkDyZ4 True True False False + l9l6XkOdqV True False False False + X2MeZfzDYA False True False False + xWkIKU7vfX False True False True + QOhL6VmpGU False False False True + 22PwkRJdat False True False False + kfboQ3VeIK True False True False + + [30 rows x 4 columns] + """ + df = DataFrame(tm.getSeriesData()) > 0 + df = df.astype(object) + # set some NAs + df.iloc[5:10] = np.nan + df.iloc[15:20, -2:] = np.nan + + # For `any` tests we need to have at least one True before the first NaN + # in each column + for i in range(4): + df.iloc[i, i] = True + return df + + +@pytest.fixture +def float_string_frame(): + """ + Fixture for DataFrame of floats and strings with index of unique strings + + Columns are ['A', 'B', 'C', 'D', 'foo']. + + A B C D foo + w3orJvq07g -1.594062 -1.084273 -1.252457 0.356460 bar + PeukuVdmz2 0.109855 -0.955086 -0.809485 0.409747 bar + ahp2KvwiM8 -1.533729 -0.142519 -0.154666 1.302623 bar + 3WSJ7BUCGd 2.484964 0.213829 0.034778 -2.327831 bar + khdAmufk0U -0.193480 -0.743518 -0.077987 0.153646 bar + LE2DZiFlrE -0.193566 -1.343194 -0.107321 0.959978 bar + HJXSJhVn7b 0.142590 1.257603 -0.659409 -0.223844 bar + ... ... ... ... ... ... + 9a1Vypttgw -1.316394 1.601354 0.173596 1.213196 bar + h5d1gVFbEy 0.609475 1.106738 -0.155271 0.294630 bar + mK9LsTQG92 1.303613 0.857040 -1.019153 0.369468 bar + oOLksd9gKH 0.558219 -0.134491 -0.289869 -0.951033 bar + 9jgoOjKyHg 0.058270 -0.496110 -0.413212 -0.852659 bar + jZLDHclHAO 0.096298 1.267510 0.549206 -0.005235 bar + lR0nxDp1C2 -2.119350 -0.794384 0.544118 0.145849 bar + + [30 rows x 5 columns] + """ + df = DataFrame(tm.getSeriesData()) + df["foo"] = "bar" + return df + + +@pytest.fixture +def mixed_float_frame(): + """ + Fixture for DataFrame of different float types with index of unique strings + + Columns are ['A', 'B', 'C', 'D']. + + A B C D + GI7bbDaEZe -0.237908 -0.246225 -0.468506 0.752993 + KGp9mFepzA -1.140809 -0.644046 -1.225586 0.801588 + VeVYLAb1l2 -1.154013 -1.677615 0.690430 -0.003731 + kmPME4WKhO 0.979578 0.998274 -0.776367 0.897607 + CPyopdXTiz 0.048119 -0.257174 0.836426 0.111266 + 0kJZQndAj0 0.274357 -0.281135 -0.344238 0.834541 + tqdwQsaHG8 -0.979716 -0.519897 0.582031 0.144710 + ... ... ... ... ... 
+ 7FhZTWILQj -2.906357 1.261039 -0.780273 -0.537237 + 4pUDPM4eGq -2.042512 -0.464382 -0.382080 1.132612 + B8dUgUzwTi -1.506637 -0.364435 1.087891 0.297653 + hErlVYjVv9 1.477453 -0.495515 -0.713867 1.438427 + 1BKN3o7YLs 0.127535 -0.349812 -0.881836 0.489827 + 9S4Ekn7zga 1.445518 -2.095149 0.031982 0.373204 + xN1dNn6OV6 1.425017 -0.983995 -0.363281 -0.224502 + + [30 rows x 4 columns] + """ + df = DataFrame(tm.getSeriesData()) + df.A = df.A.astype("float32") + df.B = df.B.astype("float32") + df.C = df.C.astype("float16") + df.D = df.D.astype("float64") + return df + + +@pytest.fixture +def mixed_int_frame(): + """ + Fixture for DataFrame of different int types with index of unique strings + + Columns are ['A', 'B', 'C', 'D']. + + A B C D + mUrCZ67juP 0 1 2 2 + rw99ACYaKS 0 1 0 0 + 7QsEcpaaVU 0 1 1 1 + xkrimI2pcE 0 1 0 0 + dz01SuzoS8 0 1 255 255 + ccQkqOHX75 -1 1 0 0 + DN0iXaoDLd 0 1 0 0 + ... .. .. ... ... + Dfb141wAaQ 1 1 254 254 + IPD8eQOVu5 0 1 0 0 + CcaKulsCmv 0 1 0 0 + rIBa8gu7E5 0 1 0 0 + RP6peZmh5o 0 1 1 1 + NMb9pipQWQ 0 1 0 0 + PqgbJEzjib 0 1 3 3 + + [30 rows x 4 columns] + """ + df = DataFrame({k: v.astype(int) for k, v in tm.getSeriesData().items()}) + df.A = df.A.astype("int32") + df.B = np.ones(len(df.B), dtype="uint64") + df.C = df.C.astype("uint8") + df.D = df.C.astype("int64") + return df + + +@pytest.fixture +def timezone_frame(): + """ + Fixture for DataFrame of date_range Series with different time zones + + Columns are ['A', 'B', 'C']; some entries are missing + + A B C + 0 2013-01-01 2013-01-01 00:00:00-05:00 2013-01-01 00:00:00+01:00 + 1 2013-01-02 NaT NaT + 2 2013-01-03 2013-01-03 00:00:00-05:00 2013-01-03 00:00:00+01:00 + """ + df = DataFrame( + { + "A": date_range("20130101", periods=3), + "B": date_range("20130101", periods=3, tz="US/Eastern"), + "C": date_range("20130101", periods=3, tz="CET"), + } + ) + df.iloc[1, 1] = NaT + df.iloc[1, 2] = NaT + return df + + +@pytest.fixture +def uint64_frame(): + """ + Fixture for DataFrame with uint64 values + + Columns are ['A', 'B'] + """ + return DataFrame( + {"A": np.arange(3), "B": [2 ** 63, 2 ** 63 + 5, 2 ** 63 + 10]}, dtype=np.uint64 + ) + + +@pytest.fixture +def simple_frame(): + """ + Fixture for simple 3x3 DataFrame + + Columns are ['one', 'two', 'three'], index is ['a', 'b', 'c']. + + one two three + a 1.0 2.0 3.0 + b 4.0 5.0 6.0 + c 7.0 8.0 9.0 + """ + arr = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]) + + return DataFrame(arr, columns=["one", "two", "three"], index=["a", "b", "c"]) + + +@pytest.fixture +def frame_of_index_cols(): + """ + Fixture for DataFrame of columns that can be used for indexing + + Columns are ['A', 'B', 'C', 'D', 'E', ('tuple', 'as', 'label')]; + 'A' & 'B' contain duplicates (but are jointly unique), the rest are unique. 
+ + A B C D E (tuple, as, label) + 0 foo one a 0.608477 -0.012500 -1.664297 + 1 foo two b -0.633460 0.249614 -0.364411 + 2 foo three c 0.615256 2.154968 -0.834666 + 3 bar one d 0.234246 1.085675 0.718445 + 4 bar two e 0.533841 -0.005702 -3.533912 + """ + df = DataFrame( + { + "A": ["foo", "foo", "foo", "bar", "bar"], + "B": ["one", "two", "three", "one", "two"], + "C": ["a", "b", "c", "d", "e"], + "D": np.random.randn(5), + "E": np.random.randn(5), + ("tuple", "as", "label"): np.random.randn(5), + } + ) + return df + + +@pytest.fixture( + params=[ + "any", + "all", + "count", + "sum", + "prod", + "max", + "min", + "mean", + "median", + "skew", + "kurt", + "sem", + "var", + "std", + "mad", + ] +) +def reduction_functions(request): + return request.param diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/constructors/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/constructors/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/constructors/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/frame/constructors/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..c821b98 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/frame/constructors/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/constructors/__pycache__/test_from_dict.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/frame/constructors/__pycache__/test_from_dict.cpython-38.pyc new file mode 100644 index 0000000..3d35def Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/frame/constructors/__pycache__/test_from_dict.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/constructors/__pycache__/test_from_records.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/frame/constructors/__pycache__/test_from_records.cpython-38.pyc new file mode 100644 index 0000000..63b7919 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/frame/constructors/__pycache__/test_from_records.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/constructors/test_from_dict.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/constructors/test_from_dict.py new file mode 100644 index 0000000..72107d8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/constructors/test_from_dict.py @@ -0,0 +1,191 @@ +from collections import OrderedDict + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, +) +import pandas._testing as tm +from pandas.core.construction import create_series_with_explicit_dtype + + +class TestFromDict: + # Note: these tests are specific to the from_dict method, not for + # passing dictionaries to DataFrame.__init__ + + def test_from_dict_scalars_requires_index(self): + msg = "If using all scalar values, you must pass an index" + with pytest.raises(ValueError, match=msg): + DataFrame.from_dict(OrderedDict([("b", 8), ("a", 5), ("a", 6)])) + + def test_constructor_list_of_odicts(self): + data = [ + OrderedDict([["a", 1.5], ["b", 3], ["c", 4], ["d", 6]]), + OrderedDict([["a", 1.5], ["b", 3], ["d", 6]]), + OrderedDict([["a", 1.5], ["d", 6]]), + OrderedDict(), + OrderedDict([["a", 1.5], ["b", 3], ["c", 4]]), + OrderedDict([["b", 3], ["c", 4], ["d", 6]]), + ] + + result = DataFrame(data) 
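+        # A list of mappings should be equivalent to from_dict with
+        # orient="index", using the list positions 0..n-1 as row labels;
+        # the expected frame below is built exactly that way.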
+ expected = DataFrame.from_dict( + dict(zip(range(len(data)), data)), orient="index" + ) + tm.assert_frame_equal(result, expected.reindex(result.index)) + + def test_constructor_single_row(self): + data = [OrderedDict([["a", 1.5], ["b", 3], ["c", 4], ["d", 6]])] + + result = DataFrame(data) + expected = DataFrame.from_dict(dict(zip([0], data)), orient="index").reindex( + result.index + ) + tm.assert_frame_equal(result, expected) + + def test_constructor_list_of_series(self): + data = [ + OrderedDict([["a", 1.5], ["b", 3.0], ["c", 4.0]]), + OrderedDict([["a", 1.5], ["b", 3.0], ["c", 6.0]]), + ] + sdict = OrderedDict(zip(["x", "y"], data)) + idx = Index(["a", "b", "c"]) + + # all named + data2 = [ + Series([1.5, 3, 4], idx, dtype="O", name="x"), + Series([1.5, 3, 6], idx, name="y"), + ] + result = DataFrame(data2) + expected = DataFrame.from_dict(sdict, orient="index") + tm.assert_frame_equal(result, expected) + + # some unnamed + data2 = [ + Series([1.5, 3, 4], idx, dtype="O", name="x"), + Series([1.5, 3, 6], idx), + ] + result = DataFrame(data2) + + sdict = OrderedDict(zip(["x", "Unnamed 0"], data)) + expected = DataFrame.from_dict(sdict, orient="index") + tm.assert_frame_equal(result, expected) + + # none named + data = [ + OrderedDict([["a", 1.5], ["b", 3], ["c", 4], ["d", 6]]), + OrderedDict([["a", 1.5], ["b", 3], ["d", 6]]), + OrderedDict([["a", 1.5], ["d", 6]]), + OrderedDict(), + OrderedDict([["a", 1.5], ["b", 3], ["c", 4]]), + OrderedDict([["b", 3], ["c", 4], ["d", 6]]), + ] + data = [ + create_series_with_explicit_dtype(d, dtype_if_empty=object) for d in data + ] + + result = DataFrame(data) + sdict = OrderedDict(zip(range(len(data)), data)) + expected = DataFrame.from_dict(sdict, orient="index") + tm.assert_frame_equal(result, expected.reindex(result.index)) + + result2 = DataFrame(data, index=np.arange(6)) + tm.assert_frame_equal(result, result2) + + result = DataFrame([Series(dtype=object)]) + expected = DataFrame(index=[0]) + tm.assert_frame_equal(result, expected) + + data = [ + OrderedDict([["a", 1.5], ["b", 3.0], ["c", 4.0]]), + OrderedDict([["a", 1.5], ["b", 3.0], ["c", 6.0]]), + ] + sdict = OrderedDict(zip(range(len(data)), data)) + + idx = Index(["a", "b", "c"]) + data2 = [Series([1.5, 3, 4], idx, dtype="O"), Series([1.5, 3, 6], idx)] + result = DataFrame(data2) + expected = DataFrame.from_dict(sdict, orient="index") + tm.assert_frame_equal(result, expected) + + def test_constructor_orient(self, float_string_frame): + data_dict = float_string_frame.T._series + recons = DataFrame.from_dict(data_dict, orient="index") + expected = float_string_frame.reindex(index=recons.index) + tm.assert_frame_equal(recons, expected) + + # dict of sequence + a = {"hi": [32, 3, 3], "there": [3, 5, 3]} + rs = DataFrame.from_dict(a, orient="index") + xp = DataFrame.from_dict(a).T.reindex(list(a.keys())) + tm.assert_frame_equal(rs, xp) + + def test_constructor_from_ordered_dict(self): + # GH#8425 + a = OrderedDict( + [ + ("one", OrderedDict([("col_a", "foo1"), ("col_b", "bar1")])), + ("two", OrderedDict([("col_a", "foo2"), ("col_b", "bar2")])), + ("three", OrderedDict([("col_a", "foo3"), ("col_b", "bar3")])), + ] + ) + expected = DataFrame.from_dict(a, orient="columns").T + result = DataFrame.from_dict(a, orient="index") + tm.assert_frame_equal(result, expected) + + def test_from_dict_columns_parameter(self): + # GH#18529 + # Test new columns parameter for from_dict that was added to make + # from_items(..., orient='index', columns=[...]) easier to replicate + result = DataFrame.from_dict( + 
OrderedDict([("A", [1, 2]), ("B", [4, 5])]), + orient="index", + columns=["one", "two"], + ) + expected = DataFrame([[1, 2], [4, 5]], index=["A", "B"], columns=["one", "two"]) + tm.assert_frame_equal(result, expected) + + msg = "cannot use columns parameter with orient='columns'" + with pytest.raises(ValueError, match=msg): + DataFrame.from_dict( + {"A": [1, 2], "B": [4, 5]}, + orient="columns", + columns=["one", "two"], + ) + with pytest.raises(ValueError, match=msg): + DataFrame.from_dict({"A": [1, 2], "B": [4, 5]}, columns=["one", "two"]) + + @pytest.mark.parametrize( + "data_dict, keys, orient", + [ + ({}, [], "index"), + ([{("a",): 1}, {("a",): 2}], [("a",)], "columns"), + ([OrderedDict([(("a",), 1), (("b",), 2)])], [("a",), ("b",)], "columns"), + ([{("a", "b"): 1}], [("a", "b")], "columns"), + ], + ) + def test_constructor_from_dict_tuples(self, data_dict, keys, orient): + # GH#16769 + df = DataFrame.from_dict(data_dict, orient) + + result = df.columns + expected = Index(keys, dtype="object", tupleize_cols=False) + + tm.assert_index_equal(result, expected) + + def test_frame_dict_constructor_empty_series(self): + s1 = Series( + [1, 2, 3, 4], index=MultiIndex.from_tuples([(1, 2), (1, 3), (2, 2), (2, 4)]) + ) + s2 = Series( + [1, 2, 3, 4], index=MultiIndex.from_tuples([(1, 2), (1, 3), (3, 2), (3, 4)]) + ) + s3 = Series(dtype=object) + + # it works! + DataFrame({"foo": s1, "bar": s2, "baz": s3}) + DataFrame.from_dict({"foo": s1, "baz": s3, "bar": s2}) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/constructors/test_from_records.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/constructors/test_from_records.py new file mode 100644 index 0000000..4aa150a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/constructors/test_from_records.py @@ -0,0 +1,466 @@ +from datetime import datetime +from decimal import Decimal + +import numpy as np +import pytest +import pytz + +from pandas.compat import is_platform_little_endian + +from pandas import ( + CategoricalIndex, + DataFrame, + Index, + Interval, + RangeIndex, + Series, +) +import pandas._testing as tm + + +class TestFromRecords: + def test_from_records_with_datetimes(self): + + # this may fail on certain platforms because of a numpy issue + # related GH#6140 + if not is_platform_little_endian(): + pytest.skip("known failure of test on non-little endian") + + # construction with a null in a recarray + # GH#6140 + expected = DataFrame({"EXPIRY": [datetime(2005, 3, 1, 0, 0), None]}) + + arrdata = [np.array([datetime(2005, 3, 1, 0, 0), None])] + dtypes = [("EXPIRY", " 1] + expected = df3.iloc[[]] + tm.assert_frame_equal(result, expected) + + # unordered + # cannot be compared + + # CategoricalIndex([1, 1, 2, 1, 3, 2], + # categories=[3, 2, 1], + # ordered=False, + # name='B') + msg = "Unordered Categoricals can only compare equality or not" + with pytest.raises(TypeError, match=msg): + df4[df4.index < 2] + with pytest.raises(TypeError, match=msg): + df4[df4.index > 1] + + @pytest.mark.parametrize( + "data1,data2,expected_data", + ( + ( + [[1, 2], [3, 4]], + [[0.5, 6], [7, 8]], + [[np.nan, 3.0], [np.nan, 4.0], [np.nan, 7.0], [6.0, 8.0]], + ), + ( + [[1, 2], [3, 4]], + [[5, 6], [7, 8]], + [[np.nan, 3.0], [np.nan, 4.0], [5, 7], [6, 8]], + ), + ), + ) + def test_getitem_bool_mask_duplicate_columns_mixed_dtypes( + self, + data1, + data2, + expected_data, + ): + # GH#31954 + + df1 = DataFrame(np.array(data1)) + df2 = DataFrame(np.array(data2)) + df = concat([df1, df2], axis=1) + + result = df[df > 
2] + + exdict = {i: np.array(col) for i, col in enumerate(expected_data)} + expected = DataFrame(exdict).rename(columns={2: 0, 3: 1}) + tm.assert_frame_equal(result, expected) + + @pytest.fixture + def df_dup_cols(self): + dups = ["A", "A", "C", "D"] + df = DataFrame(np.arange(12).reshape(3, 4), columns=dups, dtype="float64") + return df + + def test_getitem_boolean_frame_unaligned_with_duplicate_columns(self, df_dup_cols): + # `df.A > 6` is a DataFrame with a different shape from df + + # boolean with the duplicate raises + df = df_dup_cols + msg = "cannot reindex on an axis with duplicate labels" + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + df[df.A > 6] + + def test_getitem_boolean_series_with_duplicate_columns(self, df_dup_cols): + # boolean indexing + # GH#4879 + df = DataFrame( + np.arange(12).reshape(3, 4), columns=["A", "B", "C", "D"], dtype="float64" + ) + expected = df[df.C > 6] + expected.columns = df_dup_cols.columns + + df = df_dup_cols + result = df[df.C > 6] + + tm.assert_frame_equal(result, expected) + result.dtypes + str(result) + + def test_getitem_boolean_frame_with_duplicate_columns(self, df_dup_cols): + + # where + df = DataFrame( + np.arange(12).reshape(3, 4), columns=["A", "B", "C", "D"], dtype="float64" + ) + # `df > 6` is a DataFrame with the same shape+alignment as df + expected = df[df > 6] + expected.columns = df_dup_cols.columns + + df = df_dup_cols + result = df[df > 6] + + tm.assert_frame_equal(result, expected) + result.dtypes + str(result) + + def test_getitem_empty_frame_with_boolean(self): + # Test for issue GH#11859 + + df = DataFrame() + df2 = df[df > 0] + tm.assert_frame_equal(df, df2) + + +class TestGetitemSlice: + def test_getitem_slice_float64(self, frame_or_series): + values = np.arange(10.0, 50.0, 2) + index = Index(values) + + start, end = values[[5, 15]] + + data = np.random.randn(20, 3) + if frame_or_series is not DataFrame: + data = data[:, 0] + + obj = frame_or_series(data, index=index) + + result = obj[start:end] + expected = obj.iloc[5:16] + tm.assert_equal(result, expected) + + result = obj.loc[start:end] + tm.assert_equal(result, expected) + + def test_getitem_datetime_slice(self): + # GH#43223 + df = DataFrame( + {"a": 0}, + index=DatetimeIndex( + [ + "11.01.2011 22:00", + "11.01.2011 23:00", + "12.01.2011 00:00", + "2011-01-13 00:00", + ] + ), + ) + with tm.assert_produces_warning(FutureWarning): + result = df["2011-01-01":"2011-11-01"] + expected = DataFrame( + {"a": 0}, + index=DatetimeIndex( + ["11.01.2011 22:00", "11.01.2011 23:00", "2011-01-13 00:00"] + ), + ) + tm.assert_frame_equal(result, expected) + + +class TestGetitemDeprecatedIndexers: + @pytest.mark.parametrize("key", [{"a", "b"}, {"a": "a"}]) + def test_getitem_dict_and_set_deprecated(self, key): + # GH#42825 + df = DataFrame( + [[1, 2], [3, 4]], columns=MultiIndex.from_tuples([("a", 1), ("b", 2)]) + ) + with tm.assert_produces_warning(FutureWarning): + df[key] diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/indexing/test_indexing.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/indexing/test_indexing.py new file mode 100644 index 0000000..b186a55 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/indexing/test_indexing.py @@ -0,0 +1,1610 @@ +from datetime import ( + datetime, + timedelta, +) +import re + +import numpy as np +import pytest + +from pandas._libs import iNaT +from pandas.errors import InvalidIndexError +import pandas.util._test_decorators 
as td + +from pandas.core.dtypes.common import is_integer + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + DatetimeIndex, + Index, + MultiIndex, + Series, + Timestamp, + date_range, + isna, + notna, +) +import pandas._testing as tm +import pandas.core.common as com + +# We pass through a TypeError raised by numpy +_slice_msg = "slice indices must be integers or None or have an __index__ method" + + +class TestDataFrameIndexing: + def test_getitem(self, float_frame): + # Slicing + sl = float_frame[:20] + assert len(sl.index) == 20 + + # Column access + for _, series in sl.items(): + assert len(series.index) == 20 + assert tm.equalContents(series.index, sl.index) + + for key, _ in float_frame._series.items(): + assert float_frame[key] is not None + + assert "random" not in float_frame + with pytest.raises(KeyError, match="random"): + float_frame["random"] + + def test_getitem2(self, float_frame): + + df = float_frame.copy() + df["$10"] = np.random.randn(len(df)) + + ad = np.random.randn(len(df)) + df["@awesome_domain"] = ad + + with pytest.raises(KeyError, match=re.escape("'df[\"$10\"]'")): + df.__getitem__('df["$10"]') + + res = df["@awesome_domain"] + tm.assert_numpy_array_equal(ad, res.values) + + def test_setitem_list(self, float_frame): + + float_frame["E"] = "foo" + data = float_frame[["A", "B"]] + float_frame[["B", "A"]] = data + + tm.assert_series_equal(float_frame["B"], data["A"], check_names=False) + tm.assert_series_equal(float_frame["A"], data["B"], check_names=False) + + msg = "Columns must be same length as key" + with pytest.raises(ValueError, match=msg): + data[["A"]] = float_frame[["A", "B"]] + newcolumndata = range(len(data.index) - 1) + msg = ( + rf"Length of values \({len(newcolumndata)}\) " + rf"does not match length of index \({len(data)}\)" + ) + with pytest.raises(ValueError, match=msg): + data["A"] = newcolumndata + + def test_setitem_list2(self): + + df = DataFrame(0, index=range(3), columns=["tt1", "tt2"], dtype=np.int_) + df.loc[1, ["tt1", "tt2"]] = [1, 2] + + result = df.loc[df.index[1], ["tt1", "tt2"]] + expected = Series([1, 2], df.columns, dtype=np.int_, name=1) + tm.assert_series_equal(result, expected) + + df["tt1"] = df["tt2"] = "0" + df.loc[df.index[1], ["tt1", "tt2"]] = ["1", "2"] + result = df.loc[df.index[1], ["tt1", "tt2"]] + expected = Series(["1", "2"], df.columns, name=1) + tm.assert_series_equal(result, expected) + + def test_getitem_boolean(self, mixed_float_frame, mixed_int_frame, datetime_frame): + # boolean indexing + d = datetime_frame.index[10] + indexer = datetime_frame.index > d + indexer_obj = indexer.astype(object) + + subindex = datetime_frame.index[indexer] + subframe = datetime_frame[indexer] + + tm.assert_index_equal(subindex, subframe.index) + with pytest.raises(ValueError, match="Item wrong length"): + datetime_frame[indexer[:-1]] + + subframe_obj = datetime_frame[indexer_obj] + tm.assert_frame_equal(subframe_obj, subframe) + + with pytest.raises(ValueError, match="Boolean array expected"): + datetime_frame[datetime_frame] + + # test that Series work + indexer_obj = Series(indexer_obj, datetime_frame.index) + + subframe_obj = datetime_frame[indexer_obj] + tm.assert_frame_equal(subframe_obj, subframe) + + # test that Series indexers reindex + # we are producing a warning that since the passed boolean + # key is not the same as the given index, we will reindex + # not sure this is really necessary + with tm.assert_produces_warning(UserWarning): + indexer_obj = indexer_obj.reindex(datetime_frame.index[::-1]) + 
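+            # the reversed-index key still selects the same rows: a boolean
+            # Series key is aligned back to the frame's index by label before
+            # masking, so the only visible effect is the reindex warning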
subframe_obj = datetime_frame[indexer_obj] + tm.assert_frame_equal(subframe_obj, subframe) + + # test df[df > 0] + for df in [ + datetime_frame, + mixed_float_frame, + mixed_int_frame, + ]: + + data = df._get_numeric_data() + bif = df[df > 0] + bifw = DataFrame( + {c: np.where(data[c] > 0, data[c], np.nan) for c in data.columns}, + index=data.index, + columns=data.columns, + ) + + # add back other columns to compare + for c in df.columns: + if c not in bifw: + bifw[c] = df[c] + bifw = bifw.reindex(columns=df.columns) + + tm.assert_frame_equal(bif, bifw, check_dtype=False) + for c in df.columns: + if bif[c].dtype != bifw[c].dtype: + assert bif[c].dtype == df[c].dtype + + def test_getitem_boolean_casting(self, datetime_frame): + + # don't upcast if we don't need to + df = datetime_frame.copy() + df["E"] = 1 + df["E"] = df["E"].astype("int32") + df["E1"] = df["E"].copy() + df["F"] = 1 + df["F"] = df["F"].astype("int64") + df["F1"] = df["F"].copy() + + casted = df[df > 0] + result = casted.dtypes + expected = Series( + [np.dtype("float64")] * 4 + + [np.dtype("int32")] * 2 + + [np.dtype("int64")] * 2, + index=["A", "B", "C", "D", "E", "E1", "F", "F1"], + ) + tm.assert_series_equal(result, expected) + + # int block splitting + df.loc[df.index[1:3], ["E1", "F1"]] = 0 + casted = df[df > 0] + result = casted.dtypes + expected = Series( + [np.dtype("float64")] * 4 + + [np.dtype("int32")] + + [np.dtype("float64")] + + [np.dtype("int64")] + + [np.dtype("float64")], + index=["A", "B", "C", "D", "E", "E1", "F", "F1"], + ) + tm.assert_series_equal(result, expected) + + def test_getitem_boolean_list(self): + df = DataFrame(np.arange(12).reshape(3, 4)) + + def _checkit(lst): + result = df[lst] + expected = df.loc[df.index[lst]] + tm.assert_frame_equal(result, expected) + + _checkit([True, False, True]) + _checkit([True, True, True]) + _checkit([False, False, False]) + + def test_getitem_boolean_iadd(self): + arr = np.random.randn(5, 5) + + df = DataFrame(arr.copy(), columns=["A", "B", "C", "D", "E"]) + + df[df < 0] += 1 + arr[arr < 0] += 1 + + tm.assert_almost_equal(df.values, arr) + + def test_boolean_index_empty_corner(self): + # #2096 + blah = DataFrame(np.empty([0, 1]), columns=["A"], index=DatetimeIndex([])) + + # both of these should succeed trivially + k = np.array([], bool) + + blah[k] + blah[k] = 0 + + def test_getitem_ix_mixed_integer(self): + df = DataFrame( + np.random.randn(4, 3), index=[1, 10, "C", "E"], columns=[1, 2, 3] + ) + + result = df.iloc[:-1] + expected = df.loc[df.index[:-1]] + tm.assert_frame_equal(result, expected) + + result = df.loc[[1, 10]] + expected = df.loc[Index([1, 10])] + tm.assert_frame_equal(result, expected) + + def test_getitem_ix_mixed_integer2(self): + # 11320 + df = DataFrame( + { + "rna": (1.5, 2.2, 3.2, 4.5), + -1000: [11, 21, 36, 40], + 0: [10, 22, 43, 34], + 1000: [0, 10, 20, 30], + }, + columns=["rna", -1000, 0, 1000], + ) + result = df[[1000]] + expected = df.iloc[:, [3]] + tm.assert_frame_equal(result, expected) + result = df[[-1000]] + expected = df.iloc[:, [1]] + tm.assert_frame_equal(result, expected) + + def test_getattr(self, float_frame): + tm.assert_series_equal(float_frame.A, float_frame["A"]) + msg = "'DataFrame' object has no attribute 'NONEXISTENT_NAME'" + with pytest.raises(AttributeError, match=msg): + float_frame.NONEXISTENT_NAME + + def test_setattr_column(self): + df = DataFrame({"foobar": 1}, index=range(10)) + + df.foobar = 5 + assert (df.foobar == 5).all() + + def test_setitem(self, float_frame): + # not sure what else to do here + series 
= float_frame["A"][::2] + float_frame["col5"] = series + assert "col5" in float_frame + + assert len(series) == 15 + assert len(float_frame) == 30 + + exp = np.ravel(np.column_stack((series.values, [np.nan] * 15))) + exp = Series(exp, index=float_frame.index, name="col5") + tm.assert_series_equal(float_frame["col5"], exp) + + series = float_frame["A"] + float_frame["col6"] = series + tm.assert_series_equal(series, float_frame["col6"], check_names=False) + + # set ndarray + arr = np.random.randn(len(float_frame)) + float_frame["col9"] = arr + assert (float_frame["col9"] == arr).all() + + float_frame["col7"] = 5 + assert (float_frame["col7"] == 5).all() + + float_frame["col0"] = 3.14 + assert (float_frame["col0"] == 3.14).all() + + float_frame["col8"] = "foo" + assert (float_frame["col8"] == "foo").all() + + # this is partially a view (e.g. some blocks are view) + # so raise/warn + smaller = float_frame[:2] + + msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame" + with pytest.raises(com.SettingWithCopyError, match=msg): + smaller["col10"] = ["1", "2"] + + assert smaller["col10"].dtype == np.object_ + assert (smaller["col10"] == ["1", "2"]).all() + + def test_setitem2(self): + # dtype changing GH4204 + df = DataFrame([[0, 0]]) + df.iloc[0] = np.nan + expected = DataFrame([[np.nan, np.nan]]) + tm.assert_frame_equal(df, expected) + + df = DataFrame([[0, 0]]) + df.loc[0] = np.nan + tm.assert_frame_equal(df, expected) + + def test_setitem_boolean(self, float_frame): + df = float_frame.copy() + values = float_frame.values + + df[df["A"] > 0] = 4 + values[values[:, 0] > 0] = 4 + tm.assert_almost_equal(df.values, values) + + # test that column reindexing works + series = df["A"] == 4 + series = series.reindex(df.index[::-1]) + df[series] = 1 + values[values[:, 0] == 4] = 1 + tm.assert_almost_equal(df.values, values) + + df[df > 0] = 5 + values[values > 0] = 5 + tm.assert_almost_equal(df.values, values) + + df[df == 5] = 0 + values[values == 5] = 0 + tm.assert_almost_equal(df.values, values) + + # a df that needs alignment first + df[df[:-1] < 0] = 2 + np.putmask(values[:-1], values[:-1] < 0, 2) + tm.assert_almost_equal(df.values, values) + + # indexed with same shape but rows-reversed df + df[df[::-1] == 2] = 3 + values[values == 2] = 3 + tm.assert_almost_equal(df.values, values) + + msg = "Must pass DataFrame or 2-d ndarray with boolean values only" + with pytest.raises(TypeError, match=msg): + df[df * 0] = 2 + + # index with DataFrame + mask = df > np.abs(df) + expected = df.copy() + df[df > np.abs(df)] = np.nan + expected.values[mask.values] = np.nan + tm.assert_frame_equal(df, expected) + + # set from DataFrame + expected = df.copy() + df[df > np.abs(df)] = df * 2 + np.putmask(expected.values, mask.values, df.values * 2) + tm.assert_frame_equal(df, expected) + + def test_setitem_cast(self, float_frame): + float_frame["D"] = float_frame["D"].astype("i8") + assert float_frame["D"].dtype == np.int64 + + # #669, should not cast? 
+ # this is now set to int64, which means a replacement of the column to + # the value dtype (and nothing to do with the existing dtype) + float_frame["B"] = 0 + assert float_frame["B"].dtype == np.int64 + + # cast if pass array of course + float_frame["B"] = np.arange(len(float_frame)) + assert issubclass(float_frame["B"].dtype.type, np.integer) + + float_frame["foo"] = "bar" + float_frame["foo"] = 0 + assert float_frame["foo"].dtype == np.int64 + + float_frame["foo"] = "bar" + float_frame["foo"] = 2.5 + assert float_frame["foo"].dtype == np.float64 + + float_frame["something"] = 0 + assert float_frame["something"].dtype == np.int64 + float_frame["something"] = 2 + assert float_frame["something"].dtype == np.int64 + float_frame["something"] = 2.5 + assert float_frame["something"].dtype == np.float64 + + def test_setitem_corner(self, float_frame): + # corner case + df = DataFrame({"B": [1.0, 2.0, 3.0], "C": ["a", "b", "c"]}, index=np.arange(3)) + del df["B"] + df["B"] = [1.0, 2.0, 3.0] + assert "B" in df + assert len(df.columns) == 2 + + df["A"] = "beginning" + df["E"] = "foo" + df["D"] = "bar" + df[datetime.now()] = "date" + df[datetime.now()] = 5.0 + + # assigning to an empty frame that has an index + dm = DataFrame(index=float_frame.index) + dm["A"] = "foo" + dm["B"] = "bar" + assert len(dm.columns) == 2 + assert dm.values.dtype == np.object_ + + # upcast + dm["C"] = 1 + assert dm["C"].dtype == np.int64 + + dm["E"] = 1.0 + assert dm["E"].dtype == np.float64 + + # set existing column + dm["A"] = "bar" + assert "bar" == dm["A"][0] + + dm = DataFrame(index=np.arange(3)) + dm["A"] = 1 + dm["foo"] = "bar" + del dm["foo"] + dm["foo"] = "bar" + assert dm["foo"].dtype == np.object_ + + dm["coercible"] = ["1", "2", "3"] + assert dm["coercible"].dtype == np.object_ + + def test_setitem_corner2(self): + data = { + "title": ["foobar", "bar", "foobar"] + ["foobar"] * 17, + "cruft": np.random.random(20), + } + + df = DataFrame(data) + ix = df[df["title"] == "bar"].index + + df.loc[ix, ["title"]] = "foobar" + df.loc[ix, ["cruft"]] = 0 + + assert df.loc[1, "title"] == "foobar" + assert df.loc[1, "cruft"] == 0 + + def test_setitem_ambig(self): + # Difficulties with mixed-type data + from decimal import Decimal + + # Created as float type + dm = DataFrame(index=range(3), columns=range(3)) + + coercible_series = Series([Decimal(1) for _ in range(3)], index=range(3)) + uncoercible_series = Series(["foo", "bzr", "baz"], index=range(3)) + + dm[0] = np.ones(3) + assert len(dm.columns) == 3 + + dm[1] = coercible_series + assert len(dm.columns) == 3 + + dm[2] = uncoercible_series + assert len(dm.columns) == 3 + assert dm[2].dtype == np.object_ + + def test_setitem_None(self, float_frame): + # GH #766 + float_frame[None] = float_frame["A"] + tm.assert_series_equal( + float_frame.iloc[:, -1], float_frame["A"], check_names=False + ) + tm.assert_series_equal( + float_frame.loc[:, None], float_frame["A"], check_names=False + ) + tm.assert_series_equal(float_frame[None], float_frame["A"], check_names=False) + repr(float_frame) + + def test_loc_setitem_boolean_mask_allfalse(self): + # GH 9596 + df = DataFrame( + {"a": ["1", "2", "3"], "b": ["11", "22", "33"], "c": ["111", "222", "333"]} + ) + + result = df.copy() + result.loc[result.b.isna(), "a"] = result.a + tm.assert_frame_equal(result, df) + + def test_getitem_fancy_slice_integers_step(self): + df = DataFrame(np.random.randn(10, 5)) + + # this is OK + result = df.iloc[:8:2] # noqa + df.iloc[:8:2] = np.nan + assert isna(df.iloc[:8:2]).values.all() + + def
test_getitem_setitem_integer_slice_keyerrors(self): + df = DataFrame(np.random.randn(10, 5), index=range(0, 20, 2)) + + # this is OK + cp = df.copy() + cp.iloc[4:10] = 0 + assert (cp.iloc[4:10] == 0).values.all() + + # so is this + cp = df.copy() + cp.iloc[3:11] = 0 + assert (cp.iloc[3:11] == 0).values.all() + + result = df.iloc[2:6] + result2 = df.loc[3:11] + expected = df.reindex([4, 6, 8, 10]) + + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected) + + # non-monotonic, raise KeyError + df2 = df.iloc[list(range(5)) + list(range(5, 10))[::-1]] + with pytest.raises(KeyError, match=r"^3$"): + df2.loc[3:11] + with pytest.raises(KeyError, match=r"^3$"): + df2.loc[3:11] = 0 + + @td.skip_array_manager_invalid_test # already covered in test_iloc_col_slice_view + def test_fancy_getitem_slice_mixed(self, float_frame, float_string_frame): + + sliced = float_string_frame.iloc[:, -3:] + assert sliced["D"].dtype == np.float64 + + # get view with single block + # setting it triggers setting with copy + sliced = float_frame.iloc[:, -3:] + + assert np.shares_memory(sliced["C"]._values, float_frame["C"]._values) + + msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame" + with pytest.raises(com.SettingWithCopyError, match=msg): + sliced.loc[:, "C"] = 4.0 + + assert (float_frame["C"] == 4).all() + + def test_getitem_setitem_non_ix_labels(self): + df = tm.makeTimeDataFrame() + + start, end = df.index[[5, 10]] + + result = df.loc[start:end] + result2 = df[start:end] + expected = df[5:11] + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected) + + result = df.copy() + result.loc[start:end] = 0 + result2 = df.copy() + result2[start:end] = 0 + expected = df.copy() + expected[5:11] = 0 + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected) + + def test_ix_multi_take(self): + df = DataFrame(np.random.randn(3, 2)) + rs = df.loc[df.index == 0, :] + xp = df.reindex([0]) + tm.assert_frame_equal(rs, xp) + + # GH#1321 + df = DataFrame(np.random.randn(3, 2)) + rs = df.loc[df.index == 0, df.columns == 1] + xp = df.reindex(index=[0], columns=[1]) + tm.assert_frame_equal(rs, xp) + + def test_getitem_fancy_scalar(self, float_frame): + f = float_frame + ix = f.loc + + # individual value + for col in f.columns: + ts = f[col] + for idx in f.index[::5]: + assert ix[idx, col] == ts[idx] + + @td.skip_array_manager_invalid_test # TODO(ArrayManager) rewrite not using .values + def test_setitem_fancy_scalar(self, float_frame): + f = float_frame + expected = float_frame.copy() + ix = f.loc + + # individual value + for j, col in enumerate(f.columns): + ts = f[col] # noqa + for idx in f.index[::5]: + i = f.index.get_loc(idx) + val = np.random.randn() + expected.values[i, j] = val + + ix[idx, col] = val + tm.assert_frame_equal(f, expected) + + def test_getitem_fancy_boolean(self, float_frame): + f = float_frame + ix = f.loc + + expected = f.reindex(columns=["B", "D"]) + result = ix[:, [False, True, False, True]] + tm.assert_frame_equal(result, expected) + + expected = f.reindex(index=f.index[5:10], columns=["B", "D"]) + result = ix[f.index[5:10], [False, True, False, True]] + tm.assert_frame_equal(result, expected) + + boolvec = f.index > f.index[7] + expected = f.reindex(index=f.index[boolvec]) + result = ix[boolvec] + tm.assert_frame_equal(result, expected) + result = ix[boolvec, :] + tm.assert_frame_equal(result, expected) + + result = ix[boolvec, f.columns[2:]] + expected = f.reindex(index=f.index[boolvec], columns=["C", 
"D"]) + tm.assert_frame_equal(result, expected) + + @td.skip_array_manager_invalid_test # TODO(ArrayManager) rewrite not using .values + def test_setitem_fancy_boolean(self, float_frame): + # from 2d, set with booleans + frame = float_frame.copy() + expected = float_frame.copy() + + mask = frame["A"] > 0 + frame.loc[mask] = 0.0 + expected.values[mask.values] = 0.0 + tm.assert_frame_equal(frame, expected) + + frame = float_frame.copy() + expected = float_frame.copy() + frame.loc[mask, ["A", "B"]] = 0.0 + expected.values[mask.values, :2] = 0.0 + tm.assert_frame_equal(frame, expected) + + def test_getitem_fancy_ints(self, float_frame): + result = float_frame.iloc[[1, 4, 7]] + expected = float_frame.loc[float_frame.index[[1, 4, 7]]] + tm.assert_frame_equal(result, expected) + + result = float_frame.iloc[:, [2, 0, 1]] + expected = float_frame.loc[:, float_frame.columns[[2, 0, 1]]] + tm.assert_frame_equal(result, expected) + + def test_getitem_setitem_boolean_misaligned(self, float_frame): + # boolean index misaligned labels + mask = float_frame["A"][::-1] > 1 + + result = float_frame.loc[mask] + expected = float_frame.loc[mask[::-1]] + tm.assert_frame_equal(result, expected) + + cp = float_frame.copy() + expected = float_frame.copy() + cp.loc[mask] = 0 + expected.loc[mask] = 0 + tm.assert_frame_equal(cp, expected) + + def test_getitem_setitem_boolean_multi(self): + df = DataFrame(np.random.randn(3, 2)) + + # get + k1 = np.array([True, False, True]) + k2 = np.array([False, True]) + result = df.loc[k1, k2] + expected = df.loc[[0, 2], [1]] + tm.assert_frame_equal(result, expected) + + expected = df.copy() + df.loc[np.array([True, False, True]), np.array([False, True])] = 5 + expected.loc[[0, 2], [1]] = 5 + tm.assert_frame_equal(df, expected) + + def test_getitem_setitem_float_labels(self): + index = Index([1.5, 2, 3, 4, 5]) + df = DataFrame(np.random.randn(5, 5), index=index) + + result = df.loc[1.5:4] + expected = df.reindex([1.5, 2, 3, 4]) + tm.assert_frame_equal(result, expected) + assert len(result) == 4 + + result = df.loc[4:5] + expected = df.reindex([4, 5]) # reindex with int + tm.assert_frame_equal(result, expected, check_index_type=False) + assert len(result) == 2 + + result = df.loc[4:5] + expected = df.reindex([4.0, 5.0]) # reindex with float + tm.assert_frame_equal(result, expected) + assert len(result) == 2 + + # loc_float changes this to work properly + result = df.loc[1:2] + expected = df.iloc[0:2] + tm.assert_frame_equal(result, expected) + + df.loc[1:2] = 0 + result = df[1:2] + assert (result == 0).all().all() + + # #2727 + index = Index([1.0, 2.5, 3.5, 4.5, 5.0]) + df = DataFrame(np.random.randn(5, 5), index=index) + + # positional slicing only via iloc! 
+ msg = ( + "cannot do positional indexing on Float64Index with " + r"these indexers \[1.0\] of type float" + ) + with pytest.raises(TypeError, match=msg): + df.iloc[1.0:5] + + result = df.iloc[4:5] + expected = df.reindex([5.0]) + tm.assert_frame_equal(result, expected) + assert len(result) == 1 + + cp = df.copy() + + with pytest.raises(TypeError, match=_slice_msg): + cp.iloc[1.0:5] = 0 + + with pytest.raises(TypeError, match=msg): + result = cp.iloc[1.0:5] == 0 + + assert result.values.all() + assert (cp.iloc[0:1] == df.iloc[0:1]).values.all() + + cp = df.copy() + cp.iloc[4:5] = 0 + assert (cp.iloc[4:5] == 0).values.all() + assert (cp.iloc[0:4] == df.iloc[0:4]).values.all() + + # float slicing + result = df.loc[1.0:5] + expected = df + tm.assert_frame_equal(result, expected) + assert len(result) == 5 + + result = df.loc[1.1:5] + expected = df.reindex([2.5, 3.5, 4.5, 5.0]) + tm.assert_frame_equal(result, expected) + assert len(result) == 4 + + result = df.loc[4.51:5] + expected = df.reindex([5.0]) + tm.assert_frame_equal(result, expected) + assert len(result) == 1 + + result = df.loc[1.0:5.0] + expected = df.reindex([1.0, 2.5, 3.5, 4.5, 5.0]) + tm.assert_frame_equal(result, expected) + assert len(result) == 5 + + cp = df.copy() + cp.loc[1.0:5.0] = 0 + result = cp.loc[1.0:5.0] + assert (result == 0).values.all() + + def test_setitem_single_column_mixed_datetime(self): + df = DataFrame( + np.random.randn(5, 3), + index=["a", "b", "c", "d", "e"], + columns=["foo", "bar", "baz"], + ) + + df["timestamp"] = Timestamp("20010102") + + # check our dtypes + result = df.dtypes + expected = Series( + [np.dtype("float64")] * 3 + [np.dtype("datetime64[ns]")], + index=["foo", "bar", "baz", "timestamp"], + ) + tm.assert_series_equal(result, expected) + + # GH#16674 iNaT is treated as an integer when given by the user + df.loc["b", "timestamp"] = iNaT + assert not isna(df.loc["b", "timestamp"]) + assert df["timestamp"].dtype == np.object_ + assert df.loc["b", "timestamp"] == iNaT + + # allow this syntax (as of GH#3216) + df.loc["c", "timestamp"] = np.nan + assert isna(df.loc["c", "timestamp"]) + + # allow this syntax + df.loc["d", :] = np.nan + assert not isna(df.loc["c", :]).all() + + def test_setitem_mixed_datetime(self): + # GH 9336 + expected = DataFrame( + { + "a": [0, 0, 0, 0, 13, 14], + "b": [ + datetime(2012, 1, 1), + 1, + "x", + "y", + datetime(2013, 1, 1), + datetime(2014, 1, 1), + ], + } + ) + df = DataFrame(0, columns=list("ab"), index=range(6)) + df["b"] = pd.NaT + df.loc[0, "b"] = datetime(2012, 1, 1) + df.loc[1, "b"] = 1 + df.loc[[2, 3], "b"] = "x", "y" + A = np.array( + [ + [13, np.datetime64("2013-01-01T00:00:00")], + [14, np.datetime64("2014-01-01T00:00:00")], + ] + ) + df.loc[[4, 5], ["a", "b"]] = A + tm.assert_frame_equal(df, expected) + + def test_setitem_frame_float(self, float_frame): + piece = float_frame.loc[float_frame.index[:2], ["A", "B"]] + float_frame.loc[float_frame.index[-2] :, ["A", "B"]] = piece.values + result = float_frame.loc[float_frame.index[-2:], ["A", "B"]].values + expected = piece.values + tm.assert_almost_equal(result, expected) + + def test_setitem_frame_mixed(self, float_string_frame): + # GH 3216 + + # already aligned + f = float_string_frame.copy() + piece = DataFrame( + [[1.0, 2.0], [3.0, 4.0]], index=f.index[0:2], columns=["A", "B"] + ) + key = (f.index[slice(None, 2)], ["A", "B"]) + f.loc[key] = piece + tm.assert_almost_equal(f.loc[f.index[0:2], ["A", "B"]].values, piece.values) + + def test_setitem_frame_mixed_rows_unaligned(self, float_string_frame): + 
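+        # the RHS below carries extra labels ("foo", "bar") that are absent
+        # from the selected rows; .loc aligns on the key, so only the first
+        # two RHS rows are actually written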
# GH#3216 rows unaligned + f = float_string_frame.copy() + piece = DataFrame( + [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]], + index=list(f.index[0:2]) + ["foo", "bar"], + columns=["A", "B"], + ) + key = (f.index[slice(None, 2)], ["A", "B"]) + f.loc[key] = piece + tm.assert_almost_equal( + f.loc[f.index[0:2:], ["A", "B"]].values, piece.values[0:2] + ) + + def test_setitem_frame_mixed_key_unaligned(self, float_string_frame): + # GH#3216 key is unaligned with values + f = float_string_frame.copy() + piece = f.loc[f.index[:2], ["A"]] + piece.index = f.index[-2:] + key = (f.index[slice(-2, None)], ["A", "B"]) + f.loc[key] = piece + piece["B"] = np.nan + tm.assert_almost_equal(f.loc[f.index[-2:], ["A", "B"]].values, piece.values) + + def test_setitem_frame_mixed_ndarray(self, float_string_frame): + # GH#3216 ndarray + f = float_string_frame.copy() + piece = float_string_frame.loc[f.index[:2], ["A", "B"]] + key = (f.index[slice(-2, None)], ["A", "B"]) + f.loc[key] = piece.values + tm.assert_almost_equal(f.loc[f.index[-2:], ["A", "B"]].values, piece.values) + + def test_setitem_frame_upcast(self): + # needs upcasting + df = DataFrame([[1, 2, "foo"], [3, 4, "bar"]], columns=["A", "B", "C"]) + df2 = df.copy() + df2.loc[:, ["A", "B"]] = df.loc[:, ["A", "B"]] + 0.5 + expected = df.reindex(columns=["A", "B"]) + expected += 0.5 + expected["C"] = df["C"] + tm.assert_frame_equal(df2, expected) + + def test_setitem_frame_align(self, float_frame): + piece = float_frame.loc[float_frame.index[:2], ["A", "B"]] + piece.index = float_frame.index[-2:] + piece.columns = ["A", "B"] + float_frame.loc[float_frame.index[-2:], ["A", "B"]] = piece + result = float_frame.loc[float_frame.index[-2:], ["A", "B"]].values + expected = piece.values + tm.assert_almost_equal(result, expected) + + def test_getitem_setitem_ix_duplicates(self): + # #1201 + df = DataFrame(np.random.randn(5, 3), index=["foo", "foo", "bar", "baz", "bar"]) + + result = df.loc["foo"] + expected = df[:2] + tm.assert_frame_equal(result, expected) + + result = df.loc["bar"] + expected = df.iloc[[2, 4]] + tm.assert_frame_equal(result, expected) + + result = df.loc["baz"] + expected = df.iloc[3] + tm.assert_series_equal(result, expected) + + def test_getitem_ix_boolean_duplicates_multiple(self): + # #1201 + df = DataFrame(np.random.randn(5, 3), index=["foo", "foo", "bar", "baz", "bar"]) + + result = df.loc[["bar"]] + exp = df.iloc[[2, 4]] + tm.assert_frame_equal(result, exp) + + result = df.loc[df[1] > 0] + exp = df[df[1] > 0] + tm.assert_frame_equal(result, exp) + + result = df.loc[df[0] > 0] + exp = df[df[0] > 0] + tm.assert_frame_equal(result, exp) + + @pytest.mark.parametrize("bool_value", [True, False]) + def test_getitem_setitem_ix_bool_keyerror(self, bool_value): + # #2199 + df = DataFrame({"a": [1, 2, 3]}) + message = f"{bool_value}: boolean label can not be used without a boolean index" + with pytest.raises(KeyError, match=message): + df.loc[bool_value] + + msg = "cannot use a single bool to index into setitem" + with pytest.raises(KeyError, match=msg): + df.loc[bool_value] = 0 + + # TODO: rename? remove? 
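+    # setting an integer scalar into a column keeps an integer dtype, and a
+    # scalar .loc lookup then returns a plain integer rather than upcasting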
+ def test_single_element_ix_dont_upcast(self, float_frame): + float_frame["E"] = 1 + assert issubclass(float_frame["E"].dtype.type, (int, np.integer)) + + result = float_frame.loc[float_frame.index[5], "E"] + assert is_integer(result) + + # GH 11617 + df = DataFrame({"a": [1.23]}) + df["b"] = 666 + + result = df.loc[0, "b"] + assert is_integer(result) + + expected = Series([666], [0], name="b") + result = df.loc[[0], "b"] + tm.assert_series_equal(result, expected) + + def test_iloc_row(self): + df = DataFrame(np.random.randn(10, 4), index=range(0, 20, 2)) + + result = df.iloc[1] + exp = df.loc[2] + tm.assert_series_equal(result, exp) + + result = df.iloc[2] + exp = df.loc[4] + tm.assert_series_equal(result, exp) + + # slice + result = df.iloc[slice(4, 8)] + expected = df.loc[8:14] + tm.assert_frame_equal(result, expected) + + # list of integers + result = df.iloc[[1, 2, 4, 6]] + expected = df.reindex(df.index[[1, 2, 4, 6]]) + tm.assert_frame_equal(result, expected) + + def test_iloc_row_slice_view(self, using_array_manager): + df = DataFrame(np.random.randn(10, 4), index=range(0, 20, 2)) + original = df.copy() + + # verify slice is view + # setting it makes it raise/warn + subset = df.iloc[slice(4, 8)] + + assert np.shares_memory(df[2], subset[2]) + + msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame" + with pytest.raises(com.SettingWithCopyError, match=msg): + subset.loc[:, 2] = 0.0 + + exp_col = original[2].copy() + # TODO(ArrayManager) verify it is expected that the original didn't change + if not using_array_manager: + exp_col[4:8] = 0.0 + tm.assert_series_equal(df[2], exp_col) + + def test_iloc_col(self): + + df = DataFrame(np.random.randn(4, 10), columns=range(0, 20, 2)) + + result = df.iloc[:, 1] + exp = df.loc[:, 2] + tm.assert_series_equal(result, exp) + + result = df.iloc[:, 2] + exp = df.loc[:, 4] + tm.assert_series_equal(result, exp) + + # slice + result = df.iloc[:, slice(4, 8)] + expected = df.loc[:, 8:14] + tm.assert_frame_equal(result, expected) + + # list of integers + result = df.iloc[:, [1, 2, 4, 6]] + expected = df.reindex(columns=df.columns[[1, 2, 4, 6]]) + tm.assert_frame_equal(result, expected) + + def test_iloc_col_slice_view(self, using_array_manager): + df = DataFrame(np.random.randn(4, 10), columns=range(0, 20, 2)) + original = df.copy() + subset = df.iloc[:, slice(4, 8)] + + if not using_array_manager: + # verify slice is view + + assert np.shares_memory(df[8]._values, subset[8]._values) + + # and that we are setting a copy + msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame" + with pytest.raises(com.SettingWithCopyError, match=msg): + subset.loc[:, 8] = 0.0 + + assert (df[8] == 0).all() + else: + # TODO(ArrayManager) verify this is the desired behaviour + subset[8] = 0.0 + # subset changed + assert (subset[8] == 0).all() + # but df itself did not change (setitem replaces full column) + tm.assert_frame_equal(df, original) + + def test_loc_duplicates(self): + # gh-17105 + + # insert a duplicate element to the index + trange = date_range( + start=Timestamp(year=2017, month=1, day=1), + end=Timestamp(year=2017, month=1, day=5), + ) + + trange = trange.insert(loc=5, item=Timestamp(year=2017, month=1, day=5)) + + df = DataFrame(0, index=trange, columns=["A", "B"]) + bool_idx = np.array([False, False, False, False, False, True]) + + # assignment + df.loc[trange[bool_idx], "A"] = 6 + + expected = DataFrame( + {"A": [0, 0, 0, 0, 6, 6], "B": [0, 0, 0, 0, 0, 0]}, index=trange + ) + tm.assert_frame_equal(df, expected) + 
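+        # the duplicated timestamp occupies two positions but a single label,
+        # so the one .loc assignment above filled both of the last two rows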
+ # in-place + df = DataFrame(0, index=trange, columns=["A", "B"]) + df.loc[trange[bool_idx], "A"] += 6 + tm.assert_frame_equal(df, expected) + + def test_setitem_with_unaligned_tz_aware_datetime_column(self): + # GH 12981 + # Assignment of unaligned offset-aware datetime series. + # Make sure timezone isn't lost + column = Series(date_range("2015-01-01", periods=3, tz="utc"), name="dates") + df = DataFrame({"dates": column}) + df["dates"] = column[[1, 0, 2]] + tm.assert_series_equal(df["dates"], column) + + df = DataFrame({"dates": column}) + df.loc[[0, 1, 2], "dates"] = column[[1, 0, 2]] + tm.assert_series_equal(df["dates"], column) + + def test_loc_setitem_datetimelike_with_inference(self): + # GH 7592 + # assignment of timedeltas with NaT + + one_hour = timedelta(hours=1) + df = DataFrame(index=date_range("20130101", periods=4)) + df["A"] = np.array([1 * one_hour] * 4, dtype="m8[ns]") + df.loc[:, "B"] = np.array([2 * one_hour] * 4, dtype="m8[ns]") + df.loc[df.index[:3], "C"] = np.array([3 * one_hour] * 3, dtype="m8[ns]") + df.loc[:, "D"] = np.array([4 * one_hour] * 4, dtype="m8[ns]") + df.loc[df.index[:3], "E"] = np.array([5 * one_hour] * 3, dtype="m8[ns]") + df["F"] = np.timedelta64("NaT") + df.loc[df.index[:-1], "F"] = np.array([6 * one_hour] * 3, dtype="m8[ns]") + df.loc[df.index[-3] :, "G"] = date_range("20130101", periods=3) + df["H"] = np.datetime64("NaT") + result = df.dtypes + expected = Series( + [np.dtype("timedelta64[ns]")] * 6 + [np.dtype("datetime64[ns]")] * 2, + index=list("ABCDEFGH"), + ) + tm.assert_series_equal(result, expected) + + def test_getitem_boolean_indexing_mixed(self): + df = DataFrame( + { + 0: {35: np.nan, 40: np.nan, 43: np.nan, 49: np.nan, 50: np.nan}, + 1: { + 35: np.nan, + 40: 0.32632316859446198, + 43: np.nan, + 49: 0.32632316859446198, + 50: 0.39114724480578139, + }, + 2: { + 35: np.nan, + 40: np.nan, + 43: 0.29012581014105987, + 49: np.nan, + 50: np.nan, + }, + 3: {35: np.nan, 40: np.nan, 43: np.nan, 49: np.nan, 50: np.nan}, + 4: { + 35: 0.34215328467153283, + 40: np.nan, + 43: np.nan, + 49: np.nan, + 50: np.nan, + }, + "y": {35: 0, 40: 0, 43: 0, 49: 0, 50: 1}, + } + ) + + # mixed int/float ok + df2 = df.copy() + df2[df2 > 0.3] = 1 + expected = df.copy() + expected.loc[40, 1] = 1 + expected.loc[49, 1] = 1 + expected.loc[50, 1] = 1 + expected.loc[35, 4] = 1 + tm.assert_frame_equal(df2, expected) + + df["foo"] = "test" + msg = "not supported between instances|unorderable types" + + with pytest.raises(TypeError, match=msg): + df[df > 0.3] = 1 + + def test_type_error_multiindex(self): + # See gh-12218 + mi = MultiIndex.from_product([["x", "y"], [0, 1]], names=[None, "c"]) + dg = DataFrame( + [[1, 1, 2, 2], [3, 3, 4, 4]], columns=mi, index=Index([0, 1], name="i") + ) + with pytest.raises(InvalidIndexError, match="slice"): + dg[:, 0] + + index = Index(range(2), name="i") + columns = MultiIndex( + levels=[["x", "y"], [0, 1]], codes=[[0, 1], [0, 0]], names=[None, "c"] + ) + expected = DataFrame([[1, 2], [3, 4]], columns=columns, index=index) + + result = dg.loc[:, (slice(None), 0)] + tm.assert_frame_equal(result, expected) + + name = ("x", 0) + index = Index(range(2), name="i") + expected = Series([1, 3], index=index, name=name) + + result = dg["x", 0] + tm.assert_series_equal(result, expected) + + def test_getitem_interval_index_partial_indexing(self): + # GH#36490 + df = DataFrame( + np.ones((3, 4)), columns=pd.IntervalIndex.from_breaks(np.arange(5)) + ) + + expected = df.iloc[:, 0] + + res = df[0.5] + tm.assert_series_equal(res, expected) + + res = 
df.loc[:, 0.5] + tm.assert_series_equal(res, expected) + + def test_setitem_array_as_cell_value(self): + # GH#43422 + df = DataFrame(columns=["a", "b"], dtype=object) + df.loc[0] = {"a": np.zeros((2,)), "b": np.zeros((2, 2))} + expected = DataFrame({"a": [np.zeros((2,))], "b": [np.zeros((2, 2))]}) + tm.assert_frame_equal(df, expected) + + # with AM goes through split-path, loses dtype + @td.skip_array_manager_not_yet_implemented + def test_iloc_setitem_nullable_2d_values(self): + df = DataFrame({"A": [1, 2, 3]}, dtype="Int64") + orig = df.copy() + + df.loc[:] = df.values[:, ::-1] + tm.assert_frame_equal(df, orig) + + df.loc[:] = pd.core.arrays.PandasArray(df.values[:, ::-1]) + tm.assert_frame_equal(df, orig) + + df.iloc[:] = df.iloc[:, :] + tm.assert_frame_equal(df, orig) + + @pytest.mark.parametrize( + "null", [pd.NaT, pd.NaT.to_numpy("M8[ns]"), pd.NaT.to_numpy("m8[ns]")] + ) + def test_setting_mismatched_na_into_nullable_fails( + self, null, any_numeric_ea_dtype + ): + # GH#44514 don't cast mismatched nulls to pd.NA + df = DataFrame({"A": [1, 2, 3]}, dtype=any_numeric_ea_dtype) + ser = df["A"] + arr = ser._values + + msg = "|".join( + [ + r"int\(\) argument must be a string, a bytes-like object or a " + "(real )?number, not 'NaTType'", + r"timedelta64\[ns\] cannot be converted to an? (Floating|Integer)Dtype", + r"datetime64\[ns\] cannot be converted to an? (Floating|Integer)Dtype", + "object cannot be converted to a FloatingDtype", + "'values' contains non-numeric NA", + ] + ) + with pytest.raises(TypeError, match=msg): + arr[0] = null + + with pytest.raises(TypeError, match=msg): + arr[:2] = [null, null] + + with pytest.raises(TypeError, match=msg): + ser[0] = null + + with pytest.raises(TypeError, match=msg): + ser[:2] = [null, null] + + with pytest.raises(TypeError, match=msg): + ser.iloc[0] = null + + with pytest.raises(TypeError, match=msg): + ser.iloc[:2] = [null, null] + + with pytest.raises(TypeError, match=msg): + df.iloc[0, 0] = null + + with pytest.raises(TypeError, match=msg): + df.iloc[:2, 0] = [null, null] + + # Multi-Block + df2 = df.copy() + df2["B"] = ser.copy() + with pytest.raises(TypeError, match=msg): + df2.iloc[0, 0] = null + + with pytest.raises(TypeError, match=msg): + df2.iloc[:2, 0] = [null, null] + + def test_loc_expand_empty_frame_keep_index_name(self): + # GH#45621 + df = DataFrame(columns=["b"], index=Index([], name="a")) + df.loc[0] = 1 + expected = DataFrame({"b": [1]}, index=Index([0], name="a")) + tm.assert_frame_equal(df, expected) + + +class TestDataFrameIndexingUInt64: + def test_setitem(self, uint64_frame): + + df = uint64_frame + idx = df["A"].rename("foo") + + # setitem + assert "C" not in df.columns + df["C"] = idx + tm.assert_series_equal(df["C"], Series(idx, name="C")) + + assert "D" not in df.columns + df["D"] = "foo" + df["D"] = idx + tm.assert_series_equal(df["D"], Series(idx, name="D")) + del df["D"] + + # With NaN: because uint64 has no NaN element, + # the column should be cast to object. 
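+        # (an unsigned integer column has no representation for a missing
+        # value, so pandas widens the whole column to object dtype instead
+        # of attempting a lossy numeric cast)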
+ df2 = df.copy() + df2.iloc[1, 1] = pd.NaT + df2.iloc[1, 2] = pd.NaT + result = df2["B"] + tm.assert_series_equal(notna(result), Series([True, False, True], name="B")) + tm.assert_series_equal( + df2.dtypes, + Series( + [np.dtype("uint64"), np.dtype("O"), np.dtype("O")], + index=["A", "B", "C"], + ), + ) + + +def test_object_casting_indexing_wraps_datetimelike(using_array_manager): + # GH#31649, check the indexing methods all the way down the stack + df = DataFrame( + { + "A": [1, 2], + "B": date_range("2000", periods=2), + "C": pd.timedelta_range("1 Day", periods=2), + } + ) + + ser = df.loc[0] + assert isinstance(ser.values[1], Timestamp) + assert isinstance(ser.values[2], pd.Timedelta) + + ser = df.iloc[0] + assert isinstance(ser.values[1], Timestamp) + assert isinstance(ser.values[2], pd.Timedelta) + + ser = df.xs(0, axis=0) + assert isinstance(ser.values[1], Timestamp) + assert isinstance(ser.values[2], pd.Timedelta) + + if using_array_manager: + # remainder of the test checking BlockManager internals + return + + mgr = df._mgr + mgr._rebuild_blknos_and_blklocs() + arr = mgr.fast_xs(0) + assert isinstance(arr[1], Timestamp) + assert isinstance(arr[2], pd.Timedelta) + + blk = mgr.blocks[mgr.blknos[1]] + assert blk.dtype == "M8[ns]" # we got the right block + val = blk.iget((0, 0)) + assert isinstance(val, Timestamp) + + blk = mgr.blocks[mgr.blknos[2]] + assert blk.dtype == "m8[ns]" # we got the right block + val = blk.iget((0, 0)) + assert isinstance(val, pd.Timedelta) + + +msg1 = r"Cannot setitem on a Categorical with a new category( \(.*\))?, set the" +msg2 = "Cannot set a Categorical with another, without identical categories" + + +class TestLocILocDataFrameCategorical: + @pytest.fixture + def orig(self): + cats = Categorical(["a", "a", "a", "a", "a", "a", "a"], categories=["a", "b"]) + idx = Index(["h", "i", "j", "k", "l", "m", "n"]) + values = [1, 1, 1, 1, 1, 1, 1] + orig = DataFrame({"cats": cats, "values": values}, index=idx) + return orig + + @pytest.fixture + def exp_single_row(self): + # The expected values if we change a single row + cats1 = Categorical(["a", "a", "b", "a", "a", "a", "a"], categories=["a", "b"]) + idx1 = Index(["h", "i", "j", "k", "l", "m", "n"]) + values1 = [1, 1, 2, 1, 1, 1, 1] + exp_single_row = DataFrame({"cats": cats1, "values": values1}, index=idx1) + return exp_single_row + + @pytest.fixture + def exp_multi_row(self): + # assign multiple rows (mixed values) (-> array) -> exp_multi_row + # changed multiple rows + cats2 = Categorical(["a", "a", "b", "b", "a", "a", "a"], categories=["a", "b"]) + idx2 = Index(["h", "i", "j", "k", "l", "m", "n"]) + values2 = [1, 1, 2, 2, 1, 1, 1] + exp_multi_row = DataFrame({"cats": cats2, "values": values2}, index=idx2) + return exp_multi_row + + @pytest.fixture + def exp_parts_cats_col(self): + # changed part of the cats column + cats3 = Categorical(["a", "a", "b", "b", "a", "a", "a"], categories=["a", "b"]) + idx3 = Index(["h", "i", "j", "k", "l", "m", "n"]) + values3 = [1, 1, 1, 1, 1, 1, 1] + exp_parts_cats_col = DataFrame({"cats": cats3, "values": values3}, index=idx3) + return exp_parts_cats_col + + @pytest.fixture + def exp_single_cats_value(self): + # changed single value in cats col + cats4 = Categorical(["a", "a", "b", "a", "a", "a", "a"], categories=["a", "b"]) + idx4 = Index(["h", "i", "j", "k", "l", "m", "n"]) + values4 = [1, 1, 1, 1, 1, 1, 1] + exp_single_cats_value = DataFrame( + {"cats": cats4, "values": values4}, index=idx4 + ) + return exp_single_cats_value + + @pytest.mark.parametrize("indexer", 
[tm.loc, tm.iloc]) + def test_loc_iloc_setitem_list_of_lists(self, orig, exp_multi_row, indexer): + # - assign multiple rows (mixed values) -> exp_multi_row + df = orig.copy() + + key = slice(2, 4) + if indexer is tm.loc: + key = slice("j", "k") + + indexer(df)[key, :] = [["b", 2], ["b", 2]] + tm.assert_frame_equal(df, exp_multi_row) + + df = orig.copy() + with pytest.raises(TypeError, match=msg1): + indexer(df)[key, :] = [["c", 2], ["c", 2]] + + @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc, tm.at, tm.iat]) + def test_loc_iloc_at_iat_setitem_single_value_in_categories( + self, orig, exp_single_cats_value, indexer + ): + # - assign a single value -> exp_single_cats_value + df = orig.copy() + + key = (2, 0) + if indexer in [tm.loc, tm.at]: + key = (df.index[2], df.columns[0]) + + # "b" is among the categories for df["cat"] + indexer(df)[key] = "b" + tm.assert_frame_equal(df, exp_single_cats_value) + + # "c" is not among the categories for df["cat"] + with pytest.raises(TypeError, match=msg1): + indexer(df)[key] = "c" + + @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc]) + def test_loc_iloc_setitem_mask_single_value_in_categories( + self, orig, exp_single_cats_value, indexer + ): + # mask with single True + df = orig.copy() + + mask = df.index == "j" + key = 0 + if indexer is tm.loc: + key = df.columns[key] + + indexer(df)[mask, key] = "b" + tm.assert_frame_equal(df, exp_single_cats_value) + + @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc]) + def test_loc_iloc_setitem_full_row_non_categorical_rhs( + self, orig, exp_single_row, indexer + ): + # - assign a complete row (mixed values) -> exp_single_row + df = orig.copy() + + key = 2 + if indexer is tm.loc: + key = df.index[2] + + # not categorical dtype, but "b" _is_ among the categories for df["cat"] + indexer(df)[key, :] = ["b", 2] + tm.assert_frame_equal(df, exp_single_row) + + # "c" is not among the categories for df["cat"] + with pytest.raises(TypeError, match=msg1): + indexer(df)[key, :] = ["c", 2] + + @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc]) + def test_loc_iloc_setitem_partial_col_categorical_rhs( + self, orig, exp_parts_cats_col, indexer + ): + # assign a part of a column with dtype == categorical -> + # exp_parts_cats_col + df = orig.copy() + + key = (slice(2, 4), 0) + if indexer is tm.loc: + key = (slice("j", "k"), df.columns[0]) + + # same categories as we currently have in df["cats"] + compat = Categorical(["b", "b"], categories=["a", "b"]) + indexer(df)[key] = compat + tm.assert_frame_equal(df, exp_parts_cats_col) + + # categories do not match df["cat"]'s, but "b" is among them + semi_compat = Categorical(list("bb"), categories=list("abc")) + with pytest.raises(TypeError, match=msg2): + # different categories but holdable values + # -> not sure if this should fail or pass + indexer(df)[key] = semi_compat + + # categories do not match df["cat"]'s, and "c" is not among them + incompat = Categorical(list("cc"), categories=list("abc")) + with pytest.raises(TypeError, match=msg2): + # different values + indexer(df)[key] = incompat +
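+    # The rule exercised above and below: assignment into a categorical
+    # column is value-based.  Any value already among the column's categories
+    # can be set through any indexer, while a value outside the categories
+    # raises TypeError (msg1/msg2) instead of silently growing the
+    # categories; callers have to extend the categories first (e.g. via
+    # .cat.add_categories).  The next test checks the same rule with a
+    # plain list on the RHS.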
+ @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc]) + def test_loc_iloc_setitem_non_categorical_rhs( + self, orig, exp_parts_cats_col, indexer + ): + # assign a part of a column with dtype != categorical -> exp_parts_cats_col + df = orig.copy() + + key = (slice(2, 4), 0) + if indexer is tm.loc: + key = (slice("j", "k"), df.columns[0]) + + # "b" is among the categories for df["cat"] + indexer(df)[key] = ["b", "b"] + tm.assert_frame_equal(df, exp_parts_cats_col) + + # "c" is not among the categories + with pytest.raises(TypeError, match=msg1): + indexer(df)[key] = ["c", "c"] + + def test_loc_on_multiindex_one_level(self): + # GH#45779 + df = DataFrame( + data=[[0], [1]], + index=MultiIndex.from_tuples([("a",), ("b",)], names=["first"]), + ) + expected = DataFrame( + data=[[0]], index=MultiIndex.from_tuples([("a",)], names=["first"]) + ) + result = df.loc["a"] + tm.assert_frame_equal(result, expected) + + + class TestDeprecatedIndexers: + @pytest.mark.parametrize( + "key", [{1}, {1: 1}, ({1}, "a"), ({1: 1}, "a"), (1, {"a"}), (1, {"a": "a"})] + ) + def test_getitem_dict_and_set_deprecated(self, key): + # GH#42825 + df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"]) + with tm.assert_produces_warning(FutureWarning): + df.loc[key] + + @pytest.mark.parametrize( + "key", + [ + {1}, + {1: 1}, + (({1}, 2), "a"), + (({1: 1}, 2), "a"), + ((1, 2), {"a"}), + ((1, 2), {"a": "a"}), + ], + ) + def test_getitem_dict_and_set_deprecated_multiindex(self, key): + # GH#42825 + df = DataFrame( + [[1, 2], [3, 4]], + columns=["a", "b"], + index=MultiIndex.from_tuples([(1, 2), (3, 4)]), + ) + with tm.assert_produces_warning(FutureWarning): + df.loc[key] + + @pytest.mark.parametrize( + "key", [{1}, {1: 1}, ({1}, "a"), ({1: 1}, "a"), (1, {"a"}), (1, {"a": "a"})] + ) + def test_setitem_dict_and_set_deprecated(self, key): + # GH#42825 + df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"]) + with tm.assert_produces_warning(FutureWarning): + df.loc[key] = 1 + + @pytest.mark.parametrize( + "key", + [ + {1}, + {1: 1}, + (({1}, 2), "a"), + (({1: 1}, 2), "a"), + ((1, 2), {"a"}), + ((1, 2), {"a": "a"}), + ], + ) + def test_setitem_dict_and_set_deprecated_multiindex(self, key): + # GH#42825 + df = DataFrame( + [[1, 2], [3, 4]], + columns=["a", "b"], + index=MultiIndex.from_tuples([(1, 2), (3, 4)]), + ) + with tm.assert_produces_warning(FutureWarning): + df.loc[key] = 1 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/indexing/test_insert.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/indexing/test_insert.py new file mode 100644 index 0000000..f67ecf6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/indexing/test_insert.py @@ -0,0 +1,106 @@ +""" +test_insert is specifically for the DataFrame.insert method; not to be +confused with tests with "insert" in their names that are really testing +__setitem__.
+""" +import numpy as np +import pytest + +from pandas.errors import PerformanceWarning + +from pandas import ( + DataFrame, + Index, +) +import pandas._testing as tm + + +class TestDataFrameInsert: + def test_insert(self): + df = DataFrame( + np.random.randn(5, 3), index=np.arange(5), columns=["c", "b", "a"] + ) + + df.insert(0, "foo", df["a"]) + tm.assert_index_equal(df.columns, Index(["foo", "c", "b", "a"])) + tm.assert_series_equal(df["a"], df["foo"], check_names=False) + + df.insert(2, "bar", df["c"]) + tm.assert_index_equal(df.columns, Index(["foo", "c", "bar", "b", "a"])) + tm.assert_almost_equal(df["c"], df["bar"], check_names=False) + + with pytest.raises(ValueError, match="already exists"): + df.insert(1, "a", df["b"]) + + msg = "cannot insert c, already exists" + with pytest.raises(ValueError, match=msg): + df.insert(1, "c", df["b"]) + + df.columns.name = "some_name" + # preserve columns name field + df.insert(0, "baz", df["c"]) + assert df.columns.name == "some_name" + + def test_insert_column_bug_4032(self): + + # GH#4032, inserting a column and renaming causing errors + df = DataFrame({"b": [1.1, 2.2]}) + + df = df.rename(columns={}) + df.insert(0, "a", [1, 2]) + result = df.rename(columns={}) + + str(result) + expected = DataFrame([[1, 1.1], [2, 2.2]], columns=["a", "b"]) + tm.assert_frame_equal(result, expected) + + df.insert(0, "c", [1.3, 2.3]) + result = df.rename(columns={}) + + str(result) + expected = DataFrame([[1.3, 1, 1.1], [2.3, 2, 2.2]], columns=["c", "a", "b"]) + tm.assert_frame_equal(result, expected) + + def test_insert_with_columns_dups(self): + # GH#14291 + df = DataFrame() + df.insert(0, "A", ["g", "h", "i"], allow_duplicates=True) + df.insert(0, "A", ["d", "e", "f"], allow_duplicates=True) + df.insert(0, "A", ["a", "b", "c"], allow_duplicates=True) + exp = DataFrame( + [["a", "d", "g"], ["b", "e", "h"], ["c", "f", "i"]], columns=["A", "A", "A"] + ) + tm.assert_frame_equal(df, exp) + + def test_insert_item_cache(self, using_array_manager): + df = DataFrame(np.random.randn(4, 3)) + ser = df[0] + + if using_array_manager: + expected_warning = None + else: + # with BlockManager warn about high fragmentation of single dtype + expected_warning = PerformanceWarning + + with tm.assert_produces_warning(expected_warning): + for n in range(100): + df[n + 3] = df[1] * n + + ser.values[0] = 99 + + assert df.iloc[0, 0] == df[0][0] + + def test_insert_EA_no_warning(self): + # PerformanceWarning about fragmented frame should not be raised when + # using EAs (https://github.com/pandas-dev/pandas/issues/44098) + df = DataFrame(np.random.randint(0, 100, size=(3, 100)), dtype="Int64") + with tm.assert_produces_warning(None): + df["a"] = np.array([1, 2, 3]) + + def test_insert_frame(self): + # GH#42403 + df = DataFrame({"col1": [1, 2], "col2": [3, 4]}) + + msg = r"Expected a 1D array, got an array with shape \(2, 2\)" + with pytest.raises(ValueError, match=msg): + df.insert(1, "newcol", df) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/indexing/test_lookup.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/indexing/test_lookup.py new file mode 100644 index 0000000..caab5fe --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/indexing/test_lookup.py @@ -0,0 +1,94 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Series, +) +import pandas._testing as tm + + +class TestLookup: + def test_lookup_float(self, float_frame): + df = float_frame + rows = list(df.index) * len(df.columns) + cols = list(df.columns) 
* len(df.index) + with tm.assert_produces_warning(FutureWarning): + result = df.lookup(rows, cols) + + expected = np.array([df.loc[r, c] for r, c in zip(rows, cols)]) + tm.assert_numpy_array_equal(result, expected) + + def test_lookup_mixed(self, float_string_frame): + df = float_string_frame + rows = list(df.index) * len(df.columns) + cols = list(df.columns) * len(df.index) + with tm.assert_produces_warning(FutureWarning): + result = df.lookup(rows, cols) + + expected = np.array( + [df.loc[r, c] for r, c in zip(rows, cols)], dtype=np.object_ + ) + tm.assert_almost_equal(result, expected) + + def test_lookup_bool(self): + df = DataFrame( + { + "label": ["a", "b", "a", "c"], + "mask_a": [True, True, False, True], + "mask_b": [True, False, False, False], + "mask_c": [False, True, False, True], + } + ) + with tm.assert_produces_warning(FutureWarning): + df["mask"] = df.lookup(df.index, "mask_" + df["label"]) + + exp_mask = np.array( + [df.loc[r, c] for r, c in zip(df.index, "mask_" + df["label"])] + ) + + tm.assert_series_equal(df["mask"], Series(exp_mask, name="mask")) + assert df["mask"].dtype == np.bool_ + + def test_lookup_raises(self, float_frame): + with pytest.raises(KeyError, match="'One or more row labels was not found'"): + with tm.assert_produces_warning(FutureWarning): + float_frame.lookup(["xyz"], ["A"]) + + with pytest.raises(KeyError, match="'One or more column labels was not found'"): + with tm.assert_produces_warning(FutureWarning): + float_frame.lookup([float_frame.index[0]], ["xyz"]) + + with pytest.raises(ValueError, match="same size"): + with tm.assert_produces_warning(FutureWarning): + float_frame.lookup(["a", "b", "c"], ["a"]) + + def test_lookup_requires_unique_axes(self): + # GH#33041 raise with a helpful error message + df = DataFrame(np.random.randn(6).reshape(3, 2), columns=["A", "A"]) + + rows = [0, 1] + cols = ["A", "A"] + + # homogeneous-dtype case + with pytest.raises(ValueError, match="requires unique index and columns"): + with tm.assert_produces_warning(FutureWarning): + df.lookup(rows, cols) + with pytest.raises(ValueError, match="requires unique index and columns"): + with tm.assert_produces_warning(FutureWarning): + df.T.lookup(cols, rows) + + # heterogeneous dtype + df["B"] = 0 + with pytest.raises(ValueError, match="requires unique index and columns"): + with tm.assert_produces_warning(FutureWarning): + df.lookup(rows, cols) + + +def test_lookup_deprecated(): + # GH#18262 + df = DataFrame( + {"col": ["A", "A", "B", "B"], "A": [80, 23, np.nan, 22], "B": [80, 55, 76, 67]} + ) + with tm.assert_produces_warning(FutureWarning): + df.lookup(df.index, df["col"]) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/indexing/test_mask.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/indexing/test_mask.py new file mode 100644 index 0000000..bd7ffe9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/indexing/test_mask.py @@ -0,0 +1,162 @@ +""" +Tests for DataFrame.mask; tests DataFrame.where as a side-effect. 
+""" + +import numpy as np + +from pandas import ( + NA, + DataFrame, + Series, + StringDtype, + Timedelta, + isna, +) +import pandas._testing as tm + + +class TestDataFrameMask: + def test_mask(self): + df = DataFrame(np.random.randn(5, 3)) + cond = df > 0 + + rs = df.where(cond, np.nan) + tm.assert_frame_equal(rs, df.mask(df <= 0)) + tm.assert_frame_equal(rs, df.mask(~cond)) + + other = DataFrame(np.random.randn(5, 3)) + rs = df.where(cond, other) + tm.assert_frame_equal(rs, df.mask(df <= 0, other)) + tm.assert_frame_equal(rs, df.mask(~cond, other)) + + def test_mask2(self): + # see GH#21891 + df = DataFrame([1, 2]) + res = df.mask([[True], [False]]) + + exp = DataFrame([np.nan, 2]) + tm.assert_frame_equal(res, exp) + + def test_mask_inplace(self): + # GH#8801 + df = DataFrame(np.random.randn(5, 3)) + cond = df > 0 + + rdf = df.copy() + + return_value = rdf.where(cond, inplace=True) + assert return_value is None + tm.assert_frame_equal(rdf, df.where(cond)) + tm.assert_frame_equal(rdf, df.mask(~cond)) + + rdf = df.copy() + return_value = rdf.where(cond, -df, inplace=True) + assert return_value is None + tm.assert_frame_equal(rdf, df.where(cond, -df)) + tm.assert_frame_equal(rdf, df.mask(~cond, -df)) + + def test_mask_edge_case_1xN_frame(self): + # GH#4071 + df = DataFrame([[1, 2]]) + res = df.mask(DataFrame([[True, False]])) + expec = DataFrame([[np.nan, 2]]) + tm.assert_frame_equal(res, expec) + + def test_mask_callable(self): + # GH#12533 + df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + result = df.mask(lambda x: x > 4, lambda x: x + 1) + exp = DataFrame([[1, 2, 3], [4, 6, 7], [8, 9, 10]]) + tm.assert_frame_equal(result, exp) + tm.assert_frame_equal(result, df.mask(df > 4, df + 1)) + + # return ndarray and scalar + result = df.mask(lambda x: (x % 2 == 0).values, lambda x: 99) + exp = DataFrame([[1, 99, 3], [99, 5, 99], [7, 99, 9]]) + tm.assert_frame_equal(result, exp) + tm.assert_frame_equal(result, df.mask(df % 2 == 0, 99)) + + # chain + result = (df + 2).mask(lambda x: x > 8, lambda x: x + 10) + exp = DataFrame([[3, 4, 5], [6, 7, 8], [19, 20, 21]]) + tm.assert_frame_equal(result, exp) + tm.assert_frame_equal(result, (df + 2).mask((df + 2) > 8, (df + 2) + 10)) + + def test_mask_dtype_bool_conversion(self): + # GH#3733 + df = DataFrame(data=np.random.randn(100, 50)) + df = df.where(df > 0) # create nans + bools = df > 0 + mask = isna(df) + expected = bools.astype(object).mask(mask) + result = bools.mask(mask) + tm.assert_frame_equal(result, expected) + + def test_mask_pos_args_deprecation(self, frame_or_series): + # https://github.com/pandas-dev/pandas/issues/41485 + obj = DataFrame({"a": range(5)}) + expected = DataFrame({"a": [-1, 1, -1, 3, -1]}) + obj = tm.get_obj(obj, frame_or_series) + expected = tm.get_obj(expected, frame_or_series) + + cond = obj % 2 == 0 + msg = ( + r"In a future version of pandas all arguments of " + f"{frame_or_series.__name__}.mask except for " + r"the arguments 'cond' and 'other' will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = obj.mask(cond, -1, False) + tm.assert_equal(result, expected) + + +def test_mask_try_cast_deprecated(frame_or_series): + + obj = DataFrame(np.random.randn(4, 3)) + if frame_or_series is not DataFrame: + obj = obj[0] + + mask = obj > 0 + + with tm.assert_produces_warning(FutureWarning): + # try_cast keyword deprecated + obj.mask(mask, -1, try_cast=True) + + +def test_mask_stringdtype(frame_or_series): + # GH 40824 + obj = DataFrame( + {"A": ["foo", "bar", "baz", NA]}, + 
index=["id1", "id2", "id3", "id4"], + dtype=StringDtype(), + ) + filtered_obj = DataFrame( + {"A": ["this", "that"]}, index=["id2", "id3"], dtype=StringDtype() + ) + expected = DataFrame( + {"A": [NA, "this", "that", NA]}, + index=["id1", "id2", "id3", "id4"], + dtype=StringDtype(), + ) + if frame_or_series is Series: + obj = obj["A"] + filtered_obj = filtered_obj["A"] + expected = expected["A"] + + filter_ser = Series([False, True, True, False]) + result = obj.mask(filter_ser, filtered_obj) + + tm.assert_equal(result, expected) + + +def test_mask_where_dtype_timedelta(): + # https://github.com/pandas-dev/pandas/issues/39548 + df = DataFrame([Timedelta(i, unit="d") for i in range(5)]) + + expected = DataFrame(np.full(5, np.nan, dtype="timedelta64[ns]")) + tm.assert_frame_equal(df.mask(df.notna()), expected) + + expected = DataFrame( + [np.nan, np.nan, np.nan, Timedelta("3 day"), Timedelta("4 day")] + ) + tm.assert_frame_equal(df.where(df > Timedelta(2, unit="d")), expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/indexing/test_set_value.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/indexing/test_set_value.py new file mode 100644 index 0000000..7b68566 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/indexing/test_set_value.py @@ -0,0 +1,68 @@ +import numpy as np + +from pandas.core.dtypes.common import is_float_dtype + +from pandas import ( + DataFrame, + isna, +) + + +class TestSetValue: + def test_set_value(self, float_frame): + for idx in float_frame.index: + for col in float_frame.columns: + float_frame._set_value(idx, col, 1) + assert float_frame[col][idx] == 1 + + def test_set_value_resize(self, float_frame): + + res = float_frame._set_value("foobar", "B", 0) + assert res is None + assert float_frame.index[-1] == "foobar" + assert float_frame._get_value("foobar", "B") == 0 + + float_frame.loc["foobar", "qux"] = 0 + assert float_frame._get_value("foobar", "qux") == 0 + + res = float_frame.copy() + res._set_value("foobar", "baz", "sam") + assert res["baz"].dtype == np.object_ + + res = float_frame.copy() + res._set_value("foobar", "baz", True) + assert res["baz"].dtype == np.object_ + + res = float_frame.copy() + res._set_value("foobar", "baz", 5) + assert is_float_dtype(res["baz"]) + assert isna(res["baz"].drop(["foobar"])).all() + + res._set_value("foobar", "baz", "sam") + assert res.loc["foobar", "baz"] == "sam" + + def test_set_value_with_index_dtype_change(self): + df_orig = DataFrame(np.random.randn(3, 3), index=range(3), columns=list("ABC")) + + # this is actually ambiguous as the 2 is interpreted as a positional + # so column is not created + df = df_orig.copy() + df._set_value("C", 2, 1.0) + assert list(df.index) == list(df_orig.index) + ["C"] + # assert list(df.columns) == list(df_orig.columns) + [2] + + df = df_orig.copy() + df.loc["C", 2] = 1.0 + assert list(df.index) == list(df_orig.index) + ["C"] + # assert list(df.columns) == list(df_orig.columns) + [2] + + # create both new + df = df_orig.copy() + df._set_value("C", "D", 1.0) + assert list(df.index) == list(df_orig.index) + ["C"] + assert list(df.columns) == list(df_orig.columns) + ["D"] + + df = df_orig.copy() + df.loc["C", "D"] = 1.0 + assert list(df.index) == list(df_orig.index) + ["C"] + assert list(df.columns) == list(df_orig.columns) + ["D"] diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/indexing/test_setitem.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/indexing/test_setitem.py new file mode 100644 index 
0000000..cd0a0a0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/indexing/test_setitem.py @@ -0,0 +1,1160 @@ +from datetime import datetime + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas.core.dtypes.base import _registry as ea_registry +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_interval_dtype, + is_object_dtype, +) +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + DatetimeTZDtype, + IntervalDtype, + PeriodDtype, +) + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + DatetimeIndex, + Index, + Interval, + IntervalIndex, + MultiIndex, + NaT, + Period, + PeriodIndex, + Series, + Timestamp, + cut, + date_range, + notna, + period_range, +) +import pandas._testing as tm +from pandas.core.arrays import SparseArray + +from pandas.tseries.offsets import BDay + + +class TestDataFrameSetItem: + def test_setitem_str_subclass(self): + # GH#37366 + class mystring(str): + pass + + data = ["2020-10-22 01:21:00+00:00"] + index = DatetimeIndex(data) + df = DataFrame({"a": [1]}, index=index) + df["b"] = 2 + df[mystring("c")] = 3 + expected = DataFrame({"a": [1], "b": [2], mystring("c"): [3]}, index=index) + tm.assert_equal(df, expected) + + @pytest.mark.parametrize("dtype", ["int32", "int64", "float32", "float64"]) + def test_setitem_dtype(self, dtype, float_frame): + arr = np.random.randn(len(float_frame)) + + float_frame[dtype] = np.array(arr, dtype=dtype) + assert float_frame[dtype].dtype.name == dtype + + def test_setitem_list_not_dataframe(self, float_frame): + data = np.random.randn(len(float_frame), 2) + float_frame[["A", "B"]] = data + tm.assert_almost_equal(float_frame[["A", "B"]].values, data) + + def test_setitem_error_msmgs(self): + + # GH 7432 + df = DataFrame( + {"bar": [1, 2, 3], "baz": ["d", "e", "f"]}, + index=Index(["a", "b", "c"], name="foo"), + ) + ser = Series( + ["g", "h", "i", "j"], + index=Index(["a", "b", "c", "a"], name="foo"), + name="fiz", + ) + msg = "cannot reindex on an axis with duplicate labels" + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + df["newcol"] = ser + + # GH 4107, more descriptive error message + df = DataFrame(np.random.randint(0, 2, (4, 4)), columns=["a", "b", "c", "d"]) + + msg = "incompatible index of inserted column with frame index" + with pytest.raises(TypeError, match=msg): + df["gr"] = df.groupby(["b", "c"]).count() + + def test_setitem_benchmark(self): + # from the vb_suite/frame_methods/frame_insert_columns + N = 10 + K = 5 + df = DataFrame(index=range(N)) + new_col = np.random.randn(N) + for i in range(K): + df[i] = new_col + expected = DataFrame(np.repeat(new_col, K).reshape(N, K), index=range(N)) + tm.assert_frame_equal(df, expected) + + def test_setitem_different_dtype(self): + df = DataFrame( + np.random.randn(5, 3), index=np.arange(5), columns=["c", "b", "a"] + ) + df.insert(0, "foo", df["a"]) + df.insert(2, "bar", df["c"]) + + # diff dtype + + # new item + df["x"] = df["a"].astype("float32") + result = df.dtypes + expected = Series( + [np.dtype("float64")] * 5 + [np.dtype("float32")], + index=["foo", "c", "bar", "b", "a", "x"], + ) + tm.assert_series_equal(result, expected) + + # replacing current (in different block) + df["a"] = df["a"].astype("float32") + result = df.dtypes + expected = Series( + [np.dtype("float64")] * 4 + [np.dtype("float32")] * 2, + index=["foo", "c", "bar", "b", "a", "x"], + ) + tm.assert_series_equal(result, expected) + 
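
# --- illustrative sketch (not part of the vendored file): the dtype rules the
# test above relies on. Plain __setitem__ stores a new column with the dtype of
# the assigned values, and re-assigning an existing column replaces its dtype.
# The tiny frame is a hypothetical stand-in, assuming pandas 1.x behavior.
import numpy as np
import pandas as pd

demo = pd.DataFrame({"a": np.zeros(3)})            # starts as float64
demo["x"] = demo["a"].astype("float32")            # new column -> float32
demo["a"] = demo["a"].astype("float32")            # existing column replaced
assert demo.dtypes.tolist() == [np.dtype("float32"), np.dtype("float32")]
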
+ df["y"] = df["a"].astype("int32") + result = df.dtypes + expected = Series( + [np.dtype("float64")] * 4 + [np.dtype("float32")] * 2 + [np.dtype("int32")], + index=["foo", "c", "bar", "b", "a", "x", "y"], + ) + tm.assert_series_equal(result, expected) + + def test_setitem_empty_columns(self): + # GH 13522 + df = DataFrame(index=["A", "B", "C"]) + df["X"] = df.index + df["X"] = ["x", "y", "z"] + exp = DataFrame(data={"X": ["x", "y", "z"]}, index=["A", "B", "C"]) + tm.assert_frame_equal(df, exp) + + def test_setitem_dt64_index_empty_columns(self): + rng = date_range("1/1/2000 00:00:00", "1/1/2000 1:59:50", freq="10s") + df = DataFrame(index=np.arange(len(rng))) + + df["A"] = rng + assert df["A"].dtype == np.dtype("M8[ns]") + + def test_setitem_timestamp_empty_columns(self): + # GH#19843 + df = DataFrame(index=range(3)) + df["now"] = Timestamp("20130101", tz="UTC") + + expected = DataFrame( + [[Timestamp("20130101", tz="UTC")]] * 3, index=[0, 1, 2], columns=["now"] + ) + tm.assert_frame_equal(df, expected) + + def test_setitem_wrong_length_categorical_dtype_raises(self): + # GH#29523 + cat = Categorical.from_codes([0, 1, 1, 0, 1, 2], ["a", "b", "c"]) + df = DataFrame(range(10), columns=["bar"]) + + msg = ( + rf"Length of values \({len(cat)}\) " + rf"does not match length of index \({len(df)}\)" + ) + with pytest.raises(ValueError, match=msg): + df["foo"] = cat + + def test_setitem_with_sparse_value(self): + # GH#8131 + df = DataFrame({"c_1": ["a", "b", "c"], "n_1": [1.0, 2.0, 3.0]}) + sp_array = SparseArray([0, 0, 1]) + df["new_column"] = sp_array + + expected = Series(sp_array, name="new_column") + tm.assert_series_equal(df["new_column"], expected) + + def test_setitem_with_unaligned_sparse_value(self): + df = DataFrame({"c_1": ["a", "b", "c"], "n_1": [1.0, 2.0, 3.0]}) + sp_series = Series(SparseArray([0, 0, 1]), index=[2, 1, 0]) + + df["new_column"] = sp_series + expected = Series(SparseArray([1, 0, 0]), name="new_column") + tm.assert_series_equal(df["new_column"], expected) + + def test_setitem_period_preserves_dtype(self): + # GH: 26861 + data = [Period("2003-12", "D")] + result = DataFrame([]) + result["a"] = data + + expected = DataFrame({"a": data}) + + tm.assert_frame_equal(result, expected) + + def test_setitem_dict_preserves_dtypes(self): + # https://github.com/pandas-dev/pandas/issues/34573 + expected = DataFrame( + { + "a": Series([0, 1, 2], dtype="int64"), + "b": Series([1, 2, 3], dtype=float), + "c": Series([1, 2, 3], dtype=float), + } + ) + df = DataFrame( + { + "a": Series([], dtype="int64"), + "b": Series([], dtype=float), + "c": Series([], dtype=float), + } + ) + for idx, b in enumerate([1, 2, 3]): + df.loc[df.shape[0]] = {"a": int(idx), "b": float(b), "c": float(b)} + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize( + "obj,dtype", + [ + (Period("2020-01"), PeriodDtype("M")), + (Interval(left=0, right=5), IntervalDtype("int64", "right")), + ( + Timestamp("2011-01-01", tz="US/Eastern"), + DatetimeTZDtype(tz="US/Eastern"), + ), + ], + ) + def test_setitem_extension_types(self, obj, dtype): + # GH: 34832 + expected = DataFrame({"idx": [1, 2, 3], "obj": Series([obj] * 3, dtype=dtype)}) + + df = DataFrame({"idx": [1, 2, 3]}) + df["obj"] = obj + + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize( + "ea_name", + [ + dtype.name + for dtype in ea_registry.dtypes + # property would require instantiation + if not isinstance(dtype.name, property) + ] + # mypy doesn't allow adding lists of different types + # https://github.com/python/mypy/issues/5492 + 
+ ["datetime64[ns, UTC]", "period[D]"], # type: ignore[list-item] + ) + def test_setitem_with_ea_name(self, ea_name): + # GH 38386 + result = DataFrame([0]) + result[ea_name] = [1] + expected = DataFrame({0: [0], ea_name: [1]}) + tm.assert_frame_equal(result, expected) + + def test_setitem_dt64_ndarray_with_NaT_and_diff_time_units(self): + # GH#7492 + data_ns = np.array([1, "nat"], dtype="datetime64[ns]") + result = Series(data_ns).to_frame() + result["new"] = data_ns + expected = DataFrame({0: [1, None], "new": [1, None]}, dtype="datetime64[ns]") + tm.assert_frame_equal(result, expected) + + # OutOfBoundsDatetime error shouldn't occur + data_s = np.array([1, "nat"], dtype="datetime64[s]") + result["new"] = data_s + expected = DataFrame({0: [1, None], "new": [1e9, None]}, dtype="datetime64[ns]") + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("unit", ["h", "m", "s", "ms", "D", "M", "Y"]) + def test_frame_setitem_datetime64_col_other_units(self, unit): + # Check that non-nano dt64 values get cast to dt64 on setitem + # into a not-yet-existing column + n = 100 + + dtype = np.dtype(f"M8[{unit}]") + vals = np.arange(n, dtype=np.int64).view(dtype) + ex_vals = vals.astype("datetime64[ns]") + + df = DataFrame({"ints": np.arange(n)}, index=np.arange(n)) + df[unit] = vals + + assert df[unit].dtype == np.dtype("M8[ns]") + assert (df[unit].values == ex_vals).all() + + @pytest.mark.parametrize("unit", ["h", "m", "s", "ms", "D", "M", "Y"]) + def test_frame_setitem_existing_datetime64_col_other_units(self, unit): + # Check that non-nano dt64 values get cast to dt64 on setitem + # into an already-existing dt64 column + n = 100 + + dtype = np.dtype(f"M8[{unit}]") + vals = np.arange(n, dtype=np.int64).view(dtype) + ex_vals = vals.astype("datetime64[ns]") + + df = DataFrame({"ints": np.arange(n)}, index=np.arange(n)) + df["dates"] = np.arange(n, dtype=np.int64).view("M8[ns]") + + # We overwrite existing dt64 column with new, non-nano dt64 vals + df["dates"] = vals + assert (df["dates"].values == ex_vals).all() + + def test_setitem_dt64tz(self, timezone_frame): + + df = timezone_frame + idx = df["B"].rename("foo") + + # setitem + df["C"] = idx + tm.assert_series_equal(df["C"], Series(idx, name="C")) + + df["D"] = "foo" + df["D"] = idx + tm.assert_series_equal(df["D"], Series(idx, name="D")) + del df["D"] + + # assert that A & C are not sharing the same base (e.g. 
they + # are copies) + v1 = df._mgr.arrays[1] + v2 = df._mgr.arrays[2] + tm.assert_extension_array_equal(v1, v2) + v1base = v1._data.base + v2base = v2._data.base + assert v1base is None or (id(v1base) != id(v2base)) + + # with nan + df2 = df.copy() + df2.iloc[1, 1] = NaT + df2.iloc[1, 2] = NaT + result = df2["B"] + tm.assert_series_equal(notna(result), Series([True, False, True], name="B")) + tm.assert_series_equal(df2.dtypes, df.dtypes) + + def test_setitem_periodindex(self): + rng = period_range("1/1/2000", periods=5, name="index") + df = DataFrame(np.random.randn(5, 3), index=rng) + + df["Index"] = rng + rs = Index(df["Index"]) + tm.assert_index_equal(rs, rng, check_names=False) + assert rs.name == "Index" + assert rng.name == "index" + + rs = df.reset_index().set_index("index") + assert isinstance(rs.index, PeriodIndex) + tm.assert_index_equal(rs.index, rng) + + def test_setitem_complete_column_with_array(self): + # GH#37954 + df = DataFrame({"a": ["one", "two", "three"], "b": [1, 2, 3]}) + arr = np.array([[1, 1], [3, 1], [5, 1]]) + df[["c", "d"]] = arr + expected = DataFrame( + { + "a": ["one", "two", "three"], + "b": [1, 2, 3], + "c": [1, 3, 5], + "d": [1, 1, 1], + } + ) + expected["c"] = expected["c"].astype(arr.dtype) + expected["d"] = expected["d"].astype(arr.dtype) + assert expected["c"].dtype == arr.dtype + assert expected["d"].dtype == arr.dtype + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize("dtype", ["f8", "i8", "u8"]) + def test_setitem_bool_with_numeric_index(self, dtype): + # GH#36319 + cols = Index([1, 2, 3], dtype=dtype) + df = DataFrame(np.random.randn(3, 3), columns=cols) + + df[False] = ["a", "b", "c"] + + expected_cols = Index([1, 2, 3, False], dtype=object) + if dtype == "f8": + expected_cols = Index([1.0, 2.0, 3.0, False], dtype=object) + + tm.assert_index_equal(df.columns, expected_cols) + + @pytest.mark.parametrize("indexer", ["B", ["B"]]) + def test_setitem_frame_length_0_str_key(self, indexer): + # GH#38831 + df = DataFrame(columns=["A", "B"]) + other = DataFrame({"B": [1, 2]}) + df[indexer] = other + expected = DataFrame({"A": [np.nan] * 2, "B": [1, 2]}) + expected["A"] = expected["A"].astype("object") + tm.assert_frame_equal(df, expected) + + def test_setitem_frame_duplicate_columns(self, using_array_manager, request): + # GH#15695 + cols = ["A", "B", "C"] * 2 + df = DataFrame(index=range(3), columns=cols) + df.loc[0, "A"] = (0, 3) + df.loc[:, "B"] = (1, 4) + df["C"] = (2, 5) + expected = DataFrame( + [ + [0, 1, 2, 3, 4, 5], + [np.nan, 1, 2, np.nan, 4, 5], + [np.nan, 1, 2, np.nan, 4, 5], + ], + dtype="object", + ) + + if using_array_manager: + # setitem replaces column so changes dtype + + expected.columns = cols + expected["C"] = expected["C"].astype("int64") + # TODO(ArrayManager) .loc still overwrites + expected["B"] = expected["B"].astype("int64") + + mark = pytest.mark.xfail( + reason="Both 'A' columns get set with 3 instead of 0 and 3" + ) + request.node.add_marker(mark) + else: + # set these with unique columns to be extra-unambiguous + expected[2] = expected[2].astype(np.int64) + expected[5] = expected[5].astype(np.int64) + expected.columns = cols + + tm.assert_frame_equal(df, expected) + + def test_setitem_frame_duplicate_columns_size_mismatch(self): + # GH#39510 + cols = ["A", "B", "C"] * 2 + df = DataFrame(index=range(3), columns=cols) + with pytest.raises(ValueError, match="Columns must be same length as key"): + df[["A"]] = (0, 3, 5) + + df2 = df.iloc[:, :3] # unique columns + with pytest.raises(ValueError, match="Columns must 
be same length as key"): + df2[["A"]] = (0, 3, 5) + + @pytest.mark.parametrize("cols", [["a", "b", "c"], ["a", "a", "a"]]) + def test_setitem_df_wrong_column_number(self, cols): + # GH#38604 + df = DataFrame([[1, 2, 3]], columns=cols) + rhs = DataFrame([[10, 11]], columns=["d", "e"]) + msg = "Columns must be same length as key" + with pytest.raises(ValueError, match=msg): + df["a"] = rhs + + def test_setitem_listlike_indexer_duplicate_columns(self): + # GH#38604 + df = DataFrame([[1, 2, 3]], columns=["a", "b", "b"]) + rhs = DataFrame([[10, 11, 12]], columns=["a", "b", "b"]) + df[["a", "b"]] = rhs + expected = DataFrame([[10, 11, 12]], columns=["a", "b", "b"]) + tm.assert_frame_equal(df, expected) + + df[["c", "b"]] = rhs + expected = DataFrame([[10, 11, 12, 10]], columns=["a", "b", "b", "c"]) + tm.assert_frame_equal(df, expected) + + def test_setitem_listlike_indexer_duplicate_columns_not_equal_length(self): + # GH#39403 + df = DataFrame([[1, 2, 3]], columns=["a", "b", "b"]) + rhs = DataFrame([[10, 11]], columns=["a", "b"]) + msg = "Columns must be same length as key" + with pytest.raises(ValueError, match=msg): + df[["a", "b"]] = rhs + + def test_setitem_intervals(self): + + df = DataFrame({"A": range(10)}) + ser = cut(df["A"], 5) + assert isinstance(ser.cat.categories, IntervalIndex) + + # B & D end up as Categoricals + # the remainder are converted to in-line objects + # containing an IntervalIndex.values + df["B"] = ser + df["C"] = np.array(ser) + df["D"] = ser.values + df["E"] = np.array(ser.values) + df["F"] = ser.astype(object) + + assert is_categorical_dtype(df["B"].dtype) + assert is_interval_dtype(df["B"].cat.categories) + assert is_categorical_dtype(df["D"].dtype) + assert is_interval_dtype(df["D"].cat.categories) + + # These go through the Series constructor and so get inferred back + # to IntervalDtype + assert is_interval_dtype(df["C"]) + assert is_interval_dtype(df["E"]) + + # But the Series constructor doesn't do inference on Series objects, + # so setting df["F"] doesn't get cast back to IntervalDtype + assert is_object_dtype(df["F"]) + + # they compare equal as Index + # when converted to numpy objects + c = lambda x: Index(np.array(x)) + tm.assert_index_equal(c(df.B), c(df.B)) + tm.assert_index_equal(c(df.B), c(df.C), check_names=False) + tm.assert_index_equal(c(df.B), c(df.D), check_names=False) + tm.assert_index_equal(c(df.C), c(df.D), check_names=False) + + # B & D are the same Series + tm.assert_series_equal(df["B"], df["B"]) + tm.assert_series_equal(df["B"], df["D"], check_names=False) + + # C & E are the same Series + tm.assert_series_equal(df["C"], df["C"]) + tm.assert_series_equal(df["C"], df["E"], check_names=False) + + def test_setitem_categorical(self): + # GH#35369 + df = DataFrame({"h": Series(list("mn")).astype("category")}) + df.h = df.h.cat.reorder_categories(["n", "m"]) + expected = DataFrame( + {"h": Categorical(["m", "n"]).reorder_categories(["n", "m"])} + ) + tm.assert_frame_equal(df, expected) + + def test_setitem_with_empty_listlike(self): + # GH#17101 + index = Index([], name="idx") + result = DataFrame(columns=["A"], index=index) + result["A"] = [] + expected = DataFrame(columns=["A"], index=index) + tm.assert_index_equal(result.index, expected.index) + + @pytest.mark.parametrize( + "cols, values, expected", + [ + (["C", "D", "D", "a"], [1, 2, 3, 4], 4), # with duplicates + (["D", "C", "D", "a"], [1, 2, 3, 4], 4), # mixed order + (["C", "B", "B", "a"], [1, 2, 3, 4], 4), # other duplicate cols + (["C", "B", "a"], [1, 2, 3], 3), # no duplicates + 
(["B", "C", "a"], [3, 2, 1], 1), # alphabetical order + (["C", "a", "B"], [3, 2, 1], 2), # in the middle + ], + ) + def test_setitem_same_column(self, cols, values, expected): + # GH#23239 + df = DataFrame([values], columns=cols) + df["a"] = df["a"] + result = df["a"].values[0] + assert result == expected + + def test_setitem_multi_index(self): + # GH#7655, test that assigning to a sub-frame of a frame + # with multi-index columns aligns both rows and columns + it = ["jim", "joe", "jolie"], ["first", "last"], ["left", "center", "right"] + + cols = MultiIndex.from_product(it) + index = date_range("20141006", periods=20) + vals = np.random.randint(1, 1000, (len(index), len(cols))) + df = DataFrame(vals, columns=cols, index=index) + + i, j = df.index.values.copy(), it[-1][:] + + np.random.shuffle(i) + df["jim"] = df["jolie"].loc[i, ::-1] + tm.assert_frame_equal(df["jim"], df["jolie"]) + + np.random.shuffle(j) + df[("joe", "first")] = df[("jolie", "last")].loc[i, j] + tm.assert_frame_equal(df[("joe", "first")], df[("jolie", "last")]) + + np.random.shuffle(j) + df[("joe", "last")] = df[("jolie", "first")].loc[i, j] + tm.assert_frame_equal(df[("joe", "last")], df[("jolie", "first")]) + + @pytest.mark.parametrize( + "columns,box,expected", + [ + ( + ["A", "B", "C", "D"], + 7, + DataFrame( + [[7, 7, 7, 7], [7, 7, 7, 7], [7, 7, 7, 7]], + columns=["A", "B", "C", "D"], + ), + ), + ( + ["C", "D"], + [7, 8], + DataFrame( + [[1, 2, 7, 8], [3, 4, 7, 8], [5, 6, 7, 8]], + columns=["A", "B", "C", "D"], + ), + ), + ( + ["A", "B", "C"], + np.array([7, 8, 9], dtype=np.int64), + DataFrame([[7, 8, 9], [7, 8, 9], [7, 8, 9]], columns=["A", "B", "C"]), + ), + ( + ["B", "C", "D"], + [[7, 8, 9], [10, 11, 12], [13, 14, 15]], + DataFrame( + [[1, 7, 8, 9], [3, 10, 11, 12], [5, 13, 14, 15]], + columns=["A", "B", "C", "D"], + ), + ), + ( + ["C", "A", "D"], + np.array([[7, 8, 9], [10, 11, 12], [13, 14, 15]], dtype=np.int64), + DataFrame( + [[8, 2, 7, 9], [11, 4, 10, 12], [14, 6, 13, 15]], + columns=["A", "B", "C", "D"], + ), + ), + ( + ["A", "C"], + DataFrame([[7, 8], [9, 10], [11, 12]], columns=["A", "C"]), + DataFrame( + [[7, 2, 8], [9, 4, 10], [11, 6, 12]], columns=["A", "B", "C"] + ), + ), + ], + ) + def test_setitem_list_missing_columns(self, columns, box, expected): + # GH#29334 + df = DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"]) + df[columns] = box + tm.assert_frame_equal(df, expected) + + def test_setitem_list_of_tuples(self, float_frame): + tuples = list(zip(float_frame["A"], float_frame["B"])) + float_frame["tuples"] = tuples + + result = float_frame["tuples"] + expected = Series(tuples, index=float_frame.index, name="tuples") + tm.assert_series_equal(result, expected) + + def test_setitem_iloc_generator(self): + # GH#39614 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + indexer = (x for x in [1, 2]) + df.iloc[indexer] = 1 + expected = DataFrame({"a": [1, 1, 1], "b": [4, 1, 1]}) + tm.assert_frame_equal(df, expected) + + def test_setitem_iloc_two_dimensional_generator(self): + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + indexer = (x for x in [1, 2]) + df.iloc[indexer, 1] = 1 + expected = DataFrame({"a": [1, 2, 3], "b": [4, 1, 1]}) + tm.assert_frame_equal(df, expected) + + def test_setitem_dtypes_bytes_type_to_object(self): + # GH 20734 + index = Series(name="id", dtype="S24") + df = DataFrame(index=index) + df["a"] = Series(name="a", index=index, dtype=np.uint32) + df["b"] = Series(name="b", index=index, dtype="S64") + df["c"] = Series(name="c", index=index, dtype="S64") + df["d"] = 
Series(name="d", index=index, dtype=np.uint8) + result = df.dtypes + expected = Series([np.uint32, object, object, np.uint8], index=list("abcd")) + tm.assert_series_equal(result, expected) + + def test_boolean_mask_nullable_int64(self): + # GH 28928 + result = DataFrame({"a": [3, 4], "b": [5, 6]}).astype( + {"a": "int64", "b": "Int64"} + ) + mask = Series(False, index=result.index) + result.loc[mask, "a"] = result["a"] + result.loc[mask, "b"] = result["b"] + expected = DataFrame({"a": [3, 4], "b": [5, 6]}).astype( + {"a": "int64", "b": "Int64"} + ) + tm.assert_frame_equal(result, expected) + + # TODO(ArrayManager) set column with 2d column array, see #44788 + @td.skip_array_manager_not_yet_implemented + def test_setitem_npmatrix_2d(self): + # GH#42376 + # for use-case df["x"] = sparse.random(10, 10).mean(axis=1) + expected = DataFrame( + {"np-array": np.ones(10), "np-matrix": np.ones(10)}, index=np.arange(10) + ) + + a = np.ones((10, 1)) + df = DataFrame(index=np.arange(10)) + df["np-array"] = a + + # Instantiation of `np.matrix` gives PendingDeprecationWarning + with tm.assert_produces_warning(PendingDeprecationWarning): + df["np-matrix"] = np.matrix(a) + + tm.assert_frame_equal(df, expected) + + +class TestSetitemTZAwareValues: + @pytest.fixture + def idx(self): + naive = DatetimeIndex(["2013-1-1 13:00", "2013-1-2 14:00"], name="B") + idx = naive.tz_localize("US/Pacific") + return idx + + @pytest.fixture + def expected(self, idx): + expected = Series(np.array(idx.tolist(), dtype="object"), name="B") + assert expected.dtype == idx.dtype + return expected + + def test_setitem_dt64series(self, idx, expected): + # convert to utc + df = DataFrame(np.random.randn(2, 1), columns=["A"]) + df["B"] = idx + + with tm.assert_produces_warning(FutureWarning) as m: + df["B"] = idx.to_series(keep_tz=False, index=[0, 1]) + msg = "do 'idx.tz_convert(None)' before calling" + assert msg in str(m[0].message) + + result = df["B"] + comp = Series(idx.tz_convert("UTC").tz_localize(None), name="B") + tm.assert_series_equal(result, comp) + + def test_setitem_datetimeindex(self, idx, expected): + # setting a DataFrame column with a tzaware DTI retains the dtype + df = DataFrame(np.random.randn(2, 1), columns=["A"]) + + # assign to frame + df["B"] = idx + result = df["B"] + tm.assert_series_equal(result, expected) + + def test_setitem_object_array_of_tzaware_datetimes(self, idx, expected): + # setting a DataFrame column with a tzaware DTI retains the dtype + df = DataFrame(np.random.randn(2, 1), columns=["A"]) + + # object array of datetimes with a tz + df["B"] = idx.to_pydatetime() + result = df["B"] + tm.assert_series_equal(result, expected) + + +class TestDataFrameSetItemWithExpansion: + def test_setitem_listlike_views(self): + # GH#38148 + df = DataFrame({"a": [1, 2, 3], "b": [4, 4, 6]}) + + # get one column as a view of df + ser = df["a"] + + # add columns with list-like indexer + df[["c", "d"]] = np.array([[0.1, 0.2], [0.3, 0.4], [0.4, 0.5]]) + + # edit in place the first column to check view semantics + df.iloc[0, 0] = 100 + + expected = Series([100, 2, 3], name="a") + tm.assert_series_equal(ser, expected) + + def test_setitem_string_column_numpy_dtype_raising(self): + # GH#39010 + df = DataFrame([[1, 2], [3, 4]]) + df["0 - Name"] = [5, 6] + expected = DataFrame([[1, 2, 5], [3, 4, 6]], columns=[0, 1, "0 - Name"]) + tm.assert_frame_equal(df, expected) + + def test_setitem_empty_df_duplicate_columns(self): + # GH#38521 + df = DataFrame(columns=["a", "b", "b"], dtype="float64") + df.loc[:, "a"] = list(range(2)) 
+ expected = DataFrame( + [[0, np.nan, np.nan], [1, np.nan, np.nan]], columns=["a", "b", "b"] + ) + tm.assert_frame_equal(df, expected) + + def test_setitem_with_expansion_categorical_dtype(self): + # assignment + df = DataFrame( + {"value": np.array(np.random.randint(0, 10000, 100), dtype="int32")} + ) + labels = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)]) + + df = df.sort_values(by=["value"], ascending=True) + ser = cut(df.value, range(0, 10500, 500), right=False, labels=labels) + cat = ser.values + + # setting with a Categorical + df["D"] = cat + str(df) + + result = df.dtypes + expected = Series( + [np.dtype("int32"), CategoricalDtype(categories=labels, ordered=False)], + index=["value", "D"], + ) + tm.assert_series_equal(result, expected) + + # setting with a Series + df["E"] = ser + str(df) + + result = df.dtypes + expected = Series( + [ + np.dtype("int32"), + CategoricalDtype(categories=labels, ordered=False), + CategoricalDtype(categories=labels, ordered=False), + ], + index=["value", "D", "E"], + ) + tm.assert_series_equal(result, expected) + + result1 = df["D"] + result2 = df["E"] + tm.assert_categorical_equal(result1._mgr.array, cat) + + # sorting + ser.name = "E" + tm.assert_series_equal(result2.sort_index(), ser.sort_index()) + + def test_setitem_scalars_no_index(self): + # GH#16823 / GH#17894 + df = DataFrame() + df["foo"] = 1 + expected = DataFrame(columns=["foo"]).astype(np.int64) + tm.assert_frame_equal(df, expected) + + def test_setitem_newcol_tuple_key(self, float_frame): + assert ( + "A", + "B", + ) not in float_frame.columns + float_frame["A", "B"] = float_frame["A"] + assert ("A", "B") in float_frame.columns + + result = float_frame["A", "B"] + expected = float_frame["A"] + tm.assert_series_equal(result, expected, check_names=False) + + def test_frame_setitem_newcol_timestamp(self): + # GH#2155 + columns = date_range(start="1/1/2012", end="2/1/2012", freq=BDay()) + data = DataFrame(columns=columns, index=range(10)) + t = datetime(2012, 11, 1) + ts = Timestamp(t) + data[ts] = np.nan # works, mostly a smoke-test + assert np.isnan(data[ts]).all() + + +class TestDataFrameSetItemSlicing: + def test_setitem_slice_position(self): + # GH#31469 + df = DataFrame(np.zeros((100, 1))) + df[-4:] = 1 + arr = np.zeros((100, 1)) + arr[-4:] = 1 + expected = DataFrame(arr) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize("indexer", [tm.setitem, tm.iloc]) + @pytest.mark.parametrize("box", [Series, np.array, list, pd.array]) + @pytest.mark.parametrize("n", [1, 2, 3]) + def test_setitem_slice_indexer_broadcasting_rhs(self, n, box, indexer): + # GH#40440 + df = DataFrame([[1, 3, 5]] + [[2, 4, 6]] * n, columns=["a", "b", "c"]) + indexer(df)[1:] = box([10, 11, 12]) + expected = DataFrame([[1, 3, 5]] + [[10, 11, 12]] * n, columns=["a", "b", "c"]) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize("box", [Series, np.array, list, pd.array]) + @pytest.mark.parametrize("n", [1, 2, 3]) + def test_setitem_list_indexer_broadcasting_rhs(self, n, box): + # GH#40440 + df = DataFrame([[1, 3, 5]] + [[2, 4, 6]] * n, columns=["a", "b", "c"]) + df.iloc[list(range(1, n + 1))] = box([10, 11, 12]) + expected = DataFrame([[1, 3, 5]] + [[10, 11, 12]] * n, columns=["a", "b", "c"]) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize("indexer", [tm.setitem, tm.iloc]) + @pytest.mark.parametrize("box", [Series, np.array, list, pd.array]) + @pytest.mark.parametrize("n", [1, 2, 3]) + def test_setitem_slice_broadcasting_rhs_mixed_dtypes(self, n, box, 
indexer): + # GH#40440 + df = DataFrame( + [[1, 3, 5], ["x", "y", "z"]] + [[2, 4, 6]] * n, columns=["a", "b", "c"] + ) + indexer(df)[1:] = box([10, 11, 12]) + expected = DataFrame( + [[1, 3, 5]] + [[10, 11, 12]] * (n + 1), + columns=["a", "b", "c"], + dtype="object", + ) + tm.assert_frame_equal(df, expected) + + +class TestDataFrameSetItemCallable: + def test_setitem_callable(self): + # GH#12533 + df = DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]}) + df[lambda x: "A"] = [11, 12, 13, 14] + + exp = DataFrame({"A": [11, 12, 13, 14], "B": [5, 6, 7, 8]}) + tm.assert_frame_equal(df, exp) + + def test_setitem_other_callable(self): + # GH#13299 + def inc(x): + return x + 1 + + df = DataFrame([[-1, 1], [1, -1]]) + df[df > 0] = inc + + expected = DataFrame([[-1, inc], [inc, -1]]) + tm.assert_frame_equal(df, expected) + + +class TestDataFrameSetItemBooleanMask: + @td.skip_array_manager_invalid_test # TODO(ArrayManager) rewrite not using .values + @pytest.mark.parametrize( + "mask_type", + [lambda df: df > np.abs(df) / 2, lambda df: (df > np.abs(df) / 2).values], + ids=["dataframe", "array"], + ) + def test_setitem_boolean_mask(self, mask_type, float_frame): + + # Test for issue #18582 + df = float_frame.copy() + mask = mask_type(df) + + # index with boolean mask + result = df.copy() + result[mask] = np.nan + + expected = df.copy() + expected.values[np.array(mask)] = np.nan + tm.assert_frame_equal(result, expected) + + @pytest.mark.xfail(reason="Currently empty indexers are treated as all False") + @pytest.mark.parametrize("box", [list, np.array, Series]) + def test_setitem_loc_empty_indexer_raises_with_non_empty_value(self, box): + # GH#37672 + df = DataFrame({"a": ["a"], "b": [1], "c": [1]}) + if box == Series: + indexer = box([], dtype="object") + else: + indexer = box([]) + msg = "Must have equal len keys and value when setting with an iterable" + with pytest.raises(ValueError, match=msg): + df.loc[indexer, ["b"]] = [1] + + @pytest.mark.parametrize("box", [list, np.array, Series]) + def test_setitem_loc_only_false_indexer_dtype_changed(self, box): + # GH#37550 + # Dtype is only changed when value to set is a Series and indexer is + # empty/bool all False + df = DataFrame({"a": ["a"], "b": [1], "c": [1]}) + indexer = box([False]) + df.loc[indexer, ["b"]] = 10 - df["c"] + expected = DataFrame({"a": ["a"], "b": [1], "c": [1]}) + tm.assert_frame_equal(df, expected) + + df.loc[indexer, ["b"]] = 9 + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize("indexer", [tm.setitem, tm.loc]) + def test_setitem_boolean_mask_aligning(self, indexer): + # GH#39931 + df = DataFrame({"a": [1, 4, 2, 3], "b": [5, 6, 7, 8]}) + expected = df.copy() + mask = df["a"] >= 3 + indexer(df)[mask] = indexer(df)[mask].sort_values("a") + tm.assert_frame_equal(df, expected) + + def test_setitem_mask_categorical(self): + # assign multiple rows (mixed values) (-> array) -> exp_multi_row + # changed multiple rows + cats2 = Categorical(["a", "a", "b", "b", "a", "a", "a"], categories=["a", "b"]) + idx2 = Index(["h", "i", "j", "k", "l", "m", "n"]) + values2 = [1, 1, 2, 2, 1, 1, 1] + exp_multi_row = DataFrame({"cats": cats2, "values": values2}, index=idx2) + + catsf = Categorical( + ["a", "a", "c", "c", "a", "a", "a"], categories=["a", "b", "c"] + ) + idxf = Index(["h", "i", "j", "k", "l", "m", "n"]) + valuesf = [1, 1, 3, 3, 1, 1, 1] + df = DataFrame({"cats": catsf, "values": valuesf}, index=idxf) + + exp_fancy = exp_multi_row.copy() + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + # issue #37643 
inplace kwarg deprecated + return_value = exp_fancy["cats"].cat.set_categories( + ["a", "b", "c"], inplace=True + ) + assert return_value is None + + mask = df["cats"] == "c" + df[mask] = ["b", 2] + # category c is kept in .categories + tm.assert_frame_equal(df, exp_fancy) + + @pytest.mark.parametrize("dtype", ["float", "int64"]) + @pytest.mark.parametrize("kwargs", [{}, {"index": [1]}, {"columns": ["A"]}]) + def test_setitem_empty_frame_with_boolean(self, dtype, kwargs): + # see GH#10126 + kwargs["dtype"] = dtype + df = DataFrame(**kwargs) + + df2 = df.copy() + df[df > df2] = 47 + tm.assert_frame_equal(df, df2) + + def test_setitem_boolean_indexing(self): + idx = list(range(3)) + cols = ["A", "B", "C"] + df1 = DataFrame( + index=idx, + columns=cols, + data=np.array( + [[0.0, 0.5, 1.0], [1.5, 2.0, 2.5], [3.0, 3.5, 4.0]], dtype=float + ), + ) + df2 = DataFrame(index=idx, columns=cols, data=np.ones((len(idx), len(cols)))) + + expected = DataFrame( + index=idx, + columns=cols, + data=np.array([[0.0, 0.5, 1.0], [1.5, 2.0, -1], [-1, -1, -1]], dtype=float), + ) + + df1[df1 > 2.0 * df2] = -1 + tm.assert_frame_equal(df1, expected) + with pytest.raises(ValueError, match="Item wrong length"): + df1[df1.index[:-1] > 2] = -1 + + def test_loc_setitem_all_false_boolean_two_blocks(self): + # GH#40885 + df = DataFrame({"a": [1, 2], "b": [3, 4], "c": "a"}) + expected = df.copy() + indexer = Series([False, False], name="c") + df.loc[indexer, ["b"]] = DataFrame({"b": [5, 6]}, index=[0, 1]) + tm.assert_frame_equal(df, expected) + + +class TestDataFrameSetitemCopyViewSemantics: + def test_setitem_always_copy(self, float_frame): + assert "E" not in float_frame.columns + s = float_frame["A"].copy() + float_frame["E"] = s + + float_frame["E"][5:10] = np.nan + assert notna(s[5:10]).all() + + @pytest.mark.parametrize("consolidate", [True, False]) + def test_setitem_partial_column_inplace(self, consolidate, using_array_manager): + # This setting should be in-place, regardless of whether frame is + # single-block or multi-block + # GH#304 this used to be incorrectly not-inplace, in which case + # we needed to ensure _item_cache was cleared. 
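
# --- illustrative sketch (assuming the pre-copy-on-write semantics of the
# pandas version vendored here): a partial-column .loc assignment writes into
# the column's existing buffer instead of allocating a new one, which is what
# the np.shares_memory assertion in the test below verifies. `_values` is the
# same private accessor the test itself uses.
import numpy as np
import pandas as pd

demo = pd.DataFrame({"z": [1.0, 2.0, 3.0, 4.0]})
buf = demo["z"]._values                # underlying ndarray before the set
demo.loc[2:, "z"] = 42.0               # partial-column, in-place write
assert np.shares_memory(buf, demo["z"]._values)
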
+ + df = DataFrame( + {"x": [1.1, 2.1, 3.1, 4.1], "y": [5.1, 6.1, 7.1, 8.1]}, index=[0, 1, 2, 3] + ) + df.insert(2, "z", np.nan) + if not using_array_manager: + if consolidate: + df._consolidate_inplace() + assert len(df._mgr.blocks) == 1 + else: + assert len(df._mgr.blocks) == 2 + + zvals = df["z"]._values + + df.loc[2:, "z"] = 42 + + expected = Series([np.nan, np.nan, 42, 42], index=df.index, name="z") + tm.assert_series_equal(df["z"], expected) + + # check setting occurred in-place + tm.assert_numpy_array_equal(zvals, expected.values) + assert np.shares_memory(zvals, df["z"]._values) + if not consolidate: + assert df["z"]._values is zvals + + def test_setitem_duplicate_columns_not_inplace(self): + # GH#39510 + cols = ["A", "B"] * 2 + df = DataFrame(0.0, index=[0], columns=cols) + df_copy = df.copy() + df_view = df[:] + df["B"] = (2, 5) + + expected = DataFrame([[0.0, 2, 0.0, 5]], columns=cols) + tm.assert_frame_equal(df_view, df_copy) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize( + "value", [1, np.array([[1], [1]], dtype="int64"), [[1], [1]]] + ) + def test_setitem_same_dtype_not_inplace(self, value, using_array_manager, request): + # GH#39510 + cols = ["A", "B"] + df = DataFrame(0, index=[0, 1], columns=cols) + df_copy = df.copy() + df_view = df[:] + df[["B"]] = value + + expected = DataFrame([[0, 1], [0, 1]], columns=cols) + tm.assert_frame_equal(df, expected) + tm.assert_frame_equal(df_view, df_copy) + + @pytest.mark.parametrize("value", [1.0, np.array([[1.0], [1.0]]), [[1.0], [1.0]]]) + def test_setitem_listlike_key_scalar_value_not_inplace(self, value): + # GH#39510 + cols = ["A", "B"] + df = DataFrame(0, index=[0, 1], columns=cols) + df_copy = df.copy() + df_view = df[:] + df[["B"]] = value + + expected = DataFrame([[0, 1.0], [0, 1.0]], columns=cols) + tm.assert_frame_equal(df_view, df_copy) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize( + "indexer", + [ + "a", + ["a"], + pytest.param( + [True, False], + marks=pytest.mark.xfail( + reason="Boolean indexer incorrectly setting inplace", + strict=False, # passing on some builds, no obvious pattern + ), + ), + ], + ) + @pytest.mark.parametrize( + "value, set_value", + [ + (1, 5), + (1.0, 5.0), + (Timestamp("2020-12-31"), Timestamp("2021-12-31")), + ("a", "b"), + ], + ) + def test_setitem_not_operating_inplace(self, value, set_value, indexer): + # GH#43406 + df = DataFrame({"a": value}, index=[0, 1]) + expected = df.copy() + view = df[:] + df[indexer] = set_value + tm.assert_frame_equal(view, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/indexing/test_take.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/indexing/test_take.py new file mode 100644 index 0000000..3b59d3c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/indexing/test_take.py @@ -0,0 +1,88 @@ +import pytest + +import pandas._testing as tm + + +class TestDataFrameTake: + def test_take(self, float_frame): + # homogeneous + order = [3, 1, 2, 0] + for df in [float_frame]: + + result = df.take(order, axis=0) + expected = df.reindex(df.index.take(order)) + tm.assert_frame_equal(result, expected) + + # axis = 1 + result = df.take(order, axis=1) + expected = df.loc[:, ["D", "B", "C", "A"]] + tm.assert_frame_equal(result, expected, check_names=False) + + # negative indices + order = [2, 1, -1] + for df in [float_frame]: + + result = df.take(order, axis=0) + expected = df.reindex(df.index.take(order)) + tm.assert_frame_equal(result, expected) + + result = df.take(order, 
axis=0) + tm.assert_frame_equal(result, expected) + + # axis = 1 + result = df.take(order, axis=1) + expected = df.loc[:, ["C", "B", "D"]] + tm.assert_frame_equal(result, expected, check_names=False) + + # illegal indices + msg = "indices are out-of-bounds" + with pytest.raises(IndexError, match=msg): + df.take([3, 1, 2, 30], axis=0) + with pytest.raises(IndexError, match=msg): + df.take([3, 1, 2, -31], axis=0) + with pytest.raises(IndexError, match=msg): + df.take([3, 1, 2, 5], axis=1) + with pytest.raises(IndexError, match=msg): + df.take([3, 1, 2, -5], axis=1) + + def test_take_mixed_type(self, float_string_frame): + + # mixed-dtype + order = [4, 1, 2, 0, 3] + for df in [float_string_frame]: + + result = df.take(order, axis=0) + expected = df.reindex(df.index.take(order)) + tm.assert_frame_equal(result, expected) + + # axis = 1 + result = df.take(order, axis=1) + expected = df.loc[:, ["foo", "B", "C", "A", "D"]] + tm.assert_frame_equal(result, expected) + + # negative indices + order = [4, 1, -2] + for df in [float_string_frame]: + + result = df.take(order, axis=0) + expected = df.reindex(df.index.take(order)) + tm.assert_frame_equal(result, expected) + + # axis = 1 + result = df.take(order, axis=1) + expected = df.loc[:, ["foo", "B", "D"]] + tm.assert_frame_equal(result, expected) + + def test_take_mixed_numeric(self, mixed_float_frame, mixed_int_frame): + # by dtype + order = [1, 2, 0, 3] + for df in [mixed_float_frame, mixed_int_frame]: + + result = df.take(order, axis=0) + expected = df.reindex(df.index.take(order)) + tm.assert_frame_equal(result, expected) + + # axis = 1 + result = df.take(order, axis=1) + expected = df.loc[:, ["B", "C", "A", "D"]] + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/indexing/test_where.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/indexing/test_where.py new file mode 100644 index 0000000..9b6e04d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/indexing/test_where.py @@ -0,0 +1,929 @@ +from datetime import datetime + +from hypothesis import given +import numpy as np +import pytest + +from pandas.compat import np_version_under1p19 + +from pandas.core.dtypes.common import is_scalar + +import pandas as pd +from pandas import ( + DataFrame, + DatetimeIndex, + Series, + StringDtype, + Timestamp, + date_range, + isna, +) +import pandas._testing as tm +from pandas._testing._hypothesis import OPTIONAL_ONE_OF_ALL + + +@pytest.fixture(params=["default", "float_string", "mixed_float", "mixed_int"]) +def where_frame(request, float_string_frame, mixed_float_frame, mixed_int_frame): + if request.param == "default": + return DataFrame(np.random.randn(5, 3), columns=["A", "B", "C"]) + if request.param == "float_string": + return float_string_frame + if request.param == "mixed_float": + return mixed_float_frame + if request.param == "mixed_int": + return mixed_int_frame + + +def _safe_add(df): + # only add to the numeric items + def is_ok(s): + return ( + issubclass(s.dtype.type, (np.integer, np.floating)) and s.dtype != "uint8" + ) + + return DataFrame(dict((c, s + 1) if is_ok(s) else (c, s) for c, s in df.items())) + + +class TestDataFrameIndexingWhere: + def test_where_get(self, where_frame, float_string_frame): + def _check_get(df, cond, check_dtypes=True): + other1 = _safe_add(df) + rs = df.where(cond, other1) + rs2 = df.where(cond.values, other1) + for k, v in rs.items(): + exp = Series(np.where(cond[k], df[k], other1[k]), index=v.index) + tm.assert_series_equal(v, 
exp, check_names=False) + tm.assert_frame_equal(rs, rs2) + + # dtypes + if check_dtypes: + assert (rs.dtypes == df.dtypes).all() + + # check getting + df = where_frame + if df is float_string_frame: + msg = "'>' not supported between instances of 'str' and 'int'" + with pytest.raises(TypeError, match=msg): + df > 0 + return + cond = df > 0 + _check_get(df, cond) + + def test_where_upcasting(self): + # upcasting case (GH # 2794) + df = DataFrame( + { + c: Series([1] * 3, dtype=c) + for c in ["float32", "float64", "int32", "int64"] + } + ) + df.iloc[1, :] = 0 + result = df.dtypes + expected = Series( + [ + np.dtype("float32"), + np.dtype("float64"), + np.dtype("int32"), + np.dtype("int64"), + ], + index=["float32", "float64", "int32", "int64"], + ) + + # when we don't preserve boolean casts + # + # expected = Series({ 'float32' : 1, 'float64' : 3 }) + + tm.assert_series_equal(result, expected) + + def test_where_alignment(self, where_frame, float_string_frame, mixed_int_frame): + # aligning + def _check_align(df, cond, other, check_dtypes=True): + rs = df.where(cond, other) + for i, k in enumerate(rs.columns): + result = rs[k] + d = df[k].values + c = cond[k].reindex(df[k].index).fillna(False).values + + if is_scalar(other): + o = other + else: + if isinstance(other, np.ndarray): + o = Series(other[:, i], index=result.index).values + else: + o = other[k].values + + new_values = d if c.all() else np.where(c, d, o) + expected = Series(new_values, index=result.index, name=k) + + # since we can't always have the correct numpy dtype + # as numpy doesn't know how to downcast, don't check + tm.assert_series_equal(result, expected, check_dtype=False) + + # dtypes + # can't check dtype when other is an ndarray + + if check_dtypes and not isinstance(other, np.ndarray): + assert (rs.dtypes == df.dtypes).all() + + df = where_frame + if df is float_string_frame: + msg = "'>' not supported between instances of 'str' and 'int'" + with pytest.raises(TypeError, match=msg): + df > 0 + return + + # other is a frame + cond = (df > 0)[1:] + _check_align(df, cond, _safe_add(df)) + + # check other is ndarray + cond = df > 0 + warn = None + if df is mixed_int_frame: + warn = FutureWarning + with tm.assert_produces_warning(warn, match="Downcasting integer-dtype"): + _check_align(df, cond, (_safe_add(df).values)) + + # integers are upcast, so don't check the dtypes + cond = df > 0 + check_dtypes = all(not issubclass(s.type, np.integer) for s in df.dtypes) + _check_align(df, cond, np.nan, check_dtypes=check_dtypes) + + def test_where_invalid(self): + # invalid conditions + df = DataFrame(np.random.randn(5, 3), columns=["A", "B", "C"]) + cond = df > 0 + + err1 = (df + 1).values[0:2, :] + msg = "other must be the same shape as self when an ndarray" + with pytest.raises(ValueError, match=msg): + df.where(cond, err1) + + err2 = cond.iloc[:2, :].values + other1 = _safe_add(df) + msg = "Array conditional must be same shape as self" + with pytest.raises(ValueError, match=msg): + df.where(err2, other1) + + with pytest.raises(ValueError, match=msg): + df.mask(True) + with pytest.raises(ValueError, match=msg): + df.mask(0) + + def test_where_set(self, where_frame, float_string_frame): + # where inplace + + def _check_set(df, cond, check_dtypes=True): + dfi = df.copy() + econd = cond.reindex_like(df).fillna(True) + expected = dfi.mask(~econd) + + return_value = dfi.where(cond, np.nan, inplace=True) + assert return_value is None + tm.assert_frame_equal(dfi, expected) + + # dtypes (and confirm upcasts)x + if check_dtypes: + for k, 
v in df.dtypes.items(): + if issubclass(v.type, np.integer) and not cond[k].all(): + v = np.dtype("float64") + assert dfi[k].dtype == v + + df = where_frame + if df is float_string_frame: + msg = "'>' not supported between instances of 'str' and 'int'" + with pytest.raises(TypeError, match=msg): + df > 0 + return + + cond = df > 0 + _check_set(df, cond) + + cond = df >= 0 + _check_set(df, cond) + + # aligning + cond = (df >= 0)[1:] + _check_set(df, cond) + + def test_where_series_slicing(self): + # GH 10218 + # test DataFrame.where with Series slicing + df = DataFrame({"a": range(3), "b": range(4, 7)}) + result = df.where(df["a"] == 1) + expected = df[df["a"] == 1].reindex(df.index) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("klass", [list, tuple, np.array]) + def test_where_array_like(self, klass): + # see gh-15414 + df = DataFrame({"a": [1, 2, 3]}) + cond = [[False], [True], [True]] + expected = DataFrame({"a": [np.nan, 2, 3]}) + + result = df.where(klass(cond)) + tm.assert_frame_equal(result, expected) + + df["b"] = 2 + expected["b"] = [2, np.nan, 2] + cond = [[False, True], [True, False], [True, True]] + + result = df.where(klass(cond)) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "cond", + [ + [[1], [0], [1]], + Series([[2], [5], [7]]), + DataFrame({"a": [2, 5, 7]}), + [["True"], ["False"], ["True"]], + [[Timestamp("2017-01-01")], [pd.NaT], [Timestamp("2017-01-02")]], + ], + ) + def test_where_invalid_input_single(self, cond): + # see gh-15414: only boolean arrays accepted + df = DataFrame({"a": [1, 2, 3]}) + msg = "Boolean array expected for the condition" + + with pytest.raises(ValueError, match=msg): + df.where(cond) + + @pytest.mark.parametrize( + "cond", + [ + [[0, 1], [1, 0], [1, 1]], + Series([[0, 2], [5, 0], [4, 7]]), + [["False", "True"], ["True", "False"], ["True", "True"]], + DataFrame({"a": [2, 5, 7], "b": [4, 8, 9]}), + [ + [pd.NaT, Timestamp("2017-01-01")], + [Timestamp("2017-01-02"), pd.NaT], + [Timestamp("2017-01-03"), Timestamp("2017-01-03")], + ], + ], + ) + def test_where_invalid_input_multiple(self, cond): + # see gh-15414: only boolean arrays accepted + df = DataFrame({"a": [1, 2, 3], "b": [2, 2, 2]}) + msg = "Boolean array expected for the condition" + + with pytest.raises(ValueError, match=msg): + df.where(cond) + + def test_where_dataframe_col_match(self): + df = DataFrame([[1, 2, 3], [4, 5, 6]]) + cond = DataFrame([[True, False, True], [False, False, True]]) + + result = df.where(cond) + expected = DataFrame([[1.0, np.nan, 3], [np.nan, np.nan, 6]]) + tm.assert_frame_equal(result, expected) + + # this *does* align, though has no matching columns + cond.columns = ["a", "b", "c"] + result = df.where(cond) + expected = DataFrame(np.nan, index=df.index, columns=df.columns) + tm.assert_frame_equal(result, expected) + + def test_where_ndframe_align(self): + msg = "Array conditional must be same shape as self" + df = DataFrame([[1, 2, 3], [4, 5, 6]]) + + cond = [True] + with pytest.raises(ValueError, match=msg): + df.where(cond) + + expected = DataFrame([[1, 2, 3], [np.nan, np.nan, np.nan]]) + + out = df.where(Series(cond)) + tm.assert_frame_equal(out, expected) + + cond = np.array([False, True, False, True]) + with pytest.raises(ValueError, match=msg): + df.where(cond) + + expected = DataFrame([[np.nan, np.nan, np.nan], [4, 5, 6]]) + + out = df.where(Series(cond)) + tm.assert_frame_equal(out, expected) + + def test_where_bug(self): + # see gh-2793 + df = DataFrame( + {"a": [1.0, 2.0, 3.0, 4.0], "b": [4.0, 3.0, 
2.0, 1.0]}, dtype="float64" + ) + expected = DataFrame( + {"a": [np.nan, np.nan, 3.0, 4.0], "b": [4.0, 3.0, np.nan, np.nan]}, + dtype="float64", + ) + result = df.where(df > 2, np.nan) + tm.assert_frame_equal(result, expected) + + result = df.copy() + return_value = result.where(result > 2, np.nan, inplace=True) + assert return_value is None + tm.assert_frame_equal(result, expected) + + def test_where_bug_mixed(self, any_signed_int_numpy_dtype): + # see gh-2793 + df = DataFrame( + { + "a": np.array([1, 2, 3, 4], dtype=any_signed_int_numpy_dtype), + "b": np.array([4.0, 3.0, 2.0, 1.0], dtype="float64"), + } + ) + + expected = DataFrame( + {"a": [np.nan, np.nan, 3.0, 4.0], "b": [4.0, 3.0, np.nan, np.nan]}, + dtype="float64", + ) + + result = df.where(df > 2, np.nan) + tm.assert_frame_equal(result, expected) + + result = df.copy() + return_value = result.where(result > 2, np.nan, inplace=True) + assert return_value is None + tm.assert_frame_equal(result, expected) + + def test_where_bug_transposition(self): + # see gh-7506 + a = DataFrame({0: [1, 2], 1: [3, 4], 2: [5, 6]}) + b = DataFrame({0: [np.nan, 8], 1: [9, np.nan], 2: [np.nan, np.nan]}) + do_not_replace = b.isna() | (a > b) + + expected = a.copy() + expected[~do_not_replace] = b + + result = a.where(do_not_replace, b) + tm.assert_frame_equal(result, expected) + + a = DataFrame({0: [4, 6], 1: [1, 0]}) + b = DataFrame({0: [np.nan, 3], 1: [3, np.nan]}) + do_not_replace = b.isna() | (a > b) + + expected = a.copy() + expected[~do_not_replace] = b + + result = a.where(do_not_replace, b) + tm.assert_frame_equal(result, expected) + + def test_where_datetime(self): + + # GH 3311 + df = DataFrame( + { + "A": date_range("20130102", periods=5), + "B": date_range("20130104", periods=5), + "C": np.random.randn(5), + } + ) + + stamp = datetime(2013, 1, 3) + msg = "'>' not supported between instances of 'float' and 'datetime.datetime'" + with pytest.raises(TypeError, match=msg): + df > stamp + + result = df[df.iloc[:, :-1] > stamp] + + expected = df.copy() + expected.loc[[0, 1], "A"] = np.nan + expected.loc[:, "C"] = np.nan + tm.assert_frame_equal(result, expected) + + def test_where_none(self): + # GH 4667 + # setting with None changes dtype + df = DataFrame({"series": Series(range(10))}).astype(float) + df[df > 7] = None + expected = DataFrame( + {"series": Series([0, 1, 2, 3, 4, 5, 6, 7, np.nan, np.nan])} + ) + tm.assert_frame_equal(df, expected) + + # GH 7656 + df = DataFrame( + [ + {"A": 1, "B": np.nan, "C": "Test"}, + {"A": np.nan, "B": "Test", "C": np.nan}, + ] + ) + msg = "boolean setting on mixed-type" + + with pytest.raises(TypeError, match=msg): + df.where(~isna(df), None, inplace=True) + + def test_where_empty_df_and_empty_cond_having_non_bool_dtypes(self): + # see gh-21947 + df = DataFrame(columns=["a"]) + cond = df + assert (cond.dtypes == object).all() + + result = df.where(cond) + tm.assert_frame_equal(result, df) + + def test_where_align(self): + def create(): + df = DataFrame(np.random.randn(10, 3)) + df.iloc[3:5, 0] = np.nan + df.iloc[4:6, 1] = np.nan + df.iloc[5:8, 2] = np.nan + return df + + # series + df = create() + expected = df.fillna(df.mean()) + result = df.where(pd.notna(df), df.mean(), axis="columns") + tm.assert_frame_equal(result, expected) + + return_value = df.where(pd.notna(df), df.mean(), inplace=True, axis="columns") + assert return_value is None + tm.assert_frame_equal(df, expected) + + df = create().fillna(0) + expected = df.apply(lambda x, y: x.where(x > 0, y), y=df[0]) + result = df.where(df > 0, df[0], 
axis="index") + tm.assert_frame_equal(result, expected) + result = df.where(df > 0, df[0], axis="rows") + tm.assert_frame_equal(result, expected) + + # frame + df = create() + expected = df.fillna(1) + result = df.where( + pd.notna(df), DataFrame(1, index=df.index, columns=df.columns) + ) + tm.assert_frame_equal(result, expected) + + def test_where_complex(self): + # GH 6345 + expected = DataFrame([[1 + 1j, 2], [np.nan, 4 + 1j]], columns=["a", "b"]) + df = DataFrame([[1 + 1j, 2], [5 + 1j, 4 + 1j]], columns=["a", "b"]) + df[df.abs() >= 5] = np.nan + tm.assert_frame_equal(df, expected) + + def test_where_axis(self, using_array_manager): + # GH 9736 + df = DataFrame(np.random.randn(2, 2)) + mask = DataFrame([[False, False], [False, False]]) + s = Series([0, 1]) + + expected = DataFrame([[0, 0], [1, 1]], dtype="float64") + result = df.where(mask, s, axis="index") + tm.assert_frame_equal(result, expected) + + result = df.copy() + return_value = result.where(mask, s, axis="index", inplace=True) + assert return_value is None + tm.assert_frame_equal(result, expected) + + expected = DataFrame([[0, 1], [0, 1]], dtype="float64") + result = df.where(mask, s, axis="columns") + tm.assert_frame_equal(result, expected) + + result = df.copy() + return_value = result.where(mask, s, axis="columns", inplace=True) + assert return_value is None + tm.assert_frame_equal(result, expected) + + # Upcast needed + df = DataFrame([[1, 2], [3, 4]], dtype="int64") + mask = DataFrame([[False, False], [False, False]]) + s = Series([0, np.nan]) + + expected = DataFrame([[0, 0], [np.nan, np.nan]], dtype="float64") + result = df.where(mask, s, axis="index") + tm.assert_frame_equal(result, expected) + + result = df.copy() + return_value = result.where(mask, s, axis="index", inplace=True) + assert return_value is None + tm.assert_frame_equal(result, expected) + + warn = FutureWarning if using_array_manager else None + expected = DataFrame([[0, np.nan], [0, np.nan]]) + with tm.assert_produces_warning(warn, match="Downcasting integer-dtype"): + result = df.where(mask, s, axis="columns") + tm.assert_frame_equal(result, expected) + + expected = DataFrame( + { + 0: np.array([0, 0], dtype="int64"), + 1: np.array([np.nan, np.nan], dtype="float64"), + } + ) + result = df.copy() + return_value = result.where(mask, s, axis="columns", inplace=True) + assert return_value is None + tm.assert_frame_equal(result, expected) + + def test_where_axis_multiple_dtypes(self): + # Multiple dtypes (=> multiple Blocks) + df = pd.concat( + [ + DataFrame(np.random.randn(10, 2)), + DataFrame(np.random.randint(0, 10, size=(10, 2)), dtype="int64"), + ], + ignore_index=True, + axis=1, + ) + mask = DataFrame(False, columns=df.columns, index=df.index) + s1 = Series(1, index=df.columns) + s2 = Series(2, index=df.index) + + result = df.where(mask, s1, axis="columns") + expected = DataFrame(1.0, columns=df.columns, index=df.index) + expected[2] = expected[2].astype("int64") + expected[3] = expected[3].astype("int64") + tm.assert_frame_equal(result, expected) + + result = df.copy() + return_value = result.where(mask, s1, axis="columns", inplace=True) + assert return_value is None + tm.assert_frame_equal(result, expected) + + result = df.where(mask, s2, axis="index") + expected = DataFrame(2.0, columns=df.columns, index=df.index) + expected[2] = expected[2].astype("int64") + expected[3] = expected[3].astype("int64") + tm.assert_frame_equal(result, expected) + + result = df.copy() + return_value = result.where(mask, s2, axis="index", inplace=True) + assert 
return_value is None + tm.assert_frame_equal(result, expected) + + # DataFrame vs DataFrame + d1 = df.copy().drop(1, axis=0) + expected = df.copy() + expected.loc[1, :] = np.nan + + result = df.where(mask, d1) + tm.assert_frame_equal(result, expected) + result = df.where(mask, d1, axis="index") + tm.assert_frame_equal(result, expected) + result = df.copy() + return_value = result.where(mask, d1, inplace=True) + assert return_value is None + tm.assert_frame_equal(result, expected) + result = df.copy() + return_value = result.where(mask, d1, inplace=True, axis="index") + assert return_value is None + tm.assert_frame_equal(result, expected) + + d2 = df.copy().drop(1, axis=1) + expected = df.copy() + expected.loc[:, 1] = np.nan + + result = df.where(mask, d2) + tm.assert_frame_equal(result, expected) + result = df.where(mask, d2, axis="columns") + tm.assert_frame_equal(result, expected) + result = df.copy() + return_value = result.where(mask, d2, inplace=True) + assert return_value is None + tm.assert_frame_equal(result, expected) + result = df.copy() + return_value = result.where(mask, d2, inplace=True, axis="columns") + assert return_value is None + tm.assert_frame_equal(result, expected) + + def test_where_callable(self): + # GH 12533 + df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + result = df.where(lambda x: x > 4, lambda x: x + 1) + exp = DataFrame([[2, 3, 4], [5, 5, 6], [7, 8, 9]]) + tm.assert_frame_equal(result, exp) + tm.assert_frame_equal(result, df.where(df > 4, df + 1)) + + # return ndarray and scalar + result = df.where(lambda x: (x % 2 == 0).values, lambda x: 99) + exp = DataFrame([[99, 2, 99], [4, 99, 6], [99, 8, 99]]) + tm.assert_frame_equal(result, exp) + tm.assert_frame_equal(result, df.where(df % 2 == 0, 99)) + + # chain + result = (df + 2).where(lambda x: x > 8, lambda x: x + 10) + exp = DataFrame([[13, 14, 15], [16, 17, 18], [9, 10, 11]]) + tm.assert_frame_equal(result, exp) + tm.assert_frame_equal(result, (df + 2).where((df + 2) > 8, (df + 2) + 10)) + + def test_where_tz_values(self, tz_naive_fixture, frame_or_series): + obj1 = DataFrame( + DatetimeIndex(["20150101", "20150102", "20150103"], tz=tz_naive_fixture), + columns=["date"], + ) + obj2 = DataFrame( + DatetimeIndex(["20150103", "20150104", "20150105"], tz=tz_naive_fixture), + columns=["date"], + ) + mask = DataFrame([True, True, False], columns=["date"]) + exp = DataFrame( + DatetimeIndex(["20150101", "20150102", "20150105"], tz=tz_naive_fixture), + columns=["date"], + ) + if frame_or_series is Series: + obj1 = obj1["date"] + obj2 = obj2["date"] + mask = mask["date"] + exp = exp["date"] + + result = obj1.where(mask, obj2) + tm.assert_equal(exp, result) + + def test_df_where_change_dtype(self): + # GH#16979 + df = DataFrame(np.arange(2 * 3).reshape(2, 3), columns=list("ABC")) + mask = np.array([[True, False, False], [False, False, True]]) + + result = df.where(mask) + expected = DataFrame( + [[0, np.nan, np.nan], [np.nan, np.nan, 5]], columns=list("ABC") + ) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("kwargs", [{}, {"other": None}]) + def test_df_where_with_category(self, kwargs): + # GH#16979 + df = DataFrame(np.arange(2 * 3).reshape(2, 3), columns=list("ABC")) + mask = np.array([[True, False, False], [False, False, True]]) + + # change type to category + df.A = df.A.astype("category") + df.B = df.B.astype("category") + df.C = df.C.astype("category") + + result = df.where(mask, **kwargs) + A = pd.Categorical([0, np.nan], categories=[0, 3]) + B = pd.Categorical([np.nan, np.nan], 
categories=[1, 4]) + C = pd.Categorical([np.nan, 5], categories=[2, 5]) + expected = DataFrame({"A": A, "B": B, "C": C}) + + tm.assert_frame_equal(result, expected) + + # Check Series.where while we're here + result = df.A.where(mask[:, 0], **kwargs) + expected = Series(A, name="A") + + tm.assert_series_equal(result, expected) + + def test_where_categorical_filtering(self): + # GH#22609 Verify filtering operations on DataFrames with categorical Series + df = DataFrame(data=[[0, 0], [1, 1]], columns=["a", "b"]) + df["b"] = df["b"].astype("category") + + result = df.where(df["a"] > 0) + expected = df.copy() + expected.loc[0, :] = np.nan + + tm.assert_equal(result, expected) + + def test_where_ea_other(self): + # GH#38729/GH#38742 + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + arr = pd.array([7, pd.NA, 9]) + ser = Series(arr) + mask = np.ones(df.shape, dtype=bool) + mask[1, :] = False + + # TODO: ideally we would get Int64 instead of object + result = df.where(mask, ser, axis=0) + expected = DataFrame({"A": [1, pd.NA, 3], "B": [4, pd.NA, 6]}).astype(object) + tm.assert_frame_equal(result, expected) + + ser2 = Series(arr[:2], index=["A", "B"]) + expected = DataFrame({"A": [1, 7, 3], "B": [4, pd.NA, 6]}) + expected["B"] = expected["B"].astype(object) + result = df.where(mask, ser2, axis=1) + tm.assert_frame_equal(result, expected) + + def test_where_interval_noop(self): + # GH#44181 + df = DataFrame([pd.Interval(0, 0)]) + res = df.where(df.notna()) + tm.assert_frame_equal(res, df) + + ser = df[0] + res = ser.where(ser.notna()) + tm.assert_series_equal(res, ser) + + @pytest.mark.parametrize( + "dtype", + [ + "timedelta64[ns]", + "datetime64[ns]", + "datetime64[ns, Asia/Tokyo]", + "Period[D]", + ], + ) + def test_where_datetimelike_noop(self, dtype): + # GH#45135, analogue to GH#44181 for Period don't raise on no-op + # For td64/dt64/dt64tz we already don't raise, but also are + # checking that we don't unnecessarily upcast to object. 
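+        # Editor's note: the ``.view(dtype)`` call below is meant to
+        # reinterpret the int64 nanosecond ticks as each parametrized
+        # datetime-like dtype without copying, so a single test body can
+        # cover td64/dt64/dt64tz and Period.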
+ ser = Series(np.arange(3) * 10 ** 9, dtype=np.int64).view(dtype) + df = ser.to_frame() + mask = np.array([False, False, False]) + + res = ser.where(~mask, "foo") + tm.assert_series_equal(res, ser) + + mask2 = mask.reshape(-1, 1) + res2 = df.where(~mask2, "foo") + tm.assert_frame_equal(res2, df) + + res3 = ser.mask(mask, "foo") + tm.assert_series_equal(res3, ser) + + res4 = df.mask(mask2, "foo") + tm.assert_frame_equal(res4, df) + + +def test_where_try_cast_deprecated(frame_or_series): + obj = DataFrame(np.random.randn(4, 3)) + obj = tm.get_obj(obj, frame_or_series) + + mask = obj > 0 + + with tm.assert_produces_warning(FutureWarning): + # try_cast keyword deprecated + obj.where(mask, -1, try_cast=False) + + +def test_where_int_downcasting_deprecated(using_array_manager): + # GH#44597 + arr = np.arange(6).astype(np.int16).reshape(3, 2) + df = DataFrame(arr) + + mask = np.zeros(arr.shape, dtype=bool) + mask[:, 0] = True + + msg = "Downcasting integer-dtype" + warn = FutureWarning if not using_array_manager else None + with tm.assert_produces_warning(warn, match=msg): + res = df.where(mask, 2 ** 17) + + expected = DataFrame({0: arr[:, 0], 1: np.array([2 ** 17] * 3, dtype=np.int32)}) + tm.assert_frame_equal(res, expected) + + +def test_where_copies_with_noop(frame_or_series): + # GH-39595 + result = frame_or_series([1, 2, 3, 4]) + expected = result.copy() + col = result[0] if frame_or_series is DataFrame else result + + where_res = result.where(col < 5) + where_res *= 2 + + tm.assert_equal(result, expected) + + where_res = result.where(col > 5, [1, 2, 3, 4]) + where_res *= 2 + + tm.assert_equal(result, expected) + + +def test_where_string_dtype(frame_or_series): + # GH40824 + obj = frame_or_series( + ["a", "b", "c", "d"], index=["id1", "id2", "id3", "id4"], dtype=StringDtype() + ) + filtered_obj = frame_or_series( + ["b", "c"], index=["id2", "id3"], dtype=StringDtype() + ) + filter_ser = Series([False, True, True, False]) + + result = obj.where(filter_ser, filtered_obj) + expected = frame_or_series( + [pd.NA, "b", "c", pd.NA], + index=["id1", "id2", "id3", "id4"], + dtype=StringDtype(), + ) + tm.assert_equal(result, expected) + + +def test_where_bool_comparison(): + # GH 10336 + df_mask = DataFrame( + {"AAA": [True] * 4, "BBB": [False] * 4, "CCC": [True, False, True, False]} + ) + result = df_mask.where(df_mask == False) # noqa:E712 + expected = DataFrame( + { + "AAA": np.array([np.nan] * 4, dtype=object), + "BBB": [False] * 4, + "CCC": [np.nan, False, np.nan, False], + } + ) + tm.assert_frame_equal(result, expected) + + +def test_where_none_nan_coerce(): + # GH 15613 + expected = DataFrame( + { + "A": [Timestamp("20130101"), pd.NaT, Timestamp("20130103")], + "B": [1, 2, np.nan], + } + ) + result = expected.where(expected.notnull(), None) + tm.assert_frame_equal(result, expected) + + +def test_where_non_keyword_deprecation(frame_or_series): + # GH 41485 + obj = frame_or_series(range(5)) + msg = ( + "In a future version of pandas all arguments of " + f"{frame_or_series.__name__}.where except for the arguments 'cond' " + "and 'other' will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = obj.where(obj > 1, 10, False) + expected = frame_or_series([10, 10, 2, 3, 4]) + tm.assert_equal(expected, result) + + +def test_where_columns_casting(): + # GH 42295 + + df = DataFrame({"a": [1.0, 2.0], "b": [3, np.nan]}) + expected = df.copy() + result = df.where(pd.notnull(df), None) + # make sure dtypes don't change + tm.assert_frame_equal(expected, result) + + 
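+# Editor's sketch (not part of upstream pandas): the tests in this file all
+# rely on the core where/mask contract -- values are kept where ``cond`` is
+# True, replaced by ``other`` elsewhere, and ``mask`` is the complement of
+# ``where``. A minimal, self-contained illustration:
+def test_where_mask_complement_sketch():
+    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+    result = df.where(df > 2, 0)
+    expected = DataFrame({"a": [0, 0, 3], "b": [4, 5, 6]})
+    tm.assert_frame_equal(result, expected)
+    # mask replaces values where the condition *is* True
+    tm.assert_frame_equal(df.mask(df <= 2, 0), result)
+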
+@pytest.mark.parametrize("as_cat", [True, False]) +def test_where_period_invalid_na(frame_or_series, as_cat, request): + # GH#44697 + idx = pd.period_range("2016-01-01", periods=3, freq="D") + if as_cat: + idx = idx.astype("category") + obj = frame_or_series(idx) + + # NA value that we should *not* cast to Period dtype + tdnat = pd.NaT.to_numpy("m8[ns]") + + mask = np.array([True, True, False], ndmin=obj.ndim).T + + if as_cat: + msg = ( + r"Cannot setitem on a Categorical with a new category \(NaT\), " + "set the categories first" + ) + if np_version_under1p19: + mark = pytest.mark.xfail( + reason="When evaluating the f-string to generate the exception " + "message, numpy somehow ends up trying to cast None to int, so " + "ends up raising TypeError but with an unrelated message." + ) + request.node.add_marker(mark) + else: + msg = "value should be a 'Period'" + + with pytest.raises(TypeError, match=msg): + obj.where(mask, tdnat) + + with pytest.raises(TypeError, match=msg): + obj.mask(mask, tdnat) + + with pytest.raises(TypeError, match=msg): + obj.mask(mask, tdnat, inplace=True) + + +def test_where_nullable_invalid_na(frame_or_series, any_numeric_ea_dtype): + # GH#44697 + arr = pd.array([1, 2, 3], dtype=any_numeric_ea_dtype) + obj = frame_or_series(arr) + + mask = np.array([True, True, False], ndmin=obj.ndim).T + + msg = "|".join( + [ + r"datetime64\[.{1,2}\] cannot be converted to an? (Integer|Floating)Dtype", + r"timedelta64\[.{1,2}\] cannot be converted to an? (Integer|Floating)Dtype", + r"int\(\) argument must be a string, a bytes-like object or a number, " + "not 'NaTType'", + "object cannot be converted to a FloatingDtype", + "'values' contains non-numeric NA", + ] + ) + + for null in tm.NP_NAT_OBJECTS + [pd.NaT]: + # NaT is an NA value that we should *not* cast to pd.NA dtype + with pytest.raises(TypeError, match=msg): + obj.where(mask, null) + + with pytest.raises(TypeError, match=msg): + obj.mask(mask, null) + + +@given(data=OPTIONAL_ONE_OF_ALL) +def test_where_inplace_casting(data): + # GH 22051 + df = DataFrame({"a": data}) + df_copy = df.where(pd.notnull(df), None).copy() + df.where(pd.notnull(df), None, inplace=True) + tm.assert_equal(df, df_copy) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/indexing/test_xs.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/indexing/test_xs.py new file mode 100644 index 0000000..c6938ab --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/indexing/test_xs.py @@ -0,0 +1,392 @@ +import re + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + IndexSlice, + MultiIndex, + Series, + concat, +) +import pandas._testing as tm +import pandas.core.common as com + +from pandas.tseries.offsets import BDay + + +@pytest.fixture +def four_level_index_dataframe(): + arr = np.array( + [ + [-0.5109, -2.3358, -0.4645, 0.05076, 0.364], + [0.4473, 1.4152, 0.2834, 1.00661, 0.1744], + [-0.6662, -0.5243, -0.358, 0.89145, 2.5838], + ] + ) + index = MultiIndex( + levels=[["a", "x"], ["b", "q"], [10.0032, 20.0, 30.0], [3, 4, 5]], + codes=[[0, 0, 1], [0, 1, 1], [0, 1, 2], [2, 1, 0]], + names=["one", "two", "three", "four"], + ) + return DataFrame(arr, index=index, columns=list("ABCDE")) + + +class TestXS: + def test_xs(self, float_frame, datetime_frame): + idx = float_frame.index[5] + xs = float_frame.xs(idx) + for item, value in xs.items(): + if np.isnan(value): + assert np.isnan(float_frame[item][idx]) + else: + assert value == float_frame[item][idx] + + # mixed-type xs + test_data = 
{"A": {"1": 1, "2": 2}, "B": {"1": "1", "2": "2", "3": "3"}} + frame = DataFrame(test_data) + xs = frame.xs("1") + assert xs.dtype == np.object_ + assert xs["A"] == 1 + assert xs["B"] == "1" + + with pytest.raises( + KeyError, match=re.escape("Timestamp('1999-12-31 00:00:00', freq='B')") + ): + datetime_frame.xs(datetime_frame.index[0] - BDay()) + + # xs get column + series = float_frame.xs("A", axis=1) + expected = float_frame["A"] + tm.assert_series_equal(series, expected) + + # view is returned if possible + series = float_frame.xs("A", axis=1) + series[:] = 5 + assert (expected == 5).all() + + def test_xs_corner(self): + # pathological mixed-type reordering case + df = DataFrame(index=[0]) + df["A"] = 1.0 + df["B"] = "foo" + df["C"] = 2.0 + df["D"] = "bar" + df["E"] = 3.0 + + xs = df.xs(0) + exp = Series([1.0, "foo", 2.0, "bar", 3.0], index=list("ABCDE"), name=0) + tm.assert_series_equal(xs, exp) + + # no columns but Index(dtype=object) + df = DataFrame(index=["a", "b", "c"]) + result = df.xs("a") + expected = Series([], name="a", index=Index([]), dtype=np.float64) + tm.assert_series_equal(result, expected) + + def test_xs_duplicates(self): + df = DataFrame(np.random.randn(5, 2), index=["b", "b", "c", "b", "a"]) + + cross = df.xs("c") + exp = df.iloc[2] + tm.assert_series_equal(cross, exp) + + def test_xs_keep_level(self): + df = DataFrame( + { + "day": {0: "sat", 1: "sun"}, + "flavour": {0: "strawberry", 1: "strawberry"}, + "sales": {0: 10, 1: 12}, + "year": {0: 2008, 1: 2008}, + } + ).set_index(["year", "flavour", "day"]) + result = df.xs("sat", level="day", drop_level=False) + expected = df[:1] + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning): + result = df.xs([2008, "sat"], level=["year", "day"], drop_level=False) + tm.assert_frame_equal(result, expected) + + def test_xs_view(self, using_array_manager): + # in 0.14 this will return a view if possible a copy otherwise, but + # this is numpy dependent + + dm = DataFrame(np.arange(20.0).reshape(4, 5), index=range(4), columns=range(5)) + + if using_array_manager: + # INFO(ArrayManager) with ArrayManager getting a row as a view is + # not possible + msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame" + with pytest.raises(com.SettingWithCopyError, match=msg): + dm.xs(2)[:] = 20 + assert not (dm.xs(2) == 20).any() + else: + dm.xs(2)[:] = 20 + assert (dm.xs(2) == 20).all() + + +class TestXSWithMultiIndex: + def test_xs_doc_example(self): + # TODO: more descriptive name + # based on example in advanced.rst + arrays = [ + ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + tuples = list(zip(*arrays)) + + index = MultiIndex.from_tuples(tuples, names=["first", "second"]) + df = DataFrame(np.random.randn(3, 8), index=["A", "B", "C"], columns=index) + + result = df.xs(("one", "bar"), level=("second", "first"), axis=1) + + expected = df.iloc[:, [0]] + tm.assert_frame_equal(result, expected) + + def test_xs_integer_key(self): + # see GH#2107 + dates = range(20111201, 20111205) + ids = list("abcde") + index = MultiIndex.from_product([dates, ids], names=["date", "secid"]) + df = DataFrame(np.random.randn(len(index), 3), index, ["X", "Y", "Z"]) + + result = df.xs(20111201, level="date") + expected = df.loc[20111201, :] + tm.assert_frame_equal(result, expected) + + def test_xs_level(self, multiindex_dataframe_random_data): + df = multiindex_dataframe_random_data + result = df.xs("two", level="second") + expected 
= df[df.index.get_level_values(1) == "two"] + expected.index = Index(["foo", "bar", "baz", "qux"], name="first") + tm.assert_frame_equal(result, expected) + + def test_xs_level_eq_2(self): + arr = np.random.randn(3, 5) + index = MultiIndex( + levels=[["a", "p", "x"], ["b", "q", "y"], ["c", "r", "z"]], + codes=[[2, 0, 1], [2, 0, 1], [2, 0, 1]], + ) + df = DataFrame(arr, index=index) + expected = DataFrame(arr[1:2], index=[["a"], ["b"]]) + result = df.xs("c", level=2) + tm.assert_frame_equal(result, expected) + + def test_xs_setting_with_copy_error(self, multiindex_dataframe_random_data): + # this is a copy in 0.14 + df = multiindex_dataframe_random_data + result = df.xs("two", level="second") + + # setting this will give a SettingWithCopyError + # as we are trying to write a view + msg = "A value is trying to be set on a copy of a slice from a DataFrame" + with pytest.raises(com.SettingWithCopyError, match=msg): + result[:] = 10 + + def test_xs_setting_with_copy_error_multiple(self, four_level_index_dataframe): + # this is a copy in 0.14 + df = four_level_index_dataframe + result = df.xs(("a", 4), level=["one", "four"]) + + # setting this will give a SettingWithCopyError + # as we are trying to write a view + msg = "A value is trying to be set on a copy of a slice from a DataFrame" + with pytest.raises(com.SettingWithCopyError, match=msg): + result[:] = 10 + + @pytest.mark.parametrize("key, level", [("one", "second"), (["one"], ["second"])]) + def test_xs_with_duplicates(self, key, level, multiindex_dataframe_random_data): + # see GH#13719 + frame = multiindex_dataframe_random_data + df = concat([frame] * 2) + assert df.index.is_unique is False + expected = concat([frame.xs("one", level="second")] * 2) + + if isinstance(key, list): + with tm.assert_produces_warning(FutureWarning): + result = df.xs(key, level=level) + else: + result = df.xs(key, level=level) + tm.assert_frame_equal(result, expected) + + def test_xs_missing_values_in_index(self): + # see GH#6574 + # missing values in returned index should be preserved + acc = [ + ("a", "abcde", 1), + ("b", "bbcde", 2), + ("y", "yzcde", 25), + ("z", "xbcde", 24), + ("z", None, 26), + ("z", "zbcde", 25), + ("z", "ybcde", 26), + ] + df = DataFrame(acc, columns=["a1", "a2", "cnt"]).set_index(["a1", "a2"]) + expected = DataFrame( + {"cnt": [24, 26, 25, 26]}, + index=Index(["xbcde", np.nan, "zbcde", "ybcde"], name="a2"), + ) + + result = df.xs("z", level="a1") + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "key, level, exp_arr, exp_index", + [ + ("a", "lvl0", lambda x: x[:, 0:2], Index(["bar", "foo"], name="lvl1")), + ("foo", "lvl1", lambda x: x[:, 1:2], Index(["a"], name="lvl0")), + ], + ) + def test_xs_named_levels_axis_eq_1(self, key, level, exp_arr, exp_index): + # see GH#2903 + arr = np.random.randn(4, 4) + index = MultiIndex( + levels=[["a", "b"], ["bar", "foo", "hello", "world"]], + codes=[[0, 0, 1, 1], [0, 1, 2, 3]], + names=["lvl0", "lvl1"], + ) + df = DataFrame(arr, columns=index) + result = df.xs(key, level=level, axis=1) + expected = DataFrame(exp_arr(arr), columns=exp_index) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "indexer", + [ + lambda df: df.xs(("a", 4), level=["one", "four"]), + lambda df: df.xs("a").xs(4, level="four"), + ], + ) + def test_xs_level_multiple(self, indexer, four_level_index_dataframe): + df = four_level_index_dataframe + expected_values = [[0.4473, 1.4152, 0.2834, 1.00661, 0.1744]] + expected_index = MultiIndex( + levels=[["q"], [20.0]], codes=[[0], [0]], 
names=["two", "three"] + ) + expected = DataFrame( + expected_values, index=expected_index, columns=list("ABCDE") + ) + result = indexer(df) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "indexer", [lambda df: df.xs("a", level=0), lambda df: df.xs("a")] + ) + def test_xs_level0(self, indexer, four_level_index_dataframe): + df = four_level_index_dataframe + expected_values = [ + [-0.5109, -2.3358, -0.4645, 0.05076, 0.364], + [0.4473, 1.4152, 0.2834, 1.00661, 0.1744], + ] + expected_index = MultiIndex( + levels=[["b", "q"], [10.0032, 20.0], [4, 5]], + codes=[[0, 1], [0, 1], [1, 0]], + names=["two", "three", "four"], + ) + expected = DataFrame( + expected_values, index=expected_index, columns=list("ABCDE") + ) + + result = indexer(df) + tm.assert_frame_equal(result, expected) + + def test_xs_values(self, multiindex_dataframe_random_data): + df = multiindex_dataframe_random_data + result = df.xs(("bar", "two")).values + expected = df.values[4] + tm.assert_almost_equal(result, expected) + + def test_xs_loc_equality(self, multiindex_dataframe_random_data): + df = multiindex_dataframe_random_data + result = df.xs(("bar", "two")) + expected = df.loc[("bar", "two")] + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("klass", [DataFrame, Series]) + def test_xs_IndexSlice_argument_not_implemented(self, klass): + # GH#35301 + + index = MultiIndex( + levels=[[("foo", "bar", 0), ("foo", "baz", 0), ("foo", "qux", 0)], [0, 1]], + codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], + ) + + obj = DataFrame(np.random.randn(6, 4), index=index) + if klass is Series: + obj = obj[0] + + expected = obj.iloc[-2:].droplevel(0) + + result = obj.xs(IndexSlice[("foo", "qux", 0), :]) + tm.assert_equal(result, expected) + + result = obj.loc[IndexSlice[("foo", "qux", 0), :]] + tm.assert_equal(result, expected) + + @pytest.mark.parametrize("klass", [DataFrame, Series]) + def test_xs_levels_raises(self, klass): + obj = DataFrame({"A": [1, 2, 3]}) + if klass is Series: + obj = obj["A"] + + msg = "Index must be a MultiIndex" + with pytest.raises(TypeError, match=msg): + obj.xs(0, level="as") + + def test_xs_multiindex_droplevel_false(self): + # GH#19056 + mi = MultiIndex.from_tuples( + [("a", "x"), ("a", "y"), ("b", "x")], names=["level1", "level2"] + ) + df = DataFrame([[1, 2, 3]], columns=mi) + result = df.xs("a", axis=1, drop_level=False) + expected = DataFrame( + [[1, 2]], + columns=MultiIndex.from_tuples( + [("a", "x"), ("a", "y")], names=["level1", "level2"] + ), + ) + tm.assert_frame_equal(result, expected) + + def test_xs_droplevel_false(self): + # GH#19056 + df = DataFrame([[1, 2, 3]], columns=Index(["a", "b", "c"])) + result = df.xs("a", axis=1, drop_level=False) + expected = DataFrame({"a": [1]}) + tm.assert_frame_equal(result, expected) + + def test_xs_droplevel_false_view(self, using_array_manager): + # GH#37832 + df = DataFrame([[1, 2, 3]], columns=Index(["a", "b", "c"])) + result = df.xs("a", axis=1, drop_level=False) + # check that result still views the same data as df + assert np.shares_memory(result.iloc[:, 0]._values, df.iloc[:, 0]._values) + # modifying original df also modifies result when having a single block + df.iloc[0, 0] = 2 + expected = DataFrame({"a": [2]}) + tm.assert_frame_equal(result, expected) + + # with mixed dataframe, modifying the parent doesn't modify result + # TODO the "split" path behaves differently here as with single block + df = DataFrame([[1, 2.5, "a"]], columns=Index(["a", "b", "c"])) + result = df.xs("a", axis=1, drop_level=False) 
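+        # Editor's note: for this mixed-dtype frame each column lives in its
+        # own block, and (on the default block manager) the "split" iloc path
+        # assigns a fresh array instead of mutating the one viewed above --
+        # hence the expectation branches on ``using_array_manager`` below.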
+ df.iloc[0, 0] = 2 + if using_array_manager: + # Here the behavior is consistent + expected = DataFrame({"a": [2]}) + else: + # FIXME: iloc does not update the array inplace using + # "split" path + expected = DataFrame({"a": [1]}) + tm.assert_frame_equal(result, expected) + + def test_xs_list_indexer_droplevel_false(self): + # GH#41760 + mi = MultiIndex.from_tuples([("x", "m", "a"), ("x", "n", "b"), ("y", "o", "c")]) + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=mi) + with tm.assert_produces_warning(FutureWarning): + with pytest.raises(KeyError, match="y"): + df.xs(["x", "y"], drop_level=False, axis=1) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/__init__.py new file mode 100644 index 0000000..245594b --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/__init__.py @@ -0,0 +1,7 @@ +""" +Test files dedicated to individual (stand-alone) DataFrame methods + +Ideally these files/tests should correspond 1-to-1 with tests.series.methods + +These may also present opportunities for sharing/de-duplicating test code. +"""
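+# Editor's sketch (not upstream code): one sharing mechanism alluded to above
+# is the ``frame_or_series`` fixture used by the ``where`` tests earlier in
+# this diff. A minimal equivalent, assuming a pytest conftest:
+#
+#     @pytest.fixture(params=[DataFrame, Series])
+#     def frame_or_series(request):
+#         return request.param
+#
+# so one test body runs once per container type.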
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_add_prefix_suffix.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_add_prefix_suffix.py new file mode 100644 index 0000000..ea75e9f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_add_prefix_suffix.py @@ -0,0 +1,20 @@ +from pandas import Index +import pandas._testing as tm + + +def test_add_prefix_suffix(float_frame): + with_prefix = float_frame.add_prefix("foo#") + expected = Index([f"foo#{c}" for c in float_frame.columns]) + tm.assert_index_equal(with_prefix.columns, expected) + + with_suffix = float_frame.add_suffix("#foo") + expected = Index([f"{c}#foo" for c in float_frame.columns]) + tm.assert_index_equal(with_suffix.columns, expected) + + with_pct_prefix = float_frame.add_prefix("%") + expected = Index([f"%{c}" for c in float_frame.columns]) + tm.assert_index_equal(with_pct_prefix.columns, expected) + + with_pct_suffix = float_frame.add_suffix("%") + expected = Index([f"{c}%" for c in float_frame.columns]) + tm.assert_index_equal(with_pct_suffix.columns, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_align.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_align.py new file mode 100644 index 0000000..03ea649 --- /dev/null +++ 
b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_align.py @@ -0,0 +1,304 @@ +import numpy as np +import pytest +import pytz + +import pandas as pd +from pandas import ( + DataFrame, + Index, + Series, + date_range, +) +import pandas._testing as tm + + +class TestDataFrameAlign: + def test_frame_align_aware(self): + idx1 = date_range("2001", periods=5, freq="H", tz="US/Eastern") + idx2 = date_range("2001", periods=5, freq="2H", tz="US/Eastern") + df1 = DataFrame(np.random.randn(len(idx1), 3), idx1) + df2 = DataFrame(np.random.randn(len(idx2), 3), idx2) + new1, new2 = df1.align(df2) + assert df1.index.tz == new1.index.tz + assert df2.index.tz == new2.index.tz + + # different timezones convert to UTC + + # frame with frame + df1_central = df1.tz_convert("US/Central") + new1, new2 = df1.align(df1_central) + assert new1.index.tz == pytz.UTC + assert new2.index.tz == pytz.UTC + + # frame with Series + new1, new2 = df1.align(df1_central[0], axis=0) + assert new1.index.tz == pytz.UTC + assert new2.index.tz == pytz.UTC + + # Series with frame: assign the result so the assertions check this call + new1, new2 = df1[0].align(df1_central, axis=0) + assert new1.index.tz == pytz.UTC + assert new2.index.tz == pytz.UTC + + def test_align_float(self, float_frame): + af, bf = float_frame.align(float_frame) + assert af._mgr is not float_frame._mgr + + af, bf = float_frame.align(float_frame, copy=False) + assert af._mgr is float_frame._mgr + + # axis = 0 + other = float_frame.iloc[:-5, :3] + af, bf = float_frame.align(other, axis=0, fill_value=-1) + + tm.assert_index_equal(bf.columns, other.columns) + + # test fill value + join_idx = float_frame.index.join(other.index) + diff_a = float_frame.index.difference(join_idx) + diff_a_vals = af.reindex(diff_a).values + assert (diff_a_vals == -1).all() + + af, bf = float_frame.align(other, join="right", axis=0) + tm.assert_index_equal(bf.columns, other.columns) + tm.assert_index_equal(bf.index, other.index) + tm.assert_index_equal(af.index, other.index) + + # axis = 1 + other = float_frame.iloc[:-5, :3].copy() + af, bf = float_frame.align(other, axis=1) + tm.assert_index_equal(bf.columns, float_frame.columns) + tm.assert_index_equal(bf.index, other.index) + + # test fill value + join_idx = float_frame.index.join(other.index) + diff_a = float_frame.index.difference(join_idx) + diff_a_vals = af.reindex(diff_a).values + + assert (diff_a_vals == -1).all() + + af, bf = float_frame.align(other, join="inner", axis=1) + tm.assert_index_equal(bf.columns, other.columns) + + af, bf = float_frame.align(other, join="inner", axis=1, method="pad") + tm.assert_index_equal(bf.columns, other.columns) + + af, bf = float_frame.align( + other.iloc[:, 0], join="inner", axis=1, method=None, fill_value=None + ) + tm.assert_index_equal(bf.index, Index([])) + + af, bf = float_frame.align( + other.iloc[:, 0], join="inner", axis=1, method=None, fill_value=0 + ) + tm.assert_index_equal(bf.index, Index([])) + + # Try to align DataFrame to Series along bad axis + msg = "No axis named 2 for object type DataFrame" + with pytest.raises(ValueError, match=msg): + float_frame.align(af.iloc[0, :3], join="inner", axis=2) + + # align dataframe to series with broadcast or not + idx = float_frame.index + s = Series(range(len(idx)), index=idx) + + left, right = float_frame.align(s, axis=0) + tm.assert_index_equal(left.index, float_frame.index) + tm.assert_index_equal(right.index, float_frame.index) + assert isinstance(right, Series) + + left, right = float_frame.align(s, broadcast_axis=1) + tm.assert_index_equal(left.index, float_frame.index) + expected = {c: s for c in
float_frame.columns} + expected = DataFrame( + expected, index=float_frame.index, columns=float_frame.columns + ) + tm.assert_frame_equal(right, expected) + + # see gh-9558 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + result = df[df["a"] == 2] + expected = DataFrame([[2, 5]], index=[1], columns=["a", "b"]) + tm.assert_frame_equal(result, expected) + + result = df.where(df["a"] == 2, 0) + expected = DataFrame({"a": [0, 2, 0], "b": [0, 5, 0]}) + tm.assert_frame_equal(result, expected) + + def test_align_int(self, int_frame): + # test other non-float types + other = DataFrame(index=range(5), columns=["A", "B", "C"]) + + af, bf = int_frame.align(other, join="inner", axis=1, method="pad") + tm.assert_index_equal(bf.columns, other.columns) + + def test_align_mixed_type(self, float_string_frame): + + af, bf = float_string_frame.align( + float_string_frame, join="inner", axis=1, method="pad" + ) + tm.assert_index_equal(bf.columns, float_string_frame.columns) + + def test_align_mixed_float(self, mixed_float_frame): + # mixed floats/ints + other = DataFrame(index=range(5), columns=["A", "B", "C"]) + + af, bf = mixed_float_frame.align( + other.iloc[:, 0], join="inner", axis=1, method=None, fill_value=0 + ) + tm.assert_index_equal(bf.index, Index([])) + + def test_align_mixed_int(self, mixed_int_frame): + other = DataFrame(index=range(5), columns=["A", "B", "C"]) + + af, bf = mixed_int_frame.align( + other.iloc[:, 0], join="inner", axis=1, method=None, fill_value=0 + ) + tm.assert_index_equal(bf.index, Index([])) + + @pytest.mark.parametrize( + "l_ordered,r_ordered,expected", + [ + [True, True, pd.CategoricalIndex], + [True, False, Index], + [False, True, Index], + [False, False, pd.CategoricalIndex], + ], + ) + def test_align_categorical(self, l_ordered, r_ordered, expected): + # GH-28397 + df_1 = DataFrame( + { + "A": np.arange(6, dtype="int64"), + "B": Series(list("aabbca")).astype( + pd.CategoricalDtype(list("cab"), ordered=l_ordered) + ), + } + ).set_index("B") + df_2 = DataFrame( + { + "A": np.arange(5, dtype="int64"), + "B": Series(list("babca")).astype( + pd.CategoricalDtype(list("cab"), ordered=r_ordered) + ), + } + ).set_index("B") + + aligned_1, aligned_2 = df_1.align(df_2) + assert isinstance(aligned_1.index, expected) + assert isinstance(aligned_2.index, expected) + tm.assert_index_equal(aligned_1.index, aligned_2.index) + + def test_align_multiindex(self): + # GH#10665 + # same test cases as test_align_multiindex in test_series.py + + midx = pd.MultiIndex.from_product( + [range(2), range(3), range(2)], names=("a", "b", "c") + ) + idx = Index(range(2), name="b") + df1 = DataFrame(np.arange(12, dtype="int64"), index=midx) + df2 = DataFrame(np.arange(2, dtype="int64"), index=idx) + + # these must be the same results (but flipped) + res1l, res1r = df1.align(df2, join="left") + res2l, res2r = df2.align(df1, join="right") + + expl = df1 + tm.assert_frame_equal(expl, res1l) + tm.assert_frame_equal(expl, res2r) + expr = DataFrame([0, 0, 1, 1, np.nan, np.nan] * 2, index=midx) + tm.assert_frame_equal(expr, res1r) + tm.assert_frame_equal(expr, res2l) + + res1l, res1r = df1.align(df2, join="right") + res2l, res2r = df2.align(df1, join="left") + + exp_idx = pd.MultiIndex.from_product( + [range(2), range(2), range(2)], names=("a", "b", "c") + ) + expl = DataFrame([0, 1, 2, 3, 6, 7, 8, 9], index=exp_idx) + tm.assert_frame_equal(expl, res1l) + tm.assert_frame_equal(expl, res2r) + expr = DataFrame([0, 0, 1, 1] * 2, index=exp_idx) + tm.assert_frame_equal(expr, res1r) + tm.assert_frame_equal(expr, 
res2l) + + def test_align_series_combinations(self): + df = DataFrame({"a": [1, 3, 5], "b": [1, 3, 5]}, index=list("ACE")) + s = Series([1, 2, 4], index=list("ABD"), name="x") + + # frame + series + res1, res2 = df.align(s, axis=0) + exp1 = DataFrame( + {"a": [1, np.nan, 3, np.nan, 5], "b": [1, np.nan, 3, np.nan, 5]}, + index=list("ABCDE"), + ) + exp2 = Series([1, 2, np.nan, 4, np.nan], index=list("ABCDE"), name="x") + + tm.assert_frame_equal(res1, exp1) + tm.assert_series_equal(res2, exp2) + + # series + frame + res1, res2 = s.align(df) + tm.assert_series_equal(res1, exp2) + tm.assert_frame_equal(res2, exp1) + + def _check_align(self, a, b, axis, fill_axis, how, method, limit=None): + aa, ab = a.align( + b, axis=axis, join=how, method=method, limit=limit, fill_axis=fill_axis + ) + + join_index, join_columns = None, None + + ea, eb = a, b + if axis is None or axis == 0: + join_index = a.index.join(b.index, how=how) + ea = ea.reindex(index=join_index) + eb = eb.reindex(index=join_index) + + if axis is None or axis == 1: + join_columns = a.columns.join(b.columns, how=how) + ea = ea.reindex(columns=join_columns) + eb = eb.reindex(columns=join_columns) + + ea = ea.fillna(axis=fill_axis, method=method, limit=limit) + eb = eb.fillna(axis=fill_axis, method=method, limit=limit) + + tm.assert_frame_equal(aa, ea) + tm.assert_frame_equal(ab, eb) + + @pytest.mark.parametrize("meth", ["pad", "bfill"]) + @pytest.mark.parametrize("ax", [0, 1, None]) + @pytest.mark.parametrize("fax", [0, 1]) + @pytest.mark.parametrize("how", ["inner", "outer", "left", "right"]) + def test_align_fill_method(self, how, meth, ax, fax, float_frame): + df = float_frame + self._check_align_fill(df, how, meth, ax, fax) + + def _check_align_fill(self, frame, kind, meth, ax, fax): + left = frame.iloc[0:4, :10] + right = frame.iloc[2:, 6:] + empty = frame.iloc[:0, :0] + + self._check_align(left, right, axis=ax, fill_axis=fax, how=kind, method=meth) + self._check_align( + left, right, axis=ax, fill_axis=fax, how=kind, method=meth, limit=1 + ) + + # empty left + self._check_align(empty, right, axis=ax, fill_axis=fax, how=kind, method=meth) + self._check_align( + empty, right, axis=ax, fill_axis=fax, how=kind, method=meth, limit=1 + ) + + # empty right + self._check_align(left, empty, axis=ax, fill_axis=fax, how=kind, method=meth) + self._check_align( + left, empty, axis=ax, fill_axis=fax, how=kind, method=meth, limit=1 + ) + + # both empty + self._check_align(empty, empty, axis=ax, fill_axis=fax, how=kind, method=meth) + self._check_align( + empty, empty, axis=ax, fill_axis=fax, how=kind, method=meth, limit=1 + ) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_append.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_append.py new file mode 100644 index 0000000..5cfad47 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_append.py @@ -0,0 +1,278 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Series, + Timestamp, + date_range, + timedelta_range, +) +import pandas._testing as tm + + +class TestDataFrameAppend: + @pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") + def test_append_multiindex(self, multiindex_dataframe_random_data, frame_or_series): + obj = multiindex_dataframe_random_data + obj = tm.get_obj(obj, frame_or_series) + + a = obj[:5] + b = obj[5:] + + result = a.append(b) + tm.assert_equal(result, obj) + + def test_append_empty_list(self): + # GH 28769 + df 
= DataFrame() + result = df._append([]) + expected = df + tm.assert_frame_equal(result, expected) + assert result is not df + + df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) + result = df._append([]) + expected = df + tm.assert_frame_equal(result, expected) + assert result is not df # ._append() should return a new object + + def test_append_series_dict(self): + df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) + + series = df.loc[4] + msg = "Indexes have overlapping values" + with pytest.raises(ValueError, match=msg): + df._append(series, verify_integrity=True) + + series.name = None + msg = "Can only append a Series if ignore_index=True" + with pytest.raises(TypeError, match=msg): + df._append(series, verify_integrity=True) + + result = df._append(series[::-1], ignore_index=True) + expected = df._append( + DataFrame({0: series[::-1]}, index=df.columns).T, ignore_index=True + ) + tm.assert_frame_equal(result, expected) + + # dict + result = df._append(series.to_dict(), ignore_index=True) + tm.assert_frame_equal(result, expected) + + result = df._append(series[::-1][:3], ignore_index=True) + expected = df._append( + DataFrame({0: series[::-1][:3]}).T, ignore_index=True, sort=True + ) + tm.assert_frame_equal(result, expected.loc[:, result.columns]) + + msg = "Can only append a dict if ignore_index=True" + with pytest.raises(TypeError, match=msg): + df._append(series.to_dict()) + + # can append when name set + row = df.loc[4] + row.name = 5 + result = df._append(row) + expected = df._append(df[-1:], ignore_index=True) + tm.assert_frame_equal(result, expected) + + def test_append_list_of_series_dicts(self): + df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) + + dicts = [x.to_dict() for idx, x in df.iterrows()] + + result = df._append(dicts, ignore_index=True) + expected = df._append(df, ignore_index=True) + tm.assert_frame_equal(result, expected) + + # different columns + dicts = [ + {"foo": 1, "bar": 2, "baz": 3, "peekaboo": 4}, + {"foo": 5, "bar": 6, "baz": 7, "peekaboo": 8}, + ] + result = df._append(dicts, ignore_index=True, sort=True) + expected = df._append(DataFrame(dicts), ignore_index=True, sort=True) + tm.assert_frame_equal(result, expected) + + def test_append_list_retain_index_name(self): + df = DataFrame( + [[1, 2], [3, 4]], index=pd.Index(["a", "b"], name="keepthisname") + ) + + serc = Series([5, 6], name="c") + + expected = DataFrame( + [[1, 2], [3, 4], [5, 6]], + index=pd.Index(["a", "b", "c"], name="keepthisname"), + ) + + # append series + result = df._append(serc) + tm.assert_frame_equal(result, expected) + + # append list of series + result = df._append([serc]) + tm.assert_frame_equal(result, expected) + + def test_append_missing_cols(self): + # GH22252 + # exercise the conditional branch in append method where the data + # to be appended is a list and does not contain all columns that are in + # the target DataFrame + df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) + + dicts = [{"foo": 9}, {"bar": 10}] + result = df._append(dicts, ignore_index=True, sort=True) + + expected = df._append(DataFrame(dicts), ignore_index=True, sort=True) + tm.assert_frame_equal(result, expected) + + def test_append_empty_dataframe(self): + + # Empty df append empty df + df1 = DataFrame() + df2 = DataFrame() + result = df1._append(df2) + expected = df1.copy() + tm.assert_frame_equal(result, expected) + + # Non-empty df append empty df + df1 = DataFrame(np.random.randn(5, 2)) + df2 = 
DataFrame() + result = df1._append(df2) + expected = df1.copy() + tm.assert_frame_equal(result, expected) + + # Empty df with columns append empty df + df1 = DataFrame(columns=["bar", "foo"]) + df2 = DataFrame() + result = df1._append(df2) + expected = df1.copy() + tm.assert_frame_equal(result, expected) + + # Non-Empty df with columns append empty df + df1 = DataFrame(np.random.randn(5, 2), columns=["bar", "foo"]) + df2 = DataFrame() + result = df1._append(df2) + expected = df1.copy() + tm.assert_frame_equal(result, expected) + + def test_append_dtypes(self): + + # GH 5754 + # row appends of different dtypes (so need to do by-item) + # can sometimes infer the correct type + + df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(5)) + df2 = DataFrame() + result = df1._append(df2) + expected = df1.copy() + tm.assert_frame_equal(result, expected) + + df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) + df2 = DataFrame({"bar": "foo"}, index=range(1, 2)) + result = df1._append(df2) + expected = DataFrame({"bar": [Timestamp("20130101"), "foo"]}) + tm.assert_frame_equal(result, expected) + + df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) + df2 = DataFrame({"bar": np.nan}, index=range(1, 2)) + result = df1._append(df2) + expected = DataFrame( + {"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")} + ) + expected = expected.astype(object) + tm.assert_frame_equal(result, expected) + + df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) + df2 = DataFrame({"bar": np.nan}, index=range(1, 2), dtype=object) + result = df1._append(df2) + expected = DataFrame( + {"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")} + ) + expected = expected.astype(object) + tm.assert_frame_equal(result, expected) + + df1 = DataFrame({"bar": np.nan}, index=range(1)) + df2 = DataFrame({"bar": Timestamp("20130101")}, index=range(1, 2)) + result = df1._append(df2) + expected = DataFrame( + {"bar": Series([np.nan, Timestamp("20130101")], dtype="M8[ns]")} + ) + expected = expected.astype(object) + tm.assert_frame_equal(result, expected) + + df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) + df2 = DataFrame({"bar": 1}, index=range(1, 2), dtype=object) + result = df1._append(df2) + expected = DataFrame({"bar": Series([Timestamp("20130101"), 1])}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "timestamp", ["2019-07-19 07:04:57+0100", "2019-07-19 07:04:57"] + ) + def test_append_timestamps_aware_or_naive(self, tz_naive_fixture, timestamp): + # GH 30238 + tz = tz_naive_fixture + df = DataFrame([Timestamp(timestamp, tz=tz)]) + result = df._append(df.iloc[0]).iloc[-1] + expected = Series(Timestamp(timestamp, tz=tz), name=0) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "data, dtype", + [ + ([1], pd.Int64Dtype()), + ([1], pd.CategoricalDtype()), + ([pd.Interval(left=0, right=5)], pd.IntervalDtype()), + ([pd.Period("2000-03", freq="M")], pd.PeriodDtype("M")), + ([1], pd.SparseDtype()), + ], + ) + def test_other_dtypes(self, data, dtype): + df = DataFrame(data, dtype=dtype) + result = df._append(df.iloc[0]).iloc[-1] + expected = Series(data, name=0, dtype=dtype) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"]) + def test_append_numpy_bug_1681(self, dtype): + # another datetime64 bug + if dtype == "datetime64[ns]": + index = date_range("2011/1/1", "2012/1/1", freq="W-FRI") + else: + index = timedelta_range("1 days", "10 days", 
freq="2D") + + df = DataFrame() + other = DataFrame({"A": "foo", "B": index}, index=index) + + result = df._append(other) + assert (result["B"] == index).all() + + @pytest.mark.filterwarnings("ignore:The values in the array:RuntimeWarning") + def test_multiindex_column_append_multiple(self): + # GH 29699 + df = DataFrame( + [[1, 11], [2, 12], [3, 13]], + columns=pd.MultiIndex.from_tuples( + [("multi", "col1"), ("multi", "col2")], names=["level1", None] + ), + ) + df2 = df.copy() + for i in range(1, 10): + df[i, "colA"] = 10 + df = df._append(df2, ignore_index=True) + result = df["multi"] + expected = DataFrame( + {"col1": [1, 2, 3] * (i + 1), "col2": [11, 12, 13] * (i + 1)} + ) + tm.assert_frame_equal(result, expected) + + def test_append_raises_future_warning(self): + # GH#35407 + df1 = DataFrame([[1, 2], [3, 4]]) + df2 = DataFrame([[5, 6], [7, 8]]) + with tm.assert_produces_warning(FutureWarning): + df1.append(df2) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_asfreq.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_asfreq.py new file mode 100644 index 0000000..07eacb5 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_asfreq.py @@ -0,0 +1,198 @@ +from datetime import datetime + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + DatetimeIndex, + Series, + date_range, + period_range, + to_datetime, +) +import pandas._testing as tm + +from pandas.tseries import offsets + + +class TestAsFreq: + def test_asfreq2(self, frame_or_series): + ts = frame_or_series( + [0.0, 1.0, 2.0], + index=DatetimeIndex( + [ + datetime(2009, 10, 30), + datetime(2009, 11, 30), + datetime(2009, 12, 31), + ], + freq="BM", + ), + ) + + daily_ts = ts.asfreq("B") + monthly_ts = daily_ts.asfreq("BM") + tm.assert_equal(monthly_ts, ts) + + daily_ts = ts.asfreq("B", method="pad") + monthly_ts = daily_ts.asfreq("BM") + tm.assert_equal(monthly_ts, ts) + + daily_ts = ts.asfreq(offsets.BDay()) + monthly_ts = daily_ts.asfreq(offsets.BMonthEnd()) + tm.assert_equal(monthly_ts, ts) + + result = ts[:0].asfreq("M") + assert len(result) == 0 + assert result is not ts + + if frame_or_series is Series: + daily_ts = ts.asfreq("D", fill_value=-1) + result = daily_ts.value_counts().sort_index() + expected = Series([60, 1, 1, 1], index=[-1.0, 2.0, 1.0, 0.0]).sort_index() + tm.assert_series_equal(result, expected) + + def test_asfreq_datetimeindex_empty(self, frame_or_series): + # GH#14320 + index = DatetimeIndex(["2016-09-29 11:00"]) + expected = frame_or_series(index=index, dtype=object).asfreq("H") + result = frame_or_series([3], index=index.copy()).asfreq("H") + tm.assert_index_equal(expected.index, result.index) + + @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"]) + def test_tz_aware_asfreq_smoke(self, tz, frame_or_series): + dr = date_range("2011-12-01", "2012-07-20", freq="D", tz=tz) + + obj = frame_or_series(np.random.randn(len(dr)), index=dr) + + # it works! 
+ obj.asfreq("T") + + def test_asfreq_normalize(self, frame_or_series): + rng = date_range("1/1/2000 09:30", periods=20) + norm = date_range("1/1/2000", periods=20) + + vals = np.random.randn(20, 3) + + obj = DataFrame(vals, index=rng) + expected = DataFrame(vals, index=norm) + if frame_or_series is Series: + obj = obj[0] + expected = expected[0] + + result = obj.asfreq("D", normalize=True) + tm.assert_equal(result, expected) + + def test_asfreq_keep_index_name(self, frame_or_series): + # GH#9854 + index_name = "bar" + index = date_range("20130101", periods=20, name=index_name) + obj = DataFrame(list(range(20)), columns=["foo"], index=index) + obj = tm.get_obj(obj, frame_or_series) + + assert index_name == obj.index.name + assert index_name == obj.asfreq("10D").index.name + + def test_asfreq_ts(self, frame_or_series): + index = period_range(freq="A", start="1/1/2001", end="12/31/2010") + obj = DataFrame(np.random.randn(len(index), 3), index=index) + obj = tm.get_obj(obj, frame_or_series) + + result = obj.asfreq("D", how="end") + exp_index = index.asfreq("D", how="end") + assert len(result) == len(obj) + tm.assert_index_equal(result.index, exp_index) + + result = obj.asfreq("D", how="start") + exp_index = index.asfreq("D", how="start") + assert len(result) == len(obj) + tm.assert_index_equal(result.index, exp_index) + + def test_asfreq_resample_set_correct_freq(self, frame_or_series): + # GH#5613 + # we test if .asfreq() and .resample() set the correct value for .freq + dti = to_datetime(["2012-01-01", "2012-01-02", "2012-01-03"]) + obj = DataFrame({"col": [1, 2, 3]}, index=dti) + obj = tm.get_obj(obj, frame_or_series) + + # testing the settings before calling .asfreq() and .resample() + assert obj.index.freq is None + assert obj.index.inferred_freq == "D" + + # does .asfreq() set .freq correctly? + assert obj.asfreq("D").index.freq == "D" + + # does .resample() set .freq correctly? + assert obj.resample("D").asfreq().index.freq == "D" + + def test_asfreq_empty(self, datetime_frame): + # test does not blow up on length-0 DataFrame + zero_length = datetime_frame.reindex([]) + result = zero_length.asfreq("BM") + assert result is not zero_length + + def test_asfreq(self, datetime_frame): + offset_monthly = datetime_frame.asfreq(offsets.BMonthEnd()) + rule_monthly = datetime_frame.asfreq("BM") + + tm.assert_frame_equal(offset_monthly, rule_monthly) + + filled = rule_monthly.asfreq("B", method="pad") # noqa + # TODO: actually check that this worked. + + # don't forget! 
+ filled_dep = rule_monthly.asfreq("B", method="pad") # noqa + + def test_asfreq_datetimeindex(self): + df = DataFrame( + {"A": [1, 2, 3]}, + index=[datetime(2011, 11, 1), datetime(2011, 11, 2), datetime(2011, 11, 3)], + ) + df = df.asfreq("B") + assert isinstance(df.index, DatetimeIndex) + + ts = df["A"].asfreq("B") + assert isinstance(ts.index, DatetimeIndex) + + def test_asfreq_fillvalue(self): + # test for fill value during upsampling, related to issue 3715 + + # setup + rng = date_range("1/1/2016", periods=10, freq="2S") + ts = Series(np.arange(len(rng)), index=rng) + df = DataFrame({"one": ts}) + + # insert pre-existing missing value + df.loc["2016-01-01 00:00:08", "one"] = None + + actual_df = df.asfreq(freq="1S", fill_value=9.0) + expected_df = df.asfreq(freq="1S").fillna(9.0) + expected_df.loc["2016-01-01 00:00:08", "one"] = None + tm.assert_frame_equal(expected_df, actual_df) + + expected_series = ts.asfreq(freq="1S").fillna(9.0) + actual_series = ts.asfreq(freq="1S", fill_value=9.0) + tm.assert_series_equal(expected_series, actual_series) + + def test_asfreq_with_date_object_index(self, frame_or_series): + rng = date_range("1/1/2000", periods=20) + ts = frame_or_series(np.random.randn(20), index=rng) + + ts2 = ts.copy() + ts2.index = [x.date() for x in ts2.index] + + result = ts2.asfreq("4H", method="ffill") + expected = ts.asfreq("4H", method="ffill") + tm.assert_equal(result, expected) + + def test_asfreq_with_unsorted_index(self, frame_or_series): + # GH#39805 + # Test that rows are not dropped when the datetime index is out of order + index = to_datetime(["2021-01-04", "2021-01-02", "2021-01-03", "2021-01-01"]) + result = frame_or_series(range(4), index=index) + + expected = result.reindex(sorted(index)) + expected.index = expected.index._with_freq("infer") + + result = result.asfreq("D") + tm.assert_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_asof.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_asof.py new file mode 100644 index 0000000..6931dd0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_asof.py @@ -0,0 +1,182 @@ +import numpy as np +import pytest + +from pandas._libs.tslibs import IncompatibleFrequency + +from pandas import ( + DataFrame, + Period, + Series, + Timestamp, + date_range, + period_range, + to_datetime, +) +import pandas._testing as tm + + +@pytest.fixture +def date_range_frame(): + """ + Fixture for DataFrame of ints with date_range index + + Columns are ['A', 'B']. 
+ """ + N = 50 + rng = date_range("1/1/1990", periods=N, freq="53s") + return DataFrame({"A": np.arange(N), "B": np.arange(N)}, index=rng) + + +class TestFrameAsof: + def test_basic(self, date_range_frame): + df = date_range_frame + N = 50 + df.loc[df.index[15:30], "A"] = np.nan + dates = date_range("1/1/1990", periods=N * 3, freq="25s") + + result = df.asof(dates) + assert result.notna().all(1).all() + lb = df.index[14] + ub = df.index[30] + + dates = list(dates) + + result = df.asof(dates) + assert result.notna().all(1).all() + + mask = (result.index >= lb) & (result.index < ub) + rs = result[mask] + assert (rs == 14).all(1).all() + + def test_subset(self, date_range_frame): + N = 10 + df = date_range_frame.iloc[:N].copy() + df.loc[df.index[4:8], "A"] = np.nan + dates = date_range("1/1/1990", periods=N * 3, freq="25s") + + # with a subset of A should be the same + result = df.asof(dates, subset="A") + expected = df.asof(dates) + tm.assert_frame_equal(result, expected) + + # same with A/B + result = df.asof(dates, subset=["A", "B"]) + expected = df.asof(dates) + tm.assert_frame_equal(result, expected) + + # B gives df.asof + result = df.asof(dates, subset="B") + expected = df.resample("25s", closed="right").ffill().reindex(dates) + expected.iloc[20:] = 9 + + tm.assert_frame_equal(result, expected) + + def test_missing(self, date_range_frame): + # GH 15118 + # no match found - `where` value before earliest date in index + N = 10 + df = date_range_frame.iloc[:N].copy() + + result = df.asof("1989-12-31") + + expected = Series( + index=["A", "B"], name=Timestamp("1989-12-31"), dtype=np.float64 + ) + tm.assert_series_equal(result, expected) + + result = df.asof(to_datetime(["1989-12-31"])) + expected = DataFrame( + index=to_datetime(["1989-12-31"]), columns=["A", "B"], dtype="float64" + ) + tm.assert_frame_equal(result, expected) + + # Check that we handle PeriodIndex correctly, don't end up with + # period.ordinal for series name + df = df.to_period("D") + result = df.asof("1989-12-31") + assert isinstance(result.name, Period) + + def test_asof_all_nans(self, frame_or_series): + # GH 15713 + # DataFrame/Series is all nans + result = frame_or_series([np.nan]).asof([0]) + expected = frame_or_series([np.nan]) + tm.assert_equal(result, expected) + + def test_all_nans(self, date_range_frame): + # GH 15713 + # DataFrame is all nans + + # testing non-default indexes, multiple inputs + N = 150 + rng = date_range_frame.index + dates = date_range("1/1/1990", periods=N, freq="25s") + result = DataFrame(np.nan, index=rng, columns=["A"]).asof(dates) + expected = DataFrame(np.nan, index=dates, columns=["A"]) + tm.assert_frame_equal(result, expected) + + # testing multiple columns + dates = date_range("1/1/1990", periods=N, freq="25s") + result = DataFrame(np.nan, index=rng, columns=["A", "B", "C"]).asof(dates) + expected = DataFrame(np.nan, index=dates, columns=["A", "B", "C"]) + tm.assert_frame_equal(result, expected) + + # testing scalar input + result = DataFrame(np.nan, index=[1, 2], columns=["A", "B"]).asof([3]) + expected = DataFrame(np.nan, index=[3], columns=["A", "B"]) + tm.assert_frame_equal(result, expected) + + result = DataFrame(np.nan, index=[1, 2], columns=["A", "B"]).asof(3) + expected = Series(np.nan, index=["A", "B"], name=3) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "stamp,expected", + [ + ( + Timestamp("2018-01-01 23:22:43.325+00:00"), + Series(2.0, name=Timestamp("2018-01-01 23:22:43.325+00:00")), + ), + ( + Timestamp("2018-01-01 22:33:20.682+01:00"), + 
Series(1.0, name=Timestamp("2018-01-01 22:33:20.682+01:00")), + ), + ], + ) + def test_time_zone_aware_index(self, stamp, expected): + # GH21194 + # Check that asof respects the DataFrame index's timezone when the + # lookup timestamp is given with a different UTC offset + df = DataFrame( + data=[1, 2], + index=[ + Timestamp("2018-01-01 21:00:05.001+00:00"), + Timestamp("2018-01-01 22:35:10.550+00:00"), + ], + ) + + result = df.asof(stamp) + tm.assert_series_equal(result, expected) + + def test_is_copy(self, date_range_frame): + # GH-27357, GH-30784: ensure the result of asof is an actual copy and + # doesn't track the parent dataframe / doesn't give SettingWithCopy warnings + df = date_range_frame + N = 50 + df.loc[df.index[15:30], "A"] = np.nan + dates = date_range("1/1/1990", periods=N * 3, freq="25s") + + result = df.asof(dates) + + with tm.assert_produces_warning(None): + result["C"] = 1 + + def test_asof_periodindex_mismatched_freq(self): + N = 50 + rng = period_range("1/1/1990", periods=N, freq="H") + df = DataFrame(np.random.randn(N), index=rng) + + # Mismatched freq + msg = "Input has different freq" + with pytest.raises(IncompatibleFrequency, match=msg): + df.asof(rng.asfreq("D")) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_assign.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_assign.py new file mode 100644 index 0000000..0ae501d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_assign.py @@ -0,0 +1,84 @@ +import pytest + +from pandas import DataFrame +import pandas._testing as tm + + +class TestAssign: + def test_assign(self): + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + original = df.copy() + result = df.assign(C=df.B / df.A) + expected = df.copy() + expected["C"] = [4, 2.5, 2] + tm.assert_frame_equal(result, expected) + + # lambda syntax + result = df.assign(C=lambda x: x.B / x.A) + tm.assert_frame_equal(result, expected) + + # original is unmodified + tm.assert_frame_equal(df, original) + + # Non-Series array-like + result = df.assign(C=[4, 2.5, 2]) + tm.assert_frame_equal(result, expected) + # original is unmodified + tm.assert_frame_equal(df, original) + + result = df.assign(B=df.B / df.A) + expected = expected.drop("B", axis=1).rename(columns={"C": "B"}) + tm.assert_frame_equal(result, expected) + + # overwrite + result = df.assign(A=df.A + df.B) + expected = df.copy() + expected["A"] = [5, 7, 9] + tm.assert_frame_equal(result, expected) + + # lambda + result = df.assign(A=lambda x: x.A + x.B) + tm.assert_frame_equal(result, expected) + + def test_assign_multiple(self): + df = DataFrame([[1, 4], [2, 5], [3, 6]], columns=["A", "B"]) + result = df.assign(C=[7, 8, 9], D=df.A, E=lambda x: x.B) + expected = DataFrame( + [[1, 4, 7, 1, 4], [2, 5, 8, 2, 5], [3, 6, 9, 3, 6]], columns=list("ABCDE") + ) + tm.assert_frame_equal(result, expected) + + def test_assign_order(self): + # GH 9818 + df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + result = df.assign(D=df.A + df.B, C=df.A - df.B) + + expected = DataFrame([[1, 2, 3, -1], [3, 4, 7, -1]], columns=list("ABDC")) + tm.assert_frame_equal(result, expected) + result = df.assign(C=df.A - df.B, D=df.A + df.B) + + expected = DataFrame([[1, 2, -1, 3], [3, 4, -1, 7]], columns=list("ABCD")) + + tm.assert_frame_equal(result, expected) + + def test_assign_bad(self): + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + + # non-keyword argument + msg = r"assign\(\) takes 1 positional argument but 2 were given" + with pytest.raises(TypeError, match=msg): + df.assign(lambda x: 
x.A) + msg = "'DataFrame' object has no attribute 'C'" + with pytest.raises(AttributeError, match=msg): + df.assign(C=df.A, D=df.A + df.C) + + def test_assign_dependent(self): + df = DataFrame({"A": [1, 2], "B": [3, 4]}) + + result = df.assign(C=df.A, D=lambda x: x["A"] + x["C"]) + expected = DataFrame([[1, 3, 1, 2], [2, 4, 2, 4]], columns=list("ABCD")) + tm.assert_frame_equal(result, expected) + + result = df.assign(C=lambda df: df.A, D=lambda df: df["A"] + df["C"]) + expected = DataFrame([[1, 3, 1, 2], [2, 4, 2, 4]], columns=list("ABCD")) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_astype.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_astype.py new file mode 100644 index 0000000..d326ca3 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_astype.py @@ -0,0 +1,789 @@ +import re + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + Categorical, + CategoricalDtype, + DataFrame, + DatetimeTZDtype, + Interval, + IntervalDtype, + NaT, + Series, + Timedelta, + Timestamp, + concat, + date_range, + option_context, +) +import pandas._testing as tm +from pandas.core.api import UInt64Index +from pandas.core.arrays.integer import coerce_to_array + + +def _check_cast(df, v): + """ + Check if all dtypes of df are equal to v + """ + assert all(s.dtype.name == v for _, s in df.items()) + + +class TestAstype: + def test_astype_float(self, float_frame): + casted = float_frame.astype(int) + expected = DataFrame( + float_frame.values.astype(int), + index=float_frame.index, + columns=float_frame.columns, + ) + tm.assert_frame_equal(casted, expected) + + casted = float_frame.astype(np.int32) + expected = DataFrame( + float_frame.values.astype(np.int32), + index=float_frame.index, + columns=float_frame.columns, + ) + tm.assert_frame_equal(casted, expected) + + float_frame["foo"] = "5" + casted = float_frame.astype(int) + expected = DataFrame( + float_frame.values.astype(int), + index=float_frame.index, + columns=float_frame.columns, + ) + tm.assert_frame_equal(casted, expected) + + def test_astype_mixed_float(self, mixed_float_frame): + # mixed casting + casted = mixed_float_frame.reindex(columns=["A", "B"]).astype("float32") + _check_cast(casted, "float32") + + casted = mixed_float_frame.reindex(columns=["A", "B"]).astype("float16") + _check_cast(casted, "float16") + + def test_astype_mixed_type(self, mixed_type_frame): + # mixed casting + mn = mixed_type_frame._get_numeric_data().copy() + mn["little_float"] = np.array(12345.0, dtype="float16") + mn["big_float"] = np.array(123456789101112.0, dtype="float64") + + casted = mn.astype("float64") + _check_cast(casted, "float64") + + casted = mn.astype("int64") + _check_cast(casted, "int64") + + casted = mn.reindex(columns=["little_float"]).astype("float16") + _check_cast(casted, "float16") + + casted = mn.astype("float32") + _check_cast(casted, "float32") + + casted = mn.astype("int32") + _check_cast(casted, "int32") + + # to object + casted = mn.astype("O") + _check_cast(casted, "object") + + def test_astype_with_exclude_string(self, float_frame): + df = float_frame.copy() + expected = float_frame.astype(int) + df["string"] = "foo" + casted = df.astype(int, errors="ignore") + + expected["string"] = "foo" + tm.assert_frame_equal(casted, expected) + + df = float_frame.copy() + expected = float_frame.astype(np.int32) + df["string"] = "foo" + casted = 
df.astype(np.int32, errors="ignore") + + expected["string"] = "foo" + tm.assert_frame_equal(casted, expected) + + def test_astype_with_view_float(self, float_frame): + + # this is the only real reason to do it this way + tf = np.round(float_frame).astype(np.int32) + casted = tf.astype(np.float32, copy=False) + + # TODO(wesm): verification? + tf = float_frame.astype(np.float64) + casted = tf.astype(np.int64, copy=False) # noqa + + def test_astype_with_view_mixed_float(self, mixed_float_frame): + + tf = mixed_float_frame.reindex(columns=["A", "B", "C"]) + + casted = tf.astype(np.int64) + casted = tf.astype(np.float32) # noqa + + @pytest.mark.parametrize("dtype", [np.int32, np.int64]) + @pytest.mark.parametrize("val", [np.nan, np.inf]) + def test_astype_cast_nan_inf_int(self, val, dtype): + # see GH#14265 + # + # Check NaN and inf --> raise error when converting to int. + msg = "Cannot convert non-finite values \\(NA or inf\\) to integer" + df = DataFrame([val]) + + with pytest.raises(ValueError, match=msg): + df.astype(dtype) + + def test_astype_str(self): + # see GH#9757 + a = Series(date_range("2010-01-04", periods=5)) + b = Series(date_range("3/6/2012 00:00", periods=5, tz="US/Eastern")) + c = Series([Timedelta(x, unit="d") for x in range(5)]) + d = Series(range(5)) + e = Series([0.0, 0.2, 0.4, 0.6, 0.8]) + + df = DataFrame({"a": a, "b": b, "c": c, "d": d, "e": e}) + + # Datetime-like + result = df.astype(str) + + expected = DataFrame( + { + "a": list(map(str, map(lambda x: Timestamp(x)._date_repr, a._values))), + "b": list(map(str, map(Timestamp, b._values))), + "c": list(map(lambda x: Timedelta(x)._repr_base(), c._values)), + "d": list(map(str, d._values)), + "e": list(map(str, e._values)), + } + ) + + tm.assert_frame_equal(result, expected) + + def test_astype_str_float(self): + # see GH#11302 + result = DataFrame([np.NaN]).astype(str) + expected = DataFrame(["nan"]) + + tm.assert_frame_equal(result, expected) + result = DataFrame([1.12345678901234567890]).astype(str) + + val = "1.1234567890123457" + expected = DataFrame([val]) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("dtype_class", [dict, Series]) + def test_astype_dict_like(self, dtype_class): + # GH7271 & GH16717 + a = Series(date_range("2010-01-04", periods=5)) + b = Series(range(5)) + c = Series([0.0, 0.2, 0.4, 0.6, 0.8]) + d = Series(["1.0", "2", "3.14", "4", "5.4"]) + df = DataFrame({"a": a, "b": b, "c": c, "d": d}) + original = df.copy(deep=True) + + # change type of a subset of columns + dt1 = dtype_class({"b": "str", "d": "float32"}) + result = df.astype(dt1) + expected = DataFrame( + { + "a": a, + "b": Series(["0", "1", "2", "3", "4"]), + "c": c, + "d": Series([1.0, 2.0, 3.14, 4.0, 5.4], dtype="float32"), + } + ) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(df, original) + + dt2 = dtype_class({"b": np.float32, "c": "float32", "d": np.float64}) + result = df.astype(dt2) + expected = DataFrame( + { + "a": a, + "b": Series([0.0, 1.0, 2.0, 3.0, 4.0], dtype="float32"), + "c": Series([0.0, 0.2, 0.4, 0.6, 0.8], dtype="float32"), + "d": Series([1.0, 2.0, 3.14, 4.0, 5.4], dtype="float64"), + } + ) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(df, original) + + # change all columns + dt3 = dtype_class({"a": str, "b": str, "c": str, "d": str}) + tm.assert_frame_equal(df.astype(dt3), df.astype(str)) + tm.assert_frame_equal(df, original) + + # error should be raised when using something other than column labels + # in the keys of the dtype dict + dt4 = 
dtype_class({"b": str, 2: str}) + dt5 = dtype_class({"e": str}) + msg_frame = ( + "Only a column name can be used for the key in a dtype mappings argument. " + "'{}' not found in columns." + ) + with pytest.raises(KeyError, match=msg_frame.format(2)): + df.astype(dt4) + with pytest.raises(KeyError, match=msg_frame.format("e")): + df.astype(dt5) + tm.assert_frame_equal(df, original) + + # if the dtypes provided are the same as the original dtypes, the + # resulting DataFrame should be the same as the original DataFrame + dt6 = dtype_class({col: df[col].dtype for col in df.columns}) + equiv = df.astype(dt6) + tm.assert_frame_equal(df, equiv) + tm.assert_frame_equal(df, original) + + # GH#16717 + # if dtypes provided is empty, the resulting DataFrame + # should be the same as the original DataFrame + dt7 = dtype_class({}) if dtype_class is dict else dtype_class({}, dtype=object) + equiv = df.astype(dt7) + tm.assert_frame_equal(df, equiv) + tm.assert_frame_equal(df, original) + + def test_astype_duplicate_col(self): + a1 = Series([1, 2, 3, 4, 5], name="a") + b = Series([0.1, 0.2, 0.4, 0.6, 0.8], name="b") + a2 = Series([0, 1, 2, 3, 4], name="a") + df = concat([a1, b, a2], axis=1) + + result = df.astype(str) + a1_str = Series(["1", "2", "3", "4", "5"], dtype="str", name="a") + b_str = Series(["0.1", "0.2", "0.4", "0.6", "0.8"], dtype=str, name="b") + a2_str = Series(["0", "1", "2", "3", "4"], dtype="str", name="a") + expected = concat([a1_str, b_str, a2_str], axis=1) + tm.assert_frame_equal(result, expected) + + result = df.astype({"a": "str"}) + expected = concat([a1_str, b, a2_str], axis=1) + tm.assert_frame_equal(result, expected) + + def test_astype_duplicate_col_series_arg(self): + # GH#44417 + vals = np.random.randn(3, 4) + df = DataFrame(vals, columns=["A", "B", "C", "A"]) + dtypes = df.dtypes + dtypes.iloc[0] = str + dtypes.iloc[2] = "Float64" + + result = df.astype(dtypes) + expected = DataFrame( + { + 0: vals[:, 0].astype(str), + 1: vals[:, 1], + 2: pd.array(vals[:, 2], dtype="Float64"), + 3: vals[:, 3], + } + ) + expected.columns = df.columns + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "dtype", + [ + "category", + CategoricalDtype(), + CategoricalDtype(ordered=True), + CategoricalDtype(ordered=False), + CategoricalDtype(categories=list("abcdef")), + CategoricalDtype(categories=list("edba"), ordered=False), + CategoricalDtype(categories=list("edcb"), ordered=True), + ], + ids=repr, + ) + def test_astype_categorical(self, dtype): + # GH#18099 + d = {"A": list("abbc"), "B": list("bccd"), "C": list("cdde")} + df = DataFrame(d) + result = df.astype(dtype) + expected = DataFrame({k: Categorical(d[k], dtype=dtype) for k in d}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("cls", [CategoricalDtype, DatetimeTZDtype, IntervalDtype]) + def test_astype_categoricaldtype_class_raises(self, cls): + df = DataFrame({"A": ["a", "a", "b", "c"]}) + xpr = f"Expected an instance of {cls.__name__}" + with pytest.raises(TypeError, match=xpr): + df.astype({"A": cls}) + + with pytest.raises(TypeError, match=xpr): + df["A"].astype(cls) + + @pytest.mark.parametrize("dtype", ["Int64", "Int32", "Int16"]) + def test_astype_extension_dtypes(self, dtype): + # GH#22578 + df = DataFrame([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], columns=["a", "b"]) + + expected1 = DataFrame( + { + "a": pd.array([1, 3, 5], dtype=dtype), + "b": pd.array([2, 4, 6], dtype=dtype), + } + ) + tm.assert_frame_equal(df.astype(dtype), expected1) + 
tm.assert_frame_equal(df.astype("int64").astype(dtype), expected1) + tm.assert_frame_equal(df.astype(dtype).astype("float64"), df) + + df = DataFrame([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], columns=["a", "b"]) + df["b"] = df["b"].astype(dtype) + expected2 = DataFrame( + {"a": [1.0, 3.0, 5.0], "b": pd.array([2, 4, 6], dtype=dtype)} + ) + tm.assert_frame_equal(df, expected2) + + tm.assert_frame_equal(df.astype(dtype), expected1) + tm.assert_frame_equal(df.astype("int64").astype(dtype), expected1) + + @pytest.mark.parametrize("dtype", ["Int64", "Int32", "Int16"]) + def test_astype_extension_dtypes_1d(self, dtype): + # GH#22578 + df = DataFrame({"a": [1.0, 2.0, 3.0]}) + + expected1 = DataFrame({"a": pd.array([1, 2, 3], dtype=dtype)}) + tm.assert_frame_equal(df.astype(dtype), expected1) + tm.assert_frame_equal(df.astype("int64").astype(dtype), expected1) + + df = DataFrame({"a": [1.0, 2.0, 3.0]}) + df["a"] = df["a"].astype(dtype) + expected2 = DataFrame({"a": pd.array([1, 2, 3], dtype=dtype)}) + tm.assert_frame_equal(df, expected2) + + tm.assert_frame_equal(df.astype(dtype), expected1) + tm.assert_frame_equal(df.astype("int64").astype(dtype), expected1) + + @pytest.mark.parametrize("dtype", ["category", "Int64"]) + def test_astype_extension_dtypes_duplicate_col(self, dtype): + # GH#24704 + a1 = Series([0, np.nan, 4], name="a") + a2 = Series([np.nan, 3, 5], name="a") + df = concat([a1, a2], axis=1) + + result = df.astype(dtype) + expected = concat([a1.astype(dtype), a2.astype(dtype)], axis=1) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "dtype", [{100: "float64", 200: "uint64"}, "category", "float64"] + ) + def test_astype_column_metadata(self, dtype): + # GH#19920 + columns = UInt64Index([100, 200, 300], name="foo") + df = DataFrame(np.arange(15).reshape(5, 3), columns=columns) + df = df.astype(dtype) + tm.assert_index_equal(df.columns, columns) + + @pytest.mark.parametrize("dtype", ["M8", "m8"]) + @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "h", "m", "D"]) + def test_astype_from_datetimelike_to_object(self, dtype, unit): + # tests astype to object dtype + # GH#19223 / GH#12425 + dtype = f"{dtype}[{unit}]" + arr = np.array([[1, 2, 3]], dtype=dtype) + df = DataFrame(arr) + result = df.astype(object) + assert (result.dtypes == object).all() + + if dtype.startswith("M8"): + assert result.iloc[0, 0] == Timestamp(1, unit=unit) + else: + assert result.iloc[0, 0] == Timedelta(1, unit=unit) + + @pytest.mark.parametrize("arr_dtype", [np.int64, np.float64]) + @pytest.mark.parametrize("dtype", ["M8", "m8"]) + @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "h", "m", "D"]) + def test_astype_to_datetimelike_unit(self, arr_dtype, dtype, unit): + # tests all units from numeric origination + # GH#19223 / GH#12425 + dtype = f"{dtype}[{unit}]" + arr = np.array([[1, 2, 3]], dtype=arr_dtype) + df = DataFrame(arr) + result = df.astype(dtype) + expected = DataFrame(arr.astype(dtype)) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "h", "m", "D"]) + def test_astype_to_datetime_unit(self, unit): + # tests all units from datetime origination + # GH#19223 + dtype = f"M8[{unit}]" + arr = np.array([[1, 2, 3]], dtype=dtype) + df = DataFrame(arr) + result = df.astype(dtype) + expected = DataFrame(arr.astype(dtype)) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("unit", ["ns"]) + def test_astype_to_timedelta_unit_ns(self, unit): + # preserve the timedelta conversion + # GH#19223 + dtype = 
f"m8[{unit}]" + arr = np.array([[1, 2, 3]], dtype=dtype) + df = DataFrame(arr) + result = df.astype(dtype) + expected = DataFrame(arr.astype(dtype)) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("unit", ["us", "ms", "s", "h", "m", "D"]) + def test_astype_to_timedelta_unit(self, unit): + # coerce to float + # GH#19223 + dtype = f"m8[{unit}]" + arr = np.array([[1, 2, 3]], dtype=dtype) + df = DataFrame(arr) + result = df.astype(dtype) + expected = DataFrame(df.values.astype(dtype).astype(float)) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "h", "m", "D"]) + def test_astype_to_incorrect_datetimelike(self, unit): + # trying to astype a m to a M, or vice-versa + # GH#19224 + dtype = f"M8[{unit}]" + other = f"m8[{unit}]" + + df = DataFrame(np.array([[1, 2, 3]], dtype=dtype)) + msg = "|".join( + [ + # BlockManager path + fr"Cannot cast DatetimeArray to dtype timedelta64\[{unit}\]", + # ArrayManager path + "cannot astype a datetimelike from " + fr"\[datetime64\[ns\]\] to \[timedelta64\[{unit}\]\]", + ] + ) + with pytest.raises(TypeError, match=msg): + df.astype(other) + + msg = "|".join( + [ + # BlockManager path + fr"Cannot cast TimedeltaArray to dtype datetime64\[{unit}\]", + # ArrayManager path + "cannot astype a timedelta from " + fr"\[timedelta64\[ns\]\] to \[datetime64\[{unit}\]\]", + ] + ) + df = DataFrame(np.array([[1, 2, 3]], dtype=other)) + with pytest.raises(TypeError, match=msg): + df.astype(dtype) + + def test_astype_arg_for_errors(self): + # GH#14878 + + df = DataFrame([1, 2, 3]) + + msg = ( + "Expected value of kwarg 'errors' to be one of " + "['raise', 'ignore']. Supplied value is 'True'" + ) + with pytest.raises(ValueError, match=re.escape(msg)): + df.astype(np.float64, errors=True) + + df.astype(np.int8, errors="ignore") + + def test_astype_arg_for_errors_dictlist(self): + # GH#25905 + df = DataFrame( + [ + {"a": "1", "b": "16.5%", "c": "test"}, + {"a": "2.2", "b": "15.3", "c": "another_test"}, + ] + ) + expected = DataFrame( + [ + {"a": 1.0, "b": "16.5%", "c": "test"}, + {"a": 2.2, "b": "15.3", "c": "another_test"}, + ] + ) + type_dict = {"a": "float64", "b": "float64", "c": "object"} + + result = df.astype(dtype=type_dict, errors="ignore") + + tm.assert_frame_equal(result, expected) + + def test_astype_dt64tz(self, timezone_frame): + # astype + expected = np.array( + [ + [ + Timestamp("2013-01-01 00:00:00"), + Timestamp("2013-01-02 00:00:00"), + Timestamp("2013-01-03 00:00:00"), + ], + [ + Timestamp("2013-01-01 00:00:00-0500", tz="US/Eastern"), + NaT, + Timestamp("2013-01-03 00:00:00-0500", tz="US/Eastern"), + ], + [ + Timestamp("2013-01-01 00:00:00+0100", tz="CET"), + NaT, + Timestamp("2013-01-03 00:00:00+0100", tz="CET"), + ], + ], + dtype=object, + ).T + expected = DataFrame( + expected, + index=timezone_frame.index, + columns=timezone_frame.columns, + dtype=object, + ) + result = timezone_frame.astype(object) + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning): + # dt64tz->dt64 deprecated + result = timezone_frame.astype("datetime64[ns]") + expected = DataFrame( + { + "A": date_range("20130101", periods=3), + "B": ( + date_range("20130101", periods=3, tz="US/Eastern") + .tz_convert("UTC") + .tz_localize(None) + ), + "C": ( + date_range("20130101", periods=3, tz="CET") + .tz_convert("UTC") + .tz_localize(None) + ), + } + ) + expected.iloc[1, 1] = NaT + expected.iloc[1, 2] = NaT + tm.assert_frame_equal(result, expected) + + def 
test_astype_dt64tz_to_str(self, timezone_frame): + # str formatting + result = timezone_frame.astype(str) + expected = DataFrame( + [ + [ + "2013-01-01", + "2013-01-01 00:00:00-05:00", + "2013-01-01 00:00:00+01:00", + ], + ["2013-01-02", "NaT", "NaT"], + [ + "2013-01-03", + "2013-01-03 00:00:00-05:00", + "2013-01-03 00:00:00+01:00", + ], + ], + columns=timezone_frame.columns, + ) + tm.assert_frame_equal(result, expected) + + with option_context("display.max_columns", 20): + result = str(timezone_frame) + assert ( + "0 2013-01-01 2013-01-01 00:00:00-05:00 2013-01-01 00:00:00+01:00" + ) in result + assert ( + "1 2013-01-02 NaT NaT" + ) in result + assert ( + "2 2013-01-03 2013-01-03 00:00:00-05:00 2013-01-03 00:00:00+01:00" + ) in result + + def test_astype_empty_dtype_dict(self): + # issue mentioned further down in the following issue's thread + # https://github.com/pandas-dev/pandas/issues/33113 + df = DataFrame() + result = df.astype({}) + tm.assert_frame_equal(result, df) + assert result is not df + + @pytest.mark.parametrize( + "data, dtype", + [ + (["x", "y", "z"], "string[python]"), + pytest.param( + ["x", "y", "z"], + "string[pyarrow]", + marks=td.skip_if_no("pyarrow", min_version="1.0.0"), + ), + (["x", "y", "z"], "category"), + (3 * [Timestamp("2020-01-01", tz="UTC")], None), + (3 * [Interval(0, 1)], None), + ], + ) + @pytest.mark.parametrize("errors", ["raise", "ignore"]) + def test_astype_ignores_errors_for_extension_dtypes(self, data, dtype, errors): + # https://github.com/pandas-dev/pandas/issues/35471 + df = DataFrame(Series(data, dtype=dtype)) + if errors == "ignore": + expected = df + result = df.astype(float, errors=errors) + tm.assert_frame_equal(result, expected) + else: + msg = "(Cannot cast)|(could not convert)" + with pytest.raises((ValueError, TypeError), match=msg): + df.astype(float, errors=errors) + + def test_astype_tz_conversion(self): + # GH 35973 + val = {"tz": date_range("2020-08-30", freq="d", periods=2, tz="Europe/London")} + df = DataFrame(val) + result = df.astype({"tz": "datetime64[ns, Europe/Berlin]"}) + + expected = df + expected["tz"] = expected["tz"].dt.tz_convert("Europe/Berlin") + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("tz", ["UTC", "Europe/Berlin"]) + def test_astype_tz_object_conversion(self, tz): + # GH 35973 + val = {"tz": date_range("2020-08-30", freq="d", periods=2, tz="Europe/London")} + expected = DataFrame(val) + + # convert expected to object dtype from other tz str (independently tested) + result = expected.astype({"tz": f"datetime64[ns, {tz}]"}) + result = result.astype({"tz": "object"}) + + # do real test: object dtype to a specified tz, different from construction tz. 
+ result = result.astype({"tz": "datetime64[ns, Europe/London]"}) + tm.assert_frame_equal(result, expected) + + def test_astype_dt64_to_string(self, frame_or_series, tz_naive_fixture): + # GH#41409 + tz = tz_naive_fixture + + dti = date_range("2016-01-01", periods=3, tz=tz) + dta = dti._data + dta[0] = NaT + + obj = frame_or_series(dta) + result = obj.astype("string") + + # Check that Series/DataFrame.astype matches DatetimeArray.astype + expected = frame_or_series(dta.astype("string")) + tm.assert_equal(result, expected) + + item = result.iloc[0] + if frame_or_series is DataFrame: + item = item.iloc[0] + assert item is pd.NA + + # For non-NA values, we should match what we get for non-EA str + alt = obj.astype(str) + assert np.all(alt.iloc[1:] == result.iloc[1:]) + + def test_astype_td64_to_string(self, frame_or_series): + # GH#41409 + tdi = pd.timedelta_range("1 Day", periods=3) + obj = frame_or_series(tdi) + + expected = frame_or_series(["1 days", "2 days", "3 days"], dtype="string") + result = obj.astype("string") + tm.assert_equal(result, expected) + + def test_astype_bytes(self): + # GH#39474 + result = DataFrame(["foo", "bar", "baz"]).astype(bytes) + assert result.dtypes[0] == np.dtype("S3") + + @pytest.mark.parametrize( + "index_slice", + [ + np.s_[:2, :2], + np.s_[:1, :2], + np.s_[:2, :1], + np.s_[::2, ::2], + np.s_[::1, ::2], + np.s_[::2, ::1], + ], + ) + def test_astype_noncontiguous(self, index_slice): + # GH#42396 + data = np.arange(16).reshape(4, 4) + df = DataFrame(data) + + result = df.iloc[index_slice].astype("int16") + expected = df.iloc[index_slice] + tm.assert_frame_equal(result, expected, check_dtype=False) + + def test_astype_retain_attrs(self, any_numpy_dtype): + # GH#44414 + df = DataFrame({"a": [0, 1, 2], "b": [3, 4, 5]}) + df.attrs["Location"] = "Michigan" + + result = df.astype({"a": any_numpy_dtype}).attrs + expected = df.attrs + + tm.assert_dict_equal(expected, result) + + +class TestAstypeCategorical: + def test_astype_from_categorical3(self): + df = DataFrame({"cats": [1, 2, 3, 4, 5, 6], "vals": [1, 2, 3, 4, 5, 6]}) + cats = Categorical([1, 2, 3, 4, 5, 6]) + exp_df = DataFrame({"cats": cats, "vals": [1, 2, 3, 4, 5, 6]}) + df["cats"] = df["cats"].astype("category") + tm.assert_frame_equal(exp_df, df) + + def test_astype_from_categorical4(self): + df = DataFrame( + {"cats": ["a", "b", "b", "a", "a", "d"], "vals": [1, 2, 3, 4, 5, 6]} + ) + cats = Categorical(["a", "b", "b", "a", "a", "d"]) + exp_df = DataFrame({"cats": cats, "vals": [1, 2, 3, 4, 5, 6]}) + df["cats"] = df["cats"].astype("category") + tm.assert_frame_equal(exp_df, df) + + def test_categorical_astype_to_int(self, any_int_dtype): + # GH#39402 + + df = DataFrame(data={"col1": pd.array([2.0, 1.0, 3.0])}) + df.col1 = df.col1.astype("category") + df.col1 = df.col1.astype(any_int_dtype) + expected = DataFrame({"col1": pd.array([2, 1, 3], dtype=any_int_dtype)}) + tm.assert_frame_equal(df, expected) + + def test_astype_categorical_to_string_missing(self): + # https://github.com/pandas-dev/pandas/issues/41797 + df = DataFrame(["a", "b", np.nan]) + expected = df.astype(str) + cat = df.astype("category") + result = cat.astype(str) + tm.assert_frame_equal(result, expected) + + +class IntegerArrayNoCopy(pd.core.arrays.IntegerArray): + # GH 42501 + + @classmethod + def _from_sequence(cls, scalars, *, dtype=None, copy=False): + values, mask = coerce_to_array(scalars, dtype=dtype, copy=copy) + return IntegerArrayNoCopy(values, mask) + + def copy(self): + assert False + + +class Int16DtypeNoCopy(pd.Int16Dtype): + # 
GH 42501 + + @classmethod + def construct_array_type(cls): + return IntegerArrayNoCopy + + +def test_frame_astype_no_copy(): + # GH 42501 + df = DataFrame({"a": [1, 4, None, 5], "b": [6, 7, 8, 9]}, dtype=object) + result = df.astype({"a": Int16DtypeNoCopy()}, copy=False) + + assert result.a.dtype == pd.Int16Dtype() + assert np.shares_memory(df.b.values, result.b.values) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_at_time.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_at_time.py new file mode 100644 index 0000000..8537c32 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_at_time.py @@ -0,0 +1,124 @@ +from datetime import time + +import numpy as np +import pytest +import pytz + +from pandas._libs.tslibs import timezones + +from pandas import ( + DataFrame, + date_range, +) +import pandas._testing as tm + + +class TestAtTime: + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_localized_at_time(self, tzstr, frame_or_series): + tz = timezones.maybe_get_tz(tzstr) + + rng = date_range("4/16/2012", "5/1/2012", freq="H") + ts = frame_or_series(np.random.randn(len(rng)), index=rng) + + ts_local = ts.tz_localize(tzstr) + + result = ts_local.at_time(time(10, 0)) + expected = ts.at_time(time(10, 0)).tz_localize(tzstr) + tm.assert_equal(result, expected) + assert timezones.tz_compare(result.index.tz, tz) + + def test_at_time(self, frame_or_series): + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + ts = tm.get_obj(ts, frame_or_series) + rs = ts.at_time(rng[1]) + assert (rs.index.hour == rng[1].hour).all() + assert (rs.index.minute == rng[1].minute).all() + assert (rs.index.second == rng[1].second).all() + + result = ts.at_time("9:30") + expected = ts.at_time(time(9, 30)) + tm.assert_equal(result, expected) + + def test_at_time_midnight(self, frame_or_series): + # midnight, everything + rng = date_range("1/1/2000", "1/31/2000") + ts = DataFrame(np.random.randn(len(rng), 3), index=rng) + ts = tm.get_obj(ts, frame_or_series) + + result = ts.at_time(time(0, 0)) + tm.assert_equal(result, ts) + + def test_at_time_nonexistent(self, frame_or_series): + # time doesn't exist + rng = date_range("1/1/2012", freq="23Min", periods=384) + ts = DataFrame(np.random.randn(len(rng)), rng) + ts = tm.get_obj(ts, frame_or_series) + rs = ts.at_time("16:00") + assert len(rs) == 0 + + @pytest.mark.parametrize( + "hour", ["1:00", "1:00AM", time(1), time(1, tzinfo=pytz.UTC)] + ) + def test_at_time_errors(self, hour): + # GH#24043 + dti = date_range("2018", periods=3, freq="H") + df = DataFrame(list(range(len(dti))), index=dti) + if getattr(hour, "tzinfo", None) is None: + result = df.at_time(hour) + expected = df.iloc[1:2] + tm.assert_frame_equal(result, expected) + else: + with pytest.raises(ValueError, match="Index must be timezone"): + df.at_time(hour) + + def test_at_time_tz(self): + # GH#24043 + dti = date_range("2018", periods=3, freq="H", tz="US/Pacific") + df = DataFrame(list(range(len(dti))), index=dti) + result = df.at_time(time(4, tzinfo=pytz.timezone("US/Eastern"))) + expected = df.iloc[1:2] + tm.assert_frame_equal(result, expected) + + def test_at_time_raises(self, frame_or_series): + # GH#20725 + obj = DataFrame([[1, 2, 3], [4, 5, 6]]) + obj = tm.get_obj(obj, frame_or_series) + msg = "Index must be DatetimeIndex" + with pytest.raises(TypeError, match=msg): # index is not a DatetimeIndex + obj.at_time("00:00") + + 
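+
+    # --- Editor's sketch (not part of the upstream pandas test suite): a
+    # minimal, self-contained illustration of what at_time does, using only
+    # the public API already imported in this file. at_time keeps exactly
+    # the rows whose index timestamp matches the given wall-clock time.
+    def test_at_time_minimal_sketch(self):
+        rng = date_range("1/1/2000", periods=48, freq="H")
+        df = DataFrame({"x": range(len(rng))}, index=rng)
+        result = df.at_time(time(9, 0))
+        assert len(result) == 2  # 09:00 occurs once on each of the two days
+        assert (result.index.hour == 9).all()
+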
@pytest.mark.parametrize("axis", ["index", "columns", 0, 1]) + def test_at_time_axis(self, axis): + # issue 8839 + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = DataFrame(np.random.randn(len(rng), len(rng))) + ts.index, ts.columns = rng, rng + + indices = rng[(rng.hour == 9) & (rng.minute == 30) & (rng.second == 0)] + + if axis in ["index", 0]: + expected = ts.loc[indices, :] + elif axis in ["columns", 1]: + expected = ts.loc[:, indices] + + result = ts.at_time("9:30", axis=axis) + + # Without clearing freq, result has freq 1440T and expected 5T + result.index = result.index._with_freq(None) + expected.index = expected.index._with_freq(None) + tm.assert_frame_equal(result, expected) + + def test_at_time_datetimeindex(self): + index = date_range("2012-01-01", "2012-01-05", freq="30min") + df = DataFrame(np.random.randn(len(index), 5), index=index) + akey = time(12, 0, 0) + ainds = [24, 72, 120, 168] + + result = df.at_time(akey) + expected = df.loc[akey] + expected2 = df.iloc[ainds] + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected2) + assert len(result) == 4 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_between_time.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_between_time.py new file mode 100644 index 0000000..d8a742c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_between_time.py @@ -0,0 +1,289 @@ +from datetime import ( + datetime, + time, +) + +import numpy as np +import pytest + +from pandas._libs.tslibs import timezones +import pandas.util._test_decorators as td + +from pandas import ( + DataFrame, + Series, + date_range, +) +import pandas._testing as tm + + +class TestBetweenTime: + @td.skip_if_has_locale + def test_between_time_formats(self, frame_or_series): + # GH#11818 + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + ts = tm.get_obj(ts, frame_or_series) + + strings = [ + ("2:00", "2:30"), + ("0200", "0230"), + ("2:00am", "2:30am"), + ("0200am", "0230am"), + ("2:00:00", "2:30:00"), + ("020000", "023000"), + ("2:00:00am", "2:30:00am"), + ("020000am", "023000am"), + ] + expected_length = 28 + + for time_string in strings: + assert len(ts.between_time(*time_string)) == expected_length + + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_localized_between_time(self, tzstr, frame_or_series): + tz = timezones.maybe_get_tz(tzstr) + + rng = date_range("4/16/2012", "5/1/2012", freq="H") + ts = Series(np.random.randn(len(rng)), index=rng) + if frame_or_series is DataFrame: + ts = ts.to_frame() + + ts_local = ts.tz_localize(tzstr) + + t1, t2 = time(10, 0), time(11, 0) + result = ts_local.between_time(t1, t2) + expected = ts.between_time(t1, t2).tz_localize(tzstr) + tm.assert_equal(result, expected) + assert timezones.tz_compare(result.index.tz, tz) + + def test_between_time_types(self, frame_or_series): + # GH11818 + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + obj = DataFrame({"A": 0}, index=rng) + obj = tm.get_obj(obj, frame_or_series) + + msg = r"Cannot convert arg \[datetime\.datetime\(2010, 1, 2, 1, 0\)\] to a time" + with pytest.raises(ValueError, match=msg): + obj.between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) + + def test_between_time(self, inclusive_endpoints_fixture, frame_or_series): + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + ts = 
tm.get_obj(ts, frame_or_series) + + stime = time(0, 0) + etime = time(1, 0) + inclusive = inclusive_endpoints_fixture + + filtered = ts.between_time(stime, etime, inclusive=inclusive) + exp_len = 13 * 4 + 1 + + if inclusive in ["right", "neither"]: + exp_len -= 5 + if inclusive in ["left", "neither"]: + exp_len -= 4 + + assert len(filtered) == exp_len + for rs in filtered.index: + t = rs.time() + if inclusive in ["left", "both"]: + assert t >= stime + else: + assert t > stime + + if inclusive in ["right", "both"]: + assert t <= etime + else: + assert t < etime + + result = ts.between_time("00:00", "01:00") + expected = ts.between_time(stime, etime) + tm.assert_equal(result, expected) + + # across midnight + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + ts = tm.get_obj(ts, frame_or_series) + stime = time(22, 0) + etime = time(9, 0) + + filtered = ts.between_time(stime, etime, inclusive=inclusive) + exp_len = (12 * 11 + 1) * 4 + 1 + if inclusive in ["right", "neither"]: + exp_len -= 4 + if inclusive in ["left", "neither"]: + exp_len -= 4 + + assert len(filtered) == exp_len + for rs in filtered.index: + t = rs.time() + if inclusive in ["left", "both"]: + assert (t >= stime) or (t <= etime) + else: + assert (t > stime) or (t <= etime) + + if inclusive in ["right", "both"]: + assert (t <= etime) or (t >= stime) + else: + assert (t < etime) or (t >= stime) + + def test_between_time_raises(self, frame_or_series): + # GH#20725 + obj = DataFrame([[1, 2, 3], [4, 5, 6]]) + obj = tm.get_obj(obj, frame_or_series) + + msg = "Index must be DatetimeIndex" + with pytest.raises(TypeError, match=msg): # index is not a DatetimeIndex + obj.between_time(start_time="00:00", end_time="12:00") + + def test_between_time_axis(self, frame_or_series): + # GH#8839 + rng = date_range("1/1/2000", periods=100, freq="10min") + ts = Series(np.random.randn(len(rng)), index=rng) + if frame_or_series is DataFrame: + ts = ts.to_frame() + + stime, etime = ("08:00:00", "09:00:00") + expected_length = 7 + + assert len(ts.between_time(stime, etime)) == expected_length + assert len(ts.between_time(stime, etime, axis=0)) == expected_length + msg = f"No axis named {ts.ndim} for object type {type(ts).__name__}" + with pytest.raises(ValueError, match=msg): + ts.between_time(stime, etime, axis=ts.ndim) + + def test_between_time_axis_aliases(self, axis): + # GH#8839 + rng = date_range("1/1/2000", periods=100, freq="10min") + ts = DataFrame(np.random.randn(len(rng), len(rng))) + stime, etime = ("08:00:00", "09:00:00") + exp_len = 7 + + if axis in ["index", 0]: + ts.index = rng + assert len(ts.between_time(stime, etime)) == exp_len + assert len(ts.between_time(stime, etime, axis=0)) == exp_len + + if axis in ["columns", 1]: + ts.columns = rng + selected = ts.between_time(stime, etime, axis=1).columns + assert len(selected) == exp_len + + def test_between_time_axis_raises(self, axis): + # issue 8839 + rng = date_range("1/1/2000", periods=100, freq="10min") + mask = np.arange(0, len(rng)) + rand_data = np.random.randn(len(rng), len(rng)) + ts = DataFrame(rand_data, index=rng, columns=rng) + stime, etime = ("08:00:00", "09:00:00") + + msg = "Index must be DatetimeIndex" + if axis in ["columns", 1]: + ts.index = mask + with pytest.raises(TypeError, match=msg): + ts.between_time(stime, etime) + with pytest.raises(TypeError, match=msg): + ts.between_time(stime, etime, axis=0) + + if axis in ["index", 0]: + ts.columns = mask + with pytest.raises(TypeError, match=msg): + 
ts.between_time(stime, etime, axis=1)
+
+    def test_between_time_datetimeindex(self):
+        index = date_range("2012-01-01", "2012-01-05", freq="30min")
+        df = DataFrame(np.random.randn(len(index), 5), index=index)
+        bkey = slice(time(13, 0, 0), time(14, 0, 0))
+        binds = [26, 27, 28, 74, 75, 76, 122, 123, 124, 170, 171, 172]
+
+        result = df.between_time(bkey.start, bkey.stop)
+        expected = df.loc[bkey]
+        expected2 = df.iloc[binds]
+        tm.assert_frame_equal(result, expected)
+        tm.assert_frame_equal(result, expected2)
+        assert len(result) == 12
+
+    @pytest.mark.parametrize("include_start", [True, False])
+    @pytest.mark.parametrize("include_end", [True, False])
+    def test_between_time_warn(self, include_start, include_end, frame_or_series):
+        # GH40245
+        rng = date_range("1/1/2000", "1/5/2000", freq="5min")
+        ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
+        ts = tm.get_obj(ts, frame_or_series)
+
+        stime = time(0, 0)
+        etime = time(1, 0)
+
+        match = (
+            "`include_start` and `include_end` "
+            "are deprecated in favour of `inclusive`."
+        )
+        with tm.assert_produces_warning(FutureWarning, match=match):
+            _ = ts.between_time(stime, etime, include_start, include_end)
+
+    def test_between_time_incorr_arg_inclusive(self):
+        # GH40245
+        rng = date_range("1/1/2000", "1/5/2000", freq="5min")
+        ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
+
+        stime = time(0, 0)
+        etime = time(1, 0)
+        inclusive = "bad_string"
+        msg = "Inclusive has to be either 'both', 'neither', 'left' or 'right'"
+        with pytest.raises(ValueError, match=msg):
+            ts.between_time(stime, etime, inclusive=inclusive)
+
+    @pytest.mark.parametrize(
+        "include_start, include_end", [(True, None), (True, True), (None, True)]
+    )
+    def test_between_time_incompatible_args_given(self, include_start, include_end):
+        # GH40245
+        rng = date_range("1/1/2000", "1/5/2000", freq="5min")
+        ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
+
+        stime = time(0, 0)
+        etime = time(1, 0)
+        msg = (
+            "Deprecated arguments `include_start` and `include_end` cannot be "
+            "passed if `inclusive` has been given."
+        )
+        with pytest.raises(ValueError, match=msg):
+            ts.between_time(stime, etime, include_start, include_end, inclusive="left")
+
+    def test_between_time_same_functionality_old_and_new_args(self):
+        # GH40245
+        rng = date_range("1/1/2000", "1/5/2000", freq="5min")
+        ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
+        stime = time(0, 0)
+        etime = time(1, 0)
+        match = (
+            "`include_start` and `include_end` "
+            "are deprecated in favour of `inclusive`."
+ ) + + result = ts.between_time(stime, etime) + expected = ts.between_time(stime, etime, inclusive="both") + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning, match=match): + result = ts.between_time(stime, etime, include_start=False) + expected = ts.between_time(stime, etime, inclusive="right") + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning, match=match): + result = ts.between_time(stime, etime, include_end=False) + expected = ts.between_time(stime, etime, inclusive="left") + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning, match=match): + result = ts.between_time( + stime, etime, include_start=False, include_end=False + ) + expected = ts.between_time(stime, etime, inclusive="neither") + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning, match=match): + result = ts.between_time(stime, etime, include_start=True, include_end=True) + expected = ts.between_time(stime, etime, inclusive="both") + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_clip.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_clip.py new file mode 100644 index 0000000..e692948 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_clip.py @@ -0,0 +1,180 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Series, +) +import pandas._testing as tm + + +class TestDataFrameClip: + def test_clip(self, float_frame): + median = float_frame.median().median() + original = float_frame.copy() + + double = float_frame.clip(upper=median, lower=median) + assert not (double.values != median).any() + + # Verify that float_frame was not changed inplace + assert (float_frame.values == original.values).all() + + def test_inplace_clip(self, float_frame): + # GH#15388 + median = float_frame.median().median() + frame_copy = float_frame.copy() + + return_value = frame_copy.clip(upper=median, lower=median, inplace=True) + assert return_value is None + assert not (frame_copy.values != median).any() + + def test_dataframe_clip(self): + # GH#2747 + df = DataFrame(np.random.randn(1000, 2)) + + for lb, ub in [(-1, 1), (1, -1)]: + clipped_df = df.clip(lb, ub) + + lb, ub = min(lb, ub), max(ub, lb) + lb_mask = df.values <= lb + ub_mask = df.values >= ub + mask = ~lb_mask & ~ub_mask + assert (clipped_df.values[lb_mask] == lb).all() + assert (clipped_df.values[ub_mask] == ub).all() + assert (clipped_df.values[mask] == df.values[mask]).all() + + def test_clip_mixed_numeric(self): + # clip on mixed integer or floats + # GH#24162, clipping now preserves numeric types per column + df = DataFrame({"A": [1, 2, 3], "B": [1.0, np.nan, 3.0]}) + result = df.clip(1, 2) + expected = DataFrame({"A": [1, 2, 2], "B": [1.0, np.nan, 2.0]}) + tm.assert_frame_equal(result, expected) + + df = DataFrame([[1, 2, 3.4], [3, 4, 5.6]], columns=["foo", "bar", "baz"]) + expected = df.dtypes + result = df.clip(upper=3).dtypes + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("inplace", [True, False]) + def test_clip_against_series(self, inplace): + # GH#6966 + + df = DataFrame(np.random.randn(1000, 2)) + lb = Series(np.random.randn(1000)) + ub = lb + 1 + + original = df.copy() + clipped_df = df.clip(lb, ub, axis=0, inplace=inplace) + + if inplace: + clipped_df = df + + for i in range(2): + lb_mask = original.iloc[:, i] <= lb + ub_mask = 
original.iloc[:, i] >= ub + mask = ~lb_mask & ~ub_mask + + result = clipped_df.loc[lb_mask, i] + tm.assert_series_equal(result, lb[lb_mask], check_names=False) + assert result.name == i + + result = clipped_df.loc[ub_mask, i] + tm.assert_series_equal(result, ub[ub_mask], check_names=False) + assert result.name == i + + tm.assert_series_equal(clipped_df.loc[mask, i], df.loc[mask, i]) + + @pytest.mark.parametrize("inplace", [True, False]) + @pytest.mark.parametrize("lower", [[2, 3, 4], np.asarray([2, 3, 4])]) + @pytest.mark.parametrize( + "axis,res", + [ + (0, [[2.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 7.0, 7.0]]), + (1, [[2.0, 3.0, 4.0], [4.0, 5.0, 6.0], [5.0, 6.0, 7.0]]), + ], + ) + def test_clip_against_list_like(self, simple_frame, inplace, lower, axis, res): + # GH#15390 + original = simple_frame.copy(deep=True) + + result = original.clip(lower=lower, upper=[5, 6, 7], axis=axis, inplace=inplace) + + expected = DataFrame(res, columns=original.columns, index=original.index) + if inplace: + result = original + tm.assert_frame_equal(result, expected, check_exact=True) + + @pytest.mark.parametrize("axis", [0, 1, None]) + def test_clip_against_frame(self, axis): + df = DataFrame(np.random.randn(1000, 2)) + lb = DataFrame(np.random.randn(1000, 2)) + ub = lb + 1 + + clipped_df = df.clip(lb, ub, axis=axis) + + lb_mask = df <= lb + ub_mask = df >= ub + mask = ~lb_mask & ~ub_mask + + tm.assert_frame_equal(clipped_df[lb_mask], lb[lb_mask]) + tm.assert_frame_equal(clipped_df[ub_mask], ub[ub_mask]) + tm.assert_frame_equal(clipped_df[mask], df[mask]) + + def test_clip_against_unordered_columns(self): + # GH#20911 + df1 = DataFrame(np.random.randn(1000, 4), columns=["A", "B", "C", "D"]) + df2 = DataFrame(np.random.randn(1000, 4), columns=["D", "A", "B", "C"]) + df3 = DataFrame(df2.values - 1, columns=["B", "D", "C", "A"]) + result_upper = df1.clip(lower=0, upper=df2) + expected_upper = df1.clip(lower=0, upper=df2[df1.columns]) + result_lower = df1.clip(lower=df3, upper=3) + expected_lower = df1.clip(lower=df3[df1.columns], upper=3) + result_lower_upper = df1.clip(lower=df3, upper=df2) + expected_lower_upper = df1.clip(lower=df3[df1.columns], upper=df2[df1.columns]) + tm.assert_frame_equal(result_upper, expected_upper) + tm.assert_frame_equal(result_lower, expected_lower) + tm.assert_frame_equal(result_lower_upper, expected_lower_upper) + + def test_clip_with_na_args(self, float_frame, using_array_manager): + """Should process np.nan argument as None""" + # GH#17276 + tm.assert_frame_equal(float_frame.clip(np.nan), float_frame) + tm.assert_frame_equal(float_frame.clip(upper=np.nan, lower=np.nan), float_frame) + + # GH#19992 and adjusted in GH#40420 + df = DataFrame({"col_0": [1, 2, 3], "col_1": [4, 5, 6], "col_2": [7, 8, 9]}) + + result = df.clip(lower=[4, 5, np.nan], axis=0) + expected = DataFrame( + {"col_0": [4, 5, 3], "col_1": [4, 5, 6], "col_2": [7, 8, 9]} + ) + tm.assert_frame_equal(result, expected) + + warn = FutureWarning if using_array_manager else None + with tm.assert_produces_warning(warn, match="Downcasting integer-dtype"): + result = df.clip(lower=[4, 5, np.nan], axis=1) + expected = DataFrame( + {"col_0": [4, 4, 4], "col_1": [5, 5, 6], "col_2": [7, 8, 9]} + ) + tm.assert_frame_equal(result, expected) + + # GH#40420 + data = {"col_0": [9, -3, 0, -1, 5], "col_1": [-2, -7, 6, 8, -5]} + df = DataFrame(data) + t = Series([2, -4, np.NaN, 6, 3]) + result = df.clip(lower=t, axis=0) + expected = DataFrame({"col_0": [9, -3, 0, 6, 5], "col_1": [2, -4, 6, 8, 3]}) + tm.assert_frame_equal(result, 
expected)
+
+    def test_clip_pos_args_deprecation(self):
+        # https://github.com/pandas-dev/pandas/issues/41485
+        df = DataFrame({"a": [1, 2, 3]})
+        msg = (
+            r"In a future version of pandas all arguments of DataFrame.clip except "
+            r"for the arguments 'lower' and 'upper' will be keyword-only"
+        )
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = df.clip(0, 1, 0)
+        expected = DataFrame({"a": [1, 1, 1]})
+        tm.assert_frame_equal(result, expected)
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_combine.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_combine.py
new file mode 100644
index 0000000..bc6a67e
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_combine.py
@@ -0,0 +1,47 @@
+import numpy as np
+import pytest
+
+import pandas as pd
+import pandas._testing as tm
+
+
+class TestCombine:
+    @pytest.mark.parametrize(
+        "data",
+        [
+            pd.date_range("2000", periods=4),
+            pd.date_range("2000", periods=4, tz="US/Central"),
+            pd.period_range("2000", periods=4),
+            pd.timedelta_range(0, periods=4),
+        ],
+    )
+    def test_combine_datetimelike_udf(self, data):
+        # GH#23079
+        df = pd.DataFrame({"A": data})
+        other = df.copy()
+        df.iloc[1, 0] = None
+
+        def combiner(a, b):
+            return b
+
+        result = df.combine(other, combiner)
+        tm.assert_frame_equal(result, other)
+
+    def test_combine_generic(self, float_frame):
+        df1 = float_frame
+        df2 = float_frame.loc[float_frame.index[:-5], ["A", "B", "C"]]
+
+        combined = df1.combine(df2, np.add)
+        combined2 = df2.combine(df1, np.add)
+        assert combined["D"].isna().all()
+        assert combined2["D"].isna().all()
+
+        chunk = combined.loc[combined.index[:-5], ["A", "B", "C"]]
+        chunk2 = combined2.loc[combined2.index[:-5], ["A", "B", "C"]]
+
+        exp = (
+            float_frame.loc[float_frame.index[:-5], ["A", "B", "C"]].reindex_like(chunk)
+            * 2
+        )
+        tm.assert_frame_equal(chunk, exp)
+        tm.assert_frame_equal(chunk2, exp)
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_combine_first.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_combine_first.py
new file mode 100644
index 0000000..daddca7
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_combine_first.py
@@ -0,0 +1,528 @@
+from datetime import datetime
+
+import numpy as np
+import pytest
+
+from pandas.core.dtypes.cast import (
+    find_common_type,
+    is_dtype_equal,
+)
+
+import pandas as pd
+from pandas import (
+    DataFrame,
+    Index,
+    MultiIndex,
+    Series,
+)
+import pandas._testing as tm
+
+
+class TestDataFrameCombineFirst:
+    def test_combine_first_mixed(self):
+        a = Series(["a", "b"], index=range(2))
+        b = Series(range(2), index=range(2))
+        f = DataFrame({"A": a, "B": b})
+
+        a = Series(["a", "b"], index=range(5, 7))
+        b = Series(range(2), index=range(5, 7))
+        g = DataFrame({"A": a, "B": b})
+
+        exp = DataFrame({"A": list("abab"), "B": [0, 1, 0, 1]}, index=[0, 1, 5, 6])
+        combined = f.combine_first(g)
+        tm.assert_frame_equal(combined, exp)
+
+    def test_combine_first(self, float_frame):
+        # disjoint
+        head, tail = float_frame[:5], float_frame[5:]
+
+        combined = head.combine_first(tail)
+        reordered_frame = float_frame.reindex(combined.index)
+        tm.assert_frame_equal(combined, reordered_frame)
+        assert tm.equalContents(combined.columns, float_frame.columns)
+        tm.assert_series_equal(combined["A"], reordered_frame["A"])
+
+        # same index
+        fcopy = float_frame.copy()
+        fcopy["A"] = 1
+        del fcopy["C"]
+
+        fcopy2 = float_frame.copy()
fcopy2["B"] = 0 + del fcopy2["D"] + + combined = fcopy.combine_first(fcopy2) + + assert (combined["A"] == 1).all() + tm.assert_series_equal(combined["B"], fcopy["B"]) + tm.assert_series_equal(combined["C"], fcopy2["C"]) + tm.assert_series_equal(combined["D"], fcopy["D"]) + + # overlap + head, tail = reordered_frame[:10].copy(), reordered_frame + head["A"] = 1 + + combined = head.combine_first(tail) + assert (combined["A"][:10] == 1).all() + + # reverse overlap + tail["A"][:10] = 0 + combined = tail.combine_first(head) + assert (combined["A"][:10] == 0).all() + + # no overlap + f = float_frame[:10] + g = float_frame[10:] + combined = f.combine_first(g) + tm.assert_series_equal(combined["A"].reindex(f.index), f["A"]) + tm.assert_series_equal(combined["A"].reindex(g.index), g["A"]) + + # corner cases + comb = float_frame.combine_first(DataFrame()) + tm.assert_frame_equal(comb, float_frame) + + comb = DataFrame().combine_first(float_frame) + tm.assert_frame_equal(comb, float_frame) + + comb = float_frame.combine_first(DataFrame(index=["faz", "boo"])) + assert "faz" in comb.index + + # #2525 + df = DataFrame({"a": [1]}, index=[datetime(2012, 1, 1)]) + df2 = DataFrame(columns=["b"]) + result = df.combine_first(df2) + assert "b" in result + + def test_combine_first_mixed_bug(self): + idx = Index(["a", "b", "c", "e"]) + ser1 = Series([5.0, -9.0, 4.0, 100.0], index=idx) + ser2 = Series(["a", "b", "c", "e"], index=idx) + ser3 = Series([12, 4, 5, 97], index=idx) + + frame1 = DataFrame({"col0": ser1, "col2": ser2, "col3": ser3}) + + idx = Index(["a", "b", "c", "f"]) + ser1 = Series([5.0, -9.0, 4.0, 100.0], index=idx) + ser2 = Series(["a", "b", "c", "f"], index=idx) + ser3 = Series([12, 4, 5, 97], index=idx) + + frame2 = DataFrame({"col1": ser1, "col2": ser2, "col5": ser3}) + + combined = frame1.combine_first(frame2) + assert len(combined.columns) == 5 + + def test_combine_first_same_as_in_update(self): + # gh 3016 (same as in update) + df = DataFrame( + [[1.0, 2.0, False, True], [4.0, 5.0, True, False]], + columns=["A", "B", "bool1", "bool2"], + ) + + other = DataFrame([[45, 45]], index=[0], columns=["A", "B"]) + result = df.combine_first(other) + tm.assert_frame_equal(result, df) + + df.loc[0, "A"] = np.nan + result = df.combine_first(other) + df.loc[0, "A"] = 45 + tm.assert_frame_equal(result, df) + + def test_combine_first_doc_example(self): + # doc example + df1 = DataFrame( + {"A": [1.0, np.nan, 3.0, 5.0, np.nan], "B": [np.nan, 2.0, 3.0, np.nan, 6.0]} + ) + + df2 = DataFrame( + { + "A": [5.0, 2.0, 4.0, np.nan, 3.0, 7.0], + "B": [np.nan, np.nan, 3.0, 4.0, 6.0, 8.0], + } + ) + + result = df1.combine_first(df2) + expected = DataFrame({"A": [1, 2, 3, 5, 3, 7.0], "B": [np.nan, 2, 3, 4, 6, 8]}) + tm.assert_frame_equal(result, expected) + + def test_combine_first_return_obj_type_with_bools(self): + # GH3552 + + df1 = DataFrame( + [[np.nan, 3.0, True], [-4.6, np.nan, True], [np.nan, 7.0, False]] + ) + df2 = DataFrame([[-42.6, np.nan, True], [-5.0, 1.6, False]], index=[1, 2]) + + expected = Series([True, True, False], name=2, dtype=bool) + + result_12 = df1.combine_first(df2)[2] + tm.assert_series_equal(result_12, expected) + + result_21 = df2.combine_first(df1)[2] + tm.assert_series_equal(result_21, expected) + + @pytest.mark.parametrize( + "data1, data2, data_expected", + ( + ( + [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], + [pd.NaT, pd.NaT, pd.NaT], + [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], + ), + ( + [pd.NaT, pd.NaT, pd.NaT], + [datetime(2000, 1, 1), 
datetime(2000, 1, 2), datetime(2000, 1, 3)], + [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], + ), + ( + [datetime(2000, 1, 2), pd.NaT, pd.NaT], + [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], + [datetime(2000, 1, 2), datetime(2000, 1, 2), datetime(2000, 1, 3)], + ), + ( + [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], + [datetime(2000, 1, 2), pd.NaT, pd.NaT], + [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], + ), + ), + ) + def test_combine_first_convert_datatime_correctly( + self, data1, data2, data_expected + ): + # GH 3593 + + df1, df2 = DataFrame({"a": data1}), DataFrame({"a": data2}) + result = df1.combine_first(df2) + expected = DataFrame({"a": data_expected}) + tm.assert_frame_equal(result, expected) + + def test_combine_first_align_nan(self): + # GH 7509 (not fixed) + dfa = DataFrame([[pd.Timestamp("2011-01-01"), 2]], columns=["a", "b"]) + dfb = DataFrame([[4], [5]], columns=["b"]) + assert dfa["a"].dtype == "datetime64[ns]" + assert dfa["b"].dtype == "int64" + + res = dfa.combine_first(dfb) + exp = DataFrame( + {"a": [pd.Timestamp("2011-01-01"), pd.NaT], "b": [2, 5]}, + columns=["a", "b"], + ) + tm.assert_frame_equal(res, exp) + assert res["a"].dtype == "datetime64[ns]" + # TODO: this must be int64 + assert res["b"].dtype == "int64" + + res = dfa.iloc[:0].combine_first(dfb) + exp = DataFrame({"a": [np.nan, np.nan], "b": [4, 5]}, columns=["a", "b"]) + tm.assert_frame_equal(res, exp) + # TODO: this must be datetime64 + assert res["a"].dtype == "float64" + # TODO: this must be int64 + assert res["b"].dtype == "int64" + + def test_combine_first_timezone(self): + # see gh-7630 + data1 = pd.to_datetime("20100101 01:01").tz_localize("UTC") + df1 = DataFrame( + columns=["UTCdatetime", "abc"], + data=data1, + index=pd.date_range("20140627", periods=1), + ) + data2 = pd.to_datetime("20121212 12:12").tz_localize("UTC") + df2 = DataFrame( + columns=["UTCdatetime", "xyz"], + data=data2, + index=pd.date_range("20140628", periods=1), + ) + res = df2[["UTCdatetime"]].combine_first(df1) + exp = DataFrame( + { + "UTCdatetime": [ + pd.Timestamp("2010-01-01 01:01", tz="UTC"), + pd.Timestamp("2012-12-12 12:12", tz="UTC"), + ], + "abc": [pd.Timestamp("2010-01-01 01:01:00", tz="UTC"), pd.NaT], + }, + columns=["UTCdatetime", "abc"], + index=pd.date_range("20140627", periods=2, freq="D"), + ) + assert res["UTCdatetime"].dtype == "datetime64[ns, UTC]" + assert res["abc"].dtype == "datetime64[ns, UTC]" + + tm.assert_frame_equal(res, exp) + + # see gh-10567 + dts1 = pd.date_range("2015-01-01", "2015-01-05", tz="UTC") + df1 = DataFrame({"DATE": dts1}) + dts2 = pd.date_range("2015-01-03", "2015-01-05", tz="UTC") + df2 = DataFrame({"DATE": dts2}) + + res = df1.combine_first(df2) + tm.assert_frame_equal(res, df1) + assert res["DATE"].dtype == "datetime64[ns, UTC]" + + dts1 = pd.DatetimeIndex( + ["2011-01-01", "NaT", "2011-01-03", "2011-01-04"], tz="US/Eastern" + ) + df1 = DataFrame({"DATE": dts1}, index=[1, 3, 5, 7]) + dts2 = pd.DatetimeIndex( + ["2012-01-01", "2012-01-02", "2012-01-03"], tz="US/Eastern" + ) + df2 = DataFrame({"DATE": dts2}, index=[2, 4, 5]) + + res = df1.combine_first(df2) + exp_dts = pd.DatetimeIndex( + [ + "2011-01-01", + "2012-01-01", + "NaT", + "2012-01-02", + "2011-01-03", + "2011-01-04", + ], + tz="US/Eastern", + ) + exp = DataFrame({"DATE": exp_dts}, index=[1, 2, 3, 4, 5, 7]) + tm.assert_frame_equal(res, exp) + + # different tz + dts1 = pd.date_range("2015-01-01", "2015-01-05", tz="US/Eastern") + df1 = 
DataFrame({"DATE": dts1}) + dts2 = pd.date_range("2015-01-03", "2015-01-05") + df2 = DataFrame({"DATE": dts2}) + + # if df1 doesn't have NaN, keep its dtype + res = df1.combine_first(df2) + tm.assert_frame_equal(res, df1) + assert res["DATE"].dtype == "datetime64[ns, US/Eastern]" + + dts1 = pd.date_range("2015-01-01", "2015-01-02", tz="US/Eastern") + df1 = DataFrame({"DATE": dts1}) + dts2 = pd.date_range("2015-01-01", "2015-01-03") + df2 = DataFrame({"DATE": dts2}) + + res = df1.combine_first(df2) + exp_dts = [ + pd.Timestamp("2015-01-01", tz="US/Eastern"), + pd.Timestamp("2015-01-02", tz="US/Eastern"), + pd.Timestamp("2015-01-03"), + ] + exp = DataFrame({"DATE": exp_dts}) + tm.assert_frame_equal(res, exp) + assert res["DATE"].dtype == "object" + + def test_combine_first_timedelta(self): + data1 = pd.TimedeltaIndex(["1 day", "NaT", "3 day", "4day"]) + df1 = DataFrame({"TD": data1}, index=[1, 3, 5, 7]) + data2 = pd.TimedeltaIndex(["10 day", "11 day", "12 day"]) + df2 = DataFrame({"TD": data2}, index=[2, 4, 5]) + + res = df1.combine_first(df2) + exp_dts = pd.TimedeltaIndex( + ["1 day", "10 day", "NaT", "11 day", "3 day", "4 day"] + ) + exp = DataFrame({"TD": exp_dts}, index=[1, 2, 3, 4, 5, 7]) + tm.assert_frame_equal(res, exp) + assert res["TD"].dtype == "timedelta64[ns]" + + def test_combine_first_period(self): + data1 = pd.PeriodIndex(["2011-01", "NaT", "2011-03", "2011-04"], freq="M") + df1 = DataFrame({"P": data1}, index=[1, 3, 5, 7]) + data2 = pd.PeriodIndex(["2012-01-01", "2012-02", "2012-03"], freq="M") + df2 = DataFrame({"P": data2}, index=[2, 4, 5]) + + res = df1.combine_first(df2) + exp_dts = pd.PeriodIndex( + ["2011-01", "2012-01", "NaT", "2012-02", "2011-03", "2011-04"], freq="M" + ) + exp = DataFrame({"P": exp_dts}, index=[1, 2, 3, 4, 5, 7]) + tm.assert_frame_equal(res, exp) + assert res["P"].dtype == data1.dtype + + # different freq + dts2 = pd.PeriodIndex(["2012-01-01", "2012-01-02", "2012-01-03"], freq="D") + df2 = DataFrame({"P": dts2}, index=[2, 4, 5]) + + res = df1.combine_first(df2) + exp_dts = [ + pd.Period("2011-01", freq="M"), + pd.Period("2012-01-01", freq="D"), + pd.NaT, + pd.Period("2012-01-02", freq="D"), + pd.Period("2011-03", freq="M"), + pd.Period("2011-04", freq="M"), + ] + exp = DataFrame({"P": exp_dts}, index=[1, 2, 3, 4, 5, 7]) + tm.assert_frame_equal(res, exp) + assert res["P"].dtype == "object" + + def test_combine_first_int(self): + # GH14687 - integer series that do no align exactly + + df1 = DataFrame({"a": [0, 1, 3, 5]}, dtype="int64") + df2 = DataFrame({"a": [1, 4]}, dtype="int64") + + result_12 = df1.combine_first(df2) + expected_12 = DataFrame({"a": [0, 1, 3, 5]}) + tm.assert_frame_equal(result_12, expected_12) + + result_21 = df2.combine_first(df1) + expected_21 = DataFrame({"a": [1, 4, 3, 5]}) + tm.assert_frame_equal(result_21, expected_21) + + @pytest.mark.parametrize("val", [1, 1.0]) + def test_combine_first_with_asymmetric_other(self, val): + # see gh-20699 + df1 = DataFrame({"isNum": [val]}) + df2 = DataFrame({"isBool": [True]}) + + res = df1.combine_first(df2) + exp = DataFrame({"isBool": [True], "isNum": [val]}) + + tm.assert_frame_equal(res, exp) + + def test_combine_first_string_dtype_only_na(self, nullable_string_dtype): + # GH: 37519 + df = DataFrame( + {"a": ["962", "85"], "b": [pd.NA] * 2}, dtype=nullable_string_dtype + ) + df2 = DataFrame({"a": ["85"], "b": [pd.NA]}, dtype=nullable_string_dtype) + df.set_index(["a", "b"], inplace=True) + df2.set_index(["a", "b"], inplace=True) + result = df.combine_first(df2) + expected = DataFrame( + 
{"a": ["962", "85"], "b": [pd.NA] * 2}, dtype=nullable_string_dtype + ).set_index(["a", "b"]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "scalar1, scalar2", + [ + (datetime(2020, 1, 1), datetime(2020, 1, 2)), + (pd.Period("2020-01-01", "D"), pd.Period("2020-01-02", "D")), + (pd.Timedelta("89 days"), pd.Timedelta("60 min")), + (pd.Interval(left=0, right=1), pd.Interval(left=2, right=3, closed="left")), + ], +) +def test_combine_first_timestamp_bug(scalar1, scalar2, nulls_fixture): + # GH28481 + na_value = nulls_fixture + + frame = DataFrame([[na_value, na_value]], columns=["a", "b"]) + other = DataFrame([[scalar1, scalar2]], columns=["b", "c"]) + + common_dtype = find_common_type([frame.dtypes["b"], other.dtypes["b"]]) + + if is_dtype_equal(common_dtype, "object") or frame.dtypes["b"] == other.dtypes["b"]: + val = scalar1 + else: + val = na_value + + result = frame.combine_first(other) + + expected = DataFrame([[na_value, val, scalar2]], columns=["a", "b", "c"]) + + expected["b"] = expected["b"].astype(common_dtype) + + tm.assert_frame_equal(result, expected) + + +def test_combine_first_timestamp_bug_NaT(): + # GH28481 + frame = DataFrame([[pd.NaT, pd.NaT]], columns=["a", "b"]) + other = DataFrame( + [[datetime(2020, 1, 1), datetime(2020, 1, 2)]], columns=["b", "c"] + ) + + result = frame.combine_first(other) + expected = DataFrame( + [[pd.NaT, datetime(2020, 1, 1), datetime(2020, 1, 2)]], columns=["a", "b", "c"] + ) + + tm.assert_frame_equal(result, expected) + + +def test_combine_first_with_nan_multiindex(): + # gh-36562 + + mi1 = MultiIndex.from_arrays( + [["b", "b", "c", "a", "b", np.nan], [1, 2, 3, 4, 5, 6]], names=["a", "b"] + ) + df = DataFrame({"c": [1, 1, 1, 1, 1, 1]}, index=mi1) + mi2 = MultiIndex.from_arrays( + [["a", "b", "c", "a", "b", "d"], [1, 1, 1, 1, 1, 1]], names=["a", "b"] + ) + s = Series([1, 2, 3, 4, 5, 6], index=mi2) + res = df.combine_first(DataFrame({"d": s})) + mi_expected = MultiIndex.from_arrays( + [ + ["a", "a", "a", "b", "b", "b", "b", "c", "c", "d", np.nan], + [1, 1, 4, 1, 1, 2, 5, 1, 3, 1, 6], + ], + names=["a", "b"], + ) + expected = DataFrame( + { + "c": [np.nan, np.nan, 1, 1, 1, 1, 1, np.nan, 1, np.nan, 1], + "d": [1.0, 4.0, np.nan, 2.0, 5.0, np.nan, np.nan, 3.0, np.nan, 6.0, np.nan], + }, + index=mi_expected, + ) + tm.assert_frame_equal(res, expected) + + +def test_combine_preserve_dtypes(): + # GH7509 + a_column = Series(["a", "b"], index=range(2)) + b_column = Series(range(2), index=range(2)) + df1 = DataFrame({"A": a_column, "B": b_column}) + + c_column = Series(["a", "b"], index=range(5, 7)) + b_column = Series(range(-1, 1), index=range(5, 7)) + df2 = DataFrame({"B": b_column, "C": c_column}) + + expected = DataFrame( + { + "A": ["a", "b", np.nan, np.nan], + "B": [0, 1, -1, 0], + "C": [np.nan, np.nan, "a", "b"], + }, + index=[0, 1, 5, 6], + ) + combined = df1.combine_first(df2) + tm.assert_frame_equal(combined, expected) + + +def test_combine_first_duplicates_rows_for_nan_index_values(): + # GH39881 + df1 = DataFrame( + {"x": [9, 10, 11]}, + index=MultiIndex.from_arrays([[1, 2, 3], [np.nan, 5, 6]], names=["a", "b"]), + ) + + df2 = DataFrame( + {"y": [12, 13, 14]}, + index=MultiIndex.from_arrays([[1, 2, 4], [np.nan, 5, 7]], names=["a", "b"]), + ) + + expected = DataFrame( + { + "x": [9.0, 10.0, 11.0, np.nan], + "y": [12.0, 13.0, np.nan, 14.0], + }, + index=MultiIndex.from_arrays( + [[1, 2, 3, 4], [np.nan, 5.0, 6.0, 7.0]], names=["a", "b"] + ), + ) + combined = df1.combine_first(df2) + tm.assert_frame_equal(combined, 
expected) + + +def test_combine_first_int64_not_cast_to_float64(): + # GH 28613 + df_1 = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + df_2 = DataFrame({"A": [1, 20, 30], "B": [40, 50, 60], "C": [12, 34, 65]}) + result = df_1.combine_first(df_2) + expected = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [12, 34, 65]}) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_compare.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_compare.py new file mode 100644 index 0000000..468811e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_compare.py @@ -0,0 +1,182 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +@pytest.mark.parametrize("align_axis", [0, 1, "index", "columns"]) +def test_compare_axis(align_axis): + # GH#30429 + df = pd.DataFrame( + {"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]}, + columns=["col1", "col2", "col3"], + ) + df2 = df.copy() + df2.loc[0, "col1"] = "c" + df2.loc[2, "col3"] = 4.0 + + result = df.compare(df2, align_axis=align_axis) + + if align_axis in (1, "columns"): + indices = pd.Index([0, 2]) + columns = pd.MultiIndex.from_product([["col1", "col3"], ["self", "other"]]) + expected = pd.DataFrame( + [["a", "c", np.nan, np.nan], [np.nan, np.nan, 3.0, 4.0]], + index=indices, + columns=columns, + ) + else: + indices = pd.MultiIndex.from_product([[0, 2], ["self", "other"]]) + columns = pd.Index(["col1", "col3"]) + expected = pd.DataFrame( + [["a", np.nan], ["c", np.nan], [np.nan, 3.0], [np.nan, 4.0]], + index=indices, + columns=columns, + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "keep_shape, keep_equal", + [ + (True, False), + (False, True), + (True, True), + # False, False case is already covered in test_compare_axis + ], +) +def test_compare_various_formats(keep_shape, keep_equal): + df = pd.DataFrame( + {"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]}, + columns=["col1", "col2", "col3"], + ) + df2 = df.copy() + df2.loc[0, "col1"] = "c" + df2.loc[2, "col3"] = 4.0 + + result = df.compare(df2, keep_shape=keep_shape, keep_equal=keep_equal) + + if keep_shape: + indices = pd.Index([0, 1, 2]) + columns = pd.MultiIndex.from_product( + [["col1", "col2", "col3"], ["self", "other"]] + ) + if keep_equal: + expected = pd.DataFrame( + [ + ["a", "c", 1.0, 1.0, 1.0, 1.0], + ["b", "b", 2.0, 2.0, 2.0, 2.0], + ["c", "c", np.nan, np.nan, 3.0, 4.0], + ], + index=indices, + columns=columns, + ) + else: + expected = pd.DataFrame( + [ + ["a", "c", np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan, 3.0, 4.0], + ], + index=indices, + columns=columns, + ) + else: + indices = pd.Index([0, 2]) + columns = pd.MultiIndex.from_product([["col1", "col3"], ["self", "other"]]) + expected = pd.DataFrame( + [["a", "c", 1.0, 1.0], ["c", "c", 3.0, 4.0]], index=indices, columns=columns + ) + tm.assert_frame_equal(result, expected) + + +def test_compare_with_equal_nulls(): + # We want to make sure two NaNs are considered the same + # and dropped where applicable + df = pd.DataFrame( + {"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]}, + columns=["col1", "col2", "col3"], + ) + df2 = df.copy() + df2.loc[0, "col1"] = "c" + + result = df.compare(df2) + indices = pd.Index([0]) + columns = pd.MultiIndex.from_product([["col1"], ["self", "other"]]) + 
expected = pd.DataFrame([["a", "c"]], index=indices, columns=columns) + tm.assert_frame_equal(result, expected) + + +def test_compare_with_non_equal_nulls(): + # We want to make sure the relevant NaNs do not get dropped + # even if the entire row or column are NaNs + df = pd.DataFrame( + {"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]}, + columns=["col1", "col2", "col3"], + ) + df2 = df.copy() + df2.loc[0, "col1"] = "c" + df2.loc[2, "col3"] = np.nan + + result = df.compare(df2) + + indices = pd.Index([0, 2]) + columns = pd.MultiIndex.from_product([["col1", "col3"], ["self", "other"]]) + expected = pd.DataFrame( + [["a", "c", np.nan, np.nan], [np.nan, np.nan, 3.0, np.nan]], + index=indices, + columns=columns, + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("align_axis", [0, 1]) +def test_compare_multi_index(align_axis): + df = pd.DataFrame( + {"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]} + ) + df.columns = pd.MultiIndex.from_arrays([["a", "a", "b"], ["col1", "col2", "col3"]]) + df.index = pd.MultiIndex.from_arrays([["x", "x", "y"], [0, 1, 2]]) + + df2 = df.copy() + df2.iloc[0, 0] = "c" + df2.iloc[2, 2] = 4.0 + + result = df.compare(df2, align_axis=align_axis) + + if align_axis == 0: + indices = pd.MultiIndex.from_arrays( + [["x", "x", "y", "y"], [0, 0, 2, 2], ["self", "other", "self", "other"]] + ) + columns = pd.MultiIndex.from_arrays([["a", "b"], ["col1", "col3"]]) + data = [["a", np.nan], ["c", np.nan], [np.nan, 3.0], [np.nan, 4.0]] + else: + indices = pd.MultiIndex.from_arrays([["x", "y"], [0, 2]]) + columns = pd.MultiIndex.from_arrays( + [ + ["a", "a", "b", "b"], + ["col1", "col1", "col3", "col3"], + ["self", "other", "self", "other"], + ] + ) + data = [["a", "c", np.nan, np.nan], [np.nan, np.nan, 3.0, 4.0]] + + expected = pd.DataFrame(data=data, index=indices, columns=columns) + tm.assert_frame_equal(result, expected) + + +def test_compare_unaligned_objects(): + # test DataFrames with different indices + msg = "Can only compare identically-labeled DataFrame objects" + with pytest.raises(ValueError, match=msg): + df1 = pd.DataFrame([1, 2, 3], index=["a", "b", "c"]) + df2 = pd.DataFrame([1, 2, 3], index=["a", "b", "d"]) + df1.compare(df2) + + # test DataFrames with different shapes + msg = "Can only compare identically-labeled DataFrame objects" + with pytest.raises(ValueError, match=msg): + df1 = pd.DataFrame(np.ones((3, 3))) + df2 = pd.DataFrame(np.zeros((2, 1))) + df1.compare(df2) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_convert.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_convert.py new file mode 100644 index 0000000..118af9f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_convert.py @@ -0,0 +1,59 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Series, +) +import pandas._testing as tm + + +class TestConvert: + def test_convert_objects(self, float_string_frame): + + oops = float_string_frame.T.T + converted = oops._convert(datetime=True) + tm.assert_frame_equal(converted, float_string_frame) + assert converted["A"].dtype == np.float64 + + # force numeric conversion + float_string_frame["H"] = "1." + float_string_frame["I"] = "1" + + # add in some items that will be nan + length = len(float_string_frame) + float_string_frame["J"] = "1." 
+        float_string_frame["K"] = "1"
+        float_string_frame.loc[float_string_frame.index[0:5], ["J", "K"]] = "garbled"
+        converted = float_string_frame._convert(datetime=True, numeric=True)
+        assert converted["H"].dtype == "float64"
+        assert converted["I"].dtype == "int64"
+        assert converted["J"].dtype == "float64"
+        assert converted["K"].dtype == "float64"
+        assert len(converted["J"].dropna()) == length - 5
+        assert len(converted["K"].dropna()) == length - 5
+
+        # via astype
+        converted = float_string_frame.copy()
+        converted["H"] = converted["H"].astype("float64")
+        converted["I"] = converted["I"].astype("int64")
+        assert converted["H"].dtype == "float64"
+        assert converted["I"].dtype == "int64"
+
+        # via astype, but errors
+        converted = float_string_frame.copy()
+        with pytest.raises(ValueError, match="invalid literal"):
+            converted["H"].astype("int32")
+
+    def test_convert_mixed_single_column(self):
+        # GH#4119, not converting a mixed type (e.g. floats and object)
+        # mixed in a single column
+        df = DataFrame({"s": Series([1, "na", 3, 4])})
+        result = df._convert(datetime=True, numeric=True)
+        expected = DataFrame({"s": Series([1, np.nan, 3, 4])})
+        tm.assert_frame_equal(result, expected)
+
+    def test_convert_objects_no_conversion(self):
+        mixed1 = DataFrame({"a": [1, 2, 3], "b": [4.0, 5, 6], "c": ["x", "y", "z"]})
+        mixed2 = mixed1._convert(datetime=True)
+        tm.assert_frame_equal(mixed1, mixed2)
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_convert_dtypes.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_convert_dtypes.py
new file mode 100644
index 0000000..ec639ed
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_convert_dtypes.py
@@ -0,0 +1,43 @@
+import numpy as np
+import pytest
+
+import pandas as pd
+import pandas._testing as tm
+
+
+class TestConvertDtypes:
+    @pytest.mark.parametrize(
+        "convert_integer, expected", [(False, np.dtype("int32")), (True, "Int32")]
+    )
+    def test_convert_dtypes(self, convert_integer, expected, string_storage):
+        # Specific types are tested in tests/series/test_dtypes.py
+        # Just check that it works for DataFrame here
+        df = pd.DataFrame(
+            {
+                "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")),
+                "b": pd.Series(["x", "y", "z"], dtype=np.dtype("O")),
+            }
+        )
+        with pd.option_context("string_storage", string_storage):
+            result = df.convert_dtypes(True, True, convert_integer, False)
+        expected = pd.DataFrame(
+            {
+                "a": pd.Series([1, 2, 3], dtype=expected),
+                "b": pd.Series(["x", "y", "z"], dtype=f"string[{string_storage}]"),
+            }
+        )
+        tm.assert_frame_equal(result, expected)
+
+    def test_convert_empty(self):
+        # Empty DataFrame can pass convert_dtypes, see GH#40393
+        empty_df = pd.DataFrame()
+        tm.assert_frame_equal(empty_df, empty_df.convert_dtypes())
+
+    def test_convert_dtypes_retain_column_names(self):
+        # GH#41435
+        df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
+        df.columns.name = "cols"
+
+        result = df.convert_dtypes()
+        tm.assert_index_equal(result.columns, df.columns)
+        assert result.columns.name == "cols"
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_copy.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_copy.py
new file mode 100644
index 0000000..1c0b075
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_copy.py
@@ -0,0 +1,63 @@
+import numpy as np
+import pytest
+
+import pandas.util._test_decorators as td
+
+from pandas import DataFrame
+import pandas._testing as tm
+
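+
+# --- Editor's sketch (not part of the upstream pandas test suite): a minimal
+# illustration of the deep-vs-shallow copy semantics the tests below rely on,
+# valid for this pandas 1.x (pre copy-on-write) vendored copy. copy() (deep by
+# default) duplicates the data buffers; copy(deep=False) shares them.
+def test_copy_deep_vs_shallow_sketch():
+    df = DataFrame({"a": [1, 2, 3]})
+    deep = df.copy()
+    shallow = df.copy(deep=False)
+    assert not np.shares_memory(df["a"].values, deep["a"].values)
+    assert np.shares_memory(df["a"].values, shallow["a"].values)
+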
+ +class TestCopy: + @pytest.mark.parametrize("attr", ["index", "columns"]) + def test_copy_index_name_checking(self, float_frame, attr): + # don't want to be able to modify the index stored elsewhere after + # making a copy + ind = getattr(float_frame, attr) + ind.name = None + cp = float_frame.copy() + getattr(cp, attr).name = "foo" + assert getattr(float_frame, attr).name is None + + def test_copy_cache(self): + # GH#31784 _item_cache not cleared on copy causes incorrect reads after updates + df = DataFrame({"a": [1]}) + + df["x"] = [0] + df["a"] + + df.copy() + + df["a"].values[0] = -1 + + tm.assert_frame_equal(df, DataFrame({"a": [-1], "x": [0]})) + + df["y"] = [0] + + assert df["a"].values[0] == -1 + tm.assert_frame_equal(df, DataFrame({"a": [-1], "x": [0], "y": [0]})) + + def test_copy(self, float_frame, float_string_frame): + cop = float_frame.copy() + cop["E"] = cop["A"] + assert "E" not in float_frame + + # copy objects + copy = float_string_frame.copy() + assert copy._mgr is not float_string_frame._mgr + + @td.skip_array_manager_invalid_test + def test_copy_consolidates(self): + # GH#42477 + df = DataFrame( + { + "a": np.random.randint(0, 100, size=55), + "b": np.random.randint(0, 100, size=55), + } + ) + + for i in range(0, 10): + df.loc[:, f"n_{i}"] = np.random.randint(0, 100, size=55) + + assert len(df._mgr.blocks) == 11 + result = df.copy() + assert len(result._mgr.blocks) == 1 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_count.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_count.py new file mode 100644 index 0000000..43eb96f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_count.py @@ -0,0 +1,39 @@ +from pandas import ( + DataFrame, + Series, +) +import pandas._testing as tm + + +class TestDataFrameCount: + def test_count(self): + # corner case + frame = DataFrame() + ct1 = frame.count(1) + assert isinstance(ct1, Series) + + ct2 = frame.count(0) + assert isinstance(ct2, Series) + + # GH#423 + df = DataFrame(index=range(10)) + result = df.count(1) + expected = Series(0, index=df.index) + tm.assert_series_equal(result, expected) + + df = DataFrame(columns=range(10)) + result = df.count(0) + expected = Series(0, index=df.columns) + tm.assert_series_equal(result, expected) + + df = DataFrame() + result = df.count() + expected = Series(0, index=[]) + tm.assert_series_equal(result, expected) + + def test_count_objects(self, float_string_frame): + dm = DataFrame(float_string_frame._series) + df = DataFrame(float_string_frame._series) + + tm.assert_series_equal(dm.count(), df.count()) + tm.assert_series_equal(dm.count(1), df.count(1)) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_count_with_level_deprecated.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_count_with_level_deprecated.py new file mode 100644 index 0000000..f6fbc28 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_count_with_level_deprecated.py @@ -0,0 +1,123 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + Series, +) +import pandas._testing as tm + + +class TestDataFrameCount: + def test_count_multiindex(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + + frame = frame.copy() + frame.index.names = ["a", "b"] + + with tm.assert_produces_warning(FutureWarning): + result = frame.count(level="b") + with tm.assert_produces_warning(FutureWarning): + expected = 
frame.count(level=1) + tm.assert_frame_equal(result, expected, check_names=False) + + with tm.assert_produces_warning(FutureWarning): + result = frame.count(level="a") + with tm.assert_produces_warning(FutureWarning): + expected = frame.count(level=0) + tm.assert_frame_equal(result, expected, check_names=False) + + msg = "Level x not found" + with pytest.raises(KeyError, match=msg): + with tm.assert_produces_warning(FutureWarning): + frame.count(level="x") + + def test_count_level_corner(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + + ser = frame["A"][:0] + with tm.assert_produces_warning(FutureWarning): + result = ser.count(level=0) + expected = Series(0, index=ser.index.levels[0], name="A") + tm.assert_series_equal(result, expected) + + df = frame[:0] + with tm.assert_produces_warning(FutureWarning): + result = df.count(level=0) + expected = ( + DataFrame( + index=ser.index.levels[0].set_names(["first"]), columns=df.columns + ) + .fillna(0) + .astype(np.int64) + ) + tm.assert_frame_equal(result, expected) + + def test_count_index_with_nan(self): + # https://github.com/pandas-dev/pandas/issues/21824 + df = DataFrame( + { + "Person": ["John", "Myla", None, "John", "Myla"], + "Age": [24.0, 5, 21.0, 33, 26], + "Single": [False, True, True, True, False], + } + ) + + # count on row labels + with tm.assert_produces_warning(FutureWarning): + res = df.set_index(["Person", "Single"]).count(level="Person") + expected = DataFrame( + index=Index(["John", "Myla"], name="Person"), + columns=Index(["Age"]), + data=[2, 2], + ) + tm.assert_frame_equal(res, expected) + + # count on column labels + with tm.assert_produces_warning(FutureWarning): + res = df.set_index(["Person", "Single"]).T.count(level="Person", axis=1) + expected = DataFrame( + columns=Index(["John", "Myla"], name="Person"), + index=Index(["Age"]), + data=[[2, 2]], + ) + tm.assert_frame_equal(res, expected) + + def test_count_level( + self, + multiindex_year_month_day_dataframe_random_data, + multiindex_dataframe_random_data, + ): + ymd = multiindex_year_month_day_dataframe_random_data + frame = multiindex_dataframe_random_data + + def _check_counts(frame, axis=0): + index = frame._get_axis(axis) + for i in range(index.nlevels): + with tm.assert_produces_warning(FutureWarning): + result = frame.count(axis=axis, level=i) + expected = frame.groupby(axis=axis, level=i).count() + expected = expected.reindex_like(result).astype("i8") + tm.assert_frame_equal(result, expected) + + frame.iloc[1, [1, 2]] = np.nan + frame.iloc[7, [0, 1]] = np.nan + ymd.iloc[1, [1, 2]] = np.nan + ymd.iloc[7, [0, 1]] = np.nan + + _check_counts(frame) + _check_counts(ymd) + _check_counts(frame.T, axis=1) + _check_counts(ymd.T, axis=1) + + # can't call with level on regular DataFrame + df = tm.makeTimeDataFrame() + with pytest.raises(TypeError, match="hierarchical"): + with tm.assert_produces_warning(FutureWarning): + df.count(level=0) + + frame["D"] = "foo" + with tm.assert_produces_warning(FutureWarning): + result = frame.count(level=0, numeric_only=True) + tm.assert_index_equal(result.columns, Index(list("ABC"), name="exp")) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_cov_corr.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_cov_corr.py new file mode 100644 index 0000000..6a1466a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_cov_corr.py @@ -0,0 +1,361 @@ +import warnings + +import numpy as np +import pytest + +import 
pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + DataFrame, + Series, + isna, +) +import pandas._testing as tm + + +class TestDataFrameCov: + def test_cov(self, float_frame, float_string_frame): + # min_periods no NAs (corner case) + expected = float_frame.cov() + result = float_frame.cov(min_periods=len(float_frame)) + + tm.assert_frame_equal(expected, result) + + result = float_frame.cov(min_periods=len(float_frame) + 1) + assert isna(result.values).all() + + # with NAs + frame = float_frame.copy() + frame["A"][:5] = np.nan + frame["B"][5:10] = np.nan + result = frame.cov(min_periods=len(frame) - 8) + expected = frame.cov() + expected.loc["A", "B"] = np.nan + expected.loc["B", "A"] = np.nan + tm.assert_frame_equal(result, expected) + + # regular + result = frame.cov() + expected = frame["A"].cov(frame["C"]) + tm.assert_almost_equal(result["A"]["C"], expected) + + # exclude non-numeric types + result = float_string_frame.cov() + expected = float_string_frame.loc[:, ["A", "B", "C", "D"]].cov() + tm.assert_frame_equal(result, expected) + + # Single column frame + df = DataFrame(np.linspace(0.0, 1.0, 10)) + result = df.cov() + expected = DataFrame( + np.cov(df.values.T).reshape((1, 1)), index=df.columns, columns=df.columns + ) + tm.assert_frame_equal(result, expected) + df.loc[0] = np.nan + result = df.cov() + expected = DataFrame( + np.cov(df.values[1:].T).reshape((1, 1)), + index=df.columns, + columns=df.columns, + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("test_ddof", [None, 0, 1, 2, 3]) + def test_cov_ddof(self, test_ddof): + # GH#34611 + np_array1 = np.random.rand(10) + np_array2 = np.random.rand(10) + df = DataFrame({0: np_array1, 1: np_array2}) + result = df.cov(ddof=test_ddof) + expected_np = np.cov(np_array1, np_array2, ddof=test_ddof) + expected = DataFrame(expected_np) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "other_column", [pd.array([1, 2, 3]), np.array([1.0, 2.0, 3.0])] + ) + def test_cov_nullable_integer(self, other_column): + # https://github.com/pandas-dev/pandas/issues/33803 + data = DataFrame({"a": pd.array([1, 2, None]), "b": other_column}) + result = data.cov() + arr = np.array([[0.5, 0.5], [0.5, 1.0]]) + expected = DataFrame(arr, columns=["a", "b"], index=["a", "b"]) + tm.assert_frame_equal(result, expected) + + +class TestDataFrameCorr: + # DataFrame.corr(), as opposed to DataFrame.corrwith + + @pytest.mark.parametrize("method", ["pearson", "kendall", "spearman"]) + @td.skip_if_no_scipy + def test_corr_scipy_method(self, float_frame, method): + float_frame["A"][:5] = np.nan + float_frame["B"][5:10] = np.nan + float_frame["A"][:10] = float_frame["A"][10:20] + + correls = float_frame.corr(method=method) + expected = float_frame["A"].corr(float_frame["C"], method=method) + tm.assert_almost_equal(correls["A"]["C"], expected) + + # --------------------------------------------------------------------- + + def test_corr_non_numeric(self, float_string_frame): + # exclude non-numeric types + result = float_string_frame.corr() + expected = float_string_frame.loc[:, ["A", "B", "C", "D"]].corr() + tm.assert_frame_equal(result, expected) + + @td.skip_if_no_scipy + @pytest.mark.parametrize("meth", ["pearson", "kendall", "spearman"]) + def test_corr_nooverlap(self, meth): + # nothing in common + df = DataFrame( + { + "A": [1, 1.5, 1, np.nan, np.nan, np.nan], + "B": [np.nan, np.nan, np.nan, 1, 1.5, 1], + "C": [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + } + ) + rs = df.corr(meth) + 
assert isna(rs.loc["A", "B"]) + assert isna(rs.loc["B", "A"]) + assert rs.loc["A", "A"] == 1 + assert rs.loc["B", "B"] == 1 + assert isna(rs.loc["C", "C"]) + + @pytest.mark.parametrize("meth", ["pearson", "spearman"]) + def test_corr_constant(self, meth): + # constant --> all NA + df = DataFrame( + { + "A": [1, 1, 1, np.nan, np.nan, np.nan], + "B": [np.nan, np.nan, np.nan, 1, 1, 1], + } + ) + rs = df.corr(meth) + assert isna(rs.values).all() + + @td.skip_if_no_scipy + @pytest.mark.parametrize("meth", ["pearson", "kendall", "spearman"]) + def test_corr_int_and_boolean(self, meth): + # when the dtypes of the pandas Series differ, + # the combined ndarray will have dtype=object, + # so it needs to be handled properly + df = DataFrame({"a": [True, False], "b": [1, 0]}) + + expected = DataFrame(np.ones((2, 2)), index=["a", "b"], columns=["a", "b"]) + + with warnings.catch_warnings(record=True): + warnings.simplefilter("ignore", RuntimeWarning) + result = df.corr(meth) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("method", ["cov", "corr"]) + def test_corr_cov_independent_index_column(self, method): + # GH#14617 + df = DataFrame(np.random.randn(4 * 10).reshape(10, 4), columns=list("abcd")) + result = getattr(df, method)() + assert result.index is not result.columns + assert result.index.equals(result.columns) + + def test_corr_invalid_method(self): + # GH#22298 + df = DataFrame(np.random.normal(size=(10, 2))) + msg = "method must be either 'pearson', 'spearman', 'kendall', or a callable, " + with pytest.raises(ValueError, match=msg): + df.corr(method="____") + + def test_corr_int(self): + # dtypes other than float64 GH#1761 + df = DataFrame({"a": [1, 2, 3, 4], "b": [1, 2, 3, 4]}) + + df.cov() + df.corr() + + @td.skip_if_no_scipy + @pytest.mark.parametrize( + "nullable_column", [pd.array([1, 2, 3]), pd.array([1, 2, None])] + ) + @pytest.mark.parametrize( + "other_column", + [pd.array([1, 2, 3]), np.array([1.0, 2.0, 3.0]), np.array([1.0, 2.0, np.nan])], + ) + @pytest.mark.parametrize("method", ["pearson", "spearman", "kendall"]) + def test_corr_nullable_integer(self, nullable_column, other_column, method): + # https://github.com/pandas-dev/pandas/issues/33803 + data = DataFrame({"a": nullable_column, "b": other_column}) + result = data.corr(method=method) + expected = DataFrame(np.ones((2, 2)), columns=["a", "b"], index=["a", "b"]) + tm.assert_frame_equal(result, expected) + + def test_corr_item_cache(self): + # Check that corr does not lead to incorrect entries in item_cache + + df = DataFrame({"A": range(10)}) + df["B"] = range(10)[::-1] + + ser = df["A"] # populate item_cache + assert len(df._mgr.arrays) == 2 # i.e.
2 blocks + + _ = df.corr() + + # Check that the corr didn't break link between ser and df + ser.values[0] = 99 + assert df.loc[0, "A"] == 99 + assert df["A"] is ser + assert df.values[0, 0] == 99 + + @pytest.mark.parametrize("length", [2, 20, 200, 2000]) + def test_corr_for_constant_columns(self, length): + # GH: 37448 + df = DataFrame(length * [[0.4, 0.1]], columns=["A", "B"]) + result = df.corr() + expected = DataFrame( + {"A": [np.nan, np.nan], "B": [np.nan, np.nan]}, index=["A", "B"] + ) + tm.assert_frame_equal(result, expected) + + def test_calc_corr_small_numbers(self): + # GH: 37452 + df = DataFrame( + {"A": [1.0e-20, 2.0e-20, 3.0e-20], "B": [1.0e-20, 2.0e-20, 3.0e-20]} + ) + result = df.corr() + expected = DataFrame({"A": [1.0, 1.0], "B": [1.0, 1.0]}, index=["A", "B"]) + tm.assert_frame_equal(result, expected) + + @td.skip_if_no_scipy + @pytest.mark.parametrize("method", ["pearson", "spearman", "kendall"]) + def test_corr_min_periods_greater_than_length(self, method): + df = DataFrame({"A": [1, 2], "B": [1, 2]}) + result = df.corr(method=method, min_periods=3) + expected = DataFrame( + {"A": [np.nan, np.nan], "B": [np.nan, np.nan]}, index=["A", "B"] + ) + tm.assert_frame_equal(result, expected) + + +class TestDataFrameCorrWith: + def test_corrwith(self, datetime_frame): + a = datetime_frame + noise = Series(np.random.randn(len(a)), index=a.index) + + b = datetime_frame.add(noise, axis=0) + + # make sure order does not matter + b = b.reindex(columns=b.columns[::-1], index=b.index[::-1][10:]) + del b["B"] + + colcorr = a.corrwith(b, axis=0) + tm.assert_almost_equal(colcorr["A"], a["A"].corr(b["A"])) + + rowcorr = a.corrwith(b, axis=1) + tm.assert_series_equal(rowcorr, a.T.corrwith(b.T, axis=0)) + + dropped = a.corrwith(b, axis=0, drop=True) + tm.assert_almost_equal(dropped["A"], a["A"].corr(b["A"])) + assert "B" not in dropped + + dropped = a.corrwith(b, axis=1, drop=True) + assert a.index[-1] not in dropped.index + + # non time-series data + index = ["a", "b", "c", "d", "e"] + columns = ["one", "two", "three", "four"] + df1 = DataFrame(np.random.randn(5, 4), index=index, columns=columns) + df2 = DataFrame(np.random.randn(4, 4), index=index[:4], columns=columns) + correls = df1.corrwith(df2, axis=1) + for row in index[:4]: + tm.assert_almost_equal(correls[row], df1.loc[row].corr(df2.loc[row])) + + def test_corrwith_with_objects(self): + df1 = tm.makeTimeDataFrame() + df2 = tm.makeTimeDataFrame() + cols = ["A", "B", "C", "D"] + + df1["obj"] = "foo" + df2["obj"] = "bar" + + result = df1.corrwith(df2) + expected = df1.loc[:, cols].corrwith(df2.loc[:, cols]) + tm.assert_series_equal(result, expected) + + result = df1.corrwith(df2, axis=1) + expected = df1.loc[:, cols].corrwith(df2.loc[:, cols], axis=1) + tm.assert_series_equal(result, expected) + + def test_corrwith_series(self, datetime_frame): + result = datetime_frame.corrwith(datetime_frame["A"]) + expected = datetime_frame.apply(datetime_frame["A"].corr) + + tm.assert_series_equal(result, expected) + + def test_corrwith_matches_corrcoef(self): + df1 = DataFrame(np.arange(10000), columns=["a"]) + df2 = DataFrame(np.arange(10000) ** 2, columns=["a"]) + c1 = df1.corrwith(df2)["a"] + c2 = np.corrcoef(df1["a"], df2["a"])[0][1] + + tm.assert_almost_equal(c1, c2) + assert c1 < 1 + + def test_corrwith_mixed_dtypes(self): + # GH#18570 + df = DataFrame( + {"a": [1, 4, 3, 2], "b": [4, 6, 7, 3], "c": ["a", "b", "c", "d"]} + ) + s = Series([0, 6, 7, 3]) + result = df.corrwith(s) + corrs = [df["a"].corr(s), df["b"].corr(s)] + expected = 
Series(data=corrs, index=["a", "b"]) + tm.assert_series_equal(result, expected) + + def test_corrwith_index_intersection(self): + df1 = DataFrame(np.random.random(size=(10, 2)), columns=["a", "b"]) + df2 = DataFrame(np.random.random(size=(10, 3)), columns=["a", "b", "c"]) + + result = df1.corrwith(df2, drop=True).index.sort_values() + expected = df1.columns.intersection(df2.columns).sort_values() + tm.assert_index_equal(result, expected) + + def test_corrwith_index_union(self): + df1 = DataFrame(np.random.random(size=(10, 2)), columns=["a", "b"]) + df2 = DataFrame(np.random.random(size=(10, 3)), columns=["a", "b", "c"]) + + result = df1.corrwith(df2, drop=False).index.sort_values() + expected = df1.columns.union(df2.columns).sort_values() + tm.assert_index_equal(result, expected) + + def test_corrwith_dup_cols(self): + # GH#21925 + df1 = DataFrame(np.vstack([np.arange(10)] * 3).T) + df2 = df1.copy() + df2 = pd.concat((df2, df2[0]), axis=1) + + result = df1.corrwith(df2) + expected = Series(np.ones(4), index=[0, 0, 1, 2]) + tm.assert_series_equal(result, expected) + + def test_corr_numerical_instabilities(self): + # GH#45640 + df = DataFrame([[0.2, 0.4], [0.4, 0.2]]) + result = df.corr() + expected = DataFrame({0: [1.0, -1.0], 1: [-1.0, 1.0]}) + tm.assert_frame_equal(result - 1, expected - 1, atol=1e-17) + + @td.skip_if_no_scipy + def test_corrwith_spearman(self): + # GH#21925 + df = DataFrame(np.random.random(size=(100, 3))) + result = df.corrwith(df ** 2, method="spearman") + expected = Series(np.ones(len(result))) + tm.assert_series_equal(result, expected) + + @td.skip_if_no_scipy + def test_corrwith_kendall(self): + # GH#21925 + df = DataFrame(np.random.random(size=(100, 3))) + result = df.corrwith(df ** 2, method="kendall") + expected = Series(np.ones(len(result))) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_describe.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_describe.py new file mode 100644 index 0000000..3a1228e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_describe.py @@ -0,0 +1,399 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + Series, + Timestamp, + date_range, +) +import pandas._testing as tm + + +class TestDataFrameDescribe: + def test_describe_bool_in_mixed_frame(self): + df = DataFrame( + { + "string_data": ["a", "b", "c", "d", "e"], + "bool_data": [True, True, False, False, False], + "int_data": [10, 20, 30, 40, 50], + } + ) + + # Integer data are included in .describe() output, + # Boolean and string data are not. 
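+ # Sketch of the default output checked below: only "int_data" survives, + # indexed by ["count", "mean", "std", "min", "25%", "50%", "75%", "max"]; + # the bool and string columns only reappear via describe(include=...).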
+ result = df.describe() + expected = DataFrame( + {"int_data": [5, 30, df.int_data.std(), 10, 20, 30, 40, 50]}, + index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"], + ) + tm.assert_frame_equal(result, expected) + + # Top value is a boolean value that is False + result = df.describe(include=["bool"]) + + expected = DataFrame( + {"bool_data": [5, 2, False, 3]}, index=["count", "unique", "top", "freq"] + ) + tm.assert_frame_equal(result, expected) + + def test_describe_empty_object(self): + # GH#27183 + df = DataFrame({"A": [None, None]}, dtype=object) + result = df.describe() + expected = DataFrame( + {"A": [0, 0, np.nan, np.nan]}, + dtype=object, + index=["count", "unique", "top", "freq"], + ) + tm.assert_frame_equal(result, expected) + + result = df.iloc[:0].describe() + tm.assert_frame_equal(result, expected) + + def test_describe_bool_frame(self): + # GH#13891 + df = DataFrame( + { + "bool_data_1": [False, False, True, True], + "bool_data_2": [False, True, True, True], + } + ) + result = df.describe() + expected = DataFrame( + {"bool_data_1": [4, 2, False, 2], "bool_data_2": [4, 2, True, 3]}, + index=["count", "unique", "top", "freq"], + ) + tm.assert_frame_equal(result, expected) + + df = DataFrame( + { + "bool_data": [False, False, True, True, False], + "int_data": [0, 1, 2, 3, 4], + } + ) + result = df.describe() + expected = DataFrame( + {"int_data": [5, 2, df.int_data.std(), 0, 1, 2, 3, 4]}, + index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"], + ) + tm.assert_frame_equal(result, expected) + + df = DataFrame( + {"bool_data": [False, False, True, True], "str_data": ["a", "b", "c", "a"]} + ) + result = df.describe() + expected = DataFrame( + {"bool_data": [4, 2, False, 2], "str_data": [4, 3, "a", 2]}, + index=["count", "unique", "top", "freq"], + ) + tm.assert_frame_equal(result, expected) + + def test_describe_categorical(self): + df = DataFrame({"value": np.random.randint(0, 10000, 100)}) + labels = [f"{i} - {i + 499}" for i in range(0, 10000, 500)] + cat_labels = Categorical(labels, labels) + + df = df.sort_values(by=["value"], ascending=True) + df["value_group"] = pd.cut( + df.value, range(0, 10500, 500), right=False, labels=cat_labels + ) + cat = df + + # Categoricals should not show up together with numerical columns + result = cat.describe() + assert len(result.columns) == 1 + + # In a frame, describe() for the cat should be the same as for string + # arrays (count, unique, top, freq) + + cat = Categorical( + ["a", "b", "b", "b"], categories=["a", "b", "c"], ordered=True + ) + s = Series(cat) + result = s.describe() + expected = Series([4, 2, "b", 3], index=["count", "unique", "top", "freq"]) + tm.assert_series_equal(result, expected) + + cat = Series(Categorical(["a", "b", "c", "c"])) + df3 = DataFrame({"cat": cat, "s": ["a", "b", "c", "c"]}) + result = df3.describe() + tm.assert_numpy_array_equal(result["cat"].values, result["s"].values) + + def test_describe_empty_categorical_column(self): + # GH#26397 + # Ensure the index of an empty categorical DataFrame column + # also contains (count, unique, top, freq) + df = DataFrame({"empty_col": Categorical([])}) + result = df.describe() + expected = DataFrame( + {"empty_col": [0, 0, np.nan, np.nan]}, + index=["count", "unique", "top", "freq"], + dtype="object", + ) + tm.assert_frame_equal(result, expected) + # ensure NaN, not None + assert np.isnan(result.iloc[2, 0]) + assert np.isnan(result.iloc[3, 0]) + + def test_describe_categorical_columns(self): + # GH#11558 + columns = 
pd.CategoricalIndex(["int1", "int2", "obj"], ordered=True, name="XXX") + df = DataFrame( + { + "int1": [10, 20, 30, 40, 50], + "int2": [10, 20, 30, 40, 50], + "obj": ["A", 0, None, "X", 1], + }, + columns=columns, + ) + result = df.describe() + + exp_columns = pd.CategoricalIndex( + ["int1", "int2"], + categories=["int1", "int2", "obj"], + ordered=True, + name="XXX", + ) + expected = DataFrame( + { + "int1": [5, 30, df.int1.std(), 10, 20, 30, 40, 50], + "int2": [5, 30, df.int2.std(), 10, 20, 30, 40, 50], + }, + index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"], + columns=exp_columns, + ) + + tm.assert_frame_equal(result, expected) + tm.assert_categorical_equal(result.columns.values, expected.columns.values) + + def test_describe_datetime_columns(self): + columns = pd.DatetimeIndex( + ["2011-01-01", "2011-02-01", "2011-03-01"], + freq="MS", + tz="US/Eastern", + name="XXX", + ) + df = DataFrame( + { + 0: [10, 20, 30, 40, 50], + 1: [10, 20, 30, 40, 50], + 2: ["A", 0, None, "X", 1], + } + ) + df.columns = columns + result = df.describe() + + exp_columns = pd.DatetimeIndex( + ["2011-01-01", "2011-02-01"], freq="MS", tz="US/Eastern", name="XXX" + ) + expected = DataFrame( + { + 0: [5, 30, df.iloc[:, 0].std(), 10, 20, 30, 40, 50], + 1: [5, 30, df.iloc[:, 1].std(), 10, 20, 30, 40, 50], + }, + index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"], + ) + expected.columns = exp_columns + tm.assert_frame_equal(result, expected) + assert result.columns.freq == "MS" + assert result.columns.tz == expected.columns.tz + + def test_describe_timedelta_values(self): + # GH#6145 + t1 = pd.timedelta_range("1 days", freq="D", periods=5) + t2 = pd.timedelta_range("1 hours", freq="H", periods=5) + df = DataFrame({"t1": t1, "t2": t2}) + + expected = DataFrame( + { + "t1": [ + 5, + pd.Timedelta("3 days"), + df.iloc[:, 0].std(), + pd.Timedelta("1 days"), + pd.Timedelta("2 days"), + pd.Timedelta("3 days"), + pd.Timedelta("4 days"), + pd.Timedelta("5 days"), + ], + "t2": [ + 5, + pd.Timedelta("3 hours"), + df.iloc[:, 1].std(), + pd.Timedelta("1 hours"), + pd.Timedelta("2 hours"), + pd.Timedelta("3 hours"), + pd.Timedelta("4 hours"), + pd.Timedelta("5 hours"), + ], + }, + index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"], + ) + + result = df.describe() + tm.assert_frame_equal(result, expected) + + exp_repr = ( + " t1 t2\n" + "count 5 5\n" + "mean 3 days 00:00:00 0 days 03:00:00\n" + "std 1 days 13:56:50.394919273 0 days 01:34:52.099788303\n" + "min 1 days 00:00:00 0 days 01:00:00\n" + "25% 2 days 00:00:00 0 days 02:00:00\n" + "50% 3 days 00:00:00 0 days 03:00:00\n" + "75% 4 days 00:00:00 0 days 04:00:00\n" + "max 5 days 00:00:00 0 days 05:00:00" + ) + assert repr(result) == exp_repr + + def test_describe_tz_values(self, tz_naive_fixture): + # GH#21332 + tz = tz_naive_fixture + s1 = Series(range(5)) + start = Timestamp(2018, 1, 1) + end = Timestamp(2018, 1, 5) + s2 = Series(date_range(start, end, tz=tz)) + df = DataFrame({"s1": s1, "s2": s2}) + + expected = DataFrame( + { + "s1": [5, 2, 0, 1, 2, 3, 4, 1.581139], + "s2": [ + 5, + Timestamp(2018, 1, 3).tz_localize(tz), + start.tz_localize(tz), + s2[1], + s2[2], + s2[3], + end.tz_localize(tz), + np.nan, + ], + }, + index=["count", "mean", "min", "25%", "50%", "75%", "max", "std"], + ) + result = df.describe(include="all", datetime_is_numeric=True) + tm.assert_frame_equal(result, expected) + + def test_datetime_is_numeric_includes_datetime(self): + df = DataFrame({"a": date_range("2012", periods=3), "b": [1, 2, 3]}) + result = 
df.describe(datetime_is_numeric=True) + expected = DataFrame( + { + "a": [ + 3, + Timestamp("2012-01-02"), + Timestamp("2012-01-01"), + Timestamp("2012-01-01T12:00:00"), + Timestamp("2012-01-02"), + Timestamp("2012-01-02T12:00:00"), + Timestamp("2012-01-03"), + np.nan, + ], + "b": [3, 2, 1, 1.5, 2, 2.5, 3, 1], + }, + index=["count", "mean", "min", "25%", "50%", "75%", "max", "std"], + ) + tm.assert_frame_equal(result, expected) + + def test_describe_tz_values2(self): + tz = "CET" + s1 = Series(range(5)) + start = Timestamp(2018, 1, 1) + end = Timestamp(2018, 1, 5) + s2 = Series(date_range(start, end, tz=tz)) + df = DataFrame({"s1": s1, "s2": s2}) + + s1_ = s1.describe() + s2_ = Series( + [ + 5, + 5, + s2.value_counts().index[0], + 1, + start.tz_localize(tz), + end.tz_localize(tz), + ], + index=["count", "unique", "top", "freq", "first", "last"], + ) + idx = [ + "count", + "unique", + "top", + "freq", + "first", + "last", + "mean", + "std", + "min", + "25%", + "50%", + "75%", + "max", + ] + expected = pd.concat([s1_, s2_], axis=1, keys=["s1", "s2"]).loc[idx] + + with tm.assert_produces_warning(FutureWarning): + result = df.describe(include="all") + tm.assert_frame_equal(result, expected) + + def test_describe_percentiles_integer_idx(self): + # GH#26660 + df = DataFrame({"x": [1]}) + pct = np.linspace(0, 1, 10 + 1) + result = df.describe(percentiles=pct) + + expected = DataFrame( + {"x": [1.0, 1.0, np.NaN, 1.0, *(1.0 for _ in pct), 1.0]}, + index=[ + "count", + "mean", + "std", + "min", + "0%", + "10%", + "20%", + "30%", + "40%", + "50%", + "60%", + "70%", + "80%", + "90%", + "100%", + "max", + ], + ) + tm.assert_frame_equal(result, expected) + + def test_describe_does_not_raise_error_for_dictlike_elements(self): + # GH#32409 + df = DataFrame([{"test": {"a": "1"}}, {"test": {"a": "2"}}]) + expected = DataFrame( + {"test": [2, 2, {"a": "1"}, 1]}, index=["count", "unique", "top", "freq"] + ) + result = df.describe() + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("exclude", ["x", "y", ["x", "y"], ["x", "z"]]) + def test_describe_when_include_all_exclude_not_allowed(self, exclude): + """ + When include is 'all', then setting exclude != None is not allowed. 
+ """ + df = DataFrame({"x": [1], "y": [2], "z": [3]}) + msg = "exclude must be None when include is 'all'" + with pytest.raises(ValueError, match=msg): + df.describe(include="all", exclude=exclude) + + def test_describe_with_duplicate_columns(self): + df = DataFrame( + [[1, 1, 1], [2, 2, 2], [3, 3, 3]], + columns=["bar", "a", "a"], + dtype="float64", + ) + result = df.describe() + ser = df.iloc[:, 0].describe() + expected = pd.concat([ser, ser, ser], keys=df.columns, axis=1) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_diff.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_diff.py new file mode 100644 index 0000000..f615296 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_diff.py @@ -0,0 +1,303 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Series, + Timestamp, + date_range, +) +import pandas._testing as tm + + +class TestDataFrameDiff: + def test_diff_requires_integer(self): + df = DataFrame(np.random.randn(2, 2)) + with pytest.raises(ValueError, match="periods must be an integer"): + df.diff(1.5) + + # GH#44572 np.int64 is accepted + @pytest.mark.parametrize("num", [1, np.int64(1)]) + def test_diff(self, datetime_frame, num): + df = datetime_frame + the_diff = df.diff(num) + + expected = df["A"] - df["A"].shift(num) + tm.assert_series_equal(the_diff["A"], expected) + + def test_diff_int_dtype(self): + # int dtype + a = 10_000_000_000_000_000 + b = a + 1 + ser = Series([a, b]) + + rs = DataFrame({"s": ser}).diff() + assert rs.s[1] == 1 + + def test_diff_mixed_numeric(self, datetime_frame): + # mixed numeric + tf = datetime_frame.astype("float32") + the_diff = tf.diff(1) + tm.assert_series_equal(the_diff["A"], tf["A"] - tf["A"].shift(1)) + + def test_diff_axis1_nonconsolidated(self): + # GH#10907 + df = DataFrame({"y": Series([2]), "z": Series([3])}) + df.insert(0, "x", 1) + result = df.diff(axis=1) + expected = DataFrame({"x": np.nan, "y": Series(1), "z": Series(1)}) + tm.assert_frame_equal(result, expected) + + def test_diff_timedelta64_with_nat(self): + # GH#32441 + arr = np.arange(6).reshape(3, 2).astype("timedelta64[ns]") + arr[:, 0] = np.timedelta64("NaT", "ns") + + df = DataFrame(arr) + result = df.diff(1, axis=0) + + expected = DataFrame({0: df[0], 1: [pd.NaT, pd.Timedelta(2), pd.Timedelta(2)]}) + tm.assert_equal(result, expected) + + result = df.diff(0) + expected = df - df + assert expected[0].isna().all() + tm.assert_equal(result, expected) + + result = df.diff(-1, axis=1) + expected = df * np.nan + tm.assert_equal(result, expected) + + @pytest.mark.parametrize("tz", [None, "UTC"]) + def test_diff_datetime_axis0_with_nat(self, tz): + # GH#32441 + dti = pd.DatetimeIndex(["NaT", "2019-01-01", "2019-01-02"], tz=tz) + ser = Series(dti) + + df = ser.to_frame() + + result = df.diff() + ex_index = pd.TimedeltaIndex([pd.NaT, pd.NaT, pd.Timedelta(days=1)]) + expected = Series(ex_index).to_frame() + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("tz", [None, "UTC"]) + def test_diff_datetime_with_nat_zero_periods(self, tz): + # diff on NaT values should give NaT, not timedelta64(0) + dti = date_range("2016-01-01", periods=4, tz=tz) + ser = Series(dti) + df = ser.to_frame() + + df[1] = ser.copy() + df.iloc[:, 0] = pd.NaT + + expected = df - df + assert expected[0].isna().all() + + result = df.diff(0, axis=0) + tm.assert_frame_equal(result, expected) + + result = df.diff(0, axis=1) + 
tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("tz", [None, "UTC"]) + def test_diff_datetime_axis0(self, tz): + # GH#18578 + df = DataFrame( + { + 0: date_range("2010", freq="D", periods=2, tz=tz), + 1: date_range("2010", freq="D", periods=2, tz=tz), + } + ) + + result = df.diff(axis=0) + expected = DataFrame( + { + 0: pd.TimedeltaIndex(["NaT", "1 days"]), + 1: pd.TimedeltaIndex(["NaT", "1 days"]), + } + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("tz", [None, "UTC"]) + def test_diff_datetime_axis1(self, tz): + # GH#18578 + df = DataFrame( + { + 0: date_range("2010", freq="D", periods=2, tz=tz), + 1: date_range("2010", freq="D", periods=2, tz=tz), + } + ) + + result = df.diff(axis=1) + expected = DataFrame( + { + 0: pd.TimedeltaIndex(["NaT", "NaT"]), + 1: pd.TimedeltaIndex(["0 days", "0 days"]), + } + ) + tm.assert_frame_equal(result, expected) + + def test_diff_timedelta(self): + # GH#4533 + df = DataFrame( + { + "time": [Timestamp("20130101 9:01"), Timestamp("20130101 9:02")], + "value": [1.0, 2.0], + } + ) + + res = df.diff() + exp = DataFrame( + [[pd.NaT, np.nan], [pd.Timedelta("00:01:00"), 1]], columns=["time", "value"] + ) + tm.assert_frame_equal(res, exp) + + def test_diff_mixed_dtype(self): + df = DataFrame(np.random.randn(5, 3)) + df["A"] = np.array([1, 2, 3, 4, 5], dtype=object) + + result = df.diff() + assert result[0].dtype == np.float64 + + def test_diff_neg_n(self, datetime_frame): + rs = datetime_frame.diff(-1) + xp = datetime_frame - datetime_frame.shift(-1) + tm.assert_frame_equal(rs, xp) + + def test_diff_float_n(self, datetime_frame): + rs = datetime_frame.diff(1.0) + xp = datetime_frame.diff(1) + tm.assert_frame_equal(rs, xp) + + def test_diff_axis(self): + # GH#9727 + df = DataFrame([[1.0, 2.0], [3.0, 4.0]]) + tm.assert_frame_equal( + df.diff(axis=1), DataFrame([[np.nan, 1.0], [np.nan, 1.0]]) + ) + tm.assert_frame_equal( + df.diff(axis=0), DataFrame([[np.nan, np.nan], [2.0, 2.0]]) + ) + + def test_diff_period(self): + # GH#32995 Don't pass an incorrect axis + pi = date_range("2016-01-01", periods=3).to_period("D") + df = DataFrame({"A": pi}) + + result = df.diff(1, axis=1) + + expected = (df - pd.NaT).astype(object) + tm.assert_frame_equal(result, expected) + + def test_diff_axis1_mixed_dtypes(self): + # GH#32995 operate column-wise when we have mixed dtypes and axis=1 + df = DataFrame({"A": range(3), "B": 2 * np.arange(3, dtype=np.float64)}) + + expected = DataFrame({"A": [np.nan, np.nan, np.nan], "B": df["B"] / 2}) + + result = df.diff(axis=1) + tm.assert_frame_equal(result, expected) + + # GH#21437 mixed-float-dtypes + df = DataFrame( + {"a": np.arange(3, dtype="float32"), "b": np.arange(3, dtype="float64")} + ) + result = df.diff(axis=1) + expected = DataFrame({"a": df["a"] * np.nan, "b": df["b"] * 0}) + tm.assert_frame_equal(result, expected) + + def test_diff_axis1_mixed_dtypes_large_periods(self): + # GH#32995 operate column-wise when we have mixed dtypes and axis=1 + df = DataFrame({"A": range(3), "B": 2 * np.arange(3, dtype=np.float64)}) + + expected = df * np.nan + + result = df.diff(axis=1, periods=3) + tm.assert_frame_equal(result, expected) + + def test_diff_axis1_mixed_dtypes_negative_periods(self): + # GH#32995 operate column-wise when we have mixed dtypes and axis=1 + df = DataFrame({"A": range(3), "B": 2 * np.arange(3, dtype=np.float64)}) + + expected = DataFrame({"A": -1.0 * df["A"], "B": df["B"] * np.nan}) + + result = df.diff(axis=1, periods=-1) + tm.assert_frame_equal(result, expected) + + def 
test_diff_sparse(self): + # GH#28813 .diff() should work for sparse dataframes as well + sparse_df = DataFrame([[0, 1], [1, 0]], dtype="Sparse[int]") + + result = sparse_df.diff() + expected = DataFrame( + [[np.nan, np.nan], [1.0, -1.0]], dtype=pd.SparseDtype("float", 0.0) + ) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "axis,expected", + [ + ( + 0, + DataFrame( + { + "a": [np.nan, 0, 1, 0, np.nan, np.nan, np.nan, 0], + "b": [np.nan, 1, np.nan, np.nan, -2, 1, np.nan, np.nan], + "c": np.repeat(np.nan, 8), + "d": [np.nan, 3, 5, 7, 9, 11, 13, 15], + }, + dtype="Int64", + ), + ), + ( + 1, + DataFrame( + { + "a": np.repeat(np.nan, 8), + "b": [0, 1, np.nan, 1, np.nan, np.nan, np.nan, 0], + "c": np.repeat(np.nan, 8), + "d": np.repeat(np.nan, 8), + }, + dtype="Int64", + ), + ), + ], + ) + def test_diff_integer_na(self, axis, expected): + # GH#24171 IntegerNA Support for DataFrame.diff() + df = DataFrame( + { + "a": np.repeat([0, 1, np.nan, 2], 2), + "b": np.tile([0, 1, np.nan, 2], 2), + "c": np.repeat(np.nan, 8), + "d": np.arange(1, 9) ** 2, + }, + dtype="Int64", + ) + + # Test case for default behaviour of diff + result = df.diff(axis=axis) + tm.assert_frame_equal(result, expected) + + def test_diff_readonly(self): + # https://github.com/pandas-dev/pandas/issues/35559 + arr = np.random.randn(5, 2) + arr.flags.writeable = False + df = DataFrame(arr) + result = df.diff() + expected = DataFrame(np.array(df)).diff() + tm.assert_frame_equal(result, expected) + + def test_diff_all_int_dtype(self, any_int_numpy_dtype): + # GH 14773 + df = DataFrame(range(5)) + df = df.astype(any_int_numpy_dtype) + result = df.diff() + expected_dtype = ( + "float32" if any_int_numpy_dtype in ("int8", "int16") else "float64" + ) + expected = DataFrame([np.nan, 1.0, 1.0, 1.0, 1.0], dtype=expected_dtype) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_dot.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_dot.py new file mode 100644 index 0000000..555e5f0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_dot.py @@ -0,0 +1,131 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Series, +) +import pandas._testing as tm + + +class DotSharedTests: + @pytest.fixture + def obj(self): + raise NotImplementedError + + @pytest.fixture + def other(self) -> DataFrame: + """ + other is a DataFrame that is indexed so that obj.dot(other) is valid + """ + raise NotImplementedError + + @pytest.fixture + def expected(self, obj, other) -> DataFrame: + """ + The expected result of obj.dot(other) + """ + raise NotImplementedError + + @classmethod + def reduced_dim_assert(cls, result, expected): + """ + Assertion about results with 1 fewer dimension than self.obj + """ + raise NotImplementedError + + def test_dot_equiv_values_dot(self, obj, other, expected): + # `expected` is constructed from obj.values.dot(other.values) + result = obj.dot(other) + tm.assert_equal(result, expected) + + def test_dot_2d_ndarray(self, obj, other, expected): + # Check ndarray argument; in this case we get matching values, + # but index/columns may not match + result = obj.dot(other.values) + assert np.all(result == expected.values) + + def test_dot_1d_ndarray(self, obj, expected): + # can pass correct-length array + row = obj.iloc[0] if obj.ndim == 2 else obj + + result = obj.dot(row.values) + expected = obj.dot(row) + self.reduced_dim_assert(result, expected) + +
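# A rough illustration of the dot/alignment behaviour exercised below + # (toy values, not part of the fixtures): + # >>> df = DataFrame([[1, 2]], columns=["p", "q"]) + # >>> other = DataFrame([[1.0], [1.0]], index=["p", "q"], columns=["1"]) + # >>> df.dot(other)["1"].item() # df's columns align with other's index + # 3.0 +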
+ def test_dot_series(self, obj, other, expected): + # Check series argument + result = obj.dot(other["1"]) + self.reduced_dim_assert(result, expected["1"]) + + def test_dot_series_alignment(self, obj, other, expected): + result = obj.dot(other.iloc[::-1]["1"]) + self.reduced_dim_assert(result, expected["1"]) + + def test_dot_aligns(self, obj, other, expected): + # Check index alignment + other2 = other.iloc[::-1] + result = obj.dot(other2) + tm.assert_equal(result, expected) + + def test_dot_shape_mismatch(self, obj): + msg = "Dot product shape mismatch" + # exception raised is of type Exception + with pytest.raises(Exception, match=msg): + obj.dot(obj.values[:3]) + + def test_dot_misaligned(self, obj, other): + msg = "matrices are not aligned" + with pytest.raises(ValueError, match=msg): + obj.dot(other.T) + + +class TestSeriesDot(DotSharedTests): + @pytest.fixture + def obj(self): + return Series(np.random.randn(4), index=["p", "q", "r", "s"]) + + @pytest.fixture + def other(self): + return DataFrame( + np.random.randn(3, 4), index=["1", "2", "3"], columns=["p", "q", "r", "s"] + ).T + + @pytest.fixture + def expected(self, obj, other): + return Series(np.dot(obj.values, other.values), index=other.columns) + + @classmethod + def reduced_dim_assert(cls, result, expected): + """ + Assertion about results with 1 fewer dimension than self.obj + """ + tm.assert_almost_equal(result, expected) + + +class TestDataFrameDot(DotSharedTests): + @pytest.fixture + def obj(self): + return DataFrame( + np.random.randn(3, 4), index=["a", "b", "c"], columns=["p", "q", "r", "s"] + ) + + @pytest.fixture + def other(self): + return DataFrame( + np.random.randn(4, 2), index=["p", "q", "r", "s"], columns=["1", "2"] + ) + + @pytest.fixture + def expected(self, obj, other): + return DataFrame( + np.dot(obj.values, other.values), index=obj.index, columns=other.columns + ) + + @classmethod + def reduced_dim_assert(cls, result, expected): + """ + Assertion about results with 1 fewer dimension than self.obj + """ + tm.assert_series_equal(result, expected, check_names=False) + assert result.name is None diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_drop.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_drop.py new file mode 100644 index 0000000..4f8ea6e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_drop.py @@ -0,0 +1,539 @@ +import re + +import numpy as np +import pytest + +from pandas.errors import PerformanceWarning + +import pandas as pd +from pandas import ( + DataFrame, + DatetimeIndex, + Index, + MultiIndex, + Series, + Timestamp, +) +import pandas._testing as tm + + +@pytest.mark.parametrize( + "msg,labels,level", + [ + (r"labels \[4\] not found in level", 4, "a"), + (r"labels \[7\] not found in level", 7, "b"), + ], +) +def test_drop_raise_exception_if_labels_not_in_level(msg, labels, level): + # GH 8594 + mi = MultiIndex.from_arrays([[1, 2, 3], [4, 5, 6]], names=["a", "b"]) + s = Series([10, 20, 30], index=mi) + df = DataFrame([10, 20, 30], index=mi) + + with pytest.raises(KeyError, match=msg): + s.drop(labels, level=level) + with pytest.raises(KeyError, match=msg): + df.drop(labels, level=level) + + +@pytest.mark.parametrize("labels,level", [(4, "a"), (7, "b")]) +def test_drop_errors_ignore(labels, level): + # GH 8594 + mi = MultiIndex.from_arrays([[1, 2, 3], [4, 5, 6]], names=["a", "b"]) + s = Series([10, 20, 30], index=mi) + df = DataFrame([10, 20, 30], index=mi) + + expected_s = s.drop(labels, level=level,
errors="ignore") + tm.assert_series_equal(s, expected_s) + + expected_df = df.drop(labels, level=level, errors="ignore") + tm.assert_frame_equal(df, expected_df) + + +def test_drop_with_non_unique_datetime_index_and_invalid_keys(): + # GH 30399 + + # define dataframe with unique datetime index + df = DataFrame( + np.random.randn(5, 3), + columns=["a", "b", "c"], + index=pd.date_range("2012", freq="H", periods=5), + ) + # create dataframe with non-unique datetime index + df = df.iloc[[0, 2, 2, 3]].copy() + + with pytest.raises(KeyError, match="not found in axis"): + df.drop(["a", "b"]) # Dropping with labels not exist in the index + + +class TestDataFrameDrop: + def test_drop_names(self): + df = DataFrame( + [[1, 2, 3], [3, 4, 5], [5, 6, 7]], + index=["a", "b", "c"], + columns=["d", "e", "f"], + ) + df.index.name, df.columns.name = "first", "second" + df_dropped_b = df.drop("b") + df_dropped_e = df.drop("e", axis=1) + df_inplace_b, df_inplace_e = df.copy(), df.copy() + return_value = df_inplace_b.drop("b", inplace=True) + assert return_value is None + return_value = df_inplace_e.drop("e", axis=1, inplace=True) + assert return_value is None + for obj in (df_dropped_b, df_dropped_e, df_inplace_b, df_inplace_e): + assert obj.index.name == "first" + assert obj.columns.name == "second" + assert list(df.columns) == ["d", "e", "f"] + + msg = r"\['g'\] not found in axis" + with pytest.raises(KeyError, match=msg): + df.drop(["g"]) + with pytest.raises(KeyError, match=msg): + df.drop(["g"], axis=1) + + # errors = 'ignore' + dropped = df.drop(["g"], errors="ignore") + expected = Index(["a", "b", "c"], name="first") + tm.assert_index_equal(dropped.index, expected) + + dropped = df.drop(["b", "g"], errors="ignore") + expected = Index(["a", "c"], name="first") + tm.assert_index_equal(dropped.index, expected) + + dropped = df.drop(["g"], axis=1, errors="ignore") + expected = Index(["d", "e", "f"], name="second") + tm.assert_index_equal(dropped.columns, expected) + + dropped = df.drop(["d", "g"], axis=1, errors="ignore") + expected = Index(["e", "f"], name="second") + tm.assert_index_equal(dropped.columns, expected) + + # GH 16398 + dropped = df.drop([], errors="ignore") + expected = Index(["a", "b", "c"], name="first") + tm.assert_index_equal(dropped.index, expected) + + def test_drop(self): + simple = DataFrame({"A": [1, 2, 3, 4], "B": [0, 1, 2, 3]}) + tm.assert_frame_equal(simple.drop("A", axis=1), simple[["B"]]) + tm.assert_frame_equal(simple.drop(["A", "B"], axis="columns"), simple[[]]) + tm.assert_frame_equal(simple.drop([0, 1, 3], axis=0), simple.loc[[2], :]) + tm.assert_frame_equal(simple.drop([0, 3], axis="index"), simple.loc[[1, 2], :]) + + with pytest.raises(KeyError, match=r"\[5\] not found in axis"): + simple.drop(5) + with pytest.raises(KeyError, match=r"\['C'\] not found in axis"): + simple.drop("C", axis=1) + with pytest.raises(KeyError, match=r"\[5\] not found in axis"): + simple.drop([1, 5]) + with pytest.raises(KeyError, match=r"\['C'\] not found in axis"): + simple.drop(["A", "C"], axis=1) + + # GH 42881 + with pytest.raises(KeyError, match=r"\['C', 'D', 'F'\] not found in axis"): + simple.drop(["C", "D", "F"], axis=1) + + # errors = 'ignore' + tm.assert_frame_equal(simple.drop(5, errors="ignore"), simple) + tm.assert_frame_equal( + simple.drop([0, 5], errors="ignore"), simple.loc[[1, 2, 3], :] + ) + tm.assert_frame_equal(simple.drop("C", axis=1, errors="ignore"), simple) + tm.assert_frame_equal( + simple.drop(["A", "C"], axis=1, errors="ignore"), simple[["B"]] + ) + + # non-unique - 
wheee! + nu_df = DataFrame( + list(zip(range(3), range(-3, 1), list("abc"))), columns=["a", "a", "b"] + ) + tm.assert_frame_equal(nu_df.drop("a", axis=1), nu_df[["b"]]) + tm.assert_frame_equal(nu_df.drop("b", axis="columns"), nu_df["a"]) + tm.assert_frame_equal(nu_df.drop([]), nu_df) # GH 16398 + + nu_df = nu_df.set_index(Index(["X", "Y", "X"])) + nu_df.columns = list("abc") + tm.assert_frame_equal(nu_df.drop("X", axis="rows"), nu_df.loc[["Y"], :]) + tm.assert_frame_equal(nu_df.drop(["X", "Y"], axis=0), nu_df.loc[[], :]) + + # inplace cache issue + # GH#5628 + df = DataFrame(np.random.randn(10, 3), columns=list("abc")) + expected = df[~(df.b > 0)] + return_value = df.drop(labels=df[df.b > 0].index, inplace=True) + assert return_value is None + tm.assert_frame_equal(df, expected) + + def test_drop_multiindex_not_lexsorted(self): + # GH#11640 + + # define the lexsorted version + lexsorted_mi = MultiIndex.from_tuples( + [("a", ""), ("b1", "c1"), ("b2", "c2")], names=["b", "c"] + ) + lexsorted_df = DataFrame([[1, 3, 4]], columns=lexsorted_mi) + assert lexsorted_df.columns._is_lexsorted() + + # define the non-lexsorted version + not_lexsorted_df = DataFrame( + columns=["a", "b", "c", "d"], data=[[1, "b1", "c1", 3], [1, "b2", "c2", 4]] + ) + not_lexsorted_df = not_lexsorted_df.pivot_table( + index="a", columns=["b", "c"], values="d" + ) + not_lexsorted_df = not_lexsorted_df.reset_index() + assert not not_lexsorted_df.columns._is_lexsorted() + + # compare the results + tm.assert_frame_equal(lexsorted_df, not_lexsorted_df) + + expected = lexsorted_df.drop("a", axis=1) + with tm.assert_produces_warning(PerformanceWarning): + result = not_lexsorted_df.drop("a", axis=1) + + tm.assert_frame_equal(result, expected) + + def test_drop_api_equivalence(self): + # equivalence of the labels/axis and index/columns API's (GH#12392) + df = DataFrame( + [[1, 2, 3], [3, 4, 5], [5, 6, 7]], + index=["a", "b", "c"], + columns=["d", "e", "f"], + ) + + res1 = df.drop("a") + res2 = df.drop(index="a") + tm.assert_frame_equal(res1, res2) + + res1 = df.drop("d", axis=1) + res2 = df.drop(columns="d") + tm.assert_frame_equal(res1, res2) + + res1 = df.drop(labels="e", axis=1) + res2 = df.drop(columns="e") + tm.assert_frame_equal(res1, res2) + + res1 = df.drop(["a"], axis=0) + res2 = df.drop(index=["a"]) + tm.assert_frame_equal(res1, res2) + + res1 = df.drop(["a"], axis=0).drop(["d"], axis=1) + res2 = df.drop(index=["a"], columns=["d"]) + tm.assert_frame_equal(res1, res2) + + msg = "Cannot specify both 'labels' and 'index'/'columns'" + with pytest.raises(ValueError, match=msg): + df.drop(labels="a", index="b") + + with pytest.raises(ValueError, match=msg): + df.drop(labels="a", columns="b") + + msg = "Need to specify at least one of 'labels', 'index' or 'columns'" + with pytest.raises(ValueError, match=msg): + df.drop(axis=1) + + data = [[1, 2, 3], [1, 2, 3]] + + @pytest.mark.parametrize( + "actual", + [ + DataFrame(data=data, index=["a", "a"]), + DataFrame(data=data, index=["a", "b"]), + DataFrame(data=data, index=["a", "b"]).set_index([0, 1]), + DataFrame(data=data, index=["a", "a"]).set_index([0, 1]), + ], + ) + def test_raise_on_drop_duplicate_index(self, actual): + + # GH#19186 + level = 0 if isinstance(actual.index, MultiIndex) else None + msg = re.escape("\"['c'] not found in axis\"") + with pytest.raises(KeyError, match=msg): + actual.drop("c", level=level, axis=0) + with pytest.raises(KeyError, match=msg): + actual.T.drop("c", level=level, axis=1) + expected_no_err = actual.drop("c", axis=0, level=level, 
errors="ignore") + tm.assert_frame_equal(expected_no_err, actual) + expected_no_err = actual.T.drop("c", axis=1, level=level, errors="ignore") + tm.assert_frame_equal(expected_no_err.T, actual) + + @pytest.mark.parametrize("index", [[1, 2, 3], [1, 1, 2]]) + @pytest.mark.parametrize("drop_labels", [[], [1], [2]]) + def test_drop_empty_list(self, index, drop_labels): + # GH#21494 + expected_index = [i for i in index if i not in drop_labels] + frame = DataFrame(index=index).drop(drop_labels) + tm.assert_frame_equal(frame, DataFrame(index=expected_index)) + + @pytest.mark.parametrize("index", [[1, 2, 3], [1, 2, 2]]) + @pytest.mark.parametrize("drop_labels", [[1, 4], [4, 5]]) + def test_drop_non_empty_list(self, index, drop_labels): + # GH# 21494 + with pytest.raises(KeyError, match="not found in axis"): + DataFrame(index=index).drop(drop_labels) + + @pytest.mark.parametrize( + "empty_listlike", + [ + [], + {}, + np.array([]), + Series([], dtype="datetime64[ns]"), + Index([]), + DatetimeIndex([]), + ], + ) + def test_drop_empty_listlike_non_unique_datetime_index(self, empty_listlike): + # GH#27994 + data = {"column_a": [5, 10], "column_b": ["one", "two"]} + index = [Timestamp("2021-01-01"), Timestamp("2021-01-01")] + df = DataFrame(data, index=index) + + # Passing empty list-like should return the same DataFrame. + expected = df.copy() + result = df.drop(empty_listlike) + tm.assert_frame_equal(result, expected) + + def test_mixed_depth_drop(self): + arrays = [ + ["a", "top", "top", "routine1", "routine1", "routine2"], + ["", "OD", "OD", "result1", "result2", "result1"], + ["", "wx", "wy", "", "", ""], + ] + + tuples = sorted(zip(*arrays)) + index = MultiIndex.from_tuples(tuples) + df = DataFrame(np.random.randn(4, 6), columns=index) + + result = df.drop("a", axis=1) + expected = df.drop([("a", "", "")], axis=1) + tm.assert_frame_equal(expected, result) + + result = df.drop(["top"], axis=1) + expected = df.drop([("top", "OD", "wx")], axis=1) + expected = expected.drop([("top", "OD", "wy")], axis=1) + tm.assert_frame_equal(expected, result) + + result = df.drop(("top", "OD", "wx"), axis=1) + expected = df.drop([("top", "OD", "wx")], axis=1) + tm.assert_frame_equal(expected, result) + + expected = df.drop([("top", "OD", "wy")], axis=1) + expected = df.drop("top", axis=1) + + result = df.drop("result1", level=1, axis=1) + expected = df.drop( + [("routine1", "result1", ""), ("routine2", "result1", "")], axis=1 + ) + tm.assert_frame_equal(expected, result) + + def test_drop_multiindex_other_level_nan(self): + # GH#12754 + df = ( + DataFrame( + { + "A": ["one", "one", "two", "two"], + "B": [np.nan, 0.0, 1.0, 2.0], + "C": ["a", "b", "c", "c"], + "D": [1, 2, 3, 4], + } + ) + .set_index(["A", "B", "C"]) + .sort_index() + ) + result = df.drop("c", level="C") + expected = DataFrame( + [2, 1], + columns=["D"], + index=MultiIndex.from_tuples( + [("one", 0.0, "b"), ("one", np.nan, "a")], names=["A", "B", "C"] + ), + ) + tm.assert_frame_equal(result, expected) + + def test_drop_nonunique(self): + df = DataFrame( + [ + ["x-a", "x", "a", 1.5], + ["x-a", "x", "a", 1.2], + ["z-c", "z", "c", 3.1], + ["x-a", "x", "a", 4.1], + ["x-b", "x", "b", 5.1], + ["x-b", "x", "b", 4.1], + ["x-b", "x", "b", 2.2], + ["y-a", "y", "a", 1.2], + ["z-b", "z", "b", 2.1], + ], + columns=["var1", "var2", "var3", "var4"], + ) + + grp_size = df.groupby("var1").size() + drop_idx = grp_size.loc[grp_size == 1] + + idf = df.set_index(["var1", "var2", "var3"]) + + # it works! 
GH#2101 + result = idf.drop(drop_idx.index, level=0).reset_index() + expected = df[-df.var1.isin(drop_idx.index)] + + result.index = expected.index + + tm.assert_frame_equal(result, expected) + + def test_drop_level(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + + result = frame.drop(["bar", "qux"], level="first") + expected = frame.iloc[[0, 1, 2, 5, 6]] + tm.assert_frame_equal(result, expected) + + result = frame.drop(["two"], level="second") + expected = frame.iloc[[0, 2, 3, 6, 7, 9]] + tm.assert_frame_equal(result, expected) + + result = frame.T.drop(["bar", "qux"], axis=1, level="first") + expected = frame.iloc[[0, 1, 2, 5, 6]].T + tm.assert_frame_equal(result, expected) + + result = frame.T.drop(["two"], axis=1, level="second") + expected = frame.iloc[[0, 2, 3, 6, 7, 9]].T + tm.assert_frame_equal(result, expected) + + def test_drop_level_nonunique_datetime(self): + # GH#12701 + idx = Index([2, 3, 4, 4, 5], name="id") + idxdt = pd.to_datetime( + [ + "201603231400", + "201603231500", + "201603231600", + "201603231600", + "201603231700", + ] + ) + df = DataFrame(np.arange(10).reshape(5, 2), columns=list("ab"), index=idx) + df["tstamp"] = idxdt + df = df.set_index("tstamp", append=True) + ts = Timestamp("201603231600") + assert df.index.is_unique is False + + result = df.drop(ts, level="tstamp") + expected = df.loc[idx != 4] + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("box", [Series, DataFrame]) + def test_drop_tz_aware_timestamp_across_dst(self, box): + # GH#21761 + start = Timestamp("2017-10-29", tz="Europe/Berlin") + end = Timestamp("2017-10-29 04:00:00", tz="Europe/Berlin") + index = pd.date_range(start, end, freq="15min") + data = box(data=[1] * len(index), index=index) + result = data.drop(start) + expected_start = Timestamp("2017-10-29 00:15:00", tz="Europe/Berlin") + expected_idx = pd.date_range(expected_start, end, freq="15min") + expected = box(data=[1] * len(expected_idx), index=expected_idx) + tm.assert_equal(result, expected) + + def test_drop_preserve_names(self): + index = MultiIndex.from_arrays( + [[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3]], names=["one", "two"] + ) + + df = DataFrame(np.random.randn(6, 3), index=index) + + result = df.drop([(0, 2)]) + assert result.index.names == ("one", "two") + + @pytest.mark.parametrize( + "operation", ["__iadd__", "__isub__", "__imul__", "__ipow__"] + ) + @pytest.mark.parametrize("inplace", [False, True]) + def test_inplace_drop_and_operation(self, operation, inplace): + # GH#30484 + df = DataFrame({"x": range(5)}) + expected = df.copy() + df["y"] = range(5) + y = df["y"] + + with tm.assert_produces_warning(None): + if inplace: + df.drop("y", axis=1, inplace=inplace) + else: + df = df.drop("y", axis=1, inplace=inplace) + + # Perform operation and check result + getattr(y, operation)(1) + tm.assert_frame_equal(df, expected) + + def test_drop_with_non_unique_multiindex(self): + # GH#36293 + mi = MultiIndex.from_arrays([["x", "y", "x"], ["i", "j", "i"]]) + df = DataFrame([1, 2, 3], index=mi) + result = df.drop(index="x") + expected = DataFrame([2], index=MultiIndex.from_arrays([["y"], ["j"]])) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("indexer", [("a", "a"), [("a", "a")]]) + def test_drop_tuple_with_non_unique_multiindex(self, indexer): + # GH#42771 + idx = MultiIndex.from_product([["a", "b"], ["a", "a"]]) + df = DataFrame({"x": range(len(idx))}, index=idx) + result = df.drop(index=[("a", "a")]) + expected = DataFrame( + {"x": [2, 3]}, 
index=MultiIndex.from_tuples([("b", "a"), ("b", "a")]) + ) + tm.assert_frame_equal(result, expected) + + def test_drop_with_duplicate_columns(self): + df = DataFrame( + [[1, 5, 7.0], [1, 5, 7.0], [1, 5, 7.0]], columns=["bar", "a", "a"] + ) + result = df.drop(["a"], axis=1) + expected = DataFrame([[1], [1], [1]], columns=["bar"]) + tm.assert_frame_equal(result, expected) + result = df.drop("a", axis=1) + tm.assert_frame_equal(result, expected) + + def test_drop_with_duplicate_columns2(self): + # drop buggy GH#6240 + df = DataFrame( + { + "A": np.random.randn(5), + "B": np.random.randn(5), + "C": np.random.randn(5), + "D": ["a", "b", "c", "d", "e"], + } + ) + + expected = df.take([0, 1, 1], axis=1) + df2 = df.take([2, 0, 1, 2, 1], axis=1) + result = df2.drop("C", axis=1) + tm.assert_frame_equal(result, expected) + + def test_drop_pos_args_deprecation(self): + # https://github.com/pandas-dev/pandas/issues/41485 + df = DataFrame({"a": [1, 2, 3]}) + msg = ( + r"In a future version of pandas all arguments of DataFrame\.drop " + r"except for the argument 'labels' will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.drop("a", 1) + expected = DataFrame(index=[0, 1, 2]) + tm.assert_frame_equal(result, expected) + + def test_drop_inplace_no_leftover_column_reference(self): + # GH 13934 + df = DataFrame({"a": [1, 2, 3]}) + a = df.a + df.drop(["a"], axis=1, inplace=True) + tm.assert_index_equal(df.columns, Index([], dtype="object")) + a -= a.mean() + tm.assert_index_equal(df.columns, Index([], dtype="object")) + + def test_drop_level_missing_label_multiindex(self): + # GH 18561 + df = DataFrame(index=MultiIndex.from_product([range(3), range(3)])) + with pytest.raises(KeyError, match="labels \\[5\\] not found in level"): + df.drop(5, level=0) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_drop_duplicates.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_drop_duplicates.py new file mode 100644 index 0000000..cd61f59 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_drop_duplicates.py @@ -0,0 +1,488 @@ +from datetime import datetime +import re + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + NaT, + concat, +) +import pandas._testing as tm + + +@pytest.mark.parametrize("subset", ["a", ["a"], ["a", "B"]]) +def test_drop_duplicates_with_misspelled_column_name(subset): + # GH 19730 + df = DataFrame({"A": [0, 0, 1], "B": [0, 0, 1], "C": [0, 0, 1]}) + msg = re.escape("Index(['a'], dtype='object')") + + with pytest.raises(KeyError, match=msg): + df.drop_duplicates(subset) + + +def test_drop_duplicates(): + df = DataFrame( + { + "AAA": ["foo", "bar", "foo", "bar", "foo", "bar", "bar", "foo"], + "B": ["one", "one", "two", "two", "two", "two", "one", "two"], + "C": [1, 1, 2, 2, 2, 2, 1, 2], + "D": range(8), + } + ) + # single column + result = df.drop_duplicates("AAA") + expected = df[:2] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates("AAA", keep="last") + expected = df.loc[[6, 7]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates("AAA", keep=False) + expected = df.loc[[]] + tm.assert_frame_equal(result, expected) + assert len(result) == 0 + + # multi column + expected = df.loc[[0, 1, 2, 3]] + result = df.drop_duplicates(np.array(["AAA", "B"])) + tm.assert_frame_equal(result, expected) + result = df.drop_duplicates(["AAA", "B"]) + tm.assert_frame_equal(result, expected) + + result = 
df.drop_duplicates(("AAA", "B"), keep="last") + expected = df.loc[[0, 5, 6, 7]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates(("AAA", "B"), keep=False) + expected = df.loc[[0]] + tm.assert_frame_equal(result, expected) + + # consider everything + df2 = df.loc[:, ["AAA", "B", "C"]] + + result = df2.drop_duplicates() + # in this case only + expected = df2.drop_duplicates(["AAA", "B"]) + tm.assert_frame_equal(result, expected) + + result = df2.drop_duplicates(keep="last") + expected = df2.drop_duplicates(["AAA", "B"], keep="last") + tm.assert_frame_equal(result, expected) + + result = df2.drop_duplicates(keep=False) + expected = df2.drop_duplicates(["AAA", "B"], keep=False) + tm.assert_frame_equal(result, expected) + + # integers + result = df.drop_duplicates("C") + expected = df.iloc[[0, 2]] + tm.assert_frame_equal(result, expected) + result = df.drop_duplicates("C", keep="last") + expected = df.iloc[[-2, -1]] + tm.assert_frame_equal(result, expected) + + df["E"] = df["C"].astype("int8") + result = df.drop_duplicates("E") + expected = df.iloc[[0, 2]] + tm.assert_frame_equal(result, expected) + result = df.drop_duplicates("E", keep="last") + expected = df.iloc[[-2, -1]] + tm.assert_frame_equal(result, expected) + + # GH 11376 + df = DataFrame({"x": [7, 6, 3, 3, 4, 8, 0], "y": [0, 6, 5, 5, 9, 1, 2]}) + expected = df.loc[df.index != 3] + tm.assert_frame_equal(df.drop_duplicates(), expected) + + df = DataFrame([[1, 0], [0, 2]]) + tm.assert_frame_equal(df.drop_duplicates(), df) + + df = DataFrame([[-2, 0], [0, -4]]) + tm.assert_frame_equal(df.drop_duplicates(), df) + + x = np.iinfo(np.int64).max / 3 * 2 + df = DataFrame([[-x, x], [0, x + 4]]) + tm.assert_frame_equal(df.drop_duplicates(), df) + + df = DataFrame([[-x, x], [x, x + 4]]) + tm.assert_frame_equal(df.drop_duplicates(), df) + + # GH 11864 + df = DataFrame([i] * 9 for i in range(16)) + df = concat([df, DataFrame([[1] + [0] * 8])], ignore_index=True) + + for keep in ["first", "last", False]: + assert df.duplicated(keep=keep).sum() == 0 + + +def test_drop_duplicates_with_duplicate_column_names(): + # GH17836 + df = DataFrame([[1, 2, 5], [3, 4, 6], [3, 4, 7]], columns=["a", "a", "b"]) + + result0 = df.drop_duplicates() + tm.assert_frame_equal(result0, df) + + result1 = df.drop_duplicates("a") + expected1 = df[:2] + tm.assert_frame_equal(result1, expected1) + + +def test_drop_duplicates_for_take_all(): + df = DataFrame( + { + "AAA": ["foo", "bar", "baz", "bar", "foo", "bar", "qux", "foo"], + "B": ["one", "one", "two", "two", "two", "two", "one", "two"], + "C": [1, 1, 2, 2, 2, 2, 1, 2], + "D": range(8), + } + ) + # single column + result = df.drop_duplicates("AAA") + expected = df.iloc[[0, 1, 2, 6]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates("AAA", keep="last") + expected = df.iloc[[2, 5, 6, 7]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates("AAA", keep=False) + expected = df.iloc[[2, 6]] + tm.assert_frame_equal(result, expected) + + # multiple columns + result = df.drop_duplicates(["AAA", "B"]) + expected = df.iloc[[0, 1, 2, 3, 4, 6]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates(["AAA", "B"], keep="last") + expected = df.iloc[[0, 1, 2, 5, 6, 7]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates(["AAA", "B"], keep=False) + expected = df.iloc[[0, 1, 2, 6]] + tm.assert_frame_equal(result, expected) + + +def test_drop_duplicates_tuple(): + df = DataFrame( + { + ("AA", "AB"): ["foo", "bar", "foo", "bar", "foo", 
"bar", "bar", "foo"], + "B": ["one", "one", "two", "two", "two", "two", "one", "two"], + "C": [1, 1, 2, 2, 2, 2, 1, 2], + "D": range(8), + } + ) + # single column + result = df.drop_duplicates(("AA", "AB")) + expected = df[:2] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates(("AA", "AB"), keep="last") + expected = df.loc[[6, 7]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates(("AA", "AB"), keep=False) + expected = df.loc[[]] # empty df + assert len(result) == 0 + tm.assert_frame_equal(result, expected) + + # multi column + expected = df.loc[[0, 1, 2, 3]] + result = df.drop_duplicates((("AA", "AB"), "B")) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "df", + [ + DataFrame(), + DataFrame(columns=[]), + DataFrame(columns=["A", "B", "C"]), + DataFrame(index=[]), + DataFrame(index=["A", "B", "C"]), + ], +) +def test_drop_duplicates_empty(df): + # GH 20516 + result = df.drop_duplicates() + tm.assert_frame_equal(result, df) + + result = df.copy() + result.drop_duplicates(inplace=True) + tm.assert_frame_equal(result, df) + + +def test_drop_duplicates_NA(): + # none + df = DataFrame( + { + "A": [None, None, "foo", "bar", "foo", "bar", "bar", "foo"], + "B": ["one", "one", "two", "two", "two", "two", "one", "two"], + "C": [1.0, np.nan, np.nan, np.nan, 1.0, 1.0, 1, 1.0], + "D": range(8), + } + ) + # single column + result = df.drop_duplicates("A") + expected = df.loc[[0, 2, 3]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates("A", keep="last") + expected = df.loc[[1, 6, 7]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates("A", keep=False) + expected = df.loc[[]] # empty df + tm.assert_frame_equal(result, expected) + assert len(result) == 0 + + # multi column + result = df.drop_duplicates(["A", "B"]) + expected = df.loc[[0, 2, 3, 6]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates(["A", "B"], keep="last") + expected = df.loc[[1, 5, 6, 7]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates(["A", "B"], keep=False) + expected = df.loc[[6]] + tm.assert_frame_equal(result, expected) + + # nan + df = DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "bar", "foo"], + "B": ["one", "one", "two", "two", "two", "two", "one", "two"], + "C": [1.0, np.nan, np.nan, np.nan, 1.0, 1.0, 1, 1.0], + "D": range(8), + } + ) + # single column + result = df.drop_duplicates("C") + expected = df[:2] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates("C", keep="last") + expected = df.loc[[3, 7]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates("C", keep=False) + expected = df.loc[[]] # empty df + tm.assert_frame_equal(result, expected) + assert len(result) == 0 + + # multi column + result = df.drop_duplicates(["C", "B"]) + expected = df.loc[[0, 1, 2, 4]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates(["C", "B"], keep="last") + expected = df.loc[[1, 3, 6, 7]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates(["C", "B"], keep=False) + expected = df.loc[[1]] + tm.assert_frame_equal(result, expected) + + +def test_drop_duplicates_NA_for_take_all(): + # none + df = DataFrame( + { + "A": [None, None, "foo", "bar", "foo", "baz", "bar", "qux"], + "C": [1.0, np.nan, np.nan, np.nan, 1.0, 2.0, 3, 1.0], + } + ) + + # single column + result = df.drop_duplicates("A") + expected = df.iloc[[0, 2, 3, 5, 7]] + tm.assert_frame_equal(result, expected) + + result 
= df.drop_duplicates("A", keep="last") + expected = df.iloc[[1, 4, 5, 6, 7]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates("A", keep=False) + expected = df.iloc[[5, 7]] + tm.assert_frame_equal(result, expected) + + # nan + + # single column + result = df.drop_duplicates("C") + expected = df.iloc[[0, 1, 5, 6]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates("C", keep="last") + expected = df.iloc[[3, 5, 6, 7]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates("C", keep=False) + expected = df.iloc[[5, 6]] + tm.assert_frame_equal(result, expected) + + +def test_drop_duplicates_inplace(): + orig = DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "bar", "foo"], + "B": ["one", "one", "two", "two", "two", "two", "one", "two"], + "C": [1, 1, 2, 2, 2, 2, 1, 2], + "D": range(8), + } + ) + # single column + df = orig.copy() + return_value = df.drop_duplicates("A", inplace=True) + expected = orig[:2] + result = df + tm.assert_frame_equal(result, expected) + assert return_value is None + + df = orig.copy() + return_value = df.drop_duplicates("A", keep="last", inplace=True) + expected = orig.loc[[6, 7]] + result = df + tm.assert_frame_equal(result, expected) + assert return_value is None + + df = orig.copy() + return_value = df.drop_duplicates("A", keep=False, inplace=True) + expected = orig.loc[[]] + result = df + tm.assert_frame_equal(result, expected) + assert len(df) == 0 + assert return_value is None + + # multi column + df = orig.copy() + return_value = df.drop_duplicates(["A", "B"], inplace=True) + expected = orig.loc[[0, 1, 2, 3]] + result = df + tm.assert_frame_equal(result, expected) + assert return_value is None + + df = orig.copy() + return_value = df.drop_duplicates(["A", "B"], keep="last", inplace=True) + expected = orig.loc[[0, 5, 6, 7]] + result = df + tm.assert_frame_equal(result, expected) + assert return_value is None + + df = orig.copy() + return_value = df.drop_duplicates(["A", "B"], keep=False, inplace=True) + expected = orig.loc[[0]] + result = df + tm.assert_frame_equal(result, expected) + assert return_value is None + + # consider everything + orig2 = orig.loc[:, ["A", "B", "C"]].copy() + + df2 = orig2.copy() + return_value = df2.drop_duplicates(inplace=True) + # in this case only + expected = orig2.drop_duplicates(["A", "B"]) + result = df2 + tm.assert_frame_equal(result, expected) + assert return_value is None + + df2 = orig2.copy() + return_value = df2.drop_duplicates(keep="last", inplace=True) + expected = orig2.drop_duplicates(["A", "B"], keep="last") + result = df2 + tm.assert_frame_equal(result, expected) + assert return_value is None + + df2 = orig2.copy() + return_value = df2.drop_duplicates(keep=False, inplace=True) + expected = orig2.drop_duplicates(["A", "B"], keep=False) + result = df2 + tm.assert_frame_equal(result, expected) + assert return_value is None + + +@pytest.mark.parametrize("inplace", [True, False]) +@pytest.mark.parametrize( + "origin_dict, output_dict, ignore_index, output_index", + [ + ({"A": [2, 2, 3]}, {"A": [2, 3]}, True, [0, 1]), + ({"A": [2, 2, 3]}, {"A": [2, 3]}, False, [0, 2]), + ({"A": [2, 2, 3], "B": [2, 2, 4]}, {"A": [2, 3], "B": [2, 4]}, True, [0, 1]), + ({"A": [2, 2, 3], "B": [2, 2, 4]}, {"A": [2, 3], "B": [2, 4]}, False, [0, 2]), + ], +) +def test_drop_duplicates_ignore_index( + inplace, origin_dict, output_dict, ignore_index, output_index +): + # GH 30114 + df = DataFrame(origin_dict) + expected = DataFrame(output_dict, index=output_index) + + if 
inplace: + result_df = df.copy() + result_df.drop_duplicates(ignore_index=ignore_index, inplace=inplace) + else: + result_df = df.drop_duplicates(ignore_index=ignore_index, inplace=inplace) + + tm.assert_frame_equal(result_df, expected) + tm.assert_frame_equal(df, DataFrame(origin_dict)) + + +def test_drop_duplicates_null_in_object_column(nulls_fixture): + # https://github.com/pandas-dev/pandas/issues/32992 + df = DataFrame([[1, nulls_fixture], [2, "a"]], dtype=object) + result = df.drop_duplicates() + tm.assert_frame_equal(result, df) + + +@pytest.mark.parametrize("keep", ["first", "last", False]) +def test_drop_duplicates_series_vs_dataframe(keep): + # GH#14192 + df = DataFrame( + { + "a": [1, 1, 1, "one", "one"], + "b": [2, 2, np.nan, np.nan, np.nan], + "c": [3, 3, np.nan, np.nan, "three"], + "d": [1, 2, 3, 4, 4], + "e": [ + datetime(2015, 1, 1), + datetime(2015, 1, 1), + datetime(2015, 2, 1), + NaT, + NaT, + ], + } + ) + for column in df.columns: + dropped_frame = df[[column]].drop_duplicates(keep=keep) + dropped_series = df[column].drop_duplicates(keep=keep) + tm.assert_frame_equal(dropped_frame, dropped_series.to_frame()) + + +@pytest.mark.parametrize("arg", [[1], 1, "True", [], 0]) +def test_drop_duplicates_non_boolean_ignore_index(arg): + # GH#38274 + df = DataFrame({"a": [1, 2, 1, 3]}) + msg = '^For argument "ignore_index" expected type bool, received type .*.$' + with pytest.raises(ValueError, match=msg): + df.drop_duplicates(ignore_index=arg) + + +def test_drop_duplicates_pos_args_deprecation(): + # GH#41485 + df = DataFrame({"a": [1, 1, 2], "b": [1, 1, 3], "c": [1, 1, 3]}) + msg = ( + "In a future version of pandas all arguments of " + "DataFrame.drop_duplicates except for the argument 'subset' " + "will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.drop_duplicates(["b", "c"], "last") + expected = DataFrame({"a": [1, 2], "b": [1, 3], "c": [1, 3]}, index=[1, 2]) + tm.assert_frame_equal(expected, result) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_droplevel.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_droplevel.py new file mode 100644 index 0000000..e1302d4 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_droplevel.py @@ -0,0 +1,36 @@ +import pytest + +from pandas import ( + DataFrame, + Index, + MultiIndex, +) +import pandas._testing as tm + + +class TestDropLevel: + def test_droplevel(self, frame_or_series): + # GH#20342 + cols = MultiIndex.from_tuples( + [("c", "e"), ("d", "f")], names=["level_1", "level_2"] + ) + mi = MultiIndex.from_tuples([(1, 2), (5, 6), (9, 10)], names=["a", "b"]) + df = DataFrame([[3, 4], [7, 8], [11, 12]], index=mi, columns=cols) + if frame_or_series is not DataFrame: + df = df.iloc[:, 0] + + # test that dropping of a level in index works + expected = df.reset_index("a", drop=True) + result = df.droplevel("a", axis="index") + tm.assert_equal(result, expected) + + if frame_or_series is DataFrame: + # test that dropping of a level in columns works + expected = df.copy() + expected.columns = Index(["c", "d"], name="level_1") + result = df.droplevel("level_2", axis="columns") + tm.assert_equal(result, expected) + else: + # test that droplevel raises ValueError on axis != 0 + with pytest.raises(ValueError, match="No axis named columns"): + df.droplevel(1, axis="columns") diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_dropna.py 
b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_dropna.py new file mode 100644 index 0000000..d0b9eeb --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_dropna.py @@ -0,0 +1,276 @@ +import datetime + +import dateutil +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Series, +) +import pandas._testing as tm + + +class TestDataFrameMissingData: + def test_dropEmptyRows(self, float_frame): + N = len(float_frame.index) + mat = np.random.randn(N) + mat[:5] = np.nan + + frame = DataFrame({"foo": mat}, index=float_frame.index) + original = Series(mat, index=float_frame.index, name="foo") + expected = original.dropna() + inplace_frame1, inplace_frame2 = frame.copy(), frame.copy() + + smaller_frame = frame.dropna(how="all") + # check that original was preserved + tm.assert_series_equal(frame["foo"], original) + return_value = inplace_frame1.dropna(how="all", inplace=True) + tm.assert_series_equal(smaller_frame["foo"], expected) + tm.assert_series_equal(inplace_frame1["foo"], expected) + assert return_value is None + + smaller_frame = frame.dropna(how="all", subset=["foo"]) + return_value = inplace_frame2.dropna(how="all", subset=["foo"], inplace=True) + tm.assert_series_equal(smaller_frame["foo"], expected) + tm.assert_series_equal(inplace_frame2["foo"], expected) + assert return_value is None + + def test_dropIncompleteRows(self, float_frame): + N = len(float_frame.index) + mat = np.random.randn(N) + mat[:5] = np.nan + + frame = DataFrame({"foo": mat}, index=float_frame.index) + frame["bar"] = 5 + original = Series(mat, index=float_frame.index, name="foo") + inp_frame1, inp_frame2 = frame.copy(), frame.copy() + + smaller_frame = frame.dropna() + tm.assert_series_equal(frame["foo"], original) + return_value = inp_frame1.dropna(inplace=True) + + exp = Series(mat[5:], index=float_frame.index[5:], name="foo") + tm.assert_series_equal(smaller_frame["foo"], exp) + tm.assert_series_equal(inp_frame1["foo"], exp) + assert return_value is None + + samesize_frame = frame.dropna(subset=["bar"]) + tm.assert_series_equal(frame["foo"], original) + assert (frame["bar"] == 5).all() + return_value = inp_frame2.dropna(subset=["bar"], inplace=True) + tm.assert_index_equal(samesize_frame.index, float_frame.index) + tm.assert_index_equal(inp_frame2.index, float_frame.index) + assert return_value is None + + def test_dropna(self): + df = DataFrame(np.random.randn(6, 4)) + df.iloc[:2, 2] = np.nan + + dropped = df.dropna(axis=1) + expected = df.loc[:, [0, 1, 3]] + inp = df.copy() + return_value = inp.dropna(axis=1, inplace=True) + tm.assert_frame_equal(dropped, expected) + tm.assert_frame_equal(inp, expected) + assert return_value is None + + dropped = df.dropna(axis=0) + expected = df.loc[list(range(2, 6))] + inp = df.copy() + return_value = inp.dropna(axis=0, inplace=True) + tm.assert_frame_equal(dropped, expected) + tm.assert_frame_equal(inp, expected) + assert return_value is None + + # threshold + dropped = df.dropna(axis=1, thresh=5) + expected = df.loc[:, [0, 1, 3]] + inp = df.copy() + return_value = inp.dropna(axis=1, thresh=5, inplace=True) + tm.assert_frame_equal(dropped, expected) + tm.assert_frame_equal(inp, expected) + assert return_value is None + + dropped = df.dropna(axis=0, thresh=4) + expected = df.loc[range(2, 6)] + inp = df.copy() + return_value = inp.dropna(axis=0, thresh=4, inplace=True) + tm.assert_frame_equal(dropped, expected) + tm.assert_frame_equal(inp, expected) + assert return_value is None + 
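+ # (illustrative note, not from the original pandas test) dropna(thresh=N) keeps a + # row/column only if it has at least N non-NA values along the other axis: column 2 + # above holds two NaNs and four valid values, so thresh=5 dropped it, while the + # thresh<=4 cases below keep every column; likewise + # DataFrame({"x": [1, np.nan], "y": [1, 2]}).dropna(axis=1, thresh=2) would keep only "y".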
+ dropped = df.dropna(axis=1, thresh=4) + tm.assert_frame_equal(dropped, df) + + dropped = df.dropna(axis=1, thresh=3) + tm.assert_frame_equal(dropped, df) + + # subset + dropped = df.dropna(axis=0, subset=[0, 1, 3]) + inp = df.copy() + return_value = inp.dropna(axis=0, subset=[0, 1, 3], inplace=True) + tm.assert_frame_equal(dropped, df) + tm.assert_frame_equal(inp, df) + assert return_value is None + + # all + dropped = df.dropna(axis=1, how="all") + tm.assert_frame_equal(dropped, df) + + df[2] = np.nan + dropped = df.dropna(axis=1, how="all") + expected = df.loc[:, [0, 1, 3]] + tm.assert_frame_equal(dropped, expected) + + # bad input + msg = "No axis named 3 for object type DataFrame" + with pytest.raises(ValueError, match=msg): + df.dropna(axis=3) + + def test_drop_and_dropna_caching(self): + # test that the cacher updates + original = Series([1, 2, np.nan], name="A") + expected = Series([1, 2], dtype=original.dtype, name="A") + df = DataFrame({"A": original.values.copy()}) + df2 = df.copy() + df["A"].dropna() + tm.assert_series_equal(df["A"], original) + + ser = df["A"] + return_value = ser.dropna(inplace=True) + tm.assert_series_equal(ser, expected) + tm.assert_series_equal(df["A"], original) + assert return_value is None + + df2["A"].drop([1]) + tm.assert_series_equal(df2["A"], original) + + ser = df2["A"] + return_value = ser.drop([1], inplace=True) + tm.assert_series_equal(ser, original.drop([1])) + tm.assert_series_equal(df2["A"], original) + assert return_value is None + + def test_dropna_corner(self, float_frame): + # bad input + msg = "invalid how option: foo" + with pytest.raises(ValueError, match=msg): + float_frame.dropna(how="foo") + msg = "must specify how or thresh" + with pytest.raises(TypeError, match=msg): + float_frame.dropna(how=None) + # non-existent column - 8303 + with pytest.raises(KeyError, match=r"^\['X'\]$"): + float_frame.dropna(subset=["A", "X"]) + + def test_dropna_multiple_axes(self): + df = DataFrame( + [ + [1, np.nan, 2, 3], + [4, np.nan, 5, 6], + [np.nan, np.nan, np.nan, np.nan], + [7, np.nan, 8, 9], + ] + ) + + # GH20987 + with pytest.raises(TypeError, match="supplying multiple axes"): + df.dropna(how="all", axis=[0, 1]) + with pytest.raises(TypeError, match="supplying multiple axes"): + df.dropna(how="all", axis=(0, 1)) + + inp = df.copy() + with pytest.raises(TypeError, match="supplying multiple axes"): + inp.dropna(how="all", axis=(0, 1), inplace=True) + + def test_dropna_tz_aware_datetime(self): + # GH13407 + df = DataFrame() + dt1 = datetime.datetime(2015, 1, 1, tzinfo=dateutil.tz.tzutc()) + dt2 = datetime.datetime(2015, 2, 2, tzinfo=dateutil.tz.tzutc()) + df["Time"] = [dt1] + result = df.dropna(axis=0) + expected = DataFrame({"Time": [dt1]}) + tm.assert_frame_equal(result, expected) + + # Ex2 + df = DataFrame({"Time": [dt1, None, np.nan, dt2]}) + result = df.dropna(axis=0) + expected = DataFrame([dt1, dt2], columns=["Time"], index=[0, 3]) + tm.assert_frame_equal(result, expected) + + def test_dropna_categorical_interval_index(self): + # GH 25087 + ii = pd.IntervalIndex.from_breaks([0, 2.78, 3.14, 6.28]) + ci = pd.CategoricalIndex(ii) + df = DataFrame({"A": list("abc")}, index=ci) + + expected = df + result = df.dropna() + tm.assert_frame_equal(result, expected) + + def test_dropna_with_duplicate_columns(self): + df = DataFrame( + { + "A": np.random.randn(5), + "B": np.random.randn(5), + "C": np.random.randn(5), + "D": ["a", "b", "c", "d", "e"], + } + ) + df.iloc[2, [0, 1, 2]] = np.nan + df.iloc[0, 0] = np.nan + df.iloc[1, 1] = np.nan + df.iloc[:, 3] =
np.nan + expected = df.dropna(subset=["A", "B", "C"], how="all") + expected.columns = ["A", "A", "B", "C"] + + df.columns = ["A", "A", "B", "C"] + + result = df.dropna(subset=["A", "C"], how="all") + tm.assert_frame_equal(result, expected) + + def test_dropna_pos_args_deprecation(self): + # https://github.com/pandas-dev/pandas/issues/41485 + df = DataFrame({"a": [1, 2, 3]}) + msg = ( + r"In a future version of pandas all arguments of DataFrame\.dropna " + r"will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.dropna(1) + expected = DataFrame({"a": [1, 2, 3]}) + tm.assert_frame_equal(result, expected) + + def test_set_single_column_subset(self): + # GH 41021 + df = DataFrame({"A": [1, 2, 3], "B": list("abc"), "C": [4, np.NaN, 5]}) + expected = DataFrame( + {"A": [1, 3], "B": list("ac"), "C": [4.0, 5.0]}, index=[0, 2] + ) + result = df.dropna(subset="C") + tm.assert_frame_equal(result, expected) + + def test_single_column_not_present_in_axis(self): + # GH 41021 + df = DataFrame({"A": [1, 2, 3]}) + + # Column not present + with pytest.raises(KeyError, match="['D']"): + df.dropna(subset="D", axis=0) + + def test_subset_is_nparray(self): + # GH 41021 + df = DataFrame({"A": [1, 2, np.NaN], "B": list("abc"), "C": [4, np.NaN, 5]}) + expected = DataFrame({"A": [1.0], "B": ["a"], "C": [4.0]}) + result = df.dropna(subset=np.array(["A", "C"])) + tm.assert_frame_equal(result, expected) + + def test_no_nans_in_frame(self, axis): + # GH#41965 + df = DataFrame([[1, 2], [3, 4]], columns=pd.RangeIndex(0, 2)) + expected = df.copy() + result = df.dropna(axis=axis) + tm.assert_frame_equal(result, expected, check_index_type=True) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_dtypes.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_dtypes.py new file mode 100644 index 0000000..84841ad --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_dtypes.py @@ -0,0 +1,133 @@ +from datetime import timedelta + +import numpy as np + +from pandas.core.dtypes.dtypes import DatetimeTZDtype + +import pandas as pd +from pandas import ( + DataFrame, + Series, + date_range, + option_context, +) +import pandas._testing as tm + + +def _check_cast(df, v): + """ + Check if all dtypes of df are equal to v + """ + assert all(s.dtype.name == v for _, s in df.items()) + + +class TestDataFrameDataTypes: + def test_empty_frame_dtypes(self): + empty_df = DataFrame() + tm.assert_series_equal(empty_df.dtypes, Series(dtype=object)) + + nocols_df = DataFrame(index=[1, 2, 3]) + tm.assert_series_equal(nocols_df.dtypes, Series(dtype=object)) + + norows_df = DataFrame(columns=list("abc")) + tm.assert_series_equal(norows_df.dtypes, Series(object, index=list("abc"))) + + norows_int_df = DataFrame(columns=list("abc")).astype(np.int32) + tm.assert_series_equal( + norows_int_df.dtypes, Series(np.dtype("int32"), index=list("abc")) + ) + + df = DataFrame({"a": 1, "b": True, "c": 1.0}, index=[1, 2, 3]) + ex_dtypes = Series({"a": np.int64, "b": np.bool_, "c": np.float64}) + tm.assert_series_equal(df.dtypes, ex_dtypes) + + # same but for empty slice of df + tm.assert_series_equal(df[:0].dtypes, ex_dtypes) + + def test_datetime_with_tz_dtypes(self): + tzframe = DataFrame( + { + "A": date_range("20130101", periods=3), + "B": date_range("20130101", periods=3, tz="US/Eastern"), + "C": date_range("20130101", periods=3, tz="CET"), + } + ) + tzframe.iloc[1, 1] = pd.NaT + tzframe.iloc[1, 2] = pd.NaT + result = 
tzframe.dtypes.sort_index() + expected = Series( + [ + np.dtype("datetime64[ns]"), + DatetimeTZDtype("ns", "US/Eastern"), + DatetimeTZDtype("ns", "CET"), + ], + ["A", "B", "C"], + ) + + tm.assert_series_equal(result, expected) + + def test_dtypes_are_correct_after_column_slice(self): + # GH6525 + df = DataFrame(index=range(5), columns=list("abc"), dtype=np.float_) + tm.assert_series_equal( + df.dtypes, + Series({"a": np.float_, "b": np.float_, "c": np.float_}), + ) + tm.assert_series_equal(df.iloc[:, 2:].dtypes, Series({"c": np.float_})) + tm.assert_series_equal( + df.dtypes, + Series({"a": np.float_, "b": np.float_, "c": np.float_}), + ) + + def test_dtypes_gh8722(self, float_string_frame): + float_string_frame["bool"] = float_string_frame["A"] > 0 + result = float_string_frame.dtypes + expected = Series( + {k: v.dtype for k, v in float_string_frame.items()}, index=result.index + ) + tm.assert_series_equal(result, expected) + + # compat, GH 8722 + with option_context("use_inf_as_na", True): + df = DataFrame([[1]]) + result = df.dtypes + tm.assert_series_equal(result, Series({0: np.dtype("int64")})) + + def test_dtypes_timedeltas(self): + df = DataFrame( + { + "A": Series(date_range("2012-1-1", periods=3, freq="D")), + "B": Series([timedelta(days=i) for i in range(3)]), + } + ) + result = df.dtypes + expected = Series( + [np.dtype("datetime64[ns]"), np.dtype("timedelta64[ns]")], index=list("AB") + ) + tm.assert_series_equal(result, expected) + + df["C"] = df["A"] + df["B"] + result = df.dtypes + expected = Series( + [ + np.dtype("datetime64[ns]"), + np.dtype("timedelta64[ns]"), + np.dtype("datetime64[ns]"), + ], + index=list("ABC"), + ) + tm.assert_series_equal(result, expected) + + # mixed int types + df["D"] = 1 + result = df.dtypes + expected = Series( + [ + np.dtype("datetime64[ns]"), + np.dtype("timedelta64[ns]"), + np.dtype("datetime64[ns]"), + np.dtype("int64"), + ], + index=list("ABCD"), + ) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_duplicated.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_duplicated.py new file mode 100644 index 0000000..0b90914 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_duplicated.py @@ -0,0 +1,113 @@ +import re + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Series, + date_range, +) +import pandas._testing as tm + + +@pytest.mark.parametrize("subset", ["a", ["a"], ["a", "B"]]) +def test_duplicated_with_misspelled_column_name(subset): + # GH 19730 + df = DataFrame({"A": [0, 0, 1], "B": [0, 0, 1], "C": [0, 0, 1]}) + msg = re.escape("Index(['a'], dtype='object')") + + with pytest.raises(KeyError, match=msg): + df.duplicated(subset) + + +@pytest.mark.slow +def test_duplicated_do_not_fail_on_wide_dataframes(): + # gh-21524 + # Given the wide dataframe with a lot of columns + # with different (important!) values + data = {f"col_{i:02d}": np.random.randint(0, 1000, 30000) for i in range(100)} + df = DataFrame(data).T + result = df.duplicated() + + # Then duplicates produce the bool Series as a result and don't fail during + # calculation. 
The actual values don't matter here, though usually it's all + # False in this case + assert isinstance(result, Series) + assert result.dtype == np.bool_ + + +@pytest.mark.parametrize( + "keep, expected", + [ + ("first", Series([False, False, True, False, True])), + ("last", Series([True, True, False, False, False])), + (False, Series([True, True, True, False, True])), + ], +) +def test_duplicated_keep(keep, expected): + df = DataFrame({"A": [0, 1, 1, 2, 0], "B": ["a", "b", "b", "c", "a"]}) + + result = df.duplicated(keep=keep) + tm.assert_series_equal(result, expected) + + +@pytest.mark.xfail(reason="GH#21720; nan/None falsely considered equal") +@pytest.mark.parametrize( + "keep, expected", + [ + ("first", Series([False, False, True, False, True])), + ("last", Series([True, True, False, False, False])), + (False, Series([True, True, True, False, True])), + ], +) +def test_duplicated_nan_none(keep, expected): + df = DataFrame({"C": [np.nan, 3, 3, None, np.nan]}, dtype=object) + + result = df.duplicated(keep=keep) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("subset", [None, ["A", "B"], "A"]) +def test_duplicated_subset(subset, keep): + df = DataFrame( + { + "A": [0, 1, 1, 2, 0], + "B": ["a", "b", "b", "c", "a"], + "C": [np.nan, 3, 3, None, np.nan], + } + ) + + if subset is None: + subset = list(df.columns) + elif isinstance(subset, str): + # need to have a DataFrame, not a Series + # -> select columns with singleton list, not string + subset = [subset] + + expected = df[subset].duplicated(keep=keep) + result = df.duplicated(keep=keep, subset=subset) + tm.assert_series_equal(result, expected) + + +def test_duplicated_on_empty_frame(): + # GH 25184 + + df = DataFrame(columns=["a", "b"]) + dupes = df.duplicated("a") + + result = df[dupes] + expected = df.copy() + tm.assert_frame_equal(result, expected) + + +def test_frame_datetime64_duplicated(): + dates = date_range("2010-07-01", end="2010-08-05") + + tst = DataFrame({"symbol": "AAA", "date": dates}) + result = tst.duplicated(["date", "symbol"]) + assert (-result).all() + + tst = DataFrame({"date": dates}) + result = tst.duplicated() + assert (-result).all() diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_equals.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_equals.py new file mode 100644 index 0000000..dddd6c6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_equals.py @@ -0,0 +1,82 @@ +import numpy as np + +from pandas import ( + DataFrame, + date_range, +) +import pandas._testing as tm + + +class TestEquals: + def test_dataframe_not_equal(self): + # see GH#28839 + df1 = DataFrame({"a": [1, 2], "b": ["s", "d"]}) + df2 = DataFrame({"a": ["s", "d"], "b": [1, 2]}) + assert df1.equals(df2) is False + + def test_equals_different_blocks(self, using_array_manager): + # GH#9330 + df0 = DataFrame({"A": ["x", "y"], "B": [1, 2], "C": ["w", "z"]}) + df1 = df0.reset_index()[["A", "B", "C"]] + if not using_array_manager: + # this assert verifies that the above operations have + # induced a block rearrangement + assert df0._mgr.blocks[0].dtype != df1._mgr.blocks[0].dtype + + # do the real tests + tm.assert_frame_equal(df0, df1) + assert df0.equals(df1) + assert df1.equals(df0) + + def test_equals(self): + # Add object dtype column with nans + index = np.random.random(10) + df1 = DataFrame(np.random.random(10), index=index, columns=["floats"]) + df1["text"] = "the sky is so blue. 
we could use more chocolate.".split() + df1["start"] = date_range("2000-1-1", periods=10, freq="T") + df1["end"] = date_range("2000-1-1", periods=10, freq="D") + df1["diff"] = df1["end"] - df1["start"] + df1["bool"] = np.arange(10) % 3 == 0 + df1.loc[::2] = np.nan + df2 = df1.copy() + assert df1["text"].equals(df2["text"]) + assert df1["start"].equals(df2["start"]) + assert df1["end"].equals(df2["end"]) + assert df1["diff"].equals(df2["diff"]) + assert df1["bool"].equals(df2["bool"]) + assert df1.equals(df2) + assert not df1.equals(object) + + # different dtype + different = df1.copy() + different["floats"] = different["floats"].astype("float32") + assert not df1.equals(different) + + # different index + different_index = -index + different = df2.set_index(different_index) + assert not df1.equals(different) + + # different columns + different = df2.copy() + different.columns = df2.columns[::-1] + assert not df1.equals(different) + + # DatetimeIndex + index = date_range("2000-1-1", periods=10, freq="T") + df1 = df1.set_index(index) + df2 = df1.copy() + assert df1.equals(df2) + + # MultiIndex + df3 = df1.set_index(["text"], append=True) + df2 = df1.set_index(["text"], append=True) + assert df3.equals(df2) + + df2 = df1.set_index(["floats"], append=True) + assert not df3.equals(df2) + + # NaN in index + df3 = df1.set_index(["floats"], append=True) + df2 = df1.set_index(["floats"], append=True) + assert df3.equals(df2) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_explode.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_explode.py new file mode 100644 index 0000000..8716a18 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_explode.py @@ -0,0 +1,277 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +def test_error(): + df = pd.DataFrame( + {"A": pd.Series([[0, 1, 2], np.nan, [], (3, 4)], index=list("abcd")), "B": 1} + ) + with pytest.raises( + ValueError, match="column must be a scalar, tuple, or list thereof" + ): + df.explode([list("AA")]) + + with pytest.raises(ValueError, match="column must be unique"): + df.explode(list("AA")) + + df.columns = list("AA") + with pytest.raises(ValueError, match="columns must be unique"): + df.explode("A") + + +@pytest.mark.parametrize( + "input_subset, error_message", + [ + ( + list("AC"), + "columns must have matching element counts", + ), + ( + [], + "column must be nonempty", + ), + ( + list("AC"), + "columns must have matching element counts", + ), + ], +) +def test_error_multi_columns(input_subset, error_message): + # GH 39240 + df = pd.DataFrame( + { + "A": [[0, 1, 2], np.nan, [], (3, 4)], + "B": 1, + "C": [["a", "b", "c"], "foo", [], ["d", "e", "f"]], + }, + index=list("abcd"), + ) + with pytest.raises(ValueError, match=error_message): + df.explode(input_subset) + + +@pytest.mark.parametrize( + "scalar", + ["a", 0, 1.5, pd.Timedelta("1 days"), pd.Timestamp("2019-12-31")], +) +def test_basic(scalar): + df = pd.DataFrame( + {scalar: pd.Series([[0, 1, 2], np.nan, [], (3, 4)], index=list("abcd")), "B": 1} + ) + result = df.explode(scalar) + expected = pd.DataFrame( + { + scalar: pd.Series( + [0, 1, 2, np.nan, np.nan, 3, 4], index=list("aaabcdd"), dtype=object + ), + "B": 1, + } + ) + tm.assert_frame_equal(result, expected) + + +def test_multi_index_rows(): + df = pd.DataFrame( + {"A": np.array([[0, 1, 2], np.nan, [], (3, 4)], dtype=object), "B": 1}, + index=pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1), ("b", 
2)]), + ) + + result = df.explode("A") + expected = pd.DataFrame( + { + "A": pd.Series( + [0, 1, 2, np.nan, np.nan, 3, 4], + index=pd.MultiIndex.from_tuples( + [ + ("a", 1), + ("a", 1), + ("a", 1), + ("a", 2), + ("b", 1), + ("b", 2), + ("b", 2), + ] + ), + dtype=object, + ), + "B": 1, + } + ) + tm.assert_frame_equal(result, expected) + + +def test_multi_index_columns(): + df = pd.DataFrame( + {("A", 1): np.array([[0, 1, 2], np.nan, [], (3, 4)], dtype=object), ("A", 2): 1} + ) + + result = df.explode(("A", 1)) + expected = pd.DataFrame( + { + ("A", 1): pd.Series( + [0, 1, 2, np.nan, np.nan, 3, 4], + index=pd.Index([0, 0, 0, 1, 2, 3, 3]), + dtype=object, + ), + ("A", 2): 1, + } + ) + tm.assert_frame_equal(result, expected) + + +def test_usecase(): + # explode a single column + # gh-10511 + df = pd.DataFrame( + [[11, range(5), 10], [22, range(3), 20]], columns=list("ABC") + ).set_index("C") + result = df.explode("B") + + expected = pd.DataFrame( + { + "A": [11, 11, 11, 11, 11, 22, 22, 22], + "B": np.array([0, 1, 2, 3, 4, 0, 1, 2], dtype=object), + "C": [10, 10, 10, 10, 10, 20, 20, 20], + }, + columns=list("ABC"), + ).set_index("C") + + tm.assert_frame_equal(result, expected) + + # gh-8517 + df = pd.DataFrame( + [["2014-01-01", "Alice", "A B"], ["2014-01-02", "Bob", "C D"]], + columns=["dt", "name", "text"], + ) + result = df.assign(text=df.text.str.split(" ")).explode("text") + expected = pd.DataFrame( + [ + ["2014-01-01", "Alice", "A"], + ["2014-01-01", "Alice", "B"], + ["2014-01-02", "Bob", "C"], + ["2014-01-02", "Bob", "D"], + ], + columns=["dt", "name", "text"], + index=[0, 0, 1, 1], + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "input_dict, input_index, expected_dict, expected_index", + [ + ( + {"col1": [[1, 2], [3, 4]], "col2": ["foo", "bar"]}, + [0, 0], + {"col1": [1, 2, 3, 4], "col2": ["foo", "foo", "bar", "bar"]}, + [0, 0, 0, 0], + ), + ( + {"col1": [[1, 2], [3, 4]], "col2": ["foo", "bar"]}, + pd.Index([0, 0], name="my_index"), + {"col1": [1, 2, 3, 4], "col2": ["foo", "foo", "bar", "bar"]}, + pd.Index([0, 0, 0, 0], name="my_index"), + ), + ( + {"col1": [[1, 2], [3, 4]], "col2": ["foo", "bar"]}, + pd.MultiIndex.from_arrays( + [[0, 0], [1, 1]], names=["my_first_index", "my_second_index"] + ), + {"col1": [1, 2, 3, 4], "col2": ["foo", "foo", "bar", "bar"]}, + pd.MultiIndex.from_arrays( + [[0, 0, 0, 0], [1, 1, 1, 1]], + names=["my_first_index", "my_second_index"], + ), + ), + ( + {"col1": [[1, 2], [3, 4]], "col2": ["foo", "bar"]}, + pd.MultiIndex.from_arrays([[0, 0], [1, 1]], names=["my_index", None]), + {"col1": [1, 2, 3, 4], "col2": ["foo", "foo", "bar", "bar"]}, + pd.MultiIndex.from_arrays( + [[0, 0, 0, 0], [1, 1, 1, 1]], names=["my_index", None] + ), + ), + ], +) +def test_duplicate_index(input_dict, input_index, expected_dict, expected_index): + # GH 28005 + df = pd.DataFrame(input_dict, index=input_index) + result = df.explode("col1") + expected = pd.DataFrame(expected_dict, index=expected_index, dtype=object) + tm.assert_frame_equal(result, expected) + + +def test_ignore_index(): + # GH 34932 + df = pd.DataFrame({"id": range(0, 20, 10), "values": [list("ab"), list("cd")]}) + result = df.explode("values", ignore_index=True) + expected = pd.DataFrame( + {"id": [0, 0, 10, 10], "values": list("abcd")}, index=[0, 1, 2, 3] + ) + tm.assert_frame_equal(result, expected) + + +def test_explode_sets(): + # https://github.com/pandas-dev/pandas/issues/35614 + df = pd.DataFrame({"a": [{"x", "y"}], "b": [1]}, index=[1]) + result = 
df.explode(column="a").sort_values(by="a") + expected = pd.DataFrame({"a": ["x", "y"], "b": [1, 1]}, index=[1, 1]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "input_subset, expected_dict, expected_index", + [ + ( + list("AC"), + { + "A": pd.Series( + [0, 1, 2, np.nan, np.nan, 3, 4, np.nan], + index=list("aaabcdde"), + dtype=object, + ), + "B": 1, + "C": ["a", "b", "c", "foo", np.nan, "d", "e", np.nan], + }, + list("aaabcdde"), + ), + ( + list("A"), + { + "A": pd.Series( + [0, 1, 2, np.nan, np.nan, 3, 4, np.nan], + index=list("aaabcdde"), + dtype=object, + ), + "B": 1, + "C": [ + ["a", "b", "c"], + ["a", "b", "c"], + ["a", "b", "c"], + "foo", + [], + ["d", "e"], + ["d", "e"], + np.nan, + ], + }, + list("aaabcdde"), + ), + ], +) +def test_multi_columns(input_subset, expected_dict, expected_index): + # GH 39240 + df = pd.DataFrame( + { + "A": [[0, 1, 2], np.nan, [], (3, 4), np.nan], + "B": 1, + "C": [["a", "b", "c"], "foo", [], ["d", "e"], np.nan], + }, + index=list("abcde"), + ) + result = df.explode(input_subset) + expected = pd.DataFrame(expected_dict, expected_index) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_fillna.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_fillna.py new file mode 100644 index 0000000..21a45d9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_fillna.py @@ -0,0 +1,655 @@ +import numpy as np +import pytest + +from pandas import ( + Categorical, + DataFrame, + DatetimeIndex, + NaT, + PeriodIndex, + Series, + TimedeltaIndex, + Timestamp, + date_range, +) +import pandas._testing as tm +from pandas.tests.frame.common import _check_mixed_float + + +class TestFillNA: + def test_fillna_datetime(self, datetime_frame): + tf = datetime_frame + tf.loc[tf.index[:5], "A"] = np.nan + tf.loc[tf.index[-5:], "A"] = np.nan + + zero_filled = datetime_frame.fillna(0) + assert (zero_filled.loc[zero_filled.index[:5], "A"] == 0).all() + + padded = datetime_frame.fillna(method="pad") + assert np.isnan(padded.loc[padded.index[:5], "A"]).all() + assert ( + padded.loc[padded.index[-5:], "A"] == padded.loc[padded.index[-5], "A"] + ).all() + + msg = "Must specify a fill 'value' or 'method'" + with pytest.raises(ValueError, match=msg): + datetime_frame.fillna() + msg = "Cannot specify both 'value' and 'method'" + with pytest.raises(ValueError, match=msg): + datetime_frame.fillna(5, method="ffill") + + def test_fillna_mixed_type(self, float_string_frame): + + mf = float_string_frame + mf.loc[mf.index[5:20], "foo"] = np.nan + mf.loc[mf.index[-10:], "A"] = np.nan + # TODO: make stronger assertion here, GH 25640 + mf.fillna(value=0) + mf.fillna(method="pad") + + def test_fillna_mixed_float(self, mixed_float_frame): + + # mixed numeric (but no float16) + mf = mixed_float_frame.reindex(columns=["A", "B", "D"]) + mf.loc[mf.index[-10:], "A"] = np.nan + result = mf.fillna(value=0) + _check_mixed_float(result, dtype={"C": None}) + + result = mf.fillna(method="pad") + _check_mixed_float(result, dtype={"C": None}) + + def test_fillna_empty(self): + # empty frame (GH#2778) + df = DataFrame(columns=["x"]) + for m in ["pad", "backfill"]: + df.x.fillna(method=m, inplace=True) + df.x.fillna(method=m) + + def test_fillna_different_dtype(self): + # with different dtype (GH#3386) + df = DataFrame( + [["a", "a", np.nan, "a"], ["b", "b", np.nan, "b"], ["c", "c", np.nan, "c"]] + ) + + result = df.fillna({2: "foo"}) + expected = DataFrame( + [["a", 
"a", "foo", "a"], ["b", "b", "foo", "b"], ["c", "c", "foo", "c"]] + ) + tm.assert_frame_equal(result, expected) + + return_value = df.fillna({2: "foo"}, inplace=True) + tm.assert_frame_equal(df, expected) + assert return_value is None + + def test_fillna_limit_and_value(self): + # limit and value + df = DataFrame(np.random.randn(10, 3)) + df.iloc[2:7, 0] = np.nan + df.iloc[3:5, 2] = np.nan + + expected = df.copy() + expected.iloc[2, 0] = 999 + expected.iloc[3, 2] = 999 + result = df.fillna(999, limit=1) + tm.assert_frame_equal(result, expected) + + def test_fillna_datelike(self): + # with datelike + # GH#6344 + df = DataFrame( + { + "Date": [NaT, Timestamp("2014-1-1")], + "Date2": [Timestamp("2013-1-1"), NaT], + } + ) + + expected = df.copy() + expected["Date"] = expected["Date"].fillna(df.loc[df.index[0], "Date2"]) + result = df.fillna(value={"Date": df["Date2"]}) + tm.assert_frame_equal(result, expected) + + def test_fillna_tzaware(self): + # with timezone + # GH#15855 + df = DataFrame({"A": [Timestamp("2012-11-11 00:00:00+01:00"), NaT]}) + exp = DataFrame( + { + "A": [ + Timestamp("2012-11-11 00:00:00+01:00"), + Timestamp("2012-11-11 00:00:00+01:00"), + ] + } + ) + tm.assert_frame_equal(df.fillna(method="pad"), exp) + + df = DataFrame({"A": [NaT, Timestamp("2012-11-11 00:00:00+01:00")]}) + exp = DataFrame( + { + "A": [ + Timestamp("2012-11-11 00:00:00+01:00"), + Timestamp("2012-11-11 00:00:00+01:00"), + ] + } + ) + tm.assert_frame_equal(df.fillna(method="bfill"), exp) + + def test_fillna_tzaware_different_column(self): + # with timezone in another column + # GH#15522 + df = DataFrame( + { + "A": date_range("20130101", periods=4, tz="US/Eastern"), + "B": [1, 2, np.nan, np.nan], + } + ) + result = df.fillna(method="pad") + expected = DataFrame( + { + "A": date_range("20130101", periods=4, tz="US/Eastern"), + "B": [1.0, 2.0, 2.0, 2.0], + } + ) + tm.assert_frame_equal(result, expected) + + def test_na_actions_categorical(self): + + cat = Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) + vals = ["a", "b", np.nan, "d"] + df = DataFrame({"cats": cat, "vals": vals}) + cat2 = Categorical([1, 2, 3, 3], categories=[1, 2, 3]) + vals2 = ["a", "b", "b", "d"] + df_exp_fill = DataFrame({"cats": cat2, "vals": vals2}) + cat3 = Categorical([1, 2, 3], categories=[1, 2, 3]) + vals3 = ["a", "b", np.nan] + df_exp_drop_cats = DataFrame({"cats": cat3, "vals": vals3}) + cat4 = Categorical([1, 2], categories=[1, 2, 3]) + vals4 = ["a", "b"] + df_exp_drop_all = DataFrame({"cats": cat4, "vals": vals4}) + + # fillna + res = df.fillna(value={"cats": 3, "vals": "b"}) + tm.assert_frame_equal(res, df_exp_fill) + + msg = "Cannot setitem on a Categorical with a new category" + with pytest.raises(TypeError, match=msg): + df.fillna(value={"cats": 4, "vals": "c"}) + + res = df.fillna(method="pad") + tm.assert_frame_equal(res, df_exp_fill) + + # dropna + res = df.dropna(subset=["cats"]) + tm.assert_frame_equal(res, df_exp_drop_cats) + + res = df.dropna() + tm.assert_frame_equal(res, df_exp_drop_all) + + # make sure that fillna takes missing values into account + c = Categorical([np.nan, "b", np.nan], categories=["a", "b"]) + df = DataFrame({"cats": c, "vals": [1, 2, 3]}) + + cat_exp = Categorical(["a", "b", "a"], categories=["a", "b"]) + df_exp = DataFrame({"cats": cat_exp, "vals": [1, 2, 3]}) + + res = df.fillna("a") + tm.assert_frame_equal(res, df_exp) + + def test_fillna_categorical_nan(self): + # GH#14021 + # np.nan should always be a valid filler + cat = Categorical([np.nan, 2, np.nan]) + val = Categorical([np.nan, 
np.nan, np.nan]) + df = DataFrame({"cats": cat, "vals": val}) + + # GH#32950 df.median() is poorly behaved because there is no + # Categorical.median + median = Series({"cats": 2.0, "vals": np.nan}) + + res = df.fillna(median) + v_exp = [np.nan, np.nan, np.nan] + df_exp = DataFrame({"cats": [2, 2, 2], "vals": v_exp}, dtype="category") + tm.assert_frame_equal(res, df_exp) + + result = df.cats.fillna(np.nan) + tm.assert_series_equal(result, df.cats) + + result = df.vals.fillna(np.nan) + tm.assert_series_equal(result, df.vals) + + idx = DatetimeIndex( + ["2011-01-01 09:00", "2016-01-01 23:45", "2011-01-01 09:00", NaT, NaT] + ) + df = DataFrame({"a": Categorical(idx)}) + tm.assert_frame_equal(df.fillna(value=NaT), df) + + idx = PeriodIndex(["2011-01", "2011-01", "2011-01", NaT, NaT], freq="M") + df = DataFrame({"a": Categorical(idx)}) + tm.assert_frame_equal(df.fillna(value=NaT), df) + + idx = TimedeltaIndex(["1 days", "2 days", "1 days", NaT, NaT]) + df = DataFrame({"a": Categorical(idx)}) + tm.assert_frame_equal(df.fillna(value=NaT), df) + + def test_fillna_downcast(self): + # GH#15277 + # infer int64 from float64 + df = DataFrame({"a": [1.0, np.nan]}) + result = df.fillna(0, downcast="infer") + expected = DataFrame({"a": [1, 0]}) + tm.assert_frame_equal(result, expected) + + # infer int64 from float64 when fillna value is a dict + df = DataFrame({"a": [1.0, np.nan]}) + result = df.fillna({"a": 0}, downcast="infer") + expected = DataFrame({"a": [1, 0]}) + tm.assert_frame_equal(result, expected) + + def test_fillna_downcast_false(self, frame_or_series): + # GH#45603 preserve object dtype with downcast=False + obj = frame_or_series([1, 2, 3], dtype="object") + result = obj.fillna("", downcast=False) + tm.assert_equal(result, obj) + + @pytest.mark.parametrize("columns", [["A", "A", "B"], ["A", "A"]]) + def test_fillna_dictlike_value_duplicate_colnames(self, columns): + # GH#43476 + df = DataFrame(np.nan, index=[0, 1], columns=columns) + with tm.assert_produces_warning(None): + result = df.fillna({"A": 0}) + + expected = df.copy() + expected["A"] = 0.0 + tm.assert_frame_equal(result, expected) + + def test_fillna_dtype_conversion(self): + # make sure that fillna on an empty frame works + df = DataFrame(index=["A", "B", "C"], columns=[1, 2, 3, 4, 5]) + result = df.dtypes + expected = Series([np.dtype("object")] * 5, index=[1, 2, 3, 4, 5]) + tm.assert_series_equal(result, expected) + + result = df.fillna(1) + expected = DataFrame(1, index=["A", "B", "C"], columns=[1, 2, 3, 4, 5]) + tm.assert_frame_equal(result, expected) + + # empty block + df = DataFrame(index=range(3), columns=["A", "B"], dtype="float64") + result = df.fillna("nan") + expected = DataFrame("nan", index=range(3), columns=["A", "B"]) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("val", ["", 1, np.nan, 1.0]) + def test_fillna_dtype_conversion_equiv_replace(self, val): + df = DataFrame({"A": [1, np.nan], "B": [1.0, 2.0]}) + expected = df.replace(np.nan, val) + result = df.fillna(val) + tm.assert_frame_equal(result, expected) + + def test_fillna_datetime_columns(self): + # GH#7095 + df = DataFrame( + { + "A": [-1, -2, np.nan], + "B": date_range("20130101", periods=3), + "C": ["foo", "bar", None], + "D": ["foo2", "bar2", None], + }, + index=date_range("20130110", periods=3), + ) + result = df.fillna("?") + expected = DataFrame( + { + "A": [-1, -2, "?"], + "B": date_range("20130101", periods=3), + "C": ["foo", "bar", "?"], + "D": ["foo2", "bar2", "?"], + }, + index=date_range("20130110", periods=3), + ) + 
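+ # (illustrative note, not from the original pandas test) only the columns that + # actually contain missing values pick up the "?" fill: "B" has no NaT and keeps + # its datetime64[ns] dtype, while the float column "A" is upcast to object so it + # can hold the fill string.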
tm.assert_frame_equal(result, expected) + + df = DataFrame( + { + "A": [-1, -2, np.nan], + "B": [Timestamp("2013-01-01"), Timestamp("2013-01-02"), NaT], + "C": ["foo", "bar", None], + "D": ["foo2", "bar2", None], + }, + index=date_range("20130110", periods=3), + ) + result = df.fillna("?") + expected = DataFrame( + { + "A": [-1, -2, "?"], + "B": [Timestamp("2013-01-01"), Timestamp("2013-01-02"), "?"], + "C": ["foo", "bar", "?"], + "D": ["foo2", "bar2", "?"], + }, + index=date_range("20130110", periods=3), + ) + tm.assert_frame_equal(result, expected) + + def test_ffill(self, datetime_frame): + datetime_frame["A"][:5] = np.nan + datetime_frame["A"][-5:] = np.nan + + tm.assert_frame_equal( + datetime_frame.ffill(), datetime_frame.fillna(method="ffill") + ) + + def test_ffill_pos_args_deprecation(self): + # https://github.com/pandas-dev/pandas/issues/41485 + df = DataFrame({"a": [1, 2, 3]}) + msg = ( + r"In a future version of pandas all arguments of DataFrame.ffill " + r"will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.ffill(0) + expected = DataFrame({"a": [1, 2, 3]}) + tm.assert_frame_equal(result, expected) + + def test_bfill(self, datetime_frame): + datetime_frame["A"][:5] = np.nan + datetime_frame["A"][-5:] = np.nan + + tm.assert_frame_equal( + datetime_frame.bfill(), datetime_frame.fillna(method="bfill") + ) + + def test_bfill_pos_args_deprecation(self): + # https://github.com/pandas-dev/pandas/issues/41485 + df = DataFrame({"a": [1, 2, 3]}) + msg = ( + r"In a future version of pandas all arguments of DataFrame.bfill " + r"will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.bfill(0) + expected = DataFrame({"a": [1, 2, 3]}) + tm.assert_frame_equal(result, expected) + + def test_frame_pad_backfill_limit(self): + index = np.arange(10) + df = DataFrame(np.random.randn(10, 4), index=index) + + result = df[:2].reindex(index, method="pad", limit=5) + + expected = df[:2].reindex(index).fillna(method="pad") + expected.iloc[-3:] = np.nan + tm.assert_frame_equal(result, expected) + + result = df[-2:].reindex(index, method="backfill", limit=5) + + expected = df[-2:].reindex(index).fillna(method="backfill") + expected.iloc[:3] = np.nan + tm.assert_frame_equal(result, expected) + + def test_frame_fillna_limit(self): + index = np.arange(10) + df = DataFrame(np.random.randn(10, 4), index=index) + + result = df[:2].reindex(index) + result = result.fillna(method="pad", limit=5) + + expected = df[:2].reindex(index).fillna(method="pad") + expected.iloc[-3:] = np.nan + tm.assert_frame_equal(result, expected) + + result = df[-2:].reindex(index) + result = result.fillna(method="backfill", limit=5) + + expected = df[-2:].reindex(index).fillna(method="backfill") + expected.iloc[:3] = np.nan + tm.assert_frame_equal(result, expected) + + def test_fillna_skip_certain_blocks(self): + # don't try to fill boolean, int blocks + + df = DataFrame(np.random.randn(10, 4).astype(int)) + + # it works! 
+ df.fillna(np.nan) + + @pytest.mark.parametrize("type", [int, float]) + def test_fillna_positive_limit(self, type): + df = DataFrame(np.random.randn(10, 4)).astype(type) + + msg = "Limit must be greater than 0" + with pytest.raises(ValueError, match=msg): + df.fillna(0, limit=-5) + + @pytest.mark.parametrize("type", [int, float]) + def test_fillna_integer_limit(self, type): + df = DataFrame(np.random.randn(10, 4)).astype(type) + + msg = "Limit must be an integer" + with pytest.raises(ValueError, match=msg): + df.fillna(0, limit=0.5) + + def test_fillna_inplace(self): + df = DataFrame(np.random.randn(10, 4)) + df[1][:4] = np.nan + df[3][-4:] = np.nan + + expected = df.fillna(value=0) + assert expected is not df + + df.fillna(value=0, inplace=True) + tm.assert_frame_equal(df, expected) + + expected = df.fillna(value={0: 0}, inplace=True) + assert expected is None + + df[1][:4] = np.nan + df[3][-4:] = np.nan + expected = df.fillna(method="ffill") + assert expected is not df + + df.fillna(method="ffill", inplace=True) + tm.assert_frame_equal(df, expected) + + def test_fillna_dict_series(self): + df = DataFrame( + { + "a": [np.nan, 1, 2, np.nan, np.nan], + "b": [1, 2, 3, np.nan, np.nan], + "c": [np.nan, 1, 2, 3, 4], + } + ) + + result = df.fillna({"a": 0, "b": 5}) + + expected = df.copy() + expected["a"] = expected["a"].fillna(0) + expected["b"] = expected["b"].fillna(5) + tm.assert_frame_equal(result, expected) + + # it works + result = df.fillna({"a": 0, "b": 5, "d": 7}) + + # Series treated same as dict + result = df.fillna(df.max()) + expected = df.fillna(df.max().to_dict()) + tm.assert_frame_equal(result, expected) + + # disable this for now + with pytest.raises(NotImplementedError, match="column by column"): + df.fillna(df.max(1), axis=1) + + def test_fillna_dataframe(self): + # GH#8377 + df = DataFrame( + { + "a": [np.nan, 1, 2, np.nan, np.nan], + "b": [1, 2, 3, np.nan, np.nan], + "c": [np.nan, 1, 2, 3, 4], + }, + index=list("VWXYZ"), + ) + + # df2 may have different index and columns + df2 = DataFrame( + { + "a": [np.nan, 10, 20, 30, 40], + "b": [50, 60, 70, 80, 90], + "foo": ["bar"] * 5, + }, + index=list("VWXuZ"), + ) + + result = df.fillna(df2) + + # only those columns and indices which are shared get filled + expected = DataFrame( + { + "a": [np.nan, 1, 2, np.nan, 40], + "b": [1, 2, 3, np.nan, 90], + "c": [np.nan, 1, 2, 3, 4], + }, + index=list("VWXYZ"), + ) + + tm.assert_frame_equal(result, expected) + + def test_fillna_columns(self): + df = DataFrame(np.random.randn(10, 10)) + df.values[:, ::2] = np.nan + + result = df.fillna(method="ffill", axis=1) + expected = df.T.fillna(method="pad").T + tm.assert_frame_equal(result, expected) + + df.insert(6, "foo", 5) + result = df.fillna(method="ffill", axis=1) + expected = df.astype(float).fillna(method="ffill", axis=1) + tm.assert_frame_equal(result, expected) + + def test_fillna_invalid_method(self, float_frame): + with pytest.raises(ValueError, match="ffil"): + float_frame.fillna(method="ffil") + + def test_fillna_invalid_value(self, float_frame): + # list + msg = '"value" parameter must be a scalar or dict, but you passed a "{}"' + with pytest.raises(TypeError, match=msg.format("list")): + float_frame.fillna([1, 2]) + # tuple + with pytest.raises(TypeError, match=msg.format("tuple")): + float_frame.fillna((1, 2)) + # frame with series + msg = ( + '"value" parameter must be a scalar, dict or Series, but you ' + 'passed a "DataFrame"' + ) + with pytest.raises(TypeError, match=msg): + float_frame.iloc[:, 0].fillna(float_frame) + + def 
test_fillna_col_reordering(self): + cols = ["COL." + str(i) for i in range(5, 0, -1)] + data = np.random.rand(20, 5) + df = DataFrame(index=range(20), columns=cols, data=data) + filled = df.fillna(method="ffill") + assert df.columns.tolist() == filled.columns.tolist() + + def test_fill_corner(self, float_frame, float_string_frame): + mf = float_string_frame + mf.loc[mf.index[5:20], "foo"] = np.nan + mf.loc[mf.index[-10:], "A"] = np.nan + + filled = float_string_frame.fillna(value=0) + assert (filled.loc[filled.index[5:20], "foo"] == 0).all() + del float_string_frame["foo"] + + empty_float = float_frame.reindex(columns=[]) + + # TODO(wesm): unused? + result = empty_float.fillna(value=0) # noqa + + def test_fillna_downcast_dict(self): + # GH#40809 + df = DataFrame({"col1": [1, np.nan]}) + result = df.fillna({"col1": 2}, downcast={"col1": "int64"}) + expected = DataFrame({"col1": [1, 2]}) + tm.assert_frame_equal(result, expected) + + def test_fillna_pos_args_deprecation(self): + # https://github.com/pandas-dev/pandas/issues/41485 + df = DataFrame({"a": [1, 2, 3, np.nan]}, dtype=float) + msg = ( + r"In a future version of pandas all arguments of DataFrame.fillna " + r"except for the argument 'value' will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.fillna(0, None, None) + expected = DataFrame({"a": [1, 2, 3, 0]}, dtype=float) + tm.assert_frame_equal(result, expected) + + def test_fillna_with_columns_and_limit(self): + # GH40989 + df = DataFrame( + [ + [np.nan, 2, np.nan, 0], + [3, 4, np.nan, 1], + [np.nan, np.nan, np.nan, 5], + [np.nan, 3, np.nan, 4], + ], + columns=list("ABCD"), + ) + result = df.fillna(axis=1, value=100, limit=1) + result2 = df.fillna(axis=1, value=100, limit=2) + + expected = DataFrame( + { + "A": Series([100, 3, 100, 100], dtype="float64"), + "B": [2, 4, np.nan, 3], + "C": [np.nan, 100, np.nan, np.nan], + "D": Series([0, 1, 5, 4], dtype="float64"), + }, + index=[0, 1, 2, 3], + ) + expected2 = DataFrame( + { + "A": Series([100, 3, 100, 100], dtype="float64"), + "B": Series([2, 4, 100, 3], dtype="float64"), + "C": [100, 100, np.nan, 100], + "D": Series([0, 1, 5, 4], dtype="float64"), + }, + index=[0, 1, 2, 3], + ) + + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected2) + + def test_fillna_inplace_with_columns_limit_and_value(self): + # GH40989 + df = DataFrame( + [ + [np.nan, 2, np.nan, 0], + [3, 4, np.nan, 1], + [np.nan, np.nan, np.nan, 5], + [np.nan, 3, np.nan, 4], + ], + columns=list("ABCD"), + ) + + expected = df.fillna(axis=1, value=100, limit=1) + assert expected is not df + + df.fillna(axis=1, value=100, limit=1, inplace=True) + tm.assert_frame_equal(df, expected) + + +def test_fillna_nonconsolidated_frame(): + # https://github.com/pandas-dev/pandas/issues/36495 + df = DataFrame( + [ + [1, 1, 1, 1.0], + [2, 2, 2, 2.0], + [3, 3, 3, 3.0], + ], + columns=["i1", "i2", "i3", "f1"], + ) + df_nonconsol = df.pivot("i1", "i2") + result = df_nonconsol.fillna(0) + assert result.isna().sum().sum() == 0 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_filter.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_filter.py new file mode 100644 index 0000000..af77db4 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_filter.py @@ -0,0 +1,139 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame +import pandas._testing as tm + + +class TestDataFrameFilter: + def test_filter(self, 
float_frame, float_string_frame): + # Items + filtered = float_frame.filter(["A", "B", "E"]) + assert len(filtered.columns) == 2 + assert "E" not in filtered + + filtered = float_frame.filter(["A", "B", "E"], axis="columns") + assert len(filtered.columns) == 2 + assert "E" not in filtered + + # Other axis + idx = float_frame.index[0:4] + filtered = float_frame.filter(idx, axis="index") + expected = float_frame.reindex(index=idx) + tm.assert_frame_equal(filtered, expected) + + # like + fcopy = float_frame.copy() + fcopy["AA"] = 1 + + filtered = fcopy.filter(like="A") + assert len(filtered.columns) == 2 + assert "AA" in filtered + + # like with ints in column names + df = DataFrame(0.0, index=[0, 1, 2], columns=[0, 1, "_A", "_B"]) + filtered = df.filter(like="_") + assert len(filtered.columns) == 2 + + # regex with ints in column names + # from PR #10384 + df = DataFrame(0.0, index=[0, 1, 2], columns=["A1", 1, "B", 2, "C"]) + expected = DataFrame( + 0.0, index=[0, 1, 2], columns=pd.Index([1, 2], dtype=object) + ) + filtered = df.filter(regex="^[0-9]+$") + tm.assert_frame_equal(filtered, expected) + + expected = DataFrame(0.0, index=[0, 1, 2], columns=[0, "0", 1, "1"]) + # shouldn't remove anything + filtered = expected.filter(regex="^[0-9]+$") + tm.assert_frame_equal(filtered, expected) + + # pass in None + with pytest.raises(TypeError, match="Must pass"): + float_frame.filter() + with pytest.raises(TypeError, match="Must pass"): + float_frame.filter(items=None) + with pytest.raises(TypeError, match="Must pass"): + float_frame.filter(axis=1) + + # test mutually exclusive arguments + with pytest.raises(TypeError, match="mutually exclusive"): + float_frame.filter(items=["one", "three"], regex="e$", like="bbi") + with pytest.raises(TypeError, match="mutually exclusive"): + float_frame.filter(items=["one", "three"], regex="e$", axis=1) + with pytest.raises(TypeError, match="mutually exclusive"): + float_frame.filter(items=["one", "three"], regex="e$") + with pytest.raises(TypeError, match="mutually exclusive"): + float_frame.filter(items=["one", "three"], like="bbi", axis=0) + with pytest.raises(TypeError, match="mutually exclusive"): + float_frame.filter(items=["one", "three"], like="bbi") + + # objects + filtered = float_string_frame.filter(like="foo") + assert "foo" in filtered + + # unicode columns, won't ascii-encode + df = float_frame.rename(columns={"B": "\u2202"}) + filtered = df.filter(like="C") + assert "C" in filtered + + def test_filter_regex_search(self, float_frame): + fcopy = float_frame.copy() + fcopy["AA"] = 1 + + # regex + filtered = fcopy.filter(regex="[A]+") + assert len(filtered.columns) == 2 + assert "AA" in filtered + + # doesn't have to be at beginning + df = DataFrame( + {"aBBa": [1, 2], "BBaBB": [1, 2], "aCCa": [1, 2], "aCCaBB": [1, 2]} + ) + + result = df.filter(regex="BB") + exp = df[[x for x in df.columns if "BB" in x]] + tm.assert_frame_equal(result, exp) + + @pytest.mark.parametrize( + "name,expected", + [ + ("a", DataFrame({"a": [1, 2]})), + ("a", DataFrame({"a": [1, 2]})), + ("あ", DataFrame({"あ": [3, 4]})), + ], + ) + def test_filter_unicode(self, name, expected): + # GH13101 + df = DataFrame({"a": [1, 2], "あ": [3, 4]}) + + tm.assert_frame_equal(df.filter(like=name), expected) + tm.assert_frame_equal(df.filter(regex=name), expected) + + @pytest.mark.parametrize("name", ["a", "a"]) + def test_filter_bytestring(self, name): + # GH13101 + df = DataFrame({b"a": [1, 2], b"b": [3, 4]}) + expected = DataFrame({b"a": [1, 2]}) + + 
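+ # (illustrative note, not from the original pandas test) the str pattern "a" + # selects the bytes-labeled column b"a" because filter() compares like/regex + # patterns against a str form of each label, the behavior pinned down by GH13101.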
tm.assert_frame_equal(df.filter(like=name), expected) + tm.assert_frame_equal(df.filter(regex=name), expected) + + def test_filter_corner(self): + empty = DataFrame() + + result = empty.filter([]) + tm.assert_frame_equal(result, empty) + + result = empty.filter(like="foo") + tm.assert_frame_equal(result, empty) + + def test_filter_regex_non_string(self): + # GH#5798 trying to filter on non-string columns should drop, + # not raise + df = DataFrame(np.random.random((3, 2)), columns=["STRING", 123]) + result = df.filter(regex="STRING") + expected = df[["STRING"]] + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_first_and_last.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_first_and_last.py new file mode 100644 index 0000000..6b11211 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_first_and_last.py @@ -0,0 +1,88 @@ +""" +Note: includes tests for `last` +""" +import pytest + +from pandas import ( + DataFrame, + bdate_range, +) +import pandas._testing as tm + + +class TestFirst: + def test_first_subset(self, frame_or_series): + ts = tm.makeTimeDataFrame(freq="12h") + ts = tm.get_obj(ts, frame_or_series) + result = ts.first("10d") + assert len(result) == 20 + + ts = tm.makeTimeDataFrame(freq="D") + ts = tm.get_obj(ts, frame_or_series) + result = ts.first("10d") + assert len(result) == 10 + + result = ts.first("3M") + expected = ts[:"3/31/2000"] + tm.assert_equal(result, expected) + + result = ts.first("21D") + expected = ts[:21] + tm.assert_equal(result, expected) + + result = ts[:0].first("3M") + tm.assert_equal(result, ts[:0]) + + def test_first_last_raises(self, frame_or_series): + # GH#20725 + obj = DataFrame([[1, 2, 3], [4, 5, 6]]) + obj = tm.get_obj(obj, frame_or_series) + + msg = "'first' only supports a DatetimeIndex index" + with pytest.raises(TypeError, match=msg): # index is not a DatetimeIndex + obj.first("1D") + + msg = "'last' only supports a DatetimeIndex index" + with pytest.raises(TypeError, match=msg): # index is not a DatetimeIndex + obj.last("1D") + + def test_last_subset(self, frame_or_series): + ts = tm.makeTimeDataFrame(freq="12h") + ts = tm.get_obj(ts, frame_or_series) + result = ts.last("10d") + assert len(result) == 20 + + ts = tm.makeTimeDataFrame(nper=30, freq="D") + ts = tm.get_obj(ts, frame_or_series) + result = ts.last("10d") + assert len(result) == 10 + + result = ts.last("21D") + expected = ts["2000-01-10":] + tm.assert_equal(result, expected) + + result = ts.last("21D") + expected = ts[-21:] + tm.assert_equal(result, expected) + + result = ts[:0].last("3M") + tm.assert_equal(result, ts[:0]) + + @pytest.mark.parametrize("start, periods", [("2010-03-31", 1), ("2010-03-30", 2)]) + def test_first_with_first_day_last_of_month(self, frame_or_series, start, periods): + # GH#29623 + x = frame_or_series([1] * 100, index=bdate_range(start, periods=100)) + result = x.first("1M") + expected = frame_or_series( + [1] * periods, index=bdate_range(start, periods=periods) + ) + tm.assert_equal(result, expected) + + def test_first_with_first_day_end_of_frq_n_greater_one(self, frame_or_series): + # GH#29623 + x = frame_or_series([1] * 100, index=bdate_range("2010-03-31", periods=100)) + result = x.first("2M") + expected = frame_or_series( + [1] * 23, index=bdate_range("2010-03-31", "2010-04-30") + ) + tm.assert_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_first_valid_index.py 
b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_first_valid_index.py
new file mode 100644
index 0000000..e4cbd89
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_first_valid_index.py
@@ -0,0 +1,94 @@
+"""
+Includes tests for last_valid_index.
+"""
+import numpy as np
+import pytest
+
+from pandas import (
+    DataFrame,
+    Series,
+    date_range,
+)
+import pandas._testing as tm
+
+
+class TestFirstValidIndex:
+    def test_first_valid_index_single_nan(self, frame_or_series):
+        # GH#9752 Series/DataFrame should both return None, not raise
+        obj = frame_or_series([np.nan])
+
+        assert obj.first_valid_index() is None
+        assert obj.iloc[:0].first_valid_index() is None
+
+    @pytest.mark.parametrize(
+        "empty", [DataFrame(), Series(dtype=object), Series([], index=[], dtype=object)]
+    )
+    def test_first_valid_index_empty(self, empty):
+        # GH#12800
+        assert empty.last_valid_index() is None
+        assert empty.first_valid_index() is None
+
+    @pytest.mark.parametrize(
+        "data,idx,expected_first,expected_last",
+        [
+            ({"A": [1, 2, 3]}, [1, 1, 2], 1, 2),
+            ({"A": [1, 2, 3]}, [1, 2, 2], 1, 2),
+            ({"A": [1, 2, 3, 4]}, ["d", "d", "d", "d"], "d", "d"),
+            ({"A": [1, np.nan, 3]}, [1, 1, 2], 1, 2),
+            ({"A": [np.nan, np.nan, 3]}, [1, 1, 2], 2, 2),
+            ({"A": [1, np.nan, 3]}, [1, 2, 2], 1, 2),
+        ],
+    )
+    def test_first_last_valid_frame(self, data, idx, expected_first, expected_last):
+        # GH#21441
+        df = DataFrame(data, index=idx)
+        assert expected_first == df.first_valid_index()
+        assert expected_last == df.last_valid_index()
+
+    @pytest.mark.parametrize("index_func", [tm.makeStringIndex, tm.makeDateIndex])
+    def test_first_last_valid(self, index_func):
+        N = 30
+        index = index_func(N)
+        mat = np.random.randn(N)
+        mat[:5] = np.nan
+        mat[-5:] = np.nan
+
+        frame = DataFrame({"foo": mat}, index=index)
+        assert frame.first_valid_index() == frame.index[5]
+        assert frame.last_valid_index() == frame.index[-6]
+
+        ser = frame["foo"]
+        assert ser.first_valid_index() == frame.index[5]
+        assert ser.last_valid_index() == frame.index[-6]
+
+    @pytest.mark.parametrize("index_func", [tm.makeStringIndex, tm.makeDateIndex])
+    def test_first_last_valid_all_nan(self, index_func):
+        # GH#17400: no valid entries
+        index = index_func(30)
+        frame = DataFrame(np.nan, columns=["foo"], index=index)
+
+        assert frame.last_valid_index() is None
+        assert frame.first_valid_index() is None
+
+        ser = frame["foo"]
+        assert ser.first_valid_index() is None
+        assert ser.last_valid_index() is None
+
+    @pytest.mark.filterwarnings("ignore:Timestamp.freq is deprecated:FutureWarning")
+    def test_first_last_valid_preserves_freq(self):
+        # GH#20499: it preserves freq with holes
+        index = date_range("20110101", periods=30, freq="B")
+        frame = DataFrame(np.nan, columns=["foo"], index=index)
+
+        frame.iloc[1] = 1
+        frame.iloc[-2] = 1
+        assert frame.first_valid_index() == frame.index[1]
+        assert frame.last_valid_index() == frame.index[-2]
+        assert frame.first_valid_index().freq == frame.index.freq
+        assert frame.last_valid_index().freq == frame.index.freq
+
+        ts = frame["foo"]
+        assert ts.first_valid_index() == ts.index[1]
+        assert ts.last_valid_index() == ts.index[-2]
+        assert ts.first_valid_index().freq == ts.index.freq
+        assert ts.last_valid_index().freq == ts.index.freq
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_get_numeric_data.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_get_numeric_data.py
new file mode 100644
index 0000000..8628b76
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_get_numeric_data.py @@ -0,0 +1,103 @@ +import numpy as np + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + Index, + Series, + Timestamp, +) +import pandas._testing as tm +from pandas.core.arrays import IntervalArray + + +class TestGetNumericData: + def test_get_numeric_data_preserve_dtype(self): + # get the numeric data + obj = DataFrame({"A": [1, "2", 3.0]}) + result = obj._get_numeric_data() + expected = DataFrame(index=[0, 1, 2], dtype=object) + tm.assert_frame_equal(result, expected) + + def test_get_numeric_data(self): + + datetime64name = np.dtype("M8[ns]").name + objectname = np.dtype(np.object_).name + + df = DataFrame( + {"a": 1.0, "b": 2, "c": "foo", "f": Timestamp("20010102")}, + index=np.arange(10), + ) + result = df.dtypes + expected = Series( + [ + np.dtype("float64"), + np.dtype("int64"), + np.dtype(objectname), + np.dtype(datetime64name), + ], + index=["a", "b", "c", "f"], + ) + tm.assert_series_equal(result, expected) + + df = DataFrame( + { + "a": 1.0, + "b": 2, + "c": "foo", + "d": np.array([1.0] * 10, dtype="float32"), + "e": np.array([1] * 10, dtype="int32"), + "f": np.array([1] * 10, dtype="int16"), + "g": Timestamp("20010102"), + }, + index=np.arange(10), + ) + + result = df._get_numeric_data() + expected = df.loc[:, ["a", "b", "d", "e", "f"]] + tm.assert_frame_equal(result, expected) + + only_obj = df.loc[:, ["c", "g"]] + result = only_obj._get_numeric_data() + expected = df.loc[:, []] + tm.assert_frame_equal(result, expected) + + df = DataFrame.from_dict({"a": [1, 2], "b": ["foo", "bar"], "c": [np.pi, np.e]}) + result = df._get_numeric_data() + expected = DataFrame.from_dict({"a": [1, 2], "c": [np.pi, np.e]}) + tm.assert_frame_equal(result, expected) + + df = result.copy() + result = df._get_numeric_data() + expected = df + tm.assert_frame_equal(result, expected) + + def test_get_numeric_data_mixed_dtype(self): + # numeric and object columns + + df = DataFrame( + { + "a": [1, 2, 3], + "b": [True, False, True], + "c": ["foo", "bar", "baz"], + "d": [None, None, None], + "e": [3.14, 0.577, 2.773], + } + ) + result = df._get_numeric_data() + tm.assert_index_equal(result.columns, Index(["a", "b", "e"])) + + def test_get_numeric_data_extension_dtype(self): + # GH#22290 + df = DataFrame( + { + "A": pd.array([-10, np.nan, 0, 10, 20, 30], dtype="Int64"), + "B": Categorical(list("abcabc")), + "C": pd.array([0, 1, 2, 3, np.nan, 5], dtype="UInt8"), + "D": IntervalArray.from_breaks(range(7)), + } + ) + result = df._get_numeric_data() + expected = df.loc[:, ["A", "C"]] + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_head_tail.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_head_tail.py new file mode 100644 index 0000000..99cb784 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_head_tail.py @@ -0,0 +1,57 @@ +import numpy as np + +from pandas import DataFrame +import pandas._testing as tm + + +def test_head_tail_generic(index, frame_or_series): + # GH#5370 + + ndim = 2 if frame_or_series is DataFrame else 1 + shape = (len(index),) * ndim + vals = np.random.randn(*shape) + obj = frame_or_series(vals, index=index) + + tm.assert_equal(obj.head(), obj.iloc[:5]) + tm.assert_equal(obj.tail(), obj.iloc[-5:]) + + # 0-len + tm.assert_equal(obj.head(0), obj.iloc[0:0]) + tm.assert_equal(obj.tail(0), obj.iloc[0:0]) + + # bounded + 
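+    # (n larger than len(obj) is clipped: head/tail hand back the whole
+    # object rather than raising, as the two assertions below check)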
tm.assert_equal(obj.head(len(obj) + 1), obj) + tm.assert_equal(obj.tail(len(obj) + 1), obj) + + # neg index + tm.assert_equal(obj.head(-3), obj.head(len(index) - 3)) + tm.assert_equal(obj.tail(-3), obj.tail(len(index) - 3)) + + +def test_head_tail(float_frame): + tm.assert_frame_equal(float_frame.head(), float_frame[:5]) + tm.assert_frame_equal(float_frame.tail(), float_frame[-5:]) + + tm.assert_frame_equal(float_frame.head(0), float_frame[0:0]) + tm.assert_frame_equal(float_frame.tail(0), float_frame[0:0]) + + tm.assert_frame_equal(float_frame.head(-1), float_frame[:-1]) + tm.assert_frame_equal(float_frame.tail(-1), float_frame[1:]) + tm.assert_frame_equal(float_frame.head(1), float_frame[:1]) + tm.assert_frame_equal(float_frame.tail(1), float_frame[-1:]) + # with a float index + df = float_frame.copy() + df.index = np.arange(len(float_frame)) + 0.1 + tm.assert_frame_equal(df.head(), df.iloc[:5]) + tm.assert_frame_equal(df.tail(), df.iloc[-5:]) + tm.assert_frame_equal(df.head(0), df[0:0]) + tm.assert_frame_equal(df.tail(0), df[0:0]) + tm.assert_frame_equal(df.head(-1), df.iloc[:-1]) + tm.assert_frame_equal(df.tail(-1), df.iloc[1:]) + + +def test_head_tail_empty(): + # test empty dataframe + empty_df = DataFrame() + tm.assert_frame_equal(empty_df.tail(), empty_df) + tm.assert_frame_equal(empty_df.head(), empty_df) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_infer_objects.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_infer_objects.py new file mode 100644 index 0000000..a824a61 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_infer_objects.py @@ -0,0 +1,42 @@ +from datetime import datetime + +from pandas import DataFrame +import pandas._testing as tm + + +class TestInferObjects: + def test_infer_objects(self): + # GH#11221 + df = DataFrame( + { + "a": ["a", 1, 2, 3], + "b": ["b", 2.0, 3.0, 4.1], + "c": [ + "c", + datetime(2016, 1, 1), + datetime(2016, 1, 2), + datetime(2016, 1, 3), + ], + "d": [1, 2, 3, "d"], + }, + columns=["a", "b", "c", "d"], + ) + df = df.iloc[1:].infer_objects() + + assert df["a"].dtype == "int64" + assert df["b"].dtype == "float64" + assert df["c"].dtype == "M8[ns]" + assert df["d"].dtype == "object" + + expected = DataFrame( + { + "a": [1, 2, 3], + "b": [2.0, 3.0, 4.1], + "c": [datetime(2016, 1, 1), datetime(2016, 1, 2), datetime(2016, 1, 3)], + "d": [2, 3, "d"], + }, + columns=["a", "b", "c", "d"], + ) + # reconstruct frame to verify inference is same + result = df.reset_index(drop=True) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_interpolate.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_interpolate.py new file mode 100644 index 0000000..37fb075 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_interpolate.py @@ -0,0 +1,376 @@ +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas import ( + DataFrame, + Series, + date_range, +) +import pandas._testing as tm + + +class TestDataFrameInterpolate: + def test_interpolate_inplace(self, frame_or_series, using_array_manager, request): + # GH#44749 + if using_array_manager and frame_or_series is DataFrame: + mark = pytest.mark.xfail(reason=".values-based in-place check is invalid") + request.node.add_marker(mark) + + obj = frame_or_series([1, np.nan, 2]) + orig = obj.values + + obj.interpolate(inplace=True) + expected = frame_or_series([1, 1.5, 2]) 
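+        # the default method="linear" fills the interior NaN with the
+        # midpoint of its neighbours: (1 + 2) / 2 == 1.5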
+ tm.assert_equal(obj, expected) + + # check we operated *actually* inplace + assert np.shares_memory(orig, obj.values) + assert orig.squeeze()[1] == 1.5 + + def test_interp_basic(self): + df = DataFrame( + { + "A": [1, 2, np.nan, 4], + "B": [1, 4, 9, np.nan], + "C": [1, 2, 3, 5], + "D": list("abcd"), + } + ) + expected = DataFrame( + { + "A": [1.0, 2.0, 3.0, 4.0], + "B": [1.0, 4.0, 9.0, 9.0], + "C": [1, 2, 3, 5], + "D": list("abcd"), + } + ) + result = df.interpolate() + tm.assert_frame_equal(result, expected) + + result = df.set_index("C").interpolate() + expected = df.set_index("C") + expected.loc[3, "A"] = 3 + expected.loc[5, "B"] = 9 + tm.assert_frame_equal(result, expected) + + def test_interp_empty(self): + # https://github.com/pandas-dev/pandas/issues/35598 + df = DataFrame() + result = df.interpolate() + assert result is not df + expected = df + tm.assert_frame_equal(result, expected) + + def test_interp_bad_method(self): + df = DataFrame( + { + "A": [1, 2, np.nan, 4], + "B": [1, 4, 9, np.nan], + "C": [1, 2, 3, 5], + "D": list("abcd"), + } + ) + msg = ( + r"method must be one of \['linear', 'time', 'index', 'values', " + r"'nearest', 'zero', 'slinear', 'quadratic', 'cubic', " + r"'barycentric', 'krogh', 'spline', 'polynomial', " + r"'from_derivatives', 'piecewise_polynomial', 'pchip', 'akima', " + r"'cubicspline'\]. Got 'not_a_method' instead." + ) + with pytest.raises(ValueError, match=msg): + df.interpolate(method="not_a_method") + + def test_interp_combo(self): + df = DataFrame( + { + "A": [1.0, 2.0, np.nan, 4.0], + "B": [1, 4, 9, np.nan], + "C": [1, 2, 3, 5], + "D": list("abcd"), + } + ) + + result = df["A"].interpolate() + expected = Series([1.0, 2.0, 3.0, 4.0], name="A") + tm.assert_series_equal(result, expected) + + result = df["A"].interpolate(downcast="infer") + expected = Series([1, 2, 3, 4], name="A") + tm.assert_series_equal(result, expected) + + def test_interp_nan_idx(self): + df = DataFrame({"A": [1, 2, np.nan, 4], "B": [np.nan, 2, 3, 4]}) + df = df.set_index("A") + msg = ( + "Interpolation with NaNs in the index has not been implemented. " + "Try filling those NaNs before interpolating." + ) + with pytest.raises(NotImplementedError, match=msg): + df.interpolate(method="values") + + @td.skip_if_no_scipy + def test_interp_various(self): + df = DataFrame( + {"A": [1, 2, np.nan, 4, 5, np.nan, 7], "C": [1, 2, 3, 5, 8, 13, 21]} + ) + df = df.set_index("C") + expected = df.copy() + result = df.interpolate(method="polynomial", order=1) + + expected.loc[3, "A"] = 2.66666667 + expected.loc[13, "A"] = 5.76923076 + tm.assert_frame_equal(result, expected) + + result = df.interpolate(method="cubic") + # GH #15662. 
+ expected.loc[3, "A"] = 2.81547781 + expected.loc[13, "A"] = 5.52964175 + tm.assert_frame_equal(result, expected) + + result = df.interpolate(method="nearest") + expected.loc[3, "A"] = 2 + expected.loc[13, "A"] = 5 + tm.assert_frame_equal(result, expected, check_dtype=False) + + result = df.interpolate(method="quadratic") + expected.loc[3, "A"] = 2.82150771 + expected.loc[13, "A"] = 6.12648668 + tm.assert_frame_equal(result, expected) + + result = df.interpolate(method="slinear") + expected.loc[3, "A"] = 2.66666667 + expected.loc[13, "A"] = 5.76923077 + tm.assert_frame_equal(result, expected) + + result = df.interpolate(method="zero") + expected.loc[3, "A"] = 2.0 + expected.loc[13, "A"] = 5 + tm.assert_frame_equal(result, expected, check_dtype=False) + + @td.skip_if_no_scipy + def test_interp_alt_scipy(self): + df = DataFrame( + {"A": [1, 2, np.nan, 4, 5, np.nan, 7], "C": [1, 2, 3, 5, 8, 13, 21]} + ) + result = df.interpolate(method="barycentric") + expected = df.copy() + expected.loc[2, "A"] = 3 + expected.loc[5, "A"] = 6 + tm.assert_frame_equal(result, expected) + + result = df.interpolate(method="barycentric", downcast="infer") + tm.assert_frame_equal(result, expected.astype(np.int64)) + + result = df.interpolate(method="krogh") + expectedk = df.copy() + expectedk["A"] = expected["A"] + tm.assert_frame_equal(result, expectedk) + + result = df.interpolate(method="pchip") + expected.loc[2, "A"] = 3 + expected.loc[5, "A"] = 6.0 + + tm.assert_frame_equal(result, expected) + + def test_interp_rowwise(self): + df = DataFrame( + { + 0: [1, 2, np.nan, 4], + 1: [2, 3, 4, np.nan], + 2: [np.nan, 4, 5, 6], + 3: [4, np.nan, 6, 7], + 4: [1, 2, 3, 4], + } + ) + result = df.interpolate(axis=1) + expected = df.copy() + expected.loc[3, 1] = 5 + expected.loc[0, 2] = 3 + expected.loc[1, 3] = 3 + expected[4] = expected[4].astype(np.float64) + tm.assert_frame_equal(result, expected) + + result = df.interpolate(axis=1, method="values") + tm.assert_frame_equal(result, expected) + + result = df.interpolate(axis=0) + expected = df.interpolate() + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "axis_name, axis_number", + [ + pytest.param("rows", 0, id="rows_0"), + pytest.param("index", 0, id="index_0"), + pytest.param("columns", 1, id="columns_1"), + ], + ) + def test_interp_axis_names(self, axis_name, axis_number): + # GH 29132: test axis names + data = {0: [0, np.nan, 6], 1: [1, np.nan, 7], 2: [2, 5, 8]} + + df = DataFrame(data, dtype=np.float64) + result = df.interpolate(axis=axis_name, method="linear") + expected = df.interpolate(axis=axis_number, method="linear") + tm.assert_frame_equal(result, expected) + + def test_rowwise_alt(self): + df = DataFrame( + { + 0: [0, 0.5, 1.0, np.nan, 4, 8, np.nan, np.nan, 64], + 1: [1, 2, 3, 4, 3, 2, 1, 0, -1], + } + ) + df.interpolate(axis=0) + # TODO: assert something? 
+ + @pytest.mark.parametrize( + "check_scipy", [False, pytest.param(True, marks=td.skip_if_no_scipy)] + ) + def test_interp_leading_nans(self, check_scipy): + df = DataFrame( + {"A": [np.nan, np.nan, 0.5, 0.25, 0], "B": [np.nan, -3, -3.5, np.nan, -4]} + ) + result = df.interpolate() + expected = df.copy() + expected.loc[3, "B"] = -3.75 + tm.assert_frame_equal(result, expected) + + if check_scipy: + result = df.interpolate(method="polynomial", order=1) + tm.assert_frame_equal(result, expected) + + def test_interp_raise_on_only_mixed(self, axis): + df = DataFrame( + { + "A": [1, 2, np.nan, 4], + "B": ["a", "b", "c", "d"], + "C": [np.nan, 2, 5, 7], + "D": [np.nan, np.nan, 9, 9], + "E": [1, 2, 3, 4], + } + ) + msg = ( + "Cannot interpolate with all object-dtype columns " + "in the DataFrame. Try setting at least one " + "column to a numeric dtype." + ) + with pytest.raises(TypeError, match=msg): + df.astype("object").interpolate(axis=axis) + + def test_interp_raise_on_all_object_dtype(self): + # GH 22985 + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, dtype="object") + msg = ( + "Cannot interpolate with all object-dtype columns " + "in the DataFrame. Try setting at least one " + "column to a numeric dtype." + ) + with pytest.raises(TypeError, match=msg): + df.interpolate() + + def test_interp_inplace(self): + df = DataFrame({"a": [1.0, 2.0, np.nan, 4.0]}) + expected = DataFrame({"a": [1.0, 2.0, 3.0, 4.0]}) + result = df.copy() + return_value = result["a"].interpolate(inplace=True) + assert return_value is None + tm.assert_frame_equal(result, expected) + + result = df.copy() + return_value = result["a"].interpolate(inplace=True, downcast="infer") + assert return_value is None + tm.assert_frame_equal(result, expected.astype("int64")) + + def test_interp_inplace_row(self): + # GH 10395 + result = DataFrame( + {"a": [1.0, 2.0, 3.0, 4.0], "b": [np.nan, 2.0, 3.0, 4.0], "c": [3, 2, 2, 2]} + ) + expected = result.interpolate(method="linear", axis=1, inplace=False) + return_value = result.interpolate(method="linear", axis=1, inplace=True) + assert return_value is None + tm.assert_frame_equal(result, expected) + + def test_interp_ignore_all_good(self): + # GH + df = DataFrame( + { + "A": [1, 2, np.nan, 4], + "B": [1, 2, 3, 4], + "C": [1.0, 2.0, np.nan, 4.0], + "D": [1.0, 2.0, 3.0, 4.0], + } + ) + expected = DataFrame( + { + "A": np.array([1, 2, 3, 4], dtype="float64"), + "B": np.array([1, 2, 3, 4], dtype="int64"), + "C": np.array([1.0, 2.0, 3, 4.0], dtype="float64"), + "D": np.array([1.0, 2.0, 3.0, 4.0], dtype="float64"), + } + ) + + result = df.interpolate(downcast=None) + tm.assert_frame_equal(result, expected) + + # all good + result = df[["B", "D"]].interpolate(downcast=None) + tm.assert_frame_equal(result, df[["B", "D"]]) + + def test_interp_time_inplace_axis(self, axis): + # GH 9687 + periods = 5 + idx = date_range(start="2014-01-01", periods=periods) + data = np.random.rand(periods, periods) + data[data < 0.5] = np.nan + expected = DataFrame(index=idx, columns=idx, data=data) + + result = expected.interpolate(axis=0, method="time") + return_value = expected.interpolate(axis=0, method="time", inplace=True) + assert return_value is None + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("axis_name, axis_number", [("index", 0), ("columns", 1)]) + def test_interp_string_axis(self, axis_name, axis_number): + # https://github.com/pandas-dev/pandas/issues/25190 + x = np.linspace(0, 100, 1000) + y = np.sin(x) + df = DataFrame( + data=np.tile(y, (10, 1)), index=np.arange(10), 
columns=x + ).reindex(columns=x * 1.005) + result = df.interpolate(method="linear", axis=axis_name) + expected = df.interpolate(method="linear", axis=axis_number) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("method", ["ffill", "bfill", "pad"]) + def test_interp_fillna_methods(self, request, axis, method, using_array_manager): + # GH 12918 + if using_array_manager and (axis == 1 or axis == "columns"): + # TODO(ArrayManager) support axis=1 + td.mark_array_manager_not_yet_implemented(request) + + df = DataFrame( + { + "A": [1.0, 2.0, 3.0, 4.0, np.nan, 5.0], + "B": [2.0, 4.0, 6.0, np.nan, 8.0, 10.0], + "C": [3.0, 6.0, 9.0, np.nan, np.nan, 30.0], + } + ) + expected = df.fillna(axis=axis, method=method) + result = df.interpolate(method=method, axis=axis) + tm.assert_frame_equal(result, expected) + + def test_interpolate_pos_args_deprecation(self): + # https://github.com/pandas-dev/pandas/issues/41485 + df = DataFrame({"a": [1, 2, 3]}) + msg = ( + r"In a future version of pandas all arguments of DataFrame.interpolate " + r"except for the argument 'method' will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.interpolate("pad", 0) + expected = DataFrame({"a": [1, 2, 3]}) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_is_homogeneous_dtype.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_is_homogeneous_dtype.py new file mode 100644 index 0000000..a5f285d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_is_homogeneous_dtype.py @@ -0,0 +1,57 @@ +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas import ( + Categorical, + DataFrame, +) + +# _is_homogeneous_type always returns True for ArrayManager +pytestmark = td.skip_array_manager_invalid_test + + +@pytest.mark.parametrize( + "data, expected", + [ + # empty + (DataFrame(), True), + # multi-same + (DataFrame({"A": [1, 2], "B": [1, 2]}), True), + # multi-object + ( + DataFrame( + { + "A": np.array([1, 2], dtype=object), + "B": np.array(["a", "b"], dtype=object), + } + ), + True, + ), + # multi-extension + ( + DataFrame({"A": Categorical(["a", "b"]), "B": Categorical(["a", "b"])}), + True, + ), + # differ types + (DataFrame({"A": [1, 2], "B": [1.0, 2.0]}), False), + # differ sizes + ( + DataFrame( + { + "A": np.array([1, 2], dtype=np.int32), + "B": np.array([1, 2], dtype=np.int64), + } + ), + False, + ), + # multi-extension differ + ( + DataFrame({"A": Categorical(["a", "b"]), "B": Categorical(["b", "c"])}), + False, + ), + ], +) +def test_is_homogeneous_type(data, expected): + assert data._is_homogeneous_type is expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_isin.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_isin.py new file mode 100644 index 0000000..e924963 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_isin.py @@ -0,0 +1,219 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + MultiIndex, + Series, +) +import pandas._testing as tm + + +class TestDataFrameIsIn: + def test_isin(self): + # GH#4211 + df = DataFrame( + { + "vals": [1, 2, 3, 4], + "ids": ["a", "b", "f", "n"], + "ids2": ["a", "n", "c", "n"], + }, + index=["foo", "bar", "baz", "qux"], + ) + other = ["a", "b", "c"] + + result = df.isin(other) + expected = DataFrame([df.loc[s].isin(other) for s 
in df.index]) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("empty", [[], Series(dtype=object), np.array([])]) + def test_isin_empty(self, empty): + # GH#16991 + df = DataFrame({"A": ["a", "b", "c"], "B": ["a", "e", "f"]}) + expected = DataFrame(False, df.index, df.columns) + + result = df.isin(empty) + tm.assert_frame_equal(result, expected) + + def test_isin_dict(self): + df = DataFrame({"A": ["a", "b", "c"], "B": ["a", "e", "f"]}) + d = {"A": ["a"]} + + expected = DataFrame(False, df.index, df.columns) + expected.loc[0, "A"] = True + + result = df.isin(d) + tm.assert_frame_equal(result, expected) + + # non unique columns + df = DataFrame({"A": ["a", "b", "c"], "B": ["a", "e", "f"]}) + df.columns = ["A", "A"] + expected = DataFrame(False, df.index, df.columns) + expected.loc[0, "A"] = True + result = df.isin(d) + tm.assert_frame_equal(result, expected) + + def test_isin_with_string_scalar(self): + # GH#4763 + df = DataFrame( + { + "vals": [1, 2, 3, 4], + "ids": ["a", "b", "f", "n"], + "ids2": ["a", "n", "c", "n"], + }, + index=["foo", "bar", "baz", "qux"], + ) + msg = ( + r"only list-like or dict-like objects are allowed " + r"to be passed to DataFrame.isin\(\), you passed a 'str'" + ) + with pytest.raises(TypeError, match=msg): + df.isin("a") + + with pytest.raises(TypeError, match=msg): + df.isin("aaa") + + def test_isin_df(self): + df1 = DataFrame({"A": [1, 2, 3, 4], "B": [2, np.nan, 4, 4]}) + df2 = DataFrame({"A": [0, 2, 12, 4], "B": [2, np.nan, 4, 5]}) + expected = DataFrame(False, df1.index, df1.columns) + result = df1.isin(df2) + expected.loc[[1, 3], "A"] = True + expected.loc[[0, 2], "B"] = True + tm.assert_frame_equal(result, expected) + + # partial overlapping columns + df2.columns = ["A", "C"] + result = df1.isin(df2) + expected["B"] = False + tm.assert_frame_equal(result, expected) + + def test_isin_tuples(self): + # GH#16394 + df = DataFrame({"A": [1, 2, 3], "B": ["a", "b", "f"]}) + df["C"] = list(zip(df["A"], df["B"])) + result = df["C"].isin([(1, "a")]) + tm.assert_series_equal(result, Series([True, False, False], name="C")) + + def test_isin_df_dupe_values(self): + df1 = DataFrame({"A": [1, 2, 3, 4], "B": [2, np.nan, 4, 4]}) + # just cols duped + df2 = DataFrame([[0, 2], [12, 4], [2, np.nan], [4, 5]], columns=["B", "B"]) + msg = r"cannot compute isin with a duplicate axis\." 
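+        # (with duplicated labels the elementwise membership lookup is
+        # ambiguous, so isin refuses to align and raises ValueError instead)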
+ with pytest.raises(ValueError, match=msg): + df1.isin(df2) + + # just index duped + df2 = DataFrame( + [[0, 2], [12, 4], [2, np.nan], [4, 5]], + columns=["A", "B"], + index=[0, 0, 1, 1], + ) + with pytest.raises(ValueError, match=msg): + df1.isin(df2) + + # cols and index: + df2.columns = ["B", "B"] + with pytest.raises(ValueError, match=msg): + df1.isin(df2) + + def test_isin_dupe_self(self): + other = DataFrame({"A": [1, 0, 1, 0], "B": [1, 1, 0, 0]}) + df = DataFrame([[1, 1], [1, 0], [0, 0]], columns=["A", "A"]) + result = df.isin(other) + expected = DataFrame(False, index=df.index, columns=df.columns) + expected.loc[0] = True + expected.iloc[1, 1] = True + tm.assert_frame_equal(result, expected) + + def test_isin_against_series(self): + df = DataFrame( + {"A": [1, 2, 3, 4], "B": [2, np.nan, 4, 4]}, index=["a", "b", "c", "d"] + ) + s = Series([1, 3, 11, 4], index=["a", "b", "c", "d"]) + expected = DataFrame(False, index=df.index, columns=df.columns) + expected.loc["a", "A"] = True + expected.loc["d"] = True + result = df.isin(s) + tm.assert_frame_equal(result, expected) + + def test_isin_multiIndex(self): + idx = MultiIndex.from_tuples( + [ + (0, "a", "foo"), + (0, "a", "bar"), + (0, "b", "bar"), + (0, "b", "baz"), + (2, "a", "foo"), + (2, "a", "bar"), + (2, "c", "bar"), + (2, "c", "baz"), + (1, "b", "foo"), + (1, "b", "bar"), + (1, "c", "bar"), + (1, "c", "baz"), + ] + ) + df1 = DataFrame({"A": np.ones(12), "B": np.zeros(12)}, index=idx) + df2 = DataFrame( + { + "A": [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1], + "B": [1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1], + } + ) + # against regular index + expected = DataFrame(False, index=df1.index, columns=df1.columns) + result = df1.isin(df2) + tm.assert_frame_equal(result, expected) + + df2.index = idx + expected = df2.values.astype(bool) + expected[:, 1] = ~expected[:, 1] + expected = DataFrame(expected, columns=["A", "B"], index=idx) + + result = df1.isin(df2) + tm.assert_frame_equal(result, expected) + + def test_isin_empty_datetimelike(self): + # GH#15473 + df1_ts = DataFrame({"date": pd.to_datetime(["2014-01-01", "2014-01-02"])}) + df1_td = DataFrame({"date": [pd.Timedelta(1, "s"), pd.Timedelta(2, "s")]}) + df2 = DataFrame({"date": []}) + df3 = DataFrame() + + expected = DataFrame({"date": [False, False]}) + + result = df1_ts.isin(df2) + tm.assert_frame_equal(result, expected) + result = df1_ts.isin(df3) + tm.assert_frame_equal(result, expected) + + result = df1_td.isin(df2) + tm.assert_frame_equal(result, expected) + result = df1_td.isin(df3) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "values", + [ + DataFrame({"a": [1, 2, 3]}, dtype="category"), + Series([1, 2, 3], dtype="category"), + ], + ) + def test_isin_category_frame(self, values): + # GH#34256 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + expected = DataFrame({"a": [True, True, True], "b": [False, False, False]}) + + result = df.isin(values) + tm.assert_frame_equal(result, expected) + + def test_isin_read_only(self): + # https://github.com/pandas-dev/pandas/issues/37174 + arr = np.array([1, 2, 3]) + arr.setflags(write=False) + df = DataFrame([1, 2, 3]) + result = df.isin(arr) + expected = DataFrame([True, True, True]) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_join.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_join.py new file mode 100644 index 0000000..c6bfd94 --- /dev/null +++ 
b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_join.py @@ -0,0 +1,371 @@ +from datetime import datetime + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + date_range, + period_range, +) +import pandas._testing as tm + + +@pytest.fixture +def frame_with_period_index(): + return DataFrame( + data=np.arange(20).reshape(4, 5), + columns=list("abcde"), + index=period_range(start="2000", freq="A", periods=4), + ) + + +@pytest.fixture +def left(): + return DataFrame({"a": [20, 10, 0]}, index=[2, 1, 0]) + + +@pytest.fixture +def right(): + return DataFrame({"b": [300, 100, 200]}, index=[3, 1, 2]) + + +@pytest.mark.parametrize( + "how, sort, expected", + [ + ("inner", False, DataFrame({"a": [20, 10], "b": [200, 100]}, index=[2, 1])), + ("inner", True, DataFrame({"a": [10, 20], "b": [100, 200]}, index=[1, 2])), + ( + "left", + False, + DataFrame({"a": [20, 10, 0], "b": [200, 100, np.nan]}, index=[2, 1, 0]), + ), + ( + "left", + True, + DataFrame({"a": [0, 10, 20], "b": [np.nan, 100, 200]}, index=[0, 1, 2]), + ), + ( + "right", + False, + DataFrame({"a": [np.nan, 10, 20], "b": [300, 100, 200]}, index=[3, 1, 2]), + ), + ( + "right", + True, + DataFrame({"a": [10, 20, np.nan], "b": [100, 200, 300]}, index=[1, 2, 3]), + ), + ( + "outer", + False, + DataFrame( + {"a": [0, 10, 20, np.nan], "b": [np.nan, 100, 200, 300]}, + index=[0, 1, 2, 3], + ), + ), + ( + "outer", + True, + DataFrame( + {"a": [0, 10, 20, np.nan], "b": [np.nan, 100, 200, 300]}, + index=[0, 1, 2, 3], + ), + ), + ], +) +def test_join(left, right, how, sort, expected): + + result = left.join(right, how=how, sort=sort) + tm.assert_frame_equal(result, expected) + + +def test_join_index(float_frame): + # left / right + + f = float_frame.loc[float_frame.index[:10], ["A", "B"]] + f2 = float_frame.loc[float_frame.index[5:], ["C", "D"]].iloc[::-1] + + joined = f.join(f2) + tm.assert_index_equal(f.index, joined.index) + expected_columns = Index(["A", "B", "C", "D"]) + tm.assert_index_equal(joined.columns, expected_columns) + + joined = f.join(f2, how="left") + tm.assert_index_equal(joined.index, f.index) + tm.assert_index_equal(joined.columns, expected_columns) + + joined = f.join(f2, how="right") + tm.assert_index_equal(joined.index, f2.index) + tm.assert_index_equal(joined.columns, expected_columns) + + # inner + + joined = f.join(f2, how="inner") + tm.assert_index_equal(joined.index, f.index[5:10]) + tm.assert_index_equal(joined.columns, expected_columns) + + # outer + + joined = f.join(f2, how="outer") + tm.assert_index_equal(joined.index, float_frame.index.sort_values()) + tm.assert_index_equal(joined.columns, expected_columns) + + with pytest.raises(ValueError, match="join method"): + f.join(f2, how="foo") + + # corner case - overlapping columns + msg = "columns overlap but no suffix" + for how in ("outer", "left", "inner"): + with pytest.raises(ValueError, match=msg): + float_frame.join(float_frame, how=how) + + +def test_join_index_more(float_frame): + af = float_frame.loc[:, ["A", "B"]] + bf = float_frame.loc[::2, ["C", "D"]] + + expected = af.copy() + expected["C"] = float_frame["C"][::2] + expected["D"] = float_frame["D"][::2] + + result = af.join(bf) + tm.assert_frame_equal(result, expected) + + result = af.join(bf, how="right") + tm.assert_frame_equal(result, expected[::2]) + + result = bf.join(af, how="right") + tm.assert_frame_equal(result, expected.loc[:, result.columns]) + + +def test_join_index_series(float_frame): + df = float_frame.copy() 
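+    # (a Series joins as a single column, so it must carry a name; the
+    # nameless case below raises ValueError)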
+ ser = df.pop(float_frame.columns[-1]) + joined = df.join(ser) + + tm.assert_frame_equal(joined, float_frame) + + ser.name = None + with pytest.raises(ValueError, match="must have a name"): + df.join(ser) + + +def test_join_overlap(float_frame): + df1 = float_frame.loc[:, ["A", "B", "C"]] + df2 = float_frame.loc[:, ["B", "C", "D"]] + + joined = df1.join(df2, lsuffix="_df1", rsuffix="_df2") + df1_suf = df1.loc[:, ["B", "C"]].add_suffix("_df1") + df2_suf = df2.loc[:, ["B", "C"]].add_suffix("_df2") + + no_overlap = float_frame.loc[:, ["A", "D"]] + expected = df1_suf.join(df2_suf).join(no_overlap) + + # column order not necessarily sorted + tm.assert_frame_equal(joined, expected.loc[:, joined.columns]) + + +def test_join_period_index(frame_with_period_index): + other = frame_with_period_index.rename(columns=lambda key: f"{key}{key}") + + joined_values = np.concatenate([frame_with_period_index.values] * 2, axis=1) + + joined_cols = frame_with_period_index.columns.append(other.columns) + + joined = frame_with_period_index.join(other) + expected = DataFrame( + data=joined_values, columns=joined_cols, index=frame_with_period_index.index + ) + + tm.assert_frame_equal(joined, expected) + + +def test_join_left_sequence_non_unique_index(): + # https://github.com/pandas-dev/pandas/issues/19607 + df1 = DataFrame({"a": [0, 10, 20]}, index=[1, 2, 3]) + df2 = DataFrame({"b": [100, 200, 300]}, index=[4, 3, 2]) + df3 = DataFrame({"c": [400, 500, 600]}, index=[2, 2, 4]) + + joined = df1.join([df2, df3], how="left") + + expected = DataFrame( + { + "a": [0, 10, 10, 20], + "b": [np.nan, 300, 300, 200], + "c": [np.nan, 400, 500, np.nan], + }, + index=[1, 2, 2, 3], + ) + + tm.assert_frame_equal(joined, expected) + + +@pytest.mark.parametrize("sort_kw", [True, False]) +def test_suppress_future_warning_with_sort_kw(sort_kw): + a = DataFrame({"col1": [1, 2]}, index=["c", "a"]) + + b = DataFrame({"col2": [4, 5]}, index=["b", "a"]) + + c = DataFrame({"col3": [7, 8]}, index=["a", "b"]) + + expected = DataFrame( + { + "col1": {"a": 2.0, "b": float("nan"), "c": 1.0}, + "col2": {"a": 5.0, "b": 4.0, "c": float("nan")}, + "col3": {"a": 7.0, "b": 8.0, "c": float("nan")}, + } + ) + if sort_kw is False: + expected = expected.reindex(index=["c", "a", "b"]) + + with tm.assert_produces_warning(None): + result = a.join([b, c], how="outer", sort=sort_kw) + tm.assert_frame_equal(result, expected) + + +class TestDataFrameJoin: + def test_join(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + + a = frame.loc[frame.index[:5], ["A"]] + b = frame.loc[frame.index[2:], ["B", "C"]] + + joined = a.join(b, how="outer").reindex(frame.index) + expected = frame.copy().values + expected[np.isnan(joined.values)] = np.nan + expected = DataFrame(expected, index=frame.index, columns=frame.columns) + + assert not np.isnan(joined.values).all() + + tm.assert_frame_equal(joined, expected) + + def test_join_segfault(self): + # GH#1532 + df1 = DataFrame({"a": [1, 1], "b": [1, 2], "x": [1, 2]}) + df2 = DataFrame({"a": [2, 2], "b": [1, 2], "y": [1, 2]}) + df1 = df1.set_index(["a", "b"]) + df2 = df2.set_index(["a", "b"]) + # it works! 
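+        # (GH#1532 was a crash when joining frames that share a MultiIndex;
+        # merely completing the loop below is the regression check)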
+ for how in ["left", "right", "outer"]: + df1.join(df2, how=how) + + def test_join_str_datetime(self): + str_dates = ["20120209", "20120222"] + dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)] + + A = DataFrame(str_dates, index=range(2), columns=["aa"]) + C = DataFrame([[1, 2], [3, 4]], index=str_dates, columns=dt_dates) + + tst = A.join(C, on="aa") + + assert len(tst.columns) == 3 + + def test_join_multiindex_leftright(self): + # GH 10741 + df1 = DataFrame( + [ + ["a", "x", 0.471780], + ["a", "y", 0.774908], + ["a", "z", 0.563634], + ["b", "x", -0.353756], + ["b", "y", 0.368062], + ["b", "z", -1.721840], + ["c", "x", 1], + ["c", "y", 2], + ["c", "z", 3], + ], + columns=["first", "second", "value1"], + ).set_index(["first", "second"]) + + df2 = DataFrame([["a", 10], ["b", 20]], columns=["first", "value2"]).set_index( + ["first"] + ) + + exp = DataFrame( + [ + [0.471780, 10], + [0.774908, 10], + [0.563634, 10], + [-0.353756, 20], + [0.368062, 20], + [-1.721840, 20], + [1.000000, np.nan], + [2.000000, np.nan], + [3.000000, np.nan], + ], + index=df1.index, + columns=["value1", "value2"], + ) + + # these must be the same results (but columns are flipped) + tm.assert_frame_equal(df1.join(df2, how="left"), exp) + tm.assert_frame_equal(df2.join(df1, how="right"), exp[["value2", "value1"]]) + + exp_idx = MultiIndex.from_product( + [["a", "b"], ["x", "y", "z"]], names=["first", "second"] + ) + exp = DataFrame( + [ + [0.471780, 10], + [0.774908, 10], + [0.563634, 10], + [-0.353756, 20], + [0.368062, 20], + [-1.721840, 20], + ], + index=exp_idx, + columns=["value1", "value2"], + ) + + tm.assert_frame_equal(df1.join(df2, how="right"), exp) + tm.assert_frame_equal(df2.join(df1, how="left"), exp[["value2", "value1"]]) + + def test_merge_join_different_levels(self): + # GH#9455 + + # first dataframe + df1 = DataFrame(columns=["a", "b"], data=[[1, 11], [0, 22]]) + + # second dataframe + columns = MultiIndex.from_tuples([("a", ""), ("c", "c1")]) + df2 = DataFrame(columns=columns, data=[[1, 33], [0, 44]]) + + # merge + columns = ["a", "b", ("c", "c1")] + expected = DataFrame(columns=columns, data=[[1, 11, 33], [0, 22, 44]]) + with tm.assert_produces_warning(FutureWarning): + result = pd.merge(df1, df2, on="a") + tm.assert_frame_equal(result, expected) + + # join, see discussion in GH#12219 + columns = ["a", "b", ("a", ""), ("c", "c1")] + expected = DataFrame(columns=columns, data=[[1, 11, 0, 44], [0, 22, 1, 33]]) + msg = "merging between different levels is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + # stacklevel is chosen to be correct for pd.merge, not DataFrame.join + result = df1.join(df2, on="a") + tm.assert_frame_equal(result, expected) + + def test_frame_join_tzaware(self): + test1 = DataFrame( + np.zeros((6, 3)), + index=date_range( + "2012-11-15 00:00:00", periods=6, freq="100L", tz="US/Central" + ), + ) + test2 = DataFrame( + np.zeros((3, 3)), + index=date_range( + "2012-11-15 00:00:00", periods=3, freq="250L", tz="US/Central" + ), + columns=range(3, 6), + ) + + result = test1.join(test2, how="outer") + expected = test1.index.union(test2.index) + + tm.assert_index_equal(result.index, expected) + assert result.index.tz.zone == "US/Central" diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_matmul.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_matmul.py new file mode 100644 index 0000000..702ab39 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_matmul.py @@ -0,0 +1,86 
@@ +import operator + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + Series, +) +import pandas._testing as tm + + +class TestMatMul: + def test_matmul(self): + # matmul test is for GH#10259 + a = DataFrame( + np.random.randn(3, 4), index=["a", "b", "c"], columns=["p", "q", "r", "s"] + ) + b = DataFrame( + np.random.randn(4, 2), index=["p", "q", "r", "s"], columns=["one", "two"] + ) + + # DataFrame @ DataFrame + result = operator.matmul(a, b) + expected = DataFrame( + np.dot(a.values, b.values), index=["a", "b", "c"], columns=["one", "two"] + ) + tm.assert_frame_equal(result, expected) + + # DataFrame @ Series + result = operator.matmul(a, b.one) + expected = Series(np.dot(a.values, b.one.values), index=["a", "b", "c"]) + tm.assert_series_equal(result, expected) + + # np.array @ DataFrame + result = operator.matmul(a.values, b) + assert isinstance(result, DataFrame) + assert result.columns.equals(b.columns) + assert result.index.equals(Index(range(3))) + expected = np.dot(a.values, b.values) + tm.assert_almost_equal(result.values, expected) + + # nested list @ DataFrame (__rmatmul__) + result = operator.matmul(a.values.tolist(), b) + expected = DataFrame( + np.dot(a.values, b.values), index=["a", "b", "c"], columns=["one", "two"] + ) + tm.assert_almost_equal(result.values, expected.values) + + # mixed dtype DataFrame @ DataFrame + a["q"] = a.q.round().astype(int) + result = operator.matmul(a, b) + expected = DataFrame( + np.dot(a.values, b.values), index=["a", "b", "c"], columns=["one", "two"] + ) + tm.assert_frame_equal(result, expected) + + # different dtypes DataFrame @ DataFrame + a = a.astype(int) + result = operator.matmul(a, b) + expected = DataFrame( + np.dot(a.values, b.values), index=["a", "b", "c"], columns=["one", "two"] + ) + tm.assert_frame_equal(result, expected) + + # unaligned + df = DataFrame(np.random.randn(3, 4), index=[1, 2, 3], columns=range(4)) + df2 = DataFrame(np.random.randn(5, 3), index=range(5), columns=[1, 2, 3]) + + with pytest.raises(ValueError, match="aligned"): + operator.matmul(df, df2) + + def test_matmul_message_shapes(self): + # GH#21581 exception message should reflect original shapes, + # not transposed shapes + a = np.random.rand(10, 4) + b = np.random.rand(5, 3) + + df = DataFrame(b) + + msg = r"shapes \(10, 4\) and \(5, 3\) not aligned" + with pytest.raises(ValueError, match=msg): + a @ df + with pytest.raises(ValueError, match=msg): + a.tolist() @ df diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_nlargest.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_nlargest.py new file mode 100644 index 0000000..1b2db80 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_nlargest.py @@ -0,0 +1,218 @@ +""" +Note: for naming purposes, most tests are title with as e.g. "test_nlargest_foo" +but are implicitly also testing nsmallest_foo. 
+""" +from string import ascii_lowercase + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +@pytest.fixture +def df_duplicates(): + return pd.DataFrame( + {"a": [1, 2, 3, 4, 4], "b": [1, 1, 1, 1, 1], "c": [0, 1, 2, 5, 4]}, + index=[0, 0, 1, 1, 1], + ) + + +@pytest.fixture +def df_strings(): + return pd.DataFrame( + { + "a": np.random.permutation(10), + "b": list(ascii_lowercase[:10]), + "c": np.random.permutation(10).astype("float64"), + } + ) + + +@pytest.fixture +def df_main_dtypes(): + return pd.DataFrame( + { + "group": [1, 1, 2], + "int": [1, 2, 3], + "float": [4.0, 5.0, 6.0], + "string": list("abc"), + "category_string": pd.Series(list("abc")).astype("category"), + "category_int": [7, 8, 9], + "datetime": pd.date_range("20130101", periods=3), + "datetimetz": pd.date_range("20130101", periods=3, tz="US/Eastern"), + "timedelta": pd.timedelta_range("1 s", periods=3, freq="s"), + }, + columns=[ + "group", + "int", + "float", + "string", + "category_string", + "category_int", + "datetime", + "datetimetz", + "timedelta", + ], + ) + + +class TestNLargestNSmallest: + + # ---------------------------------------------------------------------- + # Top / bottom + @pytest.mark.parametrize( + "order", + [ + ["a"], + ["c"], + ["a", "b"], + ["a", "c"], + ["b", "a"], + ["b", "c"], + ["a", "b", "c"], + ["c", "a", "b"], + ["c", "b", "a"], + ["b", "c", "a"], + ["b", "a", "c"], + # dups! + ["b", "c", "c"], + ], + ) + @pytest.mark.parametrize("n", range(1, 11)) + def test_nlargest_n(self, df_strings, nselect_method, n, order): + # GH#10393 + df = df_strings + if "b" in order: + + error_msg = ( + f"Column 'b' has dtype object, " + f"cannot use method '{nselect_method}' with this dtype" + ) + with pytest.raises(TypeError, match=error_msg): + getattr(df, nselect_method)(n, order) + else: + ascending = nselect_method == "nsmallest" + result = getattr(df, nselect_method)(n, order) + expected = df.sort_values(order, ascending=ascending).head(n) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "columns", [["group", "category_string"], ["group", "string"]] + ) + def test_nlargest_error(self, df_main_dtypes, nselect_method, columns): + df = df_main_dtypes + col = columns[1] + error_msg = ( + f"Column '{col}' has dtype {df[col].dtype}, " + f"cannot use method '{nselect_method}' with this dtype" + ) + # escape some characters that may be in the repr + error_msg = ( + error_msg.replace("(", "\\(") + .replace(")", "\\)") + .replace("[", "\\[") + .replace("]", "\\]") + ) + with pytest.raises(TypeError, match=error_msg): + getattr(df, nselect_method)(2, columns) + + def test_nlargest_all_dtypes(self, df_main_dtypes): + df = df_main_dtypes + df.nsmallest(2, list(set(df) - {"category_string", "string"})) + df.nlargest(2, list(set(df) - {"category_string", "string"})) + + def test_nlargest_duplicates_on_starter_columns(self): + # regression test for GH#22752 + + df = pd.DataFrame({"a": [2, 2, 2, 1, 1, 1], "b": [1, 2, 3, 3, 2, 1]}) + + result = df.nlargest(4, columns=["a", "b"]) + expected = pd.DataFrame( + {"a": [2, 2, 2, 1], "b": [3, 2, 1, 3]}, index=[2, 1, 0, 3] + ) + tm.assert_frame_equal(result, expected) + + result = df.nsmallest(4, columns=["a", "b"]) + expected = pd.DataFrame( + {"a": [1, 1, 1, 2], "b": [1, 2, 3, 1]}, index=[5, 4, 3, 0] + ) + tm.assert_frame_equal(result, expected) + + def test_nlargest_n_identical_values(self): + # GH#15297 + df = pd.DataFrame({"a": [1] * 5, "b": [1, 2, 3, 4, 5]}) + + result = df.nlargest(3, "a") + expected = 
pd.DataFrame({"a": [1] * 3, "b": [1, 2, 3]}, index=[0, 1, 2]) + tm.assert_frame_equal(result, expected) + + result = df.nsmallest(3, "a") + expected = pd.DataFrame({"a": [1] * 3, "b": [1, 2, 3]}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "order", + [["a", "b", "c"], ["c", "b", "a"], ["a"], ["b"], ["a", "b"], ["c", "b"]], + ) + @pytest.mark.parametrize("n", range(1, 6)) + def test_nlargest_n_duplicate_index(self, df_duplicates, n, order): + # GH#13412 + + df = df_duplicates + result = df.nsmallest(n, order) + expected = df.sort_values(order).head(n) + tm.assert_frame_equal(result, expected) + + result = df.nlargest(n, order) + expected = df.sort_values(order, ascending=False).head(n) + tm.assert_frame_equal(result, expected) + + def test_nlargest_duplicate_keep_all_ties(self): + # GH#16818 + df = pd.DataFrame( + {"a": [5, 4, 4, 2, 3, 3, 3, 3], "b": [10, 9, 8, 7, 5, 50, 10, 20]} + ) + result = df.nlargest(4, "a", keep="all") + expected = pd.DataFrame( + { + "a": {0: 5, 1: 4, 2: 4, 4: 3, 5: 3, 6: 3, 7: 3}, + "b": {0: 10, 1: 9, 2: 8, 4: 5, 5: 50, 6: 10, 7: 20}, + } + ) + tm.assert_frame_equal(result, expected) + + result = df.nsmallest(2, "a", keep="all") + expected = pd.DataFrame( + { + "a": {3: 2, 4: 3, 5: 3, 6: 3, 7: 3}, + "b": {3: 7, 4: 5, 5: 50, 6: 10, 7: 20}, + } + ) + tm.assert_frame_equal(result, expected) + + def test_nlargest_multiindex_column_lookup(self): + # Check whether tuples are correctly treated as multi-level lookups. + # GH#23033 + df = pd.DataFrame( + columns=pd.MultiIndex.from_product([["x"], ["a", "b"]]), + data=[[0.33, 0.13], [0.86, 0.25], [0.25, 0.70], [0.85, 0.91]], + ) + + # nsmallest + result = df.nsmallest(3, ("x", "a")) + expected = df.iloc[[2, 0, 3]] + tm.assert_frame_equal(result, expected) + + # nlargest + result = df.nlargest(3, ("x", "b")) + expected = df.iloc[[3, 2, 1]] + tm.assert_frame_equal(result, expected) + + def test_nlargest_nan(self): + # GH#43060 + df = pd.DataFrame([np.nan, np.nan, 0, 1, 2, 3]) + result = df.nlargest(5, 0) + expected = df.sort_values(0, ascending=False).head(5) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_pct_change.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_pct_change.py new file mode 100644 index 0000000..8749218 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_pct_change.py @@ -0,0 +1,120 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Series, +) +import pandas._testing as tm + + +class TestDataFramePctChange: + @pytest.mark.parametrize( + "periods,fill_method,limit,exp", + [ + (1, "ffill", None, [np.nan, np.nan, np.nan, 1, 1, 1.5, 0, 0]), + (1, "ffill", 1, [np.nan, np.nan, np.nan, 1, 1, 1.5, 0, np.nan]), + (1, "bfill", None, [np.nan, 0, 0, 1, 1, 1.5, np.nan, np.nan]), + (1, "bfill", 1, [np.nan, np.nan, 0, 1, 1, 1.5, np.nan, np.nan]), + (-1, "ffill", None, [np.nan, np.nan, -0.5, -0.5, -0.6, 0, 0, np.nan]), + (-1, "ffill", 1, [np.nan, np.nan, -0.5, -0.5, -0.6, 0, np.nan, np.nan]), + (-1, "bfill", None, [0, 0, -0.5, -0.5, -0.6, np.nan, np.nan, np.nan]), + (-1, "bfill", 1, [np.nan, 0, -0.5, -0.5, -0.6, np.nan, np.nan, np.nan]), + ], + ) + @pytest.mark.parametrize("klass", [DataFrame, Series]) + def test_pct_change_with_nas(self, periods, fill_method, limit, exp, klass): + vals = [np.nan, np.nan, 1, 2, 4, 10, np.nan, np.nan] + obj = klass(vals) + + res = obj.pct_change(periods=periods, fill_method=fill_method, limit=limit) + 
tm.assert_equal(res, klass(exp)) + + def test_pct_change_numeric(self): + # GH#11150 + pnl = DataFrame( + [np.arange(0, 40, 10), np.arange(0, 40, 10), np.arange(0, 40, 10)] + ).astype(np.float64) + pnl.iat[1, 0] = np.nan + pnl.iat[1, 1] = np.nan + pnl.iat[2, 3] = 60 + + for axis in range(2): + expected = pnl.ffill(axis=axis) / pnl.ffill(axis=axis).shift(axis=axis) - 1 + result = pnl.pct_change(axis=axis, fill_method="pad") + + tm.assert_frame_equal(result, expected) + + def test_pct_change(self, datetime_frame): + rs = datetime_frame.pct_change(fill_method=None) + tm.assert_frame_equal(rs, datetime_frame / datetime_frame.shift(1) - 1) + + rs = datetime_frame.pct_change(2) + filled = datetime_frame.fillna(method="pad") + tm.assert_frame_equal(rs, filled / filled.shift(2) - 1) + + rs = datetime_frame.pct_change(fill_method="bfill", limit=1) + filled = datetime_frame.fillna(method="bfill", limit=1) + tm.assert_frame_equal(rs, filled / filled.shift(1) - 1) + + rs = datetime_frame.pct_change(freq="5D") + filled = datetime_frame.fillna(method="pad") + tm.assert_frame_equal( + rs, (filled / filled.shift(freq="5D") - 1).reindex_like(filled) + ) + + def test_pct_change_shift_over_nas(self): + s = Series([1.0, 1.5, np.nan, 2.5, 3.0]) + + df = DataFrame({"a": s, "b": s}) + + chg = df.pct_change() + expected = Series([np.nan, 0.5, 0.0, 2.5 / 1.5 - 1, 0.2]) + edf = DataFrame({"a": expected, "b": expected}) + tm.assert_frame_equal(chg, edf) + + @pytest.mark.parametrize( + "freq, periods, fill_method, limit", + [ + ("5B", 5, None, None), + ("3B", 3, None, None), + ("3B", 3, "bfill", None), + ("7B", 7, "pad", 1), + ("7B", 7, "bfill", 3), + ("14B", 14, None, None), + ], + ) + def test_pct_change_periods_freq( + self, datetime_frame, freq, periods, fill_method, limit + ): + # GH#7292 + rs_freq = datetime_frame.pct_change( + freq=freq, fill_method=fill_method, limit=limit + ) + rs_periods = datetime_frame.pct_change( + periods, fill_method=fill_method, limit=limit + ) + tm.assert_frame_equal(rs_freq, rs_periods) + + empty_ts = DataFrame(index=datetime_frame.index, columns=datetime_frame.columns) + rs_freq = empty_ts.pct_change(freq=freq, fill_method=fill_method, limit=limit) + rs_periods = empty_ts.pct_change(periods, fill_method=fill_method, limit=limit) + tm.assert_frame_equal(rs_freq, rs_periods) + + +@pytest.mark.parametrize("fill_method", ["pad", "ffill", None]) +def test_pct_change_with_duplicated_indices(fill_method): + # GH30463 + data = DataFrame( + {0: [np.nan, 1, 2, 3, 9, 18], 1: [0, 1, np.nan, 3, 9, 18]}, index=["a", "b"] * 3 + ) + result = data.pct_change(fill_method=fill_method) + if fill_method is None: + second_column = [np.nan, np.inf, np.nan, np.nan, 2.0, 1.0] + else: + second_column = [np.nan, np.inf, 0.0, 2.0, 2.0, 1.0] + expected = DataFrame( + {0: [np.nan, np.nan, 1.0, 0.5, 2.0, 1.0], 1: second_column}, + index=["a", "b"] * 3, + ) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_pipe.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_pipe.py new file mode 100644 index 0000000..1d7cc16 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_pipe.py @@ -0,0 +1,39 @@ +import pytest + +from pandas import ( + DataFrame, + Series, +) +import pandas._testing as tm + + +class TestPipe: + def test_pipe(self, frame_or_series): + obj = DataFrame({"A": [1, 2, 3]}) + expected = DataFrame({"A": [1, 4, 9]}) + if frame_or_series is Series: + obj = obj["A"] + expected = 
expected["A"] + + f = lambda x, y: x ** y + result = obj.pipe(f, 2) + tm.assert_equal(result, expected) + + def test_pipe_tuple(self, frame_or_series): + obj = DataFrame({"A": [1, 2, 3]}) + obj = tm.get_obj(obj, frame_or_series) + + f = lambda x, y: y + result = obj.pipe((f, "y"), 0) + tm.assert_equal(result, obj) + + def test_pipe_tuple_error(self, frame_or_series): + obj = DataFrame({"A": [1, 2, 3]}) + obj = tm.get_obj(obj, frame_or_series) + + f = lambda x, y: y + + msg = "y is both the pipe target and a keyword argument" + + with pytest.raises(ValueError, match=msg): + obj.pipe((f, "y"), x=1, y=0) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_pop.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_pop.py new file mode 100644 index 0000000..a4f99b8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_pop.py @@ -0,0 +1,71 @@ +import numpy as np + +from pandas import ( + DataFrame, + MultiIndex, + Series, +) +import pandas._testing as tm + + +class TestDataFramePop: + def test_pop(self, float_frame): + float_frame.columns.name = "baz" + + float_frame.pop("A") + assert "A" not in float_frame + + float_frame["foo"] = "bar" + float_frame.pop("foo") + assert "foo" not in float_frame + assert float_frame.columns.name == "baz" + + # gh-10912: inplace ops cause caching issue + a = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["A", "B", "C"], index=["X", "Y"]) + b = a.pop("B") + b += 1 + + # original frame + expected = DataFrame([[1, 3], [4, 6]], columns=["A", "C"], index=["X", "Y"]) + tm.assert_frame_equal(a, expected) + + # result + expected = Series([2, 5], index=["X", "Y"], name="B") + 1 + tm.assert_series_equal(b, expected) + + def test_pop_non_unique_cols(self): + df = DataFrame({0: [0, 1], 1: [0, 1], 2: [4, 5]}) + df.columns = ["a", "b", "a"] + + res = df.pop("a") + assert type(res) == DataFrame + assert len(res) == 2 + assert len(df.columns) == 1 + assert "b" in df.columns + assert "a" not in df.columns + assert len(df.index) == 2 + + def test_mixed_depth_pop(self): + arrays = [ + ["a", "top", "top", "routine1", "routine1", "routine2"], + ["", "OD", "OD", "result1", "result2", "result1"], + ["", "wx", "wy", "", "", ""], + ] + + tuples = sorted(zip(*arrays)) + index = MultiIndex.from_tuples(tuples) + df = DataFrame(np.random.randn(4, 6), columns=index) + + df1 = df.copy() + df2 = df.copy() + result = df1.pop("a") + expected = df2.pop(("a", "", "")) + tm.assert_series_equal(expected, result, check_names=False) + tm.assert_frame_equal(df1, df2) + assert result.name == "a" + + expected = df1["top"] + df1 = df1.drop(["top"], axis=1) + result = df2.pop("top") + tm.assert_frame_equal(expected, result) + tm.assert_frame_equal(df1, df2) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_quantile.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_quantile.py new file mode 100644 index 0000000..8ff1b21 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_quantile.py @@ -0,0 +1,767 @@ +import numpy as np +import pytest + +from pandas.compat.numpy import np_percentile_argname + +import pandas as pd +from pandas import ( + DataFrame, + Index, + Series, + Timestamp, +) +import pandas._testing as tm + + +class TestDataFrameQuantile: + @pytest.mark.parametrize( + "df,expected", + [ + [ + DataFrame( + { + 0: Series(pd.arrays.SparseArray([1, 2])), + 1: Series(pd.arrays.SparseArray([3, 4])), + } + ), + Series([1.5, 3.5], name=0.5), + ], + [ + 
DataFrame(Series([0.0, None, 1.0, 2.0], dtype="Sparse[float]")), + Series([1.0], name=0.5), + ], + ], + ) + def test_quantile_sparse(self, df, expected): + # GH#17198 + # GH#24600 + result = df.quantile() + + tm.assert_series_equal(result, expected) + + def test_quantile(self, datetime_frame): + from numpy import percentile + + df = datetime_frame + q = df.quantile(0.1, axis=0) + assert q["A"] == percentile(df["A"], 10) + tm.assert_index_equal(q.index, df.columns) + + q = df.quantile(0.9, axis=1) + assert q["2000-01-17"] == percentile(df.loc["2000-01-17"], 90) + tm.assert_index_equal(q.index, df.index) + + # test degenerate case + q = DataFrame({"x": [], "y": []}).quantile(0.1, axis=0) + assert np.isnan(q["x"]) and np.isnan(q["y"]) + + # non-numeric exclusion + df = DataFrame({"col1": ["A", "A", "B", "B"], "col2": [1, 2, 3, 4]}) + rs = df.quantile(0.5) + with tm.assert_produces_warning(FutureWarning, match="Select only valid"): + xp = df.median().rename(0.5) + tm.assert_series_equal(rs, xp) + + # axis + df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}, index=[1, 2, 3]) + result = df.quantile(0.5, axis=1) + expected = Series([1.5, 2.5, 3.5], index=[1, 2, 3], name=0.5) + tm.assert_series_equal(result, expected) + + result = df.quantile([0.5, 0.75], axis=1) + expected = DataFrame( + {1: [1.5, 1.75], 2: [2.5, 2.75], 3: [3.5, 3.75]}, index=[0.5, 0.75] + ) + tm.assert_frame_equal(result, expected, check_index_type=True) + + # We may want to break API in the future to change this + # so that we exclude non-numeric along the same axis + # See GH #7312 + df = DataFrame([[1, 2, 3], ["a", "b", 4]]) + result = df.quantile(0.5, axis=1) + expected = Series([3.0, 4.0], index=[0, 1], name=0.5) + tm.assert_series_equal(result, expected) + + def test_quantile_date_range(self): + # GH 2460 + + dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific") + ser = Series(dti) + df = DataFrame(ser) + + result = df.quantile(numeric_only=False) + expected = Series( + ["2016-01-02 00:00:00"], name=0.5, dtype="datetime64[ns, US/Pacific]" + ) + + tm.assert_series_equal(result, expected) + + def test_quantile_axis_mixed(self): + + # mixed on axis=1 + df = DataFrame( + { + "A": [1, 2, 3], + "B": [2.0, 3.0, 4.0], + "C": pd.date_range("20130101", periods=3), + "D": ["foo", "bar", "baz"], + } + ) + result = df.quantile(0.5, axis=1) + expected = Series([1.5, 2.5, 3.5], name=0.5) + tm.assert_series_equal(result, expected) + + # must raise + msg = "'<' not supported between instances of 'Timestamp' and 'float'" + with pytest.raises(TypeError, match=msg): + df.quantile(0.5, axis=1, numeric_only=False) + + def test_quantile_axis_parameter(self): + # GH 9543/9544 + + df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}, index=[1, 2, 3]) + + result = df.quantile(0.5, axis=0) + + expected = Series([2.0, 3.0], index=["A", "B"], name=0.5) + tm.assert_series_equal(result, expected) + + expected = df.quantile(0.5, axis="index") + tm.assert_series_equal(result, expected) + + result = df.quantile(0.5, axis=1) + + expected = Series([1.5, 2.5, 3.5], index=[1, 2, 3], name=0.5) + tm.assert_series_equal(result, expected) + + result = df.quantile(0.5, axis="columns") + tm.assert_series_equal(result, expected) + + msg = "No axis named -1 for object type DataFrame" + with pytest.raises(ValueError, match=msg): + df.quantile(0.1, axis=-1) + msg = "No axis named column for object type DataFrame" + with pytest.raises(ValueError, match=msg): + df.quantile(0.1, axis="column") + + def test_quantile_interpolation(self): + # see gh-10174 + + # interpolation 
method other than default linear + df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}, index=[1, 2, 3]) + result = df.quantile(0.5, axis=1, interpolation="nearest") + expected = Series([1, 2, 3], index=[1, 2, 3], name=0.5) + tm.assert_series_equal(result, expected) + + # cross-check interpolation=nearest results in original dtype + exp = np.percentile( + np.array([[1, 2, 3], [2, 3, 4]]), + 0.5, + axis=0, + **{np_percentile_argname: "nearest"}, + ) + expected = Series(exp, index=[1, 2, 3], name=0.5, dtype="int64") + tm.assert_series_equal(result, expected) + + # float + df = DataFrame({"A": [1.0, 2.0, 3.0], "B": [2.0, 3.0, 4.0]}, index=[1, 2, 3]) + result = df.quantile(0.5, axis=1, interpolation="nearest") + expected = Series([1.0, 2.0, 3.0], index=[1, 2, 3], name=0.5) + tm.assert_series_equal(result, expected) + exp = np.percentile( + np.array([[1.0, 2.0, 3.0], [2.0, 3.0, 4.0]]), + 0.5, + axis=0, + **{np_percentile_argname: "nearest"}, + ) + expected = Series(exp, index=[1, 2, 3], name=0.5, dtype="float64") + tm.assert_series_equal(result, expected) + + # axis + result = df.quantile([0.5, 0.75], axis=1, interpolation="lower") + expected = DataFrame( + {1: [1.0, 1.0], 2: [2.0, 2.0], 3: [3.0, 3.0]}, index=[0.5, 0.75] + ) + tm.assert_frame_equal(result, expected) + + # test degenerate case + df = DataFrame({"x": [], "y": []}) + q = df.quantile(0.1, axis=0, interpolation="higher") + assert np.isnan(q["x"]) and np.isnan(q["y"]) + + # multi + df = DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]], columns=["a", "b", "c"]) + result = df.quantile([0.25, 0.5], interpolation="midpoint") + + # https://github.com/numpy/numpy/issues/7163 + expected = DataFrame( + [[1.5, 1.5, 1.5], [2.0, 2.0, 2.0]], + index=[0.25, 0.5], + columns=["a", "b", "c"], + ) + tm.assert_frame_equal(result, expected) + + def test_quantile_interpolation_datetime(self, datetime_frame): + # see gh-10174 + + # interpolation = linear (default case) + df = datetime_frame + q = df.quantile(0.1, axis=0, interpolation="linear") + assert q["A"] == np.percentile(df["A"], 10) + + def test_quantile_interpolation_int(self, int_frame): + # see gh-10174 + + df = int_frame + # interpolation = linear (default case) + q = df.quantile(0.1) + assert q["A"] == np.percentile(df["A"], 10) + + # test with and without interpolation keyword + q1 = df.quantile(0.1, axis=0, interpolation="linear") + assert q1["A"] == np.percentile(df["A"], 10) + tm.assert_series_equal(q, q1) + + def test_quantile_multi(self): + df = DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]], columns=["a", "b", "c"]) + result = df.quantile([0.25, 0.5]) + expected = DataFrame( + [[1.5, 1.5, 1.5], [2.0, 2.0, 2.0]], + index=[0.25, 0.5], + columns=["a", "b", "c"], + ) + tm.assert_frame_equal(result, expected) + + # axis = 1; each row of df is constant, so every row-wise quantile equals it + result = df.quantile([0.25, 0.5], axis=1) + expected = DataFrame( + [[1.0, 2.0, 3.0], [1.0, 2.0, 3.0]], index=[0.25, 0.5], columns=[0, 1, 2] + ) + tm.assert_frame_equal(result, expected) + + # empty + result = DataFrame({"x": [], "y": []}).quantile([0.1, 0.9], axis=0) + expected = DataFrame( + {"x": [np.nan, np.nan], "y": [np.nan, np.nan]}, index=[0.1, 0.9] + ) + tm.assert_frame_equal(result, expected) + + def test_quantile_datetime(self): + df = DataFrame({"a": pd.to_datetime(["2010", "2011"]), "b": [0, 5]}) + + # exclude datetime + result = df.quantile(0.5) + expected = Series([2.5], index=["b"], name=0.5) + tm.assert_series_equal(result, expected) + + # datetime + result = df.quantile(0.5, numeric_only=False) + expected = Series( + [Timestamp("2010-07-02 12:00:00"), 2.5], index=["a", "b"], name=0.5 + ) + tm.assert_series_equal(result, expected) + + # datetime w/ multi +
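# Added cross-check (illustrative, not part of the upstream test file): the + # 0.5 quantile of ["2010-01-01", "2011-01-01"] is simply the midpoint of the + # two timestamps, which is where "2010-07-02 12:00:00" comes from; the + # midpoint is equidistant from both endpoints: + assert Timestamp("2010-07-02 12:00:00") - Timestamp("2010-01-01") == Timestamp("2011-01-01") - Timestamp("2010-07-02 12:00:00") +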
result = df.quantile([0.5], numeric_only=False) + expected = DataFrame( + [[Timestamp("2010-07-02 12:00:00"), 2.5]], index=[0.5], columns=["a", "b"] + ) + tm.assert_frame_equal(result, expected) + + # axis = 1 + df["c"] = pd.to_datetime(["2011", "2012"]) + result = df[["a", "c"]].quantile(0.5, axis=1, numeric_only=False) + expected = Series( + [Timestamp("2010-07-02 12:00:00"), Timestamp("2011-07-02 12:00:00")], + index=[0, 1], + name=0.5, + ) + tm.assert_series_equal(result, expected) + + result = df[["a", "c"]].quantile([0.5], axis=1, numeric_only=False) + expected = DataFrame( + [[Timestamp("2010-07-02 12:00:00"), Timestamp("2011-07-02 12:00:00")]], + index=[0.5], + columns=[0, 1], + ) + tm.assert_frame_equal(result, expected) + + # empty when numeric_only=True + result = df[["a", "c"]].quantile(0.5) + expected = Series([], index=[], dtype=np.float64, name=0.5) + tm.assert_series_equal(result, expected) + + result = df[["a", "c"]].quantile([0.5]) + expected = DataFrame(index=[0.5]) + tm.assert_frame_equal(result, expected) + + def test_quantile_invalid(self, datetime_frame): + msg = "percentiles should all be in the interval \\[0, 1\\]" + for invalid in [-1, 2, [0.5, -1], [0.5, 2]]: + with pytest.raises(ValueError, match=msg): + datetime_frame.quantile(invalid) + + def test_quantile_box(self): + df = DataFrame( + { + "A": [ + Timestamp("2011-01-01"), + Timestamp("2011-01-02"), + Timestamp("2011-01-03"), + ], + "B": [ + Timestamp("2011-01-01", tz="US/Eastern"), + Timestamp("2011-01-02", tz="US/Eastern"), + Timestamp("2011-01-03", tz="US/Eastern"), + ], + "C": [ + pd.Timedelta("1 days"), + pd.Timedelta("2 days"), + pd.Timedelta("3 days"), + ], + } + ) + + res = df.quantile(0.5, numeric_only=False) + + exp = Series( + [ + Timestamp("2011-01-02"), + Timestamp("2011-01-02", tz="US/Eastern"), + pd.Timedelta("2 days"), + ], + name=0.5, + index=["A", "B", "C"], + ) + tm.assert_series_equal(res, exp) + + res = df.quantile([0.5], numeric_only=False) + exp = DataFrame( + [ + [ + Timestamp("2011-01-02"), + Timestamp("2011-01-02", tz="US/Eastern"), + pd.Timedelta("2 days"), + ] + ], + index=[0.5], + columns=["A", "B", "C"], + ) + tm.assert_frame_equal(res, exp) + + # DatetimeLikeBlock may be consolidated and contain NaT in different loc + df = DataFrame( + { + "A": [ + Timestamp("2011-01-01"), + pd.NaT, + Timestamp("2011-01-02"), + Timestamp("2011-01-03"), + ], + "a": [ + Timestamp("2011-01-01"), + Timestamp("2011-01-02"), + pd.NaT, + Timestamp("2011-01-03"), + ], + "B": [ + Timestamp("2011-01-01", tz="US/Eastern"), + pd.NaT, + Timestamp("2011-01-02", tz="US/Eastern"), + Timestamp("2011-01-03", tz="US/Eastern"), + ], + "b": [ + Timestamp("2011-01-01", tz="US/Eastern"), + Timestamp("2011-01-02", tz="US/Eastern"), + pd.NaT, + Timestamp("2011-01-03", tz="US/Eastern"), + ], + "C": [ + pd.Timedelta("1 days"), + pd.Timedelta("2 days"), + pd.Timedelta("3 days"), + pd.NaT, + ], + "c": [ + pd.NaT, + pd.Timedelta("1 days"), + pd.Timedelta("2 days"), + pd.Timedelta("3 days"), + ], + }, + columns=list("AaBbCc"), + ) + + res = df.quantile(0.5, numeric_only=False) + exp = Series( + [ + Timestamp("2011-01-02"), + Timestamp("2011-01-02"), + Timestamp("2011-01-02", tz="US/Eastern"), + Timestamp("2011-01-02", tz="US/Eastern"), + pd.Timedelta("2 days"), + pd.Timedelta("2 days"), + ], + name=0.5, + index=list("AaBbCc"), + ) + tm.assert_series_equal(res, exp) + + res = df.quantile([0.5], numeric_only=False) + exp = DataFrame( + [ + [ + Timestamp("2011-01-02"), + Timestamp("2011-01-02"), + Timestamp("2011-01-02", 
tz="US/Eastern"), + Timestamp("2011-01-02", tz="US/Eastern"), + pd.Timedelta("2 days"), + pd.Timedelta("2 days"), + ] + ], + index=[0.5], + columns=list("AaBbCc"), + ) + tm.assert_frame_equal(res, exp) + + def test_quantile_nan(self): + + # GH 14357 - float block where some cols have missing values + df = DataFrame({"a": np.arange(1, 6.0), "b": np.arange(1, 6.0)}) + df.iloc[-1, 1] = np.nan + + res = df.quantile(0.5) + exp = Series([3.0, 2.5], index=["a", "b"], name=0.5) + tm.assert_series_equal(res, exp) + + res = df.quantile([0.5, 0.75]) + exp = DataFrame({"a": [3.0, 4.0], "b": [2.5, 3.25]}, index=[0.5, 0.75]) + tm.assert_frame_equal(res, exp) + + res = df.quantile(0.5, axis=1) + exp = Series(np.arange(1.0, 6.0), name=0.5) + tm.assert_series_equal(res, exp) + + res = df.quantile([0.5, 0.75], axis=1) + exp = DataFrame([np.arange(1.0, 6.0)] * 2, index=[0.5, 0.75]) + tm.assert_frame_equal(res, exp) + + # full-nan column + df["b"] = np.nan + + res = df.quantile(0.5) + exp = Series([3.0, np.nan], index=["a", "b"], name=0.5) + tm.assert_series_equal(res, exp) + + res = df.quantile([0.5, 0.75]) + exp = DataFrame({"a": [3.0, 4.0], "b": [np.nan, np.nan]}, index=[0.5, 0.75]) + tm.assert_frame_equal(res, exp) + + def test_quantile_nat(self): + + # full NaT column + df = DataFrame({"a": [pd.NaT, pd.NaT, pd.NaT]}) + + res = df.quantile(0.5, numeric_only=False) + exp = Series([pd.NaT], index=["a"], name=0.5) + tm.assert_series_equal(res, exp) + + res = df.quantile([0.5], numeric_only=False) + exp = DataFrame({"a": [pd.NaT]}, index=[0.5]) + tm.assert_frame_equal(res, exp) + + # mixed non-null / full null column + df = DataFrame( + { + "a": [ + Timestamp("2012-01-01"), + Timestamp("2012-01-02"), + Timestamp("2012-01-03"), + ], + "b": [pd.NaT, pd.NaT, pd.NaT], + } + ) + + res = df.quantile(0.5, numeric_only=False) + exp = Series([Timestamp("2012-01-02"), pd.NaT], index=["a", "b"], name=0.5) + tm.assert_series_equal(res, exp) + + res = df.quantile([0.5], numeric_only=False) + exp = DataFrame( + [[Timestamp("2012-01-02"), pd.NaT]], index=[0.5], columns=["a", "b"] + ) + tm.assert_frame_equal(res, exp) + + def test_quantile_empty_no_rows_floats(self): + + # floats + df = DataFrame(columns=["a", "b"], dtype="float64") + + res = df.quantile(0.5) + exp = Series([np.nan, np.nan], index=["a", "b"], name=0.5) + tm.assert_series_equal(res, exp) + + res = df.quantile([0.5]) + exp = DataFrame([[np.nan, np.nan]], columns=["a", "b"], index=[0.5]) + tm.assert_frame_equal(res, exp) + + res = df.quantile(0.5, axis=1) + exp = Series([], index=[], dtype="float64", name=0.5) + tm.assert_series_equal(res, exp) + + res = df.quantile([0.5], axis=1) + exp = DataFrame(columns=[], index=[0.5]) + tm.assert_frame_equal(res, exp) + + def test_quantile_empty_no_rows_ints(self): + # ints + df = DataFrame(columns=["a", "b"], dtype="int64") + + res = df.quantile(0.5) + exp = Series([np.nan, np.nan], index=["a", "b"], name=0.5) + tm.assert_series_equal(res, exp) + + def test_quantile_empty_no_rows_dt64(self): + # datetimes + df = DataFrame(columns=["a", "b"], dtype="datetime64[ns]") + + res = df.quantile(0.5, numeric_only=False) + exp = Series( + [pd.NaT, pd.NaT], index=["a", "b"], dtype="datetime64[ns]", name=0.5 + ) + tm.assert_series_equal(res, exp) + + # Mixed dt64/dt64tz + df["a"] = df["a"].dt.tz_localize("US/Central") + res = df.quantile(0.5, numeric_only=False) + exp = exp.astype(object) + tm.assert_series_equal(res, exp) + + # both dt64tz + df["b"] = df["b"].dt.tz_localize("US/Central") + res = df.quantile(0.5, numeric_only=False) 
+ exp = exp.astype(df["b"].dtype) + tm.assert_series_equal(res, exp) + + def test_quantile_empty_no_columns(self): + # GH#23925 _get_numeric_data may drop all columns + df = DataFrame(pd.date_range("1/1/18", periods=5)) + df.columns.name = "captain tightpants" + result = df.quantile(0.5) + expected = Series([], index=[], name=0.5, dtype=np.float64) + expected.index.name = "captain tightpants" + tm.assert_series_equal(result, expected) + + result = df.quantile([0.5]) + expected = DataFrame([], index=[0.5], columns=[]) + expected.columns.name = "captain tightpants" + tm.assert_frame_equal(result, expected) + + def test_quantile_item_cache(self, using_array_manager): + # previous behavior incorrect retained an invalid _item_cache entry + df = DataFrame(np.random.randn(4, 3), columns=["A", "B", "C"]) + df["D"] = df["A"] * 2 + ser = df["A"] + if not using_array_manager: + assert len(df._mgr.blocks) == 2 + + df.quantile(numeric_only=False) + ser.values[0] = 99 + + assert df.iloc[0, 0] == df["A"][0] + + +class TestQuantileExtensionDtype: + # TODO: tests for axis=1? + # TODO: empty case? + + @pytest.fixture( + params=[ + pytest.param( + pd.IntervalIndex.from_breaks(range(10)), + marks=pytest.mark.xfail(reason="raises when trying to add Intervals"), + ), + pd.period_range("2016-01-01", periods=9, freq="D"), + pd.date_range("2016-01-01", periods=9, tz="US/Pacific"), + pd.timedelta_range("1 Day", periods=9), + pd.array(np.arange(9), dtype="Int64"), + pd.array(np.arange(9), dtype="Float64"), + ], + ids=lambda x: str(x.dtype), + ) + def index(self, request): + # NB: not actually an Index object + idx = request.param + idx.name = "A" + return idx + + @pytest.fixture + def obj(self, index, frame_or_series): + # bc index is not always an Index (yet), we need to re-patch .name + obj = frame_or_series(index).copy() + + if frame_or_series is Series: + obj.name = "A" + else: + obj.columns = ["A"] + return obj + + def compute_quantile(self, obj, qs): + if isinstance(obj, Series): + result = obj.quantile(qs) + else: + result = obj.quantile(qs, numeric_only=False) + return result + + def test_quantile_ea(self, obj, index): + + # result should be invariant to shuffling + indexer = np.arange(len(index), dtype=np.intp) + np.random.shuffle(indexer) + obj = obj.iloc[indexer] + + qs = [0.5, 0, 1] + result = self.compute_quantile(obj, qs) + + # expected here assumes len(index) == 9 + expected = Series( + [index[4], index[0], index[-1]], dtype=index.dtype, index=qs, name="A" + ) + expected = type(obj)(expected) + + tm.assert_equal(result, expected) + + def test_quantile_ea_with_na(self, obj, index): + + obj.iloc[0] = index._na_value + obj.iloc[-1] = index._na_value + + # result should be invariant to shuffling + indexer = np.arange(len(index), dtype=np.intp) + np.random.shuffle(indexer) + obj = obj.iloc[indexer] + + qs = [0.5, 0, 1] + result = self.compute_quantile(obj, qs) + + # expected here assumes len(index) == 9 + expected = Series( + [index[4], index[1], index[-2]], dtype=index.dtype, index=qs, name="A" + ) + expected = type(obj)(expected) + tm.assert_equal(result, expected) + + # TODO(GH#39763): filtering can be removed after GH#39763 is fixed + @pytest.mark.filterwarnings("ignore:Using .astype to convert:FutureWarning") + def test_quantile_ea_all_na( + self, obj, index, frame_or_series, using_array_manager, request + ): + if ( + using_array_manager + and frame_or_series is DataFrame + and index.dtype == "m8[ns]" + ): + mark = pytest.mark.xfail( + reason="obj.astype fails bc obj is incorrectly dt64 at this point" 
+ ) + request.node.add_marker(mark) + + obj.iloc[:] = index._na_value + + # TODO(ArrayManager): this casting should be unnecessary after GH#39763 is fixed + obj[:] = obj.astype(index.dtype) + assert np.all(obj.dtypes == index.dtype) + + # result should be invariant to shuffling + indexer = np.arange(len(index), dtype=np.intp) + np.random.shuffle(indexer) + obj = obj.iloc[indexer] + + qs = [0.5, 0, 1] + result = self.compute_quantile(obj, qs) + + expected = index.take([-1, -1, -1], allow_fill=True, fill_value=index._na_value) + expected = Series(expected, index=qs, name="A") + expected = type(obj)(expected) + tm.assert_equal(result, expected) + + def test_quantile_ea_scalar(self, obj, index): + # scalar qs + + # result should be invariant to shuffling + indexer = np.arange(len(index), dtype=np.intp) + np.random.shuffle(indexer) + obj = obj.iloc[indexer] + + qs = 0.5 + result = self.compute_quantile(obj, qs) + + expected = Series({"A": index[4]}, dtype=index.dtype, name=0.5) + if isinstance(obj, Series): + expected = expected["A"] + assert result == expected + else: + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "dtype, expected_data, expected_index, axis", + [ + ["float64", [], [], 1], + ["int64", [], [], 1], + ["float64", [np.nan, np.nan], ["a", "b"], 0], + ["int64", [np.nan, np.nan], ["a", "b"], 0], + ], + ) + def test_empty_numeric(self, dtype, expected_data, expected_index, axis): + # GH 14564 + df = DataFrame(columns=["a", "b"], dtype=dtype) + result = df.quantile(0.5, axis=axis) + expected = Series( + expected_data, name=0.5, index=Index(expected_index), dtype="float64" + ) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "dtype, expected_data, expected_index, axis, expected_dtype", + [ + pytest.param( + "datetime64[ns]", + [], + [], + 1, + "datetime64[ns]", + marks=pytest.mark.xfail(reason="#GH 41544"), + ), + ["datetime64[ns]", [pd.NaT, pd.NaT], ["a", "b"], 0, "datetime64[ns]"], + ], + ) + def test_empty_datelike( + self, dtype, expected_data, expected_index, axis, expected_dtype + ): + # GH 14564 + df = DataFrame(columns=["a", "b"], dtype=dtype) + result = df.quantile(0.5, axis=axis, numeric_only=False) + expected = Series( + expected_data, name=0.5, index=Index(expected_index), dtype=expected_dtype + ) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "expected_data, expected_index, axis", + [ + [[np.nan, np.nan], range(2), 1], + [[], [], 0], + ], + ) + def test_datelike_numeric_only(self, expected_data, expected_index, axis): + # GH 14564 + df = DataFrame( + { + "a": pd.to_datetime(["2010", "2011"]), + "b": [0, 5], + "c": pd.to_datetime(["2011", "2012"]), + } + ) + result = df[["a", "c"]].quantile(0.5, axis=axis) + expected = Series( + expected_data, name=0.5, index=Index(expected_index), dtype=np.float64 + ) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_rank.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_rank.py new file mode 100644 index 0000000..48188b6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_rank.py @@ -0,0 +1,498 @@ +from datetime import ( + datetime, + timedelta, +) + +import numpy as np +import pytest + +from pandas._libs.algos import ( + Infinity, + NegInfinity, +) +import pandas.util._test_decorators as td + +from pandas import ( + DataFrame, + Series, +) +import pandas._testing as tm + + +class TestRank: + s = Series([1, 3, 4, 2, np.nan, 2, 1, 5, 
np.nan, 3]) + df = DataFrame({"A": s, "B": s}) + + results = { + "average": np.array([1.5, 5.5, 7.0, 3.5, np.nan, 3.5, 1.5, 8.0, np.nan, 5.5]), + "min": np.array([1, 5, 7, 3, np.nan, 3, 1, 8, np.nan, 5]), + "max": np.array([2, 6, 7, 4, np.nan, 4, 2, 8, np.nan, 6]), + "first": np.array([1, 5, 7, 3, np.nan, 4, 2, 8, np.nan, 6]), + "dense": np.array([1, 3, 4, 2, np.nan, 2, 1, 5, np.nan, 3]), + } + + @pytest.fixture(params=["average", "min", "max", "first", "dense"]) + def method(self, request): + """ + Fixture for trying all rank methods + """ + return request.param + + @td.skip_if_no_scipy + def test_rank(self, float_frame): + import scipy.stats # noqa:F401 + from scipy.stats import rankdata + + float_frame["A"][::2] = np.nan + float_frame["B"][::3] = np.nan + float_frame["C"][::4] = np.nan + float_frame["D"][::5] = np.nan + + ranks0 = float_frame.rank() + ranks1 = float_frame.rank(1) + mask = np.isnan(float_frame.values) + + fvals = float_frame.fillna(np.inf).values + + exp0 = np.apply_along_axis(rankdata, 0, fvals) + exp0[mask] = np.nan + + exp1 = np.apply_along_axis(rankdata, 1, fvals) + exp1[mask] = np.nan + + tm.assert_almost_equal(ranks0.values, exp0) + tm.assert_almost_equal(ranks1.values, exp1) + + # integers + df = DataFrame(np.random.randint(0, 5, size=40).reshape((10, 4))) + + result = df.rank() + exp = df.astype(float).rank() + tm.assert_frame_equal(result, exp) + + result = df.rank(1) + exp = df.astype(float).rank(1) + tm.assert_frame_equal(result, exp) + + def test_rank2(self): + df = DataFrame([[1, 3, 2], [1, 2, 3]]) + expected = DataFrame([[1.0, 3.0, 2.0], [1, 2, 3]]) / 3.0 + result = df.rank(1, pct=True) + tm.assert_frame_equal(result, expected) + + df = DataFrame([[1, 3, 2], [1, 2, 3]]) + expected = df.rank(0) / 2.0 + result = df.rank(0, pct=True) + tm.assert_frame_equal(result, expected) + + df = DataFrame([["b", "c", "a"], ["a", "c", "b"]]) + expected = DataFrame([[2.0, 3.0, 1.0], [1, 3, 2]]) + result = df.rank(1, numeric_only=False) + tm.assert_frame_equal(result, expected) + + expected = DataFrame([[2.0, 1.5, 1.0], [1, 1.5, 2]]) + result = df.rank(0, numeric_only=False) + tm.assert_frame_equal(result, expected) + + df = DataFrame([["b", np.nan, "a"], ["a", "c", "b"]]) + expected = DataFrame([[2.0, np.nan, 1.0], [1.0, 3.0, 2.0]]) + result = df.rank(1, numeric_only=False) + tm.assert_frame_equal(result, expected) + + expected = DataFrame([[2.0, np.nan, 1.0], [1.0, 1.0, 2.0]]) + result = df.rank(0, numeric_only=False) + tm.assert_frame_equal(result, expected) + + # f7u12, this does not work without extensive workaround + data = [ + [datetime(2001, 1, 5), np.nan, datetime(2001, 1, 2)], + [datetime(2000, 1, 2), datetime(2000, 1, 3), datetime(2000, 1, 1)], + ] + df = DataFrame(data) + + # check the rank + expected = DataFrame([[2.0, np.nan, 1.0], [2.0, 3.0, 1.0]]) + result = df.rank(1, numeric_only=False, ascending=True) + tm.assert_frame_equal(result, expected) + + expected = DataFrame([[1.0, np.nan, 2.0], [2.0, 1.0, 3.0]]) + result = df.rank(1, numeric_only=False, ascending=False) + tm.assert_frame_equal(result, expected) + + df = DataFrame({"a": [1e-20, -5, 1e-20 + 1e-40, 10, 1e60, 1e80, 1e-30]}) + exp = DataFrame({"a": [3.5, 1.0, 3.5, 5.0, 6.0, 7.0, 2.0]}) + tm.assert_frame_equal(df.rank(), exp) + + def test_rank_does_not_mutate(self): + # GH#18521 + # Check rank does not mutate DataFrame + df = DataFrame(np.random.randn(10, 3), dtype="float64") + expected = df.copy() + df.rank() + result = df + tm.assert_frame_equal(result, expected) + + def test_rank_mixed_frame(self, 
float_string_frame): + float_string_frame["datetime"] = datetime.now() + float_string_frame["timedelta"] = timedelta(days=1, seconds=1) + + with tm.assert_produces_warning(FutureWarning, match="numeric_only=None"): + float_string_frame.rank(numeric_only=None) + with tm.assert_produces_warning(FutureWarning, match="Dropping of nuisance"): + result = float_string_frame.rank(1) + expected = float_string_frame.rank(1, numeric_only=True) + tm.assert_frame_equal(result, expected) + + @td.skip_if_no_scipy + def test_rank_na_option(self, float_frame): + import scipy.stats # noqa:F401 + from scipy.stats import rankdata + + float_frame["A"][::2] = np.nan + float_frame["B"][::3] = np.nan + float_frame["C"][::4] = np.nan + float_frame["D"][::5] = np.nan + + # bottom + ranks0 = float_frame.rank(na_option="bottom") + ranks1 = float_frame.rank(1, na_option="bottom") + + fvals = float_frame.fillna(np.inf).values + + exp0 = np.apply_along_axis(rankdata, 0, fvals) + exp1 = np.apply_along_axis(rankdata, 1, fvals) + + tm.assert_almost_equal(ranks0.values, exp0) + tm.assert_almost_equal(ranks1.values, exp1) + + # top + ranks0 = float_frame.rank(na_option="top") + ranks1 = float_frame.rank(1, na_option="top") + + fval0 = float_frame.fillna((float_frame.min() - 1).to_dict()).values + fval1 = float_frame.T + fval1 = fval1.fillna((fval1.min() - 1).to_dict()).T + fval1 = fval1.fillna(np.inf).values + + exp0 = np.apply_along_axis(rankdata, 0, fval0) + exp1 = np.apply_along_axis(rankdata, 1, fval1) + + tm.assert_almost_equal(ranks0.values, exp0) + tm.assert_almost_equal(ranks1.values, exp1) + + # descending + + # bottom + ranks0 = float_frame.rank(na_option="top", ascending=False) + ranks1 = float_frame.rank(1, na_option="top", ascending=False) + + fvals = float_frame.fillna(np.inf).values + + exp0 = np.apply_along_axis(rankdata, 0, -fvals) + exp1 = np.apply_along_axis(rankdata, 1, -fvals) + + tm.assert_almost_equal(ranks0.values, exp0) + tm.assert_almost_equal(ranks1.values, exp1) + + # descending + + # top + ranks0 = float_frame.rank(na_option="bottom", ascending=False) + ranks1 = float_frame.rank(1, na_option="bottom", ascending=False) + + fval0 = float_frame.fillna((float_frame.min() - 1).to_dict()).values + fval1 = float_frame.T + fval1 = fval1.fillna((fval1.min() - 1).to_dict()).T + fval1 = fval1.fillna(np.inf).values + + exp0 = np.apply_along_axis(rankdata, 0, -fval0) + exp1 = np.apply_along_axis(rankdata, 1, -fval1) + + tm.assert_numpy_array_equal(ranks0.values, exp0) + tm.assert_numpy_array_equal(ranks1.values, exp1) + + # bad values throw error + msg = "na_option must be one of 'keep', 'top', or 'bottom'" + + with pytest.raises(ValueError, match=msg): + float_frame.rank(na_option="bad", ascending=False) + + # invalid type + with pytest.raises(ValueError, match=msg): + float_frame.rank(na_option=True, ascending=False) + + def test_rank_axis(self): + # check if using axes' names gives the same result + df = DataFrame([[2, 1], [4, 3]]) + tm.assert_frame_equal(df.rank(axis=0), df.rank(axis="index")) + tm.assert_frame_equal(df.rank(axis=1), df.rank(axis="columns")) + + @td.skip_if_no_scipy + def test_rank_methods_frame(self): + import scipy.stats # noqa:F401 + from scipy.stats import rankdata + + xs = np.random.randint(0, 21, (100, 26)) + xs = (xs - 10.0) / 10.0 + cols = [chr(ord("z") - i) for i in range(xs.shape[1])] + + for vals in [xs, xs + 1e6, xs * 1e-6]: + df = DataFrame(vals, columns=cols) + + for ax in [0, 1]: + for m in ["average", "min", "max", "first", "dense"]: + result = df.rank(axis=ax, method=m) + 
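# Added note (not in the upstream file): scipy's rankdata has no "first" + # method; its "ordinal" method uses the same order-of-appearance tie-break, + # hence the substitution in the call below. +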
sprank = np.apply_along_axis( + rankdata, ax, vals, m if m != "first" else "ordinal" + ) + sprank = sprank.astype(np.float64) + expected = DataFrame(sprank, columns=cols).astype("float64") + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("dtype", ["O", "f8", "i8"]) + @pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning") + def test_rank_descending(self, method, dtype): + if "i" in dtype: + df = self.df.dropna().astype(dtype) + else: + df = self.df.astype(dtype) + + res = df.rank(ascending=False) + expected = (df.max() - df).rank() + tm.assert_frame_equal(res, expected) + + expected = (df.max() - df).rank(method=method) + + if dtype != "O": + res2 = df.rank(method=method, ascending=False, numeric_only=True) + tm.assert_frame_equal(res2, expected) + + res3 = df.rank(method=method, ascending=False, numeric_only=False) + tm.assert_frame_equal(res3, expected) + + @pytest.mark.parametrize("axis", [0, 1]) + @pytest.mark.parametrize("dtype", [None, object]) + def test_rank_2d_tie_methods(self, method, axis, dtype): + df = self.df + + def _check2d(df, expected, method="average", axis=0): + exp_df = DataFrame({"A": expected, "B": expected}) + + if axis == 1: + df = df.T + exp_df = exp_df.T + + result = df.rank(method=method, axis=axis) + tm.assert_frame_equal(result, exp_df) + + frame = df if dtype is None else df.astype(dtype) + _check2d(frame, self.results[method], method=method, axis=axis) + + @pytest.mark.parametrize( + "method,exp", + [ + ("dense", [[1.0, 1.0, 1.0], [1.0, 0.5, 2.0 / 3], [1.0, 0.5, 1.0 / 3]]), + ( + "min", + [ + [1.0 / 3, 1.0, 1.0], + [1.0 / 3, 1.0 / 3, 2.0 / 3], + [1.0 / 3, 1.0 / 3, 1.0 / 3], + ], + ), + ( + "max", + [[1.0, 1.0, 1.0], [1.0, 2.0 / 3, 2.0 / 3], [1.0, 2.0 / 3, 1.0 / 3]], + ), + ( + "average", + [[2.0 / 3, 1.0, 1.0], [2.0 / 3, 0.5, 2.0 / 3], [2.0 / 3, 0.5, 1.0 / 3]], + ), + ( + "first", + [ + [1.0 / 3, 1.0, 1.0], + [2.0 / 3, 1.0 / 3, 2.0 / 3], + [3.0 / 3, 2.0 / 3, 1.0 / 3], + ], + ), + ], + ) + def test_rank_pct_true(self, method, exp): + # see gh-15630. + + df = DataFrame([[2012, 66, 3], [2012, 65, 2], [2012, 65, 1]]) + result = df.rank(method=method, pct=True) + + expected = DataFrame(exp) + tm.assert_frame_equal(result, expected) + + @pytest.mark.single + @pytest.mark.high_memory + def test_pct_max_many_rows(self): + # GH 18271 + df = DataFrame( + {"A": np.arange(2 ** 24 + 1), "B": np.arange(2 ** 24 + 1, 0, -1)} + ) + result = df.rank(pct=True).max() + assert (result == 1).all() + + @pytest.mark.parametrize( + "contents,dtype", + [ + ( + [ + -np.inf, + -50, + -1, + -1e-20, + -1e-25, + -1e-50, + 0, + 1e-40, + 1e-20, + 1e-10, + 2, + 40, + np.inf, + ], + "float64", + ), + ( + [ + -np.inf, + -50, + -1, + -1e-20, + -1e-25, + -1e-45, + 0, + 1e-40, + 1e-20, + 1e-10, + 2, + 40, + np.inf, + ], + "float32", + ), + ([np.iinfo(np.uint8).min, 1, 2, 100, np.iinfo(np.uint8).max], "uint8"), + ( + [ + np.iinfo(np.int64).min, + -100, + 0, + 1, + 9999, + 100000, + 1e10, + np.iinfo(np.int64).max, + ], + "int64", + ), + ([NegInfinity(), "1", "A", "BA", "Ba", "C", Infinity()], "object"), + ( + [datetime(2001, 1, 1), datetime(2001, 1, 2), datetime(2001, 1, 5)], + "datetime64", + ), + ], + ) + def test_rank_inf_and_nan(self, contents, dtype, frame_or_series): + dtype_na_map = { + "float64": np.nan, + "float32": np.nan, + "object": None, + "datetime64": np.datetime64("nat"), + } + # Insert nans at random positions if underlying dtype has missing + # value. 
Then adjust the expected order by adding nans accordingly + # This is for testing whether rank calculation is affected + # when values are interwined with nan values. + values = np.array(contents, dtype=dtype) + exp_order = np.array(range(len(values)), dtype="float64") + 1.0 + if dtype in dtype_na_map: + na_value = dtype_na_map[dtype] + nan_indices = np.random.choice(range(len(values)), 5) + values = np.insert(values, nan_indices, na_value) + exp_order = np.insert(exp_order, nan_indices, np.nan) + + # Shuffle the testing array and expected results in the same way + random_order = np.random.permutation(len(values)) + obj = frame_or_series(values[random_order]) + expected = frame_or_series(exp_order[random_order], dtype="float64") + result = obj.rank() + tm.assert_equal(result, expected) + + def test_df_series_inf_nan_consistency(self): + # GH#32593 + index = [5, 4, 3, 2, 1, 6, 7, 8, 9, 10] + col1 = [5, 4, 3, 5, 8, 5, 2, 1, 6, 6] + col2 = [5, 4, np.nan, 5, 8, 5, np.inf, np.nan, 6, -np.inf] + df = DataFrame( + data={ + "col1": col1, + "col2": col2, + }, + index=index, + dtype="f8", + ) + df_result = df.rank() + + series_result = df.copy() + series_result["col1"] = df["col1"].rank() + series_result["col2"] = df["col2"].rank() + + tm.assert_frame_equal(df_result, series_result) + + def test_rank_both_inf(self): + # GH#32593 + df = DataFrame({"a": [-np.inf, 0, np.inf]}) + expected = DataFrame({"a": [1.0, 2.0, 3.0]}) + result = df.rank() + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "na_option,ascending,expected", + [ + ("top", True, [3.0, 1.0, 2.0]), + ("top", False, [2.0, 1.0, 3.0]), + ("bottom", True, [2.0, 3.0, 1.0]), + ("bottom", False, [1.0, 3.0, 2.0]), + ], + ) + def test_rank_inf_nans_na_option( + self, frame_or_series, method, na_option, ascending, expected + ): + obj = frame_or_series([np.inf, np.nan, -np.inf]) + result = obj.rank(method=method, na_option=na_option, ascending=ascending) + expected = frame_or_series(expected) + tm.assert_equal(result, expected) + + @pytest.mark.parametrize( + "na_option,ascending,expected", + [ + ("bottom", True, [1.0, 2.0, 4.0, 3.0]), + ("bottom", False, [1.0, 2.0, 4.0, 3.0]), + ("top", True, [2.0, 3.0, 1.0, 4.0]), + ("top", False, [2.0, 3.0, 1.0, 4.0]), + ], + ) + def test_rank_object_first(self, frame_or_series, na_option, ascending, expected): + obj = frame_or_series(["foo", "foo", None, "foo"]) + result = obj.rank(method="first", na_option=na_option, ascending=ascending) + expected = frame_or_series(expected) + tm.assert_equal(result, expected) + + @pytest.mark.parametrize( + "data,expected", + [ + ({"a": [1, 2, "a"], "b": [4, 5, 6]}, DataFrame({"b": [1.0, 2.0, 3.0]})), + ({"a": [1, 2, "a"]}, DataFrame(index=range(3))), + ], + ) + def test_rank_mixed_axis_zero(self, data, expected): + df = DataFrame(data) + msg = "Dropping of nuisance columns" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.rank() + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_reindex.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_reindex.py new file mode 100644 index 0000000..8575e78 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_reindex.py @@ -0,0 +1,1225 @@ +from datetime import ( + datetime, + timedelta, +) +import inspect + +import numpy as np +import pytest + +from pandas._libs.tslibs.timezones import dateutil_gettz as gettz +import pandas.util._test_decorators as td + +import pandas 
as pd +from pandas import ( + Categorical, + CategoricalIndex, + DataFrame, + Index, + MultiIndex, + Series, + date_range, + isna, +) +import pandas._testing as tm +from pandas.api.types import CategoricalDtype as CDT +import pandas.core.common as com + + +class TestReindexSetIndex: + # Tests that check both reindex and set_index + + def test_dti_set_index_reindex_datetimeindex(self): + # GH#6631 + df = DataFrame(np.random.random(6)) + idx1 = date_range("2011/01/01", periods=6, freq="M", tz="US/Eastern") + idx2 = date_range("2013", periods=6, freq="A", tz="Asia/Tokyo") + + df = df.set_index(idx1) + tm.assert_index_equal(df.index, idx1) + df = df.reindex(idx2) + tm.assert_index_equal(df.index, idx2) + + def test_dti_set_index_reindex_freq_with_tz(self): + # GH#11314 with tz + index = date_range( + datetime(2015, 10, 1), datetime(2015, 10, 1, 23), freq="H", tz="US/Eastern" + ) + df = DataFrame(np.random.randn(24, 1), columns=["a"], index=index) + new_index = date_range( + datetime(2015, 10, 2), datetime(2015, 10, 2, 23), freq="H", tz="US/Eastern" + ) + + result = df.set_index(new_index) + assert result.index.freq == index.freq + + def test_set_reset_index_intervalindex(self): + + df = DataFrame({"A": range(10)}) + ser = pd.cut(df.A, 5) + df["B"] = ser + df = df.set_index("B") + + df = df.reset_index() + + def test_setitem_reset_index_dtypes(self): + # GH 22060 + df = DataFrame(columns=["a", "b", "c"]).astype( + {"a": "datetime64[ns]", "b": np.int64, "c": np.float64} + ) + df1 = df.set_index(["a"]) + df1["d"] = [] + result = df1.reset_index() + expected = DataFrame(columns=["a", "b", "c", "d"], index=range(0)).astype( + {"a": "datetime64[ns]", "b": np.int64, "c": np.float64, "d": np.float64} + ) + tm.assert_frame_equal(result, expected) + + df2 = df.set_index(["a", "b"]) + df2["d"] = [] + result = df2.reset_index() + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "timezone, year, month, day, hour", + [["America/Chicago", 2013, 11, 3, 1], ["America/Santiago", 2021, 4, 3, 23]], + ) + def test_reindex_timestamp_with_fold(self, timezone, year, month, day, hour): + # see gh-40817 + test_timezone = gettz(timezone) + transition_1 = pd.Timestamp( + year=year, + month=month, + day=day, + hour=hour, + minute=0, + fold=0, + tzinfo=test_timezone, + ) + transition_2 = pd.Timestamp( + year=year, + month=month, + day=day, + hour=hour, + minute=0, + fold=1, + tzinfo=test_timezone, + ) + df = ( + DataFrame({"index": [transition_1, transition_2], "vals": ["a", "b"]}) + .set_index("index") + .reindex(["1", "2"]) + ) + tm.assert_frame_equal( + df, + DataFrame({"index": ["1", "2"], "vals": [None, None]}).set_index("index"), + ) + + +class TestDataFrameSelectReindex: + # These are specific reindex-based tests; other indexing tests should go in + # test_indexing + + def test_reindex_copies(self): + # based on asv time_reindex_axis1 + N = 10 + df = DataFrame(np.random.randn(N * 10, N)) + cols = np.arange(N) + np.random.shuffle(cols) + + result = df.reindex(columns=cols, copy=True) + assert not np.shares_memory(result[0]._values, df[0]._values) + + # pass both columns and index + result2 = df.reindex(columns=cols, index=df.index, copy=True) + assert not np.shares_memory(result2[0]._values, df[0]._values) + + @td.skip_array_manager_not_yet_implemented + def test_reindex_date_fill_value(self): + # passing date to dt64 is deprecated + arr = date_range("2016-01-01", periods=6).values.reshape(3, 2) + df = DataFrame(arr, columns=["A", "B"], index=range(3)) + + ts = df.iloc[0, 0] + fv = ts.date() + 
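# Added note (not in the upstream file): fv is a plain datetime.date; passing + # a date as fill_value for datetime64 columns is deprecated, and pandas casts + # it to the Timestamp ts, which is why `expected` below is built from ts and + # a FutureWarning is asserted. +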
+ with tm.assert_produces_warning(FutureWarning): + res = df.reindex(index=range(4), columns=["A", "B", "C"], fill_value=fv) + + expected = DataFrame( + {"A": df["A"].tolist() + [ts], "B": df["B"].tolist() + [ts], "C": [ts] * 4} + ) + tm.assert_frame_equal(res, expected) + + # only reindexing rows + with tm.assert_produces_warning(FutureWarning): + res = df.reindex(index=range(4), fill_value=fv) + tm.assert_frame_equal(res, expected[["A", "B"]]) + + # same with a datetime-castable str + res = df.reindex( + index=range(4), columns=["A", "B", "C"], fill_value="2016-01-01" + ) + tm.assert_frame_equal(res, expected) + + def test_reindex_with_multi_index(self): + # https://github.com/pandas-dev/pandas/issues/29896 + # tests for reindexing a multi-indexed DataFrame with a new MultiIndex + # + # confirms that we can reindex a multi-indexed DataFrame with a new + # MultiIndex object correctly when using no filling, backfilling, and + # padding + # + # The DataFrame, `df`, used in this test is: + # c + # a b + # -1 0 A + # 1 B + # 2 C + # 3 D + # 4 E + # 5 F + # 6 G + # 0 0 A + # 1 B + # 2 C + # 3 D + # 4 E + # 5 F + # 6 G + # 1 0 A + # 1 B + # 2 C + # 3 D + # 4 E + # 5 F + # 6 G + # + # and the other MultiIndex, `new_multi_index`, is: + # 0: 0 0.5 + # 1: 2.0 + # 2: 5.0 + # 3: 5.8 + df = DataFrame( + { + "a": [-1] * 7 + [0] * 7 + [1] * 7, + "b": list(range(7)) * 3, + "c": ["A", "B", "C", "D", "E", "F", "G"] * 3, + } + ).set_index(["a", "b"]) + new_index = [0.5, 2.0, 5.0, 5.8] + new_multi_index = MultiIndex.from_product([[0], new_index], names=["a", "b"]) + + # reindexing w/o a `method` value + reindexed = df.reindex(new_multi_index) + expected = DataFrame( + {"a": [0] * 4, "b": new_index, "c": [np.nan, "C", "F", np.nan]} + ).set_index(["a", "b"]) + tm.assert_frame_equal(expected, reindexed) + + # reindexing with backfilling + expected = DataFrame( + {"a": [0] * 4, "b": new_index, "c": ["B", "C", "F", "G"]} + ).set_index(["a", "b"]) + reindexed_with_backfilling = df.reindex(new_multi_index, method="bfill") + tm.assert_frame_equal(expected, reindexed_with_backfilling) + + reindexed_with_backfilling = df.reindex(new_multi_index, method="backfill") + tm.assert_frame_equal(expected, reindexed_with_backfilling) + + # reindexing with padding + expected = DataFrame( + {"a": [0] * 4, "b": new_index, "c": ["A", "C", "F", "F"]} + ).set_index(["a", "b"]) + reindexed_with_padding = df.reindex(new_multi_index, method="pad") + tm.assert_frame_equal(expected, reindexed_with_padding) + + reindexed_with_padding = df.reindex(new_multi_index, method="ffill") + tm.assert_frame_equal(expected, reindexed_with_padding) + + @pytest.mark.parametrize( + "method,expected_values", + [ + ("nearest", [0, 1, 1, 2]), + ("pad", [np.nan, 0, 1, 1]), + ("backfill", [0, 1, 2, 2]), + ], + ) + def test_reindex_methods(self, method, expected_values): + df = DataFrame({"x": list(range(5))}) + target = np.array([-0.1, 0.9, 1.1, 1.5]) + + expected = DataFrame({"x": expected_values}, index=target) + actual = df.reindex(target, method=method) + tm.assert_frame_equal(expected, actual) + + actual = df.reindex(target, method=method, tolerance=1) + tm.assert_frame_equal(expected, actual) + actual = df.reindex(target, method=method, tolerance=[1, 1, 1, 1]) + tm.assert_frame_equal(expected, actual) + + e2 = expected[::-1] + actual = df.reindex(target[::-1], method=method) + tm.assert_frame_equal(e2, actual) + + new_order = [3, 0, 2, 1] + e2 = expected.iloc[new_order] + actual = df.reindex(target[new_order], method=method) + tm.assert_frame_equal(e2, 
actual) + + switched_method = ( + "pad" if method == "backfill" else "backfill" if method == "pad" else method + ) + actual = df[::-1].reindex(target, method=switched_method) + tm.assert_frame_equal(expected, actual) + + def test_reindex_methods_nearest_special(self): + df = DataFrame({"x": list(range(5))}) + target = np.array([-0.1, 0.9, 1.1, 1.5]) + + expected = DataFrame({"x": [0, 1, 1, np.nan]}, index=target) + actual = df.reindex(target, method="nearest", tolerance=0.2) + tm.assert_frame_equal(expected, actual) + + expected = DataFrame({"x": [0, np.nan, 1, np.nan]}, index=target) + actual = df.reindex(target, method="nearest", tolerance=[0.5, 0.01, 0.4, 0.1]) + tm.assert_frame_equal(expected, actual) + + def test_reindex_nearest_tz(self, tz_aware_fixture): + # GH26683 + tz = tz_aware_fixture + idx = date_range("2019-01-01", periods=5, tz=tz) + df = DataFrame({"x": list(range(5))}, index=idx) + + expected = df.head(3) + actual = df.reindex(idx[:3], method="nearest") + tm.assert_frame_equal(expected, actual) + + def test_reindex_nearest_tz_empty_frame(self): + # https://github.com/pandas-dev/pandas/issues/31964 + dti = pd.DatetimeIndex(["2016-06-26 14:27:26+00:00"]) + df = DataFrame(index=pd.DatetimeIndex(["2016-07-04 14:00:59+00:00"])) + expected = DataFrame(index=dti) + result = df.reindex(dti, method="nearest") + tm.assert_frame_equal(result, expected) + + def test_reindex_frame_add_nat(self): + rng = date_range("1/1/2000 00:00:00", periods=10, freq="10s") + df = DataFrame({"A": np.random.randn(len(rng)), "B": rng}) + + result = df.reindex(range(15)) + assert np.issubdtype(result["B"].dtype, np.dtype("M8[ns]")) + + mask = com.isna(result)["B"] + assert mask[-5:].all() + assert not mask[:-5].any() + + @pytest.mark.parametrize( + "method, exp_values", + [("ffill", [0, 1, 2, 3]), ("bfill", [1.0, 2.0, 3.0, np.nan])], + ) + def test_reindex_frame_tz_ffill_bfill(self, frame_or_series, method, exp_values): + # GH#38566 + obj = frame_or_series( + [0, 1, 2, 3], + index=date_range("2020-01-01 00:00:00", periods=4, freq="H", tz="UTC"), + ) + new_index = date_range("2020-01-01 00:01:00", periods=4, freq="H", tz="UTC") + result = obj.reindex(new_index, method=method, tolerance=pd.Timedelta("1 hour")) + expected = frame_or_series(exp_values, index=new_index) + tm.assert_equal(result, expected) + + def test_reindex_limit(self): + # GH 28631 + data = [["A", "A", "A"], ["B", "B", "B"], ["C", "C", "C"], ["D", "D", "D"]] + exp_data = [ + ["A", "A", "A"], + ["B", "B", "B"], + ["C", "C", "C"], + ["D", "D", "D"], + ["D", "D", "D"], + [np.nan, np.nan, np.nan], + ] + df = DataFrame(data) + result = df.reindex([0, 1, 2, 3, 4, 5], method="ffill", limit=1) + expected = DataFrame(exp_data) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "idx, check_index_type", + [ + [["C", "B", "A"], True], + [["F", "C", "A", "D"], True], + [["A"], True], + [["A", "B", "C"], True], + [["C", "A", "B"], True], + [["C", "B"], True], + [["C", "A"], True], + [["A", "B"], True], + [["B", "A", "C"], True], + # reindex by these causes different MultiIndex levels + [["D", "F"], False], + [["A", "C", "B"], False], + ], + ) + def test_reindex_level_verify_first_level(self, idx, check_index_type): + df = DataFrame( + { + "jim": list("B" * 4 + "A" * 2 + "C" * 3), + "joe": list("abcdeabcd")[::-1], + "jolie": [10, 20, 30] * 3, + "joline": np.random.randint(0, 1000, 9), + } + ) + icol = ["jim", "joe", "jolie"] + + def f(val): + return np.nonzero((df["jim"] == val).to_numpy())[0] + + i = np.concatenate(list(map(f, 
idx))) + left = df.set_index(icol).reindex(idx, level="jim") + right = df.iloc[i].set_index(icol) + tm.assert_frame_equal(left, right, check_index_type=check_index_type) + + @pytest.mark.parametrize( + "idx", + [ + ("mid",), + ("mid", "btm"), + ("mid", "btm", "top"), + ("mid",), + ("mid", "top"), + ("mid", "top", "btm"), + ("btm",), + ("btm", "mid"), + ("btm", "mid", "top"), + ("btm",), + ("btm", "top"), + ("btm", "top", "mid"), + ("top",), + ("top", "mid"), + ("top", "mid", "btm"), + ("top",), + ("top", "btm"), + ("top", "btm", "mid"), + ], + ) + def test_reindex_level_verify_first_level_repeats(self, idx): + df = DataFrame( + { + "jim": ["mid"] * 5 + ["btm"] * 8 + ["top"] * 7, + "joe": ["3rd"] * 2 + + ["1st"] * 3 + + ["2nd"] * 3 + + ["1st"] * 2 + + ["3rd"] * 3 + + ["1st"] * 2 + + ["3rd"] * 3 + + ["2nd"] * 2, + # this needs to be jointly unique with jim and joe or + # reindexing will fail ~1.5% of the time, this works + # out to needing unique groups of same size as joe + "jolie": np.concatenate( + [ + np.random.choice(1000, x, replace=False) + for x in [2, 3, 3, 2, 3, 2, 3, 2] + ] + ), + "joline": np.random.randn(20).round(3) * 10, + } + ) + icol = ["jim", "joe", "jolie"] + + def f(val): + return np.nonzero((df["jim"] == val).to_numpy())[0] + + i = np.concatenate(list(map(f, idx))) + left = df.set_index(icol).reindex(idx, level="jim") + right = df.iloc[i].set_index(icol) + tm.assert_frame_equal(left, right) + + @pytest.mark.parametrize( + "idx, indexer", + [ + [ + ["1st", "2nd", "3rd"], + [2, 3, 4, 0, 1, 8, 9, 5, 6, 7, 10, 11, 12, 13, 14, 18, 19, 15, 16, 17], + ], + [ + ["3rd", "2nd", "1st"], + [0, 1, 2, 3, 4, 10, 11, 12, 5, 6, 7, 8, 9, 15, 16, 17, 18, 19, 13, 14], + ], + [["2nd", "3rd"], [0, 1, 5, 6, 7, 10, 11, 12, 18, 19, 15, 16, 17]], + [["3rd", "1st"], [0, 1, 2, 3, 4, 10, 11, 12, 8, 9, 15, 16, 17, 13, 14]], + ], + ) + def test_reindex_level_verify_repeats(self, idx, indexer): + df = DataFrame( + { + "jim": ["mid"] * 5 + ["btm"] * 8 + ["top"] * 7, + "joe": ["3rd"] * 2 + + ["1st"] * 3 + + ["2nd"] * 3 + + ["1st"] * 2 + + ["3rd"] * 3 + + ["1st"] * 2 + + ["3rd"] * 3 + + ["2nd"] * 2, + # this needs to be jointly unique with jim and joe or + # reindexing will fail ~1.5% of the time, this works + # out to needing unique groups of same size as joe + "jolie": np.concatenate( + [ + np.random.choice(1000, x, replace=False) + for x in [2, 3, 3, 2, 3, 2, 3, 2] + ] + ), + "joline": np.random.randn(20).round(3) * 10, + } + ) + icol = ["jim", "joe", "jolie"] + left = df.set_index(icol).reindex(idx, level="joe") + right = df.iloc[indexer].set_index(icol) + tm.assert_frame_equal(left, right) + + @pytest.mark.parametrize( + "idx, indexer, check_index_type", + [ + [list("abcde"), [3, 2, 1, 0, 5, 4, 8, 7, 6], True], + [list("abcd"), [3, 2, 1, 0, 5, 8, 7, 6], True], + [list("abc"), [3, 2, 1, 8, 7, 6], True], + [list("eca"), [1, 3, 4, 6, 8], True], + [list("edc"), [0, 1, 4, 5, 6], True], + [list("eadbc"), [3, 0, 2, 1, 4, 5, 8, 7, 6], True], + [list("edwq"), [0, 4, 5], True], + [list("wq"), [], False], + ], + ) + def test_reindex_level_verify(self, idx, indexer, check_index_type): + df = DataFrame( + { + "jim": list("B" * 4 + "A" * 2 + "C" * 3), + "joe": list("abcdeabcd")[::-1], + "jolie": [10, 20, 30] * 3, + "joline": np.random.randint(0, 1000, 9), + } + ) + icol = ["jim", "joe", "jolie"] + left = df.set_index(icol).reindex(idx, level="joe") + right = df.iloc[indexer].set_index(icol) + tm.assert_frame_equal(left, right, check_index_type=check_index_type) + + def test_non_monotonic_reindex_methods(self): + 
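+ # Added summary (not in the upstream file): reindexing with a fill method + # ("pad"/"ffill", "bfill", "nearest") requires a monotonic index; df_rev + # scrambles the index, so every call below must raise the ValueError. +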
dr = date_range("2013-08-01", periods=6, freq="B") + data = np.random.randn(6, 1) + df = DataFrame(data, index=dr, columns=list("A")) + df_rev = DataFrame(data, index=dr[[3, 4, 5] + [0, 1, 2]], columns=list("A")) + # index is not monotonic increasing or decreasing + msg = "index must be monotonic increasing or decreasing" + with pytest.raises(ValueError, match=msg): + df_rev.reindex(df.index, method="pad") + with pytest.raises(ValueError, match=msg): + df_rev.reindex(df.index, method="ffill") + with pytest.raises(ValueError, match=msg): + df_rev.reindex(df.index, method="bfill") + with pytest.raises(ValueError, match=msg): + df_rev.reindex(df.index, method="nearest") + + def test_reindex_sparse(self): + # https://github.com/pandas-dev/pandas/issues/35286 + df = DataFrame( + {"A": [0, 1], "B": pd.array([0, 1], dtype=pd.SparseDtype("int64", 0))} + ) + result = df.reindex([0, 2]) + expected = DataFrame( + { + "A": [0.0, np.nan], + "B": pd.array([0.0, np.nan], dtype=pd.SparseDtype("float64", 0.0)), + }, + index=[0, 2], + ) + tm.assert_frame_equal(result, expected) + + def test_reindex(self, float_frame): + datetime_series = tm.makeTimeSeries(nper=30) + + newFrame = float_frame.reindex(datetime_series.index) + + for col in newFrame.columns: + for idx, val in newFrame[col].items(): + if idx in float_frame.index: + if np.isnan(val): + assert np.isnan(float_frame[col][idx]) + else: + assert val == float_frame[col][idx] + else: + assert np.isnan(val) + + for col, series in newFrame.items(): + assert tm.equalContents(series.index, newFrame.index) + emptyFrame = float_frame.reindex(Index([])) + assert len(emptyFrame.index) == 0 + + # Cython code should be unit-tested directly + nonContigFrame = float_frame.reindex(datetime_series.index[::2]) + + for col in nonContigFrame.columns: + for idx, val in nonContigFrame[col].items(): + if idx in float_frame.index: + if np.isnan(val): + assert np.isnan(float_frame[col][idx]) + else: + assert val == float_frame[col][idx] + else: + assert np.isnan(val) + + for col, series in nonContigFrame.items(): + assert tm.equalContents(series.index, nonContigFrame.index) + + # corner cases + + # Same index, copies values but not index if copy=False + newFrame = float_frame.reindex(float_frame.index, copy=False) + assert newFrame.index is float_frame.index + + # length zero + newFrame = float_frame.reindex([]) + assert newFrame.empty + assert len(newFrame.columns) == len(float_frame.columns) + + # length zero with columns reindexed with non-empty index + newFrame = float_frame.reindex([]) + newFrame = newFrame.reindex(float_frame.index) + assert len(newFrame.index) == len(float_frame.index) + assert len(newFrame.columns) == len(float_frame.columns) + + # pass non-Index + newFrame = float_frame.reindex(list(datetime_series.index)) + expected = datetime_series.index._with_freq(None) + tm.assert_index_equal(newFrame.index, expected) + + # copy with no axes + result = float_frame.reindex() + tm.assert_frame_equal(result, float_frame) + assert result is not float_frame + + def test_reindex_nan(self): + df = DataFrame( + [[1, 2], [3, 5], [7, 11], [9, 23]], + index=[2, np.nan, 1, 5], + columns=["joe", "jim"], + ) + + i, j = [np.nan, 5, 5, np.nan, 1, 2, np.nan], [1, 3, 3, 1, 2, 0, 1] + tm.assert_frame_equal(df.reindex(i), df.iloc[j]) + + df.index = df.index.astype("object") + tm.assert_frame_equal(df.reindex(i), df.iloc[j], check_index_type=False) + + # GH10388 + df = DataFrame( + { + "other": ["a", "b", np.nan, "c"], + "date": ["2015-03-22", np.nan, "2012-01-08", np.nan], + 
"amount": [2, 3, 4, 5], + } + ) + + df["date"] = pd.to_datetime(df.date) + df["delta"] = (pd.to_datetime("2015-06-18") - df["date"]).shift(1) + + left = df.set_index(["delta", "other", "date"]).reset_index() + right = df.reindex(columns=["delta", "other", "date", "amount"]) + tm.assert_frame_equal(left, right) + + def test_reindex_name_remains(self): + s = Series(np.random.rand(10)) + df = DataFrame(s, index=np.arange(len(s))) + i = Series(np.arange(10), name="iname") + + df = df.reindex(i) + assert df.index.name == "iname" + + df = df.reindex(Index(np.arange(10), name="tmpname")) + assert df.index.name == "tmpname" + + s = Series(np.random.rand(10)) + df = DataFrame(s.T, index=np.arange(len(s))) + i = Series(np.arange(10), name="iname") + df = df.reindex(columns=i) + assert df.columns.name == "iname" + + def test_reindex_int(self, int_frame): + smaller = int_frame.reindex(int_frame.index[::2]) + + assert smaller["A"].dtype == np.int64 + + bigger = smaller.reindex(int_frame.index) + assert bigger["A"].dtype == np.float64 + + smaller = int_frame.reindex(columns=["A", "B"]) + assert smaller["A"].dtype == np.int64 + + def test_reindex_columns(self, float_frame): + new_frame = float_frame.reindex(columns=["A", "B", "E"]) + + tm.assert_series_equal(new_frame["B"], float_frame["B"]) + assert np.isnan(new_frame["E"]).all() + assert "C" not in new_frame + + # Length zero + new_frame = float_frame.reindex(columns=[]) + assert new_frame.empty + + def test_reindex_columns_method(self): + + # GH 14992, reindexing over columns ignored method + df = DataFrame( + data=[[11, 12, 13], [21, 22, 23], [31, 32, 33]], + index=[1, 2, 4], + columns=[1, 2, 4], + dtype=float, + ) + + # default method + result = df.reindex(columns=range(6)) + expected = DataFrame( + data=[ + [np.nan, 11, 12, np.nan, 13, np.nan], + [np.nan, 21, 22, np.nan, 23, np.nan], + [np.nan, 31, 32, np.nan, 33, np.nan], + ], + index=[1, 2, 4], + columns=range(6), + dtype=float, + ) + tm.assert_frame_equal(result, expected) + + # method='ffill' + result = df.reindex(columns=range(6), method="ffill") + expected = DataFrame( + data=[ + [np.nan, 11, 12, 12, 13, 13], + [np.nan, 21, 22, 22, 23, 23], + [np.nan, 31, 32, 32, 33, 33], + ], + index=[1, 2, 4], + columns=range(6), + dtype=float, + ) + tm.assert_frame_equal(result, expected) + + # method='bfill' + result = df.reindex(columns=range(6), method="bfill") + expected = DataFrame( + data=[ + [11, 11, 12, 13, 13, np.nan], + [21, 21, 22, 23, 23, np.nan], + [31, 31, 32, 33, 33, np.nan], + ], + index=[1, 2, 4], + columns=range(6), + dtype=float, + ) + tm.assert_frame_equal(result, expected) + + def test_reindex_axes(self): + # GH 3317, reindexing by both axes loses freq of the index + df = DataFrame( + np.ones((3, 3)), + index=[datetime(2012, 1, 1), datetime(2012, 1, 2), datetime(2012, 1, 3)], + columns=["a", "b", "c"], + ) + time_freq = date_range("2012-01-01", "2012-01-03", freq="d") + some_cols = ["a", "b"] + + index_freq = df.reindex(index=time_freq).index.freq + both_freq = df.reindex(index=time_freq, columns=some_cols).index.freq + seq_freq = df.reindex(index=time_freq).reindex(columns=some_cols).index.freq + assert index_freq == both_freq + assert index_freq == seq_freq + + def test_reindex_fill_value(self): + df = DataFrame(np.random.randn(10, 4)) + + # axis=0 + result = df.reindex(list(range(15))) + assert np.isnan(result.values[-5:]).all() + + result = df.reindex(range(15), fill_value=0) + expected = df.reindex(range(15)).fillna(0) + tm.assert_frame_equal(result, expected) + + # axis=1 + 
result = df.reindex(columns=range(5), fill_value=0.0) + expected = df.copy() + expected[4] = 0.0 + tm.assert_frame_equal(result, expected) + + result = df.reindex(columns=range(5), fill_value=0) + expected = df.copy() + expected[4] = 0 + tm.assert_frame_equal(result, expected) + + result = df.reindex(columns=range(5), fill_value="foo") + expected = df.copy() + expected[4] = "foo" + tm.assert_frame_equal(result, expected) + + # other dtypes + df["foo"] = "foo" + result = df.reindex(range(15), fill_value=0) + expected = df.reindex(range(15)).fillna(0) + tm.assert_frame_equal(result, expected) + + def test_reindex_dups(self): + + # GH4746, reindex on duplicate index error messages + arr = np.random.randn(10) + df = DataFrame(arr, index=[1, 2, 3, 4, 5, 1, 2, 3, 4, 5]) + + # set index is ok + result = df.copy() + result.index = list(range(len(df))) + expected = DataFrame(arr, index=list(range(len(df)))) + tm.assert_frame_equal(result, expected) + + # reindex fails + msg = "cannot reindex on an axis with duplicate labels" + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + df.reindex(index=list(range(len(df)))) + + def test_reindex_with_duplicate_columns(self): + + # reindex is invalid! + df = DataFrame( + [[1, 5, 7.0], [1, 5, 7.0], [1, 5, 7.0]], columns=["bar", "a", "a"] + ) + msg = "cannot reindex on an axis with duplicate labels" + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + df.reindex(columns=["bar"]) + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + df.reindex(columns=["bar", "foo"]) + + def test_reindex_axis_style(self): + # https://github.com/pandas-dev/pandas/issues/12392 + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + expected = DataFrame( + {"A": [1, 2, np.nan], "B": [4, 5, np.nan]}, index=[0, 1, 3] + ) + result = df.reindex([0, 1, 3]) + tm.assert_frame_equal(result, expected) + + result = df.reindex([0, 1, 3], axis=0) + tm.assert_frame_equal(result, expected) + + result = df.reindex([0, 1, 3], axis="index") + tm.assert_frame_equal(result, expected) + + def test_reindex_positional_warns(self): + # https://github.com/pandas-dev/pandas/issues/12392 + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + expected = DataFrame({"A": [1.0, 2], "B": [4.0, 5], "C": [np.nan, np.nan]}) + with tm.assert_produces_warning(FutureWarning): + result = df.reindex([0, 1], ["A", "B", "C"]) + + tm.assert_frame_equal(result, expected) + + def test_reindex_axis_style_raises(self): + # https://github.com/pandas-dev/pandas/issues/12392 + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + with pytest.raises(TypeError, match="Cannot specify both 'axis'"): + df.reindex([0, 1], ["A"], axis=1) + + with pytest.raises(TypeError, match="Cannot specify both 'axis'"): + df.reindex([0, 1], ["A"], axis="index") + + with pytest.raises(TypeError, match="Cannot specify both 'axis'"): + df.reindex(index=[0, 1], axis="index") + + with pytest.raises(TypeError, match="Cannot specify both 'axis'"): + df.reindex(index=[0, 1], axis="columns") + + with pytest.raises(TypeError, match="Cannot specify both 'axis'"): + df.reindex(columns=[0, 1], axis="columns") + + with pytest.raises(TypeError, match="Cannot specify both 'axis'"): + df.reindex(index=[0, 1], columns=[0, 1], axis="columns") + + with pytest.raises(TypeError, match="Cannot specify all"): + df.reindex([0, 1], [0], ["A"]) + + # Mixing styles + with pytest.raises(TypeError, 
match="Cannot specify both 'axis'"): + df.reindex(index=[0, 1], axis="index") + + with pytest.raises(TypeError, match="Cannot specify both 'axis'"): + df.reindex(index=[0, 1], axis="columns") + + # Duplicates + with pytest.raises(TypeError, match="multiple values"): + df.reindex([0, 1], labels=[0, 1]) + + def test_reindex_single_named_indexer(self): + # https://github.com/pandas-dev/pandas/issues/12392 + df = DataFrame({"A": [1, 2, 3], "B": [1, 2, 3]}) + result = df.reindex([0, 1], columns=["A"]) + expected = DataFrame({"A": [1, 2]}) + tm.assert_frame_equal(result, expected) + + def test_reindex_api_equivalence(self): + # https://github.com/pandas-dev/pandas/issues/12392 + # equivalence of the labels/axis and index/columns API's + df = DataFrame( + [[1, 2, 3], [3, 4, 5], [5, 6, 7]], + index=["a", "b", "c"], + columns=["d", "e", "f"], + ) + + res1 = df.reindex(["b", "a"]) + res2 = df.reindex(index=["b", "a"]) + res3 = df.reindex(labels=["b", "a"]) + res4 = df.reindex(labels=["b", "a"], axis=0) + res5 = df.reindex(["b", "a"], axis=0) + for res in [res2, res3, res4, res5]: + tm.assert_frame_equal(res1, res) + + res1 = df.reindex(columns=["e", "d"]) + res2 = df.reindex(["e", "d"], axis=1) + res3 = df.reindex(labels=["e", "d"], axis=1) + for res in [res2, res3]: + tm.assert_frame_equal(res1, res) + + with tm.assert_produces_warning(FutureWarning) as m: + res1 = df.reindex(["b", "a"], ["e", "d"]) + assert "reindex" in str(m[0].message) + res2 = df.reindex(columns=["e", "d"], index=["b", "a"]) + res3 = df.reindex(labels=["b", "a"], axis=0).reindex(labels=["e", "d"], axis=1) + for res in [res2, res3]: + tm.assert_frame_equal(res1, res) + + def test_reindex_boolean(self): + frame = DataFrame( + np.ones((10, 2), dtype=bool), index=np.arange(0, 20, 2), columns=[0, 2] + ) + + reindexed = frame.reindex(np.arange(10)) + assert reindexed.values.dtype == np.object_ + assert isna(reindexed[0][1]) + + reindexed = frame.reindex(columns=range(3)) + assert reindexed.values.dtype == np.object_ + assert isna(reindexed[1]).all() + + def test_reindex_objects(self, float_string_frame): + reindexed = float_string_frame.reindex(columns=["foo", "A", "B"]) + assert "foo" in reindexed + + reindexed = float_string_frame.reindex(columns=["A", "B"]) + assert "foo" not in reindexed + + def test_reindex_corner(self, int_frame): + index = Index(["a", "b", "c"]) + dm = DataFrame({}).reindex(index=[1, 2, 3]) + reindexed = dm.reindex(columns=index) + tm.assert_index_equal(reindexed.columns, index) + + # ints are weird + smaller = int_frame.reindex(columns=["A", "B", "E"]) + assert smaller["E"].dtype == np.float64 + + def test_reindex_with_nans(self): + df = DataFrame( + [[1, 2], [3, 4], [np.nan, np.nan], [7, 8], [9, 10]], + columns=["a", "b"], + index=[100.0, 101.0, np.nan, 102.0, 103.0], + ) + + result = df.reindex(index=[101.0, 102.0, 103.0]) + expected = df.iloc[[1, 3, 4]] + tm.assert_frame_equal(result, expected) + + result = df.reindex(index=[103.0]) + expected = df.iloc[[4]] + tm.assert_frame_equal(result, expected) + + result = df.reindex(index=[101.0]) + expected = df.iloc[[1]] + tm.assert_frame_equal(result, expected) + + def test_reindex_multi(self): + df = DataFrame(np.random.randn(3, 3)) + + result = df.reindex(index=range(4), columns=range(4)) + expected = df.reindex(list(range(4))).reindex(columns=range(4)) + + tm.assert_frame_equal(result, expected) + + df = DataFrame(np.random.randint(0, 10, (3, 3))) + + result = df.reindex(index=range(4), columns=range(4)) + expected = 
df.reindex(list(range(4))).reindex(columns=range(4)) + + tm.assert_frame_equal(result, expected) + + df = DataFrame(np.random.randint(0, 10, (3, 3))) + + result = df.reindex(index=range(2), columns=range(2)) + expected = df.reindex(range(2)).reindex(columns=range(2)) + + tm.assert_frame_equal(result, expected) + + df = DataFrame(np.random.randn(5, 3) + 1j, columns=["a", "b", "c"]) + + result = df.reindex(index=[0, 1], columns=["a", "b"]) + expected = df.reindex([0, 1]).reindex(columns=["a", "b"]) + + tm.assert_frame_equal(result, expected) + + def test_reindex_multi_categorical_time(self): + # https://github.com/pandas-dev/pandas/issues/21390 + midx = MultiIndex.from_product( + [ + Categorical(["a", "b", "c"]), + Categorical(date_range("2012-01-01", periods=3, freq="H")), + ] + ) + df = DataFrame({"a": range(len(midx))}, index=midx) + df2 = df.iloc[[0, 1, 2, 3, 4, 5, 6, 8]] + + result = df2.reindex(midx) + expected = DataFrame({"a": [0, 1, 2, 3, 4, 5, 6, np.nan, 8]}, index=midx) + tm.assert_frame_equal(result, expected) + + def test_reindex_with_categoricalindex(self): + df = DataFrame( + { + "A": np.arange(3, dtype="int64"), + }, + index=CategoricalIndex(list("abc"), dtype=CDT(list("cabe")), name="B"), + ) + + # reindexing + # convert to a regular index + result = df.reindex(["a", "b", "e"]) + expected = DataFrame({"A": [0, 1, np.nan], "B": Series(list("abe"))}).set_index( + "B" + ) + tm.assert_frame_equal(result, expected, check_index_type=True) + + result = df.reindex(["a", "b"]) + expected = DataFrame({"A": [0, 1], "B": Series(list("ab"))}).set_index("B") + tm.assert_frame_equal(result, expected, check_index_type=True) + + result = df.reindex(["e"]) + expected = DataFrame({"A": [np.nan], "B": Series(["e"])}).set_index("B") + tm.assert_frame_equal(result, expected, check_index_type=True) + + result = df.reindex(["d"]) + expected = DataFrame({"A": [np.nan], "B": Series(["d"])}).set_index("B") + tm.assert_frame_equal(result, expected, check_index_type=True) + + # since we are actually reindexing with a Categorical + # then return a Categorical + cats = list("cabe") + + result = df.reindex(Categorical(["a", "e"], categories=cats)) + expected = DataFrame( + {"A": [0, np.nan], "B": Series(list("ae")).astype(CDT(cats))} + ).set_index("B") + tm.assert_frame_equal(result, expected, check_index_type=True) + + result = df.reindex(Categorical(["a"], categories=cats)) + expected = DataFrame( + {"A": [0], "B": Series(list("a")).astype(CDT(cats))} + ).set_index("B") + tm.assert_frame_equal(result, expected, check_index_type=True) + + result = df.reindex(["a", "b", "e"]) + expected = DataFrame({"A": [0, 1, np.nan], "B": Series(list("abe"))}).set_index( + "B" + ) + tm.assert_frame_equal(result, expected, check_index_type=True) + + result = df.reindex(["a", "b"]) + expected = DataFrame({"A": [0, 1], "B": Series(list("ab"))}).set_index("B") + tm.assert_frame_equal(result, expected, check_index_type=True) + + result = df.reindex(["e"]) + expected = DataFrame({"A": [np.nan], "B": Series(["e"])}).set_index("B") + tm.assert_frame_equal(result, expected, check_index_type=True) + + # give back the type of categorical that we received + result = df.reindex(Categorical(["a", "e"], categories=cats, ordered=True)) + expected = DataFrame( + {"A": [0, np.nan], "B": Series(list("ae")).astype(CDT(cats, ordered=True))} + ).set_index("B") + tm.assert_frame_equal(result, expected, check_index_type=True) + + result = df.reindex(Categorical(["a", "d"], categories=["a", "d"])) + expected = DataFrame( + {"A": [0, np.nan], 
"B": Series(list("ad")).astype(CDT(["a", "d"]))} + ).set_index("B") + tm.assert_frame_equal(result, expected, check_index_type=True) + + df2 = DataFrame( + { + "A": np.arange(6, dtype="int64"), + }, + index=CategoricalIndex(list("aabbca"), dtype=CDT(list("cabe")), name="B"), + ) + # passed duplicate indexers are not allowed + msg = "cannot reindex on an axis with duplicate labels" + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + df2.reindex(["a", "b"]) + + # args NotImplemented ATM + msg = r"argument {} is not implemented for CategoricalIndex\.reindex" + with pytest.raises(NotImplementedError, match=msg.format("method")): + df.reindex(["a"], method="ffill") + with pytest.raises(NotImplementedError, match=msg.format("level")): + df.reindex(["a"], level=1) + with pytest.raises(NotImplementedError, match=msg.format("limit")): + df.reindex(["a"], limit=2) + + def test_reindex_signature(self): + sig = inspect.signature(DataFrame.reindex) + parameters = set(sig.parameters) + assert parameters == { + "self", + "labels", + "index", + "columns", + "axis", + "limit", + "copy", + "level", + "method", + "fill_value", + "tolerance", + } + + def test_reindex_multiindex_ffill_added_rows(self): + # GH#23693 + # reindex added rows with nan values even when fill method was specified + mi = MultiIndex.from_tuples([("a", "b"), ("d", "e")]) + df = DataFrame([[0, 7], [3, 4]], index=mi, columns=["x", "y"]) + mi2 = MultiIndex.from_tuples([("a", "b"), ("d", "e"), ("h", "i")]) + result = df.reindex(mi2, axis=0, method="ffill") + expected = DataFrame([[0, 7], [3, 4], [3, 4]], index=mi2, columns=["x", "y"]) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "kwargs", + [ + {"method": "pad", "tolerance": timedelta(seconds=9)}, + {"method": "backfill", "tolerance": timedelta(seconds=9)}, + {"method": "nearest"}, + {"method": None}, + ], + ) + def test_reindex_empty_frame(self, kwargs): + # GH#27315 + idx = date_range(start="2020", freq="30s", periods=3) + df = DataFrame([], index=Index([], name="time"), columns=["a"]) + result = df.reindex(idx, **kwargs) + expected = DataFrame({"a": [pd.NA] * 3}, index=idx) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "src_idx", + [ + Index([]), + CategoricalIndex([]), + ], + ) + @pytest.mark.parametrize( + "cat_idx", + [ + # No duplicates + Index([]), + CategoricalIndex([]), + Index(["A", "B"]), + CategoricalIndex(["A", "B"]), + # Duplicates: GH#38906 + Index(["A", "A"]), + CategoricalIndex(["A", "A"]), + ], + ) + def test_reindex_empty(self, src_idx, cat_idx): + df = DataFrame(columns=src_idx, index=["K"], dtype="f8") + + result = df.reindex(columns=cat_idx) + expected = DataFrame(index=["K"], columns=cat_idx, dtype="f8") + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("dtype", ["m8[ns]", "M8[ns]"]) + def test_reindex_datetimelike_to_object(self, dtype): + # GH#39755 dont cast dt64/td64 to ints + mi = MultiIndex.from_product([list("ABCDE"), range(2)]) + + dti = date_range("2016-01-01", periods=10) + fv = np.timedelta64("NaT", "ns") + if dtype == "m8[ns]": + dti = dti - dti[0] + fv = np.datetime64("NaT", "ns") + + ser = Series(dti, index=mi) + ser[::3] = pd.NaT + + df = ser.unstack() + + index = df.index.append(Index([1])) + columns = df.columns.append(Index(["foo"])) + + res = df.reindex(index=index, columns=columns, fill_value=fv) + + expected = DataFrame( + { + 0: df[0].tolist() + [fv], + 1: df[1].tolist() + [fv], + "foo": np.array(["NaT"] * 
6, dtype=fv.dtype), + }, + index=index, + ) + assert (res.dtypes[[0, 1]] == object).all() + assert res.iloc[0, 0] is pd.NaT + assert res.iloc[-1, 0] is fv + assert res.iloc[-1, 1] is fv + tm.assert_frame_equal(res, expected) + + @pytest.mark.parametrize( + "index_df,index_res,index_exp", + [ + ( + CategoricalIndex([], categories=["A"]), + Index(["A"]), + Index(["A"]), + ), + ( + CategoricalIndex([], categories=["A"]), + Index(["B"]), + Index(["B"]), + ), + ( + CategoricalIndex([], categories=["A"]), + CategoricalIndex(["A"]), + CategoricalIndex(["A"]), + ), + ( + CategoricalIndex([], categories=["A"]), + CategoricalIndex(["B"]), + CategoricalIndex(["B"]), + ), + ], + ) + def test_reindex_not_category(self, index_df, index_res, index_exp): + # GH#28690 + df = DataFrame(index=index_df) + result = df.reindex(index=index_res) + expected = DataFrame(index=index_exp) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_reindex_like.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_reindex_like.py new file mode 100644 index 0000000..ce68ec2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_reindex_like.py @@ -0,0 +1,39 @@ +import numpy as np +import pytest + +from pandas import DataFrame +import pandas._testing as tm + + +class TestDataFrameReindexLike: + def test_reindex_like(self, float_frame): + other = float_frame.reindex(index=float_frame.index[:10], columns=["C", "B"]) + + tm.assert_frame_equal(other, float_frame.reindex_like(other)) + + @pytest.mark.parametrize( + "method,expected_values", + [ + ("nearest", [0, 1, 1, 2]), + ("pad", [np.nan, 0, 1, 1]), + ("backfill", [0, 1, 2, 2]), + ], + ) + def test_reindex_like_methods(self, method, expected_values): + df = DataFrame({"x": list(range(5))}) + + result = df.reindex_like(df, method=method, tolerance=0) + tm.assert_frame_equal(df, result) + result = df.reindex_like(df, method=method, tolerance=[0, 0, 0, 0]) + tm.assert_frame_equal(df, result) + + def test_reindex_like_subclass(self): + # https://github.com/pandas-dev/pandas/issues/31925 + class MyDataFrame(DataFrame): + pass + + expected = DataFrame() + df = MyDataFrame() + result = df.reindex_like(expected) + + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_rename.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_rename.py new file mode 100644 index 0000000..33fb191 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_rename.py @@ -0,0 +1,420 @@ +from collections import ChainMap +import inspect + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + merge, +) +import pandas._testing as tm + + +class TestRename: + def test_rename_signature(self): + sig = inspect.signature(DataFrame.rename) + parameters = set(sig.parameters) + assert parameters == { + "self", + "mapper", + "index", + "columns", + "axis", + "inplace", + "copy", + "level", + "errors", + } + + @pytest.mark.parametrize("klass", [Series, DataFrame]) + def test_rename_mi(self, klass): + obj = klass( + [11, 21, 31], + index=MultiIndex.from_tuples([("A", x) for x in ["a", "B", "c"]]), + ) + obj.rename(str.lower) + + def test_rename(self, float_frame): + mapping = {"A": "a", "B": "b", "C": "c", "D": "d"} + + renamed = float_frame.rename(columns=mapping) + renamed2 = 
float_frame.rename(columns=str.lower) + + tm.assert_frame_equal(renamed, renamed2) + tm.assert_frame_equal( + renamed2.rename(columns=str.upper), float_frame, check_names=False + ) + + # index + data = {"A": {"foo": 0, "bar": 1}} + + # gets sorted alphabetical + df = DataFrame(data) + renamed = df.rename(index={"foo": "bar", "bar": "foo"}) + tm.assert_index_equal(renamed.index, Index(["foo", "bar"])) + + renamed = df.rename(index=str.upper) + tm.assert_index_equal(renamed.index, Index(["BAR", "FOO"])) + + # have to pass something + with pytest.raises(TypeError, match="must pass an index to rename"): + float_frame.rename() + + # partial columns + renamed = float_frame.rename(columns={"C": "foo", "D": "bar"}) + tm.assert_index_equal(renamed.columns, Index(["A", "B", "foo", "bar"])) + + # other axis + renamed = float_frame.T.rename(index={"C": "foo", "D": "bar"}) + tm.assert_index_equal(renamed.index, Index(["A", "B", "foo", "bar"])) + + # index with name + index = Index(["foo", "bar"], name="name") + renamer = DataFrame(data, index=index) + renamed = renamer.rename(index={"foo": "bar", "bar": "foo"}) + tm.assert_index_equal(renamed.index, Index(["bar", "foo"], name="name")) + assert renamed.index.name == renamer.index.name + + @pytest.mark.parametrize( + "args,kwargs", + [ + ((ChainMap({"A": "a"}, {"B": "b"}),), {"axis": "columns"}), + ((), {"columns": ChainMap({"A": "a"}, {"B": "b"})}), + ], + ) + def test_rename_chainmap(self, args, kwargs): + # see gh-23859 + colAData = range(1, 11) + colBdata = np.random.randn(10) + + df = DataFrame({"A": colAData, "B": colBdata}) + result = df.rename(*args, **kwargs) + + expected = DataFrame({"a": colAData, "b": colBdata}) + tm.assert_frame_equal(result, expected) + + def test_rename_multiindex(self): + + tuples_index = [("foo1", "bar1"), ("foo2", "bar2")] + tuples_columns = [("fizz1", "buzz1"), ("fizz2", "buzz2")] + index = MultiIndex.from_tuples(tuples_index, names=["foo", "bar"]) + columns = MultiIndex.from_tuples(tuples_columns, names=["fizz", "buzz"]) + df = DataFrame([(0, 0), (1, 1)], index=index, columns=columns) + + # + # without specifying level -> across all levels + + renamed = df.rename( + index={"foo1": "foo3", "bar2": "bar3"}, + columns={"fizz1": "fizz3", "buzz2": "buzz3"}, + ) + new_index = MultiIndex.from_tuples( + [("foo3", "bar1"), ("foo2", "bar3")], names=["foo", "bar"] + ) + new_columns = MultiIndex.from_tuples( + [("fizz3", "buzz1"), ("fizz2", "buzz3")], names=["fizz", "buzz"] + ) + tm.assert_index_equal(renamed.index, new_index) + tm.assert_index_equal(renamed.columns, new_columns) + assert renamed.index.names == df.index.names + assert renamed.columns.names == df.columns.names + + # + # with specifying a level (GH13766) + + # dict + new_columns = MultiIndex.from_tuples( + [("fizz3", "buzz1"), ("fizz2", "buzz2")], names=["fizz", "buzz"] + ) + renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level=0) + tm.assert_index_equal(renamed.columns, new_columns) + renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level="fizz") + tm.assert_index_equal(renamed.columns, new_columns) + + new_columns = MultiIndex.from_tuples( + [("fizz1", "buzz1"), ("fizz2", "buzz3")], names=["fizz", "buzz"] + ) + renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level=1) + tm.assert_index_equal(renamed.columns, new_columns) + renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level="buzz") + tm.assert_index_equal(renamed.columns, new_columns) + + # function + func = str.upper + new_columns = 
MultiIndex.from_tuples( + [("FIZZ1", "buzz1"), ("FIZZ2", "buzz2")], names=["fizz", "buzz"] + ) + renamed = df.rename(columns=func, level=0) + tm.assert_index_equal(renamed.columns, new_columns) + renamed = df.rename(columns=func, level="fizz") + tm.assert_index_equal(renamed.columns, new_columns) + + new_columns = MultiIndex.from_tuples( + [("fizz1", "BUZZ1"), ("fizz2", "BUZZ2")], names=["fizz", "buzz"] + ) + renamed = df.rename(columns=func, level=1) + tm.assert_index_equal(renamed.columns, new_columns) + renamed = df.rename(columns=func, level="buzz") + tm.assert_index_equal(renamed.columns, new_columns) + + # index + new_index = MultiIndex.from_tuples( + [("foo3", "bar1"), ("foo2", "bar2")], names=["foo", "bar"] + ) + renamed = df.rename(index={"foo1": "foo3", "bar2": "bar3"}, level=0) + tm.assert_index_equal(renamed.index, new_index) + + @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) setitem copy/view + def test_rename_nocopy(self, float_frame): + renamed = float_frame.rename(columns={"C": "foo"}, copy=False) + + assert np.shares_memory(renamed["foo"]._values, float_frame["C"]._values) + + renamed.loc[:, "foo"] = 1.0 + assert (float_frame["C"] == 1.0).all() + + def test_rename_inplace(self, float_frame): + float_frame.rename(columns={"C": "foo"}) + assert "C" in float_frame + assert "foo" not in float_frame + + c_values = float_frame["C"] + float_frame = float_frame.copy() + return_value = float_frame.rename(columns={"C": "foo"}, inplace=True) + assert return_value is None + + assert "C" not in float_frame + assert "foo" in float_frame + # GH 44153 + # Used to be id(float_frame["foo"]) != c_id, but flaky in the CI + assert float_frame["foo"] is not c_values + + def test_rename_bug(self): + # GH 5344 + # rename set ref_locs, and set_index was not resetting + df = DataFrame({0: ["foo", "bar"], 1: ["bah", "bas"], 2: [1, 2]}) + df = df.rename(columns={0: "a"}) + df = df.rename(columns={1: "b"}) + df = df.set_index(["a", "b"]) + df.columns = ["2001-01-01"] + expected = DataFrame( + [[1], [2]], + index=MultiIndex.from_tuples( + [("foo", "bah"), ("bar", "bas")], names=["a", "b"] + ), + columns=["2001-01-01"], + ) + tm.assert_frame_equal(df, expected) + + def test_rename_bug2(self): + # GH 19497 + # rename was changing Index to MultiIndex if Index contained tuples + + df = DataFrame(data=np.arange(3), index=[(0, 0), (1, 1), (2, 2)], columns=["a"]) + df = df.rename({(1, 1): (5, 4)}, axis="index") + expected = DataFrame( + data=np.arange(3), index=[(0, 0), (5, 4), (2, 2)], columns=["a"] + ) + tm.assert_frame_equal(df, expected) + + def test_rename_errors_raises(self): + df = DataFrame(columns=["A", "B", "C", "D"]) + with pytest.raises(KeyError, match="'E'] not found in axis"): + df.rename(columns={"A": "a", "E": "e"}, errors="raise") + + @pytest.mark.parametrize( + "mapper, errors, expected_columns", + [ + ({"A": "a", "E": "e"}, "ignore", ["a", "B", "C", "D"]), + ({"A": "a"}, "raise", ["a", "B", "C", "D"]), + (str.lower, "raise", ["a", "b", "c", "d"]), + ], + ) + def test_rename_errors(self, mapper, errors, expected_columns): + # GH 13473 + # rename now works with errors parameter + df = DataFrame(columns=["A", "B", "C", "D"]) + result = df.rename(columns=mapper, errors=errors) + expected = DataFrame(columns=expected_columns) + tm.assert_frame_equal(result, expected) + + def test_rename_objects(self, float_string_frame): + renamed = float_string_frame.rename(columns=str.upper) + + assert "FOO" in renamed + assert "foo" not in renamed + + def test_rename_axis_style(self): + # 
https://github.com/pandas-dev/pandas/issues/12392 + df = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["X", "Y"]) + expected = DataFrame({"a": [1, 2], "b": [1, 2]}, index=["X", "Y"]) + + result = df.rename(str.lower, axis=1) + tm.assert_frame_equal(result, expected) + + result = df.rename(str.lower, axis="columns") + tm.assert_frame_equal(result, expected) + + result = df.rename({"A": "a", "B": "b"}, axis=1) + tm.assert_frame_equal(result, expected) + + result = df.rename({"A": "a", "B": "b"}, axis="columns") + tm.assert_frame_equal(result, expected) + + # Index + expected = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["x", "y"]) + result = df.rename(str.lower, axis=0) + tm.assert_frame_equal(result, expected) + + result = df.rename(str.lower, axis="index") + tm.assert_frame_equal(result, expected) + + result = df.rename({"X": "x", "Y": "y"}, axis=0) + tm.assert_frame_equal(result, expected) + + result = df.rename({"X": "x", "Y": "y"}, axis="index") + tm.assert_frame_equal(result, expected) + + result = df.rename(mapper=str.lower, axis="index") + tm.assert_frame_equal(result, expected) + + def test_rename_mapper_multi(self): + df = DataFrame({"A": ["a", "b"], "B": ["c", "d"], "C": [1, 2]}).set_index( + ["A", "B"] + ) + result = df.rename(str.upper) + expected = df.rename(index=str.upper) + tm.assert_frame_equal(result, expected) + + def test_rename_positional_named(self): + # https://github.com/pandas-dev/pandas/issues/12392 + df = DataFrame({"a": [1, 2], "b": [1, 2]}, index=["X", "Y"]) + result = df.rename(index=str.lower, columns=str.upper) + expected = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["x", "y"]) + tm.assert_frame_equal(result, expected) + + def test_rename_axis_style_raises(self): + # see gh-12392 + df = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["0", "1"]) + + # Named target and axis + over_spec_msg = "Cannot specify both 'axis' and any of 'index' or 'columns'" + with pytest.raises(TypeError, match=over_spec_msg): + df.rename(index=str.lower, axis=1) + + with pytest.raises(TypeError, match=over_spec_msg): + df.rename(index=str.lower, axis="columns") + + with pytest.raises(TypeError, match=over_spec_msg): + df.rename(columns=str.lower, axis="columns") + + with pytest.raises(TypeError, match=over_spec_msg): + df.rename(index=str.lower, axis=0) + + # Multiple targets and axis + with pytest.raises(TypeError, match=over_spec_msg): + df.rename(str.lower, index=str.lower, axis="columns") + + # Too many targets + over_spec_msg = "Cannot specify both 'mapper' and any of 'index' or 'columns'" + with pytest.raises(TypeError, match=over_spec_msg): + df.rename(str.lower, index=str.lower, columns=str.lower) + + # Duplicates + with pytest.raises(TypeError, match="multiple values"): + df.rename(id, mapper=id) + + def test_rename_positional_raises(self): + # GH 29136 + df = DataFrame(columns=["A", "B"]) + msg = r"rename\(\) takes from 1 to 2 positional arguments" + + with pytest.raises(TypeError, match=msg): + df.rename(None, str.lower) + + def test_rename_no_mappings_raises(self): + # GH 29136 + df = DataFrame([[1]]) + msg = "must pass an index to rename" + with pytest.raises(TypeError, match=msg): + df.rename() + + with pytest.raises(TypeError, match=msg): + df.rename(None, index=None) + + with pytest.raises(TypeError, match=msg): + df.rename(None, columns=None) + + with pytest.raises(TypeError, match=msg): + df.rename(None, columns=None, index=None) + + def test_rename_mapper_and_positional_arguments_raises(self): + # GH 29136 + df = DataFrame([[1]]) + msg = "Cannot specify both 
'mapper' and any of 'index' or 'columns'" + with pytest.raises(TypeError, match=msg): + df.rename({}, index={}) + + with pytest.raises(TypeError, match=msg): + df.rename({}, columns={}) + + with pytest.raises(TypeError, match=msg): + df.rename({}, columns={}, index={}) + + def test_rename_with_duplicate_columns(self): + # GH#4403 + df4 = DataFrame( + {"RT": [0.0454], "TClose": [22.02], "TExg": [0.0422]}, + index=MultiIndex.from_tuples( + [(600809, 20130331)], names=["STK_ID", "RPT_Date"] + ), + ) + + df5 = DataFrame( + { + "RPT_Date": [20120930, 20121231, 20130331], + "STK_ID": [600809] * 3, + "STK_Name": ["饡驦", "饡驦", "饡驦"], + "TClose": [38.05, 41.66, 30.01], + }, + index=MultiIndex.from_tuples( + [(600809, 20120930), (600809, 20121231), (600809, 20130331)], + names=["STK_ID", "RPT_Date"], + ), + ) + # TODO: can we construct this without merge? + k = merge(df4, df5, how="inner", left_index=True, right_index=True) + result = k.rename(columns={"TClose_x": "TClose", "TClose_y": "QT_Close"}) + str(result) + result.dtypes + + expected = DataFrame( + [[0.0454, 22.02, 0.0422, 20130331, 600809, "饡驦", 30.01]], + columns=[ + "RT", + "TClose", + "TExg", + "RPT_Date", + "STK_ID", + "STK_Name", + "QT_Close", + ], + ).set_index(["STK_ID", "RPT_Date"], drop=False) + tm.assert_frame_equal(result, expected) + + def test_rename_boolean_index(self): + df = DataFrame(np.arange(15).reshape(3, 5), columns=[False, True, 2, 3, 4]) + mapper = {0: "foo", 1: "bar", 2: "bah"} + res = df.rename(index=mapper) + exp = DataFrame( + np.arange(15).reshape(3, 5), + columns=[False, True, 2, 3, 4], + index=["foo", "bar", "bah"], + ) + tm.assert_frame_equal(res, exp) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_rename_axis.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_rename_axis.py new file mode 100644 index 0000000..dd4a77c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_rename_axis.py @@ -0,0 +1,111 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + MultiIndex, +) +import pandas._testing as tm + + +class TestDataFrameRenameAxis: + def test_rename_axis_inplace(self, float_frame): + # GH#15704 + expected = float_frame.rename_axis("foo") + result = float_frame.copy() + return_value = no_return = result.rename_axis("foo", inplace=True) + assert return_value is None + + assert no_return is None + tm.assert_frame_equal(result, expected) + + expected = float_frame.rename_axis("bar", axis=1) + result = float_frame.copy() + return_value = no_return = result.rename_axis("bar", axis=1, inplace=True) + assert return_value is None + + assert no_return is None + tm.assert_frame_equal(result, expected) + + def test_rename_axis_raises(self): + # GH#17833 + df = DataFrame({"A": [1, 2], "B": [1, 2]}) + with pytest.raises(ValueError, match="Use `.rename`"): + df.rename_axis(id, axis=0) + + with pytest.raises(ValueError, match="Use `.rename`"): + df.rename_axis({0: 10, 1: 20}, axis=0) + + with pytest.raises(ValueError, match="Use `.rename`"): + df.rename_axis(id, axis=1) + + with pytest.raises(ValueError, match="Use `.rename`"): + df["A"].rename_axis(id) + + def test_rename_axis_mapper(self): + # GH#19978 + mi = MultiIndex.from_product([["a", "b", "c"], [1, 2]], names=["ll", "nn"]) + df = DataFrame( + {"x": list(range(len(mi))), "y": [i * 10 for i in range(len(mi))]}, index=mi + ) + + # Test for rename of the Index object of columns + result = df.rename_axis("cols", axis=1) + 
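+ # Editor's note: unlike ``rename``, which remaps axis *labels*,
+ # ``rename_axis`` (re)names the Index object itself -- the columns
+ # keep their labels ["x", "y"] and only gain the name "cols", which
+ # is what the assertion below checks.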
tm.assert_index_equal(result.columns, Index(["x", "y"], name="cols")) + + # Test for rename of the Index object of columns using dict + result = result.rename_axis(columns={"cols": "new"}, axis=1) + tm.assert_index_equal(result.columns, Index(["x", "y"], name="new")) + + # Test for renaming index using dict + result = df.rename_axis(index={"ll": "foo"}) + assert result.index.names == ["foo", "nn"] + + # Test for renaming index using a function + result = df.rename_axis(index=str.upper, axis=0) + assert result.index.names == ["LL", "NN"] + + # Test for renaming index providing complete list + result = df.rename_axis(index=["foo", "goo"]) + assert result.index.names == ["foo", "goo"] + + # Test for changing index and columns at same time + sdf = df.reset_index().set_index("nn").drop(columns=["ll", "y"]) + result = sdf.rename_axis(index="foo", columns="meh") + assert result.index.name == "foo" + assert result.columns.name == "meh" + + # Test different error cases + with pytest.raises(TypeError, match="Must pass"): + df.rename_axis(index="wrong") + + with pytest.raises(ValueError, match="Length of names"): + df.rename_axis(index=["wrong"]) + + with pytest.raises(TypeError, match="bogus"): + df.rename_axis(bogus=None) + + @pytest.mark.parametrize( + "kwargs, rename_index, rename_columns", + [ + ({"mapper": None, "axis": 0}, True, False), + ({"mapper": None, "axis": 1}, False, True), + ({"index": None}, True, False), + ({"columns": None}, False, True), + ({"index": None, "columns": None}, True, True), + ({}, False, False), + ], + ) + def test_rename_axis_none(self, kwargs, rename_index, rename_columns): + # GH 25034 + index = Index(list("abc"), name="foo") + columns = Index(["col1", "col2"], name="bar") + data = np.arange(6).reshape(3, 2) + df = DataFrame(data, index, columns) + + result = df.rename_axis(**kwargs) + expected_index = index.rename(None) if rename_index else index + expected_columns = columns.rename(None) if rename_columns else columns + expected = DataFrame(data, expected_index, expected_columns) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_reorder_levels.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_reorder_levels.py new file mode 100644 index 0000000..9080bdb --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_reorder_levels.py @@ -0,0 +1,75 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + MultiIndex, +) +import pandas._testing as tm + + +class TestReorderLevels: + def test_reorder_levels(self, frame_or_series): + index = MultiIndex( + levels=[["bar"], ["one", "two", "three"], [0, 1]], + codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], + names=["L0", "L1", "L2"], + ) + df = DataFrame({"A": np.arange(6), "B": np.arange(6)}, index=index) + obj = tm.get_obj(df, frame_or_series) + + # no change, position + result = obj.reorder_levels([0, 1, 2]) + tm.assert_equal(obj, result) + + # no change, labels + result = obj.reorder_levels(["L0", "L1", "L2"]) + tm.assert_equal(obj, result) + + # rotate, position + result = obj.reorder_levels([1, 2, 0]) + e_idx = MultiIndex( + levels=[["one", "two", "three"], [0, 1], ["bar"]], + codes=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1], [0, 0, 0, 0, 0, 0]], + names=["L1", "L2", "L0"], + ) + expected = DataFrame({"A": np.arange(6), "B": np.arange(6)}, index=e_idx) + expected = tm.get_obj(expected, frame_or_series) + tm.assert_equal(result, expected) + + result = 
obj.reorder_levels([0, 0, 0]) + e_idx = MultiIndex( + levels=[["bar"], ["bar"], ["bar"]], + codes=[[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]], + names=["L0", "L0", "L0"], + ) + expected = DataFrame({"A": np.arange(6), "B": np.arange(6)}, index=e_idx) + expected = tm.get_obj(expected, frame_or_series) + tm.assert_equal(result, expected) + + result = obj.reorder_levels(["L0", "L0", "L0"]) + tm.assert_equal(result, expected) + + def test_reorder_levels_swaplevel_equivalence( + self, multiindex_year_month_day_dataframe_random_data + ): + + ymd = multiindex_year_month_day_dataframe_random_data + + result = ymd.reorder_levels(["month", "day", "year"]) + expected = ymd.swaplevel(0, 1).swaplevel(1, 2) + tm.assert_frame_equal(result, expected) + + result = ymd["A"].reorder_levels(["month", "day", "year"]) + expected = ymd["A"].swaplevel(0, 1).swaplevel(1, 2) + tm.assert_series_equal(result, expected) + + result = ymd.T.reorder_levels(["month", "day", "year"], axis=1) + expected = ymd.T.swaplevel(0, 1, axis=1).swaplevel(1, 2, axis=1) + tm.assert_frame_equal(result, expected) + + with pytest.raises(TypeError, match="hierarchical axis"): + ymd.reorder_levels([1, 2], axis=1) + + with pytest.raises(IndexError, match="Too many levels"): + ymd.index.reorder_levels([1, 2, 3]) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_replace.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_replace.py new file mode 100644 index 0000000..b9024b4 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_replace.py @@ -0,0 +1,1521 @@ +from __future__ import annotations + +from datetime import datetime +import re + +import numpy as np +import pytest + +from pandas.compat import np_version_under1p20 + +import pandas as pd +from pandas import ( + DataFrame, + Index, + Series, + Timestamp, + date_range, +) +import pandas._testing as tm + + +@pytest.fixture +def mix_ab() -> dict[str, list[int | str]]: + return {"a": list(range(4)), "b": list("ab..")} + + +@pytest.fixture +def mix_abc() -> dict[str, list[float | str]]: + return {"a": list(range(4)), "b": list("ab.."), "c": ["a", "b", np.nan, "d"]} + + +class TestDataFrameReplace: + def test_replace_inplace(self, datetime_frame, float_string_frame): + datetime_frame["A"][:5] = np.nan + datetime_frame["A"][-5:] = np.nan + + tsframe = datetime_frame.copy() + return_value = tsframe.replace(np.nan, 0, inplace=True) + assert return_value is None + tm.assert_frame_equal(tsframe, datetime_frame.fillna(0)) + + # mixed type + mf = float_string_frame + mf.iloc[5:20, mf.columns.get_loc("foo")] = np.nan + mf.iloc[-10:, mf.columns.get_loc("A")] = np.nan + + result = float_string_frame.replace(np.nan, 0) + expected = float_string_frame.fillna(value=0) + tm.assert_frame_equal(result, expected) + + tsframe = datetime_frame.copy() + return_value = tsframe.replace([np.nan], [0], inplace=True) + assert return_value is None + tm.assert_frame_equal(tsframe, datetime_frame.fillna(0)) + + @pytest.mark.parametrize( + "to_replace,values,expected", + [ + # lists of regexes and values + # list of [re1, re2, ..., reN] -> [v1, v2, ..., vN] + ( + [r"\s*\.\s*", r"e|f|g"], + [np.nan, "crap"], + { + "a": ["a", "b", np.nan, np.nan], + "b": ["crap"] * 3 + ["h"], + "c": ["h", "crap", "l", "o"], + }, + ), + # list of [re1, re2, ..., reN] -> [re1, re2, .., reN] + ( + [r"\s*(\.)\s*", r"(e|f|g)"], + [r"\1\1", r"\1_crap"], + { + "a": ["a", "b", "..", ".."], + "b": ["e_crap", "f_crap", "g_crap", "h"], + "c": ["h", 
"e_crap", "l", "o"], + }, + ), + # list of [re1, re2, ..., reN] -> [(re1 or v1), (re2 or v2), ..., (reN + # or vN)] + ( + [r"\s*(\.)\s*", r"e"], + [r"\1\1", r"crap"], + { + "a": ["a", "b", "..", ".."], + "b": ["crap", "f", "g", "h"], + "c": ["h", "crap", "l", "o"], + }, + ), + ], + ) + @pytest.mark.parametrize("inplace", [True, False]) + @pytest.mark.parametrize("use_value_regex_args", [True, False]) + def test_regex_replace_list_obj( + self, to_replace, values, expected, inplace, use_value_regex_args + ): + df = DataFrame({"a": list("ab.."), "b": list("efgh"), "c": list("helo")}) + + if use_value_regex_args: + result = df.replace(value=values, regex=to_replace, inplace=inplace) + else: + result = df.replace(to_replace, values, regex=True, inplace=inplace) + + if inplace: + assert result is None + result = df + + expected = DataFrame(expected) + tm.assert_frame_equal(result, expected) + + def test_regex_replace_list_mixed(self, mix_ab): + # mixed frame to make sure this doesn't break things + dfmix = DataFrame(mix_ab) + + # lists of regexes and values + # list of [re1, re2, ..., reN] -> [v1, v2, ..., vN] + to_replace_res = [r"\s*\.\s*", r"a"] + values = [np.nan, "crap"] + mix2 = {"a": list(range(4)), "b": list("ab.."), "c": list("halo")} + dfmix2 = DataFrame(mix2) + res = dfmix2.replace(to_replace_res, values, regex=True) + expec = DataFrame( + { + "a": mix2["a"], + "b": ["crap", "b", np.nan, np.nan], + "c": ["h", "crap", "l", "o"], + } + ) + tm.assert_frame_equal(res, expec) + + # list of [re1, re2, ..., reN] -> [re1, re2, .., reN] + to_replace_res = [r"\s*(\.)\s*", r"(a|b)"] + values = [r"\1\1", r"\1_crap"] + res = dfmix.replace(to_replace_res, values, regex=True) + expec = DataFrame({"a": mix_ab["a"], "b": ["a_crap", "b_crap", "..", ".."]}) + tm.assert_frame_equal(res, expec) + + # list of [re1, re2, ..., reN] -> [(re1 or v1), (re2 or v2), ..., (reN + # or vN)] + to_replace_res = [r"\s*(\.)\s*", r"a", r"(b)"] + values = [r"\1\1", r"crap", r"\1_crap"] + res = dfmix.replace(to_replace_res, values, regex=True) + expec = DataFrame({"a": mix_ab["a"], "b": ["crap", "b_crap", "..", ".."]}) + tm.assert_frame_equal(res, expec) + + to_replace_res = [r"\s*(\.)\s*", r"a", r"(b)"] + values = [r"\1\1", r"crap", r"\1_crap"] + res = dfmix.replace(regex=to_replace_res, value=values) + expec = DataFrame({"a": mix_ab["a"], "b": ["crap", "b_crap", "..", ".."]}) + tm.assert_frame_equal(res, expec) + + def test_regex_replace_list_mixed_inplace(self, mix_ab): + dfmix = DataFrame(mix_ab) + # the same inplace + # lists of regexes and values + # list of [re1, re2, ..., reN] -> [v1, v2, ..., vN] + to_replace_res = [r"\s*\.\s*", r"a"] + values = [np.nan, "crap"] + res = dfmix.copy() + return_value = res.replace(to_replace_res, values, inplace=True, regex=True) + assert return_value is None + expec = DataFrame({"a": mix_ab["a"], "b": ["crap", "b", np.nan, np.nan]}) + tm.assert_frame_equal(res, expec) + + # list of [re1, re2, ..., reN] -> [re1, re2, .., reN] + to_replace_res = [r"\s*(\.)\s*", r"(a|b)"] + values = [r"\1\1", r"\1_crap"] + res = dfmix.copy() + return_value = res.replace(to_replace_res, values, inplace=True, regex=True) + assert return_value is None + expec = DataFrame({"a": mix_ab["a"], "b": ["a_crap", "b_crap", "..", ".."]}) + tm.assert_frame_equal(res, expec) + + # list of [re1, re2, ..., reN] -> [(re1 or v1), (re2 or v2), ..., (reN + # or vN)] + to_replace_res = [r"\s*(\.)\s*", r"a", r"(b)"] + values = [r"\1\1", r"crap", r"\1_crap"] + res = dfmix.copy() + return_value = res.replace(to_replace_res, 
values, inplace=True, regex=True) + assert return_value is None + expec = DataFrame({"a": mix_ab["a"], "b": ["crap", "b_crap", "..", ".."]}) + tm.assert_frame_equal(res, expec) + + to_replace_res = [r"\s*(\.)\s*", r"a", r"(b)"] + values = [r"\1\1", r"crap", r"\1_crap"] + res = dfmix.copy() + return_value = res.replace(regex=to_replace_res, value=values, inplace=True) + assert return_value is None + expec = DataFrame({"a": mix_ab["a"], "b": ["crap", "b_crap", "..", ".."]}) + tm.assert_frame_equal(res, expec) + + def test_regex_replace_dict_mixed(self, mix_abc): + dfmix = DataFrame(mix_abc) + + # dicts + # single dict {re1: v1}, search the whole frame + # need test for this... + + # list of dicts {re1: v1, re2: v2, ..., re3: v3}, search the whole + # frame + res = dfmix.replace({"b": r"\s*\.\s*"}, {"b": np.nan}, regex=True) + res2 = dfmix.copy() + return_value = res2.replace( + {"b": r"\s*\.\s*"}, {"b": np.nan}, inplace=True, regex=True + ) + assert return_value is None + expec = DataFrame( + {"a": mix_abc["a"], "b": ["a", "b", np.nan, np.nan], "c": mix_abc["c"]} + ) + tm.assert_frame_equal(res, expec) + tm.assert_frame_equal(res2, expec) + + # list of dicts {re1: re11, re2: re12, ..., reN: re1N}, search the + # whole frame + res = dfmix.replace({"b": r"\s*(\.)\s*"}, {"b": r"\1ty"}, regex=True) + res2 = dfmix.copy() + return_value = res2.replace( + {"b": r"\s*(\.)\s*"}, {"b": r"\1ty"}, inplace=True, regex=True + ) + assert return_value is None + expec = DataFrame( + {"a": mix_abc["a"], "b": ["a", "b", ".ty", ".ty"], "c": mix_abc["c"]} + ) + tm.assert_frame_equal(res, expec) + tm.assert_frame_equal(res2, expec) + + res = dfmix.replace(regex={"b": r"\s*(\.)\s*"}, value={"b": r"\1ty"}) + res2 = dfmix.copy() + return_value = res2.replace( + regex={"b": r"\s*(\.)\s*"}, value={"b": r"\1ty"}, inplace=True + ) + assert return_value is None + expec = DataFrame( + {"a": mix_abc["a"], "b": ["a", "b", ".ty", ".ty"], "c": mix_abc["c"]} + ) + tm.assert_frame_equal(res, expec) + tm.assert_frame_equal(res2, expec) + + # scalar -> dict + # to_replace regex, {value: value} + expec = DataFrame( + {"a": mix_abc["a"], "b": [np.nan, "b", ".", "."], "c": mix_abc["c"]} + ) + res = dfmix.replace("a", {"b": np.nan}, regex=True) + res2 = dfmix.copy() + return_value = res2.replace("a", {"b": np.nan}, regex=True, inplace=True) + assert return_value is None + tm.assert_frame_equal(res, expec) + tm.assert_frame_equal(res2, expec) + + res = dfmix.replace("a", {"b": np.nan}, regex=True) + res2 = dfmix.copy() + return_value = res2.replace(regex="a", value={"b": np.nan}, inplace=True) + assert return_value is None + expec = DataFrame( + {"a": mix_abc["a"], "b": [np.nan, "b", ".", "."], "c": mix_abc["c"]} + ) + tm.assert_frame_equal(res, expec) + tm.assert_frame_equal(res2, expec) + + def test_regex_replace_dict_nested(self, mix_abc): + # nested dicts will not work until this is implemented for Series + dfmix = DataFrame(mix_abc) + res = dfmix.replace({"b": {r"\s*\.\s*": np.nan}}, regex=True) + res2 = dfmix.copy() + res4 = dfmix.copy() + return_value = res2.replace( + {"b": {r"\s*\.\s*": np.nan}}, inplace=True, regex=True + ) + assert return_value is None + res3 = dfmix.replace(regex={"b": {r"\s*\.\s*": np.nan}}) + return_value = res4.replace(regex={"b": {r"\s*\.\s*": np.nan}}, inplace=True) + assert return_value is None + expec = DataFrame( + {"a": mix_abc["a"], "b": ["a", "b", np.nan, np.nan], "c": mix_abc["c"]} + ) + tm.assert_frame_equal(res, expec) + tm.assert_frame_equal(res2, expec) + tm.assert_frame_equal(res3, expec) 
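+ # Editor's note: res, res2, res3 and res4 are four spellings of the
+ # same nested-dict replacement -- positional ``to_replace`` with
+ # ``regex=True``, the ``regex=`` keyword form, and the inplace
+ # variant of each -- so all four must equal the one expected frame.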
+ tm.assert_frame_equal(res4, expec) + + def test_regex_replace_dict_nested_non_first_character(self, any_string_dtype): + # GH 25259 + dtype = any_string_dtype + df = DataFrame({"first": ["abc", "bca", "cab"]}, dtype=dtype) + expected = DataFrame({"first": [".bc", "bc.", "c.b"]}, dtype=dtype) + result = df.replace({"a": "."}, regex=True) + tm.assert_frame_equal(result, expected) + + def test_regex_replace_dict_nested_gh4115(self): + df = DataFrame({"Type": ["Q", "T", "Q", "Q", "T"], "tmp": 2}) + expected = DataFrame({"Type": [0, 1, 0, 0, 1], "tmp": 2}) + result = df.replace({"Type": {"Q": 0, "T": 1}}) + tm.assert_frame_equal(result, expected) + + def test_regex_replace_list_to_scalar(self, mix_abc): + df = DataFrame(mix_abc) + expec = DataFrame( + { + "a": mix_abc["a"], + "b": np.array([np.nan] * 4), + "c": [np.nan, np.nan, np.nan, "d"], + } + ) + res = df.replace([r"\s*\.\s*", "a|b"], np.nan, regex=True) + res2 = df.copy() + res3 = df.copy() + return_value = res2.replace( + [r"\s*\.\s*", "a|b"], np.nan, regex=True, inplace=True + ) + assert return_value is None + return_value = res3.replace( + regex=[r"\s*\.\s*", "a|b"], value=np.nan, inplace=True + ) + assert return_value is None + tm.assert_frame_equal(res, expec) + tm.assert_frame_equal(res2, expec) + tm.assert_frame_equal(res3, expec) + + def test_regex_replace_str_to_numeric(self, mix_abc): + # what happens when you try to replace a numeric value with a regex? + df = DataFrame(mix_abc) + res = df.replace(r"\s*\.\s*", 0, regex=True) + res2 = df.copy() + return_value = res2.replace(r"\s*\.\s*", 0, inplace=True, regex=True) + assert return_value is None + res3 = df.copy() + return_value = res3.replace(regex=r"\s*\.\s*", value=0, inplace=True) + assert return_value is None + expec = DataFrame({"a": mix_abc["a"], "b": ["a", "b", 0, 0], "c": mix_abc["c"]}) + tm.assert_frame_equal(res, expec) + tm.assert_frame_equal(res2, expec) + tm.assert_frame_equal(res3, expec) + + def test_regex_replace_regex_list_to_numeric(self, mix_abc): + df = DataFrame(mix_abc) + res = df.replace([r"\s*\.\s*", "b"], 0, regex=True) + res2 = df.copy() + return_value = res2.replace([r"\s*\.\s*", "b"], 0, regex=True, inplace=True) + assert return_value is None + res3 = df.copy() + return_value = res3.replace(regex=[r"\s*\.\s*", "b"], value=0, inplace=True) + assert return_value is None + expec = DataFrame( + {"a": mix_abc["a"], "b": ["a", 0, 0, 0], "c": ["a", 0, np.nan, "d"]} + ) + tm.assert_frame_equal(res, expec) + tm.assert_frame_equal(res2, expec) + tm.assert_frame_equal(res3, expec) + + def test_regex_replace_series_of_regexes(self, mix_abc): + df = DataFrame(mix_abc) + s1 = Series({"b": r"\s*\.\s*"}) + s2 = Series({"b": np.nan}) + res = df.replace(s1, s2, regex=True) + res2 = df.copy() + return_value = res2.replace(s1, s2, inplace=True, regex=True) + assert return_value is None + res3 = df.copy() + return_value = res3.replace(regex=s1, value=s2, inplace=True) + assert return_value is None + expec = DataFrame( + {"a": mix_abc["a"], "b": ["a", "b", np.nan, np.nan], "c": mix_abc["c"]} + ) + tm.assert_frame_equal(res, expec) + tm.assert_frame_equal(res2, expec) + tm.assert_frame_equal(res3, expec) + + def test_regex_replace_numeric_to_object_conversion(self, mix_abc): + df = DataFrame(mix_abc) + expec = DataFrame({"a": ["a", 1, 2, 3], "b": mix_abc["b"], "c": mix_abc["c"]}) + res = df.replace(0, "a") + tm.assert_frame_equal(res, expec) + assert res.a.dtype == np.object_ + + @pytest.mark.parametrize( + "to_replace", [{"": np.nan, ",": ""}, {",": "", "": np.nan}] + ) 
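+ # Editor's note: a dict passed via ``regex=`` maps pattern ->
+ # replacement per entry (here: strip commas, turn empty strings
+ # into NaN); the two parametrized key orderings verify the result
+ # is independent of dict iteration order (GH-39338).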
+ def test_joint_simple_replace_and_regex_replace(self, to_replace): + # GH-39338 + df = DataFrame( + { + "col1": ["1,000", "a", "3"], + "col2": ["a", "", "b"], + "col3": ["a", "b", "c"], + } + ) + result = df.replace(regex=to_replace) + expected = DataFrame( + { + "col1": ["1000", "a", "3"], + "col2": ["a", np.nan, "b"], + "col3": ["a", "b", "c"], + } + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("metachar", ["[]", "()", r"\d", r"\w", r"\s"]) + def test_replace_regex_metachar(self, metachar): + df = DataFrame({"a": [metachar, "else"]}) + result = df.replace({"a": {metachar: "paren"}}) + expected = DataFrame({"a": ["paren", "else"]}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "data,to_replace,expected", + [ + (["xax", "xbx"], {"a": "c", "b": "d"}, ["xcx", "xdx"]), + (["d", "", ""], {r"^\s*$": pd.NA}, ["d", pd.NA, pd.NA]), + ], + ) + def test_regex_replace_string_types( + self, data, to_replace, expected, frame_or_series, any_string_dtype + ): + # GH-41333, GH-35977 + dtype = any_string_dtype + obj = frame_or_series(data, dtype=dtype) + result = obj.replace(to_replace, regex=True) + expected = frame_or_series(expected, dtype=dtype) + + tm.assert_equal(result, expected) + + def test_replace(self, datetime_frame): + datetime_frame["A"][:5] = np.nan + datetime_frame["A"][-5:] = np.nan + + zero_filled = datetime_frame.replace(np.nan, -1e8) + tm.assert_frame_equal(zero_filled, datetime_frame.fillna(-1e8)) + tm.assert_frame_equal(zero_filled.replace(-1e8, np.nan), datetime_frame) + + datetime_frame["A"][:5] = np.nan + datetime_frame["A"][-5:] = np.nan + datetime_frame["B"][:5] = -1e8 + + # empty + df = DataFrame(index=["a", "b"]) + tm.assert_frame_equal(df, df.replace(5, 7)) + + # GH 11698 + # test for mixed data types. 
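+ # Editor's note: only the object column is affected here -- "-"
+ # becomes NaN while the datetime64 values pass through replace()
+ # untouched, as expected_df below shows.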
+ df = DataFrame( + [("-", pd.to_datetime("20150101")), ("a", pd.to_datetime("20150102"))] + ) + df1 = df.replace("-", np.nan) + expected_df = DataFrame( + [(np.nan, pd.to_datetime("20150101")), ("a", pd.to_datetime("20150102"))] + ) + tm.assert_frame_equal(df1, expected_df) + + def test_replace_list(self): + obj = {"a": list("ab.."), "b": list("efgh"), "c": list("helo")} + dfobj = DataFrame(obj) + + # lists of regexes and values + # list of [v1, v2, ..., vN] -> [v1, v2, ..., vN] + to_replace_res = [r".", r"e"] + values = [np.nan, "crap"] + res = dfobj.replace(to_replace_res, values) + expec = DataFrame( + { + "a": ["a", "b", np.nan, np.nan], + "b": ["crap", "f", "g", "h"], + "c": ["h", "crap", "l", "o"], + } + ) + tm.assert_frame_equal(res, expec) + + # list of [v1, v2, ..., vN] -> [v1, v2, .., vN] + to_replace_res = [r".", r"f"] + values = [r"..", r"crap"] + res = dfobj.replace(to_replace_res, values) + expec = DataFrame( + { + "a": ["a", "b", "..", ".."], + "b": ["e", "crap", "g", "h"], + "c": ["h", "e", "l", "o"], + } + ) + tm.assert_frame_equal(res, expec) + + def test_replace_with_empty_list(self, frame_or_series): + # GH 21977 + ser = Series([["a", "b"], [], np.nan, [1]]) + obj = DataFrame({"col": ser}) + obj = tm.get_obj(obj, frame_or_series) + expected = obj + result = obj.replace([], np.nan) + tm.assert_equal(result, expected) + + # GH 19266 + msg = ( + "NumPy boolean array indexing assignment cannot assign {size} " + "input values to the 1 output values where the mask is true" + ) + with pytest.raises(ValueError, match=msg.format(size=0)): + obj.replace({np.nan: []}) + with pytest.raises(ValueError, match=msg.format(size=2)): + obj.replace({np.nan: ["dummy", "alt"]}) + + def test_replace_series_dict(self): + # from GH 3064 + df = DataFrame({"zero": {"a": 0.0, "b": 1}, "one": {"a": 2.0, "b": 0}}) + result = df.replace(0, {"zero": 0.5, "one": 1.0}) + expected = DataFrame({"zero": {"a": 0.5, "b": 1}, "one": {"a": 2.0, "b": 1.0}}) + tm.assert_frame_equal(result, expected) + + result = df.replace(0, df.mean()) + tm.assert_frame_equal(result, expected) + + # series to series/dict + df = DataFrame({"zero": {"a": 0.0, "b": 1}, "one": {"a": 2.0, "b": 0}}) + s = Series({"zero": 0.0, "one": 2.0}) + result = df.replace(s, {"zero": 0.5, "one": 1.0}) + expected = DataFrame({"zero": {"a": 0.5, "b": 1}, "one": {"a": 1.0, "b": 0.0}}) + tm.assert_frame_equal(result, expected) + + result = df.replace(s, df.mean()) + tm.assert_frame_equal(result, expected) + + def test_replace_convert(self): + # gh 3907 + df = DataFrame([["foo", "bar", "bah"], ["bar", "foo", "bah"]]) + m = {"foo": 1, "bar": 2, "bah": 3} + rep = df.replace(m) + expec = Series([np.int64] * 3) + res = rep.dtypes + tm.assert_series_equal(expec, res) + + def test_replace_mixed(self, float_string_frame): + mf = float_string_frame + mf.iloc[5:20, mf.columns.get_loc("foo")] = np.nan + mf.iloc[-10:, mf.columns.get_loc("A")] = np.nan + + result = float_string_frame.replace(np.nan, -18) + expected = float_string_frame.fillna(value=-18) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result.replace(-18, np.nan), float_string_frame) + + result = float_string_frame.replace(np.nan, -1e8) + expected = float_string_frame.fillna(value=-1e8) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result.replace(-1e8, np.nan), float_string_frame) + + def test_replace_mixed_int_block_upcasting(self): + + # int block upcasting + df = DataFrame( + { + "A": Series([1.0, 2.0], dtype="float64"), + "B": Series([0, 1], 
dtype="int64"), + } + ) + expected = DataFrame( + { + "A": Series([1.0, 2.0], dtype="float64"), + "B": Series([0.5, 1], dtype="float64"), + } + ) + result = df.replace(0, 0.5) + tm.assert_frame_equal(result, expected) + + return_value = df.replace(0, 0.5, inplace=True) + assert return_value is None + tm.assert_frame_equal(df, expected) + + def test_replace_mixed_int_block_splitting(self): + + # int block splitting + df = DataFrame( + { + "A": Series([1.0, 2.0], dtype="float64"), + "B": Series([0, 1], dtype="int64"), + "C": Series([1, 2], dtype="int64"), + } + ) + expected = DataFrame( + { + "A": Series([1.0, 2.0], dtype="float64"), + "B": Series([0.5, 1], dtype="float64"), + "C": Series([1, 2], dtype="int64"), + } + ) + result = df.replace(0, 0.5) + tm.assert_frame_equal(result, expected) + + def test_replace_mixed2(self): + + # to object block upcasting + df = DataFrame( + { + "A": Series([1.0, 2.0], dtype="float64"), + "B": Series([0, 1], dtype="int64"), + } + ) + expected = DataFrame( + { + "A": Series([1, "foo"], dtype="object"), + "B": Series([0, 1], dtype="int64"), + } + ) + result = df.replace(2, "foo") + tm.assert_frame_equal(result, expected) + + expected = DataFrame( + { + "A": Series(["foo", "bar"], dtype="object"), + "B": Series([0, "foo"], dtype="object"), + } + ) + result = df.replace([1, 2], ["foo", "bar"]) + tm.assert_frame_equal(result, expected) + + def test_replace_mixed3(self): + # test case from + df = DataFrame( + {"A": Series([3, 0], dtype="int64"), "B": Series([0, 3], dtype="int64")} + ) + result = df.replace(3, df.mean().to_dict()) + expected = df.copy().astype("float64") + m = df.mean() + expected.iloc[0, 0] = m[0] + expected.iloc[1, 1] = m[1] + tm.assert_frame_equal(result, expected) + + def test_replace_nullable_int_with_string_doesnt_cast(self): + # GH#25438 don't cast df['a'] to float64 + df = DataFrame({"a": [1, 2, 3, np.nan], "b": ["some", "strings", "here", "he"]}) + df["a"] = df["a"].astype("Int64") + + res = df.replace("", np.nan) + tm.assert_series_equal(res["a"], df["a"]) + + @pytest.mark.parametrize("dtype", ["boolean", "Int64", "Float64"]) + def test_replace_with_nullable_column(self, dtype): + # GH-44499 + nullable_ser = Series([1, 0, 1], dtype=dtype) + df = DataFrame({"A": ["A", "B", "x"], "B": nullable_ser}) + result = df.replace("x", "X") + expected = DataFrame({"A": ["A", "B", "X"], "B": nullable_ser}) + tm.assert_frame_equal(result, expected) + + def test_replace_simple_nested_dict(self): + df = DataFrame({"col": range(1, 5)}) + expected = DataFrame({"col": ["a", 2, 3, "b"]}) + + result = df.replace({"col": {1: "a", 4: "b"}}) + tm.assert_frame_equal(expected, result) + + # in this case, should be the same as the not nested version + result = df.replace({1: "a", 4: "b"}) + tm.assert_frame_equal(expected, result) + + def test_replace_simple_nested_dict_with_nonexistent_value(self): + df = DataFrame({"col": range(1, 5)}) + expected = DataFrame({"col": ["a", 2, 3, "b"]}) + + result = df.replace({-1: "-", 1: "a", 4: "b"}) + tm.assert_frame_equal(expected, result) + + result = df.replace({"col": {-1: "-", 1: "a", 4: "b"}}) + tm.assert_frame_equal(expected, result) + + def test_replace_value_is_none(self, datetime_frame): + orig_value = datetime_frame.iloc[0, 0] + orig2 = datetime_frame.iloc[1, 0] + + datetime_frame.iloc[0, 0] = np.nan + datetime_frame.iloc[1, 0] = 1 + + result = datetime_frame.replace(to_replace={np.nan: 0}) + expected = datetime_frame.T.replace(to_replace={np.nan: 0}).T + tm.assert_frame_equal(result, expected) + + result = 
datetime_frame.replace(to_replace={np.nan: 0, 1: -1e8}) + tsframe = datetime_frame.copy() + tsframe.iloc[0, 0] = 0 + tsframe.iloc[1, 0] = -1e8 + expected = tsframe + tm.assert_frame_equal(expected, result) + datetime_frame.iloc[0, 0] = orig_value + datetime_frame.iloc[1, 0] = orig2 + + def test_replace_for_new_dtypes(self, datetime_frame): + + # dtypes + tsframe = datetime_frame.copy().astype(np.float32) + tsframe["A"][:5] = np.nan + tsframe["A"][-5:] = np.nan + + zero_filled = tsframe.replace(np.nan, -1e8) + tm.assert_frame_equal(zero_filled, tsframe.fillna(-1e8)) + tm.assert_frame_equal(zero_filled.replace(-1e8, np.nan), tsframe) + + tsframe["A"][:5] = np.nan + tsframe["A"][-5:] = np.nan + tsframe["B"][:5] = -1e8 + + b = tsframe["B"] + b[b == -1e8] = np.nan + tsframe["B"] = b + result = tsframe.fillna(method="bfill") + tm.assert_frame_equal(result, tsframe.fillna(method="bfill")) + + @pytest.mark.parametrize( + "frame, to_replace, value, expected", + [ + (DataFrame({"ints": [1, 2, 3]}), 1, 0, DataFrame({"ints": [0, 2, 3]})), + ( + DataFrame({"ints": [1, 2, 3]}, dtype=np.int32), + 1, + 0, + DataFrame({"ints": [0, 2, 3]}, dtype=np.int32), + ), + ( + DataFrame({"ints": [1, 2, 3]}, dtype=np.int16), + 1, + 0, + DataFrame({"ints": [0, 2, 3]}, dtype=np.int16), + ), + ( + DataFrame({"bools": [True, False, True]}), + False, + True, + DataFrame({"bools": [True, True, True]}), + ), + ( + DataFrame({"complex": [1j, 2j, 3j]}), + 1j, + 0, + DataFrame({"complex": [0j, 2j, 3j]}), + ), + ( + DataFrame( + { + "datetime64": Index( + [ + datetime(2018, 5, 28), + datetime(2018, 7, 28), + datetime(2018, 5, 28), + ] + ) + } + ), + datetime(2018, 5, 28), + datetime(2018, 7, 28), + DataFrame({"datetime64": Index([datetime(2018, 7, 28)] * 3)}), + ), + # GH 20380 + ( + DataFrame({"dt": [datetime(3017, 12, 20)], "str": ["foo"]}), + "foo", + "bar", + DataFrame({"dt": [datetime(3017, 12, 20)], "str": ["bar"]}), + ), + ( + DataFrame( + { + "A": date_range("20130101", periods=3, tz="US/Eastern"), + "B": [0, np.nan, 2], + } + ), + Timestamp("20130102", tz="US/Eastern"), + Timestamp("20130104", tz="US/Eastern"), + DataFrame( + { + "A": [ + Timestamp("20130101", tz="US/Eastern"), + Timestamp("20130104", tz="US/Eastern"), + Timestamp("20130103", tz="US/Eastern"), + ], + "B": [0, np.nan, 2], + } + ), + ), + # GH 35376 + ( + DataFrame([[1, 1.0], [2, 2.0]]), + 1.0, + 5, + DataFrame([[5, 5.0], [2, 2.0]]), + ), + ( + DataFrame([[1, 1.0], [2, 2.0]]), + 1, + 5, + DataFrame([[5, 5.0], [2, 2.0]]), + ), + ( + DataFrame([[1, 1.0], [2, 2.0]]), + 1.0, + 5.0, + DataFrame([[5, 5.0], [2, 2.0]]), + ), + ( + DataFrame([[1, 1.0], [2, 2.0]]), + 1, + 5.0, + DataFrame([[5, 5.0], [2, 2.0]]), + ), + ], + ) + def test_replace_dtypes(self, frame, to_replace, value, expected): + result = getattr(frame, "replace")(to_replace, value) + tm.assert_frame_equal(result, expected) + + def test_replace_input_formats_listlike(self): + # both dicts + to_rep = {"A": np.nan, "B": 0, "C": ""} + values = {"A": 0, "B": -1, "C": "missing"} + df = DataFrame( + {"A": [np.nan, 0, np.inf], "B": [0, 2, 5], "C": ["", "asdf", "fd"]} + ) + filled = df.replace(to_rep, values) + expected = {k: v.replace(to_rep[k], values[k]) for k, v in df.items()} + tm.assert_frame_equal(filled, DataFrame(expected)) + + result = df.replace([0, 2, 5], [5, 2, 0]) + expected = DataFrame( + {"A": [np.nan, 5, np.inf], "B": [5, 2, 0], "C": ["", "asdf", "fd"]} + ) + tm.assert_frame_equal(result, expected) + + # scalar to dict + values = {"A": 0, "B": -1, "C": "missing"} + df = DataFrame( + {"A": 
[np.nan, 0, np.nan], "B": [0, 2, 5], "C": ["", "asdf", "fd"]} + ) + filled = df.replace(np.nan, values) + expected = {k: v.replace(np.nan, values[k]) for k, v in df.items()} + tm.assert_frame_equal(filled, DataFrame(expected)) + + # list to list + to_rep = [np.nan, 0, ""] + values = [-2, -1, "missing"] + result = df.replace(to_rep, values) + expected = df.copy() + for i in range(len(to_rep)): + return_value = expected.replace(to_rep[i], values[i], inplace=True) + assert return_value is None + tm.assert_frame_equal(result, expected) + + msg = r"Replacement lists must match in length\. Expecting 3 got 2" + with pytest.raises(ValueError, match=msg): + df.replace(to_rep, values[1:]) + + def test_replace_input_formats_scalar(self): + df = DataFrame( + {"A": [np.nan, 0, np.inf], "B": [0, 2, 5], "C": ["", "asdf", "fd"]} + ) + + # dict to scalar + to_rep = {"A": np.nan, "B": 0, "C": ""} + filled = df.replace(to_rep, 0) + expected = {k: v.replace(to_rep[k], 0) for k, v in df.items()} + tm.assert_frame_equal(filled, DataFrame(expected)) + + msg = "value argument must be scalar, dict, or Series" + with pytest.raises(TypeError, match=msg): + df.replace(to_rep, [np.nan, 0, ""]) + + # list to scalar + to_rep = [np.nan, 0, ""] + result = df.replace(to_rep, -1) + expected = df.copy() + for i in range(len(to_rep)): + return_value = expected.replace(to_rep[i], -1, inplace=True) + assert return_value is None + tm.assert_frame_equal(result, expected) + + def test_replace_limit(self): + pass + + def test_replace_dict_no_regex(self): + answer = Series( + { + 0: "Strongly Agree", + 1: "Agree", + 2: "Neutral", + 3: "Disagree", + 4: "Strongly Disagree", + } + ) + weights = { + "Agree": 4, + "Disagree": 2, + "Neutral": 3, + "Strongly Agree": 5, + "Strongly Disagree": 1, + } + expected = Series({0: 5, 1: 4, 2: 3, 3: 2, 4: 1}) + result = answer.replace(weights) + tm.assert_series_equal(result, expected) + + def test_replace_series_no_regex(self): + answer = Series( + { + 0: "Strongly Agree", + 1: "Agree", + 2: "Neutral", + 3: "Disagree", + 4: "Strongly Disagree", + } + ) + weights = Series( + { + "Agree": 4, + "Disagree": 2, + "Neutral": 3, + "Strongly Agree": 5, + "Strongly Disagree": 1, + } + ) + expected = Series({0: 5, 1: 4, 2: 3, 3: 2, 4: 1}) + result = answer.replace(weights) + tm.assert_series_equal(result, expected) + + def test_replace_dict_tuple_list_ordering_remains_the_same(self): + df = DataFrame({"A": [np.nan, 1]}) + res1 = df.replace(to_replace={np.nan: 0, 1: -1e8}) + res2 = df.replace(to_replace=(1, np.nan), value=[-1e8, 0]) + res3 = df.replace(to_replace=[1, np.nan], value=[-1e8, 0]) + + expected = DataFrame({"A": [0, -1e8]}) + tm.assert_frame_equal(res1, res2) + tm.assert_frame_equal(res2, res3) + tm.assert_frame_equal(res3, expected) + + def test_replace_doesnt_replace_without_regex(self): + df = DataFrame( + { + "fol": [1, 2, 2, 3], + "T_opp": ["0", "vr", "0", "0"], + "T_Dir": ["0", "0", "0", "bt"], + "T_Enh": ["vo", "0", "0", "0"], + } + ) + res = df.replace({r"\D": 1}) + tm.assert_frame_equal(df, res) + + def test_replace_bool_with_string(self): + df = DataFrame({"a": [True, False], "b": list("ab")}) + result = df.replace(True, "a") + expected = DataFrame({"a": ["a", False], "b": df.b}) + tm.assert_frame_equal(result, expected) + + def test_replace_pure_bool_with_string_no_op(self): + df = DataFrame(np.random.rand(2, 2) > 0.5) + result = df.replace("asdf", "fdsa") + tm.assert_frame_equal(df, result) + + def test_replace_bool_with_bool(self): + df = DataFrame(np.random.rand(2, 2) > 0.5) + result 
= df.replace(False, True) + expected = DataFrame(np.ones((2, 2), dtype=bool)) + tm.assert_frame_equal(result, expected) + + def test_replace_with_dict_with_bool_keys(self): + df = DataFrame({0: [True, False], 1: [False, True]}) + result = df.replace({"asdf": "asdb", True: "yes"}) + expected = DataFrame({0: ["yes", False], 1: [False, "yes"]}) + tm.assert_frame_equal(result, expected) + + def test_replace_dict_strings_vs_ints(self): + # GH#34789 + df = DataFrame({"Y0": [1, 2], "Y1": [3, 4]}) + result = df.replace({"replace_string": "test"}) + + tm.assert_frame_equal(result, df) + + result = df["Y0"].replace({"replace_string": "test"}) + tm.assert_series_equal(result, df["Y0"]) + + def test_replace_truthy(self): + df = DataFrame({"a": [True, True]}) + r = df.replace([np.inf, -np.inf], np.nan) + e = df + tm.assert_frame_equal(r, e) + + def test_nested_dict_overlapping_keys_replace_int(self): + # GH 27660 keep behaviour consistent for simple dictionary and + # nested dictionary replacement + df = DataFrame({"a": list(range(1, 5))}) + + result = df.replace({"a": dict(zip(range(1, 5), range(2, 6)))}) + expected = df.replace(dict(zip(range(1, 5), range(2, 6)))) + tm.assert_frame_equal(result, expected) + + def test_nested_dict_overlapping_keys_replace_str(self): + # GH 27660 + a = np.arange(1, 5) + astr = a.astype(str) + bstr = np.arange(2, 6).astype(str) + df = DataFrame({"a": astr}) + result = df.replace(dict(zip(astr, bstr))) + expected = df.replace({"a": dict(zip(astr, bstr))}) + tm.assert_frame_equal(result, expected) + + def test_replace_swapping_bug(self): + df = DataFrame({"a": [True, False, True]}) + res = df.replace({"a": {True: "Y", False: "N"}}) + expect = DataFrame({"a": ["Y", "N", "Y"]}) + tm.assert_frame_equal(res, expect) + + df = DataFrame({"a": [0, 1, 0]}) + res = df.replace({"a": {0: "Y", 1: "N"}}) + expect = DataFrame({"a": ["Y", "N", "Y"]}) + tm.assert_frame_equal(res, expect) + + def test_replace_period(self): + d = { + "fname": { + "out_augmented_AUG_2011.json": pd.Period(year=2011, month=8, freq="M"), + "out_augmented_JAN_2011.json": pd.Period(year=2011, month=1, freq="M"), + "out_augmented_MAY_2012.json": pd.Period(year=2012, month=5, freq="M"), + "out_augmented_SUBSIDY_WEEK.json": pd.Period( + year=2011, month=4, freq="M" + ), + "out_augmented_AUG_2012.json": pd.Period(year=2012, month=8, freq="M"), + "out_augmented_MAY_2011.json": pd.Period(year=2011, month=5, freq="M"), + "out_augmented_SEP_2013.json": pd.Period(year=2013, month=9, freq="M"), + } + } + + df = DataFrame( + [ + "out_augmented_AUG_2012.json", + "out_augmented_SEP_2013.json", + "out_augmented_SUBSIDY_WEEK.json", + "out_augmented_MAY_2012.json", + "out_augmented_MAY_2011.json", + "out_augmented_AUG_2011.json", + "out_augmented_JAN_2011.json", + ], + columns=["fname"], + ) + assert set(df.fname.values) == set(d["fname"].keys()) + + expected = DataFrame({"fname": [d["fname"][k] for k in df.fname.values]}) + assert expected.dtypes[0] == "Period[M]" + result = df.replace(d) + tm.assert_frame_equal(result, expected) + + def test_replace_datetime(self): + d = { + "fname": { + "out_augmented_AUG_2011.json": Timestamp("2011-08"), + "out_augmented_JAN_2011.json": Timestamp("2011-01"), + "out_augmented_MAY_2012.json": Timestamp("2012-05"), + "out_augmented_SUBSIDY_WEEK.json": Timestamp("2011-04"), + "out_augmented_AUG_2012.json": Timestamp("2012-08"), + "out_augmented_MAY_2011.json": Timestamp("2011-05"), + "out_augmented_SEP_2013.json": Timestamp("2013-09"), + } + } + + df = DataFrame( + [ + 
"out_augmented_AUG_2012.json", + "out_augmented_SEP_2013.json", + "out_augmented_SUBSIDY_WEEK.json", + "out_augmented_MAY_2012.json", + "out_augmented_MAY_2011.json", + "out_augmented_AUG_2011.json", + "out_augmented_JAN_2011.json", + ], + columns=["fname"], + ) + assert set(df.fname.values) == set(d["fname"].keys()) + expected = DataFrame({"fname": [d["fname"][k] for k in df.fname.values]}) + result = df.replace(d) + tm.assert_frame_equal(result, expected) + + def test_replace_datetimetz(self): + + # GH 11326 + # behaving poorly when presented with a datetime64[ns, tz] + df = DataFrame( + { + "A": date_range("20130101", periods=3, tz="US/Eastern"), + "B": [0, np.nan, 2], + } + ) + result = df.replace(np.nan, 1) + expected = DataFrame( + { + "A": date_range("20130101", periods=3, tz="US/Eastern"), + "B": Series([0, 1, 2], dtype="float64"), + } + ) + tm.assert_frame_equal(result, expected) + + result = df.fillna(1) + tm.assert_frame_equal(result, expected) + + result = df.replace(0, np.nan) + expected = DataFrame( + { + "A": date_range("20130101", periods=3, tz="US/Eastern"), + "B": [np.nan, np.nan, 2], + } + ) + tm.assert_frame_equal(result, expected) + + result = df.replace( + Timestamp("20130102", tz="US/Eastern"), + Timestamp("20130104", tz="US/Eastern"), + ) + expected = DataFrame( + { + "A": [ + Timestamp("20130101", tz="US/Eastern"), + Timestamp("20130104", tz="US/Eastern"), + Timestamp("20130103", tz="US/Eastern"), + ], + "B": [0, np.nan, 2], + } + ) + tm.assert_frame_equal(result, expected) + + result = df.copy() + result.iloc[1, 0] = np.nan + result = result.replace({"A": pd.NaT}, Timestamp("20130104", tz="US/Eastern")) + tm.assert_frame_equal(result, expected) + + # coerce to object + result = df.copy() + result.iloc[1, 0] = np.nan + with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"): + result = result.replace( + {"A": pd.NaT}, Timestamp("20130104", tz="US/Pacific") + ) + expected = DataFrame( + { + "A": [ + Timestamp("20130101", tz="US/Eastern"), + Timestamp("20130104", tz="US/Pacific"), + # once deprecation is enforced + # Timestamp("20130104", tz="US/Pacific").tz_convert("US/Eastern"), + Timestamp("20130103", tz="US/Eastern"), + ], + "B": [0, np.nan, 2], + } + ) + tm.assert_frame_equal(result, expected) + + result = df.copy() + result.iloc[1, 0] = np.nan + result = result.replace({"A": np.nan}, Timestamp("20130104")) + expected = DataFrame( + { + "A": [ + Timestamp("20130101", tz="US/Eastern"), + Timestamp("20130104"), + Timestamp("20130103", tz="US/Eastern"), + ], + "B": [0, np.nan, 2], + } + ) + tm.assert_frame_equal(result, expected) + + def test_replace_with_empty_dictlike(self, mix_abc): + # GH 15289 + df = DataFrame(mix_abc) + tm.assert_frame_equal(df, df.replace({})) + tm.assert_frame_equal(df, df.replace(Series([], dtype=object))) + + tm.assert_frame_equal(df, df.replace({"b": {}})) + tm.assert_frame_equal(df, df.replace(Series({"b": {}}))) + + @pytest.mark.parametrize( + "to_replace, method, expected", + [ + (0, "bfill", {"A": [1, 1, 2], "B": [5, np.nan, 7], "C": ["a", "b", "c"]}), + ( + np.nan, + "bfill", + {"A": [0, 1, 2], "B": [5.0, 7.0, 7.0], "C": ["a", "b", "c"]}, + ), + ("d", "ffill", {"A": [0, 1, 2], "B": [5, np.nan, 7], "C": ["a", "b", "c"]}), + ( + [0, 2], + "bfill", + {"A": [1, 1, 2], "B": [5, np.nan, 7], "C": ["a", "b", "c"]}, + ), + ( + [1, 2], + "pad", + {"A": [0, 0, 0], "B": [5, np.nan, 7], "C": ["a", "b", "c"]}, + ), + ( + (1, 2), + "bfill", + {"A": [0, 2, 2], "B": [5, np.nan, 7], "C": ["a", "b", "c"]}, + ), + ( + ["b", "c"], + 
"ffill", + {"A": [0, 1, 2], "B": [5, np.nan, 7], "C": ["a", "a", "a"]}, + ), + ], + ) + def test_replace_method(self, to_replace, method, expected): + # GH 19632 + df = DataFrame({"A": [0, 1, 2], "B": [5, np.nan, 7], "C": ["a", "b", "c"]}) + + result = df.replace(to_replace=to_replace, value=None, method=method) + expected = DataFrame(expected) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "replace_dict, final_data", + [({"a": 1, "b": 1}, [[3, 3], [2, 2]]), ({"a": 1, "b": 2}, [[3, 1], [2, 3]])], + ) + def test_categorical_replace_with_dict(self, replace_dict, final_data): + # GH 26988 + df = DataFrame([[1, 1], [2, 2]], columns=["a", "b"], dtype="category") + + final_data = np.array(final_data) + + a = pd.Categorical(final_data[:, 0], categories=[3, 2]) + + ex_cat = [3, 2] if replace_dict["b"] == 1 else [1, 3] + b = pd.Categorical(final_data[:, 1], categories=ex_cat) + + expected = DataFrame({"a": a, "b": b}) + result = df.replace(replace_dict, 3) + tm.assert_frame_equal(result, expected) + msg = ( + r"Attributes of DataFrame.iloc\[:, 0\] \(column name=\"a\"\) are " + "different" + ) + with pytest.raises(AssertionError, match=msg): + # ensure non-inplace call does not affect original + tm.assert_frame_equal(df, expected) + return_value = df.replace(replace_dict, 3, inplace=True) + assert return_value is None + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize( + "df, to_replace, exp", + [ + ( + {"col1": [1, 2, 3], "col2": [4, 5, 6]}, + {4: 5, 5: 6, 6: 7}, + {"col1": [1, 2, 3], "col2": [5, 6, 7]}, + ), + ( + {"col1": [1, 2, 3], "col2": ["4", "5", "6"]}, + {"4": "5", "5": "6", "6": "7"}, + {"col1": [1, 2, 3], "col2": ["5", "6", "7"]}, + ), + ], + ) + def test_replace_commutative(self, df, to_replace, exp): + # GH 16051 + # DataFrame.replace() overwrites when values are non-numeric + # also added to data frame whilst issue was for series + + df = DataFrame(df) + + expected = DataFrame(exp) + result = df.replace(to_replace) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "replacer", + [ + Timestamp("20170827"), + np.int8(1), + np.int16(1), + np.float32(1), + np.float64(1), + ], + ) + def test_replace_replacer_dtype(self, request, replacer): + # GH26632 + if np.isscalar(replacer) and replacer.dtype.itemsize < 8: + request.node.add_marker( + pytest.mark.xfail( + np_version_under1p20, reason="np.putmask doesn't coerce dtype" + ) + ) + df = DataFrame(["a"]) + result = df.replace({"a": replacer, "b": replacer}) + expected = DataFrame([replacer]) + tm.assert_frame_equal(result, expected) + + def test_replace_after_convert_dtypes(self): + # GH31517 + df = DataFrame({"grp": [1, 2, 3, 4, 5]}, dtype="Int64") + result = df.replace(1, 10) + expected = DataFrame({"grp": [10, 2, 3, 4, 5]}, dtype="Int64") + tm.assert_frame_equal(result, expected) + + def test_replace_invalid_to_replace(self): + # GH 18634 + # API: replace() should raise an exception if invalid argument is given + df = DataFrame({"one": ["a", "b ", "c"], "two": ["d ", "e ", "f "]}) + msg = ( + r"Expecting 'to_replace' to be either a scalar, array-like, " + r"dict or None, got invalid type.*" + ) + with pytest.raises(TypeError, match=msg): + df.replace(lambda x: x.strip()) + + @pytest.mark.parametrize("dtype", ["float", "float64", "int64", "Int64", "boolean"]) + @pytest.mark.parametrize("value", [np.nan, pd.NA]) + def test_replace_no_replacement_dtypes(self, dtype, value): + # https://github.com/pandas-dev/pandas/issues/32988 + df = DataFrame(np.eye(2), dtype=dtype) + result = 
df.replace(to_replace=[None, -np.inf, np.inf], value=value) + tm.assert_frame_equal(result, df) + + @pytest.mark.parametrize("replacement", [np.nan, 5]) + def test_replace_with_duplicate_columns(self, replacement): + # GH 24798 + result = DataFrame({"A": [1, 2, 3], "A1": [4, 5, 6], "B": [7, 8, 9]}) + result.columns = list("AAB") + + expected = DataFrame( + {"A": [1, 2, 3], "A1": [4, 5, 6], "B": [replacement, 8, 9]} + ) + expected.columns = list("AAB") + + result["B"] = result["B"].replace(7, replacement) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("value", [pd.Period("2020-01"), pd.Interval(0, 5)]) + def test_replace_ea_ignore_float(self, frame_or_series, value): + # GH#34871 + obj = DataFrame({"Per": [value] * 3}) + obj = tm.get_obj(obj, frame_or_series) + + expected = obj.copy() + result = obj.replace(1.0, 0.0) + tm.assert_equal(expected, result) + + def test_replace_value_category_type(self): + """ + Test for #23305: to ensure category dtypes are maintained + after replace with direct values + """ + + # create input data + input_dict = { + "col1": [1, 2, 3, 4], + "col2": ["a", "b", "c", "d"], + "col3": [1.5, 2.5, 3.5, 4.5], + "col4": ["cat1", "cat2", "cat3", "cat4"], + "col5": ["obj1", "obj2", "obj3", "obj4"], + } + # explicitly cast columns as category and order them + input_df = DataFrame(data=input_dict).astype( + {"col2": "category", "col4": "category"} + ) + input_df["col2"] = input_df["col2"].cat.reorder_categories( + ["a", "b", "c", "d"], ordered=True + ) + input_df["col4"] = input_df["col4"].cat.reorder_categories( + ["cat1", "cat2", "cat3", "cat4"], ordered=True + ) + + # create expected dataframe + expected_dict = { + "col1": [1, 2, 3, 4], + "col2": ["a", "b", "c", "z"], + "col3": [1.5, 2.5, 3.5, 4.5], + "col4": ["cat1", "catX", "cat3", "cat4"], + "col5": ["obj9", "obj2", "obj3", "obj4"], + } + # explicitly cast columns as category and order them + expected = DataFrame(data=expected_dict).astype( + {"col2": "category", "col4": "category"} + ) + expected["col2"] = expected["col2"].cat.reorder_categories( + ["a", "b", "c", "z"], ordered=True + ) + expected["col4"] = expected["col4"].cat.reorder_categories( + ["cat1", "catX", "cat3", "cat4"], ordered=True + ) + + # replace values in input dataframe + input_df = input_df.replace("d", "z") + input_df = input_df.replace("obj1", "obj9") + result = input_df.replace("cat2", "catX") + + tm.assert_frame_equal(result, expected) + + def test_replace_dict_category_type(self): + """ + Test to ensure category dtypes are maintained + after replace with dict values + """ + # GH#35268, GH#44940 + + # create input dataframe + input_dict = {"col1": ["a"], "col2": ["obj1"], "col3": ["cat1"]} + # explicitly cast columns as category + input_df = DataFrame(data=input_dict).astype( + {"col1": "category", "col2": "category", "col3": "category"} + ) + + # create expected dataframe + expected_dict = {"col1": ["z"], "col2": ["obj9"], "col3": ["catX"]} + # explicitly cast columns as category + expected = DataFrame(data=expected_dict).astype( + {"col1": "category", "col2": "category", "col3": "category"} + ) + + # replace values in input dataframe using a dict + result = input_df.replace({"a": "z", "obj1": "obj9", "cat1": "catX"}) + + tm.assert_frame_equal(result, expected) + + def test_replace_with_compiled_regex(self): + # https://github.com/pandas-dev/pandas/issues/35680 + df = DataFrame(["a", "b", "c"]) + regex = re.compile("^a$") + result = df.replace({regex: "z"}, regex=True) + expected = DataFrame(["z", "b", "c"]) + 
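
For context on the compiled-regex path exercised by test_replace_with_compiled_regex, a minimal sketch (the frame and pattern here are illustrative, not taken from the test suite):

```python
import re

import pandas as pd

df = pd.DataFrame({"a": ["foo", "bar", "foobar"]})
pattern = re.compile(r"^foo$")

# With regex=True, a pre-compiled pattern works as a dict key; only
# matches of ^foo$ are rewritten, so "foobar" is left alone.
out = df.replace({pattern: "baz"}, regex=True)
print(out["a"].tolist())  # ['baz', 'bar', 'foobar']
```
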
tm.assert_frame_equal(result, expected) + + def test_replace_intervals(self): + # https://github.com/pandas-dev/pandas/issues/35931 + df = DataFrame({"a": [pd.Interval(0, 1), pd.Interval(0, 1)]}) + result = df.replace({"a": {pd.Interval(0, 1): "x"}}) + expected = DataFrame({"a": ["x", "x"]}) + tm.assert_frame_equal(result, expected) + + def test_replace_unicode(self): + # GH: 16784 + columns_values_map = {"positive": {"正面": 1, "中立": 1, "负面": 0}} + df1 = DataFrame({"positive": np.ones(3)}) + result = df1.replace(columns_values_map) + expected = DataFrame({"positive": np.ones(3)}) + tm.assert_frame_equal(result, expected) + + def test_replace_bytes(self, frame_or_series): + # GH#38900 + obj = frame_or_series(["o"]).astype("|S") + expected = obj.copy() + obj = obj.replace({None: np.nan}) + tm.assert_equal(obj, expected) + + @pytest.mark.parametrize( + "data, to_replace, value, expected", + [ + ([1], [1.0], [0], [0]), + ([1], [1], [0], [0]), + ([1.0], [1.0], [0], [0.0]), + ([1.0], [1], [0], [0.0]), + ], + ) + @pytest.mark.parametrize("box", [list, tuple, np.array]) + def test_replace_list_with_mixed_type( + self, data, to_replace, value, expected, box, frame_or_series + ): + # GH#40371 + obj = frame_or_series(data) + expected = frame_or_series(expected) + result = obj.replace(box(to_replace), value) + tm.assert_equal(result, expected) + + +class TestDataFrameReplaceRegex: + @pytest.mark.parametrize( + "data", + [ + {"a": list("ab.."), "b": list("efgh")}, + {"a": list("ab.."), "b": list(range(4))}, + ], + ) + @pytest.mark.parametrize( + "to_replace,value", [(r"\s*\.\s*", np.nan), (r"\s*(\.)\s*", r"\1\1\1")] + ) + @pytest.mark.parametrize("compile_regex", [True, False]) + @pytest.mark.parametrize("regex_kwarg", [True, False]) + @pytest.mark.parametrize("inplace", [True, False]) + def test_regex_replace_scalar( + self, data, to_replace, value, compile_regex, regex_kwarg, inplace + ): + df = DataFrame(data) + expected = df.copy() + + if compile_regex: + to_replace = re.compile(to_replace) + + if regex_kwarg: + regex = to_replace + to_replace = None + else: + regex = True + + result = df.replace(to_replace, value, inplace=inplace, regex=regex) + + if inplace: + assert result is None + result = df + + if value is np.nan: + expected_replace_val = np.nan + else: + expected_replace_val = "..." 
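
The r"\1\1\1" parametrization above relies on re.sub-style group substitution; a small sketch of that behaviour with made-up data:

```python
import pandas as pd

df = pd.DataFrame({"a": ["a", "b", ".", "."]})

# The capture group in to_replace can be referenced in value, so each
# bare "." becomes the group repeated three times: "...".
out = df.replace(r"\s*(\.)\s*", r"\1\1\1", regex=True)
print(out["a"].tolist())  # ['a', 'b', '...', '...']
```
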
+ + expected.loc[expected["a"] == ".", "a"] = expected_replace_val + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("regex", [False, True]) + def test_replace_regex_dtype_frame(self, regex): + # GH-48644 + df1 = DataFrame({"A": ["0"], "B": ["0"]}) + expected_df1 = DataFrame({"A": [1], "B": [1]}) + result_df1 = df1.replace(to_replace="0", value=1, regex=regex) + tm.assert_frame_equal(result_df1, expected_df1) + + df2 = DataFrame({"A": ["0"], "B": ["1"]}) + expected_df2 = DataFrame({"A": [1], "B": ["1"]}) + result_df2 = df2.replace(to_replace="0", value=1, regex=regex) + tm.assert_frame_equal(result_df2, expected_df2) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_reset_index.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_reset_index.py new file mode 100644 index 0000000..8130c4f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_reset_index.py @@ -0,0 +1,710 @@ +from datetime import datetime +from itertools import product + +import numpy as np +import pytest + +from pandas.core.dtypes.common import ( + is_float_dtype, + is_integer_dtype, +) + +import pandas as pd +from pandas import ( + Categorical, + CategoricalIndex, + DataFrame, + Index, + Interval, + IntervalIndex, + MultiIndex, + RangeIndex, + Series, + Timestamp, + cut, + date_range, +) +import pandas._testing as tm + + +class TestResetIndex: + def test_reset_index_empty_rangeindex(self): + # GH#45230 + df = DataFrame( + columns=["brand"], dtype=np.int64, index=RangeIndex(0, 0, 1, name="foo") + ) + + df2 = df.set_index([df.index, "brand"]) + + result = df2.reset_index([1], drop=True) + tm.assert_frame_equal(result, df[[]], check_index_type=True) + + def test_set_reset(self): + + idx = Index([2 ** 63, 2 ** 63 + 5, 2 ** 63 + 10], name="foo") + + # set/reset + df = DataFrame({"A": [0, 1, 2]}, index=idx) + result = df.reset_index() + assert result["foo"].dtype == np.dtype("uint64") + + df = result.set_index("foo") + tm.assert_index_equal(df.index, idx) + + def test_set_index_reset_index_dt64tz(self): + + idx = Index(date_range("20130101", periods=3, tz="US/Eastern"), name="foo") + + # set/reset + df = DataFrame({"A": [0, 1, 2]}, index=idx) + result = df.reset_index() + assert result["foo"].dtype == "datetime64[ns, US/Eastern]" + + df = result.set_index("foo") + tm.assert_index_equal(df.index, idx) + + def test_reset_index_tz(self, tz_aware_fixture): + # GH 3950 + # reset_index with single level + tz = tz_aware_fixture + idx = date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx") + df = DataFrame({"a": range(5), "b": ["A", "B", "C", "D", "E"]}, index=idx) + + expected = DataFrame( + { + "idx": [ + datetime(2011, 1, 1), + datetime(2011, 1, 2), + datetime(2011, 1, 3), + datetime(2011, 1, 4), + datetime(2011, 1, 5), + ], + "a": range(5), + "b": ["A", "B", "C", "D", "E"], + }, + columns=["idx", "a", "b"], + ) + expected["idx"] = expected["idx"].apply(lambda d: Timestamp(d, tz=tz)) + tm.assert_frame_equal(df.reset_index(), expected) + + @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"]) + def test_frame_reset_index_tzaware_index(self, tz): + dr = date_range("2012-06-02", periods=10, tz=tz) + df = DataFrame(np.random.randn(len(dr)), dr) + roundtripped = df.reset_index().set_index("index") + xp = df.index.tz + rs = roundtripped.index.tz + assert xp == rs + + def test_reset_index_with_intervals(self): + idx = IntervalIndex.from_breaks(np.arange(11), name="x") + original = DataFrame({"x": idx, "y": 
np.arange(10)})[["x", "y"]] + + result = original.set_index("x") + expected = DataFrame({"y": np.arange(10)}, index=idx) + tm.assert_frame_equal(result, expected) + + result2 = result.reset_index() + tm.assert_frame_equal(result2, original) + + def test_reset_index(self, float_frame): + stacked = float_frame.stack()[::2] + stacked = DataFrame({"foo": stacked, "bar": stacked}) + + names = ["first", "second"] + stacked.index.names = names + deleveled = stacked.reset_index() + for i, (lev, level_codes) in enumerate( + zip(stacked.index.levels, stacked.index.codes) + ): + values = lev.take(level_codes) + name = names[i] + tm.assert_index_equal(values, Index(deleveled[name])) + + stacked.index.names = [None, None] + deleveled2 = stacked.reset_index() + tm.assert_series_equal( + deleveled["first"], deleveled2["level_0"], check_names=False + ) + tm.assert_series_equal( + deleveled["second"], deleveled2["level_1"], check_names=False + ) + + # default name assigned + rdf = float_frame.reset_index() + exp = Series(float_frame.index.values, name="index") + tm.assert_series_equal(rdf["index"], exp) + + # default name assigned, corner case + df = float_frame.copy() + df["index"] = "foo" + rdf = df.reset_index() + exp = Series(float_frame.index.values, name="level_0") + tm.assert_series_equal(rdf["level_0"], exp) + + # but this is ok + float_frame.index.name = "index" + deleveled = float_frame.reset_index() + tm.assert_series_equal(deleveled["index"], Series(float_frame.index)) + tm.assert_index_equal(deleveled.index, Index(range(len(deleveled))), exact=True) + + # preserve column names + float_frame.columns.name = "columns" + reset = float_frame.reset_index() + assert reset.columns.name == "columns" + + # only remove certain columns + df = float_frame.reset_index().set_index(["index", "A", "B"]) + rs = df.reset_index(["A", "B"]) + + tm.assert_frame_equal(rs, float_frame) + + rs = df.reset_index(["index", "A", "B"]) + tm.assert_frame_equal(rs, float_frame.reset_index()) + + rs = df.reset_index(["index", "A", "B"]) + tm.assert_frame_equal(rs, float_frame.reset_index()) + + rs = df.reset_index("A") + xp = float_frame.reset_index().set_index(["index", "B"]) + tm.assert_frame_equal(rs, xp) + + # test resetting in place + df = float_frame.copy() + reset = float_frame.reset_index() + return_value = df.reset_index(inplace=True) + assert return_value is None + tm.assert_frame_equal(df, reset) + + df = float_frame.reset_index().set_index(["index", "A", "B"]) + rs = df.reset_index("A", drop=True) + xp = float_frame.copy() + del xp["A"] + xp = xp.set_index(["B"], append=True) + tm.assert_frame_equal(rs, xp) + + def test_reset_index_name(self): + df = DataFrame( + [[1, 2, 3, 4], [5, 6, 7, 8]], + columns=["A", "B", "C", "D"], + index=Index(range(2), name="x"), + ) + assert df.reset_index().index.name is None + assert df.reset_index(drop=True).index.name is None + return_value = df.reset_index(inplace=True) + assert return_value is None + assert df.index.name is None + + def test_reset_index_level(self): + df = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=["A", "B", "C", "D"]) + + for levels in ["A", "B"], [0, 1]: + # With MultiIndex + result = df.set_index(["A", "B"]).reset_index(level=levels[0]) + tm.assert_frame_equal(result, df.set_index("B")) + + result = df.set_index(["A", "B"]).reset_index(level=levels[:1]) + tm.assert_frame_equal(result, df.set_index("B")) + + result = df.set_index(["A", "B"]).reset_index(level=levels) + tm.assert_frame_equal(result, df) + + result = df.set_index(["A", 
"B"]).reset_index(level=levels, drop=True) + tm.assert_frame_equal(result, df[["C", "D"]]) + + # With single-level Index (GH 16263) + result = df.set_index("A").reset_index(level=levels[0]) + tm.assert_frame_equal(result, df) + + result = df.set_index("A").reset_index(level=levels[:1]) + tm.assert_frame_equal(result, df) + + result = df.set_index(["A"]).reset_index(level=levels[0], drop=True) + tm.assert_frame_equal(result, df[["B", "C", "D"]]) + + # Missing levels - for both MultiIndex and single-level Index: + for idx_lev in ["A", "B"], ["A"]: + with pytest.raises(KeyError, match=r"(L|l)evel \(?E\)?"): + df.set_index(idx_lev).reset_index(level=["A", "E"]) + with pytest.raises(IndexError, match="Too many levels"): + df.set_index(idx_lev).reset_index(level=[0, 1, 2]) + + def test_reset_index_right_dtype(self): + time = np.arange(0.0, 10, np.sqrt(2) / 2) + s1 = Series( + (9.81 * time ** 2) / 2, index=Index(time, name="time"), name="speed" + ) + df = DataFrame(s1) + + reset = s1.reset_index() + assert reset["time"].dtype == np.float64 + + reset = df.reset_index() + assert reset["time"].dtype == np.float64 + + def test_reset_index_multiindex_col(self): + vals = np.random.randn(3, 3).astype(object) + idx = ["x", "y", "z"] + full = np.hstack(([[x] for x in idx], vals)) + df = DataFrame( + vals, + Index(idx, name="a"), + columns=[["b", "b", "c"], ["mean", "median", "mean"]], + ) + rs = df.reset_index() + xp = DataFrame( + full, columns=[["a", "b", "b", "c"], ["", "mean", "median", "mean"]] + ) + tm.assert_frame_equal(rs, xp) + + rs = df.reset_index(col_fill=None) + xp = DataFrame( + full, columns=[["a", "b", "b", "c"], ["a", "mean", "median", "mean"]] + ) + tm.assert_frame_equal(rs, xp) + + rs = df.reset_index(col_level=1, col_fill="blah") + xp = DataFrame( + full, columns=[["blah", "b", "b", "c"], ["a", "mean", "median", "mean"]] + ) + tm.assert_frame_equal(rs, xp) + + df = DataFrame( + vals, + MultiIndex.from_arrays([[0, 1, 2], ["x", "y", "z"]], names=["d", "a"]), + columns=[["b", "b", "c"], ["mean", "median", "mean"]], + ) + rs = df.reset_index("a") + xp = DataFrame( + full, + Index([0, 1, 2], name="d"), + columns=[["a", "b", "b", "c"], ["", "mean", "median", "mean"]], + ) + tm.assert_frame_equal(rs, xp) + + rs = df.reset_index("a", col_fill=None) + xp = DataFrame( + full, + Index(range(3), name="d"), + columns=[["a", "b", "b", "c"], ["a", "mean", "median", "mean"]], + ) + tm.assert_frame_equal(rs, xp) + + rs = df.reset_index("a", col_fill="blah", col_level=1) + xp = DataFrame( + full, + Index(range(3), name="d"), + columns=[["blah", "b", "b", "c"], ["a", "mean", "median", "mean"]], + ) + tm.assert_frame_equal(rs, xp) + + def test_reset_index_multiindex_nan(self): + # GH#6322, testing reset_index on MultiIndexes + # when we have a nan or all nan + df = DataFrame( + {"A": ["a", "b", "c"], "B": [0, 1, np.nan], "C": np.random.rand(3)} + ) + rs = df.set_index(["A", "B"]).reset_index() + tm.assert_frame_equal(rs, df) + + df = DataFrame( + {"A": [np.nan, "b", "c"], "B": [0, 1, 2], "C": np.random.rand(3)} + ) + rs = df.set_index(["A", "B"]).reset_index() + tm.assert_frame_equal(rs, df) + + df = DataFrame({"A": ["a", "b", "c"], "B": [0, 1, 2], "C": [np.nan, 1.1, 2.2]}) + rs = df.set_index(["A", "B"]).reset_index() + tm.assert_frame_equal(rs, df) + + df = DataFrame( + { + "A": ["a", "b", "c"], + "B": [np.nan, np.nan, np.nan], + "C": np.random.rand(3), + } + ) + rs = df.set_index(["A", "B"]).reset_index() + tm.assert_frame_equal(rs, df) + + @pytest.mark.parametrize( + "name", + [ + None, + "foo", + 2, 
+ 3.0, + pd.Timedelta(6), + Timestamp("2012-12-30", tz="UTC"), + "2012-12-31", + ], + ) + def test_reset_index_with_datetimeindex_cols(self, name): + # GH#5818 + warn = None + if isinstance(name, Timestamp) and name.tz is not None: + # _deprecate_mismatched_indexing + warn = FutureWarning + + df = DataFrame( + [[1, 2], [3, 4]], + columns=date_range("1/1/2013", "1/2/2013"), + index=["A", "B"], + ) + df.index.name = name + + with tm.assert_produces_warning(warn): + result = df.reset_index() + + item = name if name is not None else "index" + columns = Index([item, datetime(2013, 1, 1), datetime(2013, 1, 2)]) + if isinstance(item, str) and item == "2012-12-31": + columns = columns.astype("datetime64[ns]") + else: + assert columns.dtype == object + + expected = DataFrame( + [["A", 1, 2], ["B", 3, 4]], + columns=columns, + ) + tm.assert_frame_equal(result, expected) + + def test_reset_index_range(self): + # GH#12071 + df = DataFrame([[0, 0], [1, 1]], columns=["A", "B"], index=RangeIndex(stop=2)) + result = df.reset_index() + assert isinstance(result.index, RangeIndex) + expected = DataFrame( + [[0, 0, 0], [1, 1, 1]], + columns=["index", "A", "B"], + index=RangeIndex(stop=2), + ) + tm.assert_frame_equal(result, expected) + + def test_reset_index_multiindex_columns(self): + levels = [["A", ""], ["B", "b"]] + df = DataFrame([[0, 2], [1, 3]], columns=MultiIndex.from_tuples(levels)) + result = df[["B"]].rename_axis("A").reset_index() + tm.assert_frame_equal(result, df) + + # GH#16120: already existing column + msg = r"cannot insert \('A', ''\), already exists" + with pytest.raises(ValueError, match=msg): + df.rename_axis("A").reset_index() + + # GH#16164: multiindex (tuple) full key + result = df.set_index([("A", "")]).reset_index() + tm.assert_frame_equal(result, df) + + # with additional (unnamed) index level + idx_col = DataFrame( + [[0], [1]], columns=MultiIndex.from_tuples([("level_0", "")]) + ) + expected = pd.concat([idx_col, df[[("B", "b"), ("A", "")]]], axis=1) + result = df.set_index([("B", "b")], append=True).reset_index() + tm.assert_frame_equal(result, expected) + + # with index name which is a too long tuple... + msg = "Item must have length equal to number of levels." + with pytest.raises(ValueError, match=msg): + df.rename_axis([("C", "c", "i")]).reset_index() + + # or too short... + levels = [["A", "a", ""], ["B", "b", "i"]] + df2 = DataFrame([[0, 2], [1, 3]], columns=MultiIndex.from_tuples(levels)) + idx_col = DataFrame( + [[0], [1]], columns=MultiIndex.from_tuples([("C", "c", "ii")]) + ) + expected = pd.concat([idx_col, df2], axis=1) + result = df2.rename_axis([("C", "c")]).reset_index(col_fill="ii") + tm.assert_frame_equal(result, expected) + + # ... 
which is incompatible with col_fill=None + with pytest.raises( + ValueError, + match=( + "col_fill=None is incompatible with " + r"incomplete column name \('C', 'c'\)" + ), + ): + df2.rename_axis([("C", "c")]).reset_index(col_fill=None) + + # with col_level != 0 + result = df2.rename_axis([("c", "ii")]).reset_index(col_level=1, col_fill="C") + tm.assert_frame_equal(result, expected) + + @pytest.mark.filterwarnings("ignore:Timestamp.freq is deprecated:FutureWarning") + def test_reset_index_datetime(self, tz_naive_fixture): + # GH#3950 + tz = tz_naive_fixture + idx1 = date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx1") + idx2 = Index(range(5), name="idx2", dtype="int64") + idx = MultiIndex.from_arrays([idx1, idx2]) + df = DataFrame( + {"a": np.arange(5, dtype="int64"), "b": ["A", "B", "C", "D", "E"]}, + index=idx, + ) + + expected = DataFrame( + { + "idx1": [ + datetime(2011, 1, 1), + datetime(2011, 1, 2), + datetime(2011, 1, 3), + datetime(2011, 1, 4), + datetime(2011, 1, 5), + ], + "idx2": np.arange(5, dtype="int64"), + "a": np.arange(5, dtype="int64"), + "b": ["A", "B", "C", "D", "E"], + }, + columns=["idx1", "idx2", "a", "b"], + ) + expected["idx1"] = expected["idx1"].apply(lambda d: Timestamp(d, tz=tz)) + + tm.assert_frame_equal(df.reset_index(), expected) + + idx3 = date_range( + "1/1/2012", periods=5, freq="MS", tz="Europe/Paris", name="idx3" + ) + idx = MultiIndex.from_arrays([idx1, idx2, idx3]) + df = DataFrame( + {"a": np.arange(5, dtype="int64"), "b": ["A", "B", "C", "D", "E"]}, + index=idx, + ) + + expected = DataFrame( + { + "idx1": [ + datetime(2011, 1, 1), + datetime(2011, 1, 2), + datetime(2011, 1, 3), + datetime(2011, 1, 4), + datetime(2011, 1, 5), + ], + "idx2": np.arange(5, dtype="int64"), + "idx3": [ + datetime(2012, 1, 1), + datetime(2012, 2, 1), + datetime(2012, 3, 1), + datetime(2012, 4, 1), + datetime(2012, 5, 1), + ], + "a": np.arange(5, dtype="int64"), + "b": ["A", "B", "C", "D", "E"], + }, + columns=["idx1", "idx2", "idx3", "a", "b"], + ) + expected["idx1"] = expected["idx1"].apply(lambda d: Timestamp(d, tz=tz)) + expected["idx3"] = expected["idx3"].apply( + lambda d: Timestamp(d, tz="Europe/Paris") + ) + tm.assert_frame_equal(df.reset_index(), expected) + + # GH#7793 + idx = MultiIndex.from_product( + [["a", "b"], date_range("20130101", periods=3, tz=tz)] + ) + df = DataFrame( + np.arange(6, dtype="int64").reshape(6, 1), columns=["a"], index=idx + ) + + expected = DataFrame( + { + "level_0": "a a a b b b".split(), + "level_1": [ + datetime(2013, 1, 1), + datetime(2013, 1, 2), + datetime(2013, 1, 3), + ] + * 2, + "a": np.arange(6, dtype="int64"), + }, + columns=["level_0", "level_1", "a"], + ) + expected["level_1"] = expected["level_1"].apply(lambda d: Timestamp(d, tz=tz)) + result = df.reset_index() + tm.assert_frame_equal(result, expected) + + def test_reset_index_period(self): + # GH#7746 + idx = MultiIndex.from_product( + [pd.period_range("20130101", periods=3, freq="M"), list("abc")], + names=["month", "feature"], + ) + + df = DataFrame( + np.arange(9, dtype="int64").reshape(-1, 1), index=idx, columns=["a"] + ) + expected = DataFrame( + { + "month": ( + [pd.Period("2013-01", freq="M")] * 3 + + [pd.Period("2013-02", freq="M")] * 3 + + [pd.Period("2013-03", freq="M")] * 3 + ), + "feature": ["a", "b", "c"] * 3, + "a": np.arange(9, dtype="int64"), + }, + columns=["month", "feature", "a"], + ) + result = df.reset_index() + tm.assert_frame_equal(result, expected) + + def test_reset_index_delevel_infer_dtype(self): + tuples = list(product(["foo", "bar"], 
[10, 20], [1.0, 1.1])) + index = MultiIndex.from_tuples(tuples, names=["prm0", "prm1", "prm2"]) + df = DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"], index=index) + deleveled = df.reset_index() + assert is_integer_dtype(deleveled["prm1"]) + assert is_float_dtype(deleveled["prm2"]) + + def test_reset_index_with_drop( + self, multiindex_year_month_day_dataframe_random_data + ): + ymd = multiindex_year_month_day_dataframe_random_data + + deleveled = ymd.reset_index(drop=True) + assert len(deleveled.columns) == len(ymd.columns) + assert deleveled.index.name == ymd.index.name + + @pytest.mark.parametrize( + "ix_data, exp_data", + [ + ( + [(pd.NaT, 1), (pd.NaT, 2)], + {"a": [pd.NaT, pd.NaT], "b": [1, 2], "x": [11, 12]}, + ), + ( + [(pd.NaT, 1), (Timestamp("2020-01-01"), 2)], + {"a": [pd.NaT, Timestamp("2020-01-01")], "b": [1, 2], "x": [11, 12]}, + ), + ( + [(pd.NaT, 1), (pd.Timedelta(123, "d"), 2)], + {"a": [pd.NaT, pd.Timedelta(123, "d")], "b": [1, 2], "x": [11, 12]}, + ), + ], + ) + def test_reset_index_nat_multiindex(self, ix_data, exp_data): + # GH#36541: that reset_index() does not raise ValueError + ix = MultiIndex.from_tuples(ix_data, names=["a", "b"]) + result = DataFrame({"x": [11, 12]}, index=ix) + result = result.reset_index() + + expected = DataFrame(exp_data) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "codes", ([[0, 0, 1, 1], [0, 1, 0, 1]], [[0, 0, -1, 1], [0, 1, 0, 1]]) + ) + def test_rest_index_multiindex_categorical_with_missing_values(self, codes): + # GH#24206 + + index = MultiIndex( + [CategoricalIndex(["A", "B"]), CategoricalIndex(["a", "b"])], codes + ) + data = {"col": range(len(index))} + df = DataFrame(data=data, index=index) + + expected = DataFrame( + { + "level_0": Categorical.from_codes(codes[0], categories=["A", "B"]), + "level_1": Categorical.from_codes(codes[1], categories=["a", "b"]), + "col": range(4), + } + ) + + res = df.reset_index() + tm.assert_frame_equal(res, expected) + + # roundtrip + res = expected.set_index(["level_0", "level_1"]).reset_index() + tm.assert_frame_equal(res, expected) + + +@pytest.mark.parametrize( + "array, dtype", + [ + (["a", "b"], object), + ( + pd.period_range("12-1-2000", periods=2, freq="Q-DEC"), + pd.PeriodDtype(freq="Q-DEC"), + ), + ], +) +def test_reset_index_dtypes_on_empty_frame_with_multiindex(array, dtype): + # GH 19602 - Preserve dtype on empty DataFrame with MultiIndex + idx = MultiIndex.from_product([[0, 1], [0.5, 1.0], array]) + result = DataFrame(index=idx)[:0].reset_index().dtypes + expected = Series({"level_0": np.int64, "level_1": np.float64, "level_2": dtype}) + tm.assert_series_equal(result, expected) + + +def test_reset_index_empty_frame_with_datetime64_multiindex(): + # https://github.com/pandas-dev/pandas/issues/35606 + idx = MultiIndex( + levels=[[Timestamp("2020-07-20 00:00:00")], [3, 4]], + codes=[[], []], + names=["a", "b"], + ) + df = DataFrame(index=idx, columns=["c", "d"]) + result = df.reset_index() + expected = DataFrame( + columns=list("abcd"), index=RangeIndex(start=0, stop=0, step=1) + ) + expected["a"] = expected["a"].astype("datetime64[ns]") + expected["b"] = expected["b"].astype("int64") + tm.assert_frame_equal(result, expected) + + +def test_reset_index_empty_frame_with_datetime64_multiindex_from_groupby(): + # https://github.com/pandas-dev/pandas/issues/35657 + df = DataFrame({"c1": [10.0], "c2": ["a"], "c3": pd.to_datetime("2020-01-01")}) + df = df.head(0).groupby(["c2", "c3"])[["c1"]].sum() + result = df.reset_index() + expected = DataFrame( + 
columns=["c2", "c3", "c1"], index=RangeIndex(start=0, stop=0, step=1) + ) + expected["c3"] = expected["c3"].astype("datetime64[ns]") + expected["c1"] = expected["c1"].astype("float64") + tm.assert_frame_equal(result, expected) + + +def test_reset_index_multiindex_nat(): + # GH 11479 + idx = range(3) + tstamp = date_range("2015-07-01", freq="D", periods=3) + df = DataFrame({"id": idx, "tstamp": tstamp, "a": list("abc")}) + df.loc[2, "tstamp"] = pd.NaT + result = df.set_index(["id", "tstamp"]).reset_index("id") + expected = DataFrame( + {"id": range(3), "a": list("abc")}, + index=pd.DatetimeIndex(["2015-07-01", "2015-07-02", "NaT"], name="tstamp"), + ) + tm.assert_frame_equal(result, expected) + + +def test_drop_pos_args_deprecation(): + # https://github.com/pandas-dev/pandas/issues/41485 + df = DataFrame({"a": [1, 2, 3]}).set_index("a") + msg = ( + r"In a future version of pandas all arguments of DataFrame\.reset_index " + r"except for the argument 'level' will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.reset_index("a", False) + expected = DataFrame({"a": [1, 2, 3]}) + tm.assert_frame_equal(result, expected) + + +def test_reset_index_interval_columns_object_cast(): + # GH 19136 + df = DataFrame( + np.eye(2), index=Index([1, 2], name="Year"), columns=cut([1, 2], [0, 1, 2]) + ) + result = df.reset_index() + expected = DataFrame( + [[1, 1.0, 0.0], [2, 0.0, 1.0]], + columns=Index(["Year", Interval(0, 1), Interval(1, 2)]), + ) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_round.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_round.py new file mode 100644 index 0000000..dd92069 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_round.py @@ -0,0 +1,218 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Series, + date_range, +) +import pandas._testing as tm + + +class TestDataFrameRound: + def test_round(self): + # GH#2665 + + # Test that rounding an empty DataFrame does nothing + df = DataFrame() + tm.assert_frame_equal(df, df.round()) + + # Here's the test frame we'll be working with + df = DataFrame({"col1": [1.123, 2.123, 3.123], "col2": [1.234, 2.234, 3.234]}) + + # Default round to integer (i.e. 
decimals=0) + expected_rounded = DataFrame({"col1": [1.0, 2.0, 3.0], "col2": [1.0, 2.0, 3.0]}) + tm.assert_frame_equal(df.round(), expected_rounded) + + # Round with an integer + decimals = 2 + expected_rounded = DataFrame( + {"col1": [1.12, 2.12, 3.12], "col2": [1.23, 2.23, 3.23]} + ) + tm.assert_frame_equal(df.round(decimals), expected_rounded) + + # This should also work with np.round (since np.round dispatches to + # df.round) + tm.assert_frame_equal(np.round(df, decimals), expected_rounded) + + # Round with a list + round_list = [1, 2] + msg = "decimals must be an integer, a dict-like or a Series" + with pytest.raises(TypeError, match=msg): + df.round(round_list) + + # Round with a dictionary + expected_rounded = DataFrame( + {"col1": [1.1, 2.1, 3.1], "col2": [1.23, 2.23, 3.23]} + ) + round_dict = {"col1": 1, "col2": 2} + tm.assert_frame_equal(df.round(round_dict), expected_rounded) + + # Incomplete dict + expected_partially_rounded = DataFrame( + {"col1": [1.123, 2.123, 3.123], "col2": [1.2, 2.2, 3.2]} + ) + partial_round_dict = {"col2": 1} + tm.assert_frame_equal(df.round(partial_round_dict), expected_partially_rounded) + + # Dict with unknown elements + wrong_round_dict = {"col3": 2, "col2": 1} + tm.assert_frame_equal(df.round(wrong_round_dict), expected_partially_rounded) + + # float input to `decimals` + non_int_round_dict = {"col1": 1, "col2": 0.5} + msg = "Values in decimals must be integers" + with pytest.raises(TypeError, match=msg): + df.round(non_int_round_dict) + + # String input + non_int_round_dict = {"col1": 1, "col2": "foo"} + with pytest.raises(TypeError, match=msg): + df.round(non_int_round_dict) + + non_int_round_Series = Series(non_int_round_dict) + with pytest.raises(TypeError, match=msg): + df.round(non_int_round_Series) + + # List input + non_int_round_dict = {"col1": 1, "col2": [1, 2]} + with pytest.raises(TypeError, match=msg): + df.round(non_int_round_dict) + + non_int_round_Series = Series(non_int_round_dict) + with pytest.raises(TypeError, match=msg): + df.round(non_int_round_Series) + + # Non integer Series inputs + non_int_round_Series = Series(non_int_round_dict) + with pytest.raises(TypeError, match=msg): + df.round(non_int_round_Series) + + non_int_round_Series = Series(non_int_round_dict) + with pytest.raises(TypeError, match=msg): + df.round(non_int_round_Series) + + # Negative numbers + negative_round_dict = {"col1": -1, "col2": -2} + big_df = df * 100 + expected_neg_rounded = DataFrame( + {"col1": [110.0, 210, 310], "col2": [100.0, 200, 300]} + ) + tm.assert_frame_equal(big_df.round(negative_round_dict), expected_neg_rounded) + + # nan in Series round + nan_round_Series = Series({"col1": np.nan, "col2": 1}) + + with pytest.raises(TypeError, match=msg): + df.round(nan_round_Series) + + # Make sure this doesn't break existing Series.round + tm.assert_series_equal(df["col1"].round(1), expected_rounded["col1"]) + + # named columns + # GH#11986 + decimals = 2 + expected_rounded = DataFrame( + {"col1": [1.12, 2.12, 3.12], "col2": [1.23, 2.23, 3.23]} + ) + df.columns.name = "cols" + expected_rounded.columns.name = "cols" + tm.assert_frame_equal(df.round(decimals), expected_rounded) + + # interaction of named columns & series + tm.assert_series_equal(df["col1"].round(decimals), expected_rounded["col1"]) + tm.assert_series_equal(df.round(decimals)["col1"], expected_rounded["col1"]) + + def test_round_numpy(self): + # GH#12600 + df = DataFrame([[1.53, 1.36], [0.06, 7.01]]) + out = np.round(df, decimals=0) + expected = DataFrame([[2.0, 1.0], [0.0, 7.0]]) + 
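
As a quick sketch of the per-column rounding covered above (illustrative frame, not from the tests): a dict maps column names to decimal counts, and unknown keys are silently ignored.

```python
import pandas as pd

df = pd.DataFrame({"col1": [1.123, 2.123], "col2": [1.234, 2.234]})

# col1 rounds to 1 decimal, col2 to 2; the stray key "col3" is ignored.
print(df.round({"col1": 1, "col2": 2, "col3": 0}))
#    col1  col2
# 0   1.1  1.23
# 1   2.1  2.23
```
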
tm.assert_frame_equal(out, expected) + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.round(df, decimals=0, out=df) + + def test_round_numpy_with_nan(self): + # See GH#14197 + df = Series([1.53, np.nan, 0.06]).to_frame() + with tm.assert_produces_warning(None): + result = df.round() + expected = Series([2.0, np.nan, 0.0]).to_frame() + tm.assert_frame_equal(result, expected) + + def test_round_mixed_type(self): + # GH#11885 + df = DataFrame( + { + "col1": [1.1, 2.2, 3.3, 4.4], + "col2": ["1", "a", "c", "f"], + "col3": date_range("20111111", periods=4), + } + ) + round_0 = DataFrame( + { + "col1": [1.0, 2.0, 3.0, 4.0], + "col2": ["1", "a", "c", "f"], + "col3": date_range("20111111", periods=4), + } + ) + tm.assert_frame_equal(df.round(), round_0) + tm.assert_frame_equal(df.round(1), df) + tm.assert_frame_equal(df.round({"col1": 1}), df) + tm.assert_frame_equal(df.round({"col1": 0}), round_0) + tm.assert_frame_equal(df.round({"col1": 0, "col2": 1}), round_0) + tm.assert_frame_equal(df.round({"col3": 1}), df) + + def test_round_with_duplicate_columns(self): + # GH#11611 + + df = DataFrame( + np.random.random([3, 3]), + columns=["A", "B", "C"], + index=["first", "second", "third"], + ) + + dfs = pd.concat((df, df), axis=1) + rounded = dfs.round() + tm.assert_index_equal(rounded.index, dfs.index) + + decimals = Series([1, 0, 2], index=["A", "B", "A"]) + msg = "Index of decimals must be unique" + with pytest.raises(ValueError, match=msg): + df.round(decimals) + + def test_round_builtin(self): + # GH#11763 + # Here's the test frame we'll be working with + df = DataFrame({"col1": [1.123, 2.123, 3.123], "col2": [1.234, 2.234, 3.234]}) + + # Default round to integer (i.e. decimals=0) + expected_rounded = DataFrame({"col1": [1.0, 2.0, 3.0], "col2": [1.0, 2.0, 3.0]}) + tm.assert_frame_equal(round(df), expected_rounded) + + def test_round_nonunique_categorical(self): + # See GH#21809 + idx = pd.CategoricalIndex(["low"] * 3 + ["hi"] * 3) + df = DataFrame(np.random.rand(6, 3), columns=list("abc")) + + expected = df.round(3) + expected.index = idx + + df_categorical = df.copy().set_index(idx) + assert df_categorical.shape == (6, 3) + result = df_categorical.round(3) + assert result.shape == (6, 3) + + tm.assert_frame_equal(result, expected) + + def test_round_interval_category_columns(self): + # GH#30063 + columns = pd.CategoricalIndex(pd.interval_range(0, 2)) + df = DataFrame([[0.66, 1.1], [0.3, 0.25]], columns=columns) + + result = df.round() + expected = DataFrame([[1.0, 1.0], [0.0, 0.0]], columns=columns) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_sample.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_sample.py new file mode 100644 index 0000000..901987a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_sample.py @@ -0,0 +1,365 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + Series, +) +import pandas._testing as tm +import pandas.core.common as com + + +class TestSample: + @pytest.fixture(params=[Series, DataFrame]) + def obj(self, request): + klass = request.param + if klass is Series: + arr = np.random.randn(10) + else: + arr = np.random.randn(10, 10) + return klass(arr, dtype=None) + + @pytest.mark.parametrize("test", list(range(10))) + def test_sample(self, test, obj): + # Fixes issue: 2419 + # Check behavior of random_state argument + # Check for stability when receives 
seed or random state -- run 10 + # times. + + seed = np.random.randint(0, 100) + tm.assert_equal( + obj.sample(n=4, random_state=seed), obj.sample(n=4, random_state=seed) + ) + + tm.assert_equal( + obj.sample(frac=0.7, random_state=seed), + obj.sample(frac=0.7, random_state=seed), + ) + + tm.assert_equal( + obj.sample(n=4, random_state=np.random.RandomState(test)), + obj.sample(n=4, random_state=np.random.RandomState(test)), + ) + + tm.assert_equal( + obj.sample(frac=0.7, random_state=np.random.RandomState(test)), + obj.sample(frac=0.7, random_state=np.random.RandomState(test)), + ) + + tm.assert_equal( + obj.sample(frac=2, replace=True, random_state=np.random.RandomState(test)), + obj.sample(frac=2, replace=True, random_state=np.random.RandomState(test)), + ) + + os1, os2 = [], [] + for _ in range(2): + np.random.seed(test) + os1.append(obj.sample(n=4)) + os2.append(obj.sample(frac=0.7)) + tm.assert_equal(*os1) + tm.assert_equal(*os2) + + def test_sample_lengths(self, obj): + # Check lengths are right + assert len(obj.sample(n=4)) == 4 + assert len(obj.sample(frac=0.34)) == 3 + assert len(obj.sample(frac=0.36)) == 4 + + def test_sample_invalid_random_state(self, obj): + # Check for error when random_state argument is invalid. + msg = ( + "random_state must be an integer, array-like, a BitGenerator, Generator, " + "a numpy RandomState, or None" + ) + with pytest.raises(ValueError, match=msg): + obj.sample(random_state="a_string") + + def test_sample_wont_accept_n_and_frac(self, obj): + # Giving both frac and N throws error + msg = "Please enter a value for `frac` OR `n`, not both" + with pytest.raises(ValueError, match=msg): + obj.sample(n=3, frac=0.3) + + def test_sample_requires_positive_n_frac(self, obj): + with pytest.raises( + ValueError, + match="A negative number of rows requested. Please provide `n` >= 0", + ): + obj.sample(n=-3) + with pytest.raises( + ValueError, + match="A negative number of rows requested. 
Please provide `frac` >= 0", + ): + obj.sample(frac=-0.3) + + def test_sample_requires_integer_n(self, obj): + # Make sure float values of `n` give error + with pytest.raises(ValueError, match="Only integers accepted as `n` values"): + obj.sample(n=3.2) + + def test_sample_invalid_weight_lengths(self, obj): + # Weight length must be right + msg = "Weights and axis to be sampled must be of same length" + with pytest.raises(ValueError, match=msg): + obj.sample(n=3, weights=[0, 1]) + + with pytest.raises(ValueError, match=msg): + bad_weights = [0.5] * 11 + obj.sample(n=3, weights=bad_weights) + + with pytest.raises(ValueError, match="Fewer non-zero entries in p than size"): + bad_weight_series = Series([0, 0, 0.2]) + obj.sample(n=4, weights=bad_weight_series) + + def test_sample_negative_weights(self, obj): + # Check won't accept negative weights + bad_weights = [-0.1] * 10 + msg = "weight vector many not include negative values" + with pytest.raises(ValueError, match=msg): + obj.sample(n=3, weights=bad_weights) + + def test_sample_inf_weights(self, obj): + # Check inf and -inf throw errors: + + weights_with_inf = [0.1] * 10 + weights_with_inf[0] = np.inf + msg = "weight vector may not include `inf` values" + with pytest.raises(ValueError, match=msg): + obj.sample(n=3, weights=weights_with_inf) + + weights_with_ninf = [0.1] * 10 + weights_with_ninf[0] = -np.inf + with pytest.raises(ValueError, match=msg): + obj.sample(n=3, weights=weights_with_ninf) + + def test_sample_zero_weights(self, obj): + # All zeros raises errors + + zero_weights = [0] * 10 + with pytest.raises(ValueError, match="Invalid weights: weights sum to zero"): + obj.sample(n=3, weights=zero_weights) + + def test_sample_missing_weights(self, obj): + # All missing weights + + nan_weights = [np.nan] * 10 + with pytest.raises(ValueError, match="Invalid weights: weights sum to zero"): + obj.sample(n=3, weights=nan_weights) + + def test_sample_none_weights(self, obj): + # Check None are also replaced by zeros. 
+ weights_with_None = [None] * 10 + weights_with_None[5] = 0.5 + tm.assert_equal( + obj.sample(n=1, axis=0, weights=weights_with_None), obj.iloc[5:6] + ) + + @pytest.mark.parametrize( + "func_str,arg", + [ + ("np.array", [2, 3, 1, 0]), + ("np.random.MT19937", 3), + ("np.random.PCG64", 11), + ], + ) + def test_sample_random_state(self, func_str, arg, frame_or_series): + # GH#32503 + obj = DataFrame({"col1": range(10, 20), "col2": range(20, 30)}) + obj = tm.get_obj(obj, frame_or_series) + result = obj.sample(n=3, random_state=eval(func_str)(arg)) + expected = obj.sample(n=3, random_state=com.random_state(eval(func_str)(arg))) + tm.assert_equal(result, expected) + + def test_sample_generator(self, frame_or_series): + # GH#38100 + obj = frame_or_series(np.arange(100)) + rng = np.random.default_rng() + + # Consecutive calls should advance the seed + result1 = obj.sample(n=50, random_state=rng) + result2 = obj.sample(n=50, random_state=rng) + assert not (result1.index.values == result2.index.values).all() + + # Matching generator initialization must give same result + # Consecutive calls should advance the seed + result1 = obj.sample(n=50, random_state=np.random.default_rng(11)) + result2 = obj.sample(n=50, random_state=np.random.default_rng(11)) + tm.assert_equal(result1, result2) + + def test_sample_upsampling_without_replacement(self, frame_or_series): + # GH#27451 + + obj = DataFrame({"A": list("abc")}) + obj = tm.get_obj(obj, frame_or_series) + + msg = ( + "Replace has to be set to `True` when " + "upsampling the population `frac` > 1." + ) + with pytest.raises(ValueError, match=msg): + obj.sample(frac=2, replace=False) + + +class TestSampleDataFrame: + # Tests which are relevant only for DataFrame, so these are + # as fully parametrized as they can get. + + def test_sample(self): + # GH#2419 + # additional specific object based tests + + # A few dataframe test with degenerate weights. + easy_weight_list = [0] * 10 + easy_weight_list[5] = 1 + + df = DataFrame( + { + "col1": range(10, 20), + "col2": range(20, 30), + "colString": ["a"] * 10, + "easyweights": easy_weight_list, + } + ) + sample1 = df.sample(n=1, weights="easyweights") + tm.assert_frame_equal(sample1, df.iloc[5:6]) + + # Ensure proper error if string given as weight for Series or + # DataFrame with axis = 1. + ser = Series(range(10)) + msg = "Strings cannot be passed as weights when sampling from a Series." + with pytest.raises(ValueError, match=msg): + ser.sample(n=3, weights="weight_column") + + msg = ( + "Strings can only be passed to weights when sampling from rows on a " + "DataFrame" + ) + with pytest.raises(ValueError, match=msg): + df.sample(n=1, weights="weight_column", axis=1) + + # Check weighting key error + with pytest.raises( + KeyError, match="'String passed to weights not a valid column'" + ): + df.sample(n=3, weights="not_a_real_column_name") + + # Check that re-normalizes weights that don't sum to one. 
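
Sketch of the renormalization the next block verifies (illustrative data): weights need not sum to one, only be non-negative with a non-zero total.

```python
import pandas as pd

df = pd.DataFrame({"x": range(4)})

# The weight vector sums to 0.25; pandas rescales it, so row 2 is
# drawn with probability 1 here.
print(df.sample(n=1, weights=[0, 0, 0.25, 0]))
```
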
+ weights_less_than_1 = [0] * 10 + weights_less_than_1[0] = 0.5 + tm.assert_frame_equal(df.sample(n=1, weights=weights_less_than_1), df.iloc[:1]) + + ### + # Test axis argument + ### + + df = DataFrame({"col1": range(10), "col2": ["a"] * 10}) + second_column_weight = [0, 1] + tm.assert_frame_equal( + df.sample(n=1, axis=1, weights=second_column_weight), df[["col2"]] + ) + + # Different axis arg types + tm.assert_frame_equal( + df.sample(n=1, axis="columns", weights=second_column_weight), df[["col2"]] + ) + + weight = [0] * 10 + weight[5] = 0.5 + tm.assert_frame_equal(df.sample(n=1, axis="rows", weights=weight), df.iloc[5:6]) + tm.assert_frame_equal( + df.sample(n=1, axis="index", weights=weight), df.iloc[5:6] + ) + + # Check out of range axis values + msg = "No axis named 2 for object type DataFrame" + with pytest.raises(ValueError, match=msg): + df.sample(n=1, axis=2) + + msg = "No axis named not_a_name for object type DataFrame" + with pytest.raises(ValueError, match=msg): + df.sample(n=1, axis="not_a_name") + + ser = Series(range(10)) + with pytest.raises(ValueError, match="No axis named 1 for object type Series"): + ser.sample(n=1, axis=1) + + # Test weight length compared to correct axis + msg = "Weights and axis to be sampled must be of same length" + with pytest.raises(ValueError, match=msg): + df.sample(n=1, axis=1, weights=[0.5] * 10) + + def test_sample_axis1(self): + # Check weights with axis = 1 + easy_weight_list = [0] * 3 + easy_weight_list[2] = 1 + + df = DataFrame( + {"col1": range(10, 20), "col2": range(20, 30), "colString": ["a"] * 10} + ) + sample1 = df.sample(n=1, axis=1, weights=easy_weight_list) + tm.assert_frame_equal(sample1, df[["colString"]]) + + # Test default axes + tm.assert_frame_equal( + df.sample(n=3, random_state=42), df.sample(n=3, axis=0, random_state=42) + ) + + def test_sample_aligns_weights_with_frame(self): + + # Test that function aligns weights with frame + df = DataFrame({"col1": [5, 6, 7], "col2": ["a", "b", "c"]}, index=[9, 5, 3]) + ser = Series([1, 0, 0], index=[3, 5, 9]) + tm.assert_frame_equal(df.loc[[3]], df.sample(1, weights=ser)) + + # Weights have index values to be dropped because not in + # sampled DataFrame + ser2 = Series([0.001, 0, 10000], index=[3, 5, 10]) + tm.assert_frame_equal(df.loc[[3]], df.sample(1, weights=ser2)) + + # Weights have missing values to be filled with zeros + ser3 = Series([0.01, 0], index=[3, 5]) + tm.assert_frame_equal(df.loc[[3]], df.sample(1, weights=ser3)) + + # No overlap in weight and sampled DataFrame indices + ser4 = Series([1, 0], index=[1, 2]) + + with pytest.raises(ValueError, match="Invalid weights: weights sum to zero"): + df.sample(1, weights=ser4) + + def test_sample_is_copy(self): + # GH#27357, GH#30784: ensure the result of sample is an actual copy and + # doesn't track the parent dataframe / doesn't give SettingWithCopy warnings + df = DataFrame(np.random.randn(10, 3), columns=["a", "b", "c"]) + df2 = df.sample(3) + + with tm.assert_produces_warning(None): + df2["d"] = 1 + + def test_sample_does_not_modify_weights(self): + # GH-42843 + result = np.array([np.nan, 1, np.nan]) + expected = result.copy() + ser = Series([1, 2, 3]) + + # Test numpy array weights won't be modified in place + ser.sample(weights=result) + tm.assert_numpy_array_equal(result, expected) + + # Test DataFrame column won't be modified in place + df = DataFrame({"values": [1, 1, 1], "weights": [1, np.nan, np.nan]}) + expected = df["weights"].copy() + + df.sample(frac=1.0, replace=True, weights="weights") + 
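
The invariant being pinned down here, as a standalone sketch (illustrative array): sampling zeroes NaN weights on an internal copy, never on the caller's array.

```python
import numpy as np
import pandas as pd

w = np.array([np.nan, 1.0, np.nan])
pd.Series([1, 2, 3]).sample(weights=w)

# The NaNs are still there; sample() worked on a copy.
assert np.isnan(w[0]) and np.isnan(w[2])
```
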
result = df["weights"] + tm.assert_series_equal(result, expected) + + def test_sample_ignore_index(self): + # GH 38581 + df = DataFrame( + {"col1": range(10, 20), "col2": range(20, 30), "colString": ["a"] * 10} + ) + result = df.sample(3, ignore_index=True) + expected_index = Index(range(3)) + tm.assert_index_equal(result.index, expected_index, exact=True) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_select_dtypes.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_select_dtypes.py new file mode 100644 index 0000000..4cfd997 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_select_dtypes.py @@ -0,0 +1,443 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import ExtensionDtype + +import pandas as pd +from pandas import ( + DataFrame, + Timestamp, +) +import pandas._testing as tm +from pandas.core.arrays import ExtensionArray + + +class DummyDtype(ExtensionDtype): + type = int + + def __init__(self, numeric): + self._numeric = numeric + + @property + def name(self): + return "Dummy" + + @property + def _is_numeric(self): + return self._numeric + + +class DummyArray(ExtensionArray): + def __init__(self, data, dtype): + self.data = data + self._dtype = dtype + + def __array__(self, dtype): + return self.data + + @property + def dtype(self): + return self._dtype + + def __len__(self) -> int: + return len(self.data) + + def __getitem__(self, item): + pass + + def copy(self): + return self + + +class TestSelectDtypes: + def test_select_dtypes_include_using_list_like(self): + df = DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.Categorical(list("abc")), + "g": pd.date_range("20130101", periods=3), + "h": pd.date_range("20130101", periods=3, tz="US/Eastern"), + "i": pd.date_range("20130101", periods=3, tz="CET"), + "j": pd.period_range("2013-01", periods=3, freq="M"), + "k": pd.timedelta_range("1 day", periods=3), + } + ) + + ri = df.select_dtypes(include=[np.number]) + ei = df[["b", "c", "d", "k"]] + tm.assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include=[np.number], exclude=["timedelta"]) + ei = df[["b", "c", "d"]] + tm.assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include=[np.number, "category"], exclude=["timedelta"]) + ei = df[["b", "c", "d", "f"]] + tm.assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include=["datetime"]) + ei = df[["g"]] + tm.assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include=["datetime64"]) + ei = df[["g"]] + tm.assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include=["datetimetz"]) + ei = df[["h", "i"]] + tm.assert_frame_equal(ri, ei) + + with pytest.raises(NotImplementedError, match=r"^$"): + df.select_dtypes(include=["period"]) + + def test_select_dtypes_exclude_using_list_like(self): + df = DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + } + ) + re = df.select_dtypes(exclude=[np.number]) + ee = df[["a", "e"]] + tm.assert_frame_equal(re, ee) + + def test_select_dtypes_exclude_include_using_list_like(self): + df = DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6, dtype="u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.date_range("now", periods=3).values, + } + ) + exclude = (np.datetime64,) + include 
= np.bool_, "integer" + r = df.select_dtypes(include=include, exclude=exclude) + e = df[["b", "c", "e"]] + tm.assert_frame_equal(r, e) + + exclude = ("datetime",) + include = "bool", "int64", "int32" + r = df.select_dtypes(include=include, exclude=exclude) + e = df[["b", "e"]] + tm.assert_frame_equal(r, e) + + @pytest.mark.parametrize( + "include", [(np.bool_, "int"), (np.bool_, "integer"), ("bool", int)] + ) + def test_select_dtypes_exclude_include_int(self, include): + # Fix select_dtypes(include='int') for Windows, FYI #36596 + df = DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6, dtype="int32"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.date_range("now", periods=3).values, + } + ) + exclude = (np.datetime64,) + result = df.select_dtypes(include=include, exclude=exclude) + expected = df[["b", "c", "e"]] + tm.assert_frame_equal(result, expected) + + def test_select_dtypes_include_using_scalars(self): + df = DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.Categorical(list("abc")), + "g": pd.date_range("20130101", periods=3), + "h": pd.date_range("20130101", periods=3, tz="US/Eastern"), + "i": pd.date_range("20130101", periods=3, tz="CET"), + "j": pd.period_range("2013-01", periods=3, freq="M"), + "k": pd.timedelta_range("1 day", periods=3), + } + ) + + ri = df.select_dtypes(include=np.number) + ei = df[["b", "c", "d", "k"]] + tm.assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include="datetime") + ei = df[["g"]] + tm.assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include="datetime64") + ei = df[["g"]] + tm.assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include="category") + ei = df[["f"]] + tm.assert_frame_equal(ri, ei) + + with pytest.raises(NotImplementedError, match=r"^$"): + df.select_dtypes(include="period") + + def test_select_dtypes_exclude_using_scalars(self): + df = DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.Categorical(list("abc")), + "g": pd.date_range("20130101", periods=3), + "h": pd.date_range("20130101", periods=3, tz="US/Eastern"), + "i": pd.date_range("20130101", periods=3, tz="CET"), + "j": pd.period_range("2013-01", periods=3, freq="M"), + "k": pd.timedelta_range("1 day", periods=3), + } + ) + + ri = df.select_dtypes(exclude=np.number) + ei = df[["a", "e", "f", "g", "h", "i", "j"]] + tm.assert_frame_equal(ri, ei) + + ri = df.select_dtypes(exclude="category") + ei = df[["a", "b", "c", "d", "e", "g", "h", "i", "j", "k"]] + tm.assert_frame_equal(ri, ei) + + with pytest.raises(NotImplementedError, match=r"^$"): + df.select_dtypes(exclude="period") + + def test_select_dtypes_include_exclude_using_scalars(self): + df = DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.Categorical(list("abc")), + "g": pd.date_range("20130101", periods=3), + "h": pd.date_range("20130101", periods=3, tz="US/Eastern"), + "i": pd.date_range("20130101", periods=3, tz="CET"), + "j": pd.period_range("2013-01", periods=3, freq="M"), + "k": pd.timedelta_range("1 day", periods=3), + } + ) + + ri = df.select_dtypes(include=np.number, exclude="floating") + ei = df[["b", "c", "k"]] + tm.assert_frame_equal(ri, ei) + + def 
test_select_dtypes_include_exclude_mixed_scalars_lists(self): + df = DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.Categorical(list("abc")), + "g": pd.date_range("20130101", periods=3), + "h": pd.date_range("20130101", periods=3, tz="US/Eastern"), + "i": pd.date_range("20130101", periods=3, tz="CET"), + "j": pd.period_range("2013-01", periods=3, freq="M"), + "k": pd.timedelta_range("1 day", periods=3), + } + ) + + ri = df.select_dtypes(include=np.number, exclude=["floating", "timedelta"]) + ei = df[["b", "c"]] + tm.assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include=[np.number, "category"], exclude="floating") + ei = df[["b", "c", "f", "k"]] + tm.assert_frame_equal(ri, ei) + + def test_select_dtypes_duplicate_columns(self): + # GH20839 + df = DataFrame( + { + "a": ["a", "b", "c"], + "b": [1, 2, 3], + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.date_range("now", periods=3).values, + } + ) + df.columns = ["a", "a", "b", "b", "b", "c"] + + expected = DataFrame( + {"a": list(range(1, 4)), "b": np.arange(3, 6).astype("u1")} + ) + + result = df.select_dtypes(include=[np.number], exclude=["floating"]) + tm.assert_frame_equal(result, expected) + + def test_select_dtypes_not_an_attr_but_still_valid_dtype(self): + df = DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.date_range("now", periods=3).values, + } + ) + df["g"] = df.f.diff() + assert not hasattr(np, "u8") + r = df.select_dtypes(include=["i8", "O"], exclude=["timedelta"]) + e = df[["a", "b"]] + tm.assert_frame_equal(r, e) + + r = df.select_dtypes(include=["i8", "O", "timedelta64[ns]"]) + e = df[["a", "b", "g"]] + tm.assert_frame_equal(r, e) + + def test_select_dtypes_empty(self): + df = DataFrame({"a": list("abc"), "b": list(range(1, 4))}) + msg = "at least one of include or exclude must be nonempty" + with pytest.raises(ValueError, match=msg): + df.select_dtypes() + + def test_select_dtypes_bad_datetime64(self): + df = DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.date_range("now", periods=3).values, + } + ) + with pytest.raises(ValueError, match=".+ is too specific"): + df.select_dtypes(include=["datetime64[D]"]) + + with pytest.raises(ValueError, match=".+ is too specific"): + df.select_dtypes(exclude=["datetime64[as]"]) + + def test_select_dtypes_datetime_with_tz(self): + + df2 = DataFrame( + { + "A": Timestamp("20130102", tz="US/Eastern"), + "B": Timestamp("20130603", tz="CET"), + }, + index=range(5), + ) + df3 = pd.concat([df2.A.to_frame(), df2.B.to_frame()], axis=1) + result = df3.select_dtypes(include=["datetime64[ns]"]) + expected = df3.reindex(columns=[]) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "dtype", [str, "str", np.string_, "S1", "unicode", np.unicode_, "U1"] + ) + @pytest.mark.parametrize("arg", ["include", "exclude"]) + def test_select_dtypes_str_raises(self, dtype, arg): + df = DataFrame( + { + "a": list("abc"), + "g": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.date_range("now", periods=3).values, + } 
+ ) + msg = "string dtypes are not allowed" + kwargs = {arg: [dtype]} + + with pytest.raises(TypeError, match=msg): + df.select_dtypes(**kwargs) + + def test_select_dtypes_bad_arg_raises(self): + df = DataFrame( + { + "a": list("abc"), + "g": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.date_range("now", periods=3).values, + } + ) + + msg = "data type.*not understood" + with pytest.raises(TypeError, match=msg): + df.select_dtypes(["blargy, blarg, blarg"]) + + def test_select_dtypes_typecodes(self): + # GH 11990 + df = tm.makeCustomDataframe(30, 3, data_gen_f=lambda x, y: np.random.random()) + expected = df + FLOAT_TYPES = list(np.typecodes["AllFloat"]) + tm.assert_frame_equal(df.select_dtypes(FLOAT_TYPES), expected) + + @pytest.mark.parametrize( + "arr,expected", + ( + (np.array([1, 2], dtype=np.int32), True), + (pd.array([1, 2], dtype="Int32"), True), + (DummyArray([1, 2], dtype=DummyDtype(numeric=True)), True), + (DummyArray([1, 2], dtype=DummyDtype(numeric=False)), False), + ), + ) + def test_select_dtypes_numeric(self, arr, expected): + # GH 35340 + + df = DataFrame(arr) + is_selected = df.select_dtypes(np.number).shape == df.shape + assert is_selected == expected + + def test_select_dtypes_numeric_nullable_string(self, nullable_string_dtype): + arr = pd.array(["a", "b"], dtype=nullable_string_dtype) + df = DataFrame(arr) + is_selected = df.select_dtypes(np.number).shape == df.shape + assert not is_selected + + @pytest.mark.parametrize( + "expected, float_dtypes", + [ + [ + DataFrame( + {"A": range(3), "B": range(5, 8), "C": range(10, 7, -1)} + ).astype(dtype={"A": float, "B": np.float64, "C": np.float32}), + float, + ], + [ + DataFrame( + {"A": range(3), "B": range(5, 8), "C": range(10, 7, -1)} + ).astype(dtype={"A": float, "B": np.float64, "C": np.float32}), + "float", + ], + [DataFrame({"C": range(10, 7, -1)}, dtype=np.float32), np.float32], + [ + DataFrame({"A": range(3), "B": range(5, 8)}).astype( + dtype={"A": float, "B": np.float64} + ), + np.float64, + ], + ], + ) + def test_select_dtypes_float_dtype(self, expected, float_dtypes): + # GH#42452 + dtype_dict = {"A": float, "B": np.float64, "C": np.float32} + df = DataFrame( + {"A": range(3), "B": range(5, 8), "C": range(10, 7, -1)}, + ) + df = df.astype(dtype_dict) + result = df.select_dtypes(include=float_dtypes) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_set_axis.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_set_axis.py new file mode 100644 index 0000000..3284243 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_set_axis.py @@ -0,0 +1,123 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Series, +) +import pandas._testing as tm + + +class SharedSetAxisTests: + @pytest.fixture + def obj(self): + raise NotImplementedError("Implemented by subclasses") + + def test_set_axis(self, obj): + # GH14636; this tests setting index for both Series and DataFrame + new_index = list("abcd")[: len(obj)] + + expected = obj.copy() + expected.index = new_index + + # inplace=False + result = obj.set_axis(new_index, axis=0, inplace=False) + tm.assert_equal(expected, result) + + @pytest.mark.parametrize("axis", [0, "index", 1, "columns"]) + def test_set_axis_inplace_axis(self, axis, obj): + # GH#14636 + if obj.ndim == 1 and axis in [1, "columns"]: + # Series only has [0, 
"index"] + return + + new_index = list("abcd")[: len(obj)] + + expected = obj.copy() + if axis in [0, "index"]: + expected.index = new_index + else: + expected.columns = new_index + + result = obj.copy() + result.set_axis(new_index, axis=axis, inplace=True) + tm.assert_equal(result, expected) + + def test_set_axis_unnamed_kwarg_warns(self, obj): + # omitting the "axis" parameter + new_index = list("abcd")[: len(obj)] + + expected = obj.copy() + expected.index = new_index + + with tm.assert_produces_warning(None): + result = obj.set_axis(new_index, inplace=False) + tm.assert_equal(result, expected) + + @pytest.mark.parametrize("axis", [3, "foo"]) + def test_set_axis_invalid_axis_name(self, axis, obj): + # wrong values for the "axis" parameter + with pytest.raises(ValueError, match="No axis named"): + obj.set_axis(list("abc"), axis=axis) + + def test_set_axis_setattr_index_not_collection(self, obj): + # wrong type + msg = ( + r"Index\(\.\.\.\) must be called with a collection of some " + r"kind, None was passed" + ) + with pytest.raises(TypeError, match=msg): + obj.index = None + + def test_set_axis_setattr_index_wrong_length(self, obj): + # wrong length + msg = ( + f"Length mismatch: Expected axis has {len(obj)} elements, " + f"new values have {len(obj)-1} elements" + ) + with pytest.raises(ValueError, match=msg): + obj.index = np.arange(len(obj) - 1) + + if obj.ndim == 2: + with pytest.raises(ValueError, match="Length mismatch"): + obj.columns = obj.columns[::2] + + +class TestDataFrameSetAxis(SharedSetAxisTests): + @pytest.fixture + def obj(self): + df = DataFrame( + {"A": [1.1, 2.2, 3.3], "B": [5.0, 6.1, 7.2], "C": [4.4, 5.5, 6.6]}, + index=[2010, 2011, 2012], + ) + return df + + +class TestSeriesSetAxis(SharedSetAxisTests): + @pytest.fixture + def obj(self): + ser = Series(np.arange(4), index=[1, 3, 5, 7], dtype="int64") + return ser + + +def test_nonkeyword_arguments_deprecation_warning(): + # https://github.com/pandas-dev/pandas/issues/41485 + df = DataFrame({"a": [1, 2, 3]}) + msg = ( + r"In a future version of pandas all arguments of DataFrame\.set_axis " + r"except for the argument 'labels' will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.set_axis([1, 2, 4], 0) + expected = DataFrame({"a": [1, 2, 3]}, index=[1, 2, 4]) + tm.assert_frame_equal(result, expected) + + ser = Series([1, 2, 3]) + msg = ( + r"In a future version of pandas all arguments of Series\.set_axis " + r"except for the argument 'labels' will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = ser.set_axis([1, 2, 4], 0) + expected = Series([1, 2, 3], index=[1, 2, 4]) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_set_index.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_set_index.py new file mode 100644 index 0000000..1b3db10 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_set_index.py @@ -0,0 +1,718 @@ +""" +See also: test_reindex.py:TestReindexSetIndex +""" + +from datetime import ( + datetime, + timedelta, +) + +import numpy as np +import pytest + +from pandas import ( + Categorical, + DataFrame, + DatetimeIndex, + Index, + MultiIndex, + Series, + date_range, + period_range, + to_datetime, +) +import pandas._testing as tm + + +class TestSetIndex: + def test_set_index_multiindex(self): + # segfault in GH#3308 + d = {"t1": [2, 2.5, 3], "t2": [4, 5, 6]} + df = DataFrame(d) + tuples = [(0, 
1), (0, 2), (1, 2)] + df["tuples"] = tuples + + index = MultiIndex.from_tuples(df["tuples"]) + # it works! + df.set_index(index) + + def test_set_index_empty_column(self): + # GH#1971 + df = DataFrame( + [ + {"a": 1, "p": 0}, + {"a": 2, "m": 10}, + {"a": 3, "m": 11, "p": 20}, + {"a": 4, "m": 12, "p": 21}, + ], + columns=["a", "m", "p", "x"], + ) + + result = df.set_index(["a", "x"]) + + expected = df[["m", "p"]] + expected.index = MultiIndex.from_arrays([df["a"], df["x"]], names=["a", "x"]) + tm.assert_frame_equal(result, expected) + + def test_set_index_empty_dataframe(self): + # GH#38419 + df1 = DataFrame( + {"a": Series(dtype="datetime64[ns]"), "b": Series(dtype="int64"), "c": []} + ) + + df2 = df1.set_index(["a", "b"]) + result = df2.index.to_frame().dtypes + expected = df1[["a", "b"]].dtypes + tm.assert_series_equal(result, expected) + + def test_set_index_multiindexcolumns(self): + columns = MultiIndex.from_tuples([("foo", 1), ("foo", 2), ("bar", 1)]) + df = DataFrame(np.random.randn(3, 3), columns=columns) + + result = df.set_index(df.columns[0]) + + expected = df.iloc[:, 1:] + expected.index = df.iloc[:, 0].values + expected.index.names = [df.columns[0]] + tm.assert_frame_equal(result, expected) + + def test_set_index_timezone(self): + # GH#12358 + # tz-aware Series should retain the tz + idx = DatetimeIndex(["2014-01-01 10:10:10"], tz="UTC").tz_convert("Europe/Rome") + df = DataFrame({"A": idx}) + assert df.set_index(idx).index[0].hour == 11 + assert DatetimeIndex(Series(df.A))[0].hour == 11 + assert df.set_index(df.A).index[0].hour == 11 + + def test_set_index_cast_datetimeindex(self): + df = DataFrame( + { + "A": [datetime(2000, 1, 1) + timedelta(i) for i in range(1000)], + "B": np.random.randn(1000), + } + ) + + idf = df.set_index("A") + assert isinstance(idf.index, DatetimeIndex) + + def test_set_index_dst(self): + di = date_range("2006-10-29 00:00:00", periods=3, freq="H", tz="US/Pacific") + + df = DataFrame(data={"a": [0, 1, 2], "b": [3, 4, 5]}, index=di).reset_index() + # single level + res = df.set_index("index") + exp = DataFrame( + data={"a": [0, 1, 2], "b": [3, 4, 5]}, + index=Index(di, name="index"), + ) + exp.index = exp.index._with_freq(None) + tm.assert_frame_equal(res, exp) + + # GH#12920 + res = df.set_index(["index", "a"]) + exp_index = MultiIndex.from_arrays([di, [0, 1, 2]], names=["index", "a"]) + exp = DataFrame({"b": [3, 4, 5]}, index=exp_index) + tm.assert_frame_equal(res, exp) + + def test_set_index(self, float_string_frame): + df = float_string_frame + idx = Index(np.arange(len(df))[::-1]) + + df = df.set_index(idx) + tm.assert_index_equal(df.index, idx) + with pytest.raises(ValueError, match="Length mismatch"): + df.set_index(idx[::2]) + + def test_set_index_names(self): + df = tm.makeDataFrame() + df.index.name = "name" + + assert df.set_index(df.index).index.names == ["name"] + + mi = MultiIndex.from_arrays(df[["A", "B"]].T.values, names=["A", "B"]) + mi2 = MultiIndex.from_arrays( + df[["A", "B", "A", "B"]].T.values, names=["A", "B", "C", "D"] + ) + + df = df.set_index(["A", "B"]) + + assert df.set_index(df.index).index.names == ["A", "B"] + + # Check that set_index isn't converting a MultiIndex into an Index + assert isinstance(df.set_index(df.index).index, MultiIndex) + + # Check actual equality + tm.assert_index_equal(df.set_index(df.index).index, mi) + + idx2 = df.index.rename(["C", "D"]) + + # Check that [MultiIndex, MultiIndex] yields a MultiIndex rather + # than a pair of tuples + assert isinstance(df.set_index([df.index, idx2]).index, 
MultiIndex) + + # Check equality + tm.assert_index_equal(df.set_index([df.index, idx2]).index, mi2) + + def test_set_index_cast(self): + # issue casting an index then set_index + df = DataFrame( + {"A": [1.1, 2.2, 3.3], "B": [5.0, 6.1, 7.2]}, index=[2010, 2011, 2012] + ) + df2 = df.set_index(df.index.astype(np.int32)) + tm.assert_frame_equal(df, df2) + + # A has duplicate values, C does not + @pytest.mark.parametrize("keys", ["A", "C", ["A", "B"], ("tuple", "as", "label")]) + @pytest.mark.parametrize("inplace", [True, False]) + @pytest.mark.parametrize("drop", [True, False]) + def test_set_index_drop_inplace(self, frame_of_index_cols, drop, inplace, keys): + df = frame_of_index_cols + + if isinstance(keys, list): + idx = MultiIndex.from_arrays([df[x] for x in keys], names=keys) + else: + idx = Index(df[keys], name=keys) + expected = df.drop(keys, axis=1) if drop else df + expected.index = idx + + if inplace: + result = df.copy() + return_value = result.set_index(keys, drop=drop, inplace=True) + assert return_value is None + else: + result = df.set_index(keys, drop=drop) + + tm.assert_frame_equal(result, expected) + + # A has duplicate values, C does not + @pytest.mark.parametrize("keys", ["A", "C", ["A", "B"], ("tuple", "as", "label")]) + @pytest.mark.parametrize("drop", [True, False]) + def test_set_index_append(self, frame_of_index_cols, drop, keys): + df = frame_of_index_cols + + keys = keys if isinstance(keys, list) else [keys] + idx = MultiIndex.from_arrays( + [df.index] + [df[x] for x in keys], names=[None] + keys + ) + expected = df.drop(keys, axis=1) if drop else df.copy() + expected.index = idx + + result = df.set_index(keys, drop=drop, append=True) + + tm.assert_frame_equal(result, expected) + + # A has duplicate values, C does not + @pytest.mark.parametrize("keys", ["A", "C", ["A", "B"], ("tuple", "as", "label")]) + @pytest.mark.parametrize("drop", [True, False]) + def test_set_index_append_to_multiindex(self, frame_of_index_cols, drop, keys): + # append to existing multiindex + df = frame_of_index_cols.set_index(["D"], drop=drop, append=True) + + keys = keys if isinstance(keys, list) else [keys] + expected = frame_of_index_cols.set_index(["D"] + keys, drop=drop, append=True) + + result = df.set_index(keys, drop=drop, append=True) + + tm.assert_frame_equal(result, expected) + + def test_set_index_after_mutation(self): + # GH#1590 + df = DataFrame({"val": [0, 1, 2], "key": ["a", "b", "c"]}) + expected = DataFrame({"val": [1, 2]}, Index(["b", "c"], name="key")) + + df2 = df.loc[df.index.map(lambda indx: indx >= 1)] + result = df2.set_index("key") + tm.assert_frame_equal(result, expected) + + # MultiIndex constructor does not work directly on Series -> lambda + # Add list-of-list constructor because list is ambiguous -> lambda + # also test index name if append=True (name is duplicate here for B) + @pytest.mark.parametrize( + "box", + [ + Series, + Index, + np.array, + list, + lambda x: [list(x)], + lambda x: MultiIndex.from_arrays([x]), + ], + ) + @pytest.mark.parametrize( + "append, index_name", [(True, None), (True, "B"), (True, "test"), (False, None)] + ) + @pytest.mark.parametrize("drop", [True, False]) + def test_set_index_pass_single_array( + self, frame_of_index_cols, drop, append, index_name, box + ): + df = frame_of_index_cols + df.index.name = index_name + + key = box(df["B"]) + if box == list: + # list of strings gets interpreted as list of keys + msg = "['one', 'two', 'three', 'one', 'two']" + with pytest.raises(KeyError, match=msg): + df.set_index(key, drop=drop, 
append=append) + else: + # np.array/list-of-list "forget" the name of B + name_mi = getattr(key, "names", None) + name = [getattr(key, "name", None)] if name_mi is None else name_mi + + result = df.set_index(key, drop=drop, append=append) + + # only valid column keys are dropped + # since B is always passed as array above, nothing is dropped + expected = df.set_index(["B"], drop=False, append=append) + expected.index.names = [index_name] + name if append else name + + tm.assert_frame_equal(result, expected) + + # MultiIndex constructor does not work directly on Series -> lambda + # also test index name if append=True (name is duplicate here for A & B) + @pytest.mark.parametrize( + "box", [Series, Index, np.array, list, lambda x: MultiIndex.from_arrays([x])] + ) + @pytest.mark.parametrize( + "append, index_name", + [(True, None), (True, "A"), (True, "B"), (True, "test"), (False, None)], + ) + @pytest.mark.parametrize("drop", [True, False]) + def test_set_index_pass_arrays( + self, frame_of_index_cols, drop, append, index_name, box + ): + df = frame_of_index_cols + df.index.name = index_name + + keys = ["A", box(df["B"])] + # np.array/list "forget" the name of B + names = ["A", None if box in [np.array, list, tuple, iter] else "B"] + + result = df.set_index(keys, drop=drop, append=append) + + # only valid column keys are dropped + # since B is always passed as array above, only A is dropped, if at all + expected = df.set_index(["A", "B"], drop=False, append=append) + expected = expected.drop("A", axis=1) if drop else expected + expected.index.names = [index_name] + names if append else names + + tm.assert_frame_equal(result, expected) + + # MultiIndex constructor does not work directly on Series -> lambda + # We also emulate a "constructor" for the label -> lambda + # also test index name if append=True (name is duplicate here for A) + @pytest.mark.parametrize( + "box2", + [ + Series, + Index, + np.array, + list, + iter, + lambda x: MultiIndex.from_arrays([x]), + lambda x: x.name, + ], + ) + @pytest.mark.parametrize( + "box1", + [ + Series, + Index, + np.array, + list, + iter, + lambda x: MultiIndex.from_arrays([x]), + lambda x: x.name, + ], + ) + @pytest.mark.parametrize( + "append, index_name", [(True, None), (True, "A"), (True, "test"), (False, None)] + ) + @pytest.mark.parametrize("drop", [True, False]) + def test_set_index_pass_arrays_duplicate( + self, frame_of_index_cols, drop, append, index_name, box1, box2 + ): + df = frame_of_index_cols + df.index.name = index_name + + keys = [box1(df["A"]), box2(df["A"])] + result = df.set_index(keys, drop=drop, append=append) + + # if either box is iter, it has been consumed; re-read + keys = [box1(df["A"]), box2(df["A"])] + + # need to adapt first drop for case that both keys are 'A' -- + # cannot drop the same column twice; + # plain == would give ambiguous Boolean error for containers + first_drop = ( + False + if ( + isinstance(keys[0], str) + and keys[0] == "A" + and isinstance(keys[1], str) + and keys[1] == "A" + ) + else drop + ) + # to test against already-tested behaviour, we add sequentially, + # hence second append always True; must wrap keys in list, otherwise + # box = list would be interpreted as keys + expected = df.set_index([keys[0]], drop=first_drop, append=append) + expected = expected.set_index([keys[1]], drop=drop, append=True) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("append", [True, False]) + @pytest.mark.parametrize("drop", [True, False]) + def test_set_index_pass_multiindex(self, 
frame_of_index_cols, drop, append): + df = frame_of_index_cols + keys = MultiIndex.from_arrays([df["A"], df["B"]], names=["A", "B"]) + + result = df.set_index(keys, drop=drop, append=append) + + # setting with a MultiIndex will never drop columns + expected = df.set_index(["A", "B"], drop=False, append=append) + + tm.assert_frame_equal(result, expected) + + def test_construction_with_categorical_index(self): + ci = tm.makeCategoricalIndex(10) + ci.name = "B" + + # with Categorical + df = DataFrame({"A": np.random.randn(10), "B": ci.values}) + idf = df.set_index("B") + tm.assert_index_equal(idf.index, ci) + + # from a CategoricalIndex + df = DataFrame({"A": np.random.randn(10), "B": ci}) + idf = df.set_index("B") + tm.assert_index_equal(idf.index, ci) + + # round-trip + idf = idf.reset_index().set_index("B") + tm.assert_index_equal(idf.index, ci) + + def test_set_index_preserve_categorical_dtype(self): + # GH#13743, GH#13854 + df = DataFrame( + { + "A": [1, 2, 1, 1, 2], + "B": [10, 16, 22, 28, 34], + "C1": Categorical(list("abaab"), categories=list("bac"), ordered=False), + "C2": Categorical(list("abaab"), categories=list("bac"), ordered=True), + } + ) + for cols in ["C1", "C2", ["A", "C1"], ["A", "C2"], ["C1", "C2"]]: + result = df.set_index(cols).reset_index() + result = result.reindex(columns=df.columns) + tm.assert_frame_equal(result, df) + + def test_set_index_datetime(self): + # GH#3950 + df = DataFrame( + { + "label": ["a", "a", "a", "b", "b", "b"], + "datetime": [ + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + ], + "value": range(6), + } + ) + df.index = to_datetime(df.pop("datetime"), utc=True) + df.index = df.index.tz_convert("US/Pacific") + + expected = DatetimeIndex( + ["2011-07-19 07:00:00", "2011-07-19 08:00:00", "2011-07-19 09:00:00"], + name="datetime", + ) + expected = expected.tz_localize("UTC").tz_convert("US/Pacific") + + df = df.set_index("label", append=True) + tm.assert_index_equal(df.index.levels[0], expected) + tm.assert_index_equal(df.index.levels[1], Index(["a", "b"], name="label")) + assert df.index.names == ["datetime", "label"] + + df = df.swaplevel(0, 1) + tm.assert_index_equal(df.index.levels[0], Index(["a", "b"], name="label")) + tm.assert_index_equal(df.index.levels[1], expected) + assert df.index.names == ["label", "datetime"] + + df = DataFrame(np.random.random(6)) + idx1 = DatetimeIndex( + [ + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + ], + tz="US/Eastern", + ) + idx2 = DatetimeIndex( + [ + "2012-04-01 09:00", + "2012-04-01 09:00", + "2012-04-01 09:00", + "2012-04-02 09:00", + "2012-04-02 09:00", + "2012-04-02 09:00", + ], + tz="US/Eastern", + ) + idx3 = date_range("2011-01-01 09:00", periods=6, tz="Asia/Tokyo") + idx3 = idx3._with_freq(None) + + df = df.set_index(idx1) + df = df.set_index(idx2, append=True) + df = df.set_index(idx3, append=True) + + expected1 = DatetimeIndex( + ["2011-07-19 07:00:00", "2011-07-19 08:00:00", "2011-07-19 09:00:00"], + tz="US/Eastern", + ) + expected2 = DatetimeIndex( + ["2012-04-01 09:00", "2012-04-02 09:00"], tz="US/Eastern" + ) + + tm.assert_index_equal(df.index.levels[0], expected1) + tm.assert_index_equal(df.index.levels[1], expected2) + tm.assert_index_equal(df.index.levels[2], idx3) + + # GH#7092 + tm.assert_index_equal(df.index.get_level_values(0), idx1) + tm.assert_index_equal(df.index.get_level_values(1), 
idx2) + tm.assert_index_equal(df.index.get_level_values(2), idx3) + + def test_set_index_period(self): + # GH#6631 + df = DataFrame(np.random.random(6)) + idx1 = period_range("2011-01-01", periods=3, freq="M") + idx1 = idx1.append(idx1) + idx2 = period_range("2013-01-01 09:00", periods=2, freq="H") + idx2 = idx2.append(idx2).append(idx2) + idx3 = period_range("2005", periods=6, freq="A") + + df = df.set_index(idx1) + df = df.set_index(idx2, append=True) + df = df.set_index(idx3, append=True) + + expected1 = period_range("2011-01-01", periods=3, freq="M") + expected2 = period_range("2013-01-01 09:00", periods=2, freq="H") + + tm.assert_index_equal(df.index.levels[0], expected1) + tm.assert_index_equal(df.index.levels[1], expected2) + tm.assert_index_equal(df.index.levels[2], idx3) + + tm.assert_index_equal(df.index.get_level_values(0), idx1) + tm.assert_index_equal(df.index.get_level_values(1), idx2) + tm.assert_index_equal(df.index.get_level_values(2), idx3) + + +class TestSetIndexInvalid: + def test_set_index_verify_integrity(self, frame_of_index_cols): + df = frame_of_index_cols + + with pytest.raises(ValueError, match="Index has duplicate keys"): + df.set_index("A", verify_integrity=True) + # with MultiIndex + with pytest.raises(ValueError, match="Index has duplicate keys"): + df.set_index([df["A"], df["A"]], verify_integrity=True) + + @pytest.mark.parametrize("append", [True, False]) + @pytest.mark.parametrize("drop", [True, False]) + def test_set_index_raise_keys(self, frame_of_index_cols, drop, append): + df = frame_of_index_cols + + with pytest.raises(KeyError, match="['foo', 'bar', 'baz']"): + # column names are A-E, as well as one tuple + df.set_index(["foo", "bar", "baz"], drop=drop, append=append) + + # non-existent key in list with arrays + with pytest.raises(KeyError, match="X"): + df.set_index([df["A"], df["B"], "X"], drop=drop, append=append) + + msg = "[('foo', 'foo', 'foo', 'bar', 'bar')]" + # tuples always raise KeyError + with pytest.raises(KeyError, match=msg): + df.set_index(tuple(df["A"]), drop=drop, append=append) + + # also within a list + with pytest.raises(KeyError, match=msg): + df.set_index(["A", df["A"], tuple(df["A"])], drop=drop, append=append) + + @pytest.mark.parametrize("append", [True, False]) + @pytest.mark.parametrize("drop", [True, False]) + @pytest.mark.parametrize("box", [set], ids=["set"]) + def test_set_index_raise_on_type(self, frame_of_index_cols, box, drop, append): + df = frame_of_index_cols + + msg = 'The parameter "keys" may be a column key, .*' + # forbidden type, e.g. set + with pytest.raises(TypeError, match=msg): + df.set_index(box(df["A"]), drop=drop, append=append) + + # forbidden type in list, e.g. 
set + with pytest.raises(TypeError, match=msg): + df.set_index(["A", df["A"], box(df["A"])], drop=drop, append=append) + + # MultiIndex constructor does not work directly on Series -> lambda + @pytest.mark.parametrize( + "box", + [Series, Index, np.array, iter, lambda x: MultiIndex.from_arrays([x])], + ids=["Series", "Index", "np.array", "iter", "MultiIndex"], + ) + @pytest.mark.parametrize("length", [4, 6], ids=["too_short", "too_long"]) + @pytest.mark.parametrize("append", [True, False]) + @pytest.mark.parametrize("drop", [True, False]) + def test_set_index_raise_on_len( + self, frame_of_index_cols, box, length, drop, append + ): + # GH 24984 + df = frame_of_index_cols # has length 5 + + values = np.random.randint(0, 10, (length,)) + + msg = "Length mismatch: Expected 5 rows, received array of length.*" + + # wrong length directly + with pytest.raises(ValueError, match=msg): + df.set_index(box(values), drop=drop, append=append) + + # wrong length in list + with pytest.raises(ValueError, match=msg): + df.set_index(["A", df.A, box(values)], drop=drop, append=append) + + +class TestSetIndexCustomLabelType: + def test_set_index_custom_label_type(self): + # GH#24969 + + class Thing: + def __init__(self, name, color): + self.name = name + self.color = color + + def __str__(self) -> str: + return f"<Thing {repr(self.name)}>" + + # necessary for pretty KeyError + __repr__ = __str__ + + thing1 = Thing("One", "red") + thing2 = Thing("Two", "blue") + df = DataFrame({thing1: [0, 1], thing2: [2, 3]}) + expected = DataFrame({thing1: [0, 1]}, index=Index([2, 3], name=thing2)) + + # use custom label directly + result = df.set_index(thing2) + tm.assert_frame_equal(result, expected) + + # custom label wrapped in list + result = df.set_index([thing2]) + tm.assert_frame_equal(result, expected) + + # missing key + thing3 = Thing("Three", "pink") + msg = "<Thing 'Three'>" + with pytest.raises(KeyError, match=msg): + # missing label directly + df.set_index(thing3) + + with pytest.raises(KeyError, match=msg): + # missing label in list + df.set_index([thing3]) + + def test_set_index_custom_label_hashable_iterable(self): + # GH#24969 + + # actual example discussed in GH 24984 was e.g. for shapely.geometry + # objects (e.g.
a collection of Points) that can be both hashable and + iterable; using frozenset as a stand-in for testing here + + class Thing(frozenset): + # need to stabilize repr for KeyError (due to random order in sets) + def __repr__(self) -> str: + tmp = sorted(self) + joined_reprs = ", ".join(map(repr, tmp)) + # double curly brace prints one brace in format string + return f"frozenset({{{joined_reprs}}})" + + thing1 = Thing(["One", "red"]) + thing2 = Thing(["Two", "blue"]) + df = DataFrame({thing1: [0, 1], thing2: [2, 3]}) + expected = DataFrame({thing1: [0, 1]}, index=Index([2, 3], name=thing2)) + + # use custom label directly + result = df.set_index(thing2) + tm.assert_frame_equal(result, expected) + + # custom label wrapped in list + result = df.set_index([thing2]) + tm.assert_frame_equal(result, expected) + + # missing key + thing3 = Thing(["Three", "pink"]) + msg = r"frozenset\(\{'Three', 'pink'\}\)" + with pytest.raises(KeyError, match=msg): + # missing label directly + df.set_index(thing3) + + with pytest.raises(KeyError, match=msg): + # missing label in list + df.set_index([thing3]) + + def test_set_index_custom_label_type_raises(self): + # GH#24969 + + # purposefully inherit from something unhashable + class Thing(set): + def __init__(self, name, color): + self.name = name + self.color = color + + def __str__(self) -> str: + return f"<Thing {repr(self.name)}>" + + thing1 = Thing("One", "red") + thing2 = Thing("Two", "blue") + df = DataFrame([[0, 2], [1, 3]], columns=[thing1, thing2]) + + msg = 'The parameter "keys" may be a column key, .*' + + with pytest.raises(TypeError, match=msg): + # use custom label directly + df.set_index(thing2) + + with pytest.raises(TypeError, match=msg): + # custom label wrapped in list + df.set_index([thing2]) + + def test_set_index_periodindex(self): + # GH#6631 + df = DataFrame(np.random.random(6)) + idx1 = period_range("2011/01/01", periods=6, freq="M") + idx2 = period_range("2013", periods=6, freq="A") + + df = df.set_index(idx1) + tm.assert_index_equal(df.index, idx1) + df = df.set_index(idx2) + tm.assert_index_equal(df.index, idx2) + + def test_drop_pos_args_deprecation(self): + # https://github.com/pandas-dev/pandas/issues/41485 + df = DataFrame({"a": [1, 2, 3]}) + msg = ( + r"In a future version of pandas all arguments of DataFrame\.set_index " + r"except for the argument 'keys' will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.set_index("a", True) + expected = DataFrame(index=Index([1, 2, 3], name="a")) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_shift.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_shift.py new file mode 100644 index 0000000..2463e81 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_shift.py @@ -0,0 +1,678 @@ +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + CategoricalIndex, + DataFrame, + Index, + NaT, + Series, + date_range, + offsets, +) +import pandas._testing as tm + + +class TestDataFrameShift: + @pytest.mark.parametrize( + "input_data, output_data", + [(np.empty(shape=(0,)), []), (np.ones(shape=(2,)), [np.nan, 1.0])], + ) + def test_shift_non_writable_array(self, input_data, output_data, frame_or_series): + # GH21049 Verify whether non writable numpy array is shiftable + input_data.setflags(write=False) + + result = frame_or_series(input_data).shift(1) + if frame_or_series is not
Series: + # need to explicitly specify columns in the empty case + expected = frame_or_series( + output_data, + index=range(len(output_data)), + columns=range(1), + dtype="float64", + ) + else: + expected = frame_or_series(output_data, dtype="float64") + + tm.assert_equal(result, expected) + + def test_shift_mismatched_freq(self, frame_or_series): + ts = frame_or_series( + np.random.randn(5), index=date_range("1/1/2000", periods=5, freq="H") + ) + + result = ts.shift(1, freq="5T") + exp_index = ts.index.shift(1, freq="5T") + tm.assert_index_equal(result.index, exp_index) + + # GH#1063, multiple of same base + result = ts.shift(1, freq="4H") + exp_index = ts.index + offsets.Hour(4) + tm.assert_index_equal(result.index, exp_index) + + @pytest.mark.parametrize( + "obj", + [ + Series([np.arange(5)]), + date_range("1/1/2011", periods=24, freq="H"), + Series(range(5), index=date_range("2017", periods=5)), + ], + ) + @pytest.mark.parametrize("shift_size", [0, 1, 2]) + def test_shift_always_copy(self, obj, shift_size, frame_or_series): + # GH#22397 + if frame_or_series is not Series: + obj = obj.to_frame() + assert obj.shift(shift_size) is not obj + + def test_shift_object_non_scalar_fill(self): + # shift requires scalar fill_value except for object dtype + ser = Series(range(3)) + with pytest.raises(ValueError, match="fill_value must be a scalar"): + ser.shift(1, fill_value=[]) + + df = ser.to_frame() + with pytest.raises(ValueError, match="fill_value must be a scalar"): + df.shift(1, fill_value=np.arange(3)) + + obj_ser = ser.astype(object) + result = obj_ser.shift(1, fill_value={}) + assert result[0] == {} + + obj_df = obj_ser.to_frame() + result = obj_df.shift(1, fill_value={}) + assert result.iloc[0, 0] == {} + + def test_shift_int(self, datetime_frame, frame_or_series): + ts = tm.get_obj(datetime_frame, frame_or_series).astype(int) + shifted = ts.shift(1) + expected = ts.astype(float).shift(1) + tm.assert_equal(shifted, expected) + + def test_shift_32bit_take(self, frame_or_series): + # 32-bit taking + # GH#8129 + index = date_range("2000-01-01", periods=5) + for dtype in ["int32", "int64"]: + arr = np.arange(5, dtype=dtype) + s1 = frame_or_series(arr, index=index) + p = arr[1] + result = s1.shift(periods=p) + expected = frame_or_series([np.nan, 0, 1, 2, 3], index=index) + tm.assert_equal(result, expected) + + @pytest.mark.parametrize("periods", [1, 2, 3, 4]) + def test_shift_preserve_freqstr(self, periods, frame_or_series): + # GH#21275 + obj = frame_or_series( + range(periods), + index=date_range("2016-1-1 00:00:00", periods=periods, freq="H"), + ) + + result = obj.shift(1, "2H") + + expected = frame_or_series( + range(periods), + index=date_range("2016-1-1 02:00:00", periods=periods, freq="H"), + ) + tm.assert_equal(result, expected) + + def test_shift_dst(self, frame_or_series): + # GH#13926 + dates = date_range("2016-11-06", freq="H", periods=10, tz="US/Eastern") + obj = frame_or_series(dates) + + res = obj.shift(0) + tm.assert_equal(res, obj) + assert tm.get_dtype(res) == "datetime64[ns, US/Eastern]" + + res = obj.shift(1) + exp_vals = [NaT] + dates.astype(object).values.tolist()[:9] + exp = frame_or_series(exp_vals) + tm.assert_equal(res, exp) + assert tm.get_dtype(res) == "datetime64[ns, US/Eastern]" + + res = obj.shift(-2) + exp_vals = dates.astype(object).values.tolist()[2:] + [NaT, NaT] + exp = frame_or_series(exp_vals) + tm.assert_equal(res, exp) + assert tm.get_dtype(res) == "datetime64[ns, US/Eastern]" + + for ex in [10, -10, 20, -20]: + res = obj.shift(ex) + exp = 
frame_or_series([NaT] * 10, dtype="datetime64[ns, US/Eastern]") + tm.assert_equal(res, exp) + assert tm.get_dtype(res) == "datetime64[ns, US/Eastern]" + + def test_shift_by_zero(self, datetime_frame, frame_or_series): + # shift by 0 + obj = tm.get_obj(datetime_frame, frame_or_series) + unshifted = obj.shift(0) + tm.assert_equal(unshifted, obj) + + def test_shift(self, datetime_frame): + # naive shift + ser = datetime_frame["A"] + + shifted = datetime_frame.shift(5) + tm.assert_index_equal(shifted.index, datetime_frame.index) + + shifted_ser = ser.shift(5) + tm.assert_series_equal(shifted["A"], shifted_ser) + + shifted = datetime_frame.shift(-5) + tm.assert_index_equal(shifted.index, datetime_frame.index) + + shifted_ser = ser.shift(-5) + tm.assert_series_equal(shifted["A"], shifted_ser) + + unshifted = datetime_frame.shift(5).shift(-5) + tm.assert_numpy_array_equal( + unshifted.dropna().values, datetime_frame.values[:-5] + ) + + unshifted_ser = ser.shift(5).shift(-5) + tm.assert_numpy_array_equal(unshifted_ser.dropna().values, ser.values[:-5]) + + def test_shift_by_offset(self, datetime_frame, frame_or_series): + # shift by DateOffset + obj = tm.get_obj(datetime_frame, frame_or_series) + offset = offsets.BDay() + + shifted = obj.shift(5, freq=offset) + assert len(shifted) == len(obj) + unshifted = shifted.shift(-5, freq=offset) + tm.assert_equal(unshifted, obj) + + shifted2 = obj.shift(5, freq="B") + tm.assert_equal(shifted, shifted2) + + unshifted = obj.shift(0, freq=offset) + tm.assert_equal(unshifted, obj) + + d = obj.index[0] + shifted_d = d + offset * 5 + if frame_or_series is DataFrame: + tm.assert_series_equal(obj.xs(d), shifted.xs(shifted_d), check_names=False) + else: + tm.assert_almost_equal(obj.at[d], shifted.at[shifted_d]) + + def test_shift_with_periodindex(self, frame_or_series): + # Shifting with PeriodIndex + ps = tm.makePeriodFrame() + ps = tm.get_obj(ps, frame_or_series) + + shifted = ps.shift(1) + unshifted = shifted.shift(-1) + tm.assert_index_equal(shifted.index, ps.index) + tm.assert_index_equal(unshifted.index, ps.index) + if frame_or_series is DataFrame: + tm.assert_numpy_array_equal( + unshifted.iloc[:, 0].dropna().values, ps.iloc[:-1, 0].values + ) + else: + tm.assert_numpy_array_equal(unshifted.dropna().values, ps.values[:-1]) + + shifted2 = ps.shift(1, "B") + shifted3 = ps.shift(1, offsets.BDay()) + tm.assert_equal(shifted2, shifted3) + tm.assert_equal(ps, shifted2.shift(-1, "B")) + + msg = "does not match PeriodIndex freq" + with pytest.raises(ValueError, match=msg): + ps.shift(freq="D") + + # legacy support + shifted4 = ps.shift(1, freq="B") + tm.assert_equal(shifted2, shifted4) + + shifted5 = ps.shift(1, freq=offsets.BDay()) + tm.assert_equal(shifted5, shifted4) + + def test_shift_other_axis(self): + # shift other axis + # GH#6371 + df = DataFrame(np.random.rand(10, 5)) + expected = pd.concat( + [DataFrame(np.nan, index=df.index, columns=[0]), df.iloc[:, 0:-1]], + ignore_index=True, + axis=1, + ) + result = df.shift(1, axis=1) + tm.assert_frame_equal(result, expected) + + def test_shift_named_axis(self): + # shift named axis + df = DataFrame(np.random.rand(10, 5)) + expected = pd.concat( + [DataFrame(np.nan, index=df.index, columns=[0]), df.iloc[:, 0:-1]], + ignore_index=True, + axis=1, + ) + result = df.shift(1, axis="columns") + tm.assert_frame_equal(result, expected) + + def test_shift_bool(self): + df = DataFrame({"high": [True, False], "low": [False, False]}) + rs = df.shift(1) + xp = DataFrame( + np.array([[np.nan, np.nan], [True, False]], 
dtype=object), + columns=["high", "low"], + ) + tm.assert_frame_equal(rs, xp) + + def test_shift_categorical1(self, frame_or_series): + # GH#9416 + obj = frame_or_series(["a", "b", "c", "d"], dtype="category") + + rt = obj.shift(1).shift(-1) + tm.assert_equal(obj.iloc[:-1], rt.dropna()) + + def get_cat_values(ndframe): + # For Series we could just do ._values; for DataFrame + # we may be able to do this if we ever have 2D Categoricals + return ndframe._mgr.arrays[0] + + cat = get_cat_values(obj) + + sp1 = obj.shift(1) + tm.assert_index_equal(obj.index, sp1.index) + assert np.all(get_cat_values(sp1).codes[:1] == -1) + assert np.all(cat.codes[:-1] == get_cat_values(sp1).codes[1:]) + + sn2 = obj.shift(-2) + tm.assert_index_equal(obj.index, sn2.index) + assert np.all(get_cat_values(sn2).codes[-2:] == -1) + assert np.all(cat.codes[2:] == get_cat_values(sn2).codes[:-2]) + + tm.assert_index_equal(cat.categories, get_cat_values(sp1).categories) + tm.assert_index_equal(cat.categories, get_cat_values(sn2).categories) + + def test_shift_categorical(self): + # GH#9416 + s1 = Series(["a", "b", "c"], dtype="category") + s2 = Series(["A", "B", "C"], dtype="category") + df = DataFrame({"one": s1, "two": s2}) + rs = df.shift(1) + xp = DataFrame({"one": s1.shift(1), "two": s2.shift(1)}) + tm.assert_frame_equal(rs, xp) + + def test_shift_categorical_fill_value(self, frame_or_series): + ts = frame_or_series(["a", "b", "c", "d"], dtype="category") + res = ts.shift(1, fill_value="a") + expected = frame_or_series( + pd.Categorical( + ["a", "a", "b", "c"], categories=["a", "b", "c", "d"], ordered=False + ) + ) + tm.assert_equal(res, expected) + + # check for incorrect fill_value + msg = r"Cannot setitem on a Categorical with a new category \(f\)" + with pytest.raises(TypeError, match=msg): + ts.shift(1, fill_value="f") + + def test_shift_fill_value(self, frame_or_series): + # GH#24128 + dti = date_range("1/1/2000", periods=5, freq="H") + + ts = frame_or_series([1.0, 2.0, 3.0, 4.0, 5.0], index=dti) + exp = frame_or_series([0.0, 1.0, 2.0, 3.0, 4.0], index=dti) + # check that fill value works + result = ts.shift(1, fill_value=0.0) + tm.assert_equal(result, exp) + + exp = frame_or_series([0.0, 0.0, 1.0, 2.0, 3.0], index=dti) + result = ts.shift(2, fill_value=0.0) + tm.assert_equal(result, exp) + + ts = frame_or_series([1, 2, 3]) + res = ts.shift(2, fill_value=0) + assert tm.get_dtype(res) == tm.get_dtype(ts) + + # retain integer dtype + obj = frame_or_series([1, 2, 3, 4, 5], index=dti) + exp = frame_or_series([0, 1, 2, 3, 4], index=dti) + result = obj.shift(1, fill_value=0) + tm.assert_equal(result, exp) + + exp = frame_or_series([0, 0, 1, 2, 3], index=dti) + result = obj.shift(2, fill_value=0) + tm.assert_equal(result, exp) + + def test_shift_empty(self): + # Regression test for GH#8019 + df = DataFrame({"foo": []}) + rs = df.shift(-1) + + tm.assert_frame_equal(df, rs) + + def test_shift_duplicate_columns(self): + # GH#9092; verify that position-based shifting works + # in the presence of duplicate columns + column_lists = [list(range(5)), [1] * 5, [1, 1, 2, 2, 1]] + data = np.random.randn(20, 5) + + shifted = [] + for columns in column_lists: + df = DataFrame(data.copy(), columns=columns) + for s in range(5): + df.iloc[:, s] = df.iloc[:, s].shift(s + 1) + df.columns = range(5) + shifted.append(df) + + # sanity check the base case + nulls = shifted[0].isna().sum() + tm.assert_series_equal(nulls, Series(range(1, 6), dtype="int64")) + + # check all answers are the same + tm.assert_frame_equal(shifted[0], shifted[1]) + 
tm.assert_frame_equal(shifted[0], shifted[2]) + + def test_shift_axis1_multiple_blocks(self, using_array_manager): + # GH#35488 + df1 = DataFrame(np.random.randint(1000, size=(5, 3))) + df2 = DataFrame(np.random.randint(1000, size=(5, 2))) + df3 = pd.concat([df1, df2], axis=1) + if not using_array_manager: + assert len(df3._mgr.blocks) == 2 + + result = df3.shift(2, axis=1) + + expected = df3.take([-1, -1, 0, 1, 2], axis=1) + expected.iloc[:, :2] = np.nan + expected.columns = df3.columns + + tm.assert_frame_equal(result, expected) + + # Case with periods < 0 + # rebuild df3 because `take` call above consolidated + df3 = pd.concat([df1, df2], axis=1) + if not using_array_manager: + assert len(df3._mgr.blocks) == 2 + result = df3.shift(-2, axis=1) + + expected = df3.take([2, 3, 4, -1, -1], axis=1) + expected.iloc[:, -2:] = np.nan + expected.columns = df3.columns + + tm.assert_frame_equal(result, expected) + + @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) axis=1 support + def test_shift_axis1_multiple_blocks_with_int_fill(self): + # GH#42719 + df1 = DataFrame(np.random.randint(1000, size=(5, 3))) + df2 = DataFrame(np.random.randint(1000, size=(5, 2))) + df3 = pd.concat([df1.iloc[:4, 1:3], df2.iloc[:4, :]], axis=1) + result = df3.shift(2, axis=1, fill_value=np.int_(0)) + assert len(df3._mgr.blocks) == 2 + + expected = df3.take([-1, -1, 0, 1], axis=1) + expected.iloc[:, :2] = np.int_(0) + expected.columns = df3.columns + + tm.assert_frame_equal(result, expected) + + # Case with periods < 0 + df3 = pd.concat([df1.iloc[:4, 1:3], df2.iloc[:4, :]], axis=1) + result = df3.shift(-2, axis=1, fill_value=np.int_(0)) + assert len(df3._mgr.blocks) == 2 + + expected = df3.take([2, 3, -1, -1], axis=1) + expected.iloc[:, -2:] = np.int_(0) + expected.columns = df3.columns + + tm.assert_frame_equal(result, expected) + + @pytest.mark.filterwarnings("ignore:tshift is deprecated:FutureWarning") + def test_tshift(self, datetime_frame, frame_or_series): + # TODO(2.0): remove this test when tshift deprecation is enforced + + # PeriodIndex + ps = tm.makePeriodFrame() + ps = tm.get_obj(ps, frame_or_series) + shifted = ps.tshift(1) + unshifted = shifted.tshift(-1) + + tm.assert_equal(unshifted, ps) + + shifted2 = ps.tshift(freq="B") + tm.assert_equal(shifted, shifted2) + + shifted3 = ps.tshift(freq=offsets.BDay()) + tm.assert_equal(shifted, shifted3) + + msg = "Given freq M does not match PeriodIndex freq B" + with pytest.raises(ValueError, match=msg): + ps.tshift(freq="M") + + # DatetimeIndex + dtobj = tm.get_obj(datetime_frame, frame_or_series) + shifted = dtobj.tshift(1) + unshifted = shifted.tshift(-1) + + tm.assert_equal(dtobj, unshifted) + + shifted2 = dtobj.tshift(freq=dtobj.index.freq) + tm.assert_equal(shifted, shifted2) + + inferred_ts = DataFrame( + datetime_frame.values, + Index(np.asarray(datetime_frame.index)), + columns=datetime_frame.columns, + ) + inferred_ts = tm.get_obj(inferred_ts, frame_or_series) + shifted = inferred_ts.tshift(1) + + expected = dtobj.tshift(1) + expected.index = expected.index._with_freq(None) + tm.assert_equal(shifted, expected) + + unshifted = shifted.tshift(-1) + tm.assert_equal(unshifted, inferred_ts) + + no_freq = dtobj.iloc[[0, 5, 7]] + msg = "Freq was not set in the index hence cannot be inferred" + with pytest.raises(ValueError, match=msg): + no_freq.tshift() + + def test_tshift_deprecated(self, datetime_frame, frame_or_series): + # GH#11631 + dtobj = tm.get_obj(datetime_frame, frame_or_series) + with tm.assert_produces_warning(FutureWarning): + 
dtobj.tshift() + + def test_period_index_frame_shift_with_freq(self, frame_or_series): + ps = tm.makePeriodFrame() + ps = tm.get_obj(ps, frame_or_series) + + shifted = ps.shift(1, freq="infer") + unshifted = shifted.shift(-1, freq="infer") + tm.assert_equal(unshifted, ps) + + shifted2 = ps.shift(freq="B") + tm.assert_equal(shifted, shifted2) + + shifted3 = ps.shift(freq=offsets.BDay()) + tm.assert_equal(shifted, shifted3) + + def test_datetime_frame_shift_with_freq(self, datetime_frame, frame_or_series): + dtobj = tm.get_obj(datetime_frame, frame_or_series) + shifted = dtobj.shift(1, freq="infer") + unshifted = shifted.shift(-1, freq="infer") + tm.assert_equal(dtobj, unshifted) + + shifted2 = dtobj.shift(freq=dtobj.index.freq) + tm.assert_equal(shifted, shifted2) + + inferred_ts = DataFrame( + datetime_frame.values, + Index(np.asarray(datetime_frame.index)), + columns=datetime_frame.columns, + ) + inferred_ts = tm.get_obj(inferred_ts, frame_or_series) + shifted = inferred_ts.shift(1, freq="infer") + expected = dtobj.shift(1, freq="infer") + expected.index = expected.index._with_freq(None) + tm.assert_equal(shifted, expected) + + unshifted = shifted.shift(-1, freq="infer") + tm.assert_equal(unshifted, inferred_ts) + + def test_period_index_frame_shift_with_freq_error(self, frame_or_series): + ps = tm.makePeriodFrame() + ps = tm.get_obj(ps, frame_or_series) + msg = "Given freq M does not match PeriodIndex freq B" + with pytest.raises(ValueError, match=msg): + ps.shift(freq="M") + + def test_datetime_frame_shift_with_freq_error( + self, datetime_frame, frame_or_series + ): + dtobj = tm.get_obj(datetime_frame, frame_or_series) + no_freq = dtobj.iloc[[0, 5, 7]] + msg = "Freq was not set in the index hence cannot be inferred" + with pytest.raises(ValueError, match=msg): + no_freq.shift(freq="infer") + + @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) axis=1 support + def test_shift_dt64values_int_fill_deprecated(self): + # GH#31971 + ser = Series([pd.Timestamp("2020-01-01"), pd.Timestamp("2020-01-02")]) + + with tm.assert_produces_warning(FutureWarning): + result = ser.shift(1, fill_value=0) + expected = Series([pd.Timestamp(0), ser[0]]) + tm.assert_series_equal(result, expected) + + df = ser.to_frame() + with tm.assert_produces_warning(FutureWarning): + result = df.shift(1, fill_value=0) + expected = expected.to_frame() + tm.assert_frame_equal(result, expected) + + # axis = 1 + df2 = DataFrame({"A": ser, "B": ser}) + df2._consolidate_inplace() + + with tm.assert_produces_warning(FutureWarning): + result = df2.shift(1, axis=1, fill_value=0) + + expected = DataFrame({"A": [pd.Timestamp(0), pd.Timestamp(0)], "B": df2["A"]}) + tm.assert_frame_equal(result, expected) + + # same thing but not consolidated + # This isn't great that we get different behavior, but + # that will go away when the deprecation is enforced + df3 = DataFrame({"A": ser}) + df3["B"] = ser + assert len(df3._mgr.arrays) == 2 + result = df3.shift(1, axis=1, fill_value=0) + expected = DataFrame({"A": [0, 0], "B": df2["A"]}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "as_cat", + [ + pytest.param( + True, + marks=pytest.mark.xfail( + reason="_can_hold_element incorrectly always returns True" + ), + ), + False, + ], + ) + @pytest.mark.parametrize( + "vals", + [ + date_range("2020-01-01", periods=2), + date_range("2020-01-01", periods=2, tz="US/Pacific"), + pd.period_range("2020-01-01", periods=2, freq="D"), + pd.timedelta_range("2020 Days", periods=2, freq="D"), + pd.interval_range(0, 3, 
periods=2), + pytest.param( + pd.array([1, 2], dtype="Int64"), + marks=pytest.mark.xfail( + reason="_can_hold_element incorrectly always returns True" + ), + ), + pytest.param( + pd.array([1, 2], dtype="Float32"), + marks=pytest.mark.xfail( + reason="_can_hold_element incorrectly always returns True" + ), + ), + ], + ids=lambda x: str(x.dtype), + ) + # TODO(2.0): remove filtering + @pytest.mark.filterwarnings("ignore:Index.ravel.*:FutureWarning") + def test_shift_dt64values_axis1_invalid_fill( + self, vals, as_cat, using_array_manager, request + ): + # GH#44564 + if using_array_manager: + mark = pytest.mark.xfail(raises=NotImplementedError) + request.node.add_marker(mark) + + ser = Series(vals) + if as_cat: + ser = ser.astype("category") + + df = DataFrame({"A": ser}) + result = df.shift(-1, axis=1, fill_value="foo") + expected = DataFrame({"A": ["foo", "foo"]}) + tm.assert_frame_equal(result, expected) + + # same thing but multiple blocks + df2 = DataFrame({"A": ser, "B": ser}) + df2._consolidate_inplace() + + result = df2.shift(-1, axis=1, fill_value="foo") + expected = DataFrame({"A": df2["B"], "B": ["foo", "foo"]}) + tm.assert_frame_equal(result, expected) + + # same thing but not consolidated + df3 = DataFrame({"A": ser}) + df3["B"] = ser + assert len(df3._mgr.arrays) == 2 + result = df3.shift(-1, axis=1, fill_value="foo") + tm.assert_frame_equal(result, expected) + + def test_shift_axis1_categorical_columns(self): + # GH#38434 + ci = CategoricalIndex(["a", "b", "c"]) + df = DataFrame( + {"a": [1, 3], "b": [2, 4], "c": [5, 6]}, index=ci[:-1], columns=ci + ) + result = df.shift(axis=1) + + expected = DataFrame( + {"a": [np.nan, np.nan], "b": [1, 3], "c": [2, 4]}, index=ci[:-1], columns=ci + ) + tm.assert_frame_equal(result, expected) + + # periods != 1 + result = df.shift(2, axis=1) + expected = DataFrame( + {"a": [np.nan, np.nan], "b": [np.nan, np.nan], "c": [1, 3]}, + index=ci[:-1], + columns=ci, + ) + tm.assert_frame_equal(result, expected) + + @td.skip_array_manager_not_yet_implemented + def test_shift_axis1_many_periods(self): + # GH#44978 periods > len(columns) + df = DataFrame(np.random.rand(5, 3)) + shifted = df.shift(6, axis=1, fill_value=None) + + expected = df * np.nan + tm.assert_frame_equal(shifted, expected) + + shifted2 = df.shift(-6, axis=1, fill_value=None) + tm.assert_frame_equal(shifted2, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_sort_index.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_sort_index.py new file mode 100644 index 0000000..99ff0f0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_sort_index.py @@ -0,0 +1,926 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + CategoricalDtype, + CategoricalIndex, + DataFrame, + IntervalIndex, + MultiIndex, + RangeIndex, + Series, + Timestamp, +) +import pandas._testing as tm + + +class TestDataFrameSortIndex: + def test_sort_index_and_reconstruction_doc_example(self): + # doc example + df = DataFrame( + {"value": [1, 2, 3, 4]}, + index=MultiIndex( + levels=[["a", "b"], ["bb", "aa"]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]] + ), + ) + assert df.index._is_lexsorted() + assert not df.index.is_monotonic + + # sort it + expected = DataFrame( + {"value": [2, 1, 4, 3]}, + index=MultiIndex( + levels=[["a", "b"], ["aa", "bb"]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]] + ), + ) + result = df.sort_index() + assert result.index.is_monotonic + + tm.assert_frame_equal(result, expected) + + # reconstruct + 
result = df.sort_index().copy() + result.index = result.index._sort_levels_monotonic() + assert result.index.is_monotonic + + tm.assert_frame_equal(result, expected) + + def test_sort_index_non_existent_label_multiindex(self): + # GH#12261 + df = DataFrame(0, columns=[], index=MultiIndex.from_product([[], []])) + df.loc["b", "2"] = 1 + df.loc["a", "3"] = 1 + result = df.sort_index().index.is_monotonic + assert result is True + + def test_sort_index_reorder_on_ops(self): + # GH#15687 + df = DataFrame( + np.random.randn(8, 2), + index=MultiIndex.from_product( + [["a", "b"], ["big", "small"], ["red", "blu"]], + names=["letter", "size", "color"], + ), + columns=["near", "far"], + ) + df = df.sort_index() + + def my_func(group): + group.index = ["newz", "newa"] + return group + + result = df.groupby(level=["letter", "size"]).apply(my_func).sort_index() + expected = MultiIndex.from_product( + [["a", "b"], ["big", "small"], ["newa", "newz"]], + names=["letter", "size", None], + ) + + tm.assert_index_equal(result.index, expected) + + def test_sort_index_nan_multiindex(self): + # GH#14784 + # incorrect sorting w.r.t. nans + tuples = [[12, 13], [np.nan, np.nan], [np.nan, 3], [1, 2]] + mi = MultiIndex.from_tuples(tuples) + + df = DataFrame(np.arange(16).reshape(4, 4), index=mi, columns=list("ABCD")) + s = Series(np.arange(4), index=mi) + + df2 = DataFrame( + { + "date": pd.DatetimeIndex( + [ + "20121002", + "20121007", + "20130130", + "20130202", + "20130305", + "20121002", + "20121207", + "20130130", + "20130202", + "20130305", + "20130202", + "20130305", + ] + ), + "user_id": [1, 1, 1, 1, 1, 3, 3, 3, 5, 5, 5, 5], + "whole_cost": [ + 1790, + np.nan, + 280, + 259, + np.nan, + 623, + 90, + 312, + np.nan, + 301, + 359, + 801, + ], + "cost": [12, 15, 10, 24, 39, 1, 0, np.nan, 45, 34, 1, 12], + } + ).set_index(["date", "user_id"]) + + # sorting frame, default nan position is last + result = df.sort_index() + expected = df.iloc[[3, 0, 2, 1], :] + tm.assert_frame_equal(result, expected) + + # sorting frame, nan position last + result = df.sort_index(na_position="last") + expected = df.iloc[[3, 0, 2, 1], :] + tm.assert_frame_equal(result, expected) + + # sorting frame, nan position first + result = df.sort_index(na_position="first") + expected = df.iloc[[1, 2, 3, 0], :] + tm.assert_frame_equal(result, expected) + + # sorting frame with removed rows + result = df2.dropna().sort_index() + expected = df2.sort_index().dropna() + tm.assert_frame_equal(result, expected) + + # sorting series, default nan position is last + result = s.sort_index() + expected = s.iloc[[3, 0, 2, 1]] + tm.assert_series_equal(result, expected) + + # sorting series, nan position last + result = s.sort_index(na_position="last") + expected = s.iloc[[3, 0, 2, 1]] + tm.assert_series_equal(result, expected) + + # sorting series, nan position first + result = s.sort_index(na_position="first") + expected = s.iloc[[1, 2, 3, 0]] + tm.assert_series_equal(result, expected) + + def test_sort_index_nan(self): + # GH#3917 + + # Test DataFrame with nan label + df = DataFrame( + {"A": [1, 2, np.nan, 1, 6, 8, 4], "B": [9, np.nan, 5, 2, 5, 4, 5]}, + index=[1, 2, 3, 4, 5, 6, np.nan], + ) + + # NaN label, ascending=True, na_position='last' + sorted_df = df.sort_index(kind="quicksort", ascending=True, na_position="last") + expected = DataFrame( + {"A": [1, 2, np.nan, 1, 6, 8, 4], "B": [9, np.nan, 5, 2, 5, 4, 5]}, + index=[1, 2, 3, 4, 5, 6, np.nan], + ) + tm.assert_frame_equal(sorted_df, expected) + + # NaN label, ascending=True, na_position='first' + 
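+ # Note: ascending=True is the default, so the call below only needs
+ # na_position="first"; the NaN label moves to the front while the other
+ # labels keep their ascending order.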
sorted_df = df.sort_index(na_position="first") + expected = DataFrame( + {"A": [4, 1, 2, np.nan, 1, 6, 8], "B": [5, 9, np.nan, 5, 2, 5, 4]}, + index=[np.nan, 1, 2, 3, 4, 5, 6], + ) + tm.assert_frame_equal(sorted_df, expected) + + # NaN label, ascending=False, na_position='last' + sorted_df = df.sort_index(kind="quicksort", ascending=False) + expected = DataFrame( + {"A": [8, 6, 1, np.nan, 2, 1, 4], "B": [4, 5, 2, 5, np.nan, 9, 5]}, + index=[6, 5, 4, 3, 2, 1, np.nan], + ) + tm.assert_frame_equal(sorted_df, expected) + + # NaN label, ascending=False, na_position='first' + sorted_df = df.sort_index( + kind="quicksort", ascending=False, na_position="first" + ) + expected = DataFrame( + {"A": [4, 8, 6, 1, np.nan, 2, 1], "B": [5, 4, 5, 2, 5, np.nan, 9]}, + index=[np.nan, 6, 5, 4, 3, 2, 1], + ) + tm.assert_frame_equal(sorted_df, expected) + + def test_sort_index_multi_index(self): + # GH#25775, testing that sorting by index works with a multi-index. + df = DataFrame( + {"a": [3, 1, 2], "b": [0, 0, 0], "c": [0, 1, 2], "d": list("abc")} + ) + result = df.set_index(list("abc")).sort_index(level=list("ba")) + + expected = DataFrame( + {"a": [1, 2, 3], "b": [0, 0, 0], "c": [1, 2, 0], "d": list("bca")} + ) + expected = expected.set_index(list("abc")) + + tm.assert_frame_equal(result, expected) + + def test_sort_index_inplace(self): + frame = DataFrame( + np.random.randn(4, 4), index=[1, 2, 3, 4], columns=["A", "B", "C", "D"] + ) + + # axis=0 + unordered = frame.loc[[3, 2, 4, 1]] + a_values = unordered["A"] + df = unordered.copy() + return_value = df.sort_index(inplace=True) + assert return_value is None + expected = frame + tm.assert_frame_equal(df, expected) + # GH 44153 related + # Used to be a_id != id(df["A"]), but flaky in the CI + assert a_values is not df["A"] + + df = unordered.copy() + return_value = df.sort_index(ascending=False, inplace=True) + assert return_value is None + expected = frame[::-1] + tm.assert_frame_equal(df, expected) + + # axis=1 + unordered = frame.loc[:, ["D", "B", "C", "A"]] + df = unordered.copy() + return_value = df.sort_index(axis=1, inplace=True) + assert return_value is None + expected = frame + tm.assert_frame_equal(df, expected) + + df = unordered.copy() + return_value = df.sort_index(axis=1, ascending=False, inplace=True) + assert return_value is None + expected = frame.iloc[:, ::-1] + tm.assert_frame_equal(df, expected) + + def test_sort_index_different_sortorder(self): + A = np.arange(20).repeat(5) + B = np.tile(np.arange(5), 20) + + indexer = np.random.permutation(100) + A = A.take(indexer) + B = B.take(indexer) + + df = DataFrame({"A": A, "B": B, "C": np.random.randn(100)}) + + ex_indexer = np.lexsort((df.B.max() - df.B, df.A)) + expected = df.take(ex_indexer) + + # test with multiindex, too + idf = df.set_index(["A", "B"]) + + result = idf.sort_index(ascending=[1, 0]) + expected = idf.take(ex_indexer) + tm.assert_frame_equal(result, expected) + + # also, Series! 
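+ # Aside: ``df.B.max() - df.B`` above builds a descending secondary key for
+ # np.lexsort, which sorts by its *last* key first; negating the column is
+ # an equivalent sketch (same ordering, same ties):
+ #   >>> alt = np.lexsort((-df.B.to_numpy(), df.A.to_numpy()))
+ #   >>> bool((alt == ex_indexer).all())
+ #   True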
+ result = idf["C"].sort_index(ascending=[1, 0]) + tm.assert_series_equal(result, expected["C"]) + + def test_sort_index_level(self): + mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC")) + df = DataFrame([[1, 2], [3, 4]], mi) + + result = df.sort_index(level="A", sort_remaining=False) + expected = df + tm.assert_frame_equal(result, expected) + + result = df.sort_index(level=["A", "B"], sort_remaining=False) + expected = df + tm.assert_frame_equal(result, expected) + + # Error thrown by sort_index when + # first index is sorted last (GH#26053) + result = df.sort_index(level=["C", "B", "A"]) + expected = df.iloc[[1, 0]] + tm.assert_frame_equal(result, expected) + + result = df.sort_index(level=["B", "C", "A"]) + expected = df.iloc[[1, 0]] + tm.assert_frame_equal(result, expected) + + result = df.sort_index(level=["C", "A"]) + expected = df.iloc[[1, 0]] + tm.assert_frame_equal(result, expected) + + def test_sort_index_categorical_index(self): + + df = DataFrame( + { + "A": np.arange(6, dtype="int64"), + "B": Series(list("aabbca")).astype(CategoricalDtype(list("cab"))), + } + ).set_index("B") + + result = df.sort_index() + expected = df.iloc[[4, 0, 1, 5, 2, 3]] + tm.assert_frame_equal(result, expected) + + result = df.sort_index(ascending=False) + expected = df.iloc[[2, 3, 0, 1, 5, 4]] + tm.assert_frame_equal(result, expected) + + def test_sort_index(self): + # GH#13496 + + frame = DataFrame( + np.arange(16).reshape(4, 4), + index=[1, 2, 3, 4], + columns=["A", "B", "C", "D"], + ) + + # axis=0 : sort rows by index labels + unordered = frame.loc[[3, 2, 4, 1]] + result = unordered.sort_index(axis=0) + expected = frame + tm.assert_frame_equal(result, expected) + + result = unordered.sort_index(ascending=False) + expected = frame[::-1] + tm.assert_frame_equal(result, expected) + + # axis=1 : sort columns by column names + unordered = frame.iloc[:, [2, 1, 3, 0]] + result = unordered.sort_index(axis=1) + tm.assert_frame_equal(result, frame) + + result = unordered.sort_index(axis=1, ascending=False) + expected = frame.iloc[:, ::-1] + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("level", ["A", 0]) # GH#21052 + def test_sort_index_multiindex(self, level): + # GH#13496 + + # sort rows by specified level of multi-index + mi = MultiIndex.from_tuples( + [[2, 1, 3], [2, 1, 2], [1, 1, 1]], names=list("ABC") + ) + df = DataFrame([[1, 2], [3, 4], [5, 6]], index=mi) + + expected_mi = MultiIndex.from_tuples( + [[1, 1, 1], [2, 1, 2], [2, 1, 3]], names=list("ABC") + ) + expected = DataFrame([[5, 6], [3, 4], [1, 2]], index=expected_mi) + result = df.sort_index(level=level) + tm.assert_frame_equal(result, expected) + + # sort_remaining=False + expected_mi = MultiIndex.from_tuples( + [[1, 1, 1], [2, 1, 3], [2, 1, 2]], names=list("ABC") + ) + expected = DataFrame([[5, 6], [1, 2], [3, 4]], index=expected_mi) + result = df.sort_index(level=level, sort_remaining=False) + tm.assert_frame_equal(result, expected) + + def test_sort_index_intervalindex(self): + # this is a de-facto sort via unstack + # confirming that we sort in the order of the bins + y = Series(np.random.randn(100)) + x1 = Series(np.sign(np.random.randn(100))) + x2 = pd.cut(Series(np.random.randn(100)), bins=[-3, -0.5, 0, 0.5, 3]) + model = pd.concat([y, x1, x2], axis=1, keys=["Y", "X1", "X2"]) + + result = model.groupby(["X1", "X2"], observed=True).mean().unstack() + expected = IntervalIndex.from_tuples( + [(-3.0, -0.5), (-0.5, 0.0), (0.0, 0.5), (0.5, 3.0)], closed="right" + ) + result = 
result.columns.levels[1].categories + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("inplace", [True, False]) + @pytest.mark.parametrize( + "original_dict, sorted_dict, ascending, ignore_index, output_index", + [ + ({"A": [1, 2, 3]}, {"A": [2, 3, 1]}, False, True, [0, 1, 2]), + ({"A": [1, 2, 3]}, {"A": [1, 3, 2]}, True, True, [0, 1, 2]), + ({"A": [1, 2, 3]}, {"A": [2, 3, 1]}, False, False, [5, 3, 2]), + ({"A": [1, 2, 3]}, {"A": [1, 3, 2]}, True, False, [2, 3, 5]), + ], + ) + def test_sort_index_ignore_index( + self, inplace, original_dict, sorted_dict, ascending, ignore_index, output_index + ): + # GH 30114 + original_index = [2, 5, 3] + df = DataFrame(original_dict, index=original_index) + expected_df = DataFrame(sorted_dict, index=output_index) + kwargs = { + "ascending": ascending, + "ignore_index": ignore_index, + "inplace": inplace, + } + + if inplace: + result_df = df.copy() + result_df.sort_index(**kwargs) + else: + result_df = df.sort_index(**kwargs) + + tm.assert_frame_equal(result_df, expected_df) + tm.assert_frame_equal(df, DataFrame(original_dict, index=original_index)) + + @pytest.mark.parametrize("inplace", [True, False]) + @pytest.mark.parametrize("ignore_index", [True, False]) + def test_respect_ignore_index(self, inplace, ignore_index): + # GH 43591 + df = DataFrame({"a": [1, 2, 3]}, index=RangeIndex(4, -1, -2)) + result = df.sort_index( + ascending=False, ignore_index=ignore_index, inplace=inplace + ) + + if inplace: + result = df + if ignore_index: + expected = DataFrame({"a": [1, 2, 3]}) + else: + expected = DataFrame({"a": [1, 2, 3]}, index=RangeIndex(4, -1, -2)) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("inplace", [True, False]) + @pytest.mark.parametrize( + "original_dict, sorted_dict, ascending, ignore_index, output_index", + [ + ( + {"M1": [1, 2], "M2": [3, 4]}, + {"M1": [1, 2], "M2": [3, 4]}, + True, + True, + [0, 1], + ), + ( + {"M1": [1, 2], "M2": [3, 4]}, + {"M1": [2, 1], "M2": [4, 3]}, + False, + True, + [0, 1], + ), + ( + {"M1": [1, 2], "M2": [3, 4]}, + {"M1": [1, 2], "M2": [3, 4]}, + True, + False, + MultiIndex.from_tuples([(2, 1), (3, 4)], names=list("AB")), + ), + ( + {"M1": [1, 2], "M2": [3, 4]}, + {"M1": [2, 1], "M2": [4, 3]}, + False, + False, + MultiIndex.from_tuples([(3, 4), (2, 1)], names=list("AB")), + ), + ], + ) + def test_sort_index_ignore_index_multi_index( + self, inplace, original_dict, sorted_dict, ascending, ignore_index, output_index + ): + # GH 30114, this is to test ignore_index on a MultiIndex + mi = MultiIndex.from_tuples([(2, 1), (3, 4)], names=list("AB")) + df = DataFrame(original_dict, index=mi) + expected_df = DataFrame(sorted_dict, index=output_index) + + kwargs = { + "ascending": ascending, + "ignore_index": ignore_index, + "inplace": inplace, + } + + if inplace: + result_df = df.copy() + result_df.sort_index(**kwargs) + else: + result_df = df.sort_index(**kwargs) + + tm.assert_frame_equal(result_df, expected_df) + tm.assert_frame_equal(df, DataFrame(original_dict, index=mi)) + + def test_sort_index_categorical_multiindex(self): + # GH#15058 + df = DataFrame( + { + "a": range(6), + "l1": pd.Categorical( + ["a", "a", "b", "b", "c", "c"], + categories=["c", "a", "b"], + ordered=True, + ), + "l2": [0, 1, 0, 1, 0, 1], + } + ) + result = df.set_index(["l1", "l2"]).sort_index() + expected = DataFrame( + [4, 5, 0, 1, 2, 3], + columns=["a"], + index=MultiIndex( + levels=[ + CategoricalIndex( + ["c", "a", "b"], + categories=["c", "a", "b"], + ordered=True, + name="l1",
dtype="category", + ), + [0, 1], + ], + codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], + names=["l1", "l2"], + ), + ) + tm.assert_frame_equal(result, expected) + + def test_sort_index_and_reconstruction(self): + + # GH#15622 + # lexsortedness should be identical + # across MultiIndex construction methods + + df = DataFrame([[1, 1], [2, 2]], index=list("ab")) + expected = DataFrame( + [[1, 1], [2, 2], [1, 1], [2, 2]], + index=MultiIndex.from_tuples( + [(0.5, "a"), (0.5, "b"), (0.8, "a"), (0.8, "b")] + ), + ) + assert expected.index._is_lexsorted() + + result = DataFrame( + [[1, 1], [2, 2], [1, 1], [2, 2]], + index=MultiIndex.from_product([[0.5, 0.8], list("ab")]), + ) + result = result.sort_index() + assert result.index.is_monotonic + + tm.assert_frame_equal(result, expected) + + result = DataFrame( + [[1, 1], [2, 2], [1, 1], [2, 2]], + index=MultiIndex( + levels=[[0.5, 0.8], ["a", "b"]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]] + ), + ) + result = result.sort_index() + assert result.index._is_lexsorted() + + tm.assert_frame_equal(result, expected) + + concatted = pd.concat([df, df], keys=[0.8, 0.5]) + result = concatted.sort_index() + + assert result.index.is_monotonic + + tm.assert_frame_equal(result, expected) + + # GH#14015 + df = DataFrame( + [[1, 2], [6, 7]], + columns=MultiIndex.from_tuples( + [(0, "20160811 12:00:00"), (0, "20160809 12:00:00")], + names=["l1", "Date"], + ), + ) + + df.columns = df.columns.set_levels( + pd.to_datetime(df.columns.levels[1]), level=1 + ) + assert not df.columns.is_monotonic + result = df.sort_index(axis=1) + assert result.columns.is_monotonic + result = df.sort_index(axis=1, level=1) + assert result.columns.is_monotonic + + # TODO: better name, de-duplicate with test_sort_index_level above + def test_sort_index_level2(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + + df = frame.copy() + df.index = np.arange(len(df)) + + # axis=1 + + # series + a_sorted = frame["A"].sort_index(level=0) + + # preserve names + assert a_sorted.index.names == frame.index.names + + # inplace + rs = frame.copy() + return_value = rs.sort_index(level=0, inplace=True) + assert return_value is None + tm.assert_frame_equal(rs, frame.sort_index(level=0)) + + def test_sort_index_level_large_cardinality(self): + + # GH#2684 (int64) + index = MultiIndex.from_arrays([np.arange(4000)] * 3) + df = DataFrame(np.random.randn(4000).astype("int64"), index=index) + + # it works! + result = df.sort_index(level=0) + assert result.index._lexsort_depth == 3 + + # GH#2684 (int32) + index = MultiIndex.from_arrays([np.arange(4000)] * 3) + df = DataFrame(np.random.randn(4000).astype("int32"), index=index) + + # it works! 
+ result = df.sort_index(level=0) + assert (result.dtypes.values == df.dtypes.values).all() + assert result.index._lexsort_depth == 3 + + def test_sort_index_level_by_name(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + + frame.index.names = ["first", "second"] + result = frame.sort_index(level="second") + expected = frame.sort_index(level=1) + tm.assert_frame_equal(result, expected) + + def test_sort_index_level_mixed(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + + sorted_before = frame.sort_index(level=1) + + df = frame.copy() + df["foo"] = "bar" + sorted_after = df.sort_index(level=1) + tm.assert_frame_equal(sorted_before, sorted_after.drop(["foo"], axis=1)) + + dft = frame.T + sorted_before = dft.sort_index(level=1, axis=1) + dft["foo", "three"] = "bar" + + sorted_after = dft.sort_index(level=1, axis=1) + tm.assert_frame_equal( + sorted_before.drop([("foo", "three")], axis=1), + sorted_after.drop([("foo", "three")], axis=1), + ) + + def test_sort_index_preserve_levels(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + + result = frame.sort_index() + assert result.index.names == frame.index.names + + @pytest.mark.parametrize( + "gen,extra", + [ + ([1.0, 3.0, 2.0, 5.0], 4.0), + ([1, 3, 2, 5], 4), + ( + [ + Timestamp("20130101"), + Timestamp("20130103"), + Timestamp("20130102"), + Timestamp("20130105"), + ], + Timestamp("20130104"), + ), + (["1one", "3one", "2one", "5one"], "4one"), + ], + ) + def test_sort_index_multilevel_repr_8017(self, gen, extra): + + np.random.seed(0) + data = np.random.randn(3, 4) + + columns = MultiIndex.from_tuples([("red", i) for i in gen]) + df = DataFrame(data, index=list("def"), columns=columns) + df2 = pd.concat( + [ + df, + DataFrame( + "world", + index=list("def"), + columns=MultiIndex.from_tuples([("red", extra)]), + ), + ], + axis=1, + ) + + # check that the repr is good + # make sure that we have a correct sparsified repr + # e.g. 
only 1 header of 'red' + assert str(df2).splitlines()[0].split() == ["red"] + + # GH 8017 + # sorting fails after columns added + + # construct single-dtype then sort + result = df.copy().sort_index(axis=1) + expected = df.iloc[:, [0, 2, 1, 3]] + tm.assert_frame_equal(result, expected) + + result = df2.sort_index(axis=1) + expected = df2.iloc[:, [0, 2, 1, 4, 3]] + tm.assert_frame_equal(result, expected) + + # setitem then sort + result = df.copy() + result[("red", extra)] = "world" + + result = result.sort_index(axis=1) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "categories", + [ + pytest.param(["a", "b", "c"], id="str"), + pytest.param( + [pd.Interval(0, 1), pd.Interval(1, 2), pd.Interval(2, 3)], + id="pd.Interval", + ), + ], + ) + def test_sort_index_with_categories(self, categories): + # GH#23452 + df = DataFrame( + {"foo": range(len(categories))}, + index=CategoricalIndex( + data=categories, categories=categories, ordered=True + ), + ) + df.index = df.index.reorder_categories(df.index.categories[::-1]) + result = df.sort_index() + expected = DataFrame( + {"foo": reversed(range(len(categories)))}, + index=CategoricalIndex( + data=categories[::-1], categories=categories[::-1], ordered=True + ), + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "ascending", + [ + None, + [True, None], + [False, "True"], + ], + ) + def test_sort_index_ascending_bad_value_raises(self, ascending): + # GH 39434 + df = DataFrame(np.arange(64)) + length = len(df.index) + df.index = [(i - length / 2) % length for i in range(length)] + match = 'For argument "ascending" expected type bool' + with pytest.raises(ValueError, match=match): + df.sort_index(axis=0, ascending=ascending, na_position="first") + + def test_sort_index_use_inf_as_na(self): + # GH 29687 + expected = DataFrame( + {"col1": [1, 2, 3], "col2": [3, 4, 5]}, + index=pd.date_range("2020", periods=3), + ) + with pd.option_context("mode.use_inf_as_na", True): + result = expected.sort_index() + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "ascending", + [(True, False), [True, False]], + ) + def test_sort_index_ascending_tuple(self, ascending): + df = DataFrame( + { + "legs": [4, 2, 4, 2, 2], + }, + index=MultiIndex.from_tuples( + [ + ("mammal", "dog"), + ("bird", "duck"), + ("mammal", "horse"), + ("bird", "penguin"), + ("mammal", "kangaroo"), + ], + names=["class", "animal"], + ), + ) + + # parameter ``ascending`` is a tuple + result = df.sort_index(level=(0, 1), ascending=ascending) + + expected = DataFrame( + { + "legs": [2, 2, 2, 4, 4], + }, + index=MultiIndex.from_tuples( + [ + ("bird", "penguin"), + ("bird", "duck"), + ("mammal", "kangaroo"), + ("mammal", "horse"), + ("mammal", "dog"), + ], + names=["class", "animal"], + ), + ) + + tm.assert_frame_equal(result, expected) + + +class TestDataFrameSortIndexKey: + def test_sort_multi_index_key(self): + # GH 25775, testing that sorting by index works with a multi-index.
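+ # Background (per the pandas docs): when the index is a MultiIndex, the
+ # ``key`` callable is applied to each level independently; a minimal
+ # sketch with a negating key:
+ #   >>> DataFrame({"v": [1, 2]},
+ #   ...           index=MultiIndex.from_arrays([[2, 1], [20, 10]])
+ #   ...           ).sort_index(key=lambda lvl: -lvl).index[0]
+ #   (2, 20)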
+ df = DataFrame( + {"a": [3, 1, 2], "b": [0, 0, 0], "c": [0, 1, 2], "d": list("abc")} + ).set_index(list("abc")) + + result = df.sort_index(level=list("ac"), key=lambda x: x) + + expected = DataFrame( + {"a": [1, 2, 3], "b": [0, 0, 0], "c": [1, 2, 0], "d": list("bca")} + ).set_index(list("abc")) + tm.assert_frame_equal(result, expected) + + result = df.sort_index(level=list("ac"), key=lambda x: -x) + expected = DataFrame( + {"a": [3, 2, 1], "b": [0, 0, 0], "c": [0, 2, 1], "d": list("acb")} + ).set_index(list("abc")) + + tm.assert_frame_equal(result, expected) + + def test_sort_index_key(self): # issue 27237 + df = DataFrame(np.arange(6, dtype="int64"), index=list("aaBBca")) + + result = df.sort_index() + expected = df.iloc[[2, 3, 0, 1, 5, 4]] + tm.assert_frame_equal(result, expected) + + result = df.sort_index(key=lambda x: x.str.lower()) + expected = df.iloc[[0, 1, 5, 2, 3, 4]] + tm.assert_frame_equal(result, expected) + + result = df.sort_index(key=lambda x: x.str.lower(), ascending=False) + expected = df.iloc[[4, 2, 3, 0, 1, 5]] + tm.assert_frame_equal(result, expected) + + def test_sort_index_key_int(self): + df = DataFrame(np.arange(6, dtype="int64"), index=np.arange(6, dtype="int64")) + + result = df.sort_index() + tm.assert_frame_equal(result, df) + + result = df.sort_index(key=lambda x: -x) + expected = df.sort_index(ascending=False) + tm.assert_frame_equal(result, expected) + + result = df.sort_index(key=lambda x: 2 * x) + tm.assert_frame_equal(result, df) + + def test_sort_multi_index_key_str(self): + # GH 25775, testing that sorting by index works with a multi-index. + df = DataFrame( + {"a": ["B", "a", "C"], "b": [0, 1, 0], "c": list("abc"), "d": [0, 1, 2]} + ).set_index(list("abc")) + + result = df.sort_index(level="a", key=lambda x: x.str.lower()) + + expected = DataFrame( + {"a": ["a", "B", "C"], "b": [1, 0, 0], "c": list("bac"), "d": [1, 0, 2]} + ).set_index(list("abc")) + tm.assert_frame_equal(result, expected) + + result = df.sort_index( + level=list("abc"), # can refer to names + key=lambda x: x.str.lower() if x.name in ["a", "c"] else -x, + ) + + expected = DataFrame( + {"a": ["a", "B", "C"], "b": [1, 0, 0], "c": list("bac"), "d": [1, 0, 2]} + ).set_index(list("abc")) + tm.assert_frame_equal(result, expected) + + def test_changes_length_raises(self): + df = DataFrame({"A": [1, 2, 3]}) + with pytest.raises(ValueError, match="change the shape"): + df.sort_index(key=lambda x: x[:1]) + + def test_sort_index_multiindex_sparse_column(self): + # GH 29735, testing that sort_index on a multiindexed frame with sparse + # columns fills with 0. 
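+ # Background: a SparseDtype("float64", 0.0) column stores only the entries
+ # that differ from the fill value; a minimal sketch:
+ #   >>> arr = pd.array([0.0, 0.0, 1.0, 0.0], dtype=pd.SparseDtype("float64", 0.0))
+ #   >>> arr.density  # fraction of explicitly stored points
+ #   0.25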
+ expected = DataFrame( + { + i: pd.array([0.0, 0.0, 0.0, 0.0], dtype=pd.SparseDtype("float64", 0.0)) + for i in range(0, 4) + }, + index=MultiIndex.from_product([[1, 2], [1, 2]]), + ) + + result = expected.sort_index(level=0) + + tm.assert_frame_equal(result, expected) + + def test_sort_index_pos_args_deprecation(self): + # https://github.com/pandas-dev/pandas/issues/41485 + df = DataFrame({"a": [1, 2, 3]}) + msg = ( + r"In a future version of pandas all arguments of DataFrame.sort_index " + r"will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.sort_index(1) + expected = DataFrame({"a": [1, 2, 3]}) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_sort_values.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_sort_values.py new file mode 100644 index 0000000..9f3fcb1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_sort_values.py @@ -0,0 +1,892 @@ +import random + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + NaT, + Timestamp, + date_range, +) +import pandas._testing as tm + + +class TestDataFrameSortValues: + def test_sort_values_sparse_no_warning(self): + # GH#45618 + # TODO(2.0): test will be unnecessary + ser = pd.Series(Categorical(["a", "b", "a"], categories=["a", "b", "c"])) + df = pd.get_dummies(ser, sparse=True) + + with tm.assert_produces_warning(None): + # No warnings about constructing Index from SparseArray + df.sort_values(by=df.columns.tolist()) + + def test_sort_values(self): + frame = DataFrame( + [[1, 1, 2], [3, 1, 0], [4, 5, 6]], index=[1, 2, 3], columns=list("ABC") + ) + + # by column (axis=0) + sorted_df = frame.sort_values(by="A") + indexer = frame["A"].argsort().values + expected = frame.loc[frame.index[indexer]] + tm.assert_frame_equal(sorted_df, expected) + + sorted_df = frame.sort_values(by="A", ascending=False) + indexer = indexer[::-1] + expected = frame.loc[frame.index[indexer]] + tm.assert_frame_equal(sorted_df, expected) + + sorted_df = frame.sort_values(by="A", ascending=False) + tm.assert_frame_equal(sorted_df, expected) + + # GH4839 + sorted_df = frame.sort_values(by=["A"], ascending=[False]) + tm.assert_frame_equal(sorted_df, expected) + + # multiple bys + sorted_df = frame.sort_values(by=["B", "C"]) + expected = frame.loc[[2, 1, 3]] + tm.assert_frame_equal(sorted_df, expected) + + sorted_df = frame.sort_values(by=["B", "C"], ascending=False) + tm.assert_frame_equal(sorted_df, expected[::-1]) + + sorted_df = frame.sort_values(by=["B", "A"], ascending=[True, False]) + tm.assert_frame_equal(sorted_df, expected) + + msg = "No axis named 2 for object type DataFrame" + with pytest.raises(ValueError, match=msg): + frame.sort_values(by=["A", "B"], axis=2, inplace=True) + + # by row (axis=1): GH#10806 + sorted_df = frame.sort_values(by=3, axis=1) + expected = frame + tm.assert_frame_equal(sorted_df, expected) + + sorted_df = frame.sort_values(by=3, axis=1, ascending=False) + expected = frame.reindex(columns=["C", "B", "A"]) + tm.assert_frame_equal(sorted_df, expected) + + sorted_df = frame.sort_values(by=[1, 2], axis="columns") + expected = frame.reindex(columns=["B", "A", "C"]) + tm.assert_frame_equal(sorted_df, expected) + + sorted_df = frame.sort_values(by=[1, 3], axis=1, ascending=[True, False]) + tm.assert_frame_equal(sorted_df, expected) + + sorted_df = frame.sort_values(by=[1, 3], axis=1, ascending=False) + expected 
= frame.reindex(columns=["C", "B", "A"]) + tm.assert_frame_equal(sorted_df, expected) + + msg = r"Length of ascending \(5\) != length of by \(2\)" + with pytest.raises(ValueError, match=msg): + frame.sort_values(by=["A", "B"], axis=0, ascending=[True] * 5) + + def test_sort_values_by_empty_list(self): + # https://github.com/pandas-dev/pandas/issues/40258 + expected = DataFrame({"a": [1, 4, 2, 5, 3, 6]}) + result = expected.sort_values(by=[]) + tm.assert_frame_equal(result, expected) + assert result is not expected + + def test_sort_values_inplace(self): + frame = DataFrame( + np.random.randn(4, 4), index=[1, 2, 3, 4], columns=["A", "B", "C", "D"] + ) + + sorted_df = frame.copy() + return_value = sorted_df.sort_values(by="A", inplace=True) + assert return_value is None + expected = frame.sort_values(by="A") + tm.assert_frame_equal(sorted_df, expected) + + sorted_df = frame.copy() + return_value = sorted_df.sort_values(by=1, axis=1, inplace=True) + assert return_value is None + expected = frame.sort_values(by=1, axis=1) + tm.assert_frame_equal(sorted_df, expected) + + sorted_df = frame.copy() + return_value = sorted_df.sort_values(by="A", ascending=False, inplace=True) + assert return_value is None + expected = frame.sort_values(by="A", ascending=False) + tm.assert_frame_equal(sorted_df, expected) + + sorted_df = frame.copy() + return_value = sorted_df.sort_values( + by=["A", "B"], ascending=False, inplace=True + ) + assert return_value is None + expected = frame.sort_values(by=["A", "B"], ascending=False) + tm.assert_frame_equal(sorted_df, expected) + + def test_sort_values_multicolumn(self): + A = np.arange(5).repeat(20) + B = np.tile(np.arange(5), 20) + random.shuffle(A) + random.shuffle(B) + frame = DataFrame({"A": A, "B": B, "C": np.random.randn(100)}) + + result = frame.sort_values(by=["A", "B"]) + indexer = np.lexsort((frame["B"], frame["A"])) + expected = frame.take(indexer) + tm.assert_frame_equal(result, expected) + + result = frame.sort_values(by=["A", "B"], ascending=False) + indexer = np.lexsort( + (frame["B"].rank(ascending=False), frame["A"].rank(ascending=False)) + ) + expected = frame.take(indexer) + tm.assert_frame_equal(result, expected) + + result = frame.sort_values(by=["B", "A"]) + indexer = np.lexsort((frame["A"], frame["B"])) + expected = frame.take(indexer) + tm.assert_frame_equal(result, expected) + + def test_sort_values_multicolumn_uint64(self): + # GH#9918 + # uint64 multicolumn sort + + df = DataFrame( + { + "a": pd.Series([18446637057563306014, 1162265347240853609]), + "b": pd.Series([1, 2]), + } + ) + df["a"] = df["a"].astype(np.uint64) + result = df.sort_values(["a", "b"]) + + expected = DataFrame( + { + "a": pd.Series([18446637057563306014, 1162265347240853609]), + "b": pd.Series([1, 2]), + }, + index=pd.Index([1, 0]), + ) + + tm.assert_frame_equal(result, expected) + + def test_sort_values_nan(self): + # GH#3917 + df = DataFrame( + {"A": [1, 2, np.nan, 1, 6, 8, 4], "B": [9, np.nan, 5, 2, 5, 4, 5]} + ) + + # sort one column only + expected = DataFrame( + {"A": [np.nan, 1, 1, 2, 4, 6, 8], "B": [5, 9, 2, np.nan, 5, 5, 4]}, + index=[2, 0, 3, 1, 6, 4, 5], + ) + sorted_df = df.sort_values(["A"], na_position="first") + tm.assert_frame_equal(sorted_df, expected) + + expected = DataFrame( + {"A": [np.nan, 8, 6, 4, 2, 1, 1], "B": [5, 4, 5, 5, np.nan, 9, 2]}, + index=[2, 5, 4, 6, 1, 0, 3], + ) + sorted_df = df.sort_values(["A"], na_position="first", ascending=False) + tm.assert_frame_equal(sorted_df, expected) + + expected = df.reindex(columns=["B", "A"]) + sorted_df 
= df.sort_values(by=1, axis=1, na_position="first") + tm.assert_frame_equal(sorted_df, expected) + + # na_position='last', order + expected = DataFrame( + {"A": [1, 1, 2, 4, 6, 8, np.nan], "B": [2, 9, np.nan, 5, 5, 4, 5]}, + index=[3, 0, 1, 6, 4, 5, 2], + ) + sorted_df = df.sort_values(["A", "B"]) + tm.assert_frame_equal(sorted_df, expected) + + # na_position='first', order + expected = DataFrame( + {"A": [np.nan, 1, 1, 2, 4, 6, 8], "B": [5, 2, 9, np.nan, 5, 5, 4]}, + index=[2, 3, 0, 1, 6, 4, 5], + ) + sorted_df = df.sort_values(["A", "B"], na_position="first") + tm.assert_frame_equal(sorted_df, expected) + + # na_position='first', not order + expected = DataFrame( + {"A": [np.nan, 1, 1, 2, 4, 6, 8], "B": [5, 9, 2, np.nan, 5, 5, 4]}, + index=[2, 0, 3, 1, 6, 4, 5], + ) + sorted_df = df.sort_values(["A", "B"], ascending=[1, 0], na_position="first") + tm.assert_frame_equal(sorted_df, expected) + + # na_position='last', not order + expected = DataFrame( + {"A": [8, 6, 4, 2, 1, 1, np.nan], "B": [4, 5, 5, np.nan, 2, 9, 5]}, + index=[5, 4, 6, 1, 3, 0, 2], + ) + sorted_df = df.sort_values(["A", "B"], ascending=[0, 1], na_position="last") + tm.assert_frame_equal(sorted_df, expected) + + def test_sort_values_stable_descending_sort(self): + # GH#6399 + df = DataFrame( + [[2, "first"], [2, "second"], [1, "a"], [1, "b"]], + columns=["sort_col", "order"], + ) + sorted_df = df.sort_values(by="sort_col", kind="mergesort", ascending=False) + tm.assert_frame_equal(df, sorted_df) + + @pytest.mark.parametrize( + "expected_idx_non_na, ascending", + [ + [ + [3, 4, 5, 0, 1, 8, 6, 9, 7, 10, 13, 14], + [True, True], + ], + [ + [0, 3, 4, 5, 1, 8, 6, 7, 10, 13, 14, 9], + [True, False], + ], + [ + [9, 7, 10, 13, 14, 6, 8, 1, 3, 4, 5, 0], + [False, True], + ], + [ + [7, 10, 13, 14, 9, 6, 8, 1, 0, 3, 4, 5], + [False, False], + ], + ], + ) + @pytest.mark.parametrize("na_position", ["first", "last"]) + def test_sort_values_stable_multicolumn_sort( + self, expected_idx_non_na, ascending, na_position + ): + # GH#38426 Clarify sort_values with mult. 
columns / labels is stable + df = DataFrame( + { + "A": [1, 2, np.nan, 1, 1, 1, 6, 8, 4, 8, 8, np.nan, np.nan, 8, 8], + "B": [9, np.nan, 5, 2, 2, 2, 5, 4, 5, 3, 4, np.nan, np.nan, 4, 4], + } + ) + # All rows with NaN in col "B" only have unique values in "A", therefore, + # only the rows with NaNs in "A" have to be treated individually: + expected_idx = ( + [11, 12, 2] + expected_idx_non_na + if na_position == "first" + else expected_idx_non_na + [2, 11, 12] + ) + expected = df.take(expected_idx) + sorted_df = df.sort_values( + ["A", "B"], ascending=ascending, na_position=na_position + ) + tm.assert_frame_equal(sorted_df, expected) + + def test_sort_values_stable_categorical(self): + # GH#16793 + df = DataFrame({"x": Categorical(np.repeat([1, 2, 3, 4], 5), ordered=True)}) + expected = df.copy() + sorted_df = df.sort_values("x", kind="mergesort") + tm.assert_frame_equal(sorted_df, expected) + + def test_sort_values_datetimes(self): + + # GH#3461, argsort / lexsort differences for a datetime column + df = DataFrame( + ["a", "a", "a", "b", "c", "d", "e", "f", "g"], + columns=["A"], + index=date_range("20130101", periods=9), + ) + dts = [ + Timestamp(x) + for x in [ + "2004-02-11", + "2004-01-21", + "2004-01-26", + "2005-09-20", + "2010-10-04", + "2009-05-12", + "2008-11-12", + "2010-09-28", + "2010-09-28", + ] + ] + df["B"] = dts[::2] + dts[1::2] + df["C"] = 2.0 + df["A1"] = 3.0 + + df1 = df.sort_values(by="A") + df2 = df.sort_values(by=["A"]) + tm.assert_frame_equal(df1, df2) + + df1 = df.sort_values(by="B") + df2 = df.sort_values(by=["B"]) + tm.assert_frame_equal(df1, df2) + + df1 = df.sort_values(by="B") + + df2 = df.sort_values(by=["C", "B"]) + tm.assert_frame_equal(df1, df2) + + def test_sort_values_frame_column_inplace_sort_exception(self, float_frame): + s = float_frame["A"] + with pytest.raises(ValueError, match="This Series is a view"): + s.sort_values(inplace=True) + + cp = s.copy() + cp.sort_values() # it works! + + def test_sort_values_nat_values_in_int_column(self): + + # GH#14922: "sorting with large float and multiple columns incorrect" + + # cause was that the int64 value NaT was considered as "na", which is + # only correct for datetime64 columns.
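+ # Background: NaT's integer payload is the minimum int64, so read as a
+ # plain integer it should simply sort first; a quick sanity check:
+ #   >>> int(NaT.value) == np.iinfo(np.int64).min
+ #   True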
+ + int_values = (2, int(NaT.value)) + float_values = (2.0, -1.797693e308) + + df = DataFrame( + {"int": int_values, "float": float_values}, columns=["int", "float"] + ) + + df_reversed = DataFrame( + {"int": int_values[::-1], "float": float_values[::-1]}, + columns=["int", "float"], + index=[1, 0], + ) + + # NaT is not a "na" for int64 columns, so na_position must not + # influence the result: + df_sorted = df.sort_values(["int", "float"], na_position="last") + tm.assert_frame_equal(df_sorted, df_reversed) + + df_sorted = df.sort_values(["int", "float"], na_position="first") + tm.assert_frame_equal(df_sorted, df_reversed) + + # reverse sorting order + df_sorted = df.sort_values(["int", "float"], ascending=False) + tm.assert_frame_equal(df_sorted, df) + + # and now check if NaT is still considered as "na" for datetime64 + # columns: + df = DataFrame( + {"datetime": [Timestamp("2016-01-01"), NaT], "float": float_values}, + columns=["datetime", "float"], + ) + + df_reversed = DataFrame( + {"datetime": [NaT, Timestamp("2016-01-01")], "float": float_values[::-1]}, + columns=["datetime", "float"], + index=[1, 0], + ) + + df_sorted = df.sort_values(["datetime", "float"], na_position="first") + tm.assert_frame_equal(df_sorted, df_reversed) + + df_sorted = df.sort_values(["datetime", "float"], na_position="last") + tm.assert_frame_equal(df_sorted, df) + + # Ascending should not affect the results. + df_sorted = df.sort_values(["datetime", "float"], ascending=False) + tm.assert_frame_equal(df_sorted, df) + + def test_sort_nat(self): + # GH 16836 + + d1 = [Timestamp(x) for x in ["2016-01-01", "2015-01-01", np.nan, "2016-01-01"]] + d2 = [ + Timestamp(x) + for x in ["2017-01-01", "2014-01-01", "2016-01-01", "2015-01-01"] + ] + df = DataFrame({"a": d1, "b": d2}, index=[0, 1, 2, 3]) + + d3 = [Timestamp(x) for x in ["2015-01-01", "2016-01-01", "2016-01-01", np.nan]] + d4 = [ + Timestamp(x) + for x in ["2014-01-01", "2015-01-01", "2017-01-01", "2016-01-01"] + ] + expected = DataFrame({"a": d3, "b": d4}, index=[1, 3, 0, 2]) + sorted_df = df.sort_values(by=["a", "b"]) + tm.assert_frame_equal(sorted_df, expected) + + def test_sort_values_na_position_with_categories(self): + # GH#22556 + # Positioning missing value properly when column is Categorical. 
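+ # Background: missing entries in a Categorical are stored as code -1 and
+ # are never themselves a category, which is why na_position must place
+ # them explicitly; a minimal sketch:
+ #   >>> Categorical(["A", np.nan], categories=["A"]).codes
+ #   array([ 0, -1], dtype=int8)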
+ categories = ["A", "B", "C"] + category_indices = [0, 2, 4] + list_of_nans = [np.nan, np.nan] + na_indices = [1, 3] + na_position_first = "first" + na_position_last = "last" + column_name = "c" + + reversed_categories = sorted(categories, reverse=True) + reversed_category_indices = sorted(category_indices, reverse=True) + reversed_na_indices = sorted(na_indices) + + df = DataFrame( + { + column_name: Categorical( + ["A", np.nan, "B", np.nan, "C"], categories=categories, ordered=True + ) + } + ) + # sort ascending with na first + result = df.sort_values( + by=column_name, ascending=True, na_position=na_position_first + ) + expected = DataFrame( + { + column_name: Categorical( + list_of_nans + categories, categories=categories, ordered=True + ) + }, + index=na_indices + category_indices, + ) + + tm.assert_frame_equal(result, expected) + + # sort ascending with na last + result = df.sort_values( + by=column_name, ascending=True, na_position=na_position_last + ) + expected = DataFrame( + { + column_name: Categorical( + categories + list_of_nans, categories=categories, ordered=True + ) + }, + index=category_indices + na_indices, + ) + + tm.assert_frame_equal(result, expected) + + # sort descending with na first + result = df.sort_values( + by=column_name, ascending=False, na_position=na_position_first + ) + expected = DataFrame( + { + column_name: Categorical( + list_of_nans + reversed_categories, + categories=categories, + ordered=True, + ) + }, + index=reversed_na_indices + reversed_category_indices, + ) + + tm.assert_frame_equal(result, expected) + + # sort descending with na last + result = df.sort_values( + by=column_name, ascending=False, na_position=na_position_last + ) + expected = DataFrame( + { + column_name: Categorical( + reversed_categories + list_of_nans, + categories=categories, + ordered=True, + ) + }, + index=reversed_category_indices + reversed_na_indices, + ) + + tm.assert_frame_equal(result, expected) + + def test_sort_values_nat(self): + + # GH#16836 + + d1 = [Timestamp(x) for x in ["2016-01-01", "2015-01-01", np.nan, "2016-01-01"]] + d2 = [ + Timestamp(x) + for x in ["2017-01-01", "2014-01-01", "2016-01-01", "2015-01-01"] + ] + df = DataFrame({"a": d1, "b": d2}, index=[0, 1, 2, 3]) + + d3 = [Timestamp(x) for x in ["2015-01-01", "2016-01-01", "2016-01-01", np.nan]] + d4 = [ + Timestamp(x) + for x in ["2014-01-01", "2015-01-01", "2017-01-01", "2016-01-01"] + ] + expected = DataFrame({"a": d3, "b": d4}, index=[1, 3, 0, 2]) + sorted_df = df.sort_values(by=["a", "b"]) + tm.assert_frame_equal(sorted_df, expected) + + def test_sort_values_na_position_with_categories_raises(self): + df = DataFrame( + { + "c": Categorical( + ["A", np.nan, "B", np.nan, "C"], + categories=["A", "B", "C"], + ordered=True, + ) + } + ) + + with pytest.raises(ValueError, match="invalid na_position: bad_position"): + df.sort_values(by="c", ascending=False, na_position="bad_position") + + @pytest.mark.parametrize("inplace", [True, False]) + @pytest.mark.parametrize( + "original_dict, sorted_dict, ignore_index, output_index", + [ + ({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, True, [0, 1, 2]), + ({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, False, [2, 1, 0]), + ( + {"A": [1, 2, 3], "B": [2, 3, 4]}, + {"A": [3, 2, 1], "B": [4, 3, 2]}, + True, + [0, 1, 2], + ), + ( + {"A": [1, 2, 3], "B": [2, 3, 4]}, + {"A": [3, 2, 1], "B": [4, 3, 2]}, + False, + [2, 1, 0], + ), + ], + ) + def test_sort_values_ignore_index( + self, inplace, original_dict, sorted_dict, ignore_index, output_index + ): + # GH 30114 + df = 
DataFrame(original_dict) + expected = DataFrame(sorted_dict, index=output_index) + kwargs = {"ignore_index": ignore_index, "inplace": inplace} + + if inplace: + result_df = df.copy() + result_df.sort_values("A", ascending=False, **kwargs) + else: + result_df = df.sort_values("A", ascending=False, **kwargs) + + tm.assert_frame_equal(result_df, expected) + tm.assert_frame_equal(df, DataFrame(original_dict)) + + def test_sort_values_nat_na_position_default(self): + # GH 13230 + expected = DataFrame( + { + "A": [1, 2, 3, 4, 4], + "date": pd.DatetimeIndex( + [ + "2010-01-01 09:00:00", + "2010-01-01 09:00:01", + "2010-01-01 09:00:02", + "2010-01-01 09:00:03", + "NaT", + ] + ), + } + ) + result = expected.sort_values(["A", "date"]) + tm.assert_frame_equal(result, expected) + + def test_sort_values_item_cache(self, using_array_manager): + # previous behavior incorrectly retained an invalid _item_cache entry + df = DataFrame(np.random.randn(4, 3), columns=["A", "B", "C"]) + df["D"] = df["A"] * 2 + ser = df["A"] + if not using_array_manager: + assert len(df._mgr.blocks) == 2 + + df.sort_values(by="A") + ser.values[0] = 99 + + assert df.iloc[0, 0] == df["A"][0] + + def test_sort_values_reshaping(self): + # GH 39426 + values = list(range(21)) + expected = DataFrame([values], columns=values) + df = expected.sort_values(expected.index[0], axis=1, ignore_index=True) + + tm.assert_frame_equal(df, expected) + + +class TestDataFrameSortKey: # test key sorting (issue 27237) + def test_sort_values_inplace_key(self, sort_by_key): + frame = DataFrame( + np.random.randn(4, 4), index=[1, 2, 3, 4], columns=["A", "B", "C", "D"] + ) + + sorted_df = frame.copy() + return_value = sorted_df.sort_values(by="A", inplace=True, key=sort_by_key) + assert return_value is None + expected = frame.sort_values(by="A", key=sort_by_key) + tm.assert_frame_equal(sorted_df, expected) + + sorted_df = frame.copy() + return_value = sorted_df.sort_values( + by=1, axis=1, inplace=True, key=sort_by_key + ) + assert return_value is None + expected = frame.sort_values(by=1, axis=1, key=sort_by_key) + tm.assert_frame_equal(sorted_df, expected) + + sorted_df = frame.copy() + return_value = sorted_df.sort_values( + by="A", ascending=False, inplace=True, key=sort_by_key + ) + assert return_value is None + expected = frame.sort_values(by="A", ascending=False, key=sort_by_key) + tm.assert_frame_equal(sorted_df, expected) + + sorted_df = frame.copy() + sorted_df.sort_values( + by=["A", "B"], ascending=False, inplace=True, key=sort_by_key + ) + expected = frame.sort_values(by=["A", "B"], ascending=False, key=sort_by_key) + tm.assert_frame_equal(sorted_df, expected) + + def test_sort_values_key(self): + df = DataFrame(np.array([0, 5, np.nan, 3, 2, np.nan])) + + result = df.sort_values(0) + expected = df.iloc[[0, 4, 3, 1, 2, 5]] + tm.assert_frame_equal(result, expected) + + result = df.sort_values(0, key=lambda x: x + 5) + expected = df.iloc[[0, 4, 3, 1, 2, 5]] + tm.assert_frame_equal(result, expected) + + result = df.sort_values(0, key=lambda x: -x, ascending=False) + expected = df.iloc[[0, 4, 3, 1, 2, 5]] + tm.assert_frame_equal(result, expected) + + def test_sort_values_by_key(self): + df = DataFrame( + { + "a": np.array([0, 3, np.nan, 3, 2, np.nan]), + "b": np.array([0, 2, np.nan, 5, 2, np.nan]), + } + ) + + result = df.sort_values("a", key=lambda x: -x) + expected = df.iloc[[1, 3, 4, 0, 2, 5]] + tm.assert_frame_equal(result, expected) + + result = df.sort_values(by=["a", "b"], key=lambda x: -x) + expected = df.iloc[[3, 1, 4, 0, 2, 5]] +
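+ # Why this order: rows 1 and 3 tie on the negated "a" key (-3 each); the
+ # tie is broken by the negated "b" key (-5 < -2), so row 3 comes before
+ # row 1, and the NaN rows keep the default na_position="last".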
tm.assert_frame_equal(result, expected) + + result = df.sort_values(by=["a", "b"], key=lambda x: -x, ascending=False) + expected = df.iloc[[0, 4, 1, 3, 2, 5]] + tm.assert_frame_equal(result, expected) + + def test_sort_values_by_key_by_name(self): + df = DataFrame( + { + "a": np.array([0, 3, np.nan, 3, 2, np.nan]), + "b": np.array([0, 2, np.nan, 5, 2, np.nan]), + } + ) + + def key(col): + if col.name == "a": + return -col + else: + return col + + result = df.sort_values(by="a", key=key) + expected = df.iloc[[1, 3, 4, 0, 2, 5]] + tm.assert_frame_equal(result, expected) + + result = df.sort_values(by=["a"], key=key) + expected = df.iloc[[1, 3, 4, 0, 2, 5]] + tm.assert_frame_equal(result, expected) + + result = df.sort_values(by="b", key=key) + expected = df.iloc[[0, 1, 4, 3, 2, 5]] + tm.assert_frame_equal(result, expected) + + result = df.sort_values(by=["a", "b"], key=key) + expected = df.iloc[[1, 3, 4, 0, 2, 5]] + tm.assert_frame_equal(result, expected) + + def test_sort_values_key_string(self): + df = DataFrame(np.array([["hello", "goodbye"], ["hello", "Hello"]])) + + result = df.sort_values(1) + expected = df[::-1] + tm.assert_frame_equal(result, expected) + + result = df.sort_values([0, 1], key=lambda col: col.str.lower()) + tm.assert_frame_equal(result, df) + + result = df.sort_values( + [0, 1], key=lambda col: col.str.lower(), ascending=False + ) + expected = df.sort_values(1, key=lambda col: col.str.lower(), ascending=False) + tm.assert_frame_equal(result, expected) + + def test_sort_values_key_empty(self, sort_by_key): + df = DataFrame(np.array([])) + + df.sort_values(0, key=sort_by_key) + df.sort_index(key=sort_by_key) + + def test_changes_length_raises(self): + df = DataFrame({"A": [1, 2, 3]}) + with pytest.raises(ValueError, match="change the shape"): + df.sort_values("A", key=lambda x: x[:1]) + + def test_sort_values_key_axes(self): + df = DataFrame({0: ["Hello", "goodbye"], 1: [0, 1]}) + + result = df.sort_values(0, key=lambda col: col.str.lower()) + expected = df[::-1] + tm.assert_frame_equal(result, expected) + + result = df.sort_values(1, key=lambda col: -col) + expected = df[::-1] + tm.assert_frame_equal(result, expected) + + def test_sort_values_key_dict_axis(self): + df = DataFrame({0: ["Hello", 0], 1: ["goodbye", 1]}) + + result = df.sort_values(0, key=lambda col: col.str.lower(), axis=1) + expected = df.loc[:, ::-1] + tm.assert_frame_equal(result, expected) + + result = df.sort_values(1, key=lambda col: -col, axis=1) + expected = df.loc[:, ::-1] + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("ordered", [True, False]) + def test_sort_values_key_casts_to_categorical(self, ordered): + # https://github.com/pandas-dev/pandas/issues/36383 + categories = ["c", "b", "a"] + df = DataFrame({"x": [1, 1, 1], "y": ["a", "b", "c"]}) + + def sorter(key): + if key.name == "y": + return pd.Series( + Categorical(key, categories=categories, ordered=ordered) + ) + return key + + result = df.sort_values(by=["x", "y"], key=sorter) + expected = DataFrame( + {"x": [1, 1, 1], "y": ["c", "b", "a"]}, index=pd.Index([2, 1, 0]) + ) + + tm.assert_frame_equal(result, expected) + + +@pytest.fixture +def df_none(): + return DataFrame( + { + "outer": ["a", "a", "a", "b", "b", "b"], + "inner": [1, 2, 2, 2, 1, 1], + "A": np.arange(6, 0, -1), + ("B", 5): ["one", "one", "two", "two", "one", "one"], + } + ) + + +@pytest.fixture(params=[["outer"], ["outer", "inner"]]) +def df_idx(request, df_none): + levels = request.param + return df_none.set_index(levels) + + +@pytest.fixture( + 
params=[ + "inner", # index level + ["outer"], # list of index level + "A", # column + [("B", 5)], # list of column + ["inner", "outer"], # two index levels + [("B", 5), "outer"], # index level and column + ["A", ("B", 5)], # Two columns + ["inner", "outer", "A", ("B", 5)], # two index levels and two columns + ] +) +def sort_names(request): + return request.param + + +@pytest.fixture(params=[True, False]) +def ascending(request): + return request.param + + +class TestSortValuesLevelAsStr: + def test_sort_index_level_and_column_label( + self, df_none, df_idx, sort_names, ascending + ): + # GH#14353 + + # Get index levels from df_idx + levels = df_idx.index.names + + # Compute expected by sorting on columns and then setting the index + expected = df_none.sort_values( + by=sort_names, ascending=ascending, axis=0 + ).set_index(levels) + + # Compute result by sorting on a mix of columns and index levels + result = df_idx.sort_values(by=sort_names, ascending=ascending, axis=0) + + tm.assert_frame_equal(result, expected) + + def test_sort_column_level_and_index_label( + self, df_none, df_idx, sort_names, ascending + ): + # GH#14353 + + # Get levels from df_idx + levels = df_idx.index.names + + # Compute expected by sorting on axis=0, setting index levels, and then + # transposing. For some cases this will result in a frame with + # multiple column levels + expected = ( + df_none.sort_values(by=sort_names, ascending=ascending, axis=0) + .set_index(levels) + .T + ) + + # Compute result by transposing and sorting on axis=1. + result = df_idx.T.sort_values(by=sort_names, ascending=ascending, axis=1) + + tm.assert_frame_equal(result, expected) + + def test_sort_values_pos_args_deprecation(self): + # https://github.com/pandas-dev/pandas/issues/41485 + df = DataFrame({"a": [1, 2, 3]}) + msg = ( + r"In a future version of pandas all arguments of DataFrame\.sort_values " + r"except for the argument 'by' will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.sort_values("a", 0) + expected = DataFrame({"a": [1, 2, 3]}) + tm.assert_frame_equal(result, expected) + + def test_sort_values_validate_ascending_for_value_error(self): + # GH41634 + df = DataFrame({"D": [23, 7, 21]}) + + msg = 'For argument "ascending" expected type bool, received type str.'
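+ # Note: the string "False" is truthy, so without this validation it could
+ # silently be treated as ascending=True; GH41634 added the explicit check.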
+ with pytest.raises(ValueError, match=msg): + df.sort_values(by="D", ascending="False") + + @pytest.mark.parametrize("ascending", [False, 0, 1, True]) + def test_sort_values_validate_ascending_functional(self, ascending): + df = DataFrame({"D": [23, 7, 21]}) + indexer = df["D"].argsort().values + + if not ascending: + indexer = indexer[::-1] + + expected = df.loc[df.index[indexer]] + result = df.sort_values(by="D", ascending=ascending) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_swapaxes.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_swapaxes.py new file mode 100644 index 0000000..306f7b2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_swapaxes.py @@ -0,0 +1,22 @@ +import numpy as np +import pytest + +from pandas import DataFrame +import pandas._testing as tm + + +class TestSwapAxes: + def test_swapaxes(self): + df = DataFrame(np.random.randn(10, 5)) + tm.assert_frame_equal(df.T, df.swapaxes(0, 1)) + tm.assert_frame_equal(df.T, df.swapaxes(1, 0)) + + def test_swapaxes_noop(self): + df = DataFrame(np.random.randn(10, 5)) + tm.assert_frame_equal(df, df.swapaxes(0, 0)) + + def test_swapaxes_invalid_axis(self): + df = DataFrame(np.random.randn(10, 5)) + msg = "No axis named 2 for object type DataFrame" + with pytest.raises(ValueError, match=msg): + df.swapaxes(2, 5) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_swaplevel.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_swaplevel.py new file mode 100644 index 0000000..5511ac7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_swaplevel.py @@ -0,0 +1,36 @@ +import pytest + +from pandas import DataFrame +import pandas._testing as tm + + +class TestSwaplevel: + def test_swaplevel(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + + swapped = frame["A"].swaplevel() + swapped2 = frame["A"].swaplevel(0) + swapped3 = frame["A"].swaplevel(0, 1) + swapped4 = frame["A"].swaplevel("first", "second") + assert not swapped.index.equals(frame.index) + tm.assert_series_equal(swapped, swapped2) + tm.assert_series_equal(swapped, swapped3) + tm.assert_series_equal(swapped, swapped4) + + back = swapped.swaplevel() + back2 = swapped.swaplevel(0) + back3 = swapped.swaplevel(0, 1) + back4 = swapped.swaplevel("second", "first") + assert back.index.equals(frame.index) + tm.assert_series_equal(back, back2) + tm.assert_series_equal(back, back3) + tm.assert_series_equal(back, back4) + + ft = frame.T + swapped = ft.swaplevel("first", "second", axis=1) + exp = frame.swaplevel("first", "second").T + tm.assert_frame_equal(swapped, exp) + + msg = "Can only swap levels on a hierarchical axis." 
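+ # Note: DataFrame(range(3)) has a flat (single-level) RangeIndex, so there
+ # is no pair of levels to swap and the TypeError below is expected.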
+ with pytest.raises(TypeError, match=msg): + DataFrame(range(3)).swaplevel() diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_to_csv.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_to_csv.py new file mode 100644 index 0000000..8a857c0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_to_csv.py @@ -0,0 +1,1335 @@ +import csv +from io import StringIO +import os + +import numpy as np +import pytest + +from pandas.errors import ParserError + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + NaT, + Series, + Timestamp, + date_range, + read_csv, + to_datetime, +) +import pandas._testing as tm +import pandas.core.common as com + +from pandas.io.common import get_handle + +MIXED_FLOAT_DTYPES = ["float16", "float32", "float64"] +MIXED_INT_DTYPES = [ + "uint8", + "uint16", + "uint32", + "uint64", + "int8", + "int16", + "int32", + "int64", +] + + +class TestDataFrameToCSV: + def read_csv(self, path, **kwargs): + params = {"index_col": 0, "parse_dates": True} + params.update(**kwargs) + + return read_csv(path, **params) + + def test_to_csv_from_csv1(self, float_frame, datetime_frame): + + with tm.ensure_clean("__tmp_to_csv_from_csv1__") as path: + float_frame["A"][:5] = np.nan + + float_frame.to_csv(path) + float_frame.to_csv(path, columns=["A", "B"]) + float_frame.to_csv(path, header=False) + float_frame.to_csv(path, index=False) + + # test roundtrip + # freq does not roundtrip + datetime_frame.index = datetime_frame.index._with_freq(None) + datetime_frame.to_csv(path) + recons = self.read_csv(path) + tm.assert_frame_equal(datetime_frame, recons) + + datetime_frame.to_csv(path, index_label="index") + recons = self.read_csv(path, index_col=None) + + assert len(recons.columns) == len(datetime_frame.columns) + 1 + + # no index + datetime_frame.to_csv(path, index=False) + recons = self.read_csv(path, index_col=None) + tm.assert_almost_equal(datetime_frame.values, recons.values) + + # corner case + dm = DataFrame( + { + "s1": Series(range(3), index=np.arange(3)), + "s2": Series(range(2), index=np.arange(2)), + } + ) + dm.to_csv(path) + + recons = self.read_csv(path) + tm.assert_frame_equal(dm, recons) + + def test_to_csv_from_csv2(self, float_frame): + + with tm.ensure_clean("__tmp_to_csv_from_csv2__") as path: + + # duplicate index + df = DataFrame( + np.random.randn(3, 3), index=["a", "a", "b"], columns=["x", "y", "z"] + ) + df.to_csv(path) + result = self.read_csv(path) + tm.assert_frame_equal(result, df) + + midx = MultiIndex.from_tuples([("A", 1, 2), ("A", 1, 2), ("B", 1, 2)]) + df = DataFrame(np.random.randn(3, 3), index=midx, columns=["x", "y", "z"]) + + df.to_csv(path) + result = self.read_csv(path, index_col=[0, 1, 2], parse_dates=False) + tm.assert_frame_equal(result, df, check_names=False) + + # column aliases + col_aliases = Index(["AA", "X", "Y", "Z"]) + float_frame.to_csv(path, header=col_aliases) + + rs = self.read_csv(path) + xp = float_frame.copy() + xp.columns = col_aliases + tm.assert_frame_equal(xp, rs) + + msg = "Writing 4 cols but got 2 aliases" + with pytest.raises(ValueError, match=msg): + float_frame.to_csv(path, header=["AA", "X"]) + + def test_to_csv_from_csv3(self): + + with tm.ensure_clean("__tmp_to_csv_from_csv3__") as path: + df1 = DataFrame(np.random.randn(3, 1)) + df2 = DataFrame(np.random.randn(3, 1)) + + df1.to_csv(path) + df2.to_csv(path, mode="a", header=False) + xp = pd.concat([df1, df2]) + rs = read_csv(path, index_col=0) + rs.columns = 
[int(label) for label in rs.columns] + xp.columns = [int(label) for label in xp.columns] + tm.assert_frame_equal(xp, rs) + + def test_to_csv_from_csv4(self): + + with tm.ensure_clean("__tmp_to_csv_from_csv4__") as path: + # GH 10833 (TimedeltaIndex formatting) + dt = pd.Timedelta(seconds=1) + df = DataFrame( + {"dt_data": [i * dt for i in range(3)]}, + index=Index([i * dt for i in range(3)], name="dt_index"), + ) + df.to_csv(path) + + result = read_csv(path, index_col="dt_index") + result.index = pd.to_timedelta(result.index) + result["dt_data"] = pd.to_timedelta(result["dt_data"]) + + tm.assert_frame_equal(df, result, check_index_type=True) + + def test_to_csv_from_csv5(self, timezone_frame): + + # tz, 8260 + with tm.ensure_clean("__tmp_to_csv_from_csv5__") as path: + + timezone_frame.to_csv(path) + result = read_csv(path, index_col=0, parse_dates=["A"]) + + converter = ( + lambda c: to_datetime(result[c]) + .dt.tz_convert("UTC") + .dt.tz_convert(timezone_frame[c].dt.tz) + ) + result["B"] = converter("B") + result["C"] = converter("C") + tm.assert_frame_equal(result, timezone_frame) + + def test_to_csv_cols_reordering(self): + # GH3454 + chunksize = 5 + N = int(chunksize * 2.5) + + df = tm.makeCustomDataframe(N, 3) + cs = df.columns + cols = [cs[2], cs[0]] + + with tm.ensure_clean() as path: + df.to_csv(path, columns=cols, chunksize=chunksize) + rs_c = read_csv(path, index_col=0) + + tm.assert_frame_equal(df[cols], rs_c, check_names=False) + + def test_to_csv_new_dupe_cols(self): + def _check_df(df, cols=None): + with tm.ensure_clean() as path: + df.to_csv(path, columns=cols, chunksize=chunksize) + rs_c = read_csv(path, index_col=0) + + # we wrote them in a different order + # so compare them in that order + if cols is not None: + + if df.columns.is_unique: + rs_c.columns = cols + else: + indexer, missing = df.columns.get_indexer_non_unique(cols) + rs_c.columns = df.columns.take(indexer) + + for c in cols: + obj_df = df[c] + obj_rs = rs_c[c] + if isinstance(obj_df, Series): + tm.assert_series_equal(obj_df, obj_rs) + else: + tm.assert_frame_equal(obj_df, obj_rs, check_names=False) + + # wrote in the same order + else: + rs_c.columns = df.columns + tm.assert_frame_equal(df, rs_c, check_names=False) + + chunksize = 5 + N = int(chunksize * 2.5) + + # dupe cols + df = tm.makeCustomDataframe(N, 3) + df.columns = ["a", "a", "b"] + _check_df(df, None) + + # dupe cols with selection + cols = ["b", "a"] + _check_df(df, cols) + + @pytest.mark.slow + def test_to_csv_dtnat(self): + # GH3437 + def make_dtnat_arr(n, nnat=None): + if nnat is None: + nnat = int(n * 0.1) # 10% + s = list(date_range("2000", freq="5min", periods=n)) + if nnat: + for i in np.random.randint(0, len(s), nnat): + s[i] = NaT + i = np.random.randint(100) + s[-i] = NaT + s[i] = NaT + return s + + chunksize = 1000 + # N=35000 + s1 = make_dtnat_arr(chunksize + 5) + s2 = make_dtnat_arr(chunksize + 5, 0) + + # s3=make_dtnat_arr(chunksize+5,0) + with tm.ensure_clean("1.csv") as pth: + df = DataFrame({"a": s1, "b": s2}) + df.to_csv(pth, chunksize=chunksize) + + recons = self.read_csv(pth).apply(to_datetime) + tm.assert_frame_equal(df, recons, check_names=False) + + @pytest.mark.slow + def test_to_csv_moar(self): + def _do_test( + df, r_dtype=None, c_dtype=None, rnlvl=None, cnlvl=None, dupe_col=False + ): + + kwargs = {"parse_dates": False} + if cnlvl: + if rnlvl is not None: + kwargs["index_col"] = list(range(rnlvl)) + kwargs["header"] = list(range(cnlvl)) + + with tm.ensure_clean("__tmp_to_csv_moar__") as path: + df.to_csv(path,
encoding="utf8", chunksize=chunksize) + recons = self.read_csv(path, **kwargs) + else: + kwargs["header"] = 0 + + with tm.ensure_clean("__tmp_to_csv_moar__") as path: + df.to_csv(path, encoding="utf8", chunksize=chunksize) + recons = self.read_csv(path, **kwargs) + + def _to_uni(x): + if not isinstance(x, str): + return x.decode("utf8") + return x + + if dupe_col: + # read_Csv disambiguates the columns by + # labeling them dupe.1,dupe.2, etc'. monkey patch columns + recons.columns = df.columns + if rnlvl and not cnlvl: + delta_lvl = [recons.iloc[:, i].values for i in range(rnlvl - 1)] + ix = MultiIndex.from_arrays([list(recons.index)] + delta_lvl) + recons.index = ix + recons = recons.iloc[:, rnlvl - 1 :] + + type_map = {"i": "i", "f": "f", "s": "O", "u": "O", "dt": "O", "p": "O"} + if r_dtype: + if r_dtype == "u": # unicode + r_dtype = "O" + recons.index = np.array( + [_to_uni(label) for label in recons.index], dtype=r_dtype + ) + df.index = np.array( + [_to_uni(label) for label in df.index], dtype=r_dtype + ) + elif r_dtype == "dt": # unicode + r_dtype = "O" + recons.index = np.array( + [Timestamp(label) for label in recons.index], dtype=r_dtype + ) + df.index = np.array( + [Timestamp(label) for label in df.index], dtype=r_dtype + ) + elif r_dtype == "p": + r_dtype = "O" + idx_list = to_datetime(recons.index) + recons.index = np.array( + [Timestamp(label) for label in idx_list], dtype=r_dtype + ) + df.index = np.array( + list(map(Timestamp, df.index.to_timestamp())), dtype=r_dtype + ) + else: + r_dtype = type_map.get(r_dtype) + recons.index = np.array(recons.index, dtype=r_dtype) + df.index = np.array(df.index, dtype=r_dtype) + if c_dtype: + if c_dtype == "u": + c_dtype = "O" + recons.columns = np.array( + [_to_uni(label) for label in recons.columns], dtype=c_dtype + ) + df.columns = np.array( + [_to_uni(label) for label in df.columns], dtype=c_dtype + ) + elif c_dtype == "dt": + c_dtype = "O" + recons.columns = np.array( + [Timestamp(label) for label in recons.columns], dtype=c_dtype + ) + df.columns = np.array( + [Timestamp(label) for label in df.columns], dtype=c_dtype + ) + elif c_dtype == "p": + c_dtype = "O" + col_list = to_datetime(recons.columns) + recons.columns = np.array( + [Timestamp(label) for label in col_list], dtype=c_dtype + ) + col_list = df.columns.to_timestamp() + df.columns = np.array( + [Timestamp(label) for label in col_list], dtype=c_dtype + ) + else: + c_dtype = type_map.get(c_dtype) + recons.columns = np.array(recons.columns, dtype=c_dtype) + df.columns = np.array(df.columns, dtype=c_dtype) + + tm.assert_frame_equal(df, recons, check_names=False) + + N = 100 + chunksize = 1000 + ncols = 4 + base = chunksize // ncols + for nrows in [ + 2, + 10, + N - 1, + N, + N + 1, + N + 2, + 2 * N - 2, + 2 * N - 1, + 2 * N, + 2 * N + 1, + 2 * N + 2, + base - 1, + base, + base + 1, + ]: + _do_test( + tm.makeCustomDataframe(nrows, ncols, r_idx_type="dt", c_idx_type="s"), + "dt", + "s", + ) + + for r_idx_type, c_idx_type in [("i", "i"), ("s", "s"), ("u", "dt"), ("p", "p")]: + for ncols in [1, 2, 3, 4]: + base = chunksize // ncols + for nrows in [ + 2, + 10, + N - 1, + N, + N + 1, + N + 2, + 2 * N - 2, + 2 * N - 1, + 2 * N, + 2 * N + 1, + 2 * N + 2, + base - 1, + base, + base + 1, + ]: + _do_test( + tm.makeCustomDataframe( + nrows, ncols, r_idx_type=r_idx_type, c_idx_type=c_idx_type + ), + r_idx_type, + c_idx_type, + ) + + for ncols in [1, 2, 3, 4]: + base = chunksize // ncols + for nrows in [ + 10, + N - 2, + N - 1, + N, + N + 1, + N + 2, + 2 * N - 2, + 2 * N - 1, + 2 * N, + 2 * 
N + 1, + 2 * N + 2, + base - 1, + base, + base + 1, + ]: + _do_test(tm.makeCustomDataframe(nrows, ncols)) + + for nrows in [10, N - 2, N - 1, N, N + 1, N + 2]: + df = tm.makeCustomDataframe(nrows, 3) + cols = list(df.columns) + cols[:2] = ["dupe", "dupe"] + cols[-2:] = ["dupe", "dupe"] + ix = list(df.index) + ix[:2] = ["rdupe", "rdupe"] + ix[-2:] = ["rdupe", "rdupe"] + df.index = ix + df.columns = cols + _do_test(df, dupe_col=True) + + _do_test(DataFrame(index=np.arange(10))) + _do_test( + tm.makeCustomDataframe(chunksize // 2 + 1, 2, r_idx_nlevels=2), rnlvl=2 + ) + for ncols in [2, 3, 4]: + base = int(chunksize // ncols) + for nrows in [ + 10, + N - 2, + N - 1, + N, + N + 1, + N + 2, + 2 * N - 2, + 2 * N - 1, + 2 * N, + 2 * N + 1, + 2 * N + 2, + base - 1, + base, + base + 1, + ]: + _do_test(tm.makeCustomDataframe(nrows, ncols, r_idx_nlevels=2), rnlvl=2) + _do_test(tm.makeCustomDataframe(nrows, ncols, c_idx_nlevels=2), cnlvl=2) + _do_test( + tm.makeCustomDataframe( + nrows, ncols, r_idx_nlevels=2, c_idx_nlevels=2 + ), + rnlvl=2, + cnlvl=2, + ) + + def test_to_csv_from_csv_w_some_infs(self, float_frame): + + # test roundtrip with inf, -inf, nan, as full columns and mix + float_frame["G"] = np.nan + f = lambda x: [np.inf, np.nan][np.random.rand() < 0.5] + float_frame["H"] = float_frame.index.map(f) + + with tm.ensure_clean() as path: + float_frame.to_csv(path) + recons = self.read_csv(path) + + tm.assert_frame_equal(float_frame, recons) + tm.assert_frame_equal(np.isinf(float_frame), np.isinf(recons)) + + def test_to_csv_from_csv_w_all_infs(self, float_frame): + + # test roundtrip with inf, -inf, nan, as full columns and mix + float_frame["E"] = np.inf + float_frame["F"] = -np.inf + + with tm.ensure_clean() as path: + float_frame.to_csv(path) + recons = self.read_csv(path) + + tm.assert_frame_equal(float_frame, recons) + tm.assert_frame_equal(np.isinf(float_frame), np.isinf(recons)) + + def test_to_csv_no_index(self): + # GH 3624, after appending columns, to_csv fails + with tm.ensure_clean("__tmp_to_csv_no_index__") as path: + df = DataFrame({"c1": [1, 2, 3], "c2": [4, 5, 6]}) + df.to_csv(path, index=False) + result = read_csv(path) + tm.assert_frame_equal(df, result) + df["c3"] = Series([7, 8, 9], dtype="int64") + df.to_csv(path, index=False) + result = read_csv(path) + tm.assert_frame_equal(df, result) + + def test_to_csv_with_mix_columns(self): + # gh-11637: incorrect output when a mix of integer and string column + # names passed as columns parameter in to_csv + + df = DataFrame({0: ["a", "b", "c"], 1: ["aa", "bb", "cc"]}) + df["test"] = "txt" + assert df.to_csv() == df.to_csv(columns=[0, 1, "test"]) + + def test_to_csv_headers(self): + # GH6186, the presence or absence of `index` incorrectly + # causes to_csv to have different header semantics. 
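
For orientation, a minimal standalone sketch of the header-aliasing behaviour this test pins down, assuming only public pandas APIs (the frame values are illustrative):

import pandas as pd
from io import StringIO

df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"])
# header= takes a list of aliases written in place of the real column names
csv_text = df.to_csv(header=["X", "Y"], index=False)
assert csv_text.splitlines() == ["X,Y", "1,2", "3,4"]
# the roundtrip recovers the aliases, not the original names
assert list(pd.read_csv(StringIO(csv_text)).columns) == ["X", "Y"]
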
+ from_df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + to_df = DataFrame([[1, 2], [3, 4]], columns=["X", "Y"]) + with tm.ensure_clean("__tmp_to_csv_headers__") as path: + from_df.to_csv(path, header=["X", "Y"]) + recons = self.read_csv(path) + + tm.assert_frame_equal(to_df, recons) + + from_df.to_csv(path, index=False, header=["X", "Y"]) + recons = self.read_csv(path) + + return_value = recons.reset_index(inplace=True) + assert return_value is None + tm.assert_frame_equal(to_df, recons) + + def test_to_csv_multiindex(self, float_frame, datetime_frame): + + frame = float_frame + old_index = frame.index + arrays = np.arange(len(old_index) * 2).reshape(2, -1) + new_index = MultiIndex.from_arrays(arrays, names=["first", "second"]) + frame.index = new_index + + with tm.ensure_clean("__tmp_to_csv_multiindex__") as path: + + frame.to_csv(path, header=False) + frame.to_csv(path, columns=["A", "B"]) + + # round trip + frame.to_csv(path) + + df = self.read_csv(path, index_col=[0, 1], parse_dates=False) + + # TODO to_csv drops column name + tm.assert_frame_equal(frame, df, check_names=False) + assert frame.index.names == df.index.names + + # needed if setUp becomes a class method + float_frame.index = old_index + + # try multiindex with dates + tsframe = datetime_frame + old_index = tsframe.index + new_index = [old_index, np.arange(len(old_index))] + tsframe.index = MultiIndex.from_arrays(new_index) + + tsframe.to_csv(path, index_label=["time", "foo"]) + recons = self.read_csv(path, index_col=[0, 1]) + + # TODO to_csv drops column name + tm.assert_frame_equal(tsframe, recons, check_names=False) + + # do not load index + tsframe.to_csv(path) + recons = self.read_csv(path, index_col=None) + assert len(recons.columns) == len(tsframe.columns) + 2 + + # no index + tsframe.to_csv(path, index=False) + recons = self.read_csv(path, index_col=None) + tm.assert_almost_equal(recons.values, datetime_frame.values) + + # needed if setUp becomes class method + datetime_frame.index = old_index + + with tm.ensure_clean("__tmp_to_csv_multiindex__") as path: + # GH3571, GH1651, GH3141 + + def _make_frame(names=None): + if names is True: + names = ["first", "second"] + return DataFrame( + np.random.randint(0, 10, size=(3, 3)), + columns=MultiIndex.from_tuples( + [("bah", "foo"), ("bah", "bar"), ("ban", "baz")], names=names + ), + dtype="int64", + ) + + # column & index are multi-index + df = tm.makeCustomDataframe(5, 3, r_idx_nlevels=2, c_idx_nlevels=4) + df.to_csv(path) + result = read_csv(path, header=[0, 1, 2, 3], index_col=[0, 1]) + tm.assert_frame_equal(df, result) + + # column is mi + df = tm.makeCustomDataframe(5, 3, r_idx_nlevels=1, c_idx_nlevels=4) + df.to_csv(path) + result = read_csv(path, header=[0, 1, 2, 3], index_col=0) + tm.assert_frame_equal(df, result) + + # dup column names? 
+ df = tm.makeCustomDataframe(5, 3, r_idx_nlevels=3, c_idx_nlevels=4) + df.to_csv(path) + result = read_csv(path, header=[0, 1, 2, 3], index_col=[0, 1, 2]) + tm.assert_frame_equal(df, result) + + # writing with no index + df = _make_frame() + df.to_csv(path, index=False) + result = read_csv(path, header=[0, 1]) + tm.assert_frame_equal(df, result) + + # we lose the names here + df = _make_frame(True) + df.to_csv(path, index=False) + result = read_csv(path, header=[0, 1]) + assert com.all_none(*result.columns.names) + result.columns.names = df.columns.names + tm.assert_frame_equal(df, result) + + # whatsnew example + df = _make_frame() + df.to_csv(path) + result = read_csv(path, header=[0, 1], index_col=[0]) + tm.assert_frame_equal(df, result) + + df = _make_frame(True) + df.to_csv(path) + result = read_csv(path, header=[0, 1], index_col=[0]) + tm.assert_frame_equal(df, result) + + # invalid options + df = _make_frame(True) + df.to_csv(path) + + for i in [6, 7]: + msg = f"len of {i}, but only 5 lines in file" + with pytest.raises(ParserError, match=msg): + read_csv(path, header=list(range(i)), index_col=0) + + # write with cols + msg = "cannot specify cols with a MultiIndex" + with pytest.raises(TypeError, match=msg): + df.to_csv(path, columns=["foo", "bar"]) + + with tm.ensure_clean("__tmp_to_csv_multiindex__") as path: + # empty + tsframe[:0].to_csv(path) + recons = self.read_csv(path) + + exp = tsframe[:0] + exp.index = [] + + tm.assert_index_equal(recons.columns, exp.columns) + assert len(recons) == 0 + + def test_to_csv_interval_index(self): + # GH 28210 + df = DataFrame({"A": list("abc"), "B": range(3)}, index=pd.interval_range(0, 3)) + + with tm.ensure_clean("__tmp_to_csv_interval_index__.csv") as path: + df.to_csv(path) + result = self.read_csv(path, index_col=0) + + # can't roundtrip intervalindex via read_csv so check string repr (GH 23595) + expected = df.copy() + expected.index = expected.index.astype(str) + + tm.assert_frame_equal(result, expected) + + def test_to_csv_float32_nanrep(self): + df = DataFrame(np.random.randn(1, 4).astype(np.float32)) + df[1] = np.nan + + with tm.ensure_clean("__tmp_to_csv_float32_nanrep__.csv") as path: + df.to_csv(path, na_rep=999) + + with open(path) as f: + lines = f.readlines() + assert lines[1].split(",")[2] == "999" + + def test_to_csv_withcommas(self): + + # Commas inside fields should be correctly escaped when saving as CSV. 
+ df = DataFrame({"A": [1, 2, 3], "B": ["5,6", "7,8", "9,0"]}) + + with tm.ensure_clean("__tmp_to_csv_withcommas__.csv") as path: + df.to_csv(path) + df2 = self.read_csv(path) + tm.assert_frame_equal(df2, df) + + def test_to_csv_mixed(self): + def create_cols(name): + return [f"{name}{i:03d}" for i in range(5)] + + df_float = DataFrame( + np.random.randn(100, 5), dtype="float64", columns=create_cols("float") + ) + df_int = DataFrame( + np.random.randn(100, 5).astype("int64"), + dtype="int64", + columns=create_cols("int"), + ) + df_bool = DataFrame(True, index=df_float.index, columns=create_cols("bool")) + df_object = DataFrame( + "foo", index=df_float.index, columns=create_cols("object") + ) + df_dt = DataFrame( + Timestamp("20010101"), index=df_float.index, columns=create_cols("date") + ) + + # add in some nans + df_float.iloc[30:50, 1:3] = np.nan + + # ## this is a bug in read_csv right now #### + # df_dt.loc[30:50,1:3] = np.nan + + df = pd.concat([df_float, df_int, df_bool, df_object, df_dt], axis=1) + + # dtype + dtypes = {} + for n, dtype in [ + ("float", np.float64), + ("int", np.int64), + ("bool", np.bool_), + ("object", object), + ]: + for c in create_cols(n): + dtypes[c] = dtype + + with tm.ensure_clean() as filename: + df.to_csv(filename) + rs = read_csv( + filename, index_col=0, dtype=dtypes, parse_dates=create_cols("date") + ) + tm.assert_frame_equal(rs, df) + + def test_to_csv_dups_cols(self): + + df = DataFrame( + np.random.randn(1000, 30), + columns=list(range(15)) + list(range(15)), + dtype="float64", + ) + + with tm.ensure_clean() as filename: + df.to_csv(filename) # single dtype, fine + result = read_csv(filename, index_col=0) + result.columns = df.columns + tm.assert_frame_equal(result, df) + + df_float = DataFrame(np.random.randn(1000, 3), dtype="float64") + df_int = DataFrame(np.random.randn(1000, 3)).astype("int64") + df_bool = DataFrame(True, index=df_float.index, columns=range(3)) + df_object = DataFrame("foo", index=df_float.index, columns=range(3)) + df_dt = DataFrame(Timestamp("20010101"), index=df_float.index, columns=range(3)) + df = pd.concat( + [df_float, df_int, df_bool, df_object, df_dt], axis=1, ignore_index=True + ) + + df.columns = [0, 1, 2] * 5 + + with tm.ensure_clean() as filename: + df.to_csv(filename) + result = read_csv(filename, index_col=0) + + # date cols + for i in ["0.4", "1.4", "2.4"]: + result[i] = to_datetime(result[i]) + + result.columns = df.columns + tm.assert_frame_equal(result, df) + + # GH3457 + + N = 10 + df = tm.makeCustomDataframe(N, 3) + df.columns = ["a", "a", "b"] + + with tm.ensure_clean() as filename: + df.to_csv(filename) + + # read_csv will rename the dups columns + result = read_csv(filename, index_col=0) + result = result.rename(columns={"a.1": "a"}) + tm.assert_frame_equal(result, df) + + def test_to_csv_chunking(self): + + aa = DataFrame({"A": range(100000)}) + aa["B"] = aa.A + 1.0 + aa["C"] = aa.A + 2.0 + aa["D"] = aa.A + 3.0 + + for chunksize in [10000, 50000, 100000]: + with tm.ensure_clean() as filename: + aa.to_csv(filename, chunksize=chunksize) + rs = read_csv(filename, index_col=0) + tm.assert_frame_equal(rs, aa) + + @pytest.mark.slow + def test_to_csv_wide_frame_formatting(self): + # Issue #8621 + df = DataFrame(np.random.randn(1, 100010), columns=None, index=None) + with tm.ensure_clean() as filename: + df.to_csv(filename, header=False, index=False) + rs = read_csv(filename, header=None) + tm.assert_frame_equal(rs, df) + + def test_to_csv_bug(self): + f1 = StringIO("a,1.0\nb,2.0") + df = self.read_csv(f1, 
header=None) + newdf = DataFrame({"t": df[df.columns[0]]}) + + with tm.ensure_clean() as path: + newdf.to_csv(path) + + recons = read_csv(path, index_col=0) + # don't check_names as t != 1 + tm.assert_frame_equal(recons, newdf, check_names=False) + + def test_to_csv_unicode(self): + + df = DataFrame({"c/\u03c3": [1, 2, 3]}) + with tm.ensure_clean() as path: + + df.to_csv(path, encoding="UTF-8") + df2 = read_csv(path, index_col=0, encoding="UTF-8") + tm.assert_frame_equal(df, df2) + + df.to_csv(path, encoding="UTF-8", index=False) + df2 = read_csv(path, index_col=None, encoding="UTF-8") + tm.assert_frame_equal(df, df2) + + def test_to_csv_unicode_index_col(self): + buf = StringIO("") + df = DataFrame( + [["\u05d0", "d2", "d3", "d4"], ["a1", "a2", "a3", "a4"]], + columns=["\u05d0", "\u05d1", "\u05d2", "\u05d3"], + index=["\u05d0", "\u05d1"], + ) + + df.to_csv(buf, encoding="UTF-8") + buf.seek(0) + + df2 = read_csv(buf, index_col=0, encoding="UTF-8") + tm.assert_frame_equal(df, df2) + + def test_to_csv_stringio(self, float_frame): + buf = StringIO() + float_frame.to_csv(buf) + buf.seek(0) + recons = read_csv(buf, index_col=0) + tm.assert_frame_equal(recons, float_frame) + + def test_to_csv_float_format(self): + + df = DataFrame( + [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], + index=["A", "B"], + columns=["X", "Y", "Z"], + ) + + with tm.ensure_clean() as filename: + + df.to_csv(filename, float_format="%.2f") + + rs = read_csv(filename, index_col=0) + xp = DataFrame( + [[0.12, 0.23, 0.57], [12.32, 123123.20, 321321.20]], + index=["A", "B"], + columns=["X", "Y", "Z"], + ) + tm.assert_frame_equal(rs, xp) + + def test_to_csv_unicodewriter_quoting(self): + df = DataFrame({"A": [1, 2, 3], "B": ["foo", "bar", "baz"]}) + + buf = StringIO() + df.to_csv(buf, index=False, quoting=csv.QUOTE_NONNUMERIC, encoding="utf-8") + + result = buf.getvalue() + expected_rows = ['"A","B"', '1,"foo"', '2,"bar"', '3,"baz"'] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + assert result == expected + + def test_to_csv_quote_none(self): + # GH4328 + df = DataFrame({"A": ["hello", '{"hello"}']}) + for encoding in (None, "utf-8"): + buf = StringIO() + df.to_csv(buf, quoting=csv.QUOTE_NONE, encoding=encoding, index=False) + + result = buf.getvalue() + expected_rows = ["A", "hello", '{"hello"}'] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + assert result == expected + + def test_to_csv_index_no_leading_comma(self): + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=["one", "two", "three"]) + + buf = StringIO() + df.to_csv(buf, index_label=False) + + expected_rows = ["A,B", "one,1,4", "two,2,5", "three,3,6"] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + assert buf.getvalue() == expected + + def test_to_csv_line_terminators(self): + # see gh-20353 + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=["one", "two", "three"]) + + with tm.ensure_clean() as path: + # case 1: CRLF as line terminator + df.to_csv(path, line_terminator="\r\n") + expected = b",A,B\r\none,1,4\r\ntwo,2,5\r\nthree,3,6\r\n" + + with open(path, mode="rb") as f: + assert f.read() == expected + + with tm.ensure_clean() as path: + # case 2: LF as line terminator + df.to_csv(path, line_terminator="\n") + expected = b",A,B\none,1,4\ntwo,2,5\nthree,3,6\n" + + with open(path, mode="rb") as f: + assert f.read() == expected + + with tm.ensure_clean() as path: + # case 3: The default line terminator(=os.linesep)(gh-21406) + df.to_csv(path) + os_linesep = os.linesep.encode("utf-8") + expected 
= ( + b",A,B" + + os_linesep + + b"one,1,4" + + os_linesep + + b"two,2,5" + + os_linesep + + b"three,3,6" + + os_linesep + ) + + with open(path, mode="rb") as f: + assert f.read() == expected + + def test_to_csv_from_csv_categorical(self): + + # CSV with categoricals should result in the same output + # as when one would add a "normal" Series/DataFrame. + s = Series(pd.Categorical(["a", "b", "b", "a", "a", "c", "c", "c"])) + s2 = Series(["a", "b", "b", "a", "a", "c", "c", "c"]) + res = StringIO() + + s.to_csv(res, header=False) + exp = StringIO() + + s2.to_csv(exp, header=False) + assert res.getvalue() == exp.getvalue() + + df = DataFrame({"s": s}) + df2 = DataFrame({"s": s2}) + + res = StringIO() + df.to_csv(res) + + exp = StringIO() + df2.to_csv(exp) + + assert res.getvalue() == exp.getvalue() + + def test_to_csv_path_is_none(self, float_frame): + # GH 8215 + # Make sure we return string for consistency with + # Series.to_csv() + csv_str = float_frame.to_csv(path_or_buf=None) + assert isinstance(csv_str, str) + recons = read_csv(StringIO(csv_str), index_col=0) + tm.assert_frame_equal(float_frame, recons) + + @pytest.mark.parametrize( + "df,encoding", + [ + ( + DataFrame( + [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], + index=["A", "B"], + columns=["X", "Y", "Z"], + ), + None, + ), + # GH 21241, 21118 + (DataFrame([["abc", "def", "ghi"]], columns=["X", "Y", "Z"]), "ascii"), + (DataFrame(5 * [[123, "你好", "世界"]], columns=["X", "Y", "Z"]), "gb2312"), + ( + DataFrame(5 * [[123, "Γειά σου", "Κόσμε"]], columns=["X", "Y", "Z"]), + "cp737", + ), + ], + ) + def test_to_csv_compression(self, df, encoding, compression): + + with tm.ensure_clean() as filename: + + df.to_csv(filename, compression=compression, encoding=encoding) + # test the round trip - to_csv -> read_csv + result = read_csv( + filename, compression=compression, index_col=0, encoding=encoding + ) + tm.assert_frame_equal(df, result) + + # test the round trip using file handle - to_csv -> read_csv + with get_handle( + filename, "w", compression=compression, encoding=encoding + ) as handles: + df.to_csv(handles.handle, encoding=encoding) + assert not handles.handle.closed + + result = read_csv( + filename, + compression=compression, + encoding=encoding, + index_col=0, + ).squeeze("columns") + tm.assert_frame_equal(df, result) + + # explicitly make sure file is compressed + with tm.decompress_file(filename, compression) as fh: + text = fh.read().decode(encoding or "utf8") + for col in df.columns: + assert col in text + + with tm.decompress_file(filename, compression) as fh: + tm.assert_frame_equal(df, read_csv(fh, index_col=0, encoding=encoding)) + + def test_to_csv_date_format(self, datetime_frame): + with tm.ensure_clean("__tmp_to_csv_date_format__") as path: + dt_index = datetime_frame.index + datetime_frame = DataFrame( + {"A": dt_index, "B": dt_index.shift(1)}, index=dt_index + ) + datetime_frame.to_csv(path, date_format="%Y%m%d") + + # Check that the data was put in the specified format + test = read_csv(path, index_col=0) + + datetime_frame_int = datetime_frame.applymap( + lambda x: int(x.strftime("%Y%m%d")) + ) + datetime_frame_int.index = datetime_frame_int.index.map( + lambda x: int(x.strftime("%Y%m%d")) + ) + + tm.assert_frame_equal(test, datetime_frame_int) + + datetime_frame.to_csv(path, date_format="%Y-%m-%d") + + # Check that the data was put in the specified format + test = read_csv(path, index_col=0) + datetime_frame_str = datetime_frame.applymap( + lambda x: x.strftime("%Y-%m-%d") + ) + 
datetime_frame_str.index = datetime_frame_str.index.map( + lambda x: x.strftime("%Y-%m-%d") + ) + + tm.assert_frame_equal(test, datetime_frame_str) + + # Check that columns get converted + datetime_frame_columns = datetime_frame.T + datetime_frame_columns.to_csv(path, date_format="%Y%m%d") + + test = read_csv(path, index_col=0) + + datetime_frame_columns = datetime_frame_columns.applymap( + lambda x: int(x.strftime("%Y%m%d")) + ) + # Columns don't get converted to ints by read_csv + datetime_frame_columns.columns = datetime_frame_columns.columns.map( + lambda x: x.strftime("%Y%m%d") + ) + + tm.assert_frame_equal(test, datetime_frame_columns) + + # test NaTs + nat_index = to_datetime( + ["NaT"] * 10 + ["2000-01-01", "1/1/2000", "1-1-2000"] + ) + nat_frame = DataFrame({"A": nat_index}, index=nat_index) + nat_frame.to_csv(path, date_format="%Y-%m-%d") + + test = read_csv(path, parse_dates=[0, 1], index_col=0) + + tm.assert_frame_equal(test, nat_frame) + + def test_to_csv_with_dst_transitions(self): + + with tm.ensure_clean("csv_date_format_with_dst") as path: + # make sure we are not failing on transitions + times = date_range( + "2013-10-26 23:00", + "2013-10-27 01:00", + tz="Europe/London", + freq="H", + ambiguous="infer", + ) + + for i in [times, times + pd.Timedelta("10s")]: + i = i._with_freq(None) # freq is not preserved by read_csv + time_range = np.array(range(len(i)), dtype="int64") + df = DataFrame({"A": time_range}, index=i) + df.to_csv(path, index=True) + # we have to reconvert the index as we + # don't parse the tz's + result = read_csv(path, index_col=0) + result.index = to_datetime(result.index, utc=True).tz_convert( + "Europe/London" + ) + tm.assert_frame_equal(result, df) + + # GH11619 + idx = date_range("2015-01-01", "2015-12-31", freq="H", tz="Europe/Paris") + idx = idx._with_freq(None) # freq does not round-trip + idx._data._freq = None # otherwise there is trouble on unpickle + df = DataFrame({"values": 1, "idx": idx}, index=idx) + with tm.ensure_clean("csv_date_format_with_dst") as path: + df.to_csv(path, index=True) + result = read_csv(path, index_col=0) + result.index = to_datetime(result.index, utc=True).tz_convert( + "Europe/Paris" + ) + result["idx"] = to_datetime(result["idx"], utc=True).astype( + "datetime64[ns, Europe/Paris]" + ) + tm.assert_frame_equal(result, df) + + # assert working + df.astype(str) + + with tm.ensure_clean("csv_date_format_with_dst") as path: + df.to_pickle(path) + result = pd.read_pickle(path) + tm.assert_frame_equal(result, df) + + def test_to_csv_quoting(self): + df = DataFrame( + { + "c_bool": [True, False], + "c_float": [1.0, 3.2], + "c_int": [42, np.nan], + "c_string": ["a", "b,c"], + } + ) + + expected_rows = [ + ",c_bool,c_float,c_int,c_string", + "0,True,1.0,42.0,a", + '1,False,3.2,,"b,c"', + ] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + + result = df.to_csv() + assert result == expected + + result = df.to_csv(quoting=None) + assert result == expected + + expected_rows = [ + ",c_bool,c_float,c_int,c_string", + "0,True,1.0,42.0,a", + '1,False,3.2,,"b,c"', + ] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + + result = df.to_csv(quoting=csv.QUOTE_MINIMAL) + assert result == expected + + expected_rows = [ + '"","c_bool","c_float","c_int","c_string"', + '"0","True","1.0","42.0","a"', + '"1","False","3.2","","b,c"', + ] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + + result = df.to_csv(quoting=csv.QUOTE_ALL) + assert result == expected + + # see gh-12922, gh-13259: make sure changes to + # 
the formatters do not break this behaviour + expected_rows = [ + '"","c_bool","c_float","c_int","c_string"', + '0,True,1.0,42.0,"a"', + '1,False,3.2,"","b,c"', + ] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + result = df.to_csv(quoting=csv.QUOTE_NONNUMERIC) + assert result == expected + + msg = "need to escape, but no escapechar set" + with pytest.raises(csv.Error, match=msg): + df.to_csv(quoting=csv.QUOTE_NONE) + + with pytest.raises(csv.Error, match=msg): + df.to_csv(quoting=csv.QUOTE_NONE, escapechar=None) + + expected_rows = [ + ",c_bool,c_float,c_int,c_string", + "0,True,1.0,42.0,a", + "1,False,3.2,,b!,c", + ] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + result = df.to_csv(quoting=csv.QUOTE_NONE, escapechar="!") + assert result == expected + + expected_rows = [ + ",c_bool,c_ffloat,c_int,c_string", + "0,True,1.0,42.0,a", + "1,False,3.2,,bf,c", + ] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + result = df.to_csv(quoting=csv.QUOTE_NONE, escapechar="f") + assert result == expected + + # see gh-3503: quoting Windows line terminators + # presents with encoding? + text_rows = ["a,b,c", '1,"test \r\n",3'] + text = tm.convert_rows_list_to_csv_str(text_rows) + df = read_csv(StringIO(text)) + + buf = StringIO() + df.to_csv(buf, encoding="utf-8", index=False) + assert buf.getvalue() == text + + # xref gh-7791: make sure the quoting parameter is passed through + # with multi-indexes + df = DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}) + df = df.set_index(["a", "b"]) + + expected_rows = ['"a","b","c"', '"1","3","5"', '"2","4","6"'] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + assert df.to_csv(quoting=csv.QUOTE_ALL) == expected + + def test_period_index_date_overflow(self): + # see gh-15982 + + dates = ["1990-01-01", "2000-01-01", "3005-01-01"] + index = pd.PeriodIndex(dates, freq="D") + + df = DataFrame([4, 5, 6], index=index) + result = df.to_csv() + + expected_rows = [",0", "1990-01-01,4", "2000-01-01,5", "3005-01-01,6"] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + assert result == expected + + date_format = "%m-%d-%Y" + result = df.to_csv(date_format=date_format) + + expected_rows = [",0", "01-01-1990,4", "01-01-2000,5", "01-01-3005,6"] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + assert result == expected + + # Overflow with pd.NaT + dates = ["1990-01-01", NaT, "3005-01-01"] + index = pd.PeriodIndex(dates, freq="D") + + df = DataFrame([4, 5, 6], index=index) + result = df.to_csv() + + expected_rows = [",0", "1990-01-01,4", ",5", "3005-01-01,6"] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + assert result == expected + + def test_multi_index_header(self): + # see gh-5539 + columns = MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1), ("b", 2)]) + df = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]]) + df.columns = columns + + header = ["a", "b", "c", "d"] + result = df.to_csv(header=header) + + expected_rows = [",a,b,c,d", "0,1,2,3,4", "1,5,6,7,8"] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + assert result == expected + + def test_to_csv_single_level_multi_index(self): + # see gh-26303 + index = Index([(1,), (2,), (3,)]) + df = DataFrame([[1, 2, 3]], columns=index) + df = df.reindex(columns=[(1,), (3,)]) + expected = ",1,3\n0,1,3\n" + result = df.to_csv(line_terminator="\n") + tm.assert_almost_equal(result, expected) + + def test_gz_lineend(self): + # GH 25311 + df = DataFrame({"a": [1, 2]}) + expected_rows = ["a", "1", "2"] + expected = 
tm.convert_rows_list_to_csv_str(expected_rows) + with tm.ensure_clean("__test_gz_lineend.csv.gz") as path: + df.to_csv(path, index=False) + with tm.decompress_file(path, compression="gzip") as f: + result = f.read().decode("utf-8") + + assert result == expected + + def test_to_csv_numpy_16_bug(self): + frame = DataFrame({"a": date_range("1/1/2000", periods=10)}) + + buf = StringIO() + frame.to_csv(buf) + + result = buf.getvalue() + assert "2000-01-01" in result + + def test_to_csv_na_quoting(self): + # GH 15891 + # Normalize carriage return for Windows OS + result = ( + DataFrame([None, None]) + .to_csv(None, header=False, index=False, na_rep="") + .replace("\r\n", "\n") + ) + expected = '""\n""\n' + assert result == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_to_dict.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_to_dict.py new file mode 100644 index 0000000..31ea3e5 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_to_dict.py @@ -0,0 +1,346 @@ +from collections import ( + OrderedDict, + defaultdict, +) +from datetime import datetime + +import numpy as np +import pytest +import pytz + +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + Timestamp, +) +import pandas._testing as tm + + +class TestDataFrameToDict: + def test_to_dict_timestamp(self): + + # GH#11247 + # split/records producing np.datetime64 rather than Timestamps + # on datetime64[ns] dtypes only + + tsmp = Timestamp("20130101") + test_data = DataFrame({"A": [tsmp, tsmp], "B": [tsmp, tsmp]}) + test_data_mixed = DataFrame({"A": [tsmp, tsmp], "B": [1, 2]}) + + expected_records = [{"A": tsmp, "B": tsmp}, {"A": tsmp, "B": tsmp}] + expected_records_mixed = [{"A": tsmp, "B": 1}, {"A": tsmp, "B": 2}] + + assert test_data.to_dict(orient="records") == expected_records + assert test_data_mixed.to_dict(orient="records") == expected_records_mixed + + expected_series = { + "A": Series([tsmp, tsmp], name="A"), + "B": Series([tsmp, tsmp], name="B"), + } + expected_series_mixed = { + "A": Series([tsmp, tsmp], name="A"), + "B": Series([1, 2], name="B"), + } + + tm.assert_dict_equal(test_data.to_dict(orient="series"), expected_series) + tm.assert_dict_equal( + test_data_mixed.to_dict(orient="series"), expected_series_mixed + ) + + expected_split = { + "index": [0, 1], + "data": [[tsmp, tsmp], [tsmp, tsmp]], + "columns": ["A", "B"], + } + expected_split_mixed = { + "index": [0, 1], + "data": [[tsmp, 1], [tsmp, 2]], + "columns": ["A", "B"], + } + + tm.assert_dict_equal(test_data.to_dict(orient="split"), expected_split) + tm.assert_dict_equal( + test_data_mixed.to_dict(orient="split"), expected_split_mixed + ) + + def test_to_dict_index_not_unique_with_index_orient(self): + # GH#22801 + # Data loss when indexes are not unique. Raise ValueError. 
+ df = DataFrame({"a": [1, 2], "b": [0.5, 0.75]}, index=["A", "A"]) + msg = "DataFrame index must be unique for orient='index'" + with pytest.raises(ValueError, match=msg): + df.to_dict(orient="index") + + def test_to_dict_invalid_orient(self): + df = DataFrame({"A": [0, 1]}) + msg = "orient 'xinvalid' not understood" + with pytest.raises(ValueError, match=msg): + df.to_dict(orient="xinvalid") + + @pytest.mark.parametrize("orient", ["d", "l", "r", "sp", "s", "i"]) + def test_to_dict_short_orient_warns(self, orient): + # GH#32515 + df = DataFrame({"A": [0, 1]}) + msg = "Using short name for 'orient' is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + df.to_dict(orient=orient) + + @pytest.mark.parametrize("mapping", [dict, defaultdict(list), OrderedDict]) + def test_to_dict(self, mapping): + # orient= should only take the listed options + # see GH#32515 + test_data = {"A": {"1": 1, "2": 2}, "B": {"1": "1", "2": "2", "3": "3"}} + + # GH#16122 + recons_data = DataFrame(test_data).to_dict(into=mapping) + + for k, v in test_data.items(): + for k2, v2 in v.items(): + assert v2 == recons_data[k][k2] + + recons_data = DataFrame(test_data).to_dict("list", mapping) + + for k, v in test_data.items(): + for k2, v2 in v.items(): + assert v2 == recons_data[k][int(k2) - 1] + + recons_data = DataFrame(test_data).to_dict("series", mapping) + + for k, v in test_data.items(): + for k2, v2 in v.items(): + assert v2 == recons_data[k][k2] + + recons_data = DataFrame(test_data).to_dict("split", mapping) + expected_split = { + "columns": ["A", "B"], + "index": ["1", "2", "3"], + "data": [[1.0, "1"], [2.0, "2"], [np.nan, "3"]], + } + tm.assert_dict_equal(recons_data, expected_split) + + recons_data = DataFrame(test_data).to_dict("records", mapping) + expected_records = [ + {"A": 1.0, "B": "1"}, + {"A": 2.0, "B": "2"}, + {"A": np.nan, "B": "3"}, + ] + assert isinstance(recons_data, list) + assert len(recons_data) == 3 + for left, right in zip(recons_data, expected_records): + tm.assert_dict_equal(left, right) + + # GH#10844 + recons_data = DataFrame(test_data).to_dict("index") + + for k, v in test_data.items(): + for k2, v2 in v.items(): + assert v2 == recons_data[k2][k] + + df = DataFrame(test_data) + df["duped"] = df[df.columns[0]] + recons_data = df.to_dict("index") + comp_data = test_data.copy() + comp_data["duped"] = comp_data[df.columns[0]] + for k, v in comp_data.items(): + for k2, v2 in v.items(): + assert v2 == recons_data[k2][k] + + @pytest.mark.parametrize("mapping", [list, defaultdict, []]) + def test_to_dict_errors(self, mapping): + # GH#16122 + df = DataFrame(np.random.randn(3, 3)) + msg = "|".join( + [ + "unsupported type: ", + r"to_dict\(\) only accepts initialized defaultdicts", + ] + ) + with pytest.raises(TypeError, match=msg): + df.to_dict(into=mapping) + + def test_to_dict_not_unique_warning(self): + # GH#16927: When converting to a dict, if a column has a non-unique name + # it will be dropped, throwing a warning. 
+ df = DataFrame([[1, 2, 3]], columns=["a", "a", "b"]) + with tm.assert_produces_warning(UserWarning): + df.to_dict() + + # orient - orient argument to to_dict function + # item_getter - function for extracting value from + # the resulting dict using column name and index + @pytest.mark.parametrize( + "orient,item_getter", + [ + ("dict", lambda d, col, idx: d[col][idx]), + ("records", lambda d, col, idx: d[idx][col]), + ("list", lambda d, col, idx: d[col][idx]), + ("split", lambda d, col, idx: d["data"][idx][d["columns"].index(col)]), + ("index", lambda d, col, idx: d[idx][col]), + ], + ) + def test_to_dict_box_scalars(self, orient, item_getter): + # GH#14216, GH#23753 + # make sure that we are boxing properly + df = DataFrame({"a": [1, 2], "b": [0.1, 0.2]}) + result = df.to_dict(orient=orient) + assert isinstance(item_getter(result, "a", 0), int) + assert isinstance(item_getter(result, "b", 0), float) + + def test_to_dict_tz(self): + # GH#18372 When converting to dict with orient='records' columns of + # datetime that are tz-aware were not converted to required arrays + data = [ + (datetime(2017, 11, 18, 21, 53, 0, 219225, tzinfo=pytz.utc),), + (datetime(2017, 11, 18, 22, 6, 30, 61810, tzinfo=pytz.utc),), + ] + df = DataFrame(list(data), columns=["d"]) + + result = df.to_dict(orient="records") + expected = [ + {"d": Timestamp("2017-11-18 21:53:00.219225+0000", tz=pytz.utc)}, + {"d": Timestamp("2017-11-18 22:06:30.061810+0000", tz=pytz.utc)}, + ] + tm.assert_dict_equal(result[0], expected[0]) + tm.assert_dict_equal(result[1], expected[1]) + + @pytest.mark.parametrize( + "into, expected", + [ + ( + dict, + { + 0: {"int_col": 1, "float_col": 1.0}, + 1: {"int_col": 2, "float_col": 2.0}, + 2: {"int_col": 3, "float_col": 3.0}, + }, + ), + ( + OrderedDict, + OrderedDict( + [ + (0, {"int_col": 1, "float_col": 1.0}), + (1, {"int_col": 2, "float_col": 2.0}), + (2, {"int_col": 3, "float_col": 3.0}), + ] + ), + ), + ( + defaultdict(dict), + defaultdict( + dict, + { + 0: {"int_col": 1, "float_col": 1.0}, + 1: {"int_col": 2, "float_col": 2.0}, + 2: {"int_col": 3, "float_col": 3.0}, + }, + ), + ), + ], + ) + def test_to_dict_index_dtypes(self, into, expected): + # GH#18580 + # When using to_dict(orient='index') on a dataframe with int + # and float columns only the int columns were cast to float + + df = DataFrame({"int_col": [1, 2, 3], "float_col": [1.0, 2.0, 3.0]}) + + result = df.to_dict(orient="index", into=into) + cols = ["int_col", "float_col"] + result = DataFrame.from_dict(result, orient="index")[cols] + expected = DataFrame.from_dict(expected, orient="index")[cols] + tm.assert_frame_equal(result, expected) + + def test_to_dict_numeric_names(self): + # GH#24940 + df = DataFrame({str(i): [i] for i in range(5)}) + result = set(df.to_dict("records")[0].keys()) + expected = set(df.columns) + assert result == expected + + def test_to_dict_wide(self): + # GH#24939 + df = DataFrame({(f"A_{i:d}"): [i] for i in range(256)}) + result = df.to_dict("records")[0] + expected = {f"A_{i:d}": i for i in range(256)} + assert result == expected + + @pytest.mark.parametrize( + "data,dtype", + ( + ([True, True, False], bool), + [ + [ + datetime(2018, 1, 1), + datetime(2019, 2, 2), + datetime(2020, 3, 3), + ], + Timestamp, + ], + [[1.0, 2.0, 3.0], float], + [[1, 2, 3], int], + [["X", "Y", "Z"], str], + ), + ) + def test_to_dict_orient_dtype(self, data, dtype): + # GH22620 & GH21256 + + df = DataFrame({"a": data}) + d = df.to_dict(orient="records") + assert all(type(record["a"]) is dtype for record in d) + + 
@pytest.mark.parametrize( + "data,expected_dtype", + ( + [np.uint64(2), int], + [np.int64(-9), int], + [np.float64(1.1), float], + [np.bool_(True), bool], + [np.datetime64("2005-02-25"), Timestamp], + ), + ) + def test_to_dict_scalar_constructor_orient_dtype(self, data, expected_dtype): + # GH22620 & GH21256 + + df = DataFrame({"a": data}, index=[0]) + d = df.to_dict(orient="records") + result = type(d[0]["a"]) + assert result is expected_dtype + + def test_to_dict_mixed_numeric_frame(self): + # GH 12859 + df = DataFrame({"a": [1.0], "b": [9.0]}) + result = df.reset_index().to_dict("records") + expected = [{"index": 0, "a": 1.0, "b": 9.0}] + assert result == expected + + @pytest.mark.parametrize( + "index", + [ + None, + Index(["aa", "bb"]), + Index(["aa", "bb"], name="cc"), + MultiIndex.from_tuples([("a", "b"), ("a", "c")]), + MultiIndex.from_tuples([("a", "b"), ("a", "c")], names=["n1", "n2"]), + ], + ) + @pytest.mark.parametrize( + "columns", + [ + ["x", "y"], + Index(["x", "y"]), + Index(["x", "y"], name="z"), + MultiIndex.from_tuples([("x", 1), ("y", 2)]), + MultiIndex.from_tuples([("x", 1), ("y", 2)], names=["z1", "z2"]), + ], + ) + def test_to_dict_orient_tight(self, index, columns): + df = DataFrame.from_records( + [[1, 3], [2, 4]], + columns=columns, + index=index, + ) + roundtrip = DataFrame.from_dict(df.to_dict(orient="tight"), orient="tight") + + tm.assert_frame_equal(df, roundtrip) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_to_dict_of_blocks.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_to_dict_of_blocks.py new file mode 100644 index 0000000..c81bed9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_to_dict_of_blocks.py @@ -0,0 +1,74 @@ +import numpy as np + +import pandas.util._test_decorators as td + +from pandas import ( + DataFrame, + MultiIndex, +) +import pandas._testing as tm +from pandas.core.arrays import PandasArray + +pytestmark = td.skip_array_manager_invalid_test + + +class TestToDictOfBlocks: + def test_copy_blocks(self, float_frame): + # GH#9607 + df = DataFrame(float_frame, copy=True) + column = df.columns[0] + + # use the default copy=True, change a column + blocks = df._to_dict_of_blocks(copy=True) + for _df in blocks.values(): + if column in _df: + _df.loc[:, column] = _df[column] + 1 + + # make sure we did not change the original DataFrame + assert not _df[column].equals(df[column]) + + def test_no_copy_blocks(self, float_frame): + # GH#9607 + df = DataFrame(float_frame, copy=True) + column = df.columns[0] + + # use the copy=False, change a column + blocks = df._to_dict_of_blocks(copy=False) + for _df in blocks.values(): + if column in _df: + _df.loc[:, column] = _df[column] + 1 + + # make sure we did change the original DataFrame + assert _df[column].equals(df[column]) + + +def test_to_dict_of_blocks_item_cache(): + # Calling to_dict_of_blocks should not poison item_cache + df = DataFrame({"a": [1, 2, 3, 4], "b": ["a", "b", "c", "d"]}) + df["c"] = PandasArray(np.array([1, 2, None, 3], dtype=object)) + mgr = df._mgr + assert len(mgr.blocks) == 3 # i.e. 
not consolidated + + ser = df["b"] # populations item_cache["b"] + + df._to_dict_of_blocks() + + # Check that the to_dict_of_blocks didn't break link between ser and df + ser.values[0] = "foo" + assert df.loc[0, "b"] == "foo" + + assert df["b"] is ser + + +def test_set_change_dtype_slice(): + # GH#8850 + cols = MultiIndex.from_tuples([("1st", "a"), ("2nd", "b"), ("3rd", "c")]) + df = DataFrame([[1.0, 2, 3], [4.0, 5, 6]], columns=cols) + df["2nd"] = df["2nd"] * 2.0 + + blocks = df._to_dict_of_blocks() + assert sorted(blocks.keys()) == ["float64", "int64"] + tm.assert_frame_equal( + blocks["float64"], DataFrame([[1.0, 4.0], [4.0, 10.0]], columns=cols[:2]) + ) + tm.assert_frame_equal(blocks["int64"], DataFrame([[3], [6]], columns=cols[2:])) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_to_numpy.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_to_numpy.py new file mode 100644 index 0000000..532f7c8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_to_numpy.py @@ -0,0 +1,38 @@ +import numpy as np + +import pandas.util._test_decorators as td + +from pandas import ( + DataFrame, + Timestamp, +) +import pandas._testing as tm + + +class TestToNumpy: + def test_to_numpy(self): + df = DataFrame({"A": [1, 2], "B": [3, 4.5]}) + expected = np.array([[1, 3], [2, 4.5]]) + result = df.to_numpy() + tm.assert_numpy_array_equal(result, expected) + + def test_to_numpy_dtype(self): + df = DataFrame({"A": [1, 2], "B": [3, 4.5]}) + expected = np.array([[1, 3], [2, 4]], dtype="int64") + result = df.to_numpy(dtype="int64") + tm.assert_numpy_array_equal(result, expected) + + @td.skip_array_manager_invalid_test + def test_to_numpy_copy(self): + arr = np.random.randn(4, 3) + df = DataFrame(arr) + assert df.values.base is arr + assert df.to_numpy(copy=False).base is arr + assert df.to_numpy(copy=True).base is not arr + + def test_to_numpy_mixed_dtype_to_str(self): + # https://github.com/pandas-dev/pandas/issues/35455 + df = DataFrame([[Timestamp("2020-01-01 00:00:00"), 100.0]]) + result = df.to_numpy(dtype=str) + expected = np.array([["2020-01-01 00:00:00", "100.0"]], dtype=str) + tm.assert_numpy_array_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_to_period.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_to_period.py new file mode 100644 index 0000000..cd1b4b6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_to_period.py @@ -0,0 +1,85 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + DatetimeIndex, + PeriodIndex, + Series, + date_range, + period_range, +) +import pandas._testing as tm + + +class TestToPeriod: + def test_to_period(self, frame_or_series): + K = 5 + + dr = date_range("1/1/2000", "1/1/2001", freq="D") + obj = DataFrame( + np.random.randn(len(dr), K), index=dr, columns=["A", "B", "C", "D", "E"] + ) + obj["mix"] = "a" + obj = tm.get_obj(obj, frame_or_series) + + pts = obj.to_period() + exp = obj.copy() + exp.index = period_range("1/1/2000", "1/1/2001") + tm.assert_equal(pts, exp) + + pts = obj.to_period("M") + exp.index = exp.index.asfreq("M") + tm.assert_equal(pts, exp) + + def test_to_period_without_freq(self, frame_or_series): + # GH#7606 without freq + idx = DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03", "2011-01-04"]) + exp_idx = PeriodIndex( + ["2011-01-01", "2011-01-02", "2011-01-03", "2011-01-04"], freq="D" + ) + + obj = DataFrame(np.random.randn(4, 4), 
index=idx, columns=idx) + obj = tm.get_obj(obj, frame_or_series) + expected = obj.copy() + expected.index = exp_idx + tm.assert_equal(obj.to_period(), expected) + + if frame_or_series is DataFrame: + expected = obj.copy() + expected.columns = exp_idx + tm.assert_frame_equal(obj.to_period(axis=1), expected) + + def test_to_period_columns(self): + dr = date_range("1/1/2000", "1/1/2001") + df = DataFrame(np.random.randn(len(dr), 5), index=dr) + df["mix"] = "a" + + df = df.T + pts = df.to_period(axis=1) + exp = df.copy() + exp.columns = period_range("1/1/2000", "1/1/2001") + tm.assert_frame_equal(pts, exp) + + pts = df.to_period("M", axis=1) + tm.assert_index_equal(pts.columns, exp.columns.asfreq("M")) + + def test_to_period_invalid_axis(self): + dr = date_range("1/1/2000", "1/1/2001") + df = DataFrame(np.random.randn(len(dr), 5), index=dr) + df["mix"] = "a" + + msg = "No axis named 2 for object type DataFrame" + with pytest.raises(ValueError, match=msg): + df.to_period(axis=2) + + def test_to_period_raises(self, index, frame_or_series): + # https://github.com/pandas-dev/pandas/issues/33327 + obj = Series(index=index, dtype=object) + if frame_or_series is DataFrame: + obj = obj.to_frame() + + if not isinstance(index, DatetimeIndex): + msg = f"unsupported Type {type(index).__name__}" + with pytest.raises(TypeError, match=msg): + obj.to_period() diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_to_records.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_to_records.py new file mode 100644 index 0000000..a2e9478 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_to_records.py @@ -0,0 +1,398 @@ +from collections import abc + +import numpy as np +import pytest + +from pandas import ( + CategoricalDtype, + DataFrame, + MultiIndex, + Series, + Timestamp, + date_range, +) +import pandas._testing as tm + + +class TestDataFrameToRecords: + def test_to_records_timeseries(self): + index = date_range("1/1/2000", periods=10) + df = DataFrame(np.random.randn(10, 3), index=index, columns=["a", "b", "c"]) + + result = df.to_records() + assert result["index"].dtype == "M8[ns]" + + result = df.to_records(index=False) + + def test_to_records_dt64(self): + df = DataFrame( + [["one", "two", "three"], ["four", "five", "six"]], + index=date_range("2012-01-01", "2012-01-02"), + ) + + expected = df.index.values[0] + result = df.to_records()["index"][0] + assert expected == result + + def test_to_records_dt64tz_column(self): + # GH#32535 dont less tz in to_records + df = DataFrame({"A": date_range("2012-01-01", "2012-01-02", tz="US/Eastern")}) + + result = df.to_records() + + assert result.dtype["A"] == object + val = result[0][1] + assert isinstance(val, Timestamp) + assert val == df.loc[0, "A"] + + def test_to_records_with_multindex(self): + # GH#3189 + index = [ + ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + data = np.zeros((8, 4)) + df = DataFrame(data, index=index) + r = df.to_records(index=True)["level_0"] + assert "bar" in r + assert "one" not in r + + def test_to_records_with_Mapping_type(self): + import email + from email.parser import Parser + + abc.Mapping.register(email.message.Message) + + headers = Parser().parsestr( + "From: \n" + "To: \n" + "Subject: Test message\n" + "\n" + "Body would go here\n" + ) + + frame = DataFrame.from_records([headers]) + all(x in frame for x in ["Type", "Subject", "From"]) + + def test_to_records_floats(self): + 
df = DataFrame(np.random.rand(10, 10)) + df.to_records() + + def test_to_records_index_name(self): + df = DataFrame(np.random.randn(3, 3)) + df.index.name = "X" + rs = df.to_records() + assert "X" in rs.dtype.fields + + df = DataFrame(np.random.randn(3, 3)) + rs = df.to_records() + assert "index" in rs.dtype.fields + + df.index = MultiIndex.from_tuples([("a", "x"), ("a", "y"), ("b", "z")]) + df.index.names = ["A", None] + result = df.to_records() + expected = np.rec.fromarrays( + [np.array(["a", "a", "b"]), np.array(["x", "y", "z"])] + + [np.asarray(df.iloc[:, i]) for i in range(3)], + dtype={ + "names": ["A", "level_1", "0", "1", "2"], + "formats": [" default to array dtypes. + ( + {}, + np.rec.array( + [(0, 1, 0.2, "a"), (1, 2, 1.5, "bc")], + dtype=[("index", " bool: + return key in self.d + + def keys(self): + return self.d.keys() + + df = DataFrame({"A": [1, 2], "B": [0.2, 1.5], "C": ["a", "bc"]}) + + dtype_mappings = { + "column_dtypes": DictLike(**{"A": np.int8, "B": np.float32}), + "index_dtypes": " 2) + + expected = DataFrame( + [[1.5, np.nan, 3], [1.5, np.nan, 3], [1.5, np.nan, 3], [1.5, np.nan, 7.0]] + ) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize( + "bad_kwarg, exception, msg", + [ + # errors must be 'ignore' or 'raise' + ({"errors": "something"}, ValueError, "The parameter errors must.*"), + ({"join": "inner"}, NotImplementedError, "Only left join is supported"), + ], + ) + def test_update_raise_bad_parameter(self, bad_kwarg, exception, msg): + df = DataFrame([[1.5, 1, 3.0]]) + with pytest.raises(exception, match=msg): + df.update(df, **bad_kwarg) + + def test_update_raise_on_overlap(self): + df = DataFrame( + [[1.5, 1, 3.0], [1.5, np.nan, 3.0], [1.5, np.nan, 3], [1.5, np.nan, 3]] + ) + + other = DataFrame([[2.0, np.nan], [np.nan, 7]], index=[1, 3], columns=[1, 2]) + with pytest.raises(ValueError, match="Data overlaps"): + df.update(other, errors="raise") + + def test_update_from_non_df(self): + d = {"a": Series([1, 2, 3, 4]), "b": Series([5, 6, 7, 8])} + df = DataFrame(d) + + d["a"] = Series([5, 6, 7, 8]) + df.update(d) + + expected = DataFrame(d) + + tm.assert_frame_equal(df, expected) + + d = {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]} + df = DataFrame(d) + + d["a"] = [5, 6, 7, 8] + df.update(d) + + expected = DataFrame(d) + + tm.assert_frame_equal(df, expected) + + def test_update_datetime_tz(self): + # GH 25807 + result = DataFrame([pd.Timestamp("2019", tz="UTC")]) + result.update(result) + expected = DataFrame([pd.Timestamp("2019", tz="UTC")]) + tm.assert_frame_equal(result, expected) + + def test_update_with_different_dtype(self): + # GH#3217 + df = DataFrame({"a": [1, 3], "b": [np.nan, 2]}) + df["c"] = np.nan + df["c"].update(Series(["foo"], index=[0])) + + expected = DataFrame({"a": [1, 3], "b": [np.nan, 2], "c": ["foo", np.nan]}) + tm.assert_frame_equal(df, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_value_counts.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_value_counts.py new file mode 100644 index 0000000..6e85288 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_value_counts.py @@ -0,0 +1,146 @@ +import numpy as np + +import pandas as pd +import pandas._testing as tm + + +def test_data_frame_value_counts_unsorted(): + df = pd.DataFrame( + {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]}, + index=["falcon", "dog", "cat", "ant"], + ) + + result = df.value_counts(sort=False) + expected = pd.Series( + data=[1, 2, 1], + 
index=pd.MultiIndex.from_arrays( + [(2, 4, 6), (2, 0, 0)], names=["num_legs", "num_wings"] + ), + ) + + tm.assert_series_equal(result, expected) + + +def test_data_frame_value_counts_ascending(): + df = pd.DataFrame( + {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]}, + index=["falcon", "dog", "cat", "ant"], + ) + + result = df.value_counts(ascending=True) + expected = pd.Series( + data=[1, 1, 2], + index=pd.MultiIndex.from_arrays( + [(2, 6, 4), (2, 0, 0)], names=["num_legs", "num_wings"] + ), + ) + + tm.assert_series_equal(result, expected) + + +def test_data_frame_value_counts_default(): + df = pd.DataFrame( + {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]}, + index=["falcon", "dog", "cat", "ant"], + ) + + result = df.value_counts() + expected = pd.Series( + data=[2, 1, 1], + index=pd.MultiIndex.from_arrays( + [(4, 2, 6), (0, 2, 0)], names=["num_legs", "num_wings"] + ), + ) + + tm.assert_series_equal(result, expected) + + +def test_data_frame_value_counts_normalize(): + df = pd.DataFrame( + {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]}, + index=["falcon", "dog", "cat", "ant"], + ) + + result = df.value_counts(normalize=True) + expected = pd.Series( + data=[0.5, 0.25, 0.25], + index=pd.MultiIndex.from_arrays( + [(4, 2, 6), (0, 2, 0)], names=["num_legs", "num_wings"] + ), + ) + + tm.assert_series_equal(result, expected) + + +def test_data_frame_value_counts_single_col_default(): + df = pd.DataFrame({"num_legs": [2, 4, 4, 6]}) + + result = df.value_counts() + expected = pd.Series( + data=[2, 1, 1], + index=pd.MultiIndex.from_arrays([[4, 2, 6]], names=["num_legs"]), + ) + + tm.assert_series_equal(result, expected) + + +def test_data_frame_value_counts_empty(): + df_no_cols = pd.DataFrame() + + result = df_no_cols.value_counts() + expected = pd.Series([], dtype=np.int64) + + tm.assert_series_equal(result, expected) + + +def test_data_frame_value_counts_empty_normalize(): + df_no_cols = pd.DataFrame() + + result = df_no_cols.value_counts(normalize=True) + expected = pd.Series([], dtype=np.float64) + + tm.assert_series_equal(result, expected) + + +def test_data_frame_value_counts_dropna_true(nulls_fixture): + # GH 41334 + df = pd.DataFrame( + { + "first_name": ["John", "Anne", "John", "Beth"], + "middle_name": ["Smith", nulls_fixture, nulls_fixture, "Louise"], + }, + ) + result = df.value_counts() + expected = pd.Series( + data=[1, 1], + index=pd.MultiIndex.from_arrays( + [("Beth", "John"), ("Louise", "Smith")], names=["first_name", "middle_name"] + ), + ) + + tm.assert_series_equal(result, expected) + + +def test_data_frame_value_counts_dropna_false(nulls_fixture): + # GH 41334 + df = pd.DataFrame( + { + "first_name": ["John", "Anne", "John", "Beth"], + "middle_name": ["Smith", nulls_fixture, nulls_fixture, "Louise"], + }, + ) + + result = df.value_counts(dropna=False) + expected = pd.Series( + data=[1, 1, 1, 1], + index=pd.MultiIndex( + levels=[ + pd.Index(["Anne", "Beth", "John"]), + pd.Index(["Louise", "Smith", nulls_fixture]), + ], + codes=[[0, 1, 2, 2], [2, 0, 1, 2]], + names=["first_name", "middle_name"], + ), + ) + + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_values.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_values.py new file mode 100644 index 0000000..f755b0a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/methods/test_values.py @@ -0,0 +1,273 @@ +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from 
pandas import ( + DataFrame, + NaT, + Series, + Timestamp, + date_range, + period_range, +) +import pandas._testing as tm + + +class TestDataFrameValues: + @td.skip_array_manager_invalid_test + def test_values(self, float_frame): + float_frame.values[:, 0] = 5.0 + assert (float_frame.values[:, 0] == 5).all() + + def test_more_values(self, float_string_frame): + values = float_string_frame.values + assert values.shape[1] == len(float_string_frame.columns) + + def test_values_mixed_dtypes(self, float_frame, float_string_frame): + frame = float_frame + arr = frame.values + + frame_cols = frame.columns + for i, row in enumerate(arr): + for j, value in enumerate(row): + col = frame_cols[j] + if np.isnan(value): + assert np.isnan(frame[col][i]) + else: + assert value == frame[col][i] + + # mixed type + arr = float_string_frame[["foo", "A"]].values + assert arr[0, 0] == "bar" + + df = DataFrame({"complex": [1j, 2j, 3j], "real": [1, 2, 3]}) + arr = df.values + assert arr[0, 0] == 1j + + def test_values_duplicates(self): + df = DataFrame( + [[1, 2, "a", "b"], [1, 2, "a", "b"]], columns=["one", "one", "two", "two"] + ) + + result = df.values + expected = np.array([[1, 2, "a", "b"], [1, 2, "a", "b"]], dtype=object) + + tm.assert_numpy_array_equal(result, expected) + + def test_values_with_duplicate_columns(self): + df = DataFrame([[1, 2.5], [3, 4.5]], index=[1, 2], columns=["x", "x"]) + result = df.values + expected = np.array([[1, 2.5], [3, 4.5]]) + assert (result == expected).all().all() + + @pytest.mark.parametrize("constructor", [date_range, period_range]) + def test_values_casts_datetimelike_to_object(self, constructor): + series = Series(constructor("2000-01-01", periods=10, freq="D")) + + expected = series.astype("object") + + df = DataFrame({"a": series, "b": np.random.randn(len(series))}) + + result = df.values.squeeze() + assert (result[:, 0] == expected.values).all() + + df = DataFrame({"a": series, "b": ["foo"] * len(series)}) + + result = df.values.squeeze() + assert (result[:, 0] == expected.values).all() + + def test_frame_values_with_tz(self): + tz = "US/Central" + df = DataFrame({"A": date_range("2000", periods=4, tz=tz)}) + result = df.values + expected = np.array( + [ + [Timestamp("2000-01-01", tz=tz)], + [Timestamp("2000-01-02", tz=tz)], + [Timestamp("2000-01-03", tz=tz)], + [Timestamp("2000-01-04", tz=tz)], + ] + ) + tm.assert_numpy_array_equal(result, expected) + + # two columns, homogeneous + + df["B"] = df["A"] + result = df.values + expected = np.concatenate([expected, expected], axis=1) + tm.assert_numpy_array_equal(result, expected) + + # three columns, heterogeneous + est = "US/Eastern" + df["C"] = df["A"].dt.tz_convert(est) + + new = np.array( + [ + [Timestamp("2000-01-01T01:00:00", tz=est)], + [Timestamp("2000-01-02T01:00:00", tz=est)], + [Timestamp("2000-01-03T01:00:00", tz=est)], + [Timestamp("2000-01-04T01:00:00", tz=est)], + ] + ) + expected = np.concatenate([expected, new], axis=1) + result = df.values + tm.assert_numpy_array_equal(result, expected) + + def test_interleave_with_tzaware(self, timezone_frame): + + # interleave with object + result = timezone_frame.assign(D="foo").values + expected = np.array( + [ + [ + Timestamp("2013-01-01 00:00:00"), + Timestamp("2013-01-02 00:00:00"), + Timestamp("2013-01-03 00:00:00"), + ], + [ + Timestamp("2013-01-01 00:00:00-0500", tz="US/Eastern"), + NaT, + Timestamp("2013-01-03 00:00:00-0500", tz="US/Eastern"), + ], + [ + Timestamp("2013-01-01 00:00:00+0100", tz="CET"), + NaT, + Timestamp("2013-01-03 00:00:00+0100", tz="CET"), 
+ ], + ["foo", "foo", "foo"], + ], + dtype=object, + ).T + tm.assert_numpy_array_equal(result, expected) + + # interleave with only datetime64[ns] + result = timezone_frame.values + expected = np.array( + [ + [ + Timestamp("2013-01-01 00:00:00"), + Timestamp("2013-01-02 00:00:00"), + Timestamp("2013-01-03 00:00:00"), + ], + [ + Timestamp("2013-01-01 00:00:00-0500", tz="US/Eastern"), + NaT, + Timestamp("2013-01-03 00:00:00-0500", tz="US/Eastern"), + ], + [ + Timestamp("2013-01-01 00:00:00+0100", tz="CET"), + NaT, + Timestamp("2013-01-03 00:00:00+0100", tz="CET"), + ], + ], + dtype=object, + ).T + tm.assert_numpy_array_equal(result, expected) + + def test_values_interleave_non_unique_cols(self): + df = DataFrame( + [[Timestamp("20130101"), 3.5], [Timestamp("20130102"), 4.5]], + columns=["x", "x"], + index=[1, 2], + ) + + df_unique = df.copy() + df_unique.columns = ["x", "y"] + assert df_unique.values.shape == df.values.shape + tm.assert_numpy_array_equal(df_unique.values[0], df.values[0]) + tm.assert_numpy_array_equal(df_unique.values[1], df.values[1]) + + def test_values_numeric_cols(self, float_frame): + float_frame["foo"] = "bar" + + values = float_frame[["A", "B", "C", "D"]].values + assert values.dtype == np.float64 + + def test_values_lcd(self, mixed_float_frame, mixed_int_frame): + + # mixed lcd + values = mixed_float_frame[["A", "B", "C", "D"]].values + assert values.dtype == np.float64 + + values = mixed_float_frame[["A", "B", "C"]].values + assert values.dtype == np.float32 + + values = mixed_float_frame[["C"]].values + assert values.dtype == np.float16 + + # GH#10364 + # B uint64 forces float because there are other signed int types + values = mixed_int_frame[["A", "B", "C", "D"]].values + assert values.dtype == np.float64 + + values = mixed_int_frame[["A", "D"]].values + assert values.dtype == np.int64 + + # B uint64 forces float because there are other signed int types + values = mixed_int_frame[["A", "B", "C"]].values + assert values.dtype == np.float64 + + # as B and C are both unsigned, no forcing to float is needed + values = mixed_int_frame[["B", "C"]].values + assert values.dtype == np.uint64 + + values = mixed_int_frame[["A", "C"]].values + assert values.dtype == np.int32 + + values = mixed_int_frame[["C", "D"]].values + assert values.dtype == np.int64 + + values = mixed_int_frame[["A"]].values + assert values.dtype == np.int32 + + values = mixed_int_frame[["C"]].values + assert values.dtype == np.uint8 + + +class TestPrivateValues: + @td.skip_array_manager_invalid_test + def test_private_values_dt64tz(self): + dta = date_range("2000", periods=4, tz="US/Central")._data.reshape(-1, 1) + + df = DataFrame(dta, columns=["A"]) + tm.assert_equal(df._values, dta) + + # we have a view + assert np.shares_memory(df._values._ndarray, dta._ndarray) + + # TimedeltaArray + tda = dta - dta + df2 = df - df + tm.assert_equal(df2._values, tda) + + @td.skip_array_manager_invalid_test + def test_private_values_dt64tz_multicol(self): + dta = date_range("2000", periods=8, tz="US/Central")._data.reshape(-1, 2) + + df = DataFrame(dta, columns=["A", "B"]) + tm.assert_equal(df._values, dta) + + # we have a view + assert np.shares_memory(df._values._ndarray, dta._ndarray) + + # TimedeltaArray + tda = dta - dta + df2 = df - df + tm.assert_equal(df2._values, tda) + + def test_private_values_dt64_multiblock(self, using_array_manager, request): + if using_array_manager: + mark = pytest.mark.xfail(reason="returns ndarray") + request.node.add_marker(mark) + + dta = date_range("2000", periods=8)._data + + 
df = DataFrame({"A": dta[:4]}, copy=False) + df["B"] = dta[4:] + + assert len(df._mgr.arrays) == 2 + + result = df._values + expected = dta.reshape(2, 4).T + tm.assert_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/test_alter_axes.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/test_alter_axes.py new file mode 100644 index 0000000..c68171a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/test_alter_axes.py @@ -0,0 +1,30 @@ +from datetime import datetime + +import pytz + +from pandas import DataFrame +import pandas._testing as tm + + +class TestDataFrameAlterAxes: + # Tests for setting index/columns attributes directly (i.e. __setattr__) + + def test_set_axis_setattr_index(self): + # GH 6785 + # set the index manually + + df = DataFrame([{"ts": datetime(2014, 4, 1, tzinfo=pytz.utc), "foo": 1}]) + expected = df.set_index("ts") + df.index = df["ts"] + df.pop("ts") + tm.assert_frame_equal(df, expected) + + # Renaming + + def test_assign_columns(self, float_frame): + float_frame["hi"] = "there" + + df = float_frame.copy() + df.columns = ["foo", "bar", "baz", "quux", "foo2"] + tm.assert_series_equal(float_frame["C"], df["baz"], check_names=False) + tm.assert_series_equal(float_frame["hi"], df["foo2"], check_names=False) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/test_api.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/test_api.py new file mode 100644 index 0000000..3adc4eb --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/test_api.py @@ -0,0 +1,368 @@ +from copy import deepcopy +import inspect +import pydoc + +import numpy as np +import pytest + +from pandas._config.config import option_context + +import pandas.util._test_decorators as td +from pandas.util._test_decorators import ( + async_mark, + skip_if_no, +) + +import pandas as pd +from pandas import ( + DataFrame, + Series, + date_range, + timedelta_range, +) +import pandas._testing as tm + + +class TestDataFrameMisc: + def test_getitem_pop_assign_name(self, float_frame): + s = float_frame["A"] + assert s.name == "A" + + s = float_frame.pop("A") + assert s.name == "A" + + s = float_frame.loc[:, "B"] + assert s.name == "B" + + s2 = s.loc[:] + assert s2.name == "B" + + def test_get_axis(self, float_frame): + f = float_frame + assert f._get_axis_number(0) == 0 + assert f._get_axis_number(1) == 1 + assert f._get_axis_number("index") == 0 + assert f._get_axis_number("rows") == 0 + assert f._get_axis_number("columns") == 1 + + assert f._get_axis_name(0) == "index" + assert f._get_axis_name(1) == "columns" + assert f._get_axis_name("index") == "index" + assert f._get_axis_name("rows") == "index" + assert f._get_axis_name("columns") == "columns" + + assert f._get_axis(0) is f.index + assert f._get_axis(1) is f.columns + + with pytest.raises(ValueError, match="No axis named"): + f._get_axis_number(2) + + with pytest.raises(ValueError, match="No axis.*foo"): + f._get_axis_name("foo") + + with pytest.raises(ValueError, match="No axis.*None"): + f._get_axis_name(None) + + with pytest.raises(ValueError, match="No axis named"): + f._get_axis_number(None) + + def test_column_contains_raises(self, float_frame): + with pytest.raises(TypeError, match="unhashable type: 'Index'"): + float_frame.columns in float_frame + + def test_tab_completion(self): + # DataFrame whose columns are identifiers shall have them in __dir__. 
+ df = DataFrame([list("abcd"), list("efgh")], columns=list("ABCD")) + for key in list("ABCD"): + assert key in dir(df) + assert isinstance(df.__getitem__("A"), Series) + + # DataFrame whose first-level columns are identifiers shall have + # them in __dir__. + df = DataFrame( + [list("abcd"), list("efgh")], + columns=pd.MultiIndex.from_tuples(list(zip("ABCD", "EFGH"))), + ) + for key in list("ABCD"): + assert key in dir(df) + for key in list("EFGH"): + assert key not in dir(df) + assert isinstance(df.__getitem__("A"), DataFrame) + + def test_display_max_dir_items(self): + # display.max_dir_items increaes the number of columns that are in __dir__. + columns = ["a" + str(i) for i in range(420)] + values = [range(420), range(420)] + df = DataFrame(values, columns=columns) + + # The default value for display.max_dir_items is 100 + assert "a99" in dir(df) + assert "a100" not in dir(df) + + with option_context("display.max_dir_items", 300): + df = DataFrame(values, columns=columns) + assert "a299" in dir(df) + assert "a300" not in dir(df) + + with option_context("display.max_dir_items", None): + df = DataFrame(values, columns=columns) + assert "a419" in dir(df) + + def test_not_hashable(self): + empty_frame = DataFrame() + + df = DataFrame([1]) + msg = "unhashable type: 'DataFrame'" + with pytest.raises(TypeError, match=msg): + hash(df) + with pytest.raises(TypeError, match=msg): + hash(empty_frame) + + def test_column_name_contains_unicode_surrogate(self): + # GH 25509 + colname = "\ud83d" + df = DataFrame({colname: []}) + # this should not crash + assert colname not in dir(df) + assert df.columns[0] == colname + + def test_new_empty_index(self): + df1 = DataFrame(np.random.randn(0, 3)) + df2 = DataFrame(np.random.randn(0, 3)) + df1.index.name = "foo" + assert df2.index.name is None + + def test_get_agg_axis(self, float_frame): + cols = float_frame._get_agg_axis(0) + assert cols is float_frame.columns + + idx = float_frame._get_agg_axis(1) + assert idx is float_frame.index + + msg = r"Axis must be 0 or 1 \(got 2\)" + with pytest.raises(ValueError, match=msg): + float_frame._get_agg_axis(2) + + def test_empty(self, float_frame, float_string_frame): + empty_frame = DataFrame() + assert empty_frame.empty + + assert not float_frame.empty + assert not float_string_frame.empty + + # corner case + df = DataFrame({"A": [1.0, 2.0, 3.0], "B": ["a", "b", "c"]}, index=np.arange(3)) + del df["A"] + assert not df.empty + + def test_len(self, float_frame): + assert len(float_frame) == len(float_frame.index) + + # single block corner case + arr = float_frame[["A", "B"]].values + expected = float_frame.reindex(columns=["A", "B"]).values + tm.assert_almost_equal(arr, expected) + + def test_axis_aliases(self, float_frame): + f = float_frame + + # reg name + expected = f.sum(axis=0) + result = f.sum(axis="index") + tm.assert_series_equal(result, expected) + + expected = f.sum(axis=1) + result = f.sum(axis="columns") + tm.assert_series_equal(result, expected) + + def test_class_axis(self): + # GH 18147 + # no exception and no empty docstring + assert pydoc.getdoc(DataFrame.index) + assert pydoc.getdoc(DataFrame.columns) + + def test_series_put_names(self, float_string_frame): + series = float_string_frame._series + for k, v in series.items(): + assert v.name == k + + def test_empty_nonzero(self): + df = DataFrame([1, 2, 3]) + assert not df.empty + df = DataFrame(index=[1], columns=[1]) + assert not df.empty + df = DataFrame(index=["a", "b"], columns=["c", "d"]).dropna() + assert df.empty + assert df.T.empty + 
empty_frames = [ + DataFrame(), + DataFrame(index=[1]), + DataFrame(columns=[1]), + DataFrame({1: []}), + ] + for df in empty_frames: + assert df.empty + assert df.T.empty + + def test_with_datetimelikes(self): + + df = DataFrame( + { + "A": date_range("20130101", periods=10), + "B": timedelta_range("1 day", periods=10), + } + ) + t = df.T + + result = t.dtypes.value_counts() + expected = Series({np.dtype("object"): 10}) + tm.assert_series_equal(result, expected) + + def test_deepcopy(self, float_frame): + cp = deepcopy(float_frame) + series = cp["A"] + series[:] = 10 + for idx, value in series.items(): + assert float_frame["A"][idx] != value + + def test_inplace_return_self(self): + # GH 1893 + + data = DataFrame( + {"a": ["foo", "bar", "baz", "qux"], "b": [0, 0, 1, 1], "c": [1, 2, 3, 4]} + ) + + def _check_f(base, f): + result = f(base) + assert result is None + + # -----DataFrame----- + + # set_index + f = lambda x: x.set_index("a", inplace=True) + _check_f(data.copy(), f) + + # reset_index + f = lambda x: x.reset_index(inplace=True) + _check_f(data.set_index("a"), f) + + # drop_duplicates + f = lambda x: x.drop_duplicates(inplace=True) + _check_f(data.copy(), f) + + # sort + f = lambda x: x.sort_values("b", inplace=True) + _check_f(data.copy(), f) + + # sort_index + f = lambda x: x.sort_index(inplace=True) + _check_f(data.copy(), f) + + # fillna + f = lambda x: x.fillna(0, inplace=True) + _check_f(data.copy(), f) + + # replace + f = lambda x: x.replace(1, 0, inplace=True) + _check_f(data.copy(), f) + + # rename + f = lambda x: x.rename({1: "foo"}, inplace=True) + _check_f(data.copy(), f) + + # -----Series----- + d = data.copy()["c"] + + # reset_index + f = lambda x: x.reset_index(inplace=True, drop=True) + _check_f(data.set_index("a")["c"], f) + + # fillna + f = lambda x: x.fillna(0, inplace=True) + _check_f(d.copy(), f) + + # replace + f = lambda x: x.replace(1, 0, inplace=True) + _check_f(d.copy(), f) + + # rename + f = lambda x: x.rename({1: "foo"}, inplace=True) + _check_f(d.copy(), f) + + @async_mark() + @td.check_file_leaks + async def test_tab_complete_warning(self, ip, frame_or_series): + # GH 16409 + pytest.importorskip("IPython", minversion="6.0.0") + from IPython.core.completer import provisionalcompleter + + if frame_or_series is DataFrame: + code = "from pandas import DataFrame; obj = DataFrame()" + else: + code = "from pandas import Series; obj = Series(dtype=object)" + + await ip.run_code(code) + + # GH 31324 newer jedi version raises Deprecation warning; + # appears resolved 2021-02-02 + with tm.assert_produces_warning(None): + with provisionalcompleter("ignore"): + list(ip.Completer.completions("obj.", 1)) + + def test_attrs(self): + df = DataFrame({"A": [2, 3]}) + assert df.attrs == {} + df.attrs["version"] = 1 + + result = df.rename(columns=str) + assert result.attrs == {"version": 1} + + @pytest.mark.parametrize("allows_duplicate_labels", [True, False, None]) + def test_set_flags(self, allows_duplicate_labels, frame_or_series): + obj = DataFrame({"A": [1, 2]}) + key = (0, 0) + if frame_or_series is Series: + obj = obj["A"] + key = 0 + + result = obj.set_flags(allows_duplicate_labels=allows_duplicate_labels) + + if allows_duplicate_labels is None: + # We don't update when it's not provided + assert result.flags.allows_duplicate_labels is True + else: + assert result.flags.allows_duplicate_labels is allows_duplicate_labels + + # We made a copy + assert obj is not result + + # We didn't mutate obj + assert obj.flags.allows_duplicate_labels is True + + # But we didn't 
copy data + result.iloc[key] = 0 + assert obj.iloc[key] == 0 + + # Now we do copy. + result = obj.set_flags( + copy=True, allows_duplicate_labels=allows_duplicate_labels + ) + result.iloc[key] = 10 + assert obj.iloc[key] == 0 + + def test_constructor_expanddim(self): + # GH#33628 accessing _constructor_expanddim should not raise NotImplementedError + # GH38782 pandas has no container higher than DataFrame (two-dim), so + # DataFrame._constructor_expanddim doesn't make sense, so it was removed. + df = DataFrame() + + msg = "'DataFrame' object has no attribute '_constructor_expanddim'" + with pytest.raises(AttributeError, match=msg): + df._constructor_expanddim(np.arange(27).reshape(3, 3, 3)) + + @skip_if_no("jinja2") + def test_inspect_getmembers(self): + # GH38740 + df = DataFrame() + with tm.assert_produces_warning(None): + inspect.getmembers(df) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/test_arithmetic.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/test_arithmetic.py new file mode 100644 index 0000000..4820fcc --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/test_arithmetic.py @@ -0,0 +1,1934 @@ +from collections import deque +from datetime import datetime +import functools +import operator +import re + +import numpy as np +import pytest +import pytz + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, +) +import pandas._testing as tm +import pandas.core.common as com +from pandas.core.computation import expressions as expr +from pandas.core.computation.expressions import ( + _MIN_ELEMENTS, + NUMEXPR_INSTALLED, +) +from pandas.tests.frame.common import ( + _check_mixed_float, + _check_mixed_int, +) + + +@pytest.fixture( + autouse=True, scope="module", params=[0, 1000000], ids=["numexpr", "python"] +) +def switch_numexpr_min_elements(request): + _MIN_ELEMENTS = expr._MIN_ELEMENTS + expr._MIN_ELEMENTS = request.param + yield request.param + expr._MIN_ELEMENTS = _MIN_ELEMENTS + + +class DummyElement: + def __init__(self, value, dtype): + self.value = value + self.dtype = np.dtype(dtype) + + def __array__(self): + return np.array(self.value, dtype=self.dtype) + + def __str__(self) -> str: + return f"DummyElement({self.value}, {self.dtype})" + + def __repr__(self) -> str: + return str(self) + + def astype(self, dtype, copy=False): + self.dtype = dtype + return self + + def view(self, dtype): + return type(self)(self.value.view(dtype), dtype) + + def any(self, axis=None): + return bool(self.value) + + +# ------------------------------------------------------------------- +# Comparisons + + +class TestFrameComparisons: + # Specifically _not_ flex-comparisons + + def test_comparison_with_categorical_dtype(self): + # GH#12564 + + df = DataFrame({"A": ["foo", "bar", "baz"]}) + exp = DataFrame({"A": [True, False, False]}) + + res = df == "foo" + tm.assert_frame_equal(res, exp) + + # casting to categorical shouldn't affect the result + df["A"] = df["A"].astype("category") + + res = df == "foo" + tm.assert_frame_equal(res, exp) + + def test_frame_in_list(self): + # GH#12689 this should raise at the DataFrame level, not blocks + df = DataFrame(np.random.randn(6, 4), columns=list("ABCD")) + msg = "The truth value of a DataFrame is ambiguous" + with pytest.raises(ValueError, match=msg): + df in [None] + + @pytest.mark.parametrize( + "arg, arg2", + [ + [ + { + "a": np.random.randint(10, size=10), + "b": pd.date_range("20010101", periods=10), + }, + { + "a": 
np.random.randint(10, size=10), + "b": np.random.randint(10, size=10), + }, + ], + [ + { + "a": np.random.randint(10, size=10), + "b": np.random.randint(10, size=10), + }, + { + "a": np.random.randint(10, size=10), + "b": pd.date_range("20010101", periods=10), + }, + ], + [ + { + "a": pd.date_range("20010101", periods=10), + "b": pd.date_range("20010101", periods=10), + }, + { + "a": np.random.randint(10, size=10), + "b": np.random.randint(10, size=10), + }, + ], + [ + { + "a": np.random.randint(10, size=10), + "b": pd.date_range("20010101", periods=10), + }, + { + "a": pd.date_range("20010101", periods=10), + "b": pd.date_range("20010101", periods=10), + }, + ], + ], + ) + def test_comparison_invalid(self, arg, arg2): + # GH4968 + # invalid date/int comparisons + x = DataFrame(arg) + y = DataFrame(arg2) + # we expect the result to match Series comparisons for + # == and !=, inequalities should raise + result = x == y + expected = DataFrame( + {col: x[col] == y[col] for col in x.columns}, + index=x.index, + columns=x.columns, + ) + tm.assert_frame_equal(result, expected) + + result = x != y + expected = DataFrame( + {col: x[col] != y[col] for col in x.columns}, + index=x.index, + columns=x.columns, + ) + tm.assert_frame_equal(result, expected) + + msgs = [ + r"Invalid comparison between dtype=datetime64\[ns\] and ndarray", + "invalid type promotion", + ( + # npdev 1.20.0 + r"The DTypes <class 'numpy\.dtype\[datetime64\]'> and " + r"<class 'numpy\.dtype\[int64\]'> do not have a common DType." + ), + ] + msg = "|".join(msgs) + with pytest.raises(TypeError, match=msg): + x >= y + with pytest.raises(TypeError, match=msg): + x > y + with pytest.raises(TypeError, match=msg): + x < y + with pytest.raises(TypeError, match=msg): + x <= y + + def test_timestamp_compare(self): + # make sure we can compare Timestamps on the right AND left hand side + # GH#4982 + df = DataFrame( + { + "dates1": pd.date_range("20010101", periods=10), + "dates2": pd.date_range("20010102", periods=10), + "intcol": np.random.randint(1000000000, size=10), + "floatcol": np.random.randn(10), + "stringcol": list(tm.rands(10)), + } + ) + df.loc[np.random.rand(len(df)) > 0.5, "dates2"] = pd.NaT + ops = {"gt": "lt", "lt": "gt", "ge": "le", "le": "ge", "eq": "eq", "ne": "ne"} + + for left, right in ops.items(): + left_f = getattr(operator, left) + right_f = getattr(operator, right) + + # no nats + if left in ["eq", "ne"]: + expected = left_f(df, pd.Timestamp("20010109")) + result = right_f(pd.Timestamp("20010109"), df) + tm.assert_frame_equal(result, expected) + else: + msg = ( + "'(<|>)=?' not supported between " + "instances of 'numpy.ndarray' and 'Timestamp'" + ) + with pytest.raises(TypeError, match=msg): + left_f(df, pd.Timestamp("20010109")) + with pytest.raises(TypeError, match=msg): + right_f(pd.Timestamp("20010109"), df) + # nats + if left in ["eq", "ne"]: + expected = left_f(df, pd.Timestamp("nat")) + result = right_f(pd.Timestamp("nat"), df) + tm.assert_frame_equal(result, expected) + else: + msg = ( + "'(<|>)=?' 
not supported between " + "instances of 'numpy.ndarray' and 'NaTType'" + ) + with pytest.raises(TypeError, match=msg): + left_f(df, pd.Timestamp("nat")) + with pytest.raises(TypeError, match=msg): + right_f(pd.Timestamp("nat"), df) + + def test_mixed_comparison(self): + # GH#13128, GH#22163 != datetime64 vs non-dt64 should be False, + # not raise TypeError + # (this appears to be fixed before GH#22163, not sure when) + df = DataFrame([["1989-08-01", 1], ["1989-08-01", 2]]) + other = DataFrame([["a", "b"], ["c", "d"]]) + + result = df == other + assert not result.any().any() + + result = df != other + assert result.all().all() + + def test_df_boolean_comparison_error(self): + # GH#4576, GH#22880 + # comparing DataFrame against list/tuple with len(obj) matching + # len(df.columns) is supported as of GH#22800 + df = DataFrame(np.arange(6).reshape((3, 2))) + + expected = DataFrame([[False, False], [True, False], [False, False]]) + + result = df == (2, 2) + tm.assert_frame_equal(result, expected) + + result = df == [2, 2] + tm.assert_frame_equal(result, expected) + + def test_df_float_none_comparison(self): + df = DataFrame(np.random.randn(8, 3), index=range(8), columns=["A", "B", "C"]) + + result = df.__eq__(None) + assert not result.any().any() + + def test_df_string_comparison(self): + df = DataFrame([{"a": 1, "b": "foo"}, {"a": 2, "b": "bar"}]) + mask_a = df.a > 1 + tm.assert_frame_equal(df[mask_a], df.loc[1:1, :]) + tm.assert_frame_equal(df[-mask_a], df.loc[0:0, :]) + + mask_b = df.b == "foo" + tm.assert_frame_equal(df[mask_b], df.loc[0:0, :]) + tm.assert_frame_equal(df[-mask_b], df.loc[1:1, :]) + + +class TestFrameFlexComparisons: + # TODO: test_bool_flex_frame needs a better name + def test_bool_flex_frame(self): + data = np.random.randn(5, 3) + other_data = np.random.randn(5, 3) + df = DataFrame(data) + other = DataFrame(other_data) + ndim_5 = np.ones(df.shape + (1, 3)) + + # Unaligned + def _check_unaligned_frame(meth, op, df, other): + part_o = other.loc[3:, 1:].copy() + rs = meth(part_o) + xp = op(df, part_o.reindex(index=df.index, columns=df.columns)) + tm.assert_frame_equal(rs, xp) + + # DataFrame + assert df.eq(df).values.all() + assert not df.ne(df).values.any() + for op in ["eq", "ne", "gt", "lt", "ge", "le"]: + f = getattr(df, op) + o = getattr(operator, op) + # No NAs + tm.assert_frame_equal(f(other), o(df, other)) + _check_unaligned_frame(f, o, df, other) + # ndarray + tm.assert_frame_equal(f(other.values), o(df, other.values)) + # scalar + tm.assert_frame_equal(f(0), o(df, 0)) + # NAs + msg = "Unable to coerce to Series/DataFrame" + tm.assert_frame_equal(f(np.nan), o(df, np.nan)) + with pytest.raises(ValueError, match=msg): + f(ndim_5) + + # Series + def _test_seq(df, idx_ser, col_ser): + idx_eq = df.eq(idx_ser, axis=0) + col_eq = df.eq(col_ser) + idx_ne = df.ne(idx_ser, axis=0) + col_ne = df.ne(col_ser) + tm.assert_frame_equal(col_eq, df == Series(col_ser)) + tm.assert_frame_equal(col_eq, -col_ne) + tm.assert_frame_equal(idx_eq, -idx_ne) + tm.assert_frame_equal(idx_eq, df.T.eq(idx_ser).T) + tm.assert_frame_equal(col_eq, df.eq(list(col_ser))) + tm.assert_frame_equal(idx_eq, df.eq(Series(idx_ser), axis=0)) + tm.assert_frame_equal(idx_eq, df.eq(list(idx_ser), axis=0)) + + idx_gt = df.gt(idx_ser, axis=0) + col_gt = df.gt(col_ser) + idx_le = df.le(idx_ser, axis=0) + col_le = df.le(col_ser) + + tm.assert_frame_equal(col_gt, df > Series(col_ser)) + tm.assert_frame_equal(col_gt, -col_le) + tm.assert_frame_equal(idx_gt, -idx_le) + tm.assert_frame_equal(idx_gt, df.T.gt(idx_ser).T) + 
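+ # Illustrative note (not upstream pandas): gt/le (and ge/lt just below) are + # exact complements here only because the random data has no NaNs; with a + # NaN present both sides compare False, as the "# NA" block checks later.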
+ idx_ge = df.ge(idx_ser, axis=0) + col_ge = df.ge(col_ser) + idx_lt = df.lt(idx_ser, axis=0) + col_lt = df.lt(col_ser) + tm.assert_frame_equal(col_ge, df >= Series(col_ser)) + tm.assert_frame_equal(col_ge, -col_lt) + tm.assert_frame_equal(idx_ge, -idx_lt) + tm.assert_frame_equal(idx_ge, df.T.ge(idx_ser).T) + + idx_ser = Series(np.random.randn(5)) + col_ser = Series(np.random.randn(3)) + _test_seq(df, idx_ser, col_ser) + + # list/tuple + _test_seq(df, idx_ser.values, col_ser.values) + + # NA + df.loc[0, 0] = np.nan + rs = df.eq(df) + assert not rs.loc[0, 0] + rs = df.ne(df) + assert rs.loc[0, 0] + rs = df.gt(df) + assert not rs.loc[0, 0] + rs = df.lt(df) + assert not rs.loc[0, 0] + rs = df.ge(df) + assert not rs.loc[0, 0] + rs = df.le(df) + assert not rs.loc[0, 0] + + def test_bool_flex_frame_complex_dtype(self): + # complex + arr = np.array([np.nan, 1, 6, np.nan]) + arr2 = np.array([2j, np.nan, 7, None]) + df = DataFrame({"a": arr}) + df2 = DataFrame({"a": arr2}) + + msg = "|".join( + [ + "'>' not supported between instances of '.*' and 'complex'", + r"unorderable types: .*complex\(\)", # PY35 + ] + ) + with pytest.raises(TypeError, match=msg): + # inequalities are not well-defined for complex numbers + df.gt(df2) + with pytest.raises(TypeError, match=msg): + # regression test that we get the same behavior for Series + df["a"].gt(df2["a"]) + with pytest.raises(TypeError, match=msg): + # Check that we match numpy behavior here + df.values > df2.values + + rs = df.ne(df2) + assert rs.values.all() + + arr3 = np.array([2j, np.nan, None]) + df3 = DataFrame({"a": arr3}) + + with pytest.raises(TypeError, match=msg): + # inequalities are not well-defined for complex numbers + df3.gt(2j) + with pytest.raises(TypeError, match=msg): + # regression test that we get the same behavior for Series + df3["a"].gt(2j) + with pytest.raises(TypeError, match=msg): + # Check that we match numpy behavior here + df3.values > 2j + + def test_bool_flex_frame_object_dtype(self): + # corner, dtype=object + df1 = DataFrame({"col": ["foo", np.nan, "bar"]}) + df2 = DataFrame({"col": ["foo", datetime.now(), "bar"]}) + result = df1.ne(df2) + exp = DataFrame({"col": [False, True, False]}) + tm.assert_frame_equal(result, exp) + + def test_flex_comparison_nat(self): + # GH 15697, GH 22163 df.eq(pd.NaT) should behave like df == pd.NaT, + # and _definitely_ not be NaN + df = DataFrame([pd.NaT]) + + result = df == pd.NaT + # result.iloc[0, 0] is a np.bool_ object + assert result.iloc[0, 0].item() is False + + result = df.eq(pd.NaT) + assert result.iloc[0, 0].item() is False + + result = df != pd.NaT + assert result.iloc[0, 0].item() is True + + result = df.ne(pd.NaT) + assert result.iloc[0, 0].item() is True + + @pytest.mark.parametrize("opname", ["eq", "ne", "gt", "lt", "ge", "le"]) + def test_df_flex_cmp_constant_return_types(self, opname): + # GH 15077, non-empty DataFrame + df = DataFrame({"x": [1, 2, 3], "y": [1.0, 2.0, 3.0]}) + const = 2 + + result = getattr(df, opname)(const).dtypes.value_counts() + tm.assert_series_equal(result, Series([2], index=[np.dtype(bool)])) + + @pytest.mark.parametrize("opname", ["eq", "ne", "gt", "lt", "ge", "le"]) + def test_df_flex_cmp_constant_return_types_empty(self, opname): + # GH 15077 empty DataFrame + df = DataFrame({"x": [1, 2, 3], "y": [1.0, 2.0, 3.0]}) + const = 2 + + empty = df.iloc[:0] + result = getattr(empty, opname)(const).dtypes.value_counts() + tm.assert_series_equal(result, Series([2], index=[np.dtype(bool)])) + + def test_df_flex_cmp_ea_dtype_with_ndarray_series(self): + ii 
= pd.IntervalIndex.from_breaks([1, 2, 3]) + df = DataFrame({"A": ii, "B": ii}) + + ser = Series([0, 0]) + res = df.eq(ser, axis=0) + + expected = DataFrame({"A": [False, False], "B": [False, False]}) + tm.assert_frame_equal(res, expected) + + ser2 = Series([1, 2], index=["A", "B"]) + res2 = df.eq(ser2, axis=1) + tm.assert_frame_equal(res2, expected) + + +# ------------------------------------------------------------------- +# Arithmetic + + +class TestFrameFlexArithmetic: + def test_floordiv_axis0(self): + # make sure that df.floordiv(ser, axis=0) matches the column-wise result + arr = np.arange(3) + ser = Series(arr) + df = DataFrame({"A": ser, "B": ser}) + + result = df.floordiv(ser, axis=0) + + expected = DataFrame({col: df[col] // ser for col in df.columns}) + + tm.assert_frame_equal(result, expected) + + result2 = df.floordiv(ser.values, axis=0) + tm.assert_frame_equal(result2, expected) + + @pytest.mark.skipif(not NUMEXPR_INSTALLED, reason="numexpr not installed") + @pytest.mark.parametrize("opname", ["floordiv", "pow"]) + def test_floordiv_axis0_numexpr_path(self, opname): + # case that goes through numexpr and has to fall back to masked_arith_op + op = getattr(operator, opname) + + arr = np.arange(_MIN_ELEMENTS + 100).reshape(_MIN_ELEMENTS // 100 + 1, -1) * 100 + df = DataFrame(arr) + df["C"] = 1.0 + + ser = df[0] + result = getattr(df, opname)(ser, axis=0) + + expected = DataFrame({col: op(df[col], ser) for col in df.columns}) + tm.assert_frame_equal(result, expected) + + result2 = getattr(df, opname)(ser.values, axis=0) + tm.assert_frame_equal(result2, expected) + + def test_df_add_td64_columnwise(self): + # GH 22534 Check that column-wise addition broadcasts correctly + dti = pd.date_range("2016-01-01", periods=10) + tdi = pd.timedelta_range("1", periods=10) + tser = Series(tdi) + df = DataFrame({0: dti, 1: tdi}) + + result = df.add(tser, axis=0) + expected = DataFrame({0: dti + tdi, 1: tdi + tdi}) + tm.assert_frame_equal(result, expected) + + def test_df_add_flex_filled_mixed_dtypes(self): + # GH 19611 + dti = pd.date_range("2016-01-01", periods=3) + ser = Series(["1 Day", "NaT", "2 Days"], dtype="timedelta64[ns]") + df = DataFrame({"A": dti, "B": ser}) + other = DataFrame({"A": ser, "B": ser}) + fill = pd.Timedelta(days=1).to_timedelta64() + result = df.add(other, fill_value=fill) + + expected = DataFrame( + { + "A": Series( + ["2016-01-02", "2016-01-03", "2016-01-05"], dtype="datetime64[ns]" + ), + "B": ser * 2, + } + ) + tm.assert_frame_equal(result, expected) + + def test_arith_flex_frame( + self, all_arithmetic_operators, float_frame, mixed_float_frame + ): + # one instance of parametrized fixture + op = all_arithmetic_operators + + def f(x, y): + # r-versions not in operator-stdlib; get op without "r" and invert + if op.startswith("__r"): + return getattr(operator, op.replace("__r", "__"))(y, x) + return getattr(operator, op)(x, y) + + result = getattr(float_frame, op)(2 * float_frame) + expected = f(float_frame, 2 * float_frame) + tm.assert_frame_equal(result, expected) + + # vs mix float + result = getattr(mixed_float_frame, op)(2 * mixed_float_frame) + expected = f(mixed_float_frame, 2 * mixed_float_frame) + tm.assert_frame_equal(result, expected) + _check_mixed_float(result, dtype={"C": None}) + + @pytest.mark.parametrize("op", ["__add__", "__sub__", "__mul__"]) + def test_arith_flex_frame_mixed( + self, + op, + int_frame, + mixed_int_frame, + mixed_float_frame, + switch_numexpr_min_elements, + ): + f = getattr(operator, op) + + # vs mix int + result = 
getattr(mixed_int_frame, op)(2 + mixed_int_frame) + expected = f(mixed_int_frame, 2 + mixed_int_frame) + + # no overflow in the uint + dtype = None + if op in ["__sub__"]: + dtype = {"B": "uint64", "C": None} + elif op in ["__add__", "__mul__"]: + dtype = {"C": None} + if expr.USE_NUMEXPR and switch_numexpr_min_elements == 0: + # when using numexpr, the casting rules are slightly different: + # in the `2 + mixed_int_frame` operation, the int32 column becomes + # an int64 column (not preserving dtype in operation with Python + # scalar), and then the int32/int64 combo results in int64 result + dtype["A"] = (2 + mixed_int_frame)["A"].dtype + tm.assert_frame_equal(result, expected) + _check_mixed_int(result, dtype=dtype) + + # vs mix float + result = getattr(mixed_float_frame, op)(2 * mixed_float_frame) + expected = f(mixed_float_frame, 2 * mixed_float_frame) + tm.assert_frame_equal(result, expected) + _check_mixed_float(result, dtype={"C": None}) + + # vs plain int + result = getattr(int_frame, op)(2 * int_frame) + expected = f(int_frame, 2 * int_frame) + tm.assert_frame_equal(result, expected) + + def test_arith_flex_frame_raise(self, all_arithmetic_operators, float_frame): + # one instance of parametrized fixture + op = all_arithmetic_operators + + # Check that arrays with dim >= 3 raise + for dim in range(3, 6): + arr = np.ones((1,) * dim) + msg = "Unable to coerce to Series/DataFrame" + with pytest.raises(ValueError, match=msg): + getattr(float_frame, op)(arr) + + def test_arith_flex_frame_corner(self, float_frame): + + const_add = float_frame.add(1) + tm.assert_frame_equal(const_add, float_frame + 1) + + # corner cases + result = float_frame.add(float_frame[:0]) + tm.assert_frame_equal(result, float_frame * np.nan) + + result = float_frame[:0].add(float_frame) + tm.assert_frame_equal(result, float_frame * np.nan) + + with pytest.raises(NotImplementedError, match="fill_value"): + float_frame.add(float_frame.iloc[0], fill_value=3) + + with pytest.raises(NotImplementedError, match="fill_value"): + float_frame.add(float_frame.iloc[0], axis="index", fill_value=3) + + def test_arith_flex_series(self, simple_frame): + df = simple_frame + + row = df.xs("a") + col = df["two"] + # after arithmetic refactor, add truediv here + ops = ["add", "sub", "mul", "mod"] + for op in ops: + f = getattr(df, op) + op = getattr(operator, op) + tm.assert_frame_equal(f(row), op(df, row)) + tm.assert_frame_equal(f(col, axis=0), op(df.T, col).T) + + # special case for some reason + tm.assert_frame_equal(df.add(row, axis=None), df + row) + + # cases which will be refactored after big arithmetic refactor + tm.assert_frame_equal(df.div(row), df / row) + tm.assert_frame_equal(df.div(col, axis=0), (df.T / col).T) + + # broadcasting issue in GH 7325 + df = DataFrame(np.arange(3 * 2).reshape((3, 2)), dtype="int64") + expected = DataFrame([[np.nan, np.inf], [1.0, 1.5], [1.0, 1.25]]) + result = df.div(df[0], axis="index") + tm.assert_frame_equal(result, expected) + + df = DataFrame(np.arange(3 * 2).reshape((3, 2)), dtype="float64") + expected = DataFrame([[np.nan, np.inf], [1.0, 1.5], [1.0, 1.25]]) + result = df.div(df[0], axis="index") + tm.assert_frame_equal(result, expected) + + def test_arith_flex_zero_len_raises(self): + # GH 19522 passing fill_value to frame flex arith methods should + # raise even in the zero-length special cases + ser_len0 = Series([], dtype=object) + df_len0 = DataFrame(columns=["A", "B"]) + df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + + with pytest.raises(NotImplementedError, 
match="fill_value"): + df.add(ser_len0, fill_value="E") + + with pytest.raises(NotImplementedError, match="fill_value"): + df_len0.sub(df["A"], axis=None, fill_value=3) + + def test_flex_add_scalar_fill_value(self): + # GH#12723 + dat = np.array([0, 1, np.nan, 3, 4, 5], dtype="float") + df = DataFrame({"foo": dat}, index=range(6)) + + exp = df.fillna(0).add(2) + res = df.add(2, fill_value=0) + tm.assert_frame_equal(res, exp) + + def test_sub_alignment_with_duplicate_index(self): + # GH#5185 dup aligning operations should work + df1 = DataFrame([1, 2, 3, 4, 5], index=[1, 2, 1, 2, 3]) + df2 = DataFrame([1, 2, 3], index=[1, 2, 3]) + expected = DataFrame([0, 2, 0, 2, 2], index=[1, 1, 2, 2, 3]) + result = df1.sub(df2) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("op", ["__add__", "__mul__", "__sub__", "__truediv__"]) + def test_arithmetic_with_duplicate_columns(self, op): + # operations + df = DataFrame({"A": np.arange(10), "B": np.random.rand(10)}) + expected = getattr(df, op)(df) + expected.columns = ["A", "A"] + df.columns = ["A", "A"] + result = getattr(df, op)(df) + tm.assert_frame_equal(result, expected) + str(result) + result.dtypes + + @pytest.mark.parametrize("level", [0, None]) + def test_broadcast_multiindex(self, level): + # GH34388 + df1 = DataFrame({"A": [0, 1, 2], "B": [1, 2, 3]}) + df1.columns = df1.columns.set_names("L1") + + df2 = DataFrame({("A", "C"): [0, 0, 0], ("A", "D"): [0, 0, 0]}) + df2.columns = df2.columns.set_names(["L1", "L2"]) + + result = df1.add(df2, level=level) + expected = DataFrame({("A", "C"): [0, 1, 2], ("A", "D"): [0, 1, 2]}) + expected.columns = expected.columns.set_names(["L1", "L2"]) + + tm.assert_frame_equal(result, expected) + + +class TestFrameArithmetic: + def test_td64_op_nat_casting(self): + # Make sure we don't accidentally treat timedelta64(NaT) as datetime64 + # when calling dispatch_to_series in DataFrame arithmetic + ser = Series(["NaT", "NaT"], dtype="timedelta64[ns]") + df = DataFrame([[1, 2], [3, 4]]) + + result = df * ser + expected = DataFrame({0: ser, 1: ser}) + tm.assert_frame_equal(result, expected) + + def test_df_add_2d_array_rowlike_broadcasts(self): + # GH#23000 + arr = np.arange(6).reshape(3, 2) + df = DataFrame(arr, columns=[True, False], index=["A", "B", "C"]) + + rowlike = arr[[1], :] # shape --> (1, ncols) + assert rowlike.shape == (1, df.shape[1]) + + expected = DataFrame( + [[2, 4], [4, 6], [6, 8]], + columns=df.columns, + index=df.index, + # specify dtype explicitly to avoid failing + # on 32bit builds + dtype=arr.dtype, + ) + result = df + rowlike + tm.assert_frame_equal(result, expected) + result = rowlike + df + tm.assert_frame_equal(result, expected) + + def test_df_add_2d_array_collike_broadcasts(self): + # GH#23000 + arr = np.arange(6).reshape(3, 2) + df = DataFrame(arr, columns=[True, False], index=["A", "B", "C"]) + + collike = arr[:, [1]] # shape --> (nrows, 1) + assert collike.shape == (df.shape[0], 1) + + expected = DataFrame( + [[1, 2], [5, 6], [9, 10]], + columns=df.columns, + index=df.index, + # specify dtype explicitly to avoid failing + # on 32bit builds + dtype=arr.dtype, + ) + result = df + collike + tm.assert_frame_equal(result, expected) + result = collike + df + tm.assert_frame_equal(result, expected) + + def test_df_arith_2d_array_rowlike_broadcasts( + self, request, all_arithmetic_operators, using_array_manager + ): + # GH#23000 + opname = all_arithmetic_operators + + if using_array_manager and opname in ("__rmod__", "__rfloordiv__"): + # TODO(ArrayManager) decide on dtypes 
+ td.mark_array_manager_not_yet_implemented(request) + + arr = np.arange(6).reshape(3, 2) + df = DataFrame(arr, columns=[True, False], index=["A", "B", "C"]) + + rowlike = arr[[1], :] # shape --> (1, ncols) + assert rowlike.shape == (1, df.shape[1]) + + exvals = [ + getattr(df.loc["A"], opname)(rowlike.squeeze()), + getattr(df.loc["B"], opname)(rowlike.squeeze()), + getattr(df.loc["C"], opname)(rowlike.squeeze()), + ] + + expected = DataFrame(exvals, columns=df.columns, index=df.index) + + result = getattr(df, opname)(rowlike) + tm.assert_frame_equal(result, expected) + + def test_df_arith_2d_array_collike_broadcasts( + self, request, all_arithmetic_operators, using_array_manager + ): + # GH#23000 + opname = all_arithmetic_operators + + if using_array_manager and opname in ("__rmod__", "__rfloordiv__"): + # TODO(ArrayManager) decide on dtypes + td.mark_array_manager_not_yet_implemented(request) + + arr = np.arange(6).reshape(3, 2) + df = DataFrame(arr, columns=[True, False], index=["A", "B", "C"]) + + collike = arr[:, [1]] # shape --> (nrows, 1) + assert collike.shape == (df.shape[0], 1) + + exvals = { + True: getattr(df[True], opname)(collike.squeeze()), + False: getattr(df[False], opname)(collike.squeeze()), + } + + dtype = None + if opname in ["__rmod__", "__rfloordiv__"]: + # Series ops may return mixed int/float dtypes in cases where + # DataFrame op will return all-float. So we upcast `expected` + dtype = np.common_type(*(x.values for x in exvals.values())) + + expected = DataFrame(exvals, columns=df.columns, index=df.index, dtype=dtype) + + result = getattr(df, opname)(collike) + tm.assert_frame_equal(result, expected) + + def test_df_bool_mul_int(self): + # GH 22047, GH 22163 multiplication by 1 should result in int dtype, + # not object dtype + df = DataFrame([[False, True], [False, False]]) + result = df * 1 + + # On appveyor this comes back as np.int32 instead of np.int64, + # so we check dtype.kind instead of just dtype + kinds = result.dtypes.apply(lambda x: x.kind) + assert (kinds == "i").all() + + result = 1 * df + kinds = result.dtypes.apply(lambda x: x.kind) + assert (kinds == "i").all() + + def test_arith_mixed(self): + + left = DataFrame({"A": ["a", "b", "c"], "B": [1, 2, 3]}) + + result = left + left + expected = DataFrame({"A": ["aa", "bb", "cc"], "B": [2, 4, 6]}) + tm.assert_frame_equal(result, expected) + + def test_arith_getitem_commute(self): + df = DataFrame({"A": [1.1, 3.3], "B": [2.5, -3.9]}) + + def _test_op(df, op): + result = op(df, 1) + + if not df.columns.is_unique: + raise ValueError("Only unique columns supported by this test") + + for col in result.columns: + tm.assert_series_equal(result[col], op(df[col], 1)) + + _test_op(df, operator.add) + _test_op(df, operator.sub) + _test_op(df, operator.mul) + _test_op(df, operator.truediv) + _test_op(df, operator.floordiv) + _test_op(df, operator.pow) + + _test_op(df, lambda x, y: y + x) + _test_op(df, lambda x, y: y - x) + _test_op(df, lambda x, y: y * x) + _test_op(df, lambda x, y: y / x) + _test_op(df, lambda x, y: y ** x) + + _test_op(df, lambda x, y: x + y) + _test_op(df, lambda x, y: x - y) + _test_op(df, lambda x, y: x * y) + _test_op(df, lambda x, y: x / y) + _test_op(df, lambda x, y: x ** y) + + @pytest.mark.parametrize( + "values", [[1, 2], (1, 2), np.array([1, 2]), range(1, 3), deque([1, 2])] + ) + def test_arith_alignment_non_pandas_object(self, values): + # GH#17901 + df = DataFrame({"A": [1, 1], "B": [1, 1]}) + expected = DataFrame({"A": [2, 2], "B": [3, 3]}) + result = df + values + 
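# Illustrative note (not upstream pandas): any 1D sequence whose length + # matches len(df.columns), list, tuple, ndarray, range or deque alike, is + # broadcast row-wise, so values == [1, 2] adds 1 to "A" and 2 to "B". +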
tm.assert_frame_equal(result, expected) + + def test_arith_non_pandas_object(self): + df = DataFrame( + np.arange(1, 10, dtype="f8").reshape(3, 3), + columns=["one", "two", "three"], + index=["a", "b", "c"], + ) + + val1 = df.xs("a").values + added = DataFrame(df.values + val1, index=df.index, columns=df.columns) + tm.assert_frame_equal(df + val1, added) + + added = DataFrame((df.values.T + val1).T, index=df.index, columns=df.columns) + tm.assert_frame_equal(df.add(val1, axis=0), added) + + val2 = list(df["two"]) + + added = DataFrame(df.values + val2, index=df.index, columns=df.columns) + tm.assert_frame_equal(df + val2, added) + + added = DataFrame((df.values.T + val2).T, index=df.index, columns=df.columns) + tm.assert_frame_equal(df.add(val2, axis="index"), added) + + val3 = np.random.rand(*df.shape) + added = DataFrame(df.values + val3, index=df.index, columns=df.columns) + tm.assert_frame_equal(df.add(val3), added) + + def test_operations_with_interval_categories_index(self, all_arithmetic_operators): + # GH#27415 + op = all_arithmetic_operators + ind = pd.CategoricalIndex(pd.interval_range(start=0.0, end=2.0)) + data = [1, 2] + df = DataFrame([data], columns=ind) + num = 10 + result = getattr(df, op)(num) + expected = DataFrame([[getattr(n, op)(num) for n in data]], columns=ind) + tm.assert_frame_equal(result, expected) + + def test_frame_with_frame_reindex(self): + # GH#31623 + df = DataFrame( + { + "foo": [pd.Timestamp("2019"), pd.Timestamp("2020")], + "bar": [pd.Timestamp("2018"), pd.Timestamp("2021")], + }, + columns=["foo", "bar"], + ) + df2 = df[["foo"]] + + result = df - df2 + + expected = DataFrame( + {"foo": [pd.Timedelta(0), pd.Timedelta(0)], "bar": [np.nan, np.nan]}, + columns=["bar", "foo"], + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "value, dtype", + [ + (1, "i8"), + (1.0, "f8"), + (2 ** 63, "f8"), + (1j, "complex128"), + (2 ** 63, "complex128"), + (True, "bool"), + (np.timedelta64(20, "ns"), "<m8[ns]"), + ... + + def test_boolean_comparison(self): + # GH 4576 + # boolean comparisons with a tuple/list give unexpected results + df = DataFrame(np.arange(6).reshape((3, 2))) + b = np.array([2, 2]) + b_r = np.atleast_2d([2, 2]) + b_c = b_r.T + lst = [2, 2, 2] + tup = tuple(lst) + + # gt + expected = DataFrame([[False, False], [False, True], [True, True]]) + result = df > b + tm.assert_frame_equal(result, expected) + + result = df.values > b + tm.assert_numpy_array_equal(result, expected.values) + + msg1d = "Unable to coerce to Series, length must be 2: given 3" + msg2d = "Unable to coerce to DataFrame, shape must be" + msg2db = "operands could not be broadcast together with shapes" + with pytest.raises(ValueError, match=msg1d): + # wrong shape + df > lst + + with pytest.raises(ValueError, match=msg1d): + # wrong shape + result = df > tup + + # broadcasts like ndarray (GH#23000) + result = df > b_r + tm.assert_frame_equal(result, expected) + + result = df.values > b_r + tm.assert_numpy_array_equal(result, expected.values) + + with pytest.raises(ValueError, match=msg2d): + df > b_c + + with pytest.raises(ValueError, match=msg2db): + df.values > b_c + + # == + expected = DataFrame([[False, False], [True, False], [False, False]]) + result = df == b + tm.assert_frame_equal(result, expected) + + with pytest.raises(ValueError, match=msg1d): + result = df == lst + + with pytest.raises(ValueError, match=msg1d): + result = df == tup + + # broadcasts like ndarray (GH#23000) + result = df == b_r + tm.assert_frame_equal(result, expected) + + result = df.values == b_r + tm.assert_numpy_array_equal(result, expected.values) + + with pytest.raises(ValueError, match=msg2d): + df == b_c + + assert df.values.shape != b_c.shape + + # with alignment + df = DataFrame( + np.arange(6).reshape((3, 2)), columns=list("AB"), index=list("abc") + ) + expected.index = df.index + expected.columns = df.columns + + with pytest.raises(ValueError, 
match=msg1d): + result = df == lst + + with pytest.raises(ValueError, match=msg1d): + result = df == tup + + def test_inplace_ops_alignment(self): + + # inplace ops / ops alignment + # GH 8511 + + columns = list("abcdefg") + X_orig = DataFrame( + np.arange(10 * len(columns)).reshape(-1, len(columns)), + columns=columns, + index=range(10), + ) + Z = 100 * X_orig.iloc[:, 1:-1].copy() + block1 = list("bedcf") + subs = list("bcdef") + + # add + X = X_orig.copy() + result1 = (X[block1] + Z).reindex(columns=subs) + + X[block1] += Z + result2 = X.reindex(columns=subs) + + X = X_orig.copy() + result3 = (X[block1] + Z[block1]).reindex(columns=subs) + + X[block1] += Z[block1] + result4 = X.reindex(columns=subs) + + tm.assert_frame_equal(result1, result2) + tm.assert_frame_equal(result1, result3) + tm.assert_frame_equal(result1, result4) + + # sub + X = X_orig.copy() + result1 = (X[block1] - Z).reindex(columns=subs) + + X[block1] -= Z + result2 = X.reindex(columns=subs) + + X = X_orig.copy() + result3 = (X[block1] - Z[block1]).reindex(columns=subs) + + X[block1] -= Z[block1] + result4 = X.reindex(columns=subs) + + tm.assert_frame_equal(result1, result2) + tm.assert_frame_equal(result1, result3) + tm.assert_frame_equal(result1, result4) + + def test_inplace_ops_identity(self): + + # GH 5104 + # make sure that we are actually changing the object + s_orig = Series([1, 2, 3]) + df_orig = DataFrame(np.random.randint(0, 5, size=10).reshape(-1, 5)) + + # no dtype change + s = s_orig.copy() + s2 = s + s += 1 + tm.assert_series_equal(s, s2) + tm.assert_series_equal(s_orig + 1, s) + assert s is s2 + assert s._mgr is s2._mgr + + df = df_orig.copy() + df2 = df + df += 1 + tm.assert_frame_equal(df, df2) + tm.assert_frame_equal(df_orig + 1, df) + assert df is df2 + assert df._mgr is df2._mgr + + # dtype change + s = s_orig.copy() + s2 = s + s += 1.5 + tm.assert_series_equal(s, s2) + tm.assert_series_equal(s_orig + 1.5, s) + + df = df_orig.copy() + df2 = df + df += 1.5 + tm.assert_frame_equal(df, df2) + tm.assert_frame_equal(df_orig + 1.5, df) + assert df is df2 + assert df._mgr is df2._mgr + + # mixed dtype + arr = np.random.randint(0, 10, size=5) + df_orig = DataFrame({"A": arr.copy(), "B": "foo"}) + df = df_orig.copy() + df2 = df + df["A"] += 1 + expected = DataFrame({"A": arr.copy() + 1, "B": "foo"}) + tm.assert_frame_equal(df, expected) + tm.assert_frame_equal(df2, expected) + assert df._mgr is df2._mgr + + df = df_orig.copy() + df2 = df + df["A"] += 1.5 + expected = DataFrame({"A": arr.copy() + 1.5, "B": "foo"}) + tm.assert_frame_equal(df, expected) + tm.assert_frame_equal(df2, expected) + assert df._mgr is df2._mgr + + @pytest.mark.parametrize( + "op", + [ + "add", + "and", + "div", + "floordiv", + "mod", + "mul", + "or", + "pow", + "sub", + "truediv", + "xor", + ], + ) + def test_inplace_ops_identity2(self, op): + + if op == "div": + return + + df = DataFrame({"a": [1.0, 2.0, 3.0], "b": [1, 2, 3]}) + + operand = 2 + if op in ("and", "or", "xor"): + # cannot use floats for boolean ops + df["a"] = [True, False, True] + + df_copy = df.copy() + iop = f"__i{op}__" + op = f"__{op}__" + + # no id change and value is correct + getattr(df, iop)(operand) + expected = getattr(df_copy, op)(operand) + tm.assert_frame_equal(df, expected) + expected = id(df) + assert id(df) == expected + + def test_alignment_non_pandas(self): + index = ["A", "B", "C"] + columns = ["X", "Y", "Z"] + df = DataFrame(np.random.randn(3, 3), index=index, columns=columns) + + align = pd.core.ops.align_method_FRAME + for val in [ + [1, 2, 3], + 
(1, 2, 3), + np.array([1, 2, 3], dtype=np.int64), + range(1, 4), + ]: + + expected = DataFrame({"X": val, "Y": val, "Z": val}, index=df.index) + tm.assert_frame_equal(align(df, val, "index")[1], expected) + + expected = DataFrame( + {"X": [1, 1, 1], "Y": [2, 2, 2], "Z": [3, 3, 3]}, index=df.index + ) + tm.assert_frame_equal(align(df, val, "columns")[1], expected) + + # length mismatch + msg = "Unable to coerce to Series, length must be 3: given 2" + for val in [[1, 2], (1, 2), np.array([1, 2]), range(1, 3)]: + + with pytest.raises(ValueError, match=msg): + align(df, val, "index") + + with pytest.raises(ValueError, match=msg): + align(df, val, "columns") + + val = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + tm.assert_frame_equal( + align(df, val, "index")[1], + DataFrame(val, index=df.index, columns=df.columns), + ) + tm.assert_frame_equal( + align(df, val, "columns")[1], + DataFrame(val, index=df.index, columns=df.columns), + ) + + # shape mismatch + msg = "Unable to coerce to DataFrame, shape must be" + val = np.array([[1, 2, 3], [4, 5, 6]]) + with pytest.raises(ValueError, match=msg): + align(df, val, "index") + + with pytest.raises(ValueError, match=msg): + align(df, val, "columns") + + val = np.zeros((3, 3, 3)) + msg = re.escape( + "Unable to coerce to Series/DataFrame, dimension must be <= 2: (3, 3, 3)" + ) + with pytest.raises(ValueError, match=msg): + align(df, val, "index") + with pytest.raises(ValueError, match=msg): + align(df, val, "columns") + + def test_no_warning(self, all_arithmetic_operators): + df = DataFrame({"A": [0.0, 0.0], "B": [0.0, None]}) + b = df["B"] + with tm.assert_produces_warning(None): + getattr(df, all_arithmetic_operators)(b) + + def test_dunder_methods_binary(self, all_arithmetic_operators): + # GH#??? frame.__foo__ should only accept one argument + df = DataFrame({"A": [0.0, 0.0], "B": [0.0, None]}) + b = df["B"] + with pytest.raises(TypeError, match="takes 2 positional arguments"): + getattr(df, all_arithmetic_operators)(b, 0) + + def test_align_int_fill_bug(self): + # GH#910 + X = np.arange(10 * 10, dtype="float64").reshape(10, 10) + Y = np.ones((10, 1), dtype=int) + + df1 = DataFrame(X) + df1["0.X"] = Y.squeeze() + + df2 = df1.astype(float) + + result = df1 - df1.mean() + expected = df2 - df2.mean() + tm.assert_frame_equal(result, expected) + + +def test_pow_with_realignment(): + # GH#32685 pow has special semantics for operating with null values + left = DataFrame({"A": [0, 1, 2]}) + right = DataFrame(index=[0, 1, 2]) + + result = left ** right + expected = DataFrame({"A": [np.nan, 1.0, np.nan]}) + tm.assert_frame_equal(result, expected) + + +# TODO: move to tests.arithmetic and parametrize +def test_pow_nan_with_zero(): + left = DataFrame({"A": [np.nan, np.nan, np.nan]}) + right = DataFrame({"A": [0, 0, 0]}) + + expected = DataFrame({"A": [1.0, 1.0, 1.0]}) + + result = left ** right + tm.assert_frame_equal(result, expected) + + result = left["A"] ** right["A"] + tm.assert_series_equal(result, expected["A"]) + + +def test_dataframe_series_extension_dtypes(): + # https://github.com/pandas-dev/pandas/issues/34311 + df = DataFrame(np.random.randint(0, 100, (10, 3)), columns=["a", "b", "c"]) + ser = Series([1, 2, 3], index=["a", "b", "c"]) + + expected = df.to_numpy("int64") + ser.to_numpy("int64").reshape(-1, 3) + expected = DataFrame(expected, columns=df.columns, dtype="Int64") + + df_ea = df.astype("Int64") + result = df_ea + ser + tm.assert_frame_equal(result, expected) + result = df_ea + ser.astype("Int64") + tm.assert_frame_equal(result, expected) 
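+ + +# Illustrative addition, not upstream pandas: a minimal standalone sketch of +# the nullable-dtype behavior checked above, assuming only public APIs +# (astype("Int64"), label alignment, pd.NA propagation). +def test_extension_dtype_add_sketch(): + df = DataFrame({"a": [1, 2], "b": [3, None]}).astype("Int64") + ser = Series([10, 20], index=["a", "b"]) + + # ser aligns to df's columns by label and broadcasts down the rows + result = df + ser + + # the masked Int64 dtype is preserved and missing values stay pd.NA + assert (result.dtypes == "Int64").all() + assert result.loc[1, "b"] is pd.NA + + expected = DataFrame({"a": [11, 12], "b": [23, None]}).astype("Int64") + tm.assert_frame_equal(result, expected)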
+ + +def test_dataframe_blockwise_slicelike(): + # GH#34367 + arr = np.random.randint(0, 1000, (100, 10)) + df1 = DataFrame(arr) + df2 = df1.copy() + df2.iloc[0, [1, 3, 7]] = np.nan + + df3 = df1.copy() + df3.iloc[0, [5]] = np.nan + + df4 = df1.copy() + df4.iloc[0, np.arange(2, 5)] = np.nan + df5 = df1.copy() + df5.iloc[0, np.arange(4, 7)] = np.nan + + for left, right in [(df1, df2), (df2, df3), (df4, df5)]: + res = left + right + + expected = DataFrame({i: left[i] + right[i] for i in left.columns}) + tm.assert_frame_equal(res, expected) + + +@pytest.mark.parametrize( + "df, col_dtype", + [ + (DataFrame([[1.0, 2.0], [4.0, 5.0]], columns=list("ab")), "float64"), + (DataFrame([[1.0, "b"], [4.0, "b"]], columns=list("ab")), "object"), + ], +) +def test_dataframe_operation_with_non_numeric_types(df, col_dtype): + # GH #22663 + expected = DataFrame([[0.0, np.nan], [3.0, np.nan]], columns=list("ab")) + expected = expected.astype({"b": col_dtype}) + result = df + Series([-1.0], index=list("a")) + tm.assert_frame_equal(result, expected) + + +def test_arith_reindex_with_duplicates(): + # https://github.com/pandas-dev/pandas/issues/35194 + df1 = DataFrame(data=[[0]], columns=["second"]) + df2 = DataFrame(data=[[0, 0, 0]], columns=["first", "second", "second"]) + result = df1 + df2 + expected = DataFrame([[np.nan, 0, 0]], columns=["first", "second", "second"]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("to_add", [[Series([1, 1])], [Series([1, 1]), Series([1, 1])]]) +def test_arith_list_of_arraylike_raise(to_add): + # GH 36702. Raise when trying to add list of array-like to DataFrame + df = DataFrame({"x": [1, 2], "y": [1, 2]}) + + msg = f"Unable to coerce list of {type(to_add[0])} to Series/DataFrame" + with pytest.raises(ValueError, match=msg): + df + to_add + with pytest.raises(ValueError, match=msg): + to_add + df + + +def test_inplace_arithmetic_series_update(): + # https://github.com/pandas-dev/pandas/issues/36373 + df = DataFrame({"A": [1, 2, 3]}) + series = df["A"] + vals = series._values + + series += 1 + assert series._values is vals + + expected = DataFrame({"A": [2, 3, 4]}) + tm.assert_frame_equal(df, expected) + + +def test_arithemetic_multiindex_align(): + """ + Regression test for: https://github.com/pandas-dev/pandas/issues/33765 + """ + df1 = DataFrame( + [[1]], + index=["a"], + columns=MultiIndex.from_product([[0], [1]], names=["a", "b"]), + ) + df2 = DataFrame([[1]], index=["a"], columns=Index([0], name="a")) + expected = DataFrame( + [[0]], + index=["a"], + columns=MultiIndex.from_product([[0], [1]], names=["a", "b"]), + ) + result = df1 - df2 + tm.assert_frame_equal(result, expected) + + +def test_bool_frame_mult_float(): + # GH 18549 + df = DataFrame(True, list("ab"), list("cd")) + result = df * 1.0 + expected = DataFrame(np.ones((2, 2)), list("ab"), list("cd")) + tm.assert_frame_equal(result, expected) + + +def test_frame_op_subclass_nonclass_constructor(): + # GH#43201 subclass._constructor is a function, not the subclass itself + + class SubclassedSeries(Series): + @property + def _constructor(self): + return SubclassedSeries + + @property + def _constructor_expanddim(self): + return SubclassedDataFrame + + class SubclassedDataFrame(DataFrame): + _metadata = ["my_extra_data"] + + def __init__(self, my_extra_data, *args, **kwargs): + self.my_extra_data = my_extra_data + super().__init__(*args, **kwargs) + + @property + def _constructor(self): + return functools.partial(type(self), self.my_extra_data) + + @property + def _constructor_sliced(self): + 
return SubclassedSeries + + sdf = SubclassedDataFrame("some_data", {"A": [1, 2, 3], "B": [4, 5, 6]}) + result = sdf * 2 + expected = SubclassedDataFrame("some_data", {"A": [2, 4, 6], "B": [8, 10, 12]}) + tm.assert_frame_equal(result, expected) + + result = sdf + sdf + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/test_block_internals.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/test_block_internals.py new file mode 100644 index 0000000..01a8982 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/test_block_internals.py @@ -0,0 +1,433 @@ +from datetime import ( + datetime, + timedelta, +) +import itertools + +import numpy as np +import pytest + +from pandas.errors import PerformanceWarning +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + Series, + Timestamp, + compat, + date_range, + option_context, +) +import pandas._testing as tm +from pandas.core.internals import ( + NumericBlock, + ObjectBlock, +) + +# Segregated collection of methods that require the BlockManager internal data +# structure + + +# TODO(ArrayManager) check which of those tests need to be rewritten to test the +# equivalent for ArrayManager +pytestmark = td.skip_array_manager_invalid_test + + +class TestDataFrameBlockInternals: + def test_setitem_invalidates_datetime_index_freq(self): + # GH#24096 altering a datetime64tz column inplace invalidates the + # `freq` attribute on the underlying DatetimeIndex + + dti = date_range("20130101", periods=3, tz="US/Eastern") + ts = dti[1] + + df = DataFrame({"B": dti}) + assert df["B"]._values.freq is None + + df.iloc[1, 0] = pd.NaT + assert df["B"]._values.freq is None + + # check that the DatetimeIndex was not altered in place + assert dti.freq == "D" + assert dti[1] == ts + + def test_cast_internals(self, float_frame): + casted = DataFrame(float_frame._mgr, dtype=int) + expected = DataFrame(float_frame._series, dtype=int) + tm.assert_frame_equal(casted, expected) + + casted = DataFrame(float_frame._mgr, dtype=np.int32) + expected = DataFrame(float_frame._series, dtype=np.int32) + tm.assert_frame_equal(casted, expected) + + def test_consolidate(self, float_frame): + float_frame["E"] = 7.0 + consolidated = float_frame._consolidate() + assert len(consolidated._mgr.blocks) == 1 + + # Ensure copy, do I want this? 
+ recons = consolidated._consolidate() + assert recons is not consolidated + tm.assert_frame_equal(recons, consolidated) + + float_frame["F"] = 8.0 + assert len(float_frame._mgr.blocks) == 3 + + return_value = float_frame._consolidate_inplace() + assert return_value is None + assert len(float_frame._mgr.blocks) == 1 + + def test_consolidate_inplace(self, float_frame): + frame = float_frame.copy() # noqa + + # triggers in-place consolidation + for letter in range(ord("A"), ord("Z")): + float_frame[chr(letter)] = chr(letter) + + def test_values_consolidate(self, float_frame): + float_frame["E"] = 7.0 + assert not float_frame._mgr.is_consolidated() + _ = float_frame.values + assert float_frame._mgr.is_consolidated() + + def test_modify_values(self, float_frame): + float_frame.values[5] = 5 + assert (float_frame.values[5] == 5).all() + + # unconsolidated + float_frame["E"] = 7.0 + col = float_frame["E"] + float_frame.values[6] = 6 + assert (float_frame.values[6] == 6).all() + + # check that item_cache was cleared + assert float_frame["E"] is not col + assert (col == 7).all() + + def test_boolean_set_uncons(self, float_frame): + float_frame["E"] = 7.0 + + expected = float_frame.values.copy() + expected[expected > 1] = 2 + + float_frame[float_frame > 1] = 2 + tm.assert_almost_equal(expected, float_frame.values) + + def test_constructor_with_convert(self): + # this is actually mostly a test of lib.maybe_convert_objects + # #2845 + df = DataFrame({"A": [2 ** 63 - 1]}) + result = df["A"] + expected = Series(np.asarray([2 ** 63 - 1], np.int64), name="A") + tm.assert_series_equal(result, expected) + + df = DataFrame({"A": [2 ** 63]}) + result = df["A"] + expected = Series(np.asarray([2 ** 63], np.uint64), name="A") + tm.assert_series_equal(result, expected) + + df = DataFrame({"A": [datetime(2005, 1, 1), True]}) + result = df["A"] + expected = Series( + np.asarray([datetime(2005, 1, 1), True], np.object_), name="A" + ) + tm.assert_series_equal(result, expected) + + df = DataFrame({"A": [None, 1]}) + result = df["A"] + expected = Series(np.asarray([np.nan, 1], np.float_), name="A") + tm.assert_series_equal(result, expected) + + df = DataFrame({"A": [1.0, 2]}) + result = df["A"] + expected = Series(np.asarray([1.0, 2], np.float_), name="A") + tm.assert_series_equal(result, expected) + + df = DataFrame({"A": [1.0 + 2.0j, 3]}) + result = df["A"] + expected = Series(np.asarray([1.0 + 2.0j, 3], np.complex_), name="A") + tm.assert_series_equal(result, expected) + + df = DataFrame({"A": [1.0 + 2.0j, 3.0]}) + result = df["A"] + expected = Series(np.asarray([1.0 + 2.0j, 3.0], np.complex_), name="A") + tm.assert_series_equal(result, expected) + + df = DataFrame({"A": [1.0 + 2.0j, True]}) + result = df["A"] + expected = Series(np.asarray([1.0 + 2.0j, True], np.object_), name="A") + tm.assert_series_equal(result, expected) + + df = DataFrame({"A": [1.0, None]}) + result = df["A"] + expected = Series(np.asarray([1.0, np.nan], np.float_), name="A") + tm.assert_series_equal(result, expected) + + df = DataFrame({"A": [1.0 + 2.0j, None]}) + result = df["A"] + expected = Series(np.asarray([1.0 + 2.0j, np.nan], np.complex_), name="A") + tm.assert_series_equal(result, expected) + + df = DataFrame({"A": [2.0, 1, True, None]}) + result = df["A"] + expected = Series(np.asarray([2.0, 1, True, None], np.object_), name="A") + tm.assert_series_equal(result, expected) + + df = DataFrame({"A": [2.0, 1, datetime(2006, 1, 1), None]}) + result = df["A"] + expected = Series( + np.asarray([2.0, 1, datetime(2006, 1, 1), None], 
np.object_), name="A" + ) + tm.assert_series_equal(result, expected) + + def test_construction_with_mixed(self, float_string_frame): + # test construction edge cases with mixed types + + # f7u12, this does not work without extensive workaround + data = [ + [datetime(2001, 1, 5), np.nan, datetime(2001, 1, 2)], + [datetime(2000, 1, 2), datetime(2000, 1, 3), datetime(2000, 1, 1)], + ] + df = DataFrame(data) + + # check dtypes + result = df.dtypes + expected = Series({"datetime64[ns]": 3}) + + # mixed-type frames + float_string_frame["datetime"] = datetime.now() + float_string_frame["timedelta"] = timedelta(days=1, seconds=1) + assert float_string_frame["datetime"].dtype == "M8[ns]" + assert float_string_frame["timedelta"].dtype == "m8[ns]" + result = float_string_frame.dtypes + expected = Series( + [np.dtype("float64")] * 4 + + [ + np.dtype("object"), + np.dtype("datetime64[ns]"), + np.dtype("timedelta64[ns]"), + ], + index=list("ABCD") + ["foo", "datetime", "timedelta"], + ) + tm.assert_series_equal(result, expected) + + def test_construction_with_conversions(self): + + # convert from a numpy array of non-ns timedelta64 + arr = np.array([1, 2, 3], dtype="timedelta64[s]") + df = DataFrame(index=range(3)) + df["A"] = arr + expected = DataFrame( + {"A": pd.timedelta_range("00:00:01", periods=3, freq="s")}, index=range(3) + ) + tm.assert_frame_equal(df, expected) + + expected = DataFrame( + { + "dt1": Timestamp("20130101"), + "dt2": date_range("20130101", periods=3), + # 'dt3' : date_range('20130101 00:00:01',periods=3,freq='s'), + }, + index=range(3), + ) + + df = DataFrame(index=range(3)) + df["dt1"] = np.datetime64("2013-01-01") + df["dt2"] = np.array( + ["2013-01-01", "2013-01-02", "2013-01-03"], dtype="datetime64[D]" + ) + + # df['dt3'] = np.array(['2013-01-01 00:00:01','2013-01-01 + # 00:00:02','2013-01-01 00:00:03'],dtype='datetime64[s]') + + tm.assert_frame_equal(df, expected) + + def test_constructor_compound_dtypes(self): + # GH 5191 + # compound dtypes should raise not-implementederror + + def f(dtype): + data = list(itertools.repeat((datetime(2001, 1, 1), "aa", 20), 9)) + return DataFrame(data=data, columns=["A", "B", "C"], dtype=dtype) + + msg = "compound dtypes are not implemented in the DataFrame constructor" + with pytest.raises(NotImplementedError, match=msg): + f([("A", "datetime64[h]"), ("B", "str"), ("C", "int32")]) + + # these work (though results may be unexpected) + depr_msg = "either all columns will be cast to that dtype, or a TypeError will" + with tm.assert_produces_warning(FutureWarning, match=depr_msg): + f("int64") + with tm.assert_produces_warning(FutureWarning, match=depr_msg): + f("float64") + + # 10822 + # invalid error message on dt inference + if not compat.is_platform_windows(): + f("M8[ns]") + + def test_pickle(self, float_string_frame, timezone_frame): + empty_frame = DataFrame() + + unpickled = tm.round_trip_pickle(float_string_frame) + tm.assert_frame_equal(float_string_frame, unpickled) + + # buglet + float_string_frame._mgr.ndim + + # empty + unpickled = tm.round_trip_pickle(empty_frame) + repr(unpickled) + + # tz frame + unpickled = tm.round_trip_pickle(timezone_frame) + tm.assert_frame_equal(timezone_frame, unpickled) + + def test_consolidate_datetime64(self): + # numpy vstack bug + + df = DataFrame( + { + "starting": pd.to_datetime( + [ + "2012-06-21 00:00", + "2012-06-23 07:00", + "2012-06-23 16:30", + "2012-06-25 08:00", + "2012-06-26 12:00", + ] + ), + "ending": pd.to_datetime( + [ + "2012-06-23 07:00", + "2012-06-23 16:30", + "2012-06-25 08:00", 
+ "2012-06-26 12:00", + "2012-06-27 08:00", + ] + ), + "measure": [77, 65, 77, 0, 77], + } + ) + + ser_starting = df.starting + ser_starting.index = ser_starting.values + ser_starting = ser_starting.tz_localize("US/Eastern") + ser_starting = ser_starting.tz_convert("UTC") + ser_starting.index.name = "starting" + + ser_ending = df.ending + ser_ending.index = ser_ending.values + ser_ending = ser_ending.tz_localize("US/Eastern") + ser_ending = ser_ending.tz_convert("UTC") + ser_ending.index.name = "ending" + + df.starting = ser_starting.index + df.ending = ser_ending.index + + tm.assert_index_equal(pd.DatetimeIndex(df.starting), ser_starting.index) + tm.assert_index_equal(pd.DatetimeIndex(df.ending), ser_ending.index) + + def test_is_mixed_type(self, float_frame, float_string_frame): + assert not float_frame._is_mixed_type + assert float_string_frame._is_mixed_type + + def test_stale_cached_series_bug_473(self): + + # this is chained, but ok + with option_context("chained_assignment", None): + Y = DataFrame( + np.random.random((4, 4)), + index=("a", "b", "c", "d"), + columns=("e", "f", "g", "h"), + ) + repr(Y) + Y["e"] = Y["e"].astype("object") + Y["g"]["c"] = np.NaN + repr(Y) + result = Y.sum() # noqa + exp = Y["g"].sum() # noqa + assert pd.isna(Y["g"]["c"]) + + def test_strange_column_corruption_issue(self): + # TODO(wesm): Unclear how exactly this is related to internal matters + df = DataFrame(index=[0, 1]) + df[0] = np.nan + wasCol = {} + + with tm.assert_produces_warning(PerformanceWarning): + for i, dt in enumerate(df.index): + for col in range(100, 200): + if col not in wasCol: + wasCol[col] = 1 + df[col] = np.nan + df[col][dt] = i + + myid = 100 + + first = len(df.loc[pd.isna(df[myid]), [myid]]) + second = len(df.loc[pd.isna(df[myid]), [myid]]) + assert first == second == 0 + + def test_constructor_no_pandas_array(self): + # Ensure that PandasArray isn't allowed inside Series + # See https://github.com/pandas-dev/pandas/issues/23995 for more. 
+ arr = Series([1, 2, 3]).array + result = DataFrame({"A": arr}) + expected = DataFrame({"A": [1, 2, 3]}) + tm.assert_frame_equal(result, expected) + assert isinstance(result._mgr.blocks[0], NumericBlock) + + def test_add_column_with_pandas_array(self): + # GH 26390 + df = DataFrame({"a": [1, 2, 3, 4], "b": ["a", "b", "c", "d"]}) + df["c"] = pd.arrays.PandasArray(np.array([1, 2, None, 3], dtype=object)) + df2 = DataFrame( + { + "a": [1, 2, 3, 4], + "b": ["a", "b", "c", "d"], + "c": pd.arrays.PandasArray(np.array([1, 2, None, 3], dtype=object)), + } + ) + assert type(df["c"]._mgr.blocks[0]) == ObjectBlock + assert type(df2["c"]._mgr.blocks[0]) == ObjectBlock + tm.assert_frame_equal(df, df2) + + +def test_update_inplace_sets_valid_block_values(): + # https://github.com/pandas-dev/pandas/issues/33457 + df = DataFrame({"a": Series([1, 2, None], dtype="category")}) + + # inplace update of a single column + df["a"].fillna(1, inplace=True) + + # check we haven't put a Series into any block.values + assert isinstance(df._mgr.blocks[0].values, Categorical) + + # smoketest for OP bug from GH#35731 + assert df.isnull().sum().sum() == 0 + + +def test_nonconsolidated_item_cache_take(): + # https://github.com/pandas-dev/pandas/issues/35521 + + # create non-consolidated dataframe with object dtype columns + df = DataFrame() + df["col1"] = Series(["a"], dtype=object) + df["col2"] = Series([0], dtype=object) + + # access column (item cache) + df["col1"] == "A" + # take operation + # (regression was that this consolidated but didn't reset item cache, + # resulting in an invalid cache and the .at operation not working properly) + df[df["col2"] == 0] + + # now setting value should update actual dataframe + df.at[0, "col1"] = "A" + + expected = DataFrame({"col1": ["A"], "col2": [0]}, dtype=object) + tm.assert_frame_equal(df, expected) + assert df.at[0, "col1"] == "A" diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/test_constructors.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/test_constructors.py new file mode 100644 index 0000000..08027d5 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/test_constructors.py @@ -0,0 +1,3087 @@ +from collections import ( + OrderedDict, + abc, +) +from datetime import ( + date, + datetime, + timedelta, +) +import functools +import itertools +import re +import warnings + +import numpy as np +import numpy.ma as ma +import numpy.ma.mrecords as mrecords +import pytest +import pytz + +from pandas.compat import np_version_under1p19 +import pandas.util._test_decorators as td + +from pandas.core.dtypes.common import is_integer_dtype +from pandas.core.dtypes.dtypes import ( + DatetimeTZDtype, + IntervalDtype, + PandasDtype, + PeriodDtype, +) + +import pandas as pd +from pandas import ( + Categorical, + CategoricalIndex, + DataFrame, + DatetimeIndex, + Index, + Interval, + MultiIndex, + Period, + RangeIndex, + Series, + Timedelta, + Timestamp, + cut, + date_range, + isna, +) +import pandas._testing as tm +from pandas.arrays import ( + DatetimeArray, + IntervalArray, + PeriodArray, + SparseArray, +) +from pandas.core.api import Int64Index + +MIXED_FLOAT_DTYPES = ["float16", "float32", "float64"] +MIXED_INT_DTYPES = [ + "uint8", + "uint16", + "uint32", + "uint64", + "int8", + "int16", + "int32", + "int64", +] + + +class TestDataFrameConstructors: + def test_constructor_from_2d_datetimearray(self, using_array_manager): + dti = date_range("2016-01-01", periods=6, tz="US/Pacific") + dta = dti._data.reshape(3, 2) + + df = DataFrame(dta) 
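+ # The expected frame is assembled column-by-column from the same 2D
+ # tz-aware array; both constructions should yield identical frames.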
+ expected = DataFrame({0: dta[:, 0], 1: dta[:, 1]}) + tm.assert_frame_equal(df, expected) + if not using_array_manager: + # GH#44724 big performance hit if we de-consolidate + assert len(df._mgr.blocks) == 1 + + def test_constructor_dict_with_tzaware_scalar(self): + # GH#42505 + dt = Timestamp("2019-11-03 01:00:00-0700").tz_convert("America/Los_Angeles") + + df = DataFrame({"dt": dt}, index=[0]) + expected = DataFrame({"dt": [dt]}) + tm.assert_frame_equal(df, expected) + + # Non-homogeneous + df = DataFrame({"dt": dt, "value": [1]}) + expected = DataFrame({"dt": [dt], "value": [1]}) + tm.assert_frame_equal(df, expected) + + def test_construct_ndarray_with_nas_and_int_dtype(self): + # GH#26919 match Series by not casting np.nan to meaningless int + arr = np.array([[1, np.nan], [2, 3]]) + with tm.assert_produces_warning(FutureWarning): + df = DataFrame(arr, dtype="i8") + assert df.values.dtype == arr.dtype + assert isna(df.iloc[0, 1]) + + # check this matches Series behavior + with tm.assert_produces_warning(FutureWarning): + ser = Series(arr[0], dtype="i8", name=0) + expected = df.iloc[0] + tm.assert_series_equal(ser, expected) + + def test_construct_from_list_of_datetimes(self): + df = DataFrame([datetime.now(), datetime.now()]) + assert df[0].dtype == np.dtype("M8[ns]") + + def test_constructor_from_tzaware_datetimeindex(self): + # don't cast a DatetimeIndex WITH a tz, leave as object + # GH#6032 + naive = DatetimeIndex(["2013-1-1 13:00", "2013-1-2 14:00"], name="B") + idx = naive.tz_localize("US/Pacific") + + expected = Series(np.array(idx.tolist(), dtype="object"), name="B") + assert expected.dtype == idx.dtype + + # convert index to series + result = Series(idx) + tm.assert_series_equal(result, expected) + + def test_array_of_dt64_nat_with_td64dtype_raises(self, frame_or_series): + # GH#39462 + nat = np.datetime64("NaT", "ns") + arr = np.array([nat], dtype=object) + if frame_or_series is DataFrame: + arr = arr.reshape(1, 1) + + msg = "|".join( + [ + "Could not convert object to NumPy timedelta", + "Invalid type for timedelta scalar: ", + ] + ) + with pytest.raises(ValueError, match=msg): + frame_or_series(arr, dtype="m8[ns]") + + @pytest.mark.parametrize("kind", ["m", "M"]) + def test_datetimelike_values_with_object_dtype(self, kind, frame_or_series): + # with dtype=object, we should cast dt64 values to Timestamps, not pydatetimes + if kind == "M": + dtype = "M8[ns]" + scalar_type = Timestamp + else: + dtype = "m8[ns]" + scalar_type = Timedelta + + arr = np.arange(6, dtype="i8").view(dtype).reshape(3, 2) + if frame_or_series is Series: + arr = arr[:, 0] + + obj = frame_or_series(arr, dtype=object) + assert obj._mgr.arrays[0].dtype == object + assert isinstance(obj._mgr.arrays[0].ravel()[0], scalar_type) + + # go through a different path in internals.construction + obj = frame_or_series(frame_or_series(arr), dtype=object) + assert obj._mgr.arrays[0].dtype == object + assert isinstance(obj._mgr.arrays[0].ravel()[0], scalar_type) + + obj = frame_or_series(frame_or_series(arr), dtype=PandasDtype(object)) + assert obj._mgr.arrays[0].dtype == object + assert isinstance(obj._mgr.arrays[0].ravel()[0], scalar_type) + + if frame_or_series is DataFrame: + # other paths through internals.construction + sers = [Series(x) for x in arr] + obj = frame_or_series(sers, dtype=object) + assert obj._mgr.arrays[0].dtype == object + assert isinstance(obj._mgr.arrays[0].ravel()[0], scalar_type) + + def test_series_with_name_not_matching_column(self): + # GH#9232 + x = Series(range(5), name=1) + y = 
Series(range(5), name=0) + + result = DataFrame(x, columns=[0]) + expected = DataFrame([], columns=[0]) + tm.assert_frame_equal(result, expected) + + result = DataFrame(y, columns=[1]) + expected = DataFrame([], columns=[1]) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "constructor", + [ + lambda: DataFrame(), + lambda: DataFrame(None), + lambda: DataFrame({}), + lambda: DataFrame(()), + lambda: DataFrame([]), + lambda: DataFrame(_ for _ in []), + lambda: DataFrame(range(0)), + lambda: DataFrame(data=None), + lambda: DataFrame(data={}), + lambda: DataFrame(data=()), + lambda: DataFrame(data=[]), + lambda: DataFrame(data=(_ for _ in [])), + lambda: DataFrame(data=range(0)), + ], + ) + def test_empty_constructor(self, constructor): + expected = DataFrame() + result = constructor() + assert len(result.index) == 0 + assert len(result.columns) == 0 + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "emptylike,expected_index,expected_columns", + [ + ([[]], RangeIndex(1), RangeIndex(0)), + ([[], []], RangeIndex(2), RangeIndex(0)), + ([(_ for _ in [])], RangeIndex(1), RangeIndex(0)), + ], + ) + def test_emptylike_constructor(self, emptylike, expected_index, expected_columns): + expected = DataFrame(index=expected_index, columns=expected_columns) + result = DataFrame(emptylike) + tm.assert_frame_equal(result, expected) + + def test_constructor_mixed(self, float_string_frame): + index, data = tm.getMixedTypeDict() + + # TODO(wesm), incomplete test? + indexed_frame = DataFrame(data, index=index) # noqa + unindexed_frame = DataFrame(data) # noqa + + assert float_string_frame["foo"].dtype == np.object_ + + def test_constructor_cast_failure(self): + msg = "either all columns will be cast to that dtype, or a TypeError will" + with tm.assert_produces_warning(FutureWarning, match=msg): + foo = DataFrame({"a": ["a", "b", "c"]}, dtype=np.float64) + assert foo["a"].dtype == object + + # GH 3010, constructing with odd arrays + df = DataFrame(np.ones((4, 2))) + + # this is ok + df["foo"] = np.ones((4, 2)).tolist() + + # this is not ok + msg = "Expected a 1D array, got an array with shape \\(4, 2\\)" + with pytest.raises(ValueError, match=msg): + df["test"] = np.ones((4, 2)) + + # this is ok + df["foo2"] = np.ones((4, 2)).tolist() + + def test_constructor_dtype_copy(self): + orig_df = DataFrame({"col1": [1.0], "col2": [2.0], "col3": [3.0]}) + + new_df = DataFrame(orig_df, dtype=float, copy=True) + + new_df["col1"] = 200.0 + assert orig_df["col1"][0] == 1.0 + + def test_constructor_dtype_nocast_view_dataframe(self): + df = DataFrame([[1, 2]]) + should_be_view = DataFrame(df, dtype=df[0].dtype) + should_be_view[0][0] = 99 + assert df.values[0, 0] == 99 + + def test_constructor_dtype_nocast_view_2d_array(self, using_array_manager): + df = DataFrame([[1, 2], [3, 4]], dtype="int64") + if not using_array_manager: + should_be_view = DataFrame(df.values, dtype=df[0].dtype) + should_be_view[0][0] = 97 + assert df.values[0, 0] == 97 + else: + # INFO(ArrayManager) DataFrame(ndarray) doesn't necessarily preserve + # a view on the array to ensure contiguous 1D arrays + df2 = DataFrame(df.values, dtype=df[0].dtype) + assert df2._mgr.arrays[0].flags.c_contiguous + + @td.skip_array_manager_invalid_test + def test_1d_object_array_does_not_copy(self): + # https://github.com/pandas-dev/pandas/issues/39272 + arr = np.array(["a", "b"], dtype="object") + df = DataFrame(arr) + assert np.shares_memory(df.values, arr) + + @td.skip_array_manager_invalid_test + def 
test_2d_object_array_does_not_copy(self): + # https://github.com/pandas-dev/pandas/issues/39272 + arr = np.array([["a", "b"], ["c", "d"]], dtype="object") + df = DataFrame(arr) + assert np.shares_memory(df.values, arr) + + def test_constructor_dtype_list_data(self): + df = DataFrame([[1, "2"], [None, "a"]], dtype=object) + assert df.loc[1, 0] is None + assert df.loc[0, 1] == "2" + + @pytest.mark.skipif(np_version_under1p19, reason="NumPy change.") + def test_constructor_list_of_2d_raises(self): + # https://github.com/pandas-dev/pandas/issues/32289 + a = DataFrame() + b = np.empty((0, 0)) + with pytest.raises(ValueError, match=r"shape=\(1, 0, 0\)"): + DataFrame([a]) + + with pytest.raises(ValueError, match=r"shape=\(1, 0, 0\)"): + DataFrame([b]) + + a = DataFrame({"A": [1, 2]}) + with pytest.raises(ValueError, match=r"shape=\(2, 2, 1\)"): + DataFrame([a, a]) + + def test_constructor_mixed_dtypes(self): + def _make_mixed_dtypes_df(typ, ad=None): + + if typ == "int": + dtypes = MIXED_INT_DTYPES + arrays = [np.array(np.random.rand(10), dtype=d) for d in dtypes] + elif typ == "float": + dtypes = MIXED_FLOAT_DTYPES + arrays = [ + np.array(np.random.randint(10, size=10), dtype=d) for d in dtypes + ] + + for d, a in zip(dtypes, arrays): + assert a.dtype == d + if ad is None: + ad = {} + ad.update({d: a for d, a in zip(dtypes, arrays)}) + return DataFrame(ad) + + def _check_mixed_dtypes(df, dtypes=None): + if dtypes is None: + dtypes = MIXED_FLOAT_DTYPES + MIXED_INT_DTYPES + for d in dtypes: + if d in df: + assert df.dtypes[d] == d + + # mixed floating and integer coexist in the same frame + df = _make_mixed_dtypes_df("float") + _check_mixed_dtypes(df) + + # add lots of types + df = _make_mixed_dtypes_df("float", {"A": 1, "B": "foo", "C": "bar"}) + _check_mixed_dtypes(df) + + # GH 622 + df = _make_mixed_dtypes_df("int") + _check_mixed_dtypes(df) + + def test_constructor_complex_dtypes(self): + # GH10952 + a = np.random.rand(10).astype(np.complex64) + b = np.random.rand(10).astype(np.complex128) + + df = DataFrame({"a": a, "b": b}) + assert a.dtype == df.a.dtype + assert b.dtype == df.b.dtype + + def test_constructor_dtype_str_na_values(self, string_dtype): + # https://github.com/pandas-dev/pandas/issues/21083 + df = DataFrame({"A": ["x", None]}, dtype=string_dtype) + result = df.isna() + expected = DataFrame({"A": [False, True]}) + tm.assert_frame_equal(result, expected) + assert df.iloc[1, 0] is None + + df = DataFrame({"A": ["x", np.nan]}, dtype=string_dtype) + assert np.isnan(df.iloc[1, 0]) + + def test_constructor_rec(self, float_frame): + rec = float_frame.to_records(index=False) + rec.dtype.names = list(rec.dtype.names)[::-1] + + index = float_frame.index + + df = DataFrame(rec) + tm.assert_index_equal(df.columns, Index(rec.dtype.names)) + + df2 = DataFrame(rec, index=index) + tm.assert_index_equal(df2.columns, Index(rec.dtype.names)) + tm.assert_index_equal(df2.index, index) + + # case with columns != the ones we would infer from the data + rng = np.arange(len(rec))[::-1] + df3 = DataFrame(rec, index=rng, columns=["C", "B"]) + expected = DataFrame(rec, index=rng).reindex(columns=["C", "B"]) + tm.assert_frame_equal(df3, expected) + + def test_constructor_bool(self): + df = DataFrame({0: np.ones(10, dtype=bool), 1: np.zeros(10, dtype=bool)}) + assert df.values.dtype == np.bool_ + + def test_constructor_overflow_int64(self): + # see gh-14881 + values = np.array([2 ** 64 - i for i in range(1, 10)], dtype=np.uint64) + + result = DataFrame({"a": values}) + assert result["a"].dtype == np.uint64 + + 
# see gh-2355 + data_scores = [ + (6311132704823138710, 273), + (2685045978526272070, 23), + (8921811264899370420, 45), + (17019687244989530680, 270), + (9930107427299601010, 273), + ] + dtype = [("uid", "u8"), ("score", "u8")] + data = np.zeros((len(data_scores),), dtype=dtype) + data[:] = data_scores + df_crawls = DataFrame(data) + assert df_crawls["uid"].dtype == np.uint64 + + @pytest.mark.parametrize( + "values", + [ + np.array([2 ** 64], dtype=object), + np.array([2 ** 65]), + [2 ** 64 + 1], + np.array([-(2 ** 63) - 4], dtype=object), + np.array([-(2 ** 64) - 1]), + [-(2 ** 65) - 2], + ], + ) + def test_constructor_int_overflow(self, values): + # see gh-18584 + value = values[0] + result = DataFrame(values) + + assert result[0].dtype == object + assert result[0][0] == value + + def test_constructor_ordereddict(self): + import random + + nitems = 100 + nums = list(range(nitems)) + random.shuffle(nums) + expected = [f"A{i:d}" for i in nums] + df = DataFrame(OrderedDict(zip(expected, [[0]] * nitems))) + assert expected == list(df.columns) + + def test_constructor_dict(self): + datetime_series = tm.makeTimeSeries(nper=30) + # test expects index shifted by 5 + datetime_series_short = tm.makeTimeSeries(nper=30)[5:] + + frame = DataFrame({"col1": datetime_series, "col2": datetime_series_short}) + + # col2 is padded with NaN + assert len(datetime_series) == 30 + assert len(datetime_series_short) == 25 + + tm.assert_series_equal(frame["col1"], datetime_series.rename("col1")) + + exp = Series( + np.concatenate([[np.nan] * 5, datetime_series_short.values]), + index=datetime_series.index, + name="col2", + ) + tm.assert_series_equal(exp, frame["col2"]) + + frame = DataFrame( + {"col1": datetime_series, "col2": datetime_series_short}, + columns=["col2", "col3", "col4"], + ) + + assert len(frame) == len(datetime_series_short) + assert "col1" not in frame + assert isna(frame["col3"]).all() + + # Corner cases + assert len(DataFrame()) == 0 + + # mix dict and array, wrong size - no spec for which error should raise + # first + msg = "Mixing dicts with non-Series may lead to ambiguous ordering." 
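+ # The dict value carries its own index labels ("a", "b") while the list
+ # has three unlabeled values, so there is no unambiguous way to align them.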
+ with pytest.raises(ValueError, match=msg): + DataFrame({"A": {"a": "a", "b": "b"}, "B": ["a", "b", "c"]}) + + def test_constructor_dict_length1(self): + # Length-one dict micro-optimization + frame = DataFrame({"A": {"1": 1, "2": 2}}) + tm.assert_index_equal(frame.index, Index(["1", "2"])) + + def test_constructor_dict_with_index(self): + # empty dict plus index + idx = Index([0, 1, 2]) + frame = DataFrame({}, index=idx) + assert frame.index is idx + + def test_constructor_dict_with_index_and_columns(self): + # empty dict with index and columns + idx = Index([0, 1, 2]) + frame = DataFrame({}, index=idx, columns=idx) + assert frame.index is idx + assert frame.columns is idx + assert len(frame._series) == 3 + + def test_constructor_dict_of_empty_lists(self): + # with dict of empty list and Series + frame = DataFrame({"A": [], "B": []}, columns=["A", "B"]) + tm.assert_index_equal(frame.index, RangeIndex(0), exact=True) + + def test_constructor_dict_with_none(self): + # GH 14381 + # Dict with None value + frame_none = DataFrame({"a": None}, index=[0]) + frame_none_list = DataFrame({"a": [None]}, index=[0]) + assert frame_none._get_value(0, "a") is None + assert frame_none_list._get_value(0, "a") is None + tm.assert_frame_equal(frame_none, frame_none_list) + + def test_constructor_dict_errors(self): + # GH10856 + # dict with scalar values should raise error, even if columns passed + msg = "If using all scalar values, you must pass an index" + with pytest.raises(ValueError, match=msg): + DataFrame({"a": 0.7}) + + with pytest.raises(ValueError, match=msg): + DataFrame({"a": 0.7}, columns=["a"]) + + @pytest.mark.parametrize("scalar", [2, np.nan, None, "D"]) + def test_constructor_invalid_items_unused(self, scalar): + # No error if invalid (scalar) value is in fact not used: + result = DataFrame({"a": scalar}, columns=["b"]) + expected = DataFrame(columns=["b"]) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("value", [2, np.nan, None, float("nan")]) + def test_constructor_dict_nan_key(self, value): + # GH 18455 + cols = [1, value, 3] + idx = ["a", value] + values = [[0, 3], [1, 4], [2, 5]] + data = {cols[c]: Series(values[c], index=idx) for c in range(3)} + result = DataFrame(data).sort_values(1).sort_values("a", axis=1) + expected = DataFrame( + np.arange(6, dtype="int64").reshape(2, 3), index=idx, columns=cols + ) + tm.assert_frame_equal(result, expected) + + result = DataFrame(data, index=idx).sort_values("a", axis=1) + tm.assert_frame_equal(result, expected) + + result = DataFrame(data, index=idx, columns=cols) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("value", [np.nan, None, float("nan")]) + def test_constructor_dict_nan_tuple_key(self, value): + # GH 18455 + cols = Index([(11, 21), (value, 22), (13, value)]) + idx = Index([("a", value), (value, 2)]) + values = [[0, 3], [1, 4], [2, 5]] + data = {cols[c]: Series(values[c], index=idx) for c in range(3)} + result = DataFrame(data).sort_values((11, 21)).sort_values(("a", value), axis=1) + expected = DataFrame( + np.arange(6, dtype="int64").reshape(2, 3), index=idx, columns=cols + ) + tm.assert_frame_equal(result, expected) + + result = DataFrame(data, index=idx).sort_values(("a", value), axis=1) + tm.assert_frame_equal(result, expected) + + result = DataFrame(data, index=idx, columns=cols) + tm.assert_frame_equal(result, expected) + + def test_constructor_dict_order_insertion(self): + datetime_series = tm.makeTimeSeries(nper=30) + datetime_series_short = tm.makeTimeSeries(nper=25) + + # 
GH19018 + # initialization ordering: by insertion order if python>= 3.6 + d = {"b": datetime_series_short, "a": datetime_series} + frame = DataFrame(data=d) + expected = DataFrame(data=d, columns=list("ba")) + tm.assert_frame_equal(frame, expected) + + def test_constructor_dict_nan_key_and_columns(self): + # GH 16894 + result = DataFrame({np.nan: [1, 2], 2: [2, 3]}, columns=[np.nan, 2]) + expected = DataFrame([[1, 2], [2, 3]], columns=[np.nan, 2]) + tm.assert_frame_equal(result, expected) + + def test_constructor_multi_index(self): + # GH 4078 + # construction error with mi and all-nan frame + tuples = [(2, 3), (3, 3), (3, 3)] + mi = MultiIndex.from_tuples(tuples) + df = DataFrame(index=mi, columns=mi) + assert isna(df).values.ravel().all() + + tuples = [(3, 3), (2, 3), (3, 3)] + mi = MultiIndex.from_tuples(tuples) + df = DataFrame(index=mi, columns=mi) + assert isna(df).values.ravel().all() + + def test_constructor_2d_index(self): + # GH 25416 + # handling of 2d index in construction + df = DataFrame([[1]], columns=[[1]], index=[1, 2]) + expected = DataFrame( + [1, 1], + index=Int64Index([1, 2], dtype="int64"), + columns=MultiIndex(levels=[[1]], codes=[[0]]), + ) + tm.assert_frame_equal(df, expected) + + df = DataFrame([[1]], columns=[[1]], index=[[1, 2]]) + expected = DataFrame( + [1, 1], + index=MultiIndex(levels=[[1, 2]], codes=[[0, 1]]), + columns=MultiIndex(levels=[[1]], codes=[[0]]), + ) + tm.assert_frame_equal(df, expected) + + def test_constructor_error_msgs(self): + msg = "Empty data passed with indices specified." + # passing an empty array with columns specified. + with pytest.raises(ValueError, match=msg): + DataFrame(np.empty(0), columns=list("abc")) + + msg = "Mixing dicts with non-Series may lead to ambiguous ordering." + # mix dict and array, wrong size + with pytest.raises(ValueError, match=msg): + DataFrame({"A": {"a": "a", "b": "b"}, "B": ["a", "b", "c"]}) + + # wrong size ndarray, GH 3105 + msg = r"Shape of passed values is \(4, 3\), indices imply \(3, 3\)" + with pytest.raises(ValueError, match=msg): + DataFrame( + np.arange(12).reshape((4, 3)), + columns=["foo", "bar", "baz"], + index=date_range("2000-01-01", periods=3), + ) + + arr = np.array([[4, 5, 6]]) + msg = r"Shape of passed values is \(1, 3\), indices imply \(1, 4\)" + with pytest.raises(ValueError, match=msg): + DataFrame(index=[0], columns=range(0, 4), data=arr) + + arr = np.array([4, 5, 6]) + msg = r"Shape of passed values is \(3, 1\), indices imply \(1, 4\)" + with pytest.raises(ValueError, match=msg): + DataFrame(index=[0], columns=range(0, 4), data=arr) + + # higher dim raise exception + with pytest.raises(ValueError, match="Must pass 2-d input"): + DataFrame(np.zeros((3, 3, 3)), columns=["A", "B", "C"], index=[1]) + + # wrong size axis labels + msg = r"Shape of passed values is \(2, 3\), indices imply \(1, 3\)" + with pytest.raises(ValueError, match=msg): + DataFrame(np.random.rand(2, 3), columns=["A", "B", "C"], index=[1]) + + msg = r"Shape of passed values is \(2, 3\), indices imply \(2, 2\)" + with pytest.raises(ValueError, match=msg): + DataFrame(np.random.rand(2, 3), columns=["A", "B"], index=[1, 2]) + + # gh-26429 + msg = "2 columns passed, passed data had 10 columns" + with pytest.raises(ValueError, match=msg): + DataFrame((range(10), range(10, 20)), columns=("ones", "twos")) + + msg = "If using all scalar values, you must pass an index" + with pytest.raises(ValueError, match=msg): + DataFrame({"a": False, "b": True}) + + def test_constructor_subclass_dict(self, dict_subclass): + # Test for 
passing dict subclass to constructor + data = { + "col1": dict_subclass((x, 10.0 * x) for x in range(10)), + "col2": dict_subclass((x, 20.0 * x) for x in range(10)), + } + df = DataFrame(data) + refdf = DataFrame({col: dict(val.items()) for col, val in data.items()}) + tm.assert_frame_equal(refdf, df) + + data = dict_subclass(data.items()) + df = DataFrame(data) + tm.assert_frame_equal(refdf, df) + + def test_constructor_defaultdict(self, float_frame): + # try with defaultdict + from collections import defaultdict + + data = {} + float_frame["B"][:10] = np.nan + for k, v in float_frame.items(): + dct = defaultdict(dict) + dct.update(v.to_dict()) + data[k] = dct + frame = DataFrame(data) + expected = frame.reindex(index=float_frame.index) + tm.assert_frame_equal(float_frame, expected) + + def test_constructor_dict_block(self): + expected = np.array([[4.0, 3.0, 2.0, 1.0]]) + df = DataFrame( + {"d": [4.0], "c": [3.0], "b": [2.0], "a": [1.0]}, + columns=["d", "c", "b", "a"], + ) + tm.assert_numpy_array_equal(df.values, expected) + + def test_constructor_dict_cast(self): + # cast float tests + test_data = {"A": {"1": 1, "2": 2}, "B": {"1": "1", "2": "2", "3": "3"}} + frame = DataFrame(test_data, dtype=float) + assert len(frame) == 3 + assert frame["B"].dtype == np.float64 + assert frame["A"].dtype == np.float64 + + frame = DataFrame(test_data) + assert len(frame) == 3 + assert frame["B"].dtype == np.object_ + assert frame["A"].dtype == np.float64 + + def test_constructor_dict_cast2(self): + # can't cast to float + test_data = { + "A": dict(zip(range(20), tm.makeStringIndex(20))), + "B": dict(zip(range(15), np.random.randn(15))), + } + msg = "either all columns will be cast to that dtype, or a TypeError will" + with tm.assert_produces_warning(FutureWarning, match=msg): + frame = DataFrame(test_data, dtype=float) + + assert len(frame) == 20 + assert frame["A"].dtype == np.object_ + assert frame["B"].dtype == np.float64 + + def test_constructor_dict_dont_upcast(self): + d = {"Col1": {"Row1": "A String", "Row2": np.nan}} + df = DataFrame(d) + assert isinstance(df["Col1"]["Row2"], float) + + def test_constructor_dict_dont_upcast2(self): + dm = DataFrame([[1, 2], ["a", "b"]], index=[1, 2], columns=[1, 2]) + assert isinstance(dm[1][1], int) + + def test_constructor_dict_of_tuples(self): + # GH #1491 + data = {"a": (1, 2, 3), "b": (4, 5, 6)} + + result = DataFrame(data) + expected = DataFrame({k: list(v) for k, v in data.items()}) + tm.assert_frame_equal(result, expected, check_dtype=False) + + def test_constructor_dict_of_ranges(self): + # GH 26356 + data = {"a": range(3), "b": range(3, 6)} + + result = DataFrame(data) + expected = DataFrame({"a": [0, 1, 2], "b": [3, 4, 5]}) + tm.assert_frame_equal(result, expected) + + def test_constructor_dict_of_iterators(self): + # GH 26349 + data = {"a": iter(range(3)), "b": reversed(range(3))} + + result = DataFrame(data) + expected = DataFrame({"a": [0, 1, 2], "b": [2, 1, 0]}) + tm.assert_frame_equal(result, expected) + + def test_constructor_dict_of_generators(self): + # GH 26349 + data = {"a": (i for i in (range(3))), "b": (i for i in reversed(range(3)))} + result = DataFrame(data) + expected = DataFrame({"a": [0, 1, 2], "b": [2, 1, 0]}) + tm.assert_frame_equal(result, expected) + + def test_constructor_dict_multiindex(self): + d = { + ("a", "a"): {("i", "i"): 0, ("i", "j"): 1, ("j", "i"): 2}, + ("b", "a"): {("i", "i"): 6, ("i", "j"): 5, ("j", "i"): 4}, + ("b", "c"): {("i", "i"): 7, ("i", "j"): 8, ("j", "i"): 9}, + } + _d = sorted(d.items()) + df = 
DataFrame(d) + expected = DataFrame( + [x[1] for x in _d], index=MultiIndex.from_tuples([x[0] for x in _d]) + ).T + expected.index = MultiIndex.from_tuples(expected.index) + tm.assert_frame_equal( + df, + expected, + ) + + d["z"] = {"y": 123.0, ("i", "i"): 111, ("i", "j"): 111, ("j", "i"): 111} + _d.insert(0, ("z", d["z"])) + expected = DataFrame( + [x[1] for x in _d], index=Index([x[0] for x in _d], tupleize_cols=False) + ).T + expected.index = Index(expected.index, tupleize_cols=False) + df = DataFrame(d) + df = df.reindex(columns=expected.columns, index=expected.index) + tm.assert_frame_equal(df, expected) + + def test_constructor_dict_datetime64_index(self): + # GH 10160 + dates_as_str = ["1984-02-19", "1988-11-06", "1989-12-03", "1990-03-15"] + + def create_data(constructor): + return {i: {constructor(s): 2 * i} for i, s in enumerate(dates_as_str)} + + data_datetime64 = create_data(np.datetime64) + data_datetime = create_data(lambda x: datetime.strptime(x, "%Y-%m-%d")) + data_Timestamp = create_data(Timestamp) + + expected = DataFrame( + [ + {0: 0, 1: None, 2: None, 3: None}, + {0: None, 1: 2, 2: None, 3: None}, + {0: None, 1: None, 2: 4, 3: None}, + {0: None, 1: None, 2: None, 3: 6}, + ], + index=[Timestamp(dt) for dt in dates_as_str], + ) + + result_datetime64 = DataFrame(data_datetime64) + result_datetime = DataFrame(data_datetime) + result_Timestamp = DataFrame(data_Timestamp) + tm.assert_frame_equal(result_datetime64, expected) + tm.assert_frame_equal(result_datetime, expected) + tm.assert_frame_equal(result_Timestamp, expected) + + def test_constructor_dict_timedelta64_index(self): + # GH 10160 + td_as_int = [1, 2, 3, 4] + + def create_data(constructor): + return {i: {constructor(s): 2 * i} for i, s in enumerate(td_as_int)} + + data_timedelta64 = create_data(lambda x: np.timedelta64(x, "D")) + data_timedelta = create_data(lambda x: timedelta(days=x)) + data_Timedelta = create_data(lambda x: Timedelta(x, "D")) + + expected = DataFrame( + [ + {0: 0, 1: None, 2: None, 3: None}, + {0: None, 1: 2, 2: None, 3: None}, + {0: None, 1: None, 2: 4, 3: None}, + {0: None, 1: None, 2: None, 3: 6}, + ], + index=[Timedelta(td, "D") for td in td_as_int], + ) + + result_timedelta64 = DataFrame(data_timedelta64) + result_timedelta = DataFrame(data_timedelta) + result_Timedelta = DataFrame(data_Timedelta) + tm.assert_frame_equal(result_timedelta64, expected) + tm.assert_frame_equal(result_timedelta, expected) + tm.assert_frame_equal(result_Timedelta, expected) + + def test_constructor_period_dict(self): + # PeriodIndex + a = pd.PeriodIndex(["2012-01", "NaT", "2012-04"], freq="M") + b = pd.PeriodIndex(["2012-02-01", "2012-03-01", "NaT"], freq="D") + df = DataFrame({"a": a, "b": b}) + assert df["a"].dtype == a.dtype + assert df["b"].dtype == b.dtype + + # list of periods + df = DataFrame({"a": a.astype(object).tolist(), "b": b.astype(object).tolist()}) + assert df["a"].dtype == a.dtype + assert df["b"].dtype == b.dtype + + def test_constructor_dict_extension_scalar(self, ea_scalar_and_dtype): + ea_scalar, ea_dtype = ea_scalar_and_dtype + df = DataFrame({"a": ea_scalar}, index=[0]) + assert df["a"].dtype == ea_dtype + + expected = DataFrame(index=[0], columns=["a"], data=ea_scalar) + + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize( + "data,dtype", + [ + (Period("2020-01"), PeriodDtype("M")), + (Interval(left=0, right=5), IntervalDtype("int64", "right")), + ( + Timestamp("2011-01-01", tz="US/Eastern"), + DatetimeTZDtype(tz="US/Eastern"), + ), + ], + ) + def 
test_constructor_extension_scalar_data(self, data, dtype): + # GH 34832 + df = DataFrame(index=[0, 1], columns=["a", "b"], data=data) + + assert df["a"].dtype == dtype + assert df["b"].dtype == dtype + + arr = pd.array([data] * 2, dtype=dtype) + expected = DataFrame({"a": arr, "b": arr}) + + tm.assert_frame_equal(df, expected) + + def test_nested_dict_frame_constructor(self): + rng = pd.period_range("1/1/2000", periods=5) + df = DataFrame(np.random.randn(10, 5), columns=rng) + + data = {} + for col in df.columns: + for row in df.index: + data.setdefault(col, {})[row] = df._get_value(row, col) + + result = DataFrame(data, columns=rng) + tm.assert_frame_equal(result, df) + + data = {} + for col in df.columns: + for row in df.index: + data.setdefault(row, {})[col] = df._get_value(row, col) + + result = DataFrame(data, index=rng).T + tm.assert_frame_equal(result, df) + + def _check_basic_constructor(self, empty): + # mat: 2d matrix with shape (3, 2) to input. empty - makes sized + # objects + mat = empty((2, 3), dtype=float) + # 2-D input + frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2]) + + assert len(frame.index) == 2 + assert len(frame.columns) == 3 + + # 1-D input + frame = DataFrame(empty((3,)), columns=["A"], index=[1, 2, 3]) + assert len(frame.index) == 3 + assert len(frame.columns) == 1 + + warn = None if empty is np.ones else FutureWarning + with tm.assert_produces_warning(warn): + frame = DataFrame( + mat, columns=["A", "B", "C"], index=[1, 2], dtype=np.int64 + ) + if empty is np.ones: + # passing dtype casts + assert frame.values.dtype == np.int64 + else: + # i.e. ma.masked_all + # Since we have NaNs, refuse to cast to int dtype, which would take NaN + # to meaningless integers. This matches Series behavior. GH#26919 + assert frame.isna().all().all() + assert frame.values.dtype == np.float64 + assert isna(frame.values).all() + + # wrong size axis labels + msg = r"Shape of passed values is \(2, 3\), indices imply \(1, 3\)" + with pytest.raises(ValueError, match=msg): + DataFrame(mat, columns=["A", "B", "C"], index=[1]) + msg = r"Shape of passed values is \(2, 3\), indices imply \(2, 2\)" + with pytest.raises(ValueError, match=msg): + DataFrame(mat, columns=["A", "B"], index=[1, 2]) + + # higher dim raise exception + with pytest.raises(ValueError, match="Must pass 2-d input"): + DataFrame(empty((3, 3, 3)), columns=["A", "B", "C"], index=[1]) + + # automatic labeling + frame = DataFrame(mat) + tm.assert_index_equal(frame.index, Index(range(2)), exact=True) + tm.assert_index_equal(frame.columns, Index(range(3)), exact=True) + + frame = DataFrame(mat, index=[1, 2]) + tm.assert_index_equal(frame.columns, Index(range(3)), exact=True) + + frame = DataFrame(mat, columns=["A", "B", "C"]) + tm.assert_index_equal(frame.index, Index(range(2)), exact=True) + + # 0-length axis + frame = DataFrame(empty((0, 3))) + assert len(frame.index) == 0 + + frame = DataFrame(empty((3, 0))) + assert len(frame.columns) == 0 + + def test_constructor_ndarray(self): + self._check_basic_constructor(np.ones) + + frame = DataFrame(["foo", "bar"], index=[0, 1], columns=["A"]) + assert len(frame) == 2 + + def test_constructor_maskedarray(self): + self._check_basic_constructor(ma.masked_all) + + # Check non-masked values + mat = ma.masked_all((2, 3), dtype=float) + mat[0, 0] = 1.0 + mat[1, 2] = 2.0 + frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2]) + assert 1.0 == frame["A"][1] + assert 2.0 == frame["C"][2] + + # what is this even checking?? 
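+ # (Answer: a fully masked float array becomes all-NaN, and NaN != NaN,
+ # so every element of `frame == frame` is False.)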
+ mat = ma.masked_all((2, 3), dtype=float) + frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2]) + assert np.all(~np.asarray(frame == frame)) + + def test_constructor_maskedarray_nonfloat(self): + # masked int promoted to float + mat = ma.masked_all((2, 3), dtype=int) + # 2-D input + frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2]) + + assert len(frame.index) == 2 + assert len(frame.columns) == 3 + assert np.all(~np.asarray(frame == frame)) + + # cast type + frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2], dtype=np.float64) + assert frame.values.dtype == np.float64 + + # Check non-masked values + mat2 = ma.copy(mat) + mat2[0, 0] = 1 + mat2[1, 2] = 2 + frame = DataFrame(mat2, columns=["A", "B", "C"], index=[1, 2]) + assert 1 == frame["A"][1] + assert 2 == frame["C"][2] + + # masked np.datetime64 stays (use NaT as null) + mat = ma.masked_all((2, 3), dtype="M8[ns]") + # 2-D input + frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2]) + + assert len(frame.index) == 2 + assert len(frame.columns) == 3 + assert isna(frame).values.all() + + # cast type + msg = r"datetime64\[ns\] values and dtype=int64" + with tm.assert_produces_warning(FutureWarning, match=msg): + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + category=DeprecationWarning, + message="elementwise comparison failed", + ) + frame = DataFrame( + mat, columns=["A", "B", "C"], index=[1, 2], dtype=np.int64 + ) + assert frame.values.dtype == np.int64 + + # Check non-masked values + mat2 = ma.copy(mat) + mat2[0, 0] = 1 + mat2[1, 2] = 2 + frame = DataFrame(mat2, columns=["A", "B", "C"], index=[1, 2]) + assert 1 == frame["A"].view("i8")[1] + assert 2 == frame["C"].view("i8")[2] + + # masked bool promoted to object + mat = ma.masked_all((2, 3), dtype=bool) + # 2-D input + frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2]) + + assert len(frame.index) == 2 + assert len(frame.columns) == 3 + assert np.all(~np.asarray(frame == frame)) + + # cast type + frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2], dtype=object) + assert frame.values.dtype == object + + # Check non-masked values + mat2 = ma.copy(mat) + mat2[0, 0] = True + mat2[1, 2] = False + frame = DataFrame(mat2, columns=["A", "B", "C"], index=[1, 2]) + assert frame["A"][1] is True + assert frame["C"][2] is False + + def test_constructor_maskedarray_hardened(self): + # Check numpy masked arrays with hard masks -- from GH24574 + mat_hard = ma.masked_all((2, 2), dtype=float).harden_mask() + result = DataFrame(mat_hard, columns=["A", "B"], index=[1, 2]) + expected = DataFrame( + {"A": [np.nan, np.nan], "B": [np.nan, np.nan]}, + columns=["A", "B"], + index=[1, 2], + dtype=float, + ) + tm.assert_frame_equal(result, expected) + # Check case where mask is hard but no data are masked + mat_hard = ma.ones((2, 2), dtype=float).harden_mask() + result = DataFrame(mat_hard, columns=["A", "B"], index=[1, 2]) + expected = DataFrame( + {"A": [1.0, 1.0], "B": [1.0, 1.0]}, + columns=["A", "B"], + index=[1, 2], + dtype=float, + ) + tm.assert_frame_equal(result, expected) + + def test_constructor_maskedrecarray_dtype(self): + # Ensure constructor honors dtype + data = np.ma.array( + np.ma.zeros(5, dtype=[("date", "0 + df = DataFrame( + { + "a": 1.0, + "b": 2, + "c": "foo", + floatname: np.array([1.0] * 10, dtype=floatname), + intname: np.array([1] * 10, dtype=intname), + }, + index=np.arange(10), + ) + result = df.dtypes + expected = Series( + [np.dtype("float64")] + + [np.dtype("int64")] + + [np.dtype("object")] + + 
[np.dtype("float64")] + + [np.dtype(intname)], + index=["a", "b", "c", floatname, intname], + ) + tm.assert_series_equal(result, expected) + + def test_constructor_with_datetimes1(self): + + # GH 2809 + ind = date_range(start="2000-01-01", freq="D", periods=10) + datetimes = [ts.to_pydatetime() for ts in ind] + datetime_s = Series(datetimes) + assert datetime_s.dtype == "M8[ns]" + + def test_constructor_with_datetimes2(self): + # GH 2810 + ind = date_range(start="2000-01-01", freq="D", periods=10) + datetimes = [ts.to_pydatetime() for ts in ind] + dates = [ts.date() for ts in ind] + df = DataFrame(datetimes, columns=["datetimes"]) + df["dates"] = dates + result = df.dtypes + expected = Series( + [np.dtype("datetime64[ns]"), np.dtype("object")], + index=["datetimes", "dates"], + ) + tm.assert_series_equal(result, expected) + + def test_constructor_with_datetimes3(self): + # GH 7594 + # don't coerce tz-aware + tz = pytz.timezone("US/Eastern") + dt = tz.localize(datetime(2012, 1, 1)) + + df = DataFrame({"End Date": dt}, index=[0]) + assert df.iat[0, 0] == dt + tm.assert_series_equal( + df.dtypes, Series({"End Date": "datetime64[ns, US/Eastern]"}) + ) + + df = DataFrame([{"End Date": dt}]) + assert df.iat[0, 0] == dt + tm.assert_series_equal( + df.dtypes, Series({"End Date": "datetime64[ns, US/Eastern]"}) + ) + + def test_constructor_with_datetimes4(self): + # tz-aware (UTC and other tz's) + # GH 8411 + dr = date_range("20130101", periods=3) + df = DataFrame({"value": dr}) + assert df.iat[0, 0].tz is None + dr = date_range("20130101", periods=3, tz="UTC") + df = DataFrame({"value": dr}) + assert str(df.iat[0, 0].tz) == "UTC" + dr = date_range("20130101", periods=3, tz="US/Eastern") + df = DataFrame({"value": dr}) + assert str(df.iat[0, 0].tz) == "US/Eastern" + + def test_constructor_with_datetimes5(self): + # GH 7822 + # preserver an index with a tz on dict construction + i = date_range("1/1/2011", periods=5, freq="10s", tz="US/Eastern") + + expected = DataFrame({"a": i.to_series().reset_index(drop=True)}) + df = DataFrame() + df["a"] = i + tm.assert_frame_equal(df, expected) + + df = DataFrame({"a": i}) + tm.assert_frame_equal(df, expected) + + def test_constructor_with_datetimes6(self): + # multiples + i = date_range("1/1/2011", periods=5, freq="10s", tz="US/Eastern") + i_no_tz = date_range("1/1/2011", periods=5, freq="10s") + df = DataFrame({"a": i, "b": i_no_tz}) + expected = DataFrame({"a": i.to_series().reset_index(drop=True), "b": i_no_tz}) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize( + "arr", + [ + np.array([None, None, None, None, datetime.now(), None]), + np.array([None, None, datetime.now(), None]), + [[np.datetime64("NaT")], [None]], + [[np.datetime64("NaT")], [pd.NaT]], + [[None], [np.datetime64("NaT")]], + [[None], [pd.NaT]], + [[pd.NaT], [np.datetime64("NaT")]], + [[pd.NaT], [None]], + ], + ) + def test_constructor_datetimes_with_nulls(self, arr): + # gh-15869, GH#11220 + result = DataFrame(arr).dtypes + expected = Series([np.dtype("datetime64[ns]")]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("order", ["K", "A", "C", "F"]) + @pytest.mark.parametrize( + "dtype", + [ + "datetime64[M]", + "datetime64[D]", + "datetime64[h]", + "datetime64[m]", + "datetime64[s]", + "datetime64[ms]", + "datetime64[us]", + "datetime64[ns]", + ], + ) + def test_constructor_datetimes_non_ns(self, order, dtype): + na = np.array( + [ + ["2015-01-01", "2015-01-02", "2015-01-03"], + ["2017-01-01", "2017-01-02", "2017-02-03"], + ], + dtype=dtype, + 
order=order, + ) + df = DataFrame(na) + expected = DataFrame( + [ + ["2015-01-01", "2015-01-02", "2015-01-03"], + ["2017-01-01", "2017-01-02", "2017-02-03"], + ] + ) + expected = expected.astype(dtype=dtype) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize("order", ["K", "A", "C", "F"]) + @pytest.mark.parametrize( + "dtype", + [ + "timedelta64[D]", + "timedelta64[h]", + "timedelta64[m]", + "timedelta64[s]", + "timedelta64[ms]", + "timedelta64[us]", + "timedelta64[ns]", + ], + ) + def test_constructor_timedelta_non_ns(self, order, dtype): + na = np.array( + [ + [np.timedelta64(1, "D"), np.timedelta64(2, "D")], + [np.timedelta64(4, "D"), np.timedelta64(5, "D")], + ], + dtype=dtype, + order=order, + ) + df = DataFrame(na).astype("timedelta64[ns]") + expected = DataFrame( + [ + [Timedelta(1, "D"), Timedelta(2, "D")], + [Timedelta(4, "D"), Timedelta(5, "D")], + ], + ) + tm.assert_frame_equal(df, expected) + + def test_constructor_for_list_with_dtypes(self): + # test list of lists/ndarrays + df = DataFrame([np.arange(5) for x in range(5)]) + result = df.dtypes + expected = Series([np.dtype("int")] * 5) + tm.assert_series_equal(result, expected) + + df = DataFrame([np.array(np.arange(5), dtype="int32") for x in range(5)]) + result = df.dtypes + expected = Series([np.dtype("int32")] * 5) + tm.assert_series_equal(result, expected) + + # overflow issue? (we always expected int64 upcasting here) + df = DataFrame({"a": [2 ** 31, 2 ** 31 + 1]}) + assert df.dtypes.iloc[0] == np.dtype("int64") + + # GH #2751 (construction with no index specified), make sure we cast to + # platform values + df = DataFrame([1, 2]) + assert df.dtypes.iloc[0] == np.dtype("int64") + + df = DataFrame([1.0, 2.0]) + assert df.dtypes.iloc[0] == np.dtype("float64") + + df = DataFrame({"a": [1, 2]}) + assert df.dtypes.iloc[0] == np.dtype("int64") + + df = DataFrame({"a": [1.0, 2.0]}) + assert df.dtypes.iloc[0] == np.dtype("float64") + + df = DataFrame({"a": 1}, index=range(3)) + assert df.dtypes.iloc[0] == np.dtype("int64") + + df = DataFrame({"a": 1.0}, index=range(3)) + assert df.dtypes.iloc[0] == np.dtype("float64") + + # with object list + df = DataFrame( + { + "a": [1, 2, 4, 7], + "b": [1.2, 2.3, 5.1, 6.3], + "c": list("abcd"), + "d": [datetime(2000, 1, 1) for i in range(4)], + "e": [1.0, 2, 4.0, 7], + } + ) + result = df.dtypes + expected = Series( + [ + np.dtype("int64"), + np.dtype("float64"), + np.dtype("object"), + np.dtype("datetime64[ns]"), + np.dtype("float64"), + ], + index=list("abcde"), + ) + tm.assert_series_equal(result, expected) + + def test_constructor_frame_copy(self, float_frame): + cop = DataFrame(float_frame, copy=True) + cop["A"] = 5 + assert (cop["A"] == 5).all() + assert not (float_frame["A"] == 5).all() + + def test_constructor_ndarray_copy(self, float_frame, using_array_manager): + if not using_array_manager: + df = DataFrame(float_frame.values) + + float_frame.values[5] = 5 + assert (df.values[5] == 5).all() + + df = DataFrame(float_frame.values, copy=True) + float_frame.values[6] = 6 + assert not (df.values[6] == 6).all() + else: + arr = float_frame.values.copy() + # default: copy to ensure contiguous arrays + df = DataFrame(arr) + assert df._mgr.arrays[0].flags.c_contiguous + arr[0, 0] = 100 + assert df.iloc[0, 0] != 100 + + # manually specify copy=False + df = DataFrame(arr, copy=False) + assert not df._mgr.arrays[0].flags.c_contiguous + arr[0, 0] = 1000 + assert df.iloc[0, 0] == 1000 + + # TODO(ArrayManager) keep view on Series? 
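+ # With copy=True the new frame must not share memory with the source
+ # Series: writing to df["A"] below must leave float_frame untouched.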
+ @td.skip_array_manager_not_yet_implemented + def test_constructor_series_copy(self, float_frame): + series = float_frame._series + + df = DataFrame({"A": series["A"]}, copy=True) + df["A"][:] = 5 + + assert not (series["A"] == 5).all() + + @pytest.mark.parametrize( + "df", + [ + DataFrame([[1, 2, 3], [4, 5, 6]], index=[1, np.nan]), + DataFrame([[1, 2, 3], [4, 5, 6]], columns=[1.1, 2.2, np.nan]), + DataFrame([[0, 1, 2, 3], [4, 5, 6, 7]], columns=[np.nan, 1.1, 2.2, np.nan]), + DataFrame( + [[0.0, 1, 2, 3.0], [4, 5, 6, 7]], columns=[np.nan, 1.1, 2.2, np.nan] + ), + DataFrame([[0.0, 1, 2, 3.0], [4, 5, 6, 7]], columns=[np.nan, 1, 2, 2]), + ], + ) + def test_constructor_with_nas(self, df): + # GH 5016 + # na's in indices + # GH 21428 (non-unique columns) + + for i in range(len(df.columns)): + df.iloc[:, i] + + indexer = np.arange(len(df.columns))[isna(df.columns)] + + # No NaN found -> error + if len(indexer) == 0: + with pytest.raises(KeyError, match="^nan$"): + df.loc[:, np.nan] + # single nan should result in Series + elif len(indexer) == 1: + tm.assert_series_equal(df.iloc[:, indexer[0]], df.loc[:, np.nan]) + # multiple nans should result in DataFrame + else: + tm.assert_frame_equal(df.iloc[:, indexer], df.loc[:, np.nan]) + + def test_constructor_lists_to_object_dtype(self): + # from #1074 + d = DataFrame({"a": [np.nan, False]}) + assert d["a"].dtype == np.object_ + assert not d["a"][1] + + def test_constructor_ndarray_categorical_dtype(self): + cat = Categorical(["A", "B", "C"]) + arr = np.array(cat).reshape(-1, 1) + arr = np.broadcast_to(arr, (3, 4)) + + result = DataFrame(arr, dtype=cat.dtype) + + expected = DataFrame({0: cat, 1: cat, 2: cat, 3: cat}) + tm.assert_frame_equal(result, expected) + + def test_constructor_categorical(self): + + # GH8626 + + # dict creation + df = DataFrame({"A": list("abc")}, dtype="category") + expected = Series(list("abc"), dtype="category", name="A") + tm.assert_series_equal(df["A"], expected) + + # to_frame + s = Series(list("abc"), dtype="category") + result = s.to_frame() + expected = Series(list("abc"), dtype="category", name=0) + tm.assert_series_equal(result[0], expected) + result = s.to_frame(name="foo") + expected = Series(list("abc"), dtype="category", name="foo") + tm.assert_series_equal(result["foo"], expected) + + # list-like creation + df = DataFrame(list("abc"), dtype="category") + expected = Series(list("abc"), dtype="category", name=0) + tm.assert_series_equal(df[0], expected) + + def test_construct_from_1item_list_of_categorical(self): + # ndim != 1 + msg = "will be changed to match the behavior" + with tm.assert_produces_warning(FutureWarning, match=msg): + df = DataFrame([Categorical(list("abc"))]) + expected = DataFrame({0: Series(list("abc"), dtype="category")}) + tm.assert_frame_equal(df, expected) + + def test_construct_from_list_of_categoricals(self): + msg = "will be changed to match the behavior" + with tm.assert_produces_warning(FutureWarning, match=msg): + df = DataFrame([Categorical(list("abc")), Categorical(list("abd"))]) + expected = DataFrame( + { + 0: Series(list("abc"), dtype="category"), + 1: Series(list("abd"), dtype="category"), + }, + columns=[0, 1], + ) + tm.assert_frame_equal(df, expected) + + def test_from_nested_listlike_mixed_types(self): + # mixed + msg = "will be changed to match the behavior" + with tm.assert_produces_warning(FutureWarning, match=msg): + df = DataFrame([Categorical(list("abc")), list("def")]) + expected = DataFrame( + {0: Series(list("abc"), dtype="category"), 1: list("def")}, columns=[0, 1] + 
) + tm.assert_frame_equal(df, expected) + + def test_construct_from_listlikes_mismatched_lengths(self): + # invalid (shape) + msg = "|".join( + [ + r"Length of values \(6\) does not match length of index \(3\)", + ] + ) + msg2 = "will be changed to match the behavior" + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(FutureWarning, match=msg2): + DataFrame([Categorical(list("abc")), Categorical(list("abdefg"))]) + + def test_constructor_categorical_series(self): + + items = [1, 2, 3, 1] + exp = Series(items).astype("category") + res = Series(items, dtype="category") + tm.assert_series_equal(res, exp) + + items = ["a", "b", "c", "a"] + exp = Series(items).astype("category") + res = Series(items, dtype="category") + tm.assert_series_equal(res, exp) + + # insert into frame with different index + # GH 8076 + index = date_range("20000101", periods=3) + expected = Series( + Categorical(values=[np.nan, np.nan, np.nan], categories=["a", "b", "c"]) + ) + expected.index = index + + expected = DataFrame({"x": expected}) + df = DataFrame({"x": Series(["a", "b", "c"], dtype="category")}, index=index) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize( + "dtype", + tm.ALL_INT_NUMPY_DTYPES + + tm.ALL_INT_EA_DTYPES + + tm.FLOAT_NUMPY_DTYPES + + tm.COMPLEX_DTYPES + + tm.DATETIME64_DTYPES + + tm.TIMEDELTA64_DTYPES + + tm.BOOL_DTYPES, + ) + def test_check_dtype_empty_numeric_column(self, dtype): + # GH24386: Ensure dtypes are set correctly for an empty DataFrame. + # Empty DataFrame is generated via dictionary data with non-overlapping columns. + data = DataFrame({"a": [1, 2]}, columns=["b"], dtype=dtype) + + assert data.b.dtype == dtype + + @pytest.mark.parametrize( + "dtype", tm.STRING_DTYPES + tm.BYTES_DTYPES + tm.OBJECT_DTYPES + ) + def test_check_dtype_empty_string_column(self, request, dtype, using_array_manager): + # GH24386: Ensure dtypes are set correctly for an empty DataFrame. + # Empty DataFrame is generated via dictionary data with non-overlapping columns. 
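+ # A requested column ("b") that does not overlap the dict keys ("a") drops
+ # the data, leaving an empty column; the assertion below documents that the
+ # string/bytes/object dtypes requested here all surface as plain "object".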
+ data = DataFrame({"a": [1, 2]}, columns=["b"], dtype=dtype) + + if using_array_manager and dtype in tm.BYTES_DTYPES: + # TODO(ArrayManager) astype to bytes dtypes does not yet give object dtype + td.mark_array_manager_not_yet_implemented(request) + + assert data.b.dtype.name == "object" + + def test_to_frame_with_falsey_names(self): + # GH 16114 + result = Series(name=0, dtype=object).to_frame().dtypes + expected = Series({0: object}) + tm.assert_series_equal(result, expected) + + result = DataFrame(Series(name=0, dtype=object)).dtypes + tm.assert_series_equal(result, expected) + + @pytest.mark.arm_slow + @pytest.mark.parametrize("dtype", [None, "uint8", "category"]) + def test_constructor_range_dtype(self, dtype): + expected = DataFrame({"A": [0, 1, 2, 3, 4]}, dtype=dtype or "int64") + + # GH 26342 + result = DataFrame(range(5), columns=["A"], dtype=dtype) + tm.assert_frame_equal(result, expected) + + # GH 16804 + result = DataFrame({"A": range(5)}, dtype=dtype) + tm.assert_frame_equal(result, expected) + + def test_frame_from_list_subclass(self): + # GH21226 + class List(list): + pass + + expected = DataFrame([[1, 2, 3], [4, 5, 6]]) + result = DataFrame(List([List([1, 2, 3]), List([4, 5, 6])])) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "extension_arr", + [ + Categorical(list("aabbc")), + SparseArray([1, np.nan, np.nan, np.nan]), + IntervalArray([Interval(0, 1), Interval(1, 5)]), + PeriodArray(pd.period_range(start="1/1/2017", end="1/1/2018", freq="M")), + ], + ) + def test_constructor_with_extension_array(self, extension_arr): + # GH11363 + expected = DataFrame(Series(extension_arr)) + result = DataFrame(extension_arr) + tm.assert_frame_equal(result, expected) + + def test_datetime_date_tuple_columns_from_dict(self): + # GH 10863 + v = date.today() + tup = v, v + result = DataFrame({tup: Series(range(3), index=range(3))}, columns=[tup]) + expected = DataFrame([0, 1, 2], columns=Index(Series([tup]))) + tm.assert_frame_equal(result, expected) + + def test_construct_with_two_categoricalindex_series(self): + # GH 14600 + s1 = Series([39, 6, 4], index=CategoricalIndex(["female", "male", "unknown"])) + s2 = Series( + [2, 152, 2, 242, 150], + index=CategoricalIndex(["f", "female", "m", "male", "unknown"]), + ) + result = DataFrame([s1, s2]) + expected = DataFrame( + np.array([[39, 6, 4, np.nan, np.nan], [152.0, 242.0, 150.0, 2.0, 2.0]]), + columns=["female", "male", "unknown", "f", "m"], + ) + tm.assert_frame_equal(result, expected) + + def test_constructor_series_nonexact_categoricalindex(self): + # GH 42424 + ser = Series(range(0, 100)) + ser1 = cut(ser, 10).value_counts().head(5) + ser2 = cut(ser, 10).value_counts().tail(5) + result = DataFrame({"1": ser1, "2": ser2}) + index = CategoricalIndex( + [ + Interval(-0.099, 9.9, closed="right"), + Interval(9.9, 19.8, closed="right"), + Interval(19.8, 29.7, closed="right"), + Interval(29.7, 39.6, closed="right"), + Interval(39.6, 49.5, closed="right"), + Interval(49.5, 59.4, closed="right"), + Interval(59.4, 69.3, closed="right"), + Interval(69.3, 79.2, closed="right"), + Interval(79.2, 89.1, closed="right"), + Interval(89.1, 99, closed="right"), + ], + ordered=True, + ) + expected = DataFrame( + {"1": [10] * 5 + [np.nan] * 5, "2": [np.nan] * 5 + [10] * 5}, index=index + ) + tm.assert_frame_equal(expected, result) + + def test_from_M8_structured(self): + dates = [(datetime(2012, 9, 9, 0, 0), datetime(2012, 9, 8, 15, 10))] + arr = np.array(dates, dtype=[("Date", "M8[us]"), ("Forecasting", "M8[us]")]) + df = 
DataFrame(arr) + + assert df["Date"][0] == dates[0][0] + assert df["Forecasting"][0] == dates[0][1] + + s = Series(arr["Date"]) + assert isinstance(s[0], Timestamp) + assert s[0] == dates[0][0] + + def test_from_datetime_subclass(self): + # GH21142 Verify whether Datetime subclasses are also of dtype datetime + class DatetimeSubclass(datetime): + pass + + data = DataFrame({"datetime": [DatetimeSubclass(2020, 1, 1, 1, 1)]}) + assert data.datetime.dtype == "datetime64[ns]" + + def test_with_mismatched_index_length_raises(self): + # GH#33437 + dti = date_range("2016-01-01", periods=3, tz="US/Pacific") + msg = "Shape of passed values|Passed arrays should have the same length" + with pytest.raises(ValueError, match=msg): + DataFrame(dti, index=range(4)) + + def test_frame_ctor_datetime64_column(self): + rng = date_range("1/1/2000 00:00:00", "1/1/2000 1:59:50", freq="10s") + dates = np.asarray(rng) + + df = DataFrame({"A": np.random.randn(len(rng)), "B": dates}) + assert np.issubdtype(df["B"].dtype, np.dtype("M8[ns]")) + + def test_dataframe_constructor_infer_multiindex(self): + index_lists = [["a", "a", "b", "b"], ["x", "y", "x", "y"]] + + multi = DataFrame( + np.random.randn(4, 4), + index=[np.array(x) for x in index_lists], + ) + assert isinstance(multi.index, MultiIndex) + assert not isinstance(multi.columns, MultiIndex) + + multi = DataFrame(np.random.randn(4, 4), columns=index_lists) + assert isinstance(multi.columns, MultiIndex) + + @pytest.mark.parametrize( + "input_vals", + [ + ([1, 2]), + (["1", "2"]), + (list(date_range("1/1/2011", periods=2, freq="H"))), + (list(date_range("1/1/2011", periods=2, freq="H", tz="US/Eastern"))), + ([Interval(left=0, right=5)]), + ], + ) + def test_constructor_list_str(self, input_vals, string_dtype): + # GH#16605 + # Ensure that data elements are converted to strings when + # dtype is str, 'str', or 'U' + + result = DataFrame({"A": input_vals}, dtype=string_dtype) + expected = DataFrame({"A": input_vals}).astype({"A": string_dtype}) + tm.assert_frame_equal(result, expected) + + def test_constructor_list_str_na(self, string_dtype): + + result = DataFrame({"A": [1.0, 2.0, None]}, dtype=string_dtype) + expected = DataFrame({"A": ["1.0", "2.0", None]}, dtype=object) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("copy", [False, True]) + def test_dict_nocopy( + self, request, copy, any_numeric_ea_dtype, any_numpy_dtype, using_array_manager + ): + if using_array_manager and not ( + (any_numpy_dtype in (tm.STRING_DTYPES + tm.BYTES_DTYPES)) + or ( + any_numpy_dtype + in (tm.DATETIME64_DTYPES + tm.TIMEDELTA64_DTYPES + tm.BOOL_DTYPES) + and copy + ) + ): + # TODO(ArrayManager) properly honor copy keyword for dict input + td.mark_array_manager_not_yet_implemented(request) + + a = np.array([1, 2], dtype=any_numpy_dtype) + b = np.array([3, 4], dtype=any_numpy_dtype) + if b.dtype.kind in ["S", "U"]: + # These get cast, making the checks below more cumbersome + return + + c = pd.array([1, 2], dtype=any_numeric_ea_dtype) + df = DataFrame({"a": a, "b": b, "c": c}, copy=copy) + + def get_base(obj): + if isinstance(obj, np.ndarray): + return obj.base + elif isinstance(obj.dtype, np.dtype): + # i.e. 
DatetimeArray, TimedeltaArray + return obj._ndarray.base + else: + raise TypeError + + def check_views(): + # written to work for either BlockManager or ArrayManager + assert sum(x is c for x in df._mgr.arrays) == 1 + assert ( + sum( + get_base(x) is a + for x in df._mgr.arrays + if isinstance(x.dtype, np.dtype) + ) + == 1 + ) + assert ( + sum( + get_base(x) is b + for x in df._mgr.arrays + if isinstance(x.dtype, np.dtype) + ) + == 1 + ) + + if not copy: + # constructor preserves views + check_views() + + df.iloc[0, 0] = 0 + df.iloc[0, 1] = 0 + if not copy: + # Check that the underlying data behind df["c"] is still `c` + # after setting with iloc. Since we don't know which entry in + # df._mgr.arrays corresponds to df["c"], we just check that exactly + # one of these arrays is `c`. GH#38939 + assert sum(x is c for x in df._mgr.arrays) == 1 + # TODO: we can call check_views if we stop consolidating + # in setitem_with_indexer + + # FIXME(GH#35417): until GH#35417, iloc.setitem into EA values does not preserve + # view, so we have to check in the other direction + # df.iloc[0, 2] = 0 + # if not copy: + # check_views() + c[0] = 0 + + if copy: + if a.dtype.kind == "M": + assert a[0] == a.dtype.type(1, "ns") + assert b[0] == b.dtype.type(3, "ns") + else: + assert a[0] == a.dtype.type(1) + assert b[0] == b.dtype.type(3) + # FIXME(GH#35417): enable after GH#35417 + # assert c[0] == 1 + assert df.iloc[0, 2] == 1 + else: + # TODO: we can call check_views if we stop consolidating + # in setitem_with_indexer + # FIXME(GH#35417): enable after GH#35417 + # assert b[0] == 0 + assert df.iloc[0, 2] == 0 + + def test_from_series_with_name_with_columns(self): + # GH 7893 + result = DataFrame(Series(1, name="foo"), columns=["bar"]) + expected = DataFrame(columns=["bar"]) + tm.assert_frame_equal(result, expected) + + def test_nested_list_columns(self): + # GH 14467 + result = DataFrame( + [[1, 2, 3], [4, 5, 6]], columns=[["A", "A", "A"], ["a", "b", "c"]] + ) + expected = DataFrame( + [[1, 2, 3], [4, 5, 6]], + columns=MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("A", "c")]), + ) + tm.assert_frame_equal(result, expected) + + def test_from_2d_object_array_of_periods_or_intervals(self): + # Period analogue to GH#26825 + pi = pd.period_range("2016-04-05", periods=3) + data = pi._data.astype(object).reshape(1, -1) + df = DataFrame(data) + assert df.shape == (1, 3) + assert (df.dtypes == pi.dtype).all() + assert (df == pi).all().all() + + ii = pd.IntervalIndex.from_breaks([3, 4, 5, 6]) + data2 = ii._data.astype(object).reshape(1, -1) + df2 = DataFrame(data2) + assert df2.shape == (1, 3) + assert (df2.dtypes == ii.dtype).all() + assert (df2 == ii).all().all() + + # mixed + data3 = np.r_[data, data2, data, data2].T + df3 = DataFrame(data3) + expected = DataFrame({0: pi, 1: ii, 2: pi, 3: ii}) + tm.assert_frame_equal(df3, expected) + + @pytest.mark.parametrize( + "col_a, col_b", + [ + ([[1], [2]], np.array([[1], [2]])), + (np.array([[1], [2]]), [[1], [2]]), + (np.array([[1], [2]]), np.array([[1], [2]])), + ], + ) + def test_error_from_2darray(self, col_a, col_b): + msg = "Per-column arrays must each be 1-dimensional" + with pytest.raises(ValueError, match=msg): + DataFrame({"a": col_a, "b": col_b}) + + def test_from_dict_with_missing_copy_false(self): + # GH#45369 filled columns should not be views of one another + df = DataFrame(index=[1, 2, 3], columns=["a", "b", "c"], copy=False) + assert not np.shares_memory(df["a"]._values, df["b"]._values) + + df.iloc[0, 0] = 0 + expected = DataFrame( + { + "a": [0, np.nan, 
np.nan], + "b": [np.nan, np.nan, np.nan], + "c": [np.nan, np.nan, np.nan], + }, + index=[1, 2, 3], + dtype=object, + ) + tm.assert_frame_equal(df, expected) + + +class TestDataFrameConstructorIndexInference: + def test_frame_from_dict_of_series_overlapping_monthly_period_indexes(self): + rng1 = pd.period_range("1/1/1999", "1/1/2012", freq="M") + s1 = Series(np.random.randn(len(rng1)), rng1) + + rng2 = pd.period_range("1/1/1980", "12/1/2001", freq="M") + s2 = Series(np.random.randn(len(rng2)), rng2) + df = DataFrame({"s1": s1, "s2": s2}) + + exp = pd.period_range("1/1/1980", "1/1/2012", freq="M") + tm.assert_index_equal(df.index, exp) + + def test_frame_from_dict_with_mixed_tzaware_indexes(self): + # GH#44091 + dti = date_range("2016-01-01", periods=3) + + ser1 = Series(range(3), index=dti) + ser2 = Series(range(3), index=dti.tz_localize("UTC")) + ser3 = Series(range(3), index=dti.tz_localize("US/Central")) + ser4 = Series(range(3)) + + # no tz-naive, but we do have mixed tzs and a non-DTI + df1 = DataFrame({"A": ser2, "B": ser3, "C": ser4}) + exp_index = Index( + list(ser2.index) + list(ser3.index) + list(ser4.index), dtype=object + ) + tm.assert_index_equal(df1.index, exp_index) + + df2 = DataFrame({"A": ser2, "C": ser4, "B": ser3}) + exp_index3 = Index( + list(ser2.index) + list(ser4.index) + list(ser3.index), dtype=object + ) + tm.assert_index_equal(df2.index, exp_index3) + + df3 = DataFrame({"B": ser3, "A": ser2, "C": ser4}) + exp_index3 = Index( + list(ser3.index) + list(ser2.index) + list(ser4.index), dtype=object + ) + tm.assert_index_equal(df3.index, exp_index3) + + df4 = DataFrame({"C": ser4, "B": ser3, "A": ser2}) + exp_index4 = Index( + list(ser4.index) + list(ser3.index) + list(ser2.index), dtype=object + ) + tm.assert_index_equal(df4.index, exp_index4) + + # TODO: not clear if these raising is desired (no extant tests), + # but this is de facto behavior 2021-12-22 + msg = "Cannot join tz-naive with tz-aware DatetimeIndex" + with pytest.raises(TypeError, match=msg): + DataFrame({"A": ser2, "B": ser3, "C": ser4, "D": ser1}) + with pytest.raises(TypeError, match=msg): + DataFrame({"A": ser2, "B": ser3, "D": ser1}) + with pytest.raises(TypeError, match=msg): + DataFrame({"D": ser1, "A": ser2, "B": ser3}) + + +class TestDataFrameConstructorWithDtypeCoercion: + def test_floating_values_integer_dtype(self): + # GH#40110 make DataFrame behavior with arraylike floating data and + # inty dtype match Series behavior + + arr = np.random.randn(10, 5) + + msg = "if they cannot be cast losslessly" + with tm.assert_produces_warning(FutureWarning, match=msg): + DataFrame(arr, dtype="i8") + + with tm.assert_produces_warning(None): + # if they can be cast losslessly, no warning + DataFrame(arr.round(), dtype="i8") + + # with NaNs, we go through a different path with a different warning + arr[0, 0] = np.nan + msg = "passing float-dtype values containing NaN" + with tm.assert_produces_warning(FutureWarning, match=msg): + DataFrame(arr, dtype="i8") + with tm.assert_produces_warning(FutureWarning, match=msg): + Series(arr[0], dtype="i8") + # The future (raising) behavior matches what we would get via astype: + msg = r"Cannot convert non-finite values \(NA or inf\) to integer" + with pytest.raises(ValueError, match=msg): + DataFrame(arr).astype("i8") + with pytest.raises(ValueError, match=msg): + Series(arr[0]).astype("i8") + + +class TestDataFrameConstructorWithDatetimeTZ: + @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"]) + def test_construction_preserves_tzaware_dtypes(self, 
tz):
+ # after GH#7822
+ # these retain the timezones on dict construction
+ dr = date_range("2011/1/1", "2012/1/1", freq="W-FRI")
+ dr_tz = dr.tz_localize(tz)
+ df = DataFrame({"A": "foo", "B": dr_tz}, index=dr)
+ tz_expected = DatetimeTZDtype("ns", dr_tz.tzinfo)
+ assert df["B"].dtype == tz_expected
+
+ # GH#2810 (with timezones)
+ datetimes_naive = [ts.to_pydatetime() for ts in dr]
+ datetimes_with_tz = [ts.to_pydatetime() for ts in dr_tz]
+ df = DataFrame({"dr": dr})
+ df["dr_tz"] = dr_tz
+ df["datetimes_naive"] = datetimes_naive
+ df["datetimes_with_tz"] = datetimes_with_tz
+ result = df.dtypes
+ expected = Series(
+ [
+ np.dtype("datetime64[ns]"),
+ DatetimeTZDtype(tz=tz),
+ np.dtype("datetime64[ns]"),
+ DatetimeTZDtype(tz=tz),
+ ],
+ index=["dr", "dr_tz", "datetimes_naive", "datetimes_with_tz"],
+ )
+ tm.assert_series_equal(result, expected)
+
+ @pytest.mark.parametrize("pydt", [True, False])
+ def test_constructor_data_aware_dtype_naive(self, tz_aware_fixture, pydt):
+ # GH#25843, GH#41555, GH#33401
+ tz = tz_aware_fixture
+ ts = Timestamp("2019", tz=tz)
+ if pydt:
+ ts = ts.to_pydatetime()
+ ts_naive = Timestamp("2019")
+
+ with tm.assert_produces_warning(FutureWarning):
+ result = DataFrame({0: [ts]}, dtype="datetime64[ns]")
+
+ expected = DataFrame({0: [ts_naive]})
+ tm.assert_frame_equal(result, expected)
+
+ with tm.assert_produces_warning(FutureWarning):
+ result = DataFrame({0: ts}, index=[0], dtype="datetime64[ns]")
+ tm.assert_frame_equal(result, expected)
+
+ with tm.assert_produces_warning(FutureWarning):
+ result = DataFrame([ts], dtype="datetime64[ns]")
+ tm.assert_frame_equal(result, expected)
+
+ with tm.assert_produces_warning(FutureWarning):
+ result = DataFrame(np.array([ts], dtype=object), dtype="datetime64[ns]")
+ tm.assert_frame_equal(result, expected)
+
+ with tm.assert_produces_warning(FutureWarning):
+ result = DataFrame(ts, index=[0], columns=[0], dtype="datetime64[ns]")
+ tm.assert_frame_equal(result, expected)
+
+ with tm.assert_produces_warning(FutureWarning):
+ df = DataFrame([Series([ts])], dtype="datetime64[ns]")
+ tm.assert_frame_equal(df, expected)
+
+ with tm.assert_produces_warning(FutureWarning):
+ df = DataFrame([[ts]], columns=[0], dtype="datetime64[ns]")
+ tm.assert_equal(df, expected)
+
+ def test_from_dict(self):
+
+ # 8260
+ # support datetime64 with tz
+
+ idx = Index(date_range("20130101", periods=3, tz="US/Eastern"), name="foo")
+ dr = date_range("20130110", periods=3)
+
+ # construction
+ df = DataFrame({"A": idx, "B": dr})
+ assert df["A"].dtype == "M8[ns, US/Eastern]"
+ assert df["A"].name == "A"
+ tm.assert_series_equal(df["A"], Series(idx, name="A"))
+ tm.assert_series_equal(df["B"], Series(dr, name="B"))
+
+ def test_from_index(self):
+
+ # from index
+ idx2 = date_range("20130101", periods=3, tz="US/Eastern", name="foo")
+ df2 = DataFrame(idx2)
+ tm.assert_series_equal(df2["foo"], Series(idx2, name="foo"))
+ df2 = DataFrame(Series(idx2))
+ tm.assert_series_equal(df2["foo"], Series(idx2, name="foo"))
+
+ idx2 = date_range("20130101", periods=3, tz="US/Eastern")
+ df2 = DataFrame(idx2)
+ tm.assert_series_equal(df2[0], Series(idx2, name=0))
+ df2 = DataFrame(Series(idx2))
+ tm.assert_series_equal(df2[0], Series(idx2, name=0))
+
+ def test_frame_dict_constructor_datetime64_1680(self):
+ dr = date_range("1/1/2012", periods=10)
+ s = Series(dr, index=dr)
+
+ # it works!
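+ # These constructor calls are pure smoke tests: broadcasting the scalar
+ # "foo" alongside a datetime64 Series (GH 1680, per the test name) used to
+ # raise during dict construction, so merely not erroring is the assertion.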
+ DataFrame({"a": "foo", "b": s}, index=dr) + DataFrame({"a": "foo", "b": s.values}, index=dr) + + def test_frame_datetime64_mixed_index_ctor_1681(self): + dr = date_range("2011/1/1", "2012/1/1", freq="W-FRI") + ts = Series(dr) + + # it works! + d = DataFrame({"A": "foo", "B": ts}, index=dr) + assert d["B"].isna().all() + + def test_frame_timeseries_column(self): + # GH19157 + dr = date_range(start="20130101T10:00:00", periods=3, freq="T", tz="US/Eastern") + result = DataFrame(dr, columns=["timestamps"]) + expected = DataFrame( + { + "timestamps": [ + Timestamp("20130101T10:00:00", tz="US/Eastern"), + Timestamp("20130101T10:01:00", tz="US/Eastern"), + Timestamp("20130101T10:02:00", tz="US/Eastern"), + ] + } + ) + tm.assert_frame_equal(result, expected) + + def test_nested_dict_construction(self): + # GH22227 + columns = ["Nevada", "Ohio"] + pop = { + "Nevada": {2001: 2.4, 2002: 2.9}, + "Ohio": {2000: 1.5, 2001: 1.7, 2002: 3.6}, + } + result = DataFrame(pop, index=[2001, 2002, 2003], columns=columns) + expected = DataFrame( + [(2.4, 1.7), (2.9, 3.6), (np.nan, np.nan)], + columns=columns, + index=Index([2001, 2002, 2003]), + ) + tm.assert_frame_equal(result, expected) + + def test_from_tzaware_object_array(self): + # GH#26825 2D object array of tzaware timestamps should not raise + dti = date_range("2016-04-05 04:30", periods=3, tz="UTC") + data = dti._data.astype(object).reshape(1, -1) + df = DataFrame(data) + assert df.shape == (1, 3) + assert (df.dtypes == dti.dtype).all() + assert (df == dti).all().all() + + def test_from_tzaware_mixed_object_array(self): + # GH#26825 + arr = np.array( + [ + [ + Timestamp("2013-01-01 00:00:00"), + Timestamp("2013-01-02 00:00:00"), + Timestamp("2013-01-03 00:00:00"), + ], + [ + Timestamp("2013-01-01 00:00:00-0500", tz="US/Eastern"), + pd.NaT, + Timestamp("2013-01-03 00:00:00-0500", tz="US/Eastern"), + ], + [ + Timestamp("2013-01-01 00:00:00+0100", tz="CET"), + pd.NaT, + Timestamp("2013-01-03 00:00:00+0100", tz="CET"), + ], + ], + dtype=object, + ).T + res = DataFrame(arr, columns=["A", "B", "C"]) + + expected_dtypes = [ + "datetime64[ns]", + "datetime64[ns, US/Eastern]", + "datetime64[ns, CET]", + ] + assert (res.dtypes == expected_dtypes).all() + + def test_from_2d_ndarray_with_dtype(self): + # GH#12513 + array_dim2 = np.arange(10).reshape((5, 2)) + df = DataFrame(array_dim2, dtype="datetime64[ns, UTC]") + + expected = DataFrame(array_dim2).astype("datetime64[ns, UTC]") + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize("typ", [set, frozenset]) + def test_construction_from_set_raises(self, typ): + # https://github.com/pandas-dev/pandas/issues/32582 + values = typ({1, 2, 3}) + msg = f"'{typ.__name__}' type is unordered" + with pytest.raises(TypeError, match=msg): + DataFrame({"a": values}) + + with pytest.raises(TypeError, match=msg): + Series(values) + + def test_construction_from_ndarray_datetimelike(self): + # ensure the underlying arrays are properly wrapped as EA when + # constructed from 2D ndarray + arr = np.arange(0, 12, dtype="datetime64[ns]").reshape(4, 3) + df = DataFrame(arr) + assert all(isinstance(arr, DatetimeArray) for arr in df._mgr.arrays) + + def test_construction_from_ndarray_with_eadtype_mismatched_columns(self): + arr = np.random.randn(10, 2) + dtype = pd.array([2.0]).dtype + msg = r"len\(arrays\) must match len\(columns\)" + with pytest.raises(ValueError, match=msg): + DataFrame(arr, columns=["foo"], dtype=dtype) + + arr2 = pd.array([2.0, 3.0, 4.0]) + with pytest.raises(ValueError, match=msg): + DataFrame(arr2, 
columns=["foo", "bar"]) + + +def get1(obj): # TODO: make a helper in tm? + if isinstance(obj, Series): + return obj.iloc[0] + else: + return obj.iloc[0, 0] + + +class TestFromScalar: + @pytest.fixture(params=[list, dict, None]) + def constructor(self, request, frame_or_series): + box = request.param + + extra = {"index": range(2)} + if frame_or_series is DataFrame: + extra["columns"] = ["A"] + + if box is None: + return functools.partial(frame_or_series, **extra) + + elif box is dict: + if frame_or_series is Series: + return lambda x, **kwargs: frame_or_series( + {0: x, 1: x}, **extra, **kwargs + ) + else: + return lambda x, **kwargs: frame_or_series({"A": x}, **extra, **kwargs) + else: + if frame_or_series is Series: + return lambda x, **kwargs: frame_or_series([x, x], **extra, **kwargs) + else: + return lambda x, **kwargs: frame_or_series( + {"A": [x, x]}, **extra, **kwargs + ) + + @pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"]) + def test_from_nat_scalar(self, dtype, constructor): + obj = constructor(pd.NaT, dtype=dtype) + assert np.all(obj.dtypes == dtype) + assert np.all(obj.isna()) + + def test_from_timedelta_scalar_preserves_nanos(self, constructor): + td = Timedelta(1) + + obj = constructor(td, dtype="m8[ns]") + assert get1(obj) == td + + def test_from_timestamp_scalar_preserves_nanos(self, constructor, fixed_now_ts): + ts = fixed_now_ts + Timedelta(1) + + obj = constructor(ts, dtype="M8[ns]") + assert get1(obj) == ts + + def test_from_timedelta64_scalar_object(self, constructor): + + td = Timedelta(1) + td64 = td.to_timedelta64() + + obj = constructor(td64, dtype=object) + assert isinstance(get1(obj), np.timedelta64) + + @pytest.mark.parametrize("cls", [np.datetime64, np.timedelta64]) + def test_from_scalar_datetimelike_mismatched(self, constructor, cls): + scalar = cls("NaT", "ns") + dtype = {np.datetime64: "m8[ns]", np.timedelta64: "M8[ns]"}[cls] + + msg = "Cannot cast" + if cls is np.datetime64: + msg = "|".join( + [ + r"dtype datetime64\[ns\] cannot be converted to timedelta64\[ns\]", + "Cannot cast", + ] + ) + + with pytest.raises(TypeError, match=msg): + constructor(scalar, dtype=dtype) + + scalar = cls(4, "ns") + with pytest.raises(TypeError, match=msg): + constructor(scalar, dtype=dtype) + + @pytest.mark.parametrize("cls", [datetime, np.datetime64]) + def test_from_out_of_bounds_datetime(self, constructor, cls): + scalar = datetime(9999, 1, 1) + if cls is np.datetime64: + scalar = np.datetime64(scalar, "D") + result = constructor(scalar) + + assert type(get1(result)) is cls + + @pytest.mark.parametrize("cls", [timedelta, np.timedelta64]) + def test_from_out_of_bounds_timedelta(self, constructor, cls): + scalar = datetime(9999, 1, 1) - datetime(1970, 1, 1) + if cls is np.timedelta64: + scalar = np.timedelta64(scalar, "D") + result = constructor(scalar) + + assert type(get1(result)) is cls + + def test_tzaware_data_tznaive_dtype(self, constructor): + tz = "US/Eastern" + ts = Timestamp("2019", tz=tz) + ts_naive = Timestamp("2019") + + with tm.assert_produces_warning(FutureWarning, match="Data is timezone-aware"): + result = constructor(ts, dtype="M8[ns]") + + assert np.all(result.dtypes == "M8[ns]") + assert np.all(result == ts_naive) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/test_cumulative.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/test_cumulative.py new file mode 100644 index 0000000..5bd9c42 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/test_cumulative.py @@ -0,0 +1,81 @@ +""" +Tests for 
DataFrame cumulative operations + +See also +-------- +tests.series.test_cumulative +""" + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Series, +) +import pandas._testing as tm + + +class TestDataFrameCumulativeOps: + # --------------------------------------------------------------------- + # Cumulative Operations - cumsum, cummax, ... + + def test_cumulative_ops_smoke(self): + # it works + df = DataFrame({"A": np.arange(20)}, index=np.arange(20)) + df.cummax() + df.cummin() + df.cumsum() + + dm = DataFrame(np.arange(20).reshape(4, 5), index=range(4), columns=range(5)) + # TODO(wesm): do something with this? + dm.cumsum() + + def test_cumprod_smoke(self, datetime_frame): + datetime_frame.iloc[5:10, 0] = np.nan + datetime_frame.iloc[10:15, 1] = np.nan + datetime_frame.iloc[15:, 2] = np.nan + + # ints + df = datetime_frame.fillna(0).astype(int) + df.cumprod(0) + df.cumprod(1) + + # ints32 + df = datetime_frame.fillna(0).astype(np.int32) + df.cumprod(0) + df.cumprod(1) + + @pytest.mark.parametrize("method", ["cumsum", "cumprod", "cummin", "cummax"]) + def test_cumulative_ops_match_series_apply(self, datetime_frame, method): + datetime_frame.iloc[5:10, 0] = np.nan + datetime_frame.iloc[10:15, 1] = np.nan + datetime_frame.iloc[15:, 2] = np.nan + + # axis = 0 + result = getattr(datetime_frame, method)() + expected = datetime_frame.apply(getattr(Series, method)) + tm.assert_frame_equal(result, expected) + + # axis = 1 + result = getattr(datetime_frame, method)(axis=1) + expected = datetime_frame.apply(getattr(Series, method), axis=1) + tm.assert_frame_equal(result, expected) + + # fix issue TODO: GH ref? + assert np.shape(result) == np.shape(datetime_frame) + + def test_cumsum_preserve_dtypes(self): + # GH#19296 dont incorrectly upcast to object + df = DataFrame({"A": [1, 2, 3], "B": [1, 2, 3.0], "C": [True, False, False]}) + + result = df.cumsum() + + expected = DataFrame( + { + "A": Series([1, 3, 6], dtype=np.int64), + "B": Series([1, 3, 6], dtype=np.float64), + "C": df["C"].cumsum(), + } + ) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/test_iteration.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/test_iteration.py new file mode 100644 index 0000000..c6e5aa6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/test_iteration.py @@ -0,0 +1,162 @@ +import datetime + +import numpy as np + +from pandas.compat import ( + IS64, + is_platform_windows, +) + +from pandas import ( + Categorical, + DataFrame, + Series, + date_range, +) +import pandas._testing as tm + + +class TestIteration: + def test_keys(self, float_frame): + assert float_frame.keys() is float_frame.columns + + def test_iteritems(self): + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "a", "b"]) + for k, v in df.items(): + assert isinstance(v, DataFrame._constructor_sliced) + + def test_items(self): + # GH#17213, GH#13918 + cols = ["a", "b", "c"] + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=cols) + for c, (k, v) in zip(cols, df.items()): + assert c == k + assert isinstance(v, Series) + assert (df[k] == v).all() + + def test_items_names(self, float_string_frame): + for k, v in float_string_frame.items(): + assert v.name == k + + def test_iter(self, float_frame): + assert tm.equalContents(list(float_frame), float_frame.columns) + + def test_iterrows(self, float_frame, float_string_frame): + for k, v in float_frame.iterrows(): + exp = float_frame.loc[k] + tm.assert_series_equal(v, exp) + + for k, v in 
float_string_frame.iterrows():
+ exp = float_string_frame.loc[k]
+ tm.assert_series_equal(v, exp)
+
+ def test_iterrows_iso8601(self):
+ # GH#19671
+ s = DataFrame(
+ {
+ "non_iso8601": ["M1701", "M1802", "M1903", "M2004"],
+ "iso8601": date_range("2000-01-01", periods=4, freq="M"),
+ }
+ )
+ for k, v in s.iterrows():
+ exp = s.loc[k]
+ tm.assert_series_equal(v, exp)
+
+ def test_iterrows_corner(self):
+ # GH#12222
+ df = DataFrame(
+ {
+ "a": [datetime.datetime(2015, 1, 1)],
+ "b": [None],
+ "c": [None],
+ "d": [""],
+ "e": [[]],
+ "f": [set()],
+ "g": [{}],
+ }
+ )
+ expected = Series(
+ [datetime.datetime(2015, 1, 1), None, None, "", [], set(), {}],
+ index=list("abcdefg"),
+ name=0,
+ dtype="object",
+ )
+ _, result = next(df.iterrows())
+ tm.assert_series_equal(result, expected)
+
+ def test_itertuples(self, float_frame):
+ for i, tup in enumerate(float_frame.itertuples()):
+ ser = DataFrame._constructor_sliced(tup[1:])
+ ser.name = tup[0]
+ expected = float_frame.iloc[i, :].reset_index(drop=True)
+ tm.assert_series_equal(ser, expected)
+
+ df = DataFrame(
+ {"floats": np.random.randn(5), "ints": range(5)}, columns=["floats", "ints"]
+ )
+
+ for tup in df.itertuples(index=False):
+ assert isinstance(tup[1], int)
+
+ df = DataFrame(data={"a": [1, 2, 3], "b": [4, 5, 6]})
+ dfaa = df[["a", "a"]]
+
+ assert list(dfaa.itertuples()) == [(0, 1, 1), (1, 2, 2), (2, 3, 3)]
+
+ # repr with int on 32-bit/windows
+ if not (is_platform_windows() or not IS64):
+ assert (
+ repr(list(df.itertuples(name=None)))
+ == "[(0, 1, 4), (1, 2, 5), (2, 3, 6)]"
+ )
+
+ tup = next(df.itertuples(name="TestName"))
+ assert tup._fields == ("Index", "a", "b")
+ assert (tup.Index, tup.a, tup.b) == tup
+ assert type(tup).__name__ == "TestName"
+
+ df.columns = ["def", "return"]
+ tup2 = next(df.itertuples(name="TestName"))
+ assert tup2 == (0, 1, 4)
+ assert tup2._fields == ("Index", "_1", "_2")
+
+ df3 = DataFrame({"f" + str(i): [i] for i in range(1024)})
+ # will raise SyntaxError if trying to create namedtuple
+ tup3 = next(df3.itertuples())
+ assert isinstance(tup3, tuple)
+ assert hasattr(tup3, "_fields")
+
+ # GH#28282
+ df_254_columns = DataFrame([{f"foo_{i}": f"bar_{i}" for i in range(254)}])
+ result_254_columns = next(df_254_columns.itertuples(index=False))
+ assert isinstance(result_254_columns, tuple)
+ assert hasattr(result_254_columns, "_fields")
+
+ df_255_columns = DataFrame([{f"foo_{i}": f"bar_{i}" for i in range(255)}])
+ result_255_columns = next(df_255_columns.itertuples(index=False))
+ assert isinstance(result_255_columns, tuple)
+ assert hasattr(result_255_columns, "_fields")
+
+ def test_sequence_like_with_categorical(self):
+
+ # GH#7839
+ # make sure can iterate
+ df = DataFrame(
+ {"id": [1, 2, 3, 4, 5, 6], "raw_grade": ["a", "b", "b", "a", "a", "e"]}
+ )
+ df["grade"] = Categorical(df["raw_grade"])
+
+ # basic sequencing testing
+ result = list(df.grade.values)
+ expected = np.array(df.grade.values).tolist()
+ tm.assert_almost_equal(result, expected)
+
+ # iteration
+ for t in df.itertuples(index=False):
+ str(t)
+
+ for row, s in df.iterrows():
+ str(s)
+
+ for c, col in df.items():
+ str(col)
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/test_logical_ops.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/test_logical_ops.py
new file mode 100644
index 0000000..f509ae5
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/test_logical_ops.py
@@ -0,0 +1,191 @@
+import operator
+import re
+
+import numpy as np
+import pytest
+
+from pandas import 
( + CategoricalIndex, + DataFrame, + Interval, + Series, + isnull, +) +import pandas._testing as tm + + +class TestDataFrameLogicalOperators: + # &, |, ^ + + @pytest.mark.parametrize( + "left, right, op, expected", + [ + ( + [True, False, np.nan], + [True, False, True], + operator.and_, + [True, False, False], + ), + ( + [True, False, True], + [True, False, np.nan], + operator.and_, + [True, False, False], + ), + ( + [True, False, np.nan], + [True, False, True], + operator.or_, + [True, False, False], + ), + ( + [True, False, True], + [True, False, np.nan], + operator.or_, + [True, False, True], + ), + ], + ) + def test_logical_operators_nans(self, left, right, op, expected, frame_or_series): + # GH#13896 + result = op(frame_or_series(left), frame_or_series(right)) + expected = frame_or_series(expected) + + tm.assert_equal(result, expected) + + def test_logical_ops_empty_frame(self): + # GH#5808 + # empty frames, non-mixed dtype + df = DataFrame(index=[1]) + + result = df & df + tm.assert_frame_equal(result, df) + + result = df | df + tm.assert_frame_equal(result, df) + + df2 = DataFrame(index=[1, 2]) + result = df & df2 + tm.assert_frame_equal(result, df2) + + dfa = DataFrame(index=[1], columns=["A"]) + + result = dfa & dfa + expected = DataFrame(False, index=[1], columns=["A"]) + tm.assert_frame_equal(result, expected) + + def test_logical_ops_bool_frame(self): + # GH#5808 + df1a_bool = DataFrame(True, index=[1], columns=["A"]) + + result = df1a_bool & df1a_bool + tm.assert_frame_equal(result, df1a_bool) + + result = df1a_bool | df1a_bool + tm.assert_frame_equal(result, df1a_bool) + + def test_logical_ops_int_frame(self): + # GH#5808 + df1a_int = DataFrame(1, index=[1], columns=["A"]) + df1a_bool = DataFrame(True, index=[1], columns=["A"]) + + result = df1a_int | df1a_bool + tm.assert_frame_equal(result, df1a_bool) + + # Check that this matches Series behavior + res_ser = df1a_int["A"] | df1a_bool["A"] + tm.assert_series_equal(res_ser, df1a_bool["A"]) + + def test_logical_ops_invalid(self): + # GH#5808 + + df1 = DataFrame(1.0, index=[1], columns=["A"]) + df2 = DataFrame(True, index=[1], columns=["A"]) + msg = re.escape("unsupported operand type(s) for |: 'float' and 'bool'") + with pytest.raises(TypeError, match=msg): + df1 | df2 + + df1 = DataFrame("foo", index=[1], columns=["A"]) + df2 = DataFrame(True, index=[1], columns=["A"]) + msg = re.escape("unsupported operand type(s) for |: 'str' and 'bool'") + with pytest.raises(TypeError, match=msg): + df1 | df2 + + def test_logical_operators(self): + def _check_bin_op(op): + result = op(df1, df2) + expected = DataFrame( + op(df1.values, df2.values), index=df1.index, columns=df1.columns + ) + assert result.values.dtype == np.bool_ + tm.assert_frame_equal(result, expected) + + def _check_unary_op(op): + result = op(df1) + expected = DataFrame(op(df1.values), index=df1.index, columns=df1.columns) + assert result.values.dtype == np.bool_ + tm.assert_frame_equal(result, expected) + + df1 = { + "a": {"a": True, "b": False, "c": False, "d": True, "e": True}, + "b": {"a": False, "b": True, "c": False, "d": False, "e": False}, + "c": {"a": False, "b": False, "c": True, "d": False, "e": False}, + "d": {"a": True, "b": False, "c": False, "d": True, "e": True}, + "e": {"a": True, "b": False, "c": False, "d": True, "e": True}, + } + + df2 = { + "a": {"a": True, "b": False, "c": True, "d": False, "e": False}, + "b": {"a": False, "b": True, "c": False, "d": False, "e": False}, + "c": {"a": True, "b": False, "c": True, "d": False, "e": False}, + "d": {"a": 
False, "b": False, "c": False, "d": True, "e": False}, + "e": {"a": False, "b": False, "c": False, "d": False, "e": True}, + } + + df1 = DataFrame(df1) + df2 = DataFrame(df2) + + _check_bin_op(operator.and_) + _check_bin_op(operator.or_) + _check_bin_op(operator.xor) + + _check_unary_op(operator.inv) # TODO: belongs elsewhere + + def test_logical_with_nas(self): + d = DataFrame({"a": [np.nan, False], "b": [True, True]}) + + # GH4947 + # bool comparisons should return bool + result = d["a"] | d["b"] + expected = Series([False, True]) + tm.assert_series_equal(result, expected) + + # GH4604, automatic casting here + result = d["a"].fillna(False) | d["b"] + expected = Series([True, True]) + tm.assert_series_equal(result, expected) + + result = d["a"].fillna(False, downcast=False) | d["b"] + expected = Series([True, True]) + tm.assert_series_equal(result, expected) + + def test_logical_ops_categorical_columns(self): + # GH#38367 + intervals = [Interval(1, 2), Interval(3, 4)] + data = DataFrame( + [[1, np.nan], [2, np.nan]], + columns=CategoricalIndex( + intervals, categories=intervals + [Interval(5, 6)] + ), + ) + mask = DataFrame( + [[False, False], [False, False]], columns=data.columns, dtype=bool + ) + result = mask | isnull(data) + expected = DataFrame( + [[False, True], [False, True]], + columns=CategoricalIndex( + intervals, categories=intervals + [Interval(5, 6)] + ), + ) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/test_nonunique_indexes.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/test_nonunique_indexes.py new file mode 100644 index 0000000..d010426 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/test_nonunique_indexes.py @@ -0,0 +1,335 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Series, + date_range, +) +import pandas._testing as tm + + +def check(result, expected=None): + if expected is not None: + tm.assert_frame_equal(result, expected) + result.dtypes + str(result) + + +class TestDataFrameNonuniqueIndexes: + def test_setattr_columns_vs_construct_with_columns(self): + + # assignment + # GH 3687 + arr = np.random.randn(3, 2) + idx = list(range(2)) + df = DataFrame(arr, columns=["A", "A"]) + df.columns = idx + expected = DataFrame(arr, columns=idx) + check(df, expected) + + def test_setattr_columns_vs_construct_with_columns_datetimeindx(self): + idx = date_range("20130101", periods=4, freq="Q-NOV") + df = DataFrame( + [[1, 1, 1, 5], [1, 1, 2, 5], [2, 1, 3, 5]], columns=["a", "a", "a", "a"] + ) + df.columns = idx + expected = DataFrame([[1, 1, 1, 5], [1, 1, 2, 5], [2, 1, 3, 5]], columns=idx) + check(df, expected) + + def test_insert_with_duplicate_columns(self): + # insert + df = DataFrame( + [[1, 1, 1, 5], [1, 1, 2, 5], [2, 1, 3, 5]], + columns=["foo", "bar", "foo", "hello"], + ) + df["string"] = "bah" + expected = DataFrame( + [[1, 1, 1, 5, "bah"], [1, 1, 2, 5, "bah"], [2, 1, 3, 5, "bah"]], + columns=["foo", "bar", "foo", "hello", "string"], + ) + check(df, expected) + with pytest.raises(ValueError, match="Length of value"): + df.insert(0, "AnotherColumn", range(len(df.index) - 1)) + + # insert same dtype + df["foo2"] = 3 + expected = DataFrame( + [[1, 1, 1, 5, "bah", 3], [1, 1, 2, 5, "bah", 3], [2, 1, 3, 5, "bah", 3]], + columns=["foo", "bar", "foo", "hello", "string", "foo2"], + ) + check(df, expected) + + # set (non-dup) + df["foo2"] = 4 + expected = DataFrame( + [[1, 1, 1, 5, "bah", 4], [1, 1, 2, 5, "bah", 4], [2, 1, 3, 5, 
"bah", 4]], + columns=["foo", "bar", "foo", "hello", "string", "foo2"], + ) + check(df, expected) + df["foo2"] = 3 + + # delete (non dup) + del df["bar"] + expected = DataFrame( + [[1, 1, 5, "bah", 3], [1, 2, 5, "bah", 3], [2, 3, 5, "bah", 3]], + columns=["foo", "foo", "hello", "string", "foo2"], + ) + check(df, expected) + + # try to delete again (its not consolidated) + del df["hello"] + expected = DataFrame( + [[1, 1, "bah", 3], [1, 2, "bah", 3], [2, 3, "bah", 3]], + columns=["foo", "foo", "string", "foo2"], + ) + check(df, expected) + + # consolidate + df = df._consolidate() + expected = DataFrame( + [[1, 1, "bah", 3], [1, 2, "bah", 3], [2, 3, "bah", 3]], + columns=["foo", "foo", "string", "foo2"], + ) + check(df, expected) + + # insert + df.insert(2, "new_col", 5.0) + expected = DataFrame( + [[1, 1, 5.0, "bah", 3], [1, 2, 5.0, "bah", 3], [2, 3, 5.0, "bah", 3]], + columns=["foo", "foo", "new_col", "string", "foo2"], + ) + check(df, expected) + + # insert a dup + with pytest.raises(ValueError, match="cannot insert"): + df.insert(2, "new_col", 4.0) + + df.insert(2, "new_col", 4.0, allow_duplicates=True) + expected = DataFrame( + [ + [1, 1, 4.0, 5.0, "bah", 3], + [1, 2, 4.0, 5.0, "bah", 3], + [2, 3, 4.0, 5.0, "bah", 3], + ], + columns=["foo", "foo", "new_col", "new_col", "string", "foo2"], + ) + check(df, expected) + + # delete (dup) + del df["foo"] + expected = DataFrame( + [[4.0, 5.0, "bah", 3], [4.0, 5.0, "bah", 3], [4.0, 5.0, "bah", 3]], + columns=["new_col", "new_col", "string", "foo2"], + ) + tm.assert_frame_equal(df, expected) + + def test_dup_across_dtypes(self): + # dup across dtypes + df = DataFrame( + [[1, 1, 1.0, 5], [1, 1, 2.0, 5], [2, 1, 3.0, 5]], + columns=["foo", "bar", "foo", "hello"], + ) + check(df) + + df["foo2"] = 7.0 + expected = DataFrame( + [[1, 1, 1.0, 5, 7.0], [1, 1, 2.0, 5, 7.0], [2, 1, 3.0, 5, 7.0]], + columns=["foo", "bar", "foo", "hello", "foo2"], + ) + check(df, expected) + + result = df["foo"] + expected = DataFrame([[1, 1.0], [1, 2.0], [2, 3.0]], columns=["foo", "foo"]) + check(result, expected) + + # multiple replacements + df["foo"] = "string" + expected = DataFrame( + [ + ["string", 1, "string", 5, 7.0], + ["string", 1, "string", 5, 7.0], + ["string", 1, "string", 5, 7.0], + ], + columns=["foo", "bar", "foo", "hello", "foo2"], + ) + check(df, expected) + + del df["foo"] + expected = DataFrame( + [[1, 5, 7.0], [1, 5, 7.0], [1, 5, 7.0]], columns=["bar", "hello", "foo2"] + ) + check(df, expected) + + def test_column_dups_indexes(self): + # check column dups with index equal and not equal to df's index + df = DataFrame( + np.random.randn(5, 3), + index=["a", "b", "c", "d", "e"], + columns=["A", "B", "A"], + ) + for index in [df.index, pd.Index(list("edcba"))]: + this_df = df.copy() + expected_ser = Series(index.values, index=this_df.index) + expected_df = DataFrame( + {"A": expected_ser, "B": this_df["B"], "A": expected_ser}, + columns=["A", "B", "A"], + ) + this_df["A"] = index + check(this_df, expected_df) + + def test_changing_dtypes_with_duplicate_columns(self): + # multiple assignments that change dtypes + # the location indexer is a slice + # GH 6120 + df = DataFrame(np.random.randn(5, 2), columns=["that", "that"]) + expected = DataFrame(1.0, index=range(5), columns=["that", "that"]) + + df["that"] = 1.0 + check(df, expected) + + df = DataFrame(np.random.rand(5, 2), columns=["that", "that"]) + expected = DataFrame(1, index=range(5), columns=["that", "that"]) + + df["that"] = 1 + check(df, expected) + + def test_dup_columns_comparisons(self): + # 
equality + df1 = DataFrame([[1, 2], [2, np.nan], [3, 4], [4, 4]], columns=["A", "B"]) + df2 = DataFrame([[0, 1], [2, 4], [2, np.nan], [4, 5]], columns=["A", "A"]) + + # not-comparing like-labelled + msg = "Can only compare identically-labeled DataFrame objects" + with pytest.raises(ValueError, match=msg): + df1 == df2 + + df1r = df1.reindex_like(df2) + result = df1r == df2 + expected = DataFrame( + [[False, True], [True, False], [False, False], [True, False]], + columns=["A", "A"], + ) + tm.assert_frame_equal(result, expected) + + def test_mixed_column_selection(self): + # mixed column selection + # GH 5639 + dfbool = DataFrame( + { + "one": Series([True, True, False], index=["a", "b", "c"]), + "two": Series([False, False, True, False], index=["a", "b", "c", "d"]), + "three": Series([False, True, True, True], index=["a", "b", "c", "d"]), + } + ) + expected = pd.concat([dfbool["one"], dfbool["three"], dfbool["one"]], axis=1) + result = dfbool[["one", "three", "one"]] + check(result, expected) + + def test_multi_axis_dups(self): + # multi-axis dups + # GH 6121 + df = DataFrame( + np.arange(25.0).reshape(5, 5), + index=["a", "b", "c", "d", "e"], + columns=["A", "B", "C", "D", "E"], + ) + z = df[["A", "C", "A"]].copy() + expected = z.loc[["a", "c", "a"]] + + df = DataFrame( + np.arange(25.0).reshape(5, 5), + index=["a", "b", "c", "d", "e"], + columns=["A", "B", "C", "D", "E"], + ) + z = df[["A", "C", "A"]] + result = z.loc[["a", "c", "a"]] + check(result, expected) + + def test_columns_with_dups(self): + # GH 3468 related + + # basic + df = DataFrame([[1, 2]], columns=["a", "a"]) + df.columns = ["a", "a.1"] + str(df) + expected = DataFrame([[1, 2]], columns=["a", "a.1"]) + tm.assert_frame_equal(df, expected) + + df = DataFrame([[1, 2, 3]], columns=["b", "a", "a"]) + df.columns = ["b", "a", "a.1"] + str(df) + expected = DataFrame([[1, 2, 3]], columns=["b", "a", "a.1"]) + tm.assert_frame_equal(df, expected) + + def test_columns_with_dup_index(self): + # with a dup index + df = DataFrame([[1, 2]], columns=["a", "a"]) + df.columns = ["b", "b"] + str(df) + expected = DataFrame([[1, 2]], columns=["b", "b"]) + tm.assert_frame_equal(df, expected) + + def test_multi_dtype(self): + # multi-dtype + df = DataFrame( + [[1, 2, 1.0, 2.0, 3.0, "foo", "bar"]], + columns=["a", "a", "b", "b", "d", "c", "c"], + ) + df.columns = list("ABCDEFG") + str(df) + expected = DataFrame( + [[1, 2, 1.0, 2.0, 3.0, "foo", "bar"]], columns=list("ABCDEFG") + ) + tm.assert_frame_equal(df, expected) + + def test_multi_dtype2(self): + df = DataFrame([[1, 2, "foo", "bar"]], columns=["a", "a", "a", "a"]) + df.columns = ["a", "a.1", "a.2", "a.3"] + str(df) + expected = DataFrame([[1, 2, "foo", "bar"]], columns=["a", "a.1", "a.2", "a.3"]) + tm.assert_frame_equal(df, expected) + + def test_dups_across_blocks(self, using_array_manager): + # dups across blocks + df_float = DataFrame(np.random.randn(10, 3), dtype="float64") + df_int = DataFrame(np.random.randn(10, 3).astype("int64")) + df_bool = DataFrame(True, index=df_float.index, columns=df_float.columns) + df_object = DataFrame("foo", index=df_float.index, columns=df_float.columns) + df_dt = DataFrame( + pd.Timestamp("20010101"), index=df_float.index, columns=df_float.columns + ) + df = pd.concat([df_float, df_int, df_bool, df_object, df_dt], axis=1) + + if not using_array_manager: + assert len(df._mgr.blknos) == len(df.columns) + assert len(df._mgr.blklocs) == len(df.columns) + + # testing iloc + for i in range(len(df.columns)): + df.iloc[:, i] + + def 
test_dup_columns_across_dtype(self): + # dup columns across dtype GH 2079/2194 + vals = [[1, -1, 2.0], [2, -2, 3.0]] + rs = DataFrame(vals, columns=["A", "A", "B"]) + xp = DataFrame(vals) + xp.columns = ["A", "A", "B"] + tm.assert_frame_equal(rs, xp) + + def test_set_value_by_index(self): + # See gh-12344 + df = DataFrame(np.arange(9).reshape(3, 3).T) + df.columns = list("AAA") + expected = df.iloc[:, 2] + + df.iloc[:, 0] = 3 + tm.assert_series_equal(df.iloc[:, 2], expected) + + df = DataFrame(np.arange(9).reshape(3, 3).T) + df.columns = [2, float(2), str(2)] + expected = df.iloc[:, 1] + + df.iloc[:, 0] = 3 + tm.assert_series_equal(df.iloc[:, 1], expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/test_npfuncs.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/test_npfuncs.py new file mode 100644 index 0000000..0b7699e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/test_npfuncs.py @@ -0,0 +1,28 @@ +""" +Tests for np.foo applied to DataFrame, not necessarily ufuncs. +""" +import numpy as np + +from pandas import ( + Categorical, + DataFrame, +) +import pandas._testing as tm + + +class TestAsArray: + def test_asarray_homogenous(self): + df = DataFrame({"A": Categorical([1, 2]), "B": Categorical([1, 2])}) + result = np.asarray(df) + # may change from object in the future + expected = np.array([[1, 1], [2, 2]], dtype="object") + tm.assert_numpy_array_equal(result, expected) + + def test_np_sqrt(self, float_frame): + with np.errstate(all="ignore"): + result = np.sqrt(float_frame) + assert isinstance(result, type(float_frame)) + assert result.index is float_frame.index + assert result.columns is float_frame.columns + + tm.assert_frame_equal(result, float_frame.apply(np.sqrt)) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/test_query_eval.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/test_query_eval.py new file mode 100644 index 0000000..558ba04 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/test_query_eval.py @@ -0,0 +1,1277 @@ +import operator + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + date_range, +) +import pandas._testing as tm +from pandas.core.computation.check import NUMEXPR_INSTALLED + +PARSERS = "python", "pandas" +ENGINES = "python", pytest.param("numexpr", marks=td.skip_if_no_ne) + + +@pytest.fixture(params=PARSERS, ids=lambda x: x) +def parser(request): + return request.param + + +@pytest.fixture(params=ENGINES, ids=lambda x: x) +def engine(request): + return request.param + + +def skip_if_no_pandas_parser(parser): + if parser != "pandas": + pytest.skip(f"cannot evaluate with parser {repr(parser)}") + + +class TestCompat: + def setup_method(self, method): + self.df = DataFrame({"A": [1, 2, 3]}) + self.expected1 = self.df[self.df.A > 0] + self.expected2 = self.df.A + 1 + + def test_query_default(self): + + # GH 12749 + # this should always work, whether NUMEXPR_INSTALLED or not + df = self.df + result = df.query("A>0") + tm.assert_frame_equal(result, self.expected1) + result = df.eval("A+1") + tm.assert_series_equal(result, self.expected2, check_names=False) + + def test_query_None(self): + + df = self.df + result = df.query("A>0", engine=None) + tm.assert_frame_equal(result, self.expected1) + result = df.eval("A+1", engine=None) + tm.assert_series_equal(result, self.expected2, check_names=False) + + def test_query_python(self): + + df = 
self.df
+ result = df.query("A>0", engine="python")
+ tm.assert_frame_equal(result, self.expected1)
+ result = df.eval("A+1", engine="python")
+ tm.assert_series_equal(result, self.expected2, check_names=False)
+
+ def test_query_numexpr(self):
+
+ df = self.df
+ if NUMEXPR_INSTALLED:
+ result = df.query("A>0", engine="numexpr")
+ tm.assert_frame_equal(result, self.expected1)
+ result = df.eval("A+1", engine="numexpr")
+ tm.assert_series_equal(result, self.expected2, check_names=False)
+ else:
+ msg = (
+ r"'numexpr' is not installed or an unsupported version. "
+ r"Cannot use engine='numexpr' for query/eval if 'numexpr' is "
+ r"not installed"
+ )
+ with pytest.raises(ImportError, match=msg):
+ df.query("A>0", engine="numexpr")
+ with pytest.raises(ImportError, match=msg):
+ df.eval("A+1", engine="numexpr")
+
+
+class TestDataFrameEval:
+
+ # smaller hits python, larger hits numexpr
+ @pytest.mark.parametrize("n", [4, 4000])
+ @pytest.mark.parametrize(
+ "op_str,op,rop",
+ [
+ ("+", "__add__", "__radd__"),
+ ("-", "__sub__", "__rsub__"),
+ ("*", "__mul__", "__rmul__"),
+ ("/", "__truediv__", "__rtruediv__"),
+ ],
+ )
+ def test_ops(self, op_str, op, rop, n):
+
+ # test ops and reversed ops in evaluation
+ # GH7198
+
+ df = DataFrame(1, index=range(n), columns=list("abcd"))
+ df.iloc[0] = 2
+ m = df.mean()
+
+ base = DataFrame( # noqa:F841
+ np.tile(m.values, n).reshape(n, -1), columns=list("abcd")
+ )
+
+ expected = eval(f"base {op_str} df")
+
+ # ops as strings
+ result = eval(f"m {op_str} df")
+ tm.assert_frame_equal(result, expected)
+
+ # these are commutative
+ if op_str in ["+", "*"]:
+ result = getattr(df, op)(m)
+ tm.assert_frame_equal(result, expected)
+
+ # these are not
+ elif op_str in ["-", "/"]:
+ result = getattr(df, rop)(m)
+ tm.assert_frame_equal(result, expected)
+
+ def test_dataframe_sub_numexpr_path(self):
+ # GH7192: Note we need a large number of rows to ensure this
+ # goes through the numexpr path
+ df = DataFrame({"A": np.random.randn(25000)})
+ df.iloc[0:5] = np.nan
+ expected = 1 - np.isnan(df.iloc[0:25])
+ result = (1 - np.isnan(df)).iloc[0:25]
+ tm.assert_frame_equal(result, expected)
+
+ def test_query_non_str(self):
+ # GH 11485
+ df = DataFrame({"A": [1, 2, 3], "B": ["a", "b", "b"]})
+
+ msg = "expr must be a string to be evaluated"
+ with pytest.raises(ValueError, match=msg):
+ df.query(lambda x: x.B == "b")
+
+ with pytest.raises(ValueError, match=msg):
+ df.query(111)
+
+ def test_query_empty_string(self):
+ # GH 13139
+ df = DataFrame({"A": [1, 2, 3]})
+
+ msg = "expr cannot be an empty string"
+ with pytest.raises(ValueError, match=msg):
+ df.query("")
+
+ def test_eval_resolvers_as_list(self):
+ # GH 14095
+ df = DataFrame(np.random.randn(10, 2), columns=list("ab"))
+ dict1 = {"a": 1}
+ dict2 = {"b": 2}
+ assert df.eval("a + b", resolvers=[dict1, dict2]) == dict1["a"] + dict2["b"]
+ assert pd.eval("a + b", resolvers=[dict1, dict2]) == dict1["a"] + dict2["b"]
+
+ def test_eval_resolvers_combined(self):
+ # GH 34966
+ df = DataFrame(np.random.randn(10, 2), columns=list("ab"))
+ dict1 = {"c": 2}
+
+ # Both input and default index/column resolvers should be usable
+ result = df.eval("a + b * c", resolvers=[dict1])
+
+ expected = df["a"] + df["b"] * dict1["c"]
+ tm.assert_series_equal(result, expected)
+
+ def test_eval_object_dtype_binop(self):
+ # GH#24883
+ df = DataFrame({"a1": ["Y", "N"]})
+ res = df.eval("c = ((a1 == 'Y') & True)")
+ expected = DataFrame({"a1": ["Y", "N"], "c": [True, False]})
+ tm.assert_frame_equal(res, expected)
+
+
+class 
TestDataFrameQueryWithMultiIndex: + def test_query_with_named_multiindex(self, parser, engine): + skip_if_no_pandas_parser(parser) + a = np.random.choice(["red", "green"], size=10) + b = np.random.choice(["eggs", "ham"], size=10) + index = MultiIndex.from_arrays([a, b], names=["color", "food"]) + df = DataFrame(np.random.randn(10, 2), index=index) + ind = Series( + df.index.get_level_values("color").values, index=index, name="color" + ) + + # equality + res1 = df.query('color == "red"', parser=parser, engine=engine) + res2 = df.query('"red" == color', parser=parser, engine=engine) + exp = df[ind == "red"] + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) + + # inequality + res1 = df.query('color != "red"', parser=parser, engine=engine) + res2 = df.query('"red" != color', parser=parser, engine=engine) + exp = df[ind != "red"] + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) + + # list equality (really just set membership) + res1 = df.query('color == ["red"]', parser=parser, engine=engine) + res2 = df.query('["red"] == color', parser=parser, engine=engine) + exp = df[ind.isin(["red"])] + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) + + res1 = df.query('color != ["red"]', parser=parser, engine=engine) + res2 = df.query('["red"] != color', parser=parser, engine=engine) + exp = df[~ind.isin(["red"])] + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) + + # in/not in ops + res1 = df.query('["red"] in color', parser=parser, engine=engine) + res2 = df.query('"red" in color', parser=parser, engine=engine) + exp = df[ind.isin(["red"])] + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) + + res1 = df.query('["red"] not in color', parser=parser, engine=engine) + res2 = df.query('"red" not in color', parser=parser, engine=engine) + exp = df[~ind.isin(["red"])] + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) + + def test_query_with_unnamed_multiindex(self, parser, engine): + skip_if_no_pandas_parser(parser) + a = np.random.choice(["red", "green"], size=10) + b = np.random.choice(["eggs", "ham"], size=10) + index = MultiIndex.from_arrays([a, b]) + df = DataFrame(np.random.randn(10, 2), index=index) + ind = Series(df.index.get_level_values(0).values, index=index) + + res1 = df.query('ilevel_0 == "red"', parser=parser, engine=engine) + res2 = df.query('"red" == ilevel_0', parser=parser, engine=engine) + exp = df[ind == "red"] + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) + + # inequality + res1 = df.query('ilevel_0 != "red"', parser=parser, engine=engine) + res2 = df.query('"red" != ilevel_0', parser=parser, engine=engine) + exp = df[ind != "red"] + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) + + # list equality (really just set membership) + res1 = df.query('ilevel_0 == ["red"]', parser=parser, engine=engine) + res2 = df.query('["red"] == ilevel_0', parser=parser, engine=engine) + exp = df[ind.isin(["red"])] + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) + + res1 = df.query('ilevel_0 != ["red"]', parser=parser, engine=engine) + res2 = df.query('["red"] != ilevel_0', parser=parser, engine=engine) + exp = df[~ind.isin(["red"])] + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) + + # in/not in ops + res1 = df.query('["red"] in ilevel_0', parser=parser, engine=engine) + res2 = df.query('"red" in ilevel_0', parser=parser, engine=engine) + exp = df[ind.isin(["red"])] + tm.assert_frame_equal(res1, 
exp) + tm.assert_frame_equal(res2, exp) + + res1 = df.query('["red"] not in ilevel_0', parser=parser, engine=engine) + res2 = df.query('"red" not in ilevel_0', parser=parser, engine=engine) + exp = df[~ind.isin(["red"])] + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) + + # ## LEVEL 1 + ind = Series(df.index.get_level_values(1).values, index=index) + res1 = df.query('ilevel_1 == "eggs"', parser=parser, engine=engine) + res2 = df.query('"eggs" == ilevel_1', parser=parser, engine=engine) + exp = df[ind == "eggs"] + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) + + # inequality + res1 = df.query('ilevel_1 != "eggs"', parser=parser, engine=engine) + res2 = df.query('"eggs" != ilevel_1', parser=parser, engine=engine) + exp = df[ind != "eggs"] + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) + + # list equality (really just set membership) + res1 = df.query('ilevel_1 == ["eggs"]', parser=parser, engine=engine) + res2 = df.query('["eggs"] == ilevel_1', parser=parser, engine=engine) + exp = df[ind.isin(["eggs"])] + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) + + res1 = df.query('ilevel_1 != ["eggs"]', parser=parser, engine=engine) + res2 = df.query('["eggs"] != ilevel_1', parser=parser, engine=engine) + exp = df[~ind.isin(["eggs"])] + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) + + # in/not in ops + res1 = df.query('["eggs"] in ilevel_1', parser=parser, engine=engine) + res2 = df.query('"eggs" in ilevel_1', parser=parser, engine=engine) + exp = df[ind.isin(["eggs"])] + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) + + res1 = df.query('["eggs"] not in ilevel_1', parser=parser, engine=engine) + res2 = df.query('"eggs" not in ilevel_1', parser=parser, engine=engine) + exp = df[~ind.isin(["eggs"])] + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) + + def test_query_with_partially_named_multiindex(self, parser, engine): + skip_if_no_pandas_parser(parser) + a = np.random.choice(["red", "green"], size=10) + b = np.arange(10) + index = MultiIndex.from_arrays([a, b]) + index.names = [None, "rating"] + df = DataFrame(np.random.randn(10, 2), index=index) + res = df.query("rating == 1", parser=parser, engine=engine) + ind = Series( + df.index.get_level_values("rating").values, index=index, name="rating" + ) + exp = df[ind == 1] + tm.assert_frame_equal(res, exp) + + res = df.query("rating != 1", parser=parser, engine=engine) + ind = Series( + df.index.get_level_values("rating").values, index=index, name="rating" + ) + exp = df[ind != 1] + tm.assert_frame_equal(res, exp) + + res = df.query('ilevel_0 == "red"', parser=parser, engine=engine) + ind = Series(df.index.get_level_values(0).values, index=index) + exp = df[ind == "red"] + tm.assert_frame_equal(res, exp) + + res = df.query('ilevel_0 != "red"', parser=parser, engine=engine) + ind = Series(df.index.get_level_values(0).values, index=index) + exp = df[ind != "red"] + tm.assert_frame_equal(res, exp) + + def test_query_multiindex_get_index_resolvers(self): + df = tm.makeCustomDataframe( + 10, 3, r_idx_nlevels=2, r_idx_names=["spam", "eggs"] + ) + resolvers = df._get_index_resolvers() + + def to_series(mi, level): + level_values = mi.get_level_values(level) + s = level_values.to_series() + s.index = mi + return s + + col_series = df.columns.to_series() + expected = { + "index": df.index, + "columns": col_series, + "spam": to_series(df.index, "spam"), + "eggs": to_series(df.index, "eggs"), + "C0": col_series, 
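Aside: the level-name lookup exercised throughout this class reduces to a few lines. A sketch with invented data; the ilevel_<n> alias is the fallback these tests use for unnamed levels:

import pandas as pd

idx = pd.MultiIndex.from_arrays(
    [["red", "green", "red"], ["eggs", "ham", "ham"]],
    names=["color", None],  # second level deliberately left unnamed
)
df = pd.DataFrame({"x": [1, 2, 3]}, index=idx)

by_name = df.query('color == "red"')         # named level, addressed directly
by_position = df.query('ilevel_1 == "ham"')  # unnamed level via its position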
+ } + for k, v in resolvers.items(): + if isinstance(v, Index): + assert v.is_(expected[k]) + elif isinstance(v, Series): + tm.assert_series_equal(v, expected[k]) + else: + raise AssertionError("object must be a Series or Index") + + +@td.skip_if_no_ne +class TestDataFrameQueryNumExprPandas: + @classmethod + def setup_class(cls): + cls.engine = "numexpr" + cls.parser = "pandas" + + @classmethod + def teardown_class(cls): + del cls.engine, cls.parser + + def test_date_query_with_attribute_access(self): + engine, parser = self.engine, self.parser + skip_if_no_pandas_parser(parser) + df = DataFrame(np.random.randn(5, 3)) + df["dates1"] = date_range("1/1/2012", periods=5) + df["dates2"] = date_range("1/1/2013", periods=5) + df["dates3"] = date_range("1/1/2014", periods=5) + res = df.query( + "@df.dates1 < 20130101 < @df.dates3", engine=engine, parser=parser + ) + expec = df[(df.dates1 < "20130101") & ("20130101" < df.dates3)] + tm.assert_frame_equal(res, expec) + + def test_date_query_no_attribute_access(self): + engine, parser = self.engine, self.parser + df = DataFrame(np.random.randn(5, 3)) + df["dates1"] = date_range("1/1/2012", periods=5) + df["dates2"] = date_range("1/1/2013", periods=5) + df["dates3"] = date_range("1/1/2014", periods=5) + res = df.query("dates1 < 20130101 < dates3", engine=engine, parser=parser) + expec = df[(df.dates1 < "20130101") & ("20130101" < df.dates3)] + tm.assert_frame_equal(res, expec) + + def test_date_query_with_NaT(self): + engine, parser = self.engine, self.parser + n = 10 + df = DataFrame(np.random.randn(n, 3)) + df["dates1"] = date_range("1/1/2012", periods=n) + df["dates2"] = date_range("1/1/2013", periods=n) + df["dates3"] = date_range("1/1/2014", periods=n) + df.loc[np.random.rand(n) > 0.5, "dates1"] = pd.NaT + df.loc[np.random.rand(n) > 0.5, "dates3"] = pd.NaT + res = df.query("dates1 < 20130101 < dates3", engine=engine, parser=parser) + expec = df[(df.dates1 < "20130101") & ("20130101" < df.dates3)] + tm.assert_frame_equal(res, expec) + + def test_date_index_query(self): + engine, parser = self.engine, self.parser + n = 10 + df = DataFrame(np.random.randn(n, 3)) + df["dates1"] = date_range("1/1/2012", periods=n) + df["dates3"] = date_range("1/1/2014", periods=n) + return_value = df.set_index("dates1", inplace=True, drop=True) + assert return_value is None + res = df.query("index < 20130101 < dates3", engine=engine, parser=parser) + expec = df[(df.index < "20130101") & ("20130101" < df.dates3)] + tm.assert_frame_equal(res, expec) + + def test_date_index_query_with_NaT(self): + engine, parser = self.engine, self.parser + n = 10 + df = DataFrame(np.random.randn(n, 3)) + df["dates1"] = date_range("1/1/2012", periods=n) + df["dates3"] = date_range("1/1/2014", periods=n) + df.iloc[0, 0] = pd.NaT + return_value = df.set_index("dates1", inplace=True, drop=True) + assert return_value is None + res = df.query("index < 20130101 < dates3", engine=engine, parser=parser) + expec = df[(df.index < "20130101") & ("20130101" < df.dates3)] + tm.assert_frame_equal(res, expec) + + def test_date_index_query_with_NaT_duplicates(self): + engine, parser = self.engine, self.parser + n = 10 + d = {} + d["dates1"] = date_range("1/1/2012", periods=n) + d["dates3"] = date_range("1/1/2014", periods=n) + df = DataFrame(d) + df.loc[np.random.rand(n) > 0.5, "dates1"] = pd.NaT + return_value = df.set_index("dates1", inplace=True, drop=True) + assert return_value is None + res = df.query("dates1 < 20130101 < dates3", engine=engine, parser=parser) + expec = df[(df.index.to_series() < 
"20130101") & ("20130101" < df.dates3)] + tm.assert_frame_equal(res, expec) + + def test_date_query_with_non_date(self): + engine, parser = self.engine, self.parser + + n = 10 + df = DataFrame( + {"dates": date_range("1/1/2012", periods=n), "nondate": np.arange(n)} + ) + + result = df.query("dates == nondate", parser=parser, engine=engine) + assert len(result) == 0 + + result = df.query("dates != nondate", parser=parser, engine=engine) + tm.assert_frame_equal(result, df) + + msg = r"Invalid comparison between dtype=datetime64\[ns\] and ndarray" + for op in ["<", ">", "<=", ">="]: + with pytest.raises(TypeError, match=msg): + df.query(f"dates {op} nondate", parser=parser, engine=engine) + + def test_query_syntax_error(self): + engine, parser = self.engine, self.parser + df = DataFrame({"i": range(10), "+": range(3, 13), "r": range(4, 14)}) + msg = "invalid syntax" + with pytest.raises(SyntaxError, match=msg): + df.query("i - +", engine=engine, parser=parser) + + def test_query_scope(self): + from pandas.core.computation.ops import UndefinedVariableError + + engine, parser = self.engine, self.parser + skip_if_no_pandas_parser(parser) + + df = DataFrame(np.random.randn(20, 2), columns=list("ab")) + + a, b = 1, 2 # noqa:F841 + res = df.query("a > b", engine=engine, parser=parser) + expected = df[df.a > df.b] + tm.assert_frame_equal(res, expected) + + res = df.query("@a > b", engine=engine, parser=parser) + expected = df[a > df.b] + tm.assert_frame_equal(res, expected) + + # no local variable c + with pytest.raises( + UndefinedVariableError, match="local variable 'c' is not defined" + ): + df.query("@a > b > @c", engine=engine, parser=parser) + + # no column named 'c' + with pytest.raises(UndefinedVariableError, match="name 'c' is not defined"): + df.query("@a > b > c", engine=engine, parser=parser) + + def test_query_doesnt_pickup_local(self): + from pandas.core.computation.ops import UndefinedVariableError + + engine, parser = self.engine, self.parser + n = m = 10 + df = DataFrame(np.random.randint(m, size=(n, 3)), columns=list("abc")) + + # we don't pick up the local 'sin' + with pytest.raises(UndefinedVariableError, match="name 'sin' is not defined"): + df.query("sin > 5", engine=engine, parser=parser) + + def test_query_builtin(self): + from pandas.core.computation.engines import NumExprClobberingError + + engine, parser = self.engine, self.parser + + n = m = 10 + df = DataFrame(np.random.randint(m, size=(n, 3)), columns=list("abc")) + + df.index.name = "sin" + msg = "Variables in expression.+" + with pytest.raises(NumExprClobberingError, match=msg): + df.query("sin > 5", engine=engine, parser=parser) + + def test_query(self): + engine, parser = self.engine, self.parser + df = DataFrame(np.random.randn(10, 3), columns=["a", "b", "c"]) + + tm.assert_frame_equal( + df.query("a < b", engine=engine, parser=parser), df[df.a < df.b] + ) + tm.assert_frame_equal( + df.query("a + b > b * c", engine=engine, parser=parser), + df[df.a + df.b > df.b * df.c], + ) + + def test_query_index_with_name(self): + engine, parser = self.engine, self.parser + df = DataFrame( + np.random.randint(10, size=(10, 3)), + index=Index(range(10), name="blob"), + columns=["a", "b", "c"], + ) + res = df.query("(blob < 5) & (a < b)", engine=engine, parser=parser) + expec = df[(df.index < 5) & (df.a < df.b)] + tm.assert_frame_equal(res, expec) + + res = df.query("blob < b", engine=engine, parser=parser) + expec = df[df.index < df.b] + + tm.assert_frame_equal(res, expec) + + def test_query_index_without_name(self): + engine, 
parser = self.engine, self.parser + df = DataFrame( + np.random.randint(10, size=(10, 3)), + index=range(10), + columns=["a", "b", "c"], + ) + + # "index" should refer to the index + res = df.query("index < b", engine=engine, parser=parser) + expec = df[df.index < df.b] + tm.assert_frame_equal(res, expec) + + # test against a scalar + res = df.query("index < 5", engine=engine, parser=parser) + expec = df[df.index < 5] + tm.assert_frame_equal(res, expec) + + def test_nested_scope(self): + engine = self.engine + parser = self.parser + + skip_if_no_pandas_parser(parser) + + df = DataFrame(np.random.randn(5, 3)) + df2 = DataFrame(np.random.randn(5, 3)) + expected = df[(df > 0) & (df2 > 0)] + + result = df.query("(@df > 0) & (@df2 > 0)", engine=engine, parser=parser) + tm.assert_frame_equal(result, expected) + + result = pd.eval("df[df > 0 and df2 > 0]", engine=engine, parser=parser) + tm.assert_frame_equal(result, expected) + + result = pd.eval( + "df[df > 0 and df2 > 0 and df[df > 0] > 0]", engine=engine, parser=parser + ) + expected = df[(df > 0) & (df2 > 0) & (df[df > 0] > 0)] + tm.assert_frame_equal(result, expected) + + result = pd.eval("df[(df>0) & (df2>0)]", engine=engine, parser=parser) + expected = df.query("(@df>0) & (@df2>0)", engine=engine, parser=parser) + tm.assert_frame_equal(result, expected) + + def test_nested_raises_on_local_self_reference(self): + from pandas.core.computation.ops import UndefinedVariableError + + df = DataFrame(np.random.randn(5, 3)) + + # can't reference ourself b/c we're a local so @ is necessary + with pytest.raises(UndefinedVariableError, match="name 'df' is not defined"): + df.query("df > 0", engine=self.engine, parser=self.parser) + + def test_local_syntax(self): + skip_if_no_pandas_parser(self.parser) + + engine, parser = self.engine, self.parser + df = DataFrame(np.random.randn(100, 10), columns=list("abcdefghij")) + b = 1 + expect = df[df.a < b] + result = df.query("a < @b", engine=engine, parser=parser) + tm.assert_frame_equal(result, expect) + + expect = df[df.a < df.b] + result = df.query("a < b", engine=engine, parser=parser) + tm.assert_frame_equal(result, expect) + + def test_chained_cmp_and_in(self): + skip_if_no_pandas_parser(self.parser) + engine, parser = self.engine, self.parser + cols = list("abc") + df = DataFrame(np.random.randn(100, len(cols)), columns=cols) + res = df.query( + "a < b < c and a not in b not in c", engine=engine, parser=parser + ) + ind = (df.a < df.b) & (df.b < df.c) & ~df.b.isin(df.a) & ~df.c.isin(df.b) + expec = df[ind] + tm.assert_frame_equal(res, expec) + + def test_local_variable_with_in(self): + engine, parser = self.engine, self.parser + skip_if_no_pandas_parser(parser) + a = Series(np.random.randint(3, size=15), name="a") + b = Series(np.random.randint(10, size=15), name="b") + df = DataFrame({"a": a, "b": b}) + + expected = df.loc[(df.b - 1).isin(a)] + result = df.query("b - 1 in a", engine=engine, parser=parser) + tm.assert_frame_equal(expected, result) + + b = Series(np.random.randint(10, size=15), name="b") + expected = df.loc[(b - 1).isin(a)] + result = df.query("@b - 1 in a", engine=engine, parser=parser) + tm.assert_frame_equal(expected, result) + + def test_at_inside_string(self): + engine, parser = self.engine, self.parser + skip_if_no_pandas_parser(parser) + c = 1 # noqa:F841 + df = DataFrame({"a": ["a", "a", "b", "b", "@c", "@c"]}) + result = df.query('a == "@c"', engine=engine, parser=parser) + expected = df[df.a == "@c"] + tm.assert_frame_equal(result, expected) + + def 
test_query_undefined_local(self): + from pandas.core.computation.ops import UndefinedVariableError + + engine, parser = self.engine, self.parser + skip_if_no_pandas_parser(parser) + + df = DataFrame(np.random.rand(10, 2), columns=list("ab")) + with pytest.raises( + UndefinedVariableError, match="local variable 'c' is not defined" + ): + df.query("a == @c", engine=engine, parser=parser) + + def test_index_resolvers_come_after_columns_with_the_same_name(self): + n = 1 # noqa:F841 + a = np.r_[20:101:20] + + df = DataFrame({"index": a, "b": np.random.randn(a.size)}) + df.index.name = "index" + result = df.query("index > 5", engine=self.engine, parser=self.parser) + expected = df[df["index"] > 5] + tm.assert_frame_equal(result, expected) + + df = DataFrame({"index": a, "b": np.random.randn(a.size)}) + result = df.query("ilevel_0 > 5", engine=self.engine, parser=self.parser) + expected = df.loc[df.index[df.index > 5]] + tm.assert_frame_equal(result, expected) + + df = DataFrame({"a": a, "b": np.random.randn(a.size)}) + df.index.name = "a" + result = df.query("a > 5", engine=self.engine, parser=self.parser) + expected = df[df.a > 5] + tm.assert_frame_equal(result, expected) + + result = df.query("index > 5", engine=self.engine, parser=self.parser) + expected = df.loc[df.index[df.index > 5]] + tm.assert_frame_equal(result, expected) + + def test_inf(self): + n = 10 + df = DataFrame({"a": np.random.rand(n), "b": np.random.rand(n)}) + df.loc[::2, 0] = np.inf + d = {"==": operator.eq, "!=": operator.ne} + for op, f in d.items(): + q = f"a {op} inf" + expected = df[f(df.a, np.inf)] + result = df.query(q, engine=self.engine, parser=self.parser) + tm.assert_frame_equal(result, expected) + + def test_check_tz_aware_index_query(self, tz_aware_fixture): + # https://github.com/pandas-dev/pandas/issues/29463 + tz = tz_aware_fixture + df_index = date_range( + start="2019-01-01", freq="1d", periods=10, tz=tz, name="time" + ) + expected = DataFrame(index=df_index) + df = DataFrame(index=df_index) + result = df.query('"2018-01-03 00:00:00+00" < time') + tm.assert_frame_equal(result, expected) + + expected = DataFrame(df_index) + result = df.reset_index().query('"2018-01-03 00:00:00+00" < time') + tm.assert_frame_equal(result, expected) + + def test_method_calls_in_query(self): + # https://github.com/pandas-dev/pandas/issues/22435 + n = 10 + df = DataFrame({"a": 2 * np.random.rand(n), "b": np.random.rand(n)}) + expected = df[df["a"].astype("int") == 0] + result = df.query( + "a.astype('int') == 0", engine=self.engine, parser=self.parser + ) + tm.assert_frame_equal(result, expected) + + df = DataFrame( + { + "a": np.where(np.random.rand(n) < 0.5, np.nan, np.random.randn(n)), + "b": np.random.randn(n), + } + ) + expected = df[df["a"].notnull()] + result = df.query("a.notnull()", engine=self.engine, parser=self.parser) + tm.assert_frame_equal(result, expected) + + +@td.skip_if_no_ne +class TestDataFrameQueryNumExprPython(TestDataFrameQueryNumExprPandas): + @classmethod + def setup_class(cls): + super().setup_class() + cls.engine = "numexpr" + cls.parser = "python" + + def test_date_query_no_attribute_access(self): + engine, parser = self.engine, self.parser + df = DataFrame(np.random.randn(5, 3)) + df["dates1"] = date_range("1/1/2012", periods=5) + df["dates2"] = date_range("1/1/2013", periods=5) + df["dates3"] = date_range("1/1/2014", periods=5) + res = df.query( + "(dates1 < 20130101) & (20130101 < dates3)", engine=engine, parser=parser + ) + expec = df[(df.dates1 < "20130101") & ("20130101" < df.dates3)] + 
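test_query_undefined_local above (together with the local-syntax tests before it) pins down the @ scoping rule of the pandas parser. A self-contained sketch, names invented here:

import pandas as pd

df = pd.DataFrame({"a": [0.2, 0.8, 1.5], "b": [1.0, 0.5, 2.0]})
b = 1  # shadows the column name in the enclosing Python scope

# Bare names resolve to columns first; @name explicitly takes the local.
cols_only = df.query("a < b")    # column a vs column b
with_local = df.query("a < @b")  # column a vs the Python variable b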
tm.assert_frame_equal(res, expec) + + def test_date_query_with_NaT(self): + engine, parser = self.engine, self.parser + n = 10 + df = DataFrame(np.random.randn(n, 3)) + df["dates1"] = date_range("1/1/2012", periods=n) + df["dates2"] = date_range("1/1/2013", periods=n) + df["dates3"] = date_range("1/1/2014", periods=n) + df.loc[np.random.rand(n) > 0.5, "dates1"] = pd.NaT + df.loc[np.random.rand(n) > 0.5, "dates3"] = pd.NaT + res = df.query( + "(dates1 < 20130101) & (20130101 < dates3)", engine=engine, parser=parser + ) + expec = df[(df.dates1 < "20130101") & ("20130101" < df.dates3)] + tm.assert_frame_equal(res, expec) + + def test_date_index_query(self): + engine, parser = self.engine, self.parser + n = 10 + df = DataFrame(np.random.randn(n, 3)) + df["dates1"] = date_range("1/1/2012", periods=n) + df["dates3"] = date_range("1/1/2014", periods=n) + return_value = df.set_index("dates1", inplace=True, drop=True) + assert return_value is None + res = df.query( + "(index < 20130101) & (20130101 < dates3)", engine=engine, parser=parser + ) + expec = df[(df.index < "20130101") & ("20130101" < df.dates3)] + tm.assert_frame_equal(res, expec) + + def test_date_index_query_with_NaT(self): + engine, parser = self.engine, self.parser + n = 10 + df = DataFrame(np.random.randn(n, 3)) + df["dates1"] = date_range("1/1/2012", periods=n) + df["dates3"] = date_range("1/1/2014", periods=n) + df.iloc[0, 0] = pd.NaT + return_value = df.set_index("dates1", inplace=True, drop=True) + assert return_value is None + res = df.query( + "(index < 20130101) & (20130101 < dates3)", engine=engine, parser=parser + ) + expec = df[(df.index < "20130101") & ("20130101" < df.dates3)] + tm.assert_frame_equal(res, expec) + + def test_date_index_query_with_NaT_duplicates(self): + engine, parser = self.engine, self.parser + n = 10 + df = DataFrame(np.random.randn(n, 3)) + df["dates1"] = date_range("1/1/2012", periods=n) + df["dates3"] = date_range("1/1/2014", periods=n) + df.loc[np.random.rand(n) > 0.5, "dates1"] = pd.NaT + return_value = df.set_index("dates1", inplace=True, drop=True) + assert return_value is None + msg = r"'BoolOp' nodes are not implemented" + with pytest.raises(NotImplementedError, match=msg): + df.query("index < 20130101 < dates3", engine=engine, parser=parser) + + def test_nested_scope(self): + from pandas.core.computation.ops import UndefinedVariableError + + engine = self.engine + parser = self.parser + # smoke test + x = 1 # noqa:F841 + result = pd.eval("x + 1", engine=engine, parser=parser) + assert result == 2 + + df = DataFrame(np.random.randn(5, 3)) + df2 = DataFrame(np.random.randn(5, 3)) + + # don't have the pandas parser + msg = r"The '@' prefix is only supported by the pandas parser" + with pytest.raises(SyntaxError, match=msg): + df.query("(@df>0) & (@df2>0)", engine=engine, parser=parser) + + with pytest.raises(UndefinedVariableError, match="name 'df' is not defined"): + df.query("(df>0) & (df2>0)", engine=engine, parser=parser) + + expected = df[(df > 0) & (df2 > 0)] + result = pd.eval("df[(df > 0) & (df2 > 0)]", engine=engine, parser=parser) + tm.assert_frame_equal(expected, result) + + expected = df[(df > 0) & (df2 > 0) & (df[df > 0] > 0)] + result = pd.eval( + "df[(df > 0) & (df2 > 0) & (df[df > 0] > 0)]", engine=engine, parser=parser + ) + tm.assert_frame_equal(expected, result) + + +class TestDataFrameQueryPythonPandas(TestDataFrameQueryNumExprPandas): + @classmethod + def setup_class(cls): + super().setup_class() + cls.engine = "python" + cls.parser = "pandas" + + def 
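The subclass above re-spells each date query because parser="python" supports neither @-prefixed locals nor implicit expansion of chained comparisons; conditions must be written out with explicit parentheses and &. A minimal sketch:

import pandas as pd

df = pd.DataFrame({"dates1": pd.date_range("2012-01-01", periods=5),
                   "dates3": pd.date_range("2014-01-01", periods=5)})

# parser="pandas" (the default) rewrites a < b < c into (a < b) & (b < c);
# parser="python" requires the expansion by hand.
res = df.query("(dates1 < 20130101) & (20130101 < dates3)", parser="python")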
test_query_builtin(self): + engine, parser = self.engine, self.parser + + n = m = 10 + df = DataFrame(np.random.randint(m, size=(n, 3)), columns=list("abc")) + + df.index.name = "sin" + expected = df[df.index > 5] + result = df.query("sin > 5", engine=engine, parser=parser) + tm.assert_frame_equal(expected, result) + + +class TestDataFrameQueryPythonPython(TestDataFrameQueryNumExprPython): + @classmethod + def setup_class(cls): + super().setup_class() + cls.engine = cls.parser = "python" + + def test_query_builtin(self): + engine, parser = self.engine, self.parser + + n = m = 10 + df = DataFrame(np.random.randint(m, size=(n, 3)), columns=list("abc")) + + df.index.name = "sin" + expected = df[df.index > 5] + result = df.query("sin > 5", engine=engine, parser=parser) + tm.assert_frame_equal(expected, result) + + +class TestDataFrameQueryStrings: + def test_str_query_method(self, parser, engine): + df = DataFrame(np.random.randn(10, 1), columns=["b"]) + df["strings"] = Series(list("aabbccddee")) + expect = df[df.strings == "a"] + + if parser != "pandas": + col = "strings" + lst = '"a"' + + lhs = [col] * 2 + [lst] * 2 + rhs = lhs[::-1] + + eq, ne = "==", "!=" + ops = 2 * ([eq] + [ne]) + msg = r"'(Not)?In' nodes are not implemented" + + for lhs, op, rhs in zip(lhs, ops, rhs): + ex = f"{lhs} {op} {rhs}" + with pytest.raises(NotImplementedError, match=msg): + df.query( + ex, + engine=engine, + parser=parser, + local_dict={"strings": df.strings}, + ) + else: + res = df.query('"a" == strings', engine=engine, parser=parser) + tm.assert_frame_equal(res, expect) + + res = df.query('strings == "a"', engine=engine, parser=parser) + tm.assert_frame_equal(res, expect) + tm.assert_frame_equal(res, df[df.strings.isin(["a"])]) + + expect = df[df.strings != "a"] + res = df.query('strings != "a"', engine=engine, parser=parser) + tm.assert_frame_equal(res, expect) + + res = df.query('"a" != strings', engine=engine, parser=parser) + tm.assert_frame_equal(res, expect) + tm.assert_frame_equal(res, df[~df.strings.isin(["a"])]) + + def test_str_list_query_method(self, parser, engine): + df = DataFrame(np.random.randn(10, 1), columns=["b"]) + df["strings"] = Series(list("aabbccddee")) + expect = df[df.strings.isin(["a", "b"])] + + if parser != "pandas": + col = "strings" + lst = '["a", "b"]' + + lhs = [col] * 2 + [lst] * 2 + rhs = lhs[::-1] + + eq, ne = "==", "!=" + ops = 2 * ([eq] + [ne]) + msg = r"'(Not)?In' nodes are not implemented" + + for lhs, op, rhs in zip(lhs, ops, rhs): + ex = f"{lhs} {op} {rhs}" + with pytest.raises(NotImplementedError, match=msg): + df.query(ex, engine=engine, parser=parser) + else: + res = df.query('strings == ["a", "b"]', engine=engine, parser=parser) + tm.assert_frame_equal(res, expect) + + res = df.query('["a", "b"] == strings', engine=engine, parser=parser) + tm.assert_frame_equal(res, expect) + + expect = df[~df.strings.isin(["a", "b"])] + + res = df.query('strings != ["a", "b"]', engine=engine, parser=parser) + tm.assert_frame_equal(res, expect) + + res = df.query('["a", "b"] != strings', engine=engine, parser=parser) + tm.assert_frame_equal(res, expect) + + def test_query_with_string_columns(self, parser, engine): + df = DataFrame( + { + "a": list("aaaabbbbcccc"), + "b": list("aabbccddeeff"), + "c": np.random.randint(5, size=12), + "d": np.random.randint(9, size=12), + } + ) + if parser == "pandas": + res = df.query("a in b", parser=parser, engine=engine) + expec = df[df.a.isin(df.b)] + tm.assert_frame_equal(res, expec) + + res = df.query("a in b and c < d", parser=parser, 
engine=engine) + expec = df[df.a.isin(df.b) & (df.c < df.d)] + tm.assert_frame_equal(res, expec) + else: + msg = r"'(Not)?In' nodes are not implemented" + with pytest.raises(NotImplementedError, match=msg): + df.query("a in b", parser=parser, engine=engine) + + msg = r"'BoolOp' nodes are not implemented" + with pytest.raises(NotImplementedError, match=msg): + df.query("a in b and c < d", parser=parser, engine=engine) + + def test_object_array_eq_ne(self, parser, engine): + df = DataFrame( + { + "a": list("aaaabbbbcccc"), + "b": list("aabbccddeeff"), + "c": np.random.randint(5, size=12), + "d": np.random.randint(9, size=12), + } + ) + res = df.query("a == b", parser=parser, engine=engine) + exp = df[df.a == df.b] + tm.assert_frame_equal(res, exp) + + res = df.query("a != b", parser=parser, engine=engine) + exp = df[df.a != df.b] + tm.assert_frame_equal(res, exp) + + def test_query_with_nested_strings(self, parser, engine): + skip_if_no_pandas_parser(parser) + events = [ + f"page {n} {act}" for n in range(1, 4) for act in ["load", "exit"] + ] * 2 + stamps1 = date_range("2014-01-01 0:00:01", freq="30s", periods=6) + stamps2 = date_range("2014-02-01 1:00:01", freq="30s", periods=6) + df = DataFrame( + { + "id": np.arange(1, 7).repeat(2), + "event": events, + "timestamp": stamps1.append(stamps2), + } + ) + + expected = df[df.event == '"page 1 load"'] + res = df.query("""'"page 1 load"' in event""", parser=parser, engine=engine) + tm.assert_frame_equal(expected, res) + + def test_query_with_nested_special_character(self, parser, engine): + skip_if_no_pandas_parser(parser) + df = DataFrame({"a": ["a", "b", "test & test"], "b": [1, 2, 3]}) + res = df.query('a == "test & test"', parser=parser, engine=engine) + expec = df[df.a == "test & test"] + tm.assert_frame_equal(res, expec) + + def test_query_lex_compare_strings(self, parser, engine): + + a = Series(np.random.choice(list("abcde"), 20)) + b = Series(np.arange(a.size)) + df = DataFrame({"X": a, "Y": b}) + + ops = {"<": operator.lt, ">": operator.gt, "<=": operator.le, ">=": operator.ge} + + for op, func in ops.items(): + res = df.query(f'X {op} "d"', engine=engine, parser=parser) + expected = df[func(df.X, "d")] + tm.assert_frame_equal(res, expected) + + def test_query_single_element_booleans(self, parser, engine): + columns = "bid", "bidsize", "ask", "asksize" + data = np.random.randint(2, size=(1, len(columns))).astype(bool) + df = DataFrame(data, columns=columns) + res = df.query("bid & ask", engine=engine, parser=parser) + expected = df[df.bid & df.ask] + tm.assert_frame_equal(res, expected) + + def test_query_string_scalar_variable(self, parser, engine): + skip_if_no_pandas_parser(parser) + df = DataFrame( + { + "Symbol": ["BUD US", "BUD US", "IBM US", "IBM US"], + "Price": [109.70, 109.72, 183.30, 183.35], + } + ) + e = df[df.Symbol == "BUD US"] + symb = "BUD US" # noqa:F841 + r = df.query("Symbol == @symb", parser=parser, engine=engine) + tm.assert_frame_equal(e, r) + + +class TestDataFrameEvalWithFrame: + def setup_method(self, method): + self.frame = DataFrame(np.random.randn(10, 3), columns=list("abc")) + + def teardown_method(self, method): + del self.frame + + def test_simple_expr(self, parser, engine): + res = self.frame.eval("a + b", engine=engine, parser=parser) + expect = self.frame.a + self.frame.b + tm.assert_series_equal(res, expect) + + def test_bool_arith_expr(self, parser, engine): + res = self.frame.eval("a[a < 1] + b", engine=engine, parser=parser) + expect = self.frame.a[self.frame.a < 1] + self.frame.b + 
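The string-query tests above rely on the pandas parser treating in / not in as column-wise set membership (isin); the python parser rejects both with NotImplementedError. A small sketch with invented data:

import pandas as pd

df = pd.DataFrame({"a": list("aabc"), "b": list("abbb")})

# Equivalent spellings under the default pandas parser:
member = df.query("a in b")
assert member.equals(df[df.a.isin(df.b)])

non_member = df.query("a not in b")
assert non_member.equals(df[~df.a.isin(df.b)])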
tm.assert_series_equal(res, expect)
+
+    @pytest.mark.parametrize("op", ["+", "-", "*", "/"])
+    def test_invalid_type_for_operator_raises(self, parser, engine, op):
+        df = DataFrame({"a": [1, 2], "b": ["c", "d"]})
+        msg = r"unsupported operand type\(s\) for .+: '.+' and '.+'"
+
+        with pytest.raises(TypeError, match=msg):
+            df.eval(f"a {op} b", engine=engine, parser=parser)
+
+
+class TestDataFrameQueryBacktickQuoting:
+    @pytest.fixture(scope="class")
+    def df(self):
+        """
+        Yields a dataframe with strings that may or may not need escaping
+        by backticks. The last two columns cannot be escaped by backticks
+        and should raise a ValueError.
+        """
+        yield DataFrame(
+            {
+                "A": [1, 2, 3],
+                "B B": [3, 2, 1],
+                "C C": [4, 5, 6],
+                "C  C": [7, 4, 3],  # two spaces: distinct from "C C" above
+                "C_C": [8, 9, 10],
+                "D_D D": [11, 1, 101],
+                "E.E": [6, 3, 5],
+                "F-F": [8, 1, 10],
+                "1e1": [2, 4, 8],
+                "def": [10, 11, 2],
+                "A (x)": [4, 1, 3],
+                "B(x)": [1, 1, 5],
+                "B (x)": [2, 7, 4],
+                " &^ :!€$?(} > <++*'' ": [2, 5, 6],
+                "": [10, 11, 1],
+                " A": [4, 7, 9],
+                " ": [1, 2, 1],
+                "it's": [6, 3, 1],
+                "that's": [9, 1, 8],
+                "☺": [8, 7, 6],
+                "foo#bar": [2, 4, 5],
+                1: [5, 7, 9],
+            }
+        )
+
+    def test_single_backtick_variable_query(self, df):
+        res = df.query("1 < `B B`")
+        expect = df[1 < df["B B"]]
+        tm.assert_frame_equal(res, expect)
+
+    def test_two_backtick_variables_query(self, df):
+        res = df.query("1 < `B B` and 4 < `C C`")
+        expect = df[(1 < df["B B"]) & (4 < df["C C"])]
+        tm.assert_frame_equal(res, expect)
+
+    def test_single_backtick_variable_expr(self, df):
+        res = df.eval("A + `B B`")
+        expect = df["A"] + df["B B"]
+        tm.assert_series_equal(res, expect)
+
+    def test_two_backtick_variables_expr(self, df):
+        res = df.eval("`B B` + `C C`")
+        expect = df["B B"] + df["C C"]
+        tm.assert_series_equal(res, expect)
+
+    def test_already_underscore_variable(self, df):
+        res = df.eval("`C_C` + A")
+        expect = df["C_C"] + df["A"]
+        tm.assert_series_equal(res, expect)
+
+    def test_same_name_but_underscores(self, df):
+        res = df.eval("C_C + `C C`")
+        expect = df["C_C"] + df["C C"]
+        tm.assert_series_equal(res, expect)
+
+    def test_mixed_underscores_and_spaces(self, df):
+        res = df.eval("A + `D_D D`")
+        expect = df["A"] + df["D_D D"]
+        tm.assert_series_equal(res, expect)
+
+    def test_backtick_quote_name_with_no_spaces(self, df):
+        res = df.eval("A + `C_C`")
+        expect = df["A"] + df["C_C"]
+        tm.assert_series_equal(res, expect)
+
+    def test_special_characters(self, df):
+        res = df.eval("`E.E` + `F-F` - A")
+        expect = df["E.E"] + df["F-F"] - df["A"]
+        tm.assert_series_equal(res, expect)
+
+    def test_start_with_digit(self, df):
+        res = df.eval("A + `1e1`")
+        expect = df["A"] + df["1e1"]
+        tm.assert_series_equal(res, expect)
+
+    def test_keyword(self, df):
+        res = df.eval("A + `def`")
+        expect = df["A"] + df["def"]
+        tm.assert_series_equal(res, expect)
+
+    def test_unneeded_quoting(self, df):
+        res = df.query("`A` > 2")
+        expect = df[df["A"] > 2]
+        tm.assert_frame_equal(res, expect)
+
+    def test_parenthesis(self, df):
+        res = df.query("`A (x)` > 2")
+        expect = df[df["A (x)"] > 2]
+        tm.assert_frame_equal(res, expect)
+
+    def test_empty_string(self, df):
+        res = df.query("`` > 5")
+        expect = df[df[""] > 5]
+        tm.assert_frame_equal(res, expect)
+
+    def test_multiple_spaces(self, df):
+        res = df.query("`C  C` > 5")
+        expect = df[df["C  C"] > 5]
+        tm.assert_frame_equal(res, expect)
+
+    def test_start_with_spaces(self, df):
+        res = df.eval("` A` + ` `")
+        expect = df[" A"] + df[" "]
+        tm.assert_series_equal(res, expect)
+
+    def test_lots_of_operators_string(self, df):
+        res = df.query("` &^ :!€$?(} > <++*'' ` > 4")
+        expect = df[df[" &^ :!€$?(} > <++*'' "] > 4]
+        tm.assert_frame_equal(res, expect)
+
+    def test_missing_attribute(self, df):
+        message = "module 'pandas' has no attribute 'thing'"
+        with pytest.raises(AttributeError, match=message):
+            df.eval("@pd.thing")
+
+    def test_failing_quote(self, df):
+        msg = r"(Could not convert ).*( to a valid Python identifier.)"
+        with pytest.raises(SyntaxError, match=msg):
+            df.query("`it's` > `that's`")
+
+    def test_failing_character_outside_range(self, df):
+        msg = r"(Could not convert ).*( to a valid Python identifier.)"
+        with pytest.raises(SyntaxError, match=msg):
+            df.query("`☺` > 4")
+
+    def test_failing_hashtag(self, df):
+        msg = "Failed to parse backticks"
+        with pytest.raises(SyntaxError, match=msg):
+            df.query("`foo#bar` > 4")
+
+    def test_call_non_named_expression(self, df):
+        """
+        Only attributes and variables ('named functions') can be called.
+        .__call__() is not an allowed attribute because that would allow
+        calling anything.
+        https://github.com/pandas-dev/pandas/pull/32460
+        """
+
+        def func(*_):
+            return 1
+
+        funcs = [func]  # noqa:F841
+
+        df.eval("@func()")
+
+        with pytest.raises(TypeError, match="Only named functions are supported"):
+            df.eval("@funcs[0]()")
+
+        with pytest.raises(TypeError, match="Only named functions are supported"):
+            df.eval("@funcs[0].__call__()")
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/test_reductions.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/test_reductions.py
new file mode 100644
index 0000000..245e54d
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/test_reductions.py
@@ -0,0 +1,1773 @@
+from datetime import timedelta
+from decimal import Decimal
+import re
+
+from dateutil.tz import tzlocal
+import numpy as np
+import pytest
+
+from pandas.compat import is_platform_windows
+import pandas.util._test_decorators as td
+
+from pandas.core.dtypes.common import is_categorical_dtype
+
+import pandas as pd
+from pandas import (
+    Categorical,
+    DataFrame,
+    Index,
+    MultiIndex,
+    Series,
+    Timestamp,
+    date_range,
+    isna,
+    notna,
+    to_datetime,
+    to_timedelta,
+)
+import pandas._testing as tm
+import pandas.core.algorithms as algorithms
+import pandas.core.nanops as nanops
+
+
+def assert_stat_op_calc(
+    opname,
+    alternative,
+    frame,
+    has_skipna=True,
+    check_dtype=True,
+    check_dates=False,
+    rtol=1e-5,
+    atol=1e-8,
+    skipna_alternative=None,
+):
+    """
+    Check that operator opname works as advertised on frame
+
+    Parameters
+    ----------
+    opname : str
+        Name of the operator to test on frame
+    alternative : function
+        Function that opname is tested against; i.e. "frame.opname()" should
+        equal "alternative(frame)".
+    frame : DataFrame
+        The object that the tests are executed on
+    has_skipna : bool, default True
+        Whether the method "opname" has the kwarg "skipna"
+    check_dtype : bool, default True
+        Whether the dtypes of the result of "frame.opname()" and
+        "alternative(frame)" should be checked.
+    check_dates : bool, default False
+        Whether opname should be tested on a Datetime Series
+    rtol : float, default 1e-5
+        Relative tolerance.
+    atol : float, default 1e-8
+        Absolute tolerance.
+ skipna_alternative : function, default None + NaN-safe version of alternative + """ + f = getattr(frame, opname) + + if check_dates: + expected_warning = FutureWarning if opname in ["mean", "median"] else None + df = DataFrame({"b": date_range("1/1/2001", periods=2)}) + with tm.assert_produces_warning(expected_warning): + result = getattr(df, opname)() + assert isinstance(result, Series) + + df["a"] = range(len(df)) + with tm.assert_produces_warning(expected_warning): + result = getattr(df, opname)() + assert isinstance(result, Series) + assert len(result) + + if has_skipna: + + def wrapper(x): + return alternative(x.values) + + skipna_wrapper = tm._make_skipna_wrapper(alternative, skipna_alternative) + result0 = f(axis=0, skipna=False) + result1 = f(axis=1, skipna=False) + tm.assert_series_equal( + result0, frame.apply(wrapper), check_dtype=check_dtype, rtol=rtol, atol=atol + ) + tm.assert_series_equal( + result1, + frame.apply(wrapper, axis=1), + rtol=rtol, + atol=atol, + ) + else: + skipna_wrapper = alternative + + result0 = f(axis=0) + result1 = f(axis=1) + tm.assert_series_equal( + result0, + frame.apply(skipna_wrapper), + check_dtype=check_dtype, + rtol=rtol, + atol=atol, + ) + + if opname in ["sum", "prod"]: + expected = frame.apply(skipna_wrapper, axis=1) + tm.assert_series_equal( + result1, expected, check_dtype=False, rtol=rtol, atol=atol + ) + + # check dtypes + if check_dtype: + lcd_dtype = frame.values.dtype + assert lcd_dtype == result0.dtype + assert lcd_dtype == result1.dtype + + # bad axis + with pytest.raises(ValueError, match="No axis named 2"): + f(axis=2) + + # all NA case + if has_skipna: + all_na = frame * np.NaN + r0 = getattr(all_na, opname)(axis=0) + r1 = getattr(all_na, opname)(axis=1) + if opname in ["sum", "prod"]: + unit = 1 if opname == "prod" else 0 # result for empty sum/prod + expected = Series(unit, index=r0.index, dtype=r0.dtype) + tm.assert_series_equal(r0, expected) + expected = Series(unit, index=r1.index, dtype=r1.dtype) + tm.assert_series_equal(r1, expected) + + +def assert_stat_op_api(opname, float_frame, float_string_frame, has_numeric_only=True): + """ + Check that API for operator opname works as advertised on frame + + Parameters + ---------- + opname : str + Name of the operator to test on frame + float_frame : DataFrame + DataFrame with columns of type float + float_string_frame : DataFrame + DataFrame with both float and string columns + has_numeric_only : bool, default False + Whether the method "opname" has the kwarg "numeric_only" + """ + # make sure works on mixed-type frame + getattr(float_string_frame, opname)(axis=0) + getattr(float_string_frame, opname)(axis=1) + + if has_numeric_only: + getattr(float_string_frame, opname)(axis=0, numeric_only=True) + getattr(float_string_frame, opname)(axis=1, numeric_only=True) + getattr(float_frame, opname)(axis=0, numeric_only=False) + getattr(float_frame, opname)(axis=1, numeric_only=False) + + +def assert_bool_op_calc(opname, alternative, frame, has_skipna=True): + """ + Check that bool operator opname works as advertised on frame + + Parameters + ---------- + opname : str + Name of the operator to test on frame + alternative : function + Function that opname is tested against; i.e. "frame.opname()" should + equal "alternative(frame)". 
+ frame : DataFrame + The object that the tests are executed on + has_skipna : bool, default True + Whether the method "opname" has the kwarg "skip_na" + """ + f = getattr(frame, opname) + + if has_skipna: + + def skipna_wrapper(x): + nona = x.dropna().values + return alternative(nona) + + def wrapper(x): + return alternative(x.values) + + result0 = f(axis=0, skipna=False) + result1 = f(axis=1, skipna=False) + + tm.assert_series_equal(result0, frame.apply(wrapper)) + tm.assert_series_equal(result1, frame.apply(wrapper, axis=1)) + else: + skipna_wrapper = alternative + wrapper = alternative + + result0 = f(axis=0) + result1 = f(axis=1) + + tm.assert_series_equal(result0, frame.apply(skipna_wrapper)) + tm.assert_series_equal( + result1, frame.apply(skipna_wrapper, axis=1), check_dtype=False + ) + + # bad axis + with pytest.raises(ValueError, match="No axis named 2"): + f(axis=2) + + # all NA case + if has_skipna: + all_na = frame * np.NaN + r0 = getattr(all_na, opname)(axis=0) + r1 = getattr(all_na, opname)(axis=1) + if opname == "any": + assert not r0.any() + assert not r1.any() + else: + assert r0.all() + assert r1.all() + + +def assert_bool_op_api( + opname, bool_frame_with_na, float_string_frame, has_bool_only=False +): + """ + Check that API for boolean operator opname works as advertised on frame + + Parameters + ---------- + opname : str + Name of the operator to test on frame + float_frame : DataFrame + DataFrame with columns of type float + float_string_frame : DataFrame + DataFrame with both float and string columns + has_bool_only : bool, default False + Whether the method "opname" has the kwarg "bool_only" + """ + # make sure op works on mixed-type frame + mixed = float_string_frame + mixed["_bool_"] = np.random.randn(len(mixed)) > 0.5 + + getattr(mixed, opname)(axis=0) + getattr(mixed, opname)(axis=1) + + if has_bool_only: + getattr(mixed, opname)(axis=0, bool_only=True) + getattr(mixed, opname)(axis=1, bool_only=True) + getattr(bool_frame_with_na, opname)(axis=0, bool_only=False) + getattr(bool_frame_with_na, opname)(axis=1, bool_only=False) + + +class TestDataFrameAnalytics: + + # --------------------------------------------------------------------- + # Reductions + + @pytest.mark.filterwarnings("ignore:Dropping of nuisance:FutureWarning") + def test_stat_op_api(self, float_frame, float_string_frame): + assert_stat_op_api("count", float_frame, float_string_frame) + assert_stat_op_api("sum", float_frame, float_string_frame) + + assert_stat_op_api( + "nunique", float_frame, float_string_frame, has_numeric_only=False + ) + assert_stat_op_api("mean", float_frame, float_string_frame) + assert_stat_op_api("product", float_frame, float_string_frame) + assert_stat_op_api("median", float_frame, float_string_frame) + assert_stat_op_api("min", float_frame, float_string_frame) + assert_stat_op_api("max", float_frame, float_string_frame) + assert_stat_op_api( + "mad", float_frame, float_string_frame, has_numeric_only=False + ) + assert_stat_op_api("var", float_frame, float_string_frame) + assert_stat_op_api("std", float_frame, float_string_frame) + assert_stat_op_api("sem", float_frame, float_string_frame) + assert_stat_op_api("median", float_frame, float_string_frame) + + @pytest.mark.filterwarnings("ignore:Dropping of nuisance:FutureWarning") + @td.skip_if_no_scipy + def test_stat_op_api_skew_kurt(self, float_frame, float_string_frame): + assert_stat_op_api("skew", float_frame, float_string_frame) + assert_stat_op_api("kurt", float_frame, float_string_frame) + + def 
test_stat_op_calc(self, float_frame_with_na, mixed_float_frame): + def count(s): + return notna(s).sum() + + def nunique(s): + return len(algorithms.unique1d(s.dropna())) + + def mad(x): + return np.abs(x - x.mean()).mean() + + def var(x): + return np.var(x, ddof=1) + + def std(x): + return np.std(x, ddof=1) + + def sem(x): + return np.std(x, ddof=1) / np.sqrt(len(x)) + + assert_stat_op_calc( + "nunique", + nunique, + float_frame_with_na, + has_skipna=False, + check_dtype=False, + check_dates=True, + ) + + # GH#32571 check_less_precise is needed on apparently-random + # py37-npdev builds and OSX-PY36-min_version builds + # mixed types (with upcasting happening) + assert_stat_op_calc( + "sum", + np.sum, + mixed_float_frame.astype("float32"), + check_dtype=False, + rtol=1e-3, + ) + + assert_stat_op_calc( + "sum", np.sum, float_frame_with_na, skipna_alternative=np.nansum + ) + assert_stat_op_calc("mean", np.mean, float_frame_with_na, check_dates=True) + assert_stat_op_calc( + "product", np.prod, float_frame_with_na, skipna_alternative=np.nanprod + ) + + assert_stat_op_calc("mad", mad, float_frame_with_na) + assert_stat_op_calc("var", var, float_frame_with_na) + assert_stat_op_calc("std", std, float_frame_with_na) + assert_stat_op_calc("sem", sem, float_frame_with_na) + + assert_stat_op_calc( + "count", + count, + float_frame_with_na, + has_skipna=False, + check_dtype=False, + check_dates=True, + ) + + @td.skip_if_no_scipy + def test_stat_op_calc_skew_kurtosis(self, float_frame_with_na): + def skewness(x): + from scipy.stats import skew + + if len(x) < 3: + return np.nan + return skew(x, bias=False) + + def kurt(x): + from scipy.stats import kurtosis + + if len(x) < 4: + return np.nan + return kurtosis(x, bias=False) + + assert_stat_op_calc("skew", skewness, float_frame_with_na) + assert_stat_op_calc("kurt", kurt, float_frame_with_na) + + # TODO: Ensure warning isn't emitted in the first place + # ignore mean of empty slice and all-NaN + @pytest.mark.filterwarnings("ignore::RuntimeWarning") + def test_median(self, float_frame_with_na, int_frame): + def wrapper(x): + if isna(x).any(): + return np.nan + return np.median(x) + + assert_stat_op_calc("median", wrapper, float_frame_with_na, check_dates=True) + assert_stat_op_calc( + "median", wrapper, int_frame, check_dtype=False, check_dates=True + ) + + @pytest.mark.parametrize( + "method", ["sum", "mean", "prod", "var", "std", "skew", "min", "max"] + ) + def test_stat_operators_attempt_obj_array(self, method): + # GH#676 + data = { + "a": [ + -0.00049987540199591344, + -0.0016467257772919831, + 0.00067695870775883013, + ], + "b": [-0, -0, 0.0], + "c": [ + 0.00031111847529610595, + 0.0014902627951905339, + -0.00094099200035979691, + ], + } + df1 = DataFrame(data, index=["foo", "bar", "baz"], dtype="O") + + df2 = DataFrame({0: [np.nan, 2], 1: [np.nan, 3], 2: [np.nan, 4]}, dtype=object) + + for df in [df1, df2]: + assert df.values.dtype == np.object_ + result = getattr(df, method)(1) + expected = getattr(df.astype("f8"), method)(1) + + if method in ["sum", "prod"]: + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("op", ["mean", "std", "var", "skew", "kurt", "sem"]) + def test_mixed_ops(self, op): + # GH#16116 + df = DataFrame( + { + "int": [1, 2, 3, 4], + "float": [1.0, 2.0, 3.0, 4.0], + "str": ["a", "b", "c", "d"], + } + ) + with tm.assert_produces_warning( + FutureWarning, match="Select only valid columns" + ): + result = getattr(df, op)() + assert len(result) == 2 + + with pd.option_context("use_bottleneck", False): + with 
tm.assert_produces_warning( + FutureWarning, match="Select only valid columns" + ): + result = getattr(df, op)() + assert len(result) == 2 + + def test_reduce_mixed_frame(self): + # GH 6806 + df = DataFrame( + { + "bool_data": [True, True, False, False, False], + "int_data": [10, 20, 30, 40, 50], + "string_data": ["a", "b", "c", "d", "e"], + } + ) + df.reindex(columns=["bool_data", "int_data", "string_data"]) + test = df.sum(axis=0) + tm.assert_numpy_array_equal( + test.values, np.array([2, 150, "abcde"], dtype=object) + ) + alt = df.T.sum(axis=1) + tm.assert_series_equal(test, alt) + + def test_nunique(self): + df = DataFrame({"A": [1, 1, 1], "B": [1, 2, 3], "C": [1, np.nan, 3]}) + tm.assert_series_equal(df.nunique(), Series({"A": 1, "B": 3, "C": 2})) + tm.assert_series_equal( + df.nunique(dropna=False), Series({"A": 1, "B": 3, "C": 3}) + ) + tm.assert_series_equal(df.nunique(axis=1), Series({0: 1, 1: 2, 2: 2})) + tm.assert_series_equal( + df.nunique(axis=1, dropna=False), Series({0: 1, 1: 3, 2: 2}) + ) + + @pytest.mark.parametrize("tz", [None, "UTC"]) + def test_mean_mixed_datetime_numeric(self, tz): + # https://github.com/pandas-dev/pandas/issues/24752 + df = DataFrame({"A": [1, 1], "B": [Timestamp("2000", tz=tz)] * 2}) + with tm.assert_produces_warning(FutureWarning): + result = df.mean() + expected = Series([1.0], index=["A"]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("tz", [None, "UTC"]) + def test_mean_excludes_datetimes(self, tz): + # https://github.com/pandas-dev/pandas/issues/24752 + # Our long-term desired behavior is unclear, but the behavior in + # 0.24.0rc1 was buggy. + df = DataFrame({"A": [Timestamp("2000", tz=tz)] * 2}) + with tm.assert_produces_warning(FutureWarning): + result = df.mean() + + expected = Series(dtype=np.float64) + tm.assert_series_equal(result, expected) + + def test_mean_mixed_string_decimal(self): + # GH 11670 + # possible bug when calculating mean of DataFrame? 
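test_mixed_ops above encodes the 1.3-era deprecation that the surrounding suite targets: reductions over frames with non-numeric columns emit a FutureWarning and silently drop the offending columns. A sketch, assuming that pandas version:

import warnings
import pandas as pd

df = pd.DataFrame({"int": [1, 2], "str": ["a", "b"]})

with warnings.catch_warnings():
    warnings.simplefilter("ignore", FutureWarning)  # "Select only valid columns"
    result = df.mean()  # the "str" column is dropped from the result

assert list(result.index) == ["int"]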
+ + d = [ + {"A": 2, "B": None, "C": Decimal("628.00")}, + {"A": 1, "B": None, "C": Decimal("383.00")}, + {"A": 3, "B": None, "C": Decimal("651.00")}, + {"A": 2, "B": None, "C": Decimal("575.00")}, + {"A": 4, "B": None, "C": Decimal("1114.00")}, + {"A": 1, "B": "TEST", "C": Decimal("241.00")}, + {"A": 2, "B": None, "C": Decimal("572.00")}, + {"A": 4, "B": None, "C": Decimal("609.00")}, + {"A": 3, "B": None, "C": Decimal("820.00")}, + {"A": 5, "B": None, "C": Decimal("1223.00")}, + ] + + df = DataFrame(d) + + with tm.assert_produces_warning( + FutureWarning, match="Select only valid columns" + ): + result = df.mean() + expected = Series([2.7, 681.6], index=["A", "C"]) + tm.assert_series_equal(result, expected) + + def test_var_std(self, datetime_frame): + result = datetime_frame.std(ddof=4) + expected = datetime_frame.apply(lambda x: x.std(ddof=4)) + tm.assert_almost_equal(result, expected) + + result = datetime_frame.var(ddof=4) + expected = datetime_frame.apply(lambda x: x.var(ddof=4)) + tm.assert_almost_equal(result, expected) + + arr = np.repeat(np.random.random((1, 1000)), 1000, 0) + result = nanops.nanvar(arr, axis=0) + assert not (result < 0).any() + + with pd.option_context("use_bottleneck", False): + result = nanops.nanvar(arr, axis=0) + assert not (result < 0).any() + + @pytest.mark.parametrize("meth", ["sem", "var", "std"]) + def test_numeric_only_flag(self, meth): + # GH 9201 + df1 = DataFrame(np.random.randn(5, 3), columns=["foo", "bar", "baz"]) + # set one entry to a number in str format + df1.loc[0, "foo"] = "100" + + df2 = DataFrame(np.random.randn(5, 3), columns=["foo", "bar", "baz"]) + # set one entry to a non-number str + df2.loc[0, "foo"] = "a" + + result = getattr(df1, meth)(axis=1, numeric_only=True) + expected = getattr(df1[["bar", "baz"]], meth)(axis=1) + tm.assert_series_equal(expected, result) + + result = getattr(df2, meth)(axis=1, numeric_only=True) + expected = getattr(df2[["bar", "baz"]], meth)(axis=1) + tm.assert_series_equal(expected, result) + + # df1 has all numbers, df2 has a letter inside + msg = r"unsupported operand type\(s\) for -: 'float' and 'str'" + with pytest.raises(TypeError, match=msg): + getattr(df1, meth)(axis=1, numeric_only=False) + msg = "could not convert string to float: 'a'" + with pytest.raises(TypeError, match=msg): + getattr(df2, meth)(axis=1, numeric_only=False) + + def test_sem(self, datetime_frame): + result = datetime_frame.sem(ddof=4) + expected = datetime_frame.apply(lambda x: x.std(ddof=4) / np.sqrt(len(x))) + tm.assert_almost_equal(result, expected) + + arr = np.repeat(np.random.random((1, 1000)), 1000, 0) + result = nanops.nansem(arr, axis=0) + assert not (result < 0).any() + + with pd.option_context("use_bottleneck", False): + result = nanops.nansem(arr, axis=0) + assert not (result < 0).any() + + @td.skip_if_no_scipy + def test_kurt(self): + index = MultiIndex( + levels=[["bar"], ["one", "two", "three"], [0, 1]], + codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], + ) + df = DataFrame(np.random.randn(6, 3), index=index) + + kurt = df.kurt() + with tm.assert_produces_warning(FutureWarning): + kurt2 = df.kurt(level=0).xs("bar") + tm.assert_series_equal(kurt, kurt2, check_names=False) + assert kurt.name is None + assert kurt2.name == "bar" + + @pytest.mark.parametrize( + "dropna, expected", + [ + ( + True, + { + "A": [12], + "B": [10.0], + "C": [1.0], + "D": ["a"], + "E": Categorical(["a"], categories=["a"]), + "F": to_datetime(["2000-1-2"]), + "G": to_timedelta(["1 days"]), + }, + ), + ( + False, + { + "A": 
[12], + "B": [10.0], + "C": [np.nan], + "D": np.array([np.nan], dtype=object), + "E": Categorical([np.nan], categories=["a"]), + "F": [pd.NaT], + "G": to_timedelta([pd.NaT]), + }, + ), + ( + True, + { + "H": [8, 9, np.nan, np.nan], + "I": [8, 9, np.nan, np.nan], + "J": [1, np.nan, np.nan, np.nan], + "K": Categorical(["a", np.nan, np.nan, np.nan], categories=["a"]), + "L": to_datetime(["2000-1-2", "NaT", "NaT", "NaT"]), + "M": to_timedelta(["1 days", "nan", "nan", "nan"]), + "N": [0, 1, 2, 3], + }, + ), + ( + False, + { + "H": [8, 9, np.nan, np.nan], + "I": [8, 9, np.nan, np.nan], + "J": [1, np.nan, np.nan, np.nan], + "K": Categorical([np.nan, "a", np.nan, np.nan], categories=["a"]), + "L": to_datetime(["NaT", "2000-1-2", "NaT", "NaT"]), + "M": to_timedelta(["nan", "1 days", "nan", "nan"]), + "N": [0, 1, 2, 3], + }, + ), + ], + ) + def test_mode_dropna(self, dropna, expected): + + df = DataFrame( + { + "A": [12, 12, 19, 11], + "B": [10, 10, np.nan, 3], + "C": [1, np.nan, np.nan, np.nan], + "D": [np.nan, np.nan, "a", np.nan], + "E": Categorical([np.nan, np.nan, "a", np.nan]), + "F": to_datetime(["NaT", "2000-1-2", "NaT", "NaT"]), + "G": to_timedelta(["1 days", "nan", "nan", "nan"]), + "H": [8, 8, 9, 9], + "I": [9, 9, 8, 8], + "J": [1, 1, np.nan, np.nan], + "K": Categorical(["a", np.nan, "a", np.nan]), + "L": to_datetime(["2000-1-2", "2000-1-2", "NaT", "NaT"]), + "M": to_timedelta(["1 days", "nan", "1 days", "nan"]), + "N": np.arange(4, dtype="int64"), + } + ) + + result = df[sorted(expected.keys())].mode(dropna=dropna) + expected = DataFrame(expected) + tm.assert_frame_equal(result, expected) + + def test_mode_sortwarning(self): + # Check for the warning that is raised when the mode + # results cannot be sorted + + df = DataFrame({"A": [np.nan, np.nan, "a", "a"]}) + expected = DataFrame({"A": ["a", np.nan]}) + + with tm.assert_produces_warning(UserWarning): + result = df.mode(dropna=False) + result = result.sort_values(by="A").reset_index(drop=True) + + tm.assert_frame_equal(result, expected) + + def test_mode_empty_df(self): + df = DataFrame([], columns=["a", "b"]) + result = df.mode() + expected = DataFrame([], columns=["a", "b"], index=Index([], dtype=int)) + tm.assert_frame_equal(result, expected) + + def test_operators_timedelta64(self): + df = DataFrame( + { + "A": date_range("2012-1-1", periods=3, freq="D"), + "B": date_range("2012-1-2", periods=3, freq="D"), + "C": Timestamp("20120101") - timedelta(minutes=5, seconds=5), + } + ) + + diffs = DataFrame({"A": df["A"] - df["C"], "B": df["A"] - df["B"]}) + + # min + result = diffs.min() + assert result[0] == diffs.loc[0, "A"] + assert result[1] == diffs.loc[0, "B"] + + result = diffs.min(axis=1) + assert (result == diffs.loc[0, "B"]).all() + + # max + result = diffs.max() + assert result[0] == diffs.loc[2, "A"] + assert result[1] == diffs.loc[2, "B"] + + result = diffs.max(axis=1) + assert (result == diffs["A"]).all() + + # abs + result = diffs.abs() + result2 = abs(diffs) + expected = DataFrame({"A": df["A"] - df["C"], "B": df["B"] - df["A"]}) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected) + + # mixed frame + mixed = diffs.copy() + mixed["C"] = "foo" + mixed["D"] = 1 + mixed["E"] = 1.0 + mixed["F"] = Timestamp("20130101") + + # results in an object array + result = mixed.min() + expected = Series( + [ + pd.Timedelta(timedelta(seconds=5 * 60 + 5)), + pd.Timedelta(timedelta(days=-1)), + "foo", + 1, + 1.0, + Timestamp("20130101"), + ], + index=mixed.columns, + ) + tm.assert_series_equal(result, 
expected) + + # excludes numeric + with tm.assert_produces_warning(FutureWarning, match="Select only valid"): + result = mixed.min(axis=1) + expected = Series([1, 1, 1.0], index=[0, 1, 2]) + tm.assert_series_equal(result, expected) + + # works when only those columns are selected + result = mixed[["A", "B"]].min(1) + expected = Series([timedelta(days=-1)] * 3) + tm.assert_series_equal(result, expected) + + result = mixed[["A", "B"]].min() + expected = Series( + [timedelta(seconds=5 * 60 + 5), timedelta(days=-1)], index=["A", "B"] + ) + tm.assert_series_equal(result, expected) + + # GH 3106 + df = DataFrame( + { + "time": date_range("20130102", periods=5), + "time2": date_range("20130105", periods=5), + } + ) + df["off1"] = df["time2"] - df["time"] + assert df["off1"].dtype == "timedelta64[ns]" + + df["off2"] = df["time"] - df["time2"] + df._consolidate_inplace() + assert df["off1"].dtype == "timedelta64[ns]" + assert df["off2"].dtype == "timedelta64[ns]" + + def test_std_timedelta64_skipna_false(self): + # GH#37392 + tdi = pd.timedelta_range("1 Day", periods=10) + df = DataFrame({"A": tdi, "B": tdi}) + # Copy is needed for ArrayManager case, otherwise setting df.iloc + # below edits tdi, alterting both df['A'] and df['B'] + # FIXME: passing copy=True to constructor does not fix this + df = df.copy() + df.iloc[-2, -1] = pd.NaT + + result = df.std(skipna=False) + expected = Series( + [df["A"].std(), pd.NaT], index=["A", "B"], dtype="timedelta64[ns]" + ) + tm.assert_series_equal(result, expected) + + result = df.std(axis=1, skipna=False) + expected = Series([pd.Timedelta(0)] * 8 + [pd.NaT, pd.Timedelta(0)]) + tm.assert_series_equal(result, expected) + + def test_sum_corner(self): + empty_frame = DataFrame() + + axis0 = empty_frame.sum(0) + axis1 = empty_frame.sum(1) + assert isinstance(axis0, Series) + assert isinstance(axis1, Series) + assert len(axis0) == 0 + assert len(axis1) == 0 + + @pytest.mark.parametrize("method, unit", [("sum", 0), ("prod", 1)]) + @pytest.mark.parametrize("numeric_only", [None, True, False]) + def test_sum_prod_nanops(self, method, unit, numeric_only): + idx = ["a", "b", "c"] + df = DataFrame({"a": [unit, unit], "b": [unit, np.nan], "c": [np.nan, np.nan]}) + # The default + result = getattr(df, method)(numeric_only=numeric_only) + expected = Series([unit, unit, unit], index=idx, dtype="float64") + tm.assert_series_equal(result, expected) + + # min_count=1 + result = getattr(df, method)(numeric_only=numeric_only, min_count=1) + expected = Series([unit, unit, np.nan], index=idx) + tm.assert_series_equal(result, expected) + + # min_count=0 + result = getattr(df, method)(numeric_only=numeric_only, min_count=0) + expected = Series([unit, unit, unit], index=idx, dtype="float64") + tm.assert_series_equal(result, expected) + + result = getattr(df.iloc[1:], method)(numeric_only=numeric_only, min_count=1) + expected = Series([unit, np.nan, np.nan], index=idx) + tm.assert_series_equal(result, expected) + + # min_count > 1 + df = DataFrame({"A": [unit] * 10, "B": [unit] * 5 + [np.nan] * 5}) + result = getattr(df, method)(numeric_only=numeric_only, min_count=5) + expected = Series(result, index=["A", "B"]) + tm.assert_series_equal(result, expected) + + result = getattr(df, method)(numeric_only=numeric_only, min_count=6) + expected = Series(result, index=["A", "B"]) + tm.assert_series_equal(result, expected) + + def test_sum_nanops_timedelta(self): + # prod isn't defined on timedeltas + idx = ["a", "b", "c"] + df = DataFrame({"a": [0, 0], "b": [0, np.nan], "c": [np.nan, np.nan]}) 
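The min_count keyword checked in test_sum_prod_nanops above controls when an all-NA (or too short) reduction falls back to NaN instead of the operation's identity element. A compact sketch:

import numpy as np
import pandas as pd

s = pd.Series([np.nan, np.nan])

assert s.sum() == 0                  # identity element for sum
assert s.prod() == 1                 # identity element for prod
assert np.isnan(s.sum(min_count=1))  # fewer than 1 valid value -> NaN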
+ + df2 = df.apply(to_timedelta) + + # 0 by default + result = df2.sum() + expected = Series([0, 0, 0], dtype="m8[ns]", index=idx) + tm.assert_series_equal(result, expected) + + # min_count=0 + result = df2.sum(min_count=0) + tm.assert_series_equal(result, expected) + + # min_count=1 + result = df2.sum(min_count=1) + expected = Series([0, 0, np.nan], dtype="m8[ns]", index=idx) + tm.assert_series_equal(result, expected) + + def test_sum_nanops_min_count(self): + # https://github.com/pandas-dev/pandas/issues/39738 + df = DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}) + result = df.sum(min_count=10) + expected = Series([np.nan, np.nan], index=["x", "y"]) + tm.assert_series_equal(result, expected) + + def test_sum_object(self, float_frame): + values = float_frame.values.astype(int) + frame = DataFrame(values, index=float_frame.index, columns=float_frame.columns) + deltas = frame * timedelta(1) + deltas.sum() + + def test_sum_bool(self, float_frame): + # ensure this works, bug report + bools = np.isnan(float_frame) + bools.sum(1) + bools.sum(0) + + def test_sum_mixed_datetime(self): + # GH#30886 + df = DataFrame({"A": date_range("2000", periods=4), "B": [1, 2, 3, 4]}).reindex( + [2, 3, 4] + ) + with tm.assert_produces_warning(FutureWarning, match="Select only valid"): + result = df.sum() + + expected = Series({"B": 7.0}) + tm.assert_series_equal(result, expected) + + def test_mean_corner(self, float_frame, float_string_frame): + # unit test when have object data + with tm.assert_produces_warning(FutureWarning, match="Select only valid"): + the_mean = float_string_frame.mean(axis=0) + the_sum = float_string_frame.sum(axis=0, numeric_only=True) + tm.assert_index_equal(the_sum.index, the_mean.index) + assert len(the_mean.index) < len(float_string_frame.columns) + + # xs sum mixed type, just want to know it works... 
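+ # Sketch of the behavior asserted here: the string column is a "nuisance" + # column for mean(), and this pandas version still drops it silently while + # emitting the "Select only valid columns" FutureWarning; numeric_only=True + # opts in to that column selection explicitly, which is why the two index + # sets are expected to match.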
+ with tm.assert_produces_warning(FutureWarning, match="Select only valid"): + the_mean = float_string_frame.mean(axis=1) + the_sum = float_string_frame.sum(axis=1, numeric_only=True) + tm.assert_index_equal(the_sum.index, the_mean.index) + + # take mean of boolean column + float_frame["bool"] = float_frame["A"] > 0 + means = float_frame.mean(0) + assert means["bool"] == float_frame["bool"].values.mean() + + def test_mean_datetimelike(self): + # GH#24757 check that datetimelike are excluded by default, handled + # correctly with numeric_only=True + + df = DataFrame( + { + "A": np.arange(3), + "B": date_range("2016-01-01", periods=3), + "C": pd.timedelta_range("1D", periods=3), + "D": pd.period_range("2016", periods=3, freq="A"), + } + ) + result = df.mean(numeric_only=True) + expected = Series({"A": 1.0}) + tm.assert_series_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning): + # in the future datetime columns will be included + result = df.mean() + expected = Series({"A": 1.0, "C": df.loc[1, "C"]}) + tm.assert_series_equal(result, expected) + + def test_mean_datetimelike_numeric_only_false(self): + df = DataFrame( + { + "A": np.arange(3), + "B": date_range("2016-01-01", periods=3), + "C": pd.timedelta_range("1D", periods=3), + } + ) + + # datetime(tz) and timedelta work + result = df.mean(numeric_only=False) + expected = Series({"A": 1, "B": df.loc[1, "B"], "C": df.loc[1, "C"]}) + tm.assert_series_equal(result, expected) + + # mean of period is not allowed + df["D"] = pd.period_range("2016", periods=3, freq="A") + + with pytest.raises(TypeError, match="mean is not implemented for Period"): + df.mean(numeric_only=False) + + def test_mean_extensionarray_numeric_only_true(self): + # https://github.com/pandas-dev/pandas/issues/33256 + arr = np.random.randint(1000, size=(10, 5)) + df = DataFrame(arr, dtype="Int64") + result = df.mean(numeric_only=True) + expected = DataFrame(arr).mean() + tm.assert_series_equal(result, expected) + + def test_stats_mixed_type(self, float_string_frame): + # don't blow up + with tm.assert_produces_warning( + FutureWarning, match="Select only valid columns" + ): + float_string_frame.std(1) + float_string_frame.var(1) + float_string_frame.mean(1) + float_string_frame.skew(1) + + def test_sum_bools(self): + df = DataFrame(index=range(1), columns=range(10)) + bools = isna(df) + assert bools.sum(axis=1)[0] == 10 + + # ---------------------------------------------------------------------- + # Index of max / min + + def test_idxmin(self, float_frame, int_frame): + frame = float_frame + frame.iloc[5:10] = np.nan + frame.iloc[15:20, -2:] = np.nan + for skipna in [True, False]: + for axis in [0, 1]: + for df in [frame, int_frame]: + result = df.idxmin(axis=axis, skipna=skipna) + expected = df.apply(Series.idxmin, axis=axis, skipna=skipna) + tm.assert_series_equal(result, expected) + + msg = "No axis named 2 for object type DataFrame" + with pytest.raises(ValueError, match=msg): + frame.idxmin(axis=2) + + def test_idxmax(self, float_frame, int_frame): + frame = float_frame + frame.iloc[5:10] = np.nan + frame.iloc[15:20, -2:] = np.nan + for skipna in [True, False]: + for axis in [0, 1]: + for df in [frame, int_frame]: + result = df.idxmax(axis=axis, skipna=skipna) + expected = df.apply(Series.idxmax, axis=axis, skipna=skipna) + tm.assert_series_equal(result, expected) + + msg = "No axis named 2 for object type DataFrame" + with pytest.raises(ValueError, match=msg): + frame.idxmax(axis=2) + + def test_idxmax_mixed_dtype(self): + # don't cast to object, 
which would raise in nanops + dti = date_range("2016-01-01", periods=3) + + # Copying dti is needed for ArrayManager otherwise when we set + # df.loc[0, 3] = pd.NaT below it edits dti + df = DataFrame({1: [0, 2, 1], 2: range(3)[::-1], 3: dti.copy(deep=True)}) + + result = df.idxmax() + expected = Series([1, 0, 2], index=[1, 2, 3]) + tm.assert_series_equal(result, expected) + + result = df.idxmin() + expected = Series([0, 2, 0], index=[1, 2, 3]) + tm.assert_series_equal(result, expected) + + # with NaTs + df.loc[0, 3] = pd.NaT + result = df.idxmax() + expected = Series([1, 0, 2], index=[1, 2, 3]) + tm.assert_series_equal(result, expected) + + result = df.idxmin() + expected = Series([0, 2, 1], index=[1, 2, 3]) + tm.assert_series_equal(result, expected) + + # with multi-column dt64 block + df[4] = dti[::-1] + df._consolidate_inplace() + + result = df.idxmax() + expected = Series([1, 0, 2, 0], index=[1, 2, 3, 4]) + tm.assert_series_equal(result, expected) + + result = df.idxmin() + expected = Series([0, 2, 1, 2], index=[1, 2, 3, 4]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "op, expected_value", + [("idxmax", [0, 4]), ("idxmin", [0, 5])], + ) + def test_idxmax_idxmin_convert_dtypes(self, op, expected_value): + # GH 40346 + df = DataFrame( + { + "ID": [100, 100, 100, 200, 200, 200], + "value": [0, 0, 0, 1, 2, 0], + }, + dtype="Int64", + ) + df = df.groupby("ID") + + result = getattr(df, op)() + expected = DataFrame( + {"value": expected_value}, + index=Index([100, 200], name="ID", dtype="Int64"), + ) + tm.assert_frame_equal(result, expected) + + def test_idxmax_dt64_multicolumn_axis1(self): + dti = date_range("2016-01-01", periods=3) + df = DataFrame({3: dti, 4: dti[::-1]}) + # FIXME: copy needed for ArrayManager, otherwise setting with iloc + # below also sets df.iloc[-1, 1]; passing copy=True to DataFrame + # does not solve this. 
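+ # With the default skipna=True, idxmax/idxmin treat NaT like NaN: the + # missing slot is skipped rather than propagated, so after df.iloc[0, 0] + # is set to NaT below, row 0 resolves to the other (non-NaT) column.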
+ df = df.copy() + df.iloc[0, 0] = pd.NaT + + df._consolidate_inplace() + + result = df.idxmax(axis=1) + expected = Series([4, 3, 3]) + tm.assert_series_equal(result, expected) + + result = df.idxmin(axis=1) + expected = Series([4, 3, 4]) + tm.assert_series_equal(result, expected) + + # ---------------------------------------------------------------------- + # Logical reductions + + @pytest.mark.parametrize("opname", ["any", "all"]) + def test_any_all(self, opname, bool_frame_with_na, float_string_frame): + assert_bool_op_api( + opname, bool_frame_with_na, float_string_frame, has_bool_only=True + ) + + @pytest.mark.parametrize("opname", ["any", "all"]) + def test_any_all_bool_frame(self, opname, bool_frame_with_na): + # GH#12863: numpy gives back non-boolean data for object type + # so fill NaNs to compare with pandas behavior + df = bool_frame_with_na.fillna(True) + assert_bool_op_calc(opname, getattr(np, opname), df, has_skipna=True) + + def test_any_all_extra(self): + df = DataFrame( + { + "A": [True, False, False], + "B": [True, True, False], + "C": [True, True, True], + }, + index=["a", "b", "c"], + ) + result = df[["A", "B"]].any(1) + expected = Series([True, True, False], index=["a", "b", "c"]) + tm.assert_series_equal(result, expected) + + result = df[["A", "B"]].any(1, bool_only=True) + tm.assert_series_equal(result, expected) + + result = df.all(1) + expected = Series([True, False, False], index=["a", "b", "c"]) + tm.assert_series_equal(result, expected) + + result = df.all(1, bool_only=True) + tm.assert_series_equal(result, expected) + + # Axis is None + result = df.all(axis=None).item() + assert result is False + + result = df.any(axis=None).item() + assert result is True + + result = df[["C"]].all(axis=None).item() + assert result is True + + @pytest.mark.parametrize("axis", [0, 1]) + @pytest.mark.parametrize("bool_agg_func", ["any", "all"]) + @pytest.mark.parametrize("skipna", [True, False]) + def test_any_all_object_dtype(self, axis, bool_agg_func, skipna): + # GH#35450 + df = DataFrame( + data=[ + [1, np.nan, np.nan, True], + [np.nan, 2, np.nan, True], + [np.nan, np.nan, np.nan, True], + [np.nan, np.nan, "5", np.nan], + ] + ) + result = getattr(df, bool_agg_func)(axis=axis, skipna=skipna) + expected = Series([True, True, True, True]) + tm.assert_series_equal(result, expected) + + def test_any_datetime(self): + + # GH 23070 + float_data = [1, np.nan, 3, np.nan] + datetime_data = [ + Timestamp("1960-02-15"), + Timestamp("1960-02-16"), + pd.NaT, + pd.NaT, + ] + df = DataFrame({"A": float_data, "B": datetime_data}) + + result = df.any(1) + expected = Series([True, True, True, False]) + tm.assert_series_equal(result, expected) + + def test_any_all_bool_only(self): + + # GH 25101 + df = DataFrame( + {"col1": [1, 2, 3], "col2": [4, 5, 6], "col3": [None, None, None]} + ) + + result = df.all(bool_only=True) + expected = Series(dtype=np.bool_) + tm.assert_series_equal(result, expected) + + df = DataFrame( + { + "col1": [1, 2, 3], + "col2": [4, 5, 6], + "col3": [None, None, None], + "col4": [False, False, True], + } + ) + + result = df.all(bool_only=True) + expected = Series({"col4": False}) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "func, data, expected", + [ + (np.any, {}, False), + (np.all, {}, True), + (np.any, {"A": []}, False), + (np.all, {"A": []}, True), + (np.any, {"A": [False, False]}, False), + (np.all, {"A": [False, False]}, False), + (np.any, {"A": [True, False]}, True), + (np.all, {"A": [True, False]}, False), + (np.any, {"A": [True, 
True]}, True), + (np.all, {"A": [True, True]}, True), + (np.any, {"A": [False], "B": [False]}, False), + (np.all, {"A": [False], "B": [False]}, False), + (np.any, {"A": [False, False], "B": [False, True]}, True), + (np.all, {"A": [False, False], "B": [False, True]}, False), + # other types + (np.all, {"A": Series([0.0, 1.0], dtype="float")}, False), + (np.any, {"A": Series([0.0, 1.0], dtype="float")}, True), + (np.all, {"A": Series([0, 1], dtype=int)}, False), + (np.any, {"A": Series([0, 1], dtype=int)}, True), + pytest.param(np.all, {"A": Series([0, 1], dtype="M8[ns]")}, False), + pytest.param(np.all, {"A": Series([0, 1], dtype="M8[ns, UTC]")}, False), + pytest.param(np.any, {"A": Series([0, 1], dtype="M8[ns]")}, True), + pytest.param(np.any, {"A": Series([0, 1], dtype="M8[ns, UTC]")}, True), + pytest.param(np.all, {"A": Series([1, 2], dtype="M8[ns]")}, True), + pytest.param(np.all, {"A": Series([1, 2], dtype="M8[ns, UTC]")}, True), + pytest.param(np.any, {"A": Series([1, 2], dtype="M8[ns]")}, True), + pytest.param(np.any, {"A": Series([1, 2], dtype="M8[ns, UTC]")}, True), + pytest.param(np.all, {"A": Series([0, 1], dtype="m8[ns]")}, False), + pytest.param(np.any, {"A": Series([0, 1], dtype="m8[ns]")}, True), + pytest.param(np.all, {"A": Series([1, 2], dtype="m8[ns]")}, True), + pytest.param(np.any, {"A": Series([1, 2], dtype="m8[ns]")}, True), + # np.all on Categorical raises, so the reduction drops the + # column, so all is being done on an empty Series, so is True + (np.all, {"A": Series([0, 1], dtype="category")}, True), + (np.any, {"A": Series([0, 1], dtype="category")}, False), + (np.all, {"A": Series([1, 2], dtype="category")}, True), + (np.any, {"A": Series([1, 2], dtype="category")}, False), + # Mix GH#21484 + pytest.param( + np.all, + { + "A": Series([10, 20], dtype="M8[ns]"), + "B": Series([10, 20], dtype="m8[ns]"), + }, + True, + ), + ], + ) + def test_any_all_np_func(self, func, data, expected): + # GH 19976 + data = DataFrame(data) + + warn = None + if any(is_categorical_dtype(x) for x in data.dtypes): + warn = FutureWarning + + with tm.assert_produces_warning( + warn, match="Select only valid columns", check_stacklevel=False + ): + result = func(data) + assert isinstance(result, np.bool_) + assert result.item() is expected + + # method version + with tm.assert_produces_warning( + warn, match="Select only valid columns", check_stacklevel=False + ): + result = getattr(DataFrame(data), func.__name__)(axis=None) + assert isinstance(result, np.bool_) + assert result.item() is expected + + def test_any_all_object(self): + # GH 19976 + result = np.all(DataFrame(columns=["a", "b"])).item() + assert result is True + + result = np.any(DataFrame(columns=["a", "b"])).item() + assert result is False + + def test_any_all_object_bool_only(self): + df = DataFrame({"A": ["foo", 2], "B": [True, False]}).astype(object) + df._consolidate_inplace() + df["C"] = Series([True, True]) + + # Categorical of bools is _not_ considered booly + df["D"] = df["C"].astype("category") + + # The underlying bug is in DataFrame._get_bool_data, so we check + # that while we're here + res = df._get_bool_data() + expected = df[["B", "C"]] + tm.assert_frame_equal(res, expected) + + res = df.all(bool_only=True, axis=0) + expected = Series([False, True], index=["B", "C"]) + tm.assert_series_equal(res, expected) + + # operating on a subset of columns should not produce a _larger_ Series + res = df[["B", "C"]].all(bool_only=True, axis=0) + tm.assert_series_equal(res, expected) + + assert not df.all(bool_only=True, 
axis=None) + + res = df.any(bool_only=True, axis=0) + expected = Series([True, True], index=["B", "C"]) + tm.assert_series_equal(res, expected) + + # operating on a subset of columns should not produce a _larger_ Series + res = df[["B", "C"]].any(bool_only=True, axis=0) + tm.assert_series_equal(res, expected) + + assert df.any(bool_only=True, axis=None) + + @pytest.mark.parametrize("method", ["any", "all"]) + def test_any_all_level_axis_none_raises(self, method): + df = DataFrame( + {"A": 1}, + index=MultiIndex.from_product( + [["A", "B"], ["a", "b"]], names=["out", "in"] + ), + ) + xpr = "Must specify 'axis' when aggregating by level." + with pytest.raises(ValueError, match=xpr): + with tm.assert_produces_warning(FutureWarning): + getattr(df, method)(axis=None, level="out") + + # --------------------------------------------------------------------- + # Unsorted + + def test_series_broadcasting(self): + # smoke test for numpy warnings + # GH 16378, GH 16306 + df = DataFrame([1.0, 1.0, 1.0]) + df_nan = DataFrame({"A": [np.nan, 2.0, np.nan]}) + s = Series([1, 1, 1]) + s_nan = Series([np.nan, np.nan, 1]) + + with tm.assert_produces_warning(None): + df_nan.clip(lower=s, axis=0) + for op in ["lt", "le", "gt", "ge", "eq", "ne"]: + getattr(df, op)(s_nan, axis=0) + + +class TestDataFrameReductions: + def test_min_max_dt64_with_NaT(self): + # Both NaT and Timestamp are in DataFrame. + df = DataFrame({"foo": [pd.NaT, pd.NaT, Timestamp("2012-05-01")]}) + + res = df.min() + exp = Series([Timestamp("2012-05-01")], index=["foo"]) + tm.assert_series_equal(res, exp) + + res = df.max() + exp = Series([Timestamp("2012-05-01")], index=["foo"]) + tm.assert_series_equal(res, exp) + + # GH12941, only NaTs are in DataFrame. + df = DataFrame({"foo": [pd.NaT, pd.NaT]}) + + res = df.min() + exp = Series([pd.NaT], index=["foo"]) + tm.assert_series_equal(res, exp) + + res = df.max() + exp = Series([pd.NaT], index=["foo"]) + tm.assert_series_equal(res, exp) + + def test_min_max_dt64_with_NaT_skipna_false(self, request, tz_naive_fixture): + # GH#36907 + tz = tz_naive_fixture + if isinstance(tz, tzlocal) and is_platform_windows(): + pytest.skip( + "GH#37659 OSError raised within tzlocal bc Windows " + "chokes in times before 1970-01-01" + ) + + df = DataFrame( + { + "a": [ + Timestamp("2020-01-01 08:00:00", tz=tz), + Timestamp("1920-02-01 09:00:00", tz=tz), + ], + "b": [Timestamp("2020-02-01 08:00:00", tz=tz), pd.NaT], + } + ) + res = df.min(axis=1, skipna=False) + expected = Series([df.loc[0, "a"], pd.NaT]) + assert expected.dtype == df["a"].dtype + + tm.assert_series_equal(res, expected) + + res = df.max(axis=1, skipna=False) + expected = Series([df.loc[0, "b"], pd.NaT]) + assert expected.dtype == df["a"].dtype + + tm.assert_series_equal(res, expected) + + def test_min_max_dt64_api_consistency_with_NaT(self): + # Calling the following sum functions returned an error for dataframes but + # returned NaT for series. These tests check that the API is consistent in + # min/max calls on empty Series/DataFrames. 
See GH#33704 for more + # information + df = DataFrame({"x": to_datetime([])}) + expected_dt_series = Series(to_datetime([])) + # check axis 0 + assert (df.min(axis=0).x is pd.NaT) == (expected_dt_series.min() is pd.NaT) + assert (df.max(axis=0).x is pd.NaT) == (expected_dt_series.max() is pd.NaT) + + # check axis 1 + tm.assert_series_equal(df.min(axis=1), expected_dt_series) + tm.assert_series_equal(df.max(axis=1), expected_dt_series) + + def test_min_max_dt64_api_consistency_empty_df(self): + # check DataFrame/Series api consistency when calling min/max on an empty + # DataFrame/Series. + df = DataFrame({"x": []}) + expected_float_series = Series([], dtype=float) + # check axis 0 + assert np.isnan(df.min(axis=0).x) == np.isnan(expected_float_series.min()) + assert np.isnan(df.max(axis=0).x) == np.isnan(expected_float_series.max()) + # check axis 1 + tm.assert_series_equal(df.min(axis=1), expected_float_series) + tm.assert_series_equal(df.max(axis=1), expected_float_series) + + @pytest.mark.parametrize( + "initial", + ["2018-10-08 13:36:45+00:00", "2018-10-08 13:36:45+03:00"], # Non-UTC timezone + ) + @pytest.mark.parametrize("method", ["min", "max"]) + def test_preserve_timezone(self, initial: str, method): + # GH 28552 + initial_dt = to_datetime(initial) + expected = Series([initial_dt]) + df = DataFrame([expected]) + result = getattr(df, method)(axis=1) + tm.assert_series_equal(result, expected) + + def test_frame_any_all_with_level(self): + df = DataFrame( + {"data": [False, False, True, False, True, False, True]}, + index=[ + ["one", "one", "two", "one", "two", "two", "two"], + [0, 1, 0, 2, 1, 2, 3], + ], + ) + + with tm.assert_produces_warning(FutureWarning, match="Using the level"): + result = df.any(level=0) + ex = DataFrame({"data": [False, True]}, index=["one", "two"]) + tm.assert_frame_equal(result, ex) + + with tm.assert_produces_warning(FutureWarning, match="Using the level"): + result = df.all(level=0) + ex = DataFrame({"data": [False, False]}, index=["one", "two"]) + tm.assert_frame_equal(result, ex) + + def test_frame_any_with_timedelta(self): + # GH#17667 + df = DataFrame( + { + "a": Series([0, 0]), + "t": Series([to_timedelta(0, "s"), to_timedelta(1, "ms")]), + } + ) + + result = df.any(axis=0) + expected = Series(data=[False, True], index=["a", "t"]) + tm.assert_series_equal(result, expected) + + result = df.any(axis=1) + expected = Series(data=[False, True]) + tm.assert_series_equal(result, expected) + + def test_reductions_deprecation_skipna_none(self, frame_or_series): + # GH#44580 + obj = frame_or_series([1, 2, 3]) + with tm.assert_produces_warning(FutureWarning, match="skipna"): + obj.mad(skipna=None) + + def test_reductions_deprecation_level_argument( + self, frame_or_series, reduction_functions + ): + # GH#39983 + obj = frame_or_series( + [1, 2, 3], index=MultiIndex.from_arrays([[1, 2, 3], [4, 5, 6]]) + ) + with tm.assert_produces_warning(FutureWarning, match="level"): + getattr(obj, reduction_functions)(level=0) + + def test_reductions_skipna_none_raises(self, frame_or_series, reduction_functions): + if reduction_functions in ["count", "mad"]: + pytest.skip("Count does not accept skipna. Mad needs a deprecation cycle.") + obj = frame_or_series([1, 2, 3]) + msg = 'For argument "skipna" expected type bool, received type NoneType.'
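+ # This message comes from pandas' validate_bool_kwarg helper, which rejects + # a non-bool ``skipna`` with ValueError (not TypeError) - hence the exact + # match below.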
+ with pytest.raises(ValueError, match=msg): + getattr(obj, reduction_functions)(skipna=None) + + +class TestNuisanceColumns: + @pytest.mark.parametrize("method", ["any", "all"]) + def test_any_all_categorical_dtype_nuisance_column(self, method): + # GH#36076 DataFrame should match Series behavior + ser = Series([0, 1], dtype="category", name="A") + df = ser.to_frame() + + # Double-check the Series behavior is to raise + with pytest.raises(TypeError, match="does not support reduction"): + getattr(ser, method)() + + with pytest.raises(TypeError, match="does not support reduction"): + getattr(np, method)(ser) + + with pytest.raises(TypeError, match="does not support reduction"): + getattr(df, method)(bool_only=False) + + # With bool_only=None, operating on this column raises and is ignored, + # so we expect an empty result. + with tm.assert_produces_warning( + FutureWarning, match="Select only valid columns" + ): + result = getattr(df, method)(bool_only=None) + expected = Series([], index=Index([]), dtype=bool) + tm.assert_series_equal(result, expected) + + with tm.assert_produces_warning( + FutureWarning, match="Select only valid columns", check_stacklevel=False + ): + result = getattr(np, method)(df, axis=0) + tm.assert_series_equal(result, expected) + + def test_median_categorical_dtype_nuisance_column(self): + # GH#21020 DataFrame.median should match Series.median + df = DataFrame({"A": Categorical([1, 2, 2, 2, 3])}) + ser = df["A"] + + # Double-check the Series behavior is to raise + with pytest.raises(TypeError, match="does not support reduction"): + ser.median() + + with pytest.raises(TypeError, match="does not support reduction"): + df.median(numeric_only=False) + + with tm.assert_produces_warning( + FutureWarning, match="Select only valid columns" + ): + result = df.median() + expected = Series([], index=Index([]), dtype=np.float64) + tm.assert_series_equal(result, expected) + + # same thing, but with an additional non-categorical column + df["B"] = df["A"].astype(int) + + with pytest.raises(TypeError, match="does not support reduction"): + df.median(numeric_only=False) + + with tm.assert_produces_warning( + FutureWarning, match="Select only valid columns" + ): + result = df.median() + expected = Series([2.0], index=["B"]) + tm.assert_series_equal(result, expected) + + # TODO: np.median(df, axis=0) gives np.array([2.0, 2.0]) instead + # of expected.values + + @pytest.mark.parametrize("method", ["min", "max"]) + def test_min_max_categorical_dtype_non_ordered_nuisance_column(self, method): + # GH#28949 DataFrame.min should behave like Series.min + cat = Categorical(["a", "b", "c", "b"], ordered=False) + ser = Series(cat) + df = ser.to_frame("A") + + # Double-check the Series behavior + with pytest.raises(TypeError, match="is not ordered for operation"): + getattr(ser, method)() + + with pytest.raises(TypeError, match="is not ordered for operation"): + getattr(np, method)(ser) + + with pytest.raises(TypeError, match="is not ordered for operation"): + getattr(df, method)(numeric_only=False) + + with tm.assert_produces_warning( + FutureWarning, match="Select only valid columns" + ): + result = getattr(df, method)() + expected = Series([], index=Index([]), dtype=np.float64) + tm.assert_series_equal(result, expected) + + with tm.assert_produces_warning( + FutureWarning, match="Select only valid columns", check_stacklevel=False + ): + result = getattr(np, method)(df) + tm.assert_series_equal(result, expected) + + # same thing, but with an additional non-categorical column + df["B"] = 
df["A"].astype(object) + with tm.assert_produces_warning( + FutureWarning, match="Select only valid columns" + ): + result = getattr(df, method)() + if method == "min": + expected = Series(["a"], index=["B"]) + else: + expected = Series(["c"], index=["B"]) + tm.assert_series_equal(result, expected) + + with tm.assert_produces_warning( + FutureWarning, match="Select only valid columns", check_stacklevel=False + ): + result = getattr(np, method)(df) + tm.assert_series_equal(result, expected) + + def test_reduction_object_block_splits_nuisance_columns(self): + # GH#37827 + df = DataFrame({"A": [0, 1, 2], "B": ["a", "b", "c"]}, dtype=object) + + # We should only exclude "B", not "A" + with tm.assert_produces_warning( + FutureWarning, match="Select only valid columns" + ): + result = df.mean() + expected = Series([1.0], index=["A"]) + tm.assert_series_equal(result, expected) + + # Same behavior but heterogeneous dtype + df["C"] = df["A"].astype(int) + 4 + + with tm.assert_produces_warning( + FutureWarning, match="Select only valid columns" + ): + result = df.mean() + expected = Series([1.0, 5.0], index=["A", "C"]) + tm.assert_series_equal(result, expected) + + +def test_sum_timedelta64_skipna_false(): + # GH#17235 + arr = np.arange(8).astype(np.int64).view("m8[s]").reshape(4, 2) + arr[-1, -1] = "Nat" + + df = DataFrame(arr) + + result = df.sum(skipna=False) + expected = Series([pd.Timedelta(seconds=12), pd.NaT]) + tm.assert_series_equal(result, expected) + + result = df.sum(axis=0, skipna=False) + tm.assert_series_equal(result, expected) + + result = df.sum(axis=1, skipna=False) + expected = Series( + [ + pd.Timedelta(seconds=1), + pd.Timedelta(seconds=5), + pd.Timedelta(seconds=9), + pd.NaT, + ] + ) + tm.assert_series_equal(result, expected) + + +def test_mixed_frame_with_integer_sum(): + # https://github.com/pandas-dev/pandas/issues/34520 + df = DataFrame([["a", 1]], columns=list("ab")) + df = df.astype({"b": "Int64"}) + result = df.sum() + expected = Series(["a", 1], index=["a", "b"]) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("numeric_only", [True, False, None]) +@pytest.mark.parametrize("method", ["min", "max"]) +def test_minmax_extensionarray(method, numeric_only): + # https://github.com/pandas-dev/pandas/issues/32651 + int64_info = np.iinfo("int64") + ser = Series([int64_info.max, None, int64_info.min], dtype=pd.Int64Dtype()) + df = DataFrame({"Int64": ser}) + result = getattr(df, method)(numeric_only=numeric_only) + expected = Series( + [getattr(int64_info, method)], index=Index(["Int64"], dtype="object") + ) + tm.assert_series_equal(result, expected) + + +def test_mad_nullable_integer(any_signed_int_ea_dtype): + # GH#33036 + df = DataFrame(np.random.randn(100, 4).astype(np.int64)) + df2 = df.astype(any_signed_int_ea_dtype) + + result = df2.mad() + expected = df.mad() + tm.assert_series_equal(result, expected) + + result = df2.mad(axis=1) + expected = df.mad(axis=1) + tm.assert_series_equal(result, expected) + + # case with NAs present + df2.iloc[::2, 1] = pd.NA + + result = df2.mad() + expected = df.mad() + expected[1] = df.iloc[1::2, 1].mad() + tm.assert_series_equal(result, expected) + + result = df2.mad(axis=1) + expected = df.mad(axis=1) + expected[::2] = df.T.loc[[0, 2, 3], ::2].mad() + tm.assert_series_equal(result, expected) + + +@pytest.mark.xfail(reason="GH#42895 caused by lack of 2D EA") +def test_mad_nullable_integer_all_na(any_signed_int_ea_dtype): + # GH#33036 + df = DataFrame(np.random.randn(100, 4).astype(np.int64)) + df2 = 
df.astype(any_signed_int_ea_dtype) + + # case with all-NA row/column + df2.iloc[:, 1] = pd.NA # FIXME(GH#44199): this doesn't operate in-place + df2.iloc[:, 1] = pd.array([pd.NA] * len(df2), dtype=any_signed_int_ea_dtype) + result = df2.mad() + expected = df.mad() + expected[1] = pd.NA + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("meth", ["max", "min", "sum", "mean", "median"]) +def test_groupby_regular_arithmetic_equivalent(meth): + # GH#40660 + df = DataFrame( + {"a": [pd.Timedelta(hours=6), pd.Timedelta(hours=7)], "b": [12.1, 13.3]} + ) + expected = df.copy() + + with tm.assert_produces_warning(FutureWarning): + result = getattr(df, meth)(level=0) + tm.assert_frame_equal(result, expected) + + result = getattr(df.groupby(level=0), meth)(numeric_only=False) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("ts_value", [Timestamp("2000-01-01"), pd.NaT]) +def test_frame_mixed_numeric_object_with_timestamp(ts_value): + # GH 13912 + df = DataFrame({"a": [1], "b": [1.1], "c": ["foo"], "d": [ts_value]}) + with tm.assert_produces_warning(FutureWarning, match="Dropping of nuisance"): + result = df.sum() + expected = Series([1, 1.1, "foo"], index=list("abc")) + tm.assert_series_equal(result, expected) + + +def test_prod_sum_min_count_mixed_object(): + # https://github.com/pandas-dev/pandas/issues/41074 + df = DataFrame([1, "a", True]) + + result = df.prod(axis=0, min_count=1, numeric_only=False) + expected = Series(["a"]) + tm.assert_series_equal(result, expected) + + msg = re.escape("unsupported operand type(s) for +: 'int' and 'str'") + with pytest.raises(TypeError, match=msg): + df.sum(axis=0, min_count=1, numeric_only=False) + + +@pytest.mark.parametrize("method", ["min", "max", "mean", "median", "skew", "kurt"]) +def test_reduction_axis_none_deprecation(method): + # GH#21597 deprecate axis=None defaulting to axis=0 so that we can change it + # to reducing over all axes. 
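+ # In other words: today df.mean(axis=None) warns and still reduces along + # axis=0 (one value per column, as asserted below); after the deprecation it + # is meant to reduce over the whole frame to a single scalar, matching + # numpy's axis=None convention.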
+ + df = DataFrame(np.random.randn(4, 4)) + meth = getattr(df, method) + + msg = f"scalar {method} over the entire DataFrame" + with tm.assert_produces_warning(FutureWarning, match=msg): + res = meth(axis=None) + with tm.assert_produces_warning(None): + expected = meth() + tm.assert_series_equal(res, expected) + tm.assert_series_equal(res, meth(axis=0)) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/test_repr_info.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/test_repr_info.py new file mode 100644 index 0000000..f19edf5 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/test_repr_info.py @@ -0,0 +1,351 @@ +from datetime import ( + datetime, + timedelta, +) +from io import StringIO +import warnings + +import numpy as np +import pytest + +from pandas import ( + Categorical, + DataFrame, + MultiIndex, + NaT, + PeriodIndex, + Series, + Timestamp, + date_range, + option_context, + period_range, +) +import pandas._testing as tm + +import pandas.io.formats.format as fmt + + +class TestDataFrameReprInfoEtc: + def test_repr_bytes_61_lines(self): + # GH#12857 + lets = list("ACDEFGHIJKLMNOP") + slen = 50 + nseqs = 1000 + words = [[np.random.choice(lets) for x in range(slen)] for _ in range(nseqs)] + df = DataFrame(words).astype("U1") + assert (df.dtypes == object).all() + + # smoke tests; at one point this raised with 61 but not 60 + repr(df) + repr(df.iloc[:60, :]) + repr(df.iloc[:61, :]) + + def test_repr_unicode_level_names(self, frame_or_series): + index = MultiIndex.from_tuples([(0, 0), (1, 1)], names=["\u0394", "i1"]) + + obj = DataFrame(np.random.randn(2, 4), index=index) + obj = tm.get_obj(obj, frame_or_series) + repr(obj) + + def test_assign_index_sequences(self): + # GH#2200 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}).set_index( + ["a", "b"] + ) + index = list(df.index) + index[0] = ("faz", "boo") + df.index = index + repr(df) + + # this travels an improper code path + index[0] = ["faz", "boo"] + df.index = index + repr(df) + + def test_repr_with_mi_nat(self, float_string_frame): + df = DataFrame({"X": [1, 2]}, index=[[NaT, Timestamp("20130101")], ["a", "b"]]) + result = repr(df) + expected = " X\nNaT a 1\n2013-01-01 b 2" + assert result == expected + + def test_repr_with_different_nulls(self): + # GH45263 + df = DataFrame([1, 2, 3, 4], [True, None, np.nan, NaT]) + result = repr(df) + expected = """ 0 +True 1 +None 2 +NaN 3 +NaT 4""" + assert result == expected + + def test_repr_with_different_nulls_cols(self): + # GH45263 + d = {np.nan: [1, 2], None: [3, 4], NaT: [6, 7], True: [8, 9]} + df = DataFrame(data=d) + result = repr(df) + expected = """ NaN None NaT True +0 1 3 6 8 +1 2 4 7 9""" + assert result == expected + + def test_multiindex_na_repr(self): + # only an issue with long columns + df3 = DataFrame( + { + "A" * 30: {("A", "A0006000", "nuit"): "A0006000"}, + "B" * 30: {("A", "A0006000", "nuit"): np.nan}, + "C" * 30: {("A", "A0006000", "nuit"): np.nan}, + "D" * 30: {("A", "A0006000", "nuit"): np.nan}, + "E" * 30: {("A", "A0006000", "nuit"): "A"}, + "F" * 30: {("A", "A0006000", "nuit"): np.nan}, + } + ) + + idf = df3.set_index(["A" * 30, "C" * 30]) + repr(idf) + + def test_repr_name_coincide(self): + index = MultiIndex.from_tuples( + [("a", 0, "foo"), ("b", 1, "bar")], names=["a", "b", "c"] + ) + + df = DataFrame({"value": [0, 1]}, index=index) + + lines = repr(df).split("\n") + assert lines[2].startswith("a 0 foo") + + def test_repr_to_string( + self, + multiindex_year_month_day_dataframe_random_data, + 
multiindex_dataframe_random_data, + ): + ymd = multiindex_year_month_day_dataframe_random_data + frame = multiindex_dataframe_random_data + + repr(frame) + repr(ymd) + repr(frame.T) + repr(ymd.T) + + buf = StringIO() + frame.to_string(buf=buf) + ymd.to_string(buf=buf) + frame.T.to_string(buf=buf) + ymd.T.to_string(buf=buf) + + def test_repr_empty(self): + # empty + repr(DataFrame()) + + # empty with index + frame = DataFrame(index=np.arange(1000)) + repr(frame) + + def test_repr_mixed(self, float_string_frame): + buf = StringIO() + + # mixed + repr(float_string_frame) + float_string_frame.info(verbose=False, buf=buf) + + @pytest.mark.slow + def test_repr_mixed_big(self): + # big mixed + biggie = DataFrame( + {"A": np.random.randn(200), "B": tm.makeStringIndex(200)}, index=range(200) + ) + biggie.loc[:20, "A"] = np.nan + biggie.loc[:20, "B"] = np.nan + + repr(biggie) + + def test_repr(self, float_frame): + buf = StringIO() + + # small one + repr(float_frame) + float_frame.info(verbose=False, buf=buf) + + # even smaller + float_frame.reindex(columns=["A"]).info(verbose=False, buf=buf) + float_frame.reindex(columns=["A", "B"]).info(verbose=False, buf=buf) + + # exhausting cases in DataFrame.info + + # columns but no index + no_index = DataFrame(columns=[0, 1, 3]) + repr(no_index) + + # no columns or index + DataFrame().info(buf=buf) + + df = DataFrame(["a\n\r\tb"], columns=["a\n\r\td"], index=["a\n\r\tf"]) + assert "\t" not in repr(df) + assert "\r" not in repr(df) + assert "a\n" not in repr(df) + + def test_repr_dimensions(self): + df = DataFrame([[1, 2], [3, 4]]) + with option_context("display.show_dimensions", True): + assert "2 rows x 2 columns" in repr(df) + + with option_context("display.show_dimensions", False): + assert "2 rows x 2 columns" not in repr(df) + + with option_context("display.show_dimensions", "truncate"): + assert "2 rows x 2 columns" not in repr(df) + + @pytest.mark.slow + def test_repr_big(self): + # big one + biggie = DataFrame(np.zeros((200, 4)), columns=range(4), index=range(200)) + repr(biggie) + + def test_repr_unsortable(self, float_frame): + # columns are not sortable + + warn_filters = warnings.filters + warnings.filterwarnings("ignore", category=FutureWarning, module=".*format") + + unsortable = DataFrame( + { + "foo": [1] * 50, + datetime.today(): [1] * 50, + "bar": ["bar"] * 50, + datetime.today() + timedelta(1): ["bar"] * 50, + }, + index=np.arange(50), + ) + repr(unsortable) + + fmt.set_option("display.precision", 3, "display.column_space", 10) + repr(float_frame) + + fmt.set_option("display.max_rows", 10, "display.max_columns", 2) + repr(float_frame) + + fmt.set_option("display.max_rows", 1000, "display.max_columns", 1000) + repr(float_frame) + + tm.reset_display_options() + + warnings.filters = warn_filters + + def test_repr_unicode(self): + uval = "\u03c3\u03c3\u03c3\u03c3" + + df = DataFrame({"A": [uval, uval]}) + + result = repr(df) + ex_top = " A" + assert result.split("\n")[0].rstrip() == ex_top + + df = DataFrame({"A": [uval, uval]}) + result = repr(df) + assert result.split("\n")[0].rstrip() == ex_top + + def test_unicode_string_with_unicode(self): + df = DataFrame({"A": ["\u05d0"]}) + str(df) + + def test_repr_unicode_columns(self): + df = DataFrame({"\u05d0": [1, 2, 3], "\u05d1": [4, 5, 6], "c": [7, 8, 9]}) + repr(df.columns) # should not raise UnicodeDecodeError + + def test_str_to_bytes_raises(self): + # GH 26447 + df = DataFrame({"A": ["abc"]}) + msg = "^'str' object cannot be interpreted as an integer$" + with pytest.raises(TypeError, 
match=msg): + bytes(df) + + def test_very_wide_info_repr(self): + df = DataFrame(np.random.randn(10, 20), columns=tm.rands_array(10, 20)) + repr(df) + + def test_repr_column_name_unicode_truncation_bug(self): + # #1906 + df = DataFrame( + { + "Id": [7117434], + "StringCol": ( + "Is it possible to modify drop plot code" + "so that the output graph is displayed " + "in iphone simulator, Is it possible to " + "modify drop plot code so that the " + "output graph is \xe2\x80\xa8displayed " + "in iphone simulator.Now we are adding " + "the CSV file externally. I want to Call " + "the File through the code.." + ), + } + ) + + with option_context("display.max_columns", 20): + assert "StringCol" in repr(df) + + @pytest.mark.filterwarnings("ignore::FutureWarning") + def test_latex_repr(self): + result = r"""\begin{tabular}{llll} +\toprule +{} & 0 & 1 & 2 \\ +\midrule +0 & $\alpha$ & b & c \\ +1 & 1 & 2 & 3 \\ +\bottomrule +\end{tabular} +""" + with option_context("display.latex.escape", False, "display.latex.repr", True): + df = DataFrame([[r"$\alpha$", "b", "c"], [1, 2, 3]]) + assert result == df._repr_latex_() + + # GH 12182 + assert df._repr_latex_() is None + + def test_repr_categorical_dates_periods(self): + # normal DataFrame + dt = date_range("2011-01-01 09:00", freq="H", periods=5, tz="US/Eastern") + p = period_range("2011-01", freq="M", periods=5) + df = DataFrame({"dt": dt, "p": p}) + exp = """ dt p +0 2011-01-01 09:00:00-05:00 2011-01 +1 2011-01-01 10:00:00-05:00 2011-02 +2 2011-01-01 11:00:00-05:00 2011-03 +3 2011-01-01 12:00:00-05:00 2011-04 +4 2011-01-01 13:00:00-05:00 2011-05""" + + assert repr(df) == exp + + df2 = DataFrame({"dt": Categorical(dt), "p": Categorical(p)}) + assert repr(df2) == exp + + @pytest.mark.parametrize("arg", [np.datetime64, np.timedelta64]) + @pytest.mark.parametrize( + "box, expected", + [[Series, "0 NaT\ndtype: object"], [DataFrame, " 0\n0 NaT"]], + ) + def test_repr_np_nat_with_object(self, arg, box, expected): + # GH 25445 + result = repr(box([arg("NaT")], dtype=object)) + assert result == expected + + def test_frame_datetime64_pre1900_repr(self): + df = DataFrame({"year": date_range("1/1/1700", periods=50, freq="A-DEC")}) + # it works! + repr(df) + + def test_frame_to_string_with_periodindex(self): + index = PeriodIndex(["2011-1", "2011-2", "2011-3"], freq="M") + frame = DataFrame(np.random.randn(3, 4), index=index) + + # it works! 
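+ # (pure smoke test: with a PeriodIndex the point is only that rendering + # does not raise; the exact output string is not asserted)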
+ frame.to_string() + + def test_datetime64tz_slice_non_truncate(self): + # GH 30263 + df = DataFrame({"x": date_range("2019", periods=10, tz="UTC")}) + expected = repr(df) + df = df.iloc[:, :5] + result = repr(df) + assert result == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/test_stack_unstack.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/test_stack_unstack.py new file mode 100644 index 0000000..005d260 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/test_stack_unstack.py @@ -0,0 +1,2153 @@ +from datetime import datetime +from io import StringIO +import itertools + +import numpy as np +import pytest + +from pandas.errors import PerformanceWarning + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + Period, + Series, + Timedelta, + date_range, +) +import pandas._testing as tm +from pandas.core.reshape import reshape as reshape_lib + + +class TestDataFrameReshape: + def test_stack_unstack(self, float_frame): + df = float_frame.copy() + df[:] = np.arange(np.prod(df.shape)).reshape(df.shape) + + stacked = df.stack() + stacked_df = DataFrame({"foo": stacked, "bar": stacked}) + + unstacked = stacked.unstack() + unstacked_df = stacked_df.unstack() + + tm.assert_frame_equal(unstacked, df) + tm.assert_frame_equal(unstacked_df["bar"], df) + + unstacked_cols = stacked.unstack(0) + unstacked_cols_df = stacked_df.unstack(0) + tm.assert_frame_equal(unstacked_cols.T, df) + tm.assert_frame_equal(unstacked_cols_df["bar"].T, df) + + def test_stack_mixed_level(self): + # GH 18310 + levels = [range(3), [3, "a", "b"], [1, 2]] + + # flat columns: + df = DataFrame(1, index=levels[0], columns=levels[1]) + result = df.stack() + expected = Series(1, index=MultiIndex.from_product(levels[:2])) + tm.assert_series_equal(result, expected) + + # MultiIndex columns: + df = DataFrame(1, index=levels[0], columns=MultiIndex.from_product(levels[1:])) + result = df.stack(1) + expected = DataFrame( + 1, index=MultiIndex.from_product([levels[0], levels[2]]), columns=levels[1] + ) + tm.assert_frame_equal(result, expected) + + # as above, but the labels used in the level are actually of homogeneous type + result = df[["a", "b"]].stack(1) + expected = expected[["a", "b"]] + tm.assert_frame_equal(result, expected) + + def test_unstack_not_consolidated(self, using_array_manager): + # GH#34708 + df = DataFrame({"x": [1, 2, np.NaN], "y": [3.0, 4, np.NaN]}) + df2 = df[["x"]] + df2["y"] = df["y"] + if not using_array_manager: + assert len(df2._mgr.blocks) == 2 + + res = df2.unstack() + expected = df.unstack() + tm.assert_series_equal(res, expected) + + def test_unstack_fill(self): + + # GH #9746: fill_value keyword argument for Series + # and DataFrame unstack + + # From a series + data = Series([1, 2, 4, 5], dtype=np.int16) + data.index = MultiIndex.from_tuples( + [("x", "a"), ("x", "b"), ("y", "b"), ("z", "a")] + ) + + result = data.unstack(fill_value=-1) + expected = DataFrame( + {"a": [1, -1, 5], "b": [2, 4, -1]}, index=["x", "y", "z"], dtype=np.int16 + ) + tm.assert_frame_equal(result, expected) + + # From a series with incorrect data type for fill_value + result = data.unstack(fill_value=0.5) + expected = DataFrame( + {"a": [1, 0.5, 5], "b": [2, 4, 0.5]}, index=["x", "y", "z"], dtype=float + ) + tm.assert_frame_equal(result, expected) + + # GH #13971: fill_value when unstacking multiple levels: + df = DataFrame( + {"x": ["a", "a", "b"], "y": ["j", "k", "j"], "z": [0, 1, 2], "w": [0, 1, 2]} + ).set_index(["x", "y", "z"]) + unstacked =
df.unstack(["x", "y"], fill_value=0) + key = ("w", "b", "j") + expected = unstacked[key] + result = Series([0, 0, 2], index=unstacked.index, name=key) + tm.assert_series_equal(result, expected) + + stacked = unstacked.stack(["x", "y"]) + stacked.index = stacked.index.reorder_levels(df.index.names) + # Workaround for GH #17886 (unnecessarily casts to float): + stacked = stacked.astype(np.int64) + result = stacked.loc[df.index] + tm.assert_frame_equal(result, df) + + # From a series + s = df["w"] + result = s.unstack(["x", "y"], fill_value=0) + expected = unstacked["w"] + tm.assert_frame_equal(result, expected) + + def test_unstack_fill_frame(self): + + # From a dataframe + rows = [[1, 2], [3, 4], [5, 6], [7, 8]] + df = DataFrame(rows, columns=list("AB"), dtype=np.int32) + df.index = MultiIndex.from_tuples( + [("x", "a"), ("x", "b"), ("y", "b"), ("z", "a")] + ) + + result = df.unstack(fill_value=-1) + + rows = [[1, 3, 2, 4], [-1, 5, -1, 6], [7, -1, 8, -1]] + expected = DataFrame(rows, index=list("xyz"), dtype=np.int32) + expected.columns = MultiIndex.from_tuples( + [("A", "a"), ("A", "b"), ("B", "a"), ("B", "b")] + ) + tm.assert_frame_equal(result, expected) + + # From a mixed type dataframe + df["A"] = df["A"].astype(np.int16) + df["B"] = df["B"].astype(np.float64) + + result = df.unstack(fill_value=-1) + expected["A"] = expected["A"].astype(np.int16) + expected["B"] = expected["B"].astype(np.float64) + tm.assert_frame_equal(result, expected) + + # From a dataframe with incorrect data type for fill_value + result = df.unstack(fill_value=0.5) + + rows = [[1, 3, 2, 4], [0.5, 5, 0.5, 6], [7, 0.5, 8, 0.5]] + expected = DataFrame(rows, index=list("xyz"), dtype=float) + expected.columns = MultiIndex.from_tuples( + [("A", "a"), ("A", "b"), ("B", "a"), ("B", "b")] + ) + tm.assert_frame_equal(result, expected) + + def test_unstack_fill_frame_datetime(self): + + # Test unstacking with date times + dv = date_range("2012-01-01", periods=4).values + data = Series(dv) + data.index = MultiIndex.from_tuples( + [("x", "a"), ("x", "b"), ("y", "b"), ("z", "a")] + ) + + result = data.unstack() + expected = DataFrame( + {"a": [dv[0], pd.NaT, dv[3]], "b": [dv[1], dv[2], pd.NaT]}, + index=["x", "y", "z"], + ) + tm.assert_frame_equal(result, expected) + + result = data.unstack(fill_value=dv[0]) + expected = DataFrame( + {"a": [dv[0], dv[0], dv[3]], "b": [dv[1], dv[2], dv[0]]}, + index=["x", "y", "z"], + ) + tm.assert_frame_equal(result, expected) + + def test_unstack_fill_frame_timedelta(self): + + # Test unstacking with time deltas + td = [Timedelta(days=i) for i in range(4)] + data = Series(td) + data.index = MultiIndex.from_tuples( + [("x", "a"), ("x", "b"), ("y", "b"), ("z", "a")] + ) + + result = data.unstack() + expected = DataFrame( + {"a": [td[0], pd.NaT, td[3]], "b": [td[1], td[2], pd.NaT]}, + index=["x", "y", "z"], + ) + tm.assert_frame_equal(result, expected) + + result = data.unstack(fill_value=td[1]) + expected = DataFrame( + {"a": [td[0], td[1], td[3]], "b": [td[1], td[2], td[1]]}, + index=["x", "y", "z"], + ) + tm.assert_frame_equal(result, expected) + + def test_unstack_fill_frame_period(self): + + # Test unstacking with period + periods = [ + Period("2012-01"), + Period("2012-02"), + Period("2012-03"), + Period("2012-04"), + ] + data = Series(periods) + data.index = MultiIndex.from_tuples( + [("x", "a"), ("x", "b"), ("y", "b"), ("z", "a")] + ) + + result = data.unstack() + expected = DataFrame( + {"a": [periods[0], None, periods[3]], "b": [periods[1], periods[2], None]}, + index=["x", "y", "z"], + 
) + tm.assert_frame_equal(result, expected) + + result = data.unstack(fill_value=periods[1]) + expected = DataFrame( + { + "a": [periods[0], periods[1], periods[3]], + "b": [periods[1], periods[2], periods[1]], + }, + index=["x", "y", "z"], + ) + tm.assert_frame_equal(result, expected) + + def test_unstack_fill_frame_categorical(self): + + # Test unstacking with categorical + data = Series(["a", "b", "c", "a"], dtype="category") + data.index = MultiIndex.from_tuples( + [("x", "a"), ("x", "b"), ("y", "b"), ("z", "a")] + ) + + # By default missing values will be NaN + result = data.unstack() + expected = DataFrame( + { + "a": pd.Categorical(list("axa"), categories=list("abc")), + "b": pd.Categorical(list("bcx"), categories=list("abc")), + }, + index=list("xyz"), + ) + tm.assert_frame_equal(result, expected) + + # Fill with non-category results in a ValueError + msg = r"Cannot setitem on a Categorical with a new category \(d\)" + with pytest.raises(TypeError, match=msg): + data.unstack(fill_value="d") + + # Fill with category value replaces missing values as expected + result = data.unstack(fill_value="c") + expected = DataFrame( + { + "a": pd.Categorical(list("aca"), categories=list("abc")), + "b": pd.Categorical(list("bcc"), categories=list("abc")), + }, + index=list("xyz"), + ) + tm.assert_frame_equal(result, expected) + + def test_unstack_tuplename_in_multiindex(self): + # GH 19966 + idx = MultiIndex.from_product( + [["a", "b", "c"], [1, 2, 3]], names=[("A", "a"), ("B", "b")] + ) + df = DataFrame({"d": [1] * 9, "e": [2] * 9}, index=idx) + result = df.unstack(("A", "a")) + + expected = DataFrame( + [[1, 1, 1, 2, 2, 2], [1, 1, 1, 2, 2, 2], [1, 1, 1, 2, 2, 2]], + columns=MultiIndex.from_tuples( + [ + ("d", "a"), + ("d", "b"), + ("d", "c"), + ("e", "a"), + ("e", "b"), + ("e", "c"), + ], + names=[None, ("A", "a")], + ), + index=Index([1, 2, 3], name=("B", "b")), + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "unstack_idx, expected_values, expected_index, expected_columns", + [ + ( + ("A", "a"), + [[1, 1, 2, 2], [1, 1, 2, 2], [1, 1, 2, 2], [1, 1, 2, 2]], + MultiIndex.from_tuples( + [(1, 3), (1, 4), (2, 3), (2, 4)], names=["B", "C"] + ), + MultiIndex.from_tuples( + [("d", "a"), ("d", "b"), ("e", "a"), ("e", "b")], + names=[None, ("A", "a")], + ), + ), + ( + (("A", "a"), "B"), + [[1, 1, 1, 1, 2, 2, 2, 2], [1, 1, 1, 1, 2, 2, 2, 2]], + Index([3, 4], name="C"), + MultiIndex.from_tuples( + [ + ("d", "a", 1), + ("d", "a", 2), + ("d", "b", 1), + ("d", "b", 2), + ("e", "a", 1), + ("e", "a", 2), + ("e", "b", 1), + ("e", "b", 2), + ], + names=[None, ("A", "a"), "B"], + ), + ), + ], + ) + def test_unstack_mixed_type_name_in_multiindex( + self, unstack_idx, expected_values, expected_index, expected_columns + ): + # GH 19966 + idx = MultiIndex.from_product( + [["a", "b"], [1, 2], [3, 4]], names=[("A", "a"), "B", "C"] + ) + df = DataFrame({"d": [1] * 8, "e": [2] * 8}, index=idx) + result = df.unstack(unstack_idx) + + expected = DataFrame( + expected_values, columns=expected_columns, index=expected_index + ) + tm.assert_frame_equal(result, expected) + + def test_unstack_preserve_dtypes(self): + # Checks fix for #11847 + df = DataFrame( + { + "state": ["IL", "MI", "NC"], + "index": ["a", "b", "c"], + "some_categories": Series(["a", "b", "c"]).astype("category"), + "A": np.random.rand(3), + "B": 1, + "C": "foo", + "D": pd.Timestamp("20010102"), + "E": Series([1.0, 50.0, 100.0]).astype("float32"), + "F": Series([3.0, 4.0, 5.0]).astype("float64"), + "G": False, + "H": Series([1, 200, 
923442]).astype("int8"), + } + ) + + def unstack_and_compare(df, column_name): + unstacked1 = df.unstack([column_name]) + unstacked2 = df.unstack(column_name) + tm.assert_frame_equal(unstacked1, unstacked2) + + df1 = df.set_index(["state", "index"]) + unstack_and_compare(df1, "index") + + df1 = df.set_index(["state", "some_categories"]) + unstack_and_compare(df1, "some_categories") + + df1 = df.set_index(["F", "C"]) + unstack_and_compare(df1, "F") + + df1 = df.set_index(["G", "B", "state"]) + unstack_and_compare(df1, "B") + + df1 = df.set_index(["E", "A"]) + unstack_and_compare(df1, "E") + + df1 = df.set_index(["state", "index"]) + s = df1["A"] + unstack_and_compare(s, "index") + + def test_stack_ints(self): + columns = MultiIndex.from_tuples(list(itertools.product(range(3), repeat=3))) + df = DataFrame(np.random.randn(30, 27), columns=columns) + + tm.assert_frame_equal(df.stack(level=[1, 2]), df.stack(level=1).stack(level=1)) + tm.assert_frame_equal( + df.stack(level=[-2, -1]), df.stack(level=1).stack(level=1) + ) + + df_named = df.copy() + return_value = df_named.columns.set_names(range(3), inplace=True) + assert return_value is None + + tm.assert_frame_equal( + df_named.stack(level=[1, 2]), df_named.stack(level=1).stack(level=1) + ) + + def test_stack_mixed_levels(self): + columns = MultiIndex.from_tuples( + [ + ("A", "cat", "long"), + ("B", "cat", "long"), + ("A", "dog", "short"), + ("B", "dog", "short"), + ], + names=["exp", "animal", "hair_length"], + ) + df = DataFrame(np.random.randn(4, 4), columns=columns) + + animal_hair_stacked = df.stack(level=["animal", "hair_length"]) + exp_hair_stacked = df.stack(level=["exp", "hair_length"]) + + # GH #8584: Need to check that stacking works when a number + # is passed that is both a level name and in the range of + # the level numbers + df2 = df.copy() + df2.columns.names = ["exp", "animal", 1] + tm.assert_frame_equal( + df2.stack(level=["animal", 1]), animal_hair_stacked, check_names=False + ) + tm.assert_frame_equal( + df2.stack(level=["exp", 1]), exp_hair_stacked, check_names=False + ) + + # When mixed types are passed and the ints are not level + # names, raise + msg = ( + "level should contain all level names or all level numbers, not " + "a mixture of the two" + ) + with pytest.raises(ValueError, match=msg): + df2.stack(level=["animal", 0]) + + # GH #8584: Having 0 in the level names could raise a + # strange error about lexsort depth + df3 = df.copy() + df3.columns.names = ["exp", "animal", 0] + tm.assert_frame_equal( + df3.stack(level=["animal", 0]), animal_hair_stacked, check_names=False + ) + + def test_stack_int_level_names(self): + columns = MultiIndex.from_tuples( + [ + ("A", "cat", "long"), + ("B", "cat", "long"), + ("A", "dog", "short"), + ("B", "dog", "short"), + ], + names=["exp", "animal", "hair_length"], + ) + df = DataFrame(np.random.randn(4, 4), columns=columns) + + exp_animal_stacked = df.stack(level=["exp", "animal"]) + animal_hair_stacked = df.stack(level=["animal", "hair_length"]) + exp_hair_stacked = df.stack(level=["exp", "hair_length"]) + + df2 = df.copy() + df2.columns.names = [0, 1, 2] + tm.assert_frame_equal( + df2.stack(level=[1, 2]), animal_hair_stacked, check_names=False + ) + tm.assert_frame_equal( + df2.stack(level=[0, 1]), exp_animal_stacked, check_names=False + ) + tm.assert_frame_equal( + df2.stack(level=[0, 2]), exp_hair_stacked, check_names=False + ) + + # Out-of-order int column names + df3 = df.copy() + df3.columns.names = [2, 0, 1] + tm.assert_frame_equal( + df3.stack(level=[0, 1]), 
animal_hair_stacked, check_names=False + ) + tm.assert_frame_equal( + df3.stack(level=[2, 0]), exp_animal_stacked, check_names=False + ) + tm.assert_frame_equal( + df3.stack(level=[2, 1]), exp_hair_stacked, check_names=False + ) + + def test_unstack_bool(self): + df = DataFrame( + [False, False], + index=MultiIndex.from_arrays([["a", "b"], ["c", "l"]]), + columns=["col"], + ) + rs = df.unstack() + xp = DataFrame( + np.array([[False, np.nan], [np.nan, False]], dtype=object), + index=["a", "b"], + columns=MultiIndex.from_arrays([["col", "col"], ["c", "l"]]), + ) + tm.assert_frame_equal(rs, xp) + + def test_unstack_level_binding(self): + # GH9856 + mi = MultiIndex( + levels=[["foo", "bar"], ["one", "two"], ["a", "b"]], + codes=[[0, 0, 1, 1], [0, 1, 0, 1], [1, 0, 1, 0]], + names=["first", "second", "third"], + ) + s = Series(0, index=mi) + result = s.unstack([1, 2]).stack(0) + + expected_mi = MultiIndex( + levels=[["foo", "bar"], ["one", "two"]], + codes=[[0, 0, 1, 1], [0, 1, 0, 1]], + names=["first", "second"], + ) + + expected = DataFrame( + np.array( + [[np.nan, 0], [0, np.nan], [np.nan, 0], [0, np.nan]], dtype=np.float64 + ), + index=expected_mi, + columns=Index(["a", "b"], name="third"), + ) + + tm.assert_frame_equal(result, expected) + + def test_unstack_to_series(self, float_frame): + # check reversibility + data = float_frame.unstack() + + assert isinstance(data, Series) + undo = data.unstack().T + tm.assert_frame_equal(undo, float_frame) + + # check NA handling + data = DataFrame({"x": [1, 2, np.NaN], "y": [3.0, 4, np.NaN]}) + data.index = Index(["a", "b", "c"]) + result = data.unstack() + + midx = MultiIndex( + levels=[["x", "y"], ["a", "b", "c"]], + codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], + ) + expected = Series([1, 2, np.NaN, 3, 4, np.NaN], index=midx) + + tm.assert_series_equal(result, expected) + + # check composability of unstack + old_data = data.copy() + for _ in range(4): + data = data.unstack() + tm.assert_frame_equal(old_data, data) + + def test_unstack_dtypes(self): + + # GH 2929 + rows = [[1, 1, 3, 4], [1, 2, 3, 4], [2, 1, 3, 4], [2, 2, 3, 4]] + + df = DataFrame(rows, columns=list("ABCD")) + result = df.dtypes + expected = Series([np.dtype("int64")] * 4, index=list("ABCD")) + tm.assert_series_equal(result, expected) + + # single dtype + df2 = df.set_index(["A", "B"]) + df3 = df2.unstack("B") + result = df3.dtypes + expected = Series( + [np.dtype("int64")] * 4, + index=MultiIndex.from_arrays( + [["C", "C", "D", "D"], [1, 2, 1, 2]], names=(None, "B") + ), + ) + tm.assert_series_equal(result, expected) + + # mixed + df2 = df.set_index(["A", "B"]) + df2["C"] = 3.0 + df3 = df2.unstack("B") + result = df3.dtypes + expected = Series( + [np.dtype("float64")] * 2 + [np.dtype("int64")] * 2, + index=MultiIndex.from_arrays( + [["C", "C", "D", "D"], [1, 2, 1, 2]], names=(None, "B") + ), + ) + tm.assert_series_equal(result, expected) + df2["D"] = "foo" + df3 = df2.unstack("B") + result = df3.dtypes + expected = Series( + [np.dtype("float64")] * 2 + [np.dtype("object")] * 2, + index=MultiIndex.from_arrays( + [["C", "C", "D", "D"], [1, 2, 1, 2]], names=(None, "B") + ), + ) + tm.assert_series_equal(result, expected) + + # GH7405 + for c, d in ( + (np.zeros(5), np.zeros(5)), + (np.arange(5, dtype="f8"), np.arange(5, 10, dtype="f8")), + ): + + df = DataFrame( + { + "A": ["a"] * 5, + "C": c, + "D": d, + "B": date_range("2012-01-01", periods=5), + } + ) + + right = df.iloc[:3].copy(deep=True) + + df = df.set_index(["A", "B"]) + df["D"] = df["D"].astype("int64") + + left = 
df.iloc[:3].unstack(0) + right = right.set_index(["A", "B"]).unstack(0) + right[("D", "a")] = right[("D", "a")].astype("int64") + + assert left.shape == (3, 2) + tm.assert_frame_equal(left, right) + + def test_unstack_non_unique_index_names(self): + idx = MultiIndex.from_tuples([("a", "b"), ("c", "d")], names=["c1", "c1"]) + df = DataFrame([1, 2], index=idx) + msg = "The name c1 occurs multiple times, use a level number" + with pytest.raises(ValueError, match=msg): + df.unstack("c1") + + with pytest.raises(ValueError, match=msg): + df.T.stack("c1") + + def test_unstack_unused_levels(self): + # GH 17845: unused codes in index make unstack() cast int to float + idx = MultiIndex.from_product([["a"], ["A", "B", "C", "D"]])[:-1] + df = DataFrame([[1, 0]] * 3, index=idx) + + result = df.unstack() + exp_col = MultiIndex.from_product([[0, 1], ["A", "B", "C"]]) + expected = DataFrame([[1, 1, 1, 0, 0, 0]], index=["a"], columns=exp_col) + tm.assert_frame_equal(result, expected) + assert (result.columns.levels[1] == idx.levels[1]).all() + + # Unused items on both levels + levels = [[0, 1, 7], [0, 1, 2, 3]] + codes = [[0, 0, 1, 1], [0, 2, 0, 2]] + idx = MultiIndex(levels, codes) + block = np.arange(4).reshape(2, 2) + df = DataFrame(np.concatenate([block, block + 4]), index=idx) + result = df.unstack() + expected = DataFrame( + np.concatenate([block * 2, block * 2 + 1], axis=1), columns=idx + ) + tm.assert_frame_equal(result, expected) + assert (result.columns.levels[1] == idx.levels[1]).all() + + # With mixed dtype and NaN + levels = [["a", 2, "c"], [1, 3, 5, 7]] + codes = [[0, -1, 1, 1], [0, 2, -1, 2]] + idx = MultiIndex(levels, codes) + data = np.arange(8) + df = DataFrame(data.reshape(4, 2), index=idx) + + cases = ( + (0, [13, 16, 6, 9, 2, 5, 8, 11], [np.nan, "a", 2], [np.nan, 5, 1]), + (1, [8, 11, 1, 4, 12, 15, 13, 16], [np.nan, 5, 1], [np.nan, "a", 2]), + ) + for level, idces, col_level, idx_level in cases: + result = df.unstack(level=level) + exp_data = np.zeros(18) * np.nan + exp_data[idces] = data + cols = MultiIndex.from_product([[0, 1], col_level]) + expected = DataFrame(exp_data.reshape(3, 6), index=idx_level, columns=cols) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("cols", [["A", "C"], slice(None)]) + def test_unstack_unused_level(self, cols): + # GH 18562: unused codes on the unstacked level + df = DataFrame([[2010, "a", "I"], [2011, "b", "II"]], columns=["A", "B", "C"]) + + ind = df.set_index(["A", "B", "C"], drop=False) + selection = ind.loc[(slice(None), slice(None), "I"), cols] + result = selection.unstack() + + expected = ind.iloc[[0]][cols] + expected.columns = MultiIndex.from_product( + [expected.columns, ["I"]], names=[None, "C"] + ) + expected.index = expected.index.droplevel("C") + tm.assert_frame_equal(result, expected) + + def test_unstack_long_index(self): + # GH 32624: Error when using a lot of indices to unstack. + # The error occurred only if a lot of indices are used. 
+ df = DataFrame( + [[1]], + columns=MultiIndex.from_tuples([[0]], names=["c1"]), + index=MultiIndex.from_tuples( + [[0, 0, 1, 0, 0, 0, 1]], + names=["i1", "i2", "i3", "i4", "i5", "i6", "i7"], + ), + ) + result = df.unstack(["i2", "i3", "i4", "i5", "i6", "i7"]) + expected = DataFrame( + [[1]], + columns=MultiIndex.from_tuples( + [[0, 0, 1, 0, 0, 0, 1]], + names=["c1", "i2", "i3", "i4", "i5", "i6", "i7"], + ), + index=Index([0], name="i1"), + ) + tm.assert_frame_equal(result, expected) + + def test_unstack_multi_level_cols(self): + # GH 24729: Unstack a df with multi level columns + df = DataFrame( + [[0.0, 0.0], [0.0, 0.0]], + columns=MultiIndex.from_tuples( + [["B", "C"], ["B", "D"]], names=["c1", "c2"] + ), + index=MultiIndex.from_tuples( + [[10, 20, 30], [10, 20, 40]], names=["i1", "i2", "i3"] + ), + ) + assert df.unstack(["i2", "i1"]).columns.names[-2:] == ["i2", "i1"] + + def test_unstack_multi_level_rows_and_cols(self): + # GH 28306: Unstack df with multi level cols and rows + df = DataFrame( + [[1, 2], [3, 4], [-1, -2], [-3, -4]], + columns=MultiIndex.from_tuples([["a", "b", "c"], ["d", "e", "f"]]), + index=MultiIndex.from_tuples( + [ + ["m1", "P3", 222], + ["m1", "A5", 111], + ["m2", "P3", 222], + ["m2", "A5", 111], + ], + names=["i1", "i2", "i3"], + ), + ) + result = df.unstack(["i3", "i2"]) + expected = df.unstack(["i3"]).unstack(["i2"]) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("idx", [("jim", "joe"), ("joe", "jim")]) + @pytest.mark.parametrize("lev", list(range(2))) + def test_unstack_nan_index1(self, idx, lev): + # GH7466 + def cast(val): + val_str = "" if val != val else val + return f"{val_str:1}" + + df = DataFrame( + { + "jim": ["a", "b", np.nan, "d"], + "joe": ["w", "x", "y", "z"], + "jolie": ["a.w", "b.x", " .y", "d.z"], + } + ) + + left = df.set_index(["jim", "joe"]).unstack()["jolie"] + right = df.set_index(["joe", "jim"]).unstack()["jolie"].T + tm.assert_frame_equal(left, right) + + mi = df.set_index(list(idx)) + udf = mi.unstack(level=lev) + assert udf.notna().values.sum() == len(df) + mk_list = lambda a: list(a) if isinstance(a, tuple) else [a] + rows, cols = udf["jolie"].notna().values.nonzero() + for i, j in zip(rows, cols): + left = sorted(udf["jolie"].iloc[i, j].split(".")) + right = mk_list(udf["jolie"].index[i]) + mk_list(udf["jolie"].columns[j]) + right = sorted(map(cast, right)) + assert left == right + + @pytest.mark.parametrize("idx", itertools.permutations(["1st", "2nd", "3rd"])) + @pytest.mark.parametrize("lev", list(range(3))) + @pytest.mark.parametrize("col", ["4th", "5th"]) + def test_unstack_nan_index_repeats(self, idx, lev, col): + def cast(val): + val_str = "" if val != val else val + return f"{val_str:1}" + + df = DataFrame( + { + "1st": ["d"] * 3 + + [np.nan] * 5 + + ["a"] * 2 + + ["c"] * 3 + + ["e"] * 2 + + ["b"] * 5, + "2nd": ["y"] * 2 + + ["w"] * 3 + + [np.nan] * 3 + + ["z"] * 4 + + [np.nan] * 3 + + ["x"] * 3 + + [np.nan] * 2, + "3rd": [ + 67, + 39, + 53, + 72, + 57, + 80, + 31, + 18, + 11, + 30, + 59, + 50, + 62, + 59, + 76, + 52, + 14, + 53, + 60, + 51, + ], + } + ) + + df["4th"], df["5th"] = ( + df.apply(lambda r: ".".join(map(cast, r)), axis=1), + df.apply(lambda r: ".".join(map(cast, r.iloc[::-1])), axis=1), + ) + + mi = df.set_index(list(idx)) + udf = mi.unstack(level=lev) + assert udf.notna().values.sum() == 2 * len(df) + mk_list = lambda a: list(a) if isinstance(a, tuple) else [a] + rows, cols = udf[col].notna().values.nonzero() + for i, j in zip(rows, cols): + left = sorted(udf[col].iloc[i, j].split(".")) 
+ right = mk_list(udf[col].index[i]) + mk_list(udf[col].columns[j]) + right = sorted(map(cast, right)) + assert left == right + + def test_unstack_nan_index2(self): + # GH7403 + df = DataFrame({"A": list("aaaabbbb"), "B": range(8), "C": range(8)}) + df.iloc[3, 1] = np.NaN + left = df.set_index(["A", "B"]).unstack(0) + + vals = [ + [3, 0, 1, 2, np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan, 4, 5, 6, 7], + ] + vals = list(map(list, zip(*vals))) + idx = Index([np.nan, 0, 1, 2, 4, 5, 6, 7], name="B") + cols = MultiIndex( + levels=[["C"], ["a", "b"]], codes=[[0, 0], [0, 1]], names=[None, "A"] + ) + + right = DataFrame(vals, columns=cols, index=idx) + tm.assert_frame_equal(left, right) + + df = DataFrame({"A": list("aaaabbbb"), "B": list(range(4)) * 2, "C": range(8)}) + df.iloc[2, 1] = np.NaN + left = df.set_index(["A", "B"]).unstack(0) + + vals = [[2, np.nan], [0, 4], [1, 5], [np.nan, 6], [3, 7]] + cols = MultiIndex( + levels=[["C"], ["a", "b"]], codes=[[0, 0], [0, 1]], names=[None, "A"] + ) + idx = Index([np.nan, 0, 1, 2, 3], name="B") + right = DataFrame(vals, columns=cols, index=idx) + tm.assert_frame_equal(left, right) + + df = DataFrame({"A": list("aaaabbbb"), "B": list(range(4)) * 2, "C": range(8)}) + df.iloc[3, 1] = np.NaN + left = df.set_index(["A", "B"]).unstack(0) + + vals = [[3, np.nan], [0, 4], [1, 5], [2, 6], [np.nan, 7]] + cols = MultiIndex( + levels=[["C"], ["a", "b"]], codes=[[0, 0], [0, 1]], names=[None, "A"] + ) + idx = Index([np.nan, 0, 1, 2, 3], name="B") + right = DataFrame(vals, columns=cols, index=idx) + tm.assert_frame_equal(left, right) + + def test_unstack_nan_index3(self, using_array_manager): + # GH7401 + df = DataFrame( + { + "A": list("aaaaabbbbb"), + "B": (date_range("2012-01-01", periods=5).tolist() * 2), + "C": np.arange(10), + } + ) + + df.iloc[3, 1] = np.NaN + left = df.set_index(["A", "B"]).unstack() + + vals = np.array([[3, 0, 1, 2, np.nan, 4], [np.nan, 5, 6, 7, 8, 9]]) + idx = Index(["a", "b"], name="A") + cols = MultiIndex( + levels=[["C"], date_range("2012-01-01", periods=5)], + codes=[[0, 0, 0, 0, 0, 0], [-1, 0, 1, 2, 3, 4]], + names=[None, "B"], + ) + + right = DataFrame(vals, columns=cols, index=idx) + if using_array_manager: + # INFO(ArrayManager) with ArrayManager preserve dtype where possible + cols = right.columns[[1, 2, 3, 5]] + right[cols] = right[cols].astype(df["C"].dtype) + tm.assert_frame_equal(left, right) + + def test_unstack_nan_index4(self): + # GH4862 + vals = [ + ["Hg", np.nan, np.nan, 680585148], + ["U", 0.0, np.nan, 680585148], + ["Pb", 7.07e-06, np.nan, 680585148], + ["Sn", 2.3614e-05, 0.0133, 680607017], + ["Ag", 0.0, 0.0133, 680607017], + ["Hg", -0.00015, 0.0133, 680607017], + ] + df = DataFrame( + vals, + columns=["agent", "change", "dosage", "s_id"], + index=[17263, 17264, 17265, 17266, 17267, 17268], + ) + + left = df.copy().set_index(["s_id", "dosage", "agent"]).unstack() + + vals = [ + [np.nan, np.nan, 7.07e-06, np.nan, 0.0], + [0.0, -0.00015, np.nan, 2.3614e-05, np.nan], + ] + + idx = MultiIndex( + levels=[[680585148, 680607017], [0.0133]], + codes=[[0, 1], [-1, 0]], + names=["s_id", "dosage"], + ) + + cols = MultiIndex( + levels=[["change"], ["Ag", "Hg", "Pb", "Sn", "U"]], + codes=[[0, 0, 0, 0, 0], [0, 1, 2, 3, 4]], + names=[None, "agent"], + ) + + right = DataFrame(vals, columns=cols, index=idx) + tm.assert_frame_equal(left, right) + + left = df.loc[17264:].copy().set_index(["s_id", "dosage", "agent"]) + tm.assert_frame_equal(left.unstack(), right) + + def test_unstack_nan_index5(self): + # GH9497 - 
multiple unstack with nulls + df = DataFrame( + { + "1st": [1, 2, 1, 2, 1, 2], + "2nd": date_range("2014-02-01", periods=6, freq="D"), + "jim": 100 + np.arange(6), + "joe": (np.random.randn(6) * 10).round(2), + } + ) + + df["3rd"] = df["2nd"] - pd.Timestamp("2014-02-02") + df.loc[1, "2nd"] = df.loc[3, "2nd"] = np.nan + df.loc[1, "3rd"] = df.loc[4, "3rd"] = np.nan + + left = df.set_index(["1st", "2nd", "3rd"]).unstack(["2nd", "3rd"]) + assert left.notna().values.sum() == 2 * len(df) + + for col in ["jim", "joe"]: + for _, r in df.iterrows(): + key = r["1st"], (col, r["2nd"], r["3rd"]) + assert r[col] == left.loc[key] + + def test_stack_datetime_column_multiIndex(self): + # GH 8039 + t = datetime(2014, 1, 1) + df = DataFrame([1, 2, 3, 4], columns=MultiIndex.from_tuples([(t, "A", "B")])) + result = df.stack() + + eidx = MultiIndex.from_product([(0, 1, 2, 3), ("B",)]) + ecols = MultiIndex.from_tuples([(t, "A")]) + expected = DataFrame([1, 2, 3, 4], index=eidx, columns=ecols) + tm.assert_frame_equal(result, expected) + + def test_stack_partial_multiIndex(self): + # GH 8844 + def _test_stack_with_multiindex(multiindex): + df = DataFrame( + np.arange(3 * len(multiindex)).reshape(3, len(multiindex)), + columns=multiindex, + ) + for level in (-1, 0, 1, [0, 1], [1, 0]): + result = df.stack(level=level, dropna=False) + + if isinstance(level, int): + # Stacking a single level should not make any all-NaN rows, + # so df.stack(level=level, dropna=False) should be the same + # as df.stack(level=level, dropna=True). + expected = df.stack(level=level, dropna=True) + if isinstance(expected, Series): + tm.assert_series_equal(result, expected) + else: + tm.assert_frame_equal(result, expected) + + df.columns = MultiIndex.from_tuples( + df.columns.to_numpy(), names=df.columns.names + ) + expected = df.stack(level=level, dropna=False) + if isinstance(expected, Series): + tm.assert_series_equal(result, expected) + else: + tm.assert_frame_equal(result, expected) + + full_multiindex = MultiIndex.from_tuples( + [("B", "x"), ("B", "z"), ("A", "y"), ("C", "x"), ("C", "u")], + names=["Upper", "Lower"], + ) + for multiindex_columns in ( + [0, 1, 2, 3, 4], + [0, 1, 2, 3], + [0, 1, 2, 4], + [0, 1, 2], + [1, 2, 3], + [2, 3, 4], + [0, 1], + [0, 2], + [0, 3], + [0], + [2], + [4], + ): + _test_stack_with_multiindex(full_multiindex[multiindex_columns]) + if len(multiindex_columns) > 1: + multiindex_columns.reverse() + _test_stack_with_multiindex(full_multiindex[multiindex_columns]) + + df = DataFrame(np.arange(6).reshape(2, 3), columns=full_multiindex[[0, 1, 3]]) + result = df.stack(dropna=False) + expected = DataFrame( + [[0, 2], [1, np.nan], [3, 5], [4, np.nan]], + index=MultiIndex( + levels=[[0, 1], ["u", "x", "y", "z"]], + codes=[[0, 0, 1, 1], [1, 3, 1, 3]], + names=[None, "Lower"], + ), + columns=Index(["B", "C"], name="Upper"), + ) + expected["B"] = expected["B"].astype(df.dtypes[0]) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("ordered", [False, True]) + @pytest.mark.parametrize("labels", [list("yxz"), list("yxy")]) + def test_stack_preserve_categorical_dtype(self, ordered, labels): + # GH13854 + cidx = pd.CategoricalIndex(labels, categories=list("xyz"), ordered=ordered) + df = DataFrame([[10, 11, 12]], columns=cidx) + result = df.stack() + + # `MultiIndex.from_product` preserves categorical dtype - + # it's tested elsewhere. 
+ midx = MultiIndex.from_product([df.index, cidx]) + expected = Series([10, 11, 12], index=midx) + + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("ordered", [False, True]) + @pytest.mark.parametrize( + "labels,data", + [ + (list("xyz"), [10, 11, 12, 13, 14, 15]), + (list("zyx"), [14, 15, 12, 13, 10, 11]), + ], + ) + def test_stack_multi_preserve_categorical_dtype(self, ordered, labels, data): + # GH-36991 + cidx = pd.CategoricalIndex(labels, categories=sorted(labels), ordered=ordered) + cidx2 = pd.CategoricalIndex(["u", "v"], ordered=ordered) + midx = MultiIndex.from_product([cidx, cidx2]) + df = DataFrame([sorted(data)], columns=midx) + result = df.stack([0, 1]) + + s_cidx = pd.CategoricalIndex(sorted(labels), ordered=ordered) + expected = Series(data, index=MultiIndex.from_product([[0], s_cidx, cidx2])) + + tm.assert_series_equal(result, expected) + + def test_stack_preserve_categorical_dtype_values(self): + # GH-23077 + cat = pd.Categorical(["a", "a", "b", "c"]) + df = DataFrame({"A": cat, "B": cat}) + result = df.stack() + index = MultiIndex.from_product([[0, 1, 2, 3], ["A", "B"]]) + expected = Series( + pd.Categorical(["a", "a", "a", "a", "b", "b", "c", "c"]), index=index + ) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "index, columns", + [ + ([0, 0, 1, 1], MultiIndex.from_product([[1, 2], ["a", "b"]])), + ([0, 0, 2, 3], MultiIndex.from_product([[1, 2], ["a", "b"]])), + ([0, 1, 2, 3], MultiIndex.from_product([[1, 2], ["a", "b"]])), + ], + ) + def test_stack_multi_columns_non_unique_index(self, index, columns): + # GH-28301 + df = DataFrame(index=index, columns=columns).fillna(1) + stacked = df.stack() + new_index = MultiIndex.from_tuples(stacked.index.to_numpy()) + expected = DataFrame( + stacked.to_numpy(), index=new_index, columns=stacked.columns + ) + tm.assert_frame_equal(stacked, expected) + stacked_codes = np.asarray(stacked.index.codes) + expected_codes = np.asarray(new_index.codes) + tm.assert_numpy_array_equal(stacked_codes, expected_codes) + + @pytest.mark.parametrize("level", [0, 1]) + def test_unstack_mixed_extension_types(self, level): + index = MultiIndex.from_tuples([("A", 0), ("A", 1), ("B", 1)], names=["a", "b"]) + df = DataFrame( + { + "A": pd.array([0, 1, None], dtype="Int64"), + "B": pd.Categorical(["a", "a", "b"]), + }, + index=index, + ) + + result = df.unstack(level=level) + expected = df.astype(object).unstack(level=level) + + expected_dtypes = Series( + [df.A.dtype] * 2 + [df.B.dtype] * 2, index=result.columns + ) + tm.assert_series_equal(result.dtypes, expected_dtypes) + tm.assert_frame_equal(result.astype(object), expected) + + @pytest.mark.parametrize("level", [0, "baz"]) + def test_unstack_swaplevel_sortlevel(self, level): + # GH 20994 + mi = MultiIndex.from_product([[0], ["d", "c"]], names=["bar", "baz"]) + df = DataFrame([[0, 2], [1, 3]], index=mi, columns=["B", "A"]) + df.columns.name = "foo" + + expected = DataFrame( + [[3, 1, 2, 0]], + columns=MultiIndex.from_tuples( + [("c", "A"), ("c", "B"), ("d", "A"), ("d", "B")], names=["baz", "foo"] + ), + ) + expected.index.name = "bar" + + result = df.unstack().swaplevel(axis=1).sort_index(axis=1, level=level) + tm.assert_frame_equal(result, expected) + + +def test_unstack_fill_frame_object(): + # GH12815 Test unstacking with object. 
+ data = Series(["a", "b", "c", "a"], dtype="object") + data.index = MultiIndex.from_tuples( + [("x", "a"), ("x", "b"), ("y", "b"), ("z", "a")] + ) + + # By default missing values will be NaN + result = data.unstack() + expected = DataFrame( + {"a": ["a", np.nan, "a"], "b": ["b", "c", np.nan]}, index=list("xyz") + ) + tm.assert_frame_equal(result, expected) + + # Fill with any value replaces missing values as expected + result = data.unstack(fill_value="d") + expected = DataFrame( + {"a": ["a", "d", "a"], "b": ["b", "c", "d"]}, index=list("xyz") + ) + tm.assert_frame_equal(result, expected) + + +def test_unstack_timezone_aware_values(): + # GH 18338 + df = DataFrame( + { + "timestamp": [pd.Timestamp("2017-08-27 01:00:00.709949+0000", tz="UTC")], + "a": ["a"], + "b": ["b"], + "c": ["c"], + }, + columns=["timestamp", "a", "b", "c"], + ) + result = df.set_index(["a", "b"]).unstack() + expected = DataFrame( + [[pd.Timestamp("2017-08-27 01:00:00.709949+0000", tz="UTC"), "c"]], + index=Index(["a"], name="a"), + columns=MultiIndex( + levels=[["timestamp", "c"], ["b"]], + codes=[[0, 1], [0, 0]], + names=[None, "b"], + ), + ) + tm.assert_frame_equal(result, expected) + + +def test_stack_timezone_aware_values(): + # GH 19420 + ts = date_range(freq="D", start="20180101", end="20180103", tz="America/New_York") + df = DataFrame({"A": ts}, index=["a", "b", "c"]) + result = df.stack() + expected = Series( + ts, + index=MultiIndex(levels=[["a", "b", "c"], ["A"]], codes=[[0, 1, 2], [0, 0, 0]]), + ) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("dropna", [True, False]) +def test_stack_empty_frame(dropna): + # GH 36113 + expected = Series(index=MultiIndex([[], []], [[], []]), dtype=np.float64) + result = DataFrame(dtype=np.float64).stack(dropna=dropna) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("dropna", [True, False]) +@pytest.mark.parametrize("fill_value", [None, 0]) +def test_stack_unstack_empty_frame(dropna, fill_value): + # GH 36113 + result = ( + DataFrame(dtype=np.int64).stack(dropna=dropna).unstack(fill_value=fill_value) + ) + expected = DataFrame(dtype=np.int64) + tm.assert_frame_equal(result, expected) + + +def test_unstack_single_index_series(): + # GH 36113 + msg = r"index must be a MultiIndex to unstack.*" + with pytest.raises(ValueError, match=msg): + Series(dtype=np.int64).unstack() + + +def test_unstacking_multi_index_df(): + # see gh-30740 + df = DataFrame( + { + "name": ["Alice", "Bob"], + "score": [9.5, 8], + "employed": [False, True], + "kids": [0, 0], + "gender": ["female", "male"], + } + ) + df = df.set_index(["name", "employed", "kids", "gender"]) + df = df.unstack(["gender"], fill_value=0) + expected = df.unstack("employed", fill_value=0).unstack("kids", fill_value=0) + result = df.unstack(["employed", "kids"], fill_value=0) + expected = DataFrame( + [[9.5, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 8.0]], + index=Index(["Alice", "Bob"], name="name"), + columns=MultiIndex.from_tuples( + [ + ("score", "female", False, 0), + ("score", "female", True, 0), + ("score", "male", False, 0), + ("score", "male", True, 0), + ], + names=[None, "gender", "employed", "kids"], + ), + ) + tm.assert_frame_equal(result, expected) + + +def test_stack_positional_level_duplicate_column_names(): + # https://github.com/pandas-dev/pandas/issues/36353 + columns = MultiIndex.from_product([("x", "y"), ("y", "z")], names=["a", "a"]) + df = DataFrame([[1, 1, 1, 1]], columns=columns) + result = df.stack(0) + + new_columns = Index(["y", "z"], name="a") + new_index = 
MultiIndex.from_tuples([(0, "x"), (0, "y")], names=[None, "a"]) + expected = DataFrame([[1, 1], [1, 1]], index=new_index, columns=new_columns) + + tm.assert_frame_equal(result, expected) + + +def test_unstack_non_slice_like_blocks(using_array_manager): + # Case where the mgr_locs of a DataFrame's underlying blocks are not slice-like + + mi = MultiIndex.from_product([range(5), ["A", "B", "C"]]) + df = DataFrame(np.random.randn(15, 4), index=mi) + df[1] = df[1].astype(np.int64) + if not using_array_manager: + assert any(not x.mgr_locs.is_slice_like for x in df._mgr.blocks) + + res = df.unstack() + + expected = pd.concat([df[n].unstack() for n in range(4)], keys=range(4), axis=1) + tm.assert_frame_equal(res, expected) + + +class TestStackUnstackMultiLevel: + def test_unstack(self, multiindex_year_month_day_dataframe_random_data): + # just check that it works for now + ymd = multiindex_year_month_day_dataframe_random_data + + unstacked = ymd.unstack() + unstacked.unstack() + + # test that ints work + ymd.astype(int).unstack() + + # test that int32 works + ymd.astype(np.int32).unstack() + + @pytest.mark.parametrize( + "result_rows,result_columns,index_product,expected_row", + [ + ( + [[1, 1, None, None, 30.0, None], [2, 2, None, None, 30.0, None]], + ["ix1", "ix2", "col1", "col2", "col3", "col4"], + 2, + [None, None, 30.0, None], + ), + ( + [[1, 1, None, None, 30.0], [2, 2, None, None, 30.0]], + ["ix1", "ix2", "col1", "col2", "col3"], + 2, + [None, None, 30.0], + ), + ( + [[1, 1, None, None, 30.0], [2, None, None, None, 30.0]], + ["ix1", "ix2", "col1", "col2", "col3"], + None, + [None, None, 30.0], + ), + ], + ) + def test_unstack_partial( + self, result_rows, result_columns, index_product, expected_row + ): + # check for regressions on this issue: + # https://github.com/pandas-dev/pandas/issues/19351 + # make sure DataFrame.unstack() works when it's run on a subset of the DataFrame + # and the Index levels contain values that are not present in the subset + result = DataFrame(result_rows, columns=result_columns).set_index( + ["ix1", "ix2"] + ) + result = result.iloc[1:2].unstack("ix2") + expected = DataFrame( + [expected_row], + columns=MultiIndex.from_product( + [result_columns[2:], [index_product]], names=[None, "ix2"] + ), + index=Index([2], name="ix1"), + ) + tm.assert_frame_equal(result, expected) + + def test_unstack_multiple_no_empty_columns(self): + index = MultiIndex.from_tuples( + [(0, "foo", 0), (0, "bar", 0), (1, "baz", 1), (1, "qux", 1)] + ) + + s = Series(np.random.randn(4), index=index) + + unstacked = s.unstack([1, 2]) + expected = unstacked.dropna(axis=1, how="all") + tm.assert_frame_equal(unstacked, expected) + + def test_stack(self, multiindex_year_month_day_dataframe_random_data): + ymd = multiindex_year_month_day_dataframe_random_data + + # regular roundtrip + unstacked = ymd.unstack() + restacked = unstacked.stack() + tm.assert_frame_equal(restacked, ymd) + + unlexsorted = ymd.sort_index(level=2) + + unstacked = unlexsorted.unstack(2) + restacked = unstacked.stack() + tm.assert_frame_equal(restacked.sort_index(level=0), ymd) + + unlexsorted = unlexsorted[::-1] + unstacked = unlexsorted.unstack(1) + restacked = unstacked.stack().swaplevel(1, 2) + tm.assert_frame_equal(restacked.sort_index(level=0), ymd) + + unlexsorted = unlexsorted.swaplevel(0, 1) + unstacked = unlexsorted.unstack(0).swaplevel(0, 1, axis=1) + restacked = unstacked.stack(0).swaplevel(1, 2) + tm.assert_frame_equal(restacked.sort_index(level=0), ymd) + + # columns unsorted + unstacked = ymd.unstack() + 
unstacked = unstacked.sort_index(axis=1, ascending=False) + restacked = unstacked.stack() + tm.assert_frame_equal(restacked, ymd) + + # more than 2 levels in the columns + unstacked = ymd.unstack(1).unstack(1) + + result = unstacked.stack(1) + expected = ymd.unstack() + tm.assert_frame_equal(result, expected) + + result = unstacked.stack(2) + expected = ymd.unstack(1) + tm.assert_frame_equal(result, expected) + + result = unstacked.stack(0) + expected = ymd.stack().unstack(1).unstack(1) + tm.assert_frame_equal(result, expected) + + # not all levels present in each echelon + unstacked = ymd.unstack(2).loc[:, ::3] + stacked = unstacked.stack().stack() + ymd_stacked = ymd.stack() + tm.assert_series_equal(stacked, ymd_stacked.reindex(stacked.index)) + + # stack with negative number + result = ymd.unstack(0).stack(-2) + expected = ymd.unstack(0).stack(0) + tm.assert_equal(result, expected) + + @pytest.mark.parametrize( + "idx, columns, exp_idx", + [ + [ + list("abab"), + ["1st", "2nd", "3rd"], + MultiIndex( + levels=[["a", "b"], ["1st", "2nd", "3rd"]], + codes=[ + np.tile(np.arange(2).repeat(3), 2), + np.tile(np.arange(3), 4), + ], + ), + ], + [ + list("abab"), + ["1st", "2nd", "1st"], + MultiIndex( + levels=[["a", "b"], ["1st", "2nd"]], + codes=[np.tile(np.arange(2).repeat(3), 2), np.tile([0, 1, 0], 4)], + ), + ], + [ + MultiIndex.from_tuples((("a", 2), ("b", 1), ("a", 1), ("b", 2))), + ["1st", "2nd", "1st"], + MultiIndex( + levels=[["a", "b"], [1, 2], ["1st", "2nd"]], + codes=[ + np.tile(np.arange(2).repeat(3), 2), + np.repeat([1, 0, 1], [3, 6, 3]), + np.tile([0, 1, 0], 4), + ], + ), + ], + ], + ) + def test_stack_duplicate_index(self, idx, columns, exp_idx): + # GH10417 + df = DataFrame( + np.arange(12).reshape(4, 3), + index=idx, + columns=columns, + ) + result = df.stack() + expected = Series(np.arange(12), index=exp_idx) + tm.assert_series_equal(result, expected) + assert result.index.is_unique is False + li, ri = result.index, expected.index + tm.assert_index_equal(li, ri) + + def test_unstack_odd_failure(self): + data = """day,time,smoker,sum,len +Fri,Dinner,No,8.25,3. 
+Fri,Dinner,Yes,27.03,9 +Fri,Lunch,No,3.0,1 +Fri,Lunch,Yes,13.68,6 +Sat,Dinner,No,139.63,45 +Sat,Dinner,Yes,120.77,42 +Sun,Dinner,No,180.57,57 +Sun,Dinner,Yes,66.82,19 +Thu,Dinner,No,3.0,1 +Thu,Lunch,No,117.32,44 +Thu,Lunch,Yes,51.51,17""" + + df = pd.read_csv(StringIO(data)).set_index(["day", "time", "smoker"]) + + # it works, #2100 + result = df.unstack(2) + + recons = result.stack() + tm.assert_frame_equal(recons, df) + + def test_stack_mixed_dtype(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + + df = frame.T + df["foo", "four"] = "foo" + df = df.sort_index(level=1, axis=1) + + stacked = df.stack() + result = df["foo"].stack().sort_index() + tm.assert_series_equal(stacked["foo"], result, check_names=False) + assert result.name is None + assert stacked["bar"].dtype == np.float_ + + def test_unstack_bug(self): + df = DataFrame( + { + "state": ["naive", "naive", "naive", "active", "active", "active"], + "exp": ["a", "b", "b", "b", "a", "a"], + "barcode": [1, 2, 3, 4, 1, 3], + "v": ["hi", "hi", "bye", "bye", "bye", "peace"], + "extra": np.arange(6.0), + } + ) + + result = df.groupby(["state", "exp", "barcode", "v"]).apply(len) + + unstacked = result.unstack() + restacked = unstacked.stack() + tm.assert_series_equal(restacked, result.reindex(restacked.index).astype(float)) + + def test_stack_unstack_preserve_names(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + + unstacked = frame.unstack() + assert unstacked.index.name == "first" + assert unstacked.columns.names == ["exp", "second"] + + restacked = unstacked.stack() + assert restacked.index.names == frame.index.names + + @pytest.mark.parametrize("method", ["stack", "unstack"]) + def test_stack_unstack_wrong_level_name( + self, method, multiindex_dataframe_random_data + ): + # GH 18303 - wrong level name should raise + frame = multiindex_dataframe_random_data + + # A DataFrame with flat axes: + df = frame.loc["foo"] + + with pytest.raises(KeyError, match="does not match index name"): + getattr(df, method)("mistake") + + if method == "unstack": + # Same on a Series: + s = df.iloc[:, 0] + with pytest.raises(KeyError, match="does not match index name"): + getattr(s, method)("mistake") + + def test_unstack_level_name(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + + result = frame.unstack("second") + expected = frame.unstack(level=1) + tm.assert_frame_equal(result, expected) + + def test_stack_level_name(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + + unstacked = frame.unstack("second") + result = unstacked.stack("exp") + expected = frame.unstack().stack(0) + tm.assert_frame_equal(result, expected) + + result = frame.stack("exp") + expected = frame.stack() + tm.assert_series_equal(result, expected) + + def test_stack_unstack_multiple( + self, multiindex_year_month_day_dataframe_random_data + ): + ymd = multiindex_year_month_day_dataframe_random_data + + unstacked = ymd.unstack(["year", "month"]) + expected = ymd.unstack("year").unstack("month") + tm.assert_frame_equal(unstacked, expected) + assert unstacked.columns.names == expected.columns.names + + # series + s = ymd["A"] + s_unstacked = s.unstack(["year", "month"]) + tm.assert_frame_equal(s_unstacked, expected["A"]) + + restacked = unstacked.stack(["year", "month"]) + restacked = restacked.swaplevel(0, 1).swaplevel(1, 2) + restacked = restacked.sort_index(level=0) + + tm.assert_frame_equal(restacked, ymd) + assert restacked.index.names == 
ymd.index.names + + # GH #451 + unstacked = ymd.unstack([1, 2]) + expected = ymd.unstack(1).unstack(1).dropna(axis=1, how="all") + tm.assert_frame_equal(unstacked, expected) + + unstacked = ymd.unstack([2, 1]) + expected = ymd.unstack(2).unstack(1).dropna(axis=1, how="all") + tm.assert_frame_equal(unstacked, expected.loc[:, unstacked.columns]) + + def test_stack_names_and_numbers( + self, multiindex_year_month_day_dataframe_random_data + ): + ymd = multiindex_year_month_day_dataframe_random_data + + unstacked = ymd.unstack(["year", "month"]) + + # Can't use mixture of names and numbers to stack + with pytest.raises(ValueError, match="level should contain"): + unstacked.stack([0, "month"]) + + def test_stack_multiple_out_of_bounds( + self, multiindex_year_month_day_dataframe_random_data + ): + # nlevels == 3 + ymd = multiindex_year_month_day_dataframe_random_data + + unstacked = ymd.unstack(["year", "month"]) + + with pytest.raises(IndexError, match="Too many levels"): + unstacked.stack([2, 3]) + with pytest.raises(IndexError, match="not a valid level number"): + unstacked.stack([-4, -3]) + + def test_unstack_period_series(self): + # GH4342 + idx1 = pd.PeriodIndex( + ["2013-01", "2013-01", "2013-02", "2013-02", "2013-03", "2013-03"], + freq="M", + name="period", + ) + idx2 = Index(["A", "B"] * 3, name="str") + value = [1, 2, 3, 4, 5, 6] + + idx = MultiIndex.from_arrays([idx1, idx2]) + s = Series(value, index=idx) + + result1 = s.unstack() + result2 = s.unstack(level=1) + result3 = s.unstack(level=0) + + e_idx = pd.PeriodIndex( + ["2013-01", "2013-02", "2013-03"], freq="M", name="period" + ) + expected = DataFrame( + {"A": [1, 3, 5], "B": [2, 4, 6]}, index=e_idx, columns=["A", "B"] + ) + expected.columns.name = "str" + + tm.assert_frame_equal(result1, expected) + tm.assert_frame_equal(result2, expected) + tm.assert_frame_equal(result3, expected.T) + + idx1 = pd.PeriodIndex( + ["2013-01", "2013-01", "2013-02", "2013-02", "2013-03", "2013-03"], + freq="M", + name="period1", + ) + + idx2 = pd.PeriodIndex( + ["2013-12", "2013-11", "2013-10", "2013-09", "2013-08", "2013-07"], + freq="M", + name="period2", + ) + idx = MultiIndex.from_arrays([idx1, idx2]) + s = Series(value, index=idx) + + result1 = s.unstack() + result2 = s.unstack(level=1) + result3 = s.unstack(level=0) + + e_idx = pd.PeriodIndex( + ["2013-01", "2013-02", "2013-03"], freq="M", name="period1" + ) + e_cols = pd.PeriodIndex( + ["2013-07", "2013-08", "2013-09", "2013-10", "2013-11", "2013-12"], + freq="M", + name="period2", + ) + expected = DataFrame( + [ + [np.nan, np.nan, np.nan, np.nan, 2, 1], + [np.nan, np.nan, 4, 3, np.nan, np.nan], + [6, 5, np.nan, np.nan, np.nan, np.nan], + ], + index=e_idx, + columns=e_cols, + ) + + tm.assert_frame_equal(result1, expected) + tm.assert_frame_equal(result2, expected) + tm.assert_frame_equal(result3, expected.T) + + def test_unstack_period_frame(self): + # GH4342 + idx1 = pd.PeriodIndex( + ["2014-01", "2014-02", "2014-02", "2014-02", "2014-01", "2014-01"], + freq="M", + name="period1", + ) + idx2 = pd.PeriodIndex( + ["2013-12", "2013-12", "2014-02", "2013-10", "2013-10", "2014-02"], + freq="M", + name="period2", + ) + value = {"A": [1, 2, 3, 4, 5, 6], "B": [6, 5, 4, 3, 2, 1]} + idx = MultiIndex.from_arrays([idx1, idx2]) + df = DataFrame(value, index=idx) + + result1 = df.unstack() + result2 = df.unstack(level=1) + result3 = df.unstack(level=0) + + e_1 = pd.PeriodIndex(["2014-01", "2014-02"], freq="M", name="period1") + e_2 = pd.PeriodIndex( + ["2013-10", "2013-12", "2014-02", "2013-10", 
"2013-12", "2014-02"], + freq="M", + name="period2", + ) + e_cols = MultiIndex.from_arrays(["A A A B B B".split(), e_2]) + expected = DataFrame( + [[5, 1, 6, 2, 6, 1], [4, 2, 3, 3, 5, 4]], index=e_1, columns=e_cols + ) + + tm.assert_frame_equal(result1, expected) + tm.assert_frame_equal(result2, expected) + + e_1 = pd.PeriodIndex( + ["2014-01", "2014-02", "2014-01", "2014-02"], freq="M", name="period1" + ) + e_2 = pd.PeriodIndex( + ["2013-10", "2013-12", "2014-02"], freq="M", name="period2" + ) + e_cols = MultiIndex.from_arrays(["A A B B".split(), e_1]) + expected = DataFrame( + [[5, 4, 2, 3], [1, 2, 6, 5], [6, 3, 1, 4]], index=e_2, columns=e_cols + ) + + tm.assert_frame_equal(result3, expected) + + def test_stack_multiple_bug(self): + # bug when some uniques are not present in the data GH#3170 + id_col = ([1] * 3) + ([2] * 3) + name = (["a"] * 3) + (["b"] * 3) + date = pd.to_datetime(["2013-01-03", "2013-01-04", "2013-01-05"] * 2) + var1 = np.random.randint(0, 100, 6) + df = DataFrame({"ID": id_col, "NAME": name, "DATE": date, "VAR1": var1}) + + multi = df.set_index(["DATE", "ID"]) + multi.columns.name = "Params" + unst = multi.unstack("ID") + down = unst.resample("W-THU").mean() + + rs = down.stack("ID") + xp = unst.loc[:, ["VAR1"]].resample("W-THU").mean().stack("ID") + xp.columns.name = "Params" + tm.assert_frame_equal(rs, xp) + + def test_stack_dropna(self): + # GH#3997 + df = DataFrame({"A": ["a1", "a2"], "B": ["b1", "b2"], "C": [1, 1]}) + df = df.set_index(["A", "B"]) + + stacked = df.unstack().stack(dropna=False) + assert len(stacked) > len(stacked.dropna()) + + stacked = df.unstack().stack(dropna=True) + tm.assert_frame_equal(stacked, stacked.dropna()) + + def test_unstack_multiple_hierarchical(self): + df = DataFrame( + index=[ + [0, 0, 0, 0, 1, 1, 1, 1], + [0, 0, 1, 1, 0, 0, 1, 1], + [0, 1, 0, 1, 0, 1, 0, 1], + ], + columns=[[0, 0, 1, 1], [0, 1, 0, 1]], + ) + + df.index.names = ["a", "b", "c"] + df.columns.names = ["d", "e"] + + # it works! + df.unstack(["b", "c"]) + + def test_unstack_sparse_keyspace(self): + # memory problems with naive impl GH#2278 + # Generate Long File & Test Pivot + NUM_ROWS = 1000 + + df = DataFrame( + { + "A": np.random.randint(100, size=NUM_ROWS), + "B": np.random.randint(300, size=NUM_ROWS), + "C": np.random.randint(-7, 7, size=NUM_ROWS), + "D": np.random.randint(-19, 19, size=NUM_ROWS), + "E": np.random.randint(3000, size=NUM_ROWS), + "F": np.random.randn(NUM_ROWS), + } + ) + + idf = df.set_index(["A", "B", "C", "D", "E"]) + + # it works! 
is sufficient + idf.unstack("E") + + def test_unstack_unobserved_keys(self): + # related to GH#2278 refactoring + levels = [[0, 1], [0, 1, 2, 3]] + codes = [[0, 0, 1, 1], [0, 2, 0, 2]] + + index = MultiIndex(levels, codes) + + df = DataFrame(np.random.randn(4, 2), index=index) + + result = df.unstack() + assert len(result.columns) == 4 + + recons = result.stack() + tm.assert_frame_equal(recons, df) + + @pytest.mark.slow + def test_unstack_number_of_levels_larger_than_int32(self, monkeypatch): + # GH#20601 + # GH 26314: Change ValueError to PerformanceWarning + + class MockUnstacker(reshape_lib._Unstacker): + def __init__(self, *args, **kwargs): + # __init__ will raise the warning + super().__init__(*args, **kwargs) + raise Exception("Don't compute final result.") + + with monkeypatch.context() as m: + m.setattr(reshape_lib, "_Unstacker", MockUnstacker) + df = DataFrame( + np.random.randn(2 ** 16, 2), + index=[np.arange(2 ** 16), np.arange(2 ** 16)], + ) + msg = "The following operation may generate" + with tm.assert_produces_warning(PerformanceWarning, match=msg): + with pytest.raises(Exception, match="Don't compute final result."): + df.unstack() + + def test_stack_order_with_unsorted_levels(self): + # GH#16323 + + def manual_compare_stacked(df, df_stacked, lev0, lev1): + assert all( + df.loc[row, col] == df_stacked.loc[(row, col[lev0]), col[lev1]] + for row in df.index + for col in df.columns + ) + + # deep check for 1-row case + for width in [2, 3]: + levels_poss = itertools.product( + itertools.permutations([0, 1, 2], width), repeat=2 + ) + + for levels in levels_poss: + columns = MultiIndex(levels=levels, codes=[[0, 0, 1, 1], [0, 1, 0, 1]]) + df = DataFrame(columns=columns, data=[range(4)]) + for stack_lev in range(2): + df_stacked = df.stack(stack_lev) + manual_compare_stacked(df, df_stacked, stack_lev, 1 - stack_lev) + + # check multi-row case + mi = MultiIndex( + levels=[["A", "C", "B"], ["B", "A", "C"]], + codes=[np.repeat(range(3), 3), np.tile(range(3), 3)], + ) + df = DataFrame( + columns=mi, index=range(5), data=np.arange(5 * len(mi)).reshape(5, -1) + ) + manual_compare_stacked(df, df.stack(0), 0, 1) + + def test_stack_unstack_unordered_multiindex(self): + # GH# 18265 + values = np.arange(5) + data = np.vstack( + [ + [f"b{x}" for x in values], # b0, b1, .. + [f"a{x}" for x in values], # a0, a1, .. 
+ ] + ) + df = DataFrame(data.T, columns=["b", "a"]) + df.columns.name = "first" + second_level_dict = {"x": df} + multi_level_df = pd.concat(second_level_dict, axis=1) + multi_level_df.columns.names = ["second", "first"] + df = multi_level_df.reindex(sorted(multi_level_df.columns), axis=1) + result = df.stack(["first", "second"]).unstack(["first", "second"]) + expected = DataFrame( + [["a0", "b0"], ["a1", "b1"], ["a2", "b2"], ["a3", "b3"], ["a4", "b4"]], + index=[0, 1, 2, 3, 4], + columns=MultiIndex.from_tuples( + [("a", "x"), ("b", "x")], names=["first", "second"] + ), + ) + tm.assert_frame_equal(result, expected) + + def test_unstack_preserve_types( + self, multiindex_year_month_day_dataframe_random_data + ): + # GH#403 + ymd = multiindex_year_month_day_dataframe_random_data + ymd["E"] = "foo" + ymd["F"] = 2 + + unstacked = ymd.unstack("month") + assert unstacked["A", 1].dtype == np.float64 + assert unstacked["E", 1].dtype == np.object_ + assert unstacked["F", 1].dtype == np.float64 + + def test_unstack_group_index_overflow(self): + codes = np.tile(np.arange(500), 2) + level = np.arange(500) + + index = MultiIndex( + levels=[level] * 8 + [[0, 1]], + codes=[codes] * 8 + [np.arange(2).repeat(500)], + ) + + s = Series(np.arange(1000), index=index) + result = s.unstack() + assert result.shape == (500, 2) + + # test roundtrip + stacked = result.stack() + tm.assert_series_equal(s, stacked.reindex(s.index)) + + # put it at beginning + index = MultiIndex( + levels=[[0, 1]] + [level] * 8, + codes=[np.arange(2).repeat(500)] + [codes] * 8, + ) + + s = Series(np.arange(1000), index=index) + result = s.unstack(0) + assert result.shape == (500, 2) + + # put it in middle + index = MultiIndex( + levels=[level] * 4 + [[0, 1]] + [level] * 4, + codes=([codes] * 4 + [np.arange(2).repeat(500)] + [codes] * 4), + ) + + s = Series(np.arange(1000), index=index) + result = s.unstack(4) + assert result.shape == (500, 2) + + def test_unstack_with_missing_int_cast_to_float(self, using_array_manager): + # https://github.com/pandas-dev/pandas/issues/37115 + df = DataFrame( + { + "a": ["A", "A", "B"], + "b": ["ca", "cb", "cb"], + "v": [10] * 3, + } + ).set_index(["a", "b"]) + + # add another int column to get 2 blocks + df["is_"] = 1 + if not using_array_manager: + assert len(df._mgr.blocks) == 2 + + result = df.unstack("b") + result[("is_", "ca")] = result[("is_", "ca")].fillna(0) + + expected = DataFrame( + [[10.0, 10.0, 1.0, 1.0], [np.nan, 10.0, 0.0, 1.0]], + index=Index(["A", "B"], dtype="object", name="a"), + columns=MultiIndex.from_tuples( + [("v", "ca"), ("v", "cb"), ("is_", "ca"), ("is_", "cb")], + names=[None, "b"], + ), + ) + if using_array_manager: + # INFO(ArrayManager) with ArrayManager preserve dtype where possible + expected[("v", "cb")] = expected[("v", "cb")].astype("int64") + expected[("is_", "cb")] = expected[("is_", "cb")].astype("int64") + tm.assert_frame_equal(result, expected) + + def test_unstack_with_level_has_nan(self): + # GH 37510 + df1 = DataFrame( + { + "L1": [1, 2, 3, 4], + "L2": [3, 4, 1, 2], + "L3": [1, 1, 1, 1], + "x": [1, 2, 3, 4], + } + ) + df1 = df1.set_index(["L1", "L2", "L3"]) + new_levels = ["n1", "n2", "n3", None] + df1.index = df1.index.set_levels(levels=new_levels, level="L1") + df1.index = df1.index.set_levels(levels=new_levels, level="L2") + + result = df1.unstack("L3")[("x", 1)].sort_index().index + expected = MultiIndex( + levels=[["n1", "n2", "n3", None], ["n1", "n2", "n3", None]], + codes=[[0, 1, 2, 3], [2, 3, 0, 1]], + names=["L1", "L2"], + ) + + 
tm.assert_index_equal(result, expected) + + def test_stack_nan_in_multiindex_columns(self): + # GH#39481 + df = DataFrame( + np.zeros([1, 5]), + columns=MultiIndex.from_tuples( + [ + (0, None, None), + (0, 2, 0), + (0, 2, 1), + (0, 3, 0), + (0, 3, 1), + ], + ), + ) + result = df.stack(2) + expected = DataFrame( + [[0.0, np.nan, np.nan], [np.nan, 0.0, 0.0], [np.nan, 0.0, 0.0]], + index=Index([(0, None), (0, 0), (0, 1)]), + columns=Index([(0, None), (0, 2), (0, 3)]), + ) + tm.assert_frame_equal(result, expected) + + def test_multi_level_stack_categorical(self): + # GH 15239 + midx = MultiIndex.from_arrays( + [ + ["A"] * 2 + ["B"] * 2, + pd.Categorical(list("abab")), + pd.Categorical(list("ccdd")), + ] + ) + df = DataFrame(np.arange(8).reshape(2, 4), columns=midx) + result = df.stack([1, 2]) + expected = DataFrame( + [ + [0, np.nan], + [np.nan, 2], + [1, np.nan], + [np.nan, 3], + [4, np.nan], + [np.nan, 6], + [5, np.nan], + [np.nan, 7], + ], + columns=["A", "B"], + index=MultiIndex.from_arrays( + [ + [0] * 4 + [1] * 4, + pd.Categorical(list("aabbaabb")), + pd.Categorical(list("cdcdcdcd")), + ] + ), + ) + tm.assert_frame_equal(result, expected) + + def test_stack_nan_level(self): + # GH 9406 + df_nan = DataFrame( + np.arange(4).reshape(2, 2), + columns=MultiIndex.from_tuples( + [("A", np.nan), ("B", "b")], names=["Upper", "Lower"] + ), + index=Index([0, 1], name="Num"), + dtype=np.float64, + ) + result = df_nan.stack() + expected = DataFrame( + [[0.0, np.nan], [np.nan, 1], [2.0, np.nan], [np.nan, 3.0]], + columns=Index(["A", "B"], name="Upper"), + index=MultiIndex.from_tuples( + [(0, np.nan), (0, "b"), (1, np.nan), (1, "b")], names=["Num", "Lower"] + ), + ) + tm.assert_frame_equal(result, expected) + + def test_unstack_categorical_columns(self): + # GH 14018 + idx = MultiIndex.from_product([["A"], [0, 1]]) + df = DataFrame({"cat": pd.Categorical(["a", "b"])}, index=idx) + result = df.unstack() + expected = DataFrame( + { + 0: pd.Categorical(["a"], categories=["a", "b"]), + 1: pd.Categorical(["b"], categories=["a", "b"]), + }, + index=["A"], + ) + expected.columns = MultiIndex.from_tuples([("cat", 0), ("cat", 1)]) + tm.assert_frame_equal(result, expected) + + def test_stack_unsorted(self): + # GH 16925 + PAE = ["ITA", "FRA"] + VAR = ["A1", "A2"] + TYP = ["CRT", "DBT", "NET"] + MI = MultiIndex.from_product([PAE, VAR, TYP], names=["PAE", "VAR", "TYP"]) + + V = list(range(len(MI))) + DF = DataFrame(data=V, index=MI, columns=["VALUE"]) + + DF = DF.unstack(["VAR", "TYP"]) + DF.columns = DF.columns.droplevel(0) + DF.loc[:, ("A0", "NET")] = 9999 + + result = DF.stack(["VAR", "TYP"]).sort_index() + expected = DF.sort_index(axis=1).stack(["VAR", "TYP"]).sort_index() + tm.assert_series_equal(result, expected) + + def test_stack_nullable_dtype(self): + # GH#43561 + columns = MultiIndex.from_product( + [["54511", "54515"], ["r", "t_mean"]], names=["station", "element"] + ) + index = Index([1, 2, 3], name="time") + + arr = np.array([[50, 226, 10, 215], [10, 215, 9, 220], [305, 232, 111, 220]]) + df = DataFrame(arr, columns=columns, index=index, dtype=pd.Int64Dtype()) + + result = df.stack("station") + + expected = df.astype(np.int64).stack("station").astype(pd.Int64Dtype()) + tm.assert_frame_equal(result, expected) + + # non-homogeneous case + df[df.columns[0]] = df[df.columns[0]].astype(pd.Float64Dtype()) + result = df.stack("station") + + # TODO(EA2D): we get object dtype because DataFrame.values can't + # be an EA + expected = df.astype(object).stack("station") + tm.assert_frame_equal(result, expected) 
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/test_subclass.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/test_subclass.py new file mode 100644 index 0000000..8d9957b --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/test_subclass.py @@ -0,0 +1,732 @@ +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, +) +import pandas._testing as tm + + +@pytest.fixture() +def gpd_style_subclass_df(): + class SubclassedDataFrame(DataFrame): + @property + def _constructor(self): + return SubclassedDataFrame + + return SubclassedDataFrame({"a": [1, 2, 3]}) + + +class TestDataFrameSubclassing: + def test_frame_subclassing_and_slicing(self): + # Subclass frame and ensure it returns the right class on slicing it + # In reference to PR 9632 + + class CustomSeries(Series): + @property + def _constructor(self): + return CustomSeries + + def custom_series_function(self): + return "OK" + + class CustomDataFrame(DataFrame): + """ + Subclasses pandas DF, fills DF with simulation results, adds some + custom plotting functions. + """ + + def __init__(self, *args, **kw): + super().__init__(*args, **kw) + + @property + def _constructor(self): + return CustomDataFrame + + _constructor_sliced = CustomSeries + + def custom_frame_function(self): + return "OK" + + data = {"col1": range(10), "col2": range(10)} + cdf = CustomDataFrame(data) + + # Did we get back our own DF class? + assert isinstance(cdf, CustomDataFrame) + + # Do we get back our own Series class after selecting a column? + cdf_series = cdf.col1 + assert isinstance(cdf_series, CustomSeries) + assert cdf_series.custom_series_function() == "OK" + + # Do we get back our own DF class after slicing row-wise? 
+ cdf_rows = cdf[1:5] + assert isinstance(cdf_rows, CustomDataFrame) + assert cdf_rows.custom_frame_function() == "OK" + + # Make sure sliced part of multi-index frame is custom class + mcol = MultiIndex.from_tuples([("A", "A"), ("A", "B")]) + cdf_multi = CustomDataFrame([[0, 1], [2, 3]], columns=mcol) + assert isinstance(cdf_multi["A"], CustomDataFrame) + + mcol = MultiIndex.from_tuples([("A", ""), ("B", "")]) + cdf_multi2 = CustomDataFrame([[0, 1], [2, 3]], columns=mcol) + assert isinstance(cdf_multi2["A"], CustomSeries) + + def test_dataframe_metadata(self): + df = tm.SubclassedDataFrame( + {"X": [1, 2, 3], "Y": [1, 2, 3]}, index=["a", "b", "c"] + ) + df.testattr = "XXX" + + assert df.testattr == "XXX" + assert df[["X"]].testattr == "XXX" + assert df.loc[["a", "b"], :].testattr == "XXX" + assert df.iloc[[0, 1], :].testattr == "XXX" + + # see gh-9776 + assert df.iloc[0:1, :].testattr == "XXX" + + # see gh-10553 + unpickled = tm.round_trip_pickle(df) + tm.assert_frame_equal(df, unpickled) + assert df._metadata == unpickled._metadata + assert df.testattr == unpickled.testattr + + def test_indexing_sliced(self): + # GH 11559 + df = tm.SubclassedDataFrame( + {"X": [1, 2, 3], "Y": [4, 5, 6], "Z": [7, 8, 9]}, index=["a", "b", "c"] + ) + res = df.loc[:, "X"] + exp = tm.SubclassedSeries([1, 2, 3], index=list("abc"), name="X") + tm.assert_series_equal(res, exp) + assert isinstance(res, tm.SubclassedSeries) + + res = df.iloc[:, 1] + exp = tm.SubclassedSeries([4, 5, 6], index=list("abc"), name="Y") + tm.assert_series_equal(res, exp) + assert isinstance(res, tm.SubclassedSeries) + + res = df.loc[:, "Z"] + exp = tm.SubclassedSeries([7, 8, 9], index=list("abc"), name="Z") + tm.assert_series_equal(res, exp) + assert isinstance(res, tm.SubclassedSeries) + + res = df.loc["a", :] + exp = tm.SubclassedSeries([1, 4, 7], index=list("XYZ"), name="a") + tm.assert_series_equal(res, exp) + assert isinstance(res, tm.SubclassedSeries) + + res = df.iloc[1, :] + exp = tm.SubclassedSeries([2, 5, 8], index=list("XYZ"), name="b") + tm.assert_series_equal(res, exp) + assert isinstance(res, tm.SubclassedSeries) + + res = df.loc["c", :] + exp = tm.SubclassedSeries([3, 6, 9], index=list("XYZ"), name="c") + tm.assert_series_equal(res, exp) + assert isinstance(res, tm.SubclassedSeries) + + def test_subclass_attr_err_propagation(self): + # GH 11808 + class A(DataFrame): + @property + def bar(self): + return self.i_dont_exist + + with pytest.raises(AttributeError, match=".*i_dont_exist.*"): + A().bar + + def test_subclass_align(self): + # GH 12983 + df1 = tm.SubclassedDataFrame( + {"a": [1, 3, 5], "b": [1, 3, 5]}, index=list("ACE") + ) + df2 = tm.SubclassedDataFrame( + {"c": [1, 2, 4], "d": [1, 2, 4]}, index=list("ABD") + ) + + res1, res2 = df1.align(df2, axis=0) + exp1 = tm.SubclassedDataFrame( + {"a": [1, np.nan, 3, np.nan, 5], "b": [1, np.nan, 3, np.nan, 5]}, + index=list("ABCDE"), + ) + exp2 = tm.SubclassedDataFrame( + {"c": [1, 2, np.nan, 4, np.nan], "d": [1, 2, np.nan, 4, np.nan]}, + index=list("ABCDE"), + ) + assert isinstance(res1, tm.SubclassedDataFrame) + tm.assert_frame_equal(res1, exp1) + assert isinstance(res2, tm.SubclassedDataFrame) + tm.assert_frame_equal(res2, exp2) + + res1, res2 = df1.a.align(df2.c) + assert isinstance(res1, tm.SubclassedSeries) + tm.assert_series_equal(res1, exp1.a) + assert isinstance(res2, tm.SubclassedSeries) + tm.assert_series_equal(res2, exp2.c) + + def test_subclass_align_combinations(self): + # GH 12983 + df = tm.SubclassedDataFrame({"a": [1, 3, 5], "b": [1, 3, 5]}, 
index=list("ACE")) + s = tm.SubclassedSeries([1, 2, 4], index=list("ABD"), name="x") + + # frame + series + res1, res2 = df.align(s, axis=0) + exp1 = tm.SubclassedDataFrame( + {"a": [1, np.nan, 3, np.nan, 5], "b": [1, np.nan, 3, np.nan, 5]}, + index=list("ABCDE"), + ) + # name is lost when + exp2 = tm.SubclassedSeries( + [1, 2, np.nan, 4, np.nan], index=list("ABCDE"), name="x" + ) + + assert isinstance(res1, tm.SubclassedDataFrame) + tm.assert_frame_equal(res1, exp1) + assert isinstance(res2, tm.SubclassedSeries) + tm.assert_series_equal(res2, exp2) + + # series + frame + res1, res2 = s.align(df) + assert isinstance(res1, tm.SubclassedSeries) + tm.assert_series_equal(res1, exp2) + assert isinstance(res2, tm.SubclassedDataFrame) + tm.assert_frame_equal(res2, exp1) + + def test_subclass_iterrows(self): + # GH 13977 + df = tm.SubclassedDataFrame({"a": [1]}) + for i, row in df.iterrows(): + assert isinstance(row, tm.SubclassedSeries) + tm.assert_series_equal(row, df.loc[i]) + + def test_subclass_stack(self): + # GH 15564 + df = tm.SubclassedDataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + index=["a", "b", "c"], + columns=["X", "Y", "Z"], + ) + + res = df.stack() + exp = tm.SubclassedSeries( + [1, 2, 3, 4, 5, 6, 7, 8, 9], index=[list("aaabbbccc"), list("XYZXYZXYZ")] + ) + + tm.assert_series_equal(res, exp) + + def test_subclass_stack_multi(self): + # GH 15564 + df = tm.SubclassedDataFrame( + [[10, 11, 12, 13], [20, 21, 22, 23], [30, 31, 32, 33], [40, 41, 42, 43]], + index=MultiIndex.from_tuples( + list(zip(list("AABB"), list("cdcd"))), names=["aaa", "ccc"] + ), + columns=MultiIndex.from_tuples( + list(zip(list("WWXX"), list("yzyz"))), names=["www", "yyy"] + ), + ) + + exp = tm.SubclassedDataFrame( + [ + [10, 12], + [11, 13], + [20, 22], + [21, 23], + [30, 32], + [31, 33], + [40, 42], + [41, 43], + ], + index=MultiIndex.from_tuples( + list(zip(list("AAAABBBB"), list("ccddccdd"), list("yzyzyzyz"))), + names=["aaa", "ccc", "yyy"], + ), + columns=Index(["W", "X"], name="www"), + ) + + res = df.stack() + tm.assert_frame_equal(res, exp) + + res = df.stack("yyy") + tm.assert_frame_equal(res, exp) + + exp = tm.SubclassedDataFrame( + [ + [10, 11], + [12, 13], + [20, 21], + [22, 23], + [30, 31], + [32, 33], + [40, 41], + [42, 43], + ], + index=MultiIndex.from_tuples( + list(zip(list("AAAABBBB"), list("ccddccdd"), list("WXWXWXWX"))), + names=["aaa", "ccc", "www"], + ), + columns=Index(["y", "z"], name="yyy"), + ) + + res = df.stack("www") + tm.assert_frame_equal(res, exp) + + def test_subclass_stack_multi_mixed(self): + # GH 15564 + df = tm.SubclassedDataFrame( + [ + [10, 11, 12.0, 13.0], + [20, 21, 22.0, 23.0], + [30, 31, 32.0, 33.0], + [40, 41, 42.0, 43.0], + ], + index=MultiIndex.from_tuples( + list(zip(list("AABB"), list("cdcd"))), names=["aaa", "ccc"] + ), + columns=MultiIndex.from_tuples( + list(zip(list("WWXX"), list("yzyz"))), names=["www", "yyy"] + ), + ) + + exp = tm.SubclassedDataFrame( + [ + [10, 12.0], + [11, 13.0], + [20, 22.0], + [21, 23.0], + [30, 32.0], + [31, 33.0], + [40, 42.0], + [41, 43.0], + ], + index=MultiIndex.from_tuples( + list(zip(list("AAAABBBB"), list("ccddccdd"), list("yzyzyzyz"))), + names=["aaa", "ccc", "yyy"], + ), + columns=Index(["W", "X"], name="www"), + ) + + res = df.stack() + tm.assert_frame_equal(res, exp) + + res = df.stack("yyy") + tm.assert_frame_equal(res, exp) + + exp = tm.SubclassedDataFrame( + [ + [10.0, 11.0], + [12.0, 13.0], + [20.0, 21.0], + [22.0, 23.0], + [30.0, 31.0], + [32.0, 33.0], + [40.0, 41.0], + [42.0, 43.0], + ], + index=MultiIndex.from_tuples( 
+ list(zip(list("AAAABBBB"), list("ccddccdd"), list("WXWXWXWX"))), + names=["aaa", "ccc", "www"], + ), + columns=Index(["y", "z"], name="yyy"), + ) + + res = df.stack("www") + tm.assert_frame_equal(res, exp) + + def test_subclass_unstack(self): + # GH 15564 + df = tm.SubclassedDataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + index=["a", "b", "c"], + columns=["X", "Y", "Z"], + ) + + res = df.unstack() + exp = tm.SubclassedSeries( + [1, 4, 7, 2, 5, 8, 3, 6, 9], index=[list("XXXYYYZZZ"), list("abcabcabc")] + ) + + tm.assert_series_equal(res, exp) + + def test_subclass_unstack_multi(self): + # GH 15564 + df = tm.SubclassedDataFrame( + [[10, 11, 12, 13], [20, 21, 22, 23], [30, 31, 32, 33], [40, 41, 42, 43]], + index=MultiIndex.from_tuples( + list(zip(list("AABB"), list("cdcd"))), names=["aaa", "ccc"] + ), + columns=MultiIndex.from_tuples( + list(zip(list("WWXX"), list("yzyz"))), names=["www", "yyy"] + ), + ) + + exp = tm.SubclassedDataFrame( + [[10, 20, 11, 21, 12, 22, 13, 23], [30, 40, 31, 41, 32, 42, 33, 43]], + index=Index(["A", "B"], name="aaa"), + columns=MultiIndex.from_tuples( + list(zip(list("WWWWXXXX"), list("yyzzyyzz"), list("cdcdcdcd"))), + names=["www", "yyy", "ccc"], + ), + ) + + res = df.unstack() + tm.assert_frame_equal(res, exp) + + res = df.unstack("ccc") + tm.assert_frame_equal(res, exp) + + exp = tm.SubclassedDataFrame( + [[10, 30, 11, 31, 12, 32, 13, 33], [20, 40, 21, 41, 22, 42, 23, 43]], + index=Index(["c", "d"], name="ccc"), + columns=MultiIndex.from_tuples( + list(zip(list("WWWWXXXX"), list("yyzzyyzz"), list("ABABABAB"))), + names=["www", "yyy", "aaa"], + ), + ) + + res = df.unstack("aaa") + tm.assert_frame_equal(res, exp) + + def test_subclass_unstack_multi_mixed(self): + # GH 15564 + df = tm.SubclassedDataFrame( + [ + [10, 11, 12.0, 13.0], + [20, 21, 22.0, 23.0], + [30, 31, 32.0, 33.0], + [40, 41, 42.0, 43.0], + ], + index=MultiIndex.from_tuples( + list(zip(list("AABB"), list("cdcd"))), names=["aaa", "ccc"] + ), + columns=MultiIndex.from_tuples( + list(zip(list("WWXX"), list("yzyz"))), names=["www", "yyy"] + ), + ) + + exp = tm.SubclassedDataFrame( + [ + [10, 20, 11, 21, 12.0, 22.0, 13.0, 23.0], + [30, 40, 31, 41, 32.0, 42.0, 33.0, 43.0], + ], + index=Index(["A", "B"], name="aaa"), + columns=MultiIndex.from_tuples( + list(zip(list("WWWWXXXX"), list("yyzzyyzz"), list("cdcdcdcd"))), + names=["www", "yyy", "ccc"], + ), + ) + + res = df.unstack() + tm.assert_frame_equal(res, exp) + + res = df.unstack("ccc") + tm.assert_frame_equal(res, exp) + + exp = tm.SubclassedDataFrame( + [ + [10, 30, 11, 31, 12.0, 32.0, 13.0, 33.0], + [20, 40, 21, 41, 22.0, 42.0, 23.0, 43.0], + ], + index=Index(["c", "d"], name="ccc"), + columns=MultiIndex.from_tuples( + list(zip(list("WWWWXXXX"), list("yyzzyyzz"), list("ABABABAB"))), + names=["www", "yyy", "aaa"], + ), + ) + + res = df.unstack("aaa") + tm.assert_frame_equal(res, exp) + + def test_subclass_pivot(self): + # GH 15564 + df = tm.SubclassedDataFrame( + { + "index": ["A", "B", "C", "C", "B", "A"], + "columns": ["One", "One", "One", "Two", "Two", "Two"], + "values": [1.0, 2.0, 3.0, 3.0, 2.0, 1.0], + } + ) + + pivoted = df.pivot(index="index", columns="columns", values="values") + + expected = tm.SubclassedDataFrame( + { + "One": {"A": 1.0, "B": 2.0, "C": 3.0}, + "Two": {"A": 1.0, "B": 2.0, "C": 3.0}, + } + ) + + expected.index.name, expected.columns.name = "index", "columns" + + tm.assert_frame_equal(pivoted, expected) + + def test_subclassed_melt(self): + # GH 15564 + cheese = tm.SubclassedDataFrame( + { + "first": ["John", "Mary"], + 
"last": ["Doe", "Bo"], + "height": [5.5, 6.0], + "weight": [130, 150], + } + ) + + melted = pd.melt(cheese, id_vars=["first", "last"]) + + expected = tm.SubclassedDataFrame( + [ + ["John", "Doe", "height", 5.5], + ["Mary", "Bo", "height", 6.0], + ["John", "Doe", "weight", 130], + ["Mary", "Bo", "weight", 150], + ], + columns=["first", "last", "variable", "value"], + ) + + tm.assert_frame_equal(melted, expected) + + def test_subclassed_wide_to_long(self): + # GH 9762 + + np.random.seed(123) + x = np.random.randn(3) + df = tm.SubclassedDataFrame( + { + "A1970": {0: "a", 1: "b", 2: "c"}, + "A1980": {0: "d", 1: "e", 2: "f"}, + "B1970": {0: 2.5, 1: 1.2, 2: 0.7}, + "B1980": {0: 3.2, 1: 1.3, 2: 0.1}, + "X": dict(zip(range(3), x)), + } + ) + + df["id"] = df.index + exp_data = { + "X": x.tolist() + x.tolist(), + "A": ["a", "b", "c", "d", "e", "f"], + "B": [2.5, 1.2, 0.7, 3.2, 1.3, 0.1], + "year": [1970, 1970, 1970, 1980, 1980, 1980], + "id": [0, 1, 2, 0, 1, 2], + } + expected = tm.SubclassedDataFrame(exp_data) + expected = expected.set_index(["id", "year"])[["X", "A", "B"]] + long_frame = pd.wide_to_long(df, ["A", "B"], i="id", j="year") + + tm.assert_frame_equal(long_frame, expected) + + def test_subclassed_apply(self): + # GH 19822 + + def check_row_subclass(row): + assert isinstance(row, tm.SubclassedSeries) + + def stretch(row): + if row["variable"] == "height": + row["value"] += 0.5 + return row + + df = tm.SubclassedDataFrame( + [ + ["John", "Doe", "height", 5.5], + ["Mary", "Bo", "height", 6.0], + ["John", "Doe", "weight", 130], + ["Mary", "Bo", "weight", 150], + ], + columns=["first", "last", "variable", "value"], + ) + + df.apply(lambda x: check_row_subclass(x)) + df.apply(lambda x: check_row_subclass(x), axis=1) + + expected = tm.SubclassedDataFrame( + [ + ["John", "Doe", "height", 6.0], + ["Mary", "Bo", "height", 6.5], + ["John", "Doe", "weight", 130], + ["Mary", "Bo", "weight", 150], + ], + columns=["first", "last", "variable", "value"], + ) + + result = df.apply(lambda x: stretch(x), axis=1) + assert isinstance(result, tm.SubclassedDataFrame) + tm.assert_frame_equal(result, expected) + + expected = tm.SubclassedDataFrame([[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3]]) + + result = df.apply(lambda x: tm.SubclassedSeries([1, 2, 3]), axis=1) + assert isinstance(result, tm.SubclassedDataFrame) + tm.assert_frame_equal(result, expected) + + result = df.apply(lambda x: [1, 2, 3], axis=1, result_type="expand") + assert isinstance(result, tm.SubclassedDataFrame) + tm.assert_frame_equal(result, expected) + + expected = tm.SubclassedSeries([[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3]]) + + result = df.apply(lambda x: [1, 2, 3], axis=1) + assert not isinstance(result, tm.SubclassedDataFrame) + tm.assert_series_equal(result, expected) + + @pytest.mark.filterwarnings("ignore:.*None will no longer:FutureWarning") + def test_subclassed_reductions(self, all_reductions): + # GH 25596 + + df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) + result = getattr(df, all_reductions)() + assert isinstance(result, tm.SubclassedSeries) + + def test_subclassed_count(self): + + df = tm.SubclassedDataFrame( + { + "Person": ["John", "Myla", "Lewis", "John", "Myla"], + "Age": [24.0, np.nan, 21.0, 33, 26], + "Single": [False, True, True, True, False], + } + ) + result = df.count() + assert isinstance(result, tm.SubclassedSeries) + + df = tm.SubclassedDataFrame({"A": [1, 0, 3], "B": [0, 5, 6], "C": [7, 8, 0]}) + result = df.count() + assert isinstance(result, tm.SubclassedSeries) + + df = 
tm.SubclassedDataFrame( + [[10, 11, 12, 13], [20, 21, 22, 23], [30, 31, 32, 33], [40, 41, 42, 43]], + index=MultiIndex.from_tuples( + list(zip(list("AABB"), list("cdcd"))), names=["aaa", "ccc"] + ), + columns=MultiIndex.from_tuples( + list(zip(list("WWXX"), list("yzyz"))), names=["www", "yyy"] + ), + ) + with tm.assert_produces_warning(FutureWarning): + result = df.count(level=1) + assert isinstance(result, tm.SubclassedDataFrame) + + df = tm.SubclassedDataFrame() + result = df.count() + assert isinstance(result, tm.SubclassedSeries) + + def test_isin(self): + + df = tm.SubclassedDataFrame( + {"num_legs": [2, 4], "num_wings": [2, 0]}, index=["falcon", "dog"] + ) + result = df.isin([0, 2]) + assert isinstance(result, tm.SubclassedDataFrame) + + def test_duplicated(self): + + df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) + result = df.duplicated() + assert isinstance(result, tm.SubclassedSeries) + + df = tm.SubclassedDataFrame() + result = df.duplicated() + assert isinstance(result, tm.SubclassedSeries) + + @pytest.mark.parametrize("idx_method", ["idxmax", "idxmin"]) + def test_idx(self, idx_method): + + df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) + result = getattr(df, idx_method)() + assert isinstance(result, tm.SubclassedSeries) + + def test_dot(self): + + df = tm.SubclassedDataFrame([[0, 1, -2, -1], [1, 1, 1, 1]]) + s = tm.SubclassedSeries([1, 1, 2, 1]) + result = df.dot(s) + assert isinstance(result, tm.SubclassedSeries) + + df = tm.SubclassedDataFrame([[0, 1, -2, -1], [1, 1, 1, 1]]) + s = tm.SubclassedDataFrame([1, 1, 2, 1]) + result = df.dot(s) + assert isinstance(result, tm.SubclassedDataFrame) + + def test_memory_usage(self): + + df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) + result = df.memory_usage() + assert isinstance(result, tm.SubclassedSeries) + + result = df.memory_usage(index=False) + assert isinstance(result, tm.SubclassedSeries) + + @td.skip_if_no_scipy + def test_corrwith(self): + index = ["a", "b", "c", "d", "e"] + columns = ["one", "two", "three", "four"] + df1 = tm.SubclassedDataFrame( + np.random.randn(5, 4), index=index, columns=columns + ) + df2 = tm.SubclassedDataFrame( + np.random.randn(4, 4), index=index[:4], columns=columns + ) + correls = df1.corrwith(df2, axis=1, drop=True, method="kendall") + + assert isinstance(correls, (tm.SubclassedSeries)) + + def test_asof(self): + + N = 3 + rng = pd.date_range("1/1/1990", periods=N, freq="53s") + df = tm.SubclassedDataFrame( + { + "A": [np.nan, np.nan, np.nan], + "B": [np.nan, np.nan, np.nan], + "C": [np.nan, np.nan, np.nan], + }, + index=rng, + ) + + result = df.asof(rng[-2:]) + assert isinstance(result, tm.SubclassedDataFrame) + + result = df.asof(rng[-2]) + assert isinstance(result, tm.SubclassedSeries) + + result = df.asof("1989-12-31") + assert isinstance(result, tm.SubclassedSeries) + + def test_idxmin_preserves_subclass(self): + # GH 28330 + + df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) + result = df.idxmin() + assert isinstance(result, tm.SubclassedSeries) + + def test_idxmax_preserves_subclass(self): + # GH 28330 + + df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) + result = df.idxmax() + assert isinstance(result, tm.SubclassedSeries) + + def test_convert_dtypes_preserves_subclass(self, gpd_style_subclass_df): + # GH 43668 + df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) + result = df.convert_dtypes() + assert 
isinstance(result, tm.SubclassedDataFrame) + + result = gpd_style_subclass_df.convert_dtypes() + assert isinstance(result, type(gpd_style_subclass_df)) + + def test_equals_subclass(self): + # https://github.com/pandas-dev/pandas/pull/34402 + # allow subclass in both directions + df1 = DataFrame({"a": [1, 2, 3]}) + df2 = tm.SubclassedDataFrame({"a": [1, 2, 3]}) + assert df1.equals(df2) + assert df2.equals(df1) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/test_ufunc.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/test_ufunc.py new file mode 100644 index 0000000..79e9b1f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/test_ufunc.py @@ -0,0 +1,309 @@ +from functools import partial + +import numpy as np +import pytest + +from pandas.compat.numpy import np_version_gte1p22 +import pandas.util._test_decorators as td + +import pandas as pd +import pandas._testing as tm +from pandas.api.types import is_extension_array_dtype + +dtypes = [ + "int64", + "Int64", + {"A": "int64", "B": "Int64"}, +] + + +@pytest.mark.parametrize("dtype", dtypes) +def test_unary_unary(dtype): + # unary input, unary output + values = np.array([[-1, -1], [1, 1]], dtype="int64") + df = pd.DataFrame(values, columns=["A", "B"], index=["a", "b"]).astype(dtype=dtype) + result = np.positive(df) + expected = pd.DataFrame( + np.positive(values), index=df.index, columns=df.columns + ).astype(dtype) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("dtype", dtypes) +def test_unary_binary(request, dtype): + # unary input, binary output + if is_extension_array_dtype(dtype) or isinstance(dtype, dict): + request.node.add_marker( + pytest.mark.xfail( + reason="Extension / mixed with multiple outputs not implemented." + ) + ) + + values = np.array([[-1, -1], [1, 1]], dtype="int64") + df = pd.DataFrame(values, columns=["A", "B"], index=["a", "b"]).astype(dtype=dtype) + result_pandas = np.modf(df) + assert isinstance(result_pandas, tuple) + assert len(result_pandas) == 2 + expected_numpy = np.modf(values) + + for result, b in zip(result_pandas, expected_numpy): + expected = pd.DataFrame(b, index=df.index, columns=df.columns) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("dtype", dtypes) +def test_binary_input_dispatch_binop(dtype): + # binop ufuncs are dispatched to our dunder methods. 
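+    # Added sketch (not part of the upstream test): "dispatched to our dunder
+    # methods" means np.add(df, df) takes the same code path as df + df, so the
+    # two must match exactly, including for extension dtypes:
+    demo = pd.DataFrame({"A": [1, 2]}).astype("Int64")
+    tm.assert_frame_equal(np.add(demo, demo), demo + demo)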
+ values = np.array([[-1, -1], [1, 1]], dtype="int64") + df = pd.DataFrame(values, columns=["A", "B"], index=["a", "b"]).astype(dtype=dtype) + result = np.add(df, df) + expected = pd.DataFrame( + np.add(values, values), index=df.index, columns=df.columns + ).astype(dtype) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "func,arg,expected", + [ + (np.add, 1, [2, 3, 4, 5]), + ( + partial(np.add, where=[[False, True], [True, False]]), + np.array([[1, 1], [1, 1]]), + [0, 3, 4, 0], + ), + (np.power, np.array([[1, 1], [2, 2]]), [1, 2, 9, 16]), + (np.subtract, 2, [-1, 0, 1, 2]), + ( + partial(np.negative, where=np.array([[False, True], [True, False]])), + None, + [0, -2, -3, 0], + ), + ], +) +def test_ufunc_passes_args(func, arg, expected, request): + # GH#40662 + arr = np.array([[1, 2], [3, 4]]) + df = pd.DataFrame(arr) + result_inplace = np.zeros_like(arr) + # 1-argument ufunc + if arg is None: + result = func(df, out=result_inplace) + else: + result = func(df, arg, out=result_inplace) + + expected = np.array(expected).reshape(2, 2) + tm.assert_numpy_array_equal(result_inplace, expected) + + expected = pd.DataFrame(expected) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("dtype_a", dtypes) +@pytest.mark.parametrize("dtype_b", dtypes) +def test_binary_input_aligns_columns(request, dtype_a, dtype_b): + if ( + is_extension_array_dtype(dtype_a) + or isinstance(dtype_a, dict) + or is_extension_array_dtype(dtype_b) + or isinstance(dtype_b, dict) + ): + request.node.add_marker( + pytest.mark.xfail( + reason="Extension / mixed with multiple inputs not implemented." + ) + ) + + df1 = pd.DataFrame({"A": [1, 2], "B": [3, 4]}).astype(dtype_a) + + if isinstance(dtype_a, dict) and isinstance(dtype_b, dict): + dtype_b["C"] = dtype_b.pop("B") + + df2 = pd.DataFrame({"A": [1, 2], "C": [3, 4]}).astype(dtype_b) + with tm.assert_produces_warning(FutureWarning): + result = np.heaviside(df1, df2) + # Expected future behaviour: + # expected = np.heaviside( + # np.array([[1, 3, np.nan], [2, 4, np.nan]]), + # np.array([[1, np.nan, 3], [2, np.nan, 4]]), + # ) + # expected = pd.DataFrame(expected, index=[0, 1], columns=["A", "B", "C"]) + expected = pd.DataFrame([[1.0, 1.0], [1.0, 1.0]], columns=["A", "B"]) + tm.assert_frame_equal(result, expected) + + # ensure the expected is the same when applying with numpy array + result = np.heaviside(df1, df2.values) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("dtype", dtypes) +def test_binary_input_aligns_index(request, dtype): + if is_extension_array_dtype(dtype) or isinstance(dtype, dict): + request.node.add_marker( + pytest.mark.xfail( + reason="Extension / mixed with multiple inputs not implemented." + ) + ) + df1 = pd.DataFrame({"A": [1, 2], "B": [3, 4]}, index=["a", "b"]).astype(dtype) + df2 = pd.DataFrame({"A": [1, 2], "B": [3, 4]}, index=["a", "c"]).astype(dtype) + with tm.assert_produces_warning(FutureWarning): + result = np.heaviside(df1, df2) + # Expected future behaviour: + # expected = np.heaviside( + # np.array([[1, 3], [3, 4], [np.nan, np.nan]]), + # np.array([[1, 3], [np.nan, np.nan], [3, 4]]), + # ) + # # TODO(FloatArray): this will be Float64Dtype. 
+    # expected = pd.DataFrame(expected, index=["a", "b", "c"], columns=["A", "B"])
+    expected = pd.DataFrame(
+        [[1.0, 1.0], [1.0, 1.0]], columns=["A", "B"], index=["a", "b"]
+    )
+    tm.assert_frame_equal(result, expected)
+
+    # ensure the expected is the same when applying with numpy array
+    result = np.heaviside(df1, df2.values)
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.filterwarnings("ignore:Calling a ufunc on non-aligned:FutureWarning")
+def test_binary_frame_series_raises():
+    # We don't currently implement ufuncs on mixed DataFrame / Series inputs
+    df = pd.DataFrame({"A": [1, 2]})
+    # with pytest.raises(NotImplementedError, match="logaddexp"):
+    with pytest.raises(ValueError, match=""):
+        np.logaddexp(df, df["A"])
+
+    # with pytest.raises(NotImplementedError, match="logaddexp"):
+    with pytest.raises(ValueError, match=""):
+        np.logaddexp(df["A"], df)
+
+
+def test_unary_accumulate_axis():
+    # https://github.com/pandas-dev/pandas/issues/39259
+    df = pd.DataFrame({"a": [1, 3, 2, 4]})
+    result = np.maximum.accumulate(df)
+    expected = pd.DataFrame({"a": [1, 3, 3, 4]})
+    tm.assert_frame_equal(result, expected)
+
+    df = pd.DataFrame({"a": [1, 3, 2, 4], "b": [0.1, 4.0, 3.0, 2.0]})
+    result = np.maximum.accumulate(df)
+    # in theory could preserve int dtype for default axis=0
+    expected = pd.DataFrame({"a": [1.0, 3.0, 3.0, 4.0], "b": [0.1, 4.0, 4.0, 4.0]})
+    tm.assert_frame_equal(result, expected)
+
+    result = np.maximum.accumulate(df, axis=0)
+    tm.assert_frame_equal(result, expected)
+
+    result = np.maximum.accumulate(df, axis=1)
+    expected = pd.DataFrame({"a": [1.0, 3.0, 2.0, 4.0], "b": [1.0, 4.0, 3.0, 4.0]})
+    tm.assert_frame_equal(result, expected)
+
+
+def test_frame_outer_deprecated():
+    df = pd.DataFrame({"A": [1, 2]})
+    with tm.assert_produces_warning(FutureWarning):
+        np.subtract.outer(df, df)
+
+
+def test_alignment_deprecation():
+    # https://github.com/pandas-dev/pandas/issues/39184
+    df1 = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+    df2 = pd.DataFrame({"b": [1, 2, 3], "c": [4, 5, 6]})
+    s1 = pd.Series([1, 2], index=["a", "b"])
+    s2 = pd.Series([1, 2], index=["b", "c"])
+
+    # binary dataframe / dataframe
+    expected = pd.DataFrame({"a": [2, 4, 6], "b": [8, 10, 12]})
+
+    with tm.assert_produces_warning(None):
+        # aligned -> no warning!
+        result = np.add(df1, df1)
+    tm.assert_frame_equal(result, expected)
+
+    with tm.assert_produces_warning(FutureWarning):
+        # non-aligned -> warns
+        result = np.add(df1, df2)
+    tm.assert_frame_equal(result, expected)
+
+    result = np.add(df1, df2.values)
+    tm.assert_frame_equal(result, expected)
+
+    result = np.add(df1.values, df2)
+    expected = pd.DataFrame({"b": [2, 4, 6], "c": [8, 10, 12]})
+    tm.assert_frame_equal(result, expected)
+
+    # binary dataframe / series
+    expected = pd.DataFrame({"a": [2, 3, 4], "b": [6, 7, 8]})
+
+    with tm.assert_produces_warning(None):
+        # aligned -> no warning!
+ result = np.add(df1, s1) + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning): + result = np.add(df1, s2) + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning): + result = np.add(s2, df1) + tm.assert_frame_equal(result, expected) + + result = np.add(df1, s2.values) + tm.assert_frame_equal(result, expected) + + +@td.skip_if_no("numba") +def test_alignment_deprecation_many_inputs(request): + # https://github.com/pandas-dev/pandas/issues/39184 + # test that the deprecation also works with > 2 inputs -> using a numba + # written ufunc for this because numpy itself doesn't have such ufuncs + from numba import ( + float64, + vectorize, + ) + + if np_version_gte1p22: + mark = pytest.mark.xfail( + reason="ufunc 'my_ufunc' did not contain a loop with signature matching " + "types", + ) + request.node.add_marker(mark) + + mark = pytest.mark.filterwarnings( + "ignore:`np.MachAr` is deprecated.*:DeprecationWarning" + ) + request.node.add_marker(mark) + + @vectorize([float64(float64, float64, float64)]) + def my_ufunc(x, y, z): + return x + y + z + + df1 = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + df2 = pd.DataFrame({"b": [1, 2, 3], "c": [4, 5, 6]}) + df3 = pd.DataFrame({"a": [1, 2, 3], "c": [4, 5, 6]}) + + with tm.assert_produces_warning(FutureWarning): + result = my_ufunc(df1, df2, df3) + expected = pd.DataFrame([[3.0, 12.0], [6.0, 15.0], [9.0, 18.0]], columns=["a", "b"]) + tm.assert_frame_equal(result, expected) + + # all aligned -> no warning + with tm.assert_produces_warning(None): + result = my_ufunc(df1, df1, df1) + tm.assert_frame_equal(result, expected) + + # mixed frame / arrays + with tm.assert_produces_warning(FutureWarning): + result = my_ufunc(df1, df2, df3.values) + tm.assert_frame_equal(result, expected) + + # single frame -> no warning + with tm.assert_produces_warning(None): + result = my_ufunc(df1, df2.values, df3.values) + tm.assert_frame_equal(result, expected) + + # takes indices of first frame + with tm.assert_produces_warning(FutureWarning): + result = my_ufunc(df1.values, df2, df3) + expected = expected.set_axis(["b", "c"], axis=1) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/test_unary.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/test_unary.py new file mode 100644 index 0000000..a69ca0f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/test_unary.py @@ -0,0 +1,168 @@ +from decimal import Decimal + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +class TestDataFrameUnaryOperators: + # __pos__, __neg__, __invert__ + + @pytest.mark.parametrize( + "df,expected", + [ + (pd.DataFrame({"a": [-1, 1]}), pd.DataFrame({"a": [1, -1]})), + (pd.DataFrame({"a": [False, True]}), pd.DataFrame({"a": [True, False]})), + ( + pd.DataFrame({"a": pd.Series(pd.to_timedelta([-1, 1]))}), + pd.DataFrame({"a": pd.Series(pd.to_timedelta([1, -1]))}), + ), + ], + ) + def test_neg_numeric(self, df, expected): + tm.assert_frame_equal(-df, expected) + tm.assert_series_equal(-df["a"], expected["a"]) + + @pytest.mark.parametrize( + "df, expected", + [ + (np.array([1, 2], dtype=object), np.array([-1, -2], dtype=object)), + ([Decimal("1.0"), Decimal("2.0")], [Decimal("-1.0"), Decimal("-2.0")]), + ], + ) + def test_neg_object(self, df, expected): + # GH#21380 + df = pd.DataFrame({"a": df}) + expected = pd.DataFrame({"a": expected}) + tm.assert_frame_equal(-df, expected) + 
tm.assert_series_equal(-df["a"], expected["a"]) + + @pytest.mark.parametrize( + "df", + [ + pd.DataFrame({"a": ["a", "b"]}), + pd.DataFrame({"a": pd.to_datetime(["2017-01-22", "1970-01-01"])}), + ], + ) + def test_neg_raises(self, df): + msg = ( + "bad operand type for unary -: 'str'|" + r"bad operand type for unary -: 'DatetimeArray'" + ) + with pytest.raises(TypeError, match=msg): + (-df) + with pytest.raises(TypeError, match=msg): + (-df["a"]) + + def test_invert(self, float_frame): + df = float_frame + + tm.assert_frame_equal(-(df < 0), ~(df < 0)) + + def test_invert_mixed(self): + shape = (10, 5) + df = pd.concat( + [ + pd.DataFrame(np.zeros(shape, dtype="bool")), + pd.DataFrame(np.zeros(shape, dtype=int)), + ], + axis=1, + ignore_index=True, + ) + result = ~df + expected = pd.concat( + [ + pd.DataFrame(np.ones(shape, dtype="bool")), + pd.DataFrame(-np.ones(shape, dtype=int)), + ], + axis=1, + ignore_index=True, + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "df", + [ + pd.DataFrame({"a": [-1, 1]}), + pd.DataFrame({"a": [False, True]}), + pd.DataFrame({"a": pd.Series(pd.to_timedelta([-1, 1]))}), + ], + ) + def test_pos_numeric(self, df): + # GH#16073 + tm.assert_frame_equal(+df, df) + tm.assert_series_equal(+df["a"], df["a"]) + + @pytest.mark.parametrize( + "df", + [ + # numpy changing behavior in the future + pytest.param( + pd.DataFrame({"a": ["a", "b"]}), + marks=[pytest.mark.filterwarnings("ignore")], + ), + pd.DataFrame({"a": np.array([-1, 2], dtype=object)}), + pd.DataFrame({"a": [Decimal("-1.0"), Decimal("2.0")]}), + ], + ) + def test_pos_object(self, df): + # GH#21380 + tm.assert_frame_equal(+df, df) + tm.assert_series_equal(+df["a"], df["a"]) + + @pytest.mark.parametrize( + "df", [pd.DataFrame({"a": pd.to_datetime(["2017-01-22", "1970-01-01"])})] + ) + def test_pos_raises(self, df): + msg = r"bad operand type for unary \+: 'DatetimeArray'" + with pytest.raises(TypeError, match=msg): + (+df) + with pytest.raises(TypeError, match=msg): + (+df["a"]) + + def test_unary_nullable(self): + df = pd.DataFrame( + { + "a": pd.array([1, -2, 3, pd.NA], dtype="Int64"), + "b": pd.array([4.0, -5.0, 6.0, pd.NA], dtype="Float32"), + "c": pd.array([True, False, False, pd.NA], dtype="boolean"), + # include numpy bool to make sure bool-vs-boolean behavior + # is consistent in non-NA locations + "d": np.array([True, False, False, True]), + } + ) + + result = +df + res_ufunc = np.positive(df) + expected = df + # TODO: assert that we have copies? 
+ tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(res_ufunc, expected) + + result = -df + res_ufunc = np.negative(df) + expected = pd.DataFrame( + { + "a": pd.array([-1, 2, -3, pd.NA], dtype="Int64"), + "b": pd.array([-4.0, 5.0, -6.0, pd.NA], dtype="Float32"), + "c": pd.array([False, True, True, pd.NA], dtype="boolean"), + "d": np.array([False, True, True, False]), + } + ) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(res_ufunc, expected) + + result = abs(df) + res_ufunc = np.abs(df) + expected = pd.DataFrame( + { + "a": pd.array([1, 2, 3, pd.NA], dtype="Int64"), + "b": pd.array([4.0, 5.0, 6.0, pd.NA], dtype="Float32"), + "c": pd.array([True, False, False, pd.NA], dtype="boolean"), + "d": np.array([True, False, False, True]), + } + ) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(res_ufunc, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/frame/test_validate.py b/.local/lib/python3.8/site-packages/pandas/tests/frame/test_validate.py new file mode 100644 index 0000000..e99e0a6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/frame/test_validate.py @@ -0,0 +1,41 @@ +import pytest + +from pandas.core.frame import DataFrame + + +@pytest.fixture +def dataframe(): + return DataFrame({"a": [1, 2], "b": [3, 4]}) + + +class TestDataFrameValidate: + """Tests for error handling related to data types of method arguments.""" + + @pytest.mark.parametrize( + "func", + [ + "query", + "eval", + "set_index", + "reset_index", + "dropna", + "drop_duplicates", + "sort_values", + ], + ) + @pytest.mark.parametrize("inplace", [1, "True", [1, 2, 3], 5.0]) + def test_validate_bool_args(self, dataframe, func, inplace): + msg = 'For argument "inplace" expected type bool' + kwargs = {"inplace": inplace} + + if func == "query": + kwargs["expr"] = "a > b" + elif func == "eval": + kwargs["expr"] = "a + b" + elif func == "set_index": + kwargs["keys"] = ["a"] + elif func == "sort_values": + kwargs["by"] = ["a"] + + with pytest.raises(ValueError, match=msg): + getattr(dataframe, func)(**kwargs) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/generic/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/generic/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/generic/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/generic/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..a9fc82c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/generic/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/generic/__pycache__/test_duplicate_labels.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/generic/__pycache__/test_duplicate_labels.cpython-38.pyc new file mode 100644 index 0000000..80db35d Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/generic/__pycache__/test_duplicate_labels.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/generic/__pycache__/test_finalize.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/generic/__pycache__/test_finalize.cpython-38.pyc new file mode 100644 index 0000000..cea03b4 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/generic/__pycache__/test_finalize.cpython-38.pyc differ diff --git 
a/.local/lib/python3.8/site-packages/pandas/tests/generic/__pycache__/test_frame.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/generic/__pycache__/test_frame.cpython-38.pyc new file mode 100644 index 0000000..f2c3a19 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/generic/__pycache__/test_frame.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/generic/__pycache__/test_generic.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/generic/__pycache__/test_generic.cpython-38.pyc new file mode 100644 index 0000000..20375a6 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/generic/__pycache__/test_generic.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/generic/__pycache__/test_label_or_level_utils.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/generic/__pycache__/test_label_or_level_utils.cpython-38.pyc new file mode 100644 index 0000000..ef29d77 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/generic/__pycache__/test_label_or_level_utils.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/generic/__pycache__/test_series.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/generic/__pycache__/test_series.cpython-38.pyc new file mode 100644 index 0000000..8201691 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/generic/__pycache__/test_series.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/generic/__pycache__/test_to_xarray.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/generic/__pycache__/test_to_xarray.cpython-38.pyc new file mode 100644 index 0000000..59030fa Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/generic/__pycache__/test_to_xarray.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/generic/test_duplicate_labels.py b/.local/lib/python3.8/site-packages/pandas/tests/generic/test_duplicate_labels.py new file mode 100644 index 0000000..43cd303 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/generic/test_duplicate_labels.py @@ -0,0 +1,452 @@ +"""Tests dealing with the NDFrame.allows_duplicates.""" +import operator + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + +not_implemented = pytest.mark.xfail(reason="Not implemented.") + +# ---------------------------------------------------------------------------- +# Preservation + + +class TestPreserves: + @pytest.mark.parametrize( + "cls, data", + [ + (pd.Series, np.array([])), + (pd.Series, [1, 2]), + (pd.DataFrame, {}), + (pd.DataFrame, {"A": [1, 2]}), + ], + ) + def test_construction_ok(self, cls, data): + result = cls(data) + assert result.flags.allows_duplicate_labels is True + + result = cls(data).set_flags(allows_duplicate_labels=False) + assert result.flags.allows_duplicate_labels is False + + @pytest.mark.parametrize( + "func", + [ + operator.itemgetter(["a"]), + operator.methodcaller("add", 1), + operator.methodcaller("rename", str.upper), + operator.methodcaller("rename", "name"), + operator.methodcaller("abs"), + np.abs, + ], + ) + def test_preserved_series(self, func): + s = pd.Series([0, 1], index=["a", "b"]).set_flags(allows_duplicate_labels=False) + assert func(s).flags.allows_duplicate_labels is False + + @pytest.mark.parametrize( + "other", [pd.Series(0, index=["a", "b", "c"]), 
pd.Series(0, index=["a", "b"])] + ) + # TODO: frame + @not_implemented + def test_align(self, other): + s = pd.Series([0, 1], index=["a", "b"]).set_flags(allows_duplicate_labels=False) + a, b = s.align(other) + assert a.flags.allows_duplicate_labels is False + assert b.flags.allows_duplicate_labels is False + + def test_preserved_frame(self): + df = pd.DataFrame({"A": [1, 2], "B": [3, 4]}, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ) + assert df.loc[["a"]].flags.allows_duplicate_labels is False + assert df.loc[:, ["A", "B"]].flags.allows_duplicate_labels is False + + @not_implemented + def test_to_frame(self): + s = pd.Series(dtype=float).set_flags(allows_duplicate_labels=False) + assert s.to_frame().flags.allows_duplicate_labels is False + + @pytest.mark.parametrize("func", ["add", "sub"]) + @pytest.mark.parametrize( + "frame", [False, pytest.param(True, marks=not_implemented)] + ) + @pytest.mark.parametrize("other", [1, pd.Series([1, 2], name="A")]) + def test_binops(self, func, other, frame): + df = pd.Series([1, 2], name="A", index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ) + if frame: + df = df.to_frame() + if isinstance(other, pd.Series) and frame: + other = other.to_frame() + func = operator.methodcaller(func, other) + assert df.flags.allows_duplicate_labels is False + assert func(df).flags.allows_duplicate_labels is False + + @not_implemented + def test_preserve_getitem(self): + df = pd.DataFrame({"A": [1, 2]}).set_flags(allows_duplicate_labels=False) + assert df[["A"]].flags.allows_duplicate_labels is False + assert df["A"].flags.allows_duplicate_labels is False + assert df.loc[0].flags.allows_duplicate_labels is False + assert df.loc[[0]].flags.allows_duplicate_labels is False + assert df.loc[0, ["A"]].flags.allows_duplicate_labels is False + + @pytest.mark.xfail(reason="Unclear behavior.") + def test_ndframe_getitem_caching_issue(self): + # NDFrame.__getitem__ will cache the first df['A']. May need to + # invalidate that cache? Update the cached entries? 
+ df = pd.DataFrame({"A": [0]}).set_flags(allows_duplicate_labels=False) + assert df["A"].flags.allows_duplicate_labels is False + df.flags.allows_duplicate_labels = True + assert df["A"].flags.allows_duplicate_labels is True + + @pytest.mark.parametrize( + "objs, kwargs", + [ + # Series + ( + [ + pd.Series(1, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ), + pd.Series(2, index=["c", "d"]).set_flags( + allows_duplicate_labels=False + ), + ], + {}, + ), + ( + [ + pd.Series(1, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ), + pd.Series(2, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ), + ], + {"ignore_index": True}, + ), + ( + [ + pd.Series(1, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ), + pd.Series(2, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ), + ], + {"axis": 1}, + ), + # Frame + ( + [ + pd.DataFrame({"A": [1, 2]}, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ), + pd.DataFrame({"A": [1, 2]}, index=["c", "d"]).set_flags( + allows_duplicate_labels=False + ), + ], + {}, + ), + ( + [ + pd.DataFrame({"A": [1, 2]}, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ), + pd.DataFrame({"A": [1, 2]}, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ), + ], + {"ignore_index": True}, + ), + ( + [ + pd.DataFrame({"A": [1, 2]}, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ), + pd.DataFrame({"B": [1, 2]}, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ), + ], + {"axis": 1}, + ), + # Series / Frame + ( + [ + pd.DataFrame({"A": [1, 2]}, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ), + pd.Series([1, 2], index=["a", "b"], name="B",).set_flags( + allows_duplicate_labels=False, + ), + ], + {"axis": 1}, + ), + ], + ) + def test_concat(self, objs, kwargs): + result = pd.concat(objs, **kwargs) + assert result.flags.allows_duplicate_labels is False + + @pytest.mark.parametrize( + "left, right, kwargs, expected", + [ + # false false false + pytest.param( + pd.DataFrame({"A": [0, 1]}, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ), + pd.DataFrame({"B": [0, 1]}, index=["a", "d"]).set_flags( + allows_duplicate_labels=False + ), + {"left_index": True, "right_index": True}, + False, + marks=not_implemented, + ), + # false true false + pytest.param( + pd.DataFrame({"A": [0, 1]}, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ), + pd.DataFrame({"B": [0, 1]}, index=["a", "d"]), + {"left_index": True, "right_index": True}, + False, + marks=not_implemented, + ), + # true true true + ( + pd.DataFrame({"A": [0, 1]}, index=["a", "b"]), + pd.DataFrame({"B": [0, 1]}, index=["a", "d"]), + {"left_index": True, "right_index": True}, + True, + ), + ], + ) + def test_merge(self, left, right, kwargs, expected): + result = pd.merge(left, right, **kwargs) + assert result.flags.allows_duplicate_labels is expected + + @not_implemented + def test_groupby(self): + # XXX: This is under tested + # TODO: + # - apply + # - transform + # - Should passing a grouper that disallows duplicates propagate? 
+ df = pd.DataFrame({"A": [1, 2, 3]}).set_flags(allows_duplicate_labels=False) + result = df.groupby([0, 0, 1]).agg("count") + assert result.flags.allows_duplicate_labels is False + + @pytest.mark.parametrize("frame", [True, False]) + @not_implemented + def test_window(self, frame): + df = pd.Series( + 1, + index=pd.date_range("2000", periods=12), + name="A", + allows_duplicate_labels=False, + ) + if frame: + df = df.to_frame() + assert df.rolling(3).mean().flags.allows_duplicate_labels is False + assert df.ewm(3).mean().flags.allows_duplicate_labels is False + assert df.expanding(3).mean().flags.allows_duplicate_labels is False + + +# ---------------------------------------------------------------------------- +# Raises + + +class TestRaises: + @pytest.mark.parametrize( + "cls, axes", + [ + (pd.Series, {"index": ["a", "a"], "dtype": float}), + (pd.DataFrame, {"index": ["a", "a"]}), + (pd.DataFrame, {"index": ["a", "a"], "columns": ["b", "b"]}), + (pd.DataFrame, {"columns": ["b", "b"]}), + ], + ) + def test_set_flags_with_duplicates(self, cls, axes): + result = cls(**axes) + assert result.flags.allows_duplicate_labels is True + + msg = "Index has duplicates." + with pytest.raises(pd.errors.DuplicateLabelError, match=msg): + cls(**axes).set_flags(allows_duplicate_labels=False) + + @pytest.mark.parametrize( + "data", + [ + pd.Series(index=[0, 0], dtype=float), + pd.DataFrame(index=[0, 0]), + pd.DataFrame(columns=[0, 0]), + ], + ) + def test_setting_allows_duplicate_labels_raises(self, data): + msg = "Index has duplicates." + with pytest.raises(pd.errors.DuplicateLabelError, match=msg): + data.flags.allows_duplicate_labels = False + + assert data.flags.allows_duplicate_labels is True + + def test_series_raises(self): + a = pd.Series(0, index=["a", "b"]) + b = pd.Series([0, 1], index=["a", "b"]).set_flags(allows_duplicate_labels=False) + msg = "Index has duplicates." + with pytest.raises(pd.errors.DuplicateLabelError, match=msg): + pd.concat([a, b]) + + @pytest.mark.parametrize( + "getter, target", + [ + (operator.itemgetter(["A", "A"]), None), + # loc + (operator.itemgetter(["a", "a"]), "loc"), + pytest.param( + operator.itemgetter(("a", ["A", "A"])), "loc", marks=not_implemented + ), + (operator.itemgetter((["a", "a"], "A")), "loc"), + # iloc + (operator.itemgetter([0, 0]), "iloc"), + pytest.param( + operator.itemgetter((0, [0, 0])), "iloc", marks=not_implemented + ), + pytest.param(operator.itemgetter(([0, 0], 0)), "iloc"), + ], + ) + def test_getitem_raises(self, getter, target): + df = pd.DataFrame({"A": [1, 2], "B": [3, 4]}, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ) + if target: + # df, df.loc, or df.iloc + target = getattr(df, target) + else: + target = df + + msg = "Index has duplicates." + with pytest.raises(pd.errors.DuplicateLabelError, match=msg): + getter(target) + + @pytest.mark.parametrize( + "objs, kwargs", + [ + ( + [ + pd.Series(1, index=[0, 1], name="a").set_flags( + allows_duplicate_labels=False + ), + pd.Series(2, index=[0, 1], name="a").set_flags( + allows_duplicate_labels=False + ), + ], + {"axis": 1}, + ) + ], + ) + def test_concat_raises(self, objs, kwargs): + msg = "Index has duplicates." + with pytest.raises(pd.errors.DuplicateLabelError, match=msg): + pd.concat(objs, **kwargs) + + @not_implemented + def test_merge_raises(self): + a = pd.DataFrame({"A": [0, 1, 2]}, index=["a", "b", "c"]).set_flags( + allows_duplicate_labels=False + ) + b = pd.DataFrame({"B": [0, 1, 2]}, index=["a", "b", "b"]) + msg = "Index has duplicates." 
+ with pytest.raises(pd.errors.DuplicateLabelError, match=msg): + pd.merge(a, b, left_index=True, right_index=True) + + +@pytest.mark.parametrize( + "idx", + [ + pd.Index([1, 1]), + pd.Index(["a", "a"]), + pd.Index([1.1, 1.1]), + pd.PeriodIndex([pd.Period("2000", "D")] * 2), + pd.DatetimeIndex([pd.Timestamp("2000")] * 2), + pd.TimedeltaIndex([pd.Timedelta("1D")] * 2), + pd.CategoricalIndex(["a", "a"]), + pd.IntervalIndex([pd.Interval(0, 1)] * 2), + pd.MultiIndex.from_tuples([("a", 1), ("a", 1)]), + ], + ids=lambda x: type(x).__name__, +) +def test_raises_basic(idx): + msg = "Index has duplicates." + with pytest.raises(pd.errors.DuplicateLabelError, match=msg): + pd.Series(1, index=idx).set_flags(allows_duplicate_labels=False) + + with pytest.raises(pd.errors.DuplicateLabelError, match=msg): + pd.DataFrame({"A": [1, 1]}, index=idx).set_flags(allows_duplicate_labels=False) + + with pytest.raises(pd.errors.DuplicateLabelError, match=msg): + pd.DataFrame([[1, 2]], columns=idx).set_flags(allows_duplicate_labels=False) + + +def test_format_duplicate_labels_message(): + idx = pd.Index(["a", "b", "a", "b", "c"]) + result = idx._format_duplicate_message() + expected = pd.DataFrame( + {"positions": [[0, 2], [1, 3]]}, index=pd.Index(["a", "b"], name="label") + ) + tm.assert_frame_equal(result, expected) + + +def test_format_duplicate_labels_message_multi(): + idx = pd.MultiIndex.from_product([["A"], ["a", "b", "a", "b", "c"]]) + result = idx._format_duplicate_message() + expected = pd.DataFrame( + {"positions": [[0, 2], [1, 3]]}, + index=pd.MultiIndex.from_product([["A"], ["a", "b"]]), + ) + tm.assert_frame_equal(result, expected) + + +def test_dataframe_insert_raises(): + df = pd.DataFrame({"A": [1, 2]}).set_flags(allows_duplicate_labels=False) + msg = "Cannot specify" + with pytest.raises(ValueError, match=msg): + df.insert(0, "A", [3, 4], allow_duplicates=True) + + +@pytest.mark.parametrize( + "method, frame_only", + [ + (operator.methodcaller("set_index", "A", inplace=True), True), + (operator.methodcaller("set_axis", ["A", "B"], inplace=True), False), + (operator.methodcaller("reset_index", inplace=True), True), + (operator.methodcaller("rename", lambda x: x, inplace=True), False), + ], +) +def test_inplace_raises(method, frame_only): + df = pd.DataFrame({"A": [0, 0], "B": [1, 2]}).set_flags( + allows_duplicate_labels=False + ) + s = df["A"] + s.flags.allows_duplicate_labels = False + msg = "Cannot specify" + + with pytest.raises(ValueError, match=msg): + method(df) + if not frame_only: + with pytest.raises(ValueError, match=msg): + method(s) + + +def test_pickle(): + a = pd.Series([1, 2]).set_flags(allows_duplicate_labels=False) + b = tm.round_trip_pickle(a) + tm.assert_series_equal(a, b) + + a = pd.DataFrame({"A": []}).set_flags(allows_duplicate_labels=False) + b = tm.round_trip_pickle(a) + tm.assert_frame_equal(a, b) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/generic/test_finalize.py b/.local/lib/python3.8/site-packages/pandas/tests/generic/test_finalize.py new file mode 100644 index 0000000..403e5c6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/generic/test_finalize.py @@ -0,0 +1,788 @@ +""" +An exhaustive list of pandas methods exercising NDFrame.__finalize__. +""" +import operator +import re + +import numpy as np +import pytest + +import pandas as pd + +# TODO: +# * Binary methods (mul, div, etc.) +# * Binary outputs (align, etc.) +# * top-level methods (concat, merge, get_dummies, etc.) 
+# * window +# * cumulative reductions + +not_implemented_mark = pytest.mark.xfail(reason="not implemented") + +mi = pd.MultiIndex.from_product([["a", "b"], [0, 1]], names=["A", "B"]) + +frame_data = ({"A": [1]},) +frame_mi_data = ({"A": [1, 2, 3, 4]}, mi) + + +# Tuple of +# - Callable: Constructor (Series, DataFrame) +# - Tuple: Constructor args +# - Callable: pass the constructed value with attrs set to this. + +_all_methods = [ + ( + pd.Series, + (np.array([0], dtype="float64")), + operator.methodcaller("view", "int64"), + ), + (pd.Series, ([0],), operator.methodcaller("take", [])), + (pd.Series, ([0],), operator.methodcaller("__getitem__", [True])), + (pd.Series, ([0],), operator.methodcaller("repeat", 2)), + pytest.param( + (pd.Series, ([0],), operator.methodcaller("reset_index")), + marks=pytest.mark.xfail, + ), + (pd.Series, ([0],), operator.methodcaller("reset_index", drop=True)), + pytest.param( + (pd.Series, ([0],), operator.methodcaller("to_frame")), marks=pytest.mark.xfail + ), + (pd.Series, ([0, 0],), operator.methodcaller("drop_duplicates")), + (pd.Series, ([0, 0],), operator.methodcaller("duplicated")), + (pd.Series, ([0, 0],), operator.methodcaller("round")), + (pd.Series, ([0, 0],), operator.methodcaller("rename", lambda x: x + 1)), + (pd.Series, ([0, 0],), operator.methodcaller("rename", "name")), + (pd.Series, ([0, 0],), operator.methodcaller("set_axis", ["a", "b"])), + (pd.Series, ([0, 0],), operator.methodcaller("reindex", [1, 0])), + (pd.Series, ([0, 0],), operator.methodcaller("drop", [0])), + (pd.Series, (pd.array([0, pd.NA]),), operator.methodcaller("fillna", 0)), + (pd.Series, ([0, 0],), operator.methodcaller("replace", {0: 1})), + (pd.Series, ([0, 0],), operator.methodcaller("shift")), + (pd.Series, ([0, 0],), operator.methodcaller("isin", [0, 1])), + (pd.Series, ([0, 0],), operator.methodcaller("between", 0, 2)), + (pd.Series, ([0, 0],), operator.methodcaller("isna")), + (pd.Series, ([0, 0],), operator.methodcaller("isnull")), + (pd.Series, ([0, 0],), operator.methodcaller("notna")), + (pd.Series, ([0, 0],), operator.methodcaller("notnull")), + (pd.Series, ([1],), operator.methodcaller("add", pd.Series([1]))), + # TODO: mul, div, etc. 
+ ( + pd.Series, + ([0], pd.period_range("2000", periods=1)), + operator.methodcaller("to_timestamp"), + ), + ( + pd.Series, + ([0], pd.date_range("2000", periods=1)), + operator.methodcaller("to_period"), + ), + pytest.param( + ( + pd.DataFrame, + frame_data, + operator.methodcaller("dot", pd.DataFrame(index=["A"])), + ), + marks=pytest.mark.xfail(reason="Implement binary finalize"), + ), + (pd.DataFrame, frame_data, operator.methodcaller("transpose")), + (pd.DataFrame, frame_data, operator.methodcaller("__getitem__", "A")), + (pd.DataFrame, frame_data, operator.methodcaller("__getitem__", ["A"])), + (pd.DataFrame, frame_data, operator.methodcaller("__getitem__", np.array([True]))), + (pd.DataFrame, ({("A", "a"): [1]},), operator.methodcaller("__getitem__", ["A"])), + (pd.DataFrame, frame_data, operator.methodcaller("query", "A == 1")), + (pd.DataFrame, frame_data, operator.methodcaller("eval", "A + 1", engine="python")), + (pd.DataFrame, frame_data, operator.methodcaller("select_dtypes", include="int")), + (pd.DataFrame, frame_data, operator.methodcaller("assign", b=1)), + (pd.DataFrame, frame_data, operator.methodcaller("set_axis", ["A"])), + (pd.DataFrame, frame_data, operator.methodcaller("reindex", [0, 1])), + (pd.DataFrame, frame_data, operator.methodcaller("drop", columns=["A"])), + (pd.DataFrame, frame_data, operator.methodcaller("drop", index=[0])), + (pd.DataFrame, frame_data, operator.methodcaller("rename", columns={"A": "a"})), + (pd.DataFrame, frame_data, operator.methodcaller("rename", index=lambda x: x)), + (pd.DataFrame, frame_data, operator.methodcaller("fillna", "A")), + (pd.DataFrame, frame_data, operator.methodcaller("fillna", method="ffill")), + (pd.DataFrame, frame_data, operator.methodcaller("set_index", "A")), + (pd.DataFrame, frame_data, operator.methodcaller("reset_index")), + (pd.DataFrame, frame_data, operator.methodcaller("isna")), + (pd.DataFrame, frame_data, operator.methodcaller("isnull")), + (pd.DataFrame, frame_data, operator.methodcaller("notna")), + (pd.DataFrame, frame_data, operator.methodcaller("notnull")), + (pd.DataFrame, frame_data, operator.methodcaller("dropna")), + (pd.DataFrame, frame_data, operator.methodcaller("drop_duplicates")), + (pd.DataFrame, frame_data, operator.methodcaller("duplicated")), + (pd.DataFrame, frame_data, operator.methodcaller("sort_values", by="A")), + (pd.DataFrame, frame_data, operator.methodcaller("sort_index")), + (pd.DataFrame, frame_data, operator.methodcaller("nlargest", 1, "A")), + (pd.DataFrame, frame_data, operator.methodcaller("nsmallest", 1, "A")), + (pd.DataFrame, frame_mi_data, operator.methodcaller("swaplevel")), + pytest.param( + ( + pd.DataFrame, + frame_data, + operator.methodcaller("add", pd.DataFrame(*frame_data)), + ), + marks=not_implemented_mark, + ), + # TODO: div, mul, etc. 
+ pytest.param( + ( + pd.DataFrame, + frame_data, + operator.methodcaller("combine", pd.DataFrame(*frame_data), operator.add), + ), + marks=not_implemented_mark, + ), + pytest.param( + ( + pd.DataFrame, + frame_data, + operator.methodcaller("combine_first", pd.DataFrame(*frame_data)), + ), + marks=not_implemented_mark, + ), + pytest.param( + ( + pd.DataFrame, + frame_data, + operator.methodcaller("update", pd.DataFrame(*frame_data)), + ), + marks=not_implemented_mark, + ), + (pd.DataFrame, frame_data, operator.methodcaller("pivot", columns="A")), + ( + pd.DataFrame, + ({"A": [1], "B": [1]},), + operator.methodcaller("pivot_table", columns="A"), + ), + ( + pd.DataFrame, + ({"A": [1], "B": [1]},), + operator.methodcaller("pivot_table", columns="A", aggfunc=["mean", "sum"]), + ), + (pd.DataFrame, frame_data, operator.methodcaller("stack")), + pytest.param( + (pd.DataFrame, frame_data, operator.methodcaller("explode", "A")), + marks=not_implemented_mark, + ), + (pd.DataFrame, frame_mi_data, operator.methodcaller("unstack")), + pytest.param( + ( + pd.DataFrame, + ({"A": ["a", "b", "c"], "B": [1, 3, 5], "C": [2, 4, 6]},), + operator.methodcaller("melt", id_vars=["A"], value_vars=["B"]), + ), + marks=not_implemented_mark, + ), + pytest.param( + (pd.DataFrame, frame_data, operator.methodcaller("applymap", lambda x: x)) + ), + pytest.param( + ( + pd.DataFrame, + frame_data, + operator.methodcaller("append", pd.DataFrame({"A": [1]})), + ), + marks=pytest.mark.filterwarnings( + "ignore:.*append method is deprecated.*:FutureWarning" + ), + ), + pytest.param( + ( + pd.DataFrame, + frame_data, + operator.methodcaller("append", pd.DataFrame({"B": [1]})), + ), + marks=pytest.mark.filterwarnings( + "ignore:.*append method is deprecated.*:FutureWarning" + ), + ), + pytest.param( + ( + pd.DataFrame, + frame_data, + operator.methodcaller("merge", pd.DataFrame({"A": [1]})), + ), + marks=not_implemented_mark, + ), + pytest.param( + (pd.DataFrame, frame_data, operator.methodcaller("round", 2)), + marks=not_implemented_mark, + ), + pytest.param( + (pd.DataFrame, frame_data, operator.methodcaller("corr")), + marks=not_implemented_mark, + ), + pytest.param( + (pd.DataFrame, frame_data, operator.methodcaller("cov")), + marks=[ + not_implemented_mark, + pytest.mark.filterwarnings("ignore::RuntimeWarning"), + ], + ), + pytest.param( + ( + pd.DataFrame, + frame_data, + operator.methodcaller("corrwith", pd.DataFrame(*frame_data)), + ), + marks=not_implemented_mark, + ), + pytest.param( + (pd.DataFrame, frame_data, operator.methodcaller("count")), + marks=not_implemented_mark, + ), + pytest.param( + (pd.DataFrame, frame_mi_data, operator.methodcaller("count", level="A")), + marks=[ + not_implemented_mark, + pytest.mark.filterwarnings("ignore:Using the level keyword:FutureWarning"), + ], + ), + pytest.param( + (pd.DataFrame, frame_data, operator.methodcaller("nunique")), + ), + pytest.param( + (pd.DataFrame, frame_data, operator.methodcaller("idxmin")), + marks=not_implemented_mark, + ), + pytest.param( + (pd.DataFrame, frame_data, operator.methodcaller("idxmax")), + marks=not_implemented_mark, + ), + pytest.param( + (pd.DataFrame, frame_data, operator.methodcaller("mode")), + ), + pytest.param( + (pd.Series, [0], operator.methodcaller("mode")), + marks=not_implemented_mark, + ), + pytest.param( + (pd.DataFrame, frame_data, operator.methodcaller("quantile")), + marks=not_implemented_mark, + ), + pytest.param( + (pd.DataFrame, frame_data, operator.methodcaller("quantile", q=[0.25, 0.75])), + marks=not_implemented_mark, + 
), + pytest.param( + (pd.DataFrame, frame_data, operator.methodcaller("quantile")), + marks=not_implemented_mark, + ), + ( + pd.DataFrame, + ({"A": [1]}, [pd.Period("2000", "D")]), + operator.methodcaller("to_timestamp"), + ), + ( + pd.DataFrame, + ({"A": [1]}, [pd.Timestamp("2000")]), + operator.methodcaller("to_period", freq="D"), + ), + pytest.param( + (pd.DataFrame, frame_mi_data, operator.methodcaller("isin", [1])), + marks=not_implemented_mark, + ), + pytest.param( + (pd.DataFrame, frame_mi_data, operator.methodcaller("isin", pd.Series([1]))), + marks=not_implemented_mark, + ), + pytest.param( + ( + pd.DataFrame, + frame_mi_data, + operator.methodcaller("isin", pd.DataFrame({"A": [1]})), + ), + marks=not_implemented_mark, + ), + (pd.DataFrame, frame_data, operator.methodcaller("swapaxes", 0, 1)), + (pd.DataFrame, frame_mi_data, operator.methodcaller("droplevel", "A")), + (pd.DataFrame, frame_data, operator.methodcaller("pop", "A")), + pytest.param( + (pd.DataFrame, frame_data, operator.methodcaller("squeeze")), + marks=not_implemented_mark, + ), + pytest.param( + (pd.Series, ([1, 2],), operator.methodcaller("squeeze")) + # marks=not_implemented_mark, + ), + (pd.Series, ([1, 2],), operator.methodcaller("rename_axis", index="a")), + (pd.DataFrame, frame_data, operator.methodcaller("rename_axis", columns="a")), + # Unary ops + (pd.DataFrame, frame_data, operator.neg), + (pd.Series, [1], operator.neg), + (pd.DataFrame, frame_data, operator.pos), + (pd.Series, [1], operator.pos), + (pd.DataFrame, frame_data, operator.inv), + (pd.Series, [1], operator.inv), + (pd.DataFrame, frame_data, abs), + (pd.Series, [1], abs), + pytest.param((pd.DataFrame, frame_data, round), marks=not_implemented_mark), + (pd.Series, [1], round), + (pd.DataFrame, frame_data, operator.methodcaller("take", [0, 0])), + (pd.DataFrame, frame_mi_data, operator.methodcaller("xs", "a")), + (pd.Series, (1, mi), operator.methodcaller("xs", "a")), + (pd.DataFrame, frame_data, operator.methodcaller("get", "A")), + ( + pd.DataFrame, + frame_data, + operator.methodcaller("reindex_like", pd.DataFrame({"A": [1, 2, 3]})), + ), + ( + pd.Series, + frame_data, + operator.methodcaller("reindex_like", pd.Series([0, 1, 2])), + ), + (pd.DataFrame, frame_data, operator.methodcaller("add_prefix", "_")), + (pd.DataFrame, frame_data, operator.methodcaller("add_suffix", "_")), + (pd.Series, (1, ["a", "b"]), operator.methodcaller("add_prefix", "_")), + (pd.Series, (1, ["a", "b"]), operator.methodcaller("add_suffix", "_")), + (pd.Series, ([3, 2],), operator.methodcaller("sort_values")), + (pd.Series, ([1] * 10,), operator.methodcaller("head")), + (pd.DataFrame, ({"A": [1] * 10},), operator.methodcaller("head")), + (pd.Series, ([1] * 10,), operator.methodcaller("tail")), + (pd.DataFrame, ({"A": [1] * 10},), operator.methodcaller("tail")), + (pd.Series, ([1, 2],), operator.methodcaller("sample", n=2, replace=True)), + (pd.DataFrame, (frame_data,), operator.methodcaller("sample", n=2, replace=True)), + (pd.Series, ([1, 2],), operator.methodcaller("astype", float)), + (pd.DataFrame, frame_data, operator.methodcaller("astype", float)), + (pd.Series, ([1, 2],), operator.methodcaller("copy")), + (pd.DataFrame, frame_data, operator.methodcaller("copy")), + (pd.Series, ([1, 2], None, object), operator.methodcaller("infer_objects")), + ( + pd.DataFrame, + ({"A": np.array([1, 2], dtype=object)},), + operator.methodcaller("infer_objects"), + ), + (pd.Series, ([1, 2],), operator.methodcaller("convert_dtypes")), + (pd.DataFrame, frame_data, 
operator.methodcaller("convert_dtypes")), + (pd.Series, ([1, None, 3],), operator.methodcaller("interpolate")), + (pd.DataFrame, ({"A": [1, None, 3]},), operator.methodcaller("interpolate")), + (pd.Series, ([1, 2],), operator.methodcaller("clip", lower=1)), + (pd.DataFrame, frame_data, operator.methodcaller("clip", lower=1)), + ( + pd.Series, + (1, pd.date_range("2000", periods=4)), + operator.methodcaller("asfreq", "H"), + ), + ( + pd.DataFrame, + ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)), + operator.methodcaller("asfreq", "H"), + ), + ( + pd.Series, + (1, pd.date_range("2000", periods=4)), + operator.methodcaller("at_time", "12:00"), + ), + ( + pd.DataFrame, + ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)), + operator.methodcaller("at_time", "12:00"), + ), + ( + pd.Series, + (1, pd.date_range("2000", periods=4)), + operator.methodcaller("between_time", "12:00", "13:00"), + ), + ( + pd.DataFrame, + ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)), + operator.methodcaller("between_time", "12:00", "13:00"), + ), + ( + pd.Series, + (1, pd.date_range("2000", periods=4)), + operator.methodcaller("first", "3D"), + ), + ( + pd.DataFrame, + ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)), + operator.methodcaller("first", "3D"), + ), + ( + pd.Series, + (1, pd.date_range("2000", periods=4)), + operator.methodcaller("last", "3D"), + ), + ( + pd.DataFrame, + ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)), + operator.methodcaller("last", "3D"), + ), + (pd.Series, ([1, 2],), operator.methodcaller("rank")), + (pd.DataFrame, frame_data, operator.methodcaller("rank")), + (pd.Series, ([1, 2],), operator.methodcaller("where", np.array([True, False]))), + (pd.DataFrame, frame_data, operator.methodcaller("where", np.array([[True]]))), + (pd.Series, ([1, 2],), operator.methodcaller("mask", np.array([True, False]))), + (pd.DataFrame, frame_data, operator.methodcaller("mask", np.array([[True]]))), + pytest.param( + ( + pd.Series, + (1, pd.date_range("2000", periods=4)), + operator.methodcaller("tshift"), + ), + marks=pytest.mark.filterwarnings("ignore::FutureWarning"), + ), + pytest.param( + ( + pd.DataFrame, + ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)), + operator.methodcaller("tshift"), + ), + marks=pytest.mark.filterwarnings("ignore::FutureWarning"), + ), + (pd.Series, ([1, 2],), operator.methodcaller("truncate", before=0)), + (pd.DataFrame, frame_data, operator.methodcaller("truncate", before=0)), + ( + pd.Series, + (1, pd.date_range("2000", periods=4, tz="UTC")), + operator.methodcaller("tz_convert", "CET"), + ), + ( + pd.DataFrame, + ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4, tz="UTC")), + operator.methodcaller("tz_convert", "CET"), + ), + ( + pd.Series, + (1, pd.date_range("2000", periods=4)), + operator.methodcaller("tz_localize", "CET"), + ), + ( + pd.DataFrame, + ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)), + operator.methodcaller("tz_localize", "CET"), + ), + pytest.param( + (pd.Series, ([1, 2],), operator.methodcaller("describe")), + marks=not_implemented_mark, + ), + pytest.param( + (pd.DataFrame, frame_data, operator.methodcaller("describe")), + marks=not_implemented_mark, + ), + (pd.Series, ([1, 2],), operator.methodcaller("pct_change")), + pytest.param( + (pd.DataFrame, frame_data, operator.methodcaller("pct_change")), + marks=not_implemented_mark, + ), + (pd.Series, ([1],), operator.methodcaller("transform", lambda x: x - x.min())), + pytest.param( + ( + pd.DataFrame, + frame_mi_data, + 
operator.methodcaller("transform", lambda x: x - x.min()), + ), + ), + (pd.Series, ([1],), operator.methodcaller("apply", lambda x: x)), + pytest.param( + (pd.DataFrame, frame_mi_data, operator.methodcaller("apply", lambda x: x)), + ), + # Cumulative reductions + (pd.Series, ([1],), operator.methodcaller("cumsum")), + (pd.DataFrame, frame_data, operator.methodcaller("cumsum")), + # Reductions + pytest.param( + (pd.DataFrame, frame_data, operator.methodcaller("any")), + marks=not_implemented_mark, + ), + pytest.param( + (pd.DataFrame, frame_data, operator.methodcaller("sum")), + marks=not_implemented_mark, + ), + pytest.param( + (pd.DataFrame, frame_data, operator.methodcaller("std")), + marks=not_implemented_mark, + ), + pytest.param( + (pd.DataFrame, frame_data, operator.methodcaller("mean")), + marks=not_implemented_mark, + ), +] + + +def idfn(x): + xpr = re.compile(r"'(.*)?'") + m = xpr.search(str(x)) + if m: + return m.group(1) + else: + return str(x) + + +@pytest.fixture(params=_all_methods, ids=lambda x: idfn(x[-1])) +def ndframe_method(request): + """ + An NDFrame method returning an NDFrame. + """ + return request.param + + +def test_finalize_called(ndframe_method): + cls, init_args, method = ndframe_method + ndframe = cls(*init_args) + + ndframe.attrs = {"a": 1} + result = method(ndframe) + + assert result.attrs == {"a": 1} + + +@not_implemented_mark +def test_finalize_called_eval_numexpr(): + pytest.importorskip("numexpr") + df = pd.DataFrame({"A": [1, 2]}) + df.attrs["A"] = 1 + result = df.eval("A + 1", engine="numexpr") + assert result.attrs == {"A": 1} + + +# ---------------------------------------------------------------------------- +# Binary operations + + +@pytest.mark.parametrize("annotate", ["left", "right", "both"]) +@pytest.mark.parametrize( + "args", + [ + (1, pd.Series([1])), + (1, pd.DataFrame({"A": [1]})), + (pd.Series([1]), 1), + (pd.DataFrame({"A": [1]}), 1), + (pd.Series([1]), pd.Series([1])), + (pd.DataFrame({"A": [1]}), pd.DataFrame({"A": [1]})), + (pd.Series([1]), pd.DataFrame({"A": [1]})), + (pd.DataFrame({"A": [1]}), pd.Series([1])), + ], +) +def test_binops(request, args, annotate, all_arithmetic_functions): + # This generates 326 tests... Is that needed? 
+    left, right = args
+    # Skip combinations where an operand that is supposed to carry the
+    # metadata is a plain int: an int has no ``attrs`` to annotate.
+    if (annotate in {"left", "both"} and isinstance(left, int)) or (
+        annotate in {"right", "both"} and isinstance(right, int)
+    ):
+        return
+
+    if isinstance(left, pd.DataFrame) or isinstance(right, pd.DataFrame):
+        request.node.add_marker(pytest.mark.xfail(reason="not implemented"))
+
+    if annotate in {"left", "both"} and not isinstance(left, int):
+        left.attrs = {"a": 1}
+    if annotate in {"right", "both"} and not isinstance(right, int):
+        right.attrs = {"a": 1}
+
+    result = all_arithmetic_functions(left, right)
+    assert result.attrs == {"a": 1}
+
+
+# ----------------------------------------------------------------------------
+# Accessors
+
+
+@pytest.mark.parametrize(
+    "method",
+    [
+        operator.methodcaller("capitalize"),
+        operator.methodcaller("casefold"),
+        operator.methodcaller("cat", ["a"]),
+        operator.methodcaller("contains", "a"),
+        operator.methodcaller("count", "a"),
+        operator.methodcaller("encode", "utf-8"),
+        operator.methodcaller("endswith", "a"),
+        operator.methodcaller("extract", r"(\w)(\d)"),
+        operator.methodcaller("extract", r"(\w)(\d)", expand=False),
+        operator.methodcaller("find", "a"),
+        operator.methodcaller("findall", "a"),
+        operator.methodcaller("get", 0),
+        operator.methodcaller("index", "a"),
+        operator.methodcaller("len"),
+        operator.methodcaller("ljust", 4),
+        operator.methodcaller("lower"),
+        operator.methodcaller("lstrip"),
+        operator.methodcaller("match", r"\w"),
+        operator.methodcaller("normalize", "NFC"),
+        operator.methodcaller("pad", 4),
+        operator.methodcaller("partition", "a"),
+        operator.methodcaller("repeat", 2),
+        operator.methodcaller("replace", "a", "b"),
+        operator.methodcaller("rfind", "a"),
+        operator.methodcaller("rindex", "a"),
+        operator.methodcaller("rjust", 4),
+        operator.methodcaller("rpartition", "a"),
+        operator.methodcaller("rstrip"),
+        operator.methodcaller("slice", 4),
+        operator.methodcaller("slice_replace", 1, repl="a"),
+        operator.methodcaller("startswith", "a"),
+        operator.methodcaller("strip"),
+        operator.methodcaller("swapcase"),
+        operator.methodcaller("translate", {"a": "b"}),
+        operator.methodcaller("upper"),
+        operator.methodcaller("wrap", 4),
+        operator.methodcaller("zfill", 4),
+        operator.methodcaller("isalnum"),
+        operator.methodcaller("isalpha"),
+        operator.methodcaller("isdigit"),
+        operator.methodcaller("isspace"),
+        operator.methodcaller("islower"),
+        operator.methodcaller("isupper"),
+        operator.methodcaller("istitle"),
+        operator.methodcaller("isnumeric"),
+        operator.methodcaller("isdecimal"),
+        operator.methodcaller("get_dummies"),
+    ],
+    ids=idfn,
+)
+def test_string_method(method):
+    s = pd.Series(["a1"])
+    s.attrs = {"a": 1}
+    result = method(s.str)
+    assert result.attrs == {"a": 1}
+
+
+@pytest.mark.parametrize(
+    "method",
+    [
+        operator.methodcaller("to_period"),
+        operator.methodcaller("tz_localize", "CET"),
+        operator.methodcaller("normalize"),
+        operator.methodcaller("strftime", "%Y"),
+        operator.methodcaller("round", "H"),
+        operator.methodcaller("floor", "H"),
+        operator.methodcaller("ceil", "H"),
+        operator.methodcaller("month_name"),
+        operator.methodcaller("day_name"),
+    ],
+    ids=idfn,
+)
+def test_datetime_method(method):
+    s = pd.Series(pd.date_range("2000", periods=4))
+    s.attrs = {"a": 1}
+    result = method(s.dt)
+    assert result.attrs == {"a": 1}
+
+
+@pytest.mark.parametrize(
+    "attr",
+    [
+        "date",
+        "time",
+        "timetz",
+        "year",
+        "month",
+        "day",
+        "hour",
+        "minute",
+        "second",
+        "microsecond",
+        "nanosecond",
+        "dayofweek",
+        "day_of_week",
+        "dayofyear",
+        "day_of_year",
+        "quarter",
+        "is_month_start",
"is_month_end", + "is_quarter_start", + "is_quarter_end", + "is_year_start", + "is_year_end", + "is_leap_year", + "daysinmonth", + "days_in_month", + ], +) +def test_datetime_property(attr): + s = pd.Series(pd.date_range("2000", periods=4)) + s.attrs = {"a": 1} + result = getattr(s.dt, attr) + assert result.attrs == {"a": 1} + + +@pytest.mark.parametrize( + "attr", ["days", "seconds", "microseconds", "nanoseconds", "components"] +) +def test_timedelta_property(attr): + s = pd.Series(pd.timedelta_range("2000", periods=4)) + s.attrs = {"a": 1} + result = getattr(s.dt, attr) + assert result.attrs == {"a": 1} + + +@pytest.mark.parametrize("method", [operator.methodcaller("total_seconds")]) +def test_timedelta_methods(method): + s = pd.Series(pd.timedelta_range("2000", periods=4)) + s.attrs = {"a": 1} + result = method(s.dt) + assert result.attrs == {"a": 1} + + +@pytest.mark.parametrize( + "method", + [ + operator.methodcaller("add_categories", ["c"]), + operator.methodcaller("as_ordered"), + operator.methodcaller("as_unordered"), + lambda x: getattr(x, "codes"), + operator.methodcaller("remove_categories", "a"), + operator.methodcaller("remove_unused_categories"), + operator.methodcaller("rename_categories", {"a": "A", "b": "B"}), + operator.methodcaller("reorder_categories", ["b", "a"]), + operator.methodcaller("set_categories", ["A", "B"]), + ], +) +@not_implemented_mark +def test_categorical_accessor(method): + s = pd.Series(["a", "b"], dtype="category") + s.attrs = {"a": 1} + result = method(s.cat) + assert result.attrs == {"a": 1} + + +# ---------------------------------------------------------------------------- +# Groupby + + +@pytest.mark.parametrize( + "obj", [pd.Series([0, 0]), pd.DataFrame({"A": [0, 1], "B": [1, 2]})] +) +@pytest.mark.parametrize( + "method", + [ + operator.methodcaller("sum"), + lambda x: x.apply(lambda y: y), + lambda x: x.agg("sum"), + lambda x: x.agg("mean"), + lambda x: x.agg("median"), + ], +) +def test_groupby_finalize(obj, method): + obj.attrs = {"a": 1} + result = method(obj.groupby([0, 0])) + assert result.attrs == {"a": 1} + + +@pytest.mark.parametrize( + "obj", [pd.Series([0, 0]), pd.DataFrame({"A": [0, 1], "B": [1, 2]})] +) +@pytest.mark.parametrize( + "method", + [ + lambda x: x.agg(["sum", "count"]), + lambda x: x.agg("std"), + lambda x: x.agg("var"), + lambda x: x.agg("sem"), + lambda x: x.agg("size"), + lambda x: x.agg("ohlc"), + lambda x: x.agg("describe"), + ], +) +@not_implemented_mark +def test_groupby_finalize_not_implemented(obj, method): + obj.attrs = {"a": 1} + result = method(obj.groupby([0, 0])) + assert result.attrs == {"a": 1} + + +def test_finalize_frame_series_name(): + # https://github.com/pandas-dev/pandas/pull/37186/files#r506978889 + # ensure we don't copy the column `name` to the Series. 
+ df = pd.DataFrame({"name": [1, 2]}) + result = pd.Series([1, 2]).__finalize__(df) + assert result.name is None diff --git a/.local/lib/python3.8/site-packages/pandas/tests/generic/test_frame.py b/.local/lib/python3.8/site-packages/pandas/tests/generic/test_frame.py new file mode 100644 index 0000000..2b248af --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/generic/test_frame.py @@ -0,0 +1,198 @@ +from copy import deepcopy +from operator import methodcaller + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + MultiIndex, + Series, + date_range, +) +import pandas._testing as tm + + +class TestDataFrame: + @pytest.mark.parametrize("func", ["_set_axis_name", "rename_axis"]) + def test_set_axis_name(self, func): + df = DataFrame([[1, 2], [3, 4]]) + + result = methodcaller(func, "foo")(df) + assert df.index.name is None + assert result.index.name == "foo" + + result = methodcaller(func, "cols", axis=1)(df) + assert df.columns.name is None + assert result.columns.name == "cols" + + @pytest.mark.parametrize("func", ["_set_axis_name", "rename_axis"]) + def test_set_axis_name_mi(self, func): + df = DataFrame( + np.empty((3, 3)), + index=MultiIndex.from_tuples([("A", x) for x in list("aBc")]), + columns=MultiIndex.from_tuples([("C", x) for x in list("xyz")]), + ) + + level_names = ["L1", "L2"] + + result = methodcaller(func, level_names)(df) + assert result.index.names == level_names + assert result.columns.names == [None, None] + + result = methodcaller(func, level_names, axis=1)(df) + assert result.columns.names == ["L1", "L2"] + assert result.index.names == [None, None] + + def test_nonzero_single_element(self): + + # allow single item via bool method + df = DataFrame([[True]]) + assert df.bool() + + df = DataFrame([[False]]) + assert not df.bool() + + df = DataFrame([[False, False]]) + msg = "The truth value of a DataFrame is ambiguous" + with pytest.raises(ValueError, match=msg): + df.bool() + with pytest.raises(ValueError, match=msg): + bool(df) + + def test_metadata_propagation_indiv_groupby(self): + # groupby + df = DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": ["one", "one", "two", "three", "two", "two", "one", "three"], + "C": np.random.randn(8), + "D": np.random.randn(8), + } + ) + result = df.groupby("A").sum() + tm.assert_metadata_equivalent(df, result) + + def test_metadata_propagation_indiv_resample(self): + # resample + df = DataFrame( + np.random.randn(1000, 2), + index=date_range("20130101", periods=1000, freq="s"), + ) + result = df.resample("1T") + tm.assert_metadata_equivalent(df, result) + + def test_metadata_propagation_indiv(self, monkeypatch): + # merging with override + # GH 6923 + + def finalize(self, other, method=None, **kwargs): + + for name in self._metadata: + if method == "merge": + left, right = other.left, other.right + value = getattr(left, name, "") + "|" + getattr(right, name, "") + object.__setattr__(self, name, value) + elif method == "concat": + value = "+".join( + [getattr(o, name) for o in other.objs if getattr(o, name, None)] + ) + object.__setattr__(self, name, value) + else: + object.__setattr__(self, name, getattr(other, name, "")) + + return self + + with monkeypatch.context() as m: + m.setattr(DataFrame, "_metadata", ["filename"]) + m.setattr(DataFrame, "__finalize__", finalize) + + np.random.seed(10) + df1 = DataFrame(np.random.randint(0, 4, (3, 2)), columns=["a", "b"]) + df2 = DataFrame(np.random.randint(0, 4, (3, 2)), columns=["c", "d"]) + 
DataFrame._metadata = ["filename"]  # redundant with the monkeypatch above, but harmless
+            df1.filename = "fname1.csv"
+            df2.filename = "fname2.csv"
+
+            # With the patched __finalize__ above, "merge" joins the operands'
+            # ``filename`` metadata with "|" and "concat" joins with "+".
+            result = df1.merge(df2, left_on=["a"], right_on=["c"], how="inner")
+            assert result.filename == "fname1.csv|fname2.csv"
+
+            # concat
+            # GH#6927
+            df1 = DataFrame(np.random.randint(0, 4, (3, 2)), columns=list("ab"))
+            df1.filename = "foo"
+
+            result = pd.concat([df1, df1])
+            assert result.filename == "foo+foo"
+
+    def test_set_attribute(self):
+        # Test for consistent setattr behavior when an attribute and a column
+        # have the same name (Issue #8994)
+        df = DataFrame({"x": [1, 2, 3]})
+
+        df.y = 2
+        df["y"] = [2, 4, 6]
+        df.y = 5
+
+        assert df.y == 5
+        tm.assert_series_equal(df["y"], Series([2, 4, 6], name="y"))
+
+    def test_deepcopy_empty(self):
+        # This test covers empty frame copying with non-empty column sets
+        # as reported in issue GH15370
+        empty_frame = DataFrame(data=[], index=[], columns=["A"])
+        empty_frame_copy = deepcopy(empty_frame)
+
+        tm.assert_frame_equal(empty_frame_copy, empty_frame)
+
+
+# formerly in Generic but only test DataFrame
+class TestDataFrame2:
+    @pytest.mark.parametrize("value", [1, "True", [1, 2, 3], 5.0])
+    def test_validate_bool_args(self, value):
+        df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+
+        msg = 'For argument "inplace" expected type bool, received type'
+        with pytest.raises(ValueError, match=msg):
+            super(DataFrame, df).rename_axis(
+                mapper={"a": "x", "b": "y"}, axis=1, inplace=value
+            )
+
+        with pytest.raises(ValueError, match=msg):
+            super(DataFrame, df).drop("a", axis=1, inplace=value)
+
+        with pytest.raises(ValueError, match=msg):
+            super(DataFrame, df).fillna(value=0, inplace=value)
+
+        with pytest.raises(ValueError, match=msg):
+            super(DataFrame, df).replace(to_replace=1, value=7, inplace=value)
+
+        with pytest.raises(ValueError, match=msg):
+            super(DataFrame, df).interpolate(inplace=value)
+
+        with pytest.raises(ValueError, match=msg):
+            super(DataFrame, df)._where(cond=df.a > 2, inplace=value)
+
+        with pytest.raises(ValueError, match=msg):
+            super(DataFrame, df).mask(cond=df.a > 2, inplace=value)
+
+    def test_unexpected_keyword(self):
+        # GH8597
+        df = DataFrame(np.random.randn(5, 2), columns=["jim", "joe"])
+        ca = pd.Categorical([0, 0, 2, 2, 3, np.nan])
+        ts = df["joe"].copy()
+        ts[2] = np.nan
+
+        msg = "unexpected keyword"
+        with pytest.raises(TypeError, match=msg):
+            df.drop("joe", axis=1, in_place=True)
+
+        with pytest.raises(TypeError, match=msg):
+            df.reindex([1, 0], inplace=True)
+
+        with pytest.raises(TypeError, match=msg):
+            ca.fillna(0, inplace=True)
+
+        with pytest.raises(TypeError, match=msg):
+            ts.fillna(0, in_place=True)
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/generic/test_generic.py b/.local/lib/python3.8/site-packages/pandas/tests/generic/test_generic.py
new file mode 100644
index 0000000..5c1eb28
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/tests/generic/test_generic.py
@@ -0,0 +1,467 @@
+from copy import (
+    copy,
+    deepcopy,
+)
+
+import numpy as np
+import pytest
+
+from pandas.core.dtypes.common import is_scalar
+
+from pandas import (
+    DataFrame,
+    Series,
+)
+import pandas._testing as tm
+
+# ----------------------------------------------------------------------
+# Generic types test cases
+
+
+def construct(box, shape, value=None, dtype=None, **kwargs):
+    """
+    construct an object for the given shape
+    if value is specified use that if it's a scalar
+    if value is an array, repeat it as needed
+    """
+    if isinstance(shape, int):
+        shape = tuple([shape] * box._AXIS_LEN)
+    if value is not None:
+        if is_scalar(value):
+            if value == "empty":
+                arr = None
+                dtype = np.float64
+
+                # remove the info axis
+                kwargs.pop(box._info_axis_name, None)
+            else:
+                arr = np.empty(shape, dtype=dtype)
+                arr.fill(value)
+        else:
+            fshape = np.prod(shape)
+            arr = value.ravel()
+            # integer division: the repeat count must be a whole number
+            new_shape = fshape // arr.shape[0]
+            if fshape % arr.shape[0] != 0:
+                raise Exception("invalid value passed in construct")
+
+            arr = np.repeat(arr, new_shape).reshape(shape)
+    else:
+        arr = np.random.randn(*shape)
+    return box(arr, dtype=dtype, **kwargs)
+
+
+class Generic:
+    @pytest.mark.parametrize(
+        "func",
+        [
+            str.lower,
+            {x: x.lower() for x in list("ABCD")},
+            Series({x: x.lower() for x in list("ABCD")}),
+        ],
+    )
+    def test_rename(self, frame_or_series, func):
+
+        # single axis
+        idx = list("ABCD")
+
+        for axis in frame_or_series._AXIS_ORDERS:
+            kwargs = {axis: idx}
+            obj = construct(frame_or_series, 4, **kwargs)
+
+            # rename a single axis
+            result = obj.rename(**{axis: func})
+            expected = obj.copy()
+            setattr(expected, axis, list("abcd"))
+            tm.assert_equal(result, expected)
+
+    def test_get_numeric_data(self, frame_or_series):
+
+        n = 4
+        kwargs = {
+            frame_or_series._get_axis_name(i): list(range(n))
+            for i in range(frame_or_series._AXIS_LEN)
+        }
+
+        # get the numeric data
+        o = construct(frame_or_series, n, **kwargs)
+        result = o._get_numeric_data()
+        tm.assert_equal(result, o)
+
+        # non-inclusion
+        result = o._get_bool_data()
+        expected = construct(frame_or_series, n, value="empty", **kwargs)
+        if isinstance(o, DataFrame):
+            # preserve columns dtype
+            expected.columns = o.columns[:0]
+        tm.assert_equal(result, expected)
+
+        # get the bool data
+        arr = np.array([True, True, False, True])
+        o = construct(frame_or_series, n, value=arr, **kwargs)
+        result = o._get_numeric_data()
+        tm.assert_equal(result, o)
+
+    def test_nonzero(self, frame_or_series):
+
+        # GH 4633
+        # look at the boolean/nonzero behavior for objects
+        obj = construct(frame_or_series, shape=4)
+        msg = f"The truth value of a {frame_or_series.__name__} is ambiguous"
+        with pytest.raises(ValueError, match=msg):
+            bool(obj == 0)
+        with pytest.raises(ValueError, match=msg):
+            bool(obj == 1)
+        with pytest.raises(ValueError, match=msg):
+            bool(obj)
+
+        obj = construct(frame_or_series, shape=4, value=1)
+        with pytest.raises(ValueError, match=msg):
+            bool(obj == 0)
+        with pytest.raises(ValueError, match=msg):
+            bool(obj == 1)
+        with pytest.raises(ValueError, match=msg):
+            bool(obj)
+
+        obj = construct(frame_or_series, shape=4, value=np.nan)
+        with pytest.raises(ValueError, match=msg):
+            bool(obj == 0)
+        with pytest.raises(ValueError, match=msg):
+            bool(obj == 1)
+        with pytest.raises(ValueError, match=msg):
+            bool(obj)
+
+        # empty
+        obj = construct(frame_or_series, shape=0)
+        with pytest.raises(ValueError, match=msg):
+            bool(obj)
+
+        # invalid behaviors
+
+        obj1 = construct(frame_or_series, shape=4, value=1)
+        obj2 = construct(frame_or_series, shape=4, value=1)
+
+        with pytest.raises(ValueError, match=msg):
+            if obj1:
+                pass
+
+        with pytest.raises(ValueError, match=msg):
+            obj1 and obj2
+        with pytest.raises(ValueError, match=msg):
+            obj1 or obj2
+        with pytest.raises(ValueError, match=msg):
+            not obj1
+
+    def test_frame_or_series_compound_dtypes(self, frame_or_series):
+        # see gh-5191
+        # Compound dtypes should raise NotImplementedError.
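+        # "Compound" here means a structured NumPy dtype with named fields,
+        # e.g. np.dtype([("A", "i4"), ("B", "f8")]); pandas columns are
+        # homogeneous, so such dtypes are rejected at construction time.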
+
+        def f(dtype):
+            return construct(frame_or_series, shape=3, value=1, dtype=dtype)
+
+        msg = (
+            "compound dtypes are not implemented "
+            f"in the {frame_or_series.__name__} constructor"
+        )
+
+        with pytest.raises(NotImplementedError, match=msg):
+            f([("A", "datetime64[h]"), ("B", "str"), ("C", "int32")])
+
+        # these work (though results may be unexpected)
+        f("int64")
+        f("float64")
+        f("M8[ns]")
+
+    def test_metadata_propagation(self, frame_or_series):
+        # check that the metadata matches up on the resulting ops
+
+        o = construct(frame_or_series, shape=3)
+        o.name = "foo"
+        o2 = construct(frame_or_series, shape=3)
+        o2.name = "bar"
+
+        # ----------
+        # preserving
+        # ----------
+
+        # simple ops with scalars
+        for op in ["__add__", "__sub__", "__truediv__", "__mul__"]:
+            result = getattr(o, op)(1)
+            tm.assert_metadata_equivalent(o, result)
+
+        # ops with like
+        for op in ["__add__", "__sub__", "__truediv__", "__mul__"]:
+            result = getattr(o, op)(o)
+            tm.assert_metadata_equivalent(o, result)
+
+        # simple boolean
+        for op in ["__eq__", "__le__", "__ge__"]:
+            v1 = getattr(o, op)(o)
+            tm.assert_metadata_equivalent(o, v1)
+            tm.assert_metadata_equivalent(o, v1 & v1)
+            tm.assert_metadata_equivalent(o, v1 | v1)
+
+        # combine_first
+        result = o.combine_first(o2)
+        tm.assert_metadata_equivalent(o, result)
+
+        # ---------------------------
+        # non-preserving (by default)
+        # ---------------------------
+
+        # add non-like
+        result = o + o2
+        tm.assert_metadata_equivalent(result)
+
+        # simple boolean
+        for op in ["__eq__", "__le__", "__ge__"]:
+
+            # this is a name matching op
+            v1 = getattr(o, op)(o)
+            v2 = getattr(o, op)(o2)
+            tm.assert_metadata_equivalent(v2)
+            tm.assert_metadata_equivalent(v1 & v2)
+            tm.assert_metadata_equivalent(v1 | v2)
+
+    def test_size_compat(self, frame_or_series):
+        # GH8846
+        # size property should be defined
+
+        o = construct(frame_or_series, shape=10)
+        assert o.size == np.prod(o.shape)
+        assert o.size == 10 ** len(o.axes)
+
+    def test_split_compat(self, frame_or_series):
+        # xref GH8846
+        o = construct(frame_or_series, shape=10)
+        assert len(np.array_split(o, 5)) == 5
+        assert len(np.array_split(o, 2)) == 2
+
+    # See gh-12301
+    def test_stat_unexpected_keyword(self, frame_or_series):
+        obj = construct(frame_or_series, 5)
+        starwars = "Star Wars"
+        errmsg = "unexpected keyword"
+
+        with pytest.raises(TypeError, match=errmsg):
+            obj.max(epic=starwars)  # stat_function
+        with pytest.raises(TypeError, match=errmsg):
+            obj.var(epic=starwars)  # stat_function_ddof
+        with pytest.raises(TypeError, match=errmsg):
+            obj.sum(epic=starwars)  # cum_function
+        with pytest.raises(TypeError, match=errmsg):
+            obj.any(epic=starwars)  # logical_function
+
+    @pytest.mark.parametrize("func", ["sum", "cumsum", "any", "var"])
+    def test_api_compat(self, func, frame_or_series):
+
+        # GH 12021
+        # compat for __name__, __qualname__
+
+        obj = construct(frame_or_series, 5)
+        f = getattr(obj, func)
+        assert f.__name__ == func
+        assert f.__qualname__.endswith(func)
+
+    def test_stat_non_defaults_args(self, frame_or_series):
+        obj = construct(frame_or_series, 5)
+        out = np.array([0])
+        errmsg = "the 'out' parameter is not supported"
+
+        with pytest.raises(ValueError, match=errmsg):
+            obj.max(out=out)  # stat_function
+        with pytest.raises(ValueError, match=errmsg):
+            obj.var(out=out)  # stat_function_ddof
+        with pytest.raises(ValueError, match=errmsg):
+            obj.sum(out=out)  # cum_function
+        with pytest.raises(ValueError, match=errmsg):
+            obj.any(out=out)  # logical_function
+
+    def test_truncate_out_of_bounds(self,
frame_or_series): + # GH11382 + + # small + shape = [2000] + ([1] * (frame_or_series._AXIS_LEN - 1)) + small = construct(frame_or_series, shape, dtype="int8", value=1) + tm.assert_equal(small.truncate(), small) + tm.assert_equal(small.truncate(before=0, after=3e3), small) + tm.assert_equal(small.truncate(before=-1, after=2e3), small) + + # big + shape = [2_000_000] + ([1] * (frame_or_series._AXIS_LEN - 1)) + big = construct(frame_or_series, shape, dtype="int8", value=1) + tm.assert_equal(big.truncate(), big) + tm.assert_equal(big.truncate(before=0, after=3e6), big) + tm.assert_equal(big.truncate(before=-1, after=2e6), big) + + @pytest.mark.parametrize( + "func", + [copy, deepcopy, lambda x: x.copy(deep=False), lambda x: x.copy(deep=True)], + ) + @pytest.mark.parametrize("shape", [0, 1, 2]) + def test_copy_and_deepcopy(self, frame_or_series, shape, func): + # GH 15444 + obj = construct(frame_or_series, shape) + obj_copy = func(obj) + assert obj_copy is not obj + tm.assert_equal(obj_copy, obj) + + +class TestNDFrame: + # tests that don't fit elsewhere + + def test_squeeze(self): + # noop + for s in [tm.makeFloatSeries(), tm.makeStringSeries(), tm.makeObjectSeries()]: + tm.assert_series_equal(s.squeeze(), s) + for df in [tm.makeTimeDataFrame()]: + tm.assert_frame_equal(df.squeeze(), df) + + # squeezing + df = tm.makeTimeDataFrame().reindex(columns=["A"]) + tm.assert_series_equal(df.squeeze(), df["A"]) + + # don't fail with 0 length dimensions GH11229 & GH8999 + empty_series = Series([], name="five", dtype=np.float64) + empty_frame = DataFrame([empty_series]) + tm.assert_series_equal(empty_series, empty_series.squeeze()) + tm.assert_series_equal(empty_series, empty_frame.squeeze()) + + # axis argument + df = tm.makeTimeDataFrame(nper=1).iloc[:, :1] + assert df.shape == (1, 1) + tm.assert_series_equal(df.squeeze(axis=0), df.iloc[0]) + tm.assert_series_equal(df.squeeze(axis="index"), df.iloc[0]) + tm.assert_series_equal(df.squeeze(axis=1), df.iloc[:, 0]) + tm.assert_series_equal(df.squeeze(axis="columns"), df.iloc[:, 0]) + assert df.squeeze() == df.iloc[0, 0] + msg = "No axis named 2 for object type DataFrame" + with pytest.raises(ValueError, match=msg): + df.squeeze(axis=2) + msg = "No axis named x for object type DataFrame" + with pytest.raises(ValueError, match=msg): + df.squeeze(axis="x") + + df = tm.makeTimeDataFrame(3) + tm.assert_frame_equal(df.squeeze(axis=0), df) + + def test_numpy_squeeze(self): + s = tm.makeFloatSeries() + tm.assert_series_equal(np.squeeze(s), s) + + df = tm.makeTimeDataFrame().reindex(columns=["A"]) + tm.assert_series_equal(np.squeeze(df), df["A"]) + + def test_transpose(self): + for s in [tm.makeFloatSeries(), tm.makeStringSeries(), tm.makeObjectSeries()]: + # calls implementation in pandas/core/base.py + tm.assert_series_equal(s.transpose(), s) + for df in [tm.makeTimeDataFrame()]: + tm.assert_frame_equal(df.transpose().transpose(), df) + + def test_numpy_transpose(self, frame_or_series): + + obj = tm.makeTimeDataFrame() + obj = tm.get_obj(obj, frame_or_series) + + if frame_or_series is Series: + # 1D -> np.transpose is no-op + tm.assert_series_equal(np.transpose(obj), obj) + + # round-trip preserved + tm.assert_equal(np.transpose(np.transpose(obj)), obj) + + msg = "the 'axes' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.transpose(obj, axes=1) + + def test_take(self): + indices = [1, 5, -2, 6, 3, -1] + for s in [tm.makeFloatSeries(), tm.makeStringSeries(), tm.makeObjectSeries()]: + out = s.take(indices) + expected = Series( + 
data=s.values.take(indices), index=s.index.take(indices), dtype=s.dtype + ) + tm.assert_series_equal(out, expected) + for df in [tm.makeTimeDataFrame()]: + out = df.take(indices) + expected = DataFrame( + data=df.values.take(indices, axis=0), + index=df.index.take(indices), + columns=df.columns, + ) + tm.assert_frame_equal(out, expected) + + def test_take_invalid_kwargs(self, frame_or_series): + indices = [-3, 2, 0, 1] + + obj = tm.makeTimeDataFrame() + obj = tm.get_obj(obj, frame_or_series) + + msg = r"take\(\) got an unexpected keyword argument 'foo'" + with pytest.raises(TypeError, match=msg): + obj.take(indices, foo=2) + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + obj.take(indices, out=indices) + + msg = "the 'mode' parameter is not supported" + with pytest.raises(ValueError, match=msg): + obj.take(indices, mode="clip") + + @pytest.mark.parametrize("is_copy", [True, False]) + def test_depr_take_kwarg_is_copy(self, is_copy, frame_or_series): + # GH 27357 + obj = DataFrame({"A": [1, 2, 3]}) + obj = tm.get_obj(obj, frame_or_series) + + msg = ( + "is_copy is deprecated and will be removed in a future version. " + "'take' always returns a copy, so there is no need to specify this." + ) + with tm.assert_produces_warning(FutureWarning) as w: + obj.take([0, 1], is_copy=is_copy) + + assert w[0].message.args[0] == msg + + def test_axis_classmethods(self, frame_or_series): + box = frame_or_series + obj = box(dtype=object) + values = box._AXIS_TO_AXIS_NUMBER.keys() + for v in values: + assert obj._get_axis_number(v) == box._get_axis_number(v) + assert obj._get_axis_name(v) == box._get_axis_name(v) + assert obj._get_block_manager_axis(v) == box._get_block_manager_axis(v) + + def test_axis_names_deprecated(self, frame_or_series): + # GH33637 + box = frame_or_series + obj = box(dtype=object) + msg = "_AXIS_NAMES has been deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + obj._AXIS_NAMES + + def test_axis_numbers_deprecated(self, frame_or_series): + # GH33637 + box = frame_or_series + obj = box(dtype=object) + msg = "_AXIS_NUMBERS has been deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + obj._AXIS_NUMBERS + + def test_flags_identity(self, frame_or_series): + obj = Series([1, 2]) + if frame_or_series is DataFrame: + obj = obj.to_frame() + + assert obj.flags is obj.flags + obj2 = obj.copy() + assert obj2.flags is not obj.flags + + def test_slice_shift_deprecated(self, frame_or_series): + # GH 37601 + obj = DataFrame({"A": [1, 2, 3, 4]}) + obj = tm.get_obj(obj, frame_or_series) + + with tm.assert_produces_warning(FutureWarning): + obj.slice_shift() diff --git a/.local/lib/python3.8/site-packages/pandas/tests/generic/test_label_or_level_utils.py b/.local/lib/python3.8/site-packages/pandas/tests/generic/test_label_or_level_utils.py new file mode 100644 index 0000000..d1c85d7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/generic/test_label_or_level_utils.py @@ -0,0 +1,349 @@ +import pytest + +from pandas.core.dtypes.missing import array_equivalent + +import pandas as pd + + +# Fixtures +# ======== +@pytest.fixture +def df(): + """DataFrame with columns 'L1', 'L2', and 'L3'""" + return pd.DataFrame({"L1": [1, 2, 3], "L2": [11, 12, 13], "L3": ["A", "B", "C"]}) + + +@pytest.fixture(params=[[], ["L1"], ["L1", "L2"], ["L1", "L2", "L3"]]) +def df_levels(request, df): + """DataFrame with columns or index levels 'L1', 'L2', and 'L3'""" + levels = request.param + + if levels: + df = 
df.set_index(levels)
+
+    return df
+
+
+@pytest.fixture
+def df_ambig(df):
+    """DataFrame with levels 'L1' and 'L2' and labels 'L1' and 'L3'"""
+    df = df.set_index(["L1", "L2"])
+
+    df["L1"] = df["L3"]
+
+    return df
+
+
+@pytest.fixture
+def df_duplabels(df):
+    """DataFrame with level 'L1' and labels 'L2', 'L3', and 'L2'"""
+    df = df.set_index(["L1"])
+    df = pd.concat([df, df["L2"]], axis=1)
+
+    return df
+
+
+# Test is label/level reference
+# =============================
+def get_labels_levels(df_levels):
+    expected_labels = list(df_levels.columns)
+    expected_levels = [name for name in df_levels.index.names if name is not None]
+    return expected_labels, expected_levels
+
+
+def assert_label_reference(frame, labels, axis):
+    for label in labels:
+        assert frame._is_label_reference(label, axis=axis)
+        assert not frame._is_level_reference(label, axis=axis)
+        assert frame._is_label_or_level_reference(label, axis=axis)
+
+
+def assert_level_reference(frame, levels, axis):
+    for level in levels:
+        assert frame._is_level_reference(level, axis=axis)
+        assert not frame._is_label_reference(level, axis=axis)
+        assert frame._is_label_or_level_reference(level, axis=axis)
+
+
+# DataFrame
+# ---------
+def test_is_level_or_label_reference_df_simple(df_levels, axis):
+
+    axis = df_levels._get_axis_number(axis)
+    # Compute expected labels and levels
+    expected_labels, expected_levels = get_labels_levels(df_levels)
+
+    # Transpose frame if axis == 1
+    if axis == 1:
+        df_levels = df_levels.T
+
+    # Perform checks
+    assert_level_reference(df_levels, expected_levels, axis=axis)
+    assert_label_reference(df_levels, expected_labels, axis=axis)
+
+
+def test_is_level_reference_df_ambig(df_ambig, axis):
+
+    axis = df_ambig._get_axis_number(axis)
+
+    # Transpose frame if axis == 1
+    if axis == 1:
+        df_ambig = df_ambig.T
+
+    # df has both an on-axis level and off-axis label named L1
+    # Therefore L1 should reference the label, not the level
+    assert_label_reference(df_ambig, ["L1"], axis=axis)
+
+    # df has an on-axis level named L2 and it is not ambiguous
+    # Therefore L2 is a level reference
+    assert_level_reference(df_ambig, ["L2"], axis=axis)
+
+    # df has a column named L3, so L3 is a label reference, not a level reference
+    assert_label_reference(df_ambig, ["L3"], axis=axis)
+
+
+# Series
+# ------
+def test_is_level_reference_series_simple_axis0(df):
+
+    # Make series with L1 as index
+    s = df.set_index("L1").L2
+    assert_level_reference(s, ["L1"], axis=0)
+    assert not s._is_level_reference("L2")
+
+    # Make series with L1 and L2 as index
+    s = df.set_index(["L1", "L2"]).L3
+    assert_level_reference(s, ["L1", "L2"], axis=0)
+    assert not s._is_level_reference("L3")
+
+
+def test_is_level_reference_series_axis1_error(df):
+
+    # Make series with L1 as index
+    s = df.set_index("L1").L2
+
+    with pytest.raises(ValueError, match="No axis named 1"):
+        s._is_level_reference("L1", axis=1)
+
+
+# Test _check_label_or_level_ambiguity_df
+# =======================================
+
+# DataFrame
+# ---------
+def test_check_label_or_level_ambiguity_df(df_ambig, axis):
+
+    axis = df_ambig._get_axis_number(axis)
+    # Transpose frame if axis == 1
+    if axis == 1:
+        df_ambig = df_ambig.T
+        msg = "'L1' is both a column level and an index label"
+
+    else:
+        msg = "'L1' is both an index level and a column label"
+    # df_ambig has both an on-axis level and off-axis label named L1
+    # Therefore, L1 is ambiguous.
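+    # Illustrative fixture state: after df.set_index(["L1", "L2"]) plus
+    # df["L1"] = df["L3"], the name "L1" exists both as an index level and
+    # as a column, so resolving it must raise rather than guess.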
+    with pytest.raises(ValueError, match=msg):
+        df_ambig._check_label_or_level_ambiguity("L1", axis=axis)
+
+    # df_ambig has an on-axis level named L2, and it is not ambiguous.
+    df_ambig._check_label_or_level_ambiguity("L2", axis=axis)
+
+    # df_ambig has an off-axis label named L3, and it is not ambiguous
+    assert not df_ambig._check_label_or_level_ambiguity("L3", axis=axis)
+
+
+# Series
+# ------
+def test_check_label_or_level_ambiguity_series(df):
+
+    # A series has no columns and therefore references are never ambiguous
+
+    # Make series with L1 as index
+    s = df.set_index("L1").L2
+    s._check_label_or_level_ambiguity("L1", axis=0)
+    s._check_label_or_level_ambiguity("L2", axis=0)
+
+    # Make series with L1 and L2 as index
+    s = df.set_index(["L1", "L2"]).L3
+    s._check_label_or_level_ambiguity("L1", axis=0)
+    s._check_label_or_level_ambiguity("L2", axis=0)
+    s._check_label_or_level_ambiguity("L3", axis=0)
+
+
+def test_check_label_or_level_ambiguity_series_axis1_error(df):
+
+    # Make series with L1 as index
+    s = df.set_index("L1").L2
+
+    with pytest.raises(ValueError, match="No axis named 1"):
+        s._check_label_or_level_ambiguity("L1", axis=1)
+
+
+# Test _get_label_or_level_values
+# ===============================
def assert_label_values(frame, labels, axis):
+    axis = frame._get_axis_number(axis)
+    for label in labels:
+        if axis == 0:
+            expected = frame[label]._values
+        else:
+            expected = frame.loc[label]._values
+
+        result = frame._get_label_or_level_values(label, axis=axis)
+        assert array_equivalent(expected, result)
+
+
+def assert_level_values(frame, levels, axis):
+    axis = frame._get_axis_number(axis)
+    for level in levels:
+        if axis == 0:
+            expected = frame.index.get_level_values(level=level)._values
+        else:
+            expected = frame.columns.get_level_values(level=level)._values
+
+        result = frame._get_label_or_level_values(level, axis=axis)
+        assert array_equivalent(expected, result)
+
+
+# DataFrame
+# ---------
+def test_get_label_or_level_values_df_simple(df_levels, axis):
+
+    # Compute expected labels and levels
+    expected_labels, expected_levels = get_labels_levels(df_levels)
+
+    axis = df_levels._get_axis_number(axis)
+    # Transpose frame if axis == 1
+    if axis == 1:
+        df_levels = df_levels.T
+
+    # Perform checks
+    assert_label_values(df_levels, expected_labels, axis=axis)
+    assert_level_values(df_levels, expected_levels, axis=axis)
+
+
+def test_get_label_or_level_values_df_ambig(df_ambig, axis):
+
+    axis = df_ambig._get_axis_number(axis)
+    # Transpose frame if axis == 1
+    if axis == 1:
+        df_ambig = df_ambig.T
+
+    # df has an on-axis level named L2, and it is not ambiguous.
+    assert_level_values(df_ambig, ["L2"], axis=axis)
+
+    # df has an off-axis label named L3, and it is not ambiguous.
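+    # For reference, _get_label_or_level_values returns the underlying array;
+    # on this fixture, "L3" would resolve to array(['A', 'B', 'C'], dtype=object).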
+ assert_label_values(df_ambig, ["L3"], axis=axis) + + +def test_get_label_or_level_values_df_duplabels(df_duplabels, axis): + + axis = df_duplabels._get_axis_number(axis) + # Transpose frame if axis == 1 + if axis == 1: + df_duplabels = df_duplabels.T + + # df has unambiguous level 'L1' + assert_level_values(df_duplabels, ["L1"], axis=axis) + + # df has unique label 'L3' + assert_label_values(df_duplabels, ["L3"], axis=axis) + + # df has duplicate labels 'L2' + if axis == 0: + expected_msg = "The column label 'L2' is not unique" + else: + expected_msg = "The index label 'L2' is not unique" + + with pytest.raises(ValueError, match=expected_msg): + assert_label_values(df_duplabels, ["L2"], axis=axis) + + +# Series +# ------ +def test_get_label_or_level_values_series_axis0(df): + + # Make series with L1 as index + s = df.set_index("L1").L2 + assert_level_values(s, ["L1"], axis=0) + + # Make series with L1 and L2 as index + s = df.set_index(["L1", "L2"]).L3 + assert_level_values(s, ["L1", "L2"], axis=0) + + +def test_get_label_or_level_values_series_axis1_error(df): + + # Make series with L1 as index + s = df.set_index("L1").L2 + + with pytest.raises(ValueError, match="No axis named 1"): + s._get_label_or_level_values("L1", axis=1) + + +# Test _drop_labels_or_levels +# =========================== +def assert_labels_dropped(frame, labels, axis): + axis = frame._get_axis_number(axis) + for label in labels: + df_dropped = frame._drop_labels_or_levels(label, axis=axis) + + if axis == 0: + assert label in frame.columns + assert label not in df_dropped.columns + else: + assert label in frame.index + assert label not in df_dropped.index + + +def assert_levels_dropped(frame, levels, axis): + axis = frame._get_axis_number(axis) + for level in levels: + df_dropped = frame._drop_labels_or_levels(level, axis=axis) + + if axis == 0: + assert level in frame.index.names + assert level not in df_dropped.index.names + else: + assert level in frame.columns.names + assert level not in df_dropped.columns.names + + +# DataFrame +# --------- +def test_drop_labels_or_levels_df(df_levels, axis): + + # Compute expected labels and levels + expected_labels, expected_levels = get_labels_levels(df_levels) + + axis = df_levels._get_axis_number(axis) + # Transpose frame if axis == 1 + if axis == 1: + df_levels = df_levels.T + + # Perform checks + assert_labels_dropped(df_levels, expected_labels, axis=axis) + assert_levels_dropped(df_levels, expected_levels, axis=axis) + + with pytest.raises(ValueError, match="not valid labels or levels"): + df_levels._drop_labels_or_levels("L4", axis=axis) + + +# Series +# ------ +def test_drop_labels_or_levels_series(df): + + # Make series with L1 as index + s = df.set_index("L1").L2 + assert_levels_dropped(s, ["L1"], axis=0) + + with pytest.raises(ValueError, match="not valid labels or levels"): + s._drop_labels_or_levels("L4", axis=0) + + # Make series with L1 and L2 as index + s = df.set_index(["L1", "L2"]).L3 + assert_levels_dropped(s, ["L1", "L2"], axis=0) + + with pytest.raises(ValueError, match="not valid labels or levels"): + s._drop_labels_or_levels("L4", axis=0) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/generic/test_series.py b/.local/lib/python3.8/site-packages/pandas/tests/generic/test_series.py new file mode 100644 index 0000000..dd2380e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/generic/test_series.py @@ -0,0 +1,144 @@ +from operator import methodcaller + +import numpy as np +import pytest + +import pandas as pd +from pandas import 
(
+    MultiIndex,
+    Series,
+    date_range,
+)
+import pandas._testing as tm
+
+
+class TestSeries:
+    @pytest.mark.parametrize("func", ["rename_axis", "_set_axis_name"])
+    def test_set_axis_name_mi(self, func):
+        ser = Series(
+            [11, 21, 31],
+            index=MultiIndex.from_tuples(
+                [("A", x) for x in ["a", "B", "c"]], names=["l1", "l2"]
+            ),
+        )
+
+        result = methodcaller(func, ["L1", "L2"])(ser)
+        assert ser.index.name is None
+        assert ser.index.names == ["l1", "l2"]
+        assert result.index.name is None
+        assert result.index.names == ["L1", "L2"]
+
+    def test_set_axis_name_raises(self):
+        ser = Series([1])
+        msg = "No axis named 1 for object type Series"
+        with pytest.raises(ValueError, match=msg):
+            ser._set_axis_name(name="a", axis=1)
+
+    def test_get_bool_data_preserve_dtype(self):
+        ser = Series([True, False, True])
+        result = ser._get_bool_data()
+        tm.assert_series_equal(result, ser)
+
+    def test_nonzero_single_element(self):
+
+        # allow single item via bool method
+        ser = Series([True])
+        assert ser.bool()
+
+        ser = Series([False])
+        assert not ser.bool()
+
+    @pytest.mark.parametrize("data", [np.nan, pd.NaT, True, False])
+    def test_nonzero_single_element_raise_1(self, data):
+        # single item nan to raise
+        series = Series([data])
+
+        msg = "The truth value of a Series is ambiguous"
+        with pytest.raises(ValueError, match=msg):
+            bool(series)
+
+    @pytest.mark.parametrize("data", [np.nan, pd.NaT])
+    def test_nonzero_single_element_raise_2(self, data):
+        series = Series([data])
+
+        msg = "bool cannot act on a non-boolean single element Series"
+        with pytest.raises(ValueError, match=msg):
+            series.bool()
+
+    @pytest.mark.parametrize("data", [(True, True), (False, False)])
+    def test_nonzero_multiple_element_raise(self, data):
+        # multiple bool are still an error
+        series = Series([data])
+
+        msg = "The truth value of a Series is ambiguous"
+        with pytest.raises(ValueError, match=msg):
+            bool(series)
+        with pytest.raises(ValueError, match=msg):
+            series.bool()
+
+    @pytest.mark.parametrize("data", [1, 0, "a", 0.0])
+    def test_nonbool_single_element_raise(self, data):
+        # single non-bool are an error
+        series = Series([data])
+
+        msg = "The truth value of a Series is ambiguous"
+        with pytest.raises(ValueError, match=msg):
+            bool(series)
+
+        msg = "bool cannot act on a non-boolean single element Series"
+        with pytest.raises(ValueError, match=msg):
+            series.bool()
+
+    def test_metadata_propagation_indiv_resample(self):
+        # resample
+        ts = Series(
+            np.random.rand(1000),
+            index=date_range("20130101", periods=1000, freq="s"),
+            name="foo",
+        )
+        result = ts.resample("1T").mean()
+        tm.assert_metadata_equivalent(ts, result)
+
+        result = ts.resample("1T").min()
+        tm.assert_metadata_equivalent(ts, result)
+
+        result = ts.resample("1T").apply(lambda x: x.sum())
+        tm.assert_metadata_equivalent(ts, result)
+
+    def test_metadata_propagation_indiv(self, monkeypatch):
+        # check that the metadata matches up on the resulting ops
+
+        ser = Series(range(3), range(3))
+        ser.name = "foo"
+        ser2 = Series(range(3), range(3))
+        ser2.name = "bar"
+
+        result = ser.T
+        tm.assert_metadata_equivalent(ser, result)
+
+        def finalize(self, other, method=None, **kwargs):
+            for name in self._metadata:
+                if method == "concat" and name == "filename":
+                    value = "+".join(
+                        [
+                            getattr(obj, name)
+                            for obj in other.objs
+                            if getattr(obj, name, None)
+                        ]
+                    )
+                    object.__setattr__(self, name, value)
+                else:
+                    object.__setattr__(self, name, getattr(other, name, None))
+
+            return self
+
+        with monkeypatch.context() as m:
+            m.setattr(Series,
"_metadata", ["name", "filename"]) + m.setattr(Series, "__finalize__", finalize) + + ser.filename = "foo" + ser2.filename = "bar" + + result = pd.concat([ser, ser2]) + assert result.filename == "foo+bar" + assert result.name is None diff --git a/.local/lib/python3.8/site-packages/pandas/tests/generic/test_to_xarray.py b/.local/lib/python3.8/site-packages/pandas/tests/generic/test_to_xarray.py new file mode 100644 index 0000000..1fbd82f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/generic/test_to_xarray.py @@ -0,0 +1,128 @@ +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas import ( + Categorical, + DataFrame, + MultiIndex, + Series, + date_range, +) +import pandas._testing as tm + + +@td.skip_if_no("xarray") +class TestDataFrameToXArray: + @pytest.fixture + def df(self): + return DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": Categorical(list("abc")), + "g": date_range("20130101", periods=3), + "h": date_range("20130101", periods=3, tz="US/Eastern"), + } + ) + + def test_to_xarray_index_types(self, index_flat, df): + index = index_flat + # MultiIndex is tested in test_to_xarray_with_multiindex + if len(index) == 0: + pytest.skip("Test doesn't make sense for empty index") + + from xarray import Dataset + + df.index = index[:3] + df.index.name = "foo" + df.columns.name = "bar" + result = df.to_xarray() + assert result.dims["foo"] == 3 + assert len(result.coords) == 1 + assert len(result.data_vars) == 8 + tm.assert_almost_equal(list(result.coords.keys()), ["foo"]) + assert isinstance(result, Dataset) + + # idempotency + # datetimes w/tz are preserved + # column names are lost + expected = df.copy() + expected["f"] = expected["f"].astype(object) + expected.columns.name = None + tm.assert_frame_equal(result.to_dataframe(), expected) + + def test_to_xarray_empty(self, df): + from xarray import Dataset + + df.index.name = "foo" + result = df[0:0].to_xarray() + assert result.dims["foo"] == 0 + assert isinstance(result, Dataset) + + def test_to_xarray_with_multiindex(self, df): + from xarray import Dataset + + # MultiIndex + df.index = MultiIndex.from_product([["a"], range(3)], names=["one", "two"]) + result = df.to_xarray() + assert result.dims["one"] == 1 + assert result.dims["two"] == 3 + assert len(result.coords) == 2 + assert len(result.data_vars) == 8 + tm.assert_almost_equal(list(result.coords.keys()), ["one", "two"]) + assert isinstance(result, Dataset) + + result = result.to_dataframe() + expected = df.copy() + expected["f"] = expected["f"].astype(object) + expected.columns.name = None + tm.assert_frame_equal(result, expected) + + +@td.skip_if_no("xarray") +class TestSeriesToXArray: + def test_to_xarray_index_types(self, index_flat): + index = index_flat + # MultiIndex is tested in test_to_xarray_with_multiindex + + from xarray import DataArray + + ser = Series(range(len(index)), index=index, dtype="int64") + ser.index.name = "foo" + result = ser.to_xarray() + repr(result) + assert len(result) == len(index) + assert len(result.coords) == 1 + tm.assert_almost_equal(list(result.coords.keys()), ["foo"]) + assert isinstance(result, DataArray) + + # idempotency + tm.assert_series_equal(result.to_series(), ser) + + def test_to_xarray_empty(self): + from xarray import DataArray + + ser = Series([], dtype=object) + ser.index.name = "foo" + result = ser.to_xarray() + assert len(result) == 0 + assert 
len(result.coords) == 1 + tm.assert_almost_equal(list(result.coords.keys()), ["foo"]) + assert isinstance(result, DataArray) + + def test_to_xarray_with_multiindex(self): + from xarray import DataArray + + mi = MultiIndex.from_product([["a", "b"], range(3)], names=["one", "two"]) + ser = Series(range(6), dtype="int64", index=mi) + result = ser.to_xarray() + assert len(result) == 2 + tm.assert_almost_equal(list(result.coords.keys()), ["one", "two"]) + assert isinstance(result, DataArray) + res = result.to_series() + tm.assert_series_equal(res, ser) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..94c878c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/conftest.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/conftest.cpython-38.pyc new file mode 100644 index 0000000..4533d78 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/conftest.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_allowlist.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_allowlist.cpython-38.pyc new file mode 100644 index 0000000..0056070 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_allowlist.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_any_all.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_any_all.cpython-38.pyc new file mode 100644 index 0000000..c620baa Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_any_all.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_apply.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_apply.cpython-38.pyc new file mode 100644 index 0000000..265ad4d Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_apply.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_apply_mutate.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_apply_mutate.cpython-38.pyc new file mode 100644 index 0000000..9a7bdee Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_apply_mutate.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_bin_groupby.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_bin_groupby.cpython-38.pyc new file mode 100644 index 0000000..1d23b0d Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_bin_groupby.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_categorical.cpython-38.pyc 
b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_categorical.cpython-38.pyc new file mode 100644 index 0000000..5fb52a0 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_categorical.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_counting.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_counting.cpython-38.pyc new file mode 100644 index 0000000..e9759cf Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_counting.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_filters.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_filters.cpython-38.pyc new file mode 100644 index 0000000..29f82ae Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_filters.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_frame_value_counts.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_frame_value_counts.cpython-38.pyc new file mode 100644 index 0000000..a7adc10 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_frame_value_counts.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_function.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_function.cpython-38.pyc new file mode 100644 index 0000000..842eeab Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_function.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_groupby.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_groupby.cpython-38.pyc new file mode 100644 index 0000000..bd5b624 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_groupby.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_groupby_dropna.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_groupby_dropna.cpython-38.pyc new file mode 100644 index 0000000..5582243 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_groupby_dropna.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_groupby_shift_diff.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_groupby_shift_diff.cpython-38.pyc new file mode 100644 index 0000000..61bde89 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_groupby_shift_diff.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_groupby_subclass.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_groupby_subclass.cpython-38.pyc new file mode 100644 index 0000000..c851982 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_groupby_subclass.cpython-38.pyc differ diff --git 
a/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_grouping.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_grouping.cpython-38.pyc new file mode 100644 index 0000000..713b0af Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_grouping.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_index_as_string.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_index_as_string.cpython-38.pyc new file mode 100644 index 0000000..5d67975 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_index_as_string.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_indexing.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_indexing.cpython-38.pyc new file mode 100644 index 0000000..968faa6 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_indexing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_libgroupby.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_libgroupby.cpython-38.pyc new file mode 100644 index 0000000..a2240c4 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_libgroupby.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_min_max.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_min_max.cpython-38.pyc new file mode 100644 index 0000000..8f3f0fc Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_min_max.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_missing.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_missing.cpython-38.pyc new file mode 100644 index 0000000..046355a Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_missing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_nth.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_nth.cpython-38.pyc new file mode 100644 index 0000000..cc26b80 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_nth.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_numba.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_numba.cpython-38.pyc new file mode 100644 index 0000000..ce9feac Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_numba.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_nunique.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_nunique.cpython-38.pyc new file mode 100644 index 0000000..c42bd9f Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_nunique.cpython-38.pyc differ diff --git 
a/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_pipe.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_pipe.cpython-38.pyc new file mode 100644 index 0000000..9c91b42 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_pipe.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_quantile.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_quantile.cpython-38.pyc new file mode 100644 index 0000000..71b39aa Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_quantile.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_rank.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_rank.cpython-38.pyc new file mode 100644 index 0000000..7c7fbe8 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_rank.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_sample.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_sample.cpython-38.pyc new file mode 100644 index 0000000..e616bb3 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_sample.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_size.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_size.cpython-38.pyc new file mode 100644 index 0000000..f9d19e6 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_size.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_timegrouper.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_timegrouper.cpython-38.pyc new file mode 100644 index 0000000..1275e31 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_timegrouper.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_value_counts.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_value_counts.cpython-38.pyc new file mode 100644 index 0000000..75919c5 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/groupby/__pycache__/test_value_counts.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/aggregate/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/groupby/aggregate/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/aggregate/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/groupby/aggregate/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..2720636 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/groupby/aggregate/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/aggregate/__pycache__/test_aggregate.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/groupby/aggregate/__pycache__/test_aggregate.cpython-38.pyc new file mode 100644 index 
0000000..0ed8fa4 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/groupby/aggregate/__pycache__/test_aggregate.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/aggregate/__pycache__/test_cython.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/groupby/aggregate/__pycache__/test_cython.cpython-38.pyc new file mode 100644 index 0000000..5c3aa66 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/groupby/aggregate/__pycache__/test_cython.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/aggregate/__pycache__/test_numba.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/groupby/aggregate/__pycache__/test_numba.cpython-38.pyc new file mode 100644 index 0000000..a93b313 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/groupby/aggregate/__pycache__/test_numba.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/aggregate/__pycache__/test_other.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/groupby/aggregate/__pycache__/test_other.cpython-38.pyc new file mode 100644 index 0000000..166010a Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/groupby/aggregate/__pycache__/test_other.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/aggregate/test_aggregate.py b/.local/lib/python3.8/site-packages/pandas/tests/groupby/aggregate/test_aggregate.py new file mode 100644 index 0000000..2ab5534 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/groupby/aggregate/test_aggregate.py @@ -0,0 +1,1395 @@ +""" +test .agg behavior / note that .apply is tested generally in test_groupby.py +""" +import datetime +import functools +from functools import partial +import re + +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_integer_dtype + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + concat, + to_datetime, +) +import pandas._testing as tm +from pandas.core.base import SpecificationError +from pandas.core.groupby.grouper import Grouping + + +def test_groupby_agg_no_extra_calls(): + # GH#31760 + df = DataFrame({"key": ["a", "b", "c", "c"], "value": [1, 2, 3, 4]}) + gb = df.groupby("key")["value"] + + def dummy_func(x): + assert len(x) != 0 + return x.sum() + + gb.agg(dummy_func) + + +def test_agg_regression1(tsframe): + grouped = tsframe.groupby([lambda x: x.year, lambda x: x.month]) + result = grouped.agg(np.mean) + expected = grouped.mean() + tm.assert_frame_equal(result, expected) + + +def test_agg_must_agg(df): + grouped = df.groupby("A")["C"] + + msg = "Must produce aggregated value" + with pytest.raises(Exception, match=msg): + grouped.agg(lambda x: x.describe()) + with pytest.raises(Exception, match=msg): + grouped.agg(lambda x: x.index[:2]) + + +def test_agg_ser_multi_key(df): + + f = lambda x: x.sum() + results = df.C.groupby([df.A, df.B]).aggregate(f) + expected = df.groupby(["A", "B"]).sum()["C"] + tm.assert_series_equal(results, expected) + + +def test_groupby_aggregation_mixed_dtype(): + # GH 6212 + expected = DataFrame( + { + "v1": [5, 5, 7, np.nan, 3, 3, 4, 1], + "v2": [55, 55, 77, np.nan, 33, 33, 44, 11], + }, + index=MultiIndex.from_tuples( + [ + (1, 95), + (1, 99), + (2, 95), + (2, 99), + ("big", "damp"), + ("blue", "dry"), + ("red", "red"), + ("red", "wet"), + ], + names=["by1", "by2"], + ), + ) + + df = 
DataFrame( + { + "v1": [1, 3, 5, 7, 8, 3, 5, np.nan, 4, 5, 7, 9], + "v2": [11, 33, 55, 77, 88, 33, 55, np.nan, 44, 55, 77, 99], + "by1": ["red", "blue", 1, 2, np.nan, "big", 1, 2, "red", 1, np.nan, 12], + "by2": [ + "wet", + "dry", + 99, + 95, + np.nan, + "damp", + 95, + 99, + "red", + 99, + np.nan, + np.nan, + ], + } + ) + + g = df.groupby(["by1", "by2"]) + result = g[["v1", "v2"]].mean() + tm.assert_frame_equal(result, expected) + + +def test_groupby_aggregation_multi_level_column(): + # GH 29772 + lst = [ + [True, True, True, False], + [True, False, np.nan, False], + [True, True, np.nan, False], + [True, True, np.nan, False], + ] + df = DataFrame( + data=lst, + columns=MultiIndex.from_tuples([("A", 0), ("A", 1), ("B", 0), ("B", 1)]), + ) + + gb = df.groupby(level=1, axis=1) + result = gb.sum(numeric_only=False) + expected = DataFrame({0: [2.0, True, True, True], 1: [1, 0, 1, 1]}) + + tm.assert_frame_equal(result, expected) + + +def test_agg_apply_corner(ts, tsframe): + # nothing to group, all NA + grouped = ts.groupby(ts * np.nan) + assert ts.dtype == np.float64 + + # groupby float64 values results in Float64Index + exp = Series([], dtype=np.float64, index=Index([], dtype=np.float64)) + tm.assert_series_equal(grouped.sum(), exp) + tm.assert_series_equal(grouped.agg(np.sum), exp) + tm.assert_series_equal(grouped.apply(np.sum), exp, check_index_type=False) + + # DataFrame + grouped = tsframe.groupby(tsframe["A"] * np.nan) + exp_df = DataFrame( + columns=tsframe.columns, + dtype=float, + index=Index([], name="A", dtype=np.float64), + ) + tm.assert_frame_equal(grouped.sum(), exp_df) + tm.assert_frame_equal(grouped.agg(np.sum), exp_df) + tm.assert_frame_equal(grouped.apply(np.sum), exp_df) + + +def test_agg_grouping_is_list_tuple(ts): + df = tm.makeTimeDataFrame() + + grouped = df.groupby(lambda x: x.year) + grouper = grouped.grouper.groupings[0].grouping_vector + grouped.grouper.groupings[0] = Grouping(ts.index, list(grouper)) + + result = grouped.agg(np.mean) + expected = grouped.mean() + tm.assert_frame_equal(result, expected) + + grouped.grouper.groupings[0] = Grouping(ts.index, tuple(grouper)) + + result = grouped.agg(np.mean) + expected = grouped.mean() + tm.assert_frame_equal(result, expected) + + +def test_agg_python_multiindex(mframe): + grouped = mframe.groupby(["A", "B"]) + + result = grouped.agg(np.mean) + expected = grouped.mean() + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "groupbyfunc", [lambda x: x.weekday(), [lambda x: x.month, lambda x: x.weekday()]] +) +def test_aggregate_str_func(tsframe, groupbyfunc): + grouped = tsframe.groupby(groupbyfunc) + + # single series + result = grouped["A"].agg("std") + expected = grouped["A"].std() + tm.assert_series_equal(result, expected) + + # group frame by function name + result = grouped.aggregate("var") + expected = grouped.var() + tm.assert_frame_equal(result, expected) + + # group frame by function dict + result = grouped.agg({"A": "var", "B": "std", "C": "mean", "D": "sem"}) + expected = DataFrame( + { + "A": grouped["A"].var(), + "B": grouped["B"].std(), + "C": grouped["C"].mean(), + "D": grouped["D"].sem(), + } + ) + tm.assert_frame_equal(result, expected) + + +def test_agg_str_with_kwarg_axis_1_raises(df, reduction_func): + gb = df.groupby(level=0) + if reduction_func in ("idxmax", "idxmin"): + error = TypeError + msg = "reduction operation '.*' not allowed for this dtype" + else: + error = ValueError + msg = f"Operation {reduction_func} does not support axis=1" + with pytest.raises(error, 
match=msg): + gb.agg(reduction_func, axis=1) + + +@pytest.mark.parametrize( + "func, expected, dtype, result_dtype_dict", + [ + ("sum", [5, 7, 9], "int64", {}), + ("std", [4.5 ** 0.5] * 3, int, {"i": float, "j": float, "k": float}), + ("var", [4.5] * 3, int, {"i": float, "j": float, "k": float}), + ("sum", [5, 7, 9], "Int64", {"j": "int64"}), + ("std", [4.5 ** 0.5] * 3, "Int64", {"i": float, "j": float, "k": float}), + ("var", [4.5] * 3, "Int64", {"i": "float64", "j": "float64", "k": "float64"}), + ], +) +def test_multiindex_groupby_mixed_cols_axis1(func, expected, dtype, result_dtype_dict): + # GH#43209 + df = DataFrame( + [[1, 2, 3, 4, 5, 6]] * 3, + columns=MultiIndex.from_product([["a", "b"], ["i", "j", "k"]]), + ).astype({("a", "j"): dtype, ("b", "j"): dtype}) + result = df.groupby(level=1, axis=1).agg(func) + expected = DataFrame([expected] * 3, columns=["i", "j", "k"]).astype( + result_dtype_dict + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "func, expected_data, result_dtype_dict", + [ + ("sum", [[2, 4], [10, 12], [18, 20]], {10: "int64", 20: "int64"}), + # std should ideally return Int64 / Float64 #43330 + ("std", [[2 ** 0.5] * 2] * 3, "float64"), + ("var", [[2] * 2] * 3, {10: "float64", 20: "float64"}), + ], +) +def test_groupby_mixed_cols_axis1(func, expected_data, result_dtype_dict): + # GH#43209 + df = DataFrame( + np.arange(12).reshape(3, 4), + index=Index([0, 1, 0], name="y"), + columns=Index([10, 20, 10, 20], name="x"), + dtype="int64", + ).astype({10: "Int64"}) + result = df.groupby("x", axis=1).agg(func) + expected = DataFrame( + data=expected_data, + index=Index([0, 1, 0], name="y"), + columns=Index([10, 20], name="x"), + ).astype(result_dtype_dict) + tm.assert_frame_equal(result, expected) + + +def test_aggregate_item_by_item(df): + grouped = df.groupby("A") + + aggfun = lambda ser: ser.size + result = grouped.agg(aggfun) + foo = (df.A == "foo").sum() + bar = (df.A == "bar").sum() + K = len(result.columns) + + # GH5782 + exp = Series(np.array([foo] * K), index=list("BCD"), name="foo") + tm.assert_series_equal(result.xs("foo"), exp) + + exp = Series(np.array([bar] * K), index=list("BCD"), name="bar") + tm.assert_almost_equal(result.xs("bar"), exp) + + def aggfun(ser): + return ser.size + + result = DataFrame().groupby(df.A).agg(aggfun) + assert isinstance(result, DataFrame) + assert len(result) == 0 + + +def test_wrap_agg_out(three_group): + grouped = three_group.groupby(["A", "B"]) + + def func(ser): + if ser.dtype == object: + raise TypeError + else: + return ser.sum() + + with tm.assert_produces_warning(FutureWarning, match="Dropping invalid columns"): + result = grouped.aggregate(func) + exp_grouped = three_group.loc[:, three_group.columns != "C"] + expected = exp_grouped.groupby(["A", "B"]).aggregate(func) + tm.assert_frame_equal(result, expected) + + +def test_agg_multiple_functions_maintain_order(df): + # GH #610 + funcs = [("mean", np.mean), ("max", np.max), ("min", np.min)] + result = df.groupby("A")["C"].agg(funcs) + exp_cols = Index(["mean", "max", "min"]) + + tm.assert_index_equal(result.columns, exp_cols) + + +def test_agg_multiple_functions_same_name(): + # GH 30880 + df = DataFrame( + np.random.randn(1000, 3), + index=pd.date_range("1/1/2012", freq="S", periods=1000), + columns=["A", "B", "C"], + ) + result = df.resample("3T").agg( + {"A": [partial(np.quantile, q=0.9999), partial(np.quantile, q=0.1111)]} + ) + expected_index = pd.date_range("1/1/2012", freq="3T", periods=6) + expected_columns = MultiIndex.from_tuples([("A", 
"quantile"), ("A", "quantile")]) + expected_values = np.array( + [df.resample("3T").A.quantile(q=q).values for q in [0.9999, 0.1111]] + ).T + expected = DataFrame( + expected_values, columns=expected_columns, index=expected_index + ) + tm.assert_frame_equal(result, expected) + + +def test_agg_multiple_functions_same_name_with_ohlc_present(): + # GH 30880 + # ohlc expands dimensions, so different test to the above is required. + df = DataFrame( + np.random.randn(1000, 3), + index=pd.date_range("1/1/2012", freq="S", periods=1000, name="dti"), + columns=Index(["A", "B", "C"], name="alpha"), + ) + result = df.resample("3T").agg( + {"A": ["ohlc", partial(np.quantile, q=0.9999), partial(np.quantile, q=0.1111)]} + ) + expected_index = pd.date_range("1/1/2012", freq="3T", periods=6, name="dti") + expected_columns = MultiIndex.from_tuples( + [ + ("A", "ohlc", "open"), + ("A", "ohlc", "high"), + ("A", "ohlc", "low"), + ("A", "ohlc", "close"), + ("A", "quantile", "A"), + ("A", "quantile", "A"), + ], + names=["alpha", None, None], + ) + non_ohlc_expected_values = np.array( + [df.resample("3T").A.quantile(q=q).values for q in [0.9999, 0.1111]] + ).T + expected_values = np.hstack([df.resample("3T").A.ohlc(), non_ohlc_expected_values]) + expected = DataFrame( + expected_values, columns=expected_columns, index=expected_index + ) + tm.assert_frame_equal(result, expected) + + +def test_multiple_functions_tuples_and_non_tuples(df): + # #1359 + funcs = [("foo", "mean"), "std"] + ex_funcs = [("foo", "mean"), ("std", "std")] + + result = df.groupby("A")["C"].agg(funcs) + expected = df.groupby("A")["C"].agg(ex_funcs) + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning( + FutureWarning, match=r"\['B'\] did not aggregate successfully" + ): + result = df.groupby("A").agg(funcs) + with tm.assert_produces_warning( + FutureWarning, match=r"\['B'\] did not aggregate successfully" + ): + expected = df.groupby("A").agg(ex_funcs) + tm.assert_frame_equal(result, expected) + + +def test_more_flexible_frame_multi_function(df): + grouped = df.groupby("A") + + exmean = grouped.agg({"C": np.mean, "D": np.mean}) + exstd = grouped.agg({"C": np.std, "D": np.std}) + + expected = concat([exmean, exstd], keys=["mean", "std"], axis=1) + expected = expected.swaplevel(0, 1, axis=1).sort_index(level=0, axis=1) + + d = {"C": [np.mean, np.std], "D": [np.mean, np.std]} + result = grouped.aggregate(d) + + tm.assert_frame_equal(result, expected) + + # be careful + result = grouped.aggregate({"C": np.mean, "D": [np.mean, np.std]}) + expected = grouped.aggregate({"C": np.mean, "D": [np.mean, np.std]}) + tm.assert_frame_equal(result, expected) + + def foo(x): + return np.mean(x) + + def bar(x): + return np.std(x, ddof=1) + + # this uses column selection & renaming + msg = r"nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + d = {"C": np.mean, "D": {"foo": np.mean, "bar": np.std}} + grouped.aggregate(d) + + # But without renaming, these functions are OK + d = {"C": [np.mean], "D": [foo, bar]} + grouped.aggregate(d) + + +def test_multi_function_flexible_mix(df): + # GH #1268 + grouped = df.groupby("A") + + # Expected + d = {"C": {"foo": "mean", "bar": "std"}, "D": {"sum": "sum"}} + # this uses column selection & renaming + msg = r"nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + grouped.aggregate(d) + + # Test 1 + d = {"C": {"foo": "mean", "bar": "std"}, "D": "sum"} + # this uses column selection & renaming + with pytest.raises(SpecificationError, 
match=msg): + grouped.aggregate(d) + + # Test 2 + d = {"C": {"foo": "mean", "bar": "std"}, "D": "sum"} + # this uses column selection & renaming + with pytest.raises(SpecificationError, match=msg): + grouped.aggregate(d) + + +def test_groupby_agg_coercing_bools(): + # issue 14873 + dat = DataFrame({"a": [1, 1, 2, 2], "b": [0, 1, 2, 3], "c": [None, None, 1, 1]}) + gp = dat.groupby("a") + + index = Index([1, 2], name="a") + + result = gp["b"].aggregate(lambda x: (x != 0).all()) + expected = Series([False, True], index=index, name="b") + tm.assert_series_equal(result, expected) + + result = gp["c"].aggregate(lambda x: x.isnull().all()) + expected = Series([True, False], index=index, name="c") + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "op", + [ + lambda x: x.sum(), + lambda x: x.cumsum(), + lambda x: x.transform("sum"), + lambda x: x.transform("cumsum"), + lambda x: x.agg("sum"), + lambda x: x.agg("cumsum"), + ], +) +def test_bool_agg_dtype(op): + # GH 7001 + # Bool sum aggregations result in int + df = DataFrame({"a": [1, 1], "b": [False, True]}) + s = df.set_index("a")["b"] + + result = op(df.groupby("a"))["b"].dtype + assert is_integer_dtype(result) + + result = op(s.groupby("a")).dtype + assert is_integer_dtype(result) + + +@pytest.mark.parametrize( + "keys, agg_index", + [ + (["a"], Index([1], name="a")), + (["a", "b"], MultiIndex([[1], [2]], [[0], [0]], names=["a", "b"])), + ], +) +@pytest.mark.parametrize( + "input_dtype", ["bool", "int32", "int64", "float32", "float64"] +) +@pytest.mark.parametrize( + "result_dtype", ["bool", "int32", "int64", "float32", "float64"] +) +@pytest.mark.parametrize("method", ["apply", "aggregate", "transform"]) +def test_callable_result_dtype_frame( + keys, agg_index, input_dtype, result_dtype, method +): + # GH 21240 + df = DataFrame({"a": [1], "b": [2], "c": [True]}) + df["c"] = df["c"].astype(input_dtype) + op = getattr(df.groupby(keys)[["c"]], method) + result = op(lambda x: x.astype(result_dtype).iloc[0]) + expected_index = pd.RangeIndex(0, 1) if method == "transform" else agg_index + expected = DataFrame({"c": [df["c"].iloc[0]]}, index=expected_index).astype( + result_dtype + ) + if method == "apply": + expected.columns.names = [0] + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "keys, agg_index", + [ + (["a"], Index([1], name="a")), + (["a", "b"], MultiIndex([[1], [2]], [[0], [0]], names=["a", "b"])), + ], +) +@pytest.mark.parametrize("input", [True, 1, 1.0]) +@pytest.mark.parametrize("dtype", [bool, int, float]) +@pytest.mark.parametrize("method", ["apply", "aggregate", "transform"]) +def test_callable_result_dtype_series(keys, agg_index, input, dtype, method): + # GH 21240 + df = DataFrame({"a": [1], "b": [2], "c": [input]}) + op = getattr(df.groupby(keys)["c"], method) + result = op(lambda x: x.astype(dtype).iloc[0]) + expected_index = pd.RangeIndex(0, 1) if method == "transform" else agg_index + expected = Series([df["c"].iloc[0]], index=expected_index, name="c").astype(dtype) + tm.assert_series_equal(result, expected) + + +def test_order_aggregate_multiple_funcs(): + # GH 25692 + df = DataFrame({"A": [1, 1, 2, 2], "B": [1, 2, 3, 4]}) + + res = df.groupby("A").agg(["sum", "max", "mean", "ohlc", "min"]) + result = res.columns.levels[1] + + expected = Index(["sum", "max", "mean", "ohlc", "min"]) + + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize("dtype", [np.int64, np.uint64]) +@pytest.mark.parametrize("how", ["first", "last", "min", "max", "mean", "median"]) +def 
test_uint64_type_handling(dtype, how): + # GH 26310 + df = DataFrame({"x": 6903052872240755750, "y": [1, 2]}) + expected = df.groupby("y").agg({"x": how}) + df.x = df.x.astype(dtype) + result = df.groupby("y").agg({"x": how}) + if how not in ("mean", "median"): + # mean and median always result in floats + result.x = result.x.astype(np.int64) + tm.assert_frame_equal(result, expected, check_exact=True) + + +def test_func_duplicates_raises(): + # GH28426 + msg = "Function names" + df = DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]}) + with pytest.raises(SpecificationError, match=msg): + df.groupby("A").agg(["min", "min"]) + + +@pytest.mark.parametrize( + "index", + [ + pd.CategoricalIndex(list("abc")), + pd.interval_range(0, 3), + pd.period_range("2020", periods=3, freq="D"), + MultiIndex.from_tuples([("a", 0), ("a", 1), ("b", 0)]), + ], +) +def test_agg_index_has_complex_internals(index): + # GH 31223 + df = DataFrame({"group": [1, 1, 2], "value": [0, 1, 0]}, index=index) + result = df.groupby("group").agg({"value": Series.nunique}) + expected = DataFrame({"group": [1, 2], "value": [2, 1]}).set_index("group") + tm.assert_frame_equal(result, expected) + + +def test_agg_split_block(): + # https://github.com/pandas-dev/pandas/issues/31522 + df = DataFrame( + { + "key1": ["a", "a", "b", "b", "a"], + "key2": ["one", "two", "one", "two", "one"], + "key3": ["three", "three", "three", "six", "six"], + } + ) + result = df.groupby("key1").min() + expected = DataFrame( + {"key2": ["one", "one"], "key3": ["six", "six"]}, + index=Index(["a", "b"], name="key1"), + ) + tm.assert_frame_equal(result, expected) + + +def test_agg_split_object_part_datetime(): + # https://github.com/pandas-dev/pandas/pull/31616 + df = DataFrame( + { + "A": pd.date_range("2000", periods=4), + "B": ["a", "b", "c", "d"], + "C": [1, 2, 3, 4], + "D": ["b", "c", "d", "e"], + "E": pd.date_range("2000", periods=4), + "F": [1, 2, 3, 4], + } + ).astype(object) + result = df.groupby([0, 0, 0, 0]).min() + expected = DataFrame( + { + "A": [pd.Timestamp("2000")], + "B": ["a"], + "C": [1], + "D": ["b"], + "E": [pd.Timestamp("2000")], + "F": [1], + } + ) + tm.assert_frame_equal(result, expected) + + +class TestNamedAggregationSeries: + def test_series_named_agg(self): + df = Series([1, 2, 3, 4]) + gr = df.groupby([0, 0, 1, 1]) + result = gr.agg(a="sum", b="min") + expected = DataFrame( + {"a": [3, 7], "b": [1, 3]}, columns=["a", "b"], index=[0, 1] + ) + tm.assert_frame_equal(result, expected) + + result = gr.agg(b="min", a="sum") + expected = expected[["b", "a"]] + tm.assert_frame_equal(result, expected) + + def test_no_args_raises(self): + gr = Series([1, 2]).groupby([0, 1]) + with pytest.raises(TypeError, match="Must provide"): + gr.agg() + + # but we do allow this + result = gr.agg([]) + expected = DataFrame() + tm.assert_frame_equal(result, expected) + + def test_series_named_agg_duplicates_no_raises(self): + # GH28426 + gr = Series([1, 2, 3]).groupby([0, 0, 1]) + grouped = gr.agg(a="sum", b="sum") + expected = DataFrame({"a": [3, 3], "b": [3, 3]}) + tm.assert_frame_equal(expected, grouped) + + def test_mangled(self): + gr = Series([1, 2, 3]).groupby([0, 0, 1]) + result = gr.agg(a=lambda x: 0, b=lambda x: 1) + expected = DataFrame({"a": [0, 0], "b": [1, 1]}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "inp", + [ + pd.NamedAgg(column="anything", aggfunc="min"), + ("anything", "min"), + ["anything", "min"], + ], + ) + def test_named_agg_nametuple(self, inp): + # GH34422 + s = Series([1, 1, 2, 2, 3, 3, 4, 5]) 
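+ # Series named aggregation expects a plain aggfunc (callable or string) as the + # keyword value; a NamedAgg, tuple, or list raises TypeError, as asserted below.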
+ msg = f"func is expected but received {type(inp).__name__}" + with pytest.raises(TypeError, match=msg): + s.groupby(s.values).agg(a=inp) + + +class TestNamedAggregationDataFrame: + def test_agg_relabel(self): + df = DataFrame( + {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]} + ) + result = df.groupby("group").agg(a_max=("A", "max"), b_max=("B", "max")) + expected = DataFrame( + {"a_max": [1, 3], "b_max": [6, 8]}, + index=Index(["a", "b"], name="group"), + columns=["a_max", "b_max"], + ) + tm.assert_frame_equal(result, expected) + + # order invariance + p98 = functools.partial(np.percentile, q=98) + result = df.groupby("group").agg( + b_min=("B", "min"), + a_min=("A", min), + a_mean=("A", np.mean), + a_max=("A", "max"), + b_max=("B", "max"), + a_98=("A", p98), + ) + expected = DataFrame( + { + "b_min": [5, 7], + "a_min": [0, 2], + "a_mean": [0.5, 2.5], + "a_max": [1, 3], + "b_max": [6, 8], + "a_98": [0.98, 2.98], + }, + index=Index(["a", "b"], name="group"), + columns=["b_min", "a_min", "a_mean", "a_max", "b_max", "a_98"], + ) + tm.assert_frame_equal(result, expected) + + def test_agg_relabel_non_identifier(self): + df = DataFrame( + {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]} + ) + + result = df.groupby("group").agg(**{"my col": ("A", "max")}) + expected = DataFrame({"my col": [1, 3]}, index=Index(["a", "b"], name="group")) + tm.assert_frame_equal(result, expected) + + def test_duplicate_no_raises(self): + # GH 28426, if use same input function on same column, + # no error should raise + df = DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]}) + + grouped = df.groupby("A").agg(a=("B", "min"), b=("B", "min")) + expected = DataFrame({"a": [1, 3], "b": [1, 3]}, index=Index([0, 1], name="A")) + tm.assert_frame_equal(grouped, expected) + + quant50 = functools.partial(np.percentile, q=50) + quant70 = functools.partial(np.percentile, q=70) + quant50.__name__ = "quant50" + quant70.__name__ = "quant70" + + test = DataFrame({"col1": ["a", "a", "b", "b", "b"], "col2": [1, 2, 3, 4, 5]}) + + grouped = test.groupby("col1").agg( + quantile_50=("col2", quant50), quantile_70=("col2", quant70) + ) + expected = DataFrame( + {"quantile_50": [1.5, 4.0], "quantile_70": [1.7, 4.4]}, + index=Index(["a", "b"], name="col1"), + ) + tm.assert_frame_equal(grouped, expected) + + def test_agg_relabel_with_level(self): + df = DataFrame( + {"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]}, + index=MultiIndex.from_product([["A", "B"], ["a", "b"]]), + ) + result = df.groupby(level=0).agg( + aa=("A", "max"), bb=("A", "min"), cc=("B", "mean") + ) + expected = DataFrame( + {"aa": [0, 1], "bb": [0, 1], "cc": [1.5, 3.5]}, index=["A", "B"] + ) + tm.assert_frame_equal(result, expected) + + def test_agg_relabel_other_raises(self): + df = DataFrame({"A": [0, 0, 1], "B": [1, 2, 3]}) + grouped = df.groupby("A") + match = "Must provide" + with pytest.raises(TypeError, match=match): + grouped.agg(foo=1) + + with pytest.raises(TypeError, match=match): + grouped.agg() + + with pytest.raises(TypeError, match=match): + grouped.agg(a=("B", "max"), b=(1, 2, 3)) + + def test_missing_raises(self): + df = DataFrame({"A": [0, 1], "B": [1, 2]}) + match = re.escape("Column(s) ['C'] do not exist") + with pytest.raises(KeyError, match=match): + df.groupby("A").agg(c=("C", "sum")) + + def test_agg_namedtuple(self): + df = DataFrame({"A": [0, 1], "B": [1, 2]}) + result = df.groupby("A").agg( + b=pd.NamedAgg("B", "sum"), c=pd.NamedAgg(column="B", aggfunc="count") + ) + expected = df.groupby("A").agg(b=("B", "sum"), 
c=("B", "count")) + tm.assert_frame_equal(result, expected) + + def test_mangled(self): + df = DataFrame({"A": [0, 1], "B": [1, 2], "C": [3, 4]}) + result = df.groupby("A").agg(b=("B", lambda x: 0), c=("C", lambda x: 1)) + expected = DataFrame({"b": [0, 0], "c": [1, 1]}, index=Index([0, 1], name="A")) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "agg_col1, agg_col2, agg_col3, agg_result1, agg_result2, agg_result3", + [ + ( + (("y", "A"), "max"), + (("y", "A"), np.min), + (("y", "B"), "mean"), + [1, 3], + [0, 2], + [5.5, 7.5], + ), + ( + (("y", "A"), lambda x: max(x)), + (("y", "A"), lambda x: 1), + (("y", "B"), "mean"), + [1, 3], + [1, 1], + [5.5, 7.5], + ), + ( + pd.NamedAgg(("y", "A"), "max"), + pd.NamedAgg(("y", "B"), np.mean), + pd.NamedAgg(("y", "A"), lambda x: 1), + [1, 3], + [5.5, 7.5], + [1, 1], + ), + ], +) +def test_agg_relabel_multiindex_column( + agg_col1, agg_col2, agg_col3, agg_result1, agg_result2, agg_result3 +): + # GH 29422, add tests for multiindex column cases + df = DataFrame( + {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]} + ) + df.columns = MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")]) + idx = Index(["a", "b"], name=("x", "group")) + + result = df.groupby(("x", "group")).agg(a_max=(("y", "A"), "max")) + expected = DataFrame({"a_max": [1, 3]}, index=idx) + tm.assert_frame_equal(result, expected) + + result = df.groupby(("x", "group")).agg( + col_1=agg_col1, col_2=agg_col2, col_3=agg_col3 + ) + expected = DataFrame( + {"col_1": agg_result1, "col_2": agg_result2, "col_3": agg_result3}, index=idx + ) + tm.assert_frame_equal(result, expected) + + +def test_agg_relabel_multiindex_raises_not_exist(): + # GH 29422, add test for raises scenario when aggregate column does not exist + df = DataFrame( + {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]} + ) + df.columns = MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")]) + + with pytest.raises(KeyError, match="do not exist"): + df.groupby(("x", "group")).agg(a=(("Y", "a"), "max")) + + +def test_agg_relabel_multiindex_duplicates(): + # GH29422, add test for raises scenario when getting duplicates + # GH28426, after this change, duplicates should also work if the relabelling is + # different + df = DataFrame( + {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]} + ) + df.columns = MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")]) + + result = df.groupby(("x", "group")).agg( + a=(("y", "A"), "min"), b=(("y", "A"), "min") + ) + idx = Index(["a", "b"], name=("x", "group")) + expected = DataFrame({"a": [0, 2], "b": [0, 2]}, index=idx) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("kwargs", [{"c": ["min"]}, {"b": [], "c": ["min"]}]) +def test_groupby_aggregate_empty_key(kwargs): + # GH: 32580 + df = DataFrame({"a": [1, 1, 2], "b": [1, 2, 3], "c": [1, 2, 4]}) + result = df.groupby("a").agg(kwargs) + expected = DataFrame( + [1, 4], + index=Index([1, 2], dtype="int64", name="a"), + columns=MultiIndex.from_tuples([["c", "min"]]), + ) + tm.assert_frame_equal(result, expected) + + +def test_groupby_aggregate_empty_key_empty_return(): + # GH: 32580 Check if everything works, when return is empty + df = DataFrame({"a": [1, 1, 2], "b": [1, 2, 3], "c": [1, 2, 4]}) + result = df.groupby("a").agg({"b": []}) + expected = DataFrame(columns=MultiIndex(levels=[["b"], []], codes=[[], []])) + tm.assert_frame_equal(result, expected) + + +def 
test_groupby_aggregate_empty_with_multiindex_frame(): + # GH 39178 + df = DataFrame(columns=["a", "b", "c"]) + result = df.groupby(["a", "b"]).agg(d=("c", list)) + expected = DataFrame( + columns=["d"], index=MultiIndex([[], []], [[], []], names=["a", "b"]) + ) + tm.assert_frame_equal(result, expected) + + +def test_groupby_agg_loses_results_with_as_index_false_relabel(): + # GH 32240: When the aggregate function relabels column names and + # as_index=False is specified, the results are dropped. + + df = DataFrame( + {"key": ["x", "y", "z", "x", "y", "z"], "val": [1.0, 0.8, 2.0, 3.0, 3.6, 0.75]} + ) + + grouped = df.groupby("key", as_index=False) + result = grouped.agg(min_val=pd.NamedAgg(column="val", aggfunc="min")) + expected = DataFrame({"key": ["x", "y", "z"], "min_val": [1.0, 0.8, 0.75]}) + tm.assert_frame_equal(result, expected) + + +def test_groupby_agg_loses_results_with_as_index_false_relabel_multiindex(): + # GH 32240: When the aggregate function relabels column names and + # as_index=False is specified, the results are dropped. Check if + # multiindex is returned in the right order + + df = DataFrame( + { + "key": ["x", "y", "x", "y", "x", "x"], + "key1": ["a", "b", "c", "b", "a", "c"], + "val": [1.0, 0.8, 2.0, 3.0, 3.6, 0.75], + } + ) + + grouped = df.groupby(["key", "key1"], as_index=False) + result = grouped.agg(min_val=pd.NamedAgg(column="val", aggfunc="min")) + expected = DataFrame( + {"key": ["x", "x", "y"], "key1": ["a", "c", "b"], "min_val": [1.0, 0.75, 0.8]} + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "func", [lambda s: s.mean(), lambda s: np.mean(s), lambda s: np.nanmean(s)] +) +def test_multiindex_custom_func(func): + # GH 31777 + data = [[1, 4, 2], [5, 7, 1]] + df = DataFrame( + data, + columns=MultiIndex.from_arrays( + [[1, 1, 2], [3, 4, 3]], names=["Sisko", "Janeway"] + ), + ) + result = df.groupby(np.array([0, 1])).agg(func) + expected_dict = { + (1, 3): {0: 1.0, 1: 5.0}, + (1, 4): {0: 4.0, 1: 7.0}, + (2, 3): {0: 2.0, 1: 1.0}, + } + expected = DataFrame(expected_dict) + expected.columns = df.columns + tm.assert_frame_equal(result, expected) + + +def myfunc(s): + return np.percentile(s, q=0.90) + + +@pytest.mark.parametrize("func", [lambda s: np.percentile(s, q=0.90), myfunc]) +def test_lambda_named_agg(func): + # see gh-28467 + animals = DataFrame( + { + "kind": ["cat", "dog", "cat", "dog"], + "height": [9.1, 6.0, 9.5, 34.0], + "weight": [7.9, 7.5, 9.9, 198.0], + } + ) + + result = animals.groupby("kind").agg( + mean_height=("height", "mean"), perc90=("height", func) + ) + expected = DataFrame( + [[9.3, 9.1036], [20.0, 6.252]], + columns=["mean_height", "perc90"], + index=Index(["cat", "dog"], name="kind"), + ) + + tm.assert_frame_equal(result, expected) + + +def test_aggregate_mixed_types(): + # GH 16916 + df = DataFrame( + data=np.array([0] * 9).reshape(3, 3), columns=list("XYZ"), index=list("abc") + ) + df["grouping"] = ["group 1", "group 1", 2] + result = df.groupby("grouping").aggregate(lambda x: x.tolist()) + expected_data = [[[0], [0], [0]], [[0, 0], [0, 0], [0, 0]]] + expected = DataFrame( + expected_data, + index=Index([2, "group 1"], dtype="object", name="grouping"), + columns=Index(["X", "Y", "Z"], dtype="object"), + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.xfail(reason="Not implemented; see GH 31256") +def test_aggregate_udf_na_extension_type(): + # https://github.com/pandas-dev/pandas/pull/31359 + # This is currently failing to cast back to Int64Dtype. 
+ # The presence of the NA causes two problems + # 1. NA is not an instance of Int64Dtype.type (numpy.int64) + # 2. The presence of an NA forces object type, so the non-NA value is + # a Python int rather than a NumPy int64. Python ints aren't + # instances of numpy.int64. + def aggfunc(x): + if all(x > 2): + return 1 + else: + return pd.NA + + df = DataFrame({"A": pd.array([1, 2, 3])}) + result = df.groupby([1, 1, 2]).agg(aggfunc) + expected = DataFrame({"A": pd.array([1, pd.NA], dtype="Int64")}, index=[1, 2]) + tm.assert_frame_equal(result, expected) + + +class TestLambdaMangling: + def test_basic(self): + df = DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]}) + result = df.groupby("A").agg({"B": [lambda x: 0, lambda x: 1]}) + + expected = DataFrame( + {("B", "<lambda_0>"): [0, 0], ("B", "<lambda_1>"): [1, 1]}, + index=Index([0, 1], name="A"), + ) + tm.assert_frame_equal(result, expected) + + def test_mangle_series_groupby(self): + gr = Series([1, 2, 3, 4]).groupby([0, 0, 1, 1]) + result = gr.agg([lambda x: 0, lambda x: 1]) + expected = DataFrame({"<lambda_0>": [0, 0], "<lambda_1>": [1, 1]}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.xfail(reason="GH-26611. kwargs for multi-agg.") + @pytest.mark.filterwarnings("ignore:Dropping invalid columns:FutureWarning") + def test_with_kwargs(self): + f1 = lambda x, y, b=1: x.sum() + y + b + f2 = lambda x, y, b=2: x.sum() + y * b + result = Series([1, 2]).groupby([0, 0]).agg([f1, f2], 0) + expected = DataFrame({"<lambda_0>": [4], "<lambda_1>": [6]}) + tm.assert_frame_equal(result, expected) + + result = Series([1, 2]).groupby([0, 0]).agg([f1, f2], 0, b=10) + expected = DataFrame({"<lambda_0>": [13], "<lambda_1>": [30]}) + tm.assert_frame_equal(result, expected) + + def test_agg_with_one_lambda(self): + # GH 25719, write tests for DataFrameGroupBy.agg with only one lambda + df = DataFrame( + { + "kind": ["cat", "dog", "cat", "dog"], + "height": [9.1, 6.0, 9.5, 34.0], + "weight": [7.9, 7.5, 9.9, 198.0], + } + ) + + columns = ["height_sqr_min", "height_max", "weight_max"] + expected = DataFrame( + { + "height_sqr_min": [82.81, 36.00], + "height_max": [9.5, 34.0], + "weight_max": [9.9, 198.0], + }, + index=Index(["cat", "dog"], name="kind"), + columns=columns, + ) + + # check pd.NamedAgg case + result1 = df.groupby(by="kind").agg( + height_sqr_min=pd.NamedAgg( + column="height", aggfunc=lambda x: np.min(x ** 2) + ), + height_max=pd.NamedAgg(column="height", aggfunc="max"), + weight_max=pd.NamedAgg(column="weight", aggfunc="max"), + ) + tm.assert_frame_equal(result1, expected) + + # check agg(key=(col, aggfunc)) case + result2 = df.groupby(by="kind").agg( + height_sqr_min=("height", lambda x: np.min(x ** 2)), + height_max=("height", "max"), + weight_max=("weight", "max"), + ) + tm.assert_frame_equal(result2, expected) + + def test_agg_multiple_lambda(self): + # GH25719, test for DataFrameGroupBy.agg with multiple lambdas + # with mixed aggfunc + df = DataFrame( + { + "kind": ["cat", "dog", "cat", "dog"], + "height": [9.1, 6.0, 9.5, 34.0], + "weight": [7.9, 7.5, 9.9, 198.0], + } + ) + columns = [ + "height_sqr_min", + "height_max", + "weight_max", + "height_max_2", + "weight_min", + ] + expected = DataFrame( + { + "height_sqr_min": [82.81, 36.00], + "height_max": [9.5, 34.0], + "weight_max": [9.9, 198.0], + "height_max_2": [9.5, 34.0], + "weight_min": [7.9, 7.5], + }, + index=Index(["cat", "dog"], name="kind"), + columns=columns, + ) + + # check agg(key=(col, aggfunc)) case + result1 = df.groupby(by="kind").agg( + height_sqr_min=("height", lambda x: np.min(x ** 2)), + height_max=("height", "max"), + 
weight_max=("weight", "max"), + height_max_2=("height", lambda x: np.max(x)), + weight_min=("weight", lambda x: np.min(x)), + ) + tm.assert_frame_equal(result1, expected) + + # check pd.NamedAgg case + result2 = df.groupby(by="kind").agg( + height_sqr_min=pd.NamedAgg( + column="height", aggfunc=lambda x: np.min(x ** 2) + ), + height_max=pd.NamedAgg(column="height", aggfunc="max"), + weight_max=pd.NamedAgg(column="weight", aggfunc="max"), + height_max_2=pd.NamedAgg(column="height", aggfunc=lambda x: np.max(x)), + weight_min=pd.NamedAgg(column="weight", aggfunc=lambda x: np.min(x)), + ) + tm.assert_frame_equal(result2, expected) + + +def test_groupby_get_by_index(): + # GH 33439 + df = DataFrame({"A": ["S", "W", "W"], "B": [1.0, 1.0, 2.0]}) + res = df.groupby("A").agg({"B": lambda x: x.get(x.index[-1])}) + expected = DataFrame({"A": ["S", "W"], "B": [1.0, 2.0]}).set_index("A") + tm.assert_frame_equal(res, expected) + + +@pytest.mark.parametrize( + "grp_col_dict, exp_data", + [ + ({"nr": "min", "cat_ord": "min"}, {"nr": [1, 5], "cat_ord": ["a", "c"]}), + ({"cat_ord": "min"}, {"cat_ord": ["a", "c"]}), + ({"nr": "min"}, {"nr": [1, 5]}), + ], +) +def test_groupby_single_agg_cat_cols(grp_col_dict, exp_data): + # test single aggregations on ordered categorical cols GHGH27800 + + # create the result dataframe + input_df = DataFrame( + { + "nr": [1, 2, 3, 4, 5, 6, 7, 8], + "cat_ord": list("aabbccdd"), + "cat": list("aaaabbbb"), + } + ) + + input_df = input_df.astype({"cat": "category", "cat_ord": "category"}) + input_df["cat_ord"] = input_df["cat_ord"].cat.as_ordered() + result_df = input_df.groupby("cat").agg(grp_col_dict) + + # create expected dataframe + cat_index = pd.CategoricalIndex( + ["a", "b"], categories=["a", "b"], ordered=False, name="cat", dtype="category" + ) + + expected_df = DataFrame(data=exp_data, index=cat_index) + + if "cat_ord" in expected_df: + # ordered categorical columns should be preserved + dtype = input_df["cat_ord"].dtype + expected_df["cat_ord"] = expected_df["cat_ord"].astype(dtype) + + tm.assert_frame_equal(result_df, expected_df) + + +@pytest.mark.parametrize( + "grp_col_dict, exp_data", + [ + ({"nr": ["min", "max"], "cat_ord": "min"}, [(1, 4, "a"), (5, 8, "c")]), + ({"nr": "min", "cat_ord": ["min", "max"]}, [(1, "a", "b"), (5, "c", "d")]), + ({"cat_ord": ["min", "max"]}, [("a", "b"), ("c", "d")]), + ], +) +def test_groupby_combined_aggs_cat_cols(grp_col_dict, exp_data): + # test combined aggregations on ordered categorical cols GH27800 + + # create the result dataframe + input_df = DataFrame( + { + "nr": [1, 2, 3, 4, 5, 6, 7, 8], + "cat_ord": list("aabbccdd"), + "cat": list("aaaabbbb"), + } + ) + + input_df = input_df.astype({"cat": "category", "cat_ord": "category"}) + input_df["cat_ord"] = input_df["cat_ord"].cat.as_ordered() + result_df = input_df.groupby("cat").agg(grp_col_dict) + + # create expected dataframe + cat_index = pd.CategoricalIndex( + ["a", "b"], categories=["a", "b"], ordered=False, name="cat", dtype="category" + ) + + # unpack the grp_col_dict to create the multi-index tuple + # this tuple will be used to create the expected dataframe index + multi_index_list = [] + for k, v in grp_col_dict.items(): + if isinstance(v, list): + for value in v: + multi_index_list.append([k, value]) + else: + multi_index_list.append([k, v]) + multi_index = MultiIndex.from_tuples(tuple(multi_index_list)) + + expected_df = DataFrame(data=exp_data, columns=multi_index, index=cat_index) + for col in expected_df.columns: + if isinstance(col, tuple) and "cat_ord" in col: + 
# ordered categorical should be preserved + expected_df[col] = expected_df[col].astype(input_df["cat_ord"].dtype) + + tm.assert_frame_equal(result_df, expected_df) + + +def test_nonagg_agg(): + # GH 35490 - Single/Multiple agg of non-agg function give same results + # TODO: agg should raise for functions that don't aggregate + df = DataFrame({"a": [1, 1, 2, 2], "b": [1, 2, 2, 1]}) + g = df.groupby("a") + + result = g.agg(["cumsum"]) + result.columns = result.columns.droplevel(-1) + expected = g.agg("cumsum") + + tm.assert_frame_equal(result, expected) + + +def test_aggregate_datetime_objects(): + # https://github.com/pandas-dev/pandas/issues/36003 + # ensure we don't raise an error but keep object dtype for out-of-bounds + # datetimes + df = DataFrame( + { + "A": ["X", "Y"], + "B": [ + datetime.datetime(2005, 1, 1, 10, 30, 23, 540000), + datetime.datetime(3005, 1, 1, 10, 30, 23, 540000), + ], + } + ) + result = df.groupby("A").B.max() + expected = df.set_index("A")["B"] + tm.assert_series_equal(result, expected) + + +def test_groupby_index_object_dtype(): + # GH 40014 + df = DataFrame({"c0": ["x", "x", "x"], "c1": ["x", "x", "y"], "p": [0, 1, 2]}) + df.index = df.index.astype("O") + grouped = df.groupby(["c0", "c1"]) + res = grouped.p.agg(lambda x: all(x > 0)) + # Check that providing a user-defined function in agg() + # produces the correct index shape when using an object-typed index. + expected_index = MultiIndex.from_tuples( + [("x", "x"), ("x", "y")], names=("c0", "c1") + ) + expected = Series([False, True], index=expected_index, name="p") + tm.assert_series_equal(res, expected) + + +def test_timeseries_groupby_agg(): + # GH#43290 + + def func(ser): + if ser.isna().all(): + return None + return np.sum(ser) + + df = DataFrame([1.0], index=[pd.Timestamp("2018-01-16 00:00:00+00:00")]) + res = df.groupby(lambda x: 1).agg(func) + + expected = DataFrame([[1.0]], index=[1]) + tm.assert_frame_equal(res, expected) + + +def test_groupby_aggregate_directory(reduction_func): + # GH#32793 + if reduction_func in ["corrwith", "nth"]: + return None + + obj = DataFrame([[0, 1], [0, np.nan]]) + + result_reduced_series = obj.groupby(0).agg(reduction_func) + result_reduced_frame = obj.groupby(0).agg({1: reduction_func}) + + if reduction_func in ["size", "ngroup"]: + # names are different: None / 1 + tm.assert_series_equal( + result_reduced_series, result_reduced_frame[1], check_names=False + ) + else: + tm.assert_frame_equal(result_reduced_series, result_reduced_frame) + tm.assert_series_equal( + result_reduced_series.dtypes, result_reduced_frame.dtypes + ) + + +def test_group_mean_timedelta_nat(): + # GH43132 + data = Series(["1 day", "3 days", "NaT"], dtype="timedelta64[ns]") + expected = Series(["2 days"], dtype="timedelta64[ns]") + + result = data.groupby([0, 0, 0]).mean() + + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "input_data, expected_output", + [ + ( # no timezone + ["2021-01-01T00:00", "NaT", "2021-01-01T02:00"], + ["2021-01-01T01:00"], + ), + ( # timezone + ["2021-01-01T00:00-0100", "NaT", "2021-01-01T02:00-0100"], + ["2021-01-01T01:00-0100"], + ), + ], +) +def test_group_mean_datetime64_nat(input_data, expected_output): + # GH43132 + data = to_datetime(Series(input_data)) + expected = to_datetime(Series(expected_output)) + + result = data.groupby([0, 0, 0]).mean() + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "func, output", [("mean", [8 + 18j, 10 + 22j]), ("sum", [40 + 90j, 50 + 110j])] +) +def test_groupby_complex(func, output): 
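+ # mean and sum of a complex-valued Series reduce the real and imaginary + # parts independently; the expected values parametrized above follow from that.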
+ # GH#43701 + data = Series(np.arange(20).reshape(10, 2).dot([1, 2j])) + result = data.groupby(data.index % 2).agg(func) + expected = Series(output) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("func", ["min", "max", "var"]) +def test_groupby_complex_raises(func): + # GH#43701 + data = Series(np.arange(20).reshape(10, 2).dot([1, 2j])) + msg = "No matching signature found" + with pytest.raises(TypeError, match=msg): + data.groupby(data.index % 2).agg(func) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/aggregate/test_cython.py b/.local/lib/python3.8/site-packages/pandas/tests/groupby/aggregate/test_cython.py new file mode 100644 index 0000000..d9372ba --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/groupby/aggregate/test_cython.py @@ -0,0 +1,393 @@ +""" +test cython .agg behavior +""" + +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_float_dtype + +import pandas as pd +from pandas import ( + DataFrame, + Index, + NaT, + Series, + Timedelta, + Timestamp, + bdate_range, +) +import pandas._testing as tm + + +@pytest.mark.parametrize( + "op_name", + [ + "count", + "sum", + "std", + "var", + "sem", + "mean", + pytest.param( + "median", + # ignore mean of empty slice + # and all-NaN + marks=[pytest.mark.filterwarnings("ignore::RuntimeWarning")], + ), + "prod", + "min", + "max", + ], +) +def test_cythonized_aggers(op_name): + data = { + "A": [0, 0, 0, 0, 1, 1, 1, 1, 1, 1.0, np.nan, np.nan], + "B": ["A", "B"] * 6, + "C": np.random.randn(12), + } + df = DataFrame(data) + df.loc[2:10:2, "C"] = np.nan + + op = lambda x: getattr(x, op_name)() + + # single column + grouped = df.drop(["B"], axis=1).groupby("A") + exp = {cat: op(group["C"]) for cat, group in grouped} + exp = DataFrame({"C": exp}) + exp.index.name = "A" + result = op(grouped) + tm.assert_frame_equal(result, exp) + + # multiple columns + grouped = df.groupby(["A", "B"]) + expd = {} + for (cat1, cat2), group in grouped: + expd.setdefault(cat1, {})[cat2] = op(group["C"]) + exp = DataFrame(expd).T.stack(dropna=False) + exp.index.names = ["A", "B"] + exp.name = "C" + + result = op(grouped)["C"] + if op_name in ["sum", "prod"]: + tm.assert_series_equal(result, exp) + + +def test_cython_agg_boolean(): + frame = DataFrame( + { + "a": np.random.randint(0, 5, 50), + "b": np.random.randint(0, 2, 50).astype("bool"), + } + ) + result = frame.groupby("a")["b"].mean() + expected = frame.groupby("a")["b"].agg(np.mean) + + tm.assert_series_equal(result, expected) + + +def test_cython_agg_nothing_to_agg(): + frame = DataFrame({"a": np.random.randint(0, 5, 50), "b": ["foo", "bar"] * 25}) + + with pytest.raises(NotImplementedError, match="does not implement"): + frame.groupby("a")["b"].mean(numeric_only=True) + + with pytest.raises(TypeError, match="Could not convert (foo|bar)*"): + frame.groupby("a")["b"].mean() + + frame = DataFrame({"a": np.random.randint(0, 5, 50), "b": ["foo", "bar"] * 25}) + + with tm.assert_produces_warning(FutureWarning): + result = frame[["b"]].groupby(frame["a"]).mean() + expected = DataFrame([], index=frame["a"].sort_values().drop_duplicates()) + tm.assert_frame_equal(result, expected) + + +def test_cython_agg_nothing_to_agg_with_dates(): + frame = DataFrame( + { + "a": np.random.randint(0, 5, 50), + "b": ["foo", "bar"] * 25, + "dates": pd.date_range("now", periods=50, freq="T"), + } + ) + with pytest.raises(NotImplementedError, match="does not implement"): + frame.groupby("b").dates.mean(numeric_only=True) + + +def 
test_cython_agg_frame_columns(): + # #2113 + df = DataFrame({"x": [1, 2, 3], "y": [3, 4, 5]}) + + df.groupby(level=0, axis="columns").mean() + df.groupby(level=0, axis="columns").mean() + df.groupby(level=0, axis="columns").mean() + df.groupby(level=0, axis="columns").mean() + + +def test_cython_agg_return_dict(): + # GH 16741 + df = DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": ["one", "one", "two", "three", "two", "two", "one", "three"], + "C": np.random.randn(8), + "D": np.random.randn(8), + } + ) + + ts = df.groupby("A")["B"].agg(lambda x: x.value_counts().to_dict()) + expected = Series( + [{"two": 1, "one": 1, "three": 1}, {"two": 2, "one": 2, "three": 1}], + index=Index(["bar", "foo"], name="A"), + name="B", + ) + tm.assert_series_equal(ts, expected) + + +def test_cython_fail_agg(): + dr = bdate_range("1/1/2000", periods=50) + ts = Series(["A", "B", "C", "D", "E"] * 10, index=dr) + + grouped = ts.groupby(lambda x: x.month) + summed = grouped.sum() + expected = grouped.agg(np.sum) + tm.assert_series_equal(summed, expected) + + +@pytest.mark.parametrize( + "op, targop", + [ + ("mean", np.mean), + ("median", np.median), + ("var", np.var), + ("add", np.sum), + ("prod", np.prod), + ("min", np.min), + ("max", np.max), + ("first", lambda x: x.iloc[0]), + ("last", lambda x: x.iloc[-1]), + ], +) +def test__cython_agg_general(op, targop): + df = DataFrame(np.random.randn(1000)) + labels = np.random.randint(0, 50, size=1000).astype(float) + + result = df.groupby(labels)._cython_agg_general(op, alt=None, numeric_only=True) + expected = df.groupby(labels).agg(targop) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "op, targop", + [ + ("mean", np.mean), + ("median", lambda x: np.median(x) if len(x) > 0 else np.nan), + ("var", lambda x: np.var(x, ddof=1)), + ("min", np.min), + ("max", np.max), + ], +) +def test_cython_agg_empty_buckets(op, targop, observed): + df = DataFrame([11, 12, 13]) + grps = range(0, 55, 5) + + # calling _cython_agg_general directly, instead of via the user API + # which sets different values for min_count, so do that here. 
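+ # pd.cut yields ten interval buckets here, only one of which is populated; + # observed controls whether the empty categories appear in the result.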
+ g = df.groupby(pd.cut(df[0], grps), observed=observed) + result = g._cython_agg_general(op, alt=None, numeric_only=True) + + g = df.groupby(pd.cut(df[0], grps), observed=observed) + expected = g.agg(lambda x: targop(x)) + tm.assert_frame_equal(result, expected) + + +def test_cython_agg_empty_buckets_nanops(observed): + # GH-18869 can't call nanops on empty groups, so hardcode expected + # for these + df = DataFrame([11, 12, 13], columns=["a"]) + grps = range(0, 25, 5) + # add / sum + result = df.groupby(pd.cut(df["a"], grps), observed=observed)._cython_agg_general( + "add", alt=None, numeric_only=True + ) + intervals = pd.interval_range(0, 20, freq=5) + expected = DataFrame( + {"a": [0, 0, 36, 0]}, + index=pd.CategoricalIndex(intervals, name="a", ordered=True), + ) + if observed: + expected = expected[expected.a != 0] + + tm.assert_frame_equal(result, expected) + + # prod + result = df.groupby(pd.cut(df["a"], grps), observed=observed)._cython_agg_general( + "prod", alt=None, numeric_only=True + ) + expected = DataFrame( + {"a": [1, 1, 1716, 1]}, + index=pd.CategoricalIndex(intervals, name="a", ordered=True), + ) + if observed: + expected = expected[expected.a != 1] + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("op", ["first", "last", "max", "min"]) +@pytest.mark.parametrize( + "data", [Timestamp("2016-10-14 21:00:44.557"), Timedelta("17088 days 21:00:44.557")] +) +def test_cython_with_timestamp_and_nat(op, data): + # https://github.com/pandas-dev/pandas/issues/19526 + df = DataFrame({"a": [0, 1], "b": [data, NaT]}) + index = Index([0, 1], name="a") + + # We will group by a and test the cython aggregations + expected = DataFrame({"b": [data, NaT]}, index=index) + + result = df.groupby("a").aggregate(op) + tm.assert_frame_equal(expected, result) + + +@pytest.mark.parametrize( + "agg", + [ + "min", + "max", + "count", + "sum", + "prod", + "var", + "mean", + "median", + "ohlc", + "cumprod", + "cumsum", + "shift", + "any", + "all", + "quantile", + "first", + "last", + "rank", + "cummin", + "cummax", + ], +) +def test_read_only_buffer_source_agg(agg): + # https://github.com/pandas-dev/pandas/issues/36014 + df = DataFrame( + { + "sepal_length": [5.1, 4.9, 4.7, 4.6, 5.0], + "species": ["setosa", "setosa", "setosa", "setosa", "setosa"], + } + ) + df._mgr.arrays[0].flags.writeable = False + + result = df.groupby(["species"]).agg({"sepal_length": agg}) + expected = df.copy().groupby(["species"]).agg({"sepal_length": agg}) + + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize( + "op_name", + [ + "count", + "sum", + "std", + "var", + "sem", + "mean", + "median", + "prod", + "min", + "max", + ], +) +def test_cython_agg_nullable_int(op_name): + # ensure that the cython-based aggregations don't fail for nullable dtype + # (eg https://github.com/pandas-dev/pandas/issues/37415) + df = DataFrame( + { + "A": ["A", "B"] * 5, + "B": pd.array([1, 2, 3, 4, 5, 6, 7, 8, 9, pd.NA], dtype="Int64"), + } + ) + result = getattr(df.groupby("A")["B"], op_name)() + df2 = df.assign(B=df["B"].astype("float64")) + expected = getattr(df2.groupby("A")["B"], op_name)() + + if op_name != "count": + # the result is not yet consistently using Int64/Float64 dtype, + # so for now just checking the values by casting to float + result = result.astype("float64") + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("with_na", [True, False]) +@pytest.mark.parametrize( + "op_name, action", + [ + # ("count", "always_int"), + ("sum", "large_int"), + # ("std", "always_float"), + 
("var", "always_float"), + # ("sem", "always_float"), + ("mean", "always_float"), + ("median", "always_float"), + ("prod", "large_int"), + ("min", "preserve"), + ("max", "preserve"), + ("first", "preserve"), + ("last", "preserve"), + ], +) +@pytest.mark.parametrize( + "data", + [ + pd.array([1, 2, 3, 4], dtype="Int64"), + pd.array([1, 2, 3, 4], dtype="Int8"), + pd.array([0.1, 0.2, 0.3, 0.4], dtype="Float32"), + pd.array([0.1, 0.2, 0.3, 0.4], dtype="Float64"), + pd.array([True, True, False, False], dtype="boolean"), + ], +) +def test_cython_agg_EA_known_dtypes(data, op_name, action, with_na): + if with_na: + data[3] = pd.NA + + df = DataFrame({"key": ["a", "a", "b", "b"], "col": data}) + grouped = df.groupby("key") + + if action == "always_int": + # always Int64 + expected_dtype = pd.Int64Dtype() + elif action == "large_int": + # for any int/bool use Int64, for float preserve dtype + if is_float_dtype(data.dtype): + expected_dtype = data.dtype + else: + expected_dtype = pd.Int64Dtype() + elif action == "always_float": + # for any int/bool use Float64, for float preserve dtype + if is_float_dtype(data.dtype): + expected_dtype = data.dtype + else: + expected_dtype = pd.Float64Dtype() + elif action == "preserve": + expected_dtype = data.dtype + + result = getattr(grouped, op_name)() + assert result["col"].dtype == expected_dtype + + result = grouped.aggregate(op_name) + assert result["col"].dtype == expected_dtype + + result = getattr(grouped["col"], op_name)() + assert result.dtype == expected_dtype + + result = grouped["col"].aggregate(op_name) + assert result.dtype == expected_dtype diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/aggregate/test_numba.py b/.local/lib/python3.8/site-packages/pandas/tests/groupby/aggregate/test_numba.py new file mode 100644 index 0000000..e7fa2e0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/groupby/aggregate/test_numba.py @@ -0,0 +1,189 @@ +import numpy as np +import pytest + +from pandas.errors import NumbaUtilError +import pandas.util._test_decorators as td + +from pandas import ( + DataFrame, + Index, + NamedAgg, + Series, + option_context, +) +import pandas._testing as tm +from pandas.core.util.numba_ import NUMBA_FUNC_CACHE + + +@td.skip_if_no("numba") +def test_correct_function_signature(): + def incorrect_function(x): + return sum(x) * 2.7 + + data = DataFrame( + {"key": ["a", "a", "b", "b", "a"], "data": [1.0, 2.0, 3.0, 4.0, 5.0]}, + columns=["key", "data"], + ) + with pytest.raises(NumbaUtilError, match="The first 2"): + data.groupby("key").agg(incorrect_function, engine="numba") + + with pytest.raises(NumbaUtilError, match="The first 2"): + data.groupby("key")["data"].agg(incorrect_function, engine="numba") + + +@td.skip_if_no("numba") +def test_check_nopython_kwargs(): + def incorrect_function(x, **kwargs): + return sum(x) * 2.7 + + data = DataFrame( + {"key": ["a", "a", "b", "b", "a"], "data": [1.0, 2.0, 3.0, 4.0, 5.0]}, + columns=["key", "data"], + ) + with pytest.raises(NumbaUtilError, match="numba does not support"): + data.groupby("key").agg(incorrect_function, engine="numba", a=1) + + with pytest.raises(NumbaUtilError, match="numba does not support"): + data.groupby("key")["data"].agg(incorrect_function, engine="numba", a=1) + + +@td.skip_if_no("numba") +@pytest.mark.filterwarnings("ignore:\n") +# Filter warnings when parallel=True and the function can't be parallelized by Numba +@pytest.mark.parametrize("jit", [True, False]) +@pytest.mark.parametrize("pandas_obj", ["Series", "DataFrame"]) +def 
test_numba_vs_cython(jit, pandas_obj, nogil, parallel, nopython): + def func_numba(values, index): + return np.mean(values) * 2.7 + + if jit: + # Test accepted jitted functions + import numba + + func_numba = numba.jit(func_numba) + + data = DataFrame( + {0: ["a", "a", "b", "b", "a"], 1: [1.0, 2.0, 3.0, 4.0, 5.0]}, columns=[0, 1] + ) + engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} + grouped = data.groupby(0) + if pandas_obj == "Series": + grouped = grouped[1] + + result = grouped.agg(func_numba, engine="numba", engine_kwargs=engine_kwargs) + expected = grouped.agg(lambda x: np.mean(x) * 2.7, engine="cython") + + tm.assert_equal(result, expected) + + +@td.skip_if_no("numba") +@pytest.mark.filterwarnings("ignore:\n") +# Filter warnings when parallel=True and the function can't be parallelized by Numba +@pytest.mark.parametrize("jit", [True, False]) +@pytest.mark.parametrize("pandas_obj", ["Series", "DataFrame"]) +def test_cache(jit, pandas_obj, nogil, parallel, nopython): + # Test that the functions are cached correctly if we switch functions + def func_1(values, index): + return np.mean(values) - 3.4 + + def func_2(values, index): + return np.mean(values) * 2.7 + + if jit: + import numba + + func_1 = numba.jit(func_1) + func_2 = numba.jit(func_2) + + data = DataFrame( + {0: ["a", "a", "b", "b", "a"], 1: [1.0, 2.0, 3.0, 4.0, 5.0]}, columns=[0, 1] + ) + engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} + grouped = data.groupby(0) + if pandas_obj == "Series": + grouped = grouped[1] + + result = grouped.agg(func_1, engine="numba", engine_kwargs=engine_kwargs) + expected = grouped.agg(lambda x: np.mean(x) - 3.4, engine="cython") + tm.assert_equal(result, expected) + # func_1 should be in the cache now + assert (func_1, "groupby_agg") in NUMBA_FUNC_CACHE + + # Add func_2 to the cache + result = grouped.agg(func_2, engine="numba", engine_kwargs=engine_kwargs) + expected = grouped.agg(lambda x: np.mean(x) * 2.7, engine="cython") + tm.assert_equal(result, expected) + assert (func_2, "groupby_agg") in NUMBA_FUNC_CACHE + + # Retest func_1 which should use the cache + result = grouped.agg(func_1, engine="numba", engine_kwargs=engine_kwargs) + expected = grouped.agg(lambda x: np.mean(x) - 3.4, engine="cython") + tm.assert_equal(result, expected) + + +@td.skip_if_no("numba") +def test_use_global_config(): + def func_1(values, index): + return np.mean(values) - 3.4 + + data = DataFrame( + {0: ["a", "a", "b", "b", "a"], 1: [1.0, 2.0, 3.0, 4.0, 5.0]}, columns=[0, 1] + ) + grouped = data.groupby(0) + expected = grouped.agg(func_1, engine="numba") + with option_context("compute.use_numba", True): + result = grouped.agg(func_1, engine=None) + tm.assert_frame_equal(expected, result) + + +@td.skip_if_no("numba") +@pytest.mark.parametrize( + "agg_func", + [ + ["min", "max"], + "min", + {"B": ["min", "max"], "C": "sum"}, + NamedAgg(column="B", aggfunc="min"), + ], +) +def test_multifunc_notimplemented(agg_func): + data = DataFrame( + {0: ["a", "a", "b", "b", "a"], 1: [1.0, 2.0, 3.0, 4.0, 5.0]}, columns=[0, 1] + ) + grouped = data.groupby(0) + with pytest.raises(NotImplementedError, match="Numba engine can"): + grouped.agg(agg_func, engine="numba") + + with pytest.raises(NotImplementedError, match="Numba engine can"): + grouped[1].agg(agg_func, engine="numba") + + +@td.skip_if_no("numba") +def test_args_not_cached(): + # GH 41647 + def sum_last(values, index, n): + return values[-n:].sum() + + df = DataFrame({"id": [0, 0, 1, 1], "x": [1, 1, 1, 1]}) + 
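# the compiled numba kernel is cached per function, while extra positional + # args like n are passed through on each call; the second call below, with a + # different n, must therefore not be served the stale first result. + 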
grouped_x = df.groupby("id")["x"] + result = grouped_x.agg(sum_last, 1, engine="numba") + expected = Series([1.0] * 2, name="x", index=Index([0, 1], name="id")) + tm.assert_series_equal(result, expected) + + result = grouped_x.agg(sum_last, 2, engine="numba") + expected = Series([2.0] * 2, name="x", index=Index([0, 1], name="id")) + tm.assert_series_equal(result, expected) + + +@td.skip_if_no("numba") +def test_index_data_correctly_passed(): + # GH 43133 + def f(values, index): + return np.mean(index) + + df = DataFrame({"group": ["A", "A", "B"], "v": [4, 5, 6]}, index=[-1, -2, -3]) + result = df.groupby("group").aggregate(f, engine="numba") + expected = DataFrame( + [-1.5, -3.0], columns=["v"], index=Index(["A", "B"], name="group") + ) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/aggregate/test_other.py b/.local/lib/python3.8/site-packages/pandas/tests/groupby/aggregate/test_other.py new file mode 100644 index 0000000..06044dd --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/groupby/aggregate/test_other.py @@ -0,0 +1,673 @@ +""" +test all other .agg behavior +""" + +import datetime as dt +from functools import partial + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + PeriodIndex, + Series, + date_range, + period_range, +) +import pandas._testing as tm +from pandas.core.base import SpecificationError + +from pandas.io.formats.printing import pprint_thing + + +def test_agg_api(): + # GH 6337 + # https://stackoverflow.com/questions/21706030/pandas-groupby-agg-function-column-dtype-error + # different api for agg when passed custom function with mixed frame + + df = DataFrame( + { + "data1": np.random.randn(5), + "data2": np.random.randn(5), + "key1": ["a", "a", "b", "b", "a"], + "key2": ["one", "two", "one", "two", "one"], + } + ) + grouped = df.groupby("key1") + + def peak_to_peak(arr): + return arr.max() - arr.min() + + with tm.assert_produces_warning( + FutureWarning, + match=r"\['key2'\] did not aggregate successfully", + ): + expected = grouped.agg([peak_to_peak]) + expected.columns = ["data1", "data2"] + + with tm.assert_produces_warning( + FutureWarning, + match=r"\['key2'\] did not aggregate successfully", + ): + result = grouped.agg(peak_to_peak) + tm.assert_frame_equal(result, expected) + + +def test_agg_datetimes_mixed(): + data = [[1, "2012-01-01", 1.0], [2, "2012-01-02", 2.0], [3, None, 3.0]] + + df1 = DataFrame( + { + "key": [x[0] for x in data], + "date": [x[1] for x in data], + "value": [x[2] for x in data], + } + ) + + data = [ + [ + row[0], + (dt.datetime.strptime(row[1], "%Y-%m-%d").date() if row[1] else None), + row[2], + ] + for row in data + ] + + df2 = DataFrame( + { + "key": [x[0] for x in data], + "date": [x[1] for x in data], + "value": [x[2] for x in data], + } + ) + + df1["weights"] = df1["value"] / df1["value"].sum() + gb1 = df1.groupby("date").aggregate(np.sum) + + df2["weights"] = df1["value"] / df1["value"].sum() + gb2 = df2.groupby("date").aggregate(np.sum) + + assert len(gb1) == len(gb2) + + +def test_agg_period_index(): + prng = period_range("2012-1-1", freq="M", periods=3) + df = DataFrame(np.random.randn(3, 2), index=prng) + rs = df.groupby(level=0).sum() + assert isinstance(rs.index, PeriodIndex) + + # GH 3579 + index = period_range(start="1999-01", periods=5, freq="M") + s1 = Series(np.random.rand(len(index)), index=index) + s2 = Series(np.random.rand(len(index)), index=index) + df = 
DataFrame.from_dict({"s1": s1, "s2": s2}) + grouped = df.groupby(df.index.month) + list(grouped) + + +def test_agg_dict_parameter_cast_result_dtypes(): + # GH 12821 + + df = DataFrame( + { + "class": ["A", "A", "B", "B", "C", "C", "D", "D"], + "time": date_range("1/1/2011", periods=8, freq="H"), + } + ) + df.loc[[0, 1, 2, 5], "time"] = None + + # test for `first` function + exp = df.loc[[0, 3, 4, 6]].set_index("class") + grouped = df.groupby("class") + tm.assert_frame_equal(grouped.first(), exp) + tm.assert_frame_equal(grouped.agg("first"), exp) + tm.assert_frame_equal(grouped.agg({"time": "first"}), exp) + tm.assert_series_equal(grouped.time.first(), exp["time"]) + tm.assert_series_equal(grouped.time.agg("first"), exp["time"]) + + # test for `last` function + exp = df.loc[[0, 3, 4, 7]].set_index("class") + grouped = df.groupby("class") + tm.assert_frame_equal(grouped.last(), exp) + tm.assert_frame_equal(grouped.agg("last"), exp) + tm.assert_frame_equal(grouped.agg({"time": "last"}), exp) + tm.assert_series_equal(grouped.time.last(), exp["time"]) + tm.assert_series_equal(grouped.time.agg("last"), exp["time"]) + + # count + exp = Series([2, 2, 2, 2], index=Index(list("ABCD"), name="class"), name="time") + tm.assert_series_equal(grouped.time.agg(len), exp) + tm.assert_series_equal(grouped.time.size(), exp) + + exp = Series([0, 1, 1, 2], index=Index(list("ABCD"), name="class"), name="time") + tm.assert_series_equal(grouped.time.count(), exp) + + +def test_agg_cast_results_dtypes(): + # similar to GH12821 + # xref #11444 + u = [dt.datetime(2015, x + 1, 1) for x in range(12)] + v = list("aaabbbbbbccd") + df = DataFrame({"X": v, "Y": u}) + + result = df.groupby("X")["Y"].agg(len) + expected = df.groupby("X")["Y"].count() + tm.assert_series_equal(result, expected) + + +def test_aggregate_float64_no_int64(): + # see gh-11199 + df = DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 2, 2, 4, 5], "c": [1, 2, 3, 4, 5]}) + + expected = DataFrame({"a": [1, 2.5, 4, 5]}, index=[1, 2, 4, 5]) + expected.index.name = "b" + + result = df.groupby("b")[["a"]].mean() + tm.assert_frame_equal(result, expected) + + expected = DataFrame({"a": [1, 2.5, 4, 5], "c": [1, 2.5, 4, 5]}, index=[1, 2, 4, 5]) + expected.index.name = "b" + + result = df.groupby("b")[["a", "c"]].mean() + tm.assert_frame_equal(result, expected) + + +def test_aggregate_api_consistency(): + # GH 9052 + # make sure that the aggregates via dict + # are consistent + df = DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": ["one", "one", "two", "two", "two", "two", "one", "two"], + "C": np.random.randn(8) + 1.0, + "D": np.arange(8), + } + ) + + grouped = df.groupby(["A", "B"]) + c_mean = grouped["C"].mean() + c_sum = grouped["C"].sum() + d_mean = grouped["D"].mean() + d_sum = grouped["D"].sum() + + result = grouped["D"].agg(["sum", "mean"]) + expected = pd.concat([d_sum, d_mean], axis=1) + expected.columns = ["sum", "mean"] + tm.assert_frame_equal(result, expected, check_like=True) + + result = grouped.agg([np.sum, np.mean]) + expected = pd.concat([c_sum, c_mean, d_sum, d_mean], axis=1) + expected.columns = MultiIndex.from_product([["C", "D"], ["sum", "mean"]]) + tm.assert_frame_equal(result, expected, check_like=True) + + result = grouped[["D", "C"]].agg([np.sum, np.mean]) + expected = pd.concat([d_sum, d_mean, c_sum, c_mean], axis=1) + expected.columns = MultiIndex.from_product([["D", "C"], ["sum", "mean"]]) + tm.assert_frame_equal(result, expected, check_like=True) + + result = grouped.agg({"C": "mean", "D": "sum"}) + 
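# check_like=True in the assert below ignores column order, so the concat order of the expected pieces is irrelevant +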
expected = pd.concat([d_sum, c_mean], axis=1) + tm.assert_frame_equal(result, expected, check_like=True) + + result = grouped.agg({"C": ["mean", "sum"], "D": ["mean", "sum"]}) + expected = pd.concat([c_mean, c_sum, d_mean, d_sum], axis=1) + expected.columns = MultiIndex.from_product([["C", "D"], ["mean", "sum"]]) + + msg = r"Column\(s\) \['r', 'r2'\] do not exist" + with pytest.raises(KeyError, match=msg): + grouped[["D", "C"]].agg({"r": np.sum, "r2": np.mean}) + + +def test_agg_dict_renaming_deprecation(): + # 15931 + df = DataFrame({"A": [1, 1, 1, 2, 2], "B": range(5), "C": range(5)}) + + msg = r"nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + df.groupby("A").agg( + {"B": {"foo": ["sum", "max"]}, "C": {"bar": ["count", "min"]}} + ) + + msg = r"Column\(s\) \['ma'\] do not exist" + with pytest.raises(KeyError, match=msg): + df.groupby("A")[["B", "C"]].agg({"ma": "max"}) + + msg = r"nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + df.groupby("A").B.agg({"foo": "count"}) + + +def test_agg_compat(): + # GH 12334 + df = DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": ["one", "one", "two", "two", "two", "two", "one", "two"], + "C": np.random.randn(8) + 1.0, + "D": np.arange(8), + } + ) + + g = df.groupby(["A", "B"]) + + msg = r"nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + g["D"].agg({"C": ["sum", "std"]}) + + with pytest.raises(SpecificationError, match=msg): + g["D"].agg({"C": "sum", "D": "std"}) + + +def test_agg_nested_dicts(): + # API change for disallowing these types of nested dicts + df = DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": ["one", "one", "two", "two", "two", "two", "one", "two"], + "C": np.random.randn(8) + 1.0, + "D": np.arange(8), + } + ) + + g = df.groupby(["A", "B"]) + + msg = r"nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + g.aggregate({"r1": {"C": ["mean", "sum"]}, "r2": {"D": ["mean", "sum"]}}) + + with pytest.raises(SpecificationError, match=msg): + g.agg({"C": {"ra": ["mean", "std"]}, "D": {"rb": ["mean", "std"]}}) + + # same name as the original column + # GH9052 + with pytest.raises(SpecificationError, match=msg): + g["D"].agg({"result1": np.sum, "result2": np.mean}) + + with pytest.raises(SpecificationError, match=msg): + g["D"].agg({"D": np.sum, "result2": np.mean}) + + +def test_agg_item_by_item_raise_typeerror(): + df = DataFrame(np.random.randint(10, size=(20, 10))) + + def raiseException(df): + pprint_thing("----------------------------------------") + pprint_thing(df.to_string()) + raise TypeError("test") + + with pytest.raises(TypeError, match="test"): + with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"): + df.groupby(0).agg(raiseException) + + +def test_series_agg_multikey(): + ts = tm.makeTimeSeries() + grouped = ts.groupby([lambda x: x.year, lambda x: x.month]) + + result = grouped.agg(np.sum) + expected = grouped.sum() + tm.assert_series_equal(result, expected) + + +def test_series_agg_multi_pure_python(): + data = DataFrame( + { + "A": [ + "foo", + "foo", + "foo", + "foo", + "bar", + "bar", + "bar", + "bar", + "foo", + "foo", + "foo", + ], + "B": [ + "one", + "one", + "one", + "two", + "one", + "one", + "one", + "two", + "two", + "two", + "one", + ], + "C": [ + "dull", + "dull", + "shiny", + "dull", + "dull", + "shiny", + "shiny", + "dull", + "shiny", + "shiny", + "shiny", + ], + "D": 
np.random.randn(11), + "E": np.random.randn(11), + "F": np.random.randn(11), + } + ) + + def bad(x): + assert len(x.values.base) > 0 + return "foo" + + result = data.groupby(["A", "B"]).agg(bad) + expected = data.groupby(["A", "B"]).agg(lambda x: "foo") + tm.assert_frame_equal(result, expected) + + +def test_agg_consistency(): + # agg with ([]) and () not consistent + # GH 6715 + def P1(a): + return np.percentile(a.dropna(), q=1) + + df = DataFrame( + { + "col1": [1, 2, 3, 4], + "col2": [10, 25, 26, 31], + "date": [ + dt.date(2013, 2, 10), + dt.date(2013, 2, 10), + dt.date(2013, 2, 11), + dt.date(2013, 2, 11), + ], + } + ) + + g = df.groupby("date") + + expected = g.agg([P1]) + expected.columns = expected.columns.levels[0] + + result = g.agg(P1) + tm.assert_frame_equal(result, expected) + + +def test_agg_callables(): + # GH 7929 + df = DataFrame({"foo": [1, 2], "bar": [3, 4]}).astype(np.int64) + + class fn_class: + def __call__(self, x): + return sum(x) + + equiv_callables = [ + sum, + np.sum, + lambda x: sum(x), + lambda x: x.sum(), + partial(sum), + fn_class(), + ] + + expected = df.groupby("foo").agg(sum) + for ecall in equiv_callables: + result = df.groupby("foo").agg(ecall) + tm.assert_frame_equal(result, expected) + + +def test_agg_over_numpy_arrays(): + # GH 3788 + df = DataFrame( + [ + [1, np.array([10, 20, 30])], + [1, np.array([40, 50, 60])], + [2, np.array([20, 30, 40])], + ], + columns=["category", "arraydata"], + ) + gb = df.groupby("category") + + expected_data = [[np.array([50, 70, 90])], [np.array([20, 30, 40])]] + expected_index = Index([1, 2], name="category") + expected_column = ["arraydata"] + expected = DataFrame(expected_data, index=expected_index, columns=expected_column) + + alt = gb.sum(numeric_only=False) + tm.assert_frame_equal(alt, expected) + + result = gb.agg("sum", numeric_only=False) + tm.assert_frame_equal(result, expected) + + # FIXME: the original version of this test called `gb.agg(sum)` + # and that raises TypeError if `numeric_only=False` is passed + + +@pytest.mark.parametrize("as_period", [True, False]) +def test_agg_tzaware_non_datetime_result(as_period): + # discussed in GH#29589, fixed in GH#29641, operating on tzaware values + # with function that is not dtype-preserving + dti = date_range("2012-01-01", periods=4, tz="UTC") + if as_period: + dti = dti.tz_localize(None).to_period("D") + + df = DataFrame({"a": [0, 0, 1, 1], "b": dti}) + gb = df.groupby("a") + + # Case that _does_ preserve the dtype + result = gb["b"].agg(lambda x: x.iloc[0]) + expected = Series(dti[::2], name="b") + expected.index.name = "a" + tm.assert_series_equal(result, expected) + + # Cases that do _not_ preserve the dtype + result = gb["b"].agg(lambda x: x.iloc[0].year) + expected = Series([2012, 2012], name="b") + expected.index.name = "a" + tm.assert_series_equal(result, expected) + + result = gb["b"].agg(lambda x: x.iloc[-1] - x.iloc[0]) + expected = Series([pd.Timedelta(days=1), pd.Timedelta(days=1)], name="b") + expected.index.name = "a" + if as_period: + expected = Series([pd.offsets.Day(1), pd.offsets.Day(1)], name="b") + expected.index.name = "a" + tm.assert_series_equal(result, expected) + + +def test_agg_timezone_round_trip(): + # GH 15426 + ts = pd.Timestamp("2016-01-01 12:00:00", tz="US/Pacific") + df = DataFrame({"a": 1, "b": [ts + dt.timedelta(minutes=nn) for nn in range(10)]}) + + result1 = df.groupby("a")["b"].agg(np.min).iloc[0] + result2 = df.groupby("a")["b"].agg(lambda x: np.min(x)).iloc[0] + result3 = df.groupby("a")["b"].min().iloc[0] + + assert result1 
== ts + assert result2 == ts + assert result3 == ts + + dates = [ + pd.Timestamp(f"2016-01-0{i:d} 12:00:00", tz="US/Pacific") for i in range(1, 5) + ] + df = DataFrame({"A": ["a", "b"] * 2, "B": dates}) + grouped = df.groupby("A") + + ts = df["B"].iloc[0] + assert ts == grouped.nth(0)["B"].iloc[0] + assert ts == grouped.head(1)["B"].iloc[0] + assert ts == grouped.first()["B"].iloc[0] + + # GH#27110 applying iloc should return a DataFrame + assert ts == grouped.apply(lambda x: x.iloc[0]).iloc[0, 1] + + ts = df["B"].iloc[2] + assert ts == grouped.last()["B"].iloc[0] + + # GH#27110 applying iloc should return a DataFrame + assert ts == grouped.apply(lambda x: x.iloc[-1]).iloc[0, 1] + + +def test_sum_uint64_overflow(): + # see gh-14758 + # Convert to uint64 and don't overflow + df = DataFrame([[1, 2], [3, 4], [5, 6]], dtype=object) + df = df + 9223372036854775807 + + index = Index( + [9223372036854775808, 9223372036854775810, 9223372036854775812], dtype=np.uint64 + ) + expected = DataFrame( + {1: [9223372036854775809, 9223372036854775811, 9223372036854775813]}, + index=index, + ) + + expected.index.name = 0 + result = df.groupby(0).sum(numeric_only=False) + tm.assert_frame_equal(result, expected) + + # out column is non-numeric, so with numeric_only=True it is dropped + result2 = df.groupby(0).sum(numeric_only=True) + expected2 = expected[[]] + tm.assert_frame_equal(result2, expected2) + + +@pytest.mark.parametrize( + "structure, expected", + [ + (tuple, DataFrame({"C": {(1, 1): (1, 1, 1), (3, 4): (3, 4, 4)}})), + (list, DataFrame({"C": {(1, 1): [1, 1, 1], (3, 4): [3, 4, 4]}})), + ( + lambda x: tuple(x), + DataFrame({"C": {(1, 1): (1, 1, 1), (3, 4): (3, 4, 4)}}), + ), + ( + lambda x: list(x), + DataFrame({"C": {(1, 1): [1, 1, 1], (3, 4): [3, 4, 4]}}), + ), + ], +) +def test_agg_structs_dataframe(structure, expected): + df = DataFrame( + {"A": [1, 1, 1, 3, 3, 3], "B": [1, 1, 1, 4, 4, 4], "C": [1, 1, 1, 3, 4, 4]} + ) + + result = df.groupby(["A", "B"]).aggregate(structure) + expected.index.names = ["A", "B"] + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "structure, expected", + [ + (tuple, Series([(1, 1, 1), (3, 4, 4)], index=[1, 3], name="C")), + (list, Series([[1, 1, 1], [3, 4, 4]], index=[1, 3], name="C")), + (lambda x: tuple(x), Series([(1, 1, 1), (3, 4, 4)], index=[1, 3], name="C")), + (lambda x: list(x), Series([[1, 1, 1], [3, 4, 4]], index=[1, 3], name="C")), + ], +) +def test_agg_structs_series(structure, expected): + # Issue #18079 + df = DataFrame( + {"A": [1, 1, 1, 3, 3, 3], "B": [1, 1, 1, 4, 4, 4], "C": [1, 1, 1, 3, 4, 4]} + ) + + result = df.groupby("A")["C"].aggregate(structure) + expected.index.name = "A" + tm.assert_series_equal(result, expected) + + +def test_agg_category_nansum(observed): + categories = ["a", "b", "c"] + df = DataFrame( + {"A": pd.Categorical(["a", "a", "b"], categories=categories), "B": [1, 2, 3]} + ) + result = df.groupby("A", observed=observed).B.agg(np.nansum) + expected = Series( + [3, 3, 0], + index=pd.CategoricalIndex(["a", "b", "c"], categories=categories, name="A"), + name="B", + ) + if observed: + expected = expected[expected != 0] + tm.assert_series_equal(result, expected) + + +def test_agg_list_like_func(): + # GH 18473 + df = DataFrame({"A": [str(x) for x in range(3)], "B": [str(x) for x in range(3)]}) + grouped = df.groupby("A", as_index=False, sort=False) + result = grouped.agg({"B": lambda x: list(x)}) + expected = DataFrame( + {"A": [str(x) for x in range(3)], "B": [[str(x)] for x in range(3)]} + ) + 
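# with as_index=False the grouping key "A" remains a regular column in both frames +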
tm.assert_frame_equal(result, expected) + + +def test_agg_lambda_with_timezone(): + # GH 23683 + df = DataFrame( + { + "tag": [1, 1], + "date": [ + pd.Timestamp("2018-01-01", tz="UTC"), + pd.Timestamp("2018-01-02", tz="UTC"), + ], + } + ) + result = df.groupby("tag").agg({"date": lambda e: e.head(1)}) + expected = DataFrame( + [pd.Timestamp("2018-01-01", tz="UTC")], + index=Index([1], name="tag"), + columns=["date"], + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "err_cls", + [ + NotImplementedError, + RuntimeError, + KeyError, + IndexError, + OSError, + ValueError, + ArithmeticError, + AttributeError, + ], +) +def test_groupby_agg_err_catching(err_cls): + # make sure we suppress anything other than TypeError or AssertionError + # in _python_agg_general + + # Use a non-standard EA to make sure we don't go down ndarray paths + from pandas.tests.extension.decimal.array import ( + DecimalArray, + make_data, + to_decimal, + ) + + data = make_data()[:5] + df = DataFrame( + {"id1": [0, 0, 0, 1, 1], "id2": [0, 1, 0, 1, 1], "decimals": DecimalArray(data)} + ) + + expected = Series(to_decimal([data[0], data[3]])) + + def weird_func(x): + # weird function that raises something other than TypeError or AssertionError + # in _python_agg_general + if len(x) == 0: + raise err_cls + return x.iloc[0] + + result = df["decimals"].groupby(df["id1"]).agg(weird_func) + tm.assert_series_equal(result, expected, check_names=False) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/conftest.py b/.local/lib/python3.8/site-packages/pandas/tests/groupby/conftest.py new file mode 100644 index 0000000..2be680d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/groupby/conftest.py @@ -0,0 +1,191 @@ +import numpy as np +import pytest + +from pandas import DataFrame +import pandas._testing as tm +from pandas.core.groupby.base import ( + reduction_kernels, + transformation_kernels, +) + + +@pytest.fixture(params=[True, False]) +def sort(request): + return request.param + + +@pytest.fixture(params=[True, False]) +def as_index(request): + return request.param + + +@pytest.fixture +def mframe(multiindex_dataframe_random_data): + return multiindex_dataframe_random_data + + +@pytest.fixture +def df(): + return DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": ["one", "one", "two", "three", "two", "two", "one", "three"], + "C": np.random.randn(8), + "D": np.random.randn(8), + } + ) + + +@pytest.fixture +def ts(): + return tm.makeTimeSeries() + + +@pytest.fixture +def tsd(): + return tm.getTimeSeriesData() + + +@pytest.fixture +def tsframe(tsd): + return DataFrame(tsd) + + +@pytest.fixture +def df_mixed_floats(): + return DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": ["one", "one", "two", "three", "two", "two", "one", "three"], + "C": np.random.randn(8), + "D": np.array(np.random.randn(8), dtype="float32"), + } + ) + + +@pytest.fixture +def three_group(): + return DataFrame( + { + "A": [ + "foo", + "foo", + "foo", + "foo", + "bar", + "bar", + "bar", + "bar", + "foo", + "foo", + "foo", + ], + "B": [ + "one", + "one", + "one", + "two", + "one", + "one", + "one", + "two", + "two", + "two", + "one", + ], + "C": [ + "dull", + "dull", + "shiny", + "dull", + "dull", + "shiny", + "shiny", + "dull", + "shiny", + "shiny", + "shiny", + ], + "D": np.random.randn(11), + "E": np.random.randn(11), + "F": np.random.randn(11), + } + ) + + +@pytest.fixture() +def slice_test_df(): + data = [ + [0, "a", 
"a0_at_0"], + [1, "b", "b0_at_1"], + [2, "a", "a1_at_2"], + [3, "b", "b1_at_3"], + [4, "c", "c0_at_4"], + [5, "a", "a2_at_5"], + [6, "a", "a3_at_6"], + [7, "a", "a4_at_7"], + ] + df = DataFrame(data, columns=["Index", "Group", "Value"]) + return df.set_index("Index") + + +@pytest.fixture() +def slice_test_grouped(slice_test_df): + return slice_test_df.groupby("Group", as_index=False) + + +@pytest.fixture(params=sorted(reduction_kernels)) +def reduction_func(request): + """ + yields the string names of all groupby reduction functions, one at a time. + """ + return request.param + + +@pytest.fixture(params=sorted(transformation_kernels)) +def transformation_func(request): + """yields the string names of all groupby transformation functions.""" + return request.param + + +@pytest.fixture(params=sorted(reduction_kernels) + sorted(transformation_kernels)) +def groupby_func(request): + """yields both aggregation and transformation functions.""" + return request.param + + +@pytest.fixture(params=[True, False]) +def parallel(request): + """parallel keyword argument for numba.jit""" + return request.param + + +# Can parameterize nogil & nopython over True | False, but limiting per +# https://github.com/pandas-dev/pandas/pull/41971#issuecomment-860607472 + + +@pytest.fixture(params=[False]) +def nogil(request): + """nogil keyword argument for numba.jit""" + return request.param + + +@pytest.fixture(params=[True]) +def nopython(request): + """nopython keyword argument for numba.jit""" + return request.param + + +@pytest.fixture( + params=[ + ("mean", {}), + ("var", {"ddof": 1}), + ("var", {"ddof": 0}), + ("std", {"ddof": 1}), + ("std", {"ddof": 0}), + ("sum", {}), + ] +) +def numba_supported_reductions(request): + """reductions supported with engine='numba'""" + return request.param diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_allowlist.py b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_allowlist.py new file mode 100644 index 0000000..44778aa --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_allowlist.py @@ -0,0 +1,450 @@ +""" +test methods relating to generic function evaluation +the so-called white/black lists +""" + +from string import ascii_lowercase + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Series, + date_range, +) +import pandas._testing as tm +from pandas.core.groupby.base import ( + groupby_other_methods, + reduction_kernels, + transformation_kernels, +) + +AGG_FUNCTIONS = [ + "sum", + "prod", + "min", + "max", + "median", + "mean", + "skew", + "mad", + "std", + "var", + "sem", +] +AGG_FUNCTIONS_WITH_SKIPNA = ["skew", "mad"] + +df_allowlist = [ + "quantile", + "fillna", + "mad", + "take", + "idxmax", + "idxmin", + "tshift", + "skew", + "plot", + "hist", + "dtypes", + "corrwith", + "corr", + "cov", + "diff", +] + + +@pytest.fixture(params=df_allowlist) +def df_allowlist_fixture(request): + return request.param + + +s_allowlist = [ + "quantile", + "fillna", + "mad", + "take", + "idxmax", + "idxmin", + "tshift", + "skew", + "plot", + "hist", + "dtype", + "corr", + "cov", + "diff", + "unique", + "nlargest", + "nsmallest", + "is_monotonic_increasing", + "is_monotonic_decreasing", +] + + +@pytest.fixture(params=s_allowlist) +def s_allowlist_fixture(request): + return request.param + + +@pytest.fixture +def df(): + return DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": ["one", "one", "two", "three", "two", "two", "one", "three"], + "C": 
np.random.randn(8), + "D": np.random.randn(8), + } + ) + + +@pytest.fixture +def df_letters(): + letters = np.array(list(ascii_lowercase)) + N = 10 + random_letters = letters.take(np.random.randint(0, 26, N)) + df = DataFrame( + { + "floats": N / 10 * Series(np.random.random(N)), + "letters": Series(random_letters), + } + ) + return df + + +@pytest.mark.parametrize("allowlist", [df_allowlist, s_allowlist]) +def test_groupby_allowlist(df_letters, allowlist): + df = df_letters + if allowlist == df_allowlist: + # dataframe + obj = df_letters + else: + obj = df_letters["floats"] + + gb = obj.groupby(df.letters) + + assert set(allowlist) == set(gb._apply_allowlist) + + +def check_allowlist(obj, df, m): + # check the obj for a particular allowlist m + + gb = obj.groupby(df.letters) + + f = getattr(type(gb), m) + + # name + try: + n = f.__name__ + except AttributeError: + return + assert n == m + + # qualname + try: + n = f.__qualname__ + except AttributeError: + return + assert n.endswith(m) + + +def test_groupby_series_allowlist(df_letters, s_allowlist_fixture): + m = s_allowlist_fixture + df = df_letters + check_allowlist(df.letters, df, m) + + +def test_groupby_frame_allowlist(df_letters, df_allowlist_fixture): + m = df_allowlist_fixture + df = df_letters + check_allowlist(df, df, m) + + +@pytest.fixture +def raw_frame(multiindex_dataframe_random_data): + df = multiindex_dataframe_random_data + df.iloc[1, [1, 2]] = np.nan + df.iloc[7, [0, 1]] = np.nan + return df + + +@pytest.mark.parametrize("op", AGG_FUNCTIONS) +@pytest.mark.parametrize("level", [0, 1]) +@pytest.mark.parametrize("axis", [0, 1]) +@pytest.mark.parametrize("skipna", [True, False]) +@pytest.mark.parametrize("sort", [True, False]) +def test_regression_allowlist_methods(raw_frame, op, level, axis, skipna, sort): + # GH6944 + # GH 17537 + # explicitly test the allowlist methods + + if axis == 0: + frame = raw_frame + else: + frame = raw_frame.T + + if op in AGG_FUNCTIONS_WITH_SKIPNA: + grouped = frame.groupby(level=level, axis=axis, sort=sort) + result = getattr(grouped, op)(skipna=skipna) + with tm.assert_produces_warning(FutureWarning): + expected = getattr(frame, op)(level=level, axis=axis, skipna=skipna) + if sort: + expected = expected.sort_index(axis=axis, level=level) + tm.assert_frame_equal(result, expected) + else: + grouped = frame.groupby(level=level, axis=axis, sort=sort) + result = getattr(grouped, op)() + with tm.assert_produces_warning(FutureWarning): + expected = getattr(frame, op)(level=level, axis=axis) + if sort: + expected = expected.sort_index(axis=axis, level=level) + tm.assert_frame_equal(result, expected) + + +def test_groupby_blocklist(df_letters): + df = df_letters + s = df_letters.floats + + blocklist = [ + "eval", + "query", + "abs", + "where", + "mask", + "align", + "groupby", + "clip", + "astype", + "at", + "combine", + "consolidate", + "convert_objects", + ] + to_methods = [method for method in dir(df) if method.startswith("to_")] + + blocklist.extend(to_methods) + + for bl in blocklist: + for obj in (df, s): + gb = obj.groupby(df.letters) + + # e.g., to_csv + defined_but_not_allowed = ( + f"(?:^Cannot.+{repr(bl)}.+'{type(gb).__name__}'.+try " + f"using the 'apply' method$)" + ) + + # e.g., query, eval + not_defined = ( + f"(?:^'{type(gb).__name__}' object has no attribute {repr(bl)}$)" + ) + + msg = f"{defined_but_not_allowed}|{not_defined}" + + with pytest.raises(AttributeError, match=msg): + getattr(gb, bl) + + +def test_tab_completion(mframe): + grp = mframe.groupby(level="second") + results = {v 
for v in dir(grp) if not v.startswith("_")} + expected = { + "A", + "B", + "C", + "agg", + "aggregate", + "apply", + "boxplot", + "filter", + "first", + "get_group", + "groups", + "hist", + "indices", + "last", + "max", + "mean", + "median", + "min", + "ngroups", + "nth", + "ohlc", + "plot", + "prod", + "size", + "std", + "sum", + "transform", + "var", + "sem", + "count", + "nunique", + "head", + "describe", + "cummax", + "quantile", + "rank", + "cumprod", + "tail", + "resample", + "cummin", + "fillna", + "cumsum", + "cumcount", + "ngroup", + "all", + "shift", + "skew", + "take", + "tshift", + "pct_change", + "any", + "mad", + "corr", + "corrwith", + "cov", + "dtypes", + "ndim", + "diff", + "idxmax", + "idxmin", + "ffill", + "bfill", + "pad", + "backfill", + "rolling", + "expanding", + "pipe", + "sample", + "ewm", + "value_counts", + } + assert results == expected + + +def test_groupby_function_rename(mframe): + grp = mframe.groupby(level="second") + for name in ["sum", "prod", "min", "max", "first", "last"]: + f = getattr(grp, name) + assert f.__name__ == name + + +@pytest.mark.parametrize( + "method", + [ + "count", + "corr", + "cummax", + "cummin", + "cumprod", + "describe", + "rank", + "quantile", + "diff", + "shift", + "all", + "any", + "idxmin", + "idxmax", + "ffill", + "bfill", + "pct_change", + ], +) +def test_groupby_selection_with_methods(df, method): + # some methods which require DatetimeIndex + rng = date_range("2014", periods=len(df)) + df.index = rng + + g = df.groupby(["A"])[["C"]] + g_exp = df[["C"]].groupby(df["A"]) + # TODO check groupby with > 1 col ? + + res = getattr(g, method)() + exp = getattr(g_exp, method)() + + # should always be frames! + tm.assert_frame_equal(res, exp) + + +@pytest.mark.filterwarnings("ignore:tshift is deprecated:FutureWarning") +def test_groupby_selection_tshift_raises(df): + rng = date_range("2014", periods=len(df)) + df.index = rng + + g = df.groupby(["A"])[["C"]] + + # check that the index cache is cleared + with pytest.raises(ValueError, match="Freq was not set in the index"): + # GH#35937 + g.tshift() + + +def test_groupby_selection_other_methods(df): + # some methods which require DatetimeIndex + rng = date_range("2014", periods=len(df)) + df.columns.name = "foo" + df.index = rng + + g = df.groupby(["A"])[["C"]] + g_exp = df[["C"]].groupby(df["A"]) + + # methods which aren't just .foo() + tm.assert_frame_equal(g.fillna(0), g_exp.fillna(0)) + tm.assert_frame_equal(g.dtypes, g_exp.dtypes) + tm.assert_frame_equal(g.apply(lambda x: x.sum()), g_exp.apply(lambda x: x.sum())) + + tm.assert_frame_equal(g.resample("D").mean(), g_exp.resample("D").mean()) + tm.assert_frame_equal(g.resample("D").ohlc(), g_exp.resample("D").ohlc()) + + tm.assert_frame_equal( + g.filter(lambda x: len(x) == 3), g_exp.filter(lambda x: len(x) == 3) + ) + + +def test_all_methods_categorized(mframe): + grp = mframe.groupby(mframe.iloc[:, 0]) + names = {_ for _ in dir(grp) if not _.startswith("_")} - set(mframe.columns) + new_names = set(names) + new_names -= reduction_kernels + new_names -= transformation_kernels + new_names -= groupby_other_methods + + assert not (reduction_kernels & transformation_kernels) + assert not (reduction_kernels & groupby_other_methods) + assert not (transformation_kernels & groupby_other_methods) + + # new public method? + if new_names: + msg = f""" +There are uncategorized methods defined on the Grouper class: +{new_names}. + +Was a new method recently added? 
+ +Every public method on Grouper must appear in exactly one of the +following three lists defined in pandas.core.groupby.base: +- `reduction_kernels` +- `transformation_kernels` +- `groupby_other_methods` +See the comments in pandas/core/groupby/base.py for guidance on +how to fix this test. + """ + raise AssertionError(msg) + + # removed a public method? + all_categorized = reduction_kernels | transformation_kernels | groupby_other_methods + print(names) + print(all_categorized) + if not (names == all_categorized): + msg = f""" +Some methods which are supposed to be on the Grouper class +are missing: +{all_categorized - names}. + +They're still defined in one of the lists that live in pandas/core/groupby/base.py. +If you removed a method, you should update them. +""" + raise AssertionError(msg) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_any_all.py b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_any_all.py new file mode 100644 index 0000000..3f61a4e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_any_all.py @@ -0,0 +1,190 @@ +import builtins + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Index, + Series, + isna, +) +import pandas._testing as tm + + +@pytest.mark.parametrize("agg_func", ["any", "all"]) +@pytest.mark.parametrize("skipna", [True, False]) +@pytest.mark.parametrize( + "vals", + [ + ["foo", "bar", "baz"], + ["foo", "", ""], + ["", "", ""], + [1, 2, 3], + [1, 0, 0], + [0, 0, 0], + [1.0, 2.0, 3.0], + [1.0, 0.0, 0.0], + [0.0, 0.0, 0.0], + [True, True, True], + [True, False, False], + [False, False, False], + [np.nan, np.nan, np.nan], + ], +) +def test_groupby_bool_aggs(agg_func, skipna, vals): + df = DataFrame({"key": ["a"] * 3 + ["b"] * 3, "val": vals * 2}) + + # Figure out expectation using Python builtin + exp = getattr(builtins, agg_func)(vals) + + # edge case for missing data with skipna and 'any' + if skipna and all(isna(vals)) and agg_func == "any": + exp = False + + exp_df = DataFrame([exp] * 2, columns=["val"], index=Index(["a", "b"], name="key")) + result = getattr(df.groupby("key"), agg_func)(skipna=skipna) + tm.assert_frame_equal(result, exp_df) + + +def test_any(): + df = DataFrame( + [[1, 2, "foo"], [1, np.nan, "bar"], [3, np.nan, "baz"]], + columns=["A", "B", "C"], + ) + expected = DataFrame( + [[True, True], [False, True]], columns=["B", "C"], index=[1, 3] + ) + expected.index.name = "A" + result = df.groupby("A").any() + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("bool_agg_func", ["any", "all"]) +def test_bool_aggs_dup_column_labels(bool_agg_func): + # 21668 + df = DataFrame([[True, True]], columns=["a", "a"]) + grp_by = df.groupby([0]) + result = getattr(grp_by, bool_agg_func)() + + expected = df + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("bool_agg_func", ["any", "all"]) +@pytest.mark.parametrize("skipna", [True, False]) +@pytest.mark.parametrize( + "data", + [ + [False, False, False], + [True, True, True], + [pd.NA, pd.NA, pd.NA], + [False, pd.NA, False], + [True, pd.NA, True], + [True, pd.NA, False], + ], +) +def test_masked_kleene_logic(bool_agg_func, skipna, data): + # GH#37506 + ser = Series(data, dtype="boolean") + + # The result should match aggregating on the whole series. 
Correctness + # there is verified in test_reductions.py::test_any_all_boolean_kleene_logic + expected_data = getattr(ser, bool_agg_func)(skipna=skipna) + expected = Series(expected_data, dtype="boolean") + + result = ser.groupby([0, 0, 0]).agg(bool_agg_func, skipna=skipna) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "dtype1,dtype2,exp_col1,exp_col2", + [ + ( + "float", + "Float64", + np.array([True], dtype=bool), + pd.array([pd.NA], dtype="boolean"), + ), + ( + "Int64", + "float", + pd.array([pd.NA], dtype="boolean"), + np.array([True], dtype=bool), + ), + ( + "Int64", + "Int64", + pd.array([pd.NA], dtype="boolean"), + pd.array([pd.NA], dtype="boolean"), + ), + ( + "Float64", + "boolean", + pd.array([pd.NA], dtype="boolean"), + pd.array([pd.NA], dtype="boolean"), + ), + ], +) +def test_masked_mixed_types(dtype1, dtype2, exp_col1, exp_col2): + # GH#37506 + data = [1.0, np.nan] + df = DataFrame( + {"col1": pd.array(data, dtype=dtype1), "col2": pd.array(data, dtype=dtype2)} + ) + result = df.groupby([1, 1]).agg("all", skipna=False) + + expected = DataFrame({"col1": exp_col1, "col2": exp_col2}, index=[1]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("bool_agg_func", ["any", "all"]) +@pytest.mark.parametrize("dtype", ["Int64", "Float64", "boolean"]) +@pytest.mark.parametrize("skipna", [True, False]) +def test_masked_bool_aggs_skipna(bool_agg_func, dtype, skipna, frame_or_series): + # GH#40585 + obj = frame_or_series([pd.NA, 1], dtype=dtype) + expected_res = True + if not skipna and bool_agg_func == "all": + expected_res = pd.NA + expected = frame_or_series([expected_res], index=[1], dtype="boolean") + + result = obj.groupby([1, 1]).agg(bool_agg_func, skipna=skipna) + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize( + "bool_agg_func,data,expected_res", + [ + ("any", [pd.NA, np.nan], False), + ("any", [pd.NA, 1, np.nan], True), + ("all", [pd.NA, pd.NaT], True), + ("all", [pd.NA, False, pd.NaT], False), + ], +) +def test_object_type_missing_vals(bool_agg_func, data, expected_res, frame_or_series): + # GH#37501 + obj = frame_or_series(data, dtype=object) + result = obj.groupby([1] * len(data)).agg(bool_agg_func) + expected = frame_or_series([expected_res], index=[1], dtype="bool") + tm.assert_equal(result, expected) + + +@pytest.mark.filterwarnings("ignore:Dropping invalid columns:FutureWarning") +@pytest.mark.parametrize("bool_agg_func", ["any", "all"]) +def test_object_NA_raises_with_skipna_false(bool_agg_func): + # GH#37501 + ser = Series([pd.NA], dtype=object) + with pytest.raises(TypeError, match="boolean value of NA is ambiguous"): + ser.groupby([1]).agg(bool_agg_func, skipna=False) + + +@pytest.mark.parametrize("bool_agg_func", ["any", "all"]) +def test_empty(frame_or_series, bool_agg_func): + # GH 45231 + kwargs = {"columns": ["a"]} if frame_or_series is DataFrame else {"name": "a"} + obj = frame_or_series(**kwargs, dtype=object) + result = getattr(obj.groupby(obj.index), bool_agg_func)() + expected = frame_or_series(**kwargs, dtype=bool) + tm.assert_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_apply.py b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_apply.py new file mode 100644 index 0000000..22a4ce3 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_apply.py @@ -0,0 +1,1249 @@ +from datetime import ( + date, + datetime, +) +from io import StringIO + +import numpy as np +import pytest + +import pandas as pd +from 
pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + bdate_range, +) +import pandas._testing as tm +from pandas.core.api import Int64Index + + +def test_apply_issues(): + # GH 5788 + + s = """2011.05.16,00:00,1.40893 +2011.05.16,01:00,1.40760 +2011.05.16,02:00,1.40750 +2011.05.16,03:00,1.40649 +2011.05.17,02:00,1.40893 +2011.05.17,03:00,1.40760 +2011.05.17,04:00,1.40750 +2011.05.17,05:00,1.40649 +2011.05.18,02:00,1.40893 +2011.05.18,03:00,1.40760 +2011.05.18,04:00,1.40750 +2011.05.18,05:00,1.40649""" + + df = pd.read_csv( + StringIO(s), + header=None, + names=["date", "time", "value"], + parse_dates=[["date", "time"]], + ) + df = df.set_index("date_time") + + expected = df.groupby(df.index.date).idxmax() + result = df.groupby(df.index.date).apply(lambda x: x.idxmax()) + tm.assert_frame_equal(result, expected) + + # GH 5789 + # don't auto coerce dates + df = pd.read_csv(StringIO(s), header=None, names=["date", "time", "value"]) + exp_idx = Index( + ["2011.05.16", "2011.05.17", "2011.05.18"], dtype=object, name="date" + ) + expected = Series(["00:00", "02:00", "02:00"], index=exp_idx) + result = df.groupby("date").apply(lambda x: x["time"][x["value"].idxmax()]) + tm.assert_series_equal(result, expected) + + +def test_apply_trivial(): + # GH 20066 + # trivial apply: ignore input and return a constant dataframe. + df = DataFrame( + {"key": ["a", "a", "b", "b", "a"], "data": [1.0, 2.0, 3.0, 4.0, 5.0]}, + columns=["key", "data"], + ) + expected = pd.concat([df.iloc[1:], df.iloc[1:]], axis=1, keys=["float64", "object"]) + result = df.groupby([str(x) for x in df.dtypes], axis=1).apply( + lambda x: df.iloc[1:] + ) + + tm.assert_frame_equal(result, expected) + + +def test_apply_trivial_fail(): + # GH 20066 + df = DataFrame( + {"key": ["a", "a", "b", "b", "a"], "data": [1.0, 2.0, 3.0, 4.0, 5.0]}, + columns=["key", "data"], + ) + expected = pd.concat([df, df], axis=1, keys=["float64", "object"]) + result = df.groupby([str(x) for x in df.dtypes], axis=1).apply(lambda x: df) + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "df, group_names", + [ + (DataFrame({"a": [1, 1, 1, 2, 3], "b": ["a", "a", "a", "b", "c"]}), [1, 2, 3]), + (DataFrame({"a": [0, 0, 1, 1], "b": [0, 1, 0, 1]}), [0, 1]), + (DataFrame({"a": [1]}), [1]), + (DataFrame({"a": [1, 1, 1, 2, 2, 1, 1, 2], "b": range(8)}), [1, 2]), + (DataFrame({"a": [1, 2, 3, 1, 2, 3], "two": [4, 5, 6, 7, 8, 9]}), [1, 2, 3]), + ( + DataFrame( + { + "a": list("aaabbbcccc"), + "B": [3, 4, 3, 6, 5, 2, 1, 9, 5, 4], + "C": [4, 0, 2, 2, 2, 7, 8, 6, 2, 8], + } + ), + ["a", "b", "c"], + ), + (DataFrame([[1, 2, 3], [2, 2, 3]], columns=["a", "b", "c"]), [1, 2]), + ], + ids=[ + "GH2936", + "GH7739 & GH10519", + "GH10519", + "GH2656", + "GH12155", + "GH20084", + "GH21417", + ], +) +def test_group_apply_once_per_group(df, group_names): + # GH2936, GH7739, GH10519, GH2656, GH12155, GH20084, GH21417 + + # This test should ensure that a function is only evaluated + # once per group. 
Previously the function had been evaluated twice + # on the first group to check if the Cython index slider is safe to use + # This test ensures that the side effect (append to list) is only triggered + # once per group + + names = [] + # cannot parameterize over the functions since they need external + # `names` to detect side effects + + def f_copy(group): + # this takes the fast apply path + names.append(group.name) + return group.copy() + + def f_nocopy(group): + # this takes the slow apply path + names.append(group.name) + return group + + def f_scalar(group): + # GH7739, GH2656 + names.append(group.name) + return 0 + + def f_none(group): + # GH10519, GH12155, GH21417 + names.append(group.name) + return None + + def f_constant_df(group): + # GH2936, GH20084 + names.append(group.name) + return DataFrame({"a": [1], "b": [1]}) + + for func in [f_copy, f_nocopy, f_scalar, f_none, f_constant_df]: + del names[:] + + df.groupby("a").apply(func) + assert names == group_names + + +def test_group_apply_once_per_group2(capsys): + # GH: 31111 + # groupby-apply needs to execute len(set(group_by_columns)) times + + expected = 2  # Number of times `apply` should call a function for the current test + + df = DataFrame( + { + "group_by_column": [0, 0, 0, 0, 1, 1, 1, 1], + "test_column": ["0", "2", "4", "6", "8", "10", "12", "14"], + }, + index=["0", "2", "4", "6", "8", "10", "12", "14"], + ) + + df.groupby("group_by_column").apply(lambda df: print("function_called")) + + result = capsys.readouterr().out.count("function_called") + # If `groupby` behaves unexpectedly, this test will break + assert result == expected + + +def test_apply_fast_slow_identical(): + # GH 31613 + + df = DataFrame({"A": [0, 0, 1], "b": range(3)}) + + # For simple index structures we check for fast/slow apply using + # an identity check on in/output + def slow(group): + return group + + def fast(group): + return group.copy() + + fast_df = df.groupby("A").apply(fast) + slow_df = df.groupby("A").apply(slow) + + tm.assert_frame_equal(fast_df, slow_df) + + +@pytest.mark.parametrize( + "func", + [ + lambda x: x, + lambda x: x[:], + lambda x: x.copy(deep=False), + lambda x: x.copy(deep=True), + ], +) +def test_groupby_apply_identity_maybecopy_index_identical(func): + # GH 14927 + # Whether the function returns a copy of the input data or not should not + # have an impact on the index structure of the result since this is not + # transparent to the user + + df = DataFrame({"g": [1, 2, 2, 2], "a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}) + + result = df.groupby("g").apply(func) + tm.assert_frame_equal(result, df) + + +def test_apply_with_mixed_dtype(): + # GH3480, apply with mixed dtype on axis=1 breaks in 0.11 + df = DataFrame( + { + "foo1": np.random.randn(6), + "foo2": ["one", "two", "two", "three", "one", "two"], + } + ) + result = df.apply(lambda x: x, axis=1).dtypes + expected = df.dtypes + tm.assert_series_equal(result, expected) + + # GH 3610 incorrect dtype conversion with as_index=False + df = DataFrame({"c1": [1, 2, 6, 6, 8]}) + df["c2"] = df.c1 / 2.0 + result1 = df.groupby("c2").mean().reset_index().c2 + result2 = df.groupby("c2", as_index=False).mean().c2 + tm.assert_series_equal(result1, result2) + + +def test_groupby_as_index_apply(df): + # GH #4648 and #3417 + df = DataFrame( + { + "item_id": ["b", "b", "a", "c", "a", "b"], + "user_id": [1, 2, 1, 1, 3, 1], + "time": range(6), + } + ) + + g_as = df.groupby("user_id", as_index=True) + g_not_as = df.groupby("user_id", as_index=False) + + res_as = g_as.head(2).index + res_not_as = 
g_not_as.head(2).index + exp = Index([0, 1, 2, 4]) + tm.assert_index_equal(res_as, exp) + tm.assert_index_equal(res_not_as, exp) + + res_as_apply = g_as.apply(lambda x: x.head(2)).index + res_not_as_apply = g_not_as.apply(lambda x: x.head(2)).index + + # apply doesn't maintain the original ordering + # changed in GH5610 as the as_index=False returns a MI here + exp_not_as_apply = MultiIndex.from_tuples([(0, 0), (0, 2), (1, 1), (2, 4)]) + tp = [(1, 0), (1, 2), (2, 1), (3, 4)] + exp_as_apply = MultiIndex.from_tuples(tp, names=["user_id", None]) + + tm.assert_index_equal(res_as_apply, exp_as_apply) + tm.assert_index_equal(res_not_as_apply, exp_not_as_apply) + + ind = Index(list("abcde")) + df = DataFrame([[1, 2], [2, 3], [1, 4], [1, 5], [2, 6]], index=ind) + res = df.groupby(0, as_index=False).apply(lambda x: x).index + tm.assert_index_equal(res, ind) + + +def test_apply_concat_preserve_names(three_group): + grouped = three_group.groupby(["A", "B"]) + + def desc(group): + result = group.describe() + result.index.name = "stat" + return result + + def desc2(group): + result = group.describe() + result.index.name = "stat" + result = result[: len(group)] + # weirdo + return result + + def desc3(group): + result = group.describe() + + # names are different + result.index.name = f"stat_{len(group):d}" + + result = result[: len(group)] + # weirdo + return result + + result = grouped.apply(desc) + assert result.index.names == ("A", "B", "stat") + + result2 = grouped.apply(desc2) + assert result2.index.names == ("A", "B", "stat") + + result3 = grouped.apply(desc3) + assert result3.index.names == ("A", "B", None) + + +def test_apply_series_to_frame(): + def f(piece): + with np.errstate(invalid="ignore"): + logged = np.log(piece) + return DataFrame( + {"value": piece, "demeaned": piece - piece.mean(), "logged": logged} + ) + + dr = bdate_range("1/1/2000", periods=100) + ts = Series(np.random.randn(100), index=dr) + + grouped = ts.groupby(lambda x: x.month) + result = grouped.apply(f) + + assert isinstance(result, DataFrame) + tm.assert_index_equal(result.index, ts.index) + + +def test_apply_series_yield_constant(df): + result = df.groupby(["A", "B"])["C"].apply(len) + assert result.index.names[:2] == ("A", "B") + + +def test_apply_frame_yield_constant(df): + # GH13568 + result = df.groupby(["A", "B"]).apply(len) + assert isinstance(result, Series) + assert result.name is None + + result = df.groupby(["A", "B"])[["C", "D"]].apply(len) + assert isinstance(result, Series) + assert result.name is None + + +def test_apply_frame_to_series(df): + grouped = df.groupby(["A", "B"]) + result = grouped.apply(len) + expected = grouped.count()["C"] + tm.assert_index_equal(result.index, expected.index) + tm.assert_numpy_array_equal(result.values, expected.values) + + +def test_apply_frame_not_as_index_column_name(df): + # GH 35964 - path within _wrap_applied_output not hit by a test + grouped = df.groupby(["A", "B"], as_index=False) + result = grouped.apply(len) + expected = grouped.count().rename(columns={"C": np.nan}).drop(columns="D") + # TODO(GH#34306): Use assert_frame_equal when column name is not np.nan + tm.assert_index_equal(result.index, expected.index) + tm.assert_numpy_array_equal(result.values, expected.values) + + +def test_apply_frame_concat_series(): + def trans(group): + return group.groupby("B")["C"].sum().sort_values()[:2] + + def trans2(group): + grouped = group.groupby(df.reindex(group.index)["B"]) + return grouped.sum().sort_values()[:2] + + df = DataFrame( + { + "A": np.random.randint(0, 5, 
1000), + "B": np.random.randint(0, 5, 1000), + "C": np.random.randn(1000), + } + ) + + result = df.groupby("A").apply(trans) + exp = df.groupby("A")["C"].apply(trans2) + tm.assert_series_equal(result, exp, check_names=False) + assert result.name == "C" + + +def test_apply_transform(ts): + grouped = ts.groupby(lambda x: x.month) + result = grouped.apply(lambda x: x * 2) + expected = grouped.transform(lambda x: x * 2) + tm.assert_series_equal(result, expected) + + +def test_apply_multikey_corner(tsframe): + grouped = tsframe.groupby([lambda x: x.year, lambda x: x.month]) + + def f(group): + return group.sort_values("A")[-5:] + + result = grouped.apply(f) + for key, group in grouped: + tm.assert_frame_equal(result.loc[key], f(group)) + + +def test_apply_chunk_view(): + # Low level tinkering could be unsafe, make sure not + df = DataFrame({"key": [1, 1, 1, 2, 2, 2, 3, 3, 3], "value": range(9)}) + + result = df.groupby("key", group_keys=False).apply(lambda x: x[:2]) + expected = df.take([0, 1, 3, 4, 6, 7]) + tm.assert_frame_equal(result, expected) + + +def test_apply_no_name_column_conflict(): + df = DataFrame( + { + "name": [1, 1, 1, 1, 1, 1, 2, 2, 2, 2], + "name2": [0, 0, 0, 1, 1, 1, 0, 0, 1, 1], + "value": range(9, -1, -1), + } + ) + + # it works! #2605 + grouped = df.groupby(["name", "name2"]) + grouped.apply(lambda x: x.sort_values("value", inplace=True)) + + +def test_apply_typecast_fail(): + df = DataFrame( + { + "d": [1.0, 1.0, 1.0, 2.0, 2.0, 2.0], + "c": np.tile(["a", "b", "c"], 2), + "v": np.arange(1.0, 7.0), + } + ) + + def f(group): + v = group["v"] + group["v2"] = (v - v.min()) / (v.max() - v.min()) + return group + + result = df.groupby("d").apply(f) + + expected = df.copy() + expected["v2"] = np.tile([0.0, 0.5, 1], 2) + + tm.assert_frame_equal(result, expected) + + +def test_apply_multiindex_fail(): + index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3]]) + df = DataFrame( + { + "d": [1.0, 1.0, 1.0, 2.0, 2.0, 2.0], + "c": np.tile(["a", "b", "c"], 2), + "v": np.arange(1.0, 7.0), + }, + index=index, + ) + + def f(group): + v = group["v"] + group["v2"] = (v - v.min()) / (v.max() - v.min()) + return group + + result = df.groupby("d").apply(f) + + expected = df.copy() + expected["v2"] = np.tile([0.0, 0.5, 1], 2) + + tm.assert_frame_equal(result, expected) + + +def test_apply_corner(tsframe): + result = tsframe.groupby(lambda x: x.year).apply(lambda x: x * 2) + expected = tsframe * 2 + tm.assert_frame_equal(result, expected) + + +def test_apply_without_copy(): + # GH 5545 + # returning a non-copy in an applied function fails + + data = DataFrame( + { + "id_field": [100, 100, 200, 300], + "category": ["a", "b", "c", "c"], + "value": [1, 2, 3, 4], + } + ) + + def filt1(x): + if x.shape[0] == 1: + return x.copy() + else: + return x[x.category == "c"] + + def filt2(x): + if x.shape[0] == 1: + return x + else: + return x[x.category == "c"] + + expected = data.groupby("id_field").apply(filt1) + result = data.groupby("id_field").apply(filt2) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("test_series", [True, False]) +def test_apply_with_duplicated_non_sorted_axis(test_series): + # GH 30667 + df = DataFrame( + [["x", "p"], ["x", "p"], ["x", "o"]], columns=["X", "Y"], index=[1, 2, 2] + ) + if test_series: + ser = df.set_index("Y")["X"] + result = ser.groupby(level=0).apply(lambda x: x) + + # not expecting the order to remain the same for duplicated axis + result = result.sort_index() + expected = ser.sort_index() + tm.assert_series_equal(result, 
expected) + else: + result = df.groupby("Y").apply(lambda x: x) + + # not expecting the order to remain the same for duplicated axis + result = result.sort_values("Y") + expected = df.sort_values("Y") + tm.assert_frame_equal(result, expected) + + +def test_apply_reindex_values(): + # GH: 26209 + # reindexing from a single column of a groupby object with duplicate indices caused + # a ValueError (cannot reindex from duplicate axis) in 0.24.2, the problem was + # solved in #30679 + values = [1, 2, 3, 4] + indices = [1, 1, 2, 2] + df = DataFrame({"group": ["Group1", "Group2"] * 2, "value": values}, index=indices) + expected = Series(values, index=indices, name="value") + + def reindex_helper(x): + return x.reindex(np.arange(x.index.min(), x.index.max() + 1)) + + # the following group by raised a ValueError + result = df.groupby("group").value.apply(reindex_helper) + tm.assert_series_equal(expected, result) + + +def test_apply_corner_cases(): + # #535, can't use sliding iterator + + N = 1000 + labels = np.random.randint(0, 100, size=N) + df = DataFrame( + { + "key": labels, + "value1": np.random.randn(N), + "value2": ["foo", "bar", "baz", "qux"] * (N // 4), + } + ) + + grouped = df.groupby("key") + + def f(g): + g["value3"] = g["value1"] * 2 + return g + + result = grouped.apply(f) + assert "value3" in result + + +def test_apply_numeric_coercion_when_datetime(): + # In the past, group-by/apply operations have been over-eager + # in converting dtypes to numeric, in the presence of datetime + # columns. Various GH issues were filed, the reproductions + # for which are here. + + # GH 15670 + df = DataFrame( + {"Number": [1, 2], "Date": ["2017-03-02"] * 2, "Str": ["foo", "inf"]} + ) + expected = df.groupby(["Number"]).apply(lambda x: x.iloc[0]) + df.Date = pd.to_datetime(df.Date) + result = df.groupby(["Number"]).apply(lambda x: x.iloc[0]) + tm.assert_series_equal(result["Str"], expected["Str"]) + + # GH 15421 + df = DataFrame( + {"A": [10, 20, 30], "B": ["foo", "3", "4"], "T": [pd.Timestamp("12:31:22")] * 3} + ) + + def get_B(g): + return g.iloc[0][["B"]] + + result = df.groupby("A").apply(get_B)["B"] + expected = df.B + expected.index = df.A + tm.assert_series_equal(result, expected) + + # GH 14423 + def predictions(tool): + out = Series(index=["p1", "p2", "useTime"], dtype=object) + if "step1" in list(tool.State): + out["p1"] = str(tool[tool.State == "step1"].Machine.values[0]) + if "step2" in list(tool.State): + out["p2"] = str(tool[tool.State == "step2"].Machine.values[0]) + out["useTime"] = str(tool[tool.State == "step2"].oTime.values[0]) + return out + + df1 = DataFrame( + { + "Key": ["B", "B", "A", "A"], + "State": ["step1", "step2", "step1", "step2"], + "oTime": ["", "2016-09-19 05:24:33", "", "2016-09-19 23:59:04"], + "Machine": ["23", "36L", "36R", "36R"], + } + ) + df2 = df1.copy() + df2.oTime = pd.to_datetime(df2.oTime) + expected = df1.groupby("Key").apply(predictions).p1 + result = df2.groupby("Key").apply(predictions).p1 + tm.assert_series_equal(expected, result) + + +def test_apply_aggregating_timedelta_and_datetime(): + # Regression test for GH 15562 + # The following groupby caused ValueErrors and IndexErrors pre 0.20.0 + + df = DataFrame( + { + "clientid": ["A", "B", "C"], + "datetime": [np.datetime64("2017-02-01 00:00:00")] * 3, + } + ) + df["time_delta_zero"] = df.datetime - df.datetime + result = df.groupby("clientid").apply( + lambda ddf: Series( + {"clientid_age": ddf.time_delta_zero.min(), "date": ddf.datetime.min()} + ) + ) + expected = DataFrame( + { + "clientid": ["A", 
"B", "C"], + "clientid_age": [np.timedelta64(0, "D")] * 3, + "date": [np.datetime64("2017-02-01 00:00:00")] * 3, + } + ).set_index("clientid") + + tm.assert_frame_equal(result, expected) + + +def test_apply_groupby_datetimeindex(): + # GH 26182 + # groupby apply failed on dataframe with DatetimeIndex + + data = [["A", 10], ["B", 20], ["B", 30], ["C", 40], ["C", 50]] + df = DataFrame( + data, columns=["Name", "Value"], index=pd.date_range("2020-09-01", "2020-09-05") + ) + + result = df.groupby("Name").sum() + + expected = DataFrame({"Name": ["A", "B", "C"], "Value": [10, 50, 90]}) + expected.set_index("Name", inplace=True) + + tm.assert_frame_equal(result, expected) + + +def test_time_field_bug(): + # Test a fix for the following error related to GH issue 11324 When + # non-key fields in a group-by dataframe contained time-based fields + # that were not returned by the apply function, an exception would be + # raised. + + df = DataFrame({"a": 1, "b": [datetime.now() for nn in range(10)]}) + + def func_with_no_date(batch): + return Series({"c": 2}) + + def func_with_date(batch): + return Series({"b": datetime(2015, 1, 1), "c": 2}) + + dfg_no_conversion = df.groupby(by=["a"]).apply(func_with_no_date) + dfg_no_conversion_expected = DataFrame({"c": 2}, index=[1]) + dfg_no_conversion_expected.index.name = "a" + + dfg_conversion = df.groupby(by=["a"]).apply(func_with_date) + dfg_conversion_expected = DataFrame({"b": datetime(2015, 1, 1), "c": 2}, index=[1]) + dfg_conversion_expected.index.name = "a" + + tm.assert_frame_equal(dfg_no_conversion, dfg_no_conversion_expected) + tm.assert_frame_equal(dfg_conversion, dfg_conversion_expected) + + +def test_gb_apply_list_of_unequal_len_arrays(): + + # GH1738 + df = DataFrame( + { + "group1": ["a", "a", "a", "b", "b", "b", "a", "a", "a", "b", "b", "b"], + "group2": ["c", "c", "d", "d", "d", "e", "c", "c", "d", "d", "d", "e"], + "weight": [1.1, 2, 3, 4, 5, 6, 2, 4, 6, 8, 1, 2], + "value": [7.1, 8, 9, 10, 11, 12, 8, 7, 6, 5, 4, 3], + } + ) + df = df.set_index(["group1", "group2"]) + df_grouped = df.groupby(level=["group1", "group2"], sort=True) + + def noddy(value, weight): + out = np.array(value * weight).repeat(3) + return out + + # the kernel function returns arrays of unequal length + # pandas sniffs the first one, sees it's an array and not + # a list, and assumed the rest are of equal length + # and so tries a vstack + + # don't die + df_grouped.apply(lambda x: noddy(x.value, x.weight)) + + +def test_groupby_apply_all_none(): + # Tests to make sure no errors if apply function returns all None + # values. Issue 9684. + test_df = DataFrame({"groups": [0, 0, 1, 1], "random_vars": [8, 7, 4, 5]}) + + def test_func(x): + pass + + result = test_df.groupby("groups").apply(test_func) + expected = DataFrame() + tm.assert_frame_equal(result, expected) + + +def test_groupby_apply_none_first(): + # GH 12824. Tests if apply returns None first. 
+ test_df1 = DataFrame({"groups": [1, 1, 1, 2], "vars": [0, 1, 2, 3]}) + test_df2 = DataFrame({"groups": [1, 2, 2, 2], "vars": [0, 1, 2, 3]}) + + def test_func(x): + if x.shape[0] < 2: + return None + return x.iloc[[0, -1]] + + result1 = test_df1.groupby("groups").apply(test_func) + result2 = test_df2.groupby("groups").apply(test_func) + index1 = MultiIndex.from_arrays([[1, 1], [0, 2]], names=["groups", None]) + index2 = MultiIndex.from_arrays([[2, 2], [1, 3]], names=["groups", None]) + expected1 = DataFrame({"groups": [1, 1], "vars": [0, 2]}, index=index1) + expected2 = DataFrame({"groups": [2, 2], "vars": [1, 3]}, index=index2) + tm.assert_frame_equal(result1, expected1) + tm.assert_frame_equal(result2, expected2) + + +def test_groupby_apply_return_empty_chunk(): + # GH 22221: apply filter which returns some empty groups + df = DataFrame({"value": [0, 1], "group": ["filled", "empty"]}) + groups = df.groupby("group") + result = groups.apply(lambda group: group[group.value != 1]["value"]) + expected = Series( + [0], + name="value", + index=MultiIndex.from_product( + [["empty", "filled"], [0]], names=["group", None] + ).drop("empty"), + ) + tm.assert_series_equal(result, expected) + + +def test_apply_with_mixed_types(): + # gh-20949 + df = DataFrame({"A": "a a b".split(), "B": [1, 2, 3], "C": [4, 6, 5]}) + g = df.groupby("A") + + result = g.transform(lambda x: x / x.sum()) + expected = DataFrame({"B": [1 / 3.0, 2 / 3.0, 1], "C": [0.4, 0.6, 1.0]}) + tm.assert_frame_equal(result, expected) + + result = g.apply(lambda x: x / x.sum()) + tm.assert_frame_equal(result, expected) + + +def test_func_returns_object(): + # GH 28652 + df = DataFrame({"a": [1, 2]}, index=Int64Index([1, 2])) + result = df.groupby("a").apply(lambda g: g.index) + expected = Series( + [Int64Index([1]), Int64Index([2])], index=Int64Index([1, 2], name="a") + ) + + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "group_column_dtlike", + [datetime.today(), datetime.today().date(), datetime.today().time()], +) +def test_apply_datetime_issue(group_column_dtlike): + # GH-28247 + # groupby-apply throws an error if one of the columns in the DataFrame + # is a datetime object and the column labels are different from + # standard int values in range(len(num_columns)) + + df = DataFrame({"a": ["foo"], "b": [group_column_dtlike]}) + result = df.groupby("a").apply(lambda x: Series(["spam"], index=[42])) + + expected = DataFrame( + ["spam"], Index(["foo"], dtype="object", name="a"), columns=[42] + ) + tm.assert_frame_equal(result, expected) + + +def test_apply_series_return_dataframe_groups(): + # GH 10078 + tdf = DataFrame( + { + "day": { + 0: pd.Timestamp("2015-02-24 00:00:00"), + 1: pd.Timestamp("2015-02-24 00:00:00"), + 2: pd.Timestamp("2015-02-24 00:00:00"), + 3: pd.Timestamp("2015-02-24 00:00:00"), + 4: pd.Timestamp("2015-02-24 00:00:00"), + }, + "userAgent": { + 0: "some UA string", + 1: "some UA string", + 2: "some UA string", + 3: "another UA string", + 4: "some UA string", + }, + "userId": { + 0: "17661101", + 1: "17661101", + 2: "17661101", + 3: "17661101", + 4: "17661101", + }, + } + ) + + def most_common_values(df): + return Series({c: s.value_counts().index[0] for c, s in df.iteritems()}) + + result = tdf.groupby("day").apply(most_common_values)["userId"] + expected = Series( + ["17661101"], index=pd.DatetimeIndex(["2015-02-24"], name="day"), name="userId" + ) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("category", [False, True]) +def 
test_apply_multi_level_name(category): + # https://github.com/pandas-dev/pandas/issues/31068 + b = [1, 2] * 5 + if category: + b = pd.Categorical(b, categories=[1, 2, 3]) + expected_index = pd.CategoricalIndex([1, 2], categories=[1, 2, 3], name="B") + else: + expected_index = Index([1, 2], name="B") + df = DataFrame( + {"A": np.arange(10), "B": b, "C": list(range(10)), "D": list(range(10))} + ).set_index(["A", "B"]) + result = df.groupby("B").apply(lambda x: x.sum()) + expected = DataFrame({"C": [20, 25], "D": [20, 25]}, index=expected_index) + tm.assert_frame_equal(result, expected) + assert df.index.names == ["A", "B"] + + +def test_groupby_apply_datetime_result_dtypes(): + # GH 14849 + data = DataFrame.from_records( + [ + (pd.Timestamp(2016, 1, 1), "red", "dark", 1, "8"), + (pd.Timestamp(2015, 1, 1), "green", "stormy", 2, "9"), + (pd.Timestamp(2014, 1, 1), "blue", "bright", 3, "10"), + (pd.Timestamp(2013, 1, 1), "blue", "calm", 4, "potato"), + ], + columns=["observation", "color", "mood", "intensity", "score"], + ) + result = data.groupby("color").apply(lambda g: g.iloc[0]).dtypes + expected = Series( + [np.dtype("datetime64[ns]"), object, object, np.int64, object], + index=["observation", "color", "mood", "intensity", "score"], + ) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "index", + [ + pd.CategoricalIndex(list("abc")), + pd.interval_range(0, 3), + pd.period_range("2020", periods=3, freq="D"), + MultiIndex.from_tuples([("a", 0), ("a", 1), ("b", 0)]), + ], +) +def test_apply_index_has_complex_internals(index): + # GH 31248 + df = DataFrame({"group": [1, 1, 2], "value": [0, 1, 0]}, index=index) + result = df.groupby("group").apply(lambda x: x) + tm.assert_frame_equal(result, df) + + +@pytest.mark.parametrize( + "function, expected_values", + [ + (lambda x: x.index.to_list(), [[0, 1], [2, 3]]), + (lambda x: set(x.index.to_list()), [{0, 1}, {2, 3}]), + (lambda x: tuple(x.index.to_list()), [(0, 1), (2, 3)]), + ( + lambda x: {n: i for (n, i) in enumerate(x.index.to_list())}, + [{0: 0, 1: 1}, {0: 2, 1: 3}], + ), + ( + lambda x: [{n: i} for (n, i) in enumerate(x.index.to_list())], + [[{0: 0}, {1: 1}], [{0: 2}, {1: 3}]], + ), + ], +) +def test_apply_function_returns_non_pandas_non_scalar(function, expected_values): + # GH 31441 + df = DataFrame(["A", "A", "B", "B"], columns=["groups"]) + result = df.groupby("groups").apply(function) + expected = Series(expected_values, index=Index(["A", "B"], name="groups")) + tm.assert_series_equal(result, expected) + + +def test_apply_function_returns_numpy_array(): + # GH 31605 + def fct(group): + return group["B"].values.flatten() + + df = DataFrame({"A": ["a", "a", "b", "none"], "B": [1, 2, 3, np.nan]}) + + result = df.groupby("A").apply(fct) + expected = Series( + [[1.0, 2.0], [3.0], [np.nan]], index=Index(["a", "b", "none"], name="A") + ) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("function", [lambda gr: gr.index, lambda gr: gr.index + 1 - 1]) +def test_apply_function_index_return(function): + # GH: 22541 + df = DataFrame([1, 2, 2, 2, 1, 2, 3, 1, 3, 1], columns=["id"]) + result = df.groupby("id").apply(function) + expected = Series( + [Index([0, 4, 7, 9]), Index([1, 2, 3, 5]), Index([6, 8])], + index=Index([1, 2, 3], name="id"), + ) + tm.assert_series_equal(result, expected) + + +def test_apply_function_with_indexing_return_column(): + # GH: 7002 + df = DataFrame( + { + "foo1": ["one", "two", "two", "three", "one", "two"], + "foo2": [1, 2, 4, 4, 5, 6], + } + ) + with 
tm.assert_produces_warning(FutureWarning, match="Select only valid"): + result = df.groupby("foo1", as_index=False).apply(lambda x: x.mean()) + expected = DataFrame({"foo1": ["one", "three", "two"], "foo2": [3.0, 4.0, 4.0]}) + tm.assert_frame_equal(result, expected) + + +def test_apply_with_timezones_aware(): + # GH: 27212 + dates = ["2001-01-01"] * 2 + ["2001-01-02"] * 2 + ["2001-01-03"] * 2 + index_no_tz = pd.DatetimeIndex(dates) + index_tz = pd.DatetimeIndex(dates, tz="UTC") + df1 = DataFrame({"x": list(range(2)) * 3, "y": range(6), "t": index_no_tz}) + df2 = DataFrame({"x": list(range(2)) * 3, "y": range(6), "t": index_tz}) + + result1 = df1.groupby("x", group_keys=False).apply(lambda df: df[["x", "y"]].copy()) + result2 = df2.groupby("x", group_keys=False).apply(lambda df: df[["x", "y"]].copy()) + + tm.assert_frame_equal(result1, result2) + + +def test_apply_is_unchanged_when_other_methods_are_called_first(reduction_func): + # GH #34656 + # GH #34271 + df = DataFrame( + { + "a": [99, 99, 99, 88, 88, 88], + "b": [1, 2, 3, 4, 5, 6], + "c": [10, 20, 30, 40, 50, 60], + } + ) + + expected = DataFrame( + {"a": [264, 297], "b": [15, 6], "c": [150, 60]}, + index=Index([88, 99], name="a"), + ) + + # Check output when no other methods are called before .apply() + grp = df.groupby(by="a") + result = grp.apply(sum) + tm.assert_frame_equal(result, expected) + + # Check output when another method is called before .apply() + grp = df.groupby(by="a") + args = {"nth": [0], "corrwith": [df]}.get(reduction_func, []) + _ = getattr(grp, reduction_func)(*args) + result = grp.apply(sum) + tm.assert_frame_equal(result, expected) + + +def test_apply_with_date_in_multiindex_does_not_convert_to_timestamp(): + # GH 29617 + + df = DataFrame( + { + "A": ["a", "a", "a", "b"], + "B": [ + date(2020, 1, 10), + date(2020, 1, 10), + date(2020, 2, 10), + date(2020, 2, 10), + ], + "C": [1, 2, 3, 4], + }, + index=Index([100, 101, 102, 103], name="idx"), + ) + + grp = df.groupby(["A", "B"]) + result = grp.apply(lambda x: x.head(1)) + + expected = df.iloc[[0, 2, 3]] + expected = expected.reset_index() + expected.index = MultiIndex.from_frame(expected[["A", "B", "idx"]]) + expected = expected.drop(columns="idx") + + tm.assert_frame_equal(result, expected) + for val in result.index.levels[1]: + assert type(val) is date + + +def test_apply_by_cols_equals_apply_by_rows_transposed(): + # GH 16646 + # Operating on the columns, or transposing and operating on the rows + # should give the same result. 
There was previously a bug where the + # by_rows operation would work fine, but by_cols would throw a ValueError + + df = DataFrame( + np.random.random([6, 4]), + columns=MultiIndex.from_product([["A", "B"], [1, 2]]), + ) + + by_rows = df.T.groupby(axis=0, level=0).apply( + lambda x: x.droplevel(axis=0, level=0) + ) + by_cols = df.groupby(axis=1, level=0).apply(lambda x: x.droplevel(axis=1, level=0)) + + tm.assert_frame_equal(by_cols, by_rows.T) + tm.assert_frame_equal(by_cols, df) + + +@pytest.mark.parametrize("dropna", [True, False]) +def test_apply_dropna_with_indexed_same(dropna): + # GH 38227 + # GH#43205 + df = DataFrame( + { + "col": [1, 2, 3, 4, 5], + "group": ["a", np.nan, np.nan, "b", "b"], + }, + index=list("xxyxz"), + ) + result = df.groupby("group", dropna=dropna).apply(lambda x: x) + expected = df.dropna() if dropna else df.iloc[[0, 3, 1, 2, 4]] + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "as_index, expected", + [ + [ + False, + DataFrame( + [[1, 1, 1], [2, 2, 1]], columns=Index(["a", "b", None], dtype=object) + ), + ], + [ + True, + Series( + [1, 1], index=MultiIndex.from_tuples([(1, 1), (2, 2)], names=["a", "b"]) + ), + ], + ], +) +def test_apply_as_index_constant_lambda(as_index, expected): + # GH 13217 + df = DataFrame({"a": [1, 1, 2, 2], "b": [1, 1, 2, 2], "c": [1, 1, 1, 1]}) + result = df.groupby(["a", "b"], as_index=as_index).apply(lambda x: 1) + tm.assert_equal(result, expected) + + +def test_sort_index_groups(): + # GH 20420 + df = DataFrame( + {"A": [1, 2, 3, 4, 5], "B": [6, 7, 8, 9, 0], "C": [1, 1, 1, 2, 2]}, + index=range(5), + ) + result = df.groupby("C").apply(lambda x: x.A.sort_index()) + expected = Series( + range(1, 6), + index=MultiIndex.from_tuples( + [(1, 0), (1, 1), (1, 2), (2, 3), (2, 4)], names=["C", None] + ), + name="A", + ) + tm.assert_series_equal(result, expected) + + +def test_positional_slice_groups_datetimelike(): + # GH 21651 + expected = DataFrame( + { + "date": pd.date_range("2010-01-01", freq="12H", periods=5), + "vals": range(5), + "let": list("abcde"), + } + ) + result = expected.groupby([expected.let, expected.date.dt.date]).apply( + lambda x: x.iloc[0:] + ) + tm.assert_frame_equal(result, expected) + + +def test_groupby_apply_shape_cache_safety(): + # GH#42702 this fails if we cache_readonly Block.shape + df = DataFrame({"A": ["a", "a", "b"], "B": [1, 2, 3], "C": [4, 6, 5]}) + gb = df.groupby("A") + result = gb[["B", "C"]].apply(lambda x: x.astype(float).max() - x.min()) + + expected = DataFrame( + {"B": [1.0, 0.0], "C": [2.0, 0.0]}, index=Index(["a", "b"], name="A") + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("dropna", [True, False]) +def test_apply_na(dropna): + # GH#28984 + df = DataFrame( + {"grp": [1, 1, 2, 2], "y": [1, 0, 2, 5], "z": [1, 2, np.nan, np.nan]} + ) + dfgrp = df.groupby("grp", dropna=dropna) + result = dfgrp.apply(lambda grp_df: grp_df.nlargest(1, "z")) + expected = dfgrp.apply(lambda x: x.sort_values("z", ascending=False).head(1)) + tm.assert_frame_equal(result, expected) + + +def test_apply_empty_string_nan_coerce_bug(): + # GH#24903 + result = ( + DataFrame( + { + "a": [1, 1, 2, 2], + "b": ["", "", "", ""], + "c": pd.to_datetime([1, 2, 3, 4], unit="s"), + } + ) + .groupby(["a", "b"]) + .apply(lambda df: df.iloc[-1]) + ) + expected = DataFrame( + [[1, "", pd.to_datetime(2, unit="s")], [2, "", pd.to_datetime(4, unit="s")]], + columns=["a", "b", "c"], + index=MultiIndex.from_tuples([(1, ""), (2, "")], names=["a", "b"]), + ) + tm.assert_frame_equal(result, 
expected) + + +@pytest.mark.parametrize("index_values", [[1, 2, 3], [1.0, 2.0, 3.0]]) +def test_apply_index_key_error_bug(index_values): + # GH 44310 + result = DataFrame( + { + "a": ["aa", "a2", "a3"], + "b": [1, 2, 3], + }, + index=Index(index_values), + ) + expected = DataFrame( + { + "b_mean": [2.0, 3.0, 1.0], + }, + index=Index(["a2", "a3", "aa"], name="a"), + ) + result = result.groupby("a").apply( + lambda df: Series([df["b"].mean()], index=["b_mean"]) + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "arg,idx", + [ + [ + [ + 1, + 2, + 3, + ], + [ + 0.1, + 0.3, + 0.2, + ], + ], + [ + [ + 1, + 2, + 3, + ], + [ + 0.1, + 0.2, + 0.3, + ], + ], + [ + [ + 1, + 4, + 3, + ], + [ + 0.1, + 0.4, + 0.2, + ], + ], + ], +) +def test_apply_nonmonotonic_float_index(arg, idx): + # GH 34455 + expected = DataFrame({"col": arg}, index=idx) + result = expected.groupby("col").apply(lambda x: x) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_apply_mutate.py b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_apply_mutate.py new file mode 100644 index 0000000..05c1f5b --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_apply_mutate.py @@ -0,0 +1,130 @@ +import numpy as np + +import pandas as pd +import pandas._testing as tm + + +def test_mutate_groups(): + + # GH3380 + + df = pd.DataFrame( + { + "cat1": ["a"] * 8 + ["b"] * 6, + "cat2": ["c"] * 2 + + ["d"] * 2 + + ["e"] * 2 + + ["f"] * 2 + + ["c"] * 2 + + ["d"] * 2 + + ["e"] * 2, + "cat3": [f"g{x}" for x in range(1, 15)], + "val": np.random.randint(100, size=14), + } + ) + + def f_copy(x): + x = x.copy() + x["rank"] = x.val.rank(method="min") + return x.groupby("cat2")["rank"].min() + + def f_no_copy(x): + x["rank"] = x.val.rank(method="min") + return x.groupby("cat2")["rank"].min() + + grpby_copy = df.groupby("cat1").apply(f_copy) + grpby_no_copy = df.groupby("cat1").apply(f_no_copy) + tm.assert_series_equal(grpby_copy, grpby_no_copy) + + +def test_no_mutate_but_looks_like(): + + # GH 8467 + # the first shows the mutation indicator + # the second does not, but should yield the same results + df = pd.DataFrame({"key": [1, 1, 1, 2, 2, 2, 3, 3, 3], "value": range(9)}) + + result1 = df.groupby("key", group_keys=True).apply(lambda x: x[:].key) + result2 = df.groupby("key", group_keys=True).apply(lambda x: x.key) + tm.assert_series_equal(result1, result2) + + +def test_apply_function_with_indexing(): + # GH: 33058 + df = pd.DataFrame( + {"col1": ["A", "A", "A", "B", "B", "B"], "col2": [1, 2, 3, 4, 5, 6]} + ) + + def fn(x): + x.col2[x.index[-1]] = 0 + return x.col2 + + result = df.groupby(["col1"], as_index=False).apply(fn) + expected = pd.Series( + [1, 2, 0, 4, 5, 0], + index=pd.MultiIndex.from_tuples( + [(0, 0), (0, 1), (0, 2), (1, 3), (1, 4), (1, 5)] + ), + name="col2", + ) + tm.assert_series_equal(result, expected) + + +def test_apply_mutate_columns_multiindex(): + # GH 12652 + df = pd.DataFrame( + { + ("C", "julian"): [1, 2, 3], + ("B", "geoffrey"): [1, 2, 3], + ("A", "julian"): [1, 2, 3], + ("B", "julian"): [1, 2, 3], + ("A", "geoffrey"): [1, 2, 3], + ("C", "geoffrey"): [1, 2, 3], + }, + columns=pd.MultiIndex.from_tuples( + [ + ("A", "julian"), + ("A", "geoffrey"), + ("B", "julian"), + ("B", "geoffrey"), + ("C", "julian"), + ("C", "geoffrey"), + ] + ), + ) + + def add_column(grouped): + name = grouped.columns[0][1] + grouped["sum", name] = grouped.sum(axis=1) + return grouped + + result = df.groupby(level=1,
axis=1).apply(add_column) + expected = pd.DataFrame( + [ + [1, 1, 1, 3, 1, 1, 1, 3], + [2, 2, 2, 6, 2, 2, 2, 6], + [ + 3, + 3, + 3, + 9, + 3, + 3, + 3, + 9, + ], + ], + columns=pd.MultiIndex.from_tuples( + [ + ("geoffrey", "A", "geoffrey"), + ("geoffrey", "B", "geoffrey"), + ("geoffrey", "C", "geoffrey"), + ("geoffrey", "sum", "geoffrey"), + ("julian", "A", "julian"), + ("julian", "B", "julian"), + ("julian", "C", "julian"), + ("julian", "sum", "julian"), + ] + ), + ) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_bin_groupby.py b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_bin_groupby.py new file mode 100644 index 0000000..8c30836 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_bin_groupby.py @@ -0,0 +1,69 @@ +import numpy as np +import pytest + +from pandas._libs import lib +import pandas.util._test_decorators as td + +import pandas as pd +import pandas._testing as tm + + +def assert_block_lengths(x): + assert len(x) == len(x._mgr.blocks[0].mgr_locs) + return 0 + + +def cumsum_max(x): + x.cumsum().max() + return 0 + + +@pytest.mark.parametrize( + "func", + [ + cumsum_max, + pytest.param(assert_block_lengths, marks=td.skip_array_manager_invalid_test), + ], +) +def test_mgr_locs_updated(func): + # https://github.com/pandas-dev/pandas/issues/31802 + # Some operations may require creating new blocks, which requires + # valid mgr_locs + df = pd.DataFrame({"A": ["a", "a", "a"], "B": ["a", "b", "b"], "C": [1, 1, 1]}) + result = df.groupby(["A", "B"]).agg(func) + expected = pd.DataFrame( + {"C": [0, 0]}, + index=pd.MultiIndex.from_product([["a"], ["a", "b"]], names=["A", "B"]), + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "binner,closed,expected", + [ + ( + np.array([0, 3, 6, 9], dtype=np.int64), + "left", + np.array([2, 5, 6], dtype=np.int64), + ), + ( + np.array([0, 3, 6, 9], dtype=np.int64), + "right", + np.array([3, 6, 6], dtype=np.int64), + ), + (np.array([0, 3, 6], dtype=np.int64), "left", np.array([2, 5], dtype=np.int64)), + ( + np.array([0, 3, 6], dtype=np.int64), + "right", + np.array([3, 6], dtype=np.int64), + ), + ], +) +def test_generate_bins(binner, closed, expected): + values = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64) + result = lib.generate_bins_dt64(values, binner, closed=closed) + tm.assert_numpy_array_equal(result, expected) + + +class TestMoments: + pass diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_categorical.py b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_categorical.py new file mode 100644 index 0000000..3d5016b --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_categorical.py @@ -0,0 +1,1796 @@ +from datetime import datetime + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Categorical, + CategoricalIndex, + DataFrame, + Index, + MultiIndex, + Series, + qcut, +) +import pandas._testing as tm + + +def cartesian_product_for_groupers(result, args, names, fill_value=np.NaN): + """Reindex to a cartesian product for the groupers, + preserving the nature (Categorical) of each grouper + """ + + def f(a): + if isinstance(a, (CategoricalIndex, Categorical)): + categories = a.categories + a = Categorical.from_codes( + np.arange(len(categories)), categories=categories, ordered=a.ordered + ) + return a + + index = MultiIndex.from_product(map(f, args), names=names) + return result.reindex(index,
fill_value=fill_value).sort_index() + + +_results_for_groupbys_with_missing_categories = { + # This maps the builtin groupby functions to their expected outputs for + # missing categories when they are called on a categorical grouper with + # observed=False. Some functions are expected to return NaN, some zero. + # These expected values can be used across several tests (i.e. they are + # the same for SeriesGroupBy and DataFrameGroupBy) but they should only be + # hardcoded in one place. + "all": np.NaN, + "any": np.NaN, + "count": 0, + "corrwith": np.NaN, + "first": np.NaN, + "idxmax": np.NaN, + "idxmin": np.NaN, + "last": np.NaN, + "mad": np.NaN, + "max": np.NaN, + "mean": np.NaN, + "median": np.NaN, + "min": np.NaN, + "nth": np.NaN, + "nunique": 0, + "prod": np.NaN, + "quantile": np.NaN, + "sem": np.NaN, + "size": 0, + "skew": np.NaN, + "std": np.NaN, + "sum": 0, + "var": np.NaN, +} + + +def test_apply_use_categorical_name(df): + cats = qcut(df.C, 4) + + def get_stats(group): + return { + "min": group.min(), + "max": group.max(), + "count": group.count(), + "mean": group.mean(), + } + + result = df.groupby(cats, observed=False).D.apply(get_stats) + assert result.index.names[0] == "C" + + +def test_basic(): # TODO: split this test + + cats = Categorical( + ["a", "a", "a", "b", "b", "b", "c", "c", "c"], + categories=["a", "b", "c", "d"], + ordered=True, + ) + data = DataFrame({"a": [1, 1, 1, 2, 2, 2, 3, 4, 5], "b": cats}) + + exp_index = CategoricalIndex(list("abcd"), name="b", ordered=True) + expected = DataFrame({"a": [1, 2, 4, np.nan]}, index=exp_index) + result = data.groupby("b", observed=False).mean() + tm.assert_frame_equal(result, expected) + + cat1 = Categorical(["a", "a", "b", "b"], categories=["a", "b", "z"], ordered=True) + cat2 = Categorical(["c", "d", "c", "d"], categories=["c", "d", "y"], ordered=True) + df = DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]}) + + # single grouper + gb = df.groupby("A", observed=False) + exp_idx = CategoricalIndex(["a", "b", "z"], name="A", ordered=True) + expected = DataFrame({"values": Series([3, 7, 0], index=exp_idx)}) + result = gb.sum() + tm.assert_frame_equal(result, expected) + + # GH 8623 + x = DataFrame( + [[1, "John P. Doe"], [2, "Jane Dove"], [1, "John P. 
Doe"]], + columns=["person_id", "person_name"], + ) + x["person_name"] = Categorical(x.person_name) + + g = x.groupby(["person_id"], observed=False) + result = g.transform(lambda x: x) + tm.assert_frame_equal(result, x[["person_name"]]) + + result = x.drop_duplicates("person_name") + expected = x.iloc[[0, 1]] + tm.assert_frame_equal(result, expected) + + def f(x): + return x.drop_duplicates("person_name").iloc[0] + + result = g.apply(f) + expected = x.iloc[[0, 1]].copy() + expected.index = Index([1, 2], name="person_id") + expected["person_name"] = expected["person_name"].astype("object") + tm.assert_frame_equal(result, expected) + + # GH 9921 + # Monotonic + df = DataFrame({"a": [5, 15, 25]}) + c = pd.cut(df.a, bins=[0, 10, 20, 30, 40]) + + result = df.a.groupby(c, observed=False).transform(sum) + tm.assert_series_equal(result, df["a"]) + + tm.assert_series_equal( + df.a.groupby(c, observed=False).transform(lambda xs: np.sum(xs)), df["a"] + ) + tm.assert_frame_equal(df.groupby(c, observed=False).transform(sum), df[["a"]]) + + gbc = df.groupby(c, observed=False) + with tm.assert_produces_warning( + FutureWarning, match="scalar max", check_stacklevel=False + ): + # stacklevel is thrown off (i think) bc the stack goes through numpy C code + result = gbc.transform(lambda xs: np.max(xs)) + tm.assert_frame_equal(result, df[["a"]]) + + with tm.assert_produces_warning(None): + result2 = gbc.transform(lambda xs: np.max(xs, axis=0)) + result3 = gbc.transform(max) + result4 = gbc.transform(np.maximum.reduce) + result5 = gbc.transform(lambda xs: np.maximum.reduce(xs)) + tm.assert_frame_equal(result2, df[["a"]], check_dtype=False) + tm.assert_frame_equal(result3, df[["a"]], check_dtype=False) + tm.assert_frame_equal(result4, df[["a"]]) + tm.assert_frame_equal(result5, df[["a"]]) + + # Filter + tm.assert_series_equal(df.a.groupby(c, observed=False).filter(np.all), df["a"]) + tm.assert_frame_equal(df.groupby(c, observed=False).filter(np.all), df) + + # Non-monotonic + df = DataFrame({"a": [5, 15, 25, -5]}) + c = pd.cut(df.a, bins=[-10, 0, 10, 20, 30, 40]) + + result = df.a.groupby(c, observed=False).transform(sum) + tm.assert_series_equal(result, df["a"]) + + tm.assert_series_equal( + df.a.groupby(c, observed=False).transform(lambda xs: np.sum(xs)), df["a"] + ) + tm.assert_frame_equal(df.groupby(c, observed=False).transform(sum), df[["a"]]) + tm.assert_frame_equal( + df.groupby(c, observed=False).transform(lambda xs: np.sum(xs)), df[["a"]] + ) + + # GH 9603 + df = DataFrame({"a": [1, 0, 0, 0]}) + c = pd.cut(df.a, [0, 1, 2, 3, 4], labels=Categorical(list("abcd"))) + result = df.groupby(c, observed=False).apply(len) + + exp_index = CategoricalIndex(c.values.categories, ordered=c.values.ordered) + expected = Series([1, 0, 0, 0], index=exp_index) + expected.index.name = "a" + tm.assert_series_equal(result, expected) + + # more basic + levels = ["foo", "bar", "baz", "qux"] + codes = np.random.randint(0, 4, size=100) + + cats = Categorical.from_codes(codes, levels, ordered=True) + + data = DataFrame(np.random.randn(100, 4)) + + result = data.groupby(cats, observed=False).mean() + + expected = data.groupby(np.asarray(cats), observed=False).mean() + exp_idx = CategoricalIndex(levels, categories=cats.categories, ordered=True) + expected = expected.reindex(exp_idx) + + tm.assert_frame_equal(result, expected) + + grouped = data.groupby(cats, observed=False) + desc_result = grouped.describe() + + idx = cats.codes.argsort() + ord_labels = np.asarray(cats).take(idx) + ord_data = data.take(idx) + + exp_cats = 
Categorical( + ord_labels, ordered=True, categories=["foo", "bar", "baz", "qux"] + ) + expected = ord_data.groupby(exp_cats, sort=False, observed=False).describe() + tm.assert_frame_equal(desc_result, expected) + + # GH 10460 + expc = Categorical.from_codes(np.arange(4).repeat(8), levels, ordered=True) + exp = CategoricalIndex(expc) + tm.assert_index_equal((desc_result.stack().index.get_level_values(0)), exp) + exp = Index(["count", "mean", "std", "min", "25%", "50%", "75%", "max"] * 4) + tm.assert_index_equal((desc_result.stack().index.get_level_values(1)), exp) + + +def test_level_get_group(observed): + # GH15155 + df = DataFrame( + data=np.arange(2, 22, 2), + index=MultiIndex( + levels=[CategoricalIndex(["a", "b"]), range(10)], + codes=[[0] * 5 + [1] * 5, range(10)], + names=["Index1", "Index2"], + ), + ) + g = df.groupby(level=["Index1"], observed=observed) + + # expected should equal test.loc[["a"]] + # GH15166 + expected = DataFrame( + data=np.arange(2, 12, 2), + index=MultiIndex( + levels=[CategoricalIndex(["a", "b"]), range(5)], + codes=[[0] * 5, range(5)], + names=["Index1", "Index2"], + ), + ) + result = g.get_group("a") + + tm.assert_frame_equal(result, expected) + + +def test_sorting_with_different_categoricals(): + # GH 24271 + df = DataFrame( + { + "group": ["A"] * 6 + ["B"] * 6, + "dose": ["high", "med", "low"] * 4, + "outcomes": np.arange(12.0), + } + ) + + df.dose = Categorical(df.dose, categories=["low", "med", "high"], ordered=True) + + result = df.groupby("group")["dose"].value_counts() + result = result.sort_index(level=0, sort_remaining=True) + index = ["low", "med", "high", "low", "med", "high"] + index = Categorical(index, categories=["low", "med", "high"], ordered=True) + index = [["A", "A", "A", "B", "B", "B"], CategoricalIndex(index)] + index = MultiIndex.from_arrays(index, names=["group", None]) + expected = Series([2] * 6, index=index, name="dose") + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("ordered", [True, False]) +def test_apply(ordered): + # GH 10138 + + dense = Categorical(list("abc"), ordered=ordered) + + # 'b' is in the categories but not in the list + missing = Categorical(list("aaa"), categories=["a", "b"], ordered=ordered) + values = np.arange(len(dense)) + df = DataFrame({"missing": missing, "dense": dense, "values": values}) + grouped = df.groupby(["missing", "dense"], observed=True) + + # missing category 'b' should still exist in the output index + idx = MultiIndex.from_arrays([missing, dense], names=["missing", "dense"]) + expected = DataFrame([0, 1, 2.0], index=idx, columns=["values"]) + + # GH#21636 tracking down the xfail, in some builds np.mean(df.loc[[0]]) + # is coming back as Series([0., 1., 0.], index=["missing", "dense", "values"]) + # when we expect Series(0., index=["values"]) + with tm.assert_produces_warning( + FutureWarning, match="Select only valid", check_stacklevel=False + ): + result = grouped.apply(lambda x: np.mean(x)) + tm.assert_frame_equal(result, expected) + + result = grouped.mean() + tm.assert_frame_equal(result, expected) + + result = grouped.agg(np.mean) + tm.assert_frame_equal(result, expected) + + # but for transform we should still get back the original index + idx = MultiIndex.from_arrays([missing, dense], names=["missing", "dense"]) + expected = Series(1, index=idx) + result = grouped.apply(lambda x: 1) + tm.assert_series_equal(result, expected) + + +def test_observed(observed): + # multiple groupers, don't re-expand the output space + # of the grouper + # gh-14942 (implement) + # 
gh-10132 (back-compat) + # gh-8138 (back-compat) + # gh-8869 + + cat1 = Categorical(["a", "a", "b", "b"], categories=["a", "b", "z"], ordered=True) + cat2 = Categorical(["c", "d", "c", "d"], categories=["c", "d", "y"], ordered=True) + df = DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]}) + df["C"] = ["foo", "bar"] * 2 + + # multiple groupers with a non-cat + gb = df.groupby(["A", "B", "C"], observed=observed) + exp_index = MultiIndex.from_arrays( + [cat1, cat2, ["foo", "bar"] * 2], names=["A", "B", "C"] + ) + expected = DataFrame({"values": Series([1, 2, 3, 4], index=exp_index)}).sort_index() + result = gb.sum() + if not observed: + expected = cartesian_product_for_groupers( + expected, [cat1, cat2, ["foo", "bar"]], list("ABC"), fill_value=0 + ) + + tm.assert_frame_equal(result, expected) + + gb = df.groupby(["A", "B"], observed=observed) + exp_index = MultiIndex.from_arrays([cat1, cat2], names=["A", "B"]) + expected = DataFrame({"values": [1, 2, 3, 4]}, index=exp_index) + result = gb.sum() + if not observed: + expected = cartesian_product_for_groupers( + expected, [cat1, cat2], list("AB"), fill_value=0 + ) + + tm.assert_frame_equal(result, expected) + + # https://github.com/pandas-dev/pandas/issues/8138 + d = { + "cat": Categorical( + ["a", "b", "a", "b"], categories=["a", "b", "c"], ordered=True + ), + "ints": [1, 1, 2, 2], + "val": [10, 20, 30, 40], + } + df = DataFrame(d) + + # Grouping on a single column + groups_single_key = df.groupby("cat", observed=observed) + result = groups_single_key.mean() + + exp_index = CategoricalIndex( + list("ab"), name="cat", categories=list("abc"), ordered=True + ) + expected = DataFrame({"ints": [1.5, 1.5], "val": [20.0, 30]}, index=exp_index) + if not observed: + index = CategoricalIndex( + list("abc"), name="cat", categories=list("abc"), ordered=True + ) + expected = expected.reindex(index) + + tm.assert_frame_equal(result, expected) + + # Grouping on two columns + groups_double_key = df.groupby(["cat", "ints"], observed=observed) + result = groups_double_key.agg("mean") + expected = DataFrame( + { + "val": [10.0, 30.0, 20.0, 40.0], + "cat": Categorical( + ["a", "a", "b", "b"], categories=["a", "b", "c"], ordered=True + ), + "ints": [1, 2, 1, 2], + } + ).set_index(["cat", "ints"]) + if not observed: + expected = cartesian_product_for_groupers( + expected, [df.cat.values, [1, 2]], ["cat", "ints"] + ) + + tm.assert_frame_equal(result, expected) + + # GH 10132 + for key in [("a", 1), ("b", 2), ("b", 1), ("a", 2)]: + c, i = key + result = groups_double_key.get_group(key) + expected = df[(df.cat == c) & (df.ints == i)] + tm.assert_frame_equal(result, expected) + + # gh-8869 + # with as_index + d = { + "foo": [10, 8, 4, 8, 4, 1, 1], + "bar": [10, 20, 30, 40, 50, 60, 70], + "baz": ["d", "c", "e", "a", "a", "d", "c"], + } + df = DataFrame(d) + cat = pd.cut(df["foo"], np.linspace(0, 10, 3)) + df["range"] = cat + groups = df.groupby(["range", "baz"], as_index=False, observed=observed) + result = groups.agg("mean") + + groups2 = df.groupby(["range", "baz"], as_index=True, observed=observed) + expected = groups2.agg("mean").reset_index() + tm.assert_frame_equal(result, expected) + + +def test_observed_codes_remap(observed): + d = {"C1": [3, 3, 4, 5], "C2": [1, 2, 3, 4], "C3": [10, 100, 200, 34]} + df = DataFrame(d) + values = pd.cut(df["C1"], [1, 2, 3, 6]) + values.name = "cat" + groups_double_key = df.groupby([values, "C2"], observed=observed) + + idx = MultiIndex.from_arrays([values, [1, 2, 3, 4]], names=["cat", "C2"]) + expected = DataFrame( + {"C1": 
[3.0, 3.0, 4.0, 5.0], "C3": [10.0, 100.0, 200.0, 34.0]}, index=idx + ) + if not observed: + expected = cartesian_product_for_groupers( + expected, [values.values, [1, 2, 3, 4]], ["cat", "C2"] + ) + + result = groups_double_key.agg("mean") + tm.assert_frame_equal(result, expected) + + +def test_observed_perf(): + # we create a cartesian product, so this is + # non-performant if we don't use observed values + # gh-14942 + df = DataFrame( + { + "cat": np.random.randint(0, 255, size=30000), + "int_id": np.random.randint(0, 255, size=30000), + "other_id": np.random.randint(0, 10000, size=30000), + "foo": 0, + } + ) + df["cat"] = df.cat.astype(str).astype("category") + + grouped = df.groupby(["cat", "int_id", "other_id"], observed=True) + result = grouped.count() + assert result.index.levels[0].nunique() == df.cat.nunique() + assert result.index.levels[1].nunique() == df.int_id.nunique() + assert result.index.levels[2].nunique() == df.other_id.nunique() + + +def test_observed_groups(observed): + # gh-20583 + # test that we have the appropriate groups + + cat = Categorical(["a", "c", "a"], categories=["a", "b", "c"]) + df = DataFrame({"cat": cat, "vals": [1, 2, 3]}) + g = df.groupby("cat", observed=observed) + + result = g.groups + if observed: + expected = {"a": Index([0, 2], dtype="int64"), "c": Index([1], dtype="int64")} + else: + expected = { + "a": Index([0, 2], dtype="int64"), + "b": Index([], dtype="int64"), + "c": Index([1], dtype="int64"), + } + + tm.assert_dict_equal(result, expected) + + +def test_observed_groups_with_nan(observed): + # GH 24740 + df = DataFrame( + { + "cat": Categorical(["a", np.nan, "a"], categories=["a", "b", "d"]), + "vals": [1, 2, 3], + } + ) + g = df.groupby("cat", observed=observed) + result = g.groups + if observed: + expected = {"a": Index([0, 2], dtype="int64")} + else: + expected = { + "a": Index([0, 2], dtype="int64"), + "b": Index([], dtype="int64"), + "d": Index([], dtype="int64"), + } + tm.assert_dict_equal(result, expected) + + +def test_observed_nth(): + # GH 26385 + cat = Categorical(["a", np.nan, np.nan], categories=["a", "b", "c"]) + ser = Series([1, 2, 3]) + df = DataFrame({"cat": cat, "ser": ser}) + + result = df.groupby("cat", observed=False)["ser"].nth(0) + + index = Categorical(["a", "b", "c"], categories=["a", "b", "c"]) + expected = Series([1, np.nan, np.nan], index=index, name="ser") + expected.index.name = "cat" + + tm.assert_series_equal(result, expected) + + +def test_dataframe_categorical_with_nan(observed): + # GH 21151 + s1 = Categorical([np.nan, "a", np.nan, "a"], categories=["a", "b", "c"]) + s2 = Series([1, 2, 3, 4]) + df = DataFrame({"s1": s1, "s2": s2}) + result = df.groupby("s1", observed=observed).first().reset_index() + if observed: + expected = DataFrame( + {"s1": Categorical(["a"], categories=["a", "b", "c"]), "s2": [2]} + ) + else: + expected = DataFrame( + { + "s1": Categorical(["a", "b", "c"], categories=["a", "b", "c"]), + "s2": [2, np.nan, np.nan], + } + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("ordered", [True, False]) +@pytest.mark.parametrize("observed", [True, False]) +@pytest.mark.parametrize("sort", [True, False]) +def test_dataframe_categorical_ordered_observed_sort(ordered, observed, sort): + # GH 25871: Fix groupby sorting on ordered Categoricals + # GH 25167: Groupby with observed=True doesn't sort + + # Build a dataframe with cat having one unobserved category ('missing'), + # and a Series with identical values + label = Categorical( + ["d", "a", "b", "a", "d", "b"], + 
categories=["a", "b", "missing", "d"], + ordered=ordered, + ) + val = Series(["d", "a", "b", "a", "d", "b"]) + df = DataFrame({"label": label, "val": val}) + + # aggregate on the Categorical + result = df.groupby("label", observed=observed, sort=sort)["val"].aggregate("first") + + # If ordering works, we expect index labels equal to aggregation results, + # except for 'observed=False': label 'missing' has aggregation None + label = Series(result.index.array, dtype="object") + aggr = Series(result.array) + if not observed: + aggr[aggr.isna()] = "missing" + if not all(label == aggr): + msg = ( + "Labels and aggregation results not consistently sorted\n" + f"for (ordered={ordered}, observed={observed}, sort={sort})\n" + f"Result:\n{result}" + ) + assert False, msg + + +def test_datetime(): + # GH9049: ensure backward compatibility + levels = pd.date_range("2014-01-01", periods=4) + codes = np.random.randint(0, 4, size=100) + + cats = Categorical.from_codes(codes, levels, ordered=True) + + data = DataFrame(np.random.randn(100, 4)) + result = data.groupby(cats, observed=False).mean() + + expected = data.groupby(np.asarray(cats), observed=False).mean() + expected = expected.reindex(levels) + expected.index = CategoricalIndex( + expected.index, categories=expected.index, ordered=True + ) + + tm.assert_frame_equal(result, expected) + + grouped = data.groupby(cats, observed=False) + desc_result = grouped.describe() + + idx = cats.codes.argsort() + ord_labels = cats.take(idx) + ord_data = data.take(idx) + expected = ord_data.groupby(ord_labels, observed=False).describe() + tm.assert_frame_equal(desc_result, expected) + tm.assert_index_equal(desc_result.index, expected.index) + tm.assert_index_equal( + desc_result.index.get_level_values(0), expected.index.get_level_values(0) + ) + + # GH 10460 + expc = Categorical.from_codes(np.arange(4).repeat(8), levels, ordered=True) + exp = CategoricalIndex(expc) + tm.assert_index_equal((desc_result.stack().index.get_level_values(0)), exp) + exp = Index(["count", "mean", "std", "min", "25%", "50%", "75%", "max"] * 4) + tm.assert_index_equal((desc_result.stack().index.get_level_values(1)), exp) + + +def test_categorical_index(): + + s = np.random.RandomState(12345) + levels = ["foo", "bar", "baz", "qux"] + codes = s.randint(0, 4, size=20) + cats = Categorical.from_codes(codes, levels, ordered=True) + df = DataFrame(np.repeat(np.arange(20), 4).reshape(-1, 4), columns=list("abcd")) + df["cats"] = cats + + # with a cat index + result = df.set_index("cats").groupby(level=0, observed=False).sum() + expected = df[list("abcd")].groupby(cats.codes, observed=False).sum() + expected.index = CategoricalIndex( + Categorical.from_codes([0, 1, 2, 3], levels, ordered=True), name="cats" + ) + tm.assert_frame_equal(result, expected) + + # with a cat column, should produce a cat index + result = df.groupby("cats", observed=False).sum() + expected = df[list("abcd")].groupby(cats.codes, observed=False).sum() + expected.index = CategoricalIndex( + Categorical.from_codes([0, 1, 2, 3], levels, ordered=True), name="cats" + ) + tm.assert_frame_equal(result, expected) + + +def test_describe_categorical_columns(): + # GH 11558 + cats = CategoricalIndex( + ["qux", "foo", "baz", "bar"], + categories=["foo", "bar", "baz", "qux"], + ordered=True, + ) + df = DataFrame(np.random.randn(20, 4), columns=cats) + result = df.groupby([1, 2, 3, 4] * 5).describe() + + tm.assert_index_equal(result.stack().columns, cats) + tm.assert_categorical_equal(result.stack().columns.values, cats.values) + + +def 
test_unstack_categorical(): + # GH11558 (example is taken from the original issue) + df = DataFrame( + {"a": range(10), "medium": ["A", "B"] * 5, "artist": list("XYXXY") * 2} + ) + df["medium"] = df["medium"].astype("category") + + gcat = df.groupby(["artist", "medium"], observed=False)["a"].count().unstack() + result = gcat.describe() + + exp_columns = CategoricalIndex(["A", "B"], ordered=False, name="medium") + tm.assert_index_equal(result.columns, exp_columns) + tm.assert_categorical_equal(result.columns.values, exp_columns.values) + + result = gcat["A"] + gcat["B"] + expected = Series([6, 4], index=Index(["X", "Y"], name="artist")) + tm.assert_series_equal(result, expected) + + +def test_bins_unequal_len(): + # GH3011 + series = Series([np.nan, np.nan, 1, 1, 2, 2, 3, 3, 4, 4]) + bins = pd.cut(series.dropna().values, 4) + + # len(bins) != len(series) here + with pytest.raises(ValueError, match="Grouper and axis must be same length"): + series.groupby(bins).mean() + + +@pytest.mark.parametrize( + ["series", "data"], + [ + # Group a series with length and index equal to those of the grouper. + (Series(range(4)), {"A": [0, 3], "B": [1, 2]}), + # Group a series with length equal to that of the grouper and index unequal to + # that of the grouper. + (Series(range(4)).rename(lambda idx: idx + 1), {"A": [2], "B": [0, 1]}), + # GH44179: Group a series with length unequal to that of the grouper. + (Series(range(7)), {"A": [0, 3], "B": [1, 2]}), + ], +) +def test_categorical_series(series, data): + # Group the given series by a series with categorical data type such that group A + # takes indices 0 and 3 and group B indices 1 and 2, obtaining the values mapped in + # the given data. + groupby = series.groupby(Series(list("ABBA"), dtype="category")) + result = groupby.aggregate(list) + expected = Series(data, index=CategoricalIndex(data.keys())) + tm.assert_series_equal(result, expected) + + +def test_as_index(): + # GH13204 + df = DataFrame( + { + "cat": Categorical([1, 2, 2], [1, 2, 3]), + "A": [10, 11, 11], + "B": [101, 102, 103], + } + ) + result = df.groupby(["cat", "A"], as_index=False, observed=True).sum() + expected = DataFrame( + { + "cat": Categorical([1, 2], categories=df.cat.cat.categories), + "A": [10, 11], + "B": [101, 205], + }, + columns=["cat", "A", "B"], + ) + tm.assert_frame_equal(result, expected) + + # function grouper + f = lambda r: df.loc[r, "A"] + result = df.groupby(["cat", f], as_index=False, observed=True).sum() + expected = DataFrame( + { + "cat": Categorical([1, 2], categories=df.cat.cat.categories), + "A": [10, 22], + "B": [101, 205], + }, + columns=["cat", "A", "B"], + ) + tm.assert_frame_equal(result, expected) + + # another not in-axis grouper (conflicting names in index) + s = Series(["a", "b", "b"], name="cat") + result = df.groupby(["cat", s], as_index=False, observed=True).sum() + tm.assert_frame_equal(result, expected) + + # is original index dropped? 
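+    # (checked below with several index names, including "B", which collides +    # with a column name)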
+ group_columns = ["cat", "A"] + expected = DataFrame( + { + "cat": Categorical([1, 2], categories=df.cat.cat.categories), + "A": [10, 11], + "B": [101, 205], + }, + columns=["cat", "A", "B"], + ) + + for name in [None, "X", "B"]: + df.index = Index(list("abc"), name=name) + result = df.groupby(group_columns, as_index=False, observed=True).sum() + + tm.assert_frame_equal(result, expected) + + +def test_preserve_categories(): + # GH-13179 + categories = list("abc") + + # ordered=True + df = DataFrame({"A": Categorical(list("ba"), categories=categories, ordered=True)}) + index = CategoricalIndex(categories, categories, ordered=True, name="A") + tm.assert_index_equal( + df.groupby("A", sort=True, observed=False).first().index, index + ) + tm.assert_index_equal( + df.groupby("A", sort=False, observed=False).first().index, index + ) + + # ordered=False + df = DataFrame({"A": Categorical(list("ba"), categories=categories, ordered=False)}) + sort_index = CategoricalIndex(categories, categories, ordered=False, name="A") + nosort_index = CategoricalIndex(list("bac"), list("bac"), ordered=False, name="A") + tm.assert_index_equal( + df.groupby("A", sort=True, observed=False).first().index, sort_index + ) + tm.assert_index_equal( + df.groupby("A", sort=False, observed=False).first().index, nosort_index + ) + + +def test_preserve_categorical_dtype(): + # GH13743, GH13854 + df = DataFrame( + { + "A": [1, 2, 1, 1, 2], + "B": [10, 16, 22, 28, 34], + "C1": Categorical(list("abaab"), categories=list("bac"), ordered=False), + "C2": Categorical(list("abaab"), categories=list("bac"), ordered=True), + } + ) + # single grouper + exp_full = DataFrame( + { + "A": [2.0, 1.0, np.nan], + "B": [25.0, 20.0, np.nan], + "C1": Categorical(list("bac"), categories=list("bac"), ordered=False), + "C2": Categorical(list("bac"), categories=list("bac"), ordered=True), + } + ) + for col in ["C1", "C2"]: + result1 = df.groupby(by=col, as_index=False, observed=False).mean() + result2 = df.groupby(by=col, as_index=True, observed=False).mean().reset_index() + expected = exp_full.reindex(columns=result1.columns) + tm.assert_frame_equal(result1, expected) + tm.assert_frame_equal(result2, expected) + + +@pytest.mark.parametrize( + "func, values", + [ + ("first", ["second", "first"]), + ("last", ["fourth", "third"]), + ("min", ["fourth", "first"]), + ("max", ["second", "third"]), + ], +) +def test_preserve_on_ordered_ops(func, values): + # gh-18502 + # preserve the categoricals on ops + c = Categorical(["first", "second", "third", "fourth"], ordered=True) + df = DataFrame({"payload": [-1, -2, -1, -2], "col": c}) + g = df.groupby("payload") + result = getattr(g, func)() + expected = DataFrame( + {"payload": [-2, -1], "col": Series(values, dtype=c.dtype)} + ).set_index("payload") + tm.assert_frame_equal(result, expected) + + # we should also preserve categorical for SeriesGroupBy + sgb = df.groupby("payload")["col"] + result = getattr(sgb, func)() + expected = expected["col"] + tm.assert_series_equal(result, expected) + + +def test_categorical_no_compress(): + data = Series(np.random.randn(9)) + + codes = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2]) + cats = Categorical.from_codes(codes, [0, 1, 2], ordered=True) + + result = data.groupby(cats, observed=False).mean() + exp = data.groupby(codes, observed=False).mean() + + exp.index = CategoricalIndex( + exp.index, categories=cats.categories, ordered=cats.ordered + ) + tm.assert_series_equal(result, exp) + + codes = np.array([0, 0, 0, 1, 1, 1, 3, 3, 3]) + cats = Categorical.from_codes(codes, [0, 1, 
2, 3], ordered=True) + + result = data.groupby(cats, observed=False).mean() + exp = data.groupby(codes, observed=False).mean().reindex(cats.categories) + exp.index = CategoricalIndex( + exp.index, categories=cats.categories, ordered=cats.ordered + ) + tm.assert_series_equal(result, exp) + + cats = Categorical( + ["a", "a", "a", "b", "b", "b", "c", "c", "c"], + categories=["a", "b", "c", "d"], + ordered=True, + ) + data = DataFrame({"a": [1, 1, 1, 2, 2, 2, 3, 4, 5], "b": cats}) + + result = data.groupby("b", observed=False).mean() + result = result["a"].values + exp = np.array([1, 2, 4, np.nan]) + tm.assert_numpy_array_equal(result, exp) + + +def test_groupby_empty_with_category(): + # GH-9614 + # test fix for when group by on None resulted in + # coercion of dtype categorical -> float + df = DataFrame({"A": [None] * 3, "B": Categorical(["train", "train", "test"])}) + result = df.groupby("A").first()["B"] + expected = Series( + Categorical([], categories=["test", "train"]), + index=Series([], dtype="object", name="A"), + name="B", + ) + tm.assert_series_equal(result, expected) + + +def test_sort(): + + # https://stackoverflow.com/questions/23814368/sorting-pandas- + # categorical-labels-after-groupby + # This should result in a properly sorted Series so that the plot + # has a sorted x axis + # self.cat.groupby(['value_group'])['value_group'].count().plot(kind='bar') + + df = DataFrame({"value": np.random.randint(0, 10000, 100)}) + labels = [f"{i} - {i+499}" for i in range(0, 10000, 500)] + cat_labels = Categorical(labels, labels) + + df = df.sort_values(by=["value"], ascending=True) + df["value_group"] = pd.cut( + df.value, range(0, 10500, 500), right=False, labels=cat_labels + ) + + res = df.groupby(["value_group"], observed=False)["value_group"].count() + exp = res[sorted(res.index, key=lambda x: float(x.split()[0]))] + exp.index = CategoricalIndex(exp.index, name=exp.index.name) + tm.assert_series_equal(res, exp) + + +def test_sort2(): + # dataframe groupby sort was being ignored # GH 8868 + df = DataFrame( + [ + ["(7.5, 10]", 10, 10], + ["(7.5, 10]", 8, 20], + ["(2.5, 5]", 5, 30], + ["(5, 7.5]", 6, 40], + ["(2.5, 5]", 4, 50], + ["(0, 2.5]", 1, 60], + ["(5, 7.5]", 7, 70], + ], + columns=["range", "foo", "bar"], + ) + df["range"] = Categorical(df["range"], ordered=True) + index = CategoricalIndex( + ["(0, 2.5]", "(2.5, 5]", "(5, 7.5]", "(7.5, 10]"], name="range", ordered=True + ) + expected_sort = DataFrame( + [[1, 60], [5, 30], [6, 40], [10, 10]], columns=["foo", "bar"], index=index + ) + + col = "range" + result_sort = df.groupby(col, sort=True, observed=False).first() + tm.assert_frame_equal(result_sort, expected_sort) + + # when categories is ordered, group is ordered by category's order + expected_sort = result_sort + result_sort = df.groupby(col, sort=False, observed=False).first() + tm.assert_frame_equal(result_sort, expected_sort) + + df["range"] = Categorical(df["range"], ordered=False) + index = CategoricalIndex( + ["(0, 2.5]", "(2.5, 5]", "(5, 7.5]", "(7.5, 10]"], name="range" + ) + expected_sort = DataFrame( + [[1, 60], [5, 30], [6, 40], [10, 10]], columns=["foo", "bar"], index=index + ) + + index = CategoricalIndex( + ["(7.5, 10]", "(2.5, 5]", "(5, 7.5]", "(0, 2.5]"], + categories=["(7.5, 10]", "(2.5, 5]", "(5, 7.5]", "(0, 2.5]"], + name="range", + ) + expected_nosort = DataFrame( + [[10, 10], [5, 30], [6, 40], [1, 60]], index=index, columns=["foo", "bar"] + ) + + col = "range" + + # this is an unordered categorical, but we allow this #### + result_sort = df.groupby(col, 
sort=True, observed=False).first() + tm.assert_frame_equal(result_sort, expected_sort) + + result_nosort = df.groupby(col, sort=False, observed=False).first() + tm.assert_frame_equal(result_nosort, expected_nosort) + + +def test_sort_datetimelike(): + # GH10505 + + # use the same data as test_groupby_sort_categorical, whose categories + # correspond to datetime.month + df = DataFrame( + { + "dt": [ + datetime(2011, 7, 1), + datetime(2011, 7, 1), + datetime(2011, 2, 1), + datetime(2011, 5, 1), + datetime(2011, 2, 1), + datetime(2011, 1, 1), + datetime(2011, 5, 1), + ], + "foo": [10, 8, 5, 6, 4, 1, 7], + "bar": [10, 20, 30, 40, 50, 60, 70], + }, + columns=["dt", "foo", "bar"], + ) + + # ordered=True + df["dt"] = Categorical(df["dt"], ordered=True) + index = [ + datetime(2011, 1, 1), + datetime(2011, 2, 1), + datetime(2011, 5, 1), + datetime(2011, 7, 1), + ] + result_sort = DataFrame( + [[1, 60], [5, 30], [6, 40], [10, 10]], columns=["foo", "bar"] + ) + result_sort.index = CategoricalIndex(index, name="dt", ordered=True) + + index = [ + datetime(2011, 7, 1), + datetime(2011, 2, 1), + datetime(2011, 5, 1), + datetime(2011, 1, 1), + ] + result_nosort = DataFrame( + [[10, 10], [5, 30], [6, 40], [1, 60]], columns=["foo", "bar"] + ) + result_nosort.index = CategoricalIndex( + index, categories=index, name="dt", ordered=True + ) + + col = "dt" + tm.assert_frame_equal( + result_sort, df.groupby(col, sort=True, observed=False).first() + ) + + # when categories is ordered, group is ordered by category's order + tm.assert_frame_equal( + result_sort, df.groupby(col, sort=False, observed=False).first() + ) + + # ordered=False + df["dt"] = Categorical(df["dt"], ordered=False) + index = [ + datetime(2011, 1, 1), + datetime(2011, 2, 1), + datetime(2011, 5, 1), + datetime(2011, 7, 1), + ] + result_sort = DataFrame( + [[1, 60], [5, 30], [6, 40], [10, 10]], columns=["foo", "bar"] + ) + result_sort.index = CategoricalIndex(index, name="dt") + + index = [ + datetime(2011, 7, 1), + datetime(2011, 2, 1), + datetime(2011, 5, 1), + datetime(2011, 1, 1), + ] + result_nosort = DataFrame( + [[10, 10], [5, 30], [6, 40], [1, 60]], columns=["foo", "bar"] + ) + result_nosort.index = CategoricalIndex(index, categories=index, name="dt") + + col = "dt" + tm.assert_frame_equal( + result_sort, df.groupby(col, sort=True, observed=False).first() + ) + tm.assert_frame_equal( + result_nosort, df.groupby(col, sort=False, observed=False).first() + ) + + +def test_empty_sum(): + # https://github.com/pandas-dev/pandas/issues/18678 + df = DataFrame( + {"A": Categorical(["a", "a", "b"], categories=["a", "b", "c"]), "B": [1, 2, 1]} + ) + expected_idx = CategoricalIndex(["a", "b", "c"], name="A") + + # 0 by default + result = df.groupby("A", observed=False).B.sum() + expected = Series([3, 1, 0], expected_idx, name="B") + tm.assert_series_equal(result, expected) + + # min_count=0 + result = df.groupby("A", observed=False).B.sum(min_count=0) + expected = Series([3, 1, 0], expected_idx, name="B") + tm.assert_series_equal(result, expected) + + # min_count=1 + result = df.groupby("A", observed=False).B.sum(min_count=1) + expected = Series([3, 1, np.nan], expected_idx, name="B") + tm.assert_series_equal(result, expected) + + # min_count>1 + result = df.groupby("A", observed=False).B.sum(min_count=2) + expected = Series([3, np.nan, np.nan], expected_idx, name="B") + tm.assert_series_equal(result, expected) + + +def test_empty_prod(): + # https://github.com/pandas-dev/pandas/issues/18678 + df = DataFrame( + {"A": Categorical(["a", "a", "b"],
categories=["a", "b", "c"]), "B": [1, 2, 1]} + ) + + expected_idx = CategoricalIndex(["a", "b", "c"], name="A") + + # 1 by default + result = df.groupby("A", observed=False).B.prod() + expected = Series([2, 1, 1], expected_idx, name="B") + tm.assert_series_equal(result, expected) + + # min_count=0 + result = df.groupby("A", observed=False).B.prod(min_count=0) + expected = Series([2, 1, 1], expected_idx, name="B") + tm.assert_series_equal(result, expected) + + # min_count=1 + result = df.groupby("A", observed=False).B.prod(min_count=1) + expected = Series([2, 1, np.nan], expected_idx, name="B") + tm.assert_series_equal(result, expected) + + +def test_groupby_multiindex_categorical_datetime(): + # https://github.com/pandas-dev/pandas/issues/21390 + + df = DataFrame( + { + "key1": Categorical(list("abcbabcba")), + "key2": Categorical( + list(pd.date_range("2018-06-01 00", freq="1T", periods=3)) * 3 + ), + "values": np.arange(9), + } + ) + result = df.groupby(["key1", "key2"]).mean() + + idx = MultiIndex.from_product( + [ + Categorical(["a", "b", "c"]), + Categorical(pd.date_range("2018-06-01 00", freq="1T", periods=3)), + ], + names=["key1", "key2"], + ) + expected = DataFrame({"values": [0, 4, 8, 3, 4, 5, 6, np.nan, 2]}, index=idx) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "as_index, expected", + [ + ( + True, + Series( + index=MultiIndex.from_arrays( + [Series([1, 1, 2], dtype="category"), [1, 2, 2]], names=["a", "b"] + ), + data=[1, 2, 3], + name="x", + ), + ), + ( + False, + DataFrame( + { + "a": Series([1, 1, 2], dtype="category"), + "b": [1, 2, 2], + "x": [1, 2, 3], + } + ), + ), + ], +) +def test_groupby_agg_observed_true_single_column(as_index, expected): + # GH-23970 + df = DataFrame( + {"a": Series([1, 1, 2], dtype="category"), "b": [1, 2, 2], "x": [1, 2, 3]} + ) + + result = df.groupby(["a", "b"], as_index=as_index, observed=True)["x"].sum() + + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize("fill_value", [None, np.nan, pd.NaT]) +def test_shift(fill_value): + ct = Categorical( + ["a", "b", "c", "d"], categories=["a", "b", "c", "d"], ordered=False + ) + expected = Categorical( + [None, "a", "b", "c"], categories=["a", "b", "c", "d"], ordered=False + ) + res = ct.shift(1, fill_value=fill_value) + tm.assert_equal(res, expected) + + +@pytest.fixture +def df_cat(df): + """ + DataFrame with multiple categorical columns and a column of integers. + Shortened so as not to contain all possible combinations of categories. + Useful for testing `observed` kwarg functionality on GroupBy objects. 
+ + Parameters + ---------- + df: DataFrame + Non-categorical, longer DataFrame from another fixture, used to derive + this one + + Returns + ------- + df_cat: DataFrame + """ + df_cat = df.copy()[:4] # leave out some groups + df_cat["A"] = df_cat["A"].astype("category") + df_cat["B"] = df_cat["B"].astype("category") + df_cat["C"] = Series([1, 2, 3, 4]) + df_cat = df_cat.drop(["D"], axis=1) + return df_cat + + +@pytest.mark.parametrize("operation", ["agg", "apply"]) +def test_seriesgroupby_observed_true(df_cat, operation): + # GH 24880 + lev_a = Index(["foo", "foo", "bar", "bar"], dtype=df_cat["A"].dtype, name="A") + lev_b = Index(["one", "two", "one", "three"], dtype=df_cat["B"].dtype, name="B") + index = MultiIndex.from_arrays([lev_a, lev_b]) + expected = Series(data=[1, 3, 2, 4], index=index, name="C") + + grouped = df_cat.groupby(["A", "B"], observed=True)["C"] + result = getattr(grouped, operation)(sum) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("operation", ["agg", "apply"]) +@pytest.mark.parametrize("observed", [False, None]) +def test_seriesgroupby_observed_false_or_none(df_cat, observed, operation): + # GH 24880 + index, _ = MultiIndex.from_product( + [ + CategoricalIndex(["bar", "foo"], ordered=False), + CategoricalIndex(["one", "three", "two"], ordered=False), + ], + names=["A", "B"], + ).sortlevel() + + expected = Series(data=[2, 4, np.nan, 1, np.nan, 3], index=index, name="C") + if operation == "agg": + expected = expected.fillna(0, downcast="infer") + grouped = df_cat.groupby(["A", "B"], observed=observed)["C"] + result = getattr(grouped, operation)(sum) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "observed, index, data", + [ + ( + True, + MultiIndex.from_arrays( + [ + Index(["foo"] * 4 + ["bar"] * 4, dtype="category", name="A"), + Index( + ["one", "one", "two", "two", "one", "one", "three", "three"], + dtype="category", + name="B", + ), + Index(["min", "max"] * 4), + ] + ), + [1, 1, 3, 3, 2, 2, 4, 4], + ), + ( + False, + MultiIndex.from_product( + [ + CategoricalIndex(["bar", "foo"], ordered=False), + CategoricalIndex(["one", "three", "two"], ordered=False), + Index(["min", "max"]), + ], + names=["A", "B", None], + ), + [2, 2, 4, 4, np.nan, np.nan, 1, 1, np.nan, np.nan, 3, 3], + ), + ( + None, + MultiIndex.from_product( + [ + CategoricalIndex(["bar", "foo"], ordered=False), + CategoricalIndex(["one", "three", "two"], ordered=False), + Index(["min", "max"]), + ], + names=["A", "B", None], + ), + [2, 2, 4, 4, np.nan, np.nan, 1, 1, np.nan, np.nan, 3, 3], + ), + ], +) +def test_seriesgroupby_observed_apply_dict(df_cat, observed, index, data): + # GH 24880 + expected = Series(data=data, index=index, name="C") + result = df_cat.groupby(["A", "B"], observed=observed)["C"].apply( + lambda x: {"min": x.min(), "max": x.max()} + ) + tm.assert_series_equal(result, expected) + + +def test_groupby_categorical_series_dataframe_consistent(df_cat): + # GH 20416 + expected = df_cat.groupby(["A", "B"])["C"].mean() + result = df_cat.groupby(["A", "B"]).mean()["C"] + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("code", [([1, 0, 0]), ([0, 0, 0])]) +def test_groupby_categorical_axis_1(code): + # GH 13420 + df = DataFrame({"a": [1, 2, 3, 4], "b": [-1, -2, -3, -4], "c": [5, 6, 7, 8]}) + cat = Categorical.from_codes(code, categories=list("abc")) + result = df.groupby(cat, axis=1).mean() + expected = df.T.groupby(cat, axis=0).mean().T + tm.assert_frame_equal(result, expected) + + 
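+# The filterwarnings mark below ignores the "Select only valid columns" +# FutureWarning emitted during the .agg call in the next test.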
+@pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning") +def test_groupby_cat_preserves_structure(observed, ordered): + # GH 28787 + df = DataFrame( + {"Name": Categorical(["Bob", "Greg"], ordered=ordered), "Item": [1, 2]}, + columns=["Name", "Item"], + ) + expected = df.copy() + + result = ( + df.groupby("Name", observed=observed) + .agg(DataFrame.sum, skipna=True) + .reset_index() + ) + + tm.assert_frame_equal(result, expected) + + +def test_get_nonexistent_category(): + # Accessing a Category that is not in the dataframe + df = DataFrame({"var": ["a", "a", "b", "b"], "val": range(4)}) + with pytest.raises(KeyError, match="'vau'"): + df.groupby("var").apply( + lambda rows: DataFrame( + {"var": [rows.iloc[-1]["var"]], "val": [rows.iloc[-1]["vau"]]} + ) + ) + + +def test_series_groupby_on_2_categoricals_unobserved(reduction_func, observed, request): + # GH 17605 + if reduction_func == "ngroup": + pytest.skip("ngroup is not truly a reduction") + + if reduction_func == "corrwith":  # GH 32293 + mark = pytest.mark.xfail( + reason="TODO: implement SeriesGroupBy.corrwith. See GH 32293" + ) + request.node.add_marker(mark) + + df = DataFrame( + { + "cat_1": Categorical(list("AABB"), categories=list("ABCD")), + "cat_2": Categorical(list("AB") * 2, categories=list("ABCD")), + "value": [0.1] * 4, + } + ) + args = {"nth": [0]}.get(reduction_func, []) + + expected_length = 4 if observed else 16 + + series_groupby = df.groupby(["cat_1", "cat_2"], observed=observed)["value"] + agg = getattr(series_groupby, reduction_func) + result = agg(*args) + + assert len(result) == expected_length + + +def test_series_groupby_on_2_categoricals_unobserved_zeroes_or_nans( + reduction_func, request +): + # GH 17605 + # Tests whether the unobserved categories in the result contain 0 or NaN + + if reduction_func == "ngroup": + pytest.skip("ngroup is not truly a reduction") + + if reduction_func == "corrwith":  # GH 32293 + mark = pytest.mark.xfail( + reason="TODO: implement SeriesGroupBy.corrwith. See GH 32293" + ) + request.node.add_marker(mark) + + df = DataFrame( + { + "cat_1": Categorical(list("AABB"), categories=list("ABC")), + "cat_2": Categorical(list("AB") * 2, categories=list("ABC")), + "value": [0.1] * 4, + } + ) + unobserved = [tuple("AC"), tuple("BC"), tuple("CA"), tuple("CB"), tuple("CC")] + args = {"nth": [0]}.get(reduction_func, []) + + series_groupby = df.groupby(["cat_1", "cat_2"], observed=False)["value"] + agg = getattr(series_groupby, reduction_func) + result = agg(*args) + + zero_or_nan = _results_for_groupbys_with_missing_categories[reduction_func] + + for idx in unobserved: + val = result.loc[idx] + assert (pd.isna(zero_or_nan) and pd.isna(val)) or (val == zero_or_nan) + + # If we expect unobserved values to be zero, we also expect the dtype to be int. + # Except for .sum(). If the observed categories sum to dtype=float (i.e. their + # sums have decimals), then the zeros for the missing categories should also be + # floats.
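+ # For example, "count" fills the unobserved pairs with 0 and keeps an integer + # dtype, while "mean" fills them with NaN and therefore comes back as float.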
+ if zero_or_nan == 0 and reduction_func != "sum": + assert np.issubdtype(result.dtype, np.integer) + + +def test_dataframe_groupby_on_2_categoricals_when_observed_is_true(reduction_func): + # GH 23865 + # GH 27075 + # Ensure that df.groupby, when 'by' is two Categorical variables, + # does not return the categories that are not in df when observed=True + if reduction_func == "ngroup": + pytest.skip("ngroup does not return the Categories on the index") + + df = DataFrame( + { + "cat_1": Categorical(list("AABB"), categories=list("ABC")), + "cat_2": Categorical(list("1111"), categories=list("12")), + "value": [0.1, 0.1, 0.1, 0.1], + } + ) + unobserved_cats = [("A", "2"), ("B", "2"), ("C", "1"), ("C", "2")] + + df_grp = df.groupby(["cat_1", "cat_2"], observed=True) + + args = {"nth": [0], "corrwith": [df]}.get(reduction_func, []) + res = getattr(df_grp, reduction_func)(*args) + + for cat in unobserved_cats: + assert cat not in res.index + + +@pytest.mark.parametrize("observed", [False, None]) +def test_dataframe_groupby_on_2_categoricals_when_observed_is_false( + reduction_func, observed, request +): + # GH 23865 + # GH 27075 + # Ensure that df.groupby, when 'by' is two Categorical variables, + # returns the categories that are not in df when observed=False/None + + if reduction_func == "ngroup": + pytest.skip("ngroup does not return the Categories on the index") + + df = DataFrame( + { + "cat_1": Categorical(list("AABB"), categories=list("ABC")), + "cat_2": Categorical(list("1111"), categories=list("12")), + "value": [0.1, 0.1, 0.1, 0.1], + } + ) + unobserved_cats = [("A", "2"), ("B", "2"), ("C", "1"), ("C", "2")] + + df_grp = df.groupby(["cat_1", "cat_2"], observed=observed) + + args = {"nth": [0], "corrwith": [df]}.get(reduction_func, []) + res = getattr(df_grp, reduction_func)(*args) + + expected = _results_for_groupbys_with_missing_categories[reduction_func] + + if expected is np.nan: + assert res.loc[unobserved_cats].isnull().all().all() + else: + assert (res.loc[unobserved_cats] == expected).all().all() + + +def test_series_groupby_categorical_aggregation_getitem(): + # GH 8870 + d = {"foo": [10, 8, 4, 1], "bar": [10, 20, 30, 40], "baz": ["d", "c", "d", "c"]} + df = DataFrame(d) + cat = pd.cut(df["foo"], np.linspace(0, 20, 5)) + df["range"] = cat + groups = df.groupby(["range", "baz"], as_index=True, sort=True) + result = groups["foo"].agg("mean") + expected = groups.agg("mean")["foo"] + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "func, expected_values", + [(Series.nunique, [1, 1, 2]), (Series.count, [1, 2, 2])], +) +def test_groupby_agg_categorical_columns(func, expected_values): + # 31256 + df = DataFrame( + { + "id": [0, 1, 2, 3, 4], + "groups": [0, 1, 1, 2, 2], + "value": Categorical([0, 0, 0, 0, 1]), + } + ).set_index("id") + result = df.groupby("groups").agg(func) + + expected = DataFrame( + {"value": expected_values}, index=Index([0, 1, 2], name="groups") + ) + tm.assert_frame_equal(result, expected) + + +def test_groupby_agg_non_numeric(): + df = DataFrame({"A": Categorical(["a", "a", "b"], categories=["a", "b", "c"])}) + expected = DataFrame({"A": [2, 1]}, index=[1, 2]) + + result = df.groupby([1, 2, 1]).agg(Series.nunique) + tm.assert_frame_equal(result, expected) + + result = df.groupby([1, 2, 1]).nunique() + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("func", ["first", "last"]) +def test_groupy_first_returned_categorical_instead_of_dataframe(func): + # GH 28641: groupby drops index, when grouping over categorical 
column with + first/last. Returned a Categorical instead of a DataFrame previously. + df = DataFrame({"A": [1997], "B": Series(["b"], dtype="category").cat.as_ordered()}) + df_grouped = df.groupby("A")["B"] + result = getattr(df_grouped, func)() + + # ordered categorical dtype should be preserved + expected = Series( + ["b"], index=Index([1997], name="A"), name="B", dtype=df["B"].dtype + ) + tm.assert_series_equal(result, expected) + + +def test_read_only_category_no_sort(): + # GH33410 + cats = np.array([1, 2]) + cats.flags.writeable = False + df = DataFrame( + {"a": [1, 3, 5, 7], "b": Categorical([1, 1, 2, 2], categories=Index(cats))} + ) + expected = DataFrame( + data={"a": [2.0, 6.0]}, index=CategoricalIndex([1, 2], name="b") + ) + result = df.groupby("b", sort=False).mean() + tm.assert_frame_equal(result, expected) + + +def test_sorted_missing_category_values(): + # GH 28597 + df = DataFrame( + { + "foo": [ + "small", + "large", + "large", + "large", + "medium", + "large", + "large", + "medium", + ], + "bar": ["C", "A", "A", "C", "A", "C", "A", "C"], + } + ) + df["foo"] = ( + df["foo"] + .astype("category") + .cat.set_categories(["tiny", "small", "medium", "large"], ordered=True) + ) + + expected = DataFrame( + { + "tiny": {"A": 0, "C": 0}, + "small": {"A": 0, "C": 1}, + "medium": {"A": 1, "C": 1}, + "large": {"A": 3, "C": 2}, + } + ) + expected = expected.rename_axis("bar", axis="index") + expected.columns = CategoricalIndex( + ["tiny", "small", "medium", "large"], + categories=["tiny", "small", "medium", "large"], + ordered=True, + name="foo", + dtype="category", + ) + + result = df.groupby(["bar", "foo"]).size().unstack() + + tm.assert_frame_equal(result, expected) + + +def test_agg_cython_category_not_implemented_fallback(): + # https://github.com/pandas-dev/pandas/issues/31450 + df = DataFrame({"col_num": [1, 1, 2, 3]}) + df["col_cat"] = df["col_num"].astype("category") + + result = df.groupby("col_num").col_cat.first() + + # ordered categorical dtype should definitely be preserved; + # this is unordered, so it is a less clear-cut case (if anything, it should raise) + expected = Series( + [1, 2, 3], + index=Index([1, 2, 3], name="col_num"), + name="col_cat", + dtype=df["col_cat"].dtype, + ) + tm.assert_series_equal(result, expected) + + result = df.groupby("col_num").agg({"col_cat": "first"}) + expected = expected.to_frame() + tm.assert_frame_equal(result, expected) + + +def test_aggregate_categorical_with_isnan(): + # GH 29837 + df = DataFrame( + { + "A": [1, 1, 1, 1], + "B": [1, 2, 1, 2], + "numerical_col": [0.1, 0.2, np.nan, 0.3], + "object_col": ["foo", "bar", "foo", "fee"], + "categorical_col": ["foo", "bar", "foo", "fee"], + } + ) + + df = df.astype({"categorical_col": "category"}) + + result = df.groupby(["A", "B"]).agg(lambda df: df.isna().sum()) + index = MultiIndex.from_arrays([[1, 1], [1, 2]], names=("A", "B")) + expected = DataFrame( + data={ + "numerical_col": [1, 0], + "object_col": [0, 0], + "categorical_col": [0, 0], + }, + index=index, + ) + tm.assert_frame_equal(result, expected) + + +def test_categorical_transform(): + # GH 29037 + df = DataFrame( + { + "package_id": [1, 1, 1, 2, 2, 3], + "status": [ + "Waiting", + "OnTheWay", + "Delivered", + "Waiting", + "OnTheWay", + "Waiting", + ], + } + ) + + delivery_status_type = pd.CategoricalDtype( + categories=["Waiting", "OnTheWay", "Delivered"], ordered=True + ) + df["status"] = df["status"].astype(delivery_status_type) + df["last_status"] = df.groupby("package_id")["status"].transform(max) + result = df.copy() + + expected = 
DataFrame( + { + "package_id": [1, 1, 1, 2, 2, 3], + "status": [ + "Waiting", + "OnTheWay", + "Delivered", + "Waiting", + "OnTheWay", + "Waiting", + ], + "last_status": [ + "Delivered", + "Delivered", + "Delivered", + "OnTheWay", + "OnTheWay", + "Waiting", + ], + } + ) + + expected["status"] = expected["status"].astype(delivery_status_type) + + # .transform(max) should preserve ordered categoricals + expected["last_status"] = expected["last_status"].astype(delivery_status_type) + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("func", ["first", "last"]) +def test_series_groupby_first_on_categorical_col_grouped_on_2_categoricals( + func: str, observed: bool +): + # GH 34951 + cat = Categorical([0, 0, 1, 1]) + val = [0, 1, 1, 0] + df = DataFrame({"a": cat, "b": cat, "c": val}) + + cat2 = Categorical([0, 1]) + idx = MultiIndex.from_product([cat2, cat2], names=["a", "b"]) + expected_dict = { + "first": Series([0, np.NaN, np.NaN, 1], idx, name="c"), + "last": Series([1, np.NaN, np.NaN, 0], idx, name="c"), + } + + expected = expected_dict[func] + if observed: + expected = expected.dropna().astype(np.int64) + + srs_grp = df.groupby(["a", "b"], observed=observed)["c"] + result = getattr(srs_grp, func)() + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("func", ["first", "last"]) +def test_df_groupby_first_on_categorical_col_grouped_on_2_categoricals( + func: str, observed: bool +): + # GH 34951 + cat = Categorical([0, 0, 1, 1]) + val = [0, 1, 1, 0] + df = DataFrame({"a": cat, "b": cat, "c": val}) + + cat2 = Categorical([0, 1]) + idx = MultiIndex.from_product([cat2, cat2], names=["a", "b"]) + expected_dict = { + "first": Series([0, np.NaN, np.NaN, 1], idx, name="c"), + "last": Series([1, np.NaN, np.NaN, 0], idx, name="c"), + } + + expected = expected_dict[func].to_frame() + if observed: + expected = expected.dropna().astype(np.int64) + + df_grp = df.groupby(["a", "b"], observed=observed) + result = getattr(df_grp, func)() + tm.assert_frame_equal(result, expected) + + +def test_groupby_categorical_indices_unused_categories(): + # GH#38642 + df = DataFrame( + { + "key": Categorical(["b", "b", "a"], categories=["a", "b", "c"]), + "col": range(3), + } + ) + grouped = df.groupby("key", sort=False) + result = grouped.indices + expected = { + "b": np.array([0, 1], dtype="intp"), + "a": np.array([2], dtype="intp"), + "c": np.array([], dtype="intp"), + } + assert result.keys() == expected.keys() + for key in result.keys(): + tm.assert_numpy_array_equal(result[key], expected[key]) + + +@pytest.mark.parametrize("func", ["first", "last"]) +def test_groupby_last_first_preserve_categoricaldtype(func): + # GH#33090 + df = DataFrame({"a": [1, 2, 3]}) + df["b"] = df["a"].astype("category") + result = getattr(df.groupby("a")["b"], func)() + expected = Series( + Categorical([1, 2, 3]), name="b", index=Index([1, 2, 3], name="a") + ) + tm.assert_series_equal(expected, result) + + +def test_groupby_categorical_observed_nunique(): + # GH#45128 + df = DataFrame({"a": [1, 2], "b": [1, 2], "c": [10, 11]}) + df = df.astype(dtype={"a": "category", "b": "category"}) + result = df.groupby(["a", "b"], observed=True).nunique()["c"] + expected = Series( + [1, 1], + index=MultiIndex.from_arrays( + [CategoricalIndex([1, 2], name="a"), CategoricalIndex([1, 2], name="b")] + ), + name="c", + ) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_counting.py 
b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_counting.py new file mode 100644 index 0000000..73b2d8a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_counting.py @@ -0,0 +1,378 @@ +from itertools import product +from string import ascii_lowercase + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + MultiIndex, + Period, + Series, + Timedelta, + Timestamp, + date_range, +) +import pandas._testing as tm + + +class TestCounting: + def test_cumcount(self): + df = DataFrame([["a"], ["a"], ["a"], ["b"], ["a"]], columns=["A"]) + g = df.groupby("A") + sg = g.A + + expected = Series([0, 1, 2, 0, 3]) + + tm.assert_series_equal(expected, g.cumcount()) + tm.assert_series_equal(expected, sg.cumcount()) + + def test_cumcount_empty(self): + ge = DataFrame().groupby(level=0) + se = Series(dtype=object).groupby(level=0) + + # edge case, as this is usually considered float + e = Series(dtype="int64") + + tm.assert_series_equal(e, ge.cumcount()) + tm.assert_series_equal(e, se.cumcount()) + + def test_cumcount_dupe_index(self): + df = DataFrame( + [["a"], ["a"], ["a"], ["b"], ["a"]], columns=["A"], index=[0] * 5 + ) + g = df.groupby("A") + sg = g.A + + expected = Series([0, 1, 2, 0, 3], index=[0] * 5) + + tm.assert_series_equal(expected, g.cumcount()) + tm.assert_series_equal(expected, sg.cumcount()) + + def test_cumcount_mi(self): + mi = MultiIndex.from_tuples([[0, 1], [1, 2], [2, 2], [2, 2], [1, 0]]) + df = DataFrame([["a"], ["a"], ["a"], ["b"], ["a"]], columns=["A"], index=mi) + g = df.groupby("A") + sg = g.A + + expected = Series([0, 1, 2, 0, 3], index=mi) + + tm.assert_series_equal(expected, g.cumcount()) + tm.assert_series_equal(expected, sg.cumcount()) + + def test_cumcount_groupby_not_col(self): + df = DataFrame( + [["a"], ["a"], ["a"], ["b"], ["a"]], columns=["A"], index=[0] * 5 + ) + g = df.groupby([0, 0, 0, 1, 0]) + sg = g.A + + expected = Series([0, 1, 2, 0, 3], index=[0] * 5) + + tm.assert_series_equal(expected, g.cumcount()) + tm.assert_series_equal(expected, sg.cumcount()) + + def test_ngroup(self): + df = DataFrame({"A": list("aaaba")}) + g = df.groupby("A") + sg = g.A + + expected = Series([0, 0, 0, 1, 0]) + + tm.assert_series_equal(expected, g.ngroup()) + tm.assert_series_equal(expected, sg.ngroup()) + + def test_ngroup_distinct(self): + df = DataFrame({"A": list("abcde")}) + g = df.groupby("A") + sg = g.A + + expected = Series(range(5), dtype="int64") + + tm.assert_series_equal(expected, g.ngroup()) + tm.assert_series_equal(expected, sg.ngroup()) + + def test_ngroup_one_group(self): + df = DataFrame({"A": [0] * 5}) + g = df.groupby("A") + sg = g.A + + expected = Series([0] * 5) + + tm.assert_series_equal(expected, g.ngroup()) + tm.assert_series_equal(expected, sg.ngroup()) + + def test_ngroup_empty(self): + ge = DataFrame().groupby(level=0) + se = Series(dtype=object).groupby(level=0) + + # edge case, as this is usually considered float + e = Series(dtype="int64") + + tm.assert_series_equal(e, ge.ngroup()) + tm.assert_series_equal(e, se.ngroup()) + + def test_ngroup_series_matches_frame(self): + df = DataFrame({"A": list("aaaba")}) + s = Series(list("aaaba")) + + tm.assert_series_equal(df.groupby(s).ngroup(), s.groupby(s).ngroup()) + + def test_ngroup_dupe_index(self): + df = DataFrame({"A": list("aaaba")}, index=[0] * 5) + g = df.groupby("A") + sg = g.A + + expected = Series([0, 0, 0, 1, 0], index=[0] * 5) + + tm.assert_series_equal(expected, g.ngroup()) + tm.assert_series_equal(expected, sg.ngroup()) + + def 
test_ngroup_mi(self): + mi = MultiIndex.from_tuples([[0, 1], [1, 2], [2, 2], [2, 2], [1, 0]]) + df = DataFrame({"A": list("aaaba")}, index=mi) + g = df.groupby("A") + sg = g.A + expected = Series([0, 0, 0, 1, 0], index=mi) + + tm.assert_series_equal(expected, g.ngroup()) + tm.assert_series_equal(expected, sg.ngroup()) + + def test_ngroup_groupby_not_col(self): + df = DataFrame({"A": list("aaaba")}, index=[0] * 5) + g = df.groupby([0, 0, 0, 1, 0]) + sg = g.A + + expected = Series([0, 0, 0, 1, 0], index=[0] * 5) + + tm.assert_series_equal(expected, g.ngroup()) + tm.assert_series_equal(expected, sg.ngroup()) + + def test_ngroup_descending(self): + df = DataFrame(["a", "a", "b", "a", "b"], columns=["A"]) + g = df.groupby(["A"]) + + ascending = Series([0, 0, 1, 0, 1]) + descending = Series([1, 1, 0, 1, 0]) + + tm.assert_series_equal(descending, (g.ngroups - 1) - ascending) + tm.assert_series_equal(ascending, g.ngroup(ascending=True)) + tm.assert_series_equal(descending, g.ngroup(ascending=False)) + + def test_ngroup_matches_cumcount(self): + # verify one manually-worked out case works + df = DataFrame( + [["a", "x"], ["a", "y"], ["b", "x"], ["a", "x"], ["b", "y"]], + columns=["A", "X"], + ) + g = df.groupby(["A", "X"]) + g_ngroup = g.ngroup() + g_cumcount = g.cumcount() + expected_ngroup = Series([0, 1, 2, 0, 3]) + expected_cumcount = Series([0, 0, 0, 1, 0]) + + tm.assert_series_equal(g_ngroup, expected_ngroup) + tm.assert_series_equal(g_cumcount, expected_cumcount) + + def test_ngroup_cumcount_pair(self): + # brute force comparison for all small series + for p in product(range(3), repeat=4): + df = DataFrame({"a": p}) + g = df.groupby(["a"]) + + order = sorted(set(p)) + ngroupd = [order.index(val) for val in p] + cumcounted = [p[:i].count(val) for i, val in enumerate(p)] + + tm.assert_series_equal(g.ngroup(), Series(ngroupd)) + tm.assert_series_equal(g.cumcount(), Series(cumcounted)) + + def test_ngroup_respects_groupby_order(self): + np.random.seed(0) + df = DataFrame({"a": np.random.choice(list("abcdef"), 100)}) + for sort_flag in (False, True): + g = df.groupby(["a"], sort=sort_flag) + df["group_id"] = -1 + df["group_index"] = -1 + + for i, (_, group) in enumerate(g): + df.loc[group.index, "group_id"] = i + for j, ind in enumerate(group.index): + df.loc[ind, "group_index"] = j + + tm.assert_series_equal(Series(df["group_id"].values), g.ngroup()) + tm.assert_series_equal(Series(df["group_index"].values), g.cumcount()) + + @pytest.mark.parametrize( + "datetimelike", + [ + [Timestamp(f"2016-05-{i:02d} 20:09:25+00:00") for i in range(1, 4)], + [Timestamp(f"2016-05-{i:02d} 20:09:25") for i in range(1, 4)], + [Timestamp(f"2016-05-{i:02d} 20:09:25", tz="UTC") for i in range(1, 4)], + [Timedelta(x, unit="h") for x in range(1, 4)], + [Period(freq="2W", year=2017, month=x) for x in range(1, 4)], + ], + ) + def test_count_with_datetimelike(self, datetimelike): + # test for #13393, where DataFrameGroupBy.count() fails + # when counting a datetimelike column.
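+ # Whatever the flavour (naive/tz-aware Timestamp, Timedelta, Period), count() + # should simply report the number of non-null values per group.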
+ + df = DataFrame({"x": ["a", "a", "b"], "y": datetimelike}) + res = df.groupby("x").count() + expected = DataFrame({"y": [2, 1]}, index=["a", "b"]) + expected.index.name = "x" + tm.assert_frame_equal(expected, res) + + def test_count_with_only_nans_in_first_group(self): + # GH21956 + df = DataFrame({"A": [np.nan, np.nan], "B": ["a", "b"], "C": [1, 2]}) + result = df.groupby(["A", "B"]).C.count() + mi = MultiIndex(levels=[[], ["a", "b"]], codes=[[], []], names=["A", "B"]) + expected = Series([], index=mi, dtype=np.int64, name="C") + tm.assert_series_equal(result, expected, check_index_type=False) + + def test_count_groupby_column_with_nan_in_groupby_column(self): + # https://github.com/pandas-dev/pandas/issues/32841 + df = DataFrame({"A": [1, 1, 1, 1, 1], "B": [5, 4, np.NaN, 3, 0]}) + res = df.groupby(["B"]).count() + expected = DataFrame( + index=Index([0.0, 3.0, 4.0, 5.0], name="B"), data={"A": [1, 1, 1, 1]} + ) + tm.assert_frame_equal(expected, res) + + def test_groupby_count_dateparseerror(self): + dr = date_range(start="1/1/2012", freq="5min", periods=10) + + # BAD Example, datetimes first + ser = Series(np.arange(10), index=[dr, np.arange(10)]) + grouped = ser.groupby(lambda x: x[1] % 2 == 0) + result = grouped.count() + + ser = Series(np.arange(10), index=[np.arange(10), dr]) + grouped = ser.groupby(lambda x: x[0] % 2 == 0) + expected = grouped.count() + + tm.assert_series_equal(result, expected) + + +def test_groupby_timedelta_cython_count(): + df = DataFrame( + {"g": list("ab" * 2), "delt": np.arange(4).astype("timedelta64[ns]")} + ) + expected = Series([2, 2], index=Index(["a", "b"], name="g"), name="delt") + result = df.groupby("g").delt.count() + tm.assert_series_equal(expected, result) + + +def test_count(): + n = 1 << 15 + dr = date_range("2015-08-30", periods=n // 10, freq="T") + + df = DataFrame( + { + "1st": np.random.choice(list(ascii_lowercase), n), + "2nd": np.random.randint(0, 5, n), + "3rd": np.random.randn(n).round(3), + "4th": np.random.randint(-10, 10, n), + "5th": np.random.choice(dr, n), + "6th": np.random.randn(n).round(3), + "7th": np.random.randn(n).round(3), + "8th": np.random.choice(dr, n) - np.random.choice(dr, 1), + "9th": np.random.choice(list(ascii_lowercase), n), + } + ) + + for col in df.columns.drop(["1st", "2nd", "4th"]): + df.loc[np.random.choice(n, n // 10), col] = np.nan + + df["9th"] = df["9th"].astype("category") + + for key in ["1st", "2nd", ["1st", "2nd"]]: + left = df.groupby(key).count() + right = df.groupby(key).apply(DataFrame.count).drop(key, axis=1) + tm.assert_frame_equal(left, right) + + +def test_count_non_nulls(): + # GH#5610 + # count counts non-nulls + df = DataFrame( + [[1, 2, "foo"], [1, np.nan, "bar"], [3, np.nan, np.nan]], + columns=["A", "B", "C"], + ) + + count_as = df.groupby("A").count() + count_not_as = df.groupby("A", as_index=False).count() + + expected = DataFrame([[1, 2], [0, 0]], columns=["B", "C"], index=[1, 3]) + expected.index.name = "A" + tm.assert_frame_equal(count_not_as, expected.reset_index()) + tm.assert_frame_equal(count_as, expected) + + count_B = df.groupby("A")["B"].count() + tm.assert_series_equal(count_B, expected["B"]) + + +def test_count_object(): + df = DataFrame({"a": ["a"] * 3 + ["b"] * 3, "c": [2] * 3 + [3] * 3}) + result = df.groupby("c").a.count() + expected = Series([3, 3], index=Index([2, 3], name="c"), name="a") + tm.assert_series_equal(result, expected) + + df = DataFrame({"a": ["a", np.nan, np.nan] + ["b"] * 3, "c": [2] * 3 + [3] * 3}) + result = df.groupby("c").a.count() + expected = 
Series([1, 3], index=Index([2, 3], name="c"), name="a") + tm.assert_series_equal(result, expected) + + +def test_count_cross_type(): + # GH8169 + vals = np.hstack( + (np.random.randint(0, 5, (100, 2)), np.random.randint(0, 2, (100, 2))) + ) + + df = DataFrame(vals, columns=["a", "b", "c", "d"]) + df[df == 2] = np.nan + expected = df.groupby(["c", "d"]).count() + + for t in ["float32", "object"]: + df["a"] = df["a"].astype(t) + df["b"] = df["b"].astype(t) + result = df.groupby(["c", "d"]).count() + tm.assert_frame_equal(result, expected) + + +def test_lower_int_prec_count(): + df = DataFrame( + { + "a": np.array([0, 1, 2, 100], np.int8), + "b": np.array([1, 2, 3, 6], np.uint32), + "c": np.array([4, 5, 6, 8], np.int16), + "grp": list("ab" * 2), + } + ) + result = df.groupby("grp").count() + expected = DataFrame( + {"a": [2, 2], "b": [2, 2], "c": [2, 2]}, index=Index(list("ab"), name="grp") + ) + tm.assert_frame_equal(result, expected) + + +def test_count_uses_size_on_exception(): + class RaisingObjectException(Exception): + pass + + class RaisingObject: + def __init__(self, msg="I will raise inside Cython"): + super().__init__() + self.msg = msg + + def __eq__(self, other): + # gets called in Cython to check that raising calls the method + raise RaisingObjectException(self.msg) + + df = DataFrame({"a": [RaisingObject() for _ in range(4)], "grp": list("ab" * 2)}) + result = df.groupby("grp").count() + expected = DataFrame({"a": [2, 2]}, index=Index(list("ab"), name="grp")) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_filters.py b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_filters.py new file mode 100644 index 0000000..b405145 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_filters.py @@ -0,0 +1,614 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Series, + Timestamp, +) +import pandas._testing as tm + + +def test_filter_series(): + s = Series([1, 3, 20, 5, 22, 24, 7]) + expected_odd = Series([1, 3, 5, 7], index=[0, 1, 3, 6]) + expected_even = Series([20, 22, 24], index=[2, 4, 5]) + grouper = s.apply(lambda x: x % 2) + grouped = s.groupby(grouper) + tm.assert_series_equal(grouped.filter(lambda x: x.mean() < 10), expected_odd) + tm.assert_series_equal(grouped.filter(lambda x: x.mean() > 10), expected_even) + # Test dropna=False. + tm.assert_series_equal( + grouped.filter(lambda x: x.mean() < 10, dropna=False), + expected_odd.reindex(s.index), + ) + tm.assert_series_equal( + grouped.filter(lambda x: x.mean() > 10, dropna=False), + expected_even.reindex(s.index), + ) + + +def test_filter_single_column_df(): + df = DataFrame([1, 3, 20, 5, 22, 24, 7]) + expected_odd = DataFrame([1, 3, 5, 7], index=[0, 1, 3, 6]) + expected_even = DataFrame([20, 22, 24], index=[2, 4, 5]) + grouper = df[0].apply(lambda x: x % 2) + grouped = df.groupby(grouper) + tm.assert_frame_equal(grouped.filter(lambda x: x.mean() < 10), expected_odd) + tm.assert_frame_equal(grouped.filter(lambda x: x.mean() > 10), expected_even) + # Test dropna=False. 
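+ # With dropna=False, rows belonging to filtered-out groups are kept in place + # as NaN, hence the reindex of the expected objects to the original index.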
+ tm.assert_frame_equal( + grouped.filter(lambda x: x.mean() < 10, dropna=False), + expected_odd.reindex(df.index), + ) + tm.assert_frame_equal( + grouped.filter(lambda x: x.mean() > 10, dropna=False), + expected_even.reindex(df.index), + ) + + +def test_filter_multi_column_df(): + df = DataFrame({"A": [1, 12, 12, 1], "B": [1, 1, 1, 1]}) + grouper = df["A"].apply(lambda x: x % 2) + grouped = df.groupby(grouper) + expected = DataFrame({"A": [12, 12], "B": [1, 1]}, index=[1, 2]) + tm.assert_frame_equal( + grouped.filter(lambda x: x["A"].sum() - x["B"].sum() > 10), expected + ) + + +def test_filter_mixed_df(): + df = DataFrame({"A": [1, 12, 12, 1], "B": "a b c d".split()}) + grouper = df["A"].apply(lambda x: x % 2) + grouped = df.groupby(grouper) + expected = DataFrame({"A": [12, 12], "B": ["b", "c"]}, index=[1, 2]) + tm.assert_frame_equal(grouped.filter(lambda x: x["A"].sum() > 10), expected) + + +def test_filter_out_all_groups(): + s = Series([1, 3, 20, 5, 22, 24, 7]) + grouper = s.apply(lambda x: x % 2) + grouped = s.groupby(grouper) + tm.assert_series_equal(grouped.filter(lambda x: x.mean() > 1000), s[[]]) + df = DataFrame({"A": [1, 12, 12, 1], "B": "a b c d".split()}) + grouper = df["A"].apply(lambda x: x % 2) + grouped = df.groupby(grouper) + tm.assert_frame_equal(grouped.filter(lambda x: x["A"].sum() > 1000), df.loc[[]]) + + +def test_filter_out_no_groups(): + s = Series([1, 3, 20, 5, 22, 24, 7]) + grouper = s.apply(lambda x: x % 2) + grouped = s.groupby(grouper) + filtered = grouped.filter(lambda x: x.mean() > 0) + tm.assert_series_equal(filtered, s) + df = DataFrame({"A": [1, 12, 12, 1], "B": "a b c d".split()}) + grouper = df["A"].apply(lambda x: x % 2) + grouped = df.groupby(grouper) + filtered = grouped.filter(lambda x: x["A"].mean() > 0) + tm.assert_frame_equal(filtered, df) + + +def test_filter_out_all_groups_in_df(): + # GH12768 + df = DataFrame({"a": [1, 1, 2], "b": [1, 2, 0]}) + res = df.groupby("a") + res = res.filter(lambda x: x["b"].sum() > 5, dropna=False) + expected = DataFrame({"a": [np.nan] * 3, "b": [np.nan] * 3}) + tm.assert_frame_equal(expected, res) + + df = DataFrame({"a": [1, 1, 2], "b": [1, 2, 0]}) + res = df.groupby("a") + res = res.filter(lambda x: x["b"].sum() > 5, dropna=True) + expected = DataFrame({"a": [], "b": []}, dtype="int64") + tm.assert_frame_equal(expected, res) + + +def test_filter_condition_raises(): + def raise_if_sum_is_zero(x): + if x.sum() == 0: + raise ValueError + else: + return x.sum() > 0 + + s = Series([-1, 0, 1, 2]) + grouper = s.apply(lambda x: x % 2) + grouped = s.groupby(grouper) + msg = "the filter must return a boolean result" + with pytest.raises(TypeError, match=msg): + grouped.filter(raise_if_sum_is_zero) + + +def test_filter_with_axis_in_groupby(): + # issue 11041 + index = pd.MultiIndex.from_product([range(10), [0, 1]]) + data = DataFrame(np.arange(100).reshape(-1, 20), columns=index, dtype="int64") + result = data.groupby(level=0, axis=1).filter(lambda x: x.iloc[0, 0] > 10) + expected = data.iloc[:, 12:20] + tm.assert_frame_equal(result, expected) + + +def test_filter_bad_shapes(): + df = DataFrame({"A": np.arange(8), "B": list("aabbbbcc"), "C": np.arange(8)}) + s = df["B"] + g_df = df.groupby("B") + g_s = s.groupby(s) + + f = lambda x: x + msg = "filter function returned a DataFrame, but expected a scalar bool" + with pytest.raises(TypeError, match=msg): + g_df.filter(f) + msg = "the filter must return a boolean result" + with pytest.raises(TypeError, match=msg): + g_s.filter(f) + + f = lambda x: x == 1 + msg = "filter 
function returned a DataFrame, but expected a scalar bool" + with pytest.raises(TypeError, match=msg): + g_df.filter(f) + msg = "the filter must return a boolean result" + with pytest.raises(TypeError, match=msg): + g_s.filter(f) + + f = lambda x: np.outer(x, x) + msg = "can't multiply sequence by non-int of type 'str'" + with pytest.raises(TypeError, match=msg): + g_df.filter(f) + msg = "the filter must return a boolean result" + with pytest.raises(TypeError, match=msg): + g_s.filter(f) + + +def test_filter_nan_is_false(): + df = DataFrame({"A": np.arange(8), "B": list("aabbbbcc"), "C": np.arange(8)}) + s = df["B"] + g_df = df.groupby(df["B"]) + g_s = s.groupby(s) + + f = lambda x: np.nan + tm.assert_frame_equal(g_df.filter(f), df.loc[[]]) + tm.assert_series_equal(g_s.filter(f), s[[]]) + + +def test_filter_against_workaround(): + np.random.seed(0) + # Series of ints + s = Series(np.random.randint(0, 100, 1000)) + grouper = s.apply(lambda x: np.round(x, -1)) + grouped = s.groupby(grouper) + f = lambda x: x.mean() > 10 + + old_way = s[grouped.transform(f).astype("bool")] + new_way = grouped.filter(f) + tm.assert_series_equal(new_way.sort_values(), old_way.sort_values()) + + # Series of floats + s = 100 * Series(np.random.random(1000)) + grouper = s.apply(lambda x: np.round(x, -1)) + grouped = s.groupby(grouper) + f = lambda x: x.mean() > 10 + old_way = s[grouped.transform(f).astype("bool")] + new_way = grouped.filter(f) + tm.assert_series_equal(new_way.sort_values(), old_way.sort_values()) + + # Set up DataFrame of ints, floats, strings. + from string import ascii_lowercase + + letters = np.array(list(ascii_lowercase)) + N = 1000 + random_letters = letters.take(np.random.randint(0, 26, N)) + df = DataFrame( + { + "ints": Series(np.random.randint(0, 100, N)), + "floats": N / 10 * Series(np.random.random(N)), + "letters": Series(random_letters), + } + ) + + # Group by ints; filter on floats. + grouped = df.groupby("ints") + old_way = df[grouped.floats.transform(lambda x: x.mean() > N / 20).astype("bool")] + new_way = grouped.filter(lambda x: x["floats"].mean() > N / 20) + tm.assert_frame_equal(new_way, old_way) + + # Group by floats (rounded); filter on strings. + grouper = df.floats.apply(lambda x: np.round(x, -1)) + grouped = df.groupby(grouper) + old_way = df[grouped.letters.transform(lambda x: len(x) < N / 10).astype("bool")] + new_way = grouped.filter(lambda x: len(x.letters) < N / 10) + tm.assert_frame_equal(new_way, old_way) + + # Group by strings; filter on ints. + grouped = df.groupby("letters") + old_way = df[grouped.ints.transform(lambda x: x.mean() > N / 20).astype("bool")] + new_way = grouped.filter(lambda x: x["ints"].mean() > N / 20) + tm.assert_frame_equal(new_way, old_way) + + +def test_filter_using_len(): + # BUG GH4447 + df = DataFrame({"A": np.arange(8), "B": list("aabbbbcc"), "C": np.arange(8)}) + grouped = df.groupby("B") + actual = grouped.filter(lambda x: len(x) > 2) + expected = DataFrame( + {"A": np.arange(2, 6), "B": list("bbbb"), "C": np.arange(2, 6)}, + index=np.arange(2, 6), + ) + tm.assert_frame_equal(actual, expected) + + actual = grouped.filter(lambda x: len(x) > 4) + expected = df.loc[[]] + tm.assert_frame_equal(actual, expected) + + # Series have always worked properly, but we'll test anyway. 
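+ # (same length-based predicate as above, routed through SeriesGroupBy)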
+ s = df["B"] + grouped = s.groupby(s) + actual = grouped.filter(lambda x: len(x) > 2) + expected = Series(4 * ["b"], index=np.arange(2, 6), name="B") + tm.assert_series_equal(actual, expected) + + actual = grouped.filter(lambda x: len(x) > 4) + expected = s[[]] + tm.assert_series_equal(actual, expected) + + +def test_filter_maintains_ordering(): + # Simple case: index is sequential. #4621 + df = DataFrame( + {"pid": [1, 1, 1, 2, 2, 3, 3, 3], "tag": [23, 45, 62, 24, 45, 34, 25, 62]} + ) + s = df["pid"] + grouped = df.groupby("tag") + actual = grouped.filter(lambda x: len(x) > 1) + expected = df.iloc[[1, 2, 4, 7]] + tm.assert_frame_equal(actual, expected) + + grouped = s.groupby(df["tag"]) + actual = grouped.filter(lambda x: len(x) > 1) + expected = s.iloc[[1, 2, 4, 7]] + tm.assert_series_equal(actual, expected) + + # Now index is sequentially decreasing. + df.index = np.arange(len(df) - 1, -1, -1) + s = df["pid"] + grouped = df.groupby("tag") + actual = grouped.filter(lambda x: len(x) > 1) + expected = df.iloc[[1, 2, 4, 7]] + tm.assert_frame_equal(actual, expected) + + grouped = s.groupby(df["tag"]) + actual = grouped.filter(lambda x: len(x) > 1) + expected = s.iloc[[1, 2, 4, 7]] + tm.assert_series_equal(actual, expected) + + # Index is shuffled. + SHUFFLED = [4, 6, 7, 2, 1, 0, 5, 3] + df.index = df.index[SHUFFLED] + s = df["pid"] + grouped = df.groupby("tag") + actual = grouped.filter(lambda x: len(x) > 1) + expected = df.iloc[[1, 2, 4, 7]] + tm.assert_frame_equal(actual, expected) + + grouped = s.groupby(df["tag"]) + actual = grouped.filter(lambda x: len(x) > 1) + expected = s.iloc[[1, 2, 4, 7]] + tm.assert_series_equal(actual, expected) + + +def test_filter_multiple_timestamp(): + # GH 10114 + df = DataFrame( + { + "A": np.arange(5, dtype="int64"), + "B": ["foo", "bar", "foo", "bar", "bar"], + "C": Timestamp("20130101"), + } + ) + + grouped = df.groupby(["B", "C"]) + + result = grouped["A"].filter(lambda x: True) + tm.assert_series_equal(df["A"], result) + + result = grouped["A"].transform(len) + expected = Series([2, 3, 2, 3, 3], name="A") + tm.assert_series_equal(result, expected) + + result = grouped.filter(lambda x: True) + tm.assert_frame_equal(df, result) + + result = grouped.transform("sum") + expected = DataFrame({"A": [2, 8, 2, 8, 8]}) + tm.assert_frame_equal(result, expected) + + result = grouped.transform(len) + expected = DataFrame({"A": [2, 3, 2, 3, 3]}) + tm.assert_frame_equal(result, expected) + + +def test_filter_and_transform_with_non_unique_int_index(): + # GH4620 + index = [1, 1, 1, 2, 1, 1, 0, 1] + df = DataFrame( + {"pid": [1, 1, 1, 2, 2, 3, 3, 3], "tag": [23, 45, 62, 24, 45, 34, 25, 62]}, + index=index, + ) + grouped_df = df.groupby("tag") + ser = df["pid"] + grouped_ser = ser.groupby(df["tag"]) + expected_indexes = [1, 2, 4, 7] + + # Filter DataFrame + actual = grouped_df.filter(lambda x: len(x) > 1) + expected = df.iloc[expected_indexes] + tm.assert_frame_equal(actual, expected) + + actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False) + expected = df.copy() + expected.iloc[[0, 3, 5, 6]] = np.nan + tm.assert_frame_equal(actual, expected) + + # Filter Series + actual = grouped_ser.filter(lambda x: len(x) > 1) + expected = ser.take(expected_indexes) + tm.assert_series_equal(actual, expected) + + actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False) + NA = np.nan + expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name="pid") + # ^ made manually because this can get confusing! 
+ tm.assert_series_equal(actual, expected) + + # Transform Series + actual = grouped_ser.transform(len) + expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index, name="pid") + tm.assert_series_equal(actual, expected) + + # Transform (a column from) DataFrameGroupBy + actual = grouped_df.pid.transform(len) + tm.assert_series_equal(actual, expected) + + +def test_filter_and_transform_with_multiple_non_unique_int_index(): + # GH4620 + index = [1, 1, 1, 2, 0, 0, 0, 1] + df = DataFrame( + {"pid": [1, 1, 1, 2, 2, 3, 3, 3], "tag": [23, 45, 62, 24, 45, 34, 25, 62]}, + index=index, + ) + grouped_df = df.groupby("tag") + ser = df["pid"] + grouped_ser = ser.groupby(df["tag"]) + expected_indexes = [1, 2, 4, 7] + + # Filter DataFrame + actual = grouped_df.filter(lambda x: len(x) > 1) + expected = df.iloc[expected_indexes] + tm.assert_frame_equal(actual, expected) + + actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False) + expected = df.copy() + expected.iloc[[0, 3, 5, 6]] = np.nan + tm.assert_frame_equal(actual, expected) + + # Filter Series + actual = grouped_ser.filter(lambda x: len(x) > 1) + expected = ser.take(expected_indexes) + tm.assert_series_equal(actual, expected) + + actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False) + NA = np.nan + expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name="pid") + # ^ made manually because this can get confusing! + tm.assert_series_equal(actual, expected) + + # Transform Series + actual = grouped_ser.transform(len) + expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index, name="pid") + tm.assert_series_equal(actual, expected) + + # Transform (a column from) DataFrameGroupBy + actual = grouped_df.pid.transform(len) + tm.assert_series_equal(actual, expected) + + +def test_filter_and_transform_with_non_unique_float_index(): + # GH4620 + index = np.array([1, 1, 1, 2, 1, 1, 0, 1], dtype=float) + df = DataFrame( + {"pid": [1, 1, 1, 2, 2, 3, 3, 3], "tag": [23, 45, 62, 24, 45, 34, 25, 62]}, + index=index, + ) + grouped_df = df.groupby("tag") + ser = df["pid"] + grouped_ser = ser.groupby(df["tag"]) + expected_indexes = [1, 2, 4, 7] + + # Filter DataFrame + actual = grouped_df.filter(lambda x: len(x) > 1) + expected = df.iloc[expected_indexes] + tm.assert_frame_equal(actual, expected) + + actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False) + expected = df.copy() + expected.iloc[[0, 3, 5, 6]] = np.nan + tm.assert_frame_equal(actual, expected) + + # Filter Series + actual = grouped_ser.filter(lambda x: len(x) > 1) + expected = ser.take(expected_indexes) + tm.assert_series_equal(actual, expected) + + actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False) + NA = np.nan + expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name="pid") + # ^ made manually because this can get confusing! 
+ tm.assert_series_equal(actual, expected) + + # Transform Series + actual = grouped_ser.transform(len) + expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index, name="pid") + tm.assert_series_equal(actual, expected) + + # Transform (a column from) DataFrameGroupBy + actual = grouped_df.pid.transform(len) + tm.assert_series_equal(actual, expected) + + +def test_filter_and_transform_with_non_unique_timestamp_index(): + # GH4620 + t0 = Timestamp("2013-09-30 00:05:00") + t1 = Timestamp("2013-10-30 00:05:00") + t2 = Timestamp("2013-11-30 00:05:00") + index = [t1, t1, t1, t2, t1, t1, t0, t1] + df = DataFrame( + {"pid": [1, 1, 1, 2, 2, 3, 3, 3], "tag": [23, 45, 62, 24, 45, 34, 25, 62]}, + index=index, + ) + grouped_df = df.groupby("tag") + ser = df["pid"] + grouped_ser = ser.groupby(df["tag"]) + expected_indexes = [1, 2, 4, 7] + + # Filter DataFrame + actual = grouped_df.filter(lambda x: len(x) > 1) + expected = df.iloc[expected_indexes] + tm.assert_frame_equal(actual, expected) + + actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False) + expected = df.copy() + expected.iloc[[0, 3, 5, 6]] = np.nan + tm.assert_frame_equal(actual, expected) + + # Filter Series + actual = grouped_ser.filter(lambda x: len(x) > 1) + expected = ser.take(expected_indexes) + tm.assert_series_equal(actual, expected) + + actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False) + NA = np.nan + expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name="pid") + # ^ made manually because this can get confusing! + tm.assert_series_equal(actual, expected) + + # Transform Series + actual = grouped_ser.transform(len) + expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index, name="pid") + tm.assert_series_equal(actual, expected) + + # Transform (a column from) DataFrameGroupBy + actual = grouped_df.pid.transform(len) + tm.assert_series_equal(actual, expected) + + +def test_filter_and_transform_with_non_unique_string_index(): + # GH4620 + index = list("bbbcbbab") + df = DataFrame( + {"pid": [1, 1, 1, 2, 2, 3, 3, 3], "tag": [23, 45, 62, 24, 45, 34, 25, 62]}, + index=index, + ) + grouped_df = df.groupby("tag") + ser = df["pid"] + grouped_ser = ser.groupby(df["tag"]) + expected_indexes = [1, 2, 4, 7] + + # Filter DataFrame + actual = grouped_df.filter(lambda x: len(x) > 1) + expected = df.iloc[expected_indexes] + tm.assert_frame_equal(actual, expected) + + actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False) + expected = df.copy() + expected.iloc[[0, 3, 5, 6]] = np.nan + tm.assert_frame_equal(actual, expected) + + # Filter Series + actual = grouped_ser.filter(lambda x: len(x) > 1) + expected = ser.take(expected_indexes) + tm.assert_series_equal(actual, expected) + + actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False) + NA = np.nan + expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name="pid") + # ^ made manually because this can get confusing! + tm.assert_series_equal(actual, expected) + + # Transform Series + actual = grouped_ser.transform(len) + expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index, name="pid") + tm.assert_series_equal(actual, expected) + + # Transform (a column from) DataFrameGroupBy + actual = grouped_df.pid.transform(len) + tm.assert_series_equal(actual, expected) + + +def test_filter_has_access_to_grouped_cols(): + df = DataFrame([[1, 2], [1, 3], [5, 6]], columns=["A", "B"]) + g = df.groupby("A") + # previously didn't have access to col A #???? 
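+ # i.e. the grouped-by column "A" must itself be visible to the filter + # function; the A == 1 group sums to 2 and is the one kept below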
+ filt = g.filter(lambda x: x["A"].sum() == 2) + tm.assert_frame_equal(filt, df.iloc[[0, 1]]) + + +def test_filter_enforces_scalarness(): + df = DataFrame( + [ + ["best", "a", "x"], + ["worst", "b", "y"], + ["best", "c", "x"], + ["best", "d", "y"], + ["worst", "d", "y"], + ["worst", "d", "y"], + ["best", "d", "z"], + ], + columns=["a", "b", "c"], + ) + with pytest.raises(TypeError, match="filter function returned a.*"): + df.groupby("c").filter(lambda g: g["a"] == "best") + + +def test_filter_non_bool_raises(): + df = DataFrame( + [ + ["best", "a", 1], + ["worst", "b", 1], + ["best", "c", 1], + ["best", "d", 1], + ["worst", "d", 1], + ["worst", "d", 1], + ["best", "d", 1], + ], + columns=["a", "b", "c"], + ) + with pytest.raises(TypeError, match="filter function returned a.*"): + df.groupby("a").filter(lambda g: g.c.mean()) + + +def test_filter_dropna_with_empty_groups(): + # GH 10780 + data = Series(np.random.rand(9), index=np.repeat([1, 2, 3], 3)) + grouped = data.groupby(level=0) + result_false = grouped.filter(lambda x: x.mean() > 1, dropna=False) + expected_false = Series([np.nan] * 9, index=np.repeat([1, 2, 3], 3)) + tm.assert_series_equal(result_false, expected_false) + + result_true = grouped.filter(lambda x: x.mean() > 1, dropna=True) + expected_true = Series(index=pd.Index([], dtype=int), dtype=np.float64) + tm.assert_series_equal(result_true, expected_true) + + +def test_filter_consistent_result_before_after_agg_func(): + # GH 17091 + df = DataFrame({"data": range(6), "key": list("ABCABC")}) + grouper = df.groupby("key") + result = grouper.filter(lambda x: True) + expected = DataFrame({"data": range(6), "key": list("ABCABC")}) + tm.assert_frame_equal(result, expected) + + grouper.sum() + result = grouper.filter(lambda x: True) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_frame_value_counts.py b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_frame_value_counts.py new file mode 100644 index 0000000..79ef46d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_frame_value_counts.py @@ -0,0 +1,444 @@ +import numpy as np +import pytest + +from pandas import ( + CategoricalIndex, + DataFrame, + Index, + MultiIndex, + Series, +) +import pandas._testing as tm + + +@pytest.fixture +def education_df(): + return DataFrame( + { + "gender": ["male", "male", "female", "male", "female", "male"], + "education": ["low", "medium", "high", "low", "high", "low"], + "country": ["US", "FR", "US", "FR", "FR", "FR"], + } + ) + + +def test_axis(education_df): + gp = education_df.groupby("country", axis=1) + with pytest.raises(NotImplementedError, match="axis"): + gp.value_counts() + + +def test_bad_subset(education_df): + gp = education_df.groupby("country") + with pytest.raises(ValueError, match="subset"): + gp.value_counts(subset=["country"]) + + +def test_basic(education_df): + # gh43564 + result = education_df.groupby("country")[["gender", "education"]].value_counts( + normalize=True + ) + expected = Series( + data=[0.5, 0.25, 0.25, 0.5, 0.5], + index=MultiIndex.from_tuples( + [ + ("FR", "male", "low"), + ("FR", "female", "high"), + ("FR", "male", "medium"), + ("US", "female", "high"), + ("US", "male", "low"), + ], + names=["country", "gender", "education"], + ), + ) + tm.assert_series_equal(result, expected) + + +def _frame_value_counts(df, keys, normalize, sort, ascending): + return df[keys].value_counts(normalize=normalize, sort=sort, ascending=ascending) + + 
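+# Editor's note -- illustrative sketch, not part of the upstream pandas test +# suite (the test name below is introduced here): the behaviour the helper +# above cross-checks. DataFrameGroupBy.value_counts counts unique rows per +# group; normalize=True turns the counts into within-group proportions. +def test_value_counts_minimal_sketch(): + df = DataFrame({"g": ["a", "a", "b"], "v": [1, 1, 2]}) + result = df.groupby("g").value_counts(normalize=True) + assert result.tolist() == [1.0, 1.0] # each group contains one unique row + + 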
+@pytest.mark.parametrize("groupby", ["column", "array", "function"]) +@pytest.mark.parametrize("normalize", [True, False]) +@pytest.mark.parametrize( + "sort, ascending", + [ + (False, None), + (True, True), + (True, False), + ], +) +@pytest.mark.parametrize("as_index", [True, False]) +@pytest.mark.parametrize("frame", [True, False]) +def test_against_frame_and_seriesgroupby( + education_df, groupby, normalize, sort, ascending, as_index, frame +): + # test all parameters: + # - Use column, array or function as by= parameter + # - Whether or not to normalize + # - Whether or not to sort and how + # - Whether or not to use the groupby as an index + # - 3-way compare against: + # - apply with :meth:`~DataFrame.value_counts` + # - `~SeriesGroupBy.value_counts` + by = { + "column": "country", + "array": education_df["country"].values, + "function": lambda x: education_df["country"][x] == "US", + }[groupby] + + gp = education_df.groupby(by=by, as_index=as_index) + result = gp[["gender", "education"]].value_counts( + normalize=normalize, sort=sort, ascending=ascending + ) + if frame: + # compare against apply with DataFrame value_counts + expected = gp.apply( + _frame_value_counts, ["gender", "education"], normalize, sort, ascending + ) + + if as_index: + tm.assert_series_equal(result, expected) + else: + name = "proportion" if normalize else "count" + expected = expected.reset_index().rename({0: name}, axis=1) + if groupby == "column": + expected = expected.rename({"level_0": "country"}, axis=1) + expected["country"] = np.where(expected["country"], "US", "FR") + elif groupby == "function": + expected["level_0"] = expected["level_0"] == 1 + else: + expected["level_0"] = np.where(expected["level_0"], "US", "FR") + tm.assert_frame_equal(result, expected) + else: + # compare against SeriesGroupBy value_counts + education_df["both"] = education_df["gender"] + "-" + education_df["education"] + expected = gp["both"].value_counts( + normalize=normalize, sort=sort, ascending=ascending + ) + expected.name = None + if as_index: + index_frame = expected.index.to_frame(index=False) + index_frame["gender"] = index_frame["both"].str.split("-").str.get(0) + index_frame["education"] = index_frame["both"].str.split("-").str.get(1) + del index_frame["both"] + index_frame = index_frame.rename({0: None}, axis=1) + expected.index = MultiIndex.from_frame(index_frame) + tm.assert_series_equal(result, expected) + else: + expected.insert(1, "gender", expected["both"].str.split("-").str.get(0)) + expected.insert(2, "education", expected["both"].str.split("-").str.get(1)) + del expected["both"] + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("normalize", [True, False]) +@pytest.mark.parametrize( + "sort, ascending, expected_rows, expected_count, expected_group_size", + [ + (False, None, [0, 1, 2, 3, 4], [1, 1, 1, 2, 1], [1, 3, 1, 3, 1]), + (True, False, [4, 3, 1, 2, 0], [1, 2, 1, 1, 1], [1, 3, 3, 1, 1]), + (True, True, [4, 1, 3, 2, 0], [1, 1, 2, 1, 1], [1, 3, 3, 1, 1]), + ], +) +def test_compound( + education_df, + normalize, + sort, + ascending, + expected_rows, + expected_count, + expected_group_size, +): + # Multiple groupby keys and as_index=False + gp = education_df.groupby(["country", "gender"], as_index=False, sort=False) + result = gp["education"].value_counts( + normalize=normalize, sort=sort, ascending=ascending + ) + expected = DataFrame() + for column in ["country", "gender", "education"]: + expected[column] = [education_df[column][row] for row in expected_rows] + if normalize: + 
expected["proportion"] = expected_count + expected["proportion"] /= expected_group_size + else: + expected["count"] = expected_count + tm.assert_frame_equal(result, expected) + + +@pytest.fixture +def animals_df(): + return DataFrame( + {"key": [1, 1, 1, 1], "num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]}, + index=["falcon", "dog", "cat", "ant"], + ) + + +@pytest.mark.parametrize( + "sort, ascending, normalize, expected_data, expected_index", + [ + (False, None, False, [1, 2, 1], [(1, 1, 1), (2, 4, 6), (2, 0, 0)]), + (True, True, False, [1, 1, 2], [(1, 1, 1), (2, 6, 4), (2, 0, 0)]), + (True, False, False, [2, 1, 1], [(1, 1, 1), (4, 2, 6), (0, 2, 0)]), + (True, False, True, [0.5, 0.25, 0.25], [(1, 1, 1), (4, 2, 6), (0, 2, 0)]), + ], +) +def test_data_frame_value_counts( + animals_df, sort, ascending, normalize, expected_data, expected_index +): + # 3-way compare with :meth:`~DataFrame.value_counts` + # Tests from frame/methods/test_value_counts.py + result_frame = animals_df.value_counts( + sort=sort, ascending=ascending, normalize=normalize + ) + expected = Series( + data=expected_data, + index=MultiIndex.from_arrays( + expected_index, names=["key", "num_legs", "num_wings"] + ), + ) + tm.assert_series_equal(result_frame, expected) + + result_frame_groupby = animals_df.groupby("key").value_counts( + sort=sort, ascending=ascending, normalize=normalize + ) + + tm.assert_series_equal(result_frame_groupby, expected) + + +@pytest.fixture +def nulls_df(): + n = np.nan + return DataFrame( + { + "A": [1, 1, n, 4, n, 6, 6, 6, 6], + "B": [1, 1, 3, n, n, 6, 6, 6, 6], + "C": [1, 2, 3, 4, 5, 6, n, 8, n], + "D": [1, 2, 3, 4, 5, 6, 7, n, n], + } + ) + + +@pytest.mark.parametrize( + "group_dropna, count_dropna, expected_rows, expected_values", + [ + ( + False, + False, + [0, 1, 3, 5, 7, 6, 8, 2, 4], + [0.5, 0.5, 1.0, 0.25, 0.25, 0.25, 0.25, 1.0, 1.0], + ), + (False, True, [0, 1, 3, 5, 2, 4], [0.5, 0.5, 1.0, 1.0, 1.0, 1.0]), + (True, False, [0, 1, 5, 7, 6, 8], [0.5, 0.5, 0.25, 0.25, 0.25, 0.25]), + (True, True, [0, 1, 5], [0.5, 0.5, 1.0]), + ], +) +def test_dropna_combinations( + nulls_df, group_dropna, count_dropna, expected_rows, expected_values +): + gp = nulls_df.groupby(["A", "B"], dropna=group_dropna) + result = gp.value_counts(normalize=True, sort=True, dropna=count_dropna) + columns = DataFrame() + for column in nulls_df.columns: + columns[column] = [nulls_df[column][row] for row in expected_rows] + index = MultiIndex.from_frame(columns) + expected = Series(data=expected_values, index=index) + tm.assert_series_equal(result, expected) + + +@pytest.fixture +def names_with_nulls_df(nulls_fixture): + return DataFrame( + { + "key": [1, 1, 1, 1], + "first_name": ["John", "Anne", "John", "Beth"], + "middle_name": ["Smith", nulls_fixture, nulls_fixture, "Louise"], + }, + ) + + +@pytest.mark.parametrize( + "dropna, expected_data, expected_index", + [ + ( + True, + [1, 1], + MultiIndex.from_arrays( + [(1, 1), ("Beth", "John"), ("Louise", "Smith")], + names=["key", "first_name", "middle_name"], + ), + ), + ( + False, + [1, 1, 1, 1], + MultiIndex( + levels=[ + Index([1]), + Index(["Anne", "Beth", "John"]), + Index(["Louise", "Smith", np.nan]), + ], + codes=[[0, 0, 0, 0], [0, 1, 2, 2], [2, 0, 1, 2]], + names=["key", "first_name", "middle_name"], + ), + ), + ], +) +@pytest.mark.parametrize("normalize", [False, True]) +def test_data_frame_value_counts_dropna( + names_with_nulls_df, dropna, normalize, expected_data, expected_index +): + # GH 41334 + # 3-way compare with :meth:`~DataFrame.value_counts` + # 
Tests with nulls from frame/methods/test_value_counts.py + result_frame = names_with_nulls_df.value_counts(dropna=dropna, normalize=normalize) + expected = Series( + data=expected_data, + index=expected_index, + ) + if normalize: + expected /= float(len(expected_data)) + + tm.assert_series_equal(result_frame, expected) + + result_frame_groupby = names_with_nulls_df.groupby("key").value_counts( + dropna=dropna, normalize=normalize + ) + + tm.assert_series_equal(result_frame_groupby, expected) + + +@pytest.mark.parametrize("as_index", [False, True]) +@pytest.mark.parametrize( + "observed, expected_index", + [ + ( + False, + [ + ("FR", "male", "low"), + ("FR", "female", "high"), + ("FR", "male", "medium"), + ("FR", "female", "low"), + ("FR", "female", "medium"), + ("FR", "male", "high"), + ("US", "female", "high"), + ("US", "male", "low"), + ("US", "female", "low"), + ("US", "female", "medium"), + ("US", "male", "high"), + ("US", "male", "medium"), + ], + ), + ( + True, + [ + ("FR", "male", "low"), + ("FR", "female", "high"), + ("FR", "male", "medium"), + ("US", "female", "high"), + ("US", "male", "low"), + ], + ), + ], +) +@pytest.mark.parametrize( + "normalize, expected_data", + [ + (False, np.array([2, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0], dtype=np.int64)), + ( + True, + np.array([0.5, 0.25, 0.25, 0.0, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0]), + ), + ], +) +def test_categorical( + education_df, as_index, observed, expected_index, normalize, expected_data +): + # Test categorical data whether or not observed + gp = education_df.astype("category").groupby( + "country", as_index=as_index, observed=observed + ) + result = gp.value_counts(normalize=normalize) + + expected_series = Series( + data=expected_data[expected_data > 0.0] if observed else expected_data, + index=MultiIndex.from_tuples( + expected_index, + names=["country", "gender", "education"], + ), + ) + for i in range(3): + expected_series.index = expected_series.index.set_levels( + CategoricalIndex(expected_series.index.levels[i]), level=i + ) + + if as_index: + tm.assert_series_equal(result, expected_series) + else: + expected = expected_series.reset_index( + name="proportion" if normalize else "count" + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "normalize, expected_label, expected_values", + [ + (False, "count", [1, 1, 1]), + (True, "proportion", [0.5, 0.5, 1.0]), + ], +) +def test_mixed_groupings(normalize, expected_label, expected_values): + # Test multiple groupings + df = DataFrame({"A": [1, 2, 1], "B": [1, 2, 3]}) + gp = df.groupby([[4, 5, 4], "A", lambda i: 7 if i == 1 else 8], as_index=False) + result = gp.value_counts(sort=True, normalize=normalize) + expected = DataFrame( + { + "level_0": [4, 4, 5], + "A": [1, 1, 2], + "level_2": [8, 8, 7], + "B": [1, 3, 2], + expected_label: expected_values, + } + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "test, expected_names", + [ + ("repeat", ["a", None, "d", "b", "b", "e"]), + ("level", ["a", None, "d", "b", "c", "level_1"]), + ], +) +@pytest.mark.parametrize("as_index", [False, True]) +def test_column_name_clashes(test, expected_names, as_index): + df = DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6], "d": [7, 8], "e": [9, 10]}) + if test == "repeat": + df.columns = list("abbde") + else: + df.columns = list("abcd") + ["level_1"] + + if as_index: + result = df.groupby(["a", [0, 1], "d"], as_index=as_index).value_counts() + expected = Series( + data=(1, 1), + index=MultiIndex.from_tuples( + [(1, 0, 7, 3, 5, 9), (2, 1, 
8, 4, 6, 10)], + names=expected_names, + ), + ) + tm.assert_series_equal(result, expected) + else: + with pytest.raises(ValueError, match="cannot insert"): + df.groupby(["a", [0, 1], "d"], as_index=as_index).value_counts() + + +def test_ambiguous_grouping(): + # Test that groupby is not confused by groupings length equal to row count + df = DataFrame({"a": [1, 1]}) + gb = df.groupby([1, 1]) + result = gb.value_counts() + expected = Series([2], index=MultiIndex.from_tuples([[1, 1]], names=[None, "a"])) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_function.py b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_function.py new file mode 100644 index 0000000..dbc3849 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_function.py @@ -0,0 +1,1201 @@ +import builtins +from io import StringIO + +import numpy as np +import pytest + +from pandas.errors import UnsupportedFunctionCall + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + Timestamp, + date_range, +) +import pandas._testing as tm +import pandas.core.nanops as nanops +from pandas.util import _test_decorators as td + + +@pytest.fixture( + params=[np.int32, np.int64, np.float32, np.float64, "Int64", "Float64"], + ids=["np.int32", "np.int64", "np.float32", "np.float64", "Int64", "Float64"], +) +def dtypes_for_minmax(request): + """ + Fixture of dtypes with min and max values used for testing + cummin and cummax + """ + dtype = request.param + + np_type = dtype + if dtype == "Int64": + np_type = np.int64 + elif dtype == "Float64": + np_type = np.float64 + + min_val = ( + np.iinfo(np_type).min + if np.dtype(np_type).kind == "i" + else np.finfo(np_type).min + ) + max_val = ( + np.iinfo(np_type).max + if np.dtype(np_type).kind == "i" + else np.finfo(np_type).max + ) + + return (dtype, min_val, max_val) + + +def test_intercept_builtin_sum(): + s = Series([1.0, 2.0, np.nan, 3.0]) + grouped = s.groupby([0, 1, 2, 2]) + + result = grouped.agg(builtins.sum) + result2 = grouped.apply(builtins.sum) + expected = grouped.sum() + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result2, expected) + + +@pytest.mark.parametrize("f", [max, min, sum]) +@pytest.mark.parametrize("keys", ["jim", ["jim", "joe"]]) # Single key # Multi-key +def test_builtins_apply(keys, f): + # see gh-8155 + df = DataFrame(np.random.randint(1, 50, (1000, 2)), columns=["jim", "joe"]) + df["jolie"] = np.random.randn(1000) + + gb = df.groupby(keys) + + fname = f.__name__ + result = gb.apply(f) + ngroups = len(df.drop_duplicates(subset=keys)) + + assert_msg = f"invalid frame shape: {result.shape} (expected ({ngroups}, 3))" + assert result.shape == (ngroups, 3), assert_msg + + npfunc = getattr(np, fname) # numpy's equivalent function + if f in [max, min]: + warn = FutureWarning + else: + warn = None + msg = "scalar (max|min) over the entire DataFrame" + with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False): + # stacklevel can be thrown off because (i think) the stack + # goes through some of numpy's C code. 
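+        # A rough illustration of the behaviour pinned down here, assuming a
+        # pandas of this vintage (~1.4); the frame `d` below is hypothetical:
+        #
+        #   >>> d = DataFrame({"jim": [1, 1, 2], "joe": [3.0, 4.0, 5.0]})
+        #   >>> d.groupby("jim").apply(max)  # builtin max is mapped to np.max
+        #        jim  joe
+        #   jim
+        #   1      1  4.0
+        #   2      2  5.0
+        #
+        # i.e. each group is reduced column-wise, and the grouped column
+        # itself survives into the result, which is what gives the
+        # (ngroups, 3) shape asserted above.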
+ expected = gb.apply(npfunc) + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(None): + expected2 = gb.apply(lambda x: npfunc(x, axis=0)) + tm.assert_frame_equal(result, expected2) + + if f != sum: + expected = gb.agg(fname).reset_index() + expected.set_index(keys, inplace=True, drop=False) + tm.assert_frame_equal(result, expected, check_dtype=False) + + tm.assert_series_equal(getattr(result, fname)(), getattr(df, fname)()) + + +class TestNumericOnly: + # make sure that we are passing thru kwargs to our agg functions + + @pytest.fixture + def df(self): + # GH3668 + # GH5724 + df = DataFrame( + { + "group": [1, 1, 2], + "int": [1, 2, 3], + "float": [4.0, 5.0, 6.0], + "string": list("abc"), + "category_string": Series(list("abc")).astype("category"), + "category_int": [7, 8, 9], + "datetime": date_range("20130101", periods=3), + "datetimetz": date_range("20130101", periods=3, tz="US/Eastern"), + "timedelta": pd.timedelta_range("1 s", periods=3, freq="s"), + }, + columns=[ + "group", + "int", + "float", + "string", + "category_string", + "category_int", + "datetime", + "datetimetz", + "timedelta", + ], + ) + return df + + @pytest.mark.parametrize("method", ["mean", "median"]) + def test_averages(self, df, method): + # mean / median + expected_columns_numeric = Index(["int", "float", "category_int"]) + + gb = df.groupby("group") + expected = DataFrame( + { + "category_int": [7.5, 9], + "float": [4.5, 6.0], + "timedelta": [pd.Timedelta("1.5s"), pd.Timedelta("3s")], + "int": [1.5, 3], + "datetime": [ + Timestamp("2013-01-01 12:00:00"), + Timestamp("2013-01-03 00:00:00"), + ], + "datetimetz": [ + Timestamp("2013-01-01 12:00:00", tz="US/Eastern"), + Timestamp("2013-01-03 00:00:00", tz="US/Eastern"), + ], + }, + index=Index([1, 2], name="group"), + columns=[ + "int", + "float", + "category_int", + "datetime", + "datetimetz", + "timedelta", + ], + ) + + with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"): + result = getattr(gb, method)(numeric_only=False) + tm.assert_frame_equal(result.reindex_like(expected), expected) + + expected_columns = expected.columns + + self._check(df, method, expected_columns, expected_columns_numeric) + + @pytest.mark.parametrize("method", ["min", "max"]) + def test_extrema(self, df, method): + # TODO: min, max *should* handle + # categorical (ordered) dtype + + expected_columns = Index( + [ + "int", + "float", + "string", + "category_int", + "datetime", + "datetimetz", + "timedelta", + ] + ) + expected_columns_numeric = expected_columns + + self._check(df, method, expected_columns, expected_columns_numeric) + + @pytest.mark.parametrize("method", ["first", "last"]) + def test_first_last(self, df, method): + + expected_columns = Index( + [ + "int", + "float", + "string", + "category_string", + "category_int", + "datetime", + "datetimetz", + "timedelta", + ] + ) + expected_columns_numeric = expected_columns + + self._check(df, method, expected_columns, expected_columns_numeric) + + @pytest.mark.parametrize("method", ["sum", "cumsum"]) + def test_sum_cumsum(self, df, method): + + expected_columns_numeric = Index(["int", "float", "category_int"]) + expected_columns = Index( + ["int", "float", "string", "category_int", "timedelta"] + ) + if method == "cumsum": + # cumsum loses string + expected_columns = Index(["int", "float", "category_int", "timedelta"]) + + self._check(df, method, expected_columns, expected_columns_numeric) + + @pytest.mark.parametrize("method", ["prod", "cumprod"]) + def test_prod_cumprod(self, df, method): 
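+        # prod/cumprod only make sense for genuinely numeric data: unlike
+        # sum, even the timedelta column is dropped (a product of timedeltas
+        # has no meaning), leaving int, float and the integer-backed category.
+        # Sketch of the column check performed by _check below (illustrative):
+        #   df.groupby("group").prod().columns
+        #   -> Index(['int', 'float', 'category_int'], dtype='object')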
+ + expected_columns = Index(["int", "float", "category_int"]) + expected_columns_numeric = expected_columns + + self._check(df, method, expected_columns, expected_columns_numeric) + + @pytest.mark.parametrize("method", ["cummin", "cummax"]) + def test_cummin_cummax(self, df, method): + # like min, max, but don't include strings + expected_columns = Index( + ["int", "float", "category_int", "datetime", "datetimetz", "timedelta"] + ) + + # GH#15561: numeric_only=False set by default like min/max + expected_columns_numeric = expected_columns + + self._check(df, method, expected_columns, expected_columns_numeric) + + def _check(self, df, method, expected_columns, expected_columns_numeric): + gb = df.groupby("group") + + # cummin, cummax dont have numeric_only kwarg, always use False + warn = None + if method in ["cummin", "cummax"]: + # these dont have numeric_only kwarg, always use False + warn = FutureWarning + elif method in ["min", "max"]: + # these have numeric_only kwarg, but default to False + warn = FutureWarning + + with tm.assert_produces_warning(warn, match="Dropping invalid columns"): + result = getattr(gb, method)() + + tm.assert_index_equal(result.columns, expected_columns_numeric) + + # GH#41475 deprecated silently ignoring nuisance columns + warn = None + if len(expected_columns) < len(gb._obj_with_exclusions.columns): + warn = FutureWarning + with tm.assert_produces_warning(warn, match="Dropping invalid columns"): + result = getattr(gb, method)(numeric_only=False) + + tm.assert_index_equal(result.columns, expected_columns) + + +class TestGroupByNonCythonPaths: + # GH#5610 non-cython calls should not include the grouper + # Tests for code not expected to go through cython paths. + + @pytest.fixture + def df(self): + df = DataFrame( + [[1, 2, "foo"], [1, np.nan, "bar"], [3, np.nan, "baz"]], + columns=["A", "B", "C"], + ) + return df + + @pytest.fixture + def gb(self, df): + gb = df.groupby("A") + return gb + + @pytest.fixture + def gni(self, df): + gni = df.groupby("A", as_index=False) + return gni + + # TODO: non-unique columns, as_index=False + @pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning") + def test_idxmax(self, gb): + # object dtype so idxmax goes through _aggregate_item_by_item + # GH#5610 + # non-cython calls should not include the grouper + expected = DataFrame([[0.0], [np.nan]], columns=["B"], index=[1, 3]) + expected.index.name = "A" + result = gb.idxmax() + tm.assert_frame_equal(result, expected) + + @pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning") + def test_idxmin(self, gb): + # object dtype so idxmax goes through _aggregate_item_by_item + # GH#5610 + # non-cython calls should not include the grouper + expected = DataFrame([[0.0], [np.nan]], columns=["B"], index=[1, 3]) + expected.index.name = "A" + result = gb.idxmin() + tm.assert_frame_equal(result, expected) + + def test_mad(self, gb, gni): + # mad + expected = DataFrame([[0], [np.nan]], columns=["B"], index=[1, 3]) + expected.index.name = "A" + result = gb.mad() + tm.assert_frame_equal(result, expected) + + expected = DataFrame([[1, 0.0], [3, np.nan]], columns=["A", "B"], index=[0, 1]) + result = gni.mad() + tm.assert_frame_equal(result, expected) + + def test_describe(self, df, gb, gni): + # describe + expected_index = Index([1, 3], name="A") + expected_col = MultiIndex( + levels=[["B"], ["count", "mean", "std", "min", "25%", "50%", "75%", "max"]], + codes=[[0] * 8, list(range(8))], + ) + expected = DataFrame( + [ + [1.0, 2.0, np.nan, 2.0, 2.0, 2.0, 2.0, 2.0], + 
[0.0, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + ], + index=expected_index, + columns=expected_col, + ) + result = gb.describe() + tm.assert_frame_equal(result, expected) + + expected = pd.concat( + [ + df[df.A == 1].describe().unstack().to_frame().T, + df[df.A == 3].describe().unstack().to_frame().T, + ] + ) + expected.index = Index([0, 1]) + result = gni.describe() + tm.assert_frame_equal(result, expected) + + +def test_cython_api2(): + + # this takes the fast apply path + + # cumsum (GH5614) + df = DataFrame([[1, 2, np.nan], [1, np.nan, 9], [3, 4, 9]], columns=["A", "B", "C"]) + expected = DataFrame([[2, np.nan], [np.nan, 9], [4, 9]], columns=["B", "C"]) + result = df.groupby("A").cumsum() + tm.assert_frame_equal(result, expected) + + # GH 5755 - cumsum is a transformer and should ignore as_index + result = df.groupby("A", as_index=False).cumsum() + tm.assert_frame_equal(result, expected) + + # GH 13994 + result = df.groupby("A").cumsum(axis=1) + expected = df.cumsum(axis=1) + tm.assert_frame_equal(result, expected) + result = df.groupby("A").cumprod(axis=1) + expected = df.cumprod(axis=1) + tm.assert_frame_equal(result, expected) + + +def test_cython_median(): + df = DataFrame(np.random.randn(1000)) + df.values[::2] = np.nan + + labels = np.random.randint(0, 50, size=1000).astype(float) + labels[::17] = np.nan + + result = df.groupby(labels).median() + exp = df.groupby(labels).agg(nanops.nanmedian) + tm.assert_frame_equal(result, exp) + + df = DataFrame(np.random.randn(1000, 5)) + rs = df.groupby(labels).agg(np.median) + xp = df.groupby(labels).median() + tm.assert_frame_equal(rs, xp) + + +def test_median_empty_bins(observed): + df = DataFrame(np.random.randint(0, 44, 500)) + + grps = range(0, 55, 5) + bins = pd.cut(df[0], grps) + + result = df.groupby(bins, observed=observed).median() + expected = df.groupby(bins, observed=observed).agg(lambda x: x.median()) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "dtype", ["int8", "int16", "int32", "int64", "float32", "float64", "uint64"] +) +@pytest.mark.parametrize( + "method,data", + [ + ("first", {"df": [{"a": 1, "b": 1}, {"a": 2, "b": 3}]}), + ("last", {"df": [{"a": 1, "b": 2}, {"a": 2, "b": 4}]}), + ("min", {"df": [{"a": 1, "b": 1}, {"a": 2, "b": 3}]}), + ("max", {"df": [{"a": 1, "b": 2}, {"a": 2, "b": 4}]}), + ("nth", {"df": [{"a": 1, "b": 2}, {"a": 2, "b": 4}], "args": [1]}), + ("count", {"df": [{"a": 1, "b": 2}, {"a": 2, "b": 2}], "out_type": "int64"}), + ], +) +def test_groupby_non_arithmetic_agg_types(dtype, method, data): + # GH9311, GH6620 + df = DataFrame( + [{"a": 1, "b": 1}, {"a": 1, "b": 2}, {"a": 2, "b": 3}, {"a": 2, "b": 4}] + ) + + df["b"] = df.b.astype(dtype) + + if "args" not in data: + data["args"] = [] + + if "out_type" in data: + out_type = data["out_type"] + else: + out_type = dtype + + exp = data["df"] + df_out = DataFrame(exp) + + df_out["b"] = df_out.b.astype(out_type) + df_out.set_index("a", inplace=True) + + grpd = df.groupby("a") + t = getattr(grpd, method)(*data["args"]) + tm.assert_frame_equal(t, df_out) + + +@pytest.mark.parametrize( + "i", + [ + ( + Timestamp("2011-01-15 12:50:28.502376"), + Timestamp("2011-01-20 12:50:28.593448"), + ), + (24650000000000001, 24650000000000002), + ], +) +def test_groupby_non_arithmetic_agg_int_like_precision(i): + # see gh-6620, gh-9311 + df = DataFrame([{"a": 1, "b": i[0]}, {"a": 1, "b": i[1]}]) + + grp_exp = { + "first": {"expected": i[0]}, + "last": {"expected": i[1]}, + "min": {"expected": i[0]}, + "max": {"expected": i[1]}, + 
"nth": {"expected": i[1], "args": [1]}, + "count": {"expected": 2}, + } + + for method, data in grp_exp.items(): + if "args" not in data: + data["args"] = [] + + grouped = df.groupby("a") + res = getattr(grouped, method)(*data["args"]) + + assert res.iloc[0].b == data["expected"] + + +@pytest.mark.parametrize( + "func, values", + [ + ("idxmin", {"c_int": [0, 2], "c_float": [1, 3], "c_date": [1, 2]}), + ("idxmax", {"c_int": [1, 3], "c_float": [0, 2], "c_date": [0, 3]}), + ], +) +@pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning") +def test_idxmin_idxmax_returns_int_types(func, values): + # GH 25444 + df = DataFrame( + { + "name": ["A", "A", "B", "B"], + "c_int": [1, 2, 3, 4], + "c_float": [4.02, 3.03, 2.04, 1.05], + "c_date": ["2019", "2018", "2016", "2017"], + } + ) + df["c_date"] = pd.to_datetime(df["c_date"]) + df["c_date_tz"] = df["c_date"].dt.tz_localize("US/Pacific") + df["c_timedelta"] = df["c_date"] - df["c_date"].iloc[0] + df["c_period"] = df["c_date"].dt.to_period("W") + df["c_Integer"] = df["c_int"].astype("Int64") + df["c_Floating"] = df["c_float"].astype("Float64") + + result = getattr(df.groupby("name"), func)() + + expected = DataFrame(values, index=Index(["A", "B"], name="name")) + expected["c_date_tz"] = expected["c_date"] + expected["c_timedelta"] = expected["c_date"] + expected["c_period"] = expected["c_date"] + expected["c_Integer"] = expected["c_int"] + expected["c_Floating"] = expected["c_float"] + + tm.assert_frame_equal(result, expected) + + +def test_idxmin_idxmax_axis1(): + df = DataFrame(np.random.randn(10, 4), columns=["A", "B", "C", "D"]) + df["A"] = [1, 2, 3, 1, 2, 3, 1, 2, 3, 4] + + gb = df.groupby("A") + + res = gb.idxmax(axis=1) + + alt = df.iloc[:, 1:].idxmax(axis=1) + indexer = res.index.get_level_values(1) + + tm.assert_series_equal(alt[indexer], res.droplevel("A")) + + df["E"] = date_range("2016-01-01", periods=10) + gb2 = df.groupby("A") + + msg = "reduction operation 'argmax' not allowed for this dtype" + with pytest.raises(TypeError, match=msg): + gb2.idxmax(axis=1) + + +def test_groupby_cumprod(): + # GH 4095 + df = DataFrame({"key": ["b"] * 10, "value": 2}) + + actual = df.groupby("key")["value"].cumprod() + expected = df.groupby("key")["value"].apply(lambda x: x.cumprod()) + expected.name = "value" + tm.assert_series_equal(actual, expected) + + df = DataFrame({"key": ["b"] * 100, "value": 2}) + actual = df.groupby("key")["value"].cumprod() + # if overflows, groupby product casts to float + # while numpy passes back invalid values + df["value"] = df["value"].astype(float) + expected = df.groupby("key")["value"].apply(lambda x: x.cumprod()) + expected.name = "value" + tm.assert_series_equal(actual, expected) + + +def scipy_sem(*args, **kwargs): + from scipy.stats import sem + + return sem(*args, ddof=1, **kwargs) + + +@pytest.mark.parametrize( + "op,targop", + [ + ("mean", np.mean), + ("median", np.median), + ("std", np.std), + ("var", np.var), + ("sum", np.sum), + ("prod", np.prod), + ("min", np.min), + ("max", np.max), + ("first", lambda x: x.iloc[0]), + ("last", lambda x: x.iloc[-1]), + ("count", np.size), + pytest.param("sem", scipy_sem, marks=td.skip_if_no_scipy), + ], +) +def test_ops_general(op, targop): + df = DataFrame(np.random.randn(1000)) + labels = np.random.randint(0, 50, size=1000).astype(float) + + result = getattr(df.groupby(labels), op)() + expected = df.groupby(labels).agg(targop) + tm.assert_frame_equal(result, expected) + + +def test_max_nan_bug(): + raw = """,Date,app,File +-04-23,2013-04-23 
00:00:00,,log080001.log +-05-06,2013-05-06 00:00:00,,log.log +-05-07,2013-05-07 00:00:00,OE,xlsx""" + + df = pd.read_csv(StringIO(raw), parse_dates=[0]) + gb = df.groupby("Date") + r = gb[["File"]].max() + e = gb["File"].max().to_frame() + tm.assert_frame_equal(r, e) + assert not r["File"].isna().any() + + +def test_nlargest(): + a = Series([1, 3, 5, 7, 2, 9, 0, 4, 6, 10]) + b = Series(list("a" * 5 + "b" * 5)) + gb = a.groupby(b) + r = gb.nlargest(3) + e = Series( + [7, 5, 3, 10, 9, 6], + index=MultiIndex.from_arrays([list("aaabbb"), [3, 2, 1, 9, 5, 8]]), + ) + tm.assert_series_equal(r, e) + + a = Series([1, 1, 3, 2, 0, 3, 3, 2, 1, 0]) + gb = a.groupby(b) + e = Series( + [3, 2, 1, 3, 3, 2], + index=MultiIndex.from_arrays([list("aaabbb"), [2, 3, 1, 6, 5, 7]]), + ) + tm.assert_series_equal(gb.nlargest(3, keep="last"), e) + + +def test_nlargest_mi_grouper(): + # see gh-21411 + npr = np.random.RandomState(123456789) + + dts = date_range("20180101", periods=10) + iterables = [dts, ["one", "two"]] + + idx = MultiIndex.from_product(iterables, names=["first", "second"]) + s = Series(npr.randn(20), index=idx) + + result = s.groupby("first").nlargest(1) + + exp_idx = MultiIndex.from_tuples( + [ + (dts[0], dts[0], "one"), + (dts[1], dts[1], "one"), + (dts[2], dts[2], "one"), + (dts[3], dts[3], "two"), + (dts[4], dts[4], "one"), + (dts[5], dts[5], "one"), + (dts[6], dts[6], "one"), + (dts[7], dts[7], "one"), + (dts[8], dts[8], "two"), + (dts[9], dts[9], "one"), + ], + names=["first", "first", "second"], + ) + + exp_values = [ + 2.2129019979039612, + 1.8417114045748335, + 0.858963679564603, + 1.3759151378258088, + 0.9430284594687134, + 0.5296914208183142, + 0.8318045593815487, + -0.8476703342910327, + 0.3804446884133735, + -0.8028845810770998, + ] + + expected = Series(exp_values, index=exp_idx) + tm.assert_series_equal(result, expected, check_exact=False, rtol=1e-3) + + +def test_nsmallest(): + a = Series([1, 3, 5, 7, 2, 9, 0, 4, 6, 10]) + b = Series(list("a" * 5 + "b" * 5)) + gb = a.groupby(b) + r = gb.nsmallest(3) + e = Series( + [1, 2, 3, 0, 4, 6], + index=MultiIndex.from_arrays([list("aaabbb"), [0, 4, 1, 6, 7, 8]]), + ) + tm.assert_series_equal(r, e) + + a = Series([1, 1, 3, 2, 0, 3, 3, 2, 1, 0]) + gb = a.groupby(b) + e = Series( + [0, 1, 1, 0, 1, 2], + index=MultiIndex.from_arrays([list("aaabbb"), [4, 1, 0, 9, 8, 7]]), + ) + tm.assert_series_equal(gb.nsmallest(3, keep="last"), e) + + +@pytest.mark.parametrize( + "data, groups", + [([0, 1, 2, 3], [0, 0, 1, 1]), ([0], [0])], +) +@pytest.mark.parametrize("method", ["nlargest", "nsmallest"]) +def test_nlargest_and_smallest_noop(data, groups, method): + # GH 15272, GH 16345, GH 29129 + # Test nlargest/smallest when it results in a noop, + # i.e. 
input is sorted and group size <= n + if method == "nlargest": + data = list(reversed(data)) + ser = Series(data, name="a") + result = getattr(ser.groupby(groups), method)(n=2) + expected = Series(data, index=MultiIndex.from_arrays([groups, ser.index]), name="a") + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("func", ["cumprod", "cumsum"]) +def test_numpy_compat(func): + # see gh-12811 + df = DataFrame({"A": [1, 2, 1], "B": [1, 2, 3]}) + g = df.groupby("A") + + msg = "numpy operations are not valid with groupby" + + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(g, func)(1, 2, 3) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(g, func)(foo=1) + + +def test_cummin(dtypes_for_minmax): + dtype = dtypes_for_minmax[0] + min_val = dtypes_for_minmax[1] + + # GH 15048 + base_df = DataFrame({"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [3, 4, 3, 2, 2, 3, 2, 1]}) + expected_mins = [3, 3, 3, 2, 2, 2, 2, 1] + + df = base_df.astype(dtype) + + expected = DataFrame({"B": expected_mins}).astype(dtype) + result = df.groupby("A").cummin() + tm.assert_frame_equal(result, expected) + result = df.groupby("A").B.apply(lambda x: x.cummin()).to_frame() + tm.assert_frame_equal(result, expected) + + # Test w/ min value for dtype + df.loc[[2, 6], "B"] = min_val + df.loc[[1, 5], "B"] = min_val + 1 + expected.loc[[2, 3, 6, 7], "B"] = min_val + expected.loc[[1, 5], "B"] = min_val + 1 # should not be rounded to min_val + result = df.groupby("A").cummin() + tm.assert_frame_equal(result, expected, check_exact=True) + expected = df.groupby("A").B.apply(lambda x: x.cummin()).to_frame() + tm.assert_frame_equal(result, expected, check_exact=True) + + # Test nan in some values + base_df.loc[[0, 2, 4, 6], "B"] = np.nan + expected = DataFrame({"B": [np.nan, 4, np.nan, 2, np.nan, 3, np.nan, 1]}) + result = base_df.groupby("A").cummin() + tm.assert_frame_equal(result, expected) + expected = base_df.groupby("A").B.apply(lambda x: x.cummin()).to_frame() + tm.assert_frame_equal(result, expected) + + # GH 15561 + df = DataFrame({"a": [1], "b": pd.to_datetime(["2001"])}) + expected = Series(pd.to_datetime("2001"), index=[0], name="b") + + result = df.groupby("a")["b"].cummin() + tm.assert_series_equal(expected, result) + + # GH 15635 + df = DataFrame({"a": [1, 2, 1], "b": [1, 2, 2]}) + result = df.groupby("a").b.cummin() + expected = Series([1, 2, 1], name="b") + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("method", ["cummin", "cummax"]) +@pytest.mark.parametrize("dtype", ["UInt64", "Int64", "Float64", "float", "boolean"]) +def test_cummin_max_all_nan_column(method, dtype): + base_df = DataFrame({"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [np.nan] * 8}) + base_df["B"] = base_df["B"].astype(dtype) + grouped = base_df.groupby("A") + + expected = DataFrame({"B": [np.nan] * 8}, dtype=dtype) + result = getattr(grouped, method)() + tm.assert_frame_equal(expected, result) + + result = getattr(grouped["B"], method)().to_frame() + tm.assert_frame_equal(expected, result) + + +def test_cummax(dtypes_for_minmax): + dtype = dtypes_for_minmax[0] + max_val = dtypes_for_minmax[2] + + # GH 15048 + base_df = DataFrame({"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [3, 4, 3, 2, 2, 3, 2, 1]}) + expected_maxs = [3, 4, 4, 4, 2, 3, 3, 3] + + df = base_df.astype(dtype) + + expected = DataFrame({"B": expected_maxs}).astype(dtype) + result = df.groupby("A").cummax() + tm.assert_frame_equal(result, expected) + result = df.groupby("A").B.apply(lambda x: x.cummax()).to_frame() + 
tm.assert_frame_equal(result, expected) + + # Test w/ max value for dtype + df.loc[[2, 6], "B"] = max_val + expected.loc[[2, 3, 6, 7], "B"] = max_val + result = df.groupby("A").cummax() + tm.assert_frame_equal(result, expected) + expected = df.groupby("A").B.apply(lambda x: x.cummax()).to_frame() + tm.assert_frame_equal(result, expected) + + # Test nan in some values + base_df.loc[[0, 2, 4, 6], "B"] = np.nan + expected = DataFrame({"B": [np.nan, 4, np.nan, 4, np.nan, 3, np.nan, 3]}) + result = base_df.groupby("A").cummax() + tm.assert_frame_equal(result, expected) + expected = base_df.groupby("A").B.apply(lambda x: x.cummax()).to_frame() + tm.assert_frame_equal(result, expected) + + # GH 15561 + df = DataFrame({"a": [1], "b": pd.to_datetime(["2001"])}) + expected = Series(pd.to_datetime("2001"), index=[0], name="b") + + result = df.groupby("a")["b"].cummax() + tm.assert_series_equal(expected, result) + + # GH 15635 + df = DataFrame({"a": [1, 2, 1], "b": [2, 1, 1]}) + result = df.groupby("a").b.cummax() + expected = Series([2, 1, 2], name="b") + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("method", ["cummin", "cummax"]) +@pytest.mark.parametrize("dtype", ["float", "Int64", "Float64"]) +@pytest.mark.parametrize( + "groups,expected_data", + [ + ([1, 1, 1], [1, None, None]), + ([1, 2, 3], [1, None, 2]), + ([1, 3, 3], [1, None, None]), + ], +) +def test_cummin_max_skipna(method, dtype, groups, expected_data): + # GH-34047 + df = DataFrame({"a": Series([1, None, 2], dtype=dtype)}) + gb = df.groupby(groups)["a"] + + result = getattr(gb, method)(skipna=False) + expected = Series(expected_data, dtype=dtype, name="a") + + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("method", ["cummin", "cummax"]) +def test_cummin_max_skipna_multiple_cols(method): + # Ensure missing value in "a" doesn't cause "b" to be nan-filled + df = DataFrame({"a": [np.nan, 2.0, 2.0], "b": [2.0, 2.0, 2.0]}) + gb = df.groupby([1, 1, 1])[["a", "b"]] + + result = getattr(gb, method)(skipna=False) + expected = DataFrame({"a": [np.nan, np.nan, np.nan], "b": [2.0, 2.0, 2.0]}) + + tm.assert_frame_equal(result, expected) + + +@td.skip_if_32bit +@pytest.mark.parametrize("method", ["cummin", "cummax"]) +@pytest.mark.parametrize( + "dtype,val", [("UInt64", np.iinfo("uint64").max), ("Int64", 2 ** 53 + 1)] +) +def test_nullable_int_not_cast_as_float(method, dtype, val): + data = [val, pd.NA] + df = DataFrame({"grp": [1, 1], "b": data}, dtype=dtype) + grouped = df.groupby("grp") + + result = grouped.transform(method) + expected = DataFrame({"b": data}, dtype=dtype) + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "in_vals, out_vals", + [ + # Basics: strictly increasing (T), strictly decreasing (F), + # abs val increasing (F), non-strictly increasing (T) + ([1, 2, 5, 3, 2, 0, 4, 5, -6, 1, 1], [True, False, False, True]), + # Test with inf vals + ( + [1, 2.1, np.inf, 3, 2, np.inf, -np.inf, 5, 11, 1, -np.inf], + [True, False, True, False], + ), + # Test with nan vals; should always be False + ( + [1, 2, np.nan, 3, 2, np.nan, np.nan, 5, -np.inf, 1, np.nan], + [False, False, False, False], + ), + ], +) +def test_is_monotonic_increasing(in_vals, out_vals): + # GH 17015 + source_dict = { + "A": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11"], + "B": ["a", "a", "a", "b", "b", "b", "c", "c", "c", "d", "d"], + "C": in_vals, + } + df = DataFrame(source_dict) + result = df.groupby("B").C.is_monotonic_increasing + index = Index(list("abcd"), name="B") + expected = 
Series(index=index, data=out_vals, name="C") + tm.assert_series_equal(result, expected) + + # Also check result equal to manually taking x.is_monotonic_increasing. + expected = df.groupby(["B"]).C.apply(lambda x: x.is_monotonic_increasing) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "in_vals, out_vals", + [ + # Basics: strictly decreasing (T), strictly increasing (F), + # abs val decreasing (F), non-strictly increasing (T) + ([10, 9, 7, 3, 4, 5, -3, 2, 0, 1, 1], [True, False, False, True]), + # Test with inf vals + ( + [np.inf, 1, -np.inf, np.inf, 2, -3, -np.inf, 5, -3, -np.inf, -np.inf], + [True, True, False, True], + ), + # Test with nan vals; should always be False + ( + [1, 2, np.nan, 3, 2, np.nan, np.nan, 5, -np.inf, 1, np.nan], + [False, False, False, False], + ), + ], +) +def test_is_monotonic_decreasing(in_vals, out_vals): + # GH 17015 + source_dict = { + "A": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11"], + "B": ["a", "a", "a", "b", "b", "b", "c", "c", "c", "d", "d"], + "C": in_vals, + } + + df = DataFrame(source_dict) + result = df.groupby("B").C.is_monotonic_decreasing + index = Index(list("abcd"), name="B") + expected = Series(index=index, data=out_vals, name="C") + tm.assert_series_equal(result, expected) + + +# describe +# -------------------------------- + + +def test_apply_describe_bug(mframe): + grouped = mframe.groupby(level="first") + grouped.describe() # it works! + + +def test_series_describe_multikey(): + ts = tm.makeTimeSeries() + grouped = ts.groupby([lambda x: x.year, lambda x: x.month]) + result = grouped.describe() + tm.assert_series_equal(result["mean"], grouped.mean(), check_names=False) + tm.assert_series_equal(result["std"], grouped.std(), check_names=False) + tm.assert_series_equal(result["min"], grouped.min(), check_names=False) + + +def test_series_describe_single(): + ts = tm.makeTimeSeries() + grouped = ts.groupby(lambda x: x.month) + result = grouped.apply(lambda x: x.describe()) + expected = grouped.describe().stack() + tm.assert_series_equal(result, expected) + + +def test_series_index_name(df): + grouped = df.loc[:, ["C"]].groupby(df["A"]) + result = grouped.agg(lambda x: x.mean()) + assert result.index.name == "A" + + +def test_frame_describe_multikey(tsframe): + grouped = tsframe.groupby([lambda x: x.year, lambda x: x.month]) + result = grouped.describe() + desc_groups = [] + for col in tsframe: + group = grouped[col].describe() + # GH 17464 - Remove duplicate MultiIndex levels + group_col = MultiIndex( + levels=[[col], group.columns], + codes=[[0] * len(group.columns), range(len(group.columns))], + ) + group = DataFrame(group.values, columns=group_col, index=group.index) + desc_groups.append(group) + expected = pd.concat(desc_groups, axis=1) + tm.assert_frame_equal(result, expected) + + groupedT = tsframe.groupby({"A": 0, "B": 0, "C": 1, "D": 1}, axis=1) + result = groupedT.describe() + expected = tsframe.describe().T + tm.assert_frame_equal(result, expected) + + +def test_frame_describe_tupleindex(): + + # GH 14848 - regression from 0.19.0 to 0.19.1 + df1 = DataFrame( + { + "x": [1, 2, 3, 4, 5] * 3, + "y": [10, 20, 30, 40, 50] * 3, + "z": [100, 200, 300, 400, 500] * 3, + } + ) + df1["k"] = [(0, 0, 1), (0, 1, 0), (1, 0, 0)] * 5 + df2 = df1.rename(columns={"k": "key"}) + msg = "Names should be list-like for a MultiIndex" + with pytest.raises(ValueError, match=msg): + df1.groupby("k").describe() + with pytest.raises(ValueError, match=msg): + df2.groupby("key").describe() + + +def 
test_frame_describe_unstacked_format(): + # GH 4792 + prices = { + Timestamp("2011-01-06 10:59:05", tz=None): 24990, + Timestamp("2011-01-06 12:43:33", tz=None): 25499, + Timestamp("2011-01-06 12:54:09", tz=None): 25499, + } + volumes = { + Timestamp("2011-01-06 10:59:05", tz=None): 1500000000, + Timestamp("2011-01-06 12:43:33", tz=None): 5000000000, + Timestamp("2011-01-06 12:54:09", tz=None): 100000000, + } + df = DataFrame({"PRICE": prices, "VOLUME": volumes}) + result = df.groupby("PRICE").VOLUME.describe() + data = [ + df[df.PRICE == 24990].VOLUME.describe().values.tolist(), + df[df.PRICE == 25499].VOLUME.describe().values.tolist(), + ] + expected = DataFrame( + data, + index=Index([24990, 25499], name="PRICE"), + columns=["count", "mean", "std", "min", "25%", "50%", "75%", "max"], + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.filterwarnings( + "ignore:" + "indexing past lexsort depth may impact performance:" + "pandas.errors.PerformanceWarning" +) +@pytest.mark.parametrize("as_index", [True, False]) +def test_describe_with_duplicate_output_column_names(as_index): + # GH 35314 + df = DataFrame( + { + "a": [99, 99, 99, 88, 88, 88], + "b": [1, 2, 3, 4, 5, 6], + "c": [10, 20, 30, 40, 50, 60], + }, + columns=["a", "b", "b"], + copy=False, + ) + + expected = ( + DataFrame.from_records( + [ + ("a", "count", 3.0, 3.0), + ("a", "mean", 88.0, 99.0), + ("a", "std", 0.0, 0.0), + ("a", "min", 88.0, 99.0), + ("a", "25%", 88.0, 99.0), + ("a", "50%", 88.0, 99.0), + ("a", "75%", 88.0, 99.0), + ("a", "max", 88.0, 99.0), + ("b", "count", 3.0, 3.0), + ("b", "mean", 5.0, 2.0), + ("b", "std", 1.0, 1.0), + ("b", "min", 4.0, 1.0), + ("b", "25%", 4.5, 1.5), + ("b", "50%", 5.0, 2.0), + ("b", "75%", 5.5, 2.5), + ("b", "max", 6.0, 3.0), + ("b", "count", 3.0, 3.0), + ("b", "mean", 5.0, 2.0), + ("b", "std", 1.0, 1.0), + ("b", "min", 4.0, 1.0), + ("b", "25%", 4.5, 1.5), + ("b", "50%", 5.0, 2.0), + ("b", "75%", 5.5, 2.5), + ("b", "max", 6.0, 3.0), + ], + ) + .set_index([0, 1]) + .T + ) + expected.columns.names = [None, None] + expected.index = Index([88, 99], name="a") + + if as_index: + expected = expected.drop(columns=["a"], level=0) + else: + expected = expected.reset_index(drop=True) + + result = df.groupby("a", as_index=as_index).describe() + + tm.assert_frame_equal(result, expected) + + +def test_groupby_mean_no_overflow(): + # Regression test for (#22487) + df = DataFrame( + { + "user": ["A", "A", "A", "A", "A"], + "connections": [4970, 4749, 4719, 4704, 18446744073699999744], + } + ) + assert df.groupby("user")["connections"].mean()["A"] == 3689348814740003840 + + +@pytest.mark.parametrize( + "values", + [ + { + "a": [1, 1, 1, 2, 2, 2, 3, 3, 3], + "b": [1, pd.NA, 2, 1, pd.NA, 2, 1, pd.NA, 2], + }, + {"a": [1, 1, 2, 2, 3, 3], "b": [1, 2, 1, 2, 1, 2]}, + ], +) +@pytest.mark.parametrize("function", ["mean", "median", "var"]) +def test_apply_to_nullable_integer_returns_float(values, function): + # https://github.com/pandas-dev/pandas/issues/32219 + output = 0.5 if function == "var" else 1.5 + arr = np.array([output] * 3, dtype=float) + idx = Index([1, 2, 3], name="a", dtype="Int64") + expected = DataFrame({"b": arr}, index=idx).astype("Float64") + + groups = DataFrame(values, dtype="Int64").groupby("a") + + result = getattr(groups, function)() + tm.assert_frame_equal(result, expected) + + result = groups.agg(function) + tm.assert_frame_equal(result, expected) + + result = groups.agg([function]) + expected.columns = MultiIndex.from_tuples([("b", function)]) + tm.assert_frame_equal(result, 
expected) + + +def test_groupby_sum_below_mincount_nullable_integer(): + # https://github.com/pandas-dev/pandas/issues/32861 + df = DataFrame({"a": [0, 1, 2], "b": [0, 1, 2], "c": [0, 1, 2]}, dtype="Int64") + grouped = df.groupby("a") + idx = Index([0, 1, 2], name="a", dtype="Int64") + + result = grouped["b"].sum(min_count=2) + expected = Series([pd.NA] * 3, dtype="Int64", index=idx, name="b") + tm.assert_series_equal(result, expected) + + result = grouped.sum(min_count=2) + expected = DataFrame({"b": [pd.NA] * 3, "c": [pd.NA] * 3}, dtype="Int64", index=idx) + tm.assert_frame_equal(result, expected) + + +def test_mean_on_timedelta(): + # GH 17382 + df = DataFrame({"time": pd.to_timedelta(range(10)), "cat": ["A", "B"] * 5}) + result = df.groupby("cat")["time"].mean() + expected = Series( + pd.to_timedelta([4, 5]), name="time", index=Index(["A", "B"], name="cat") + ) + tm.assert_series_equal(result, expected) + + +def test_groupby_sum_timedelta_with_nat(): + # GH#42659 + df = DataFrame( + { + "a": [1, 1, 2, 2], + "b": [pd.Timedelta("1d"), pd.Timedelta("2d"), pd.Timedelta("3d"), pd.NaT], + } + ) + td3 = pd.Timedelta(days=3) + + gb = df.groupby("a") + + res = gb.sum() + expected = DataFrame({"b": [td3, td3]}, index=Index([1, 2], name="a")) + tm.assert_frame_equal(res, expected) + + res = gb["b"].sum() + tm.assert_series_equal(res, expected["b"]) + + res = gb["b"].sum(min_count=2) + expected = Series([td3, pd.NaT], dtype="m8[ns]", name="b", index=expected.index) + tm.assert_series_equal(res, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_groupby.py b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_groupby.py new file mode 100644 index 0000000..fb2b9f0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_groupby.py @@ -0,0 +1,2627 @@ +from datetime import datetime +from decimal import Decimal + +import numpy as np +import pytest + +from pandas.compat import IS64 +from pandas.errors import PerformanceWarning + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + Grouper, + Index, + MultiIndex, + RangeIndex, + Series, + Timedelta, + Timestamp, + date_range, + to_datetime, +) +import pandas._testing as tm +from pandas.core.arrays import ( + BooleanArray, + FloatingArray, + IntegerArray, +) +from pandas.core.base import SpecificationError +import pandas.core.common as com +from pandas.core.groupby.base import maybe_normalize_deprecated_kernels + + +def test_repr(): + # GH18203 + result = repr(Grouper(key="A", level="B")) + expected = "Grouper(key='A', level='B', axis=0, sort=False)" + assert result == expected + + +@pytest.mark.parametrize("dtype", ["int64", "int32", "float64", "float32"]) +def test_basic(dtype): + + data = Series(np.arange(9) // 3, index=np.arange(9), dtype=dtype) + + index = np.arange(9) + np.random.shuffle(index) + data = data.reindex(index) + + grouped = data.groupby(lambda x: x // 3) + + for k, v in grouped: + assert len(v) == 3 + + agged = grouped.aggregate(np.mean) + assert agged[1] == 1 + + tm.assert_series_equal(agged, grouped.agg(np.mean)) # shorthand + tm.assert_series_equal(agged, grouped.mean()) + tm.assert_series_equal(grouped.agg(np.sum), grouped.sum()) + + expected = grouped.apply(lambda x: x * x.sum()) + transformed = grouped.transform(lambda x: x * x.sum()) + assert transformed[7] == 12 + tm.assert_series_equal(transformed, expected) + + value_grouped = data.groupby(data) + tm.assert_series_equal( + value_grouped.aggregate(np.mean), agged, check_index_type=False + ) + 
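+    # Grouping a Series by itself (data.groupby(data)) puts equal values in
+    # the same group, so each group's mean is just that value again; only the
+    # index dtype can differ (it takes the values' dtype rather than int),
+    # hence check_index_type=False above.
+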
+ # complex agg + agged = grouped.aggregate([np.mean, np.std]) + + msg = r"nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + grouped.aggregate({"one": np.mean, "two": np.std}) + + group_constants = {0: 10, 1: 20, 2: 30} + agged = grouped.agg(lambda x: group_constants[x.name] + x.mean()) + assert agged[1] == 21 + + # corner cases + msg = "Must produce aggregated value" + # exception raised is type Exception + with pytest.raises(Exception, match=msg): + grouped.aggregate(lambda x: x * 2) + + +def test_groupby_nonobject_dtype(mframe, df_mixed_floats): + key = mframe.index.codes[0] + grouped = mframe.groupby(key) + result = grouped.sum() + + expected = mframe.groupby(key.astype("O")).sum() + tm.assert_frame_equal(result, expected) + + # GH 3911, mixed frame non-conversion + df = df_mixed_floats.copy() + df["value"] = range(len(df)) + + def max_value(group): + return group.loc[group["value"].idxmax()] + + applied = df.groupby("A").apply(max_value) + result = applied.dtypes + expected = df.dtypes + tm.assert_series_equal(result, expected) + + +def test_groupby_return_type(): + + # GH2893, return a reduced type + df1 = DataFrame( + [ + {"val1": 1, "val2": 20}, + {"val1": 1, "val2": 19}, + {"val1": 2, "val2": 27}, + {"val1": 2, "val2": 12}, + ] + ) + + def func(dataf): + return dataf["val2"] - dataf["val2"].mean() + + with tm.assert_produces_warning(FutureWarning): + result = df1.groupby("val1", squeeze=True).apply(func) + assert isinstance(result, Series) + + df2 = DataFrame( + [ + {"val1": 1, "val2": 20}, + {"val1": 1, "val2": 19}, + {"val1": 1, "val2": 27}, + {"val1": 1, "val2": 12}, + ] + ) + + def func(dataf): + return dataf["val2"] - dataf["val2"].mean() + + with tm.assert_produces_warning(FutureWarning): + result = df2.groupby("val1", squeeze=True).apply(func) + assert isinstance(result, Series) + + # GH3596, return a consistent type (regression in 0.11 from 0.10.1) + df = DataFrame([[1, 1], [1, 1]], columns=["X", "Y"]) + with tm.assert_produces_warning(FutureWarning): + result = df.groupby("X", squeeze=False).count() + assert isinstance(result, DataFrame) + + +def test_inconsistent_return_type(): + # GH5592 + # inconsistent return type + df = DataFrame( + { + "A": ["Tiger", "Tiger", "Tiger", "Lamb", "Lamb", "Pony", "Pony"], + "B": Series(np.arange(7), dtype="int64"), + "C": date_range("20130101", periods=7), + } + ) + + def f(grp): + return grp.iloc[0] + + expected = df.groupby("A").first()[["B"]] + result = df.groupby("A").apply(f)[["B"]] + tm.assert_frame_equal(result, expected) + + def f(grp): + if grp.name == "Tiger": + return None + return grp.iloc[0] + + result = df.groupby("A").apply(f)[["B"]] + e = expected.copy() + e.loc["Tiger"] = np.nan + tm.assert_frame_equal(result, e) + + def f(grp): + if grp.name == "Pony": + return None + return grp.iloc[0] + + result = df.groupby("A").apply(f)[["B"]] + e = expected.copy() + e.loc["Pony"] = np.nan + tm.assert_frame_equal(result, e) + + # 5592 revisited, with datetimes + def f(grp): + if grp.name == "Pony": + return None + return grp.iloc[0] + + result = df.groupby("A").apply(f)[["C"]] + e = df.groupby("A").first()[["C"]] + e.loc["Pony"] = pd.NaT + tm.assert_frame_equal(result, e) + + # scalar outputs + def f(grp): + if grp.name == "Pony": + return None + return grp.iloc[0].loc["C"] + + result = df.groupby("A").apply(f) + e = df.groupby("A").first()["C"].copy() + e.loc["Pony"] = np.nan + e.name = None + tm.assert_series_equal(result, e) + + +def test_pass_args_kwargs(ts, tsframe): + def f(x, q=None, 
axis=0): + return np.percentile(x, q, axis=axis) + + g = lambda x: np.percentile(x, 80, axis=0) + + # Series + ts_grouped = ts.groupby(lambda x: x.month) + agg_result = ts_grouped.agg(np.percentile, 80, axis=0) + apply_result = ts_grouped.apply(np.percentile, 80, axis=0) + trans_result = ts_grouped.transform(np.percentile, 80, axis=0) + + agg_expected = ts_grouped.quantile(0.8) + trans_expected = ts_grouped.transform(g) + + tm.assert_series_equal(apply_result, agg_expected) + tm.assert_series_equal(agg_result, agg_expected) + tm.assert_series_equal(trans_result, trans_expected) + + agg_result = ts_grouped.agg(f, q=80) + apply_result = ts_grouped.apply(f, q=80) + trans_result = ts_grouped.transform(f, q=80) + tm.assert_series_equal(agg_result, agg_expected) + tm.assert_series_equal(apply_result, agg_expected) + tm.assert_series_equal(trans_result, trans_expected) + + # DataFrame + for as_index in [True, False]: + df_grouped = tsframe.groupby(lambda x: x.month, as_index=as_index) + agg_result = df_grouped.agg(np.percentile, 80, axis=0) + apply_result = df_grouped.apply(DataFrame.quantile, 0.8) + expected = df_grouped.quantile(0.8) + tm.assert_frame_equal(apply_result, expected, check_names=False) + tm.assert_frame_equal(agg_result, expected) + + apply_result = df_grouped.apply(DataFrame.quantile, [0.4, 0.8]) + expected_seq = df_grouped.quantile([0.4, 0.8]) + tm.assert_frame_equal(apply_result, expected_seq, check_names=False) + + agg_result = df_grouped.agg(f, q=80) + apply_result = df_grouped.apply(DataFrame.quantile, q=0.8) + tm.assert_frame_equal(agg_result, expected) + tm.assert_frame_equal(apply_result, expected, check_names=False) + + +@pytest.mark.parametrize("as_index", [True, False]) +def test_pass_args_kwargs_duplicate_columns(tsframe, as_index): + # go through _aggregate_frame with self.axis == 0 and duplicate columns + tsframe.columns = ["A", "B", "A", "C"] + gb = tsframe.groupby(lambda x: x.month, as_index=as_index) + + res = gb.agg(np.percentile, 80, axis=0) + + ex_data = { + 1: tsframe[tsframe.index.month == 1].quantile(0.8), + 2: tsframe[tsframe.index.month == 2].quantile(0.8), + } + expected = DataFrame(ex_data).T + if not as_index: + # TODO: try to get this more consistent? 
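+        # With as_index=False a column key would be re-inserted as a column,
+        # but the grouper here is a callable on the index (the month), so the
+        # result ends up with a default integer index instead; mirror that by
+        # resetting the expected index to 0..1 (tsframe spans two months).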
+ expected.index = Index(range(2)) + + tm.assert_frame_equal(res, expected) + + +def test_len(): + df = tm.makeTimeDataFrame() + grouped = df.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]) + assert len(grouped) == len(df) + + grouped = df.groupby([lambda x: x.year, lambda x: x.month]) + expected = len({(x.year, x.month) for x in df.index}) + assert len(grouped) == expected + + # issue 11016 + df = DataFrame({"a": [np.nan] * 3, "b": [1, 2, 3]}) + assert len(df.groupby("a")) == 0 + assert len(df.groupby("b")) == 3 + assert len(df.groupby(["a", "b"])) == 3 + + +def test_basic_regression(): + # regression + result = Series([1.0 * x for x in list(range(1, 10)) * 10]) + + data = np.random.random(1100) * 10.0 + groupings = Series(data) + + grouped = result.groupby(groupings) + grouped.mean() + + +@pytest.mark.parametrize( + "dtype", ["float64", "float32", "int64", "int32", "int16", "int8"] +) +def test_with_na_groups(dtype): + index = Index(np.arange(10)) + values = Series(np.ones(10), index, dtype=dtype) + labels = Series( + [np.nan, "foo", "bar", "bar", np.nan, np.nan, "bar", "bar", np.nan, "foo"], + index=index, + ) + + # this SHOULD be an int + grouped = values.groupby(labels) + agged = grouped.agg(len) + expected = Series([4, 2], index=["bar", "foo"]) + + tm.assert_series_equal(agged, expected, check_dtype=False) + + # assert issubclass(agged.dtype.type, np.integer) + + # explicitly return a float from my function + def f(x): + return float(len(x)) + + agged = grouped.agg(f) + expected = Series([4.0, 2.0], index=["bar", "foo"]) + + tm.assert_series_equal(agged, expected) + + +def test_indices_concatenation_order(): + + # GH 2808 + + def f1(x): + y = x[(x.b % 2) == 1] ** 2 + if y.empty: + multiindex = MultiIndex(levels=[[]] * 2, codes=[[]] * 2, names=["b", "c"]) + res = DataFrame(columns=["a"], index=multiindex) + return res + else: + y = y.set_index(["b", "c"]) + return y + + def f2(x): + y = x[(x.b % 2) == 1] ** 2 + if y.empty: + return DataFrame() + else: + y = y.set_index(["b", "c"]) + return y + + def f3(x): + y = x[(x.b % 2) == 1] ** 2 + if y.empty: + multiindex = MultiIndex( + levels=[[]] * 2, codes=[[]] * 2, names=["foo", "bar"] + ) + res = DataFrame(columns=["a", "b"], index=multiindex) + return res + else: + return y + + df = DataFrame({"a": [1, 2, 2, 2], "b": range(4), "c": range(5, 9)}) + + df2 = DataFrame({"a": [3, 2, 2, 2], "b": range(4), "c": range(5, 9)}) + + # correct result + result1 = df.groupby("a").apply(f1) + result2 = df2.groupby("a").apply(f1) + tm.assert_frame_equal(result1, result2) + + # should fail (not the same number of levels) + msg = "Cannot concat indices that do not have the same number of levels" + with pytest.raises(AssertionError, match=msg): + df.groupby("a").apply(f2) + with pytest.raises(AssertionError, match=msg): + df2.groupby("a").apply(f2) + + # should fail (incorrect shape) + with pytest.raises(AssertionError, match=msg): + df.groupby("a").apply(f3) + with pytest.raises(AssertionError, match=msg): + df2.groupby("a").apply(f3) + + +def test_attr_wrapper(ts): + grouped = ts.groupby(lambda x: x.weekday()) + + result = grouped.std() + expected = grouped.agg(lambda x: np.std(x, ddof=1)) + tm.assert_series_equal(result, expected) + + # this is pretty cool + result = grouped.describe() + expected = {name: gp.describe() for name, gp in grouped} + expected = DataFrame(expected).T + tm.assert_frame_equal(result, expected) + + # get attribute + result = grouped.dtype + expected = grouped.agg(lambda x: x.dtype) + tm.assert_series_equal(result, 
expected) + + # make sure raises error + msg = "'SeriesGroupBy' object has no attribute 'foo'" + with pytest.raises(AttributeError, match=msg): + getattr(grouped, "foo") + + +def test_frame_groupby(tsframe): + grouped = tsframe.groupby(lambda x: x.weekday()) + + # aggregate + aggregated = grouped.aggregate(np.mean) + assert len(aggregated) == 5 + assert len(aggregated.columns) == 4 + + # by string + tscopy = tsframe.copy() + tscopy["weekday"] = [x.weekday() for x in tscopy.index] + stragged = tscopy.groupby("weekday").aggregate(np.mean) + tm.assert_frame_equal(stragged, aggregated, check_names=False) + + # transform + grouped = tsframe.head(30).groupby(lambda x: x.weekday()) + transformed = grouped.transform(lambda x: x - x.mean()) + assert len(transformed) == 30 + assert len(transformed.columns) == 4 + + # transform propagate + transformed = grouped.transform(lambda x: x.mean()) + for name, group in grouped: + mean = group.mean() + for idx in group.index: + tm.assert_series_equal(transformed.xs(idx), mean, check_names=False) + + # iterate + for weekday, group in grouped: + assert group.index[0].weekday() == weekday + + # groups / group_indices + groups = grouped.groups + indices = grouped.indices + + for k, v in groups.items(): + samething = tsframe.index.take(indices[k]) + assert (samething == v).all() + + +def test_frame_groupby_columns(tsframe): + mapping = {"A": 0, "B": 0, "C": 1, "D": 1} + grouped = tsframe.groupby(mapping, axis=1) + + # aggregate + aggregated = grouped.aggregate(np.mean) + assert len(aggregated) == len(tsframe) + assert len(aggregated.columns) == 2 + + # transform + tf = lambda x: x - x.mean() + groupedT = tsframe.T.groupby(mapping, axis=0) + tm.assert_frame_equal(groupedT.transform(tf).T, grouped.transform(tf)) + + # iterate + for k, v in grouped: + assert len(v.columns) == 2 + + +def test_frame_set_name_single(df): + grouped = df.groupby("A") + + result = grouped.mean() + assert result.index.name == "A" + + result = df.groupby("A", as_index=False).mean() + assert result.index.name != "A" + + result = grouped.agg(np.mean) + assert result.index.name == "A" + + result = grouped.agg({"C": np.mean, "D": np.std}) + assert result.index.name == "A" + + result = grouped["C"].mean() + assert result.index.name == "A" + result = grouped["C"].agg(np.mean) + assert result.index.name == "A" + result = grouped["C"].agg([np.mean, np.std]) + assert result.index.name == "A" + + msg = r"nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + grouped["C"].agg({"foo": np.mean, "bar": np.std}) + + +def test_multi_func(df): + col1 = df["A"] + col2 = df["B"] + + grouped = df.groupby([col1.get, col2.get]) + agged = grouped.mean() + expected = df.groupby(["A", "B"]).mean() + + # TODO groupby get drops names + tm.assert_frame_equal( + agged.loc[:, ["C", "D"]], expected.loc[:, ["C", "D"]], check_names=False + ) + + # some "groups" with no data + df = DataFrame( + { + "v1": np.random.randn(6), + "v2": np.random.randn(6), + "k1": np.array(["b", "b", "b", "a", "a", "a"]), + "k2": np.array(["1", "1", "1", "2", "2", "2"]), + }, + index=["one", "two", "three", "four", "five", "six"], + ) + # only verify that it works for now + grouped = df.groupby(["k1", "k2"]) + grouped.agg(np.sum) + + +def test_multi_key_multiple_functions(df): + grouped = df.groupby(["A", "B"])["C"] + + agged = grouped.agg([np.mean, np.std]) + expected = DataFrame({"mean": grouped.agg(np.mean), "std": grouped.agg(np.std)}) + tm.assert_frame_equal(agged, expected) + + +def 
test_frame_multi_key_function_list(): + data = DataFrame( + { + "A": [ + "foo", + "foo", + "foo", + "foo", + "bar", + "bar", + "bar", + "bar", + "foo", + "foo", + "foo", + ], + "B": [ + "one", + "one", + "one", + "two", + "one", + "one", + "one", + "two", + "two", + "two", + "one", + ], + "C": [ + "dull", + "dull", + "shiny", + "dull", + "dull", + "shiny", + "shiny", + "dull", + "shiny", + "shiny", + "shiny", + ], + "D": np.random.randn(11), + "E": np.random.randn(11), + "F": np.random.randn(11), + } + ) + + grouped = data.groupby(["A", "B"]) + funcs = [np.mean, np.std] + with tm.assert_produces_warning( + FutureWarning, match=r"\['C'\] did not aggregate successfully" + ): + agged = grouped.agg(funcs) + expected = pd.concat( + [grouped["D"].agg(funcs), grouped["E"].agg(funcs), grouped["F"].agg(funcs)], + keys=["D", "E", "F"], + axis=1, + ) + assert isinstance(agged.index, MultiIndex) + assert isinstance(expected.index, MultiIndex) + tm.assert_frame_equal(agged, expected) + + +@pytest.mark.parametrize("op", [lambda x: x.sum(), lambda x: x.mean()]) +def test_groupby_multiple_columns(df, op): + data = df + grouped = data.groupby(["A", "B"]) + + result1 = op(grouped) + + keys = [] + values = [] + for n1, gp1 in data.groupby("A"): + for n2, gp2 in gp1.groupby("B"): + keys.append((n1, n2)) + values.append(op(gp2.loc[:, ["C", "D"]])) + + mi = MultiIndex.from_tuples(keys, names=["A", "B"]) + expected = pd.concat(values, axis=1).T + expected.index = mi + + # a little bit crude + for col in ["C", "D"]: + result_col = op(grouped[col]) + pivoted = result1[col] + exp = expected[col] + tm.assert_series_equal(result_col, exp) + tm.assert_series_equal(pivoted, exp) + + # test single series works the same + result = data["C"].groupby([data["A"], data["B"]]).mean() + expected = data.groupby(["A", "B"]).mean()["C"] + + tm.assert_series_equal(result, expected) + + +def test_as_index_select_column(): + # GH 5764 + df = DataFrame([[1, 2], [1, 4], [5, 6]], columns=["A", "B"]) + result = df.groupby("A", as_index=False)["B"].get_group(1) + expected = Series([2, 4], name="B") + tm.assert_series_equal(result, expected) + + result = df.groupby("A", as_index=False)["B"].apply(lambda x: x.cumsum()) + expected = Series( + [2, 6, 6], name="B", index=MultiIndex.from_tuples([(0, 0), (0, 1), (1, 2)]) + ) + tm.assert_series_equal(result, expected) + + +def test_groupby_as_index_select_column_sum_empty_df(): + # GH 35246 + df = DataFrame(columns=Index(["A", "B", "C"], name="alpha")) + left = df.groupby(by="A", as_index=False)["B"].sum(numeric_only=False) + + expected = DataFrame(columns=df.columns[:2], index=range(0)) + tm.assert_frame_equal(left, expected) + + +def test_groupby_as_index_agg(df): + grouped = df.groupby("A", as_index=False) + + # single-key + + result = grouped.agg(np.mean) + expected = grouped.mean() + tm.assert_frame_equal(result, expected) + + result2 = grouped.agg({"C": np.mean, "D": np.sum}) + expected2 = grouped.mean() + expected2["D"] = grouped.sum()["D"] + tm.assert_frame_equal(result2, expected2) + + grouped = df.groupby("A", as_index=True) + + msg = r"nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + grouped["C"].agg({"Q": np.sum}) + + # multi-key + + grouped = df.groupby(["A", "B"], as_index=False) + + result = grouped.agg(np.mean) + expected = grouped.mean() + tm.assert_frame_equal(result, expected) + + result2 = grouped.agg({"C": np.mean, "D": np.sum}) + expected2 = grouped.mean() + expected2["D"] = grouped.sum()["D"] + tm.assert_frame_equal(result2, 
expected2) + + expected3 = grouped["C"].sum() + expected3 = DataFrame(expected3).rename(columns={"C": "Q"}) + result3 = grouped["C"].agg({"Q": np.sum}) + tm.assert_frame_equal(result3, expected3) + + # GH7115 & GH8112 & GH8582 + df = DataFrame(np.random.randint(0, 100, (50, 3)), columns=["jim", "joe", "jolie"]) + ts = Series(np.random.randint(5, 10, 50), name="jim") + + gr = df.groupby(ts) + gr.nth(0) # invokes set_selection_from_grouper internally + tm.assert_frame_equal(gr.apply(sum), df.groupby(ts).apply(sum)) + + for attr in ["mean", "max", "count", "idxmax", "cumsum", "all"]: + gr = df.groupby(ts, as_index=False) + left = getattr(gr, attr)() + + gr = df.groupby(ts.values, as_index=True) + right = getattr(gr, attr)().reset_index(drop=True) + + tm.assert_frame_equal(left, right) + + +def test_ops_not_as_index(reduction_func): + # GH 10355, 21090 + # Using as_index=False should not modify grouped column + + if reduction_func in ("corrwith",): + pytest.skip("Test not applicable") + + if reduction_func in ("nth", "ngroup"): + pytest.skip("Skip until behavior is determined (GH #5755)") + + df = DataFrame(np.random.randint(0, 5, size=(100, 2)), columns=["a", "b"]) + expected = getattr(df.groupby("a"), reduction_func)() + if reduction_func == "size": + expected = expected.rename("size") + expected = expected.reset_index() + + if reduction_func != "size": + # 32 bit compat -> groupby preserves dtype whereas reset_index casts to int64 + expected["a"] = expected["a"].astype(df["a"].dtype) + + g = df.groupby("a", as_index=False) + + result = getattr(g, reduction_func)() + tm.assert_frame_equal(result, expected) + + result = g.agg(reduction_func) + tm.assert_frame_equal(result, expected) + + result = getattr(g["b"], reduction_func)() + tm.assert_frame_equal(result, expected) + + result = g["b"].agg(reduction_func) + tm.assert_frame_equal(result, expected) + + +def test_as_index_series_return_frame(df): + grouped = df.groupby("A", as_index=False) + grouped2 = df.groupby(["A", "B"], as_index=False) + + result = grouped["C"].agg(np.sum) + expected = grouped.agg(np.sum).loc[:, ["A", "C"]] + assert isinstance(result, DataFrame) + tm.assert_frame_equal(result, expected) + + result2 = grouped2["C"].agg(np.sum) + expected2 = grouped2.agg(np.sum).loc[:, ["A", "B", "C"]] + assert isinstance(result2, DataFrame) + tm.assert_frame_equal(result2, expected2) + + result = grouped["C"].sum() + expected = grouped.sum().loc[:, ["A", "C"]] + assert isinstance(result, DataFrame) + tm.assert_frame_equal(result, expected) + + result2 = grouped2["C"].sum() + expected2 = grouped2.sum().loc[:, ["A", "B", "C"]] + assert isinstance(result2, DataFrame) + tm.assert_frame_equal(result2, expected2) + + +def test_as_index_series_column_slice_raises(df): + # GH15072 + grouped = df.groupby("A", as_index=False) + msg = r"Column\(s\) C already selected" + + with pytest.raises(IndexError, match=msg): + grouped["C"].__getitem__("D") + + +def test_groupby_as_index_cython(df): + data = df + + # single-key + grouped = data.groupby("A", as_index=False) + result = grouped.mean() + expected = data.groupby(["A"]).mean() + expected.insert(0, "A", expected.index) + expected.index = np.arange(len(expected)) + tm.assert_frame_equal(result, expected) + + # multi-key + grouped = data.groupby(["A", "B"], as_index=False) + result = grouped.mean() + expected = data.groupby(["A", "B"]).mean() + + arrays = list(zip(*expected.index.values)) + expected.insert(0, "A", arrays[0]) + expected.insert(1, "B", arrays[1]) + expected.index = 
np.arange(len(expected)) + tm.assert_frame_equal(result, expected) + + +def test_groupby_as_index_series_scalar(df): + grouped = df.groupby(["A", "B"], as_index=False) + + # GH #421 + + result = grouped["C"].agg(len) + expected = grouped.agg(len).loc[:, ["A", "B", "C"]] + tm.assert_frame_equal(result, expected) + + +def test_groupby_as_index_corner(df, ts): + msg = "as_index=False only valid with DataFrame" + with pytest.raises(TypeError, match=msg): + ts.groupby(lambda x: x.weekday(), as_index=False) + + msg = "as_index=False only valid for axis=0" + with pytest.raises(ValueError, match=msg): + df.groupby(lambda x: x.lower(), as_index=False, axis=1) + + +def test_groupby_multiple_key(df): + df = tm.makeTimeDataFrame() + grouped = df.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]) + agged = grouped.sum() + tm.assert_almost_equal(df.values, agged.values) + + grouped = df.T.groupby( + [lambda x: x.year, lambda x: x.month, lambda x: x.day], axis=1 + ) + + agged = grouped.agg(lambda x: x.sum()) + tm.assert_index_equal(agged.index, df.columns) + tm.assert_almost_equal(df.T.values, agged.values) + + agged = grouped.agg(lambda x: x.sum()) + tm.assert_almost_equal(df.T.values, agged.values) + + +def test_groupby_multi_corner(df): + # test that having an all-NA column doesn't mess you up + df = df.copy() + df["bad"] = np.nan + agged = df.groupby(["A", "B"]).mean() + + expected = df.groupby(["A", "B"]).mean() + expected["bad"] = np.nan + + tm.assert_frame_equal(agged, expected) + + +def test_omit_nuisance(df): + grouped = df.groupby("A") + agged = grouped.agg(np.mean) + exp = grouped.mean() + tm.assert_frame_equal(agged, exp) + + df = df.loc[:, ["A", "C", "D"]] + df["E"] = datetime.now() + grouped = df.groupby("A") + result = grouped.agg(np.sum) + expected = grouped.sum() + tm.assert_frame_equal(result, expected) + + # won't work with axis = 1 + grouped = df.groupby({"A": 0, "C": 0, "D": 1, "E": 1}, axis=1) + msg = "does not support reduction 'sum'" + with pytest.raises(TypeError, match=msg): + grouped.agg(lambda x: x.sum(0, numeric_only=False)) + + +@pytest.mark.parametrize( + "agg_function", + ["max", "min"], +) +def test_keep_nuisance_agg(df, agg_function): + # GH 38815 + grouped = df.groupby("A") + result = getattr(grouped, agg_function)() + expected = result.copy() + expected.loc["bar", "B"] = getattr(df.loc[df["A"] == "bar", "B"], agg_function)() + expected.loc["foo", "B"] = getattr(df.loc[df["A"] == "foo", "B"], agg_function)() + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "agg_function", + ["sum", "mean", "prod", "std", "var", "median"], +) +def test_omit_nuisance_agg(df, agg_function): + # GH 38774, GH 38815 + grouped = df.groupby("A") + result = getattr(grouped, agg_function)() + expected = getattr(df.loc[:, ["A", "C", "D"]].groupby("A"), agg_function)() + tm.assert_frame_equal(result, expected) + + +def test_omit_nuisance_warnings(df): + # GH 38815 + with tm.assert_produces_warning(FutureWarning, filter_level="always"): + grouped = df.groupby("A") + result = grouped.skew() + expected = df.loc[:, ["A", "C", "D"]].groupby("A").skew() + tm.assert_frame_equal(result, expected) + + +def test_omit_nuisance_python_multiple(three_group): + grouped = three_group.groupby(["A", "B"]) + + agged = grouped.agg(np.mean) + exp = grouped.mean() + tm.assert_frame_equal(agged, exp) + + +def test_empty_groups_corner(mframe): + # handle empty groups + df = DataFrame( + { + "k1": np.array(["b", "b", "b", "a", "a", "a"]), + "k2": np.array(["1", "1", "1", "2", "2", 
"2"]), + "k3": ["foo", "bar"] * 3, + "v1": np.random.randn(6), + "v2": np.random.randn(6), + } + ) + + grouped = df.groupby(["k1", "k2"]) + result = grouped.agg(np.mean) + expected = grouped.mean() + tm.assert_frame_equal(result, expected) + + grouped = mframe[3:5].groupby(level=0) + agged = grouped.apply(lambda x: x.mean()) + agged_A = grouped["A"].apply(np.mean) + tm.assert_series_equal(agged["A"], agged_A) + assert agged.index.name == "first" + + +def test_nonsense_func(): + df = DataFrame([0]) + msg = r"unsupported operand type\(s\) for \+: 'int' and 'str'" + with pytest.raises(TypeError, match=msg): + df.groupby(lambda x: x + "foo") + + +def test_wrap_aggregated_output_multindex(mframe): + df = mframe.T + df["baz", "two"] = "peekaboo" + + keys = [np.array([0, 0, 1]), np.array([0, 0, 1])] + agged = df.groupby(keys).agg(np.mean) + assert isinstance(agged.columns, MultiIndex) + + def aggfun(ser): + if ser.name == ("foo", "one"): + raise TypeError + else: + return ser.sum() + + with tm.assert_produces_warning(FutureWarning, match="Dropping invalid columns"): + agged2 = df.groupby(keys).aggregate(aggfun) + assert len(agged2.columns) + 1 == len(df.columns) + + +def test_groupby_level_apply(mframe): + + result = mframe.groupby(level=0).count() + assert result.index.name == "first" + result = mframe.groupby(level=1).count() + assert result.index.name == "second" + + result = mframe["A"].groupby(level=0).count() + assert result.index.name == "first" + + +def test_groupby_level_mapper(mframe): + deleveled = mframe.reset_index() + + mapper0 = {"foo": 0, "bar": 0, "baz": 1, "qux": 1} + mapper1 = {"one": 0, "two": 0, "three": 1} + + result0 = mframe.groupby(mapper0, level=0).sum() + result1 = mframe.groupby(mapper1, level=1).sum() + + mapped_level0 = np.array([mapper0.get(x) for x in deleveled["first"]]) + mapped_level1 = np.array([mapper1.get(x) for x in deleveled["second"]]) + expected0 = mframe.groupby(mapped_level0).sum() + expected1 = mframe.groupby(mapped_level1).sum() + expected0.index.name, expected1.index.name = "first", "second" + + tm.assert_frame_equal(result0, expected0) + tm.assert_frame_equal(result1, expected1) + + +def test_groupby_level_nonmulti(): + # GH 1313, GH 13901 + s = Series([1, 2, 3, 10, 4, 5, 20, 6], Index([1, 2, 3, 1, 4, 5, 2, 6], name="foo")) + expected = Series([11, 22, 3, 4, 5, 6], Index(range(1, 7), name="foo")) + + result = s.groupby(level=0).sum() + tm.assert_series_equal(result, expected) + result = s.groupby(level=[0]).sum() + tm.assert_series_equal(result, expected) + result = s.groupby(level=-1).sum() + tm.assert_series_equal(result, expected) + result = s.groupby(level=[-1]).sum() + tm.assert_series_equal(result, expected) + + msg = "level > 0 or level < -1 only valid with MultiIndex" + with pytest.raises(ValueError, match=msg): + s.groupby(level=1) + with pytest.raises(ValueError, match=msg): + s.groupby(level=-2) + msg = "No group keys passed!" 
+ with pytest.raises(ValueError, match=msg): + s.groupby(level=[]) + msg = "multiple levels only valid with MultiIndex" + with pytest.raises(ValueError, match=msg): + s.groupby(level=[0, 0]) + with pytest.raises(ValueError, match=msg): + s.groupby(level=[0, 1]) + msg = "level > 0 or level < -1 only valid with MultiIndex" + with pytest.raises(ValueError, match=msg): + s.groupby(level=[1]) + + +def test_groupby_complex(): + # GH 12902 + a = Series(data=np.arange(4) * (1 + 2j), index=[0, 0, 1, 1]) + expected = Series((1 + 2j, 5 + 10j)) + + result = a.groupby(level=0).sum() + tm.assert_series_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning): + result = a.sum(level=0) + tm.assert_series_equal(result, expected) + + +def test_groupby_complex_numbers(): + # GH 17927 + df = DataFrame( + [ + {"a": 1, "b": 1 + 1j}, + {"a": 1, "b": 1 + 2j}, + {"a": 4, "b": 1}, + ] + ) + expected = DataFrame( + np.array([1, 1, 1], dtype=np.int64), + index=Index([(1 + 1j), (1 + 2j), (1 + 0j)], dtype="object", name="b"), + columns=Index(["a"], dtype="object"), + ) + result = df.groupby("b", sort=False).count() + tm.assert_frame_equal(result, expected) + + # Sorted by the magnitude of the complex numbers + # Complex Index dtype is cast to object + expected.index = Index([(1 + 0j), (1 + 1j), (1 + 2j)], dtype="object", name="b") + result = df.groupby("b", sort=True).count() + tm.assert_frame_equal(result, expected) + + +def test_groupby_series_indexed_differently(): + s1 = Series( + [5.0, -9.0, 4.0, 100.0, -5.0, 55.0, 6.7], + index=Index(["a", "b", "c", "d", "e", "f", "g"]), + ) + s2 = Series( + [1.0, 1.0, 4.0, 5.0, 5.0, 7.0], index=Index(["a", "b", "d", "f", "g", "h"]) + ) + + grouped = s1.groupby(s2) + agged = grouped.mean() + exp = s1.groupby(s2.reindex(s1.index).get).mean() + tm.assert_series_equal(agged, exp) + + +def test_groupby_with_hier_columns(): + tuples = list( + zip( + *[ + ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + ) + ) + index = MultiIndex.from_tuples(tuples) + columns = MultiIndex.from_tuples( + [("A", "cat"), ("B", "dog"), ("B", "cat"), ("A", "dog")] + ) + df = DataFrame(np.random.randn(8, 4), index=index, columns=columns) + + result = df.groupby(level=0).mean() + tm.assert_index_equal(result.columns, columns) + + result = df.groupby(level=0, axis=1).mean() + tm.assert_index_equal(result.index, df.index) + + result = df.groupby(level=0).agg(np.mean) + tm.assert_index_equal(result.columns, columns) + + result = df.groupby(level=0).apply(lambda x: x.mean()) + tm.assert_index_equal(result.columns, columns) + + result = df.groupby(level=0, axis=1).agg(lambda x: x.mean(1)) + tm.assert_index_equal(result.columns, Index(["A", "B"])) + tm.assert_index_equal(result.index, df.index) + + # add a nuisance column + sorted_columns, _ = columns.sortlevel(0) + df["A", "foo"] = "bar" + result = df.groupby(level=0).mean() + tm.assert_index_equal(result.columns, df.columns[:-1]) + + +def test_grouping_ndarray(df): + grouped = df.groupby(df["A"].values) + + result = grouped.sum() + expected = df.groupby("A").sum() + tm.assert_frame_equal( + result, expected, check_names=False + ) # Note: no names when grouping by value + + +def test_groupby_wrong_multi_labels(): + + index = Index([0, 1, 2, 3, 4], name="index") + data = DataFrame( + { + "foo": ["foo1", "foo1", "foo2", "foo1", "foo3"], + "bar": ["bar1", "bar2", "bar2", "bar1", "bar1"], + "baz": ["baz1", "baz1", "baz1", "baz2", "baz2"], + "spam": ["spam2", "spam3", "spam2", 
"spam1", "spam1"], + "data": [20, 30, 40, 50, 60], + }, + index=index, + ) + + grouped = data.groupby(["foo", "bar", "baz", "spam"]) + + result = grouped.agg(np.mean) + expected = grouped.mean() + tm.assert_frame_equal(result, expected) + + +def test_groupby_series_with_name(df): + result = df.groupby(df["A"]).mean() + result2 = df.groupby(df["A"], as_index=False).mean() + assert result.index.name == "A" + assert "A" in result2 + + result = df.groupby([df["A"], df["B"]]).mean() + result2 = df.groupby([df["A"], df["B"]], as_index=False).mean() + assert result.index.names == ("A", "B") + assert "A" in result2 + assert "B" in result2 + + +def test_seriesgroupby_name_attr(df): + # GH 6265 + result = df.groupby("A")["C"] + assert result.count().name == "C" + assert result.mean().name == "C" + + testFunc = lambda x: np.sum(x) * 2 + assert result.agg(testFunc).name == "C" + + +def test_consistency_name(): + # GH 12363 + + df = DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": ["one", "one", "two", "two", "two", "two", "one", "two"], + "C": np.random.randn(8) + 1.0, + "D": np.arange(8), + } + ) + + expected = df.groupby(["A"]).B.count() + result = df.B.groupby(df.A).count() + tm.assert_series_equal(result, expected) + + +def test_groupby_name_propagation(df): + # GH 6124 + def summarize(df, name=None): + return Series({"count": 1, "mean": 2, "omissions": 3}, name=name) + + def summarize_random_name(df): + # Provide a different name for each Series. In this case, groupby + # should not attempt to propagate the Series name since they are + # inconsistent. + return Series({"count": 1, "mean": 2, "omissions": 3}, name=df.iloc[0]["A"]) + + metrics = df.groupby("A").apply(summarize) + assert metrics.columns.name is None + metrics = df.groupby("A").apply(summarize, "metrics") + assert metrics.columns.name == "metrics" + metrics = df.groupby("A").apply(summarize_random_name) + assert metrics.columns.name is None + + +def test_groupby_nonstring_columns(): + df = DataFrame([np.arange(10) for x in range(10)]) + grouped = df.groupby(0) + result = grouped.mean() + expected = df.groupby(df[0]).mean() + tm.assert_frame_equal(result, expected) + + +def test_groupby_mixed_type_columns(): + # GH 13432, unorderable types in py3 + df = DataFrame([[0, 1, 2]], columns=["A", "B", 0]) + expected = DataFrame([[1, 2]], columns=["B", 0], index=Index([0], name="A")) + + result = df.groupby("A").first() + tm.assert_frame_equal(result, expected) + + result = df.groupby("A").sum() + tm.assert_frame_equal(result, expected) + + +# TODO: Ensure warning isn't emitted in the first place +@pytest.mark.filterwarnings("ignore:Mean of:RuntimeWarning") +def test_cython_grouper_series_bug_noncontig(): + arr = np.empty((100, 100)) + arr.fill(np.nan) + obj = Series(arr[:, 0]) + inds = np.tile(range(10), 10) + + result = obj.groupby(inds).agg(Series.median) + assert result.isna().all() + + +def test_series_grouper_noncontig_index(): + index = Index(tm.rands_array(10, 100)) + + values = Series(np.random.randn(50), index=index[::2]) + labels = np.random.randint(0, 5, 50) + + # it works! 
+ grouped = values.groupby(labels) + + # accessing the index elements used to segfault; make sure it no longer does + f = lambda x: len(set(map(id, x.index))) + grouped.agg(f) + + + def test_convert_objects_leave_decimal_alone(): + s = Series(range(5)) + labels = np.array(["a", "b", "c", "d", "e"], dtype="O") + + def convert_fast(x): + return Decimal(str(x.mean())) + + def convert_force_pure(x): + # base will not be length 0 here, i.e. x.values is a view on a larger array + assert len(x.values.base) > 0 + return Decimal(str(x.mean())) + + grouped = s.groupby(labels) + + result = grouped.agg(convert_fast) + assert result.dtype == np.object_ + assert isinstance(result[0], Decimal) + + result = grouped.agg(convert_force_pure) + assert result.dtype == np.object_ + assert isinstance(result[0], Decimal) + + + def test_groupby_dtype_inference_empty(): + # GH 6733 + df = DataFrame({"x": [], "range": np.arange(0, dtype="int64")}) + assert df["x"].dtype == np.float64 + + result = df.groupby("x").first() + exp_index = Index([], name="x", dtype=np.float64) + expected = DataFrame({"range": Series([], index=exp_index, dtype="int64")}) + tm.assert_frame_equal(result, expected, by_blocks=True) + + + def test_groupby_uint64_float_conversion(): + # GH: 30859 groupby converts uint64 to floats sometimes + df = DataFrame({"first": [1], "second": [1], "value": [16148277970000000000]}) + result = df.groupby(["first", "second"])["value"].max() + expected = Series( + [16148277970000000000], + MultiIndex.from_product([[1], [1]], names=["first", "second"]), + name="value", + ) + tm.assert_series_equal(result, expected) + + + def test_groupby_list_infer_array_like(df): + result = df.groupby(list(df["A"])).mean() + expected = df.groupby(df["A"]).mean() + tm.assert_frame_equal(result, expected, check_names=False) + + with pytest.raises(KeyError, match=r"^'foo'$"): + df.groupby(list(df["A"][:-1])) + + # pathological case of ambiguity + df = DataFrame({"foo": [0, 1], "bar": [3, 4], "val": np.random.randn(2)}) + + result = df.groupby(["foo", "bar"]).mean() + expected = df.groupby([df["foo"], df["bar"]]).mean()[["val"]] + tm.assert_frame_equal(result, expected) + + + def test_groupby_keys_same_size_as_index(): + # GH 11185 + freq = "s" + index = date_range( + start=Timestamp("2015-09-29T11:34:44-0700"), periods=2, freq=freq + ) + df = DataFrame([["A", 10], ["B", 15]], columns=["metric", "values"], index=index) + result = df.groupby([Grouper(level=0, freq=freq), "metric"]).mean() + expected = df.set_index([df.index, "metric"]).astype(float) + + tm.assert_frame_equal(result, expected) + + + def test_groupby_one_row(): + # GH 11741 + msg = r"^'Z'$" + df1 = DataFrame(np.random.randn(1, 4), columns=list("ABCD")) + with pytest.raises(KeyError, match=msg): + df1.groupby("Z") + df2 = DataFrame(np.random.randn(2, 4), columns=list("ABCD")) + with pytest.raises(KeyError, match=msg): + df2.groupby("Z") + + + def test_groupby_nat_exclude(): + # GH 6992 + df = DataFrame( + { + "values": np.random.randn(8), + "dt": [ + np.nan, + Timestamp("2013-01-01"), + np.nan, + Timestamp("2013-02-01"), + np.nan, + Timestamp("2013-02-01"), + np.nan, + Timestamp("2013-01-01"), + ], + "str": [np.nan, "a", np.nan, "a", np.nan, "a", np.nan, "b"], + } + ) + grouped = df.groupby("dt") + + expected = [Index([1, 7]), Index([3, 5])] + keys = sorted(grouped.groups.keys()) + assert len(keys) == 2 + for k, e in zip(keys, expected): + # grouped.groups keys are np.datetime64 with system tz; + # to avoid tz effects, compare values only + tm.assert_index_equal(grouped.groups[k], e) + + # confirm obj is not filtered + tm.assert_frame_equal(grouped.grouper.groupings[0].obj, df) + assert
grouped.ngroups == 2 + + expected = { + Timestamp("2013-01-01 00:00:00"): np.array([1, 7], dtype=np.intp), + Timestamp("2013-02-01 00:00:00"): np.array([3, 5], dtype=np.intp), + } + + for k in grouped.indices: + tm.assert_numpy_array_equal(grouped.indices[k], expected[k]) + + tm.assert_frame_equal(grouped.get_group(Timestamp("2013-01-01")), df.iloc[[1, 7]]) + tm.assert_frame_equal(grouped.get_group(Timestamp("2013-02-01")), df.iloc[[3, 5]]) + + with pytest.raises(KeyError, match=r"^NaT$"): + grouped.get_group(pd.NaT) + + nan_df = DataFrame( + {"nan": [np.nan, np.nan, np.nan], "nat": [pd.NaT, pd.NaT, pd.NaT]} + ) + assert nan_df["nan"].dtype == "float64" + assert nan_df["nat"].dtype == "datetime64[ns]" + + for key in ["nan", "nat"]: + grouped = nan_df.groupby(key) + assert grouped.groups == {} + assert grouped.ngroups == 0 + assert grouped.indices == {} + with pytest.raises(KeyError, match=r"^nan$"): + grouped.get_group(np.nan) + with pytest.raises(KeyError, match=r"^NaT$"): + grouped.get_group(pd.NaT) + + +def test_groupby_two_group_keys_all_nan(): + # GH #36842: Grouping over two group keys shouldn't raise an error + df = DataFrame({"a": [np.nan, np.nan], "b": [np.nan, np.nan], "c": [1, 2]}) + result = df.groupby(["a", "b"]).indices + assert result == {} + + +def test_groupby_2d_malformed(): + d = DataFrame(index=range(2)) + d["group"] = ["g1", "g2"] + d["zeros"] = [0, 0] + d["ones"] = [1, 1] + d["label"] = ["l1", "l2"] + tmp = d.groupby(["group"]).mean() + res_values = np.array([[0.0, 1.0], [0.0, 1.0]]) + tm.assert_index_equal(tmp.columns, Index(["zeros", "ones"])) + tm.assert_numpy_array_equal(tmp.values, res_values) + + +def test_int32_overflow(): + B = np.concatenate((np.arange(10000), np.arange(10000), np.arange(5000))) + A = np.arange(25000) + df = DataFrame({"A": A, "B": B, "C": A, "D": B, "E": np.random.randn(25000)}) + + left = df.groupby(["A", "B", "C", "D"]).sum() + right = df.groupby(["D", "C", "B", "A"]).sum() + assert len(left) == len(right) + + +def test_groupby_sort_multi(): + df = DataFrame( + { + "a": ["foo", "bar", "baz"], + "b": [3, 2, 1], + "c": [0, 1, 2], + "d": np.random.randn(3), + } + ) + + tups = [tuple(row) for row in df[["a", "b", "c"]].values] + tups = com.asarray_tuplesafe(tups) + result = df.groupby(["a", "b", "c"], sort=True).sum() + tm.assert_numpy_array_equal(result.index.values, tups[[1, 2, 0]]) + + tups = [tuple(row) for row in df[["c", "a", "b"]].values] + tups = com.asarray_tuplesafe(tups) + result = df.groupby(["c", "a", "b"], sort=True).sum() + tm.assert_numpy_array_equal(result.index.values, tups) + + tups = [tuple(x) for x in df[["b", "c", "a"]].values] + tups = com.asarray_tuplesafe(tups) + result = df.groupby(["b", "c", "a"], sort=True).sum() + tm.assert_numpy_array_equal(result.index.values, tups[[2, 1, 0]]) + + df = DataFrame( + {"a": [0, 1, 2, 0, 1, 2], "b": [0, 0, 0, 1, 1, 1], "d": np.random.randn(6)} + ) + grouped = df.groupby(["a", "b"])["d"] + result = grouped.sum() + + def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): + tups = [tuple(row) for row in df[keys].values] + tups = com.asarray_tuplesafe(tups) + expected = f(df.groupby(tups)[field]) + for k, v in expected.items(): + assert result[k] == v + + _check_groupby(df, result, ["a", "b"], "d") + + +def test_dont_clobber_name_column(): + df = DataFrame( + {"key": ["a", "a", "a", "b", "b", "b"], "name": ["foo", "bar", "baz"] * 2} + ) + + result = df.groupby("key").apply(lambda x: x) + tm.assert_frame_equal(result, df) + + +def test_skip_group_keys(): + + tsf = 
tm.makeTimeDataFrame() + + grouped = tsf.groupby(lambda x: x.month, group_keys=False) + result = grouped.apply(lambda x: x.sort_values(by="A")[:3]) + + pieces = [group.sort_values(by="A")[:3] for key, group in grouped] + + expected = pd.concat(pieces) + tm.assert_frame_equal(result, expected) + + grouped = tsf["A"].groupby(lambda x: x.month, group_keys=False) + result = grouped.apply(lambda x: x.sort_values()[:3]) + + pieces = [group.sort_values()[:3] for key, group in grouped] + + expected = pd.concat(pieces) + tm.assert_series_equal(result, expected) + + +def test_no_nonsense_name(float_frame): + # GH #995 + s = float_frame["C"].copy() + s.name = None + + result = s.groupby(float_frame["A"]).agg(np.sum) + assert result.name is None + + +def test_multifunc_sum_bug(): + # GH #1065 + x = DataFrame(np.arange(9).reshape(3, 3)) + x["test"] = 0 + x["fl"] = [1.3, 1.5, 1.6] + + grouped = x.groupby("test") + result = grouped.agg({"fl": "sum", 2: "size"}) + assert result["fl"].dtype == np.float64 + + +def test_handle_dict_return_value(df): + def f(group): + return {"max": group.max(), "min": group.min()} + + def g(group): + return Series({"max": group.max(), "min": group.min()}) + + result = df.groupby("A")["C"].apply(f) + expected = df.groupby("A")["C"].apply(g) + + assert isinstance(result, Series) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("grouper", ["A", ["A", "B"]]) +def test_set_group_name(df, grouper): + def f(group): + assert group.name is not None + return group + + def freduce(group): + assert group.name is not None + return group.sum() + + def foo(x): + return freduce(x) + + grouped = df.groupby(grouper) + + # make sure all these work + grouped.apply(f) + grouped.aggregate(freduce) + grouped.aggregate({"C": freduce, "D": freduce}) + grouped.transform(f) + + grouped["C"].apply(f) + grouped["C"].aggregate(freduce) + grouped["C"].aggregate([freduce, foo]) + grouped["C"].transform(f) + + +def test_group_name_available_in_inference_pass(): + # gh-15062 + df = DataFrame({"a": [0, 0, 1, 1, 2, 2], "b": np.arange(6)}) + + names = [] + + def f(group): + names.append(group.name) + return group.copy() + + df.groupby("a", sort=False, group_keys=False).apply(f) + + expected_names = [0, 1, 2] + assert names == expected_names + + +def test_no_dummy_key_names(df): + # see gh-1291 + result = df.groupby(df["A"].values).sum() + assert result.index.name is None + + result = df.groupby([df["A"].values, df["B"].values]).sum() + assert result.index.names == (None, None) + + +def test_groupby_sort_multiindex_series(): + # series multiindex groupby sort argument was not being passed through + # _compress_group_index + # GH 9444 + index = MultiIndex( + levels=[[1, 2], [1, 2]], + codes=[[0, 0, 0, 0, 1, 1], [1, 1, 0, 0, 0, 0]], + names=["a", "b"], + ) + mseries = Series([0, 1, 2, 3, 4, 5], index=index) + index = MultiIndex( + levels=[[1, 2], [1, 2]], codes=[[0, 0, 1], [1, 0, 0]], names=["a", "b"] + ) + mseries_result = Series([0, 2, 4], index=index) + + result = mseries.groupby(level=["a", "b"], sort=False).first() + tm.assert_series_equal(result, mseries_result) + result = mseries.groupby(level=["a", "b"], sort=True).first() + tm.assert_series_equal(result, mseries_result.sort_index()) + + +def test_groupby_reindex_inside_function(): + + periods = 1000 + ind = date_range(start="2012/1/1", freq="5min", periods=periods) + df = DataFrame({"high": np.arange(periods), "low": np.arange(periods)}, index=ind) + + def agg_before(func, fix=False): + """ + Run an aggregate func on the subset of 
data. + """ + + def _func(data): + d = data.loc[data.index.map(lambda x: x.hour < 11)].dropna() + if fix: + data[data.index[0]] + if len(d) == 0: + return None + return func(d) + + return _func + + grouped = df.groupby(lambda x: datetime(x.year, x.month, x.day)) + closure_bad = grouped.agg({"high": agg_before(np.max)}) + closure_good = grouped.agg({"high": agg_before(np.max, True)}) + + tm.assert_frame_equal(closure_bad, closure_good) + + +def test_groupby_multiindex_missing_pair(): + # GH9049 + df = DataFrame( + { + "group1": ["a", "a", "a", "b"], + "group2": ["c", "c", "d", "c"], + "value": [1, 1, 1, 5], + } + ) + df = df.set_index(["group1", "group2"]) + df_grouped = df.groupby(level=["group1", "group2"], sort=True) + + res = df_grouped.agg("sum") + idx = MultiIndex.from_tuples( + [("a", "c"), ("a", "d"), ("b", "c")], names=["group1", "group2"] + ) + exp = DataFrame([[2], [1], [5]], index=idx, columns=["value"]) + + tm.assert_frame_equal(res, exp) + + +def test_groupby_multiindex_not_lexsorted(): + # GH 11640 + + # define the lexsorted version + lexsorted_mi = MultiIndex.from_tuples( + [("a", ""), ("b1", "c1"), ("b2", "c2")], names=["b", "c"] + ) + lexsorted_df = DataFrame([[1, 3, 4]], columns=lexsorted_mi) + assert lexsorted_df.columns._is_lexsorted() + + # define the non-lexsorted version + not_lexsorted_df = DataFrame( + columns=["a", "b", "c", "d"], data=[[1, "b1", "c1", 3], [1, "b2", "c2", 4]] + ) + not_lexsorted_df = not_lexsorted_df.pivot_table( + index="a", columns=["b", "c"], values="d" + ) + not_lexsorted_df = not_lexsorted_df.reset_index() + assert not not_lexsorted_df.columns._is_lexsorted() + + # compare the results + tm.assert_frame_equal(lexsorted_df, not_lexsorted_df) + + expected = lexsorted_df.groupby("a").mean() + with tm.assert_produces_warning(PerformanceWarning): + result = not_lexsorted_df.groupby("a").mean() + tm.assert_frame_equal(expected, result) + + # a transforming function should work regardless of sort + # GH 14776 + df = DataFrame( + {"x": ["a", "a", "b", "a"], "y": [1, 1, 2, 2], "z": [1, 2, 3, 4]} + ).set_index(["x", "y"]) + assert not df.index._is_lexsorted() + + for level in [0, 1, [0, 1]]: + for sort in [False, True]: + result = df.groupby(level=level, sort=sort).apply(DataFrame.drop_duplicates) + expected = df + tm.assert_frame_equal(expected, result) + + result = ( + df.sort_index() + .groupby(level=level, sort=sort) + .apply(DataFrame.drop_duplicates) + ) + expected = df.sort_index() + tm.assert_frame_equal(expected, result) + + +def test_index_label_overlaps_location(): + # checking we don't have any label/location confusion in the + # wake of GH5375 + df = DataFrame(list("ABCDE"), index=[2, 0, 2, 1, 1]) + g = df.groupby(list("ababb")) + actual = g.filter(lambda x: len(x) > 2) + expected = df.iloc[[1, 3, 4]] + tm.assert_frame_equal(actual, expected) + + ser = df[0] + g = ser.groupby(list("ababb")) + actual = g.filter(lambda x: len(x) > 2) + expected = ser.take([1, 3, 4]) + tm.assert_series_equal(actual, expected) + + # and again, with a generic Index of floats + df.index = df.index.astype(float) + g = df.groupby(list("ababb")) + actual = g.filter(lambda x: len(x) > 2) + expected = df.iloc[[1, 3, 4]] + tm.assert_frame_equal(actual, expected) + + ser = df[0] + g = ser.groupby(list("ababb")) + actual = g.filter(lambda x: len(x) > 2) + expected = ser.take([1, 3, 4]) + tm.assert_series_equal(actual, expected) + + +def test_transform_doesnt_clobber_ints(): + # GH 7972 + n = 6 + x = np.arange(n) + df = DataFrame({"a": x // 2, "b": 2.0 * x, "c": 3.0 * x}) 
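+ # df2 is the same frame but with a float grouping column; the two transforms
+ # should agree, i.e. grouping must not clobber the int column in df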
+ df2 = DataFrame({"a": x // 2 * 1.0, "b": 2.0 * x, "c": 3.0 * x}) + + gb = df.groupby("a") + result = gb.transform("mean") + + gb2 = df2.groupby("a") + expected = gb2.transform("mean") + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "sort_column", + ["ints", "floats", "strings", ["ints", "floats"], ["ints", "strings"]], +) +@pytest.mark.parametrize( + "group_column", ["int_groups", "string_groups", ["int_groups", "string_groups"]] +) +def test_groupby_preserves_sort(sort_column, group_column): + # Test to ensure that groupby always preserves sort order of original + # object. Issue #8588 and #9651 + + df = DataFrame( + { + "int_groups": [3, 1, 0, 1, 0, 3, 3, 3], + "string_groups": ["z", "a", "z", "a", "a", "g", "g", "g"], + "ints": [8, 7, 4, 5, 2, 9, 1, 1], + "floats": [2.3, 5.3, 6.2, -2.4, 2.2, 1.1, 1.1, 5], + "strings": ["z", "d", "a", "e", "word", "word2", "42", "47"], + } + ) + + # Try sorting on different types and with different group types + + df = df.sort_values(by=sort_column) + g = df.groupby(group_column) + + def test_sort(x): + tm.assert_frame_equal(x, x.sort_values(by=sort_column)) + + g.apply(test_sort) + + +def test_pivot_table_values_key_error(): + # This test is designed to replicate the error in issue #14938 + df = DataFrame( + { + "eventDate": date_range(datetime.today(), periods=20, freq="M").tolist(), + "thename": range(0, 20), + } + ) + + df["year"] = df.set_index("eventDate").index.year + df["month"] = df.set_index("eventDate").index.month + + with pytest.raises(KeyError, match="'badname'"): + df.reset_index().pivot_table( + index="year", columns="month", values="badname", aggfunc="count" + ) + + +@pytest.mark.parametrize("columns", ["C", ["C"]]) +@pytest.mark.parametrize("keys", [["A"], ["A", "B"]]) +@pytest.mark.parametrize( + "values", + [ + [True], + [0], + [0.0], + ["a"], + Categorical([0]), + [to_datetime(0)], + date_range(0, 1, 1, tz="US/Eastern"), + pd.array([0], dtype="Int64"), + pd.array([0], dtype="Float64"), + pd.array([False], dtype="boolean"), + ], +) +@pytest.mark.parametrize("method", ["attr", "agg", "apply"]) +@pytest.mark.parametrize( + "op", ["idxmax", "idxmin", "mad", "min", "max", "sum", "prod", "skew"] +) +@pytest.mark.filterwarnings("ignore:Dropping invalid columns:FutureWarning") +@pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning") +def test_empty_groupby(columns, keys, values, method, op, request, using_array_manager): + # GH8093 & GH26411 + override_dtype = None + + if ( + isinstance(values, Categorical) + and not isinstance(columns, list) + and op in ["sum", "prod", "skew", "mad"] + ): + # handled below GH#41291 + + if using_array_manager and op == "mad": + right_msg = "Cannot interpret 'CategoricalDtype.* as a data type" + msg = "Regex pattern \"'Categorical' does not implement.*" + right_msg + mark = pytest.mark.xfail(raises=AssertionError, match=msg) + request.node.add_marker(mark) + + elif ( + isinstance(values, Categorical) + and len(keys) == 1 + and op in ["idxmax", "idxmin"] + ): + mark = pytest.mark.xfail( + raises=ValueError, match="attempt to get arg(min|max) of an empty sequence" + ) + request.node.add_marker(mark) + elif ( + isinstance(values, Categorical) + and len(keys) == 1 + and not isinstance(columns, list) + ): + mark = pytest.mark.xfail( + raises=TypeError, match="'Categorical' does not implement" + ) + request.node.add_marker(mark) + elif isinstance(values, Categorical) and len(keys) == 1 and op in ["sum", "prod"]: + mark = pytest.mark.xfail( + raises=AssertionError, 
match="(DataFrame|Series) are different" + ) + request.node.add_marker(mark) + elif ( + isinstance(values, Categorical) + and len(keys) == 2 + and op in ["min", "max", "sum"] + ): + mark = pytest.mark.xfail( + raises=AssertionError, match="(DataFrame|Series) are different" + ) + request.node.add_marker(mark) + elif ( + isinstance(values, (IntegerArray, FloatingArray)) + and op == "mad" + and isinstance(columns, list) + ): + mark = pytest.mark.xfail( + raises=TypeError, match="can only perform ops with numeric values" + ) + request.node.add_marker(mark) + + elif ( + op == "mad" + and not isinstance(columns, list) + and isinstance(values, pd.DatetimeIndex) + and values.tz is not None + and using_array_manager + ): + mark = pytest.mark.xfail( + raises=TypeError, + match=r"Cannot interpret 'datetime64\[ns, US/Eastern\]' as a data type", + ) + request.node.add_marker(mark) + + elif isinstance(values, BooleanArray) and op in ["sum", "prod"]: + # We expect to get Int64 back for these + override_dtype = "Int64" + + if isinstance(values[0], bool) and op in ("prod", "sum"): + # sum/product of bools is an integer + override_dtype = "int64" + + df = DataFrame({"A": values, "B": values, "C": values}, columns=list("ABC")) + + if hasattr(values, "dtype"): + # check that we did the construction right + assert (df.dtypes == values.dtype).all() + + df = df.iloc[:0] + + gb = df.groupby(keys)[columns] + + def get_result(): + if method == "attr": + return getattr(gb, op)() + else: + return getattr(gb, method)(op) + + if columns == "C": + # i.e. SeriesGroupBy + if op in ["prod", "sum", "skew"]: + # ops that require more than just ordered-ness + if df.dtypes[0].kind == "M": + # GH#41291 + # datetime64 -> prod and sum are invalid + if op == "skew": + msg = "does not support reduction 'skew'" + else: + msg = "datetime64 type does not support" + with pytest.raises(TypeError, match=msg): + get_result() + + return + if op in ["prod", "sum", "skew", "mad"]: + if isinstance(values, Categorical): + # GH#41291 + if op == "mad": + # mad calls mean, which Categorical doesn't implement + msg = "does not support reduction 'mean'" + elif op == "skew": + msg = f"does not support reduction '{op}'" + else: + msg = "category type does not support" + with pytest.raises(TypeError, match=msg): + get_result() + + return + else: + # ie. DataFrameGroupBy + if op in ["prod", "sum"]: + # ops that require more than just ordered-ness + if df.dtypes[0].kind == "M": + # GH#41291 + # datetime64 -> prod and sum are invalid + result = get_result() + + # with numeric_only=True, these are dropped, and we get + # an empty DataFrame back + expected = df.set_index(keys)[[]] + tm.assert_equal(result, expected) + return + + elif isinstance(values, Categorical): + # GH#41291 + # Categorical doesn't implement sum or prod + result = get_result() + + # with numeric_only=True, these are dropped, and we get + # an empty DataFrame back + expected = df.set_index(keys)[[]] + if len(keys) != 1 and op == "prod": + # TODO: why just prod and not sum? 
+ # Categorical is special without 'observed=True' + lev = Categorical([0], dtype=values.dtype) + mi = MultiIndex.from_product([lev, lev], names=["A", "B"]) + expected = DataFrame([], columns=[], index=mi) + + tm.assert_equal(result, expected) + return + + elif df.dtypes[0] == object: + # FIXME: the test is actually wrong here, xref #41341 + result = get_result() + # In this case we have list-of-list, will raise TypeError, + # and subsequently be dropped as nuisance columns + expected = df.set_index(keys)[[]] + tm.assert_equal(result, expected) + return + + if ( + op in ["mad", "min", "max", "skew"] + and isinstance(values, Categorical) + and len(keys) == 1 + ): + # Categorical doesn't implement, so with numeric_only=True + # these are dropped and we get an empty DataFrame back + result = get_result() + expected = df.set_index(keys)[[]] + + # with numeric_only=True, these are dropped, and we get + # an empty DataFrame back + if len(keys) != 1: + # Categorical is special without 'observed=True' + lev = Categorical([0], dtype=values.dtype) + mi = MultiIndex.from_product([lev, lev], names=keys) + expected = DataFrame([], columns=[], index=mi) + else: + # all columns are dropped, but we end up with one row + # Categorical is special without 'observed=True' + lev = Categorical([0], dtype=values.dtype) + ci = Index(lev, name=keys[0]) + expected = DataFrame([], columns=[], index=ci) + # expected = df.set_index(keys)[columns] + + tm.assert_equal(result, expected) + return + + result = get_result() + expected = df.set_index(keys)[columns] + if override_dtype is not None: + expected = expected.astype(override_dtype) + if len(keys) == 1: + expected.index.name = keys[0] + tm.assert_equal(result, expected) + + +def test_empty_groupby_apply_nonunique_columns(): + # GH#44417 + df = DataFrame(np.random.randn(0, 4)) + df[3] = df[3].astype(np.int64) + df.columns = [0, 1, 2, 0] + gb = df.groupby(df[1]) + res = gb.apply(lambda x: x) + assert (res.dtypes == df.dtypes).all() + + +def test_tuple_as_grouping(): + # https://github.com/pandas-dev/pandas/issues/18314 + df = DataFrame( + { + ("a", "b"): [1, 1, 1, 1], + "a": [2, 2, 2, 2], + "b": [2, 2, 2, 2], + "c": [1, 1, 1, 1], + } + ) + + with pytest.raises(KeyError, match=r"('a', 'b')"): + df[["a", "b", "c"]].groupby(("a", "b")) + + result = df.groupby(("a", "b"))["c"].sum() + expected = Series([4], name="c", index=Index([1], name=("a", "b"))) + tm.assert_series_equal(result, expected) + + +def test_tuple_correct_keyerror(): + # https://github.com/pandas-dev/pandas/issues/18798 + df = DataFrame(1, index=range(3), columns=MultiIndex.from_product([[1, 2], [3, 4]])) + with pytest.raises(KeyError, match=r"^\(7, 8\)$"): + df.groupby((7, 8)).mean() + + +def test_groupby_agg_ohlc_non_first(): + # GH 21716 + df = DataFrame( + [[1], [1]], + columns=Index(["foo"], name="mycols"), + index=date_range("2018-01-01", periods=2, freq="D", name="dti"), + ) + + expected = DataFrame( + [[1, 1, 1, 1, 1], [1, 1, 1, 1, 1]], + columns=MultiIndex.from_tuples( + ( + ("foo", "sum", "foo"), + ("foo", "ohlc", "open"), + ("foo", "ohlc", "high"), + ("foo", "ohlc", "low"), + ("foo", "ohlc", "close"), + ), + names=["mycols", None, None], + ), + index=date_range("2018-01-01", periods=2, freq="D", name="dti"), + ) + + result = df.groupby(Grouper(freq="D")).agg(["sum", "ohlc"]) + + tm.assert_frame_equal(result, expected) + + +def test_groupby_multiindex_nat(): + # GH 9236 + values = [ + (pd.NaT, "a"), + (datetime(2012, 1, 2), "a"), + (datetime(2012, 1, 2), "b"), + (datetime(2012, 1, 3), "a"), + ] + 
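+ # rows whose "date" level is NaT must still participate in the level-1 grouping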
mi = MultiIndex.from_tuples(values, names=["date", None]) + ser = Series([3, 2, 2.5, 4], index=mi) + + result = ser.groupby(level=1).mean() + expected = Series([3.0, 2.5], index=["a", "b"]) + tm.assert_series_equal(result, expected) + + +def test_groupby_empty_list_raises(): + # GH 5289 + values = zip(range(10), range(10)) + df = DataFrame(values, columns=["apple", "b"]) + msg = "Grouper and axis must be same length" + with pytest.raises(ValueError, match=msg): + df.groupby([[]]) + + +def test_groupby_multiindex_series_keys_len_equal_group_axis(): + # GH 25704 + index_array = [["x", "x"], ["a", "b"], ["k", "k"]] + index_names = ["first", "second", "third"] + ri = MultiIndex.from_arrays(index_array, names=index_names) + s = Series(data=[1, 2], index=ri) + result = s.groupby(["first", "third"]).sum() + + index_array = [["x"], ["k"]] + index_names = ["first", "third"] + ei = MultiIndex.from_arrays(index_array, names=index_names) + expected = Series([3], index=ei) + + tm.assert_series_equal(result, expected) + + +def test_groupby_groups_in_BaseGrouper(): + # GH 26326 + # Test if DataFrame grouped with a pandas.Grouper has correct groups + mi = MultiIndex.from_product([["A", "B"], ["C", "D"]], names=["alpha", "beta"]) + df = DataFrame({"foo": [1, 2, 1, 2], "bar": [1, 2, 3, 4]}, index=mi) + result = df.groupby([Grouper(level="alpha"), "beta"]) + expected = df.groupby(["alpha", "beta"]) + assert result.groups == expected.groups + + result = df.groupby(["beta", Grouper(level="alpha")]) + expected = df.groupby(["beta", "alpha"]) + assert result.groups == expected.groups + + +@pytest.mark.parametrize("group_name", ["x", ["x"]]) +def test_groupby_axis_1(group_name): + # GH 27614 + df = DataFrame( + np.arange(12).reshape(3, 4), index=[0, 1, 0], columns=[10, 20, 10, 20] + ) + df.index.name = "y" + df.columns.name = "x" + + results = df.groupby(group_name, axis=1).sum() + expected = df.T.groupby(group_name).sum().T + tm.assert_frame_equal(results, expected) + + # test on MI column + iterables = [["bar", "baz", "foo"], ["one", "two"]] + mi = MultiIndex.from_product(iterables=iterables, names=["x", "x1"]) + df = DataFrame(np.arange(18).reshape(3, 6), index=[0, 1, 0], columns=mi) + results = df.groupby(group_name, axis=1).sum() + expected = df.T.groupby(group_name).sum().T + tm.assert_frame_equal(results, expected) + + +@pytest.mark.parametrize( + "op, expected", + [ + ( + "shift", + { + "time": [ + None, + None, + Timestamp("2019-01-01 12:00:00"), + Timestamp("2019-01-01 12:30:00"), + None, + None, + ] + }, + ), + ( + "bfill", + { + "time": [ + Timestamp("2019-01-01 12:00:00"), + Timestamp("2019-01-01 12:30:00"), + Timestamp("2019-01-01 14:00:00"), + Timestamp("2019-01-01 14:30:00"), + Timestamp("2019-01-01 14:00:00"), + Timestamp("2019-01-01 14:30:00"), + ] + }, + ), + ( + "ffill", + { + "time": [ + Timestamp("2019-01-01 12:00:00"), + Timestamp("2019-01-01 12:30:00"), + Timestamp("2019-01-01 12:00:00"), + Timestamp("2019-01-01 12:30:00"), + Timestamp("2019-01-01 14:00:00"), + Timestamp("2019-01-01 14:30:00"), + ] + }, + ), + ], +) +def test_shift_bfill_ffill_tz(tz_naive_fixture, op, expected): + # GH19995, GH27992: Check that timezone does not drop in shift, bfill, and ffill + tz = tz_naive_fixture + data = { + "id": ["A", "B", "A", "B", "A", "B"], + "time": [ + Timestamp("2019-01-01 12:00:00"), + Timestamp("2019-01-01 12:30:00"), + None, + None, + Timestamp("2019-01-01 14:00:00"), + Timestamp("2019-01-01 14:30:00"), + ], + } + df = DataFrame(data).assign(time=lambda x: x.time.dt.tz_localize(tz)) + + 
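+ # the grouped op must keep the localized dtype; expected is localized the same way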
grouped = df.groupby("id") + result = getattr(grouped, op)() + expected = DataFrame(expected).assign(time=lambda x: x.time.dt.tz_localize(tz)) + tm.assert_frame_equal(result, expected) + + +def test_groupby_only_none_group(): + # see GH21624 + # this was crashing with "ValueError: Length of passed values is 1, index implies 0" + df = DataFrame({"g": [None], "x": 1}) + actual = df.groupby("g")["x"].transform("sum") + expected = Series([np.nan], name="x") + + tm.assert_series_equal(actual, expected) + + +def test_groupby_duplicate_index(): + # GH#29189 the groupby call here used to raise + ser = Series([2, 5, 6, 8], index=[2.0, 4.0, 4.0, 5.0]) + gb = ser.groupby(level=0) + + result = gb.mean() + expected = Series([2, 5.5, 8], index=[2.0, 4.0, 5.0]) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "idx", + [ + Index(["a", "a"], name="foo"), + MultiIndex.from_tuples((("a", "a"), ("a", "a")), names=["foo", "bar"]), + ], +) +@pytest.mark.filterwarnings("ignore:tshift is deprecated:FutureWarning") +def test_dup_labels_output_shape(groupby_func, idx): + if groupby_func in {"size", "ngroup", "cumcount"}: + pytest.skip("Not applicable") + # TODO(2.0) Remove after pad/backfill deprecation enforced + groupby_func = maybe_normalize_deprecated_kernels(groupby_func) + df = DataFrame([[1, 1]], columns=idx) + grp_by = df.groupby([0]) + + args = [] + if groupby_func in {"fillna", "nth"}: + args.append(0) + elif groupby_func == "corrwith": + args.append(df) + elif groupby_func == "tshift": + df.index = [Timestamp("today")] + args.extend([1, "D"]) + + result = getattr(grp_by, groupby_func)(*args) + + assert result.shape == (1, 2) + tm.assert_index_equal(result.columns, idx) + + +def test_groupby_crash_on_nunique(axis): + # Fix following 30253 + dti = date_range("2016-01-01", periods=2, name="foo") + df = DataFrame({("A", "B"): [1, 2], ("A", "C"): [1, 3], ("D", "B"): [0, 0]}) + df.columns.names = ("bar", "baz") + df.index = dti + + axis_number = df._get_axis_number(axis) + if not axis_number: + df = df.T + + gb = df.groupby(axis=axis_number, level=0) + result = gb.nunique() + + expected = DataFrame({"A": [1, 2], "D": [1, 1]}, index=dti) + expected.columns.name = "bar" + if not axis_number: + expected = expected.T + + tm.assert_frame_equal(result, expected) + + if axis_number == 0: + # same thing, but empty columns + gb2 = df[[]].groupby(axis=axis_number, level=0) + exp = expected[[]] + else: + # same thing, but empty rows + gb2 = df.loc[[]].groupby(axis=axis_number, level=0) + # default for empty when we can't infer a dtype is float64 + exp = expected.loc[[]].astype(np.float64) + + res = gb2.nunique() + tm.assert_frame_equal(res, exp) + + +def test_groupby_list_level(): + # GH 9790 + expected = DataFrame(np.arange(0, 9).reshape(3, 3), dtype=float) + result = expected.groupby(level=[0]).mean() + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "max_seq_items, expected", + [ + (5, "{0: [0], 1: [1], 2: [2], 3: [3], 4: [4]}"), + (4, "{0: [0], 1: [1], 2: [2], 3: [3], ...}"), + (1, "{0: [0], ...}"), + ], +) +def test_groups_repr_truncates(max_seq_items, expected): + # GH 1135 + df = DataFrame(np.random.randn(5, 1)) + df["a"] = df.index + + with pd.option_context("display.max_seq_items", max_seq_items): + result = df.groupby("a").groups.__repr__() + assert result == expected + + result = df.groupby(np.array(df.a)).groups.__repr__() + assert result == expected + + +def test_group_on_two_row_multiindex_returns_one_tuple_key(): + # GH 18451 + df = DataFrame([{"a": 1, 
"b": 2, "c": 99}, {"a": 1, "b": 2, "c": 88}]) + df = df.set_index(["a", "b"]) + + grp = df.groupby(["a", "b"]) + result = grp.indices + expected = {(1, 2): np.array([0, 1], dtype=np.int64)} + + assert len(result) == 1 + key = (1, 2) + assert (result[key] == expected[key]).all() + + +@pytest.mark.parametrize( + "klass, attr, value", + [ + (DataFrame, "level", "a"), + (DataFrame, "as_index", False), + (DataFrame, "sort", False), + (DataFrame, "group_keys", False), + (DataFrame, "squeeze", True), + (DataFrame, "observed", True), + (DataFrame, "dropna", False), + pytest.param( + Series, + "axis", + 1, + marks=pytest.mark.xfail( + reason="GH 35443: Attribute currently not passed on to series" + ), + ), + (Series, "level", "a"), + (Series, "as_index", False), + (Series, "sort", False), + (Series, "group_keys", False), + (Series, "squeeze", True), + (Series, "observed", True), + (Series, "dropna", False), + ], +) +@pytest.mark.filterwarnings( + "ignore:The `squeeze` parameter is deprecated:FutureWarning" +) +def test_subsetting_columns_keeps_attrs(klass, attr, value): + # GH 9959 - When subsetting columns, don't drop attributes + df = DataFrame({"a": [1], "b": [2], "c": [3]}) + if attr != "axis": + df = df.set_index("a") + + expected = df.groupby("a", **{attr: value}) + result = expected[["b"]] if klass is DataFrame else expected["b"] + assert getattr(result, attr) == getattr(expected, attr) + + +def test_subsetting_columns_axis_1(): + # GH 37725 + g = DataFrame({"A": [1], "B": [2], "C": [3]}).groupby([0, 0, 1], axis=1) + match = "Cannot subset columns when using axis=1" + with pytest.raises(ValueError, match=match): + g[["A", "B"]].sum() + + +@pytest.mark.parametrize("func", ["sum", "any", "shift"]) +def test_groupby_column_index_name_lost(func): + # GH: 29764 groupby loses index sometimes + expected = Index(["a"], name="idx") + df = DataFrame([[1]], columns=expected) + df_grouped = df.groupby([1]) + result = getattr(df_grouped, func)().columns + tm.assert_index_equal(result, expected) + + +def test_groupby_duplicate_columns(): + # GH: 31735 + df = DataFrame( + {"A": ["f", "e", "g", "h"], "B": ["a", "b", "c", "d"], "C": [1, 2, 3, 4]} + ).astype(object) + df.columns = ["A", "B", "B"] + result = df.groupby([0, 0, 0, 0]).min() + expected = DataFrame([["e", "a", 1]], columns=["A", "B", "B"]) + tm.assert_frame_equal(result, expected) + + +def test_groupby_series_with_tuple_name(): + # GH 37755 + ser = Series([1, 2, 3, 4], index=[1, 1, 2, 2], name=("a", "a")) + ser.index.name = ("b", "b") + result = ser.groupby(level=0).last() + expected = Series([2, 4], index=[1, 2], name=("a", "a")) + expected.index.name = ("b", "b") + tm.assert_series_equal(result, expected) + + +@pytest.mark.xfail(not IS64, reason="GH#38778: fail on 32-bit system") +@pytest.mark.parametrize( + "func, values", [("sum", [97.0, 98.0]), ("mean", [24.25, 24.5])] +) +def test_groupby_numerical_stability_sum_mean(func, values): + # GH#38778 + data = [1e16, 1e16, 97, 98, -5e15, -5e15, -5e15, -5e15] + df = DataFrame({"group": [1, 2] * 4, "a": data, "b": data}) + result = getattr(df.groupby("group"), func)() + expected = DataFrame({"a": values, "b": values}, index=Index([1, 2], name="group")) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.xfail(not IS64, reason="GH#38778: fail on 32-bit system") +def test_groupby_numerical_stability_cumsum(): + # GH#38934 + data = [1e16, 1e16, 97, 98, -5e15, -5e15, -5e15, -5e15] + df = DataFrame({"group": [1, 2] * 4, "a": data, "b": data}) + result = df.groupby("group").cumsum() + exp_data = ( 
+ [1e16] * 2 + [1e16 + 96, 1e16 + 98] + [5e15 + 97, 5e15 + 98] + [97.0, 98.0] + ) + expected = DataFrame({"a": exp_data, "b": exp_data}) + tm.assert_frame_equal(result, expected, check_exact=True) + + +def test_groupby_mean_duplicate_index(rand_series_with_duplicate_datetimeindex): + dups = rand_series_with_duplicate_datetimeindex + result = dups.groupby(level=0).mean() + expected = dups.groupby(dups.index).mean() + tm.assert_series_equal(result, expected) + + +def test_groupby_all_nan_groups_drop(): + # GH 15036 + s = Series([1, 2, 3], [np.nan, np.nan, np.nan]) + result = s.groupby(s.index).sum() + expected = Series([], index=Index([], dtype=np.float64), dtype=np.int64) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("numeric_only", [True, False]) +def test_groupby_empty_multi_column(as_index, numeric_only): + # GH 15106 & GH 41998 + df = DataFrame(data=[], columns=["A", "B", "C"]) + gb = df.groupby(["A", "B"], as_index=as_index) + result = gb.sum(numeric_only=numeric_only) + if as_index: + index = MultiIndex([[], []], [[], []], names=["A", "B"]) + columns = ["C"] if not numeric_only else [] + else: + index = RangeIndex(0) + columns = ["A", "B", "C"] if not numeric_only else ["A", "B"] + expected = DataFrame([], columns=columns, index=index) + tm.assert_frame_equal(result, expected) + + +def test_groupby_aggregation_non_numeric_dtype(): + # GH #43108 + df = DataFrame( + [["M", [1]], ["M", [1]], ["W", [10]], ["W", [20]]], columns=["MW", "v"] + ) + + expected = DataFrame( + { + "v": [[1, 1], [10, 20]], + }, + index=Index(["M", "W"], dtype="object", name="MW"), + ) + + gb = df.groupby(by=["MW"]) + result = gb.sum() + tm.assert_frame_equal(result, expected) + + +def test_groupby_aggregation_multi_non_numeric_dtype(): + # GH #42395 + df = DataFrame( + { + "x": [1, 0, 1, 1, 0], + "y": [Timedelta(i, "days") for i in range(1, 6)], + "z": [Timedelta(i * 10, "days") for i in range(1, 6)], + } + ) + + expected = DataFrame( + { + "y": [Timedelta(i, "days") for i in range(7, 9)], + "z": [Timedelta(i * 10, "days") for i in range(7, 9)], + }, + index=Index([0, 1], dtype="int64", name="x"), + ) + + gb = df.groupby(by=["x"]) + result = gb.sum() + tm.assert_frame_equal(result, expected) + + +def test_groupby_aggregation_numeric_with_non_numeric_dtype(): + # GH #43108 + df = DataFrame( + { + "x": [1, 0, 1, 1, 0], + "y": [Timedelta(i, "days") for i in range(1, 6)], + "z": list(range(1, 6)), + } + ) + + expected = DataFrame( + {"z": [7, 8]}, + index=Index([0, 1], dtype="int64", name="x"), + ) + + gb = df.groupby(by=["x"]) + result = gb.sum() + tm.assert_frame_equal(result, expected) + + +def test_groupby_filtered_df_std(): + # GH 16174 + dicts = [ + {"filter_col": False, "groupby_col": True, "bool_col": True, "float_col": 10.5}, + {"filter_col": True, "groupby_col": True, "bool_col": True, "float_col": 20.5}, + {"filter_col": True, "groupby_col": True, "bool_col": True, "float_col": 30.5}, + ] + df = DataFrame(dicts) + + df_filter = df[df["filter_col"] == True] # noqa:E712 + dfgb = df_filter.groupby("groupby_col") + result = dfgb.std() + expected = DataFrame( + [[0.0, 0.0, 7.071068]], + columns=["filter_col", "bool_col", "float_col"], + index=Index([True], name="groupby_col"), + ) + tm.assert_frame_equal(result, expected) + + +def test_datetime_categorical_multikey_groupby_indices(): + # GH 26859 + df = DataFrame( + { + "a": Series(list("abc")), + "b": Series( + to_datetime(["2018-01-01", "2018-02-01", "2018-03-01"]), + dtype="category", + ), + "c": Categorical.from_codes([-1, 0, 1], 
categories=[0, 1]), + } + ) + result = df.groupby(["a", "b"]).indices + expected = { + ("a", Timestamp("2018-01-01 00:00:00")): np.array([0]), + ("b", Timestamp("2018-02-01 00:00:00")): np.array([1]), + ("c", Timestamp("2018-03-01 00:00:00")): np.array([2]), + } + assert result == expected + + +def test_rolling_wrong_param_min_period(): + # GH34037 + name_l = ["Alice"] * 5 + ["Bob"] * 5 + val_l = [np.nan, np.nan, 1, 2, 3] + [np.nan, 1, 2, 3, 4] + test_df = DataFrame([name_l, val_l]).T + test_df.columns = ["name", "val"] + + result_error_msg = r"__init__\(\) got an unexpected keyword argument 'min_period'" + with pytest.raises(TypeError, match=result_error_msg): + test_df.groupby("name")["val"].rolling(window=2, min_period=1).sum() + + +def test_pad_backfill_deprecation(): + # GH 33396 + s = Series([1, 2, 3]) + with tm.assert_produces_warning(FutureWarning, match="backfill"): + s.groupby(level=0).backfill() + with tm.assert_produces_warning(FutureWarning, match="pad"): + s.groupby(level=0).pad() diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_groupby_dropna.py b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_groupby_dropna.py new file mode 100644 index 0000000..ab568e2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_groupby_dropna.py @@ -0,0 +1,372 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +@pytest.mark.parametrize( + "dropna, tuples, outputs", + [ + ( + True, + [["A", "B"], ["B", "A"]], + {"c": [13.0, 123.23], "d": [13.0, 123.0], "e": [13.0, 1.0]}, + ), + ( + False, + [["A", "B"], ["A", np.nan], ["B", "A"]], + { + "c": [13.0, 12.3, 123.23], + "d": [13.0, 233.0, 123.0], + "e": [13.0, 12.0, 1.0], + }, + ), + ], +) +def test_groupby_dropna_multi_index_dataframe_nan_in_one_group( + dropna, tuples, outputs, nulls_fixture +): + # GH 3729 this is to test that NA is in one group + df_list = [ + ["A", "B", 12, 12, 12], + ["A", nulls_fixture, 12.3, 233.0, 12], + ["B", "A", 123.23, 123, 1], + ["A", "B", 1, 1, 1.0], + ] + df = pd.DataFrame(df_list, columns=["a", "b", "c", "d", "e"]) + grouped = df.groupby(["a", "b"], dropna=dropna).sum() + + mi = pd.MultiIndex.from_tuples(tuples, names=list("ab")) + + # Since right now, by default MI will drop NA from levels when we create MI + # via `from_*`, so we need to add NA for level manually afterwards. 
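+ # (set_levels below re-inserts np.nan into level "b" for the dropna=False case)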
+ if not dropna: + mi = mi.set_levels(["A", "B", np.nan], level="b") + expected = pd.DataFrame(outputs, index=mi) + + tm.assert_frame_equal(grouped, expected) + + +@pytest.mark.parametrize( + "dropna, tuples, outputs", + [ + ( + True, + [["A", "B"], ["B", "A"]], + {"c": [12.0, 123.23], "d": [12.0, 123.0], "e": [12.0, 1.0]}, + ), + ( + False, + [["A", "B"], ["A", np.nan], ["B", "A"], [np.nan, "B"]], + { + "c": [12.0, 13.3, 123.23, 1.0], + "d": [12.0, 234.0, 123.0, 1.0], + "e": [12.0, 13.0, 1.0, 1.0], + }, + ), + ], +) +def test_groupby_dropna_multi_index_dataframe_nan_in_two_groups( + dropna, tuples, outputs, nulls_fixture, nulls_fixture2 +): + # GH 3729 this is to test that NA in different groups with different representations + df_list = [ + ["A", "B", 12, 12, 12], + ["A", nulls_fixture, 12.3, 233.0, 12], + ["B", "A", 123.23, 123, 1], + [nulls_fixture2, "B", 1, 1, 1.0], + ["A", nulls_fixture2, 1, 1, 1.0], + ] + df = pd.DataFrame(df_list, columns=["a", "b", "c", "d", "e"]) + grouped = df.groupby(["a", "b"], dropna=dropna).sum() + + mi = pd.MultiIndex.from_tuples(tuples, names=list("ab")) + + # Since right now, by default MI will drop NA from levels when we create MI + # via `from_*`, so we need to add NA for level manually afterwards. + if not dropna: + mi = mi.set_levels([["A", "B", np.nan], ["A", "B", np.nan]]) + expected = pd.DataFrame(outputs, index=mi) + + tm.assert_frame_equal(grouped, expected) + + +@pytest.mark.parametrize( + "dropna, idx, outputs", + [ + (True, ["A", "B"], {"b": [123.23, 13.0], "c": [123.0, 13.0], "d": [1.0, 13.0]}), + ( + False, + ["A", "B", np.nan], + { + "b": [123.23, 13.0, 12.3], + "c": [123.0, 13.0, 233.0], + "d": [1.0, 13.0, 12.0], + }, + ), + ], +) +def test_groupby_dropna_normal_index_dataframe(dropna, idx, outputs): + # GH 3729 + df_list = [ + ["B", 12, 12, 12], + [None, 12.3, 233.0, 12], + ["A", 123.23, 123, 1], + ["B", 1, 1, 1.0], + ] + df = pd.DataFrame(df_list, columns=["a", "b", "c", "d"]) + grouped = df.groupby("a", dropna=dropna).sum() + + expected = pd.DataFrame(outputs, index=pd.Index(idx, dtype="object", name="a")) + + tm.assert_frame_equal(grouped, expected) + + +@pytest.mark.parametrize( + "dropna, idx, expected", + [ + (True, ["a", "a", "b", np.nan], pd.Series([3, 3], index=["a", "b"])), + ( + False, + ["a", "a", "b", np.nan], + pd.Series([3, 3, 3], index=["a", "b", np.nan]), + ), + ], +) +def test_groupby_dropna_series_level(dropna, idx, expected): + ser = pd.Series([1, 2, 3, 3], index=idx) + + result = ser.groupby(level=0, dropna=dropna).sum() + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "dropna, expected", + [ + (True, pd.Series([210.0, 350.0], index=["a", "b"], name="Max Speed")), + ( + False, + pd.Series([210.0, 350.0, 20.0], index=["a", "b", np.nan], name="Max Speed"), + ), + ], +) +def test_groupby_dropna_series_by(dropna, expected): + ser = pd.Series( + [390.0, 350.0, 30.0, 20.0], + index=["Falcon", "Falcon", "Parrot", "Parrot"], + name="Max Speed", + ) + + result = ser.groupby(["a", "b", "a", np.nan], dropna=dropna).mean() + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("dropna", (False, True)) +def test_grouper_dropna_propagation(dropna): + # GH 36604 + df = pd.DataFrame({"A": [0, 0, 1, None], "B": [1, 2, 3, None]}) + gb = df.groupby("A", dropna=dropna) + assert gb.grouper.dropna == dropna + + +@pytest.mark.parametrize( + "dropna,input_index,expected_data,expected_index", + [ + (True, pd.RangeIndex(0, 4), {"B": [2, 2, 1]}, pd.RangeIndex(0, 3)), + (True, list("abcd"), {"B": [2, 
2, 1]}, list("abc")), + ( + True, + pd.MultiIndex.from_tuples( + [(1, "R"), (1, "B"), (2, "R"), (2, "B")], names=["num", "col"] + ), + {"B": [2, 2, 1]}, + pd.MultiIndex.from_tuples( + [(1, "R"), (1, "B"), (2, "R")], names=["num", "col"] + ), + ), + (False, pd.RangeIndex(0, 4), {"B": [2, 2, 1, 1]}, pd.RangeIndex(0, 4)), + (False, list("abcd"), {"B": [2, 2, 1, 1]}, list("abcd")), + ( + False, + pd.MultiIndex.from_tuples( + [(1, "R"), (1, "B"), (2, "R"), (2, "B")], names=["num", "col"] + ), + {"B": [2, 2, 1, 1]}, + pd.MultiIndex.from_tuples( + [(1, "R"), (1, "B"), (2, "R"), (2, "B")], names=["num", "col"] + ), + ), + ], +) +def test_groupby_dataframe_slice_then_transform( + dropna, input_index, expected_data, expected_index +): + # GH35014 & GH35612 + + df = pd.DataFrame({"A": [0, 0, 1, None], "B": [1, 2, 3, None]}, index=input_index) + gb = df.groupby("A", dropna=dropna) + + result = gb.transform(len) + expected = pd.DataFrame(expected_data, index=expected_index) + tm.assert_frame_equal(result, expected) + + result = gb[["B"]].transform(len) + expected = pd.DataFrame(expected_data, index=expected_index) + tm.assert_frame_equal(result, expected) + + result = gb["B"].transform(len) + expected = pd.Series(expected_data["B"], index=expected_index, name="B") + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "dropna, tuples, outputs", + [ + ( + True, + [["A", "B"], ["B", "A"]], + {"c": [13.0, 123.23], "d": [12.0, 123.0], "e": [1.0, 1.0]}, + ), + ( + False, + [["A", "B"], ["A", np.nan], ["B", "A"]], + { + "c": [13.0, 12.3, 123.23], + "d": [12.0, 233.0, 123.0], + "e": [1.0, 12.0, 1.0], + }, + ), + ], +) +def test_groupby_dropna_multi_index_dataframe_agg(dropna, tuples, outputs): + # GH 3729 + df_list = [ + ["A", "B", 12, 12, 12], + ["A", None, 12.3, 233.0, 12], + ["B", "A", 123.23, 123, 1], + ["A", "B", 1, 1, 1.0], + ] + df = pd.DataFrame(df_list, columns=["a", "b", "c", "d", "e"]) + agg_dict = {"c": sum, "d": max, "e": "min"} + grouped = df.groupby(["a", "b"], dropna=dropna).agg(agg_dict) + + mi = pd.MultiIndex.from_tuples(tuples, names=list("ab")) + + # Since right now, by default MI will drop NA from levels when we create MI + # via `from_*`, so we need to add NA for level manually afterwards. 
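+    # For example: pd.MultiIndex.from_tuples([("A", np.nan)]) keeps the NaN
+    # key only in the codes (as -1), not in .levels, so the NaN level has to
+    # be restored before comparing against the groupby result.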
+ if not dropna: + mi = mi.set_levels(["A", "B", np.nan], level="b") + expected = pd.DataFrame(outputs, index=mi) + + tm.assert_frame_equal(grouped, expected) + + +@pytest.mark.arm_slow +@pytest.mark.parametrize( + "datetime1, datetime2", + [ + (pd.Timestamp("2020-01-01"), pd.Timestamp("2020-02-01")), + (pd.Timedelta("-2 days"), pd.Timedelta("-1 days")), + (pd.Period("2020-01-01"), pd.Period("2020-02-01")), + ], +) +@pytest.mark.parametrize("dropna, values", [(True, [12, 3]), (False, [12, 3, 6])]) +def test_groupby_dropna_datetime_like_data( + dropna, values, datetime1, datetime2, unique_nulls_fixture, unique_nulls_fixture2 +): + # 3729 + df = pd.DataFrame( + { + "values": [1, 2, 3, 4, 5, 6], + "dt": [ + datetime1, + unique_nulls_fixture, + datetime2, + unique_nulls_fixture2, + datetime1, + datetime1, + ], + } + ) + + if dropna: + indexes = [datetime1, datetime2] + else: + indexes = [datetime1, datetime2, np.nan] + + grouped = df.groupby("dt", dropna=dropna).agg({"values": sum}) + expected = pd.DataFrame({"values": values}, index=pd.Index(indexes, name="dt")) + + tm.assert_frame_equal(grouped, expected) + + +@pytest.mark.parametrize( + "dropna, data, selected_data, levels", + [ + pytest.param( + False, + {"groups": ["a", "a", "b", np.nan], "values": [10, 10, 20, 30]}, + {"values": [0, 1, 0, 0]}, + ["a", "b", np.nan], + id="dropna_false_has_nan", + ), + pytest.param( + True, + {"groups": ["a", "a", "b", np.nan], "values": [10, 10, 20, 30]}, + {"values": [0, 1, 0]}, + None, + id="dropna_true_has_nan", + ), + pytest.param( + # no nan in "groups"; dropna=True|False should be same. + False, + {"groups": ["a", "a", "b", "c"], "values": [10, 10, 20, 30]}, + {"values": [0, 1, 0, 0]}, + None, + id="dropna_false_no_nan", + ), + pytest.param( + # no nan in "groups"; dropna=True|False should be same. + True, + {"groups": ["a", "a", "b", "c"], "values": [10, 10, 20, 30]}, + {"values": [0, 1, 0, 0]}, + None, + id="dropna_true_no_nan", + ), + ], +) +def test_groupby_apply_with_dropna_for_multi_index(dropna, data, selected_data, levels): + # GH 35889 + + df = pd.DataFrame(data) + gb = df.groupby("groups", dropna=dropna) + result = gb.apply(lambda grp: pd.DataFrame({"values": range(len(grp))})) + + mi_tuples = tuple(zip(data["groups"], selected_data["values"])) + mi = pd.MultiIndex.from_tuples(mi_tuples, names=["groups", None]) + # Since right now, by default MI will drop NA from levels when we create MI + # via `from_*`, so we need to add NA for level manually afterwards. 
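+    # `levels` is only supplied by the dropna=False case that actually has a
+    # NaN key (see the `dropna_false_has_nan` param above); the other cases
+    # keep the MultiIndex as built.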
+ if not dropna and levels: + mi = mi.set_levels(levels, level="groups") + + expected = pd.DataFrame(selected_data, index=mi) + tm.assert_frame_equal(result, expected) + + +def test_groupby_nan_included(): + # GH 35646 + data = {"group": ["g1", np.nan, "g1", "g2", np.nan], "B": [0, 1, 2, 3, 4]} + df = pd.DataFrame(data) + grouped = df.groupby("group", dropna=False) + result = grouped.indices + dtype = np.intp + expected = { + "g1": np.array([0, 2], dtype=dtype), + "g2": np.array([3], dtype=dtype), + np.nan: np.array([1, 4], dtype=dtype), + } + for result_values, expected_values in zip(result.values(), expected.values()): + tm.assert_numpy_array_equal(result_values, expected_values) + assert np.isnan(list(result.keys())[2]) + assert list(result.keys())[0:2] == ["g1", "g2"] diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_groupby_shift_diff.py b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_groupby_shift_diff.py new file mode 100644 index 0000000..c989c0e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_groupby_shift_diff.py @@ -0,0 +1,133 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + NaT, + Series, + Timedelta, + Timestamp, +) +import pandas._testing as tm + + +def test_group_shift_with_null_key(): + # This test is designed to replicate the segfault in issue #13813. + n_rows = 1200 + + # Generate a moderately large dataframe with occasional missing + # values in column `B`, and then group by [`A`, `B`]. This should + # force `-1` in `labels` array of `g.grouper.group_info` exactly + # at those places, where the group-by key is partially missing. + df = DataFrame( + [(i % 12, i % 3 if i % 3 else np.nan, i) for i in range(n_rows)], + dtype=float, + columns=["A", "B", "Z"], + index=None, + ) + g = df.groupby(["A", "B"]) + + expected = DataFrame( + [(i + 12 if i % 3 and i < n_rows - 12 else np.nan) for i in range(n_rows)], + dtype=float, + columns=["Z"], + index=None, + ) + result = g.shift(-1) + + tm.assert_frame_equal(result, expected) + + +def test_group_shift_with_fill_value(): + # GH #24128 + n_rows = 24 + df = DataFrame( + [(i % 12, i % 3, i) for i in range(n_rows)], + dtype=float, + columns=["A", "B", "Z"], + index=None, + ) + g = df.groupby(["A", "B"]) + + expected = DataFrame( + [(i + 12 if i < n_rows - 12 else 0) for i in range(n_rows)], + dtype=float, + columns=["Z"], + index=None, + ) + result = g.shift(-1, fill_value=0) + + tm.assert_frame_equal(result, expected) + + +def test_group_shift_lose_timezone(): + # GH 30134 + now_dt = Timestamp.utcnow() + df = DataFrame({"a": [1, 1], "date": now_dt}) + result = df.groupby("a").shift(0).iloc[0] + expected = Series({"date": now_dt}, name=result.name) + tm.assert_series_equal(result, expected) + + +def test_group_diff_real(any_real_numpy_dtype): + df = DataFrame( + {"a": [1, 2, 3, 3, 2], "b": [1, 2, 3, 4, 5]}, + dtype=any_real_numpy_dtype, + ) + result = df.groupby("a")["b"].diff() + exp_dtype = "float" + if any_real_numpy_dtype in ["int8", "int16", "float32"]: + exp_dtype = "float32" + expected = Series([np.nan, np.nan, np.nan, 1.0, 3.0], dtype=exp_dtype, name="b") + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "data", + [ + [ + Timestamp("2013-01-01"), + Timestamp("2013-01-02"), + Timestamp("2013-01-03"), + ], + [Timedelta("5 days"), Timedelta("6 days"), Timedelta("7 days")], + ], +) +def test_group_diff_datetimelike(data): + df = DataFrame({"a": [1, 2, 2], "b": data}) + result = df.groupby("a")["b"].diff() + 
expected = Series([NaT, NaT, Timedelta("1 days")], name="b") + tm.assert_series_equal(result, expected) + + +def test_group_diff_bool(): + df = DataFrame({"a": [1, 2, 3, 3, 2], "b": [True, True, False, False, True]}) + result = df.groupby("a")["b"].diff() + expected = Series([np.nan, np.nan, np.nan, False, False], name="b") + tm.assert_series_equal(result, expected) + + +def test_group_diff_object_raises(object_dtype): + df = DataFrame( + {"a": ["foo", "bar", "bar"], "b": ["baz", "foo", "foo"]}, dtype=object_dtype + ) + with pytest.raises(TypeError, match=r"unsupported operand type\(s\) for -"): + df.groupby("a")["b"].diff() + + +def test_empty_shift_with_fill(): + # GH 41264, single-index check + df = DataFrame(columns=["a", "b", "c"]) + shifted = df.groupby(["a"]).shift(1) + shifted_with_fill = df.groupby(["a"]).shift(1, fill_value=0) + tm.assert_frame_equal(shifted, shifted_with_fill) + tm.assert_index_equal(shifted.index, shifted_with_fill.index) + + +def test_multindex_empty_shift_with_fill(): + # GH 41264, multi-index check + df = DataFrame(columns=["a", "b", "c"]) + shifted = df.groupby(["a", "b"]).shift(1) + shifted_with_fill = df.groupby(["a", "b"]).shift(1, fill_value=0) + tm.assert_frame_equal(shifted, shifted_with_fill) + tm.assert_index_equal(shifted.index, shifted_with_fill.index) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_groupby_subclass.py b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_groupby_subclass.py new file mode 100644 index 0000000..6b1bc5f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_groupby_subclass.py @@ -0,0 +1,113 @@ +from datetime import datetime + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + Series, +) +import pandas._testing as tm +from pandas.core.groupby.base import maybe_normalize_deprecated_kernels + + +@pytest.mark.parametrize( + "obj", + [ + tm.SubclassedDataFrame({"A": np.arange(0, 10)}), + tm.SubclassedSeries(np.arange(0, 10), name="A"), + ], +) +@pytest.mark.filterwarnings("ignore:tshift is deprecated:FutureWarning") +def test_groupby_preserves_subclass(obj, groupby_func): + # GH28330 -- preserve subclass through groupby operations + + if isinstance(obj, Series) and groupby_func in {"corrwith"}: + pytest.skip("Not applicable") + # TODO(2.0) Remove after pad/backfill deprecation enforced + groupby_func = maybe_normalize_deprecated_kernels(groupby_func) + grouped = obj.groupby(np.arange(0, 10)) + + # Groups should preserve subclass type + assert isinstance(grouped.get_group(0), type(obj)) + + args = [] + if groupby_func in {"fillna", "nth"}: + args.append(0) + elif groupby_func == "corrwith": + args.append(obj) + elif groupby_func == "tshift": + args.extend([0, 0]) + + result1 = getattr(grouped, groupby_func)(*args) + result2 = grouped.agg(groupby_func, *args) + + # Reduction or transformation kernels should preserve type + slices = {"ngroup", "cumcount", "size"} + if isinstance(obj, DataFrame) and groupby_func in slices: + assert isinstance(result1, obj._constructor_sliced) + else: + assert isinstance(result1, type(obj)) + + # Confirm .agg() groupby operations return same results + if isinstance(result1, DataFrame): + tm.assert_frame_equal(result1, result2) + else: + tm.assert_series_equal(result1, result2) + + +def test_groupby_preserves_metadata(): + # GH-37343 + custom_df = tm.SubclassedDataFrame({"a": [1, 2, 3], "b": [1, 1, 2], "c": [7, 8, 9]}) + assert "testattr" in custom_df._metadata + custom_df.testattr = "hello" + for _, 
group_df in custom_df.groupby("c"): + assert group_df.testattr == "hello" + + # GH-45314 + def func(group): + assert isinstance(group, tm.SubclassedDataFrame) + assert hasattr(group, "testattr") + return group.testattr + + result = custom_df.groupby("c").apply(func) + expected = tm.SubclassedSeries(["hello"] * 3, index=Index([7, 8, 9], name="c")) + tm.assert_series_equal(result, expected) + + def func2(group): + assert isinstance(group, tm.SubclassedSeries) + assert hasattr(group, "testattr") + return group.testattr + + custom_series = tm.SubclassedSeries([1, 2, 3]) + custom_series.testattr = "hello" + result = custom_series.groupby(custom_df["c"]).apply(func2) + tm.assert_series_equal(result, expected) + result = custom_series.groupby(custom_df["c"]).agg(func2) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("obj", [DataFrame, tm.SubclassedDataFrame]) +def test_groupby_resample_preserves_subclass(obj): + # GH28330 -- preserve subclass through groupby.resample() + + df = obj( + { + "Buyer": "Carl Carl Carl Carl Joe Carl".split(), + "Quantity": [18, 3, 5, 1, 9, 3], + "Date": [ + datetime(2013, 9, 1, 13, 0), + datetime(2013, 9, 1, 13, 5), + datetime(2013, 10, 1, 20, 0), + datetime(2013, 10, 3, 10, 0), + datetime(2013, 12, 2, 12, 0), + datetime(2013, 9, 2, 14, 0), + ], + } + ) + df = df.set_index("Date") + + # Confirm groupby.resample() preserves dataframe type + result = df.groupby("Buyer").resample("5D").sum() + assert isinstance(result, obj) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_grouping.py b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_grouping.py new file mode 100644 index 0000000..efb0b82 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_grouping.py @@ -0,0 +1,1015 @@ +""" test where we are determining what we are grouping, or getting groups """ + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + CategoricalIndex, + DataFrame, + Index, + MultiIndex, + Series, + Timestamp, + date_range, +) +import pandas._testing as tm +from pandas.core.api import ( + Float64Index, + Int64Index, +) +from pandas.core.groupby.grouper import Grouping + +# selection +# -------------------------------- + + +class TestSelection: + def test_select_bad_cols(self): + df = DataFrame([[1, 2]], columns=["A", "B"]) + g = df.groupby("A") + with pytest.raises(KeyError, match="\"Columns not found: 'C'\""): + g[["C"]] + + with pytest.raises(KeyError, match="^[^A]+$"): + # A should not be referenced as a bad column... + # will have to rethink regex if you change message! 
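+            # the `^[^A]+$` pattern asserts the letter "A" never appears in
+            # the message, so only the genuinely missing column "C" is named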
+ g[["A", "C"]] + + def test_groupby_duplicated_column_errormsg(self): + # GH7511 + df = DataFrame( + columns=["A", "B", "A", "C"], data=[range(4), range(2, 6), range(0, 8, 2)] + ) + + msg = "Grouper for 'A' not 1-dimensional" + with pytest.raises(ValueError, match=msg): + df.groupby("A") + with pytest.raises(ValueError, match=msg): + df.groupby(["A", "B"]) + + grouped = df.groupby("B") + c = grouped.count() + assert c.columns.nlevels == 1 + assert c.columns.size == 3 + + def test_column_select_via_attr(self, df): + result = df.groupby("A").C.sum() + expected = df.groupby("A")["C"].sum() + tm.assert_series_equal(result, expected) + + df["mean"] = 1.5 + result = df.groupby("A").mean() + expected = df.groupby("A").agg(np.mean) + tm.assert_frame_equal(result, expected) + + def test_getitem_list_of_columns(self): + df = DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": ["one", "one", "two", "three", "two", "two", "one", "three"], + "C": np.random.randn(8), + "D": np.random.randn(8), + "E": np.random.randn(8), + } + ) + + result = df.groupby("A")[["C", "D"]].mean() + result2 = df.groupby("A")[df.columns[2:4]].mean() + + expected = df.loc[:, ["A", "C", "D"]].groupby("A").mean() + + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected) + + def test_getitem_numeric_column_names(self): + # GH #13731 + df = DataFrame( + { + 0: list("abcd") * 2, + 2: np.random.randn(8), + 4: np.random.randn(8), + 6: np.random.randn(8), + } + ) + result = df.groupby(0)[df.columns[1:3]].mean() + result2 = df.groupby(0)[[2, 4]].mean() + + expected = df.loc[:, [0, 2, 4]].groupby(0).mean() + + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected) + + # per GH 23566 this should raise a FutureWarning + with tm.assert_produces_warning(FutureWarning): + df.groupby(0)[2, 4].mean() + + def test_getitem_single_list_of_columns(self, df): + # per GH 23566 this should raise a FutureWarning + with tm.assert_produces_warning(FutureWarning): + df.groupby("A")["C", "D"].mean() + + def test_getitem_single_column(self): + df = DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": ["one", "one", "two", "three", "two", "two", "one", "three"], + "C": np.random.randn(8), + "D": np.random.randn(8), + "E": np.random.randn(8), + } + ) + + result = df.groupby("A")["C"].mean() + + as_frame = df.loc[:, ["A", "C"]].groupby("A").mean() + as_series = as_frame.iloc[:, 0] + expected = as_series + + tm.assert_series_equal(result, expected) + + def test_indices_grouped_by_tuple_with_lambda(self): + # GH 36158 + df = DataFrame( + {"Tuples": ((x, y) for x in [0, 1] for y in np.random.randint(3, 5, 5))} + ) + + gb = df.groupby("Tuples") + gb_lambda = df.groupby(lambda x: df.iloc[x, 0]) + + expected = gb.indices + result = gb_lambda.indices + + tm.assert_dict_equal(result, expected) + + +# grouping +# -------------------------------- + + +class TestGrouping: + def test_grouper_index_types(self): + # related GH5375 + # groupby misbehaving when using a Floatlike index + df = DataFrame(np.arange(10).reshape(5, 2), columns=list("AB")) + for index in [ + tm.makeFloatIndex, + tm.makeStringIndex, + tm.makeUnicodeIndex, + tm.makeIntIndex, + tm.makeDateIndex, + tm.makePeriodIndex, + ]: + + df.index = index(len(df)) + df.groupby(list("abcde")).apply(lambda x: x) + + df.index = list(reversed(df.index.tolist())) + df.groupby(list("abcde")).apply(lambda x: x) + + def test_grouper_multilevel_freq(self): + + # GH 7885 + # with level and freq 
specified in a pd.Grouper + from datetime import ( + date, + timedelta, + ) + + d0 = date.today() - timedelta(days=14) + dates = date_range(d0, date.today()) + date_index = MultiIndex.from_product([dates, dates], names=["foo", "bar"]) + df = DataFrame(np.random.randint(0, 100, 225), index=date_index) + + # Check string level + expected = ( + df.reset_index() + .groupby([pd.Grouper(key="foo", freq="W"), pd.Grouper(key="bar", freq="W")]) + .sum() + ) + # reset index changes columns dtype to object + expected.columns = Index([0], dtype="int64") + + result = df.groupby( + [pd.Grouper(level="foo", freq="W"), pd.Grouper(level="bar", freq="W")] + ).sum() + tm.assert_frame_equal(result, expected) + + # Check integer level + result = df.groupby( + [pd.Grouper(level=0, freq="W"), pd.Grouper(level=1, freq="W")] + ).sum() + tm.assert_frame_equal(result, expected) + + def test_grouper_creation_bug(self): + + # GH 8795 + df = DataFrame({"A": [0, 0, 1, 1, 2, 2], "B": [1, 2, 3, 4, 5, 6]}) + g = df.groupby("A") + expected = g.sum() + + g = df.groupby(pd.Grouper(key="A")) + result = g.sum() + tm.assert_frame_equal(result, expected) + + g = df.groupby(pd.Grouper(key="A", axis=0)) + result = g.sum() + tm.assert_frame_equal(result, expected) + + result = g.apply(lambda x: x.sum()) + expected["A"] = [0, 2, 4] + expected = expected.loc[:, ["A", "B"]] + tm.assert_frame_equal(result, expected) + + # GH14334 + # pd.Grouper(key=...) may be passed in a list + df = DataFrame( + {"A": [0, 0, 0, 1, 1, 1], "B": [1, 1, 2, 2, 3, 3], "C": [1, 2, 3, 4, 5, 6]} + ) + # Group by single column + expected = df.groupby("A").sum() + g = df.groupby([pd.Grouper(key="A")]) + result = g.sum() + tm.assert_frame_equal(result, expected) + + # Group by two columns + # using a combination of strings and Grouper objects + expected = df.groupby(["A", "B"]).sum() + + # Group with two Grouper objects + g = df.groupby([pd.Grouper(key="A"), pd.Grouper(key="B")]) + result = g.sum() + tm.assert_frame_equal(result, expected) + + # Group with a string and a Grouper object + g = df.groupby(["A", pd.Grouper(key="B")]) + result = g.sum() + tm.assert_frame_equal(result, expected) + + # Group with a Grouper object and a string + g = df.groupby([pd.Grouper(key="A"), "B"]) + result = g.sum() + tm.assert_frame_equal(result, expected) + + # GH8866 + s = Series( + np.arange(8, dtype="int64"), + index=MultiIndex.from_product( + [list("ab"), range(2), date_range("20130101", periods=2)], + names=["one", "two", "three"], + ), + ) + result = s.groupby(pd.Grouper(level="three", freq="M")).sum() + expected = Series( + [28], + index=pd.DatetimeIndex([Timestamp("2013-01-31")], freq="M", name="three"), + ) + tm.assert_series_equal(result, expected) + + # just specifying a level breaks + result = s.groupby(pd.Grouper(level="one")).sum() + expected = s.groupby(level="one").sum() + tm.assert_series_equal(result, expected) + + def test_grouper_column_and_index(self): + # GH 14327 + + # Grouping a multi-index frame by a column and an index level should + # be equivalent to resetting the index and grouping by two columns + idx = MultiIndex.from_tuples( + [("a", 1), ("a", 2), ("a", 3), ("b", 1), ("b", 2), ("b", 3)] + ) + idx.names = ["outer", "inner"] + df_multi = DataFrame( + {"A": np.arange(6), "B": ["one", "one", "two", "two", "one", "one"]}, + index=idx, + ) + result = df_multi.groupby(["B", pd.Grouper(level="inner")]).mean() + expected = df_multi.reset_index().groupby(["B", "inner"]).mean() + tm.assert_frame_equal(result, expected) + + # Test the reverse grouping order + 
result = df_multi.groupby([pd.Grouper(level="inner"), "B"]).mean() + expected = df_multi.reset_index().groupby(["inner", "B"]).mean() + tm.assert_frame_equal(result, expected) + + # Grouping a single-index frame by a column and the index should + # be equivalent to resetting the index and grouping by two columns + df_single = df_multi.reset_index("outer") + result = df_single.groupby(["B", pd.Grouper(level="inner")]).mean() + expected = df_single.reset_index().groupby(["B", "inner"]).mean() + tm.assert_frame_equal(result, expected) + + # Test the reverse grouping order + result = df_single.groupby([pd.Grouper(level="inner"), "B"]).mean() + expected = df_single.reset_index().groupby(["inner", "B"]).mean() + tm.assert_frame_equal(result, expected) + + def test_groupby_levels_and_columns(self): + # GH9344, GH9049 + idx_names = ["x", "y"] + idx = MultiIndex.from_tuples([(1, 1), (1, 2), (3, 4), (5, 6)], names=idx_names) + df = DataFrame(np.arange(12).reshape(-1, 3), index=idx) + + by_levels = df.groupby(level=idx_names).mean() + # reset_index changes columns dtype to object + by_columns = df.reset_index().groupby(idx_names).mean() + + # without casting, by_columns.columns is object-dtype + by_columns.columns = by_columns.columns.astype(np.int64) + tm.assert_frame_equal(by_levels, by_columns) + + def test_groupby_categorical_index_and_columns(self, observed): + # GH18432, adapted for GH25871 + columns = ["A", "B", "A", "B"] + categories = ["B", "A"] + data = np.array( + [[1, 2, 1, 2], [1, 2, 1, 2], [1, 2, 1, 2], [1, 2, 1, 2], [1, 2, 1, 2]], int + ) + cat_columns = CategoricalIndex(columns, categories=categories, ordered=True) + df = DataFrame(data=data, columns=cat_columns) + result = df.groupby(axis=1, level=0, observed=observed).sum() + expected_data = np.array([[4, 2], [4, 2], [4, 2], [4, 2], [4, 2]], int) + expected_columns = CategoricalIndex( + categories, categories=categories, ordered=True + ) + expected = DataFrame(data=expected_data, columns=expected_columns) + tm.assert_frame_equal(result, expected) + + # test transposed version + df = DataFrame(data.T, index=cat_columns) + result = df.groupby(axis=0, level=0, observed=observed).sum() + expected = DataFrame(data=expected_data.T, index=expected_columns) + tm.assert_frame_equal(result, expected) + + def test_grouper_getting_correct_binner(self): + + # GH 10063 + # using a non-time-based grouper and a time-based grouper + # and specifying levels + df = DataFrame( + {"A": 1}, + index=MultiIndex.from_product( + [list("ab"), date_range("20130101", periods=80)], names=["one", "two"] + ), + ) + result = df.groupby( + [pd.Grouper(level="one"), pd.Grouper(level="two", freq="M")] + ).sum() + expected = DataFrame( + {"A": [31, 28, 21, 31, 28, 21]}, + index=MultiIndex.from_product( + [list("ab"), date_range("20130101", freq="M", periods=3)], + names=["one", "two"], + ), + ) + tm.assert_frame_equal(result, expected) + + def test_grouper_iter(self, df): + assert sorted(df.groupby("A").grouper) == ["bar", "foo"] + + def test_empty_groups(self, df): + # see gh-1048 + with pytest.raises(ValueError, match="No group keys passed!"): + df.groupby([]) + + def test_groupby_grouper(self, df): + grouped = df.groupby("A") + + result = df.groupby(grouped.grouper).mean() + expected = grouped.mean() + tm.assert_frame_equal(result, expected) + + def test_groupby_dict_mapping(self): + # GH #679 + from pandas import Series + + s = Series({"T1": 5}) + result = s.groupby({"T1": "T2"}).agg(sum) + expected = s.groupby(["T2"]).agg(sum) + tm.assert_series_equal(result, 
expected) + + s = Series([1.0, 2.0, 3.0, 4.0], index=list("abcd")) + mapping = {"a": 0, "b": 0, "c": 1, "d": 1} + + result = s.groupby(mapping).mean() + result2 = s.groupby(mapping).agg(np.mean) + expected = s.groupby([0, 0, 1, 1]).mean() + expected2 = s.groupby([0, 0, 1, 1]).mean() + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result, result2) + tm.assert_series_equal(result, expected2) + + @pytest.mark.parametrize( + "index", + [ + [0, 1, 2, 3], + ["a", "b", "c", "d"], + [Timestamp(2021, 7, 28 + i) for i in range(4)], + ], + ) + def test_groupby_series_named_with_tuple(self, frame_or_series, index): + # GH 42731 + obj = frame_or_series([1, 2, 3, 4], index=index) + groups = Series([1, 0, 1, 0], index=index, name=("a", "a")) + result = obj.groupby(groups).last() + expected = frame_or_series([4, 3]) + expected.index.name = ("a", "a") + tm.assert_equal(result, expected) + + def test_groupby_grouper_f_sanity_checked(self): + dates = date_range("01-Jan-2013", periods=12, freq="MS") + ts = Series(np.random.randn(12), index=dates) + + # GH3035 + # index.map is used to apply grouper to the index + # if it fails on the elements, map tries it on the entire index as + # a sequence. That can yield invalid results that cause trouble + # down the line. + # the surprise comes from using key[0:6] rather than str(key)[0:6] + # when the elements are Timestamp. + # the result is Index[0:6], very confusing. + + msg = r"Grouper result violates len\(labels\) == len\(data\)" + with pytest.raises(AssertionError, match=msg): + ts.groupby(lambda key: key[0:6]) + + def test_grouping_error_on_multidim_input(self, df): + msg = "Grouper for '' not 1-dimensional" + with pytest.raises(ValueError, match=msg): + Grouping(df.index, df[["A", "A"]]) + + def test_multiindex_passthru(self): + + # GH 7997 + # regression from 0.14.1 + df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + df.columns = MultiIndex.from_tuples([(0, 1), (1, 1), (2, 1)]) + + result = df.groupby(axis=1, level=[0, 1]).first() + tm.assert_frame_equal(result, df) + + def test_multiindex_negative_level(self, mframe): + # GH 13901 + result = mframe.groupby(level=-1).sum() + expected = mframe.groupby(level="second").sum() + tm.assert_frame_equal(result, expected) + + result = mframe.groupby(level=-2).sum() + expected = mframe.groupby(level="first").sum() + tm.assert_frame_equal(result, expected) + + result = mframe.groupby(level=[-2, -1]).sum() + expected = mframe + tm.assert_frame_equal(result, expected) + + result = mframe.groupby(level=[-1, "first"]).sum() + expected = mframe.groupby(level=["second", "first"]).sum() + tm.assert_frame_equal(result, expected) + + def test_multifunc_select_col_integer_cols(self, df): + df.columns = np.arange(len(df.columns)) + + # it works! 
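+        # i.e. selecting a single integer-named column and aggregating it via
+        # a renaming dict should not raise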
+ df.groupby(1, as_index=False)[2].agg({"Q": np.mean}) + + def test_multiindex_columns_empty_level(self): + lst = [["count", "values"], ["to filter", ""]] + midx = MultiIndex.from_tuples(lst) + + df = DataFrame([[1, "A"]], columns=midx) + + grouped = df.groupby("to filter").groups + assert grouped["A"] == [0] + + grouped = df.groupby([("to filter", "")]).groups + assert grouped["A"] == [0] + + df = DataFrame([[1, "A"], [2, "B"]], columns=midx) + + expected = df.groupby("to filter").groups + result = df.groupby([("to filter", "")]).groups + assert result == expected + + df = DataFrame([[1, "A"], [2, "A"]], columns=midx) + + expected = df.groupby("to filter").groups + result = df.groupby([("to filter", "")]).groups + tm.assert_dict_equal(result, expected) + + def test_groupby_multiindex_tuple(self): + # GH 17979 + df = DataFrame( + [[1, 2, 3, 4], [3, 4, 5, 6], [1, 4, 2, 3]], + columns=MultiIndex.from_arrays([["a", "b", "b", "c"], [1, 1, 2, 2]]), + ) + expected = df.groupby([("b", 1)]).groups + result = df.groupby(("b", 1)).groups + tm.assert_dict_equal(expected, result) + + df2 = DataFrame( + df.values, + columns=MultiIndex.from_arrays( + [["a", "b", "b", "c"], ["d", "d", "e", "e"]] + ), + ) + expected = df2.groupby([("b", "d")]).groups + result = df.groupby(("b", 1)).groups + tm.assert_dict_equal(expected, result) + + df3 = DataFrame(df.values, columns=[("a", "d"), ("b", "d"), ("b", "e"), "c"]) + expected = df3.groupby([("b", "d")]).groups + result = df.groupby(("b", 1)).groups + tm.assert_dict_equal(expected, result) + + @pytest.mark.parametrize("sort", [True, False]) + def test_groupby_level(self, sort, mframe, df): + # GH 17537 + frame = mframe + deleveled = frame.reset_index() + + result0 = frame.groupby(level=0, sort=sort).sum() + result1 = frame.groupby(level=1, sort=sort).sum() + + expected0 = frame.groupby(deleveled["first"].values, sort=sort).sum() + expected1 = frame.groupby(deleveled["second"].values, sort=sort).sum() + + expected0.index.name = "first" + expected1.index.name = "second" + + assert result0.index.name == "first" + assert result1.index.name == "second" + + tm.assert_frame_equal(result0, expected0) + tm.assert_frame_equal(result1, expected1) + assert result0.index.name == frame.index.names[0] + assert result1.index.name == frame.index.names[1] + + # groupby level name + result0 = frame.groupby(level="first", sort=sort).sum() + result1 = frame.groupby(level="second", sort=sort).sum() + tm.assert_frame_equal(result0, expected0) + tm.assert_frame_equal(result1, expected1) + + # axis=1 + + result0 = frame.T.groupby(level=0, axis=1, sort=sort).sum() + result1 = frame.T.groupby(level=1, axis=1, sort=sort).sum() + tm.assert_frame_equal(result0, expected0.T) + tm.assert_frame_equal(result1, expected1.T) + + # raise exception for non-MultiIndex + msg = "level > 0 or level < -1 only valid with MultiIndex" + with pytest.raises(ValueError, match=msg): + df.groupby(level=1) + + def test_groupby_level_index_names(self, axis): + # GH4014 this used to raise ValueError since 'exp'>1 (in py2) + df = DataFrame({"exp": ["A"] * 3 + ["B"] * 3, "var1": range(6)}).set_index( + "exp" + ) + if axis in (1, "columns"): + df = df.T + df.groupby(level="exp", axis=axis) + msg = f"level name foo is not the name of the {df._get_axis_name(axis)}" + with pytest.raises(ValueError, match=msg): + df.groupby(level="foo", axis=axis) + + @pytest.mark.parametrize("sort", [True, False]) + def test_groupby_level_with_nas(self, sort): + # GH 17537 + index = MultiIndex( + levels=[[1, 0], [0, 1, 2, 3]], + 
codes=[[1, 1, 1, 1, 0, 0, 0, 0], [0, 1, 2, 3, 0, 1, 2, 3]],
+        )
+
+        # factorizing doesn't confuse things
+        s = Series(np.arange(8.0), index=index)
+        result = s.groupby(level=0, sort=sort).sum()
+        expected = Series([6.0, 22.0], index=[0, 1])
+        tm.assert_series_equal(result, expected)
+
+        index = MultiIndex(
+            levels=[[1, 0], [0, 1, 2, 3]],
+            codes=[[1, 1, 1, 1, -1, 0, 0, 0], [0, 1, 2, 3, 0, 1, 2, 3]],
+        )
+
+        # factorizing doesn't confuse things
+        s = Series(np.arange(8.0), index=index)
+        result = s.groupby(level=0, sort=sort).sum()
+        expected = Series([6.0, 18.0], index=[0.0, 1.0])
+        tm.assert_series_equal(result, expected)
+
+    def test_groupby_args(self, mframe):
+        # PR 8618 and issue 8015
+        frame = mframe
+
+        msg = "You have to supply one of 'by' and 'level'"
+        with pytest.raises(TypeError, match=msg):
+            frame.groupby()
+
+        msg = "You have to supply one of 'by' and 'level'"
+        with pytest.raises(TypeError, match=msg):
+            frame.groupby(by=None, level=None)
+
+    @pytest.mark.parametrize(
+        "sort,labels",
+        [
+            [True, [2, 2, 2, 0, 0, 1, 1, 3, 3, 3]],
+            [False, [0, 0, 0, 1, 1, 2, 2, 3, 3, 3]],
+        ],
+    )
+    def test_level_preserve_order(self, sort, labels, mframe):
+        # GH 17537
+        grouped = mframe.groupby(level=0, sort=sort)
+        exp_labels = np.array(labels, np.intp)
+        tm.assert_almost_equal(grouped.grouper.codes[0], exp_labels)
+
+    def test_grouping_labels(self, mframe):
+        grouped = mframe.groupby(mframe.index.get_level_values(0))
+        exp_labels = np.array([2, 2, 2, 0, 0, 1, 1, 3, 3, 3], dtype=np.intp)
+        tm.assert_almost_equal(grouped.grouper.codes[0], exp_labels)
+
+    def test_list_grouper_with_nat(self):
+        # GH 14715
+        df = DataFrame({"date": date_range("1/1/2011", periods=365, freq="D")})
+        df.iloc[-1] = pd.NaT
+        grouper = pd.Grouper(key="date", freq="AS")
+
+        # Grouper in a list grouping
+        result = df.groupby([grouper])
+        expected = {Timestamp("2011-01-01"): Index(list(range(364)))}
+        tm.assert_dict_equal(result.groups, expected)
+
+        # Test case without a list
+        result = df.groupby(grouper)
+        expected = {Timestamp("2011-01-01"): 365}
+        tm.assert_dict_equal(result.groups, expected)
+
+    @pytest.mark.parametrize(
+        "func,expected",
+        [
+            (
+                "transform",
+                Series(name=2, dtype=np.float64, index=Index([])),
+            ),
+            (
+                "agg",
+                Series(name=2, dtype=np.float64, index=Float64Index([], name=1)),
+            ),
+            (
+                "apply",
+                Series(name=2, dtype=np.float64, index=Float64Index([], name=1)),
+            ),
+        ],
+    )
+    def test_evaluate_with_empty_groups(self, func, expected):
+        # GH 26208
+        # test transform'ing empty groups
+        # (not testing other agg functions, because they return
+        # different index objects.)
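+        # "transform" keeps the original (empty, unnamed) index, while
+        # "agg"/"apply" index the result by the group key column (named 1),
+        # hence the Float64Index([], name=1) in the expected values above.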
+ df = DataFrame({1: [], 2: []}) + g = df.groupby(1) + result = getattr(g[2], func)(lambda x: x) + tm.assert_series_equal(result, expected) + + def test_groupby_empty(self): + # https://github.com/pandas-dev/pandas/issues/27190 + s = Series([], name="name", dtype="float64") + gr = s.groupby([]) + + result = gr.mean() + tm.assert_series_equal(result, s) + + # check group properties + assert len(gr.grouper.groupings) == 1 + tm.assert_numpy_array_equal( + gr.grouper.group_info[0], np.array([], dtype=np.dtype(np.intp)) + ) + + tm.assert_numpy_array_equal( + gr.grouper.group_info[1], np.array([], dtype=np.dtype(np.intp)) + ) + + assert gr.grouper.group_info[2] == 0 + + # check name + assert s.groupby(s).grouper.names == ["name"] + + def test_groupby_level_index_value_all_na(self): + # issue 20519 + df = DataFrame( + [["x", np.nan, 10], [None, np.nan, 20]], columns=["A", "B", "C"] + ).set_index(["A", "B"]) + result = df.groupby(level=["A", "B"]).sum() + expected = DataFrame( + data=[], + index=MultiIndex( + levels=[Index(["x"], dtype="object"), Index([], dtype="float64")], + codes=[[], []], + names=["A", "B"], + ), + columns=["C"], + dtype="int64", + ) + tm.assert_frame_equal(result, expected) + + def test_groupby_multiindex_level_empty(self): + # https://github.com/pandas-dev/pandas/issues/31670 + df = DataFrame( + [[123, "a", 1.0], [123, "b", 2.0]], columns=["id", "category", "value"] + ) + df = df.set_index(["id", "category"]) + empty = df[df.value < 0] + result = empty.groupby("id").sum() + expected = DataFrame( + dtype="float64", columns=["value"], index=Int64Index([], name="id") + ) + tm.assert_frame_equal(result, expected) + + +# get_group +# -------------------------------- + + +class TestGetGroup: + def test_get_group(self): + # GH 5267 + # be datelike friendly + df = DataFrame( + { + "DATE": pd.to_datetime( + [ + "10-Oct-2013", + "10-Oct-2013", + "10-Oct-2013", + "11-Oct-2013", + "11-Oct-2013", + "11-Oct-2013", + ] + ), + "label": ["foo", "foo", "bar", "foo", "foo", "bar"], + "VAL": [1, 2, 3, 4, 5, 6], + } + ) + + g = df.groupby("DATE") + key = list(g.groups)[0] + result1 = g.get_group(key) + result2 = g.get_group(Timestamp(key).to_pydatetime()) + result3 = g.get_group(str(Timestamp(key))) + tm.assert_frame_equal(result1, result2) + tm.assert_frame_equal(result1, result3) + + g = df.groupby(["DATE", "label"]) + + key = list(g.groups)[0] + result1 = g.get_group(key) + result2 = g.get_group((Timestamp(key[0]).to_pydatetime(), key[1])) + result3 = g.get_group((str(Timestamp(key[0])), key[1])) + tm.assert_frame_equal(result1, result2) + tm.assert_frame_equal(result1, result3) + + # must pass a same-length tuple with multiple keys + msg = "must supply a tuple to get_group with multiple grouping keys" + with pytest.raises(ValueError, match=msg): + g.get_group("foo") + with pytest.raises(ValueError, match=msg): + g.get_group("foo") + msg = "must supply a same-length tuple to get_group with multiple grouping keys" + with pytest.raises(ValueError, match=msg): + g.get_group(("foo", "bar", "baz")) + + def test_get_group_empty_bins(self, observed): + + d = DataFrame([3, 1, 7, 6]) + bins = [0, 5, 10, 15] + g = d.groupby(pd.cut(d[0], bins), observed=observed) + + # TODO: should prob allow a str of Interval work as well + # IOW '(0, 5]' + result = g.get_group(pd.Interval(0, 5)) + expected = DataFrame([3, 1], index=[0, 1]) + tm.assert_frame_equal(result, expected) + + msg = r"Interval\(10, 15, closed='right'\)" + with pytest.raises(KeyError, match=msg): + g.get_group(pd.Interval(10, 15)) + + def 
test_get_group_grouped_by_tuple(self): + # GH 8121 + df = DataFrame([[(1,), (1, 2), (1,), (1, 2)]], index=["ids"]).T + gr = df.groupby("ids") + expected = DataFrame({"ids": [(1,), (1,)]}, index=[0, 2]) + result = gr.get_group((1,)) + tm.assert_frame_equal(result, expected) + + dt = pd.to_datetime(["2010-01-01", "2010-01-02", "2010-01-01", "2010-01-02"]) + df = DataFrame({"ids": [(x,) for x in dt]}) + gr = df.groupby("ids") + result = gr.get_group(("2010-01-01",)) + expected = DataFrame({"ids": [(dt[0],), (dt[0],)]}, index=[0, 2]) + tm.assert_frame_equal(result, expected) + + def test_get_group_grouped_by_tuple_with_lambda(self): + # GH 36158 + df = DataFrame( + {"Tuples": ((x, y) for x in [0, 1] for y in np.random.randint(3, 5, 5))} + ) + + gb = df.groupby("Tuples") + gb_lambda = df.groupby(lambda x: df.iloc[x, 0]) + + expected = gb.get_group(list(gb.groups.keys())[0]) + result = gb_lambda.get_group(list(gb_lambda.groups.keys())[0]) + + tm.assert_frame_equal(result, expected) + + def test_groupby_with_empty(self): + index = pd.DatetimeIndex(()) + data = () + series = Series(data, index, dtype=object) + grouper = pd.Grouper(freq="D") + grouped = series.groupby(grouper) + assert next(iter(grouped), None) is None + + def test_groupby_with_single_column(self): + df = DataFrame({"a": list("abssbab")}) + tm.assert_frame_equal(df.groupby("a").get_group("a"), df.iloc[[0, 5]]) + # GH 13530 + exp = DataFrame(index=Index(["a", "b", "s"], name="a")) + tm.assert_frame_equal(df.groupby("a").count(), exp) + tm.assert_frame_equal(df.groupby("a").sum(), exp) + tm.assert_frame_equal(df.groupby("a").nth(1), exp) + + def test_gb_key_len_equal_axis_len(self): + # GH16843 + # test ensures that index and column keys are recognized correctly + # when number of keys equals axis length of groupby + df = DataFrame( + [["foo", "bar", "B", 1], ["foo", "bar", "B", 2], ["foo", "baz", "C", 3]], + columns=["first", "second", "third", "one"], + ) + df = df.set_index(["first", "second"]) + df = df.groupby(["first", "second", "third"]).size() + assert df.loc[("foo", "bar", "B")] == 2 + assert df.loc[("foo", "baz", "C")] == 1 + + +# groups & iteration +# -------------------------------- + + +class TestIteration: + def test_groups(self, df): + grouped = df.groupby(["A"]) + groups = grouped.groups + assert groups is grouped.groups # caching works + + for k, v in grouped.groups.items(): + assert (df.loc[v]["A"] == k).all() + + grouped = df.groupby(["A", "B"]) + groups = grouped.groups + assert groups is grouped.groups # caching works + + for k, v in grouped.groups.items(): + assert (df.loc[v]["A"] == k[0]).all() + assert (df.loc[v]["B"] == k[1]).all() + + def test_grouping_is_iterable(self, tsframe): + # this code path isn't used anywhere else + # not sure it's useful + grouped = tsframe.groupby([lambda x: x.weekday(), lambda x: x.year]) + + # test it works + for g in grouped.grouper.groupings[0]: + pass + + def test_multi_iter(self): + s = Series(np.arange(6)) + k1 = np.array(["a", "a", "a", "b", "b", "b"]) + k2 = np.array(["1", "2", "1", "2", "1", "2"]) + + grouped = s.groupby([k1, k2]) + + iterated = list(grouped) + expected = [ + ("a", "1", s[[0, 2]]), + ("a", "2", s[[1]]), + ("b", "1", s[[4]]), + ("b", "2", s[[3, 5]]), + ] + for i, ((one, two), three) in enumerate(iterated): + e1, e2, e3 = expected[i] + assert e1 == one + assert e2 == two + tm.assert_series_equal(three, e3) + + def test_multi_iter_frame(self, three_group): + k1 = np.array(["b", "b", "b", "a", "a", "a"]) + k2 = np.array(["1", "2", "1", "2", "1", "2"]) + df = 
DataFrame( + {"v1": np.random.randn(6), "v2": np.random.randn(6), "k1": k1, "k2": k2}, + index=["one", "two", "three", "four", "five", "six"], + ) + + grouped = df.groupby(["k1", "k2"]) + + # things get sorted! + iterated = list(grouped) + idx = df.index + expected = [ + ("a", "1", df.loc[idx[[4]]]), + ("a", "2", df.loc[idx[[3, 5]]]), + ("b", "1", df.loc[idx[[0, 2]]]), + ("b", "2", df.loc[idx[[1]]]), + ] + for i, ((one, two), three) in enumerate(iterated): + e1, e2, e3 = expected[i] + assert e1 == one + assert e2 == two + tm.assert_frame_equal(three, e3) + + # don't iterate through groups with no data + df["k1"] = np.array(["b", "b", "b", "a", "a", "a"]) + df["k2"] = np.array(["1", "1", "1", "2", "2", "2"]) + grouped = df.groupby(["k1", "k2"]) + groups = {key: gp for key, gp in grouped} + assert len(groups) == 2 + + # axis = 1 + three_levels = three_group.groupby(["A", "B", "C"]).mean() + grouped = three_levels.T.groupby(axis=1, level=(1, 2)) + for key, group in grouped: + pass + + def test_dictify(self, df): + dict(iter(df.groupby("A"))) + dict(iter(df.groupby(["A", "B"]))) + dict(iter(df["C"].groupby(df["A"]))) + dict(iter(df["C"].groupby([df["A"], df["B"]]))) + dict(iter(df.groupby("A")["C"])) + dict(iter(df.groupby(["A", "B"])["C"])) + + def test_groupby_with_small_elem(self): + # GH 8542 + # length=2 + df = DataFrame( + {"event": ["start", "start"], "change": [1234, 5678]}, + index=pd.DatetimeIndex(["2014-09-10", "2013-10-10"]), + ) + grouped = df.groupby([pd.Grouper(freq="M"), "event"]) + assert len(grouped.groups) == 2 + assert grouped.ngroups == 2 + assert (Timestamp("2014-09-30"), "start") in grouped.groups + assert (Timestamp("2013-10-31"), "start") in grouped.groups + + res = grouped.get_group((Timestamp("2014-09-30"), "start")) + tm.assert_frame_equal(res, df.iloc[[0], :]) + res = grouped.get_group((Timestamp("2013-10-31"), "start")) + tm.assert_frame_equal(res, df.iloc[[1], :]) + + df = DataFrame( + {"event": ["start", "start", "start"], "change": [1234, 5678, 9123]}, + index=pd.DatetimeIndex(["2014-09-10", "2013-10-10", "2014-09-15"]), + ) + grouped = df.groupby([pd.Grouper(freq="M"), "event"]) + assert len(grouped.groups) == 2 + assert grouped.ngroups == 2 + assert (Timestamp("2014-09-30"), "start") in grouped.groups + assert (Timestamp("2013-10-31"), "start") in grouped.groups + + res = grouped.get_group((Timestamp("2014-09-30"), "start")) + tm.assert_frame_equal(res, df.iloc[[0, 2], :]) + res = grouped.get_group((Timestamp("2013-10-31"), "start")) + tm.assert_frame_equal(res, df.iloc[[1], :]) + + # length=3 + df = DataFrame( + {"event": ["start", "start", "start"], "change": [1234, 5678, 9123]}, + index=pd.DatetimeIndex(["2014-09-10", "2013-10-10", "2014-08-05"]), + ) + grouped = df.groupby([pd.Grouper(freq="M"), "event"]) + assert len(grouped.groups) == 3 + assert grouped.ngroups == 3 + assert (Timestamp("2014-09-30"), "start") in grouped.groups + assert (Timestamp("2013-10-31"), "start") in grouped.groups + assert (Timestamp("2014-08-31"), "start") in grouped.groups + + res = grouped.get_group((Timestamp("2014-09-30"), "start")) + tm.assert_frame_equal(res, df.iloc[[0], :]) + res = grouped.get_group((Timestamp("2013-10-31"), "start")) + tm.assert_frame_equal(res, df.iloc[[1], :]) + res = grouped.get_group((Timestamp("2014-08-31"), "start")) + tm.assert_frame_equal(res, df.iloc[[2], :]) + + def test_grouping_string_repr(self): + # GH 13394 + mi = MultiIndex.from_arrays([list("AAB"), list("aba")]) + df = DataFrame([[1, 2, 3]], columns=mi) + gr = df.groupby(df[("A", "a")]) 
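+        # the repr should spell out the full tuple label, not just "A"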
+ + result = gr.grouper.groupings[0].__repr__() + expected = "Grouping(('A', 'a'))" + assert result == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_index_as_string.py b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_index_as_string.py new file mode 100644 index 0000000..971a447 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_index_as_string.py @@ -0,0 +1,82 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +@pytest.fixture(params=[["inner"], ["inner", "outer"]]) +def frame(request): + levels = request.param + df = pd.DataFrame( + { + "outer": ["a", "a", "a", "b", "b", "b"], + "inner": [1, 2, 3, 1, 2, 3], + "A": np.arange(6), + "B": ["one", "one", "two", "two", "one", "one"], + } + ) + if levels: + df = df.set_index(levels) + + return df + + +@pytest.fixture() +def series(): + df = pd.DataFrame( + { + "outer": ["a", "a", "a", "b", "b", "b"], + "inner": [1, 2, 3, 1, 2, 3], + "A": np.arange(6), + "B": ["one", "one", "two", "two", "one", "one"], + } + ) + s = df.set_index(["outer", "inner", "B"])["A"] + + return s + + +@pytest.mark.parametrize( + "key_strs,groupers", + [ + ("inner", pd.Grouper(level="inner")), # Index name + (["inner"], [pd.Grouper(level="inner")]), # List of index name + (["B", "inner"], ["B", pd.Grouper(level="inner")]), # Column and index + (["inner", "B"], [pd.Grouper(level="inner"), "B"]), # Index and column + ], +) +def test_grouper_index_level_as_string(frame, key_strs, groupers): + result = frame.groupby(key_strs).mean() + expected = frame.groupby(groupers).mean() + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "levels", + [ + "inner", + "outer", + "B", + ["inner"], + ["outer"], + ["B"], + ["inner", "outer"], + ["outer", "inner"], + ["inner", "outer", "B"], + ["B", "outer", "inner"], + ], +) +def test_grouper_index_level_as_string_series(series, levels): + + # Compute expected result + if isinstance(levels, list): + groupers = [pd.Grouper(level=lv) for lv in levels] + else: + groupers = pd.Grouper(level=levels) + + expected = series.groupby(groupers).mean() + + # Compute and check result + result = series.groupby(levels).mean() + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_indexing.py b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_indexing.py new file mode 100644 index 0000000..0caa17f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_indexing.py @@ -0,0 +1,316 @@ +# Test GroupBy._positional_selector positional grouped indexing GH#42864 + +import random + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +@pytest.mark.parametrize( + "arg, expected_rows", + [ + [0, [0, 1, 4]], + [2, [5]], + [5, []], + [-1, [3, 4, 7]], + [-2, [1, 6]], + [-6, []], + ], +) +def test_int(slice_test_df, slice_test_grouped, arg, expected_rows): + # Test single integer + result = slice_test_grouped._positional_selector[arg] + expected = slice_test_df.iloc[expected_rows] + + tm.assert_frame_equal(result, expected) + + +def test_slice(slice_test_df, slice_test_grouped): + # Test single slice + result = slice_test_grouped._positional_selector[0:3:2] + expected = slice_test_df.iloc[[0, 1, 4, 5]] + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "arg, expected_rows", + [ + [[0, 2], [0, 1, 4, 5]], + [[0, 2, -1], [0, 1, 3, 4, 5, 7]], + [range(0, 3, 2), [0, 1, 4, 
5]], + [{0, 2}, [0, 1, 4, 5]], + ], + ids=[ + "list", + "negative", + "range", + "set", + ], +) +def test_list(slice_test_df, slice_test_grouped, arg, expected_rows): + # Test lists of integers and integer valued iterables + result = slice_test_grouped._positional_selector[arg] + expected = slice_test_df.iloc[expected_rows] + + tm.assert_frame_equal(result, expected) + + +def test_ints(slice_test_df, slice_test_grouped): + # Test tuple of ints + result = slice_test_grouped._positional_selector[0, 2, -1] + expected = slice_test_df.iloc[[0, 1, 3, 4, 5, 7]] + + tm.assert_frame_equal(result, expected) + + +def test_slices(slice_test_df, slice_test_grouped): + # Test tuple of slices + result = slice_test_grouped._positional_selector[:2, -2:] + expected = slice_test_df.iloc[[0, 1, 2, 3, 4, 6, 7]] + + tm.assert_frame_equal(result, expected) + + +def test_mix(slice_test_df, slice_test_grouped): + # Test mixed tuple of ints and slices + result = slice_test_grouped._positional_selector[0, 1, -2:] + expected = slice_test_df.iloc[[0, 1, 2, 3, 4, 6, 7]] + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "arg, expected_rows", + [ + [0, [0, 1, 4]], + [[0, 2, -1], [0, 1, 3, 4, 5, 7]], + [(slice(None, 2), slice(-2, None)), [0, 1, 2, 3, 4, 6, 7]], + ], +) +def test_as_index(slice_test_df, arg, expected_rows): + # Test the default as_index behaviour + result = slice_test_df.groupby("Group", sort=False)._positional_selector[arg] + expected = slice_test_df.iloc[expected_rows] + + tm.assert_frame_equal(result, expected) + + +def test_doc_examples(): + # Test the examples in the documentation + df = pd.DataFrame( + [["a", 1], ["a", 2], ["a", 3], ["b", 4], ["b", 5]], columns=["A", "B"] + ) + + grouped = df.groupby("A", as_index=False) + + result = grouped._positional_selector[1:2] + expected = pd.DataFrame([["a", 2], ["b", 5]], columns=["A", "B"], index=[1, 4]) + + tm.assert_frame_equal(result, expected) + + result = grouped._positional_selector[1, -1] + expected = pd.DataFrame( + [["a", 2], ["a", 3], ["b", 5]], columns=["A", "B"], index=[1, 2, 4] + ) + + tm.assert_frame_equal(result, expected) + + +@pytest.fixture() +def multiindex_data(): + ndates = 100 + nitems = 20 + dates = pd.date_range("20130101", periods=ndates, freq="D") + items = [f"item {i}" for i in range(nitems)] + + data = {} + for date in dates: + nitems_for_date = nitems - random.randint(0, 12) + levels = [ + (item, random.randint(0, 10000) / 100, random.randint(0, 10000) / 100) + for item in items[:nitems_for_date] + ] + levels.sort(key=lambda x: x[1]) + data[date] = levels + + return data + + +def _make_df_from_data(data): + rows = {} + for date in data: + for level in data[date]: + rows[(date, level[0])] = {"A": level[1], "B": level[2]} + + df = pd.DataFrame.from_dict(rows, orient="index") + df.index.names = ("Date", "Item") + return df + + +def test_multiindex(multiindex_data): + # Test the multiindex mentioned as the use-case in the documentation + df = _make_df_from_data(multiindex_data) + result = df.groupby("Date", as_index=False).nth(slice(3, -3)) + + sliced = {date: multiindex_data[date][3:-3] for date in multiindex_data} + expected = _make_df_from_data(sliced) + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("arg", [1, 5, 30, 1000, -1, -5, -30, -1000]) +@pytest.mark.parametrize("method", ["head", "tail"]) +@pytest.mark.parametrize("simulated", [True, False]) +def test_against_head_and_tail(arg, method, simulated): + # Test gives the same results as grouped head and tail + n_groups = 100 
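+    # rows are laid out group-interleaved below: row j * n_groups + g holds
+    # row j of group g, which the simulated-index arithmetic relies on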
+    n_rows_per_group = 30
+
+    data = {
+        "group": [
+            f"group {g}" for j in range(n_rows_per_group) for g in range(n_groups)
+        ],
+        "value": [
+            f"group {g} row {j}"
+            for j in range(n_rows_per_group)
+            for g in range(n_groups)
+        ],
+    }
+    df = pd.DataFrame(data)
+    grouped = df.groupby("group", as_index=False)
+    size = arg if arg >= 0 else n_rows_per_group + arg
+
+    if method == "head":
+        result = grouped._positional_selector[:arg]
+
+        if simulated:
+            indices = []
+            for j in range(size):
+                for i in range(n_groups):
+                    if j * n_groups + i < n_groups * n_rows_per_group:
+                        indices.append(j * n_groups + i)
+
+            expected = df.iloc[indices]
+
+        else:
+            expected = grouped.head(arg)
+
+    else:
+        result = grouped._positional_selector[-arg:]
+
+        if simulated:
+            indices = []
+            for j in range(size):
+                for i in range(n_groups):
+                    if (n_rows_per_group + j - size) * n_groups + i >= 0:
+                        indices.append((n_rows_per_group + j - size) * n_groups + i)
+
+            expected = df.iloc[indices]
+
+        else:
+            expected = grouped.tail(arg)
+
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("start", [None, 0, 1, 10, -1, -10])
+@pytest.mark.parametrize("stop", [None, 0, 1, 10, -1, -10])
+@pytest.mark.parametrize("step", [None, 1, 5])
+def test_against_df_iloc(start, stop, step):
+    # Test that a single group gives the same results as DataFrame.iloc
+    n_rows = 30
+
+    data = {
+        "group": ["group 0"] * n_rows,
+        "value": list(range(n_rows)),
+    }
+    df = pd.DataFrame(data)
+    grouped = df.groupby("group", as_index=False)
+
+    result = grouped._positional_selector[start:stop:step]
+    expected = df.iloc[start:stop:step]
+
+    tm.assert_frame_equal(result, expected)
+
+
+def test_series():
+    # Test grouped Series
+    ser = pd.Series([1, 2, 3, 4, 5], index=["a", "a", "a", "b", "b"])
+    grouped = ser.groupby(level=0)
+    result = grouped._positional_selector[1:2]
+    expected = pd.Series([2, 5], index=["a", "b"])
+
+    tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize("step", [1, 2, 3, 4, 5])
+def test_step(step):
+    # Test slice with various step values
+    data = [["x", f"x{i}"] for i in range(5)]
+    data += [["y", f"y{i}"] for i in range(4)]
+    data += [["z", f"z{i}"] for i in range(3)]
+    df = pd.DataFrame(data, columns=["A", "B"])
+
+    grouped = df.groupby("A", as_index=False)
+
+    result = grouped._positional_selector[::step]
+
+    data = [["x", f"x{i}"] for i in range(0, 5, step)]
+    data += [["y", f"y{i}"] for i in range(0, 4, step)]
+    data += [["z", f"z{i}"] for i in range(0, 3, step)]
+
+    index = [0 + i for i in range(0, 5, step)]
+    index += [5 + i for i in range(0, 4, step)]
+    index += [9 + i for i in range(0, 3, step)]
+
+    expected = pd.DataFrame(data, columns=["A", "B"], index=index)
+
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.fixture()
+def column_group_df():
+    return pd.DataFrame(
+        [[0, 1, 2, 3, 4, 5, 6], [0, 0, 1, 0, 1, 0, 2]],
+        columns=["A", "B", "C", "D", "E", "F", "G"],
+    )
+
+
+def test_column_axis(column_group_df):
+    g = column_group_df.groupby(column_group_df.iloc[1], axis=1)
+    result = g._positional_selector[1:-1]
+    expected = column_group_df.iloc[:, [1, 3]]
+
+    tm.assert_frame_equal(result, expected)
+
+
+def test_columns_on_iter():
+    # GH 44821
+    df = pd.DataFrame({k: range(10) for k in "ABC"})
+
+    # Group-by and select columns
+    cols = ["A", "B"]
+    for _, dg in df.groupby(df.A < 4)[cols]:
+        tm.assert_index_equal(dg.columns, pd.Index(cols))
+        assert "C" not in dg.columns
+
+
+@pytest.mark.parametrize("func", [list, pd.Index, pd.Series, np.array])
+def
test_groupby_duplicated_columns(func): + # GH#44924 + df = pd.DataFrame( + { + "A": [1, 2], + "B": [3, 3], + "C": ["G", "G"], + } + ) + result = df.groupby("C")[func(["A", "B", "A"])].mean() + expected = pd.DataFrame( + [[1.5, 3.0, 1.5]], columns=["A", "B", "A"], index=pd.Index(["G"], name="C") + ) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_libgroupby.py b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_libgroupby.py new file mode 100644 index 0000000..2a24448 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_libgroupby.py @@ -0,0 +1,283 @@ +import numpy as np +import pytest + +from pandas._libs import groupby as libgroupby +from pandas._libs.groupby import ( + group_cumprod_float64, + group_cumsum, + group_mean, + group_var, +) + +from pandas.core.dtypes.common import ensure_platform_int + +from pandas import isna +import pandas._testing as tm + + +class GroupVarTestMixin: + def test_group_var_generic_1d(self): + prng = np.random.RandomState(1234) + + out = (np.nan * np.ones((5, 1))).astype(self.dtype) + counts = np.zeros(5, dtype="int64") + values = 10 * prng.rand(15, 1).astype(self.dtype) + labels = np.tile(np.arange(5), (3,)).astype("intp") + + expected_out = ( + np.squeeze(values).reshape((5, 3), order="F").std(axis=1, ddof=1) ** 2 + )[:, np.newaxis] + expected_counts = counts + 3 + + self.algo(out, counts, values, labels) + assert np.allclose(out, expected_out, self.rtol) + tm.assert_numpy_array_equal(counts, expected_counts) + + def test_group_var_generic_1d_flat_labels(self): + prng = np.random.RandomState(1234) + + out = (np.nan * np.ones((1, 1))).astype(self.dtype) + counts = np.zeros(1, dtype="int64") + values = 10 * prng.rand(5, 1).astype(self.dtype) + labels = np.zeros(5, dtype="intp") + + expected_out = np.array([[values.std(ddof=1) ** 2]]) + expected_counts = counts + 5 + + self.algo(out, counts, values, labels) + + assert np.allclose(out, expected_out, self.rtol) + tm.assert_numpy_array_equal(counts, expected_counts) + + def test_group_var_generic_2d_all_finite(self): + prng = np.random.RandomState(1234) + + out = (np.nan * np.ones((5, 2))).astype(self.dtype) + counts = np.zeros(5, dtype="int64") + values = 10 * prng.rand(10, 2).astype(self.dtype) + labels = np.tile(np.arange(5), (2,)).astype("intp") + + expected_out = np.std(values.reshape(2, 5, 2), ddof=1, axis=0) ** 2 + expected_counts = counts + 2 + + self.algo(out, counts, values, labels) + assert np.allclose(out, expected_out, self.rtol) + tm.assert_numpy_array_equal(counts, expected_counts) + + def test_group_var_generic_2d_some_nan(self): + prng = np.random.RandomState(1234) + + out = (np.nan * np.ones((5, 2))).astype(self.dtype) + counts = np.zeros(5, dtype="int64") + values = 10 * prng.rand(10, 2).astype(self.dtype) + values[:, 1] = np.nan + labels = np.tile(np.arange(5), (2,)).astype("intp") + + expected_out = np.vstack( + [ + values[:, 0].reshape(5, 2, order="F").std(ddof=1, axis=1) ** 2, + np.nan * np.ones(5), + ] + ).T.astype(self.dtype) + expected_counts = counts + 2 + + self.algo(out, counts, values, labels) + tm.assert_almost_equal(out, expected_out, rtol=0.5e-06) + tm.assert_numpy_array_equal(counts, expected_counts) + + def test_group_var_constant(self): + # Regression test from GH 10448. 
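+        # a constant column can come out with a slightly negative variance
+        # due to floating-point cancellation (assumed to be the failure mode
+        # behind GH 10448); the checks below require a non-negative result
+        # that is approximately zero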
+ + out = np.array([[np.nan]], dtype=self.dtype) + counts = np.array([0], dtype="int64") + values = 0.832845131556193 * np.ones((3, 1), dtype=self.dtype) + labels = np.zeros(3, dtype="intp") + + self.algo(out, counts, values, labels) + + assert counts[0] == 3 + assert out[0, 0] >= 0 + tm.assert_almost_equal(out[0, 0], 0.0) + + +class TestGroupVarFloat64(GroupVarTestMixin): + __test__ = True + + algo = staticmethod(group_var) + dtype = np.float64 + rtol = 1e-5 + + def test_group_var_large_inputs(self): + prng = np.random.RandomState(1234) + + out = np.array([[np.nan]], dtype=self.dtype) + counts = np.array([0], dtype="int64") + values = (prng.rand(10 ** 6) + 10 ** 12).astype(self.dtype) + values.shape = (10 ** 6, 1) + labels = np.zeros(10 ** 6, dtype="intp") + + self.algo(out, counts, values, labels) + + assert counts[0] == 10 ** 6 + tm.assert_almost_equal(out[0, 0], 1.0 / 12, rtol=0.5e-3) + + +class TestGroupVarFloat32(GroupVarTestMixin): + __test__ = True + + algo = staticmethod(group_var) + dtype = np.float32 + rtol = 1e-2 + + +@pytest.mark.parametrize("dtype", ["float32", "float64"]) +def test_group_ohlc(dtype): + obj = np.array(np.random.randn(20), dtype=dtype) + + bins = np.array([6, 12, 20]) + out = np.zeros((3, 4), dtype) + counts = np.zeros(len(out), dtype=np.int64) + labels = ensure_platform_int(np.repeat(np.arange(3), np.diff(np.r_[0, bins]))) + + func = libgroupby.group_ohlc + func(out, counts, obj[:, None], labels) + + def _ohlc(group): + if isna(group).all(): + return np.repeat(np.nan, 4) + return [group[0], group.max(), group.min(), group[-1]] + + expected = np.array([_ohlc(obj[:6]), _ohlc(obj[6:12]), _ohlc(obj[12:])]) + + tm.assert_almost_equal(out, expected) + tm.assert_numpy_array_equal(counts, np.array([6, 6, 8], dtype=np.int64)) + + obj[:6] = np.nan + func(out, counts, obj[:, None], labels) + expected[0] = np.nan + tm.assert_almost_equal(out, expected) + + +def _check_cython_group_transform_cumulative(pd_op, np_op, dtype): + """ + Check a group transform that executes a cumulative function. + + Parameters + ---------- + pd_op : callable + The pandas cumulative function. + np_op : callable + The analogous one in NumPy. + dtype : type + The specified dtype of the data. 
+ """ + is_datetimelike = False + + data = np.array([[1], [2], [3], [4]], dtype=dtype) + answer = np.zeros_like(data) + + labels = np.array([0, 0, 0, 0], dtype=np.intp) + ngroups = 1 + pd_op(answer, data, labels, ngroups, is_datetimelike) + + tm.assert_numpy_array_equal(np_op(data), answer[:, 0], check_dtype=False) + + +def test_cython_group_transform_cumsum(any_real_numpy_dtype): + # see gh-4095 + dtype = np.dtype(any_real_numpy_dtype).type + pd_op, np_op = group_cumsum, np.cumsum + _check_cython_group_transform_cumulative(pd_op, np_op, dtype) + + +def test_cython_group_transform_cumprod(): + # see gh-4095 + dtype = np.float64 + pd_op, np_op = group_cumprod_float64, np.cumproduct + _check_cython_group_transform_cumulative(pd_op, np_op, dtype) + + +def test_cython_group_transform_algos(): + # see gh-4095 + is_datetimelike = False + + # with nans + labels = np.array([0, 0, 0, 0, 0], dtype=np.intp) + ngroups = 1 + + data = np.array([[1], [2], [3], [np.nan], [4]], dtype="float64") + actual = np.zeros_like(data) + actual.fill(np.nan) + group_cumprod_float64(actual, data, labels, ngroups, is_datetimelike) + expected = np.array([1, 2, 6, np.nan, 24], dtype="float64") + tm.assert_numpy_array_equal(actual[:, 0], expected) + + actual = np.zeros_like(data) + actual.fill(np.nan) + group_cumsum(actual, data, labels, ngroups, is_datetimelike) + expected = np.array([1, 3, 6, np.nan, 10], dtype="float64") + tm.assert_numpy_array_equal(actual[:, 0], expected) + + # timedelta + is_datetimelike = True + data = np.array([np.timedelta64(1, "ns")] * 5, dtype="m8[ns]")[:, None] + actual = np.zeros_like(data, dtype="int64") + group_cumsum(actual, data.view("int64"), labels, ngroups, is_datetimelike) + expected = np.array( + [ + np.timedelta64(1, "ns"), + np.timedelta64(2, "ns"), + np.timedelta64(3, "ns"), + np.timedelta64(4, "ns"), + np.timedelta64(5, "ns"), + ] + ) + tm.assert_numpy_array_equal(actual[:, 0].view("m8[ns]"), expected) + + +def test_cython_group_mean_datetimelike(): + actual = np.zeros(shape=(1, 1), dtype="float64") + counts = np.array([0], dtype="int64") + data = ( + np.array( + [np.timedelta64(2, "ns"), np.timedelta64(4, "ns"), np.timedelta64("NaT")], + dtype="m8[ns]", + )[:, None] + .view("int64") + .astype("float64") + ) + labels = np.zeros(len(data), dtype=np.intp) + + group_mean(actual, counts, data, labels, is_datetimelike=True) + + tm.assert_numpy_array_equal(actual[:, 0], np.array([3], dtype="float64")) + + +def test_cython_group_mean_wrong_min_count(): + actual = np.zeros(shape=(1, 1), dtype="float64") + counts = np.zeros(1, dtype="int64") + data = np.zeros(1, dtype="float64")[:, None] + labels = np.zeros(1, dtype=np.intp) + + with pytest.raises(AssertionError, match="min_count"): + group_mean(actual, counts, data, labels, is_datetimelike=True, min_count=0) + + +def test_cython_group_mean_not_datetimelike_but_has_NaT_values(): + actual = np.zeros(shape=(1, 1), dtype="float64") + counts = np.array([0], dtype="int64") + data = ( + np.array( + [np.timedelta64("NaT"), np.timedelta64("NaT")], + dtype="m8[ns]", + )[:, None] + .view("int64") + .astype("float64") + ) + labels = np.zeros(len(data), dtype=np.intp) + + group_mean(actual, counts, data, labels, is_datetimelike=False) + + tm.assert_numpy_array_equal( + actual[:, 0], np.array(np.divide(np.add(data[0], data[1]), 2), dtype="float64") + ) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_min_max.py b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_min_max.py new file mode 100644 index 0000000..767ef29 
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_min_max.py
@@ -0,0 +1,227 @@
+import numpy as np
+import pytest
+
+from pandas._libs.tslibs import iNaT
+
+import pandas as pd
+from pandas import (
+    DataFrame,
+    Index,
+    Series,
+)
+import pandas._testing as tm
+from pandas.core.api import Int64Index
+
+
+def test_max_min_non_numeric():
+    # #2700
+    aa = DataFrame({"nn": [11, 11, 22, 22], "ii": [1, 2, 3, 4], "ss": 4 * ["mama"]})
+
+    result = aa.groupby("nn").max()
+    assert "ss" in result
+
+    result = aa.groupby("nn").max(numeric_only=False)
+    assert "ss" in result
+
+    result = aa.groupby("nn").min()
+    assert "ss" in result
+
+    result = aa.groupby("nn").min(numeric_only=False)
+    assert "ss" in result
+
+
+def test_max_min_object_multiple_columns(using_array_manager):
+    # GH#41111 case where the aggregation is valid for some columns but not
+    # others; we split object blocks column-wise, consistent with
+    # DataFrame._reduce
+
+    df = DataFrame(
+        {
+            "A": [1, 1, 2, 2, 3],
+            "B": [1, "foo", 2, "bar", False],
+            "C": ["a", "b", "c", "d", "e"],
+        }
+    )
+    df._consolidate_inplace()  # should already be consolidated, but double-check
+    if not using_array_manager:
+        assert len(df._mgr.blocks) == 2
+
+    gb = df.groupby("A")
+
+    with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+        result = gb.max(numeric_only=False)
+    # "max" is valid for column "C" but not for "B"
+    ei = Index([1, 2, 3], name="A")
+    expected = DataFrame({"C": ["b", "d", "e"]}, index=ei)
+    tm.assert_frame_equal(result, expected)
+
+    with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+        result = gb.min(numeric_only=False)
+    # "min" is valid for column "C" but not for "B"
+    ei = Index([1, 2, 3], name="A")
+    expected = DataFrame({"C": ["a", "c", "e"]}, index=ei)
+    tm.assert_frame_equal(result, expected)
+
+
+def test_min_date_with_nans():
+    # GH26321
+    dates = pd.to_datetime(
+        Series(["2019-05-09", "2019-05-09", "2019-05-09"]), format="%Y-%m-%d"
+    ).dt.date
+    df = DataFrame({"a": [np.nan, "1", np.nan], "b": [0, 1, 1], "c": dates})
+
+    result = df.groupby("b", as_index=False)["c"].min()["c"]
+    expected = pd.to_datetime(
+        Series(["2019-05-09", "2019-05-09"], name="c"), format="%Y-%m-%d"
+    ).dt.date
+    tm.assert_series_equal(result, expected)
+
+    result = df.groupby("b")["c"].min()
+    expected.index.name = "b"
+    tm.assert_series_equal(result, expected)
+
+
+def test_max_inat():
+    # GH#40767 don't interpret iNaT as NaN
+    ser = Series([1, iNaT])
+    gb = ser.groupby([1, 1])
+
+    result = gb.max(min_count=2)
+    expected = Series({1: 1}, dtype=np.int64)
+    tm.assert_series_equal(result, expected, check_exact=True)
+
+    result = gb.min(min_count=2)
+    expected = Series({1: iNaT}, dtype=np.int64)
+    tm.assert_series_equal(result, expected, check_exact=True)
+
+    # not enough entries -> gets masked to NaN
+    result = gb.min(min_count=3)
+    expected = Series({1: np.nan})
+    tm.assert_series_equal(result, expected, check_exact=True)
+
+
+def test_max_inat_not_all_na():
+    # GH#40767 don't interpret iNaT as NaN
+
+    # make sure we don't round iNaT+1 to iNaT
+    ser = Series([1, iNaT, 2, iNaT + 1])
+    gb = ser.groupby([1, 2, 3, 3])
+    result = gb.min(min_count=2)
+
+    # Note: in converting to float64, the iNaT + 1 maps to iNaT, i.e.
is lossy + expected = Series({1: np.nan, 2: np.nan, 3: iNaT + 1}) + tm.assert_series_equal(result, expected, check_exact=True) + + +@pytest.mark.parametrize("func", ["min", "max"]) +def test_groupby_aggregate_period_column(func): + # GH 31471 + groups = [1, 2] + periods = pd.period_range("2020", periods=2, freq="Y") + df = DataFrame({"a": groups, "b": periods}) + + result = getattr(df.groupby("a")["b"], func)() + idx = Int64Index([1, 2], name="a") + expected = Series(periods, index=idx, name="b") + + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("func", ["min", "max"]) +def test_groupby_aggregate_period_frame(func): + # GH 31471 + groups = [1, 2] + periods = pd.period_range("2020", periods=2, freq="Y") + df = DataFrame({"a": groups, "b": periods}) + + result = getattr(df.groupby("a"), func)() + idx = Int64Index([1, 2], name="a") + expected = DataFrame({"b": periods}, index=idx) + + tm.assert_frame_equal(result, expected) + + +def test_aggregate_numeric_object_dtype(): + # https://github.com/pandas-dev/pandas/issues/39329 + # simplified case: multiple object columns where one is all-NaN + # -> gets split as the all-NaN is inferred as float + df = DataFrame( + {"key": ["A", "A", "B", "B"], "col1": list("abcd"), "col2": [np.nan] * 4}, + ).astype(object) + result = df.groupby("key").min() + expected = DataFrame( + {"key": ["A", "B"], "col1": ["a", "c"], "col2": [np.nan, np.nan]} + ).set_index("key") + tm.assert_frame_equal(result, expected) + + # same but with numbers + df = DataFrame( + {"key": ["A", "A", "B", "B"], "col1": list("abcd"), "col2": range(4)}, + ).astype(object) + result = df.groupby("key").min() + expected = DataFrame( + {"key": ["A", "B"], "col1": ["a", "c"], "col2": [0, 2]} + ).set_index("key") + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("func", ["min", "max"]) +def test_aggregate_categorical_lost_index(func: str): + # GH: 28641 groupby drops index, when grouping over categorical column with min/max + ds = Series(["b"], dtype="category").cat.as_ordered() + df = DataFrame({"A": [1997], "B": ds}) + result = df.groupby("A").agg({"B": func}) + expected = DataFrame({"B": ["b"]}, index=Index([1997], name="A")) + + # ordered categorical dtype should be preserved + expected["B"] = expected["B"].astype(ds.dtype) + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("dtype", ["Int64", "Int32", "Float64", "Float32", "boolean"]) +def test_groupby_min_max_nullable(dtype): + if dtype == "Int64": + # GH#41743 avoid precision loss + ts = 1618556707013635762 + elif dtype == "boolean": + ts = 0 + else: + ts = 4.0 + + df = DataFrame({"id": [2, 2], "ts": [ts, ts + 1]}) + df["ts"] = df["ts"].astype(dtype) + + gb = df.groupby("id") + + result = gb.min() + expected = df.iloc[:1].set_index("id") + tm.assert_frame_equal(result, expected) + + res_max = gb.max() + expected_max = df.iloc[1:].set_index("id") + tm.assert_frame_equal(res_max, expected_max) + + result2 = gb.min(min_count=3) + expected2 = DataFrame({"ts": [pd.NA]}, index=expected.index, dtype=dtype) + tm.assert_frame_equal(result2, expected2) + + res_max2 = gb.max(min_count=3) + tm.assert_frame_equal(res_max2, expected2) + + # Case with NA values + df2 = DataFrame({"id": [2, 2, 2], "ts": [ts, pd.NA, ts + 1]}) + df2["ts"] = df2["ts"].astype(dtype) + gb2 = df2.groupby("id") + + result3 = gb2.min() + tm.assert_frame_equal(result3, expected) + + res_max3 = gb2.max() + tm.assert_frame_equal(res_max3, expected_max) + + result4 = gb2.min(min_count=100) + 
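+    # Editorial note (added commentary): min_count masks any group whose
+    # count of non-NA values falls short of the threshold. Each group here
+    # holds at most three values, so min_count=100 can never be met and
+    # every aggregate comes back as <NA>, as the checks below confirm.
+    assert result4.isna().all().all()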
tm.assert_frame_equal(result4, expected2) + + res_max4 = gb2.max(min_count=100) + tm.assert_frame_equal(res_max4, expected2) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_missing.py b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_missing.py new file mode 100644 index 0000000..76da8df --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_missing.py @@ -0,0 +1,155 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Index, + date_range, +) +import pandas._testing as tm + + +@pytest.mark.parametrize("func", ["ffill", "bfill"]) +def test_groupby_column_index_name_lost_fill_funcs(func): + # GH: 29764 groupby loses index sometimes + df = DataFrame( + [[1, 1.0, -1.0], [1, np.nan, np.nan], [1, 2.0, -2.0]], + columns=Index(["type", "a", "b"], name="idx"), + ) + df_grouped = df.groupby(["type"])[["a", "b"]] + result = getattr(df_grouped, func)().columns + expected = Index(["a", "b"], name="idx") + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize("func", ["ffill", "bfill"]) +def test_groupby_fill_duplicate_column_names(func): + # GH: 25610 ValueError with duplicate column names + df1 = DataFrame({"field1": [1, 3, 4], "field2": [1, 3, 4]}) + df2 = DataFrame({"field1": [1, np.nan, 4]}) + df_grouped = pd.concat([df1, df2], axis=1).groupby(by=["field2"]) + expected = DataFrame( + [[1, 1.0], [3, np.nan], [4, 4.0]], columns=["field1", "field1"] + ) + result = getattr(df_grouped, func)() + tm.assert_frame_equal(result, expected) + + +def test_ffill_missing_arguments(): + # GH 14955 + df = DataFrame({"a": [1, 2], "b": [1, 1]}) + with pytest.raises(ValueError, match="Must specify a fill"): + df.groupby("b").fillna() + + +@pytest.mark.parametrize( + "method, expected", [("ffill", [None, "a", "a"]), ("bfill", ["a", "a", None])] +) +def test_fillna_with_string_dtype(method, expected): + # GH 40250 + df = DataFrame({"a": pd.array([None, "a", None], dtype="string"), "b": [0, 0, 0]}) + grp = df.groupby("b") + result = grp.fillna(method=method) + expected = DataFrame({"a": pd.array(expected, dtype="string")}) + tm.assert_frame_equal(result, expected) + + +def test_fill_consistency(): + + # GH9221 + # pass thru keyword arguments to the generated wrapper + # are set if the passed kw is None (only) + df = DataFrame( + index=pd.MultiIndex.from_product( + [["value1", "value2"], date_range("2014-01-01", "2014-01-06")] + ), + columns=Index(["1", "2"], name="id"), + ) + df["1"] = [ + np.nan, + 1, + np.nan, + np.nan, + 11, + np.nan, + np.nan, + 2, + np.nan, + np.nan, + 22, + np.nan, + ] + df["2"] = [ + np.nan, + 3, + np.nan, + np.nan, + 33, + np.nan, + np.nan, + 4, + np.nan, + np.nan, + 44, + np.nan, + ] + + expected = df.groupby(level=0, axis=0).fillna(method="ffill") + result = df.T.groupby(level=0, axis=1).fillna(method="ffill").T + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("method", ["ffill", "bfill"]) +@pytest.mark.parametrize("dropna", [True, False]) +@pytest.mark.parametrize("has_nan_group", [True, False]) +def test_ffill_handles_nan_groups(dropna, method, has_nan_group): + # GH 34725 + + df_without_nan_rows = DataFrame([(1, 0.1), (2, 0.2)]) + + ridx = [-1, 0, -1, -1, 1, -1] + df = df_without_nan_rows.reindex(ridx).reset_index(drop=True) + + group_b = np.nan if has_nan_group else "b" + df["group_col"] = pd.Series(["a"] * 3 + [group_b] * 3) + + grouped = df.groupby(by="group_col", dropna=dropna) + result = getattr(grouped, method)(limit=None) + + 
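+    # Editorial note on the mapping below: each ridx entry picks a row of
+    # df_without_nan_rows, and -1 is a deliberately missing label, so the
+    # reindex yields an all-NaN row there. An illustrative spot check
+    # (added, not part of the original test):
+    assert df_without_nan_rows.reindex([-1]).isna().all().all()
+    # With dropna=True and a NaN group key, the NaN-group rows are excluded
+    # from the fill altogether, which is why that combination keeps -1s.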
expected_rows = { + ("ffill", True, True): [-1, 0, 0, -1, -1, -1], + ("ffill", True, False): [-1, 0, 0, -1, 1, 1], + ("ffill", False, True): [-1, 0, 0, -1, 1, 1], + ("ffill", False, False): [-1, 0, 0, -1, 1, 1], + ("bfill", True, True): [0, 0, -1, -1, -1, -1], + ("bfill", True, False): [0, 0, -1, 1, 1, -1], + ("bfill", False, True): [0, 0, -1, 1, 1, -1], + ("bfill", False, False): [0, 0, -1, 1, 1, -1], + } + + ridx = expected_rows.get((method, dropna, has_nan_group)) + expected = df_without_nan_rows.reindex(ridx).reset_index(drop=True) + # columns are a 'take' on df.columns, which are object dtype + expected.columns = expected.columns.astype(object) + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("min_count, value", [(2, np.nan), (-1, 1.0)]) +@pytest.mark.parametrize("func", ["first", "last", "max", "min"]) +def test_min_count(func, min_count, value): + # GH#37821 + df = DataFrame({"a": [1] * 3, "b": [1, np.nan, np.nan], "c": [np.nan] * 3}) + result = getattr(df.groupby("a"), func)(min_count=min_count) + expected = DataFrame({"b": [value], "c": [np.nan]}, index=Index([1], name="a")) + tm.assert_frame_equal(result, expected) + + +def test_indices_with_missing(): + # GH 9304 + df = DataFrame({"a": [1, 1, np.nan], "b": [2, 3, 4], "c": [5, 6, 7]}) + g = df.groupby(["a", "b"]) + result = g.indices + expected = {(1.0, 2): np.array([0]), (1.0, 3): np.array([1])} + assert result == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_nth.py b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_nth.py new file mode 100644 index 0000000..185b0b8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_nth.py @@ -0,0 +1,843 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + Timestamp, + isna, +) +import pandas._testing as tm + + +def test_first_last_nth(df): + # tests for first / last / nth + grouped = df.groupby("A") + first = grouped.first() + expected = df.loc[[1, 0], ["B", "C", "D"]] + expected.index = Index(["bar", "foo"], name="A") + expected = expected.sort_index() + tm.assert_frame_equal(first, expected) + + nth = grouped.nth(0) + tm.assert_frame_equal(nth, expected) + + last = grouped.last() + expected = df.loc[[5, 7], ["B", "C", "D"]] + expected.index = Index(["bar", "foo"], name="A") + tm.assert_frame_equal(last, expected) + + nth = grouped.nth(-1) + tm.assert_frame_equal(nth, expected) + + nth = grouped.nth(1) + expected = df.loc[[2, 3], ["B", "C", "D"]].copy() + expected.index = Index(["foo", "bar"], name="A") + expected = expected.sort_index() + tm.assert_frame_equal(nth, expected) + + # it works! 
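+    # Editorial aside (an added spot check, implied by the assertions
+    # above): before any NaNs are introduced below, nth(0) and first()
+    # agree; they diverge only once NaNs appear, since first() skips NaN
+    # within each group while nth(0) keeps positional row 0 regardless.
+    tm.assert_frame_equal(grouped.nth(0), grouped.first())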
+ grouped["B"].first() + grouped["B"].last() + grouped["B"].nth(0) + + df.loc[df["A"] == "foo", "B"] = np.nan + assert isna(grouped["B"].first()["foo"]) + assert isna(grouped["B"].last()["foo"]) + assert isna(grouped["B"].nth(0)["foo"]) + + # v0.14.0 whatsnew + df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=["A", "B"]) + g = df.groupby("A") + result = g.first() + expected = df.iloc[[1, 2]].set_index("A") + tm.assert_frame_equal(result, expected) + + expected = df.iloc[[1, 2]].set_index("A") + result = g.nth(0, dropna="any") + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("method", ["first", "last"]) +def test_first_last_with_na_object(method, nulls_fixture): + # https://github.com/pandas-dev/pandas/issues/32123 + groups = DataFrame({"a": [1, 1, 2, 2], "b": [1, 2, 3, nulls_fixture]}).groupby("a") + result = getattr(groups, method)() + + if method == "first": + values = [1, 3] + else: + values = [2, 3] + + values = np.array(values, dtype=result["b"].dtype) + idx = Index([1, 2], name="a") + expected = DataFrame({"b": values}, index=idx) + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("index", [0, -1]) +def test_nth_with_na_object(index, nulls_fixture): + # https://github.com/pandas-dev/pandas/issues/32123 + groups = DataFrame({"a": [1, 1, 2, 2], "b": [1, 2, 3, nulls_fixture]}).groupby("a") + result = groups.nth(index) + + if index == 0: + values = [1, 3] + else: + values = [2, nulls_fixture] + + values = np.array(values, dtype=result["b"].dtype) + idx = Index([1, 2], name="a") + expected = DataFrame({"b": values}, index=idx) + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("method", ["first", "last"]) +def test_first_last_with_None(method): + # https://github.com/pandas-dev/pandas/issues/32800 + # None should be preserved as object dtype + df = DataFrame.from_dict({"id": ["a"], "value": [None]}) + groups = df.groupby("id", as_index=False) + result = getattr(groups, method)() + + tm.assert_frame_equal(result, df) + + +@pytest.mark.parametrize("method", ["first", "last"]) +@pytest.mark.parametrize( + "df, expected", + [ + ( + DataFrame({"id": "a", "value": [None, "foo", np.nan]}), + DataFrame({"value": ["foo"]}, index=Index(["a"], name="id")), + ), + ( + DataFrame({"id": "a", "value": [np.nan]}, dtype=object), + DataFrame({"value": [None]}, index=Index(["a"], name="id")), + ), + ], +) +def test_first_last_with_None_expanded(method, df, expected): + # GH 32800, 38286 + result = getattr(df.groupby("id"), method)() + tm.assert_frame_equal(result, expected) + + +def test_first_last_nth_dtypes(df_mixed_floats): + + df = df_mixed_floats.copy() + df["E"] = True + df["F"] = 1 + + # tests for first / last / nth + grouped = df.groupby("A") + first = grouped.first() + expected = df.loc[[1, 0], ["B", "C", "D", "E", "F"]] + expected.index = Index(["bar", "foo"], name="A") + expected = expected.sort_index() + tm.assert_frame_equal(first, expected) + + last = grouped.last() + expected = df.loc[[5, 7], ["B", "C", "D", "E", "F"]] + expected.index = Index(["bar", "foo"], name="A") + expected = expected.sort_index() + tm.assert_frame_equal(last, expected) + + nth = grouped.nth(1) + expected = df.loc[[3, 2], ["B", "C", "D", "E", "F"]] + expected.index = Index(["bar", "foo"], name="A") + expected = expected.sort_index() + tm.assert_frame_equal(nth, expected) + + # GH 2763, first/last shifting dtypes + idx = list(range(10)) + idx.append(9) + s = Series(data=range(11), index=idx, name="IntCol") + assert s.dtype == "int64" + f = 
s.groupby(level=0).first() + assert f.dtype == "int64" + + +def test_first_last_nth_nan_dtype(): + # GH 33591 + df = DataFrame({"data": ["A"], "nans": Series([np.nan], dtype=object)}) + + grouped = df.groupby("data") + expected = df.set_index("data").nans + tm.assert_series_equal(grouped.nans.first(), expected) + tm.assert_series_equal(grouped.nans.last(), expected) + tm.assert_series_equal(grouped.nans.nth(-1), expected) + tm.assert_series_equal(grouped.nans.nth(0), expected) + + +def test_first_strings_timestamps(): + # GH 11244 + test = DataFrame( + { + Timestamp("2012-01-01 00:00:00"): ["a", "b"], + Timestamp("2012-01-02 00:00:00"): ["c", "d"], + "name": ["e", "e"], + "aaaa": ["f", "g"], + } + ) + result = test.groupby("name").first() + expected = DataFrame( + [["a", "c", "f"]], + columns=Index([Timestamp("2012-01-01"), Timestamp("2012-01-02"), "aaaa"]), + index=Index(["e"], name="name"), + ) + tm.assert_frame_equal(result, expected) + + +def test_nth(): + df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=["A", "B"]) + g = df.groupby("A") + + tm.assert_frame_equal(g.nth(0), df.iloc[[0, 2]].set_index("A")) + tm.assert_frame_equal(g.nth(1), df.iloc[[1]].set_index("A")) + tm.assert_frame_equal(g.nth(2), df.loc[[]].set_index("A")) + tm.assert_frame_equal(g.nth(-1), df.iloc[[1, 2]].set_index("A")) + tm.assert_frame_equal(g.nth(-2), df.iloc[[0]].set_index("A")) + tm.assert_frame_equal(g.nth(-3), df.loc[[]].set_index("A")) + tm.assert_series_equal(g.B.nth(0), df.set_index("A").B.iloc[[0, 2]]) + tm.assert_series_equal(g.B.nth(1), df.set_index("A").B.iloc[[1]]) + tm.assert_frame_equal(g[["B"]].nth(0), df.loc[[0, 2], ["A", "B"]].set_index("A")) + + exp = df.set_index("A") + tm.assert_frame_equal(g.nth(0, dropna="any"), exp.iloc[[1, 2]]) + tm.assert_frame_equal(g.nth(-1, dropna="any"), exp.iloc[[1, 2]]) + + exp["B"] = np.nan + tm.assert_frame_equal(g.nth(7, dropna="any"), exp.iloc[[1, 2]]) + tm.assert_frame_equal(g.nth(2, dropna="any"), exp.iloc[[1, 2]]) + + # out of bounds, regression from 0.13.1 + # GH 6621 + df = DataFrame( + { + "color": {0: "green", 1: "green", 2: "red", 3: "red", 4: "red"}, + "food": {0: "ham", 1: "eggs", 2: "eggs", 3: "ham", 4: "pork"}, + "two": { + 0: 1.5456590000000001, + 1: -0.070345000000000005, + 2: -2.4004539999999999, + 3: 0.46206000000000003, + 4: 0.52350799999999997, + }, + "one": { + 0: 0.56573799999999996, + 1: -0.9742360000000001, + 2: 1.033801, + 3: -0.78543499999999999, + 4: 0.70422799999999997, + }, + } + ).set_index(["color", "food"]) + + result = df.groupby(level=0, as_index=False).nth(2) + expected = df.iloc[[-1]] + tm.assert_frame_equal(result, expected) + + result = df.groupby(level=0, as_index=False).nth(3) + expected = df.loc[[]] + tm.assert_frame_equal(result, expected) + + # GH 7559 + # from the vbench + df = DataFrame(np.random.randint(1, 10, (100, 2)), dtype="int64") + s = df[1] + g = df[0] + expected = s.groupby(g).first() + expected2 = s.groupby(g).apply(lambda x: x.iloc[0]) + tm.assert_series_equal(expected2, expected, check_names=False) + assert expected.name == 1 + assert expected2.name == 1 + + # validate first + v = s[g == 1].iloc[0] + assert expected.iloc[0] == v + assert expected2.iloc[0] == v + + # this is NOT the same as .first (as sorted is default!) 
+ # as it keeps the order in the series (and not the group order) + # related GH 7287 + expected = s.groupby(g, sort=False).first() + result = s.groupby(g, sort=False).nth(0, dropna="all") + tm.assert_series_equal(result, expected) + + with pytest.raises(ValueError, match="For a DataFrame"): + s.groupby(g, sort=False).nth(0, dropna=True) + + # doc example + df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=["A", "B"]) + g = df.groupby("A") + result = g.B.nth(0, dropna="all") + expected = g.B.first() + tm.assert_series_equal(result, expected) + + # test multiple nth values + df = DataFrame([[1, np.nan], [1, 3], [1, 4], [5, 6], [5, 7]], columns=["A", "B"]) + g = df.groupby("A") + + tm.assert_frame_equal(g.nth(0), df.iloc[[0, 3]].set_index("A")) + tm.assert_frame_equal(g.nth([0]), df.iloc[[0, 3]].set_index("A")) + tm.assert_frame_equal(g.nth([0, 1]), df.iloc[[0, 1, 3, 4]].set_index("A")) + tm.assert_frame_equal(g.nth([0, -1]), df.iloc[[0, 2, 3, 4]].set_index("A")) + tm.assert_frame_equal(g.nth([0, 1, 2]), df.iloc[[0, 1, 2, 3, 4]].set_index("A")) + tm.assert_frame_equal(g.nth([0, 1, -1]), df.iloc[[0, 1, 2, 3, 4]].set_index("A")) + tm.assert_frame_equal(g.nth([2]), df.iloc[[2]].set_index("A")) + tm.assert_frame_equal(g.nth([3, 4]), df.loc[[]].set_index("A")) + + business_dates = pd.date_range(start="4/1/2014", end="6/30/2014", freq="B") + df = DataFrame(1, index=business_dates, columns=["a", "b"]) + # get the first, fourth and last two business days for each month + key = [df.index.year, df.index.month] + result = df.groupby(key, as_index=False).nth([0, 3, -2, -1]) + expected_dates = pd.to_datetime( + [ + "2014/4/1", + "2014/4/4", + "2014/4/29", + "2014/4/30", + "2014/5/1", + "2014/5/6", + "2014/5/29", + "2014/5/30", + "2014/6/2", + "2014/6/5", + "2014/6/27", + "2014/6/30", + ] + ) + expected = DataFrame(1, columns=["a", "b"], index=expected_dates) + tm.assert_frame_equal(result, expected) + + +def test_nth_multi_index(three_group): + # PR 9090, related to issue 8979 + # test nth on MultiIndex, should match .first() + grouped = three_group.groupby(["A", "B"]) + result = grouped.nth(0) + expected = grouped.first() + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "data, expected_first, expected_last", + [ + ( + { + "id": ["A"], + "time": Timestamp("2012-02-01 14:00:00", tz="US/Central"), + "foo": [1], + }, + { + "id": ["A"], + "time": Timestamp("2012-02-01 14:00:00", tz="US/Central"), + "foo": [1], + }, + { + "id": ["A"], + "time": Timestamp("2012-02-01 14:00:00", tz="US/Central"), + "foo": [1], + }, + ), + ( + { + "id": ["A", "B", "A"], + "time": [ + Timestamp("2012-01-01 13:00:00", tz="America/New_York"), + Timestamp("2012-02-01 14:00:00", tz="US/Central"), + Timestamp("2012-03-01 12:00:00", tz="Europe/London"), + ], + "foo": [1, 2, 3], + }, + { + "id": ["A", "B"], + "time": [ + Timestamp("2012-01-01 13:00:00", tz="America/New_York"), + Timestamp("2012-02-01 14:00:00", tz="US/Central"), + ], + "foo": [1, 2], + }, + { + "id": ["A", "B"], + "time": [ + Timestamp("2012-03-01 12:00:00", tz="Europe/London"), + Timestamp("2012-02-01 14:00:00", tz="US/Central"), + ], + "foo": [3, 2], + }, + ), + ], +) +def test_first_last_tz(data, expected_first, expected_last): + # GH15884 + # Test that the timezone is retained when calling first + # or last on groupby with as_index=False + + df = DataFrame(data) + + result = df.groupby("id", as_index=False).first() + expected = DataFrame(expected_first) + cols = ["id", "time", "foo"] + tm.assert_frame_equal(result[cols], expected[cols]) 
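+    # Editorial spot check (added): the aggregated values remain tz-aware
+    # Timestamps rather than being coerced to naive datetimes, which is the
+    # point of GH15884; this follows from the frame comparison just above.
+    assert result["time"].iloc[0].tz is not None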
+ + result = df.groupby("id", as_index=False)["time"].first() + tm.assert_frame_equal(result, expected[["id", "time"]]) + + result = df.groupby("id", as_index=False).last() + expected = DataFrame(expected_last) + cols = ["id", "time", "foo"] + tm.assert_frame_equal(result[cols], expected[cols]) + + result = df.groupby("id", as_index=False)["time"].last() + tm.assert_frame_equal(result, expected[["id", "time"]]) + + +@pytest.mark.parametrize( + "method, ts, alpha", + [ + ["first", Timestamp("2013-01-01", tz="US/Eastern"), "a"], + ["last", Timestamp("2013-01-02", tz="US/Eastern"), "b"], + ], +) +def test_first_last_tz_multi_column(method, ts, alpha): + # GH 21603 + category_string = Series(list("abc")).astype("category") + df = DataFrame( + { + "group": [1, 1, 2], + "category_string": category_string, + "datetimetz": pd.date_range("20130101", periods=3, tz="US/Eastern"), + } + ) + result = getattr(df.groupby("group"), method)() + expected = DataFrame( + { + "category_string": pd.Categorical( + [alpha, "c"], dtype=category_string.dtype + ), + "datetimetz": [ts, Timestamp("2013-01-03", tz="US/Eastern")], + }, + index=Index([1, 2], name="group"), + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "values", + [ + pd.array([True, False], dtype="boolean"), + pd.array([1, 2], dtype="Int64"), + pd.to_datetime(["2020-01-01", "2020-02-01"]), + pd.to_timedelta([1, 2], unit="D"), + ], +) +@pytest.mark.parametrize("function", ["first", "last", "min", "max"]) +def test_first_last_extension_array_keeps_dtype(values, function): + # https://github.com/pandas-dev/pandas/issues/33071 + # https://github.com/pandas-dev/pandas/issues/32194 + df = DataFrame({"a": [1, 2], "b": values}) + grouped = df.groupby("a") + idx = Index([1, 2], name="a") + expected_series = Series(values, name="b", index=idx) + expected_frame = DataFrame({"b": values}, index=idx) + + result_series = getattr(grouped["b"], function)() + tm.assert_series_equal(result_series, expected_series) + + result_frame = grouped.agg({"b": function}) + tm.assert_frame_equal(result_frame, expected_frame) + + +def test_nth_multi_index_as_expected(): + # PR 9090, related to issue 8979 + # test nth on MultiIndex + three_group = DataFrame( + { + "A": [ + "foo", + "foo", + "foo", + "foo", + "bar", + "bar", + "bar", + "bar", + "foo", + "foo", + "foo", + ], + "B": [ + "one", + "one", + "one", + "two", + "one", + "one", + "one", + "two", + "two", + "two", + "one", + ], + "C": [ + "dull", + "dull", + "shiny", + "dull", + "dull", + "shiny", + "shiny", + "dull", + "shiny", + "shiny", + "shiny", + ], + } + ) + grouped = three_group.groupby(["A", "B"]) + result = grouped.nth(0) + expected = DataFrame( + {"C": ["dull", "dull", "dull", "dull"]}, + index=MultiIndex.from_arrays( + [["bar", "bar", "foo", "foo"], ["one", "two", "one", "two"]], + names=["A", "B"], + ), + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "op, n, expected_rows", + [ + ("head", -1, [0]), + ("head", 0, []), + ("head", 1, [0, 2]), + ("head", 7, [0, 1, 2]), + ("tail", -1, [1]), + ("tail", 0, []), + ("tail", 1, [1, 2]), + ("tail", 7, [0, 1, 2]), + ], +) +@pytest.mark.parametrize("columns", [None, [], ["A"], ["B"], ["A", "B"]]) +@pytest.mark.parametrize("as_index", [True, False]) +def test_groupby_head_tail(op, n, expected_rows, columns, as_index): + df = DataFrame([[1, 2], [1, 4], [5, 6]], columns=["A", "B"]) + g = df.groupby("A", as_index=as_index) + expected = df.iloc[expected_rows] + if columns is not None: + g = g[columns] + expected = 
expected[columns] + result = getattr(g, op)(n) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "op, n, expected_cols", + [ + ("head", -1, [0]), + ("head", 0, []), + ("head", 1, [0, 2]), + ("head", 7, [0, 1, 2]), + ("tail", -1, [1]), + ("tail", 0, []), + ("tail", 1, [1, 2]), + ("tail", 7, [0, 1, 2]), + ], +) +def test_groupby_head_tail_axis_1(op, n, expected_cols): + # GH 9772 + df = DataFrame( + [[1, 2, 3], [1, 4, 5], [2, 6, 7], [3, 8, 9]], columns=["A", "B", "C"] + ) + g = df.groupby([0, 0, 1], axis=1) + expected = df.iloc[:, expected_cols] + result = getattr(g, op)(n) + tm.assert_frame_equal(result, expected) + + +def test_group_selection_cache(): + # GH 12839 nth, head, and tail should return same result consistently + df = DataFrame([[1, 2], [1, 4], [5, 6]], columns=["A", "B"]) + expected = df.iloc[[0, 2]].set_index("A") + + g = df.groupby("A") + result1 = g.head(n=2) + result2 = g.nth(0) + tm.assert_frame_equal(result1, df) + tm.assert_frame_equal(result2, expected) + + g = df.groupby("A") + result1 = g.tail(n=2) + result2 = g.nth(0) + tm.assert_frame_equal(result1, df) + tm.assert_frame_equal(result2, expected) + + g = df.groupby("A") + result1 = g.nth(0) + result2 = g.head(n=2) + tm.assert_frame_equal(result1, expected) + tm.assert_frame_equal(result2, df) + + g = df.groupby("A") + result1 = g.nth(0) + result2 = g.tail(n=2) + tm.assert_frame_equal(result1, expected) + tm.assert_frame_equal(result2, df) + + +def test_nth_empty(): + # GH 16064 + df = DataFrame(index=[0], columns=["a", "b", "c"]) + result = df.groupby("a").nth(10) + expected = DataFrame(index=Index([], name="a"), columns=["b", "c"]) + tm.assert_frame_equal(result, expected) + + result = df.groupby(["a", "b"]).nth(10) + expected = DataFrame( + index=MultiIndex([[], []], [[], []], names=["a", "b"]), columns=["c"] + ) + tm.assert_frame_equal(result, expected) + + +def test_nth_column_order(): + # GH 20760 + # Check that nth preserves column order + df = DataFrame( + [[1, "b", 100], [1, "a", 50], [1, "a", np.nan], [2, "c", 200], [2, "d", 150]], + columns=["A", "C", "B"], + ) + result = df.groupby("A").nth(0) + expected = DataFrame( + [["b", 100.0], ["c", 200.0]], columns=["C", "B"], index=Index([1, 2], name="A") + ) + tm.assert_frame_equal(result, expected) + + result = df.groupby("A").nth(-1, dropna="any") + expected = DataFrame( + [["a", 50.0], ["d", 150.0]], columns=["C", "B"], index=Index([1, 2], name="A") + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("dropna", [None, "any", "all"]) +def test_nth_nan_in_grouper(dropna): + # GH 26011 + df = DataFrame( + [[np.nan, 0, 1], ["abc", 2, 3], [np.nan, 4, 5], ["def", 6, 7], [np.nan, 8, 9]], + columns=list("abc"), + ) + result = df.groupby("a").nth(0, dropna=dropna) + expected = DataFrame( + [[2, 3], [6, 7]], columns=list("bc"), index=Index(["abc", "def"], name="a") + ) + + tm.assert_frame_equal(result, expected) + + +def test_first_categorical_and_datetime_data_nat(): + # GH 20520 + df = DataFrame( + { + "group": ["first", "first", "second", "third", "third"], + "time": 5 * [np.datetime64("NaT")], + "categories": Series(["a", "b", "c", "a", "b"], dtype="category"), + } + ) + result = df.groupby("group").first() + expected = DataFrame( + { + "time": 3 * [np.datetime64("NaT")], + "categories": Series(["a", "c", "a"]).astype( + pd.CategoricalDtype(["a", "b", "c"]) + ), + } + ) + expected.index = Index(["first", "second", "third"], name="group") + tm.assert_frame_equal(result, expected) + + +def 
test_first_multi_key_groupbby_categorical(): + # GH 22512 + df = DataFrame( + { + "A": [1, 1, 1, 2, 2], + "B": [100, 100, 200, 100, 100], + "C": ["apple", "orange", "mango", "mango", "orange"], + "D": ["jupiter", "mercury", "mars", "venus", "venus"], + } + ) + df = df.astype({"D": "category"}) + result = df.groupby(by=["A", "B"]).first() + expected = DataFrame( + { + "C": ["apple", "mango", "mango"], + "D": Series(["jupiter", "mars", "venus"]).astype( + pd.CategoricalDtype(["jupiter", "mars", "mercury", "venus"]) + ), + } + ) + expected.index = MultiIndex.from_tuples( + [(1, 100), (1, 200), (2, 100)], names=["A", "B"] + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("method", ["first", "last", "nth"]) +def test_groupby_last_first_nth_with_none(method, nulls_fixture): + # GH29645 + expected = Series(["y"]) + data = Series( + [nulls_fixture, nulls_fixture, nulls_fixture, "y", nulls_fixture], + index=[0, 0, 0, 0, 0], + ).groupby(level=0) + + if method == "nth": + result = getattr(data, method)(3) + else: + result = getattr(data, method)() + + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "arg, expected_rows", + [ + [slice(None, 3, 2), [0, 1, 4, 5]], + [slice(None, -2), [0, 2, 5]], + [[slice(None, 2), slice(-2, None)], [0, 1, 2, 3, 4, 6, 7]], + [[0, 1, slice(-2, None)], [0, 1, 2, 3, 4, 6, 7]], + ], +) +def test_slice(slice_test_df, slice_test_grouped, arg, expected_rows): + # Test slices GH #42947 + + result = slice_test_grouped.nth[arg] + equivalent = slice_test_grouped.nth(arg) + expected = slice_test_df.iloc[expected_rows] + + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(equivalent, expected) + + +def test_nth_indexed(slice_test_df, slice_test_grouped): + # Test index notation GH #44688 + + result = slice_test_grouped.nth[0, 1, -2:] + equivalent = slice_test_grouped.nth([0, 1, slice(-2, None)]) + expected = slice_test_df.iloc[[0, 1, 2, 3, 4, 6, 7]] + + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(equivalent, expected) + + +def test_invalid_argument(slice_test_grouped): + # Test for error on invalid argument + + with pytest.raises(TypeError, match="Invalid index"): + slice_test_grouped.nth(3.14) + + +def test_negative_step(slice_test_grouped): + # Test for error on negative slice step + + with pytest.raises(ValueError, match="Invalid step"): + slice_test_grouped.nth(slice(None, None, -1)) + + +def test_np_ints(slice_test_df, slice_test_grouped): + # Test np ints work + + result = slice_test_grouped.nth(np.array([0, 1])) + expected = slice_test_df.iloc[[0, 1, 2, 3, 4]] + tm.assert_frame_equal(result, expected) + + +def test_groupby_nth_with_column_axis(): + # GH43926 + df = DataFrame( + [ + [4, 5, 6], + [8, 8, 7], + ], + index=["z", "y"], + columns=["C", "B", "A"], + ) + result = df.groupby(df.iloc[1], axis=1).nth(0) + expected = DataFrame( + [ + [6, 4], + [7, 8], + ], + index=["z", "y"], + columns=[7, 8], + ) + expected.columns.name = "y" + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "start, stop, expected_values, expected_columns", + [ + (None, None, [0, 1, 2, 3, 4], [5, 5, 5, 6, 6]), + (None, 1, [0, 3], [5, 6]), + (None, 9, [0, 1, 2, 3, 4], [5, 5, 5, 6, 6]), + (None, -1, [0, 1, 3], [5, 5, 6]), + (1, None, [1, 2, 4], [5, 5, 6]), + (1, -1, [1], [5]), + (-1, None, [2, 4], [5, 6]), + (-1, 2, [4], [6]), + ], +) +@pytest.mark.parametrize("method", ["call", "index"]) +def test_nth_slices_with_column_axis( + start, stop, expected_values, expected_columns, method +): + df = 
DataFrame([range(5)], columns=[list("ABCDE")]) + gb = df.groupby([5, 5, 5, 6, 6], axis=1) + result = { + "call": lambda start, stop: gb.nth(slice(start, stop)), + "index": lambda start, stop: gb.nth[start:stop], + }[method](start, stop) + expected = DataFrame([expected_values], columns=expected_columns) + tm.assert_frame_equal(result, expected) + + +def test_head_tail_dropna_true(): + # GH#45089 + df = DataFrame( + [["a", "z"], ["b", np.nan], ["c", np.nan], ["c", np.nan]], columns=["X", "Y"] + ) + expected = DataFrame([["a", "z"]], columns=["X", "Y"]) + + result = df.groupby(["X", "Y"]).head(n=1) + tm.assert_frame_equal(result, expected) + + result = df.groupby(["X", "Y"]).tail(n=1) + tm.assert_frame_equal(result, expected) + + result = df.groupby(["X", "Y"]).nth(n=0).reset_index() + tm.assert_frame_equal(result, expected) + + +def test_head_tail_dropna_false(): + # GH#45089 + df = DataFrame([["a", "z"], ["b", np.nan], ["c", np.nan]], columns=["X", "Y"]) + expected = DataFrame([["a", "z"], ["b", np.nan], ["c", np.nan]], columns=["X", "Y"]) + + result = df.groupby(["X", "Y"], dropna=False).head(n=1) + tm.assert_frame_equal(result, expected) + + result = df.groupby(["X", "Y"], dropna=False).tail(n=1) + tm.assert_frame_equal(result, expected) + + result = df.groupby(["X", "Y"], dropna=False).nth(n=0).reset_index() + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_numba.py b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_numba.py new file mode 100644 index 0000000..6554993 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_numba.py @@ -0,0 +1,73 @@ +import pytest + +import pandas.util._test_decorators as td + +from pandas import ( + DataFrame, + Series, +) +import pandas._testing as tm + + +@td.skip_if_no("numba") +@pytest.mark.filterwarnings("ignore:\n") +# Filter warnings when parallel=True and the function can't be parallelized by Numba +class TestEngine: + def test_cython_vs_numba_frame( + self, sort, nogil, parallel, nopython, numba_supported_reductions + ): + func, kwargs = numba_supported_reductions + df = DataFrame({"a": [3, 2, 3, 2], "b": range(4), "c": range(1, 5)}) + engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} + gb = df.groupby("a", sort=sort) + result = getattr(gb, func)( + engine="numba", engine_kwargs=engine_kwargs, **kwargs + ) + expected = getattr(gb, func)(**kwargs) + # check_dtype can be removed if GH 44952 is addressed + check_dtype = func != "sum" + tm.assert_frame_equal(result, expected, check_dtype=check_dtype) + + def test_cython_vs_numba_getitem( + self, sort, nogil, parallel, nopython, numba_supported_reductions + ): + func, kwargs = numba_supported_reductions + df = DataFrame({"a": [3, 2, 3, 2], "b": range(4), "c": range(1, 5)}) + engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} + gb = df.groupby("a", sort=sort)["c"] + result = getattr(gb, func)( + engine="numba", engine_kwargs=engine_kwargs, **kwargs + ) + expected = getattr(gb, func)(**kwargs) + # check_dtype can be removed if GH 44952 is addressed + check_dtype = func != "sum" + tm.assert_series_equal(result, expected, check_dtype=check_dtype) + + def test_cython_vs_numba_series( + self, sort, nogil, parallel, nopython, numba_supported_reductions + ): + func, kwargs = numba_supported_reductions + ser = Series(range(3), index=[1, 2, 1], name="foo") + engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} + gb = 
ser.groupby(level=0, sort=sort) + result = getattr(gb, func)( + engine="numba", engine_kwargs=engine_kwargs, **kwargs + ) + expected = getattr(gb, func)(**kwargs) + # check_dtype can be removed if GH 44952 is addressed + check_dtype = func != "sum" + tm.assert_series_equal(result, expected, check_dtype=check_dtype) + + def test_as_index_false_unsupported(self, numba_supported_reductions): + func, kwargs = numba_supported_reductions + df = DataFrame({"a": [3, 2, 3, 2], "b": range(4), "c": range(1, 5)}) + gb = df.groupby("a", as_index=False) + with pytest.raises(NotImplementedError, match="as_index=False"): + getattr(gb, func)(engine="numba", **kwargs) + + def test_axis_1_unsupported(self, numba_supported_reductions): + func, kwargs = numba_supported_reductions + df = DataFrame({"a": [3, 2, 3, 2], "b": range(4), "c": range(1, 5)}) + gb = df.groupby("a", axis=1) + with pytest.raises(NotImplementedError, match="axis=1"): + getattr(gb, func)(engine="numba", **kwargs) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_nunique.py b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_nunique.py new file mode 100644 index 0000000..6656fd5 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_nunique.py @@ -0,0 +1,184 @@ +import datetime as dt +from string import ascii_lowercase + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + MultiIndex, + NaT, + Series, + Timestamp, + date_range, +) +import pandas._testing as tm + + +@pytest.mark.slow +@pytest.mark.parametrize("n", 10 ** np.arange(2, 6)) +@pytest.mark.parametrize("m", [10, 100, 1000]) +@pytest.mark.parametrize("sort", [False, True]) +@pytest.mark.parametrize("dropna", [False, True]) +def test_series_groupby_nunique(n, m, sort, dropna): + def check_nunique(df, keys, as_index=True): + original_df = df.copy() + gr = df.groupby(keys, as_index=as_index, sort=sort) + left = gr["julie"].nunique(dropna=dropna) + + gr = df.groupby(keys, as_index=as_index, sort=sort) + right = gr["julie"].apply(Series.nunique, dropna=dropna) + if not as_index: + right = right.reset_index(drop=True) + + if as_index: + tm.assert_series_equal(left, right, check_names=False) + else: + tm.assert_frame_equal(left, right, check_names=False) + tm.assert_frame_equal(df, original_df) + + days = date_range("2015-08-23", periods=10) + + frame = DataFrame( + { + "jim": np.random.choice(list(ascii_lowercase), n), + "joe": np.random.choice(days, n), + "julie": np.random.randint(0, m, n), + } + ) + + check_nunique(frame, ["jim"]) + check_nunique(frame, ["jim", "joe"]) + + frame.loc[1::17, "jim"] = None + frame.loc[3::37, "joe"] = None + frame.loc[7::19, "julie"] = None + frame.loc[8::19, "julie"] = None + frame.loc[9::19, "julie"] = None + + check_nunique(frame, ["jim"]) + check_nunique(frame, ["jim", "joe"]) + check_nunique(frame, ["jim"], as_index=False) + check_nunique(frame, ["jim", "joe"], as_index=False) + + +def test_nunique(): + df = DataFrame({"A": list("abbacc"), "B": list("abxacc"), "C": list("abbacx")}) + + expected = DataFrame({"A": list("abc"), "B": [1, 2, 1], "C": [1, 1, 2]}) + result = df.groupby("A", as_index=False).nunique() + tm.assert_frame_equal(result, expected) + + # as_index + expected.index = list("abc") + expected.index.name = "A" + expected = expected.drop(columns="A") + result = df.groupby("A").nunique() + tm.assert_frame_equal(result, expected) + + # with na + result = df.replace({"x": None}).groupby("A").nunique(dropna=False) + 
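+    # Editorial note (added): with dropna=False the None produced by the
+    # replace is counted as a distinct value, just as "x" was, so the
+    # counts match the no-NA expectation above. Minimal illustration:
+    assert pd.Series(["a", None]).nunique(dropna=False) == 2
+    assert pd.Series(["a", None]).nunique() == 1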
tm.assert_frame_equal(result, expected) + + # dropna + expected = DataFrame({"B": [1] * 3, "C": [1] * 3}, index=list("abc")) + expected.index.name = "A" + result = df.replace({"x": None}).groupby("A").nunique() + tm.assert_frame_equal(result, expected) + + +def test_nunique_with_object(): + # GH 11077 + data = DataFrame( + [ + [100, 1, "Alice"], + [200, 2, "Bob"], + [300, 3, "Charlie"], + [-400, 4, "Dan"], + [500, 5, "Edith"], + ], + columns=["amount", "id", "name"], + ) + + result = data.groupby(["id", "amount"])["name"].nunique() + index = MultiIndex.from_arrays([data.id, data.amount]) + expected = Series([1] * 5, name="name", index=index) + tm.assert_series_equal(result, expected) + + +def test_nunique_with_empty_series(): + # GH 12553 + data = Series(name="name", dtype=object) + result = data.groupby(level=0).nunique() + expected = Series(name="name", dtype="int64") + tm.assert_series_equal(result, expected) + + +def test_nunique_with_timegrouper(): + # GH 13453 + test = DataFrame( + { + "time": [ + Timestamp("2016-06-28 09:35:35"), + Timestamp("2016-06-28 16:09:30"), + Timestamp("2016-06-28 16:46:28"), + ], + "data": ["1", "2", "3"], + } + ).set_index("time") + result = test.groupby(pd.Grouper(freq="h"))["data"].nunique() + expected = test.groupby(pd.Grouper(freq="h"))["data"].apply(Series.nunique) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "key, data, dropna, expected", + [ + ( + ["x", "x", "x"], + [Timestamp("2019-01-01"), NaT, Timestamp("2019-01-01")], + True, + Series([1], index=pd.Index(["x"], name="key"), name="data"), + ), + ( + ["x", "x", "x"], + [dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1)], + True, + Series([1], index=pd.Index(["x"], name="key"), name="data"), + ), + ( + ["x", "x", "x", "y", "y"], + [dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1)], + False, + Series([2, 2], index=pd.Index(["x", "y"], name="key"), name="data"), + ), + ( + ["x", "x", "x", "x", "y"], + [dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1)], + False, + Series([2, 1], index=pd.Index(["x", "y"], name="key"), name="data"), + ), + ], +) +def test_nunique_with_NaT(key, data, dropna, expected): + # GH 27951 + df = DataFrame({"key": key, "data": data}) + result = df.groupby(["key"])["data"].nunique(dropna=dropna) + tm.assert_series_equal(result, expected) + + +def test_nunique_preserves_column_level_names(): + # GH 23222 + test = DataFrame([1, 2, 2], columns=pd.Index(["A"], name="level_0")) + result = test.groupby([0, 0, 0]).nunique() + expected = DataFrame([2], columns=test.columns) + tm.assert_frame_equal(result, expected) + + +def test_nunique_transform_with_datetime(): + # GH 35109 - transform with nunique on datetimes results in integers + df = DataFrame(date_range("2008-12-31", "2009-01-02"), columns=["date"]) + result = df.groupby([0, 0, 1])["date"].transform("nunique") + expected = Series([2, 2, 1], name="date") + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_pipe.py b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_pipe.py new file mode 100644 index 0000000..42bd6a8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_pipe.py @@ -0,0 +1,82 @@ +import numpy as np + +import pandas as pd +from pandas import ( + DataFrame, + Index, +) +import pandas._testing as tm +from pandas.core.api import Int64Index + + +def test_pipe(): + # Test the pipe method of DataFrameGroupBy. 
+ # Issue #17871 + + random_state = np.random.RandomState(1234567890) + + df = DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": random_state.randn(8), + "C": random_state.randn(8), + } + ) + + def f(dfgb): + return dfgb.B.max() - dfgb.C.min().min() + + def square(srs): + return srs ** 2 + + # Note that the transformations are + # GroupBy -> Series + # Series -> Series + # This then chains the GroupBy.pipe and the + # NDFrame.pipe methods + result = df.groupby("A").pipe(f).pipe(square) + + index = Index(["bar", "foo"], dtype="object", name="A") + expected = pd.Series([8.99110003361, 8.17516964785], name="B", index=index) + + tm.assert_series_equal(expected, result) + + +def test_pipe_args(): + # Test passing args to the pipe method of DataFrameGroupBy. + # Issue #17871 + + df = DataFrame( + { + "group": ["A", "A", "B", "B", "C"], + "x": [1.0, 2.0, 3.0, 2.0, 5.0], + "y": [10.0, 100.0, 1000.0, -100.0, -1000.0], + } + ) + + def f(dfgb, arg1): + return dfgb.filter(lambda grp: grp.y.mean() > arg1, dropna=False).groupby( + dfgb.grouper + ) + + def g(dfgb, arg2): + return dfgb.sum() / dfgb.sum().sum() + arg2 + + def h(df, arg3): + return df.x + df.y - arg3 + + result = df.groupby("group").pipe(f, 0).pipe(g, 10).pipe(h, 100) + + # Assert the results here + index = Index(["A", "B", "C"], name="group") + expected = pd.Series([-79.5160891089, -78.4839108911, -80], index=index) + + tm.assert_series_equal(expected, result) + + # test SeriesGroupby.pipe + ser = pd.Series([1, 1, 2, 2, 3, 3]) + result = ser.groupby(ser).pipe(lambda grp: grp.sum() * grp.count()) + + expected = pd.Series([4, 8, 12], index=Int64Index([1, 2, 3])) + + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_quantile.py b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_quantile.py new file mode 100644 index 0000000..1badc4a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_quantile.py @@ -0,0 +1,331 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Index, +) +import pandas._testing as tm + + +@pytest.mark.parametrize( + "interpolation", ["linear", "lower", "higher", "nearest", "midpoint"] +) +@pytest.mark.parametrize( + "a_vals,b_vals", + [ + # Ints + ([1, 2, 3, 4, 5], [5, 4, 3, 2, 1]), + ([1, 2, 3, 4], [4, 3, 2, 1]), + ([1, 2, 3, 4, 5], [4, 3, 2, 1]), + # Floats + ([1.0, 2.0, 3.0, 4.0, 5.0], [5.0, 4.0, 3.0, 2.0, 1.0]), + # Missing data + ([1.0, np.nan, 3.0, np.nan, 5.0], [5.0, np.nan, 3.0, np.nan, 1.0]), + ([np.nan, 4.0, np.nan, 2.0, np.nan], [np.nan, 4.0, np.nan, 2.0, np.nan]), + # Timestamps + ( + list(pd.date_range("1/1/18", freq="D", periods=5)), + list(pd.date_range("1/1/18", freq="D", periods=5))[::-1], + ), + # All NA + ([np.nan] * 5, [np.nan] * 5), + ], +) +@pytest.mark.parametrize("q", [0, 0.25, 0.5, 0.75, 1]) +def test_quantile(interpolation, a_vals, b_vals, q): + if interpolation == "nearest" and q == 0.5 and b_vals == [4, 3, 2, 1]: + pytest.skip( + "Unclear numpy expectation for nearest result with equidistant data" + ) + + a_expected = pd.Series(a_vals).quantile(q, interpolation=interpolation) + b_expected = pd.Series(b_vals).quantile(q, interpolation=interpolation) + + df = DataFrame( + {"key": ["a"] * len(a_vals) + ["b"] * len(b_vals), "val": a_vals + b_vals} + ) + + expected = DataFrame( + [a_expected, b_expected], columns=["val"], index=Index(["a", "b"], name="key") + ) + result = df.groupby("key").quantile(q, 
interpolation=interpolation) + + tm.assert_frame_equal(result, expected) + + +def test_quantile_array(): + # https://github.com/pandas-dev/pandas/issues/27526 + df = DataFrame({"A": [0, 1, 2, 3, 4]}) + result = df.groupby([0, 0, 1, 1, 1]).quantile([0.25]) + + index = pd.MultiIndex.from_product([[0, 1], [0.25]]) + expected = DataFrame({"A": [0.25, 2.50]}, index=index) + tm.assert_frame_equal(result, expected) + + df = DataFrame({"A": [0, 1, 2, 3], "B": [4, 5, 6, 7]}) + index = pd.MultiIndex.from_product([[0, 1], [0.25, 0.75]]) + + result = df.groupby([0, 0, 1, 1]).quantile([0.25, 0.75]) + expected = DataFrame( + {"A": [0.25, 0.75, 2.25, 2.75], "B": [4.25, 4.75, 6.25, 6.75]}, index=index + ) + tm.assert_frame_equal(result, expected) + + +def test_quantile_array2(): + # https://github.com/pandas-dev/pandas/pull/28085#issuecomment-524066959 + df = DataFrame( + np.random.RandomState(0).randint(0, 5, size=(10, 3)), columns=list("ABC") + ) + result = df.groupby("A").quantile([0.3, 0.7]) + expected = DataFrame( + { + "B": [0.9, 2.1, 2.2, 3.4, 1.6, 2.4, 2.3, 2.7, 0.0, 0.0], + "C": [1.2, 2.8, 1.8, 3.0, 0.0, 0.0, 1.9, 3.1, 3.0, 3.0], + }, + index=pd.MultiIndex.from_product( + [[0, 1, 2, 3, 4], [0.3, 0.7]], names=["A", None] + ), + ) + tm.assert_frame_equal(result, expected) + + +def test_quantile_array_no_sort(): + df = DataFrame({"A": [0, 1, 2], "B": [3, 4, 5]}) + result = df.groupby([1, 0, 1], sort=False).quantile([0.25, 0.5, 0.75]) + expected = DataFrame( + {"A": [0.5, 1.0, 1.5, 1.0, 1.0, 1.0], "B": [3.5, 4.0, 4.5, 4.0, 4.0, 4.0]}, + index=pd.MultiIndex.from_product([[1, 0], [0.25, 0.5, 0.75]]), + ) + tm.assert_frame_equal(result, expected) + + result = df.groupby([1, 0, 1], sort=False).quantile([0.75, 0.25]) + expected = DataFrame( + {"A": [1.5, 0.5, 1.0, 1.0], "B": [4.5, 3.5, 4.0, 4.0]}, + index=pd.MultiIndex.from_product([[1, 0], [0.75, 0.25]]), + ) + tm.assert_frame_equal(result, expected) + + +def test_quantile_array_multiple_levels(): + df = DataFrame( + {"A": [0, 1, 2], "B": [3, 4, 5], "c": ["a", "a", "a"], "d": ["a", "a", "b"]} + ) + result = df.groupby(["c", "d"]).quantile([0.25, 0.75]) + index = pd.MultiIndex.from_tuples( + [("a", "a", 0.25), ("a", "a", 0.75), ("a", "b", 0.25), ("a", "b", 0.75)], + names=["c", "d", None], + ) + expected = DataFrame( + {"A": [0.25, 0.75, 2.0, 2.0], "B": [3.25, 3.75, 5.0, 5.0]}, index=index + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("frame_size", [(2, 3), (100, 10)]) +@pytest.mark.parametrize("groupby", [[0], [0, 1]]) +@pytest.mark.parametrize("q", [[0.5, 0.6]]) +def test_groupby_quantile_with_arraylike_q_and_int_columns(frame_size, groupby, q): + # GH30289 + nrow, ncol = frame_size + df = DataFrame(np.array([ncol * [_ % 4] for _ in range(nrow)]), columns=range(ncol)) + + idx_levels = [list(range(min(nrow, 4)))] * len(groupby) + [q] + idx_codes = [[x for x in range(min(nrow, 4)) for _ in q]] * len(groupby) + [ + list(range(len(q))) * min(nrow, 4) + ] + expected_index = pd.MultiIndex( + levels=idx_levels, codes=idx_codes, names=groupby + [None] + ) + expected_values = [ + [float(x)] * (ncol - len(groupby)) for x in range(min(nrow, 4)) for _ in q + ] + expected_columns = [x for x in range(ncol) if x not in groupby] + expected = DataFrame( + expected_values, index=expected_index, columns=expected_columns + ) + result = df.groupby(groupby).quantile(q) + + tm.assert_frame_equal(result, expected) + + +def test_quantile_raises(): + df = DataFrame([["foo", "a"], ["foo", "b"], ["foo", "c"]], columns=["key", "val"]) + + with 
pytest.raises(TypeError, match="cannot be performed against 'object' dtypes"): + with tm.assert_produces_warning( + FutureWarning, match="Dropping invalid columns" + ): + df.groupby("key").quantile() + + +def test_quantile_out_of_bounds_q_raises(): + # https://github.com/pandas-dev/pandas/issues/27470 + df = DataFrame({"a": [0, 0, 0, 1, 1, 1], "b": range(6)}) + g = df.groupby([0, 0, 0, 1, 1, 1]) + with pytest.raises(ValueError, match="Got '50.0' instead"): + g.quantile(50) + + with pytest.raises(ValueError, match="Got '-1.0' instead"): + g.quantile(-1) + + +def test_quantile_missing_group_values_no_segfaults(): + # GH 28662 + data = np.array([1.0, np.nan, 1.0]) + df = DataFrame({"key": data, "val": range(3)}) + + # Random segfaults; would have been guaranteed in loop + grp = df.groupby("key") + for _ in range(100): + grp.quantile() + + +@pytest.mark.parametrize( + "key, val, expected_key, expected_val", + [ + ([1.0, np.nan, 3.0, np.nan], range(4), [1.0, 3.0], [0.0, 2.0]), + ([1.0, np.nan, 2.0, 2.0], range(4), [1.0, 2.0], [0.0, 2.5]), + (["a", "b", "b", np.nan], range(4), ["a", "b"], [0, 1.5]), + ([0], [42], [0], [42.0]), + ([], [], np.array([], dtype="float64"), np.array([], dtype="float64")), + ], +) +def test_quantile_missing_group_values_correct_results( + key, val, expected_key, expected_val +): + # GH 28662, GH 33200, GH 33569 + df = DataFrame({"key": key, "val": val}) + + expected = DataFrame( + expected_val, index=Index(expected_key, name="key"), columns=["val"] + ) + + grp = df.groupby("key") + + result = grp.quantile(0.5) + tm.assert_frame_equal(result, expected) + + result = grp.quantile() + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "values", + [ + pd.array([1, 0, None] * 2, dtype="Int64"), + pd.array([True, False, None] * 2, dtype="boolean"), + ], +) +@pytest.mark.parametrize("q", [0.5, [0.0, 0.5, 1.0]]) +def test_groupby_quantile_nullable_array(values, q): + # https://github.com/pandas-dev/pandas/issues/33136 + df = DataFrame({"a": ["x"] * 3 + ["y"] * 3, "b": values}) + result = df.groupby("a")["b"].quantile(q) + + if isinstance(q, list): + idx = pd.MultiIndex.from_product((["x", "y"], q), names=["a", None]) + true_quantiles = [0.0, 0.5, 1.0] + else: + idx = Index(["x", "y"], name="a") + true_quantiles = [0.5] + + expected = pd.Series(true_quantiles * 2, index=idx, name="b") + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("q", [0.5, [0.0, 0.5, 1.0]]) +def test_groupby_quantile_skips_invalid_dtype(q): + df = DataFrame({"a": [1], "b": [2.0], "c": ["x"]}) + + with tm.assert_produces_warning(FutureWarning, match="Dropping invalid columns"): + result = df.groupby("a").quantile(q) + + expected = df.groupby("a")[["b"]].quantile(q) + tm.assert_frame_equal(result, expected) + + +def test_groupby_quantile_NA_float(any_float_dtype): + # GH#42849 + df = DataFrame({"x": [1, 1], "y": [0.2, np.nan]}, dtype=any_float_dtype) + result = df.groupby("x")["y"].quantile(0.5) + exp_index = Index([1.0], dtype=any_float_dtype, name="x") + expected = pd.Series([0.2], dtype=float, index=exp_index, name="y") + tm.assert_series_equal(expected, result) + + result = df.groupby("x")["y"].quantile([0.5, 0.75]) + expected = pd.Series( + [0.2] * 2, + index=pd.MultiIndex.from_product((exp_index, [0.5, 0.75]), names=["x", None]), + name="y", + ) + tm.assert_series_equal(result, expected) + + +def test_groupby_quantile_NA_int(any_int_ea_dtype): + # GH#42849 + df = DataFrame({"x": [1, 1], "y": [2, 5]}, dtype=any_int_ea_dtype) + result = 
df.groupby("x")["y"].quantile(0.5) + expected = pd.Series( + [3.5], dtype=float, index=Index([1], name="x", dtype=any_int_ea_dtype), name="y" + ) + tm.assert_series_equal(expected, result) + + result = df.groupby("x").quantile(0.5) + expected = DataFrame({"y": 3.5}, index=Index([1], name="x", dtype=any_int_ea_dtype)) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("dtype", ["Float64", "Float32"]) +def test_groupby_quantile_allNA_column(dtype): + # GH#42849 + df = DataFrame({"x": [1, 1], "y": [pd.NA] * 2}, dtype=dtype) + result = df.groupby("x")["y"].quantile(0.5) + expected = pd.Series( + [np.nan], dtype=float, index=Index([1.0], dtype=dtype), name="y" + ) + expected.index.name = "x" + tm.assert_series_equal(expected, result) + + +def test_groupby_timedelta_quantile(): + # GH: 29485 + df = DataFrame( + {"value": pd.to_timedelta(np.arange(4), unit="s"), "group": [1, 1, 2, 2]} + ) + result = df.groupby("group").quantile(0.99) + expected = DataFrame( + { + "value": [ + pd.Timedelta("0 days 00:00:00.990000"), + pd.Timedelta("0 days 00:00:02.990000"), + ] + }, + index=Index([1, 2], name="group"), + ) + tm.assert_frame_equal(result, expected) + + +def test_columns_groupby_quantile(): + # GH 33795 + df = DataFrame( + np.arange(12).reshape(3, -1), + index=list("XYZ"), + columns=pd.Series(list("ABAB"), name="col"), + ) + result = df.groupby("col", axis=1).quantile(q=[0.8, 0.2]) + expected = DataFrame( + [ + [1.6, 0.4, 2.6, 1.4], + [5.6, 4.4, 6.6, 5.4], + [9.6, 8.4, 10.6, 9.4], + ], + index=list("XYZ"), + columns=pd.MultiIndex.from_tuples( + [("A", 0.8), ("A", 0.2), ("B", 0.8), ("B", 0.2)], names=["col", None] + ), + ) + + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_rank.py b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_rank.py new file mode 100644 index 0000000..83b8d5c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_rank.py @@ -0,0 +1,663 @@ +from datetime import datetime + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + NaT, + Series, + concat, +) +import pandas._testing as tm + + +def test_rank_apply(): + lev1 = tm.rands_array(10, 100) + lev2 = tm.rands_array(10, 130) + lab1 = np.random.randint(0, 100, size=500) + lab2 = np.random.randint(0, 130, size=500) + + df = DataFrame( + { + "value": np.random.randn(500), + "key1": lev1.take(lab1), + "key2": lev2.take(lab2), + } + ) + + result = df.groupby(["key1", "key2"]).value.rank() + + expected = [piece.value.rank() for key, piece in df.groupby(["key1", "key2"])] + expected = concat(expected, axis=0) + expected = expected.reindex(result.index) + tm.assert_series_equal(result, expected) + + result = df.groupby(["key1", "key2"]).value.rank(pct=True) + + expected = [ + piece.value.rank(pct=True) for key, piece in df.groupby(["key1", "key2"]) + ] + expected = concat(expected, axis=0) + expected = expected.reindex(result.index) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("grps", [["qux"], ["qux", "quux"]]) +@pytest.mark.parametrize( + "vals", + [ + np.array([2, 2, 8, 2, 6], dtype=dtype) + for dtype in ["i8", "i4", "i2", "i1", "u8", "u4", "u2", "u1", "f8", "f4", "f2"] + ] + + [ + [ + pd.Timestamp("2018-01-02"), + pd.Timestamp("2018-01-02"), + pd.Timestamp("2018-01-08"), + pd.Timestamp("2018-01-02"), + pd.Timestamp("2018-01-06"), + ], + [ + pd.Timestamp("2018-01-02", tz="US/Pacific"), + pd.Timestamp("2018-01-02", tz="US/Pacific"), + 
pd.Timestamp("2018-01-08", tz="US/Pacific"), + pd.Timestamp("2018-01-02", tz="US/Pacific"), + pd.Timestamp("2018-01-06", tz="US/Pacific"), + ], + [ + pd.Timestamp("2018-01-02") - pd.Timestamp(0), + pd.Timestamp("2018-01-02") - pd.Timestamp(0), + pd.Timestamp("2018-01-08") - pd.Timestamp(0), + pd.Timestamp("2018-01-02") - pd.Timestamp(0), + pd.Timestamp("2018-01-06") - pd.Timestamp(0), + ], + [ + pd.Timestamp("2018-01-02").to_period("D"), + pd.Timestamp("2018-01-02").to_period("D"), + pd.Timestamp("2018-01-08").to_period("D"), + pd.Timestamp("2018-01-02").to_period("D"), + pd.Timestamp("2018-01-06").to_period("D"), + ], + ], + ids=lambda x: type(x[0]), +) +@pytest.mark.parametrize( + "ties_method,ascending,pct,exp", + [ + ("average", True, False, [2.0, 2.0, 5.0, 2.0, 4.0]), + ("average", True, True, [0.4, 0.4, 1.0, 0.4, 0.8]), + ("average", False, False, [4.0, 4.0, 1.0, 4.0, 2.0]), + ("average", False, True, [0.8, 0.8, 0.2, 0.8, 0.4]), + ("min", True, False, [1.0, 1.0, 5.0, 1.0, 4.0]), + ("min", True, True, [0.2, 0.2, 1.0, 0.2, 0.8]), + ("min", False, False, [3.0, 3.0, 1.0, 3.0, 2.0]), + ("min", False, True, [0.6, 0.6, 0.2, 0.6, 0.4]), + ("max", True, False, [3.0, 3.0, 5.0, 3.0, 4.0]), + ("max", True, True, [0.6, 0.6, 1.0, 0.6, 0.8]), + ("max", False, False, [5.0, 5.0, 1.0, 5.0, 2.0]), + ("max", False, True, [1.0, 1.0, 0.2, 1.0, 0.4]), + ("first", True, False, [1.0, 2.0, 5.0, 3.0, 4.0]), + ("first", True, True, [0.2, 0.4, 1.0, 0.6, 0.8]), + ("first", False, False, [3.0, 4.0, 1.0, 5.0, 2.0]), + ("first", False, True, [0.6, 0.8, 0.2, 1.0, 0.4]), + ("dense", True, False, [1.0, 1.0, 3.0, 1.0, 2.0]), + ("dense", True, True, [1.0 / 3.0, 1.0 / 3.0, 3.0 / 3.0, 1.0 / 3.0, 2.0 / 3.0]), + ("dense", False, False, [3.0, 3.0, 1.0, 3.0, 2.0]), + ("dense", False, True, [3.0 / 3.0, 3.0 / 3.0, 1.0 / 3.0, 3.0 / 3.0, 2.0 / 3.0]), + ], +) +def test_rank_args(grps, vals, ties_method, ascending, pct, exp): + key = np.repeat(grps, len(vals)) + + orig_vals = vals + vals = list(vals) * len(grps) + if isinstance(orig_vals, np.ndarray): + vals = np.array(vals, dtype=orig_vals.dtype) + + df = DataFrame({"key": key, "val": vals}) + result = df.groupby("key").rank(method=ties_method, ascending=ascending, pct=pct) + + exp_df = DataFrame(exp * len(grps), columns=["val"]) + tm.assert_frame_equal(result, exp_df) + + +@pytest.mark.parametrize("grps", [["qux"], ["qux", "quux"]]) +@pytest.mark.parametrize( + "vals", [[-np.inf, -np.inf, np.nan, 1.0, np.nan, np.inf, np.inf]] +) +@pytest.mark.parametrize( + "ties_method,ascending,na_option,exp", + [ + ("average", True, "keep", [1.5, 1.5, np.nan, 3, np.nan, 4.5, 4.5]), + ("average", True, "top", [3.5, 3.5, 1.5, 5.0, 1.5, 6.5, 6.5]), + ("average", True, "bottom", [1.5, 1.5, 6.5, 3.0, 6.5, 4.5, 4.5]), + ("average", False, "keep", [4.5, 4.5, np.nan, 3, np.nan, 1.5, 1.5]), + ("average", False, "top", [6.5, 6.5, 1.5, 5.0, 1.5, 3.5, 3.5]), + ("average", False, "bottom", [4.5, 4.5, 6.5, 3.0, 6.5, 1.5, 1.5]), + ("min", True, "keep", [1.0, 1.0, np.nan, 3.0, np.nan, 4.0, 4.0]), + ("min", True, "top", [3.0, 3.0, 1.0, 5.0, 1.0, 6.0, 6.0]), + ("min", True, "bottom", [1.0, 1.0, 6.0, 3.0, 6.0, 4.0, 4.0]), + ("min", False, "keep", [4.0, 4.0, np.nan, 3.0, np.nan, 1.0, 1.0]), + ("min", False, "top", [6.0, 6.0, 1.0, 5.0, 1.0, 3.0, 3.0]), + ("min", False, "bottom", [4.0, 4.0, 6.0, 3.0, 6.0, 1.0, 1.0]), + ("max", True, "keep", [2.0, 2.0, np.nan, 3.0, np.nan, 5.0, 5.0]), + ("max", True, "top", [4.0, 4.0, 2.0, 5.0, 2.0, 7.0, 7.0]), + ("max", True, "bottom", [2.0, 2.0, 7.0, 3.0, 7.0, 5.0, 5.0]), + ("max", 
False, "keep", [5.0, 5.0, np.nan, 3.0, np.nan, 2.0, 2.0]), + ("max", False, "top", [7.0, 7.0, 2.0, 5.0, 2.0, 4.0, 4.0]), + ("max", False, "bottom", [5.0, 5.0, 7.0, 3.0, 7.0, 2.0, 2.0]), + ("first", True, "keep", [1.0, 2.0, np.nan, 3.0, np.nan, 4.0, 5.0]), + ("first", True, "top", [3.0, 4.0, 1.0, 5.0, 2.0, 6.0, 7.0]), + ("first", True, "bottom", [1.0, 2.0, 6.0, 3.0, 7.0, 4.0, 5.0]), + ("first", False, "keep", [4.0, 5.0, np.nan, 3.0, np.nan, 1.0, 2.0]), + ("first", False, "top", [6.0, 7.0, 1.0, 5.0, 2.0, 3.0, 4.0]), + ("first", False, "bottom", [4.0, 5.0, 6.0, 3.0, 7.0, 1.0, 2.0]), + ("dense", True, "keep", [1.0, 1.0, np.nan, 2.0, np.nan, 3.0, 3.0]), + ("dense", True, "top", [2.0, 2.0, 1.0, 3.0, 1.0, 4.0, 4.0]), + ("dense", True, "bottom", [1.0, 1.0, 4.0, 2.0, 4.0, 3.0, 3.0]), + ("dense", False, "keep", [3.0, 3.0, np.nan, 2.0, np.nan, 1.0, 1.0]), + ("dense", False, "top", [4.0, 4.0, 1.0, 3.0, 1.0, 2.0, 2.0]), + ("dense", False, "bottom", [3.0, 3.0, 4.0, 2.0, 4.0, 1.0, 1.0]), + ], +) +def test_infs_n_nans(grps, vals, ties_method, ascending, na_option, exp): + # GH 20561 + key = np.repeat(grps, len(vals)) + vals = vals * len(grps) + df = DataFrame({"key": key, "val": vals}) + result = df.groupby("key").rank( + method=ties_method, ascending=ascending, na_option=na_option + ) + exp_df = DataFrame(exp * len(grps), columns=["val"]) + tm.assert_frame_equal(result, exp_df) + + +@pytest.mark.parametrize("grps", [["qux"], ["qux", "quux"]]) +@pytest.mark.parametrize( + "vals", + [ + np.array([2, 2, np.nan, 8, 2, 6, np.nan, np.nan], dtype=dtype) + for dtype in ["f8", "f4", "f2"] + ] + + [ + [ + pd.Timestamp("2018-01-02"), + pd.Timestamp("2018-01-02"), + np.nan, + pd.Timestamp("2018-01-08"), + pd.Timestamp("2018-01-02"), + pd.Timestamp("2018-01-06"), + np.nan, + np.nan, + ], + [ + pd.Timestamp("2018-01-02", tz="US/Pacific"), + pd.Timestamp("2018-01-02", tz="US/Pacific"), + np.nan, + pd.Timestamp("2018-01-08", tz="US/Pacific"), + pd.Timestamp("2018-01-02", tz="US/Pacific"), + pd.Timestamp("2018-01-06", tz="US/Pacific"), + np.nan, + np.nan, + ], + [ + pd.Timestamp("2018-01-02") - pd.Timestamp(0), + pd.Timestamp("2018-01-02") - pd.Timestamp(0), + np.nan, + pd.Timestamp("2018-01-08") - pd.Timestamp(0), + pd.Timestamp("2018-01-02") - pd.Timestamp(0), + pd.Timestamp("2018-01-06") - pd.Timestamp(0), + np.nan, + np.nan, + ], + [ + pd.Timestamp("2018-01-02").to_period("D"), + pd.Timestamp("2018-01-02").to_period("D"), + np.nan, + pd.Timestamp("2018-01-08").to_period("D"), + pd.Timestamp("2018-01-02").to_period("D"), + pd.Timestamp("2018-01-06").to_period("D"), + np.nan, + np.nan, + ], + ], + ids=lambda x: type(x[0]), +) +@pytest.mark.parametrize( + "ties_method,ascending,na_option,pct,exp", + [ + ( + "average", + True, + "keep", + False, + [2.0, 2.0, np.nan, 5.0, 2.0, 4.0, np.nan, np.nan], + ), + ( + "average", + True, + "keep", + True, + [0.4, 0.4, np.nan, 1.0, 0.4, 0.8, np.nan, np.nan], + ), + ( + "average", + False, + "keep", + False, + [4.0, 4.0, np.nan, 1.0, 4.0, 2.0, np.nan, np.nan], + ), + ( + "average", + False, + "keep", + True, + [0.8, 0.8, np.nan, 0.2, 0.8, 0.4, np.nan, np.nan], + ), + ("min", True, "keep", False, [1.0, 1.0, np.nan, 5.0, 1.0, 4.0, np.nan, np.nan]), + ("min", True, "keep", True, [0.2, 0.2, np.nan, 1.0, 0.2, 0.8, np.nan, np.nan]), + ( + "min", + False, + "keep", + False, + [3.0, 3.0, np.nan, 1.0, 3.0, 2.0, np.nan, np.nan], + ), + ("min", False, "keep", True, [0.6, 0.6, np.nan, 0.2, 0.6, 0.4, np.nan, np.nan]), + ("max", True, "keep", False, [3.0, 3.0, np.nan, 5.0, 3.0, 4.0, np.nan, 
np.nan]), + ("max", True, "keep", True, [0.6, 0.6, np.nan, 1.0, 0.6, 0.8, np.nan, np.nan]), + ( + "max", + False, + "keep", + False, + [5.0, 5.0, np.nan, 1.0, 5.0, 2.0, np.nan, np.nan], + ), + ("max", False, "keep", True, [1.0, 1.0, np.nan, 0.2, 1.0, 0.4, np.nan, np.nan]), + ( + "first", + True, + "keep", + False, + [1.0, 2.0, np.nan, 5.0, 3.0, 4.0, np.nan, np.nan], + ), + ( + "first", + True, + "keep", + True, + [0.2, 0.4, np.nan, 1.0, 0.6, 0.8, np.nan, np.nan], + ), + ( + "first", + False, + "keep", + False, + [3.0, 4.0, np.nan, 1.0, 5.0, 2.0, np.nan, np.nan], + ), + ( + "first", + False, + "keep", + True, + [0.6, 0.8, np.nan, 0.2, 1.0, 0.4, np.nan, np.nan], + ), + ( + "dense", + True, + "keep", + False, + [1.0, 1.0, np.nan, 3.0, 1.0, 2.0, np.nan, np.nan], + ), + ( + "dense", + True, + "keep", + True, + [ + 1.0 / 3.0, + 1.0 / 3.0, + np.nan, + 3.0 / 3.0, + 1.0 / 3.0, + 2.0 / 3.0, + np.nan, + np.nan, + ], + ), + ( + "dense", + False, + "keep", + False, + [3.0, 3.0, np.nan, 1.0, 3.0, 2.0, np.nan, np.nan], + ), + ( + "dense", + False, + "keep", + True, + [ + 3.0 / 3.0, + 3.0 / 3.0, + np.nan, + 1.0 / 3.0, + 3.0 / 3.0, + 2.0 / 3.0, + np.nan, + np.nan, + ], + ), + ("average", True, "bottom", False, [2.0, 2.0, 7.0, 5.0, 2.0, 4.0, 7.0, 7.0]), + ( + "average", + True, + "bottom", + True, + [0.25, 0.25, 0.875, 0.625, 0.25, 0.5, 0.875, 0.875], + ), + ("average", False, "bottom", False, [4.0, 4.0, 7.0, 1.0, 4.0, 2.0, 7.0, 7.0]), + ( + "average", + False, + "bottom", + True, + [0.5, 0.5, 0.875, 0.125, 0.5, 0.25, 0.875, 0.875], + ), + ("min", True, "bottom", False, [1.0, 1.0, 6.0, 5.0, 1.0, 4.0, 6.0, 6.0]), + ( + "min", + True, + "bottom", + True, + [0.125, 0.125, 0.75, 0.625, 0.125, 0.5, 0.75, 0.75], + ), + ("min", False, "bottom", False, [3.0, 3.0, 6.0, 1.0, 3.0, 2.0, 6.0, 6.0]), + ( + "min", + False, + "bottom", + True, + [0.375, 0.375, 0.75, 0.125, 0.375, 0.25, 0.75, 0.75], + ), + ("max", True, "bottom", False, [3.0, 3.0, 8.0, 5.0, 3.0, 4.0, 8.0, 8.0]), + ("max", True, "bottom", True, [0.375, 0.375, 1.0, 0.625, 0.375, 0.5, 1.0, 1.0]), + ("max", False, "bottom", False, [5.0, 5.0, 8.0, 1.0, 5.0, 2.0, 8.0, 8.0]), + ( + "max", + False, + "bottom", + True, + [0.625, 0.625, 1.0, 0.125, 0.625, 0.25, 1.0, 1.0], + ), + ("first", True, "bottom", False, [1.0, 2.0, 6.0, 5.0, 3.0, 4.0, 7.0, 8.0]), + ( + "first", + True, + "bottom", + True, + [0.125, 0.25, 0.75, 0.625, 0.375, 0.5, 0.875, 1.0], + ), + ("first", False, "bottom", False, [3.0, 4.0, 6.0, 1.0, 5.0, 2.0, 7.0, 8.0]), + ( + "first", + False, + "bottom", + True, + [0.375, 0.5, 0.75, 0.125, 0.625, 0.25, 0.875, 1.0], + ), + ("dense", True, "bottom", False, [1.0, 1.0, 4.0, 3.0, 1.0, 2.0, 4.0, 4.0]), + ("dense", True, "bottom", True, [0.25, 0.25, 1.0, 0.75, 0.25, 0.5, 1.0, 1.0]), + ("dense", False, "bottom", False, [3.0, 3.0, 4.0, 1.0, 3.0, 2.0, 4.0, 4.0]), + ("dense", False, "bottom", True, [0.75, 0.75, 1.0, 0.25, 0.75, 0.5, 1.0, 1.0]), + ], +) +def test_rank_args_missing(grps, vals, ties_method, ascending, na_option, pct, exp): + key = np.repeat(grps, len(vals)) + + orig_vals = vals + vals = list(vals) * len(grps) + if isinstance(orig_vals, np.ndarray): + vals = np.array(vals, dtype=orig_vals.dtype) + + df = DataFrame({"key": key, "val": vals}) + result = df.groupby("key").rank( + method=ties_method, ascending=ascending, na_option=na_option, pct=pct + ) + + exp_df = DataFrame(exp * len(grps), columns=["val"]) + tm.assert_frame_equal(result, exp_df) + + +@pytest.mark.parametrize( + "pct,exp", [(False, [3.0, 3.0, 3.0, 3.0, 3.0]), (True, [0.6, 0.6, 0.6, 0.6, 
0.6])] +) +def test_rank_resets_each_group(pct, exp): + df = DataFrame( + {"key": ["a", "a", "a", "a", "a", "b", "b", "b", "b", "b"], "val": [1] * 10} + ) + result = df.groupby("key").rank(pct=pct) + exp_df = DataFrame(exp * 2, columns=["val"]) + tm.assert_frame_equal(result, exp_df) + + +@pytest.mark.parametrize( + "dtype", ["int64", "int32", "uint64", "uint32", "float64", "float32"] +) +@pytest.mark.parametrize("upper", [True, False]) +def test_rank_avg_even_vals(dtype, upper): + if upper: + # use IntegerDtype/FloatingDtype + dtype = dtype[0].upper() + dtype[1:] + dtype = dtype.replace("Ui", "UI") + df = DataFrame({"key": ["a"] * 4, "val": [1] * 4}) + df["val"] = df["val"].astype(dtype) + assert df["val"].dtype == dtype + + result = df.groupby("key").rank() + exp_df = DataFrame([2.5, 2.5, 2.5, 2.5], columns=["val"]) + tm.assert_frame_equal(result, exp_df) + + +@pytest.mark.parametrize("ties_method", ["average", "min", "max", "first", "dense"]) +@pytest.mark.parametrize("ascending", [True, False]) +@pytest.mark.parametrize("na_option", ["keep", "top", "bottom"]) +@pytest.mark.parametrize("pct", [True, False]) +@pytest.mark.parametrize( + "vals", [["bar", "bar", "foo", "bar", "baz"], ["bar", np.nan, "foo", np.nan, "baz"]] +) +def test_rank_object_dtype(ties_method, ascending, na_option, pct, vals): + df = DataFrame({"key": ["foo"] * 5, "val": vals}) + mask = df["val"].isna() + + gb = df.groupby("key") + res = gb.rank(method=ties_method, ascending=ascending, na_option=na_option, pct=pct) + + # construct our expected by using numeric values with the same ordering + if mask.any(): + df2 = DataFrame({"key": ["foo"] * 5, "val": [0, np.nan, 2, np.nan, 1]}) + else: + df2 = DataFrame({"key": ["foo"] * 5, "val": [0, 0, 2, 0, 1]}) + + gb2 = df2.groupby("key") + alt = gb2.rank( + method=ties_method, ascending=ascending, na_option=na_option, pct=pct + ) + + tm.assert_frame_equal(res, alt) + + +@pytest.mark.parametrize("na_option", [True, "bad", 1]) +@pytest.mark.parametrize("ties_method", ["average", "min", "max", "first", "dense"]) +@pytest.mark.parametrize("ascending", [True, False]) +@pytest.mark.parametrize("pct", [True, False]) +@pytest.mark.parametrize( + "vals", + [ + ["bar", "bar", "foo", "bar", "baz"], + ["bar", np.nan, "foo", np.nan, "baz"], + [1, np.nan, 2, np.nan, 3], + ], +) +def test_rank_naoption_raises(ties_method, ascending, na_option, pct, vals): + df = DataFrame({"key": ["foo"] * 5, "val": vals}) + msg = "na_option must be one of 'keep', 'top', or 'bottom'" + + with pytest.raises(ValueError, match=msg): + df.groupby("key").rank( + method=ties_method, ascending=ascending, na_option=na_option, pct=pct + ) + + +def test_rank_empty_group(): + # see gh-22519 + column = "A" + df = DataFrame({"A": [0, 1, 0], "B": [1.0, np.nan, 2.0]}) + + result = df.groupby(column).B.rank(pct=True) + expected = Series([0.5, np.nan, 1.0], name="B") + tm.assert_series_equal(result, expected) + + result = df.groupby(column).rank(pct=True) + expected = DataFrame({"B": [0.5, np.nan, 1.0]}) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "input_key,input_value,output_value", + [ + ([1, 2], [1, 1], [1.0, 1.0]), + ([1, 1, 2, 2], [1, 2, 1, 2], [0.5, 1.0, 0.5, 1.0]), + ([1, 1, 2, 2], [1, 2, 1, np.nan], [0.5, 1.0, 1.0, np.nan]), + ([1, 1, 2], [1, 2, np.nan], [0.5, 1.0, np.nan]), + ], +) +def test_rank_zero_div(input_key, input_value, output_value): + # GH 23666 + df = DataFrame({"A": input_key, "B": input_value}) + + result = df.groupby("A").rank(method="dense", pct=True) + expected = 
DataFrame({"B": output_value}) + tm.assert_frame_equal(result, expected) + + +def test_rank_min_int(): + # GH-32859 + df = DataFrame( + { + "grp": [1, 1, 2], + "int_col": [ + np.iinfo(np.int64).min, + np.iinfo(np.int64).max, + np.iinfo(np.int64).min, + ], + "datetimelike": [NaT, datetime(2001, 1, 1), NaT], + } + ) + + result = df.groupby("grp").rank() + expected = DataFrame( + {"int_col": [1.0, 2.0, 1.0], "datetimelike": [np.NaN, 1.0, np.NaN]} + ) + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("use_nan", [True, False]) +def test_rank_pct_equal_values_on_group_transition(use_nan): + # GH#40518 + fill_value = np.nan if use_nan else 3 + df = DataFrame( + [ + [-1, 1], + [-1, 2], + [1, fill_value], + [-1, fill_value], + ], + columns=["group", "val"], + ) + result = df.groupby(["group"])["val"].rank( + method="dense", + pct=True, + ) + if use_nan: + expected = Series([0.5, 1, np.nan, np.nan], name="val") + else: + expected = Series([1 / 3, 2 / 3, 1, 1], name="val") + + tm.assert_series_equal(result, expected) + + +def test_rank_multiindex(): + # GH27721 + df = concat( + { + "a": DataFrame({"col1": [3, 4], "col2": [1, 2]}), + "b": DataFrame({"col3": [5, 6], "col4": [7, 8]}), + }, + axis=1, + ) + + gb = df.groupby(level=0, axis=1) + result = gb.rank(axis=1) + + expected = concat( + [ + df["a"].rank(axis=1), + df["b"].rank(axis=1), + ], + axis=1, + keys=["a", "b"], + ) + tm.assert_frame_equal(result, expected) + + +def test_groupby_axis0_rank_axis1(): + # GH#41320 + df = DataFrame( + {0: [1, 3, 5, 7], 1: [2, 4, 6, 8], 2: [1.5, 3.5, 5.5, 7.5]}, + index=["a", "a", "b", "b"], + ) + gb = df.groupby(level=0, axis=0) + + res = gb.rank(axis=1) + + # This should match what we get when "manually" operating group-by-group + expected = concat([df.loc["a"].rank(axis=1), df.loc["b"].rank(axis=1)], axis=0) + tm.assert_frame_equal(res, expected) + + # check that we haven't accidentally written a case that coincidentally + # matches rank(axis=0) + alt = gb.rank(axis=0) + assert not alt.equals(expected) + + +def test_groupby_axis0_cummax_axis1(): + # case where groupby axis is 0 and axis keyword in transform is 1 + + # df has mixed dtype -> multiple blocks + df = DataFrame( + {0: [1, 3, 5, 7], 1: [2, 4, 6, 8], 2: [1.5, 3.5, 5.5, 7.5]}, + index=["a", "a", "b", "b"], + ) + gb = df.groupby(level=0, axis=0) + + cmax = gb.cummax(axis=1) + expected = df[[0, 1]].astype(np.float64) + expected[2] = expected[1] + tm.assert_frame_equal(cmax, expected) + + +def test_non_unique_index(): + # GH 16577 + df = DataFrame( + {"A": [1.0, 2.0, 3.0, np.nan], "value": 1.0}, + index=[pd.Timestamp("20170101", tz="US/Eastern")] * 4, + ) + result = df.groupby([df.index, "A"]).value.rank(ascending=True, pct=True) + expected = Series( + [1.0] * 4, index=[pd.Timestamp("20170101", tz="US/Eastern")] * 4, name="value" + ) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_sample.py b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_sample.py new file mode 100644 index 0000000..9153fac --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_sample.py @@ -0,0 +1,144 @@ +import pytest + +from pandas import ( + DataFrame, + Index, + Series, +) +import pandas._testing as tm + + +@pytest.mark.parametrize("n, frac", [(2, None), (None, 0.2)]) +def test_groupby_sample_balanced_groups_shape(n, frac): + values = [1] * 10 + [2] * 10 + df = DataFrame({"a": values, "b": values}) + + result = df.groupby("a").sample(n=n, frac=frac) + 
values = [1] * 2 + [2] * 2 + expected = DataFrame({"a": values, "b": values}, index=result.index) + tm.assert_frame_equal(result, expected) + + result = df.groupby("a")["b"].sample(n=n, frac=frac) + expected = Series(values, name="b", index=result.index) + tm.assert_series_equal(result, expected) + + +def test_groupby_sample_unbalanced_groups_shape(): + values = [1] * 10 + [2] * 20 + df = DataFrame({"a": values, "b": values}) + + result = df.groupby("a").sample(n=5) + values = [1] * 5 + [2] * 5 + expected = DataFrame({"a": values, "b": values}, index=result.index) + tm.assert_frame_equal(result, expected) + + result = df.groupby("a")["b"].sample(n=5) + expected = Series(values, name="b", index=result.index) + tm.assert_series_equal(result, expected) + + +def test_groupby_sample_index_value_spans_groups(): + values = [1] * 3 + [2] * 3 + df = DataFrame({"a": values, "b": values}, index=[1, 2, 2, 2, 2, 2]) + + result = df.groupby("a").sample(n=2) + values = [1] * 2 + [2] * 2 + expected = DataFrame({"a": values, "b": values}, index=result.index) + tm.assert_frame_equal(result, expected) + + result = df.groupby("a")["b"].sample(n=2) + expected = Series(values, name="b", index=result.index) + tm.assert_series_equal(result, expected) + + +def test_groupby_sample_n_and_frac_raises(): + df = DataFrame({"a": [1, 2], "b": [1, 2]}) + msg = "Please enter a value for `frac` OR `n`, not both" + + with pytest.raises(ValueError, match=msg): + df.groupby("a").sample(n=1, frac=1.0) + + with pytest.raises(ValueError, match=msg): + df.groupby("a")["b"].sample(n=1, frac=1.0) + + +def test_groupby_sample_frac_gt_one_without_replacement_raises(): + df = DataFrame({"a": [1, 2], "b": [1, 2]}) + msg = "Replace has to be set to `True` when upsampling the population `frac` > 1." + + with pytest.raises(ValueError, match=msg): + df.groupby("a").sample(frac=1.5, replace=False) + + with pytest.raises(ValueError, match=msg): + df.groupby("a")["b"].sample(frac=1.5, replace=False) + + +@pytest.mark.parametrize("n", [-1, 1.5]) +def test_groupby_sample_invalid_n_raises(n): + df = DataFrame({"a": [1, 2], "b": [1, 2]}) + + if n < 0: + msg = "A negative number of rows requested. Please provide `n` >= 0." 
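+ # (non-integer n values fail with a different message; see the else branch)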
+ else: + msg = "Only integers accepted as `n` values" + + with pytest.raises(ValueError, match=msg): + df.groupby("a").sample(n=n) + + with pytest.raises(ValueError, match=msg): + df.groupby("a")["b"].sample(n=n) + + +def test_groupby_sample_oversample(): + values = [1] * 10 + [2] * 10 + df = DataFrame({"a": values, "b": values}) + + result = df.groupby("a").sample(frac=2.0, replace=True) + values = [1] * 20 + [2] * 20 + expected = DataFrame({"a": values, "b": values}, index=result.index) + tm.assert_frame_equal(result, expected) + + result = df.groupby("a")["b"].sample(frac=2.0, replace=True) + expected = Series(values, name="b", index=result.index) + tm.assert_series_equal(result, expected) + + +def test_groupby_sample_without_n_or_frac(): + values = [1] * 10 + [2] * 10 + df = DataFrame({"a": values, "b": values}) + + result = df.groupby("a").sample(n=None, frac=None) + expected = DataFrame({"a": [1, 2], "b": [1, 2]}, index=result.index) + tm.assert_frame_equal(result, expected) + + result = df.groupby("a")["b"].sample(n=None, frac=None) + expected = Series([1, 2], name="b", index=result.index) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "index, expected_index", + [(["w", "x", "y", "z"], ["w", "w", "y", "y"]), ([3, 4, 5, 6], [3, 3, 5, 5])], +) +def test_groupby_sample_with_weights(index, expected_index): + # GH 39927 - tests for integer index needed + values = [1] * 2 + [2] * 2 + df = DataFrame({"a": values, "b": values}, index=Index(index)) + + result = df.groupby("a").sample(n=2, replace=True, weights=[1, 0, 1, 0]) + expected = DataFrame({"a": values, "b": values}, index=Index(expected_index)) + tm.assert_frame_equal(result, expected) + + result = df.groupby("a")["b"].sample(n=2, replace=True, weights=[1, 0, 1, 0]) + expected = Series(values, name="b", index=Index(expected_index)) + tm.assert_series_equal(result, expected) + + +def test_groupby_sample_with_selections(): + # GH 39928 + values = [1] * 10 + [2] * 10 + df = DataFrame({"a": values, "b": values, "c": values}) + + result = df.groupby("a")[["b", "c"]].sample(n=None, frac=None) + expected = DataFrame({"b": [1, 2], "c": [1, 2]}, index=result.index) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_size.py b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_size.py new file mode 100644 index 0000000..f87e411 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_size.py @@ -0,0 +1,67 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + PeriodIndex, + Series, +) +import pandas._testing as tm + + +@pytest.mark.parametrize("by", ["A", "B", ["A", "B"]]) +def test_size(df, by): + grouped = df.groupby(by=by) + result = grouped.size() + for key, group in grouped: + assert result[key] == len(group) + + +@pytest.mark.parametrize("by", ["A", "B", ["A", "B"]]) +@pytest.mark.parametrize("sort", [True, False]) +def test_size_sort(df, sort, by): + df = DataFrame(np.random.choice(20, (1000, 3)), columns=list("ABC")) + left = df.groupby(by=by, sort=sort).size() + right = df.groupby(by=by, sort=sort)["C"].apply(lambda a: a.shape[0]) + tm.assert_series_equal(left, right, check_names=False) + + +def test_size_series_dataframe(): + # https://github.com/pandas-dev/pandas/issues/11699 + df = DataFrame(columns=["A", "B"]) + out = Series(dtype="int64", index=Index([], name="A")) + tm.assert_series_equal(df.groupby("A").size(), out) + + +def test_size_groupby_all_null(): + # 
https://github.com/pandas-dev/pandas/issues/23050 + # Assert no 'Value Error : Length of passed values is 2, index implies 0' + df = DataFrame({"A": [None, None]}) # all-null groups + result = df.groupby("A").size() + expected = Series(dtype="int64", index=Index([], name="A")) + tm.assert_series_equal(result, expected) + + +def test_size_period_index(): + # https://github.com/pandas-dev/pandas/issues/34010 + ser = Series([1], index=PeriodIndex(["2000"], name="A", freq="D")) + grp = ser.groupby(level="A") + result = grp.size() + tm.assert_series_equal(result, ser) + + +@pytest.mark.parametrize("as_index", [True, False]) +def test_size_on_categorical(as_index): + df = DataFrame([[1, 1], [2, 2]], columns=["A", "B"]) + df["A"] = df["A"].astype("category") + result = df.groupby(["A", "B"], as_index=as_index).size() + + expected = DataFrame( + [[1, 1, 1], [1, 2, 0], [2, 1, 0], [2, 2, 1]], columns=["A", "B", "size"] + ) + expected["A"] = expected["A"].astype("category") + if as_index: + expected = expected.set_index(["A", "B"])["size"].rename(None) + + tm.assert_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_timegrouper.py b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_timegrouper.py new file mode 100644 index 0000000..d4b2163 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_timegrouper.py @@ -0,0 +1,910 @@ +""" test with the TimeGrouper / grouping with datetimes """ + +from datetime import datetime +from io import StringIO + +import numpy as np +import pytest +import pytz + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + DataFrame, + DatetimeIndex, + Index, + MultiIndex, + Series, + Timestamp, + date_range, + offsets, +) +import pandas._testing as tm +from pandas.core.groupby.grouper import Grouper +from pandas.core.groupby.ops import BinGrouper + + +@pytest.fixture +def frame_for_truncated_bingrouper(): + """ + DataFrame used by groupby_with_truncated_bingrouper, made into + a separate fixture for easier re-use in + test_groupby_apply_timegrouper_with_nat_apply_squeeze + """ + df = DataFrame( + { + "Quantity": [18, 3, 5, 1, 9, 3], + "Date": [ + Timestamp(2013, 9, 1, 13, 0), + Timestamp(2013, 9, 1, 13, 5), + Timestamp(2013, 10, 1, 20, 0), + Timestamp(2013, 10, 3, 10, 0), + pd.NaT, + Timestamp(2013, 9, 2, 14, 0), + ], + } + ) + return df + + +@pytest.fixture +def groupby_with_truncated_bingrouper(frame_for_truncated_bingrouper): + """ + GroupBy object such that gb.grouper is a BinGrouper and + len(gb.grouper.result_index) < len(gb.grouper.group_keys_seq) + + Aggregations on this groupby should have + + dti = date_range("2013-09-01", "2013-10-01", freq="5D", name="Date") + + As either the index or an index level. 
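+ (The pd.NaT entry in "Date" is what makes the BinGrouper truncated:
+ a NaT row cannot be assigned to any 5D bin.)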
+ """ + df = frame_for_truncated_bingrouper + + tdg = Grouper(key="Date", freq="5D") + gb = df.groupby(tdg) + + # check we're testing the case we're interested in + assert len(gb.grouper.result_index) != len(gb.grouper.group_keys_seq) + + return gb + + +class TestGroupBy: + def test_groupby_with_timegrouper(self): + # GH 4161 + # TimeGrouper requires a sorted index + # also verifies that the resultant index has the correct name + df_original = DataFrame( + { + "Buyer": "Carl Carl Carl Carl Joe Carl".split(), + "Quantity": [18, 3, 5, 1, 9, 3], + "Date": [ + datetime(2013, 9, 1, 13, 0), + datetime(2013, 9, 1, 13, 5), + datetime(2013, 10, 1, 20, 0), + datetime(2013, 10, 3, 10, 0), + datetime(2013, 12, 2, 12, 0), + datetime(2013, 9, 2, 14, 0), + ], + } + ) + + # GH 6908 change target column's order + df_reordered = df_original.sort_values(by="Quantity") + + for df in [df_original, df_reordered]: + df = df.set_index(["Date"]) + + expected = DataFrame( + {"Quantity": 0}, + index=date_range( + "20130901", "20131205", freq="5D", name="Date", inclusive="left" + ), + ) + expected.iloc[[0, 6, 18], 0] = np.array([24, 6, 9], dtype="int64") + + result1 = df.resample("5D").sum() + tm.assert_frame_equal(result1, expected) + + df_sorted = df.sort_index() + result2 = df_sorted.groupby(Grouper(freq="5D")).sum() + tm.assert_frame_equal(result2, expected) + + result3 = df.groupby(Grouper(freq="5D")).sum() + tm.assert_frame_equal(result3, expected) + + @pytest.mark.parametrize("should_sort", [True, False]) + def test_groupby_with_timegrouper_methods(self, should_sort): + # GH 3881 + # make sure API of timegrouper conforms + + df = DataFrame( + { + "Branch": "A A A A A B".split(), + "Buyer": "Carl Mark Carl Joe Joe Carl".split(), + "Quantity": [1, 3, 5, 8, 9, 3], + "Date": [ + datetime(2013, 1, 1, 13, 0), + datetime(2013, 1, 1, 13, 5), + datetime(2013, 10, 1, 20, 0), + datetime(2013, 10, 2, 10, 0), + datetime(2013, 12, 2, 12, 0), + datetime(2013, 12, 2, 14, 0), + ], + } + ) + + if should_sort: + df = df.sort_values(by="Quantity", ascending=False) + + df = df.set_index("Date", drop=False) + g = df.groupby(Grouper(freq="6M")) + assert g.group_keys + + assert isinstance(g.grouper, BinGrouper) + groups = g.groups + assert isinstance(groups, dict) + assert len(groups) == 3 + + def test_timegrouper_with_reg_groups(self): + + # GH 3794 + # allow combination of timegrouper/reg groups + + df_original = DataFrame( + { + "Branch": "A A A A A A A B".split(), + "Buyer": "Carl Mark Carl Carl Joe Joe Joe Carl".split(), + "Quantity": [1, 3, 5, 1, 8, 1, 9, 3], + "Date": [ + datetime(2013, 1, 1, 13, 0), + datetime(2013, 1, 1, 13, 5), + datetime(2013, 10, 1, 20, 0), + datetime(2013, 10, 2, 10, 0), + datetime(2013, 10, 1, 20, 0), + datetime(2013, 10, 2, 10, 0), + datetime(2013, 12, 2, 12, 0), + datetime(2013, 12, 2, 14, 0), + ], + } + ).set_index("Date") + + df_sorted = df_original.sort_values(by="Quantity", ascending=False) + + for df in [df_original, df_sorted]: + expected = DataFrame( + { + "Buyer": "Carl Joe Mark".split(), + "Quantity": [10, 18, 3], + "Date": [ + datetime(2013, 12, 31, 0, 0), + datetime(2013, 12, 31, 0, 0), + datetime(2013, 12, 31, 0, 0), + ], + } + ).set_index(["Date", "Buyer"]) + + result = df.groupby([Grouper(freq="A"), "Buyer"]).sum() + tm.assert_frame_equal(result, expected) + + expected = DataFrame( + { + "Buyer": "Carl Mark Carl Joe".split(), + "Quantity": [1, 3, 9, 18], + "Date": [ + datetime(2013, 1, 1, 0, 0), + datetime(2013, 1, 1, 0, 0), + datetime(2013, 7, 1, 0, 0), + datetime(2013, 7, 1, 0, 0), + 
], + } + ).set_index(["Date", "Buyer"]) + result = df.groupby([Grouper(freq="6MS"), "Buyer"]).sum() + tm.assert_frame_equal(result, expected) + + df_original = DataFrame( + { + "Branch": "A A A A A A A B".split(), + "Buyer": "Carl Mark Carl Carl Joe Joe Joe Carl".split(), + "Quantity": [1, 3, 5, 1, 8, 1, 9, 3], + "Date": [ + datetime(2013, 10, 1, 13, 0), + datetime(2013, 10, 1, 13, 5), + datetime(2013, 10, 1, 20, 0), + datetime(2013, 10, 2, 10, 0), + datetime(2013, 10, 1, 20, 0), + datetime(2013, 10, 2, 10, 0), + datetime(2013, 10, 2, 12, 0), + datetime(2013, 10, 2, 14, 0), + ], + } + ).set_index("Date") + + df_sorted = df_original.sort_values(by="Quantity", ascending=False) + for df in [df_original, df_sorted]: + + expected = DataFrame( + { + "Buyer": "Carl Joe Mark Carl Joe".split(), + "Quantity": [6, 8, 3, 4, 10], + "Date": [ + datetime(2013, 10, 1, 0, 0), + datetime(2013, 10, 1, 0, 0), + datetime(2013, 10, 1, 0, 0), + datetime(2013, 10, 2, 0, 0), + datetime(2013, 10, 2, 0, 0), + ], + } + ).set_index(["Date", "Buyer"]) + + result = df.groupby([Grouper(freq="1D"), "Buyer"]).sum() + tm.assert_frame_equal(result, expected) + + result = df.groupby([Grouper(freq="1M"), "Buyer"]).sum() + expected = DataFrame( + { + "Buyer": "Carl Joe Mark".split(), + "Quantity": [10, 18, 3], + "Date": [ + datetime(2013, 10, 31, 0, 0), + datetime(2013, 10, 31, 0, 0), + datetime(2013, 10, 31, 0, 0), + ], + } + ).set_index(["Date", "Buyer"]) + tm.assert_frame_equal(result, expected) + + # passing the name + df = df.reset_index() + result = df.groupby([Grouper(freq="1M", key="Date"), "Buyer"]).sum() + tm.assert_frame_equal(result, expected) + + with pytest.raises(KeyError, match="'The grouper name foo is not found'"): + df.groupby([Grouper(freq="1M", key="foo"), "Buyer"]).sum() + + # passing the level + df = df.set_index("Date") + result = df.groupby([Grouper(freq="1M", level="Date"), "Buyer"]).sum() + tm.assert_frame_equal(result, expected) + result = df.groupby([Grouper(freq="1M", level=0), "Buyer"]).sum() + tm.assert_frame_equal(result, expected) + + with pytest.raises(ValueError, match="The level foo is not valid"): + df.groupby([Grouper(freq="1M", level="foo"), "Buyer"]).sum() + + # multi names + df = df.copy() + df["Date"] = df.index + offsets.MonthEnd(2) + result = df.groupby([Grouper(freq="1M", key="Date"), "Buyer"]).sum() + expected = DataFrame( + { + "Buyer": "Carl Joe Mark".split(), + "Quantity": [10, 18, 3], + "Date": [ + datetime(2013, 11, 30, 0, 0), + datetime(2013, 11, 30, 0, 0), + datetime(2013, 11, 30, 0, 0), + ], + } + ).set_index(["Date", "Buyer"]) + tm.assert_frame_equal(result, expected) + + # error as we have both a level and a name! + msg = "The Grouper cannot specify both a key and a level!" 
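+ # (key= names a column while level= names an index level, so
+ # Grouper rejects getting both at once)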
+ with pytest.raises(ValueError, match=msg): + df.groupby( + [Grouper(freq="1M", key="Date", level="Date"), "Buyer"] + ).sum() + + # single groupers + expected = DataFrame( + [[31]], + columns=["Quantity"], + index=DatetimeIndex( + [datetime(2013, 10, 31, 0, 0)], freq=offsets.MonthEnd(), name="Date" + ), + ) + result = df.groupby(Grouper(freq="1M")).sum() + tm.assert_frame_equal(result, expected) + + result = df.groupby([Grouper(freq="1M")]).sum() + tm.assert_frame_equal(result, expected) + + expected.index = expected.index.shift(1) + assert expected.index.freq == offsets.MonthEnd() + result = df.groupby(Grouper(freq="1M", key="Date")).sum() + tm.assert_frame_equal(result, expected) + + result = df.groupby([Grouper(freq="1M", key="Date")]).sum() + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("freq", ["D", "M", "A", "Q-APR"]) + def test_timegrouper_with_reg_groups_freq(self, freq): + # GH 6764 multiple grouping with/without sort + df = DataFrame( + { + "date": pd.to_datetime( + [ + "20121002", + "20121007", + "20130130", + "20130202", + "20130305", + "20121002", + "20121207", + "20130130", + "20130202", + "20130305", + "20130202", + "20130305", + ] + ), + "user_id": [1, 1, 1, 1, 1, 3, 3, 3, 5, 5, 5, 5], + "whole_cost": [ + 1790, + 364, + 280, + 259, + 201, + 623, + 90, + 312, + 359, + 301, + 359, + 801, + ], + "cost1": [12, 15, 10, 24, 39, 1, 0, 90, 45, 34, 1, 12], + } + ).set_index("date") + + expected = ( + df.groupby("user_id")["whole_cost"] + .resample(freq) + .sum(min_count=1) # XXX + .dropna() + .reorder_levels(["date", "user_id"]) + .sort_index() + .astype("int64") + ) + expected.name = "whole_cost" + + result1 = ( + df.sort_index().groupby([Grouper(freq=freq), "user_id"])["whole_cost"].sum() + ) + tm.assert_series_equal(result1, expected) + + result2 = df.groupby([Grouper(freq=freq), "user_id"])["whole_cost"].sum() + tm.assert_series_equal(result2, expected) + + def test_timegrouper_get_group(self): + # GH 6914 + + df_original = DataFrame( + { + "Buyer": "Carl Joe Joe Carl Joe Carl".split(), + "Quantity": [18, 3, 5, 1, 9, 3], + "Date": [ + datetime(2013, 9, 1, 13, 0), + datetime(2013, 9, 1, 13, 5), + datetime(2013, 10, 1, 20, 0), + datetime(2013, 10, 3, 10, 0), + datetime(2013, 12, 2, 12, 0), + datetime(2013, 9, 2, 14, 0), + ], + } + ) + df_reordered = df_original.sort_values(by="Quantity") + + # single grouping + expected_list = [ + df_original.iloc[[0, 1, 5]], + df_original.iloc[[2, 3]], + df_original.iloc[[4]], + ] + dt_list = ["2013-09-30", "2013-10-31", "2013-12-31"] + + for df in [df_original, df_reordered]: + grouped = df.groupby(Grouper(freq="M", key="Date")) + for t, expected in zip(dt_list, expected_list): + dt = Timestamp(t) + result = grouped.get_group(dt) + tm.assert_frame_equal(result, expected) + + # multiple grouping + expected_list = [ + df_original.iloc[[1]], + df_original.iloc[[3]], + df_original.iloc[[4]], + ] + g_list = [("Joe", "2013-09-30"), ("Carl", "2013-10-31"), ("Joe", "2013-12-31")] + + for df in [df_original, df_reordered]: + grouped = df.groupby(["Buyer", Grouper(freq="M", key="Date")]) + for (b, t), expected in zip(g_list, expected_list): + dt = Timestamp(t) + result = grouped.get_group((b, dt)) + tm.assert_frame_equal(result, expected) + + # with index + df_original = df_original.set_index("Date") + df_reordered = df_original.sort_values(by="Quantity") + + expected_list = [ + df_original.iloc[[0, 1, 5]], + df_original.iloc[[2, 3]], + df_original.iloc[[4]], + ] + + for df in [df_original, df_reordered]: + grouped = 
df.groupby(Grouper(freq="M")) + for t, expected in zip(dt_list, expected_list): + dt = Timestamp(t) + result = grouped.get_group(dt) + tm.assert_frame_equal(result, expected) + + def test_timegrouper_apply_return_type_series(self): + # Using `apply` with the `TimeGrouper` should give the + # same return type as an `apply` with a `Grouper`. + # Issue #11742 + df = DataFrame({"date": ["10/10/2000", "11/10/2000"], "value": [10, 13]}) + df_dt = df.copy() + df_dt["date"] = pd.to_datetime(df_dt["date"]) + + def sumfunc_series(x): + return Series([x["value"].sum()], ("sum",)) + + expected = df.groupby(Grouper(key="date")).apply(sumfunc_series) + result = df_dt.groupby(Grouper(freq="M", key="date")).apply(sumfunc_series) + tm.assert_frame_equal( + result.reset_index(drop=True), expected.reset_index(drop=True) + ) + + def test_timegrouper_apply_return_type_value(self): + # Using `apply` with the `TimeGrouper` should give the + # same return type as an `apply` with a `Grouper`. + # Issue #11742 + df = DataFrame({"date": ["10/10/2000", "11/10/2000"], "value": [10, 13]}) + df_dt = df.copy() + df_dt["date"] = pd.to_datetime(df_dt["date"]) + + def sumfunc_value(x): + return x.value.sum() + + expected = df.groupby(Grouper(key="date")).apply(sumfunc_value) + result = df_dt.groupby(Grouper(freq="M", key="date")).apply(sumfunc_value) + tm.assert_series_equal( + result.reset_index(drop=True), expected.reset_index(drop=True) + ) + + def test_groupby_groups_datetimeindex(self): + # GH#1430 + periods = 1000 + ind = date_range(start="2012/1/1", freq="5min", periods=periods) + df = DataFrame( + {"high": np.arange(periods), "low": np.arange(periods)}, index=ind + ) + grouped = df.groupby(lambda x: datetime(x.year, x.month, x.day)) + + # it works! + groups = grouped.groups + assert isinstance(list(groups.keys())[0], datetime) + + # GH#11442 + index = date_range("2015/01/01", periods=5, name="date") + df = DataFrame({"A": [5, 6, 7, 8, 9], "B": [1, 2, 3, 4, 5]}, index=index) + result = df.groupby(level="date").groups + dates = ["2015-01-05", "2015-01-04", "2015-01-03", "2015-01-02", "2015-01-01"] + expected = { + Timestamp(date): DatetimeIndex([date], name="date") for date in dates + } + tm.assert_dict_equal(result, expected) + + grouped = df.groupby(level="date") + for date in dates: + result = grouped.get_group(date) + data = [[df.loc[date, "A"], df.loc[date, "B"]]] + expected_index = DatetimeIndex([date], name="date", freq="D") + expected = DataFrame(data, columns=list("AB"), index=expected_index) + tm.assert_frame_equal(result, expected) + + def test_groupby_groups_datetimeindex_tz(self): + # GH 3950 + dates = [ + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + ] + df = DataFrame( + { + "label": ["a", "a", "a", "b", "b", "b"], + "datetime": dates, + "value1": np.arange(6, dtype="int64"), + "value2": [1, 2] * 3, + } + ) + df["datetime"] = df["datetime"].apply(lambda d: Timestamp(d, tz="US/Pacific")) + + exp_idx1 = DatetimeIndex( + [ + "2011-07-19 07:00:00", + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + "2011-07-19 09:00:00", + ], + tz="US/Pacific", + name="datetime", + ) + exp_idx2 = Index(["a", "b"] * 3, name="label") + exp_idx = MultiIndex.from_arrays([exp_idx1, exp_idx2]) + expected = DataFrame( + {"value1": [0, 3, 1, 4, 2, 5], "value2": [1, 2, 2, 1, 1, 2]}, + index=exp_idx, + columns=["value1", "value2"], + ) + + result = df.groupby(["datetime", 
"label"]).sum() + tm.assert_frame_equal(result, expected) + + # by level + didx = DatetimeIndex(dates, tz="Asia/Tokyo") + df = DataFrame( + {"value1": np.arange(6, dtype="int64"), "value2": [1, 2, 3, 1, 2, 3]}, + index=didx, + ) + + exp_idx = DatetimeIndex( + ["2011-07-19 07:00:00", "2011-07-19 08:00:00", "2011-07-19 09:00:00"], + tz="Asia/Tokyo", + ) + expected = DataFrame( + {"value1": [3, 5, 7], "value2": [2, 4, 6]}, + index=exp_idx, + columns=["value1", "value2"], + ) + + result = df.groupby(level=0).sum() + tm.assert_frame_equal(result, expected) + + def test_frame_datetime64_handling_groupby(self): + # it works! + df = DataFrame( + [(3, np.datetime64("2012-07-03")), (3, np.datetime64("2012-07-04"))], + columns=["a", "date"], + ) + result = df.groupby("a").first() + assert result["date"][3] == Timestamp("2012-07-03") + + def test_groupby_multi_timezone(self): + + # combining multiple / different timezones yields UTC + + data = """0,2000-01-28 16:47:00,America/Chicago +1,2000-01-29 16:48:00,America/Chicago +2,2000-01-30 16:49:00,America/Los_Angeles +3,2000-01-31 16:50:00,America/Chicago +4,2000-01-01 16:50:00,America/New_York""" + + df = pd.read_csv(StringIO(data), header=None, names=["value", "date", "tz"]) + result = df.groupby("tz").date.apply( + lambda x: pd.to_datetime(x).dt.tz_localize(x.name) + ) + + expected = Series( + [ + Timestamp("2000-01-28 16:47:00-0600", tz="America/Chicago"), + Timestamp("2000-01-29 16:48:00-0600", tz="America/Chicago"), + Timestamp("2000-01-30 16:49:00-0800", tz="America/Los_Angeles"), + Timestamp("2000-01-31 16:50:00-0600", tz="America/Chicago"), + Timestamp("2000-01-01 16:50:00-0500", tz="America/New_York"), + ], + name="date", + dtype=object, + ) + tm.assert_series_equal(result, expected) + + tz = "America/Chicago" + res_values = df.groupby("tz").date.get_group(tz) + result = pd.to_datetime(res_values).dt.tz_localize(tz) + exp_values = Series( + ["2000-01-28 16:47:00", "2000-01-29 16:48:00", "2000-01-31 16:50:00"], + index=[0, 1, 3], + name="date", + ) + expected = pd.to_datetime(exp_values).dt.tz_localize(tz) + tm.assert_series_equal(result, expected) + + def test_groupby_groups_periods(self): + dates = [ + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + ] + df = DataFrame( + { + "label": ["a", "a", "a", "b", "b", "b"], + "period": [pd.Period(d, freq="H") for d in dates], + "value1": np.arange(6, dtype="int64"), + "value2": [1, 2] * 3, + } + ) + + exp_idx1 = pd.PeriodIndex( + [ + "2011-07-19 07:00:00", + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + "2011-07-19 09:00:00", + ], + freq="H", + name="period", + ) + exp_idx2 = Index(["a", "b"] * 3, name="label") + exp_idx = MultiIndex.from_arrays([exp_idx1, exp_idx2]) + expected = DataFrame( + {"value1": [0, 3, 1, 4, 2, 5], "value2": [1, 2, 2, 1, 1, 2]}, + index=exp_idx, + columns=["value1", "value2"], + ) + + result = df.groupby(["period", "label"]).sum() + tm.assert_frame_equal(result, expected) + + # by level + didx = pd.PeriodIndex(dates, freq="H") + df = DataFrame( + {"value1": np.arange(6, dtype="int64"), "value2": [1, 2, 3, 1, 2, 3]}, + index=didx, + ) + + exp_idx = pd.PeriodIndex( + ["2011-07-19 07:00:00", "2011-07-19 08:00:00", "2011-07-19 09:00:00"], + freq="H", + ) + expected = DataFrame( + {"value1": [3, 5, 7], "value2": [2, 4, 6]}, + index=exp_idx, + columns=["value1", "value2"], + ) + + result = df.groupby(level=0).sum() + 
tm.assert_frame_equal(result, expected) + + def test_groupby_first_datetime64(self): + df = DataFrame([(1, 1351036800000000000), (2, 1351036800000000000)]) + df[1] = df[1].view("M8[ns]") + + assert issubclass(df[1].dtype.type, np.datetime64) + + result = df.groupby(level=0).first() + got_dt = result[1].dtype + assert issubclass(got_dt.type, np.datetime64) + + result = df[1].groupby(level=0).first() + got_dt = result.dtype + assert issubclass(got_dt.type, np.datetime64) + + def test_groupby_max_datetime64(self): + # GH 5869 + # datetimelike dtype conversion from int + df = DataFrame({"A": Timestamp("20130101"), "B": np.arange(5)}) + expected = df.groupby("A")["A"].apply(lambda x: x.max()) + result = df.groupby("A")["A"].max() + tm.assert_series_equal(result, expected) + + def test_groupby_datetime64_32_bit(self): + # GH 6410 / numpy 4328 + # 32-bit under 1.9-dev indexing issue + + df = DataFrame({"A": range(2), "B": [Timestamp("2000-01-1")] * 2}) + result = df.groupby("A")["B"].transform(min) + expected = Series([Timestamp("2000-01-1")] * 2, name="B") + tm.assert_series_equal(result, expected) + + def test_groupby_with_timezone_selection(self): + # GH 11616 + # Test that column selection returns output in correct timezone. + np.random.seed(42) + df = DataFrame( + { + "factor": np.random.randint(0, 3, size=60), + "time": date_range("01/01/2000 00:00", periods=60, freq="s", tz="UTC"), + } + ) + df1 = df.groupby("factor").max()["time"] + df2 = df.groupby("factor")["time"].max() + tm.assert_series_equal(df1, df2) + + def test_timezone_info(self): + # see gh-11682: Timezone info lost when broadcasting + # scalar datetime to DataFrame + + df = DataFrame({"a": [1], "b": [datetime.now(pytz.utc)]}) + assert df["b"][0].tzinfo == pytz.utc + df = DataFrame({"a": [1, 2, 3]}) + df["b"] = datetime.now(pytz.utc) + assert df["b"][0].tzinfo == pytz.utc + + def test_datetime_count(self): + df = DataFrame( + {"a": [1, 2, 3] * 2, "dates": date_range("now", periods=6, freq="T")} + ) + result = df.groupby("a").dates.count() + expected = Series([2, 2, 2], index=Index([1, 2, 3], name="a"), name="dates") + tm.assert_series_equal(result, expected) + + def test_first_last_max_min_on_time_data(self): + # GH 10295 + # Verify that NaT is not in the result of max, min, first and last on + # Dataframe with datetime or timedelta values. 
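+ # (like NaN, NaT should simply be skipped by these reductions, so the
+ # grouped results are compared against df_ref, which drops NaT rows up front)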
+ from datetime import timedelta as td + + df_test = DataFrame( + { + "dt": [ + np.nan, + "2015-07-24 10:10", + "2015-07-25 11:11", + "2015-07-23 12:12", + np.nan, + ], + "td": [np.nan, td(days=1), td(days=2), td(days=3), np.nan], + } + ) + df_test.dt = pd.to_datetime(df_test.dt) + df_test["group"] = "A" + df_ref = df_test[df_test.dt.notna()] + + grouped_test = df_test.groupby("group") + grouped_ref = df_ref.groupby("group") + + tm.assert_frame_equal(grouped_ref.max(), grouped_test.max()) + tm.assert_frame_equal(grouped_ref.min(), grouped_test.min()) + tm.assert_frame_equal(grouped_ref.first(), grouped_test.first()) + tm.assert_frame_equal(grouped_ref.last(), grouped_test.last()) + + def test_nunique_with_timegrouper_and_nat(self): + # GH 17575 + test = DataFrame( + { + "time": [ + Timestamp("2016-06-28 09:35:35"), + pd.NaT, + Timestamp("2016-06-28 16:46:28"), + ], + "data": ["1", "2", "3"], + } + ) + + grouper = Grouper(key="time", freq="h") + result = test.groupby(grouper)["data"].nunique() + expected = test[test.time.notnull()].groupby(grouper)["data"].nunique() + expected.index = expected.index._with_freq(None) + tm.assert_series_equal(result, expected) + + def test_scalar_call_versus_list_call(self): + # Issue: 17530 + data_frame = { + "location": ["shanghai", "beijing", "shanghai"], + "time": Series( + ["2017-08-09 13:32:23", "2017-08-11 23:23:15", "2017-08-11 22:23:15"], + dtype="datetime64[ns]", + ), + "value": [1, 2, 3], + } + data_frame = DataFrame(data_frame).set_index("time") + grouper = Grouper(freq="D") + + grouped = data_frame.groupby(grouper) + result = grouped.count() + grouped = data_frame.groupby([grouper]) + expected = grouped.count() + + tm.assert_frame_equal(result, expected) + + def test_grouper_period_index(self): + # GH 32108 + periods = 2 + index = pd.period_range( + start="2018-01", periods=periods, freq="M", name="Month" + ) + period_series = Series(range(periods), index=index) + result = period_series.groupby(period_series.index.month).sum() + + expected = Series( + range(0, periods), index=Index(range(1, periods + 1), name=index.name) + ) + tm.assert_series_equal(result, expected) + + def test_groupby_apply_timegrouper_with_nat_dict_returns( + self, groupby_with_truncated_bingrouper + ): + # GH#43500 case where gb.grouper.result_index and gb.grouper.group_keys_seq + # have different lengths that goes through the `isinstance(values[0], dict)` + # path + gb = groupby_with_truncated_bingrouper + + res = gb["Quantity"].apply(lambda x: {"foo": len(x)}) + + dti = date_range("2013-09-01", "2013-10-01", freq="5D", name="Date") + mi = MultiIndex.from_arrays([dti, ["foo"] * len(dti)]) + expected = Series([3, 0, 0, 0, 0, 0, 2], index=mi, name="Quantity") + tm.assert_series_equal(res, expected) + + def test_groupby_apply_timegrouper_with_nat_scalar_returns( + self, groupby_with_truncated_bingrouper + ): + # GH#43500 Previously raised ValueError bc used index with incorrect + # length in wrap_applied_result + gb = groupby_with_truncated_bingrouper + + res = gb["Quantity"].apply(lambda x: x.iloc[0] if len(x) else np.nan) + + dti = date_range("2013-09-01", "2013-10-01", freq="5D", name="Date") + expected = Series( + [18, np.nan, np.nan, np.nan, np.nan, np.nan, 5], + index=dti._with_freq(None), + name="Quantity", + ) + + tm.assert_series_equal(res, expected) + + def test_groupby_apply_timegrouper_with_nat_apply_squeeze( + self, frame_for_truncated_bingrouper + ): + df = frame_for_truncated_bingrouper + + # We need to create a GroupBy object with only one non-NaT group, + # so 
use a huge freq so that all non-NaT dates will be grouped together + tdg = Grouper(key="Date", freq="100Y") + + with tm.assert_produces_warning(FutureWarning, match="`squeeze` parameter"): + gb = df.groupby(tdg, squeeze=True) + + # check that we will go through the singular_series path + # in _wrap_applied_output_series + assert gb.ngroups == 1 + assert gb._selected_obj._get_axis(gb.axis).nlevels == 1 + + # function that returns a Series + res = gb.apply(lambda x: x["Quantity"] * 2) + + key = Timestamp("2013-12-31") + ordering = df["Date"].sort_values().dropna().index + mi = MultiIndex.from_product([[key], ordering], names=["Date", None]) + + ex_values = df["Quantity"].take(ordering).values * 2 + expected = Series(ex_values, index=mi, name="Quantity") + tm.assert_series_equal(res, expected) + + @td.skip_if_no("numba") + def test_groupby_agg_numba_timegrouper_with_nat( + self, groupby_with_truncated_bingrouper + ): + # See discussion in GH#43487 + gb = groupby_with_truncated_bingrouper + + result = gb["Quantity"].aggregate( + lambda values, index: np.nanmean(values), engine="numba" + ) + + expected = gb["Quantity"].aggregate(np.nanmean) + tm.assert_series_equal(result, expected) + + result_df = gb[["Quantity"]].aggregate( + lambda values, index: np.nanmean(values), engine="numba" + ) + expected_df = gb[["Quantity"]].aggregate(np.nanmean) + tm.assert_frame_equal(result_df, expected_df) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_value_counts.py b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_value_counts.py new file mode 100644 index 0000000..54f672c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/groupby/test_value_counts.py @@ -0,0 +1,174 @@ +""" +these are systematically testing all of the args to value_counts +with different size combinations. 
This is to ensure stability of the sorting +and proper parameter handling +""" + +from itertools import product + +import numpy as np +import pytest + +from pandas import ( + Categorical, + CategoricalIndex, + DataFrame, + Grouper, + MultiIndex, + Series, + date_range, + to_datetime, +) +import pandas._testing as tm + + +# our starting frame +def seed_df(seed_nans, n, m): + np.random.seed(1234) + days = date_range("2015-08-24", periods=10) + + frame = DataFrame( + { + "1st": np.random.choice(list("abcd"), n), + "2nd": np.random.choice(days, n), + "3rd": np.random.randint(1, m + 1, n), + } + ) + + if seed_nans: + frame.loc[1::11, "1st"] = np.nan + frame.loc[3::17, "2nd"] = np.nan + frame.loc[7::19, "3rd"] = np.nan + frame.loc[8::19, "3rd"] = np.nan + frame.loc[9::19, "3rd"] = np.nan + + return frame + + +# create input df, keys, and the bins +binned = [] +ids = [] +for seed_nans in [True, False]: + for n, m in product((100, 1000), (5, 20)): + + df = seed_df(seed_nans, n, m) + bins = None, np.arange(0, max(5, df["3rd"].max()) + 1, 2) + keys = "1st", "2nd", ["1st", "2nd"] + for k, b in product(keys, bins): + binned.append((df, k, b, n, m)) + ids.append(f"{k}-{n}-{m}") + + +@pytest.mark.slow +@pytest.mark.parametrize("df, keys, bins, n, m", binned, ids=ids) +@pytest.mark.parametrize("isort", [True, False]) +@pytest.mark.parametrize("normalize", [True, False]) +@pytest.mark.parametrize("sort", [True, False]) +@pytest.mark.parametrize("ascending", [True, False]) +@pytest.mark.parametrize("dropna", [True, False]) +def test_series_groupby_value_counts( + df, keys, bins, n, m, isort, normalize, sort, ascending, dropna +): + def rebuild_index(df): + arr = list(map(df.index.get_level_values, range(df.index.nlevels))) + df.index = MultiIndex.from_arrays(arr, names=df.index.names) + return df + + kwargs = { + "normalize": normalize, + "sort": sort, + "ascending": ascending, + "dropna": dropna, + "bins": bins, + } + + gr = df.groupby(keys, sort=isort) + left = gr["3rd"].value_counts(**kwargs) + + gr = df.groupby(keys, sort=isort) + right = gr["3rd"].apply(Series.value_counts, **kwargs) + right.index.names = right.index.names[:-1] + ["3rd"] + + # have to sort on index because of unstable sort on values + left, right = map(rebuild_index, (left, right)) # xref GH9212 + tm.assert_series_equal(left.sort_index(), right.sort_index()) + + +def test_series_groupby_value_counts_with_grouper(): + # GH28479 + df = DataFrame( + { + "Timestamp": [ + 1565083561, + 1565083561 + 86400, + 1565083561 + 86500, + 1565083561 + 86400 * 2, + 1565083561 + 86400 * 3, + 1565083561 + 86500 * 3, + 1565083561 + 86400 * 4, + ], + "Food": ["apple", "apple", "banana", "banana", "orange", "orange", "pear"], + } + ).drop([3]) + + df["Datetime"] = to_datetime(df["Timestamp"].apply(lambda t: str(t)), unit="s") + dfg = df.groupby(Grouper(freq="1D", key="Datetime")) + + # have to sort on index because of unstable sort on values xref GH9212 + result = dfg["Food"].value_counts().sort_index() + expected = dfg["Food"].apply(Series.value_counts).sort_index() + expected.index.names = result.index.names + + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("columns", [["A", "B"], ["A", "B", "C"]]) +def test_series_groupby_value_counts_empty(columns): + # GH39172 + df = DataFrame(columns=columns) + dfg = df.groupby(columns[:-1]) + + result = dfg[columns[-1]].value_counts() + expected = Series([], name=columns[-1], dtype=result.dtype) + expected.index = MultiIndex.from_arrays([[]] * len(columns), names=columns) + + 
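+    # Illustrative aside, not part of the assertion below: the same API on a
+    # tiny hypothetical frame ("demo"), showing the main value_counts
+    # arguments the parametrized test at the top of this file exercises.
+    demo = DataFrame({"key": ["a", "a", "a", "b"], "val": [1, 1, 2, 3]})
+    demo_counts = demo.groupby("key")["val"].value_counts()
+    # counts per (group, value): (a, 1) -> 2, (a, 2) -> 1, (b, 3) -> 1
+    demo_shares = demo.groupby("key")["val"].value_counts(normalize=True)
+    # fractions within each group: (a, 1) -> 2/3, (a, 2) -> 1/3, (b, 3) -> 1.0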
tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("columns", [["A", "B"], ["A", "B", "C"]]) +def test_series_groupby_value_counts_one_row(columns): + # GH42618 + df = DataFrame(data=[range(len(columns))], columns=columns) + dfg = df.groupby(columns[:-1]) + + result = dfg[columns[-1]].value_counts() + expected = df.value_counts().rename(columns[-1]) + + tm.assert_series_equal(result, expected) + + +def test_series_groupby_value_counts_on_categorical(): + # GH38672 + + s = Series(Categorical(["a"], categories=["a", "b"])) + result = s.groupby([0]).value_counts() + + expected = Series( + data=[1, 0], + index=MultiIndex.from_arrays( + [ + [0, 0], + CategoricalIndex( + ["a", "b"], categories=["a", "b"], ordered=False, dtype="category" + ), + ] + ), + name=0, + ) + + # Expected: + # 0 a 1 + # b 0 + # Name: 0, dtype: int64 + + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/transform/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/groupby/transform/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/transform/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/groupby/transform/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..756b245 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/groupby/transform/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/transform/__pycache__/test_numba.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/groupby/transform/__pycache__/test_numba.cpython-38.pyc new file mode 100644 index 0000000..801d6dc Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/groupby/transform/__pycache__/test_numba.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/transform/__pycache__/test_transform.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/groupby/transform/__pycache__/test_transform.cpython-38.pyc new file mode 100644 index 0000000..652121d Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/groupby/transform/__pycache__/test_transform.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/transform/test_numba.py b/.local/lib/python3.8/site-packages/pandas/tests/groupby/transform/test_numba.py new file mode 100644 index 0000000..4e1b777 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/groupby/transform/test_numba.py @@ -0,0 +1,178 @@ +import pytest + +from pandas.errors import NumbaUtilError +import pandas.util._test_decorators as td + +from pandas import ( + DataFrame, + Series, + option_context, +) +import pandas._testing as tm +from pandas.core.util.numba_ import NUMBA_FUNC_CACHE + + +@td.skip_if_no("numba") +def test_correct_function_signature(): + def incorrect_function(x): + return x + 1 + + data = DataFrame( + {"key": ["a", "a", "b", "b", "a"], "data": [1.0, 2.0, 3.0, 4.0, 5.0]}, + columns=["key", "data"], + ) + with pytest.raises(NumbaUtilError, match="The first 2"): + data.groupby("key").transform(incorrect_function, engine="numba") + + with pytest.raises(NumbaUtilError, match="The first 2"): + data.groupby("key")["data"].transform(incorrect_function, engine="numba") + + +@td.skip_if_no("numba") +def test_check_nopython_kwargs(): + def incorrect_function(x, **kwargs): + return x + 1 + + 
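+    # For contrast with incorrect_function above, a sketch of a *valid*
+    # numba-engine UDF (hypothetical, not used by this test): the engine
+    # hands each group to the function as a values array plus its index,
+    # and extra state may only be passed positionally -- never as kwargs.
+    def correct_function(values, index):
+        return values + 1
+
+    # usage sketch: data.groupby("key").transform(correct_function, engine="numba")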
data = DataFrame( + {"key": ["a", "a", "b", "b", "a"], "data": [1.0, 2.0, 3.0, 4.0, 5.0]}, + columns=["key", "data"], + ) + with pytest.raises(NumbaUtilError, match="numba does not support"): + data.groupby("key").transform(incorrect_function, engine="numba", a=1) + + with pytest.raises(NumbaUtilError, match="numba does not support"): + data.groupby("key")["data"].transform(incorrect_function, engine="numba", a=1) + + +@td.skip_if_no("numba") +@pytest.mark.filterwarnings("ignore:\n") +# Filter warnings when parallel=True and the function can't be parallelized by Numba +@pytest.mark.parametrize("jit", [True, False]) +@pytest.mark.parametrize("pandas_obj", ["Series", "DataFrame"]) +def test_numba_vs_cython(jit, pandas_obj, nogil, parallel, nopython): + def func(values, index): + return values + 1 + + if jit: + # Test accepted jitted functions + import numba + + func = numba.jit(func) + + data = DataFrame( + {0: ["a", "a", "b", "b", "a"], 1: [1.0, 2.0, 3.0, 4.0, 5.0]}, columns=[0, 1] + ) + engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} + grouped = data.groupby(0) + if pandas_obj == "Series": + grouped = grouped[1] + + result = grouped.transform(func, engine="numba", engine_kwargs=engine_kwargs) + expected = grouped.transform(lambda x: x + 1, engine="cython") + + tm.assert_equal(result, expected) + + +@td.skip_if_no("numba") +@pytest.mark.filterwarnings("ignore:\n") +# Filter warnings when parallel=True and the function can't be parallelized by Numba +@pytest.mark.parametrize("jit", [True, False]) +@pytest.mark.parametrize("pandas_obj", ["Series", "DataFrame"]) +def test_cache(jit, pandas_obj, nogil, parallel, nopython): + # Test that the functions are cached correctly if we switch functions + def func_1(values, index): + return values + 1 + + def func_2(values, index): + return values * 5 + + if jit: + import numba + + func_1 = numba.jit(func_1) + func_2 = numba.jit(func_2) + + data = DataFrame( + {0: ["a", "a", "b", "b", "a"], 1: [1.0, 2.0, 3.0, 4.0, 5.0]}, columns=[0, 1] + ) + engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} + grouped = data.groupby(0) + if pandas_obj == "Series": + grouped = grouped[1] + + result = grouped.transform(func_1, engine="numba", engine_kwargs=engine_kwargs) + expected = grouped.transform(lambda x: x + 1, engine="cython") + tm.assert_equal(result, expected) + # func_1 should be in the cache now + assert (func_1, "groupby_transform") in NUMBA_FUNC_CACHE + + # Add func_2 to the cache + result = grouped.transform(func_2, engine="numba", engine_kwargs=engine_kwargs) + expected = grouped.transform(lambda x: x * 5, engine="cython") + tm.assert_equal(result, expected) + assert (func_2, "groupby_transform") in NUMBA_FUNC_CACHE + + # Retest func_1 which should use the cache + result = grouped.transform(func_1, engine="numba", engine_kwargs=engine_kwargs) + expected = grouped.transform(lambda x: x + 1, engine="cython") + tm.assert_equal(result, expected) + + +@td.skip_if_no("numba") +def test_use_global_config(): + def func_1(values, index): + return values + 1 + + data = DataFrame( + {0: ["a", "a", "b", "b", "a"], 1: [1.0, 2.0, 3.0, 4.0, 5.0]}, columns=[0, 1] + ) + grouped = data.groupby(0) + expected = grouped.transform(func_1, engine="numba") + with option_context("compute.use_numba", True): + result = grouped.transform(func_1, engine=None) + tm.assert_frame_equal(expected, result) + + +@td.skip_if_no("numba") +@pytest.mark.parametrize( + "agg_func", [["min", "max"], "min", {"B": ["min", "max"], "C": "sum"}] +) 
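+# Note on the cases above: the Numba engine compiles a single user-defined
+# function per call, so strings, lists, and dicts of functions are expected
+# to raise NotImplementedError rather than being applied one by one.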
+def test_multifunc_notimplimented(agg_func): + data = DataFrame( + {0: ["a", "a", "b", "b", "a"], 1: [1.0, 2.0, 3.0, 4.0, 5.0]}, columns=[0, 1] + ) + grouped = data.groupby(0) + with pytest.raises(NotImplementedError, match="Numba engine can"): + grouped.transform(agg_func, engine="numba") + + with pytest.raises(NotImplementedError, match="Numba engine can"): + grouped[1].transform(agg_func, engine="numba") + + +@td.skip_if_no("numba") +def test_args_not_cached(): + # GH 41647 + def sum_last(values, index, n): + return values[-n:].sum() + + df = DataFrame({"id": [0, 0, 1, 1], "x": [1, 1, 1, 1]}) + grouped_x = df.groupby("id")["x"] + result = grouped_x.transform(sum_last, 1, engine="numba") + expected = Series([1.0] * 4, name="x") + tm.assert_series_equal(result, expected) + + result = grouped_x.transform(sum_last, 2, engine="numba") + expected = Series([2.0] * 4, name="x") + tm.assert_series_equal(result, expected) + + +@td.skip_if_no("numba") +def test_index_data_correctly_passed(): + # GH 43133 + def f(values, index): + return index - 1 + + df = DataFrame({"group": ["A", "A", "B"], "v": [4, 5, 6]}, index=[-1, -2, -3]) + result = df.groupby("group").transform(f, engine="numba") + expected = DataFrame([-4.0, -3.0, -2.0], columns=["v"], index=[-1, -2, -3]) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/groupby/transform/test_transform.py b/.local/lib/python3.8/site-packages/pandas/tests/groupby/transform/test_transform.py new file mode 100644 index 0000000..12a25a1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/groupby/transform/test_transform.py @@ -0,0 +1,1311 @@ +""" test with the .transform """ +from io import StringIO + +import numpy as np +import pytest + +from pandas.core.dtypes.common import ( + ensure_platform_int, + is_timedelta64_dtype, +) + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + MultiIndex, + Series, + Timestamp, + concat, + date_range, +) +import pandas._testing as tm +from pandas.core.groupby.base import maybe_normalize_deprecated_kernels +from pandas.core.groupby.generic import ( + DataFrameGroupBy, + SeriesGroupBy, +) + + +def assert_fp_equal(a, b): + assert (np.abs(a - b) < 1e-12).all() + + +def test_transform(): + data = Series(np.arange(9) // 3, index=np.arange(9)) + + index = np.arange(9) + np.random.shuffle(index) + data = data.reindex(index) + + grouped = data.groupby(lambda x: x // 3) + + transformed = grouped.transform(lambda x: x * x.sum()) + assert transformed[7] == 12 + + # GH 8046 + # make sure that we preserve the input order + + df = DataFrame( + np.arange(6, dtype="int64").reshape(3, 2), columns=["a", "b"], index=[0, 2, 1] + ) + key = [0, 0, 1] + expected = ( + df.sort_index() + .groupby(key) + .transform(lambda x: x - x.mean()) + .groupby(key) + .mean() + ) + result = df.groupby(key).transform(lambda x: x - x.mean()).groupby(key).mean() + tm.assert_frame_equal(result, expected) + + def demean(arr): + return arr - arr.mean() + + people = DataFrame( + np.random.randn(5, 5), + columns=["a", "b", "c", "d", "e"], + index=["Joe", "Steve", "Wes", "Jim", "Travis"], + ) + key = ["one", "two", "one", "two", "one"] + result = people.groupby(key).transform(demean).groupby(key).mean() + expected = people.groupby(key).apply(demean).groupby(key).mean() + tm.assert_frame_equal(result, expected) + + # GH 8430 + df = tm.makeTimeDataFrame() + g = df.groupby(pd.Grouper(freq="M")) + g.transform(lambda x: x - 1) + + # GH 9700 + df = DataFrame({"a": range(5, 10), "b": 
range(5)}) + result = df.groupby("a").transform(max) + expected = DataFrame({"b": range(5)}) + tm.assert_frame_equal(result, expected) + + +def test_transform_fast(): + + df = DataFrame({"id": np.arange(100000) / 3, "val": np.random.randn(100000)}) + + grp = df.groupby("id")["val"] + + values = np.repeat(grp.mean().values, ensure_platform_int(grp.count().values)) + expected = Series(values, index=df.index, name="val") + + result = grp.transform(np.mean) + tm.assert_series_equal(result, expected) + + result = grp.transform("mean") + tm.assert_series_equal(result, expected) + + # GH 12737 + df = DataFrame( + { + "grouping": [0, 1, 1, 3], + "f": [1.1, 2.1, 3.1, 4.5], + "d": date_range("2014-1-1", "2014-1-4"), + "i": [1, 2, 3, 4], + }, + columns=["grouping", "f", "i", "d"], + ) + result = df.groupby("grouping").transform("first") + + dates = [ + Timestamp("2014-1-1"), + Timestamp("2014-1-2"), + Timestamp("2014-1-2"), + Timestamp("2014-1-4"), + ] + expected = DataFrame( + {"f": [1.1, 2.1, 2.1, 4.5], "d": dates, "i": [1, 2, 2, 4]}, + columns=["f", "i", "d"], + ) + tm.assert_frame_equal(result, expected) + + # selection + result = df.groupby("grouping")[["f", "i"]].transform("first") + expected = expected[["f", "i"]] + tm.assert_frame_equal(result, expected) + + # dup columns + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["g", "a", "a"]) + result = df.groupby("g").transform("first") + expected = df.drop("g", axis=1) + tm.assert_frame_equal(result, expected) + + +def test_transform_broadcast(tsframe, ts): + grouped = ts.groupby(lambda x: x.month) + result = grouped.transform(np.mean) + + tm.assert_index_equal(result.index, ts.index) + for _, gp in grouped: + assert_fp_equal(result.reindex(gp.index), gp.mean()) + + grouped = tsframe.groupby(lambda x: x.month) + result = grouped.transform(np.mean) + tm.assert_index_equal(result.index, tsframe.index) + for _, gp in grouped: + agged = gp.mean() + res = result.reindex(gp.index) + for col in tsframe: + assert_fp_equal(res[col], agged[col]) + + # group columns + grouped = tsframe.groupby({"A": 0, "B": 0, "C": 1, "D": 1}, axis=1) + result = grouped.transform(np.mean) + tm.assert_index_equal(result.index, tsframe.index) + tm.assert_index_equal(result.columns, tsframe.columns) + for _, gp in grouped: + agged = gp.mean(1) + res = result.reindex(columns=gp.columns) + for idx in gp.index: + assert_fp_equal(res.xs(idx), agged[idx]) + + +def test_transform_axis_1(request, transformation_func, using_array_manager): + # GH 36308 + if using_array_manager and transformation_func == "pct_change": + # TODO(ArrayManager) column-wise shift + request.node.add_marker( + pytest.mark.xfail(reason="ArrayManager: shift axis=1 not yet implemented") + ) + # TODO(2.0) Remove after pad/backfill deprecation enforced + transformation_func = maybe_normalize_deprecated_kernels(transformation_func) + + warn = None + if transformation_func == "tshift": + warn = FutureWarning + + request.node.add_marker(pytest.mark.xfail(reason="tshift is deprecated")) + args = ("ffill",) if transformation_func == "fillna" else () + + df = DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}, index=["x", "y"]) + with tm.assert_produces_warning(warn): + result = df.groupby([0, 0, 1], axis=1).transform(transformation_func, *args) + expected = df.T.groupby([0, 0, 1]).transform(transformation_func, *args).T + + if transformation_func in ["diff", "shift"]: + # Result contains nans, so transpose coerces to float + expected["b"] = expected["b"].astype("int64") + + # cumcount returns Series; the rest are 
DataFrame + tm.assert_equal(result, expected) + + +def test_transform_axis_ts(tsframe): + + # make sure that we are setting the axes + # correctly when on axis=0 or 1 + # in the presence of a non-monotonic indexer + # GH12713 + + base = tsframe.iloc[0:5] + r = len(base.index) + c = len(base.columns) + tso = DataFrame( + np.random.randn(r, c), index=base.index, columns=base.columns, dtype="float64" + ) + # monotonic + ts = tso + grouped = ts.groupby(lambda x: x.weekday()) + result = ts - grouped.transform("mean") + expected = grouped.apply(lambda x: x - x.mean()) + tm.assert_frame_equal(result, expected) + + ts = ts.T + grouped = ts.groupby(lambda x: x.weekday(), axis=1) + result = ts - grouped.transform("mean") + expected = grouped.apply(lambda x: (x.T - x.mean(1)).T) + tm.assert_frame_equal(result, expected) + + # non-monotonic + ts = tso.iloc[[1, 0] + list(range(2, len(base)))] + grouped = ts.groupby(lambda x: x.weekday()) + result = ts - grouped.transform("mean") + expected = grouped.apply(lambda x: x - x.mean()) + tm.assert_frame_equal(result, expected) + + ts = ts.T + grouped = ts.groupby(lambda x: x.weekday(), axis=1) + result = ts - grouped.transform("mean") + expected = grouped.apply(lambda x: (x.T - x.mean(1)).T) + tm.assert_frame_equal(result, expected) + + +def test_transform_dtype(): + # GH 9807 + # Check transform dtype output is preserved + df = DataFrame([[1, 3], [2, 3]]) + result = df.groupby(1).transform("mean") + expected = DataFrame([[1.5], [1.5]]) + tm.assert_frame_equal(result, expected) + + +def test_transform_bug(): + # GH 5712 + # transforming on a datetime column + df = DataFrame({"A": Timestamp("20130101"), "B": np.arange(5)}) + result = df.groupby("A")["B"].transform(lambda x: x.rank(ascending=False)) + expected = Series(np.arange(5, 0, step=-1), name="B", dtype="float64") + tm.assert_series_equal(result, expected) + + +def test_transform_numeric_to_boolean(): + # GH 16875 + # inconsistency in transforming boolean values + expected = Series([True, True], name="A") + + df = DataFrame({"A": [1.1, 2.2], "B": [1, 2]}) + result = df.groupby("B").A.transform(lambda x: True) + tm.assert_series_equal(result, expected) + + df = DataFrame({"A": [1, 2], "B": [1, 2]}) + result = df.groupby("B").A.transform(lambda x: True) + tm.assert_series_equal(result, expected) + + +def test_transform_datetime_to_timedelta(): + # GH 15429 + # transforming a datetime to timedelta + df = DataFrame({"A": Timestamp("20130101"), "B": np.arange(5)}) + expected = Series([Timestamp("20130101") - Timestamp("20130101")] * 5, name="A") + + # this does date math without changing result type in transform + base_time = df["A"][0] + result = ( + df.groupby("A")["A"].transform(lambda x: x.max() - x.min() + base_time) + - base_time + ) + tm.assert_series_equal(result, expected) + + # this does date math and causes the transform to return timedelta + result = df.groupby("A")["A"].transform(lambda x: x.max() - x.min()) + tm.assert_series_equal(result, expected) + + +def test_transform_datetime_to_numeric(): + # GH 10972 + # convert dt to float + df = DataFrame({"a": 1, "b": date_range("2015-01-01", periods=2, freq="D")}) + result = df.groupby("a").b.transform( + lambda x: x.dt.dayofweek - x.dt.dayofweek.mean() + ) + + expected = Series([-0.5, 0.5], name="b") + tm.assert_series_equal(result, expected) + + # convert dt to int + df = DataFrame({"a": 1, "b": date_range("2015-01-01", periods=2, freq="D")}) + result = df.groupby("a").b.transform( + lambda x: x.dt.dayofweek - x.dt.dayofweek.min() + ) + + expected 
= Series([0, 1], name="b") + tm.assert_series_equal(result, expected) + + +def test_transform_casting(): + # 13046 + data = """ + idx A ID3 DATETIME + 0 B-028 b76cd912ff "2014-10-08 13:43:27" + 1 B-054 4a57ed0b02 "2014-10-08 14:26:19" + 2 B-076 1a682034f8 "2014-10-08 14:29:01" + 3 B-023 b76cd912ff "2014-10-08 18:39:34" + 4 B-023 f88g8d7sds "2014-10-08 18:40:18" + 5 B-033 b76cd912ff "2014-10-08 18:44:30" + 6 B-032 b76cd912ff "2014-10-08 18:46:00" + 7 B-037 b76cd912ff "2014-10-08 18:52:15" + 8 B-046 db959faf02 "2014-10-08 18:59:59" + 9 B-053 b76cd912ff "2014-10-08 19:17:48" + 10 B-065 b76cd912ff "2014-10-08 19:21:38" + """ + df = pd.read_csv( + StringIO(data), sep=r"\s+", index_col=[0], parse_dates=["DATETIME"] + ) + + result = df.groupby("ID3")["DATETIME"].transform(lambda x: x.diff()) + assert is_timedelta64_dtype(result.dtype) + + result = df[["ID3", "DATETIME"]].groupby("ID3").transform(lambda x: x.diff()) + assert is_timedelta64_dtype(result.DATETIME.dtype) + + +def test_transform_multiple(ts): + grouped = ts.groupby([lambda x: x.year, lambda x: x.month]) + + grouped.transform(lambda x: x * 2) + grouped.transform(np.mean) + + +def test_dispatch_transform(tsframe): + df = tsframe[::5].reindex(tsframe.index) + + grouped = df.groupby(lambda x: x.month) + + filled = grouped.fillna(method="pad") + fillit = lambda x: x.fillna(method="pad") + expected = df.groupby(lambda x: x.month).transform(fillit) + tm.assert_frame_equal(filled, expected) + + +def test_transform_transformation_func(request, transformation_func): + # GH 30918 + df = DataFrame( + { + "A": ["foo", "foo", "foo", "foo", "bar", "bar", "baz"], + "B": [1, 2, np.nan, 3, 3, np.nan, 4], + }, + index=date_range("2020-01-01", "2020-01-07"), + ) + # TODO(2.0) Remove after pad/backfill deprecation enforced + transformation_func = maybe_normalize_deprecated_kernels(transformation_func) + if transformation_func == "cumcount": + test_op = lambda x: x.transform("cumcount") + mock_op = lambda x: Series(range(len(x)), x.index) + elif transformation_func == "fillna": + test_op = lambda x: x.transform("fillna", value=0) + mock_op = lambda x: x.fillna(value=0) + elif transformation_func == "tshift": + msg = ( + "Current behavior of groupby.tshift is inconsistent with other " + "transformations. 
See GH34452 for more details" + ) + request.node.add_marker(pytest.mark.xfail(reason=msg)) + else: + test_op = lambda x: x.transform(transformation_func) + mock_op = lambda x: getattr(x, transformation_func)() + + result = test_op(df.groupby("A")) + groups = [df[["B"]].iloc[:4], df[["B"]].iloc[4:6], df[["B"]].iloc[6:]] + expected = concat([mock_op(g) for g in groups]) + + if transformation_func == "cumcount": + tm.assert_series_equal(result, expected) + else: + tm.assert_frame_equal(result, expected) + + +def test_transform_select_columns(df): + f = lambda x: x.mean() + result = df.groupby("A")[["C", "D"]].transform(f) + + selection = df[["C", "D"]] + expected = selection.groupby(df["A"]).transform(f) + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("duplicates", [True, False]) +def test_transform_exclude_nuisance(df, duplicates): + # case that goes through _transform_item_by_item + + if duplicates: + # make sure we work with duplicate columns GH#41427 + df.columns = ["A", "C", "C", "D"] + + # this also tests orderings in transform between + # series/frame to make sure it's consistent + expected = {} + grouped = df.groupby("A") + + gbc = grouped["C"] + warn = FutureWarning if duplicates else None + with tm.assert_produces_warning(warn, match="Dropping invalid columns"): + expected["C"] = gbc.transform(np.mean) + if duplicates: + # squeeze 1-column DataFrame down to Series + expected["C"] = expected["C"]["C"] + + assert isinstance(gbc.obj, DataFrame) + assert isinstance(gbc, DataFrameGroupBy) + else: + assert isinstance(gbc, SeriesGroupBy) + assert isinstance(gbc.obj, Series) + + expected["D"] = grouped["D"].transform(np.mean) + expected = DataFrame(expected) + with tm.assert_produces_warning(FutureWarning, match="Dropping invalid columns"): + result = df.groupby("A").transform(np.mean) + + tm.assert_frame_equal(result, expected) + + +def test_transform_function_aliases(df): + with tm.assert_produces_warning(FutureWarning, match="Dropping invalid columns"): + result = df.groupby("A").transform("mean") + expected = df.groupby("A").transform(np.mean) + tm.assert_frame_equal(result, expected) + + result = df.groupby("A")["C"].transform("mean") + expected = df.groupby("A")["C"].transform(np.mean) + tm.assert_series_equal(result, expected) + + +def test_series_fast_transform_date(): + # GH 13191 + df = DataFrame( + {"grouping": [np.nan, 1, 1, 3], "d": date_range("2014-1-1", "2014-1-4")} + ) + result = df.groupby("grouping")["d"].transform("first") + dates = [ + pd.NaT, + Timestamp("2014-1-2"), + Timestamp("2014-1-2"), + Timestamp("2014-1-4"), + ] + expected = Series(dates, name="d") + tm.assert_series_equal(result, expected) + + +def test_transform_length(): + # GH 9697 + df = DataFrame({"col1": [1, 1, 2, 2], "col2": [1, 2, 3, np.nan]}) + expected = Series([3.0] * 4) + + def nsum(x): + return np.nansum(x) + + results = [ + df.groupby("col1").transform(sum)["col2"], + df.groupby("col1")["col2"].transform(sum), + df.groupby("col1").transform(nsum)["col2"], + df.groupby("col1")["col2"].transform(nsum), + ] + for result in results: + tm.assert_series_equal(result, expected, check_names=False) + + +def test_transform_coercion(): + + # 14457 + # when we are transforming be sure to not coerce + # via assignment + df = DataFrame({"A": ["a", "a"], "B": [0, 1]}) + g = df.groupby("A") + + expected = g.transform(np.mean) + + msg = "will return a scalar mean" + with tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False): + result = g.transform(lambda x: 
np.mean(x)) + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(None): + result2 = g.transform(lambda x: np.mean(x, axis=0)) + tm.assert_frame_equal(result2, expected) + + +def test_groupby_transform_with_int(): + + # GH 3740, make sure that we might upcast on item-by-item transform + + # floats + df = DataFrame( + { + "A": [1, 1, 1, 2, 2, 2], + "B": Series(1, dtype="float64"), + "C": Series([1, 2, 3, 1, 2, 3], dtype="float64"), + "D": "foo", + } + ) + with np.errstate(all="ignore"): + with tm.assert_produces_warning( + FutureWarning, match="Dropping invalid columns" + ): + result = df.groupby("A").transform(lambda x: (x - x.mean()) / x.std()) + expected = DataFrame( + {"B": np.nan, "C": Series([-1, 0, 1, -1, 0, 1], dtype="float64")} + ) + tm.assert_frame_equal(result, expected) + + # int case + df = DataFrame( + { + "A": [1, 1, 1, 2, 2, 2], + "B": 1, + "C": [1, 2, 3, 1, 2, 3], + "D": "foo", + } + ) + with np.errstate(all="ignore"): + with tm.assert_produces_warning( + FutureWarning, match="Dropping invalid columns" + ): + result = df.groupby("A").transform(lambda x: (x - x.mean()) / x.std()) + expected = DataFrame({"B": np.nan, "C": [-1.0, 0.0, 1.0, -1.0, 0.0, 1.0]}) + tm.assert_frame_equal(result, expected) + + # int that needs float conversion + s = Series([2, 3, 4, 10, 5, -1]) + df = DataFrame({"A": [1, 1, 1, 2, 2, 2], "B": 1, "C": s, "D": "foo"}) + with np.errstate(all="ignore"): + with tm.assert_produces_warning( + FutureWarning, match="Dropping invalid columns" + ): + result = df.groupby("A").transform(lambda x: (x - x.mean()) / x.std()) + + s1 = s.iloc[0:3] + s1 = (s1 - s1.mean()) / s1.std() + s2 = s.iloc[3:6] + s2 = (s2 - s2.mean()) / s2.std() + expected = DataFrame({"B": np.nan, "C": concat([s1, s2])}) + tm.assert_frame_equal(result, expected) + + # int doesn't get downcasted + with tm.assert_produces_warning(FutureWarning, match="Dropping invalid columns"): + result = df.groupby("A").transform(lambda x: x * 2 / 2) + expected = DataFrame({"B": 1.0, "C": [2.0, 3.0, 4.0, 10.0, 5.0, -1.0]}) + tm.assert_frame_equal(result, expected) + + +def test_groupby_transform_with_nan_group(): + # GH 9941 + df = DataFrame({"a": range(10), "b": [1, 1, 2, 3, np.nan, 4, 4, 5, 5, 5]}) + result = df.groupby(df.b)["a"].transform(max) + expected = Series([1.0, 1.0, 2.0, 3.0, np.nan, 6.0, 6.0, 9.0, 9.0, 9.0], name="a") + tm.assert_series_equal(result, expected) + + +def test_transform_mixed_type(): + index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3]]) + df = DataFrame( + { + "d": [1.0, 1.0, 1.0, 2.0, 2.0, 2.0], + "c": np.tile(["a", "b", "c"], 2), + "v": np.arange(1.0, 7.0), + }, + index=index, + ) + + def f(group): + group["g"] = group["d"] * 2 + return group[:1] + + grouped = df.groupby("c") + result = grouped.apply(f) + + assert result["d"].dtype == np.float64 + + # this is by definition a mutating operation! 
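+    # (f writes a new column into each group it receives, i.e. it mutates a
+    # slice of the original frame, so chained-assignment warnings are
+    # silenced below while f is re-applied per group for comparison.)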
+ with pd.option_context("mode.chained_assignment", None): + for key, group in grouped: + res = f(group) + tm.assert_frame_equal(res, result.loc[key]) + + +@pytest.mark.parametrize( + "op, args, targop", + [ + ("cumprod", (), lambda x: x.cumprod()), + ("cumsum", (), lambda x: x.cumsum()), + ("shift", (-1,), lambda x: x.shift(-1)), + ("shift", (1,), lambda x: x.shift()), + ], +) +def test_cython_transform_series(op, args, targop): + # GH 4095 + s = Series(np.random.randn(1000)) + s_missing = s.copy() + s_missing.iloc[2:10] = np.nan + labels = np.random.randint(0, 50, size=1000).astype(float) + + # series + for data in [s, s_missing]: + # print(data.head()) + expected = data.groupby(labels).transform(targop) + + tm.assert_series_equal(expected, data.groupby(labels).transform(op, *args)) + tm.assert_series_equal(expected, getattr(data.groupby(labels), op)(*args)) + + +@pytest.mark.parametrize("op", ["cumprod", "cumsum"]) +@pytest.mark.parametrize("skipna", [False, True]) +@pytest.mark.parametrize( + "input, exp", + [ + # When everything is NaN + ({"key": ["b"] * 10, "value": np.nan}, Series([np.nan] * 10, name="value")), + # When there is a single NaN + ( + {"key": ["b"] * 10 + ["a"] * 2, "value": [3] * 3 + [np.nan] + [3] * 8}, + { + ("cumprod", False): [3.0, 9.0, 27.0] + [np.nan] * 7 + [3.0, 9.0], + ("cumprod", True): [ + 3.0, + 9.0, + 27.0, + np.nan, + 81.0, + 243.0, + 729.0, + 2187.0, + 6561.0, + 19683.0, + 3.0, + 9.0, + ], + ("cumsum", False): [3.0, 6.0, 9.0] + [np.nan] * 7 + [3.0, 6.0], + ("cumsum", True): [ + 3.0, + 6.0, + 9.0, + np.nan, + 12.0, + 15.0, + 18.0, + 21.0, + 24.0, + 27.0, + 3.0, + 6.0, + ], + }, + ), + ], +) +def test_groupby_cum_skipna(op, skipna, input, exp): + df = DataFrame(input) + result = df.groupby("key")["value"].transform(op, skipna=skipna) + if isinstance(exp, dict): + expected = exp[(op, skipna)] + else: + expected = exp + expected = Series(expected, name="value") + tm.assert_series_equal(expected, result) + + +@pytest.mark.slow +@pytest.mark.parametrize( + "op, args, targop", + [ + ("cumprod", (), lambda x: x.cumprod()), + ("cumsum", (), lambda x: x.cumsum()), + ("shift", (-1,), lambda x: x.shift(-1)), + ("shift", (1,), lambda x: x.shift()), + ], +) +def test_cython_transform_frame(op, args, targop): + s = Series(np.random.randn(1000)) + s_missing = s.copy() + s_missing.iloc[2:10] = np.nan + labels = np.random.randint(0, 50, size=1000).astype(float) + strings = list("qwertyuiopasdfghjklz") + strings_missing = strings[:] + strings_missing[5] = np.nan + df = DataFrame( + { + "float": s, + "float_missing": s_missing, + "int": [1, 1, 1, 1, 2] * 200, + "datetime": date_range("1990-1-1", periods=1000), + "timedelta": pd.timedelta_range(1, freq="s", periods=1000), + "string": strings * 50, + "string_missing": strings_missing * 50, + }, + columns=[ + "float", + "float_missing", + "int", + "datetime", + "timedelta", + "string", + "string_missing", + ], + ) + df["cat"] = df["string"].astype("category") + + df2 = df.copy() + df2.index = MultiIndex.from_product([range(100), range(10)]) + + # DataFrame - Single and MultiIndex, + # group by values, index level, columns + for df in [df, df2]: + for gb_target in [ + {"by": labels}, + {"level": 0}, + {"by": "string"}, + ]: # {"by": 'string_missing'}]: + # {"by": ['int','string']}]: + + gb = df.groupby(**gb_target) + # allowlisted methods set the selection before applying + # bit a of hack to make sure the cythonized shift + # is equivalent to pre 0.17.1 behavior + if op == "shift": + gb._set_group_selection() + + if op != 
"shift" and "int" not in gb_target: + # numeric apply fastpath promotes dtype so have + # to apply separately and concat + i = gb[["int"]].apply(targop) + f = gb[["float", "float_missing"]].apply(targop) + expected = concat([f, i], axis=1) + else: + expected = gb.apply(targop) + + expected = expected.sort_index(axis=1) + tm.assert_frame_equal(expected, gb.transform(op, *args).sort_index(axis=1)) + tm.assert_frame_equal(expected, getattr(gb, op)(*args).sort_index(axis=1)) + # individual columns + for c in df: + if ( + c not in ["float", "int", "float_missing"] + and op != "shift" + and not (c == "timedelta" and op == "cumsum") + ): + msg = "|".join( + [ + "does not support .* operations", + ".* is not supported for object dtype", + "is not implemented for this dtype", + ] + ) + with pytest.raises(TypeError, match=msg): + gb[c].transform(op) + with pytest.raises(TypeError, match=msg): + getattr(gb[c], op)() + else: + expected = gb[c].apply(targop) + expected.name = c + tm.assert_series_equal(expected, gb[c].transform(op, *args)) + tm.assert_series_equal(expected, getattr(gb[c], op)(*args)) + + +def test_transform_with_non_scalar_group(): + # GH 10165 + cols = MultiIndex.from_tuples( + [ + ("syn", "A"), + ("mis", "A"), + ("non", "A"), + ("syn", "C"), + ("mis", "C"), + ("non", "C"), + ("syn", "T"), + ("mis", "T"), + ("non", "T"), + ("syn", "G"), + ("mis", "G"), + ("non", "G"), + ] + ) + df = DataFrame( + np.random.randint(1, 10, (4, 12)), columns=cols, index=["A", "C", "G", "T"] + ) + + msg = "transform must return a scalar value for each group.*" + with pytest.raises(ValueError, match=msg): + df.groupby(axis=1, level=1).transform(lambda z: z.div(z.sum(axis=1), axis=0)) + + +@pytest.mark.parametrize( + "cols,exp,comp_func", + [ + ("a", Series([1, 1, 1], name="a"), tm.assert_series_equal), + ( + ["a", "c"], + DataFrame({"a": [1, 1, 1], "c": [1, 1, 1]}), + tm.assert_frame_equal, + ), + ], +) +@pytest.mark.parametrize("agg_func", ["count", "rank", "size"]) +def test_transform_numeric_ret(cols, exp, comp_func, agg_func, request): + if agg_func == "size" and isinstance(cols, list): + # https://github.com/pytest-dev/pytest/issues/6300 + # workaround to xfail fixture/param permutations + reason = "'size' transformation not supported with NDFrameGroupy" + request.node.add_marker(pytest.mark.xfail(reason=reason)) + + # GH 19200 + df = DataFrame( + {"a": date_range("2018-01-01", periods=3), "b": range(3), "c": range(7, 10)} + ) + + warn = FutureWarning + if isinstance(exp, Series) or agg_func != "size": + warn = None + with tm.assert_produces_warning(warn, match="Dropping invalid columns"): + result = df.groupby("b")[cols].transform(agg_func) + + if agg_func == "rank": + exp = exp.astype("float") + + comp_func(result, exp) + + +def test_transform_ffill(): + # GH 24211 + data = [["a", 0.0], ["a", float("nan")], ["b", 1.0], ["b", float("nan")]] + df = DataFrame(data, columns=["key", "values"]) + result = df.groupby("key").transform("ffill") + expected = DataFrame({"values": [0.0, 0.0, 1.0, 1.0]}) + tm.assert_frame_equal(result, expected) + result = df.groupby("key")["values"].transform("ffill") + expected = Series([0.0, 0.0, 1.0, 1.0], name="values") + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("mix_groupings", [True, False]) +@pytest.mark.parametrize("as_series", [True, False]) +@pytest.mark.parametrize("val1,val2", [("foo", "bar"), (1, 2), (1.0, 2.0)]) +@pytest.mark.parametrize( + "fill_method,limit,exp_vals", + [ + ( + "ffill", + None, + [np.nan, np.nan, "val1", "val1", 
"val1", "val2", "val2", "val2"], + ), + ("ffill", 1, [np.nan, np.nan, "val1", "val1", np.nan, "val2", "val2", np.nan]), + ( + "bfill", + None, + ["val1", "val1", "val1", "val2", "val2", "val2", np.nan, np.nan], + ), + ("bfill", 1, [np.nan, "val1", "val1", np.nan, "val2", "val2", np.nan, np.nan]), + ], +) +def test_group_fill_methods( + mix_groupings, as_series, val1, val2, fill_method, limit, exp_vals +): + vals = [np.nan, np.nan, val1, np.nan, np.nan, val2, np.nan, np.nan] + _exp_vals = list(exp_vals) + # Overwrite placeholder values + for index, exp_val in enumerate(_exp_vals): + if exp_val == "val1": + _exp_vals[index] = val1 + elif exp_val == "val2": + _exp_vals[index] = val2 + + # Need to modify values and expectations depending on the + # Series / DataFrame that we ultimately want to generate + if mix_groupings: # ['a', 'b', 'a, 'b', ...] + keys = ["a", "b"] * len(vals) + + def interweave(list_obj): + temp = [] + for x in list_obj: + temp.extend([x, x]) + + return temp + + _exp_vals = interweave(_exp_vals) + vals = interweave(vals) + else: # ['a', 'a', 'a', ... 'b', 'b', 'b'] + keys = ["a"] * len(vals) + ["b"] * len(vals) + _exp_vals = _exp_vals * 2 + vals = vals * 2 + + df = DataFrame({"key": keys, "val": vals}) + if as_series: + result = getattr(df.groupby("key")["val"], fill_method)(limit=limit) + exp = Series(_exp_vals, name="val") + tm.assert_series_equal(result, exp) + else: + result = getattr(df.groupby("key"), fill_method)(limit=limit) + exp = DataFrame({"val": _exp_vals}) + tm.assert_frame_equal(result, exp) + + +@pytest.mark.parametrize("fill_method", ["ffill", "bfill"]) +def test_pad_stable_sorting(fill_method): + # GH 21207 + x = [0] * 20 + y = [np.nan] * 10 + [1] * 10 + + if fill_method == "bfill": + y = y[::-1] + + df = DataFrame({"x": x, "y": y}) + expected = df.drop("x", axis=1) + + result = getattr(df.groupby("x"), fill_method)() + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("test_series", [True, False]) +@pytest.mark.parametrize( + "freq", + [ + None, + pytest.param( + "D", + marks=pytest.mark.xfail( + reason="GH#23918 before method uses freq in vectorized approach" + ), + ), + ], +) +@pytest.mark.parametrize("periods", [1, -1]) +@pytest.mark.parametrize("fill_method", ["ffill", "bfill", None]) +@pytest.mark.parametrize("limit", [None, 1]) +def test_pct_change(test_series, freq, periods, fill_method, limit): + # GH 21200, 21621, 30463 + vals = [3, np.nan, np.nan, np.nan, 1, 2, 4, 10, np.nan, 4] + keys = ["a", "b"] + key_v = np.repeat(keys, len(vals)) + df = DataFrame({"key": key_v, "vals": vals * 2}) + + df_g = df + if fill_method is not None: + df_g = getattr(df.groupby("key"), fill_method)(limit=limit) + grp = df_g.groupby(df.key) + + expected = grp["vals"].obj / grp["vals"].shift(periods) - 1 + + if test_series: + result = df.groupby("key")["vals"].pct_change( + periods=periods, fill_method=fill_method, limit=limit, freq=freq + ) + tm.assert_series_equal(result, expected) + else: + result = df.groupby("key").pct_change( + periods=periods, fill_method=fill_method, limit=limit, freq=freq + ) + tm.assert_frame_equal(result, expected.to_frame("vals")) + + +@pytest.mark.parametrize( + "func, expected_status", + [ + ("ffill", ["shrt", "shrt", "lng", np.nan, "shrt", "ntrl", "ntrl"]), + ("bfill", ["shrt", "lng", "lng", "shrt", "shrt", "ntrl", np.nan]), + ], +) +def test_ffill_bfill_non_unique_multilevel(func, expected_status): + # GH 19437 + date = pd.to_datetime( + [ + "2018-01-01", + "2018-01-01", + "2018-01-01", + "2018-01-01", + 
"2018-01-02", + "2018-01-01", + "2018-01-02", + ] + ) + symbol = ["MSFT", "MSFT", "MSFT", "AAPL", "AAPL", "TSLA", "TSLA"] + status = ["shrt", np.nan, "lng", np.nan, "shrt", "ntrl", np.nan] + + df = DataFrame({"date": date, "symbol": symbol, "status": status}) + df = df.set_index(["date", "symbol"]) + result = getattr(df.groupby("symbol")["status"], func)() + + index = MultiIndex.from_tuples( + tuples=list(zip(*[date, symbol])), names=["date", "symbol"] + ) + expected = Series(expected_status, index=index, name="status") + + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("func", [np.any, np.all]) +def test_any_all_np_func(func): + # GH 20653 + df = DataFrame( + [["foo", True], [np.nan, True], ["foo", True]], columns=["key", "val"] + ) + + exp = Series([True, np.nan, True], name="val") + + res = df.groupby("key")["val"].transform(func) + tm.assert_series_equal(res, exp) + + +def test_groupby_transform_rename(): + # https://github.com/pandas-dev/pandas/issues/23461 + def demean_rename(x): + result = x - x.mean() + + if isinstance(x, Series): + return result + + result = result.rename(columns={c: "{c}_demeaned" for c in result.columns}) + + return result + + df = DataFrame({"group": list("ababa"), "value": [1, 1, 1, 2, 2]}) + expected = DataFrame({"value": [-1.0 / 3, -0.5, -1.0 / 3, 0.5, 2.0 / 3]}) + + result = df.groupby("group").transform(demean_rename) + tm.assert_frame_equal(result, expected) + result_single = df.groupby("group").value.transform(demean_rename) + tm.assert_series_equal(result_single, expected["value"]) + + +@pytest.mark.parametrize("func", [min, max, np.min, np.max, "first", "last"]) +def test_groupby_transform_timezone_column(func): + # GH 24198 + ts = pd.to_datetime("now", utc=True).tz_convert("Asia/Singapore") + result = DataFrame({"end_time": [ts], "id": [1]}) + result["max_end_time"] = result.groupby("id").end_time.transform(func) + expected = DataFrame([[ts, 1, ts]], columns=["end_time", "id", "max_end_time"]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "func, values", + [ + ("idxmin", ["1/1/2011"] * 2 + ["1/3/2011"] * 7 + ["1/10/2011"]), + ("idxmax", ["1/2/2011"] * 2 + ["1/9/2011"] * 7 + ["1/10/2011"]), + ], +) +def test_groupby_transform_with_datetimes(func, values): + # GH 15306 + dates = date_range("1/1/2011", periods=10, freq="D") + + stocks = DataFrame({"price": np.arange(10.0)}, index=dates) + stocks["week_id"] = dates.isocalendar().week + + result = stocks.groupby(stocks["week_id"])["price"].transform(func) + + expected = Series(data=pd.to_datetime(values), index=dates, name="price") + + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("func", ["cumsum", "cumprod", "cummin", "cummax"]) +def test_transform_absent_categories(func): + # GH 16771 + # cython transforms with more groups than rows + x_vals = [1] + x_cats = range(2) + y = [1] + df = DataFrame({"x": Categorical(x_vals, x_cats), "y": y}) + result = getattr(df.y.groupby(df.x), func)() + expected = df.y + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("func", ["ffill", "bfill", "shift"]) +@pytest.mark.parametrize("key, val", [("level", 0), ("by", Series([0]))]) +def test_ffill_not_in_axis(func, key, val): + # GH 21521 + df = DataFrame([[np.nan]]) + result = getattr(df.groupby(**{key: val}), func)() + expected = df + + tm.assert_frame_equal(result, expected) + + +def test_transform_invalid_name_raises(): + # GH#27486 + df = DataFrame({"a": [0, 1, 1, 2]}) + g = df.groupby(["a", "b", "b", "c"]) + with 
pytest.raises(ValueError, match="not a valid function name"): + g.transform("some_arbitrary_name") + + # method exists on the object, but is not a valid transformation/agg + assert hasattr(g, "aggregate") # make sure the method exists + with pytest.raises(ValueError, match="not a valid function name"): + g.transform("aggregate") + + # Test SeriesGroupBy + g = df["a"].groupby(["a", "b", "b", "c"]) + with pytest.raises(ValueError, match="not a valid function name"): + g.transform("some_arbitrary_name") + + +@pytest.mark.parametrize( + "obj", + [ + DataFrame( + {"a": [0, 0, 0, 1, 1, 1], "b": range(6)}, + index=["A", "B", "C", "D", "E", "F"], + ), + Series([0, 0, 0, 1, 1, 1], index=["A", "B", "C", "D", "E", "F"]), + ], +) +def test_transform_agg_by_name(request, reduction_func, obj): + func = reduction_func + g = obj.groupby(np.repeat([0, 1], 3)) + + if func == "ngroup": # GH#27468 + request.node.add_marker( + pytest.mark.xfail(reason="TODO: g.transform('ngroup') doesn't work") + ) + if func == "size" and obj.ndim == 2: # GH#27469 + request.node.add_marker( + pytest.mark.xfail(reason="TODO: g.transform('size') doesn't work") + ) + if func == "corrwith" and isinstance(obj, Series): # GH#32293 + request.node.add_marker( + pytest.mark.xfail(reason="TODO: implement SeriesGroupBy.corrwith") + ) + + args = {"nth": [0], "quantile": [0.5], "corrwith": [obj]}.get(func, []) + + warn = None + if isinstance(obj, DataFrame) and func == "size": + warn = FutureWarning + + with tm.assert_produces_warning(warn, match="Dropping invalid columns"): + result = g.transform(func, *args) + + # this is the *definition* of a transformation + tm.assert_index_equal(result.index, obj.index) + if hasattr(obj, "columns"): + tm.assert_index_equal(result.columns, obj.columns) + + # verify that values were broadcasted across each group + assert len(set(DataFrame(result).iloc[-3:, -1])) == 1 + + +def test_transform_lambda_with_datetimetz(): + # GH 27496 + df = DataFrame( + { + "time": [ + Timestamp("2010-07-15 03:14:45"), + Timestamp("2010-11-19 18:47:06"), + ], + "timezone": ["Etc/GMT+4", "US/Eastern"], + } + ) + result = df.groupby(["timezone"])["time"].transform( + lambda x: x.dt.tz_localize(x.name) + ) + expected = Series( + [ + Timestamp("2010-07-15 03:14:45", tz="Etc/GMT+4"), + Timestamp("2010-11-19 18:47:06", tz="US/Eastern"), + ], + name="time", + ) + tm.assert_series_equal(result, expected) + + +def test_transform_fastpath_raises(): + # GH#29631 case where fastpath defined in groupby.generic _choose_path + # raises, but slow_path does not + + df = DataFrame({"A": [1, 1, 2, 2], "B": [1, -1, 1, 2]}) + gb = df.groupby("A") + + def func(grp): + # we want a function such that func(frame) fails but func.apply(frame) + # works + if grp.ndim == 2: + # Ensure that fast_path fails + raise NotImplementedError("Don't cross the streams") + return grp * 2 + + # Check that the fastpath raises, see _transform_general + obj = gb._obj_with_exclusions + gen = gb.grouper.get_iterator(obj, axis=gb.axis) + fast_path, slow_path = gb._define_paths(func) + _, group = next(gen) + + with pytest.raises(NotImplementedError, match="Don't cross the streams"): + fast_path(group) + + result = gb.transform(func) + + expected = DataFrame([2, -2, 2, 4], columns=["B"]) + tm.assert_frame_equal(result, expected) + + +def test_transform_lambda_indexing(): + # GH 7883 + df = DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "flux", "foo", "flux"], + "B": ["one", "one", "two", "three", "two", "six", "five", "three"], + "C": range(8), + "D": range(8), + 
"E": range(8), + } + ) + df = df.set_index(["A", "B"]) + df = df.sort_index() + result = df.groupby(level="A").transform(lambda x: x.iloc[-1]) + expected = DataFrame( + { + "C": [3, 3, 7, 7, 4, 4, 4, 4], + "D": [3, 3, 7, 7, 4, 4, 4, 4], + "E": [3, 3, 7, 7, 4, 4, 4, 4], + }, + index=MultiIndex.from_tuples( + [ + ("bar", "one"), + ("bar", "three"), + ("flux", "six"), + ("flux", "three"), + ("foo", "five"), + ("foo", "one"), + ("foo", "two"), + ("foo", "two"), + ], + names=["A", "B"], + ), + ) + tm.assert_frame_equal(result, expected) + + +def test_categorical_and_not_categorical_key(observed): + # Checks that groupby-transform, when grouping by both a categorical + # and a non-categorical key, doesn't try to expand the output to include + # non-observed categories but instead matches the input shape. + # GH 32494 + df_with_categorical = DataFrame( + { + "A": Categorical(["a", "b", "a"], categories=["a", "b", "c"]), + "B": [1, 2, 3], + "C": ["a", "b", "a"], + } + ) + df_without_categorical = DataFrame( + {"A": ["a", "b", "a"], "B": [1, 2, 3], "C": ["a", "b", "a"]} + ) + + # DataFrame case + result = df_with_categorical.groupby(["A", "C"], observed=observed).transform("sum") + expected = df_without_categorical.groupby(["A", "C"]).transform("sum") + tm.assert_frame_equal(result, expected) + expected_explicit = DataFrame({"B": [4, 2, 4]}) + tm.assert_frame_equal(result, expected_explicit) + + # Series case + result = df_with_categorical.groupby(["A", "C"], observed=observed)["B"].transform( + "sum" + ) + expected = df_without_categorical.groupby(["A", "C"])["B"].transform("sum") + tm.assert_series_equal(result, expected) + expected_explicit = Series([4, 2, 4], name="B") + tm.assert_series_equal(result, expected_explicit) + + +def test_string_rank_grouping(): + # GH 19354 + df = DataFrame({"A": [1, 1, 2], "B": [1, 2, 3]}) + result = df.groupby("A").transform("rank") + expected = DataFrame({"B": [1.0, 2.0, 1.0]}) + tm.assert_frame_equal(result, expected) + + +def test_transform_cumcount(): + # GH 27472 + df = DataFrame({"a": [0, 0, 0, 1, 1, 1], "b": range(6)}) + grp = df.groupby(np.repeat([0, 1], 3)) + + result = grp.cumcount() + expected = Series([0, 1, 2, 0, 1, 2]) + tm.assert_series_equal(result, expected) + + result = grp.transform("cumcount") + tm.assert_series_equal(result, expected) + + +def test_null_group_lambda_self(): + # GH 17093 + df = DataFrame({"A": [1, np.nan], "B": [1, 1]}) + result = df.groupby("A").transform(lambda x: x) + expected = DataFrame([1], columns=["B"]) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..c59878f Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/__pycache__/common.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/__pycache__/common.cpython-38.pyc new file mode 100644 index 0000000..918e700 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/__pycache__/common.cpython-38.pyc differ diff --git 
a/.local/lib/python3.8/site-packages/pandas/tests/indexes/__pycache__/conftest.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/__pycache__/conftest.cpython-38.pyc new file mode 100644 index 0000000..0a46680 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/__pycache__/conftest.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/__pycache__/datetimelike.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/__pycache__/datetimelike.cpython-38.pyc new file mode 100644 index 0000000..f6deae3 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/__pycache__/datetimelike.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/__pycache__/test_any_index.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/__pycache__/test_any_index.cpython-38.pyc new file mode 100644 index 0000000..ec1ecfe Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/__pycache__/test_any_index.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/__pycache__/test_base.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/__pycache__/test_base.cpython-38.pyc new file mode 100644 index 0000000..4614030 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/__pycache__/test_base.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/__pycache__/test_common.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/__pycache__/test_common.cpython-38.pyc new file mode 100644 index 0000000..1209e79 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/__pycache__/test_common.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/__pycache__/test_engines.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/__pycache__/test_engines.cpython-38.pyc new file mode 100644 index 0000000..cb8453b Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/__pycache__/test_engines.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/__pycache__/test_frozen.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/__pycache__/test_frozen.cpython-38.pyc new file mode 100644 index 0000000..d726142 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/__pycache__/test_frozen.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/__pycache__/test_index_new.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/__pycache__/test_index_new.cpython-38.pyc new file mode 100644 index 0000000..210c359 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/__pycache__/test_index_new.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/__pycache__/test_indexing.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/__pycache__/test_indexing.cpython-38.pyc new file mode 100644 index 0000000..0dcb05e Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/__pycache__/test_indexing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/__pycache__/test_numpy_compat.cpython-38.pyc 
b/.local/lib/python3.8/site-packages/pandas/tests/indexes/__pycache__/test_numpy_compat.cpython-38.pyc new file mode 100644 index 0000000..795eae7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/__pycache__/test_numpy_compat.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/__pycache__/test_setops.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/__pycache__/test_setops.cpython-38.pyc new file mode 100644 index 0000000..b88a326 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/__pycache__/test_setops.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..49229aa Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/__pycache__/test_constructors.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/__pycache__/test_constructors.cpython-38.pyc new file mode 100644 index 0000000..98fec61 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/__pycache__/test_constructors.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/__pycache__/test_formats.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/__pycache__/test_formats.cpython-38.pyc new file mode 100644 index 0000000..8bd661e Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/__pycache__/test_formats.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/__pycache__/test_indexing.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/__pycache__/test_indexing.cpython-38.pyc new file mode 100644 index 0000000..840ef95 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/__pycache__/test_indexing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/__pycache__/test_pickle.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/__pycache__/test_pickle.cpython-38.pyc new file mode 100644 index 0000000..586d56f Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/__pycache__/test_pickle.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/__pycache__/test_reshape.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/__pycache__/test_reshape.cpython-38.pyc new file mode 100644 index 0000000..e0da9ac Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/__pycache__/test_reshape.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/__pycache__/test_setops.cpython-38.pyc 
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/test_constructors.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/test_constructors.py
new file mode 100644
index 0000000..bc89457
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/test_constructors.py
@@ -0,0 +1,42 @@
+import numpy as np
+import pytest
+
+from pandas import (
+    Index,
+    MultiIndex,
+)
+import pandas._testing as tm
+
+
+class TestIndexConstructor:
+    # Tests for the Index constructor, specifically for cases that do
+    # not return a subclass
+
+    @pytest.mark.parametrize("value", [1, np.int64(1)])
+    def test_constructor_corner(self, value):
+        # corner case
+        msg = (
+            r"Index\(\.\.\.\) must be called with a collection of some "
+            f"kind, {value} was passed"
+        )
+        with pytest.raises(TypeError, match=msg):
+            Index(value)
+
+    @pytest.mark.parametrize("index_vals", [[("A", 1), "B"], ["B", ("A", 1)]])
+    def test_construction_list_mixed_tuples(self, index_vals):
+        # see gh-10697: if we are constructing from a mixed list of tuples,
+        # make sure that we are independent of the sorting order.
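+        # (editor's note) the two parametrized orderings above exist so the
+        # assertion below cannot pass by accident of element order.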
+ index = Index(index_vals) + assert isinstance(index, Index) + assert not isinstance(index, MultiIndex) + + def test_constructor_wrong_kwargs(self): + # GH #19348 + with pytest.raises(TypeError, match="Unexpected keyword arguments {'foo'}"): + with tm.assert_produces_warning(FutureWarning): + Index([], foo="bar") + + def test_constructor_cast(self): + msg = "could not convert string to float" + with pytest.raises(ValueError, match=msg): + Index(["a", "b", "c"], dtype=float) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/test_formats.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/test_formats.py new file mode 100644 index 0000000..9053d45 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/test_formats.py @@ -0,0 +1,148 @@ +import numpy as np +import pytest + +import pandas._config.config as cf + +from pandas import Index + + +class TestIndexRendering: + @pytest.mark.parametrize( + "index,expected", + [ + # ASCII + # short + ( + Index(["a", "bb", "ccc"]), + """Index(['a', 'bb', 'ccc'], dtype='object')""", + ), + # multiple lines + ( + Index(["a", "bb", "ccc"] * 10), + "Index(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', " + "'bb', 'ccc', 'a', 'bb', 'ccc',\n" + " 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', " + "'bb', 'ccc', 'a', 'bb', 'ccc',\n" + " 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],\n" + " dtype='object')", + ), + # truncated + ( + Index(["a", "bb", "ccc"] * 100), + "Index(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',\n" + " ...\n" + " 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],\n" + " dtype='object', length=300)", + ), + # Non-ASCII + # short + ( + Index(["あ", "いい", "ううう"]), + """Index(['あ', 'いい', 'ううう'], dtype='object')""", + ), + # multiple lines + ( + Index(["あ", "いい", "ううう"] * 10), + ( + "Index(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', " + "'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',\n" + " 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', " + "'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',\n" + " 'あ', 'いい', 'ううう', 'あ', 'いい', " + "'ううう'],\n" + " dtype='object')" + ), + ), + # truncated + ( + Index(["あ", "いい", "ううう"] * 100), + ( + "Index(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', " + "'あ', 'いい', 'ううう', 'あ',\n" + " ...\n" + " 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', " + "'ううう', 'あ', 'いい', 'ううう'],\n" + " dtype='object', length=300)" + ), + ), + ], + ) + def test_string_index_repr(self, index, expected): + result = repr(index) + assert result == expected + + @pytest.mark.parametrize( + "index,expected", + [ + # short + ( + Index(["あ", "いい", "ううう"]), + ("Index(['あ', 'いい', 'ううう'], dtype='object')"), + ), + # multiple lines + ( + Index(["あ", "いい", "ううう"] * 10), + ( + "Index(['あ', 'いい', 'ううう', 'あ', 'いい', " + "'ううう', 'あ', 'いい', 'ううう',\n" + " 'あ', 'いい', 'ううう', 'あ', 'いい', " + "'ううう', 'あ', 'いい', 'ううう',\n" + " 'あ', 'いい', 'ううう', 'あ', 'いい', " + "'ううう', 'あ', 'いい', 'ううう',\n" + " 'あ', 'いい', 'ううう'],\n" + " dtype='object')" + "" + ), + ), + # truncated + ( + Index(["あ", "いい", "ううう"] * 100), + ( + "Index(['あ', 'いい', 'ううう', 'あ', 'いい', " + "'ううう', 'あ', 'いい', 'ううう',\n" + " 'あ',\n" + " ...\n" + " 'ううう', 'あ', 'いい', 'ううう', 'あ', " + "'いい', 'ううう', 'あ', 'いい',\n" + " 'ううう'],\n" + " dtype='object', length=300)" + ), + ), + ], + ) + def test_string_index_repr_with_unicode_option(self, index, expected): + # Enable Unicode option ----------------------------------------- + with cf.option_context("display.unicode.east_asian_width", True): + result = repr(index) + assert result == expected + + def test_repr_summary(self): + with 
cf.option_context("display.max_seq_items", 10): + result = repr(Index(np.arange(1000))) + assert len(result) < 200 + assert "..." in result + + def test_summary_bug(self): + # GH#3869 + ind = Index(["{other}%s", "~:{range}:0"], name="A") + result = ind._summary() + # shouldn't be formatted accidentally. + assert "~:{range}:0" in result + assert "{other}%s" in result + + def test_index_repr_bool_nan(self): + # GH32146 + arr = Index([True, False, np.nan], dtype=object) + exp1 = arr.format() + out1 = ["True", "False", "NaN"] + assert out1 == exp1 + + exp2 = repr(arr) + out2 = "Index([True, False, nan], dtype='object')" + assert out2 == exp2 + + def test_format_different_scalar_lengths(self): + # GH#35439 + idx = Index(["aaaaaaaaa", "b"]) + expected = ["aaaaaaaaa", "b"] + assert idx.format() == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/test_indexing.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/test_indexing.py new file mode 100644 index 0000000..9cd5829 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/test_indexing.py @@ -0,0 +1,78 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Index, + NaT, +) +import pandas._testing as tm + + +class TestGetSliceBounds: + @pytest.mark.parametrize("kind", ["getitem", "loc", None]) + @pytest.mark.parametrize("side, expected", [("left", 4), ("right", 5)]) + def test_get_slice_bounds_within(self, kind, side, expected): + index = Index(list("abcdef")) + with tm.assert_produces_warning(FutureWarning, match="'kind' argument"): + result = index.get_slice_bound("e", kind=kind, side=side) + assert result == expected + + @pytest.mark.parametrize("kind", ["getitem", "loc", None]) + @pytest.mark.parametrize("side", ["left", "right"]) + @pytest.mark.parametrize( + "data, bound, expected", [(list("abcdef"), "x", 6), (list("bcdefg"), "a", 0)] + ) + def test_get_slice_bounds_outside(self, kind, side, expected, data, bound): + index = Index(data) + with tm.assert_produces_warning(FutureWarning, match="'kind' argument"): + result = index.get_slice_bound(bound, kind=kind, side=side) + assert result == expected + + def test_get_slice_bounds_invalid_side(self): + with pytest.raises(ValueError, match="Invalid value for side kwarg"): + Index([]).get_slice_bound("a", side="middle") + + +class TestGetIndexerNonUnique: + def test_get_indexer_non_unique_dtype_mismatch(self): + # GH#25459 + indexes, missing = Index(["A", "B"]).get_indexer_non_unique(Index([0])) + tm.assert_numpy_array_equal(np.array([-1], dtype=np.intp), indexes) + tm.assert_numpy_array_equal(np.array([0], dtype=np.intp), missing) + + +class TestGetLoc: + @pytest.mark.slow # to_flat_index takes a while + def test_get_loc_tuple_monotonic_above_size_cutoff(self): + # Go through the libindex path for which using + # _bin_search vs ndarray.searchsorted makes a difference + + lev = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ") + dti = pd.date_range("2016-01-01", periods=100) + + mi = pd.MultiIndex.from_product([lev, range(10 ** 3), dti]) + oidx = mi.to_flat_index() + + loc = len(oidx) // 2 + tup = oidx[loc] + + res = oidx.get_loc(tup) + assert res == loc + + def test_get_loc_nan_object_dtype_nonmonotonic_nonunique(self): + # case that goes through _maybe_get_bool_indexer + idx = Index(["foo", np.nan, None, "foo", 1.0, None], dtype=object) + + # we dont raise KeyError on nan + res = idx.get_loc(np.nan) + assert res == 1 + + # we only match on None, not on np.nan + res = idx.get_loc(None) + 
expected = np.array([False, False, True, False, False, True]) + tm.assert_numpy_array_equal(res, expected) + + # we don't match at all on mismatched NA + with pytest.raises(KeyError, match="NaT"): + idx.get_loc(NaT) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/test_pickle.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/test_pickle.py new file mode 100644 index 0000000..c670921 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/test_pickle.py @@ -0,0 +1,11 @@ +from pandas import Index +import pandas._testing as tm + + +def test_pickle_preserves_object_dtype(): + # GH#43188, GH#43155 don't infer numeric dtype + index = Index([1, 2, 3], dtype=object) + + result = tm.round_trip_pickle(index) + assert result.dtype == object + tm.assert_index_equal(index, result) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/test_reshape.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/test_reshape.py new file mode 100644 index 0000000..547d626 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/test_reshape.py @@ -0,0 +1,86 @@ +""" +Tests for ndarray-like method on the base Index class +""" +import numpy as np +import pytest + +from pandas import Index +import pandas._testing as tm + + +class TestReshape: + def test_repeat(self): + repeats = 2 + index = Index([1, 2, 3]) + expected = Index([1, 1, 2, 2, 3, 3]) + + result = index.repeat(repeats) + tm.assert_index_equal(result, expected) + + def test_insert(self): + + # GH 7256 + # validate neg/pos inserts + result = Index(["b", "c", "d"]) + + # test 0th element + tm.assert_index_equal(Index(["a", "b", "c", "d"]), result.insert(0, "a")) + + # test Nth element that follows Python list behavior + tm.assert_index_equal(Index(["b", "c", "e", "d"]), result.insert(-1, "e")) + + # test loc +/- neq (0, -1) + tm.assert_index_equal(result.insert(1, "z"), result.insert(-2, "z")) + + # test empty + null_index = Index([]) + tm.assert_index_equal(Index(["a"]), null_index.insert(0, "a")) + + def test_insert_missing(self, nulls_fixture): + # GH#22295 + # test there is no mangling of NA values + expected = Index(["a", nulls_fixture, "b", "c"]) + result = Index(list("abc")).insert(1, nulls_fixture) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "val", [(1, 2), np.datetime64("2019-12-31"), np.timedelta64(1, "D")] + ) + @pytest.mark.parametrize("loc", [-1, 2]) + def test_insert_datetime_into_object(self, loc, val): + # GH#44509 + idx = Index(["1", "2", "3"]) + result = idx.insert(loc, val) + expected = Index(["1", "2", val, "3"]) + tm.assert_index_equal(result, expected) + assert type(expected[2]) is type(val) + + @pytest.mark.parametrize( + "pos,expected", + [ + (0, Index(["b", "c", "d"], name="index")), + (-1, Index(["a", "b", "c"], name="index")), + ], + ) + def test_delete(self, pos, expected): + index = Index(["a", "b", "c", "d"], name="index") + result = index.delete(pos) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + + def test_delete_raises(self): + index = Index(["a", "b", "c", "d"], name="index") + msg = "index 5 is out of bounds for axis 0 with size 4" + with pytest.raises(IndexError, match=msg): + index.delete(5) + + def test_append_multiple(self): + index = Index(["a", "b", "c", "d", "e", "f"]) + + foos = [index[:2], index[2:4], index[4:]] + result = foos[0].append(foos[1:]) + tm.assert_index_equal(result, index) + + # empty + 
result = index.append([]) + tm.assert_index_equal(result, index) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/test_setops.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/test_setops.py new file mode 100644 index 0000000..87ffe99 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/test_setops.py @@ -0,0 +1,261 @@ +from datetime import datetime + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Index, + Series, +) +import pandas._testing as tm +from pandas.core.algorithms import safe_sort + + +class TestIndexSetOps: + @pytest.mark.parametrize( + "method", ["union", "intersection", "difference", "symmetric_difference"] + ) + def test_setops_disallow_true(self, method): + idx1 = Index(["a", "b"]) + idx2 = Index(["b", "c"]) + + with pytest.raises(ValueError, match="The 'sort' keyword only takes"): + getattr(idx1, method)(idx2, sort=True) + + def test_setops_preserve_object_dtype(self): + idx = Index([1, 2, 3], dtype=object) + result = idx.intersection(idx[1:]) + expected = idx[1:] + tm.assert_index_equal(result, expected) + + # if other is not monotonic increasing, intersection goes through + # a different route + result = idx.intersection(idx[1:][::-1]) + tm.assert_index_equal(result, expected) + + result = idx._union(idx[1:], sort=None) + expected = idx + tm.assert_numpy_array_equal(result, expected.values) + + result = idx.union(idx[1:], sort=None) + tm.assert_index_equal(result, expected) + + # if other is not monotonic increasing, _union goes through + # a different route + result = idx._union(idx[1:][::-1], sort=None) + tm.assert_numpy_array_equal(result, expected.values) + + result = idx.union(idx[1:][::-1], sort=None) + tm.assert_index_equal(result, expected) + + def test_union_base(self): + index = Index([0, "a", 1, "b", 2, "c"]) + first = index[3:] + second = index[:5] + + result = first.union(second) + + expected = Index([0, 1, 2, "a", "b", "c"]) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("klass", [np.array, Series, list]) + def test_union_different_type_base(self, klass): + # GH 10149 + index = Index([0, "a", 1, "b", 2, "c"]) + first = index[3:] + second = index[:5] + + result = first.union(klass(second.values)) + + assert tm.equalContents(result, index) + + def test_union_sort_other_incomparable(self): + # https://github.com/pandas-dev/pandas/issues/24959 + idx = Index([1, pd.Timestamp("2000")]) + # default (sort=None) + with tm.assert_produces_warning(RuntimeWarning): + result = idx.union(idx[:1]) + + tm.assert_index_equal(result, idx) + + # sort=None + with tm.assert_produces_warning(RuntimeWarning): + result = idx.union(idx[:1], sort=None) + tm.assert_index_equal(result, idx) + + # sort=False + result = idx.union(idx[:1], sort=False) + tm.assert_index_equal(result, idx) + + @pytest.mark.xfail(reason="GH#25151 need to decide on True behavior") + def test_union_sort_other_incomparable_true(self): + # TODO(GH#25151): decide on True behaviour + # sort=True + idx = Index([1, pd.Timestamp("2000")]) + with pytest.raises(TypeError, match=".*"): + idx.union(idx[:1], sort=True) + + @pytest.mark.xfail(reason="GH#25151 need to decide on True behavior") + def test_intersection_equal_sort_true(self): + # TODO(GH#25151): decide on True behaviour + idx = Index(["c", "a", "b"]) + sorted_ = Index(["a", "b", "c"]) + tm.assert_index_equal(idx.intersection(idx, sort=True), sorted_) + + def test_intersection_base(self, sort): + # (same results for 
py2 and py3 but sortedness not tested elsewhere) + index = Index([0, "a", 1, "b", 2, "c"]) + first = index[:5] + second = index[:3] + + expected = Index([0, 1, "a"]) if sort is None else Index([0, "a", 1]) + result = first.intersection(second, sort=sort) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("klass", [np.array, Series, list]) + def test_intersection_different_type_base(self, klass, sort): + # GH 10149 + index = Index([0, "a", 1, "b", 2, "c"]) + first = index[:5] + second = index[:3] + + result = first.intersection(klass(second.values), sort=sort) + assert tm.equalContents(result, second) + + def test_intersection_nosort(self): + result = Index(["c", "b", "a"]).intersection(["b", "a"]) + expected = Index(["b", "a"]) + tm.assert_index_equal(result, expected) + + def test_intersection_equal_sort(self): + idx = Index(["c", "a", "b"]) + tm.assert_index_equal(idx.intersection(idx, sort=False), idx) + tm.assert_index_equal(idx.intersection(idx, sort=None), idx) + + def test_intersection_str_dates(self, sort): + dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)] + + i1 = Index(dt_dates, dtype=object) + i2 = Index(["aa"], dtype=object) + result = i2.intersection(i1, sort=sort) + + assert len(result) == 0 + + @pytest.mark.parametrize( + "index2,expected_arr", + [(Index(["B", "D"]), ["B"]), (Index(["B", "D", "A"]), ["A", "B"])], + ) + def test_intersection_non_monotonic_non_unique(self, index2, expected_arr, sort): + # non-monotonic non-unique + index1 = Index(["A", "B", "A", "C"]) + expected = Index(expected_arr, dtype="object") + result = index1.intersection(index2, sort=sort) + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + + def test_difference_base(self, sort): + # (same results for py2 and py3 but sortedness not tested elsewhere) + index = Index([0, "a", 1, "b", 2, "c"]) + first = index[:4] + second = index[3:] + + result = first.difference(second, sort) + expected = Index([0, "a", 1]) + if sort is None: + expected = Index(safe_sort(expected)) + tm.assert_index_equal(result, expected) + + def test_symmetric_difference(self): + # (same results for py2 and py3 but sortedness not tested elsewhere) + index = Index([0, "a", 1, "b", 2, "c"]) + first = index[:4] + second = index[3:] + + result = first.symmetric_difference(second) + expected = Index([0, 1, 2, "a", "c"]) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "method,expected,sort", + [ + ( + "intersection", + np.array( + [(1, "A"), (2, "A"), (1, "B"), (2, "B")], + dtype=[("num", int), ("let", "a1")], + ), + False, + ), + ( + "intersection", + np.array( + [(1, "A"), (1, "B"), (2, "A"), (2, "B")], + dtype=[("num", int), ("let", "a1")], + ), + None, + ), + ( + "union", + np.array( + [(1, "A"), (1, "B"), (1, "C"), (2, "A"), (2, "B"), (2, "C")], + dtype=[("num", int), ("let", "a1")], + ), + None, + ), + ], + ) + def test_tuple_union_bug(self, method, expected, sort): + index1 = Index( + np.array( + [(1, "A"), (2, "A"), (1, "B"), (2, "B")], + dtype=[("num", int), ("let", "a1")], + ) + ) + index2 = Index( + np.array( + [(1, "A"), (2, "A"), (1, "B"), (2, "B"), (1, "C"), (2, "C")], + dtype=[("num", int), ("let", "a1")], + ) + ) + + result = getattr(index1, method)(index2, sort=sort) + assert result.ndim == 1 + + expected = Index(expected) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("first_list", [["b", "a"], []]) + @pytest.mark.parametrize("second_list", [["a", "b"], []]) + @pytest.mark.parametrize( + 
"first_name, second_name, expected_name", + [("A", "B", None), (None, "B", None), ("A", None, None)], + ) + def test_union_name_preservation( + self, first_list, second_list, first_name, second_name, expected_name, sort + ): + first = Index(first_list, name=first_name) + second = Index(second_list, name=second_name) + union = first.union(second, sort=sort) + + vals = set(first_list).union(second_list) + + if sort is None and len(first_list) > 0 and len(second_list) > 0: + expected = Index(sorted(vals), name=expected_name) + tm.assert_index_equal(union, expected) + else: + expected = Index(vals, name=expected_name) + tm.equalContents(union, expected) + + @pytest.mark.parametrize( + "diff_type, expected", + [["difference", [1, "B"]], ["symmetric_difference", [1, 2, "B", "C"]]], + ) + def test_difference_object_type(self, diff_type, expected): + # GH 13432 + idx1 = Index([0, 1, "A", "B"]) + idx2 = Index([0, 2, "A", "C"]) + result = getattr(idx1, diff_type)(idx2) + expected = Index(expected) + tm.assert_index_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/test_where.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/test_where.py new file mode 100644 index 0000000..0c89697 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/base_class/test_where.py @@ -0,0 +1,13 @@ +import numpy as np + +from pandas import Index +import pandas._testing as tm + + +class TestWhere: + def test_where_intlike_str_doesnt_cast_ints(self): + idx = Index(range(3)) + mask = np.array([True, False, True]) + res = idx.where(mask, "2") + expected = Index([0, "2", 2]) + tm.assert_index_equal(res, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..bdfe9c0 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/__pycache__/test_append.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/__pycache__/test_append.cpython-38.pyc new file mode 100644 index 0000000..677fd4a Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/__pycache__/test_append.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/__pycache__/test_astype.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/__pycache__/test_astype.cpython-38.pyc new file mode 100644 index 0000000..b0d9433 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/__pycache__/test_astype.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/__pycache__/test_category.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/__pycache__/test_category.cpython-38.pyc new file mode 100644 index 0000000..763d194 Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/__pycache__/test_category.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/__pycache__/test_constructors.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/__pycache__/test_constructors.cpython-38.pyc new file mode 100644 index 0000000..d7c1a33 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/__pycache__/test_constructors.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/__pycache__/test_equals.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/__pycache__/test_equals.cpython-38.pyc new file mode 100644 index 0000000..7bfa3e7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/__pycache__/test_equals.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/__pycache__/test_fillna.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/__pycache__/test_fillna.cpython-38.pyc new file mode 100644 index 0000000..35418f0 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/__pycache__/test_fillna.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/__pycache__/test_formats.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/__pycache__/test_formats.cpython-38.pyc new file mode 100644 index 0000000..ee0cc5c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/__pycache__/test_formats.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/__pycache__/test_indexing.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/__pycache__/test_indexing.cpython-38.pyc new file mode 100644 index 0000000..4a44594 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/__pycache__/test_indexing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/__pycache__/test_map.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/__pycache__/test_map.cpython-38.pyc new file mode 100644 index 0000000..d724ae6 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/__pycache__/test_map.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/__pycache__/test_reindex.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/__pycache__/test_reindex.cpython-38.pyc new file mode 100644 index 0000000..e991e03 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/__pycache__/test_reindex.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/test_append.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/test_append.py new file mode 100644 index 0000000..b48c321 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/test_append.py @@ -0,0 +1,62 @@ +import pytest + +from pandas import ( + CategoricalIndex, + Index, +) +import pandas._testing as tm + + +class TestAppend: + @pytest.fixture + def ci(self): 
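+        # (editor's note) fixture: values "aabbca" over the deliberately
+        # non-lexical category order "cab"; the append tests below must
+        # preserve both the values and that category order.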
+ categories = list("cab") + return CategoricalIndex(list("aabbca"), categories=categories, ordered=False) + + def test_append(self, ci): + # append cats with the same categories + result = ci[:3].append(ci[3:]) + tm.assert_index_equal(result, ci, exact=True) + + foos = [ci[:1], ci[1:3], ci[3:]] + result = foos[0].append(foos[1:]) + tm.assert_index_equal(result, ci, exact=True) + + def test_append_empty(self, ci): + # empty + result = ci.append([]) + tm.assert_index_equal(result, ci, exact=True) + + def test_append_mismatched_categories(self, ci): + # appending with different categories or reordered is not ok + msg = "all inputs must be Index" + with pytest.raises(TypeError, match=msg): + ci.append(ci.values.set_categories(list("abcd"))) + with pytest.raises(TypeError, match=msg): + ci.append(ci.values.reorder_categories(list("abc"))) + + def test_append_category_objects(self, ci): + # with objects + result = ci.append(Index(["c", "a"])) + expected = CategoricalIndex(list("aabbcaca"), categories=ci.categories) + tm.assert_index_equal(result, expected, exact=True) + + def test_append_non_categories(self, ci): + # invalid objects -> cast to object via concat_compat + result = ci.append(Index(["a", "d"])) + expected = Index(["a", "a", "b", "b", "c", "a", "a", "d"]) + tm.assert_index_equal(result, expected, exact=True) + + def test_append_object(self, ci): + # GH#14298 - if base object is not categorical -> coerce to object + result = Index(["c", "a"]).append(ci) + expected = Index(list("caaabbca")) + tm.assert_index_equal(result, expected, exact=True) + + def test_append_to_another(self): + # hits Index._concat + fst = Index(["a", "b"]) + snd = CategoricalIndex(["d", "e"]) + result = fst.append(snd) + expected = Index(["a", "b", "d", "e"]) + tm.assert_index_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/test_astype.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/test_astype.py new file mode 100644 index 0000000..854ae8b --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/test_astype.py @@ -0,0 +1,87 @@ +from datetime import date + +import numpy as np +import pytest + +from pandas import ( + Categorical, + CategoricalDtype, + CategoricalIndex, + Index, + IntervalIndex, +) +import pandas._testing as tm + + +class TestAstype: + def test_astype(self): + ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False) + + result = ci.astype(object) + tm.assert_index_equal(result, Index(np.array(ci))) + + # this IS equal, but not the same class + assert result.equals(ci) + assert isinstance(result, Index) + assert not isinstance(result, CategoricalIndex) + + # interval + ii = IntervalIndex.from_arrays(left=[-0.001, 2.0], right=[2, 4], closed="right") + + ci = CategoricalIndex( + Categorical.from_codes([0, 1, -1], categories=ii, ordered=True) + ) + + result = ci.astype("interval") + expected = ii.take([0, 1, -1], allow_fill=True, fill_value=np.nan) + tm.assert_index_equal(result, expected) + + result = IntervalIndex(result.values) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("name", [None, "foo"]) + @pytest.mark.parametrize("dtype_ordered", [True, False]) + @pytest.mark.parametrize("index_ordered", [True, False]) + def test_astype_category(self, name, dtype_ordered, index_ordered): + # GH#18630 + index = CategoricalIndex( + list("aabbca"), categories=list("cab"), ordered=index_ordered + ) + if name: + index = index.rename(name) + + # standard 
categories + dtype = CategoricalDtype(ordered=dtype_ordered) + result = index.astype(dtype) + expected = CategoricalIndex( + index.tolist(), + name=name, + categories=index.categories, + ordered=dtype_ordered, + ) + tm.assert_index_equal(result, expected) + + # non-standard categories + dtype = CategoricalDtype(index.unique().tolist()[:-1], dtype_ordered) + result = index.astype(dtype) + expected = CategoricalIndex(index.tolist(), name=name, dtype=dtype) + tm.assert_index_equal(result, expected) + + if dtype_ordered is False: + # dtype='category' can't specify ordered, so only test once + result = index.astype("category") + expected = index + tm.assert_index_equal(result, expected) + + def test_categorical_date_roundtrip(self): + # astype to categorical and back should preserve date objects + v = date.today() + + obj = Index([v, v]) + assert obj.dtype == object + + cat = obj.astype("category") + + rtrip = cat.astype(object) + assert rtrip.dtype == object + assert type(rtrip[0]) is date diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/test_category.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/test_category.py new file mode 100644 index 0000000..2ae6ce9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/test_category.py @@ -0,0 +1,390 @@ +import numpy as np +import pytest + +from pandas._libs import index as libindex +from pandas._libs.arrays import NDArrayBacked + +import pandas as pd +from pandas import ( + Categorical, + CategoricalDtype, +) +import pandas._testing as tm +from pandas.core.indexes.api import ( + CategoricalIndex, + Index, +) +from pandas.tests.indexes.common import Base + + +class TestCategoricalIndex(Base): + _index_cls = CategoricalIndex + + @pytest.fixture + def simple_index(self) -> CategoricalIndex: + return self._index_cls(list("aabbca"), categories=list("cab"), ordered=False) + + @pytest.fixture + def index(self, request): + return tm.makeCategoricalIndex(100) + + def create_index(self, *, categories=None, ordered=False): + if categories is None: + categories = list("cab") + return CategoricalIndex(list("aabbca"), categories=categories, ordered=ordered) + + def test_can_hold_identifiers(self): + idx = self.create_index(categories=list("abcd")) + key = idx[0] + assert idx._can_hold_identifiers_and_holds_name(key) is True + + def test_pickle_compat_construction(self): + # Once the deprecation is enforced, we can use the parent class's test + with tm.assert_produces_warning(FutureWarning, match="without passing data"): + self._index_cls() + + def test_insert(self, simple_index): + + ci = simple_index + categories = ci.categories + + # test 0th element + result = ci.insert(0, "a") + expected = CategoricalIndex(list("aaabbca"), categories=categories) + tm.assert_index_equal(result, expected, exact=True) + + # test Nth element that follows Python list behavior + result = ci.insert(-1, "a") + expected = CategoricalIndex(list("aabbcaa"), categories=categories) + tm.assert_index_equal(result, expected, exact=True) + + # test empty + result = CategoricalIndex([], categories=categories).insert(0, "a") + expected = CategoricalIndex(["a"], categories=categories) + tm.assert_index_equal(result, expected, exact=True) + + # invalid -> cast to object + expected = ci.astype(object).insert(0, "d") + result = ci.insert(0, "d") + tm.assert_index_equal(result, expected, exact=True) + + # GH 18295 (test missing) + expected = CategoricalIndex(["a", np.nan, "a", "b", "c", "b"]) + for na in 
(np.nan, pd.NaT, None): + result = CategoricalIndex(list("aabcb")).insert(1, na) + tm.assert_index_equal(result, expected) + + def test_insert_na_mismatched_dtype(self): + ci = CategoricalIndex([0, 1, 1]) + result = ci.insert(0, pd.NaT) + expected = Index([pd.NaT, 0, 1, 1], dtype=object) + tm.assert_index_equal(result, expected) + + def test_delete(self, simple_index): + + ci = simple_index + categories = ci.categories + + result = ci.delete(0) + expected = CategoricalIndex(list("abbca"), categories=categories) + tm.assert_index_equal(result, expected, exact=True) + + result = ci.delete(-1) + expected = CategoricalIndex(list("aabbc"), categories=categories) + tm.assert_index_equal(result, expected, exact=True) + + with tm.external_error_raised((IndexError, ValueError)): + # Either depending on NumPy version + ci.delete(10) + + @pytest.mark.parametrize( + "data, non_lexsorted_data", + [[[1, 2, 3], [9, 0, 1, 2, 3]], [list("abc"), list("fabcd")]], + ) + def test_is_monotonic(self, data, non_lexsorted_data): + c = CategoricalIndex(data) + assert c.is_monotonic_increasing is True + assert c.is_monotonic_decreasing is False + + c = CategoricalIndex(data, ordered=True) + assert c.is_monotonic_increasing is True + assert c.is_monotonic_decreasing is False + + c = CategoricalIndex(data, categories=reversed(data)) + assert c.is_monotonic_increasing is False + assert c.is_monotonic_decreasing is True + + c = CategoricalIndex(data, categories=reversed(data), ordered=True) + assert c.is_monotonic_increasing is False + assert c.is_monotonic_decreasing is True + + # test when data is neither monotonic increasing nor decreasing + reordered_data = [data[0], data[2], data[1]] + c = CategoricalIndex(reordered_data, categories=reversed(data)) + assert c.is_monotonic_increasing is False + assert c.is_monotonic_decreasing is False + + # non lexsorted categories + categories = non_lexsorted_data + + c = CategoricalIndex(categories[:2], categories=categories) + assert c.is_monotonic_increasing is True + assert c.is_monotonic_decreasing is False + + c = CategoricalIndex(categories[1:3], categories=categories) + assert c.is_monotonic_increasing is True + assert c.is_monotonic_decreasing is False + + def test_has_duplicates(self): + idx = CategoricalIndex([0, 0, 0], name="foo") + assert idx.is_unique is False + assert idx.has_duplicates is True + + idx = CategoricalIndex([0, 1], categories=[2, 3], name="foo") + assert idx.is_unique is False + assert idx.has_duplicates is True + + idx = CategoricalIndex([0, 1, 2, 3], categories=[1, 2, 3], name="foo") + assert idx.is_unique is True + assert idx.has_duplicates is False + + @pytest.mark.parametrize( + "data, categories, expected", + [ + ( + [1, 1, 1], + [1, 2, 3], + { + "first": np.array([False, True, True]), + "last": np.array([True, True, False]), + False: np.array([True, True, True]), + }, + ), + ( + [1, 1, 1], + list("abc"), + { + "first": np.array([False, True, True]), + "last": np.array([True, True, False]), + False: np.array([True, True, True]), + }, + ), + ( + [2, "a", "b"], + list("abc"), + { + "first": np.zeros(shape=(3), dtype=np.bool_), + "last": np.zeros(shape=(3), dtype=np.bool_), + False: np.zeros(shape=(3), dtype=np.bool_), + }, + ), + ( + list("abb"), + list("abc"), + { + "first": np.array([False, False, True]), + "last": np.array([False, True, False]), + False: np.array([False, True, True]), + }, + ), + ], + ) + def test_drop_duplicates(self, data, categories, expected): + + idx = CategoricalIndex(data, categories=categories, name="foo") + for keep, e 
in expected.items(): + tm.assert_numpy_array_equal(idx.duplicated(keep=keep), e) + e = idx[~e] + result = idx.drop_duplicates(keep=keep) + tm.assert_index_equal(result, e) + + @pytest.mark.parametrize( + "data, categories, expected_data", + [ + ([1, 1, 1], [1, 2, 3], [1]), + ([1, 1, 1], list("abc"), [np.nan]), + ([1, 2, "a"], [1, 2, 3], [1, 2, np.nan]), + ([2, "a", "b"], list("abc"), [np.nan, "a", "b"]), + ], + ) + def test_unique(self, data, categories, expected_data, ordered): + dtype = CategoricalDtype(categories, ordered=ordered) + + idx = CategoricalIndex(data, dtype=dtype) + expected = CategoricalIndex(expected_data, dtype=dtype) + tm.assert_index_equal(idx.unique(), expected) + + def test_repr_roundtrip(self): + + ci = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True) + str(ci) + tm.assert_index_equal(eval(repr(ci)), ci, exact=True) + + # formatting + str(ci) + + # long format + # this is not reprable + ci = CategoricalIndex(np.random.randint(0, 5, size=100)) + str(ci) + + def test_isin(self): + + ci = CategoricalIndex(list("aabca") + [np.nan], categories=["c", "a", "b"]) + tm.assert_numpy_array_equal( + ci.isin(["c"]), np.array([False, False, False, True, False, False]) + ) + tm.assert_numpy_array_equal( + ci.isin(["c", "a", "b"]), np.array([True] * 5 + [False]) + ) + tm.assert_numpy_array_equal( + ci.isin(["c", "a", "b", np.nan]), np.array([True] * 6) + ) + + # mismatched categorical -> coerced to ndarray so doesn't matter + result = ci.isin(ci.set_categories(list("abcdefghi"))) + expected = np.array([True] * 6) + tm.assert_numpy_array_equal(result, expected) + + result = ci.isin(ci.set_categories(list("defghi"))) + expected = np.array([False] * 5 + [True]) + tm.assert_numpy_array_equal(result, expected) + + def test_identical(self): + + ci1 = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True) + ci2 = CategoricalIndex(["a", "b"], categories=["a", "b", "c"], ordered=True) + assert ci1.identical(ci1) + assert ci1.identical(ci1.copy()) + assert not ci1.identical(ci2) + + def test_ensure_copied_data(self, index): + # gh-12309: Check the "copy" argument of each + # Index.__new__ is honored. + # + # Must be tested separately from other indexes because + # self.values is not an ndarray. + + result = CategoricalIndex(index.values, copy=True) + tm.assert_index_equal(index, result) + assert not np.shares_memory(result._data._codes, index._data._codes) + + result = CategoricalIndex(index.values, copy=False) + assert result._data._codes is index._data._codes + + def test_frame_repr(self): + df = pd.DataFrame({"A": [1, 2, 3]}, index=CategoricalIndex(["a", "b", "c"])) + result = repr(df) + expected = " A\na 1\nb 2\nc 3" + assert result == expected + + def test_reindex_base(self): + # See test_reindex.py + pass + + def test_map_str(self): + # See test_map.py + pass + + +class TestCategoricalIndex2: + # Tests that are not overriding a test in Base + + @pytest.mark.parametrize( + "dtype, engine_type", + [ + (np.int8, libindex.Int8Engine), + (np.int16, libindex.Int16Engine), + (np.int32, libindex.Int32Engine), + (np.int64, libindex.Int64Engine), + ], + ) + def test_engine_type(self, dtype, engine_type): + if dtype != np.int64: + # num. of uniques required to push CategoricalIndex.codes to a + # dtype (128 categories required for .codes dtype to be int16 etc.) 
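+            # (editor's note) i.e. the mapping just below: 1 unique keeps
+            # the codes in int8, 128 uniques push them to int16, and 32768
+            # to int32; the engine class asserted afterwards must match
+            # that codes dtype.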
+ num_uniques = {np.int8: 1, np.int16: 128, np.int32: 32768}[dtype] + ci = CategoricalIndex(range(num_uniques)) + else: + # having 2**32 - 2**31 categories would be very memory-intensive, + # so we cheat a bit with the dtype + ci = CategoricalIndex(range(32768)) # == 2**16 - 2**(16 - 1) + arr = ci.values._ndarray.astype("int64") + NDArrayBacked.__init__(ci._data, arr, ci.dtype) + assert np.issubdtype(ci.codes.dtype, dtype) + assert isinstance(ci._engine, engine_type) + + @pytest.mark.parametrize( + "func,op_name", + [ + (lambda idx: idx - idx, "__sub__"), + (lambda idx: idx + idx, "__add__"), + (lambda idx: idx - ["a", "b"], "__sub__"), + (lambda idx: idx + ["a", "b"], "__add__"), + (lambda idx: ["a", "b"] - idx, "__rsub__"), + (lambda idx: ["a", "b"] + idx, "__radd__"), + ], + ) + def test_disallow_addsub_ops(self, func, op_name): + # GH 10039 + # set ops (+/-) raise TypeError + idx = Index(Categorical(["a", "b"])) + cat_or_list = "'(Categorical|list)' and '(Categorical|list)'" + msg = "|".join( + [ + f"cannot perform {op_name} with this index type: CategoricalIndex", + "can only concatenate list", + rf"unsupported operand type\(s\) for [\+-]: {cat_or_list}", + ] + ) + with pytest.raises(TypeError, match=msg): + func(idx) + + def test_method_delegation(self): + + ci = CategoricalIndex(list("aabbca"), categories=list("cabdef")) + result = ci.set_categories(list("cab")) + tm.assert_index_equal( + result, CategoricalIndex(list("aabbca"), categories=list("cab")) + ) + + ci = CategoricalIndex(list("aabbca"), categories=list("cab")) + result = ci.rename_categories(list("efg")) + tm.assert_index_equal( + result, CategoricalIndex(list("ffggef"), categories=list("efg")) + ) + + # GH18862 (let rename_categories take callables) + result = ci.rename_categories(lambda x: x.upper()) + tm.assert_index_equal( + result, CategoricalIndex(list("AABBCA"), categories=list("CAB")) + ) + + ci = CategoricalIndex(list("aabbca"), categories=list("cab")) + result = ci.add_categories(["d"]) + tm.assert_index_equal( + result, CategoricalIndex(list("aabbca"), categories=list("cabd")) + ) + + ci = CategoricalIndex(list("aabbca"), categories=list("cab")) + result = ci.remove_categories(["c"]) + tm.assert_index_equal( + result, + CategoricalIndex(list("aabb") + [np.nan] + ["a"], categories=list("ab")), + ) + + ci = CategoricalIndex(list("aabbca"), categories=list("cabdef")) + result = ci.as_unordered() + tm.assert_index_equal(result, ci) + + ci = CategoricalIndex(list("aabbca"), categories=list("cabdef")) + result = ci.as_ordered() + tm.assert_index_equal( + result, + CategoricalIndex(list("aabbca"), categories=list("cabdef"), ordered=True), + ) + + # invalid + msg = "cannot use inplace with CategoricalIndex" + with pytest.raises(ValueError, match=msg): + ci.set_categories(list("cab"), inplace=True) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/test_constructors.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/test_constructors.py new file mode 100644 index 0000000..98da803 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/test_constructors.py @@ -0,0 +1,159 @@ +import numpy as np +import pytest + +from pandas import ( + Categorical, + CategoricalDtype, + CategoricalIndex, + Index, +) +import pandas._testing as tm + + +class TestCategoricalIndexConstructors: + def test_construction_without_data_deprecated(self): + # Once the deprecation is enforced, we can add this case to + # test_construction_disallows_scalar + msg = 
"without passing data" + with tm.assert_produces_warning(FutureWarning, match=msg): + CategoricalIndex(categories=list("abcd"), ordered=False) + + def test_construction_disallows_scalar(self): + msg = "must be called with a collection of some kind" + with pytest.raises(TypeError, match=msg): + CategoricalIndex(data=1, categories=list("abcd"), ordered=False) + + def test_construction(self): + + ci = CategoricalIndex(list("aabbca"), categories=list("abcd"), ordered=False) + categories = ci.categories + + result = Index(ci) + tm.assert_index_equal(result, ci, exact=True) + assert not result.ordered + + result = Index(ci.values) + tm.assert_index_equal(result, ci, exact=True) + assert not result.ordered + + # empty + result = CategoricalIndex([], categories=categories) + tm.assert_index_equal(result.categories, Index(categories)) + tm.assert_numpy_array_equal(result.codes, np.array([], dtype="int8")) + assert not result.ordered + + # passing categories + result = CategoricalIndex(list("aabbca"), categories=categories) + tm.assert_index_equal(result.categories, Index(categories)) + tm.assert_numpy_array_equal( + result.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8") + ) + + c = Categorical(list("aabbca")) + result = CategoricalIndex(c) + tm.assert_index_equal(result.categories, Index(list("abc"))) + tm.assert_numpy_array_equal( + result.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8") + ) + assert not result.ordered + + result = CategoricalIndex(c, categories=categories) + tm.assert_index_equal(result.categories, Index(categories)) + tm.assert_numpy_array_equal( + result.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8") + ) + assert not result.ordered + + ci = CategoricalIndex(c, categories=list("abcd")) + result = CategoricalIndex(ci) + tm.assert_index_equal(result.categories, Index(categories)) + tm.assert_numpy_array_equal( + result.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8") + ) + assert not result.ordered + + result = CategoricalIndex(ci, categories=list("ab")) + tm.assert_index_equal(result.categories, Index(list("ab"))) + tm.assert_numpy_array_equal( + result.codes, np.array([0, 0, 1, 1, -1, 0], dtype="int8") + ) + assert not result.ordered + + result = CategoricalIndex(ci, categories=list("ab"), ordered=True) + tm.assert_index_equal(result.categories, Index(list("ab"))) + tm.assert_numpy_array_equal( + result.codes, np.array([0, 0, 1, 1, -1, 0], dtype="int8") + ) + assert result.ordered + + result = CategoricalIndex(ci, categories=list("ab"), ordered=True) + expected = CategoricalIndex( + ci, categories=list("ab"), ordered=True, dtype="category" + ) + tm.assert_index_equal(result, expected, exact=True) + + # turn me to an Index + result = Index(np.array(ci)) + assert isinstance(result, Index) + assert not isinstance(result, CategoricalIndex) + + def test_construction_with_dtype(self): + + # specify dtype + ci = CategoricalIndex(list("aabbca"), categories=list("abc"), ordered=False) + + result = Index(np.array(ci), dtype="category") + tm.assert_index_equal(result, ci, exact=True) + + result = Index(np.array(ci).tolist(), dtype="category") + tm.assert_index_equal(result, ci, exact=True) + + # these are generally only equal when the categories are reordered + ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False) + + result = Index(np.array(ci), dtype="category").reorder_categories(ci.categories) + tm.assert_index_equal(result, ci, exact=True) + + # make sure indexes are handled + idx = Index(range(3)) + expected = CategoricalIndex([0, 1, 2], categories=idx, 
ordered=True) + result = CategoricalIndex(idx, categories=idx, ordered=True) + tm.assert_index_equal(result, expected, exact=True) + + def test_construction_empty_with_bool_categories(self): + # see GH#22702 + cat = CategoricalIndex([], categories=[True, False]) + categories = sorted(cat.categories.tolist()) + assert categories == [False, True] + + def test_construction_with_categorical_dtype(self): + # construction with CategoricalDtype + # GH#18109 + data, cats, ordered = "a a b b".split(), "c b a".split(), True + dtype = CategoricalDtype(categories=cats, ordered=ordered) + + result = CategoricalIndex(data, dtype=dtype) + expected = CategoricalIndex(data, categories=cats, ordered=ordered) + tm.assert_index_equal(result, expected, exact=True) + + # GH#19032 + result = Index(data, dtype=dtype) + tm.assert_index_equal(result, expected, exact=True) + + # error when combining categories/ordered and dtype kwargs + msg = "Cannot specify `categories` or `ordered` together with `dtype`." + with pytest.raises(ValueError, match=msg): + CategoricalIndex(data, categories=cats, dtype=dtype) + + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(FutureWarning): + # passing subclass-specific kwargs to pd.Index + Index(data, categories=cats, dtype=dtype) + + with pytest.raises(ValueError, match=msg): + CategoricalIndex(data, ordered=ordered, dtype=dtype) + + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(FutureWarning): + # passing subclass-specific kwargs to pd.Index + Index(data, ordered=ordered, dtype=dtype) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/test_equals.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/test_equals.py new file mode 100644 index 0000000..1ed8f3a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/test_equals.py @@ -0,0 +1,90 @@ +import numpy as np +import pytest + +from pandas import ( + Categorical, + CategoricalIndex, + Index, + MultiIndex, +) + + +class TestEquals: + def test_equals_categorical(self): + ci1 = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True) + ci2 = CategoricalIndex(["a", "b"], categories=["a", "b", "c"], ordered=True) + + assert ci1.equals(ci1) + assert not ci1.equals(ci2) + assert ci1.equals(ci1.astype(object)) + assert ci1.astype(object).equals(ci1) + + assert (ci1 == ci1).all() + assert not (ci1 != ci1).all() + assert not (ci1 > ci1).all() + assert not (ci1 < ci1).all() + assert (ci1 <= ci1).all() + assert (ci1 >= ci1).all() + + assert not (ci1 == 1).all() + assert (ci1 == Index(["a", "b"])).all() + assert (ci1 == ci1.values).all() + + # invalid comparisons + with pytest.raises(ValueError, match="Lengths must match"): + ci1 == Index(["a", "b", "c"]) + + msg = "Categoricals can only be compared if 'categories' are the same" + with pytest.raises(TypeError, match=msg): + ci1 == ci2 + with pytest.raises(TypeError, match=msg): + ci1 == Categorical(ci1.values, ordered=False) + with pytest.raises(TypeError, match=msg): + ci1 == Categorical(ci1.values, categories=list("abc")) + + # tests + # make sure that we are testing for category inclusion properly + ci = CategoricalIndex(list("aabca"), categories=["c", "a", "b"]) + assert not ci.equals(list("aabca")) + # Same categories, but different order + # Unordered + assert ci.equals(CategoricalIndex(list("aabca"))) + # Ordered + assert not ci.equals(CategoricalIndex(list("aabca"), ordered=True)) + assert ci.equals(ci.copy()) + + ci = 
CategoricalIndex(list("aabca") + [np.nan], categories=["c", "a", "b"])
+        assert not ci.equals(list("aabca"))
+        assert not ci.equals(CategoricalIndex(list("aabca")))
+        assert ci.equals(ci.copy())
+
+        ci = CategoricalIndex(list("aabca") + [np.nan], categories=["c", "a", "b"])
+        assert not ci.equals(list("aabca") + [np.nan])
+        assert ci.equals(CategoricalIndex(list("aabca") + [np.nan]))
+        assert not ci.equals(CategoricalIndex(list("aabca") + [np.nan], ordered=True))
+        assert ci.equals(ci.copy())
+
+    def test_equals_categorical_unordered(self):
+        # https://github.com/pandas-dev/pandas/issues/16603
+        a = CategoricalIndex(["A"], categories=["A", "B"])
+        b = CategoricalIndex(["A"], categories=["B", "A"])
+        c = CategoricalIndex(["C"], categories=["B", "A"])
+        assert a.equals(b)
+        assert not a.equals(c)
+        assert not b.equals(c)
+
+    def test_equals_non_category(self):
+        # GH#37667 Case where other contains a value not among ci's
+        # categories ("D") and also contains np.nan
+        ci = CategoricalIndex(["A", "B", np.nan, np.nan])
+        other = Index(["A", "B", "D", np.nan])
+
+        assert not ci.equals(other)
+
+    def test_equals_multiindex(self):
+        # don't raise NotImplementedError when calling is_dtype_compat
+
+        mi = MultiIndex.from_arrays([["A", "B", "C", "D"], range(4)])
+        ci = mi.to_flat_index().astype("category")
+
+        assert not ci.equals(mi)
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/test_fillna.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/test_fillna.py
new file mode 100644
index 0000000..09de578
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/test_fillna.py
@@ -0,0 +1,54 @@
+import numpy as np
+import pytest
+
+from pandas import CategoricalIndex
+import pandas._testing as tm
+
+
+class TestFillNA:
+    def test_fillna_categorical(self):
+        # GH#11343
+        idx = CategoricalIndex([1.0, np.nan, 3.0, 1.0], name="x")
+        # fill by value in categories
+        exp = CategoricalIndex([1.0, 1.0, 3.0, 1.0], name="x")
+        tm.assert_index_equal(idx.fillna(1.0), exp)
+
+        cat = idx._data
+
+        # fill by value not in categories raises TypeError on EA, casts on CI
+        msg = "Cannot setitem on a Categorical with a new category"
+        with pytest.raises(TypeError, match=msg):
+            cat.fillna(2.0)
+
+        result = idx.fillna(2.0)
+        expected = idx.astype(object).fillna(2.0)
+        tm.assert_index_equal(result, expected)
+
+    def test_fillna_copies_with_no_nas(self):
+        # Nothing to fill, should still get a copy for the Categorical method,
+        # but OK to get a view on CategoricalIndex method
+        ci = CategoricalIndex([0, 1, 1])
+        result = ci.fillna(0)
+        assert result is not ci
+        assert tm.shares_memory(result, ci)
+
+        # But at the EA level we always get a copy.
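+        # (editor's note) ci._data unwraps the backing Categorical EA, so
+        # the assertions below check copy semantics at the array level, in
+        # contrast with the index-level view allowed above.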
+ cat = ci._data + result = cat.fillna(0) + assert result._ndarray is not cat._ndarray + assert result._ndarray.base is None + assert not tm.shares_memory(result, cat) + + def test_fillna_validates_with_no_nas(self): + # We validate the fill value even if fillna is a no-op + ci = CategoricalIndex([2, 3, 3]) + cat = ci._data + + msg = "Cannot setitem on a Categorical with a new category" + res = ci.fillna(False) + # nothing to fill, so we dont cast + tm.assert_index_equal(res, ci) + + # Same check directly on the Categorical + with pytest.raises(TypeError, match=msg): + cat.fillna(False) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/test_formats.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/test_formats.py new file mode 100644 index 0000000..044b035 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/test_formats.py @@ -0,0 +1,114 @@ +""" +Tests for CategoricalIndex.__repr__ and related methods. +""" +import pandas._config.config as cf + +from pandas import CategoricalIndex + + +class TestCategoricalIndexRepr: + def test_format_different_scalar_lengths(self): + # GH#35439 + idx = CategoricalIndex(["aaaaaaaaa", "b"]) + expected = ["aaaaaaaaa", "b"] + assert idx.format() == expected + + def test_string_categorical_index_repr(self): + # short + idx = CategoricalIndex(["a", "bb", "ccc"]) + expected = """CategoricalIndex(['a', 'bb', 'ccc'], categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')""" # noqa:E501 + assert repr(idx) == expected + + # multiple lines + idx = CategoricalIndex(["a", "bb", "ccc"] * 10) + expected = """CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', + 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', + 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'], + categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')""" + + assert repr(idx) == expected + + # truncated + idx = CategoricalIndex(["a", "bb", "ccc"] * 100) + expected = """CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', + ... + 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'], + categories=['a', 'bb', 'ccc'], ordered=False, dtype='category', length=300)""" # noqa:E501 + + assert repr(idx) == expected + + # larger categories + idx = CategoricalIndex(list("abcdefghijklmmo")) + expected = """CategoricalIndex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', + 'm', 'm', 'o'], + categories=['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', ...], ordered=False, dtype='category')""" # noqa:E501 + + assert repr(idx) == expected + + # short + idx = CategoricalIndex(["あ", "いい", "ううう"]) + expected = """CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa:E501 + assert repr(idx) == expected + + # multiple lines + idx = CategoricalIndex(["あ", "いい", "ううう"] * 10) + expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', + 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', + 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], + categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" + + assert repr(idx) == expected + + # truncated + idx = CategoricalIndex(["あ", "いい", "ううう"] * 100) + expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', + ... 
+ 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], + categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" # noqa:E501 + + assert repr(idx) == expected + + # larger categories + idx = CategoricalIndex(list("あいうえおかきくけこさしすせそ")) + expected = """CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', 'さ', 'し', + 'す', 'せ', 'そ'], + categories=['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', ...], ordered=False, dtype='category')""" # noqa:E501 + + assert repr(idx) == expected + + # Enable Unicode option ----------------------------------------- + with cf.option_context("display.unicode.east_asian_width", True): + + # short + idx = CategoricalIndex(["あ", "いい", "ううう"]) + expected = """CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa:E501 + assert repr(idx) == expected + + # multiple lines + idx = CategoricalIndex(["あ", "いい", "ううう"] * 10) + expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', + 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', + 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', + 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], + categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" + + assert repr(idx) == expected + + # truncated + idx = CategoricalIndex(["あ", "いい", "ううう"] * 100) + expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', + 'ううう', 'あ', + ... + 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', + 'あ', 'いい', 'ううう'], + categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" # noqa:E501 + + assert repr(idx) == expected + + # larger categories + idx = CategoricalIndex(list("あいうえおかきくけこさしすせそ")) + expected = """CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', + 'さ', 'し', 'す', 'せ', 'そ'], + categories=['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', ...], ordered=False, dtype='category')""" # noqa:E501 + + assert repr(idx) == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/test_indexing.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/test_indexing.py new file mode 100644 index 0000000..2297f8c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/test_indexing.py @@ -0,0 +1,414 @@ +import numpy as np +import pytest + +from pandas.errors import InvalidIndexError + +import pandas as pd +from pandas import ( + CategoricalIndex, + Index, + IntervalIndex, + Timestamp, +) +import pandas._testing as tm + + +class TestTake: + def test_take_fill_value(self): + # GH 12631 + + # numeric category + idx = CategoricalIndex([1, 2, 3], name="xxx") + result = idx.take(np.array([1, 0, -1])) + expected = CategoricalIndex([2, 1, 3], name="xxx") + tm.assert_index_equal(result, expected) + tm.assert_categorical_equal(result.values, expected.values) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + expected = CategoricalIndex([2, 1, np.nan], categories=[1, 2, 3], name="xxx") + tm.assert_index_equal(result, expected) + tm.assert_categorical_equal(result.values, expected.values) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = CategoricalIndex([2, 1, 3], name="xxx") + tm.assert_index_equal(result, expected) + tm.assert_categorical_equal(result.values, expected.values) + + # object category + idx = CategoricalIndex( + list("CBA"), categories=list("ABC"), ordered=True, name="xxx" + ) + result = idx.take(np.array([1, 0, -1])) + expected = 
CategoricalIndex( + list("BCA"), categories=list("ABC"), ordered=True, name="xxx" + ) + tm.assert_index_equal(result, expected) + tm.assert_categorical_equal(result.values, expected.values) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + expected = CategoricalIndex( + ["B", "C", np.nan], categories=list("ABC"), ordered=True, name="xxx" + ) + tm.assert_index_equal(result, expected) + tm.assert_categorical_equal(result.values, expected.values) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = CategoricalIndex( + list("BCA"), categories=list("ABC"), ordered=True, name="xxx" + ) + tm.assert_index_equal(result, expected) + tm.assert_categorical_equal(result.values, expected.values) + + msg = ( + "When allow_fill=True and fill_value is not None, " + "all indices must be >= -1" + ) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + msg = "index -5 is out of bounds for (axis 0 with )?size 3" + with pytest.raises(IndexError, match=msg): + idx.take(np.array([1, -5])) + + def test_take_fill_value_datetime(self): + + # datetime category + idx = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx") + idx = CategoricalIndex(idx) + result = idx.take(np.array([1, 0, -1])) + expected = pd.DatetimeIndex( + ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx" + ) + expected = CategoricalIndex(expected) + tm.assert_index_equal(result, expected) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + expected = pd.DatetimeIndex(["2011-02-01", "2011-01-01", "NaT"], name="xxx") + exp_cats = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"]) + expected = CategoricalIndex(expected, categories=exp_cats) + tm.assert_index_equal(result, expected) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = pd.DatetimeIndex( + ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx" + ) + expected = CategoricalIndex(expected) + tm.assert_index_equal(result, expected) + + msg = ( + "When allow_fill=True and fill_value is not None, " + "all indices must be >= -1" + ) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + msg = "index -5 is out of bounds for (axis 0 with )?size 3" + with pytest.raises(IndexError, match=msg): + idx.take(np.array([1, -5])) + + def test_take_invalid_kwargs(self): + idx = CategoricalIndex([1, 2, 3], name="foo") + indices = [1, 0, -1] + + msg = r"take\(\) got an unexpected keyword argument 'foo'" + with pytest.raises(TypeError, match=msg): + idx.take(indices, foo=2) + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + idx.take(indices, out=indices) + + msg = "the 'mode' parameter is not supported" + with pytest.raises(ValueError, match=msg): + idx.take(indices, mode="clip") + + +class TestGetLoc: + def test_get_loc(self): + # GH 12531 + cidx1 = CategoricalIndex(list("abcde"), categories=list("edabc")) + idx1 = Index(list("abcde")) + assert cidx1.get_loc("a") == idx1.get_loc("a") + assert cidx1.get_loc("e") == idx1.get_loc("e") + + for i in [cidx1, idx1]: + with pytest.raises(KeyError, match="'NOT-EXIST'"): + i.get_loc("NOT-EXIST") + + # non-unique + cidx2 = 
CategoricalIndex(list("aacded"), categories=list("edabc")) + idx2 = Index(list("aacded")) + + # results in bool array + res = cidx2.get_loc("d") + tm.assert_numpy_array_equal(res, idx2.get_loc("d")) + tm.assert_numpy_array_equal( + res, np.array([False, False, False, True, False, True]) + ) + # unique element results in scalar + res = cidx2.get_loc("e") + assert res == idx2.get_loc("e") + assert res == 4 + + for i in [cidx2, idx2]: + with pytest.raises(KeyError, match="'NOT-EXIST'"): + i.get_loc("NOT-EXIST") + + # non-unique, sliceable + cidx3 = CategoricalIndex(list("aabbb"), categories=list("abc")) + idx3 = Index(list("aabbb")) + + # results in slice + res = cidx3.get_loc("a") + assert res == idx3.get_loc("a") + assert res == slice(0, 2, None) + + res = cidx3.get_loc("b") + assert res == idx3.get_loc("b") + assert res == slice(2, 5, None) + + for i in [cidx3, idx3]: + with pytest.raises(KeyError, match="'c'"): + i.get_loc("c") + + def test_get_loc_unique(self): + cidx = CategoricalIndex(list("abc")) + result = cidx.get_loc("b") + assert result == 1 + + def test_get_loc_monotonic_nonunique(self): + cidx = CategoricalIndex(list("abbc")) + result = cidx.get_loc("b") + expected = slice(1, 3, None) + assert result == expected + + def test_get_loc_nonmonotonic_nonunique(self): + cidx = CategoricalIndex(list("abcb")) + result = cidx.get_loc("b") + expected = np.array([False, True, False, True], dtype=bool) + tm.assert_numpy_array_equal(result, expected) + + def test_get_loc_nan(self): + # GH#41933 + ci = CategoricalIndex(["A", "B", np.nan]) + res = ci.get_loc(np.nan) + + assert res == 2 + + +class TestGetIndexer: + def test_get_indexer_base(self): + # Determined by cat ordering. + idx = CategoricalIndex(list("cab"), categories=list("cab")) + expected = np.arange(len(idx), dtype=np.intp) + + actual = idx.get_indexer(idx) + tm.assert_numpy_array_equal(expected, actual) + + with pytest.raises(ValueError, match="Invalid fill method"): + idx.get_indexer(idx, method="invalid") + + def test_get_indexer_requires_unique(self): + np.random.seed(123456789) + + ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False) + oidx = Index(np.array(ci)) + + msg = "Reindexing only valid with uniquely valued Index objects" + + for n in [1, 2, 5, len(ci)]: + finder = oidx[np.random.randint(0, len(ci), size=n)] + + with pytest.raises(InvalidIndexError, match=msg): + ci.get_indexer(finder) + + # see gh-17323 + # + # Even when indexer is equal to the + # members in the index, we should + # respect duplicates instead of taking + # the fast-track path. 
+ for finder in [list("aabbca"), list("aababca")]: + + with pytest.raises(InvalidIndexError, match=msg): + ci.get_indexer(finder) + + def test_get_indexer_non_unique(self): + + idx1 = CategoricalIndex(list("aabcde"), categories=list("edabc")) + idx2 = CategoricalIndex(list("abf")) + + for indexer in [idx2, list("abf"), Index(list("abf"))]: + msg = "Reindexing only valid with uniquely valued Index objects" + with pytest.raises(InvalidIndexError, match=msg): + idx1.get_indexer(indexer) + + r1, _ = idx1.get_indexer_non_unique(indexer) + expected = np.array([0, 1, 2, -1], dtype=np.intp) + tm.assert_almost_equal(r1, expected) + + def test_get_indexer_method(self): + idx1 = CategoricalIndex(list("aabcde"), categories=list("edabc")) + idx2 = CategoricalIndex(list("abf")) + + msg = "method pad not yet implemented for CategoricalIndex" + with pytest.raises(NotImplementedError, match=msg): + idx2.get_indexer(idx1, method="pad") + msg = "method backfill not yet implemented for CategoricalIndex" + with pytest.raises(NotImplementedError, match=msg): + idx2.get_indexer(idx1, method="backfill") + + msg = "method nearest not yet implemented for CategoricalIndex" + with pytest.raises(NotImplementedError, match=msg): + idx2.get_indexer(idx1, method="nearest") + + def test_get_indexer_array(self): + arr = np.array( + [Timestamp("1999-12-31 00:00:00"), Timestamp("2000-12-31 00:00:00")], + dtype=object, + ) + cats = [Timestamp("1999-12-31 00:00:00"), Timestamp("2000-12-31 00:00:00")] + ci = CategoricalIndex(cats, categories=cats, ordered=False, dtype="category") + result = ci.get_indexer(arr) + expected = np.array([0, 1], dtype="intp") + tm.assert_numpy_array_equal(result, expected) + + def test_get_indexer_same_categories_same_order(self): + ci = CategoricalIndex(["a", "b"], categories=["a", "b"]) + + result = ci.get_indexer(CategoricalIndex(["b", "b"], categories=["a", "b"])) + expected = np.array([1, 1], dtype="intp") + tm.assert_numpy_array_equal(result, expected) + + def test_get_indexer_same_categories_different_order(self): + # https://github.com/pandas-dev/pandas/issues/19551 + ci = CategoricalIndex(["a", "b"], categories=["a", "b"]) + + result = ci.get_indexer(CategoricalIndex(["b", "b"], categories=["b", "a"])) + expected = np.array([1, 1], dtype="intp") + tm.assert_numpy_array_equal(result, expected) + + +class TestWhere: + def test_where(self, listlike_box): + klass = listlike_box + + i = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False) + cond = [True] * len(i) + expected = i + result = i.where(klass(cond)) + tm.assert_index_equal(result, expected) + + cond = [False] + [True] * (len(i) - 1) + expected = CategoricalIndex([np.nan] + i[1:].tolist(), categories=i.categories) + result = i.where(klass(cond)) + tm.assert_index_equal(result, expected) + + def test_where_non_categories(self): + ci = CategoricalIndex(["a", "b", "c", "d"]) + mask = np.array([True, False, True, False]) + + result = ci.where(mask, 2) + expected = Index(["a", 2, "c", 2], dtype=object) + tm.assert_index_equal(result, expected) + + msg = "Cannot setitem on a Categorical with a new category" + with pytest.raises(TypeError, match=msg): + # Test the Categorical method directly + ci._data._where(mask, 2) + + +class TestContains: + def test_contains(self): + + ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"), ordered=False) + + assert "a" in ci + assert "z" not in ci + assert "e" not in ci + assert np.nan not in ci + + # assert codes NOT in index + assert 0 not in ci + assert 1 not in ci + + def 
test_contains_nan(self): + ci = CategoricalIndex(list("aabbca") + [np.nan], categories=list("cabdef")) + assert np.nan in ci + + @pytest.mark.parametrize("unwrap", [True, False]) + def test_contains_na_dtype(self, unwrap): + dti = pd.date_range("2016-01-01", periods=100).insert(0, pd.NaT) + pi = dti.to_period("D") + tdi = dti - dti[-1] + ci = CategoricalIndex(dti) + + obj = ci + if unwrap: + obj = ci._data + + assert np.nan in obj + assert None in obj + assert pd.NaT in obj + assert np.datetime64("NaT") in obj + assert np.timedelta64("NaT") not in obj + + obj2 = CategoricalIndex(tdi) + if unwrap: + obj2 = obj2._data + + assert np.nan in obj2 + assert None in obj2 + assert pd.NaT in obj2 + assert np.datetime64("NaT") not in obj2 + assert np.timedelta64("NaT") in obj2 + + obj3 = CategoricalIndex(pi) + if unwrap: + obj3 = obj3._data + + assert np.nan in obj3 + assert None in obj3 + assert pd.NaT in obj3 + assert np.datetime64("NaT") not in obj3 + assert np.timedelta64("NaT") not in obj3 + + @pytest.mark.parametrize( + "item, expected", + [ + (pd.Interval(0, 1), True), + (1.5, True), + (pd.Interval(0.5, 1.5), False), + ("a", False), + (Timestamp(1), False), + (pd.Timedelta(1), False), + ], + ids=str, + ) + def test_contains_interval(self, item, expected): + # GH 23705 + ci = CategoricalIndex(IntervalIndex.from_breaks(range(3))) + result = item in ci + assert result is expected + + def test_contains_list(self): + # GH#21729 + idx = CategoricalIndex([1, 2, 3]) + + assert "a" not in idx + + with pytest.raises(TypeError, match="unhashable type"): + ["a"] in idx + + with pytest.raises(TypeError, match="unhashable type"): + ["a", "b"] in idx diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/test_map.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/test_map.py new file mode 100644 index 0000000..71ee829 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/test_map.py @@ -0,0 +1,115 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + CategoricalIndex, + Index, + Series, +) +import pandas._testing as tm + + +class TestMap: + @pytest.mark.parametrize( + "data, categories", + [ + (list("abcbca"), list("cab")), + (pd.interval_range(0, 3).repeat(3), pd.interval_range(0, 3)), + ], + ids=["string", "interval"], + ) + def test_map_str(self, data, categories, ordered): + # GH 31202 - override base class since we want to maintain categorical/ordered + index = CategoricalIndex(data, categories=categories, ordered=ordered) + result = index.map(str) + expected = CategoricalIndex( + map(str, data), categories=map(str, categories), ordered=ordered + ) + tm.assert_index_equal(result, expected) + + def test_map(self): + ci = CategoricalIndex(list("ABABC"), categories=list("CBA"), ordered=True) + result = ci.map(lambda x: x.lower()) + exp = CategoricalIndex(list("ababc"), categories=list("cba"), ordered=True) + tm.assert_index_equal(result, exp) + + ci = CategoricalIndex( + list("ABABC"), categories=list("BAC"), ordered=False, name="XXX" + ) + result = ci.map(lambda x: x.lower()) + exp = CategoricalIndex( + list("ababc"), categories=list("bac"), ordered=False, name="XXX" + ) + tm.assert_index_equal(result, exp) + + # GH 12766: Return an index not an array + tm.assert_index_equal( + ci.map(lambda x: 1), Index(np.array([1] * 5, dtype=np.int64), name="XXX") + ) + + # change categories dtype + ci = CategoricalIndex(list("ABABC"), categories=list("BAC"), ordered=False) + + def f(x): + return {"A": 10, 
"B": 20, "C": 30}.get(x) + + result = ci.map(f) + exp = CategoricalIndex( + [10, 20, 10, 20, 30], categories=[20, 10, 30], ordered=False + ) + tm.assert_index_equal(result, exp) + + result = ci.map(Series([10, 20, 30], index=["A", "B", "C"])) + tm.assert_index_equal(result, exp) + + result = ci.map({"A": 10, "B": 20, "C": 30}) + tm.assert_index_equal(result, exp) + + def test_map_with_categorical_series(self): + # GH 12756 + a = Index([1, 2, 3, 4]) + b = Series(["even", "odd", "even", "odd"], dtype="category") + c = Series(["even", "odd", "even", "odd"]) + + exp = CategoricalIndex(["odd", "even", "odd", np.nan]) + tm.assert_index_equal(a.map(b), exp) + exp = Index(["odd", "even", "odd", np.nan]) + tm.assert_index_equal(a.map(c), exp) + + @pytest.mark.parametrize( + ("data", "f"), + ( + ([1, 1, np.nan], pd.isna), + ([1, 2, np.nan], pd.isna), + ([1, 1, np.nan], {1: False}), + ([1, 2, np.nan], {1: False, 2: False}), + ([1, 1, np.nan], Series([False, False])), + ([1, 2, np.nan], Series([False, False, False])), + ), + ) + def test_map_with_nan(self, data, f): # GH 24241 + values = pd.Categorical(data) + result = values.map(f) + if data[1] == 1: + expected = pd.Categorical([False, False, np.nan]) + tm.assert_categorical_equal(result, expected) + else: + expected = Index([False, False, np.nan]) + tm.assert_index_equal(result, expected) + + def test_map_with_dict_or_series(self): + orig_values = ["a", "B", 1, "a"] + new_values = ["one", 2, 3.0, "one"] + cur_index = CategoricalIndex(orig_values, name="XXX") + expected = CategoricalIndex(new_values, name="XXX", categories=[3.0, 2, "one"]) + + mapper = Series(new_values[:-1], index=orig_values[:-1]) + result = cur_index.map(mapper) + # Order of categories in result can be different + tm.assert_index_equal(result, expected) + + mapper = {o: n for o, n in zip(orig_values[:-1], new_values[:-1])} + result = cur_index.map(mapper) + # Order of categories in result can be different + tm.assert_index_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/test_reindex.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/test_reindex.py new file mode 100644 index 0000000..5a0b267 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/categorical/test_reindex.py @@ -0,0 +1,86 @@ +import numpy as np + +from pandas import ( + Categorical, + CategoricalIndex, + Index, + Interval, +) +import pandas._testing as tm + + +class TestReindex: + def test_reindex_list_non_unique(self): + # GH#11586 + ci = CategoricalIndex(["a", "b", "c", "a"]) + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + res, indexer = ci.reindex(["a", "c"]) + + tm.assert_index_equal(res, Index(["a", "a", "c"]), exact=True) + tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) + + def test_reindex_categorcal_non_unique(self): + ci = CategoricalIndex(["a", "b", "c", "a"]) + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + res, indexer = ci.reindex(Categorical(["a", "c"])) + + exp = CategoricalIndex(["a", "a", "c"], categories=["a", "c"]) + tm.assert_index_equal(res, exp, exact=True) + tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) + + def test_reindex_list_non_unique_unused_category(self): + ci = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + res, indexer = ci.reindex(["a", "c"]) + exp = Index(["a", "a", "c"], dtype="object") + 
tm.assert_index_equal(res, exp, exact=True) + tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) + + def test_reindex_categorical_non_unique_unused_category(self): + ci = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + res, indexer = ci.reindex(Categorical(["a", "c"])) + exp = CategoricalIndex(["a", "a", "c"], categories=["a", "c"]) + tm.assert_index_equal(res, exp, exact=True) + tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) + + def test_reindex_duplicate_target(self): + # See GH25459 + cat = CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c", "d"]) + res, indexer = cat.reindex(["a", "c", "c"]) + exp = Index(["a", "c", "c"], dtype="object") + tm.assert_index_equal(res, exp, exact=True) + tm.assert_numpy_array_equal(indexer, np.array([0, 2, 2], dtype=np.intp)) + + res, indexer = cat.reindex( + CategoricalIndex(["a", "c", "c"], categories=["a", "b", "c", "d"]) + ) + exp = CategoricalIndex(["a", "c", "c"], categories=["a", "b", "c", "d"]) + tm.assert_index_equal(res, exp, exact=True) + tm.assert_numpy_array_equal(indexer, np.array([0, 2, 2], dtype=np.intp)) + + def test_reindex_empty_index(self): + # See GH16770 + c = CategoricalIndex([]) + res, indexer = c.reindex(["a", "b"]) + tm.assert_index_equal(res, Index(["a", "b"]), exact=True) + tm.assert_numpy_array_equal(indexer, np.array([-1, -1], dtype=np.intp)) + + def test_reindex_categorical_added_category(self): + # GH 42424 + ci = CategoricalIndex( + [Interval(0, 1, closed="right"), Interval(1, 2, closed="right")], + ordered=True, + ) + ci_add = CategoricalIndex( + [ + Interval(0, 1, closed="right"), + Interval(1, 2, closed="right"), + Interval(2, 3, closed="right"), + Interval(3, 4, closed="right"), + ], + ordered=True, + ) + result, _ = ci.reindex(ci_add) + expected = ci_add + tm.assert_index_equal(expected, result) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/common.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/common.py new file mode 100644 index 0000000..61cfd55 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/common.py @@ -0,0 +1,897 @@ +from __future__ import annotations + +from datetime import datetime +import gc + +import numpy as np +import pytest + +from pandas._libs.tslibs import Timestamp + +from pandas.core.dtypes.common import ( + is_datetime64tz_dtype, + is_integer_dtype, +) +from pandas.core.dtypes.dtypes import CategoricalDtype + +import pandas as pd +from pandas import ( + CategoricalIndex, + DatetimeIndex, + Index, + IntervalIndex, + MultiIndex, + PeriodIndex, + RangeIndex, + Series, + TimedeltaIndex, + isna, +) +import pandas._testing as tm +from pandas.core.api import ( # noqa:F401 + Float64Index, + Int64Index, + NumericIndex, + UInt64Index, +) +from pandas.core.arrays import BaseMaskedArray + + +class Base: + """ + Base class for index sub-class tests. 
+ """ + + _index_cls: type[Index] + + @pytest.fixture + def simple_index(self): + raise NotImplementedError("Method not implemented") + + def create_index(self) -> Index: + raise NotImplementedError("Method not implemented") + + def test_pickle_compat_construction(self): + # need an object to create with + msg = "|".join( + [ + r"Index\(\.\.\.\) must be called with a collection of some " + r"kind, None was passed", + r"DatetimeIndex\(\) must be called with a collection of some " + r"kind, None was passed", + r"TimedeltaIndex\(\) must be called with a collection of some " + r"kind, None was passed", + r"__new__\(\) missing 1 required positional argument: 'data'", + r"__new__\(\) takes at least 2 arguments \(1 given\)", + ] + ) + with pytest.raises(TypeError, match=msg): + self._index_cls() + + def test_shift(self, simple_index): + + # GH8083 test the base class for shift + idx = simple_index + msg = ( + f"This method is only implemented for DatetimeIndex, PeriodIndex and " + f"TimedeltaIndex; Got type {type(idx).__name__}" + ) + with pytest.raises(NotImplementedError, match=msg): + idx.shift(1) + with pytest.raises(NotImplementedError, match=msg): + idx.shift(1, 2) + + def test_constructor_name_unhashable(self, simple_index): + # GH#29069 check that name is hashable + # See also same-named test in tests.series.test_constructors + idx = simple_index + with pytest.raises(TypeError, match="Index.name must be a hashable type"): + type(idx)(idx, name=[]) + + def test_create_index_existing_name(self, simple_index): + + # GH11193, when an existing index is passed, and a new name is not + # specified, the new index should inherit the previous object name + expected = simple_index + if not isinstance(expected, MultiIndex): + expected.name = "foo" + result = Index(expected) + tm.assert_index_equal(result, expected) + + result = Index(expected, name="bar") + expected.name = "bar" + tm.assert_index_equal(result, expected) + else: + expected.names = ["foo", "bar"] + result = Index(expected) + tm.assert_index_equal( + result, + Index( + Index( + [ + ("foo", "one"), + ("foo", "two"), + ("bar", "one"), + ("baz", "two"), + ("qux", "one"), + ("qux", "two"), + ], + dtype="object", + ), + names=["foo", "bar"], + ), + ) + + result = Index(expected, names=["A", "B"]) + tm.assert_index_equal( + result, + Index( + Index( + [ + ("foo", "one"), + ("foo", "two"), + ("bar", "one"), + ("baz", "two"), + ("qux", "one"), + ("qux", "two"), + ], + dtype="object", + ), + names=["A", "B"], + ), + ) + + def test_numeric_compat(self, simple_index): + + idx = simple_index + # Check that this doesn't cover MultiIndex case, if/when it does, + # we can remove multi.test_compat.test_numeric_compat + assert not isinstance(idx, MultiIndex) + if type(idx) is Index: + return + + typ = type(idx._data).__name__ + cls = type(idx).__name__ + lmsg = "|".join( + [ + rf"unsupported operand type\(s\) for \*: '{typ}' and 'int'", + "cannot perform (__mul__|__truediv__|__floordiv__) with " + f"this index type: ({cls}|{typ})", + ] + ) + with pytest.raises(TypeError, match=lmsg): + idx * 1 + rmsg = "|".join( + [ + rf"unsupported operand type\(s\) for \*: 'int' and '{typ}'", + "cannot perform (__rmul__|__rtruediv__|__rfloordiv__) with " + f"this index type: ({cls}|{typ})", + ] + ) + with pytest.raises(TypeError, match=rmsg): + 1 * idx + + div_err = lmsg.replace("*", "/") + with pytest.raises(TypeError, match=div_err): + idx / 1 + div_err = rmsg.replace("*", "/") + with pytest.raises(TypeError, match=div_err): + 1 / idx + + floordiv_err = 
lmsg.replace("*", "//") + with pytest.raises(TypeError, match=floordiv_err): + idx // 1 + floordiv_err = rmsg.replace("*", "//") + with pytest.raises(TypeError, match=floordiv_err): + 1 // idx + + def test_logical_compat(self, simple_index): + idx = simple_index + with pytest.raises(TypeError, match="cannot perform all"): + idx.all() + with pytest.raises(TypeError, match="cannot perform any"): + idx.any() + + def test_repr_roundtrip(self, simple_index): + + idx = simple_index + tm.assert_index_equal(eval(repr(idx)), idx) + + def test_repr_max_seq_item_setting(self, simple_index): + # GH10182 + idx = simple_index + idx = idx.repeat(50) + with pd.option_context("display.max_seq_items", None): + repr(idx) + assert "..." not in str(idx) + + def test_ensure_copied_data(self, index): + # Check the "copy" argument of each Index.__new__ is honoured + # GH12309 + init_kwargs = {} + if isinstance(index, PeriodIndex): + # Needs "freq" specification: + init_kwargs["freq"] = index.freq + elif isinstance(index, (RangeIndex, MultiIndex, CategoricalIndex)): + # RangeIndex cannot be initialized from data + # MultiIndex and CategoricalIndex are tested separately + return + + index_type = type(index) + result = index_type(index.values, copy=True, **init_kwargs) + if is_datetime64tz_dtype(index.dtype): + result = result.tz_localize("UTC").tz_convert(index.tz) + if isinstance(index, (DatetimeIndex, TimedeltaIndex)): + index = index._with_freq(None) + + tm.assert_index_equal(index, result) + + if isinstance(index, PeriodIndex): + # .values an object array of Period, thus copied + result = index_type(ordinal=index.asi8, copy=False, **init_kwargs) + tm.assert_numpy_array_equal(index.asi8, result.asi8, check_same="same") + elif isinstance(index, IntervalIndex): + # checked in test_interval.py + pass + elif type(index) is Index and not isinstance(index.dtype, np.dtype): + result = index_type(index.values, copy=False, **init_kwargs) + tm.assert_index_equal(result, index) + + if isinstance(index._values, BaseMaskedArray): + assert np.shares_memory(index._values._data, result._values._data) + tm.assert_numpy_array_equal( + index._values._data, result._values._data, check_same="same" + ) + assert np.shares_memory(index._values._mask, result._values._mask) + tm.assert_numpy_array_equal( + index._values._mask, result._values._mask, check_same="same" + ) + elif index.dtype == "string[python]": + assert np.shares_memory(index._values._ndarray, result._values._ndarray) + tm.assert_numpy_array_equal( + index._values._ndarray, result._values._ndarray, check_same="same" + ) + elif index.dtype == "string[pyarrow]": + assert tm.shares_memory(result._values, index._values) + else: + raise NotImplementedError(index.dtype) + else: + result = index_type(index.values, copy=False, **init_kwargs) + tm.assert_numpy_array_equal(index.values, result.values, check_same="same") + + def test_memory_usage(self, index): + index._engine.clear_mapping() + result = index.memory_usage() + if index.empty: + # we report 0 for no-length + assert result == 0 + return + + # non-zero length + index.get_loc(index[0]) + result2 = index.memory_usage() + result3 = index.memory_usage(deep=True) + + # RangeIndex, IntervalIndex + # don't have engines + # Index[EA] has engine but it does not have a Hashtable .mapping + if not isinstance(index, (RangeIndex, IntervalIndex)) and not ( + type(index) is Index and not isinstance(index.dtype, np.dtype) + ): + assert result2 > result + + if index.inferred_type == "object": + assert result3 > result2 + + def 
test_argsort(self, request, index): + # separately tested + if isinstance(index, CategoricalIndex): + return + + result = index.argsort() + expected = np.array(index).argsort() + tm.assert_numpy_array_equal(result, expected, check_dtype=False) + + def test_numpy_argsort(self, index): + result = np.argsort(index) + expected = index.argsort() + tm.assert_numpy_array_equal(result, expected) + + result = np.argsort(index, kind="mergesort") + expected = index.argsort(kind="mergesort") + tm.assert_numpy_array_equal(result, expected) + + # these are the only two types that perform + # pandas compatibility input validation - the + # rest already perform separate (or no) such + # validation via their 'values' attribute as + # defined in pandas.core.indexes/base.py - they + # cannot be changed at the moment due to + # backwards compatibility concerns + if isinstance(index, (CategoricalIndex, RangeIndex)): + msg = "the 'axis' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.argsort(index, axis=1) + + msg = "the 'order' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.argsort(index, order=("a", "b")) + + def test_repeat(self, simple_index): + rep = 2 + idx = simple_index.copy() + new_index_cls = Int64Index if isinstance(idx, RangeIndex) else idx._constructor + expected = new_index_cls(idx.values.repeat(rep), name=idx.name) + tm.assert_index_equal(idx.repeat(rep), expected) + + idx = simple_index + rep = np.arange(len(idx)) + expected = new_index_cls(idx.values.repeat(rep), name=idx.name) + tm.assert_index_equal(idx.repeat(rep), expected) + + def test_numpy_repeat(self, simple_index): + rep = 2 + idx = simple_index + expected = idx.repeat(rep) + tm.assert_index_equal(np.repeat(idx, rep), expected) + + msg = "the 'axis' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.repeat(idx, rep, axis=0) + + def test_where(self, listlike_box, simple_index): + klass = listlike_box + + idx = simple_index + if isinstance(idx, (DatetimeIndex, TimedeltaIndex)): + # where does not preserve freq + idx = idx._with_freq(None) + + cond = [True] * len(idx) + result = idx.where(klass(cond)) + expected = idx + tm.assert_index_equal(result, expected) + + cond = [False] + [True] * len(idx[1:]) + expected = Index([idx._na_value] + idx[1:].tolist(), dtype=idx.dtype) + result = idx.where(klass(cond)) + tm.assert_index_equal(result, expected) + + def test_insert_base(self, index): + result = index[1:4] + + if not len(index): + return + + # test 0th element + assert index[0:4].equals(result.insert(0, index[0])) + + def test_insert_out_of_bounds(self, index): + # TypeError/IndexError matches what np.insert raises in these cases + + if len(index) > 0: + err = TypeError + else: + err = IndexError + if len(index) == 0: + # 0 vs 0.5 in error message varies with numpy version + msg = "index (0|0.5) is out of bounds for axis 0 with size 0" + else: + msg = "slice indices must be integers or None or have an __index__ method" + with pytest.raises(err, match=msg): + index.insert(0.5, "foo") + + msg = "|".join( + [ + r"index -?\d+ is out of bounds for axis 0 with size \d+", + "loc must be an integer between", + ] + ) + with pytest.raises(IndexError, match=msg): + index.insert(len(index) + 1, 1) + + with pytest.raises(IndexError, match=msg): + index.insert(-len(index) - 1, 1) + + def test_delete_base(self, index): + if not len(index): + return + + if isinstance(index, RangeIndex): + # tested in class + return + + expected = index[1:] + result = index.delete(0) + 
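test_insert_out_of_bounds and test_delete_base here mirror NumPy's bounds rules for np.insert/np.delete. In isolation, on a plain Index:

```python
import pandas as pd

idx = pd.Index([10, 20, 30])

print(idx.insert(1, 15))  # [10, 15, 20, 30]
print(idx.delete(0))      # [20, 30]

# Positions past the end raise IndexError, exactly as NumPy would.
try:
    idx.delete(3)
except IndexError as err:
    print(err)  # index 3 is out of bounds for axis 0 with size 3
```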
assert result.equals(expected) + assert result.name == expected.name + + expected = index[:-1] + result = index.delete(-1) + assert result.equals(expected) + assert result.name == expected.name + + length = len(index) + msg = f"index {length} is out of bounds for axis 0 with size {length}" + with pytest.raises(IndexError, match=msg): + index.delete(length) + + def test_equals(self, index): + if isinstance(index, IntervalIndex): + # IntervalIndex tested separately, the index.equals(index.astype(object)) + # fails for IntervalIndex + return + + is_ea_idx = type(index) is Index and not isinstance(index.dtype, np.dtype) + + assert index.equals(index) + assert index.equals(index.copy()) + if not is_ea_idx: + # doesn't hold for e.g. IntegerDtype + assert index.equals(index.astype(object)) + + assert not index.equals(list(index)) + assert not index.equals(np.array(index)) + + # Cannot pass in non-int64 dtype to RangeIndex + if not isinstance(index, RangeIndex) and not is_ea_idx: + same_values = Index(index, dtype=object) + assert index.equals(same_values) + assert same_values.equals(index) + + if index.nlevels == 1: + # do not test MultiIndex + assert not index.equals(Series(index)) + + def test_equals_op(self, simple_index): + # GH9947, GH10637 + index_a = simple_index + + n = len(index_a) + index_b = index_a[0:-1] + index_c = index_a[0:-1].append(index_a[-2:-1]) + index_d = index_a[0:1] + + msg = "Lengths must match|could not be broadcast" + with pytest.raises(ValueError, match=msg): + index_a == index_b + expected1 = np.array([True] * n) + expected2 = np.array([True] * (n - 1) + [False]) + tm.assert_numpy_array_equal(index_a == index_a, expected1) + tm.assert_numpy_array_equal(index_a == index_c, expected2) + + # test comparisons with numpy arrays + array_a = np.array(index_a) + array_b = np.array(index_a[0:-1]) + array_c = np.array(index_a[0:-1].append(index_a[-2:-1])) + array_d = np.array(index_a[0:1]) + with pytest.raises(ValueError, match=msg): + index_a == array_b + tm.assert_numpy_array_equal(index_a == array_a, expected1) + tm.assert_numpy_array_equal(index_a == array_c, expected2) + + # test comparisons with Series + series_a = Series(array_a) + series_b = Series(array_b) + series_c = Series(array_c) + series_d = Series(array_d) + with pytest.raises(ValueError, match=msg): + index_a == series_b + + tm.assert_numpy_array_equal(index_a == series_a, expected1) + tm.assert_numpy_array_equal(index_a == series_c, expected2) + + # cases where length is 1 for one of them + with pytest.raises(ValueError, match="Lengths must match"): + index_a == index_d + with pytest.raises(ValueError, match="Lengths must match"): + index_a == series_d + with pytest.raises(ValueError, match="Lengths must match"): + index_a == array_d + msg = "Can only compare identically-labeled Series objects" + with pytest.raises(ValueError, match=msg): + series_a == series_d + with pytest.raises(ValueError, match="Lengths must match"): + series_a == array_d + + # comparing with a scalar should broadcast; note that we are excluding + # MultiIndex because in this case each item in the index is a tuple of + # length 2, and therefore is considered an array of length 2 in the + # comparison instead of a scalar + if not isinstance(index_a, MultiIndex): + expected3 = np.array([False] * (len(index_a) - 2) + [True, False]) + # assuming the 2nd to last item is unique in the data + item = index_a[-2] + tm.assert_numpy_array_equal(index_a == item, expected3) + # For RangeIndex we can convert to Int64Index + 
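test_equals_op above draws the line between elementwise == and Index.equals; the essentials, on a throwaway pair of indexes:

```python
import pandas as pd

a = pd.Index([1, 2, 3])
b = pd.Index([1, 2, 4])

print(a == b)       # elementwise: [ True  True False]
print(a.equals(b))  # one bool for the whole index: False
print(a == 2)       # scalars broadcast: [False  True False]

# Length mismatches raise instead of broadcasting.
try:
    a == pd.Index([1, 2])
except ValueError as err:
    print(err)  # Lengths must match ...
```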
tm.assert_series_equal(series_a == item, Series(expected3)) + + def test_format(self, simple_index): + # GH35439 + idx = simple_index + expected = [str(x) for x in idx] + assert idx.format() == expected + + def test_format_empty(self): + # GH35712 + empty_idx = self._index_cls([]) + assert empty_idx.format() == [] + assert empty_idx.format(name=True) == [""] + + def test_fillna(self, index): + # GH 11343 + if len(index) == 0: + return + elif isinstance(index, NumericIndex) and is_integer_dtype(index.dtype): + return + elif isinstance(index, MultiIndex): + idx = index.copy(deep=True) + msg = "isna is not defined for MultiIndex" + with pytest.raises(NotImplementedError, match=msg): + idx.fillna(idx[0]) + else: + idx = index.copy(deep=True) + result = idx.fillna(idx[0]) + tm.assert_index_equal(result, idx) + assert result is not idx + + msg = "'value' must be a scalar, passed: " + with pytest.raises(TypeError, match=msg): + idx.fillna([idx[0]]) + + idx = index.copy(deep=True) + values = idx._values + + values[1] = np.nan + + idx = type(index)(values) + + msg = "does not support 'downcast'" + with pytest.raises(NotImplementedError, match=msg): + # For now at least, we only raise if there are NAs present + idx.fillna(idx[0], downcast="infer") + + expected = np.array([False] * len(idx), dtype=bool) + expected[1] = True + tm.assert_numpy_array_equal(idx._isnan, expected) + assert idx.hasnans is True + + def test_nulls(self, index): + # this is really a smoke test for the methods + # as these are adequately tested for function elsewhere + if len(index) == 0: + tm.assert_numpy_array_equal(index.isna(), np.array([], dtype=bool)) + elif isinstance(index, MultiIndex): + idx = index.copy() + msg = "isna is not defined for MultiIndex" + with pytest.raises(NotImplementedError, match=msg): + idx.isna() + elif not index.hasnans: + tm.assert_numpy_array_equal(index.isna(), np.zeros(len(index), dtype=bool)) + tm.assert_numpy_array_equal(index.notna(), np.ones(len(index), dtype=bool)) + else: + result = isna(index) + tm.assert_numpy_array_equal(index.isna(), result) + tm.assert_numpy_array_equal(index.notna(), ~result) + + def test_empty(self, simple_index): + # GH 15270 + idx = simple_index + assert not idx.empty + assert idx[:0].empty + + def test_join_self_unique(self, join_type, simple_index): + idx = simple_index + if idx.is_unique: + joined = idx.join(idx, how=join_type) + assert (idx == joined).all() + + def test_map(self, simple_index): + # callable + idx = simple_index + + result = idx.map(lambda x: x) + # For RangeIndex we convert to Int64Index + tm.assert_index_equal(result, idx, exact="equiv") + + @pytest.mark.parametrize( + "mapper", + [ + lambda values, index: {i: e for e, i in zip(values, index)}, + lambda values, index: Series(values, index), + ], + ) + def test_map_dictlike(self, mapper, simple_index): + + idx = simple_index + if isinstance(idx, CategoricalIndex): + # TODO(2.0): see if we can avoid skipping once + # CategoricalIndex.reindex is removed. 
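test_format and test_map above fix two small contracts of the base class: format stringifies the labels, and map with a callable hands back an Index rather than a bare ndarray. Briefly:

```python
import pandas as pd

idx = pd.Index([10, 20, 30], name="x")

assert idx.format() == ["10", "20", "30"]

result = idx.map(lambda x: x * 2)
assert isinstance(result, pd.Index)  # an Index, not an ndarray
print(result)  # [20, 40, 60]
```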
+ pytest.skip(f"skipping tests for {type(idx)}") + + identity = mapper(idx.values, idx) + + result = idx.map(identity) + # For RangeIndex we convert to Int64Index + tm.assert_index_equal(result, idx, exact="equiv") + + # empty mappable + dtype = None + if idx._is_backward_compat_public_numeric_index: + new_index_cls = NumericIndex + if idx.dtype.kind == "f": + dtype = idx.dtype + else: + new_index_cls = Float64Index + + expected = new_index_cls([np.nan] * len(idx), dtype=dtype) + result = idx.map(mapper(expected, idx)) + tm.assert_index_equal(result, expected) + + def test_map_str(self, simple_index): + # GH 31202 + idx = simple_index + result = idx.map(str) + expected = Index([str(x) for x in idx], dtype=object) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("copy", [True, False]) + @pytest.mark.parametrize("name", [None, "foo"]) + @pytest.mark.parametrize("ordered", [True, False]) + def test_astype_category(self, copy, name, ordered, simple_index): + # GH 18630 + idx = simple_index + if name: + idx = idx.rename(name) + + # standard categories + dtype = CategoricalDtype(ordered=ordered) + result = idx.astype(dtype, copy=copy) + expected = CategoricalIndex(idx, name=name, ordered=ordered) + tm.assert_index_equal(result, expected, exact=True) + + # non-standard categories + dtype = CategoricalDtype(idx.unique().tolist()[:-1], ordered) + result = idx.astype(dtype, copy=copy) + expected = CategoricalIndex(idx, name=name, dtype=dtype) + tm.assert_index_equal(result, expected, exact=True) + + if ordered is False: + # dtype='category' defaults to ordered=False, so only test once + result = idx.astype("category", copy=copy) + expected = CategoricalIndex(idx, name=name) + tm.assert_index_equal(result, expected, exact=True) + + def test_is_unique(self, simple_index): + # initialize a unique index + index = simple_index.drop_duplicates() + assert index.is_unique is True + + # empty index should be unique + index_empty = index[:0] + assert index_empty.is_unique is True + + # test basic dupes + index_dup = index.insert(0, index[0]) + assert index_dup.is_unique is False + + # single NA should be unique + index_na = index.insert(0, np.nan) + assert index_na.is_unique is True + + # multiple NA should not be unique + index_na_dup = index_na.insert(0, np.nan) + assert index_na_dup.is_unique is False + + @pytest.mark.arm_slow + def test_engine_reference_cycle(self, simple_index): + # GH27585 + index = simple_index + nrefs_pre = len(gc.get_referrers(index)) + index._engine + assert len(gc.get_referrers(index)) == nrefs_pre + + def test_getitem_2d_deprecated(self, simple_index): + # GH#30588, GH#31479 + idx = simple_index + msg = "Support for multi-dimensional indexing" + with tm.assert_produces_warning(FutureWarning, match=msg): + res = idx[:, None] + + assert isinstance(res, np.ndarray), type(res) + + if not isinstance(idx, RangeIndex): + # GH#44051 RangeIndex already raises + with tm.assert_produces_warning(FutureWarning, match=msg): + res = idx[True] + assert isinstance(res, np.ndarray), type(res) + with tm.assert_produces_warning(FutureWarning, match=msg): + res = idx[False] + assert isinstance(res, np.ndarray), type(res) + else: + msg = "only integers, slices" + with pytest.raises(IndexError, match=msg): + idx[True] + with pytest.raises(IndexError, match=msg): + idx[False] + + def test_copy_shares_cache(self, simple_index): + # GH32898, GH36840 + idx = simple_index + idx.get_loc(idx[0]) # populates the _cache. 
+ copy = idx.copy() + + assert copy._cache is idx._cache + + def test_shallow_copy_shares_cache(self, simple_index): + # GH32669, GH36840 + idx = simple_index + idx.get_loc(idx[0]) # populates the _cache. + shallow_copy = idx._view() + + assert shallow_copy._cache is idx._cache + + shallow_copy = idx._shallow_copy(idx._data) + assert shallow_copy._cache is not idx._cache + assert shallow_copy._cache == {} + + def test_index_groupby(self, simple_index): + idx = simple_index[:5] + to_groupby = np.array([1, 2, np.nan, 2, 1]) + tm.assert_dict_equal( + idx.groupby(to_groupby), {1.0: idx[[0, 4]], 2.0: idx[[1, 3]]} + ) + + to_groupby = DatetimeIndex( + [ + datetime(2011, 11, 1), + datetime(2011, 12, 1), + pd.NaT, + datetime(2011, 12, 1), + datetime(2011, 11, 1), + ], + tz="UTC", + ).values + + ex_keys = [Timestamp("2011-11-01"), Timestamp("2011-12-01")] + expected = {ex_keys[0]: idx[[0, 4]], ex_keys[1]: idx[[1, 3]]} + tm.assert_dict_equal(idx.groupby(to_groupby), expected) + + def test_append_preserves_dtype(self, simple_index): + # In particular NumericIndex with dtype float32 + index = simple_index + N = len(index) + + result = index.append(index) + assert result.dtype == index.dtype + tm.assert_index_equal(result[:N], index, check_exact=True) + tm.assert_index_equal(result[N:], index, check_exact=True) + + alt = index.take(list(range(N)) * 2) + tm.assert_index_equal(result, alt, check_exact=True) + + def test_inv(self, simple_index): + idx = simple_index + + if idx.dtype.kind in ["i", "u"]: + res = ~idx + expected = Index(~idx.values, name=idx.name) + tm.assert_index_equal(res, expected) + + # check that we are matching Series behavior + res2 = ~Series(idx) + # TODO(2.0): once we preserve dtype, check_dtype can be True + tm.assert_series_equal(res2, Series(expected), check_dtype=False) + else: + if idx.dtype.kind == "f": + msg = "ufunc 'invert' not supported for the input types" + else: + msg = "bad operand" + with pytest.raises(TypeError, match=msg): + ~idx + + # check that we get the same behavior with Series + with pytest.raises(TypeError, match=msg): + ~Series(idx) + + +class NumericBase(Base): + """ + Base class for numeric index (incl. RangeIndex) sub-class tests. 
+ """ + + def test_constructor_unwraps_index(self, dtype): + index_cls = self._index_cls + + idx = Index([1, 2], dtype=dtype) + result = index_cls(idx) + expected = np.array([1, 2], dtype=idx.dtype) + tm.assert_numpy_array_equal(result._data, expected) + + def test_where(self): + # Tested in numeric.test_indexing + pass + + def test_can_hold_identifiers(self, simple_index): + idx = simple_index + key = idx[0] + assert idx._can_hold_identifiers_and_holds_name(key) is False + + def test_format(self, simple_index): + # GH35439 + idx = simple_index + max_width = max(len(str(x)) for x in idx) + expected = [str(x).ljust(max_width) for x in idx] + assert idx.format() == expected + + def test_numeric_compat(self): + pass # override Base method + + def test_insert_non_na(self, simple_index): + # GH#43921 inserting an element that we know we can hold should + # not change dtype or type (except for RangeIndex) + index = simple_index + + result = index.insert(0, index[0]) + + cls = type(index) + if cls is RangeIndex: + cls = Int64Index + + expected = cls([index[0]] + list(index), dtype=index.dtype) + tm.assert_index_equal(result, expected, exact=True) + + def test_insert_na(self, nulls_fixture, simple_index): + # GH 18295 (test missing) + index = simple_index + na_val = nulls_fixture + + if na_val is pd.NaT: + expected = Index([index[0], pd.NaT] + list(index[1:]), dtype=object) + else: + expected = Float64Index([index[0], np.nan] + list(index[1:])) + + if index._is_backward_compat_public_numeric_index: + # GH#43921 we preserve NumericIndex + if index.dtype.kind == "f": + expected = NumericIndex(expected, dtype=index.dtype) + else: + expected = NumericIndex(expected) + + result = index.insert(1, na_val) + tm.assert_index_equal(result, expected, exact=True) + + def test_arithmetic_explicit_conversions(self): + # GH 8608 + # add/sub are overridden explicitly for Float/Int Index + index_cls = self._index_cls + if index_cls is RangeIndex: + idx = RangeIndex(5) + else: + idx = index_cls(np.arange(5, dtype="int64")) + + # float conversions + arr = np.arange(5, dtype="int64") * 3.2 + expected = Float64Index(arr) + fidx = idx * 3.2 + tm.assert_index_equal(fidx, expected) + fidx = 3.2 * idx + tm.assert_index_equal(fidx, expected) + + # interops with numpy arrays + expected = Float64Index(arr) + a = np.zeros(5, dtype="float64") + result = fidx - a + tm.assert_index_equal(result, expected) + + expected = Float64Index(-arr) + a = np.zeros(5, dtype="float64") + result = a - fidx + tm.assert_index_equal(result, expected) + + def test_invalid_dtype(self, invalid_dtype): + # GH 29539 + dtype = invalid_dtype + msg = fr"Incorrect `dtype` passed: expected \w+(?: \w+)?, received {dtype}" + with pytest.raises(ValueError, match=msg): + self._index_cls([1, 2, 3], dtype=dtype) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/conftest.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/conftest.py new file mode 100644 index 0000000..1e70194 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/conftest.py @@ -0,0 +1,41 @@ +import numpy as np +import pytest + +from pandas import ( + Series, + array, +) + + +@pytest.fixture(params=[None, False]) +def sort(request): + """ + Valid values for the 'sort' parameter used in the Index + setops methods (intersection, union, etc.) + + Caution: + Don't confuse this one with the "sort" fixture used + for DataFrame.append or concat. That one has + parameters [True, False]. 
+ + We can't combine them as sort=True is not permitted + in the Index setops methods. + """ + return request.param + + +@pytest.fixture(params=["D", "3D", "-3D", "H", "2H", "-2H", "T", "2T", "S", "-3S"]) +def freq_sample(request): + """ + Valid values for 'freq' parameter used to create date_range and + timedelta_range.. + """ + return request.param + + +@pytest.fixture(params=[list, tuple, np.array, array, Series]) +def listlike_box(request): + """ + Types that may be passed as the indexer to searchsorted. + """ + return request.param diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimelike.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimelike.py new file mode 100644 index 0000000..ecdbf01 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimelike.py @@ -0,0 +1,139 @@ +""" generic datetimelike tests """ + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.tests.indexes.common import Base + + +class DatetimeLike(Base): + def test_isin(self, simple_index): + index = simple_index[:4] + result = index.isin(index) + assert result.all() + + result = index.isin(list(index)) + assert result.all() + + result = index.isin([index[2], 5]) + expected = np.array([False, False, True, False]) + tm.assert_numpy_array_equal(result, expected) + + def test_argsort_matches_array(self, simple_index): + idx = simple_index + idx = idx.insert(1, pd.NaT) + + result = idx.argsort() + expected = idx._data.argsort() + tm.assert_numpy_array_equal(result, expected) + + def test_can_hold_identifiers(self, simple_index): + idx = simple_index + key = idx[0] + assert idx._can_hold_identifiers_and_holds_name(key) is False + + def test_shift_identity(self, simple_index): + + idx = simple_index + tm.assert_index_equal(idx, idx.shift(0)) + + def test_shift_empty(self, simple_index): + # GH#14811 + idx = simple_index[:0] + tm.assert_index_equal(idx, idx.shift(1)) + + def test_str(self, simple_index): + + # test the string repr + idx = simple_index + idx.name = "foo" + assert not (f"length={len(idx)}" in str(idx)) + assert "'foo'" in str(idx) + assert type(idx).__name__ in str(idx) + + if hasattr(idx, "tz"): + if idx.tz is not None: + assert idx.tz in str(idx) + if isinstance(idx, pd.PeriodIndex): + assert f"dtype='period[{idx.freqstr}]'" in str(idx) + else: + assert f"freq='{idx.freqstr}'" in str(idx) + + def test_view(self, simple_index): + idx = simple_index + + idx_view = idx.view("i8") + result = self._index_cls(idx) + tm.assert_index_equal(result, idx) + + idx_view = idx.view(self._index_cls) + result = self._index_cls(idx) + tm.assert_index_equal(result, idx_view) + + def test_map_callable(self, simple_index): + index = simple_index + expected = index + index.freq + result = index.map(lambda x: x + x.freq) + tm.assert_index_equal(result, expected) + + # map to NaT + result = index.map(lambda x: pd.NaT if x == index[0] else x) + expected = pd.Index([pd.NaT] + index[1:].tolist()) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "mapper", + [ + lambda values, index: {i: e for e, i in zip(values, index)}, + lambda values, index: pd.Series(values, index, dtype=object), + ], + ) + def test_map_dictlike(self, mapper, simple_index): + index = simple_index + expected = index + index.freq + + # don't compare the freqs + if isinstance(expected, (pd.DatetimeIndex, pd.TimedeltaIndex)): + expected = expected._with_freq(None) + + result = index.map(mapper(expected, index)) + 
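The conftest fixtures above are consumed just by naming them in a test signature; pytest then re-runs the test once per parameter. A hypothetical test beside this conftest (the test name and body are illustrative, not from the diff) might use listlike_box like so:

```python
import numpy as np
import pandas as pd

# Runs once each for list, tuple, np.array, pd.array and Series.
def test_where_accepts_any_listlike(listlike_box):
    idx = pd.Index([1.0, 2.0, 3.0])
    cond = listlike_box([True, False, True])
    result = idx.where(cond)  # masked-out positions become NaN
    assert result[0] == 1.0 and np.isnan(result[1])
```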
tm.assert_index_equal(result, expected)
+
+        expected = pd.Index([pd.NaT] + index[1:].tolist())
+        result = index.map(mapper(expected, index))
+        tm.assert_index_equal(result, expected)
+
+        # empty map; these map to np.nan because we cannot know
+        # to re-infer things
+        expected = pd.Index([np.nan] * len(index))
+        result = index.map(mapper([], []))
+        tm.assert_index_equal(result, expected)
+
+    def test_getitem_preserves_freq(self, simple_index):
+        index = simple_index
+        assert index.freq is not None
+
+        result = index[:]
+        assert result.freq == index.freq
+
+    def test_where_cast_str(self, simple_index):
+        index = simple_index
+
+        mask = np.ones(len(index), dtype=bool)
+        mask[-1] = False
+
+        result = index.where(mask, str(index[0]))
+        expected = index.where(mask, index[0])
+        tm.assert_index_equal(result, expected)
+
+        result = index.where(mask, [str(index[0])])
+        tm.assert_index_equal(result, expected)
+
+        expected = index.astype(object).where(mask, "foo")
+        result = index.where(mask, "foo")
+        tm.assert_index_equal(result, expected)
+
+        result = index.where(mask, ["foo"])
+        tm.assert_index_equal(result, expected)
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimelike_/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimelike_/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimelike_/test_drop_duplicates.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimelike_/test_drop_duplicates.py
new file mode 100644
index 0000000..c56fc84
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimelike_/test_drop_duplicates.py
@@ -0,0 +1,80 @@
+import numpy as np
+import pytest
+
+from pandas import (
+    PeriodIndex,
+    Series,
+    date_range,
+    period_range,
+    timedelta_range,
+)
+import pandas._testing as tm
+
+
+class DropDuplicates:
+    def test_drop_duplicates_metadata(self, idx):
+        # GH#10115
+        result = idx.drop_duplicates()
+        tm.assert_index_equal(idx, result)
+        assert idx.freq == result.freq
+
+        idx_dup = idx.append(idx)
+        result = idx_dup.drop_duplicates()
+
+        expected = idx
+        if not isinstance(idx, PeriodIndex):
+            # freq is reset except for PeriodIndex
+            assert idx_dup.freq is None
+            assert result.freq is None
+            expected = idx._with_freq(None)
+        else:
+            assert result.freq == expected.freq
+
+        tm.assert_index_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        "keep, expected, index",
+        [
+            ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)),
+            ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)),
+            (
+                False,
+                np.concatenate(([True] * 5, [False] * 5, [True] * 5)),
+                np.arange(5, 10),
+            ),
+        ],
+    )
+    def test_drop_duplicates(self, keep, expected, index, idx):
+        # to check Index/Series compat
+        idx = idx.append(idx[:5])
+
+        tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected)
+        expected = idx[~expected]
+
+        result = idx.drop_duplicates(keep=keep)
+        tm.assert_index_equal(result, expected)
+
+        result = Series(idx).drop_duplicates(keep=keep)
+        tm.assert_series_equal(result, Series(expected, index=index))
+
+
+class TestDropDuplicatesPeriodIndex(DropDuplicates):
+    @pytest.fixture(params=["D", "3D", "H", "2H", "T", "2T", "S", "3S"])
+    def freq(self, request):
+        return request.param
+
+    @pytest.fixture
+    def idx(self, freq):
+        return period_range("2011-01-01", periods=10, freq=freq, name="idx")
+
+
+class TestDropDuplicatesDatetimeIndex(DropDuplicates):
+    @pytest.fixture
+    def idx(self, freq_sample):
+        return date_range("2011-01-01", freq=freq_sample,
periods=10, name="idx") + + +class TestDropDuplicatesTimedeltaIndex(DropDuplicates): + @pytest.fixture + def idx(self, freq_sample): + return timedelta_range("1 day", periods=10, freq=freq_sample, name="idx") diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimelike_/test_equals.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimelike_/test_equals.py new file mode 100644 index 0000000..cc90e8f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimelike_/test_equals.py @@ -0,0 +1,182 @@ +""" +Tests shared for DatetimeIndex/TimedeltaIndex/PeriodIndex +""" +from datetime import ( + datetime, + timedelta, +) + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + CategoricalIndex, + DatetimeIndex, + Index, + PeriodIndex, + TimedeltaIndex, + date_range, + period_range, +) +import pandas._testing as tm + + +class EqualsTests: + def test_not_equals_numeric(self, index): + + assert not index.equals(Index(index.asi8)) + assert not index.equals(Index(index.asi8.astype("u8"))) + assert not index.equals(Index(index.asi8).astype("f8")) + + def test_equals(self, index): + assert index.equals(index) + assert index.equals(index.astype(object)) + assert index.equals(CategoricalIndex(index)) + assert index.equals(CategoricalIndex(index.astype(object))) + + def test_not_equals_non_arraylike(self, index): + assert not index.equals(list(index)) + + def test_not_equals_strings(self, index): + + other = Index([str(x) for x in index], dtype=object) + assert not index.equals(other) + assert not index.equals(CategoricalIndex(other)) + + def test_not_equals_misc_strs(self, index): + other = Index(list("abc")) + assert not index.equals(other) + + +class TestPeriodIndexEquals(EqualsTests): + @pytest.fixture + def index(self): + return period_range("2013-01-01", periods=5, freq="D") + + # TODO: de-duplicate with other test_equals2 methods + @pytest.mark.parametrize("freq", ["D", "M"]) + def test_equals2(self, freq): + # GH#13107 + idx = PeriodIndex(["2011-01-01", "2011-01-02", "NaT"], freq=freq) + assert idx.equals(idx) + assert idx.equals(idx.copy()) + assert idx.equals(idx.astype(object)) + assert idx.astype(object).equals(idx) + assert idx.astype(object).equals(idx.astype(object)) + assert not idx.equals(list(idx)) + assert not idx.equals(pd.Series(idx)) + + idx2 = PeriodIndex(["2011-01-01", "2011-01-02", "NaT"], freq="H") + assert not idx.equals(idx2) + assert not idx.equals(idx2.copy()) + assert not idx.equals(idx2.astype(object)) + assert not idx.astype(object).equals(idx2) + assert not idx.equals(list(idx2)) + assert not idx.equals(pd.Series(idx2)) + + # same internal, different freq + idx3 = PeriodIndex._simple_new( + idx._values._simple_new(idx._values.asi8, freq="H") + ) + tm.assert_numpy_array_equal(idx.asi8, idx3.asi8) + assert not idx.equals(idx3) + assert not idx.equals(idx3.copy()) + assert not idx.equals(idx3.astype(object)) + assert not idx.astype(object).equals(idx3) + assert not idx.equals(list(idx3)) + assert not idx.equals(pd.Series(idx3)) + + +class TestDatetimeIndexEquals(EqualsTests): + @pytest.fixture + def index(self): + return date_range("2013-01-01", periods=5) + + def test_equals2(self): + # GH#13107 + idx = DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"]) + assert idx.equals(idx) + assert idx.equals(idx.copy()) + assert idx.equals(idx.astype(object)) + assert idx.astype(object).equals(idx) + assert idx.astype(object).equals(idx.astype(object)) + assert not idx.equals(list(idx)) + assert not 
idx.equals(pd.Series(idx)) + + idx2 = DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"], tz="US/Pacific") + assert not idx.equals(idx2) + assert not idx.equals(idx2.copy()) + assert not idx.equals(idx2.astype(object)) + assert not idx.astype(object).equals(idx2) + assert not idx.equals(list(idx2)) + assert not idx.equals(pd.Series(idx2)) + + # same internal, different tz + idx3 = DatetimeIndex(idx.asi8, tz="US/Pacific") + tm.assert_numpy_array_equal(idx.asi8, idx3.asi8) + assert not idx.equals(idx3) + assert not idx.equals(idx3.copy()) + assert not idx.equals(idx3.astype(object)) + assert not idx.astype(object).equals(idx3) + assert not idx.equals(list(idx3)) + assert not idx.equals(pd.Series(idx3)) + + # check that we do not raise when comparing with OutOfBounds objects + oob = Index([datetime(2500, 1, 1)] * 3, dtype=object) + assert not idx.equals(oob) + assert not idx2.equals(oob) + assert not idx3.equals(oob) + + # check that we do not raise when comparing with OutOfBounds dt64 + oob2 = oob.map(np.datetime64) + assert not idx.equals(oob2) + assert not idx2.equals(oob2) + assert not idx3.equals(oob2) + + @pytest.mark.parametrize("freq", ["B", "C"]) + def test_not_equals_bday(self, freq): + rng = date_range("2009-01-01", "2010-01-01", freq=freq) + assert not rng.equals(list(rng)) + + +class TestTimedeltaIndexEquals(EqualsTests): + @pytest.fixture + def index(self): + return tm.makeTimedeltaIndex(10) + + def test_equals2(self): + # GH#13107 + idx = TimedeltaIndex(["1 days", "2 days", "NaT"]) + assert idx.equals(idx) + assert idx.equals(idx.copy()) + assert idx.equals(idx.astype(object)) + assert idx.astype(object).equals(idx) + assert idx.astype(object).equals(idx.astype(object)) + assert not idx.equals(list(idx)) + assert not idx.equals(pd.Series(idx)) + + idx2 = TimedeltaIndex(["2 days", "1 days", "NaT"]) + assert not idx.equals(idx2) + assert not idx.equals(idx2.copy()) + assert not idx.equals(idx2.astype(object)) + assert not idx.astype(object).equals(idx2) + assert not idx.astype(object).equals(idx2.astype(object)) + assert not idx.equals(list(idx2)) + assert not idx.equals(pd.Series(idx2)) + + # Check that we don't raise OverflowError on comparisons outside the + # implementation range GH#28532 + oob = Index([timedelta(days=10 ** 6)] * 3, dtype=object) + assert not idx.equals(oob) + assert not idx2.equals(oob) + + oob2 = Index([np.timedelta64(x) for x in oob], dtype=object) + assert (oob == oob2).all() + assert not idx.equals(oob2) + assert not idx2.equals(oob2) + + oob3 = oob.map(np.timedelta64) + assert (oob3 == oob).all() + assert not idx.equals(oob3) + assert not idx2.equals(oob3) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimelike_/test_indexing.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimelike_/test_indexing.py new file mode 100644 index 0000000..eb37c2c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimelike_/test_indexing.py @@ -0,0 +1,46 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DatetimeIndex, + Index, +) +import pandas._testing as tm + +dtlike_dtypes = [ + np.dtype("timedelta64[ns]"), + np.dtype("datetime64[ns]"), + pd.DatetimeTZDtype("ns", "Asia/Tokyo"), + pd.PeriodDtype("ns"), +] + + +@pytest.mark.parametrize("ldtype", dtlike_dtypes) +@pytest.mark.parametrize("rdtype", dtlike_dtypes) +def test_get_indexer_non_unique_wrong_dtype(ldtype, rdtype): + + vals = np.tile(3600 * 10 ** 9 * np.arange(3), 2) + + def construct(dtype): + if dtype is 
dtlike_dtypes[-1]: + # PeriodArray will try to cast ints to strings + return DatetimeIndex(vals).astype(dtype) + return Index(vals, dtype=dtype) + + left = construct(ldtype) + right = construct(rdtype) + + result = left.get_indexer_non_unique(right) + + if ldtype is rdtype: + ex1 = np.array([0, 3, 1, 4, 2, 5] * 2, dtype=np.intp) + ex2 = np.array([], dtype=np.intp) + tm.assert_numpy_array_equal(result[0], ex1) + tm.assert_numpy_array_equal(result[1], ex2) + + else: + no_matches = np.array([-1] * 6, dtype=np.intp) + missing = np.arange(6, dtype=np.intp) + tm.assert_numpy_array_equal(result[0], no_matches) + tm.assert_numpy_array_equal(result[1], missing) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimelike_/test_is_monotonic.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimelike_/test_is_monotonic.py new file mode 100644 index 0000000..22247c9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimelike_/test_is_monotonic.py @@ -0,0 +1,46 @@ +from pandas import ( + Index, + NaT, + date_range, +) + + +def test_is_monotonic_with_nat(): + # GH#31437 + # PeriodIndex.is_monotonic should behave analogously to DatetimeIndex, + # in particular never be monotonic when we have NaT + dti = date_range("2016-01-01", periods=3) + pi = dti.to_period("D") + tdi = Index(dti.view("timedelta64[ns]")) + + for obj in [pi, pi._engine, dti, dti._engine, tdi, tdi._engine]: + if isinstance(obj, Index): + # i.e. not Engines + assert obj.is_monotonic + assert obj.is_monotonic_increasing + assert not obj.is_monotonic_decreasing + assert obj.is_unique + + dti1 = dti.insert(0, NaT) + pi1 = dti1.to_period("D") + tdi1 = Index(dti1.view("timedelta64[ns]")) + + for obj in [pi1, pi1._engine, dti1, dti1._engine, tdi1, tdi1._engine]: + if isinstance(obj, Index): + # i.e. not Engines + assert not obj.is_monotonic + assert not obj.is_monotonic_increasing + assert not obj.is_monotonic_decreasing + assert obj.is_unique + + dti2 = dti.insert(3, NaT) + pi2 = dti2.to_period("H") + tdi2 = Index(dti2.view("timedelta64[ns]")) + + for obj in [pi2, pi2._engine, dti2, dti2._engine, tdi2, tdi2._engine]: + if isinstance(obj, Index): + # i.e. 
not Engines + assert not obj.is_monotonic + assert not obj.is_monotonic_increasing + assert not obj.is_monotonic_decreasing + assert obj.is_unique diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimelike_/test_nat.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimelike_/test_nat.py new file mode 100644 index 0000000..50cf29d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimelike_/test_nat.py @@ -0,0 +1,53 @@ +import numpy as np +import pytest + +from pandas import ( + DatetimeIndex, + NaT, + PeriodIndex, + TimedeltaIndex, +) +import pandas._testing as tm + + +class NATests: + def test_nat(self, index_without_na): + empty_index = index_without_na[:0] + + index_with_na = index_without_na.copy(deep=True) + index_with_na._data[1] = NaT + + assert empty_index._na_value is NaT + assert index_with_na._na_value is NaT + assert index_without_na._na_value is NaT + + idx = index_without_na + assert idx._can_hold_na + + tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) + assert idx.hasnans is False + + idx = index_with_na + assert idx._can_hold_na + + tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) + assert idx.hasnans is True + + +class TestDatetimeIndexNA(NATests): + @pytest.fixture + def index_without_na(self, tz_naive_fixture): + tz = tz_naive_fixture + return DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz) + + +class TestTimedeltaIndexNA(NATests): + @pytest.fixture + def index_without_na(self): + return TimedeltaIndex(["1 days", "2 days"]) + + +class TestPeriodIndexNA(NATests): + @pytest.fixture + def index_without_na(self): + return PeriodIndex(["2011-01-01", "2011-01-02"], freq="D") diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimelike_/test_sort_values.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimelike_/test_sort_values.py new file mode 100644 index 0000000..9a91cc2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimelike_/test_sort_values.py @@ -0,0 +1,317 @@ +import numpy as np +import pytest + +from pandas import ( + DatetimeIndex, + Index, + NaT, + PeriodIndex, + TimedeltaIndex, + timedelta_range, +) +import pandas._testing as tm + + +def check_freq_ascending(ordered, orig, ascending): + """ + Check the expected freq on a PeriodIndex/DatetimeIndex/TimedeltaIndex + when the original index is generated (or generate-able) with + period_range/date_range/timedelta_range. + """ + if isinstance(ordered, PeriodIndex): + assert ordered.freq == orig.freq + elif isinstance(ordered, (DatetimeIndex, TimedeltaIndex)): + if ascending: + assert ordered.freq.n == orig.freq.n + else: + assert ordered.freq.n == -1 * orig.freq.n + + +def check_freq_nonmonotonic(ordered, orig): + """ + Check the expected freq on a PeriodIndex/DatetimeIndex/TimedeltaIndex + when the original index is _not_ generated (or generate-able) with + period_range/date_range/timedelta_range. 
+ """ + if isinstance(ordered, PeriodIndex): + assert ordered.freq == orig.freq + elif isinstance(ordered, (DatetimeIndex, TimedeltaIndex)): + assert ordered.freq is None + + +class TestSortValues: + @pytest.fixture(params=[DatetimeIndex, TimedeltaIndex, PeriodIndex]) + def non_monotonic_idx(self, request): + if request.param is DatetimeIndex: + return DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"]) + elif request.param is PeriodIndex: + dti = DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"]) + return dti.to_period("D") + else: + return TimedeltaIndex( + ["1 day 00:00:05", "1 day 00:00:01", "1 day 00:00:02"] + ) + + def test_argmin_argmax(self, non_monotonic_idx): + assert non_monotonic_idx.argmin() == 1 + assert non_monotonic_idx.argmax() == 0 + + def test_sort_values(self, non_monotonic_idx): + idx = non_monotonic_idx + ordered = idx.sort_values() + assert ordered.is_monotonic + + ordered = idx.sort_values(ascending=False) + assert ordered[::-1].is_monotonic + + ordered, dexer = idx.sort_values(return_indexer=True) + assert ordered.is_monotonic + tm.assert_numpy_array_equal(dexer, np.array([1, 2, 0], dtype=np.intp)) + + ordered, dexer = idx.sort_values(return_indexer=True, ascending=False) + assert ordered[::-1].is_monotonic + tm.assert_numpy_array_equal(dexer, np.array([0, 2, 1], dtype=np.intp)) + + def check_sort_values_with_freq(self, idx): + ordered = idx.sort_values() + tm.assert_index_equal(ordered, idx) + check_freq_ascending(ordered, idx, True) + + ordered = idx.sort_values(ascending=False) + expected = idx[::-1] + tm.assert_index_equal(ordered, expected) + check_freq_ascending(ordered, idx, False) + + ordered, indexer = idx.sort_values(return_indexer=True) + tm.assert_index_equal(ordered, idx) + tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2], dtype=np.intp)) + check_freq_ascending(ordered, idx, True) + + ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) + expected = idx[::-1] + tm.assert_index_equal(ordered, expected) + tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0], dtype=np.intp)) + check_freq_ascending(ordered, idx, False) + + @pytest.mark.parametrize("freq", ["D", "H"]) + def test_sort_values_with_freq_timedeltaindex(self, freq): + # GH#10295 + idx = timedelta_range(start=f"1{freq}", periods=3, freq=freq).rename("idx") + + self.check_sort_values_with_freq(idx) + + @pytest.mark.parametrize( + "idx", + [ + DatetimeIndex( + ["2011-01-01", "2011-01-02", "2011-01-03"], freq="D", name="idx" + ), + DatetimeIndex( + ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"], + freq="H", + name="tzidx", + tz="Asia/Tokyo", + ), + ], + ) + def test_sort_values_with_freq_datetimeindex(self, idx): + self.check_sort_values_with_freq(idx) + + @pytest.mark.parametrize("freq", ["D", "2D", "4D"]) + def test_sort_values_with_freq_periodindex(self, freq): + # here with_freq refers to being period_range-like + idx = PeriodIndex( + ["2011-01-01", "2011-01-02", "2011-01-03"], freq=freq, name="idx" + ) + self.check_sort_values_with_freq(idx) + + @pytest.mark.parametrize( + "idx", + [ + PeriodIndex(["2011", "2012", "2013"], name="pidx", freq="A"), + Index([2011, 2012, 2013], name="idx"), # for compatibility check + ], + ) + def test_sort_values_with_freq_periodindex2(self, idx): + # here with_freq indicates this is period_range-like + self.check_sort_values_with_freq(idx) + + def check_sort_values_without_freq(self, idx, expected): + + ordered = idx.sort_values(na_position="first") + tm.assert_index_equal(ordered, expected) + 
check_freq_nonmonotonic(ordered, idx) + + if not idx.isna().any(): + ordered = idx.sort_values() + tm.assert_index_equal(ordered, expected) + check_freq_nonmonotonic(ordered, idx) + + ordered = idx.sort_values(ascending=False) + tm.assert_index_equal(ordered, expected[::-1]) + check_freq_nonmonotonic(ordered, idx) + + ordered, indexer = idx.sort_values(return_indexer=True, na_position="first") + tm.assert_index_equal(ordered, expected) + + exp = np.array([0, 4, 3, 1, 2], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, exp) + check_freq_nonmonotonic(ordered, idx) + + if not idx.isna().any(): + ordered, indexer = idx.sort_values(return_indexer=True) + tm.assert_index_equal(ordered, expected) + + exp = np.array([0, 4, 3, 1, 2], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, exp) + check_freq_nonmonotonic(ordered, idx) + + ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) + tm.assert_index_equal(ordered, expected[::-1]) + + exp = np.array([2, 1, 3, 0, 4], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, exp) + check_freq_nonmonotonic(ordered, idx) + + def test_sort_values_without_freq_timedeltaindex(self): + # GH#10295 + + idx = TimedeltaIndex( + ["1 hour", "3 hour", "5 hour", "2 hour ", "1 hour"], name="idx1" + ) + expected = TimedeltaIndex( + ["1 hour", "1 hour", "2 hour", "3 hour", "5 hour"], name="idx1" + ) + self.check_sort_values_without_freq(idx, expected) + + @pytest.mark.parametrize( + "index_dates,expected_dates", + [ + ( + ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"], + ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"], + ), + ( + ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"], + ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"], + ), + ( + [NaT, "2011-01-03", "2011-01-05", "2011-01-02", NaT], + [NaT, NaT, "2011-01-02", "2011-01-03", "2011-01-05"], + ), + ], + ) + def test_sort_values_without_freq_datetimeindex( + self, index_dates, expected_dates, tz_naive_fixture + ): + tz = tz_naive_fixture + + # without freq + idx = DatetimeIndex(index_dates, tz=tz, name="idx") + expected = DatetimeIndex(expected_dates, tz=tz, name="idx") + + self.check_sort_values_without_freq(idx, expected) + + @pytest.mark.parametrize( + "idx,expected", + [ + ( + PeriodIndex( + [ + "2011-01-01", + "2011-01-03", + "2011-01-05", + "2011-01-02", + "2011-01-01", + ], + freq="D", + name="idx1", + ), + PeriodIndex( + [ + "2011-01-01", + "2011-01-01", + "2011-01-02", + "2011-01-03", + "2011-01-05", + ], + freq="D", + name="idx1", + ), + ), + ( + PeriodIndex( + [ + "2011-01-01", + "2011-01-03", + "2011-01-05", + "2011-01-02", + "2011-01-01", + ], + freq="D", + name="idx2", + ), + PeriodIndex( + [ + "2011-01-01", + "2011-01-01", + "2011-01-02", + "2011-01-03", + "2011-01-05", + ], + freq="D", + name="idx2", + ), + ), + ( + PeriodIndex( + [NaT, "2011-01-03", "2011-01-05", "2011-01-02", NaT], + freq="D", + name="idx3", + ), + PeriodIndex( + [NaT, NaT, "2011-01-02", "2011-01-03", "2011-01-05"], + freq="D", + name="idx3", + ), + ), + ( + PeriodIndex( + ["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="A" + ), + PeriodIndex( + ["2011", "2011", "2012", "2013", "2015"], name="pidx", freq="A" + ), + ), + ( + # For compatibility check + Index([2011, 2013, 2015, 2012, 2011], name="idx"), + Index([2011, 2011, 2012, 2013, 2015], name="idx"), + ), + ], + ) + def test_sort_values_without_freq_periodindex(self, idx, expected): + # here without_freq means not generateable by period_range + 
self.check_sort_values_without_freq(idx, expected) + + def test_sort_values_without_freq_periodindex_nat(self): + # doesn't quite fit into check_sort_values_without_freq + idx = PeriodIndex(["2011", "2013", "NaT", "2011"], name="pidx", freq="D") + expected = PeriodIndex(["NaT", "2011", "2011", "2013"], name="pidx", freq="D") + + ordered = idx.sort_values(na_position="first") + tm.assert_index_equal(ordered, expected) + check_freq_nonmonotonic(ordered, idx) + + ordered = idx.sort_values(ascending=False) + tm.assert_index_equal(ordered, expected[::-1]) + check_freq_nonmonotonic(ordered, idx) + + +def test_order_stability_compat(): + # GH#35922. sort_values is stable both for normal and datetime-like Index + pidx = PeriodIndex(["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="A") + iidx = Index([2011, 2013, 2015, 2012, 2011], name="idx") + ordered1, indexer1 = pidx.sort_values(return_indexer=True, ascending=False) + ordered2, indexer2 = iidx.sort_values(return_indexer=True, ascending=False) + tm.assert_numpy_array_equal(indexer1, indexer2) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimelike_/test_value_counts.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimelike_/test_value_counts.py new file mode 100644 index 0000000..f0df6dd --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimelike_/test_value_counts.py @@ -0,0 +1,103 @@ +import numpy as np + +from pandas import ( + DatetimeIndex, + NaT, + PeriodIndex, + Series, + TimedeltaIndex, + date_range, + period_range, + timedelta_range, +) +import pandas._testing as tm + + +class TestValueCounts: + # GH#7735 + + def test_value_counts_unique_datetimeindex(self, tz_naive_fixture): + tz = tz_naive_fixture + orig = date_range("2011-01-01 09:00", freq="H", periods=10, tz=tz) + self._check_value_counts_with_repeats(orig) + + def test_value_counts_unique_timedeltaindex(self): + orig = timedelta_range("1 days 09:00:00", freq="H", periods=10) + self._check_value_counts_with_repeats(orig) + + def test_value_counts_unique_periodindex(self): + orig = period_range("2011-01-01 09:00", freq="H", periods=10) + self._check_value_counts_with_repeats(orig) + + def _check_value_counts_with_repeats(self, orig): + # create repeated values, 'n'th element is repeated by n+1 times + idx = type(orig)( + np.repeat(orig._values, range(1, len(orig) + 1)), dtype=orig.dtype + ) + + exp_idx = orig[::-1] + if not isinstance(exp_idx, PeriodIndex): + exp_idx = exp_idx._with_freq(None) + expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64") + + for obj in [idx, Series(idx)]: + tm.assert_series_equal(obj.value_counts(), expected) + + tm.assert_index_equal(idx.unique(), orig) + + def test_value_counts_unique_datetimeindex2(self, tz_naive_fixture): + tz = tz_naive_fixture + idx = DatetimeIndex( + [ + "2013-01-01 09:00", + "2013-01-01 09:00", + "2013-01-01 09:00", + "2013-01-01 08:00", + "2013-01-01 08:00", + NaT, + ], + tz=tz, + ) + self._check_value_counts_dropna(idx) + + def test_value_counts_unique_timedeltaindex2(self): + idx = TimedeltaIndex( + [ + "1 days 09:00:00", + "1 days 09:00:00", + "1 days 09:00:00", + "1 days 08:00:00", + "1 days 08:00:00", + NaT, + ] + ) + self._check_value_counts_dropna(idx) + + def test_value_counts_unique_periodindex2(self): + idx = PeriodIndex( + [ + "2013-01-01 09:00", + "2013-01-01 09:00", + "2013-01-01 09:00", + "2013-01-01 08:00", + "2013-01-01 08:00", + NaT, + ], + freq="H", + ) + self._check_value_counts_dropna(idx) + + def 
_check_value_counts_dropna(self, idx): + exp_idx = idx[[2, 3]] + expected = Series([3, 2], index=exp_idx) + + for obj in [idx, Series(idx)]: + tm.assert_series_equal(obj.value_counts(), expected) + + exp_idx = idx[[2, 3, -1]] + expected = Series([3, 2, 1], index=exp_idx) + + for obj in [idx, Series(idx)]: + tm.assert_series_equal(obj.value_counts(dropna=False), expected) + + tm.assert_index_equal(idx.unique(), exp_idx) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..7e39ccb Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_asof.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_asof.cpython-38.pyc new file mode 100644 index 0000000..5850f89 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_asof.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_constructors.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_constructors.cpython-38.pyc new file mode 100644 index 0000000..57fc528 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_constructors.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_date_range.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_date_range.cpython-38.pyc new file mode 100644 index 0000000..c51024f Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_date_range.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_datetime.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_datetime.cpython-38.pyc new file mode 100644 index 0000000..b3a2247 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_datetime.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_datetimelike.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_datetimelike.cpython-38.pyc new file mode 100644 index 0000000..d3206c3 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_datetimelike.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_delete.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_delete.cpython-38.pyc new file mode 100644 index 0000000..7b2b791 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_delete.cpython-38.pyc differ diff 
--git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_formats.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_formats.cpython-38.pyc new file mode 100644 index 0000000..be0fcd3 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_formats.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_freq_attr.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_freq_attr.cpython-38.pyc new file mode 100644 index 0000000..f146c29 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_freq_attr.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_indexing.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_indexing.cpython-38.pyc new file mode 100644 index 0000000..d5cd825 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_indexing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_join.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_join.cpython-38.pyc new file mode 100644 index 0000000..9b88691 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_join.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_map.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_map.cpython-38.pyc new file mode 100644 index 0000000..364352d Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_map.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_misc.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_misc.cpython-38.pyc new file mode 100644 index 0000000..9599346 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_misc.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_npfuncs.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_npfuncs.cpython-38.pyc new file mode 100644 index 0000000..1af7796 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_npfuncs.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_ops.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_ops.cpython-38.pyc new file mode 100644 index 0000000..305b11f Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_ops.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_partial_slicing.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_partial_slicing.cpython-38.pyc new file mode 100644 index 0000000..af1ab2e 
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_partial_slicing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_pickle.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_pickle.cpython-38.pyc new file mode 100644 index 0000000..7420ee4 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_pickle.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_reindex.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_reindex.cpython-38.pyc new file mode 100644 index 0000000..a5980fb Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_reindex.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_scalar_compat.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_scalar_compat.cpython-38.pyc new file mode 100644 index 0000000..2e13798 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_scalar_compat.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_setops.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_setops.cpython-38.pyc new file mode 100644 index 0000000..7c03c1e Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_setops.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_timezones.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_timezones.cpython-38.pyc new file mode 100644 index 0000000..889ccde Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_timezones.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_unique.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_unique.cpython-38.pyc new file mode 100644 index 0000000..ae70bf4 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/__pycache__/test_unique.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..b8aba27 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/__pycache__/test_astype.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/__pycache__/test_astype.cpython-38.pyc new file mode 
100644 index 0000000..b859174 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/__pycache__/test_astype.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/__pycache__/test_factorize.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/__pycache__/test_factorize.cpython-38.pyc new file mode 100644 index 0000000..34abe37 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/__pycache__/test_factorize.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/__pycache__/test_fillna.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/__pycache__/test_fillna.cpython-38.pyc new file mode 100644 index 0000000..706d702 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/__pycache__/test_fillna.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/__pycache__/test_insert.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/__pycache__/test_insert.cpython-38.pyc new file mode 100644 index 0000000..ee5004f Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/__pycache__/test_insert.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/__pycache__/test_isocalendar.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/__pycache__/test_isocalendar.cpython-38.pyc new file mode 100644 index 0000000..a17902a Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/__pycache__/test_isocalendar.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/__pycache__/test_repeat.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/__pycache__/test_repeat.cpython-38.pyc new file mode 100644 index 0000000..70a6404 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/__pycache__/test_repeat.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/__pycache__/test_shift.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/__pycache__/test_shift.cpython-38.pyc new file mode 100644 index 0000000..64dd2bf Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/__pycache__/test_shift.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/__pycache__/test_snap.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/__pycache__/test_snap.cpython-38.pyc new file mode 100644 index 0000000..3a0afd3 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/__pycache__/test_snap.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/__pycache__/test_to_frame.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/__pycache__/test_to_frame.cpython-38.pyc new file mode 100644 index 0000000..a025355 Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/__pycache__/test_to_frame.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/__pycache__/test_to_period.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/__pycache__/test_to_period.cpython-38.pyc new file mode 100644 index 0000000..75996f0 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/__pycache__/test_to_period.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/__pycache__/test_to_series.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/__pycache__/test_to_series.cpython-38.pyc new file mode 100644 index 0000000..673442e Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/__pycache__/test_to_series.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/test_astype.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/test_astype.py new file mode 100644 index 0000000..9002371 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/test_astype.py @@ -0,0 +1,328 @@ +from datetime import datetime + +import dateutil +import numpy as np +import pytest +import pytz + +import pandas as pd +from pandas import ( + DatetimeIndex, + Index, + NaT, + PeriodIndex, + Timestamp, + date_range, +) +import pandas._testing as tm +from pandas.core.api import ( + Int64Index, + UInt64Index, +) + + +class TestDatetimeIndex: + def test_astype(self): + # GH 13149, GH 13209 + idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN], name="idx") + + result = idx.astype(object) + expected = Index( + [Timestamp("2016-05-16")] + [NaT] * 3, dtype=object, name="idx" + ) + tm.assert_index_equal(result, expected) + + result = idx.astype(int) + expected = Int64Index( + [1463356800000000000] + [-9223372036854775808] * 3, + dtype=np.int64, + name="idx", + ) + tm.assert_index_equal(result, expected) + + rng = date_range("1/1/2000", periods=10, name="idx") + result = rng.astype("i8") + tm.assert_index_equal(result, Index(rng.asi8, name="idx")) + tm.assert_numpy_array_equal(result.values, rng.asi8) + + def test_astype_uint(self): + arr = date_range("2000", periods=2, name="idx") + expected = UInt64Index( + np.array([946684800000000000, 946771200000000000], dtype="uint64"), + name="idx", + ) + tm.assert_index_equal(arr.astype("uint64"), expected) + tm.assert_index_equal(arr.astype("uint32"), expected) + + def test_astype_with_tz(self): + + # with tz + rng = date_range("1/1/2000", periods=10, tz="US/Eastern") + with tm.assert_produces_warning(FutureWarning): + # deprecated + result = rng.astype("datetime64[ns]") + with tm.assert_produces_warning(FutureWarning): + # check DatetimeArray while we're here deprecated + rng._data.astype("datetime64[ns]") + + expected = ( + date_range("1/1/2000", periods=10, tz="US/Eastern") + .tz_convert("UTC") + .tz_localize(None) + ) + tm.assert_index_equal(result, expected) + + def test_astype_tzaware_to_tzaware(self): + # GH 18951: tz-aware to tz-aware + idx = date_range("20170101", periods=4, tz="US/Pacific") + result = idx.astype("datetime64[ns, US/Eastern]") + expected = date_range("20170101 03:00:00", periods=4, tz="US/Eastern") + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + 
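# --- editor's note (not part of the vendored pandas diff) ---------------------
# A minimal runnable sketch of the behaviour the astype tests above pin down:
# casting a tz-aware DatetimeIndex to another tz-aware dtype keeps the
# underlying UTC instants and only changes the wall-clock representation.
# Assumes a pandas of roughly this vintage (1.x-era API).
import pandas as pd

idx = pd.date_range("20170101", periods=4, tz="US/Pacific")
converted = idx.astype("datetime64[ns, US/Eastern]")
assert (converted == idx).all()  # identical instants; comparison is tz-aware
assert converted[0].hour == 3    # midnight US/Pacific == 03:00 US/Eastern
# -------------------------------------------------------------------------------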
+ def test_astype_tznaive_to_tzaware(self): + # GH 18951: tz-naive to tz-aware + idx = date_range("20170101", periods=4) + idx = idx._with_freq(None) # tz_localize does not preserve freq + with tm.assert_produces_warning(FutureWarning): + # dt64->dt64tz deprecated + result = idx.astype("datetime64[ns, US/Eastern]") + with tm.assert_produces_warning(FutureWarning): + # dt64->dt64tz deprecated + idx._data.astype("datetime64[ns, US/Eastern]") + + expected = date_range("20170101", periods=4, tz="US/Eastern") + expected = expected._with_freq(None) + tm.assert_index_equal(result, expected) + + def test_astype_str_nat(self): + # GH 13149, GH 13209 + # verify that we are returning NaT as a string (and not unicode) + + idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN]) + result = idx.astype(str) + expected = Index(["2016-05-16", "NaT", "NaT", "NaT"], dtype=object) + tm.assert_index_equal(result, expected) + + def test_astype_str(self): + # test astype string - #10442 + dti = date_range("2012-01-01", periods=4, name="test_name") + result = dti.astype(str) + expected = Index( + ["2012-01-01", "2012-01-02", "2012-01-03", "2012-01-04"], + name="test_name", + dtype=object, + ) + tm.assert_index_equal(result, expected) + + def test_astype_str_tz_and_name(self): + # test astype string with tz and name + dti = date_range("2012-01-01", periods=3, name="test_name", tz="US/Eastern") + result = dti.astype(str) + expected = Index( + [ + "2012-01-01 00:00:00-05:00", + "2012-01-02 00:00:00-05:00", + "2012-01-03 00:00:00-05:00", + ], + name="test_name", + dtype=object, + ) + tm.assert_index_equal(result, expected) + + def test_astype_str_freq_and_name(self): + # test astype string with freq "H" and name + dti = date_range("1/1/2011", periods=3, freq="H", name="test_name") + result = dti.astype(str) + expected = Index( + ["2011-01-01 00:00:00", "2011-01-01 01:00:00", "2011-01-01 02:00:00"], + name="test_name", + dtype=object, + ) + tm.assert_index_equal(result, expected) + + def test_astype_str_freq_and_tz(self): + # test astype string with freq "H" and timezone + dti = date_range( + "3/6/2012 00:00", periods=2, freq="H", tz="Europe/London", name="test_name" + ) + result = dti.astype(str) + expected = Index( + ["2012-03-06 00:00:00+00:00", "2012-03-06 01:00:00+00:00"], + dtype=object, + name="test_name", + ) + tm.assert_index_equal(result, expected) + + def test_astype_datetime64(self): + # GH 13149, GH 13209 + idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN], name="idx") + + result = idx.astype("datetime64[ns]") + tm.assert_index_equal(result, idx) + assert result is not idx + + result = idx.astype("datetime64[ns]", copy=False) + tm.assert_index_equal(result, idx) + assert result is idx + + idx_tz = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN], tz="EST", name="idx") + with tm.assert_produces_warning(FutureWarning): + # dt64tz->dt64 deprecated + result = idx_tz.astype("datetime64[ns]") + expected = DatetimeIndex( + ["2016-05-16 05:00:00", "NaT", "NaT", "NaT"], + dtype="datetime64[ns]", + name="idx", + ) + tm.assert_index_equal(result, expected) + + def test_astype_object(self): + rng = date_range("1/1/2000", periods=20) + + casted = rng.astype("O") + exp_values = list(rng) + + tm.assert_index_equal(casted, Index(exp_values, dtype=np.object_)) + assert casted.tolist() == exp_values + + @pytest.mark.parametrize("tz", [None, "Asia/Tokyo"]) + def test_astype_object_tz(self, tz): + idx = date_range(start="2013-01-01", periods=4, freq="M", name="idx", tz=tz) + expected_list = [ + Timestamp("2013-01-31", tz=tz), 
+ Timestamp("2013-02-28", tz=tz), + Timestamp("2013-03-31", tz=tz), + Timestamp("2013-04-30", tz=tz), + ] + expected = Index(expected_list, dtype=object, name="idx") + result = idx.astype(object) + tm.assert_index_equal(result, expected) + assert idx.tolist() == expected_list + + def test_astype_object_with_nat(self): + idx = DatetimeIndex( + [datetime(2013, 1, 1), datetime(2013, 1, 2), NaT, datetime(2013, 1, 4)], + name="idx", + ) + expected_list = [ + Timestamp("2013-01-01"), + Timestamp("2013-01-02"), + NaT, + Timestamp("2013-01-04"), + ] + expected = Index(expected_list, dtype=object, name="idx") + result = idx.astype(object) + tm.assert_index_equal(result, expected) + assert idx.tolist() == expected_list + + @pytest.mark.parametrize( + "dtype", + [float, "timedelta64", "timedelta64[ns]", "datetime64", "datetime64[D]"], + ) + def test_astype_raises(self, dtype): + # GH 13149, GH 13209 + idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN]) + msg = "Cannot cast DatetimeIndex to dtype" + with pytest.raises(TypeError, match=msg): + idx.astype(dtype) + + def test_index_convert_to_datetime_array(self): + def _check_rng(rng): + converted = rng.to_pydatetime() + assert isinstance(converted, np.ndarray) + for x, stamp in zip(converted, rng): + assert isinstance(x, datetime) + assert x == stamp.to_pydatetime() + assert x.tzinfo == stamp.tzinfo + + rng = date_range("20090415", "20090519") + rng_eastern = date_range("20090415", "20090519", tz="US/Eastern") + rng_utc = date_range("20090415", "20090519", tz="utc") + + _check_rng(rng) + _check_rng(rng_eastern) + _check_rng(rng_utc) + + def test_index_convert_to_datetime_array_explicit_pytz(self): + def _check_rng(rng): + converted = rng.to_pydatetime() + assert isinstance(converted, np.ndarray) + for x, stamp in zip(converted, rng): + assert isinstance(x, datetime) + assert x == stamp.to_pydatetime() + assert x.tzinfo == stamp.tzinfo + + rng = date_range("20090415", "20090519") + rng_eastern = date_range("20090415", "20090519", tz=pytz.timezone("US/Eastern")) + rng_utc = date_range("20090415", "20090519", tz=pytz.utc) + + _check_rng(rng) + _check_rng(rng_eastern) + _check_rng(rng_utc) + + def test_index_convert_to_datetime_array_dateutil(self): + def _check_rng(rng): + converted = rng.to_pydatetime() + assert isinstance(converted, np.ndarray) + for x, stamp in zip(converted, rng): + assert isinstance(x, datetime) + assert x == stamp.to_pydatetime() + assert x.tzinfo == stamp.tzinfo + + rng = date_range("20090415", "20090519") + rng_eastern = date_range("20090415", "20090519", tz="dateutil/US/Eastern") + rng_utc = date_range("20090415", "20090519", tz=dateutil.tz.tzutc()) + + _check_rng(rng) + _check_rng(rng_eastern) + _check_rng(rng_utc) + + @pytest.mark.parametrize( + "tz, dtype", + [["US/Pacific", "datetime64[ns, US/Pacific]"], [None, "datetime64[ns]"]], + ) + def test_integer_index_astype_datetime(self, tz, dtype): + # GH 20997, 20964, 24559 + val = [Timestamp("2018-01-01", tz=tz).value] + result = Index(val, name="idx").astype(dtype) + expected = DatetimeIndex(["2018-01-01"], tz=tz, name="idx") + tm.assert_index_equal(result, expected) + + def test_dti_astype_period(self): + idx = DatetimeIndex([NaT, "2011-01-01", "2011-02-01"], name="idx") + + res = idx.astype("period[M]") + exp = PeriodIndex(["NaT", "2011-01", "2011-02"], freq="M", name="idx") + tm.assert_index_equal(res, exp) + + res = idx.astype("period[3M]") + exp = PeriodIndex(["NaT", "2011-01", "2011-02"], freq="3M", name="idx") + tm.assert_index_equal(res, exp) + + +class TestAstype: + 
@pytest.mark.parametrize("tz", [None, "US/Central"]) + def test_astype_category(self, tz): + obj = date_range("2000", periods=2, tz=tz, name="idx") + result = obj.astype("category") + expected = pd.CategoricalIndex( + [Timestamp("2000-01-01", tz=tz), Timestamp("2000-01-02", tz=tz)], + name="idx", + ) + tm.assert_index_equal(result, expected) + + result = obj._data.astype("category") + expected = expected.values + tm.assert_categorical_equal(result, expected) + + @pytest.mark.parametrize("tz", [None, "US/Central"]) + def test_astype_array_fallback(self, tz): + obj = date_range("2000", periods=2, tz=tz, name="idx") + result = obj.astype(bool) + expected = Index(np.array([True, True]), name="idx") + tm.assert_index_equal(result, expected) + + result = obj._data.astype(bool) + expected = np.array([True, True]) + tm.assert_numpy_array_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/test_factorize.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/test_factorize.py new file mode 100644 index 0000000..90ad65c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/test_factorize.py @@ -0,0 +1,107 @@ +import numpy as np + +from pandas import ( + DatetimeIndex, + Index, + date_range, + factorize, +) +import pandas._testing as tm + + +class TestDatetimeIndexFactorize: + def test_factorize(self): + idx1 = DatetimeIndex( + ["2014-01", "2014-01", "2014-02", "2014-02", "2014-03", "2014-03"] + ) + + exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp) + exp_idx = DatetimeIndex(["2014-01", "2014-02", "2014-03"]) + + arr, idx = idx1.factorize() + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + assert idx.freq == exp_idx.freq + + arr, idx = idx1.factorize(sort=True) + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + assert idx.freq == exp_idx.freq + + # tz must be preserved + idx1 = idx1.tz_localize("Asia/Tokyo") + exp_idx = exp_idx.tz_localize("Asia/Tokyo") + + arr, idx = idx1.factorize() + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + assert idx.freq == exp_idx.freq + + idx2 = DatetimeIndex( + ["2014-03", "2014-03", "2014-02", "2014-01", "2014-03", "2014-01"] + ) + + exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.intp) + exp_idx = DatetimeIndex(["2014-01", "2014-02", "2014-03"]) + arr, idx = idx2.factorize(sort=True) + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + assert idx.freq == exp_idx.freq + + exp_arr = np.array([0, 0, 1, 2, 0, 2], dtype=np.intp) + exp_idx = DatetimeIndex(["2014-03", "2014-02", "2014-01"]) + arr, idx = idx2.factorize() + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + assert idx.freq == exp_idx.freq + + def test_factorize_preserves_freq(self): + # GH#38120 freq should be preserved + idx3 = date_range("2000-01", periods=4, freq="M", tz="Asia/Tokyo") + exp_arr = np.array([0, 1, 2, 3], dtype=np.intp) + + arr, idx = idx3.factorize() + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, idx3) + assert idx.freq == idx3.freq + + arr, idx = factorize(idx3) + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, idx3) + assert idx.freq == idx3.freq + + def test_factorize_tz(self, tz_naive_fixture, index_or_series): + tz = tz_naive_fixture + # GH#13750 + base = date_range("2016-11-05", freq="H", periods=100, tz=tz) + idx = base.repeat(5) + + exp_arr = 
np.arange(100, dtype=np.intp).repeat(5) + + obj = index_or_series(idx) + + arr, res = obj.factorize() + tm.assert_numpy_array_equal(arr, exp_arr) + expected = base._with_freq(None) + tm.assert_index_equal(res, expected) + assert res.freq == expected.freq + + def test_factorize_dst(self, index_or_series): + # GH#13750 + idx = date_range("2016-11-06", freq="H", periods=12, tz="US/Eastern") + obj = index_or_series(idx) + + arr, res = obj.factorize() + tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp)) + tm.assert_index_equal(res, idx) + if index_or_series is Index: + assert res.freq == idx.freq + + idx = date_range("2016-06-13", freq="H", periods=12, tz="US/Eastern") + obj = index_or_series(idx) + + arr, res = obj.factorize() + tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp)) + tm.assert_index_equal(res, idx) + if index_or_series is Index: + assert res.freq == idx.freq diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/test_fillna.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/test_fillna.py new file mode 100644 index 0000000..5fbe60b --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/test_fillna.py @@ -0,0 +1,62 @@ +import pytest + +import pandas as pd +import pandas._testing as tm + + +class TestDatetimeIndexFillNA: + @pytest.mark.parametrize("tz", ["US/Eastern", "Asia/Tokyo"]) + def test_fillna_datetime64(self, tz): + # GH 11343 + idx = pd.DatetimeIndex(["2011-01-01 09:00", pd.NaT, "2011-01-01 11:00"]) + + exp = pd.DatetimeIndex( + ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"] + ) + tm.assert_index_equal(idx.fillna(pd.Timestamp("2011-01-01 10:00")), exp) + + # tz mismatch + exp = pd.Index( + [ + pd.Timestamp("2011-01-01 09:00"), + pd.Timestamp("2011-01-01 10:00", tz=tz), + pd.Timestamp("2011-01-01 11:00"), + ], + dtype=object, + ) + tm.assert_index_equal(idx.fillna(pd.Timestamp("2011-01-01 10:00", tz=tz)), exp) + + # object + exp = pd.Index( + [pd.Timestamp("2011-01-01 09:00"), "x", pd.Timestamp("2011-01-01 11:00")], + dtype=object, + ) + tm.assert_index_equal(idx.fillna("x"), exp) + + idx = pd.DatetimeIndex(["2011-01-01 09:00", pd.NaT, "2011-01-01 11:00"], tz=tz) + + exp = pd.DatetimeIndex( + ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"], tz=tz + ) + tm.assert_index_equal(idx.fillna(pd.Timestamp("2011-01-01 10:00", tz=tz)), exp) + + exp = pd.Index( + [ + pd.Timestamp("2011-01-01 09:00", tz=tz), + pd.Timestamp("2011-01-01 10:00"), + pd.Timestamp("2011-01-01 11:00", tz=tz), + ], + dtype=object, + ) + tm.assert_index_equal(idx.fillna(pd.Timestamp("2011-01-01 10:00")), exp) + + # object + exp = pd.Index( + [ + pd.Timestamp("2011-01-01 09:00", tz=tz), + "x", + pd.Timestamp("2011-01-01 11:00", tz=tz), + ], + dtype=object, + ) + tm.assert_index_equal(idx.fillna("x"), exp) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/test_insert.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/test_insert.py new file mode 100644 index 0000000..592f424 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/test_insert.py @@ -0,0 +1,267 @@ +from datetime import datetime + +import numpy as np +import pytest +import pytz + +from pandas import ( + NA, + DatetimeIndex, + Index, + NaT, + Timestamp, + date_range, +) +import pandas._testing as tm + + +class TestInsert: + @pytest.mark.parametrize("null", [None, np.nan, np.datetime64("NaT"), NaT, NA]) 
+ @pytest.mark.parametrize("tz", [None, "UTC", "US/Eastern"]) + def test_insert_nat(self, tz, null): + # GH#16537, GH#18295 (test missing) + + idx = DatetimeIndex(["2017-01-01"], tz=tz) + expected = DatetimeIndex(["NaT", "2017-01-01"], tz=tz) + if tz is not None and isinstance(null, np.datetime64): + expected = Index([null, idx[0]], dtype=object) + + res = idx.insert(0, null) + tm.assert_index_equal(res, expected) + + @pytest.mark.parametrize("tz", [None, "UTC", "US/Eastern"]) + def test_insert_invalid_na(self, tz): + idx = DatetimeIndex(["2017-01-01"], tz=tz) + + item = np.timedelta64("NaT") + result = idx.insert(0, item) + expected = Index([item] + list(idx), dtype=object) + tm.assert_index_equal(result, expected) + + def test_insert_empty_preserves_freq(self, tz_naive_fixture): + # GH#33573 + tz = tz_naive_fixture + dti = DatetimeIndex([], tz=tz, freq="D") + item = Timestamp("2017-04-05").tz_localize(tz) + + result = dti.insert(0, item) + assert result.freq == dti.freq + + # But not when we insert an item that doesn't conform to freq + dti = DatetimeIndex([], tz=tz, freq="W-THU") + result = dti.insert(0, item) + assert result.freq is None + + def test_insert(self): + idx = DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"], name="idx") + + result = idx.insert(2, datetime(2000, 1, 5)) + exp = DatetimeIndex( + ["2000-01-04", "2000-01-01", "2000-01-05", "2000-01-02"], name="idx" + ) + tm.assert_index_equal(result, exp) + + # insertion of non-datetime should coerce to object index + result = idx.insert(1, "inserted") + expected = Index( + [ + datetime(2000, 1, 4), + "inserted", + datetime(2000, 1, 1), + datetime(2000, 1, 2), + ], + name="idx", + ) + assert not isinstance(result, DatetimeIndex) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + + idx = date_range("1/1/2000", periods=3, freq="M", name="idx") + + # preserve freq + expected_0 = DatetimeIndex( + ["1999-12-31", "2000-01-31", "2000-02-29", "2000-03-31"], + name="idx", + freq="M", + ) + expected_3 = DatetimeIndex( + ["2000-01-31", "2000-02-29", "2000-03-31", "2000-04-30"], + name="idx", + freq="M", + ) + + # reset freq to None + expected_1_nofreq = DatetimeIndex( + ["2000-01-31", "2000-01-31", "2000-02-29", "2000-03-31"], + name="idx", + freq=None, + ) + expected_3_nofreq = DatetimeIndex( + ["2000-01-31", "2000-02-29", "2000-03-31", "2000-01-02"], + name="idx", + freq=None, + ) + + cases = [ + (0, datetime(1999, 12, 31), expected_0), + (-3, datetime(1999, 12, 31), expected_0), + (3, datetime(2000, 4, 30), expected_3), + (1, datetime(2000, 1, 31), expected_1_nofreq), + (3, datetime(2000, 1, 2), expected_3_nofreq), + ] + + for n, d, expected in cases: + result = idx.insert(n, d) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + + # reset freq to None + result = idx.insert(3, datetime(2000, 1, 2)) + expected = DatetimeIndex( + ["2000-01-31", "2000-02-29", "2000-03-31", "2000-01-02"], + name="idx", + freq=None, + ) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq is None + + for tz in ["US/Pacific", "Asia/Singapore"]: + idx = date_range("1/1/2000 09:00", periods=6, freq="H", tz=tz, name="idx") + # preserve freq + expected = date_range( + "1/1/2000 09:00", periods=7, freq="H", tz=tz, name="idx" + ) + for d in [ + Timestamp("2000-01-01 15:00", tz=tz), + pytz.timezone(tz).localize(datetime(2000, 1, 1, 15)), + ]: + + result = idx.insert(6, d) + tm.assert_index_equal(result, expected) 
+ assert result.name == expected.name + assert result.freq == expected.freq + assert result.tz == expected.tz + + expected = DatetimeIndex( + [ + "2000-01-01 09:00", + "2000-01-01 10:00", + "2000-01-01 11:00", + "2000-01-01 12:00", + "2000-01-01 13:00", + "2000-01-01 14:00", + "2000-01-01 10:00", + ], + name="idx", + tz=tz, + freq=None, + ) + # reset freq to None + for d in [ + Timestamp("2000-01-01 10:00", tz=tz), + pytz.timezone(tz).localize(datetime(2000, 1, 1, 10)), + ]: + result = idx.insert(6, d) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.tz == expected.tz + assert result.freq is None + + # TODO: also changes DataFrame.__setitem__ with expansion + def test_insert_mismatched_tzawareness(self): + # see GH#7299 + idx = date_range("1/1/2000", periods=3, freq="D", tz="Asia/Tokyo", name="idx") + + # mismatched tz-awareness + item = Timestamp("2000-01-04") + result = idx.insert(3, item) + expected = Index( + list(idx[:3]) + [item] + list(idx[3:]), dtype=object, name="idx" + ) + tm.assert_index_equal(result, expected) + + # mismatched tz-awareness + item = datetime(2000, 1, 4) + result = idx.insert(3, item) + expected = Index( + list(idx[:3]) + [item] + list(idx[3:]), dtype=object, name="idx" + ) + tm.assert_index_equal(result, expected) + + # TODO: also changes DataFrame.__setitem__ with expansion + def test_insert_mismatched_tz(self): + # see GH#7299 + idx = date_range("1/1/2000", periods=3, freq="D", tz="Asia/Tokyo", name="idx") + + # mismatched tz -> cast to object (could reasonably cast to same tz or UTC) + item = Timestamp("2000-01-04", tz="US/Eastern") + with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"): + result = idx.insert(3, item) + expected = Index( + list(idx[:3]) + [item] + list(idx[3:]), + dtype=object, + # once deprecation is enforced + # list(idx[:3]) + [item.tz_convert(idx.tz)] + list(idx[3:]), + name="idx", + ) + # once deprecation is enforced + # assert expected.dtype == idx.dtype + tm.assert_index_equal(result, expected) + + # mismatched tz -> cast to object (could reasonably cast to same tz) + item = datetime(2000, 1, 4, tzinfo=pytz.timezone("US/Eastern")) + with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"): + result = idx.insert(3, item) + expected = Index( + list(idx[:3]) + [item] + list(idx[3:]), + dtype=object, + # once deprecation is enforced + # list(idx[:3]) + [item.astimezone(idx.tzinfo)] + list(idx[3:]), + name="idx", + ) + # once deprecation is enforced + # assert expected.dtype == idx.dtype + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "item", [0, np.int64(0), np.float64(0), np.array(0), np.timedelta64(456)] + ) + def test_insert_mismatched_types_raises(self, tz_aware_fixture, item): + # GH#33703 dont cast these to dt64 + tz = tz_aware_fixture + dti = date_range("2019-11-04", periods=9, freq="-1D", name=9, tz=tz) + + result = dti.insert(1, item) + + if isinstance(item, np.ndarray): + assert item.item() == 0 + expected = Index([dti[0], 0] + list(dti[1:]), dtype=object, name=9) + else: + expected = Index([dti[0], item] + list(dti[1:]), dtype=object, name=9) + + tm.assert_index_equal(result, expected) + + def test_insert_castable_str(self, tz_aware_fixture): + # GH#33703 + tz = tz_aware_fixture + dti = date_range("2019-11-04", periods=3, freq="-1D", name=9, tz=tz) + + value = "2019-11-05" + result = dti.insert(0, value) + + ts = Timestamp(value).tz_localize(tz) + expected = DatetimeIndex([ts] + list(dti), dtype=dti.dtype, name=9) + 
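+        # the parseable string is coerced to a tz-aware Timestamp, so the
+        # result stays a DatetimeIndex instead of falling back to object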
tm.assert_index_equal(result, expected) + + def test_insert_non_castable_str(self, tz_aware_fixture): + # GH#33703 + tz = tz_aware_fixture + dti = date_range("2019-11-04", periods=3, freq="-1D", name=9, tz=tz) + + value = "foo" + result = dti.insert(0, value) + + expected = Index(["foo"] + list(dti), dtype=object, name=9) + tm.assert_index_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/test_isocalendar.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/test_isocalendar.py new file mode 100644 index 0000000..128a8b3 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/test_isocalendar.py @@ -0,0 +1,20 @@ +from pandas import ( + DataFrame, + DatetimeIndex, +) +import pandas._testing as tm + + +def test_isocalendar_returns_correct_values_close_to_new_year_with_tz(): + # GH#6538: Check that DatetimeIndex and its TimeStamp elements + # return the same weekofyear accessor close to new year w/ tz + dates = ["2013/12/29", "2013/12/30", "2013/12/31"] + dates = DatetimeIndex(dates, tz="Europe/Brussels") + result = dates.isocalendar() + expected_data_frame = DataFrame( + [[2013, 52, 7], [2014, 1, 1], [2014, 1, 2]], + columns=["year", "week", "day"], + index=dates, + dtype="UInt32", + ) + tm.assert_frame_equal(result, expected_data_frame) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/test_repeat.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/test_repeat.py new file mode 100644 index 0000000..c18109a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/test_repeat.py @@ -0,0 +1,78 @@ +import numpy as np +import pytest + +from pandas import ( + DatetimeIndex, + Timestamp, + date_range, +) +import pandas._testing as tm + + +class TestRepeat: + def test_repeat_range(self, tz_naive_fixture): + tz = tz_naive_fixture + rng = date_range("1/1/2000", "1/1/2001") + + result = rng.repeat(5) + assert result.freq is None + assert len(result) == 5 * len(rng) + + index = date_range("2001-01-01", periods=2, freq="D", tz=tz) + exp = DatetimeIndex( + ["2001-01-01", "2001-01-01", "2001-01-02", "2001-01-02"], tz=tz + ) + for res in [index.repeat(2), np.repeat(index, 2)]: + tm.assert_index_equal(res, exp) + assert res.freq is None + + index = date_range("2001-01-01", periods=2, freq="2D", tz=tz) + exp = DatetimeIndex( + ["2001-01-01", "2001-01-01", "2001-01-03", "2001-01-03"], tz=tz + ) + for res in [index.repeat(2), np.repeat(index, 2)]: + tm.assert_index_equal(res, exp) + assert res.freq is None + + index = DatetimeIndex(["2001-01-01", "NaT", "2003-01-01"], tz=tz) + exp = DatetimeIndex( + [ + "2001-01-01", + "2001-01-01", + "2001-01-01", + "NaT", + "NaT", + "NaT", + "2003-01-01", + "2003-01-01", + "2003-01-01", + ], + tz=tz, + ) + for res in [index.repeat(3), np.repeat(index, 3)]: + tm.assert_index_equal(res, exp) + assert res.freq is None + + def test_repeat(self, tz_naive_fixture): + tz = tz_naive_fixture + reps = 2 + msg = "the 'axis' parameter is not supported" + + rng = date_range(start="2016-01-01", periods=2, freq="30Min", tz=tz) + + expected_rng = DatetimeIndex( + [ + Timestamp("2016-01-01 00:00:00", tz=tz), + Timestamp("2016-01-01 00:00:00", tz=tz), + Timestamp("2016-01-01 00:30:00", tz=tz), + Timestamp("2016-01-01 00:30:00", tz=tz), + ] + ) + + res = rng.repeat(reps) + tm.assert_index_equal(res, expected_rng) + assert res.freq is None + + 
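+        # np.repeat defers to DatetimeIndex.repeat, so the numpy call should
+        # match; only the unsupported axis keyword raises below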
tm.assert_index_equal(np.repeat(rng, reps), expected_rng) + with pytest.raises(ValueError, match=msg): + np.repeat(rng, reps, axis=1) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/test_shift.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/test_shift.py new file mode 100644 index 0000000..5a47b36 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/test_shift.py @@ -0,0 +1,163 @@ +from datetime import datetime + +import pytest +import pytz + +from pandas.errors import NullFrequencyError + +import pandas as pd +from pandas import ( + DatetimeIndex, + Series, + date_range, +) +import pandas._testing as tm + +START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) + + +class TestDatetimeIndexShift: + + # ------------------------------------------------------------- + # DatetimeIndex.shift is used in integer addition + + def test_dti_shift_tzaware(self, tz_naive_fixture): + # GH#9903 + tz = tz_naive_fixture + idx = DatetimeIndex([], name="xxx", tz=tz) + tm.assert_index_equal(idx.shift(0, freq="H"), idx) + tm.assert_index_equal(idx.shift(3, freq="H"), idx) + + idx = DatetimeIndex( + ["2011-01-01 10:00", "2011-01-01 11:00", "2011-01-01 12:00"], + name="xxx", + tz=tz, + freq="H", + ) + tm.assert_index_equal(idx.shift(0, freq="H"), idx) + exp = DatetimeIndex( + ["2011-01-01 13:00", "2011-01-01 14:00", "2011-01-01 15:00"], + name="xxx", + tz=tz, + freq="H", + ) + tm.assert_index_equal(idx.shift(3, freq="H"), exp) + exp = DatetimeIndex( + ["2011-01-01 07:00", "2011-01-01 08:00", "2011-01-01 09:00"], + name="xxx", + tz=tz, + freq="H", + ) + tm.assert_index_equal(idx.shift(-3, freq="H"), exp) + + def test_dti_shift_freqs(self): + # test shift for DatetimeIndex and non DatetimeIndex + # GH#8083 + drange = date_range("20130101", periods=5) + result = drange.shift(1) + expected = DatetimeIndex( + ["2013-01-02", "2013-01-03", "2013-01-04", "2013-01-05", "2013-01-06"], + freq="D", + ) + tm.assert_index_equal(result, expected) + + result = drange.shift(-1) + expected = DatetimeIndex( + ["2012-12-31", "2013-01-01", "2013-01-02", "2013-01-03", "2013-01-04"], + freq="D", + ) + tm.assert_index_equal(result, expected) + + result = drange.shift(3, freq="2D") + expected = DatetimeIndex( + ["2013-01-07", "2013-01-08", "2013-01-09", "2013-01-10", "2013-01-11"], + freq="D", + ) + tm.assert_index_equal(result, expected) + + def test_dti_shift_int(self): + rng = date_range("1/1/2000", periods=20) + + result = rng + 5 * rng.freq + expected = rng.shift(5) + tm.assert_index_equal(result, expected) + + result = rng - 5 * rng.freq + expected = rng.shift(-5) + tm.assert_index_equal(result, expected) + + def test_dti_shift_no_freq(self): + # GH#19147 + dti = DatetimeIndex(["2011-01-01 10:00", "2011-01-01"], freq=None) + with pytest.raises(NullFrequencyError, match="Cannot shift with no freq"): + dti.shift(2) + + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_dti_shift_localized(self, tzstr): + dr = date_range("2011/1/1", "2012/1/1", freq="W-FRI") + dr_tz = dr.tz_localize(tzstr) + + result = dr_tz.shift(1, "10T") + assert result.tz == dr_tz.tz + + def test_dti_shift_across_dst(self): + # GH 8616 + idx = date_range("2013-11-03", tz="America/Chicago", periods=7, freq="H") + s = Series(index=idx[:-1], dtype=object) + result = s.shift(freq="H") + expected = Series(index=idx[1:], dtype=object) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "shift, 
result_time", + [ + [0, "2014-11-14 00:00:00"], + [-1, "2014-11-13 23:00:00"], + [1, "2014-11-14 01:00:00"], + ], + ) + def test_dti_shift_near_midnight(self, shift, result_time): + # GH 8616 + dt = datetime(2014, 11, 14, 0) + dt_est = pytz.timezone("EST").localize(dt) + s = Series(data=[1], index=[dt_est]) + result = s.shift(shift, freq="H") + expected = Series(1, index=DatetimeIndex([result_time], tz="EST")) + tm.assert_series_equal(result, expected) + + def test_shift_periods(self): + # GH#22458 : argument 'n' was deprecated in favor of 'periods' + idx = date_range(start=START, end=END, periods=3) + tm.assert_index_equal(idx.shift(periods=0), idx) + tm.assert_index_equal(idx.shift(0), idx) + + @pytest.mark.parametrize("freq", ["B", "C"]) + def test_shift_bday(self, freq): + rng = date_range(START, END, freq=freq) + shifted = rng.shift(5) + assert shifted[0] == rng[5] + assert shifted.freq == rng.freq + + shifted = rng.shift(-5) + assert shifted[5] == rng[0] + assert shifted.freq == rng.freq + + shifted = rng.shift(0) + assert shifted[0] == rng[0] + assert shifted.freq == rng.freq + + def test_shift_bmonth(self): + rng = date_range(START, END, freq=pd.offsets.BMonthEnd()) + shifted = rng.shift(1, freq=pd.offsets.BDay()) + assert shifted[0] == rng[0] + pd.offsets.BDay() + + rng = date_range(START, END, freq=pd.offsets.BMonthEnd()) + with tm.assert_produces_warning(pd.errors.PerformanceWarning): + shifted = rng.shift(1, freq=pd.offsets.CDay()) + assert shifted[0] == rng[0] + pd.offsets.CDay() + + def test_shift_empty(self): + # GH#14811 + dti = date_range(start="2016-10-21", end="2016-10-21", freq="BM") + result = dti.shift(1) + tm.assert_index_equal(result, dti) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/test_snap.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/test_snap.py new file mode 100644 index 0000000..e591441 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/test_snap.py @@ -0,0 +1,44 @@ +import pytest + +from pandas import ( + DatetimeIndex, + date_range, +) +import pandas._testing as tm + + +@pytest.mark.filterwarnings("ignore::DeprecationWarning") +@pytest.mark.parametrize("tz", [None, "Asia/Shanghai", "Europe/Berlin"]) +@pytest.mark.parametrize("name", [None, "my_dti"]) +def test_dti_snap(name, tz): + dti = DatetimeIndex( + [ + "1/1/2002", + "1/2/2002", + "1/3/2002", + "1/4/2002", + "1/5/2002", + "1/6/2002", + "1/7/2002", + ], + name=name, + tz=tz, + freq="D", + ) + + result = dti.snap(freq="W-MON") + expected = date_range("12/31/2001", "1/7/2002", name=name, tz=tz, freq="w-mon") + expected = expected.repeat([3, 4]) + tm.assert_index_equal(result, expected) + assert result.tz == expected.tz + assert result.freq is None + assert expected.freq is None + + result = dti.snap(freq="B") + + expected = date_range("1/1/2002", "1/7/2002", name=name, tz=tz, freq="b") + expected = expected.repeat([1, 1, 1, 2, 2]) + tm.assert_index_equal(result, expected) + assert result.tz == expected.tz + assert result.freq is None + assert expected.freq is None diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/test_to_frame.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/test_to_frame.py new file mode 100644 index 0000000..fa5cca1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/test_to_frame.py @@ -0,0 +1,31 @@ +from pandas import ( + DataFrame, + Index, + date_range, +) 
+import pandas._testing as tm
+
+
+class TestToFrame:
+    def test_to_frame_datetime_tz(self):
+        # GH#25809
+        idx = date_range(start="2019-01-01", end="2019-01-30", freq="D", tz="UTC")
+        result = idx.to_frame()
+        expected = DataFrame(idx, index=idx)
+        tm.assert_frame_equal(result, expected)
+
+    def test_to_frame_respects_none_name(self):
+        # GH#44212 if we explicitly pass name=None, then that should be respected,
+        # not changed to 0
+        # GH-45448 this is first deprecated to only change in the future
+        idx = date_range(start="2019-01-01", end="2019-01-30", freq="D", tz="UTC")
+        with tm.assert_produces_warning(FutureWarning):
+            result = idx.to_frame(name=None)
+        # exp_idx = Index([None], dtype=object)
+        exp_idx = Index([0])
+        tm.assert_index_equal(exp_idx, result.columns)
+
+        with tm.assert_produces_warning(FutureWarning):
+            result = idx.rename("foo").to_frame(name=None)
+        exp_idx = Index(["foo"], dtype=object)
+        tm.assert_index_equal(exp_idx, result.columns)
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/test_to_period.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/test_to_period.py
new file mode 100644
index 0000000..f6a598b
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/test_to_period.py
@@ -0,0 +1,191 @@
+import warnings
+
+import dateutil.tz
+from dateutil.tz import tzlocal
+import pytest
+import pytz
+
+from pandas._libs.tslibs.ccalendar import MONTHS
+from pandas._libs.tslibs.period import INVALID_FREQ_ERR_MSG
+
+from pandas import (
+    DatetimeIndex,
+    Period,
+    PeriodIndex,
+    Timestamp,
+    date_range,
+    period_range,
+)
+import pandas._testing as tm
+
+
+class TestToPeriod:
+    def test_dti_to_period(self):
+        dti = date_range(start="1/1/2005", end="12/1/2005", freq="M")
+        pi1 = dti.to_period()
+        pi2 = dti.to_period(freq="D")
+        pi3 = dti.to_period(freq="3D")
+
+        assert pi1[0] == Period("Jan 2005", freq="M")
+        assert pi2[0] == Period("1/31/2005", freq="D")
+        assert pi3[0] == Period("1/31/2005", freq="3D")
+
+        assert pi1[-1] == Period("Nov 2005", freq="M")
+        assert pi2[-1] == Period("11/30/2005", freq="D")
+        assert pi3[-1] == Period("11/30/2005", freq="3D")
+
+        tm.assert_index_equal(pi1, period_range("1/1/2005", "11/1/2005", freq="M"))
+        tm.assert_index_equal(
+            pi2, period_range("1/1/2005", "11/1/2005", freq="M").asfreq("D")
+        )
+        tm.assert_index_equal(
+            pi3, period_range("1/1/2005", "11/1/2005", freq="M").asfreq("3D")
+        )
+
+    @pytest.mark.parametrize("month", MONTHS)
+    def test_to_period_quarterly(self, month):
+        # make sure we can make the round trip
+        freq = f"Q-{month}"
+        rng = period_range("1989Q3", "1991Q3", freq=freq)
+        stamps = rng.to_timestamp()
+        result = stamps.to_period(freq)
+        tm.assert_index_equal(rng, result)
+
+    @pytest.mark.parametrize("off", ["BQ", "QS", "BQS"])
+    def test_to_period_quarterlyish(self, off):
+        rng = date_range("01-Jan-2012", periods=8, freq=off)
+        prng = rng.to_period()
+        assert prng.freq == "Q-DEC"
+
+    @pytest.mark.parametrize("off", ["BA", "AS", "BAS"])
+    def test_to_period_annualish(self, off):
+        rng = date_range("01-Jan-2012", periods=8, freq=off)
+        prng = rng.to_period()
+        assert prng.freq == "A-DEC"
+
+    def test_to_period_monthish(self):
+        offsets = ["MS", "BM"]
+        for off in offsets:
+            rng = date_range("01-Jan-2012", periods=8, freq=off)
+            prng = rng.to_period()
+            assert prng.freq == "M"
+
+        rng = date_range("01-Jan-2012", periods=8, freq="M")
+        prng = rng.to_period()
+        assert prng.freq == "M"
+
+        with pytest.raises(ValueError,
match=INVALID_FREQ_ERR_MSG): + date_range("01-Jan-2012", periods=8, freq="EOM") + + def test_to_period_infer(self): + # https://github.com/pandas-dev/pandas/issues/33358 + rng = date_range( + start="2019-12-22 06:40:00+00:00", + end="2019-12-22 08:45:00+00:00", + freq="5min", + ) + + with tm.assert_produces_warning(None): + # Using simple filter because we are not checking for the warning here + warnings.simplefilter("ignore", UserWarning) + + pi1 = rng.to_period("5min") + + with tm.assert_produces_warning(None): + # Using simple filter because we are not checking for the warning here + warnings.simplefilter("ignore", UserWarning) + + pi2 = rng.to_period() + + tm.assert_index_equal(pi1, pi2) + + def test_period_dt64_round_trip(self): + dti = date_range("1/1/2000", "1/7/2002", freq="B") + pi = dti.to_period() + tm.assert_index_equal(pi.to_timestamp(), dti) + + dti = date_range("1/1/2000", "1/7/2002", freq="B") + pi = dti.to_period(freq="H") + tm.assert_index_equal(pi.to_timestamp(), dti) + + def test_to_period_millisecond(self): + index = DatetimeIndex( + [ + Timestamp("2007-01-01 10:11:12.123456Z"), + Timestamp("2007-01-01 10:11:13.789123Z"), + ] + ) + + with tm.assert_produces_warning(UserWarning): + # warning that timezone info will be lost + period = index.to_period(freq="L") + assert 2 == len(period) + assert period[0] == Period("2007-01-01 10:11:12.123Z", "L") + assert period[1] == Period("2007-01-01 10:11:13.789Z", "L") + + def test_to_period_microsecond(self): + index = DatetimeIndex( + [ + Timestamp("2007-01-01 10:11:12.123456Z"), + Timestamp("2007-01-01 10:11:13.789123Z"), + ] + ) + + with tm.assert_produces_warning(UserWarning): + # warning that timezone info will be lost + period = index.to_period(freq="U") + assert 2 == len(period) + assert period[0] == Period("2007-01-01 10:11:12.123456Z", "U") + assert period[1] == Period("2007-01-01 10:11:13.789123Z", "U") + + @pytest.mark.parametrize( + "tz", + ["US/Eastern", pytz.utc, tzlocal(), "dateutil/US/Eastern", dateutil.tz.tzutc()], + ) + def test_to_period_tz(self, tz): + ts = date_range("1/1/2000", "2/1/2000", tz=tz) + + with tm.assert_produces_warning(UserWarning): + # GH#21333 warning that timezone info will be lost + # filter warning about freq deprecation + warnings.filterwarnings("ignore", category=FutureWarning) + + result = ts.to_period()[0] + expected = ts[0].to_period() + + assert result == expected + + expected = date_range("1/1/2000", "2/1/2000").to_period() + + with tm.assert_produces_warning(UserWarning): + # GH#21333 warning that timezone info will be lost + result = ts.to_period() + + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("tz", ["Etc/GMT-1", "Etc/GMT+1"]) + def test_to_period_tz_utc_offset_consistency(self, tz): + # GH#22905 + ts = date_range("1/1/2000", "2/1/2000", tz="Etc/GMT-1") + with tm.assert_produces_warning(UserWarning): + warnings.filterwarnings("ignore", category=FutureWarning) + + result = ts.to_period()[0] + expected = ts[0].to_period() + assert result == expected + + def test_to_period_nofreq(self): + idx = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-04"]) + msg = "You must pass a freq argument as current index has none." 
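+        # the day spacing (Jan 1, 2, 4) is irregular, so no freq can be
+        # inferred and to_period() has no period frequency to fall back on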
+ with pytest.raises(ValueError, match=msg): + idx.to_period() + + idx = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-03"], freq="infer") + assert idx.freqstr == "D" + expected = PeriodIndex(["2000-01-01", "2000-01-02", "2000-01-03"], freq="D") + tm.assert_index_equal(idx.to_period(), expected) + + # GH#7606 + idx = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-03"]) + assert idx.freqstr is None + tm.assert_index_equal(idx.to_period(), expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/test_to_series.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/test_to_series.py new file mode 100644 index 0000000..5a216d3 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/methods/test_to_series.py @@ -0,0 +1,40 @@ +import numpy as np +import pytest + +from pandas import ( + DatetimeIndex, + Series, +) +import pandas._testing as tm + + +class TestToSeries: + @pytest.fixture + def idx_expected(self): + naive = DatetimeIndex(["2013-1-1 13:00", "2013-1-2 14:00"], name="B") + idx = naive.tz_localize("US/Pacific") + + expected = Series(np.array(idx.tolist(), dtype="object"), name="B") + + assert expected.dtype == idx.dtype + return idx, expected + + def test_to_series_keep_tz_deprecated_true(self, idx_expected): + # convert to series while keeping the timezone + idx, expected = idx_expected + + msg = "stop passing 'keep_tz'" + with tm.assert_produces_warning(FutureWarning) as m: + result = idx.to_series(keep_tz=True, index=[0, 1]) + assert msg in str(m[0].message) + + tm.assert_series_equal(result, expected) + + def test_to_series_keep_tz_deprecated_false(self, idx_expected): + idx, expected = idx_expected + + with tm.assert_produces_warning(FutureWarning) as m: + result = idx.to_series(keep_tz=False, index=[0, 1]) + tm.assert_series_equal(result, expected.dt.tz_convert(None)) + msg = "do 'idx.tz_convert(None)' before calling" + assert msg in str(m[0].message) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_asof.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_asof.py new file mode 100644 index 0000000..7adc400 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_asof.py @@ -0,0 +1,31 @@ +from datetime import timedelta + +from pandas import ( + Index, + Timestamp, + date_range, + isna, +) +import pandas._testing as tm + + +class TestAsOf: + def test_asof_partial(self): + index = date_range("2010-01-01", periods=2, freq="m") + expected = Timestamp("2010-02-28") + result = index.asof("2010-02") + assert result == expected + assert not isinstance(result, Index) + + def test_asof(self): + index = tm.makeDateIndex(100) + + dt = index[0] + assert index.asof(dt) == dt + assert isna(index.asof(dt - timedelta(1))) + + dt = index[-1] + assert index.asof(dt + timedelta(1)) == dt + + dt = index[0].to_pydatetime() + assert isinstance(index.asof(dt), Timestamp) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_constructors.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_constructors.py new file mode 100644 index 0000000..b1e764c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_constructors.py @@ -0,0 +1,1168 @@ +from datetime import ( + datetime, + timedelta, + timezone, +) +from functools import partial +from operator import attrgetter + +import dateutil +import numpy as np +import pytest +import 
pytz
+
+from pandas._libs.tslibs import (
+    OutOfBoundsDatetime,
+    conversion,
+)
+
+import pandas as pd
+from pandas import (
+    DatetimeIndex,
+    Index,
+    Timestamp,
+    date_range,
+    offsets,
+    to_datetime,
+)
+import pandas._testing as tm
+from pandas.core.arrays import (
+    DatetimeArray,
+    period_array,
+)
+
+
+class TestDatetimeIndex:
+    @pytest.mark.parametrize(
+        "dt_cls", [DatetimeIndex, DatetimeArray._from_sequence_not_strict]
+    )
+    def test_freq_validation_with_nat(self, dt_cls):
+        # GH#11587 make sure we get a useful error message when generate_range
+        # raises
+        msg = (
+            "Inferred frequency None from passed values does not conform "
+            "to passed frequency D"
+        )
+        with pytest.raises(ValueError, match=msg):
+            dt_cls([pd.NaT, Timestamp("2011-01-01")], freq="D")
+        with pytest.raises(ValueError, match=msg):
+            dt_cls([pd.NaT, Timestamp("2011-01-01").value], freq="D")
+
+    # TODO: better place for tests shared by DTI/TDI?
+    @pytest.mark.parametrize(
+        "index",
+        [
+            date_range("2016-01-01", periods=5, tz="US/Pacific"),
+            pd.timedelta_range("1 Day", periods=5),
+        ],
+    )
+    def test_shallow_copy_inherits_array_freq(self, index):
+        # If we pass a DTA/TDA to shallow_copy and don't specify a freq,
+        # we should inherit the array's freq, not our own.
+        array = index._data
+
+        arr = array[[0, 3, 2, 4, 1]]
+        assert arr.freq is None
+
+        result = index._shallow_copy(arr)
+        assert result.freq is None
+
+    def test_categorical_preserves_tz(self):
+        # GH#18664 retain tz when going DTI-->Categorical-->DTI
+        dti = DatetimeIndex(
+            [pd.NaT, "2015-01-01", "1999-04-06 15:14:13", "2015-01-01"], tz="US/Eastern"
+        )
+
+        for dtobj in [dti, dti._data]:
+            # works for DatetimeIndex or DatetimeArray
+
+            ci = pd.CategoricalIndex(dtobj)
+            carr = pd.Categorical(dtobj)
+            cser = pd.Series(ci)
+
+            for obj in [ci, carr, cser]:
+                result = DatetimeIndex(obj)
+                tm.assert_index_equal(result, dti)
+
+    def test_dti_with_period_data_raises(self):
+        # GH#23675
+        data = pd.PeriodIndex(["2016Q1", "2016Q2"], freq="Q")
+
+        with pytest.raises(TypeError, match="PeriodDtype data is invalid"):
+            DatetimeIndex(data)
+
+        with pytest.raises(TypeError, match="PeriodDtype data is invalid"):
+            to_datetime(data)
+
+        with pytest.raises(TypeError, match="PeriodDtype data is invalid"):
+            DatetimeIndex(period_array(data))
+
+        with pytest.raises(TypeError, match="PeriodDtype data is invalid"):
+            to_datetime(period_array(data))
+
+    def test_dti_with_timedelta64_data_raises(self):
+        # GH#23675 deprecated, enforced in GH#29794
+        data = np.array([0], dtype="m8[ns]")
+        msg = r"timedelta64\[ns\] cannot be converted to datetime64"
+        with pytest.raises(TypeError, match=msg):
+            DatetimeIndex(data)
+
+        with pytest.raises(TypeError, match=msg):
+            to_datetime(data)
+
+        with pytest.raises(TypeError, match=msg):
+            DatetimeIndex(pd.TimedeltaIndex(data))
+
+        with pytest.raises(TypeError, match=msg):
+            to_datetime(pd.TimedeltaIndex(data))
+
+    def test_constructor_from_sparse_array(self):
+        # https://github.com/pandas-dev/pandas/issues/35843
+        values = [
+            Timestamp("2012-05-01T01:00:00.000000"),
+            Timestamp("2016-05-01T01:00:00.000000"),
+        ]
+        arr = pd.arrays.SparseArray(values)
+        msg = "will store that array directly"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = Index(arr)
+        expected = DatetimeIndex(values)
+        tm.assert_index_equal(result, expected)
+
+    def test_construction_caching(self):
+
+        df = pd.DataFrame(
+            {
+                "dt": date_range("20130101", periods=3),
+                "dttz": date_range("20130101", periods=3, tz="US/Eastern"),
+                "dt_with_null": [
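+                    # NaT mixed with Timestamps still gives datetime64[ns]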
+ Timestamp("20130101"), + pd.NaT, + Timestamp("20130103"), + ], + "dtns": date_range("20130101", periods=3, freq="ns"), + } + ) + assert df.dttz.dtype.tz.zone == "US/Eastern" + + @pytest.mark.parametrize( + "kwargs", + [{"tz": "dtype.tz"}, {"dtype": "dtype"}, {"dtype": "dtype", "tz": "dtype.tz"}], + ) + def test_construction_with_alt(self, kwargs, tz_aware_fixture): + tz = tz_aware_fixture + i = date_range("20130101", periods=5, freq="H", tz=tz) + kwargs = {key: attrgetter(val)(i) for key, val in kwargs.items()} + result = DatetimeIndex(i, **kwargs) + tm.assert_index_equal(i, result) + + @pytest.mark.parametrize( + "kwargs", + [{"tz": "dtype.tz"}, {"dtype": "dtype"}, {"dtype": "dtype", "tz": "dtype.tz"}], + ) + def test_construction_with_alt_tz_localize(self, kwargs, tz_aware_fixture): + tz = tz_aware_fixture + i = date_range("20130101", periods=5, freq="H", tz=tz) + i = i._with_freq(None) + kwargs = {key: attrgetter(val)(i) for key, val in kwargs.items()} + + if "tz" in kwargs: + result = DatetimeIndex(i.asi8, tz="UTC").tz_convert(kwargs["tz"]) + + expected = DatetimeIndex(i, **kwargs) + tm.assert_index_equal(result, expected) + + # localize into the provided tz + i2 = DatetimeIndex(i.tz_localize(None).asi8, tz="UTC") + expected = i.tz_localize(None).tz_localize("UTC") + tm.assert_index_equal(i2, expected) + + # incompat tz/dtype + msg = "cannot supply both a tz and a dtype with a tz" + with pytest.raises(ValueError, match=msg): + DatetimeIndex(i.tz_localize(None).asi8, dtype=i.dtype, tz="US/Pacific") + + def test_construction_index_with_mixed_timezones(self): + # gh-11488: no tz results in DatetimeIndex + result = Index([Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx") + exp = DatetimeIndex( + [Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx" + ) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is None + + # same tz results in DatetimeIndex + result = Index( + [ + Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"), + Timestamp("2011-01-02 10:00", tz="Asia/Tokyo"), + ], + name="idx", + ) + exp = DatetimeIndex( + [Timestamp("2011-01-01 10:00"), Timestamp("2011-01-02 10:00")], + tz="Asia/Tokyo", + name="idx", + ) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is not None + assert result.tz == exp.tz + + # same tz results in DatetimeIndex (DST) + result = Index( + [ + Timestamp("2011-01-01 10:00", tz="US/Eastern"), + Timestamp("2011-08-01 10:00", tz="US/Eastern"), + ], + name="idx", + ) + exp = DatetimeIndex( + [Timestamp("2011-01-01 10:00"), Timestamp("2011-08-01 10:00")], + tz="US/Eastern", + name="idx", + ) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is not None + assert result.tz == exp.tz + + # Different tz results in Index(dtype=object) + result = Index( + [ + Timestamp("2011-01-01 10:00"), + Timestamp("2011-01-02 10:00", tz="US/Eastern"), + ], + name="idx", + ) + exp = Index( + [ + Timestamp("2011-01-01 10:00"), + Timestamp("2011-01-02 10:00", tz="US/Eastern"), + ], + dtype="object", + name="idx", + ) + tm.assert_index_equal(result, exp, exact=True) + assert not isinstance(result, DatetimeIndex) + + result = Index( + [ + Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"), + Timestamp("2011-01-02 10:00", tz="US/Eastern"), + ], + name="idx", + ) + exp = Index( + [ + Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"), + Timestamp("2011-01-02 10:00", tz="US/Eastern"), + ], + 
dtype="object", + name="idx", + ) + tm.assert_index_equal(result, exp, exact=True) + assert not isinstance(result, DatetimeIndex) + + # length = 1 + result = Index([Timestamp("2011-01-01")], name="idx") + exp = DatetimeIndex([Timestamp("2011-01-01")], name="idx") + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is None + + # length = 1 with tz + result = Index([Timestamp("2011-01-01 10:00", tz="Asia/Tokyo")], name="idx") + exp = DatetimeIndex( + [Timestamp("2011-01-01 10:00")], tz="Asia/Tokyo", name="idx" + ) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is not None + assert result.tz == exp.tz + + def test_construction_index_with_mixed_timezones_with_NaT(self): + # see gh-11488 + result = Index( + [pd.NaT, Timestamp("2011-01-01"), pd.NaT, Timestamp("2011-01-02")], + name="idx", + ) + exp = DatetimeIndex( + [pd.NaT, Timestamp("2011-01-01"), pd.NaT, Timestamp("2011-01-02")], + name="idx", + ) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is None + + # Same tz results in DatetimeIndex + result = Index( + [ + pd.NaT, + Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"), + pd.NaT, + Timestamp("2011-01-02 10:00", tz="Asia/Tokyo"), + ], + name="idx", + ) + exp = DatetimeIndex( + [ + pd.NaT, + Timestamp("2011-01-01 10:00"), + pd.NaT, + Timestamp("2011-01-02 10:00"), + ], + tz="Asia/Tokyo", + name="idx", + ) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is not None + assert result.tz == exp.tz + + # same tz results in DatetimeIndex (DST) + result = Index( + [ + Timestamp("2011-01-01 10:00", tz="US/Eastern"), + pd.NaT, + Timestamp("2011-08-01 10:00", tz="US/Eastern"), + ], + name="idx", + ) + exp = DatetimeIndex( + [Timestamp("2011-01-01 10:00"), pd.NaT, Timestamp("2011-08-01 10:00")], + tz="US/Eastern", + name="idx", + ) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is not None + assert result.tz == exp.tz + + # different tz results in Index(dtype=object) + result = Index( + [ + pd.NaT, + Timestamp("2011-01-01 10:00"), + pd.NaT, + Timestamp("2011-01-02 10:00", tz="US/Eastern"), + ], + name="idx", + ) + exp = Index( + [ + pd.NaT, + Timestamp("2011-01-01 10:00"), + pd.NaT, + Timestamp("2011-01-02 10:00", tz="US/Eastern"), + ], + dtype="object", + name="idx", + ) + tm.assert_index_equal(result, exp, exact=True) + assert not isinstance(result, DatetimeIndex) + + result = Index( + [ + pd.NaT, + Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"), + pd.NaT, + Timestamp("2011-01-02 10:00", tz="US/Eastern"), + ], + name="idx", + ) + exp = Index( + [ + pd.NaT, + Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"), + pd.NaT, + Timestamp("2011-01-02 10:00", tz="US/Eastern"), + ], + dtype="object", + name="idx", + ) + tm.assert_index_equal(result, exp, exact=True) + assert not isinstance(result, DatetimeIndex) + + # all NaT + result = Index([pd.NaT, pd.NaT], name="idx") + exp = DatetimeIndex([pd.NaT, pd.NaT], name="idx") + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is None + + # all NaT with tz + with tm.assert_produces_warning(FutureWarning): + # subclass-specific kwargs to pd.Index + result = Index([pd.NaT, pd.NaT], tz="Asia/Tokyo", name="idx") + exp = DatetimeIndex([pd.NaT, pd.NaT], tz="Asia/Tokyo", name="idx") + + 
tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is not None + assert result.tz == exp.tz + + def test_construction_dti_with_mixed_timezones(self): + # GH 11488 (not changed, added explicit tests) + + # no tz results in DatetimeIndex + result = DatetimeIndex( + [Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx" + ) + exp = DatetimeIndex( + [Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx" + ) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + + # same tz results in DatetimeIndex + result = DatetimeIndex( + [ + Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"), + Timestamp("2011-01-02 10:00", tz="Asia/Tokyo"), + ], + name="idx", + ) + exp = DatetimeIndex( + [Timestamp("2011-01-01 10:00"), Timestamp("2011-01-02 10:00")], + tz="Asia/Tokyo", + name="idx", + ) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + + # same tz results in DatetimeIndex (DST) + result = DatetimeIndex( + [ + Timestamp("2011-01-01 10:00", tz="US/Eastern"), + Timestamp("2011-08-01 10:00", tz="US/Eastern"), + ], + name="idx", + ) + exp = DatetimeIndex( + [Timestamp("2011-01-01 10:00"), Timestamp("2011-08-01 10:00")], + tz="US/Eastern", + name="idx", + ) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + + # tz mismatch affecting to tz-aware raises TypeError/ValueError + + msg = "cannot be converted to datetime64" + with pytest.raises(ValueError, match=msg): + DatetimeIndex( + [ + Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"), + Timestamp("2011-01-02 10:00", tz="US/Eastern"), + ], + name="idx", + ) + + with pytest.raises(ValueError, match=msg): + DatetimeIndex( + [ + Timestamp("2011-01-01 10:00"), + Timestamp("2011-01-02 10:00", tz="US/Eastern"), + ], + tz="Asia/Tokyo", + name="idx", + ) + + with pytest.raises(ValueError, match=msg): + DatetimeIndex( + [ + Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"), + Timestamp("2011-01-02 10:00", tz="US/Eastern"), + ], + tz="US/Eastern", + name="idx", + ) + + with pytest.raises(ValueError, match=msg): + # passing tz should results in DatetimeIndex, then mismatch raises + # TypeError + with tm.assert_produces_warning(FutureWarning): + # subclass-specific kwargs to pd.Index + Index( + [ + pd.NaT, + Timestamp("2011-01-01 10:00"), + pd.NaT, + Timestamp("2011-01-02 10:00", tz="US/Eastern"), + ], + tz="Asia/Tokyo", + name="idx", + ) + + def test_construction_base_constructor(self): + arr = [Timestamp("2011-01-01"), pd.NaT, Timestamp("2011-01-03")] + tm.assert_index_equal(Index(arr), DatetimeIndex(arr)) + tm.assert_index_equal(Index(np.array(arr)), DatetimeIndex(np.array(arr))) + + arr = [np.nan, pd.NaT, Timestamp("2011-01-03")] + tm.assert_index_equal(Index(arr), DatetimeIndex(arr)) + tm.assert_index_equal(Index(np.array(arr)), DatetimeIndex(np.array(arr))) + + def test_construction_outofbounds(self): + # GH 13663 + dates = [ + datetime(3000, 1, 1), + datetime(4000, 1, 1), + datetime(5000, 1, 1), + datetime(6000, 1, 1), + ] + exp = Index(dates, dtype=object) + # coerces to object + tm.assert_index_equal(Index(dates), exp) + + msg = "Out of bounds nanosecond timestamp" + with pytest.raises(OutOfBoundsDatetime, match=msg): + # can't create DatetimeIndex + DatetimeIndex(dates) + + def test_construction_with_ndarray(self): + # GH 5152 + dates = [datetime(2013, 10, 7), datetime(2013, 10, 8), datetime(2013, 10, 9)] + data = DatetimeIndex(dates, freq=offsets.BDay()).values + result = 
DatetimeIndex(data, freq=offsets.BDay()) + expected = DatetimeIndex(["2013-10-07", "2013-10-08", "2013-10-09"], freq="B") + tm.assert_index_equal(result, expected) + + def test_integer_values_and_tz_interpreted_as_utc(self): + # GH-24559 + val = np.datetime64("2000-01-01 00:00:00", "ns") + values = np.array([val.view("i8")]) + + result = DatetimeIndex(values).tz_localize("US/Central") + + expected = DatetimeIndex(["2000-01-01T00:00:00"], tz="US/Central") + tm.assert_index_equal(result, expected) + + # but UTC is *not* deprecated. + with tm.assert_produces_warning(None): + result = DatetimeIndex(values, tz="UTC") + expected = DatetimeIndex(["2000-01-01T00:00:00"], tz="US/Central") + + def test_constructor_coverage(self): + rng = date_range("1/1/2000", periods=10.5) + exp = date_range("1/1/2000", periods=10) + tm.assert_index_equal(rng, exp) + + msg = "periods must be a number, got foo" + with pytest.raises(TypeError, match=msg): + date_range(start="1/1/2000", periods="foo", freq="D") + + msg = r"DatetimeIndex\(\.\.\.\) must be called with a collection" + with pytest.raises(TypeError, match=msg): + DatetimeIndex("1/1/2000") + + # generator expression + gen = (datetime(2000, 1, 1) + timedelta(i) for i in range(10)) + result = DatetimeIndex(gen) + expected = DatetimeIndex( + [datetime(2000, 1, 1) + timedelta(i) for i in range(10)] + ) + tm.assert_index_equal(result, expected) + + # NumPy string array + strings = np.array(["2000-01-01", "2000-01-02", "2000-01-03"]) + result = DatetimeIndex(strings) + expected = DatetimeIndex(strings.astype("O")) + tm.assert_index_equal(result, expected) + + from_ints = DatetimeIndex(expected.asi8) + tm.assert_index_equal(from_ints, expected) + + # string with NaT + strings = np.array(["2000-01-01", "2000-01-02", "NaT"]) + result = DatetimeIndex(strings) + expected = DatetimeIndex(strings.astype("O")) + tm.assert_index_equal(result, expected) + + from_ints = DatetimeIndex(expected.asi8) + tm.assert_index_equal(from_ints, expected) + + # non-conforming + msg = ( + "Inferred frequency None from passed values does not conform " + "to passed frequency D" + ) + with pytest.raises(ValueError, match=msg): + DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-04"], freq="D") + + msg = ( + "Of the four parameters: start, end, periods, and freq, exactly " + "three must be specified" + ) + with pytest.raises(ValueError, match=msg): + date_range(start="2011-01-01", freq="b") + with pytest.raises(ValueError, match=msg): + date_range(end="2011-01-01", freq="B") + with pytest.raises(ValueError, match=msg): + date_range(periods=10, freq="D") + + @pytest.mark.parametrize("freq", ["AS", "W-SUN"]) + def test_constructor_datetime64_tzformat(self, freq): + # see GH#6572: ISO 8601 format results in pytz.FixedOffset + idx = date_range( + "2013-01-01T00:00:00-05:00", "2016-01-01T23:59:59-05:00", freq=freq + ) + expected = date_range( + "2013-01-01T00:00:00", + "2016-01-01T23:59:59", + freq=freq, + tz=pytz.FixedOffset(-300), + ) + tm.assert_index_equal(idx, expected) + # Unable to use `US/Eastern` because of DST + expected_i8 = date_range( + "2013-01-01T00:00:00", "2016-01-01T23:59:59", freq=freq, tz="America/Lima" + ) + tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) + + idx = date_range( + "2013-01-01T00:00:00+09:00", "2016-01-01T23:59:59+09:00", freq=freq + ) + expected = date_range( + "2013-01-01T00:00:00", + "2016-01-01T23:59:59", + freq=freq, + tz=pytz.FixedOffset(540), + ) + tm.assert_index_equal(idx, expected) + expected_i8 = date_range( + "2013-01-01T00:00:00", 
"2016-01-01T23:59:59", freq=freq, tz="Asia/Tokyo" + ) + tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) + + # Non ISO 8601 format results in dateutil.tz.tzoffset + idx = date_range("2013/1/1 0:00:00-5:00", "2016/1/1 23:59:59-5:00", freq=freq) + expected = date_range( + "2013-01-01T00:00:00", + "2016-01-01T23:59:59", + freq=freq, + tz=pytz.FixedOffset(-300), + ) + tm.assert_index_equal(idx, expected) + # Unable to use `US/Eastern` because of DST + expected_i8 = date_range( + "2013-01-01T00:00:00", "2016-01-01T23:59:59", freq=freq, tz="America/Lima" + ) + tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) + + idx = date_range("2013/1/1 0:00:00+9:00", "2016/1/1 23:59:59+09:00", freq=freq) + expected = date_range( + "2013-01-01T00:00:00", + "2016-01-01T23:59:59", + freq=freq, + tz=pytz.FixedOffset(540), + ) + tm.assert_index_equal(idx, expected) + expected_i8 = date_range( + "2013-01-01T00:00:00", "2016-01-01T23:59:59", freq=freq, tz="Asia/Tokyo" + ) + tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) + + def test_constructor_dtype(self): + + # passing a dtype with a tz should localize + idx = DatetimeIndex( + ["2013-01-01", "2013-01-02"], dtype="datetime64[ns, US/Eastern]" + ) + expected = DatetimeIndex(["2013-01-01", "2013-01-02"]).tz_localize("US/Eastern") + tm.assert_index_equal(idx, expected) + + idx = DatetimeIndex(["2013-01-01", "2013-01-02"], tz="US/Eastern") + tm.assert_index_equal(idx, expected) + + # if we already have a tz and its not the same, then raise + idx = DatetimeIndex( + ["2013-01-01", "2013-01-02"], dtype="datetime64[ns, US/Eastern]" + ) + + msg = ( + "cannot supply both a tz and a timezone-naive dtype " + r"\(i\.e\. datetime64\[ns\]\)" + ) + with pytest.raises(ValueError, match=msg): + DatetimeIndex(idx, dtype="datetime64[ns]") + + # this is effectively trying to convert tz's + msg = "data is already tz-aware US/Eastern, unable to set specified tz: CET" + with pytest.raises(TypeError, match=msg): + DatetimeIndex(idx, dtype="datetime64[ns, CET]") + msg = "cannot supply both a tz and a dtype with a tz" + with pytest.raises(ValueError, match=msg): + DatetimeIndex(idx, tz="CET", dtype="datetime64[ns, US/Eastern]") + + result = DatetimeIndex(idx, dtype="datetime64[ns, US/Eastern]") + tm.assert_index_equal(idx, result) + + @pytest.mark.parametrize("dtype", [object, np.int32, np.int64]) + def test_constructor_invalid_dtype_raises(self, dtype): + # GH 23986 + msg = "Unexpected value for 'dtype'" + with pytest.raises(ValueError, match=msg): + DatetimeIndex([1, 2], dtype=dtype) + + def test_constructor_name(self): + idx = date_range(start="2000-01-01", periods=1, freq="A", name="TEST") + assert idx.name == "TEST" + + def test_000constructor_resolution(self): + # 2252 + t1 = Timestamp((1352934390 * 1000000000) + 1000000 + 1000 + 1) + idx = DatetimeIndex([t1]) + + assert idx.nanosecond[0] == t1.nanosecond + + def test_disallow_setting_tz(self): + # GH 3746 + dti = DatetimeIndex(["2010"], tz="UTC") + msg = "Cannot directly set timezone" + with pytest.raises(AttributeError, match=msg): + dti.tz = pytz.timezone("US/Pacific") + + @pytest.mark.parametrize( + "tz", + [ + None, + "America/Los_Angeles", + pytz.timezone("America/Los_Angeles"), + Timestamp("2000", tz="America/Los_Angeles").tz, + ], + ) + def test_constructor_start_end_with_tz(self, tz): + # GH 18595 + start = Timestamp("2013-01-01 06:00:00", tz="America/Los_Angeles") + end = Timestamp("2013-01-02 06:00:00", tz="America/Los_Angeles") + result = date_range(freq="D", start=start, end=end, tz=tz) + expected = 
DatetimeIndex( + ["2013-01-01 06:00:00", "2013-01-02 06:00:00"], + tz="America/Los_Angeles", + freq="D", + ) + tm.assert_index_equal(result, expected) + # Especially assert that the timezone is consistent for pytz + assert pytz.timezone("America/Los_Angeles") is result.tz + + @pytest.mark.parametrize("tz", ["US/Pacific", "US/Eastern", "Asia/Tokyo"]) + def test_constructor_with_non_normalized_pytz(self, tz): + # GH 18595 + non_norm_tz = Timestamp("2010", tz=tz).tz + result = DatetimeIndex(["2010"], tz=non_norm_tz) + assert pytz.timezone(tz) is result.tz + + def test_constructor_timestamp_near_dst(self): + # GH 20854 + ts = [ + Timestamp("2016-10-30 03:00:00+0300", tz="Europe/Helsinki"), + Timestamp("2016-10-30 03:00:00+0200", tz="Europe/Helsinki"), + ] + result = DatetimeIndex(ts) + expected = DatetimeIndex([ts[0].to_pydatetime(), ts[1].to_pydatetime()]) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("klass", [Index, DatetimeIndex]) + @pytest.mark.parametrize("box", [np.array, partial(np.array, dtype=object), list]) + @pytest.mark.parametrize( + "tz, dtype", + [("US/Pacific", "datetime64[ns, US/Pacific]"), (None, "datetime64[ns]")], + ) + def test_constructor_with_int_tz(self, klass, box, tz, dtype): + # GH 20997, 20964 + ts = Timestamp("2018-01-01", tz=tz) + result = klass(box([ts.value]), dtype=dtype) + expected = klass([ts]) + assert result == expected + + def test_construction_int_rountrip(self, tz_naive_fixture): + # GH 12619, GH#24559 + tz = tz_naive_fixture + + result = 1293858000000000000 + expected = DatetimeIndex([result], tz=tz).asi8[0] + assert result == expected + + def test_construction_from_replaced_timestamps_with_dst(self): + # GH 18785 + index = date_range( + Timestamp(2000, 1, 1), + Timestamp(2005, 1, 1), + freq="MS", + tz="Australia/Melbourne", + ) + test = pd.DataFrame({"data": range(len(index))}, index=index) + test = test.resample("Y").mean() + result = DatetimeIndex([x.replace(month=6, day=1) for x in test.index]) + expected = DatetimeIndex( + [ + "2000-06-01 00:00:00", + "2001-06-01 00:00:00", + "2002-06-01 00:00:00", + "2003-06-01 00:00:00", + "2004-06-01 00:00:00", + "2005-06-01 00:00:00", + ], + tz="Australia/Melbourne", + ) + tm.assert_index_equal(result, expected) + + def test_construction_with_tz_and_tz_aware_dti(self): + # GH 23579 + dti = date_range("2016-01-01", periods=3, tz="US/Central") + msg = "data is already tz-aware US/Central, unable to set specified tz" + with pytest.raises(TypeError, match=msg): + DatetimeIndex(dti, tz="Asia/Tokyo") + + def test_construction_with_nat_and_tzlocal(self): + tz = dateutil.tz.tzlocal() + result = DatetimeIndex(["2018", "NaT"], tz=tz) + expected = DatetimeIndex([Timestamp("2018", tz=tz), pd.NaT]) + tm.assert_index_equal(result, expected) + + def test_constructor_with_ambiguous_keyword_arg(self): + # GH 35297 + + expected = DatetimeIndex( + ["2020-11-01 01:00:00", "2020-11-02 01:00:00"], + dtype="datetime64[ns, America/New_York]", + freq="D", + ambiguous=False, + ) + + # ambiguous keyword in start + timezone = "America/New_York" + start = Timestamp(year=2020, month=11, day=1, hour=1).tz_localize( + timezone, ambiguous=False + ) + result = date_range(start=start, periods=2, ambiguous=False) + tm.assert_index_equal(result, expected) + + # ambiguous keyword in end + timezone = "America/New_York" + end = Timestamp(year=2020, month=11, day=2, hour=1).tz_localize( + timezone, ambiguous=False + ) + result = date_range(end=end, periods=2, ambiguous=False) + tm.assert_index_equal(result, expected) + + def 
test_constructor_with_nonexistent_keyword_arg(self): + # GH 35297 + + timezone = "Europe/Warsaw" + + # nonexistent keyword in start + start = Timestamp("2015-03-29 02:30:00").tz_localize( + timezone, nonexistent="shift_forward" + ) + result = date_range(start=start, periods=2, freq="H") + expected = DatetimeIndex( + [ + Timestamp("2015-03-29 03:00:00+02:00", tz=timezone), + Timestamp("2015-03-29 04:00:00+02:00", tz=timezone), + ] + ) + + tm.assert_index_equal(result, expected) + + # nonexistent keyword in end + end = Timestamp("2015-03-29 02:30:00").tz_localize( + timezone, nonexistent="shift_forward" + ) + result = date_range(end=end, periods=2, freq="H") + expected = DatetimeIndex( + [ + Timestamp("2015-03-29 01:00:00+01:00", tz=timezone), + Timestamp("2015-03-29 03:00:00+02:00", tz=timezone), + ] + ) + + tm.assert_index_equal(result, expected) + + def test_constructor_no_precision_raises(self): + # GH-24753, GH-24739 + + msg = "with no precision is not allowed" + with pytest.raises(ValueError, match=msg): + DatetimeIndex(["2000"], dtype="datetime64") + + with pytest.raises(ValueError, match=msg): + Index(["2000"], dtype="datetime64") + + def test_constructor_wrong_precision_raises(self): + msg = "Unexpected value for 'dtype': 'datetime64\\[us\\]'" + with pytest.raises(ValueError, match=msg): + DatetimeIndex(["2000"], dtype="datetime64[us]") + + def test_index_constructor_with_numpy_object_array_and_timestamp_tz_with_nan(self): + # GH 27011 + result = Index(np.array([Timestamp("2019", tz="UTC"), np.nan], dtype=object)) + expected = DatetimeIndex([Timestamp("2019", tz="UTC"), pd.NaT]) + tm.assert_index_equal(result, expected) + + +class TestTimeSeries: + def test_dti_constructor_preserve_dti_freq(self): + rng = date_range("1/1/2000", "1/2/2000", freq="5min") + + rng2 = DatetimeIndex(rng) + assert rng.freq == rng2.freq + + def test_explicit_none_freq(self): + # Explicitly passing freq=None is respected + rng = date_range("1/1/2000", "1/2/2000", freq="5min") + + result = DatetimeIndex(rng, freq=None) + assert result.freq is None + + result = DatetimeIndex(rng._data, freq=None) + assert result.freq is None + + def test_dti_constructor_years_only(self, tz_naive_fixture): + tz = tz_naive_fixture + # GH 6961 + rng1 = date_range("2014", "2015", freq="M", tz=tz) + expected1 = date_range("2014-01-31", "2014-12-31", freq="M", tz=tz) + + rng2 = date_range("2014", "2015", freq="MS", tz=tz) + expected2 = date_range("2014-01-01", "2015-01-01", freq="MS", tz=tz) + + rng3 = date_range("2014", "2020", freq="A", tz=tz) + expected3 = date_range("2014-12-31", "2019-12-31", freq="A", tz=tz) + + rng4 = date_range("2014", "2020", freq="AS", tz=tz) + expected4 = date_range("2014-01-01", "2020-01-01", freq="AS", tz=tz) + + for rng, expected in [ + (rng1, expected1), + (rng2, expected2), + (rng3, expected3), + (rng4, expected4), + ]: + tm.assert_index_equal(rng, expected) + + def test_dti_constructor_small_int(self, any_int_numpy_dtype): + # see gh-13721 + exp = DatetimeIndex( + [ + "1970-01-01 00:00:00.00000000", + "1970-01-01 00:00:00.00000001", + "1970-01-01 00:00:00.00000002", + ] + ) + + arr = np.array([0, 10, 20], dtype=any_int_numpy_dtype) + tm.assert_index_equal(DatetimeIndex(arr), exp) + + def test_ctor_str_intraday(self): + rng = DatetimeIndex(["1-1-2000 00:00:01"]) + assert rng[0].second == 1 + + def test_is_(self): + dti = date_range(start="1/1/2005", end="12/1/2005", freq="M") + assert dti.is_(dti) + assert dti.is_(dti.view()) + assert not dti.is_(dti.copy()) + + def 
test_index_cast_datetime64_other_units(self): + arr = np.arange(0, 100, 10, dtype=np.int64).view("M8[D]") + idx = Index(arr) + + assert (idx.values == conversion.ensure_datetime64ns(arr)).all() + + def test_constructor_int64_nocopy(self): + # GH#1624 + arr = np.arange(1000, dtype=np.int64) + index = DatetimeIndex(arr) + + arr[50:100] = -1 + assert (index.asi8[50:100] == -1).all() + + arr = np.arange(1000, dtype=np.int64) + index = DatetimeIndex(arr, copy=True) + + arr[50:100] = -1 + assert (index.asi8[50:100] != -1).all() + + @pytest.mark.parametrize( + "freq", ["M", "Q", "A", "D", "B", "BH", "T", "S", "L", "U", "H", "N", "C"] + ) + def test_from_freq_recreate_from_data(self, freq): + org = date_range(start="2001/02/01 09:00", freq=freq, periods=1) + idx = DatetimeIndex(org, freq=freq) + tm.assert_index_equal(idx, org) + + org = date_range( + start="2001/02/01 09:00", freq=freq, tz="US/Pacific", periods=1 + ) + idx = DatetimeIndex(org, freq=freq, tz="US/Pacific") + tm.assert_index_equal(idx, org) + + def test_datetimeindex_constructor_misc(self): + arr = ["1/1/2005", "1/2/2005", "Jn 3, 2005", "2005-01-04"] + msg = r"(\(')?Unknown string format(:', 'Jn 3, 2005'\))?" + with pytest.raises(ValueError, match=msg): + DatetimeIndex(arr) + + arr = ["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"] + idx1 = DatetimeIndex(arr) + + arr = [datetime(2005, 1, 1), "1/2/2005", "1/3/2005", "2005-01-04"] + idx2 = DatetimeIndex(arr) + + arr = [Timestamp(datetime(2005, 1, 1)), "1/2/2005", "1/3/2005", "2005-01-04"] + idx3 = DatetimeIndex(arr) + + arr = np.array(["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"], dtype="O") + idx4 = DatetimeIndex(arr) + + arr = to_datetime(["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"]) + idx5 = DatetimeIndex(arr) + + arr = to_datetime(["1/1/2005", "1/2/2005", "Jan 3, 2005", "2005-01-04"]) + idx6 = DatetimeIndex(arr) + + idx7 = DatetimeIndex(["12/05/2007", "25/01/2008"], dayfirst=True) + idx8 = DatetimeIndex( + ["2007/05/12", "2008/01/25"], dayfirst=False, yearfirst=True + ) + tm.assert_index_equal(idx7, idx8) + + for other in [idx2, idx3, idx4, idx5, idx6]: + assert (idx1.values == other.values).all() + + sdate = datetime(1999, 12, 25) + edate = datetime(2000, 1, 1) + idx = date_range(start=sdate, freq="1B", periods=20) + assert len(idx) == 20 + assert idx[0] == sdate + 0 * offsets.BDay() + assert idx.freq == "B" + + idx1 = date_range(start=sdate, end=edate, freq="W-SUN") + idx2 = date_range(start=sdate, end=edate, freq=offsets.Week(weekday=6)) + assert len(idx1) == len(idx2) + assert idx1.freq == idx2.freq + + idx1 = date_range(start=sdate, end=edate, freq="QS") + idx2 = date_range( + start=sdate, end=edate, freq=offsets.QuarterBegin(startingMonth=1) + ) + assert len(idx1) == len(idx2) + assert idx1.freq == idx2.freq + + idx1 = date_range(start=sdate, end=edate, freq="BQ") + idx2 = date_range( + start=sdate, end=edate, freq=offsets.BQuarterEnd(startingMonth=12) + ) + assert len(idx1) == len(idx2) + assert idx1.freq == idx2.freq + + def test_pass_datetimeindex_to_index(self): + # Bugs in #1396 + rng = date_range("1/1/2000", "3/1/2000") + idx = Index(rng, dtype=object) + + expected = Index(rng.to_pydatetime(), dtype=object) + + tm.assert_numpy_array_equal(idx.values, expected.values) + + def test_date_range_tuple_freq_raises(self): + # GH#34703 + edate = datetime(2000, 1, 1) + with pytest.raises(TypeError, match="pass as a string instead"): + date_range(end=edate, freq=("D", 5), periods=20) + + +def test_timestamp_constructor_invalid_fold_raise(): + # Test for #25057 + # 
Valid fold values are only [None, 0, 1] + msg = "Valid values for the fold argument are None, 0, or 1." + with pytest.raises(ValueError, match=msg): + Timestamp(123, fold=2) + + +def test_timestamp_constructor_pytz_fold_raise(): + # Test for #25057 + # pytz doesn't support fold. Check that we raise + # if fold is passed with pytz + msg = "pytz timezones do not support fold. Please use dateutil timezones." + tz = pytz.timezone("Europe/London") + with pytest.raises(ValueError, match=msg): + Timestamp(datetime(2019, 10, 27, 0, 30, 0, 0), tz=tz, fold=0) + + +@pytest.mark.parametrize("fold", [0, 1]) +@pytest.mark.parametrize( + "ts_input", + [ + 1572136200000000000, + 1572136200000000000.0, + np.datetime64(1572136200000000000, "ns"), + "2019-10-27 01:30:00+01:00", + datetime(2019, 10, 27, 0, 30, 0, 0, tzinfo=timezone.utc), + ], +) +def test_timestamp_constructor_fold_conflict(ts_input, fold): + # Test for #25057 + # Check that we raise on fold conflict + msg = ( + "Cannot pass fold with possibly unambiguous input: int, float, " + "numpy.datetime64, str, or timezone-aware datetime-like. " + "Pass naive datetime-like or build Timestamp from components." + ) + with pytest.raises(ValueError, match=msg): + Timestamp(ts_input=ts_input, fold=fold) + + +@pytest.mark.parametrize("tz", ["dateutil/Europe/London", None]) +@pytest.mark.parametrize("fold", [0, 1]) +def test_timestamp_constructor_retain_fold(tz, fold): + # Test for #25057 + # Check that we retain fold + ts = Timestamp(year=2019, month=10, day=27, hour=1, minute=30, tz=tz, fold=fold) + result = ts.fold + expected = fold + assert result == expected + + +@pytest.mark.parametrize("tz", ["dateutil/Europe/London"]) +@pytest.mark.parametrize( + "ts_input,fold_out", + [ + (1572136200000000000, 0), + (1572139800000000000, 1), + ("2019-10-27 01:30:00+01:00", 0), + ("2019-10-27 01:30:00+00:00", 1), + (datetime(2019, 10, 27, 1, 30, 0, 0, fold=0), 0), + (datetime(2019, 10, 27, 1, 30, 0, 0, fold=1), 1), + ], +) +def test_timestamp_constructor_infer_fold_from_value(tz, ts_input, fold_out): + # Test for #25057 + # Check that we infer fold correctly based on timestamps since utc + # or strings + ts = Timestamp(ts_input, tz=tz) + result = ts.fold + expected = fold_out + assert result == expected + + +@pytest.mark.parametrize("tz", ["dateutil/Europe/London"]) +@pytest.mark.parametrize( + "ts_input,fold,value_out", + [ + (datetime(2019, 10, 27, 1, 30, 0, 0), 0, 1572136200000000000), + (datetime(2019, 10, 27, 1, 30, 0, 0), 1, 1572139800000000000), + ], +) +def test_timestamp_constructor_adjust_value_for_fold(tz, ts_input, fold, value_out): + # Test for #25057 + # Check that we adjust value for fold correctly + # based on timestamps since utc + ts = Timestamp(ts_input, tz=tz, fold=fold) + result = ts.value + expected = value_out + assert result == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_date_range.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_date_range.py new file mode 100644 index 0000000..377974a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_date_range.py @@ -0,0 +1,1143 @@ +""" +test date_range, bdate_range construction from the convenience range functions +""" + +from datetime import ( + datetime, + time, + timedelta, +) + +import numpy as np +import pytest +import pytz +from pytz import timezone + +from pandas._libs.tslibs import timezones +from pandas._libs.tslibs.offsets import ( + BDay, + CDay, + DateOffset, + MonthEnd, + 
prefix_mapping, +) +from pandas.errors import OutOfBoundsDatetime +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + DatetimeIndex, + Timedelta, + Timestamp, + bdate_range, + date_range, + offsets, +) +import pandas._testing as tm +from pandas.core.arrays.datetimes import generate_range + +START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) + + +def _get_expected_range( + begin_to_match, + end_to_match, + both_range, + inclusive_endpoints, +): + """Helper to get expected range from a both inclusive range""" + left_match = begin_to_match == both_range[0] + right_match = end_to_match == both_range[-1] + + if inclusive_endpoints == "left" and right_match: + expected_range = both_range[:-1] + elif inclusive_endpoints == "right" and left_match: + expected_range = both_range[1:] + elif inclusive_endpoints == "neither" and left_match and right_match: + expected_range = both_range[1:-1] + elif inclusive_endpoints == "neither" and right_match: + expected_range = both_range[:-1] + elif inclusive_endpoints == "neither" and left_match: + expected_range = both_range[1:] + elif inclusive_endpoints == "both": + expected_range = both_range[:] + else: + expected_range = both_range[:] + + return expected_range + + +class TestTimestampEquivDateRange: + # Older tests in TestTimeSeries constructed their `stamp` objects + # using `date_range` instead of the `Timestamp` constructor. + # TestTimestampEquivDateRange checks that these are equivalent in the + # pertinent cases. + + def test_date_range_timestamp_equiv(self): + rng = date_range("20090415", "20090519", tz="US/Eastern") + stamp = rng[0] + + ts = Timestamp("20090415", tz="US/Eastern") + assert ts == stamp + + def test_date_range_timestamp_equiv_dateutil(self): + rng = date_range("20090415", "20090519", tz="dateutil/US/Eastern") + stamp = rng[0] + + ts = Timestamp("20090415", tz="dateutil/US/Eastern") + assert ts == stamp + + def test_date_range_timestamp_equiv_explicit_pytz(self): + rng = date_range("20090415", "20090519", tz=pytz.timezone("US/Eastern")) + stamp = rng[0] + + ts = Timestamp("20090415", tz=pytz.timezone("US/Eastern")) + assert ts == stamp + + @td.skip_if_windows + def test_date_range_timestamp_equiv_explicit_dateutil(self): + from pandas._libs.tslibs.timezones import dateutil_gettz as gettz + + rng = date_range("20090415", "20090519", tz=gettz("US/Eastern")) + stamp = rng[0] + + ts = Timestamp("20090415", tz=gettz("US/Eastern")) + assert ts == stamp + + def test_date_range_timestamp_equiv_from_datetime_instance(self): + datetime_instance = datetime(2014, 3, 4) + # build a timestamp with a frequency, since then it supports + # addition/subtraction of integers + timestamp_instance = date_range(datetime_instance, periods=1, freq="D")[0] + + ts = Timestamp(datetime_instance) + assert ts == timestamp_instance + + def test_date_range_timestamp_equiv_preserve_frequency(self): + timestamp_instance = date_range("2014-03-05", periods=1, freq="D")[0] + ts = Timestamp("2014-03-05") + + assert timestamp_instance == ts + + +class TestDateRanges: + @pytest.mark.parametrize("freq", ["N", "U", "L", "T", "S", "H", "D"]) + def test_date_range_edges(self, freq): + # GH#13672 + td = Timedelta(f"1{freq}") + ts = Timestamp("1970-01-01") + + idx = date_range( + start=ts + td, + end=ts + 4 * td, + freq=freq, + ) + exp = DatetimeIndex( + [ts + n * td for n in range(1, 5)], + freq=freq, + ) + tm.assert_index_equal(idx, exp) + + # start after end + idx = date_range( + start=ts + 4 * td, + end=ts + td, + freq=freq, + ) + exp 
= DatetimeIndex([], freq=freq) + tm.assert_index_equal(idx, exp) + + # start matches end + idx = date_range( + start=ts + td, + end=ts + td, + freq=freq, + ) + exp = DatetimeIndex([ts + td], freq=freq) + tm.assert_index_equal(idx, exp) + + def test_date_range_near_implementation_bound(self): + # GH#??? + freq = Timedelta(1) + + with pytest.raises(OutOfBoundsDatetime, match="Cannot generate range with"): + date_range(end=Timestamp.min, periods=2, freq=freq) + + def test_date_range_nat(self): + # GH#11587 + msg = "Neither `start` nor `end` can be NaT" + with pytest.raises(ValueError, match=msg): + date_range(start="2016-01-01", end=pd.NaT, freq="D") + with pytest.raises(ValueError, match=msg): + date_range(start=pd.NaT, end="2016-01-01", freq="D") + + def test_date_range_multiplication_overflow(self): + # GH#24255 + # check that overflows in calculating `addend = periods * stride` + # are caught + with tm.assert_produces_warning(None): + # we should _not_ be seeing an overflow RuntimeWarning + dti = date_range(start="1677-09-22", periods=213503, freq="D") + + assert dti[0] == Timestamp("1677-09-22") + assert len(dti) == 213503 + + msg = "Cannot generate range with" + with pytest.raises(OutOfBoundsDatetime, match=msg): + date_range("1969-05-04", periods=200000000, freq="30000D") + + def test_date_range_unsigned_overflow_handling(self): + # GH#24255 + # case where `addend = periods * stride` overflows int64 bounds + # but not uint64 bounds + dti = date_range(start="1677-09-22", end="2262-04-11", freq="D") + + dti2 = date_range(start=dti[0], periods=len(dti), freq="D") + assert dti2.equals(dti) + + dti3 = date_range(end=dti[-1], periods=len(dti), freq="D") + assert dti3.equals(dti) + + def test_date_range_int64_overflow_non_recoverable(self): + # GH#24255 + # case with start later than 1970-01-01, overflow int64 but not uint64 + msg = "Cannot generate range with" + with pytest.raises(OutOfBoundsDatetime, match=msg): + date_range(start="1970-02-01", periods=106752 * 24, freq="H") + + # case with end before 1970-01-01, overflow int64 but not uint64 + with pytest.raises(OutOfBoundsDatetime, match=msg): + date_range(end="1969-11-14", periods=106752 * 24, freq="H") + + @pytest.mark.slow + def test_date_range_int64_overflow_stride_endpoint_different_signs(self): + # cases where stride * periods overflow int64 and stride/endpoint + # have different signs + start = Timestamp("2262-02-23") + end = Timestamp("1969-11-14") + + expected = date_range(start=start, end=end, freq="-1H") + assert expected[0] == start + assert expected[-1] == end + + dti = date_range(end=end, periods=len(expected), freq="-1H") + tm.assert_index_equal(dti, expected) + + start2 = Timestamp("1970-02-01") + end2 = Timestamp("1677-10-22") + + expected2 = date_range(start=start2, end=end2, freq="-1H") + assert expected2[0] == start2 + assert expected2[-1] == end2 + + dti2 = date_range(start=start2, periods=len(expected2), freq="-1H") + tm.assert_index_equal(dti2, expected2) + + def test_date_range_out_of_bounds(self): + # GH#14187 + msg = "Cannot generate range" + with pytest.raises(OutOfBoundsDatetime, match=msg): + date_range("2016-01-01", periods=100000, freq="D") + with pytest.raises(OutOfBoundsDatetime, match=msg): + date_range(end="1763-10-12", periods=100000, freq="D") + + def test_date_range_gen_error(self): + rng = date_range("1/1/2000 00:00", "1/1/2000 00:18", freq="5min") + assert len(rng) == 4 + + @pytest.mark.parametrize("freq", ["AS", "YS"]) + def test_begin_year_alias(self, freq): + # see gh-9313 + rng =
date_range("1/1/2013", "7/1/2017", freq=freq) + exp = DatetimeIndex( + ["2013-01-01", "2014-01-01", "2015-01-01", "2016-01-01", "2017-01-01"], + freq=freq, + ) + tm.assert_index_equal(rng, exp) + + @pytest.mark.parametrize("freq", ["A", "Y"]) + def test_end_year_alias(self, freq): + # see gh-9313 + rng = date_range("1/1/2013", "7/1/2017", freq=freq) + exp = DatetimeIndex( + ["2013-12-31", "2014-12-31", "2015-12-31", "2016-12-31"], freq=freq + ) + tm.assert_index_equal(rng, exp) + + @pytest.mark.parametrize("freq", ["BA", "BY"]) + def test_business_end_year_alias(self, freq): + # see gh-9313 + rng = date_range("1/1/2013", "7/1/2017", freq=freq) + exp = DatetimeIndex( + ["2013-12-31", "2014-12-31", "2015-12-31", "2016-12-30"], freq=freq + ) + tm.assert_index_equal(rng, exp) + + def test_date_range_negative_freq(self): + # GH 11018 + rng = date_range("2011-12-31", freq="-2A", periods=3) + exp = DatetimeIndex(["2011-12-31", "2009-12-31", "2007-12-31"], freq="-2A") + tm.assert_index_equal(rng, exp) + assert rng.freq == "-2A" + + rng = date_range("2011-01-31", freq="-2M", periods=3) + exp = DatetimeIndex(["2011-01-31", "2010-11-30", "2010-09-30"], freq="-2M") + tm.assert_index_equal(rng, exp) + assert rng.freq == "-2M" + + def test_date_range_bms_bug(self): + # #1645 + rng = date_range("1/1/2000", periods=10, freq="BMS") + + ex_first = Timestamp("2000-01-03") + assert rng[0] == ex_first + + def test_date_range_normalize(self): + snap = datetime.today() + n = 50 + + rng = date_range(snap, periods=n, normalize=False, freq="2D") + + offset = timedelta(2) + values = DatetimeIndex([snap + i * offset for i in range(n)], freq=offset) + + tm.assert_index_equal(rng, values) + + rng = date_range("1/1/2000 08:15", periods=n, normalize=False, freq="B") + the_time = time(8, 15) + for val in rng: + assert val.time() == the_time + + def test_date_range_fy5252(self): + dr = date_range( + start="2013-01-01", + periods=2, + freq=offsets.FY5253(startingMonth=1, weekday=3, variation="nearest"), + ) + assert dr[0] == Timestamp("2013-01-31") + assert dr[1] == Timestamp("2014-01-30") + + def test_date_range_ambiguous_arguments(self): + # #2538 + start = datetime(2011, 1, 1, 5, 3, 40) + end = datetime(2011, 1, 1, 8, 9, 40) + + msg = ( + "Of the four parameters: start, end, periods, and " + "freq, exactly three must be specified" + ) + with pytest.raises(ValueError, match=msg): + date_range(start, end, periods=10, freq="s") + + def test_date_range_convenience_periods(self): + # GH 20808 + result = date_range("2018-04-24", "2018-04-27", periods=3) + expected = DatetimeIndex( + ["2018-04-24 00:00:00", "2018-04-25 12:00:00", "2018-04-27 00:00:00"], + freq=None, + ) + + tm.assert_index_equal(result, expected) + + # Test if spacing remains linear if tz changes to dst in range + result = date_range( + "2018-04-01 01:00:00", + "2018-04-01 04:00:00", + tz="Australia/Sydney", + periods=3, + ) + expected = DatetimeIndex( + [ + Timestamp("2018-04-01 01:00:00+1100", tz="Australia/Sydney"), + Timestamp("2018-04-01 02:00:00+1000", tz="Australia/Sydney"), + Timestamp("2018-04-01 04:00:00+1000", tz="Australia/Sydney"), + ] + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "start,end,result_tz", + [ + ["20180101", "20180103", "US/Eastern"], + [datetime(2018, 1, 1), datetime(2018, 1, 3), "US/Eastern"], + [Timestamp("20180101"), Timestamp("20180103"), "US/Eastern"], + [ + Timestamp("20180101", tz="US/Eastern"), + Timestamp("20180103", tz="US/Eastern"), + "US/Eastern", + ], + [ + Timestamp("20180101", 
tz="US/Eastern"), + Timestamp("20180103", tz="US/Eastern"), + None, + ], + ], + ) + def test_date_range_linspacing_tz(self, start, end, result_tz): + # GH 20983 + result = date_range(start, end, periods=3, tz=result_tz) + expected = date_range("20180101", periods=3, freq="D", tz="US/Eastern") + tm.assert_index_equal(result, expected) + + def test_date_range_businesshour(self): + idx = DatetimeIndex( + [ + "2014-07-04 09:00", + "2014-07-04 10:00", + "2014-07-04 11:00", + "2014-07-04 12:00", + "2014-07-04 13:00", + "2014-07-04 14:00", + "2014-07-04 15:00", + "2014-07-04 16:00", + ], + freq="BH", + ) + rng = date_range("2014-07-04 09:00", "2014-07-04 16:00", freq="BH") + tm.assert_index_equal(idx, rng) + + idx = DatetimeIndex(["2014-07-04 16:00", "2014-07-07 09:00"], freq="BH") + rng = date_range("2014-07-04 16:00", "2014-07-07 09:00", freq="BH") + tm.assert_index_equal(idx, rng) + + idx = DatetimeIndex( + [ + "2014-07-04 09:00", + "2014-07-04 10:00", + "2014-07-04 11:00", + "2014-07-04 12:00", + "2014-07-04 13:00", + "2014-07-04 14:00", + "2014-07-04 15:00", + "2014-07-04 16:00", + "2014-07-07 09:00", + "2014-07-07 10:00", + "2014-07-07 11:00", + "2014-07-07 12:00", + "2014-07-07 13:00", + "2014-07-07 14:00", + "2014-07-07 15:00", + "2014-07-07 16:00", + "2014-07-08 09:00", + "2014-07-08 10:00", + "2014-07-08 11:00", + "2014-07-08 12:00", + "2014-07-08 13:00", + "2014-07-08 14:00", + "2014-07-08 15:00", + "2014-07-08 16:00", + ], + freq="BH", + ) + rng = date_range("2014-07-04 09:00", "2014-07-08 16:00", freq="BH") + tm.assert_index_equal(idx, rng) + + def test_range_misspecified(self): + # GH #1095 + msg = ( + "Of the four parameters: start, end, periods, and " + "freq, exactly three must be specified" + ) + + with pytest.raises(ValueError, match=msg): + date_range(start="1/1/2000") + + with pytest.raises(ValueError, match=msg): + date_range(end="1/1/2000") + + with pytest.raises(ValueError, match=msg): + date_range(periods=10) + + with pytest.raises(ValueError, match=msg): + date_range(start="1/1/2000", freq="H") + + with pytest.raises(ValueError, match=msg): + date_range(end="1/1/2000", freq="H") + + with pytest.raises(ValueError, match=msg): + date_range(periods=10, freq="H") + + with pytest.raises(ValueError, match=msg): + date_range() + + def test_compat_replace(self): + # https://github.com/statsmodels/statsmodels/issues/3349 + # replace should take ints/longs for compat + result = date_range(Timestamp("1960-04-01 00:00:00"), periods=76, freq="QS-JAN") + assert len(result) == 76 + + def test_catch_infinite_loop(self): + offset = offsets.DateOffset(minute=5) + # blow up, don't loop forever + msg = "Offset did not increment date" + with pytest.raises(ValueError, match=msg): + date_range(datetime(2011, 11, 11), datetime(2011, 11, 12), freq=offset) + + @pytest.mark.parametrize("periods", (1, 2)) + def test_wom_len(self, periods): + # https://github.com/pandas-dev/pandas/issues/20517 + res = date_range(start="20110101", periods=periods, freq="WOM-1MON") + assert len(res) == periods + + def test_construct_over_dst(self): + # GH 20854 + pre_dst = Timestamp("2010-11-07 01:00:00").tz_localize( + "US/Pacific", ambiguous=True + ) + pst_dst = Timestamp("2010-11-07 01:00:00").tz_localize( + "US/Pacific", ambiguous=False + ) + expect_data = [ + Timestamp("2010-11-07 00:00:00", tz="US/Pacific"), + pre_dst, + pst_dst, + ] + expected = DatetimeIndex(expect_data, freq="H") + result = date_range(start="2010-11-7", periods=3, freq="H", tz="US/Pacific") + tm.assert_index_equal(result, expected) + + def 
test_construct_with_different_start_end_string_format(self): + # GH 12064 + result = date_range( + "2013-01-01 00:00:00+09:00", "2013/01/01 02:00:00+09:00", freq="H" + ) + expected = DatetimeIndex( + [ + Timestamp("2013-01-01 00:00:00+09:00"), + Timestamp("2013-01-01 01:00:00+09:00"), + Timestamp("2013-01-01 02:00:00+09:00"), + ], + freq="H", + ) + tm.assert_index_equal(result, expected) + + def test_error_with_zero_monthends(self): + msg = r"Offset <0 \* MonthEnds> did not increment date" + with pytest.raises(ValueError, match=msg): + date_range("1/1/2000", "1/1/2001", freq=MonthEnd(0)) + + def test_range_bug(self): + # GH #770 + offset = DateOffset(months=3) + result = date_range("2011-1-1", "2012-1-31", freq=offset) + + start = datetime(2011, 1, 1) + expected = DatetimeIndex([start + i * offset for i in range(5)], freq=offset) + tm.assert_index_equal(result, expected) + + def test_range_tz_pytz(self): + # see gh-2906 + tz = timezone("US/Eastern") + start = tz.localize(datetime(2011, 1, 1)) + end = tz.localize(datetime(2011, 1, 3)) + + dr = date_range(start=start, periods=3) + assert dr.tz.zone == tz.zone + assert dr[0] == start + assert dr[2] == end + + dr = date_range(end=end, periods=3) + assert dr.tz.zone == tz.zone + assert dr[0] == start + assert dr[2] == end + + dr = date_range(start=start, end=end) + assert dr.tz.zone == tz.zone + assert dr[0] == start + assert dr[2] == end + + @pytest.mark.parametrize( + "start, end", + [ + [ + Timestamp(datetime(2014, 3, 6), tz="US/Eastern"), + Timestamp(datetime(2014, 3, 12), tz="US/Eastern"), + ], + [ + Timestamp(datetime(2013, 11, 1), tz="US/Eastern"), + Timestamp(datetime(2013, 11, 6), tz="US/Eastern"), + ], + ], + ) + def test_range_tz_dst_straddle_pytz(self, start, end): + dr = date_range(start, end, freq="D") + assert dr[0] == start + assert dr[-1] == end + assert np.all(dr.hour == 0) + + dr = date_range(start, end, freq="D", tz="US/Eastern") + assert dr[0] == start + assert dr[-1] == end + assert np.all(dr.hour == 0) + + dr = date_range( + start.replace(tzinfo=None), + end.replace(tzinfo=None), + freq="D", + tz="US/Eastern", + ) + assert dr[0] == start + assert dr[-1] == end + assert np.all(dr.hour == 0) + + def test_range_tz_dateutil(self): + # see gh-2906 + + # Use maybe_get_tz to fix filename in tz under dateutil. 
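+ # [editor's note, not part of the upstream test: maybe_get_tz recognizes
+ # the "dateutil/" prefix, so maybe_get_tz("dateutil/US/Eastern") resolves
+ # the zone through dateutil's tz database rather than pytz; the lambda
+ # below just prepends that prefix for each zone name used in this test.]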
+ from pandas._libs.tslibs.timezones import maybe_get_tz + + tz = lambda x: maybe_get_tz("dateutil/" + x) + + start = datetime(2011, 1, 1, tzinfo=tz("US/Eastern")) + end = datetime(2011, 1, 3, tzinfo=tz("US/Eastern")) + + dr = date_range(start=start, periods=3) + assert dr.tz == tz("US/Eastern") + assert dr[0] == start + assert dr[2] == end + + dr = date_range(end=end, periods=3) + assert dr.tz == tz("US/Eastern") + assert dr[0] == start + assert dr[2] == end + + dr = date_range(start=start, end=end) + assert dr.tz == tz("US/Eastern") + assert dr[0] == start + assert dr[2] == end + + @pytest.mark.parametrize("freq", ["1D", "3D", "2M", "7W", "3H", "A"]) + def test_range_closed(self, freq, inclusive_endpoints_fixture): + begin = datetime(2011, 1, 1) + end = datetime(2014, 1, 1) + + result_range = date_range( + begin, end, inclusive=inclusive_endpoints_fixture, freq=freq + ) + both_range = date_range(begin, end, inclusive="both", freq=freq) + expected_range = _get_expected_range( + begin, end, both_range, inclusive_endpoints_fixture + ) + + tm.assert_index_equal(expected_range, result_range) + + @pytest.mark.parametrize("freq", ["1D", "3D", "2M", "7W", "3H", "A"]) + def test_range_closed_with_tz_aware_start_end( + self, freq, inclusive_endpoints_fixture + ): + # GH12409, GH12684 + begin = Timestamp("2011/1/1", tz="US/Eastern") + end = Timestamp("2014/1/1", tz="US/Eastern") + + result_range = date_range( + begin, end, inclusive=inclusive_endpoints_fixture, freq=freq + ) + both_range = date_range(begin, end, inclusive="both", freq=freq) + expected_range = _get_expected_range( + begin, + end, + both_range, + inclusive_endpoints_fixture, + ) + + tm.assert_index_equal(expected_range, result_range) + + @pytest.mark.parametrize("freq", ["1D", "3D", "2M", "7W", "3H", "A"]) + def test_range_with_tz_closed_with_tz_aware_start_end( + self, freq, inclusive_endpoints_fixture + ): + begin = Timestamp("2011/1/1") + end = Timestamp("2014/1/1") + begintz = Timestamp("2011/1/1", tz="US/Eastern") + endtz = Timestamp("2014/1/1", tz="US/Eastern") + + result_range = date_range( + begin, + end, + inclusive=inclusive_endpoints_fixture, + freq=freq, + tz="US/Eastern", + ) + both_range = date_range( + begin, end, inclusive="both", freq=freq, tz="US/Eastern" + ) + expected_range = _get_expected_range( + begintz, + endtz, + both_range, + inclusive_endpoints_fixture, + ) + + tm.assert_index_equal(expected_range, result_range) + + def test_range_closed_boundary(self, inclusive_endpoints_fixture): + # GH#11804 + right_boundary = date_range( + "2015-09-12", + "2015-12-01", + freq="QS-MAR", + inclusive=inclusive_endpoints_fixture, + ) + left_boundary = date_range( + "2015-09-01", + "2015-09-12", + freq="QS-MAR", + inclusive=inclusive_endpoints_fixture, + ) + both_boundary = date_range( + "2015-09-01", + "2015-12-01", + freq="QS-MAR", + inclusive=inclusive_endpoints_fixture, + ) + neither_boundary = date_range( + "2015-09-11", + "2015-09-12", + freq="QS-MAR", + inclusive=inclusive_endpoints_fixture, + ) + + expected_right = both_boundary + expected_left = both_boundary + expected_both = both_boundary + + if inclusive_endpoints_fixture == "right": + expected_left = both_boundary[1:] + elif inclusive_endpoints_fixture == "left": + expected_right = both_boundary[:-1] + elif inclusive_endpoints_fixture == "both": + expected_right = both_boundary[1:] + expected_left = both_boundary[:-1] + + expected_neither = both_boundary[1:-1] + + tm.assert_index_equal(right_boundary, expected_right) + tm.assert_index_equal(left_boundary, 
expected_left) + tm.assert_index_equal(both_boundary, expected_both) + tm.assert_index_equal(neither_boundary, expected_neither) + + def test_years_only(self): + # GH 6961 + dr = date_range("2014", "2015", freq="M") + assert dr[0] == datetime(2014, 1, 31) + assert dr[-1] == datetime(2014, 12, 31) + + def test_freq_divides_end_in_nanos(self): + # GH 10885 + result_1 = date_range("2005-01-12 10:00", "2005-01-12 16:00", freq="345min") + result_2 = date_range("2005-01-13 10:00", "2005-01-13 16:00", freq="345min") + expected_1 = DatetimeIndex( + ["2005-01-12 10:00:00", "2005-01-12 15:45:00"], + dtype="datetime64[ns]", + freq="345T", + tz=None, + ) + expected_2 = DatetimeIndex( + ["2005-01-13 10:00:00", "2005-01-13 15:45:00"], + dtype="datetime64[ns]", + freq="345T", + tz=None, + ) + tm.assert_index_equal(result_1, expected_1) + tm.assert_index_equal(result_2, expected_2) + + def test_cached_range_bug(self): + rng = date_range("2010-09-01 05:00:00", periods=50, freq=DateOffset(hours=6)) + assert len(rng) == 50 + assert rng[0] == datetime(2010, 9, 1, 5) + + def test_timezone_comparison_bug(self): + # smoke test + start = Timestamp("20130220 10:00", tz="US/Eastern") + result = date_range(start, periods=2, tz="US/Eastern") + assert len(result) == 2 + + def test_timezone_comparison_assert(self): + start = Timestamp("20130220 10:00", tz="US/Eastern") + msg = "Inferred time zone not equal to passed time zone" + with pytest.raises(AssertionError, match=msg): + date_range(start, periods=2, tz="Europe/Berlin") + + def test_negative_non_tick_frequency_descending_dates(self, tz_aware_fixture): + # GH 23270 + tz = tz_aware_fixture + result = date_range(start="2011-06-01", end="2011-01-01", freq="-1MS", tz=tz) + expected = date_range(end="2011-06-01", start="2011-01-01", freq="1MS", tz=tz)[ + ::-1 + ] + tm.assert_index_equal(result, expected) + + def test_range_where_start_equal_end(self, inclusive_endpoints_fixture): + # GH 43394 + start = "2021-09-02" + end = "2021-09-02" + result = date_range( + start=start, end=end, freq="D", inclusive=inclusive_endpoints_fixture + ) + + both_range = date_range(start=start, end=end, freq="D", inclusive="both") + if inclusive_endpoints_fixture == "neither": + expected = both_range[1:-1] + elif inclusive_endpoints_fixture in ("left", "right", "both"): + expected = both_range[:] + + tm.assert_index_equal(result, expected) + + +class TestDateRangeTZ: + """Tests for date_range with timezones""" + + def test_hongkong_tz_convert(self): + # GH#1673 smoke test + dr = date_range("2012-01-01", "2012-01-10", freq="D", tz="Hongkong") + + # it works! 
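+ # [editor's note, not part of the upstream test: this is a smoke test
+ # with no explicit assertion; reading .hour forces wall-time conversion
+ # of the tz-aware index, the operation GH#1673 reported as failing, so
+ # simply not raising is the check here.]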
+ dr.hour + + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_date_range_span_dst_transition(self, tzstr): + # GH#1778 + + # Standard -> Daylight Savings Time + dr = date_range("03/06/2012 00:00", periods=200, freq="W-FRI", tz="US/Eastern") + + assert (dr.hour == 0).all() + + dr = date_range("2012-11-02", periods=10, tz=tzstr) + result = dr.hour + expected = pd.Index([0] * 10) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_date_range_timezone_str_argument(self, tzstr): + tz = timezones.maybe_get_tz(tzstr) + result = date_range("1/1/2000", periods=10, tz=tzstr) + expected = date_range("1/1/2000", periods=10, tz=tz) + + tm.assert_index_equal(result, expected) + + def test_date_range_with_fixedoffset_noname(self): + from pandas.tests.indexes.datetimes.test_timezones import fixed_off_no_name + + off = fixed_off_no_name + start = datetime(2012, 3, 11, 5, 0, 0, tzinfo=off) + end = datetime(2012, 6, 11, 5, 0, 0, tzinfo=off) + rng = date_range(start=start, end=end) + assert off == rng.tz + + idx = pd.Index([start, end]) + assert off == idx.tz + + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_date_range_with_tz(self, tzstr): + stamp = Timestamp("3/11/2012 05:00", tz=tzstr) + assert stamp.hour == 5 + + rng = date_range("3/11/2012 04:00", periods=10, freq="H", tz=tzstr) + + assert stamp == rng[1] + + +class TestGenRangeGeneration: + def test_generate(self): + rng1 = list(generate_range(START, END, offset=BDay())) + rng2 = list(generate_range(START, END, offset="B")) + assert rng1 == rng2 + + def test_generate_cday(self): + rng1 = list(generate_range(START, END, offset=CDay())) + rng2 = list(generate_range(START, END, offset="C")) + assert rng1 == rng2 + + def test_1(self): + rng = list(generate_range(start=datetime(2009, 3, 25), periods=2)) + expected = [datetime(2009, 3, 25), datetime(2009, 3, 26)] + assert rng == expected + + def test_2(self): + rng = list(generate_range(start=datetime(2008, 1, 1), end=datetime(2008, 1, 3))) + expected = [datetime(2008, 1, 1), datetime(2008, 1, 2), datetime(2008, 1, 3)] + assert rng == expected + + def test_3(self): + rng = list(generate_range(start=datetime(2008, 1, 5), end=datetime(2008, 1, 6))) + expected = [] + assert rng == expected + + def test_precision_finer_than_offset(self): + # GH#9907 + result1 = date_range( + start="2015-04-15 00:00:03", end="2016-04-22 00:00:00", freq="Q" + ) + result2 = date_range( + start="2015-04-15 00:00:03", end="2015-06-22 00:00:04", freq="W" + ) + expected1_list = [ + "2015-06-30 00:00:03", + "2015-09-30 00:00:03", + "2015-12-31 00:00:03", + "2016-03-31 00:00:03", + ] + expected2_list = [ + "2015-04-19 00:00:03", + "2015-04-26 00:00:03", + "2015-05-03 00:00:03", + "2015-05-10 00:00:03", + "2015-05-17 00:00:03", + "2015-05-24 00:00:03", + "2015-05-31 00:00:03", + "2015-06-07 00:00:03", + "2015-06-14 00:00:03", + "2015-06-21 00:00:03", + ] + expected1 = DatetimeIndex( + expected1_list, dtype="datetime64[ns]", freq="Q-DEC", tz=None + ) + expected2 = DatetimeIndex( + expected2_list, dtype="datetime64[ns]", freq="W-SUN", tz=None + ) + tm.assert_index_equal(result1, expected1) + tm.assert_index_equal(result2, expected2) + + dt1, dt2 = "2017-01-01", "2017-01-01" + tz1, tz2 = "US/Eastern", "Europe/London" + + @pytest.mark.parametrize( + "start,end", + [ + (Timestamp(dt1, tz=tz1), Timestamp(dt2)), + (Timestamp(dt1), Timestamp(dt2, tz=tz2)), + (Timestamp(dt1, tz=tz1), Timestamp(dt2, 
tz=tz2)), + (Timestamp(dt1, tz=tz2), Timestamp(dt2, tz=tz1)), + ], + ) + def test_mismatching_tz_raises_err(self, start, end): + # issue 18488 + msg = "Start and end cannot both be tz-aware with different timezones" + with pytest.raises(TypeError, match=msg): + date_range(start, end) + with pytest.raises(TypeError, match=msg): + date_range(start, end, freq=BDay()) + + +class TestBusinessDateRange: + def test_constructor(self): + bdate_range(START, END, freq=BDay()) + bdate_range(START, periods=20, freq=BDay()) + bdate_range(end=START, periods=20, freq=BDay()) + + msg = "periods must be a number, got B" + with pytest.raises(TypeError, match=msg): + date_range("2011-1-1", "2012-1-1", "B") + + with pytest.raises(TypeError, match=msg): + bdate_range("2011-1-1", "2012-1-1", "B") + + msg = "freq must be specified for bdate_range; use date_range instead" + with pytest.raises(TypeError, match=msg): + bdate_range(START, END, periods=10, freq=None) + + def test_misc(self): + end = datetime(2009, 5, 13) + dr = bdate_range(end=end, periods=20) + firstDate = end - 19 * BDay() + + assert len(dr) == 20 + assert dr[0] == firstDate + assert dr[-1] == end + + def test_date_parse_failure(self): + badly_formed_date = "2007/100/1" + + msg = "could not convert string to Timestamp" + with pytest.raises(ValueError, match=msg): + Timestamp(badly_formed_date) + + with pytest.raises(ValueError, match=msg): + bdate_range(start=badly_formed_date, periods=10) + + with pytest.raises(ValueError, match=msg): + bdate_range(end=badly_formed_date, periods=10) + + with pytest.raises(ValueError, match=msg): + bdate_range(badly_formed_date, badly_formed_date) + + def test_daterange_bug_456(self): + # GH #456 + rng1 = bdate_range("12/5/2011", "12/5/2011") + rng2 = bdate_range("12/2/2011", "12/5/2011") + assert rng2._data.freq == BDay() + + result = rng1.union(rng2) + assert isinstance(result, DatetimeIndex) + + @pytest.mark.parametrize("inclusive", ["left", "right", "neither", "both"]) + def test_bdays_and_open_boundaries(self, inclusive): + # GH 6673 + start = "2018-07-21" # Saturday + end = "2018-07-29" # Sunday + result = date_range(start, end, freq="B", inclusive=inclusive) + + bday_start = "2018-07-23" # Monday + bday_end = "2018-07-27" # Friday + expected = date_range(bday_start, bday_end, freq="D") + tm.assert_index_equal(result, expected) + # Note: we do _not_ expect the freqs to match here + + def test_bday_near_overflow(self): + # GH#24252 avoid doing unnecessary addition that _would_ overflow + start = Timestamp.max.floor("D").to_pydatetime() + rng = date_range(start, end=None, periods=1, freq="B") + expected = DatetimeIndex([start], freq="B") + tm.assert_index_equal(rng, expected) + + def test_bday_overflow_error(self): + # GH#24252 check that we get OutOfBoundsDatetime and not OverflowError + msg = "Out of bounds nanosecond timestamp" + start = Timestamp.max.floor("D").to_pydatetime() + with pytest.raises(OutOfBoundsDatetime, match=msg): + date_range(start, periods=2, freq="B") + + +class TestCustomDateRange: + def test_constructor(self): + bdate_range(START, END, freq=CDay()) + bdate_range(START, periods=20, freq=CDay()) + bdate_range(end=START, periods=20, freq=CDay()) + + msg = "periods must be a number, got C" + with pytest.raises(TypeError, match=msg): + date_range("2011-1-1", "2012-1-1", "C") + + with pytest.raises(TypeError, match=msg): + bdate_range("2011-1-1", "2012-1-1", "C") + + def test_misc(self): + end = datetime(2009, 5, 13) + dr = bdate_range(end=end, periods=20, freq="C") + firstDate = end - 19 * 
CDay() + + assert len(dr) == 20 + assert dr[0] == firstDate + assert dr[-1] == end + + def test_daterange_bug_456(self): + # GH #456 + rng1 = bdate_range("12/5/2011", "12/5/2011", freq="C") + rng2 = bdate_range("12/2/2011", "12/5/2011", freq="C") + assert rng2._data.freq == CDay() + + result = rng1.union(rng2) + assert isinstance(result, DatetimeIndex) + + def test_cdaterange(self): + result = bdate_range("2013-05-01", periods=3, freq="C") + expected = DatetimeIndex(["2013-05-01", "2013-05-02", "2013-05-03"], freq="C") + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + def test_cdaterange_weekmask(self): + result = bdate_range( + "2013-05-01", periods=3, freq="C", weekmask="Sun Mon Tue Wed Thu" + ) + expected = DatetimeIndex( + ["2013-05-01", "2013-05-02", "2013-05-05"], freq=result.freq + ) + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + # raise with non-custom freq + msg = ( + "a custom frequency string is required when holidays or " + "weekmask are passed, got frequency B" + ) + with pytest.raises(ValueError, match=msg): + bdate_range("2013-05-01", periods=3, weekmask="Sun Mon Tue Wed Thu") + + def test_cdaterange_holidays(self): + result = bdate_range("2013-05-01", periods=3, freq="C", holidays=["2013-05-01"]) + expected = DatetimeIndex( + ["2013-05-02", "2013-05-03", "2013-05-06"], freq=result.freq + ) + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + # raise with non-custom freq + msg = ( + "a custom frequency string is required when holidays or " + "weekmask are passed, got frequency B" + ) + with pytest.raises(ValueError, match=msg): + bdate_range("2013-05-01", periods=3, holidays=["2013-05-01"]) + + def test_cdaterange_weekmask_and_holidays(self): + result = bdate_range( + "2013-05-01", + periods=3, + freq="C", + weekmask="Sun Mon Tue Wed Thu", + holidays=["2013-05-01"], + ) + expected = DatetimeIndex( + ["2013-05-02", "2013-05-05", "2013-05-06"], freq=result.freq + ) + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + # raise with non-custom freq + msg = ( + "a custom frequency string is required when holidays or " + "weekmask are passed, got frequency B" + ) + with pytest.raises(ValueError, match=msg): + bdate_range( + "2013-05-01", + periods=3, + weekmask="Sun Mon Tue Wed Thu", + holidays=["2013-05-01"], + ) + + @pytest.mark.parametrize( + "freq", [freq for freq in prefix_mapping if freq.startswith("C")] + ) + def test_all_custom_freq(self, freq): + # should not raise + bdate_range( + START, END, freq=freq, weekmask="Mon Wed Fri", holidays=["2009-03-14"] + ) + + bad_freq = freq + "FOO" + msg = f"invalid custom frequency string: {bad_freq}" + with pytest.raises(ValueError, match=msg): + bdate_range(START, END, freq=bad_freq) + + @pytest.mark.parametrize( + "start_end", + [ + ("2018-01-01T00:00:01.000Z", "2018-01-03T00:00:01.000Z"), + ("2018-01-01T00:00:00.010Z", "2018-01-03T00:00:00.010Z"), + ("2001-01-01T00:00:00.010Z", "2001-01-03T00:00:00.010Z"), + ], + ) + def test_range_with_millisecond_resolution(self, start_end): + # https://github.com/pandas-dev/pandas/issues/24110 + start, end = start_end + result = date_range(start=start, end=end, periods=2, inclusive="left") + expected = DatetimeIndex([start]) + tm.assert_index_equal(result, expected) + + +def test_date_range_with_custom_holidays(): + # GH 30593 + freq = offsets.CustomBusinessHour(start="15:00", holidays=["2020-11-26"]) + result = date_range(start="2020-11-25 15:00", periods=4, 
freq=freq) + expected = DatetimeIndex( + [ + "2020-11-25 15:00:00", + "2020-11-25 16:00:00", + "2020-11-27 15:00:00", + "2020-11-27 16:00:00", + ], + freq=freq, + ) + tm.assert_index_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_datetime.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_datetime.py new file mode 100644 index 0000000..5c85221 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_datetime.py @@ -0,0 +1,168 @@ +from datetime import date + +import dateutil +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + DatetimeIndex, + Index, + Timestamp, + date_range, + offsets, +) +import pandas._testing as tm + + +class TestDatetimeIndex: + def test_time_overflow_for_32bit_machines(self): + # GH8943. On some machines NumPy defaults to np.int32 (for example, + # 32-bit Linux machines). In the function _generate_regular_range + # found in tseries/index.py, `periods` gets multiplied by `strides` + # (which has value 1e9) and since the max value for np.int32 is ~2e9, + # and since those machines won't promote np.int32 to np.int64, we get + # overflow. + periods = np.int_(1000) + + idx1 = date_range(start="2000", periods=periods, freq="S") + assert len(idx1) == periods + + idx2 = date_range(end="2000", periods=periods, freq="S") + assert len(idx2) == periods + + def test_nat(self): + assert DatetimeIndex([np.nan])[0] is pd.NaT + + def test_week_of_month_frequency(self): + # GH 5348: "ValueError: Could not evaluate WOM-1SUN" shouldn't raise + d1 = date(2002, 9, 1) + d2 = date(2013, 10, 27) + d3 = date(2012, 9, 30) + idx1 = DatetimeIndex([d1, d2]) + idx2 = DatetimeIndex([d3]) + result_append = idx1.append(idx2) + expected = DatetimeIndex([d1, d2, d3]) + tm.assert_index_equal(result_append, expected) + result_union = idx1.union(idx2) + expected = DatetimeIndex([d1, d3, d2]) + tm.assert_index_equal(result_union, expected) + + # GH 5115 + result = date_range("2013-1-1", periods=4, freq="WOM-1SAT") + dates = ["2013-01-05", "2013-02-02", "2013-03-02", "2013-04-06"] + expected = DatetimeIndex(dates, freq="WOM-1SAT") + tm.assert_index_equal(result, expected) + + def test_append_nondatetimeindex(self): + rng = date_range("1/1/2000", periods=10) + idx = Index(["a", "b", "c", "d"]) + + result = rng.append(idx) + assert isinstance(result[0], Timestamp) + + def test_iteration_preserves_tz(self): + # see gh-8890 + index = date_range("2012-01-01", periods=3, freq="H", tz="US/Eastern") + + for i, ts in enumerate(index): + result = ts + expected = index[i] + assert result == expected + + index = date_range( + "2012-01-01", periods=3, freq="H", tz=dateutil.tz.tzoffset(None, -28800) + ) + + for i, ts in enumerate(index): + result = ts + expected = index[i] + assert result._repr_base == expected._repr_base + assert result == expected + + # 9100 + index = DatetimeIndex( + ["2014-12-01 03:32:39.987000-08:00", "2014-12-01 04:12:34.987000-08:00"] + ) + for i, ts in enumerate(index): + result = ts + expected = index[i] + assert result._repr_base == expected._repr_base + assert result == expected + + @pytest.mark.parametrize("periods", [0, 9999, 10000, 10001]) + def test_iteration_over_chunksize(self, periods): + # GH21012 + + index = date_range("2000-01-01 00:00:00", periods=periods, freq="min") + num = 0 + for stamp in index: + assert index[num] == stamp + num += 1 + assert num == len(index) + + def test_misc_coverage(self): + rng = 
date_range("1/1/2000", periods=5) + result = rng.groupby(rng.day) + assert isinstance(list(result.values())[0][0], Timestamp) + + def test_groupby_function_tuple_1677(self): + df = DataFrame(np.random.rand(100), index=date_range("1/1/2000", periods=100)) + monthly_group = df.groupby(lambda x: (x.year, x.month)) + + result = monthly_group.mean() + assert isinstance(result.index[0], tuple) + + def assert_index_parameters(self, index): + assert index.freq == "40960N" + assert index.inferred_freq == "40960N" + + def test_ns_index(self): + nsamples = 400 + ns = int(1e9 / 24414) + dtstart = np.datetime64("2012-09-20T00:00:00") + + dt = dtstart + np.arange(nsamples) * np.timedelta64(ns, "ns") + freq = ns * offsets.Nano() + index = DatetimeIndex(dt, freq=freq, name="time") + self.assert_index_parameters(index) + + new_index = date_range(start=index[0], end=index[-1], freq=index.freq) + self.assert_index_parameters(new_index) + + def test_asarray_tz_naive(self): + # This shouldn't produce a warning. + idx = date_range("2000", periods=2) + # M8[ns] by default + result = np.asarray(idx) + + expected = np.array(["2000-01-01", "2000-01-02"], dtype="M8[ns]") + tm.assert_numpy_array_equal(result, expected) + + # optionally, object + result = np.asarray(idx, dtype=object) + + expected = np.array([Timestamp("2000-01-01"), Timestamp("2000-01-02")]) + tm.assert_numpy_array_equal(result, expected) + + def test_asarray_tz_aware(self): + tz = "US/Central" + idx = date_range("2000", periods=2, tz=tz) + expected = np.array(["2000-01-01T06", "2000-01-02T06"], dtype="M8[ns]") + result = np.asarray(idx, dtype="datetime64[ns]") + + tm.assert_numpy_array_equal(result, expected) + + # Old behavior with no warning + result = np.asarray(idx, dtype="M8[ns]") + + tm.assert_numpy_array_equal(result, expected) + + # Future behavior with no warning + expected = np.array( + [Timestamp("2000-01-01", tz=tz), Timestamp("2000-01-02", tz=tz)] + ) + result = np.asarray(idx, dtype=object) + + tm.assert_numpy_array_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_datetimelike.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_datetimelike.py new file mode 100644 index 0000000..31ec8c4 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_datetimelike.py @@ -0,0 +1,39 @@ +""" generic tests from the Datetimelike class """ +import pytest + +from pandas import ( + DatetimeIndex, + date_range, +) +import pandas._testing as tm +from pandas.tests.indexes.datetimelike import DatetimeLike + + +class TestDatetimeIndex(DatetimeLike): + _index_cls = DatetimeIndex + + @pytest.fixture + def simple_index(self) -> DatetimeIndex: + return date_range("20130101", periods=5) + + @pytest.fixture( + params=[tm.makeDateIndex(10), date_range("20130110", periods=10, freq="-1D")], + ids=["index_inc", "index_dec"], + ) + def index(self, request): + return request.param + + def test_format(self, simple_index): + # GH35439 + idx = simple_index + expected = [f"{x:%Y-%m-%d}" for x in idx] + assert idx.format() == expected + + def test_shift(self): + pass # handled in test_ops + + def test_intersection(self): + pass # handled in test_setops + + def test_union(self): + pass # handled in test_setops diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_delete.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_delete.py new file mode 100644 index 0000000..e9de5a0 --- /dev/null +++ 
b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_delete.py @@ -0,0 +1,138 @@ +import pytest + +from pandas import ( + DatetimeIndex, + Series, + date_range, +) +import pandas._testing as tm + + +class TestDelete: + def test_delete(self): + idx = date_range(start="2000-01-01", periods=5, freq="M", name="idx") + + # preserve freq + expected_0 = date_range(start="2000-02-01", periods=4, freq="M", name="idx") + expected_4 = date_range(start="2000-01-01", periods=4, freq="M", name="idx") + + # reset freq to None + expected_1 = DatetimeIndex( + ["2000-01-31", "2000-03-31", "2000-04-30", "2000-05-31"], + freq=None, + name="idx", + ) + + cases = { + 0: expected_0, + -5: expected_0, + -1: expected_4, + 4: expected_4, + 1: expected_1, + } + for n, expected in cases.items(): + result = idx.delete(n) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + + with pytest.raises((IndexError, ValueError), match="out of bounds"): + # either depending on numpy version + idx.delete(5) + + for tz in [None, "Asia/Tokyo", "US/Pacific"]: + idx = date_range( + start="2000-01-01 09:00", periods=10, freq="H", name="idx", tz=tz + ) + + expected = date_range( + start="2000-01-01 10:00", periods=9, freq="H", name="idx", tz=tz + ) + result = idx.delete(0) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freqstr == "H" + assert result.tz == expected.tz + + expected = date_range( + start="2000-01-01 09:00", periods=9, freq="H", name="idx", tz=tz + ) + result = idx.delete(-1) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freqstr == "H" + assert result.tz == expected.tz + + def test_delete_slice(self): + idx = date_range(start="2000-01-01", periods=10, freq="D", name="idx") + + # preserve freq + expected_0_2 = date_range(start="2000-01-04", periods=7, freq="D", name="idx") + expected_7_9 = date_range(start="2000-01-01", periods=7, freq="D", name="idx") + + # reset freq to None + expected_3_5 = DatetimeIndex( + [ + "2000-01-01", + "2000-01-02", + "2000-01-03", + "2000-01-07", + "2000-01-08", + "2000-01-09", + "2000-01-10", + ], + freq=None, + name="idx", + ) + + cases = { + (0, 1, 2): expected_0_2, + (7, 8, 9): expected_7_9, + (3, 4, 5): expected_3_5, + } + for n, expected in cases.items(): + result = idx.delete(n) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + + result = idx.delete(slice(n[0], n[-1] + 1)) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + + for tz in [None, "Asia/Tokyo", "US/Pacific"]: + ts = Series( + 1, + index=date_range( + "2000-01-01 09:00", periods=10, freq="H", name="idx", tz=tz + ), + ) + # preserve freq + result = ts.drop(ts.index[:5]).index + expected = date_range( + "2000-01-01 14:00", periods=5, freq="H", name="idx", tz=tz + ) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + assert result.tz == expected.tz + + # reset freq to None + result = ts.drop(ts.index[[1, 3, 5, 7, 9]]).index + expected = DatetimeIndex( + [ + "2000-01-01 09:00", + "2000-01-01 11:00", + "2000-01-01 13:00", + "2000-01-01 15:00", + "2000-01-01 17:00", + ], + freq=None, + name="idx", + tz=tz, + ) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + assert result.tz == 
expected.tz diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_formats.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_formats.py new file mode 100644 index 0000000..197038d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_formats.py @@ -0,0 +1,273 @@ +from datetime import datetime + +import dateutil.tz +import numpy as np +import pytest +import pytz + +import pandas as pd +from pandas import ( + DatetimeIndex, + Series, +) +import pandas._testing as tm + + +def test_to_native_types_method_deprecated(): + index = pd.date_range(freq="1D", periods=3, start="2017-01-01") + expected = np.array(["2017-01-01", "2017-01-02", "2017-01-03"], dtype=object) + + with tm.assert_produces_warning(FutureWarning): + result = index.to_native_types() + + tm.assert_numpy_array_equal(result, expected) + + # Make sure slicing works + expected = np.array(["2017-01-01", "2017-01-03"], dtype=object) + + with tm.assert_produces_warning(FutureWarning): + result = index.to_native_types([0, 2]) + + tm.assert_numpy_array_equal(result, expected) + + +def test_to_native_types(): + index = pd.date_range(freq="1D", periods=3, start="2017-01-01") + + # First, with no arguments. + expected = np.array(["2017-01-01", "2017-01-02", "2017-01-03"], dtype=object) + + result = index._format_native_types() + tm.assert_numpy_array_equal(result, expected) + + # No NaN values, so na_rep has no effect + result = index._format_native_types(na_rep="pandas") + tm.assert_numpy_array_equal(result, expected) + + # Make sure date formatting works + expected = np.array(["01-2017-01", "01-2017-02", "01-2017-03"], dtype=object) + + result = index._format_native_types(date_format="%m-%Y-%d") + tm.assert_numpy_array_equal(result, expected) + + # NULL object handling should work + index = DatetimeIndex(["2017-01-01", pd.NaT, "2017-01-03"]) + expected = np.array(["2017-01-01", "NaT", "2017-01-03"], dtype=object) + + result = index._format_native_types() + tm.assert_numpy_array_equal(result, expected) + + expected = np.array(["2017-01-01", "pandas", "2017-01-03"], dtype=object) + + result = index._format_native_types(na_rep="pandas") + tm.assert_numpy_array_equal(result, expected) + + +class TestDatetimeIndexRendering: + def test_dti_repr_short(self): + dr = pd.date_range(start="1/1/2012", periods=1) + repr(dr) + + dr = pd.date_range(start="1/1/2012", periods=2) + repr(dr) + + dr = pd.date_range(start="1/1/2012", periods=3) + repr(dr) + + @pytest.mark.parametrize("method", ["__repr__", "__str__"]) + def test_dti_representation(self, method): + idxs = [] + idxs.append(DatetimeIndex([], freq="D")) + idxs.append(DatetimeIndex(["2011-01-01"], freq="D")) + idxs.append(DatetimeIndex(["2011-01-01", "2011-01-02"], freq="D")) + idxs.append(DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D")) + idxs.append( + DatetimeIndex( + ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"], + freq="H", + tz="Asia/Tokyo", + ) + ) + idxs.append( + DatetimeIndex( + ["2011-01-01 09:00", "2011-01-01 10:00", pd.NaT], tz="US/Eastern" + ) + ) + idxs.append( + DatetimeIndex(["2011-01-01 09:00", "2011-01-01 10:00", pd.NaT], tz="UTC") + ) + + exp = [] + exp.append("DatetimeIndex([], dtype='datetime64[ns]', freq='D')") + exp.append("DatetimeIndex(['2011-01-01'], dtype='datetime64[ns]', freq='D')") + exp.append( + "DatetimeIndex(['2011-01-01', '2011-01-02'], " + "dtype='datetime64[ns]', freq='D')" + ) + exp.append( + "DatetimeIndex(['2011-01-01', '2011-01-02', 
'2011-01-03'], " + "dtype='datetime64[ns]', freq='D')" + ) + exp.append( + "DatetimeIndex(['2011-01-01 09:00:00+09:00', " + "'2011-01-01 10:00:00+09:00', '2011-01-01 11:00:00+09:00']" + ", dtype='datetime64[ns, Asia/Tokyo]', freq='H')" + ) + exp.append( + "DatetimeIndex(['2011-01-01 09:00:00-05:00', " + "'2011-01-01 10:00:00-05:00', 'NaT'], " + "dtype='datetime64[ns, US/Eastern]', freq=None)" + ) + exp.append( + "DatetimeIndex(['2011-01-01 09:00:00+00:00', " + "'2011-01-01 10:00:00+00:00', 'NaT'], " + "dtype='datetime64[ns, UTC]', freq=None)" + "" + ) + + with pd.option_context("display.width", 300): + for indx, expected in zip(idxs, exp): + result = getattr(indx, method)() + assert result == expected + + def test_dti_representation_to_series(self): + idx1 = DatetimeIndex([], freq="D") + idx2 = DatetimeIndex(["2011-01-01"], freq="D") + idx3 = DatetimeIndex(["2011-01-01", "2011-01-02"], freq="D") + idx4 = DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D") + idx5 = DatetimeIndex( + ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"], + freq="H", + tz="Asia/Tokyo", + ) + idx6 = DatetimeIndex( + ["2011-01-01 09:00", "2011-01-01 10:00", pd.NaT], tz="US/Eastern" + ) + idx7 = DatetimeIndex(["2011-01-01 09:00", "2011-01-02 10:15"]) + + exp1 = """Series([], dtype: datetime64[ns])""" + + exp2 = "0 2011-01-01\ndtype: datetime64[ns]" + + exp3 = "0 2011-01-01\n1 2011-01-02\ndtype: datetime64[ns]" + + exp4 = ( + "0 2011-01-01\n" + "1 2011-01-02\n" + "2 2011-01-03\n" + "dtype: datetime64[ns]" + ) + + exp5 = ( + "0 2011-01-01 09:00:00+09:00\n" + "1 2011-01-01 10:00:00+09:00\n" + "2 2011-01-01 11:00:00+09:00\n" + "dtype: datetime64[ns, Asia/Tokyo]" + ) + + exp6 = ( + "0 2011-01-01 09:00:00-05:00\n" + "1 2011-01-01 10:00:00-05:00\n" + "2 NaT\n" + "dtype: datetime64[ns, US/Eastern]" + ) + + exp7 = ( + "0 2011-01-01 09:00:00\n" + "1 2011-01-02 10:15:00\n" + "dtype: datetime64[ns]" + ) + + with pd.option_context("display.width", 300): + for idx, expected in zip( + [idx1, idx2, idx3, idx4, idx5, idx6, idx7], + [exp1, exp2, exp3, exp4, exp5, exp6, exp7], + ): + result = repr(Series(idx)) + assert result == expected + + def test_dti_summary(self): + # GH#9116 + idx1 = DatetimeIndex([], freq="D") + idx2 = DatetimeIndex(["2011-01-01"], freq="D") + idx3 = DatetimeIndex(["2011-01-01", "2011-01-02"], freq="D") + idx4 = DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D") + idx5 = DatetimeIndex( + ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"], + freq="H", + tz="Asia/Tokyo", + ) + idx6 = DatetimeIndex( + ["2011-01-01 09:00", "2011-01-01 10:00", pd.NaT], tz="US/Eastern" + ) + + exp1 = "DatetimeIndex: 0 entries\nFreq: D" + + exp2 = "DatetimeIndex: 1 entries, 2011-01-01 to 2011-01-01\nFreq: D" + + exp3 = "DatetimeIndex: 2 entries, 2011-01-01 to 2011-01-02\nFreq: D" + + exp4 = "DatetimeIndex: 3 entries, 2011-01-01 to 2011-01-03\nFreq: D" + + exp5 = ( + "DatetimeIndex: 3 entries, 2011-01-01 09:00:00+09:00 " + "to 2011-01-01 11:00:00+09:00\n" + "Freq: H" + ) + + exp6 = """DatetimeIndex: 3 entries, 2011-01-01 09:00:00-05:00 to NaT""" + + for idx, expected in zip( + [idx1, idx2, idx3, idx4, idx5, idx6], [exp1, exp2, exp3, exp4, exp5, exp6] + ): + result = idx._summary() + assert result == expected + + def test_dti_business_repr(self): + # only really care that it works + repr(pd.bdate_range(datetime(2009, 1, 1), datetime(2010, 1, 1))) + + def test_dti_business_summary(self): + rng = pd.bdate_range(datetime(2009, 1, 1), datetime(2010, 1, 1)) + rng._summary() + 
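# [editor's note, not part of the upstream test: rng[2:2] is an empty
+ # slice, so the next call also exercises _summary() on an empty
+ # business-day index] +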
rng[2:2]._summary() + + def test_dti_business_summary_pytz(self): + pd.bdate_range("1/1/2005", "1/1/2009", tz=pytz.utc)._summary() + + def test_dti_business_summary_dateutil(self): + pd.bdate_range("1/1/2005", "1/1/2009", tz=dateutil.tz.tzutc())._summary() + + def test_dti_custom_business_repr(self): + # only really care that it works + repr(pd.bdate_range(datetime(2009, 1, 1), datetime(2010, 1, 1), freq="C")) + + def test_dti_custom_business_summary(self): + rng = pd.bdate_range(datetime(2009, 1, 1), datetime(2010, 1, 1), freq="C") + rng._summary() + rng[2:2]._summary() + + def test_dti_custom_business_summary_pytz(self): + pd.bdate_range("1/1/2005", "1/1/2009", freq="C", tz=pytz.utc)._summary() + + def test_dti_custom_business_summary_dateutil(self): + pd.bdate_range( + "1/1/2005", "1/1/2009", freq="C", tz=dateutil.tz.tzutc() + )._summary() + + +class TestFormat: + def test_format_with_name_time_info(self): + # bug I fixed 12/20/2011 + dates = pd.date_range("2011-01-01 04:00:00", periods=10, name="something") + + formatted = dates.format(name=True) + assert formatted[0] == "something" + + def test_format_datetime_with_time(self): + dti = DatetimeIndex([datetime(2012, 2, 7), datetime(2012, 2, 7, 23)]) + + result = dti.format() + expected = ["2012-02-07 00:00:00", "2012-02-07 23:00:00"] + assert len(result) == 2 + assert result == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_freq_attr.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_freq_attr.py new file mode 100644 index 0000000..f5821a3 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_freq_attr.py @@ -0,0 +1,61 @@ +import pytest + +from pandas import ( + DatetimeIndex, + date_range, +) + +from pandas.tseries.offsets import ( + BDay, + DateOffset, + Day, + Hour, +) + + +class TestFreq: + def test_freq_setter_errors(self): + # GH#20678 + idx = DatetimeIndex(["20180101", "20180103", "20180105"]) + + # setting with an incompatible freq + msg = ( + "Inferred frequency 2D from passed values does not conform to " + "passed frequency 5D" + ) + with pytest.raises(ValueError, match=msg): + idx._data.freq = "5D" + + # setting with non-freq string + with pytest.raises(ValueError, match="Invalid frequency"): + idx._data.freq = "foo" + + @pytest.mark.parametrize("values", [["20180101", "20180103", "20180105"], []]) + @pytest.mark.parametrize("freq", ["2D", Day(2), "2B", BDay(2), "48H", Hour(48)]) + @pytest.mark.parametrize("tz", [None, "US/Eastern"]) + def test_freq_setter(self, values, freq, tz): + # GH#20678 + idx = DatetimeIndex(values, tz=tz) + + # can set to an offset, converting from string if necessary + idx._data.freq = freq + assert idx.freq == freq + assert isinstance(idx.freq, DateOffset) + + # can reset to None + idx._data.freq = None + assert idx.freq is None + + def test_freq_view_safe(self): + # Setting the freq for one DatetimeIndex shouldn't alter the freq + # for another that views the same data + + dti = date_range("2016-01-01", periods=5) + dta = dti._data + + dti2 = DatetimeIndex(dta)._with_freq(None) + assert dti2.freq is None + + # Original was not altered + assert dti.freq == "D" + assert dta.freq == "D" diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_indexing.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_indexing.py new file mode 100644 index 0000000..b8f72a8 --- /dev/null +++ 
b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_indexing.py @@ -0,0 +1,779 @@ +from datetime import ( + date, + datetime, + time, + timedelta, +) + +import numpy as np +import pytest + +from pandas.errors import InvalidIndexError + +import pandas as pd +from pandas import ( + DatetimeIndex, + Index, + Timestamp, + bdate_range, + date_range, + notna, +) +import pandas._testing as tm + +from pandas.tseries.frequencies import to_offset + +START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) + + +class TestGetItem: + def test_getitem_slice_keeps_name(self): + # GH4226 + st = Timestamp("2013-07-01 00:00:00", tz="America/Los_Angeles") + et = Timestamp("2013-07-02 00:00:00", tz="America/Los_Angeles") + dr = date_range(st, et, freq="H", name="timebucket") + assert dr[1:].name == dr.name + + def test_getitem(self): + idx1 = date_range("2011-01-01", "2011-01-31", freq="D", name="idx") + idx2 = date_range( + "2011-01-01", "2011-01-31", freq="D", tz="Asia/Tokyo", name="idx" + ) + + for idx in [idx1, idx2]: + result = idx[0] + assert result == Timestamp("2011-01-01", tz=idx.tz) + + result = idx[0:5] + expected = date_range( + "2011-01-01", "2011-01-05", freq="D", tz=idx.tz, name="idx" + ) + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx[0:10:2] + expected = date_range( + "2011-01-01", "2011-01-09", freq="2D", tz=idx.tz, name="idx" + ) + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx[-20:-5:3] + expected = date_range( + "2011-01-12", "2011-01-24", freq="3D", tz=idx.tz, name="idx" + ) + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx[4::-1] + expected = DatetimeIndex( + ["2011-01-05", "2011-01-04", "2011-01-03", "2011-01-02", "2011-01-01"], + freq="-1D", + tz=idx.tz, + name="idx", + ) + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + @pytest.mark.parametrize("freq", ["B", "C"]) + def test_dti_business_getitem(self, freq): + rng = bdate_range(START, END, freq=freq) + smaller = rng[:5] + exp = DatetimeIndex(rng.view(np.ndarray)[:5], freq=freq) + tm.assert_index_equal(smaller, exp) + assert smaller.freq == exp.freq + assert smaller.freq == rng.freq + + sliced = rng[::5] + assert sliced.freq == to_offset(freq) * 5 + + fancy_indexed = rng[[4, 3, 2, 1, 0]] + assert len(fancy_indexed) == 5 + assert isinstance(fancy_indexed, DatetimeIndex) + assert fancy_indexed.freq is None + + # 32-bit vs. 
64-bit platforms + assert rng[4] == rng[np.int_(4)] + + @pytest.mark.parametrize("freq", ["B", "C"]) + def test_dti_business_getitem_matplotlib_hackaround(self, freq): + rng = bdate_range(START, END, freq=freq) + with tm.assert_produces_warning(FutureWarning): + # GH#30588 multi-dimensional indexing deprecated + values = rng[:, None] + expected = rng.values[:, None] + tm.assert_numpy_array_equal(values, expected) + + def test_getitem_int_list(self): + dti = date_range(start="1/1/2005", end="12/1/2005", freq="M") + dti2 = dti[[1, 3, 5]] + + v1 = dti2[0] + v2 = dti2[1] + v3 = dti2[2] + + assert v1 == Timestamp("2/28/2005") + assert v2 == Timestamp("4/30/2005") + assert v3 == Timestamp("6/30/2005") + + # getitem with non-slice drops freq + assert dti2.freq is None + + +class TestWhere: + def test_where_doesnt_retain_freq(self): + dti = date_range("20130101", periods=3, freq="D", name="idx") + cond = [True, True, False] + expected = DatetimeIndex([dti[0], dti[1], dti[0]], freq=None, name="idx") + + result = dti.where(cond, dti[::-1]) + tm.assert_index_equal(result, expected) + + def test_where_other(self): + # other is ndarray or Index + i = date_range("20130101", periods=3, tz="US/Eastern") + + for arr in [np.nan, pd.NaT]: + result = i.where(notna(i), other=arr) + expected = i + tm.assert_index_equal(result, expected) + + i2 = i.copy() + i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist()) + result = i.where(notna(i2), i2) + tm.assert_index_equal(result, i2) + + i2 = i.copy() + i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist()) + result = i.where(notna(i2), i2._values) + tm.assert_index_equal(result, i2) + + def test_where_invalid_dtypes(self): + dti = date_range("20130101", periods=3, tz="US/Eastern") + + tail = dti[2:].tolist() + i2 = Index([pd.NaT, pd.NaT] + tail) + + mask = notna(i2) + + # passing tz-naive ndarray to tzaware DTI + result = dti.where(mask, i2.values) + expected = Index([pd.NaT.asm8, pd.NaT.asm8] + tail, dtype=object) + tm.assert_index_equal(result, expected) + + # passing tz-aware DTI to tznaive DTI + naive = dti.tz_localize(None) + result = naive.where(mask, i2) + expected = Index([i2[0], i2[1]] + naive[2:].tolist(), dtype=object) + tm.assert_index_equal(result, expected) + + pi = i2.tz_localize(None).to_period("D") + result = dti.where(mask, pi) + expected = Index([pi[0], pi[1]] + tail, dtype=object) + tm.assert_index_equal(result, expected) + + tda = i2.asi8.view("timedelta64[ns]") + result = dti.where(mask, tda) + expected = Index([tda[0], tda[1]] + tail, dtype=object) + assert isinstance(expected[0], np.timedelta64) + tm.assert_index_equal(result, expected) + + result = dti.where(mask, i2.asi8) + expected = Index([pd.NaT.value, pd.NaT.value] + tail, dtype=object) + assert isinstance(expected[0], int) + tm.assert_index_equal(result, expected) + + # non-matching scalar + td = pd.Timedelta(days=4) + result = dti.where(mask, td) + expected = Index([td, td] + tail, dtype=object) + assert expected[0] is td + tm.assert_index_equal(result, expected) + + def test_where_mismatched_nat(self, tz_aware_fixture): + tz = tz_aware_fixture + dti = date_range("2013-01-01", periods=3, tz=tz) + cond = np.array([True, False, True]) + + tdnat = np.timedelta64("NaT", "ns") + expected = Index([dti[0], tdnat, dti[2]], dtype=object) + assert expected[1] is tdnat + + result = dti.where(cond, tdnat) + tm.assert_index_equal(result, expected) + + def test_where_tz(self): + i = date_range("20130101", periods=3, tz="US/Eastern") + result = i.where(notna(i)) + expected = i + tm.assert_index_equal(result, 
expected) + + i2 = i.copy() + i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist()) + result = i.where(notna(i2)) + expected = i2 + tm.assert_index_equal(result, expected) + + +class TestTake: + def test_take_nan_first_datetime(self): + index = DatetimeIndex([pd.NaT, Timestamp("20130101"), Timestamp("20130102")]) + result = index.take([-1, 0, 1]) + expected = DatetimeIndex([index[-1], index[0], index[1]]) + tm.assert_index_equal(result, expected) + + def test_take(self): + # GH#10295 + idx1 = date_range("2011-01-01", "2011-01-31", freq="D", name="idx") + idx2 = date_range( + "2011-01-01", "2011-01-31", freq="D", tz="Asia/Tokyo", name="idx" + ) + + for idx in [idx1, idx2]: + result = idx.take([0]) + assert result == Timestamp("2011-01-01", tz=idx.tz) + + result = idx.take([0, 1, 2]) + expected = date_range( + "2011-01-01", "2011-01-03", freq="D", tz=idx.tz, name="idx" + ) + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx.take([0, 2, 4]) + expected = date_range( + "2011-01-01", "2011-01-05", freq="2D", tz=idx.tz, name="idx" + ) + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx.take([7, 4, 1]) + expected = date_range( + "2011-01-08", "2011-01-02", freq="-3D", tz=idx.tz, name="idx" + ) + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx.take([3, 2, 5]) + expected = DatetimeIndex( + ["2011-01-04", "2011-01-03", "2011-01-06"], + freq=None, + tz=idx.tz, + name="idx", + ) + tm.assert_index_equal(result, expected) + assert result.freq is None + + result = idx.take([-3, 2, 5]) + expected = DatetimeIndex( + ["2011-01-29", "2011-01-03", "2011-01-06"], + freq=None, + tz=idx.tz, + name="idx", + ) + tm.assert_index_equal(result, expected) + assert result.freq is None + + def test_take_invalid_kwargs(self): + idx = date_range("2011-01-01", "2011-01-31", freq="D", name="idx") + indices = [1, 6, 5, 9, 10, 13, 15, 3] + + msg = r"take\(\) got an unexpected keyword argument 'foo'" + with pytest.raises(TypeError, match=msg): + idx.take(indices, foo=2) + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + idx.take(indices, out=indices) + + msg = "the 'mode' parameter is not supported" + with pytest.raises(ValueError, match=msg): + idx.take(indices, mode="clip") + + # TODO: This method came from test_datetime; de-dup with version above + @pytest.mark.parametrize("tz", [None, "US/Eastern", "Asia/Tokyo"]) + def test_take2(self, tz): + dates = [ + datetime(2010, 1, 1, 14), + datetime(2010, 1, 1, 15), + datetime(2010, 1, 1, 17), + datetime(2010, 1, 1, 21), + ] + + idx = date_range( + start="2010-01-01 09:00", + end="2010-02-01 09:00", + freq="H", + tz=tz, + name="idx", + ) + expected = DatetimeIndex(dates, freq=None, name="idx", tz=tz) + + taken1 = idx.take([5, 6, 8, 12]) + taken2 = idx[[5, 6, 8, 12]] + + for taken in [taken1, taken2]: + tm.assert_index_equal(taken, expected) + assert isinstance(taken, DatetimeIndex) + assert taken.freq is None + assert taken.tz == expected.tz + assert taken.name == expected.name + + def test_take_fill_value(self): + # GH#12631 + idx = DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx") + result = idx.take(np.array([1, 0, -1])) + expected = DatetimeIndex(["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx") + tm.assert_index_equal(result, expected) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + expected = DatetimeIndex(["2011-02-01", "2011-01-01", "NaT"], 
name="xxx") + tm.assert_index_equal(result, expected) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = DatetimeIndex(["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx") + tm.assert_index_equal(result, expected) + + msg = ( + "When allow_fill=True and fill_value is not None, " + "all indices must be >= -1" + ) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + msg = "out of bounds" + with pytest.raises(IndexError, match=msg): + idx.take(np.array([1, -5])) + + def test_take_fill_value_with_timezone(self): + idx = DatetimeIndex( + ["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx", tz="US/Eastern" + ) + result = idx.take(np.array([1, 0, -1])) + expected = DatetimeIndex( + ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx", tz="US/Eastern" + ) + tm.assert_index_equal(result, expected) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + expected = DatetimeIndex( + ["2011-02-01", "2011-01-01", "NaT"], name="xxx", tz="US/Eastern" + ) + tm.assert_index_equal(result, expected) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = DatetimeIndex( + ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx", tz="US/Eastern" + ) + tm.assert_index_equal(result, expected) + + msg = ( + "When allow_fill=True and fill_value is not None, " + "all indices must be >= -1" + ) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + msg = "out of bounds" + with pytest.raises(IndexError, match=msg): + idx.take(np.array([1, -5])) + + +class TestGetLoc: + @pytest.mark.parametrize("method", [None, "pad", "backfill", "nearest"]) + @pytest.mark.filterwarnings("ignore:Passing method:FutureWarning") + def test_get_loc_method_exact_match(self, method): + idx = date_range("2000-01-01", periods=3) + assert idx.get_loc(idx[1], method) == 1 + assert idx.get_loc(idx[1].to_pydatetime(), method) == 1 + assert idx.get_loc(str(idx[1]), method) == 1 + + if method is not None: + assert idx.get_loc(idx[1], method, tolerance=pd.Timedelta("0 days")) == 1 + + @pytest.mark.filterwarnings("ignore:Passing method:FutureWarning") + def test_get_loc(self): + idx = date_range("2000-01-01", periods=3) + + assert idx.get_loc("2000-01-01", method="nearest") == 0 + assert idx.get_loc("2000-01-01T12", method="nearest") == 1 + + assert idx.get_loc("2000-01-01T12", method="nearest", tolerance="1 day") == 1 + assert ( + idx.get_loc("2000-01-01T12", method="nearest", tolerance=pd.Timedelta("1D")) + == 1 + ) + assert ( + idx.get_loc( + "2000-01-01T12", method="nearest", tolerance=np.timedelta64(1, "D") + ) + == 1 + ) + assert ( + idx.get_loc("2000-01-01T12", method="nearest", tolerance=timedelta(1)) == 1 + ) + with pytest.raises(ValueError, match="unit abbreviation w/o a number"): + idx.get_loc("2000-01-01T12", method="nearest", tolerance="foo") + with pytest.raises(KeyError, match="'2000-01-01T03'"): + idx.get_loc("2000-01-01T03", method="nearest", tolerance="2 hours") + with pytest.raises( + ValueError, match="tolerance size must match target index size" + ): + idx.get_loc( + "2000-01-01", + method="nearest", + tolerance=[ + pd.Timedelta("1day").to_timedelta64(), + pd.Timedelta("1day").to_timedelta64(), + ], + ) + + 
assert idx.get_loc("2000", method="nearest") == slice(0, 3) + assert idx.get_loc("2000-01", method="nearest") == slice(0, 3) + + assert idx.get_loc("1999", method="nearest") == 0 + assert idx.get_loc("2001", method="nearest") == 2 + + with pytest.raises(KeyError, match="'1999'"): + idx.get_loc("1999", method="pad") + with pytest.raises(KeyError, match="'2001'"): + idx.get_loc("2001", method="backfill") + + with pytest.raises(KeyError, match="'foobar'"): + idx.get_loc("foobar") + with pytest.raises(InvalidIndexError, match=r"slice\(None, 2, None\)"): + idx.get_loc(slice(2)) + + idx = DatetimeIndex(["2000-01-01", "2000-01-04"]) + assert idx.get_loc("2000-01-02", method="nearest") == 0 + assert idx.get_loc("2000-01-03", method="nearest") == 1 + assert idx.get_loc("2000-01", method="nearest") == slice(0, 2) + + def test_get_loc_time_obj(self): + # time indexing + idx = date_range("2000-01-01", periods=24, freq="H") + + result = idx.get_loc(time(12)) + expected = np.array([12]) + tm.assert_numpy_array_equal(result, expected, check_dtype=False) + + result = idx.get_loc(time(12, 30)) + expected = np.array([]) + tm.assert_numpy_array_equal(result, expected, check_dtype=False) + + msg = "cannot yet lookup inexact labels when key is a time object" + with pytest.raises(NotImplementedError, match=msg): + with tm.assert_produces_warning(FutureWarning, match="deprecated"): + idx.get_loc(time(12, 30), method="pad") + + def test_get_loc_time_obj2(self): + # GH#8667 + + from pandas._libs.index import _SIZE_CUTOFF + + ns = _SIZE_CUTOFF + np.array([-100, 100], dtype=np.int64) + key = time(15, 11, 30) + start = key.hour * 3600 + key.minute * 60 + key.second + step = 24 * 3600 + + for n in ns: + idx = date_range("2014-11-26", periods=n, freq="S") + ts = pd.Series(np.random.randn(n), index=idx) + locs = np.arange(start, n, step, dtype=np.intp) + + result = ts.index.get_loc(key) + tm.assert_numpy_array_equal(result, locs) + tm.assert_series_equal(ts[key], ts.iloc[locs]) + + left, right = ts.copy(), ts.copy() + left[key] *= -10 + right.iloc[locs] *= -10 + tm.assert_series_equal(left, right) + + def test_get_loc_time_nat(self): + # GH#35114 + # Case where key's total microseconds happens to match iNaT % 1e6 // 1000 + tic = time(minute=12, second=43, microsecond=145224) + dti = DatetimeIndex([pd.NaT]) + + loc = dti.get_loc(tic) + expected = np.array([], dtype=np.intp) + tm.assert_numpy_array_equal(loc, expected) + + def test_get_loc_tz_aware(self): + # https://github.com/pandas-dev/pandas/issues/32140 + dti = date_range( + Timestamp("2019-12-12 00:00:00", tz="US/Eastern"), + Timestamp("2019-12-13 00:00:00", tz="US/Eastern"), + freq="5s", + ) + key = Timestamp("2019-12-12 10:19:25", tz="US/Eastern") + with tm.assert_produces_warning(FutureWarning, match="deprecated"): + result = dti.get_loc(key, method="nearest") + assert result == 7433 + + def test_get_loc_nat(self): + # GH#20464 + index = DatetimeIndex(["1/3/2000", "NaT"]) + assert index.get_loc(pd.NaT) == 1 + + assert index.get_loc(None) == 1 + + assert index.get_loc(np.nan) == 1 + + assert index.get_loc(pd.NA) == 1 + + assert index.get_loc(np.datetime64("NaT")) == 1 + + with pytest.raises(KeyError, match="NaT"): + index.get_loc(np.timedelta64("NaT")) + + @pytest.mark.parametrize("key", [pd.Timedelta(0), pd.Timedelta(1), timedelta(0)]) + def test_get_loc_timedelta_invalid_key(self, key): + # GH#20464 + dti = date_range("1970-01-01", periods=10) + msg = "Cannot index DatetimeIndex with [Tt]imedelta" + with pytest.raises(TypeError, match=msg): + dti.get_loc(key) + 
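+ # Illustrative sketch, an editorial addition rather than an upstream pandas test: + # get_loc accepts partial date strings; a key at the index's own resolution is + # an exact match, while a coarser key resolves to a slice of positions. + def test_get_loc_partial_string_sketch(self): + idx = date_range("2000-01-01", periods=3) # daily resolution + assert idx.get_loc("2000-01-02") == 1 # same resolution -> integer position + assert idx.get_loc("2000-01") == slice(0, 3) # coarser key -> slice + 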
+ def test_get_loc_reasonable_key_error(self): + # GH#1062 + index = DatetimeIndex(["1/3/2000"]) + with pytest.raises(KeyError, match="2000"): + index.get_loc("1/1/2000") + + def test_get_loc_year_str(self): + rng = date_range("1/1/2000", "1/1/2010") + + result = rng.get_loc("2009") + expected = slice(3288, 3653) + assert result == expected + + +class TestContains: + def test_dti_contains_with_duplicates(self): + d = datetime(2011, 12, 5, 20, 30) + ix = DatetimeIndex([d, d]) + assert d in ix + + @pytest.mark.parametrize( + "vals", + [ + [0, 1, 0], + [0, 0, -1], + [0, -1, -1], + ["2015", "2015", "2016"], + ["2015", "2015", "2014"], + ], + ) + def test_contains_nonunique(self, vals): + # GH#9512 + idx = DatetimeIndex(vals) + assert idx[0] in idx + + +class TestGetIndexer: + def test_get_indexer_date_objs(self): + rng = date_range("1/1/2000", periods=20) + + result = rng.get_indexer(rng.map(lambda x: x.date())) + expected = rng.get_indexer(rng) + tm.assert_numpy_array_equal(result, expected) + + def test_get_indexer(self): + idx = date_range("2000-01-01", periods=3) + exp = np.array([0, 1, 2], dtype=np.intp) + tm.assert_numpy_array_equal(idx.get_indexer(idx), exp) + + target = idx[0] + pd.to_timedelta(["-1 hour", "12 hours", "1 day 1 hour"]) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "pad"), np.array([-1, 0, 1], dtype=np.intp) + ) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "backfill"), np.array([0, 1, 2], dtype=np.intp) + ) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "nearest"), np.array([0, 1, 1], dtype=np.intp) + ) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "nearest", tolerance=pd.Timedelta("1 hour")), + np.array([0, -1, 1], dtype=np.intp), + ) + tol_raw = [ + pd.Timedelta("1 hour"), + pd.Timedelta("1 hour"), + pd.Timedelta("1 hour").to_timedelta64(), + ] + tm.assert_numpy_array_equal( + idx.get_indexer( + target, "nearest", tolerance=[np.timedelta64(x) for x in tol_raw] + ), + np.array([0, -1, 1], dtype=np.intp), + ) + tol_bad = [ + pd.Timedelta("2 hour").to_timedelta64(), + pd.Timedelta("1 hour").to_timedelta64(), + "foo", + ] + msg = "Could not convert 'foo' to NumPy timedelta" + with pytest.raises(ValueError, match=msg): + idx.get_indexer(target, "nearest", tolerance=tol_bad) + with pytest.raises(ValueError, match="abbreviation w/o a number"): + idx.get_indexer(idx[[0]], method="nearest", tolerance="foo") + + @pytest.mark.parametrize( + "target", + [ + [date(2020, 1, 1), Timestamp("2020-01-02")], + [Timestamp("2020-01-01"), date(2020, 1, 2)], + ], + ) + def test_get_indexer_mixed_dtypes(self, target): + # https://github.com/pandas-dev/pandas/issues/33741 + values = DatetimeIndex([Timestamp("2020-01-01"), Timestamp("2020-01-02")]) + result = values.get_indexer(target) + expected = np.array([0, 1], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "target, positions", + [ + ([date(9999, 1, 1), Timestamp("2020-01-01")], [-1, 0]), + ([Timestamp("2020-01-01"), date(9999, 1, 1)], [0, -1]), + ([date(9999, 1, 1), date(9999, 1, 1)], [-1, -1]), + ], + ) + @pytest.mark.filterwarnings("ignore:Comparison of Timestamp.*:FutureWarning") + def test_get_indexer_out_of_bounds_date(self, target, positions): + values = DatetimeIndex([Timestamp("2020-01-01"), Timestamp("2020-01-02")]) + + result = values.get_indexer(target) + expected = np.array(positions, dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + def test_get_indexer_pad_requires_monotonicity(self): + rng = date_range("1/1/2000", 
"3/1/2000", freq="B") + + # neither monotonic increasing or decreasing + rng2 = rng[[1, 0, 2]] + + msg = "index must be monotonic increasing or decreasing" + with pytest.raises(ValueError, match=msg): + rng2.get_indexer(rng, method="pad") + + +class TestMaybeCastSliceBound: + def test_maybe_cast_slice_bounds_empty(self): + # GH#14354 + empty_idx = date_range(freq="1H", periods=0, end="2015") + + right = empty_idx._maybe_cast_slice_bound("2015-01-02", "right") + exp = Timestamp("2015-01-02 23:59:59.999999999") + assert right == exp + + left = empty_idx._maybe_cast_slice_bound("2015-01-02", "left") + exp = Timestamp("2015-01-02 00:00:00") + assert left == exp + + def test_maybe_cast_slice_duplicate_monotonic(self): + # https://github.com/pandas-dev/pandas/issues/16515 + idx = DatetimeIndex(["2017", "2017"]) + result = idx._maybe_cast_slice_bound("2017-01-01", "left") + expected = Timestamp("2017-01-01") + assert result == expected + + +class TestGetValue: + def test_get_value(self): + # specifically make sure we have test for np.datetime64 key + dti = date_range("2016-01-01", periods=3) + + arr = np.arange(6, 9) + ser = pd.Series(arr, index=dti) + + key = dti[1] + + with pytest.raises(AttributeError, match="has no attribute '_values'"): + with tm.assert_produces_warning(FutureWarning): + dti.get_value(arr, key) + + with tm.assert_produces_warning(FutureWarning): + result = dti.get_value(ser, key) + assert result == 7 + + with tm.assert_produces_warning(FutureWarning): + result = dti.get_value(ser, key.to_pydatetime()) + assert result == 7 + + with tm.assert_produces_warning(FutureWarning): + result = dti.get_value(ser, key.to_datetime64()) + assert result == 7 + + +class TestGetSliceBounds: + @pytest.mark.parametrize("box", [date, datetime, Timestamp]) + @pytest.mark.parametrize("kind", ["getitem", "loc", None]) + @pytest.mark.parametrize("side, expected", [("left", 4), ("right", 5)]) + def test_get_slice_bounds_datetime_within( + self, box, kind, side, expected, tz_aware_fixture + ): + # GH 35690 + tz = tz_aware_fixture + index = bdate_range("2000-01-03", "2000-02-11").tz_localize(tz) + key = box(year=2000, month=1, day=7) + + warn = None if tz is None else FutureWarning + with tm.assert_produces_warning(warn): + # GH#36148 will require tzawareness-compat + result = index.get_slice_bound(key, kind=kind, side=side) + assert result == expected + + @pytest.mark.parametrize("box", [datetime, Timestamp]) + @pytest.mark.parametrize("kind", ["getitem", "loc", None]) + @pytest.mark.parametrize("side", ["left", "right"]) + @pytest.mark.parametrize("year, expected", [(1999, 0), (2020, 30)]) + def test_get_slice_bounds_datetime_outside( + self, box, kind, side, year, expected, tz_aware_fixture + ): + # GH 35690 + tz = tz_aware_fixture + index = bdate_range("2000-01-03", "2000-02-11").tz_localize(tz) + key = box(year=year, month=1, day=7) + + warn = None if tz is None else FutureWarning + with tm.assert_produces_warning(warn): + # GH#36148 will require tzawareness-compat + result = index.get_slice_bound(key, kind=kind, side=side) + assert result == expected + + @pytest.mark.parametrize("box", [datetime, Timestamp]) + @pytest.mark.parametrize("kind", ["getitem", "loc", None]) + def test_slice_datetime_locs(self, box, kind, tz_aware_fixture): + # GH 34077 + tz = tz_aware_fixture + index = DatetimeIndex(["2010-01-01", "2010-01-03"]).tz_localize(tz) + key = box(2010, 1, 1) + + warn = None if tz is None else FutureWarning + with tm.assert_produces_warning(warn): + # GH#36148 will require tzawareness-compat 
+ result = index.slice_locs(key, box(2010, 1, 2)) + expected = (0, 1) + assert result == expected + + +class TestIndexerBetweenTime: + def test_indexer_between_time(self): + # GH#11818 + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + msg = r"Cannot convert arg \[datetime\.datetime\(2010, 1, 2, 1, 0\)\] to a time" + with pytest.raises(ValueError, match=msg): + rng.indexer_between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_join.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_join.py new file mode 100644 index 0000000..8b633e8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_join.py @@ -0,0 +1,152 @@ +from datetime import datetime + +import numpy as np +import pytest + +from pandas import ( + DatetimeIndex, + Index, + Timestamp, + date_range, + to_datetime, +) +import pandas._testing as tm + +from pandas.tseries.offsets import ( + BDay, + BMonthEnd, +) + + +class TestJoin: + def test_does_not_convert_mixed_integer(self): + df = tm.makeCustomDataframe( + 10, + 10, + data_gen_f=lambda *args, **kwargs: np.random.randn(), + r_idx_type="i", + c_idx_type="dt", + ) + cols = df.columns.join(df.index, how="outer") + joined = cols.join(df.columns) + assert cols.dtype == np.dtype("O") + assert cols.dtype == joined.dtype + tm.assert_numpy_array_equal(cols.values, joined.values) + + def test_join_self(self, join_type): + index = date_range("1/1/2000", periods=10) + joined = index.join(index, how=join_type) + assert index is joined + + def test_join_with_period_index(self, join_type): + df = tm.makeCustomDataframe( + 10, + 10, + data_gen_f=lambda *args: np.random.randint(2), + c_idx_type="p", + r_idx_type="dt", + ) + s = df.iloc[:5, 0] + + expected = df.columns.astype("O").join(s.index, how=join_type) + result = df.columns.join(s.index, how=join_type) + tm.assert_index_equal(expected, result) + + def test_join_object_index(self): + rng = date_range("1/1/2000", periods=10) + idx = Index(["a", "b", "c", "d"]) + + result = rng.join(idx, how="outer") + assert isinstance(result[0], Timestamp) + + def test_join_utc_convert(self, join_type): + rng = date_range("1/1/2011", periods=100, freq="H", tz="utc") + + left = rng.tz_convert("US/Eastern") + right = rng.tz_convert("Europe/Berlin") + + result = left.join(left[:-5], how=join_type) + assert isinstance(result, DatetimeIndex) + assert result.tz == left.tz + + result = left.join(right[:-5], how=join_type) + assert isinstance(result, DatetimeIndex) + assert result.tz.zone == "UTC" + + def test_datetimeindex_union_join_empty(self, sort): + dti = date_range(start="1/1/2001", end="2/1/2001", freq="D") + empty = Index([]) + + result = dti.union(empty, sort=sort) + expected = dti.astype("O") + tm.assert_index_equal(result, expected) + + result = dti.join(empty) + assert isinstance(result, DatetimeIndex) + tm.assert_index_equal(result, dti) + + def test_join_nonunique(self): + idx1 = to_datetime(["2012-11-06 16:00:11.477563", "2012-11-06 16:00:11.477563"]) + idx2 = to_datetime(["2012-11-06 15:11:09.006507", "2012-11-06 15:11:09.006507"]) + rs = idx1.join(idx2, how="outer") + assert rs.is_monotonic + + @pytest.mark.parametrize("freq", ["B", "C"]) + def test_outer_join(self, freq): + # should just behave as union + start, end = datetime(2009, 1, 1), datetime(2010, 1, 1) + rng = date_range(start=start, end=end, freq=freq) + + # overlapping + left = rng[:10] + right = rng[5:10] + + the_join = 
left.join(right, how="outer") + assert isinstance(the_join, DatetimeIndex) + + # non-overlapping, gap in middle + left = rng[:5] + right = rng[10:] + + the_join = left.join(right, how="outer") + assert isinstance(the_join, DatetimeIndex) + assert the_join.freq is None + + # non-overlapping, no gap + left = rng[:5] + right = rng[5:10] + + the_join = left.join(right, how="outer") + assert isinstance(the_join, DatetimeIndex) + + # overlapping, but different offset + other = date_range(start, end, freq=BMonthEnd()) + + the_join = rng.join(other, how="outer") + assert isinstance(the_join, DatetimeIndex) + assert the_join.freq is None + + def test_naive_aware_conflicts(self): + start, end = datetime(2009, 1, 1), datetime(2010, 1, 1) + naive = date_range(start, end, freq=BDay(), tz=None) + aware = date_range(start, end, freq=BDay(), tz="Asia/Hong_Kong") + + msg = "tz-naive.*tz-aware" + with pytest.raises(TypeError, match=msg): + naive.join(aware) + + with pytest.raises(TypeError, match=msg): + aware.join(naive) + + @pytest.mark.parametrize("tz", [None, "US/Pacific"]) + def test_join_preserves_freq(self, tz): + # GH#32157 + dti = date_range("2016-01-01", periods=10, tz=tz) + result = dti[:5].join(dti[5:], how="outer") + assert result.freq == dti.freq + tm.assert_index_equal(result, dti) + + result = dti[:5].join(dti[6:], how="outer") + assert result.freq is None + expected = dti.delete(5) + tm.assert_index_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_map.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_map.py new file mode 100644 index 0000000..45698ef --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_map.py @@ -0,0 +1,47 @@ +import pytest + +from pandas import ( + DatetimeIndex, + Index, + MultiIndex, + Period, + date_range, +) +import pandas._testing as tm + + +class TestMap: + def test_map(self): + rng = date_range("1/1/2000", periods=10) + + f = lambda x: x.strftime("%Y%m%d") + result = rng.map(f) + exp = Index([f(x) for x in rng], dtype="<U8") + tm.assert_index_equal(result, exp) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_misc.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_misc.py new file mode 100644 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_misc.py + # non-boolean accessors -> return Index + for accessor in DatetimeArray._field_ops: + if accessor in ["week", "weekofyear"]: + # GH#33595 Deprecate week and weekofyear + continue + res = getattr(dti, accessor) + assert len(res) == 365 + assert isinstance(res, Index) + assert res.name == "name" + + # boolean accessors -> return array + for accessor in DatetimeArray._bool_ops: + res = getattr(dti, accessor) + assert len(res) == 365 + assert isinstance(res, np.ndarray) + + # test boolean indexing + res = dti[dti.is_quarter_start] + exp = dti[[0, 90, 181, 273]] + tm.assert_index_equal(res, exp) + res = dti[dti.is_leap_year] + exp = DatetimeIndex([], freq="D", tz=dti.tz, name="name") + tm.assert_index_equal(res, exp) + + def test_datetimeindex_accessors2(self): + dti = date_range(freq="BQ-FEB", start=datetime(1998, 1, 1), periods=4) + + assert sum(dti.is_quarter_start) == 0 + assert sum(dti.is_quarter_end) == 4 + assert sum(dti.is_year_start) == 0 + assert sum(dti.is_year_end) == 1 + + def test_datetimeindex_accessors3(self): + # Ensure is_start/end accessors throw ValueError for CustomBusinessDay. + bday_egypt = offsets.CustomBusinessDay(weekmask="Sun Mon Tue Wed Thu") + dti = date_range(datetime(2013, 4, 30), periods=5, freq=bday_egypt) + msg = "Custom business days is not supported by is_month_start" + with pytest.raises(ValueError, match=msg): + dti.is_month_start + + def test_datetimeindex_accessors4(self): + dti = DatetimeIndex(["2000-01-01",
"2000-01-02", "2000-01-03"]) + + assert dti.is_month_start[0] == 1 + + def test_datetimeindex_accessors5(self): + with tm.assert_produces_warning(FutureWarning, match="The 'freq' argument"): + tests = [ + (Timestamp("2013-06-01", freq="M").is_month_start, 1), + (Timestamp("2013-06-01", freq="BM").is_month_start, 0), + (Timestamp("2013-06-03", freq="M").is_month_start, 0), + (Timestamp("2013-06-03", freq="BM").is_month_start, 1), + (Timestamp("2013-02-28", freq="Q-FEB").is_month_end, 1), + (Timestamp("2013-02-28", freq="Q-FEB").is_quarter_end, 1), + (Timestamp("2013-02-28", freq="Q-FEB").is_year_end, 1), + (Timestamp("2013-03-01", freq="Q-FEB").is_month_start, 1), + (Timestamp("2013-03-01", freq="Q-FEB").is_quarter_start, 1), + (Timestamp("2013-03-01", freq="Q-FEB").is_year_start, 1), + (Timestamp("2013-03-31", freq="QS-FEB").is_month_end, 1), + (Timestamp("2013-03-31", freq="QS-FEB").is_quarter_end, 0), + (Timestamp("2013-03-31", freq="QS-FEB").is_year_end, 0), + (Timestamp("2013-02-01", freq="QS-FEB").is_month_start, 1), + (Timestamp("2013-02-01", freq="QS-FEB").is_quarter_start, 1), + (Timestamp("2013-02-01", freq="QS-FEB").is_year_start, 1), + (Timestamp("2013-06-30", freq="BQ").is_month_end, 0), + (Timestamp("2013-06-30", freq="BQ").is_quarter_end, 0), + (Timestamp("2013-06-30", freq="BQ").is_year_end, 0), + (Timestamp("2013-06-28", freq="BQ").is_month_end, 1), + (Timestamp("2013-06-28", freq="BQ").is_quarter_end, 1), + (Timestamp("2013-06-28", freq="BQ").is_year_end, 0), + (Timestamp("2013-06-30", freq="BQS-APR").is_month_end, 0), + (Timestamp("2013-06-30", freq="BQS-APR").is_quarter_end, 0), + (Timestamp("2013-06-30", freq="BQS-APR").is_year_end, 0), + (Timestamp("2013-06-28", freq="BQS-APR").is_month_end, 1), + (Timestamp("2013-06-28", freq="BQS-APR").is_quarter_end, 1), + (Timestamp("2013-03-29", freq="BQS-APR").is_year_end, 1), + (Timestamp("2013-11-01", freq="AS-NOV").is_year_start, 1), + (Timestamp("2013-10-31", freq="AS-NOV").is_year_end, 1), + (Timestamp("2012-02-01").days_in_month, 29), + (Timestamp("2013-02-01").days_in_month, 28), + ] + + for ts, value in tests: + assert ts == value + + def test_datetimeindex_accessors6(self): + # GH 6538: Check that DatetimeIndex and its TimeStamp elements + # return the same weekofyear accessor close to new year w/ tz + dates = ["2013/12/29", "2013/12/30", "2013/12/31"] + dates = DatetimeIndex(dates, tz="Europe/Brussels") + expected = [52, 1, 1] + assert dates.isocalendar().week.tolist() == expected + assert [d.weekofyear for d in dates] == expected + + # GH 12806 + # error: Unsupported operand types for + ("List[None]" and "List[str]") + @pytest.mark.parametrize( + "time_locale", [None] + (tm.get_locales() or []) # type: ignore[operator] + ) + def test_datetime_name_accessors(self, time_locale): + # Test Monday -> Sunday and January -> December, in that sequence + if time_locale is None: + # If the time_locale is None, day-name and month_name should + # return the english attributes + expected_days = [ + "Monday", + "Tuesday", + "Wednesday", + "Thursday", + "Friday", + "Saturday", + "Sunday", + ] + expected_months = [ + "January", + "February", + "March", + "April", + "May", + "June", + "July", + "August", + "September", + "October", + "November", + "December", + ] + else: + with tm.set_locale(time_locale, locale.LC_TIME): + expected_days = calendar.day_name[:] + expected_months = calendar.month_name[1:] + + # GH#11128 + dti = date_range(freq="D", start=datetime(1998, 1, 1), periods=365) + english_days = [ + "Monday", + "Tuesday", + 
"Wednesday", + "Thursday", + "Friday", + "Saturday", + "Sunday", + ] + for day, name, eng_name in zip(range(4, 11), expected_days, english_days): + name = name.capitalize() + assert dti.day_name(locale=time_locale)[day] == name + assert dti.day_name(locale=None)[day] == eng_name + ts = Timestamp(datetime(2016, 4, day)) + assert ts.day_name(locale=time_locale) == name + dti = dti.append(DatetimeIndex([pd.NaT])) + assert np.isnan(dti.day_name(locale=time_locale)[-1]) + ts = Timestamp(pd.NaT) + assert np.isnan(ts.day_name(locale=time_locale)) + + # GH#12805 + dti = date_range(freq="M", start="2012", end="2013") + result = dti.month_name(locale=time_locale) + expected = Index([month.capitalize() for month in expected_months]) + + # work around different normalization schemes + # https://github.com/pandas-dev/pandas/issues/22342 + result = result.str.normalize("NFD") + expected = expected.str.normalize("NFD") + + tm.assert_index_equal(result, expected) + + for date, expected in zip(dti, expected_months): + result = date.month_name(locale=time_locale) + expected = expected.capitalize() + + result = unicodedata.normalize("NFD", result) + expected = unicodedata.normalize("NFD", result) + + assert result == expected + dti = dti.append(DatetimeIndex([pd.NaT])) + assert np.isnan(dti.month_name(locale=time_locale)[-1]) + + def test_nanosecond_field(self): + dti = DatetimeIndex(np.arange(10)) + + tm.assert_index_equal(dti.nanosecond, Index(np.arange(10, dtype=np.int64))) + + +def test_iter_readonly(): + # GH#28055 ints_to_pydatetime with readonly array + arr = np.array([np.datetime64("2012-02-15T12:00:00.000000000")]) + arr.setflags(write=False) + dti = pd.to_datetime(arr) + list(dti) + + +def test_week_and_weekofyear_are_deprecated(): + # GH#33595 Deprecate week and weekofyear + idx = date_range(start="2019-12-29", freq="D", periods=4) + with tm.assert_produces_warning(FutureWarning): + idx.week + with tm.assert_produces_warning(FutureWarning): + idx.weekofyear + + +def test_add_timedelta_preserves_freq(): + # GH#37295 should hold for any DTI with freq=None or Tick freq + tz = "Canada/Eastern" + dti = date_range( + start=Timestamp("2019-03-26 00:00:00-0400", tz=tz), + end=Timestamp("2020-10-17 00:00:00-0400", tz=tz), + freq="D", + ) + result = dti + Timedelta(days=1) + assert result.freq == dti.freq diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_npfuncs.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_npfuncs.py new file mode 100644 index 0000000..301466c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_npfuncs.py @@ -0,0 +1,13 @@ +import numpy as np + +from pandas import date_range +import pandas._testing as tm + + +class TestSplit: + def test_split_non_utc(self): + # GH#14042 + indices = date_range("2016-01-01 00:00:00+0200", freq="S", periods=10) + result = np.split(indices, indices_or_sections=[])[0] + expected = indices._with_freq(None) + tm.assert_index_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_ops.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_ops.py new file mode 100644 index 0000000..d6ef419 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_ops.py @@ -0,0 +1,85 @@ +from datetime import datetime + +from dateutil.tz import tzlocal +import pytest + +from pandas.compat import IS64 + +from pandas import ( + DatetimeIndex, + Index, + bdate_range, + date_range, 
+) +import pandas._testing as tm + +START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) + + +class TestDatetimeIndexOps: + @pytest.mark.parametrize( + "freq,expected", + [ + ("A", "day"), + ("Q", "day"), + ("M", "day"), + ("D", "day"), + ("H", "hour"), + ("T", "minute"), + ("S", "second"), + ("L", "millisecond"), + ("U", "microsecond"), + ], + ) + def test_resolution(self, request, tz_naive_fixture, freq, expected): + tz = tz_naive_fixture + if freq == "A" and not IS64 and isinstance(tz, tzlocal): + request.node.add_marker( + pytest.mark.xfail(reason="OverflowError inside tzlocal past 2038") + ) + + idx = date_range(start="2013-04-01", periods=30, freq=freq, tz=tz) + assert idx.resolution == expected + + def test_infer_freq(self, freq_sample): + # GH 11018 + idx = date_range("2011-01-01 09:00:00", freq=freq_sample, periods=10) + result = DatetimeIndex(idx.asi8, freq="infer") + tm.assert_index_equal(idx, result) + assert result.freq == freq_sample + + +@pytest.mark.parametrize("freq", ["B", "C"]) +class TestBusinessDatetimeIndex: + @pytest.fixture + def rng(self, freq): + return bdate_range(START, END, freq=freq) + + def test_comparison(self, rng): + d = rng[10] + + comp = rng > d + assert comp[11] + assert not comp[9] + + def test_copy(self, rng): + cp = rng.copy() + repr(cp) + tm.assert_index_equal(cp, rng) + + def test_identical(self, rng): + t1 = rng.copy() + t2 = rng.copy() + assert t1.identical(t2) + + # name + t1 = t1.rename("foo") + assert t1.equals(t2) + assert not t1.identical(t2) + t2 = t2.rename("foo") + assert t1.identical(t2) + + # freq + t2v = Index(t2.values) + assert t1.equals(t2v) + assert not t1.identical(t2v) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_partial_slicing.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_partial_slicing.py new file mode 100644 index 0000000..2f32f9e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -0,0 +1,459 @@ +""" test partial slicing on Series/Frame """ + +from datetime import datetime + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + DatetimeIndex, + Index, + Series, + Timedelta, + Timestamp, + date_range, +) +import pandas._testing as tm + + +class TestSlicing: + def test_string_index_series_name_converted(self): + # GH#1644 + df = DataFrame(np.random.randn(10, 4), index=date_range("1/1/2000", periods=10)) + + result = df.loc["1/3/2000"] + assert result.name == df.index[2] + + result = df.T["1/3/2000"] + assert result.name == df.index[2] + + def test_stringified_slice_with_tz(self): + # GH#2658 + start = "2013-01-07" + idx = date_range(start=start, freq="1d", periods=10, tz="US/Eastern") + df = DataFrame(np.arange(10), index=idx) + df["2013-01-14 23:44:34.437768-05:00":] # no exception here + + def test_return_type_doesnt_depend_on_monotonicity(self): + # GH#24892 we get Series back regardless of whether our DTI is monotonic + dti = date_range(start="2015-5-13 23:59:00", freq="min", periods=3) + ser = Series(range(3), index=dti) + + # non-monotonic index + ser2 = Series(range(3), index=[dti[1], dti[0], dti[2]]) + + # key with resolution strictly lower than "min" + key = "2015-5-14 00" + + # monotonic increasing index + result = ser.loc[key] + expected = ser.iloc[1:] + tm.assert_series_equal(result, expected) + + # monotonic decreasing index + result = ser.iloc[::-1].loc[key] + expected = ser.iloc[::-1][:-1] + tm.assert_series_equal(result, expected) + + # non-monotonic index + 
result2 = ser2.loc[key] + expected2 = ser2.iloc[::2] + tm.assert_series_equal(result2, expected2) + + def test_return_type_doesnt_depend_on_monotonicity_higher_reso(self): + # GH#24892 we get Series back regardless of whether our DTI is monotonic + dti = date_range(start="2015-5-13 23:59:00", freq="min", periods=3) + ser = Series(range(3), index=dti) + + # non-monotonic index + ser2 = Series(range(3), index=[dti[1], dti[0], dti[2]]) + + # key with resolution strictly *higher* than "min" + key = "2015-5-14 00:00:00" + + # monotonic increasing index + result = ser.loc[key] + assert result == 1 + + # monotonic decreasing index + result = ser.iloc[::-1].loc[key] + assert result == 1 + + # non-monotonic index + result2 = ser2.loc[key] + assert result2 == 0 + + def test_monotone_DTI_indexing_bug(self): + # GH 19362 + # Test accessing the first element when partial-string indexing a + # monotonic descending index. + + df = DataFrame(list(range(5))) + date_list = [ + "2018-01-02", + "2017-02-10", + "2016-03-10", + "2015-03-15", + "2014-03-16", + ] + date_index = DatetimeIndex(date_list) + df["date"] = date_index + expected = DataFrame({0: list(range(5)), "date": date_index}) + tm.assert_frame_equal(df, expected) + + # We get a slice because df.index's resolution is hourly and we + # are slicing with a daily-resolution string. If both were daily, + # we would get a single item back + dti = date_range("20170101 01:00:00", periods=3) + df = DataFrame({"A": [1, 2, 3]}, index=dti[::-1]) + + expected = DataFrame({"A": 1}, index=dti[-1:][::-1]) + result = df.loc["2017-01-03"] + tm.assert_frame_equal(result, expected) + + result2 = df.iloc[::-1].loc["2017-01-03"] + expected2 = expected.iloc[::-1] + tm.assert_frame_equal(result2, expected2) + + def test_slice_year(self): + dti = date_range(freq="B", start=datetime(2005, 1, 1), periods=500) + + s = Series(np.arange(len(dti)), index=dti) + result = s["2005"] + expected = s[s.index.year == 2005] + tm.assert_series_equal(result, expected) + + df = DataFrame(np.random.rand(len(dti), 5), index=dti) + result = df.loc["2005"] + expected = df[df.index.year == 2005] + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "partial_dtime", + [ + "2019", + "2019Q4", + "Dec 2019", + "2019-12-31", + "2019-12-31 23", + "2019-12-31 23:59", + ], + ) + def test_slice_end_of_period_resolution(self, partial_dtime): + # GH#31064 + dti = date_range("2019-12-31 23:59:55.999999999", periods=10, freq="s") + + ser = Series(range(10), index=dti) + result = ser[partial_dtime] + expected = ser.iloc[:5] + tm.assert_series_equal(result, expected) + + def test_slice_quarter(self): + dti = date_range(freq="D", start=datetime(2000, 6, 1), periods=500) + + s = Series(np.arange(len(dti)), index=dti) + assert len(s["2001Q1"]) == 90 + + df = DataFrame(np.random.rand(len(dti), 5), index=dti) + assert len(df.loc["1Q01"]) == 90 + + def test_slice_month(self): + dti = date_range(freq="D", start=datetime(2005, 1, 1), periods=500) + s = Series(np.arange(len(dti)), index=dti) + assert len(s["2005-11"]) == 30 + + df = DataFrame(np.random.rand(len(dti), 5), index=dti) + assert len(df.loc["2005-11"]) == 30 + + tm.assert_series_equal(s["2005-11"], s["11-2005"]) + + def test_partial_slice(self): + rng = date_range(freq="D", start=datetime(2005, 1, 1), periods=500) + s = Series(np.arange(len(rng)), index=rng) + + result = s["2005-05":"2006-02"] + expected = s["20050501":"20060228"] + tm.assert_series_equal(result, expected) + + result = s["2005-05":] + expected = s["20050501":] +
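+ # Illustrative sketch (editorial addition): partial-string bounds are + # inclusive of the whole period they name, so a month-resolution key + # covers every row of that month. + assert s["2005-05":"2005-05"].equals(s["20050501":"20050531"]) + 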
tm.assert_series_equal(result, expected) + + result = s[:"2006-02"] + expected = s[:"20060228"] + tm.assert_series_equal(result, expected) + + result = s["2005-1-1"] + assert result == s.iloc[0] + + with pytest.raises(KeyError, match=r"^'2004-12-31'$"): + s["2004-12-31"] + + def test_partial_slice_daily(self): + rng = date_range(freq="H", start=datetime(2005, 1, 31), periods=500) + s = Series(np.arange(len(rng)), index=rng) + + result = s["2005-1-31"] + tm.assert_series_equal(result, s.iloc[:24]) + + with pytest.raises(KeyError, match=r"^'2004-12-31 00'$"): + s["2004-12-31 00"] + + def test_partial_slice_hourly(self): + rng = date_range(freq="T", start=datetime(2005, 1, 1, 20, 0, 0), periods=500) + s = Series(np.arange(len(rng)), index=rng) + + result = s["2005-1-1"] + tm.assert_series_equal(result, s.iloc[: 60 * 4]) + + result = s["2005-1-1 20"] + tm.assert_series_equal(result, s.iloc[:60]) + + assert s["2005-1-1 20:00"] == s.iloc[0] + with pytest.raises(KeyError, match=r"^'2004-12-31 00:15'$"): + s["2004-12-31 00:15"] + + def test_partial_slice_minutely(self): + rng = date_range(freq="S", start=datetime(2005, 1, 1, 23, 59, 0), periods=500) + s = Series(np.arange(len(rng)), index=rng) + + result = s["2005-1-1 23:59"] + tm.assert_series_equal(result, s.iloc[:60]) + + result = s["2005-1-1"] + tm.assert_series_equal(result, s.iloc[:60]) + + assert s[Timestamp("2005-1-1 23:59:00")] == s.iloc[0] + with pytest.raises(KeyError, match=r"^'2004-12-31 00:00:00'$"): + s["2004-12-31 00:00:00"] + + def test_partial_slice_second_precision(self): + rng = date_range( + start=datetime(2005, 1, 1, 0, 0, 59, microsecond=999990), + periods=20, + freq="US", + ) + s = Series(np.arange(20), rng) + + tm.assert_series_equal(s["2005-1-1 00:00"], s.iloc[:10]) + tm.assert_series_equal(s["2005-1-1 00:00:59"], s.iloc[:10]) + + tm.assert_series_equal(s["2005-1-1 00:01"], s.iloc[10:]) + tm.assert_series_equal(s["2005-1-1 00:01:00"], s.iloc[10:]) + + assert s[Timestamp("2005-1-1 00:00:59.999990")] == s.iloc[0] + with pytest.raises(KeyError, match="2005-1-1 00:00:00"): + s["2005-1-1 00:00:00"] + + def test_partial_slicing_dataframe(self): + # GH14856 + # Test various combinations of string slicing resolution vs. 
+ # index resolution + # - If string resolution is less precise than index resolution, + # string is considered a slice + # - If string resolution is equal to or more precise than index + # resolution, string is considered an exact match + formats = [ + "%Y", + "%Y-%m", + "%Y-%m-%d", + "%Y-%m-%d %H", + "%Y-%m-%d %H:%M", + "%Y-%m-%d %H:%M:%S", + ] + resolutions = ["year", "month", "day", "hour", "minute", "second"] + for rnum, resolution in enumerate(resolutions[2:], 2): + # we check only 'day', 'hour', 'minute' and 'second' + unit = Timedelta("1 " + resolution) + middate = datetime(2012, 1, 1, 0, 0, 0) + index = DatetimeIndex([middate - unit, middate, middate + unit]) + values = [1, 2, 3] + df = DataFrame({"a": values}, index, dtype=np.int64) + assert df.index.resolution == resolution + + # Timestamp with the same resolution as index + # Should be exact match for Series (return scalar) + # and raise KeyError for Frame + for timestamp, expected in zip(index, values): + ts_string = timestamp.strftime(formats[rnum]) + # make ts_string as precise as index + result = df["a"][ts_string] + assert isinstance(result, np.int64) + assert result == expected + msg = fr"^'{ts_string}'$" + with pytest.raises(KeyError, match=msg): + df[ts_string] + + # Timestamp with resolution less precise than index + for fmt in formats[:rnum]: + for element, theslice in [[0, slice(None, 1)], [1, slice(1, None)]]: + ts_string = index[element].strftime(fmt) + + # Series should return slice + result = df["a"][ts_string] + expected = df["a"][theslice] + tm.assert_series_equal(result, expected) + + # Frame should return slice as well + with tm.assert_produces_warning(FutureWarning): + # GH#36179 deprecated this indexing + result = df[ts_string] + expected = df[theslice] + tm.assert_frame_equal(result, expected) + + # Timestamp with resolution more precise than index + # Compatible with existing key + # Should return scalar for Series + # and raise KeyError for Frame + for fmt in formats[rnum + 1 :]: + ts_string = index[1].strftime(fmt) + result = df["a"][ts_string] + assert isinstance(result, np.int64) + assert result == 2 + msg = fr"^'{ts_string}'$" + with pytest.raises(KeyError, match=msg): + df[ts_string] + + # Not compatible with existing key + # Should raise KeyError + for fmt, res in list(zip(formats, resolutions))[rnum + 1 :]: + ts = index[1] + Timedelta("1 " + res) + ts_string = ts.strftime(fmt) + msg = fr"^'{ts_string}'$" + with pytest.raises(KeyError, match=msg): + df["a"][ts_string] + with pytest.raises(KeyError, match=msg): + df[ts_string] + + def test_partial_slicing_with_multiindex(self): + + # GH 4758 + # partial string indexing with a multi-index buggy + df = DataFrame( + { + "ACCOUNT": ["ACCT1", "ACCT1", "ACCT1", "ACCT2"], + "TICKER": ["ABC", "MNP", "XYZ", "XYZ"], + "val": [1, 2, 3, 4], + }, + index=date_range("2013-06-19 09:30:00", periods=4, freq="5T"), + ) + df_multi = df.set_index(["ACCOUNT", "TICKER"], append=True) + + expected = DataFrame( + [[1]], index=Index(["ABC"], name="TICKER"), columns=["val"] + ) + result = df_multi.loc[("2013-06-19 09:30:00", "ACCT1")] + tm.assert_frame_equal(result, expected) + + expected = df_multi.loc[ + (Timestamp("2013-06-19 09:30:00", tz=None), "ACCT1", "ABC") + ] + result = df_multi.loc[("2013-06-19 09:30:00", "ACCT1", "ABC")] + tm.assert_series_equal(result, expected) + + # partial string indexing on first level, scalar indexing on the other two + result = df_multi.loc[("2013-06-19", "ACCT1", "ABC")] + expected = df_multi.iloc[:1].droplevel([1, 2]) + 
tm.assert_frame_equal(result, expected) + + def test_partial_slicing_with_multiindex_series(self): + # GH 4294 + # partial slice on a series mi + ser = DataFrame( + np.random.rand(1000, 1000), index=date_range("2000-1-1", periods=1000) + ).stack() + + s2 = ser[:-1].copy() + expected = s2["2000-1-4"] + result = s2[Timestamp("2000-1-4")] + tm.assert_series_equal(result, expected) + + result = ser[Timestamp("2000-1-4")] + expected = ser["2000-1-4"] + tm.assert_series_equal(result, expected) + + df2 = DataFrame(ser) + expected = df2.xs("2000-1-4") + result = df2.loc[Timestamp("2000-1-4")] + tm.assert_frame_equal(result, expected) + + def test_partial_slice_doesnt_require_monotonicity(self): + # For historical reasons. + ser = Series(np.arange(10), date_range("2014-01-01", periods=10)) + + nonmonotonic = ser[[3, 5, 4]] + expected = nonmonotonic.iloc[:0] + timestamp = Timestamp("2014-01-10") + with tm.assert_produces_warning(FutureWarning): + result = nonmonotonic["2014-01-10":] + tm.assert_series_equal(result, expected) + + with pytest.raises(KeyError, match=r"Timestamp\('2014-01-10 00:00:00'\)"): + nonmonotonic[timestamp:] + + with tm.assert_produces_warning(FutureWarning): + result = nonmonotonic.loc["2014-01-10":] + tm.assert_series_equal(result, expected) + + with pytest.raises(KeyError, match=r"Timestamp\('2014-01-10 00:00:00'\)"): + nonmonotonic.loc[timestamp:] + + def test_loc_datetime_length_one(self): + # GH16071 + df = DataFrame( + columns=["1"], + index=date_range("2016-10-01T00:00:00", "2016-10-01T23:59:59"), + ) + result = df.loc[datetime(2016, 10, 1) :] + tm.assert_frame_equal(result, df) + + result = df.loc["2016-10-01T00:00:00":] + tm.assert_frame_equal(result, df) + + @pytest.mark.parametrize( + "start", + [ + "2018-12-02 21:50:00+00:00", + Timestamp("2018-12-02 21:50:00+00:00"), + Timestamp("2018-12-02 21:50:00+00:00").to_pydatetime(), + ], + ) + @pytest.mark.parametrize( + "end", + [ + "2018-12-02 21:52:00+00:00", + Timestamp("2018-12-02 21:52:00+00:00"), + Timestamp("2018-12-02 21:52:00+00:00").to_pydatetime(), + ], + ) + def test_getitem_with_datestring_with_UTC_offset(self, start, end): + # GH 24076 + idx = date_range( + start="2018-12-02 14:50:00-07:00", + end="2018-12-02 14:50:00-07:00", + freq="1min", + ) + df = DataFrame(1, index=idx, columns=["A"]) + result = df[start:end] + expected = df.iloc[0:3, :] + tm.assert_frame_equal(result, expected) + + # GH 16785 + start = str(start) + end = str(end) + with pytest.raises(ValueError, match="Both dates must"): + df[start : end[:-4] + "1:00"] + + with pytest.raises(ValueError, match="The index must be timezone"): + df = df.tz_localize(None) + df[start:end] + + def test_slice_reduce_to_series(self): + # GH 27516 + df = DataFrame({"A": range(24)}, index=date_range("2000", periods=24, freq="M")) + expected = Series( + range(12), index=date_range("2000", periods=12, freq="M"), name="A" + ) + result = df.loc["2000", "A"] + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_pickle.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_pickle.py new file mode 100644 index 0000000..922b4a1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_pickle.py @@ -0,0 +1,45 @@ +import pytest + +from pandas import ( + NaT, + date_range, + to_datetime, +) +import pandas._testing as tm + + +class TestPickle: + def test_pickle(self): + # GH#4606 + idx = to_datetime(["2013-01-01", NaT, "2014-01-06"]) + idx_p = 
tm.round_trip_pickle(idx) + assert idx_p[0] == idx[0] + assert idx_p[1] is NaT + assert idx_p[2] == idx[2] + + def test_pickle_dont_infer_freq(self): + # GH#11002 + # don't infer freq + idx = date_range("1750-1-1", "2050-1-1", freq="7D") + idx_p = tm.round_trip_pickle(idx) + tm.assert_index_equal(idx, idx_p) + + def test_pickle_after_set_freq(self): + dti = date_range("20130101", periods=3, tz="US/Eastern", name="foo") + dti = dti._with_freq(None) + + res = tm.round_trip_pickle(dti) + tm.assert_index_equal(res, dti) + + def test_roundtrip_pickle_with_tz(self): + # GH#8367 + # round-trip of timezone + index = date_range("20130101", periods=3, tz="US/Eastern", name="foo") + unpickled = tm.round_trip_pickle(index) + tm.assert_index_equal(index, unpickled) + + @pytest.mark.parametrize("freq", ["B", "C"]) + def test_pickle_unpickle(self, freq): + rng = date_range("2009-01-01", "2010-01-01", freq=freq) + unpickled = tm.round_trip_pickle(rng) + assert unpickled.freq == freq diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_reindex.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_reindex.py new file mode 100644 index 0000000..e4911aa --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_reindex.py @@ -0,0 +1,56 @@ +from datetime import timedelta + +import numpy as np + +from pandas import ( + DatetimeIndex, + date_range, +) +import pandas._testing as tm + + +class TestDatetimeIndexReindex: + def test_reindex_preserves_tz_if_target_is_empty_list_or_array(self): + # GH#7774 + index = date_range("2013-01-01", periods=3, tz="US/Eastern") + assert str(index.reindex([])[0].tz) == "US/Eastern" + assert str(index.reindex(np.array([]))[0].tz) == "US/Eastern" + + def test_reindex_with_same_tz_nearest(self): + # GH#32740 + rng_a = date_range("2010-01-01", "2010-01-02", periods=24, tz="utc") + rng_b = date_range("2010-01-01", "2010-01-02", periods=23, tz="utc") + result1, result2 = rng_a.reindex( + rng_b, method="nearest", tolerance=timedelta(seconds=20) + ) + expected_list1 = [ + "2010-01-01 00:00:00", + "2010-01-01 01:05:27.272727272", + "2010-01-01 02:10:54.545454545", + "2010-01-01 03:16:21.818181818", + "2010-01-01 04:21:49.090909090", + "2010-01-01 05:27:16.363636363", + "2010-01-01 06:32:43.636363636", + "2010-01-01 07:38:10.909090909", + "2010-01-01 08:43:38.181818181", + "2010-01-01 09:49:05.454545454", + "2010-01-01 10:54:32.727272727", + "2010-01-01 12:00:00", + "2010-01-01 13:05:27.272727272", + "2010-01-01 14:10:54.545454545", + "2010-01-01 15:16:21.818181818", + "2010-01-01 16:21:49.090909090", + "2010-01-01 17:27:16.363636363", + "2010-01-01 18:32:43.636363636", + "2010-01-01 19:38:10.909090909", + "2010-01-01 20:43:38.181818181", + "2010-01-01 21:49:05.454545454", + "2010-01-01 22:54:32.727272727", + "2010-01-02 00:00:00", + ] + expected1 = DatetimeIndex( + expected_list1, dtype="datetime64[ns, UTC]", freq=None + ) + expected2 = np.array([0] + [-1] * 21 + [23], dtype=np.dtype("intp")) + tm.assert_index_equal(result1, expected1) + tm.assert_numpy_array_equal(result2, expected2) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_scalar_compat.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_scalar_compat.py new file mode 100644 index 0000000..c60e568 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_scalar_compat.py @@ -0,0 +1,363 @@ +""" +Tests for DatetimeIndex methods behaving like their Timestamp 
counterparts +""" +from datetime import datetime + +import numpy as np +import pytest + +from pandas._libs.tslibs import ( + OutOfBoundsDatetime, + to_offset, +) +from pandas._libs.tslibs.offsets import INVALID_FREQ_ERR_MSG + +import pandas as pd +from pandas import ( + DatetimeIndex, + Timestamp, + date_range, +) +import pandas._testing as tm +from pandas.core.api import Float64Index + + +class TestDatetimeIndexOps: + def test_dti_time(self): + rng = date_range("1/1/2000", freq="12min", periods=10) + result = pd.Index(rng).time + expected = [t.time() for t in rng] + assert (result == expected).all() + + def test_dti_date(self): + rng = date_range("1/1/2000", freq="12H", periods=10) + result = pd.Index(rng).date + expected = [t.date() for t in rng] + assert (result == expected).all() + + @pytest.mark.parametrize("data", [["1400-01-01"], [datetime(1400, 1, 1)]]) + def test_dti_date_out_of_range(self, data): + # GH#1475 + msg = "Out of bounds nanosecond timestamp: 1400-01-01 00:00:00" + with pytest.raises(OutOfBoundsDatetime, match=msg): + DatetimeIndex(data) + + @pytest.mark.parametrize( + "field", + [ + "dayofweek", + "day_of_week", + "dayofyear", + "day_of_year", + "quarter", + "days_in_month", + "is_month_start", + "is_month_end", + "is_quarter_start", + "is_quarter_end", + "is_year_start", + "is_year_end", + ], + ) + def test_dti_timestamp_fields(self, field): + # extra fields from DatetimeIndex like quarter and week + idx = tm.makeDateIndex(100) + expected = getattr(idx, field)[-1] + + warn = FutureWarning if field.startswith("is_") else None + with tm.assert_produces_warning(warn, match="Timestamp.freq is deprecated"): + result = getattr(Timestamp(idx[-1]), field) + assert result == expected + + def test_dti_timestamp_isocalendar_fields(self): + idx = tm.makeDateIndex(100) + expected = tuple(idx.isocalendar().iloc[-1].to_list()) + result = idx[-1].isocalendar() + assert result == expected + + def test_dti_timestamp_freq_fields(self): + # extra fields from DatetimeIndex like quarter and week + idx = tm.makeDateIndex(100) + + msg = "The 'freq' argument in Timestamp is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + ts = Timestamp(idx[-1], idx.freq) + + msg2 = "Timestamp.freq is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg2): + assert idx.freq == ts.freq + + msg3 = "Timestamp.freqstr is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg3): + assert idx.freqstr == ts.freqstr + + # ---------------------------------------------------------------- + # DatetimeIndex.round + + def test_round_daily(self): + dti = date_range("20130101 09:10:11", periods=5) + result = dti.round("D") + expected = date_range("20130101", periods=5) + tm.assert_index_equal(result, expected) + + dti = dti.tz_localize("UTC").tz_convert("US/Eastern") + result = dti.round("D") + expected = date_range("20130101", periods=5).tz_localize("US/Eastern") + tm.assert_index_equal(result, expected) + + result = dti.round("s") + tm.assert_index_equal(result, dti) + + @pytest.mark.parametrize( + "freq, error_msg", + [ + ("Y", " is a non-fixed frequency"), + ("M", " is a non-fixed frequency"), + ("foobar", "Invalid frequency: foobar"), + ], + ) + def test_round_invalid(self, freq, error_msg): + dti = date_range("20130101 09:10:11", periods=5) + dti = dti.tz_localize("UTC").tz_convert("US/Eastern") + with pytest.raises(ValueError, match=error_msg): + dti.round(freq) + + def test_round(self, tz_naive_fixture): + tz = tz_naive_fixture + rng = 
date_range(start="2016-01-01", periods=5, freq="30Min", tz=tz) + elt = rng[1] + + expected_rng = DatetimeIndex( + [ + Timestamp("2016-01-01 00:00:00", tz=tz), + Timestamp("2016-01-01 00:00:00", tz=tz), + Timestamp("2016-01-01 01:00:00", tz=tz), + Timestamp("2016-01-01 02:00:00", tz=tz), + Timestamp("2016-01-01 02:00:00", tz=tz), + ] + ) + expected_elt = expected_rng[1] + + tm.assert_index_equal(rng.round(freq="H"), expected_rng) + assert elt.round(freq="H") == expected_elt + + msg = INVALID_FREQ_ERR_MSG + with pytest.raises(ValueError, match=msg): + rng.round(freq="foo") + with pytest.raises(ValueError, match=msg): + elt.round(freq="foo") + + msg = " is a non-fixed frequency" + with pytest.raises(ValueError, match=msg): + rng.round(freq="M") + with pytest.raises(ValueError, match=msg): + elt.round(freq="M") + + # GH#14440 & GH#15578 + index = DatetimeIndex(["2016-10-17 12:00:00.0015"], tz=tz) + result = index.round("ms") + expected = DatetimeIndex(["2016-10-17 12:00:00.002000"], tz=tz) + tm.assert_index_equal(result, expected) + + for freq in ["us", "ns"]: + tm.assert_index_equal(index, index.round(freq)) + + index = DatetimeIndex(["2016-10-17 12:00:00.00149"], tz=tz) + result = index.round("ms") + expected = DatetimeIndex(["2016-10-17 12:00:00.001000"], tz=tz) + tm.assert_index_equal(result, expected) + + index = DatetimeIndex(["2016-10-17 12:00:00.001501031"]) + result = index.round("10ns") + expected = DatetimeIndex(["2016-10-17 12:00:00.001501030"]) + tm.assert_index_equal(result, expected) + + with tm.assert_produces_warning(False): + ts = "2016-10-17 12:00:00.001501031" + DatetimeIndex([ts]).round("1010ns") + + def test_no_rounding_occurs(self, tz_naive_fixture): + # GH 21262 + tz = tz_naive_fixture + rng = date_range(start="2016-01-01", periods=5, freq="2Min", tz=tz) + + expected_rng = DatetimeIndex( + [ + Timestamp("2016-01-01 00:00:00", tz=tz), + Timestamp("2016-01-01 00:02:00", tz=tz), + Timestamp("2016-01-01 00:04:00", tz=tz), + Timestamp("2016-01-01 00:06:00", tz=tz), + Timestamp("2016-01-01 00:08:00", tz=tz), + ] + ) + + tm.assert_index_equal(rng.round(freq="2T"), expected_rng) + + @pytest.mark.parametrize( + "test_input, rounder, freq, expected", + [ + (["2117-01-01 00:00:45"], "floor", "15s", ["2117-01-01 00:00:45"]), + (["2117-01-01 00:00:45"], "ceil", "15s", ["2117-01-01 00:00:45"]), + ( + ["2117-01-01 00:00:45.000000012"], + "floor", + "10ns", + ["2117-01-01 00:00:45.000000010"], + ), + ( + ["1823-01-01 00:00:01.000000012"], + "ceil", + "10ns", + ["1823-01-01 00:00:01.000000020"], + ), + (["1823-01-01 00:00:01"], "floor", "1s", ["1823-01-01 00:00:01"]), + (["1823-01-01 00:00:01"], "ceil", "1s", ["1823-01-01 00:00:01"]), + (["2018-01-01 00:15:00"], "ceil", "15T", ["2018-01-01 00:15:00"]), + (["2018-01-01 00:15:00"], "floor", "15T", ["2018-01-01 00:15:00"]), + (["1823-01-01 03:00:00"], "ceil", "3H", ["1823-01-01 03:00:00"]), + (["1823-01-01 03:00:00"], "floor", "3H", ["1823-01-01 03:00:00"]), + ( + ("NaT", "1823-01-01 00:00:01"), + "floor", + "1s", + ("NaT", "1823-01-01 00:00:01"), + ), + ( + ("NaT", "1823-01-01 00:00:01"), + "ceil", + "1s", + ("NaT", "1823-01-01 00:00:01"), + ), + ], + ) + def test_ceil_floor_edge(self, test_input, rounder, freq, expected): + dt = DatetimeIndex(list(test_input)) + func = getattr(dt, rounder) + result = func(freq) + expected = DatetimeIndex(list(expected)) + assert expected.equals(result) + + @pytest.mark.parametrize( + "start, index_freq, periods", + [("2018-01-01", "12H", 25), ("2018-01-01 0:0:0.124999", "1ns", 1000)], + ) + 
@pytest.mark.parametrize( + "round_freq", + [ + "2ns", + "3ns", + "4ns", + "5ns", + "6ns", + "7ns", + "250ns", + "500ns", + "750ns", + "1us", + "19us", + "250us", + "500us", + "750us", + "1s", + "2s", + "3s", + "12H", + "1D", + ], + ) + def test_round_int64(self, start, index_freq, periods, round_freq): + dt = date_range(start=start, freq=index_freq, periods=periods) + unit = to_offset(round_freq).nanos + + # test floor + result = dt.floor(round_freq) + diff = dt.asi8 - result.asi8 + mod = result.asi8 % unit + assert (mod == 0).all(), f"floor not a {round_freq} multiple" + assert (0 <= diff).all() and (diff < unit).all(), "floor error" + + # test ceil + result = dt.ceil(round_freq) + diff = result.asi8 - dt.asi8 + mod = result.asi8 % unit + assert (mod == 0).all(), f"ceil not a {round_freq} multiple" + assert (0 <= diff).all() and (diff < unit).all(), "ceil error" + + # test round + result = dt.round(round_freq) + diff = abs(result.asi8 - dt.asi8) + mod = result.asi8 % unit + assert (mod == 0).all(), f"round not a {round_freq} multiple" + assert (diff <= unit // 2).all(), "round error" + if unit % 2 == 0: + assert ( + result.asi8[diff == unit // 2] % 2 == 0 + ).all(), "round half to even error" + + # ---------------------------------------------------------------- + # DatetimeIndex.normalize + + def test_normalize(self): + rng = date_range("1/1/2000 9:30", periods=10, freq="D") + + result = rng.normalize() + expected = date_range("1/1/2000", periods=10, freq="D") + tm.assert_index_equal(result, expected) + + arr_ns = np.array([1380585623454345752, 1380585612343234312]).astype( + "datetime64[ns]" + ) + rng_ns = DatetimeIndex(arr_ns) + rng_ns_normalized = rng_ns.normalize() + + arr_ns = np.array([1380585600000000000, 1380585600000000000]).astype( + "datetime64[ns]" + ) + expected = DatetimeIndex(arr_ns) + tm.assert_index_equal(rng_ns_normalized, expected) + + assert result.is_normalized + assert not rng.is_normalized + + def test_normalize_nat(self): + dti = DatetimeIndex([pd.NaT, Timestamp("2018-01-01 01:00:00")]) + result = dti.normalize() + expected = DatetimeIndex([pd.NaT, Timestamp("2018-01-01")]) + tm.assert_index_equal(result, expected) + + +class TestDateTimeIndexToJulianDate: + def test_1700(self): + dr = date_range(start=Timestamp("1710-10-01"), periods=5, freq="D") + r1 = pd.Index([x.to_julian_date() for x in dr]) + r2 = dr.to_julian_date() + assert isinstance(r2, Float64Index) + tm.assert_index_equal(r1, r2) + + def test_2000(self): + dr = date_range(start=Timestamp("2000-02-27"), periods=5, freq="D") + r1 = pd.Index([x.to_julian_date() for x in dr]) + r2 = dr.to_julian_date() + assert isinstance(r2, Float64Index) + tm.assert_index_equal(r1, r2) + + def test_hour(self): + dr = date_range(start=Timestamp("2000-02-27"), periods=5, freq="H") + r1 = pd.Index([x.to_julian_date() for x in dr]) + r2 = dr.to_julian_date() + assert isinstance(r2, Float64Index) + tm.assert_index_equal(r1, r2) + + def test_minute(self): + dr = date_range(start=Timestamp("2000-02-27"), periods=5, freq="T") + r1 = pd.Index([x.to_julian_date() for x in dr]) + r2 = dr.to_julian_date() + assert isinstance(r2, Float64Index) + tm.assert_index_equal(r1, r2) + + def test_second(self): + dr = date_range(start=Timestamp("2000-02-27"), periods=5, freq="S") + r1 = pd.Index([x.to_julian_date() for x in dr]) + r2 = dr.to_julian_date() + assert isinstance(r2, Float64Index) + tm.assert_index_equal(r1, r2) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_setops.py 
b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_setops.py new file mode 100644 index 0000000..7c2b3b7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_setops.py @@ -0,0 +1,599 @@ +from datetime import datetime + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + DataFrame, + DatetimeIndex, + Index, + Series, + bdate_range, + date_range, +) +import pandas._testing as tm +from pandas.core.api import Int64Index + +from pandas.tseries.offsets import ( + BMonthEnd, + Minute, + MonthEnd, +) + +START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) + + +def test_union_many_deprecated(): + dti = date_range("2016-01-01", periods=3) + + with tm.assert_produces_warning(FutureWarning): + dti.union_many([dti, dti]) + + +class TestDatetimeIndexSetOps: + tz = [ + None, + "UTC", + "Asia/Tokyo", + "US/Eastern", + "dateutil/Asia/Singapore", + "dateutil/US/Pacific", + ] + + # TODO: moved from test_datetimelike; dedup with version below + def test_union2(self, sort): + everything = tm.makeDateIndex(10) + first = everything[:5] + second = everything[5:] + union = first.union(second, sort=sort) + tm.assert_index_equal(union, everything) + + @pytest.mark.parametrize("box", [np.array, Series, list]) + def test_union3(self, sort, box): + everything = tm.makeDateIndex(10) + first = everything[:5] + second = everything[5:] + + # GH 10149 support listlike inputs other than Index objects + expected = first.union(second, sort=sort) + case = box(second.values) + result = first.union(case, sort=sort) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("tz", tz) + def test_union(self, tz, sort): + rng1 = date_range("1/1/2000", freq="D", periods=5, tz=tz) + other1 = date_range("1/6/2000", freq="D", periods=5, tz=tz) + expected1 = date_range("1/1/2000", freq="D", periods=10, tz=tz) + expected1_notsorted = DatetimeIndex(list(other1) + list(rng1)) + + rng2 = date_range("1/1/2000", freq="D", periods=5, tz=tz) + other2 = date_range("1/4/2000", freq="D", periods=5, tz=tz) + expected2 = date_range("1/1/2000", freq="D", periods=8, tz=tz) + expected2_notsorted = DatetimeIndex(list(other2) + list(rng2[:3])) + + rng3 = date_range("1/1/2000", freq="D", periods=5, tz=tz) + other3 = DatetimeIndex([], tz=tz) + expected3 = date_range("1/1/2000", freq="D", periods=5, tz=tz) + expected3_notsorted = rng3 + + for rng, other, exp, exp_notsorted in [ + (rng1, other1, expected1, expected1_notsorted), + (rng2, other2, expected2, expected2_notsorted), + (rng3, other3, expected3, expected3_notsorted), + ]: + + result_union = rng.union(other, sort=sort) + tm.assert_index_equal(result_union, exp) + + result_union = other.union(rng, sort=sort) + if sort is None: + tm.assert_index_equal(result_union, exp) + else: + tm.assert_index_equal(result_union, exp_notsorted) + + def test_union_coverage(self, sort): + idx = DatetimeIndex(["2000-01-03", "2000-01-01", "2000-01-02"]) + ordered = DatetimeIndex(idx.sort_values(), freq="infer") + result = ordered.union(idx, sort=sort) + tm.assert_index_equal(result, ordered) + + result = ordered[:0].union(ordered, sort=sort) + tm.assert_index_equal(result, ordered) + assert result.freq == ordered.freq + + def test_union_bug_1730(self, sort): + rng_a = date_range("1/1/2012", periods=4, freq="3H") + rng_b = date_range("1/1/2012", periods=4, freq="4H") + + result = rng_a.union(rng_b, sort=sort) + exp = list(rng_a) + list(rng_b[1:]) + if sort is None: + exp = 
DatetimeIndex(sorted(exp)) + else: + exp = DatetimeIndex(exp) + tm.assert_index_equal(result, exp) + + def test_union_bug_1745(self, sort): + left = DatetimeIndex(["2012-05-11 15:19:49.695000"]) + right = DatetimeIndex( + [ + "2012-05-29 13:04:21.322000", + "2012-05-11 15:27:24.873000", + "2012-05-11 15:31:05.350000", + ] + ) + + result = left.union(right, sort=sort) + exp = DatetimeIndex( + [ + "2012-05-11 15:19:49.695000", + "2012-05-29 13:04:21.322000", + "2012-05-11 15:27:24.873000", + "2012-05-11 15:31:05.350000", + ] + ) + if sort is None: + exp = exp.sort_values() + tm.assert_index_equal(result, exp) + + def test_union_bug_4564(self, sort): + from pandas import DateOffset + + left = date_range("2013-01-01", "2013-02-01") + right = left + DateOffset(minutes=15) + + result = left.union(right, sort=sort) + exp = list(left) + list(right) + if sort is None: + exp = DatetimeIndex(sorted(exp)) + else: + exp = DatetimeIndex(exp) + tm.assert_index_equal(result, exp) + + def test_union_freq_both_none(self, sort): + # GH11086 + expected = bdate_range("20150101", periods=10) + expected._data.freq = None + + result = expected.union(expected, sort=sort) + tm.assert_index_equal(result, expected) + assert result.freq is None + + def test_union_freq_infer(self): + # When taking the union of two DatetimeIndexes, we infer + # a freq even if the arguments don't have freq. This matches + # TimedeltaIndex behavior. + dti = date_range("2016-01-01", periods=5) + left = dti[[0, 1, 3, 4]] + right = dti[[2, 3, 1]] + + assert left.freq is None + assert right.freq is None + + result = left.union(right) + tm.assert_index_equal(result, dti) + assert result.freq == "D" + + def test_union_dataframe_index(self): + rng1 = date_range("1/1/1999", "1/1/2012", freq="MS") + s1 = Series(np.random.randn(len(rng1)), rng1) + + rng2 = date_range("1/1/1980", "12/1/2001", freq="MS") + s2 = Series(np.random.randn(len(rng2)), rng2) + df = DataFrame({"s1": s1, "s2": s2}) + + exp = date_range("1/1/1980", "1/1/2012", freq="MS") + tm.assert_index_equal(df.index, exp) + + def test_union_with_DatetimeIndex(self, sort): + i1 = Int64Index(np.arange(0, 20, 2)) + i2 = date_range(start="2012-01-03 00:00:00", periods=10, freq="D") + # Works + i1.union(i2, sort=sort) + # Fails with "AttributeError: can't set attribute" + i2.union(i1, sort=sort) + + # TODO: moved from test_datetimelike; de-duplicate with version below + def test_intersection2(self): + first = tm.makeDateIndex(10) + second = first[5:] + intersect = first.intersection(second) + assert tm.equalContents(intersect, second) + + # GH 10149 + cases = [klass(second.values) for klass in [np.array, Series, list]] + for case in cases: + result = first.intersection(case) + assert tm.equalContents(result, second) + + third = Index(["a", "b", "c"]) + result = first.intersection(third) + expected = Index([], dtype=object) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "tz", [None, "Asia/Tokyo", "US/Eastern", "dateutil/US/Pacific"] + ) + def test_intersection(self, tz, sort): + # GH 4690 (with tz) + base = date_range("6/1/2000", "6/30/2000", freq="D", name="idx") + + # if target has the same name, it is preserved + rng2 = date_range("5/15/2000", "6/20/2000", freq="D", name="idx") + expected2 = date_range("6/1/2000", "6/20/2000", freq="D", name="idx") + + # if target name is different, it will be reset + rng3 = date_range("5/15/2000", "6/20/2000", freq="D", name="other") + expected3 = date_range("6/1/2000", "6/20/2000", freq="D", name=None) + + rng4 = 
date_range("7/1/2000", "7/31/2000", freq="D", name="idx") + expected4 = DatetimeIndex([], freq="D", name="idx") + + for (rng, expected) in [ + (rng2, expected2), + (rng3, expected3), + (rng4, expected4), + ]: + result = base.intersection(rng) + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + # non-monotonic + base = DatetimeIndex( + ["2011-01-05", "2011-01-04", "2011-01-02", "2011-01-03"], tz=tz, name="idx" + ) + + rng2 = DatetimeIndex( + ["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"], tz=tz, name="idx" + ) + expected2 = DatetimeIndex(["2011-01-04", "2011-01-02"], tz=tz, name="idx") + + rng3 = DatetimeIndex( + ["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"], + tz=tz, + name="other", + ) + expected3 = DatetimeIndex(["2011-01-04", "2011-01-02"], tz=tz, name=None) + + # GH 7880 + rng4 = date_range("7/1/2000", "7/31/2000", freq="D", tz=tz, name="idx") + expected4 = DatetimeIndex([], tz=tz, name="idx") + assert expected4.freq is None + + for (rng, expected) in [ + (rng2, expected2), + (rng3, expected3), + (rng4, expected4), + ]: + result = base.intersection(rng, sort=sort) + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + # parametrize over both anchored and non-anchored freqs, as they + # have different code paths + @pytest.mark.parametrize("freq", ["T", "B"]) + def test_intersection_empty(self, tz_aware_fixture, freq): + # empty same freq GH2129 + tz = tz_aware_fixture + rng = date_range("6/1/2000", "6/15/2000", freq=freq, tz=tz) + result = rng[0:0].intersection(rng) + assert len(result) == 0 + assert result.freq == rng.freq + + result = rng.intersection(rng[0:0]) + assert len(result) == 0 + assert result.freq == rng.freq + + # no overlap GH#33604 + check_freq = freq != "T" # We don't preserve freq on non-anchored offsets + result = rng[:3].intersection(rng[-3:]) + tm.assert_index_equal(result, rng[:0]) + if check_freq: + # We don't preserve freq on non-anchored offsets + assert result.freq == rng.freq + + # swapped left and right + result = rng[-3:].intersection(rng[:3]) + tm.assert_index_equal(result, rng[:0]) + if check_freq: + # We don't preserve freq on non-anchored offsets + assert result.freq == rng.freq + + def test_intersection_bug_1708(self): + from pandas import DateOffset + + index_1 = date_range("1/1/2012", periods=4, freq="12H") + index_2 = index_1 + DateOffset(hours=1) + + with tm.assert_produces_warning(FutureWarning): + result = index_1 & index_2 + assert len(result) == 0 + + @pytest.mark.parametrize("tz", tz) + def test_difference(self, tz, sort): + rng_dates = ["1/2/2000", "1/3/2000", "1/1/2000", "1/4/2000", "1/5/2000"] + + rng1 = DatetimeIndex(rng_dates, tz=tz) + other1 = date_range("1/6/2000", freq="D", periods=5, tz=tz) + expected1 = DatetimeIndex(rng_dates, tz=tz) + + rng2 = DatetimeIndex(rng_dates, tz=tz) + other2 = date_range("1/4/2000", freq="D", periods=5, tz=tz) + expected2 = DatetimeIndex(rng_dates[:3], tz=tz) + + rng3 = DatetimeIndex(rng_dates, tz=tz) + other3 = DatetimeIndex([], tz=tz) + expected3 = DatetimeIndex(rng_dates, tz=tz) + + for rng, other, expected in [ + (rng1, other1, expected1), + (rng2, other2, expected2), + (rng3, other3, expected3), + ]: + result_diff = rng.difference(other, sort) + if sort is None and len(other): + # We dont sort (yet?) 
when empty GH#24959 + expected = expected.sort_values() + tm.assert_index_equal(result_diff, expected) + + def test_difference_freq(self, sort): + # GH14323: difference of DatetimeIndex should not preserve frequency + + index = date_range("20160920", "20160925", freq="D") + other = date_range("20160921", "20160924", freq="D") + expected = DatetimeIndex(["20160920", "20160925"], freq=None) + idx_diff = index.difference(other, sort) + tm.assert_index_equal(idx_diff, expected) + tm.assert_attr_equal("freq", idx_diff, expected) + + other = date_range("20160922", "20160925", freq="D") + idx_diff = index.difference(other, sort) + expected = DatetimeIndex(["20160920", "20160921"], freq=None) + tm.assert_index_equal(idx_diff, expected) + tm.assert_attr_equal("freq", idx_diff, expected) + + def test_datetimeindex_diff(self, sort): + dti1 = date_range(freq="Q-JAN", start=datetime(1997, 12, 31), periods=100) + dti2 = date_range(freq="Q-JAN", start=datetime(1997, 12, 31), periods=98) + assert len(dti1.difference(dti2, sort)) == 2 + + @pytest.mark.parametrize("tz", [None, "Asia/Tokyo", "US/Eastern"]) + def test_setops_preserve_freq(self, tz): + rng = date_range("1/1/2000", "1/1/2002", name="idx", tz=tz) + + result = rng[:50].union(rng[50:100]) + assert result.name == rng.name + assert result.freq == rng.freq + assert result.tz == rng.tz + + result = rng[:50].union(rng[30:100]) + assert result.name == rng.name + assert result.freq == rng.freq + assert result.tz == rng.tz + + result = rng[:50].union(rng[60:100]) + assert result.name == rng.name + assert result.freq is None + assert result.tz == rng.tz + + result = rng[:50].intersection(rng[25:75]) + assert result.name == rng.name + assert result.freqstr == "D" + assert result.tz == rng.tz + + nofreq = DatetimeIndex(list(rng[25:75]), name="other") + result = rng[:50].union(nofreq) + assert result.name is None + assert result.freq == rng.freq + assert result.tz == rng.tz + + result = rng[:50].intersection(nofreq) + assert result.name is None + assert result.freq == rng.freq + assert result.tz == rng.tz + + def test_intersection_non_tick_no_fastpath(self): + # GH#42104 + dti = DatetimeIndex( + [ + "2018-12-31", + "2019-03-31", + "2019-06-30", + "2019-09-30", + "2019-12-31", + "2020-03-31", + ], + freq="Q-DEC", + ) + result = dti[::2].intersection(dti[1::2]) + expected = dti[:0] + tm.assert_index_equal(result, expected) + + +class TestBusinessDatetimeIndex: + def setup_method(self, method): + self.rng = bdate_range(START, END) + + def test_union(self, sort): + # overlapping + left = self.rng[:10] + right = self.rng[5:10] + + the_union = left.union(right, sort=sort) + assert isinstance(the_union, DatetimeIndex) + + # non-overlapping, gap in middle + left = self.rng[:5] + right = self.rng[10:] + + the_union = left.union(right, sort=sort) + assert isinstance(the_union, Index) + + # non-overlapping, no gap + left = self.rng[:5] + right = self.rng[5:10] + + the_union = left.union(right, sort=sort) + assert isinstance(the_union, DatetimeIndex) + + # order does not matter + if sort is None: + tm.assert_index_equal(right.union(left, sort=sort), the_union) + else: + expected = DatetimeIndex(list(right) + list(left)) + tm.assert_index_equal(right.union(left, sort=sort), expected) + + # overlapping, but different offset + rng = date_range(START, END, freq=BMonthEnd()) + + the_union = self.rng.union(rng, sort=sort) + assert isinstance(the_union, DatetimeIndex) + + def test_union_not_cacheable(self, sort): + rng = date_range("1/1/2000", periods=50, freq=Minute()) + rng1 
= rng[10:] + rng2 = rng[:25] + the_union = rng1.union(rng2, sort=sort) + if sort is None: + tm.assert_index_equal(the_union, rng) + else: + expected = DatetimeIndex(list(rng[10:]) + list(rng[:10])) + tm.assert_index_equal(the_union, expected) + + rng1 = rng[10:] + rng2 = rng[15:35] + the_union = rng1.union(rng2, sort=sort) + expected = rng[10:] + tm.assert_index_equal(the_union, expected) + + def test_intersection(self): + rng = date_range("1/1/2000", periods=50, freq=Minute()) + rng1 = rng[10:] + rng2 = rng[:25] + the_int = rng1.intersection(rng2) + expected = rng[10:25] + tm.assert_index_equal(the_int, expected) + assert isinstance(the_int, DatetimeIndex) + assert the_int.freq == rng.freq + + the_int = rng1.intersection(rng2.view(DatetimeIndex)) + tm.assert_index_equal(the_int, expected) + + # non-overlapping + the_int = rng[:10].intersection(rng[10:]) + expected = DatetimeIndex([]) + tm.assert_index_equal(the_int, expected) + + def test_intersection_bug(self): + # GH #771 + a = bdate_range("11/30/2011", "12/31/2011") + b = bdate_range("12/10/2011", "12/20/2011") + result = a.intersection(b) + tm.assert_index_equal(result, b) + assert result.freq == b.freq + + def test_intersection_list(self): + # GH#35876 + # values is not an Index -> no name -> retain "a" + values = [pd.Timestamp("2020-01-01"), pd.Timestamp("2020-02-01")] + idx = DatetimeIndex(values, name="a") + res = idx.intersection(values) + tm.assert_index_equal(res, idx) + + def test_month_range_union_tz_pytz(self, sort): + from pytz import timezone + + tz = timezone("US/Eastern") + + early_start = datetime(2011, 1, 1) + early_end = datetime(2011, 3, 1) + + late_start = datetime(2011, 3, 1) + late_end = datetime(2011, 5, 1) + + early_dr = date_range(start=early_start, end=early_end, tz=tz, freq=MonthEnd()) + late_dr = date_range(start=late_start, end=late_end, tz=tz, freq=MonthEnd()) + + early_dr.union(late_dr, sort=sort) + + @td.skip_if_windows + def test_month_range_union_tz_dateutil(self, sort): + from pandas._libs.tslibs.timezones import dateutil_gettz + + tz = dateutil_gettz("US/Eastern") + + early_start = datetime(2011, 1, 1) + early_end = datetime(2011, 3, 1) + + late_start = datetime(2011, 3, 1) + late_end = datetime(2011, 5, 1) + + early_dr = date_range(start=early_start, end=early_end, tz=tz, freq=MonthEnd()) + late_dr = date_range(start=late_start, end=late_end, tz=tz, freq=MonthEnd()) + + early_dr.union(late_dr, sort=sort) + + @pytest.mark.parametrize("sort", [False, None]) + def test_intersection_duplicates(self, sort): + # GH#38196 + idx1 = Index( + [ + pd.Timestamp("2019-12-13"), + pd.Timestamp("2019-12-12"), + pd.Timestamp("2019-12-12"), + ] + ) + result = idx1.intersection(idx1, sort=sort) + expected = Index([pd.Timestamp("2019-12-13"), pd.Timestamp("2019-12-12")]) + tm.assert_index_equal(result, expected) + + +class TestCustomDatetimeIndex: + def setup_method(self, method): + self.rng = bdate_range(START, END, freq="C") + + def test_union(self, sort): + # overlapping + left = self.rng[:10] + right = self.rng[5:10] + + the_union = left.union(right, sort=sort) + assert isinstance(the_union, DatetimeIndex) + + # non-overlapping, gap in middle + left = self.rng[:5] + right = self.rng[10:] + + the_union = left.union(right, sort) + assert isinstance(the_union, Index) + + # non-overlapping, no gap + left = self.rng[:5] + right = self.rng[5:10] + + the_union = left.union(right, sort=sort) + assert isinstance(the_union, DatetimeIndex) + + # order does not matter + if sort is None: + 
tm.assert_index_equal(right.union(left, sort=sort), the_union) + + # overlapping, but different offset + rng = date_range(START, END, freq=BMonthEnd()) + + the_union = self.rng.union(rng, sort=sort) + assert isinstance(the_union, DatetimeIndex) + + def test_intersection_bug(self): + # GH #771 + a = bdate_range("11/30/2011", "12/31/2011", freq="C") + b = bdate_range("12/10/2011", "12/20/2011", freq="C") + result = a.intersection(b) + tm.assert_index_equal(result, b) + assert result.freq == b.freq diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_timezones.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_timezones.py new file mode 100644 index 0000000..a12f4c9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_timezones.py @@ -0,0 +1,1206 @@ +""" +Tests for DatetimeIndex timezone-related methods +""" +from datetime import ( + date, + datetime, + time, + timedelta, + tzinfo, +) + +import dateutil +from dateutil.tz import ( + gettz, + tzlocal, +) +import numpy as np +import pytest +import pytz + +from pandas._libs.tslibs import ( + conversion, + timezones, +) +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + DatetimeIndex, + Index, + Timestamp, + bdate_range, + date_range, + isna, + to_datetime, +) +import pandas._testing as tm + + +class FixedOffset(tzinfo): + """Fixed offset in minutes east from UTC.""" + + def __init__(self, offset, name): + self.__offset = timedelta(minutes=offset) + self.__name = name + + def utcoffset(self, dt): + return self.__offset + + def tzname(self, dt): + return self.__name + + def dst(self, dt): + return timedelta(0) + + +fixed_off = FixedOffset(-420, "-07:00") +fixed_off_no_name = FixedOffset(-330, None) + + +class TestDatetimeIndexTimezones: + # ------------------------------------------------------------- + # DatetimeIndex.tz_convert + def test_tz_convert_nat(self): + # GH#5546 + dates = [pd.NaT] + idx = DatetimeIndex(dates) + idx = idx.tz_localize("US/Pacific") + tm.assert_index_equal(idx, DatetimeIndex(dates, tz="US/Pacific")) + idx = idx.tz_convert("US/Eastern") + tm.assert_index_equal(idx, DatetimeIndex(dates, tz="US/Eastern")) + idx = idx.tz_convert("UTC") + tm.assert_index_equal(idx, DatetimeIndex(dates, tz="UTC")) + + dates = ["2010-12-01 00:00", "2010-12-02 00:00", pd.NaT] + idx = DatetimeIndex(dates) + idx = idx.tz_localize("US/Pacific") + tm.assert_index_equal(idx, DatetimeIndex(dates, tz="US/Pacific")) + idx = idx.tz_convert("US/Eastern") + expected = ["2010-12-01 03:00", "2010-12-02 03:00", pd.NaT] + tm.assert_index_equal(idx, DatetimeIndex(expected, tz="US/Eastern")) + + idx = idx + pd.offsets.Hour(5) + expected = ["2010-12-01 08:00", "2010-12-02 08:00", pd.NaT] + tm.assert_index_equal(idx, DatetimeIndex(expected, tz="US/Eastern")) + idx = idx.tz_convert("US/Pacific") + expected = ["2010-12-01 05:00", "2010-12-02 05:00", pd.NaT] + tm.assert_index_equal(idx, DatetimeIndex(expected, tz="US/Pacific")) + + idx = idx + np.timedelta64(3, "h") + expected = ["2010-12-01 08:00", "2010-12-02 08:00", pd.NaT] + tm.assert_index_equal(idx, DatetimeIndex(expected, tz="US/Pacific")) + + idx = idx.tz_convert("US/Eastern") + expected = ["2010-12-01 11:00", "2010-12-02 11:00", pd.NaT] + tm.assert_index_equal(idx, DatetimeIndex(expected, tz="US/Eastern")) + + @pytest.mark.parametrize("prefix", ["", "dateutil/"]) + def test_dti_tz_convert_compat_timestamp(self, prefix): + strdates = ["1/1/2012", "3/1/2012", "4/1/2012"] + idx = 
DatetimeIndex(strdates, tz=prefix + "US/Eastern") + + conv = idx[0].tz_convert(prefix + "US/Pacific") + expected = idx.tz_convert(prefix + "US/Pacific")[0] + + assert conv == expected + + def test_dti_tz_convert_hour_overflow_dst(self): + # Regression test for: + # https://github.com/pandas-dev/pandas/issues/13306 + + # sorted case US/Eastern -> UTC + ts = ["2008-05-12 09:50:00", "2008-12-12 09:50:35", "2009-05-12 09:50:32"] + tt = DatetimeIndex(ts).tz_localize("US/Eastern") + ut = tt.tz_convert("UTC") + expected = Index([13, 14, 13]) + tm.assert_index_equal(ut.hour, expected) + + # sorted case UTC -> US/Eastern + ts = ["2008-05-12 13:50:00", "2008-12-12 14:50:35", "2009-05-12 13:50:32"] + tt = DatetimeIndex(ts).tz_localize("UTC") + ut = tt.tz_convert("US/Eastern") + expected = Index([9, 9, 9]) + tm.assert_index_equal(ut.hour, expected) + + # unsorted case US/Eastern -> UTC + ts = ["2008-05-12 09:50:00", "2008-12-12 09:50:35", "2008-05-12 09:50:32"] + tt = DatetimeIndex(ts).tz_localize("US/Eastern") + ut = tt.tz_convert("UTC") + expected = Index([13, 14, 13]) + tm.assert_index_equal(ut.hour, expected) + + # unsorted case UTC -> US/Eastern + ts = ["2008-05-12 13:50:00", "2008-12-12 14:50:35", "2008-05-12 13:50:32"] + tt = DatetimeIndex(ts).tz_localize("UTC") + ut = tt.tz_convert("US/Eastern") + expected = Index([9, 9, 9]) + tm.assert_index_equal(ut.hour, expected) + + @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"]) + def test_dti_tz_convert_hour_overflow_dst_timestamps(self, tz): + # Regression test for GH#13306 + + # sorted case US/Eastern -> UTC + ts = [ + Timestamp("2008-05-12 09:50:00", tz=tz), + Timestamp("2008-12-12 09:50:35", tz=tz), + Timestamp("2009-05-12 09:50:32", tz=tz), + ] + tt = DatetimeIndex(ts) + ut = tt.tz_convert("UTC") + expected = Index([13, 14, 13]) + tm.assert_index_equal(ut.hour, expected) + + # sorted case UTC -> US/Eastern + ts = [ + Timestamp("2008-05-12 13:50:00", tz="UTC"), + Timestamp("2008-12-12 14:50:35", tz="UTC"), + Timestamp("2009-05-12 13:50:32", tz="UTC"), + ] + tt = DatetimeIndex(ts) + ut = tt.tz_convert("US/Eastern") + expected = Index([9, 9, 9]) + tm.assert_index_equal(ut.hour, expected) + + # unsorted case US/Eastern -> UTC + ts = [ + Timestamp("2008-05-12 09:50:00", tz=tz), + Timestamp("2008-12-12 09:50:35", tz=tz), + Timestamp("2008-05-12 09:50:32", tz=tz), + ] + tt = DatetimeIndex(ts) + ut = tt.tz_convert("UTC") + expected = Index([13, 14, 13]) + tm.assert_index_equal(ut.hour, expected) + + # unsorted case UTC -> US/Eastern + ts = [ + Timestamp("2008-05-12 13:50:00", tz="UTC"), + Timestamp("2008-12-12 14:50:35", tz="UTC"), + Timestamp("2008-05-12 13:50:32", tz="UTC"), + ] + tt = DatetimeIndex(ts) + ut = tt.tz_convert("US/Eastern") + expected = Index([9, 9, 9]) + tm.assert_index_equal(ut.hour, expected) + + @pytest.mark.parametrize("freq, n", [("H", 1), ("T", 60), ("S", 3600)]) + def test_dti_tz_convert_trans_pos_plus_1__bug(self, freq, n): + # Regression test for tslib.tz_convert(vals, tz1, tz2). + # See https://github.com/pandas-dev/pandas/issues/4496 for details. 
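+ # Background for the expected values below: at 2011-03-27 02:00 local time + # Europe/Moscow moved from UTC+3 to UTC+4 (clocks jumped from 01:59 to 03:00), + # so 23:00, 00:00 and 01:00 UTC convert to local hours 3, 4 and 5, e.g. + # Timestamp("2011-03-26 23:00", tz="UTC").tz_convert("Europe/Moscow").hour == 3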
+ idx = date_range(datetime(2011, 3, 26, 23), datetime(2011, 3, 27, 1), freq=freq) + idx = idx.tz_localize("UTC") + idx = idx.tz_convert("Europe/Moscow") + + expected = np.repeat(np.array([3, 4, 5]), np.array([n, n, 1])) + tm.assert_index_equal(idx.hour, Index(expected)) + + def test_dti_tz_convert_dst(self): + for freq, n in [("H", 1), ("T", 60), ("S", 3600)]: + # Start DST + idx = date_range( + "2014-03-08 23:00", "2014-03-09 09:00", freq=freq, tz="UTC" + ) + idx = idx.tz_convert("US/Eastern") + expected = np.repeat( + np.array([18, 19, 20, 21, 22, 23, 0, 1, 3, 4, 5]), + np.array([n, n, n, n, n, n, n, n, n, n, 1]), + ) + tm.assert_index_equal(idx.hour, Index(expected)) + + idx = date_range( + "2014-03-08 18:00", "2014-03-09 05:00", freq=freq, tz="US/Eastern" + ) + idx = idx.tz_convert("UTC") + expected = np.repeat( + np.array([23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), + np.array([n, n, n, n, n, n, n, n, n, n, 1]), + ) + tm.assert_index_equal(idx.hour, Index(expected)) + + # End DST + idx = date_range( + "2014-11-01 23:00", "2014-11-02 09:00", freq=freq, tz="UTC" + ) + idx = idx.tz_convert("US/Eastern") + expected = np.repeat( + np.array([19, 20, 21, 22, 23, 0, 1, 1, 2, 3, 4]), + np.array([n, n, n, n, n, n, n, n, n, n, 1]), + ) + tm.assert_index_equal(idx.hour, Index(expected)) + + idx = date_range( + "2014-11-01 18:00", "2014-11-02 05:00", freq=freq, tz="US/Eastern" + ) + idx = idx.tz_convert("UTC") + expected = np.repeat( + np.array([22, 23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), + np.array([n, n, n, n, n, n, n, n, n, n, n, n, 1]), + ) + tm.assert_index_equal(idx.hour, Index(expected)) + + # daily + # Start DST + idx = date_range("2014-03-08 00:00", "2014-03-09 00:00", freq="D", tz="UTC") + idx = idx.tz_convert("US/Eastern") + tm.assert_index_equal(idx.hour, Index([19, 19])) + + idx = date_range( + "2014-03-08 00:00", "2014-03-09 00:00", freq="D", tz="US/Eastern" + ) + idx = idx.tz_convert("UTC") + tm.assert_index_equal(idx.hour, Index([5, 5])) + + # End DST + idx = date_range("2014-11-01 00:00", "2014-11-02 00:00", freq="D", tz="UTC") + idx = idx.tz_convert("US/Eastern") + tm.assert_index_equal(idx.hour, Index([20, 20])) + + idx = date_range( + "2014-11-01 00:00", "2014-11-02 00:00", freq="D", tz="US/Eastern" + ) + idx = idx.tz_convert("UTC") + tm.assert_index_equal(idx.hour, Index([4, 4])) + + def test_tz_convert_roundtrip(self, tz_aware_fixture): + tz = tz_aware_fixture + idx1 = date_range(start="2014-01-01", end="2014-12-31", freq="M", tz="UTC") + exp1 = date_range(start="2014-01-01", end="2014-12-31", freq="M") + + idx2 = date_range(start="2014-01-01", end="2014-12-31", freq="D", tz="UTC") + exp2 = date_range(start="2014-01-01", end="2014-12-31", freq="D") + + idx3 = date_range(start="2014-01-01", end="2014-03-01", freq="H", tz="UTC") + exp3 = date_range(start="2014-01-01", end="2014-03-01", freq="H") + + idx4 = date_range(start="2014-08-01", end="2014-10-31", freq="T", tz="UTC") + exp4 = date_range(start="2014-08-01", end="2014-10-31", freq="T") + + for idx, expected in [(idx1, exp1), (idx2, exp2), (idx3, exp3), (idx4, exp4)]: + converted = idx.tz_convert(tz) + reset = converted.tz_convert(None) + tm.assert_index_equal(reset, expected) + assert reset.tzinfo is None + expected = converted.tz_convert("UTC").tz_localize(None) + expected = expected._with_freq("infer") + tm.assert_index_equal(reset, expected) + + def test_dti_tz_convert_tzlocal(self): + # GH#13583 + # tz_convert doesn't affect the internal values + dti = date_range(start="2001-01-01", end="2001-03-01", tz="UTC") + dti2 = 
dti.tz_convert(dateutil.tz.tzlocal()) + tm.assert_numpy_array_equal(dti2.asi8, dti.asi8) + + dti = date_range(start="2001-01-01", end="2001-03-01", tz=dateutil.tz.tzlocal()) + dti2 = dti.tz_convert(None) + tm.assert_numpy_array_equal(dti2.asi8, dti.asi8) + + @pytest.mark.parametrize( + "tz", + [ + "US/Eastern", + "dateutil/US/Eastern", + pytz.timezone("US/Eastern"), + gettz("US/Eastern"), + ], + ) + def test_dti_tz_convert_utc_to_local_no_modify(self, tz): + rng = date_range("3/11/2012", "3/12/2012", freq="H", tz="utc") + rng_eastern = rng.tz_convert(tz) + + # Values are unmodified + tm.assert_numpy_array_equal(rng.asi8, rng_eastern.asi8) + + assert timezones.tz_compare(rng_eastern.tz, timezones.maybe_get_tz(tz)) + + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_tz_convert_unsorted(self, tzstr): + dr = date_range("2012-03-09", freq="H", periods=100, tz="utc") + dr = dr.tz_convert(tzstr) + + result = dr[::-1].hour + exp = dr.hour[::-1] + tm.assert_almost_equal(result, exp) + + # ------------------------------------------------------------- + # DatetimeIndex.tz_localize + + def test_dti_tz_localize_nonexistent_raise_coerce(self): + # GH#13057 + times = ["2015-03-08 01:00", "2015-03-08 02:00", "2015-03-08 03:00"] + index = DatetimeIndex(times) + tz = "US/Eastern" + with pytest.raises(pytz.NonExistentTimeError, match="|".join(times)): + index.tz_localize(tz=tz) + + with pytest.raises(pytz.NonExistentTimeError, match="|".join(times)): + index.tz_localize(tz=tz, nonexistent="raise") + + result = index.tz_localize(tz=tz, nonexistent="NaT") + test_times = ["2015-03-08 01:00-05:00", "NaT", "2015-03-08 03:00-04:00"] + dti = to_datetime(test_times, utc=True) + expected = dti.tz_convert("US/Eastern") + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("tz", [pytz.timezone("US/Eastern"), gettz("US/Eastern")]) + def test_dti_tz_localize_ambiguous_infer(self, tz): + # November 6, 2011, fall back, repeat 2 AM hour + # With no repeated hours, we cannot infer the transition + dr = date_range(datetime(2011, 11, 6, 0), periods=5, freq=pd.offsets.Hour()) + with pytest.raises(pytz.AmbiguousTimeError, match="Cannot infer dst time"): + dr.tz_localize(tz) + + # With repeated hours, we can infer the transition + dr = date_range( + datetime(2011, 11, 6, 0), periods=5, freq=pd.offsets.Hour(), tz=tz + ) + times = [ + "11/06/2011 00:00", + "11/06/2011 01:00", + "11/06/2011 01:00", + "11/06/2011 02:00", + "11/06/2011 03:00", + ] + di = DatetimeIndex(times) + localized = di.tz_localize(tz, ambiguous="infer") + expected = dr._with_freq(None) + tm.assert_index_equal(expected, localized) + tm.assert_index_equal(expected, DatetimeIndex(times, tz=tz, ambiguous="infer")) + + # When there is no dst transition, nothing special happens + dr = date_range(datetime(2011, 6, 1, 0), periods=10, freq=pd.offsets.Hour()) + localized = dr.tz_localize(tz) + localized_infer = dr.tz_localize(tz, ambiguous="infer") + tm.assert_index_equal(localized, localized_infer) + + @pytest.mark.parametrize("tz", [pytz.timezone("US/Eastern"), gettz("US/Eastern")]) + def test_dti_tz_localize_ambiguous_times(self, tz): + # March 13, 2011, spring forward, skip from 2 AM to 3 AM + dr = date_range(datetime(2011, 3, 13, 1, 30), periods=3, freq=pd.offsets.Hour()) + with pytest.raises(pytz.NonExistentTimeError, match="2011-03-13 02:30:00"): + dr.tz_localize(tz) + + # after dst transition, it works + dr = date_range( + datetime(2011, 3, 13, 3, 30), periods=3, freq=pd.offsets.Hour(), tz=tz + ) + + # November 
6, 2011, fall back, repeat 2 AM hour + dr = date_range(datetime(2011, 11, 6, 1, 30), periods=3, freq=pd.offsets.Hour()) + with pytest.raises(pytz.AmbiguousTimeError, match="Cannot infer dst time"): + dr.tz_localize(tz) + + # UTC is OK + dr = date_range( + datetime(2011, 3, 13), periods=48, freq=pd.offsets.Minute(30), tz=pytz.utc + ) + + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_dti_tz_localize_pass_dates_to_utc(self, tzstr): + strdates = ["1/1/2012", "3/1/2012", "4/1/2012"] + + idx = DatetimeIndex(strdates) + conv = idx.tz_localize(tzstr) + + fromdates = DatetimeIndex(strdates, tz=tzstr) + + assert conv.tz == fromdates.tz + tm.assert_numpy_array_equal(conv.values, fromdates.values) + + @pytest.mark.parametrize("prefix", ["", "dateutil/"]) + def test_dti_tz_localize(self, prefix): + tzstr = prefix + "US/Eastern" + dti = date_range(start="1/1/2005", end="1/1/2005 0:00:30.256", freq="L") + dti2 = dti.tz_localize(tzstr) + + dti_utc = date_range( + start="1/1/2005 05:00", end="1/1/2005 5:00:30.256", freq="L", tz="utc" + ) + + tm.assert_numpy_array_equal(dti2.values, dti_utc.values) + + dti3 = dti2.tz_convert(prefix + "US/Pacific") + tm.assert_numpy_array_equal(dti3.values, dti_utc.values) + + dti = date_range(start="11/6/2011 1:59", end="11/6/2011 2:00", freq="L") + with pytest.raises(pytz.AmbiguousTimeError, match="Cannot infer dst time"): + dti.tz_localize(tzstr) + + dti = date_range(start="3/13/2011 1:59", end="3/13/2011 2:00", freq="L") + with pytest.raises(pytz.NonExistentTimeError, match="2011-03-13 02:00:00"): + dti.tz_localize(tzstr) + + @pytest.mark.parametrize( + "tz", + [ + "US/Eastern", + "dateutil/US/Eastern", + pytz.timezone("US/Eastern"), + gettz("US/Eastern"), + ], + ) + def test_dti_tz_localize_utc_conversion(self, tz): + # Localizing to time zone should: + # 1) check for DST ambiguities + # 2) convert to UTC + + rng = date_range("3/10/2012", "3/11/2012", freq="30T") + + converted = rng.tz_localize(tz) + expected_naive = rng + pd.offsets.Hour(5) + tm.assert_numpy_array_equal(converted.asi8, expected_naive.asi8) + + # DST ambiguity, this should fail + rng = date_range("3/11/2012", "3/12/2012", freq="30T") + # Is this really how it should fail?? + with pytest.raises(pytz.NonExistentTimeError, match="2012-03-11 02:00:00"): + rng.tz_localize(tz) + + def test_dti_tz_localize_roundtrip(self, tz_aware_fixture): + # note: this tz tests that a tz-naive index can be localized + # and de-localized successfully, when there are no DST transitions + # in the range. 
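+ # The round-trip below relies on tz_localize(None) returning the naive wall + # times; the frequency is not carried through the round trip, hence the + # idx._with_freq(None) expectation.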
+ idx = date_range(start="2014-06-01", end="2014-08-30", freq="15T") + tz = tz_aware_fixture + localized = idx.tz_localize(tz) + # can't localize a tz-aware object + with pytest.raises( + TypeError, match="Already tz-aware, use tz_convert to convert" + ): + localized.tz_localize(tz) + reset = localized.tz_localize(None) + assert reset.tzinfo is None + expected = idx._with_freq(None) + tm.assert_index_equal(reset, expected) + + def test_dti_tz_localize_naive(self): + rng = date_range("1/1/2011", periods=100, freq="H") + + conv = rng.tz_localize("US/Pacific") + exp = date_range("1/1/2011", periods=100, freq="H", tz="US/Pacific") + + tm.assert_index_equal(conv, exp._with_freq(None)) + + def test_dti_tz_localize_tzlocal(self): + # GH#13583 + offset = dateutil.tz.tzlocal().utcoffset(datetime(2011, 1, 1)) + offset = int(offset.total_seconds() * 1000000000) + + dti = date_range(start="2001-01-01", end="2001-03-01") + dti2 = dti.tz_localize(dateutil.tz.tzlocal()) + tm.assert_numpy_array_equal(dti2.asi8 + offset, dti.asi8) + + dti = date_range(start="2001-01-01", end="2001-03-01", tz=dateutil.tz.tzlocal()) + dti2 = dti.tz_localize(None) + tm.assert_numpy_array_equal(dti2.asi8 - offset, dti.asi8) + + @pytest.mark.parametrize("tz", [pytz.timezone("US/Eastern"), gettz("US/Eastern")]) + def test_dti_tz_localize_ambiguous_nat(self, tz): + times = [ + "11/06/2011 00:00", + "11/06/2011 01:00", + "11/06/2011 01:00", + "11/06/2011 02:00", + "11/06/2011 03:00", + ] + di = DatetimeIndex(times) + localized = di.tz_localize(tz, ambiguous="NaT") + + times = [ + "11/06/2011 00:00", + np.NaN, + np.NaN, + "11/06/2011 02:00", + "11/06/2011 03:00", + ] + di_test = DatetimeIndex(times, tz="US/Eastern") + + # left dtype is datetime64[ns, US/Eastern] + # right is datetime64[ns, tzfile('/usr/share/zoneinfo/US/Eastern')] + tm.assert_numpy_array_equal(di_test.values, localized.values) + + @pytest.mark.parametrize("tz", [pytz.timezone("US/Eastern"), gettz("US/Eastern")]) + def test_dti_tz_localize_ambiguous_flags(self, tz): + # November 6, 2011, fall back, repeat 2 AM hour + + # Pass in flags to determine right dst transition + dr = date_range( + datetime(2011, 11, 6, 0), periods=5, freq=pd.offsets.Hour(), tz=tz + ) + times = [ + "11/06/2011 00:00", + "11/06/2011 01:00", + "11/06/2011 01:00", + "11/06/2011 02:00", + "11/06/2011 03:00", + ] + + # Test tz_localize + di = DatetimeIndex(times) + is_dst = [1, 1, 0, 0, 0] + localized = di.tz_localize(tz, ambiguous=is_dst) + expected = dr._with_freq(None) + tm.assert_index_equal(expected, localized) + tm.assert_index_equal(expected, DatetimeIndex(times, tz=tz, ambiguous=is_dst)) + + localized = di.tz_localize(tz, ambiguous=np.array(is_dst)) + tm.assert_index_equal(dr, localized) + + localized = di.tz_localize(tz, ambiguous=np.array(is_dst).astype("bool")) + tm.assert_index_equal(dr, localized) + + # Test constructor + localized = DatetimeIndex(times, tz=tz, ambiguous=is_dst) + tm.assert_index_equal(dr, localized) + + # Test duplicate times where inferring the dst fails + times += times + di = DatetimeIndex(times) + + # When the sizes are incompatible, make sure error is raised + msg = "Length of ambiguous bool-array must be the same size as vals" + with pytest.raises(Exception, match=msg): + di.tz_localize(tz, ambiguous=is_dst) + + # When sizes are compatible and there are repeats ('infer' won't work) + is_dst = np.hstack((is_dst, is_dst)) + localized = di.tz_localize(tz, ambiguous=is_dst) + dr = dr.append(dr) + tm.assert_index_equal(dr, localized) + + # When there is no dst 
transition, nothing special happens + dr = date_range(datetime(2011, 6, 1, 0), periods=10, freq=pd.offsets.Hour()) + is_dst = np.array([1] * 10) + localized = dr.tz_localize(tz) + localized_is_dst = dr.tz_localize(tz, ambiguous=is_dst) + tm.assert_index_equal(localized, localized_is_dst) + + # TODO: belongs outside tz_localize tests? + @pytest.mark.parametrize("tz", ["Europe/London", "dateutil/Europe/London"]) + def test_dti_construction_ambiguous_endpoint(self, tz): + # construction with an ambiguous end-point + # GH#11626 + + with pytest.raises(pytz.AmbiguousTimeError, match="Cannot infer dst time"): + date_range( + "2013-10-26 23:00", "2013-10-27 01:00", tz="Europe/London", freq="H" + ) + + times = date_range( + "2013-10-26 23:00", "2013-10-27 01:00", freq="H", tz=tz, ambiguous="infer" + ) + assert times[0] == Timestamp("2013-10-26 23:00", tz=tz) + assert times[-1] == Timestamp("2013-10-27 01:00:00+0000", tz=tz) + + @pytest.mark.parametrize( + "tz, option, expected", + [ + ["US/Pacific", "shift_forward", "2019-03-10 03:00"], + ["dateutil/US/Pacific", "shift_forward", "2019-03-10 03:00"], + ["US/Pacific", "shift_backward", "2019-03-10 01:00"], + ["dateutil/US/Pacific", "shift_backward", "2019-03-10 01:00"], + ["US/Pacific", timedelta(hours=1), "2019-03-10 03:00"], + ], + ) + def test_dti_construction_nonexistent_endpoint(self, tz, option, expected): + # construction with a nonexistent end-point + + with pytest.raises(pytz.NonExistentTimeError, match="2019-03-10 02:00:00"): + date_range( + "2019-03-10 00:00", "2019-03-10 02:00", tz="US/Pacific", freq="H" + ) + + times = date_range( + "2019-03-10 00:00", "2019-03-10 02:00", freq="H", tz=tz, nonexistent=option + ) + assert times[-1] == Timestamp(expected, tz=tz) + + def test_dti_tz_localize_bdate_range(self): + dr = bdate_range("1/1/2009", "1/1/2010") + dr_utc = bdate_range("1/1/2009", "1/1/2010", tz=pytz.utc) + localized = dr.tz_localize(pytz.utc) + tm.assert_index_equal(dr_utc, localized) + + @pytest.mark.parametrize("tz", ["Europe/Warsaw", "dateutil/Europe/Warsaw"]) + @pytest.mark.parametrize( + "method, exp", [["NaT", pd.NaT], ["raise", None], ["foo", "invalid"]] + ) + def test_dti_tz_localize_nonexistent(self, tz, method, exp): + # GH 8917 + n = 60 + dti = date_range(start="2015-03-29 02:00:00", periods=n, freq="min") + if method == "raise": + with pytest.raises(pytz.NonExistentTimeError, match="2015-03-29 02:00:00"): + dti.tz_localize(tz, nonexistent=method) + elif exp == "invalid": + msg = ( + "The nonexistent argument must be one of " + "'raise', 'NaT', 'shift_forward', 'shift_backward' " + "or a timedelta object" + ) + with pytest.raises(ValueError, match=msg): + dti.tz_localize(tz, nonexistent=method) + else: + result = dti.tz_localize(tz, nonexistent=method) + expected = DatetimeIndex([exp] * n, tz=tz) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "start_ts, tz, end_ts, shift", + [ + ["2015-03-29 02:20:00", "Europe/Warsaw", "2015-03-29 03:00:00", "forward"], + [ + "2015-03-29 02:20:00", + "Europe/Warsaw", + "2015-03-29 01:59:59.999999999", + "backward", + ], + [ + "2015-03-29 02:20:00", + "Europe/Warsaw", + "2015-03-29 03:20:00", + timedelta(hours=1), + ], + [ + "2015-03-29 02:20:00", + "Europe/Warsaw", + "2015-03-29 01:20:00", + timedelta(hours=-1), + ], + ["2018-03-11 02:33:00", "US/Pacific", "2018-03-11 03:00:00", "forward"], + [ + "2018-03-11 02:33:00", + "US/Pacific", + "2018-03-11 01:59:59.999999999", + "backward", + ], + [ + "2018-03-11 02:33:00", + "US/Pacific", + "2018-03-11 03:33:00", + 
timedelta(hours=1), + ], + [ + "2018-03-11 02:33:00", + "US/Pacific", + "2018-03-11 01:33:00", + timedelta(hours=-1), + ], + ], + ) + @pytest.mark.parametrize("tz_type", ["", "dateutil/"]) + def test_dti_tz_localize_nonexistent_shift( + self, start_ts, tz, end_ts, shift, tz_type + ): + # GH 8917 + tz = tz_type + tz + if isinstance(shift, str): + shift = "shift_" + shift + dti = DatetimeIndex([Timestamp(start_ts)]) + result = dti.tz_localize(tz, nonexistent=shift) + expected = DatetimeIndex([Timestamp(end_ts)]).tz_localize(tz) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("offset", [-1, 1]) + @pytest.mark.parametrize("tz_type", ["", "dateutil/"]) + def test_dti_tz_localize_nonexistent_shift_invalid(self, offset, tz_type): + # GH 8917 + tz = tz_type + "Europe/Warsaw" + dti = DatetimeIndex([Timestamp("2015-03-29 02:20:00")]) + msg = "The provided timedelta will relocalize on a nonexistent time" + with pytest.raises(ValueError, match=msg): + dti.tz_localize(tz, nonexistent=timedelta(seconds=offset)) + + # ------------------------------------------------------------- + # DatetimeIndex.normalize + + def test_normalize_tz(self): + rng = date_range("1/1/2000 9:30", periods=10, freq="D", tz="US/Eastern") + + result = rng.normalize() # does not preserve freq + expected = date_range("1/1/2000", periods=10, freq="D", tz="US/Eastern") + tm.assert_index_equal(result, expected._with_freq(None)) + + assert result.is_normalized + assert not rng.is_normalized + + rng = date_range("1/1/2000 9:30", periods=10, freq="D", tz="UTC") + + result = rng.normalize() + expected = date_range("1/1/2000", periods=10, freq="D", tz="UTC") + tm.assert_index_equal(result, expected) + + assert result.is_normalized + assert not rng.is_normalized + + rng = date_range("1/1/2000 9:30", periods=10, freq="D", tz=tzlocal()) + result = rng.normalize() # does not preserve freq + expected = date_range("1/1/2000", periods=10, freq="D", tz=tzlocal()) + tm.assert_index_equal(result, expected._with_freq(None)) + + assert result.is_normalized + assert not rng.is_normalized + + @td.skip_if_windows + @pytest.mark.parametrize( + "timezone", + [ + "US/Pacific", + "US/Eastern", + "UTC", + "Asia/Kolkata", + "Asia/Shanghai", + "Australia/Canberra", + ], + ) + def test_normalize_tz_local(self, timezone): + # GH#13459 + with tm.set_timezone(timezone): + rng = date_range("1/1/2000 9:30", periods=10, freq="D", tz=tzlocal()) + + result = rng.normalize() + expected = date_range("1/1/2000", periods=10, freq="D", tz=tzlocal()) + expected = expected._with_freq(None) + tm.assert_index_equal(result, expected) + + assert result.is_normalized + assert not rng.is_normalized + + # ------------------------------------------------------------ + # DatetimeIndex.__new__ + + @pytest.mark.parametrize("prefix", ["", "dateutil/"]) + def test_dti_constructor_static_tzinfo(self, prefix): + # it works! 
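+ # Smoke test: constructing an index with a static fixed-offset zone such as + # "EST" and then touching a field and an element should simply not raise.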
+ index = DatetimeIndex([datetime(2012, 1, 1)], tz=prefix + "EST") + index.hour + index[0] + + def test_dti_constructor_with_fixed_tz(self): + off = FixedOffset(420, "+07:00") + start = datetime(2012, 3, 11, 5, 0, 0, tzinfo=off) + end = datetime(2012, 6, 11, 5, 0, 0, tzinfo=off) + rng = date_range(start=start, end=end) + assert off == rng.tz + + rng2 = date_range(start, periods=len(rng), tz=off) + tm.assert_index_equal(rng, rng2) + + rng3 = date_range("3/11/2012 05:00:00+07:00", "6/11/2012 05:00:00+07:00") + assert (rng.values == rng3.values).all() + + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_dti_convert_datetime_list(self, tzstr): + dr = date_range("2012-06-02", periods=10, tz=tzstr, name="foo") + dr2 = DatetimeIndex(list(dr), name="foo", freq="D") + tm.assert_index_equal(dr, dr2) + + def test_dti_construction_univalent(self): + rng = date_range("03/12/2012 00:00", periods=10, freq="W-FRI", tz="US/Eastern") + rng2 = DatetimeIndex(data=rng, tz="US/Eastern") + tm.assert_index_equal(rng, rng2) + + @pytest.mark.parametrize("tz", [pytz.timezone("US/Eastern"), gettz("US/Eastern")]) + def test_dti_from_tzaware_datetime(self, tz): + d = [datetime(2012, 8, 19, tzinfo=tz)] + + index = DatetimeIndex(d) + assert timezones.tz_compare(index.tz, tz) + + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_dti_tz_constructors(self, tzstr): + """Test different DatetimeIndex constructions with timezone + Follow-up of GH#4229 + """ + arr = ["11/10/2005 08:00:00", "11/10/2005 09:00:00"] + + idx1 = to_datetime(arr).tz_localize(tzstr) + idx2 = date_range(start="2005-11-10 08:00:00", freq="H", periods=2, tz=tzstr) + idx2 = idx2._with_freq(None) # the others all have freq=None + idx3 = DatetimeIndex(arr, tz=tzstr) + idx4 = DatetimeIndex(np.array(arr), tz=tzstr) + + for other in [idx2, idx3, idx4]: + tm.assert_index_equal(idx1, other) + + # ------------------------------------------------------------- + # Unsorted + + @pytest.mark.parametrize( + "dtype", + [None, "datetime64[ns, CET]", "datetime64[ns, EST]", "datetime64[ns, UTC]"], + ) + def test_date_accessor(self, dtype): + # Regression test for GH#21230 + expected = np.array([date(2018, 6, 4), pd.NaT]) + + index = DatetimeIndex(["2018-06-04 10:00:00", pd.NaT], dtype=dtype) + result = index.date + + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "dtype", + [None, "datetime64[ns, CET]", "datetime64[ns, EST]", "datetime64[ns, UTC]"], + ) + def test_time_accessor(self, dtype): + # Regression test for GH#21267 + expected = np.array([time(10, 20, 30), pd.NaT]) + + index = DatetimeIndex(["2018-06-04 10:20:30", pd.NaT], dtype=dtype) + result = index.time + + tm.assert_numpy_array_equal(result, expected) + + def test_timetz_accessor(self, tz_naive_fixture): + # GH21358 + tz = timezones.maybe_get_tz(tz_naive_fixture) + + expected = np.array([time(10, 20, 30, tzinfo=tz), pd.NaT]) + + index = DatetimeIndex(["2018-06-04 10:20:30", pd.NaT], tz=tz) + result = index.timetz + + tm.assert_numpy_array_equal(result, expected) + + def test_dti_drop_dont_lose_tz(self): + # GH#2621 + ind = date_range("2012-12-01", periods=10, tz="utc") + ind = ind.drop(ind[-1]) + + assert ind.tz is not None + + def test_dti_tz_conversion_freq(self, tz_naive_fixture): + # GH25241 + t3 = DatetimeIndex(["2019-01-01 10:00"], freq="H") + assert t3.tz_localize(tz=tz_naive_fixture).freq == t3.freq + t4 = DatetimeIndex(["2019-01-02 12:00"], tz="UTC", freq="T") + assert t4.tz_convert(tz="UTC").freq == 
t4.freq + + def test_drop_dst_boundary(self): + # see gh-18031 + tz = "Europe/Brussels" + freq = "15min" + + start = Timestamp("201710290100", tz=tz) + end = Timestamp("201710290300", tz=tz) + index = date_range(start=start, end=end, freq=freq) + + expected = DatetimeIndex( + [ + "201710290115", + "201710290130", + "201710290145", + "201710290200", + "201710290215", + "201710290230", + "201710290245", + "201710290200", + "201710290215", + "201710290230", + "201710290245", + "201710290300", + ], + tz=tz, + freq=freq, + ambiguous=[ + True, + True, + True, + True, + True, + True, + True, + False, + False, + False, + False, + False, + ], + ) + result = index.drop(index[0]) + tm.assert_index_equal(result, expected) + + def test_date_range_localize(self): + rng = date_range("3/11/2012 03:00", periods=15, freq="H", tz="US/Eastern") + rng2 = DatetimeIndex(["3/11/2012 03:00", "3/11/2012 04:00"], tz="US/Eastern") + rng3 = date_range("3/11/2012 03:00", periods=15, freq="H") + rng3 = rng3.tz_localize("US/Eastern") + + tm.assert_index_equal(rng._with_freq(None), rng3) + + # DST transition time + val = rng[0] + exp = Timestamp("3/11/2012 03:00", tz="US/Eastern") + + assert val.hour == 3 + assert exp.hour == 3 + assert val == exp # same UTC value + tm.assert_index_equal(rng[:2], rng2) + + # Right before the DST transition + rng = date_range("3/11/2012 00:00", periods=2, freq="H", tz="US/Eastern") + rng2 = DatetimeIndex( + ["3/11/2012 00:00", "3/11/2012 01:00"], tz="US/Eastern", freq="H" + ) + tm.assert_index_equal(rng, rng2) + exp = Timestamp("3/11/2012 00:00", tz="US/Eastern") + assert exp.hour == 0 + assert rng[0] == exp + exp = Timestamp("3/11/2012 01:00", tz="US/Eastern") + assert exp.hour == 1 + assert rng[1] == exp + + rng = date_range("3/11/2012 00:00", periods=10, freq="H", tz="US/Eastern") + assert rng[2].hour == 3 + + def test_timestamp_equality_different_timezones(self): + utc_range = date_range("1/1/2000", periods=20, tz="UTC") + eastern_range = utc_range.tz_convert("US/Eastern") + berlin_range = utc_range.tz_convert("Europe/Berlin") + + for a, b, c in zip(utc_range, eastern_range, berlin_range): + assert a == b + assert b == c + assert a == c + + assert (utc_range == eastern_range).all() + assert (utc_range == berlin_range).all() + assert (berlin_range == eastern_range).all() + + def test_dti_intersection(self): + rng = date_range("1/1/2011", periods=100, freq="H", tz="utc") + + left = rng[10:90][::-1] + right = rng[20:80][::-1] + + assert left.tz == rng.tz + result = left.intersection(right) + assert result.tz == left.tz + + def test_dti_equals_with_tz(self): + left = date_range("1/1/2011", periods=100, freq="H", tz="utc") + right = date_range("1/1/2011", periods=100, freq="H", tz="US/Eastern") + + assert not left.equals(right) + + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_dti_tz_nat(self, tzstr): + idx = DatetimeIndex([Timestamp("2013-1-1", tz=tzstr), pd.NaT]) + + assert isna(idx[1]) + assert idx[0].tzinfo is not None + + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_dti_astype_asobject_tzinfos(self, tzstr): + # GH#1345 + + # dates around a dst transition + rng = date_range("2/13/2010", "5/6/2010", tz=tzstr) + + objs = rng.astype(object) + for i, x in enumerate(objs): + exval = rng[i] + assert x == exval + assert x.tzinfo == exval.tzinfo + + objs = rng.astype(object) + for i, x in enumerate(objs): + exval = rng[i] + assert x == exval + assert x.tzinfo == exval.tzinfo + + @pytest.mark.parametrize("tzstr", 
["US/Eastern", "dateutil/US/Eastern"]) + def test_dti_with_timezone_repr(self, tzstr): + rng = date_range("4/13/2010", "5/6/2010") + + rng_eastern = rng.tz_localize(tzstr) + + rng_repr = repr(rng_eastern) + assert "2010-04-13 00:00:00" in rng_repr + + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_dti_take_dont_lose_meta(self, tzstr): + rng = date_range("1/1/2000", periods=20, tz=tzstr) + + result = rng.take(range(5)) + assert result.tz == rng.tz + assert result.freq == rng.freq + + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_utc_box_timestamp_and_localize(self, tzstr): + tz = timezones.maybe_get_tz(tzstr) + + rng = date_range("3/11/2012", "3/12/2012", freq="H", tz="utc") + rng_eastern = rng.tz_convert(tzstr) + + expected = rng[-1].astimezone(tz) + + stamp = rng_eastern[-1] + assert stamp == expected + assert stamp.tzinfo == expected.tzinfo + + # right tzinfo + rng = date_range("3/13/2012", "3/14/2012", freq="H", tz="utc") + rng_eastern = rng.tz_convert(tzstr) + # test not valid for dateutil timezones. + # assert 'EDT' in repr(rng_eastern[0].tzinfo) + assert "EDT" in repr(rng_eastern[0].tzinfo) or "tzfile" in repr( + rng_eastern[0].tzinfo + ) + + def test_dti_to_pydatetime(self): + dt = dateutil.parser.parse("2012-06-13T01:39:00Z") + dt = dt.replace(tzinfo=tzlocal()) + + arr = np.array([dt], dtype=object) + + result = to_datetime(arr, utc=True) + assert result.tz is pytz.utc + + rng = date_range("2012-11-03 03:00", "2012-11-05 03:00", tz=tzlocal()) + arr = rng.to_pydatetime() + result = to_datetime(arr, utc=True) + assert result.tz is pytz.utc + + def test_dti_to_pydatetime_fizedtz(self): + dates = np.array( + [ + datetime(2000, 1, 1, tzinfo=fixed_off), + datetime(2000, 1, 2, tzinfo=fixed_off), + datetime(2000, 1, 3, tzinfo=fixed_off), + ] + ) + dti = DatetimeIndex(dates) + + result = dti.to_pydatetime() + tm.assert_numpy_array_equal(dates, result) + + result = dti._mpl_repr() + tm.assert_numpy_array_equal(dates, result) + + @pytest.mark.parametrize("tz", [pytz.timezone("US/Central"), gettz("US/Central")]) + def test_with_tz(self, tz): + # just want it to work + start = datetime(2011, 3, 12, tzinfo=pytz.utc) + dr = bdate_range(start, periods=50, freq=pd.offsets.Hour()) + assert dr.tz is pytz.utc + + # DateRange with naive datetimes + dr = bdate_range("1/1/2005", "1/1/2009", tz=pytz.utc) + dr = bdate_range("1/1/2005", "1/1/2009", tz=tz) + + # normalized + central = dr.tz_convert(tz) + assert central.tz is tz + naive = central[0].to_pydatetime().replace(tzinfo=None) + comp = conversion.localize_pydatetime(naive, tz).tzinfo + assert central[0].tz is comp + + # compare vs a localized tz + naive = dr[0].to_pydatetime().replace(tzinfo=None) + comp = conversion.localize_pydatetime(naive, tz).tzinfo + assert central[0].tz is comp + + # datetimes with tzinfo set + dr = bdate_range( + datetime(2005, 1, 1, tzinfo=pytz.utc), datetime(2009, 1, 1, tzinfo=pytz.utc) + ) + msg = "Start and end cannot both be tz-aware with different timezones" + with pytest.raises(Exception, match=msg): + bdate_range(datetime(2005, 1, 1, tzinfo=pytz.utc), "1/1/2009", tz=tz) + + @pytest.mark.parametrize("prefix", ["", "dateutil/"]) + def test_field_access_localize(self, prefix): + strdates = ["1/1/2012", "3/1/2012", "4/1/2012"] + rng = DatetimeIndex(strdates, tz=prefix + "US/Eastern") + assert (rng.hour == 0).all() + + # a more unusual time zone, #1946 + dr = date_range( + "2011-10-02 00:00", freq="h", periods=10, tz=prefix + "America/Atikokan" + ) 
+ + expected = Index(np.arange(10, dtype=np.int64)) + tm.assert_index_equal(dr.hour, expected) + + @pytest.mark.parametrize("tz", [pytz.timezone("US/Eastern"), gettz("US/Eastern")]) + def test_dti_convert_tz_aware_datetime_datetime(self, tz): + # GH#1581 + dates = [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)] + + dates_aware = [conversion.localize_pydatetime(x, tz) for x in dates] + result = DatetimeIndex(dates_aware) + assert timezones.tz_compare(result.tz, tz) + + converted = to_datetime(dates_aware, utc=True) + ex_vals = np.array([Timestamp(x).value for x in dates_aware]) + tm.assert_numpy_array_equal(converted.asi8, ex_vals) + assert converted.tz is pytz.utc + + def test_dti_union_aware(self): + # non-overlapping + rng = date_range("2012-11-15 00:00:00", periods=6, freq="H", tz="US/Central") + + rng2 = date_range("2012-11-15 12:00:00", periods=6, freq="H", tz="US/Eastern") + + with tm.assert_produces_warning(FutureWarning): + # # GH#39328 will cast both to UTC + result = rng.union(rng2) + + expected = rng.astype("O").union(rng2.astype("O")) + tm.assert_index_equal(result, expected) + assert result[0].tz.zone == "US/Central" + assert result[-1].tz.zone == "US/Eastern" + + def test_dti_union_mixed(self): + # GH 21671 + rng = DatetimeIndex([Timestamp("2011-01-01"), pd.NaT]) + rng2 = DatetimeIndex(["2012-01-01", "2012-01-02"], tz="Asia/Tokyo") + result = rng.union(rng2) + expected = Index( + [ + Timestamp("2011-01-01"), + pd.NaT, + Timestamp("2012-01-01", tz="Asia/Tokyo"), + Timestamp("2012-01-02", tz="Asia/Tokyo"), + ], + dtype=object, + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "tz", [None, "UTC", "US/Central", dateutil.tz.tzoffset(None, -28800)] + ) + def test_iteration_preserves_nanoseconds(self, tz): + # GH 19603 + index = DatetimeIndex( + ["2018-02-08 15:00:00.168456358", "2018-02-08 15:00:00.168456359"], tz=tz + ) + for i, ts in enumerate(index): + assert ts == index[i] + + +def test_tz_localize_invalidates_freq(): + # we only preserve freq in unambiguous cases + + # if localized to US/Eastern, this crosses a DST transition + dti = date_range("2014-03-08 23:00", "2014-03-09 09:00", freq="H") + assert dti.freq == "H" + + result = dti.tz_localize(None) # no-op + assert result.freq == "H" + + result = dti.tz_localize("UTC") # unambiguous freq preservation + assert result.freq == "H" + + result = dti.tz_localize("US/Eastern", nonexistent="shift_forward") + assert result.freq is None + assert result.inferred_freq is None # i.e. 
we are not _too_ strict here + + # Case where we _can_ keep freq because we're length==1 + dti2 = dti[:1] + result = dti2.tz_localize("US/Eastern") + assert result.freq == "H" diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_unique.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_unique.py new file mode 100644 index 0000000..68ac770 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/datetimes/test_unique.py @@ -0,0 +1,77 @@ +from datetime import ( + datetime, + timedelta, +) + +from pandas import ( + DatetimeIndex, + NaT, + Timestamp, +) +import pandas._testing as tm + + +def test_unique(tz_naive_fixture): + + idx = DatetimeIndex(["2017"] * 2, tz=tz_naive_fixture) + expected = idx[:1] + + result = idx.unique() + tm.assert_index_equal(result, expected) + # GH#21737 + # Ensure the underlying data is consistent + assert result[0] == expected[0] + + +def test_index_unique(rand_series_with_duplicate_datetimeindex): + dups = rand_series_with_duplicate_datetimeindex + index = dups.index + + uniques = index.unique() + expected = DatetimeIndex( + [ + datetime(2000, 1, 2), + datetime(2000, 1, 3), + datetime(2000, 1, 4), + datetime(2000, 1, 5), + ] + ) + assert uniques.dtype == "M8[ns]" # sanity + tm.assert_index_equal(uniques, expected) + assert index.nunique() == 4 + + # GH#2563 + assert isinstance(uniques, DatetimeIndex) + + dups_local = index.tz_localize("US/Eastern") + dups_local.name = "foo" + result = dups_local.unique() + expected = DatetimeIndex(expected, name="foo") + expected = expected.tz_localize("US/Eastern") + assert result.tz is not None + assert result.name == "foo" + tm.assert_index_equal(result, expected) + + +def test_index_unique2(): + # NaT, note this is excluded + arr = [1370745748 + t for t in range(20)] + [NaT.value] + idx = DatetimeIndex(arr * 3) + tm.assert_index_equal(idx.unique(), DatetimeIndex(arr)) + assert idx.nunique() == 20 + assert idx.nunique(dropna=False) == 21 + + +def test_index_unique3(): + arr = [ + Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20) + ] + [NaT] + idx = DatetimeIndex(arr * 3) + tm.assert_index_equal(idx.unique(), DatetimeIndex(arr)) + assert idx.nunique() == 20 + assert idx.nunique(dropna=False) == 21 + + +def test_is_unique_monotonic(rand_series_with_duplicate_datetimeindex): + index = rand_series_with_duplicate_datetimeindex.index + assert not index.is_unique diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..272f4a1 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/__pycache__/test_astype.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/__pycache__/test_astype.cpython-38.pyc new file mode 100644 index 0000000..9742dde Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/__pycache__/test_astype.cpython-38.pyc differ diff --git 
a/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/__pycache__/test_base.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/__pycache__/test_base.cpython-38.pyc new file mode 100644 index 0000000..82bea17 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/__pycache__/test_base.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/__pycache__/test_constructors.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/__pycache__/test_constructors.cpython-38.pyc new file mode 100644 index 0000000..e3d2c51 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/__pycache__/test_constructors.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/__pycache__/test_equals.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/__pycache__/test_equals.cpython-38.pyc new file mode 100644 index 0000000..bf503be Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/__pycache__/test_equals.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/__pycache__/test_formats.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/__pycache__/test_formats.cpython-38.pyc new file mode 100644 index 0000000..a6c7ce5 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/__pycache__/test_formats.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/__pycache__/test_indexing.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/__pycache__/test_indexing.cpython-38.pyc new file mode 100644 index 0000000..5855e91 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/__pycache__/test_indexing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/__pycache__/test_interval.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/__pycache__/test_interval.cpython-38.pyc new file mode 100644 index 0000000..15d8920 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/__pycache__/test_interval.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/__pycache__/test_interval_range.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/__pycache__/test_interval_range.cpython-38.pyc new file mode 100644 index 0000000..ec95ee1 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/__pycache__/test_interval_range.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/__pycache__/test_interval_tree.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/__pycache__/test_interval_tree.cpython-38.pyc new file mode 100644 index 0000000..bec5726 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/__pycache__/test_interval_tree.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/__pycache__/test_join.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/__pycache__/test_join.cpython-38.pyc new file mode 
100644 index 0000000..0549cb6 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/__pycache__/test_join.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/__pycache__/test_pickle.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/__pycache__/test_pickle.cpython-38.pyc new file mode 100644 index 0000000..b246d98 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/__pycache__/test_pickle.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/__pycache__/test_setops.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/__pycache__/test_setops.cpython-38.pyc new file mode 100644 index 0000000..49bcf0f Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/__pycache__/test_setops.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/test_astype.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/test_astype.py new file mode 100644 index 0000000..6d5eda9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/test_astype.py @@ -0,0 +1,240 @@ +import re + +import numpy as np +import pytest + +from pandas.compat import is_platform_arm + +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + IntervalDtype, +) + +from pandas import ( + CategoricalIndex, + Index, + IntervalIndex, + NaT, + Timedelta, + Timestamp, + interval_range, +) +import pandas._testing as tm + + +class AstypeTests: + """Tests common to IntervalIndex with any subtype""" + + def test_astype_idempotent(self, index): + result = index.astype("interval") + tm.assert_index_equal(result, index) + + result = index.astype(index.dtype) + tm.assert_index_equal(result, index) + + def test_astype_object(self, index): + result = index.astype(object) + expected = Index(index.values, dtype="object") + tm.assert_index_equal(result, expected) + assert not result.equals(index) + + def test_astype_category(self, index): + result = index.astype("category") + expected = CategoricalIndex(index.values) + tm.assert_index_equal(result, expected) + + result = index.astype(CategoricalDtype()) + tm.assert_index_equal(result, expected) + + # non-default params + categories = index.dropna().unique().values[:-1] + dtype = CategoricalDtype(categories=categories, ordered=True) + result = index.astype(dtype) + expected = CategoricalIndex(index.values, categories=categories, ordered=True) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "dtype", + [ + "int64", + "uint64", + "float64", + "complex128", + "period[M]", + "timedelta64", + "timedelta64[ns]", + "datetime64", + "datetime64[ns]", + "datetime64[ns, US/Eastern]", + ], + ) + def test_astype_cannot_cast(self, index, dtype): + msg = "Cannot cast IntervalIndex to dtype" + with pytest.raises(TypeError, match=msg): + index.astype(dtype) + + def test_astype_invalid_dtype(self, index): + msg = "data type [\"']fake_dtype[\"'] not understood" + with pytest.raises(TypeError, match=msg): + index.astype("fake_dtype") + + +class TestIntSubtype(AstypeTests): + """Tests specific to IntervalIndex with integer-like subtype""" + + indexes = [ + IntervalIndex.from_breaks(np.arange(-10, 11, dtype="int64")), + IntervalIndex.from_breaks(np.arange(100, dtype="uint64"), closed="left"), + ] + + @pytest.fixture(params=indexes) + def 
index(self, request): + return request.param + + @pytest.mark.parametrize( + "subtype", ["float64", "datetime64[ns]", "timedelta64[ns]"] + ) + def test_subtype_conversion(self, index, subtype): + dtype = IntervalDtype(subtype, index.closed) + result = index.astype(dtype) + expected = IntervalIndex.from_arrays( + index.left.astype(subtype), index.right.astype(subtype), closed=index.closed + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "subtype_start, subtype_end", [("int64", "uint64"), ("uint64", "int64")] + ) + def test_subtype_integer(self, subtype_start, subtype_end): + index = IntervalIndex.from_breaks(np.arange(100, dtype=subtype_start)) + dtype = IntervalDtype(subtype_end, index.closed) + result = index.astype(dtype) + expected = IntervalIndex.from_arrays( + index.left.astype(subtype_end), + index.right.astype(subtype_end), + closed=index.closed, + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.xfail(reason="GH#15832") + def test_subtype_integer_errors(self): + # int64 -> uint64 fails with negative values + index = interval_range(-10, 10) + dtype = IntervalDtype("uint64", "right") + + # Until we decide what the exception message _should_ be, we + # assert something that it should _not_ be. + # We should _not_ be getting a message suggesting that the -10 + # has been wrapped around to a large-positive integer + msg = "^(?!(left side of interval must be <= right side))" + with pytest.raises(ValueError, match=msg): + index.astype(dtype) + + +class TestFloatSubtype(AstypeTests): + """Tests specific to IntervalIndex with float subtype""" + + indexes = [ + interval_range(-10.0, 10.0, closed="neither"), + IntervalIndex.from_arrays( + [-1.5, np.nan, 0.0, 0.0, 1.5], [-0.5, np.nan, 1.0, 1.0, 3.0], closed="both" + ), + ] + + @pytest.fixture(params=indexes) + def index(self, request): + return request.param + + @pytest.mark.parametrize("subtype", ["int64", "uint64"]) + def test_subtype_integer(self, subtype): + index = interval_range(0.0, 10.0) + dtype = IntervalDtype(subtype, "right") + result = index.astype(dtype) + expected = IntervalIndex.from_arrays( + index.left.astype(subtype), index.right.astype(subtype), closed=index.closed + ) + tm.assert_index_equal(result, expected) + + # raises with NA + msg = r"Cannot convert non-finite values \(NA or inf\) to integer" + with pytest.raises(ValueError, match=msg): + index.insert(0, np.nan).astype(dtype) + + @pytest.mark.parametrize("subtype", ["int64", "uint64"]) + def test_subtype_integer_with_non_integer_borders(self, subtype): + index = interval_range(0.0, 3.0, freq=0.25) + dtype = IntervalDtype(subtype, "right") + result = index.astype(dtype) + expected = IntervalIndex.from_arrays( + index.left.astype(subtype), index.right.astype(subtype), closed=index.closed + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.xfail(is_platform_arm(), reason="GH 41740") + def test_subtype_integer_errors(self): + # float64 -> uint64 fails with negative values + index = interval_range(-10.0, 10.0) + dtype = IntervalDtype("uint64", "right") + msg = re.escape( + "Cannot convert interval[float64, right] to interval[uint64, right]; " + "subtypes are incompatible" + ) + with pytest.raises(TypeError, match=msg): + index.astype(dtype) + + @pytest.mark.parametrize("subtype", ["datetime64[ns]", "timedelta64[ns]"]) + def test_subtype_datetimelike(self, index, subtype): + dtype = IntervalDtype(subtype, "right") + msg = "Cannot convert .* to .*; subtypes are incompatible" + with pytest.raises(TypeError, match=msg): + 
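+            # a float-backed IntervalIndex has no sensible datetime-like
+            # reading, so the conversion is expected to be rejected outright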
index.astype(dtype) + + +class TestDatetimelikeSubtype(AstypeTests): + """Tests specific to IntervalIndex with datetime-like subtype""" + + indexes = [ + interval_range(Timestamp("2018-01-01"), periods=10, closed="neither"), + interval_range(Timestamp("2018-01-01"), periods=10).insert(2, NaT), + interval_range(Timestamp("2018-01-01", tz="US/Eastern"), periods=10), + interval_range(Timedelta("0 days"), periods=10, closed="both"), + interval_range(Timedelta("0 days"), periods=10).insert(2, NaT), + ] + + @pytest.fixture(params=indexes) + def index(self, request): + return request.param + + @pytest.mark.parametrize("subtype", ["int64", "uint64"]) + def test_subtype_integer(self, index, subtype): + dtype = IntervalDtype(subtype, "right") + result = index.astype(dtype) + expected = IntervalIndex.from_arrays( + index.left.astype(subtype), index.right.astype(subtype), closed=index.closed + ) + tm.assert_index_equal(result, expected) + + def test_subtype_float(self, index): + dtype = IntervalDtype("float64", "right") + msg = "Cannot convert .* to .*; subtypes are incompatible" + with pytest.raises(TypeError, match=msg): + index.astype(dtype) + + def test_subtype_datetimelike(self): + # datetime -> timedelta raises + dtype = IntervalDtype("timedelta64[ns]", "right") + msg = "Cannot convert .* to .*; subtypes are incompatible" + + index = interval_range(Timestamp("2018-01-01"), periods=10) + with pytest.raises(TypeError, match=msg): + index.astype(dtype) + + index = interval_range(Timestamp("2018-01-01", tz="CET"), periods=10) + with pytest.raises(TypeError, match=msg): + index.astype(dtype) + + # timedelta -> datetime raises + dtype = IntervalDtype("datetime64[ns]", "right") + index = interval_range(Timedelta("0 days"), periods=10) + with pytest.raises(TypeError, match=msg): + index.astype(dtype) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/test_base.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/test_base.py new file mode 100644 index 0000000..c44303a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/test_base.py @@ -0,0 +1,71 @@ +import numpy as np +import pytest + +from pandas import IntervalIndex +import pandas._testing as tm +from pandas.tests.indexes.common import Base + + +class TestBase(Base): + """ + Tests specific to the shared common index tests; unrelated tests should be placed + in test_interval.py or the specific test file (e.g. 
test_astype.py) + """ + + _index_cls = IntervalIndex + + @pytest.fixture + def simple_index(self) -> IntervalIndex: + return self._index_cls.from_breaks(range(11), closed="right") + + @pytest.fixture + def index(self): + return tm.makeIntervalIndex(10) + + def create_index(self, *, closed="right"): + return IntervalIndex.from_breaks(range(11), closed=closed) + + def test_repr_max_seq_item_setting(self): + # override base test: not a valid repr as we use interval notation + pass + + def test_repr_roundtrip(self): + # override base test: not a valid repr as we use interval notation + pass + + def test_take(self, closed): + index = self.create_index(closed=closed) + + result = index.take(range(10)) + tm.assert_index_equal(result, index) + + result = index.take([0, 0, 1]) + expected = IntervalIndex.from_arrays([0, 0, 1], [1, 1, 2], closed=closed) + tm.assert_index_equal(result, expected) + + def test_where(self, simple_index, listlike_box): + klass = listlike_box + + idx = simple_index + cond = [True] * len(idx) + expected = idx + result = expected.where(klass(cond)) + tm.assert_index_equal(result, expected) + + cond = [False] + [True] * len(idx[1:]) + expected = IntervalIndex([np.nan] + idx[1:].tolist()) + result = idx.where(klass(cond)) + tm.assert_index_equal(result, expected) + + def test_getitem_2d_deprecated(self, simple_index): + # GH#30588 multi-dim indexing is deprecated, but raising is also acceptable + idx = simple_index + with pytest.raises(ValueError, match="multi-dimensional indexing not allowed"): + with tm.assert_produces_warning(FutureWarning): + idx[:, None] + with pytest.raises(ValueError, match="multi-dimensional indexing not allowed"): + # GH#44051 + idx[True] + with pytest.raises(ValueError, match="multi-dimensional indexing not allowed"): + # GH#44051 + idx[False] diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/test_constructors.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/test_constructors.py new file mode 100644 index 0000000..a71a8f9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/test_constructors.py @@ -0,0 +1,473 @@ +from functools import partial + +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_categorical_dtype +from pandas.core.dtypes.dtypes import IntervalDtype + +from pandas import ( + Categorical, + CategoricalIndex, + Index, + Interval, + IntervalIndex, + date_range, + notna, + period_range, + timedelta_range, +) +import pandas._testing as tm +from pandas.core.api import ( + Float64Index, + Int64Index, +) +from pandas.core.arrays import IntervalArray +import pandas.core.common as com + + +@pytest.fixture(params=[None, "foo"]) +def name(request): + return request.param + + +class ConstructorTests: + """ + Common tests for all variations of IntervalIndex construction. Input data + to be supplied in breaks format, then converted by the subclass method + get_kwargs_from_breaks to the expected format. 
+ """ + + @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning") + @pytest.mark.parametrize( + "breaks", + [ + [3, 14, 15, 92, 653], + np.arange(10, dtype="int64"), + Int64Index(range(-10, 11)), + Float64Index(np.arange(20, 30, 0.5)), + date_range("20180101", periods=10), + date_range("20180101", periods=10, tz="US/Eastern"), + timedelta_range("1 day", periods=10), + ], + ) + def test_constructor(self, constructor, breaks, closed, name): + result_kwargs = self.get_kwargs_from_breaks(breaks, closed) + result = constructor(closed=closed, name=name, **result_kwargs) + + assert result.closed == closed + assert result.name == name + assert result.dtype.subtype == getattr(breaks, "dtype", "int64") + tm.assert_index_equal(result.left, Index(breaks[:-1])) + tm.assert_index_equal(result.right, Index(breaks[1:])) + + @pytest.mark.parametrize( + "breaks, subtype", + [ + (Int64Index([0, 1, 2, 3, 4]), "float64"), + (Int64Index([0, 1, 2, 3, 4]), "datetime64[ns]"), + (Int64Index([0, 1, 2, 3, 4]), "timedelta64[ns]"), + (Float64Index([0, 1, 2, 3, 4]), "int64"), + (date_range("2017-01-01", periods=5), "int64"), + (timedelta_range("1 day", periods=5), "int64"), + ], + ) + def test_constructor_dtype(self, constructor, breaks, subtype): + # GH 19262: conversion via dtype parameter + expected_kwargs = self.get_kwargs_from_breaks(breaks.astype(subtype)) + expected = constructor(**expected_kwargs) + + result_kwargs = self.get_kwargs_from_breaks(breaks) + iv_dtype = IntervalDtype(subtype, "right") + for dtype in (iv_dtype, str(iv_dtype)): + result = constructor(dtype=dtype, **result_kwargs) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "breaks", + [ + Int64Index([0, 1, 2, 3, 4]), + Int64Index([0, 1, 2, 3, 4]), + Int64Index([0, 1, 2, 3, 4]), + Float64Index([0, 1, 2, 3, 4]), + date_range("2017-01-01", periods=5), + timedelta_range("1 day", periods=5), + ], + ) + def test_constructor_pass_closed(self, constructor, breaks): + # not passing closed to IntervalDtype, but to IntervalArray constructor + warn = None + if isinstance(constructor, partial) and constructor.func is Index: + # passing kwargs to Index is deprecated + warn = FutureWarning + + iv_dtype = IntervalDtype(breaks.dtype) + + result_kwargs = self.get_kwargs_from_breaks(breaks) + + for dtype in (iv_dtype, str(iv_dtype)): + with tm.assert_produces_warning(warn): + + result = constructor(dtype=dtype, closed="left", **result_kwargs) + assert result.dtype.closed == "left" + + @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning") + @pytest.mark.parametrize("breaks", [[np.nan] * 2, [np.nan] * 4, [np.nan] * 50]) + def test_constructor_nan(self, constructor, breaks, closed): + # GH 18421 + result_kwargs = self.get_kwargs_from_breaks(breaks) + result = constructor(closed=closed, **result_kwargs) + + expected_subtype = np.float64 + expected_values = np.array(breaks[:-1], dtype=object) + + assert result.closed == closed + assert result.dtype.subtype == expected_subtype + tm.assert_numpy_array_equal(np.array(result), expected_values) + + @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning") + @pytest.mark.parametrize( + "breaks", + [ + [], + np.array([], dtype="int64"), + np.array([], dtype="float64"), + np.array([], dtype="datetime64[ns]"), + np.array([], dtype="timedelta64[ns]"), + ], + ) + def test_constructor_empty(self, constructor, breaks, closed): + # GH 18421 + result_kwargs = self.get_kwargs_from_breaks(breaks) + result = constructor(closed=closed, **result_kwargs) + + 
expected_values = np.array([], dtype=object)
+        expected_subtype = getattr(breaks, "dtype", np.int64)
+
+        assert result.empty
+        assert result.closed == closed
+        assert result.dtype.subtype == expected_subtype
+        tm.assert_numpy_array_equal(np.array(result), expected_values)
+
+    @pytest.mark.parametrize(
+        "breaks",
+        [
+            tuple("0123456789"),
+            list("abcdefghij"),
+            np.array(list("abcdefghij"), dtype=object),
+            np.array(list("abcdefghij"), dtype="<U1"),
+        actual = self.index > Interval(0.5, 1.5)
+        tm.assert_numpy_array_equal(actual, expected)
+
+        actual = self.index == self.index
+        expected = np.array([True, True])
+        tm.assert_numpy_array_equal(actual, expected)
+        actual = self.index <= self.index
+        tm.assert_numpy_array_equal(actual, expected)
+        actual = self.index >= self.index
+        tm.assert_numpy_array_equal(actual, expected)
+
+        actual = self.index < self.index
+        expected = np.array([False, False])
+        tm.assert_numpy_array_equal(actual, expected)
+        actual = self.index > self.index
+        tm.assert_numpy_array_equal(actual, expected)
+
+        actual = self.index == IntervalIndex.from_breaks([0, 1, 2], "left")
+        tm.assert_numpy_array_equal(actual, expected)
+
+        actual = self.index == self.index.values
+        tm.assert_numpy_array_equal(actual, np.array([True, True]))
+        actual = self.index.values == self.index
+        tm.assert_numpy_array_equal(actual, np.array([True, True]))
+        actual = self.index <= self.index.values
+        tm.assert_numpy_array_equal(actual, np.array([True, True]))
+        actual = self.index != self.index.values
+        tm.assert_numpy_array_equal(actual, np.array([False, False]))
+        actual = self.index > self.index.values
+        tm.assert_numpy_array_equal(actual, np.array([False, False]))
+        actual = self.index.values > self.index
+        tm.assert_numpy_array_equal(actual, np.array([False, False]))
+
+        # invalid comparisons
+        actual = self.index == 0
+        tm.assert_numpy_array_equal(actual, np.array([False, False]))
+        actual = self.index == self.index.left
+        tm.assert_numpy_array_equal(actual, np.array([False, False]))
+
+        msg = "|".join(
+            [
+                "not supported between instances of 'int' and '.*.Interval'",
+                r"Invalid comparison between dtype=interval\[int64, right\] and ",
+            ]
+        )
+        with pytest.raises(TypeError, match=msg):
+            self.index > 0
+        with pytest.raises(TypeError, match=msg):
+            self.index <= 0
+        with pytest.raises(TypeError, match=msg):
+            self.index > np.arange(2)
+
+        msg = "Lengths must match to compare"
+        with pytest.raises(ValueError, match=msg):
+            self.index > np.arange(3)
+
+    def test_missing_values(self, closed):
+        idx = Index(
+            [np.nan, Interval(0, 1, closed=closed), Interval(1, 2, closed=closed)]
+        )
+        idx2 = IntervalIndex.from_arrays([np.nan, 0, 1], [np.nan, 1, 2], closed=closed)
+        assert idx.equals(idx2)
+
+        msg = (
+            "missing values must be missing in the same location both left "
+            "and right sides"
+        )
+        with pytest.raises(ValueError, match=msg):
+            IntervalIndex.from_arrays(
+                [np.nan, 0, 1], np.array([0, 1, 2]), closed=closed
+            )
+
+        tm.assert_numpy_array_equal(isna(idx), np.array([True, False, False]))
+
+    def test_sort_values(self, closed):
+        index = self.create_index(closed=closed)
+
+        result = index.sort_values()
+        tm.assert_index_equal(result, index)
+
+        result = index.sort_values(ascending=False)
+        tm.assert_index_equal(result, index[::-1])
+
+        # with nan
+        index = IntervalIndex([Interval(1, 2), np.nan, Interval(0, 1)])
+
+        result = index.sort_values()
+        expected = IntervalIndex([Interval(0, 1), Interval(1, 2), np.nan])
+        tm.assert_index_equal(result, expected)
+
+        result = index.sort_values(ascending=False,
na_position="first") + expected = IntervalIndex([np.nan, Interval(1, 2), Interval(0, 1)]) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("tz", [None, "US/Eastern"]) + def test_datetime(self, tz): + start = Timestamp("2000-01-01", tz=tz) + dates = date_range(start=start, periods=10) + index = IntervalIndex.from_breaks(dates) + + # test mid + start = Timestamp("2000-01-01T12:00", tz=tz) + expected = date_range(start=start, periods=9) + tm.assert_index_equal(index.mid, expected) + + # __contains__ doesn't check individual points + assert Timestamp("2000-01-01", tz=tz) not in index + assert Timestamp("2000-01-01T12", tz=tz) not in index + assert Timestamp("2000-01-02", tz=tz) not in index + iv_true = Interval( + Timestamp("2000-01-02", tz=tz), Timestamp("2000-01-03", tz=tz) + ) + iv_false = Interval( + Timestamp("1999-12-31", tz=tz), Timestamp("2000-01-01", tz=tz) + ) + assert iv_true in index + assert iv_false not in index + + # .contains does check individual points + assert not index.contains(Timestamp("2000-01-01", tz=tz)).any() + assert index.contains(Timestamp("2000-01-01T12", tz=tz)).any() + assert index.contains(Timestamp("2000-01-02", tz=tz)).any() + + # test get_indexer + start = Timestamp("1999-12-31T12:00", tz=tz) + target = date_range(start=start, periods=7, freq="12H") + actual = index.get_indexer(target) + expected = np.array([-1, -1, 0, 0, 1, 1, 2], dtype="intp") + tm.assert_numpy_array_equal(actual, expected) + + start = Timestamp("2000-01-08T18:00", tz=tz) + target = date_range(start=start, periods=7, freq="6H") + actual = index.get_indexer(target) + expected = np.array([7, 7, 8, 8, 8, 8, -1], dtype="intp") + tm.assert_numpy_array_equal(actual, expected) + + def test_append(self, closed): + + index1 = IntervalIndex.from_arrays([0, 1], [1, 2], closed=closed) + index2 = IntervalIndex.from_arrays([1, 2], [2, 3], closed=closed) + + result = index1.append(index2) + expected = IntervalIndex.from_arrays([0, 1, 1, 2], [1, 2, 2, 3], closed=closed) + tm.assert_index_equal(result, expected) + + result = index1.append([index1, index2]) + expected = IntervalIndex.from_arrays( + [0, 1, 0, 1, 1, 2], [1, 2, 1, 2, 2, 3], closed=closed + ) + tm.assert_index_equal(result, expected) + + for other_closed in {"left", "right", "both", "neither"} - {closed}: + index_other_closed = IntervalIndex.from_arrays( + [0, 1], [1, 2], closed=other_closed + ) + result = index1.append(index_other_closed) + expected = index1.astype(object).append(index_other_closed.astype(object)) + tm.assert_index_equal(result, expected) + + def test_is_non_overlapping_monotonic(self, closed): + # Should be True in all cases + tpls = [(0, 1), (2, 3), (4, 5), (6, 7)] + idx = IntervalIndex.from_tuples(tpls, closed=closed) + assert idx.is_non_overlapping_monotonic is True + + idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) + assert idx.is_non_overlapping_monotonic is True + + # Should be False in all cases (overlapping) + tpls = [(0, 2), (1, 3), (4, 5), (6, 7)] + idx = IntervalIndex.from_tuples(tpls, closed=closed) + assert idx.is_non_overlapping_monotonic is False + + idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) + assert idx.is_non_overlapping_monotonic is False + + # Should be False in all cases (non-monotonic) + tpls = [(0, 1), (2, 3), (6, 7), (4, 5)] + idx = IntervalIndex.from_tuples(tpls, closed=closed) + assert idx.is_non_overlapping_monotonic is False + + idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) + assert idx.is_non_overlapping_monotonic is False + + # Should 
be False for closed='both', otherwise True (GH16560) + if closed == "both": + idx = IntervalIndex.from_breaks(range(4), closed=closed) + assert idx.is_non_overlapping_monotonic is False + else: + idx = IntervalIndex.from_breaks(range(4), closed=closed) + assert idx.is_non_overlapping_monotonic is True + + @pytest.mark.parametrize( + "start, shift, na_value", + [ + (0, 1, np.nan), + (Timestamp("2018-01-01"), Timedelta("1 day"), pd.NaT), + (Timedelta("0 days"), Timedelta("1 day"), pd.NaT), + ], + ) + def test_is_overlapping(self, start, shift, na_value, closed): + # GH 23309 + # see test_interval_tree.py for extensive tests; interface tests here + + # non-overlapping + tuples = [(start + n * shift, start + (n + 1) * shift) for n in (0, 2, 4)] + index = IntervalIndex.from_tuples(tuples, closed=closed) + assert index.is_overlapping is False + + # non-overlapping with NA + tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)] + index = IntervalIndex.from_tuples(tuples, closed=closed) + assert index.is_overlapping is False + + # overlapping + tuples = [(start + n * shift, start + (n + 2) * shift) for n in range(3)] + index = IntervalIndex.from_tuples(tuples, closed=closed) + assert index.is_overlapping is True + + # overlapping with NA + tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)] + index = IntervalIndex.from_tuples(tuples, closed=closed) + assert index.is_overlapping is True + + # common endpoints + tuples = [(start + n * shift, start + (n + 1) * shift) for n in range(3)] + index = IntervalIndex.from_tuples(tuples, closed=closed) + result = index.is_overlapping + expected = closed == "both" + assert result is expected + + # common endpoints with NA + tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)] + index = IntervalIndex.from_tuples(tuples, closed=closed) + result = index.is_overlapping + assert result is expected + + @pytest.mark.parametrize( + "tuples", + [ + list(zip(range(10), range(1, 11))), + list( + zip( + date_range("20170101", periods=10), + date_range("20170101", periods=10), + ) + ), + list( + zip( + timedelta_range("0 days", periods=10), + timedelta_range("1 day", periods=10), + ) + ), + ], + ) + def test_to_tuples(self, tuples): + # GH 18756 + idx = IntervalIndex.from_tuples(tuples) + result = idx.to_tuples() + expected = Index(com.asarray_tuplesafe(tuples)) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "tuples", + [ + list(zip(range(10), range(1, 11))) + [np.nan], + list( + zip( + date_range("20170101", periods=10), + date_range("20170101", periods=10), + ) + ) + + [np.nan], + list( + zip( + timedelta_range("0 days", periods=10), + timedelta_range("1 day", periods=10), + ) + ) + + [np.nan], + ], + ) + @pytest.mark.parametrize("na_tuple", [True, False]) + def test_to_tuples_na(self, tuples, na_tuple): + # GH 18756 + idx = IntervalIndex.from_tuples(tuples) + result = idx.to_tuples(na_tuple=na_tuple) + + # check the non-NA portion + expected_notna = Index(com.asarray_tuplesafe(tuples[:-1])) + result_notna = result[:-1] + tm.assert_index_equal(result_notna, expected_notna) + + # check the NA portion + result_na = result[-1] + if na_tuple: + assert isinstance(result_na, tuple) + assert len(result_na) == 2 + assert all(isna(x) for x in result_na) + else: + assert isna(result_na) + + def test_nbytes(self): + # GH 19209 + left = np.arange(0, 4, dtype="i8") + right = np.arange(1, 5, dtype="i8") + + result = IntervalIndex.from_arrays(left, right).nbytes + expected = 64 # 4 * 8 * 2 + assert result == expected + + 
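+    # A rough sketch of the behavior exercised below (illustrative only):
+    #
+    #   idx = interval_range(0, 3, closed="right")  # (0, 1], (1, 2], (2, 3]
+    #   idx.set_closed("left")                      # [0, 1), [1, 2), [2, 3)
+    #
+    # set_closed reuses the existing breaks and only changes which side(s)
+    # are closed, so rebuilding the range with the new closed value is the
+    # natural expected result.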
@pytest.mark.parametrize("new_closed", ["left", "right", "both", "neither"]) + def test_set_closed(self, name, closed, new_closed): + # GH 21670 + index = interval_range(0, 5, closed=closed, name=name) + result = index.set_closed(new_closed) + expected = interval_range(0, 5, closed=new_closed, name=name) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("bad_closed", ["foo", 10, "LEFT", True, False]) + def test_set_closed_errors(self, bad_closed): + # GH 21670 + index = interval_range(0, 5) + msg = f"invalid option for 'closed': {bad_closed}" + with pytest.raises(ValueError, match=msg): + index.set_closed(bad_closed) + + def test_is_all_dates(self): + # GH 23576 + year_2017 = Interval( + Timestamp("2017-01-01 00:00:00"), Timestamp("2018-01-01 00:00:00") + ) + year_2017_index = IntervalIndex([year_2017]) + assert not year_2017_index._is_all_dates + + +def test_dir(): + # GH#27571 dir(interval_index) should not raise + index = IntervalIndex.from_arrays([0, 1], [1, 2]) + result = dir(index) + assert "str" not in result + + +def test_searchsorted_different_argument_classes(listlike_box): + # https://github.com/pandas-dev/pandas/issues/32762 + values = IntervalIndex([Interval(0, 1), Interval(1, 2)]) + result = values.searchsorted(listlike_box(values)) + expected = np.array([0, 1], dtype=result.dtype) + tm.assert_numpy_array_equal(result, expected) + + result = values._data.searchsorted(listlike_box(values)) + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize( + "arg", [[1, 2], ["a", "b"], [Timestamp("2020-01-01", tz="Europe/London")] * 2] +) +def test_searchsorted_invalid_argument(arg): + values = IntervalIndex([Interval(0, 1), Interval(1, 2)]) + msg = "'<' not supported between instances of 'pandas._libs.interval.Interval' and " + with pytest.raises(TypeError, match=msg): + values.searchsorted(arg) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/test_interval_range.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/test_interval_range.py new file mode 100644 index 0000000..2f28c33 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/test_interval_range.py @@ -0,0 +1,355 @@ +from datetime import timedelta + +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_integer + +from pandas import ( + DateOffset, + Interval, + IntervalIndex, + Timedelta, + Timestamp, + date_range, + interval_range, + timedelta_range, +) +import pandas._testing as tm + +from pandas.tseries.offsets import Day + + +@pytest.fixture(scope="class", params=[None, "foo"]) +def name(request): + return request.param + + +class TestIntervalRange: + @pytest.mark.parametrize("freq, periods", [(1, 100), (2.5, 40), (5, 20), (25, 4)]) + def test_constructor_numeric(self, closed, name, freq, periods): + start, end = 0, 100 + breaks = np.arange(101, step=freq) + expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) + + # defined from start/end/freq + result = interval_range( + start=start, end=end, freq=freq, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + # defined from start/periods/freq + result = interval_range( + start=start, periods=periods, freq=freq, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + # defined from end/periods/freq + result = interval_range( + end=end, periods=periods, freq=freq, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + # GH 20976: linspace behavior defined from 
start/end/periods + result = interval_range( + start=start, end=end, periods=periods, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("tz", [None, "US/Eastern"]) + @pytest.mark.parametrize( + "freq, periods", [("D", 364), ("2D", 182), ("22D18H", 16), ("M", 11)] + ) + def test_constructor_timestamp(self, closed, name, freq, periods, tz): + start, end = Timestamp("20180101", tz=tz), Timestamp("20181231", tz=tz) + breaks = date_range(start=start, end=end, freq=freq) + expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) + + # defined from start/end/freq + result = interval_range( + start=start, end=end, freq=freq, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + # defined from start/periods/freq + result = interval_range( + start=start, periods=periods, freq=freq, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + # defined from end/periods/freq + result = interval_range( + end=end, periods=periods, freq=freq, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + # GH 20976: linspace behavior defined from start/end/periods + if not breaks.freq.is_anchored() and tz is None: + # matches expected only for non-anchored offsets and tz naive + # (anchored/DST transitions cause unequal spacing in expected) + result = interval_range( + start=start, end=end, periods=periods, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "freq, periods", [("D", 100), ("2D12H", 40), ("5D", 20), ("25D", 4)] + ) + def test_constructor_timedelta(self, closed, name, freq, periods): + start, end = Timedelta("0 days"), Timedelta("100 days") + breaks = timedelta_range(start=start, end=end, freq=freq) + expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) + + # defined from start/end/freq + result = interval_range( + start=start, end=end, freq=freq, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + # defined from start/periods/freq + result = interval_range( + start=start, periods=periods, freq=freq, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + # defined from end/periods/freq + result = interval_range( + end=end, periods=periods, freq=freq, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + # GH 20976: linspace behavior defined from start/end/periods + result = interval_range( + start=start, end=end, periods=periods, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "start, end, freq, expected_endpoint", + [ + (0, 10, 3, 9), + (0, 10, 1.5, 9), + (0.5, 10, 3, 9.5), + (Timedelta("0D"), Timedelta("10D"), "2D4H", Timedelta("8D16H")), + ( + Timestamp("2018-01-01"), + Timestamp("2018-02-09"), + "MS", + Timestamp("2018-02-01"), + ), + ( + Timestamp("2018-01-01", tz="US/Eastern"), + Timestamp("2018-01-20", tz="US/Eastern"), + "5D12H", + Timestamp("2018-01-17 12:00:00", tz="US/Eastern"), + ), + ], + ) + def test_early_truncation(self, start, end, freq, expected_endpoint): + # index truncates early if freq causes end to be skipped + result = interval_range(start=start, end=end, freq=freq) + result_endpoint = result.right[-1] + assert result_endpoint == expected_endpoint + + @pytest.mark.parametrize( + "start, end, freq", + [(0.5, None, None), (None, 4.5, None), (0.5, None, 1.5), (None, 6.5, 1.5)], + ) + def test_no_invalid_float_truncation(self, start, end, freq): + # GH 21161 + if freq is None: 
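+            # with freq omitted, interval_range assumes a unit step, so four
+            # periods starting at 0.5 (or ending at 4.5) give these breaks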
+ breaks = [0.5, 1.5, 2.5, 3.5, 4.5] + else: + breaks = [0.5, 2.0, 3.5, 5.0, 6.5] + expected = IntervalIndex.from_breaks(breaks) + + result = interval_range(start=start, end=end, periods=4, freq=freq) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "start, mid, end", + [ + ( + Timestamp("2018-03-10", tz="US/Eastern"), + Timestamp("2018-03-10 23:30:00", tz="US/Eastern"), + Timestamp("2018-03-12", tz="US/Eastern"), + ), + ( + Timestamp("2018-11-03", tz="US/Eastern"), + Timestamp("2018-11-04 00:30:00", tz="US/Eastern"), + Timestamp("2018-11-05", tz="US/Eastern"), + ), + ], + ) + def test_linspace_dst_transition(self, start, mid, end): + # GH 20976: linspace behavior defined from start/end/periods + # accounts for the hour gained/lost during DST transition + result = interval_range(start=start, end=end, periods=2) + expected = IntervalIndex.from_breaks([start, mid, end]) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("freq", [2, 2.0]) + @pytest.mark.parametrize("end", [10, 10.0]) + @pytest.mark.parametrize("start", [0, 0.0]) + def test_float_subtype(self, start, end, freq): + # Has float subtype if any of start/end/freq are float, even if all + # resulting endpoints can safely be upcast to integers + + # defined from start/end/freq + index = interval_range(start=start, end=end, freq=freq) + result = index.dtype.subtype + expected = "int64" if is_integer(start + end + freq) else "float64" + assert result == expected + + # defined from start/periods/freq + index = interval_range(start=start, periods=5, freq=freq) + result = index.dtype.subtype + expected = "int64" if is_integer(start + freq) else "float64" + assert result == expected + + # defined from end/periods/freq + index = interval_range(end=end, periods=5, freq=freq) + result = index.dtype.subtype + expected = "int64" if is_integer(end + freq) else "float64" + assert result == expected + + # GH 20976: linspace behavior defined from start/end/periods + index = interval_range(start=start, end=end, periods=5) + result = index.dtype.subtype + expected = "int64" if is_integer(start + end) else "float64" + assert result == expected + + def test_constructor_coverage(self): + # float value for periods + expected = interval_range(start=0, periods=10) + result = interval_range(start=0, periods=10.5) + tm.assert_index_equal(result, expected) + + # equivalent timestamp-like start/end + start, end = Timestamp("2017-01-01"), Timestamp("2017-01-15") + expected = interval_range(start=start, end=end) + + result = interval_range(start=start.to_pydatetime(), end=end.to_pydatetime()) + tm.assert_index_equal(result, expected) + + result = interval_range(start=start.asm8, end=end.asm8) + tm.assert_index_equal(result, expected) + + # equivalent freq with timestamp + equiv_freq = [ + "D", + Day(), + Timedelta(days=1), + timedelta(days=1), + DateOffset(days=1), + ] + for freq in equiv_freq: + result = interval_range(start=start, end=end, freq=freq) + tm.assert_index_equal(result, expected) + + # equivalent timedelta-like start/end + start, end = Timedelta(days=1), Timedelta(days=10) + expected = interval_range(start=start, end=end) + + result = interval_range(start=start.to_pytimedelta(), end=end.to_pytimedelta()) + tm.assert_index_equal(result, expected) + + result = interval_range(start=start.asm8, end=end.asm8) + tm.assert_index_equal(result, expected) + + # equivalent freq with timedelta + equiv_freq = ["D", Day(), Timedelta(days=1), timedelta(days=1)] + for freq in equiv_freq: + result = 
interval_range(start=start, end=end, freq=freq) + tm.assert_index_equal(result, expected) + + def test_errors(self): + # not enough params + msg = ( + "Of the four parameters: start, end, periods, and freq, " + "exactly three must be specified" + ) + + with pytest.raises(ValueError, match=msg): + interval_range(start=0) + + with pytest.raises(ValueError, match=msg): + interval_range(end=5) + + with pytest.raises(ValueError, match=msg): + interval_range(periods=2) + + with pytest.raises(ValueError, match=msg): + interval_range() + + # too many params + with pytest.raises(ValueError, match=msg): + interval_range(start=0, end=5, periods=6, freq=1.5) + + # mixed units + msg = "start, end, freq need to be type compatible" + with pytest.raises(TypeError, match=msg): + interval_range(start=0, end=Timestamp("20130101"), freq=2) + + with pytest.raises(TypeError, match=msg): + interval_range(start=0, end=Timedelta("1 day"), freq=2) + + with pytest.raises(TypeError, match=msg): + interval_range(start=0, end=10, freq="D") + + with pytest.raises(TypeError, match=msg): + interval_range(start=Timestamp("20130101"), end=10, freq="D") + + with pytest.raises(TypeError, match=msg): + interval_range( + start=Timestamp("20130101"), end=Timedelta("1 day"), freq="D" + ) + + with pytest.raises(TypeError, match=msg): + interval_range( + start=Timestamp("20130101"), end=Timestamp("20130110"), freq=2 + ) + + with pytest.raises(TypeError, match=msg): + interval_range(start=Timedelta("1 day"), end=10, freq="D") + + with pytest.raises(TypeError, match=msg): + interval_range( + start=Timedelta("1 day"), end=Timestamp("20130110"), freq="D" + ) + + with pytest.raises(TypeError, match=msg): + interval_range(start=Timedelta("1 day"), end=Timedelta("10 days"), freq=2) + + # invalid periods + msg = "periods must be a number, got foo" + with pytest.raises(TypeError, match=msg): + interval_range(start=0, periods="foo") + + # invalid start + msg = "start must be numeric or datetime-like, got foo" + with pytest.raises(ValueError, match=msg): + interval_range(start="foo", periods=10) + + # invalid end + msg = r"end must be numeric or datetime-like, got \(0, 1\]" + with pytest.raises(ValueError, match=msg): + interval_range(end=Interval(0, 1), periods=10) + + # invalid freq for datetime-like + msg = "freq must be numeric or convertible to DateOffset, got foo" + with pytest.raises(ValueError, match=msg): + interval_range(start=0, end=10, freq="foo") + + with pytest.raises(ValueError, match=msg): + interval_range(start=Timestamp("20130101"), periods=10, freq="foo") + + with pytest.raises(ValueError, match=msg): + interval_range(end=Timedelta("1 day"), periods=10, freq="foo") + + # mixed tz + start = Timestamp("2017-01-01", tz="US/Eastern") + end = Timestamp("2017-01-07", tz="US/Pacific") + msg = "Start and end cannot both be tz-aware with different timezones" + with pytest.raises(TypeError, match=msg): + interval_range(start=start, end=end) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/test_interval_tree.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/test_interval_tree.py new file mode 100644 index 0000000..ab6eac4 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/test_interval_tree.py @@ -0,0 +1,191 @@ +from itertools import permutations + +import numpy as np +import pytest + +from pandas._libs.interval import IntervalTree +from pandas.compat import IS64 + +import pandas._testing as tm + + +def skipif_32bit(param): + """ + Skip parameters in a 
parametrize on 32bit systems. Specifically used + here to skip leaf_size parameters related to GH 23440. + """ + marks = pytest.mark.skipif(not IS64, reason="GH 23440: int type mismatch on 32bit") + return pytest.param(param, marks=marks) + + +@pytest.fixture(scope="class", params=["int64", "float64", "uint64"]) +def dtype(request): + return request.param + + +@pytest.fixture(params=[skipif_32bit(1), skipif_32bit(2), 10]) +def leaf_size(request): + """ + Fixture to specify IntervalTree leaf_size parameter; to be used with the + tree fixture. + """ + return request.param + + +@pytest.fixture( + params=[ + np.arange(5, dtype="int64"), + np.arange(5, dtype="uint64"), + np.arange(5, dtype="float64"), + np.array([0, 1, 2, 3, 4, np.nan], dtype="float64"), + ] +) +def tree(request, leaf_size): + left = request.param + return IntervalTree(left, left + 2, leaf_size=leaf_size) + + +class TestIntervalTree: + def test_get_indexer(self, tree): + result = tree.get_indexer(np.array([1.0, 5.5, 6.5])) + expected = np.array([0, 4, -1], dtype="intp") + tm.assert_numpy_array_equal(result, expected) + + with pytest.raises( + KeyError, match="'indexer does not intersect a unique set of intervals'" + ): + tree.get_indexer(np.array([3.0])) + + @pytest.mark.parametrize( + "dtype, target_value, target_dtype", + [("int64", 2 ** 63 + 1, "uint64"), ("uint64", -1, "int64")], + ) + def test_get_indexer_overflow(self, dtype, target_value, target_dtype): + left, right = np.array([0, 1], dtype=dtype), np.array([1, 2], dtype=dtype) + tree = IntervalTree(left, right) + + result = tree.get_indexer(np.array([target_value], dtype=target_dtype)) + expected = np.array([-1], dtype="intp") + tm.assert_numpy_array_equal(result, expected) + + def test_get_indexer_non_unique(self, tree): + indexer, missing = tree.get_indexer_non_unique(np.array([1.0, 2.0, 6.5])) + + result = indexer[:1] + expected = np.array([0], dtype="intp") + tm.assert_numpy_array_equal(result, expected) + + result = np.sort(indexer[1:3]) + expected = np.array([0, 1], dtype="intp") + tm.assert_numpy_array_equal(result, expected) + + result = np.sort(indexer[3:]) + expected = np.array([-1], dtype="intp") + tm.assert_numpy_array_equal(result, expected) + + result = missing + expected = np.array([2], dtype="intp") + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "dtype, target_value, target_dtype", + [("int64", 2 ** 63 + 1, "uint64"), ("uint64", -1, "int64")], + ) + def test_get_indexer_non_unique_overflow(self, dtype, target_value, target_dtype): + left, right = np.array([0, 2], dtype=dtype), np.array([1, 3], dtype=dtype) + tree = IntervalTree(left, right) + target = np.array([target_value], dtype=target_dtype) + + result_indexer, result_missing = tree.get_indexer_non_unique(target) + expected_indexer = np.array([-1], dtype="intp") + tm.assert_numpy_array_equal(result_indexer, expected_indexer) + + expected_missing = np.array([0], dtype="intp") + tm.assert_numpy_array_equal(result_missing, expected_missing) + + def test_duplicates(self, dtype): + left = np.array([0, 0, 0], dtype=dtype) + tree = IntervalTree(left, left + 1) + + with pytest.raises( + KeyError, match="'indexer does not intersect a unique set of intervals'" + ): + tree.get_indexer(np.array([0.5])) + + indexer, missing = tree.get_indexer_non_unique(np.array([0.5])) + result = np.sort(indexer) + expected = np.array([0, 1, 2], dtype="intp") + tm.assert_numpy_array_equal(result, expected) + + result = missing + expected = np.array([], dtype="intp") + 
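+        # every queried point fell inside at least one interval, so the
+        # missing array is expected to be empty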
tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "leaf_size", [skipif_32bit(1), skipif_32bit(10), skipif_32bit(100), 10000] + ) + def test_get_indexer_closed(self, closed, leaf_size): + x = np.arange(1000, dtype="float64") + found = x.astype("intp") + not_found = (-1 * np.ones(1000)).astype("intp") + + tree = IntervalTree(x, x + 0.5, closed=closed, leaf_size=leaf_size) + tm.assert_numpy_array_equal(found, tree.get_indexer(x + 0.25)) + + expected = found if tree.closed_left else not_found + tm.assert_numpy_array_equal(expected, tree.get_indexer(x + 0.0)) + + expected = found if tree.closed_right else not_found + tm.assert_numpy_array_equal(expected, tree.get_indexer(x + 0.5)) + + @pytest.mark.parametrize( + "left, right, expected", + [ + (np.array([0, 1, 4], dtype="int64"), np.array([2, 3, 5]), True), + (np.array([0, 1, 2], dtype="int64"), np.array([5, 4, 3]), True), + (np.array([0, 1, np.nan]), np.array([5, 4, np.nan]), True), + (np.array([0, 2, 4], dtype="int64"), np.array([1, 3, 5]), False), + (np.array([0, 2, np.nan]), np.array([1, 3, np.nan]), False), + ], + ) + @pytest.mark.parametrize("order", (list(x) for x in permutations(range(3)))) + def test_is_overlapping(self, closed, order, left, right, expected): + # GH 23309 + tree = IntervalTree(left[order], right[order], closed=closed) + result = tree.is_overlapping + assert result is expected + + @pytest.mark.parametrize("order", (list(x) for x in permutations(range(3)))) + def test_is_overlapping_endpoints(self, closed, order): + """shared endpoints are marked as overlapping""" + # GH 23309 + left, right = np.arange(3, dtype="int64"), np.arange(1, 4) + tree = IntervalTree(left[order], right[order], closed=closed) + result = tree.is_overlapping + expected = closed == "both" + assert result is expected + + @pytest.mark.parametrize( + "left, right", + [ + (np.array([], dtype="int64"), np.array([], dtype="int64")), + (np.array([0], dtype="int64"), np.array([1], dtype="int64")), + (np.array([np.nan]), np.array([np.nan])), + (np.array([np.nan] * 3), np.array([np.nan] * 3)), + ], + ) + def test_is_overlapping_trivial(self, closed, left, right): + # GH 23309 + tree = IntervalTree(left, right, closed=closed) + assert tree.is_overlapping is False + + @pytest.mark.skipif(not IS64, reason="GH 23440") + def test_construction_overflow(self): + # GH 25485 + left, right = np.arange(101, dtype="int64"), [np.iinfo(np.int64).max] * 101 + tree = IntervalTree(left, right) + + # pivot should be average of left/right medians + result = tree.root.pivot + expected = (50 + np.iinfo(np.int64).max) / 2 + assert result == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/test_join.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/test_join.py new file mode 100644 index 0000000..2f42c53 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/test_join.py @@ -0,0 +1,44 @@ +import pytest + +from pandas import ( + IntervalIndex, + MultiIndex, + RangeIndex, +) +import pandas._testing as tm + + +@pytest.fixture +def range_index(): + return RangeIndex(3, name="range_index") + + +@pytest.fixture +def interval_index(): + return IntervalIndex.from_tuples( + [(0.0, 1.0), (1.0, 2.0), (1.5, 2.5)], name="interval_index" + ) + + +def test_join_overlapping_in_mi_to_same_intervalindex(range_index, interval_index): + # GH-45661 + multi_index = MultiIndex.from_product([interval_index, range_index]) + result = multi_index.join(interval_index) + + 
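# joining a MultiIndex with one of its own levels should round-trip it unchanged +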
tm.assert_index_equal(result, multi_index) + + +def test_join_overlapping_to_multiindex_with_same_interval(range_index, interval_index): + # GH-45661 + multi_index = MultiIndex.from_product([interval_index, range_index]) + result = interval_index.join(multi_index) + + tm.assert_index_equal(result, multi_index) + + +def test_join_overlapping_interval_to_another_intervalindex(interval_index): + # GH-45661 + flipped_interval_index = interval_index[::-1] + result = interval_index.join(flipped_interval_index) + + tm.assert_index_equal(result, interval_index) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/test_pickle.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/test_pickle.py new file mode 100644 index 0000000..308a90e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/test_pickle.py @@ -0,0 +1,13 @@ +import pytest + +from pandas import IntervalIndex +import pandas._testing as tm + + +class TestPickle: + @pytest.mark.parametrize("closed", ["left", "right", "both"]) + def test_pickle_round_trip_closed(self, closed): + # https://github.com/pandas-dev/pandas/issues/35658 + idx = IntervalIndex.from_tuples([(1, 2), (2, 3)], closed=closed) + result = tm.round_trip_pickle(idx) + tm.assert_index_equal(result, idx) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/test_setops.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/test_setops.py new file mode 100644 index 0000000..059b0b7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/interval/test_setops.py @@ -0,0 +1,202 @@ +import numpy as np +import pytest + +from pandas import ( + Index, + IntervalIndex, + Timestamp, + interval_range, +) +import pandas._testing as tm + + +def monotonic_index(start, end, dtype="int64", closed="right"): + return IntervalIndex.from_breaks(np.arange(start, end, dtype=dtype), closed=closed) + + +def empty_index(dtype="int64", closed="right"): + return IntervalIndex(np.array([], dtype=dtype), closed=closed) + + +class TestIntervalIndex: + def test_union(self, closed, sort): + index = monotonic_index(0, 11, closed=closed) + other = monotonic_index(5, 13, closed=closed) + + expected = monotonic_index(0, 13, closed=closed) + result = index[::-1].union(other, sort=sort) + if sort is None: + tm.assert_index_equal(result, expected) + assert tm.equalContents(result, expected) + + result = other[::-1].union(index, sort=sort) + if sort is None: + tm.assert_index_equal(result, expected) + assert tm.equalContents(result, expected) + + tm.assert_index_equal(index.union(index, sort=sort), index) + tm.assert_index_equal(index.union(index[:1], sort=sort), index) + + def test_union_empty_result(self, closed, sort): + # GH 19101: empty result, same dtype + index = empty_index(dtype="int64", closed=closed) + result = index.union(index, sort=sort) + tm.assert_index_equal(result, index) + + # GH 19101: empty result, different numeric dtypes -> common dtype is f8 + other = empty_index(dtype="float64", closed=closed) + result = index.union(other, sort=sort) + expected = other + tm.assert_index_equal(result, expected) + + other = index.union(index, sort=sort) + tm.assert_index_equal(result, expected) + + other = empty_index(dtype="uint64", closed=closed) + result = index.union(other, sort=sort) + tm.assert_index_equal(result, expected) + + result = other.union(index, sort=sort) + tm.assert_index_equal(result, expected) + + def test_intersection(self, closed, sort): + index = 
monotonic_index(0, 11, closed=closed) + other = monotonic_index(5, 13, closed=closed) + + expected = monotonic_index(5, 11, closed=closed) + result = index[::-1].intersection(other, sort=sort) + if sort is None: + tm.assert_index_equal(result, expected) + assert tm.equalContents(result, expected) + + result = other[::-1].intersection(index, sort=sort) + if sort is None: + tm.assert_index_equal(result, expected) + assert tm.equalContents(result, expected) + + tm.assert_index_equal(index.intersection(index, sort=sort), index) + + # GH 26225: nested intervals + index = IntervalIndex.from_tuples([(1, 2), (1, 3), (1, 4), (0, 2)]) + other = IntervalIndex.from_tuples([(1, 2), (1, 3)]) + expected = IntervalIndex.from_tuples([(1, 2), (1, 3)]) + result = index.intersection(other) + tm.assert_index_equal(result, expected) + + # GH 26225 + index = IntervalIndex.from_tuples([(0, 3), (0, 2)]) + other = IntervalIndex.from_tuples([(0, 2), (1, 3)]) + expected = IntervalIndex.from_tuples([(0, 2)]) + result = index.intersection(other) + tm.assert_index_equal(result, expected) + + # GH 26225: duplicate nan element + index = IntervalIndex([np.nan, np.nan]) + other = IntervalIndex([np.nan]) + expected = IntervalIndex([np.nan]) + result = index.intersection(other) + tm.assert_index_equal(result, expected) + + def test_intersection_empty_result(self, closed, sort): + index = monotonic_index(0, 11, closed=closed) + + # GH 19101: empty result, same dtype + other = monotonic_index(300, 314, closed=closed) + expected = empty_index(dtype="int64", closed=closed) + result = index.intersection(other, sort=sort) + tm.assert_index_equal(result, expected) + + # GH 19101: empty result, different numeric dtypes -> common dtype is float64 + other = monotonic_index(300, 314, dtype="float64", closed=closed) + result = index.intersection(other, sort=sort) + expected = other[:0] + tm.assert_index_equal(result, expected) + + other = monotonic_index(300, 314, dtype="uint64", closed=closed) + result = index.intersection(other, sort=sort) + tm.assert_index_equal(result, expected) + + def test_intersection_duplicates(self): + # GH#38743 + index = IntervalIndex.from_tuples([(1, 2), (1, 2), (2, 3), (3, 4)]) + other = IntervalIndex.from_tuples([(1, 2), (2, 3)]) + expected = IntervalIndex.from_tuples([(1, 2), (2, 3)]) + result = index.intersection(other) + tm.assert_index_equal(result, expected) + + def test_difference(self, closed, sort): + index = IntervalIndex.from_arrays([1, 0, 3, 2], [1, 2, 3, 4], closed=closed) + result = index.difference(index[:1], sort=sort) + expected = index[1:] + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + + # GH 19101: empty result, same dtype + result = index.difference(index, sort=sort) + expected = empty_index(dtype="int64", closed=closed) + tm.assert_index_equal(result, expected) + + # GH 19101: empty result, different dtypes + other = IntervalIndex.from_arrays( + index.left.astype("float64"), index.right, closed=closed + ) + result = index.difference(other, sort=sort) + tm.assert_index_equal(result, expected) + + def test_symmetric_difference(self, closed, sort): + index = monotonic_index(0, 11, closed=closed) + result = index[1:].symmetric_difference(index[:-1], sort=sort) + expected = IntervalIndex([index[0], index[-1]]) + if sort is None: + tm.assert_index_equal(result, expected) + assert tm.equalContents(result, expected) + + # GH 19101: empty result, same dtype + result = index.symmetric_difference(index, sort=sort) + expected = 
empty_index(dtype="int64", closed=closed) + if sort is None: + tm.assert_index_equal(result, expected) + assert tm.equalContents(result, expected) + + # GH 19101: empty result, different dtypes + other = IntervalIndex.from_arrays( + index.left.astype("float64"), index.right, closed=closed + ) + result = index.symmetric_difference(other, sort=sort) + expected = empty_index(dtype="float64", closed=closed) + tm.assert_index_equal(result, expected) + + @pytest.mark.filterwarnings("ignore:'<' not supported between:RuntimeWarning") + @pytest.mark.parametrize( + "op_name", ["union", "intersection", "difference", "symmetric_difference"] + ) + def test_set_incompatible_types(self, closed, op_name, sort): + index = monotonic_index(0, 11, closed=closed) + set_op = getattr(index, op_name) + + # TODO: standardize return type of non-union setops type(self vs other) + # non-IntervalIndex + if op_name == "difference": + expected = index + else: + expected = getattr(index.astype("O"), op_name)(Index([1, 2, 3])) + result = set_op(Index([1, 2, 3]), sort=sort) + tm.assert_index_equal(result, expected) + + # mixed closed -> cast to object + for other_closed in {"right", "left", "both", "neither"} - {closed}: + other = monotonic_index(0, 11, closed=other_closed) + expected = getattr(index.astype(object), op_name)(other, sort=sort) + if op_name == "difference": + expected = index + result = set_op(other, sort=sort) + tm.assert_index_equal(result, expected) + + # GH 19016: incompatible dtypes -> cast to object + other = interval_range(Timestamp("20180101"), periods=9, closed=closed) + expected = getattr(index.astype(object), op_name)(other, sort=sort) + if op_name == "difference": + expected = index + result = set_op(other, sort=sort) + tm.assert_index_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..60bbefc Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/conftest.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/conftest.cpython-38.pyc new file mode 100644 index 0000000..d918333 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/conftest.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_analytics.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_analytics.cpython-38.pyc new file mode 100644 index 0000000..2b15745 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_analytics.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_astype.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_astype.cpython-38.pyc new file mode 100644 index 0000000..9c7a397 Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_astype.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_compat.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_compat.cpython-38.pyc new file mode 100644 index 0000000..b7b3e6d Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_compat.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_constructors.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_constructors.cpython-38.pyc new file mode 100644 index 0000000..d3f4754 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_constructors.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_conversion.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_conversion.cpython-38.pyc new file mode 100644 index 0000000..d1ef7c2 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_conversion.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_copy.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_copy.cpython-38.pyc new file mode 100644 index 0000000..6d50023 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_copy.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_drop.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_drop.cpython-38.pyc new file mode 100644 index 0000000..37d10fa Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_drop.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_duplicates.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_duplicates.cpython-38.pyc new file mode 100644 index 0000000..9987a68 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_duplicates.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_equivalence.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_equivalence.cpython-38.pyc new file mode 100644 index 0000000..c0f7f61 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_equivalence.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_formats.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_formats.cpython-38.pyc new file mode 100644 index 0000000..519f0b4 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_formats.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_get_level_values.cpython-38.pyc 
b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_get_level_values.cpython-38.pyc new file mode 100644 index 0000000..9de7d87 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_get_level_values.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_get_set.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_get_set.cpython-38.pyc new file mode 100644 index 0000000..0a9e3a1 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_get_set.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_indexing.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_indexing.cpython-38.pyc new file mode 100644 index 0000000..050c8dc Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_indexing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_integrity.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_integrity.cpython-38.pyc new file mode 100644 index 0000000..9b91032 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_integrity.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_isin.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_isin.cpython-38.pyc new file mode 100644 index 0000000..02edf82 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_isin.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_join.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_join.cpython-38.pyc new file mode 100644 index 0000000..a82cbfd Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_join.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_lexsort.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_lexsort.cpython-38.pyc new file mode 100644 index 0000000..0ec2a39 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_lexsort.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_missing.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_missing.cpython-38.pyc new file mode 100644 index 0000000..405bad8 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_missing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_monotonic.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_monotonic.cpython-38.pyc new file mode 100644 index 0000000..27632b2 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_monotonic.cpython-38.pyc differ diff --git 
a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_names.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_names.cpython-38.pyc new file mode 100644 index 0000000..0b0f34a Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_names.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_partial_indexing.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_partial_indexing.cpython-38.pyc new file mode 100644 index 0000000..4e2abe2 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_partial_indexing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_pickle.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_pickle.cpython-38.pyc new file mode 100644 index 0000000..dd21817 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_pickle.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_reindex.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_reindex.cpython-38.pyc new file mode 100644 index 0000000..4fa5376 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_reindex.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_reshape.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_reshape.cpython-38.pyc new file mode 100644 index 0000000..0b1e9bd Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_reshape.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_setops.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_setops.cpython-38.pyc new file mode 100644 index 0000000..109b700 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_setops.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_sorting.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_sorting.cpython-38.pyc new file mode 100644 index 0000000..be483c8 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_sorting.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_take.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_take.cpython-38.pyc new file mode 100644 index 0000000..bedda2b Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/__pycache__/test_take.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/conftest.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/conftest.py new file mode 100644 index 0000000..9d0a2fa --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/conftest.py @@ -0,0 +1,77 @@ +import numpy as np +import 
pytest + +import pandas as pd +from pandas import ( + Index, + MultiIndex, +) + + +# Note: identical to the "multi" entry in the top-level "index" fixture +@pytest.fixture +def idx(): + # a MultiIndex used to test the general functionality of this object + major_axis = Index(["foo", "bar", "baz", "qux"]) + minor_axis = Index(["one", "two"]) + + major_codes = np.array([0, 0, 1, 2, 3, 3]) + minor_codes = np.array([0, 1, 0, 1, 0, 1]) + index_names = ["first", "second"] + mi = MultiIndex( + levels=[major_axis, minor_axis], + codes=[major_codes, minor_codes], + names=index_names, + verify_integrity=False, + ) + return mi + + +@pytest.fixture +def idx_dup(): + # compare tests/indexes/multi/conftest.py + major_axis = Index(["foo", "bar", "baz", "qux"]) + minor_axis = Index(["one", "two"]) + + major_codes = np.array([0, 0, 1, 0, 1, 1]) + minor_codes = np.array([0, 1, 0, 1, 0, 1]) + index_names = ["first", "second"] + mi = MultiIndex( + levels=[major_axis, minor_axis], + codes=[major_codes, minor_codes], + names=index_names, + verify_integrity=False, + ) + return mi + + +@pytest.fixture +def index_names(): + # names that match those in the idx fixture for testing equality of + # names assigned to the idx + return ["first", "second"] + + +@pytest.fixture +def narrow_multi_index(): + """ + Return a MultiIndex that is narrower than the display (<80 characters). + """ + n = 1000 + ci = pd.CategoricalIndex(list("a" * n) + (["abc"] * n)) + dti = pd.date_range("2000-01-01", freq="s", periods=n * 2) + return MultiIndex.from_arrays([ci, ci.codes + 9, dti], names=["a", "b", "dti"]) + + +@pytest.fixture +def wide_multi_index(): + """ + Return a MultiIndex that is wider than the display (>80 characters). + """ + n = 1000 + ci = pd.CategoricalIndex(list("a" * n) + (["abc"] * n)) + dti = pd.date_range("2000-01-01", freq="s", periods=n * 2) + levels = [ci, ci.codes + 9, dti, dti, dti] + names = ["a", "b", "dti_1", "dti_2", "dti_3"] + return MultiIndex.from_arrays(levels, names=names) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_analytics.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_analytics.py new file mode 100644 index 0000000..629cd7e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_analytics.py @@ -0,0 +1,260 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Index, + MultiIndex, + date_range, + period_range, +) +import pandas._testing as tm +from pandas.core.api import UInt64Index + + +def test_shift(idx): + + # GH8083 test the base class for shift + msg = ( + "This method is only implemented for DatetimeIndex, PeriodIndex and " + "TimedeltaIndex; Got type MultiIndex" + ) + with pytest.raises(NotImplementedError, match=msg): + idx.shift(1) + with pytest.raises(NotImplementedError, match=msg): + idx.shift(1, 2) + + +def test_groupby(idx): + groups = idx.groupby(np.array([1, 1, 1, 2, 2, 2])) + labels = idx.tolist() + exp = {1: labels[:3], 2: labels[3:]} + tm.assert_dict_equal(groups, exp) + + # GH5620 + groups = idx.groupby(idx) + exp = {key: [key] for key in idx} + tm.assert_dict_equal(groups, exp) + + +def test_truncate_multiindex(): + # GH 34564 for MultiIndex level names check + major_axis = Index(list(range(4))) + minor_axis = Index(list(range(2))) + + major_codes = np.array([0, 0, 1, 2, 3, 3]) + minor_codes = np.array([0, 1, 0, 1, 0, 1]) + + index = MultiIndex( + levels=[major_axis, minor_axis], + codes=[major_codes, minor_codes], + names=["L1",
"L2"], + ) + + result = index.truncate(before=1) + assert "foo" not in result.levels[0] + assert 1 in result.levels[0] + assert index.names == result.names + + result = index.truncate(after=1) + assert 2 not in result.levels[0] + assert 1 in result.levels[0] + assert index.names == result.names + + result = index.truncate(before=1, after=2) + assert len(result.levels[0]) == 2 + assert index.names == result.names + + msg = "after < before" + with pytest.raises(ValueError, match=msg): + index.truncate(3, 1) + + +# TODO: reshape + + +def test_reorder_levels(idx): + # this blows up + with pytest.raises(IndexError, match="^Too many levels"): + idx.reorder_levels([2, 1, 0]) + + +def test_numpy_repeat(): + reps = 2 + numbers = [1, 2, 3] + names = np.array(["foo", "bar"]) + + m = MultiIndex.from_product([numbers, names], names=names) + expected = MultiIndex.from_product([numbers, names.repeat(reps)], names=names) + tm.assert_index_equal(np.repeat(m, reps), expected) + + msg = "the 'axis' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.repeat(m, reps, axis=1) + + +def test_append_mixed_dtypes(): + # GH 13660 + dti = date_range("2011-01-01", freq="M", periods=3) + dti_tz = date_range("2011-01-01", freq="M", periods=3, tz="US/Eastern") + pi = period_range("2011-01", freq="M", periods=3) + + mi = MultiIndex.from_arrays( + [[1, 2, 3], [1.1, np.nan, 3.3], ["a", "b", "c"], dti, dti_tz, pi] + ) + assert mi.nlevels == 6 + + res = mi.append(mi) + exp = MultiIndex.from_arrays( + [ + [1, 2, 3, 1, 2, 3], + [1.1, np.nan, 3.3, 1.1, np.nan, 3.3], + ["a", "b", "c", "a", "b", "c"], + dti.append(dti), + dti_tz.append(dti_tz), + pi.append(pi), + ] + ) + tm.assert_index_equal(res, exp) + + other = MultiIndex.from_arrays( + [ + ["x", "y", "z"], + ["x", "y", "z"], + ["x", "y", "z"], + ["x", "y", "z"], + ["x", "y", "z"], + ["x", "y", "z"], + ] + ) + + res = mi.append(other) + exp = MultiIndex.from_arrays( + [ + [1, 2, 3, "x", "y", "z"], + [1.1, np.nan, 3.3, "x", "y", "z"], + ["a", "b", "c", "x", "y", "z"], + dti.append(Index(["x", "y", "z"])), + dti_tz.append(Index(["x", "y", "z"])), + pi.append(Index(["x", "y", "z"])), + ] + ) + tm.assert_index_equal(res, exp) + + +def test_iter(idx): + result = list(idx) + expected = [ + ("foo", "one"), + ("foo", "two"), + ("bar", "one"), + ("baz", "two"), + ("qux", "one"), + ("qux", "two"), + ] + assert result == expected + + +def test_sub(idx): + + first = idx + + # - now raises (previously was set op difference) + msg = "cannot perform __sub__ with this index type: MultiIndex" + with pytest.raises(TypeError, match=msg): + first - idx[-3:] + with pytest.raises(TypeError, match=msg): + idx[-3:] - first + with pytest.raises(TypeError, match=msg): + idx[-3:] - first.tolist() + msg = "cannot perform __rsub__ with this index type: MultiIndex" + with pytest.raises(TypeError, match=msg): + first.tolist() - idx[-3:] + + +def test_map(idx): + # callable + index = idx + + result = index.map(lambda x: x) + tm.assert_index_equal(result, index) + + +@pytest.mark.parametrize( + "mapper", + [ + lambda values, idx: {i: e for e, i in zip(values, idx)}, + lambda values, idx: pd.Series(values, idx), + ], +) +def test_map_dictlike(idx, mapper): + + identity = mapper(idx.values, idx) + + # we don't infer to UInt64 for a dict + if isinstance(idx, UInt64Index) and isinstance(identity, dict): + expected = idx.astype("int64") + else: + expected = idx + + result = idx.map(identity) + tm.assert_index_equal(result, expected) + + # empty mappable + expected = Index([np.nan] * 
len(idx)) + result = idx.map(mapper(expected, idx)) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize( + "func", + [ + np.exp, + np.exp2, + np.expm1, + np.log, + np.log2, + np.log10, + np.log1p, + np.sqrt, + np.sin, + np.cos, + np.tan, + np.arcsin, + np.arccos, + np.arctan, + np.sinh, + np.cosh, + np.tanh, + np.arcsinh, + np.arccosh, + np.arctanh, + np.deg2rad, + np.rad2deg, + ], + ids=lambda func: func.__name__, +) +def test_numpy_ufuncs(idx, func): + # test ufuncs of numpy. see: + # https://numpy.org/doc/stable/reference/ufuncs.html + + expected_exception = TypeError + msg = ( + "loop of ufunc does not support argument 0 of type tuple which " + f"has no callable {func.__name__} method" + ) + with pytest.raises(expected_exception, match=msg): + func(idx) + + +@pytest.mark.parametrize( + "func", + [np.isfinite, np.isinf, np.isnan, np.signbit], + ids=lambda func: func.__name__, +) +def test_numpy_type_funcs(idx, func): + msg = ( + f"ufunc '{func.__name__}' not supported for the input types, and the inputs " + "could not be safely coerced to any supported types according to " + "the casting rule ''safe''" + ) + with pytest.raises(TypeError, match=msg): + func(idx) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_astype.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_astype.py new file mode 100644 index 0000000..2990853 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_astype.py @@ -0,0 +1,30 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import CategoricalDtype + +import pandas._testing as tm + + +def test_astype(idx): + expected = idx.copy() + actual = idx.astype("O") + tm.assert_copy(actual.levels, expected.levels) + tm.assert_copy(actual.codes, expected.codes) + assert actual.names == list(expected.names) + + with pytest.raises(TypeError, match="^Setting.*dtype.*object"): + idx.astype(np.dtype(int)) + + +@pytest.mark.parametrize("ordered", [True, False]) +def test_astype_category(idx, ordered): + # GH 18630 + msg = "> 1 ndim Categorical are not supported at this time" + with pytest.raises(NotImplementedError, match=msg): + idx.astype(CategoricalDtype(ordered=ordered)) + + if ordered is False: + # dtype='category' defaults to ordered=False, so only test once + with pytest.raises(NotImplementedError, match=msg): + idx.astype("category") diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_compat.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_compat.py new file mode 100644 index 0000000..d50a440 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_compat.py @@ -0,0 +1,98 @@ +import numpy as np +import pytest + +from pandas import MultiIndex +import pandas._testing as tm + + +def test_numeric_compat(idx): + with pytest.raises(TypeError, match="cannot perform __mul__"): + idx * 1 + + with pytest.raises(TypeError, match="cannot perform __rmul__"): + 1 * idx + + div_err = "cannot perform __truediv__" + with pytest.raises(TypeError, match=div_err): + idx / 1 + + div_err = div_err.replace(" __", " __r") + with pytest.raises(TypeError, match=div_err): + 1 / idx + + with pytest.raises(TypeError, match="cannot perform __floordiv__"): + idx // 1 + + with pytest.raises(TypeError, match="cannot perform __rfloordiv__"): + 1 // idx + + +@pytest.mark.parametrize("method", ["all", "any", "__invert__"]) +def test_logical_compat(idx, method): + msg = f"cannot perform {method}" + + with 
pytest.raises(TypeError, match=msg): + getattr(idx, method)() + + +def test_inplace_mutation_resets_values(): + levels = [["a", "b", "c"], [4]] + levels2 = [[1, 2, 3], ["a"]] + codes = [[0, 1, 0, 2, 2, 0], [0, 0, 0, 0, 0, 0]] + + mi1 = MultiIndex(levels=levels, codes=codes) + mi2 = MultiIndex(levels=levels2, codes=codes) + + # instantiating MultiIndex should not access/cache _.values + assert "_values" not in mi1._cache + assert "_values" not in mi2._cache + + vals = mi1.values.copy() + vals2 = mi2.values.copy() + + # accessing .values should cache ._values + assert mi1._values is mi1._cache["_values"] + assert mi1.values is mi1._cache["_values"] + assert isinstance(mi1._cache["_values"], np.ndarray) + + # Make sure level setting works + new_vals = mi1.set_levels(levels2).values + tm.assert_almost_equal(vals2, new_vals) + + # Non-inplace doesn't drop _values from _cache [implementation detail] + tm.assert_almost_equal(mi1._cache["_values"], vals) + + # ...and values is still same too + tm.assert_almost_equal(mi1.values, vals) + + # Inplace should drop _values from _cache + with tm.assert_produces_warning(FutureWarning): + mi1.set_levels(levels2, inplace=True) + assert "_values" not in mi1._cache + tm.assert_almost_equal(mi1.values, vals2) + + # Make sure label setting works too + codes2 = [[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]] + exp_values = np.empty((6,), dtype=object) + exp_values[:] = [(1, "a")] * 6 + + # Must be 1d array of tuples + assert exp_values.shape == (6,) + + new_mi = mi2.set_codes(codes2) + assert "_values" not in new_mi._cache + new_values = new_mi.values + assert "_values" in new_mi._cache + + # Not inplace shouldn't change + tm.assert_almost_equal(mi2._cache["_values"], vals2) + + # Should have correct values + tm.assert_almost_equal(exp_values, new_values) + + # ...and again setting inplace should drop _values from _cache, etc + with tm.assert_produces_warning(FutureWarning): + mi2.set_codes(codes2, inplace=True) + assert "_values" not in mi2._cache + tm.assert_almost_equal(mi2.values, new_values) + assert "_values" in mi2._cache diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_constructors.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_constructors.py new file mode 100644 index 0000000..63b0bd2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_constructors.py @@ -0,0 +1,829 @@ +from datetime import ( + date, + datetime, +) +import itertools + +import numpy as np +import pytest + +from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike + +import pandas as pd +from pandas import ( + Index, + MultiIndex, + Series, + Timestamp, + date_range, +) +import pandas._testing as tm + + +def test_constructor_single_level(): + result = MultiIndex( + levels=[["foo", "bar", "baz", "qux"]], codes=[[0, 1, 2, 3]], names=["first"] + ) + assert isinstance(result, MultiIndex) + expected = Index(["foo", "bar", "baz", "qux"], name="first") + tm.assert_index_equal(result.levels[0], expected) + assert result.names == ["first"] + + +def test_constructor_no_levels(): + msg = "non-zero number of levels/codes" + with pytest.raises(ValueError, match=msg): + MultiIndex(levels=[], codes=[]) + + msg = "Must pass both levels and codes" + with pytest.raises(TypeError, match=msg): + MultiIndex(levels=[]) + with pytest.raises(TypeError, match=msg): + MultiIndex(codes=[]) + + +def test_constructor_nonhashable_names(): + # GH 20527 + levels = [[1, 2], ["one", "two"]] + codes = [[0, 0, 1, 1], [0, 
1, 0, 1]] + names = (["foo"], ["bar"]) + msg = r"MultiIndex\.name must be a hashable type" + with pytest.raises(TypeError, match=msg): + MultiIndex(levels=levels, codes=codes, names=names) + + # With .rename() + mi = MultiIndex( + levels=[[1, 2], ["one", "two"]], + codes=[[0, 0, 1, 1], [0, 1, 0, 1]], + names=("foo", "bar"), + ) + renamed = [["foor"], ["barr"]] + with pytest.raises(TypeError, match=msg): + mi.rename(names=renamed) + + # With .set_names() + with pytest.raises(TypeError, match=msg): + mi.set_names(names=renamed) + + +def test_constructor_mismatched_codes_levels(idx): + codes = [np.array([1]), np.array([2]), np.array([3])] + levels = ["a"] + + msg = "Length of levels and codes must be the same" + with pytest.raises(ValueError, match=msg): + MultiIndex(levels=levels, codes=codes) + + length_error = ( + r"On level 0, code max \(3\) >= length of level \(1\)\. " + "NOTE: this index is in an inconsistent state" + ) + label_error = r"Unequal code lengths: \[4, 2\]" + code_value_error = r"On level 0, code value \(-2\) < -1" + + # important to check that it's looking at the right thing. + with pytest.raises(ValueError, match=length_error): + MultiIndex(levels=[["a"], ["b"]], codes=[[0, 1, 2, 3], [0, 3, 4, 1]]) + + with pytest.raises(ValueError, match=label_error): + MultiIndex(levels=[["a"], ["b"]], codes=[[0, 0, 0, 0], [0, 0]]) + + # external API + with pytest.raises(ValueError, match=length_error): + idx.copy().set_levels([["a"], ["b"]]) + + with pytest.raises(ValueError, match=label_error): + idx.copy().set_codes([[0, 0, 0, 0], [0, 0]]) + + # test set_codes with verify_integrity=False + # the setting should not raise any value error + idx.copy().set_codes(codes=[[0, 0, 0, 0], [0, 0]], verify_integrity=False) + + # code value smaller than -1 + with pytest.raises(ValueError, match=code_value_error): + MultiIndex(levels=[["a"], ["b"]], codes=[[0, -2], [0, 0]]) + + +def test_na_levels(): + # GH26408 + # test if codes are re-assigned value -1 for levels + # with missing values (NaN, NaT, None) + result = MultiIndex( + levels=[[np.nan, None, pd.NaT, 128, 2]], codes=[[0, -1, 1, 2, 3, 4]] + ) + expected = MultiIndex( + levels=[[np.nan, None, pd.NaT, 128, 2]], codes=[[-1, -1, -1, -1, 3, 4]] + ) + tm.assert_index_equal(result, expected) + + result = MultiIndex( + levels=[[np.nan, "s", pd.NaT, 128, None]], codes=[[0, -1, 1, 2, 3, 4]] + ) + expected = MultiIndex( + levels=[[np.nan, "s", pd.NaT, 128, None]], codes=[[-1, -1, 1, -1, 3, -1]] + ) + tm.assert_index_equal(result, expected) + + # verify set_levels and set_codes + result = MultiIndex( + levels=[[1, 2, 3, 4, 5]], codes=[[0, -1, 1, 2, 3, 4]] + ).set_levels([[np.nan, "s", pd.NaT, 128, None]]) + tm.assert_index_equal(result, expected) + + result = MultiIndex( + levels=[[np.nan, "s", pd.NaT, 128, None]], codes=[[1, 2, 2, 2, 2, 2]] + ).set_codes([[0, -1, 1, 2, 3, 4]]) + tm.assert_index_equal(result, expected) + + +def test_copy_in_constructor(): + levels = np.array(["a", "b", "c"]) + codes = np.array([1, 1, 2, 0, 0, 1, 1]) + val = codes[0] + mi = MultiIndex(levels=[levels, levels], codes=[codes, codes], copy=True) + assert mi.codes[0][0] == val + codes[0] = 15 + assert mi.codes[0][0] == val + val = levels[0] + levels[0] = "PANDA" + assert mi.levels[0][0] == val + + +# ---------------------------------------------------------------------------- +# from_arrays +# ---------------------------------------------------------------------------- +def test_from_arrays(idx): + arrays = [ + np.asarray(lev).take(level_codes) + for lev, level_codes in 
zip(idx.levels, idx.codes) + ] + + # list of arrays as input + result = MultiIndex.from_arrays(arrays, names=idx.names) + tm.assert_index_equal(result, idx) + + # infer correctly + result = MultiIndex.from_arrays([[pd.NaT, Timestamp("20130101")], ["a", "b"]]) + assert result.levels[0].equals(Index([Timestamp("20130101")])) + assert result.levels[1].equals(Index(["a", "b"])) + + +def test_from_arrays_iterator(idx): + # GH 18434 + arrays = [ + np.asarray(lev).take(level_codes) + for lev, level_codes in zip(idx.levels, idx.codes) + ] + + # iterator as input + result = MultiIndex.from_arrays(iter(arrays), names=idx.names) + tm.assert_index_equal(result, idx) + + # invalid iterator input + msg = "Input must be a list / sequence of array-likes." + with pytest.raises(TypeError, match=msg): + MultiIndex.from_arrays(0) + + +def test_from_arrays_tuples(idx): + arrays = tuple( + tuple(np.asarray(lev).take(level_codes)) + for lev, level_codes in zip(idx.levels, idx.codes) + ) + + # tuple of tuples as input + result = MultiIndex.from_arrays(arrays, names=idx.names) + tm.assert_index_equal(result, idx) + + +@pytest.mark.parametrize( + ("idx1", "idx2"), + [ + ( + pd.period_range("2011-01-01", freq="D", periods=3), + pd.period_range("2015-01-01", freq="H", periods=3), + ), + ( + date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern"), + date_range("2015-01-01 10:00", freq="H", periods=3, tz="Asia/Tokyo"), + ), + ( + pd.timedelta_range("1 days", freq="D", periods=3), + pd.timedelta_range("2 hours", freq="H", periods=3), + ), + ], +) +def test_from_arrays_index_series_period_datetimetz_and_timedelta(idx1, idx2): + result = MultiIndex.from_arrays([idx1, idx2]) + tm.assert_index_equal(result.get_level_values(0), idx1) + tm.assert_index_equal(result.get_level_values(1), idx2) + + result2 = MultiIndex.from_arrays([Series(idx1), Series(idx2)]) + tm.assert_index_equal(result2.get_level_values(0), idx1) + tm.assert_index_equal(result2.get_level_values(1), idx2) + + tm.assert_index_equal(result, result2) + + +def test_from_arrays_index_datetimelike_mixed(): + idx1 = date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern") + idx2 = date_range("2015-01-01 10:00", freq="H", periods=3) + idx3 = pd.timedelta_range("1 days", freq="D", periods=3) + idx4 = pd.period_range("2011-01-01", freq="D", periods=3) + + result = MultiIndex.from_arrays([idx1, idx2, idx3, idx4]) + tm.assert_index_equal(result.get_level_values(0), idx1) + tm.assert_index_equal(result.get_level_values(1), idx2) + tm.assert_index_equal(result.get_level_values(2), idx3) + tm.assert_index_equal(result.get_level_values(3), idx4) + + result2 = MultiIndex.from_arrays( + [Series(idx1), Series(idx2), Series(idx3), Series(idx4)] + ) + tm.assert_index_equal(result2.get_level_values(0), idx1) + tm.assert_index_equal(result2.get_level_values(1), idx2) + tm.assert_index_equal(result2.get_level_values(2), idx3) + tm.assert_index_equal(result2.get_level_values(3), idx4) + + tm.assert_index_equal(result, result2) + + +def test_from_arrays_index_series_categorical(): + # GH13743 + idx1 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), ordered=False) + idx2 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), ordered=True) + + result = MultiIndex.from_arrays([idx1, idx2]) + tm.assert_index_equal(result.get_level_values(0), idx1) + tm.assert_index_equal(result.get_level_values(1), idx2) + + result2 = MultiIndex.from_arrays([Series(idx1), Series(idx2)]) + tm.assert_index_equal(result2.get_level_values(0), idx1) + 
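# the ordered categorical level should survive the Series round-trip as well +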
tm.assert_index_equal(result2.get_level_values(1), idx2) + + result3 = MultiIndex.from_arrays([idx1.values, idx2.values]) + tm.assert_index_equal(result3.get_level_values(0), idx1) + tm.assert_index_equal(result3.get_level_values(1), idx2) + + +def test_from_arrays_empty(): + # 0 levels + msg = "Must pass non-zero number of levels/codes" + with pytest.raises(ValueError, match=msg): + MultiIndex.from_arrays(arrays=[]) + + # 1 level + result = MultiIndex.from_arrays(arrays=[[]], names=["A"]) + assert isinstance(result, MultiIndex) + expected = Index([], name="A") + tm.assert_index_equal(result.levels[0], expected) + assert result.names == ["A"] + + # N levels + for N in [2, 3]: + arrays = [[]] * N + names = list("ABC")[:N] + result = MultiIndex.from_arrays(arrays=arrays, names=names) + expected = MultiIndex(levels=[[]] * N, codes=[[]] * N, names=names) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize( + "invalid_sequence_of_arrays", + [ + 1, + [1], + [1, 2], + [[1], 2], + [1, [2]], + "a", + ["a"], + ["a", "b"], + [["a"], "b"], + (1,), + (1, 2), + ([1], 2), + (1, [2]), + "a", + ("a",), + ("a", "b"), + (["a"], "b"), + [(1,), 2], + [1, (2,)], + [("a",), "b"], + ((1,), 2), + (1, (2,)), + (("a",), "b"), + ], +) +def test_from_arrays_invalid_input(invalid_sequence_of_arrays): + msg = "Input must be a list / sequence of array-likes" + with pytest.raises(TypeError, match=msg): + MultiIndex.from_arrays(arrays=invalid_sequence_of_arrays) + + +@pytest.mark.parametrize( + "idx1, idx2", [([1, 2, 3], ["a", "b"]), ([], ["a", "b"]), ([1, 2, 3], [])] +) +def test_from_arrays_different_lengths(idx1, idx2): + # see gh-13599 + msg = "^all arrays must be same length$" + with pytest.raises(ValueError, match=msg): + MultiIndex.from_arrays([idx1, idx2]) + + +def test_from_arrays_respects_none_names(): + # GH27292 + a = Series([1, 2, 3], name="foo") + b = Series(["a", "b", "c"], name="bar") + + result = MultiIndex.from_arrays([a, b], names=None) + expected = MultiIndex( + levels=[[1, 2, 3], ["a", "b", "c"]], codes=[[0, 1, 2], [0, 1, 2]], names=None + ) + + tm.assert_index_equal(result, expected) + + +# ---------------------------------------------------------------------------- +# from_tuples +# ---------------------------------------------------------------------------- +def test_from_tuples(): + msg = "Cannot infer number of levels from empty list" + with pytest.raises(TypeError, match=msg): + MultiIndex.from_tuples([]) + + expected = MultiIndex( + levels=[[1, 3], [2, 4]], codes=[[0, 1], [0, 1]], names=["a", "b"] + ) + + # input tuples + result = MultiIndex.from_tuples(((1, 2), (3, 4)), names=["a", "b"]) + tm.assert_index_equal(result, expected) + + +def test_from_tuples_iterator(): + # GH 18434 + # input iterator for tuples + expected = MultiIndex( + levels=[[1, 3], [2, 4]], codes=[[0, 1], [0, 1]], names=["a", "b"] + ) + + result = MultiIndex.from_tuples(zip([1, 3], [2, 4]), names=["a", "b"]) + tm.assert_index_equal(result, expected) + + # input non-iterables + msg = "Input must be a list / sequence of tuple-likes." 
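+    # a bare scalar is not a sequence of tuples, so from_tuples should raise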
+ with pytest.raises(TypeError, match=msg): + MultiIndex.from_tuples(0) + + +def test_from_tuples_empty(): + # GH 16777 + result = MultiIndex.from_tuples([], names=["a", "b"]) + expected = MultiIndex.from_arrays(arrays=[[], []], names=["a", "b"]) + tm.assert_index_equal(result, expected) + + +def test_from_tuples_index_values(idx): + result = MultiIndex.from_tuples(idx) + assert (result.values == idx.values).all() + + +def test_tuples_with_name_string(): + # GH 15110 and GH 14848 + + li = [(0, 0, 1), (0, 1, 0), (1, 0, 0)] + msg = "Names should be list-like for a MultiIndex" + with pytest.raises(ValueError, match=msg): + Index(li, name="abc") + with pytest.raises(ValueError, match=msg): + Index(li, name="a") + + +def test_from_tuples_with_tuple_label(): + # GH 15457 + expected = pd.DataFrame( + [[2, 1, 2], [4, (1, 2), 3]], columns=["a", "b", "c"] + ).set_index(["a", "b"]) + idx = MultiIndex.from_tuples([(2, 1), (4, (1, 2))], names=("a", "b")) + result = pd.DataFrame([2, 3], columns=["c"], index=idx) + tm.assert_frame_equal(expected, result) + + +# ---------------------------------------------------------------------------- +# from_product +# ---------------------------------------------------------------------------- +def test_from_product_empty_zero_levels(): + # 0 levels + msg = "Must pass non-zero number of levels/codes" + with pytest.raises(ValueError, match=msg): + MultiIndex.from_product([]) + + +def test_from_product_empty_one_level(): + result = MultiIndex.from_product([[]], names=["A"]) + expected = Index([], name="A") + tm.assert_index_equal(result.levels[0], expected) + assert result.names == ["A"] + + +@pytest.mark.parametrize( + "first, second", [([], []), (["foo", "bar", "baz"], []), ([], ["a", "b", "c"])] +) +def test_from_product_empty_two_levels(first, second): + names = ["A", "B"] + result = MultiIndex.from_product([first, second], names=names) + expected = MultiIndex(levels=[first, second], codes=[[], []], names=names) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize("N", list(range(4))) +def test_from_product_empty_three_levels(N): + # GH12258 + names = ["A", "B", "C"] + lvl2 = list(range(N)) + result = MultiIndex.from_product([[], lvl2, []], names=names) + expected = MultiIndex(levels=[[], lvl2, []], codes=[[], [], []], names=names) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize( + "invalid_input", [1, [1], [1, 2], [[1], 2], "a", ["a"], ["a", "b"], [["a"], "b"]] +) +def test_from_product_invalid_input(invalid_input): + msg = r"Input must be a list / sequence of iterables|Input must be list-like" + with pytest.raises(TypeError, match=msg): + MultiIndex.from_product(iterables=invalid_input) + + +def test_from_product_datetimeindex(): + dt_index = date_range("2000-01-01", periods=2) + mi = MultiIndex.from_product([[1, 2], dt_index]) + etalon = construct_1d_object_array_from_listlike( + [ + (1, Timestamp("2000-01-01")), + (1, Timestamp("2000-01-02")), + (2, Timestamp("2000-01-01")), + (2, Timestamp("2000-01-02")), + ] + ) + tm.assert_numpy_array_equal(mi.values, etalon) + + +def test_from_product_rangeindex(): + # RangeIndex is preserved by factorize, so preserved in levels + rng = Index(range(5)) + other = ["a", "b"] + mi = MultiIndex.from_product([rng, other]) + tm.assert_index_equal(mi._levels[0], rng, exact=True) + + +@pytest.mark.parametrize("ordered", [False, True]) +@pytest.mark.parametrize("f", [lambda x: x, lambda x: Series(x), lambda x: x.values]) +def test_from_product_index_series_categorical(ordered, f): + # 
GH13743 + first = ["foo", "bar"] + + idx = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), ordered=ordered) + expected = pd.CategoricalIndex( + list("abcaab") + list("abcaab"), categories=list("bac"), ordered=ordered + ) + + result = MultiIndex.from_product([first, f(idx)]) + tm.assert_index_equal(result.get_level_values(1), expected) + + +def test_from_product(): + + first = ["foo", "bar", "buz"] + second = ["a", "b", "c"] + names = ["first", "second"] + result = MultiIndex.from_product([first, second], names=names) + + tuples = [ + ("foo", "a"), + ("foo", "b"), + ("foo", "c"), + ("bar", "a"), + ("bar", "b"), + ("bar", "c"), + ("buz", "a"), + ("buz", "b"), + ("buz", "c"), + ] + expected = MultiIndex.from_tuples(tuples, names=names) + + tm.assert_index_equal(result, expected) + + +def test_from_product_iterator(): + # GH 18434 + first = ["foo", "bar", "buz"] + second = ["a", "b", "c"] + names = ["first", "second"] + tuples = [ + ("foo", "a"), + ("foo", "b"), + ("foo", "c"), + ("bar", "a"), + ("bar", "b"), + ("bar", "c"), + ("buz", "a"), + ("buz", "b"), + ("buz", "c"), + ] + expected = MultiIndex.from_tuples(tuples, names=names) + + # iterator as input + result = MultiIndex.from_product(iter([first, second]), names=names) + tm.assert_index_equal(result, expected) + + # Invalid non-iterable input + msg = "Input must be a list / sequence of iterables." + with pytest.raises(TypeError, match=msg): + MultiIndex.from_product(0) + + +@pytest.mark.parametrize( + "a, b, expected_names", + [ + ( + Series([1, 2, 3], name="foo"), + Series(["a", "b"], name="bar"), + ["foo", "bar"], + ), + (Series([1, 2, 3], name="foo"), ["a", "b"], ["foo", None]), + ([1, 2, 3], ["a", "b"], None), + ], +) +def test_from_product_infer_names(a, b, expected_names): + # GH27292 + result = MultiIndex.from_product([a, b]) + expected = MultiIndex( + levels=[[1, 2, 3], ["a", "b"]], + codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], + names=expected_names, + ) + tm.assert_index_equal(result, expected) + + +def test_from_product_respects_none_names(): + # GH27292 + a = Series([1, 2, 3], name="foo") + b = Series(["a", "b"], name="bar") + + result = MultiIndex.from_product([a, b], names=None) + expected = MultiIndex( + levels=[[1, 2, 3], ["a", "b"]], + codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], + names=None, + ) + tm.assert_index_equal(result, expected) + + +def test_from_product_readonly(): + # GH#15286 passing read-only array to from_product + a = np.array(range(3)) + b = ["a", "b"] + expected = MultiIndex.from_product([a, b]) + + a.setflags(write=False) + result = MultiIndex.from_product([a, b]) + tm.assert_index_equal(result, expected) + + +def test_create_index_existing_name(idx): + + # GH11193, when an existing index is passed, and a new name is not + # specified, the new index should inherit the previous object name + index = idx + index.names = ["foo", "bar"] + result = Index(index) + expected = Index( + Index( + [ + ("foo", "one"), + ("foo", "two"), + ("bar", "one"), + ("baz", "two"), + ("qux", "one"), + ("qux", "two"), + ], + dtype="object", + ) + ) + tm.assert_index_equal(result, expected) + + result = Index(index, name="A") + expected = Index( + Index( + [ + ("foo", "one"), + ("foo", "two"), + ("bar", "one"), + ("baz", "two"), + ("qux", "one"), + ("qux", "two"), + ], + dtype="object", + ), + name="A", + ) + tm.assert_index_equal(result, expected) + + +# ---------------------------------------------------------------------------- +# from_frame +# 
---------------------------------------------------------------------------- +def test_from_frame(): + # GH 22420 + df = pd.DataFrame( + [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]], columns=["L1", "L2"] + ) + expected = MultiIndex.from_tuples( + [("a", "a"), ("a", "b"), ("b", "a"), ("b", "b")], names=["L1", "L2"] + ) + result = MultiIndex.from_frame(df) + tm.assert_index_equal(expected, result) + + +@pytest.mark.parametrize( + "non_frame", + [ + Series([1, 2, 3, 4]), + [1, 2, 3, 4], + [[1, 2], [3, 4], [5, 6]], + Index([1, 2, 3, 4]), + np.array([[1, 2], [3, 4], [5, 6]]), + 27, + ], +) +def test_from_frame_error(non_frame): + # GH 22420 + with pytest.raises(TypeError, match="Input must be a DataFrame"): + MultiIndex.from_frame(non_frame) + + +def test_from_frame_dtype_fidelity(): + # GH 22420 + df = pd.DataFrame( + { + "dates": date_range("19910905", periods=6, tz="US/Eastern"), + "a": [1, 1, 1, 2, 2, 2], + "b": pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True), + "c": ["x", "x", "y", "z", "x", "y"], + } + ) + original_dtypes = df.dtypes.to_dict() + + expected_mi = MultiIndex.from_arrays( + [ + date_range("19910905", periods=6, tz="US/Eastern"), + [1, 1, 1, 2, 2, 2], + pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True), + ["x", "x", "y", "z", "x", "y"], + ], + names=["dates", "a", "b", "c"], + ) + mi = MultiIndex.from_frame(df) + mi_dtypes = {name: mi.levels[i].dtype for i, name in enumerate(mi.names)} + + tm.assert_index_equal(expected_mi, mi) + assert original_dtypes == mi_dtypes + + +@pytest.mark.parametrize( + "names_in,names_out", [(None, [("L1", "x"), ("L2", "y")]), (["x", "y"], ["x", "y"])] +) +def test_from_frame_valid_names(names_in, names_out): + # GH 22420 + df = pd.DataFrame( + [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]], + columns=MultiIndex.from_tuples([("L1", "x"), ("L2", "y")]), + ) + mi = MultiIndex.from_frame(df, names=names_in) + assert mi.names == names_out + + +@pytest.mark.parametrize( + "names,expected_error_msg", + [ + ("bad_input", "Names should be list-like for a MultiIndex"), + (["a", "b", "c"], "Length of names must match number of levels in MultiIndex"), + ], +) +def test_from_frame_invalid_names(names, expected_error_msg): + # GH 22420 + df = pd.DataFrame( + [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]], + columns=MultiIndex.from_tuples([("L1", "x"), ("L2", "y")]), + ) + with pytest.raises(ValueError, match=expected_error_msg): + MultiIndex.from_frame(df, names=names) + + +def test_index_equal_empty_iterable(): + # #16844 + a = MultiIndex(levels=[[], []], codes=[[], []], names=["a", "b"]) + b = MultiIndex.from_arrays(arrays=[[], []], names=["a", "b"]) + tm.assert_index_equal(a, b) + + +def test_raise_invalid_sortorder(): + # Test that the MultiIndex constructor raise when a incorrect sortorder is given + # GH#28518 + + levels = [[0, 1], [0, 1, 2]] + + # Correct sortorder + MultiIndex( + levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2 + ) + + with pytest.raises(ValueError, match=r".* sortorder 2 with lexsort_depth 1.*"): + MultiIndex( + levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], sortorder=2 + ) + + with pytest.raises(ValueError, match=r".* sortorder 1 with lexsort_depth 0.*"): + MultiIndex( + levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=1 + ) + + +def test_datetimeindex(): + idx1 = pd.DatetimeIndex( + ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"] * 2, tz="Asia/Tokyo" + ) + idx2 = date_range("2010/01/01", periods=6, freq="M", tz="US/Eastern") + 
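# both tz-aware inputs should be preserved as tz-aware levels +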
idx = MultiIndex.from_arrays([idx1, idx2]) + + expected1 = pd.DatetimeIndex( + ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"], tz="Asia/Tokyo" + ) + + tm.assert_index_equal(idx.levels[0], expected1) + tm.assert_index_equal(idx.levels[1], idx2) + + # from datetime combos + # GH 7888 + date1 = np.datetime64("today") + date2 = datetime.today() + date3 = Timestamp.today() + + for d1, d2 in itertools.product([date1, date2, date3], [date1, date2, date3]): + index = MultiIndex.from_product([[d1], [d2]]) + assert isinstance(index.levels[0], pd.DatetimeIndex) + assert isinstance(index.levels[1], pd.DatetimeIndex) + + # but NOT date objects, matching Index behavior + date4 = date.today() + index = MultiIndex.from_product([[date4], [date2]]) + assert not isinstance(index.levels[0], pd.DatetimeIndex) + assert isinstance(index.levels[1], pd.DatetimeIndex) + + +def test_constructor_with_tz(): + + index = pd.DatetimeIndex( + ["2013/01/01 09:00", "2013/01/02 09:00"], name="dt1", tz="US/Pacific" + ) + columns = pd.DatetimeIndex( + ["2014/01/01 09:00", "2014/01/02 09:00"], name="dt2", tz="Asia/Tokyo" + ) + + result = MultiIndex.from_arrays([index, columns]) + + assert result.names == ["dt1", "dt2"] + tm.assert_index_equal(result.levels[0], index) + tm.assert_index_equal(result.levels[1], columns) + + result = MultiIndex.from_arrays([Series(index), Series(columns)]) + + assert result.names == ["dt1", "dt2"] + tm.assert_index_equal(result.levels[0], index) + tm.assert_index_equal(result.levels[1], columns) + + +def test_multiindex_inference_consistency(): + # check that inference behavior matches the base class + + v = date.today() + + arr = [v, v] + + idx = Index(arr) + assert idx.dtype == object + + mi = MultiIndex.from_arrays([arr]) + lev = mi.levels[0] + assert lev.dtype == object + + mi = MultiIndex.from_product([arr]) + lev = mi.levels[0] + assert lev.dtype == object + + mi = MultiIndex.from_tuples([(x,) for x in arr]) + lev = mi.levels[0] + assert lev.dtype == object diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_conversion.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_conversion.py new file mode 100644 index 0000000..072055e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_conversion.py @@ -0,0 +1,142 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + MultiIndex, +) +import pandas._testing as tm + + +def test_to_numpy(idx): + result = idx.to_numpy() + exp = idx.values + tm.assert_numpy_array_equal(result, exp) + + +def test_to_frame(): + tuples = [(1, "one"), (1, "two"), (2, "one"), (2, "two")] + + index = MultiIndex.from_tuples(tuples) + result = index.to_frame(index=False) + expected = DataFrame(tuples) + tm.assert_frame_equal(result, expected) + + result = index.to_frame() + expected.index = index + tm.assert_frame_equal(result, expected) + + tuples = [(1, "one"), (1, "two"), (2, "one"), (2, "two")] + index = MultiIndex.from_tuples(tuples, names=["first", "second"]) + result = index.to_frame(index=False) + expected = DataFrame(tuples) + expected.columns = ["first", "second"] + tm.assert_frame_equal(result, expected) + + result = index.to_frame() + expected.index = index + tm.assert_frame_equal(result, expected) + + # See GH-22580 + index = MultiIndex.from_tuples(tuples) + result = index.to_frame(index=False, name=["first", "second"]) + expected = DataFrame(tuples) + expected.columns = ["first", "second"] + tm.assert_frame_equal(result, expected) 
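+ # name= overrides the column labels; the default index=True variant below
+ # also re-attaches the original MultiIndex as the frame's index.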
+ + result = index.to_frame(name=["first", "second"]) + expected.index = index + expected.columns = ["first", "second"] + tm.assert_frame_equal(result, expected) + + msg = "'name' must be a list / sequence of column names." + with pytest.raises(TypeError, match=msg): + index.to_frame(name="first") + + msg = "'name' should have same length as number of levels on index." + with pytest.raises(ValueError, match=msg): + index.to_frame(name=["first"]) + + # Tests for datetime index + index = MultiIndex.from_product([range(5), pd.date_range("20130101", periods=3)]) + result = index.to_frame(index=False) + expected = DataFrame( + { + 0: np.repeat(np.arange(5, dtype="int64"), 3), + 1: np.tile(pd.date_range("20130101", periods=3), 5), + } + ) + tm.assert_frame_equal(result, expected) + + result = index.to_frame() + expected.index = index + tm.assert_frame_equal(result, expected) + + # See GH-22580 + result = index.to_frame(index=False, name=["first", "second"]) + expected = DataFrame( + { + "first": np.repeat(np.arange(5, dtype="int64"), 3), + "second": np.tile(pd.date_range("20130101", periods=3), 5), + } + ) + tm.assert_frame_equal(result, expected) + + result = index.to_frame(name=["first", "second"]) + expected.index = index + tm.assert_frame_equal(result, expected) + + +def test_to_frame_dtype_fidelity(): + # GH 22420 + mi = MultiIndex.from_arrays( + [ + pd.date_range("19910905", periods=6, tz="US/Eastern"), + [1, 1, 1, 2, 2, 2], + pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True), + ["x", "x", "y", "z", "x", "y"], + ], + names=["dates", "a", "b", "c"], + ) + original_dtypes = {name: mi.levels[i].dtype for i, name in enumerate(mi.names)} + + expected_df = DataFrame( + { + "dates": pd.date_range("19910905", periods=6, tz="US/Eastern"), + "a": [1, 1, 1, 2, 2, 2], + "b": pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True), + "c": ["x", "x", "y", "z", "x", "y"], + } + ) + df = mi.to_frame(index=False) + df_dtypes = df.dtypes.to_dict() + + tm.assert_frame_equal(df, expected_df) + assert original_dtypes == df_dtypes + + +def test_to_frame_resulting_column_order(): + # GH 22420 + expected = ["z", 0, "a"] + mi = MultiIndex.from_arrays( + [["a", "b", "c"], ["x", "y", "z"], ["q", "w", "e"]], names=expected + ) + result = mi.to_frame().columns.tolist() + assert result == expected + + +def test_to_flat_index(idx): + expected = pd.Index( + ( + ("foo", "one"), + ("foo", "two"), + ("bar", "one"), + ("baz", "two"), + ("qux", "one"), + ("qux", "two"), + ), + tupleize_cols=False, + ) + result = idx.to_flat_index() + tm.assert_index_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_copy.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_copy.py new file mode 100644 index 0000000..9a0e4bc --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_copy.py @@ -0,0 +1,106 @@ +from copy import ( + copy, + deepcopy, +) + +import pytest + +from pandas import MultiIndex +import pandas._testing as tm + + +def assert_multiindex_copied(copy, original): + # Levels should be (at least, shallow copied) + tm.assert_copy(copy.levels, original.levels) + tm.assert_almost_equal(copy.codes, original.codes) + + # Labels doesn't matter which way copied + tm.assert_almost_equal(copy.codes, original.codes) + assert copy.codes is not original.codes + + # Names doesn't matter which way copied + assert copy.names == original.names + assert copy.names is not original.names + + # Sort order should be copied + assert 
copy.sortorder == original.sortorder + + +def test_copy(idx): + i_copy = idx.copy() + + assert_multiindex_copied(i_copy, idx) + + +def test_shallow_copy(idx): + i_copy = idx._view() + + assert_multiindex_copied(i_copy, idx) + + +def test_view(idx): + i_view = idx.view() + assert_multiindex_copied(i_view, idx) + + +@pytest.mark.parametrize("func", [copy, deepcopy]) +def test_copy_and_deepcopy(func): + + idx = MultiIndex( + levels=[["foo", "bar"], ["fizz", "buzz"]], + codes=[[0, 0, 0, 1], [0, 0, 1, 1]], + names=["first", "second"], + ) + idx_copy = func(idx) + assert idx_copy is not idx + assert idx_copy.equals(idx) + + +@pytest.mark.parametrize("deep", [True, False]) +def test_copy_method(deep): + idx = MultiIndex( + levels=[["foo", "bar"], ["fizz", "buzz"]], + codes=[[0, 0, 0, 1], [0, 0, 1, 1]], + names=["first", "second"], + ) + idx_copy = idx.copy(deep=deep) + assert idx_copy.equals(idx) + + +@pytest.mark.parametrize("deep", [True, False]) +@pytest.mark.parametrize( + "kwarg, value", + [ + ("names", ["third", "fourth"]), + ], +) +def test_copy_method_kwargs(deep, kwarg, value): + # gh-12309: Check that the "name" argument as well other kwargs are honored + idx = MultiIndex( + levels=[["foo", "bar"], ["fizz", "buzz"]], + codes=[[0, 0, 0, 1], [0, 0, 1, 1]], + names=["first", "second"], + ) + idx_copy = idx.copy(**{kwarg: value, "deep": deep}) + assert getattr(idx_copy, kwarg) == value + + +@pytest.mark.parametrize("deep", [True, False]) +@pytest.mark.parametrize( + "param_name, param_value", + [ + ("levels", [["foo2", "bar2"], ["fizz2", "buzz2"]]), + ("codes", [[1, 0, 0, 0], [1, 1, 0, 0]]), + ], +) +def test_copy_deprecated_parameters(deep, param_name, param_value): + # gh-36685 + idx = MultiIndex( + levels=[["foo", "bar"], ["fizz", "buzz"]], + codes=[[0, 0, 0, 1], [0, 0, 1, 1]], + names=["first", "second"], + ) + with tm.assert_produces_warning(FutureWarning): + idx_copy = idx.copy(deep=deep, **{param_name: param_value}) + + assert [list(i) for i in getattr(idx_copy, param_name)] == param_value diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_drop.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_drop.py new file mode 100644 index 0000000..47959ec --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_drop.py @@ -0,0 +1,193 @@ +import warnings + +import numpy as np +import pytest + +from pandas.errors import PerformanceWarning + +import pandas as pd +from pandas import ( + Index, + MultiIndex, +) +import pandas._testing as tm + + +def test_drop(idx): + dropped = idx.drop([("foo", "two"), ("qux", "one")]) + + index = MultiIndex.from_tuples([("foo", "two"), ("qux", "one")]) + dropped2 = idx.drop(index) + + expected = idx[[0, 2, 3, 5]] + tm.assert_index_equal(dropped, expected) + tm.assert_index_equal(dropped2, expected) + + dropped = idx.drop(["bar"]) + expected = idx[[0, 1, 3, 4, 5]] + tm.assert_index_equal(dropped, expected) + + dropped = idx.drop("foo") + expected = idx[[2, 3, 4, 5]] + tm.assert_index_equal(dropped, expected) + + index = MultiIndex.from_tuples([("bar", "two")]) + with pytest.raises(KeyError, match=r"^10$"): + idx.drop([("bar", "two")]) + with pytest.raises(KeyError, match=r"^10$"): + idx.drop(index) + with pytest.raises(KeyError, match=r"^'two'$"): + idx.drop(["foo", "two"]) + + # partially correct argument + mixed_index = MultiIndex.from_tuples([("qux", "one"), ("bar", "two")]) + with pytest.raises(KeyError, match=r"^10$"): + idx.drop(mixed_index) + + # error='ignore' + dropped = 
idx.drop(index, errors="ignore") + expected = idx[[0, 1, 2, 3, 4, 5]] + tm.assert_index_equal(dropped, expected) + + dropped = idx.drop(mixed_index, errors="ignore") + expected = idx[[0, 1, 2, 3, 5]] + tm.assert_index_equal(dropped, expected) + + dropped = idx.drop(["foo", "two"], errors="ignore") + expected = idx[[2, 3, 4, 5]] + tm.assert_index_equal(dropped, expected) + + # mixed partial / full drop + dropped = idx.drop(["foo", ("qux", "one")]) + expected = idx[[2, 3, 5]] + tm.assert_index_equal(dropped, expected) + + # mixed partial / full drop / error='ignore' + mixed_index = ["foo", ("qux", "one"), "two"] + with pytest.raises(KeyError, match=r"^'two'$"): + idx.drop(mixed_index) + dropped = idx.drop(mixed_index, errors="ignore") + expected = idx[[2, 3, 5]] + tm.assert_index_equal(dropped, expected) + + +def test_droplevel_with_names(idx): + index = idx[idx.get_loc("foo")] + dropped = index.droplevel(0) + assert dropped.name == "second" + + index = MultiIndex( + levels=[Index(range(4)), Index(range(4)), Index(range(4))], + codes=[ + np.array([0, 0, 1, 2, 2, 2, 3, 3]), + np.array([0, 1, 0, 0, 0, 1, 0, 1]), + np.array([1, 0, 1, 1, 0, 0, 1, 0]), + ], + names=["one", "two", "three"], + ) + dropped = index.droplevel(0) + assert dropped.names == ("two", "three") + + dropped = index.droplevel("two") + expected = index.droplevel(1) + assert dropped.equals(expected) + + +def test_droplevel_list(): + index = MultiIndex( + levels=[Index(range(4)), Index(range(4)), Index(range(4))], + codes=[ + np.array([0, 0, 1, 2, 2, 2, 3, 3]), + np.array([0, 1, 0, 0, 0, 1, 0, 1]), + np.array([1, 0, 1, 1, 0, 0, 1, 0]), + ], + names=["one", "two", "three"], + ) + + dropped = index[:2].droplevel(["three", "one"]) + expected = index[:2].droplevel(2).droplevel(0) + assert dropped.equals(expected) + + dropped = index[:2].droplevel([]) + expected = index[:2] + assert dropped.equals(expected) + + msg = ( + "Cannot remove 3 levels from an index with 3 levels: " + "at least one level must be left" + ) + with pytest.raises(ValueError, match=msg): + index[:2].droplevel(["one", "two", "three"]) + + with pytest.raises(KeyError, match="'Level four not found'"): + index[:2].droplevel(["one", "four"]) + + +def test_drop_not_lexsorted(): + # GH 12078 + + # define the lexsorted version of the multi-index + tuples = [("a", ""), ("b1", "c1"), ("b2", "c2")] + lexsorted_mi = MultiIndex.from_tuples(tuples, names=["b", "c"]) + assert lexsorted_mi._is_lexsorted() + + # and the not-lexsorted version + df = pd.DataFrame( + columns=["a", "b", "c", "d"], data=[[1, "b1", "c1", 3], [1, "b2", "c2", 4]] + ) + df = df.pivot_table(index="a", columns=["b", "c"], values="d") + df = df.reset_index() + not_lexsorted_mi = df.columns + assert not not_lexsorted_mi._is_lexsorted() + + # compare the results + tm.assert_index_equal(lexsorted_mi, not_lexsorted_mi) + with tm.assert_produces_warning(PerformanceWarning): + tm.assert_index_equal(lexsorted_mi.drop("a"), not_lexsorted_mi.drop("a")) + + +def test_drop_with_nan_in_index(nulls_fixture): + # GH#18853 + mi = MultiIndex.from_tuples([("blah", nulls_fixture)], names=["name", "date"]) + msg = r"labels \[Timestamp\('2001-01-01 00:00:00'\)\] not found in level" + with pytest.raises(KeyError, match=msg): + mi.drop(pd.Timestamp("2001"), level="date") + + +def test_drop_with_non_monotonic_duplicates(): + # GH#33494 + mi = MultiIndex.from_tuples([(1, 2), (2, 3), (1, 2)]) + with warnings.catch_warnings(): + warnings.simplefilter("ignore", PerformanceWarning) + result = mi.drop((1, 2)) + expected = 
MultiIndex.from_tuples([(2, 3)]) + tm.assert_index_equal(result, expected) + + +def test_single_level_drop_partially_missing_elements(): + # GH 37820 + + mi = MultiIndex.from_tuples([(1, 2), (2, 2), (3, 2)]) + msg = r"labels \[4\] not found in level" + with pytest.raises(KeyError, match=msg): + mi.drop(4, level=0) + with pytest.raises(KeyError, match=msg): + mi.drop([1, 4], level=0) + msg = r"labels \[nan\] not found in level" + with pytest.raises(KeyError, match=msg): + mi.drop([np.nan], level=0) + with pytest.raises(KeyError, match=msg): + mi.drop([np.nan, 1, 2, 3], level=0) + + mi = MultiIndex.from_tuples([(np.nan, 1), (1, 2)]) + msg = r"labels \['a'\] not found in level" + with pytest.raises(KeyError, match=msg): + mi.drop([np.nan, 1, "a"], level=0) + + +def test_droplevel_multiindex_one_level(): + # GH#37208 + index = MultiIndex.from_tuples([(2,)], names=("b",)) + result = index.droplevel([]) + expected = Index([2], name="b") + tm.assert_index_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_duplicates.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_duplicates.py new file mode 100644 index 0000000..6ec4d1f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_duplicates.py @@ -0,0 +1,339 @@ +from itertools import product + +import numpy as np +import pytest + +from pandas._libs import hashtable + +from pandas import ( + DatetimeIndex, + MultiIndex, + Series, +) +import pandas._testing as tm + + +@pytest.mark.parametrize("names", [None, ["first", "second"]]) +def test_unique(names): + mi = MultiIndex.from_arrays([[1, 2, 1, 2], [1, 1, 1, 2]], names=names) + + res = mi.unique() + exp = MultiIndex.from_arrays([[1, 2, 2], [1, 1, 2]], names=mi.names) + tm.assert_index_equal(res, exp) + + mi = MultiIndex.from_arrays([list("aaaa"), list("abab")], names=names) + res = mi.unique() + exp = MultiIndex.from_arrays([list("aa"), list("ab")], names=mi.names) + tm.assert_index_equal(res, exp) + + mi = MultiIndex.from_arrays([list("aaaa"), list("aaaa")], names=names) + res = mi.unique() + exp = MultiIndex.from_arrays([["a"], ["a"]], names=mi.names) + tm.assert_index_equal(res, exp) + + # GH #20568 - empty MI + mi = MultiIndex.from_arrays([[], []], names=names) + res = mi.unique() + tm.assert_index_equal(mi, res) + + +def test_unique_datetimelike(): + idx1 = DatetimeIndex( + ["2015-01-01", "2015-01-01", "2015-01-01", "2015-01-01", "NaT", "NaT"] + ) + idx2 = DatetimeIndex( + ["2015-01-01", "2015-01-01", "2015-01-02", "2015-01-02", "NaT", "2015-01-01"], + tz="Asia/Tokyo", + ) + result = MultiIndex.from_arrays([idx1, idx2]).unique() + + eidx1 = DatetimeIndex(["2015-01-01", "2015-01-01", "NaT", "NaT"]) + eidx2 = DatetimeIndex( + ["2015-01-01", "2015-01-02", "NaT", "2015-01-01"], tz="Asia/Tokyo" + ) + exp = MultiIndex.from_arrays([eidx1, eidx2]) + tm.assert_index_equal(result, exp) + + +@pytest.mark.parametrize("level", [0, "first", 1, "second"]) +def test_unique_level(idx, level): + # GH #17896 - with level= argument + result = idx.unique(level=level) + expected = idx.get_level_values(level).unique() + tm.assert_index_equal(result, expected) + + # With already unique level + mi = MultiIndex.from_arrays([[1, 3, 2, 4], [1, 3, 2, 5]], names=["first", "second"]) + result = mi.unique(level=level) + expected = mi.get_level_values(level) + tm.assert_index_equal(result, expected) + + # With empty MI + mi = MultiIndex.from_arrays([[], []], names=["first", "second"]) + result = mi.unique(level=level) + 
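# unique() on an empty MultiIndex is a no-op, so it should match the
+ # (empty) level values fetched below.
+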
expected = mi.get_level_values(level) + tm.assert_index_equal(result, expected) + + +def test_duplicate_multiindex_codes(): + # GH 17464 + # Make sure that a MultiIndex with duplicate levels throws a ValueError + msg = r"Level values must be unique: \[[A', ]+\] on level 0" + with pytest.raises(ValueError, match=msg): + mi = MultiIndex([["A"] * 10, range(10)], [[0] * 10, range(10)]) + + # And that using set_levels with duplicate levels fails + mi = MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]]) + msg = r"Level values must be unique: \[[AB', ]+\] on level 0" + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(FutureWarning): + mi.set_levels([["A", "B", "A", "A", "B"], [2, 1, 3, -2, 5]], inplace=True) + + +@pytest.mark.parametrize("names", [["a", "b", "a"], [1, 1, 2], [1, "a", 1]]) +def test_duplicate_level_names(names): + # GH18872, GH19029 + mi = MultiIndex.from_product([[0, 1]] * 3, names=names) + assert mi.names == names + + # With .rename() + mi = MultiIndex.from_product([[0, 1]] * 3) + mi = mi.rename(names) + assert mi.names == names + + # With .rename(., level=) + mi.rename(names[1], level=1, inplace=True) + mi = mi.rename([names[0], names[2]], level=[0, 2]) + assert mi.names == names + + +def test_duplicate_meta_data(): + # GH 10115 + mi = MultiIndex( + levels=[[0, 1], [0, 1, 2]], codes=[[0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]] + ) + + for idx in [ + mi, + mi.set_names([None, None]), + mi.set_names([None, "Num"]), + mi.set_names(["Upper", "Num"]), + ]: + assert idx.has_duplicates + assert idx.drop_duplicates().names == idx.names + + +def test_has_duplicates(idx, idx_dup): + # see fixtures + assert idx.is_unique is True + assert idx.has_duplicates is False + assert idx_dup.is_unique is False + assert idx_dup.has_duplicates is True + + mi = MultiIndex( + levels=[[0, 1], [0, 1, 2]], codes=[[0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]] + ) + assert mi.is_unique is False + assert mi.has_duplicates is True + + # single instance of NaN + mi_nan = MultiIndex( + levels=[["a", "b"], [0, 1]], codes=[[-1, 0, 0, 1, 1], [-1, 0, 1, 0, 1]] + ) + assert mi_nan.is_unique is True + assert mi_nan.has_duplicates is False + + # multiple instances of NaN + mi_nan_dup = MultiIndex( + levels=[["a", "b"], [0, 1]], codes=[[-1, -1, 0, 0, 1, 1], [-1, -1, 0, 1, 0, 1]] + ) + assert mi_nan_dup.is_unique is False + assert mi_nan_dup.has_duplicates is True + + +def test_has_duplicates_from_tuples(): + # GH 9075 + t = [ + ("x", "out", "z", 5, "y", "in", "z", 169), + ("x", "out", "z", 7, "y", "in", "z", 119), + ("x", "out", "z", 9, "y", "in", "z", 135), + ("x", "out", "z", 13, "y", "in", "z", 145), + ("x", "out", "z", 14, "y", "in", "z", 158), + ("x", "out", "z", 16, "y", "in", "z", 122), + ("x", "out", "z", 17, "y", "in", "z", 160), + ("x", "out", "z", 18, "y", "in", "z", 180), + ("x", "out", "z", 20, "y", "in", "z", 143), + ("x", "out", "z", 21, "y", "in", "z", 128), + ("x", "out", "z", 22, "y", "in", "z", 129), + ("x", "out", "z", 25, "y", "in", "z", 111), + ("x", "out", "z", 28, "y", "in", "z", 114), + ("x", "out", "z", 29, "y", "in", "z", 121), + ("x", "out", "z", 31, "y", "in", "z", 126), + ("x", "out", "z", 32, "y", "in", "z", 155), + ("x", "out", "z", 33, "y", "in", "z", 123), + ("x", "out", "z", 12, "y", "in", "z", 144), + ] + + mi = MultiIndex.from_tuples(t) + assert not mi.has_duplicates + + +@pytest.mark.parametrize("nlevels", [4, 8]) +@pytest.mark.parametrize("with_nulls", [True, False]) +def test_has_duplicates_overflow(nlevels, with_nulls): + # handle 
int64 overflow if possible + # no overflow with 4 + # overflow possible with 8 + codes = np.tile(np.arange(500), 2) + level = np.arange(500) + + if with_nulls: # inject some null values + codes[500] = -1 # common nan value + codes = [codes.copy() for i in range(nlevels)] + for i in range(nlevels): + codes[i][500 + i - nlevels // 2] = -1 + + codes += [np.array([-1, 1]).repeat(500)] + else: + codes = [codes] * nlevels + [np.arange(2).repeat(500)] + + levels = [level] * nlevels + [[0, 1]] + + # no dups + mi = MultiIndex(levels=levels, codes=codes) + assert not mi.has_duplicates + + # with a dup + if with_nulls: + + def f(a): + return np.insert(a, 1000, a[0]) + + codes = list(map(f, codes)) + mi = MultiIndex(levels=levels, codes=codes) + else: + values = mi.values.tolist() + mi = MultiIndex.from_tuples(values + [values[0]]) + + assert mi.has_duplicates + + +@pytest.mark.parametrize( + "keep, expected", + [ + ("first", np.array([False, False, False, True, True, False])), + ("last", np.array([False, True, True, False, False, False])), + (False, np.array([False, True, True, True, True, False])), + ], +) +def test_duplicated(idx_dup, keep, expected): + result = idx_dup.duplicated(keep=keep) + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.arm_slow +def test_duplicated_large(keep): + # GH 9125 + n, k = 200, 5000 + levels = [np.arange(n), tm.makeStringIndex(n), 1000 + np.arange(n)] + codes = [np.random.choice(n, k * n) for lev in levels] + mi = MultiIndex(levels=levels, codes=codes) + + result = mi.duplicated(keep=keep) + expected = hashtable.duplicated(mi.values, keep=keep) + tm.assert_numpy_array_equal(result, expected) + + +def test_duplicated2(): + # TODO: more informative test name + # GH5873 + for a in [101, 102]: + mi = MultiIndex.from_arrays([[101, a], [3.5, np.nan]]) + assert not mi.has_duplicates + + tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(2, dtype="bool")) + + for n in range(1, 6): # 1st level shape + for m in range(1, 5): # 2nd level shape + # all possible unique combinations, including nan + codes = product(range(-1, n), range(-1, m)) + mi = MultiIndex( + levels=[list("abcde")[:n], list("WXYZ")[:m]], + codes=np.random.permutation(list(codes)).T, + ) + assert len(mi) == (n + 1) * (m + 1) + assert not mi.has_duplicates + + tm.assert_numpy_array_equal( + mi.duplicated(), np.zeros(len(mi), dtype="bool") + ) + + +def test_duplicated_drop_duplicates(): + # GH#4060 + idx = MultiIndex.from_arrays(([1, 2, 3, 1, 2, 3], [1, 1, 1, 1, 2, 2])) + + expected = np.array([False, False, False, True, False, False], dtype=bool) + duplicated = idx.duplicated() + tm.assert_numpy_array_equal(duplicated, expected) + assert duplicated.dtype == bool + expected = MultiIndex.from_arrays(([1, 2, 3, 2, 3], [1, 1, 1, 2, 2])) + tm.assert_index_equal(idx.drop_duplicates(), expected) + + expected = np.array([True, False, False, False, False, False]) + duplicated = idx.duplicated(keep="last") + tm.assert_numpy_array_equal(duplicated, expected) + assert duplicated.dtype == bool + expected = MultiIndex.from_arrays(([2, 3, 1, 2, 3], [1, 1, 1, 2, 2])) + tm.assert_index_equal(idx.drop_duplicates(keep="last"), expected) + + expected = np.array([True, False, False, True, False, False]) + duplicated = idx.duplicated(keep=False) + tm.assert_numpy_array_equal(duplicated, expected) + assert duplicated.dtype == bool + expected = MultiIndex.from_arrays(([2, 3, 2, 3], [1, 1, 2, 2])) + tm.assert_index_equal(idx.drop_duplicates(keep=False), expected) + + +@pytest.mark.parametrize( + "dtype", + [ + 
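# keep="first" marks later repeats, keep="last" marks earlier ones, and
+ # keep=False marks every member of a duplicated group.
+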
np.complex64, + np.complex128, + ], +) +def test_duplicated_series_complex_numbers(dtype): + # GH 17927 + expected = Series( + [False, False, False, True, False, False, False, True, False, True], + dtype=bool, + ) + result = Series( + [ + np.nan + np.nan * 1j, + 0, + 1j, + 1j, + 1, + 1 + 1j, + 1 + 2j, + 1 + 1j, + np.nan, + np.nan + np.nan * 1j, + ], + dtype=dtype, + ).duplicated() + tm.assert_series_equal(result, expected) + + +def test_multi_drop_duplicates_pos_args_deprecation(): + # GH#41485 + idx = MultiIndex.from_arrays([[1, 2, 3, 1], [1, 2, 3, 1]]) + msg = ( + "In a future version of pandas all arguments of " + "MultiIndex.drop_duplicates will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = idx.drop_duplicates("last") + expected = MultiIndex.from_arrays([[2, 3, 1], [2, 3, 1]]) + tm.assert_index_equal(expected, result) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_equivalence.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_equivalence.py new file mode 100644 index 0000000..3854aca --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_equivalence.py @@ -0,0 +1,290 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Index, + MultiIndex, + Series, +) +import pandas._testing as tm + + +def test_equals(idx): + assert idx.equals(idx) + assert idx.equals(idx.copy()) + assert idx.equals(idx.astype(object)) + assert idx.equals(idx.to_flat_index()) + assert idx.equals(idx.to_flat_index().astype("category")) + + assert not idx.equals(list(idx)) + assert not idx.equals(np.array(idx)) + + same_values = Index(idx, dtype=object) + assert idx.equals(same_values) + assert same_values.equals(idx) + + if idx.nlevels == 1: + # do not test MultiIndex + assert not idx.equals(Series(idx)) + + +def test_equals_op(idx): + # GH9947, GH10637 + index_a = idx + + n = len(index_a) + index_b = index_a[0:-1] + index_c = index_a[0:-1].append(index_a[-2:-1]) + index_d = index_a[0:1] + with pytest.raises(ValueError, match="Lengths must match"): + index_a == index_b + expected1 = np.array([True] * n) + expected2 = np.array([True] * (n - 1) + [False]) + tm.assert_numpy_array_equal(index_a == index_a, expected1) + tm.assert_numpy_array_equal(index_a == index_c, expected2) + + # test comparisons with numpy arrays + array_a = np.array(index_a) + array_b = np.array(index_a[0:-1]) + array_c = np.array(index_a[0:-1].append(index_a[-2:-1])) + array_d = np.array(index_a[0:1]) + with pytest.raises(ValueError, match="Lengths must match"): + index_a == array_b + tm.assert_numpy_array_equal(index_a == array_a, expected1) + tm.assert_numpy_array_equal(index_a == array_c, expected2) + + # test comparisons with Series + series_a = Series(array_a) + series_b = Series(array_b) + series_c = Series(array_c) + series_d = Series(array_d) + with pytest.raises(ValueError, match="Lengths must match"): + index_a == series_b + + tm.assert_numpy_array_equal(index_a == series_a, expected1) + tm.assert_numpy_array_equal(index_a == series_c, expected2) + + # cases where length is 1 for one of them + with pytest.raises(ValueError, match="Lengths must match"): + index_a == index_d + with pytest.raises(ValueError, match="Lengths must match"): + index_a == series_d + with pytest.raises(ValueError, match="Lengths must match"): + index_a == array_d + msg = "Can only compare identically-labeled Series objects" + with pytest.raises(ValueError, match=msg): + series_a == series_d + with 
pytest.raises(ValueError, match="Lengths must match"): + series_a == array_d + + # comparing with a scalar should broadcast; note that we are excluding + # MultiIndex because in this case each item in the index is a tuple of + # length 2, and therefore is considered an array of length 2 in the + # comparison instead of a scalar + if not isinstance(index_a, MultiIndex): + expected3 = np.array([False] * (len(index_a) - 2) + [True, False]) + # assuming the 2nd to last item is unique in the data + item = index_a[-2] + tm.assert_numpy_array_equal(index_a == item, expected3) + tm.assert_series_equal(series_a == item, Series(expected3)) + + +def test_compare_tuple(): + # GH#21517 + mi = MultiIndex.from_product([[1, 2]] * 2) + + all_false = np.array([False, False, False, False]) + + result = mi == mi[0] + expected = np.array([True, False, False, False]) + tm.assert_numpy_array_equal(result, expected) + + result = mi != mi[0] + tm.assert_numpy_array_equal(result, ~expected) + + result = mi < mi[0] + tm.assert_numpy_array_equal(result, all_false) + + result = mi <= mi[0] + tm.assert_numpy_array_equal(result, expected) + + result = mi > mi[0] + tm.assert_numpy_array_equal(result, ~expected) + + result = mi >= mi[0] + tm.assert_numpy_array_equal(result, ~all_false) + + +def test_compare_tuple_strs(): + # GH#34180 + + mi = MultiIndex.from_tuples([("a", "b"), ("b", "c"), ("c", "a")]) + + result = mi == ("c", "a") + expected = np.array([False, False, True]) + tm.assert_numpy_array_equal(result, expected) + + result = mi == ("c",) + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(result, expected) + + +def test_equals_multi(idx): + assert idx.equals(idx) + assert not idx.equals(idx.values) + assert idx.equals(Index(idx.values)) + + assert idx.equal_levels(idx) + assert not idx.equals(idx[:-1]) + assert not idx.equals(idx[-1]) + + # different number of levels + index = MultiIndex( + levels=[Index(list(range(4))), Index(list(range(4))), Index(list(range(4)))], + codes=[ + np.array([0, 0, 1, 2, 2, 2, 3, 3]), + np.array([0, 1, 0, 0, 0, 1, 0, 1]), + np.array([1, 0, 1, 1, 0, 0, 1, 0]), + ], + ) + + index2 = MultiIndex(levels=index.levels[:-1], codes=index.codes[:-1]) + assert not index.equals(index2) + assert not index.equal_levels(index2) + + # levels are different + major_axis = Index(list(range(4))) + minor_axis = Index(list(range(2))) + + major_codes = np.array([0, 0, 1, 2, 2, 3]) + minor_codes = np.array([0, 1, 0, 0, 1, 0]) + + index = MultiIndex( + levels=[major_axis, minor_axis], codes=[major_codes, minor_codes] + ) + assert not idx.equals(index) + assert not idx.equal_levels(index) + + # some of the labels are different + major_axis = Index(["foo", "bar", "baz", "qux"]) + minor_axis = Index(["one", "two"]) + + major_codes = np.array([0, 0, 2, 2, 3, 3]) + minor_codes = np.array([0, 1, 0, 1, 0, 1]) + + index = MultiIndex( + levels=[major_axis, minor_axis], codes=[major_codes, minor_codes] + ) + assert not idx.equals(index) + + +def test_identical(idx): + mi = idx.copy() + mi2 = idx.copy() + assert mi.identical(mi2) + + mi = mi.set_names(["new1", "new2"]) + assert mi.equals(mi2) + assert not mi.identical(mi2) + + mi2 = mi2.set_names(["new1", "new2"]) + assert mi.identical(mi2) + + with tm.assert_produces_warning(FutureWarning): + # subclass-specific keywords to pd.Index + mi3 = Index(mi.tolist(), names=mi.names) + + msg = r"Unexpected keyword arguments {'names'}" + with pytest.raises(TypeError, match=msg): + with tm.assert_produces_warning(FutureWarning): + # subclass-specific 
keywords to pd.Index + Index(mi.tolist(), names=mi.names, tupleize_cols=False) + + mi4 = Index(mi.tolist(), tupleize_cols=False) + assert mi.identical(mi3) + assert not mi.identical(mi4) + assert mi.equals(mi4) + + +def test_equals_operator(idx): + # GH9785 + assert (idx == idx).all() + + +def test_equals_missing_values(): + # make sure take is not using -1 + i = MultiIndex.from_tuples([(0, pd.NaT), (0, pd.Timestamp("20130101"))]) + result = i[0:1].equals(i[0]) + assert not result + result = i[1:2].equals(i[1]) + assert not result + + +def test_equals_missing_values_differently_sorted(): + # GH#38439 + mi1 = MultiIndex.from_tuples([(81.0, np.nan), (np.nan, np.nan)]) + mi2 = MultiIndex.from_tuples([(np.nan, np.nan), (81.0, np.nan)]) + assert not mi1.equals(mi2) + + mi2 = MultiIndex.from_tuples([(81.0, np.nan), (np.nan, np.nan)]) + assert mi1.equals(mi2) + + +def test_is_(): + mi = MultiIndex.from_tuples(zip(range(10), range(10))) + assert mi.is_(mi) + assert mi.is_(mi.view()) + assert mi.is_(mi.view().view().view().view()) + mi2 = mi.view() + # names are metadata, they don't change id + mi2.names = ["A", "B"] + assert mi2.is_(mi) + assert mi.is_(mi2) + + assert not mi.is_(mi.set_names(["C", "D"])) + mi2 = mi.view() + mi2.set_names(["E", "F"], inplace=True) + assert mi.is_(mi2) + # levels are inherent properties, they change identity + mi3 = mi2.set_levels([list(range(10)), list(range(10))]) + assert not mi3.is_(mi2) + # shouldn't change + assert mi2.is_(mi) + mi4 = mi3.view() + + # GH 17464 - Remove duplicate MultiIndex levels + with tm.assert_produces_warning(FutureWarning): + mi4.set_levels([list(range(10)), list(range(10))], inplace=True) + assert not mi4.is_(mi3) + mi5 = mi.view() + with tm.assert_produces_warning(FutureWarning): + mi5.set_levels(mi5.levels, inplace=True) + assert not mi5.is_(mi) + + +def test_is_all_dates(idx): + assert not idx._is_all_dates + + +def test_is_numeric(idx): + # MultiIndex is never numeric + assert not idx.is_numeric() + + +def test_multiindex_compare(): + # GH 21149 + # Ensure comparison operations for MultiIndex with nlevels == 1 + # behave consistently with those for MultiIndex with nlevels > 1 + + midx = MultiIndex.from_product([[0, 1]]) + + # Equality self-test: MultiIndex object vs self + expected = Series([True, True]) + result = Series(midx == midx) + tm.assert_series_equal(result, expected) + + # Greater than comparison: MultiIndex object vs self + expected = Series([False, False]) + result = Series(midx > midx) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_formats.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_formats.py new file mode 100644 index 0000000..a6dadd4 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_formats.py @@ -0,0 +1,233 @@ +import warnings + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Index, + MultiIndex, +) +import pandas._testing as tm + + +def test_format(idx): + idx.format() + idx[:0].format() + + +def test_format_integer_names(): + index = MultiIndex( + levels=[[0, 1], [0, 1]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]], names=[0, 1] + ) + index.format(names=True) + + +def test_format_sparse_config(idx): + warn_filters = warnings.filters + warnings.filterwarnings("ignore", category=FutureWarning, module=".*format") + # GH1538 + pd.set_option("display.multi_sparse", False) + + result = idx.format() + assert result[1] == "foo two" + + 
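# restore the display option and warning filters mutated above
+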
tm.reset_display_options() + + warnings.filters = warn_filters + + +def test_format_sparse_display(): + index = MultiIndex( + levels=[[0, 1], [0, 1], [0, 1], [0]], + codes=[ + [0, 0, 0, 1, 1, 1], + [0, 0, 1, 0, 0, 1], + [0, 1, 0, 0, 1, 0], + [0, 0, 0, 0, 0, 0], + ], + ) + + result = index.format() + assert result[3] == "1 0 0 0" + + +def test_repr_with_unicode_data(): + with pd.option_context("display.encoding", "UTF-8"): + d = {"a": ["\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} + index = pd.DataFrame(d).set_index(["a", "b"]).index + assert "\\" not in repr(index) # we don't want unicode-escaped + + +def test_repr_roundtrip_raises(): + mi = MultiIndex.from_product([list("ab"), range(3)], names=["first", "second"]) + msg = "Must pass both levels and codes" + with pytest.raises(TypeError, match=msg): + eval(repr(mi)) + + +def test_unicode_string_with_unicode(): + d = {"a": ["\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} + idx = pd.DataFrame(d).set_index(["a", "b"]).index + str(idx) + + +def test_repr_max_seq_item_setting(idx): + # GH10182 + idx = idx.repeat(50) + with pd.option_context("display.max_seq_items", None): + repr(idx) + assert "..." not in str(idx) + + +class TestRepr: + def test_unicode_repr_issues(self): + levels = [Index(["a/\u03c3", "b/\u03c3", "c/\u03c3"]), Index([0, 1])] + codes = [np.arange(3).repeat(2), np.tile(np.arange(2), 3)] + index = MultiIndex(levels=levels, codes=codes) + + repr(index.levels) + repr(index.get_level_values(1)) + + def test_repr_max_seq_items_equal_to_n(self, idx): + # display.max_seq_items == n + with pd.option_context("display.max_seq_items", 6): + result = idx.__repr__() + expected = """\ +MultiIndex([('foo', 'one'), + ('foo', 'two'), + ('bar', 'one'), + ('baz', 'two'), + ('qux', 'one'), + ('qux', 'two')], + names=['first', 'second'])""" + assert result == expected + + def test_repr(self, idx): + result = idx[:1].__repr__() + expected = """\ +MultiIndex([('foo', 'one')], + names=['first', 'second'])""" + assert result == expected + + result = idx.__repr__() + expected = """\ +MultiIndex([('foo', 'one'), + ('foo', 'two'), + ('bar', 'one'), + ('baz', 'two'), + ('qux', 'one'), + ('qux', 'two')], + names=['first', 'second'])""" + assert result == expected + + with pd.option_context("display.max_seq_items", 5): + result = idx.__repr__() + expected = """\ +MultiIndex([('foo', 'one'), + ('foo', 'two'), + ... + ('qux', 'one'), + ('qux', 'two')], + names=['first', 'second'], length=6)""" + assert result == expected + + # display.max_seq_items == 1 + with pd.option_context("display.max_seq_items", 1): + result = idx.__repr__() + expected = """\ +MultiIndex([... 
+ ('qux', 'two')], + names=['first', ...], length=6)""" + assert result == expected + + def test_rjust(self, narrow_multi_index): + mi = narrow_multi_index + result = mi[:1].__repr__() + expected = """\ +MultiIndex([('a', 9, '2000-01-01 00:00:00')], + names=['a', 'b', 'dti'])""" + assert result == expected + + result = mi[::500].__repr__() + expected = """\ +MultiIndex([( 'a', 9, '2000-01-01 00:00:00'), + ( 'a', 9, '2000-01-01 00:08:20'), + ('abc', 10, '2000-01-01 00:16:40'), + ('abc', 10, '2000-01-01 00:25:00')], + names=['a', 'b', 'dti'])""" + assert result == expected + + result = mi.__repr__() + expected = """\ +MultiIndex([( 'a', 9, '2000-01-01 00:00:00'), + ( 'a', 9, '2000-01-01 00:00:01'), + ( 'a', 9, '2000-01-01 00:00:02'), + ( 'a', 9, '2000-01-01 00:00:03'), + ( 'a', 9, '2000-01-01 00:00:04'), + ( 'a', 9, '2000-01-01 00:00:05'), + ( 'a', 9, '2000-01-01 00:00:06'), + ( 'a', 9, '2000-01-01 00:00:07'), + ( 'a', 9, '2000-01-01 00:00:08'), + ( 'a', 9, '2000-01-01 00:00:09'), + ... + ('abc', 10, '2000-01-01 00:33:10'), + ('abc', 10, '2000-01-01 00:33:11'), + ('abc', 10, '2000-01-01 00:33:12'), + ('abc', 10, '2000-01-01 00:33:13'), + ('abc', 10, '2000-01-01 00:33:14'), + ('abc', 10, '2000-01-01 00:33:15'), + ('abc', 10, '2000-01-01 00:33:16'), + ('abc', 10, '2000-01-01 00:33:17'), + ('abc', 10, '2000-01-01 00:33:18'), + ('abc', 10, '2000-01-01 00:33:19')], + names=['a', 'b', 'dti'], length=2000)""" + assert result == expected + + def test_tuple_width(self, wide_multi_index): + mi = wide_multi_index + result = mi[:1].__repr__() + expected = """MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...)], + names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])""" + assert result == expected + + result = mi[:10].__repr__() + expected = """\ +MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...), + ('a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...), + ('a', 9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...), + ('a', 9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...), + ('a', 9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...), + ('a', 9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...), + ('a', 9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...), + ('a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...), + ('a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...), + ('a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...)], + names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])""" + assert result == expected + + result = mi.__repr__() + expected = """\ +MultiIndex([( 'a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...), + ( 'a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...), + ( 'a', 9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...), + ( 'a', 9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...), + ( 'a', 9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...), + ( 'a', 9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...), + ( 'a', 9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...), + ( 'a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...), + ( 'a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...), + ( 'a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...), + ... 
+ ('abc', 10, '2000-01-01 00:33:10', '2000-01-01 00:33:10', ...), + ('abc', 10, '2000-01-01 00:33:11', '2000-01-01 00:33:11', ...), + ('abc', 10, '2000-01-01 00:33:12', '2000-01-01 00:33:12', ...), + ('abc', 10, '2000-01-01 00:33:13', '2000-01-01 00:33:13', ...), + ('abc', 10, '2000-01-01 00:33:14', '2000-01-01 00:33:14', ...), + ('abc', 10, '2000-01-01 00:33:15', '2000-01-01 00:33:15', ...), + ('abc', 10, '2000-01-01 00:33:16', '2000-01-01 00:33:16', ...), + ('abc', 10, '2000-01-01 00:33:17', '2000-01-01 00:33:17', ...), + ('abc', 10, '2000-01-01 00:33:18', '2000-01-01 00:33:18', ...), + ('abc', 10, '2000-01-01 00:33:19', '2000-01-01 00:33:19', ...)], + names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'], length=2000)""" + assert result == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_get_level_values.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_get_level_values.py new file mode 100644 index 0000000..25b4501 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_get_level_values.py @@ -0,0 +1,114 @@ +import numpy as np + +import pandas as pd +from pandas import ( + CategoricalIndex, + Index, + MultiIndex, + Timestamp, + date_range, +) +import pandas._testing as tm + + +class TestGetLevelValues: + def test_get_level_values_box_datetime64(self): + + dates = date_range("1/1/2000", periods=4) + levels = [dates, [0, 1]] + codes = [[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]] + + index = MultiIndex(levels=levels, codes=codes) + + assert isinstance(index.get_level_values(0)[0], Timestamp) + + +def test_get_level_values(idx): + result = idx.get_level_values(0) + expected = Index(["foo", "foo", "bar", "baz", "qux", "qux"], name="first") + tm.assert_index_equal(result, expected) + assert result.name == "first" + + result = idx.get_level_values("first") + expected = idx.get_level_values(0) + tm.assert_index_equal(result, expected) + + # GH 10460 + index = MultiIndex( + levels=[CategoricalIndex(["A", "B"]), CategoricalIndex([1, 2, 3])], + codes=[np.array([0, 0, 0, 1, 1, 1]), np.array([0, 1, 2, 0, 1, 2])], + ) + + exp = CategoricalIndex(["A", "A", "A", "B", "B", "B"]) + tm.assert_index_equal(index.get_level_values(0), exp) + exp = CategoricalIndex([1, 2, 3, 1, 2, 3]) + tm.assert_index_equal(index.get_level_values(1), exp) + + +def test_get_level_values_all_na(): + # GH#17924 when level entirely consists of nan + arrays = [[np.nan, np.nan, np.nan], ["a", np.nan, 1]] + index = MultiIndex.from_arrays(arrays) + result = index.get_level_values(0) + expected = Index([np.nan, np.nan, np.nan], dtype=np.float64) + tm.assert_index_equal(result, expected) + + result = index.get_level_values(1) + expected = Index(["a", np.nan, 1], dtype=object) + tm.assert_index_equal(result, expected) + + +def test_get_level_values_int_with_na(): + # GH#17924 + arrays = [["a", "b", "b"], [1, np.nan, 2]] + index = MultiIndex.from_arrays(arrays) + result = index.get_level_values(1) + expected = Index([1, np.nan, 2]) + tm.assert_index_equal(result, expected) + + arrays = [["a", "b", "b"], [np.nan, np.nan, 2]] + index = MultiIndex.from_arrays(arrays) + result = index.get_level_values(1) + expected = Index([np.nan, np.nan, 2]) + tm.assert_index_equal(result, expected) + + +def test_get_level_values_na(): + arrays = [[np.nan, np.nan, np.nan], ["a", np.nan, 1]] + index = MultiIndex.from_arrays(arrays) + result = index.get_level_values(0) + expected = Index([np.nan, np.nan, np.nan]) + tm.assert_index_equal(result, expected) + + result = 
index.get_level_values(1) + expected = Index(["a", np.nan, 1]) + tm.assert_index_equal(result, expected) + + arrays = [["a", "b", "b"], pd.DatetimeIndex([0, 1, pd.NaT])] + index = MultiIndex.from_arrays(arrays) + result = index.get_level_values(1) + expected = pd.DatetimeIndex([0, 1, pd.NaT]) + tm.assert_index_equal(result, expected) + + arrays = [[], []] + index = MultiIndex.from_arrays(arrays) + result = index.get_level_values(0) + expected = Index([], dtype=object) + tm.assert_index_equal(result, expected) + + +def test_get_level_values_when_periods(): + # GH33131. See also discussion in GH32669. + # This test can probably be removed when PeriodIndex._engine is removed. + from pandas import ( + Period, + PeriodIndex, + ) + + idx = MultiIndex.from_arrays( + [PeriodIndex([Period("2019Q1"), Period("2019Q2")], name="b")] + ) + idx2 = MultiIndex.from_arrays( + [idx._get_level_values(level) for level in range(idx.nlevels)] + ) + assert all(x.is_monotonic for x in idx2.levels) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_get_set.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_get_set.py new file mode 100644 index 0000000..aa0e91c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_get_set.py @@ -0,0 +1,490 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import DatetimeTZDtype + +import pandas as pd +from pandas import ( + CategoricalIndex, + MultiIndex, +) +import pandas._testing as tm + + +def assert_matching(actual, expected, check_dtype=False): + # avoid specifying internal representation + # as much as possible + assert len(actual) == len(expected) + for act, exp in zip(actual, expected): + act = np.asarray(act) + exp = np.asarray(exp) + tm.assert_numpy_array_equal(act, exp, check_dtype=check_dtype) + + +def test_get_level_number_integer(idx): + idx.names = [1, 0] + assert idx._get_level_number(1) == 0 + assert idx._get_level_number(0) == 1 + msg = "Too many levels: Index has only 2 levels, not 3" + with pytest.raises(IndexError, match=msg): + idx._get_level_number(2) + with pytest.raises(KeyError, match="Level fourth not found"): + idx._get_level_number("fourth") + + +def test_get_dtypes(): + # Test MultiIndex.dtypes (# Gh37062) + idx_multitype = MultiIndex.from_product( + [[1, 2, 3], ["a", "b", "c"], pd.date_range("20200101", periods=2, tz="UTC")], + names=["int", "string", "dt"], + ) + expected = pd.Series( + { + "int": np.dtype("int64"), + "string": np.dtype("O"), + "dt": DatetimeTZDtype(tz="utc"), + } + ) + tm.assert_series_equal(expected, idx_multitype.dtypes) + + +def test_get_dtypes_no_level_name(): + # Test MultiIndex.dtypes (# GH38580 ) + idx_multitype = MultiIndex.from_product( + [ + [1, 2, 3], + ["a", "b", "c"], + pd.date_range("20200101", periods=2, tz="UTC"), + ], + ) + expected = pd.Series( + { + "level_0": np.dtype("int64"), + "level_1": np.dtype("O"), + "level_2": DatetimeTZDtype(tz="utc"), + } + ) + tm.assert_series_equal(expected, idx_multitype.dtypes) + + +def test_get_dtypes_duplicate_level_names(): + # Test MultiIndex.dtypes with non-unique level names (# GH45174) + result = MultiIndex.from_product( + [ + [1, 2, 3], + ["a", "b", "c"], + pd.date_range("20200101", periods=2, tz="UTC"), + ], + names=["A", "A", "A"], + ).dtypes + expected = pd.Series( + [np.dtype("int64"), np.dtype("O"), DatetimeTZDtype(tz="utc")], + index=["A", "A", "A"], + ) + tm.assert_series_equal(result, expected) + + +def 
test_get_level_number_out_of_bounds(multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + + with pytest.raises(IndexError, match="Too many levels"): + frame.index._get_level_number(2) + with pytest.raises(IndexError, match="not a valid level number"): + frame.index._get_level_number(-3) + + +def test_set_name_methods(idx, index_names): + # so long as these are synonyms, we don't need to test set_names + assert idx.rename == idx.set_names + new_names = [name + "SUFFIX" for name in index_names] + ind = idx.set_names(new_names) + assert idx.names == index_names + assert ind.names == new_names + msg = "Length of names must match number of levels in MultiIndex" + with pytest.raises(ValueError, match=msg): + ind.set_names(new_names + new_names) + new_names2 = [name + "SUFFIX2" for name in new_names] + res = ind.set_names(new_names2, inplace=True) + assert res is None + assert ind.names == new_names2 + + # set names for specific level (# GH7792) + ind = idx.set_names(new_names[0], level=0) + assert idx.names == index_names + assert ind.names == [new_names[0], index_names[1]] + + res = ind.set_names(new_names2[0], level=0, inplace=True) + assert res is None + assert ind.names == [new_names2[0], index_names[1]] + + # set names for multiple levels + ind = idx.set_names(new_names, level=[0, 1]) + assert idx.names == index_names + assert ind.names == new_names + + res = ind.set_names(new_names2, level=[0, 1], inplace=True) + assert res is None + assert ind.names == new_names2 + + +def test_set_levels_codes_directly(idx): + # setting levels/codes directly raises AttributeError + + levels = idx.levels + new_levels = [[lev + "a" for lev in level] for level in levels] + + codes = idx.codes + major_codes, minor_codes = codes + major_codes = [(x + 1) % 3 for x in major_codes] + minor_codes = [(x + 1) % 1 for x in minor_codes] + new_codes = [major_codes, minor_codes] + + msg = "[Cc]an't set attribute" + with pytest.raises(AttributeError, match=msg): + idx.levels = new_levels + with pytest.raises(AttributeError, match=msg): + idx.codes = new_codes + + +def test_set_levels(idx): + # side note - you probably wouldn't want to use levels and codes + # directly like this - but it is possible. 
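+ # set_levels returns a new MultiIndex by default; the inplace=True calls
+ # below are exercised under a FutureWarning because inplace is deprecated.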
+ levels = idx.levels + new_levels = [[lev + "a" for lev in level] for level in levels] + + # level changing [w/o mutation] + ind2 = idx.set_levels(new_levels) + assert_matching(ind2.levels, new_levels) + assert_matching(idx.levels, levels) + + # level changing [w/ mutation] + ind2 = idx.copy() + with tm.assert_produces_warning(FutureWarning): + inplace_return = ind2.set_levels(new_levels, inplace=True) + assert inplace_return is None + assert_matching(ind2.levels, new_levels) + + # level changing specific level [w/o mutation] + ind2 = idx.set_levels(new_levels[0], level=0) + assert_matching(ind2.levels, [new_levels[0], levels[1]]) + assert_matching(idx.levels, levels) + + ind2 = idx.set_levels(new_levels[1], level=1) + assert_matching(ind2.levels, [levels[0], new_levels[1]]) + assert_matching(idx.levels, levels) + + # level changing multiple levels [w/o mutation] + ind2 = idx.set_levels(new_levels, level=[0, 1]) + assert_matching(ind2.levels, new_levels) + assert_matching(idx.levels, levels) + + # level changing specific level [w/ mutation] + ind2 = idx.copy() + with tm.assert_produces_warning(FutureWarning): + inplace_return = ind2.set_levels(new_levels[0], level=0, inplace=True) + assert inplace_return is None + assert_matching(ind2.levels, [new_levels[0], levels[1]]) + assert_matching(idx.levels, levels) + + ind2 = idx.copy() + with tm.assert_produces_warning(FutureWarning): + inplace_return = ind2.set_levels(new_levels[1], level=1, inplace=True) + assert inplace_return is None + assert_matching(ind2.levels, [levels[0], new_levels[1]]) + assert_matching(idx.levels, levels) + + # level changing multiple levels [w/ mutation] + ind2 = idx.copy() + with tm.assert_produces_warning(FutureWarning): + inplace_return = ind2.set_levels(new_levels, level=[0, 1], inplace=True) + assert inplace_return is None + assert_matching(ind2.levels, new_levels) + assert_matching(idx.levels, levels) + + # illegal level changing should not change levels + # GH 13754 + original_index = idx.copy() + for inplace in [True, False]: + with pytest.raises(ValueError, match="^On"): + with tm.assert_produces_warning(FutureWarning): + idx.set_levels(["c"], level=0, inplace=inplace) + assert_matching(idx.levels, original_index.levels, check_dtype=True) + + with pytest.raises(ValueError, match="^On"): + with tm.assert_produces_warning(FutureWarning): + idx.set_codes([0, 1, 2, 3, 4, 5], level=0, inplace=inplace) + assert_matching(idx.codes, original_index.codes, check_dtype=True) + + with pytest.raises(TypeError, match="^Levels"): + with tm.assert_produces_warning(FutureWarning): + idx.set_levels("c", level=0, inplace=inplace) + assert_matching(idx.levels, original_index.levels, check_dtype=True) + + with pytest.raises(TypeError, match="^Codes"): + with tm.assert_produces_warning(FutureWarning): + idx.set_codes(1, level=0, inplace=inplace) + assert_matching(idx.codes, original_index.codes, check_dtype=True) + + +def test_set_codes(idx): + # side note - you probably wouldn't want to use levels and codes + # directly like this - but it is possible. 
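+ # mirrors test_set_levels above, but mutates the codes instead of the
+ # levels; the inplace variants again warn via FutureWarning.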
+ codes = idx.codes + major_codes, minor_codes = codes + major_codes = [(x + 1) % 3 for x in major_codes] + minor_codes = [(x + 1) % 1 for x in minor_codes] + new_codes = [major_codes, minor_codes] + + # changing codes w/o mutation + ind2 = idx.set_codes(new_codes) + assert_matching(ind2.codes, new_codes) + assert_matching(idx.codes, codes) + + # changing label w/ mutation + ind2 = idx.copy() + with tm.assert_produces_warning(FutureWarning): + inplace_return = ind2.set_codes(new_codes, inplace=True) + assert inplace_return is None + assert_matching(ind2.codes, new_codes) + + # codes changing specific level w/o mutation + ind2 = idx.set_codes(new_codes[0], level=0) + assert_matching(ind2.codes, [new_codes[0], codes[1]]) + assert_matching(idx.codes, codes) + + ind2 = idx.set_codes(new_codes[1], level=1) + assert_matching(ind2.codes, [codes[0], new_codes[1]]) + assert_matching(idx.codes, codes) + + # codes changing multiple levels w/o mutation + ind2 = idx.set_codes(new_codes, level=[0, 1]) + assert_matching(ind2.codes, new_codes) + assert_matching(idx.codes, codes) + + # label changing specific level w/ mutation + ind2 = idx.copy() + with tm.assert_produces_warning(FutureWarning): + inplace_return = ind2.set_codes(new_codes[0], level=0, inplace=True) + assert inplace_return is None + assert_matching(ind2.codes, [new_codes[0], codes[1]]) + assert_matching(idx.codes, codes) + + ind2 = idx.copy() + with tm.assert_produces_warning(FutureWarning): + inplace_return = ind2.set_codes(new_codes[1], level=1, inplace=True) + assert inplace_return is None + assert_matching(ind2.codes, [codes[0], new_codes[1]]) + assert_matching(idx.codes, codes) + + # codes changing multiple levels [w/ mutation] + ind2 = idx.copy() + with tm.assert_produces_warning(FutureWarning): + inplace_return = ind2.set_codes(new_codes, level=[0, 1], inplace=True) + assert inplace_return is None + assert_matching(ind2.codes, new_codes) + assert_matching(idx.codes, codes) + + # label changing for levels of different magnitude of categories + ind = MultiIndex.from_tuples([(0, i) for i in range(130)]) + new_codes = range(129, -1, -1) + expected = MultiIndex.from_tuples([(0, i) for i in new_codes]) + + # [w/o mutation] + result = ind.set_codes(codes=new_codes, level=1) + assert result.equals(expected) + + # [w/ mutation] + result = ind.copy() + with tm.assert_produces_warning(FutureWarning): + result.set_codes(codes=new_codes, level=1, inplace=True) + assert result.equals(expected) + + +def test_set_levels_codes_names_bad_input(idx): + levels, codes = idx.levels, idx.codes + names = idx.names + + with pytest.raises(ValueError, match="Length of levels"): + idx.set_levels([levels[0]]) + + with pytest.raises(ValueError, match="Length of codes"): + idx.set_codes([codes[0]]) + + with pytest.raises(ValueError, match="Length of names"): + idx.set_names([names[0]]) + + # shouldn't scalar data error, instead should demand list-like + with pytest.raises(TypeError, match="list of lists-like"): + idx.set_levels(levels[0]) + + # shouldn't scalar data error, instead should demand list-like + with pytest.raises(TypeError, match="list of lists-like"): + idx.set_codes(codes[0]) + + # shouldn't scalar data error, instead should demand list-like + with pytest.raises(TypeError, match="list-like"): + idx.set_names(names[0]) + + # should have equal lengths + with pytest.raises(TypeError, match="list of lists-like"): + idx.set_levels(levels[0], level=[0, 1]) + + with pytest.raises(TypeError, match="list-like"): + idx.set_levels(levels, level=0) + + # should 
have equal lengths + with pytest.raises(TypeError, match="list of lists-like"): + idx.set_codes(codes[0], level=[0, 1]) + + with pytest.raises(TypeError, match="list-like"): + idx.set_codes(codes, level=0) + + # should have equal lengths + with pytest.raises(ValueError, match="Length of names"): + idx.set_names(names[0], level=[0, 1]) + + with pytest.raises(TypeError, match="Names must be a"): + idx.set_names(names, level=0) + + +@pytest.mark.parametrize("inplace", [True, False]) +def test_set_names_with_nlevel_1(inplace): + # GH 21149 + # Ensure that .set_names for MultiIndex with + # nlevels == 1 does not raise any errors + expected = MultiIndex(levels=[[0, 1]], codes=[[0, 1]], names=["first"]) + m = MultiIndex.from_product([[0, 1]]) + result = m.set_names("first", level=0, inplace=inplace) + + if inplace: + result = m + + tm.assert_index_equal(result, expected) + + +def test_multi_set_names_pos_args_deprecation(): + # GH#41485 + idx = MultiIndex.from_product([["python", "cobra"], [2018, 2019]]) + msg = ( + "In a future version of pandas all arguments of MultiIndex.set_names " + "except for the argument 'names' will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = idx.set_names(["kind", "year"], None) + expected = MultiIndex( + levels=[["python", "cobra"], [2018, 2019]], + codes=[[0, 0, 1, 1], [0, 1, 0, 1]], + names=["kind", "year"], + ) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize("ordered", [True, False]) +def test_set_levels_categorical(ordered): + # GH13854 + index = MultiIndex.from_arrays([list("xyzx"), [0, 1, 2, 3]]) + + cidx = CategoricalIndex(list("bac"), ordered=ordered) + result = index.set_levels(cidx, level=0) + expected = MultiIndex(levels=[cidx, [0, 1, 2, 3]], codes=index.codes) + tm.assert_index_equal(result, expected) + + result_lvl = result.get_level_values(0) + expected_lvl = CategoricalIndex( + list("bacb"), categories=cidx.categories, ordered=cidx.ordered + ) + tm.assert_index_equal(result_lvl, expected_lvl) + + +def test_set_value_keeps_names(): + # motivating example from #3742 + lev1 = ["hans", "hans", "hans", "grethe", "grethe", "grethe"] + lev2 = ["1", "2", "3"] * 2 + idx = MultiIndex.from_arrays([lev1, lev2], names=["Name", "Number"]) + df = pd.DataFrame( + np.random.randn(6, 4), columns=["one", "two", "three", "four"], index=idx + ) + df = df.sort_index() + assert df._is_copy is None + assert df.index.names == ("Name", "Number") + df.at[("grethe", "4"), "one"] = 99.34 + assert df._is_copy is None + assert df.index.names == ("Name", "Number") + + +def test_set_levels_with_iterable(): + # GH23273 + sizes = [1, 2, 3] + colors = ["black"] * 3 + index = MultiIndex.from_arrays([sizes, colors], names=["size", "color"]) + + result = index.set_levels(map(int, ["3", "2", "1"]), level="size") + + expected_sizes = [3, 2, 1] + expected = MultiIndex.from_arrays([expected_sizes, colors], names=["size", "color"]) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize("inplace", [True, False]) +def test_set_codes_inplace_deprecated(idx, inplace): + new_codes = idx.codes[1][::-1] + + with tm.assert_produces_warning(FutureWarning): + idx.set_codes(codes=new_codes, level=1, inplace=inplace) + + +@pytest.mark.parametrize("inplace", [True, False]) +def test_set_levels_inplace_deprecated(idx, inplace): + new_level = idx.levels[1].copy() + + with tm.assert_produces_warning(FutureWarning): + idx.set_levels(levels=new_level, level=1, inplace=inplace) + + +def test_set_levels_pos_args_deprecation(): + 
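+ # passing `level` positionally (the second argument below) is what triggers the FutureWarning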
# https://github.com/pandas-dev/pandas/issues/41485 + idx = MultiIndex.from_tuples( + [ + (1, "one"), + (2, "one"), + (3, "one"), + ], + names=["foo", "bar"], + ) + msg = ( + r"In a future version of pandas all arguments of MultiIndex.set_levels except " + r"for the argument 'levels' will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = idx.set_levels(["a", "b", "c"], 0) + expected = MultiIndex.from_tuples( + [ + ("a", "one"), + ("b", "one"), + ("c", "one"), + ], + names=["foo", "bar"], + ) + tm.assert_index_equal(result, expected) + + +def test_set_codes_pos_args_deprecation(idx): + # https://github.com/pandas-dev/pandas/issues/41485 + msg = ( + r"In a future version of pandas all arguments of MultiIndex.set_codes except " + r"for the argument 'codes' will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = idx.set_codes([[0, 0, 1, 2, 3, 3], [0, 1, 0, 1, 0, 1]], [0, 1]) + expected = MultiIndex.from_tuples( + [ + ("foo", "one"), + ("foo", "two"), + ("bar", "one"), + ("baz", "two"), + ("qux", "one"), + ("qux", "two"), + ], + names=["first", "second"], + ) + tm.assert_index_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_indexing.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_indexing.py new file mode 100644 index 0000000..9cb6512 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_indexing.py @@ -0,0 +1,891 @@ +from datetime import timedelta +import re + +import numpy as np +import pytest + +from pandas.errors import ( + InvalidIndexError, + PerformanceWarning, +) + +import pandas as pd +from pandas import ( + Categorical, + Index, + MultiIndex, + date_range, +) +import pandas._testing as tm + + +class TestSliceLocs: + def test_slice_locs_partial(self, idx): + sorted_idx, _ = idx.sortlevel(0) + + result = sorted_idx.slice_locs(("foo", "two"), ("qux", "one")) + assert result == (1, 5) + + result = sorted_idx.slice_locs(None, ("qux", "one")) + assert result == (0, 5) + + result = sorted_idx.slice_locs(("foo", "two"), None) + assert result == (1, len(sorted_idx)) + + result = sorted_idx.slice_locs("bar", "baz") + assert result == (2, 4) + + def test_slice_locs(self): + df = tm.makeTimeDataFrame() + stacked = df.stack() + idx = stacked.index + + slob = slice(*idx.slice_locs(df.index[5], df.index[15])) + sliced = stacked[slob] + expected = df[5:16].stack() + tm.assert_almost_equal(sliced.values, expected.values) + + slob = slice( + *idx.slice_locs( + df.index[5] + timedelta(seconds=30), + df.index[15] - timedelta(seconds=30), + ) + ) + sliced = stacked[slob] + expected = df[6:15].stack() + tm.assert_almost_equal(sliced.values, expected.values) + + def test_slice_locs_with_type_mismatch(self): + df = tm.makeTimeDataFrame() + stacked = df.stack() + idx = stacked.index + with pytest.raises(TypeError, match="^Level type mismatch"): + idx.slice_locs((1, 3)) + with pytest.raises(TypeError, match="^Level type mismatch"): + idx.slice_locs(df.index[5] + timedelta(seconds=30), (5, 2)) + df = tm.makeCustomDataframe(5, 5) + stacked = df.stack() + idx = stacked.index + with pytest.raises(TypeError, match="^Level type mismatch"): + idx.slice_locs(timedelta(seconds=30)) + # TODO: Try creating a UnicodeDecodeError in exception message + with pytest.raises(TypeError, match="^Level type mismatch"): + idx.slice_locs(df.index[1], (16, "a")) + + def test_slice_locs_not_sorted(self): + index = MultiIndex(
levels=[Index(np.arange(4)), Index(np.arange(4)), Index(np.arange(4))], + codes=[ + np.array([0, 0, 1, 2, 2, 2, 3, 3]), + np.array([0, 1, 0, 0, 0, 1, 0, 1]), + np.array([1, 0, 1, 1, 0, 0, 1, 0]), + ], + ) + msg = "[Kk]ey length.*greater than MultiIndex lexsort depth" + with pytest.raises(KeyError, match=msg): + index.slice_locs((1, 0, 1), (2, 1, 0)) + + # works once the index is sorted + sorted_index, _ = index.sortlevel(0) + # smoke test: the same lookup should no longer raise + sorted_index.slice_locs((1, 0, 1), (2, 1, 0)) + + def test_slice_locs_not_contained(self): + # keys absent from the index fall back to searchsorted behavior + index = MultiIndex( + levels=[[0, 2, 4, 6], [0, 2, 4]], + codes=[[0, 0, 0, 1, 1, 2, 3, 3, 3], [0, 1, 2, 1, 2, 2, 0, 1, 2]], + ) + + result = index.slice_locs((1, 0), (5, 2)) + assert result == (3, 6) + + result = index.slice_locs(1, 5) + assert result == (3, 6) + + result = index.slice_locs((2, 2), (5, 2)) + assert result == (3, 6) + + result = index.slice_locs(2, 5) + assert result == (3, 6) + + result = index.slice_locs((1, 0), (6, 3)) + assert result == (3, 8) + + result = index.slice_locs(-1, 10) + assert result == (0, len(index)) + + @pytest.mark.parametrize( + "index_arr,expected,start_idx,end_idx", + [ + ([[np.nan, "a", "b"], ["c", "d", "e"]], (0, 3), np.nan, None), + ([[np.nan, "a", "b"], ["c", "d", "e"]], (0, 3), np.nan, "b"), + ([[np.nan, "a", "b"], ["c", "d", "e"]], (0, 3), np.nan, ("b", "e")), + ([["a", "b", "c"], ["d", np.nan, "e"]], (1, 3), ("b", np.nan), None), + ([["a", "b", "c"], ["d", np.nan, "e"]], (1, 3), ("b", np.nan), "c"), + ([["a", "b", "c"], ["d", np.nan, "e"]], (1, 3), ("b", np.nan), ("c", "e")), + ], + ) + def test_slice_locs_with_missing_value( + self, index_arr, expected, start_idx, end_idx + ): + # issue 19132 + idx = MultiIndex.from_arrays(index_arr) + result = idx.slice_locs(start=start_idx, end=end_idx) + assert result == expected + + +class TestPutmask: + def test_putmask_with_wrong_mask(self, idx): + # GH18368 + + msg = "putmask: mask and data must be the same size" + with pytest.raises(ValueError, match=msg): + idx.putmask(np.ones(len(idx) + 1, np.bool_), 1) + + with pytest.raises(ValueError, match=msg): + idx.putmask(np.ones(len(idx) - 1, np.bool_), 1) + + with pytest.raises(ValueError, match=msg): + idx.putmask("foo", 1) + + def test_putmask_multiindex_other(self): + # GH#43212 `value` is also a MultiIndex + + left = MultiIndex.from_tuples([(np.nan, 6), (np.nan, 6), ("a", 4)]) + right = MultiIndex.from_tuples([("a", 1), ("a", 1), ("d", 1)]) + mask = np.array([True, True, False]) + + result = left.putmask(mask, right) + + expected = MultiIndex.from_tuples([right[0], right[1], left[2]]) + tm.assert_index_equal(result, expected) + + +class TestGetIndexer: + def test_get_indexer(self): + major_axis = Index(np.arange(4)) + minor_axis = Index(np.arange(2)) + + major_codes = np.array([0, 0, 1, 2, 2, 3, 3], dtype=np.intp) + minor_codes = np.array([0, 1, 0, 0, 1, 0, 1], dtype=np.intp) + + index = MultiIndex( + levels=[major_axis, minor_axis], codes=[major_codes, minor_codes] + ) + idx1 = index[:5] + idx2 = index[[1, 3, 5]] + + r1 = idx1.get_indexer(idx2) + tm.assert_almost_equal(r1, np.array([1, 3, -1], dtype=np.intp)) + + r1 = idx2.get_indexer(idx1, method="pad") + e1 = np.array([-1, 0, 0, 1, 1], dtype=np.intp) + tm.assert_almost_equal(r1, e1) + + r2 = idx2.get_indexer(idx1[::-1], method="pad") + tm.assert_almost_equal(r2, e1[::-1]) + + rffill1 = idx2.get_indexer(idx1, method="ffill") + tm.assert_almost_equal(r1, rffill1) + + r1 = idx2.get_indexer(idx1, method="backfill") + e1 = np.array([0, 0, 1, 1,
2], dtype=np.intp) + tm.assert_almost_equal(r1, e1) + + r2 = idx2.get_indexer(idx1[::-1], method="backfill") + tm.assert_almost_equal(r2, e1[::-1]) + + rbfill1 = idx2.get_indexer(idx1, method="bfill") + tm.assert_almost_equal(r1, rbfill1) + + # pass non-MultiIndex + r1 = idx1.get_indexer(idx2.values) + rexp1 = idx1.get_indexer(idx2) + tm.assert_almost_equal(r1, rexp1) + + r1 = idx1.get_indexer([1, 2, 3]) + assert (r1 == [-1, -1, -1]).all() + + # create index with duplicates + idx1 = Index(list(range(10)) + list(range(10))) + idx2 = Index(list(range(20))) + + msg = "Reindexing only valid with uniquely valued Index objects" + with pytest.raises(InvalidIndexError, match=msg): + idx1.get_indexer(idx2) + + def test_get_indexer_nearest(self): + midx = MultiIndex.from_tuples([("a", 1), ("b", 2)]) + msg = ( + "method='nearest' not implemented yet for MultiIndex; " + "see GitHub issue 9365" + ) + with pytest.raises(NotImplementedError, match=msg): + midx.get_indexer(["a"], method="nearest") + msg = "tolerance not implemented yet for MultiIndex" + with pytest.raises(NotImplementedError, match=msg): + midx.get_indexer(["a"], method="pad", tolerance=2) + + def test_get_indexer_categorical_time(self): + # https://github.com/pandas-dev/pandas/issues/21390 + midx = MultiIndex.from_product( + [ + Categorical(["a", "b", "c"]), + Categorical(date_range("2012-01-01", periods=3, freq="H")), + ] + ) + result = midx.get_indexer(midx) + tm.assert_numpy_array_equal(result, np.arange(9, dtype=np.intp)) + + @pytest.mark.parametrize( + "index_arr,labels,expected", + [ + ( + [[1, np.nan, 2], [3, 4, 5]], + [1, np.nan, 2], + np.array([-1, -1, -1], dtype=np.intp), + ), + ([[1, np.nan, 2], [3, 4, 5]], [(np.nan, 4)], np.array([1], dtype=np.intp)), + ([[1, 2, 3], [np.nan, 4, 5]], [(1, np.nan)], np.array([0], dtype=np.intp)), + ( + [[1, 2, 3], [np.nan, 4, 5]], + [np.nan, 4, 5], + np.array([-1, -1, -1], dtype=np.intp), + ), + ], + ) + def test_get_indexer_with_missing_value(self, index_arr, labels, expected): + # issue 19132 + idx = MultiIndex.from_arrays(index_arr) + result = idx.get_indexer(labels) + tm.assert_numpy_array_equal(result, expected) + + def test_get_indexer_methods(self): + # https://github.com/pandas-dev/pandas/issues/29896 + # test getting an indexer for another index with different methods + # confirms that getting an indexer without a filling method, getting an + # indexer and backfilling, and getting an indexer and padding all behave + # correctly in the case where all of the target values fall in between + # several levels in the MultiIndex into which they are getting an indexer + # + # visually, the MultiIndexes used in this test are: + # mult_idx_1: + # 0: -1 0 + # 1: 2 + # 2: 3 + # 3: 4 + # 4: 0 0 + # 5: 2 + # 6: 3 + # 7: 4 + # 8: 1 0 + # 9: 2 + # 10: 3 + # 11: 4 + # + # mult_idx_2: + # 0: 0 1 + # 1: 3 + # 2: 4 + mult_idx_1 = MultiIndex.from_product([[-1, 0, 1], [0, 2, 3, 4]]) + mult_idx_2 = MultiIndex.from_product([[0], [1, 3, 4]]) + + indexer = mult_idx_1.get_indexer(mult_idx_2) + expected = np.array([-1, 6, 7], dtype=indexer.dtype) + tm.assert_almost_equal(expected, indexer) + + backfill_indexer = mult_idx_1.get_indexer(mult_idx_2, method="backfill") + expected = np.array([5, 6, 7], dtype=backfill_indexer.dtype) + tm.assert_almost_equal(expected, backfill_indexer) + + # ensure the legacy "bfill" option functions identically to "backfill" + backfill_indexer = mult_idx_1.get_indexer(mult_idx_2, method="bfill") + expected = np.array([5, 6, 7], dtype=backfill_indexer.dtype) + 
tm.assert_almost_equal(expected, backfill_indexer) + + pad_indexer = mult_idx_1.get_indexer(mult_idx_2, method="pad") + expected = np.array([4, 6, 7], dtype=pad_indexer.dtype) + tm.assert_almost_equal(expected, pad_indexer) + + # ensure the legacy "ffill" option functions identically to "pad" + pad_indexer = mult_idx_1.get_indexer(mult_idx_2, method="ffill") + expected = np.array([4, 6, 7], dtype=pad_indexer.dtype) + tm.assert_almost_equal(expected, pad_indexer) + + def test_get_indexer_three_or_more_levels(self): + # https://github.com/pandas-dev/pandas/issues/29896 + # tests get_indexer() on MultiIndexes with 3+ levels + # visually, these are + # mult_idx_1: + # 0: 1 2 5 + # 1: 7 + # 2: 4 5 + # 3: 7 + # 4: 6 5 + # 5: 7 + # 6: 3 2 5 + # 7: 7 + # 8: 4 5 + # 9: 7 + # 10: 6 5 + # 11: 7 + # + # mult_idx_2: + # 0: 1 1 8 + # 1: 1 5 9 + # 2: 1 6 7 + # 3: 2 1 6 + # 4: 2 7 6 + # 5: 2 7 8 + # 6: 3 6 8 + mult_idx_1 = MultiIndex.from_product([[1, 3], [2, 4, 6], [5, 7]]) + mult_idx_2 = MultiIndex.from_tuples( + [ + (1, 1, 8), + (1, 5, 9), + (1, 6, 7), + (2, 1, 6), + (2, 7, 7), + (2, 7, 8), + (3, 6, 8), + ] + ) + # sanity check + assert mult_idx_1.is_monotonic + assert mult_idx_1.is_unique + assert mult_idx_2.is_monotonic + assert mult_idx_2.is_unique + + # show the relationships between the two + assert mult_idx_2[0] < mult_idx_1[0] + assert mult_idx_1[3] < mult_idx_2[1] < mult_idx_1[4] + assert mult_idx_1[5] == mult_idx_2[2] + assert mult_idx_1[5] < mult_idx_2[3] < mult_idx_1[6] + assert mult_idx_1[5] < mult_idx_2[4] < mult_idx_1[6] + assert mult_idx_1[5] < mult_idx_2[5] < mult_idx_1[6] + assert mult_idx_1[-1] < mult_idx_2[6] + + indexer_no_fill = mult_idx_1.get_indexer(mult_idx_2) + expected = np.array([-1, -1, 5, -1, -1, -1, -1], dtype=indexer_no_fill.dtype) + tm.assert_almost_equal(expected, indexer_no_fill) + + # test with backfilling + indexer_backfilled = mult_idx_1.get_indexer(mult_idx_2, method="backfill") + expected = np.array([0, 4, 5, 6, 6, 6, -1], dtype=indexer_backfilled.dtype) + tm.assert_almost_equal(expected, indexer_backfilled) + + # now, the same thing, but forward-filled (aka "padded") + indexer_padded = mult_idx_1.get_indexer(mult_idx_2, method="pad") + expected = np.array([-1, 3, 5, 5, 5, 5, 11], dtype=indexer_padded.dtype) + tm.assert_almost_equal(expected, indexer_padded) + + # now, do the indexing in the other direction + assert mult_idx_2[0] < mult_idx_1[0] < mult_idx_2[1] + assert mult_idx_2[0] < mult_idx_1[1] < mult_idx_2[1] + assert mult_idx_2[0] < mult_idx_1[2] < mult_idx_2[1] + assert mult_idx_2[0] < mult_idx_1[3] < mult_idx_2[1] + assert mult_idx_2[1] < mult_idx_1[4] < mult_idx_2[2] + assert mult_idx_2[2] == mult_idx_1[5] + assert mult_idx_2[5] < mult_idx_1[6] < mult_idx_2[6] + assert mult_idx_2[5] < mult_idx_1[7] < mult_idx_2[6] + assert mult_idx_2[5] < mult_idx_1[8] < mult_idx_2[6] + assert mult_idx_2[5] < mult_idx_1[9] < mult_idx_2[6] + assert mult_idx_2[5] < mult_idx_1[10] < mult_idx_2[6] + assert mult_idx_2[5] < mult_idx_1[11] < mult_idx_2[6] + + indexer = mult_idx_2.get_indexer(mult_idx_1) + expected = np.array( + [-1, -1, -1, -1, -1, 2, -1, -1, -1, -1, -1, -1], dtype=indexer.dtype + ) + tm.assert_almost_equal(expected, indexer) + + backfill_indexer = mult_idx_2.get_indexer(mult_idx_1, method="bfill") + expected = np.array( + [1, 1, 1, 1, 2, 2, 6, 6, 6, 6, 6, 6], dtype=backfill_indexer.dtype + ) + tm.assert_almost_equal(expected, backfill_indexer) + + pad_indexer = mult_idx_2.get_indexer(mult_idx_1, method="pad") + expected = np.array( + [0, 0, 0, 0, 1, 2, 5, 5, 
5, 5, 5, 5], dtype=pad_indexer.dtype + ) + tm.assert_almost_equal(expected, pad_indexer) + + def test_get_indexer_crossing_levels(self): + # https://github.com/pandas-dev/pandas/issues/29896 + # tests a corner case with get_indexer() with MultiIndexes where, when we + # need to "carry" across levels, proper tuple ordering is respected + # + # the MultiIndexes used in this test, visually, are: + # mult_idx_1: + # 0: 1 1 1 1 + # 1: 2 + # 2: 2 1 + # 3: 2 + # 4: 1 2 1 1 + # 5: 2 + # 6: 2 1 + # 7: 2 + # 8: 2 1 1 1 + # 9: 2 + # 10: 2 1 + # 11: 2 + # 12: 2 2 1 1 + # 13: 2 + # 14: 2 1 + # 15: 2 + # + # mult_idx_2: + # 0: 1 3 2 2 + # 1: 2 3 2 2 + mult_idx_1 = MultiIndex.from_product([[1, 2]] * 4) + mult_idx_2 = MultiIndex.from_tuples([(1, 3, 2, 2), (2, 3, 2, 2)]) + + # show the tuple orderings, which get_indexer() should respect + assert mult_idx_1[7] < mult_idx_2[0] < mult_idx_1[8] + assert mult_idx_1[-1] < mult_idx_2[1] + + indexer = mult_idx_1.get_indexer(mult_idx_2) + expected = np.array([-1, -1], dtype=indexer.dtype) + tm.assert_almost_equal(expected, indexer) + + backfill_indexer = mult_idx_1.get_indexer(mult_idx_2, method="bfill") + expected = np.array([8, -1], dtype=backfill_indexer.dtype) + tm.assert_almost_equal(expected, backfill_indexer) + + pad_indexer = mult_idx_1.get_indexer(mult_idx_2, method="ffill") + expected = np.array([7, 15], dtype=pad_indexer.dtype) + tm.assert_almost_equal(expected, pad_indexer) + + def test_get_indexer_kwarg_validation(self): + # GH#41918 + mi = MultiIndex.from_product([range(3), ["A", "B"]]) + + msg = "limit argument only valid if doing pad, backfill or nearest" + with pytest.raises(ValueError, match=msg): + mi.get_indexer(mi[:-1], limit=4) + + msg = "tolerance argument only valid if doing pad, backfill or nearest" + with pytest.raises(ValueError, match=msg): + mi.get_indexer(mi[:-1], tolerance="piano") + + +def test_getitem(idx): + # scalar + assert idx[2] == ("bar", "one") + + # slice + result = idx[2:5] + expected = idx[[2, 3, 4]] + assert result.equals(expected) + + # boolean + result = idx[[True, False, True, False, True, True]] + result2 = idx[np.array([True, False, True, False, True, True])] + expected = idx[[0, 2, 4, 5]] + assert result.equals(expected) + assert result2.equals(expected) + + +def test_getitem_group_select(idx): + sorted_idx, _ = idx.sortlevel(0) + assert sorted_idx.get_loc("baz") == slice(3, 4) + assert sorted_idx.get_loc("foo") == slice(0, 2) + + +@pytest.mark.parametrize("ind1", [[True] * 5, Index([True] * 5)]) +@pytest.mark.parametrize( + "ind2", + [[True, False, True, False, False], Index([True, False, True, False, False])], +) +def test_getitem_bool_index_all(ind1, ind2): + # GH#22533 + idx = MultiIndex.from_tuples([(10, 1), (20, 2), (30, 3), (40, 4), (50, 5)]) + tm.assert_index_equal(idx[ind1], idx) + + expected = MultiIndex.from_tuples([(10, 1), (30, 3)]) + tm.assert_index_equal(idx[ind2], expected) + + +@pytest.mark.parametrize("ind1", [[True], Index([True])]) +@pytest.mark.parametrize("ind2", [[False], Index([False])]) +def test_getitem_bool_index_single(ind1, ind2): + # GH#22533 + idx = MultiIndex.from_tuples([(10, 1)]) + tm.assert_index_equal(idx[ind1], idx) + + expected = MultiIndex( + levels=[np.array([], dtype=np.int64), np.array([], dtype=np.int64)], + codes=[[], []], + ) + tm.assert_index_equal(idx[ind2], expected) + + +class TestGetLoc: + def test_get_loc(self, idx): + assert idx.get_loc(("foo", "two")) == 1 + assert idx.get_loc(("baz", "two")) == 3 + with pytest.raises(KeyError, match=r"^10$"): + 
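+ # ("bar", "two") is not among the fixture's six tuples, so the lookup should raise KeyError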
idx.get_loc(("bar", "two")) + with pytest.raises(KeyError, match=r"^'quux'$"): + idx.get_loc("quux") + + msg = "only the default get_loc method is currently supported for MultiIndex" + with pytest.raises(NotImplementedError, match=msg): + idx.get_loc("foo", method="nearest") + + # 3 levels + index = MultiIndex( + levels=[Index(np.arange(4)), Index(np.arange(4)), Index(np.arange(4))], + codes=[ + np.array([0, 0, 1, 2, 2, 2, 3, 3]), + np.array([0, 1, 0, 0, 0, 1, 0, 1]), + np.array([1, 0, 1, 1, 0, 0, 1, 0]), + ], + ) + with pytest.raises(KeyError, match=r"^\(1, 1\)$"): + index.get_loc((1, 1)) + assert index.get_loc((2, 0)) == slice(3, 5) + + def test_get_loc_duplicates(self): + index = Index([2, 2, 2, 2]) + result = index.get_loc(2) + expected = slice(0, 4) + assert result == expected + + index = Index(["c", "a", "a", "b", "b"]) + rs = index.get_loc("c") + xp = 0 + assert rs == xp + + with pytest.raises(KeyError, match="2"): + index.get_loc(2) + + def test_get_loc_level(self): + index = MultiIndex( + levels=[Index(np.arange(4)), Index(np.arange(4)), Index(np.arange(4))], + codes=[ + np.array([0, 0, 1, 2, 2, 2, 3, 3]), + np.array([0, 1, 0, 0, 0, 1, 0, 1]), + np.array([1, 0, 1, 1, 0, 0, 1, 0]), + ], + ) + loc, new_index = index.get_loc_level((0, 1)) + expected = slice(1, 2) + exp_index = index[expected].droplevel(0).droplevel(0) + assert loc == expected + assert new_index.equals(exp_index) + + loc, new_index = index.get_loc_level((0, 1, 0)) + expected = 1 + assert loc == expected + assert new_index is None + + with pytest.raises(KeyError, match=r"^\(2, 2\)$"): + index.get_loc_level((2, 2)) + # GH 22221: unused label + with pytest.raises(KeyError, match=r"^2$"): + index.drop(2).get_loc_level(2) + # Unused label on unsorted level: + with pytest.raises(KeyError, match=r"^2$"): + index.drop(1, level=2).get_loc_level(2, level=2) + + index = MultiIndex( + levels=[[2000], list(range(4))], + codes=[np.array([0, 0, 0, 0]), np.array([0, 1, 2, 3])], + ) + result, new_index = index.get_loc_level((2000, slice(None, None))) + expected = slice(None, None) + assert result == expected + assert new_index.equals(index.droplevel(0)) + + @pytest.mark.parametrize("dtype1", [int, float, bool, str]) + @pytest.mark.parametrize("dtype2", [int, float, bool, str]) + def test_get_loc_multiple_dtypes(self, dtype1, dtype2): + # GH 18520 + levels = [np.array([0, 1]).astype(dtype1), np.array([0, 1]).astype(dtype2)] + idx = MultiIndex.from_product(levels) + assert idx.get_loc(idx[2]) == 2 + + @pytest.mark.parametrize("level", [0, 1]) + @pytest.mark.parametrize("dtypes", [[int, float], [float, int]]) + def test_get_loc_implicit_cast(self, level, dtypes): + # GH 18818, GH 15994 : as flat index, cast int to float and vice-versa + levels = [["a", "b"], ["c", "d"]] + key = ["b", "d"] + lev_dtype, key_dtype = dtypes + levels[level] = np.array([0, 1], dtype=lev_dtype) + key[level] = key_dtype(1) + idx = MultiIndex.from_product(levels) + assert idx.get_loc(tuple(key)) == 3 + + def test_get_loc_cast_bool(self): + # GH 19086 : int is casted to bool, but not vice-versa + levels = [[False, True], np.arange(2, dtype="int64")] + idx = MultiIndex.from_product(levels) + + assert idx.get_loc((0, 1)) == 1 + assert idx.get_loc((1, 0)) == 2 + + with pytest.raises(KeyError, match=r"^\(False, True\)$"): + idx.get_loc((False, True)) + with pytest.raises(KeyError, match=r"^\(True, False\)$"): + idx.get_loc((True, False)) + + @pytest.mark.parametrize("level", [0, 1]) + def test_get_loc_nan(self, level, nulls_fixture): + # GH 18485 : NaN in MultiIndex + 
levels = [["a", "b"], ["c", "d"]] + key = ["b", "d"] + levels[level] = np.array([0, nulls_fixture], dtype=type(nulls_fixture)) + key[level] = nulls_fixture + idx = MultiIndex.from_product(levels) + assert idx.get_loc(tuple(key)) == 3 + + def test_get_loc_missing_nan(self): + # GH 8569 + idx = MultiIndex.from_arrays([[1.0, 2.0], [3.0, 4.0]]) + assert isinstance(idx.get_loc(1), slice) + with pytest.raises(KeyError, match=r"^3$"): + idx.get_loc(3) + with pytest.raises(KeyError, match=r"^nan$"): + idx.get_loc(np.nan) + with pytest.raises(InvalidIndexError, match=r"\[nan\]"): + # listlike/non-hashable raises TypeError + idx.get_loc([np.nan]) + + def test_get_loc_with_values_including_missing_values(self): + # issue 19132 + idx = MultiIndex.from_product([[np.nan, 1]] * 2) + expected = slice(0, 2, None) + assert idx.get_loc(np.nan) == expected + + idx = MultiIndex.from_arrays([[np.nan, 1, 2, np.nan]]) + expected = np.array([True, False, False, True]) + tm.assert_numpy_array_equal(idx.get_loc(np.nan), expected) + + idx = MultiIndex.from_product([[np.nan, 1]] * 3) + expected = slice(2, 4, None) + assert idx.get_loc((np.nan, 1)) == expected + + def test_get_loc_duplicates2(self): + # TODO: de-duplicate with test_get_loc_duplicates above? + index = MultiIndex( + levels=[["D", "B", "C"], [0, 26, 27, 37, 57, 67, 75, 82]], + codes=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], + names=["tag", "day"], + ) + + assert index.get_loc("D") == slice(0, 3) + + def test_get_loc_past_lexsort_depth(self): + # GH#30053 + idx = MultiIndex( + levels=[["a"], [0, 7], [1]], + codes=[[0, 0], [1, 0], [0, 0]], + names=["x", "y", "z"], + sortorder=0, + ) + key = ("a", 7) + + with tm.assert_produces_warning(PerformanceWarning): + # PerformanceWarning: indexing past lexsort depth may impact performance + result = idx.get_loc(key) + + assert result == slice(0, 1, None) + + def test_multiindex_get_loc_list_raises(self): + # GH#35878 + idx = MultiIndex.from_tuples([("a", 1), ("b", 2)]) + msg = r"\[\]" + with pytest.raises(InvalidIndexError, match=msg): + idx.get_loc([]) + + def test_get_loc_nested_tuple_raises_keyerror(self): + # raise KeyError, not TypeError + mi = MultiIndex.from_product([range(3), range(4), range(5), range(6)]) + key = ((2, 3, 4), "foo") + + with pytest.raises(KeyError, match=re.escape(str(key))): + mi.get_loc(key) + + +class TestWhere: + def test_where(self): + i = MultiIndex.from_tuples([("A", 1), ("A", 2)]) + + msg = r"\.where is not supported for MultiIndex operations" + with pytest.raises(NotImplementedError, match=msg): + i.where(True) + + def test_where_array_like(self, listlike_box): + mi = MultiIndex.from_tuples([("A", 1), ("A", 2)]) + cond = [False, True] + msg = r"\.where is not supported for MultiIndex operations" + with pytest.raises(NotImplementedError, match=msg): + mi.where(listlike_box(cond)) + + +class TestContains: + def test_contains_top_level(self): + midx = MultiIndex.from_product([["A", "B"], [1, 2]]) + assert "A" in midx + assert "A" not in midx._engine + + def test_contains_with_nat(self): + # MI with a NaT + mi = MultiIndex( + levels=[["C"], date_range("2012-01-01", periods=5)], + codes=[[0, 0, 0, 0, 0, 0], [-1, 0, 1, 2, 3, 4]], + names=[None, "B"], + ) + assert ("C", pd.Timestamp("2012-01-01")) in mi + for val in mi.values: + assert val in mi + + def test_contains(self, idx): + assert ("foo", "two") in idx + assert ("bar", "two") not in idx + assert None not in idx + + def test_contains_with_missing_value(self): + # GH#19132 + idx = MultiIndex.from_arrays([[1, 
np.nan, 2]]) + assert np.nan in idx + + idx = MultiIndex.from_arrays([[1, 2], [np.nan, 3]]) + assert np.nan not in idx + assert (1, np.nan) in idx + + def test_multiindex_contains_dropped(self): + # GH#19027 + # test that dropped MultiIndex levels are not in the MultiIndex + # despite continuing to be in the MultiIndex's levels + idx = MultiIndex.from_product([[1, 2], [3, 4]]) + assert 2 in idx + idx = idx.drop(2) + + # drop implementation keeps 2 in the levels + assert 2 in idx.levels[0] + # but it should no longer be in the index itself + assert 2 not in idx + + # also applies to strings + idx = MultiIndex.from_product([["a", "b"], ["c", "d"]]) + assert "a" in idx + idx = idx.drop("a") + assert "a" in idx.levels[0] + assert "a" not in idx + + def test_contains_td64_level(self): + # GH#24570 + tx = pd.timedelta_range("09:30:00", "16:00:00", freq="30 min") + idx = MultiIndex.from_arrays([tx, np.arange(len(tx))]) + assert tx[0] in idx + assert "element_not_exit" not in idx + assert "0 day 09:30:00" in idx + + @pytest.mark.slow + def test_large_mi_contains(self): + # GH#10645 + result = MultiIndex.from_arrays([range(10 ** 6), range(10 ** 6)]) + assert not (10 ** 6, 0) in result + + +def test_timestamp_multiindex_indexer(): + # https://github.com/pandas-dev/pandas/issues/26944 + idx = MultiIndex.from_product( + [ + date_range("2019-01-01T00:15:33", periods=100, freq="H", name="date"), + ["x"], + [3], + ] + ) + df = pd.DataFrame({"foo": np.arange(len(idx))}, idx) + result = df.loc[pd.IndexSlice["2019-1-2":, "x", :], "foo"] + qidx = MultiIndex.from_product( + [ + date_range( + start="2019-01-02T00:15:33", + end="2019-01-05T03:15:33", + freq="H", + name="date", + ), + ["x"], + [3], + ] + ) + should_be = pd.Series(data=np.arange(24, len(qidx) + 24), index=qidx, name="foo") + tm.assert_series_equal(result, should_be) + + +@pytest.mark.parametrize( + "index_arr,expected,target,algo", + [ + ([[np.nan, "a", "b"], ["c", "d", "e"]], 0, np.nan, "left"), + ([[np.nan, "a", "b"], ["c", "d", "e"]], 1, (np.nan, "c"), "right"), + ([["a", "b", "c"], ["d", np.nan, "d"]], 1, ("b", np.nan), "left"), + ], +) +def test_get_slice_bound_with_missing_value(index_arr, expected, target, algo): + # issue 19132 + idx = MultiIndex.from_arrays(index_arr) + with tm.assert_produces_warning(FutureWarning, match="'kind' argument"): + result = idx.get_slice_bound(target, side=algo, kind="loc") + assert result == expected + + +@pytest.mark.parametrize( + "index_arr,expected,start_idx,end_idx", + [ + ([[np.nan, 1, 2], [3, 4, 5]], slice(0, 2, None), np.nan, 1), + ([[np.nan, 1, 2], [3, 4, 5]], slice(0, 3, None), np.nan, (2, 5)), + ([[1, 2, 3], [4, np.nan, 5]], slice(1, 3, None), (2, np.nan), 3), + ([[1, 2, 3], [4, np.nan, 5]], slice(1, 3, None), (2, np.nan), (3, 5)), + ], +) +def test_slice_indexer_with_missing_value(index_arr, expected, start_idx, end_idx): + # issue 19132 + idx = MultiIndex.from_arrays(index_arr) + result = idx.slice_indexer(start=start_idx, end=end_idx) + assert result == expected + + +def test_pyint_engine(): + # GH#18519 : when combinations of codes cannot be represented in 64 + # bits, the index underlying the MultiIndex engine works with Python + # integers, rather than uint64. + N = 5 + keys = [ + tuple(arr) + for arr in [ + [0] * 10 * N, + [1] * 10 * N, + [2] * 10 * N, + [np.nan] * N + [2] * 9 * N, + [0] * N + [2] * 9 * N, + [np.nan] * N + [2] * 8 * N + [0] * N, + ] + ] + # Each level contains 4 elements (including NaN), so it is represented + # in 2 bits, for a total of 2*N*10 = 100 > 64 bits. 
If we were using a + # 64 bit engine and truncating the first levels, the fourth and fifth + # keys would collide; if truncating the last levels, the fifth and + # sixth; if rotating bits rather than shifting, the third and fifth. + + for idx in range(len(keys)): + index = MultiIndex.from_tuples(keys) + assert index.get_loc(keys[idx]) == idx + + expected = np.arange(idx + 1, dtype=np.intp) + result = index.get_indexer([keys[i] for i in expected]) + tm.assert_numpy_array_equal(result, expected) + + # With missing key: + idces = range(len(keys)) + expected = np.array([-1] + list(idces), dtype=np.intp) + missing = tuple([0, 1] * 5 * N) + result = index.get_indexer([missing] + [keys[i] for i in idces]) + tm.assert_numpy_array_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_integrity.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_integrity.py new file mode 100644 index 0000000..e1c2134 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_integrity.py @@ -0,0 +1,280 @@ +import re + +import numpy as np +import pytest + +from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike + +import pandas as pd +from pandas import ( + IntervalIndex, + MultiIndex, + RangeIndex, +) +import pandas._testing as tm +from pandas.core.api import Int64Index + + +def test_labels_dtypes(): + + # GH 8456 + i = MultiIndex.from_tuples([("A", 1), ("A", 2)]) + assert i.codes[0].dtype == "int8" + assert i.codes[1].dtype == "int8" + + i = MultiIndex.from_product([["a"], range(40)]) + assert i.codes[1].dtype == "int8" + i = MultiIndex.from_product([["a"], range(400)]) + assert i.codes[1].dtype == "int16" + i = MultiIndex.from_product([["a"], range(40000)]) + assert i.codes[1].dtype == "int32" + + i = MultiIndex.from_product([["a"], range(1000)]) + assert (i.codes[0] >= 0).all() + assert (i.codes[1] >= 0).all() + + +def test_values_boxed(): + tuples = [ + (1, pd.Timestamp("2000-01-01")), + (2, pd.NaT), + (3, pd.Timestamp("2000-01-03")), + (1, pd.Timestamp("2000-01-04")), + (2, pd.Timestamp("2000-01-02")), + (3, pd.Timestamp("2000-01-03")), + ] + result = MultiIndex.from_tuples(tuples) + expected = construct_1d_object_array_from_listlike(tuples) + tm.assert_numpy_array_equal(result.values, expected) + # Check that code branches for boxed values produce identical results + tm.assert_numpy_array_equal(result.values[:4], result[:4].values) + + +def test_values_multiindex_datetimeindex(): + # Test to ensure we hit the boxing / nobox part of MI.values + ints = np.arange(10 ** 18, 10 ** 18 + 5) + naive = pd.DatetimeIndex(ints) + + aware = pd.DatetimeIndex(ints, tz="US/Central") + + idx = MultiIndex.from_arrays([naive, aware]) + result = idx.values + + outer = pd.DatetimeIndex([x[0] for x in result]) + tm.assert_index_equal(outer, naive) + + inner = pd.DatetimeIndex([x[1] for x in result]) + tm.assert_index_equal(inner, aware) + + # n_lev > n_lab + result = idx[:2].values + + outer = pd.DatetimeIndex([x[0] for x in result]) + tm.assert_index_equal(outer, naive[:2]) + + inner = pd.DatetimeIndex([x[1] for x in result]) + tm.assert_index_equal(inner, aware[:2]) + + +def test_values_multiindex_periodindex(): + # Test to ensure we hit the boxing / nobox part of MI.values + ints = np.arange(2007, 2012) + pidx = pd.PeriodIndex(ints, freq="D") + + idx = MultiIndex.from_arrays([ints, pidx]) + result = idx.values + + outer = Int64Index([x[0] for x in result]) + tm.assert_index_equal(outer, Int64Index(ints)) + + 
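+ # the second element of each tuple should box back into Period objects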
inner = pd.PeriodIndex([x[1] for x in result]) + tm.assert_index_equal(inner, pidx) + + # n_lev > n_lab + result = idx[:2].values + + outer = Int64Index([x[0] for x in result]) + tm.assert_index_equal(outer, Int64Index(ints[:2])) + + inner = pd.PeriodIndex([x[1] for x in result]) + tm.assert_index_equal(inner, pidx[:2]) + + +def test_consistency(): + # need to construct an overflow + major_axis = list(range(70000)) + minor_axis = list(range(10)) + + major_codes = np.arange(70000) + minor_codes = np.repeat(range(10), 7000) + + # the fact that this works means it's consistent + index = MultiIndex( + levels=[major_axis, minor_axis], codes=[major_codes, minor_codes] + ) + + # inconsistent + major_codes = np.array([0, 0, 1, 1, 1, 2, 2, 3, 3]) + minor_codes = np.array([0, 1, 0, 1, 1, 0, 1, 0, 1]) + index = MultiIndex( + levels=[major_axis, minor_axis], codes=[major_codes, minor_codes] + ) + + assert index.is_unique is False + + +@pytest.mark.slow +def test_hash_collisions(): + # non-smoke test that we don't get hash collisions + + index = MultiIndex.from_product( + [np.arange(1000), np.arange(1000)], names=["one", "two"] + ) + result = index.get_indexer(index.values) + tm.assert_numpy_array_equal(result, np.arange(len(index), dtype="intp")) + + for i in [0, 1, len(index) - 2, len(index) - 1]: + result = index.get_loc(index[i]) + assert result == i + + +def test_dims(): + pass + + +def test_take_invalid_kwargs(): + vals = [["A", "B"], [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]] + idx = MultiIndex.from_product(vals, names=["str", "dt"]) + indices = [1, 2] + + msg = r"take\(\) got an unexpected keyword argument 'foo'" + with pytest.raises(TypeError, match=msg): + idx.take(indices, foo=2) + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + idx.take(indices, out=indices) + + msg = "the 'mode' parameter is not supported" + with pytest.raises(ValueError, match=msg): + idx.take(indices, mode="clip") + + +def test_isna_behavior(idx): + # should not segfault GH5123 + # NOTE: if MI representation changes, may make sense to allow + # isna(MI) + msg = "isna is not defined for MultiIndex" + with pytest.raises(NotImplementedError, match=msg): + pd.isna(idx) + + +def test_large_multiindex_error(): + # GH12527 + df_below_1000000 = pd.DataFrame( + 1, index=MultiIndex.from_product([[1, 2], range(499999)]), columns=["dest"] + ) + with pytest.raises(KeyError, match=r"^\(-1, 0\)$"): + df_below_1000000.loc[(-1, 0), "dest"] + with pytest.raises(KeyError, match=r"^\(3, 0\)$"): + df_below_1000000.loc[(3, 0), "dest"] + df_above_1000000 = pd.DataFrame( + 1, index=MultiIndex.from_product([[1, 2], range(500001)]), columns=["dest"] + ) + with pytest.raises(KeyError, match=r"^\(-1, 0\)$"): + df_above_1000000.loc[(-1, 0), "dest"] + with pytest.raises(KeyError, match=r"^\(3, 0\)$"): + df_above_1000000.loc[(3, 0), "dest"] + + +def test_million_record_attribute_error(): + # GH 18165 + r = list(range(1000000)) + df = pd.DataFrame( + {"a": r, "b": r}, index=MultiIndex.from_tuples([(x, x) for x in r]) + ) + + msg = "'Series' object has no attribute 'foo'" + with pytest.raises(AttributeError, match=msg): + df["a"].foo() + + +def test_can_hold_identifiers(idx): + key = idx[0] + assert idx._can_hold_identifiers_and_holds_name(key) is True + + +def test_metadata_immutable(idx): + levels, codes = idx.levels, idx.codes + # shouldn't be able to set at either the top level or base level + mutable_regex = re.compile("does not support mutable operations") + with pytest.raises(TypeError,
match=mutable_regex): + levels[0] = levels[0] + with pytest.raises(TypeError, match=mutable_regex): + levels[0][0] = levels[0][0] + # ditto for labels + with pytest.raises(TypeError, match=mutable_regex): + codes[0] = codes[0] + with pytest.raises(ValueError, match="assignment destination is read-only"): + codes[0][0] = codes[0][0] + # and for names + names = idx.names + with pytest.raises(TypeError, match=mutable_regex): + names[0] = names[0] + + +def test_level_setting_resets_attributes(): + ind = MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]]) + assert ind.is_monotonic + with tm.assert_produces_warning(FutureWarning): + ind.set_levels([["A", "B"], [1, 3, 2]], inplace=True) + # if this fails, probably didn't reset the cache correctly. + assert not ind.is_monotonic + + +def test_rangeindex_fallback_coercion_bug(): + # GH 12893 + foo = pd.DataFrame(np.arange(100).reshape((10, 10))) + bar = pd.DataFrame(np.arange(100).reshape((10, 10))) + df = pd.concat({"foo": foo.stack(), "bar": bar.stack()}, axis=1) + df.index.names = ["fizz", "buzz"] + + str(df) + expected = pd.DataFrame( + {"bar": np.arange(100), "foo": np.arange(100)}, + index=MultiIndex.from_product([range(10), range(10)], names=["fizz", "buzz"]), + ) + tm.assert_frame_equal(df, expected, check_like=True) + + result = df.index.get_level_values("fizz") + expected = Int64Index(np.arange(10), name="fizz").repeat(10) + tm.assert_index_equal(result, expected) + + result = df.index.get_level_values("buzz") + expected = Int64Index(np.tile(np.arange(10), 10), name="buzz") + tm.assert_index_equal(result, expected) + + +def test_memory_usage(idx): + result = idx.memory_usage() + if len(idx): + idx.get_loc(idx[0]) + result2 = idx.memory_usage() + result3 = idx.memory_usage(deep=True) + + # RangeIndex, IntervalIndex + # don't have engines + if not isinstance(idx, (RangeIndex, IntervalIndex)): + assert result2 > result + + if idx.inferred_type == "object": + assert result3 > result2 + + else: + + # we report 0 for no-length + assert result == 0 + + +def test_nlevels(idx): + assert idx.nlevels == 2 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_isin.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_isin.py new file mode 100644 index 0000000..6954582 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_isin.py @@ -0,0 +1,78 @@ +import numpy as np +import pytest + +from pandas import MultiIndex +import pandas._testing as tm + + +def test_isin_nan(): + idx = MultiIndex.from_arrays([["foo", "bar"], [1.0, np.nan]]) + tm.assert_numpy_array_equal(idx.isin([("bar", np.nan)]), np.array([False, True])) + tm.assert_numpy_array_equal( + idx.isin([("bar", float("nan"))]), np.array([False, True]) + ) + + +def test_isin(): + values = [("foo", 2), ("bar", 3), ("quux", 4)] + + idx = MultiIndex.from_arrays([["qux", "baz", "foo", "bar"], np.arange(4)]) + result = idx.isin(values) + expected = np.array([False, False, True, True]) + tm.assert_numpy_array_equal(result, expected) + + # empty, return dtype bool + idx = MultiIndex.from_arrays([[], []]) + result = idx.isin(values) + assert len(result) == 0 + assert result.dtype == np.bool_ + + +def test_isin_level_kwarg(): + idx = MultiIndex.from_arrays([["qux", "baz", "foo", "bar"], np.arange(4)]) + + vals_0 = ["foo", "bar", "quux"] + vals_1 = [2, 3, 10] + + expected = np.array([False, False, True, True]) + tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level=0)) + tm.assert_numpy_array_equal(expected, 
idx.isin(vals_0, level=-2)) + + tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=1)) + tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=-1)) + + msg = "Too many levels: Index has only 2 levels, not 6" + with pytest.raises(IndexError, match=msg): + idx.isin(vals_0, level=5) + msg = "Too many levels: Index has only 2 levels, -5 is not a valid level number" + with pytest.raises(IndexError, match=msg): + idx.isin(vals_0, level=-5) + + with pytest.raises(KeyError, match=r"'Level 1\.0 not found'"): + idx.isin(vals_0, level=1.0) + with pytest.raises(KeyError, match=r"'Level -1\.0 not found'"): + idx.isin(vals_1, level=-1.0) + with pytest.raises(KeyError, match="'Level A not found'"): + idx.isin(vals_1, level="A") + + idx.names = ["A", "B"] + tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level="A")) + tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level="B")) + + with pytest.raises(KeyError, match="'Level C not found'"): + idx.isin(vals_1, level="C") + + +@pytest.mark.parametrize( + "labels,expected,level", + [ + ([("b", np.nan)], np.array([False, False, True]), None), + ([np.nan, "a"], np.array([True, True, False]), 0), + (["d", np.nan], np.array([False, True, True]), 1), + ], +) +def test_isin_multi_index_with_missing_value(labels, expected, level): + # GH 19132 + midx = MultiIndex.from_arrays([[np.nan, "a", "b"], ["c", "d", np.nan]]) + result = midx.isin(labels, level=level) + tm.assert_numpy_array_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_join.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_join.py new file mode 100644 index 0000000..e6bec97 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_join.py @@ -0,0 +1,160 @@ +import numpy as np +import pytest + +from pandas import ( + Index, + Interval, + MultiIndex, +) +import pandas._testing as tm + + +@pytest.mark.parametrize( + "other", [Index(["three", "one", "two"]), Index(["one"]), Index(["one", "three"])] +) +def test_join_level(idx, other, join_type): + join_index, lidx, ridx = other.join( + idx, how=join_type, level="second", return_indexers=True + ) + + exp_level = other.join(idx.levels[1], how=join_type) + assert join_index.levels[0].equals(idx.levels[0]) + assert join_index.levels[1].equals(exp_level) + + # pare down levels + mask = np.array([x[1] in exp_level for x in idx], dtype=bool) + exp_values = idx.values[mask] + tm.assert_numpy_array_equal(join_index.values, exp_values) + + if join_type in ("outer", "inner"): + join_index2, ridx2, lidx2 = idx.join( + other, how=join_type, level="second", return_indexers=True + ) + + assert join_index.equals(join_index2) + tm.assert_numpy_array_equal(lidx, lidx2) + tm.assert_numpy_array_equal(ridx, ridx2) + tm.assert_numpy_array_equal(join_index2.values, exp_values) + + +def test_join_level_corner_case(idx): + # some corner cases + index = Index(["three", "one", "two"]) + result = index.join(idx, level="second") + assert isinstance(result, MultiIndex) + + with pytest.raises(TypeError, match="Join.*MultiIndex.*ambiguous"): + idx.join(idx, level=1) + + +def test_join_self(idx, join_type): + joined = idx.join(idx, how=join_type) + tm.assert_index_equal(joined, idx) + + +def test_join_multi(): + # GH 10665 + midx = MultiIndex.from_product([np.arange(4), np.arange(4)], names=["a", "b"]) + idx = Index([1, 2, 5], name="b") + + # inner + jidx, lidx, ridx = midx.join(idx, how="inner", return_indexers=True) + exp_idx = 
MultiIndex.from_product([np.arange(4), [1, 2]], names=["a", "b"]) + exp_lidx = np.array([1, 2, 5, 6, 9, 10, 13, 14], dtype=np.intp) + exp_ridx = np.array([0, 1, 0, 1, 0, 1, 0, 1], dtype=np.intp) + tm.assert_index_equal(jidx, exp_idx) + tm.assert_numpy_array_equal(lidx, exp_lidx) + tm.assert_numpy_array_equal(ridx, exp_ridx) + # flip + jidx, ridx, lidx = idx.join(midx, how="inner", return_indexers=True) + tm.assert_index_equal(jidx, exp_idx) + tm.assert_numpy_array_equal(lidx, exp_lidx) + tm.assert_numpy_array_equal(ridx, exp_ridx) + + # keep MultiIndex + jidx, lidx, ridx = midx.join(idx, how="left", return_indexers=True) + exp_ridx = np.array( + [-1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1], dtype=np.intp + ) + tm.assert_index_equal(jidx, midx) + assert lidx is None + tm.assert_numpy_array_equal(ridx, exp_ridx) + # flip + jidx, ridx, lidx = idx.join(midx, how="right", return_indexers=True) + tm.assert_index_equal(jidx, midx) + assert lidx is None + tm.assert_numpy_array_equal(ridx, exp_ridx) + + +def test_join_self_unique(idx, join_type): + if idx.is_unique: + joined = idx.join(idx, how=join_type) + assert (idx == joined).all() + + +def test_join_multi_wrong_order(): + # GH 25760 + # GH 28956 + + midx1 = MultiIndex.from_product([[1, 2], [3, 4]], names=["a", "b"]) + midx2 = MultiIndex.from_product([[1, 2], [3, 4]], names=["b", "a"]) + + join_idx, lidx, ridx = midx1.join(midx2, return_indexers=True) + + exp_ridx = np.array([-1, -1, -1, -1], dtype=np.intp) + + tm.assert_index_equal(midx1, join_idx) + assert lidx is None + tm.assert_numpy_array_equal(ridx, exp_ridx) + + +def test_join_multi_return_indexers(): + # GH 34074 + + midx1 = MultiIndex.from_product([[1, 2], [3, 4], [5, 6]], names=["a", "b", "c"]) + midx2 = MultiIndex.from_product([[1, 2], [3, 4]], names=["a", "b"]) + + result = midx1.join(midx2, return_indexers=False) + tm.assert_index_equal(result, midx1) + + +def test_join_overlapping_interval_level(): + # GH 44096 + idx_1 = MultiIndex.from_tuples( + [ + (1, Interval(0.0, 1.0)), + (1, Interval(1.0, 2.0)), + (1, Interval(2.0, 5.0)), + (2, Interval(0.0, 1.0)), + (2, Interval(1.0, 3.0)), # interval limit is here at 3.0, not at 2.0 + (2, Interval(3.0, 5.0)), + ], + names=["num", "interval"], + ) + + idx_2 = MultiIndex.from_tuples( + [ + (1, Interval(2.0, 5.0)), + (1, Interval(0.0, 1.0)), + (1, Interval(1.0, 2.0)), + (2, Interval(3.0, 5.0)), + (2, Interval(0.0, 1.0)), + (2, Interval(1.0, 3.0)), + ], + names=["num", "interval"], + ) + + expected = MultiIndex.from_tuples( + [ + (1, Interval(0.0, 1.0)), + (1, Interval(1.0, 2.0)), + (1, Interval(2.0, 5.0)), + (2, Interval(0.0, 1.0)), + (2, Interval(1.0, 3.0)), + (2, Interval(3.0, 5.0)), + ], + names=["num", "interval"], + ) + result = idx_1.join(idx_2, how="outer") + + tm.assert_index_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_lexsort.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_lexsort.py new file mode 100644 index 0000000..c37172a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_lexsort.py @@ -0,0 +1,57 @@ +from pandas import MultiIndex +import pandas._testing as tm + + +class TestIsLexsorted: + def test_is_lexsorted(self): + levels = [[0, 1], [0, 1, 2]] + + index = MultiIndex( + levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]] + ) + assert index._is_lexsorted() + + index = MultiIndex( + levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]] + ) + assert not index._is_lexsorted() + + 
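+ # first-level codes out of sorted order: no lexsort depth remains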
index = MultiIndex( + levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]] + ) + assert not index._is_lexsorted() + assert index._lexsort_depth == 0 + + def test_is_lexsorted_deprecation(self): + # GH 32259 + with tm.assert_produces_warning(): + MultiIndex.from_arrays([["a", "b", "c"], ["d", "f", "e"]]).is_lexsorted() + + +class TestLexsortDepth: + def test_lexsort_depth(self): + # Test that lexsort_depth returns the correct sortorder + # when it is given to the MultiIndex constructor. + # GH#28518 + + levels = [[0, 1], [0, 1, 2]] + + index = MultiIndex( + levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2 + ) + assert index._lexsort_depth == 2 + + index = MultiIndex( + levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], sortorder=1 + ) + assert index._lexsort_depth == 1 + + index = MultiIndex( + levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=0 + ) + assert index._lexsort_depth == 0 + + def test_lexsort_depth_deprecation(self): + # GH 32259 + with tm.assert_produces_warning(): + MultiIndex.from_arrays([["a", "b", "c"], ["d", "f", "e"]]).lexsort_depth diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_missing.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_missing.py new file mode 100644 index 0000000..cd95802 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_missing.py @@ -0,0 +1,112 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import MultiIndex +import pandas._testing as tm + + +def test_fillna(idx): + # GH 11343 + msg = "isna is not defined for MultiIndex" + with pytest.raises(NotImplementedError, match=msg): + idx.fillna(idx[0]) + + +def test_dropna(): + # GH 6194 + idx = MultiIndex.from_arrays( + [ + [1, np.nan, 3, np.nan, 5], + [1, 2, np.nan, np.nan, 5], + ["a", "b", "c", np.nan, "e"], + ] + ) + + exp = MultiIndex.from_arrays([[1, 5], [1, 5], ["a", "e"]]) + tm.assert_index_equal(idx.dropna(), exp) + tm.assert_index_equal(idx.dropna(how="any"), exp) + + exp = MultiIndex.from_arrays( + [[1, np.nan, 3, 5], [1, 2, np.nan, 5], ["a", "b", "c", "e"]] + ) + tm.assert_index_equal(idx.dropna(how="all"), exp) + + msg = "invalid how option: xxx" + with pytest.raises(ValueError, match=msg): + idx.dropna(how="xxx") + + # GH26408 + # test that missing values are dropped for a multiindex constructed + # from codes and values + idx = MultiIndex( + levels=[[np.nan, None, pd.NaT, "128", 2], [np.nan, None, pd.NaT, "128", 2]], + codes=[[0, -1, 1, 2, 3, 4], [0, -1, 3, 3, 3, 4]], + ) + expected = MultiIndex.from_arrays([["128", 2], ["128", 2]]) + tm.assert_index_equal(idx.dropna(), expected) + tm.assert_index_equal(idx.dropna(how="any"), expected) + + expected = MultiIndex.from_arrays( + [[np.nan, np.nan, "128", 2], ["128", "128", "128", 2]] + ) + tm.assert_index_equal(idx.dropna(how="all"), expected) + + +def test_nulls(idx): + # this is really just a smoke test for these methods, + # as their behavior is adequately tested elsewhere + + msg = "isna is not defined for MultiIndex" + with pytest.raises(NotImplementedError, match=msg): + idx.isna() + + +@pytest.mark.xfail(reason="isna is not defined for MultiIndex") +def test_hasnans_isnans(idx): + # GH 11343, added tests for hasnans / isnans + index = idx.copy() + + # case: the index doesn't include NaN + expected = np.array([False] * len(index), dtype=bool) + tm.assert_numpy_array_equal(index._isnan, expected) + assert index.hasnans is False + + index = idx.copy() + values =
index.values + values[1] = np.nan + + index = type(idx)(values) + + expected = np.array([False] * len(index), dtype=bool) + expected[1] = True + tm.assert_numpy_array_equal(index._isnan, expected) + assert index.hasnans is True + + +def test_nan_stays_float(): + + # GH 7031 + idx0 = MultiIndex(levels=[["A", "B"], []], codes=[[1, 0], [-1, -1]], names=[0, 1]) + idx1 = MultiIndex(levels=[["C"], ["D"]], codes=[[0], [0]], names=[0, 1]) + idxm = idx0.join(idx1, how="outer") + assert pd.isna(idx0.get_level_values(1)).all() + # the following failed in 0.14.1 + assert pd.isna(idxm.get_level_values(1)[:-1]).all() + + df0 = pd.DataFrame([[1, 2]], index=idx0) + df1 = pd.DataFrame([[3, 4]], index=idx1) + dfm = df0 - df1 + assert pd.isna(df0.index.get_level_values(1)).all() + # the following failed in 0.14.1 + assert pd.isna(dfm.index.get_level_values(1)[:-1]).all() + + +def test_tuples_have_na(): + index = MultiIndex( + levels=[[1, 0], [0, 1, 2, 3]], + codes=[[1, 1, 1, 1, -1, 0, 0, 0], [0, 1, 2, 3, 0, 1, 2, 3]], + ) + + assert pd.isna(index[4][0]) + assert pd.isna(index.values[4][0]) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_monotonic.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_monotonic.py new file mode 100644 index 0000000..74be9f8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_monotonic.py @@ -0,0 +1,188 @@ +import numpy as np +import pytest + +from pandas import ( + Index, + MultiIndex, +) + + +def test_is_monotonic_increasing_lexsorted(lexsorted_two_level_string_multiindex): + # string ordering + mi = lexsorted_two_level_string_multiindex + assert mi.is_monotonic is False + assert Index(mi.values).is_monotonic is False + assert mi._is_strictly_monotonic_increasing is False + assert Index(mi.values)._is_strictly_monotonic_increasing is False + + +def test_is_monotonic_increasing(): + i = MultiIndex.from_product([np.arange(10), np.arange(10)], names=["one", "two"]) + assert i.is_monotonic is True + assert i._is_strictly_monotonic_increasing is True + assert Index(i.values).is_monotonic is True + assert i._is_strictly_monotonic_increasing is True + + i = MultiIndex.from_product( + [np.arange(10, 0, -1), np.arange(10)], names=["one", "two"] + ) + assert i.is_monotonic is False + assert i._is_strictly_monotonic_increasing is False + assert Index(i.values).is_monotonic is False + assert Index(i.values)._is_strictly_monotonic_increasing is False + + i = MultiIndex.from_product( + [np.arange(10), np.arange(10, 0, -1)], names=["one", "two"] + ) + assert i.is_monotonic is False + assert i._is_strictly_monotonic_increasing is False + assert Index(i.values).is_monotonic is False + assert Index(i.values)._is_strictly_monotonic_increasing is False + + i = MultiIndex.from_product([[1.0, np.nan, 2.0], ["a", "b", "c"]]) + assert i.is_monotonic is False + assert i._is_strictly_monotonic_increasing is False + assert Index(i.values).is_monotonic is False + assert Index(i.values)._is_strictly_monotonic_increasing is False + + i = MultiIndex( + levels=[["bar", "baz", "foo", "qux"], ["mom", "next", "zenith"]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=["first", "second"], + ) + assert i.is_monotonic is True + assert Index(i.values).is_monotonic is True + assert i._is_strictly_monotonic_increasing is True + assert Index(i.values)._is_strictly_monotonic_increasing is True + + # mixed levels, hits the TypeError + i = MultiIndex( + levels=[ + [1, 2, 3, 4], + [ + "gb00b03mlx29", + 
"lu0197800237", + "nl0000289783", + "nl0000289965", + "nl0000301109", + ], + ], + codes=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]], + names=["household_id", "asset_id"], + ) + + assert i.is_monotonic is False + assert i._is_strictly_monotonic_increasing is False + + # empty + i = MultiIndex.from_arrays([[], []]) + assert i.is_monotonic is True + assert Index(i.values).is_monotonic is True + assert i._is_strictly_monotonic_increasing is True + assert Index(i.values)._is_strictly_monotonic_increasing is True + + +def test_is_monotonic_decreasing(): + i = MultiIndex.from_product( + [np.arange(9, -1, -1), np.arange(9, -1, -1)], names=["one", "two"] + ) + assert i.is_monotonic_decreasing is True + assert i._is_strictly_monotonic_decreasing is True + assert Index(i.values).is_monotonic_decreasing is True + assert i._is_strictly_monotonic_decreasing is True + + i = MultiIndex.from_product( + [np.arange(10), np.arange(10, 0, -1)], names=["one", "two"] + ) + assert i.is_monotonic_decreasing is False + assert i._is_strictly_monotonic_decreasing is False + assert Index(i.values).is_monotonic_decreasing is False + assert Index(i.values)._is_strictly_monotonic_decreasing is False + + i = MultiIndex.from_product( + [np.arange(10, 0, -1), np.arange(10)], names=["one", "two"] + ) + assert i.is_monotonic_decreasing is False + assert i._is_strictly_monotonic_decreasing is False + assert Index(i.values).is_monotonic_decreasing is False + assert Index(i.values)._is_strictly_monotonic_decreasing is False + + i = MultiIndex.from_product([[2.0, np.nan, 1.0], ["c", "b", "a"]]) + assert i.is_monotonic_decreasing is False + assert i._is_strictly_monotonic_decreasing is False + assert Index(i.values).is_monotonic_decreasing is False + assert Index(i.values)._is_strictly_monotonic_decreasing is False + + # string ordering + i = MultiIndex( + levels=[["qux", "foo", "baz", "bar"], ["three", "two", "one"]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=["first", "second"], + ) + assert i.is_monotonic_decreasing is False + assert Index(i.values).is_monotonic_decreasing is False + assert i._is_strictly_monotonic_decreasing is False + assert Index(i.values)._is_strictly_monotonic_decreasing is False + + i = MultiIndex( + levels=[["qux", "foo", "baz", "bar"], ["zenith", "next", "mom"]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=["first", "second"], + ) + assert i.is_monotonic_decreasing is True + assert Index(i.values).is_monotonic_decreasing is True + assert i._is_strictly_monotonic_decreasing is True + assert Index(i.values)._is_strictly_monotonic_decreasing is True + + # mixed levels, hits the TypeError + i = MultiIndex( + levels=[ + [4, 3, 2, 1], + [ + "nl0000301109", + "nl0000289965", + "nl0000289783", + "lu0197800237", + "gb00b03mlx29", + ], + ], + codes=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]], + names=["household_id", "asset_id"], + ) + + assert i.is_monotonic_decreasing is False + assert i._is_strictly_monotonic_decreasing is False + + # empty + i = MultiIndex.from_arrays([[], []]) + assert i.is_monotonic_decreasing is True + assert Index(i.values).is_monotonic_decreasing is True + assert i._is_strictly_monotonic_decreasing is True + assert Index(i.values)._is_strictly_monotonic_decreasing is True + + +def test_is_strictly_monotonic_increasing(): + idx = MultiIndex( + levels=[["bar", "baz"], ["mom", "next"]], codes=[[0, 0, 1, 1], [0, 0, 0, 1]] + ) + assert idx.is_monotonic_increasing is True + assert 
idx._is_strictly_monotonic_increasing is False + + +def test_is_strictly_monotonic_decreasing(): + idx = MultiIndex( + levels=[["baz", "bar"], ["next", "mom"]], codes=[[0, 0, 1, 1], [0, 0, 0, 1]] + ) + assert idx.is_monotonic_decreasing is True + assert idx._is_strictly_monotonic_decreasing is False + + +@pytest.mark.parametrize("attr", ["is_monotonic_increasing", "is_monotonic_decreasing"]) +@pytest.mark.parametrize( + "values", + [[(np.nan,), (1,), (2,)], [(1,), (np.nan,), (2,)], [(1,), (2,), (np.nan,)]], +) +def test_is_monotonic_with_nans(values, attr): + # GH: 37220 + idx = MultiIndex.from_tuples(values, names=["test"]) + assert getattr(idx, attr) is False diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_names.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_names.py new file mode 100644 index 0000000..cfbc90d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_names.py @@ -0,0 +1,205 @@ +import pytest + +import pandas as pd +from pandas import MultiIndex +import pandas._testing as tm + + +def check_level_names(index, names): + assert [level.name for level in index.levels] == list(names) + + +def test_slice_keep_name(): + x = MultiIndex.from_tuples([("a", "b"), (1, 2), ("c", "d")], names=["x", "y"]) + assert x[1:].names == x.names + + +def test_index_name_retained(): + # GH9857 + result = pd.DataFrame({"x": [1, 2, 6], "y": [2, 2, 8], "z": [-5, 0, 5]}) + result = result.set_index("z") + result.loc[10] = [9, 10] + df_expected = pd.DataFrame( + {"x": [1, 2, 6, 9], "y": [2, 2, 8, 10], "z": [-5, 0, 5, 10]} + ) + df_expected = df_expected.set_index("z") + tm.assert_frame_equal(result, df_expected) + + +def test_changing_names(idx): + assert [level.name for level in idx.levels] == ["first", "second"] + + view = idx.view() + copy = idx.copy() + shallow_copy = idx._view() + + # changing names should not change level names on object + new_names = [name + "a" for name in idx.names] + idx.names = new_names + check_level_names(idx, ["firsta", "seconda"]) + + # and not on copies + check_level_names(view, ["first", "second"]) + check_level_names(copy, ["first", "second"]) + check_level_names(shallow_copy, ["first", "second"]) + + # and copies shouldn't change original + shallow_copy.names = [name + "c" for name in shallow_copy.names] + check_level_names(idx, ["firsta", "seconda"]) + + +def test_take_preserve_name(idx): + taken = idx.take([3, 0, 1]) + assert taken.names == idx.names + + +def test_copy_names(): + # Check that adding a "names" parameter to the copy is honored + # GH14302 + with tm.assert_produces_warning(FutureWarning): + # subclass-specific kwargs to pd.Index + multi_idx = pd.Index([(1, 2), (3, 4)], names=["MyName1", "MyName2"]) + multi_idx1 = multi_idx.copy() + + assert multi_idx.equals(multi_idx1) + assert multi_idx.names == ["MyName1", "MyName2"] + assert multi_idx1.names == ["MyName1", "MyName2"] + + multi_idx2 = multi_idx.copy(names=["NewName1", "NewName2"]) + + assert multi_idx.equals(multi_idx2) + assert multi_idx.names == ["MyName1", "MyName2"] + assert multi_idx2.names == ["NewName1", "NewName2"] + + multi_idx3 = multi_idx.copy(name=["NewName1", "NewName2"]) + + assert multi_idx.equals(multi_idx3) + assert multi_idx.names == ["MyName1", "MyName2"] + assert multi_idx3.names == ["NewName1", "NewName2"] + + # gh-35592 + with pytest.raises(ValueError, match="Length of new names must be 2, got 1"): + multi_idx.copy(names=["mario"]) + + with pytest.raises(TypeError, match="MultiIndex.name must 
be a hashable type"): + multi_idx.copy(names=[["mario"], ["luigi"]]) + + +def test_names(idx, index_names): + + # names are assigned in setup + assert index_names == ["first", "second"] + level_names = [level.name for level in idx.levels] + assert level_names == index_names + + # setting bad names on existing + index = idx + with pytest.raises(ValueError, match="^Length of names"): + setattr(index, "names", list(index.names) + ["third"]) + with pytest.raises(ValueError, match="^Length of names"): + setattr(index, "names", []) + + # initializing with bad names (should always be equivalent) + major_axis, minor_axis = idx.levels + major_codes, minor_codes = idx.codes + with pytest.raises(ValueError, match="^Length of names"): + MultiIndex( + levels=[major_axis, minor_axis], + codes=[major_codes, minor_codes], + names=["first"], + ) + with pytest.raises(ValueError, match="^Length of names"): + MultiIndex( + levels=[major_axis, minor_axis], + codes=[major_codes, minor_codes], + names=["first", "second", "third"], + ) + + # names are assigned on index, but not transferred to the levels + index.names = ["a", "b"] + level_names = [level.name for level in index.levels] + assert level_names == ["a", "b"] + + +def test_duplicate_level_names_access_raises(idx): + # GH19029 + idx.names = ["foo", "foo"] + with pytest.raises(ValueError, match="name foo occurs multiple times"): + idx._get_level_number("foo") + + +def test_get_names_from_levels(): + idx = MultiIndex.from_product([["a"], [1, 2]], names=["a", "b"]) + + assert idx.levels[0].name == "a" + assert idx.levels[1].name == "b" + + +def test_setting_names_from_levels_raises(): + idx = MultiIndex.from_product([["a"], [1, 2]], names=["a", "b"]) + with pytest.raises(RuntimeError, match="set_names"): + idx.levels[0].name = "foo" + + with pytest.raises(RuntimeError, match="set_names"): + idx.levels[1].name = "foo" + + new = pd.Series(1, index=idx.levels[0]) + with pytest.raises(RuntimeError, match="set_names"): + new.index.name = "bar" + + assert pd.Index._no_setting_name is False + assert pd.core.api.NumericIndex._no_setting_name is False + assert pd.RangeIndex._no_setting_name is False + + +@pytest.mark.parametrize("func", ["rename", "set_names"]) +@pytest.mark.parametrize( + "rename_dict, exp_names", + [ + ({"x": "z"}, ["z", "y", "z"]), + ({"x": "z", "y": "x"}, ["z", "x", "z"]), + ({"y": "z"}, ["x", "z", "x"]), + ({}, ["x", "y", "x"]), + ({"z": "a"}, ["x", "y", "x"]), + ({"y": "z", "a": "b"}, ["x", "z", "x"]), + ], +) +def test_name_mi_with_dict_like_duplicate_names(func, rename_dict, exp_names): + # GH#20421 + mi = MultiIndex.from_arrays([[1, 2], [3, 4], [5, 6]], names=["x", "y", "x"]) + result = getattr(mi, func)(rename_dict) + expected = MultiIndex.from_arrays([[1, 2], [3, 4], [5, 6]], names=exp_names) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize("func", ["rename", "set_names"]) +@pytest.mark.parametrize( + "rename_dict, exp_names", + [ + ({"x": "z"}, ["z", "y"]), + ({"x": "z", "y": "x"}, ["z", "x"]), + ({"a": "z"}, ["x", "y"]), + ({}, ["x", "y"]), + ], +) +def test_name_mi_with_dict_like(func, rename_dict, exp_names): + # GH#20421 + mi = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["x", "y"]) + result = getattr(mi, func)(rename_dict) + expected = MultiIndex.from_arrays([[1, 2], [3, 4]], names=exp_names) + tm.assert_index_equal(result, expected) + + +def test_index_name_with_dict_like_raising(): + # GH#20421 + ix = pd.Index([1, 2]) + msg = "Can only pass dict-like as `names` for MultiIndex." 
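+    # editor's note: dict-like `names` can only remap existing named levels, and a
+    # flat Index has no levels to remap, hence the TypeError checked below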
+ with pytest.raises(TypeError, match=msg): + ix.set_names({"x": "z"}) + + +def test_multiindex_name_and_level_raising(): + # GH#20421 + mi = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["x", "y"]) + with pytest.raises(TypeError, match="Can not pass level for dictlike `names`."): + mi.set_names(names={"x": "z"}, level={"x": "z"}) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_partial_indexing.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_partial_indexing.py new file mode 100644 index 0000000..47efc43 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_partial_indexing.py @@ -0,0 +1,148 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + IndexSlice, + MultiIndex, + date_range, +) +import pandas._testing as tm + + +@pytest.fixture +def df(): + # c1 + # 2016-01-01 00:00:00 a 0 + # b 1 + # c 2 + # 2016-01-01 12:00:00 a 3 + # b 4 + # c 5 + # 2016-01-02 00:00:00 a 6 + # b 7 + # c 8 + # 2016-01-02 12:00:00 a 9 + # b 10 + # c 11 + # 2016-01-03 00:00:00 a 12 + # b 13 + # c 14 + dr = date_range("2016-01-01", "2016-01-03", freq="12H") + abc = ["a", "b", "c"] + mi = MultiIndex.from_product([dr, abc]) + frame = DataFrame({"c1": range(0, 15)}, index=mi) + return frame + + +def test_partial_string_matching_single_index(df): + # partial string matching on a single index + for df_swap in [df.swaplevel(), df.swaplevel(0), df.swaplevel(0, 1)]: + df_swap = df_swap.sort_index() + just_a = df_swap.loc["a"] + result = just_a.loc["2016-01-01"] + expected = df.loc[IndexSlice[:, "a"], :].iloc[0:2] + expected.index = expected.index.droplevel(1) + tm.assert_frame_equal(result, expected) + + +def test_get_loc_partial_timestamp_multiindex(df): + mi = df.index + key = ("2016-01-01", "a") + loc = mi.get_loc(key) + + expected = np.zeros(len(mi), dtype=bool) + expected[[0, 3]] = True + tm.assert_numpy_array_equal(loc, expected) + + key2 = ("2016-01-02", "a") + loc2 = mi.get_loc(key2) + expected2 = np.zeros(len(mi), dtype=bool) + expected2[[6, 9]] = True + tm.assert_numpy_array_equal(loc2, expected2) + + key3 = ("2016-01", "a") + loc3 = mi.get_loc(key3) + expected3 = np.zeros(len(mi), dtype=bool) + expected3[mi.get_level_values(1).get_loc("a")] = True + tm.assert_numpy_array_equal(loc3, expected3) + + key4 = ("2016", "a") + loc4 = mi.get_loc(key4) + expected4 = expected3 + tm.assert_numpy_array_equal(loc4, expected4) + + # non-monotonic + taker = np.arange(len(mi), dtype=np.intp) + taker[::2] = taker[::-2] + mi2 = mi.take(taker) + loc5 = mi2.get_loc(key) + expected5 = np.zeros(len(mi2), dtype=bool) + expected5[[3, 14]] = True + tm.assert_numpy_array_equal(loc5, expected5) + + +def test_partial_string_timestamp_multiindex(df): + # GH10331 + df_swap = df.swaplevel(0, 1).sort_index() + SLC = IndexSlice + + # indexing with IndexSlice + result = df.loc[SLC["2016-01-01":"2016-02-01", :], :] + expected = df + tm.assert_frame_equal(result, expected) + + # match on secondary index + result = df_swap.loc[SLC[:, "2016-01-01":"2016-01-01"], :] + expected = df_swap.iloc[[0, 1, 5, 6, 10, 11]] + tm.assert_frame_equal(result, expected) + + # partial string match on year only + result = df.loc["2016"] + expected = df + tm.assert_frame_equal(result, expected) + + # partial string match on date + result = df.loc["2016-01-01"] + expected = df.iloc[0:6] + tm.assert_frame_equal(result, expected) + + # partial string match on date and hour, from middle + result = df.loc["2016-01-02 12"] + # hourly resolution, same as 
index.levels[0], so we are _not_ slicing on + # that level, so that level gets dropped + expected = df.iloc[9:12].droplevel(0) + tm.assert_frame_equal(result, expected) + + # partial string match on secondary index + result = df_swap.loc[SLC[:, "2016-01-02"], :] + expected = df_swap.iloc[[2, 3, 7, 8, 12, 13]] + tm.assert_frame_equal(result, expected) + + # tuple selector with partial string match on date + # "2016-01-01" has daily resolution, so _is_ a slice on the first level. + result = df.loc[("2016-01-01", "a"), :] + expected = df.iloc[[0, 3]] + expected = df.iloc[[0, 3]].droplevel(1) + tm.assert_frame_equal(result, expected) + + # Slicing date on first level should break (of course) bc the DTI is the + # second level on df_swap + with pytest.raises(KeyError, match="'2016-01-01'"): + df_swap.loc["2016-01-01"] + + +def test_partial_string_timestamp_multiindex_str_key_raises(df): + # Even though this syntax works on a single index, this is somewhat + # ambiguous and we don't want to extend this behavior forward to work + # in multi-indexes. This would amount to selecting a scalar from a + # column. + with pytest.raises(KeyError, match="'2016-01-01'"): + df["2016-01-01"] + + +def test_partial_string_timestamp_multiindex_daily_resolution(df): + # GH12685 (partial string with daily resolution or below) + result = df.loc[IndexSlice["2013-03":"2013-03", :], :] + expected = df.iloc[118:180] + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_pickle.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_pickle.py new file mode 100644 index 0000000..1d8b721 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_pickle.py @@ -0,0 +1,10 @@ +import pytest + +from pandas import MultiIndex + + +def test_pickle_compat_construction(): + # this is testing for pickle compat + # need an object to create with + with pytest.raises(TypeError, match="Must pass both levels and codes"): + MultiIndex() diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_reindex.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_reindex.py new file mode 100644 index 0000000..8136169 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_reindex.py @@ -0,0 +1,164 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Index, + MultiIndex, +) +import pandas._testing as tm + + +def test_reindex(idx): + result, indexer = idx.reindex(list(idx[:4])) + assert isinstance(result, MultiIndex) + assert result.names == ["first", "second"] + assert [level.name for level in result.levels] == ["first", "second"] + + result, indexer = idx.reindex(list(idx)) + assert isinstance(result, MultiIndex) + assert indexer is None + assert result.names == ["first", "second"] + assert [level.name for level in result.levels] == ["first", "second"] + + +def test_reindex_level(idx): + index = Index(["one"]) + + target, indexer = idx.reindex(index, level="second") + target2, indexer2 = index.reindex(idx, level="second") + + exp_index = idx.join(index, level="second", how="right") + exp_index2 = idx.join(index, level="second", how="left") + + assert target.equals(exp_index) + exp_indexer = np.array([0, 2, 4]) + tm.assert_numpy_array_equal(indexer, exp_indexer, check_dtype=False) + + assert target2.equals(exp_index2) + exp_indexer2 = np.array([0, -1, 0, -1, 0, -1]) + tm.assert_numpy_array_equal(indexer2, exp_indexer2, check_dtype=False) 
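+    # editor's note: level-based reindexing is a join on that level (cf. exp_index
+    # above), and a join has no notion of a fill method, so the "pad"/"bfill"
+    # calls below must raise TypeError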
+ + with pytest.raises(TypeError, match="Fill method not supported"): + idx.reindex(idx, method="pad", level="second") + + with pytest.raises(TypeError, match="Fill method not supported"): + index.reindex(index, method="bfill", level="first") + + +def test_reindex_preserves_names_when_target_is_list_or_ndarray(idx): + # GH6552 + idx = idx.copy() + target = idx.copy() + idx.names = target.names = [None, None] + + other_dtype = MultiIndex.from_product([[1, 2], [3, 4]]) + + # list & ndarray cases + assert idx.reindex([])[0].names == [None, None] + assert idx.reindex(np.array([]))[0].names == [None, None] + assert idx.reindex(target.tolist())[0].names == [None, None] + assert idx.reindex(target.values)[0].names == [None, None] + assert idx.reindex(other_dtype.tolist())[0].names == [None, None] + assert idx.reindex(other_dtype.values)[0].names == [None, None] + + idx.names = ["foo", "bar"] + assert idx.reindex([])[0].names == ["foo", "bar"] + assert idx.reindex(np.array([]))[0].names == ["foo", "bar"] + assert idx.reindex(target.tolist())[0].names == ["foo", "bar"] + assert idx.reindex(target.values)[0].names == ["foo", "bar"] + assert idx.reindex(other_dtype.tolist())[0].names == ["foo", "bar"] + assert idx.reindex(other_dtype.values)[0].names == ["foo", "bar"] + + +def test_reindex_lvl_preserves_names_when_target_is_list_or_array(): + # GH7774 + idx = MultiIndex.from_product([[0, 1], ["a", "b"]], names=["foo", "bar"]) + assert idx.reindex([], level=0)[0].names == ["foo", "bar"] + assert idx.reindex([], level=1)[0].names == ["foo", "bar"] + + +def test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array(): + # GH7774 + idx = MultiIndex.from_product([[0, 1], ["a", "b"]]) + assert idx.reindex([], level=0)[0].levels[0].dtype.type == np.int64 + assert idx.reindex([], level=1)[0].levels[1].dtype.type == np.object_ + + # case with EA levels + cat = pd.Categorical(["foo", "bar"]) + dti = pd.date_range("2016-01-01", periods=2, tz="US/Pacific") + mi = MultiIndex.from_product([cat, dti]) + assert mi.reindex([], level=0)[0].levels[0].dtype == cat.dtype + assert mi.reindex([], level=1)[0].levels[1].dtype == dti.dtype + + +def test_reindex_base(idx): + idx = idx + expected = np.arange(idx.size, dtype=np.intp) + + actual = idx.get_indexer(idx) + tm.assert_numpy_array_equal(expected, actual) + + with pytest.raises(ValueError, match="Invalid fill method"): + idx.get_indexer(idx, method="invalid") + + +def test_reindex_non_unique(): + idx = MultiIndex.from_tuples([(0, 0), (1, 1), (1, 1), (2, 2)]) + a = pd.Series(np.arange(4), index=idx) + new_idx = MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)]) + + msg = "cannot handle a non-unique multi-index!" 
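+    # editor's note: the label (1, 1) occurs twice in idx, so there is no
+    # unambiguous indexer into new_idx; pandas warns and then raises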
+ with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + a.reindex(new_idx) + + +@pytest.mark.parametrize("values", [[["a"], ["x"]], [[], []]]) +def test_reindex_empty_with_level(values): + # GH41170 + idx = MultiIndex.from_arrays(values) + result, result_indexer = idx.reindex(np.array(["b"]), level=0) + expected = MultiIndex(levels=[["b"], values[1]], codes=[[], []]) + expected_indexer = np.array([], dtype=result_indexer.dtype) + tm.assert_index_equal(result, expected) + tm.assert_numpy_array_equal(result_indexer, expected_indexer) + + +def test_reindex_not_all_tuples(): + keys = [("i", "i"), ("i", "j"), ("j", "i"), "j"] + mi = MultiIndex.from_tuples(keys[:-1]) + idx = Index(keys) + res, indexer = mi.reindex(idx) + + tm.assert_index_equal(res, idx) + expected = np.array([0, 1, 2, -1], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected) + + +def test_reindex_limit_arg_with_multiindex(): + # GH21247 + + idx = MultiIndex.from_tuples([(3, "A"), (4, "A"), (4, "B")]) + + df = pd.Series([0.02, 0.01, 0.012], index=idx) + + new_idx = MultiIndex.from_tuples( + [ + (3, "A"), + (3, "B"), + (4, "A"), + (4, "B"), + (4, "C"), + (5, "B"), + (5, "C"), + (6, "B"), + (6, "C"), + ] + ) + + with pytest.raises( + ValueError, + match="limit argument only valid if doing pad, backfill or nearest reindexing", + ): + df.reindex(new_idx, fill_value=0, limit=1) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_reshape.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_reshape.py new file mode 100644 index 0000000..eed27cd --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_reshape.py @@ -0,0 +1,185 @@ +from datetime import datetime + +import numpy as np +import pytest +import pytz + +import pandas as pd +from pandas import ( + Index, + MultiIndex, +) +import pandas._testing as tm + + +def test_insert(idx): + # key contained in all levels + new_index = idx.insert(0, ("bar", "two")) + assert new_index.equal_levels(idx) + assert new_index[0] == ("bar", "two") + + # key not contained in all levels + new_index = idx.insert(0, ("abc", "three")) + + exp0 = Index(list(idx.levels[0]) + ["abc"], name="first") + tm.assert_index_equal(new_index.levels[0], exp0) + assert new_index.names == ["first", "second"] + + exp1 = Index(list(idx.levels[1]) + ["three"], name="second") + tm.assert_index_equal(new_index.levels[1], exp1) + assert new_index[0] == ("abc", "three") + + # key wrong length + msg = "Item must have length equal to number of levels" + with pytest.raises(ValueError, match=msg): + idx.insert(0, ("foo2",)) + + left = pd.DataFrame([["a", "b", 0], ["b", "d", 1]], columns=["1st", "2nd", "3rd"]) + left.set_index(["1st", "2nd"], inplace=True) + ts = left["3rd"].copy(deep=True) + + left.loc[("b", "x"), "3rd"] = 2 + left.loc[("b", "a"), "3rd"] = -1 + left.loc[("b", "b"), "3rd"] = 3 + left.loc[("a", "x"), "3rd"] = 4 + left.loc[("a", "w"), "3rd"] = 5 + left.loc[("a", "a"), "3rd"] = 6 + + ts.loc[("b", "x")] = 2 + ts.loc["b", "a"] = -1 + ts.loc[("b", "b")] = 3 + ts.loc["a", "x"] = 4 + ts.loc[("a", "w")] = 5 + ts.loc["a", "a"] = 6 + + right = pd.DataFrame( + [ + ["a", "b", 0], + ["b", "d", 1], + ["b", "x", 2], + ["b", "a", -1], + ["b", "b", 3], + ["a", "x", 4], + ["a", "w", 5], + ["a", "a", 6], + ], + columns=["1st", "2nd", "3rd"], + ) + right.set_index(["1st", "2nd"], inplace=True) + # FIXME data types changes to float because + # of intermediate nan insertion; + 
tm.assert_frame_equal(left, right, check_dtype=False) + tm.assert_series_equal(ts, right["3rd"]) + + +def test_insert2(): + # GH9250 + idx = ( + [("test1", i) for i in range(5)] + + [("test2", i) for i in range(6)] + + [("test", 17), ("test", 18)] + ) + + left = pd.Series(np.linspace(0, 10, 11), MultiIndex.from_tuples(idx[:-2])) + + left.loc[("test", 17)] = 11 + left.loc[("test", 18)] = 12 + + right = pd.Series(np.linspace(0, 12, 13), MultiIndex.from_tuples(idx)) + + tm.assert_series_equal(left, right) + + +def test_append(idx): + result = idx[:3].append(idx[3:]) + assert result.equals(idx) + + foos = [idx[:1], idx[1:3], idx[3:]] + result = foos[0].append(foos[1:]) + assert result.equals(idx) + + # empty + result = idx.append([]) + assert result.equals(idx) + + +def test_append_index(): + idx1 = Index([1.1, 1.2, 1.3]) + idx2 = pd.date_range("2011-01-01", freq="D", periods=3, tz="Asia/Tokyo") + idx3 = Index(["A", "B", "C"]) + + midx_lv2 = MultiIndex.from_arrays([idx1, idx2]) + midx_lv3 = MultiIndex.from_arrays([idx1, idx2, idx3]) + + result = idx1.append(midx_lv2) + + # see gh-7112 + tz = pytz.timezone("Asia/Tokyo") + expected_tuples = [ + (1.1, tz.localize(datetime(2011, 1, 1))), + (1.2, tz.localize(datetime(2011, 1, 2))), + (1.3, tz.localize(datetime(2011, 1, 3))), + ] + expected = Index([1.1, 1.2, 1.3] + expected_tuples) + tm.assert_index_equal(result, expected) + + result = midx_lv2.append(idx1) + expected = Index(expected_tuples + [1.1, 1.2, 1.3]) + tm.assert_index_equal(result, expected) + + result = midx_lv2.append(midx_lv2) + expected = MultiIndex.from_arrays([idx1.append(idx1), idx2.append(idx2)]) + tm.assert_index_equal(result, expected) + + result = midx_lv2.append(midx_lv3) + tm.assert_index_equal(result, expected) + + result = midx_lv3.append(midx_lv2) + expected = Index._simple_new( + np.array( + [ + (1.1, tz.localize(datetime(2011, 1, 1)), "A"), + (1.2, tz.localize(datetime(2011, 1, 2)), "B"), + (1.3, tz.localize(datetime(2011, 1, 3)), "C"), + ] + + expected_tuples, + dtype=object, + ), + None, + ) + tm.assert_index_equal(result, expected) + + +def test_repeat(): + reps = 2 + numbers = [1, 2, 3] + names = np.array(["foo", "bar"]) + + m = MultiIndex.from_product([numbers, names], names=names) + expected = MultiIndex.from_product([numbers, names.repeat(reps)], names=names) + tm.assert_index_equal(m.repeat(reps), expected) + + +def test_insert_base(idx): + + result = idx[1:4] + + # test 0th element + assert idx[0:4].equals(result.insert(0, idx[0])) + + +def test_delete_base(idx): + + expected = idx[1:] + result = idx.delete(0) + assert result.equals(expected) + assert result.name == expected.name + + expected = idx[:-1] + result = idx.delete(-1) + assert result.equals(expected) + assert result.name == expected.name + + msg = "index 6 is out of bounds for axis 0 with size 6" + with pytest.raises(IndexError, match=msg): + idx.delete(len(idx)) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_setops.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_setops.py new file mode 100644 index 0000000..9f12d62 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_setops.py @@ -0,0 +1,540 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + CategoricalIndex, + Index, + IntervalIndex, + MultiIndex, + Series, +) +import pandas._testing as tm + + +@pytest.mark.parametrize("case", [0.5, "xxx"]) +@pytest.mark.parametrize( + "method", ["intersection", "union", "difference", 
"symmetric_difference"] +) +def test_set_ops_error_cases(idx, case, sort, method): + # non-iterable input + msg = "Input must be Index or array-like" + with pytest.raises(TypeError, match=msg): + getattr(idx, method)(case, sort=sort) + + +@pytest.mark.parametrize("klass", [MultiIndex, np.array, Series, list]) +def test_intersection_base(idx, sort, klass): + first = idx[2::-1] # first 3 elements reversed + second = idx[:5] + + if klass is not MultiIndex: + second = klass(second.values) + + intersect = first.intersection(second, sort=sort) + if sort is None: + expected = first.sort_values() + else: + expected = first + tm.assert_index_equal(intersect, expected) + + msg = "other must be a MultiIndex or a list of tuples" + with pytest.raises(TypeError, match=msg): + first.intersection([1, 2, 3], sort=sort) + + +@pytest.mark.arm_slow +@pytest.mark.parametrize("klass", [MultiIndex, np.array, Series, list]) +def test_union_base(idx, sort, klass): + first = idx[::-1] + second = idx[:5] + + if klass is not MultiIndex: + second = klass(second.values) + + union = first.union(second, sort=sort) + if sort is None: + expected = first.sort_values() + else: + expected = first + tm.assert_index_equal(union, expected) + + msg = "other must be a MultiIndex or a list of tuples" + with pytest.raises(TypeError, match=msg): + first.union([1, 2, 3], sort=sort) + + +def test_difference_base(idx, sort): + second = idx[4:] + answer = idx[:4] + result = idx.difference(second, sort=sort) + + if sort is None: + answer = answer.sort_values() + + assert result.equals(answer) + tm.assert_index_equal(result, answer) + + # GH 10149 + cases = [klass(second.values) for klass in [np.array, Series, list]] + for case in cases: + result = idx.difference(case, sort=sort) + tm.assert_index_equal(result, answer) + + msg = "other must be a MultiIndex or a list of tuples" + with pytest.raises(TypeError, match=msg): + idx.difference([1, 2, 3], sort=sort) + + +def test_symmetric_difference(idx, sort): + first = idx[1:] + second = idx[:-1] + answer = idx[[-1, 0]] + result = first.symmetric_difference(second, sort=sort) + + if sort is None: + answer = answer.sort_values() + + tm.assert_index_equal(result, answer) + + # GH 10149 + cases = [klass(second.values) for klass in [np.array, Series, list]] + for case in cases: + result = first.symmetric_difference(case, sort=sort) + tm.assert_index_equal(result, answer) + + msg = "other must be a MultiIndex or a list of tuples" + with pytest.raises(TypeError, match=msg): + first.symmetric_difference([1, 2, 3], sort=sort) + + +def test_multiindex_symmetric_difference(): + # GH 13490 + idx = MultiIndex.from_product([["a", "b"], ["A", "B"]], names=["a", "b"]) + with tm.assert_produces_warning(FutureWarning): + result = idx ^ idx + assert result.names == idx.names + + idx2 = idx.copy().rename(["A", "B"]) + with tm.assert_produces_warning(FutureWarning): + result = idx ^ idx2 + assert result.names == [None, None] + + +def test_empty(idx): + # GH 15270 + assert not idx.empty + assert idx[:0].empty + + +def test_difference(idx, sort): + + first = idx + result = first.difference(idx[-3:], sort=sort) + vals = idx[:-3].values + + if sort is None: + vals = sorted(vals) + + expected = MultiIndex.from_tuples(vals, sortorder=0, names=idx.names) + + assert isinstance(result, MultiIndex) + assert result.equals(expected) + assert result.names == idx.names + tm.assert_index_equal(result, expected) + + # empty difference: reflexive + result = idx.difference(idx, sort=sort) + expected = idx[:0] + assert 
result.equals(expected) + assert result.names == idx.names + + # empty difference: superset + result = idx[-3:].difference(idx, sort=sort) + expected = idx[:0] + assert result.equals(expected) + assert result.names == idx.names + + # empty difference: degenerate + result = idx[:0].difference(idx, sort=sort) + expected = idx[:0] + assert result.equals(expected) + assert result.names == idx.names + + # names not the same + chunklet = idx[-3:] + chunklet.names = ["foo", "baz"] + result = first.difference(chunklet, sort=sort) + assert result.names == (None, None) + + # empty, but non-equal + result = idx.difference(idx.sortlevel(1)[0], sort=sort) + assert len(result) == 0 + + # raise Exception called with non-MultiIndex + result = first.difference(first.values, sort=sort) + assert result.equals(first[:0]) + + # name from empty array + result = first.difference([], sort=sort) + assert first.equals(result) + assert first.names == result.names + + # name from non-empty array + result = first.difference([("foo", "one")], sort=sort) + expected = MultiIndex.from_tuples( + [("bar", "one"), ("baz", "two"), ("foo", "two"), ("qux", "one"), ("qux", "two")] + ) + expected.names = first.names + assert first.names == result.names + + msg = "other must be a MultiIndex or a list of tuples" + with pytest.raises(TypeError, match=msg): + first.difference([1, 2, 3, 4, 5], sort=sort) + + +def test_difference_sort_special(): + # GH-24959 + idx = MultiIndex.from_product([[1, 0], ["a", "b"]]) + # sort=None, the default + result = idx.difference([]) + tm.assert_index_equal(result, idx) + + +@pytest.mark.xfail(reason="Not implemented.") +def test_difference_sort_special_true(): + # TODO(GH#25151): decide on True behaviour + idx = MultiIndex.from_product([[1, 0], ["a", "b"]]) + result = idx.difference([], sort=True) + expected = MultiIndex.from_product([[0, 1], ["a", "b"]]) + tm.assert_index_equal(result, expected) + + +def test_difference_sort_incomparable(): + # GH-24959 + idx = MultiIndex.from_product([[1, pd.Timestamp("2000"), 2], ["a", "b"]]) + + other = MultiIndex.from_product([[3, pd.Timestamp("2000"), 4], ["c", "d"]]) + # sort=None, the default + msg = "sort order is undefined for incomparable objects" + with tm.assert_produces_warning(RuntimeWarning, match=msg): + result = idx.difference(other) + tm.assert_index_equal(result, idx) + + # sort=False + result = idx.difference(other, sort=False) + tm.assert_index_equal(result, idx) + + +def test_difference_sort_incomparable_true(): + idx = MultiIndex.from_product([[1, pd.Timestamp("2000"), 2], ["a", "b"]]) + other = MultiIndex.from_product([[3, pd.Timestamp("2000"), 4], ["c", "d"]]) + + msg = "The 'sort' keyword only takes the values of None or False; True was passed." + with pytest.raises(ValueError, match=msg): + idx.difference(other, sort=True) + + +def test_union(idx, sort): + piece1 = idx[:5][::-1] + piece2 = idx[3:] + + the_union = piece1.union(piece2, sort=sort) + + if sort is None: + tm.assert_index_equal(the_union, idx.sort_values()) + + assert tm.equalContents(the_union, idx) + + # corner case, pass self or empty thing: + the_union = idx.union(idx, sort=sort) + tm.assert_index_equal(the_union, idx) + + the_union = idx.union(idx[:0], sort=sort) + tm.assert_index_equal(the_union, idx) + + tuples = idx.values + result = idx[:4].union(tuples[4:], sort=sort) + if sort is None: + tm.equalContents(result, idx) + else: + assert result.equals(idx) + + +@pytest.mark.xfail( + # This test was commented out from Oct 2011 to Dec 2021, may no longer + # be relevant. 
+ reason="Length of names must match number of levels in MultiIndex", + raises=ValueError, +) +def test_union_with_regular_index(idx): + other = Index(["A", "B", "C"]) + + result = other.union(idx) + assert ("foo", "one") in result + assert "B" in result + + msg = "The values in the array are unorderable" + with tm.assert_produces_warning(RuntimeWarning, match=msg): + result2 = idx.union(other) + assert result.equals(result2) + + +def test_intersection(idx, sort): + piece1 = idx[:5][::-1] + piece2 = idx[3:] + + the_int = piece1.intersection(piece2, sort=sort) + + if sort is None: + tm.assert_index_equal(the_int, idx[3:5]) + assert tm.equalContents(the_int, idx[3:5]) + + # corner case, pass self + the_int = idx.intersection(idx, sort=sort) + tm.assert_index_equal(the_int, idx) + + # empty intersection: disjoint + empty = idx[:2].intersection(idx[2:], sort=sort) + expected = idx[:0] + assert empty.equals(expected) + + tuples = idx.values + result = idx.intersection(tuples) + assert result.equals(idx) + + +@pytest.mark.parametrize( + "method", ["intersection", "union", "difference", "symmetric_difference"] +) +def test_setop_with_categorical(idx, sort, method): + other = idx.to_flat_index().astype("category") + res_names = [None] * idx.nlevels + + result = getattr(idx, method)(other, sort=sort) + expected = getattr(idx, method)(idx, sort=sort).rename(res_names) + tm.assert_index_equal(result, expected) + + result = getattr(idx, method)(other[:5], sort=sort) + expected = getattr(idx, method)(idx[:5], sort=sort).rename(res_names) + tm.assert_index_equal(result, expected) + + +def test_intersection_non_object(idx, sort): + other = Index(range(3), name="foo") + + result = idx.intersection(other, sort=sort) + expected = MultiIndex(levels=idx.levels, codes=[[]] * idx.nlevels, names=None) + tm.assert_index_equal(result, expected, exact=True) + + # if we pass a length-0 ndarray (i.e. 
no name, we retain our idx.name) + result = idx.intersection(np.asarray(other)[:0], sort=sort) + expected = MultiIndex(levels=idx.levels, codes=[[]] * idx.nlevels, names=idx.names) + tm.assert_index_equal(result, expected, exact=True) + + msg = "other must be a MultiIndex or a list of tuples" + with pytest.raises(TypeError, match=msg): + # With non-zero length non-index, we try and fail to convert to tuples + idx.intersection(np.asarray(other), sort=sort) + + +def test_intersect_equal_sort(): + # GH-24959 + idx = MultiIndex.from_product([[1, 0], ["a", "b"]]) + tm.assert_index_equal(idx.intersection(idx, sort=False), idx) + tm.assert_index_equal(idx.intersection(idx, sort=None), idx) + + +@pytest.mark.xfail(reason="Not implemented.") +def test_intersect_equal_sort_true(): + # TODO(GH#25151): decide on True behaviour + idx = MultiIndex.from_product([[1, 0], ["a", "b"]]) + sorted_ = MultiIndex.from_product([[0, 1], ["a", "b"]]) + tm.assert_index_equal(idx.intersection(idx, sort=True), sorted_) + + +@pytest.mark.parametrize("slice_", [slice(None), slice(0)]) +def test_union_sort_other_empty(slice_): + # https://github.com/pandas-dev/pandas/issues/24959 + idx = MultiIndex.from_product([[1, 0], ["a", "b"]]) + + # default, sort=None + other = idx[slice_] + tm.assert_index_equal(idx.union(other), idx) + tm.assert_index_equal(other.union(idx), idx) + + # sort=False + tm.assert_index_equal(idx.union(other, sort=False), idx) + + +@pytest.mark.xfail(reason="Not implemented.") +def test_union_sort_other_empty_sort(slice_): + # TODO(GH#25151): decide on True behaviour + # # sort=True + idx = MultiIndex.from_product([[1, 0], ["a", "b"]]) + other = idx[:0] + result = idx.union(other, sort=True) + expected = MultiIndex.from_product([[0, 1], ["a", "b"]]) + tm.assert_index_equal(result, expected) + + +def test_union_sort_other_incomparable(): + # https://github.com/pandas-dev/pandas/issues/24959 + idx = MultiIndex.from_product([[1, pd.Timestamp("2000")], ["a", "b"]]) + + # default, sort=None + with tm.assert_produces_warning(RuntimeWarning): + result = idx.union(idx[:1]) + tm.assert_index_equal(result, idx) + + # sort=False + result = idx.union(idx[:1], sort=False) + tm.assert_index_equal(result, idx) + + +@pytest.mark.xfail(reason="Not implemented.") +def test_union_sort_other_incomparable_sort(): + # TODO(GH#25151): decide on True behaviour + # # sort=True + idx = MultiIndex.from_product([[1, pd.Timestamp("2000")], ["a", "b"]]) + with pytest.raises(TypeError, match="Cannot compare"): + idx.union(idx[:1], sort=True) + + +def test_union_non_object_dtype_raises(): + # GH#32646 raise NotImplementedError instead of less-informative error + mi = MultiIndex.from_product([["a", "b"], [1, 2]]) + + idx = mi.levels[1] + + msg = "Can only union MultiIndex with MultiIndex or Index of tuples" + with pytest.raises(NotImplementedError, match=msg): + mi.union(idx) + + +def test_union_empty_self_different_names(): + # GH#38423 + mi = MultiIndex.from_arrays([[]]) + mi2 = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"]) + result = mi.union(mi2) + expected = MultiIndex.from_arrays([[1, 2], [3, 4]]) + tm.assert_index_equal(result, expected) + + +def test_union_multiindex_empty_rangeindex(): + # GH#41234 + mi = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"]) + ri = pd.RangeIndex(0) + + result_left = mi.union(ri) + tm.assert_index_equal(mi, result_left, check_names=False) + + result_right = ri.union(mi) + tm.assert_index_equal(mi, result_right, check_names=False) + + +@pytest.mark.parametrize( + "method", 
["union", "intersection", "difference", "symmetric_difference"] +) +def test_setops_disallow_true(method): + idx1 = MultiIndex.from_product([["a", "b"], [1, 2]]) + idx2 = MultiIndex.from_product([["b", "c"], [1, 2]]) + + with pytest.raises(ValueError, match="The 'sort' keyword only takes"): + getattr(idx1, method)(idx2, sort=True) + + +@pytest.mark.parametrize( + ("tuples", "exp_tuples"), + [ + ([("val1", "test1")], [("val1", "test1")]), + ([("val1", "test1"), ("val1", "test1")], [("val1", "test1")]), + ( + [("val2", "test2"), ("val1", "test1")], + [("val2", "test2"), ("val1", "test1")], + ), + ], +) +def test_intersect_with_duplicates(tuples, exp_tuples): + # GH#36915 + left = MultiIndex.from_tuples(tuples, names=["first", "second"]) + right = MultiIndex.from_tuples( + [("val1", "test1"), ("val1", "test1"), ("val2", "test2")], + names=["first", "second"], + ) + result = left.intersection(right) + expected = MultiIndex.from_tuples(exp_tuples, names=["first", "second"]) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize( + "data, names, expected", + [ + ((1,), None, [None, None]), + ((1,), ["a"], [None, None]), + ((1,), ["b"], [None, None]), + ((1, 2), ["c", "d"], [None, None]), + ((1, 2), ["b", "a"], [None, None]), + ((1, 2, 3), ["a", "b", "c"], [None, None]), + ((1, 2), ["a", "c"], ["a", None]), + ((1, 2), ["c", "b"], [None, "b"]), + ((1, 2), ["a", "b"], ["a", "b"]), + ((1, 2), [None, "b"], [None, "b"]), + ], +) +def test_maybe_match_names(data, names, expected): + # GH#38323 + mi = MultiIndex.from_tuples([], names=["a", "b"]) + mi2 = MultiIndex.from_tuples([data], names=names) + result = mi._maybe_match_names(mi2) + assert result == expected + + +def test_intersection_equal_different_names(): + # GH#30302 + mi1 = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["c", "b"]) + mi2 = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"]) + + result = mi1.intersection(mi2) + expected = MultiIndex.from_arrays([[1, 2], [3, 4]], names=[None, "b"]) + tm.assert_index_equal(result, expected) + + +def test_intersection_different_names(): + # GH#38323 + mi = MultiIndex.from_arrays([[1], [3]], names=["c", "b"]) + mi2 = MultiIndex.from_arrays([[1], [3]]) + result = mi.intersection(mi2) + tm.assert_index_equal(result, mi2) + + +def test_intersection_with_missing_values_on_both_sides(nulls_fixture): + # GH#38623 + mi1 = MultiIndex.from_arrays([[3, nulls_fixture, 4, nulls_fixture], [1, 2, 4, 2]]) + mi2 = MultiIndex.from_arrays([[3, nulls_fixture, 3], [1, 2, 4]]) + result = mi1.intersection(mi2) + expected = MultiIndex.from_arrays([[3.0, nulls_fixture], [1, 2]]) + tm.assert_index_equal(result, expected) + + +def test_union_nan_got_duplicated(): + # GH#38977 + mi1 = MultiIndex.from_arrays([[1.0, np.nan], [2, 3]]) + mi2 = MultiIndex.from_arrays([[1.0, np.nan, 3.0], [2, 3, 4]]) + result = mi1.union(mi2) + tm.assert_index_equal(result, mi2) + + +def test_union_duplicates(index): + # GH#38977 + if index.empty or isinstance(index, (IntervalIndex, CategoricalIndex)): + # No duplicates in empty indexes + return + values = index.unique().values.tolist() + mi1 = MultiIndex.from_arrays([values, [1] * len(values)]) + mi2 = MultiIndex.from_arrays([[values[0]] + values, [1] * (len(values) + 1)]) + result = mi1.union(mi2) + tm.assert_index_equal(result, mi2.sort_values()) + + result = mi2.union(mi1) + tm.assert_index_equal(result, mi2.sort_values()) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_sorting.py 
b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_sorting.py new file mode 100644 index 0000000..63d3fe5 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_sorting.py @@ -0,0 +1,287 @@ +import random + +import numpy as np +import pytest + +from pandas.errors import ( + PerformanceWarning, + UnsortedIndexError, +) + +from pandas import ( + CategoricalIndex, + DataFrame, + Index, + MultiIndex, + RangeIndex, +) +import pandas._testing as tm +from pandas.core.indexes.frozen import FrozenList + + +def test_sortlevel(idx): + tuples = list(idx) + random.shuffle(tuples) + + index = MultiIndex.from_tuples(tuples) + + sorted_idx, _ = index.sortlevel(0) + expected = MultiIndex.from_tuples(sorted(tuples)) + assert sorted_idx.equals(expected) + + sorted_idx, _ = index.sortlevel(0, ascending=False) + assert sorted_idx.equals(expected[::-1]) + + sorted_idx, _ = index.sortlevel(1) + by1 = sorted(tuples, key=lambda x: (x[1], x[0])) + expected = MultiIndex.from_tuples(by1) + assert sorted_idx.equals(expected) + + sorted_idx, _ = index.sortlevel(1, ascending=False) + assert sorted_idx.equals(expected[::-1]) + + +def test_sortlevel_not_sort_remaining(): + mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC")) + sorted_idx, _ = mi.sortlevel("A", sort_remaining=False) + assert sorted_idx.equals(mi) + + +def test_sortlevel_deterministic(): + tuples = [ + ("bar", "one"), + ("foo", "two"), + ("qux", "two"), + ("foo", "one"), + ("baz", "two"), + ("qux", "one"), + ] + + index = MultiIndex.from_tuples(tuples) + + sorted_idx, _ = index.sortlevel(0) + expected = MultiIndex.from_tuples(sorted(tuples)) + assert sorted_idx.equals(expected) + + sorted_idx, _ = index.sortlevel(0, ascending=False) + assert sorted_idx.equals(expected[::-1]) + + sorted_idx, _ = index.sortlevel(1) + by1 = sorted(tuples, key=lambda x: (x[1], x[0])) + expected = MultiIndex.from_tuples(by1) + assert sorted_idx.equals(expected) + + sorted_idx, _ = index.sortlevel(1, ascending=False) + assert sorted_idx.equals(expected[::-1]) + + +def test_numpy_argsort(idx): + result = np.argsort(idx) + expected = idx.argsort() + tm.assert_numpy_array_equal(result, expected) + + # these are the only two types that perform + # pandas compatibility input validation - the + # rest already perform separate (or no) such + # validation via their 'values' attribute as + # defined in pandas.core.indexes/base.py - they + # cannot be changed at the moment due to + # backwards compatibility concerns + if isinstance(type(idx), (CategoricalIndex, RangeIndex)): + msg = "the 'axis' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.argsort(idx, axis=1) + + msg = "the 'kind' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.argsort(idx, kind="mergesort") + + msg = "the 'order' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.argsort(idx, order=("a", "b")) + + +def test_unsortedindex(): + # GH 11897 + mi = MultiIndex.from_tuples( + [("z", "a"), ("x", "a"), ("y", "b"), ("x", "b"), ("y", "a"), ("z", "b")], + names=["one", "two"], + ) + df = DataFrame([[i, 10 * i] for i in range(6)], index=mi, columns=["one", "two"]) + + # GH 16734: not sorted, but no real slicing + result = df.loc(axis=0)["z", "a"] + expected = df.iloc[0] + tm.assert_series_equal(result, expected) + + msg = ( + "MultiIndex slicing requires the index to be lexsorted: " + r"slicing on levels \[1\], lexsort depth 0" + ) + with pytest.raises(UnsortedIndexError, 
match=msg): + df.loc(axis=0)["z", slice("a")] + df.sort_index(inplace=True) + assert len(df.loc(axis=0)["z", :]) == 2 + + with pytest.raises(KeyError, match="'q'"): + df.loc(axis=0)["q", :] + + +def test_unsortedindex_doc_examples(): + # https://pandas.pydata.org/pandas-docs/stable/advanced.html#sorting-a-multiindex + dfm = DataFrame( + {"jim": [0, 0, 1, 1], "joe": ["x", "x", "z", "y"], "jolie": np.random.rand(4)} + ) + + dfm = dfm.set_index(["jim", "joe"]) + with tm.assert_produces_warning(PerformanceWarning): + dfm.loc[(1, "z")] + + msg = r"Key length \(2\) was greater than MultiIndex lexsort depth \(1\)" + with pytest.raises(UnsortedIndexError, match=msg): + dfm.loc[(0, "y"):(1, "z")] + + assert not dfm.index._is_lexsorted() + assert dfm.index._lexsort_depth == 1 + + # sort it + dfm = dfm.sort_index() + dfm.loc[(1, "z")] + dfm.loc[(0, "y"):(1, "z")] + + assert dfm.index._is_lexsorted() + assert dfm.index._lexsort_depth == 2 + + +def test_reconstruct_sort(): + + # starts off lexsorted & monotonic + mi = MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]]) + assert mi.is_monotonic + + recons = mi._sort_levels_monotonic() + assert recons.is_monotonic + assert mi is recons + + assert mi.equals(recons) + assert Index(mi.values).equals(Index(recons.values)) + + # cannot convert to lexsorted + mi = MultiIndex.from_tuples( + [("z", "a"), ("x", "a"), ("y", "b"), ("x", "b"), ("y", "a"), ("z", "b")], + names=["one", "two"], + ) + assert not mi.is_monotonic + + recons = mi._sort_levels_monotonic() + assert not recons.is_monotonic + + assert mi.equals(recons) + assert Index(mi.values).equals(Index(recons.values)) + + # cannot convert to lexsorted + mi = MultiIndex( + levels=[["b", "d", "a"], [1, 2, 3]], + codes=[[0, 1, 0, 2], [2, 0, 0, 1]], + names=["col1", "col2"], + ) + assert not mi.is_monotonic + + recons = mi._sort_levels_monotonic() + assert not recons.is_monotonic + + assert mi.equals(recons) + assert Index(mi.values).equals(Index(recons.values)) + + +def test_reconstruct_remove_unused(): + # xref to GH 2770 + df = DataFrame( + [["deleteMe", 1, 9], ["keepMe", 2, 9], ["keepMeToo", 3, 9]], + columns=["first", "second", "third"], + ) + df2 = df.set_index(["first", "second"], drop=False) + df2 = df2[df2["first"] != "deleteMe"] + + # removed levels are there + expected = MultiIndex( + levels=[["deleteMe", "keepMe", "keepMeToo"], [1, 2, 3]], + codes=[[1, 2], [1, 2]], + names=["first", "second"], + ) + result = df2.index + tm.assert_index_equal(result, expected) + + expected = MultiIndex( + levels=[["keepMe", "keepMeToo"], [2, 3]], + codes=[[0, 1], [0, 1]], + names=["first", "second"], + ) + result = df2.index.remove_unused_levels() + tm.assert_index_equal(result, expected) + + # idempotent + result2 = result.remove_unused_levels() + tm.assert_index_equal(result2, expected) + assert result2.is_(result) + + +@pytest.mark.parametrize( + "first_type,second_type", [("int64", "int64"), ("datetime64[D]", "str")] +) +def test_remove_unused_levels_large(first_type, second_type): + # GH16556 + + # because tests should be deterministic (and this test in particular + # checks that levels are removed, which is not the case for every + # random input): + rng = np.random.RandomState(4) # seed is arbitrary value that works + + size = 1 << 16 + df = DataFrame( + { + "first": rng.randint(0, 1 << 13, size).astype(first_type), + "second": rng.randint(0, 1 << 10, size).astype(second_type), + "third": rng.rand(size), + } + ) + df = df.groupby(["first", "second"]).sum() + df = df[df.third < 0.1] + + 
result = df.index.remove_unused_levels() + assert len(result.levels[0]) < len(df.index.levels[0]) + assert len(result.levels[1]) < len(df.index.levels[1]) + assert result.equals(df.index) + + expected = df.reset_index().set_index(["first", "second"]).index + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize("level0", [["a", "d", "b"], ["a", "d", "b", "unused"]]) +@pytest.mark.parametrize( + "level1", [["w", "x", "y", "z"], ["w", "x", "y", "z", "unused"]] +) +def test_remove_unused_nan(level0, level1): + # GH 18417 + mi = MultiIndex(levels=[level0, level1], codes=[[0, 2, -1, 1, -1], [0, 1, 2, 3, 2]]) + + result = mi.remove_unused_levels() + tm.assert_index_equal(result, mi) + for level in 0, 1: + assert "unused" not in result.levels[level] + + +def test_argsort(idx): + result = idx.argsort() + expected = idx.values.argsort() + tm.assert_numpy_array_equal(result, expected) + + +def test_remove_unused_levels_with_nan(): + # GH 37510 + idx = Index([(1, np.nan), (3, 4)]).rename(["id1", "id2"]) + idx = idx.set_levels(["a", np.nan], level="id1") + idx = idx.remove_unused_levels() + result = idx.levels + expected = FrozenList([["a", np.nan], [4]]) + assert str(result) == str(expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_take.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_take.py new file mode 100644 index 0000000..f8e7632 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/multi/test_take.py @@ -0,0 +1,79 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +def test_take(idx): + indexer = [4, 3, 0, 2] + result = idx.take(indexer) + expected = idx[indexer] + assert result.equals(expected) + + # GH 10791 + msg = "'MultiIndex' object has no attribute 'freq'" + with pytest.raises(AttributeError, match=msg): + idx.freq + + +def test_take_invalid_kwargs(idx): + idx = idx + indices = [1, 2] + + msg = r"take\(\) got an unexpected keyword argument 'foo'" + with pytest.raises(TypeError, match=msg): + idx.take(indices, foo=2) + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + idx.take(indices, out=indices) + + msg = "the 'mode' parameter is not supported" + with pytest.raises(ValueError, match=msg): + idx.take(indices, mode="clip") + + +def test_take_fill_value(): + # GH 12631 + vals = [["A", "B"], [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]] + idx = pd.MultiIndex.from_product(vals, names=["str", "dt"]) + + result = idx.take(np.array([1, 0, -1])) + exp_vals = [ + ("A", pd.Timestamp("2011-01-02")), + ("A", pd.Timestamp("2011-01-01")), + ("B", pd.Timestamp("2011-01-02")), + ] + expected = pd.MultiIndex.from_tuples(exp_vals, names=["str", "dt"]) + tm.assert_index_equal(result, expected) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + exp_vals = [ + ("A", pd.Timestamp("2011-01-02")), + ("A", pd.Timestamp("2011-01-01")), + (np.nan, pd.NaT), + ] + expected = pd.MultiIndex.from_tuples(exp_vals, names=["str", "dt"]) + tm.assert_index_equal(result, expected) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + exp_vals = [ + ("A", pd.Timestamp("2011-01-02")), + ("A", pd.Timestamp("2011-01-01")), + ("B", pd.Timestamp("2011-01-02")), + ] + expected = pd.MultiIndex.from_tuples(exp_vals, names=["str", "dt"]) + tm.assert_index_equal(result, expected) + + msg = "When allow_fill=True and fill_value is not None, all indices must be >= 
-1" + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + msg = "index -5 is out of bounds for( axis 0 with)? size 4" + with pytest.raises(IndexError, match=msg): + idx.take(np.array([1, -5])) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/numeric/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/numeric/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/numeric/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/numeric/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..87788e4 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/numeric/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/numeric/__pycache__/test_astype.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/numeric/__pycache__/test_astype.cpython-38.pyc new file mode 100644 index 0000000..1a9782c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/numeric/__pycache__/test_astype.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/numeric/__pycache__/test_indexing.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/numeric/__pycache__/test_indexing.cpython-38.pyc new file mode 100644 index 0000000..b37cefa Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/numeric/__pycache__/test_indexing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/numeric/__pycache__/test_join.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/numeric/__pycache__/test_join.cpython-38.pyc new file mode 100644 index 0000000..5ca5f63 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/numeric/__pycache__/test_join.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/numeric/__pycache__/test_numeric.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/numeric/__pycache__/test_numeric.cpython-38.pyc new file mode 100644 index 0000000..5941922 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/numeric/__pycache__/test_numeric.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/numeric/__pycache__/test_setops.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/numeric/__pycache__/test_setops.cpython-38.pyc new file mode 100644 index 0000000..e0d63d5 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/numeric/__pycache__/test_setops.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/numeric/test_astype.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/numeric/test_astype.py new file mode 100644 index 0000000..89f26e9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/numeric/test_astype.py @@ -0,0 +1,87 @@ +import re + +import numpy as np +import pytest + +from pandas.core.dtypes.common import pandas_dtype + +from pandas import Index +import pandas._testing as tm +from pandas.core.indexes.api import ( + Float64Index, + Int64Index, +) + + +class TestAstype: + 
def test_astype_float64_to_object(self): + float_index = Float64Index([0.0, 2.5, 5.0, 7.5, 10.0]) + result = float_index.astype(object) + assert result.equals(float_index) + assert float_index.equals(result) + assert isinstance(result, Index) and not isinstance(result, Float64Index) + + def test_astype_float64_mixed_to_object(self): + # mixed int-float + idx = Float64Index([1.5, 2, 3, 4, 5]) + idx.name = "foo" + result = idx.astype(object) + assert result.equals(idx) + assert idx.equals(result) + assert isinstance(result, Index) and not isinstance(result, Float64Index) + + @pytest.mark.parametrize("dtype", ["int16", "int32", "int64"]) + def test_astype_float64_to_int_dtype(self, dtype): + # GH#12881 + # a float astype int + idx = Float64Index([0, 1, 2]) + result = idx.astype(dtype) + expected = Int64Index([0, 1, 2]) + tm.assert_index_equal(result, expected) + + idx = Float64Index([0, 1.1, 2]) + result = idx.astype(dtype) + expected = Int64Index([0, 1, 2]) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("dtype", ["float32", "float64"]) + def test_astype_float64_to_float_dtype(self, dtype): + # GH#12881 + # a float astype int + idx = Float64Index([0, 1, 2]) + result = idx.astype(dtype) + expected = idx + tm.assert_index_equal(result, expected) + + idx = Float64Index([0, 1.1, 2]) + result = idx.astype(dtype) + expected = Index(idx.values.astype(dtype)) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"]) + def test_cannot_cast_to_datetimelike(self, dtype): + idx = Float64Index([0, 1.1, 2]) + + msg = ( + f"Cannot convert Float64Index to dtype {pandas_dtype(dtype)}; " + f"integer values are required for conversion" + ) + with pytest.raises(TypeError, match=re.escape(msg)): + idx.astype(dtype) + + @pytest.mark.parametrize("dtype", [int, "int16", "int32", "int64"]) + @pytest.mark.parametrize("non_finite", [np.inf, np.nan]) + def test_cannot_cast_inf_to_int(self, non_finite, dtype): + # GH#13149 + idx = Float64Index([1, 2, non_finite]) + + msg = r"Cannot convert non-finite values \(NA or inf\) to integer" + with pytest.raises(ValueError, match=msg): + idx.astype(dtype) + + def test_astype_from_object(self): + index = Index([1.0, np.nan, 0.2], dtype="object") + result = index.astype(float) + expected = Float64Index([1.0, np.nan, 0.2]) + assert result.dtype == expected.dtype + tm.assert_index_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/numeric/test_indexing.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/numeric/test_indexing.py new file mode 100644 index 0000000..a70845a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/numeric/test_indexing.py @@ -0,0 +1,584 @@ +import numpy as np +import pytest + +from pandas.errors import InvalidIndexError + +from pandas import ( + Index, + RangeIndex, + Series, + Timestamp, +) +import pandas._testing as tm +from pandas.core.indexes.api import ( + Float64Index, + Int64Index, + UInt64Index, +) + + +@pytest.fixture +def index_large(): + # large values used in UInt64Index tests where no compat needed with Int64/Float64 + large = [2 ** 63, 2 ** 63 + 10, 2 ** 63 + 15, 2 ** 63 + 20, 2 ** 63 + 25] + return UInt64Index(large) + + +class TestGetLoc: + @pytest.mark.parametrize("method", [None, "pad", "backfill", "nearest"]) + def test_get_loc(self, method): + index = Index([0, 1, 2]) + warn = None if method is None else FutureWarning + + with tm.assert_produces_warning(warn, match="deprecated"): + assert 
index.get_loc(1, method=method) == 1 + + if method: + with tm.assert_produces_warning(warn, match="deprecated"): + assert index.get_loc(1, method=method, tolerance=0) == 1 + + @pytest.mark.parametrize("method", [None, "pad", "backfill", "nearest"]) + @pytest.mark.filterwarnings("ignore:Passing method:FutureWarning") + def test_get_loc_raises_bad_label(self, method): + index = Index([0, 1, 2]) + if method: + msg = "not supported between" + err = TypeError + else: + msg = r"\[1, 2\]" + err = InvalidIndexError + + with pytest.raises(err, match=msg): + index.get_loc([1, 2], method=method) + + @pytest.mark.parametrize( + "method,loc", [("pad", 1), ("backfill", 2), ("nearest", 1)] + ) + @pytest.mark.filterwarnings("ignore:Passing method:FutureWarning") + def test_get_loc_tolerance(self, method, loc): + index = Index([0, 1, 2]) + assert index.get_loc(1.1, method) == loc + assert index.get_loc(1.1, method, tolerance=1) == loc + + @pytest.mark.parametrize("method", ["pad", "backfill", "nearest"]) + def test_get_loc_outside_tolerance_raises(self, method): + index = Index([0, 1, 2]) + with pytest.raises(KeyError, match="1.1"): + with tm.assert_produces_warning(FutureWarning, match="deprecated"): + index.get_loc(1.1, method, tolerance=0.05) + + def test_get_loc_bad_tolerance_raises(self): + index = Index([0, 1, 2]) + with pytest.raises(ValueError, match="must be numeric"): + with tm.assert_produces_warning(FutureWarning, match="deprecated"): + index.get_loc(1.1, "nearest", tolerance="invalid") + + def test_get_loc_tolerance_no_method_raises(self): + index = Index([0, 1, 2]) + with pytest.raises(ValueError, match="tolerance .* valid if"): + index.get_loc(1.1, tolerance=1) + + def test_get_loc_raises_missized_tolerance(self): + index = Index([0, 1, 2]) + with pytest.raises(ValueError, match="tolerance size must match"): + with tm.assert_produces_warning(FutureWarning, match="deprecated"): + index.get_loc(1.1, "nearest", tolerance=[1, 1]) + + @pytest.mark.filterwarnings("ignore:Passing method:FutureWarning") + def test_get_loc_float64(self): + idx = Float64Index([0.0, 1.0, 2.0]) + for method in [None, "pad", "backfill", "nearest"]: + assert idx.get_loc(1, method) == 1 + if method is not None: + assert idx.get_loc(1, method, tolerance=0) == 1 + + for method, loc in [("pad", 1), ("backfill", 2), ("nearest", 1)]: + assert idx.get_loc(1.1, method) == loc + assert idx.get_loc(1.1, method, tolerance=0.9) == loc + + with pytest.raises(KeyError, match="^'foo'$"): + idx.get_loc("foo") + with pytest.raises(KeyError, match=r"^1\.5$"): + idx.get_loc(1.5) + with pytest.raises(KeyError, match=r"^1\.5$"): + idx.get_loc(1.5, method="pad", tolerance=0.1) + with pytest.raises(KeyError, match="^True$"): + idx.get_loc(True) + with pytest.raises(KeyError, match="^False$"): + idx.get_loc(False) + + with pytest.raises(ValueError, match="must be numeric"): + idx.get_loc(1.4, method="nearest", tolerance="foo") + + with pytest.raises(ValueError, match="must contain numeric elements"): + idx.get_loc(1.4, method="nearest", tolerance=np.array(["foo"])) + + with pytest.raises( + ValueError, match="tolerance size must match target index size" + ): + idx.get_loc(1.4, method="nearest", tolerance=np.array([1, 2])) + + def test_get_loc_na(self): + idx = Float64Index([np.nan, 1, 2]) + assert idx.get_loc(1) == 1 + assert idx.get_loc(np.nan) == 0 + + idx = Float64Index([np.nan, 1, np.nan]) + assert idx.get_loc(1) == 1 + + # representable by slice [0:2:2] + msg = "'Cannot get left slice bound for non-unique label: nan'" + with 
pytest.raises(KeyError, match=msg): + idx.slice_locs(np.nan) + # not representable by slice + idx = Float64Index([np.nan, 1, np.nan, np.nan]) + assert idx.get_loc(1) == 1 + msg = "'Cannot get left slice bound for non-unique label: nan" + with pytest.raises(KeyError, match=msg): + idx.slice_locs(np.nan) + + def test_get_loc_missing_nan(self): + # GH#8569 + idx = Float64Index([1, 2]) + assert idx.get_loc(1) == 0 + with pytest.raises(KeyError, match=r"^3$"): + idx.get_loc(3) + with pytest.raises(KeyError, match="^nan$"): + idx.get_loc(np.nan) + with pytest.raises(InvalidIndexError, match=r"\[nan\]"): + # listlike/non-hashable raises TypeError + idx.get_loc([np.nan]) + + @pytest.mark.parametrize("vals", [[1], [1.0], [Timestamp("2019-12-31")], ["test"]]) + @pytest.mark.parametrize("method", ["nearest", "pad", "backfill"]) + def test_get_loc_float_index_nan_with_method(self, vals, method): + # GH#39382 + idx = Index(vals) + with pytest.raises(KeyError, match="nan"): + with tm.assert_produces_warning(FutureWarning, match="deprecated"): + idx.get_loc(np.nan, method=method) + + @pytest.mark.parametrize("dtype", ["f8", "i8", "u8"]) + def test_get_loc_numericindex_none_raises(self, dtype): + # case that goes through searchsorted and key is non-comparable to values + arr = np.arange(10 ** 7, dtype=dtype) + idx = Index(arr) + with pytest.raises(KeyError, match="None"): + idx.get_loc(None) + + def test_get_loc_overflows(self): + # unique but non-monotonic goes through IndexEngine.mapping.get_item + idx = Index([0, 2, 1]) + + val = np.iinfo(np.int64).max + 1 + + with pytest.raises(KeyError, match=str(val)): + idx.get_loc(val) + with pytest.raises(KeyError, match=str(val)): + idx._engine.get_loc(val) + + +class TestGetIndexer: + def test_get_indexer(self): + index1 = Index([1, 2, 3, 4, 5]) + index2 = Index([2, 4, 6]) + + r1 = index1.get_indexer(index2) + e1 = np.array([1, 3, -1], dtype=np.intp) + tm.assert_almost_equal(r1, e1) + + @pytest.mark.parametrize("reverse", [True, False]) + @pytest.mark.parametrize( + "expected,method", + [ + (np.array([-1, 0, 0, 1, 1], dtype=np.intp), "pad"), + (np.array([-1, 0, 0, 1, 1], dtype=np.intp), "ffill"), + (np.array([0, 0, 1, 1, 2], dtype=np.intp), "backfill"), + (np.array([0, 0, 1, 1, 2], dtype=np.intp), "bfill"), + ], + ) + def test_get_indexer_methods(self, reverse, expected, method): + index1 = Index([1, 2, 3, 4, 5]) + index2 = Index([2, 4, 6]) + + if reverse: + index1 = index1[::-1] + expected = expected[::-1] + + result = index2.get_indexer(index1, method=method) + tm.assert_almost_equal(result, expected) + + def test_get_indexer_invalid(self): + # GH10411 + index = Index(np.arange(10)) + + with pytest.raises(ValueError, match="tolerance argument"): + index.get_indexer([1, 0], tolerance=1) + + with pytest.raises(ValueError, match="limit argument"): + index.get_indexer([1, 0], limit=1) + + @pytest.mark.parametrize( + "method, tolerance, indexer, expected", + [ + ("pad", None, [0, 5, 9], [0, 5, 9]), + ("backfill", None, [0, 5, 9], [0, 5, 9]), + ("nearest", None, [0, 5, 9], [0, 5, 9]), + ("pad", 0, [0, 5, 9], [0, 5, 9]), + ("backfill", 0, [0, 5, 9], [0, 5, 9]), + ("nearest", 0, [0, 5, 9], [0, 5, 9]), + ("pad", None, [0.2, 1.8, 8.5], [0, 1, 8]), + ("backfill", None, [0.2, 1.8, 8.5], [1, 2, 9]), + ("nearest", None, [0.2, 1.8, 8.5], [0, 2, 9]), + ("pad", 1, [0.2, 1.8, 8.5], [0, 1, 8]), + ("backfill", 1, [0.2, 1.8, 8.5], [1, 2, 9]), + ("nearest", 1, [0.2, 1.8, 8.5], [0, 2, 9]), + ("pad", 0.2, [0.2, 1.8, 8.5], [0, -1, -1]), + ("backfill", 0.2, [0.2, 1.8, 8.5], [-1, 2, 
-1]), + ("nearest", 0.2, [0.2, 1.8, 8.5], [0, 2, -1]), + ], + ) + def test_get_indexer_nearest(self, method, tolerance, indexer, expected): + index = Index(np.arange(10)) + + actual = index.get_indexer(indexer, method=method, tolerance=tolerance) + tm.assert_numpy_array_equal(actual, np.array(expected, dtype=np.intp)) + + @pytest.mark.parametrize("listtype", [list, tuple, Series, np.array]) + @pytest.mark.parametrize( + "tolerance, expected", + list( + zip( + [[0.3, 0.3, 0.1], [0.2, 0.1, 0.1], [0.1, 0.5, 0.5]], + [[0, 2, -1], [0, -1, -1], [-1, 2, 9]], + ) + ), + ) + def test_get_indexer_nearest_listlike_tolerance( + self, tolerance, expected, listtype + ): + index = Index(np.arange(10)) + + actual = index.get_indexer( + [0.2, 1.8, 8.5], method="nearest", tolerance=listtype(tolerance) + ) + tm.assert_numpy_array_equal(actual, np.array(expected, dtype=np.intp)) + + def test_get_indexer_nearest_error(self): + index = Index(np.arange(10)) + with pytest.raises(ValueError, match="limit argument"): + index.get_indexer([1, 0], method="nearest", limit=1) + + with pytest.raises(ValueError, match="tolerance size must match"): + index.get_indexer([1, 0], method="nearest", tolerance=[1, 2, 3]) + + @pytest.mark.parametrize( + "method,expected", + [("pad", [8, 7, 0]), ("backfill", [9, 8, 1]), ("nearest", [9, 7, 0])], + ) + def test_get_indexer_nearest_decreasing(self, method, expected): + index = Index(np.arange(10))[::-1] + + actual = index.get_indexer([0, 5, 9], method=method) + tm.assert_numpy_array_equal(actual, np.array([9, 4, 0], dtype=np.intp)) + + actual = index.get_indexer([0.2, 1.8, 8.5], method=method) + tm.assert_numpy_array_equal(actual, np.array(expected, dtype=np.intp)) + + @pytest.mark.parametrize( + "idx_class", [Int64Index, RangeIndex, Float64Index, UInt64Index] + ) + @pytest.mark.parametrize("method", ["get_indexer", "get_indexer_non_unique"]) + def test_get_indexer_numeric_index_boolean_target(self, method, idx_class): + # GH 16877 + + numeric_index = idx_class(RangeIndex(4)) + other = Index([True, False, True]) + + result = getattr(numeric_index, method)(other) + expected = np.array([-1, -1, -1], dtype=np.intp) + if method == "get_indexer": + tm.assert_numpy_array_equal(result, expected) + else: + missing = np.arange(3, dtype=np.intp) + tm.assert_numpy_array_equal(result[0], expected) + tm.assert_numpy_array_equal(result[1], missing) + + @pytest.mark.parametrize("method", ["pad", "backfill", "nearest"]) + def test_get_indexer_with_method_numeric_vs_bool(self, method): + left = Index([1, 2, 3]) + right = Index([True, False]) + + with pytest.raises(TypeError, match="Cannot compare"): + left.get_indexer(right, method=method) + + with pytest.raises(TypeError, match="Cannot compare"): + right.get_indexer(left, method=method) + + def test_get_indexer_numeric_vs_bool(self): + left = Index([1, 2, 3]) + right = Index([True, False]) + + res = left.get_indexer(right) + expected = -1 * np.ones(len(right), dtype=np.intp) + tm.assert_numpy_array_equal(res, expected) + + res = right.get_indexer(left) + expected = -1 * np.ones(len(left), dtype=np.intp) + tm.assert_numpy_array_equal(res, expected) + + res = left.get_indexer_non_unique(right)[0] + expected = -1 * np.ones(len(right), dtype=np.intp) + tm.assert_numpy_array_equal(res, expected) + + res = right.get_indexer_non_unique(left)[0] + expected = -1 * np.ones(len(left), dtype=np.intp) + tm.assert_numpy_array_equal(res, expected) + + def test_get_indexer_float64(self): + idx = Float64Index([0.0, 1.0, 2.0]) + tm.assert_numpy_array_equal( + 
idx.get_indexer(idx), np.array([0, 1, 2], dtype=np.intp) + ) + + target = [-0.1, 0.5, 1.1] + tm.assert_numpy_array_equal( + idx.get_indexer(target, "pad"), np.array([-1, 0, 1], dtype=np.intp) + ) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "backfill"), np.array([0, 1, 2], dtype=np.intp) + ) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "nearest"), np.array([0, 1, 1], dtype=np.intp) + ) + + def test_get_indexer_nan(self): + # GH#7820 + result = Float64Index([1, 2, np.nan]).get_indexer([np.nan]) + expected = np.array([2], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + def test_get_indexer_int64(self): + index = Int64Index(range(0, 20, 2)) + target = Int64Index(np.arange(10)) + indexer = index.get_indexer(target) + expected = np.array([0, -1, 1, -1, 2, -1, 3, -1, 4, -1], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected) + + target = Int64Index(np.arange(10)) + indexer = index.get_indexer(target, method="pad") + expected = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected) + + target = Int64Index(np.arange(10)) + indexer = index.get_indexer(target, method="backfill") + expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected) + + def test_get_indexer_uint64(self, index_large): + target = UInt64Index(np.arange(10).astype("uint64") * 5 + 2 ** 63) + indexer = index_large.get_indexer(target) + expected = np.array([0, -1, 1, 2, 3, 4, -1, -1, -1, -1], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected) + + target = UInt64Index(np.arange(10).astype("uint64") * 5 + 2 ** 63) + indexer = index_large.get_indexer(target, method="pad") + expected = np.array([0, 0, 1, 2, 3, 4, 4, 4, 4, 4], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected) + + target = UInt64Index(np.arange(10).astype("uint64") * 5 + 2 ** 63) + indexer = index_large.get_indexer(target, method="backfill") + expected = np.array([0, 1, 1, 2, 3, 4, -1, -1, -1, -1], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected) + + +class TestWhere: + @pytest.mark.parametrize( + "index", + [ + Float64Index(np.arange(5, dtype="float64")), + Int64Index(range(0, 20, 2)), + UInt64Index(np.arange(5, dtype="uint64")), + ], + ) + def test_where(self, listlike_box, index): + cond = [True] * len(index) + expected = index + result = index.where(listlike_box(cond)) + + cond = [False] + [True] * (len(index) - 1) + expected = Float64Index([index._na_value] + index[1:].tolist()) + result = index.where(listlike_box(cond)) + tm.assert_index_equal(result, expected) + + def test_where_uint64(self): + idx = UInt64Index([0, 6, 2]) + mask = np.array([False, True, False]) + other = np.array([1], dtype=np.int64) + + expected = UInt64Index([1, 6, 1]) + + result = idx.where(mask, other) + tm.assert_index_equal(result, expected) + + result = idx.putmask(~mask, other) + tm.assert_index_equal(result, expected) + + +class TestTake: + @pytest.mark.parametrize("klass", [Float64Index, Int64Index, UInt64Index]) + def test_take_preserve_name(self, klass): + index = klass([1, 2, 3, 4], name="foo") + taken = index.take([3, 0, 1]) + assert index.name == taken.name + + def test_take_fill_value_float64(self): + # GH 12631 + idx = Float64Index([1.0, 2.0, 3.0], name="xxx") + result = idx.take(np.array([1, 0, -1])) + expected = Float64Index([2.0, 1.0, 3.0], name="xxx") + tm.assert_index_equal(result, expected) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + 
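        # Note for the call above: with allow_fill=True (the default) and a
        # non-None fill_value, -1 is a missing-value marker rather than a
        # position from the end, so the NaN in the expected result below is
        # filled in, not taken; roughly,
        #   Float64Index([1.0, 2.0, 3.0]).take([1, 0, -1], fill_value=True)
        # gives Float64Index([2.0, 1.0, nan]).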
expected = Float64Index([2.0, 1.0, np.nan], name="xxx") + tm.assert_index_equal(result, expected) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = Float64Index([2.0, 1.0, 3.0], name="xxx") + tm.assert_index_equal(result, expected) + + msg = ( + "When allow_fill=True and fill_value is not None, " + "all indices must be >= -1" + ) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + msg = "index -5 is out of bounds for (axis 0 with )?size 3" + with pytest.raises(IndexError, match=msg): + idx.take(np.array([1, -5])) + + @pytest.mark.parametrize("klass", [Int64Index, UInt64Index]) + def test_take_fill_value_ints(self, klass): + # see gh-12631 + idx = klass([1, 2, 3], name="xxx") + result = idx.take(np.array([1, 0, -1])) + expected = klass([2, 1, 3], name="xxx") + tm.assert_index_equal(result, expected) + + name = klass.__name__ + msg = f"Unable to fill values because {name} cannot contain NA" + + # fill_value=True + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -1]), fill_value=True) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = klass([2, 1, 3], name="xxx") + tm.assert_index_equal(result, expected) + + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + msg = "index -5 is out of bounds for (axis 0 with )?size 3" + with pytest.raises(IndexError, match=msg): + idx.take(np.array([1, -5])) + + +class TestContains: + @pytest.mark.parametrize("klass", [Float64Index, Int64Index, UInt64Index]) + def test_contains_none(self, klass): + # GH#35788 should return False, not raise TypeError + index = klass([0, 1, 2, 3, 4]) + assert None not in index + + def test_contains_float64_nans(self): + index = Float64Index([1.0, 2.0, np.nan]) + assert np.nan in index + + def test_contains_float64_not_nans(self): + index = Float64Index([1.0, 2.0, np.nan]) + assert 1.0 in index + + +class TestSliceLocs: + @pytest.mark.parametrize("dtype", [int, float]) + def test_slice_locs(self, dtype): + index = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=dtype)) + n = len(index) + + assert index.slice_locs(start=2) == (2, n) + assert index.slice_locs(start=3) == (3, n) + assert index.slice_locs(3, 8) == (3, 6) + assert index.slice_locs(5, 10) == (3, n) + assert index.slice_locs(end=8) == (0, 6) + assert index.slice_locs(end=9) == (0, 7) + + # reversed + index2 = index[::-1] + assert index2.slice_locs(8, 2) == (2, 6) + assert index2.slice_locs(7, 3) == (2, 5) + + @pytest.mark.parametrize("dtype", [int, float]) + def test_slice_locs_float_locs(self, dtype): + index = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=dtype)) + n = len(index) + assert index.slice_locs(5.0, 10.0) == (3, n) + assert index.slice_locs(4.5, 10.5) == (3, 8) + + index2 = index[::-1] + assert index2.slice_locs(8.5, 1.5) == (2, 6) + assert index2.slice_locs(10.5, -1) == (0, n) + + @pytest.mark.parametrize("dtype", [int, float]) + def test_slice_locs_dup_numeric(self, dtype): + index = Index(np.array([10, 12, 12, 14], dtype=dtype)) + assert index.slice_locs(12, 12) == (1, 3) + assert index.slice_locs(11, 13) == (1, 3) + + index2 = index[::-1] + assert index2.slice_locs(12, 12) == (1, 3) + assert index2.slice_locs(13, 11) == (1, 3) + 
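    # On a monotonic index with duplicates, slice_locs returns the half-open
    # positional range covering every matching label, in either sort order:
    # for Index([10, 12, 12, 14]), slice_locs(12, 12) == (1, 3), so the
    # positional slice index[1:3] picks up both 12s.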
+ def test_slice_locs_na(self): + index = Index([np.nan, 1, 2]) + assert index.slice_locs(1) == (1, 3) + assert index.slice_locs(np.nan) == (0, 3) + + index = Index([0, np.nan, np.nan, 1, 2]) + assert index.slice_locs(np.nan) == (1, 5) + + def test_slice_locs_na_raises(self): + index = Index([np.nan, 1, 2]) + with pytest.raises(KeyError, match=""): + index.slice_locs(start=1.5) + + with pytest.raises(KeyError, match=""): + index.slice_locs(end=1.5) + + +class TestGetSliceBounds: + @pytest.mark.parametrize("kind", ["getitem", "loc", None]) + @pytest.mark.parametrize("side, expected", [("left", 4), ("right", 5)]) + def test_get_slice_bounds_within(self, kind, side, expected): + index = Index(range(6)) + with tm.assert_produces_warning(FutureWarning, match="'kind' argument"): + + result = index.get_slice_bound(4, kind=kind, side=side) + assert result == expected + + @pytest.mark.parametrize("kind", ["getitem", "loc", None]) + @pytest.mark.parametrize("side", ["left", "right"]) + @pytest.mark.parametrize("bound, expected", [(-1, 0), (10, 6)]) + def test_get_slice_bounds_outside(self, kind, side, expected, bound): + index = Index(range(6)) + with tm.assert_produces_warning(FutureWarning, match="'kind' argument"): + result = index.get_slice_bound(bound, kind=kind, side=side) + assert result == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/numeric/test_join.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/numeric/test_join.py new file mode 100644 index 0000000..2a47289 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/numeric/test_join.py @@ -0,0 +1,392 @@ +import numpy as np +import pytest + +import pandas._testing as tm +from pandas.core.indexes.api import ( + Index, + Int64Index, + UInt64Index, +) + + +class TestJoinInt64Index: + def test_join_non_unique(self): + left = Index([4, 4, 3, 3]) + + joined, lidx, ridx = left.join(left, return_indexers=True) + + exp_joined = Index([3, 3, 3, 3, 4, 4, 4, 4]) + tm.assert_index_equal(joined, exp_joined) + + exp_lidx = np.array([2, 2, 3, 3, 0, 0, 1, 1], dtype=np.intp) + tm.assert_numpy_array_equal(lidx, exp_lidx) + + exp_ridx = np.array([2, 3, 2, 3, 0, 1, 0, 1], dtype=np.intp) + tm.assert_numpy_array_equal(ridx, exp_ridx) + + def test_join_inner(self): + index = Int64Index(range(0, 20, 2)) + other = Int64Index([7, 12, 25, 1, 2, 5]) + other_mono = Int64Index([1, 2, 5, 7, 12, 25]) + + # not monotonic + res, lidx, ridx = index.join(other, how="inner", return_indexers=True) + + # no guarantee of sortedness, so sort for comparison purposes + ind = res.argsort() + res = res.take(ind) + lidx = lidx.take(ind) + ridx = ridx.take(ind) + + eres = Int64Index([2, 12]) + elidx = np.array([1, 6], dtype=np.intp) + eridx = np.array([4, 1], dtype=np.intp) + + assert isinstance(res, Int64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + # monotonic + res, lidx, ridx = index.join(other_mono, how="inner", return_indexers=True) + + res2 = index.intersection(other_mono) + tm.assert_index_equal(res, res2) + + elidx = np.array([1, 6], dtype=np.intp) + eridx = np.array([1, 4], dtype=np.intp) + assert isinstance(res, Int64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_left(self): + index = Int64Index(range(0, 20, 2)) + other = Int64Index([7, 12, 25, 1, 2, 5]) + other_mono = Int64Index([1, 2, 5, 7, 12, 25]) + + # not monotonic + res, 
lidx, ridx = index.join(other, how="left", return_indexers=True) + eres = index + eridx = np.array([-1, 4, -1, -1, -1, -1, 1, -1, -1, -1], dtype=np.intp) + + assert isinstance(res, Int64Index) + tm.assert_index_equal(res, eres) + assert lidx is None + tm.assert_numpy_array_equal(ridx, eridx) + + # monotonic + res, lidx, ridx = index.join(other_mono, how="left", return_indexers=True) + eridx = np.array([-1, 1, -1, -1, -1, -1, 4, -1, -1, -1], dtype=np.intp) + assert isinstance(res, Int64Index) + tm.assert_index_equal(res, eres) + assert lidx is None + tm.assert_numpy_array_equal(ridx, eridx) + + # non-unique + idx = Index([1, 1, 2, 5]) + idx2 = Index([1, 2, 5, 7, 9]) + res, lidx, ridx = idx2.join(idx, how="left", return_indexers=True) + eres = Index([1, 1, 2, 5, 7, 9]) # 1 is in idx2, so it should be x2 + eridx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp) + elidx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_right(self): + index = Int64Index(range(0, 20, 2)) + other = Int64Index([7, 12, 25, 1, 2, 5]) + other_mono = Int64Index([1, 2, 5, 7, 12, 25]) + + # not monotonic + res, lidx, ridx = index.join(other, how="right", return_indexers=True) + eres = other + elidx = np.array([-1, 6, -1, -1, 1, -1], dtype=np.intp) + + assert isinstance(other, Int64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + assert ridx is None + + # monotonic + res, lidx, ridx = index.join(other_mono, how="right", return_indexers=True) + eres = other_mono + elidx = np.array([-1, 1, -1, -1, 6, -1], dtype=np.intp) + assert isinstance(other, Int64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + assert ridx is None + + # non-unique + idx = Index([1, 1, 2, 5]) + idx2 = Index([1, 2, 5, 7, 9]) + res, lidx, ridx = idx.join(idx2, how="right", return_indexers=True) + eres = Index([1, 1, 2, 5, 7, 9]) # 1 is in idx2, so it should be x2 + elidx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp) + eridx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_non_int_index(self): + index = Int64Index(range(0, 20, 2)) + other = Index([3, 6, 7, 8, 10], dtype=object) + + outer = index.join(other, how="outer") + outer2 = other.join(index, how="outer") + expected = Index([0, 2, 3, 4, 6, 7, 8, 10, 12, 14, 16, 18]) + tm.assert_index_equal(outer, outer2) + tm.assert_index_equal(outer, expected) + + inner = index.join(other, how="inner") + inner2 = other.join(index, how="inner") + expected = Index([6, 8, 10]) + tm.assert_index_equal(inner, inner2) + tm.assert_index_equal(inner, expected) + + left = index.join(other, how="left") + tm.assert_index_equal(left, index.astype(object)) + + left2 = other.join(index, how="left") + tm.assert_index_equal(left2, other) + + right = index.join(other, how="right") + tm.assert_index_equal(right, other) + + right2 = other.join(index, how="right") + tm.assert_index_equal(right2, index.astype(object)) + + def test_join_outer(self): + index = Int64Index(range(0, 20, 2)) + other = Int64Index([7, 12, 25, 1, 2, 5]) + other_mono = Int64Index([1, 2, 5, 7, 12, 25]) + + # not monotonic + # guarantee of sortedness + res, lidx, ridx = index.join(other, how="outer", return_indexers=True) + noidx_res = index.join(other, how="outer") + tm.assert_index_equal(res, noidx_res) + + eres = 
Int64Index([0, 1, 2, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 25]) + elidx = np.array([0, -1, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, 9, -1], dtype=np.intp) + eridx = np.array( + [-1, 3, 4, -1, 5, -1, 0, -1, -1, 1, -1, -1, -1, 2], dtype=np.intp + ) + + assert isinstance(res, Int64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + # monotonic + res, lidx, ridx = index.join(other_mono, how="outer", return_indexers=True) + noidx_res = index.join(other_mono, how="outer") + tm.assert_index_equal(res, noidx_res) + + elidx = np.array([0, -1, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, 9, -1], dtype=np.intp) + eridx = np.array( + [-1, 0, 1, -1, 2, -1, 3, -1, -1, 4, -1, -1, -1, 5], dtype=np.intp + ) + assert isinstance(res, Int64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + +class TestJoinUInt64Index: + @pytest.fixture + def index_large(self): + # large values used in TestUInt64Index where no compat needed with Int64/Float64 + large = [2 ** 63, 2 ** 63 + 10, 2 ** 63 + 15, 2 ** 63 + 20, 2 ** 63 + 25] + return UInt64Index(large) + + def test_join_inner(self, index_large): + other = UInt64Index(2 ** 63 + np.array([7, 12, 25, 1, 2, 10], dtype="uint64")) + other_mono = UInt64Index( + 2 ** 63 + np.array([1, 2, 7, 10, 12, 25], dtype="uint64") + ) + + # not monotonic + res, lidx, ridx = index_large.join(other, how="inner", return_indexers=True) + + # no guarantee of sortedness, so sort for comparison purposes + ind = res.argsort() + res = res.take(ind) + lidx = lidx.take(ind) + ridx = ridx.take(ind) + + eres = UInt64Index(2 ** 63 + np.array([10, 25], dtype="uint64")) + elidx = np.array([1, 4], dtype=np.intp) + eridx = np.array([5, 2], dtype=np.intp) + + assert isinstance(res, UInt64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + # monotonic + res, lidx, ridx = index_large.join( + other_mono, how="inner", return_indexers=True + ) + + res2 = index_large.intersection(other_mono) + tm.assert_index_equal(res, res2) + + elidx = np.array([1, 4], dtype=np.intp) + eridx = np.array([3, 5], dtype=np.intp) + + assert isinstance(res, UInt64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_left(self, index_large): + other = UInt64Index(2 ** 63 + np.array([7, 12, 25, 1, 2, 10], dtype="uint64")) + other_mono = UInt64Index( + 2 ** 63 + np.array([1, 2, 7, 10, 12, 25], dtype="uint64") + ) + + # not monotonic + res, lidx, ridx = index_large.join(other, how="left", return_indexers=True) + eres = index_large + eridx = np.array([-1, 5, -1, -1, 2], dtype=np.intp) + + assert isinstance(res, UInt64Index) + tm.assert_index_equal(res, eres) + assert lidx is None + tm.assert_numpy_array_equal(ridx, eridx) + + # monotonic + res, lidx, ridx = index_large.join(other_mono, how="left", return_indexers=True) + eridx = np.array([-1, 3, -1, -1, 5], dtype=np.intp) + + assert isinstance(res, UInt64Index) + tm.assert_index_equal(res, eres) + assert lidx is None + tm.assert_numpy_array_equal(ridx, eridx) + + # non-unique + idx = UInt64Index(2 ** 63 + np.array([1, 1, 2, 5], dtype="uint64")) + idx2 = UInt64Index(2 ** 63 + np.array([1, 2, 5, 7, 9], dtype="uint64")) + res, lidx, ridx = idx2.join(idx, how="left", return_indexers=True) + + # 1 is in idx2, so it should be x2 + eres = UInt64Index(2 ** 63 + np.array([1, 1, 2, 5, 7, 9], 
dtype="uint64")) + eridx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp) + elidx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp) + + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_right(self, index_large): + other = UInt64Index(2 ** 63 + np.array([7, 12, 25, 1, 2, 10], dtype="uint64")) + other_mono = UInt64Index( + 2 ** 63 + np.array([1, 2, 7, 10, 12, 25], dtype="uint64") + ) + + # not monotonic + res, lidx, ridx = index_large.join(other, how="right", return_indexers=True) + eres = other + elidx = np.array([-1, -1, 4, -1, -1, 1], dtype=np.intp) + + tm.assert_numpy_array_equal(lidx, elidx) + assert isinstance(other, UInt64Index) + tm.assert_index_equal(res, eres) + assert ridx is None + + # monotonic + res, lidx, ridx = index_large.join( + other_mono, how="right", return_indexers=True + ) + eres = other_mono + elidx = np.array([-1, -1, -1, 1, -1, 4], dtype=np.intp) + + assert isinstance(other, UInt64Index) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_index_equal(res, eres) + assert ridx is None + + # non-unique + idx = UInt64Index(2 ** 63 + np.array([1, 1, 2, 5], dtype="uint64")) + idx2 = UInt64Index(2 ** 63 + np.array([1, 2, 5, 7, 9], dtype="uint64")) + res, lidx, ridx = idx.join(idx2, how="right", return_indexers=True) + + # 1 is in idx2, so it should be x2 + eres = UInt64Index(2 ** 63 + np.array([1, 1, 2, 5, 7, 9], dtype="uint64")) + elidx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp) + eridx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp) + + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_non_int_index(self, index_large): + other = Index( + 2 ** 63 + np.array([1, 5, 7, 10, 20], dtype="uint64"), dtype=object + ) + + outer = index_large.join(other, how="outer") + outer2 = other.join(index_large, how="outer") + expected = Index( + 2 ** 63 + np.array([0, 1, 5, 7, 10, 15, 20, 25], dtype="uint64") + ) + tm.assert_index_equal(outer, outer2) + tm.assert_index_equal(outer, expected) + + inner = index_large.join(other, how="inner") + inner2 = other.join(index_large, how="inner") + expected = Index(2 ** 63 + np.array([10, 20], dtype="uint64")) + tm.assert_index_equal(inner, inner2) + tm.assert_index_equal(inner, expected) + + left = index_large.join(other, how="left") + tm.assert_index_equal(left, index_large.astype(object)) + + left2 = other.join(index_large, how="left") + tm.assert_index_equal(left2, other) + + right = index_large.join(other, how="right") + tm.assert_index_equal(right, other) + + right2 = other.join(index_large, how="right") + tm.assert_index_equal(right2, index_large.astype(object)) + + def test_join_outer(self, index_large): + other = UInt64Index(2 ** 63 + np.array([7, 12, 25, 1, 2, 10], dtype="uint64")) + other_mono = UInt64Index( + 2 ** 63 + np.array([1, 2, 7, 10, 12, 25], dtype="uint64") + ) + + # not monotonic + # guarantee of sortedness + res, lidx, ridx = index_large.join(other, how="outer", return_indexers=True) + noidx_res = index_large.join(other, how="outer") + tm.assert_index_equal(res, noidx_res) + + eres = UInt64Index( + 2 ** 63 + np.array([0, 1, 2, 7, 10, 12, 15, 20, 25], dtype="uint64") + ) + elidx = np.array([0, -1, -1, -1, 1, -1, 2, 3, 4], dtype=np.intp) + eridx = np.array([-1, 3, 4, 0, 5, 1, -1, -1, 2], dtype=np.intp) + + assert isinstance(res, UInt64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, 
eridx) + + # monotonic + res, lidx, ridx = index_large.join( + other_mono, how="outer", return_indexers=True + ) + noidx_res = index_large.join(other_mono, how="outer") + tm.assert_index_equal(res, noidx_res) + + elidx = np.array([0, -1, -1, -1, 1, -1, 2, 3, 4], dtype=np.intp) + eridx = np.array([-1, 0, 1, 2, 3, 4, -1, -1, 5], dtype=np.intp) + + assert isinstance(res, UInt64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/numeric/test_numeric.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/numeric/test_numeric.py new file mode 100644 index 0000000..af30837 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/numeric/test_numeric.py @@ -0,0 +1,689 @@ +import numpy as np +import pytest + +from pandas._libs.tslibs import Timestamp + +import pandas as pd +from pandas import ( + Index, + Series, +) +import pandas._testing as tm +from pandas.core.indexes.api import ( + Float64Index, + Int64Index, + NumericIndex, + UInt64Index, +) +from pandas.tests.indexes.common import NumericBase + + +class TestFloatNumericIndex(NumericBase): + _index_cls = NumericIndex + + @pytest.fixture(params=[np.float64, np.float32]) + def dtype(self, request): + return request.param + + @pytest.fixture(params=["category", "datetime64", "object"]) + def invalid_dtype(self, request): + return request.param + + @pytest.fixture + def simple_index(self, dtype): + values = np.arange(5, dtype=dtype) + return self._index_cls(values) + + @pytest.fixture( + params=[ + [1.5, 2, 3, 4, 5], + [0.0, 2.5, 5.0, 7.5, 10.0], + [5, 4, 3, 2, 1.5], + [10.0, 7.5, 5.0, 2.5, 0.0], + ], + ids=["mixed", "float", "mixed_dec", "float_dec"], + ) + def index(self, request, dtype): + return self._index_cls(request.param, dtype=dtype) + + @pytest.fixture + def mixed_index(self, dtype): + return self._index_cls([1.5, 2, 3, 4, 5], dtype=dtype) + + @pytest.fixture + def float_index(self, dtype): + return self._index_cls([0.0, 2.5, 5.0, 7.5, 10.0], dtype=dtype) + + def test_repr_roundtrip(self, index): + tm.assert_index_equal(eval(repr(index)), index, exact=True) + + def check_is_index(self, idx): + assert isinstance(idx, Index) + assert not isinstance(idx, self._index_cls) + + def check_coerce(self, a, b, is_float_index=True): + assert a.equals(b) + tm.assert_index_equal(a, b, exact=False) + if is_float_index: + assert isinstance(b, self._index_cls) + else: + self.check_is_index(b) + + def test_constructor(self, dtype): + index_cls = self._index_cls + + # explicit construction + index = index_cls([1, 2, 3, 4, 5], dtype=dtype) + + assert isinstance(index, index_cls) + assert index.dtype == dtype + + expected = np.array([1, 2, 3, 4, 5], dtype=dtype) + tm.assert_numpy_array_equal(index.values, expected) + + index = index_cls(np.array([1, 2, 3, 4, 5]), dtype=dtype) + assert isinstance(index, index_cls) + assert index.dtype == dtype + + index = index_cls([1.0, 2, 3, 4, 5], dtype=dtype) + assert isinstance(index, index_cls) + assert index.dtype == dtype + + index = index_cls(np.array([1.0, 2, 3, 4, 5]), dtype=dtype) + assert isinstance(index, index_cls) + assert index.dtype == dtype + + index = index_cls([1.0, 2, 3, 4, 5], dtype=dtype) + assert isinstance(index, index_cls) + assert index.dtype == dtype + + index = index_cls(np.array([1.0, 2, 3, 4, 5]), dtype=dtype) + assert isinstance(index, index_cls) + assert index.dtype == dtype + + # nan handling + result = index_cls([np.nan, 
np.nan], dtype=dtype) + assert pd.isna(result.values).all() + + result = index_cls(np.array([np.nan]), dtype=dtype) + assert pd.isna(result.values).all() + + def test_constructor_invalid(self): + index_cls = self._index_cls + cls_name = index_cls.__name__ + + # invalid + msg = ( + rf"{cls_name}\(\.\.\.\) must be called with a collection of " + r"some kind, 0\.0 was passed" + ) + with pytest.raises(TypeError, match=msg): + index_cls(0.0) + + # 2021-02-1 we get ValueError in numpy 1.20, but not on all builds + msg = "|".join( + [ + "String dtype not supported, you may need to explicitly cast ", + "could not convert string to float: 'a'", + ] + ) + with pytest.raises((TypeError, ValueError), match=msg): + index_cls(["a", "b", 0.0]) + + msg = f"data is not compatible with {index_cls.__name__}" + with pytest.raises(ValueError, match=msg): + index_cls([Timestamp("20130101")]) + + def test_constructor_coerce(self, mixed_index, float_index): + + self.check_coerce(mixed_index, Index([1.5, 2, 3, 4, 5])) + self.check_coerce(float_index, Index(np.arange(5) * 2.5)) + + with tm.assert_produces_warning(FutureWarning, match="will not infer"): + result = Index(np.array(np.arange(5) * 2.5, dtype=object)) + self.check_coerce(float_index, result.astype("float64")) + + def test_constructor_explicit(self, mixed_index, float_index): + + # these don't auto convert + self.check_coerce( + float_index, Index((np.arange(5) * 2.5), dtype=object), is_float_index=False + ) + self.check_coerce( + mixed_index, Index([1.5, 2, 3, 4, 5], dtype=object), is_float_index=False + ) + + def test_type_coercion_fail(self, any_int_numpy_dtype): + # see gh-15832 + msg = "Trying to coerce float values to integers" + with pytest.raises(ValueError, match=msg): + Index([1, 2, 3.5], dtype=any_int_numpy_dtype) + + def test_type_coercion_valid(self, float_numpy_dtype): + # There is no Float32Index, so we always + # generate Float64Index. 
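        # For instance, Index([1, 2, 3.5], dtype=np.float32) still comes back
        # with float64 values as Float64Index([1.0, 2.0, 3.5]), which is why
        # the exact=True comparison below holds for both float dtypes.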
+ idx = Index([1, 2, 3.5], dtype=float_numpy_dtype) + tm.assert_index_equal(idx, Index([1, 2, 3.5]), exact=True) + + def test_equals_numeric(self): + index_cls = self._index_cls + + idx = index_cls([1.0, 2.0]) + assert idx.equals(idx) + assert idx.identical(idx) + + idx2 = index_cls([1.0, 2.0]) + assert idx.equals(idx2) + + idx = index_cls([1.0, np.nan]) + assert idx.equals(idx) + assert idx.identical(idx) + + idx2 = index_cls([1.0, np.nan]) + assert idx.equals(idx2) + + @pytest.mark.parametrize( + "other", + ( + Int64Index([1, 2]), + Index([1.0, 2.0], dtype=object), + Index([1, 2], dtype=object), + ), + ) + def test_equals_numeric_other_index_type(self, other): + idx = self._index_cls([1.0, 2.0]) + assert idx.equals(other) + assert other.equals(idx) + + @pytest.mark.parametrize( + "vals", + [ + pd.date_range("2016-01-01", periods=3), + pd.timedelta_range("1 Day", periods=3), + ], + ) + def test_lookups_datetimelike_values(self, vals, dtype): + + # If we have datetime64 or timedelta64 values, make sure they are + # wrapped correctly GH#31163 + ser = Series(vals, index=range(3, 6)) + ser.index = ser.index.astype(dtype) + + expected = vals[1] + + with tm.assert_produces_warning(FutureWarning): + result = ser.index.get_value(ser, 4.0) + assert isinstance(result, type(expected)) and result == expected + with tm.assert_produces_warning(FutureWarning): + result = ser.index.get_value(ser, 4) + assert isinstance(result, type(expected)) and result == expected + + result = ser[4.0] + assert isinstance(result, type(expected)) and result == expected + result = ser[4] + assert isinstance(result, type(expected)) and result == expected + + result = ser.loc[4.0] + assert isinstance(result, type(expected)) and result == expected + result = ser.loc[4] + assert isinstance(result, type(expected)) and result == expected + + result = ser.at[4.0] + assert isinstance(result, type(expected)) and result == expected + # GH#31329 .at[4] should cast to 4.0, matching .loc behavior + result = ser.at[4] + assert isinstance(result, type(expected)) and result == expected + + result = ser.iloc[1] + assert isinstance(result, type(expected)) and result == expected + + result = ser.iat[1] + assert isinstance(result, type(expected)) and result == expected + + def test_doesnt_contain_all_the_things(self): + idx = self._index_cls([np.nan]) + assert not idx.isin([0]).item() + assert not idx.isin([1]).item() + assert idx.isin([np.nan]).item() + + def test_nan_multiple_containment(self): + index_cls = self._index_cls + + idx = index_cls([1.0, np.nan]) + tm.assert_numpy_array_equal(idx.isin([1.0]), np.array([True, False])) + tm.assert_numpy_array_equal(idx.isin([2.0, np.pi]), np.array([False, False])) + tm.assert_numpy_array_equal(idx.isin([np.nan]), np.array([False, True])) + tm.assert_numpy_array_equal(idx.isin([1.0, np.nan]), np.array([True, True])) + idx = index_cls([1.0, 2.0]) + tm.assert_numpy_array_equal(idx.isin([np.nan]), np.array([False, False])) + + def test_fillna_float64(self): + index_cls = self._index_cls + # GH 11343 + idx = Index([1.0, np.nan, 3.0], dtype=float, name="x") + # can't downcast + exp = Index([1.0, 0.1, 3.0], name="x") + tm.assert_index_equal(idx.fillna(0.1), exp, exact=True) + + # downcast + exact = True if index_cls is Int64Index else "equiv" + exp = index_cls([1.0, 2.0, 3.0], name="x") + tm.assert_index_equal(idx.fillna(2), exp, exact=exact) + + # object + exp = Index([1.0, "obj", 3.0], name="x") + tm.assert_index_equal(idx.fillna("obj"), exp, exact=True) + + +class
TestFloat64Index(TestFloatNumericIndex): + _index_cls = Float64Index + + @pytest.fixture + def dtype(self, request): + return np.float64 + + @pytest.fixture( + params=["int64", "uint64", "object", "category", "datetime64"], + ) + def invalid_dtype(self, request): + return request.param + + def test_constructor_from_base_index(self, dtype): + index_cls = self._index_cls + + result = Index(np.array([np.nan], dtype=dtype)) + assert isinstance(result, index_cls) + assert result.dtype == dtype + assert pd.isna(result.values).all() + + def test_constructor_32bit(self, dtype): + index_cls = self._index_cls + + index = index_cls(np.array([1.0, 2, 3, 4, 5]), dtype=np.float32) + assert isinstance(index, index_cls) + assert index.dtype == np.float64 + + index = index_cls(np.array([1, 2, 3, 4, 5]), dtype=np.float32) + assert isinstance(index, index_cls) + assert index.dtype == np.float64 + + +class NumericInt(NumericBase): + def test_view(self, dtype): + index_cls = self._index_cls + + idx = index_cls([], dtype=dtype, name="Foo") + idx_view = idx.view() + assert idx_view.name == "Foo" + + idx_view = idx.view(dtype) + tm.assert_index_equal(idx, index_cls(idx_view, name="Foo"), exact=True) + + idx_view = idx.view(index_cls) + tm.assert_index_equal(idx, index_cls(idx_view, name="Foo"), exact=True) + + def test_is_monotonic(self): + index_cls = self._index_cls + + index = index_cls([1, 2, 3, 4]) + assert index.is_monotonic is True + assert index.is_monotonic_increasing is True + assert index._is_strictly_monotonic_increasing is True + assert index.is_monotonic_decreasing is False + assert index._is_strictly_monotonic_decreasing is False + + index = index_cls([4, 3, 2, 1]) + assert index.is_monotonic is False + assert index._is_strictly_monotonic_increasing is False + assert index._is_strictly_monotonic_decreasing is True + + index = index_cls([1]) + assert index.is_monotonic is True + assert index.is_monotonic_increasing is True + assert index.is_monotonic_decreasing is True + assert index._is_strictly_monotonic_increasing is True + assert index._is_strictly_monotonic_decreasing is True + + def test_is_strictly_monotonic(self): + index_cls = self._index_cls + + index = index_cls([1, 1, 2, 3]) + assert index.is_monotonic_increasing is True + assert index._is_strictly_monotonic_increasing is False + + index = index_cls([3, 2, 1, 1]) + assert index.is_monotonic_decreasing is True + assert index._is_strictly_monotonic_decreasing is False + + index = index_cls([1, 1]) + assert index.is_monotonic_increasing + assert index.is_monotonic_decreasing + assert not index._is_strictly_monotonic_increasing + assert not index._is_strictly_monotonic_decreasing + + def test_logical_compat(self, simple_index): + idx = simple_index + assert idx.all() == idx.values.all() + assert idx.any() == idx.values.any() + + def test_identical(self, simple_index, dtype): + index = simple_index + + idx = Index(index.copy()) + assert idx.identical(index) + + same_values_different_type = Index(idx, dtype=object) + assert not idx.identical(same_values_different_type) + + idx = index.astype(dtype=object) + idx = idx.rename("foo") + same_values = Index(idx, dtype=object) + assert same_values.identical(idx) + + assert not idx.identical(index) + assert Index(same_values, name="foo", dtype=object).identical(idx) + + assert not index.astype(dtype=object).identical(index.astype(dtype=dtype)) + + def test_cant_or_shouldnt_cast(self): + msg = ( + "String dtype not supported, " + "you may need to explicitly cast to a numeric type" + ) + # can't + data 
= ["foo", "bar", "baz"] + with pytest.raises(TypeError, match=msg): + self._index_cls(data) + + # shouldn't + data = ["0", "1", "2"] + with pytest.raises(TypeError, match=msg): + self._index_cls(data) + + def test_view_index(self, simple_index): + index = simple_index + index.view(Index) + + def test_prevent_casting(self, simple_index): + index = simple_index + result = index.astype("O") + assert result.dtype == np.object_ + + +class TestIntNumericIndex(NumericInt): + _index_cls = NumericIndex + + @pytest.fixture(params=[np.int64, np.int32, np.int16, np.int8]) + def dtype(self, request): + return request.param + + @pytest.fixture(params=["category", "datetime64", "object"]) + def invalid_dtype(self, request): + return request.param + + @pytest.fixture + def simple_index(self, dtype): + return self._index_cls(range(0, 20, 2), dtype=dtype) + + @pytest.fixture( + params=[range(0, 20, 2), range(19, -1, -1)], ids=["index_inc", "index_dec"] + ) + def index(self, request, dtype): + return self._index_cls(request.param, dtype=dtype) + + def test_constructor(self, dtype): + index_cls = self._index_cls + + # scalar raise Exception + msg = ( + rf"{index_cls.__name__}\(\.\.\.\) must be called with a collection of some " + "kind, 5 was passed" + ) + with pytest.raises(TypeError, match=msg): + index_cls(5) + + # copy + # pass list, coerce fine + index = index_cls([-5, 0, 1, 2], dtype=dtype) + arr = index.values + new_index = index_cls(arr, copy=True) + tm.assert_index_equal(new_index, index, exact=True) + val = arr[0] + 3000 + + # this should not change index + arr[0] = val + assert new_index[0] != val + + if dtype == np.int64: + exact = "equiv" if index_cls != Int64Index else True + + # pass list, coerce fine + index = index_cls([-5, 0, 1, 2], dtype=dtype) + expected = Index([-5, 0, 1, 2], dtype=dtype) + tm.assert_index_equal(index, expected, exact=exact) + + # from iterable + index = index_cls(iter([-5, 0, 1, 2]), dtype=dtype) + expected = index_cls([-5, 0, 1, 2], dtype=dtype) + tm.assert_index_equal(index, expected, exact=exact) + + # interpret list-like + expected = index_cls([5, 0], dtype=dtype) + for cls in [Index, index_cls]: + for idx in [ + cls([5, 0], dtype=dtype), + cls(np.array([5, 0]), dtype=dtype), + cls(Series([5, 0]), dtype=dtype), + ]: + tm.assert_index_equal(idx, expected, exact=exact) + + def test_constructor_corner(self, dtype): + index_cls = self._index_cls + + arr = np.array([1, 2, 3, 4], dtype=object) + + index = index_cls(arr, dtype=dtype) + assert index.values.dtype == index.dtype + if dtype == np.int64: + + msg = "will not infer" + with tm.assert_produces_warning(FutureWarning, match=msg): + without_dtype = Index(arr) + + exact = True if index_cls is Int64Index else "equiv" + tm.assert_index_equal(index, without_dtype, exact=exact) + + # preventing casting + arr = np.array([1, "2", 3, "4"], dtype=object) + with pytest.raises(TypeError, match="casting"): + index_cls(arr, dtype=dtype) + + def test_constructor_coercion_signed_to_unsigned( + self, + any_unsigned_int_numpy_dtype, + ): + + # see gh-15832 + msg = "Trying to coerce negative values to unsigned integers" + + with pytest.raises(OverflowError, match=msg): + Index([-1], dtype=any_unsigned_int_numpy_dtype) + + def test_coerce_list(self): + # coerce things + arr = Index([1, 2, 3, 4]) + assert isinstance(arr, self._index_cls) + + # but not if explicit dtype passed + arr = Index([1, 2, 3, 4], dtype=object) + assert type(arr) is Index + + +class TestInt64Index(TestIntNumericIndex): + _index_cls = Int64Index + + @pytest.fixture + 
def dtype(self): + return np.int64 + + @pytest.fixture( + params=["float64", "uint64", "object", "category", "datetime64"], + ) + def invalid_dtype(self, request): + return request.param + + def test_constructor_32bit(self, dtype): + index_cls = self._index_cls + + index = index_cls(np.array([1, 2, 3, 4, 5]), dtype=np.int32) + assert isinstance(index, index_cls) + assert index.dtype == np.int64 + + index = index_cls(np.array([1, 2, 3, 4, 5]), dtype=np.int32) + assert isinstance(index, index_cls) + assert index.dtype == np.int64 + + +class TestUIntNumericIndex(NumericInt): + + _index_cls = NumericIndex + + @pytest.fixture(params=[np.uint64]) + def dtype(self, request): + return request.param + + @pytest.fixture(params=["category", "datetime64", "object"]) + def invalid_dtype(self, request): + return request.param + + @pytest.fixture + def simple_index(self, dtype): + # compat with shared Int64/Float64 tests + return self._index_cls(np.arange(5, dtype=dtype)) + + @pytest.fixture( + params=[ + [2 ** 63, 2 ** 63 + 10, 2 ** 63 + 15, 2 ** 63 + 20, 2 ** 63 + 25], + [2 ** 63 + 25, 2 ** 63 + 20, 2 ** 63 + 15, 2 ** 63 + 10, 2 ** 63], + ], + ids=["index_inc", "index_dec"], + ) + def index(self, request): + return self._index_cls(request.param, dtype=np.uint64) + + +class TestUInt64Index(TestUIntNumericIndex): + + _index_cls = UInt64Index + + @pytest.fixture + def dtype(self): + return np.uint64 + + @pytest.fixture( + params=["int64", "float64", "object", "category", "datetime64"], + ) + def invalid_dtype(self, request): + return request.param + + def test_constructor(self, dtype): + index_cls = self._index_cls + exact = True if index_cls is UInt64Index else "equiv" + + idx = index_cls([1, 2, 3]) + res = Index([1, 2, 3], dtype=dtype) + tm.assert_index_equal(res, idx, exact=exact) + + idx = index_cls([1, 2 ** 63]) + res = Index([1, 2 ** 63], dtype=dtype) + tm.assert_index_equal(res, idx, exact=exact) + + idx = index_cls([1, 2 ** 63]) + res = Index([1, 2 ** 63]) + tm.assert_index_equal(res, idx, exact=exact) + + idx = Index([-1, 2 ** 63], dtype=object) + res = Index(np.array([-1, 2 ** 63], dtype=object)) + tm.assert_index_equal(res, idx, exact=exact) + + # https://github.com/pandas-dev/pandas/issues/29526 + idx = index_cls([1, 2 ** 63 + 1], dtype=dtype) + res = Index([1, 2 ** 63 + 1], dtype=dtype) + tm.assert_index_equal(res, idx, exact=exact) + + def test_constructor_does_not_cast_to_float(self): + # https://github.com/numpy/numpy/issues/19146 + values = [0, np.iinfo(np.uint64).max] + + result = UInt64Index(values) + assert list(result) == values + + def test_constructor_32bit(self, dtype): + index_cls = self._index_cls + + index = index_cls(np.array([1, 2, 3, 4, 5]), dtype=np.uint32) + assert isinstance(index, index_cls) + assert index.dtype == np.uint64 + + index = index_cls(np.array([1, 2, 3, 4, 5]), dtype=np.uint32) + assert isinstance(index, index_cls) + assert index.dtype == np.uint64 + + +@pytest.mark.parametrize( + "box", + [list, lambda x: np.array(x, dtype=object), lambda x: Index(x, dtype=object)], +) +def test_uint_index_does_not_convert_to_float64(box): + # https://github.com/pandas-dev/pandas/issues/28279 + # https://github.com/pandas-dev/pandas/issues/28023 + series = Series( + [0, 1, 2, 3, 4, 5], + index=[ + 7606741985629028552, + 17876870360202815256, + 17876870360202815256, + 13106359306506049338, + 8991270399732411471, + 8991270399732411472, + ], + ) + + result = series.loc[box([7606741985629028552, 17876870360202815256])] + + expected = UInt64Index( + [7606741985629028552, 
17876870360202815256, 17876870360202815256], + dtype="uint64", + ) + tm.assert_index_equal(result.index, expected) + + tm.assert_equal(result, series[:3]) + + +def test_float64_index_equals(): + # https://github.com/pandas-dev/pandas/issues/35217 + float_index = Index([1.0, 2, 3]) + string_index = Index(["1", "2", "3"]) + + result = float_index.equals(string_index) + assert result is False + + result = string_index.equals(float_index) + assert result is False + + +def test_map_dtype_inference_unsigned_to_signed(): + # GH#44609 cases where we don't retain dtype + idx = UInt64Index([1, 2, 3]) + result = idx.map(lambda x: -x) + expected = Int64Index([-1, -2, -3]) + tm.assert_index_equal(result, expected) + + +def test_map_dtype_inference_overflows(): + # GH#44609 case where we have to upcast + idx = NumericIndex(np.array([1, 2, 3], dtype=np.int8)) + result = idx.map(lambda x: x * 1000) + # TODO: we could plausibly try to infer down to int16 here + expected = NumericIndex([1000, 2000, 3000], dtype=np.int64) + tm.assert_index_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/numeric/test_setops.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/numeric/test_setops.py new file mode 100644 index 0000000..72336d3 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/numeric/test_setops.py @@ -0,0 +1,166 @@ +from datetime import ( + datetime, + timedelta, +) + +import numpy as np +import pytest + +import pandas._testing as tm +from pandas.core.indexes.api import ( + Float64Index, + Index, + Int64Index, + RangeIndex, + UInt64Index, +) + + +@pytest.fixture +def index_large(): + # large values used in TestUInt64Index where no compat needed with Int64/Float64 + large = [2 ** 63, 2 ** 63 + 10, 2 ** 63 + 15, 2 ** 63 + 20, 2 ** 63 + 25] + return UInt64Index(large) + + +class TestSetOps: + @pytest.mark.parametrize("dtype", ["f8", "u8", "i8"]) + def test_union_non_numeric(self, dtype): + # corner case, non-numeric + index = Index(np.arange(5, dtype=dtype), dtype=dtype) + assert index.dtype == dtype + + other = Index([datetime.now() + timedelta(i) for i in range(4)], dtype=object) + result = index.union(other) + expected = Index(np.concatenate((index, other))) + tm.assert_index_equal(result, expected) + + result = other.union(index) + expected = Index(np.concatenate((other, index))) + tm.assert_index_equal(result, expected) + + def test_intersection(self): + index = Int64Index(range(5)) + + other = Index([1, 2, 3, 4, 5]) + result = index.intersection(other) + expected = Index(np.sort(np.intersect1d(index.values, other.values))) + tm.assert_index_equal(result, expected) + + result = other.intersection(index) + expected = Index( + np.sort(np.asarray(np.intersect1d(index.values, other.values))) + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("dtype", ["int64", "uint64"]) + def test_int_float_union_dtype(self, dtype): + # https://github.com/pandas-dev/pandas/issues/26778 + # [u]int | float -> float + index = Index([0, 2, 3], dtype=dtype) + other = Float64Index([0.5, 1.5]) + expected = Float64Index([0.0, 0.5, 1.5, 2.0, 3.0]) + result = index.union(other) + tm.assert_index_equal(result, expected) + + result = other.union(index) + tm.assert_index_equal(result, expected) + + def test_range_float_union_dtype(self): + # https://github.com/pandas-dev/pandas/issues/26778 + index = RangeIndex(start=0, stop=3) + other = Float64Index([0.5, 1.5]) + result = index.union(other) + expected = Float64Index([0.0, 0.5, 1, 1.5, 
2.0]) + tm.assert_index_equal(result, expected) + + result = other.union(index) + tm.assert_index_equal(result, expected) + + def test_float64_index_difference(self): + # https://github.com/pandas-dev/pandas/issues/35217 + float_index = Index([1.0, 2, 3]) + string_index = Index(["1", "2", "3"]) + + result = float_index.difference(string_index) + tm.assert_index_equal(result, float_index) + + result = string_index.difference(float_index) + tm.assert_index_equal(result, string_index) + + def test_intersection_uint64_outside_int64_range(self, index_large): + other = Index([2 ** 63, 2 ** 63 + 5, 2 ** 63 + 10, 2 ** 63 + 15, 2 ** 63 + 20]) + result = index_large.intersection(other) + expected = Index(np.sort(np.intersect1d(index_large.values, other.values))) + tm.assert_index_equal(result, expected) + + result = other.intersection(index_large) + expected = Index( + np.sort(np.asarray(np.intersect1d(index_large.values, other.values))) + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "index2,keeps_name", + [ + (Index([4, 7, 6, 5, 3], name="index"), True), + (Index([4, 7, 6, 5, 3], name="other"), False), + ], + ) + def test_intersection_monotonic(self, index2, keeps_name, sort): + index1 = Index([5, 3, 2, 4, 1], name="index") + expected = Index([5, 3, 4]) + + if keeps_name: + expected.name = "index" + + result = index1.intersection(index2, sort=sort) + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + + def test_symmetric_difference(self, sort): + # smoke + index1 = Index([5, 2, 3, 4], name="index1") + index2 = Index([2, 3, 4, 1]) + result = index1.symmetric_difference(index2, sort=sort) + expected = Index([5, 1]) + assert tm.equalContents(result, expected) + assert result.name is None + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + + # __xor__ syntax + with tm.assert_produces_warning(FutureWarning): + expected = index1 ^ index2 + assert tm.equalContents(result, expected) + assert result.name is None + + +class TestSetOpsSort: + @pytest.mark.parametrize("slice_", [slice(None), slice(0)]) + def test_union_sort_other_special(self, slice_): + # https://github.com/pandas-dev/pandas/issues/24959 + + idx = Index([1, 0, 2]) + # default, sort=None + other = idx[slice_] + tm.assert_index_equal(idx.union(other), idx) + tm.assert_index_equal(other.union(idx), idx) + + # sort=False + tm.assert_index_equal(idx.union(other, sort=False), idx) + + @pytest.mark.xfail(reason="Not implemented") + @pytest.mark.parametrize("slice_", [slice(None), slice(0)]) + def test_union_sort_special_true(self, slice_): + # TODO(GH#25151): decide on True behaviour + # sort=True + idx = Index([1, 0, 2]) + # default, sort=None + other = idx[slice_] + + result = idx.union(other, sort=True) + expected = Index([0, 1, 2]) + tm.assert_index_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/object/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/object/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/object/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/object/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..cfda6f7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/object/__pycache__/__init__.cpython-38.pyc differ diff --git 
a/.local/lib/python3.8/site-packages/pandas/tests/indexes/object/__pycache__/test_astype.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/object/__pycache__/test_astype.cpython-38.pyc new file mode 100644 index 0000000..76520c3 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/object/__pycache__/test_astype.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/object/__pycache__/test_indexing.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/object/__pycache__/test_indexing.cpython-38.pyc new file mode 100644 index 0000000..61477d5 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/object/__pycache__/test_indexing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/object/test_astype.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/object/test_astype.py new file mode 100644 index 0000000..9bfc0c1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/object/test_astype.py @@ -0,0 +1,10 @@ +from pandas import Index +import pandas._testing as tm + + +def test_astype_str_from_bytes(): + # https://github.com/pandas-dev/pandas/issues/38607 + idx = Index(["あ", b"a"], dtype="object") + result = idx.astype(str) + expected = Index(["あ", "a"], dtype="object") + tm.assert_index_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/object/test_indexing.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/object/test_indexing.py new file mode 100644 index 0000000..38bd969 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/object/test_indexing.py @@ -0,0 +1,196 @@ +from decimal import Decimal + +import numpy as np +import pytest + +from pandas._libs.missing import is_matching_na + +import pandas as pd +from pandas import Index +import pandas._testing as tm + + +class TestGetLoc: + def test_get_loc_raises_object_nearest(self): + index = Index(["a", "c"]) + with pytest.raises(TypeError, match="unsupported operand type"): + with tm.assert_produces_warning(FutureWarning, match="deprecated"): + index.get_loc("a", method="nearest") + + def test_get_loc_raises_object_tolerance(self): + index = Index(["a", "c"]) + with pytest.raises(TypeError, match="unsupported operand type"): + with tm.assert_produces_warning(FutureWarning, match="deprecated"): + index.get_loc("a", method="pad", tolerance="invalid") + + +class TestGetIndexer: + @pytest.mark.parametrize( + "method,expected", + [ + ("pad", np.array([-1, 0, 1, 1], dtype=np.intp)), + ("backfill", np.array([0, 0, 1, -1], dtype=np.intp)), + ], + ) + def test_get_indexer_strings(self, method, expected): + index = Index(["b", "c"]) + actual = index.get_indexer(["a", "b", "c", "d"], method=method) + + tm.assert_numpy_array_equal(actual, expected) + + def test_get_indexer_strings_raises(self): + index = Index(["b", "c"]) + + msg = r"unsupported operand type\(s\) for -: 'str' and 'str'" + with pytest.raises(TypeError, match=msg): + index.get_indexer(["a", "b", "c", "d"], method="nearest") + + with pytest.raises(TypeError, match=msg): + index.get_indexer(["a", "b", "c", "d"], method="pad", tolerance=2) + + with pytest.raises(TypeError, match=msg): + index.get_indexer( + ["a", "b", "c", "d"], method="pad", tolerance=[2, 2, 2, 2] + ) + + def test_get_indexer_with_NA_values( + self, unique_nulls_fixture, unique_nulls_fixture2 + ): + # GH#22332 + # check pairwise, that no pair of na 
values + # is mangled + if unique_nulls_fixture is unique_nulls_fixture2: + return # skip it, values are not unique + arr = np.array([unique_nulls_fixture, unique_nulls_fixture2], dtype=object) + index = Index(arr, dtype=object) + result = index.get_indexer( + [unique_nulls_fixture, unique_nulls_fixture2, "Unknown"] + ) + expected = np.array([0, 1, -1], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + +class TestGetIndexerNonUnique: + def test_get_indexer_non_unique_nas(self, nulls_fixture): + # even though this isn't non-unique, this should still work + index = Index(["a", "b", nulls_fixture]) + indexer, missing = index.get_indexer_non_unique([nulls_fixture]) + + expected_indexer = np.array([2], dtype=np.intp) + expected_missing = np.array([], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected_indexer) + tm.assert_numpy_array_equal(missing, expected_missing) + + # actually non-unique + index = Index(["a", nulls_fixture, "b", nulls_fixture]) + indexer, missing = index.get_indexer_non_unique([nulls_fixture]) + + expected_indexer = np.array([1, 3], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected_indexer) + tm.assert_numpy_array_equal(missing, expected_missing) + + # matching-but-not-identical nans + if is_matching_na(nulls_fixture, float("NaN")): + index = Index(["a", float("NaN"), "b", float("NaN")]) + match_but_not_identical = True + elif is_matching_na(nulls_fixture, Decimal("NaN")): + index = Index(["a", Decimal("NaN"), "b", Decimal("NaN")]) + match_but_not_identical = True + else: + match_but_not_identical = False + + if match_but_not_identical: + indexer, missing = index.get_indexer_non_unique([nulls_fixture]) + + expected_indexer = np.array([1, 3], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected_indexer) + tm.assert_numpy_array_equal(missing, expected_missing) + + @pytest.mark.filterwarnings("ignore:elementwise comp:DeprecationWarning") + def test_get_indexer_non_unique_np_nats(self, np_nat_fixture, np_nat_fixture2): + expected_missing = np.array([], dtype=np.intp) + # matching-but-not-identical nats + if is_matching_na(np_nat_fixture, np_nat_fixture2): + # ensure nats are different objects + index = Index( + np.array( + ["2021-10-02", np_nat_fixture.copy(), np_nat_fixture2.copy()], + dtype=object, + ), + dtype=object, + ) + # pass as index to prevent target from being cast to DatetimeIndex + indexer, missing = index.get_indexer_non_unique( + Index([np_nat_fixture], dtype=object) + ) + expected_indexer = np.array([1, 2], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected_indexer) + tm.assert_numpy_array_equal(missing, expected_missing) + # dt64nat vs td64nat + else: + index = Index( + np.array( + [ + "2021-10-02", + np_nat_fixture, + np_nat_fixture2, + np_nat_fixture, + np_nat_fixture2, + ], + dtype=object, + ), + dtype=object, + ) + # pass as index to prevent target from being cast to DatetimeIndex + indexer, missing = index.get_indexer_non_unique( + Index([np_nat_fixture], dtype=object) + ) + expected_indexer = np.array([1, 3], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected_indexer) + tm.assert_numpy_array_equal(missing, expected_missing) + + +class TestSliceLocs: + @pytest.mark.parametrize( + "in_slice,expected", + [ + # error: Slice index must be an integer or None + (pd.IndexSlice[::-1], "yxdcb"), + (pd.IndexSlice["b":"y":-1], ""), # type: ignore[misc] + (pd.IndexSlice["b"::-1], "b"), # type: ignore[misc] + (pd.IndexSlice[:"b":-1], "yxdcb"), # type: ignore[misc] + (pd.IndexSlice[:"y":-1],
"y"), # type: ignore[misc] + (pd.IndexSlice["y"::-1], "yxdcb"), # type: ignore[misc] + (pd.IndexSlice["y"::-4], "yb"), # type: ignore[misc] + # absent labels + (pd.IndexSlice[:"a":-1], "yxdcb"), # type: ignore[misc] + (pd.IndexSlice[:"a":-2], "ydb"), # type: ignore[misc] + (pd.IndexSlice["z"::-1], "yxdcb"), # type: ignore[misc] + (pd.IndexSlice["z"::-3], "yc"), # type: ignore[misc] + (pd.IndexSlice["m"::-1], "dcb"), # type: ignore[misc] + (pd.IndexSlice[:"m":-1], "yx"), # type: ignore[misc] + (pd.IndexSlice["a":"a":-1], ""), # type: ignore[misc] + (pd.IndexSlice["z":"z":-1], ""), # type: ignore[misc] + (pd.IndexSlice["m":"m":-1], ""), # type: ignore[misc] + ], + ) + def test_slice_locs_negative_step(self, in_slice, expected): + index = Index(list("bcdxy")) + + s_start, s_stop = index.slice_locs(in_slice.start, in_slice.stop, in_slice.step) + result = index[s_start : s_stop : in_slice.step] + expected = Index(list(expected)) + tm.assert_index_equal(result, expected) + + def test_slice_locs_dup(self): + index = Index(["a", "a", "b", "c", "d", "d"]) + assert index.slice_locs("a", "d") == (0, 6) + assert index.slice_locs(end="d") == (0, 6) + assert index.slice_locs("a", "c") == (0, 4) + assert index.slice_locs("b", "d") == (2, 6) + + index2 = index[::-1] + assert index2.slice_locs("d", "a") == (0, 6) + assert index2.slice_locs(end="a") == (0, 6) + assert index2.slice_locs("d", "b") == (0, 4) + assert index2.slice_locs("c", "a") == (2, 6) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..9e86795 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/__pycache__/test_constructors.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/__pycache__/test_constructors.cpython-38.pyc new file mode 100644 index 0000000..d14ef22 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/__pycache__/test_constructors.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/__pycache__/test_formats.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/__pycache__/test_formats.cpython-38.pyc new file mode 100644 index 0000000..2cb9acd Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/__pycache__/test_formats.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/__pycache__/test_freq_attr.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/__pycache__/test_freq_attr.cpython-38.pyc new file mode 100644 index 0000000..bc47673 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/__pycache__/test_freq_attr.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/__pycache__/test_indexing.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/__pycache__/test_indexing.cpython-38.pyc new file mode 100644 index 
0000000..da5eb7b Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/__pycache__/test_indexing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/__pycache__/test_fillna.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/__pycache__/test_fillna.cpython-38.pyc new file mode 100644 index 0000000..20cda1d Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/__pycache__/test_fillna.cpython-38.pyc
differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/__pycache__/test_insert.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/__pycache__/test_insert.cpython-38.pyc new file mode 100644 index 0000000..86f26d2 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/__pycache__/test_insert.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/__pycache__/test_is_full.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/__pycache__/test_is_full.cpython-38.pyc new file mode 100644 index 0000000..16dd2ad Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/__pycache__/test_is_full.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/__pycache__/test_repeat.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/__pycache__/test_repeat.cpython-38.pyc new file mode 100644 index 0000000..0188a15 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/__pycache__/test_repeat.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/__pycache__/test_shift.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/__pycache__/test_shift.cpython-38.pyc new file mode 100644 index 0000000..baf8843 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/__pycache__/test_shift.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/__pycache__/test_to_timestamp.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/__pycache__/test_to_timestamp.cpython-38.pyc new file mode 100644 index 0000000..3a8d9d6 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/__pycache__/test_to_timestamp.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/test_asfreq.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/test_asfreq.py new file mode 100644 index 0000000..23b88fb --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/test_asfreq.py @@ -0,0 +1,130 @@ +import pytest + +from pandas import ( + PeriodIndex, + period_range, +) +import pandas._testing as tm + + +class TestPeriodIndex: + def test_asfreq(self): + pi1 = period_range(freq="A", start="1/1/2001", end="1/1/2001") + pi2 = period_range(freq="Q", start="1/1/2001", end="1/1/2001") + pi3 = period_range(freq="M", start="1/1/2001", end="1/1/2001") + pi4 = period_range(freq="D", start="1/1/2001", end="1/1/2001") + pi5 = period_range(freq="H", start="1/1/2001", end="1/1/2001 00:00") + pi6 = period_range(freq="Min", start="1/1/2001", end="1/1/2001 00:00") + pi7 = period_range(freq="S", start="1/1/2001", end="1/1/2001 00:00:00") + + assert pi1.asfreq("Q", "S") == pi2 + assert pi1.asfreq("Q", "s") == pi2 + assert pi1.asfreq("M", "start") == pi3 + assert pi1.asfreq("D", "StarT") == pi4 + assert pi1.asfreq("H", "beGIN") == pi5 + assert pi1.asfreq("Min", "S") == pi6 + assert pi1.asfreq("S", "S") == pi7 + + assert pi2.asfreq("A", "S") == pi1 + assert pi2.asfreq("M", "S") == pi3 + assert pi2.asfreq("D", "S") == pi4 + assert pi2.asfreq("H", 
"S") == pi5 + assert pi2.asfreq("Min", "S") == pi6 + assert pi2.asfreq("S", "S") == pi7 + + assert pi3.asfreq("A", "S") == pi1 + assert pi3.asfreq("Q", "S") == pi2 + assert pi3.asfreq("D", "S") == pi4 + assert pi3.asfreq("H", "S") == pi5 + assert pi3.asfreq("Min", "S") == pi6 + assert pi3.asfreq("S", "S") == pi7 + + assert pi4.asfreq("A", "S") == pi1 + assert pi4.asfreq("Q", "S") == pi2 + assert pi4.asfreq("M", "S") == pi3 + assert pi4.asfreq("H", "S") == pi5 + assert pi4.asfreq("Min", "S") == pi6 + assert pi4.asfreq("S", "S") == pi7 + + assert pi5.asfreq("A", "S") == pi1 + assert pi5.asfreq("Q", "S") == pi2 + assert pi5.asfreq("M", "S") == pi3 + assert pi5.asfreq("D", "S") == pi4 + assert pi5.asfreq("Min", "S") == pi6 + assert pi5.asfreq("S", "S") == pi7 + + assert pi6.asfreq("A", "S") == pi1 + assert pi6.asfreq("Q", "S") == pi2 + assert pi6.asfreq("M", "S") == pi3 + assert pi6.asfreq("D", "S") == pi4 + assert pi6.asfreq("H", "S") == pi5 + assert pi6.asfreq("S", "S") == pi7 + + assert pi7.asfreq("A", "S") == pi1 + assert pi7.asfreq("Q", "S") == pi2 + assert pi7.asfreq("M", "S") == pi3 + assert pi7.asfreq("D", "S") == pi4 + assert pi7.asfreq("H", "S") == pi5 + assert pi7.asfreq("Min", "S") == pi6 + + msg = "How must be one of S or E" + with pytest.raises(ValueError, match=msg): + pi7.asfreq("T", "foo") + result1 = pi1.asfreq("3M") + result2 = pi1.asfreq("M") + expected = period_range(freq="M", start="2001-12", end="2001-12") + tm.assert_numpy_array_equal(result1.asi8, expected.asi8) + assert result1.freqstr == "3M" + tm.assert_numpy_array_equal(result2.asi8, expected.asi8) + assert result2.freqstr == "M" + + def test_asfreq_nat(self): + idx = PeriodIndex(["2011-01", "2011-02", "NaT", "2011-04"], freq="M") + result = idx.asfreq(freq="Q") + expected = PeriodIndex(["2011Q1", "2011Q1", "NaT", "2011Q2"], freq="Q") + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("freq", ["D", "3D"]) + def test_asfreq_mult_pi(self, freq): + pi = PeriodIndex(["2001-01", "2001-02", "NaT", "2001-03"], freq="2M") + + result = pi.asfreq(freq) + exp = PeriodIndex(["2001-02-28", "2001-03-31", "NaT", "2001-04-30"], freq=freq) + tm.assert_index_equal(result, exp) + assert result.freq == exp.freq + + result = pi.asfreq(freq, how="S") + exp = PeriodIndex(["2001-01-01", "2001-02-01", "NaT", "2001-03-01"], freq=freq) + tm.assert_index_equal(result, exp) + assert result.freq == exp.freq + + def test_asfreq_combined_pi(self): + pi = PeriodIndex(["2001-01-01 00:00", "2001-01-02 02:00", "NaT"], freq="H") + exp = PeriodIndex(["2001-01-01 00:00", "2001-01-02 02:00", "NaT"], freq="25H") + for freq, how in zip(["1D1H", "1H1D"], ["S", "E"]): + result = pi.asfreq(freq, how=how) + tm.assert_index_equal(result, exp) + assert result.freq == exp.freq + + for freq in ["1D1H", "1H1D"]: + pi = PeriodIndex(["2001-01-01 00:00", "2001-01-02 02:00", "NaT"], freq=freq) + result = pi.asfreq("H") + exp = PeriodIndex(["2001-01-02 00:00", "2001-01-03 02:00", "NaT"], freq="H") + tm.assert_index_equal(result, exp) + assert result.freq == exp.freq + + pi = PeriodIndex(["2001-01-01 00:00", "2001-01-02 02:00", "NaT"], freq=freq) + result = pi.asfreq("H", how="S") + exp = PeriodIndex(["2001-01-01 00:00", "2001-01-02 02:00", "NaT"], freq="H") + tm.assert_index_equal(result, exp) + assert result.freq == exp.freq + + def test_astype_asfreq(self): + pi1 = PeriodIndex(["2011-01-01", "2011-02-01", "2011-03-01"], freq="D") + exp = PeriodIndex(["2011-01", "2011-02", "2011-03"], freq="M") + tm.assert_index_equal(pi1.asfreq("M"), exp) + 
tm.assert_index_equal(pi1.astype("period[M]"), exp) + + exp = PeriodIndex(["2011-01", "2011-02", "2011-03"], freq="3M") + tm.assert_index_equal(pi1.asfreq("3M"), exp) + tm.assert_index_equal(pi1.astype("period[3M]"), exp) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/test_astype.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/test_astype.py new file mode 100644 index 0000000..c3ea72c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/test_astype.py @@ -0,0 +1,174 @@ +import numpy as np +import pytest + +from pandas import ( + CategoricalIndex, + DatetimeIndex, + Index, + NaT, + Period, + PeriodIndex, + Timedelta, + period_range, +) +import pandas._testing as tm +from pandas.core.indexes.api import ( + Int64Index, + UInt64Index, +) + + +class TestPeriodIndexAsType: + @pytest.mark.parametrize("dtype", [float, "timedelta64", "timedelta64[ns]"]) + def test_astype_raises(self, dtype): + # GH#13149, GH#13209 + idx = PeriodIndex(["2016-05-16", "NaT", NaT, np.NaN], freq="D") + msg = "Cannot cast PeriodIndex to dtype" + with pytest.raises(TypeError, match=msg): + idx.astype(dtype) + + def test_astype_conversion(self): + # GH#13149, GH#13209 + idx = PeriodIndex(["2016-05-16", "NaT", NaT, np.NaN], freq="D", name="idx") + + result = idx.astype(object) + expected = Index( + [Period("2016-05-16", freq="D")] + [Period(NaT, freq="D")] * 3, + dtype="object", + name="idx", + ) + tm.assert_index_equal(result, expected) + + result = idx.astype(np.int64) + expected = Int64Index( + [16937] + [-9223372036854775808] * 3, dtype=np.int64, name="idx" + ) + tm.assert_index_equal(result, expected) + + result = idx.astype(str) + expected = Index([str(x) for x in idx], name="idx") + tm.assert_index_equal(result, expected) + + idx = period_range("1990", "2009", freq="A", name="idx") + result = idx.astype("i8") + tm.assert_index_equal(result, Index(idx.asi8, name="idx")) + tm.assert_numpy_array_equal(result.values, idx.asi8) + + def test_astype_uint(self): + arr = period_range("2000", periods=2, name="idx") + expected = UInt64Index(np.array([10957, 10958], dtype="uint64"), name="idx") + tm.assert_index_equal(arr.astype("uint64"), expected) + tm.assert_index_equal(arr.astype("uint32"), expected) + + def test_astype_object(self): + idx = PeriodIndex([], freq="M") + + exp = np.array([], dtype=object) + tm.assert_numpy_array_equal(idx.astype(object).values, exp) + tm.assert_numpy_array_equal(idx._mpl_repr(), exp) + + idx = PeriodIndex(["2011-01", NaT], freq="M") + + exp = np.array([Period("2011-01", freq="M"), NaT], dtype=object) + tm.assert_numpy_array_equal(idx.astype(object).values, exp) + tm.assert_numpy_array_equal(idx._mpl_repr(), exp) + + exp = np.array([Period("2011-01-01", freq="D"), NaT], dtype=object) + idx = PeriodIndex(["2011-01-01", NaT], freq="D") + tm.assert_numpy_array_equal(idx.astype(object).values, exp) + tm.assert_numpy_array_equal(idx._mpl_repr(), exp) + + # TODO: de-duplicate this version (from test_ops) with the one above + # (from test_period) + def test_astype_object2(self): + idx = period_range(start="2013-01-01", periods=4, freq="M", name="idx") + expected_list = [ + Period("2013-01-31", freq="M"), + Period("2013-02-28", freq="M"), + Period("2013-03-31", freq="M"), + Period("2013-04-30", freq="M"), + ] + expected = Index(expected_list, dtype=object, name="idx") + result = idx.astype(object) + assert isinstance(result, Index) + assert result.dtype == object + tm.assert_index_equal(result, 
expected) + assert result.name == expected.name + assert idx.tolist() == expected_list + + idx = PeriodIndex( + ["2013-01-01", "2013-01-02", "NaT", "2013-01-04"], freq="D", name="idx" + ) + expected_list = [ + Period("2013-01-01", freq="D"), + Period("2013-01-02", freq="D"), + Period("NaT", freq="D"), + Period("2013-01-04", freq="D"), + ] + expected = Index(expected_list, dtype=object, name="idx") + result = idx.astype(object) + assert isinstance(result, Index) + assert result.dtype == object + tm.assert_index_equal(result, expected) + for i in [0, 1, 3]: + assert result[i] == expected[i] + assert result[2] is NaT + assert result.name == expected.name + + result_list = idx.tolist() + for i in [0, 1, 3]: + assert result_list[i] == expected_list[i] + assert result_list[2] is NaT + + def test_astype_category(self): + obj = period_range("2000", periods=2, name="idx") + result = obj.astype("category") + expected = CategoricalIndex( + [Period("2000-01-01", freq="D"), Period("2000-01-02", freq="D")], name="idx" + ) + tm.assert_index_equal(result, expected) + + result = obj._data.astype("category") + expected = expected.values + tm.assert_categorical_equal(result, expected) + + def test_astype_array_fallback(self): + obj = period_range("2000", periods=2, name="idx") + result = obj.astype(bool) + expected = Index(np.array([True, True]), name="idx") + tm.assert_index_equal(result, expected) + + result = obj._data.astype(bool) + expected = np.array([True, True]) + tm.assert_numpy_array_equal(result, expected) + + def test_period_astype_to_timestamp(self): + pi = PeriodIndex(["2011-01", "2011-02", "2011-03"], freq="M") + + exp = DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"], freq="MS") + with tm.assert_produces_warning(FutureWarning): + # how keyword deprecated GH#37982 + res = pi.astype("datetime64[ns]", how="start") + tm.assert_index_equal(res, exp) + assert res.freq == exp.freq + + exp = DatetimeIndex(["2011-01-31", "2011-02-28", "2011-03-31"]) + exp = exp + Timedelta(1, "D") - Timedelta(1, "ns") + with tm.assert_produces_warning(FutureWarning): + # how keyword deprecated GH#37982 + res = pi.astype("datetime64[ns]", how="end") + tm.assert_index_equal(res, exp) + assert res.freq == exp.freq + + exp = DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"], tz="US/Eastern") + res = pi.astype("datetime64[ns, US/Eastern]") + tm.assert_index_equal(res, exp) + assert res.freq == exp.freq + + exp = DatetimeIndex(["2011-01-31", "2011-02-28", "2011-03-31"], tz="US/Eastern") + exp = exp + Timedelta(1, "D") - Timedelta(1, "ns") + with tm.assert_produces_warning(FutureWarning): + # how keyword deprecated GH#37982 + res = pi.astype("datetime64[ns, US/Eastern]", how="end") + tm.assert_index_equal(res, exp) + assert res.freq == exp.freq diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/test_factorize.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/test_factorize.py new file mode 100644 index 0000000..9e297d6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/test_factorize.py @@ -0,0 +1,52 @@ +import numpy as np + +from pandas import ( + PeriodIndex, + factorize, +) +import pandas._testing as tm + + +class TestFactorize: + def test_factorize(self): + idx1 = PeriodIndex( + ["2014-01", "2014-01", "2014-02", "2014-02", "2014-03", "2014-03"], freq="M" + ) + + exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp) + exp_idx = PeriodIndex(["2014-01", "2014-02", "2014-03"], freq="M") + + arr, idx = 
idx1.factorize() + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + + arr, idx = idx1.factorize(sort=True) + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + + idx2 = PeriodIndex( + ["2014-03", "2014-03", "2014-02", "2014-01", "2014-03", "2014-01"], freq="M" + ) + + exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.intp) + arr, idx = idx2.factorize(sort=True) + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + + exp_arr = np.array([0, 0, 1, 2, 0, 2], dtype=np.intp) + exp_idx = PeriodIndex(["2014-03", "2014-02", "2014-01"], freq="M") + arr, idx = idx2.factorize() + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + + def test_factorize_complex(self): + # GH 17927 + array = [1, 2, 2 + 1j] + labels, uniques = factorize(array) + + expected_labels = np.array([0, 1, 2], dtype=np.intp) + tm.assert_numpy_array_equal(labels, expected_labels) + + # Should return a complex dtype in the future + expected_uniques = np.array([(1 + 0j), (2 + 0j), (2 + 1j)], dtype=object) + tm.assert_numpy_array_equal(uniques, expected_uniques) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/test_fillna.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/test_fillna.py new file mode 100644 index 0000000..12a07ba --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/test_fillna.py @@ -0,0 +1,41 @@ +from pandas import ( + Index, + NaT, + Period, + PeriodIndex, +) +import pandas._testing as tm + + +class TestFillNA: + def test_fillna_period(self): + # GH#11343 + idx = PeriodIndex(["2011-01-01 09:00", NaT, "2011-01-01 11:00"], freq="H") + + exp = PeriodIndex( + ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"], freq="H" + ) + result = idx.fillna(Period("2011-01-01 10:00", freq="H")) + tm.assert_index_equal(result, exp) + + exp = Index( + [ + Period("2011-01-01 09:00", freq="H"), + "x", + Period("2011-01-01 11:00", freq="H"), + ], + dtype=object, + ) + result = idx.fillna("x") + tm.assert_index_equal(result, exp) + + exp = Index( + [ + Period("2011-01-01 09:00", freq="H"), + Period("2011-01-01", freq="D"), + Period("2011-01-01 11:00", freq="H"), + ], + dtype=object, + ) + result = idx.fillna(Period("2011-01-01", freq="D")) + tm.assert_index_equal(result, exp) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/test_insert.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/test_insert.py new file mode 100644 index 0000000..32bbe09 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/test_insert.py @@ -0,0 +1,18 @@ +import numpy as np +import pytest + +from pandas import ( + NaT, + PeriodIndex, + period_range, +) +import pandas._testing as tm + + +class TestInsert: + @pytest.mark.parametrize("na", [np.nan, NaT, None]) + def test_insert(self, na): + # GH#18295 (test missing) + expected = PeriodIndex(["2017Q1", NaT, "2017Q2", "2017Q3", "2017Q4"], freq="Q") + result = period_range("2017Q1", periods=4, freq="Q").insert(1, na) + tm.assert_index_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/test_is_full.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/test_is_full.py new file mode 100644 index 0000000..490f199 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/test_is_full.py @@ -0,0 
+1,23 @@ +import pytest + +from pandas import PeriodIndex + + +def test_is_full(): + index = PeriodIndex([2005, 2007, 2009], freq="A") + assert not index.is_full + + index = PeriodIndex([2005, 2006, 2007], freq="A") + assert index.is_full + + index = PeriodIndex([2005, 2005, 2007], freq="A") + assert not index.is_full + + index = PeriodIndex([2005, 2005, 2006], freq="A") + assert index.is_full + + index = PeriodIndex([2006, 2005, 2005], freq="A") + with pytest.raises(ValueError, match="Index is not monotonic"): + index.is_full + + assert index[:0].is_full diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/test_repeat.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/test_repeat.py new file mode 100644 index 0000000..fc344b0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/test_repeat.py @@ -0,0 +1,26 @@ +import numpy as np +import pytest + +from pandas import ( + PeriodIndex, + period_range, +) +import pandas._testing as tm + + +class TestRepeat: + @pytest.mark.parametrize("use_numpy", [True, False]) + @pytest.mark.parametrize( + "index", + [ + period_range("2000-01-01", periods=3, freq="D"), + period_range("2001-01-01", periods=3, freq="2D"), + PeriodIndex(["2001-01", "NaT", "2003-01"], freq="M"), + ], + ) + def test_repeat_freqstr(self, index, use_numpy): + # GH#10183 + expected = PeriodIndex([per for per in index for _ in range(3)]) + result = np.repeat(index, 3) if use_numpy else index.repeat(3) + tm.assert_index_equal(result, expected) + assert result.freqstr == index.freqstr diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/test_shift.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/test_shift.py new file mode 100644 index 0000000..730172c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/test_shift.py @@ -0,0 +1,122 @@ +import numpy as np +import pytest + +from pandas import ( + PeriodIndex, + period_range, +) +import pandas._testing as tm + + +class TestPeriodIndexShift: + # --------------------------------------------------------------- + # PeriodIndex.shift is used by __add__ and __sub__ + + def test_pi_shift_ndarray(self): + idx = PeriodIndex( + ["2011-01", "2011-02", "NaT", "2011-04"], freq="M", name="idx" + ) + result = idx.shift(np.array([1, 2, 3, 4])) + expected = PeriodIndex( + ["2011-02", "2011-04", "NaT", "2011-08"], freq="M", name="idx" + ) + tm.assert_index_equal(result, expected) + + result = idx.shift(np.array([1, -2, 3, -4])) + expected = PeriodIndex( + ["2011-02", "2010-12", "NaT", "2010-12"], freq="M", name="idx" + ) + tm.assert_index_equal(result, expected) + + def test_shift(self): + pi1 = period_range(freq="A", start="1/1/2001", end="12/1/2009") + pi2 = period_range(freq="A", start="1/1/2002", end="12/1/2010") + + tm.assert_index_equal(pi1.shift(0), pi1) + + assert len(pi1) == len(pi2) + tm.assert_index_equal(pi1.shift(1), pi2) + + pi1 = period_range(freq="A", start="1/1/2001", end="12/1/2009") + pi2 = period_range(freq="A", start="1/1/2000", end="12/1/2008") + assert len(pi1) == len(pi2) + tm.assert_index_equal(pi1.shift(-1), pi2) + + pi1 = period_range(freq="M", start="1/1/2001", end="12/1/2009") + pi2 = period_range(freq="M", start="2/1/2001", end="1/1/2010") + assert len(pi1) == len(pi2) + tm.assert_index_equal(pi1.shift(1), pi2) + + pi1 = period_range(freq="M", start="1/1/2001", end="12/1/2009") + pi2 = period_range(freq="M", start="12/1/2000", 
end="11/1/2009") + assert len(pi1) == len(pi2) + tm.assert_index_equal(pi1.shift(-1), pi2) + + pi1 = period_range(freq="D", start="1/1/2001", end="12/1/2009") + pi2 = period_range(freq="D", start="1/2/2001", end="12/2/2009") + assert len(pi1) == len(pi2) + tm.assert_index_equal(pi1.shift(1), pi2) + + pi1 = period_range(freq="D", start="1/1/2001", end="12/1/2009") + pi2 = period_range(freq="D", start="12/31/2000", end="11/30/2009") + assert len(pi1) == len(pi2) + tm.assert_index_equal(pi1.shift(-1), pi2) + + def test_shift_corner_cases(self): + # GH#9903 + idx = PeriodIndex([], name="xxx", freq="H") + + msg = "`freq` argument is not supported for PeriodArray._time_shift" + with pytest.raises(TypeError, match=msg): + # period shift doesn't accept freq + idx.shift(1, freq="H") + + tm.assert_index_equal(idx.shift(0), idx) + tm.assert_index_equal(idx.shift(3), idx) + + idx = PeriodIndex( + ["2011-01-01 10:00", "2011-01-01 11:00", "2011-01-01 12:00"], + name="xxx", + freq="H", + ) + tm.assert_index_equal(idx.shift(0), idx) + exp = PeriodIndex( + ["2011-01-01 13:00", "2011-01-01 14:00", "2011-01-01 15:00"], + name="xxx", + freq="H", + ) + tm.assert_index_equal(idx.shift(3), exp) + exp = PeriodIndex( + ["2011-01-01 07:00", "2011-01-01 08:00", "2011-01-01 09:00"], + name="xxx", + freq="H", + ) + tm.assert_index_equal(idx.shift(-3), exp) + + def test_shift_nat(self): + idx = PeriodIndex( + ["2011-01", "2011-02", "NaT", "2011-04"], freq="M", name="idx" + ) + result = idx.shift(1) + expected = PeriodIndex( + ["2011-02", "2011-03", "NaT", "2011-05"], freq="M", name="idx" + ) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + + def test_shift_gh8083(self): + # test shift for PeriodIndex + # GH#8083 + drange = period_range("20130101", periods=5, freq="D") + result = drange.shift(1) + expected = PeriodIndex( + ["2013-01-02", "2013-01-03", "2013-01-04", "2013-01-05", "2013-01-06"], + freq="D", + ) + tm.assert_index_equal(result, expected) + + def test_shift_periods(self): + # GH #22458 : argument 'n' was deprecated in favor of 'periods' + idx = period_range(freq="A", start="1/1/2001", end="12/1/2009") + tm.assert_index_equal(idx.shift(periods=0), idx) + tm.assert_index_equal(idx.shift(0), idx) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/test_to_timestamp.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/test_to_timestamp.py new file mode 100644 index 0000000..164ed3e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/methods/test_to_timestamp.py @@ -0,0 +1,132 @@ +from datetime import datetime + +import numpy as np +import pytest + +from pandas import ( + DatetimeIndex, + NaT, + PeriodIndex, + Timedelta, + Timestamp, + date_range, + period_range, +) +import pandas._testing as tm + + +class TestToTimestamp: + def test_to_timestamp_non_contiguous(self): + # GH#44100 + dti = date_range("2021-10-18", periods=9, freq="B") + pi = dti.to_period() + + result = pi[::2].to_timestamp() + expected = dti[::2] + tm.assert_index_equal(result, expected) + + result = pi._data[::2].to_timestamp() + expected = dti._data[::2] + # TODO: can we get the freq to round-trip? 
+ tm.assert_datetime_array_equal(result, expected, check_freq=False) + + result = pi[::-1].to_timestamp() + expected = dti[::-1] + tm.assert_index_equal(result, expected) + + result = pi._data[::-1].to_timestamp() + expected = dti._data[::-1] + tm.assert_datetime_array_equal(result, expected, check_freq=False) + + result = pi[::2][::-1].to_timestamp() + expected = dti[::2][::-1] + tm.assert_index_equal(result, expected) + + result = pi._data[::2][::-1].to_timestamp() + expected = dti._data[::2][::-1] + tm.assert_datetime_array_equal(result, expected, check_freq=False) + + def test_to_timestamp_freq(self): + idx = period_range("2017", periods=12, freq="A-DEC") + result = idx.to_timestamp() + expected = date_range("2017", periods=12, freq="AS-JAN") + tm.assert_index_equal(result, expected) + + def test_to_timestamp_pi_nat(self): + # GH#7228 + index = PeriodIndex(["NaT", "2011-01", "2011-02"], freq="M", name="idx") + + result = index.to_timestamp("D") + expected = DatetimeIndex( + [NaT, datetime(2011, 1, 1), datetime(2011, 2, 1)], name="idx" + ) + tm.assert_index_equal(result, expected) + assert result.name == "idx" + + result2 = result.to_period(freq="M") + tm.assert_index_equal(result2, index) + assert result2.name == "idx" + + result3 = result.to_period(freq="3M") + exp = PeriodIndex(["NaT", "2011-01", "2011-02"], freq="3M", name="idx") + tm.assert_index_equal(result3, exp) + assert result3.freqstr == "3M" + + msg = "Frequency must be positive, because it represents span: -2A" + with pytest.raises(ValueError, match=msg): + result.to_period(freq="-2A") + + def test_to_timestamp_preserve_name(self): + index = period_range(freq="A", start="1/1/2001", end="12/1/2009", name="foo") + assert index.name == "foo" + + conv = index.to_timestamp("D") + assert conv.name == "foo" + + def test_to_timestamp_quarterly_bug(self): + years = np.arange(1960, 2000).repeat(4) + quarters = np.tile(list(range(1, 5)), 40) + + pindex = PeriodIndex(year=years, quarter=quarters) + + stamps = pindex.to_timestamp("D", "end") + expected = DatetimeIndex([x.to_timestamp("D", "end") for x in pindex]) + tm.assert_index_equal(stamps, expected) + assert stamps.freq == expected.freq + + def test_to_timestamp_pi_mult(self): + idx = PeriodIndex(["2011-01", "NaT", "2011-02"], freq="2M", name="idx") + + result = idx.to_timestamp() + expected = DatetimeIndex(["2011-01-01", "NaT", "2011-02-01"], name="idx") + tm.assert_index_equal(result, expected) + + result = idx.to_timestamp(how="E") + expected = DatetimeIndex(["2011-02-28", "NaT", "2011-03-31"], name="idx") + expected = expected + Timedelta(1, "D") - Timedelta(1, "ns") + tm.assert_index_equal(result, expected) + + def test_to_timestamp_pi_combined(self): + idx = period_range(start="2011", periods=2, freq="1D1H", name="idx") + + result = idx.to_timestamp() + expected = DatetimeIndex(["2011-01-01 00:00", "2011-01-02 01:00"], name="idx") + tm.assert_index_equal(result, expected) + + result = idx.to_timestamp(how="E") + expected = DatetimeIndex( + ["2011-01-02 00:59:59", "2011-01-03 01:59:59"], name="idx" + ) + expected = expected + Timedelta(1, "s") - Timedelta(1, "ns") + tm.assert_index_equal(result, expected) + + result = idx.to_timestamp(how="E", freq="H") + expected = DatetimeIndex(["2011-01-02 00:00", "2011-01-03 01:00"], name="idx") + expected = expected + Timedelta(1, "h") - Timedelta(1, "ns") + tm.assert_index_equal(result, expected) + + def test_to_timestamp_1703(self): + index = period_range("1/1/2012", periods=4, freq="D") + + result = index.to_timestamp() + assert 
result[0] == Timestamp("1/1/2012") diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/test_constructors.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/test_constructors.py new file mode 100644 index 0000000..e372fd0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/test_constructors.py @@ -0,0 +1,548 @@ +import numpy as np +import pytest + +from pandas._libs.tslibs.period import IncompatibleFrequency + +from pandas.core.dtypes.dtypes import PeriodDtype + +from pandas import ( + Index, + NaT, + Period, + PeriodIndex, + Series, + date_range, + offsets, + period_range, +) +import pandas._testing as tm +from pandas.core.arrays import PeriodArray + + +class TestPeriodIndex: + def test_construction_base_constructor(self): + # GH 13664 + arr = [Period("2011-01", freq="M"), NaT, Period("2011-03", freq="M")] + tm.assert_index_equal(Index(arr), PeriodIndex(arr)) + tm.assert_index_equal(Index(np.array(arr)), PeriodIndex(np.array(arr))) + + arr = [np.nan, NaT, Period("2011-03", freq="M")] + tm.assert_index_equal(Index(arr), PeriodIndex(arr)) + tm.assert_index_equal(Index(np.array(arr)), PeriodIndex(np.array(arr))) + + arr = [Period("2011-01", freq="M"), NaT, Period("2011-03", freq="D")] + tm.assert_index_equal(Index(arr), Index(arr, dtype=object)) + + tm.assert_index_equal(Index(np.array(arr)), Index(np.array(arr), dtype=object)) + + def test_base_constructor_with_period_dtype(self): + dtype = PeriodDtype("D") + values = ["2011-01-01", "2012-03-04", "2014-05-01"] + result = Index(values, dtype=dtype) + + expected = PeriodIndex(values, dtype=dtype) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "values_constructor", [list, np.array, PeriodIndex, PeriodArray._from_sequence] + ) + def test_index_object_dtype(self, values_constructor): + # Index(periods, dtype=object) is an Index (not an PeriodIndex) + periods = [ + Period("2011-01", freq="M"), + NaT, + Period("2011-03", freq="M"), + ] + values = values_constructor(periods) + result = Index(values, dtype=object) + + assert type(result) is Index + tm.assert_numpy_array_equal(result.values, np.array(values)) + + def test_constructor_use_start_freq(self): + # GH #1118 + p = Period("4/2/2012", freq="B") + expected = period_range(start="4/2/2012", periods=10, freq="B") + + index = period_range(start=p, periods=10) + tm.assert_index_equal(index, expected) + + def test_constructor_field_arrays(self): + # GH #1264 + + years = np.arange(1990, 2010).repeat(4)[2:-2] + quarters = np.tile(np.arange(1, 5), 20)[2:-2] + + index = PeriodIndex(year=years, quarter=quarters, freq="Q-DEC") + expected = period_range("1990Q3", "2009Q2", freq="Q-DEC") + tm.assert_index_equal(index, expected) + + index2 = PeriodIndex(year=years, quarter=quarters, freq="2Q-DEC") + tm.assert_numpy_array_equal(index.asi8, index2.asi8) + + index = PeriodIndex(year=years, quarter=quarters) + tm.assert_index_equal(index, expected) + + years = [2007, 2007, 2007] + months = [1, 2] + + msg = "Mismatched Period array lengths" + with pytest.raises(ValueError, match=msg): + PeriodIndex(year=years, month=months, freq="M") + with pytest.raises(ValueError, match=msg): + PeriodIndex(year=years, month=months, freq="2M") + + years = [2007, 2007, 2007] + months = [1, 2, 3] + idx = PeriodIndex(year=years, month=months, freq="M") + exp = period_range("2007-01", periods=3, freq="M") + tm.assert_index_equal(idx, exp) + + def test_constructor_U(self): + # U was used as undefined period + with 
pytest.raises(ValueError, match="Invalid frequency: X"): + period_range("2007-1-1", periods=500, freq="X") + + def test_constructor_nano(self): + idx = period_range( + start=Period(ordinal=1, freq="N"), end=Period(ordinal=4, freq="N"), freq="N" + ) + exp = PeriodIndex( + [ + Period(ordinal=1, freq="N"), + Period(ordinal=2, freq="N"), + Period(ordinal=3, freq="N"), + Period(ordinal=4, freq="N"), + ], + freq="N", + ) + tm.assert_index_equal(idx, exp) + + def test_constructor_arrays_negative_year(self): + years = np.arange(1960, 2000, dtype=np.int64).repeat(4) + quarters = np.tile(np.array([1, 2, 3, 4], dtype=np.int64), 40) + + pindex = PeriodIndex(year=years, quarter=quarters) + + tm.assert_index_equal(pindex.year, Index(years)) + tm.assert_index_equal(pindex.quarter, Index(quarters)) + + def test_constructor_invalid_quarters(self): + msg = "Quarter must be 1 <= q <= 4" + with pytest.raises(ValueError, match=msg): + PeriodIndex(year=range(2000, 2004), quarter=list(range(4)), freq="Q-DEC") + + def test_constructor_corner(self): + result = period_range("2007-01", periods=10.5, freq="M") + exp = period_range("2007-01", periods=10, freq="M") + tm.assert_index_equal(result, exp) + + def test_constructor_fromarraylike(self): + idx = period_range("2007-01", periods=20, freq="M") + + # values is an array of Period, thus can retrieve freq + tm.assert_index_equal(PeriodIndex(idx.values), idx) + tm.assert_index_equal(PeriodIndex(list(idx.values)), idx) + + msg = "freq not specified and cannot be inferred" + with pytest.raises(ValueError, match=msg): + PeriodIndex(idx.asi8) + with pytest.raises(ValueError, match=msg): + PeriodIndex(list(idx.asi8)) + + msg = "'Period' object is not iterable" + with pytest.raises(TypeError, match=msg): + PeriodIndex(data=Period("2007", freq="A")) + + result = PeriodIndex(iter(idx)) + tm.assert_index_equal(result, idx) + + result = PeriodIndex(idx) + tm.assert_index_equal(result, idx) + + result = PeriodIndex(idx, freq="M") + tm.assert_index_equal(result, idx) + + result = PeriodIndex(idx, freq=offsets.MonthEnd()) + tm.assert_index_equal(result, idx) + assert result.freq == "M" + + result = PeriodIndex(idx, freq="2M") + tm.assert_index_equal(result, idx.asfreq("2M")) + assert result.freq == "2M" + + result = PeriodIndex(idx, freq=offsets.MonthEnd(2)) + tm.assert_index_equal(result, idx.asfreq("2M")) + assert result.freq == "2M" + + result = PeriodIndex(idx, freq="D") + exp = idx.asfreq("D", "e") + tm.assert_index_equal(result, exp) + + def test_constructor_datetime64arr(self): + vals = np.arange(100000, 100000 + 10000, 100, dtype=np.int64) + vals = vals.view(np.dtype("M8[us]")) + + msg = r"Wrong dtype: datetime64\[us\]" + with pytest.raises(ValueError, match=msg): + PeriodIndex(vals, freq="D") + + @pytest.mark.parametrize("box", [None, "series", "index"]) + def test_constructor_datetime64arr_ok(self, box): + # https://github.com/pandas-dev/pandas/issues/23438 + data = date_range("2017", periods=4, freq="M") + if box is None: + data = data._values + elif box == "series": + data = Series(data) + + result = PeriodIndex(data, freq="D") + expected = PeriodIndex( + ["2017-01-31", "2017-02-28", "2017-03-31", "2017-04-30"], freq="D" + ) + tm.assert_index_equal(result, expected) + + def test_constructor_dtype(self): + # passing a dtype with a tz should localize + idx = PeriodIndex(["2013-01", "2013-03"], dtype="period[M]") + exp = PeriodIndex(["2013-01", "2013-03"], freq="M") + tm.assert_index_equal(idx, exp) + assert idx.dtype == "period[M]" + + idx = PeriodIndex(["2013-01-05", 
"2013-03-05"], dtype="period[3D]") + exp = PeriodIndex(["2013-01-05", "2013-03-05"], freq="3D") + tm.assert_index_equal(idx, exp) + assert idx.dtype == "period[3D]" + + # if we already have a freq and its not the same, then asfreq + # (not changed) + idx = PeriodIndex(["2013-01-01", "2013-01-02"], freq="D") + + res = PeriodIndex(idx, dtype="period[M]") + exp = PeriodIndex(["2013-01", "2013-01"], freq="M") + tm.assert_index_equal(res, exp) + assert res.dtype == "period[M]" + + res = PeriodIndex(idx, freq="M") + tm.assert_index_equal(res, exp) + assert res.dtype == "period[M]" + + msg = "specified freq and dtype are different" + with pytest.raises(IncompatibleFrequency, match=msg): + PeriodIndex(["2011-01"], freq="M", dtype="period[D]") + + def test_constructor_empty(self): + idx = PeriodIndex([], freq="M") + assert isinstance(idx, PeriodIndex) + assert len(idx) == 0 + assert idx.freq == "M" + + with pytest.raises(ValueError, match="freq not specified"): + PeriodIndex([]) + + def test_constructor_pi_nat(self): + idx = PeriodIndex( + [Period("2011-01", freq="M"), NaT, Period("2011-01", freq="M")] + ) + exp = PeriodIndex(["2011-01", "NaT", "2011-01"], freq="M") + tm.assert_index_equal(idx, exp) + + idx = PeriodIndex( + np.array([Period("2011-01", freq="M"), NaT, Period("2011-01", freq="M")]) + ) + tm.assert_index_equal(idx, exp) + + idx = PeriodIndex( + [NaT, NaT, Period("2011-01", freq="M"), Period("2011-01", freq="M")] + ) + exp = PeriodIndex(["NaT", "NaT", "2011-01", "2011-01"], freq="M") + tm.assert_index_equal(idx, exp) + + idx = PeriodIndex( + np.array( + [NaT, NaT, Period("2011-01", freq="M"), Period("2011-01", freq="M")] + ) + ) + tm.assert_index_equal(idx, exp) + + idx = PeriodIndex([NaT, NaT, "2011-01", "2011-01"], freq="M") + tm.assert_index_equal(idx, exp) + + with pytest.raises(ValueError, match="freq not specified"): + PeriodIndex([NaT, NaT]) + + with pytest.raises(ValueError, match="freq not specified"): + PeriodIndex(np.array([NaT, NaT])) + + with pytest.raises(ValueError, match="freq not specified"): + PeriodIndex(["NaT", "NaT"]) + + with pytest.raises(ValueError, match="freq not specified"): + PeriodIndex(np.array(["NaT", "NaT"])) + + def test_constructor_incompat_freq(self): + msg = "Input has different freq=D from PeriodIndex\\(freq=M\\)" + + with pytest.raises(IncompatibleFrequency, match=msg): + PeriodIndex([Period("2011-01", freq="M"), NaT, Period("2011-01", freq="D")]) + + with pytest.raises(IncompatibleFrequency, match=msg): + PeriodIndex( + np.array( + [Period("2011-01", freq="M"), NaT, Period("2011-01", freq="D")] + ) + ) + + # first element is NaT + with pytest.raises(IncompatibleFrequency, match=msg): + PeriodIndex([NaT, Period("2011-01", freq="M"), Period("2011-01", freq="D")]) + + with pytest.raises(IncompatibleFrequency, match=msg): + PeriodIndex( + np.array( + [NaT, Period("2011-01", freq="M"), Period("2011-01", freq="D")] + ) + ) + + def test_constructor_mixed(self): + idx = PeriodIndex(["2011-01", NaT, Period("2011-01", freq="M")]) + exp = PeriodIndex(["2011-01", "NaT", "2011-01"], freq="M") + tm.assert_index_equal(idx, exp) + + idx = PeriodIndex(["NaT", NaT, Period("2011-01", freq="M")]) + exp = PeriodIndex(["NaT", "NaT", "2011-01"], freq="M") + tm.assert_index_equal(idx, exp) + + idx = PeriodIndex([Period("2011-01-01", freq="D"), NaT, "2012-01-01"]) + exp = PeriodIndex(["2011-01-01", "NaT", "2012-01-01"], freq="D") + tm.assert_index_equal(idx, exp) + + def test_constructor_simple_new(self): + idx = period_range("2007-01", name="p", periods=2, freq="M") + + 
with pytest.raises(AssertionError, match=""): + idx._simple_new(idx, name="p") + + result = idx._simple_new(idx._data, name="p") + tm.assert_index_equal(result, idx) + + msg = "Should be numpy array of type i8" + with pytest.raises(AssertionError, match=msg): + # Need ndarray, not Int64Index + type(idx._data)._simple_new(Index(idx.asi8), freq=idx.freq) + + arr = type(idx._data)._simple_new(idx.asi8, freq=idx.freq) + result = idx._simple_new(arr, name="p") + tm.assert_index_equal(result, idx) + + def test_constructor_simple_new_empty(self): + # GH13079 + idx = PeriodIndex([], freq="M", name="p") + with pytest.raises(AssertionError, match=""): + idx._simple_new(idx, name="p") + + result = idx._simple_new(idx._data, name="p") + tm.assert_index_equal(result, idx) + + @pytest.mark.parametrize("floats", [[1.1, 2.1], np.array([1.1, 2.1])]) + def test_constructor_floats(self, floats): + with pytest.raises(AssertionError, match="= -1" + ) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + msg = "index -5 is out of bounds for( axis 0 with)? size 3" + with pytest.raises(IndexError, match=msg): + idx.take(np.array([1, -5])) + + +class TestGetValue: + def test_get_value(self): + # GH 17717 + p0 = Period("2017-09-01") + p1 = Period("2017-09-02") + p2 = Period("2017-09-03") + + idx0 = PeriodIndex([p0, p1, p2]) + input0 = Series(np.array([1, 2, 3]), index=idx0) + expected0 = 2 + + with tm.assert_produces_warning(FutureWarning): + result0 = idx0.get_value(input0, p1) + assert result0 == expected0 + + idx1 = PeriodIndex([p1, p1, p2]) + input1 = Series(np.array([1, 2, 3]), index=idx1) + expected1 = input1.iloc[[0, 1]] + + with tm.assert_produces_warning(FutureWarning): + result1 = idx1.get_value(input1, p1) + tm.assert_series_equal(result1, expected1) + + idx2 = PeriodIndex([p1, p2, p1]) + input2 = Series(np.array([1, 2, 3]), index=idx2) + expected2 = input2.iloc[[0, 2]] + + with tm.assert_produces_warning(FutureWarning): + result2 = idx2.get_value(input2, p1) + tm.assert_series_equal(result2, expected2) + + @pytest.mark.parametrize("freq", ["H", "D"]) + def test_get_value_datetime_hourly(self, freq): + # get_loc and get_value should treat datetime objects symmetrically + dti = date_range("2016-01-01", periods=3, freq="MS") + pi = dti.to_period(freq) + ser = Series(range(7, 10), index=pi) + + ts = dti[0] + + assert pi.get_loc(ts) == 0 + with tm.assert_produces_warning(FutureWarning): + assert pi.get_value(ser, ts) == 7 + assert ser[ts] == 7 + assert ser.loc[ts] == 7 + + ts2 = ts + Timedelta(hours=3) + if freq == "H": + with pytest.raises(KeyError, match="2016-01-01 03:00"): + pi.get_loc(ts2) + with pytest.raises(KeyError, match="2016-01-01 03:00"): + with tm.assert_produces_warning(FutureWarning): + pi.get_value(ser, ts2) + with pytest.raises(KeyError, match="2016-01-01 03:00"): + ser[ts2] + with pytest.raises(KeyError, match="2016-01-01 03:00"): + ser.loc[ts2] + else: + assert pi.get_loc(ts2) == 0 + with tm.assert_produces_warning(FutureWarning): + assert pi.get_value(ser, ts2) == 7 + assert ser[ts2] == 7 + assert ser.loc[ts2] == 7 + + def test_get_value_integer(self): + msg = "index 16801 is out of bounds for axis 0 with size 3" + dti = date_range("2016-01-01", periods=3) + pi = dti.to_period("D") + ser = Series(range(3), index=pi) + with pytest.raises(IndexError, match=msg): + with tm.assert_produces_warning(FutureWarning): + pi.get_value(ser, 16801) + + msg = 
"index 46 is out of bounds for axis 0 with size 3" + pi2 = dti.to_period("Y") # duplicates, ordinals are all 46 + ser2 = Series(range(3), index=pi2) + with pytest.raises(IndexError, match=msg): + with tm.assert_produces_warning(FutureWarning): + pi2.get_value(ser2, 46) + + +class TestContains: + def test_contains(self): + # GH 17717 + p0 = Period("2017-09-01") + p1 = Period("2017-09-02") + p2 = Period("2017-09-03") + p3 = Period("2017-09-04") + + ps0 = [p0, p1, p2] + idx0 = PeriodIndex(ps0) + ser = Series(range(6, 9), index=idx0) + + for p in ps0: + assert p in idx0 + assert str(p) in idx0 + + # GH#31172 + # Higher-resolution period-like are _not_ considered as contained + key = "2017-09-01 00:00:01" + assert key not in idx0 + with pytest.raises(KeyError, match=key): + idx0.get_loc(key) + with pytest.raises(KeyError, match=key): + with tm.assert_produces_warning(FutureWarning): + idx0.get_value(ser, key) + + assert "2017-09" in idx0 + + assert p3 not in idx0 + + def test_contains_freq_mismatch(self): + rng = period_range("2007-01", freq="M", periods=10) + + assert Period("2007-01", freq="M") in rng + assert not Period("2007-01", freq="D") in rng + assert not Period("2007-01", freq="2M") in rng + + def test_contains_nat(self): + # see gh-13582 + idx = period_range("2007-01", freq="M", periods=10) + assert NaT not in idx + assert None not in idx + assert float("nan") not in idx + assert np.nan not in idx + + idx = PeriodIndex(["2011-01", "NaT", "2011-02"], freq="M") + assert NaT in idx + assert None in idx + assert float("nan") in idx + assert np.nan in idx + + +class TestAsOfLocs: + def test_asof_locs_mismatched_type(self): + dti = date_range("2016-01-01", periods=3) + pi = dti.to_period("D") + pi2 = dti.to_period("H") + + mask = np.array([0, 1, 0], dtype=bool) + + msg = "must be DatetimeIndex or PeriodIndex" + with pytest.raises(TypeError, match=msg): + pi.asof_locs(Int64Index(pi.asi8), mask) + + with pytest.raises(TypeError, match=msg): + pi.asof_locs(Float64Index(pi.asi8), mask) + + with pytest.raises(TypeError, match=msg): + # TimedeltaIndex + pi.asof_locs(dti - dti, mask) + + msg = "Input has different freq=H" + with pytest.raises(libperiod.IncompatibleFrequency, match=msg): + pi.asof_locs(pi2, mask) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/test_join.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/test_join.py new file mode 100644 index 0000000..27cba86 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/test_join.py @@ -0,0 +1,58 @@ +import numpy as np +import pytest + +from pandas._libs.tslibs import IncompatibleFrequency + +from pandas import ( + Index, + PeriodIndex, + period_range, +) +import pandas._testing as tm + + +class TestJoin: + def test_join_outer_indexer(self): + pi = period_range("1/1/2000", "1/20/2000", freq="D") + + result = pi._outer_indexer(pi) + tm.assert_extension_array_equal(result[0], pi._values) + tm.assert_numpy_array_equal(result[1], np.arange(len(pi), dtype=np.intp)) + tm.assert_numpy_array_equal(result[2], np.arange(len(pi), dtype=np.intp)) + + def test_joins(self, join_type): + index = period_range("1/1/2000", "1/20/2000", freq="D") + + joined = index.join(index[:-5], how=join_type) + + assert isinstance(joined, PeriodIndex) + assert joined.freq == index.freq + + def test_join_self(self, join_type): + index = period_range("1/1/2000", "1/20/2000", freq="D") + + res = index.join(index, how=join_type) + assert index is res + + def test_join_does_not_recur(self): + df = 
tm.makeCustomDataframe( + 3, + 2, + data_gen_f=lambda *args: np.random.randint(2), + c_idx_type="p", + r_idx_type="dt", + ) + ser = df.iloc[:2, 0] + + res = ser.index.join(df.columns, how="outer") + expected = Index( + [ser.index[0], ser.index[1], df.columns[0], df.columns[1]], object + ) + tm.assert_index_equal(res, expected) + + def test_join_mismatched_freq_raises(self): + index = period_range("1/1/2000", "1/20/2000", freq="D") + index3 = period_range("1/1/2000", "1/20/2000", freq="2D") + msg = r".*Input has different freq=2D from Period\(freq=D\)" + with pytest.raises(IncompatibleFrequency, match=msg): + index.join(index3) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/test_monotonic.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/test_monotonic.py new file mode 100644 index 0000000..15cb8f7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/test_monotonic.py @@ -0,0 +1,42 @@ +from pandas import ( + Period, + PeriodIndex, +) + + +def test_is_monotonic_increasing(): + # GH#17717 + p0 = Period("2017-09-01") + p1 = Period("2017-09-02") + p2 = Period("2017-09-03") + + idx_inc0 = PeriodIndex([p0, p1, p2]) + idx_inc1 = PeriodIndex([p0, p1, p1]) + idx_dec0 = PeriodIndex([p2, p1, p0]) + idx_dec1 = PeriodIndex([p2, p1, p1]) + idx = PeriodIndex([p1, p2, p0]) + + assert idx_inc0.is_monotonic_increasing is True + assert idx_inc1.is_monotonic_increasing is True + assert idx_dec0.is_monotonic_increasing is False + assert idx_dec1.is_monotonic_increasing is False + assert idx.is_monotonic_increasing is False + + +def test_is_monotonic_decreasing(): + # GH#17717 + p0 = Period("2017-09-01") + p1 = Period("2017-09-02") + p2 = Period("2017-09-03") + + idx_inc0 = PeriodIndex([p0, p1, p2]) + idx_inc1 = PeriodIndex([p0, p1, p1]) + idx_dec0 = PeriodIndex([p2, p1, p0]) + idx_dec1 = PeriodIndex([p2, p1, p1]) + idx = PeriodIndex([p1, p2, p0]) + + assert idx_inc0.is_monotonic_decreasing is False + assert idx_inc1.is_monotonic_decreasing is False + assert idx_dec0.is_monotonic_decreasing is True + assert idx_dec1.is_monotonic_decreasing is True + assert idx.is_monotonic_decreasing is False diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/test_partial_slicing.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/test_partial_slicing.py new file mode 100644 index 0000000..72e7e45 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/test_partial_slicing.py @@ -0,0 +1,201 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + PeriodIndex, + Series, + date_range, + period_range, +) +import pandas._testing as tm + + +class TestPeriodIndex: + def test_getitem_periodindex_duplicates_string_slice(self): + # monotonic + idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq="A-JUN") + ts = Series(np.random.randn(len(idx)), index=idx) + + result = ts["2007"] + expected = ts[1:3] + tm.assert_series_equal(result, expected) + result[:] = 1 + assert (ts[1:3] == 1).all() + + # not monotonic + idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq="A-JUN") + ts = Series(np.random.randn(len(idx)), index=idx) + + result = ts["2007"] + expected = ts[idx == "2007"] + tm.assert_series_equal(result, expected) + + def test_getitem_periodindex_quarter_string(self): + pi = PeriodIndex(["2Q05", "3Q05", "4Q05", "1Q06", "2Q06"], freq="Q") + ser = Series(np.random.rand(len(pi)), index=pi).cumsum() + # Todo: fix these accessors! 
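+ # "05Q4" and "4Q05" name the same quarter (Q4 2005), which sits at + # position 2 of this index, so label and positional lookup should agree.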
+ assert ser["05Q4"] == ser[2] + + def test_pindex_slice_index(self): + pi = period_range(start="1/1/10", end="12/31/12", freq="M") + s = Series(np.random.rand(len(pi)), index=pi) + res = s["2010"] + exp = s[0:12] + tm.assert_series_equal(res, exp) + res = s["2011"] + exp = s[12:24] + tm.assert_series_equal(res, exp) + + @pytest.mark.parametrize("make_range", [date_range, period_range]) + def test_range_slice_day(self, make_range): + # GH#6716 + idx = make_range(start="2013/01/01", freq="D", periods=400) + + msg = "slice indices must be integers or None or have an __index__ method" + # slices against index should raise IndexError + values = [ + "2014", + "2013/02", + "2013/01/02", + "2013/02/01 9H", + "2013/02/01 09:00", + ] + for v in values: + with pytest.raises(TypeError, match=msg): + idx[v:] + + s = Series(np.random.rand(len(idx)), index=idx) + + tm.assert_series_equal(s["2013/01/02":], s[1:]) + tm.assert_series_equal(s["2013/01/02":"2013/01/05"], s[1:5]) + tm.assert_series_equal(s["2013/02":], s[31:]) + tm.assert_series_equal(s["2014":], s[365:]) + + invalid = ["2013/02/01 9H", "2013/02/01 09:00"] + for v in invalid: + with pytest.raises(TypeError, match=msg): + idx[v:] + + @pytest.mark.parametrize("make_range", [date_range, period_range]) + def test_range_slice_seconds(self, make_range): + # GH#6716 + idx = make_range(start="2013/01/01 09:00:00", freq="S", periods=4000) + msg = "slice indices must be integers or None or have an __index__ method" + + # slices against index should raise IndexError + values = [ + "2014", + "2013/02", + "2013/01/02", + "2013/02/01 9H", + "2013/02/01 09:00", + ] + for v in values: + with pytest.raises(TypeError, match=msg): + idx[v:] + + s = Series(np.random.rand(len(idx)), index=idx) + + tm.assert_series_equal(s["2013/01/01 09:05":"2013/01/01 09:10"], s[300:660]) + tm.assert_series_equal(s["2013/01/01 10:00":"2013/01/01 10:05"], s[3600:3960]) + tm.assert_series_equal(s["2013/01/01 10H":], s[3600:]) + tm.assert_series_equal(s[:"2013/01/01 09:30"], s[:1860]) + for d in ["2013/01/01", "2013/01", "2013"]: + tm.assert_series_equal(s[d:], s) + + @pytest.mark.parametrize("make_range", [date_range, period_range]) + def test_range_slice_outofbounds(self, make_range): + # GH#5407 + idx = make_range(start="2013/10/01", freq="D", periods=10) + + df = DataFrame({"units": [100 + i for i in range(10)]}, index=idx) + empty = DataFrame(index=type(idx)([], freq="D"), columns=["units"]) + empty["units"] = empty["units"].astype("int64") + + tm.assert_frame_equal(df["2013/09/01":"2013/09/30"], empty) + tm.assert_frame_equal(df["2013/09/30":"2013/10/02"], df.iloc[:2]) + tm.assert_frame_equal(df["2013/10/01":"2013/10/02"], df.iloc[:2]) + tm.assert_frame_equal(df["2013/10/02":"2013/09/30"], empty) + tm.assert_frame_equal(df["2013/10/15":"2013/10/17"], empty) + tm.assert_frame_equal(df["2013-06":"2013-09"], empty) + tm.assert_frame_equal(df["2013-11":"2013-12"], empty) + + @pytest.mark.parametrize("make_range", [date_range, period_range]) + def test_maybe_cast_slice_bound(self, make_range, frame_or_series): + idx = make_range(start="2013/10/01", freq="D", periods=10) + + obj = DataFrame({"units": [100 + i for i in range(10)]}, index=idx) + obj = tm.get_obj(obj, frame_or_series) + + msg = ( + f"cannot do slice indexing on {type(idx).__name__} with " + r"these indexers \[foo\] of type str" + ) + + # Check the lower-level calls are raising where expected. 
+ with pytest.raises(TypeError, match=msg): + idx._maybe_cast_slice_bound("foo", "left") + with pytest.raises(TypeError, match=msg): + idx.get_slice_bound("foo", "left") + + with pytest.raises(TypeError, match=msg): + obj["2013/09/30":"foo"] + with pytest.raises(TypeError, match=msg): + obj["foo":"2013/09/30"] + with pytest.raises(TypeError, match=msg): + obj.loc["2013/09/30":"foo"] + with pytest.raises(TypeError, match=msg): + obj.loc["foo":"2013/09/30"] + + def test_partial_slice_doesnt_require_monotonicity(self): + # See also: DatetimeIndex test of the same name + dti = date_range("2014-01-01", periods=30, freq="30D") + pi = dti.to_period("D") + + ser_monotonic = Series(np.arange(30), index=pi) + + shuffler = list(range(0, 30, 2)) + list(range(1, 31, 2)) + ser = ser_monotonic[shuffler] + nidx = ser.index + + # Manually identified locations of year==2014 + indexer_2014 = np.array( + [0, 1, 2, 3, 4, 5, 6, 15, 16, 17, 18, 19, 20], dtype=np.intp + ) + assert (nidx[indexer_2014].year == 2014).all() + assert not (nidx[~indexer_2014].year == 2014).any() + + result = nidx.get_loc("2014") + tm.assert_numpy_array_equal(result, indexer_2014) + + expected = ser[indexer_2014] + + with tm.assert_produces_warning(FutureWarning): + result = nidx.get_value(ser, "2014") + tm.assert_series_equal(result, expected) + + result = ser.loc["2014"] + tm.assert_series_equal(result, expected) + + result = ser["2014"] + tm.assert_series_equal(result, expected) + + # Manually identified locations where ser.index is within May 2015 + indexer_may2015 = np.array([23], dtype=np.intp) + assert nidx[23].year == 2015 and nidx[23].month == 5 + + result = nidx.get_loc("May 2015") + tm.assert_numpy_array_equal(result, indexer_may2015) + + expected = ser[indexer_may2015] + + with tm.assert_produces_warning(FutureWarning): + result = nidx.get_value(ser, "May 2015") + tm.assert_series_equal(result, expected) + + result = ser.loc["May 2015"] + tm.assert_series_equal(result, expected) + + result = ser["May 2015"] + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/test_period.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/test_period.py new file mode 100644 index 0000000..e5c85ed --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/test_period.py @@ -0,0 +1,353 @@ +import numpy as np +import pytest + +from pandas._libs.tslibs.period import IncompatibleFrequency + +from pandas import ( + Index, + NaT, + Period, + PeriodIndex, + Series, + date_range, + offsets, + period_range, +) +import pandas._testing as tm +from pandas.tests.indexes.datetimelike import DatetimeLike + + +class TestPeriodIndex(DatetimeLike): + _index_cls = PeriodIndex + + @pytest.fixture + def simple_index(self) -> Index: + return period_range("20130101", periods=5, freq="D") + + @pytest.fixture( + params=[ + tm.makePeriodIndex(10), + period_range("20130101", periods=10, freq="D")[::-1], + ], + ids=["index_inc", "index_dec"], + ) + def index(self, request): + return request.param + + def test_where(self): + # This is handled in test_indexing + pass + + def test_make_time_series(self): + index = period_range(freq="A", start="1/1/2001", end="12/1/2009") + series = Series(1, index=index) + assert isinstance(series, Series) + + def test_view_asi8(self): + idx = PeriodIndex([], freq="M") + + exp = np.array([], dtype=np.int64) + tm.assert_numpy_array_equal(idx.view("i8"), exp) + tm.assert_numpy_array_equal(idx.asi8, exp) + + idx = PeriodIndex(["2011-01",
NaT], freq="M") + + exp = np.array([492, -9223372036854775808], dtype=np.int64) + tm.assert_numpy_array_equal(idx.view("i8"), exp) + tm.assert_numpy_array_equal(idx.asi8, exp) + + exp = np.array([14975, -9223372036854775808], dtype=np.int64) + idx = PeriodIndex(["2011-01-01", NaT], freq="D") + tm.assert_numpy_array_equal(idx.view("i8"), exp) + tm.assert_numpy_array_equal(idx.asi8, exp) + + def test_values(self): + idx = PeriodIndex([], freq="M") + + exp = np.array([], dtype=object) + tm.assert_numpy_array_equal(idx.values, exp) + tm.assert_numpy_array_equal(idx.to_numpy(), exp) + + exp = np.array([], dtype=np.int64) + tm.assert_numpy_array_equal(idx.asi8, exp) + + idx = PeriodIndex(["2011-01", NaT], freq="M") + + exp = np.array([Period("2011-01", freq="M"), NaT], dtype=object) + tm.assert_numpy_array_equal(idx.values, exp) + tm.assert_numpy_array_equal(idx.to_numpy(), exp) + exp = np.array([492, -9223372036854775808], dtype=np.int64) + tm.assert_numpy_array_equal(idx.asi8, exp) + + idx = PeriodIndex(["2011-01-01", NaT], freq="D") + + exp = np.array([Period("2011-01-01", freq="D"), NaT], dtype=object) + tm.assert_numpy_array_equal(idx.values, exp) + tm.assert_numpy_array_equal(idx.to_numpy(), exp) + exp = np.array([14975, -9223372036854775808], dtype=np.int64) + tm.assert_numpy_array_equal(idx.asi8, exp) + + def test_period_index_length(self): + pi = period_range(freq="A", start="1/1/2001", end="12/1/2009") + assert len(pi) == 9 + + pi = period_range(freq="Q", start="1/1/2001", end="12/1/2009") + assert len(pi) == 4 * 9 + + pi = period_range(freq="M", start="1/1/2001", end="12/1/2009") + assert len(pi) == 12 * 9 + + start = Period("02-Apr-2005", "B") + i1 = period_range(start=start, periods=20) + assert len(i1) == 20 + assert i1.freq == start.freq + assert i1[0] == start + + end_intv = Period("2006-12-31", "W") + i1 = period_range(end=end_intv, periods=10) + assert len(i1) == 10 + assert i1.freq == end_intv.freq + assert i1[-1] == end_intv + + end_intv = Period("2006-12-31", "1w") + i2 = period_range(end=end_intv, periods=10) + assert len(i1) == len(i2) + assert (i1 == i2).all() + assert i1.freq == i2.freq + + msg = "start and end must have same freq" + with pytest.raises(ValueError, match=msg): + period_range(start=start, end=end_intv) + + end_intv = Period("2005-05-01", "B") + i1 = period_range(start=start, end=end_intv) + + msg = ( + "Of the three parameters: start, end, and periods, exactly two " + "must be specified" + ) + with pytest.raises(ValueError, match=msg): + period_range(start=start) + + # infer freq from first element + i2 = PeriodIndex([end_intv, Period("2005-05-05", "B")]) + assert len(i2) == 2 + assert i2[0] == end_intv + + i2 = PeriodIndex(np.array([end_intv, Period("2005-05-05", "B")])) + assert len(i2) == 2 + assert i2[0] == end_intv + + # Mixed freq should fail + vals = [end_intv, Period("2006-12-31", "w")] + msg = r"Input has different freq=W-SUN from PeriodIndex\(freq=B\)" + with pytest.raises(IncompatibleFrequency, match=msg): + PeriodIndex(vals) + vals = np.array(vals) + with pytest.raises(ValueError, match=msg): + PeriodIndex(vals) + + def test_fields(self): + # year, month, day, hour, minute + # second, weekofyear, week, dayofweek, weekday, dayofyear, quarter + # qyear + pi = period_range(freq="A", start="1/1/2001", end="12/1/2005") + self._check_all_fields(pi) + + pi = period_range(freq="Q", start="1/1/2001", end="12/1/2002") + self._check_all_fields(pi) + + pi = period_range(freq="M", start="1/1/2001", end="1/1/2002") + self._check_all_fields(pi) + + pi = 
period_range(freq="D", start="12/1/2001", end="6/1/2001") + self._check_all_fields(pi) + + pi = period_range(freq="B", start="12/1/2001", end="6/1/2001") + self._check_all_fields(pi) + + pi = period_range(freq="H", start="12/31/2001", end="1/1/2002 23:00") + self._check_all_fields(pi) + + pi = period_range(freq="Min", start="12/31/2001", end="1/1/2002 00:20") + self._check_all_fields(pi) + + pi = period_range( + freq="S", start="12/31/2001 00:00:00", end="12/31/2001 00:05:00" + ) + self._check_all_fields(pi) + + end_intv = Period("2006-12-31", "W") + i1 = period_range(end=end_intv, periods=10) + self._check_all_fields(i1) + + def _check_all_fields(self, periodindex): + fields = [ + "year", + "month", + "day", + "hour", + "minute", + "second", + "weekofyear", + "week", + "dayofweek", + "day_of_week", + "dayofyear", + "day_of_year", + "quarter", + "qyear", + "days_in_month", + ] + + periods = list(periodindex) + ser = Series(periodindex) + + for field in fields: + field_idx = getattr(periodindex, field) + assert len(periodindex) == len(field_idx) + for x, val in zip(periods, field_idx): + assert getattr(x, field) == val + + if len(ser) == 0: + continue + + field_s = getattr(ser.dt, field) + assert len(periodindex) == len(field_s) + for x, val in zip(periods, field_s): + assert getattr(x, field) == val + + def test_is_(self): + create_index = lambda: period_range(freq="A", start="1/1/2001", end="12/1/2009") + index = create_index() + assert index.is_(index) + assert not index.is_(create_index()) + assert index.is_(index.view()) + assert index.is_(index.view().view().view().view().view()) + assert index.view().is_(index) + ind2 = index.view() + index.name = "Apple" + assert ind2.is_(index) + assert not index.is_(index[:]) + assert not index.is_(index.asfreq("M")) + assert not index.is_(index.asfreq("A")) + + assert not index.is_(index - 2) + assert not index.is_(index - 0) + + def test_index_unique(self): + idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq="A-JUN") + expected = PeriodIndex([2000, 2007, 2009], freq="A-JUN") + tm.assert_index_equal(idx.unique(), expected) + assert idx.nunique() == 3 + + def test_shift(self): + # This is tested in test_arithmetic + pass + + def test_negative_ordinals(self): + Period(ordinal=-1000, freq="A") + Period(ordinal=0, freq="A") + + idx1 = PeriodIndex(ordinal=[-1, 0, 1], freq="A") + idx2 = PeriodIndex(ordinal=np.array([-1, 0, 1]), freq="A") + tm.assert_index_equal(idx1, idx2) + + def test_pindex_fieldaccessor_nat(self): + idx = PeriodIndex( + ["2011-01", "2011-02", "NaT", "2012-03", "2012-04"], freq="D", name="name" + ) + + exp = Index([2011, 2011, -1, 2012, 2012], dtype=np.int64, name="name") + tm.assert_index_equal(idx.year, exp) + exp = Index([1, 2, -1, 3, 4], dtype=np.int64, name="name") + tm.assert_index_equal(idx.month, exp) + + def test_pindex_multiples(self): + expected = PeriodIndex( + ["2011-01", "2011-03", "2011-05", "2011-07", "2011-09", "2011-11"], + freq="2M", + ) + + pi = period_range(start="1/1/11", end="12/31/11", freq="2M") + tm.assert_index_equal(pi, expected) + assert pi.freq == offsets.MonthEnd(2) + assert pi.freqstr == "2M" + + pi = period_range(start="1/1/11", periods=6, freq="2M") + tm.assert_index_equal(pi, expected) + assert pi.freq == offsets.MonthEnd(2) + assert pi.freqstr == "2M" + + def test_iteration(self): + index = period_range(start="1/1/10", periods=4, freq="B") + + result = list(index) + assert isinstance(result[0], Period) + assert result[0].freq == index.freq + + def test_with_multi_index(self): + # #1705 + index 
= date_range("1/1/2012", periods=4, freq="12H") + index_as_arrays = [index.to_period(freq="D"), index.hour] + + s = Series([0, 1, 2, 3], index_as_arrays) + + assert isinstance(s.index.levels[0], PeriodIndex) + + assert isinstance(s.index.values[0][0], Period) + + def test_map(self): + # test_map_dictlike generally tests + + index = PeriodIndex([2005, 2007, 2009], freq="A") + result = index.map(lambda x: x.ordinal) + exp = Index([x.ordinal for x in index]) + tm.assert_index_equal(result, exp) + + def test_format_empty(self): + # GH35712 + empty_idx = self._index_cls([], freq="A") + assert empty_idx.format() == [] + assert empty_idx.format(name=True) == [""] + + +def test_maybe_convert_timedelta(): + pi = PeriodIndex(["2000", "2001"], freq="D") + offset = offsets.Day(2) + assert pi._maybe_convert_timedelta(offset) == 2 + assert pi._maybe_convert_timedelta(2) == 2 + + offset = offsets.BusinessDay() + msg = r"Input has different freq=B from PeriodIndex\(freq=D\)" + with pytest.raises(ValueError, match=msg): + pi._maybe_convert_timedelta(offset) + + +@pytest.mark.parametrize("array", [True, False]) +def test_dunder_array(array): + obj = PeriodIndex(["2000-01-01", "2001-01-01"], freq="D") + if array: + obj = obj._data + + expected = np.array([obj[0], obj[1]], dtype=object) + result = np.array(obj) + tm.assert_numpy_array_equal(result, expected) + + result = np.asarray(obj) + tm.assert_numpy_array_equal(result, expected) + + expected = obj.asi8 + for dtype in ["i8", "int64", np.int64]: + result = np.array(obj, dtype=dtype) + tm.assert_numpy_array_equal(result, expected) + + result = np.asarray(obj, dtype=dtype) + tm.assert_numpy_array_equal(result, expected) + + for dtype in ["float64", "int32", "uint64"]: + msg = "argument must be" + with pytest.raises(TypeError, match=msg): + np.array(obj, dtype=dtype) + with pytest.raises(TypeError, match=msg): + np.array(obj, dtype=getattr(np, dtype)) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/test_period_range.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/test_period_range.py new file mode 100644 index 0000000..c94ddf5 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/test_period_range.py @@ -0,0 +1,121 @@ +import numpy as np +import pytest + +from pandas import ( + NaT, + Period, + PeriodIndex, + date_range, + period_range, +) +import pandas._testing as tm + + +class TestPeriodRange: + def test_required_arguments(self): + msg = ( + "Of the three parameters: start, end, and periods, exactly two " + "must be specified" + ) + with pytest.raises(ValueError, match=msg): + period_range("2011-1-1", "2012-1-1", "B") + + @pytest.mark.parametrize("freq", ["D", "W", "M", "Q", "A"]) + def test_construction_from_string(self, freq): + # non-empty + expected = date_range( + start="2017-01-01", periods=5, freq=freq, name="foo" + ).to_period() + start, end = str(expected[0]), str(expected[-1]) + + result = period_range(start=start, end=end, freq=freq, name="foo") + tm.assert_index_equal(result, expected) + + result = period_range(start=start, periods=5, freq=freq, name="foo") + tm.assert_index_equal(result, expected) + + result = period_range(end=end, periods=5, freq=freq, name="foo") + tm.assert_index_equal(result, expected) + + # empty + expected = PeriodIndex([], freq=freq, name="foo") + + result = period_range(start=start, periods=0, freq=freq, name="foo") + tm.assert_index_equal(result, expected) + + result = period_range(end=end, periods=0, freq=freq, name="foo") + 
tm.assert_index_equal(result, expected) + + result = period_range(start=end, end=start, freq=freq, name="foo") + tm.assert_index_equal(result, expected) + + def test_construction_from_period(self): + # upsampling + start, end = Period("2017Q1", freq="Q"), Period("2018Q1", freq="Q") + expected = date_range( + start="2017-03-31", end="2018-03-31", freq="M", name="foo" + ).to_period() + result = period_range(start=start, end=end, freq="M", name="foo") + tm.assert_index_equal(result, expected) + + # downsampling + start, end = Period("2017-1", freq="M"), Period("2019-12", freq="M") + expected = date_range( + start="2017-01-31", end="2019-12-31", freq="Q", name="foo" + ).to_period() + result = period_range(start=start, end=end, freq="Q", name="foo") + tm.assert_index_equal(result, expected) + + # test for issue # 21793 + start, end = Period("2017Q1", freq="Q"), Period("2018Q1", freq="Q") + idx = period_range(start=start, end=end, freq="Q", name="foo") + result = idx == idx.values + expected = np.array([True, True, True, True, True]) + tm.assert_numpy_array_equal(result, expected) + + # empty + expected = PeriodIndex([], freq="W", name="foo") + + result = period_range(start=start, periods=0, freq="W", name="foo") + tm.assert_index_equal(result, expected) + + result = period_range(end=end, periods=0, freq="W", name="foo") + tm.assert_index_equal(result, expected) + + result = period_range(start=end, end=start, freq="W", name="foo") + tm.assert_index_equal(result, expected) + + def test_errors(self): + # not enough params + msg = ( + "Of the three parameters: start, end, and periods, " + "exactly two must be specified" + ) + with pytest.raises(ValueError, match=msg): + period_range(start="2017Q1") + + with pytest.raises(ValueError, match=msg): + period_range(end="2017Q1") + + with pytest.raises(ValueError, match=msg): + period_range(periods=5) + + with pytest.raises(ValueError, match=msg): + period_range() + + # too many params + with pytest.raises(ValueError, match=msg): + period_range(start="2017Q1", end="2018Q1", periods=8, freq="Q") + + # start/end NaT + msg = "start and end must not be NaT" + with pytest.raises(ValueError, match=msg): + period_range(start=NaT, end="2018Q1") + + with pytest.raises(ValueError, match=msg): + period_range(start="2017Q1", end=NaT) + + # invalid periods param + msg = "periods must be a number, got foo" + with pytest.raises(TypeError, match=msg): + period_range(start="2017Q1", periods="foo") diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/test_pickle.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/test_pickle.py new file mode 100644 index 0000000..82f906d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/test_pickle.py @@ -0,0 +1,26 @@ +import numpy as np +import pytest + +from pandas import ( + NaT, + PeriodIndex, + period_range, +) +import pandas._testing as tm + +from pandas.tseries import offsets + + +class TestPickle: + @pytest.mark.parametrize("freq", ["D", "M", "A"]) + def test_pickle_round_trip(self, freq): + idx = PeriodIndex(["2016-05-16", "NaT", NaT, np.NaN], freq=freq) + result = tm.round_trip_pickle(idx) + tm.assert_index_equal(result, idx) + + def test_pickle_freq(self): + # GH#2891 + prng = period_range("1/1/2011", "1/1/2012", freq="M") + new_prng = tm.round_trip_pickle(prng) + assert new_prng.freq == offsets.MonthEnd() + assert new_prng.freqstr == "M" diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/test_resolution.py 
b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/test_resolution.py new file mode 100644 index 0000000..7ecbde7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/test_resolution.py @@ -0,0 +1,23 @@ +import pytest + +import pandas as pd + + +class TestResolution: + @pytest.mark.parametrize( + "freq,expected", + [ + ("A", "year"), + ("Q", "quarter"), + ("M", "month"), + ("D", "day"), + ("H", "hour"), + ("T", "minute"), + ("S", "second"), + ("L", "millisecond"), + ("U", "microsecond"), + ], + ) + def test_resolution(self, freq, expected): + idx = pd.period_range(start="2013-04-01", periods=30, freq=freq) + assert idx.resolution == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/test_scalar_compat.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/test_scalar_compat.py new file mode 100644 index 0000000..a42b849 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/test_scalar_compat.py @@ -0,0 +1,32 @@ +"""Tests for PeriodIndex behaving like a vectorized Period scalar""" + +from pandas import ( + Timedelta, + date_range, + period_range, +) +import pandas._testing as tm + + +class TestPeriodIndexOps: + def test_start_time(self): + # GH#17157 + index = period_range(freq="M", start="2016-01-01", end="2016-05-31") + expected_index = date_range("2016-01-01", end="2016-05-31", freq="MS") + tm.assert_index_equal(index.start_time, expected_index) + + def test_end_time(self): + # GH#17157 + index = period_range(freq="M", start="2016-01-01", end="2016-05-31") + expected_index = date_range("2016-01-01", end="2016-05-31", freq="M") + expected_index += Timedelta(1, "D") - Timedelta(1, "ns") + tm.assert_index_equal(index.end_time, expected_index) + + def test_end_time_business_friday(self): + # GH#34449 + pi = period_range("1990-01-05", freq="B", periods=1) + result = pi.end_time + + dti = date_range("1990-01-05", freq="D", periods=1)._with_freq(None) + expected = dti + Timedelta(days=1, nanoseconds=-1) + tm.assert_index_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/test_searchsorted.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/test_searchsorted.py new file mode 100644 index 0000000..b9863d1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/test_searchsorted.py @@ -0,0 +1,80 @@ +import numpy as np +import pytest + +from pandas._libs.tslibs import IncompatibleFrequency + +from pandas import ( + NaT, + Period, + PeriodIndex, +) +import pandas._testing as tm + + +class TestSearchsorted: + @pytest.mark.parametrize("freq", ["D", "2D"]) + def test_searchsorted(self, freq): + pidx = PeriodIndex( + ["2014-01-01", "2014-01-02", "2014-01-03", "2014-01-04", "2014-01-05"], + freq=freq, + ) + + p1 = Period("2014-01-01", freq=freq) + assert pidx.searchsorted(p1) == 0 + + p2 = Period("2014-01-04", freq=freq) + assert pidx.searchsorted(p2) == 3 + + assert pidx.searchsorted(NaT) == 5 + + msg = "Input has different freq=H from PeriodArray" + with pytest.raises(IncompatibleFrequency, match=msg): + pidx.searchsorted(Period("2014-01-01", freq="H")) + + msg = "Input has different freq=5D from PeriodArray" + with pytest.raises(IncompatibleFrequency, match=msg): + pidx.searchsorted(Period("2014-01-01", freq="5D")) + + def test_searchsorted_different_argument_classes(self, listlike_box): + pidx = PeriodIndex( + ["2014-01-01", "2014-01-02", "2014-01-03", "2014-01-04", "2014-01-05"], + freq="D", 
+ ) + result = pidx.searchsorted(listlike_box(pidx)) + expected = np.arange(len(pidx), dtype=result.dtype) + tm.assert_numpy_array_equal(result, expected) + + result = pidx._data.searchsorted(listlike_box(pidx)) + tm.assert_numpy_array_equal(result, expected) + + def test_searchsorted_invalid(self): + pidx = PeriodIndex( + ["2014-01-01", "2014-01-02", "2014-01-03", "2014-01-04", "2014-01-05"], + freq="D", + ) + + other = np.array([0, 1], dtype=np.int64) + + msg = "|".join( + [ + "searchsorted requires compatible dtype or scalar", + "value should be a 'Period', 'NaT', or array of those. Got", + ] + ) + with pytest.raises(TypeError, match=msg): + pidx.searchsorted(other) + + with pytest.raises(TypeError, match=msg): + pidx.searchsorted(other.astype("timedelta64[ns]")) + + with pytest.raises(TypeError, match=msg): + pidx.searchsorted(np.timedelta64(4)) + + with pytest.raises(TypeError, match=msg): + pidx.searchsorted(np.timedelta64("NaT", "ms")) + + with pytest.raises(TypeError, match=msg): + pidx.searchsorted(np.datetime64(4, "ns")) + + with pytest.raises(TypeError, match=msg): + pidx.searchsorted(np.datetime64("NaT", "ns")) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/test_setops.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/test_setops.py new file mode 100644 index 0000000..bac231e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/test_setops.py @@ -0,0 +1,360 @@ +import numpy as np + +import pandas as pd +from pandas import ( + PeriodIndex, + date_range, + period_range, +) +import pandas._testing as tm + + +def _permute(obj): + return obj.take(np.random.permutation(len(obj))) + + +class TestPeriodIndex: + def test_union(self, sort): + # union + other1 = period_range("1/1/2000", freq="D", periods=5) + rng1 = period_range("1/6/2000", freq="D", periods=5) + expected1 = PeriodIndex( + [ + "2000-01-06", + "2000-01-07", + "2000-01-08", + "2000-01-09", + "2000-01-10", + "2000-01-01", + "2000-01-02", + "2000-01-03", + "2000-01-04", + "2000-01-05", + ], + freq="D", + ) + + rng2 = period_range("1/1/2000", freq="D", periods=5) + other2 = period_range("1/4/2000", freq="D", periods=5) + expected2 = period_range("1/1/2000", freq="D", periods=8) + + rng3 = period_range("1/1/2000", freq="D", periods=5) + other3 = PeriodIndex([], freq="D") + expected3 = period_range("1/1/2000", freq="D", periods=5) + + rng4 = period_range("2000-01-01 09:00", freq="H", periods=5) + other4 = period_range("2000-01-02 09:00", freq="H", periods=5) + expected4 = PeriodIndex( + [ + "2000-01-01 09:00", + "2000-01-01 10:00", + "2000-01-01 11:00", + "2000-01-01 12:00", + "2000-01-01 13:00", + "2000-01-02 09:00", + "2000-01-02 10:00", + "2000-01-02 11:00", + "2000-01-02 12:00", + "2000-01-02 13:00", + ], + freq="H", + ) + + rng5 = PeriodIndex( + ["2000-01-01 09:01", "2000-01-01 09:03", "2000-01-01 09:05"], freq="T" + ) + other5 = PeriodIndex( + ["2000-01-01 09:01", "2000-01-01 09:05", "2000-01-01 09:08"], freq="T" + ) + expected5 = PeriodIndex( + [ + "2000-01-01 09:01", + "2000-01-01 09:03", + "2000-01-01 09:05", + "2000-01-01 09:08", + ], + freq="T", + ) + + rng6 = period_range("2000-01-01", freq="M", periods=7) + other6 = period_range("2000-04-01", freq="M", periods=7) + expected6 = period_range("2000-01-01", freq="M", periods=10) + + rng7 = period_range("2003-01-01", freq="A", periods=5) + other7 = period_range("1998-01-01", freq="A", periods=8) + expected7 = PeriodIndex( + [ + "2003", + "2004", + "2005", + "2006", + "2007", + "1998", + "1999", 
+ "2000", + "2001", + "2002", + ], + freq="A", + ) + + rng8 = PeriodIndex( + ["1/3/2000", "1/2/2000", "1/1/2000", "1/5/2000", "1/4/2000"], freq="D" + ) + other8 = period_range("1/6/2000", freq="D", periods=5) + expected8 = PeriodIndex( + [ + "1/3/2000", + "1/2/2000", + "1/1/2000", + "1/5/2000", + "1/4/2000", + "1/6/2000", + "1/7/2000", + "1/8/2000", + "1/9/2000", + "1/10/2000", + ], + freq="D", + ) + + for rng, other, expected in [ + (rng1, other1, expected1), + (rng2, other2, expected2), + (rng3, other3, expected3), + (rng4, other4, expected4), + (rng5, other5, expected5), + (rng6, other6, expected6), + (rng7, other7, expected7), + (rng8, other8, expected8), + ]: + + result_union = rng.union(other, sort=sort) + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result_union, expected) + + def test_union_misc(self, sort): + index = period_range("1/1/2000", "1/20/2000", freq="D") + + result = index[:-5].union(index[10:], sort=sort) + tm.assert_index_equal(result, index) + + # not in order + result = _permute(index[:-5]).union(_permute(index[10:]), sort=sort) + if sort is None: + tm.assert_index_equal(result, index) + assert tm.equalContents(result, index) + + # cast if different frequencies + index = period_range("1/1/2000", "1/20/2000", freq="D") + index2 = period_range("1/1/2000", "1/20/2000", freq="W-WED") + result = index.union(index2, sort=sort) + expected = index.astype(object).union(index2.astype(object), sort=sort) + tm.assert_index_equal(result, expected) + + def test_intersection(self, sort): + index = period_range("1/1/2000", "1/20/2000", freq="D") + + result = index[:-5].intersection(index[10:], sort=sort) + tm.assert_index_equal(result, index[10:-5]) + + # not in order + left = _permute(index[:-5]) + right = _permute(index[10:]) + result = left.intersection(right, sort=sort) + if sort is None: + tm.assert_index_equal(result, index[10:-5]) + assert tm.equalContents(result, index[10:-5]) + + # cast if different frequencies + index = period_range("1/1/2000", "1/20/2000", freq="D") + index2 = period_range("1/1/2000", "1/20/2000", freq="W-WED") + + result = index.intersection(index2, sort=sort) + expected = pd.Index([], dtype=object) + tm.assert_index_equal(result, expected) + + index3 = period_range("1/1/2000", "1/20/2000", freq="2D") + result = index.intersection(index3, sort=sort) + tm.assert_index_equal(result, expected) + + def test_intersection_cases(self, sort): + base = period_range("6/1/2000", "6/30/2000", freq="D", name="idx") + + # if target has the same name, it is preserved + rng2 = period_range("5/15/2000", "6/20/2000", freq="D", name="idx") + expected2 = period_range("6/1/2000", "6/20/2000", freq="D", name="idx") + + # if target name is different, it will be reset + rng3 = period_range("5/15/2000", "6/20/2000", freq="D", name="other") + expected3 = period_range("6/1/2000", "6/20/2000", freq="D", name=None) + + rng4 = period_range("7/1/2000", "7/31/2000", freq="D", name="idx") + expected4 = PeriodIndex([], name="idx", freq="D") + + for (rng, expected) in [ + (rng2, expected2), + (rng3, expected3), + (rng4, expected4), + ]: + result = base.intersection(rng, sort=sort) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + + # non-monotonic + base = PeriodIndex( + ["2011-01-05", "2011-01-04", "2011-01-02", "2011-01-03"], + freq="D", + name="idx", + ) + + rng2 = PeriodIndex( + ["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"], + freq="D", + name="idx", + ) + expected2 = 
PeriodIndex(["2011-01-04", "2011-01-02"], freq="D", name="idx") + + rng3 = PeriodIndex( + ["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"], + freq="D", + name="other", + ) + expected3 = PeriodIndex(["2011-01-04", "2011-01-02"], freq="D", name=None) + + rng4 = period_range("7/1/2000", "7/31/2000", freq="D", name="idx") + expected4 = PeriodIndex([], freq="D", name="idx") + + for (rng, expected) in [ + (rng2, expected2), + (rng3, expected3), + (rng4, expected4), + ]: + result = base.intersection(rng, sort=sort) + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == "D" + + # empty same freq + rng = date_range("6/1/2000", "6/15/2000", freq="T") + result = rng[0:0].intersection(rng) + assert len(result) == 0 + + result = rng.intersection(rng[0:0]) + assert len(result) == 0 + + def test_difference(self, sort): + # diff + period_rng = ["1/3/2000", "1/2/2000", "1/1/2000", "1/5/2000", "1/4/2000"] + rng1 = PeriodIndex(period_rng, freq="D") + other1 = period_range("1/6/2000", freq="D", periods=5) + expected1 = rng1 + + rng2 = PeriodIndex(period_rng, freq="D") + other2 = period_range("1/4/2000", freq="D", periods=5) + expected2 = PeriodIndex(["1/3/2000", "1/2/2000", "1/1/2000"], freq="D") + + rng3 = PeriodIndex(period_rng, freq="D") + other3 = PeriodIndex([], freq="D") + expected3 = rng3 + + period_rng = [ + "2000-01-01 10:00", + "2000-01-01 09:00", + "2000-01-01 12:00", + "2000-01-01 11:00", + "2000-01-01 13:00", + ] + rng4 = PeriodIndex(period_rng, freq="H") + other4 = period_range("2000-01-02 09:00", freq="H", periods=5) + expected4 = rng4 + + rng5 = PeriodIndex( + ["2000-01-01 09:03", "2000-01-01 09:01", "2000-01-01 09:05"], freq="T" + ) + other5 = PeriodIndex(["2000-01-01 09:01", "2000-01-01 09:05"], freq="T") + expected5 = PeriodIndex(["2000-01-01 09:03"], freq="T") + + period_rng = [ + "2000-02-01", + "2000-01-01", + "2000-06-01", + "2000-07-01", + "2000-05-01", + "2000-03-01", + "2000-04-01", + ] + rng6 = PeriodIndex(period_rng, freq="M") + other6 = period_range("2000-04-01", freq="M", periods=7) + expected6 = PeriodIndex(["2000-02-01", "2000-01-01", "2000-03-01"], freq="M") + + period_rng = ["2003", "2007", "2006", "2005", "2004"] + rng7 = PeriodIndex(period_rng, freq="A") + other7 = period_range("1998-01-01", freq="A", periods=8) + expected7 = PeriodIndex(["2007", "2006"], freq="A") + + for rng, other, expected in [ + (rng1, other1, expected1), + (rng2, other2, expected2), + (rng3, other3, expected3), + (rng4, other4, expected4), + (rng5, other5, expected5), + (rng6, other6, expected6), + (rng7, other7, expected7), + ]: + result_difference = rng.difference(other, sort=sort) + if sort is None and len(other): + # We dont sort (yet?) 
when empty GH#24959 + expected = expected.sort_values() + tm.assert_index_equal(result_difference, expected) + + def test_difference_freq(self, sort): + # GH14323: difference of Period MUST preserve frequency + # but the ability to union results must be preserved + + index = period_range("20160920", "20160925", freq="D") + + other = period_range("20160921", "20160924", freq="D") + expected = PeriodIndex(["20160920", "20160925"], freq="D") + idx_diff = index.difference(other, sort) + tm.assert_index_equal(idx_diff, expected) + tm.assert_attr_equal("freq", idx_diff, expected) + + other = period_range("20160922", "20160925", freq="D") + idx_diff = index.difference(other, sort) + expected = PeriodIndex(["20160920", "20160921"], freq="D") + tm.assert_index_equal(idx_diff, expected) + tm.assert_attr_equal("freq", idx_diff, expected) + + def test_intersection_equal_duplicates(self): + # GH#38302 + idx = period_range("2011-01-01", periods=2) + idx_dup = idx.append(idx) + result = idx_dup.intersection(idx_dup) + tm.assert_index_equal(result, idx) + + def test_union_duplicates(self): + # GH#36289 + idx = period_range("2011-01-01", periods=2) + idx_dup = idx.append(idx) + + idx2 = period_range("2011-01-02", periods=2) + idx2_dup = idx2.append(idx2) + result = idx_dup.union(idx2_dup) + + expected = PeriodIndex( + [ + "2011-01-01", + "2011-01-01", + "2011-01-02", + "2011-01-02", + "2011-01-03", + "2011-01-03", + ], + freq="D", + ) + tm.assert_index_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/test_tools.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/test_tools.py new file mode 100644 index 0000000..82a3721 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/period/test_tools.py @@ -0,0 +1,42 @@ +import numpy as np +import pytest + +from pandas import ( + Period, + PeriodIndex, + period_range, +) +import pandas._testing as tm + + +class TestPeriodRepresentation: + """ + Wish to match NumPy units + """ + + def _check_freq(self, freq, base_date): + rng = period_range(start=base_date, periods=10, freq=freq) + exp = np.arange(10, dtype=np.int64) + + tm.assert_numpy_array_equal(rng.asi8, exp) + + def test_annual(self): + self._check_freq("A", 1970) + + def test_monthly(self): + self._check_freq("M", "1970-01") + + @pytest.mark.parametrize("freq", ["W-THU", "D", "B", "H", "T", "S", "L", "U", "N"]) + def test_freq(self, freq): + self._check_freq(freq, "1970-01-01") + + +class TestPeriodIndexConversion: + def test_tolist(self): + index = period_range(freq="A", start="1/1/2001", end="12/1/2009") + rs = index.tolist() + for x in rs: + assert isinstance(x, Period) + + recon = PeriodIndex(rs) + tm.assert_index_equal(index, recon) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/ranges/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/ranges/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/ranges/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/ranges/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..f31d88e Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/ranges/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/ranges/__pycache__/test_constructors.cpython-38.pyc 
b/.local/lib/python3.8/site-packages/pandas/tests/indexes/ranges/__pycache__/test_constructors.cpython-38.pyc new file mode 100644 index 0000000..fa1baa0 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/ranges/__pycache__/test_constructors.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/ranges/__pycache__/test_indexing.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/ranges/__pycache__/test_indexing.cpython-38.pyc new file mode 100644 index 0000000..ffb7b20 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/ranges/__pycache__/test_indexing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/ranges/__pycache__/test_join.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/ranges/__pycache__/test_join.cpython-38.pyc new file mode 100644 index 0000000..e0b7e05 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/ranges/__pycache__/test_join.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/ranges/__pycache__/test_range.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/ranges/__pycache__/test_range.cpython-38.pyc new file mode 100644 index 0000000..26f222b Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/ranges/__pycache__/test_range.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/ranges/__pycache__/test_setops.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexes/ranges/__pycache__/test_setops.cpython-38.pyc new file mode 100644 index 0000000..ec05b1c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexes/ranges/__pycache__/test_setops.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/ranges/test_constructors.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/ranges/test_constructors.py new file mode 100644 index 0000000..c4f2622 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/ranges/test_constructors.py @@ -0,0 +1,167 @@ +from datetime import datetime + +import numpy as np +import pytest + +from pandas import ( + Index, + RangeIndex, + Series, +) +import pandas._testing as tm + + +class TestRangeIndexConstructors: + @pytest.mark.parametrize("name", [None, "foo"]) + @pytest.mark.parametrize( + "args, kwargs, start, stop, step", + [ + ((5,), {}, 0, 5, 1), + ((1, 5), {}, 1, 5, 1), + ((1, 5, 2), {}, 1, 5, 2), + ((0,), {}, 0, 0, 1), + ((0, 0), {}, 0, 0, 1), + ((), {"start": 0}, 0, 0, 1), + ((), {"stop": 0}, 0, 0, 1), + ], + ) + def test_constructor(self, args, kwargs, start, stop, step, name): + result = RangeIndex(*args, name=name, **kwargs) + expected = Index(np.arange(start, stop, step, dtype=np.int64), name=name) + assert isinstance(result, RangeIndex) + assert result.name is name + assert result._range == range(start, stop, step) + tm.assert_index_equal(result, expected, exact="equiv") + + def test_constructor_invalid_args(self): + msg = "RangeIndex\\(\\.\\.\\.\\) must be called with integers" + with pytest.raises(TypeError, match=msg): + RangeIndex() + + with pytest.raises(TypeError, match=msg): + RangeIndex(name="Foo") + + # we don't allow on a bare Index + msg = ( + r"Index\(\.\.\.\) must be called with a collection of some " + r"kind, 0 was passed" + ) + with pytest.raises(TypeError, match=msg): + 
Index(0) + + @pytest.mark.parametrize( + "args", + [ + Index(["a", "b"]), + Series(["a", "b"]), + np.array(["a", "b"]), + [], + np.arange(0, 10), + np.array([1]), + [1], + ], + ) + def test_constructor_additional_invalid_args(self, args): + msg = f"Value needs to be a scalar value, was type {type(args).__name__}" + with pytest.raises(TypeError, match=msg): + RangeIndex(args) + + @pytest.mark.parametrize("args", ["foo", datetime(2000, 1, 1, 0, 0)]) + def test_constructor_invalid_args_wrong_type(self, args): + msg = f"Wrong type {type(args)} for value {args}" + with pytest.raises(TypeError, match=msg): + RangeIndex(args) + + def test_constructor_same(self): + + # pass thru w and w/o copy + index = RangeIndex(1, 5, 2) + result = RangeIndex(index, copy=False) + assert result.identical(index) + + result = RangeIndex(index, copy=True) + tm.assert_index_equal(result, index, exact=True) + + result = RangeIndex(index) + tm.assert_index_equal(result, index, exact=True) + + with pytest.raises( + ValueError, + match="Incorrect `dtype` passed: expected signed integer, received float64", + ): + RangeIndex(index, dtype="float64") + + def test_constructor_range_object(self): + result = RangeIndex(range(1, 5, 2)) + expected = RangeIndex(1, 5, 2) + tm.assert_index_equal(result, expected, exact=True) + + def test_constructor_range(self): + + result = RangeIndex.from_range(range(1, 5, 2)) + expected = RangeIndex(1, 5, 2) + tm.assert_index_equal(result, expected, exact=True) + + result = RangeIndex.from_range(range(5, 6)) + expected = RangeIndex(5, 6, 1) + tm.assert_index_equal(result, expected, exact=True) + + # an invalid range + result = RangeIndex.from_range(range(5, 1)) + expected = RangeIndex(0, 0, 1) + tm.assert_index_equal(result, expected, exact=True) + + result = RangeIndex.from_range(range(5)) + expected = RangeIndex(0, 5, 1) + tm.assert_index_equal(result, expected, exact=True) + + result = Index(range(1, 5, 2)) + expected = RangeIndex(1, 5, 2) + tm.assert_index_equal(result, expected, exact=True) + + msg = ( + r"(RangeIndex.)?from_range\(\) got an unexpected keyword argument( 'copy')?" 
+ ) + with pytest.raises(TypeError, match=msg): + RangeIndex.from_range(range(10), copy=True) + + def test_constructor_name(self): + # GH#12288 + orig = RangeIndex(10) + orig.name = "original" + + copy = RangeIndex(orig) + copy.name = "copy" + + assert orig.name == "original" + assert copy.name == "copy" + + new = Index(copy) + assert new.name == "copy" + + new.name = "new" + assert orig.name == "original" + assert copy.name == "copy" + assert new.name == "new" + + def test_constructor_corner(self): + arr = np.array([1, 2, 3, 4], dtype=object) + index = RangeIndex(1, 5) + assert index.values.dtype == np.int64 + with tm.assert_produces_warning(FutureWarning, match="will not infer"): + expected = Index(arr).astype("int64") + + tm.assert_index_equal(index, expected, exact="equiv") + + # non-int raise Exception + with pytest.raises(TypeError, match=r"Wrong type \<class 'str'\>"): + RangeIndex("1", "10", "1") + with pytest.raises(TypeError, match=r"Wrong type \<class 'float'\>"): + RangeIndex(1.1, 10.2, 1.3) + + # invalid passed type + with pytest.raises( + ValueError, + match="Incorrect `dtype` passed: expected signed integer, received float64", + ): + RangeIndex(1, 5, dtype="float64") diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/ranges/test_indexing.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/ranges/test_indexing.py new file mode 100644 index 0000000..f8c3eff --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/ranges/test_indexing.py @@ -0,0 +1,93 @@ +import numpy as np +import pytest + +from pandas import RangeIndex +import pandas._testing as tm +from pandas.core.api import Int64Index + + +class TestGetIndexer: + def test_get_indexer(self): + index = RangeIndex(start=0, stop=20, step=2) + target = RangeIndex(10) + indexer = index.get_indexer(target) + expected = np.array([0, -1, 1, -1, 2, -1, 3, -1, 4, -1], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected) + + def test_get_indexer_pad(self): + index = RangeIndex(start=0, stop=20, step=2) + target = RangeIndex(10) + indexer = index.get_indexer(target, method="pad") + expected = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected) + + def test_get_indexer_backfill(self): + index = RangeIndex(start=0, stop=20, step=2) + target = RangeIndex(10) + indexer = index.get_indexer(target, method="backfill") + expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected) + + def test_get_indexer_limit(self): + # GH#28631 + idx = RangeIndex(4) + target = RangeIndex(6) + result = idx.get_indexer(target, method="pad", limit=1) + expected = np.array([0, 1, 2, 3, 3, -1], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("stop", [0, -1, -2]) + def test_get_indexer_decreasing(self, stop): + # GH#28678 + index = RangeIndex(7, stop, -3) + result = index.get_indexer(range(9)) + expected = np.array([-1, 2, -1, -1, 1, -1, -1, 0, -1], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + +class TestTake: + def test_take_preserve_name(self): + index = RangeIndex(1, 5, name="foo") + taken = index.take([3, 0, 1]) + assert index.name == taken.name + + def test_take_fill_value(self): + # GH#12631 + idx = RangeIndex(1, 4, name="xxx") + result = idx.take(np.array([1, 0, -1])) + expected = Int64Index([2, 1, 3], name="xxx") + tm.assert_index_equal(result, expected) + + # fill_value + msg = "Unable to fill values because RangeIndex cannot contain NA" + with
pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -1]), fill_value=True) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = Int64Index([2, 1, 3], name="xxx") + tm.assert_index_equal(result, expected) + + msg = "Unable to fill values because RangeIndex cannot contain NA" + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + msg = "index -5 is out of bounds for (axis 0 with )?size 3" + with pytest.raises(IndexError, match=msg): + idx.take(np.array([1, -5])) + + +class TestWhere: + def test_where_putmask_range_cast(self): + # GH#43240 + idx = RangeIndex(0, 5, name="test") + + mask = np.array([True, True, False, False, False]) + result = idx.putmask(mask, 10) + expected = Int64Index([10, 10, 2, 3, 4], name="test") + tm.assert_index_equal(result, expected) + + result = idx.where(~mask, 10) + tm.assert_index_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/ranges/test_join.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/ranges/test_join.py new file mode 100644 index 0000000..ed21996 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/ranges/test_join.py @@ -0,0 +1,178 @@ +import numpy as np + +from pandas import ( + Index, + RangeIndex, +) +import pandas._testing as tm +from pandas.core.indexes.api import Int64Index + + +class TestJoin: + def test_join_outer(self): + # join with Int64Index + index = RangeIndex(start=0, stop=20, step=2) + other = Int64Index(np.arange(25, 14, -1)) + + res, lidx, ridx = index.join(other, how="outer", return_indexers=True) + noidx_res = index.join(other, how="outer") + tm.assert_index_equal(res, noidx_res) + + eres = Int64Index( + [0, 2, 4, 6, 8, 10, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25] + ) + elidx = np.array( + [0, 1, 2, 3, 4, 5, 6, 7, -1, 8, -1, 9, -1, -1, -1, -1, -1, -1, -1], + dtype=np.intp, + ) + eridx = np.array( + [-1, -1, -1, -1, -1, -1, -1, -1, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0], + dtype=np.intp, + ) + + assert isinstance(res, Int64Index) + assert not isinstance(res, RangeIndex) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + # join with RangeIndex + other = RangeIndex(25, 14, -1) + + res, lidx, ridx = index.join(other, how="outer", return_indexers=True) + noidx_res = index.join(other, how="outer") + tm.assert_index_equal(res, noidx_res) + + assert isinstance(res, Int64Index) + assert not isinstance(res, RangeIndex) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_inner(self): + # Join with non-RangeIndex + index = RangeIndex(start=0, stop=20, step=2) + other = Int64Index(np.arange(25, 14, -1)) + + res, lidx, ridx = index.join(other, how="inner", return_indexers=True) + + # no guarantee of sortedness, so sort for comparison purposes + ind = res.argsort() + res = res.take(ind) + lidx = lidx.take(ind) + ridx = ridx.take(ind) + + eres = Int64Index([16, 18]) + elidx = np.array([8, 9], dtype=np.intp) + eridx = np.array([9, 7], dtype=np.intp) + + assert isinstance(res, Int64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + # Join two RangeIndex + other = RangeIndex(25, 14, -1) + + res, lidx, ridx = 
index.join(other, how="inner", return_indexers=True) + + assert isinstance(res, RangeIndex) + tm.assert_index_equal(res, eres, exact="equiv") + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_left(self): + # Join with Int64Index + index = RangeIndex(start=0, stop=20, step=2) + other = Int64Index(np.arange(25, 14, -1)) + + res, lidx, ridx = index.join(other, how="left", return_indexers=True) + eres = index + eridx = np.array([-1, -1, -1, -1, -1, -1, -1, -1, 9, 7], dtype=np.intp) + + assert isinstance(res, RangeIndex) + tm.assert_index_equal(res, eres) + assert lidx is None + tm.assert_numpy_array_equal(ridx, eridx) + + # Join with RangeIndex + other = RangeIndex(25, 14, -1) + + res, lidx, ridx = index.join(other, how="left", return_indexers=True) + + assert isinstance(res, RangeIndex) + tm.assert_index_equal(res, eres) + assert lidx is None + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_right(self): + # Join with Int64Index + index = RangeIndex(start=0, stop=20, step=2) + other = Int64Index(np.arange(25, 14, -1)) + + res, lidx, ridx = index.join(other, how="right", return_indexers=True) + eres = other + elidx = np.array([-1, -1, -1, -1, -1, -1, -1, 9, -1, 8, -1], dtype=np.intp) + + assert isinstance(other, Int64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + assert ridx is None + + # Join with RangeIndex + other = RangeIndex(25, 14, -1) + + res, lidx, ridx = index.join(other, how="right", return_indexers=True) + eres = other + + assert isinstance(other, RangeIndex) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + assert ridx is None + + def test_join_non_int_index(self): + index = RangeIndex(start=0, stop=20, step=2) + other = Index([3, 6, 7, 8, 10], dtype=object) + + outer = index.join(other, how="outer") + outer2 = other.join(index, how="outer") + expected = Index([0, 2, 3, 4, 6, 7, 8, 10, 12, 14, 16, 18]) + tm.assert_index_equal(outer, outer2) + tm.assert_index_equal(outer, expected) + + inner = index.join(other, how="inner") + inner2 = other.join(index, how="inner") + expected = Index([6, 8, 10]) + tm.assert_index_equal(inner, inner2) + tm.assert_index_equal(inner, expected) + + left = index.join(other, how="left") + tm.assert_index_equal(left, index.astype(object)) + + left2 = other.join(index, how="left") + tm.assert_index_equal(left2, other) + + right = index.join(other, how="right") + tm.assert_index_equal(right, other) + + right2 = other.join(index, how="right") + tm.assert_index_equal(right2, index.astype(object)) + + def test_join_non_unique(self): + index = RangeIndex(start=0, stop=20, step=2) + other = Index([4, 4, 3, 3]) + + res, lidx, ridx = index.join(other, return_indexers=True) + + eres = Int64Index([0, 2, 4, 4, 6, 8, 10, 12, 14, 16, 18]) + elidx = np.array([0, 1, 2, 2, 3, 4, 5, 6, 7, 8, 9], dtype=np.intp) + eridx = np.array([-1, -1, 0, 1, -1, -1, -1, -1, -1, -1, -1], dtype=np.intp) + + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_self(self, join_type): + index = RangeIndex(start=0, stop=20, step=2) + joined = index.join(index, how=join_type) + assert index is joined diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/ranges/test_range.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/ranges/test_range.py new file mode 100644 index 0000000..c45a4c7 --- /dev/null +++
b/.local/lib/python3.8/site-packages/pandas/tests/indexes/ranges/test_range.py @@ -0,0 +1,626 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.common import ensure_platform_int + +import pandas as pd +import pandas._testing as tm +from pandas.core.indexes.api import ( + Float64Index, + Index, + Int64Index, + RangeIndex, +) +from pandas.tests.indexes.common import NumericBase + +# aliases to make some tests easier to read +RI = RangeIndex +I64 = Int64Index +F64 = Float64Index +OI = Index + + +class TestRangeIndex(NumericBase): + _index_cls = RangeIndex + + @pytest.fixture + def dtype(self): + return np.int64 + + @pytest.fixture( + params=["uint64", "float64", "category", "datetime64", "object"], + ) + def invalid_dtype(self, request): + return request.param + + @pytest.fixture + def simple_index(self) -> Index: + return self._index_cls(start=0, stop=20, step=2) + + @pytest.fixture( + params=[ + RangeIndex(start=0, stop=20, step=2, name="foo"), + RangeIndex(start=18, stop=-1, step=-2, name="bar"), + ], + ids=["index_inc", "index_dec"], + ) + def index(self, request): + return request.param + + def test_constructor_unwraps_index(self, dtype): + result = self._index_cls(1, 3) + expected = np.array([1, 2], dtype=dtype) + tm.assert_numpy_array_equal(result._data, expected) + + def test_can_hold_identifiers(self, simple_index): + idx = simple_index + key = idx[0] + assert idx._can_hold_identifiers_and_holds_name(key) is False + + def test_too_many_names(self, simple_index): + index = simple_index + with pytest.raises(ValueError, match="^Length"): + index.names = ["roger", "harold"] + + @pytest.mark.parametrize( + "index, start, stop, step", + [ + (RangeIndex(5), 0, 5, 1), + (RangeIndex(0, 5), 0, 5, 1), + (RangeIndex(5, step=2), 0, 5, 2), + (RangeIndex(1, 5, 2), 1, 5, 2), + ], + ) + def test_start_stop_step_attrs(self, index, start, stop, step): + # GH 25710 + assert index.start == start + assert index.stop == stop + assert index.step == step + + @pytest.mark.parametrize("attr_name", ["_start", "_stop", "_step"]) + def test_deprecated_start_stop_step_attrs(self, attr_name, simple_index): + # GH 26581 + idx = simple_index + with tm.assert_produces_warning(FutureWarning): + getattr(idx, attr_name) + + def test_copy(self): + i = RangeIndex(5, name="Foo") + i_copy = i.copy() + assert i_copy is not i + assert i_copy.identical(i) + assert i_copy._range == range(0, 5, 1) + assert i_copy.name == "Foo" + + def test_repr(self): + i = RangeIndex(5, name="Foo") + result = repr(i) + expected = "RangeIndex(start=0, stop=5, step=1, name='Foo')" + assert result == expected + + result = eval(result) + tm.assert_index_equal(result, i, exact=True) + + i = RangeIndex(5, 0, -1) + result = repr(i) + expected = "RangeIndex(start=5, stop=0, step=-1)" + assert result == expected + + result = eval(result) + tm.assert_index_equal(result, i, exact=True) + + def test_insert(self): + + idx = RangeIndex(5, name="Foo") + result = idx[1:4] + + # test 0th element + tm.assert_index_equal(idx[0:4], result.insert(0, idx[0]), exact="equiv") + + # GH 18295 (test missing) + expected = Float64Index([0, np.nan, 1, 2, 3, 4]) + for na in [np.nan, None, pd.NA]: + result = RangeIndex(5).insert(1, na) + tm.assert_index_equal(result, expected) + + result = RangeIndex(5).insert(1, pd.NaT) + expected = Index([0, pd.NaT, 1, 2, 3, 4], dtype=object) + tm.assert_index_equal(result, expected) + + def test_insert_edges_preserves_rangeindex(self): + idx = Index(range(4, 9, 2)) + + result = idx.insert(0, 2) + expected = Index(range(2, 9, 
2)) + tm.assert_index_equal(result, expected, exact=True) + + result = idx.insert(3, 10) + expected = Index(range(4, 11, 2)) + tm.assert_index_equal(result, expected, exact=True) + + def test_insert_middle_preserves_rangeindex(self): + # insert in the middle + idx = Index(range(0, 3, 2)) + result = idx.insert(1, 1) + expected = Index(range(3)) + tm.assert_index_equal(result, expected, exact=True) + + idx = idx * 2 + result = idx.insert(1, 2) + expected = expected * 2 + tm.assert_index_equal(result, expected, exact=True) + + def test_delete(self): + + idx = RangeIndex(5, name="Foo") + expected = idx[1:] + result = idx.delete(0) + tm.assert_index_equal(result, expected, exact=True) + assert result.name == expected.name + + expected = idx[:-1] + result = idx.delete(-1) + tm.assert_index_equal(result, expected, exact=True) + assert result.name == expected.name + + msg = "index 5 is out of bounds for axis 0 with size 5" + with pytest.raises((IndexError, ValueError), match=msg): + # either depending on numpy version + result = idx.delete(len(idx)) + + def test_delete_preserves_rangeindex(self): + idx = Index(range(2), name="foo") + + result = idx.delete([1]) + expected = Index(range(1), name="foo") + tm.assert_index_equal(result, expected, exact=True) + + result = idx.delete(1) + tm.assert_index_equal(result, expected, exact=True) + + def test_delete_preserves_rangeindex_middle(self): + idx = Index(range(3), name="foo") + result = idx.delete(1) + expected = idx[::2] + tm.assert_index_equal(result, expected, exact=True) + + result = idx.delete(-2) + tm.assert_index_equal(result, expected, exact=True) + + def test_delete_preserves_rangeindex_list_at_end(self): + idx = RangeIndex(0, 6, 1) + + loc = [2, 3, 4, 5] + result = idx.delete(loc) + expected = idx[:2] + tm.assert_index_equal(result, expected, exact=True) + + result = idx.delete(loc[::-1]) + tm.assert_index_equal(result, expected, exact=True) + + def test_delete_preserves_rangeindex_list_middle(self): + idx = RangeIndex(0, 6, 1) + + loc = [1, 2, 3, 4] + result = idx.delete(loc) + expected = RangeIndex(0, 6, 5) + tm.assert_index_equal(result, expected, exact=True) + + result = idx.delete(loc[::-1]) + tm.assert_index_equal(result, expected, exact=True) + + def test_delete_all_preserves_rangeindex(self): + idx = RangeIndex(0, 6, 1) + + loc = [0, 1, 2, 3, 4, 5] + result = idx.delete(loc) + expected = idx[:0] + tm.assert_index_equal(result, expected, exact=True) + + result = idx.delete(loc[::-1]) + tm.assert_index_equal(result, expected, exact=True) + + def test_delete_not_preserving_rangeindex(self): + idx = RangeIndex(0, 6, 1) + + loc = [0, 3, 5] + result = idx.delete(loc) + expected = Int64Index([1, 2, 4]) + tm.assert_index_equal(result, expected, exact=True) + + result = idx.delete(loc[::-1]) + tm.assert_index_equal(result, expected, exact=True) + + def test_view(self): + i = RangeIndex(0, name="Foo") + i_view = i.view() + assert i_view.name == "Foo" + + i_view = i.view("i8") + tm.assert_numpy_array_equal(i.values, i_view) + + i_view = i.view(RangeIndex) + tm.assert_index_equal(i, i_view) + + def test_dtype(self, simple_index): + index = simple_index + assert index.dtype == np.int64 + + def test_cache(self): + # GH 26565, GH26617, GH35432 + # This test checks whether _cache has been set. + # Calling RangeIndex._cache["_data"] creates an int64 array of the same length + # as the RangeIndex and stores it in _cache. 
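+        # A rough sketch of the lazy pattern being exercised (illustrative
+        # only; the actual implementation uses pandas' `cache_readonly`
+        # decorator rather than a hand-written property):
+        #
+        #     @property
+        #     def _data(self):
+        #         if "_data" not in self._cache:
+        #             self._cache["_data"] = np.arange(
+        #                 self.start, self.stop, self.step, dtype=np.int64
+        #             )
+        #         return self._cache["_data"]
+        #
+        # None of the operations below should touch `_data`, so `_cache`
+        # must stay empty until `_data` is accessed at the end.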
+ idx = RangeIndex(0, 100, 10) + + assert idx._cache == {} + + repr(idx) + assert idx._cache == {} + + str(idx) + assert idx._cache == {} + + idx.get_loc(20) + assert idx._cache == {} + + 90 in idx # True + assert idx._cache == {} + + 91 in idx # False + assert idx._cache == {} + + idx.all() + assert idx._cache == {} + + idx.any() + assert idx._cache == {} + + for _ in idx: + pass + assert idx._cache == {} + + idx.format() + assert idx._cache == {} + + df = pd.DataFrame({"a": range(10)}, index=idx) + + str(df) + assert idx._cache == {} + + df.loc[50] + assert idx._cache == {} + + with pytest.raises(KeyError, match="51"): + df.loc[51] + assert idx._cache == {} + + df.loc[10:50] + assert idx._cache == {} + + df.iloc[5:10] + assert idx._cache == {} + + # idx._cache should contain a _data entry after call to idx._data + idx._data + assert isinstance(idx._data, np.ndarray) + assert idx._data is idx._data # check cached value is reused + assert len(idx._cache) == 1 + expected = np.arange(0, 100, 10, dtype="int64") + tm.assert_numpy_array_equal(idx._cache["_data"], expected) + + def test_is_monotonic(self): + index = RangeIndex(0, 20, 2) + assert index.is_monotonic is True + assert index.is_monotonic_increasing is True + assert index.is_monotonic_decreasing is False + assert index._is_strictly_monotonic_increasing is True + assert index._is_strictly_monotonic_decreasing is False + + index = RangeIndex(4, 0, -1) + assert index.is_monotonic is False + assert index._is_strictly_monotonic_increasing is False + assert index.is_monotonic_decreasing is True + assert index._is_strictly_monotonic_decreasing is True + + index = RangeIndex(1, 2) + assert index.is_monotonic is True + assert index.is_monotonic_increasing is True + assert index.is_monotonic_decreasing is True + assert index._is_strictly_monotonic_increasing is True + assert index._is_strictly_monotonic_decreasing is True + + index = RangeIndex(2, 1) + assert index.is_monotonic is True + assert index.is_monotonic_increasing is True + assert index.is_monotonic_decreasing is True + assert index._is_strictly_monotonic_increasing is True + assert index._is_strictly_monotonic_decreasing is True + + index = RangeIndex(1, 1) + assert index.is_monotonic is True + assert index.is_monotonic_increasing is True + assert index.is_monotonic_decreasing is True + assert index._is_strictly_monotonic_increasing is True + assert index._is_strictly_monotonic_decreasing is True + + def test_equals_range(self): + equiv_pairs = [ + (RangeIndex(0, 9, 2), RangeIndex(0, 10, 2)), + (RangeIndex(0), RangeIndex(1, -1, 3)), + (RangeIndex(1, 2, 3), RangeIndex(1, 3, 4)), + (RangeIndex(0, -9, -2), RangeIndex(0, -10, -2)), + ] + for left, right in equiv_pairs: + assert left.equals(right) + assert right.equals(left) + + def test_logical_compat(self, simple_index): + idx = simple_index + assert idx.all() == idx.values.all() + assert idx.any() == idx.values.any() + + def test_identical(self, simple_index): + index = simple_index + i = Index(index.copy()) + assert i.identical(index) + + # we don't allow object dtype for RangeIndex + if isinstance(index, RangeIndex): + return + + same_values_different_type = Index(i, dtype=object) + assert not i.identical(same_values_different_type) + + i = index.copy(dtype=object) + i = i.rename("foo") + same_values = Index(i, dtype=object) + assert same_values.identical(index.copy(dtype=object)) + + assert not i.identical(index) + assert Index(same_values, name="foo", dtype=object).identical(i) + + assert not 
index.copy(dtype=object).identical(index.copy(dtype="int64")) + + def test_nbytes(self): + + # memory savings vs int index + idx = RangeIndex(0, 1000) + assert idx.nbytes < Int64Index(idx._values).nbytes / 10 + + # constant memory usage + i2 = RangeIndex(0, 10) + assert idx.nbytes == i2.nbytes + + @pytest.mark.parametrize( + "start,stop,step", + [ + # can't + ("foo", "bar", "baz"), + # shouldn't + ("0", "1", "2"), + ], + ) + def test_cant_or_shouldnt_cast(self, start, stop, step): + msg = f"Wrong type {type(start)} for value {start}" + with pytest.raises(TypeError, match=msg): + RangeIndex(start, stop, step) + + def test_view_index(self, simple_index): + index = simple_index + index.view(Index) + + def test_prevent_casting(self, simple_index): + index = simple_index + result = index.astype("O") + assert result.dtype == np.object_ + + def test_repr_roundtrip(self, simple_index): + index = simple_index + tm.assert_index_equal(eval(repr(index)), index) + + def test_slice_keep_name(self): + idx = RangeIndex(1, 2, name="asdf") + assert idx.name == idx[1:].name + + def test_has_duplicates(self, index): + assert index.is_unique + assert not index.has_duplicates + + def test_extended_gcd(self, simple_index): + index = simple_index + result = index._extended_gcd(6, 10) + assert result[0] == result[1] * 6 + result[2] * 10 + assert 2 == result[0] + + result = index._extended_gcd(10, 6) + assert 2 == result[1] * 10 + result[2] * 6 + assert 2 == result[0] + + def test_min_fitting_element(self): + result = RangeIndex(0, 20, 2)._min_fitting_element(1) + assert 2 == result + + result = RangeIndex(1, 6)._min_fitting_element(1) + assert 1 == result + + result = RangeIndex(18, -2, -2)._min_fitting_element(1) + assert 2 == result + + result = RangeIndex(5, 0, -1)._min_fitting_element(1) + assert 1 == result + + big_num = 500000000000000000000000 + + result = RangeIndex(5, big_num * 2, 1)._min_fitting_element(big_num) + assert big_num == result + + def test_pickle_compat_construction(self): + # RangeIndex() is a valid constructor + pass + + def test_slice_specialised(self, simple_index): + index = simple_index + index.name = "foo" + + # scalar indexing + res = index[1] + expected = 2 + assert res == expected + + res = index[-1] + expected = 18 + assert res == expected + + # slicing + # slice value completion + index_slice = index[:] + expected = index + tm.assert_index_equal(index_slice, expected) + + # positive slice values + index_slice = index[7:10:2] + expected = Index(np.array([14, 18]), name="foo") + tm.assert_index_equal(index_slice, expected, exact="equiv") + + # negative slice values + index_slice = index[-1:-5:-2] + expected = Index(np.array([18, 14]), name="foo") + tm.assert_index_equal(index_slice, expected, exact="equiv") + + # stop overshoot + index_slice = index[2:100:4] + expected = Index(np.array([4, 12]), name="foo") + tm.assert_index_equal(index_slice, expected, exact="equiv") + + # reverse + index_slice = index[::-1] + expected = Index(index.values[::-1], name="foo") + tm.assert_index_equal(index_slice, expected, exact="equiv") + + index_slice = index[-8::-1] + expected = Index(np.array([4, 2, 0]), name="foo") + tm.assert_index_equal(index_slice, expected, exact="equiv") + + index_slice = index[-40::-1] + expected = Index(np.array([], dtype=np.int64), name="foo") + tm.assert_index_equal(index_slice, expected, exact="equiv") + + index_slice = index[40::-1] + expected = Index(index.values[40::-1], name="foo") + tm.assert_index_equal(index_slice, expected, exact="equiv") + + index_slice = 
index[10::-1] + expected = Index(index.values[::-1], name="foo") + tm.assert_index_equal(index_slice, expected, exact="equiv") + + @pytest.mark.parametrize("step", set(range(-5, 6)) - {0}) + def test_len_specialised(self, step): + # make sure that our len is the same as np.arange calc + start, stop = (0, 5) if step > 0 else (5, 0) + + arr = np.arange(start, stop, step) + index = RangeIndex(start, stop, step) + assert len(index) == len(arr) + + index = RangeIndex(stop, start, step) + assert len(index) == 0 + + @pytest.fixture( + params=[ + ([RI(1, 12, 5)], RI(1, 12, 5)), + ([RI(0, 6, 4)], RI(0, 6, 4)), + ([RI(1, 3), RI(3, 7)], RI(1, 7)), + ([RI(1, 5, 2), RI(5, 6)], RI(1, 6, 2)), + ([RI(1, 3, 2), RI(4, 7, 3)], RI(1, 7, 3)), + ([RI(-4, 3, 2), RI(4, 7, 2)], RI(-4, 7, 2)), + ([RI(-4, -8), RI(-8, -12)], RI(0, 0)), + ([RI(-4, -8), RI(3, -4)], RI(0, 0)), + ([RI(-4, -8), RI(3, 5)], RI(3, 5)), + ([RI(-4, -2), RI(3, 5)], I64([-4, -3, 3, 4])), + ([RI(-2), RI(3, 5)], RI(3, 5)), + ([RI(2), RI(2)], I64([0, 1, 0, 1])), + ([RI(2), RI(2, 5), RI(5, 8, 4)], RI(0, 6)), + ([RI(2), RI(3, 5), RI(5, 8, 4)], I64([0, 1, 3, 4, 5])), + ([RI(-2, 2), RI(2, 5), RI(5, 8, 4)], RI(-2, 6)), + ([RI(3), I64([-1, 3, 15])], I64([0, 1, 2, -1, 3, 15])), + ([RI(3), F64([-1, 3.1, 15.0])], F64([0, 1, 2, -1, 3.1, 15.0])), + ([RI(3), OI(["a", None, 14])], OI([0, 1, 2, "a", None, 14])), + ([RI(3, 1), OI(["a", None, 14])], OI(["a", None, 14])), + ] + ) + def appends(self, request): + """Inputs and expected outputs for RangeIndex.append test""" + return request.param + + def test_append(self, appends): + # GH16212 + + indices, expected = appends + + result = indices[0].append(indices[1:]) + tm.assert_index_equal(result, expected, exact=True) + + if len(indices) == 2: + # Append single item rather than list + result2 = indices[0].append(indices[1]) + tm.assert_index_equal(result2, expected, exact=True) + + def test_engineless_lookup(self): + # GH 16685 + # Standard lookup on RangeIndex should not require the engine to be + # created + idx = RangeIndex(2, 10, 3) + + assert idx.get_loc(5) == 1 + tm.assert_numpy_array_equal( + idx.get_indexer([2, 8]), ensure_platform_int(np.array([0, 2])) + ) + with pytest.raises(KeyError, match="3"): + idx.get_loc(3) + + assert "_engine" not in idx._cache + + # Different types of scalars can be excluded immediately, no need to + # use the _engine + with pytest.raises(KeyError, match="'a'"): + idx.get_loc("a") + + assert "_engine" not in idx._cache + + def test_format_empty(self): + # GH35712 + empty_idx = self._index_cls(0) + assert empty_idx.format() == [] + assert empty_idx.format(name=True) == [""] + + @pytest.mark.parametrize( + "RI", + [ + RangeIndex(0, -1, -1), + RangeIndex(0, 1, 1), + RangeIndex(1, 3, 2), + RangeIndex(0, -1, -2), + RangeIndex(-3, -5, -2), + ], + ) + def test_append_len_one(self, RI): + # GH39401 + result = RI.append([]) + tm.assert_index_equal(result, RI, exact=True) + + @pytest.mark.parametrize("base", [RangeIndex(0, 2), Index([0, 1])]) + def test_isin_range(self, base): + # GH#41151 + values = RangeIndex(0, 1) + result = base.isin(values) + expected = np.array([True, False]) + tm.assert_numpy_array_equal(result, expected) + + def test_sort_values_key(self): + # GH#43666 + sort_order = {8: 2, 6: 0, 4: 8, 2: 10, 0: 12} + values = RangeIndex(0, 10, 2) + result = values.sort_values(key=lambda x: x.map(sort_order)) + expected = Index([4, 8, 6, 0, 2], dtype="int64") + tm.assert_index_equal(result, expected, check_exact=True) diff --git 
a/.local/lib/python3.8/site-packages/pandas/tests/indexes/ranges/test_setops.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/ranges/test_setops.py new file mode 100644 index 0000000..2942010 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/ranges/test_setops.py @@ -0,0 +1,490 @@ +from datetime import ( + datetime, + timedelta, +) + +from hypothesis import ( + assume, + given, + strategies as st, +) +import numpy as np +import pytest + +import pandas._testing as tm +from pandas.core.indexes.api import ( + Index, + Int64Index, + RangeIndex, + UInt64Index, +) + + +class TestRangeIndexSetOps: + @pytest.mark.parametrize("klass", [RangeIndex, Int64Index, UInt64Index]) + def test_intersection_mismatched_dtype(self, klass): + # check that we cast to float, not object + index = RangeIndex(start=0, stop=20, step=2, name="foo") + index = klass(index) + + flt = index.astype(np.float64) + + # bc index.equals(flt), we go through fastpath and get RangeIndex back + result = index.intersection(flt) + tm.assert_index_equal(result, index, exact=True) + + result = flt.intersection(index) + tm.assert_index_equal(result, flt, exact=True) + + # neither empty, not-equals + result = index.intersection(flt[1:]) + tm.assert_index_equal(result, flt[1:], exact=True) + + result = flt[1:].intersection(index) + tm.assert_index_equal(result, flt[1:], exact=True) + + # empty other + result = index.intersection(flt[:0]) + tm.assert_index_equal(result, flt[:0], exact=True) + + result = flt[:0].intersection(index) + tm.assert_index_equal(result, flt[:0], exact=True) + + def test_intersection_empty(self, sort, names): + # name retention on empty intersections + index = RangeIndex(start=0, stop=20, step=2, name=names[0]) + + # empty other + result = index.intersection(index[:0].rename(names[1]), sort=sort) + tm.assert_index_equal(result, index[:0].rename(names[2]), exact=True) + + # empty self + result = index[:0].intersection(index.rename(names[1]), sort=sort) + tm.assert_index_equal(result, index[:0].rename(names[2]), exact=True) + + def test_intersection(self, sort): + # intersect with Int64Index + index = RangeIndex(start=0, stop=20, step=2) + other = Index(np.arange(1, 6)) + result = index.intersection(other, sort=sort) + expected = Index(np.sort(np.intersect1d(index.values, other.values))) + tm.assert_index_equal(result, expected) + + result = other.intersection(index, sort=sort) + expected = Index( + np.sort(np.asarray(np.intersect1d(index.values, other.values))) + ) + tm.assert_index_equal(result, expected) + + # intersect with increasing RangeIndex + other = RangeIndex(1, 6) + result = index.intersection(other, sort=sort) + expected = Index(np.sort(np.intersect1d(index.values, other.values))) + tm.assert_index_equal(result, expected, exact="equiv") + + # intersect with decreasing RangeIndex + other = RangeIndex(5, 0, -1) + result = index.intersection(other, sort=sort) + expected = Index(np.sort(np.intersect1d(index.values, other.values))) + tm.assert_index_equal(result, expected, exact="equiv") + + # reversed (GH 17296) + result = other.intersection(index, sort=sort) + tm.assert_index_equal(result, expected, exact="equiv") + + # GH 17296: intersect two decreasing RangeIndexes + first = RangeIndex(10, -2, -2) + other = RangeIndex(5, -4, -1) + expected = first.astype(int).intersection(other.astype(int), sort=sort) + result = first.intersection(other, sort=sort).astype(int) + tm.assert_index_equal(result, expected) + + # reversed + result = other.intersection(first, 
sort=sort).astype(int) + tm.assert_index_equal(result, expected) + + index = RangeIndex(5, name="foo") + + # intersect of non-overlapping indices + other = RangeIndex(5, 10, 1, name="foo") + result = index.intersection(other, sort=sort) + expected = RangeIndex(0, 0, 1, name="foo") + tm.assert_index_equal(result, expected) + + other = RangeIndex(-1, -5, -1) + result = index.intersection(other, sort=sort) + expected = RangeIndex(0, 0, 1) + tm.assert_index_equal(result, expected) + + # intersection of empty indices + other = RangeIndex(0, 0, 1) + result = index.intersection(other, sort=sort) + expected = RangeIndex(0, 0, 1) + tm.assert_index_equal(result, expected) + + result = other.intersection(index, sort=sort) + tm.assert_index_equal(result, expected) + + def test_intersection_non_overlapping_gcd(self, sort, names): + # intersection of non-overlapping values based on start value and gcd + index = RangeIndex(1, 10, 2, name=names[0]) + other = RangeIndex(0, 10, 4, name=names[1]) + result = index.intersection(other, sort=sort) + expected = RangeIndex(0, 0, 1, name=names[2]) + tm.assert_index_equal(result, expected) + + def test_union_noncomparable(self, sort): + # corner case, non-Int64Index + index = RangeIndex(start=0, stop=20, step=2) + other = Index([datetime.now() + timedelta(i) for i in range(4)], dtype=object) + result = index.union(other, sort=sort) + expected = Index(np.concatenate((index, other))) + tm.assert_index_equal(result, expected) + + result = other.union(index, sort=sort) + expected = Index(np.concatenate((other, index))) + tm.assert_index_equal(result, expected) + + @pytest.fixture( + params=[ + ( + RangeIndex(0, 10, 1), + RangeIndex(0, 10, 1), + RangeIndex(0, 10, 1), + RangeIndex(0, 10, 1), + ), + ( + RangeIndex(0, 10, 1), + RangeIndex(5, 20, 1), + RangeIndex(0, 20, 1), + Int64Index(range(20)), + ), + ( + RangeIndex(0, 10, 1), + RangeIndex(10, 20, 1), + RangeIndex(0, 20, 1), + Int64Index(range(20)), + ), + ( + RangeIndex(0, -10, -1), + RangeIndex(0, -10, -1), + RangeIndex(0, -10, -1), + RangeIndex(0, -10, -1), + ), + ( + RangeIndex(0, -10, -1), + RangeIndex(-10, -20, -1), + RangeIndex(-19, 1, 1), + Int64Index(range(0, -20, -1)), + ), + ( + RangeIndex(0, 10, 2), + RangeIndex(1, 10, 2), + RangeIndex(0, 10, 1), + Int64Index(list(range(0, 10, 2)) + list(range(1, 10, 2))), + ), + ( + RangeIndex(0, 11, 2), + RangeIndex(1, 12, 2), + RangeIndex(0, 12, 1), + Int64Index(list(range(0, 11, 2)) + list(range(1, 12, 2))), + ), + ( + RangeIndex(0, 21, 4), + RangeIndex(-2, 24, 4), + RangeIndex(-2, 24, 2), + Int64Index(list(range(0, 21, 4)) + list(range(-2, 24, 4))), + ), + ( + RangeIndex(0, -20, -2), + RangeIndex(-1, -21, -2), + RangeIndex(-19, 1, 1), + Int64Index(list(range(0, -20, -2)) + list(range(-1, -21, -2))), + ), + ( + RangeIndex(0, 100, 5), + RangeIndex(0, 100, 20), + RangeIndex(0, 100, 5), + Int64Index(range(0, 100, 5)), + ), + ( + RangeIndex(0, -100, -5), + RangeIndex(5, -100, -20), + RangeIndex(-95, 10, 5), + Int64Index(list(range(0, -100, -5)) + [5]), + ), + ( + RangeIndex(0, -11, -1), + RangeIndex(1, -12, -4), + RangeIndex(-11, 2, 1), + Int64Index(list(range(0, -11, -1)) + [1, -11]), + ), + (RangeIndex(0), RangeIndex(0), RangeIndex(0), RangeIndex(0)), + ( + RangeIndex(0, -10, -2), + RangeIndex(0), + RangeIndex(0, -10, -2), + RangeIndex(0, -10, -2), + ), + ( + RangeIndex(0, 100, 2), + RangeIndex(100, 150, 200), + RangeIndex(0, 102, 2), + Int64Index(range(0, 102, 2)), + ), + ( + RangeIndex(0, -100, -2), + RangeIndex(-100, 50, 102), + RangeIndex(-100, 4, 2), + 
Int64Index(list(range(0, -100, -2)) + [-100, 2]), + ), + ( + RangeIndex(0, -100, -1), + RangeIndex(0, -50, -3), + RangeIndex(-99, 1, 1), + Int64Index(list(range(0, -100, -1))), + ), + ( + RangeIndex(0, 1, 1), + RangeIndex(5, 6, 10), + RangeIndex(0, 6, 5), + Int64Index([0, 5]), + ), + ( + RangeIndex(0, 10, 5), + RangeIndex(-5, -6, -20), + RangeIndex(-5, 10, 5), + Int64Index([0, 5, -5]), + ), + ( + RangeIndex(0, 3, 1), + RangeIndex(4, 5, 1), + Int64Index([0, 1, 2, 4]), + Int64Index([0, 1, 2, 4]), + ), + ( + RangeIndex(0, 10, 1), + Int64Index([]), + RangeIndex(0, 10, 1), + RangeIndex(0, 10, 1), + ), + ( + RangeIndex(0), + Int64Index([1, 5, 6]), + Int64Index([1, 5, 6]), + Int64Index([1, 5, 6]), + ), + ] + ) + def unions(self, request): + """Inputs and expected outputs for RangeIndex.union tests""" + return request.param + + def test_union_sorted(self, unions): + + idx1, idx2, expected_sorted, expected_notsorted = unions + + res1 = idx1.union(idx2, sort=None) + tm.assert_index_equal(res1, expected_sorted, exact=True) + + res1 = idx1.union(idx2, sort=False) + tm.assert_index_equal(res1, expected_notsorted, exact=True) + + res2 = idx2.union(idx1, sort=None) + res3 = Int64Index(idx1._values, name=idx1.name).union(idx2, sort=None) + tm.assert_index_equal(res2, expected_sorted, exact=True) + tm.assert_index_equal(res3, expected_sorted, exact="equiv") + + def test_union_same_step_misaligned(self): + # GH#44019 + left = RangeIndex(range(0, 20, 4)) + right = RangeIndex(range(1, 21, 4)) + + result = left.union(right) + expected = Int64Index([0, 1, 4, 5, 8, 9, 12, 13, 16, 17]) + tm.assert_index_equal(result, expected, exact=True) + + def test_difference(self): + # GH#12034 Cases where we operate against another RangeIndex and may + # get back another RangeIndex + obj = RangeIndex.from_range(range(1, 10), name="foo") + + result = obj.difference(obj) + expected = RangeIndex.from_range(range(0), name="foo") + tm.assert_index_equal(result, expected, exact=True) + + result = obj.difference(expected.rename("bar")) + tm.assert_index_equal(result, obj.rename(None), exact=True) + + result = obj.difference(obj[:3]) + tm.assert_index_equal(result, obj[3:], exact=True) + + result = obj.difference(obj[-3:]) + tm.assert_index_equal(result, obj[:-3], exact=True) + + # Flipping the step of 'other' doesn't affect the result, but + # flipping the step of 'self' does when sort=None + result = obj[::-1].difference(obj[-3:]) + tm.assert_index_equal(result, obj[:-3], exact=True) + + result = obj[::-1].difference(obj[-3:], sort=False) + tm.assert_index_equal(result, obj[:-3][::-1], exact=True) + + result = obj[::-1].difference(obj[-3:][::-1]) + tm.assert_index_equal(result, obj[:-3], exact=True) + + result = obj[::-1].difference(obj[-3:][::-1], sort=False) + tm.assert_index_equal(result, obj[:-3][::-1], exact=True) + + result = obj.difference(obj[2:6]) + expected = Int64Index([1, 2, 7, 8, 9], name="foo") + tm.assert_index_equal(result, expected) + + def test_difference_sort(self): + # GH#44085 ensure we respect the sort keyword + + idx = Index(range(4))[::-1] + other = Index(range(3, 4)) + + result = idx.difference(other) + expected = Index(range(3)) + tm.assert_index_equal(result, expected, exact=True) + + result = idx.difference(other, sort=False) + expected = expected[::-1] + tm.assert_index_equal(result, expected, exact=True) + + # case where the intersection is empty + other = range(10, 12) + result = idx.difference(other, sort=None) + expected = idx[::-1] + tm.assert_index_equal(result, expected, exact=True) + + def
test_difference_mismatched_step(self): + obj = RangeIndex.from_range(range(1, 10), name="foo") + + result = obj.difference(obj[::2]) + expected = obj[1::2] + tm.assert_index_equal(result, expected, exact=True) + + result = obj[::-1].difference(obj[::2], sort=False) + tm.assert_index_equal(result, expected[::-1], exact=True) + + result = obj.difference(obj[1::2]) + expected = obj[::2] + tm.assert_index_equal(result, expected, exact=True) + + result = obj[::-1].difference(obj[1::2], sort=False) + tm.assert_index_equal(result, expected[::-1], exact=True) + + def test_difference_interior_overlap_endpoints_preserved(self): + left = RangeIndex(range(4)) + right = RangeIndex(range(1, 3)) + + result = left.difference(right) + expected = RangeIndex(0, 4, 3) + assert expected.tolist() == [0, 3] + tm.assert_index_equal(result, expected, exact=True) + + def test_difference_endpoints_overlap_interior_preserved(self): + left = RangeIndex(-8, 20, 7) + right = RangeIndex(13, -9, -3) + + result = left.difference(right) + expected = RangeIndex(-1, 13, 7) + assert expected.tolist() == [-1, 6] + tm.assert_index_equal(result, expected, exact=True) + + def test_difference_interior_non_preserving(self): + # case with intersection of length 1 but RangeIndex is not preserved + idx = Index(range(10)) + + other = idx[3:4] + result = idx.difference(other) + expected = Int64Index([0, 1, 2, 4, 5, 6, 7, 8, 9]) + tm.assert_index_equal(result, expected, exact=True) + + # case with other.step / self.step > 2 + other = idx[::3] + result = idx.difference(other) + expected = Int64Index([1, 2, 4, 5, 7, 8]) + tm.assert_index_equal(result, expected, exact=True) + + # cases with only reaching one end of left + obj = Index(range(20)) + other = obj[:10:2] + result = obj.difference(other) + expected = Int64Index([1, 3, 5, 7, 9] + list(range(10, 20))) + tm.assert_index_equal(result, expected, exact=True) + + other = obj[1:11:2] + result = obj.difference(other) + expected = Int64Index([0, 2, 4, 6, 8, 10] + list(range(11, 20))) + tm.assert_index_equal(result, expected, exact=True) + + def test_symmetric_difference(self): + # GH#12034 Cases where we operate against another RangeIndex and may + # get back another RangeIndex + left = RangeIndex.from_range(range(1, 10), name="foo") + + result = left.symmetric_difference(left) + expected = RangeIndex.from_range(range(0), name="foo") + tm.assert_index_equal(result, expected) + + result = left.symmetric_difference(expected.rename("bar")) + tm.assert_index_equal(result, left.rename(None)) + + result = left[:-2].symmetric_difference(left[2:]) + expected = Int64Index([1, 2, 8, 9], name="foo") + tm.assert_index_equal(result, expected) + + right = RangeIndex.from_range(range(10, 15)) + + result = left.symmetric_difference(right) + expected = RangeIndex.from_range(range(1, 15)) + tm.assert_index_equal(result, expected) + + result = left.symmetric_difference(right[1:]) + expected = Int64Index([1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14]) + tm.assert_index_equal(result, expected) + + +def assert_range_or_not_is_rangelike(index): + """ + Check that we either have a RangeIndex or that this index *cannot* + be represented as a RangeIndex. 
+ """ + if not isinstance(index, RangeIndex) and len(index) > 0: + diff = index[:-1] - index[1:] + assert not (diff == diff[0]).all() + + +@given( + st.integers(-20, 20), + st.integers(-20, 20), + st.integers(-20, 20), + st.integers(-20, 20), + st.integers(-20, 20), + st.integers(-20, 20), +) +def test_range_difference(start1, stop1, step1, start2, stop2, step2): + # test that + # a) we match Int64Index.difference and + # b) we return RangeIndex whenever it is possible to do so. + assume(step1 != 0) + assume(step2 != 0) + + left = RangeIndex(start1, stop1, step1) + right = RangeIndex(start2, stop2, step2) + + result = left.difference(right, sort=None) + assert_range_or_not_is_rangelike(result) + + alt = Int64Index(left).difference(Int64Index(right), sort=None) + tm.assert_index_equal(result, alt, exact="equiv") + + result = left.difference(right, sort=False) + assert_range_or_not_is_rangelike(result) + + alt = Int64Index(left).difference(Int64Index(right), sort=False) + tm.assert_index_equal(result, alt, exact="equiv") diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/test_any_index.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/test_any_index.py new file mode 100644 index 0000000..c7aae5d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/test_any_index.py @@ -0,0 +1,190 @@ +""" +Tests that can be parametrized over _any_ Index object. +""" +import re + +import numpy as np +import pytest + +from pandas.errors import InvalidIndexError + +import pandas._testing as tm + + +def test_boolean_context_compat(index): + # GH#7897 + with pytest.raises(ValueError, match="The truth value of a"): + if index: + pass + + with pytest.raises(ValueError, match="The truth value of a"): + bool(index) + + +def test_sort(index): + msg = "cannot sort an Index object in-place, use sort_values instead" + with pytest.raises(TypeError, match=msg): + index.sort() + + +def test_hash_error(index): + with pytest.raises(TypeError, match=f"unhashable type: '{type(index).__name__}'"): + hash(index) + + +def test_copy_dtype_deprecated(index): + # GH#35853 + with tm.assert_produces_warning(FutureWarning): + index.copy(dtype=object) + + +def test_mutability(index): + if not len(index): + return + msg = "Index does not support mutable operations" + with pytest.raises(TypeError, match=msg): + index[0] = index[0] + + +def test_map_identity_mapping(index): + # GH#12766 + result = index.map(lambda x: x) + tm.assert_index_equal(result, index, exact="equiv") + + +def test_wrong_number_names(index): + names = index.nlevels * ["apple", "banana", "carrot"] + with pytest.raises(ValueError, match="^Length"): + index.names = names + + +def test_view_preserves_name(index): + assert index.view().name == index.name + + +def test_ravel_deprecation(index): + # GH#19956 ravel returning ndarray is deprecated + with tm.assert_produces_warning(FutureWarning): + index.ravel() + + +def test_is_type_compatible_deprecation(index): + # GH#42113 + msg = "is_type_compatible is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + index.is_type_compatible(index.inferred_type) + + +def test_is_mixed_deprecated(index): + # GH#32922 + msg = "Index.is_mixed is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + index.is_mixed() + + +class TestConversion: + def test_to_series(self, index): + # assert that we are creating a copy of the index + + ser = index.to_series() + assert ser.values is not index.values + assert ser.index is not index + assert ser.name == 
index.name + + def test_to_series_with_arguments(self, index): + # GH#18699 + + # index kwarg + ser = index.to_series(index=index) + + assert ser.values is not index.values + assert ser.index is index + assert ser.name == index.name + + # name kwarg + ser = index.to_series(name="__test") + + assert ser.values is not index.values + assert ser.index is not index + assert ser.name != index.name + + def test_tolist_matches_list(self, index): + assert index.tolist() == list(index) + + +class TestRoundTrips: + def test_pickle_roundtrip(self, index): + result = tm.round_trip_pickle(index) + tm.assert_index_equal(result, index, exact=True) + if result.nlevels > 1: + # GH#8367 round-trip with timezone + assert index.equal_levels(result) + + def test_pickle_preserves_name(self, index): + original_name, index.name = index.name, "foo" + unpickled = tm.round_trip_pickle(index) + assert index.equals(unpickled) + index.name = original_name + + +class TestIndexing: + def test_get_loc_listlike_raises_invalid_index_error(self, index): + # and never TypeError + key = np.array([0, 1], dtype=np.intp) + + with pytest.raises(InvalidIndexError, match=r"\[0 1\]"): + index.get_loc(key) + + with pytest.raises(InvalidIndexError, match=r"\[False True\]"): + index.get_loc(key.astype(bool)) + + def test_getitem_ellipsis(self, index): + # GH#21282 + result = index[...] + assert result.equals(index) + assert result is not index + + def test_slice_keeps_name(self, index): + assert index.name == index[1:].name + + @pytest.mark.parametrize("item", [101, "no_int", 2.5]) + # FutureWarning from non-tuple sequence of nd indexing + @pytest.mark.filterwarnings("ignore::FutureWarning") + def test_getitem_error(self, index, item): + msg = "|".join( + [ + r"index 101 is out of bounds for axis 0 with size [\d]+", + re.escape( + "only integers, slices (`:`), ellipsis (`...`), " + "numpy.newaxis (`None`) and integer or boolean arrays " + "are valid indices" + ), + "index out of bounds", # string[pyarrow] + "Only integers, slices and integer or " + "boolean arrays are valid indices.", # string[pyarrow] + ] + ) + with pytest.raises(IndexError, match=msg): + index[item] + + +class TestRendering: + def test_str(self, index): + # test the string repr + index.name = "foo" + assert "'foo'" in str(index) + assert type(index).__name__ in str(index) + + +class TestReductions: + def test_argmax_axis_invalid(self, index): + # GH#23081 + msg = r"`axis` must be fewer than the number of dimensions \(1\)" + with pytest.raises(ValueError, match=msg): + index.argmax(axis=1) + with pytest.raises(ValueError, match=msg): + index.argmin(axis=2) + with pytest.raises(ValueError, match=msg): + index.min(axis=-2) + with pytest.raises(ValueError, match=msg): + index.max(axis=-3) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/test_base.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/test_base.py new file mode 100644 index 0000000..1145de1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/test_base.py @@ -0,0 +1,1589 @@ +from collections import defaultdict +from datetime import datetime +from io import StringIO +import math +import re + +import numpy as np +import pytest + +from pandas.compat import IS64 +from pandas.errors import InvalidIndexError +from pandas.util._test_decorators import async_mark + +import pandas as pd +from pandas import ( + CategoricalIndex, + DataFrame, + DatetimeIndex, + IntervalIndex, + PeriodIndex, + RangeIndex, + Series, + TimedeltaIndex, + date_range, + period_range, +) 
+import pandas._testing as tm +from pandas.core.api import ( + Float64Index, + Int64Index, + NumericIndex, + UInt64Index, +) +from pandas.core.indexes.api import ( + Index, + MultiIndex, + _get_combined_index, + ensure_index, + ensure_index_from_sequences, +) +from pandas.tests.indexes.common import Base + + +class TestIndex(Base): + _index_cls = Index + + @pytest.fixture + def simple_index(self) -> Index: + return self._index_cls(list("abcde")) + + def test_can_hold_identifiers(self, simple_index): + index = simple_index + key = index[0] + assert index._can_hold_identifiers_and_holds_name(key) is True + + @pytest.mark.parametrize("index", ["datetime"], indirect=True) + def test_new_axis(self, index): + with tm.assert_produces_warning(FutureWarning): + # GH#30588 multi-dimensional indexing deprecated + new_index = index[None, :] + assert new_index.ndim == 2 + assert isinstance(new_index, np.ndarray) + + def test_constructor_regular(self, index): + tm.assert_contains_all(index, index) + + @pytest.mark.parametrize("index", ["string"], indirect=True) + def test_constructor_casting(self, index): + # casting + arr = np.array(index) + new_index = Index(arr) + tm.assert_contains_all(arr, new_index) + tm.assert_index_equal(index, new_index) + + @pytest.mark.parametrize("index", ["string"], indirect=True) + def test_constructor_copy(self, index): + arr = np.array(index) + new_index = Index(arr, copy=True, name="name") + assert isinstance(new_index, Index) + assert new_index.name == "name" + tm.assert_numpy_array_equal(arr, new_index.values) + arr[0] = "SOMEBIGLONGSTRING" + assert new_index[0] != "SOMEBIGLONGSTRING" + + @pytest.mark.parametrize("cast_as_obj", [True, False]) + @pytest.mark.parametrize( + "index", + [ + date_range( + "2015-01-01 10:00", + freq="D", + periods=3, + tz="US/Eastern", + name="Green Eggs & Ham", + ), # DTI with tz + date_range("2015-01-01 10:00", freq="D", periods=3), # DTI no tz + pd.timedelta_range("1 days", freq="D", periods=3), # td + period_range("2015-01-01", freq="D", periods=3), # period + ], + ) + def test_constructor_from_index_dtlike(self, cast_as_obj, index): + if cast_as_obj: + result = Index(index.astype(object)) + else: + result = Index(index) + + tm.assert_index_equal(result, index) + + if isinstance(index, DatetimeIndex): + assert result.tz == index.tz + if cast_as_obj: + # GH#23524 check that Index(dti, dtype=object) does not + # incorrectly raise ValueError, and that nanoseconds are not + # dropped + index += pd.Timedelta(nanoseconds=50) + result = Index(index, dtype=object) + assert result.dtype == np.object_ + assert list(result) == list(index) + + @pytest.mark.parametrize( + "index,has_tz", + [ + ( + date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern"), + True, + ), # datetimetz + (pd.timedelta_range("1 days", freq="D", periods=3), False), # td + (period_range("2015-01-01", freq="D", periods=3), False), # period + ], + ) + def test_constructor_from_series_dtlike(self, index, has_tz): + result = Index(Series(index)) + tm.assert_index_equal(result, index) + + if has_tz: + assert result.tz == index.tz + + def test_constructor_from_series_freq(self): + # GH 6273 + # create from a series, passing a freq + dts = ["1-1-1990", "2-1-1990", "3-1-1990", "4-1-1990", "5-1-1990"] + expected = DatetimeIndex(dts, freq="MS") + + s = Series(pd.to_datetime(dts)) + result = DatetimeIndex(s, freq="MS") + + tm.assert_index_equal(result, expected) + + def test_constructor_from_frame_series_freq(self): + # GH 6273 + # create from a series, passing a freq + dts 
= ["1-1-1990", "2-1-1990", "3-1-1990", "4-1-1990", "5-1-1990"] + expected = DatetimeIndex(dts, freq="MS") + + df = DataFrame(np.random.rand(5, 3)) + df["date"] = dts + result = DatetimeIndex(df["date"], freq="MS") + + assert df["date"].dtype == object + expected.name = "date" + tm.assert_index_equal(result, expected) + + expected = Series(dts, name="date") + tm.assert_series_equal(df["date"], expected) + + # GH 6274 + # infer freq of same + freq = pd.infer_freq(df["date"]) + assert freq == "MS" + + def test_constructor_int_dtype_nan(self): + # see gh-15187 + data = [np.nan] + expected = Float64Index(data) + result = Index(data, dtype="float") + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "klass,dtype,na_val", + [ + (Float64Index, np.float64, np.nan), + (DatetimeIndex, "datetime64[ns]", pd.NaT), + ], + ) + def test_index_ctor_infer_nan_nat(self, klass, dtype, na_val): + # GH 13467 + na_list = [na_val, na_val] + expected = klass(na_list) + assert expected.dtype == dtype + + result = Index(na_list) + tm.assert_index_equal(result, expected) + + result = Index(np.array(na_list)) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "vals,dtype", + [ + ([1, 2, 3, 4, 5], "int"), + ([1.1, np.nan, 2.2, 3.0], "float"), + (["A", "B", "C", np.nan], "obj"), + ], + ) + def test_constructor_simple_new(self, vals, dtype): + index = Index(vals, name=dtype) + result = index._simple_new(index.values, dtype) + tm.assert_index_equal(result, index) + + @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning") + @pytest.mark.parametrize("attr", ["values", "asi8"]) + @pytest.mark.parametrize("klass", [Index, DatetimeIndex]) + def test_constructor_dtypes_datetime(self, tz_naive_fixture, attr, klass): + # Test constructing with a datetimetz dtype + # .values produces numpy datetimes, so these are considered naive + # .asi8 produces integers, so these are considered epoch timestamps + # ^the above will be true in a later version. Right now we `.view` + # the i8 values as NS_DTYPE, effectively treating them as wall times. 
+ index = date_range("2011-01-01", periods=5) + arg = getattr(index, attr) + index = index.tz_localize(tz_naive_fixture) + dtype = index.dtype + + warn = None if tz_naive_fixture is None else FutureWarning + # astype dt64 -> dt64tz deprecated + + if attr == "asi8": + result = DatetimeIndex(arg).tz_localize(tz_naive_fixture) + else: + result = klass(arg, tz=tz_naive_fixture) + tm.assert_index_equal(result, index) + + if attr == "asi8": + with tm.assert_produces_warning(warn): + result = DatetimeIndex(arg).astype(dtype) + else: + result = klass(arg, dtype=dtype) + tm.assert_index_equal(result, index) + + if attr == "asi8": + result = DatetimeIndex(list(arg)).tz_localize(tz_naive_fixture) + else: + result = klass(list(arg), tz=tz_naive_fixture) + tm.assert_index_equal(result, index) + + if attr == "asi8": + with tm.assert_produces_warning(warn): + result = DatetimeIndex(list(arg)).astype(dtype) + else: + result = klass(list(arg), dtype=dtype) + tm.assert_index_equal(result, index) + + @pytest.mark.parametrize("attr", ["values", "asi8"]) + @pytest.mark.parametrize("klass", [Index, TimedeltaIndex]) + def test_constructor_dtypes_timedelta(self, attr, klass): + index = pd.timedelta_range("1 days", periods=5) + index = index._with_freq(None) # won't be preserved by constructors + dtype = index.dtype + + values = getattr(index, attr) + + result = klass(values, dtype=dtype) + tm.assert_index_equal(result, index) + + result = klass(list(values), dtype=dtype) + tm.assert_index_equal(result, index) + + @pytest.mark.parametrize("value", [[], iter([]), (_ for _ in [])]) + @pytest.mark.parametrize( + "klass", + [ + Index, + Float64Index, + Int64Index, + UInt64Index, + CategoricalIndex, + DatetimeIndex, + TimedeltaIndex, + ], + ) + def test_constructor_empty(self, value, klass): + empty = klass(value) + assert isinstance(empty, klass) + assert not len(empty) + + @pytest.mark.parametrize( + "empty,klass", + [ + (PeriodIndex([], freq="B"), PeriodIndex), + (PeriodIndex(iter([]), freq="B"), PeriodIndex), + (PeriodIndex((_ for _ in []), freq="B"), PeriodIndex), + (RangeIndex(step=1), RangeIndex), + (MultiIndex(levels=[[1, 2], ["blue", "red"]], codes=[[], []]), MultiIndex), + ], + ) + def test_constructor_empty_special(self, empty, klass): + assert isinstance(empty, klass) + assert not len(empty) + + @pytest.mark.parametrize( + "index", + [ + "datetime", + "float", + "int", + "period", + "range", + "repeats", + "timedelta", + "tuples", + "uint", + ], + indirect=True, + ) + def test_view_with_args(self, index): + index.view("i8") + + @pytest.mark.parametrize( + "index", + [ + "unicode", + "string", + pytest.param("categorical", marks=pytest.mark.xfail(reason="gh-25464")), + "bool", + "empty", + ], + indirect=True, + ) + def test_view_with_args_object_array_raises(self, index): + msg = "Cannot change data-type for object array" + with pytest.raises(TypeError, match=msg): + index.view("i8") + + @pytest.mark.parametrize("index", ["int", "range"], indirect=True) + def test_astype(self, index): + casted = index.astype("i8") + + # it works! 
+ casted.get_loc(5) + + # pass on name + index.name = "foobar" + casted = index.astype("i8") + assert casted.name == "foobar" + + def test_equals_object(self): + # same + assert Index(["a", "b", "c"]).equals(Index(["a", "b", "c"])) + + @pytest.mark.parametrize( + "comp", [Index(["a", "b"]), Index(["a", "b", "d"]), ["a", "b", "c"]] + ) + def test_not_equals_object(self, comp): + assert not Index(["a", "b", "c"]).equals(comp) + + def test_identical(self): + + # index + i1 = Index(["a", "b", "c"]) + i2 = Index(["a", "b", "c"]) + + assert i1.identical(i2) + + i1 = i1.rename("foo") + assert i1.equals(i2) + assert not i1.identical(i2) + + i2 = i2.rename("foo") + assert i1.identical(i2) + + i3 = Index([("a", "a"), ("a", "b"), ("b", "a")]) + i4 = Index([("a", "a"), ("a", "b"), ("b", "a")], tupleize_cols=False) + assert not i3.identical(i4) + + def test_is_(self): + ind = Index(range(10)) + assert ind.is_(ind) + assert ind.is_(ind.view().view().view().view()) + assert not ind.is_(Index(range(10))) + assert not ind.is_(ind.copy()) + assert not ind.is_(ind.copy(deep=False)) + assert not ind.is_(ind[:]) + assert not ind.is_(np.array(range(10))) + + # quasi-implementation dependent + assert ind.is_(ind.view()) + ind2 = ind.view() + ind2.name = "bob" + assert ind.is_(ind2) + assert ind2.is_(ind) + # doesn't matter if Indices are *actually* views of underlying data, + assert not ind.is_(Index(ind.values)) + arr = np.array(range(1, 11)) + ind1 = Index(arr, copy=False) + ind2 = Index(arr, copy=False) + assert not ind1.is_(ind2) + + def test_asof_numeric_vs_bool_raises(self): + left = Index([1, 2, 3]) + right = Index([True, False]) + + msg = "Cannot compare dtypes int64 and object" + with pytest.raises(TypeError, match=msg): + left.asof(right[0]) + # TODO: should right.asof(left[0]) also raise? + + with pytest.raises(InvalidIndexError, match=re.escape(str(right))): + left.asof(right) + + with pytest.raises(InvalidIndexError, match=re.escape(str(left))): + right.asof(left) + + @pytest.mark.parametrize("index", ["string"], indirect=True) + def test_booleanindex(self, index): + bool_index = np.ones(len(index), dtype=bool) + bool_index[5:30:2] = False + + sub_index = index[bool_index] + + for i, val in enumerate(sub_index): + assert sub_index.get_loc(val) == i + + sub_index = index[list(bool_index)] + for i, val in enumerate(sub_index): + assert sub_index.get_loc(val) == i + + def test_fancy(self, simple_index): + index = simple_index + sl = index[[1, 2, 3]] + for i in sl: + assert i == sl[sl.get_loc(i)] + + @pytest.mark.parametrize("index", ["string", "int", "float"], indirect=True) + @pytest.mark.parametrize("dtype", [np.int_, np.bool_]) + def test_empty_fancy(self, index, dtype): + empty_arr = np.array([], dtype=dtype) + empty_index = type(index)([]) + + assert index[[]].identical(empty_index) + assert index[empty_arr].identical(empty_index) + + @pytest.mark.parametrize("index", ["string", "int", "float"], indirect=True) + def test_empty_fancy_raises(self, index): + # DatetimeIndex is excluded, because it overrides getitem and should + # be tested separately. 
+ empty_farr = np.array([], dtype=np.float_) + empty_index = type(index)([]) + + assert index[[]].identical(empty_index) + # np.ndarray only accepts ndarray of int & bool dtypes, so should Index + msg = r"arrays used as indices must be of integer \(or boolean\) type" + with pytest.raises(IndexError, match=msg): + index[empty_farr] + + def test_union_dt_as_obj(self, sort, simple_index): + # TODO: Replace with fixture result + index = simple_index + date_index = date_range("2019-01-01", periods=10) + first_cat = index.union(date_index) + second_cat = index.union(index) + + appended = np.append(index, date_index.astype("O")) + + assert tm.equalContents(first_cat, appended) + assert tm.equalContents(second_cat, index) + tm.assert_contains_all(index, first_cat) + tm.assert_contains_all(index, second_cat) + tm.assert_contains_all(date_index, first_cat) + + def test_map_with_tuples(self): + # GH 12766 + + # Test that returning a single tuple from an Index + # returns an Index. + index = tm.makeIntIndex(3) + result = tm.makeIntIndex(3).map(lambda x: (x,)) + expected = Index([(i,) for i in index]) + tm.assert_index_equal(result, expected) + + # Test that returning a tuple from a map of a single index + # returns a MultiIndex object. + result = index.map(lambda x: (x, x == 1)) + expected = MultiIndex.from_tuples([(i, i == 1) for i in index]) + tm.assert_index_equal(result, expected) + + def test_map_with_tuples_mi(self): + # Test that returning a single object from a MultiIndex + # returns an Index. + first_level = ["foo", "bar", "baz"] + multi_index = MultiIndex.from_tuples(zip(first_level, [1, 2, 3])) + reduced_index = multi_index.map(lambda x: x[0]) + tm.assert_index_equal(reduced_index, Index(first_level)) + + @pytest.mark.parametrize( + "attr", ["makeDateIndex", "makePeriodIndex", "makeTimedeltaIndex"] + ) + def test_map_tseries_indices_return_index(self, attr): + index = getattr(tm, attr)(10) + expected = Index([1] * 10) + result = index.map(lambda x: 1) + tm.assert_index_equal(expected, result) + + def test_map_tseries_indices_accessor_return_index(self): + date_index = tm.makeDateIndex(24, freq="h", name="hourly") + expected = Int64Index(range(24), name="hourly") + tm.assert_index_equal(expected, date_index.map(lambda x: x.hour), exact=True) + + @pytest.mark.parametrize( + "mapper", + [ + lambda values, index: {i: e for e, i in zip(values, index)}, + lambda values, index: Series(values, index), + ], + ) + def test_map_dictlike_simple(self, mapper): + # GH 12756 + expected = Index(["foo", "bar", "baz"]) + index = tm.makeIntIndex(3) + result = index.map(mapper(expected.values, index)) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "mapper", + [ + lambda values, index: {i: e for e, i in zip(values, index)}, + lambda values, index: Series(values, index), + ], + ) + def test_map_dictlike(self, index, mapper): + # GH 12756 + if isinstance(index, CategoricalIndex): + # Tested in test_categorical + return + elif not index.is_unique: + # Cannot map duplicated index + return + + rng = np.arange(len(index), 0, -1) + + if index.empty: + # to match proper result coercion for uints + expected = Index([]) + elif index._is_backward_compat_public_numeric_index: + expected = index._constructor(rng, dtype=index.dtype) + elif type(index) is Index and index.dtype != object: + # i.e.
EA-backed, for now just Nullable + expected = Index(rng, dtype=index.dtype) + elif index.dtype.kind == "u": + expected = Index(rng, dtype=index.dtype) + else: + expected = Index(rng) + + result = index.map(mapper(expected, index)) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "mapper", + [Series(["foo", 2.0, "baz"], index=[0, 2, -1]), {0: "foo", 2: 2.0, -1: "baz"}], + ) + def test_map_with_non_function_missing_values(self, mapper): + # GH 12756 + expected = Index([2.0, np.nan, "foo"]) + result = Index([2, 1, 0]).map(mapper) + + tm.assert_index_equal(expected, result) + + def test_map_na_exclusion(self): + index = Index([1.5, np.nan, 3, np.nan, 5]) + + result = index.map(lambda x: x * 2, na_action="ignore") + expected = index * 2 + tm.assert_index_equal(result, expected) + + def test_map_defaultdict(self): + index = Index([1, 2, 3]) + default_dict = defaultdict(lambda: "blank") + default_dict[1] = "stuff" + result = index.map(default_dict) + expected = Index(["stuff", "blank", "blank"]) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("name,expected", [("foo", "foo"), ("bar", None)]) + def test_append_empty_preserve_name(self, name, expected): + left = Index([], name="foo") + right = Index([1, 2, 3], name=name) + + result = left.append(right) + assert result.name == expected + + @pytest.mark.parametrize( + "index, expected", + [ + ("string", False), + ("bool", False), + ("categorical", False), + ("int", True), + ("datetime", False), + ("float", True), + ], + indirect=["index"], + ) + def test_is_numeric(self, index, expected): + assert index.is_numeric() is expected + + @pytest.mark.parametrize( + "index, expected", + [ + ("string", True), + ("bool", True), + ("categorical", False), + ("int", False), + ("datetime", False), + ("float", False), + ], + indirect=["index"], + ) + def test_is_object(self, index, expected): + assert index.is_object() is expected + + @pytest.mark.parametrize( + "index, expected", + [ + ("string", False), + ("bool", False), + ("categorical", False), + ("int", False), + ("datetime", True), + ("float", False), + ], + indirect=["index"], + ) + def test_is_all_dates(self, index, expected): + with tm.assert_produces_warning(FutureWarning): + assert index.is_all_dates is expected + + def test_summary(self, index): + index._summary() + + def test_format_bug(self): + # GH 14626 + # windows has different precision on datetime.datetime.now (it doesn't + # include us since the default for Timestamp shows these but Index + # formatting does not we are skipping) + now = datetime.now() + if not str(now).endswith("000"): + index = Index([now]) + formatted = index.format() + expected = [str(index[0])] + assert formatted == expected + + Index([]).format() + + @pytest.mark.parametrize("vals", [[1, 2.0 + 3.0j, 4.0], ["a", "b", "c"]]) + def test_format_missing(self, vals, nulls_fixture): + # 2845 + vals = list(vals) # Copy for each iteration + vals.append(nulls_fixture) + index = Index(vals) + + formatted = index.format() + null_repr = "NaN" if isinstance(nulls_fixture, float) else str(nulls_fixture) + expected = [str(index[0]), str(index[1]), str(index[2]), null_repr] + + assert formatted == expected + assert index[3] is nulls_fixture + + @pytest.mark.parametrize("op", ["any", "all"]) + def test_logical_compat(self, op, simple_index): + index = simple_index + assert getattr(index, op)() == getattr(index.values, op)() + + @pytest.mark.parametrize("index", ["string", "int", "float"], indirect=True) + def test_drop_by_str_label(self, 
index): + n = len(index) + drop = index[list(range(5, 10))] + dropped = index.drop(drop) + + expected = index[list(range(5)) + list(range(10, n))] + tm.assert_index_equal(dropped, expected) + + dropped = index.drop(index[0]) + expected = index[1:] + tm.assert_index_equal(dropped, expected) + + @pytest.mark.parametrize("index", ["string", "int", "float"], indirect=True) + @pytest.mark.parametrize("keys", [["foo", "bar"], ["1", "bar"]]) + def test_drop_by_str_label_raises_missing_keys(self, index, keys): + with pytest.raises(KeyError, match=""): + index.drop(keys) + + @pytest.mark.parametrize("index", ["string", "int", "float"], indirect=True) + def test_drop_by_str_label_errors_ignore(self, index): + n = len(index) + drop = index[list(range(5, 10))] + mixed = drop.tolist() + ["foo"] + dropped = index.drop(mixed, errors="ignore") + + expected = index[list(range(5)) + list(range(10, n))] + tm.assert_index_equal(dropped, expected) + + dropped = index.drop(["foo", "bar"], errors="ignore") + expected = index[list(range(n))] + tm.assert_index_equal(dropped, expected) + + def test_drop_by_numeric_label_loc(self): + # TODO: Parametrize numeric and str tests after self.strIndex fixture + index = Index([1, 2, 3]) + dropped = index.drop(1) + expected = Index([2, 3]) + + tm.assert_index_equal(dropped, expected) + + def test_drop_by_numeric_label_raises_missing_keys(self): + index = Index([1, 2, 3]) + with pytest.raises(KeyError, match=""): + index.drop([3, 4]) + + @pytest.mark.parametrize( + "key,expected", [(4, Index([1, 2, 3])), ([3, 4, 5], Index([1, 2]))] + ) + def test_drop_by_numeric_label_errors_ignore(self, key, expected): + index = Index([1, 2, 3]) + dropped = index.drop(key, errors="ignore") + + tm.assert_index_equal(dropped, expected) + + @pytest.mark.parametrize( + "values", + [["a", "b", ("c", "d")], ["a", ("c", "d"), "b"], [("c", "d"), "a", "b"]], + ) + @pytest.mark.parametrize("to_drop", [[("c", "d"), "a"], ["a", ("c", "d")]]) + def test_drop_tuple(self, values, to_drop): + # GH 18304 + index = Index(values) + expected = Index(["b"]) + + result = index.drop(to_drop) + tm.assert_index_equal(result, expected) + + removed = index.drop(to_drop[0]) + for drop_me in to_drop[1], [to_drop[1]]: + result = removed.drop(drop_me) + tm.assert_index_equal(result, expected) + + removed = index.drop(to_drop[1]) + msg = fr"\"\[{re.escape(to_drop[1].__repr__())}\] not found in axis\"" + for drop_me in to_drop[1], [to_drop[1]]: + with pytest.raises(KeyError, match=msg): + removed.drop(drop_me) + + def test_drop_with_duplicates_in_index(self, index): + # GH38051 + if len(index) == 0 or isinstance(index, MultiIndex): + return + if isinstance(index, IntervalIndex) and not IS64: + pytest.skip("Cannot test IntervalIndex with int64 dtype on 32 bit platform") + index = index.unique().repeat(2) + expected = index[2:] + result = index.drop(index[0]) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "attr", + [ + "is_monotonic_increasing", + "is_monotonic_decreasing", + "_is_strictly_monotonic_increasing", + "_is_strictly_monotonic_decreasing", + ], + ) + def test_is_monotonic_incomparable(self, attr): + index = Index([5, datetime.now(), 7]) + assert not getattr(index, attr) + + def test_set_value_deprecated(self, simple_index): + # GH 28621 + idx = simple_index + arr = np.array([1, 2, 3]) + with tm.assert_produces_warning(FutureWarning): + idx.set_value(arr, idx[1], 80) + assert arr[1] == 80 + + @pytest.mark.parametrize("values", [["foo", "bar", "quux"], {"foo", "bar", "quux"}]) + 
@pytest.mark.parametrize( + "index,expected", + [ + (Index(["qux", "baz", "foo", "bar"]), np.array([False, False, True, True])), + (Index([]), np.array([], dtype=bool)), # empty + ], + ) + def test_isin(self, values, index, expected): + result = index.isin(values) + tm.assert_numpy_array_equal(result, expected) + + def test_isin_nan_common_object(self, request, nulls_fixture, nulls_fixture2): + # Test cartesian product of null fixtures and ensure that we don't + # mangle the various types (save a corner case with PyPy) + + # all nans are the same + if ( + isinstance(nulls_fixture, float) + and isinstance(nulls_fixture2, float) + and math.isnan(nulls_fixture) + and math.isnan(nulls_fixture2) + ): + tm.assert_numpy_array_equal( + Index(["a", nulls_fixture]).isin([nulls_fixture2]), + np.array([False, True]), + ) + + elif nulls_fixture is nulls_fixture2: # should preserve NA type + tm.assert_numpy_array_equal( + Index(["a", nulls_fixture]).isin([nulls_fixture2]), + np.array([False, True]), + ) + + else: + tm.assert_numpy_array_equal( + Index(["a", nulls_fixture]).isin([nulls_fixture2]), + np.array([False, False]), + ) + + def test_isin_nan_common_float64(self, request, nulls_fixture): + + if nulls_fixture is pd.NaT or nulls_fixture is pd.NA: + # Check 1) that we cannot construct a Float64Index with this value + # and 2) that with an NaN we do not have .isin(nulls_fixture) + msg = "data is not compatible with Float64Index" + with pytest.raises(ValueError, match=msg): + Float64Index([1.0, nulls_fixture]) + + idx = Float64Index([1.0, np.nan]) + assert not idx.isin([nulls_fixture]).any() + return + + idx = Float64Index([1.0, nulls_fixture]) + res = idx.isin([np.nan]) + tm.assert_numpy_array_equal(res, np.array([False, True])) + + # we cannot compare NaT with NaN + res = idx.isin([pd.NaT]) + tm.assert_numpy_array_equal(res, np.array([False, False])) + + @pytest.mark.parametrize("level", [0, -1]) + @pytest.mark.parametrize( + "index", + [ + Index(["qux", "baz", "foo", "bar"]), + # Float64Index overrides isin, so must be checked separately + Float64Index([1.0, 2.0, 3.0, 4.0]), + ], + ) + def test_isin_level_kwarg(self, level, index): + values = index.tolist()[-2:] + ["nonexisting"] + + expected = np.array([False, False, True, True]) + tm.assert_numpy_array_equal(expected, index.isin(values, level=level)) + + index.name = "foobar" + tm.assert_numpy_array_equal(expected, index.isin(values, level="foobar")) + + def test_isin_level_kwarg_bad_level_raises(self, index): + for level in [10, index.nlevels, -(index.nlevels + 1)]: + with pytest.raises(IndexError, match="Too many levels"): + index.isin([], level=level) + + @pytest.mark.parametrize("label", [1.0, "foobar", "xyzzy", np.nan]) + def test_isin_level_kwarg_bad_label_raises(self, label, index): + if isinstance(index, MultiIndex): + index = index.rename(["foo", "bar"] + index.names[2:]) + msg = f"'Level {label} not found'" + else: + index = index.rename("foo") + msg = fr"Requested level \({label}\) does not match index name \(foo\)" + with pytest.raises(KeyError, match=msg): + index.isin([], level=label) + + @pytest.mark.parametrize("empty", [[], Series(dtype=object), np.array([])]) + def test_isin_empty(self, empty): + # see gh-16991 + index = Index(["a", "b"]) + expected = np.array([False, False]) + + result = index.isin(empty) + tm.assert_numpy_array_equal(expected, result) + + @pytest.mark.parametrize( + "values", + [ + [1, 2, 3, 4], + [1.0, 2.0, 3.0, 4.0], + [True, True, True, True], + ["foo", "bar", "baz", "qux"], + date_range("2018-01-01", 
freq="D", periods=4), + ], + ) + def test_boolean_cmp(self, values): + index = Index(values) + result = index == values + expected = np.array([True, True, True, True], dtype=bool) + + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("index", ["string"], indirect=True) + @pytest.mark.parametrize("name,level", [(None, 0), ("a", "a")]) + def test_get_level_values(self, index, name, level): + expected = index.copy() + if name: + expected.name = name + + result = expected.get_level_values(level) + tm.assert_index_equal(result, expected) + + def test_slice_keep_name(self): + index = Index(["a", "b"], name="asdf") + assert index.name == index[1:].name + + @pytest.mark.parametrize( + "index", + ["unicode", "string", "datetime", "int", "uint", "float"], + indirect=True, + ) + def test_join_self(self, index, join_type): + joined = index.join(index, how=join_type) + assert index is joined + + @pytest.mark.parametrize("method", ["strip", "rstrip", "lstrip"]) + def test_str_attribute(self, method): + # GH9068 + index = Index([" jack", "jill ", " jesse ", "frank"]) + expected = Index([getattr(str, method)(x) for x in index.values]) + + result = getattr(index.str, method)() + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "index", + [ + Index(range(5)), + tm.makeDateIndex(10), + MultiIndex.from_tuples([("foo", "1"), ("bar", "3")]), + period_range(start="2000", end="2010", freq="A"), + ], + ) + def test_str_attribute_raises(self, index): + with pytest.raises(AttributeError, match="only use .str accessor"): + index.str.repeat(2) + + @pytest.mark.parametrize( + "expand,expected", + [ + (None, Index([["a", "b", "c"], ["d", "e"], ["f"]])), + (False, Index([["a", "b", "c"], ["d", "e"], ["f"]])), + ( + True, + MultiIndex.from_tuples( + [("a", "b", "c"), ("d", "e", np.nan), ("f", np.nan, np.nan)] + ), + ), + ], + ) + def test_str_split(self, expand, expected): + index = Index(["a b c", "d e", "f"]) + if expand is not None: + result = index.str.split(expand=expand) + else: + result = index.str.split() + + tm.assert_index_equal(result, expected) + + def test_str_bool_return(self): + # test boolean case, should return np.array instead of boolean Index + index = Index(["a1", "a2", "b1", "b2"]) + result = index.str.startswith("a") + expected = np.array([True, True, False, False]) + + tm.assert_numpy_array_equal(result, expected) + assert isinstance(result, np.ndarray) + + def test_str_bool_series_indexing(self): + index = Index(["a1", "a2", "b1", "b2"]) + s = Series(range(4), index=index) + + result = s[s.index.str.startswith("a")] + expected = Series(range(2), index=["a1", "a2"]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "index,expected", [(Index(list("abcd")), True), (Index(range(4)), False)] + ) + def test_tab_completion(self, index, expected): + # GH 9910 + result = "str" in dir(index) + assert result == expected + + def test_indexing_doesnt_change_class(self): + index = Index([1, 2, 3, "a", "b", "c"]) + + assert index[1:3].identical(Index([2, 3], dtype=np.object_)) + assert index[[0, 1]].identical(Index([1, 2], dtype=np.object_)) + + def test_outer_join_sort(self): + left_index = Index(np.random.permutation(15)) + right_index = tm.makeDateIndex(10) + + with tm.assert_produces_warning(RuntimeWarning): + result = left_index.join(right_index, how="outer") + + # right_index in this case because DatetimeIndex has join precedence + # over Int64Index + with tm.assert_produces_warning(RuntimeWarning): + expected = 
right_index.astype(object).union(left_index.astype(object)) + + tm.assert_index_equal(result, expected) + + def test_take_fill_value(self): + # GH 12631 + index = Index(list("ABC"), name="xxx") + result = index.take(np.array([1, 0, -1])) + expected = Index(list("BAC"), name="xxx") + tm.assert_index_equal(result, expected) + + # fill_value + result = index.take(np.array([1, 0, -1]), fill_value=True) + expected = Index(["B", "A", np.nan], name="xxx") + tm.assert_index_equal(result, expected) + + # allow_fill=False + result = index.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = Index(["B", "A", "C"], name="xxx") + tm.assert_index_equal(result, expected) + + def test_take_fill_value_none_raises(self): + index = Index(list("ABC"), name="xxx") + msg = ( + "When allow_fill=True and fill_value is not None, " + "all indices must be >= -1" + ) + + with pytest.raises(ValueError, match=msg): + index.take(np.array([1, 0, -2]), fill_value=True) + with pytest.raises(ValueError, match=msg): + index.take(np.array([1, 0, -5]), fill_value=True) + + def test_take_bad_bounds_raises(self): + index = Index(list("ABC"), name="xxx") + with pytest.raises(IndexError, match="out of bounds"): + index.take(np.array([1, -5])) + + @pytest.mark.parametrize("name", [None, "foobar"]) + @pytest.mark.parametrize( + "labels", + [ + [], + np.array([]), + ["A", "B", "C"], + ["C", "B", "A"], + np.array(["A", "B", "C"]), + np.array(["C", "B", "A"]), + # Must preserve name even if dtype changes + date_range("20130101", periods=3).values, + date_range("20130101", periods=3).tolist(), + ], + ) + def test_reindex_preserves_name_if_target_is_list_or_ndarray(self, name, labels): + # GH6552 + index = Index([0, 1, 2]) + index.name = name + assert index.reindex(labels)[0].name == name + + @pytest.mark.parametrize("labels", [[], np.array([]), np.array([], dtype=np.int64)]) + def test_reindex_preserves_type_if_target_is_empty_list_or_array(self, labels): + # GH7774 + index = Index(list("abc")) + assert index.reindex(labels)[0].dtype.type == np.object_ + + @pytest.mark.parametrize( + "labels,dtype", + [ + (Int64Index([]), np.int64), + (Float64Index([]), np.float64), + (DatetimeIndex([]), np.datetime64), + ], + ) + def test_reindex_doesnt_preserve_type_if_target_is_empty_index(self, labels, dtype): + # GH7774 + index = Index(list("abc")) + assert index.reindex(labels)[0].dtype.type == dtype + + def test_reindex_no_type_preserve_target_empty_mi(self): + index = Index(list("abc")) + result = index.reindex( + MultiIndex([Int64Index([]), Float64Index([])], [[], []]) + )[0] + assert result.levels[0].dtype.type == np.int64 + assert result.levels[1].dtype.type == np.float64 + + def test_groupby(self): + index = Index(range(5)) + result = index.groupby(np.array([1, 1, 2, 2, 2])) + expected = {1: Index([0, 1]), 2: Index([2, 3, 4])} + + tm.assert_dict_equal(result, expected) + + @pytest.mark.parametrize( + "mi,expected", + [ + (MultiIndex.from_tuples([(1, 2), (4, 5)]), np.array([True, True])), + (MultiIndex.from_tuples([(1, 2), (4, 6)]), np.array([True, False])), + ], + ) + def test_equals_op_multiindex(self, mi, expected): + # GH9785 + # test comparisons of multiindex + df = pd.read_csv(StringIO("a,b,c\n1,2,3\n4,5,6"), index_col=[0, 1]) + + result = df.index == mi + tm.assert_numpy_array_equal(result, expected) + + def test_equals_op_multiindex_identify(self): + df = pd.read_csv(StringIO("a,b,c\n1,2,3\n4,5,6"), index_col=[0, 1]) + + result = df.index == df.index + expected = np.array([True, True]) + 
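+        # Editor's aside, a hypothetical sketch (the demo_* names are not part
+        # of the original suite): equality between MultiIndexes is elementwise
+        # over the tuples and yields a boolean ndarray, not a single bool.
+        demo_mi = MultiIndex.from_tuples([(1, 2), (4, 5)])
+        demo_other = MultiIndex.from_tuples([(1, 2), (9, 9)])
+        assert (demo_mi == demo_other).tolist() == [True, False]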
tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "index", + [ + MultiIndex.from_tuples([(1, 2), (4, 5), (8, 9)]), + Index(["foo", "bar", "baz"]), + ], + ) + def test_equals_op_mismatched_multiindex_raises(self, index): + df = pd.read_csv(StringIO("a,b,c\n1,2,3\n4,5,6"), index_col=[0, 1]) + + with pytest.raises(ValueError, match="Lengths must match"): + df.index == index + + def test_equals_op_index_vs_mi_same_length(self): + mi = MultiIndex.from_tuples([(1, 2), (4, 5), (8, 9)]) + index = Index(["foo", "bar", "baz"]) + + result = mi == index + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("dt_conv", [pd.to_datetime, pd.to_timedelta]) + def test_dt_conversion_preserves_name(self, dt_conv): + # GH 10875 + index = Index(["01:02:03", "01:02:04"], name="label") + assert index.name == dt_conv(index).name + + def test_cached_properties_not_settable(self): + index = Index([1, 2, 3]) + with pytest.raises(AttributeError, match="Can't set attribute"): + index.is_unique = False + + @async_mark() + async def test_tab_complete_warning(self, ip): + # https://github.com/pandas-dev/pandas/issues/16409 + pytest.importorskip("IPython", minversion="6.0.0") + from IPython.core.completer import provisionalcompleter + + code = "import pandas as pd; idx = pd.Index([1, 2])" + await ip.run_code(code) + + # GH 31324 newer jedi version raises Deprecation warning; + # appears resolved 2021-02-02 + with tm.assert_produces_warning(None): + with provisionalcompleter("ignore"): + list(ip.Completer.completions("idx.", 4)) + + def test_contains_method_removed(self, index): + # GH#30103 method removed for all types except IntervalIndex + if isinstance(index, IntervalIndex): + index.contains(1) + else: + msg = f"'{type(index).__name__}' object has no attribute 'contains'" + with pytest.raises(AttributeError, match=msg): + index.contains(1) + + def test_sortlevel(self): + index = Index([5, 4, 3, 2, 1]) + with pytest.raises(Exception, match="ascending must be a single bool value or"): + index.sortlevel(ascending="True") + + with pytest.raises( + Exception, match="ascending must be a list of bool values of length 1" + ): + index.sortlevel(ascending=[True, True]) + + with pytest.raises(Exception, match="ascending must be a bool value"): + index.sortlevel(ascending=["True"]) + + expected = Index([1, 2, 3, 4, 5]) + result = index.sortlevel(ascending=[True]) + tm.assert_index_equal(result[0], expected) + + expected = Index([1, 2, 3, 4, 5]) + result = index.sortlevel(ascending=True) + tm.assert_index_equal(result[0], expected) + + expected = Index([5, 4, 3, 2, 1]) + result = index.sortlevel(ascending=False) + tm.assert_index_equal(result[0], expected) + + +class TestMixedIntIndex(Base): + # Mostly the tests from common.py for which the results differ + # in py2 and py3 because ints and strings are uncomparable in py3 + # (GH 13514) + _index_cls = Index + + @pytest.fixture + def simple_index(self) -> Index: + return self._index_cls([0, "a", 1, "b", 2, "c"]) + + @pytest.fixture(params=[[0, "a", 1, "b", 2, "c"]], ids=["mixedIndex"]) + def index(self, request): + return Index(request.param) + + def test_argsort(self, simple_index): + index = simple_index + with pytest.raises(TypeError, match="'>|<' not supported"): + index.argsort() + + def test_numpy_argsort(self, simple_index): + index = simple_index + with pytest.raises(TypeError, match="'>|<' not supported"): + np.argsort(index) + + def test_copy_name(self, simple_index): + # Check 
that "name" argument passed at initialization is honoured + # GH12309 + index = simple_index + + first = type(index)(index, copy=True, name="mario") + second = type(first)(first, copy=False) + + # Even though "copy=False", we want a new object. + assert first is not second + tm.assert_index_equal(first, second) + + assert first.name == "mario" + assert second.name == "mario" + + s1 = Series(2, index=first) + s2 = Series(3, index=second[:-1]) + + s3 = s1 * s2 + + assert s3.index.name == "mario" + + def test_copy_name2(self): + # Check that adding a "name" parameter to the copy is honored + # GH14302 + index = Index([1, 2], name="MyName") + index1 = index.copy() + + tm.assert_index_equal(index, index1) + + index2 = index.copy(name="NewName") + tm.assert_index_equal(index, index2, check_names=False) + assert index.name == "MyName" + assert index2.name == "NewName" + + with tm.assert_produces_warning(FutureWarning): + index3 = index.copy(names=["NewName"]) + tm.assert_index_equal(index, index3, check_names=False) + assert index.name == "MyName" + assert index.names == ["MyName"] + assert index3.name == "NewName" + assert index3.names == ["NewName"] + + def test_copy_names_deprecated(self, simple_index): + # GH44916 + with tm.assert_produces_warning(FutureWarning): + simple_index.copy(names=["a"]) + + def test_unique_na(self): + idx = Index([2, np.nan, 2, 1], name="my_index") + expected = Index([2, np.nan, 1], name="my_index") + result = idx.unique() + tm.assert_index_equal(result, expected) + + def test_logical_compat(self, simple_index): + index = simple_index + assert index.all() == index.values.all() + assert index.any() == index.values.any() + + @pytest.mark.parametrize("how", ["any", "all"]) + @pytest.mark.parametrize("dtype", [None, object, "category"]) + @pytest.mark.parametrize( + "vals,expected", + [ + ([1, 2, 3], [1, 2, 3]), + ([1.0, 2.0, 3.0], [1.0, 2.0, 3.0]), + ([1.0, 2.0, np.nan, 3.0], [1.0, 2.0, 3.0]), + (["A", "B", "C"], ["A", "B", "C"]), + (["A", np.nan, "B", "C"], ["A", "B", "C"]), + ], + ) + def test_dropna(self, how, dtype, vals, expected): + # GH 6194 + index = Index(vals, dtype=dtype) + result = index.dropna(how=how) + expected = Index(expected, dtype=dtype) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("how", ["any", "all"]) + @pytest.mark.parametrize( + "index,expected", + [ + ( + DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"]), + DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"]), + ), + ( + DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03", pd.NaT]), + DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"]), + ), + ( + TimedeltaIndex(["1 days", "2 days", "3 days"]), + TimedeltaIndex(["1 days", "2 days", "3 days"]), + ), + ( + TimedeltaIndex([pd.NaT, "1 days", "2 days", "3 days", pd.NaT]), + TimedeltaIndex(["1 days", "2 days", "3 days"]), + ), + ( + PeriodIndex(["2012-02", "2012-04", "2012-05"], freq="M"), + PeriodIndex(["2012-02", "2012-04", "2012-05"], freq="M"), + ), + ( + PeriodIndex(["2012-02", "2012-04", "NaT", "2012-05"], freq="M"), + PeriodIndex(["2012-02", "2012-04", "2012-05"], freq="M"), + ), + ], + ) + def test_dropna_dt_like(self, how, index, expected): + result = index.dropna(how=how) + tm.assert_index_equal(result, expected) + + def test_dropna_invalid_how_raises(self): + msg = "invalid how option: xxx" + with pytest.raises(ValueError, match=msg): + Index([1, 2, 3]).dropna(how="xxx") + + @pytest.mark.parametrize( + "index", + [ + Index([np.nan]), + Index([np.nan, 1]), + Index([1, 2, np.nan]), + 
Index(["a", "b", np.nan]), + pd.to_datetime(["NaT"]), + pd.to_datetime(["NaT", "2000-01-01"]), + pd.to_datetime(["2000-01-01", "NaT", "2000-01-02"]), + pd.to_timedelta(["1 day", "NaT"]), + ], + ) + def test_is_monotonic_na(self, index): + assert index.is_monotonic_increasing is False + assert index.is_monotonic_decreasing is False + assert index._is_strictly_monotonic_increasing is False + assert index._is_strictly_monotonic_decreasing is False + + def test_int_name_format(self, frame_or_series): + index = Index(["a", "b", "c"], name=0) + result = frame_or_series(list(range(3)), index=index) + assert "0" in repr(result) + + def test_str_to_bytes_raises(self): + # GH 26447 + index = Index([str(x) for x in range(10)]) + msg = "^'str' object cannot be interpreted as an integer$" + with pytest.raises(TypeError, match=msg): + bytes(index) + + @pytest.mark.filterwarnings("ignore:elementwise comparison failed:FutureWarning") + def test_index_with_tuple_bool(self): + # GH34123 + # TODO: also this op right now produces FutureWarning from numpy + # https://github.com/numpy/numpy/issues/11521 + idx = Index([("a", "b"), ("b", "c"), ("c", "a")]) + result = idx == ("c", "a") + expected = np.array([False, False, True]) + tm.assert_numpy_array_equal(result, expected) + + +class TestIndexUtils: + @pytest.mark.parametrize( + "data, names, expected", + [ + ([[1, 2, 3]], None, Index([1, 2, 3])), + ([[1, 2, 3]], ["name"], Index([1, 2, 3], name="name")), + ( + [["a", "a"], ["c", "d"]], + None, + MultiIndex([["a"], ["c", "d"]], [[0, 0], [0, 1]]), + ), + ( + [["a", "a"], ["c", "d"]], + ["L1", "L2"], + MultiIndex([["a"], ["c", "d"]], [[0, 0], [0, 1]], names=["L1", "L2"]), + ), + ], + ) + def test_ensure_index_from_sequences(self, data, names, expected): + result = ensure_index_from_sequences(data, names) + tm.assert_index_equal(result, expected) + + def test_ensure_index_mixed_closed_intervals(self): + # GH27172 + intervals = [ + pd.Interval(0, 1, closed="left"), + pd.Interval(1, 2, closed="right"), + pd.Interval(2, 3, closed="neither"), + pd.Interval(3, 4, closed="both"), + ] + result = ensure_index(intervals) + expected = Index(intervals, dtype=object) + tm.assert_index_equal(result, expected) + + def test_ensure_index_uint64(self): + # with both 0 and a large-uint64, np.array will infer to float64 + # https://github.com/numpy/numpy/issues/19146 + # but a more accurate choice would be uint64 + values = [0, np.iinfo(np.uint64).max] + + result = ensure_index(values) + assert list(result) == values + + expected = Index(values, dtype="uint64") + tm.assert_index_equal(result, expected) + + def test_get_combined_index(self): + result = _get_combined_index([]) + expected = Index([]) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize( + "opname", + [ + "eq", + "ne", + "le", + "lt", + "ge", + "gt", + "add", + "radd", + "sub", + "rsub", + "mul", + "rmul", + "truediv", + "rtruediv", + "floordiv", + "rfloordiv", + "pow", + "rpow", + "mod", + "divmod", + ], +) +def test_generated_op_names(opname, index): + opname = f"__{opname}__" + method = getattr(index, opname) + assert method.__name__ == opname + + +@pytest.mark.parametrize("index_maker", tm.index_subclass_makers_generator()) +def test_index_subclass_constructor_wrong_kwargs(index_maker): + # GH #19348 + with pytest.raises(TypeError, match="unexpected keyword argument"): + index_maker(foo="bar") + + +@pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning") +def test_deprecated_fastpath(): + msg = "[Uu]nexpected keyword argument" + 
with pytest.raises(TypeError, match=msg): + Index(np.array(["a", "b"], dtype=object), name="test", fastpath=True) + + with pytest.raises(TypeError, match=msg): + Int64Index(np.array([1, 2, 3], dtype="int64"), name="test", fastpath=True) + + with pytest.raises(TypeError, match=msg): + RangeIndex(0, 5, 2, name="test", fastpath=True) + + with pytest.raises(TypeError, match=msg): + CategoricalIndex(["a", "b", "c"], name="test", fastpath=True) + + +def test_shape_of_invalid_index(): + # Currently, it is possible to create "invalid" index objects backed by + # a multi-dimensional array (see https://github.com/pandas-dev/pandas/issues/27125 + # about this). However, as long as this is not solved in general,this test ensures + # that the returned shape is consistent with this underlying array for + # compat with matplotlib (see https://github.com/pandas-dev/pandas/issues/27775) + idx = Index([0, 1, 2, 3]) + with tm.assert_produces_warning(FutureWarning): + # GH#30588 multi-dimensional indexing deprecated + assert idx[:, None].shape == (4, 1) + + +def test_validate_1d_input(): + # GH#27125 check that we do not have >1-dimensional input + msg = "Index data must be 1-dimensional" + + arr = np.arange(8).reshape(2, 2, 2) + with pytest.raises(ValueError, match=msg): + Index(arr) + + with pytest.raises(ValueError, match=msg): + Float64Index(arr.astype(np.float64)) + + with pytest.raises(ValueError, match=msg): + Int64Index(arr.astype(np.int64)) + + with pytest.raises(ValueError, match=msg): + UInt64Index(arr.astype(np.uint64)) + + df = DataFrame(arr.reshape(4, 2)) + with pytest.raises(ValueError, match=msg): + Index(df) + + # GH#13601 trying to assign a multi-dimensional array to an index is not + # allowed + ser = Series(0, range(4)) + with pytest.raises(ValueError, match=msg): + ser.index = np.array([[2, 3]] * 4) + + +@pytest.mark.parametrize( + "klass, extra_kwargs", + [ + [Index, {}], + [Int64Index, {}], + [Float64Index, {}], + [DatetimeIndex, {}], + [TimedeltaIndex, {}], + [NumericIndex, {}], + [PeriodIndex, {"freq": "Y"}], + ], +) +def test_construct_from_memoryview(klass, extra_kwargs): + # GH 13120 + result = klass(memoryview(np.arange(2000, 2005)), **extra_kwargs) + expected = klass(list(range(2000, 2005)), **extra_kwargs) + tm.assert_index_equal(result, expected, exact=True) + + +def test_index_set_names_pos_args_deprecation(): + # GH#41485 + idx = Index([1, 2, 3, 4]) + msg = ( + "In a future version of pandas all arguments of Index.set_names " + "except for the argument 'names' will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = idx.set_names("quarter", None) + expected = Index([1, 2, 3, 4], name="quarter") + tm.assert_index_equal(result, expected) + + +def test_drop_duplicates_pos_args_deprecation(): + # GH#41485 + idx = Index([1, 2, 3, 1]) + msg = ( + "In a future version of pandas all arguments of " + "Index.drop_duplicates will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + idx.drop_duplicates("last") + result = idx.drop_duplicates("last") + expected = Index([2, 3, 1]) + tm.assert_index_equal(expected, result) + + +def test_get_attributes_dict_deprecated(): + # https://github.com/pandas-dev/pandas/pull/44028 + idx = Index([1, 2, 3, 1]) + with tm.assert_produces_warning(DeprecationWarning): + attrs = idx._get_attributes_dict() + assert attrs == {"name": None} diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/test_common.py 
b/.local/lib/python3.8/site-packages/pandas/tests/indexes/test_common.py new file mode 100644 index 0000000..5fb9673 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/test_common.py @@ -0,0 +1,489 @@ +""" +Collection of tests asserting things that should be true for +any index subclass except for MultiIndex. Makes use of the `index_flat` +fixture defined in pandas/conftest.py. +""" +import re + +import numpy as np +import pytest + +from pandas.compat import IS64 + +from pandas.core.dtypes.common import is_integer_dtype + +import pandas as pd +from pandas import ( + CategoricalIndex, + DatetimeIndex, + MultiIndex, + PeriodIndex, + RangeIndex, + TimedeltaIndex, +) +import pandas._testing as tm +from pandas.core.api import NumericIndex + + +class TestCommon: + @pytest.mark.parametrize("name", [None, "new_name"]) + def test_to_frame(self, name, index_flat): + # see GH#15230, GH#22580 + idx = index_flat + + if name: + idx_name = name + else: + idx_name = idx.name or 0 + + df = idx.to_frame(name=idx_name) + + assert df.index is idx + assert len(df.columns) == 1 + assert df.columns[0] == idx_name + assert df[idx_name].values is not idx.values + + df = idx.to_frame(index=False, name=idx_name) + assert df.index is not idx + + def test_droplevel(self, index_flat): + # GH 21115 + # MultiIndex is tested separately in test_multi.py + index = index_flat + + assert index.droplevel([]).equals(index) + + for level in [index.name, [index.name]]: + if isinstance(index.name, tuple) and level is index.name: + # GH 21121 : droplevel with tuple name + continue + msg = ( + "Cannot remove 1 levels from an index with 1 levels: at least one " + "level must be left." + ) + with pytest.raises(ValueError, match=msg): + index.droplevel(level) + + for level in "wrong", ["wrong"]: + with pytest.raises( + KeyError, + match=r"'Requested level \(wrong\) does not match index name \(None\)'", + ): + index.droplevel(level) + + def test_constructor_non_hashable_name(self, index_flat): + # GH 20527 + index = index_flat + + message = "Index.name must be a hashable type" + renamed = [["1"]] + + # With .rename() + with pytest.raises(TypeError, match=message): + index.rename(name=renamed) + + # With .set_names() + with pytest.raises(TypeError, match=message): + index.set_names(names=renamed) + + def test_constructor_unwraps_index(self, index_flat): + a = index_flat + b = type(a)(a) + tm.assert_equal(a._data, b._data) + + def test_to_flat_index(self, index_flat): + # 22866 + index = index_flat + + result = index.to_flat_index() + tm.assert_index_equal(result, index) + + def test_set_name_methods(self, index_flat): + # MultiIndex tested separately + index = index_flat + new_name = "This is the new name for this index" + + original_name = index.name + new_ind = index.set_names([new_name]) + assert new_ind.name == new_name + assert index.name == original_name + res = index.rename(new_name, inplace=True) + + # should return None + assert res is None + assert index.name == new_name + assert index.names == [new_name] + # FIXME: dont leave commented-out + # with pytest.raises(TypeError, match="list-like"): + # # should still fail even if it would be the right length + # ind.set_names("a") + with pytest.raises(ValueError, match="Level must be None"): + index.set_names("a", level=0) + + # rename in place just leaves tuples and other containers alone + name = ("A", "B") + index.rename(name, inplace=True) + assert index.name == name + assert index.names == [name] + + def test_copy_and_deepcopy(self, index_flat): + from 
copy import ( + copy, + deepcopy, + ) + + index = index_flat + + for func in (copy, deepcopy): + idx_copy = func(index) + assert idx_copy is not index + assert idx_copy.equals(index) + + new_copy = index.copy(deep=True, name="banana") + assert new_copy.name == "banana" + + def test_copy_name(self, index_flat): + # GH#12309: Check that the "name" argument + # passed at initialization is honored. + index = index_flat + + first = type(index)(index, copy=True, name="mario") + second = type(first)(first, copy=False) + + # Even though "copy=False", we want a new object. + assert first is not second + tm.assert_index_equal(first, second) + + # Not using tm.assert_index_equal() since names differ. + assert index.equals(first) + + assert first.name == "mario" + assert second.name == "mario" + + # TODO: belongs in series arithmetic tests? + s1 = pd.Series(2, index=first) + s2 = pd.Series(3, index=second[:-1]) + # See GH#13365 + s3 = s1 * s2 + assert s3.index.name == "mario" + + def test_copy_name2(self, index_flat): + # GH#35592 + index = index_flat + + assert index.copy(name="mario").name == "mario" + + with pytest.raises(ValueError, match="Length of new names must be 1, got 2"): + index.copy(name=["mario", "luigi"]) + + msg = f"{type(index).__name__}.name must be a hashable type" + with pytest.raises(TypeError, match=msg): + index.copy(name=[["mario"]]) + + def test_unique_level(self, index_flat): + # don't test a MultiIndex here (as its tested separated) + index = index_flat + + # GH 17896 + expected = index.drop_duplicates() + for level in [0, index.name, None]: + result = index.unique(level=level) + tm.assert_index_equal(result, expected) + + msg = "Too many levels: Index has only 1 level, not 4" + with pytest.raises(IndexError, match=msg): + index.unique(level=3) + + msg = ( + fr"Requested level \(wrong\) does not match index name " + fr"\({re.escape(index.name.__repr__())}\)" + ) + with pytest.raises(KeyError, match=msg): + index.unique(level="wrong") + + def test_unique(self, index_flat): + # MultiIndex tested separately + index = index_flat + if not len(index): + pytest.skip("Skip check for empty Index and MultiIndex") + + idx = index[[0] * 5] + idx_unique = index[[0]] + + # We test against `idx_unique`, so first we make sure it's unique + # and doesn't contain nans. 
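+        # Editor's aside, a hypothetical sketch (the demo_* names are not part
+        # of the original suite): Index.unique keeps first-occurrence order,
+        # matching Series.unique.
+        demo_idx = pd.Index([3, 1, 3, 2, 1])
+        assert demo_idx.unique().tolist() == [3, 1, 2]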
+ assert idx_unique.is_unique is True + try: + assert idx_unique.hasnans is False + except NotImplementedError: + pass + + result = idx.unique() + tm.assert_index_equal(result, idx_unique) + + # nans: + if not index._can_hold_na: + pytest.skip("Skip na-check if index cannot hold na") + + vals = index._values[[0] * 5] + vals[0] = np.nan + + vals_unique = vals[:2] + idx_nan = index._shallow_copy(vals) + idx_unique_nan = index._shallow_copy(vals_unique) + assert idx_unique_nan.is_unique is True + + assert idx_nan.dtype == index.dtype + assert idx_unique_nan.dtype == index.dtype + + expected = idx_unique_nan + for i in [idx_nan, idx_unique_nan]: + result = i.unique() + tm.assert_index_equal(result, expected) + + def test_searchsorted_monotonic(self, index_flat, request): + # GH17271 + index = index_flat + # not implemented for tuple searches in MultiIndex + # or Intervals searches in IntervalIndex + if isinstance(index, pd.IntervalIndex): + mark = pytest.mark.xfail( + reason="IntervalIndex.searchsorted does not support Interval arg", + raises=NotImplementedError, + ) + request.node.add_marker(mark) + + # nothing to test if the index is empty + if index.empty: + pytest.skip("Skip check for empty Index") + value = index[0] + + # determine the expected results (handle dupes for 'right') + expected_left, expected_right = 0, (index == value).argmin() + if expected_right == 0: + # all values are the same, expected_right should be length + expected_right = len(index) + + # test _searchsorted_monotonic in all cases + # test searchsorted only for increasing + if index.is_monotonic_increasing: + ssm_left = index._searchsorted_monotonic(value, side="left") + assert expected_left == ssm_left + + ssm_right = index._searchsorted_monotonic(value, side="right") + assert expected_right == ssm_right + + ss_left = index.searchsorted(value, side="left") + assert expected_left == ss_left + + ss_right = index.searchsorted(value, side="right") + assert expected_right == ss_right + + elif index.is_monotonic_decreasing: + ssm_left = index._searchsorted_monotonic(value, side="left") + assert expected_left == ssm_left + + ssm_right = index._searchsorted_monotonic(value, side="right") + assert expected_right == ssm_right + else: + # non-monotonic should raise. 
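+            # Editor's aside, a hypothetical sketch (the demo_* names are not
+            # part of the original suite): a non-monotonic index cannot be
+            # bisected, which is exactly what the assertion below exercises.
+            demo_idx = pd.Index([1, 3, 2])
+            try:
+                demo_idx._searchsorted_monotonic(2, side="left")
+            except ValueError:
+                pass  # index must be monotonic increasing or decreasing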
+ msg = "index must be monotonic increasing or decreasing" + with pytest.raises(ValueError, match=msg): + index._searchsorted_monotonic(value, side="left") + + def test_drop_duplicates(self, index_flat, keep): + # MultiIndex is tested separately + index = index_flat + if isinstance(index, RangeIndex): + pytest.skip( + "RangeIndex is tested in test_drop_duplicates_no_duplicates " + "as it cannot hold duplicates" + ) + if len(index) == 0: + pytest.skip( + "empty index is tested in test_drop_duplicates_no_duplicates " + "as it cannot hold duplicates" + ) + + # make unique index + holder = type(index) + unique_values = list(set(index)) + dtype = index.dtype if isinstance(index, NumericIndex) else None + unique_idx = holder(unique_values, dtype=dtype) + + # make duplicated index + n = len(unique_idx) + duplicated_selection = np.random.choice(n, int(n * 1.5)) + idx = holder(unique_idx.values[duplicated_selection]) + + # Series.duplicated is tested separately + expected_duplicated = ( + pd.Series(duplicated_selection).duplicated(keep=keep).values + ) + tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected_duplicated) + + # Series.drop_duplicates is tested separately + expected_dropped = holder(pd.Series(idx).drop_duplicates(keep=keep)) + tm.assert_index_equal(idx.drop_duplicates(keep=keep), expected_dropped) + + def test_drop_duplicates_no_duplicates(self, index_flat): + # MultiIndex is tested separately + index = index_flat + + # make unique index + if isinstance(index, RangeIndex): + # RangeIndex cannot have duplicates + unique_idx = index + else: + holder = type(index) + unique_values = list(set(index)) + dtype = index.dtype if isinstance(index, NumericIndex) else None + unique_idx = holder(unique_values, dtype=dtype) + + # check on unique index + expected_duplicated = np.array([False] * len(unique_idx), dtype="bool") + tm.assert_numpy_array_equal(unique_idx.duplicated(), expected_duplicated) + result_dropped = unique_idx.drop_duplicates() + tm.assert_index_equal(result_dropped, unique_idx) + # validate shallow copy + assert result_dropped is not unique_idx + + def test_drop_duplicates_inplace(self, index): + msg = r"drop_duplicates\(\) got an unexpected keyword argument" + with pytest.raises(TypeError, match=msg): + index.drop_duplicates(inplace=True) + + def test_has_duplicates(self, index_flat): + # MultiIndex tested separately in: + # tests/indexes/multi/test_unique_and_duplicates. + index = index_flat + holder = type(index) + if not len(index) or isinstance(index, RangeIndex): + # MultiIndex tested separately in: + # tests/indexes/multi/test_unique_and_duplicates. + # RangeIndex is unique by definition. 
+ pytest.skip("Skip check for empty Index, MultiIndex, and RangeIndex") + + idx = holder([index[0]] * 5) + assert idx.is_unique is False + assert idx.has_duplicates is True + + @pytest.mark.parametrize( + "dtype", + ["int64", "uint64", "float64", "category", "datetime64[ns]", "timedelta64[ns]"], + ) + def test_astype_preserves_name(self, index, dtype): + # https://github.com/pandas-dev/pandas/issues/32013 + if isinstance(index, MultiIndex): + index.names = ["idx" + str(i) for i in range(index.nlevels)] + else: + index.name = "idx" + + warn = None + if ( + isinstance(index, DatetimeIndex) + and index.tz is not None + and dtype == "datetime64[ns]" + ): + # This astype is deprecated in favor of tz_localize + warn = FutureWarning + try: + # Some of these conversions cannot succeed so we use a try / except + with tm.assert_produces_warning(warn): + result = index.astype(dtype) + except (ValueError, TypeError, NotImplementedError, SystemError): + return + + if isinstance(index, MultiIndex): + assert result.names == index.names + else: + assert result.name == index.name + + def test_asi8_deprecation(self, index): + # GH#37877 + if isinstance(index, (DatetimeIndex, TimedeltaIndex, PeriodIndex)): + warn = None + else: + warn = FutureWarning + + with tm.assert_produces_warning(warn): + index.asi8 + + def test_hasnans_isnans(self, index_flat): + # GH#11343, added tests for hasnans / isnans + index = index_flat + + # cases in indices doesn't include NaN + idx = index.copy(deep=True) + expected = np.array([False] * len(idx), dtype=bool) + tm.assert_numpy_array_equal(idx._isnan, expected) + assert idx.hasnans is False + + idx = index.copy(deep=True) + values = idx._values + + if len(index) == 0: + return + elif isinstance(index, NumericIndex) and is_integer_dtype(index.dtype): + return + + values[1] = np.nan + + idx = type(index)(values) + + expected = np.array([False] * len(idx), dtype=bool) + expected[1] = True + tm.assert_numpy_array_equal(idx._isnan, expected) + assert idx.hasnans is True + + +@pytest.mark.parametrize("na_position", [None, "middle"]) +def test_sort_values_invalid_na_position(index_with_missing, na_position): + + with pytest.raises(ValueError, match=f"invalid na_position: {na_position}"): + index_with_missing.sort_values(na_position=na_position) + + +@pytest.mark.parametrize("na_position", ["first", "last"]) +def test_sort_values_with_missing(index_with_missing, na_position): + # GH 35584. Test that sort_values works with missing values, + # sort non-missing and place missing according to na_position + + if isinstance(index_with_missing, CategoricalIndex): + pytest.skip("missing value sorting order not well-defined") + + missing_count = np.sum(index_with_missing.isna()) + not_na_vals = index_with_missing[index_with_missing.notna()].values + sorted_values = np.sort(not_na_vals) + if na_position == "first": + sorted_values = np.concatenate([[None] * missing_count, sorted_values]) + else: + sorted_values = np.concatenate([sorted_values, [None] * missing_count]) + + # Explicitly pass dtype needed for Index backed by EA e.g. 
IntegerArray + expected = type(index_with_missing)(sorted_values, dtype=index_with_missing.dtype) + + result = index_with_missing.sort_values(na_position=na_position) + tm.assert_index_equal(result, expected) + + +def test_ndarray_compat_properties(index): + if isinstance(index, PeriodIndex) and not IS64: + pytest.skip("Overflow") + idx = index + assert idx.T.equals(idx) + assert idx.transpose().equals(idx) + + values = idx.values + + assert idx.shape == values.shape + assert idx.ndim == values.ndim + assert idx.size == values.size + + if not isinstance(index, (RangeIndex, MultiIndex)): + # These two are not backed by an ndarray + assert idx.nbytes == values.nbytes + + # test for validity + idx.nbytes + idx.values.nbytes diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/test_engines.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/test_engines.py new file mode 100644 index 0000000..02d8c5b --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/test_engines.py @@ -0,0 +1,193 @@ +import re + +import numpy as np +import pytest + +from pandas._libs import index as libindex + +import pandas as pd + + +@pytest.fixture( + params=[ + (libindex.Int64Engine, np.int64), + (libindex.Int32Engine, np.int32), + (libindex.Int16Engine, np.int16), + (libindex.Int8Engine, np.int8), + (libindex.UInt64Engine, np.uint64), + (libindex.UInt32Engine, np.uint32), + (libindex.UInt16Engine, np.uint16), + (libindex.UInt8Engine, np.uint8), + (libindex.Float64Engine, np.float64), + (libindex.Float32Engine, np.float32), + ], + ids=lambda x: x[0].__name__, +) +def numeric_indexing_engine_type_and_dtype(request): + return request.param + + +class TestDatetimeEngine: + @pytest.mark.parametrize( + "scalar", + [ + pd.Timedelta(pd.Timestamp("2016-01-01").asm8.view("m8[ns]")), + pd.Timestamp("2016-01-01").value, + pd.Timestamp("2016-01-01").to_pydatetime(), + pd.Timestamp("2016-01-01").to_datetime64(), + ], + ) + def test_not_contains_requires_timestamp(self, scalar): + dti1 = pd.date_range("2016-01-01", periods=3) + dti2 = dti1.insert(1, pd.NaT) # non-monotonic + dti3 = dti1.insert(3, dti1[0]) # non-unique + dti4 = pd.date_range("2016-01-01", freq="ns", periods=2_000_000) + dti5 = dti4.insert(0, dti4[0]) # over size threshold, not unique + + msg = "|".join([re.escape(str(scalar)), re.escape(repr(scalar))]) + for dti in [dti1, dti2, dti3, dti4, dti5]: + with pytest.raises(TypeError, match=msg): + scalar in dti._engine + + with pytest.raises(KeyError, match=msg): + dti._engine.get_loc(scalar) + + +class TestTimedeltaEngine: + @pytest.mark.parametrize( + "scalar", + [ + pd.Timestamp(pd.Timedelta(days=42).asm8.view("datetime64[ns]")), + pd.Timedelta(days=42).value, + pd.Timedelta(days=42).to_pytimedelta(), + pd.Timedelta(days=42).to_timedelta64(), + ], + ) + def test_not_contains_requires_timedelta(self, scalar): + tdi1 = pd.timedelta_range("42 days", freq="9h", periods=1234) + tdi2 = tdi1.insert(1, pd.NaT) # non-monotonic + tdi3 = tdi1.insert(3, tdi1[0]) # non-unique + tdi4 = pd.timedelta_range("42 days", freq="ns", periods=2_000_000) + tdi5 = tdi4.insert(0, tdi4[0]) # over size threshold, not unique + + msg = "|".join([re.escape(str(scalar)), re.escape(repr(scalar))]) + for tdi in [tdi1, tdi2, tdi3, tdi4, tdi5]: + with pytest.raises(TypeError, match=msg): + scalar in tdi._engine + + with pytest.raises(KeyError, match=msg): + tdi._engine.get_loc(scalar) + + +class TestNumericEngine: + def test_is_monotonic(self, numeric_indexing_engine_type_and_dtype): + engine_type, dtype = 
numeric_indexing_engine_type_and_dtype + num = 1000 + arr = np.array([1] * num + [2] * num + [3] * num, dtype=dtype) + + # monotonic increasing + engine = engine_type(arr) + assert engine.is_monotonic_increasing is True + assert engine.is_monotonic_decreasing is False + + # monotonic decreasing + engine = engine_type(arr[::-1]) + assert engine.is_monotonic_increasing is False + assert engine.is_monotonic_decreasing is True + + # neither monotonic increasing or decreasing + arr = np.array([1] * num + [2] * num + [1] * num, dtype=dtype) + engine = engine_type(arr[::-1]) + assert engine.is_monotonic_increasing is False + assert engine.is_monotonic_decreasing is False + + def test_is_unique(self, numeric_indexing_engine_type_and_dtype): + engine_type, dtype = numeric_indexing_engine_type_and_dtype + + # unique + arr = np.array([1, 3, 2], dtype=dtype) + engine = engine_type(arr) + assert engine.is_unique is True + + # not unique + arr = np.array([1, 2, 1], dtype=dtype) + engine = engine_type(arr) + assert engine.is_unique is False + + def test_get_loc(self, numeric_indexing_engine_type_and_dtype): + engine_type, dtype = numeric_indexing_engine_type_and_dtype + + # unique + arr = np.array([1, 2, 3], dtype=dtype) + engine = engine_type(arr) + assert engine.get_loc(2) == 1 + + # monotonic + num = 1000 + arr = np.array([1] * num + [2] * num + [3] * num, dtype=dtype) + engine = engine_type(arr) + assert engine.get_loc(2) == slice(1000, 2000) + + # not monotonic + arr = np.array([1, 2, 3] * num, dtype=dtype) + engine = engine_type(arr) + expected = np.array([False, True, False] * num, dtype=bool) + result = engine.get_loc(2) + assert (result == expected).all() + + +class TestObjectEngine: + engine_type = libindex.ObjectEngine + dtype = np.object_ + values = list("abc") + + def test_is_monotonic(self): + + num = 1000 + arr = np.array(["a"] * num + ["a"] * num + ["c"] * num, dtype=self.dtype) + + # monotonic increasing + engine = self.engine_type(arr) + assert engine.is_monotonic_increasing is True + assert engine.is_monotonic_decreasing is False + + # monotonic decreasing + engine = self.engine_type(arr[::-1]) + assert engine.is_monotonic_increasing is False + assert engine.is_monotonic_decreasing is True + + # neither monotonic increasing or decreasing + arr = np.array(["a"] * num + ["b"] * num + ["a"] * num, dtype=self.dtype) + engine = self.engine_type(arr[::-1]) + assert engine.is_monotonic_increasing is False + assert engine.is_monotonic_decreasing is False + + def test_is_unique(self): + # unique + arr = np.array(self.values, dtype=self.dtype) + engine = self.engine_type(arr) + assert engine.is_unique is True + + # not unique + arr = np.array(["a", "b", "a"], dtype=self.dtype) + engine = self.engine_type(arr) + assert engine.is_unique is False + + def test_get_loc(self): + # unique + arr = np.array(self.values, dtype=self.dtype) + engine = self.engine_type(arr) + assert engine.get_loc("b") == 1 + + # monotonic + num = 1000 + arr = np.array(["a"] * num + ["b"] * num + ["c"] * num, dtype=self.dtype) + engine = self.engine_type(arr) + assert engine.get_loc("b") == slice(1000, 2000) + + # not monotonic + arr = np.array(self.values * num, dtype=self.dtype) + engine = self.engine_type(arr) + expected = np.array([False, True, False] * num, dtype=bool) + result = engine.get_loc("b") + assert (result == expected).all() diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/test_frozen.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/test_frozen.py new file mode 100644 index 
0000000..cde3fc0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/test_frozen.py @@ -0,0 +1,105 @@ +import re + +import pytest + +from pandas.core.indexes.frozen import FrozenList + + +class TestFrozenList: + + unicode_container = FrozenList(["\u05d0", "\u05d1", "c"]) + + def setup_method(self, _): + self.lst = [1, 2, 3, 4, 5] + self.container = FrozenList(self.lst) + + def check_mutable_error(self, *args, **kwargs): + # Pass whatever function you normally would to pytest.raises + # (after the Exception kind). + mutable_regex = re.compile("does not support mutable operations") + msg = "'(_s)?re.(SRE_)?Pattern' object is not callable" + with pytest.raises(TypeError, match=msg): + mutable_regex(*args, **kwargs) + + def test_no_mutable_funcs(self): + def setitem(): + self.container[0] = 5 + + self.check_mutable_error(setitem) + + def setslice(): + self.container[1:2] = 3 + + self.check_mutable_error(setslice) + + def delitem(): + del self.container[0] + + self.check_mutable_error(delitem) + + def delslice(): + del self.container[0:3] + + self.check_mutable_error(delslice) + + mutable_methods = ("extend", "pop", "remove", "insert") + + for meth in mutable_methods: + self.check_mutable_error(getattr(self.container, meth)) + + def test_slicing_maintains_type(self): + result = self.container[1:2] + expected = self.lst[1:2] + self.check_result(result, expected) + + def check_result(self, result, expected): + assert isinstance(result, FrozenList) + assert result == expected + + def test_string_methods_dont_fail(self): + repr(self.container) + str(self.container) + bytes(self.container) + + def test_tricky_container(self): + repr(self.unicode_container) + str(self.unicode_container) + + def test_add(self): + result = self.container + (1, 2, 3) + expected = FrozenList(self.lst + [1, 2, 3]) + self.check_result(result, expected) + + result = (1, 2, 3) + self.container + expected = FrozenList([1, 2, 3] + self.lst) + self.check_result(result, expected) + + def test_iadd(self): + q = r = self.container + + q += [5] + self.check_result(q, self.lst + [5]) + + # Other shouldn't be mutated. + self.check_result(r, self.lst) + + def test_union(self): + result = self.container.union((1, 2, 3)) + expected = FrozenList(self.lst + [1, 2, 3]) + self.check_result(result, expected) + + def test_difference(self): + result = self.container.difference([2]) + expected = FrozenList([1, 3, 4, 5]) + self.check_result(result, expected) + + def test_difference_dupe(self): + result = FrozenList([1, 2, 3, 2]).difference([2]) + expected = FrozenList([1, 3]) + self.check_result(result, expected) + + def test_tricky_container_to_bytes_raises(self): + # GH 26447 + msg = "^'str' object cannot be interpreted as an integer$" + with pytest.raises(TypeError, match=msg): + bytes(self.unicode_container) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/test_index_new.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/test_index_new.py new file mode 100644 index 0000000..5f57e03 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/test_index_new.py @@ -0,0 +1,374 @@ +""" +Tests for the Index constructor conducting inference. 
+""" +from datetime import ( + datetime, + timedelta, +) +from decimal import Decimal + +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_unsigned_integer_dtype + +from pandas import ( + NA, + Categorical, + CategoricalIndex, + DatetimeIndex, + Index, + IntervalIndex, + MultiIndex, + NaT, + PeriodIndex, + Series, + TimedeltaIndex, + Timestamp, + array, + date_range, + period_range, + timedelta_range, +) +import pandas._testing as tm +from pandas.core.api import ( + Float64Index, + Int64Index, + UInt64Index, +) + + +class TestIndexConstructorInference: + @pytest.mark.parametrize("na_value", [None, np.nan]) + @pytest.mark.parametrize("vtype", [list, tuple, iter]) + def test_construction_list_tuples_nan(self, na_value, vtype): + # GH#18505 : valid tuples containing NaN + values = [(1, "two"), (3.0, na_value)] + result = Index(vtype(values)) + expected = MultiIndex.from_tuples(values) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "dtype", + [int, "int64", "int32", "int16", "int8", "uint64", "uint32", "uint16", "uint8"], + ) + def test_constructor_int_dtype_float(self, dtype): + # GH#18400 + if is_unsigned_integer_dtype(dtype): + index_type = UInt64Index + else: + index_type = Int64Index + + expected = index_type([0, 1, 2, 3]) + result = Index([0.0, 1.0, 2.0, 3.0], dtype=dtype) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("cast_index", [True, False]) + @pytest.mark.parametrize( + "vals", [[True, False, True], np.array([True, False, True], dtype=bool)] + ) + def test_constructor_dtypes_to_object(self, cast_index, vals): + if cast_index: + index = Index(vals, dtype=bool) + else: + index = Index(vals) + + assert type(index) is Index + assert index.dtype == object + + def test_constructor_categorical_to_object(self): + # GH#32167 Categorical data and dtype=object should return object-dtype + ci = CategoricalIndex(range(5)) + result = Index(ci, dtype=object) + assert not isinstance(result, CategoricalIndex) + + def test_constructor_infer_periodindex(self): + xp = period_range("2012-1-1", freq="M", periods=3) + rs = Index(xp) + tm.assert_index_equal(rs, xp) + assert isinstance(rs, PeriodIndex) + + def test_from_list_of_periods(self): + rng = period_range("1/1/2000", periods=20, freq="D") + periods = list(rng) + + result = Index(periods) + assert isinstance(result, PeriodIndex) + + @pytest.mark.parametrize("pos", [0, 1]) + @pytest.mark.parametrize( + "klass,dtype,ctor", + [ + (DatetimeIndex, "datetime64[ns]", np.datetime64("nat")), + (TimedeltaIndex, "timedelta64[ns]", np.timedelta64("nat")), + ], + ) + def test_constructor_infer_nat_dt_like( + self, pos, klass, dtype, ctor, nulls_fixture, request + ): + if isinstance(nulls_fixture, Decimal): + # We dont cast these to datetime64/timedelta64 + return + + expected = klass([NaT, NaT]) + assert expected.dtype == dtype + data = [ctor] + data.insert(pos, nulls_fixture) + + warn = None + if nulls_fixture is NA: + expected = Index([NA, NaT]) + mark = pytest.mark.xfail(reason="Broken with np.NaT ctor; see GH 31884") + request.node.add_marker(mark) + # GH#35942 numpy will emit a DeprecationWarning within the + # assert_index_equal calls. Since we can't do anything + # about it until GH#31884 is fixed, we suppress that warning. 
+ warn = DeprecationWarning + + result = Index(data) + + with tm.assert_produces_warning(warn): + tm.assert_index_equal(result, expected) + + result = Index(np.array(data, dtype=object)) + + with tm.assert_produces_warning(warn): + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("swap_objs", [True, False]) + def test_constructor_mixed_nat_objs_infers_object(self, swap_objs): + # mixed np.datetime64/timedelta64 nat results in object + data = [np.datetime64("nat"), np.timedelta64("nat")] + if swap_objs: + data = data[::-1] + + expected = Index(data, dtype=object) + tm.assert_index_equal(Index(data), expected) + tm.assert_index_equal(Index(np.array(data, dtype=object)), expected) + + @pytest.mark.parametrize("swap_objs", [True, False]) + def test_constructor_datetime_and_datetime64(self, swap_objs): + data = [Timestamp(2021, 6, 8, 9, 42), np.datetime64("now")] + if swap_objs: + data = data[::-1] + expected = DatetimeIndex(data) + + tm.assert_index_equal(Index(data), expected) + tm.assert_index_equal(Index(np.array(data, dtype=object)), expected) + + +class TestDtypeEnforced: + # check we don't silently ignore the dtype keyword + + def test_constructor_object_dtype_with_ea_data(self, any_numeric_ea_dtype): + # GH#45206 + arr = array([0], dtype=any_numeric_ea_dtype) + + idx = Index(arr, dtype=object) + assert idx.dtype == object + + @pytest.mark.parametrize("dtype", [object, "float64", "uint64", "category"]) + def test_constructor_range_values_mismatched_dtype(self, dtype): + rng = Index(range(5)) + + result = Index(rng, dtype=dtype) + assert result.dtype == dtype + + result = Index(range(5), dtype=dtype) + assert result.dtype == dtype + + @pytest.mark.parametrize("dtype", [object, "float64", "uint64", "category"]) + def test_constructor_categorical_values_mismatched_non_ea_dtype(self, dtype): + cat = Categorical([1, 2, 3]) + + result = Index(cat, dtype=dtype) + assert result.dtype == dtype + + def test_constructor_categorical_values_mismatched_dtype(self): + dti = date_range("2016-01-01", periods=3) + cat = Categorical(dti) + result = Index(cat, dti.dtype) + tm.assert_index_equal(result, dti) + + dti2 = dti.tz_localize("Asia/Tokyo") + cat2 = Categorical(dti2) + result = Index(cat2, dti2.dtype) + tm.assert_index_equal(result, dti2) + + ii = IntervalIndex.from_breaks(range(5)) + cat3 = Categorical(ii) + result = Index(cat3, dtype=ii.dtype) + tm.assert_index_equal(result, ii) + + def test_constructor_ea_values_mismatched_categorical_dtype(self): + dti = date_range("2016-01-01", periods=3) + result = Index(dti, dtype="category") + expected = CategoricalIndex(dti) + tm.assert_index_equal(result, expected) + + dti2 = date_range("2016-01-01", periods=3, tz="US/Pacific") + result = Index(dti2, dtype="category") + expected = CategoricalIndex(dti2) + tm.assert_index_equal(result, expected) + + def test_constructor_period_values_mismatched_dtype(self): + pi = period_range("2016-01-01", periods=3, freq="D") + result = Index(pi, dtype="category") + expected = CategoricalIndex(pi) + tm.assert_index_equal(result, expected) + + def test_constructor_timedelta64_values_mismatched_dtype(self): + # check we don't silently ignore the dtype keyword + tdi = timedelta_range("4 Days", periods=5) + result = Index(tdi, dtype="category") + expected = CategoricalIndex(tdi) + tm.assert_index_equal(result, expected) + + def test_constructor_interval_values_mismatched_dtype(self): + dti = date_range("2016-01-01", periods=3) + ii = IntervalIndex.from_breaks(dti) + result = Index(ii, dtype="category") + 
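+        # [editor's note] the equivalence asserted below: Index(ii,
+        # dtype="category") should match CategoricalIndex(ii), whose
+        # .categories attribute is itself an IntervalIndex, so the interval
+        # dtype is preserved as the category dtype.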
expected = CategoricalIndex(ii) + tm.assert_index_equal(result, expected) + + def test_constructor_datetime64_values_mismatched_period_dtype(self): + dti = date_range("2016-01-01", periods=3) + result = Index(dti, dtype="Period[D]") + expected = dti.to_period("D") + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("dtype", ["int64", "uint64"]) + def test_constructor_int_dtype_nan_raises(self, dtype): + # see GH#15187 + data = [np.nan] + msg = "cannot convert" + with pytest.raises(ValueError, match=msg): + Index(data, dtype=dtype) + + @pytest.mark.parametrize( + "vals", + [ + [1, 2, 3], + np.array([1, 2, 3]), + np.array([1, 2, 3], dtype=int), + # below should coerce + [1.0, 2.0, 3.0], + np.array([1.0, 2.0, 3.0], dtype=float), + ], + ) + def test_constructor_dtypes_to_int64(self, vals): + index = Index(vals, dtype=int) + assert isinstance(index, Int64Index) + + @pytest.mark.parametrize( + "vals", + [ + [1, 2, 3], + [1.0, 2.0, 3.0], + np.array([1.0, 2.0, 3.0]), + np.array([1, 2, 3], dtype=int), + np.array([1.0, 2.0, 3.0], dtype=float), + ], + ) + def test_constructor_dtypes_to_float64(self, vals): + index = Index(vals, dtype=float) + assert isinstance(index, Float64Index) + + @pytest.mark.parametrize( + "vals", + [ + [1, 2, 3], + np.array([1, 2, 3], dtype=int), + np.array(["2011-01-01", "2011-01-02"], dtype="datetime64[ns]"), + [datetime(2011, 1, 1), datetime(2011, 1, 2)], + ], + ) + def test_constructor_dtypes_to_categorical(self, vals): + index = Index(vals, dtype="category") + assert isinstance(index, CategoricalIndex) + + @pytest.mark.parametrize("cast_index", [True, False]) + @pytest.mark.parametrize( + "vals", + [ + Index(np.array([np.datetime64("2011-01-01"), np.datetime64("2011-01-02")])), + Index([datetime(2011, 1, 1), datetime(2011, 1, 2)]), + ], + ) + def test_constructor_dtypes_to_datetime(self, cast_index, vals): + if cast_index: + index = Index(vals, dtype=object) + assert isinstance(index, Index) + assert index.dtype == object + else: + index = Index(vals) + assert isinstance(index, DatetimeIndex) + + @pytest.mark.parametrize("cast_index", [True, False]) + @pytest.mark.parametrize( + "vals", + [ + np.array([np.timedelta64(1, "D"), np.timedelta64(1, "D")]), + [timedelta(1), timedelta(1)], + ], + ) + def test_constructor_dtypes_to_timedelta(self, cast_index, vals): + if cast_index: + index = Index(vals, dtype=object) + assert isinstance(index, Index) + assert index.dtype == object + else: + index = Index(vals) + assert isinstance(index, TimedeltaIndex) + + +class TestIndexConstructorUnwrapping: + # Test passing different arraylike values to pd.Index + + @pytest.mark.parametrize("klass", [Index, DatetimeIndex]) + def test_constructor_from_series_dt64(self, klass): + stamps = [Timestamp("20110101"), Timestamp("20120101"), Timestamp("20130101")] + expected = DatetimeIndex(stamps) + ser = Series(stamps) + result = klass(ser) + tm.assert_index_equal(result, expected) + + def test_constructor_no_pandas_array(self): + ser = Series([1, 2, 3]) + result = Index(ser.array) + expected = Index([1, 2, 3]) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "array", + [ + np.arange(5), + np.array(["a", "b", "c"]), + date_range("2000-01-01", periods=3).values, + ], + ) + def test_constructor_ndarray_like(self, array): + # GH#5460#issuecomment-44474502 + # it should be possible to convert any object that satisfies the numpy + # ndarray interface directly into an Index + class ArrayLike: + def __init__(self, array): + self.array = array + + def 
__array__(self, dtype=None) -> np.ndarray: + return self.array + + expected = Index(array) + result = Index(ArrayLike(array)) + tm.assert_index_equal(result, expected) + + +class TestIndexConstructionErrors: + def test_constructor_overflow_int64(self): + # see GH#15832 + msg = ( + "The elements provided in the data cannot " + "all be casted to the dtype int64" + ) + with pytest.raises(OverflowError, match=msg): + Index([np.iinfo(np.uint64).max - 1], dtype="int64") diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/test_indexing.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/test_indexing.py new file mode 100644 index 0000000..7390392 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/test_indexing.py @@ -0,0 +1,358 @@ +""" +test_indexing tests the following Index methods: + __getitem__ + get_loc + get_value + __contains__ + take + where + get_indexer + get_indexer_for + slice_locs + asof_locs + +The corresponding tests.indexes.[index_type].test_indexing files +contain tests for the corresponding methods specific to those Index subclasses. +""" +import numpy as np +import pytest + +from pandas.errors import InvalidIndexError + +from pandas import ( + DatetimeIndex, + Index, + IntervalIndex, + MultiIndex, + NaT, + PeriodIndex, + RangeIndex, + Series, + TimedeltaIndex, +) +import pandas._testing as tm +from pandas.core.api import ( + Float64Index, + Int64Index, + UInt64Index, +) + + +class TestTake: + def test_take_invalid_kwargs(self, index): + indices = [1, 2] + + msg = r"take\(\) got an unexpected keyword argument 'foo'" + with pytest.raises(TypeError, match=msg): + index.take(indices, foo=2) + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + index.take(indices, out=indices) + + msg = "the 'mode' parameter is not supported" + with pytest.raises(ValueError, match=msg): + index.take(indices, mode="clip") + + def test_take(self, index): + indexer = [4, 3, 0, 2] + if len(index) < 5: + # not enough elements; ignore + return + + result = index.take(indexer) + expected = index[indexer] + assert result.equals(expected) + + if not isinstance(index, (DatetimeIndex, PeriodIndex, TimedeltaIndex)): + # GH 10791 + msg = r"'(.*Index)' object has no attribute 'freq'" + with pytest.raises(AttributeError, match=msg): + index.freq + + def test_take_indexer_type(self): + # GH#42875 + integer_index = Index([0, 1, 2, 3]) + scalar_index = 1 + msg = "Expected indices to be array-like" + with pytest.raises(TypeError, match=msg): + integer_index.take(scalar_index) + + def test_take_minus1_without_fill(self, index): + # -1 does not get treated as NA unless allow_fill=True is passed + if len(index) == 0: + # Test is not applicable + return + + result = index.take([0, 0, -1]) + + expected = index.take([0, 0, len(index) - 1]) + tm.assert_index_equal(result, expected) + + +class TestContains: + @pytest.mark.parametrize( + "index,val", + [ + (Index([0, 1, 2]), 2), + (Index([0, 1, "2"]), "2"), + (Index([0, 1, 2, np.inf, 4]), 4), + (Index([0, 1, 2, np.nan, 4]), 4), + (Index([0, 1, 2, np.inf]), np.inf), + (Index([0, 1, 2, np.nan]), np.nan), + ], + ) + def test_index_contains(self, index, val): + assert val in index + + @pytest.mark.parametrize( + "index,val", + [ + (Index([0, 1, 2]), "2"), + (Index([0, 1, "2"]), 2), + (Index([0, 1, 2, np.inf]), 4), + (Index([0, 1, 2, np.nan]), 4), + (Index([0, 1, 2, np.inf]), np.nan), + (Index([0, 1, 2, np.nan]), np.inf), + # Checking if np.inf in Int64Index should not cause an 
OverflowError + # Related to GH 16957 + (Int64Index([0, 1, 2]), np.inf), + (Int64Index([0, 1, 2]), np.nan), + (UInt64Index([0, 1, 2]), np.inf), + (UInt64Index([0, 1, 2]), np.nan), + ], + ) + def test_index_not_contains(self, index, val): + assert val not in index + + @pytest.mark.parametrize( + "index,val", [(Index([0, 1, "2"]), 0), (Index([0, 1, "2"]), "2")] + ) + def test_mixed_index_contains(self, index, val): + # GH#19860 + assert val in index + + @pytest.mark.parametrize( + "index,val", [(Index([0, 1, "2"]), "1"), (Index([0, 1, "2"]), 2)] + ) + def test_mixed_index_not_contains(self, index, val): + # GH#19860 + assert val not in index + + def test_contains_with_float_index(self): + # GH#22085 + integer_index = Int64Index([0, 1, 2, 3]) + uinteger_index = UInt64Index([0, 1, 2, 3]) + float_index = Float64Index([0.1, 1.1, 2.2, 3.3]) + + for index in (integer_index, uinteger_index): + assert 1.1 not in index + assert 1.0 in index + assert 1 in index + + assert 1.1 in float_index + assert 1.0 not in float_index + assert 1 not in float_index + + def test_contains_requires_hashable_raises(self, index): + if isinstance(index, MultiIndex): + return # TODO: do we want this to raise? + + msg = "unhashable type: 'list'" + with pytest.raises(TypeError, match=msg): + [] in index + + msg = "|".join( + [ + r"unhashable type: 'dict'", + r"must be real number, not dict", + r"an integer is required", + r"\{\}", + r"pandas\._libs\.interval\.IntervalTree' is not iterable", + ] + ) + with pytest.raises(TypeError, match=msg): + {} in index._engine + + +class TestGetValue: + @pytest.mark.parametrize( + "index", ["string", "int", "datetime", "timedelta"], indirect=True + ) + def test_get_value(self, index): + # TODO(2.0): can remove once get_value deprecation is enforced GH#19728 + values = np.random.randn(100) + value = index[67] + + with pytest.raises(AttributeError, match="has no attribute '_values'"): + # Index.get_value requires a Series, not an ndarray + with tm.assert_produces_warning(FutureWarning): + index.get_value(values, value) + + with tm.assert_produces_warning(FutureWarning): + result = index.get_value(Series(values, index=values), value) + tm.assert_almost_equal(result, values[67]) + + +class TestGetLoc: + def test_get_loc_non_hashable(self, index): + # MultiIndex and Index raise TypeError, others InvalidIndexError + + with pytest.raises((TypeError, InvalidIndexError), match="slice"): + index.get_loc(slice(0, 1)) + + def test_get_loc_generator(self, index): + + exc = KeyError + if isinstance( + index, + ( + DatetimeIndex, + TimedeltaIndex, + PeriodIndex, + RangeIndex, + IntervalIndex, + MultiIndex, + ), + ): + # TODO: make these more consistent? 
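+        # [editor's note] assumed illustration of the split: a plain Index
+        # treats the generator as an absent, unhashable label, so it
+        # surfaces as KeyError(<generator object ...>); the subclasses
+        # listed above validate the key first and raise
+        # InvalidIndexError("generator object ...") instead.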
+ exc = InvalidIndexError + with pytest.raises(exc, match="generator object"): + # MultiIndex specifically checks for generator; others for scalar + index.get_loc(x for x in range(5)) + + +class TestGetIndexer: + def test_get_indexer_base(self, index): + + if index._index_as_unique: + expected = np.arange(index.size, dtype=np.intp) + actual = index.get_indexer(index) + tm.assert_numpy_array_equal(expected, actual) + else: + msg = "Reindexing only valid with uniquely valued Index objects" + with pytest.raises(InvalidIndexError, match=msg): + index.get_indexer(index) + + with pytest.raises(ValueError, match="Invalid fill method"): + index.get_indexer(index, method="invalid") + + def test_get_indexer_consistency(self, index): + # See GH#16819 + + if index._index_as_unique: + indexer = index.get_indexer(index[0:2]) + assert isinstance(indexer, np.ndarray) + assert indexer.dtype == np.intp + else: + msg = "Reindexing only valid with uniquely valued Index objects" + with pytest.raises(InvalidIndexError, match=msg): + index.get_indexer(index[0:2]) + + indexer, _ = index.get_indexer_non_unique(index[0:2]) + assert isinstance(indexer, np.ndarray) + assert indexer.dtype == np.intp + + +class TestConvertSliceIndexer: + def test_convert_almost_null_slice(self, index): + # slice with None at both ends, but not step + + key = slice(None, None, "foo") + + if isinstance(index, IntervalIndex): + msg = "label-based slicing with step!=1 is not supported for IntervalIndex" + with pytest.raises(ValueError, match=msg): + index._convert_slice_indexer(key, "loc") + else: + msg = "'>=' not supported between instances of 'str' and 'int'" + with pytest.raises(TypeError, match=msg): + index._convert_slice_indexer(key, "loc") + + +class TestPutmask: + def test_putmask_with_wrong_mask(self, index): + # GH#18368 + if not len(index): + return + + fill = index[0] + + msg = "putmask: mask and data must be the same size" + with pytest.raises(ValueError, match=msg): + index.putmask(np.ones(len(index) + 1, np.bool_), fill) + + with pytest.raises(ValueError, match=msg): + index.putmask(np.ones(len(index) - 1, np.bool_), fill) + + with pytest.raises(ValueError, match=msg): + index.putmask("foo", fill) + + +@pytest.mark.parametrize( + "idx", [Index([1, 2, 3]), Index([0.1, 0.2, 0.3]), Index(["a", "b", "c"])] +) +def test_getitem_deprecated_float(idx): + # https://github.com/pandas-dev/pandas/issues/34191 + + with tm.assert_produces_warning(FutureWarning): + result = idx[1.0] + + expected = idx[1] + assert result == expected + + +def test_maybe_cast_slice_bound_kind_deprecated(index): + if not len(index): + return + + with tm.assert_produces_warning(FutureWarning): + # passed as keyword + index._maybe_cast_slice_bound(index[0], "left", kind="loc") + + with tm.assert_produces_warning(FutureWarning): + # pass as positional + index._maybe_cast_slice_bound(index[0], "left", "loc") + + +@pytest.mark.parametrize( + "idx,target,expected", + [ + ([np.nan, "var1", np.nan], [np.nan], np.array([0, 2], dtype=np.intp)), + ( + [np.nan, "var1", np.nan], + [np.nan, "var1"], + np.array([0, 2, 1], dtype=np.intp), + ), + ( + np.array([np.nan, "var1", np.nan], dtype=object), + [np.nan], + np.array([0, 2], dtype=np.intp), + ), + ( + DatetimeIndex(["2020-08-05", NaT, NaT]), + [NaT], + np.array([1, 2], dtype=np.intp), + ), + (["a", "b", "a", np.nan], [np.nan], np.array([3], dtype=np.intp)), + ( + np.array(["b", np.nan, float("NaN"), "b"], dtype=object), + Index([np.nan], dtype=object), + np.array([1, 2], dtype=np.intp), + ), + ], +) +def 
test_get_indexer_non_unique_multiple_nans(idx, target, expected): + # GH 35392 + axis = Index(idx) + actual = axis.get_indexer_for(target) + tm.assert_numpy_array_equal(actual, expected) + + +def test_get_indexer_non_unique_nans_in_object_dtype_target(nulls_fixture): + idx = Index([1.0, 2.0]) + target = Index([1, nulls_fixture], dtype="object") + + result_idx, result_missing = idx.get_indexer_non_unique(target) + tm.assert_numpy_array_equal(result_idx, np.array([0, -1], dtype=np.intp)) + tm.assert_numpy_array_equal(result_missing, np.array([1], dtype=np.intp)) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/test_numpy_compat.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/test_numpy_compat.py new file mode 100644 index 0000000..3fad803 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/test_numpy_compat.py @@ -0,0 +1,148 @@ +import numpy as np +import pytest + +from pandas import ( + CategoricalIndex, + DatetimeIndex, + Index, + PeriodIndex, + TimedeltaIndex, + isna, +) +import pandas._testing as tm +from pandas.core.api import ( + Float64Index, + NumericIndex, +) +from pandas.core.arrays import BooleanArray +from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin + + +@pytest.mark.parametrize( + "func", + [ + np.exp, + np.exp2, + np.expm1, + np.log, + np.log2, + np.log10, + np.log1p, + np.sqrt, + np.sin, + np.cos, + np.tan, + np.arcsin, + np.arccos, + np.arctan, + np.sinh, + np.cosh, + np.tanh, + np.arcsinh, + np.arccosh, + np.arctanh, + np.deg2rad, + np.rad2deg, + ], + ids=lambda x: x.__name__, +) +def test_numpy_ufuncs_basic(index, func): + # test ufuncs of numpy, see: + # https://numpy.org/doc/stable/reference/ufuncs.html + + if isinstance(index, DatetimeIndexOpsMixin): + with tm.external_error_raised((TypeError, AttributeError)): + with np.errstate(all="ignore"): + func(index) + elif isinstance(index, NumericIndex) or ( + not isinstance(index.dtype, np.dtype) and index.dtype._is_numeric + ): + # coerces to float (e.g. np.sin) + with np.errstate(all="ignore"): + result = func(index) + exp = Index(func(index.values), name=index.name) + + tm.assert_index_equal(result, exp) + if type(index) is not Index: + # i.e NumericIndex + assert isinstance(result, Float64Index) + else: + # e.g. np.exp with Int64 -> Float64 + assert type(result) is Index + else: + # raise AttributeError or TypeError + if len(index) == 0: + pass + else: + with tm.external_error_raised((TypeError, AttributeError)): + with np.errstate(all="ignore"): + func(index) + + +@pytest.mark.parametrize( + "func", [np.isfinite, np.isinf, np.isnan, np.signbit], ids=lambda x: x.__name__ +) +def test_numpy_ufuncs_other(index, func, request): + # test ufuncs of numpy, see: + # https://numpy.org/doc/stable/reference/ufuncs.html + if isinstance(index, (DatetimeIndex, TimedeltaIndex)): + + if func in (np.isfinite, np.isinf, np.isnan): + # numpy 1.18 changed isinf and isnan to not raise on dt64/td64 + result = func(index) + assert isinstance(result, np.ndarray) + else: + with tm.external_error_raised(TypeError): + func(index) + + elif isinstance(index, PeriodIndex): + with tm.external_error_raised(TypeError): + func(index) + + elif isinstance(index, NumericIndex) or ( + not isinstance(index.dtype, np.dtype) and index.dtype._is_numeric + ): + # Results in bool array + result = func(index) + if not isinstance(index.dtype, np.dtype): + # e.g. 
Int64 we expect to get BooleanArray back + assert isinstance(result, BooleanArray) + else: + assert isinstance(result, np.ndarray) + assert not isinstance(result, Index) + else: + if len(index) == 0: + pass + else: + with tm.external_error_raised(TypeError): + func(index) + + +@pytest.mark.parametrize("func", [np.maximum, np.minimum]) +def test_numpy_ufuncs_reductions(index, func, request): + # TODO: overlap with tests.series.test_ufunc.test_reductions + if len(index) == 0: + return + + if repr(index.dtype) == "string[pyarrow]": + mark = pytest.mark.xfail(reason="ArrowStringArray has no min/max") + request.node.add_marker(mark) + + if isinstance(index, CategoricalIndex) and index.dtype.ordered is False: + with pytest.raises(TypeError, match="is not ordered for"): + func.reduce(index) + return + else: + result = func.reduce(index) + + if func is np.maximum: + expected = index.max(skipna=False) + else: + expected = index.min(skipna=False) + # TODO: do we have cases both with and without NAs? + + assert type(result) is type(expected) + if isna(result): + assert isna(expected) + else: + assert result == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/test_setops.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/test_setops.py new file mode 100644 index 0000000..a73ac89 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/test_setops.py @@ -0,0 +1,827 @@ +""" +The tests in this package are to ensure the proper resultant dtypes of +set operations. +""" +from datetime import datetime +import operator + +import numpy as np +import pytest + +from pandas.core.dtypes.cast import find_common_type +from pandas.core.dtypes.common import is_dtype_equal + +from pandas import ( + CategoricalIndex, + DatetimeIndex, + Index, + MultiIndex, + RangeIndex, + Series, + TimedeltaIndex, + Timestamp, +) +import pandas._testing as tm +from pandas.api.types import ( + is_datetime64tz_dtype, + is_signed_integer_dtype, + pandas_dtype, +) +from pandas.core.api import ( + Float64Index, + Int64Index, + UInt64Index, +) + +COMPATIBLE_INCONSISTENT_PAIRS = [ + (np.float64, np.int64), + (np.float64, np.uint64), +] + + +def test_union_same_types(index): + # Union with a non-unique, non-monotonic index raises error + # Only needed for bool index factory + idx1 = index.sort_values() + idx2 = index.sort_values() + assert idx1.union(idx2).dtype == idx1.dtype + + +def test_union_different_types(index_flat, index_flat2, request): + # This test only considers combinations of indices + # GH 23525 + idx1 = index_flat + idx2 = index_flat2 + + if ( + not idx1.is_unique + and idx1.dtype.kind == "i" + and is_dtype_equal(idx2.dtype, "boolean") + ) or ( + not idx2.is_unique + and idx2.dtype.kind == "i" + and is_dtype_equal(idx1.dtype, "boolean") + ): + mark = pytest.mark.xfail(reason="GH#44000 True==1", raises=ValueError) + request.node.add_marker(mark) + + common_dtype = find_common_type([idx1.dtype, idx2.dtype]) + + any_uint64 = idx1.dtype == np.uint64 or idx2.dtype == np.uint64 + idx1_signed = is_signed_integer_dtype(idx1.dtype) + idx2_signed = is_signed_integer_dtype(idx2.dtype) + + # Union with a non-unique, non-monotonic index raises error + # This applies to the boolean index + idx1 = idx1.sort_values() + idx2 = idx2.sort_values() + + res1 = idx1.union(idx2) + res2 = idx2.union(idx1) + + if any_uint64 and (idx1_signed or idx2_signed): + assert res1.dtype == np.dtype("O") + assert res2.dtype == np.dtype("O") + else: + assert res1.dtype == common_dtype + assert res2.dtype == 
common_dtype + + +@pytest.mark.parametrize( + "idx_fact1,idx_fact2", + [ + (tm.makeIntIndex, tm.makeRangeIndex), + (tm.makeFloatIndex, tm.makeIntIndex), + (tm.makeFloatIndex, tm.makeRangeIndex), + (tm.makeFloatIndex, tm.makeUIntIndex), + ], +) +def test_compatible_inconsistent_pairs(idx_fact1, idx_fact2): + # GH 23525 + idx1 = idx_fact1(10) + idx2 = idx_fact2(20) + + res1 = idx1.union(idx2) + res2 = idx2.union(idx1) + + assert res1.dtype in (idx1.dtype, idx2.dtype) + assert res2.dtype in (idx1.dtype, idx2.dtype) + + +@pytest.mark.parametrize( + "left, right, expected", + [ + ("int64", "int64", "int64"), + ("int64", "uint64", "object"), + ("int64", "float64", "float64"), + ("uint64", "float64", "float64"), + ("uint64", "uint64", "uint64"), + ("float64", "float64", "float64"), + ("datetime64[ns]", "int64", "object"), + ("datetime64[ns]", "uint64", "object"), + ("datetime64[ns]", "float64", "object"), + ("datetime64[ns, CET]", "int64", "object"), + ("datetime64[ns, CET]", "uint64", "object"), + ("datetime64[ns, CET]", "float64", "object"), + ("Period[D]", "int64", "object"), + ("Period[D]", "uint64", "object"), + ("Period[D]", "float64", "object"), + ], +) +@pytest.mark.parametrize("names", [("foo", "foo", "foo"), ("foo", "bar", None)]) +def test_union_dtypes(left, right, expected, names): + left = pandas_dtype(left) + right = pandas_dtype(right) + a = Index([], dtype=left, name=names[0]) + b = Index([], dtype=right, name=names[1]) + result = a.union(b) + assert result.dtype == expected + assert result.name == names[2] + + # Testing name retention + # TODO: pin down desired dtype; do we want it to be commutative? + result = a.intersection(b) + assert result.name == names[2] + + +def test_dunder_inplace_setops_deprecated(index): + # GH#37374 these will become logical ops, not setops + + with tm.assert_produces_warning(FutureWarning): + index |= index + + with tm.assert_produces_warning(FutureWarning): + index &= index + + with tm.assert_produces_warning(FutureWarning): + index ^= index + + +@pytest.mark.parametrize("values", [[1, 2, 2, 3], [3, 3]]) +def test_intersection_duplicates(values): + # GH#31326 + a = Index(values) + b = Index([3, 3]) + result = a.intersection(b) + expected = Index([3]) + tm.assert_index_equal(result, expected) + + +class TestSetOps: + # Set operation tests shared by all indexes in the `index` fixture + @pytest.mark.parametrize("case", [0.5, "xxx"]) + @pytest.mark.parametrize( + "method", ["intersection", "union", "difference", "symmetric_difference"] + ) + def test_set_ops_error_cases(self, case, method, index): + # non-iterable input + msg = "Input must be Index or array-like" + with pytest.raises(TypeError, match=msg): + getattr(index, method)(case) + + def test_intersection_base(self, index): + if isinstance(index, CategoricalIndex): + return + + first = index[:5] + second = index[:3] + intersect = first.intersection(second) + assert tm.equalContents(intersect, second) + + if is_datetime64tz_dtype(index.dtype): + # The second.values below will drop tz, so the rest of this test + # is not applicable. 
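+        # [editor's note] concretely: a DatetimeIndex built with, say,
+        # tz="US/Eastern" exposes .values as tz-naive datetime64[ns]
+        # (converted to UTC), so round-tripping `second` through a raw
+        # ndarray loses the timezone and the remaining equalContents
+        # checks would compare unlike data.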
+ return + + # GH#10149 + cases = [second.to_numpy(), second.to_series(), second.to_list()] + for case in cases: + result = first.intersection(case) + assert tm.equalContents(result, second) + + if isinstance(index, MultiIndex): + msg = "other must be a MultiIndex or a list of tuples" + with pytest.raises(TypeError, match=msg): + first.intersection([1, 2, 3]) + + def test_union_base(self, index): + first = index[3:] + second = index[:5] + everything = index + + union = first.union(second) + assert tm.equalContents(union, everything) + + if is_datetime64tz_dtype(index.dtype): + # The second.values below will drop tz, so the rest of this test + # is not applicable. + return + + # GH#10149 + cases = [second.to_numpy(), second.to_series(), second.to_list()] + for case in cases: + result = first.union(case) + assert tm.equalContents(result, everything) + + if isinstance(index, MultiIndex): + msg = "other must be a MultiIndex or a list of tuples" + with pytest.raises(TypeError, match=msg): + first.union([1, 2, 3]) + + def test_difference_base(self, sort, index): + first = index[2:] + second = index[:4] + if isinstance(index, CategoricalIndex) or index.is_boolean(): + answer = [] + else: + answer = index[4:] + result = first.difference(second, sort) + assert tm.equalContents(result, answer) + + # GH#10149 + cases = [second.to_numpy(), second.to_series(), second.to_list()] + for case in cases: + result = first.difference(case, sort) + assert tm.equalContents(result, answer) + + if isinstance(index, MultiIndex): + msg = "other must be a MultiIndex or a list of tuples" + with pytest.raises(TypeError, match=msg): + first.difference([1, 2, 3], sort) + + def test_symmetric_difference(self, index): + if isinstance(index, CategoricalIndex): + return + if len(index) < 2: + return + if index[0] in index[1:] or index[-1] in index[:-1]: + # index fixture has e.g. 
an index of bools that does not satisfy this, + # another with [0, 0, 1, 1, 2, 2] + return + + first = index[1:] + second = index[:-1] + answer = index[[0, -1]] + result = first.symmetric_difference(second) + assert tm.equalContents(result, answer) + + # GH#10149 + cases = [second.to_numpy(), second.to_series(), second.to_list()] + for case in cases: + result = first.symmetric_difference(case) + assert tm.equalContents(result, answer) + + if isinstance(index, MultiIndex): + msg = "other must be a MultiIndex or a list of tuples" + with pytest.raises(TypeError, match=msg): + first.symmetric_difference([1, 2, 3]) + + @pytest.mark.parametrize( + "fname, sname, expected_name", + [ + ("A", "A", "A"), + ("A", "B", None), + ("A", None, None), + (None, "B", None), + (None, None, None), + ], + ) + def test_corner_union(self, index_flat_unique, fname, sname, expected_name): + # GH#9943, GH#9862 + # Test unions with various name combinations + # Do not test MultiIndex or repeats + index = index_flat_unique + + # Test copy.union(copy) + first = index.copy().set_names(fname) + second = index.copy().set_names(sname) + union = first.union(second) + expected = index.copy().set_names(expected_name) + tm.assert_index_equal(union, expected) + + # Test copy.union(empty) + first = index.copy().set_names(fname) + second = index.drop(index).set_names(sname) + union = first.union(second) + expected = index.copy().set_names(expected_name) + tm.assert_index_equal(union, expected) + + # Test empty.union(copy) + first = index.drop(index).set_names(fname) + second = index.copy().set_names(sname) + union = first.union(second) + expected = index.copy().set_names(expected_name) + tm.assert_index_equal(union, expected) + + # Test empty.union(empty) + first = index.drop(index).set_names(fname) + second = index.drop(index).set_names(sname) + union = first.union(second) + expected = index.drop(index).set_names(expected_name) + tm.assert_index_equal(union, expected) + + @pytest.mark.parametrize( + "fname, sname, expected_name", + [ + ("A", "A", "A"), + ("A", "B", None), + ("A", None, None), + (None, "B", None), + (None, None, None), + ], + ) + def test_union_unequal(self, index_flat_unique, fname, sname, expected_name): + index = index_flat_unique + + # test copy.union(subset) - need sort for unicode and string + first = index.copy().set_names(fname) + second = index[1:].set_names(sname) + union = first.union(second).sort_values() + expected = index.set_names(expected_name).sort_values() + tm.assert_index_equal(union, expected) + + @pytest.mark.parametrize( + "fname, sname, expected_name", + [ + ("A", "A", "A"), + ("A", "B", None), + ("A", None, None), + (None, "B", None), + (None, None, None), + ], + ) + def test_corner_intersect(self, index_flat_unique, fname, sname, expected_name): + # GH#35847 + # Test intersections with various name combinations + index = index_flat_unique + + # Test copy.intersection(copy) + first = index.copy().set_names(fname) + second = index.copy().set_names(sname) + intersect = first.intersection(second) + expected = index.copy().set_names(expected_name) + tm.assert_index_equal(intersect, expected) + + # Test copy.intersection(empty) + first = index.copy().set_names(fname) + second = index.drop(index).set_names(sname) + intersect = first.intersection(second) + expected = index.drop(index).set_names(expected_name) + tm.assert_index_equal(intersect, expected) + + # Test empty.intersection(copy) + first = index.drop(index).set_names(fname) + second = index.copy().set_names(sname) + intersect = 
first.intersection(second) + expected = index.drop(index).set_names(expected_name) + tm.assert_index_equal(intersect, expected) + + # Test empty.intersection(empty) + first = index.drop(index).set_names(fname) + second = index.drop(index).set_names(sname) + intersect = first.intersection(second) + expected = index.drop(index).set_names(expected_name) + tm.assert_index_equal(intersect, expected) + + @pytest.mark.parametrize( + "fname, sname, expected_name", + [ + ("A", "A", "A"), + ("A", "B", None), + ("A", None, None), + (None, "B", None), + (None, None, None), + ], + ) + def test_intersect_unequal(self, index_flat_unique, fname, sname, expected_name): + index = index_flat_unique + + # test copy.intersection(subset) - need sort for unicode and string + first = index.copy().set_names(fname) + second = index[1:].set_names(sname) + intersect = first.intersection(second).sort_values() + expected = index[1:].set_names(expected_name).sort_values() + tm.assert_index_equal(intersect, expected) + + def test_intersection_name_retention_with_nameless(self, index): + if isinstance(index, MultiIndex): + index = index.rename(list(range(index.nlevels))) + else: + index = index.rename("foo") + + other = np.asarray(index) + + result = index.intersection(other) + assert result.name == index.name + + # empty other, same dtype + result = index.intersection(other[:0]) + assert result.name == index.name + + # empty `self` + result = index[:0].intersection(other) + assert result.name == index.name + + def test_difference_preserves_type_empty(self, index, sort): + # GH#20040 + # If taking difference of a set and itself, it + # needs to preserve the type of the index + if not index.is_unique: + return + result = index.difference(index, sort=sort) + expected = index[:0] + tm.assert_index_equal(result, expected, exact=True) + + def test_difference_name_retention_equals(self, index, sort, names): + if isinstance(index, MultiIndex): + names = [[x] * index.nlevels for x in names] + index = index.rename(names[0]) + other = index.rename(names[1]) + + assert index.equals(other) + + result = index.difference(other) + expected = index[:0].rename(names[2]) + tm.assert_index_equal(result, expected) + + def test_intersection_difference_match_empty(self, index, sort): + # GH#20040 + # Test that the intersection of an index with an + # empty index produces the same index as the difference + # of an index with itself. 
Test for all types + if not index.is_unique: + return + inter = index.intersection(index[:0]) + diff = index.difference(index, sort=sort) + tm.assert_index_equal(inter, diff, exact=True) + + +@pytest.mark.parametrize( + "method", ["intersection", "union", "difference", "symmetric_difference"] +) +def test_setop_with_categorical(index_flat, sort, method): + # MultiIndex tested separately in tests.indexes.multi.test_setops + index = index_flat + + other = index.astype("category") + exact = "equiv" if isinstance(index, RangeIndex) else True + + result = getattr(index, method)(other, sort=sort) + expected = getattr(index, method)(index, sort=sort) + tm.assert_index_equal(result, expected, exact=exact) + + result = getattr(index, method)(other[:5], sort=sort) + expected = getattr(index, method)(index[:5], sort=sort) + tm.assert_index_equal(result, expected, exact=exact) + + +def test_intersection_duplicates_all_indexes(index): + # GH#38743 + if index.empty: + # No duplicates in empty indexes + return + + def check_intersection_commutative(left, right): + assert left.intersection(right).equals(right.intersection(left)) + + idx = index + idx_non_unique = idx[[0, 0, 1, 2]] + + check_intersection_commutative(idx, idx_non_unique) + assert idx.intersection(idx_non_unique).is_unique + + +@pytest.mark.parametrize( + "cls", + [ + Int64Index, + Float64Index, + DatetimeIndex, + CategoricalIndex, + lambda x: CategoricalIndex(x, categories=set(x)), + TimedeltaIndex, + lambda x: Index(x, dtype=object), + UInt64Index, + ], +) +def test_union_duplicate_index_subsets_of_each_other(cls): + # GH#31326 + a = cls([1, 2, 2, 3]) + b = cls([3, 3, 4]) + expected = cls([1, 2, 2, 3, 3, 4]) + if isinstance(a, CategoricalIndex): + expected = Index([1, 2, 2, 3, 3, 4]) + result = a.union(b) + tm.assert_index_equal(result, expected) + result = a.union(b, sort=False) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize( + "cls", + [ + Int64Index, + Float64Index, + DatetimeIndex, + CategoricalIndex, + TimedeltaIndex, + lambda x: Index(x, dtype=object), + ], +) +def test_union_with_duplicate_index_and_non_monotonic(cls): + # GH#36289 + a = cls([1, 0, 0]) + b = cls([0, 1]) + expected = cls([0, 0, 1]) + + result = a.union(b) + tm.assert_index_equal(result, expected) + + result = b.union(a) + tm.assert_index_equal(result, expected) + + +def test_union_duplicate_index_different_dtypes(): + # GH#36289 + a = Index([1, 2, 2, 3]) + b = Index(["1", "0", "0"]) + expected = Index([1, 2, 2, 3, "1", "0", "0"]) + result = a.union(b, sort=False) + tm.assert_index_equal(result, expected) + + +def test_union_same_value_duplicated_in_both(): + # GH#36289 + a = Index([0, 0, 1]) + b = Index([0, 0, 1, 2]) + result = a.union(b) + expected = Index([0, 0, 1, 2]) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize("dup", [1, np.nan]) +def test_union_nan_in_both(dup): + # GH#36289 + a = Index([np.nan, 1, 2, 2]) + b = Index([np.nan, dup, 1, 2]) + result = a.union(b, sort=False) + expected = Index([np.nan, dup, 1.0, 2.0, 2.0]) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize( + "cls", + [ + Int64Index, + Float64Index, + DatetimeIndex, + TimedeltaIndex, + lambda x: Index(x, dtype=object), + ], +) +def test_union_with_duplicate_index_not_subset_and_non_monotonic(cls): + # GH#36289 + a = cls([1, 0, 2]) + b = cls([0, 0, 1]) + expected = cls([0, 0, 1, 2]) + + result = a.union(b) + tm.assert_index_equal(result, expected) + + result = b.union(a) + tm.assert_index_equal(result, expected) + + +class 
TestSetOpsUnsorted: + # These may eventually belong in a dtype-specific test_setops, or + # parametrized over a more general fixture + def test_intersect_str_dates(self): + dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)] + + index1 = Index(dt_dates, dtype=object) + index2 = Index(["aa"], dtype=object) + result = index2.intersection(index1) + + expected = Index([], dtype=object) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("index", ["string"], indirect=True) + def test_intersection(self, index, sort): + first = index[:20] + second = index[:10] + intersect = first.intersection(second, sort=sort) + if sort is None: + tm.assert_index_equal(intersect, second.sort_values()) + assert tm.equalContents(intersect, second) + + # Corner cases + inter = first.intersection(first, sort=sort) + assert inter is first + + @pytest.mark.parametrize( + "index2,keeps_name", + [ + (Index([3, 4, 5, 6, 7], name="index"), True), # preserve same name + (Index([3, 4, 5, 6, 7], name="other"), False), # drop diff names + (Index([3, 4, 5, 6, 7]), False), + ], + ) + def test_intersection_name_preservation(self, index2, keeps_name, sort): + index1 = Index([1, 2, 3, 4, 5], name="index") + expected = Index([3, 4, 5]) + result = index1.intersection(index2, sort) + + if keeps_name: + expected.name = "index" + + assert result.name == expected.name + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("index", ["string"], indirect=True) + @pytest.mark.parametrize( + "first_name,second_name,expected_name", + [("A", "A", "A"), ("A", "B", None), (None, "B", None)], + ) + def test_intersection_name_preservation2( + self, index, first_name, second_name, expected_name, sort + ): + first = index[5:20] + second = index[:10] + first.name = first_name + second.name = second_name + intersect = first.intersection(second, sort=sort) + assert intersect.name == expected_name + + def test_chained_union(self, sort): + # Chained unions handles names correctly + i1 = Index([1, 2], name="i1") + i2 = Index([5, 6], name="i2") + i3 = Index([3, 4], name="i3") + union = i1.union(i2.union(i3, sort=sort), sort=sort) + expected = i1.union(i2, sort=sort).union(i3, sort=sort) + tm.assert_index_equal(union, expected) + + j1 = Index([1, 2], name="j1") + j2 = Index([], name="j2") + j3 = Index([], name="j3") + union = j1.union(j2.union(j3, sort=sort), sort=sort) + expected = j1.union(j2, sort=sort).union(j3, sort=sort) + tm.assert_index_equal(union, expected) + + @pytest.mark.parametrize("index", ["string"], indirect=True) + def test_union(self, index, sort): + first = index[5:20] + second = index[:10] + everything = index[:20] + + union = first.union(second, sort=sort) + if sort is None: + tm.assert_index_equal(union, everything.sort_values()) + assert tm.equalContents(union, everything) + + @pytest.mark.parametrize("klass", [np.array, Series, list]) + @pytest.mark.parametrize("index", ["string"], indirect=True) + def test_union_from_iterables(self, index, klass, sort): + # GH#10149 + first = index[5:20] + second = index[:10] + everything = index[:20] + + case = klass(second.values) + result = first.union(case, sort=sort) + if sort is None: + tm.assert_index_equal(result, everything.sort_values()) + assert tm.equalContents(result, everything) + + @pytest.mark.parametrize("index", ["string"], indirect=True) + def test_union_identity(self, index, sort): + first = index[5:20] + + union = first.union(first, sort=sort) + # i.e. 
identity is not preserved when sort is True + assert (union is first) is (not sort) + + # This should no longer be the same object, since [] is not consistent, + # both objects will be recast to dtype('O') + union = first.union([], sort=sort) + assert (union is first) is (not sort) + + union = Index([]).union(first, sort=sort) + assert (union is first) is (not sort) + + @pytest.mark.parametrize("index", ["string"], indirect=True) + @pytest.mark.parametrize("second_name,expected", [(None, None), ("name", "name")]) + def test_difference_name_preservation(self, index, second_name, expected, sort): + first = index[5:20] + second = index[:10] + answer = index[10:20] + + first.name = "name" + second.name = second_name + result = first.difference(second, sort=sort) + + assert tm.equalContents(result, answer) + + if expected is None: + assert result.name is None + else: + assert result.name == expected + + def test_difference_empty_arg(self, index, sort): + first = index[5:20] + first.name = "name" + result = first.difference([], sort) + + tm.assert_index_equal(result, first) + + @pytest.mark.parametrize("index", ["string"], indirect=True) + def test_difference_identity(self, index, sort): + first = index[5:20] + first.name = "name" + result = first.difference(first, sort) + + assert len(result) == 0 + assert result.name == first.name + + @pytest.mark.parametrize("index", ["string"], indirect=True) + def test_difference_sort(self, index, sort): + first = index[5:20] + second = index[:10] + + result = first.difference(second, sort) + expected = index[10:20] + + if sort is None: + expected = expected.sort_values() + + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("opname", ["difference", "symmetric_difference"]) + def test_difference_incomparable(self, opname): + a = Index([3, Timestamp("2000"), 1]) + b = Index([2, Timestamp("1999"), 1]) + op = operator.methodcaller(opname, b) + + with tm.assert_produces_warning(RuntimeWarning): + # sort=None, the default + result = op(a) + expected = Index([3, Timestamp("2000"), 2, Timestamp("1999")]) + if opname == "difference": + expected = expected[:2] + tm.assert_index_equal(result, expected) + + # sort=False + op = operator.methodcaller(opname, b, sort=False) + result = op(a) + tm.assert_index_equal(result, expected) + + @pytest.mark.xfail(reason="Not implemented") + @pytest.mark.parametrize("opname", ["difference", "symmetric_difference"]) + def test_difference_incomparable_true(self, opname): + # TODO(GH#25151): decide on True behaviour + # # sort=True, raises + a = Index([3, Timestamp("2000"), 1]) + b = Index([2, Timestamp("1999"), 1]) + op = operator.methodcaller(opname, b, sort=True) + + with pytest.raises(TypeError, match="Cannot compare"): + op(a) + + def test_symmetric_difference_mi(self, sort): + index1 = MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3])) + index2 = MultiIndex.from_tuples([("foo", 1), ("bar", 3)]) + result = index1.symmetric_difference(index2, sort=sort) + expected = MultiIndex.from_tuples([("bar", 2), ("baz", 3), ("bar", 3)]) + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + assert tm.equalContents(result, expected) + + @pytest.mark.parametrize( + "index2,expected", + [ + (Index([0, 1, np.nan]), Index([2.0, 3.0, 0.0])), + (Index([0, 1]), Index([np.nan, 2.0, 3.0, 0.0])), + ], + ) + def test_symmetric_difference_missing(self, index2, expected, sort): + # GH#13514 change: {nan} - {nan} == {} + # (GH#6444, sorting of nans, is no longer an issue) + index1 = 
Index([1, np.nan, 2, 3]) + + result = index1.symmetric_difference(index2, sort=sort) + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + + def test_symmetric_difference_non_index(self, sort): + index1 = Index([1, 2, 3, 4], name="index1") + index2 = np.array([2, 3, 4, 5]) + expected = Index([1, 5]) + result = index1.symmetric_difference(index2, sort=sort) + assert tm.equalContents(result, expected) + assert result.name == "index1" + + result = index1.symmetric_difference(index2, result_name="new_name", sort=sort) + assert tm.equalContents(result, expected) + assert result.name == "new_name"
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/__init__.py new file mode 100644 index 0000000..e69de29
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/methods/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/methods/__init__.py new file mode 100644 index 0000000..e69de29
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/methods/test_astype.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/methods/test_astype.py new file mode 100644 index 0000000..276b0db --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/methods/test_astype.py @@ -0,0 +1,127 @@ +from datetime import timedelta + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Index, + NaT, + Timedelta,
+ TimedeltaIndex, + timedelta_range, +) +import pandas._testing as tm +from pandas.core.api import ( + Float64Index, + Int64Index, + UInt64Index, +) + + +class TestTimedeltaIndex: + def test_astype_object(self): + idx = timedelta_range(start="1 days", periods=4, freq="D", name="idx") + expected_list = [ + Timedelta("1 days"), + Timedelta("2 days"), + Timedelta("3 days"), + Timedelta("4 days"), + ] + result = idx.astype(object) + expected = Index(expected_list, dtype=object, name="idx") + tm.assert_index_equal(result, expected) + assert idx.tolist() == expected_list + + def test_astype_object_with_nat(self): + idx = TimedeltaIndex( + [timedelta(days=1), timedelta(days=2), NaT, timedelta(days=4)], name="idx" + ) + expected_list = [ + Timedelta("1 days"), + Timedelta("2 days"), + NaT, + Timedelta("4 days"), + ] + result = idx.astype(object) + expected = Index(expected_list, dtype=object, name="idx") + tm.assert_index_equal(result, expected) + assert idx.tolist() == expected_list + + def test_astype(self): + # GH 13149, GH 13209 + idx = TimedeltaIndex([1e14, "NaT", NaT, np.NaN], name="idx") + + result = idx.astype(object) + expected = Index( + [Timedelta("1 days 03:46:40")] + [NaT] * 3, dtype=object, name="idx" + ) + tm.assert_index_equal(result, expected) + + result = idx.astype(int) + expected = Int64Index( + [100000000000000] + [-9223372036854775808] * 3, dtype=np.int64, name="idx" + ) + tm.assert_index_equal(result, expected) + + result = idx.astype(str) + expected = Index([str(x) for x in idx], name="idx") + tm.assert_index_equal(result, expected) + + rng = timedelta_range("1 days", periods=10) + result = rng.astype("i8") + tm.assert_index_equal(result, Index(rng.asi8)) + tm.assert_numpy_array_equal(rng.asi8, result.values) + + def test_astype_uint(self): + arr = timedelta_range("1H", periods=2) + expected = UInt64Index( + np.array([3600000000000, 90000000000000], dtype="uint64") + ) + tm.assert_index_equal(arr.astype("uint64"), expected) + tm.assert_index_equal(arr.astype("uint32"), expected) + + def test_astype_timedelta64(self): + # GH 13149, GH 13209 + idx = TimedeltaIndex([1e14, "NaT", NaT, np.NaN]) + + result = idx.astype("timedelta64") + expected = Float64Index([1e14] + [np.NaN] * 3, dtype="float64") + tm.assert_index_equal(result, expected) + + result = idx.astype("timedelta64[ns]") + tm.assert_index_equal(result, idx) + assert result is not idx + + result = idx.astype("timedelta64[ns]", copy=False) + tm.assert_index_equal(result, idx) + assert result is idx + + @pytest.mark.parametrize("dtype", [float, "datetime64", "datetime64[ns]"]) + def test_astype_raises(self, dtype): + # GH 13149, GH 13209 + idx = TimedeltaIndex([1e14, "NaT", NaT, np.NaN]) + msg = "Cannot cast TimedeltaIndex to dtype" + with pytest.raises(TypeError, match=msg): + idx.astype(dtype) + + def test_astype_category(self): + obj = timedelta_range("1H", periods=2, freq="H") + + result = obj.astype("category") + expected = pd.CategoricalIndex([Timedelta("1H"), Timedelta("2H")]) + tm.assert_index_equal(result, expected) + + result = obj._data.astype("category") + expected = expected.values + tm.assert_categorical_equal(result, expected) + + def test_astype_array_fallback(self): + obj = timedelta_range("1H", periods=2) + result = obj.astype(bool) + expected = Index(np.array([True, True])) + tm.assert_index_equal(result, expected) + + result = obj._data.astype(bool) + expected = np.array([True, True]) + tm.assert_numpy_array_equal(result, expected) diff --git 
a/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/methods/test_factorize.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/methods/test_factorize.py new file mode 100644 index 0000000..24ab388 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/methods/test_factorize.py @@ -0,0 +1,40 @@ +import numpy as np + +from pandas import ( + TimedeltaIndex, + factorize, + timedelta_range, +) +import pandas._testing as tm + + +class TestTimedeltaIndexFactorize: + def test_factorize(self): + idx1 = TimedeltaIndex(["1 day", "1 day", "2 day", "2 day", "3 day", "3 day"]) + + exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp) + exp_idx = TimedeltaIndex(["1 day", "2 day", "3 day"]) + + arr, idx = idx1.factorize() + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + assert idx.freq == exp_idx.freq + + arr, idx = idx1.factorize(sort=True) + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + assert idx.freq == exp_idx.freq + + def test_factorize_preserves_freq(self): + # GH#38120 freq should be preserved + idx3 = timedelta_range("1 day", periods=4, freq="s") + exp_arr = np.array([0, 1, 2, 3], dtype=np.intp) + arr, idx = idx3.factorize() + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, idx3) + assert idx.freq == idx3.freq + + arr, idx = factorize(idx3) + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, idx3) + assert idx.freq == idx3.freq diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/methods/test_fillna.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/methods/test_fillna.py new file mode 100644 index 0000000..40aa95d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/methods/test_fillna.py @@ -0,0 +1,22 @@ +from pandas import ( + Index, + NaT, + Timedelta, + TimedeltaIndex, +) +import pandas._testing as tm + + +class TestFillNA: + def test_fillna_timedelta(self): + # GH#11343 + idx = TimedeltaIndex(["1 day", NaT, "3 day"]) + + exp = TimedeltaIndex(["1 day", "2 day", "3 day"]) + tm.assert_index_equal(idx.fillna(Timedelta("2 day")), exp) + + exp = TimedeltaIndex(["1 day", "3 hour", "3 day"]) + tm.assert_index_equal(idx.fillna(Timedelta("3 hour")), exp) + + exp = Index([Timedelta("1 day"), "x", Timedelta("3 day")], dtype=object) + tm.assert_index_equal(idx.fillna("x"), exp) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/methods/test_insert.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/methods/test_insert.py new file mode 100644 index 0000000..c2f22da --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/methods/test_insert.py @@ -0,0 +1,146 @@ +from datetime import timedelta + +import numpy as np +import pytest + +from pandas._libs import lib + +import pandas as pd +from pandas import ( + Index, + Timedelta, + TimedeltaIndex, + timedelta_range, +) +import pandas._testing as tm + + +class TestTimedeltaIndexInsert: + def test_insert(self): + + idx = TimedeltaIndex(["4day", "1day", "2day"], name="idx") + + result = idx.insert(2, timedelta(days=5)) + exp = TimedeltaIndex(["4day", "1day", "5day", "2day"], name="idx") + tm.assert_index_equal(result, exp) + + # insertion of non-datetime should coerce to object index + result = idx.insert(1, "inserted") + expected = Index( + [Timedelta("4day"), "inserted", Timedelta("1day"), Timedelta("2day")], + name="idx", + ) + assert not
isinstance(result, TimedeltaIndex) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + + idx = timedelta_range("1day 00:00:01", periods=3, freq="s", name="idx") + + # preserve freq + expected_0 = TimedeltaIndex( + ["1day", "1day 00:00:01", "1day 00:00:02", "1day 00:00:03"], + name="idx", + freq="s", + ) + expected_3 = TimedeltaIndex( + ["1day 00:00:01", "1day 00:00:02", "1day 00:00:03", "1day 00:00:04"], + name="idx", + freq="s", + ) + + # reset freq to None + expected_1_nofreq = TimedeltaIndex( + ["1day 00:00:01", "1day 00:00:01", "1day 00:00:02", "1day 00:00:03"], + name="idx", + freq=None, + ) + expected_3_nofreq = TimedeltaIndex( + ["1day 00:00:01", "1day 00:00:02", "1day 00:00:03", "1day 00:00:05"], + name="idx", + freq=None, + ) + + cases = [ + (0, Timedelta("1day"), expected_0), + (-3, Timedelta("1day"), expected_0), + (3, Timedelta("1day 00:00:04"), expected_3), + (1, Timedelta("1day 00:00:01"), expected_1_nofreq), + (3, Timedelta("1day 00:00:05"), expected_3_nofreq), + ] + + for n, d, expected in cases: + result = idx.insert(n, d) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + + @pytest.mark.parametrize( + "null", [None, np.nan, np.timedelta64("NaT"), pd.NaT, pd.NA] + ) + def test_insert_nat(self, null): + # GH 18295 (test missing) + idx = timedelta_range("1day", "3day") + result = idx.insert(1, null) + expected = TimedeltaIndex(["1day", pd.NaT, "2day", "3day"]) + tm.assert_index_equal(result, expected) + + def test_insert_invalid_na(self): + idx = TimedeltaIndex(["4day", "1day", "2day"], name="idx") + + item = np.datetime64("NaT") + result = idx.insert(0, item) + + expected = Index([item] + list(idx), dtype=object, name="idx") + tm.assert_index_equal(result, expected) + + # Also works if we pass a different dt64nat object + item2 = np.datetime64("NaT") + result = idx.insert(0, item2) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "item", [0, np.int64(0), np.float64(0), np.array(0), np.datetime64(456, "us")] + ) + def test_insert_mismatched_types_raises(self, item): + # GH#33703 dont cast these to td64 + tdi = TimedeltaIndex(["4day", "1day", "2day"], name="idx") + + result = tdi.insert(1, item) + + expected = Index( + [tdi[0], lib.item_from_zerodim(item)] + list(tdi[1:]), + dtype=object, + name="idx", + ) + tm.assert_index_equal(result, expected) + + def test_insert_castable_str(self): + idx = timedelta_range("1day", "3day") + + result = idx.insert(0, "1 Day") + + expected = TimedeltaIndex([idx[0]] + list(idx)) + tm.assert_index_equal(result, expected) + + def test_insert_non_castable_str(self): + idx = timedelta_range("1day", "3day") + + result = idx.insert(0, "foo") + + expected = Index(["foo"] + list(idx), dtype=object) + tm.assert_index_equal(result, expected) + + def test_insert_empty(self): + # Corner case inserting with length zero doesn't raise IndexError + # GH#33573 for freq preservation + idx = timedelta_range("1 Day", periods=3) + td = idx[0] + + result = idx[:0].insert(0, td) + assert result.freq == "D" + + with pytest.raises(IndexError, match="loc must be an integer between"): + result = idx[:0].insert(1, td) + + with pytest.raises(IndexError, match="loc must be an integer between"): + result = idx[:0].insert(-1, td) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/methods/test_repeat.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/methods/test_repeat.py new file mode 100644 index 
0000000..2a9b58d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/methods/test_repeat.py @@ -0,0 +1,34 @@ +import numpy as np + +from pandas import ( + TimedeltaIndex, + timedelta_range, +) +import pandas._testing as tm + + +class TestRepeat: + def test_repeat(self): + index = timedelta_range("1 days", periods=2, freq="D") + exp = TimedeltaIndex(["1 days", "1 days", "2 days", "2 days"]) + for res in [index.repeat(2), np.repeat(index, 2)]: + tm.assert_index_equal(res, exp) + assert res.freq is None + + index = TimedeltaIndex(["1 days", "NaT", "3 days"]) + exp = TimedeltaIndex( + [ + "1 days", + "1 days", + "1 days", + "NaT", + "NaT", + "NaT", + "3 days", + "3 days", + "3 days", + ] + ) + for res in [index.repeat(3), np.repeat(index, 3)]: + tm.assert_index_equal(res, exp) + assert res.freq is None diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/methods/test_shift.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/methods/test_shift.py new file mode 100644 index 0000000..9864f73 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/methods/test_shift.py @@ -0,0 +1,77 @@ +import pytest + +from pandas.errors import NullFrequencyError + +import pandas as pd +from pandas import TimedeltaIndex +import pandas._testing as tm + + +class TestTimedeltaIndexShift: + + # ------------------------------------------------------------- + # TimedeltaIndex.shift is used by __add__/__sub__ + + def test_tdi_shift_empty(self): + # GH#9903 + idx = TimedeltaIndex([], name="xxx") + tm.assert_index_equal(idx.shift(0, freq="H"), idx) + tm.assert_index_equal(idx.shift(3, freq="H"), idx) + + def test_tdi_shift_hours(self): + # GH#9903 + idx = TimedeltaIndex(["5 hours", "6 hours", "9 hours"], name="xxx") + tm.assert_index_equal(idx.shift(0, freq="H"), idx) + exp = TimedeltaIndex(["8 hours", "9 hours", "12 hours"], name="xxx") + tm.assert_index_equal(idx.shift(3, freq="H"), exp) + exp = TimedeltaIndex(["2 hours", "3 hours", "6 hours"], name="xxx") + tm.assert_index_equal(idx.shift(-3, freq="H"), exp) + + def test_tdi_shift_minutes(self): + # GH#9903 + idx = TimedeltaIndex(["5 hours", "6 hours", "9 hours"], name="xxx") + tm.assert_index_equal(idx.shift(0, freq="T"), idx) + exp = TimedeltaIndex(["05:03:00", "06:03:00", "9:03:00"], name="xxx") + tm.assert_index_equal(idx.shift(3, freq="T"), exp) + exp = TimedeltaIndex(["04:57:00", "05:57:00", "8:57:00"], name="xxx") + tm.assert_index_equal(idx.shift(-3, freq="T"), exp) + + def test_tdi_shift_int(self): + # GH#8083 + tdi = pd.to_timedelta(range(5), unit="d") + trange = tdi._with_freq("infer") + pd.offsets.Hour(1) + result = trange.shift(1) + expected = TimedeltaIndex( + [ + "1 days 01:00:00", + "2 days 01:00:00", + "3 days 01:00:00", + "4 days 01:00:00", + "5 days 01:00:00", + ], + freq="D", + ) + tm.assert_index_equal(result, expected) + + def test_tdi_shift_nonstandard_freq(self): + # GH#8083 + tdi = pd.to_timedelta(range(5), unit="d") + trange = tdi._with_freq("infer") + pd.offsets.Hour(1) + result = trange.shift(3, freq="2D 1s") + expected = TimedeltaIndex( + [ + "6 days 01:00:03", + "7 days 01:00:03", + "8 days 01:00:03", + "9 days 01:00:03", + "10 days 01:00:03", + ], + freq="D", + ) + tm.assert_index_equal(result, expected) + + def test_shift_no_freq(self): + # GH#19147 + tdi = TimedeltaIndex(["1 days 01:00:00", "2 days 01:00:00"], freq=None) + with pytest.raises(NullFrequencyError, match="Cannot shift with no freq"): + tdi.shift(2) diff --git 
a/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/test_constructors.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/test_constructors.py new file mode 100644 index 0000000..dcddba8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/test_constructors.py @@ -0,0 +1,276 @@ +from datetime import timedelta + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Timedelta, + TimedeltaIndex, + timedelta_range, + to_timedelta, +) +import pandas._testing as tm +from pandas.core.arrays.timedeltas import ( + TimedeltaArray, + sequence_to_td64ns, +) + + +class TestTimedeltaIndex: + def test_array_of_dt64_nat_raises(self): + # GH#39462 + nat = np.datetime64("NaT", "ns") + arr = np.array([nat], dtype=object) + + # TODO: should be TypeError? + msg = "Invalid type for timedelta scalar" + with pytest.raises(ValueError, match=msg): + TimedeltaIndex(arr) + + with pytest.raises(ValueError, match=msg): + TimedeltaArray._from_sequence(arr) + + with pytest.raises(ValueError, match=msg): + sequence_to_td64ns(arr) + + @pytest.mark.parametrize("unit", ["Y", "y", "M"]) + def test_unit_m_y_raises(self, unit): + msg = "Units 'M', 'Y', and 'y' are no longer supported" + with pytest.raises(ValueError, match=msg): + TimedeltaIndex([1, 3, 7], unit) + + def test_int64_nocopy(self): + # GH#23539 check that a copy isn't made when we pass int64 data + # and copy=False + arr = np.arange(10, dtype=np.int64) + tdi = TimedeltaIndex(arr, copy=False) + assert tdi._data._data.base is arr + + def test_infer_from_tdi(self): + # GH#23539 + # fast-path for inferring a frequency if the passed data already + # has one + tdi = timedelta_range("1 second", periods=10 ** 7, freq="1s") + + result = TimedeltaIndex(tdi, freq="infer") + assert result.freq == tdi.freq + + # check that inferred_freq was not called by checking that the + # value has not been cached + assert "inferred_freq" not in getattr(result, "_cache", {}) + + def test_infer_from_tdi_mismatch(self): + # GH#23539 + # fast-path for invalidating a frequency if the passed data already + # has one and it does not match the `freq` input + tdi = timedelta_range("1 second", periods=100, freq="1s") + + msg = ( + "Inferred frequency .* from passed values does " + "not conform to passed frequency" + ) + with pytest.raises(ValueError, match=msg): + TimedeltaIndex(tdi, freq="D") + + with pytest.raises(ValueError, match=msg): + # GH#23789 + TimedeltaArray(tdi, freq="D") + + with pytest.raises(ValueError, match=msg): + TimedeltaIndex(tdi._data, freq="D") + + with pytest.raises(ValueError, match=msg): + TimedeltaArray(tdi._data, freq="D") + + def test_dt64_data_invalid(self): + # GH#23539 + # passing tz-aware DatetimeIndex raises, naive or ndarray[datetime64] + # raise as of GH#29794 + dti = pd.date_range("2016-01-01", periods=3) + + msg = "cannot be converted to timedelta64" + with pytest.raises(TypeError, match=msg): + TimedeltaIndex(dti.tz_localize("Europe/Brussels")) + + with pytest.raises(TypeError, match=msg): + TimedeltaIndex(dti) + + with pytest.raises(TypeError, match=msg): + TimedeltaIndex(np.asarray(dti)) + + def test_float64_ns_rounded(self): + # GH#23539 without specifying a unit, floats are regarded as nanos, + # and fractional portions are truncated + tdi = TimedeltaIndex([2.3, 9.7]) + expected = TimedeltaIndex([2, 9]) + tm.assert_index_equal(tdi, expected) + + # integral floats are non-lossy + tdi = TimedeltaIndex([2.0, 9.0]) + expected = TimedeltaIndex([2, 9]) + 
tm.assert_index_equal(tdi, expected) + + # NaNs get converted to NaT + tdi = TimedeltaIndex([2.0, np.nan]) + expected = TimedeltaIndex([Timedelta(nanoseconds=2), pd.NaT]) + tm.assert_index_equal(tdi, expected) + + def test_float64_unit_conversion(self): + # GH#23539 + tdi = TimedeltaIndex([1.5, 2.25], unit="D") + expected = TimedeltaIndex([Timedelta(days=1.5), Timedelta(days=2.25)]) + tm.assert_index_equal(tdi, expected) + + def test_construction_base_constructor(self): + arr = [Timedelta("1 days"), pd.NaT, Timedelta("3 days")] + tm.assert_index_equal(pd.Index(arr), TimedeltaIndex(arr)) + tm.assert_index_equal(pd.Index(np.array(arr)), TimedeltaIndex(np.array(arr))) + + arr = [np.nan, pd.NaT, Timedelta("1 days")] + tm.assert_index_equal(pd.Index(arr), TimedeltaIndex(arr)) + tm.assert_index_equal(pd.Index(np.array(arr)), TimedeltaIndex(np.array(arr))) + + def test_constructor(self): + expected = TimedeltaIndex( + [ + "1 days", + "1 days 00:00:05", + "2 days", + "2 days 00:00:02", + "0 days 00:00:03", + ] + ) + result = TimedeltaIndex( + [ + "1 days", + "1 days, 00:00:05", + np.timedelta64(2, "D"), + timedelta(days=2, seconds=2), + pd.offsets.Second(3), + ] + ) + tm.assert_index_equal(result, expected) + + # unicode + result = TimedeltaIndex( + [ + "1 days", + "1 days, 00:00:05", + np.timedelta64(2, "D"), + timedelta(days=2, seconds=2), + pd.offsets.Second(3), + ] + ) + tm.assert_index_equal(result, expected) + expected = TimedeltaIndex( + ["0 days 00:00:00", "0 days 00:00:01", "0 days 00:00:02"] + ) + tm.assert_index_equal(TimedeltaIndex(range(3), unit="s"), expected) + expected = TimedeltaIndex( + ["0 days 00:00:00", "0 days 00:00:05", "0 days 00:00:09"] + ) + tm.assert_index_equal(TimedeltaIndex([0, 5, 9], unit="s"), expected) + expected = TimedeltaIndex( + ["0 days 00:00:00.400", "0 days 00:00:00.450", "0 days 00:00:01.200"] + ) + tm.assert_index_equal(TimedeltaIndex([400, 450, 1200], unit="ms"), expected) + + def test_constructor_iso(self): + # GH #21877 + expected = timedelta_range("1s", periods=9, freq="s") + durations = [f"P0DT0H0M{i}S" for i in range(1, 10)] + result = to_timedelta(durations) + tm.assert_index_equal(result, expected) + + def test_constructor_coverage(self): + rng = timedelta_range("1 days", periods=10.5) + exp = timedelta_range("1 days", periods=10) + tm.assert_index_equal(rng, exp) + + msg = "periods must be a number, got foo" + with pytest.raises(TypeError, match=msg): + timedelta_range(start="1 days", periods="foo", freq="D") + + msg = ( + r"TimedeltaIndex\(\.\.\.\) must be called with a collection of some kind, " + "'1 days' was passed" + ) + with pytest.raises(TypeError, match=msg): + TimedeltaIndex("1 days") + + # generator expression + gen = (timedelta(i) for i in range(10)) + result = TimedeltaIndex(gen) + expected = TimedeltaIndex([timedelta(i) for i in range(10)]) + tm.assert_index_equal(result, expected) + + # NumPy string array + strings = np.array(["1 days", "2 days", "3 days"]) + result = TimedeltaIndex(strings) + expected = to_timedelta([1, 2, 3], unit="d") + tm.assert_index_equal(result, expected) + + from_ints = TimedeltaIndex(expected.asi8) + tm.assert_index_equal(from_ints, expected) + + # non-conforming freq + msg = ( + "Inferred frequency None from passed values does not conform to " + "passed frequency D" + ) + with pytest.raises(ValueError, match=msg): + TimedeltaIndex(["1 days", "2 days", "4 days"], freq="D") + + msg = ( + "Of the four parameters: start, end, periods, and freq, exactly " + "three must be specified" + ) + with pytest.raises(ValueError, match=msg): + 
timedelta_range(periods=10, freq="D") + + def test_constructor_name(self): + idx = timedelta_range(start="1 days", periods=1, freq="D", name="TEST") + assert idx.name == "TEST" + + # GH10025 + idx2 = TimedeltaIndex(idx, name="something else") + assert idx2.name == "something else" + + def test_constructor_no_precision_raises(self): + # GH-24753, GH-24739 + + msg = "with no precision is not allowed" + with pytest.raises(ValueError, match=msg): + TimedeltaIndex(["2000"], dtype="timedelta64") + + with pytest.raises(ValueError, match=msg): + pd.Index(["2000"], dtype="timedelta64") + + def test_constructor_wrong_precision_raises(self): + msg = r"dtype timedelta64\[us\] cannot be converted to timedelta64\[ns\]" + with pytest.raises(ValueError, match=msg): + TimedeltaIndex(["2000"], dtype="timedelta64[us]") + + def test_explicit_none_freq(self): + # Explicitly passing freq=None is respected + tdi = timedelta_range(1, periods=5) + assert tdi.freq is not None + + result = TimedeltaIndex(tdi, freq=None) + assert result.freq is None + + result = TimedeltaIndex(tdi._data, freq=None) + assert result.freq is None + + def test_from_categorical(self): + tdi = timedelta_range(1, periods=5) + + cat = pd.Categorical(tdi) + + result = TimedeltaIndex(cat) + tm.assert_index_equal(result, tdi) + + ci = pd.CategoricalIndex(tdi) + result = TimedeltaIndex(ci) + tm.assert_index_equal(result, tdi) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/test_delete.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/test_delete.py new file mode 100644 index 0000000..6e6f547 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/test_delete.py @@ -0,0 +1,71 @@ +from pandas import ( + TimedeltaIndex, + timedelta_range, +) +import pandas._testing as tm + + +class TestTimedeltaIndexDelete: + def test_delete(self): + idx = timedelta_range(start="1 Days", periods=5, freq="D", name="idx") + + # preserve freq + expected_0 = timedelta_range(start="2 Days", periods=4, freq="D", name="idx") + expected_4 = timedelta_range(start="1 Days", periods=4, freq="D", name="idx") + + # reset freq to None + expected_1 = TimedeltaIndex( + ["1 day", "3 day", "4 day", "5 day"], freq=None, name="idx" + ) + + cases = { + 0: expected_0, + -5: expected_0, + -1: expected_4, + 4: expected_4, + 1: expected_1, + } + for n, expected in cases.items(): + result = idx.delete(n) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + + with tm.external_error_raised((IndexError, ValueError)): + # either depending on numpy version + idx.delete(5) + + def test_delete_slice(self): + idx = timedelta_range(start="1 days", periods=10, freq="D", name="idx") + + # preserve freq + expected_0_2 = timedelta_range(start="4 days", periods=7, freq="D", name="idx") + expected_7_9 = timedelta_range(start="1 days", periods=7, freq="D", name="idx") + + # reset freq to None + expected_3_5 = TimedeltaIndex( + ["1 d", "2 d", "3 d", "7 d", "8 d", "9 d", "10d"], freq=None, name="idx" + ) + + cases = { + (0, 1, 2): expected_0_2, + (7, 8, 9): expected_7_9, + (3, 4, 5): expected_3_5, + } + for n, expected in cases.items(): + result = idx.delete(n) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + + result = idx.delete(slice(n[0], n[-1] + 1)) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + + def 
test_delete_doesnt_infer_freq(self): + # GH#30655 behavior matches DatetimeIndex + + tdi = TimedeltaIndex(["1 Day", "2 Days", None, "3 Days", "4 Days"]) + result = tdi.delete(2) + assert result.freq is None diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/test_formats.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/test_formats.py new file mode 100644 index 0000000..751f9e4 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/test_formats.py @@ -0,0 +1,93 @@ +import pytest + +import pandas as pd +from pandas import ( + Series, + TimedeltaIndex, +) + + +class TestTimedeltaIndexRendering: + @pytest.mark.parametrize("method", ["__repr__", "__str__"]) + def test_representation(self, method): + idx1 = TimedeltaIndex([], freq="D") + idx2 = TimedeltaIndex(["1 days"], freq="D") + idx3 = TimedeltaIndex(["1 days", "2 days"], freq="D") + idx4 = TimedeltaIndex(["1 days", "2 days", "3 days"], freq="D") + idx5 = TimedeltaIndex(["1 days 00:00:01", "2 days", "3 days"]) + + exp1 = "TimedeltaIndex([], dtype='timedelta64[ns]', freq='D')" + + exp2 = "TimedeltaIndex(['1 days'], dtype='timedelta64[ns]', freq='D')" + + exp3 = "TimedeltaIndex(['1 days', '2 days'], dtype='timedelta64[ns]', freq='D')" + + exp4 = ( + "TimedeltaIndex(['1 days', '2 days', '3 days'], " + "dtype='timedelta64[ns]', freq='D')" + ) + + exp5 = ( + "TimedeltaIndex(['1 days 00:00:01', '2 days 00:00:00', " + "'3 days 00:00:00'], dtype='timedelta64[ns]', freq=None)" + ) + + with pd.option_context("display.width", 300): + for idx, expected in zip( + [idx1, idx2, idx3, idx4, idx5], [exp1, exp2, exp3, exp4, exp5] + ): + result = getattr(idx, method)() + assert result == expected + + def test_representation_to_series(self): + idx1 = TimedeltaIndex([], freq="D") + idx2 = TimedeltaIndex(["1 days"], freq="D") + idx3 = TimedeltaIndex(["1 days", "2 days"], freq="D") + idx4 = TimedeltaIndex(["1 days", "2 days", "3 days"], freq="D") + idx5 = TimedeltaIndex(["1 days 00:00:01", "2 days", "3 days"]) + + exp1 = """Series([], dtype: timedelta64[ns])""" + + exp2 = "0 1 days\ndtype: timedelta64[ns]" + + exp3 = "0 1 days\n1 2 days\ndtype: timedelta64[ns]" + + exp4 = "0 1 days\n1 2 days\n2 3 days\ndtype: timedelta64[ns]" + + exp5 = ( + "0 1 days 00:00:01\n" + "1 2 days 00:00:00\n" + "2 3 days 00:00:00\n" + "dtype: timedelta64[ns]" + ) + + with pd.option_context("display.width", 300): + for idx, expected in zip( + [idx1, idx2, idx3, idx4, idx5], [exp1, exp2, exp3, exp4, exp5] + ): + result = repr(Series(idx)) + assert result == expected + + def test_summary(self): + # GH#9116 + idx1 = TimedeltaIndex([], freq="D") + idx2 = TimedeltaIndex(["1 days"], freq="D") + idx3 = TimedeltaIndex(["1 days", "2 days"], freq="D") + idx4 = TimedeltaIndex(["1 days", "2 days", "3 days"], freq="D") + idx5 = TimedeltaIndex(["1 days 00:00:01", "2 days", "3 days"]) + + exp1 = "TimedeltaIndex: 0 entries\nFreq: D" + + exp2 = "TimedeltaIndex: 1 entries, 1 days to 1 days\nFreq: D" + + exp3 = "TimedeltaIndex: 2 entries, 1 days to 2 days\nFreq: D" + + exp4 = "TimedeltaIndex: 3 entries, 1 days to 3 days\nFreq: D" + + exp5 = "TimedeltaIndex: 3 entries, 1 days 00:00:01 to 3 days 00:00:00" + + for idx, expected in zip( + [idx1, idx2, idx3, idx4, idx5], [exp1, exp2, exp3, exp4, exp5] + ): + result = idx._summary() + assert result == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/test_freq_attr.py 
b/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/test_freq_attr.py new file mode 100644 index 0000000..39b9c11 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/test_freq_attr.py @@ -0,0 +1,61 @@ +import pytest + +from pandas import TimedeltaIndex + +from pandas.tseries.offsets import ( + DateOffset, + Day, + Hour, +) + + +class TestFreq: + @pytest.mark.parametrize("values", [["0 days", "2 days", "4 days"], []]) + @pytest.mark.parametrize("freq", ["2D", Day(2), "48H", Hour(48)]) + def test_freq_setter(self, values, freq): + # GH#20678 + idx = TimedeltaIndex(values) + + # can set to an offset, converting from string if necessary + idx._data.freq = freq + assert idx.freq == freq + assert isinstance(idx.freq, DateOffset) + + # can reset to None + idx._data.freq = None + assert idx.freq is None + + def test_freq_setter_errors(self): + # GH#20678 + idx = TimedeltaIndex(["0 days", "2 days", "4 days"]) + + # setting with an incompatible freq + msg = ( + "Inferred frequency 2D from passed values does not conform to " + "passed frequency 5D" + ) + with pytest.raises(ValueError, match=msg): + idx._data.freq = "5D" + + # setting with a non-fixed frequency + msg = r"<2 \* BusinessDays> is a non-fixed frequency" + with pytest.raises(ValueError, match=msg): + idx._data.freq = "2B" + + # setting with non-freq string + with pytest.raises(ValueError, match="Invalid frequency"): + idx._data.freq = "foo" + + def test_freq_view_safe(self): + # Setting the freq for one TimedeltaIndex shouldn't alter the freq + # for another that views the same data + + tdi = TimedeltaIndex(["0 days", "2 days", "4 days"], freq="2D") + tda = tdi._data + + tdi2 = TimedeltaIndex(tda)._with_freq(None) + assert tdi2.freq is None + + # Original was not altered + assert tdi.freq == "2D" + assert tda.freq == "2D" diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/test_indexing.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/test_indexing.py new file mode 100644 index 0000000..b618f12 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/test_indexing.py @@ -0,0 +1,348 @@ +from datetime import ( + datetime, + timedelta, +) +import re + +import numpy as np +import pytest + +from pandas import ( + Index, + NaT, + Timedelta, + TimedeltaIndex, + Timestamp, + notna, + timedelta_range, + to_timedelta, +) +import pandas._testing as tm + + +class TestGetItem: + def test_getitem_slice_keeps_name(self): + # GH#4226 + tdi = timedelta_range("1d", "5d", freq="H", name="timebucket") + assert tdi[1:].name == tdi.name + + def test_getitem(self): + idx1 = timedelta_range("1 day", "31 day", freq="D", name="idx") + + for idx in [idx1]: + result = idx[0] + assert result == Timedelta("1 day") + + result = idx[0:5] + expected = timedelta_range("1 day", "5 day", freq="D", name="idx") + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx[0:10:2] + expected = timedelta_range("1 day", "9 day", freq="2D", name="idx") + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx[-20:-5:3] + expected = timedelta_range("12 day", "24 day", freq="3D", name="idx") + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx[4::-1] + expected = TimedeltaIndex( + ["5 day", "4 day", "3 day", "2 day", "1 day"], freq="-1D", name="idx" + ) + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + 
@pytest.mark.parametrize( + "key", + [ + Timestamp("1970-01-01"), + Timestamp("1970-01-02"), + datetime(1970, 1, 1), + Timestamp("1970-01-03").to_datetime64(), + # non-matching NA values + np.datetime64("NaT"), + ], + ) + def test_timestamp_invalid_key(self, key): + # GH#20464 + tdi = timedelta_range(0, periods=10) + with pytest.raises(KeyError, match=re.escape(repr(key))): + tdi.get_loc(key) + + +class TestGetLoc: + @pytest.mark.filterwarnings("ignore:Passing method:FutureWarning") + def test_get_loc(self): + idx = to_timedelta(["0 days", "1 days", "2 days"]) + + for method in [None, "pad", "backfill", "nearest"]: + assert idx.get_loc(idx[1], method) == 1 + assert idx.get_loc(idx[1].to_pytimedelta(), method) == 1 + assert idx.get_loc(str(idx[1]), method) == 1 + + assert idx.get_loc(idx[1], "pad", tolerance=Timedelta(0)) == 1 + assert idx.get_loc(idx[1], "pad", tolerance=np.timedelta64(0, "s")) == 1 + assert idx.get_loc(idx[1], "pad", tolerance=timedelta(0)) == 1 + + with pytest.raises(ValueError, match="unit abbreviation w/o a number"): + idx.get_loc(idx[1], method="nearest", tolerance="foo") + + with pytest.raises(ValueError, match="tolerance size must match"): + idx.get_loc( + idx[1], + method="nearest", + tolerance=[ + Timedelta(0).to_timedelta64(), + Timedelta(0).to_timedelta64(), + ], + ) + + for method, loc in [("pad", 1), ("backfill", 2), ("nearest", 1)]: + assert idx.get_loc("1 day 1 hour", method) == loc + + # GH 16909 + assert idx.get_loc(idx[1].to_timedelta64()) == 1 + + # GH 16896 + assert idx.get_loc("0 days") == 0 + + def test_get_loc_nat(self): + tidx = TimedeltaIndex(["1 days 01:00:00", "NaT", "2 days 01:00:00"]) + + assert tidx.get_loc(NaT) == 1 + assert tidx.get_loc(None) == 1 + assert tidx.get_loc(float("nan")) == 1 + assert tidx.get_loc(np.nan) == 1 + + +class TestGetIndexer: + def test_get_indexer(self): + idx = to_timedelta(["0 days", "1 days", "2 days"]) + tm.assert_numpy_array_equal( + idx.get_indexer(idx), np.array([0, 1, 2], dtype=np.intp) + ) + + target = to_timedelta(["-1 hour", "12 hours", "1 day 1 hour"]) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "pad"), np.array([-1, 0, 1], dtype=np.intp) + ) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "backfill"), np.array([0, 1, 2], dtype=np.intp) + ) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "nearest"), np.array([0, 1, 1], dtype=np.intp) + ) + + res = idx.get_indexer(target, "nearest", tolerance=Timedelta("1 hour")) + tm.assert_numpy_array_equal(res, np.array([0, -1, 1], dtype=np.intp)) + + +class TestWhere: + def test_where_doesnt_retain_freq(self): + tdi = timedelta_range("1 day", periods=3, freq="D", name="idx") + cond = [True, True, False] + expected = TimedeltaIndex([tdi[0], tdi[1], tdi[0]], freq=None, name="idx") + + result = tdi.where(cond, tdi[::-1]) + tm.assert_index_equal(result, expected) + + def test_where_invalid_dtypes(self, fixed_now_ts): + tdi = timedelta_range("1 day", periods=3, freq="D", name="idx") + + tail = tdi[2:].tolist() + i2 = Index([NaT, NaT] + tail) + mask = notna(i2) + + expected = Index([NaT.value, NaT.value] + tail, dtype=object, name="idx") + assert isinstance(expected[0], int) + result = tdi.where(mask, i2.asi8) + tm.assert_index_equal(result, expected) + + ts = i2 + fixed_now_ts + expected = Index([ts[0], ts[1]] + tail, dtype=object, name="idx") + result = tdi.where(mask, ts) + tm.assert_index_equal(result, expected) + + per = (i2 + fixed_now_ts).to_period("D") + expected = Index([per[0], per[1]] + tail, dtype=object, name="idx") + result = 
tdi.where(mask, per) + tm.assert_index_equal(result, expected) + + ts = fixed_now_ts + expected = Index([ts, ts] + tail, dtype=object, name="idx") + result = tdi.where(mask, ts) + tm.assert_index_equal(result, expected) + + def test_where_mismatched_nat(self): + tdi = timedelta_range("1 day", periods=3, freq="D", name="idx") + cond = np.array([True, False, False]) + + dtnat = np.datetime64("NaT", "ns") + expected = Index([tdi[0], dtnat, dtnat], dtype=object, name="idx") + assert expected[2] is dtnat + result = tdi.where(cond, dtnat) + tm.assert_index_equal(result, expected) + + +class TestTake: + def test_take(self): + # GH 10295 + idx1 = timedelta_range("1 day", "31 day", freq="D", name="idx") + + for idx in [idx1]: + result = idx.take([0]) + assert result == Timedelta("1 day") + + result = idx.take([-1]) + assert result == Timedelta("31 day") + + result = idx.take([0, 1, 2]) + expected = timedelta_range("1 day", "3 day", freq="D", name="idx") + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx.take([0, 2, 4]) + expected = timedelta_range("1 day", "5 day", freq="2D", name="idx") + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx.take([7, 4, 1]) + expected = timedelta_range("8 day", "2 day", freq="-3D", name="idx") + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx.take([3, 2, 5]) + expected = TimedeltaIndex(["4 day", "3 day", "6 day"], name="idx") + tm.assert_index_equal(result, expected) + assert result.freq is None + + result = idx.take([-3, 2, 5]) + expected = TimedeltaIndex(["29 day", "3 day", "6 day"], name="idx") + tm.assert_index_equal(result, expected) + assert result.freq is None + + def test_take_invalid_kwargs(self): + idx = timedelta_range("1 day", "31 day", freq="D", name="idx") + indices = [1, 6, 5, 9, 10, 13, 15, 3] + + msg = r"take\(\) got an unexpected keyword argument 'foo'" + with pytest.raises(TypeError, match=msg): + idx.take(indices, foo=2) + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + idx.take(indices, out=indices) + + msg = "the 'mode' parameter is not supported" + with pytest.raises(ValueError, match=msg): + idx.take(indices, mode="clip") + + def test_take_equiv_getitem(self): + tds = ["1day 02:00:00", "1 day 04:00:00", "1 day 10:00:00"] + idx = timedelta_range(start="1d", end="2d", freq="H", name="idx") + expected = TimedeltaIndex(tds, freq=None, name="idx") + + taken1 = idx.take([2, 4, 10]) + taken2 = idx[[2, 4, 10]] + + for taken in [taken1, taken2]: + tm.assert_index_equal(taken, expected) + assert isinstance(taken, TimedeltaIndex) + assert taken.freq is None + assert taken.name == expected.name + + def test_take_fill_value(self): + # GH 12631 + idx = TimedeltaIndex(["1 days", "2 days", "3 days"], name="xxx") + result = idx.take(np.array([1, 0, -1])) + expected = TimedeltaIndex(["2 days", "1 days", "3 days"], name="xxx") + tm.assert_index_equal(result, expected) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + expected = TimedeltaIndex(["2 days", "1 days", "NaT"], name="xxx") + tm.assert_index_equal(result, expected) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = TimedeltaIndex(["2 days", "1 days", "3 days"], name="xxx") + tm.assert_index_equal(result, expected) + + msg = ( + "When allow_fill=True and fill_value is not None, " + "all indices must be >= -1" + ) + with 
pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + msg = "index -5 is out of bounds for (axis 0 with )?size 3" + with pytest.raises(IndexError, match=msg): + idx.take(np.array([1, -5])) + + +class TestMaybeCastSliceBound: + @pytest.fixture(params=["increasing", "decreasing", None]) + def monotonic(self, request): + return request.param + + @pytest.fixture + def tdi(self, monotonic): + tdi = timedelta_range("1 Day", periods=10) + if monotonic == "decreasing": + tdi = tdi[::-1] + elif monotonic is None: + taker = np.arange(10, dtype=np.intp) + np.random.shuffle(taker) + tdi = tdi.take(taker) + return tdi + + def test_maybe_cast_slice_bound_invalid_str(self, tdi): + # test the low-level _maybe_cast_slice_bound and that we get the + # expected exception+message all the way up the stack + msg = ( + "cannot do slice indexing on TimedeltaIndex with these " + r"indexers \[foo\] of type str" + ) + with pytest.raises(TypeError, match=msg): + tdi._maybe_cast_slice_bound("foo", side="left") + with pytest.raises(TypeError, match=msg): + tdi.get_slice_bound("foo", side="left") + with pytest.raises(TypeError, match=msg): + tdi.slice_locs("foo", None, None) + + def test_slice_invalid_str_with_timedeltaindex( + self, tdi, frame_or_series, indexer_sl + ): + obj = frame_or_series(range(10), index=tdi) + + msg = ( + "cannot do slice indexing on TimedeltaIndex with these " + r"indexers \[foo\] of type str" + ) + with pytest.raises(TypeError, match=msg): + indexer_sl(obj)["foo":] + with pytest.raises(TypeError, match=msg): + indexer_sl(obj)["foo":-1] + with pytest.raises(TypeError, match=msg): + indexer_sl(obj)[:"foo"] + with pytest.raises(TypeError, match=msg): + indexer_sl(obj)[tdi[0] : "foo"] + + +class TestContains: + def test_contains_nonunique(self): + # GH#9512 + for vals in ( + [0, 1, 0], + [0, 0, -1], + [0, -1, -1], + ["00:01:00", "00:01:00", "00:02:00"], + ["00:01:00", "00:01:00", "00:00:01"], + ): + idx = TimedeltaIndex(vals) + assert idx[0] in idx diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/test_join.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/test_join.py new file mode 100644 index 0000000..2d8795b --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/test_join.py @@ -0,0 +1,53 @@ +import numpy as np + +from pandas import ( + Index, + Timedelta, + timedelta_range, +) +import pandas._testing as tm + + +class TestJoin: + def test_append_join_nondatetimeindex(self): + rng = timedelta_range("1 days", periods=10) + idx = Index(["a", "b", "c", "d"]) + + result = rng.append(idx) + assert isinstance(result[0], Timedelta) + + # it works + rng.join(idx, how="outer") + + def test_join_self(self, join_type): + index = timedelta_range("1 day", periods=10) + joined = index.join(index, how=join_type) + tm.assert_index_equal(index, joined) + + def test_does_not_convert_mixed_integer(self): + df = tm.makeCustomDataframe( + 10, + 10, + data_gen_f=lambda *args, **kwargs: np.random.randn(), + r_idx_type="i", + c_idx_type="td", + ) + str(df) + + cols = df.columns.join(df.index, how="outer") + joined = cols.join(df.columns) + assert cols.dtype == np.dtype("O") + assert cols.dtype == joined.dtype + tm.assert_index_equal(cols, joined) + + def test_join_preserves_freq(self): + # GH#32157 + tdi = timedelta_range("1 day", periods=10) + result = tdi[:5].join(tdi[5:], how="outer") + assert 
result.freq == tdi.freq + tm.assert_index_equal(result, tdi) + + result = tdi[:5].join(tdi[6:], how="outer") + assert result.freq is None + expected = tdi.delete(5) + tm.assert_index_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/test_ops.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/test_ops.py new file mode 100644 index 0000000..f6013ba --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/test_ops.py @@ -0,0 +1,14 @@ +from pandas import ( + TimedeltaIndex, + timedelta_range, +) +import pandas._testing as tm + + +class TestTimedeltaIndexOps: + def test_infer_freq(self, freq_sample): + # GH#11018 + idx = timedelta_range("1", freq=freq_sample, periods=10) + result = TimedeltaIndex(idx.asi8, freq="infer") + tm.assert_index_equal(idx, result) + assert result.freq == freq_sample diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/test_pickle.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/test_pickle.py new file mode 100644 index 0000000..befe709 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/test_pickle.py @@ -0,0 +1,11 @@ +from pandas import timedelta_range +import pandas._testing as tm + + +class TestPickle: + def test_pickle_after_set_freq(self): + tdi = timedelta_range("1 day", periods=4, freq="s") + tdi = tdi._with_freq(None) + + res = tm.round_trip_pickle(tdi) + tm.assert_index_equal(res, tdi) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/test_scalar_compat.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/test_scalar_compat.py new file mode 100644 index 0000000..5e4b228 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/test_scalar_compat.py @@ -0,0 +1,142 @@ +""" +Tests for TimedeltaIndex methods behaving like their Timedelta counterparts +""" + +import numpy as np +import pytest + +from pandas._libs.tslibs.offsets import INVALID_FREQ_ERR_MSG + +from pandas import ( + Index, + Series, + Timedelta, + TimedeltaIndex, + timedelta_range, +) +import pandas._testing as tm + + +class TestVectorizedTimedelta: + def test_tdi_total_seconds(self): + # GH#10939 + # test index + rng = timedelta_range("1 days, 10:11:12.100123456", periods=2, freq="s") + expt = [ + 1 * 86400 + 10 * 3600 + 11 * 60 + 12 + 100123456.0 / 1e9, + 1 * 86400 + 10 * 3600 + 11 * 60 + 13 + 100123456.0 / 1e9, + ] + tm.assert_almost_equal(rng.total_seconds(), Index(expt)) + + # test Series + ser = Series(rng) + s_expt = Series(expt, index=[0, 1]) + tm.assert_series_equal(ser.dt.total_seconds(), s_expt) + + # with nat + ser[1] = np.nan + s_expt = Series( + [1 * 86400 + 10 * 3600 + 11 * 60 + 12 + 100123456.0 / 1e9, np.nan], + index=[0, 1], + ) + tm.assert_series_equal(ser.dt.total_seconds(), s_expt) + + # with both nat + ser = Series([np.nan, np.nan], dtype="timedelta64[ns]") + tm.assert_series_equal( + ser.dt.total_seconds(), Series([np.nan, np.nan], index=[0, 1]) + ) + + def test_tdi_round(self): + td = timedelta_range(start="16801 days", periods=5, freq="30Min") + elt = td[1] + + expected_rng = TimedeltaIndex( + [ + Timedelta("16801 days 00:00:00"), + Timedelta("16801 days 00:00:00"), + Timedelta("16801 days 01:00:00"), + Timedelta("16801 days 02:00:00"), + Timedelta("16801 days 02:00:00"), + ] + ) + expected_elt = expected_rng[1] + + tm.assert_index_equal(td.round(freq="H"), expected_rng) + assert elt.round(freq="H") == 
expected_elt + + msg = INVALID_FREQ_ERR_MSG + with pytest.raises(ValueError, match=msg): + td.round(freq="foo") + with pytest.raises(ValueError, match=msg): + elt.round(freq="foo") + + msg = " is a non-fixed frequency" + with pytest.raises(ValueError, match=msg): + td.round(freq="M") + with pytest.raises(ValueError, match=msg): + elt.round(freq="M") + + @pytest.mark.parametrize( + "freq,msg", + [ + ("Y", " is a non-fixed frequency"), + ("M", " is a non-fixed frequency"), + ("foobar", "Invalid frequency: foobar"), + ], + ) + def test_tdi_round_invalid(self, freq, msg): + t1 = timedelta_range("1 days", periods=3, freq="1 min 2 s 3 us") + + with pytest.raises(ValueError, match=msg): + t1.round(freq) + with pytest.raises(ValueError, match=msg): + # Same test for TimedeltaArray + t1._data.round(freq) + + # TODO: de-duplicate with test_tdi_round + def test_round(self): + t1 = timedelta_range("1 days", periods=3, freq="1 min 2 s 3 us") + t2 = -1 * t1 + t1a = timedelta_range("1 days", periods=3, freq="1 min 2 s") + t1c = TimedeltaIndex([1, 1, 1], unit="D") + + # note that negative times round DOWN! so don't give whole numbers + for (freq, s1, s2) in [ + ("N", t1, t2), + ("U", t1, t2), + ( + "L", + t1a, + TimedeltaIndex( + ["-1 days +00:00:00", "-2 days +23:58:58", "-2 days +23:57:56"] + ), + ), + ( + "S", + t1a, + TimedeltaIndex( + ["-1 days +00:00:00", "-2 days +23:58:58", "-2 days +23:57:56"] + ), + ), + ("12T", t1c, TimedeltaIndex(["-1 days", "-1 days", "-1 days"])), + ("H", t1c, TimedeltaIndex(["-1 days", "-1 days", "-1 days"])), + ("d", t1c, TimedeltaIndex([-1, -1, -1], unit="D")), + ]: + + r1 = t1.round(freq) + tm.assert_index_equal(r1, s1) + r2 = t2.round(freq) + tm.assert_index_equal(r2, s2) + + def test_components(self): + rng = timedelta_range("1 days, 10:11:12", periods=2, freq="s") + rng.components + + # with nat + s = Series(rng) + s[1] = np.nan + + result = s.dt.components + assert not result.iloc[0].isna().all() + assert result.iloc[1].isna().all() diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/test_searchsorted.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/test_searchsorted.py new file mode 100644 index 0000000..710571e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/test_searchsorted.py @@ -0,0 +1,28 @@ +import numpy as np +import pytest + +from pandas import ( + TimedeltaIndex, + Timestamp, +) +import pandas._testing as tm + + +class TestSearchSorted: + def test_searchsorted_different_argument_classes(self, listlike_box): + idx = TimedeltaIndex(["1 day", "2 days", "3 days"]) + result = idx.searchsorted(listlike_box(idx)) + expected = np.arange(len(idx), dtype=result.dtype) + tm.assert_numpy_array_equal(result, expected) + + result = idx._data.searchsorted(listlike_box(idx)) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "arg", [[1, 2], ["a", "b"], [Timestamp("2020-01-01", tz="Europe/London")] * 2] + ) + def test_searchsorted_invalid_argument_dtype(self, arg): + idx = TimedeltaIndex(["1 day", "2 days", "3 days"]) + msg = "value should be a 'Timedelta', 'NaT', or array of those. 
Got" + with pytest.raises(TypeError, match=msg): + idx.searchsorted(arg) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/test_setops.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/test_setops.py new file mode 100644 index 0000000..4574c15 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/test_setops.py @@ -0,0 +1,260 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + TimedeltaIndex, + timedelta_range, +) +import pandas._testing as tm +from pandas.core.api import Int64Index + +from pandas.tseries.offsets import Hour + + +class TestTimedeltaIndex: + def test_union(self): + + i1 = timedelta_range("1day", periods=5) + i2 = timedelta_range("3day", periods=5) + result = i1.union(i2) + expected = timedelta_range("1day", periods=7) + tm.assert_index_equal(result, expected) + + i1 = Int64Index(np.arange(0, 20, 2)) + i2 = timedelta_range(start="1 day", periods=10, freq="D") + i1.union(i2) # Works + i2.union(i1) # Fails with "AttributeError: can't set attribute" + + def test_union_sort_false(self): + tdi = timedelta_range("1day", periods=5) + + left = tdi[3:] + right = tdi[:3] + + # Check that we are testing the desired code path + assert left._can_fast_union(right) + + result = left.union(right) + tm.assert_index_equal(result, tdi) + + result = left.union(right, sort=False) + expected = TimedeltaIndex(["4 Days", "5 Days", "1 Days", "2 Day", "3 Days"]) + tm.assert_index_equal(result, expected) + + def test_union_coverage(self): + + idx = TimedeltaIndex(["3d", "1d", "2d"]) + ordered = TimedeltaIndex(idx.sort_values(), freq="infer") + result = ordered.union(idx) + tm.assert_index_equal(result, ordered) + + result = ordered[:0].union(ordered) + tm.assert_index_equal(result, ordered) + assert result.freq == ordered.freq + + def test_union_bug_1730(self): + + rng_a = timedelta_range("1 day", periods=4, freq="3H") + rng_b = timedelta_range("1 day", periods=4, freq="4H") + + result = rng_a.union(rng_b) + exp = TimedeltaIndex(sorted(set(rng_a) | set(rng_b))) + tm.assert_index_equal(result, exp) + + def test_union_bug_1745(self): + + left = TimedeltaIndex(["1 day 15:19:49.695000"]) + right = TimedeltaIndex( + ["2 day 13:04:21.322000", "1 day 15:27:24.873000", "1 day 15:31:05.350000"] + ) + + result = left.union(right) + exp = TimedeltaIndex(sorted(set(left) | set(right))) + tm.assert_index_equal(result, exp) + + def test_union_bug_4564(self): + + left = timedelta_range("1 day", "30d") + right = left + pd.offsets.Minute(15) + + result = left.union(right) + exp = TimedeltaIndex(sorted(set(left) | set(right))) + tm.assert_index_equal(result, exp) + + def test_union_freq_infer(self): + # When taking the union of two TimedeltaIndexes, we infer + # a freq even if the arguments don't have freq. This matches + # DatetimeIndex behavior. 
+ tdi = timedelta_range("1 Day", periods=5) + left = tdi[[0, 1, 3, 4]] + right = tdi[[2, 3, 1]] + + assert left.freq is None + assert right.freq is None + + result = left.union(right) + tm.assert_index_equal(result, tdi) + assert result.freq == "D" + + def test_intersection_bug_1708(self): + index_1 = timedelta_range("1 day", periods=4, freq="h") + index_2 = index_1 + pd.offsets.Hour(5) + + with tm.assert_produces_warning(FutureWarning): + result = index_1 & index_2 + assert len(result) == 0 + + index_1 = timedelta_range("1 day", periods=4, freq="h") + index_2 = index_1 + pd.offsets.Hour(1) + + with tm.assert_produces_warning(FutureWarning): + result = index_1 & index_2 + expected = timedelta_range("1 day 01:00:00", periods=3, freq="h") + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + def test_intersection_equal(self, sort): + # GH 24471 Test intersection outcome given the sort keyword + # for equal indices intersection should return the original index + first = timedelta_range("1 day", periods=4, freq="h") + second = timedelta_range("1 day", periods=4, freq="h") + intersect = first.intersection(second, sort=sort) + if sort is None: + tm.assert_index_equal(intersect, second.sort_values()) + assert tm.equalContents(intersect, second) + + # Corner cases + inter = first.intersection(first, sort=sort) + assert inter is first + + @pytest.mark.parametrize("period_1, period_2", [(0, 4), (4, 0)]) + def test_intersection_zero_length(self, period_1, period_2, sort): + # GH 24471 test for non overlap the intersection should be zero length + index_1 = timedelta_range("1 day", periods=period_1, freq="h") + index_2 = timedelta_range("1 day", periods=period_2, freq="h") + expected = timedelta_range("1 day", periods=0, freq="h") + result = index_1.intersection(index_2, sort=sort) + tm.assert_index_equal(result, expected) + + def test_zero_length_input_index(self, sort): + # GH 24966 test for 0-len intersections are copied + index_1 = timedelta_range("1 day", periods=0, freq="h") + index_2 = timedelta_range("1 day", periods=3, freq="h") + result = index_1.intersection(index_2, sort=sort) + assert index_1 is not result + assert index_2 is not result + tm.assert_copy(result, index_1) + + @pytest.mark.parametrize( + "rng, expected", + # if target has the same name, it is preserved + [ + ( + timedelta_range("1 day", periods=5, freq="h", name="idx"), + timedelta_range("1 day", periods=4, freq="h", name="idx"), + ), + # if target name is different, it will be reset + ( + timedelta_range("1 day", periods=5, freq="h", name="other"), + timedelta_range("1 day", periods=4, freq="h", name=None), + ), + # if no overlap exists return empty index + ( + timedelta_range("1 day", periods=10, freq="h", name="idx")[5:], + TimedeltaIndex([], freq="h", name="idx"), + ), + ], + ) + def test_intersection(self, rng, expected, sort): + # GH 4690 (with tz) + base = timedelta_range("1 day", periods=4, freq="h", name="idx") + result = base.intersection(rng, sort=sort) + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + + @pytest.mark.parametrize( + "rng, expected", + # part intersection works + [ + ( + TimedeltaIndex(["5 hour", "2 hour", "4 hour", "9 hour"], name="idx"), + TimedeltaIndex(["2 hour", "4 hour"], name="idx"), + ), + # reordered part intersection + ( + TimedeltaIndex(["2 hour", "5 hour", "5 hour", "1 hour"], name="other"), + TimedeltaIndex(["1 hour", "2 hour"], 
name=None), + ), + # reversed index + ( + TimedeltaIndex(["1 hour", "2 hour", "4 hour", "3 hour"], name="idx")[ + ::-1 + ], + TimedeltaIndex(["1 hour", "2 hour", "4 hour", "3 hour"], name="idx"), + ), + ], + ) + def test_intersection_non_monotonic(self, rng, expected, sort): + # 24471 non-monotonic + base = TimedeltaIndex(["1 hour", "2 hour", "4 hour", "3 hour"], name="idx") + result = base.intersection(rng, sort=sort) + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + assert result.name == expected.name + + # if reversed order, frequency is still the same + if all(base == rng[::-1]) and sort is None: + assert isinstance(result.freq, Hour) + else: + assert result.freq is None + + +class TestTimedeltaIndexDifference: + def test_difference_freq(self, sort): + # GH14323: Difference of TimedeltaIndex should not preserve frequency + + index = timedelta_range("0 days", "5 days", freq="D") + + other = timedelta_range("1 days", "4 days", freq="D") + expected = TimedeltaIndex(["0 days", "5 days"], freq=None) + idx_diff = index.difference(other, sort) + tm.assert_index_equal(idx_diff, expected) + tm.assert_attr_equal("freq", idx_diff, expected) + + other = timedelta_range("2 days", "5 days", freq="D") + idx_diff = index.difference(other, sort) + expected = TimedeltaIndex(["0 days", "1 days"], freq=None) + tm.assert_index_equal(idx_diff, expected) + tm.assert_attr_equal("freq", idx_diff, expected) + + def test_difference_sort(self, sort): + + index = TimedeltaIndex( + ["5 days", "3 days", "2 days", "4 days", "1 days", "0 days"] + ) + + other = timedelta_range("1 days", "4 days", freq="D") + idx_diff = index.difference(other, sort) + + expected = TimedeltaIndex(["5 days", "0 days"], freq=None) + + if sort is None: + expected = expected.sort_values() + + tm.assert_index_equal(idx_diff, expected) + tm.assert_attr_equal("freq", idx_diff, expected) + + other = timedelta_range("2 days", "5 days", freq="D") + idx_diff = index.difference(other, sort) + expected = TimedeltaIndex(["1 days", "0 days"], freq=None) + + if sort is None: + expected = expected.sort_values() + + tm.assert_index_equal(idx_diff, expected) + tm.assert_attr_equal("freq", idx_diff, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/test_timedelta.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/test_timedelta.py new file mode 100644 index 0000000..8ceef81 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -0,0 +1,145 @@ +from datetime import timedelta + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Index, + NaT, + Series, + Timedelta, + TimedeltaIndex, + timedelta_range, +) +import pandas._testing as tm +from pandas.core.indexes.api import Int64Index +from pandas.tests.indexes.datetimelike import DatetimeLike + +randn = np.random.randn + + +class TestTimedeltaIndex(DatetimeLike): + _index_cls = TimedeltaIndex + + @pytest.fixture + def simple_index(self) -> TimedeltaIndex: + index = pd.to_timedelta(range(5), unit="d")._with_freq("infer") + assert index.freq == "D" + ret = index + pd.offsets.Hour(1) + assert ret.freq == "D" + return ret + + @pytest.fixture + def index(self): + return tm.makeTimedeltaIndex(10) + + def test_numeric_compat(self): + # Dummy method to override super's version; this test is now done + # in test_arithmetic.py + pass + + def test_shift(self): + pass # this is handled in test_arithmetic.py + + def 
test_misc_coverage(self): + + rng = timedelta_range("1 day", periods=5) + result = rng.groupby(rng.days) + assert isinstance(list(result.values())[0][0], Timedelta) + + def test_map(self): + # test_map_dictlike generally tests + + rng = timedelta_range("1 day", periods=10) + + f = lambda x: x.days + result = rng.map(f) + exp = Int64Index([f(x) for x in rng]) + tm.assert_index_equal(result, exp) + + def test_pass_TimedeltaIndex_to_index(self): + + rng = timedelta_range("1 days", "10 days") + idx = Index(rng, dtype=object) + + expected = Index(rng.to_pytimedelta(), dtype=object) + + tm.assert_numpy_array_equal(idx.values, expected.values) + + def test_fields(self): + rng = timedelta_range("1 days, 10:11:12.100123456", periods=2, freq="s") + tm.assert_index_equal(rng.days, Index([1, 1], dtype="int64")) + tm.assert_index_equal( + rng.seconds, + Index([10 * 3600 + 11 * 60 + 12, 10 * 3600 + 11 * 60 + 13], dtype="int64"), + ) + tm.assert_index_equal( + rng.microseconds, Index([100 * 1000 + 123, 100 * 1000 + 123], dtype="int64") + ) + tm.assert_index_equal(rng.nanoseconds, Index([456, 456], dtype="int64")) + + msg = "'TimedeltaIndex' object has no attribute '{}'" + with pytest.raises(AttributeError, match=msg.format("hours")): + rng.hours + with pytest.raises(AttributeError, match=msg.format("minutes")): + rng.minutes + with pytest.raises(AttributeError, match=msg.format("milliseconds")): + rng.milliseconds + + # with nat + s = Series(rng) + s[1] = np.nan + + tm.assert_series_equal(s.dt.days, Series([1, np.nan], index=[0, 1])) + tm.assert_series_equal( + s.dt.seconds, Series([10 * 3600 + 11 * 60 + 12, np.nan], index=[0, 1]) + ) + + # preserve name (GH15589) + rng.name = "name" + assert rng.days.name == "name" + + def test_freq_conversion_always_floating(self): + # even if we have no NaTs, we get back float64; this matches TDA and Series + tdi = timedelta_range("1 Day", periods=30) + + res = tdi.astype("m8[s]") + expected = Index((tdi.view("i8") / 10 ** 9).astype(np.float64)) + tm.assert_index_equal(res, expected) + + # check this matches Series and TimedeltaArray + res = tdi._data.astype("m8[s]") + tm.assert_numpy_array_equal(res, expected._values) + + res = tdi.to_series().astype("m8[s]") + tm.assert_numpy_array_equal(res._values, expected._values) + + def test_freq_conversion(self, index_or_series): + + # doc example + + scalar = Timedelta(days=31) + td = index_or_series( + [scalar, scalar, scalar + timedelta(minutes=5, seconds=3), NaT], + dtype="m8[ns]", + ) + + result = td / np.timedelta64(1, "D") + expected = index_or_series( + [31, 31, (31 * 86400 + 5 * 60 + 3) / 86400.0, np.nan] + ) + tm.assert_equal(result, expected) + + result = td.astype("timedelta64[D]") + expected = index_or_series([31, 31, 31, np.nan]) + tm.assert_equal(result, expected) + + result = td / np.timedelta64(1, "s") + expected = index_or_series( + [31 * 86400, 31 * 86400, 31 * 86400 + 5 * 60 + 3, np.nan] + ) + tm.assert_equal(result, expected) + + result = td.astype("timedelta64[s]") + tm.assert_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/test_timedelta_range.py b/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/test_timedelta_range.py new file mode 100644 index 0000000..7277595 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexes/timedeltas/test_timedelta_range.py @@ -0,0 +1,92 @@ +import numpy as np +import pytest + +from pandas import ( + Timedelta, + timedelta_range, + to_timedelta, +) +import pandas._testing as tm + 
+from pandas.tseries.offsets import ( + Day, + Second, +) + + +class TestTimedeltas: + def test_timedelta_range(self): + + expected = to_timedelta(np.arange(5), unit="D") + result = timedelta_range("0 days", periods=5, freq="D") + tm.assert_index_equal(result, expected) + + expected = to_timedelta(np.arange(11), unit="D") + result = timedelta_range("0 days", "10 days", freq="D") + tm.assert_index_equal(result, expected) + + expected = to_timedelta(np.arange(5), unit="D") + Second(2) + Day() + result = timedelta_range("1 days, 00:00:02", "5 days, 00:00:02", freq="D") + tm.assert_index_equal(result, expected) + + expected = to_timedelta([1, 3, 5, 7, 9], unit="D") + Second(2) + result = timedelta_range("1 days, 00:00:02", periods=5, freq="2D") + tm.assert_index_equal(result, expected) + + expected = to_timedelta(np.arange(50), unit="T") * 30 + result = timedelta_range("0 days", freq="30T", periods=50) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "periods, freq", [(3, "2D"), (5, "D"), (6, "19H12T"), (7, "16H"), (9, "12H")] + ) + def test_linspace_behavior(self, periods, freq): + # GH 20976 + result = timedelta_range(start="0 days", end="4 days", periods=periods) + expected = timedelta_range(start="0 days", end="4 days", freq=freq) + tm.assert_index_equal(result, expected) + + def test_errors(self): + # not enough params + msg = ( + "Of the four parameters: start, end, periods, and freq, " + "exactly three must be specified" + ) + with pytest.raises(ValueError, match=msg): + timedelta_range(start="0 days") + + with pytest.raises(ValueError, match=msg): + timedelta_range(end="5 days") + + with pytest.raises(ValueError, match=msg): + timedelta_range(periods=2) + + with pytest.raises(ValueError, match=msg): + timedelta_range() + + # too many params + with pytest.raises(ValueError, match=msg): + timedelta_range(start="0 days", end="5 days", periods=10, freq="H") + + @pytest.mark.parametrize( + "start, end, freq, expected_periods", + [ + ("1D", "10D", "2D", (10 - 1) // 2 + 1), + ("2D", "30D", "3D", (30 - 2) // 3 + 1), + ("2s", "50s", "5s", (50 - 2) // 5 + 1), + # tests that worked before GH 33498: + ("4D", "16D", "3D", (16 - 4) // 3 + 1), + ("8D", "16D", "40s", (16 * 3600 * 24 - 8 * 3600 * 24) // 40 + 1), + ], + ) + def test_timedelta_range_freq_divide_end(self, start, end, freq, expected_periods): + # GH 33498 only the cases where `(end % freq) == 0` used to fail + res = timedelta_range(start=start, end=end, freq=freq) + assert Timedelta(start) == res[0] + assert Timedelta(end) >= res[-1] + assert len(res) == expected_periods + + def test_timedelta_range_infer_freq(self): + # https://github.com/pandas-dev/pandas/issues/35897 + result = timedelta_range("0s", "1s", periods=31) + assert result.freq is None diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/indexing/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..a452d72 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/common.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/common.cpython-38.pyc new file mode 
100644 index 0000000..3195c03 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/common.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_at.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_at.cpython-38.pyc new file mode 100644 index 0000000..a97e84c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_at.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_categorical.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_categorical.cpython-38.pyc new file mode 100644 index 0000000..e3d96da Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_categorical.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_chaining_and_caching.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_chaining_and_caching.cpython-38.pyc new file mode 100644 index 0000000..5a4edc2 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_chaining_and_caching.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_check_indexer.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_check_indexer.cpython-38.pyc new file mode 100644 index 0000000..5a6dcc3 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_check_indexer.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_coercion.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_coercion.cpython-38.pyc new file mode 100644 index 0000000..bfcbdc1 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_coercion.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_datetime.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_datetime.cpython-38.pyc new file mode 100644 index 0000000..a19d94d Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_datetime.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_floats.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_floats.cpython-38.pyc new file mode 100644 index 0000000..fea777c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_floats.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_iat.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_iat.cpython-38.pyc new file mode 100644 index 0000000..7e5ef3e Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_iat.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_iloc.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_iloc.cpython-38.pyc new file mode 100644 index 0000000..27c2983 Binary files 
/dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_iloc.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_indexers.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_indexers.cpython-38.pyc new file mode 100644 index 0000000..b708d3a Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_indexers.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_indexing.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_indexing.cpython-38.pyc new file mode 100644 index 0000000..a2c23a7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_indexing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_loc.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_loc.cpython-38.pyc new file mode 100644 index 0000000..0f319a5 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_loc.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_na_indexing.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_na_indexing.cpython-38.pyc new file mode 100644 index 0000000..fa39ef4 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_na_indexing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_partial.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_partial.cpython-38.pyc new file mode 100644 index 0000000..4c7edcf Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_partial.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_scalar.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_scalar.cpython-38.pyc new file mode 100644 index 0000000..4cc8c01 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexing/__pycache__/test_scalar.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/common.py b/.local/lib/python3.8/site-packages/pandas/tests/indexing/common.py new file mode 100644 index 0000000..f8db005 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexing/common.py @@ -0,0 +1,190 @@ +""" common utilities """ +import itertools + +import numpy as np + +from pandas import ( + DataFrame, + MultiIndex, + Series, + date_range, +) +import pandas._testing as tm +from pandas.core.api import ( + Float64Index, + UInt64Index, +) + + +def _mklbl(prefix, n): + return [f"{prefix}{i}" for i in range(n)] + + +def _axify(obj, key, axis): + # create a tuple accessor + axes = [slice(None)] * obj.ndim + axes[axis] = key + return tuple(axes) + + +class Base: + """indexing comprehensive base class""" + + _kinds = {"series", "frame"} + _typs = { + "ints", + "uints", + "labels", + "mixed", + "ts", + "floats", + "empty", + "ts_rev", + "multi", + } + + def setup_method(self, method): + + self.series_ints = Series(np.random.rand(4), index=np.arange(0, 8, 2)) + self.frame_ints = DataFrame( + np.random.randn(4, 4), 
index=np.arange(0, 8, 2), columns=np.arange(0, 12, 3) + ) + + self.series_uints = Series( + np.random.rand(4), index=UInt64Index(np.arange(0, 8, 2)) + ) + self.frame_uints = DataFrame( + np.random.randn(4, 4), + index=UInt64Index(range(0, 8, 2)), + columns=UInt64Index(range(0, 12, 3)), + ) + + self.series_floats = Series( + np.random.rand(4), index=Float64Index(range(0, 8, 2)) + ) + self.frame_floats = DataFrame( + np.random.randn(4, 4), + index=Float64Index(range(0, 8, 2)), + columns=Float64Index(range(0, 12, 3)), + ) + + m_idces = [ + MultiIndex.from_product([[1, 2], [3, 4]]), + MultiIndex.from_product([[5, 6], [7, 8]]), + MultiIndex.from_product([[9, 10], [11, 12]]), + ] + + self.series_multi = Series(np.random.rand(4), index=m_idces[0]) + self.frame_multi = DataFrame( + np.random.randn(4, 4), index=m_idces[0], columns=m_idces[1] + ) + + self.series_labels = Series(np.random.randn(4), index=list("abcd")) + self.frame_labels = DataFrame( + np.random.randn(4, 4), index=list("abcd"), columns=list("ABCD") + ) + + self.series_mixed = Series(np.random.randn(4), index=[2, 4, "null", 8]) + self.frame_mixed = DataFrame(np.random.randn(4, 4), index=[2, 4, "null", 8]) + + self.series_ts = Series( + np.random.randn(4), index=date_range("20130101", periods=4) + ) + self.frame_ts = DataFrame( + np.random.randn(4, 4), index=date_range("20130101", periods=4) + ) + + dates_rev = date_range("20130101", periods=4).sort_values(ascending=False) + self.series_ts_rev = Series(np.random.randn(4), index=dates_rev) + self.frame_ts_rev = DataFrame(np.random.randn(4, 4), index=dates_rev) + + self.frame_empty = DataFrame() + self.series_empty = Series(dtype=object) + + # form agglomerates + for kind in self._kinds: + d = {} + for typ in self._typs: + d[typ] = getattr(self, f"{kind}_{typ}") + + setattr(self, kind, d) + + def generate_indices(self, f, values=False): + """ + generate the indices; + if values is True, use the axis values, + if False, use positional ranges + """ + axes = f.axes + if values: + axes = (list(range(len(ax))) for ax in axes) + + return itertools.product(*axes) + + def get_value(self, name, f, i, values=False): + """return the value for the location i""" + # check against values + if values: + return f.values[i] + + elif name == "iat": + return f.iloc[i] + else: + assert name == "at" + return f.loc[i] + + def check_values(self, f, func, values=False): + + if f is None: + return + axes = f.axes + indices = itertools.product(*axes) + + for i in indices: + result = getattr(f, func)[i] + + # check against values + if values: + expected = f.values[i] + else: + expected = f + for a in reversed(i): + expected = expected.__getitem__(a) + + tm.assert_almost_equal(result, expected) + + def check_result(self, method, key, typs=None, axes=None, fails=None): + def _eq(axis, obj, key): + """index obj with key along the given axis, tolerating only expected failures""" + axified = _axify(obj, key, axis) + try: + getattr(obj, method).__getitem__(axified) + + except (IndexError, TypeError, KeyError) as detail: + + # if the exception type is in fails, that's ok; otherwise re-raise it + if fails is not None: + if isinstance(detail, fails): + return + raise + + if typs is None: + typs = self._typs + + if axes is None: + axes = [0, 1] + else: + assert axes in [0, 1] + axes = [axes] + + # check + for kind in self._kinds: + + d = getattr(self, kind) + for ax in axes: + for typ in typs: + assert typ in self._typs + + obj = d[typ] + if ax < obj.ndim: + _eq(axis=ax, obj=obj, key=key) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/interval/__init__.py
b/.local/lib/python3.8/site-packages/pandas/tests/indexing/interval/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/interval/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexing/interval/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..0324cda Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexing/interval/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/interval/__pycache__/test_interval.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexing/interval/__pycache__/test_interval.cpython-38.pyc new file mode 100644 index 0000000..36ea0d6 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexing/interval/__pycache__/test_interval.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/interval/__pycache__/test_interval_new.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexing/interval/__pycache__/test_interval_new.cpython-38.pyc new file mode 100644 index 0000000..0f83c6b Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexing/interval/__pycache__/test_interval_new.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/interval/test_interval.py b/.local/lib/python3.8/site-packages/pandas/tests/indexing/interval/test_interval.py new file mode 100644 index 0000000..db3a569 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexing/interval/test_interval.py @@ -0,0 +1,175 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + IntervalIndex, + Series, +) +import pandas._testing as tm + + +class TestIntervalIndex: + @pytest.fixture + def series_with_interval_index(self): + return Series(np.arange(5), IntervalIndex.from_breaks(np.arange(6))) + + def test_getitem_with_scalar(self, series_with_interval_index, indexer_sl): + + ser = series_with_interval_index.copy() + + expected = ser.iloc[:3] + tm.assert_series_equal(expected, indexer_sl(ser)[:3]) + tm.assert_series_equal(expected, indexer_sl(ser)[:2.5]) + tm.assert_series_equal(expected, indexer_sl(ser)[0.1:2.5]) + if indexer_sl is tm.loc: + tm.assert_series_equal(expected, ser.loc[-1:3]) + + expected = ser.iloc[1:4] + tm.assert_series_equal(expected, indexer_sl(ser)[[1.5, 2.5, 3.5]]) + tm.assert_series_equal(expected, indexer_sl(ser)[[2, 3, 4]]) + tm.assert_series_equal(expected, indexer_sl(ser)[[1.5, 3, 4]]) + + expected = ser.iloc[2:5] + tm.assert_series_equal(expected, indexer_sl(ser)[ser >= 2]) + + @pytest.mark.parametrize("direction", ["increasing", "decreasing"]) + def test_getitem_nonoverlapping_monotonic(self, direction, closed, indexer_sl): + tpls = [(0, 1), (2, 3), (4, 5)] + if direction == "decreasing": + tpls = tpls[::-1] + + idx = IntervalIndex.from_tuples(tpls, closed=closed) + ser = Series(list("abc"), idx) + + for key, expected in zip(idx.left, ser): + if idx.closed_left: + assert indexer_sl(ser)[key] == expected + else: + with pytest.raises(KeyError, match=str(key)): + indexer_sl(ser)[key] + + for key, expected in zip(idx.right, ser): + if idx.closed_right: + assert indexer_sl(ser)[key] == expected + else: + with pytest.raises(KeyError, match=str(key)): + indexer_sl(ser)[key] + + for key, expected in zip(idx.mid, ser): + assert indexer_sl(ser)[key] == expected + + def 
test_getitem_non_matching(self, series_with_interval_index, indexer_sl): + ser = series_with_interval_index.copy() + + # this is a departure from our current + # indexing scheme, but simpler + with pytest.raises(KeyError, match=r"\[-1\] not in index"): + indexer_sl(ser)[[-1, 3, 4, 5]] + + with pytest.raises(KeyError, match=r"\[-1\] not in index"): + indexer_sl(ser)[[-1, 3]] + + @pytest.mark.slow + def test_loc_getitem_large_series(self): + ser = Series( + np.arange(1000000), index=IntervalIndex.from_breaks(np.arange(1000001)) + ) + + result1 = ser.loc[:80000] + result2 = ser.loc[0:80000] + result3 = ser.loc[0:80000:1] + tm.assert_series_equal(result1, result2) + tm.assert_series_equal(result1, result3) + + def test_loc_getitem_frame(self): + # CategoricalIndex with IntervalIndex categories + df = DataFrame({"A": range(10)}) + ser = pd.cut(df.A, 5) + df["B"] = ser + df = df.set_index("B") + + result = df.loc[4] + expected = df.iloc[4:6] + tm.assert_frame_equal(result, expected) + + with pytest.raises(KeyError, match="10"): + df.loc[10] + + # single list-like + result = df.loc[[4]] + expected = df.iloc[4:6] + tm.assert_frame_equal(result, expected) + + # non-unique + result = df.loc[[4, 5]] + expected = df.take([4, 5, 4, 5]) + tm.assert_frame_equal(result, expected) + + with pytest.raises(KeyError, match=r"None of \[\[10\]\] are"): + df.loc[[10]] + + # partial missing + with pytest.raises(KeyError, match=r"\[10\] not in index"): + df.loc[[10, 4]] + + def test_getitem_interval_with_nans(self, frame_or_series, indexer_sl): + # GH#41831 + + index = IntervalIndex([np.nan, np.nan]) + key = index[:-1] + + obj = frame_or_series(range(2), index=index) + if frame_or_series is DataFrame and indexer_sl is tm.setitem: + obj = obj.T + + result = indexer_sl(obj)[key] + expected = obj + + tm.assert_equal(result, expected) + + +class TestIntervalIndexInsideMultiIndex: + def test_mi_intervalindex_slicing_with_scalar(self): + # GH#27456 + ii = IntervalIndex.from_arrays( + [0, 1, 10, 11, 0, 1, 10, 11], [1, 2, 11, 12, 1, 2, 11, 12], name="MP" + ) + idx = pd.MultiIndex.from_arrays( + [ + pd.Index(["FC", "FC", "FC", "FC", "OWNER", "OWNER", "OWNER", "OWNER"]), + pd.Index( + ["RID1", "RID1", "RID2", "RID2", "RID1", "RID1", "RID2", "RID2"] + ), + ii, + ] + ) + + idx.names = ["Item", "RID", "MP"] + df = DataFrame({"value": [1, 2, 3, 4, 5, 6, 7, 8]}) + df.index = idx + + query_df = DataFrame( + { + "Item": ["FC", "OWNER", "FC", "OWNER", "OWNER"], + "RID": ["RID1", "RID1", "RID1", "RID2", "RID2"], + "MP": [0.2, 1.5, 1.6, 11.1, 10.9], + } + ) + + query_df = query_df.sort_index() + + idx = pd.MultiIndex.from_arrays([query_df.Item, query_df.RID, query_df.MP]) + query_df.index = idx + result = df.value.loc[query_df.index] + + # the IntervalIndex level is indexed with floats, which map to + # the intervals containing them. Matching the behavior we would get + # with _only_ an IntervalIndex, we get an IntervalIndex level back. 
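+ # As a concrete reading of the hard-coded positions below: each queried
+ # float maps to the one interval (under its Item/RID labels) containing
+ # it, namely 0.2 -> ii[0] = (0, 1], 1.5 -> ii[1] = (1, 2], 1.6 -> ii[1],
+ # 11.1 -> ii[3] = (11, 12] and 10.9 -> ii[2] = (10, 11], hence the
+ # positions [0, 1, 1, 3, 2].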
+ sliced_level = ii.take([0, 1, 1, 3, 2]) + expected_index = pd.MultiIndex.from_arrays( + [idx.get_level_values(0), idx.get_level_values(1), sliced_level] + ) + expected = Series([1, 6, 2, 8, 7], index=expected_index, name="value") + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/interval/test_interval_new.py b/.local/lib/python3.8/site-packages/pandas/tests/indexing/interval/test_interval_new.py new file mode 100644 index 0000000..aad6523 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexing/interval/test_interval_new.py @@ -0,0 +1,209 @@ +import re + +import numpy as np +import pytest + +from pandas import ( + Interval, + IntervalIndex, + Series, +) +import pandas._testing as tm + + +class TestIntervalIndex: + @pytest.fixture + def series_with_interval_index(self): + return Series(np.arange(5), IntervalIndex.from_breaks(np.arange(6))) + + def test_loc_with_interval(self, series_with_interval_index, indexer_sl): + + # loc with single label / list of labels: + # - Intervals: only exact matches + # - scalars: those that contain it + + ser = series_with_interval_index.copy() + + expected = 0 + result = indexer_sl(ser)[Interval(0, 1)] + assert result == expected + + expected = ser.iloc[3:5] + result = indexer_sl(ser)[[Interval(3, 4), Interval(4, 5)]] + tm.assert_series_equal(expected, result) + + # missing or not exact + with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='left')")): + indexer_sl(ser)[Interval(3, 5, closed="left")] + + with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")): + indexer_sl(ser)[Interval(3, 5)] + + with pytest.raises( + KeyError, match=re.escape("Interval(-2, 0, closed='right')") + ): + indexer_sl(ser)[Interval(-2, 0)] + + with pytest.raises(KeyError, match=re.escape("Interval(5, 6, closed='right')")): + indexer_sl(ser)[Interval(5, 6)] + + def test_loc_with_scalar(self, series_with_interval_index, indexer_sl): + + # loc with single label / list of labels: + # - Intervals: only exact matches + # - scalars: those that contain it + + ser = series_with_interval_index.copy() + + assert indexer_sl(ser)[1] == 0 + assert indexer_sl(ser)[1.5] == 1 + assert indexer_sl(ser)[2] == 1 + + expected = ser.iloc[1:4] + tm.assert_series_equal(expected, indexer_sl(ser)[[1.5, 2.5, 3.5]]) + tm.assert_series_equal(expected, indexer_sl(ser)[[2, 3, 4]]) + tm.assert_series_equal(expected, indexer_sl(ser)[[1.5, 3, 4]]) + + expected = ser.iloc[[1, 1, 2, 1]] + tm.assert_series_equal(expected, indexer_sl(ser)[[1.5, 2, 2.5, 1.5]]) + + expected = ser.iloc[2:5] + tm.assert_series_equal(expected, indexer_sl(ser)[ser >= 2]) + + def test_loc_with_slices(self, series_with_interval_index, indexer_sl): + + # loc with slices: + # - Interval objects: only works with exact matches + # - scalars: only works for non-overlapping, monotonic intervals, + # and start/stop select location based on the interval that + # contains them: + # (slice_loc(start, stop) == (idx.get_loc(start), idx.get_loc(stop)) + + ser = series_with_interval_index.copy() + + # slice of interval + + expected = ser.iloc[:3] + result = indexer_sl(ser)[Interval(0, 1) : Interval(2, 3)] + tm.assert_series_equal(expected, result) + + expected = ser.iloc[3:] + result = indexer_sl(ser)[Interval(3, 4) :] + tm.assert_series_equal(expected, result) + + msg = "Interval objects are not currently supported" + with pytest.raises(NotImplementedError, match=msg): + indexer_sl(ser)[Interval(3, 6) :] + + with 
pytest.raises(NotImplementedError, match=msg): + indexer_sl(ser)[Interval(3, 4, closed="left") :] + + def test_slice_step_ne1(self, series_with_interval_index): + # GH#31658 slice of scalar with step != 1 + ser = series_with_interval_index.copy() + expected = ser.iloc[0:4:2] + + result = ser[0:4:2] + tm.assert_series_equal(result, expected) + + result2 = ser[0:4][::2] + tm.assert_series_equal(result2, expected) + + def test_slice_float_start_stop(self, series_with_interval_index): + # GH#31658 slicing with integers is positional, with floats is not + # supported + ser = series_with_interval_index.copy() + + msg = "label-based slicing with step!=1 is not supported for IntervalIndex" + with pytest.raises(ValueError, match=msg): + ser[1.5:9.5:2] + + def test_slice_interval_step(self, series_with_interval_index): + # GH#31658 allows for integer step!=1, not Interval step + ser = series_with_interval_index.copy() + msg = "label-based slicing with step!=1 is not supported for IntervalIndex" + with pytest.raises(ValueError, match=msg): + ser[0 : 4 : Interval(0, 1)] + + def test_loc_with_overlap(self, indexer_sl): + + idx = IntervalIndex.from_tuples([(1, 5), (3, 7)]) + ser = Series(range(len(idx)), index=idx) + + # scalar + expected = ser + result = indexer_sl(ser)[4] + tm.assert_series_equal(expected, result) + + result = indexer_sl(ser)[[4]] + tm.assert_series_equal(expected, result) + + # interval + expected = 0 + result = indexer_sl(ser)[Interval(1, 5)] + assert result == expected + + expected = ser + result = indexer_sl(ser)[[Interval(1, 5), Interval(3, 7)]] + tm.assert_series_equal(expected, result) + + with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")): + indexer_sl(ser)[Interval(3, 5)] + + msg = r"None of \[\[Interval\(3, 5, closed='right'\)\]\]" + with pytest.raises(KeyError, match=msg): + indexer_sl(ser)[[Interval(3, 5)]] + + # slices with interval (only exact matches) + expected = ser + result = indexer_sl(ser)[Interval(1, 5) : Interval(3, 7)] + tm.assert_series_equal(expected, result) + + msg = ( + "'can only get slices from an IntervalIndex if bounds are" + " non-overlapping and all monotonic increasing or decreasing'" + ) + with pytest.raises(KeyError, match=msg): + indexer_sl(ser)[Interval(1, 6) : Interval(3, 8)] + + if indexer_sl is tm.loc: + # slices with scalar raise for overlapping intervals + # TODO KeyError is the appropriate error?
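+ # Rationale (sketch): with idx = [(1, 5], (3, 7]] the stop label 4 is
+ # contained in both intervals, so a scalar slice has no unambiguous
+ # bound positions; pandas raises instead of guessing.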
+ with pytest.raises(KeyError, match=msg): + ser.loc[1:4] + + def test_non_unique(self, indexer_sl): + + idx = IntervalIndex.from_tuples([(1, 3), (3, 7)]) + ser = Series(range(len(idx)), index=idx) + + result = indexer_sl(ser)[Interval(1, 3)] + assert result == 0 + + result = indexer_sl(ser)[[Interval(1, 3)]] + expected = ser.iloc[0:1] + tm.assert_series_equal(expected, result) + + def test_non_unique_moar(self, indexer_sl): + + idx = IntervalIndex.from_tuples([(1, 3), (1, 3), (3, 7)]) + ser = Series(range(len(idx)), index=idx) + + expected = ser.iloc[[0, 1]] + result = indexer_sl(ser)[Interval(1, 3)] + tm.assert_series_equal(expected, result) + + expected = ser + result = indexer_sl(ser)[Interval(1, 3) :] + tm.assert_series_equal(expected, result) + + expected = ser.iloc[[0, 1]] + result = indexer_sl(ser)[[Interval(1, 3)]] + tm.assert_series_equal(expected, result) + + def test_loc_getitem_missing_key_error_message( + self, frame_or_series, series_with_interval_index + ): + # GH#27365 + ser = series_with_interval_index.copy() + obj = frame_or_series(ser) + with pytest.raises(KeyError, match=r"\[6\]"): + obj.loc[[4, 5, 6]] diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..ed16e84 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/__pycache__/test_chaining_and_caching.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/__pycache__/test_chaining_and_caching.cpython-38.pyc new file mode 100644 index 0000000..9d6d4ba Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/__pycache__/test_chaining_and_caching.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/__pycache__/test_datetime.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/__pycache__/test_datetime.cpython-38.pyc new file mode 100644 index 0000000..f18d2ec Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/__pycache__/test_datetime.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/__pycache__/test_getitem.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/__pycache__/test_getitem.cpython-38.pyc new file mode 100644 index 0000000..d71a708 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/__pycache__/test_getitem.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/__pycache__/test_iloc.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/__pycache__/test_iloc.cpython-38.pyc new file mode 100644 index 0000000..b592b1a Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/__pycache__/test_iloc.cpython-38.pyc differ diff --git 
a/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/__pycache__/test_indexing_slow.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/__pycache__/test_indexing_slow.cpython-38.pyc new file mode 100644 index 0000000..301d362 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/__pycache__/test_indexing_slow.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/__pycache__/test_loc.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/__pycache__/test_loc.cpython-38.pyc new file mode 100644 index 0000000..eca15a7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/__pycache__/test_loc.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/__pycache__/test_multiindex.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/__pycache__/test_multiindex.cpython-38.pyc new file mode 100644 index 0000000..82f1fa0 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/__pycache__/test_multiindex.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/__pycache__/test_partial.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/__pycache__/test_partial.cpython-38.pyc new file mode 100644 index 0000000..e0a4448 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/__pycache__/test_partial.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/__pycache__/test_setitem.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/__pycache__/test_setitem.cpython-38.pyc new file mode 100644 index 0000000..8306f38 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/__pycache__/test_setitem.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/__pycache__/test_slice.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/__pycache__/test_slice.cpython-38.pyc new file mode 100644 index 0000000..be56457 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/__pycache__/test_slice.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/__pycache__/test_sorted.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/__pycache__/test_sorted.cpython-38.pyc new file mode 100644 index 0000000..8602adf Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/__pycache__/test_sorted.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/test_chaining_and_caching.py b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/test_chaining_and_caching.py new file mode 100644 index 0000000..6ccd44e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/test_chaining_and_caching.py @@ -0,0 +1,73 @@ +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas import ( + DataFrame, + MultiIndex, + Series, +) +import pandas._testing as tm +import pandas.core.common as com + + +def 
test_detect_chained_assignment(): + # Inplace ops, originally from: + # https://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug + a = [12, 23] + b = [123, None] + c = [1234, 2345] + d = [12345, 23456] + tuples = [("eyes", "left"), ("eyes", "right"), ("ears", "left"), ("ears", "right")] + events = { + ("eyes", "left"): a, + ("eyes", "right"): b, + ("ears", "left"): c, + ("ears", "right"): d, + } + multiind = MultiIndex.from_tuples(tuples, names=["part", "side"]) + zed = DataFrame(events, index=["a", "b"], columns=multiind) + + msg = "A value is trying to be set on a copy of a slice from a DataFrame" + with pytest.raises(com.SettingWithCopyError, match=msg): + zed["eyes"]["right"].fillna(value=555, inplace=True) + + +@td.skip_array_manager_invalid_test # with ArrayManager df.loc[0] is not a view +def test_cache_updating(): + # 5216 + # make sure that we don't try to set a dead cache + a = np.random.rand(10, 3) + df = DataFrame(a, columns=["x", "y", "z"]) + tuples = [(i, j) for i in range(5) for j in range(2)] + index = MultiIndex.from_tuples(tuples) + df.index = index + + # setting via chained assignment + # but actually works, since everything is a view + df.loc[0]["z"].iloc[0] = 1.0 + result = df.loc[(0, 0), "z"] + assert result == 1 + + # correct setting + df.loc[(0, 0), "z"] = 2 + result = df.loc[(0, 0), "z"] + assert result == 2 + + +@pytest.mark.slow +def test_indexer_caching(): + # GH5727 + # make sure that indexers are in the _internal_names_set + n = 1000001 + arrays = (range(n), range(n)) + index = MultiIndex.from_tuples(zip(*arrays)) + s = Series(np.zeros(n), index=index) + str(s) + + # setitem + expected = Series(np.ones(n), index=index) + s = Series(np.zeros(n), index=index) + s[s == 0] = 1 + tm.assert_series_equal(s, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/test_datetime.py b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/test_datetime.py new file mode 100644 index 0000000..a49cb0b --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/test_datetime.py @@ -0,0 +1,50 @@ +from datetime import datetime + +import numpy as np + +from pandas import ( + DataFrame, + Index, + MultiIndex, + Period, + Series, + period_range, + to_datetime, +) +import pandas._testing as tm + + +def test_multiindex_period_datetime(): + # GH4861, using datetime in period of multiindex raises exception + + idx1 = Index(["a", "a", "a", "b", "b"]) + idx2 = period_range("2012-01", periods=len(idx1), freq="M") + s = Series(np.random.randn(len(idx1)), [idx1, idx2]) + + # try Period as index + expected = s.iloc[0] + result = s.loc["a", Period("2012-01")] + assert result == expected + + # try datetime as index + result = s.loc["a", datetime(2012, 1, 1)] + assert result == expected + + +def test_multiindex_datetime_columns(): + # GH35015, using datetime as column indices raises exception + + mi = MultiIndex.from_tuples( + [(to_datetime("02/29/2020"), to_datetime("03/01/2020"))], names=["a", "b"] + ) + + df = DataFrame([], columns=mi) + + expected_df = DataFrame( + [], + columns=MultiIndex.from_arrays( + [[to_datetime("02/29/2020")], [to_datetime("03/01/2020")]], names=["a", "b"] + ), + ) + + tm.assert_frame_equal(df, expected_df) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/test_getitem.py b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/test_getitem.py new file mode 100644 index 0000000..3790a6e --- 
/dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/test_getitem.py @@ -0,0 +1,394 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, +) +import pandas._testing as tm +from pandas.core.indexing import IndexingError + +# ---------------------------------------------------------------------------- +# test indexing of Series with multi-level Index +# ---------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "access_method", + [lambda s, x: s[:, x], lambda s, x: s.loc[:, x], lambda s, x: s.xs(x, level=1)], +) +@pytest.mark.parametrize( + "level1_value, expected", + [(0, Series([1], index=[0])), (1, Series([2, 3], index=[1, 2]))], +) +def test_series_getitem_multiindex(access_method, level1_value, expected): + + # GH 6018 + # series regression getitem with a multi-index + + mi = MultiIndex.from_tuples([(0, 0), (1, 1), (2, 1)], names=["A", "B"]) + ser = Series([1, 2, 3], index=mi) + expected.index.name = "A" + + result = access_method(ser, level1_value) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("level0_value", ["D", "A"]) +def test_series_getitem_duplicates_multiindex(level0_value): + # GH 5725 the 'A' happens to be a valid Timestamp so it doesn't raise + # the appropriate error, only in PY3 of course! + + index = MultiIndex( + levels=[[level0_value, "B", "C"], [0, 26, 27, 37, 57, 67, 75, 82]], + codes=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], + names=["tag", "day"], + ) + arr = np.random.randn(len(index), 1) + df = DataFrame(arr, index=index, columns=["val"]) + + # confirm indexing on missing value raises KeyError + if level0_value != "A": + with pytest.raises(KeyError, match=r"^'A'$"): + df.val["A"] + + with pytest.raises(KeyError, match=r"^'X'$"): + df.val["X"] + + result = df.val[level0_value] + expected = Series( + arr.ravel()[0:3], name="val", index=Index([26, 37, 57], name="day") + ) + tm.assert_series_equal(result, expected) + + +def test_series_getitem(multiindex_year_month_day_dataframe_random_data, indexer_sl): + s = multiindex_year_month_day_dataframe_random_data["A"] + expected = s.reindex(s.index[42:65]) + expected.index = expected.index.droplevel(0).droplevel(0) + + result = indexer_sl(s)[2000, 3] + tm.assert_series_equal(result, expected) + + +def test_series_getitem_returns_scalar( + multiindex_year_month_day_dataframe_random_data, indexer_sl +): + s = multiindex_year_month_day_dataframe_random_data["A"] + expected = s.iloc[49] + + result = indexer_sl(s)[2000, 3, 10] + assert result == expected + + +@pytest.mark.parametrize( + "indexer,expected_error,expected_error_msg", + [ + (lambda s: s.__getitem__((2000, 3, 4)), KeyError, r"^\(2000, 3, 4\)$"), + (lambda s: s[(2000, 3, 4)], KeyError, r"^\(2000, 3, 4\)$"), + (lambda s: s.loc[(2000, 3, 4)], KeyError, r"^\(2000, 3, 4\)$"), + (lambda s: s.loc[(2000, 3, 4, 5)], IndexingError, "Too many indexers"), + (lambda s: s.__getitem__(len(s)), KeyError, ""), # match should include len(s) + (lambda s: s[len(s)], KeyError, ""), # match should include len(s) + ( + lambda s: s.iloc[len(s)], + IndexError, + "single positional indexer is out-of-bounds", + ), + ], +) +def test_series_getitem_indexing_errors( + multiindex_year_month_day_dataframe_random_data, + indexer, + expected_error, + expected_error_msg, +): + s = multiindex_year_month_day_dataframe_random_data["A"] + with pytest.raises(expected_error, match=expected_error_msg): + indexer(s) + + +def 
test_series_getitem_corner_generator( + multiindex_year_month_day_dataframe_random_data, +): + s = multiindex_year_month_day_dataframe_random_data["A"] + result = s[(x > 0 for x in s)] + expected = s[s > 0] + tm.assert_series_equal(result, expected) + + +# ---------------------------------------------------------------------------- +# test indexing of DataFrame with multi-level Index +# ---------------------------------------------------------------------------- + + +def test_getitem_simple(multiindex_dataframe_random_data): + df = multiindex_dataframe_random_data.T + expected = df.values[:, 0] + result = df["foo", "one"].values + tm.assert_almost_equal(result, expected) + + +@pytest.mark.parametrize( + "indexer,expected_error_msg", + [ + (lambda df: df[("foo", "four")], r"^\('foo', 'four'\)$"), + (lambda df: df["foobar"], r"^'foobar'$"), + ], +) +def test_frame_getitem_simple_key_error( + multiindex_dataframe_random_data, indexer, expected_error_msg +): + df = multiindex_dataframe_random_data.T + with pytest.raises(KeyError, match=expected_error_msg): + indexer(df) + + +def test_frame_getitem_multicolumn_empty_level(): + df = DataFrame({"a": ["1", "2", "3"], "b": ["2", "3", "4"]}) + df.columns = [ + ["level1 item1", "level1 item2"], + ["", "level2 item2"], + ["level3 item1", "level3 item2"], + ] + + result = df["level1 item1"] + expected = DataFrame( + [["1"], ["2"], ["3"]], index=df.index, columns=["level3 item1"] + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "indexer,expected_slice", + [ + (lambda df: df["foo"], slice(3)), + (lambda df: df["bar"], slice(3, 5)), + (lambda df: df.loc[:, "bar"], slice(3, 5)), + ], +) +def test_frame_getitem_toplevel( + multiindex_dataframe_random_data, indexer, expected_slice +): + df = multiindex_dataframe_random_data.T + expected = df.reindex(columns=df.columns[expected_slice]) + expected.columns = expected.columns.droplevel(0) + result = indexer(df) + tm.assert_frame_equal(result, expected) + + +def test_frame_mixed_depth_get(): + arrays = [ + ["a", "top", "top", "routine1", "routine1", "routine2"], + ["", "OD", "OD", "result1", "result2", "result1"], + ["", "wx", "wy", "", "", ""], + ] + + tuples = sorted(zip(*arrays)) + index = MultiIndex.from_tuples(tuples) + df = DataFrame(np.random.randn(4, 6), columns=index) + + result = df["a"] + expected = df["a", "", ""].rename("a") + tm.assert_series_equal(result, expected) + + result = df["routine1", "result1"] + expected = df["routine1", "result1", ""] + expected = expected.rename(("routine1", "result1")) + tm.assert_series_equal(result, expected) + + +def test_frame_getitem_nan_multiindex(nulls_fixture): + # GH#29751 + # loc on a multiindex containing nan values + n = nulls_fixture # for code readability + cols = ["a", "b", "c"] + df = DataFrame( + [[11, n, 13], [21, n, 23], [31, n, 33], [41, n, 43]], + columns=cols, + ).set_index(["a", "b"]) + df["c"] = df["c"].astype("int64") + + idx = (21, n) + result = df.loc[:idx] + expected = DataFrame([[11, n, 13], [21, n, 23]], columns=cols).set_index(["a", "b"]) + expected["c"] = expected["c"].astype("int64") + tm.assert_frame_equal(result, expected) + + result = df.loc[idx:] + expected = DataFrame( + [[21, n, 23], [31, n, 33], [41, n, 43]], columns=cols + ).set_index(["a", "b"]) + expected["c"] = expected["c"].astype("int64") + tm.assert_frame_equal(result, expected) + + idx1, idx2 = (21, n), (31, n) + result = df.loc[idx1:idx2] + expected = DataFrame([[21, n, 23], [31, n, 33]], columns=cols).set_index(["a", "b"]) + expected["c"] = 
expected["c"].astype("int64") + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "indexer,expected", + [ + ( + (["b"], ["bar", np.nan]), + ( + DataFrame( + [[2, 3], [5, 6]], + columns=MultiIndex.from_tuples([("b", "bar"), ("b", np.nan)]), + dtype="int64", + ) + ), + ), + ( + (["a", "b"]), + ( + DataFrame( + [[1, 2, 3], [4, 5, 6]], + columns=MultiIndex.from_tuples( + [("a", "foo"), ("b", "bar"), ("b", np.nan)] + ), + dtype="int64", + ) + ), + ), + ( + (["b"]), + ( + DataFrame( + [[2, 3], [5, 6]], + columns=MultiIndex.from_tuples([("b", "bar"), ("b", np.nan)]), + dtype="int64", + ) + ), + ), + ( + (["b"], ["bar"]), + ( + DataFrame( + [[2], [5]], + columns=MultiIndex.from_tuples([("b", "bar")]), + dtype="int64", + ) + ), + ), + ( + (["b"], [np.nan]), + ( + DataFrame( + [[3], [6]], + columns=MultiIndex( + codes=[[1], [-1]], levels=[["a", "b"], ["bar", "foo"]] + ), + dtype="int64", + ) + ), + ), + (("b", np.nan), Series([3, 6], dtype="int64", name=("b", np.nan))), + ], +) +def test_frame_getitem_nan_cols_multiindex( + indexer, + expected, + nulls_fixture, +): + # Slicing MultiIndex including levels with nan values, for more information + # see GH#25154 + df = DataFrame( + [[1, 2, 3], [4, 5, 6]], + columns=MultiIndex.from_tuples( + [("a", "foo"), ("b", "bar"), ("b", nulls_fixture)] + ), + dtype="int64", + ) + + result = df.loc[:, indexer] + tm.assert_equal(result, expected) + + +# ---------------------------------------------------------------------------- +# test indexing of DataFrame with multi-level Index with duplicates +# ---------------------------------------------------------------------------- + + +@pytest.fixture +def dataframe_with_duplicate_index(): + """Fixture for DataFrame used in tests for gh-4145 and gh-4146""" + data = [["a", "d", "e", "c", "f", "b"], [1, 4, 5, 3, 6, 2], [1, 4, 5, 3, 6, 2]] + index = ["h1", "h3", "h5"] + columns = MultiIndex( + levels=[["A", "B"], ["A1", "A2", "B1", "B2"]], + codes=[[0, 0, 0, 1, 1, 1], [0, 3, 3, 0, 1, 2]], + names=["main", "sub"], + ) + return DataFrame(data, index=index, columns=columns) + + +@pytest.mark.parametrize( + "indexer", [lambda df: df[("A", "A1")], lambda df: df.loc[:, ("A", "A1")]] +) +def test_frame_mi_access(dataframe_with_duplicate_index, indexer): + # GH 4145 + df = dataframe_with_duplicate_index + index = Index(["h1", "h3", "h5"]) + columns = MultiIndex.from_tuples([("A", "A1")], names=["main", "sub"]) + expected = DataFrame([["a", 1, 1]], index=columns, columns=index).T + + result = indexer(df) + tm.assert_frame_equal(result, expected) + + +def test_frame_mi_access_returns_series(dataframe_with_duplicate_index): + # GH 4146, not returning a block manager when selecting a unique index + # from a duplicate index + # as of 4879, this returns a Series (which is similar to what happens + # with a non-unique) + df = dataframe_with_duplicate_index + expected = Series(["a", 1, 1], index=["h1", "h3", "h5"], name="A1") + result = df["A"]["A1"] + tm.assert_series_equal(result, expected) + + +def test_frame_mi_access_returns_frame(dataframe_with_duplicate_index): + # selecting a non_unique from the 2nd level + df = dataframe_with_duplicate_index + expected = DataFrame( + [["d", 4, 4], ["e", 5, 5]], + index=Index(["B2", "B2"], name="sub"), + columns=["h1", "h3", "h5"], + ).T + result = df["A"]["B2"] + tm.assert_frame_equal(result, expected) + + +def test_frame_mi_empty_slice(): + # GH 15454 + df = DataFrame(0, index=range(2), columns=MultiIndex.from_product([[1], [2]])) + result = df[[]] + expected = DataFrame( + 
index=[0, 1], columns=MultiIndex(levels=[[1], [2]], codes=[[], []]) + ) + tm.assert_frame_equal(result, expected) + + +def test_loc_empty_multiindex(): + # GH#36936 + arrays = [["a", "a", "b", "a"], ["a", "a", "b", "b"]] + index = MultiIndex.from_arrays(arrays, names=("idx1", "idx2")) + df = DataFrame([1, 2, 3, 4], index=index, columns=["value"]) + + # loc on empty multiindex == loc with False mask + empty_multiindex = df.loc[df.loc[:, "value"] == 0, :].index + result = df.loc[empty_multiindex, :] + expected = df.loc[[False] * len(df.index), :] + tm.assert_frame_equal(result, expected) + + # replacing value with loc on empty multiindex + df.loc[df.loc[df.loc[:, "value"] == 0].index, "value"] = 5 + result = df + expected = DataFrame([1, 2, 3, 4], index=index, columns=["value"]) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/test_iloc.py b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/test_iloc.py new file mode 100644 index 0000000..db91d5a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/test_iloc.py @@ -0,0 +1,171 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + MultiIndex, + Series, +) +import pandas._testing as tm + + +@pytest.fixture +def simple_multiindex_dataframe(): + """ + Factory function to create simple 3 x 3 dataframe with + both columns and row MultiIndex using supplied data or + random data by default. + """ + + data = np.random.randn(3, 3) + return DataFrame( + data, columns=[[2, 2, 4], [6, 8, 10]], index=[[4, 4, 8], [8, 10, 12]] + ) + + +@pytest.mark.parametrize( + "indexer, expected", + [ + ( + lambda df: df.iloc[0], + lambda arr: Series(arr[0], index=[[2, 2, 4], [6, 8, 10]], name=(4, 8)), + ), + ( + lambda df: df.iloc[2], + lambda arr: Series(arr[2], index=[[2, 2, 4], [6, 8, 10]], name=(8, 12)), + ), + ( + lambda df: df.iloc[:, 2], + lambda arr: Series(arr[:, 2], index=[[4, 4, 8], [8, 10, 12]], name=(4, 10)), + ), + ], +) +def test_iloc_returns_series(indexer, expected, simple_multiindex_dataframe): + df = simple_multiindex_dataframe + arr = df.values + result = indexer(df) + expected = expected(arr) + tm.assert_series_equal(result, expected) + + +def test_iloc_returns_dataframe(simple_multiindex_dataframe): + df = simple_multiindex_dataframe + result = df.iloc[[0, 1]] + expected = df.xs(4, drop_level=False) + tm.assert_frame_equal(result, expected) + + +def test_iloc_returns_scalar(simple_multiindex_dataframe): + df = simple_multiindex_dataframe + arr = df.values + result = df.iloc[2, 2] + expected = arr[2, 2] + assert result == expected + + +def test_iloc_getitem_multiple_items(): + # GH 5528 + tup = zip(*[["a", "a", "b", "b"], ["x", "y", "x", "y"]]) + index = MultiIndex.from_tuples(tup) + df = DataFrame(np.random.randn(4, 4), index=index) + result = df.iloc[[2, 3]] + expected = df.xs("b", drop_level=False) + tm.assert_frame_equal(result, expected) + + +def test_iloc_getitem_labels(): + # this is basically regular indexing + arr = np.random.randn(4, 3) + df = DataFrame( + arr, + columns=[["i", "i", "j"], ["A", "A", "B"]], + index=[["i", "i", "j", "k"], ["X", "X", "Y", "Y"]], + ) + result = df.iloc[2, 2] + expected = arr[2, 2] + assert result == expected + + +def test_frame_getitem_slice(multiindex_dataframe_random_data): + df = multiindex_dataframe_random_data + result = df.iloc[:4] + expected = df[:4] + tm.assert_frame_equal(result, expected) + + +def 
test_frame_setitem_slice(multiindex_dataframe_random_data): + df = multiindex_dataframe_random_data + df.iloc[:4] = 0 + + assert (df.values[:4] == 0).all() + assert (df.values[4:] != 0).all() + + +def test_indexing_ambiguity_bug_1678(): + # GH 1678 + columns = MultiIndex.from_tuples( + [("Ohio", "Green"), ("Ohio", "Red"), ("Colorado", "Green")] + ) + index = MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1), ("b", 2)]) + + df = DataFrame(np.arange(12).reshape((4, 3)), index=index, columns=columns) + + result = df.iloc[:, 1] + expected = df.loc[:, ("Ohio", "Red")] + tm.assert_series_equal(result, expected) + + +def test_iloc_integer_locations(): + # GH 13797 + data = [ + ["str00", "str01"], + ["str10", "str11"], + ["str20", "srt21"], + ["str30", "str31"], + ["str40", "str41"], + ] + + index = MultiIndex.from_tuples( + [("CC", "A"), ("CC", "B"), ("CC", "B"), ("BB", "a"), ("BB", "b")] + ) + + expected = DataFrame(data) + df = DataFrame(data, index=index) + + result = DataFrame([[df.iloc[r, c] for c in range(2)] for r in range(5)]) + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "data, indexes, values, expected_k", + [ + # test without indexer value in first level of MultiIndex + ([[2, 22, 5], [2, 33, 6]], [0, -1, 1], [2, 3, 1], [7, 10]), + # test like code sample 1 in the issue + ([[1, 22, 555], [1, 33, 666]], [0, -1, 1], [200, 300, 100], [755, 1066]), + # test like code sample 2 in the issue + ([[1, 3, 7], [2, 4, 8]], [0, -1, 1], [10, 10, 1000], [17, 1018]), + # test like code sample 3 in the issue + ([[1, 11, 4], [2, 22, 5], [3, 33, 6]], [0, -1, 1], [4, 7, 10], [8, 15, 13]), + ], +) +def test_iloc_setitem_int_multiindex_series(data, indexes, values, expected_k): + # GH17148 + df = DataFrame(data=data, columns=["i", "j", "k"]) + df = df.set_index(["i", "j"]) + + series = df.k.copy() + for i, v in zip(indexes, values): + series.iloc[i] += v + + df["k"] = expected_k + expected = df.k + tm.assert_series_equal(series, expected) + + +def test_getitem_iloc(multiindex_dataframe_random_data): + df = multiindex_dataframe_random_data + result = df.iloc[2] + expected = df.xs(df.index[2]) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/test_indexing_slow.py b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/test_indexing_slow.py new file mode 100644 index 0000000..e8c766d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/test_indexing_slow.py @@ -0,0 +1,97 @@ +from typing import ( + Any, + List, +) +import warnings + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Series, +) +import pandas._testing as tm + +m = 50 +n = 1000 +cols = ["jim", "joe", "jolie", "joline", "jolia"] + +vals: List[Any] = [ + np.random.randint(0, 10, n), + np.random.choice(list("abcdefghij"), n), + np.random.choice(pd.date_range("20141009", periods=10).tolist(), n), + np.random.choice(list("ZYXWVUTSRQ"), n), + np.random.randn(n), +] +vals = list(map(tuple, zip(*vals))) + +# bunch of keys for testing +keys: List[Any] = [ + np.random.randint(0, 11, m), + np.random.choice(list("abcdefghijk"), m), + np.random.choice(pd.date_range("20141009", periods=11).tolist(), m), + np.random.choice(list("ZYXWVUTSRQP"), m), +] +keys = list(map(tuple, zip(*keys))) +keys += list(map(lambda t: t[:-1], vals[:: n // m])) + + +# covers both unique index and non-unique index +df = DataFrame(vals, columns=cols) +a = pd.concat([df, df]) +b = 
df.drop_duplicates(subset=cols[:-1]) + + +def validate(mi, df, key): + # check indexing into a multi-index before & past the lexsort depth + + mask = np.ones(len(df)).astype("bool") + + # test for all partials of this key + for i, k in enumerate(key): + mask &= df.iloc[:, i] == k + + if not mask.any(): + assert key[: i + 1] not in mi.index + continue + + assert key[: i + 1] in mi.index + right = df[mask].copy() + + if i + 1 != len(key): # partial key + return_value = right.drop(cols[: i + 1], axis=1, inplace=True) + assert return_value is None + return_value = right.set_index(cols[i + 1 : -1], inplace=True) + assert return_value is None + tm.assert_frame_equal(mi.loc[key[: i + 1]], right) + + else: # full key + return_value = right.set_index(cols[:-1], inplace=True) + assert return_value is None + if len(right) == 1: # single hit + right = Series( + right["jolia"].values, name=right.index[0], index=["jolia"] + ) + tm.assert_series_equal(mi.loc[key[: i + 1]], right) + else: # multi hit + tm.assert_frame_equal(mi.loc[key[: i + 1]], right) + + +@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning") +@pytest.mark.parametrize("lexsort_depth", list(range(5))) +@pytest.mark.parametrize("key", keys) +@pytest.mark.parametrize("frame", [a, b]) +def test_multiindex_get_loc(lexsort_depth, key, frame): + # GH7724, GH2646 + + with warnings.catch_warnings(record=True): + if lexsort_depth == 0: + df = frame.copy() + else: + df = frame.sort_values(by=cols[:lexsort_depth]) + + mi = df.set_index(cols[:-1]) + assert not mi.index._lexsort_depth < lexsort_depth + validate(mi, df, key) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/test_loc.py b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/test_loc.py new file mode 100644 index 0000000..6e59311 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/test_loc.py @@ -0,0 +1,945 @@ +import numpy as np +import pytest + +from pandas.errors import PerformanceWarning + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, +) +import pandas._testing as tm +from pandas.core.indexing import IndexingError + + +@pytest.fixture +def single_level_multiindex(): + """single level MultiIndex""" + return MultiIndex( + levels=[["foo", "bar", "baz", "qux"]], codes=[[0, 1, 2, 3]], names=["first"] + ) + + +@pytest.fixture +def frame_random_data_integer_multi_index(): + levels = [[0, 1], [0, 1, 2]] + codes = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]] + index = MultiIndex(levels=levels, codes=codes) + return DataFrame(np.random.randn(6, 2), index=index) + + +class TestMultiIndexLoc: + def test_loc_setitem_frame_with_multiindex(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + frame.loc[("bar", "two"), "B"] = 5 + assert frame.loc[("bar", "two"), "B"] == 5 + + # with integer labels + df = frame.copy() + df.columns = list(range(3)) + df.loc[("bar", "two"), 1] = 7 + assert df.loc[("bar", "two"), 1] == 7 + + def test_loc_getitem_general(self): + + # GH#2817 + data = { + "amount": {0: 700, 1: 600, 2: 222, 3: 333, 4: 444}, + "col": {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0}, + "year": {0: 2012, 1: 2011, 2: 2012, 3: 2012, 4: 2012}, + } + df = DataFrame(data).set_index(keys=["col", "year"]) + key = 4.0, 2012 + + # emits a PerformanceWarning, ok + with tm.assert_produces_warning(PerformanceWarning): + tm.assert_frame_equal(df.loc[key], df.iloc[2:]) + + # this is ok + return_value = df.sort_index(inplace=True) + assert 
return_value is None + res = df.loc[key] + + # col has float dtype, result should be Float64Index + index = MultiIndex.from_arrays([[4.0] * 3, [2012] * 3], names=["col", "year"]) + expected = DataFrame({"amount": [222, 333, 444]}, index=index) + tm.assert_frame_equal(res, expected) + + def test_loc_getitem_multiindex_missing_label_raises(self): + # GH#21593 + df = DataFrame( + np.random.randn(3, 3), + columns=[[2, 2, 4], [6, 8, 10]], + index=[[4, 4, 8], [8, 10, 12]], + ) + + with pytest.raises(KeyError, match=r"^2$"): + df.loc[2] + + def test_loc_getitem_list_of_tuples_with_multiindex( + self, multiindex_year_month_day_dataframe_random_data + ): + ser = multiindex_year_month_day_dataframe_random_data["A"] + expected = ser.reindex(ser.index[49:51]) + result = ser.loc[[(2000, 3, 10), (2000, 3, 13)]] + tm.assert_series_equal(result, expected) + + def test_loc_getitem_series(self): + # GH14730 + # passing a series as a key with a MultiIndex + index = MultiIndex.from_product([[1, 2, 3], ["A", "B", "C"]]) + x = Series(index=index, data=range(9), dtype=np.float64) + y = Series([1, 3]) + expected = Series( + data=[0, 1, 2, 6, 7, 8], + index=MultiIndex.from_product([[1, 3], ["A", "B", "C"]]), + dtype=np.float64, + ) + result = x.loc[y] + tm.assert_series_equal(result, expected) + + result = x.loc[[1, 3]] + tm.assert_series_equal(result, expected) + + # GH15424 + y1 = Series([1, 3], index=[1, 2]) + result = x.loc[y1] + tm.assert_series_equal(result, expected) + + empty = Series(data=[], dtype=np.float64) + expected = Series( + [], + index=MultiIndex(levels=index.levels, codes=[[], []], dtype=np.float64), + dtype=np.float64, + ) + result = x.loc[empty] + tm.assert_series_equal(result, expected) + + def test_loc_getitem_array(self): + # GH15434 + # passing an array as a key with a MultiIndex + index = MultiIndex.from_product([[1, 2, 3], ["A", "B", "C"]]) + x = Series(index=index, data=range(9), dtype=np.float64) + y = np.array([1, 3]) + expected = Series( + data=[0, 1, 2, 6, 7, 8], + index=MultiIndex.from_product([[1, 3], ["A", "B", "C"]]), + dtype=np.float64, + ) + result = x.loc[y] + tm.assert_series_equal(result, expected) + + # empty array: + empty = np.array([]) + expected = Series( + [], + index=MultiIndex(levels=index.levels, codes=[[], []], dtype=np.float64), + dtype="float64", + ) + result = x.loc[empty] + tm.assert_series_equal(result, expected) + + # 0-dim array (scalar): + scalar = np.int64(1) + expected = Series(data=[0, 1, 2], index=["A", "B", "C"], dtype=np.float64) + result = x.loc[scalar] + tm.assert_series_equal(result, expected) + + def test_loc_multiindex_labels(self): + df = DataFrame( + np.random.randn(3, 3), + columns=[["i", "i", "j"], ["A", "A", "B"]], + index=[["i", "i", "j"], ["X", "X", "Y"]], + ) + + # the first 2 rows + expected = df.iloc[[0, 1]].droplevel(0) + result = df.loc["i"] + tm.assert_frame_equal(result, expected) + + # 2nd (last) column + expected = df.iloc[:, [2]].droplevel(0, axis=1) + result = df.loc[:, "j"] + tm.assert_frame_equal(result, expected) + + # bottom right corner + expected = df.iloc[[2], [2]].droplevel(0).droplevel(0, axis=1) + result = df.loc["j"].loc[:, "j"] + tm.assert_frame_equal(result, expected) + + # with a tuple + expected = df.iloc[[0, 1]] + result = df.loc[("i", "X")] + tm.assert_frame_equal(result, expected) + + def test_loc_multiindex_ints(self): + df = DataFrame( + np.random.randn(3, 3), + columns=[[2, 2, 4], [6, 8, 10]], + index=[[4, 4, 8], [8, 10, 12]], + ) + expected = df.iloc[[0, 1]].droplevel(0) + result = df.loc[4] + 
tm.assert_frame_equal(result, expected) + + def test_loc_multiindex_missing_label_raises(self): + df = DataFrame( + np.random.randn(3, 3), + columns=[[2, 2, 4], [6, 8, 10]], + index=[[4, 4, 8], [8, 10, 12]], + ) + + with pytest.raises(KeyError, match=r"^2$"): + df.loc[2] + + @pytest.mark.parametrize("key, pos", [([2, 4], [0, 1]), ([2], []), ([2, 3], [])]) + def test_loc_multiindex_list_missing_label(self, key, pos): + # GH 27148 - lists with missing labels _do_ raise + df = DataFrame( + np.random.randn(3, 3), + columns=[[2, 2, 4], [6, 8, 10]], + index=[[4, 4, 8], [8, 10, 12]], + ) + + with pytest.raises(KeyError, match="not in index"): + df.loc[key] + + def test_loc_multiindex_too_many_dims_raises(self): + # GH 14885 + s = Series( + range(8), + index=MultiIndex.from_product([["a", "b"], ["c", "d"], ["e", "f"]]), + ) + + with pytest.raises(KeyError, match=r"^\('a', 'b'\)$"): + s.loc["a", "b"] + with pytest.raises(KeyError, match=r"^\('a', 'd', 'g'\)$"): + s.loc["a", "d", "g"] + with pytest.raises(IndexingError, match="Too many indexers"): + s.loc["a", "d", "g", "j"] + + def test_loc_multiindex_indexer_none(self): + + # GH6788 + # multi-index indexer is None (meaning take all) + attributes = ["Attribute" + str(i) for i in range(1)] + attribute_values = ["Value" + str(i) for i in range(5)] + + index = MultiIndex.from_product([attributes, attribute_values]) + df = 0.1 * np.random.randn(10, 1 * 5) + 0.5 + df = DataFrame(df, columns=index) + result = df[attributes] + tm.assert_frame_equal(result, df) + + # GH 7349 + # loc with a multi-index seems to be doing fallback + df = DataFrame( + np.arange(12).reshape(-1, 1), + index=MultiIndex.from_product([[1, 2, 3, 4], [1, 2, 3]]), + ) + + expected = df.loc[([1, 2],), :] + result = df.loc[[1, 2]] + tm.assert_frame_equal(result, expected) + + def test_loc_multiindex_incomplete(self): + + # GH 7399 + # incomplete indexers + s = Series( + np.arange(15, dtype="int64"), + MultiIndex.from_product([range(5), ["a", "b", "c"]]), + ) + expected = s.loc[:, "a":"c"] + + result = s.loc[0:4, "a":"c"] + tm.assert_series_equal(result, expected) + + result = s.loc[:4, "a":"c"] + tm.assert_series_equal(result, expected) + + result = s.loc[0:, "a":"c"] + tm.assert_series_equal(result, expected) + + # GH 7400 + # multiindexer getitem with list of indexers skips wrong element + s = Series( + np.arange(15, dtype="int64"), + MultiIndex.from_product([range(5), ["a", "b", "c"]]), + ) + expected = s.iloc[[6, 7, 8, 12, 13, 14]] + result = s.loc[2:4:2, "a":"c"] + tm.assert_series_equal(result, expected) + + def test_get_loc_single_level(self, single_level_multiindex): + single_level = single_level_multiindex + s = Series(np.random.randn(len(single_level)), index=single_level) + for k in single_level.values: + s[k] + + def test_loc_getitem_int_slice(self): + # GH 3053 + # loc should treat integer slices like label slices + + index = MultiIndex.from_product([[6, 7, 8], ["a", "b"]]) + df = DataFrame(np.random.randn(6, 6), index, index) + result = df.loc[6:8, :] + expected = df + tm.assert_frame_equal(result, expected) + + index = MultiIndex.from_product([[10, 20, 30], ["a", "b"]]) + df = DataFrame(np.random.randn(6, 6), index, index) + result = df.loc[20:30, :] + expected = df.iloc[2:] + tm.assert_frame_equal(result, expected) + + # doc examples + result = df.loc[10, :] + expected = df.iloc[0:2] + expected.index = ["a", "b"] + tm.assert_frame_equal(result, expected) + + result = df.loc[:, 10] + expected = df[10] + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( 
+ "indexer_type_1", (list, tuple, set, slice, np.ndarray, Series, Index) + ) + @pytest.mark.parametrize( + "indexer_type_2", (list, tuple, set, slice, np.ndarray, Series, Index) + ) + def test_loc_getitem_nested_indexer(self, indexer_type_1, indexer_type_2): + # GH #19686 + # .loc should work with nested indexers which can be + # any list-like objects (see `is_list_like` (`pandas.api.types`)) or slices + + def convert_nested_indexer(indexer_type, keys): + if indexer_type == np.ndarray: + return np.array(keys) + if indexer_type == slice: + return slice(*keys) + return indexer_type(keys) + + a = [10, 20, 30] + b = [1, 2, 3] + index = MultiIndex.from_product([a, b]) + df = DataFrame( + np.arange(len(index), dtype="int64"), index=index, columns=["Data"] + ) + + keys = ([10, 20], [2, 3]) + types = (indexer_type_1, indexer_type_2) + + # check indexers with all the combinations of nested objects + # of all the valid types + indexer = tuple( + convert_nested_indexer(indexer_type, k) + for indexer_type, k in zip(types, keys) + ) + if indexer_type_1 is set or indexer_type_2 is set: + with tm.assert_produces_warning(FutureWarning): + result = df.loc[indexer, "Data"] + else: + result = df.loc[indexer, "Data"] + expected = Series( + [1, 2, 4, 5], name="Data", index=MultiIndex.from_product(keys) + ) + + tm.assert_series_equal(result, expected) + + def test_multiindex_loc_one_dimensional_tuple(self, frame_or_series): + # GH#37711 + mi = MultiIndex.from_tuples([("a", "A"), ("b", "A")]) + obj = frame_or_series([1, 2], index=mi) + obj.loc[("a",)] = 0 + expected = frame_or_series([0, 2], index=mi) + tm.assert_equal(obj, expected) + + @pytest.mark.parametrize("indexer", [("a",), ("a")]) + def test_multiindex_one_dimensional_tuple_columns(self, indexer): + # GH#37711 + mi = MultiIndex.from_tuples([("a", "A"), ("b", "A")]) + obj = DataFrame([1, 2], index=mi) + obj.loc[indexer, :] = 0 + expected = DataFrame([0, 2], index=mi) + tm.assert_frame_equal(obj, expected) + + @pytest.mark.parametrize( + "indexer, exp_value", [(slice(None), 1.0), ((1, 2), np.nan)] + ) + def test_multiindex_setitem_columns_enlarging(self, indexer, exp_value): + # GH#39147 + mi = MultiIndex.from_tuples([(1, 2), (3, 4)]) + df = DataFrame([[1, 2], [3, 4]], index=mi, columns=["a", "b"]) + df.loc[indexer, ["c", "d"]] = 1.0 + expected = DataFrame( + [[1, 2, 1.0, 1.0], [3, 4, exp_value, exp_value]], + index=mi, + columns=["a", "b", "c", "d"], + ) + tm.assert_frame_equal(df, expected) + + def test_sorted_multiindex_after_union(self): + # GH#44752 + midx = MultiIndex.from_product( + [pd.date_range("20110101", periods=2), Index(["a", "b"])] + ) + ser1 = Series(1, index=midx) + ser2 = Series(1, index=midx[:2]) + df = pd.concat([ser1, ser2], axis=1) + expected = df.copy() + result = df.loc["2011-01-01":"2011-01-02"] + tm.assert_frame_equal(result, expected) + + df = DataFrame({0: ser1, 1: ser2}) + result = df.loc["2011-01-01":"2011-01-02"] + tm.assert_frame_equal(result, expected) + + df = pd.concat([ser1, ser2.reindex(ser1.index)], axis=1) + result = df.loc["2011-01-01":"2011-01-02"] + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "indexer, pos", + [ + ([], []), # empty ok + (["A"], slice(3)), + (["A", "D"], []), # "D" isn't present -> raise + (["D", "E"], []), # no values found -> raise + (["D"], []), # same, with single item list: GH 27148 + (pd.IndexSlice[:, ["foo"]], slice(2, None, 3)), + (pd.IndexSlice[:, ["foo", "bah"]], slice(2, None, 3)), + ], +) +def test_loc_getitem_duplicates_multiindex_missing_indexers(indexer, 
pos): + # GH 7866 + # multi-index slicing with missing indexers + idx = MultiIndex.from_product( + [["A", "B", "C"], ["foo", "bar", "baz"]], names=["one", "two"] + ) + ser = Series(np.arange(9, dtype="int64"), index=idx).sort_index() + expected = ser.iloc[pos] + + if expected.size == 0 and indexer != []: + with pytest.raises(KeyError, match=str(indexer)): + ser.loc[indexer] + else: + warn = None + msg = "MultiIndex with a nested sequence" + if indexer == (slice(None), ["foo", "bah"]): + # "bah" is not in idx.levels[1], so is ignored, will raise KeyError + warn = FutureWarning + + with tm.assert_produces_warning(warn, match=msg): + result = ser.loc[indexer] + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("columns_indexer", [([], slice(None)), (["foo"], [])]) +def test_loc_getitem_duplicates_multiindex_empty_indexer(columns_indexer): + # GH 8737 + # empty indexer + multi_index = MultiIndex.from_product((["foo", "bar", "baz"], ["alpha", "beta"])) + df = DataFrame(np.random.randn(5, 6), index=range(5), columns=multi_index) + df = df.sort_index(level=0, axis=1) + + expected = DataFrame(index=range(5), columns=multi_index.reindex([])[0]) + result = df.loc[:, columns_indexer] + tm.assert_frame_equal(result, expected) + + +def test_loc_getitem_duplicates_multiindex_non_scalar_type_object(): + # regression from < 0.14.0 + # GH 7914 + df = DataFrame( + [[np.mean, np.median], ["mean", "median"]], + columns=MultiIndex.from_tuples([("functs", "mean"), ("functs", "median")]), + index=["function", "name"], + ) + result = df.loc["function", ("functs", "mean")] + expected = np.mean + assert result == expected + + +def test_loc_getitem_tuple_plus_slice(): + # GH 671 + df = DataFrame( + { + "a": np.arange(10), + "b": np.arange(10), + "c": np.random.randn(10), + "d": np.random.randn(10), + } + ).set_index(["a", "b"]) + expected = df.loc[0, 0] + result = df.loc[(0, 0), :] + tm.assert_series_equal(result, expected) + + +def test_loc_getitem_int(frame_random_data_integer_multi_index): + df = frame_random_data_integer_multi_index + result = df.loc[1] + expected = df[-3:] + expected.index = expected.index.droplevel(0) + tm.assert_frame_equal(result, expected) + + +def test_loc_getitem_int_raises_exception(frame_random_data_integer_multi_index): + df = frame_random_data_integer_multi_index + with pytest.raises(KeyError, match=r"^3$"): + df.loc[3] + + +def test_loc_getitem_lowerdim_corner(multiindex_dataframe_random_data): + df = multiindex_dataframe_random_data + + # test setup - check key not in dataframe + with pytest.raises(KeyError, match=r"^\('bar', 'three'\)$"): + df.loc[("bar", "three"), "B"] + + # in theory should be inserting in a sorted space???? 
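+    # Note: .loc assignment with a key that is missing from the index
+    # enlarges the object (setitem-with-expansion); the new row is appended
+    # at the end rather than inserted in sorted position, which is why the
+    # read-back below goes through sort_index() first.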
+ df.loc[("bar", "three"), "B"] = 0 + expected = 0 + result = df.sort_index().loc[("bar", "three"), "B"] + assert result == expected + + +def test_loc_setitem_single_column_slice(): + # case from https://github.com/pandas-dev/pandas/issues/27841 + df = DataFrame( + "string", + index=list("abcd"), + columns=MultiIndex.from_product([["Main"], ("another", "one")]), + ) + df["labels"] = "a" + df.loc[:, "labels"] = df.index + tm.assert_numpy_array_equal(np.asarray(df["labels"]), np.asarray(df.index)) + + # test with non-object block + df = DataFrame( + np.nan, + index=range(4), + columns=MultiIndex.from_tuples([("A", "1"), ("A", "2"), ("B", "1")]), + ) + expected = df.copy() + df.loc[:, "B"] = np.arange(4) + expected.iloc[:, 2] = np.arange(4) + tm.assert_frame_equal(df, expected) + + +def test_loc_nan_multiindex(): + # GH 5286 + tups = [ + ("Good Things", "C", np.nan), + ("Good Things", "R", np.nan), + ("Bad Things", "C", np.nan), + ("Bad Things", "T", np.nan), + ("Okay Things", "N", "B"), + ("Okay Things", "N", "D"), + ("Okay Things", "B", np.nan), + ("Okay Things", "D", np.nan), + ] + df = DataFrame( + np.ones((8, 4)), + columns=Index(["d1", "d2", "d3", "d4"]), + index=MultiIndex.from_tuples(tups, names=["u1", "u2", "u3"]), + ) + result = df.loc["Good Things"].loc["C"] + expected = DataFrame( + np.ones((1, 4)), + index=Index([np.nan], dtype="object", name="u3"), + columns=Index(["d1", "d2", "d3", "d4"], dtype="object"), + ) + tm.assert_frame_equal(result, expected) + + +def test_loc_period_string_indexing(): + # GH 9892 + a = pd.period_range("2013Q1", "2013Q4", freq="Q") + i = (1111, 2222, 3333) + idx = MultiIndex.from_product((a, i), names=("Period", "CVR")) + df = DataFrame( + index=idx, + columns=( + "OMS", + "OMK", + "RES", + "DRIFT_IND", + "OEVRIG_IND", + "FIN_IND", + "VARE_UD", + "LOEN_UD", + "FIN_UD", + ), + ) + result = df.loc[("2013Q1", 1111), "OMS"] + + alt = df.loc[(a[0], 1111), "OMS"] + assert np.isnan(alt) + + # Because the resolution of the string matches, it is an exact lookup, + # not a slice + assert np.isnan(result) + + # TODO: should it figure this out? 
+ # alt = df.loc["2013Q1", 1111, "OMS"] + # assert np.isnan(alt) + + +def test_loc_datetime_mask_slicing(): + # GH 16699 + dt_idx = pd.to_datetime(["2017-05-04", "2017-05-05"]) + m_idx = MultiIndex.from_product([dt_idx, dt_idx], names=["Idx1", "Idx2"]) + df = DataFrame( + data=[[1, 2], [3, 4], [5, 6], [7, 6]], index=m_idx, columns=["C1", "C2"] + ) + result = df.loc[(dt_idx[0], (df.index.get_level_values(1) > "2017-05-04")), "C1"] + expected = Series( + [3], + name="C1", + index=MultiIndex.from_tuples( + [(pd.Timestamp("2017-05-04"), pd.Timestamp("2017-05-05"))], + names=["Idx1", "Idx2"], + ), + ) + tm.assert_series_equal(result, expected) + + +def test_loc_datetime_series_tuple_slicing(): + # https://github.com/pandas-dev/pandas/issues/35858 + date = pd.Timestamp("2000") + ser = Series( + 1, + index=MultiIndex.from_tuples([("a", date)], names=["a", "b"]), + name="c", + ) + result = ser.loc[:, [date]] + tm.assert_series_equal(result, ser) + + +def test_loc_with_mi_indexer(): + # https://github.com/pandas-dev/pandas/issues/35351 + df = DataFrame( + data=[["a", 1], ["a", 0], ["b", 1], ["c", 2]], + index=MultiIndex.from_tuples( + [(0, 1), (1, 0), (1, 1), (1, 1)], names=["index", "date"] + ), + columns=["author", "price"], + ) + idx = MultiIndex.from_tuples([(0, 1), (1, 1)], names=["index", "date"]) + result = df.loc[idx, :] + expected = DataFrame( + [["a", 1], ["b", 1], ["c", 2]], + index=MultiIndex.from_tuples([(0, 1), (1, 1), (1, 1)], names=["index", "date"]), + columns=["author", "price"], + ) + tm.assert_frame_equal(result, expected) + + +def test_loc_mi_with_level1_named_0(): + # GH#37194 + dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific") + + ser = Series(range(3), index=dti) + df = ser.to_frame() + df[1] = dti + + df2 = df.set_index(0, append=True) + assert df2.index.names == (None, 0) + df2.index.get_loc(dti[0]) # smoke test + + result = df2.loc[dti[0]] + expected = df2.iloc[[0]].droplevel(None) + tm.assert_frame_equal(result, expected) + + ser2 = df2[1] + assert ser2.index.names == (None, 0) + + result = ser2.loc[dti[0]] + expected = ser2.iloc[[0]].droplevel(None) + tm.assert_series_equal(result, expected) + + +def test_getitem_str_slice(datapath): + # GH#15928 + path = datapath("reshape", "merge", "data", "quotes2.csv") + df = pd.read_csv(path, parse_dates=["time"]) + df2 = df.set_index(["ticker", "time"]).sort_index() + + res = df2.loc[("AAPL", slice("2016-05-25 13:30:00")), :].droplevel(0) + expected = df2.loc["AAPL"].loc[slice("2016-05-25 13:30:00"), :] + tm.assert_frame_equal(res, expected) + + +def test_3levels_leading_period_index(): + # GH#24091 + pi = pd.PeriodIndex( + ["20181101 1100", "20181101 1200", "20181102 1300", "20181102 1400"], + name="datetime", + freq="B", + ) + lev2 = ["A", "A", "Z", "W"] + lev3 = ["B", "C", "Q", "F"] + mi = MultiIndex.from_arrays([pi, lev2, lev3]) + + ser = Series(range(4), index=mi, dtype=np.float64) + result = ser.loc[(pi[0], "A", "B")] + assert result == 0.0 + + +class TestKeyErrorsWithMultiIndex: + def test_missing_keys_raises_keyerror(self): + # GH#27420 KeyError, not TypeError + df = DataFrame(np.arange(12).reshape(4, 3), columns=["A", "B", "C"]) + df2 = df.set_index(["A", "B"]) + + with pytest.raises(KeyError, match="1"): + df2.loc[(1, 6)] + + def test_missing_key_raises_keyerror2(self): + # GH#21168 KeyError, not "IndexingError: Too many indexers" + ser = Series(-1, index=MultiIndex.from_product([[0, 1]] * 2)) + + with pytest.raises(KeyError, match=r"\(0, 3\)"): + ser.loc[0, 3] + + def test_missing_key_combination(self): + # 
GH: 19556 + mi = MultiIndex.from_arrays( + [ + np.array(["a", "a", "b", "b"]), + np.array(["1", "2", "2", "3"]), + np.array(["c", "d", "c", "d"]), + ], + names=["one", "two", "three"], + ) + df = DataFrame(np.random.rand(4, 3), index=mi) + msg = r"\('b', '1', slice\(None, None, None\)\)" + with pytest.raises(KeyError, match=msg): + df.loc[("b", "1", slice(None)), :] + with pytest.raises(KeyError, match=msg): + df.index.get_locs(("b", "1", slice(None))) + with pytest.raises(KeyError, match=r"\('b', '1'\)"): + df.loc[("b", "1"), :] + + +def test_getitem_loc_commutability(multiindex_year_month_day_dataframe_random_data): + df = multiindex_year_month_day_dataframe_random_data + ser = df["A"] + result = ser[2000, 5] + expected = df.loc[2000, 5]["A"] + tm.assert_series_equal(result, expected) + + +def test_loc_with_nan(): + # GH: 27104 + df = DataFrame( + {"col": [1, 2, 5], "ind1": ["a", "d", np.nan], "ind2": [1, 4, 5]} + ).set_index(["ind1", "ind2"]) + result = df.loc[["a"]] + expected = DataFrame( + {"col": [1]}, index=MultiIndex.from_tuples([("a", 1)], names=["ind1", "ind2"]) + ) + tm.assert_frame_equal(result, expected) + + result = df.loc["a"] + expected = DataFrame({"col": [1]}, index=Index([1], name="ind2")) + tm.assert_frame_equal(result, expected) + + +def test_getitem_non_found_tuple(): + # GH: 25236 + df = DataFrame([[1, 2, 3, 4]], columns=["a", "b", "c", "d"]).set_index( + ["a", "b", "c"] + ) + with pytest.raises(KeyError, match=r"\(2\.0, 2\.0, 3\.0\)"): + df.loc[(2.0, 2.0, 3.0)] + + +def test_get_loc_datetime_index(): + # GH#24263 + index = pd.date_range("2001-01-01", periods=100) + mi = MultiIndex.from_arrays([index]) + # Check if get_loc matches for Index and MultiIndex + assert mi.get_loc("2001-01") == slice(0, 31, None) + assert index.get_loc("2001-01") == slice(0, 31, None) + + loc = mi[::2].get_loc("2001-01") + expected = index[::2].get_loc("2001-01") + assert loc == expected + + loc = mi.repeat(2).get_loc("2001-01") + expected = index.repeat(2).get_loc("2001-01") + assert loc == expected + + loc = mi.append(mi).get_loc("2001-01") + expected = index.append(index).get_loc("2001-01") + # TODO: standardize return type for MultiIndex.get_loc + tm.assert_numpy_array_equal(loc.nonzero()[0], expected) + + +def test_loc_setitem_indexer_differently_ordered(): + # GH#34603 + mi = MultiIndex.from_product([["a", "b"], [0, 1]]) + df = DataFrame([[1, 2], [3, 4], [5, 6], [7, 8]], index=mi) + + indexer = ("a", [1, 0]) + df.loc[indexer, :] = np.array([[9, 10], [11, 12]]) + expected = DataFrame([[11, 12], [9, 10], [5, 6], [7, 8]], index=mi) + tm.assert_frame_equal(df, expected) + + +def test_loc_getitem_index_differently_ordered_slice_none(): + # GH#31330 + df = DataFrame( + [[1, 2], [3, 4], [5, 6], [7, 8]], + index=[["a", "a", "b", "b"], [1, 2, 1, 2]], + columns=["a", "b"], + ) + result = df.loc[(slice(None), [2, 1]), :] + expected = DataFrame( + [[3, 4], [7, 8], [1, 2], [5, 6]], + index=[["a", "b", "a", "b"], [2, 2, 1, 1]], + columns=["a", "b"], + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("indexer", [[1, 2, 7, 6, 2, 3, 8, 7], [1, 2, 7, 6, 3, 8]]) +def test_loc_getitem_index_differently_ordered_slice_none_duplicates(indexer): + # GH#40978 + df = DataFrame( + [1] * 8, + index=MultiIndex.from_tuples( + [(1, 1), (1, 2), (1, 7), (1, 6), (2, 2), (2, 3), (2, 8), (2, 7)] + ), + columns=["a"], + ) + result = df.loc[(slice(None), indexer), :] + expected = DataFrame( + [1] * 8, + index=[[1, 1, 2, 1, 2, 1, 2, 2], [1, 2, 2, 7, 7, 6, 3, 8]], + columns=["a"], + ) + 
tm.assert_frame_equal(result, expected) + + result = df.loc[df.index.isin(indexer, level=1), :] + tm.assert_frame_equal(result, df) + + +def test_loc_getitem_drops_levels_for_one_row_dataframe(): + # GH#10521 "x" and "z" are both scalar indexing, so those levels are dropped + mi = MultiIndex.from_arrays([["x"], ["y"], ["z"]], names=["a", "b", "c"]) + df = DataFrame({"d": [0]}, index=mi) + expected = df.droplevel([0, 2]) + result = df.loc["x", :, "z"] + tm.assert_frame_equal(result, expected) + + ser = Series([0], index=mi) + result = ser.loc["x", :, "z"] + expected = Series([0], index=Index(["y"], name="b")) + tm.assert_series_equal(result, expected) + + +def test_mi_columns_loc_list_label_order(): + # GH 10710 + cols = MultiIndex.from_product([["A", "B", "C"], [1, 2]]) + df = DataFrame(np.zeros((5, 6)), columns=cols) + result = df.loc[:, ["B", "A"]] + expected = DataFrame( + np.zeros((5, 4)), + columns=MultiIndex.from_tuples([("B", 1), ("B", 2), ("A", 1), ("A", 2)]), + ) + tm.assert_frame_equal(result, expected) + + +def test_mi_partial_indexing_list_raises(): + # GH 13501 + frame = DataFrame( + np.arange(12).reshape((4, 3)), + index=[["a", "a", "b", "b"], [1, 2, 1, 2]], + columns=[["Ohio", "Ohio", "Colorado"], ["Green", "Red", "Green"]], + ) + frame.index.names = ["key1", "key2"] + frame.columns.names = ["state", "color"] + with pytest.raises(KeyError, match="\\[2\\] not in index"): + frame.loc[["b", 2], "Colorado"] + + +def test_mi_indexing_list_nonexistent_raises(): + # GH 15452 + s = Series(range(4), index=MultiIndex.from_product([[1, 2], ["a", "b"]])) + with pytest.raises(KeyError, match="\\['not' 'found'\\] not in index"): + s.loc[["not", "found"]] + + +def test_mi_add_cell_missing_row_non_unique(): + # GH 16018 + result = DataFrame( + [[1, 2, 5, 6], [3, 4, 7, 8]], + index=["a", "a"], + columns=MultiIndex.from_product([[1, 2], ["A", "B"]]), + ) + result.loc["c"] = -1 + result.loc["c", (1, "A")] = 3 + result.loc["d", (1, "A")] = 3 + expected = DataFrame( + [ + [1.0, 2.0, 5.0, 6.0], + [3.0, 4.0, 7.0, 8.0], + [3.0, -1.0, -1, -1], + [3.0, np.nan, np.nan, np.nan], + ], + index=["a", "a", "c", "d"], + columns=MultiIndex.from_product([[1, 2], ["A", "B"]]), + ) + tm.assert_frame_equal(result, expected) + + +def test_loc_get_scalar_casting_to_float(): + # GH#41369 + df = DataFrame( + {"a": 1.0, "b": 2}, index=MultiIndex.from_arrays([[3], [4]], names=["c", "d"]) + ) + result = df.loc[(3, 4), "b"] + assert result == 2 + assert isinstance(result, np.int64) + result = df.loc[[(3, 4)], "b"].iloc[0] + assert result == 2 + assert isinstance(result, np.int64) + + +def test_loc_empty_single_selector_with_names(): + # GH 19517 + idx = MultiIndex.from_product([["a", "b"], ["A", "B"]], names=[1, 0]) + s2 = Series(index=idx, dtype=np.float64) + result = s2.loc["a"] + expected = Series([np.nan, np.nan], index=Index(["A", "B"], name=0)) + tm.assert_series_equal(result, expected) + + +def test_loc_keyerror_rightmost_key_missing(): + # GH 20951 + + df = DataFrame( + { + "A": [100, 100, 200, 200, 300, 300], + "B": [10, 10, 20, 21, 31, 33], + "C": range(6), + } + ) + df = df.set_index(["A", "B"]) + with pytest.raises(KeyError, match="^1$"): + df.loc[(100, 1)] + + +def test_multindex_series_loc_with_tuple_label(): + # GH#43908 + mi = MultiIndex.from_tuples([(1, 2), (3, (4, 5))]) + ser = Series([1, 2], index=mi) + result = ser.loc[(3, (4, 5))] + assert result == 2 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/test_multiindex.py 
b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/test_multiindex.py
new file mode 100644
index 0000000..9fa873a
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/test_multiindex.py
@@ -0,0 +1,151 @@
+import numpy as np
+
+import pandas._libs.index as _index
+from pandas.errors import PerformanceWarning
+
+import pandas as pd
+from pandas import (
+    DataFrame,
+    Index,
+    MultiIndex,
+    Series,
+)
+import pandas._testing as tm
+
+
+class TestMultiIndexBasic:
+    def test_multiindex_perf_warn(self):
+        df = DataFrame(
+            {
+                "jim": [0, 0, 1, 1],
+                "joe": ["x", "x", "z", "y"],
+                "jolie": np.random.rand(4),
+            }
+        ).set_index(["jim", "joe"])
+
+        with tm.assert_produces_warning(PerformanceWarning):
+            df.loc[(1, "z")]
+
+        df = df.iloc[[2, 1, 3, 0]]
+        with tm.assert_produces_warning(PerformanceWarning):
+            df.loc[(0,)]
+
+    def test_indexing_over_hashtable_size_cutoff(self):
+        n = 10000
+
+        old_cutoff = _index._SIZE_CUTOFF
+        _index._SIZE_CUTOFF = 20000
+
+        s = Series(np.arange(n), MultiIndex.from_arrays((["a"] * n, np.arange(n))))
+
+        # sanity check: lookups still work with the raised size cutoff
+        assert s[("a", 5)] == 5
+        assert s[("a", 6)] == 6
+        assert s[("a", 7)] == 7
+
+        _index._SIZE_CUTOFF = old_cutoff
+
+    def test_multi_nan_indexing(self):
+        # GH 3588
+        df = DataFrame(
+            {
+                "a": ["R1", "R2", np.nan, "R4"],
+                "b": ["C1", "C2", "C3", "C4"],
+                "c": [10, 15, np.nan, 20],
+            }
+        )
+        result = df.set_index(["a", "b"], drop=False)
+        expected = DataFrame(
+            {
+                "a": ["R1", "R2", np.nan, "R4"],
+                "b": ["C1", "C2", "C3", "C4"],
+                "c": [10, 15, np.nan, 20],
+            },
+            index=[
+                Index(["R1", "R2", np.nan, "R4"], name="a"),
+                Index(["C1", "C2", "C3", "C4"], name="b"),
+            ],
+        )
+        tm.assert_frame_equal(result, expected)
+
+    def test_exclusive_nat_column_indexing(self):
+        # GH 38025
+        # test multi indexing when one column exclusively contains NaT values
+        df = DataFrame(
+            {
+                "a": [pd.NaT, pd.NaT, pd.NaT, pd.NaT],
+                "b": ["C1", "C2", "C3", "C4"],
+                "c": [10, 15, np.nan, 20],
+            }
+        )
+        df = df.set_index(["a", "b"])
+        expected = DataFrame(
+            {
+                "c": [10, 15, np.nan, 20],
+            },
+            index=[
+                Index([pd.NaT, pd.NaT, pd.NaT, pd.NaT], name="a"),
+                Index(["C1", "C2", "C3", "C4"], name="b"),
+            ],
+        )
+        tm.assert_frame_equal(df, expected)
+
+    def test_nested_tuples_duplicates(self):
+        # GH#30892
+
+        dti = pd.to_datetime(["20190101", "20190101", "20190102"])
+        idx = Index(["a", "a", "c"])
+        mi = MultiIndex.from_arrays([dti, idx], names=["index1", "index2"])
+
+        df = DataFrame({"c1": [1, 2, 3], "c2": [np.nan, np.nan, np.nan]}, index=mi)
+
+        expected = DataFrame({"c1": df["c1"], "c2": [1.0, 1.0, np.nan]}, index=mi)
+
+        df2 = df.copy(deep=True)
+        df2.loc[(dti[0], "a"), "c2"] = 1.0
+        tm.assert_frame_equal(df2, expected)
+
+        df3 = df.copy(deep=True)
+        df3.loc[[(dti[0], "a")], "c2"] = 1.0
+        tm.assert_frame_equal(df3, expected)
+
+    def test_multiindex_with_datetime_level_preserves_freq(self):
+        # https://github.com/pandas-dev/pandas/issues/35563
+        idx = Index(range(2), name="A")
+        dti = pd.date_range("2020-01-01", periods=7, freq="D", name="B")
+        mi = MultiIndex.from_product([idx, dti])
+        df = DataFrame(np.random.randn(14, 2), index=mi)
+        result = df.loc[0].index
+        tm.assert_index_equal(result, dti)
+        assert result.freq == dti.freq
+
+    def test_multiindex_complex(self):
+        # GH#42145
+        complex_data = [1 + 2j, 4 - 3j, 10 - 1j]
+        non_complex_data = [3, 4, 5]
+        result = DataFrame(
+            {
+                "x": complex_data,
+                "y": non_complex_data,
+                "z": non_complex_data,
+            }
+        )
+        result.set_index(["x", "y"], inplace=True)
+        expected =
DataFrame( + {"z": non_complex_data}, + index=MultiIndex.from_arrays( + [complex_data, non_complex_data], + names=("x", "y"), + ), + ) + tm.assert_frame_equal(result, expected) + + def test_rename_multiindex_with_duplicates(self): + # GH 38015 + mi = MultiIndex.from_tuples([("A", "cat"), ("B", "cat"), ("B", "cat")]) + df = DataFrame(index=mi) + df = df.rename(index={"A": "Apple"}, level=0) + + mi2 = MultiIndex.from_tuples([("Apple", "cat"), ("B", "cat"), ("B", "cat")]) + expected = DataFrame(index=mi2) + tm.assert_frame_equal(df, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/test_partial.py b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/test_partial.py new file mode 100644 index 0000000..9d5e65e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/test_partial.py @@ -0,0 +1,252 @@ +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas import ( + DataFrame, + MultiIndex, + date_range, + to_datetime, +) +import pandas._testing as tm +from pandas.core.api import ( + Float64Index, + Int64Index, +) + + +class TestMultiIndexPartial: + def test_getitem_partial_int(self): + # GH 12416 + # with single item + l1 = [10, 20] + l2 = ["a", "b"] + df = DataFrame(index=range(2), columns=MultiIndex.from_product([l1, l2])) + expected = DataFrame(index=range(2), columns=l2) + result = df[20] + tm.assert_frame_equal(result, expected) + + # with list + expected = DataFrame( + index=range(2), columns=MultiIndex.from_product([l1[1:], l2]) + ) + result = df[[20]] + tm.assert_frame_equal(result, expected) + + # missing item: + with pytest.raises(KeyError, match="1"): + df[1] + with pytest.raises(KeyError, match=r"'\[1\] not in index'"): + df[[1]] + + def test_series_slice_partial(self): + pass + + def test_xs_partial( + self, + multiindex_dataframe_random_data, + multiindex_year_month_day_dataframe_random_data, + ): + frame = multiindex_dataframe_random_data + ymd = multiindex_year_month_day_dataframe_random_data + result = frame.xs("foo") + result2 = frame.loc["foo"] + expected = frame.T["foo"].T + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, result2) + + result = ymd.xs((2000, 4)) + expected = ymd.loc[2000, 4] + tm.assert_frame_equal(result, expected) + + # ex from #1796 + index = MultiIndex( + levels=[["foo", "bar"], ["one", "two"], [-1, 1]], + codes=[ + [0, 0, 0, 0, 1, 1, 1, 1], + [0, 0, 1, 1, 0, 0, 1, 1], + [0, 1, 0, 1, 0, 1, 0, 1], + ], + ) + df = DataFrame(np.random.randn(8, 4), index=index, columns=list("abcd")) + + with tm.assert_produces_warning(FutureWarning): + result = df.xs(["foo", "one"]) + expected = df.loc["foo", "one"] + tm.assert_frame_equal(result, expected) + + def test_getitem_partial(self, multiindex_year_month_day_dataframe_random_data): + ymd = multiindex_year_month_day_dataframe_random_data + ymd = ymd.T + result = ymd[2000, 2] + + expected = ymd.reindex(columns=ymd.columns[ymd.columns.codes[1] == 1]) + expected.columns = expected.columns.droplevel(0).droplevel(0) + tm.assert_frame_equal(result, expected) + + def test_fancy_slice_partial( + self, + multiindex_dataframe_random_data, + multiindex_year_month_day_dataframe_random_data, + ): + frame = multiindex_dataframe_random_data + result = frame.loc["bar":"baz"] + expected = frame[3:7] + tm.assert_frame_equal(result, expected) + + ymd = multiindex_year_month_day_dataframe_random_data + result = ymd.loc[(2000, 2):(2000, 4)] + lev = ymd.index.codes[1] + expected = ymd[(lev 
>= 1) & (lev <= 3)] + tm.assert_frame_equal(result, expected) + + def test_getitem_partial_column_select(self): + idx = MultiIndex( + codes=[[0, 0, 0], [0, 1, 1], [1, 0, 1]], + levels=[["a", "b"], ["x", "y"], ["p", "q"]], + ) + df = DataFrame(np.random.rand(3, 2), index=idx) + + result = df.loc[("a", "y"), :] + expected = df.loc[("a", "y")] + tm.assert_frame_equal(result, expected) + + result = df.loc[("a", "y"), [1, 0]] + expected = df.loc[("a", "y")][[1, 0]] + tm.assert_frame_equal(result, expected) + + with pytest.raises(KeyError, match=r"\('a', 'foo'\)"): + df.loc[("a", "foo"), :] + + # TODO(ArrayManager) rewrite test to not use .values + # exp.loc[2000, 4].values[:] select multiple columns -> .values is not a view + @td.skip_array_manager_invalid_test + def test_partial_set(self, multiindex_year_month_day_dataframe_random_data): + # GH #397 + ymd = multiindex_year_month_day_dataframe_random_data + df = ymd.copy() + exp = ymd.copy() + df.loc[2000, 4] = 0 + exp.loc[2000, 4].values[:] = 0 + tm.assert_frame_equal(df, exp) + + df["A"].loc[2000, 4] = 1 + exp["A"].loc[2000, 4].values[:] = 1 + tm.assert_frame_equal(df, exp) + + df.loc[2000] = 5 + exp.loc[2000].values[:] = 5 + tm.assert_frame_equal(df, exp) + + # this works...for now + df["A"].iloc[14] = 5 + assert df["A"].iloc[14] == 5 + + @pytest.mark.parametrize("dtype", [int, float]) + def test_getitem_intkey_leading_level( + self, multiindex_year_month_day_dataframe_random_data, dtype + ): + # GH#33355 dont fall-back to positional when leading level is int + ymd = multiindex_year_month_day_dataframe_random_data + levels = ymd.index.levels + ymd.index = ymd.index.set_levels([levels[0].astype(dtype)] + levels[1:]) + ser = ymd["A"] + mi = ser.index + assert isinstance(mi, MultiIndex) + if dtype is int: + assert isinstance(mi.levels[0], Int64Index) + else: + assert isinstance(mi.levels[0], Float64Index) + + assert 14 not in mi.levels[0] + assert not mi.levels[0]._should_fallback_to_positional + assert not mi._should_fallback_to_positional + + with pytest.raises(KeyError, match="14"): + ser[14] + with pytest.raises(KeyError, match="14"): + with tm.assert_produces_warning(FutureWarning): + mi.get_value(ser, 14) + + # --------------------------------------------------------------------- + + def test_setitem_multiple_partial(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + expected = frame.copy() + result = frame.copy() + result.loc[["foo", "bar"]] = 0 + expected.loc["foo"] = 0 + expected.loc["bar"] = 0 + tm.assert_frame_equal(result, expected) + + expected = frame.copy() + result = frame.copy() + result.loc["foo":"bar"] = 0 + expected.loc["foo"] = 0 + expected.loc["bar"] = 0 + tm.assert_frame_equal(result, expected) + + expected = frame["A"].copy() + result = frame["A"].copy() + result.loc[["foo", "bar"]] = 0 + expected.loc["foo"] = 0 + expected.loc["bar"] = 0 + tm.assert_series_equal(result, expected) + + expected = frame["A"].copy() + result = frame["A"].copy() + result.loc["foo":"bar"] = 0 + expected.loc["foo"] = 0 + expected.loc["bar"] = 0 + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "indexer, exp_idx, exp_values", + [ + (slice("2019-2", None), [to_datetime("2019-02-01")], [2, 3]), + ( + slice(None, "2019-2"), + date_range("2019", periods=2, freq="MS"), + [0, 1, 2, 3], + ), + ], + ) + def test_partial_getitem_loc_datetime(self, indexer, exp_idx, exp_values): + # GH: 25165 + date_idx = date_range("2019", periods=2, freq="MS") + df = DataFrame( + list(range(4)), + 
index=MultiIndex.from_product([date_idx, [0, 1]], names=["x", "y"]), + ) + expected = DataFrame( + exp_values, + index=MultiIndex.from_product([exp_idx, [0, 1]], names=["x", "y"]), + ) + result = df[indexer] + tm.assert_frame_equal(result, expected) + result = df.loc[indexer] + tm.assert_frame_equal(result, expected) + + result = df.loc(axis=0)[indexer] + tm.assert_frame_equal(result, expected) + + result = df.loc[indexer, :] + tm.assert_frame_equal(result, expected) + + df2 = df.swaplevel(0, 1).sort_index() + expected = expected.swaplevel(0, 1).sort_index() + + result = df2.loc[:, indexer, :] + tm.assert_frame_equal(result, expected) + + +def test_loc_getitem_partial_both_axis(): + # gh-12660 + iterables = [["a", "b"], [2, 1]] + columns = MultiIndex.from_product(iterables, names=["col1", "col2"]) + rows = MultiIndex.from_product(iterables, names=["row1", "row2"]) + df = DataFrame(np.random.randn(4, 4), index=rows, columns=columns) + expected = df.iloc[:2, 2:].droplevel("row1").droplevel("col1", axis=1) + result = df.loc["a", "b"] + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/test_setitem.py b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/test_setitem.py new file mode 100644 index 0000000..4786877 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/test_setitem.py @@ -0,0 +1,509 @@ +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + DataFrame, + MultiIndex, + Series, + Timestamp, + date_range, + isna, + notna, +) +import pandas._testing as tm +import pandas.core.common as com + + +def assert_equal(a, b): + assert a == b + + +class TestMultiIndexSetItem: + def check(self, target, indexers, value, compare_fn=assert_equal, expected=None): + target.loc[indexers] = value + result = target.loc[indexers] + if expected is None: + expected = value + compare_fn(result, expected) + + def test_setitem_multiindex(self): + # GH#7190 + cols = ["A", "w", "l", "a", "x", "X", "d", "profit"] + index = MultiIndex.from_product( + [np.arange(0, 100), np.arange(0, 80)], names=["time", "firm"] + ) + t, n = 0, 2 + + df = DataFrame( + np.nan, + columns=cols, + index=index, + ) + self.check(target=df, indexers=((t, n), "X"), value=0) + + df = DataFrame(-999, columns=cols, index=index) + self.check(target=df, indexers=((t, n), "X"), value=1) + + df = DataFrame(columns=cols, index=index) + self.check(target=df, indexers=((t, n), "X"), value=2) + + # gh-7218: assigning with 0-dim arrays + df = DataFrame(-999, columns=cols, index=index) + self.check( + target=df, + indexers=((t, n), "X"), + value=np.array(3), + expected=3, + ) + + def test_setitem_multiindex2(self): + # GH#5206 + df = DataFrame( + np.arange(25).reshape(5, 5), columns="A,B,C,D,E".split(","), dtype=float + ) + df["F"] = 99 + row_selection = df["A"] % 2 == 0 + col_selection = ["B", "C"] + df.loc[row_selection, col_selection] = df["F"] + output = DataFrame(99.0, index=[0, 2, 4], columns=["B", "C"]) + tm.assert_frame_equal(df.loc[row_selection, col_selection], output) + self.check( + target=df, + indexers=(row_selection, col_selection), + value=df["F"], + compare_fn=tm.assert_frame_equal, + expected=output, + ) + + def test_setitem_multiindex3(self): + # GH#11372 + idx = MultiIndex.from_product( + [["A", "B", "C"], date_range("2015-01-01", "2015-04-01", freq="MS")] + ) + cols = MultiIndex.from_product( + [["foo", "bar"], date_range("2016-01-01", 
"2016-02-01", freq="MS")] + ) + + df = DataFrame(np.random.random((12, 4)), index=idx, columns=cols) + + subidx = MultiIndex.from_tuples( + [("A", Timestamp("2015-01-01")), ("A", Timestamp("2015-02-01"))] + ) + subcols = MultiIndex.from_tuples( + [("foo", Timestamp("2016-01-01")), ("foo", Timestamp("2016-02-01"))] + ) + + vals = DataFrame(np.random.random((2, 2)), index=subidx, columns=subcols) + self.check( + target=df, + indexers=(subidx, subcols), + value=vals, + compare_fn=tm.assert_frame_equal, + ) + # set all columns + vals = DataFrame(np.random.random((2, 4)), index=subidx, columns=cols) + self.check( + target=df, + indexers=(subidx, slice(None, None, None)), + value=vals, + compare_fn=tm.assert_frame_equal, + ) + # identity + copy = df.copy() + self.check( + target=df, + indexers=(df.index, df.columns), + value=df, + compare_fn=tm.assert_frame_equal, + expected=copy, + ) + + # TODO(ArrayManager) df.loc["bar"] *= 2 doesn't raise an error but results in + # all NaNs -> doesn't work in the "split" path (also for BlockManager actually) + @td.skip_array_manager_not_yet_implemented + def test_multiindex_setitem(self): + + # GH 3738 + # setting with a multi-index right hand side + arrays = [ + np.array(["bar", "bar", "baz", "qux", "qux", "bar"]), + np.array(["one", "two", "one", "one", "two", "one"]), + np.arange(0, 6, 1), + ] + + df_orig = DataFrame( + np.random.randn(6, 3), index=arrays, columns=["A", "B", "C"] + ).sort_index() + + expected = df_orig.loc[["bar"]] * 2 + df = df_orig.copy() + df.loc[["bar"]] *= 2 + tm.assert_frame_equal(df.loc[["bar"]], expected) + + # raise because these have differing levels + msg = "cannot align on a multi-index with out specifying the join levels" + with pytest.raises(TypeError, match=msg): + df.loc["bar"] *= 2 + + def test_multiindex_setitem2(self): + + # from SO + # https://stackoverflow.com/questions/24572040/pandas-access-the-level-of-multiindex-for-inplace-operation + df_orig = DataFrame.from_dict( + { + "price": { + ("DE", "Coal", "Stock"): 2, + ("DE", "Gas", "Stock"): 4, + ("DE", "Elec", "Demand"): 1, + ("FR", "Gas", "Stock"): 5, + ("FR", "Solar", "SupIm"): 0, + ("FR", "Wind", "SupIm"): 0, + } + } + ) + df_orig.index = MultiIndex.from_tuples( + df_orig.index, names=["Sit", "Com", "Type"] + ) + + expected = df_orig.copy() + expected.iloc[[0, 2, 3]] *= 2 + + idx = pd.IndexSlice + df = df_orig.copy() + df.loc[idx[:, :, "Stock"], :] *= 2 + tm.assert_frame_equal(df, expected) + + df = df_orig.copy() + df.loc[idx[:, :, "Stock"], "price"] *= 2 + tm.assert_frame_equal(df, expected) + + def test_multiindex_assignment(self): + + # GH3777 part 2 + + # mixed dtype + df = DataFrame( + np.random.randint(5, 10, size=9).reshape(3, 3), + columns=list("abc"), + index=[[4, 4, 8], [8, 10, 12]], + ) + df["d"] = np.nan + arr = np.array([0.0, 1.0]) + + df.loc[4, "d"] = arr + tm.assert_series_equal(df.loc[4, "d"], Series(arr, index=[8, 10], name="d")) + + def test_multiindex_assignment_single_dtype(self, using_array_manager): + # GH3777 part 2b + # single dtype + arr = np.array([0.0, 1.0]) + + df = DataFrame( + np.random.randint(5, 10, size=9).reshape(3, 3), + columns=list("abc"), + index=[[4, 4, 8], [8, 10, 12]], + dtype=np.int64, + ) + view = df["c"].iloc[:2].values + + # arr can be losslessly cast to int, so this setitem is inplace + df.loc[4, "c"] = arr + exp = Series(arr, index=[8, 10], name="c", dtype="int64") + result = df.loc[4, "c"] + tm.assert_series_equal(result, exp) + if not using_array_manager: + # FIXME(ArrayManager): this correctly preserves dtype, + # 
but incorrectly is not inplace. + # extra check for inplace-ness + tm.assert_numpy_array_equal(view, exp.values) + + # arr + 0.5 cannot be cast losslessly to int, so we upcast + df.loc[4, "c"] = arr + 0.5 + result = df.loc[4, "c"] + exp = exp + 0.5 + tm.assert_series_equal(result, exp) + + # scalar ok + df.loc[4, "c"] = 10 + exp = Series(10, index=[8, 10], name="c", dtype="float64") + tm.assert_series_equal(df.loc[4, "c"], exp) + + # invalid assignments + msg = "Must have equal len keys and value when setting with an iterable" + with pytest.raises(ValueError, match=msg): + df.loc[4, "c"] = [0, 1, 2, 3] + + with pytest.raises(ValueError, match=msg): + df.loc[4, "c"] = [0] + + # But with a length-1 listlike column indexer this behaves like + # `df.loc[4, "c"] = 0 + df.loc[4, ["c"]] = [0] + assert (df.loc[4, "c"] == 0).all() + + def test_groupby_example(self): + # groupby example + NUM_ROWS = 100 + NUM_COLS = 10 + col_names = ["A" + num for num in map(str, np.arange(NUM_COLS).tolist())] + index_cols = col_names[:5] + + df = DataFrame( + np.random.randint(5, size=(NUM_ROWS, NUM_COLS)), + dtype=np.int64, + columns=col_names, + ) + df = df.set_index(index_cols).sort_index() + grp = df.groupby(level=index_cols[:4]) + df["new_col"] = np.nan + + # we are actually operating on a copy here + # but in this case, that's ok + for name, df2 in grp: + new_vals = np.arange(df2.shape[0]) + df.loc[name, "new_col"] = new_vals + + def test_series_setitem(self, multiindex_year_month_day_dataframe_random_data): + ymd = multiindex_year_month_day_dataframe_random_data + s = ymd["A"] + + s[2000, 3] = np.nan + assert isna(s.values[42:65]).all() + assert notna(s.values[:42]).all() + assert notna(s.values[65:]).all() + + s[2000, 3, 10] = np.nan + assert isna(s.iloc[49]) + + with pytest.raises(KeyError, match="49"): + # GH#33355 dont fall-back to positional when leading level is int + s[49] + + def test_frame_getitem_setitem_boolean(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + df = frame.T.copy() + values = df.values + + result = df[df > 0] + expected = df.where(df > 0) + tm.assert_frame_equal(result, expected) + + df[df > 0] = 5 + values[values > 0] = 5 + tm.assert_almost_equal(df.values, values) + + df[df == 5] = 0 + values[values == 5] = 0 + tm.assert_almost_equal(df.values, values) + + # a df that needs alignment first + df[df[:-1] < 0] = 2 + np.putmask(values[:-1], values[:-1] < 0, 2) + tm.assert_almost_equal(df.values, values) + + with pytest.raises(TypeError, match="boolean values only"): + df[df * 0] = 2 + + def test_frame_getitem_setitem_multislice(self): + levels = [["t1", "t2"], ["a", "b", "c"]] + codes = [[0, 0, 0, 1, 1], [0, 1, 2, 0, 1]] + midx = MultiIndex(codes=codes, levels=levels, names=[None, "id"]) + df = DataFrame({"value": [1, 2, 3, 7, 8]}, index=midx) + + result = df.loc[:, "value"] + tm.assert_series_equal(df["value"], result) + + result = df.loc[df.index[1:3], "value"] + tm.assert_series_equal(df["value"][1:3], result) + + result = df.loc[:, :] + tm.assert_frame_equal(df, result) + + result = df + df.loc[:, "value"] = 10 + result["value"] = 10 + tm.assert_frame_equal(df, result) + + df.loc[:, :] = 10 + tm.assert_frame_equal(df, result) + + def test_frame_setitem_multi_column(self): + df = DataFrame( + np.random.randn(10, 4), columns=[["a", "a", "b", "b"], [0, 1, 0, 1]] + ) + + cp = df.copy() + cp["a"] = cp["b"] + tm.assert_frame_equal(cp["a"], cp["b"]) + + # set with ndarray + cp = df.copy() + cp["a"] = cp["b"].values + tm.assert_frame_equal(cp["a"], 
cp["b"]) + + def test_frame_setitem_multi_column2(self): + + # --------------------------------------- + # GH#1803 + columns = MultiIndex.from_tuples([("A", "1"), ("A", "2"), ("B", "1")]) + df = DataFrame(index=[1, 3, 5], columns=columns) + + # Works, but adds a column instead of updating the two existing ones + df["A"] = 0.0 # Doesn't work + assert (df["A"].values == 0).all() + + # it broadcasts + df["B", "1"] = [1, 2, 3] + df["A"] = df["B", "1"] + + sliced_a1 = df["A", "1"] + sliced_a2 = df["A", "2"] + sliced_b1 = df["B", "1"] + tm.assert_series_equal(sliced_a1, sliced_b1, check_names=False) + tm.assert_series_equal(sliced_a2, sliced_b1, check_names=False) + assert sliced_a1.name == ("A", "1") + assert sliced_a2.name == ("A", "2") + assert sliced_b1.name == ("B", "1") + + def test_loc_getitem_tuple_plus_columns( + self, multiindex_year_month_day_dataframe_random_data + ): + # GH #1013 + ymd = multiindex_year_month_day_dataframe_random_data + df = ymd[:5] + + result = df.loc[(2000, 1, 6), ["A", "B", "C"]] + expected = df.loc[2000, 1, 6][["A", "B", "C"]] + tm.assert_series_equal(result, expected) + + def test_loc_getitem_setitem_slice_integers(self, frame_or_series): + index = MultiIndex( + levels=[[0, 1, 2], [0, 2]], codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]] + ) + + obj = DataFrame( + np.random.randn(len(index), 4), index=index, columns=["a", "b", "c", "d"] + ) + obj = tm.get_obj(obj, frame_or_series) + + res = obj.loc[1:2] + exp = obj.reindex(obj.index[2:]) + tm.assert_equal(res, exp) + + obj.loc[1:2] = 7 + assert (obj.loc[1:2] == 7).values.all() + + def test_setitem_change_dtype(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + dft = frame.T + s = dft["foo", "two"] + dft["foo", "two"] = s > s.median() + tm.assert_series_equal(dft["foo", "two"], s > s.median()) + # assert isinstance(dft._data.blocks[1].items, MultiIndex) + + reindexed = dft.reindex(columns=[("foo", "two")]) + tm.assert_series_equal(reindexed["foo", "two"], s > s.median()) + + def test_set_column_scalar_with_loc(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + subset = frame.index[[1, 4, 5]] + + frame.loc[subset] = 99 + assert (frame.loc[subset].values == 99).all() + + col = frame["B"] + col[subset] = 97 + assert (frame.loc[subset, "B"] == 97).all() + + def test_nonunique_assignment_1750(self): + df = DataFrame( + [[1, 1, "x", "X"], [1, 1, "y", "Y"], [1, 2, "z", "Z"]], columns=list("ABCD") + ) + + df = df.set_index(["A", "B"]) + mi = MultiIndex.from_tuples([(1, 1)]) + + df.loc[mi, "C"] = "_" + + assert (df.xs((1, 1))["C"] == "_").all() + + def test_astype_assignment_with_dups(self): + + # GH 4686 + # assignment with dups that has a dtype change + cols = MultiIndex.from_tuples([("A", "1"), ("B", "1"), ("A", "2")]) + df = DataFrame(np.arange(3).reshape((1, 3)), columns=cols, dtype=object) + index = df.index.copy() + + df["A"] = df["A"].astype(np.float64) + tm.assert_index_equal(df.index, index) + + def test_setitem_nonmonotonic(self): + # https://github.com/pandas-dev/pandas/issues/31449 + index = MultiIndex.from_tuples( + [("a", "c"), ("b", "x"), ("a", "d")], names=["l1", "l2"] + ) + df = DataFrame(data=[0, 1, 2], index=index, columns=["e"]) + df.loc["a", "e"] = np.arange(99, 101, dtype="int64") + expected = DataFrame({"e": [99, 1, 100]}, index=index) + tm.assert_frame_equal(df, expected) + + +class TestSetitemWithExpansionMultiIndex: + def test_setitem_new_column_mixed_depth(self): + arrays = [ + ["a", "top", "top", "routine1", "routine1", 
"routine2"], + ["", "OD", "OD", "result1", "result2", "result1"], + ["", "wx", "wy", "", "", ""], + ] + + tuples = sorted(zip(*arrays)) + index = MultiIndex.from_tuples(tuples) + df = DataFrame(np.random.randn(4, 6), columns=index) + + result = df.copy() + expected = df.copy() + result["b"] = [1, 2, 3, 4] + expected["b", "", ""] = [1, 2, 3, 4] + tm.assert_frame_equal(result, expected) + + def test_setitem_new_column_all_na(self): + # GH#1534 + mix = MultiIndex.from_tuples([("1a", "2a"), ("1a", "2b"), ("1a", "2c")]) + df = DataFrame([[1, 2], [3, 4], [5, 6]], index=mix) + s = Series({(1, 1): 1, (1, 2): 2}) + df["new"] = s + assert df["new"].isna().all() + + +@td.skip_array_manager_invalid_test # df["foo"] select multiple columns -> .values +# is not a view +def test_frame_setitem_view_direct(multiindex_dataframe_random_data): + # this works because we are modifying the underlying array + # really a no-no + df = multiindex_dataframe_random_data.T + df["foo"].values[:] = 0 + assert (df["foo"].values == 0).all() + + +def test_frame_setitem_copy_raises(multiindex_dataframe_random_data): + # will raise/warn as its chained assignment + df = multiindex_dataframe_random_data.T + msg = "A value is trying to be set on a copy of a slice from a DataFrame" + with pytest.raises(com.SettingWithCopyError, match=msg): + df["foo"]["one"] = 2 + + +def test_frame_setitem_copy_no_write(multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data.T + expected = frame + df = frame.copy() + msg = "A value is trying to be set on a copy of a slice from a DataFrame" + with pytest.raises(com.SettingWithCopyError, match=msg): + df["foo"]["one"] = 2 + + result = df + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/test_slice.py b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/test_slice.py new file mode 100644 index 0000000..55d45a2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/test_slice.py @@ -0,0 +1,769 @@ +import numpy as np +import pytest + +from pandas.errors import UnsortedIndexError + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + Timestamp, +) +import pandas._testing as tm +from pandas.tests.indexing.common import _mklbl + + +class TestMultiIndexSlicers: + def test_per_axis_per_level_getitem(self): + + # GH6134 + # example test case + ix = MultiIndex.from_product( + [_mklbl("A", 5), _mklbl("B", 7), _mklbl("C", 4), _mklbl("D", 2)] + ) + df = DataFrame(np.arange(len(ix.to_numpy())), index=ix) + + result = df.loc[(slice("A1", "A3"), slice(None), ["C1", "C3"]), :] + expected = df.loc[ + [ + ( + a, + b, + c, + d, + ) + for a, b, c, d in df.index.values + if (a == "A1" or a == "A2" or a == "A3") and (c == "C1" or c == "C3") + ] + ] + tm.assert_frame_equal(result, expected) + + expected = df.loc[ + [ + ( + a, + b, + c, + d, + ) + for a, b, c, d in df.index.values + if (a == "A1" or a == "A2" or a == "A3") + and (c == "C1" or c == "C2" or c == "C3") + ] + ] + result = df.loc[(slice("A1", "A3"), slice(None), slice("C1", "C3")), :] + tm.assert_frame_equal(result, expected) + + # test multi-index slicing with per axis and per index controls + index = MultiIndex.from_tuples( + [("A", 1), ("A", 2), ("A", 3), ("B", 1)], names=["one", "two"] + ) + columns = MultiIndex.from_tuples( + [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")], + names=["lvl0", "lvl1"], + ) + + df = DataFrame( + np.arange(16, dtype="int64").reshape(4, 4), 
index=index, columns=columns + ) + df = df.sort_index(axis=0).sort_index(axis=1) + + # identity + result = df.loc[(slice(None), slice(None)), :] + tm.assert_frame_equal(result, df) + result = df.loc[(slice(None), slice(None)), (slice(None), slice(None))] + tm.assert_frame_equal(result, df) + result = df.loc[:, (slice(None), slice(None))] + tm.assert_frame_equal(result, df) + + # index + result = df.loc[(slice(None), [1]), :] + expected = df.iloc[[0, 3]] + tm.assert_frame_equal(result, expected) + + result = df.loc[(slice(None), 1), :] + expected = df.iloc[[0, 3]] + tm.assert_frame_equal(result, expected) + + # columns + result = df.loc[:, (slice(None), ["foo"])] + expected = df.iloc[:, [1, 3]] + tm.assert_frame_equal(result, expected) + + # both + result = df.loc[(slice(None), 1), (slice(None), ["foo"])] + expected = df.iloc[[0, 3], [1, 3]] + tm.assert_frame_equal(result, expected) + + result = df.loc["A", "a"] + expected = DataFrame( + {"bar": [1, 5, 9], "foo": [0, 4, 8]}, + index=Index([1, 2, 3], name="two"), + columns=Index(["bar", "foo"], name="lvl1"), + ) + tm.assert_frame_equal(result, expected) + + result = df.loc[(slice(None), [1, 2]), :] + expected = df.iloc[[0, 1, 3]] + tm.assert_frame_equal(result, expected) + + # multi-level series + s = Series(np.arange(len(ix.to_numpy())), index=ix) + result = s.loc["A1":"A3", :, ["C1", "C3"]] + expected = s.loc[ + [ + ( + a, + b, + c, + d, + ) + for a, b, c, d in s.index.values + if (a == "A1" or a == "A2" or a == "A3") and (c == "C1" or c == "C3") + ] + ] + tm.assert_series_equal(result, expected) + + # boolean indexers + result = df.loc[(slice(None), df.loc[:, ("a", "bar")] > 5), :] + expected = df.iloc[[2, 3]] + tm.assert_frame_equal(result, expected) + + msg = ( + "cannot index with a boolean indexer " + "that is not the same length as the index" + ) + with pytest.raises(ValueError, match=msg): + df.loc[(slice(None), np.array([True, False])), :] + + with pytest.raises(KeyError, match=r"\[1\] not in index"): + # slice(None) is on the index, [1] is on the columns, but 1 is + # not in the columns, so we raise + # This used to treat [1] as positional GH#16396 + df.loc[slice(None), [1]] + + # not lexsorted + assert df.index._lexsort_depth == 2 + df = df.sort_index(level=1, axis=0) + assert df.index._lexsort_depth == 0 + + msg = ( + "MultiIndex slicing requires the index to be " + r"lexsorted: slicing on levels \[1\], lexsort depth 0" + ) + with pytest.raises(UnsortedIndexError, match=msg): + df.loc[(slice(None), slice("bar")), :] + + # GH 16734: not sorted, but no real slicing + result = df.loc[(slice(None), df.loc[:, ("a", "bar")] > 5), :] + tm.assert_frame_equal(result, df.iloc[[1, 3], :]) + + def test_multiindex_slicers_non_unique(self): + + # GH 7106 + # non-unique mi index support + df = ( + DataFrame( + { + "A": ["foo", "foo", "foo", "foo"], + "B": ["a", "a", "a", "a"], + "C": [1, 2, 1, 3], + "D": [1, 2, 3, 4], + } + ) + .set_index(["A", "B", "C"]) + .sort_index() + ) + assert not df.index.is_unique + expected = ( + DataFrame({"A": ["foo", "foo"], "B": ["a", "a"], "C": [1, 1], "D": [1, 3]}) + .set_index(["A", "B", "C"]) + .sort_index() + ) + result = df.loc[(slice(None), slice(None), 1), :] + tm.assert_frame_equal(result, expected) + + # this is equivalent of an xs expression + result = df.xs(1, level=2, drop_level=False) + tm.assert_frame_equal(result, expected) + + df = ( + DataFrame( + { + "A": ["foo", "foo", "foo", "foo"], + "B": ["a", "a", "a", "a"], + "C": [1, 2, 1, 2], + "D": [1, 2, 3, 4], + } + ) + .set_index(["A", "B", "C"]) + 
.sort_index() + ) + assert not df.index.is_unique + expected = ( + DataFrame({"A": ["foo", "foo"], "B": ["a", "a"], "C": [1, 1], "D": [1, 3]}) + .set_index(["A", "B", "C"]) + .sort_index() + ) + result = df.loc[(slice(None), slice(None), 1), :] + assert not result.index.is_unique + tm.assert_frame_equal(result, expected) + + # GH12896 + # numpy-implementation dependent bug + ints = [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 12, + 13, + 14, + 14, + 16, + 17, + 18, + 19, + 200000, + 200000, + ] + n = len(ints) + idx = MultiIndex.from_arrays([["a"] * n, ints]) + result = Series([1] * n, index=idx) + result = result.sort_index() + result = result.loc[(slice(None), slice(100000))] + expected = Series([1] * (n - 2), index=idx[:-2]).sort_index() + tm.assert_series_equal(result, expected) + + def test_multiindex_slicers_datetimelike(self): + + # GH 7429 + # buggy/inconsistent behavior when slicing with datetime-like + import datetime + + dates = [ + datetime.datetime(2012, 1, 1, 12, 12, 12) + datetime.timedelta(days=i) + for i in range(6) + ] + freq = [1, 2] + index = MultiIndex.from_product([dates, freq], names=["date", "frequency"]) + + df = DataFrame( + np.arange(6 * 2 * 4, dtype="int64").reshape(-1, 4), + index=index, + columns=list("ABCD"), + ) + + # multi-axis slicing + idx = pd.IndexSlice + expected = df.iloc[[0, 2, 4], [0, 1]] + result = df.loc[ + ( + slice( + Timestamp("2012-01-01 12:12:12"), Timestamp("2012-01-03 12:12:12") + ), + slice(1, 1), + ), + slice("A", "B"), + ] + tm.assert_frame_equal(result, expected) + + result = df.loc[ + ( + idx[ + Timestamp("2012-01-01 12:12:12") : Timestamp("2012-01-03 12:12:12") + ], + idx[1:1], + ), + slice("A", "B"), + ] + tm.assert_frame_equal(result, expected) + + result = df.loc[ + ( + slice( + Timestamp("2012-01-01 12:12:12"), Timestamp("2012-01-03 12:12:12") + ), + 1, + ), + slice("A", "B"), + ] + tm.assert_frame_equal(result, expected) + + # with strings + result = df.loc[ + (slice("2012-01-01 12:12:12", "2012-01-03 12:12:12"), slice(1, 1)), + slice("A", "B"), + ] + tm.assert_frame_equal(result, expected) + + result = df.loc[ + (idx["2012-01-01 12:12:12":"2012-01-03 12:12:12"], 1), idx["A", "B"] + ] + tm.assert_frame_equal(result, expected) + + def test_multiindex_slicers_edges(self): + # GH 8132 + # various edge cases + df = DataFrame( + { + "A": ["A0"] * 5 + ["A1"] * 5 + ["A2"] * 5, + "B": ["B0", "B0", "B1", "B1", "B2"] * 3, + "DATE": [ + "2013-06-11", + "2013-07-02", + "2013-07-09", + "2013-07-30", + "2013-08-06", + "2013-06-11", + "2013-07-02", + "2013-07-09", + "2013-07-30", + "2013-08-06", + "2013-09-03", + "2013-10-01", + "2013-07-09", + "2013-08-06", + "2013-09-03", + ], + "VALUES": [22, 35, 14, 9, 4, 40, 18, 4, 2, 5, 1, 2, 3, 4, 2], + } + ) + + df["DATE"] = pd.to_datetime(df["DATE"]) + df1 = df.set_index(["A", "B", "DATE"]) + df1 = df1.sort_index() + + # A1 - Get all values under "A0" and "A1" + result = df1.loc[(slice("A1")), :] + expected = df1.iloc[0:10] + tm.assert_frame_equal(result, expected) + + # A2 - Get all values from the start to "A2" + result = df1.loc[(slice("A2")), :] + expected = df1 + tm.assert_frame_equal(result, expected) + + # A3 - Get all values under "B1" or "B2" + result = df1.loc[(slice(None), slice("B1", "B2")), :] + expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13, 14]] + tm.assert_frame_equal(result, expected) + + # A4 - Get all values between 2013-07-02 and 2013-07-09 + result = df1.loc[(slice(None), slice(None), slice("20130702", "20130709")), :] + expected = df1.iloc[[1, 2, 6, 7, 12]] + 
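# Note: .loc label slices are closed on both ends and the compact
+        # strings are parsed as datetimes, so the 2013-07-02 and 2013-07-09
+        # rows are both included.
+        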
tm.assert_frame_equal(result, expected) + + # B1 - Get all values in B0 that are also under A0, A1 and A2 + result = df1.loc[(slice("A2"), slice("B0")), :] + expected = df1.iloc[[0, 1, 5, 6, 10, 11]] + tm.assert_frame_equal(result, expected) + + # B2 - Get all values in B0, B1 and B2 (similar to what #2 is doing for + # the As) + result = df1.loc[(slice(None), slice("B2")), :] + expected = df1 + tm.assert_frame_equal(result, expected) + + # B3 - Get all values from B1 to B2 and up to 2013-08-06 + result = df1.loc[(slice(None), slice("B1", "B2"), slice("2013-08-06")), :] + expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13]] + tm.assert_frame_equal(result, expected) + + # B4 - Same as A4 but the start of the date slice is not a key. + # shows indexing on a partial selection slice + result = df1.loc[(slice(None), slice(None), slice("20130701", "20130709")), :] + expected = df1.iloc[[1, 2, 6, 7, 12]] + tm.assert_frame_equal(result, expected) + + def test_per_axis_per_level_doc_examples(self): + + # test index maker + idx = pd.IndexSlice + + # from indexing.rst / advanced + index = MultiIndex.from_product( + [_mklbl("A", 4), _mklbl("B", 2), _mklbl("C", 4), _mklbl("D", 2)] + ) + columns = MultiIndex.from_tuples( + [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")], + names=["lvl0", "lvl1"], + ) + df = DataFrame( + np.arange(len(index) * len(columns), dtype="int64").reshape( + (len(index), len(columns)) + ), + index=index, + columns=columns, + ) + result = df.loc[(slice("A1", "A3"), slice(None), ["C1", "C3"]), :] + expected = df.loc[ + [ + ( + a, + b, + c, + d, + ) + for a, b, c, d in df.index.values + if (a == "A1" or a == "A2" or a == "A3") and (c == "C1" or c == "C3") + ] + ] + tm.assert_frame_equal(result, expected) + result = df.loc[idx["A1":"A3", :, ["C1", "C3"]], :] + tm.assert_frame_equal(result, expected) + + result = df.loc[(slice(None), slice(None), ["C1", "C3"]), :] + expected = df.loc[ + [ + ( + a, + b, + c, + d, + ) + for a, b, c, d in df.index.values + if (c == "C1" or c == "C3") + ] + ] + tm.assert_frame_equal(result, expected) + result = df.loc[idx[:, :, ["C1", "C3"]], :] + tm.assert_frame_equal(result, expected) + + # not sorted + msg = ( + "MultiIndex slicing requires the index to be lexsorted: " + r"slicing on levels \[1\], lexsort depth 1" + ) + with pytest.raises(UnsortedIndexError, match=msg): + df.loc["A1", ("a", slice("foo"))] + + # GH 16734: not sorted, but no real slicing + tm.assert_frame_equal( + df.loc["A1", (slice(None), "foo")], df.loc["A1"].iloc[:, [0, 2]] + ) + + df = df.sort_index(axis=1) + + # slicing + df.loc["A1", (slice(None), "foo")] + df.loc[(slice(None), slice(None), ["C1", "C3"]), (slice(None), "foo")] + + # setitem + df.loc(axis=0)[:, :, ["C1", "C3"]] = -10 + + def test_loc_axis_arguments(self): + + index = MultiIndex.from_product( + [_mklbl("A", 4), _mklbl("B", 2), _mklbl("C", 4), _mklbl("D", 2)] + ) + columns = MultiIndex.from_tuples( + [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")], + names=["lvl0", "lvl1"], + ) + df = ( + DataFrame( + np.arange(len(index) * len(columns), dtype="int64").reshape( + (len(index), len(columns)) + ), + index=index, + columns=columns, + ) + .sort_index() + .sort_index(axis=1) + ) + + # axis 0 + result = df.loc(axis=0)["A1":"A3", :, ["C1", "C3"]] + expected = df.loc[ + [ + ( + a, + b, + c, + d, + ) + for a, b, c, d in df.index.values + if (a == "A1" or a == "A2" or a == "A3") and (c == "C1" or c == "C3") + ] + ] + tm.assert_frame_equal(result, expected) + + result = df.loc(axis="index")[:, :, ["C1", "C3"]] + 
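# "index" is the string alias for axis=0, so this goes through the
+        # same per-level row selection as the positional form above.
+        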
expected = df.loc[ + [ + ( + a, + b, + c, + d, + ) + for a, b, c, d in df.index.values + if (c == "C1" or c == "C3") + ] + ] + tm.assert_frame_equal(result, expected) + + # axis 1 + result = df.loc(axis=1)[:, "foo"] + expected = df.loc[:, (slice(None), "foo")] + tm.assert_frame_equal(result, expected) + + result = df.loc(axis="columns")[:, "foo"] + expected = df.loc[:, (slice(None), "foo")] + tm.assert_frame_equal(result, expected) + + # invalid axis + for i in [-1, 2, "foo"]: + msg = f"No axis named {i} for object type DataFrame" + with pytest.raises(ValueError, match=msg): + df.loc(axis=i)[:, :, ["C1", "C3"]] + + def test_loc_axis_single_level_multi_col_indexing_multiindex_col_df(self): + + # GH29519 + df = DataFrame( + np.arange(27).reshape(3, 9), + columns=MultiIndex.from_product([["a1", "a2", "a3"], ["b1", "b2", "b3"]]), + ) + result = df.loc(axis=1)["a1":"a2"] + expected = df.iloc[:, :-3] + + tm.assert_frame_equal(result, expected) + + def test_loc_axis_single_level_single_col_indexing_multiindex_col_df(self): + + # GH29519 + df = DataFrame( + np.arange(27).reshape(3, 9), + columns=MultiIndex.from_product([["a1", "a2", "a3"], ["b1", "b2", "b3"]]), + ) + result = df.loc(axis=1)["a1"] + expected = df.iloc[:, :3] + expected.columns = ["b1", "b2", "b3"] + + tm.assert_frame_equal(result, expected) + + def test_loc_ax_single_level_indexer_simple_df(self): + + # GH29519 + # test single level indexing on single index column data frame + df = DataFrame(np.arange(9).reshape(3, 3), columns=["a", "b", "c"]) + result = df.loc(axis=1)["a"] + expected = Series(np.array([0, 3, 6]), name="a") + tm.assert_series_equal(result, expected) + + def test_per_axis_per_level_setitem(self): + + # test index maker + idx = pd.IndexSlice + + # test multi-index slicing with per axis and per index controls + index = MultiIndex.from_tuples( + [("A", 1), ("A", 2), ("A", 3), ("B", 1)], names=["one", "two"] + ) + columns = MultiIndex.from_tuples( + [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")], + names=["lvl0", "lvl1"], + ) + + df_orig = DataFrame( + np.arange(16, dtype="int64").reshape(4, 4), index=index, columns=columns + ) + df_orig = df_orig.sort_index(axis=0).sort_index(axis=1) + + # identity + df = df_orig.copy() + df.loc[(slice(None), slice(None)), :] = 100 + expected = df_orig.copy() + expected.iloc[:, :] = 100 + tm.assert_frame_equal(df, expected) + + df = df_orig.copy() + df.loc(axis=0)[:, :] = 100 + expected = df_orig.copy() + expected.iloc[:, :] = 100 + tm.assert_frame_equal(df, expected) + + df = df_orig.copy() + df.loc[(slice(None), slice(None)), (slice(None), slice(None))] = 100 + expected = df_orig.copy() + expected.iloc[:, :] = 100 + tm.assert_frame_equal(df, expected) + + df = df_orig.copy() + df.loc[:, (slice(None), slice(None))] = 100 + expected = df_orig.copy() + expected.iloc[:, :] = 100 + tm.assert_frame_equal(df, expected) + + # index + df = df_orig.copy() + df.loc[(slice(None), [1]), :] = 100 + expected = df_orig.copy() + expected.iloc[[0, 3]] = 100 + tm.assert_frame_equal(df, expected) + + df = df_orig.copy() + df.loc[(slice(None), 1), :] = 100 + expected = df_orig.copy() + expected.iloc[[0, 3]] = 100 + tm.assert_frame_equal(df, expected) + + df = df_orig.copy() + df.loc(axis=0)[:, 1] = 100 + expected = df_orig.copy() + expected.iloc[[0, 3]] = 100 + tm.assert_frame_equal(df, expected) + + # columns + df = df_orig.copy() + df.loc[:, (slice(None), ["foo"])] = 100 + expected = df_orig.copy() + expected.iloc[:, [1, 3]] = 100 + tm.assert_frame_equal(df, expected) + + # both + df = 
df_orig.copy() + df.loc[(slice(None), 1), (slice(None), ["foo"])] = 100 + expected = df_orig.copy() + expected.iloc[[0, 3], [1, 3]] = 100 + tm.assert_frame_equal(df, expected) + + df = df_orig.copy() + df.loc[idx[:, 1], idx[:, ["foo"]]] = 100 + expected = df_orig.copy() + expected.iloc[[0, 3], [1, 3]] = 100 + tm.assert_frame_equal(df, expected) + + df = df_orig.copy() + df.loc["A", "a"] = 100 + expected = df_orig.copy() + expected.iloc[0:3, 0:2] = 100 + tm.assert_frame_equal(df, expected) + + # setting with a list-like + df = df_orig.copy() + df.loc[(slice(None), 1), (slice(None), ["foo"])] = np.array( + [[100, 100], [100, 100]], dtype="int64" + ) + expected = df_orig.copy() + expected.iloc[[0, 3], [1, 3]] = 100 + tm.assert_frame_equal(df, expected) + + # not enough values + df = df_orig.copy() + + msg = "setting an array element with a sequence." + with pytest.raises(ValueError, match=msg): + df.loc[(slice(None), 1), (slice(None), ["foo"])] = np.array( + [[100], [100, 100]], dtype="int64" + ) + + msg = "Must have equal len keys and value when setting with an iterable" + with pytest.raises(ValueError, match=msg): + df.loc[(slice(None), 1), (slice(None), ["foo"])] = np.array( + [100, 100, 100, 100], dtype="int64" + ) + + # with an alignable rhs + df = df_orig.copy() + df.loc[(slice(None), 1), (slice(None), ["foo"])] = ( + df.loc[(slice(None), 1), (slice(None), ["foo"])] * 5 + ) + expected = df_orig.copy() + expected.iloc[[0, 3], [1, 3]] = expected.iloc[[0, 3], [1, 3]] * 5 + tm.assert_frame_equal(df, expected) + + df = df_orig.copy() + df.loc[(slice(None), 1), (slice(None), ["foo"])] *= df.loc[ + (slice(None), 1), (slice(None), ["foo"]) + ] + expected = df_orig.copy() + expected.iloc[[0, 3], [1, 3]] *= expected.iloc[[0, 3], [1, 3]] + tm.assert_frame_equal(df, expected) + + rhs = df_orig.loc[(slice(None), 1), (slice(None), ["foo"])].copy() + rhs.loc[:, ("c", "bah")] = 10 + df = df_orig.copy() + df.loc[(slice(None), 1), (slice(None), ["foo"])] *= rhs + expected = df_orig.copy() + expected.iloc[[0, 3], [1, 3]] *= expected.iloc[[0, 3], [1, 3]] + tm.assert_frame_equal(df, expected) + + def test_multiindex_label_slicing_with_negative_step(self): + ser = Series( + np.arange(20), MultiIndex.from_product([list("abcde"), np.arange(4)]) + ) + SLC = pd.IndexSlice + + tm.assert_indexing_slices_equivalent(ser, SLC[::-1], SLC[::-1]) + + tm.assert_indexing_slices_equivalent(ser, SLC["d"::-1], SLC[15::-1]) + tm.assert_indexing_slices_equivalent(ser, SLC[("d",)::-1], SLC[15::-1]) + + tm.assert_indexing_slices_equivalent(ser, SLC[:"d":-1], SLC[:11:-1]) + tm.assert_indexing_slices_equivalent(ser, SLC[:("d",):-1], SLC[:11:-1]) + + tm.assert_indexing_slices_equivalent(ser, SLC["d":"b":-1], SLC[15:3:-1]) + tm.assert_indexing_slices_equivalent(ser, SLC[("d",):"b":-1], SLC[15:3:-1]) + tm.assert_indexing_slices_equivalent(ser, SLC["d":("b",):-1], SLC[15:3:-1]) + tm.assert_indexing_slices_equivalent(ser, SLC[("d",):("b",):-1], SLC[15:3:-1]) + tm.assert_indexing_slices_equivalent(ser, SLC["b":"d":-1], SLC[:0]) + + tm.assert_indexing_slices_equivalent(ser, SLC[("c", 2)::-1], SLC[10::-1]) + tm.assert_indexing_slices_equivalent(ser, SLC[:("c", 2):-1], SLC[:9:-1]) + tm.assert_indexing_slices_equivalent( + ser, SLC[("e", 0):("c", 2):-1], SLC[16:9:-1] + ) + + def test_multiindex_slice_first_level(self): + # GH 12697 + freq = ["a", "b", "c", "d"] + idx = MultiIndex.from_product([freq, np.arange(500)]) + df = DataFrame(list(range(2000)), index=idx, columns=["Test"]) + df_slice = df.loc[pd.IndexSlice[:, 30:70], :] + result = 
df_slice.loc["a"] + expected = DataFrame(list(range(30, 71)), columns=["Test"], index=range(30, 71)) + tm.assert_frame_equal(result, expected) + result = df_slice.loc["d"] + expected = DataFrame( + list(range(1530, 1571)), columns=["Test"], index=range(30, 71) + ) + tm.assert_frame_equal(result, expected) + + def test_int_series_slicing(self, multiindex_year_month_day_dataframe_random_data): + ymd = multiindex_year_month_day_dataframe_random_data + s = ymd["A"] + result = s[5:] + expected = s.reindex(s.index[5:]) + tm.assert_series_equal(result, expected) + + exp = ymd["A"].copy() + s[5:] = 0 + exp.values[5:] = 0 + tm.assert_numpy_array_equal(s.values, exp.values) + + result = ymd[5:] + expected = ymd.reindex(s.index[5:]) + tm.assert_frame_equal(result, expected) + + def test_loc_slice_negative_stepsize(self): + # GH#38071 + mi = MultiIndex.from_product([["a", "b"], [0, 1]]) + df = DataFrame([[1, 2], [3, 4], [5, 6], [7, 8]], index=mi) + result = df.loc[("a", slice(None, None, -1)), :] + expected = DataFrame( + [[3, 4], [1, 2]], index=MultiIndex.from_tuples([("a", 1), ("a", 0)]) + ) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/test_sorted.py b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/test_sorted.py new file mode 100644 index 0000000..6ba083d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexing/multiindex/test_sorted.py @@ -0,0 +1,127 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + MultiIndex, + Series, +) +import pandas._testing as tm + + +class TestMultiIndexSorted: + def test_getitem_multilevel_index_tuple_not_sorted(self): + index_columns = list("abc") + df = DataFrame( + [[0, 1, 0, "x"], [0, 0, 1, "y"]], columns=index_columns + ["data"] + ) + df = df.set_index(index_columns) + query_index = df.index[:1] + rs = df.loc[query_index, "data"] + + xp_idx = MultiIndex.from_tuples([(0, 1, 0)], names=["a", "b", "c"]) + xp = Series(["x"], index=xp_idx, name="data") + tm.assert_series_equal(rs, xp) + + def test_getitem_slice_not_sorted(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + df = frame.sort_index(level=1).T + + # buglet with int typechecking + result = df.iloc[:, : np.int32(3)] + expected = df.reindex(columns=df.columns[:3]) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("key", [None, lambda x: x]) + def test_frame_getitem_not_sorted2(self, key): + # 13431 + df = DataFrame( + { + "col1": ["b", "d", "b", "a"], + "col2": [3, 1, 1, 2], + "data": ["one", "two", "three", "four"], + } + ) + + df2 = df.set_index(["col1", "col2"]) + df2_original = df2.copy() + + with tm.assert_produces_warning(FutureWarning): + return_value = df2.index.set_levels( + ["b", "d", "a"], level="col1", inplace=True + ) + assert return_value is None + with tm.assert_produces_warning(FutureWarning): + return_value = df2.index.set_codes([0, 1, 0, 2], level="col1", inplace=True) + assert return_value is None + assert not df2.index.is_monotonic + + assert df2_original.index.equals(df2.index) + expected = df2.sort_index(key=key) + assert expected.index.is_monotonic + + result = df2.sort_index(level=0, key=key) + assert result.index.is_monotonic + tm.assert_frame_equal(result, expected) + + def test_sort_values_key(self, multiindex_dataframe_random_data): + arrays = [ + ["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + tuples = 
zip(*arrays) + index = MultiIndex.from_tuples(tuples) + index = index.sort_values( # sort by third letter + key=lambda x: x.map(lambda entry: entry[2]) + ) + result = DataFrame(range(8), index=index) + + arrays = [ + ["foo", "foo", "bar", "bar", "qux", "qux", "baz", "baz"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + tuples = zip(*arrays) + index = MultiIndex.from_tuples(tuples) + expected = DataFrame(range(8), index=index) + + tm.assert_frame_equal(result, expected) + + def test_frame_getitem_not_sorted(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + df = frame.T + df["foo", "four"] = "foo" + + arrays = [np.array(x) for x in zip(*df.columns.values)] + + result = df["foo"] + result2 = df.loc[:, "foo"] + expected = df.reindex(columns=df.columns[arrays[0] == "foo"]) + expected.columns = expected.columns.droplevel(0) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected) + + df = df.T + result = df.xs("foo") + result2 = df.loc["foo"] + expected = df.reindex(df.index[arrays[0] == "foo"]) + expected.index = expected.index.droplevel(0) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected) + + def test_series_getitem_not_sorted(self): + arrays = [ + ["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + tuples = zip(*arrays) + index = MultiIndex.from_tuples(tuples) + s = Series(np.random.randn(8), index=index) + + arrays = [np.array(x) for x in zip(*index.values)] + + result = s["qux"] + result2 = s.loc["qux"] + expected = s[arrays[0] == "qux"] + expected.index = expected.index.droplevel(0) + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result2, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_at.py b/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_at.py new file mode 100644 index 0000000..d6be817 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_at.py @@ -0,0 +1,178 @@ +from datetime import ( + datetime, + timezone, +) + +import numpy as np +import pytest + +from pandas import ( + CategoricalDtype, + CategoricalIndex, + DataFrame, + Series, + Timestamp, +) +import pandas._testing as tm + + +def test_at_timezone(): + # https://github.com/pandas-dev/pandas/issues/33544 + result = DataFrame({"foo": [datetime(2000, 1, 1)]}) + result.at[0, "foo"] = datetime(2000, 1, 2, tzinfo=timezone.utc) + expected = DataFrame( + {"foo": [datetime(2000, 1, 2, tzinfo=timezone.utc)]}, dtype=object + ) + tm.assert_frame_equal(result, expected) + + +def test_selection_methods_of_assigned_col(): + # GH 29282 + df = DataFrame(data={"a": [1, 2, 3], "b": [4, 5, 6]}) + df2 = DataFrame(data={"c": [7, 8, 9]}, index=[2, 1, 0]) + df["c"] = df2["c"] + df.at[1, "c"] = 11 + result = df + expected = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [9, 11, 7]}) + tm.assert_frame_equal(result, expected) + result = df.at[1, "c"] + assert result == 11 + + result = df["c"] + expected = Series([9, 11, 7], name="c") + tm.assert_series_equal(result, expected) + + result = df[["c"]] + expected = DataFrame({"c": [9, 11, 7]}) + tm.assert_frame_equal(result, expected) + + +class TestAtSetItem: + def test_at_setitem_mixed_index_assignment(self): + # GH#19860 + ser = Series([1, 2, 3, 4, 5], index=["a", "b", "c", 1, 2]) + ser.at["a"] = 11 + assert ser.iat[0] == 11 + ser.at[1] = 22 + assert ser.iat[3] == 22 + + def test_at_setitem_categorical_missing(self): + 
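# Every cell starts as NaN with dtype CategoricalDtype(["foo", "bar"]);
+        # .at writes the valid category "foo" into the single cell (1, 1).
+        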
df = DataFrame( + index=range(3), columns=range(3), dtype=CategoricalDtype(["foo", "bar"]) + ) + df.at[1, 1] = "foo" + + expected = DataFrame( + [ + [np.nan, np.nan, np.nan], + [np.nan, "foo", np.nan], + [np.nan, np.nan, np.nan], + ], + dtype=CategoricalDtype(["foo", "bar"]), + ) + + tm.assert_frame_equal(df, expected) + + +class TestAtSetItemWithExpansion: + def test_at_setitem_expansion_series_dt64tz_value(self, tz_naive_fixture): + # GH#25506 + ts = Timestamp("2017-08-05 00:00:00+0100", tz=tz_naive_fixture) + result = Series(ts) + result.at[1] = ts + expected = Series([ts, ts]) + tm.assert_series_equal(result, expected) + + +class TestAtWithDuplicates: + def test_at_with_duplicate_axes_requires_scalar_lookup(self): + # GH#33041 check that falling back to loc doesn't allow non-scalar + # args to slip in + + arr = np.random.randn(6).reshape(3, 2) + df = DataFrame(arr, columns=["A", "A"]) + + msg = "Invalid call for scalar access" + with pytest.raises(ValueError, match=msg): + df.at[[1, 2]] + with pytest.raises(ValueError, match=msg): + df.at[1, ["A"]] + with pytest.raises(ValueError, match=msg): + df.at[:, "A"] + + with pytest.raises(ValueError, match=msg): + df.at[[1, 2]] = 1 + with pytest.raises(ValueError, match=msg): + df.at[1, ["A"]] = 1 + with pytest.raises(ValueError, match=msg): + df.at[:, "A"] = 1 + + +class TestAtErrors: + # TODO: De-duplicate/parametrize + # test_at_series_raises_key_error2, test_at_frame_raises_key_error2 + + def test_at_series_raises_key_error(self, indexer_al): + # GH#31724 .at should match .loc + + ser = Series([1, 2, 3], index=[3, 2, 1]) + result = indexer_al(ser)[1] + assert result == 3 + + with pytest.raises(KeyError, match="a"): + indexer_al(ser)["a"] + + def test_at_frame_raises_key_error(self, indexer_al): + # GH#31724 .at should match .loc + + df = DataFrame({0: [1, 2, 3]}, index=[3, 2, 1]) + + result = indexer_al(df)[1, 0] + assert result == 3 + + with pytest.raises(KeyError, match="a"): + indexer_al(df)["a", 0] + + with pytest.raises(KeyError, match="a"): + indexer_al(df)[1, "a"] + + def test_at_series_raises_key_error2(self, indexer_al): + # at should not fallback + # GH#7814 + # GH#31724 .at should match .loc + ser = Series([1, 2, 3], index=list("abc")) + result = indexer_al(ser)["a"] + assert result == 1 + + with pytest.raises(KeyError, match="^0$"): + indexer_al(ser)[0] + + def test_at_frame_raises_key_error2(self, indexer_al): + # GH#31724 .at should match .loc + df = DataFrame({"A": [1, 2, 3]}, index=list("abc")) + result = indexer_al(df)["a", "A"] + assert result == 1 + + with pytest.raises(KeyError, match="^0$"): + indexer_al(df)["a", 0] + + def test_at_getitem_mixed_index_no_fallback(self): + # GH#19860 + ser = Series([1, 2, 3, 4, 5], index=["a", "b", "c", 1, 2]) + with pytest.raises(KeyError, match="^0$"): + ser.at[0] + with pytest.raises(KeyError, match="^4$"): + ser.at[4] + + def test_at_categorical_integers(self): + # CategoricalIndex with integer categories that don't happen to match + # the Categorical's codes + ci = CategoricalIndex([3, 4]) + + arr = np.arange(4).reshape(2, 2) + frame = DataFrame(arr, index=ci) + + for df in [frame, frame.T]: + for key in [0, 1]: + with pytest.raises(KeyError, match=str(key)): + df.at[key, key] diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_categorical.py b/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_categorical.py new file mode 100644 index 0000000..8700438 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_categorical.py 
@@ -0,0 +1,555 @@
+import re
+
+import numpy as np
+import pytest
+
+from pandas.core.dtypes.common import is_categorical_dtype
+
+import pandas as pd
+from pandas import (
+    Categorical,
+    CategoricalIndex,
+    DataFrame,
+    Index,
+    Interval,
+    Series,
+    Timedelta,
+    Timestamp,
+)
+import pandas._testing as tm
+from pandas.api.types import CategoricalDtype as CDT
+
+
+class TestCategoricalIndex:
+    def setup_method(self, method):
+
+        self.df = DataFrame(
+            {
+                "A": np.arange(6, dtype="int64"),
+            },
+            index=CategoricalIndex(list("aabbca"), dtype=CDT(list("cab")), name="B"),
+        )
+        self.df2 = DataFrame(
+            {
+                "A": np.arange(6, dtype="int64"),
+            },
+            index=CategoricalIndex(list("aabbca"), dtype=CDT(list("cabe")), name="B"),
+        )
+
+    def test_loc_scalar(self):
+        dtype = CDT(list("cab"))
+        result = self.df.loc["a"]
+        bidx = Series(list("aaa"), name="B").astype(dtype)
+        assert bidx.dtype == dtype
+
+        expected = DataFrame({"A": [0, 1, 5]}, index=Index(bidx))
+        tm.assert_frame_equal(result, expected)
+
+        df = self.df.copy()
+        df.loc["a"] = 20
+        bidx2 = Series(list("aabbca"), name="B").astype(dtype)
+        assert bidx2.dtype == dtype
+        expected = DataFrame(
+            {
+                "A": [20, 20, 2, 3, 4, 20],
+            },
+            index=Index(bidx2),
+        )
+        tm.assert_frame_equal(df, expected)
+
+        # value not in the categories
+        with pytest.raises(KeyError, match=r"^'d'$"):
+            df.loc["d"]
+
+        df2 = df.copy()
+        expected = df2.copy()
+        expected.index = expected.index.astype(object)
+        expected.loc["d"] = 10
+        df2.loc["d"] = 10
+        tm.assert_frame_equal(df2, expected)
+
+    def test_loc_setitem_with_expansion_non_category(self):
+        # Setting-with-expansion with a new key "d" that is not among categories
+        df = self.df
+        df.loc["a"] = 20
+
+        # Setting a new row on an existing column
+        df3 = df.copy()
+        df3.loc["d", "A"] = 10
+        bidx3 = Index(list("aabbcad"), name="B")
+        expected3 = DataFrame(
+            {
+                "A": [20, 20, 2, 3, 4, 20, 10.0],
+            },
+            index=Index(bidx3),
+        )
+        tm.assert_frame_equal(df3, expected3)
+
+        # Setting a new row _and_ new column
+        df4 = df.copy()
+        df4.loc["d", "C"] = 10
+        expected3 = DataFrame(
+            {
+                "A": [20, 20, 2, 3, 4, 20, np.nan],
+                "C": [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 10],
+            },
+            index=Index(bidx3),
+        )
+        tm.assert_frame_equal(df4, expected3)
+
+    def test_loc_getitem_scalar_non_category(self):
+        with pytest.raises(KeyError, match="^1$"):
+            self.df.loc[1]
+
+    def test_slicing(self):
+        cat = Series(Categorical([1, 2, 3, 4]))
+        reverse = cat[::-1]
+        exp = np.array([4, 3, 2, 1], dtype=np.int64)
+        tm.assert_numpy_array_equal(reverse.__array__(), exp)
+
+        df = DataFrame({"value": (np.arange(100) + 1).astype("int64")})
+        df["D"] = pd.cut(df.value, bins=[0, 25, 50, 75, 100])
+
+        expected = Series([11, Interval(0, 25)], index=["value", "D"], name=10)
+        result = df.iloc[10]
+        tm.assert_series_equal(result, expected)
+
+        expected = DataFrame(
+            {"value": np.arange(11, 21).astype("int64")},
+            index=np.arange(10, 20).astype("int64"),
+        )
+        expected["D"] = pd.cut(expected.value, bins=[0, 25, 50, 75, 100])
+        result = df.iloc[10:20]
+        tm.assert_frame_equal(result, expected)
+
+        expected = Series([9, Interval(0, 25)], index=["value", "D"], name=8)
+        result = df.loc[8]
+        tm.assert_series_equal(result, expected)
+
+    def test_slicing_and_getting_ops(self):
+
+        # systematically test the slicing operations:
+        # for all slicing ops:
+        # - returning a dataframe
+        # - returning a column
+        # - returning a row
+        # - returning a single value
+
+        cats = Categorical(
+            ["a", "c", "b", "c", "c", "c", "c"], categories=["a", "b", "c"]
+        )
+        idx = 
Index(["h", "i", "j", "k", "l", "m", "n"]) + values = [1, 2, 3, 4, 5, 6, 7] + df = DataFrame({"cats": cats, "values": values}, index=idx) + + # the expected values + cats2 = Categorical(["b", "c"], categories=["a", "b", "c"]) + idx2 = Index(["j", "k"]) + values2 = [3, 4] + + # 2:4,: | "j":"k",: + exp_df = DataFrame({"cats": cats2, "values": values2}, index=idx2) + + # :,"cats" | :,0 + exp_col = Series(cats, index=idx, name="cats") + + # "j",: | 2,: + exp_row = Series(["b", 3], index=["cats", "values"], dtype="object", name="j") + + # "j","cats | 2,0 + exp_val = "b" + + # iloc + # frame + res_df = df.iloc[2:4, :] + tm.assert_frame_equal(res_df, exp_df) + assert is_categorical_dtype(res_df["cats"].dtype) + + # row + res_row = df.iloc[2, :] + tm.assert_series_equal(res_row, exp_row) + assert isinstance(res_row["cats"], str) + + # col + res_col = df.iloc[:, 0] + tm.assert_series_equal(res_col, exp_col) + assert is_categorical_dtype(res_col.dtype) + + # single value + res_val = df.iloc[2, 0] + assert res_val == exp_val + + # loc + # frame + res_df = df.loc["j":"k", :] + tm.assert_frame_equal(res_df, exp_df) + assert is_categorical_dtype(res_df["cats"].dtype) + + # row + res_row = df.loc["j", :] + tm.assert_series_equal(res_row, exp_row) + assert isinstance(res_row["cats"], str) + + # col + res_col = df.loc[:, "cats"] + tm.assert_series_equal(res_col, exp_col) + assert is_categorical_dtype(res_col.dtype) + + # single value + res_val = df.loc["j", "cats"] + assert res_val == exp_val + + # single value + res_val = df.loc["j", df.columns[0]] + assert res_val == exp_val + + # iat + res_val = df.iat[2, 0] + assert res_val == exp_val + + # at + res_val = df.at["j", "cats"] + assert res_val == exp_val + + # fancy indexing + exp_fancy = df.iloc[[2]] + + res_fancy = df[df["cats"] == "b"] + tm.assert_frame_equal(res_fancy, exp_fancy) + res_fancy = df[df["values"] == 3] + tm.assert_frame_equal(res_fancy, exp_fancy) + + # get_value + res_val = df.at["j", "cats"] + assert res_val == exp_val + + # i : int, slice, or sequence of integers + res_row = df.iloc[2] + tm.assert_series_equal(res_row, exp_row) + assert isinstance(res_row["cats"], str) + + res_df = df.iloc[slice(2, 4)] + tm.assert_frame_equal(res_df, exp_df) + assert is_categorical_dtype(res_df["cats"].dtype) + + res_df = df.iloc[[2, 3]] + tm.assert_frame_equal(res_df, exp_df) + assert is_categorical_dtype(res_df["cats"].dtype) + + res_col = df.iloc[:, 0] + tm.assert_series_equal(res_col, exp_col) + assert is_categorical_dtype(res_col.dtype) + + res_df = df.iloc[:, slice(0, 2)] + tm.assert_frame_equal(res_df, df) + assert is_categorical_dtype(res_df["cats"].dtype) + + res_df = df.iloc[:, [0, 1]] + tm.assert_frame_equal(res_df, df) + assert is_categorical_dtype(res_df["cats"].dtype) + + def test_slicing_doc_examples(self): + + # GH 7918 + cats = Categorical( + ["a", "b", "b", "b", "c", "c", "c"], categories=["a", "b", "c"] + ) + idx = Index(["h", "i", "j", "k", "l", "m", "n"]) + values = [1, 2, 2, 2, 3, 4, 5] + df = DataFrame({"cats": cats, "values": values}, index=idx) + + result = df.iloc[2:4, :] + expected = DataFrame( + { + "cats": Categorical(["b", "b"], categories=["a", "b", "c"]), + "values": [2, 2], + }, + index=["j", "k"], + ) + tm.assert_frame_equal(result, expected) + + result = df.iloc[2:4, :].dtypes + expected = Series(["category", "int64"], ["cats", "values"]) + tm.assert_series_equal(result, expected) + + result = df.loc["h":"j", "cats"] + expected = Series( + Categorical(["a", "b", "b"], categories=["a", "b", "c"]), + index=["h", "i", 
"j"], + name="cats", + ) + tm.assert_series_equal(result, expected) + + result = df.loc["h":"j", df.columns[0:1]] + expected = DataFrame( + {"cats": Categorical(["a", "b", "b"], categories=["a", "b", "c"])}, + index=["h", "i", "j"], + ) + tm.assert_frame_equal(result, expected) + + def test_loc_getitem_listlike_labels(self): + # list of labels + result = self.df.loc[["c", "a"]] + expected = self.df.iloc[[4, 0, 1, 5]] + tm.assert_frame_equal(result, expected, check_index_type=True) + + def test_loc_getitem_listlike_unused_category(self): + # GH#37901 a label that is in index.categories but not in index + # listlike containing an element in the categories but not in the values + with pytest.raises(KeyError, match=re.escape("['e'] not in index")): + self.df2.loc[["a", "b", "e"]] + + def test_loc_getitem_label_unused_category(self): + # element in the categories but not in the values + with pytest.raises(KeyError, match=r"^'e'$"): + self.df2.loc["e"] + + def test_loc_getitem_non_category(self): + # not all labels in the categories + with pytest.raises(KeyError, match=re.escape("['d'] not in index")): + self.df2.loc[["a", "d"]] + + def test_loc_setitem_expansion_label_unused_category(self): + # assigning with a label that is in the categories but not in the index + df = self.df2.copy() + df.loc["e"] = 20 + result = df.loc[["a", "b", "e"]] + exp_index = CategoricalIndex(list("aaabbe"), categories=list("cabe"), name="B") + expected = DataFrame({"A": [0, 1, 5, 2, 3, 20]}, index=exp_index) + tm.assert_frame_equal(result, expected) + + def test_loc_listlike_dtypes(self): + # GH 11586 + + # unique categories and codes + index = CategoricalIndex(["a", "b", "c"]) + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=index) + + # unique slice + res = df.loc[["a", "b"]] + exp_index = CategoricalIndex(["a", "b"], categories=index.categories) + exp = DataFrame({"A": [1, 2], "B": [4, 5]}, index=exp_index) + tm.assert_frame_equal(res, exp, check_index_type=True) + + # duplicated slice + res = df.loc[["a", "a", "b"]] + + exp_index = CategoricalIndex(["a", "a", "b"], categories=index.categories) + exp = DataFrame({"A": [1, 1, 2], "B": [4, 4, 5]}, index=exp_index) + tm.assert_frame_equal(res, exp, check_index_type=True) + + with pytest.raises(KeyError, match=re.escape("['x'] not in index")): + df.loc[["a", "x"]] + + def test_loc_listlike_dtypes_duplicated_categories_and_codes(self): + # duplicated categories and codes + index = CategoricalIndex(["a", "b", "a"]) + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=index) + + # unique slice + res = df.loc[["a", "b"]] + exp = DataFrame( + {"A": [1, 3, 2], "B": [4, 6, 5]}, index=CategoricalIndex(["a", "a", "b"]) + ) + tm.assert_frame_equal(res, exp, check_index_type=True) + + # duplicated slice + res = df.loc[["a", "a", "b"]] + exp = DataFrame( + {"A": [1, 3, 1, 3, 2], "B": [4, 6, 4, 6, 5]}, + index=CategoricalIndex(["a", "a", "a", "a", "b"]), + ) + tm.assert_frame_equal(res, exp, check_index_type=True) + + with pytest.raises(KeyError, match=re.escape("['x'] not in index")): + df.loc[["a", "x"]] + + def test_loc_listlike_dtypes_unused_category(self): + # contains unused category + index = CategoricalIndex(["a", "b", "a", "c"], categories=list("abcde")) + df = DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]}, index=index) + + res = df.loc[["a", "b"]] + exp = DataFrame( + {"A": [1, 3, 2], "B": [5, 7, 6]}, + index=CategoricalIndex(["a", "a", "b"], categories=list("abcde")), + ) + tm.assert_frame_equal(res, exp, check_index_type=True) + + # duplicated slice + 
res = df.loc[["a", "a", "b"]] + exp = DataFrame( + {"A": [1, 3, 1, 3, 2], "B": [5, 7, 5, 7, 6]}, + index=CategoricalIndex(["a", "a", "a", "a", "b"], categories=list("abcde")), + ) + tm.assert_frame_equal(res, exp, check_index_type=True) + + with pytest.raises(KeyError, match=re.escape("['x'] not in index")): + df.loc[["a", "x"]] + + def test_loc_getitem_listlike_unused_category_raises_keyerror(self): + # key that is an *unused* category raises + index = CategoricalIndex(["a", "b", "a", "c"], categories=list("abcde")) + df = DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]}, index=index) + + with pytest.raises(KeyError, match="e"): + # For comparison, check the scalar behavior + df.loc["e"] + + with pytest.raises(KeyError, match=re.escape("['e'] not in index")): + df.loc[["a", "e"]] + + def test_ix_categorical_index(self): + # GH 12531 + df = DataFrame(np.random.randn(3, 3), index=list("ABC"), columns=list("XYZ")) + cdf = df.copy() + cdf.index = CategoricalIndex(df.index) + cdf.columns = CategoricalIndex(df.columns) + + expect = Series(df.loc["A", :], index=cdf.columns, name="A") + tm.assert_series_equal(cdf.loc["A", :], expect) + + expect = Series(df.loc[:, "X"], index=cdf.index, name="X") + tm.assert_series_equal(cdf.loc[:, "X"], expect) + + exp_index = CategoricalIndex(list("AB"), categories=["A", "B", "C"]) + expect = DataFrame(df.loc[["A", "B"], :], columns=cdf.columns, index=exp_index) + tm.assert_frame_equal(cdf.loc[["A", "B"], :], expect) + + exp_columns = CategoricalIndex(list("XY"), categories=["X", "Y", "Z"]) + expect = DataFrame(df.loc[:, ["X", "Y"]], index=cdf.index, columns=exp_columns) + tm.assert_frame_equal(cdf.loc[:, ["X", "Y"]], expect) + + def test_ix_categorical_index_non_unique(self): + + # non-unique + df = DataFrame(np.random.randn(3, 3), index=list("ABA"), columns=list("XYX")) + cdf = df.copy() + cdf.index = CategoricalIndex(df.index) + cdf.columns = CategoricalIndex(df.columns) + + exp_index = CategoricalIndex(list("AA"), categories=["A", "B"]) + expect = DataFrame(df.loc["A", :], columns=cdf.columns, index=exp_index) + tm.assert_frame_equal(cdf.loc["A", :], expect) + + exp_columns = CategoricalIndex(list("XX"), categories=["X", "Y"]) + expect = DataFrame(df.loc[:, "X"], index=cdf.index, columns=exp_columns) + tm.assert_frame_equal(cdf.loc[:, "X"], expect) + + expect = DataFrame( + df.loc[["A", "B"], :], + columns=cdf.columns, + index=CategoricalIndex(list("AAB")), + ) + tm.assert_frame_equal(cdf.loc[["A", "B"], :], expect) + + expect = DataFrame( + df.loc[:, ["X", "Y"]], + index=cdf.index, + columns=CategoricalIndex(list("XXY")), + ) + tm.assert_frame_equal(cdf.loc[:, ["X", "Y"]], expect) + + def test_loc_slice(self): + # GH9748 + msg = ( + "cannot do slice indexing on CategoricalIndex with these " + r"indexers \[1\] of type int" + ) + with pytest.raises(TypeError, match=msg): + self.df.loc[1:5] + + result = self.df.loc["b":"c"] + expected = self.df.iloc[[2, 3, 4]] + tm.assert_frame_equal(result, expected) + + def test_loc_and_at_with_categorical_index(self): + # GH 20629 + df = DataFrame( + [[1, 2], [3, 4], [5, 6]], index=CategoricalIndex(["A", "B", "C"]) + ) + + s = df[0] + assert s.loc["A"] == 1 + assert s.at["A"] == 1 + + assert df.loc["B", 1] == 4 + assert df.at["B", 1] == 4 + + @pytest.mark.parametrize( + "idx_values", + [ + # python types + [1, 2, 3], + [-1, -2, -3], + [1.5, 2.5, 3.5], + [-1.5, -2.5, -3.5], + # numpy int/uint + *(np.array([1, 2, 3], dtype=dtype) for dtype in tm.ALL_INT_NUMPY_DTYPES), + # numpy floats + *(np.array([1.5, 2.5, 3.5], dtype=dtyp) 
for dtyp in tm.FLOAT_NUMPY_DTYPES), + # numpy object + np.array([1, "b", 3.5], dtype=object), + # pandas scalars + [Interval(1, 4), Interval(4, 6), Interval(6, 9)], + [Timestamp(2019, 1, 1), Timestamp(2019, 2, 1), Timestamp(2019, 3, 1)], + [Timedelta(1, "d"), Timedelta(2, "d"), Timedelta(3, "D")], + # pandas Integer arrays + *(pd.array([1, 2, 3], dtype=dtype) for dtype in tm.ALL_INT_EA_DTYPES), + # other pandas arrays + pd.IntervalIndex.from_breaks([1, 4, 6, 9]).array, + pd.date_range("2019-01-01", periods=3).array, + pd.timedelta_range(start="1d", periods=3).array, + ], + ) + def test_loc_getitem_with_non_string_categories(self, idx_values, ordered): + # GH-17569 + cat_idx = CategoricalIndex(idx_values, ordered=ordered) + df = DataFrame({"A": ["foo", "bar", "baz"]}, index=cat_idx) + sl = slice(idx_values[0], idx_values[1]) + + # scalar selection + result = df.loc[idx_values[0]] + expected = Series(["foo"], index=["A"], name=idx_values[0]) + tm.assert_series_equal(result, expected) + + # list selection + result = df.loc[idx_values[:2]] + expected = DataFrame(["foo", "bar"], index=cat_idx[:2], columns=["A"]) + tm.assert_frame_equal(result, expected) + + # slice selection + result = df.loc[sl] + expected = DataFrame(["foo", "bar"], index=cat_idx[:2], columns=["A"]) + tm.assert_frame_equal(result, expected) + + # scalar assignment + result = df.copy() + result.loc[idx_values[0]] = "qux" + expected = DataFrame({"A": ["qux", "bar", "baz"]}, index=cat_idx) + tm.assert_frame_equal(result, expected) + + # list assignment + result = df.copy() + result.loc[idx_values[:2], "A"] = ["qux", "qux2"] + expected = DataFrame({"A": ["qux", "qux2", "baz"]}, index=cat_idx) + tm.assert_frame_equal(result, expected) + + # slice assignment + result = df.copy() + result.loc[sl, "A"] = ["qux", "qux2"] + expected = DataFrame({"A": ["qux", "qux2", "baz"]}, index=cat_idx) + tm.assert_frame_equal(result, expected) + + def test_getitem_categorical_with_nan(self): + # GH#41933 + ci = CategoricalIndex(["A", "B", np.nan]) + + ser = Series(range(3), index=ci) + + assert ser[np.nan] == 2 + assert ser.loc[np.nan] == 2 + + df = DataFrame(ser) + assert df.loc[np.nan, 0] == 2 + assert df.loc[np.nan][0] == 2 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_chaining_and_caching.py b/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_chaining_and_caching.py new file mode 100644 index 0000000..4fb90a2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_chaining_and_caching.py @@ -0,0 +1,516 @@ +from string import ascii_letters as letters + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + DataFrame, + Series, + Timestamp, + date_range, + option_context, +) +import pandas._testing as tm +import pandas.core.common as com + +msg = "A value is trying to be set on a copy of a slice from a DataFrame" + + +def random_text(nobs=100): + # Construct a DataFrame where each row is a random slice from 'letters' + idxs = np.random.randint(len(letters), size=(nobs, 2)) + idxs.sort(axis=1) + strings = [letters[x[0] : x[1]] for x in idxs] + + return DataFrame(strings, columns=["letters"]) + + +class TestCaching: + def test_slice_consolidate_invalidate_item_cache(self): + + # this is chained assignment, but will 'work' + with option_context("chained_assignment", None): + + # #3970 + df = DataFrame({"aa": np.arange(5), "bb": [2.2] * 5}) + + # Creates a second float block + df["cc"] = 0.0 + + # caches a 
reference to the 'bb' series + df["bb"] + + # repr machinery triggers consolidation + repr(df) + + # Assignment to wrong series + df["bb"].iloc[0] = 0.17 + df._clear_item_cache() + tm.assert_almost_equal(df["bb"][0], 0.17) + + @pytest.mark.parametrize("do_ref", [True, False]) + def test_setitem_cache_updating(self, do_ref): + # GH 5424 + cont = ["one", "two", "three", "four", "five", "six", "seven"] + + df = DataFrame({"a": cont, "b": cont[3:] + cont[:3], "c": np.arange(7)}) + + # ref the cache + if do_ref: + df.loc[0, "c"] + + # set it + df.loc[7, "c"] = 1 + + assert df.loc[0, "c"] == 0.0 + assert df.loc[7, "c"] == 1.0 + + def test_setitem_cache_updating_slices(self): + # GH 7084 + # not updating cache on series setting with slices + expected = DataFrame( + {"A": [600, 600, 600]}, index=date_range("5/7/2014", "5/9/2014") + ) + out = DataFrame({"A": [0, 0, 0]}, index=date_range("5/7/2014", "5/9/2014")) + df = DataFrame({"C": ["A", "A", "A"], "D": [100, 200, 300]}) + + # loop through df to update out + six = Timestamp("5/7/2014") + eix = Timestamp("5/9/2014") + for ix, row in df.iterrows(): + out.loc[six:eix, row["C"]] = out.loc[six:eix, row["C"]] + row["D"] + + tm.assert_frame_equal(out, expected) + tm.assert_series_equal(out["A"], expected["A"]) + + # try via a chain indexing + # this actually works + out = DataFrame({"A": [0, 0, 0]}, index=date_range("5/7/2014", "5/9/2014")) + for ix, row in df.iterrows(): + v = out[row["C"]][six:eix] + row["D"] + out[row["C"]][six:eix] = v + + tm.assert_frame_equal(out, expected) + tm.assert_series_equal(out["A"], expected["A"]) + + out = DataFrame({"A": [0, 0, 0]}, index=date_range("5/7/2014", "5/9/2014")) + for ix, row in df.iterrows(): + out.loc[six:eix, row["C"]] += row["D"] + + tm.assert_frame_equal(out, expected) + tm.assert_series_equal(out["A"], expected["A"]) + + def test_altering_series_clears_parent_cache(self): + # GH #33675 + df = DataFrame([[1, 2], [3, 4]], index=["a", "b"], columns=["A", "B"]) + ser = df["A"] + + assert "A" in df._item_cache + + # Adding a new entry to ser swaps in a new array, so "A" needs to + # be removed from df._item_cache + ser["c"] = 5 + assert len(ser) == 3 + assert "A" not in df._item_cache + assert df["A"] is not ser + assert len(df["A"]) == 2 + + +class TestChaining: + def test_setitem_chained_setfault(self): + + # GH6026 + data = ["right", "left", "left", "left", "right", "left", "timeout"] + mdata = ["right", "left", "left", "left", "right", "left", "none"] + + df = DataFrame({"response": np.array(data)}) + mask = df.response == "timeout" + df.response[mask] = "none" + tm.assert_frame_equal(df, DataFrame({"response": mdata})) + + recarray = np.rec.fromarrays([data], names=["response"]) + df = DataFrame(recarray) + mask = df.response == "timeout" + df.response[mask] = "none" + tm.assert_frame_equal(df, DataFrame({"response": mdata})) + + df = DataFrame({"response": data, "response1": data}) + mask = df.response == "timeout" + df.response[mask] = "none" + tm.assert_frame_equal(df, DataFrame({"response": mdata, "response1": data})) + + # GH 6056 + expected = DataFrame({"A": [np.nan, "bar", "bah", "foo", "bar"]}) + df = DataFrame({"A": np.array(["foo", "bar", "bah", "foo", "bar"])}) + df["A"].iloc[0] = np.nan + result = df.head() + tm.assert_frame_equal(result, expected) + + df = DataFrame({"A": np.array(["foo", "bar", "bah", "foo", "bar"])}) + df.A.iloc[0] = np.nan + result = df.head() + tm.assert_frame_equal(result, expected) + + @pytest.mark.arm_slow + def test_detect_chained_assignment(self): + + 
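# Sets the option globally rather than via option_context; the
+        # single-block int64 frame below lets the chained assignment reach
+        # the original data, so no SettingWithCopyError is raised.
+        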
pd.set_option("chained_assignment", "raise")
+
+        # work with the chain
+        expected = DataFrame([[-5, 1], [-6, 3]], columns=list("AB"))
+        df = DataFrame(np.arange(4).reshape(2, 2), columns=list("AB"), dtype="int64")
+        assert df._is_copy is None
+
+        df["A"][0] = -5
+        df["A"][1] = -6
+        tm.assert_frame_equal(df, expected)
+
+    @pytest.mark.arm_slow
+    def test_detect_chained_assignment_raises(self, using_array_manager):
+
+        # test with the chaining
+        df = DataFrame(
+            {
+                "A": Series(range(2), dtype="int64"),
+                "B": np.array(np.arange(2, 4), dtype=np.float64),
+            }
+        )
+        assert df._is_copy is None
+
+        if not using_array_manager:
+            with pytest.raises(com.SettingWithCopyError, match=msg):
+                df["A"][0] = -5
+
+            with pytest.raises(com.SettingWithCopyError, match=msg):
+                df["A"][1] = np.nan
+
+            assert df["A"]._is_copy is None
+
+        else:
+            # INFO(ArrayManager) for ArrayManager it doesn't matter that it's
+            # a mixed dataframe
+            df["A"][0] = -5
+            df["A"][1] = -6
+            expected = DataFrame([[-5, 2], [-6, 3]], columns=list("AB"))
+            expected["B"] = expected["B"].astype("float64")
+            tm.assert_frame_equal(df, expected)
+
+    @pytest.mark.arm_slow
+    def test_detect_chained_assignment_fails(self):
+
+        # Using a copy (the chain), fails
+        df = DataFrame(
+            {
+                "A": Series(range(2), dtype="int64"),
+                "B": np.array(np.arange(2, 4), dtype=np.float64),
+            }
+        )
+
+        with pytest.raises(com.SettingWithCopyError, match=msg):
+            df.loc[0]["A"] = -5
+
+    @pytest.mark.arm_slow
+    def test_detect_chained_assignment_doc_example(self):
+
+        # Doc example
+        df = DataFrame(
+            {
+                "a": ["one", "one", "two", "three", "two", "one", "six"],
+                "c": Series(range(7), dtype="int64"),
+            }
+        )
+        assert df._is_copy is None
+
+        with pytest.raises(com.SettingWithCopyError, match=msg):
+            indexer = df.a.str.startswith("o")
+            df[indexer]["c"] = 42
+
+    @pytest.mark.arm_slow
+    def test_detect_chained_assignment_object_dtype(self, using_array_manager):
+
+        expected = DataFrame({"A": [111, "bbb", "ccc"], "B": [1, 2, 3]})
+        df = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]})
+
+        with pytest.raises(com.SettingWithCopyError, match=msg):
+            df.loc[0]["A"] = 111
+
+        if not using_array_manager:
+            with pytest.raises(com.SettingWithCopyError, match=msg):
+                df["A"][0] = 111
+
+            df.loc[0, "A"] = 111
+        else:
+            # INFO(ArrayManager) for ArrayManager it doesn't matter that it's
+            # a mixed dataframe
+            df["A"][0] = 111
+
+        tm.assert_frame_equal(df, expected)
+
+    @pytest.mark.arm_slow
+    def test_detect_chained_assignment_is_copy_pickle(self):
+
+        # gh-5475: Make sure that is_copy is picked up on reconstruction
+        df = DataFrame({"A": [1, 2]})
+        assert df._is_copy is None
+
+        with tm.ensure_clean("__tmp__pickle") as path:
+            df.to_pickle(path)
+            df2 = pd.read_pickle(path)
+            df2["B"] = df2["A"]
+            df2["B"] = df2["A"]
+
+    @pytest.mark.arm_slow
+    def test_detect_chained_assignment_setting_entire_column(self):
+
+        # gh-5597: a spurious raise as we are setting the entire column here
+
+        df = random_text(100000)
+
+        # Always a copy
+        x = df.iloc[[0, 1, 2]]
+        assert x._is_copy is not None
+
+        x = df.iloc[[0, 1, 2, 4]]
+        assert x._is_copy is not None
+
+        # Explicitly copy
+        indexer = df.letters.apply(lambda x: len(x) > 10)
+        df = df.loc[indexer].copy()
+
+        assert df._is_copy is None
+        df["letters"] = df["letters"].apply(str.lower)
+
+    @pytest.mark.arm_slow
+    def test_detect_chained_assignment_implicit_take(self):
+
+        # Implicitly take
+        df = random_text(100000)
+        indexer = df.letters.apply(lambda x: len(x) > 10)
+        df = df.loc[indexer]
+
+        assert df._is_copy is not None
+        
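# _is_copy holds a weakref to the frame this one was sliced from;
+        # the whole-column assignment below mutates only this copy.
+        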
df["letters"] = df["letters"].apply(str.lower) + + @pytest.mark.arm_slow + def test_detect_chained_assignment_implicit_take2(self): + + # Implicitly take 2 + df = random_text(100000) + indexer = df.letters.apply(lambda x: len(x) > 10) + + df = df.loc[indexer] + assert df._is_copy is not None + df.loc[:, "letters"] = df["letters"].apply(str.lower) + + # Should be ok even though it's a copy! + assert df._is_copy is None + + df["letters"] = df["letters"].apply(str.lower) + assert df._is_copy is None + + @pytest.mark.arm_slow + def test_detect_chained_assignment_str(self): + + df = random_text(100000) + indexer = df.letters.apply(lambda x: len(x) > 10) + df.loc[indexer, "letters"] = df.loc[indexer, "letters"].apply(str.lower) + + @pytest.mark.arm_slow + def test_detect_chained_assignment_is_copy(self): + + # an identical take, so no copy + df = DataFrame({"a": [1]}).dropna() + assert df._is_copy is None + df["a"] += 1 + + @pytest.mark.arm_slow + def test_detect_chained_assignment_sorting(self): + + df = DataFrame(np.random.randn(10, 4)) + ser = df.iloc[:, 0].sort_values() + + tm.assert_series_equal(ser, df.iloc[:, 0].sort_values()) + tm.assert_series_equal(ser, df[0].sort_values()) + + @pytest.mark.arm_slow + def test_detect_chained_assignment_false_positives(self): + + # see gh-6025: false positives + df = DataFrame({"column1": ["a", "a", "a"], "column2": [4, 8, 9]}) + str(df) + + df["column1"] = df["column1"] + "b" + str(df) + + df = df[df["column2"] != 8] + str(df) + + df["column1"] = df["column1"] + "c" + str(df) + + @pytest.mark.arm_slow + def test_detect_chained_assignment_undefined_column(self): + + # from SO: + # https://stackoverflow.com/questions/24054495/potential-bug-setting-value-for-undefined-column-using-iloc + df = DataFrame(np.arange(0, 9), columns=["count"]) + df["group"] = "b" + + with pytest.raises(com.SettingWithCopyError, match=msg): + df.iloc[0:5]["group"] = "a" + + @pytest.mark.arm_slow + def test_detect_chained_assignment_changing_dtype(self, using_array_manager): + + # Mixed type setting but same dtype & changing dtype + df = DataFrame( + { + "A": date_range("20130101", periods=5), + "B": np.random.randn(5), + "C": np.arange(5, dtype="int64"), + "D": ["a", "b", "c", "d", "e"], + } + ) + + with pytest.raises(com.SettingWithCopyError, match=msg): + df.loc[2]["D"] = "foo" + + with pytest.raises(com.SettingWithCopyError, match=msg): + df.loc[2]["C"] = "foo" + + if not using_array_manager: + with pytest.raises(com.SettingWithCopyError, match=msg): + df["C"][2] = "foo" + else: + # INFO(ArrayManager) for ArrayManager it doesn't matter if it's + # changing the dtype or not + df["C"][2] = "foo" + assert df.loc[2, "C"] == "foo" + + def test_setting_with_copy_bug(self): + + # operating on a copy + df = DataFrame( + {"a": list(range(4)), "b": list("ab.."), "c": ["a", "b", np.nan, "d"]} + ) + mask = pd.isna(df.c) + + with pytest.raises(com.SettingWithCopyError, match=msg): + df[["c"]][mask] = df[["b"]][mask] + + def test_setting_with_copy_bug_no_warning(self): + # invalid warning as we are returning a new object + # GH 8730 + df1 = DataFrame({"x": Series(["a", "b", "c"]), "y": Series(["d", "e", "f"])}) + df2 = df1[["x"]] + + # this should not raise + df2["y"] = ["g", "h", "i"] + + def test_detect_chained_assignment_warnings_errors(self): + df = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]}) + with option_context("chained_assignment", "warn"): + with tm.assert_produces_warning(com.SettingWithCopyWarning): + df.loc[0]["A"] = 111 + + with option_context("chained_assignment", 
"raise"): + with pytest.raises(com.SettingWithCopyError, match=msg): + df.loc[0]["A"] = 111 + + def test_detect_chained_assignment_warnings_filter_and_dupe_cols(self): + # xref gh-13017. + with option_context("chained_assignment", "warn"): + df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, -9]], columns=["a", "a", "c"]) + + with tm.assert_produces_warning(com.SettingWithCopyWarning): + df.c.loc[df.c > 0] = None + + expected = DataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, -9]], columns=["a", "a", "c"] + ) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize("rhs", [3, DataFrame({0: [1, 2, 3, 4]})]) + def test_detect_chained_assignment_warning_stacklevel(self, rhs): + # GH#42570 + df = DataFrame(np.arange(25).reshape(5, 5)) + chained = df.loc[:3] + with option_context("chained_assignment", "warn"): + with tm.assert_produces_warning(com.SettingWithCopyWarning) as t: + chained[2] = rhs + assert t[0].filename == __file__ + + # TODO(ArrayManager) fast_xs with array-like scalars is not yet working + @td.skip_array_manager_not_yet_implemented + def test_chained_getitem_with_lists(self): + + # GH6394 + # Regression in chained getitem indexing with embedded list-like from + # 0.12 + + df = DataFrame({"A": 5 * [np.zeros(3)], "B": 5 * [np.ones(3)]}) + expected = df["A"].iloc[2] + result = df.loc[2, "A"] + tm.assert_numpy_array_equal(result, expected) + result2 = df.iloc[2]["A"] + tm.assert_numpy_array_equal(result2, expected) + result3 = df["A"].loc[2] + tm.assert_numpy_array_equal(result3, expected) + result4 = df["A"].iloc[2] + tm.assert_numpy_array_equal(result4, expected) + + def test_cache_updating(self): + # GH 4939, make sure to update the cache on setitem + + df = tm.makeDataFrame() + df["A"] # cache series + df.loc["Hello Friend"] = df.iloc[0] + assert "Hello Friend" in df["A"].index + assert "Hello Friend" in df["B"].index + + def test_cache_updating2(self): + # 10264 + df = DataFrame( + np.zeros((5, 5), dtype="int64"), + columns=["a", "b", "c", "d", "e"], + index=range(5), + ) + df["f"] = 0 + df.f.values[3] = 1 + + df.f.values[3] = 2 + expected = DataFrame( + np.zeros((5, 6), dtype="int64"), + columns=["a", "b", "c", "d", "e", "f"], + index=range(5), + ) + expected.at[3, "f"] = 2 + tm.assert_frame_equal(df, expected) + expected = Series([0, 0, 0, 2, 0], name="f") + tm.assert_series_equal(df.f, expected) + + def test_iloc_setitem_chained_assignment(self): + # GH#3970 + with option_context("chained_assignment", None): + df = DataFrame({"aa": range(5), "bb": [2.2] * 5}) + df["cc"] = 0.0 + + ck = [True] * len(df) + + df["bb"].iloc[0] = 0.13 + + # GH#3970 this lookup used to break the chained setting to 0.15 + df.iloc[ck] + + df["bb"].iloc[0] = 0.15 + assert df["bb"].iloc[0] == 0.15 + + def test_getitem_loc_assignment_slice_state(self): + # GH 13569 + df = DataFrame({"a": [10, 20, 30]}) + df["a"].loc[4] = 40 + tm.assert_frame_equal(df, DataFrame({"a": [10, 20, 30]})) + tm.assert_series_equal(df["a"], Series([10, 20, 30], name="a")) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_check_indexer.py b/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_check_indexer.py new file mode 100644 index 0000000..975a31b --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_check_indexer.py @@ -0,0 +1,105 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.api.indexers import check_array_indexer + + +@pytest.mark.parametrize( + "indexer, expected", + [ + # integer + ([1, 2], 
np.array([1, 2], dtype=np.intp)), + (np.array([1, 2], dtype="int64"), np.array([1, 2], dtype=np.intp)), + (pd.array([1, 2], dtype="Int32"), np.array([1, 2], dtype=np.intp)), + (pd.Index([1, 2]), np.array([1, 2], dtype=np.intp)), + # boolean + ([True, False, True], np.array([True, False, True], dtype=np.bool_)), + (np.array([True, False, True]), np.array([True, False, True], dtype=np.bool_)), + ( + pd.array([True, False, True], dtype="boolean"), + np.array([True, False, True], dtype=np.bool_), + ), + # other + ([], np.array([], dtype=np.intp)), + ], +) +def test_valid_input(indexer, expected): + arr = np.array([1, 2, 3]) + result = check_array_indexer(arr, indexer) + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize( + "indexer", [[True, False, None], pd.array([True, False, None], dtype="boolean")] +) +def test_boolean_na_returns_indexer(indexer): + # https://github.com/pandas-dev/pandas/issues/31503 + arr = np.array([1, 2, 3]) + + result = check_array_indexer(arr, indexer) + expected = np.array([True, False, False], dtype=bool) + + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize( + "indexer", + [ + [True, False], + pd.array([True, False], dtype="boolean"), + np.array([True, False], dtype=np.bool_), + ], +) +def test_bool_raise_length(indexer): + arr = np.array([1, 2, 3]) + + msg = "Boolean index has wrong length" + with pytest.raises(IndexError, match=msg): + check_array_indexer(arr, indexer) + + +@pytest.mark.parametrize( + "indexer", [[0, 1, None], pd.array([0, 1, pd.NA], dtype="Int64")] +) +def test_int_raise_missing_values(indexer): + arr = np.array([1, 2, 3]) + + msg = "Cannot index with an integer indexer containing NA values" + with pytest.raises(ValueError, match=msg): + check_array_indexer(arr, indexer) + + +@pytest.mark.parametrize( + "indexer", + [ + [0.0, 1.0], + np.array([1.0, 2.0], dtype="float64"), + np.array([True, False], dtype=object), + pd.Index([True, False], dtype=object), + ], +) +def test_raise_invalid_array_dtypes(indexer): + arr = np.array([1, 2, 3]) + + msg = "arrays used as indices must be of integer or boolean type" + with pytest.raises(IndexError, match=msg): + check_array_indexer(arr, indexer) + + +def test_raise_nullable_string_dtype(nullable_string_dtype): + indexer = pd.array(["a", "b"], dtype=nullable_string_dtype) + arr = np.array([1, 2, 3]) + + msg = "arrays used as indices must be of integer or boolean type" + with pytest.raises(IndexError, match=msg): + check_array_indexer(arr, indexer) + + +@pytest.mark.parametrize("indexer", [None, Ellipsis, slice(0, 3), (None,)]) +def test_pass_through_non_array_likes(indexer): + arr = np.array([1, 2, 3]) + + result = check_array_indexer(arr, indexer) + assert result == indexer diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_coercion.py b/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_coercion.py new file mode 100644 index 0000000..75337cb --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_coercion.py @@ -0,0 +1,1207 @@ +from __future__ import annotations + +from datetime import timedelta +import itertools + +import numpy as np +import pytest + +from pandas.compat import ( + IS64, + is_platform_windows, +) + +import pandas as pd +import pandas._testing as tm +from pandas.core.api import ( + Float64Index, + Int64Index, +) + +############################################################### +# Index / Series common tests which may trigger dtype coercions 
+############################################################### + + +@pytest.fixture(autouse=True, scope="class") +def check_comprehensiveness(request): + # Iterate over combination of dtype, method and klass + # and ensure that each are contained within a collected test + cls = request.cls + combos = itertools.product(cls.klasses, cls.dtypes, [cls.method]) + + def has_test(combo): + klass, dtype, method = combo + cls_funcs = request.node.session.items + return any( + klass in x.name and dtype in x.name and method in x.name for x in cls_funcs + ) + + opts = request.config.option + if opts.lf or opts.keyword: + # If we are running with "last-failed" or -k foo, we expect to only + # run a subset of tests. + yield + + else: + + for combo in combos: + if not has_test(combo): + raise AssertionError( + f"test method is not defined: {cls.__name__}, {combo}" + ) + + yield + + +class CoercionBase: + + klasses = ["index", "series"] + dtypes = [ + "object", + "int64", + "float64", + "complex128", + "bool", + "datetime64", + "datetime64tz", + "timedelta64", + "period", + ] + + @property + def method(self): + raise NotImplementedError(self) + + +class TestSetitemCoercion(CoercionBase): + + method = "setitem" + + def _assert_setitem_series_conversion( + self, original_series, loc_value, expected_series, expected_dtype + ): + """test series value's coercion triggered by assignment""" + temp = original_series.copy() + temp[1] = loc_value + tm.assert_series_equal(temp, expected_series) + # check dtype explicitly for sure + assert temp.dtype == expected_dtype + + temp = original_series.copy() + temp.loc[1] = loc_value + tm.assert_series_equal(temp, expected_series) + + @pytest.mark.parametrize( + "val,exp_dtype", [(1, object), (1.1, object), (1 + 1j, object), (True, object)] + ) + def test_setitem_series_object(self, val, exp_dtype): + obj = pd.Series(list("abcd")) + assert obj.dtype == object + + exp = pd.Series(["a", val, "c", "d"]) + self._assert_setitem_series_conversion(obj, val, exp, exp_dtype) + + @pytest.mark.parametrize( + "val,exp_dtype", + [(1, np.int64), (1.1, np.float64), (1 + 1j, np.complex128), (True, object)], + ) + def test_setitem_series_int64(self, val, exp_dtype): + obj = pd.Series([1, 2, 3, 4]) + assert obj.dtype == np.int64 + + exp = pd.Series([1, val, 3, 4]) + self._assert_setitem_series_conversion(obj, val, exp, exp_dtype) + + @pytest.mark.parametrize( + "val,exp_dtype", [(np.int32(1), np.int8), (np.int16(2 ** 9), np.int16)] + ) + def test_setitem_series_int8(self, val, exp_dtype): + obj = pd.Series([1, 2, 3, 4], dtype=np.int8) + assert obj.dtype == np.int8 + + warn = None if exp_dtype is np.int8 else FutureWarning + msg = "Values are too large to be losslessly cast to int8" + with tm.assert_produces_warning(warn, match=msg): + exp = pd.Series([1, val, 3, 4], dtype=np.int8) + + exp = pd.Series([1, val, 3, 4], dtype=exp_dtype) + self._assert_setitem_series_conversion(obj, val, exp, exp_dtype) + + @pytest.mark.parametrize( + "val,exp_dtype", + [(1, np.float64), (1.1, np.float64), (1 + 1j, np.complex128), (True, object)], + ) + def test_setitem_series_float64(self, val, exp_dtype): + obj = pd.Series([1.1, 2.2, 3.3, 4.4]) + assert obj.dtype == np.float64 + + exp = pd.Series([1.1, val, 3.3, 4.4]) + self._assert_setitem_series_conversion(obj, val, exp, exp_dtype) + + @pytest.mark.parametrize( + "val,exp_dtype", + [ + (1, np.complex128), + (1.1, np.complex128), + (1 + 1j, np.complex128), + (True, object), + ], + ) + def test_setitem_series_complex128(self, val, exp_dtype): + obj = 
pd.Series([1 + 1j, 2 + 2j, 3 + 3j, 4 + 4j]) + assert obj.dtype == np.complex128 + + exp = pd.Series([1 + 1j, val, 3 + 3j, 4 + 4j]) + self._assert_setitem_series_conversion(obj, val, exp, exp_dtype) + + @pytest.mark.parametrize( + "val,exp_dtype", + [ + (1, object), + ("3", object), + (3, object), + (1.1, object), + (1 + 1j, object), + (True, np.bool_), + ], + ) + def test_setitem_series_bool(self, val, exp_dtype): + obj = pd.Series([True, False, True, False]) + assert obj.dtype == np.bool_ + + exp = pd.Series([True, val, True, False], dtype=exp_dtype) + self._assert_setitem_series_conversion(obj, val, exp, exp_dtype) + + @pytest.mark.parametrize( + "val,exp_dtype", + [(pd.Timestamp("2012-01-01"), "datetime64[ns]"), (1, object), ("x", object)], + ) + def test_setitem_series_datetime64(self, val, exp_dtype): + obj = pd.Series( + [ + pd.Timestamp("2011-01-01"), + pd.Timestamp("2011-01-02"), + pd.Timestamp("2011-01-03"), + pd.Timestamp("2011-01-04"), + ] + ) + assert obj.dtype == "datetime64[ns]" + + exp = pd.Series( + [ + pd.Timestamp("2011-01-01"), + val, + pd.Timestamp("2011-01-03"), + pd.Timestamp("2011-01-04"), + ] + ) + self._assert_setitem_series_conversion(obj, val, exp, exp_dtype) + + @pytest.mark.parametrize( + "val,exp_dtype", + [ + (pd.Timestamp("2012-01-01", tz="US/Eastern"), "datetime64[ns, US/Eastern]"), + (pd.Timestamp("2012-01-01", tz="US/Pacific"), object), + (pd.Timestamp("2012-01-01"), object), + (1, object), + ], + ) + def test_setitem_series_datetime64tz(self, val, exp_dtype): + tz = "US/Eastern" + obj = pd.Series( + [ + pd.Timestamp("2011-01-01", tz=tz), + pd.Timestamp("2011-01-02", tz=tz), + pd.Timestamp("2011-01-03", tz=tz), + pd.Timestamp("2011-01-04", tz=tz), + ] + ) + assert obj.dtype == "datetime64[ns, US/Eastern]" + + exp = pd.Series( + [ + pd.Timestamp("2011-01-01", tz=tz), + val, + # once deprecation is enforced + # val if getattr(val, "tz", None) is None else val.tz_convert(tz), + pd.Timestamp("2011-01-03", tz=tz), + pd.Timestamp("2011-01-04", tz=tz), + ] + ) + warn = None + if getattr(val, "tz", None) is not None and val.tz != obj[0].tz: + warn = FutureWarning + with tm.assert_produces_warning(warn, match="mismatched timezones"): + self._assert_setitem_series_conversion(obj, val, exp, exp_dtype) + + @pytest.mark.parametrize( + "val,exp_dtype", + [(pd.Timedelta("12 day"), "timedelta64[ns]"), (1, object), ("x", object)], + ) + def test_setitem_series_timedelta64(self, val, exp_dtype): + obj = pd.Series( + [ + pd.Timedelta("1 day"), + pd.Timedelta("2 day"), + pd.Timedelta("3 day"), + pd.Timedelta("4 day"), + ] + ) + assert obj.dtype == "timedelta64[ns]" + + exp = pd.Series( + [pd.Timedelta("1 day"), val, pd.Timedelta("3 day"), pd.Timedelta("4 day")] + ) + self._assert_setitem_series_conversion(obj, val, exp, exp_dtype) + + def test_setitem_series_no_coercion_from_values_list(self): + # GH35865 - int casted to str when internally calling np.array(ser.values) + ser = pd.Series(["a", 1]) + ser[:] = list(ser.values) + + expected = pd.Series(["a", 1]) + + tm.assert_series_equal(ser, expected) + + def _assert_setitem_index_conversion( + self, original_series, loc_key, expected_index, expected_dtype + ): + """test index's coercion triggered by assign key""" + temp = original_series.copy() + warn = None + if isinstance(loc_key, int) and temp.index.dtype == np.float64: + # GH#33469 + warn = FutureWarning + with tm.assert_produces_warning(warn): + temp[loc_key] = 5 + exp = pd.Series([1, 2, 3, 4, 5], index=expected_index) + tm.assert_series_equal(temp, exp) + # check dtype 
explicitly for sure + assert temp.index.dtype == expected_dtype + + temp = original_series.copy() + temp.loc[loc_key] = 5 + exp = pd.Series([1, 2, 3, 4, 5], index=expected_index) + tm.assert_series_equal(temp, exp) + # check dtype explicitly for sure + assert temp.index.dtype == expected_dtype + + @pytest.mark.parametrize( + "val,exp_dtype", [("x", object), (5, IndexError), (1.1, object)] + ) + def test_setitem_index_object(self, val, exp_dtype): + obj = pd.Series([1, 2, 3, 4], index=list("abcd")) + assert obj.index.dtype == object + + if exp_dtype is IndexError: + temp = obj.copy() + msg = "index 5 is out of bounds for axis 0 with size 4" + with pytest.raises(exp_dtype, match=msg): + temp[5] = 5 + else: + exp_index = pd.Index(list("abcd") + [val]) + self._assert_setitem_index_conversion(obj, val, exp_index, exp_dtype) + + @pytest.mark.parametrize( + "val,exp_dtype", [(5, np.int64), (1.1, np.float64), ("x", object)] + ) + def test_setitem_index_int64(self, val, exp_dtype): + obj = pd.Series([1, 2, 3, 4]) + assert obj.index.dtype == np.int64 + + exp_index = pd.Index([0, 1, 2, 3, val]) + self._assert_setitem_index_conversion(obj, val, exp_index, exp_dtype) + + @pytest.mark.parametrize( + "val,exp_dtype", [(5, IndexError), (5.1, np.float64), ("x", object)] + ) + def test_setitem_index_float64(self, val, exp_dtype, request): + obj = pd.Series([1, 2, 3, 4], index=[1.1, 2.1, 3.1, 4.1]) + assert obj.index.dtype == np.float64 + + if exp_dtype is IndexError: + # float + int -> int + temp = obj.copy() + msg = "index 5 is out of bounds for axis 0 with size 4" + with pytest.raises(exp_dtype, match=msg): + # GH#33469 + depr_msg = "Treating integers as positional" + with tm.assert_produces_warning(FutureWarning, match=depr_msg): + temp[5] = 5 + mark = pytest.mark.xfail(reason="TODO_GH12747 The result must be float") + request.node.add_marker(mark) + exp_index = pd.Index([1.1, 2.1, 3.1, 4.1, val]) + self._assert_setitem_index_conversion(obj, val, exp_index, exp_dtype) + + @pytest.mark.xfail(reason="Test not implemented") + def test_setitem_series_period(self): + raise NotImplementedError + + @pytest.mark.xfail(reason="Test not implemented") + def test_setitem_index_complex128(self): + raise NotImplementedError + + @pytest.mark.xfail(reason="Test not implemented") + def test_setitem_index_bool(self): + raise NotImplementedError + + @pytest.mark.xfail(reason="Test not implemented") + def test_setitem_index_datetime64(self): + raise NotImplementedError + + @pytest.mark.xfail(reason="Test not implemented") + def test_setitem_index_datetime64tz(self): + raise NotImplementedError + + @pytest.mark.xfail(reason="Test not implemented") + def test_setitem_index_timedelta64(self): + raise NotImplementedError + + @pytest.mark.xfail(reason="Test not implemented") + def test_setitem_index_period(self): + raise NotImplementedError + + +class TestInsertIndexCoercion(CoercionBase): + + klasses = ["index"] + method = "insert" + + def _assert_insert_conversion(self, original, value, expected, expected_dtype): + """test coercion triggered by insert""" + target = original.copy() + res = target.insert(1, value) + tm.assert_index_equal(res, expected) + assert res.dtype == expected_dtype + + @pytest.mark.parametrize( + "insert, coerced_val, coerced_dtype", + [ + (1, 1, object), + (1.1, 1.1, object), + (False, False, object), + ("x", "x", object), + ], + ) + def test_insert_index_object(self, insert, coerced_val, coerced_dtype): + obj = pd.Index(list("abcd")) + assert obj.dtype == object + + exp = pd.Index(["a", coerced_val, 
"b", "c", "d"]) + self._assert_insert_conversion(obj, insert, exp, coerced_dtype) + + @pytest.mark.parametrize( + "insert, coerced_val, coerced_dtype", + [ + (1, 1, np.int64), + (1.1, 1.1, np.float64), + (False, False, object), # GH#36319 + ("x", "x", object), + ], + ) + def test_insert_index_int64(self, insert, coerced_val, coerced_dtype): + obj = Int64Index([1, 2, 3, 4]) + assert obj.dtype == np.int64 + + exp = pd.Index([1, coerced_val, 2, 3, 4]) + self._assert_insert_conversion(obj, insert, exp, coerced_dtype) + + @pytest.mark.parametrize( + "insert, coerced_val, coerced_dtype", + [ + (1, 1.0, np.float64), + (1.1, 1.1, np.float64), + (False, False, object), # GH#36319 + ("x", "x", object), + ], + ) + def test_insert_index_float64(self, insert, coerced_val, coerced_dtype): + obj = Float64Index([1.0, 2.0, 3.0, 4.0]) + assert obj.dtype == np.float64 + + exp = pd.Index([1.0, coerced_val, 2.0, 3.0, 4.0]) + self._assert_insert_conversion(obj, insert, exp, coerced_dtype) + + @pytest.mark.parametrize( + "fill_val,exp_dtype", + [ + (pd.Timestamp("2012-01-01"), "datetime64[ns]"), + (pd.Timestamp("2012-01-01", tz="US/Eastern"), "datetime64[ns, US/Eastern]"), + ], + ids=["datetime64", "datetime64tz"], + ) + @pytest.mark.parametrize( + "insert_value", + [pd.Timestamp("2012-01-01"), pd.Timestamp("2012-01-01", tz="Asia/Tokyo"), 1], + ) + def test_insert_index_datetimes(self, request, fill_val, exp_dtype, insert_value): + + obj = pd.DatetimeIndex( + ["2011-01-01", "2011-01-02", "2011-01-03", "2011-01-04"], tz=fill_val.tz + ) + assert obj.dtype == exp_dtype + + exp = pd.DatetimeIndex( + ["2011-01-01", fill_val.date(), "2011-01-02", "2011-01-03", "2011-01-04"], + tz=fill_val.tz, + ) + self._assert_insert_conversion(obj, fill_val, exp, exp_dtype) + + if fill_val.tz: + + # mismatched tzawareness + ts = pd.Timestamp("2012-01-01") + result = obj.insert(1, ts) + expected = obj.astype(object).insert(1, ts) + assert expected.dtype == object + tm.assert_index_equal(result, expected) + + # mismatched tz --> cast to object (could reasonably cast to common tz) + ts = pd.Timestamp("2012-01-01", tz="Asia/Tokyo") + with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"): + result = obj.insert(1, ts) + # once deprecation is enforced: + # expected = obj.insert(1, ts.tz_convert(obj.dtype.tz)) + # assert expected.dtype == obj.dtype + expected = obj.astype(object).insert(1, ts) + tm.assert_index_equal(result, expected) + + else: + # mismatched tzawareness + ts = pd.Timestamp("2012-01-01", tz="Asia/Tokyo") + result = obj.insert(1, ts) + expected = obj.astype(object).insert(1, ts) + assert expected.dtype == object + tm.assert_index_equal(result, expected) + + item = 1 + result = obj.insert(1, item) + expected = obj.astype(object).insert(1, item) + assert expected[1] == item + assert expected.dtype == object + tm.assert_index_equal(result, expected) + + def test_insert_index_timedelta64(self): + obj = pd.TimedeltaIndex(["1 day", "2 day", "3 day", "4 day"]) + assert obj.dtype == "timedelta64[ns]" + + # timedelta64 + timedelta64 => timedelta64 + exp = pd.TimedeltaIndex(["1 day", "10 day", "2 day", "3 day", "4 day"]) + self._assert_insert_conversion( + obj, pd.Timedelta("10 day"), exp, "timedelta64[ns]" + ) + + for item in [pd.Timestamp("2012-01-01"), 1]: + result = obj.insert(1, item) + expected = obj.astype(object).insert(1, item) + assert expected.dtype == object + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "insert, coerced_val, coerced_dtype", + [ + (pd.Period("2012-01", 
freq="M"), "2012-01", "period[M]"), + (pd.Timestamp("2012-01-01"), pd.Timestamp("2012-01-01"), object), + (1, 1, object), + ("x", "x", object), + ], + ) + def test_insert_index_period(self, insert, coerced_val, coerced_dtype): + obj = pd.PeriodIndex(["2011-01", "2011-02", "2011-03", "2011-04"], freq="M") + assert obj.dtype == "period[M]" + + data = [ + pd.Period("2011-01", freq="M"), + coerced_val, + pd.Period("2011-02", freq="M"), + pd.Period("2011-03", freq="M"), + pd.Period("2011-04", freq="M"), + ] + if isinstance(insert, pd.Period): + exp = pd.PeriodIndex(data, freq="M") + self._assert_insert_conversion(obj, insert, exp, coerced_dtype) + + # string that can be parsed to appropriate PeriodDtype + self._assert_insert_conversion(obj, str(insert), exp, coerced_dtype) + + else: + result = obj.insert(0, insert) + expected = obj.astype(object).insert(0, insert) + tm.assert_index_equal(result, expected) + + # TODO: ATM inserting '2012-01-01 00:00:00' when we have obj.freq=="M" + # casts that string to Period[M], not clear that is desirable + if not isinstance(insert, pd.Timestamp): + # non-castable string + result = obj.insert(0, str(insert)) + expected = obj.astype(object).insert(0, str(insert)) + tm.assert_index_equal(result, expected) + + msg = r"Unexpected keyword arguments {'freq'}" + with pytest.raises(TypeError, match=msg): + with tm.assert_produces_warning(FutureWarning): + # passing keywords to pd.Index + pd.Index(data, freq="M") + + @pytest.mark.xfail(reason="Test not implemented") + def test_insert_index_complex128(self): + raise NotImplementedError + + @pytest.mark.xfail(reason="Test not implemented") + def test_insert_index_bool(self): + raise NotImplementedError + + +class TestWhereCoercion(CoercionBase): + + method = "where" + + def _assert_where_conversion( + self, original, cond, values, expected, expected_dtype + ): + """test coercion triggered by where""" + target = original.copy() + res = target.where(cond, values) + tm.assert_equal(res, expected) + assert res.dtype == expected_dtype + + @pytest.mark.parametrize( + "fill_val,exp_dtype", + [(1, object), (1.1, object), (1 + 1j, object), (True, object)], + ) + def test_where_object(self, index_or_series, fill_val, exp_dtype): + klass = index_or_series + obj = klass(list("abcd")) + assert obj.dtype == object + cond = klass([True, False, True, False]) + + if fill_val is True and klass is pd.Series: + ret_val = 1 + else: + ret_val = fill_val + + exp = klass(["a", ret_val, "c", ret_val]) + self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype) + + if fill_val is True: + values = klass([True, False, True, True]) + else: + values = klass(x * fill_val for x in [5, 6, 7, 8]) + + exp = klass(["a", values[1], "c", values[3]]) + self._assert_where_conversion(obj, cond, values, exp, exp_dtype) + + @pytest.mark.parametrize( + "fill_val,exp_dtype", + [(1, np.int64), (1.1, np.float64), (1 + 1j, np.complex128), (True, object)], + ) + def test_where_int64(self, index_or_series, fill_val, exp_dtype, request): + klass = index_or_series + if klass is pd.Index and exp_dtype is np.complex128: + mark = pytest.mark.xfail(reason="Complex Index not supported") + request.node.add_marker(mark) + + obj = klass([1, 2, 3, 4]) + assert obj.dtype == np.int64 + cond = klass([True, False, True, False]) + + exp = klass([1, fill_val, 3, fill_val]) + self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype) + + if fill_val is True: + values = klass([True, False, True, True]) + else: + values = klass(x * fill_val for x in [5, 6, 7, 8]) + exp = 
klass([1, values[1], 3, values[3]]) + self._assert_where_conversion(obj, cond, values, exp, exp_dtype) + + @pytest.mark.parametrize( + "fill_val, exp_dtype", + [(1, np.float64), (1.1, np.float64), (1 + 1j, np.complex128), (True, object)], + ) + def test_where_float64(self, index_or_series, fill_val, exp_dtype, request): + klass = index_or_series + if klass is pd.Index and exp_dtype is np.complex128: + mark = pytest.mark.xfail(reason="Complex Index not supported") + request.node.add_marker(mark) + + obj = klass([1.1, 2.2, 3.3, 4.4]) + assert obj.dtype == np.float64 + cond = klass([True, False, True, False]) + + exp = klass([1.1, fill_val, 3.3, fill_val]) + self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype) + + if fill_val is True: + values = klass([True, False, True, True]) + else: + values = klass(x * fill_val for x in [5, 6, 7, 8]) + exp = klass([1.1, values[1], 3.3, values[3]]) + self._assert_where_conversion(obj, cond, values, exp, exp_dtype) + + @pytest.mark.parametrize( + "fill_val,exp_dtype", + [ + (1, np.complex128), + (1.1, np.complex128), + (1 + 1j, np.complex128), + (True, object), + ], + ) + def test_where_series_complex128(self, fill_val, exp_dtype): + klass = pd.Series + obj = klass([1 + 1j, 2 + 2j, 3 + 3j, 4 + 4j]) + assert obj.dtype == np.complex128 + cond = klass([True, False, True, False]) + + exp = klass([1 + 1j, fill_val, 3 + 3j, fill_val]) + self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype) + + if fill_val is True: + values = klass([True, False, True, True]) + else: + values = klass(x * fill_val for x in [5, 6, 7, 8]) + exp = klass([1 + 1j, values[1], 3 + 3j, values[3]], dtype=exp_dtype) + self._assert_where_conversion(obj, cond, values, exp, exp_dtype) + + @pytest.mark.parametrize( + "fill_val,exp_dtype", + [(1, object), (1.1, object), (1 + 1j, object), (True, np.bool_)], + ) + def test_where_series_bool(self, fill_val, exp_dtype): + klass = pd.Series + + obj = klass([True, False, True, False]) + assert obj.dtype == np.bool_ + cond = klass([True, False, True, False]) + + exp = klass([True, fill_val, True, fill_val]) + self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype) + + if fill_val is True: + values = klass([True, False, True, True]) + else: + values = klass(x * fill_val for x in [5, 6, 7, 8]) + exp = klass([True, values[1], True, values[3]]) + self._assert_where_conversion(obj, cond, values, exp, exp_dtype) + + @pytest.mark.parametrize( + "fill_val,exp_dtype", + [ + (pd.Timestamp("2012-01-01"), "datetime64[ns]"), + (pd.Timestamp("2012-01-01", tz="US/Eastern"), object), + ], + ids=["datetime64", "datetime64tz"], + ) + def test_where_series_datetime64(self, fill_val, exp_dtype): + obj = pd.Series( + [ + pd.Timestamp("2011-01-01"), + pd.Timestamp("2011-01-02"), + pd.Timestamp("2011-01-03"), + pd.Timestamp("2011-01-04"), + ] + ) + assert obj.dtype == "datetime64[ns]" + cond = pd.Series([True, False, True, False]) + + exp = pd.Series( + [pd.Timestamp("2011-01-01"), fill_val, pd.Timestamp("2011-01-03"), fill_val] + ) + self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype) + + values = pd.Series(pd.date_range(fill_val, periods=4)) + if fill_val.tz: + exp = pd.Series( + [ + pd.Timestamp("2011-01-01"), + pd.Timestamp("2012-01-02 00:00", tz="US/Eastern"), + pd.Timestamp("2011-01-03"), + pd.Timestamp("2012-01-04 00:00", tz="US/Eastern"), + ] + ) + self._assert_where_conversion(obj, cond, values, exp, exp_dtype) + + exp = pd.Series( + [ + pd.Timestamp("2011-01-01"), + values[1], + pd.Timestamp("2011-01-03"), + 
values[3], + ] + ) + self._assert_where_conversion(obj, cond, values, exp, exp_dtype) + + @pytest.mark.parametrize( + "fill_val", + [ + pd.Timestamp("2012-01-01"), + pd.Timestamp("2012-01-01").to_datetime64(), + pd.Timestamp("2012-01-01").to_pydatetime(), + ], + ) + def test_where_index_datetime(self, fill_val): + exp_dtype = "datetime64[ns]" + obj = pd.Index( + [ + pd.Timestamp("2011-01-01"), + pd.Timestamp("2011-01-02"), + pd.Timestamp("2011-01-03"), + pd.Timestamp("2011-01-04"), + ] + ) + assert obj.dtype == "datetime64[ns]" + cond = pd.Index([True, False, True, False]) + + result = obj.where(cond, fill_val) + expected = pd.DatetimeIndex([obj[0], fill_val, obj[2], fill_val]) + tm.assert_index_equal(result, expected) + + values = pd.Index(pd.date_range(fill_val, periods=4)) + exp = pd.Index( + [ + pd.Timestamp("2011-01-01"), + pd.Timestamp("2012-01-02"), + pd.Timestamp("2011-01-03"), + pd.Timestamp("2012-01-04"), + ] + ) + + self._assert_where_conversion(obj, cond, values, exp, exp_dtype) + + def test_where_index_datetime64tz(self): + fill_val = pd.Timestamp("2012-01-01", tz="US/Eastern") + exp_dtype = object + obj = pd.Index( + [ + pd.Timestamp("2011-01-01"), + pd.Timestamp("2011-01-02"), + pd.Timestamp("2011-01-03"), + pd.Timestamp("2011-01-04"), + ] + ) + assert obj.dtype == "datetime64[ns]" + cond = pd.Index([True, False, True, False]) + + res = obj.where(cond, fill_val) + expected = pd.Index([obj[0], fill_val, obj[2], fill_val], dtype=object) + tm.assert_index_equal(res, expected) + + values = pd.Index(pd.date_range(fill_val, periods=4)) + exp = pd.Index( + [ + pd.Timestamp("2011-01-01"), + pd.Timestamp("2012-01-02", tz="US/Eastern"), + pd.Timestamp("2011-01-03"), + pd.Timestamp("2012-01-04", tz="US/Eastern"), + ], + dtype=exp_dtype, + ) + + self._assert_where_conversion(obj, cond, values, exp, exp_dtype) + + @pytest.mark.xfail(reason="Test not implemented") + def test_where_index_complex128(self): + raise NotImplementedError + + @pytest.mark.xfail(reason="Test not implemented") + def test_where_index_bool(self): + raise NotImplementedError + + @pytest.mark.xfail(reason="Test not implemented") + def test_where_series_timedelta64(self): + raise NotImplementedError + + @pytest.mark.xfail(reason="Test not implemented") + def test_where_series_period(self): + raise NotImplementedError + + @pytest.mark.parametrize( + "value", [pd.Timedelta(days=9), timedelta(days=9), np.timedelta64(9, "D")] + ) + def test_where_index_timedelta64(self, value): + tdi = pd.timedelta_range("1 Day", periods=4) + cond = np.array([True, False, False, True]) + + expected = pd.TimedeltaIndex(["1 Day", value, value, "4 Days"]) + result = tdi.where(cond, value) + tm.assert_index_equal(result, expected) + + # wrong-dtyped NaT + dtnat = np.datetime64("NaT", "ns") + expected = pd.Index([tdi[0], dtnat, dtnat, tdi[3]], dtype=object) + assert expected[1] is dtnat + + result = tdi.where(cond, dtnat) + tm.assert_index_equal(result, expected) + + def test_where_index_period(self): + dti = pd.date_range("2016-01-01", periods=3, freq="QS") + pi = dti.to_period("Q") + + cond = np.array([False, True, False]) + + # Passinga valid scalar + value = pi[-1] + pi.freq * 10 + expected = pd.PeriodIndex([value, pi[1], value]) + result = pi.where(cond, value) + tm.assert_index_equal(result, expected) + + # Case passing ndarray[object] of Periods + other = np.asarray(pi + pi.freq * 10, dtype=object) + result = pi.where(cond, other) + expected = pd.PeriodIndex([other[0], pi[1], other[2]]) + tm.assert_index_equal(result, expected) + + # 
Passing a mismatched scalar -> casts to object + td = pd.Timedelta(days=4) + expected = pd.Index([td, pi[1], td], dtype=object) + result = pi.where(cond, td) + tm.assert_index_equal(result, expected) + + per = pd.Period("2020-04-21", "D") + expected = pd.Index([per, pi[1], per], dtype=object) + result = pi.where(cond, per) + tm.assert_index_equal(result, expected) + + +class TestFillnaSeriesCoercion(CoercionBase): + + # not indexing, but place here for consistency + + method = "fillna" + + @pytest.mark.xfail(reason="Test not implemented") + def test_has_comprehensive_tests(self): + raise NotImplementedError + + def _assert_fillna_conversion(self, original, value, expected, expected_dtype): + """test coercion triggered by fillna""" + target = original.copy() + res = target.fillna(value) + tm.assert_equal(res, expected) + assert res.dtype == expected_dtype + + @pytest.mark.parametrize( + "fill_val, fill_dtype", + [(1, object), (1.1, object), (1 + 1j, object), (True, object)], + ) + def test_fillna_object(self, index_or_series, fill_val, fill_dtype): + klass = index_or_series + obj = klass(["a", np.nan, "c", "d"]) + assert obj.dtype == object + + exp = klass(["a", fill_val, "c", "d"]) + self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype) + + @pytest.mark.parametrize( + "fill_val,fill_dtype", + [(1, np.float64), (1.1, np.float64), (1 + 1j, np.complex128), (True, object)], + ) + def test_fillna_float64(self, index_or_series, fill_val, fill_dtype): + klass = index_or_series + obj = klass([1.1, np.nan, 3.3, 4.4]) + assert obj.dtype == np.float64 + + exp = klass([1.1, fill_val, 3.3, 4.4]) + # float + complex -> we don't support a complex Index + # complex for Series, + # object for Index + if fill_dtype == np.complex128 and klass == pd.Index: + fill_dtype = object + self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype) + + @pytest.mark.parametrize( + "fill_val,fill_dtype", + [ + (1, np.complex128), + (1.1, np.complex128), + (1 + 1j, np.complex128), + (True, object), + ], + ) + def test_fillna_series_complex128(self, fill_val, fill_dtype): + obj = pd.Series([1 + 1j, np.nan, 3 + 3j, 4 + 4j]) + assert obj.dtype == np.complex128 + + exp = pd.Series([1 + 1j, fill_val, 3 + 3j, 4 + 4j]) + self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype) + + @pytest.mark.parametrize( + "fill_val,fill_dtype", + [ + (pd.Timestamp("2012-01-01"), "datetime64[ns]"), + (pd.Timestamp("2012-01-01", tz="US/Eastern"), object), + (1, object), + ("x", object), + ], + ids=["datetime64", "datetime64tz", "object", "object"], + ) + def test_fillna_datetime(self, index_or_series, fill_val, fill_dtype): + klass = index_or_series + obj = klass( + [ + pd.Timestamp("2011-01-01"), + pd.NaT, + pd.Timestamp("2011-01-03"), + pd.Timestamp("2011-01-04"), + ] + ) + assert obj.dtype == "datetime64[ns]" + + exp = klass( + [ + pd.Timestamp("2011-01-01"), + fill_val, + pd.Timestamp("2011-01-03"), + pd.Timestamp("2011-01-04"), + ] + ) + self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype) + + @pytest.mark.parametrize( + "fill_val,fill_dtype", + [ + (pd.Timestamp("2012-01-01", tz="US/Eastern"), "datetime64[ns, US/Eastern]"), + (pd.Timestamp("2012-01-01"), object), + (pd.Timestamp("2012-01-01", tz="Asia/Tokyo"), object), + (1, object), + ("x", object), + ], + ) + def test_fillna_datetime64tz(self, index_or_series, fill_val, fill_dtype): + klass = index_or_series + tz = "US/Eastern" + + obj = klass( + [ + pd.Timestamp("2011-01-01", tz=tz), + pd.NaT, + pd.Timestamp("2011-01-03", tz=tz), + pd.Timestamp("2011-01-04", 
tz=tz), + ] + ) + assert obj.dtype == "datetime64[ns, US/Eastern]" + + exp = klass( + [ + pd.Timestamp("2011-01-01", tz=tz), + fill_val, + # Once deprecation is enforced, this becomes: + # fill_val.tz_convert(tz) if getattr(fill_val, "tz", None) + # is not None else fill_val, + pd.Timestamp("2011-01-03", tz=tz), + pd.Timestamp("2011-01-04", tz=tz), + ] + ) + warn = None + if getattr(fill_val, "tz", None) is not None and fill_val.tz != obj[0].tz: + warn = FutureWarning + with tm.assert_produces_warning(warn, match="mismatched timezone"): + self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype) + + @pytest.mark.xfail(reason="Test not implemented") + def test_fillna_series_int64(self): + raise NotImplementedError + + @pytest.mark.xfail(reason="Test not implemented") + def test_fillna_index_int64(self): + raise NotImplementedError + + @pytest.mark.xfail(reason="Test not implemented") + def test_fillna_series_bool(self): + raise NotImplementedError + + @pytest.mark.xfail(reason="Test not implemented") + def test_fillna_index_bool(self): + raise NotImplementedError + + @pytest.mark.xfail(reason="Test not implemented") + def test_fillna_series_timedelta64(self): + raise NotImplementedError + + @pytest.mark.xfail(reason="Test not implemented") + def test_fillna_series_period(self): + raise NotImplementedError + + @pytest.mark.xfail(reason="Test not implemented") + def test_fillna_index_timedelta64(self): + raise NotImplementedError + + @pytest.mark.xfail(reason="Test not implemented") + def test_fillna_index_period(self): + raise NotImplementedError + + +class TestReplaceSeriesCoercion(CoercionBase): + + klasses = ["series"] + method = "replace" + + rep: dict[str, list] = {} + rep["object"] = ["a", "b"] + rep["int64"] = [4, 5] + rep["float64"] = [1.1, 2.2] + rep["complex128"] = [1 + 1j, 2 + 2j] + rep["bool"] = [True, False] + rep["datetime64[ns]"] = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-03")] + + for tz in ["UTC", "US/Eastern"]: + # to test tz => different tz replacement + key = f"datetime64[ns, {tz}]" + rep[key] = [ + pd.Timestamp("2011-01-01", tz=tz), + pd.Timestamp("2011-01-03", tz=tz), + ] + + rep["timedelta64[ns]"] = [pd.Timedelta("1 day"), pd.Timedelta("2 day")] + + @pytest.fixture(params=["dict", "series"]) + def how(self, request): + return request.param + + @pytest.fixture( + params=[ + "object", + "int64", + "float64", + "complex128", + "bool", + "datetime64[ns]", + "datetime64[ns, UTC]", + "datetime64[ns, US/Eastern]", + "timedelta64[ns]", + ] + ) + def from_key(self, request): + return request.param + + @pytest.fixture( + params=[ + "object", + "int64", + "float64", + "complex128", + "bool", + "datetime64[ns]", + "datetime64[ns, UTC]", + "datetime64[ns, US/Eastern]", + "timedelta64[ns]", + ], + ids=[ + "object", + "int64", + "float64", + "complex128", + "bool", + "datetime64", + "datetime64tz", + "datetime64tz", + "timedelta64", + ], + ) + def to_key(self, request): + return request.param + + @pytest.fixture + def replacer(self, how, from_key, to_key): + """ + Object we will pass to `Series.replace` + """ + if how == "dict": + replacer = dict(zip(self.rep[from_key], self.rep[to_key])) + elif how == "series": + replacer = pd.Series(self.rep[to_key], index=self.rep[from_key]) + else: + raise ValueError + return replacer + + def test_replace_series(self, how, to_key, from_key, replacer): + index = pd.Index([3, 4], name="xxx") + obj = pd.Series(self.rep[from_key], index=index, name="yyy") + assert obj.dtype == from_key + + if from_key.startswith("datetime") and 
to_key.startswith("datetime"): + # tested below + return + elif from_key in ["datetime64[ns, US/Eastern]", "datetime64[ns, UTC]"]: + # tested below + return + + result = obj.replace(replacer) + + if (from_key == "float64" and to_key in ("int64")) or ( + from_key == "complex128" and to_key in ("int64", "float64") + ): + + if not IS64 or is_platform_windows(): + pytest.skip(f"32-bit platform buggy: {from_key} -> {to_key}") + + # Expected: do not downcast by replacement + exp = pd.Series(self.rep[to_key], index=index, name="yyy", dtype=from_key) + + else: + exp = pd.Series(self.rep[to_key], index=index, name="yyy") + assert exp.dtype == to_key + + tm.assert_series_equal(result, exp) + + @pytest.mark.parametrize( + "to_key", + ["timedelta64[ns]", "bool", "object", "complex128", "float64", "int64"], + indirect=True, + ) + @pytest.mark.parametrize( + "from_key", ["datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"], indirect=True + ) + def test_replace_series_datetime_tz(self, how, to_key, from_key, replacer): + index = pd.Index([3, 4], name="xyz") + obj = pd.Series(self.rep[from_key], index=index, name="yyy") + assert obj.dtype == from_key + + result = obj.replace(replacer) + + exp = pd.Series(self.rep[to_key], index=index, name="yyy") + assert exp.dtype == to_key + + tm.assert_series_equal(result, exp) + + @pytest.mark.parametrize( + "to_key", + ["datetime64[ns]", "datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"], + indirect=True, + ) + @pytest.mark.parametrize( + "from_key", + ["datetime64[ns]", "datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"], + indirect=True, + ) + def test_replace_series_datetime_datetime(self, how, to_key, from_key, replacer): + index = pd.Index([3, 4], name="xyz") + obj = pd.Series(self.rep[from_key], index=index, name="yyy") + assert obj.dtype == from_key + + warn = None + rep_ser = pd.Series(replacer) + if ( + isinstance(obj.dtype, pd.DatetimeTZDtype) + and isinstance(rep_ser.dtype, pd.DatetimeTZDtype) + and obj.dtype != rep_ser.dtype + ): + # mismatched tz DatetimeArray behavior will change to cast + # for setitem-like methods with mismatched tzs GH#44940 + warn = FutureWarning + + msg = "explicitly cast to object" + with tm.assert_produces_warning(warn, match=msg): + result = obj.replace(replacer) + + exp = pd.Series(self.rep[to_key], index=index, name="yyy") + assert exp.dtype == to_key + + tm.assert_series_equal(result, exp) + + @pytest.mark.xfail(reason="Test not implemented") + def test_replace_series_period(self): + raise NotImplementedError diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_datetime.py b/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_datetime.py new file mode 100644 index 0000000..332ab02 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_datetime.py @@ -0,0 +1,154 @@ +import pandas as pd +from pandas import ( + DataFrame, + Index, + Series, + Timestamp, + date_range, +) +import pandas._testing as tm + + +class TestDatetimeIndex: + def test_indexing_with_datetime_tz(self): + + # GH#8260 + # support datetime64 with tz + + idx = Index(date_range("20130101", periods=3, tz="US/Eastern"), name="foo") + dr = date_range("20130110", periods=3) + df = DataFrame({"A": idx, "B": dr}) + df["C"] = idx + df.iloc[1, 1] = pd.NaT + df.iloc[1, 2] = pd.NaT + + expected = Series( + [Timestamp("2013-01-02 00:00:00-0500", tz="US/Eastern"), pd.NaT, pd.NaT], + index=list("ABC"), + dtype="object", + name=1, + ) + + # indexing + result = df.iloc[1] + tm.assert_series_equal(result, expected) + 
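# --- Editor's aside (illustrative sketch, not part of the committed test
# file): the core upcasting rule TestSetitemCoercion above exercises --
# assigning a value that does not fit the Series dtype coerces the whole
# Series to a wider dtype; it never silently truncates the value.
import numpy as np
import pandas as pd

ser = pd.Series([1, 2, 3, 4])          # int64
ser[1] = 1.1                           # int64 + float -> float64
assert ser.dtype == np.float64

ser = pd.Series([1, 2, 3, 4])
ser[1] = True                          # int64 + bool -> object, not int
assert ser.dtype == object
# --- end editor's aside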
result = df.loc[1] + tm.assert_series_equal(result, expected) + + def test_indexing_fast_xs(self): + # indexing - fast_xs + df = DataFrame({"a": date_range("2014-01-01", periods=10, tz="UTC")}) + result = df.iloc[5] + expected = Series( + [Timestamp("2014-01-06 00:00:00+0000", tz="UTC")], index=["a"], name=5 + ) + tm.assert_series_equal(result, expected) + + result = df.loc[5] + tm.assert_series_equal(result, expected) + + # indexing - boolean + result = df[df.a > df.a[3]] + expected = df.iloc[4:] + tm.assert_frame_equal(result, expected) + + def test_consistency_with_tz_aware_scalar(self): + # xef gh-12938 + # various ways of indexing the same tz-aware scalar + df = Series([Timestamp("2016-03-30 14:35:25", tz="Europe/Brussels")]).to_frame() + + df = pd.concat([df, df]).reset_index(drop=True) + expected = Timestamp("2016-03-30 14:35:25+0200", tz="Europe/Brussels") + + result = df[0][0] + assert result == expected + + result = df.iloc[0, 0] + assert result == expected + + result = df.loc[0, 0] + assert result == expected + + result = df.iat[0, 0] + assert result == expected + + result = df.at[0, 0] + assert result == expected + + result = df[0].loc[0] + assert result == expected + + result = df[0].at[0] + assert result == expected + + def test_indexing_with_datetimeindex_tz(self, indexer_sl): + + # GH 12050 + # indexing on a series with a datetimeindex with tz + index = date_range("2015-01-01", periods=2, tz="utc") + + ser = Series(range(2), index=index, dtype="int64") + + # list-like indexing + + for sel in (index, list(index)): + # getitem + result = indexer_sl(ser)[sel] + expected = ser.copy() + if sel is not index: + expected.index = expected.index._with_freq(None) + tm.assert_series_equal(result, expected) + + # setitem + result = ser.copy() + indexer_sl(result)[sel] = 1 + expected = Series(1, index=index) + tm.assert_series_equal(result, expected) + + # single element indexing + + # getitem + assert indexer_sl(ser)[index[1]] == 1 + + # setitem + result = ser.copy() + indexer_sl(result)[index[1]] = 5 + expected = Series([0, 5], index=index) + tm.assert_series_equal(result, expected) + + def test_nanosecond_getitem_setitem_with_tz(self): + # GH 11679 + data = ["2016-06-28 08:30:00.123456789"] + index = pd.DatetimeIndex(data, dtype="datetime64[ns, America/Chicago]") + df = DataFrame({"a": [10]}, index=index) + result = df.loc[df.index[0]] + expected = Series(10, index=["a"], name=df.index[0]) + tm.assert_series_equal(result, expected) + + result = df.copy() + result.loc[df.index[0], "a"] = -1 + expected = DataFrame(-1, index=index, columns=["a"]) + tm.assert_frame_equal(result, expected) + + def test_getitem_str_slice_millisecond_resolution(self, frame_or_series): + # GH#33589 + + keys = [ + "2017-10-25T16:25:04.151", + "2017-10-25T16:25:04.252", + "2017-10-25T16:50:05.237", + "2017-10-25T16:50:05.238", + ] + obj = frame_or_series( + [1, 2, 3, 4], + index=[Timestamp(x) for x in keys], + ) + result = obj[keys[1] : keys[2]] + expected = frame_or_series( + [2, 3], + index=[ + Timestamp(keys[1]), + Timestamp(keys[2]), + ], + ) + tm.assert_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_floats.py b/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_floats.py new file mode 100644 index 0000000..902bd94 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_floats.py @@ -0,0 +1,697 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + RangeIndex, + Series, +) +import 
pandas._testing as tm +from pandas.core.api import ( + Float64Index, + Int64Index, +) + + +def gen_obj(klass, index): + if klass is Series: + obj = Series(np.arange(len(index)), index=index) + else: + obj = DataFrame( + np.random.randn(len(index), len(index)), index=index, columns=index + ) + return obj + + +class TestFloatIndexers: + def check(self, result, original, indexer, getitem): + """ + comparator for results + we need to take care if we are indexing on a + Series or a frame + """ + if isinstance(original, Series): + expected = original.iloc[indexer] + else: + if getitem: + expected = original.iloc[:, indexer] + else: + expected = original.iloc[indexer] + + tm.assert_almost_equal(result, expected) + + @pytest.mark.parametrize( + "index_func", + [ + tm.makeStringIndex, + tm.makeUnicodeIndex, + tm.makeCategoricalIndex, + tm.makeDateIndex, + tm.makeTimedeltaIndex, + tm.makePeriodIndex, + ], + ) + def test_scalar_non_numeric(self, index_func, frame_or_series, indexer_sl): + + # GH 4892 + # float_indexers should raise exceptions + # on appropriate Index types & accessors + + i = index_func(5) + s = gen_obj(frame_or_series, i) + + # getting + with pytest.raises(KeyError, match="^3.0$"): + indexer_sl(s)[3.0] + + # contains + assert 3.0 not in s + + s2 = s.copy() + indexer_sl(s2)[3.0] = 10 + + if indexer_sl is tm.setitem: + assert 3.0 in s2.axes[-1] + elif indexer_sl is tm.loc: + assert 3.0 in s2.axes[0] + else: + assert 3.0 not in s2.axes[0] + assert 3.0 not in s2.axes[-1] + + @pytest.mark.parametrize( + "index_func", + [ + tm.makeStringIndex, + tm.makeUnicodeIndex, + tm.makeCategoricalIndex, + tm.makeDateIndex, + tm.makeTimedeltaIndex, + tm.makePeriodIndex, + ], + ) + def test_scalar_non_numeric_series_fallback(self, index_func): + # fallsback to position selection, series only + i = index_func(5) + s = Series(np.arange(len(i)), index=i) + s[3] + with pytest.raises(KeyError, match="^3.0$"): + s[3.0] + + def test_scalar_with_mixed(self, indexer_sl): + + s2 = Series([1, 2, 3], index=["a", "b", "c"]) + s3 = Series([1, 2, 3], index=["a", "b", 1.5]) + + # lookup in a pure string index with an invalid indexer + + with pytest.raises(KeyError, match="^1.0$"): + indexer_sl(s2)[1.0] + + with pytest.raises(KeyError, match=r"^1\.0$"): + indexer_sl(s2)[1.0] + + result = indexer_sl(s2)["b"] + expected = 2 + assert result == expected + + # mixed index so we have label + # indexing + with pytest.raises(KeyError, match="^1.0$"): + indexer_sl(s3)[1.0] + + if indexer_sl is not tm.loc: + # __getitem__ falls back to positional + result = s3[1] + expected = 2 + assert result == expected + + with pytest.raises(KeyError, match=r"^1\.0$"): + indexer_sl(s3)[1.0] + + result = indexer_sl(s3)[1.5] + expected = 3 + assert result == expected + + @pytest.mark.parametrize("index_func", [tm.makeIntIndex, tm.makeRangeIndex]) + def test_scalar_integer(self, index_func, frame_or_series, indexer_sl): + getitem = indexer_sl is not tm.loc + + # test how scalar float indexers work on int indexes + + # integer index + i = index_func(5) + obj = gen_obj(frame_or_series, i) + + # coerce to equal int + + result = indexer_sl(obj)[3.0] + self.check(result, obj, 3, getitem) + + if isinstance(obj, Series): + + def compare(x, y): + assert x == y + + expected = 100 + else: + compare = tm.assert_series_equal + if getitem: + expected = Series(100, index=range(len(obj)), name=3) + else: + expected = Series(100.0, index=range(len(obj)), name=3) + + s2 = obj.copy() + indexer_sl(s2)[3.0] = 100 + + result = indexer_sl(s2)[3.0] + compare(result, 
expected) + + result = indexer_sl(s2)[3] + compare(result, expected) + + @pytest.mark.parametrize("index_func", [tm.makeIntIndex, tm.makeRangeIndex]) + def test_scalar_integer_contains_float(self, index_func, frame_or_series): + # contains + # integer index + index = index_func(5) + obj = gen_obj(frame_or_series, index) + + # coerce to equal int + assert 3.0 in obj + + def test_scalar_float(self, frame_or_series): + + # scalar float indexers work on a float index + index = Index(np.arange(5.0)) + s = gen_obj(frame_or_series, index) + + # assert all operations except for iloc are ok + indexer = index[3] + for idxr in [tm.loc, tm.setitem]: + getitem = idxr is not tm.loc + + # getting + result = idxr(s)[indexer] + self.check(result, s, 3, getitem) + + # setting + s2 = s.copy() + + result = idxr(s2)[indexer] + self.check(result, s, 3, getitem) + + # random float is a KeyError + with pytest.raises(KeyError, match=r"^3\.5$"): + idxr(s)[3.5] + + # contains + assert 3.0 in s + + # iloc succeeds with an integer + expected = s.iloc[3] + s2 = s.copy() + + s2.iloc[3] = expected + result = s2.iloc[3] + self.check(result, s, 3, False) + + @pytest.mark.parametrize( + "index_func", + [ + tm.makeStringIndex, + tm.makeUnicodeIndex, + tm.makeDateIndex, + tm.makeTimedeltaIndex, + tm.makePeriodIndex, + ], + ) + @pytest.mark.parametrize("idx", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]) + def test_slice_non_numeric(self, index_func, idx, frame_or_series, indexer_sli): + + # GH 4892 + # float_indexers should raise exceptions + # on appropriate Index types & accessors + + index = index_func(5) + s = gen_obj(frame_or_series, index) + + # getitem + if indexer_sli is tm.iloc: + msg = ( + "cannot do positional indexing " + fr"on {type(index).__name__} with these indexers \[(3|4)\.0\] of " + "type float" + ) + else: + msg = ( + "cannot do slice indexing " + fr"on {type(index).__name__} with these indexers " + r"\[(3|4)(\.0)?\] " + r"of type (float|int)" + ) + with pytest.raises(TypeError, match=msg): + indexer_sli(s)[idx] + + # setitem + if indexer_sli is tm.iloc: + # otherwise we keep the same message as above + msg = "slice indices must be integers or None or have an __index__ method" + with pytest.raises(TypeError, match=msg): + indexer_sli(s)[idx] = 0 + + def test_slice_integer(self): + + # same as above, but for Integer based indexes + # these coerce to a like integer + # oob indicates if we are out of bounds + # of positional indexing + for index, oob in [ + (Int64Index(range(5)), False), + (RangeIndex(5), False), + (Int64Index(range(5)) + 10, True), + ]: + + # s is an in-range index + s = Series(range(5), index=index) + + # getitem + for idx in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]: + + result = s.loc[idx] + + # these are all label indexing + # except getitem which is positional + # empty + if oob: + indexer = slice(0, 0) + else: + indexer = slice(3, 5) + self.check(result, s, indexer, False) + + # getitem out-of-bounds + for idx in [slice(-6, 6), slice(-6.0, 6.0)]: + + result = s.loc[idx] + + # these are all label indexing + # except getitem which is positional + # empty + if oob: + indexer = slice(0, 0) + else: + indexer = slice(-6, 6) + self.check(result, s, indexer, False) + + # positional indexing + msg = ( + "cannot do slice indexing " + fr"on {type(index).__name__} with these indexers \[-6\.0\] of " + "type float" + ) + with pytest.raises(TypeError, match=msg): + s[slice(-6.0, 6.0)] + + # getitem odd floats + for idx, res1 in [ + (slice(2.5, 4), slice(3, 5)), + (slice(2, 3.5), slice(2, 
4)), + (slice(2.5, 3.5), slice(3, 4)), + ]: + + result = s.loc[idx] + if oob: + res = slice(0, 0) + else: + res = res1 + + self.check(result, s, res, False) + + # positional indexing + msg = ( + "cannot do slice indexing " + fr"on {type(index).__name__} with these indexers \[(2|3)\.5\] of " + "type float" + ) + with pytest.raises(TypeError, match=msg): + s[idx] + + @pytest.mark.parametrize("idx", [slice(2, 4.0), slice(2.0, 4), slice(2.0, 4.0)]) + def test_integer_positional_indexing(self, idx): + """make sure that we are raising on positional indexing + w.r.t. an integer index + """ + s = Series(range(2, 6), index=range(2, 6)) + + result = s[2:4] + expected = s.iloc[2:4] + tm.assert_series_equal(result, expected) + + klass = RangeIndex + msg = ( + "cannot do (slice|positional) indexing " + fr"on {klass.__name__} with these indexers \[(2|4)\.0\] of " + "type float" + ) + with pytest.raises(TypeError, match=msg): + s[idx] + with pytest.raises(TypeError, match=msg): + s.iloc[idx] + + @pytest.mark.parametrize("index_func", [tm.makeIntIndex, tm.makeRangeIndex]) + def test_slice_integer_frame_getitem(self, index_func): + + # similar to above, but on the getitem dim (of a DataFrame) + index = index_func(5) + + s = DataFrame(np.random.randn(5, 2), index=index) + + # getitem + for idx in [slice(0.0, 1), slice(0, 1.0), slice(0.0, 1.0)]: + + result = s.loc[idx] + indexer = slice(0, 2) + self.check(result, s, indexer, False) + + # positional indexing + msg = ( + "cannot do slice indexing " + fr"on {type(index).__name__} with these indexers \[(0|1)\.0\] of " + "type float" + ) + with pytest.raises(TypeError, match=msg): + s[idx] + + # getitem out-of-bounds + for idx in [slice(-10, 10), slice(-10.0, 10.0)]: + + result = s.loc[idx] + self.check(result, s, slice(-10, 10), True) + + # positional indexing + msg = ( + "cannot do slice indexing " + fr"on {type(index).__name__} with these indexers \[-10\.0\] of " + "type float" + ) + with pytest.raises(TypeError, match=msg): + s[slice(-10.0, 10.0)] + + # getitem odd floats + for idx, res in [ + (slice(0.5, 1), slice(1, 2)), + (slice(0, 0.5), slice(0, 1)), + (slice(0.5, 1.5), slice(1, 2)), + ]: + + result = s.loc[idx] + self.check(result, s, res, False) + + # positional indexing + msg = ( + "cannot do slice indexing " + fr"on {type(index).__name__} with these indexers \[0\.5\] of " + "type float" + ) + with pytest.raises(TypeError, match=msg): + s[idx] + + @pytest.mark.parametrize("idx", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]) + @pytest.mark.parametrize("index_func", [tm.makeIntIndex, tm.makeRangeIndex]) + def test_float_slice_getitem_with_integer_index_raises(self, idx, index_func): + + # similar to above, but on the getitem dim (of a DataFrame) + index = index_func(5) + + s = DataFrame(np.random.randn(5, 2), index=index) + + # setitem + sc = s.copy() + sc.loc[idx] = 0 + result = sc.loc[idx].values.ravel() + assert (result == 0).all() + + # positional indexing + msg = ( + "cannot do slice indexing " + fr"on {type(index).__name__} with these indexers \[(3|4)\.0\] of " + "type float" + ) + with pytest.raises(TypeError, match=msg): + s[idx] = 0 + + with pytest.raises(TypeError, match=msg): + s[idx] + + @pytest.mark.parametrize("idx", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]) + def test_slice_float(self, idx, frame_or_series, indexer_sl): + + # same as above, but for floats + index = Index(np.arange(5.0)) + 0.1 + s = gen_obj(frame_or_series, index) + + expected = s.iloc[3:4] + + # getitem + result = indexer_sl(s)[idx] + assert isinstance(result, 
type(s)) + tm.assert_equal(result, expected) + + # setitem + s2 = s.copy() + indexer_sl(s2)[idx] = 0 + result = indexer_sl(s2)[idx].values.ravel() + assert (result == 0).all() + + def test_floating_index_doc_example(self): + + index = Index([1.5, 2, 3, 4.5, 5]) + s = Series(range(5), index=index) + assert s[3] == 2 + assert s.loc[3] == 2 + assert s.iloc[3] == 3 + + def test_floating_misc(self, indexer_sl): + + # related 236 + # scalar/slicing of a float index + s = Series(np.arange(5), index=np.arange(5) * 2.5, dtype=np.int64) + + # label based slicing + result = indexer_sl(s)[1.0:3.0] + expected = Series(1, index=[2.5]) + tm.assert_series_equal(result, expected) + + # exact indexing when found + + result = indexer_sl(s)[5.0] + assert result == 2 + + result = indexer_sl(s)[5] + assert result == 2 + + # value not found (and no fallbacking at all) + + # scalar integers + with pytest.raises(KeyError, match=r"^4$"): + indexer_sl(s)[4] + + # fancy floats/integers create the correct entry (as nan) + # fancy tests + expected = Series([2, 0], index=Float64Index([5.0, 0.0])) + for fancy_idx in [[5.0, 0.0], np.array([5.0, 0.0])]: # float + tm.assert_series_equal(indexer_sl(s)[fancy_idx], expected) + + expected = Series([2, 0], index=Index([5, 0], dtype="float64")) + for fancy_idx in [[5, 0], np.array([5, 0])]: + tm.assert_series_equal(indexer_sl(s)[fancy_idx], expected) + + # all should return the same as we are slicing 'the same' + result1 = indexer_sl(s)[2:5] + result2 = indexer_sl(s)[2.0:5.0] + result3 = indexer_sl(s)[2.0:5] + result4 = indexer_sl(s)[2.1:5] + tm.assert_series_equal(result1, result2) + tm.assert_series_equal(result1, result3) + tm.assert_series_equal(result1, result4) + + expected = Series([1, 2], index=[2.5, 5.0]) + result = indexer_sl(s)[2:5] + + tm.assert_series_equal(result, expected) + + # list selection + result1 = indexer_sl(s)[[0.0, 5, 10]] + result2 = s.iloc[[0, 2, 4]] + tm.assert_series_equal(result1, result2) + + with pytest.raises(KeyError, match="not in index"): + indexer_sl(s)[[1.6, 5, 10]] + + with pytest.raises(KeyError, match="not in index"): + indexer_sl(s)[[0, 1, 2]] + + result = indexer_sl(s)[[2.5, 5]] + tm.assert_series_equal(result, Series([1, 2], index=[2.5, 5.0])) + + result = indexer_sl(s)[[2.5]] + tm.assert_series_equal(result, Series([1], index=[2.5])) + + def test_float64index_slicing_bug(self): + # GH 5557, related to slicing a float index + ser = { + 256: 2321.0, + 1: 78.0, + 2: 2716.0, + 3: 0.0, + 4: 369.0, + 5: 0.0, + 6: 269.0, + 7: 0.0, + 8: 0.0, + 9: 0.0, + 10: 3536.0, + 11: 0.0, + 12: 24.0, + 13: 0.0, + 14: 931.0, + 15: 0.0, + 16: 101.0, + 17: 78.0, + 18: 9643.0, + 19: 0.0, + 20: 0.0, + 21: 0.0, + 22: 63761.0, + 23: 0.0, + 24: 446.0, + 25: 0.0, + 26: 34773.0, + 27: 0.0, + 28: 729.0, + 29: 78.0, + 30: 0.0, + 31: 0.0, + 32: 3374.0, + 33: 0.0, + 34: 1391.0, + 35: 0.0, + 36: 361.0, + 37: 0.0, + 38: 61808.0, + 39: 0.0, + 40: 0.0, + 41: 0.0, + 42: 6677.0, + 43: 0.0, + 44: 802.0, + 45: 0.0, + 46: 2691.0, + 47: 0.0, + 48: 3582.0, + 49: 0.0, + 50: 734.0, + 51: 0.0, + 52: 627.0, + 53: 70.0, + 54: 2584.0, + 55: 0.0, + 56: 324.0, + 57: 0.0, + 58: 605.0, + 59: 0.0, + 60: 0.0, + 61: 0.0, + 62: 3989.0, + 63: 10.0, + 64: 42.0, + 65: 0.0, + 66: 904.0, + 67: 0.0, + 68: 88.0, + 69: 70.0, + 70: 8172.0, + 71: 0.0, + 72: 0.0, + 73: 0.0, + 74: 64902.0, + 75: 0.0, + 76: 347.0, + 77: 0.0, + 78: 36605.0, + 79: 0.0, + 80: 379.0, + 81: 70.0, + 82: 0.0, + 83: 0.0, + 84: 3001.0, + 85: 0.0, + 86: 1630.0, + 87: 7.0, + 88: 364.0, + 89: 0.0, + 90: 67404.0, + 91: 9.0, + 92: 0.0, + 
93: 0.0, + 94: 7685.0, + 95: 0.0, + 96: 1017.0, + 97: 0.0, + 98: 2831.0, + 99: 0.0, + 100: 2963.0, + 101: 0.0, + 102: 854.0, + 103: 0.0, + 104: 0.0, + 105: 0.0, + 106: 0.0, + 107: 0.0, + 108: 0.0, + 109: 0.0, + 110: 0.0, + 111: 0.0, + 112: 0.0, + 113: 0.0, + 114: 0.0, + 115: 0.0, + 116: 0.0, + 117: 0.0, + 118: 0.0, + 119: 0.0, + 120: 0.0, + 121: 0.0, + 122: 0.0, + 123: 0.0, + 124: 0.0, + 125: 0.0, + 126: 67744.0, + 127: 22.0, + 128: 264.0, + 129: 0.0, + 260: 197.0, + 268: 0.0, + 265: 0.0, + 269: 0.0, + 261: 0.0, + 266: 1198.0, + 267: 0.0, + 262: 2629.0, + 258: 775.0, + 257: 0.0, + 263: 0.0, + 259: 0.0, + 264: 163.0, + 250: 10326.0, + 251: 0.0, + 252: 1228.0, + 253: 0.0, + 254: 2769.0, + 255: 0.0, + } + + # smoke test for the repr + s = Series(ser) + result = s.value_counts() + str(result) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_iat.py b/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_iat.py new file mode 100644 index 0000000..44bd51e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_iat.py @@ -0,0 +1,48 @@ +import numpy as np + +from pandas import ( + DataFrame, + Series, + period_range, +) + + +def test_iat(float_frame): + + for i, row in enumerate(float_frame.index): + for j, col in enumerate(float_frame.columns): + result = float_frame.iat[i, j] + expected = float_frame.at[row, col] + assert result == expected + + +def test_iat_duplicate_columns(): + # https://github.com/pandas-dev/pandas/issues/11754 + df = DataFrame([[1, 2]], columns=["x", "x"]) + assert df.iat[0, 0] == 1 + + +def test_iat_getitem_series_with_period_index(): + # GH#4390, iat incorrectly indexing + index = period_range("1/1/2001", periods=10) + ser = Series(np.random.randn(10), index=index) + expected = ser[index[0]] + result = ser.iat[0] + assert expected == result + + +def test_iat_setitem_item_cache_cleared(indexer_ial): + # GH#45684 + data = {"x": np.arange(8, dtype=np.int64), "y": np.int64(0)} + df = DataFrame(data).copy() + ser = df["y"] + + # previously this iat setting would split the block and fail to clear + # the item_cache. 
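# --- Editor's aside (illustrative sketch, not part of the committed test
# file): the label-vs-position distinction test_floats.py above keeps
# probing, mirroring its doc example. On a float index, plain [] and .loc
# are label-based; only .iloc is positional.
import pandas as pd

ser = pd.Series(range(5), index=[1.5, 2, 3, 4.5, 5])
assert ser[3] == 2          # label 3.0 -> the value stored at that label
assert ser.loc[3] == 2      # same lookup, explicitly label-based
assert ser.iloc[3] == 3     # position 3
# --- end editor's aside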
+ indexer_ial(df)[7, 0] = 9999 + + indexer_ial(df)[7, 1] = 1234 + + assert df.iat[7, 1] == 1234 + assert ser.iloc[-1] == 1234 + assert df.iloc[-1, -1] == 1234 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_iloc.py b/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_iloc.py new file mode 100644 index 0000000..ee9d276 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_iloc.py @@ -0,0 +1,1398 @@ +""" test positional based indexing with iloc """ + +from datetime import datetime +import re +from warnings import ( + catch_warnings, + simplefilter, +) + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas import ( + NA, + Categorical, + CategoricalDtype, + DataFrame, + Index, + Interval, + NaT, + Series, + array, + concat, + date_range, + isna, +) +import pandas._testing as tm +from pandas.api.types import is_scalar +from pandas.core.indexing import IndexingError +from pandas.tests.indexing.common import Base + +# We pass through the error message from numpy +_slice_iloc_msg = re.escape( + "only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) " + "and integer or boolean arrays are valid indices" +) + + +class TestiLoc(Base): + @pytest.mark.parametrize("key", [2, -1, [0, 1, 2]]) + def test_iloc_getitem_int_and_list_int(self, key): + self.check_result( + "iloc", + key, + typs=["labels", "mixed", "ts", "floats", "empty"], + fails=IndexError, + ) + + # array of ints (GH5006), make sure that a single indexer is returning + # the correct type + + +class TestiLocBaseIndependent: + """Tests Independent Of Base Class""" + + @pytest.mark.parametrize( + "key", + [ + slice(None), + slice(3), + range(3), + [0, 1, 2], + Index(range(3)), + np.asarray([0, 1, 2]), + ], + ) + @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc]) + def test_iloc_setitem_fullcol_categorical(self, indexer, key, using_array_manager): + frame = DataFrame({0: range(3)}, dtype=object) + + cat = Categorical(["alpha", "beta", "gamma"]) + + if not using_array_manager: + assert frame._mgr.blocks[0]._can_hold_element(cat) + + df = frame.copy() + orig_vals = df.values + indexer(df)[key, 0] = cat + + overwrite = isinstance(key, slice) and key == slice(None) + + if overwrite or using_array_manager: + # TODO(ArrayManager) we always overwrite because ArrayManager takes + # the "split" path, which still overwrites + # TODO: GH#39986 this probably shouldn't behave differently + expected = DataFrame({0: cat}) + assert not np.shares_memory(df.values, orig_vals) + else: + expected = DataFrame({0: cat}).astype(object) + if not using_array_manager: + assert np.shares_memory(df[0].values, orig_vals) + + tm.assert_frame_equal(df, expected) + + # check we dont have a view on cat (may be undesired GH#39986) + df.iloc[0, 0] = "gamma" + assert cat[0] != "gamma" + + # TODO with mixed dataframe ("split" path), we always overwrite the column + frame = DataFrame({0: np.array([0, 1, 2], dtype=object), 1: range(3)}) + df = frame.copy() + orig_vals = df.values + indexer(df)[key, 0] = cat + expected = DataFrame({0: cat, 1: range(3)}) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize("box", [array, Series]) + def test_iloc_setitem_ea_inplace(self, frame_or_series, box, using_array_manager): + # GH#38952 Case with not setting a full column + # IntegerArray without NAs + arr = array([1, 2, 3, 4]) + obj = frame_or_series(arr.to_numpy("i8")) + + if frame_or_series is Series or not using_array_manager: + values = obj.values + 
else: + values = obj[0].values + + if frame_or_series is Series: + obj.iloc[:2] = box(arr[2:]) + else: + obj.iloc[:2, 0] = box(arr[2:]) + + expected = frame_or_series(np.array([3, 4, 3, 4], dtype="i8")) + tm.assert_equal(obj, expected) + + # Check that we are actually in-place + if frame_or_series is Series: + assert obj.values is values + else: + if using_array_manager: + assert obj[0].values is values + else: + assert obj.values.base is values.base and values.base is not None + + def test_is_scalar_access(self): + # GH#32085 index with duplicates doesn't matter for _is_scalar_access + index = Index([1, 2, 1]) + ser = Series(range(3), index=index) + + assert ser.iloc._is_scalar_access((1,)) + + df = ser.to_frame() + assert df.iloc._is_scalar_access((1, 0)) + + def test_iloc_exceeds_bounds(self): + + # GH6296 + # iloc should allow indexers that exceed the bounds + df = DataFrame(np.random.random_sample((20, 5)), columns=list("ABCDE")) + + # lists of positions should raise IndexError! + msg = "positional indexers are out-of-bounds" + with pytest.raises(IndexError, match=msg): + df.iloc[:, [0, 1, 2, 3, 4, 5]] + with pytest.raises(IndexError, match=msg): + df.iloc[[1, 30]] + with pytest.raises(IndexError, match=msg): + df.iloc[[1, -30]] + with pytest.raises(IndexError, match=msg): + df.iloc[[100]] + + s = df["A"] + with pytest.raises(IndexError, match=msg): + s.iloc[[100]] + with pytest.raises(IndexError, match=msg): + s.iloc[[-100]] + + # still raise on a single indexer + msg = "single positional indexer is out-of-bounds" + with pytest.raises(IndexError, match=msg): + df.iloc[30] + with pytest.raises(IndexError, match=msg): + df.iloc[-30] + + # GH10779 + # single positive/negative indexer exceeding Series bounds should raise + # an IndexError + with pytest.raises(IndexError, match=msg): + s.iloc[30] + with pytest.raises(IndexError, match=msg): + s.iloc[-30] + + # slices are ok + result = df.iloc[:, 4:10] # 0 < start < len < stop + expected = df.iloc[:, 4:] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, -4:-10] # stop < 0 < start < len + expected = df.iloc[:, :0] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, 10:4:-1] # 0 < stop < len < start (down) + expected = df.iloc[:, :4:-1] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, 4:-10:-1] # stop < 0 < start < len (down) + expected = df.iloc[:, 4::-1] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, -10:4] # start < 0 < stop < len + expected = df.iloc[:, :4] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, 10:4] # 0 < stop < len < start + expected = df.iloc[:, :0] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, -10:-11:-1] # stop < start < 0 < len (down) + expected = df.iloc[:, :0] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, 10:11] # 0 < len < start < stop + expected = df.iloc[:, :0] + tm.assert_frame_equal(result, expected) + + # slice bounds exceeding is ok + result = s.iloc[18:30] + expected = s.iloc[18:] + tm.assert_series_equal(result, expected) + + result = s.iloc[30:] + expected = s.iloc[:0] + tm.assert_series_equal(result, expected) + + result = s.iloc[30::-1] + expected = s.iloc[::-1] + tm.assert_series_equal(result, expected) + + # doc example + def check(result, expected): + str(result) + result.dtypes + tm.assert_frame_equal(result, expected) + + dfl = DataFrame(np.random.randn(5, 2), columns=list("AB")) + check(dfl.iloc[:, 2:3], DataFrame(index=dfl.index)) + check(dfl.iloc[:, 1:3], dfl.iloc[:, [1]]) + 
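+ # (editor's sketch, not pandas source) the rule exercised here: slice + # indexers may overrun the axis and are silently truncated, exactly like + # Python list slicing, while scalar and list-of-int indexers must be in + # bounds: + # >>> import pandas as pd + # >>> s = pd.Series([1, 2, 3]) + # >>> s.iloc[1:100]     # fine, truncated to positions 1..2 + # >>> s.iloc[100]       # IndexError: single positional indexer is out-of-bounds + # >>> s.iloc[[0, 100]]  # IndexError: positional indexers are out-of-bounds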
check(dfl.iloc[4:6], dfl.iloc[[4]]) + + msg = "positional indexers are out-of-bounds" + with pytest.raises(IndexError, match=msg): + dfl.iloc[[4, 5, 6]] + msg = "single positional indexer is out-of-bounds" + with pytest.raises(IndexError, match=msg): + dfl.iloc[:, 4] + + @pytest.mark.parametrize("index,columns", [(np.arange(20), list("ABCDE"))]) + @pytest.mark.parametrize( + "index_vals,column_vals", + [ + ([slice(None), ["A", "D"]]), + (["1", "2"], slice(None)), + ([datetime(2019, 1, 1)], slice(None)), + ], + ) + def test_iloc_non_integer_raises(self, index, columns, index_vals, column_vals): + # GH 25753 + df = DataFrame( + np.random.randn(len(index), len(columns)), index=index, columns=columns + ) + msg = ".iloc requires numeric indexers, got" + with pytest.raises(IndexError, match=msg): + df.iloc[index_vals, column_vals] + + def test_iloc_getitem_invalid_scalar(self, frame_or_series): + # GH 21982 + + obj = DataFrame(np.arange(100).reshape(10, 10)) + obj = tm.get_obj(obj, frame_or_series) + + with pytest.raises(TypeError, match="Cannot index by location index"): + obj.iloc["a"] + + def test_iloc_array_not_mutating_negative_indices(self): + + # GH 21867 + array_with_neg_numbers = np.array([1, 2, -1]) + array_copy = array_with_neg_numbers.copy() + df = DataFrame( + {"A": [100, 101, 102], "B": [103, 104, 105], "C": [106, 107, 108]}, + index=[1, 2, 3], + ) + df.iloc[array_with_neg_numbers] + tm.assert_numpy_array_equal(array_with_neg_numbers, array_copy) + df.iloc[:, array_with_neg_numbers] + tm.assert_numpy_array_equal(array_with_neg_numbers, array_copy) + + def test_iloc_getitem_neg_int_can_reach_first_index(self): + # GH10547 and GH10779 + # negative integers should be able to reach index 0 + df = DataFrame({"A": [2, 3, 5], "B": [7, 11, 13]}) + s = df["A"] + + expected = df.iloc[0] + result = df.iloc[-3] + tm.assert_series_equal(result, expected) + + expected = df.iloc[[0]] + result = df.iloc[[-3]] + tm.assert_frame_equal(result, expected) + + expected = s.iloc[0] + result = s.iloc[-3] + assert result == expected + + expected = s.iloc[[0]] + result = s.iloc[[-3]] + tm.assert_series_equal(result, expected) + + # check the length 1 Series case highlighted in GH10547 + expected = Series(["a"], index=["A"]) + result = expected.iloc[[-1]] + tm.assert_series_equal(result, expected) + + def test_iloc_getitem_dups(self): + # GH 6766 + df1 = DataFrame([{"A": None, "B": 1}, {"A": 2, "B": 2}]) + df2 = DataFrame([{"A": 3, "B": 3}, {"A": 4, "B": 4}]) + df = concat([df1, df2], axis=1) + + # cross-sectional indexing + result = df.iloc[0, 0] + assert isna(result) + + result = df.iloc[0, :] + expected = Series([np.nan, 1, 3, 3], index=["A", "B", "A", "B"], name=0) + tm.assert_series_equal(result, expected) + + def test_iloc_getitem_array(self): + df = DataFrame( + [ + {"A": 1, "B": 2, "C": 3}, + {"A": 100, "B": 200, "C": 300}, + {"A": 1000, "B": 2000, "C": 3000}, + ] + ) + + expected = DataFrame([{"A": 1, "B": 2, "C": 3}]) + tm.assert_frame_equal(df.iloc[[0]], expected) + + expected = DataFrame([{"A": 1, "B": 2, "C": 3}, {"A": 100, "B": 200, "C": 300}]) + tm.assert_frame_equal(df.iloc[[0, 1]], expected) + + expected = DataFrame([{"B": 2, "C": 3}, {"B": 2000, "C": 3000}], index=[0, 2]) + result = df.iloc[[0, 2], [1, 2]] + tm.assert_frame_equal(result, expected) + + def test_iloc_getitem_bool(self): + df = DataFrame( + [ + {"A": 1, "B": 2, "C": 3}, + {"A": 100, "B": 200, "C": 300}, + {"A": 1000, "B": 2000, "C": 3000}, + ] + ) + + expected = DataFrame([{"A": 1, "B": 2, "C": 3}, {"A": 100, "B": 200, "C": 
300}]) + result = df.iloc[[True, True, False]] + tm.assert_frame_equal(result, expected) + + expected = DataFrame( + [{"A": 1, "B": 2, "C": 3}, {"A": 1000, "B": 2000, "C": 3000}], index=[0, 2] + ) + result = df.iloc[lambda x: x.index % 2 == 0] + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("index", [[True, False], [True, False, True, False]]) + def test_iloc_getitem_bool_diff_len(self, index): + # GH26658 + s = Series([1, 2, 3]) + msg = f"Boolean index has wrong length: {len(index)} instead of {len(s)}" + with pytest.raises(IndexError, match=msg): + s.iloc[index] + + def test_iloc_getitem_slice(self): + df = DataFrame( + [ + {"A": 1, "B": 2, "C": 3}, + {"A": 100, "B": 200, "C": 300}, + {"A": 1000, "B": 2000, "C": 3000}, + ] + ) + + expected = DataFrame([{"A": 1, "B": 2, "C": 3}, {"A": 100, "B": 200, "C": 300}]) + result = df.iloc[:2] + tm.assert_frame_equal(result, expected) + + expected = DataFrame([{"A": 100, "B": 200}], index=[1]) + result = df.iloc[1:2, 0:2] + tm.assert_frame_equal(result, expected) + + expected = DataFrame( + [{"A": 1, "C": 3}, {"A": 100, "C": 300}, {"A": 1000, "C": 3000}] + ) + result = df.iloc[:, lambda df: [0, 2]] + tm.assert_frame_equal(result, expected) + + def test_iloc_getitem_slice_dups(self): + + df1 = DataFrame(np.random.randn(10, 4), columns=["A", "A", "B", "B"]) + df2 = DataFrame( + np.random.randint(0, 10, size=20).reshape(10, 2), columns=["A", "C"] + ) + + # axis=1 + df = concat([df1, df2], axis=1) + tm.assert_frame_equal(df.iloc[:, :4], df1) + tm.assert_frame_equal(df.iloc[:, 4:], df2) + + df = concat([df2, df1], axis=1) + tm.assert_frame_equal(df.iloc[:, :2], df2) + tm.assert_frame_equal(df.iloc[:, 2:], df1) + + exp = concat([df2, df1.iloc[:, [0]]], axis=1) + tm.assert_frame_equal(df.iloc[:, 0:3], exp) + + # axis=0 + df = concat([df, df], axis=0) + tm.assert_frame_equal(df.iloc[0:10, :2], df2) + tm.assert_frame_equal(df.iloc[0:10, 2:], df1) + tm.assert_frame_equal(df.iloc[10:, :2], df2) + tm.assert_frame_equal(df.iloc[10:, 2:], df1) + + def test_iloc_setitem(self): + df = DataFrame( + np.random.randn(4, 4), index=np.arange(0, 8, 2), columns=np.arange(0, 12, 3) + ) + + df.iloc[1, 1] = 1 + result = df.iloc[1, 1] + assert result == 1 + + df.iloc[:, 2:3] = 0 + expected = df.iloc[:, 2:3] + result = df.iloc[:, 2:3] + tm.assert_frame_equal(result, expected) + + # GH5771 + s = Series(0, index=[4, 5, 6]) + s.iloc[1:2] += 1 + expected = Series([0, 1, 0], index=[4, 5, 6]) + tm.assert_series_equal(s, expected) + + def test_iloc_setitem_axis_argument(self): + # GH45032 + df = DataFrame([[6, "c", 10], [7, "d", 11], [8, "e", 12]]) + expected = DataFrame([[6, "c", 10], [7, "d", 11], [5, 5, 5]]) + df.iloc(axis=0)[2] = 5 + tm.assert_frame_equal(df, expected) + + df = DataFrame([[6, "c", 10], [7, "d", 11], [8, "e", 12]]) + expected = DataFrame([[6, "c", 5], [7, "d", 5], [8, "e", 5]]) + df.iloc(axis=1)[2] = 5 + tm.assert_frame_equal(df, expected) + + def test_iloc_setitem_list(self): + + # setitem with an iloc list + df = DataFrame( + np.arange(9).reshape((3, 3)), index=["A", "B", "C"], columns=["A", "B", "C"] + ) + df.iloc[[0, 1], [1, 2]] + df.iloc[[0, 1], [1, 2]] += 100 + + expected = DataFrame( + np.array([0, 101, 102, 3, 104, 105, 6, 7, 8]).reshape((3, 3)), + index=["A", "B", "C"], + columns=["A", "B", "C"], + ) + tm.assert_frame_equal(df, expected) + + def test_iloc_setitem_pandas_object(self): + # GH 17193 + s_orig = Series([0, 1, 2, 3]) + expected = Series([0, -1, -2, 3]) + + s = s_orig.copy() + s.iloc[Series([1, 2])] = [-1, -2] + 
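+ # (editor's sketch, not pandas source) the key's own index plays no role in + # .iloc; a Series or Index key is consumed purely as positions: + # >>> import pandas as pd + # >>> s = pd.Series([0, 1, 2, 3]) + # >>> s.iloc[pd.Series([1, 2])] = [-1, -2]  # positions 1 and 2, not labels + # >>> s.tolist() + # [0, -1, -2, 3]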
tm.assert_series_equal(s, expected) + + s = s_orig.copy() + s.iloc[Index([1, 2])] = [-1, -2] + tm.assert_series_equal(s, expected) + + def test_iloc_setitem_dups(self): + + # GH 6766 + # iloc with a mask aligning from another iloc + df1 = DataFrame([{"A": None, "B": 1}, {"A": 2, "B": 2}]) + df2 = DataFrame([{"A": 3, "B": 3}, {"A": 4, "B": 4}]) + df = concat([df1, df2], axis=1) + + expected = df.fillna(3) + inds = np.isnan(df.iloc[:, 0]) + mask = inds[inds].index + df.iloc[mask, 0] = df.iloc[mask, 2] + tm.assert_frame_equal(df, expected) + + # del a dup column across blocks + expected = DataFrame({0: [1, 2], 1: [3, 4]}) + expected.columns = ["B", "B"] + del df["A"] + tm.assert_frame_equal(df, expected) + + # assign back to self + df.iloc[[0, 1], [0, 1]] = df.iloc[[0, 1], [0, 1]] + tm.assert_frame_equal(df, expected) + + # reversed x 2 + df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index(drop=True) + df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index(drop=True) + tm.assert_frame_equal(df, expected) + + def test_iloc_setitem_frame_duplicate_columns_multiple_blocks( + self, using_array_manager + ): + # Same as the "assign back to self" check in test_iloc_setitem_dups + # but on a DataFrame with multiple blocks + df = DataFrame([[0, 1], [2, 3]], columns=["B", "B"]) + + # setting float values that can be held by existing integer arrays + # is inplace + df.iloc[:, 0] = df.iloc[:, 0].astype("f8") + if not using_array_manager: + assert len(df._mgr.blocks) == 1 + + # if the assigned values cannot be held by existing integer arrays, + # we cast + df.iloc[:, 0] = df.iloc[:, 0] + 0.5 + if not using_array_manager: + assert len(df._mgr.blocks) == 2 + + expected = df.copy() + + # assign back to self + df.iloc[[0, 1], [0, 1]] = df.iloc[[0, 1], [0, 1]] + + tm.assert_frame_equal(df, expected) + + # TODO: GH#27620 this test used to compare iloc against ix; check if this + # is redundant with another test comparing iloc against loc + def test_iloc_getitem_frame(self): + df = DataFrame( + np.random.randn(10, 4), index=range(0, 20, 2), columns=range(0, 8, 2) + ) + + result = df.iloc[2] + exp = df.loc[4] + tm.assert_series_equal(result, exp) + + result = df.iloc[2, 2] + exp = df.loc[4, 4] + assert result == exp + + # slice + result = df.iloc[4:8] + expected = df.loc[8:14] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, 2:3] + expected = df.loc[:, 4:5] + tm.assert_frame_equal(result, expected) + + # list of integers + result = df.iloc[[0, 1, 3]] + expected = df.loc[[0, 2, 6]] + tm.assert_frame_equal(result, expected) + + result = df.iloc[[0, 1, 3], [0, 1]] + expected = df.loc[[0, 2, 6], [0, 2]] + tm.assert_frame_equal(result, expected) + + # neg indices + result = df.iloc[[-1, 1, 3], [-1, 1]] + expected = df.loc[[18, 2, 6], [6, 2]] + tm.assert_frame_equal(result, expected) + + # dups indices + result = df.iloc[[-1, -1, 1, 3], [-1, 1]] + expected = df.loc[[18, 18, 2, 6], [6, 2]] + tm.assert_frame_equal(result, expected) + + # with index-like + s = Series(index=range(1, 5), dtype=object) + result = df.iloc[s.index] + expected = df.loc[[2, 4, 6, 8]] + tm.assert_frame_equal(result, expected) + + def test_iloc_getitem_labelled_frame(self): + # try with labelled frame + df = DataFrame( + np.random.randn(10, 4), index=list("abcdefghij"), columns=list("ABCD") + ) + + result = df.iloc[1, 1] + exp = df.loc["b", "B"] + assert result == exp + + result = df.iloc[:, 2:3] + expected = df.loc[:, ["C"]] + tm.assert_frame_equal(result, expected) + + # negative indexing + result = df.iloc[-1, 
-1] + exp = df.loc["j", "D"] + assert result == exp + + # out-of-bounds exception + msg = "index 5 is out of bounds for axis 0 with size 4" + with pytest.raises(IndexError, match=msg): + df.iloc[10, 5] + + # trying to use a label + msg = ( + r"Location based indexing can only have \[integer, integer " + r"slice \(START point is INCLUDED, END point is EXCLUDED\), " + r"listlike of integers, boolean array\] types" + ) + with pytest.raises(ValueError, match=msg): + df.iloc["j", "D"] + + def test_iloc_getitem_doc_issue(self, using_array_manager): + + # multi axis slicing issue with single block + # surfaced in GH 6059 + + arr = np.random.randn(6, 4) + index = date_range("20130101", periods=6) + columns = list("ABCD") + df = DataFrame(arr, index=index, columns=columns) + + # defines ref_locs + df.describe() + + result = df.iloc[3:5, 0:2] + str(result) + result.dtypes + + expected = DataFrame(arr[3:5, 0:2], index=index[3:5], columns=columns[0:2]) + tm.assert_frame_equal(result, expected) + + # for dups + df.columns = list("aaaa") + result = df.iloc[3:5, 0:2] + str(result) + result.dtypes + + expected = DataFrame(arr[3:5, 0:2], index=index[3:5], columns=list("aa")) + tm.assert_frame_equal(result, expected) + + # related + arr = np.random.randn(6, 4) + index = list(range(0, 12, 2)) + columns = list(range(0, 8, 2)) + df = DataFrame(arr, index=index, columns=columns) + + if not using_array_manager: + df._mgr.blocks[0].mgr_locs + result = df.iloc[1:5, 2:4] + str(result) + result.dtypes + expected = DataFrame(arr[1:5, 2:4], index=index[1:5], columns=columns[2:4]) + tm.assert_frame_equal(result, expected) + + def test_iloc_setitem_series(self): + df = DataFrame( + np.random.randn(10, 4), index=list("abcdefghij"), columns=list("ABCD") + ) + + df.iloc[1, 1] = 1 + result = df.iloc[1, 1] + assert result == 1 + + df.iloc[:, 2:3] = 0 + expected = df.iloc[:, 2:3] + result = df.iloc[:, 2:3] + tm.assert_frame_equal(result, expected) + + s = Series(np.random.randn(10), index=range(0, 20, 2)) + + s.iloc[1] = 1 + result = s.iloc[1] + assert result == 1 + + s.iloc[:4] = 0 + expected = s.iloc[:4] + result = s.iloc[:4] + tm.assert_series_equal(result, expected) + + s = Series([-1] * 6) + s.iloc[0::2] = [0, 2, 4] + s.iloc[1::2] = [1, 3, 5] + result = s + expected = Series([0, 1, 2, 3, 4, 5]) + tm.assert_series_equal(result, expected) + + def test_iloc_setitem_list_of_lists(self): + + # GH 7551 + # list-of-list is set incorrectly in mixed vs. single dtyped frames + df = DataFrame( + {"A": np.arange(5, dtype="int64"), "B": np.arange(5, 10, dtype="int64")} + ) + df.iloc[2:4] = [[10, 11], [12, 13]] + expected = DataFrame({"A": [0, 1, 10, 12, 4], "B": [5, 6, 11, 13, 9]}) + tm.assert_frame_equal(df, expected) + + df = DataFrame( + {"A": ["a", "b", "c", "d", "e"], "B": np.arange(5, 10, dtype="int64")} + ) + df.iloc[2:4] = [["x", 11], ["y", 13]] + expected = DataFrame({"A": ["a", "b", "x", "y", "e"], "B": [5, 6, 11, 13, 9]}) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize("indexer", [[0], slice(None, 1, None), np.array([0])]) + @pytest.mark.parametrize("value", [["Z"], np.array(["Z"])]) + def test_iloc_setitem_with_scalar_index(self, indexer, value): + # GH #19474 + # assigning like "df.iloc[0, [0]] = ['Z']" should be evaluated + # element-wise, not using "setter('A', ['Z'])".
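+ # (editor's sketch, not pandas source) concretely, the behaviour expected by + # this test: + # >>> import pandas as pd + # >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + # >>> df.iloc[0, [0]] = ["Z"]  # one position, one value, set element-wise + # >>> df.iloc[0, 0] + # 'Z'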
+ + df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + df.iloc[0, indexer] = value + result = df.iloc[0, 0] + + assert is_scalar(result) and result == "Z" + + def test_iloc_mask(self): + + # GH 3631, iloc with a mask (of a series) should raise + df = DataFrame(list(range(5)), index=list("ABCDE"), columns=["a"]) + mask = df.a % 2 == 0 + msg = "iLocation based boolean indexing cannot use an indexable as a mask" + with pytest.raises(ValueError, match=msg): + df.iloc[mask] + mask.index = range(len(mask)) + msg = "iLocation based boolean indexing on an integer type is not available" + with pytest.raises(NotImplementedError, match=msg): + df.iloc[mask] + + # ndarray ok + result = df.iloc[np.array([True] * len(mask), dtype=bool)] + tm.assert_frame_equal(result, df) + + # the possibilities + locs = np.arange(4) + nums = 2 ** locs + reps = [bin(num) for num in nums] + df = DataFrame({"locs": locs, "nums": nums}, reps) + + expected = { + (None, ""): "0b1100", + (None, ".loc"): "0b1100", + (None, ".iloc"): "0b1100", + ("index", ""): "0b11", + ("index", ".loc"): "0b11", + ("index", ".iloc"): ( + "iLocation based boolean indexing cannot use an indexable as a mask" + ), + ("locs", ""): "Unalignable boolean Series provided as indexer " + "(index of the boolean Series and of the indexed " + "object do not match).", + ("locs", ".loc"): "Unalignable boolean Series provided as indexer " + "(index of the boolean Series and of the " + "indexed object do not match).", + ("locs", ".iloc"): ( + "iLocation based boolean indexing on an " + "integer type is not available" + ), + } + + # UserWarnings from reindex of a boolean mask + with catch_warnings(record=True): + simplefilter("ignore", UserWarning) + for idx in [None, "index", "locs"]: + mask = (df.nums > 2).values + if idx: + mask = Series(mask, list(reversed(getattr(df, idx)))) + for method in ["", ".loc", ".iloc"]: + try: + if method: + accessor = getattr(df, method[1:]) + else: + accessor = df + answer = str(bin(accessor[mask]["nums"].sum())) + except (ValueError, IndexingError, NotImplementedError) as e: + answer = str(e) + + key = ( + idx, + method, + ) + r = expected.get(key) + if r != answer: + raise AssertionError( + f"[{key}] does not match [{answer}], received [{r}]" + ) + + def test_iloc_non_unique_indexing(self): + + # GH 4017, non-unique indexing (on the axis) + df = DataFrame({"A": [0.1] * 3000, "B": [1] * 3000}) + idx = np.arange(30) * 99 + expected = df.iloc[idx] + + df3 = concat([df, 2 * df, 3 * df]) + result = df3.iloc[idx] + + tm.assert_frame_equal(result, expected) + + df2 = DataFrame({"A": [0.1] * 1000, "B": [1] * 1000}) + df2 = concat([df2, 2 * df2, 3 * df2]) + + with pytest.raises(KeyError, match="not in index"): + df2.loc[idx] + + def test_iloc_empty_list_indexer_is_ok(self): + + df = tm.makeCustomDataframe(5, 2) + # vertical empty + tm.assert_frame_equal( + df.iloc[:, []], + df.iloc[:, :0], + check_index_type=True, + check_column_type=True, + ) + # horizontal empty + tm.assert_frame_equal( + df.iloc[[], :], + df.iloc[:0, :], + check_index_type=True, + check_column_type=True, + ) + # horizontal empty + tm.assert_frame_equal( + df.iloc[[]], df.iloc[:0, :], check_index_type=True, check_column_type=True + ) + + def test_identity_slice_returns_new_object(self, using_array_manager, request): + # GH13873 + if using_array_manager: + mark = pytest.mark.xfail( + reason="setting with .loc[:, 'a'] does not alter inplace" + ) + request.node.add_marker(mark) + + original_df = DataFrame({"a": [1, 2, 3]}) + sliced_df = original_df.iloc[:] + assert 
sliced_df is not original_df + + # should be a shallow copy + assert np.shares_memory(original_df["a"], sliced_df["a"]) + + # Setting using .loc[:, "a"] sets inplace so alters both sliced and orig + original_df.loc[:, "a"] = [4, 4, 4] + assert (sliced_df["a"] == 4).all() + + original_series = Series([1, 2, 3, 4, 5, 6]) + sliced_series = original_series.iloc[:] + assert sliced_series is not original_series + + # should also be a shallow copy + original_series[:3] = [7, 8, 9] + assert all(sliced_series[:3] == [7, 8, 9]) + + def test_indexing_zerodim_np_array(self): + # GH24919 + df = DataFrame([[1, 2], [3, 4]]) + result = df.iloc[np.array(0)] + s = Series([1, 2], name=0) + tm.assert_series_equal(result, s) + + def test_series_indexing_zerodim_np_array(self): + # GH24919 + s = Series([1, 2]) + result = s.iloc[np.array(0)] + assert result == 1 + + @pytest.mark.xfail(reason="https://github.com/pandas-dev/pandas/issues/33457") + def test_iloc_setitem_categorical_updates_inplace(self): + # Mixed dtype ensures we go through take_split_path in setitem_with_indexer + cat = Categorical(["A", "B", "C"]) + df = DataFrame({1: cat, 2: [1, 2, 3]}) + + # This should modify our original values in-place + df.iloc[:, 0] = cat[::-1] + + expected = Categorical(["C", "B", "A"]) + tm.assert_categorical_equal(cat, expected) + + def test_iloc_with_boolean_operation(self): + # GH 20627 + result = DataFrame([[0, 1], [2, 3], [4, 5], [6, np.nan]]) + result.iloc[result.index <= 2] *= 2 + expected = DataFrame([[0, 2], [4, 6], [8, 10], [6, np.nan]]) + tm.assert_frame_equal(result, expected) + + result.iloc[result.index > 2] *= 2 + expected = DataFrame([[0, 2], [4, 6], [8, 10], [12, np.nan]]) + tm.assert_frame_equal(result, expected) + + result.iloc[[True, True, False, False]] *= 2 + expected = DataFrame([[0, 4], [8, 12], [8, 10], [12, np.nan]]) + tm.assert_frame_equal(result, expected) + + result.iloc[[False, False, True, True]] /= 2 + expected = DataFrame([[0, 4.0], [8, 12.0], [4, 5.0], [6, np.nan]]) + tm.assert_frame_equal(result, expected) + + def test_iloc_getitem_singlerow_slice_categoricaldtype_gives_series(self): + # GH#29521 + df = DataFrame({"x": Categorical("a b c d e".split())}) + result = df.iloc[0] + raw_cat = Categorical(["a"], categories=["a", "b", "c", "d", "e"]) + expected = Series(raw_cat, index=["x"], name=0, dtype="category") + + tm.assert_series_equal(result, expected) + + def test_iloc_getitem_categorical_values(self): + # GH#14580 + # test iloc() on Series with Categorical data + + ser = Series([1, 2, 3]).astype("category") + + # get slice + result = ser.iloc[0:2] + expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3])) + tm.assert_series_equal(result, expected) + + # get list of indexes + result = ser.iloc[[0, 1]] + expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3])) + tm.assert_series_equal(result, expected) + + # get boolean array + result = ser.iloc[[True, False, False]] + expected = Series([1]).astype(CategoricalDtype([1, 2, 3])) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("value", [None, NaT, np.nan]) + def test_iloc_setitem_td64_values_cast_na(self, value): + # GH#18586 + series = Series([0, 1, 2], dtype="timedelta64[ns]") + series.iloc[0] = value + expected = Series([NaT, 1, 2], dtype="timedelta64[ns]") + tm.assert_series_equal(series, expected) + + @pytest.mark.parametrize("not_na", [Interval(0, 1), "a", 1.0]) + def test_setitem_mix_of_nan_and_interval(self, not_na, nulls_fixture): + # GH#27937 + dtype = CategoricalDtype(categories=[not_na]) + 
ser = Series( + [nulls_fixture, nulls_fixture, nulls_fixture, nulls_fixture], dtype=dtype + ) + ser.iloc[:3] = [nulls_fixture, not_na, nulls_fixture] + exp = Series([nulls_fixture, not_na, nulls_fixture, nulls_fixture], dtype=dtype) + tm.assert_series_equal(ser, exp) + + def test_iloc_setitem_empty_frame_raises_with_3d_ndarray(self): + idx = Index([]) + obj = DataFrame(np.random.randn(len(idx), len(idx)), index=idx, columns=idx) + nd3 = np.random.randint(5, size=(2, 2, 2)) + + msg = f"Cannot set values with ndim > {obj.ndim}" + with pytest.raises(ValueError, match=msg): + obj.iloc[nd3] = 0 + + @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc]) + def test_iloc_getitem_read_only_values(self, indexer): + # GH#10043 this is fundamentally a test for iloc, but test loc while + # we're here + rw_array = np.eye(10) + rw_df = DataFrame(rw_array) + + ro_array = np.eye(10) + ro_array.setflags(write=False) + ro_df = DataFrame(ro_array) + + tm.assert_frame_equal(indexer(rw_df)[[1, 2, 3]], indexer(ro_df)[[1, 2, 3]]) + tm.assert_frame_equal(indexer(rw_df)[[1]], indexer(ro_df)[[1]]) + tm.assert_series_equal(indexer(rw_df)[1], indexer(ro_df)[1]) + tm.assert_frame_equal(indexer(rw_df)[1:3], indexer(ro_df)[1:3]) + + def test_iloc_getitem_readonly_key(self): + # GH#17192 iloc with read-only array raising TypeError + df = DataFrame({"data": np.ones(100, dtype="float64")}) + indices = np.array([1, 3, 6]) + indices.flags.writeable = False + + result = df.iloc[indices] + expected = df.loc[[1, 3, 6]] + tm.assert_frame_equal(result, expected) + + result = df["data"].iloc[indices] + expected = df["data"].loc[[1, 3, 6]] + tm.assert_series_equal(result, expected) + + # TODO(ArrayManager) setting single item with an iterable doesn't work yet + # in the "split" path + @td.skip_array_manager_not_yet_implemented + def test_iloc_assign_series_to_df_cell(self): + # GH 37593 + df = DataFrame(columns=["a"], index=[0]) + df.iloc[0, 0] = Series([1, 2, 3]) + expected = DataFrame({"a": [Series([1, 2, 3])]}, columns=["a"], index=[0]) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize("klass", [list, np.array]) + def test_iloc_setitem_bool_indexer(self, klass): + # GH#36741 + df = DataFrame({"flag": ["x", "y", "z"], "value": [1, 3, 4]}) + indexer = klass([True, False, False]) + df.iloc[indexer, 1] = df.iloc[indexer, 1] * 2 + expected = DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]}) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize("indexer", [[1], slice(1, 2)]) + def test_iloc_setitem_pure_position_based(self, indexer): + # GH#22046 + df1 = DataFrame({"a2": [11, 12, 13], "b2": [14, 15, 16]}) + df2 = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) + df2.iloc[:, indexer] = df1.iloc[:, [0]] + expected = DataFrame({"a": [1, 2, 3], "b": [11, 12, 13], "c": [7, 8, 9]}) + tm.assert_frame_equal(df2, expected) + + def test_iloc_setitem_dictionary_value(self): + # GH#37728 + df = DataFrame({"x": [1, 2], "y": [2, 2]}) + rhs = {"x": 9, "y": 99} + df.iloc[1] = rhs + expected = DataFrame({"x": [1, 9], "y": [2, 99]}) + tm.assert_frame_equal(df, expected) + + # GH#38335 same thing, mixed dtypes + df = DataFrame({"x": [1, 2], "y": [2.0, 2.0]}) + df.iloc[1] = rhs + expected = DataFrame({"x": [1, 9], "y": [2.0, 99.0]}) + tm.assert_frame_equal(df, expected) + + def test_iloc_getitem_float_duplicates(self): + df = DataFrame( + np.random.randn(3, 3), index=[0.1, 0.2, 0.2], columns=list("abc") + ) + expect = df.iloc[1:] + tm.assert_frame_equal(df.loc[0.2], expect) + + expect = df.iloc[1:, 0] + 
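+ # (editor's sketch, not pandas source) with duplicated labels a scalar .loc + # lookup returns every matching row, which is what the iloc slices here + # reconstruct positionally: + # >>> import numpy as np + # >>> import pandas as pd + # >>> df = pd.DataFrame(np.arange(9).reshape(3, 3), index=[0.1, 0.2, 0.2], columns=list("abc")) + # >>> len(df.loc[0.2])  # both rows labelled 0.2 + # 2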
tm.assert_series_equal(df.loc[0.2, "a"], expect) + + df.index = [1, 0.2, 0.2] + expect = df.iloc[1:] + tm.assert_frame_equal(df.loc[0.2], expect) + + expect = df.iloc[1:, 0] + tm.assert_series_equal(df.loc[0.2, "a"], expect) + + df = DataFrame( + np.random.randn(4, 3), index=[1, 0.2, 0.2, 1], columns=list("abc") + ) + expect = df.iloc[1:-1] + tm.assert_frame_equal(df.loc[0.2], expect) + + expect = df.iloc[1:-1, 0] + tm.assert_series_equal(df.loc[0.2, "a"], expect) + + df.index = [0.1, 0.2, 2, 0.2] + expect = df.iloc[[1, -1]] + tm.assert_frame_equal(df.loc[0.2], expect) + + expect = df.iloc[[1, -1], 0] + tm.assert_series_equal(df.loc[0.2, "a"], expect) + + def test_iloc_setitem_custom_object(self): + # iloc with an object + class TO: + def __init__(self, value): + self.value = value + + def __str__(self) -> str: + return f"[{self.value}]" + + __repr__ = __str__ + + def __eq__(self, other) -> bool: + return self.value == other.value + + def view(self): + return self + + df = DataFrame(index=[0, 1], columns=[0]) + df.iloc[1, 0] = TO(1) + df.iloc[1, 0] = TO(2) + + result = DataFrame(index=[0, 1], columns=[0]) + result.iloc[1, 0] = TO(2) + + tm.assert_frame_equal(result, df) + + # remains object dtype even after setting it back + df = DataFrame(index=[0, 1], columns=[0]) + df.iloc[1, 0] = TO(1) + df.iloc[1, 0] = np.nan + result = DataFrame(index=[0, 1], columns=[0]) + + tm.assert_frame_equal(result, df) + + def test_iloc_getitem_with_duplicates(self): + + df = DataFrame(np.random.rand(3, 3), columns=list("ABC"), index=list("aab")) + + result = df.iloc[0] + assert isinstance(result, Series) + tm.assert_almost_equal(result.values, df.values[0]) + + result = df.T.iloc[:, 0] + assert isinstance(result, Series) + tm.assert_almost_equal(result.values, df.values[0]) + + def test_iloc_getitem_with_duplicates2(self): + # GH#2259 + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=[1, 1, 2]) + result = df.iloc[:, [0]] + expected = df.take([0], axis=1) + tm.assert_frame_equal(result, expected) + + def test_iloc_interval(self): + # GH#17130 + df = DataFrame({Interval(1, 2): [1, 2]}) + + result = df.iloc[0] + expected = Series({Interval(1, 2): 1}, name=0) + tm.assert_series_equal(result, expected) + + result = df.iloc[:, 0] + expected = Series([1, 2], name=Interval(1, 2)) + tm.assert_series_equal(result, expected) + + result = df.copy() + result.iloc[:, 0] += 1 + expected = DataFrame({Interval(1, 2): [2, 3]}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("indexing_func", [list, np.array]) + @pytest.mark.parametrize("rhs_func", [list, np.array]) + def test_loc_setitem_boolean_list(self, rhs_func, indexing_func): + # GH#20438 testing specifically list key, not arraylike + ser = Series([0, 1, 2]) + ser.iloc[indexing_func([True, False, True])] = rhs_func([5, 10]) + expected = Series([5, 1, 10]) + tm.assert_series_equal(ser, expected) + + df = DataFrame({"a": [0, 1, 2]}) + df.iloc[indexing_func([True, False, True])] = rhs_func([[5], [10]]) + expected = DataFrame({"a": [5, 1, 10]}) + tm.assert_frame_equal(df, expected) + + def test_iloc_getitem_slice_negative_step_ea_block(self): + # GH#44551 + df = DataFrame({"A": [1, 2, 3]}, dtype="Int64") + + res = df.iloc[:, ::-1] + tm.assert_frame_equal(res, df) + + df["B"] = "foo" + res = df.iloc[:, ::-1] + expected = DataFrame({"B": df["B"], "A": df["A"]}) + tm.assert_frame_equal(res, expected) + + def test_iloc_setitem_2d_ndarray_into_ea_block(self): + # GH#44703 + df = DataFrame({"status": ["a", "b", "c"]}, dtype="category") + df.iloc[np.array([0, 
1]), np.array([0])] = np.array([["a"], ["a"]]) + + expected = DataFrame({"status": ["a", "a", "c"]}, dtype=df["status"].dtype) + tm.assert_frame_equal(df, expected) + + +class TestILocErrors: + # NB: this test should work for _any_ Series we can pass as + # series_with_simple_index + def test_iloc_float_raises(self, series_with_simple_index, frame_or_series): + # GH#4892 + # float_indexers should raise exceptions + # on appropriate Index types & accessors + # this duplicates the code below + # but is specifically testing for the error + # message + + obj = series_with_simple_index + if frame_or_series is DataFrame: + obj = obj.to_frame() + + msg = "Cannot index by location index with a non-integer key" + with pytest.raises(TypeError, match=msg): + obj.iloc[3.0] + + with pytest.raises(IndexError, match=_slice_iloc_msg): + obj.iloc[3.0] = 0 + + def test_iloc_getitem_setitem_fancy_exceptions(self, float_frame): + with pytest.raises(IndexingError, match="Too many indexers"): + float_frame.iloc[:, :, :] + + with pytest.raises(IndexError, match="too many indices for array"): + # GH#32257 we let numpy do validation, get their exception + float_frame.iloc[:, :, :] = 1 + + # TODO(ArrayManager) "split" path doesn't properly implement DataFrame indexer + @td.skip_array_manager_not_yet_implemented + def test_iloc_frame_indexer(self): + # GH#39004 + df = DataFrame({"a": [1, 2, 3]}) + indexer = DataFrame({"a": [True, False, True]}) + with tm.assert_produces_warning(FutureWarning): + df.iloc[indexer] = 1 + + msg = ( + "DataFrame indexer is not allowed for .iloc\n" + "Consider using .loc for automatic alignment." + ) + with pytest.raises(IndexError, match=msg): + df.iloc[indexer] + + +class TestILocSetItemDuplicateColumns: + def test_iloc_setitem_scalar_duplicate_columns(self): + # GH#15686, duplicate columns and mixed dtype + df1 = DataFrame([{"A": None, "B": 1}, {"A": 2, "B": 2}]) + df2 = DataFrame([{"A": 3, "B": 3}, {"A": 4, "B": 4}]) + df = concat([df1, df2], axis=1) + df.iloc[0, 0] = -1 + + assert df.iloc[0, 0] == -1 + assert df.iloc[0, 2] == 3 + assert df.dtypes.iloc[2] == np.int64 + + def test_iloc_setitem_list_duplicate_columns(self): + # GH#22036 setting with same-sized list + df = DataFrame([[0, "str", "str2"]], columns=["a", "b", "b"]) + + df.iloc[:, 2] = ["str3"] + + expected = DataFrame([[0, "str", "str3"]], columns=["a", "b", "b"]) + tm.assert_frame_equal(df, expected) + + def test_iloc_setitem_series_duplicate_columns(self): + df = DataFrame( + np.arange(8, dtype=np.int64).reshape(2, 4), columns=["A", "B", "A", "B"] + ) + df.iloc[:, 0] = df.iloc[:, 0].astype(np.float64) + assert df.dtypes.iloc[2] == np.int64 + + @pytest.mark.parametrize( + ["dtypes", "init_value", "expected_value"], + [("int64", "0", 0), ("float", "1.2", 1.2)], + ) + def test_iloc_setitem_dtypes_duplicate_columns( + self, dtypes, init_value, expected_value + ): + # GH#22035 + df = DataFrame([[init_value, "str", "str2"]], columns=["a", "b", "b"]) + df.iloc[:, 0] = df.iloc[:, 0].astype(dtypes) + expected_df = DataFrame( + [[expected_value, "str", "str2"]], columns=["a", "b", "b"] + ) + tm.assert_frame_equal(df, expected_df) + + +class TestILocCallable: + def test_frame_iloc_getitem_callable(self): + # GH#11485 + df = DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD")) + + # return location + res = df.iloc[lambda x: [1, 3]] + tm.assert_frame_equal(res, df.iloc[[1, 3]]) + + res = df.iloc[lambda x: [1, 3], :] + tm.assert_frame_equal(res, df.iloc[[1, 3], :]) + + res = df.iloc[lambda x: [1, 3], lambda x: 0] + 
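+ # (editor's sketch, not pandas source) a callable passed to .iloc receives + # the frame and must return a valid positional indexer; a scalar column + # position yields a Series, a list yields a DataFrame: + # >>> import pandas as pd + # >>> df = pd.DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}) + # >>> df.iloc[lambda d: [1, 3]]               # rows 1 and 3 -> DataFrame + # >>> df.iloc[lambda d: [1, 3], lambda d: 0]  # column position 0 -> Series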
tm.assert_series_equal(res, df.iloc[[1, 3], 0]) + + res = df.iloc[lambda x: [1, 3], lambda x: [0]] + tm.assert_frame_equal(res, df.iloc[[1, 3], [0]]) + + # mixture + res = df.iloc[[1, 3], lambda x: 0] + tm.assert_series_equal(res, df.iloc[[1, 3], 0]) + + res = df.iloc[[1, 3], lambda x: [0]] + tm.assert_frame_equal(res, df.iloc[[1, 3], [0]]) + + res = df.iloc[lambda x: [1, 3], 0] + tm.assert_series_equal(res, df.iloc[[1, 3], 0]) + + res = df.iloc[lambda x: [1, 3], [0]] + tm.assert_frame_equal(res, df.iloc[[1, 3], [0]]) + + def test_frame_iloc_setitem_callable(self): + # GH#11485 + df = DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD")) + + # return location + res = df.copy() + res.iloc[lambda x: [1, 3]] = 0 + exp = df.copy() + exp.iloc[[1, 3]] = 0 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.iloc[lambda x: [1, 3], :] = -1 + exp = df.copy() + exp.iloc[[1, 3], :] = -1 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.iloc[lambda x: [1, 3], lambda x: 0] = 5 + exp = df.copy() + exp.iloc[[1, 3], 0] = 5 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.iloc[lambda x: [1, 3], lambda x: [0]] = 25 + exp = df.copy() + exp.iloc[[1, 3], [0]] = 25 + tm.assert_frame_equal(res, exp) + + # mixture + res = df.copy() + res.iloc[[1, 3], lambda x: 0] = -3 + exp = df.copy() + exp.iloc[[1, 3], 0] = -3 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.iloc[[1, 3], lambda x: [0]] = -5 + exp = df.copy() + exp.iloc[[1, 3], [0]] = -5 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.iloc[lambda x: [1, 3], 0] = 10 + exp = df.copy() + exp.iloc[[1, 3], 0] = 10 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.iloc[lambda x: [1, 3], [0]] = [-5, -5] + exp = df.copy() + exp.iloc[[1, 3], [0]] = [-5, -5] + tm.assert_frame_equal(res, exp) + + +class TestILocSeries: + def test_iloc(self): + ser = Series(np.random.randn(10), index=list(range(0, 20, 2))) + + for i in range(len(ser)): + result = ser.iloc[i] + exp = ser[ser.index[i]] + tm.assert_almost_equal(result, exp) + + # pass a slice + result = ser.iloc[slice(1, 3)] + expected = ser.loc[2:4] + tm.assert_series_equal(result, expected) + + # test slice is a view + result[:] = 0 + assert (ser[1:3] == 0).all() + + # list of integers + result = ser.iloc[[0, 2, 3, 4, 5]] + expected = ser.reindex(ser.index[[0, 2, 3, 4, 5]]) + tm.assert_series_equal(result, expected) + + def test_iloc_getitem_nonunique(self): + ser = Series([0, 1, 2], index=[0, 1, 0]) + assert ser.iloc[2] == 2 + + def test_iloc_setitem_pure_position_based(self): + # GH#22046 + ser1 = Series([1, 2, 3]) + ser2 = Series([4, 5, 6], index=[1, 0, 2]) + ser1.iloc[1:3] = ser2.iloc[1:3] + expected = Series([1, 5, 6]) + tm.assert_series_equal(ser1, expected) + + def test_iloc_nullable_int64_size_1_nan(self): + # GH 31861 + result = DataFrame({"a": ["test"], "b": [np.nan]}) + result.loc[:, "b"] = result.loc[:, "b"].astype("Int64") + expected = DataFrame({"a": ["test"], "b": array([NA], dtype="Int64")}) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_indexers.py b/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_indexers.py new file mode 100644 index 0000000..ddc5c03 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_indexers.py @@ -0,0 +1,61 @@ +# Tests aimed at pandas.core.indexers +import numpy as np +import pytest + +from pandas.core.indexers import ( + is_scalar_indexer, + length_of_indexer, + validate_indices, +) + + +def 
test_length_of_indexer(): + arr = np.zeros(4, dtype=bool) + arr[0] = 1 + result = length_of_indexer(arr) + assert result == 1 + + +def test_is_scalar_indexer(): + indexer = (0, 1) + assert is_scalar_indexer(indexer, 2) + assert not is_scalar_indexer(indexer[0], 2) + + indexer = (np.array([2]), 1) + assert not is_scalar_indexer(indexer, 2) + + indexer = (np.array([2]), np.array([3])) + assert not is_scalar_indexer(indexer, 2) + + indexer = (np.array([2]), np.array([3, 4])) + assert not is_scalar_indexer(indexer, 2) + + assert not is_scalar_indexer(slice(None), 1) + + indexer = 0 + assert is_scalar_indexer(indexer, 1) + + indexer = (0,) + assert is_scalar_indexer(indexer, 1) + + +class TestValidateIndices: + def test_validate_indices_ok(self): + indices = np.asarray([0, 1]) + validate_indices(indices, 2) + validate_indices(indices[:0], 0) + validate_indices(np.array([-1, -1]), 0) + + def test_validate_indices_low(self): + indices = np.asarray([0, -2]) + with pytest.raises(ValueError, match="'indices' contains"): + validate_indices(indices, 2) + + def test_validate_indices_high(self): + indices = np.asarray([0, 1, 2]) + with pytest.raises(IndexError, match="indices are out"): + validate_indices(indices, 2) + + def test_validate_indices_empty(self): + with pytest.raises(IndexError, match="indices are out"): + validate_indices(np.array([0, 1]), 0) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_indexing.py b/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_indexing.py new file mode 100644 index 0000000..36176bb --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_indexing.py @@ -0,0 +1,987 @@ +""" test fancy indexing & misc """ + +from datetime import datetime +import re +import weakref + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas.core.dtypes.common import ( + is_float_dtype, + is_integer_dtype, +) + +import pandas as pd +from pandas import ( + DataFrame, + Index, + NaT, + Series, + date_range, + offsets, + timedelta_range, +) +import pandas._testing as tm +from pandas.core.api import Float64Index +from pandas.tests.indexing.common import _mklbl +from pandas.tests.indexing.test_floats import gen_obj + +# ------------------------------------------------------------------------ +# Indexing test cases + + +class TestFancy: + """pure get/set item & fancy indexing""" + + def test_setitem_ndarray_1d(self): + # GH5508 + + # len of indexer vs length of the 1d ndarray + df = DataFrame(index=Index(np.arange(1, 11))) + df["foo"] = np.zeros(10, dtype=np.float64) + df["bar"] = np.zeros(10, dtype=complex) + + # invalid + msg = "Must have equal len keys and value when setting with an iterable" + with pytest.raises(ValueError, match=msg): + df.loc[df.index[2:5], "bar"] = np.array([2.33j, 1.23 + 0.1j, 2.2, 1.0]) + + # valid + df.loc[df.index[2:6], "bar"] = np.array([2.33j, 1.23 + 0.1j, 2.2, 1.0]) + + result = df.loc[df.index[2:6], "bar"] + expected = Series( + [2.33j, 1.23 + 0.1j, 2.2, 1.0], index=[3, 4, 5, 6], name="bar" + ) + tm.assert_series_equal(result, expected) + + def test_setitem_ndarray_1d_2(self): + # GH5508 + + # dtype getting changed? 
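+ # (editor's sketch, not pandas source) the guard tested here: assigning an + # iterable through an indexer requires matching lengths, e.g. three target + # rows cannot take four values: + # >>> import numpy as np + # >>> import pandas as pd + # >>> df = pd.DataFrame({"bar": np.zeros(10, dtype=complex)}) + # >>> df.loc[df.index[2:5], "bar"] = np.array([1j, 2j, 3j, 4j]) + # ValueError: Must have equal len keys and value when setting with an iterable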
+ df = DataFrame(index=Index(np.arange(1, 11))) + df["foo"] = np.zeros(10, dtype=np.float64) + df["bar"] = np.zeros(10, dtype=complex) + + msg = "Must have equal len keys and value when setting with an iterable" + with pytest.raises(ValueError, match=msg): + df[2:5] = np.arange(1, 4) * 1j + + def test_getitem_ndarray_3d( + self, index, frame_or_series, indexer_sli, using_array_manager + ): + # GH 25567 + obj = gen_obj(frame_or_series, index) + idxr = indexer_sli(obj) + nd3 = np.random.randint(5, size=(2, 2, 2)) + + msgs = [] + if frame_or_series is Series and indexer_sli in [tm.setitem, tm.iloc]: + msgs.append(r"Wrong number of dimensions. values.ndim > ndim \[3 > 1\]") + if using_array_manager: + msgs.append("Passed array should be 1-dimensional") + if frame_or_series is Series or indexer_sli is tm.iloc: + msgs.append(r"Buffer has wrong number of dimensions \(expected 1, got 3\)") + if using_array_manager: + msgs.append("indexer should be 1-dimensional") + if indexer_sli is tm.loc or ( + frame_or_series is Series and indexer_sli is tm.setitem + ): + msgs.append("Cannot index with multidimensional key") + if frame_or_series is DataFrame and indexer_sli is tm.setitem: + msgs.append("Index data must be 1-dimensional") + if isinstance(index, pd.IntervalIndex) and indexer_sli is tm.iloc: + msgs.append("Index data must be 1-dimensional") + if isinstance(index, (pd.TimedeltaIndex, pd.DatetimeIndex, pd.PeriodIndex)): + msgs.append("Data must be 1-dimensional") + if len(index) == 0 or isinstance(index, pd.MultiIndex): + msgs.append("positional indexers are out-of-bounds") + if type(index) is Index and not isinstance(index._values, np.ndarray): + # e.g. Int64 + msgs.append("values must be a 1D array") + + # string[pyarrow] + msgs.append("only handle 1-dimensional arrays") + + msg = "|".join(msgs) + + potential_errors = (IndexError, ValueError, NotImplementedError) + with pytest.raises(potential_errors, match=msg): + idxr[nd3] + + def test_setitem_ndarray_3d(self, index, frame_or_series, indexer_sli): + # GH 25567 + obj = gen_obj(frame_or_series, index) + idxr = indexer_sli(obj) + nd3 = np.random.randint(5, size=(2, 2, 2)) + + if indexer_sli is tm.iloc: + err = ValueError + msg = f"Cannot set values with ndim > {obj.ndim}" + else: + err = ValueError + msg = "|".join( + [ + r"Buffer has wrong number of dimensions \(expected 1, got 3\)", + "Cannot set values with ndim > 1", + "Index data must be 1-dimensional", + "Data must be 1-dimensional", + "Array conditional must be same shape as self", + ] + ) + + with pytest.raises(err, match=msg): + idxr[nd3] = 0 + + def test_getitem_ndarray_0d(self): + # GH#24924 + key = np.array(0) + + # dataframe __getitem__ + df = DataFrame([[1, 2], [3, 4]]) + result = df[key] + expected = Series([1, 3], name=0) + tm.assert_series_equal(result, expected) + + # series __getitem__ + ser = Series([1, 2]) + result = ser[key] + assert result == 1 + + def test_inf_upcast(self): + # GH 16957 + # We should be able to use np.inf as a key + # np.inf should cause an index to convert to float + + # Test with np.inf in rows + df = DataFrame(columns=[0]) + df.loc[1] = 1 + df.loc[2] = 2 + df.loc[np.inf] = 3 + + # make sure we can look up the value + assert df.loc[np.inf, 0] == 3 + + result = df.index + expected = Float64Index([1, 2, np.inf]) + tm.assert_index_equal(result, expected) + + def test_setitem_dtype_upcast(self): + + # GH3216 + df = DataFrame([{"a": 1}, {"a": 3, "b": 2}]) + df["c"] = np.nan + assert df["c"].dtype == np.float64 + + df.loc[0, "c"] = "foo" + expected = DataFrame( 
+ [{"a": 1, "b": np.nan, "c": "foo"}, {"a": 3, "b": 2, "c": np.nan}] + ) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize("val", [3.14, "wxyz"]) + def test_setitem_dtype_upcast2(self, val): + + # GH10280 + df = DataFrame( + np.arange(6, dtype="int64").reshape(2, 3), + index=list("ab"), + columns=["foo", "bar", "baz"], + ) + + left = df.copy() + left.loc["a", "bar"] = val + right = DataFrame( + [[0, val, 2], [3, 4, 5]], + index=list("ab"), + columns=["foo", "bar", "baz"], + ) + + tm.assert_frame_equal(left, right) + assert is_integer_dtype(left["foo"]) + assert is_integer_dtype(left["baz"]) + + def test_setitem_dtype_upcast3(self): + left = DataFrame( + np.arange(6, dtype="int64").reshape(2, 3) / 10.0, + index=list("ab"), + columns=["foo", "bar", "baz"], + ) + left.loc["a", "bar"] = "wxyz" + + right = DataFrame( + [[0, "wxyz", 0.2], [0.3, 0.4, 0.5]], + index=list("ab"), + columns=["foo", "bar", "baz"], + ) + + tm.assert_frame_equal(left, right) + assert is_float_dtype(left["foo"]) + assert is_float_dtype(left["baz"]) + + def test_dups_fancy_indexing(self): + + # GH 3455 + + df = tm.makeCustomDataframe(10, 3) + df.columns = ["a", "a", "b"] + result = df[["b", "a"]].columns + expected = Index(["b", "a", "a"]) + tm.assert_index_equal(result, expected) + + def test_dups_fancy_indexing_across_dtypes(self): + + # across dtypes + df = DataFrame([[1, 2, 1.0, 2.0, 3.0, "foo", "bar"]], columns=list("aaaaaaa")) + df.head() + str(df) + result = DataFrame([[1, 2, 1.0, 2.0, 3.0, "foo", "bar"]]) + result.columns = list("aaaaaaa") # GH#3468 + + # GH#3509 smoke tests for indexing with duplicate columns + df.iloc[:, 4] + result.iloc[:, 4] + + tm.assert_frame_equal(df, result) + + def test_dups_fancy_indexing_not_in_order(self): + # GH 3561, dups not in selected order + df = DataFrame( + {"test": [5, 7, 9, 11], "test1": [4.0, 5, 6, 7], "other": list("abcd")}, + index=["A", "A", "B", "C"], + ) + rows = ["C", "B"] + expected = DataFrame( + {"test": [11, 9], "test1": [7.0, 6], "other": ["d", "c"]}, index=rows + ) + result = df.loc[rows] + tm.assert_frame_equal(result, expected) + + result = df.loc[Index(rows)] + tm.assert_frame_equal(result, expected) + + rows = ["C", "B", "E"] + with pytest.raises(KeyError, match="not in index"): + df.loc[rows] + + # see GH5553, make sure we use the right indexer + rows = ["F", "G", "H", "C", "B", "E"] + with pytest.raises(KeyError, match="not in index"): + df.loc[rows] + + def test_dups_fancy_indexing_only_missing_label(self): + + # List containing only missing label + dfnu = DataFrame(np.random.randn(5, 3), index=list("AABCD")) + with pytest.raises( + KeyError, + match=re.escape( + "\"None of [Index(['E'], dtype='object')] are in the [index]\"" + ), + ): + dfnu.loc[["E"]] + + @pytest.mark.parametrize("vals", [[0, 1, 2], list("abc")]) + def test_dups_fancy_indexing_missing_label(self, vals): + + # GH 4619; duplicate indexer with missing label + df = DataFrame({"A": vals}) + with pytest.raises(KeyError, match="not in index"): + df.loc[[0, 8, 0]] + + def test_dups_fancy_indexing_non_unique(self): + + # non unique with non unique selector + df = DataFrame({"test": [5, 7, 9, 11]}, index=["A", "A", "B", "C"]) + with pytest.raises(KeyError, match="not in index"): + df.loc[["A", "A", "E"]] + + def test_dups_fancy_indexing2(self): + # GH 5835 + # dups on index and missing values + df = DataFrame(np.random.randn(5, 5), columns=["A", "B", "B", "B", "A"]) + + with pytest.raises(KeyError, match="not in index"): + df.loc[:, ["A", "B", "C"]] + + def 
test_dups_fancy_indexing3(self): + + # GH 6504, multi-axis indexing + df = DataFrame( + np.random.randn(9, 2), index=[1, 1, 1, 2, 2, 2, 3, 3, 3], columns=["a", "b"] + ) + + expected = df.iloc[0:6] + result = df.loc[[1, 2]] + tm.assert_frame_equal(result, expected) + + expected = df + result = df.loc[:, ["a", "b"]] + tm.assert_frame_equal(result, expected) + + expected = df.iloc[0:6, :] + result = df.loc[[1, 2], ["a", "b"]] + tm.assert_frame_equal(result, expected) + + def test_duplicate_int_indexing(self, indexer_sl): + # GH 17347 + ser = Series(range(3), index=[1, 1, 3]) + expected = Series(range(2), index=[1, 1]) + result = indexer_sl(ser)[[1]] + tm.assert_series_equal(result, expected) + + def test_indexing_mixed_frame_bug(self): + + # GH3492 + df = DataFrame( + {"a": {1: "aaa", 2: "bbb", 3: "ccc"}, "b": {1: 111, 2: 222, 3: 333}} + ) + + # this works, new column is created correctly + df["test"] = df["a"].apply(lambda x: "_" if x == "aaa" else x) + + # this does not work, ie column test is not changed + idx = df["test"] == "_" + temp = df.loc[idx, "a"].apply(lambda x: "-----" if x == "aaa" else x) + df.loc[idx, "test"] = temp + assert df.iloc[0, 2] == "-----" + + def test_multitype_list_index_access(self): + # GH 10610 + df = DataFrame(np.random.random((10, 5)), columns=["a"] + [20, 21, 22, 23]) + + with pytest.raises(KeyError, match=re.escape("'[26, -8] not in index'")): + df[[22, 26, -8]] + assert df[21].shape[0] == df.shape[0] + + def test_set_index_nan(self): + + # GH 3586 + df = DataFrame( + { + "PRuid": { + 17: "nonQC", + 18: "nonQC", + 19: "nonQC", + 20: "10", + 21: "11", + 22: "12", + 23: "13", + 24: "24", + 25: "35", + 26: "46", + 27: "47", + 28: "48", + 29: "59", + 30: "10", + }, + "QC": { + 17: 0.0, + 18: 0.0, + 19: 0.0, + 20: np.nan, + 21: np.nan, + 22: np.nan, + 23: np.nan, + 24: 1.0, + 25: np.nan, + 26: np.nan, + 27: np.nan, + 28: np.nan, + 29: np.nan, + 30: np.nan, + }, + "data": { + 17: 7.9544899999999998, + 18: 8.0142609999999994, + 19: 7.8591520000000008, + 20: 0.86140349999999999, + 21: 0.87853110000000001, + 22: 0.8427041999999999, + 23: 0.78587700000000005, + 24: 0.73062459999999996, + 25: 0.81668560000000001, + 26: 0.81927080000000008, + 27: 0.80705009999999999, + 28: 0.81440240000000008, + 29: 0.80140849999999997, + 30: 0.81307740000000006, + }, + "year": { + 17: 2006, + 18: 2007, + 19: 2008, + 20: 1985, + 21: 1985, + 22: 1985, + 23: 1985, + 24: 1985, + 25: 1985, + 26: 1985, + 27: 1985, + 28: 1985, + 29: 1985, + 30: 1986, + }, + } + ).reset_index() + + result = ( + df.set_index(["year", "PRuid", "QC"]) + .reset_index() + .reindex(columns=df.columns) + ) + tm.assert_frame_equal(result, df) + + def test_multi_assign(self): + + # GH 3626, an assignment of a sub-df to a df + df = DataFrame( + { + "FC": ["a", "b", "a", "b", "a", "b"], + "PF": [0, 0, 0, 0, 1, 1], + "col1": list(range(6)), + "col2": list(range(6, 12)), + } + ) + df.iloc[1, 0] = np.nan + df2 = df.copy() + + mask = ~df2.FC.isna() + cols = ["col1", "col2"] + + dft = df2 * 2 + dft.iloc[3, 3] = np.nan + + expected = DataFrame( + { + "FC": ["a", np.nan, "a", "b", "a", "b"], + "PF": [0, 0, 0, 0, 1, 1], + "col1": Series([0, 1, 4, 6, 8, 10]), + "col2": [12, 7, 16, np.nan, 20, 22], + } + ) + + # frame on rhs + df2.loc[mask, cols] = dft.loc[mask, cols] + tm.assert_frame_equal(df2, expected) + + # with an ndarray on rhs + # coerces to float64 because values has float64 dtype + # GH 14001 + expected = DataFrame( + { + "FC": ["a", np.nan, "a", "b", "a", "b"], + "PF": [0, 0, 0, 0, 1, 1], + "col1": [0.0, 1.0, 4.0, 6.0, 
8.0, 10.0], + "col2": [12, 7, 16, np.nan, 20, 22], + } + ) + df2 = df.copy() + df2.loc[mask, cols] = dft.loc[mask, cols].values + tm.assert_frame_equal(df2, expected) + + def test_multi_assign_broadcasting_rhs(self): + # broadcasting on the rhs is required + df = DataFrame( + { + "A": [1, 2, 0, 0, 0], + "B": [0, 0, 0, 10, 11], + "C": [0, 0, 0, 10, 11], + "D": [3, 4, 5, 6, 7], + } + ) + + expected = df.copy() + mask = expected["A"] == 0 + for col in ["A", "B"]: + expected.loc[mask, col] = df["D"] + + df.loc[df["A"] == 0, ["A", "B"]] = df["D"] + tm.assert_frame_equal(df, expected) + + # TODO(ArrayManager) setting single item with an iterable doesn't work yet + # in the "split" path + @td.skip_array_manager_not_yet_implemented + def test_setitem_list(self): + + # GH 6043 + # iloc with a list + df = DataFrame(index=[0, 1], columns=[0]) + df.iloc[1, 0] = [1, 2, 3] + df.iloc[1, 0] = [1, 2] + + result = DataFrame(index=[0, 1], columns=[0]) + result.iloc[1, 0] = [1, 2] + + tm.assert_frame_equal(result, df) + + def test_string_slice(self): + # GH 14424 + # string indexing against datetimelike with object + # dtype should properly raises KeyError + df = DataFrame([1], Index([pd.Timestamp("2011-01-01")], dtype=object)) + assert df.index._is_all_dates + with pytest.raises(KeyError, match="'2011'"): + df["2011"] + + with pytest.raises(KeyError, match="'2011'"): + df.loc["2011", 0] + + def test_string_slice_empty(self): + # GH 14424 + + df = DataFrame() + assert not df.index._is_all_dates + with pytest.raises(KeyError, match="'2011'"): + df["2011"] + + with pytest.raises(KeyError, match="^0$"): + df.loc["2011", 0] + + def test_astype_assignment(self): + + # GH4312 (iloc) + df_orig = DataFrame( + [["1", "2", "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG") + ) + + df = df_orig.copy() + df.iloc[:, 0:2] = df.iloc[:, 0:2].astype(np.int64) + expected = DataFrame( + [[1, 2, "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG") + ) + tm.assert_frame_equal(df, expected) + + df = df_orig.copy() + df.iloc[:, 0:2] = df.iloc[:, 0:2]._convert(datetime=True, numeric=True) + expected = DataFrame( + [[1, 2, "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG") + ) + tm.assert_frame_equal(df, expected) + + # GH5702 (loc) + df = df_orig.copy() + df.loc[:, "A"] = df.loc[:, "A"].astype(np.int64) + expected = DataFrame( + [[1, "2", "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG") + ) + tm.assert_frame_equal(df, expected) + + df = df_orig.copy() + df.loc[:, ["B", "C"]] = df.loc[:, ["B", "C"]].astype(np.int64) + expected = DataFrame( + [["1", 2, 3, ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG") + ) + tm.assert_frame_equal(df, expected) + + def test_astype_assignment_full_replacements(self): + # full replacements / no nans + df = DataFrame({"A": [1.0, 2.0, 3.0, 4.0]}) + df.iloc[:, 0] = df["A"].astype(np.int64) + expected = DataFrame({"A": [1, 2, 3, 4]}) + tm.assert_frame_equal(df, expected) + + df = DataFrame({"A": [1.0, 2.0, 3.0, 4.0]}) + df.loc[:, "A"] = df["A"].astype(np.int64) + expected = DataFrame({"A": [1, 2, 3, 4]}) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize("indexer", [tm.getitem, tm.loc]) + def test_index_type_coercion(self, indexer): + + # GH 11836 + # if we have an index type and set it with something that looks + # to numpy like the same, but is actually, not + # (e.g. 
setting with a float or string '0') + # then we need to coerce to object + + # integer indexes + for s in [Series(range(5)), Series(range(5), index=range(1, 6))]: + + assert s.index.is_integer() + + s2 = s.copy() + indexer(s2)[0.1] = 0 + assert s2.index.is_floating() + assert indexer(s2)[0.1] == 0 + + s2 = s.copy() + indexer(s2)[0.0] = 0 + exp = s.index + if 0 not in s: + exp = Index(s.index.tolist() + [0]) + tm.assert_index_equal(s2.index, exp) + + s2 = s.copy() + indexer(s2)["0"] = 0 + assert s2.index.is_object() + + for s in [Series(range(5), index=np.arange(5.0))]: + + assert s.index.is_floating() + + s2 = s.copy() + indexer(s2)[0.1] = 0 + assert s2.index.is_floating() + assert indexer(s2)[0.1] == 0 + + s2 = s.copy() + indexer(s2)[0.0] = 0 + tm.assert_index_equal(s2.index, s.index) + + s2 = s.copy() + indexer(s2)["0"] = 0 + assert s2.index.is_object() + + +class TestMisc: + def test_float_index_to_mixed(self): + df = DataFrame({0.0: np.random.rand(10), 1.0: np.random.rand(10)}) + df["a"] = 10 + + expected = DataFrame({0.0: df[0.0], 1.0: df[1.0], "a": [10] * 10}) + tm.assert_frame_equal(expected, df) + + def test_float_index_non_scalar_assignment(self): + df = DataFrame({"a": [1, 2, 3], "b": [3, 4, 5]}, index=[1.0, 2.0, 3.0]) + df.loc[df.index[:2]] = 1 + expected = DataFrame({"a": [1, 1, 3], "b": [1, 1, 5]}, index=df.index) + tm.assert_frame_equal(expected, df) + + def test_loc_setitem_fullindex_views(self): + df = DataFrame({"a": [1, 2, 3], "b": [3, 4, 5]}, index=[1.0, 2.0, 3.0]) + df2 = df.copy() + df.loc[df.index] = df.loc[df.index] + tm.assert_frame_equal(df, df2) + + def test_rhs_alignment(self): + # GH8258, tests that both rows & columns are aligned to what is + # assigned to. covers both uniform data-type & multi-type cases + def run_tests(df, rhs, right_loc, right_iloc): + # label, index, slice + lbl_one, idx_one, slice_one = list("bcd"), [1, 2, 3], slice(1, 4) + lbl_two, idx_two, slice_two = ["joe", "jolie"], [1, 2], slice(1, 3) + + left = df.copy() + left.loc[lbl_one, lbl_two] = rhs + tm.assert_frame_equal(left, right_loc) + + left = df.copy() + left.iloc[idx_one, idx_two] = rhs + tm.assert_frame_equal(left, right_iloc) + + left = df.copy() + left.iloc[slice_one, slice_two] = rhs + tm.assert_frame_equal(left, right_iloc) + + xs = np.arange(20).reshape(5, 4) + cols = ["jim", "joe", "jolie", "joline"] + df = DataFrame(xs, columns=cols, index=list("abcde"), dtype="int64") + + # right hand side; permute the indices and multiply by -2 + rhs = -2 * df.iloc[3:0:-1, 2:0:-1] + + # expected `right` result; just multiply by -2 + right_iloc = df.copy() + right_iloc["joe"] = [1, 14, 10, 6, 17] + right_iloc["jolie"] = [2, 13, 9, 5, 18] + right_iloc.iloc[1:4, 1:3] *= -2 + right_loc = df.copy() + right_loc.iloc[1:4, 1:3] *= -2 + + # run tests with uniform dtypes + run_tests(df, rhs, right_loc, right_iloc) + + # make frames multi-type & re-run tests + for frame in [df, rhs, right_loc, right_iloc]: + frame["joe"] = frame["joe"].astype("float64") + frame["jolie"] = frame["jolie"].map("@{}".format) + right_iloc["joe"] = [1.0, "@-28", "@-20", "@-12", 17.0] + right_iloc["jolie"] = ["@2", -26.0, -18.0, -10.0, "@18"] + run_tests(df, rhs, right_loc, right_iloc) + + def test_str_label_slicing_with_negative_step(self): + SLC = pd.IndexSlice + + for idx in [_mklbl("A", 20), np.arange(20) + 100, np.linspace(100, 150, 20)]: + idx = Index(idx) + ser = Series(np.arange(20), index=idx) + tm.assert_indexing_slices_equivalent(ser, SLC[idx[9] :: -1], SLC[9::-1]) + tm.assert_indexing_slices_equivalent(ser, SLC[:
idx[9] : -1], SLC[:8:-1]) + tm.assert_indexing_slices_equivalent( + ser, SLC[idx[13] : idx[9] : -1], SLC[13:8:-1] + ) + tm.assert_indexing_slices_equivalent( + ser, SLC[idx[9] : idx[13] : -1], SLC[:0] + ) + + def test_slice_with_zero_step_raises(self, index, indexer_sl, frame_or_series): + obj = frame_or_series(np.arange(len(index)), index=index) + with pytest.raises(ValueError, match="slice step cannot be zero"): + indexer_sl(obj)[::0] + + def test_loc_setitem_indexing_assignment_dict_already_exists(self): + index = Index([-5, 0, 5], name="z") + df = DataFrame({"x": [1, 2, 6], "y": [2, 2, 8]}, index=index) + expected = df.copy() + rhs = {"x": 9, "y": 99} + df.loc[5] = rhs + expected.loc[5] = [9, 99] + tm.assert_frame_equal(df, expected) + + # GH#38335 same thing, mixed dtypes + df = DataFrame({"x": [1, 2, 6], "y": [2.0, 2.0, 8.0]}, index=index) + df.loc[5] = rhs + expected = DataFrame({"x": [1, 2, 9], "y": [2.0, 2.0, 99.0]}, index=index) + tm.assert_frame_equal(df, expected) + + def test_iloc_getitem_indexing_dtypes_on_empty(self): + # Check that .iloc returns correct dtypes GH9983 + df = DataFrame({"a": [1, 2, 3], "b": ["b", "b2", "b3"]}) + df2 = df.iloc[[], :] + + assert df2.loc[:, "a"].dtype == np.int64 + tm.assert_series_equal(df2.loc[:, "a"], df2.iloc[:, 0]) + + @pytest.mark.parametrize("size", [5, 999999, 1000000]) + def test_loc_range_in_series_indexing(self, size): + # range can cause an indexing error + # GH 11652 + s = Series(index=range(size), dtype=np.float64) + s.loc[range(1)] = 42 + tm.assert_series_equal(s.loc[range(1)], Series(42.0, index=[0])) + + s.loc[range(2)] = 43 + tm.assert_series_equal(s.loc[range(2)], Series(43.0, index=[0, 1])) + + def test_partial_boolean_frame_indexing(self): + # GH 17170 + df = DataFrame( + np.arange(9.0).reshape(3, 3), index=list("abc"), columns=list("ABC") + ) + index_df = DataFrame(1, index=list("ab"), columns=list("AB")) + result = df[index_df.notnull()] + expected = DataFrame( + np.array([[0.0, 1.0, np.nan], [3.0, 4.0, np.nan], [np.nan] * 3]), + index=list("abc"), + columns=list("ABC"), + ) + tm.assert_frame_equal(result, expected) + + def test_no_reference_cycle(self): + df = DataFrame({"a": [0, 1], "b": [2, 3]}) + for name in ("loc", "iloc", "at", "iat"): + getattr(df, name) + wr = weakref.ref(df) + del df + assert wr() is None + + def test_label_indexing_on_nan(self, nulls_fixture): + # GH 32431 + df = Series([1, "{1,2}", 1, nulls_fixture]) + vc = df.value_counts(dropna=False) + result1 = vc.loc[nulls_fixture] + result2 = vc[nulls_fixture] + + expected = 1 + assert result1 == expected + assert result2 == expected + + +class TestDataframeNoneCoercion: + EXPECTED_SINGLE_ROW_RESULTS = [ + # For numeric series, we should coerce to NaN. + ([1, 2, 3], [np.nan, 2, 3]), + ([1.0, 2.0, 3.0], [np.nan, 2.0, 3.0]), + # For datetime series, we should coerce to NaT. + ( + [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], + [NaT, datetime(2000, 1, 2), datetime(2000, 1, 3)], + ), + # For objects, we should preserve the None value. 
+ (["foo", "bar", "baz"], [None, "bar", "baz"]), + ] + + @pytest.mark.parametrize("expected", EXPECTED_SINGLE_ROW_RESULTS) + def test_coercion_with_loc(self, expected): + start_data, expected_result = expected + + start_dataframe = DataFrame({"foo": start_data}) + start_dataframe.loc[0, ["foo"]] = None + + expected_dataframe = DataFrame({"foo": expected_result}) + tm.assert_frame_equal(start_dataframe, expected_dataframe) + + @pytest.mark.parametrize("expected", EXPECTED_SINGLE_ROW_RESULTS) + def test_coercion_with_setitem_and_dataframe(self, expected): + start_data, expected_result = expected + + start_dataframe = DataFrame({"foo": start_data}) + start_dataframe[start_dataframe["foo"] == start_dataframe["foo"][0]] = None + + expected_dataframe = DataFrame({"foo": expected_result}) + tm.assert_frame_equal(start_dataframe, expected_dataframe) + + @pytest.mark.parametrize("expected", EXPECTED_SINGLE_ROW_RESULTS) + def test_none_coercion_loc_and_dataframe(self, expected): + start_data, expected_result = expected + + start_dataframe = DataFrame({"foo": start_data}) + start_dataframe.loc[start_dataframe["foo"] == start_dataframe["foo"][0]] = None + + expected_dataframe = DataFrame({"foo": expected_result}) + tm.assert_frame_equal(start_dataframe, expected_dataframe) + + def test_none_coercion_mixed_dtypes(self): + start_dataframe = DataFrame( + { + "a": [1, 2, 3], + "b": [1.0, 2.0, 3.0], + "c": [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], + "d": ["a", "b", "c"], + } + ) + start_dataframe.iloc[0] = None + + exp = DataFrame( + { + "a": [np.nan, 2, 3], + "b": [np.nan, 2.0, 3.0], + "c": [NaT, datetime(2000, 1, 2), datetime(2000, 1, 3)], + "d": [None, "b", "c"], + } + ) + tm.assert_frame_equal(start_dataframe, exp) + + +class TestDatetimelikeCoercion: + def test_setitem_dt64_string_scalar(self, tz_naive_fixture, indexer_sli): + # dispatching _can_hold_element to underlying DatetimeArray + tz = tz_naive_fixture + + dti = date_range("2016-01-01", periods=3, tz=tz) + ser = Series(dti) + + values = ser._values + + newval = "2018-01-01" + values._validate_setitem_value(newval) + + indexer_sli(ser)[0] = newval + + if tz is None: + # TODO(EA2D): we can make this no-copy in tz-naive case too + assert ser.dtype == dti.dtype + assert ser._values._data is values._data + else: + assert ser._values is values + + @pytest.mark.parametrize("box", [list, np.array, pd.array, pd.Categorical, Index]) + @pytest.mark.parametrize( + "key", [[0, 1], slice(0, 2), np.array([True, True, False])] + ) + def test_setitem_dt64_string_values(self, tz_naive_fixture, indexer_sli, key, box): + # dispatching _can_hold_element to underling DatetimeArray + tz = tz_naive_fixture + + if isinstance(key, slice) and indexer_sli is tm.loc: + key = slice(0, 1) + + dti = date_range("2016-01-01", periods=3, tz=tz) + ser = Series(dti) + + values = ser._values + + newvals = box(["2019-01-01", "2010-01-02"]) + values._validate_setitem_value(newvals) + + indexer_sli(ser)[key] = newvals + + if tz is None: + # TODO(EA2D): we can make this no-copy in tz-naive case too + assert ser.dtype == dti.dtype + assert ser._values._data is values._data + else: + assert ser._values is values + + @pytest.mark.parametrize("scalar", ["3 Days", offsets.Hour(4)]) + def test_setitem_td64_scalar(self, indexer_sli, scalar): + # dispatching _can_hold_element to underling TimedeltaArray + tdi = timedelta_range("1 Day", periods=3) + ser = Series(tdi) + + values = ser._values + values._validate_setitem_value(scalar) + + indexer_sli(ser)[0] = scalar + 
assert ser._values._data is values._data + + @pytest.mark.parametrize("box", [list, np.array, pd.array, pd.Categorical, Index]) + @pytest.mark.parametrize( + "key", [[0, 1], slice(0, 2), np.array([True, True, False])] + ) + def test_setitem_td64_string_values(self, indexer_sli, key, box): + # dispatching _can_hold_element to underlying TimedeltaArray + if isinstance(key, slice) and indexer_sli is tm.loc: + key = slice(0, 1) + + tdi = timedelta_range("1 Day", periods=3) + ser = Series(tdi) + + values = ser._values + + newvals = box(["10 Days", "44 hours"]) + values._validate_setitem_value(newvals) + + indexer_sli(ser)[key] = newvals + assert ser._values._data is values._data + + +def test_extension_array_cross_section(): + # A cross-section of a homogeneous EA should be an EA + df = DataFrame( + { + "A": pd.array([1, 2], dtype="Int64"), + "B": pd.array([3, 4], dtype="Int64"), + }, + index=["a", "b"], + ) + expected = Series(pd.array([1, 3], dtype="Int64"), index=["A", "B"], name="a") + result = df.loc["a"] + tm.assert_series_equal(result, expected) + + result = df.iloc[0] + tm.assert_series_equal(result, expected) + + +def test_extension_array_cross_section_converts(): + # all numeric columns -> numeric series + df = DataFrame( + { + "A": pd.array([1, 2], dtype="Int64"), + "B": np.array([1, 2], dtype="int64"), + }, + index=["a", "b"], + ) + result = df.loc["a"] + expected = Series([1, 1], dtype="Int64", index=["A", "B"], name="a") + tm.assert_series_equal(result, expected) + + result = df.iloc[0] + tm.assert_series_equal(result, expected) + + # mixed columns -> object series + df = DataFrame( + {"A": pd.array([1, 2], dtype="Int64"), "B": np.array(["a", "b"])}, + index=["a", "b"], + ) + result = df.loc["a"] + expected = Series([1, "a"], dtype=object, index=["A", "B"], name="a") + tm.assert_series_equal(result, expected) + + result = df.iloc[0] + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_loc.py b/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_loc.py new file mode 100644 index 0000000..63c5091 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_loc.py @@ -0,0 +1,2985 @@ +""" test label based indexing with loc """ +from collections import namedtuple +from datetime import ( + date, + datetime, + time, + timedelta, +) +import re + +from dateutil.tz import gettz +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + Categorical, + CategoricalIndex, + DataFrame, + DatetimeIndex, + Index, + IndexSlice, + MultiIndex, + Period, + Series, + SparseDtype, + Timedelta, + Timestamp, + date_range, + timedelta_range, + to_datetime, + to_timedelta, +) +import pandas._testing as tm +from pandas.api.types import is_scalar +from pandas.core.api import Float64Index +from pandas.core.indexing import ( + IndexingError, + _one_ellipsis_message, +) +from pandas.tests.indexing.common import Base + + +class TestLoc(Base): + def test_loc_getitem_int(self): + + # int label + self.check_result("loc", 2, typs=["labels"], fails=KeyError) + + def test_loc_getitem_label(self): + + # label + self.check_result("loc", "c", typs=["empty"], fails=KeyError) + + def test_loc_getitem_label_out_of_range(self): + + # out of range label + self.check_result( + "loc", "f", typs=["ints", "uints", "labels", "mixed", "ts"], fails=KeyError + ) + self.check_result("loc", "f", typs=["floats"], fails=KeyError) + self.check_result("loc", "f", typs=["floats"], 
fails=KeyError) + self.check_result("loc", 20, typs=["ints", "uints", "mixed"], fails=KeyError) + self.check_result("loc", 20, typs=["labels"], fails=KeyError) + self.check_result("loc", 20, typs=["ts"], axes=0, fails=KeyError) + self.check_result("loc", 20, typs=["floats"], axes=0, fails=KeyError) + + def test_loc_getitem_label_list(self): + # list of labels + self.check_result( + "loc", [0, 1, 2], typs=["ints", "uints", "floats"], fails=KeyError + ) + self.check_result( + "loc", [1, 3.0, "A"], typs=["ints", "uints", "floats"], fails=KeyError + ) + + def test_loc_getitem_label_list_with_missing(self): + self.check_result("loc", [0, 1, 2], typs=["empty"], fails=KeyError) + self.check_result( + "loc", [0, 2, 10], typs=["ints", "uints", "floats"], axes=0, fails=KeyError + ) + + self.check_result( + "loc", [3, 6, 7], typs=["ints", "uints", "floats"], axes=1, fails=KeyError + ) + + # GH 17758 - MultiIndex and missing keys + self.check_result( + "loc", [(1, 3), (1, 4), (2, 5)], typs=["multi"], axes=0, fails=KeyError + ) + + def test_loc_getitem_label_list_fails(self): + # fails + self.check_result( + "loc", [20, 30, 40], typs=["ints", "uints"], axes=1, fails=KeyError + ) + + def test_loc_getitem_label_array_like(self): + # TODO: test something? + # array like + pass + + def test_loc_getitem_bool(self): + # boolean indexers + b = [True, False, True, False] + + self.check_result("loc", b, typs=["empty"], fails=IndexError) + + def test_loc_getitem_label_slice(self): + + # label slices (with ints) + + # real label slices + + # GH 14316 + + self.check_result( + "loc", + slice(1, 3), + typs=["labels", "mixed", "empty", "ts", "floats"], + fails=TypeError, + ) + + self.check_result( + "loc", slice("20130102", "20130104"), typs=["ts"], axes=1, fails=TypeError + ) + + self.check_result("loc", slice(2, 8), typs=["mixed"], axes=0, fails=TypeError) + self.check_result("loc", slice(2, 8), typs=["mixed"], axes=1, fails=KeyError) + + self.check_result( + "loc", slice(2, 4, 2), typs=["mixed"], axes=0, fails=TypeError + ) + + def test_setitem_from_duplicate_axis(self): + # GH#34034 + df = DataFrame( + [[20, "a"], [200, "a"], [200, "a"]], + columns=["col1", "col2"], + index=[10, 1, 1], + ) + df.loc[1, "col1"] = np.arange(2) + expected = DataFrame( + [[20, "a"], [0, "a"], [1, "a"]], columns=["col1", "col2"], index=[10, 1, 1] + ) + tm.assert_frame_equal(df, expected) + + def test_column_types_consistent(self): + # GH 26779 + df = DataFrame( + data={ + "channel": [1, 2, 3], + "A": ["String 1", np.NaN, "String 2"], + "B": [ + Timestamp("2019-06-11 11:00:00"), + pd.NaT, + Timestamp("2019-06-11 12:00:00"), + ], + } + ) + df2 = DataFrame( + data={"A": ["String 3"], "B": [Timestamp("2019-06-11 12:00:00")]} + ) + # Change Columns A and B to df2.values wherever Column A is NaN + df.loc[df["A"].isna(), ["A", "B"]] = df2.values + expected = DataFrame( + data={ + "channel": [1, 2, 3], + "A": ["String 1", "String 3", "String 2"], + "B": [ + Timestamp("2019-06-11 11:00:00"), + Timestamp("2019-06-11 12:00:00"), + Timestamp("2019-06-11 12:00:00"), + ], + } + ) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize( + "obj, key, exp", + [ + ( + DataFrame([[1]], columns=Index([False])), + IndexSlice[:, False], + Series([1], name=False), + ), + (Series([1], index=Index([False])), False, [1]), + (DataFrame([[1]], index=Index([False])), False, Series([1], name=False)), + ], + ) + def test_loc_getitem_single_boolean_arg(self, obj, key, exp): + # GH 44322 + res = obj.loc[key] + if isinstance(exp, (DataFrame, Series)): + 
tm.assert_equal(res, exp) + else: + assert res == exp + + +class TestLocBaseIndependent: + # Tests for loc that do not depend on subclassing Base + @pytest.mark.parametrize( + "msg, key", + [ + (r"Period\('2019', 'A-DEC'\), 'foo', 'bar'", (Period(2019), "foo", "bar")), + (r"Period\('2019', 'A-DEC'\), 'y1', 'bar'", (Period(2019), "y1", "bar")), + (r"Period\('2019', 'A-DEC'\), 'foo', 'z1'", (Period(2019), "foo", "z1")), + ( + r"Period\('2018', 'A-DEC'\), Period\('2016', 'A-DEC'\), 'bar'", + (Period(2018), Period(2016), "bar"), + ), + (r"Period\('2018', 'A-DEC'\), 'foo', 'y1'", (Period(2018), "foo", "y1")), + ( + r"Period\('2017', 'A-DEC'\), 'foo', Period\('2015', 'A-DEC'\)", + (Period(2017), "foo", Period(2015)), + ), + (r"Period\('2017', 'A-DEC'\), 'z1', 'bar'", (Period(2017), "z1", "bar")), + ], + ) + def test_contains_raise_error_if_period_index_is_in_multi_index(self, msg, key): + # GH#20684 + """ + parse_time_string returns its argument unchanged when the type does not match. + PeriodIndex.get_loc takes the value returned from parse_time_string as a tuple. + If the first element is a Period and the tuple has 3 items, + processing continues instead of raising an exception. + """ + df = DataFrame( + { + "A": [Period(2019), "x1", "x2"], + "B": [Period(2018), Period(2016), "y1"], + "C": [Period(2017), "z1", Period(2015)], + "V1": [1, 2, 3], + "V2": [10, 20, 30], + } + ).set_index(["A", "B", "C"]) + with pytest.raises(KeyError, match=msg): + df.loc[key] + + def test_loc_getitem_missing_unicode_key(self): + df = DataFrame({"a": [1]}) + with pytest.raises(KeyError, match="\u05d0"): + df.loc[:, "\u05d0"] # should not raise UnicodeEncodeError + + def test_loc_getitem_dups(self): + # GH 5678 + # repeated getitems on a dup index returning an ndarray + df = DataFrame( + np.random.random_sample((20, 5)), index=["ABCDE"[x % 5] for x in range(20)] + ) + expected = df.loc["A", 0] + result = df.loc[:, 0].loc["A"] + tm.assert_series_equal(result, expected) + + def test_loc_getitem_dups2(self): + + # GH4726 + # dup indexing with iloc/loc + df = DataFrame( + [[1, 2, "foo", "bar", Timestamp("20130101")]], + columns=["a", "a", "a", "a", "a"], + index=[1], + ) + expected = Series( + [1, 2, "foo", "bar", Timestamp("20130101")], + index=["a", "a", "a", "a", "a"], + name=1, + ) + + result = df.iloc[0] + tm.assert_series_equal(result, expected) + + result = df.loc[1] + tm.assert_series_equal(result, expected) + + def test_loc_setitem_dups(self): + + # GH 6541 + df_orig = DataFrame( + { + "me": list("rttti"), + "foo": list("aaade"), + "bar": np.arange(5, dtype="float64") * 1.34 + 2, + "bar2": np.arange(5, dtype="float64") * -0.34 + 2, + } + ).set_index("me") + + indexer = ( + "r", + ["bar", "bar2"], + ) + df = df_orig.copy() + df.loc[indexer] *= 2.0 + tm.assert_series_equal(df.loc[indexer], 2.0 * df_orig.loc[indexer]) + + indexer = ( + "r", + "bar", + ) + df = df_orig.copy() + df.loc[indexer] *= 2.0 + assert df.loc[indexer] == 2.0 * df_orig.loc[indexer] + + indexer = ( + "t", + ["bar", "bar2"], + ) + df = df_orig.copy() + df.loc[indexer] *= 2.0 + tm.assert_frame_equal(df.loc[indexer], 2.0 * df_orig.loc[indexer]) + + def test_loc_setitem_slice(self): + # GH10503 + + # assigning the same type should not change the type + df1 = DataFrame({"a": [0, 1, 1], "b": Series([100, 200, 300], dtype="uint32")}) + ix = df1["a"] == 1 + newb1 = df1.loc[ix, "b"] + 1 + df1.loc[ix, "b"] = newb1 + expected = DataFrame( + {"a": [0, 1, 1], "b": Series([100, 201, 301], dtype="uint32")} + ) + tm.assert_frame_equal(df1, expected) + + # assigning a new type should get the inferred type + df2 = 
DataFrame({"a": [0, 1, 1], "b": [100, 200, 300]}, dtype="uint64") + ix = df1["a"] == 1 + newb2 = df2.loc[ix, "b"] + df1.loc[ix, "b"] = newb2 + expected = DataFrame({"a": [0, 1, 1], "b": [100, 200, 300]}, dtype="uint64") + tm.assert_frame_equal(df2, expected) + + def test_loc_setitem_dtype(self): + # GH31340 + df = DataFrame({"id": ["A"], "a": [1.2], "b": [0.0], "c": [-2.5]}) + cols = ["a", "b", "c"] + df.loc[:, cols] = df.loc[:, cols].astype("float32") + + expected = DataFrame( + { + "id": ["A"], + "a": np.array([1.2], dtype="float32"), + "b": np.array([0.0], dtype="float32"), + "c": np.array([-2.5], dtype="float32"), + } + ) # id is inferred as object + + tm.assert_frame_equal(df, expected) + + def test_getitem_label_list_with_missing(self): + s = Series(range(3), index=["a", "b", "c"]) + + # consistency + with pytest.raises(KeyError, match="not in index"): + s[["a", "d"]] + + s = Series(range(3)) + with pytest.raises(KeyError, match="not in index"): + s[[0, 3]] + + @pytest.mark.parametrize("index", [[True, False], [True, False, True, False]]) + def test_loc_getitem_bool_diff_len(self, index): + # GH26658 + s = Series([1, 2, 3]) + msg = f"Boolean index has wrong length: {len(index)} instead of {len(s)}" + with pytest.raises(IndexError, match=msg): + s.loc[index] + + def test_loc_getitem_int_slice(self): + # TODO: test something here? + pass + + def test_loc_to_fail(self): + + # GH3449 + df = DataFrame( + np.random.random((3, 3)), index=["a", "b", "c"], columns=["e", "f", "g"] + ) + + # raise a KeyError? + msg = ( + r"\"None of \[Int64Index\(\[1, 2\], dtype='int64'\)\] are " + r"in the \[index\]\"" + ) + with pytest.raises(KeyError, match=msg): + df.loc[[1, 2], [1, 2]] + + def test_loc_to_fail2(self): + # GH 7496 + # loc should not fallback + + s = Series(dtype=object) + s.loc[1] = 1 + s.loc["a"] = 2 + + with pytest.raises(KeyError, match=r"^-1$"): + s.loc[-1] + + msg = ( + r"\"None of \[Int64Index\(\[-1, -2\], dtype='int64'\)\] are " + r"in the \[index\]\"" + ) + with pytest.raises(KeyError, match=msg): + s.loc[[-1, -2]] + + msg = r"\"None of \[Index\(\['4'\], dtype='object'\)\] are in the \[index\]\"" + with pytest.raises(KeyError, match=msg): + s.loc[["4"]] + + s.loc[-1] = 3 + with pytest.raises(KeyError, match="not in index"): + s.loc[[-1, -2]] + + s["a"] = 2 + msg = ( + r"\"None of \[Int64Index\(\[-2\], dtype='int64'\)\] are " + r"in the \[index\]\"" + ) + with pytest.raises(KeyError, match=msg): + s.loc[[-2]] + + del s["a"] + + with pytest.raises(KeyError, match=msg): + s.loc[[-2]] = 0 + + def test_loc_to_fail3(self): + # inconsistency between .loc[values] and .loc[values,:] + # GH 7999 + df = DataFrame([["a"], ["b"]], index=[1, 2], columns=["value"]) + + msg = ( + r"\"None of \[Int64Index\(\[3\], dtype='int64'\)\] are " + r"in the \[index\]\"" + ) + with pytest.raises(KeyError, match=msg): + df.loc[[3], :] + + with pytest.raises(KeyError, match=msg): + df.loc[[3]] + + def test_loc_getitem_list_with_fail(self): + # 15747 + # should KeyError if *any* missing labels + + s = Series([1, 2, 3]) + + s.loc[[2]] + + with pytest.raises( + KeyError, + match=re.escape( + "\"None of [Int64Index([3], dtype='int64')] are in the [index]\"" + ), + ): + s.loc[[3]] + + # a non-match and a match + with pytest.raises(KeyError, match="not in index"): + s.loc[[2, 3]] + + def test_loc_index(self): + # gh-17131 + # a boolean index should index like a boolean numpy array + + df = DataFrame( + np.random.random(size=(5, 10)), + index=["alpha_0", "alpha_1", "alpha_2", "beta_0", "beta_1"], + ) + + mask = 
df.index.map(lambda x: "alpha" in x) + expected = df.loc[np.array(mask)] + + result = df.loc[mask] + tm.assert_frame_equal(result, expected) + + result = df.loc[mask.values] + tm.assert_frame_equal(result, expected) + + result = df.loc[pd.array(mask, dtype="boolean")] + tm.assert_frame_equal(result, expected) + + def test_loc_general(self): + + df = DataFrame( + np.random.rand(4, 4), + columns=["A", "B", "C", "D"], + index=["A", "B", "C", "D"], + ) + + # want this to work + result = df.loc[:, "A":"B"].iloc[0:2, :] + assert (result.columns == ["A", "B"]).all() + assert (result.index == ["A", "B"]).all() + + # mixed type + result = DataFrame({"a": [Timestamp("20130101")], "b": [1]}).iloc[0] + expected = Series([Timestamp("20130101"), 1], index=["a", "b"], name=0) + tm.assert_series_equal(result, expected) + assert result.dtype == object + + @pytest.fixture + def frame_for_consistency(self): + return DataFrame( + { + "date": date_range("2000-01-01", "2000-01-5"), + "val": Series(range(5), dtype=np.int64), + } + ) + + @pytest.mark.parametrize( + "val", + [0, np.array(0, dtype=np.int64), np.array([0, 0, 0, 0, 0], dtype=np.int64)], + ) + def test_loc_setitem_consistency(self, frame_for_consistency, val): + # GH 6149 + # coerce similarly for setitem and loc when rows have a null-slice + expected = DataFrame( + { + "date": Series(0, index=range(5), dtype=np.int64), + "val": Series(range(5), dtype=np.int64), + } + ) + df = frame_for_consistency.copy() + df.loc[:, "date"] = val + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_consistency_dt64_to_str(self, frame_for_consistency): + # GH 6149 + # coerce similarly for setitem and loc when rows have a null-slice + + expected = DataFrame( + { + "date": Series("foo", index=range(5)), + "val": Series(range(5), dtype=np.int64), + } + ) + df = frame_for_consistency.copy() + df.loc[:, "date"] = "foo" + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_consistency_dt64_to_float(self, frame_for_consistency): + # GH 6149 + # coerce similarly for setitem and loc when rows have a null-slice + expected = DataFrame( + { + "date": Series(1.0, index=range(5)), + "val": Series(range(5), dtype=np.int64), + } + ) + df = frame_for_consistency.copy() + df.loc[:, "date"] = 1.0 + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_consistency_single_row(self): + # GH 15494 + # setting on frame with single row + df = DataFrame({"date": Series([Timestamp("20180101")])}) + df.loc[:, "date"] = "string" + expected = DataFrame({"date": Series(["string"])}) + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_consistency_empty(self): + # empty (essentially noops) + expected = DataFrame(columns=["x", "y"]) + expected["x"] = expected["x"].astype(np.int64) + df = DataFrame(columns=["x", "y"]) + df.loc[:, "x"] = 1 + tm.assert_frame_equal(df, expected) + + df = DataFrame(columns=["x", "y"]) + df["x"] = 1 + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_consistency_slice_column_len(self): + # .loc[:,column] setting with slice == len of the column + # GH10408 + levels = [ + ["Region_1"] * 4, + ["Site_1", "Site_1", "Site_2", "Site_2"], + [3987227376, 3980680971, 3977723249, 3977723089], + ] + mi = MultiIndex.from_arrays(levels, names=["Region", "Site", "RespondentID"]) + + clevels = [ + ["Respondent", "Respondent", "Respondent", "OtherCat", "OtherCat"], + ["Something", "StartDate", "EndDate", "Yes/No", "SomethingElse"], + ] + cols = MultiIndex.from_arrays(clevels, names=["Level_0", "Level_1"]) + + values = [ + ["A", "5/25/2015 
10:59", "5/25/2015 11:22", "Yes", np.nan], + ["A", "5/21/2015 9:40", "5/21/2015 9:52", "Yes", "Yes"], + ["A", "5/20/2015 8:27", "5/20/2015 8:41", "Yes", np.nan], + ["A", "5/20/2015 8:33", "5/20/2015 9:09", "Yes", "No"], + ] + df = DataFrame(values, index=mi, columns=cols) + + df.loc[:, ("Respondent", "StartDate")] = to_datetime( + df.loc[:, ("Respondent", "StartDate")] + ) + df.loc[:, ("Respondent", "EndDate")] = to_datetime( + df.loc[:, ("Respondent", "EndDate")] + ) + df.loc[:, ("Respondent", "Duration")] = ( + df.loc[:, ("Respondent", "EndDate")] + - df.loc[:, ("Respondent", "StartDate")] + ) + + df.loc[:, ("Respondent", "Duration")] = df.loc[ + :, ("Respondent", "Duration") + ].astype("timedelta64[s]") + expected = Series( + [1380, 720, 840, 2160.0], index=df.index, name=("Respondent", "Duration") + ) + tm.assert_series_equal(df[("Respondent", "Duration")], expected) + + @pytest.mark.parametrize("unit", ["Y", "M", "D", "h", "m", "s", "ms", "us"]) + def test_loc_assign_non_ns_datetime(self, unit): + # GH 27395, non-ns dtype assignment via .loc should work + # and return the same result when using simple assignment + df = DataFrame( + { + "timestamp": [ + np.datetime64("2017-02-11 12:41:29"), + np.datetime64("1991-11-07 04:22:37"), + ] + } + ) + + df.loc[:, unit] = df.loc[:, "timestamp"].values.astype(f"datetime64[{unit}]") + df["expected"] = df.loc[:, "timestamp"].values.astype(f"datetime64[{unit}]") + expected = Series(df.loc[:, "expected"], name=unit) + tm.assert_series_equal(df.loc[:, unit], expected) + + def test_loc_modify_datetime(self): + # see gh-28837 + df = DataFrame.from_dict( + {"date": [1485264372711, 1485265925110, 1540215845888, 1540282121025]} + ) + + df["date_dt"] = to_datetime(df["date"], unit="ms", cache=True) + + df.loc[:, "date_dt_cp"] = df.loc[:, "date_dt"] + df.loc[[2, 3], "date_dt_cp"] = df.loc[[2, 3], "date_dt"] + + expected = DataFrame( + [ + [1485264372711, "2017-01-24 13:26:12.711", "2017-01-24 13:26:12.711"], + [1485265925110, "2017-01-24 13:52:05.110", "2017-01-24 13:52:05.110"], + [1540215845888, "2018-10-22 13:44:05.888", "2018-10-22 13:44:05.888"], + [1540282121025, "2018-10-23 08:08:41.025", "2018-10-23 08:08:41.025"], + ], + columns=["date", "date_dt", "date_dt_cp"], + ) + + columns = ["date_dt", "date_dt_cp"] + expected[columns] = expected[columns].apply(to_datetime) + + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_frame_with_reindex(self, using_array_manager): + # GH#6254 setting issue + df = DataFrame(index=[3, 5, 4], columns=["A"], dtype=float) + df.loc[[4, 3, 5], "A"] = np.array([1, 2, 3], dtype="int64") + + # setting integer values into a float dataframe with loc is inplace, + # so we retain float dtype + ser = Series([2, 3, 1], index=[3, 5, 4], dtype=float) + if using_array_manager: + # TODO(ArrayManager) with "split" path, we still overwrite the column + # and therefore don't take the dtype of the underlying object into account + ser = Series([2, 3, 1], index=[3, 5, 4], dtype="int64") + expected = DataFrame({"A": ser}) + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_frame_with_reindex_mixed(self): + # GH#40480 + df = DataFrame(index=[3, 5, 4], columns=["A", "B"], dtype=float) + df["B"] = "string" + df.loc[[4, 3, 5], "A"] = np.array([1, 2, 3], dtype="int64") + ser = Series([2, 3, 1], index=[3, 5, 4], dtype="int64") + expected = DataFrame({"A": ser}) + expected["B"] = "string" + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_frame_with_inverted_slice(self): + # GH#40480 + df = DataFrame(index=[1, 2, 
3], columns=["A", "B"], dtype=float) + df["B"] = "string" + df.loc[slice(3, 0, -1), "A"] = np.array([1, 2, 3], dtype="int64") + expected = DataFrame({"A": [3, 2, 1], "B": "string"}, index=[1, 2, 3]) + tm.assert_frame_equal(df, expected) + + # TODO(ArrayManager) "split" path overwrites column and therefore don't take + # the dtype of the underlying object into account + @td.skip_array_manager_not_yet_implemented + def test_loc_setitem_empty_frame(self): + # GH#6252 setting with an empty frame + keys1 = ["@" + str(i) for i in range(5)] + val1 = np.arange(5, dtype="int64") + + keys2 = ["@" + str(i) for i in range(4)] + val2 = np.arange(4, dtype="int64") + + index = list(set(keys1).union(keys2)) + df = DataFrame(index=index) + df["A"] = np.nan + df.loc[keys1, "A"] = val1 + + df["B"] = np.nan + df.loc[keys2, "B"] = val2 + + # Because df["A"] was initialized as float64, setting values into it + # is inplace, so that dtype is retained + sera = Series(val1, index=keys1, dtype=np.float64) + serb = Series(val2, index=keys2) + expected = DataFrame({"A": sera, "B": serb}).reindex(index=index) + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_frame(self): + df = DataFrame(np.random.randn(4, 4), index=list("abcd"), columns=list("ABCD")) + + result = df.iloc[0, 0] + + df.loc["a", "A"] = 1 + result = df.loc["a", "A"] + assert result == 1 + + result = df.iloc[0, 0] + assert result == 1 + + df.loc[:, "B":"D"] = 0 + expected = df.loc[:, "B":"D"] + result = df.iloc[:, 1:] + tm.assert_frame_equal(result, expected) + + def test_loc_setitem_frame_nan_int_coercion_invalid(self): + # GH 8669 + # invalid coercion of nan -> int + df = DataFrame({"A": [1, 2, 3], "B": np.nan}) + df.loc[df.B > df.A, "B"] = df.A + expected = DataFrame({"A": [1, 2, 3], "B": np.nan}) + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_frame_mixed_labels(self): + # GH 6546 + # setting with mixed labels + df = DataFrame({1: [1, 2], 2: [3, 4], "a": ["a", "b"]}) + + result = df.loc[0, [1, 2]] + expected = Series( + [1, 3], index=Index([1, 2], dtype=object), dtype=object, name=0 + ) + tm.assert_series_equal(result, expected) + + expected = DataFrame({1: [5, 2], 2: [6, 4], "a": ["a", "b"]}) + df.loc[0, [1, 2]] = [5, 6] + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_frame_multiples(self): + # multiple setting + df = DataFrame( + {"A": ["foo", "bar", "baz"], "B": Series(range(3), dtype=np.int64)} + ) + rhs = df.loc[1:2] + rhs.index = df.index[0:2] + df.loc[0:1] = rhs + expected = DataFrame( + {"A": ["bar", "baz", "baz"], "B": Series([1, 2, 2], dtype=np.int64)} + ) + tm.assert_frame_equal(df, expected) + + # multiple setting with frame on rhs (with M8) + df = DataFrame( + { + "date": date_range("2000-01-01", "2000-01-5"), + "val": Series(range(5), dtype=np.int64), + } + ) + expected = DataFrame( + { + "date": [ + Timestamp("20000101"), + Timestamp("20000102"), + Timestamp("20000101"), + Timestamp("20000102"), + Timestamp("20000103"), + ], + "val": Series([0, 1, 0, 1, 2], dtype=np.int64), + } + ) + rhs = df.loc[0:2] + rhs.index = df.index[2:5] + df.loc[2:4] = rhs + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize( + "indexer", [["A"], slice(None, "A", None), np.array(["A"])] + ) + @pytest.mark.parametrize("value", [["Z"], np.array(["Z"])]) + def test_loc_setitem_with_scalar_index(self, indexer, value): + # GH #19474 + # assigning like "df.loc[0, ['A']] = ['Z']" should be evaluated + # elementwisely, not using "setter('A', ['Z'])". 
+ + df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + df.loc[0, indexer] = value + result = df.loc[0, "A"] + + assert is_scalar(result) and result == "Z" + + @pytest.mark.parametrize( + "index,box,expected", + [ + ( + ([0, 2], ["A", "B", "C", "D"]), + 7, + DataFrame( + [[7, 7, 7, 7], [3, 4, np.nan, np.nan], [7, 7, 7, 7]], + columns=["A", "B", "C", "D"], + ), + ), + ( + (1, ["C", "D"]), + [7, 8], + DataFrame( + [[1, 2, np.nan, np.nan], [3, 4, 7, 8], [5, 6, np.nan, np.nan]], + columns=["A", "B", "C", "D"], + ), + ), + ( + (1, ["A", "B", "C"]), + np.array([7, 8, 9], dtype=np.int64), + DataFrame( + [[1, 2, np.nan], [7, 8, 9], [5, 6, np.nan]], columns=["A", "B", "C"] + ), + ), + ( + (slice(1, 3, None), ["B", "C", "D"]), + [[7, 8, 9], [10, 11, 12]], + DataFrame( + [[1, 2, np.nan, np.nan], [3, 7, 8, 9], [5, 10, 11, 12]], + columns=["A", "B", "C", "D"], + ), + ), + ( + (slice(1, 3, None), ["C", "A", "D"]), + np.array([[7, 8, 9], [10, 11, 12]], dtype=np.int64), + DataFrame( + [[1, 2, np.nan, np.nan], [8, 4, 7, 9], [11, 6, 10, 12]], + columns=["A", "B", "C", "D"], + ), + ), + ( + (slice(None, None, None), ["A", "C"]), + DataFrame([[7, 8], [9, 10], [11, 12]], columns=["A", "C"]), + DataFrame( + [[7, 2, 8], [9, 4, 10], [11, 6, 12]], columns=["A", "B", "C"] + ), + ), + ], + ) + def test_loc_setitem_missing_columns(self, index, box, expected): + # GH 29334 + df = DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"]) + df.loc[index] = box + tm.assert_frame_equal(df, expected) + + def test_loc_coercion(self): + + # GH#12411 + df = DataFrame({"date": [Timestamp("20130101").tz_localize("UTC"), pd.NaT]}) + expected = df.dtypes + + result = df.iloc[[0]] + tm.assert_series_equal(result.dtypes, expected) + + result = df.iloc[[1]] + tm.assert_series_equal(result.dtypes, expected) + + def test_loc_coercion2(self): + # GH#12045 + import datetime + + df = DataFrame( + {"date": [datetime.datetime(2012, 1, 1), datetime.datetime(1012, 1, 2)]} + ) + expected = df.dtypes + + result = df.iloc[[0]] + tm.assert_series_equal(result.dtypes, expected) + + result = df.iloc[[1]] + tm.assert_series_equal(result.dtypes, expected) + + def test_loc_coercion3(self): + # GH#11594 + df = DataFrame({"text": ["some words"] + [None] * 9}) + expected = df.dtypes + + result = df.iloc[0:2] + tm.assert_series_equal(result.dtypes, expected) + + result = df.iloc[3:] + tm.assert_series_equal(result.dtypes, expected) + + def test_setitem_new_key_tz(self, indexer_sl): + # GH#12862 should not raise on assigning the second value + vals = [ + to_datetime(42).tz_localize("UTC"), + to_datetime(666).tz_localize("UTC"), + ] + expected = Series(vals, index=["foo", "bar"]) + + ser = Series(dtype=object) + indexer_sl(ser)["foo"] = vals[0] + indexer_sl(ser)["bar"] = vals[1] + + tm.assert_series_equal(ser, expected) + + def test_loc_non_unique(self): + # GH3659 + # non-unique indexer with loc slice + # https://groups.google.com/forum/?fromgroups#!topic/pydata/zTm2No0crYs + + # these are going to raise because we are non-monotonic + df = DataFrame( + {"A": [1, 2, 3, 4, 5, 6], "B": [3, 4, 5, 6, 7, 8]}, index=[0, 1, 0, 1, 2, 3] + ) + msg = "'Cannot get left slice bound for non-unique label: 1'" + with pytest.raises(KeyError, match=msg): + df.loc[1:] + msg = "'Cannot get left slice bound for non-unique label: 0'" + with pytest.raises(KeyError, match=msg): + df.loc[0:] + msg = "'Cannot get left slice bound for non-unique label: 1'" + with pytest.raises(KeyError, match=msg): + df.loc[1:2] + + # monotonic indexes are ok + df = DataFrame( + {"A": [1, 2, 3, 4, 5, 
6], "B": [3, 4, 5, 6, 7, 8]}, index=[0, 1, 0, 1, 2, 3] + ).sort_index(axis=0) + result = df.loc[1:] + expected = DataFrame({"A": [2, 4, 5, 6], "B": [4, 6, 7, 8]}, index=[1, 1, 2, 3]) + tm.assert_frame_equal(result, expected) + + result = df.loc[0:] + tm.assert_frame_equal(result, df) + + result = df.loc[1:2] + expected = DataFrame({"A": [2, 4, 5], "B": [4, 6, 7]}, index=[1, 1, 2]) + tm.assert_frame_equal(result, expected) + + @pytest.mark.arm_slow + def test_loc_non_unique_memory_error(self): + + # GH 4280 + # non_unique index with a large selection triggers a memory error + + columns = list("ABCDEFG") + + def gen_test(length, l2): + return pd.concat( + [ + DataFrame( + np.random.randn(length, len(columns)), + index=np.arange(length), + columns=columns, + ), + DataFrame( + np.ones((l2, len(columns))), index=[0] * l2, columns=columns + ), + ] + ) + + def gen_expected(df, mask): + len_mask = len(mask) + return pd.concat( + [ + df.take([0]), + DataFrame( + np.ones((len_mask, len(columns))), + index=[0] * len_mask, + columns=columns, + ), + df.take(mask[1:]), + ] + ) + + df = gen_test(900, 100) + assert df.index.is_unique is False + + mask = np.arange(100) + result = df.loc[mask] + expected = gen_expected(df, mask) + tm.assert_frame_equal(result, expected) + + df = gen_test(900000, 100000) + assert df.index.is_unique is False + + mask = np.arange(100000) + result = df.loc[mask] + expected = gen_expected(df, mask) + tm.assert_frame_equal(result, expected) + + def test_loc_name(self): + # GH 3880 + df = DataFrame([[1, 1], [1, 1]]) + df.index.name = "index_name" + result = df.iloc[[0, 1]].index.name + assert result == "index_name" + + result = df.loc[[0, 1]].index.name + assert result == "index_name" + + def test_loc_empty_list_indexer_is_ok(self): + + df = tm.makeCustomDataframe(5, 2) + # vertical empty + tm.assert_frame_equal( + df.loc[:, []], df.iloc[:, :0], check_index_type=True, check_column_type=True + ) + # horizontal empty + tm.assert_frame_equal( + df.loc[[], :], df.iloc[:0, :], check_index_type=True, check_column_type=True + ) + # horizontal empty + tm.assert_frame_equal( + df.loc[[]], df.iloc[:0, :], check_index_type=True, check_column_type=True + ) + + def test_identity_slice_returns_new_object(self, using_array_manager, request): + # GH13873 + if using_array_manager: + mark = pytest.mark.xfail( + reason="setting with .loc[:, 'a'] does not alter inplace" + ) + request.node.add_marker(mark) + + original_df = DataFrame({"a": [1, 2, 3]}) + sliced_df = original_df.loc[:] + assert sliced_df is not original_df + assert original_df[:] is not original_df + + # should be a shallow copy + assert np.shares_memory(original_df["a"]._values, sliced_df["a"]._values) + + # Setting using .loc[:, "a"] sets inplace so alters both sliced and orig + original_df.loc[:, "a"] = [4, 4, 4] + assert (sliced_df["a"] == 4).all() + + # These should not return copies + assert original_df is original_df.loc[:, :] + df = DataFrame(np.random.randn(10, 4)) + assert df[0] is df.loc[:, 0] + + # Same tests for Series + original_series = Series([1, 2, 3, 4, 5, 6]) + sliced_series = original_series.loc[:] + assert sliced_series is not original_series + assert original_series[:] is not original_series + + original_series[:3] = [7, 8, 9] + assert all(sliced_series[:3] == [7, 8, 9]) + + @pytest.mark.xfail(reason="accidental fix reverted - GH37497") + def test_loc_copy_vs_view(self): + # GH 15631 + x = DataFrame(zip(range(3), range(3)), columns=["a", "b"]) + + y = x.copy() + q = y.loc[:, "a"] + q += 2 + + 
tm.assert_frame_equal(x, y) + + z = x.copy() + q = z.loc[x.index, "a"] + q += 2 + + tm.assert_frame_equal(x, z) + + def test_loc_uint64(self): + # GH20722 + # Test whether loc accepts the uint64 max value as an index. + umax = np.iinfo("uint64").max + ser = Series([1, 2], index=[umax - 1, umax]) + + result = ser.loc[umax - 1] + expected = ser.iloc[0] + assert result == expected + + result = ser.loc[[umax - 1]] + expected = ser.iloc[[0]] + tm.assert_series_equal(result, expected) + + result = ser.loc[[umax - 1, umax]] + tm.assert_series_equal(result, ser) + + def test_loc_uint64_disallow_negative(self): + # GH#41775 + umax = np.iinfo("uint64").max + ser = Series([1, 2], index=[umax - 1, umax]) + + with pytest.raises(KeyError, match="-1"): + # don't wrap around + ser.loc[-1] + + with pytest.raises(KeyError, match="-1"): + # don't wrap around + ser.loc[[-1]] + + def test_loc_setitem_empty_append_expands_rows(self): + # GH6173, various appends to an empty dataframe + + data = [1, 2, 3] + expected = DataFrame({"x": data, "y": [None] * len(data)}) + + # appends to fit length of data + df = DataFrame(columns=["x", "y"]) + df.loc[:, "x"] = data + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_empty_append_expands_rows_mixed_dtype(self): + # GH#37932 same as test_loc_setitem_empty_append_expands_rows + # but with mixed dtype so we go through take_split_path + data = [1, 2, 3] + expected = DataFrame({"x": data, "y": [None] * len(data)}) + + df = DataFrame(columns=["x", "y"]) + df["x"] = df["x"].astype(np.int64) + df.loc[:, "x"] = data + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_empty_append_single_value(self): + # only appends one value + expected = DataFrame({"x": [1.0], "y": [np.nan]}) + df = DataFrame(columns=["x", "y"], dtype=float) + df.loc[0, "x"] = expected.loc[0, "x"] + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_empty_append_raises(self): + # GH6173, various appends to an empty dataframe + + data = [1, 2] + df = DataFrame(columns=["x", "y"]) + df.index = df.index.astype(np.int64) + msg = ( + r"None of \[Int64Index\(\[0, 1\], dtype='int64'\)\] " + r"are in the \[index\]" + ) + with pytest.raises(KeyError, match=msg): + df.loc[[0, 1], "x"] = data + + msg = "|".join( + [ + "cannot copy sequence with size 2 to array axis with dimension 0", + r"could not broadcast input array from shape \(2,\) into shape \(0,\)", + "Must have equal len keys and value when setting with an iterable", + ] + ) + with pytest.raises(ValueError, match=msg): + df.loc[0:2, "x"] = data + + def test_indexing_zerodim_np_array(self): + # GH24924 + df = DataFrame([[1, 2], [3, 4]]) + result = df.loc[np.array(0)] + s = Series([1, 2], name=0) + tm.assert_series_equal(result, s) + + def test_series_indexing_zerodim_np_array(self): + # GH24924 + s = Series([1, 2]) + result = s.loc[np.array(0)] + assert result == 1 + + def test_loc_reverse_assignment(self): + # GH26939 + data = [1, 2, 3, 4, 5, 6] + [None] * 4 + expected = Series(data, index=range(2010, 2020)) + + result = Series(index=range(2010, 2020), dtype=np.float64) + result.loc[2015:2010:-1] = [6, 5, 4, 3, 2, 1] + + tm.assert_series_equal(result, expected) + + def test_loc_setitem_str_to_small_float_conversion_type(self): + # GH#20388 + np.random.seed(13) + col_data = [str(np.random.random() * 1e-12) for _ in range(5)] + result = DataFrame(col_data, columns=["A"]) + expected = DataFrame(col_data, columns=["A"], dtype=object) + tm.assert_frame_equal(result, expected) + + # assigning with loc/iloc attempts to set the values inplace, 
which + # in this case is successful + result.loc[result.index, "A"] = [float(x) for x in col_data] + expected = DataFrame(col_data, columns=["A"], dtype=float).astype(object) + tm.assert_frame_equal(result, expected) + + # assigning the entire column using __setitem__ swaps in the new array + # GH#??? + result["A"] = [float(x) for x in col_data] + expected = DataFrame(col_data, columns=["A"], dtype=float) + tm.assert_frame_equal(result, expected) + + def test_loc_getitem_time_object(self, frame_or_series): + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + mask = (rng.hour == 9) & (rng.minute == 30) + + obj = DataFrame(np.random.randn(len(rng), 3), index=rng) + obj = tm.get_obj(obj, frame_or_series) + + result = obj.loc[time(9, 30)] + exp = obj.loc[mask] + tm.assert_equal(result, exp) + + chunk = obj.loc["1/4/2000":] + result = chunk.loc[time(9, 30)] + expected = result[-1:] + + # Without resetting the freqs, these are 5 min and 1440 min, respectively + result.index = result.index._with_freq(None) + expected.index = expected.index._with_freq(None) + tm.assert_equal(result, expected) + + @pytest.mark.parametrize("spmatrix_t", ["coo_matrix", "csc_matrix", "csr_matrix"]) + @pytest.mark.parametrize("dtype", [np.int64, np.float64, complex]) + @td.skip_if_no_scipy + def test_loc_getitem_range_from_spmatrix(self, spmatrix_t, dtype): + import scipy.sparse + + spmatrix_t = getattr(scipy.sparse, spmatrix_t) + + # The bug is triggered by a sparse matrix with purely sparse columns. So the + # recipe below generates a rectangular matrix of dimension (5, 7) where all the + # diagonal cells are ones, meaning the last two columns are purely sparse. + rows, cols = 5, 7 + spmatrix = spmatrix_t(np.eye(rows, cols, dtype=dtype), dtype=dtype) + df = DataFrame.sparse.from_spmatrix(spmatrix) + + # regression test for GH#34526 + itr_idx = range(2, rows) + result = df.loc[itr_idx].values + expected = spmatrix.toarray()[itr_idx] + tm.assert_numpy_array_equal(result, expected) + + # regression test for GH#34540 + result = df.loc[itr_idx].dtypes.values + expected = np.full(cols, SparseDtype(dtype, fill_value=0)) + tm.assert_numpy_array_equal(result, expected) + + def test_loc_getitem_listlike_all_retains_sparse(self): + df = DataFrame({"A": pd.array([0, 0], dtype=SparseDtype("int64"))}) + result = df.loc[[0, 1]] + tm.assert_frame_equal(result, df) + + @td.skip_if_no_scipy + def test_loc_getitem_sparse_frame(self): + # GH34687 + from scipy.sparse import eye + + df = DataFrame.sparse.from_spmatrix(eye(5)) + result = df.loc[range(2)] + expected = DataFrame( + [[1.0, 0.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0, 0.0]], + dtype=SparseDtype("float64", 0.0), + ) + tm.assert_frame_equal(result, expected) + + result = df.loc[range(2)].loc[range(1)] + expected = DataFrame( + [[1.0, 0.0, 0.0, 0.0, 0.0]], dtype=SparseDtype("float64", 0.0) + ) + tm.assert_frame_equal(result, expected) + + def test_loc_getitem_sparse_series(self): + # GH34687 + s = Series([1.0, 0.0, 0.0, 0.0, 0.0], dtype=SparseDtype("float64", 0.0)) + + result = s.loc[range(2)] + expected = Series([1.0, 0.0], dtype=SparseDtype("float64", 0.0)) + tm.assert_series_equal(result, expected) + + result = s.loc[range(3)].loc[range(2)] + expected = Series([1.0, 0.0], dtype=SparseDtype("float64", 0.0)) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("key_type", [iter, np.array, Series, Index]) + def test_loc_getitem_iterable(self, float_frame, key_type): + idx = key_type(["A", "B", "C"]) + result = float_frame.loc[:, idx] + expected = 
float_frame.loc[:, ["A", "B", "C"]] + tm.assert_frame_equal(result, expected) + + def test_loc_getitem_timedelta_0seconds(self): + # GH#10583 + df = DataFrame(np.random.normal(size=(10, 4))) + df.index = timedelta_range(start="0s", periods=10, freq="s") + expected = df.loc[Timedelta("0s") :, :] + result = df.loc["0s":, :] + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "val,expected", [(2 ** 63 - 1, Series([1])), (2 ** 63, Series([2]))] + ) + def test_loc_getitem_uint64_scalar(self, val, expected): + # see GH#19399 + df = DataFrame([1, 2], index=[2 ** 63 - 1, 2 ** 63]) + result = df.loc[val] + + expected.name = val + tm.assert_series_equal(result, expected) + + def test_loc_setitem_int_label_with_float64index(self): + # note labels are floats + ser = Series(["a", "b", "c"], index=[0, 0.5, 1]) + expected = ser.copy() + + ser.loc[1] = "zoo" + expected.iloc[2] = "zoo" + + tm.assert_series_equal(ser, expected) + + @pytest.mark.parametrize( + "indexer, expected", + [ + # The test name is a misnomer in the 0 case as df.index[indexer] + # is a scalar. + (0, [20, 1, 2, 3, 4, 5, 6, 7, 8, 9]), + (slice(4, 8), [0, 1, 2, 3, 20, 20, 20, 20, 8, 9]), + ([3, 5], [0, 1, 2, 20, 4, 20, 6, 7, 8, 9]), + ], + ) + def test_loc_setitem_listlike_with_timedelta64index(self, indexer, expected): + # GH#16637 + tdi = to_timedelta(range(10), unit="s") + df = DataFrame({"x": range(10)}, dtype="int64", index=tdi) + + df.loc[df.index[indexer], "x"] = 20 + + expected = DataFrame( + expected, + index=tdi, + columns=["x"], + dtype="int64", + ) + + tm.assert_frame_equal(expected, df) + + def test_loc_setitem_categorical_values_partial_column_slice(self): + # Assigning a Category to parts of a int/... column uses the values of + # the Categorical + df = DataFrame({"a": [1, 1, 1, 1, 1], "b": list("aaaaa")}) + exp = DataFrame({"a": [1, "b", "b", 1, 1], "b": list("aabba")}) + df.loc[1:2, "a"] = Categorical(["b", "b"], categories=["a", "b"]) + df.loc[2:3, "b"] = Categorical(["b", "b"], categories=["a", "b"]) + tm.assert_frame_equal(df, exp) + + def test_loc_setitem_single_row_categorical(self): + # GH#25495 + df = DataFrame({"Alpha": ["a"], "Numeric": [0]}) + categories = Categorical(df["Alpha"], categories=["a", "b", "c"]) + df.loc[:, "Alpha"] = categories + + result = df["Alpha"] + expected = Series(categories, index=df.index, name="Alpha") + tm.assert_series_equal(result, expected) + + def test_loc_setitem_datetime_coercion(self): + # GH#1048 + df = DataFrame({"c": [Timestamp("2010-10-01")] * 3}) + df.loc[0:1, "c"] = np.datetime64("2008-08-08") + assert Timestamp("2008-08-08") == df.loc[0, "c"] + assert Timestamp("2008-08-08") == df.loc[1, "c"] + df.loc[2, "c"] = date(2005, 5, 5) + with tm.assert_produces_warning(FutureWarning): + # Comparing Timestamp to date obj is deprecated + assert Timestamp("2005-05-05") == df.loc[2, "c"] + assert Timestamp("2005-05-05").date() == df.loc[2, "c"] + + @pytest.mark.parametrize("idxer", ["var", ["var"]]) + def test_loc_setitem_datetimeindex_tz(self, idxer, tz_naive_fixture): + # GH#11365 + tz = tz_naive_fixture + idx = date_range(start="2015-07-12", periods=3, freq="H", tz=tz) + expected = DataFrame(1.2, index=idx, columns=["var"]) + # if result started off with object dtype, then the .loc.__setitem__ + # below would retain object dtype + result = DataFrame(index=idx, columns=["var"], dtype=np.float64) + result.loc[:, idxer] = expected + tm.assert_frame_equal(result, expected) + + def test_loc_setitem_time_key(self, using_array_manager): + index = 
date_range("2012-01-01", "2012-01-05", freq="30min") + df = DataFrame(np.random.randn(len(index), 5), index=index) + akey = time(12, 0, 0) + bkey = slice(time(13, 0, 0), time(14, 0, 0)) + ainds = [24, 72, 120, 168] + binds = [26, 27, 28, 74, 75, 76, 122, 123, 124, 170, 171, 172] + + result = df.copy() + result.loc[akey] = 0 + result = result.loc[akey] + expected = df.loc[akey].copy() + expected.loc[:] = 0 + if using_array_manager: + # TODO(ArrayManager) we are still overwriting columns + expected = expected.astype(float) + tm.assert_frame_equal(result, expected) + + result = df.copy() + result.loc[akey] = 0 + result.loc[akey] = df.iloc[ainds] + tm.assert_frame_equal(result, df) + + result = df.copy() + result.loc[bkey] = 0 + result = result.loc[bkey] + expected = df.loc[bkey].copy() + expected.loc[:] = 0 + if using_array_manager: + # TODO(ArrayManager) we are still overwriting columns + expected = expected.astype(float) + tm.assert_frame_equal(result, expected) + + result = df.copy() + result.loc[bkey] = 0 + result.loc[bkey] = df.iloc[binds] + tm.assert_frame_equal(result, df) + + @pytest.mark.parametrize("key", ["A", ["A"], ("A", slice(None))]) + def test_loc_setitem_unsorted_multiindex_columns(self, key): + # GH#38601 + mi = MultiIndex.from_tuples([("A", 4), ("B", "3"), ("A", "2")]) + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=mi) + obj = df.copy() + obj.loc[:, key] = np.zeros((2, 2), dtype=int) + expected = DataFrame([[0, 2, 0], [0, 5, 0]], columns=mi) + tm.assert_frame_equal(obj, expected) + + df = df.sort_index(axis=1) + df.loc[:, key] = np.zeros((2, 2), dtype=int) + expected = expected.sort_index(axis=1) + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_uint_drop(self, any_int_numpy_dtype): + # see GH#18311 + # assigning series.loc[0] = 4 changed series.dtype to int + series = Series([1, 2, 3], dtype=any_int_numpy_dtype) + series.loc[0] = 4 + expected = Series([4, 2, 3], dtype=any_int_numpy_dtype) + tm.assert_series_equal(series, expected) + + def test_loc_setitem_td64_non_nano(self): + # GH#14155 + ser = Series(10 * [np.timedelta64(10, "m")]) + ser.loc[[1, 2, 3]] = np.timedelta64(20, "m") + expected = Series(10 * [np.timedelta64(10, "m")]) + expected.loc[[1, 2, 3]] = Timedelta(np.timedelta64(20, "m")) + tm.assert_series_equal(ser, expected) + + def test_loc_setitem_2d_to_1d_raises(self): + data = np.random.randn(2, 2) + ser = Series(range(2)) + + msg = "|".join( + [ + r"shape mismatch: value array of shape \(2,2\)", + r"cannot reshape array of size 4 into shape \(2,\)", + ] + ) + with pytest.raises(ValueError, match=msg): + ser.loc[range(2)] = data + + msg = r"could not broadcast input array from shape \(2,2\) into shape \(2,?\)" + with pytest.raises(ValueError, match=msg): + ser.loc[:] = data + + def test_loc_getitem_interval_index(self): + # GH#19977 + index = pd.interval_range(start=0, periods=3) + df = DataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=index, columns=["A", "B", "C"] + ) + + expected = 1 + result = df.loc[0.5, "A"] + tm.assert_almost_equal(result, expected) + + def test_loc_getitem_interval_index2(self): + # GH#19977 + index = pd.interval_range(start=0, periods=3, closed="both") + df = DataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=index, columns=["A", "B", "C"] + ) + + index_exp = pd.interval_range(start=0, periods=2, freq=1, closed="both") + expected = Series([1, 4], index=index_exp, name="A") + result = df.loc[1, "A"] + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("tpl", [(1,), (1, 2)]) + def 
test_loc_getitem_index_single_double_tuples(self, tpl): + # GH#20991 + idx = Index( + [(1,), (1, 2)], + name="A", + tupleize_cols=False, + ) + df = DataFrame(index=idx) + + result = df.loc[[tpl]] + idx = Index([tpl], name="A", tupleize_cols=False) + expected = DataFrame(index=idx) + tm.assert_frame_equal(result, expected) + + def test_loc_getitem_index_namedtuple(self): + IndexType = namedtuple("IndexType", ["a", "b"]) + idx1 = IndexType("foo", "bar") + idx2 = IndexType("baz", "bof") + index = Index([idx1, idx2], name="composite_index", tupleize_cols=False) + df = DataFrame([(1, 2), (3, 4)], index=index, columns=["A", "B"]) + + result = df.loc[IndexType("foo", "bar")]["A"] + assert result == 1 + + def test_loc_setitem_single_column_mixed(self): + df = DataFrame( + np.random.randn(5, 3), + index=["a", "b", "c", "d", "e"], + columns=["foo", "bar", "baz"], + ) + df["str"] = "qux" + df.loc[df.index[::2], "str"] = np.nan + expected = np.array([np.nan, "qux", np.nan, "qux", np.nan], dtype=object) + tm.assert_almost_equal(df["str"].values, expected) + + def test_loc_setitem_cast2(self): + # GH#7704 + # dtype conversion on setting + df = DataFrame(np.random.rand(30, 3), columns=tuple("ABC")) + df["event"] = np.nan + df.loc[10, "event"] = "foo" + result = df.dtypes + expected = Series( + [np.dtype("float64")] * 3 + [np.dtype("object")], + index=["A", "B", "C", "event"], + ) + tm.assert_series_equal(result, expected) + + def test_loc_setitem_cast3(self): + # Test that data type is preserved. GH#5782 + df = DataFrame({"one": np.arange(6, dtype=np.int8)}) + df.loc[1, "one"] = 6 + assert df.dtypes.one == np.dtype(np.int8) + df.one = np.int8(7) + assert df.dtypes.one == np.dtype(np.int8) + + +class TestLocWithEllipsis: + @pytest.fixture(params=[tm.loc, tm.iloc]) + def indexer(self, request): + # Test iloc while we're here + return request.param + + @pytest.fixture + def obj(self, series_with_simple_index, frame_or_series): + obj = series_with_simple_index + if frame_or_series is not Series: + obj = obj.to_frame() + return obj + + def test_loc_iloc_getitem_ellipsis(self, obj, indexer): + result = indexer(obj)[...] + tm.assert_equal(result, obj) + + def test_loc_iloc_getitem_leading_ellipses(self, series_with_simple_index, indexer): + obj = series_with_simple_index + key = 0 if (indexer is tm.iloc or len(obj) == 0) else obj.index[0] + + if indexer is tm.loc and obj.index.is_boolean(): + # passing [False] will get interpreted as a boolean mask + # TODO: should it? unambiguous when lengths don't match? + return + if indexer is tm.loc and isinstance(obj.index, MultiIndex): + msg = "MultiIndex does not support indexing with Ellipsis" + with pytest.raises(NotImplementedError, match=msg): + result = indexer(obj)[..., [key]] + + elif len(obj) != 0: + result = indexer(obj)[..., [key]] + expected = indexer(obj)[[key]] + tm.assert_series_equal(result, expected) + + key2 = 0 if indexer is tm.iloc else obj.name + df = obj.to_frame() + result = indexer(df)[..., [key2]] + expected = indexer(df)[:, [key2]] + tm.assert_frame_equal(result, expected) + + def test_loc_iloc_getitem_ellipses_only_one_ellipsis(self, obj, indexer): + # GH37750 + key = 0 if (indexer is tm.iloc or len(obj) == 0) else obj.index[0] + + with pytest.raises(IndexingError, match=_one_ellipsis_message): + indexer(obj)[..., ...] + + with pytest.raises(IndexingError, match=_one_ellipsis_message): + indexer(obj)[..., [key], ...] 
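+ # (Illustrative note: each of these lookups passes more than one "..." + # entry; at most one is allowed per indexer, so all of them must raise + # IndexingError.)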
+ + with pytest.raises(IndexingError, match=_one_ellipsis_message): + indexer(obj)[..., ..., key] + + # one_ellipsis_message takes precedence over "Too many indexers" + # only when the first key is Ellipsis + with pytest.raises(IndexingError, match="Too many indexers"): + indexer(obj)[key, ..., ...] + + +class TestLocWithMultiIndex: + @pytest.mark.parametrize( + "keys, expected", + [ + (["b", "a"], [["b", "b", "a", "a"], [1, 2, 1, 2]]), + (["a", "b"], [["a", "a", "b", "b"], [1, 2, 1, 2]]), + ((["a", "b"], [1, 2]), [["a", "a", "b", "b"], [1, 2, 1, 2]]), + ((["a", "b"], [2, 1]), [["a", "a", "b", "b"], [2, 1, 2, 1]]), + ((["b", "a"], [2, 1]), [["b", "b", "a", "a"], [2, 1, 2, 1]]), + ((["b", "a"], [1, 2]), [["b", "b", "a", "a"], [1, 2, 1, 2]]), + ((["c", "a"], [2, 1]), [["c", "a", "a"], [1, 2, 1]]), + ], + ) + @pytest.mark.parametrize("dim", ["index", "columns"]) + def test_loc_getitem_multilevel_index_order(self, dim, keys, expected): + # GH#22797 + # Try to respect order of keys given for MultiIndex.loc + kwargs = {dim: [["c", "a", "a", "b", "b"], [1, 1, 2, 1, 2]]} + df = DataFrame(np.arange(25).reshape(5, 5), **kwargs) + exp_index = MultiIndex.from_arrays(expected) + if dim == "index": + res = df.loc[keys, :] + tm.assert_index_equal(res.index, exp_index) + elif dim == "columns": + res = df.loc[:, keys] + tm.assert_index_equal(res.columns, exp_index) + + def test_loc_preserve_names(self, multiindex_year_month_day_dataframe_random_data): + ymd = multiindex_year_month_day_dataframe_random_data + + result = ymd.loc[2000] + result2 = ymd["A"].loc[2000] + assert result.index.names == ymd.index.names[1:] + assert result2.index.names == ymd.index.names[1:] + + result = ymd.loc[2000, 2] + result2 = ymd["A"].loc[2000, 2] + assert result.index.name == ymd.index.names[2] + assert result2.index.name == ymd.index.names[2] + + def test_loc_getitem_multiindex_nonunique_len_zero(self): + # GH#13691 + mi = MultiIndex.from_product([[0], [1, 1]]) + ser = Series(0, index=mi) + + res = ser.loc[[]] + + expected = ser[:0] + tm.assert_series_equal(res, expected) + + res2 = ser.loc[ser.iloc[0:0]] + tm.assert_series_equal(res2, expected) + + def test_loc_getitem_access_none_value_in_multiindex(self): + # GH#34318: test that you can access a None value using .loc + # through a MultiIndex + + ser = Series([None], MultiIndex.from_arrays([["Level1"], ["Level2"]])) + result = ser.loc[("Level1", "Level2")] + assert result is None + + midx = MultiIndex.from_product([["Level1"], ["Level2_a", "Level2_b"]]) + ser = Series([None] * len(midx), dtype=object, index=midx) + result = ser.loc[("Level1", "Level2_a")] + assert result is None + + ser = Series([1] * len(midx), dtype=object, index=midx) + result = ser.loc[("Level1", "Level2_a")] + assert result == 1 + + def test_loc_setitem_multiindex_slice(self): + # GH 34870 + + index = MultiIndex.from_tuples( + zip( + ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ), + names=["first", "second"], + ) + + result = Series([1, 1, 1, 1, 1, 1, 1, 1], index=index) + result.loc[("baz", "one"):("foo", "two")] = 100 + + expected = Series([1, 1, 100, 100, 100, 100, 1, 1], index=index) + + tm.assert_series_equal(result, expected) + + def test_loc_getitem_slice_datetime_objs_with_datetimeindex(self): + times = date_range("2000-01-01", freq="10min", periods=100000) + ser = Series(range(100000), times) + result = ser.loc[datetime(1900, 1, 1) : datetime(2100, 1, 1)] + tm.assert_series_equal(result, ser) + + def 
test_loc_getitem_datetime_string_with_datetimeindex(self): + # GH 16710 + df = DataFrame( + {"a": range(10), "b": range(10)}, + index=date_range("2010-01-01", "2010-01-10"), + ) + result = df.loc[["2010-01-01", "2010-01-05"], ["a", "b"]] + expected = DataFrame( + {"a": [0, 4], "b": [0, 4]}, + index=DatetimeIndex(["2010-01-01", "2010-01-05"]), + ) + tm.assert_frame_equal(result, expected) + + def test_loc_getitem_sorted_index_level_with_duplicates(self): + # GH#4516 sorting a MultiIndex with duplicates and multiple dtypes + mi = MultiIndex.from_tuples( + [ + ("foo", "bar"), + ("foo", "bar"), + ("bah", "bam"), + ("bah", "bam"), + ("foo", "bar"), + ("bah", "bam"), + ], + names=["A", "B"], + ) + df = DataFrame( + [ + [1.0, 1], + [2.0, 2], + [3.0, 3], + [4.0, 4], + [5.0, 5], + [6.0, 6], + ], + index=mi, + columns=["C", "D"], + ) + df = df.sort_index(level=0) + + expected = DataFrame( + [[1.0, 1], [2.0, 2], [5.0, 5]], columns=["C", "D"], index=mi.take([0, 1, 4]) + ) + + result = df.loc[("foo", "bar")] + tm.assert_frame_equal(result, expected) + + def test_loc_getitem_preserves_index_level_category_dtype(self): + # GH#15166 + df = DataFrame( + data=np.arange(2, 22, 2), + index=MultiIndex( + levels=[CategoricalIndex(["a", "b"]), range(10)], + codes=[[0] * 5 + [1] * 5, range(10)], + names=["Index1", "Index2"], + ), + ) + + expected = CategoricalIndex( + ["a", "b"], + categories=["a", "b"], + ordered=False, + name="Index1", + dtype="category", + ) + + result = df.index.levels[0] + tm.assert_index_equal(result, expected) + + result = df.loc[["a"]].index.levels[0] + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("lt_value", [30, 10]) + def test_loc_multiindex_levels_contain_values_not_in_index_anymore(self, lt_value): + # GH#41170 + df = DataFrame({"a": [12, 23, 34, 45]}, index=[list("aabb"), [0, 1, 2, 3]]) + with pytest.raises(KeyError, match=r"\['b'\] not in index"): + df.loc[df["a"] < lt_value, :].loc[["b"], :] + + def test_loc_multiindex_null_slice_na_level(self): + # GH#42055 + lev1 = np.array([np.nan, np.nan]) + lev2 = ["bar", "baz"] + mi = MultiIndex.from_arrays([lev1, lev2]) + ser = Series([0, 1], index=mi) + result = ser.loc[:, "bar"] + + # TODO: should we have name="bar"? 
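+        # Illustrative note (not in the upstream source): the null slice
+        # keeps level 0 while "bar" selects and drops level 1, so only the
+        # NaN from level 0 remains in the result's index.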
+ expected = Series([0], index=[np.nan]) + tm.assert_series_equal(result, expected) + + def test_loc_drops_level(self): + # Based on test_series_varied_multiindex_alignment, where + # this used to fail to drop the first level + mi = MultiIndex.from_product( + [list("ab"), list("xy"), [1, 2]], names=["ab", "xy", "num"] + ) + ser = Series(range(8), index=mi) + + loc_result = ser.loc["a", :, :] + expected = ser.index.droplevel(0)[:4] + tm.assert_index_equal(loc_result.index, expected) + + +class TestLocSetitemWithExpansion: + @pytest.mark.slow + def test_loc_setitem_with_expansion_large_dataframe(self): + # GH#10692 + result = DataFrame({"x": range(10 ** 6)}, dtype="int64") + result.loc[len(result)] = len(result) + 1 + expected = DataFrame({"x": range(10 ** 6 + 1)}, dtype="int64") + tm.assert_frame_equal(result, expected) + + def test_loc_setitem_empty_series(self): + # GH#5226 + + # partially set with an empty object series + ser = Series(dtype=object) + ser.loc[1] = 1 + tm.assert_series_equal(ser, Series([1], index=[1])) + ser.loc[3] = 3 + tm.assert_series_equal(ser, Series([1, 3], index=[1, 3])) + + ser = Series(dtype=object) + ser.loc[1] = 1.0 + tm.assert_series_equal(ser, Series([1.0], index=[1])) + ser.loc[3] = 3.0 + tm.assert_series_equal(ser, Series([1.0, 3.0], index=[1, 3])) + + ser = Series(dtype=object) + ser.loc["foo"] = 1 + tm.assert_series_equal(ser, Series([1], index=["foo"])) + ser.loc["bar"] = 3 + tm.assert_series_equal(ser, Series([1, 3], index=["foo", "bar"])) + ser.loc[3] = 4 + tm.assert_series_equal(ser, Series([1, 3, 4], index=["foo", "bar", 3])) + + def test_loc_setitem_incremental_with_dst(self): + # GH#20724 + base = datetime(2015, 11, 1, tzinfo=gettz("US/Pacific")) + idxs = [base + timedelta(seconds=i * 900) for i in range(16)] + result = Series([0], index=[idxs[0]]) + for ts in idxs: + result.loc[ts] = 1 + expected = Series(1, index=idxs) + tm.assert_series_equal(result, expected) + + def test_loc_setitem_datetime_keys_cast(self): + # GH#9516 + dt1 = Timestamp("20130101 09:00:00") + dt2 = Timestamp("20130101 10:00:00") + + for conv in [ + lambda x: x, + lambda x: x.to_datetime64(), + lambda x: x.to_pydatetime(), + lambda x: np.datetime64(x), + ]: + + df = DataFrame() + df.loc[conv(dt1), "one"] = 100 + df.loc[conv(dt2), "one"] = 200 + + expected = DataFrame({"one": [100.0, 200.0]}, index=[dt1, dt2]) + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_categorical_column_retains_dtype(self, ordered): + # GH16360 + result = DataFrame({"A": [1]}) + result.loc[:, "B"] = Categorical(["b"], ordered=ordered) + expected = DataFrame({"A": [1], "B": Categorical(["b"], ordered=ordered)}) + tm.assert_frame_equal(result, expected) + + def test_loc_setitem_with_expansion_and_existing_dst(self): + # GH#18308 + start = Timestamp("2017-10-29 00:00:00+0200", tz="Europe/Madrid") + end = Timestamp("2017-10-29 03:00:00+0100", tz="Europe/Madrid") + ts = Timestamp("2016-10-10 03:00:00", tz="Europe/Madrid") + idx = date_range(start, end, inclusive="left", freq="H") + assert ts not in idx # i.e. 
result.loc setitem is with-expansion
+
+        result = DataFrame(index=idx, columns=["value"])
+        result.loc[ts, "value"] = 12
+        expected = DataFrame(
+            [np.nan] * len(idx) + [12],
+            index=idx.append(DatetimeIndex([ts])),
+            columns=["value"],
+            dtype=object,
+        )
+        tm.assert_frame_equal(result, expected)
+
+    def test_setitem_with_expansion(self):
+        # indexing - setting an element
+        df = DataFrame(
+            data=to_datetime(["2015-03-30 20:12:32", "2015-03-12 00:11:11"]),
+            columns=["time"],
+        )
+        df["new_col"] = ["new", "old"]
+        df.time = df.set_index("time").index.tz_localize("UTC")
+        v = df[df.new_col == "new"].set_index("time").index.tz_convert("US/Pacific")
+
+        # trying to set a single element on a part of a different timezone
+        # this converts to object
+        df2 = df.copy()
+        with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"):
+            df2.loc[df2.new_col == "new", "time"] = v
+
+        expected = Series([v[0], df.loc[1, "time"]], name="time")
+        tm.assert_series_equal(df2.time, expected)
+
+        v = df.loc[df.new_col == "new", "time"] + Timedelta("1s")
+        df.loc[df.new_col == "new", "time"] = v
+        tm.assert_series_equal(df.loc[df.new_col == "new", "time"], v)
+
+    def test_loc_setitem_with_expansion_inf_upcast_empty(self):
+        # Test with np.inf in columns
+        df = DataFrame()
+        df.loc[0, 0] = 1
+        df.loc[1, 1] = 2
+        df.loc[0, np.inf] = 3
+
+        result = df.columns
+        expected = Float64Index([0, 1, np.inf])
+        tm.assert_index_equal(result, expected)
+
+    @pytest.mark.filterwarnings("ignore:indexing past lexsort depth")
+    def test_loc_setitem_with_expansion_nonunique_index(self, index, request):
+        # GH#40096
+        if not len(index):
+            return
+
+        index = index.repeat(2)  # ensure non-unique
+        N = len(index)
+        arr = np.arange(N).astype(np.int64)
+
+        orig = DataFrame(arr, index=index, columns=[0])
+
+        # key that will require object-dtype casting in the index
+        key = "kapow"
+        assert key not in index  # otherwise test is invalid
+        # TODO: using a tuple key breaks here in many cases
+
+        exp_index = index.insert(len(index), key)
+        if isinstance(index, MultiIndex):
+            assert exp_index[-1][0] == key
+        else:
+            assert exp_index[-1] == key
+        exp_data = np.arange(N + 1).astype(np.float64)
+        expected = DataFrame(exp_data, index=exp_index, columns=[0])
+
+        # Add new row, but no new columns
+        df = orig.copy()
+        df.loc[key, 0] = N
+        tm.assert_frame_equal(df, expected)
+
+        # add new row on a Series
+        ser = orig.copy()[0]
+        ser.loc[key] = N
+        # the series machinery lets us preserve int dtype instead of float
+        expected = expected[0].astype(np.int64)
+        tm.assert_series_equal(ser, expected)
+
+        # add new row and new column
+        df = orig.copy()
+        df.loc[key, 1] = N
+        expected = DataFrame(
+            {0: list(arr) + [np.nan], 1: [np.nan] * N + [float(N)]},
+            index=exp_index,
+        )
+        tm.assert_frame_equal(df, expected)
+
+    @pytest.mark.parametrize(
+        "dtype", ["Int32", "Int64", "UInt32", "UInt64", "Float32", "Float64"]
+    )
+    def test_loc_setitem_with_expansion_preserves_nullable_int(self, dtype):
+        # GH#42099
+        ser = Series([0, 1, 2, 3], dtype=dtype)
+        df = DataFrame({"data": ser})
+
+        result = DataFrame(index=df.index)
+        result.loc[df.index, "data"] = ser
+
+        tm.assert_frame_equal(result, df)
+
+        result = DataFrame(index=df.index)
+        result.loc[df.index, "data"] = ser._values
+        tm.assert_frame_equal(result, df)
+
+
+class TestLocCallable:
+    def test_frame_loc_getitem_callable(self):
+        # GH#11485
+        df = DataFrame({"A": [1, 2, 3, 4], "B": list("aabb"), "C": [1, 2, 3, 4]})
+        # iloc cannot use boolean Series (see GH3635)
+
+        # return bool indexer
+        res
= df.loc[lambda x: x.A > 2] + tm.assert_frame_equal(res, df.loc[df.A > 2]) + + res = df.loc[lambda x: x.B == "b", :] + tm.assert_frame_equal(res, df.loc[df.B == "b", :]) + + res = df.loc[lambda x: x.A > 2, lambda x: x.columns == "B"] + tm.assert_frame_equal(res, df.loc[df.A > 2, [False, True, False]]) + + res = df.loc[lambda x: x.A > 2, lambda x: "B"] + tm.assert_series_equal(res, df.loc[df.A > 2, "B"]) + + res = df.loc[lambda x: x.A > 2, lambda x: ["A", "B"]] + tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]]) + + res = df.loc[lambda x: x.A == 2, lambda x: ["A", "B"]] + tm.assert_frame_equal(res, df.loc[df.A == 2, ["A", "B"]]) + + # scalar + res = df.loc[lambda x: 1, lambda x: "A"] + assert res == df.loc[1, "A"] + + def test_frame_loc_getitem_callable_mixture(self): + # GH#11485 + df = DataFrame({"A": [1, 2, 3, 4], "B": list("aabb"), "C": [1, 2, 3, 4]}) + + res = df.loc[lambda x: x.A > 2, ["A", "B"]] + tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]]) + + res = df.loc[[2, 3], lambda x: ["A", "B"]] + tm.assert_frame_equal(res, df.loc[[2, 3], ["A", "B"]]) + + res = df.loc[3, lambda x: ["A", "B"]] + tm.assert_series_equal(res, df.loc[3, ["A", "B"]]) + + def test_frame_loc_getitem_callable_labels(self): + # GH#11485 + df = DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD")) + + # return label + res = df.loc[lambda x: ["A", "C"]] + tm.assert_frame_equal(res, df.loc[["A", "C"]]) + + res = df.loc[lambda x: ["A", "C"], :] + tm.assert_frame_equal(res, df.loc[["A", "C"], :]) + + res = df.loc[lambda x: ["A", "C"], lambda x: "X"] + tm.assert_series_equal(res, df.loc[["A", "C"], "X"]) + + res = df.loc[lambda x: ["A", "C"], lambda x: ["X"]] + tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]]) + + # mixture + res = df.loc[["A", "C"], lambda x: "X"] + tm.assert_series_equal(res, df.loc[["A", "C"], "X"]) + + res = df.loc[["A", "C"], lambda x: ["X"]] + tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]]) + + res = df.loc[lambda x: ["A", "C"], "X"] + tm.assert_series_equal(res, df.loc[["A", "C"], "X"]) + + res = df.loc[lambda x: ["A", "C"], ["X"]] + tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]]) + + def test_frame_loc_setitem_callable(self): + # GH#11485 + df = DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD")) + + # return label + res = df.copy() + res.loc[lambda x: ["A", "C"]] = -20 + exp = df.copy() + exp.loc[["A", "C"]] = -20 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[lambda x: ["A", "C"], :] = 20 + exp = df.copy() + exp.loc[["A", "C"], :] = 20 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[lambda x: ["A", "C"], lambda x: "X"] = -1 + exp = df.copy() + exp.loc[["A", "C"], "X"] = -1 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[lambda x: ["A", "C"], lambda x: ["X"]] = [5, 10] + exp = df.copy() + exp.loc[["A", "C"], ["X"]] = [5, 10] + tm.assert_frame_equal(res, exp) + + # mixture + res = df.copy() + res.loc[["A", "C"], lambda x: "X"] = np.array([-1, -2]) + exp = df.copy() + exp.loc[["A", "C"], "X"] = np.array([-1, -2]) + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[["A", "C"], lambda x: ["X"]] = 10 + exp = df.copy() + exp.loc[["A", "C"], ["X"]] = 10 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[lambda x: ["A", "C"], "X"] = -2 + exp = df.copy() + exp.loc[["A", "C"], "X"] = -2 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[lambda x: ["A", "C"], ["X"]] = -4 + exp = df.copy() + exp.loc[["A", "C"], ["X"]] = -4 + tm.assert_frame_equal(res, exp) 
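+
+
+def _callable_loc_sketch():
+    # A minimal sketch (not part of the upstream pandas suite) of the
+    # contract TestLocCallable above exercises: .loc accepts a function that
+    # receives the indexed object and returns any valid indexer, for getting
+    # and setting alike.  The helper name is hypothetical and pytest neither
+    # collects nor calls it.
+    df = DataFrame({"A": [1, 2, 3, 4], "B": list("aabb")})
+    res = df.loc[lambda x: x.A > 2, "B"]  # same rows as df.loc[df.A > 2, "B"]
+    assert list(res) == ["b", "b"]
+    df.loc[lambda x: x.A > 2, "B"] = "z"  # callables also work when setting
+    assert list(df["B"]) == ["a", "a", "z", "z"]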
+ + +class TestPartialStringSlicing: + def test_loc_getitem_partial_string_slicing_datetimeindex(self): + # GH#35509 + df = DataFrame( + {"col1": ["a", "b", "c"], "col2": [1, 2, 3]}, + index=to_datetime(["2020-08-01", "2020-07-02", "2020-08-05"]), + ) + expected = DataFrame( + {"col1": ["a", "c"], "col2": [1, 3]}, + index=to_datetime(["2020-08-01", "2020-08-05"]), + ) + result = df.loc["2020-08"] + tm.assert_frame_equal(result, expected) + + def test_loc_getitem_partial_string_slicing_with_periodindex(self): + pi = pd.period_range(start="2017-01-01", end="2018-01-01", freq="M") + ser = pi.to_series() + result = ser.loc[:"2017-12"] + expected = ser.iloc[:-1] + + tm.assert_series_equal(result, expected) + + def test_loc_getitem_partial_string_slicing_with_timedeltaindex(self): + ix = timedelta_range(start="1 day", end="2 days", freq="1H") + ser = ix.to_series() + result = ser.loc[:"1 days"] + expected = ser.iloc[:-1] + + tm.assert_series_equal(result, expected) + + def test_loc_getitem_str_timedeltaindex(self): + # GH#16896 + df = DataFrame({"x": range(3)}, index=to_timedelta(range(3), unit="days")) + expected = df.iloc[0] + sliced = df.loc["0 days"] + tm.assert_series_equal(sliced, expected) + + @pytest.mark.parametrize("indexer_end", [None, "2020-01-02 23:59:59.999999999"]) + def test_loc_getitem_partial_slice_non_monotonicity( + self, tz_aware_fixture, indexer_end, frame_or_series + ): + # GH#33146 + obj = frame_or_series( + [1] * 5, + index=DatetimeIndex( + [ + Timestamp("2019-12-30"), + Timestamp("2020-01-01"), + Timestamp("2019-12-25"), + Timestamp("2020-01-02 23:59:59.999999999"), + Timestamp("2019-12-19"), + ], + tz=tz_aware_fixture, + ), + ) + expected = frame_or_series( + [1] * 2, + index=DatetimeIndex( + [ + Timestamp("2020-01-01"), + Timestamp("2020-01-02 23:59:59.999999999"), + ], + tz=tz_aware_fixture, + ), + ) + indexer = slice("2020-01-01", indexer_end) + + result = obj[indexer] + tm.assert_equal(result, expected) + + result = obj.loc[indexer] + tm.assert_equal(result, expected) + + +class TestLabelSlicing: + def test_loc_getitem_slicing_datetimes_frame(self): + # GH#7523 + + # unique + df_unique = DataFrame( + np.arange(4.0, dtype="float64"), + index=[datetime(2001, 1, i, 10, 00) for i in [1, 2, 3, 4]], + ) + + # duplicates + df_dups = DataFrame( + np.arange(5.0, dtype="float64"), + index=[datetime(2001, 1, i, 10, 00) for i in [1, 2, 2, 3, 4]], + ) + + for df in [df_unique, df_dups]: + result = df.loc[datetime(2001, 1, 1, 10) :] + tm.assert_frame_equal(result, df) + result = df.loc[: datetime(2001, 1, 4, 10)] + tm.assert_frame_equal(result, df) + result = df.loc[datetime(2001, 1, 1, 10) : datetime(2001, 1, 4, 10)] + tm.assert_frame_equal(result, df) + + result = df.loc[datetime(2001, 1, 1, 11) :] + expected = df.iloc[1:] + tm.assert_frame_equal(result, expected) + result = df.loc["20010101 11":] + tm.assert_frame_equal(result, expected) + + def test_loc_getitem_label_slice_across_dst(self): + # GH#21846 + idx = date_range( + "2017-10-29 01:30:00", tz="Europe/Berlin", periods=5, freq="30 min" + ) + series2 = Series([0, 1, 2, 3, 4], index=idx) + + t_1 = Timestamp("2017-10-29 02:30:00+02:00", tz="Europe/Berlin") + t_2 = Timestamp("2017-10-29 02:00:00+01:00", tz="Europe/Berlin") + result = series2.loc[t_1:t_2] + expected = Series([2, 3], index=idx[2:4]) + tm.assert_series_equal(result, expected) + + result = series2[t_1] + expected = 2 + assert result == expected + + @pytest.mark.parametrize( + "index", + [ + pd.period_range(start="2017-01-01", end="2018-01-01", freq="M"), + 
timedelta_range(start="1 day", end="2 days", freq="1H"), + ], + ) + def test_loc_getitem_label_slice_period_timedelta(self, index): + ser = index.to_series() + result = ser.loc[: index[-2]] + expected = ser.iloc[:-1] + + tm.assert_series_equal(result, expected) + + def test_loc_getitem_slice_floats_inexact(self): + index = [52195.504153, 52196.303147, 52198.369883] + df = DataFrame(np.random.rand(3, 2), index=index) + + s1 = df.loc[52195.1:52196.5] + assert len(s1) == 2 + + s1 = df.loc[52195.1:52196.6] + assert len(s1) == 2 + + s1 = df.loc[52195.1:52198.9] + assert len(s1) == 3 + + def test_loc_getitem_float_slice_float64index(self): + ser = Series(np.random.rand(10), index=np.arange(10, 20, dtype=float)) + + assert len(ser.loc[12.0:]) == 8 + assert len(ser.loc[12.5:]) == 7 + + idx = np.arange(10, 20, dtype=float) + idx[2] = 12.2 + ser.index = idx + assert len(ser.loc[12.0:]) == 8 + assert len(ser.loc[12.5:]) == 7 + + @pytest.mark.parametrize( + "start,stop, expected_slice", + [ + [np.timedelta64(0, "ns"), None, slice(0, 11)], + [np.timedelta64(1, "D"), np.timedelta64(6, "D"), slice(1, 7)], + [None, np.timedelta64(4, "D"), slice(0, 5)], + ], + ) + def test_loc_getitem_slice_label_td64obj(self, start, stop, expected_slice): + # GH#20393 + ser = Series(range(11), timedelta_range("0 days", "10 days")) + result = ser.loc[slice(start, stop)] + expected = ser.iloc[expected_slice] + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("start", ["2018", "2020"]) + def test_loc_getitem_slice_unordered_dt_index(self, frame_or_series, start): + obj = frame_or_series( + [1, 2, 3], + index=[Timestamp("2016"), Timestamp("2019"), Timestamp("2017")], + ) + with tm.assert_produces_warning(FutureWarning): + obj.loc[start:"2022"] + + @pytest.mark.parametrize("value", [1, 1.5]) + def test_loc_getitem_slice_labels_int_in_object_index(self, frame_or_series, value): + # GH: 26491 + obj = frame_or_series(range(4), index=[value, "first", 2, "third"]) + result = obj.loc[value:"third"] + expected = frame_or_series(range(4), index=[value, "first", 2, "third"]) + tm.assert_equal(result, expected) + + def test_loc_getitem_slice_columns_mixed_dtype(self): + # GH: 20975 + df = DataFrame({"test": 1, 1: 2, 2: 3}, index=[0]) + expected = DataFrame( + data=[[2, 3]], index=[0], columns=Index([1, 2], dtype=object) + ) + tm.assert_frame_equal(df.loc[:, 1:], expected) + + +class TestLocBooleanLabelsAndSlices(Base): + @pytest.mark.parametrize("bool_value", [True, False]) + def test_loc_bool_incompatible_index_raises( + self, index, frame_or_series, bool_value + ): + # GH20432 + message = f"{bool_value}: boolean label can not be used without a boolean index" + if index.inferred_type != "boolean": + obj = frame_or_series(index=index, dtype="object") + with pytest.raises(KeyError, match=message): + obj.loc[bool_value] + + @pytest.mark.parametrize("bool_value", [True, False]) + def test_loc_bool_should_not_raise(self, frame_or_series, bool_value): + obj = frame_or_series( + index=Index([True, False], dtype="boolean"), dtype="object" + ) + obj.loc[bool_value] + + def test_loc_bool_slice_raises(self, index, frame_or_series): + # GH20432 + message = ( + r"slice\(True, False, None\): boolean values can not be used in a slice" + ) + obj = frame_or_series(index=index, dtype="object") + with pytest.raises(TypeError, match=message): + obj.loc[True:False] + + +class TestLocBooleanMask: + def test_loc_setitem_bool_mask_timedeltaindex(self): + # GH#14946 + df = DataFrame({"x": range(10)}) + df.index = to_timedelta(range(10), 
unit="s") + conditions = [df["x"] > 3, df["x"] == 3, df["x"] < 3] + expected_data = [ + [0, 1, 2, 3, 10, 10, 10, 10, 10, 10], + [0, 1, 2, 10, 4, 5, 6, 7, 8, 9], + [10, 10, 10, 3, 4, 5, 6, 7, 8, 9], + ] + for cond, data in zip(conditions, expected_data): + result = df.copy() + result.loc[cond, "x"] = 10 + + expected = DataFrame( + data, + index=to_timedelta(range(10), unit="s"), + columns=["x"], + dtype="int64", + ) + tm.assert_frame_equal(expected, result) + + @pytest.mark.parametrize("tz", [None, "UTC"]) + def test_loc_setitem_mask_with_datetimeindex_tz(self, tz): + # GH#16889 + # support .loc with alignment and tz-aware DatetimeIndex + mask = np.array([True, False, True, False]) + + idx = date_range("20010101", periods=4, tz=tz) + df = DataFrame({"a": np.arange(4)}, index=idx).astype("float64") + + result = df.copy() + result.loc[mask, :] = df.loc[mask, :] + tm.assert_frame_equal(result, df) + + result = df.copy() + result.loc[mask] = df.loc[mask] + tm.assert_frame_equal(result, df) + + def test_loc_setitem_mask_and_label_with_datetimeindex(self): + # GH#9478 + # a datetimeindex alignment issue with partial setting + df = DataFrame( + np.arange(6.0).reshape(3, 2), + columns=list("AB"), + index=date_range("1/1/2000", periods=3, freq="1H"), + ) + expected = df.copy() + expected["C"] = [expected.index[0]] + [pd.NaT, pd.NaT] + + mask = df.A < 1 + df.loc[mask, "C"] = df.loc[mask].index + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_mask_td64_series_value(self): + # GH#23462 key list of bools, value is a Series + td1 = Timedelta(0) + td2 = Timedelta(28767471428571405) + df = DataFrame({"col": Series([td1, td2])}) + df_copy = df.copy() + ser = Series([td1]) + + expected = df["col"].iloc[1].value + df.loc[[True, False]] = ser + result = df["col"].iloc[1].value + + assert expected == result + tm.assert_frame_equal(df, df_copy) + + @td.skip_array_manager_invalid_test # TODO(ArrayManager) rewrite not using .values + def test_loc_setitem_boolean_and_column(self, float_frame): + expected = float_frame.copy() + mask = float_frame["A"] > 0 + + float_frame.loc[mask, "B"] = 0 + expected.values[mask.values, 1] = 0 + + tm.assert_frame_equal(float_frame, expected) + + +class TestLocListlike: + @pytest.mark.parametrize("box", [lambda x: x, np.asarray, list]) + def test_loc_getitem_list_of_labels_categoricalindex_with_na(self, box): + # passing a list can include valid categories _or_ NA values + ci = CategoricalIndex(["A", "B", np.nan]) + ser = Series(range(3), index=ci) + + result = ser.loc[box(ci)] + tm.assert_series_equal(result, ser) + + result = ser[box(ci)] + tm.assert_series_equal(result, ser) + + result = ser.to_frame().loc[box(ci)] + tm.assert_frame_equal(result, ser.to_frame()) + + ser2 = ser[:-1] + ci2 = ci[1:] + # but if there are no NAs present, this should raise KeyError + msg = "not in index" + with pytest.raises(KeyError, match=msg): + ser2.loc[box(ci2)] + + with pytest.raises(KeyError, match=msg): + ser2[box(ci2)] + + with pytest.raises(KeyError, match=msg): + ser2.to_frame().loc[box(ci2)] + + def test_loc_getitem_series_label_list_missing_values(self): + # gh-11428 + key = np.array( + ["2001-01-04", "2001-01-02", "2001-01-04", "2001-01-14"], dtype="datetime64" + ) + ser = Series([2, 5, 8, 11], date_range("2001-01-01", freq="D", periods=4)) + with pytest.raises(KeyError, match="not in index"): + ser.loc[key] + + def test_loc_getitem_series_label_list_missing_integer_values(self): + # GH: 25927 + ser = Series( + index=np.array([9730701000001104, 10049011000001109]), + 
data=np.array([999000011000001104, 999000011000001104]), + ) + with pytest.raises(KeyError, match="not in index"): + ser.loc[np.array([9730701000001104, 10047311000001102])] + + @pytest.mark.parametrize("to_period", [True, False]) + def test_loc_getitem_listlike_of_datetimelike_keys(self, to_period): + # GH#11497 + + idx = date_range("2011-01-01", "2011-01-02", freq="D", name="idx") + if to_period: + idx = idx.to_period("D") + ser = Series([0.1, 0.2], index=idx, name="s") + + keys = [Timestamp("2011-01-01"), Timestamp("2011-01-02")] + if to_period: + keys = [x.to_period("D") for x in keys] + result = ser.loc[keys] + exp = Series([0.1, 0.2], index=idx, name="s") + if not to_period: + exp.index = exp.index._with_freq(None) + tm.assert_series_equal(result, exp, check_index_type=True) + + keys = [ + Timestamp("2011-01-02"), + Timestamp("2011-01-02"), + Timestamp("2011-01-01"), + ] + if to_period: + keys = [x.to_period("D") for x in keys] + exp = Series( + [0.2, 0.2, 0.1], index=Index(keys, name="idx", dtype=idx.dtype), name="s" + ) + result = ser.loc[keys] + tm.assert_series_equal(result, exp, check_index_type=True) + + keys = [ + Timestamp("2011-01-03"), + Timestamp("2011-01-02"), + Timestamp("2011-01-03"), + ] + if to_period: + keys = [x.to_period("D") for x in keys] + + with pytest.raises(KeyError, match="not in index"): + ser.loc[keys] + + def test_loc_named_index(self): + # GH 42790 + df = DataFrame( + [[1, 2], [4, 5], [7, 8]], + index=["cobra", "viper", "sidewinder"], + columns=["max_speed", "shield"], + ) + expected = df.iloc[:2] + expected.index.name = "foo" + result = df.loc[Index(["cobra", "viper"], name="foo")] + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "columns, column_key, expected_columns", + [ + ([2011, 2012, 2013], [2011, 2012], [0, 1]), + ([2011, 2012, "All"], [2011, 2012], [0, 1]), + ([2011, 2012, "All"], [2011, "All"], [0, 2]), + ], +) +def test_loc_getitem_label_list_integer_labels(columns, column_key, expected_columns): + # gh-14836 + df = DataFrame(np.random.rand(3, 3), columns=columns, index=list("ABC")) + expected = df.iloc[:, expected_columns] + result = df.loc[["A", "B", "C"], column_key] + + tm.assert_frame_equal(result, expected, check_column_type=True) + + +def test_loc_setitem_float_intindex(): + # GH 8720 + rand_data = np.random.randn(8, 4) + result = DataFrame(rand_data) + result.loc[:, 0.5] = np.nan + expected_data = np.hstack((rand_data, np.array([np.nan] * 8).reshape(8, 1))) + expected = DataFrame(expected_data, columns=[0.0, 1.0, 2.0, 3.0, 0.5]) + tm.assert_frame_equal(result, expected) + + result = DataFrame(rand_data) + result.loc[:, 0.5] = np.nan + tm.assert_frame_equal(result, expected) + + +def test_loc_axis_1_slice(): + # GH 10586 + cols = [(yr, m) for yr in [2014, 2015] for m in [7, 8, 9, 10]] + df = DataFrame( + np.ones((10, 8)), + index=tuple("ABCDEFGHIJ"), + columns=MultiIndex.from_tuples(cols), + ) + result = df.loc(axis=1)[(2014, 9):(2015, 8)] + expected = DataFrame( + np.ones((10, 4)), + index=tuple("ABCDEFGHIJ"), + columns=MultiIndex.from_tuples([(2014, 9), (2014, 10), (2015, 7), (2015, 8)]), + ) + tm.assert_frame_equal(result, expected) + + +def test_loc_set_dataframe_multiindex(): + # GH 14592 + expected = DataFrame( + "a", index=range(2), columns=MultiIndex.from_product([range(2), range(2)]) + ) + result = expected.copy() + result.loc[0, [(0, 1)]] = result.loc[0, [(0, 1)]] + tm.assert_frame_equal(result, expected) + + +def test_loc_mixed_int_float(): + # GH#19456 + ser = Series(range(2), Index([1, 2.0], 
dtype=object))
+
+    result = ser.loc[1]
+    assert result == 0
+
+
+def test_loc_with_positional_slice_deprecation():
+    # GH#31840
+    ser = Series(range(4), index=["A", "B", "C", "D"])
+
+    with tm.assert_produces_warning(FutureWarning):
+        ser.loc[:3] = 2
+
+    expected = Series([2, 2, 2, 3], index=["A", "B", "C", "D"])
+    tm.assert_series_equal(ser, expected)
+
+
+def test_loc_slice_disallows_positional():
+    # GH#16121, GH#24612, GH#31810
+    dti = date_range("2016-01-01", periods=3)
+    df = DataFrame(np.random.random((3, 2)), index=dti)
+
+    ser = df[0]
+
+    msg = (
+        "cannot do slice indexing on DatetimeIndex with these "
+        r"indexers \[1\] of type int"
+    )
+
+    for obj in [df, ser]:
+        with pytest.raises(TypeError, match=msg):
+            obj.loc[1:3]
+
+        with tm.assert_produces_warning(FutureWarning):
+            # GH#31840 deprecated incorrect behavior
+            obj.loc[1:3] = 1
+
+    with pytest.raises(TypeError, match=msg):
+        df.loc[1:3, 1]
+
+    with tm.assert_produces_warning(FutureWarning):
+        # GH#31840 deprecated incorrect behavior
+        df.loc[1:3, 1] = 2
+
+
+def test_loc_datetimelike_mismatched_dtypes():
+    # GH#32650 don't mix and match datetime/timedelta/period dtypes
+
+    df = DataFrame(
+        np.random.randn(5, 3),
+        columns=["a", "b", "c"],
+        index=date_range("2012", freq="H", periods=5),
+    )
+    # create dataframe with non-unique DatetimeIndex
+    df = df.iloc[[0, 2, 2, 3]].copy()
+
+    dti = df.index
+    tdi = pd.TimedeltaIndex(dti.asi8)  # matching i8 values
+
+    msg = r"None of \[TimedeltaIndex.* are in the \[index\]"
+    with pytest.raises(KeyError, match=msg):
+        df.loc[tdi]
+
+    with pytest.raises(KeyError, match=msg):
+        df["a"].loc[tdi]
+
+
+def test_loc_with_period_index_indexer():
+    # GH#4125
+    idx = pd.period_range("2002-01", "2003-12", freq="M")
+    df = DataFrame(np.random.randn(24, 10), index=idx)
+    tm.assert_frame_equal(df, df.loc[idx])
+    tm.assert_frame_equal(df, df.loc[list(idx)])
+    tm.assert_frame_equal(df, df.loc[list(idx)])
+    tm.assert_frame_equal(df.iloc[0:5], df.loc[idx[0:5]])
+    tm.assert_frame_equal(df, df.loc[list(idx)])
+
+
+def test_loc_getitem_multiindex_tuple_level():
+    # GH#27591
+    lev1 = ["a", "b", "c"]
+    lev2 = [(0, 1), (1, 0)]
+    lev3 = [0, 1]
+    cols = MultiIndex.from_product([lev1, lev2, lev3], names=["x", "y", "z"])
+    df = DataFrame(6, index=range(5), columns=cols)
+
+    # the lev2[0] here should be treated as a single label, not as a sequence
+    # of labels
+    result = df.loc[:, (lev1[0], lev2[0], lev3[0])]
+
+    # TODO: I think this actually should drop levels
+    expected = df.iloc[:, :1]
+    tm.assert_frame_equal(result, expected)
+
+    alt = df.xs((lev1[0], lev2[0], lev3[0]), level=[0, 1, 2], axis=1)
+    tm.assert_frame_equal(alt, expected)
+
+    # same thing on a Series
+    ser = df.iloc[0]
+    expected2 = ser.iloc[:1]
+
+    alt2 = ser.xs((lev1[0], lev2[0], lev3[0]), level=[0, 1, 2], axis=0)
+    tm.assert_series_equal(alt2, expected2)
+
+    result2 = ser.loc[lev1[0], lev2[0], lev3[0]]
+    assert result2 == 6
+
+
+def test_loc_getitem_nullable_index_with_duplicates():
+    # GH#34497
+    df = DataFrame(
+        data=np.array([[1, 2, 3, 4], [5, 6, 7, 8], [1, 2, np.nan, np.nan]]).T,
+        columns=["a", "b", "c"],
+        dtype="Int64",
+    )
+    df2 = df.set_index("c")
+    assert df2.index.dtype == "Int64"
+
+    res = df2.loc[1]
+    expected = Series([1, 5], index=df2.columns, dtype="Int64", name=1)
+    tm.assert_series_equal(res, expected)
+
+    # pd.NA and duplicates in an object-dtype Index
+    df2.index = df2.index.astype(object)
+    res = df2.loc[1]
+    tm.assert_series_equal(res, expected)
+
+
+class TestLocSeries:
+    @pytest.mark.parametrize("val,expected", [(2 ** 63 -
1, 3), (2 ** 63, 4)]) + def test_loc_uint64(self, val, expected): + # see GH#19399 + ser = Series({2 ** 63 - 1: 3, 2 ** 63: 4}) + assert ser.loc[val] == expected + + def test_loc_getitem(self, string_series, datetime_series): + inds = string_series.index[[3, 4, 7]] + tm.assert_series_equal(string_series.loc[inds], string_series.reindex(inds)) + tm.assert_series_equal(string_series.iloc[5::2], string_series[5::2]) + + # slice with indices + d1, d2 = datetime_series.index[[5, 15]] + result = datetime_series.loc[d1:d2] + expected = datetime_series.truncate(d1, d2) + tm.assert_series_equal(result, expected) + + # boolean + mask = string_series > string_series.median() + tm.assert_series_equal(string_series.loc[mask], string_series[mask]) + + # ask for index value + assert datetime_series.loc[d1] == datetime_series[d1] + assert datetime_series.loc[d2] == datetime_series[d2] + + def test_loc_getitem_not_monotonic(self, datetime_series): + d1, d2 = datetime_series.index[[5, 15]] + + ts2 = datetime_series[::2][[1, 2, 0]] + + msg = r"Timestamp\('2000-01-10 00:00:00'\)" + with pytest.raises(KeyError, match=msg): + ts2.loc[d1:d2] + with pytest.raises(KeyError, match=msg): + ts2.loc[d1:d2] = 0 + + def test_loc_getitem_setitem_integer_slice_keyerrors(self): + ser = Series(np.random.randn(10), index=list(range(0, 20, 2))) + + # this is OK + cp = ser.copy() + cp.iloc[4:10] = 0 + assert (cp.iloc[4:10] == 0).all() + + # so is this + cp = ser.copy() + cp.iloc[3:11] = 0 + assert (cp.iloc[3:11] == 0).values.all() + + result = ser.iloc[2:6] + result2 = ser.loc[3:11] + expected = ser.reindex([4, 6, 8, 10]) + + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result2, expected) + + # non-monotonic, raise KeyError + s2 = ser.iloc[list(range(5)) + list(range(9, 4, -1))] + with pytest.raises(KeyError, match=r"^3$"): + s2.loc[3:11] + with pytest.raises(KeyError, match=r"^3$"): + s2.loc[3:11] = 0 + + def test_loc_getitem_iterator(self, string_series): + idx = iter(string_series.index[:10]) + result = string_series.loc[idx] + tm.assert_series_equal(result, string_series[:10]) + + def test_loc_setitem_boolean(self, string_series): + mask = string_series > string_series.median() + + result = string_series.copy() + result.loc[mask] = 0 + expected = string_series + expected[mask] = 0 + tm.assert_series_equal(result, expected) + + def test_loc_setitem_corner(self, string_series): + inds = list(string_series.index[[5, 8, 12]]) + string_series.loc[inds] = 5 + msg = r"\['foo'\] not in index" + with pytest.raises(KeyError, match=msg): + string_series.loc[inds + ["foo"]] = 5 + + def test_basic_setitem_with_labels(self, datetime_series): + indices = datetime_series.index[[5, 10, 15]] + + cp = datetime_series.copy() + exp = datetime_series.copy() + cp[indices] = 0 + exp.loc[indices] = 0 + tm.assert_series_equal(cp, exp) + + cp = datetime_series.copy() + exp = datetime_series.copy() + cp[indices[0] : indices[2]] = 0 + exp.loc[indices[0] : indices[2]] = 0 + tm.assert_series_equal(cp, exp) + + def test_loc_setitem_listlike_of_ints(self): + + # integer indexes, be careful + ser = Series(np.random.randn(10), index=list(range(0, 20, 2))) + inds = [0, 4, 6] + arr_inds = np.array([0, 4, 6]) + + cp = ser.copy() + exp = ser.copy() + ser[inds] = 0 + ser.loc[inds] = 0 + tm.assert_series_equal(cp, exp) + + cp = ser.copy() + exp = ser.copy() + ser[arr_inds] = 0 + ser.loc[arr_inds] = 0 + tm.assert_series_equal(cp, exp) + + inds_notfound = [0, 4, 5, 6] + arr_inds_notfound = np.array([0, 4, 5, 6]) + msg = r"\[5\] not in index" + 
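+        # Illustrative note (not in the upstream source): setting with a
+        # list-like key raises KeyError as soon as any label is missing;
+        # enlargement only happens for scalar keys (see
+        # TestLocSetitemWithExpansion above).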
with pytest.raises(KeyError, match=msg): + ser[inds_notfound] = 0 + with pytest.raises(Exception, match=msg): + ser[arr_inds_notfound] = 0 + + def test_loc_setitem_dt64tz_values(self): + # GH#12089 + ser = Series( + date_range("2011-01-01", periods=3, tz="US/Eastern"), + index=["a", "b", "c"], + ) + s2 = ser.copy() + expected = Timestamp("2011-01-03", tz="US/Eastern") + s2.loc["a"] = expected + result = s2.loc["a"] + assert result == expected + + s2 = ser.copy() + s2.iloc[0] = expected + result = s2.iloc[0] + assert result == expected + + s2 = ser.copy() + s2["a"] = expected + result = s2["a"] + assert result == expected + + @pytest.mark.parametrize("array_fn", [np.array, pd.array, list, tuple]) + @pytest.mark.parametrize("size", [0, 4, 5, 6]) + def test_loc_iloc_setitem_with_listlike(self, size, array_fn): + # GH37748 + # testing insertion, in a Series of size N (here 5), of a listlike object + # of size 0, N-1, N, N+1 + + arr = array_fn([0] * size) + expected = Series([arr, 0, 0, 0, 0], index=list("abcde"), dtype=object) + + ser = Series(0, index=list("abcde"), dtype=object) + ser.loc["a"] = arr + tm.assert_series_equal(ser, expected) + + ser = Series(0, index=list("abcde"), dtype=object) + ser.iloc[0] = arr + tm.assert_series_equal(ser, expected) + + @pytest.mark.parametrize("indexer", [IndexSlice["A", :], ("A", slice(None))]) + def test_loc_series_getitem_too_many_dimensions(self, indexer): + # GH#35349 + ser = Series( + index=MultiIndex.from_tuples([("A", "0"), ("A", "1"), ("B", "0")]), + data=[21, 22, 23], + ) + msg = "Too many indices" + with pytest.raises(ValueError, match=msg): + ser.loc[indexer, :] + + with pytest.raises(ValueError, match=msg): + ser.loc[indexer, :] = 1 + + def test_loc_setitem(self, string_series): + inds = string_series.index[[3, 4, 7]] + + result = string_series.copy() + result.loc[inds] = 5 + + expected = string_series.copy() + expected[[3, 4, 7]] = 5 + tm.assert_series_equal(result, expected) + + result.iloc[5:10] = 10 + expected[5:10] = 10 + tm.assert_series_equal(result, expected) + + # set slice with indices + d1, d2 = string_series.index[[5, 15]] + result.loc[d1:d2] = 6 + expected[5:16] = 6 # because it's inclusive + tm.assert_series_equal(result, expected) + + # set index value + string_series.loc[d1] = 4 + string_series.loc[d2] = 6 + assert string_series[d1] == 4 + assert string_series[d2] == 6 + + @pytest.mark.parametrize("dtype", ["object", "string"]) + def test_loc_assign_dict_to_row(self, dtype): + # GH41044 + df = DataFrame({"A": ["abc", "def"], "B": ["ghi", "jkl"]}, dtype=dtype) + df.loc[0, :] = {"A": "newA", "B": "newB"} + + expected = DataFrame({"A": ["newA", "def"], "B": ["newB", "jkl"]}, dtype=dtype) + + tm.assert_frame_equal(df, expected) + + @td.skip_array_manager_invalid_test + def test_loc_setitem_dict_timedelta_multiple_set(self): + # GH 16309 + result = DataFrame(columns=["time", "value"]) + result.loc[1] = {"time": Timedelta(6, unit="s"), "value": "foo"} + result.loc[1] = {"time": Timedelta(6, unit="s"), "value": "foo"} + expected = DataFrame( + [[Timedelta(6, unit="s"), "foo"]], columns=["time", "value"], index=[1] + ) + tm.assert_frame_equal(result, expected) + + def test_loc_set_multiple_items_in_multiple_new_columns(self): + # GH 25594 + df = DataFrame(index=[1, 2], columns=["a"]) + df.loc[1, ["b", "c"]] = [6, 7] + + expected = DataFrame( + { + "a": Series([np.nan, np.nan], dtype="object"), + "b": [6, np.nan], + "c": [7, np.nan], + }, + index=[1, 2], + ) + + tm.assert_frame_equal(df, expected) + + def 
test_getitem_loc_str_periodindex(self): + # GH#33964 + index = pd.period_range(start="2000", periods=20, freq="B") + series = Series(range(20), index=index) + assert series.loc["2000-01-14"] == 9 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_na_indexing.py b/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_na_indexing.py new file mode 100644 index 0000000..7e54bbc --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_na_indexing.py @@ -0,0 +1,75 @@ +import pytest + +import pandas as pd +import pandas._testing as tm + + +@pytest.mark.parametrize( + "values, dtype", + [ + ([], "object"), + ([1, 2, 3], "int64"), + ([1.0, 2.0, 3.0], "float64"), + (["a", "b", "c"], "object"), + (["a", "b", "c"], "string"), + ([1, 2, 3], "datetime64[ns]"), + ([1, 2, 3], "datetime64[ns, CET]"), + ([1, 2, 3], "timedelta64[ns]"), + (["2000", "2001", "2002"], "Period[D]"), + ([1, 0, 3], "Sparse"), + ([pd.Interval(0, 1), pd.Interval(1, 2), pd.Interval(3, 4)], "interval"), + ], +) +@pytest.mark.parametrize( + "mask", [[True, False, False], [True, True, True], [False, False, False]] +) +@pytest.mark.parametrize("indexer_class", [list, pd.array, pd.Index, pd.Series]) +@pytest.mark.parametrize("frame", [True, False]) +def test_series_mask_boolean(values, dtype, mask, indexer_class, frame): + # In case len(values) < 3 + index = ["a", "b", "c"][: len(values)] + mask = mask[: len(values)] + + obj = pd.Series(values, dtype=dtype, index=index) + if frame: + if len(values) == 0: + # Otherwise obj is an empty DataFrame with shape (0, 1) + obj = pd.DataFrame(dtype=dtype) + else: + obj = obj.to_frame() + + if indexer_class is pd.array: + mask = pd.array(mask, dtype="boolean") + elif indexer_class is pd.Series: + mask = pd.Series(mask, index=obj.index, dtype="boolean") + else: + mask = indexer_class(mask) + + expected = obj[mask] + + result = obj[mask] + tm.assert_equal(result, expected) + + if indexer_class is pd.Series: + msg = "iLocation based boolean indexing cannot use an indexable as a mask" + with pytest.raises(ValueError, match=msg): + result = obj.iloc[mask] + tm.assert_equal(result, expected) + else: + result = obj.iloc[mask] + tm.assert_equal(result, expected) + + result = obj.loc[mask] + tm.assert_equal(result, expected) + + +def test_na_treated_as_false(frame_or_series, indexer_sli): + # https://github.com/pandas-dev/pandas/issues/31503 + obj = frame_or_series([1, 2, 3]) + + mask = pd.array([True, False, None], dtype="boolean") + + result = indexer_sli(obj)[mask] + expected = indexer_sli(obj)[mask.fillna(False)] + + tm.assert_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_partial.py b/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_partial.py new file mode 100644 index 0000000..8251f09 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_partial.py @@ -0,0 +1,668 @@ +""" +test setting *parts* of objects both positionally and label based + +TODO: these should be split among the indexer tests +""" + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Index, + Period, + Series, + Timestamp, + date_range, + period_range, +) +import pandas._testing as tm + + +class TestEmptyFrameSetitemExpansion: + def test_empty_frame_setitem_index_name_retained(self): + # GH#31368 empty frame has non-None index.name -> retained + df = DataFrame({}, index=pd.RangeIndex(0, name="df_index")) + series = Series(1.23, 
index=pd.RangeIndex(4, name="series_index")) + + df["series"] = series + expected = DataFrame( + {"series": [1.23] * 4}, index=pd.RangeIndex(4, name="df_index") + ) + + tm.assert_frame_equal(df, expected) + + def test_empty_frame_setitem_index_name_inherited(self): + # GH#36527 empty frame has None index.name -> not retained + df = DataFrame() + series = Series(1.23, index=pd.RangeIndex(4, name="series_index")) + df["series"] = series + expected = DataFrame( + {"series": [1.23] * 4}, index=pd.RangeIndex(4, name="series_index") + ) + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_zerolen_series_columns_align(self): + # columns will align + df = DataFrame(columns=["A", "B"]) + df.loc[0] = Series(1, index=range(4)) + expected = DataFrame(columns=["A", "B"], index=[0], dtype=np.float64) + tm.assert_frame_equal(df, expected) + + # columns will align + df = DataFrame(columns=["A", "B"]) + df.loc[0] = Series(1, index=["B"]) + + exp = DataFrame([[np.nan, 1]], columns=["A", "B"], index=[0], dtype="float64") + tm.assert_frame_equal(df, exp) + + def test_loc_setitem_zerolen_list_length_must_match_columns(self): + # list-like must conform + df = DataFrame(columns=["A", "B"]) + + msg = "cannot set a row with mismatched columns" + with pytest.raises(ValueError, match=msg): + df.loc[0] = [1, 2, 3] + + df = DataFrame(columns=["A", "B"]) + df.loc[3] = [6, 7] # length matches len(df.columns) --> OK! + + exp = DataFrame([[6, 7]], index=[3], columns=["A", "B"], dtype=np.int64) + tm.assert_frame_equal(df, exp) + + def test_partial_set_empty_frame(self): + + # partially set with an empty object + # frame + df = DataFrame() + + msg = "cannot set a frame with no defined columns" + + with pytest.raises(ValueError, match=msg): + df.loc[1] = 1 + + with pytest.raises(ValueError, match=msg): + df.loc[1] = Series([1], index=["foo"]) + + msg = "cannot set a frame with no defined index and a scalar" + with pytest.raises(ValueError, match=msg): + df.loc[:, 1] = 1 + + def test_partial_set_empty_frame2(self): + # these work as they don't really change + # anything but the index + # GH#5632 + expected = DataFrame(columns=["foo"], index=Index([], dtype="object")) + + df = DataFrame(index=Index([], dtype="object")) + df["foo"] = Series([], dtype="object") + + tm.assert_frame_equal(df, expected) + + df = DataFrame() + df["foo"] = Series(df.index) + + tm.assert_frame_equal(df, expected) + + df = DataFrame() + df["foo"] = df.index + + tm.assert_frame_equal(df, expected) + + def test_partial_set_empty_frame3(self): + expected = DataFrame(columns=["foo"], index=Index([], dtype="int64")) + expected["foo"] = expected["foo"].astype("float64") + + df = DataFrame(index=Index([], dtype="int64")) + df["foo"] = [] + + tm.assert_frame_equal(df, expected) + + df = DataFrame(index=Index([], dtype="int64")) + df["foo"] = Series(np.arange(len(df)), dtype="float64") + + tm.assert_frame_equal(df, expected) + + def test_partial_set_empty_frame4(self): + df = DataFrame(index=Index([], dtype="int64")) + df["foo"] = range(len(df)) + + expected = DataFrame(columns=["foo"], index=Index([], dtype="int64")) + # range is int-dtype-like, so we get int64 dtype + expected["foo"] = expected["foo"].astype("int64") + tm.assert_frame_equal(df, expected) + + def test_partial_set_empty_frame5(self): + df = DataFrame() + tm.assert_index_equal(df.columns, Index([], dtype=object)) + df2 = DataFrame() + df2[1] = Series([1], index=["foo"]) + df.loc[:, 1] = Series([1], index=["foo"]) + tm.assert_frame_equal(df, DataFrame([[1]], index=["foo"], columns=[1])) 
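+        # Illustrative note (not in the upstream source): on an empty frame,
+        # both plain __setitem__ and .loc[:, key] adopt the assigned Series'
+        # index, so the two construction paths must agree.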
+ tm.assert_frame_equal(df, df2) + + def test_partial_set_empty_frame_no_index(self): + # no index to start + expected = DataFrame({0: Series(1, index=range(4))}, columns=["A", "B", 0]) + + df = DataFrame(columns=["A", "B"]) + df[0] = Series(1, index=range(4)) + df.dtypes + str(df) + tm.assert_frame_equal(df, expected) + + df = DataFrame(columns=["A", "B"]) + df.loc[:, 0] = Series(1, index=range(4)) + df.dtypes + str(df) + tm.assert_frame_equal(df, expected) + + def test_partial_set_empty_frame_row(self): + # GH#5720, GH#5744 + # don't create rows when empty + expected = DataFrame(columns=["A", "B", "New"], index=Index([], dtype="int64")) + expected["A"] = expected["A"].astype("int64") + expected["B"] = expected["B"].astype("float64") + expected["New"] = expected["New"].astype("float64") + + df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]}) + y = df[df.A > 5] + y["New"] = np.nan + tm.assert_frame_equal(y, expected) + + expected = DataFrame(columns=["a", "b", "c c", "d"]) + expected["d"] = expected["d"].astype("int64") + df = DataFrame(columns=["a", "b", "c c"]) + df["d"] = 3 + tm.assert_frame_equal(df, expected) + tm.assert_series_equal(df["c c"], Series(name="c c", dtype=object)) + + # reindex columns is ok + df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]}) + y = df[df.A > 5] + result = y.reindex(columns=["A", "B", "C"]) + expected = DataFrame(columns=["A", "B", "C"], index=Index([], dtype="int64")) + expected["A"] = expected["A"].astype("int64") + expected["B"] = expected["B"].astype("float64") + expected["C"] = expected["C"].astype("float64") + tm.assert_frame_equal(result, expected) + + def test_partial_set_empty_frame_set_series(self): + # GH#5756 + # setting with empty Series + df = DataFrame(Series(dtype=object)) + expected = DataFrame({0: Series(dtype=object)}) + tm.assert_frame_equal(df, expected) + + df = DataFrame(Series(name="foo", dtype=object)) + expected = DataFrame({"foo": Series(dtype=object)}) + tm.assert_frame_equal(df, expected) + + def test_partial_set_empty_frame_empty_copy_assignment(self): + # GH#5932 + # copy on empty with assignment fails + df = DataFrame(index=[0]) + df = df.copy() + df["a"] = 0 + expected = DataFrame(0, index=[0], columns=["a"]) + tm.assert_frame_equal(df, expected) + + def test_partial_set_empty_frame_empty_consistencies(self): + # GH#6171 + # consistency on empty frames + df = DataFrame(columns=["x", "y"]) + df["x"] = [1, 2] + expected = DataFrame({"x": [1, 2], "y": [np.nan, np.nan]}) + tm.assert_frame_equal(df, expected, check_dtype=False) + + df = DataFrame(columns=["x", "y"]) + df["x"] = ["1", "2"] + expected = DataFrame({"x": ["1", "2"], "y": [np.nan, np.nan]}, dtype=object) + tm.assert_frame_equal(df, expected) + + df = DataFrame(columns=["x", "y"]) + df.loc[0, "x"] = 1 + expected = DataFrame({"x": [1], "y": [np.nan]}) + tm.assert_frame_equal(df, expected, check_dtype=False) + + +class TestPartialSetting: + def test_partial_setting(self): + + # GH2578, allow ix and friends to partially set + + # series + s_orig = Series([1, 2, 3]) + + s = s_orig.copy() + s[5] = 5 + expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5]) + tm.assert_series_equal(s, expected) + + s = s_orig.copy() + s.loc[5] = 5 + expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5]) + tm.assert_series_equal(s, expected) + + s = s_orig.copy() + s[5] = 5.0 + expected = Series([1, 2, 3, 5.0], index=[0, 1, 2, 5]) + tm.assert_series_equal(s, expected) + + s = s_orig.copy() + s.loc[5] = 5.0 + expected = Series([1, 2, 3, 5.0], index=[0, 1, 2, 5]) + tm.assert_series_equal(s, 
expected) + + # iloc/iat raise + s = s_orig.copy() + + msg = "iloc cannot enlarge its target object" + with pytest.raises(IndexError, match=msg): + s.iloc[3] = 5.0 + + msg = "index 3 is out of bounds for axis 0 with size 3" + with pytest.raises(IndexError, match=msg): + s.iat[3] = 5.0 + + def test_partial_setting_frame(self): + df_orig = DataFrame( + np.arange(6).reshape(3, 2), columns=["A", "B"], dtype="int64" + ) + + # iloc/iat raise + df = df_orig.copy() + + msg = "iloc cannot enlarge its target object" + with pytest.raises(IndexError, match=msg): + df.iloc[4, 2] = 5.0 + + msg = "index 2 is out of bounds for axis 0 with size 2" + with pytest.raises(IndexError, match=msg): + df.iat[4, 2] = 5.0 + + # row setting where it exists + expected = DataFrame(dict({"A": [0, 4, 4], "B": [1, 5, 5]})) + df = df_orig.copy() + df.iloc[1] = df.iloc[2] + tm.assert_frame_equal(df, expected) + + expected = DataFrame(dict({"A": [0, 4, 4], "B": [1, 5, 5]})) + df = df_orig.copy() + df.loc[1] = df.loc[2] + tm.assert_frame_equal(df, expected) + + # like 2578, partial setting with dtype preservation + expected = DataFrame(dict({"A": [0, 2, 4, 4], "B": [1, 3, 5, 5]})) + df = df_orig.copy() + df.loc[3] = df.loc[2] + tm.assert_frame_equal(df, expected) + + # single dtype frame, overwrite + expected = DataFrame(dict({"A": [0, 2, 4], "B": [0, 2, 4]})) + df = df_orig.copy() + df.loc[:, "B"] = df.loc[:, "A"] + tm.assert_frame_equal(df, expected) + + # mixed dtype frame, overwrite + expected = DataFrame(dict({"A": [0, 2, 4], "B": Series([0, 2, 4])})) + df = df_orig.copy() + df["B"] = df["B"].astype(np.float64) + df.loc[:, "B"] = df.loc[:, "A"] + tm.assert_frame_equal(df, expected) + + # single dtype frame, partial setting + expected = df_orig.copy() + expected["C"] = df["A"] + df = df_orig.copy() + df.loc[:, "C"] = df.loc[:, "A"] + tm.assert_frame_equal(df, expected) + + # mixed frame, partial setting + expected = df_orig.copy() + expected["C"] = df["A"] + df = df_orig.copy() + df.loc[:, "C"] = df.loc[:, "A"] + tm.assert_frame_equal(df, expected) + + def test_partial_setting2(self): + # GH 8473 + dates = date_range("1/1/2000", periods=8) + df_orig = DataFrame( + np.random.randn(8, 4), index=dates, columns=["A", "B", "C", "D"] + ) + + expected = pd.concat( + [df_orig, DataFrame({"A": 7}, index=dates[-1:] + dates.freq)], sort=True + ) + df = df_orig.copy() + df.loc[dates[-1] + dates.freq, "A"] = 7 + tm.assert_frame_equal(df, expected) + df = df_orig.copy() + df.at[dates[-1] + dates.freq, "A"] = 7 + tm.assert_frame_equal(df, expected) + + exp_other = DataFrame({0: 7}, index=dates[-1:] + dates.freq) + expected = pd.concat([df_orig, exp_other], axis=1) + + df = df_orig.copy() + df.loc[dates[-1] + dates.freq, 0] = 7 + tm.assert_frame_equal(df, expected) + df = df_orig.copy() + df.at[dates[-1] + dates.freq, 0] = 7 + tm.assert_frame_equal(df, expected) + + def test_partial_setting_mixed_dtype(self): + + # in a mixed dtype environment, try to preserve dtypes + # by appending + df = DataFrame([[True, 1], [False, 2]], columns=["female", "fitness"]) + + s = df.loc[1].copy() + s.name = 2 + expected = pd.concat([df, DataFrame(s).T.infer_objects()]) + + df.loc[2] = df.loc[1] + tm.assert_frame_equal(df, expected) + + def test_series_partial_set(self): + # partial set with new index + # Regression from GH4825 + ser = Series([0.1, 0.2], index=[1, 2]) + + # loc equiv to .reindex + expected = Series([np.nan, 0.2, np.nan], index=[3, 2, 3]) + with pytest.raises(KeyError, match=r"not in index"): + ser.loc[[3, 2, 3]] + + result = 
ser.reindex([3, 2, 3]) + tm.assert_series_equal(result, expected, check_index_type=True) + + expected = Series([np.nan, 0.2, np.nan, np.nan], index=[3, 2, 3, "x"]) + with pytest.raises(KeyError, match="not in index"): + ser.loc[[3, 2, 3, "x"]] + + result = ser.reindex([3, 2, 3, "x"]) + tm.assert_series_equal(result, expected, check_index_type=True) + + expected = Series([0.2, 0.2, 0.1], index=[2, 2, 1]) + result = ser.loc[[2, 2, 1]] + tm.assert_series_equal(result, expected, check_index_type=True) + + expected = Series([0.2, 0.2, np.nan, 0.1], index=[2, 2, "x", 1]) + with pytest.raises(KeyError, match="not in index"): + ser.loc[[2, 2, "x", 1]] + + result = ser.reindex([2, 2, "x", 1]) + tm.assert_series_equal(result, expected, check_index_type=True) + + # raises as nothing is in the index + msg = ( + r"\"None of \[Int64Index\(\[3, 3, 3\], dtype='int64'\)\] are " + r"in the \[index\]\"" + ) + with pytest.raises(KeyError, match=msg): + ser.loc[[3, 3, 3]] + + expected = Series([0.2, 0.2, np.nan], index=[2, 2, 3]) + with pytest.raises(KeyError, match="not in index"): + ser.loc[[2, 2, 3]] + + result = ser.reindex([2, 2, 3]) + tm.assert_series_equal(result, expected, check_index_type=True) + + s = Series([0.1, 0.2, 0.3], index=[1, 2, 3]) + expected = Series([0.3, np.nan, np.nan], index=[3, 4, 4]) + with pytest.raises(KeyError, match="not in index"): + s.loc[[3, 4, 4]] + + result = s.reindex([3, 4, 4]) + tm.assert_series_equal(result, expected, check_index_type=True) + + s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4]) + expected = Series([np.nan, 0.3, 0.3], index=[5, 3, 3]) + with pytest.raises(KeyError, match="not in index"): + s.loc[[5, 3, 3]] + + result = s.reindex([5, 3, 3]) + tm.assert_series_equal(result, expected, check_index_type=True) + + s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4]) + expected = Series([np.nan, 0.4, 0.4], index=[5, 4, 4]) + with pytest.raises(KeyError, match="not in index"): + s.loc[[5, 4, 4]] + + result = s.reindex([5, 4, 4]) + tm.assert_series_equal(result, expected, check_index_type=True) + + s = Series([0.1, 0.2, 0.3, 0.4], index=[4, 5, 6, 7]) + expected = Series([0.4, np.nan, np.nan], index=[7, 2, 2]) + with pytest.raises(KeyError, match="not in index"): + s.loc[[7, 2, 2]] + + result = s.reindex([7, 2, 2]) + tm.assert_series_equal(result, expected, check_index_type=True) + + s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4]) + expected = Series([0.4, np.nan, np.nan], index=[4, 5, 5]) + with pytest.raises(KeyError, match="not in index"): + s.loc[[4, 5, 5]] + + result = s.reindex([4, 5, 5]) + tm.assert_series_equal(result, expected, check_index_type=True) + + # iloc + expected = Series([0.2, 0.2, 0.1, 0.1], index=[2, 2, 1, 1]) + result = ser.iloc[[1, 1, 0, 0]] + tm.assert_series_equal(result, expected, check_index_type=True) + + def test_series_partial_set_with_name(self): + # GH 11497 + + idx = Index([1, 2], dtype="int64", name="idx") + ser = Series([0.1, 0.2], index=idx, name="s") + + # loc + with pytest.raises(KeyError, match=r"\[3\] not in index"): + ser.loc[[3, 2, 3]] + + with pytest.raises(KeyError, match=r"not in index"): + ser.loc[[3, 2, 3, "x"]] + + exp_idx = Index([2, 2, 1], dtype="int64", name="idx") + expected = Series([0.2, 0.2, 0.1], index=exp_idx, name="s") + result = ser.loc[[2, 2, 1]] + tm.assert_series_equal(result, expected, check_index_type=True) + + with pytest.raises(KeyError, match=r"\['x'\] not in index"): + ser.loc[[2, 2, "x", 1]] + + # raises as nothing is in the index + msg = ( + r"\"None of \[Int64Index\(\[3, 3, 3\], 
dtype='int64', " + r"name='idx'\)\] are in the \[index\]\"" + ) + with pytest.raises(KeyError, match=msg): + ser.loc[[3, 3, 3]] + + with pytest.raises(KeyError, match="not in index"): + ser.loc[[2, 2, 3]] + + idx = Index([1, 2, 3], dtype="int64", name="idx") + with pytest.raises(KeyError, match="not in index"): + Series([0.1, 0.2, 0.3], index=idx, name="s").loc[[3, 4, 4]] + + idx = Index([1, 2, 3, 4], dtype="int64", name="idx") + with pytest.raises(KeyError, match="not in index"): + Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[5, 3, 3]] + + idx = Index([1, 2, 3, 4], dtype="int64", name="idx") + with pytest.raises(KeyError, match="not in index"): + Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[5, 4, 4]] + + idx = Index([4, 5, 6, 7], dtype="int64", name="idx") + with pytest.raises(KeyError, match="not in index"): + Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[7, 2, 2]] + + idx = Index([1, 2, 3, 4], dtype="int64", name="idx") + with pytest.raises(KeyError, match="not in index"): + Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[4, 5, 5]] + + # iloc + exp_idx = Index([2, 2, 1, 1], dtype="int64", name="idx") + expected = Series([0.2, 0.2, 0.1, 0.1], index=exp_idx, name="s") + result = ser.iloc[[1, 1, 0, 0]] + tm.assert_series_equal(result, expected, check_index_type=True) + + @pytest.mark.parametrize("key", [100, 100.0]) + def test_setitem_with_expansion_numeric_into_datetimeindex(self, key): + # GH#4940 inserting non-strings + orig = tm.makeTimeDataFrame() + df = orig.copy() + + df.loc[key, :] = df.iloc[0] + ex_index = Index(list(orig.index) + [key], dtype=object, name=orig.index.name) + ex_data = np.concatenate([orig.values, df.iloc[[0]].values], axis=0) + expected = DataFrame(ex_data, index=ex_index, columns=orig.columns) + + tm.assert_frame_equal(df, expected) + + def test_partial_set_invalid(self): + + # GH 4940 + # allow only setting of 'valid' values + + orig = tm.makeTimeDataFrame() + + # allow object conversion here + df = orig.copy() + df.loc["a", :] = df.iloc[0] + ser = Series(df.iloc[0], name="a") + exp = pd.concat([orig, DataFrame(ser).T.infer_objects()]) + tm.assert_frame_equal(df, exp) + tm.assert_index_equal(df.index, Index(orig.index.tolist() + ["a"])) + assert df.index.dtype == "object" + + @pytest.mark.parametrize( + "idx,labels,expected_idx", + [ + ( + period_range(start="2000", periods=20, freq="D"), + ["2000-01-04", "2000-01-08", "2000-01-12"], + [ + Period("2000-01-04", freq="D"), + Period("2000-01-08", freq="D"), + Period("2000-01-12", freq="D"), + ], + ), + ( + date_range(start="2000", periods=20, freq="D"), + ["2000-01-04", "2000-01-08", "2000-01-12"], + [ + Timestamp("2000-01-04"), + Timestamp("2000-01-08"), + Timestamp("2000-01-12"), + ], + ), + ( + pd.timedelta_range(start="1 day", periods=20), + ["4D", "8D", "12D"], + [pd.Timedelta("4 day"), pd.Timedelta("8 day"), pd.Timedelta("12 day")], + ), + ], + ) + def test_loc_with_list_of_strings_representing_datetimes( + self, idx, labels, expected_idx, frame_or_series + ): + # GH 11278 + obj = frame_or_series(range(20), index=idx) + + expected_value = [3, 7, 11] + expected = frame_or_series(expected_value, expected_idx) + + tm.assert_equal(expected, obj.loc[labels]) + if frame_or_series is Series: + tm.assert_series_equal(expected, obj[labels]) + + @pytest.mark.parametrize( + "idx,labels", + [ + ( + period_range(start="2000", periods=20, freq="D"), + ["2000-01-04", "2000-01-30"], + ), + ( + date_range(start="2000", periods=20, freq="D"), + ["2000-01-04", "2000-01-30"], + ), + 
(pd.timedelta_range(start="1 day", periods=20), ["3 day", "30 day"]), + ], + ) + def test_loc_with_list_of_strings_representing_datetimes_missing_value( + self, idx, labels + ): + # GH 11278 + ser = Series(range(20), index=idx) + df = DataFrame(range(20), index=idx) + msg = r"not in index" + + with pytest.raises(KeyError, match=msg): + ser.loc[labels] + with pytest.raises(KeyError, match=msg): + ser[labels] + with pytest.raises(KeyError, match=msg): + df.loc[labels] + + @pytest.mark.parametrize( + "idx,labels,msg", + [ + ( + period_range(start="2000", periods=20, freq="D"), + ["4D", "8D"], + ( + r"None of \[Index\(\['4D', '8D'\], dtype='object'\)\] " + r"are in the \[index\]" + ), + ), + ( + date_range(start="2000", periods=20, freq="D"), + ["4D", "8D"], + ( + r"None of \[Index\(\['4D', '8D'\], dtype='object'\)\] " + r"are in the \[index\]" + ), + ), + ( + pd.timedelta_range(start="1 day", periods=20), + ["2000-01-04", "2000-01-08"], + ( + r"None of \[Index\(\['2000-01-04', '2000-01-08'\], " + r"dtype='object'\)\] are in the \[index\]" + ), + ), + ], + ) + def test_loc_with_list_of_strings_representing_datetimes_not_matched_type( + self, idx, labels, msg + ): + # GH 11278 + ser = Series(range(20), index=idx) + df = DataFrame(range(20), index=idx) + + with pytest.raises(KeyError, match=msg): + ser.loc[labels] + with pytest.raises(KeyError, match=msg): + ser[labels] + with pytest.raises(KeyError, match=msg): + df.loc[labels] + + +class TestStringSlicing: + def test_slice_irregular_datetime_index_with_nan(self): + # GH36953 + index = pd.to_datetime(["2012-01-01", "2012-01-02", "2012-01-03", None]) + df = DataFrame(range(len(index)), index=index) + expected = DataFrame(range(len(index[:3])), index=index[:3]) + result = df["2012-01-01":"2012-01-04"] + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_scalar.py b/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_scalar.py new file mode 100644 index 0000000..d5268a1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/indexing/test_scalar.py @@ -0,0 +1,319 @@ +""" test scalar indexing, including at and iat """ +from datetime import ( + datetime, + timedelta, +) + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Series, + Timedelta, + Timestamp, + date_range, +) +import pandas._testing as tm +from pandas.tests.indexing.common import Base + + +class TestScalar(Base): + @pytest.mark.parametrize("kind", ["series", "frame"]) + def test_at_and_iat_get(self, kind): + def _check(f, func, values=False): + + if f is not None: + indices = self.generate_indices(f, values) + for i in indices: + result = getattr(f, func)[i] + expected = self.get_value(func, f, i, values) + tm.assert_almost_equal(result, expected) + + d = getattr(self, kind) + + # iat + for f in [d["ints"], d["uints"]]: + _check(f, "iat", values=True) + + for f in [d["labels"], d["ts"], d["floats"]]: + if f is not None: + msg = "iAt based indexing can only have integer indexers" + with pytest.raises(ValueError, match=msg): + self.check_values(f, "iat") + + # at + for f in [d["ints"], d["uints"], d["labels"], d["ts"], d["floats"]]: + _check(f, "at") + + @pytest.mark.parametrize("kind", ["series", "frame"]) + @pytest.mark.parametrize("col", ["ints", "uints"]) + def test_iat_set_ints(self, kind, col): + f = getattr(self, kind)[col] + if f is not None: + indices = self.generate_indices(f, True) + for i in indices: + f.iat[i] = 1 + expected = self.get_value("iat", f, i, True) + 
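# read the value back via get_value to confirm the iat write took effect + 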
tm.assert_almost_equal(expected, 1) + + @pytest.mark.parametrize("kind", ["series", "frame"]) + @pytest.mark.parametrize("col", ["labels", "ts", "floats"]) + def test_iat_set_other(self, kind, col): + f = getattr(self, kind)[col] + if f is not None: + msg = "iAt based indexing can only have integer indexers" + with pytest.raises(ValueError, match=msg): + indices = self.generate_indices(f, False) + for i in indices: + f.iat[i] = 1 + expected = self.get_value("iat", f, i, False) + tm.assert_almost_equal(expected, 1) + + @pytest.mark.parametrize("kind", ["series", "frame"]) + @pytest.mark.parametrize("col", ["ints", "uints", "labels", "ts", "floats"]) + def test_at_set_ints_other(self, kind, col): + f = getattr(self, kind)[col] + if f is not None: + indices = self.generate_indices(f, False) + for i in indices: + f.at[i] = 1 + expected = self.get_value("at", f, i, False) + tm.assert_almost_equal(expected, 1) + + +class TestAtAndiAT: + # at and iat tests that don't need Base class + + def test_float_index_at_iat(self): + ser = Series([1, 2, 3], index=[0.1, 0.2, 0.3]) + for el, item in ser.items(): + assert ser.at[el] == item + for i in range(len(ser)): + assert ser.iat[i] == i + 1 + + def test_at_iat_coercion(self): + + # as timestamp is not a tuple! + dates = date_range("1/1/2000", periods=8) + df = DataFrame(np.random.randn(8, 4), index=dates, columns=["A", "B", "C", "D"]) + s = df["A"] + + result = s.at[dates[5]] + xp = s.values[5] + assert result == xp + + # GH 7729 + # make sure we are boxing the returns + s = Series(["2014-01-01", "2014-02-02"], dtype="datetime64[ns]") + expected = Timestamp("2014-02-02") + + for r in [lambda: s.iat[1], lambda: s.iloc[1]]: + result = r() + assert result == expected + + s = Series(["1 days", "2 days"], dtype="timedelta64[ns]") + expected = Timedelta("2 days") + + for r in [lambda: s.iat[1], lambda: s.iloc[1]]: + result = r() + assert result == expected + + def test_iat_invalid_args(self): + pass + + def test_imethods_with_dups(self): + + # GH6493 + # iat/iloc with dups + + s = Series(range(5), index=[1, 1, 2, 2, 3], dtype="int64") + result = s.iloc[2] + assert result == 2 + result = s.iat[2] + assert result == 2 + + msg = "index 10 is out of bounds for axis 0 with size 5" + with pytest.raises(IndexError, match=msg): + s.iat[10] + msg = "index -10 is out of bounds for axis 0 with size 5" + with pytest.raises(IndexError, match=msg): + s.iat[-10] + + result = s.iloc[[2, 3]] + expected = Series([2, 3], [2, 2], dtype="int64") + tm.assert_series_equal(result, expected) + + df = s.to_frame() + result = df.iloc[2] + expected = Series(2, index=[0], name=2) + tm.assert_series_equal(result, expected) + + result = df.iat[2, 0] + assert result == 2 + + def test_frame_at_with_duplicate_axes(self): + # GH#33041 + arr = np.random.randn(6).reshape(3, 2) + df = DataFrame(arr, columns=["A", "A"]) + + result = df.at[0, "A"] + expected = df.iloc[0] + + tm.assert_series_equal(result, expected) + + result = df.T.at["A", 0] + tm.assert_series_equal(result, expected) + + # setter + df.at[1, "A"] = 2 + expected = Series([2.0, 2.0], index=["A", "A"], name=1) + tm.assert_series_equal(df.iloc[1], expected) + + def test_at_getitem_dt64tz_values(self): + # gh-15822 + df = DataFrame( + { + "name": ["John", "Anderson"], + "date": [ + Timestamp(2017, 3, 13, 13, 32, 56), + Timestamp(2017, 2, 16, 12, 10, 3), + ], + } + ) + df["date"] = df["date"].dt.tz_localize("Asia/Shanghai") + + expected = Timestamp("2017-03-13 13:32:56+0800", tz="Asia/Shanghai") + + result = df.loc[0, "date"] + assert 
result == expected + + result = df.at[0, "date"] + assert result == expected + + def test_mixed_index_at_iat_loc_iloc_series(self): + # GH 19860 + s = Series([1, 2, 3, 4, 5], index=["a", "b", "c", 1, 2]) + for el, item in s.items(): + assert s.at[el] == s.loc[el] == item + for i in range(len(s)): + assert s.iat[i] == s.iloc[i] == i + 1 + + with pytest.raises(KeyError, match="^4$"): + s.at[4] + with pytest.raises(KeyError, match="^4$"): + s.loc[4] + + def test_mixed_index_at_iat_loc_iloc_dataframe(self): + # GH 19860 + df = DataFrame( + [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], columns=["a", "b", "c", 1, 2] + ) + for rowIdx, row in df.iterrows(): + for el, item in row.items(): + assert df.at[rowIdx, el] == df.loc[rowIdx, el] == item + + for row in range(2): + for i in range(5): + assert df.iat[row, i] == df.iloc[row, i] == row * 5 + i + + with pytest.raises(KeyError, match="^3$"): + df.at[0, 3] + with pytest.raises(KeyError, match="^3$"): + df.loc[0, 3] + + def test_iat_setter_incompatible_assignment(self): + # GH 23236 + result = DataFrame({"a": [0, 1], "b": [4, 5]}) + result.iat[0, 0] = None + expected = DataFrame({"a": [None, 1], "b": [4, 5]}) + tm.assert_frame_equal(result, expected) + + +def test_iat_dont_wrap_object_datetimelike(): + # GH#32809 .iat calls go through DataFrame._get_value, should not + # call maybe_box_datetimelike + dti = date_range("2016-01-01", periods=3) + tdi = dti - dti + ser = Series(dti.to_pydatetime(), dtype=object) + ser2 = Series(tdi.to_pytimedelta(), dtype=object) + df = DataFrame({"A": ser, "B": ser2}) + assert (df.dtypes == object).all() + + for result in [df.at[0, "A"], df.iat[0, 0], df.loc[0, "A"], df.iloc[0, 0]]: + assert result is ser[0] + assert isinstance(result, datetime) + assert not isinstance(result, Timestamp) + + for result in [df.at[1, "B"], df.iat[1, 1], df.loc[1, "B"], df.iloc[1, 1]]: + assert result is ser2[1] + assert isinstance(result, timedelta) + assert not isinstance(result, Timedelta) + + +def test_at_with_tuple_index_get(): + # GH 26989 + # DataFrame.at getter works with Index of tuples + df = DataFrame({"a": [1, 2]}, index=[(1, 2), (3, 4)]) + assert df.index.nlevels == 1 + assert df.at[(1, 2), "a"] == 1 + + # Series.at getter works with Index of tuples + series = df["a"] + assert series.index.nlevels == 1 + assert series.at[(1, 2)] == 1 + + +def test_at_with_tuple_index_set(): + # GH 26989 + # DataFrame.at setter works with Index of tuples + df = DataFrame({"a": [1, 2]}, index=[(1, 2), (3, 4)]) + assert df.index.nlevels == 1 + df.at[(1, 2), "a"] = 2 + assert df.at[(1, 2), "a"] == 2 + + # Series.at setter works with Index of tuples + series = df["a"] + assert series.index.nlevels == 1 + series.at[1, 2] = 3 + assert series.at[1, 2] == 3 + + +class TestMultiIndexScalar: + def test_multiindex_at_get(self): + # GH 26989 + # DataFrame.at and DataFrame.loc getter works with MultiIndex + df = DataFrame({"a": [1, 2]}, index=[[1, 2], [3, 4]]) + assert df.index.nlevels == 2 + assert df.at[(1, 3), "a"] == 1 + assert df.loc[(1, 3), "a"] == 1 + + # Series.at and Series.loc getter works with MultiIndex + series = df["a"] + assert series.index.nlevels == 2 + assert series.at[1, 3] == 1 + assert series.loc[1, 3] == 1 + + def test_multiindex_at_set(self): + # GH 26989 + # DataFrame.at and DataFrame.loc setter works with MultiIndex + df = DataFrame({"a": [1, 2]}, index=[[1, 2], [3, 4]]) + assert df.index.nlevels == 2 + df.at[(1, 3), "a"] = 3 + assert df.at[(1, 3), "a"] == 3 + df.loc[(1, 3), "a"] = 4 + assert df.loc[(1, 3), "a"] == 4 + + # Series.at and 
Series.loc setter works with MultiIndex + series = df["a"] + assert series.index.nlevels == 2 + series.at[1, 3] = 5 + assert series.at[1, 3] == 5 + series.loc[1, 3] = 6 + assert series.loc[1, 3] == 6 + + def test_multiindex_at_get_one_level(self): + # GH#38053 + s2 = Series((0, 1), index=[[False, True]]) + result = s2.at[False] + assert result == 0 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/internals/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/internals/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/internals/test_api.py b/.local/lib/python3.8/site-packages/pandas/tests/internals/test_api.py new file mode 100644 index 0000000..c759cc1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/internals/test_api.py @@ -0,0 +1,55 @@ +""" +Tests for the pseudo-public API implemented in internals/api.py and exposed +in core.internals +""" + +import pandas as pd +from pandas.core import internals +from pandas.core.internals import api + + +def test_internals_api(): + assert internals.make_block is api.make_block + + +def test_namespace(): + # SUBJECT TO CHANGE + + modules = [ + "blocks", + "concat", + "managers", + "construction", + "array_manager", + "base", + "api", + "ops", + ] + expected = [ + "Block", + "NumericBlock", + "DatetimeTZBlock", + "ExtensionBlock", + "ObjectBlock", + "make_block", + "DataManager", + "ArrayManager", + "BlockManager", + "SingleDataManager", + "SingleBlockManager", + "SingleArrayManager", + "concatenate_managers", + "create_block_manager_from_blocks", + ] + + result = [x for x in dir(internals) if not x.startswith("__")] + assert set(result) == set(expected + modules) + + +def test_make_block_2d_with_dti(): + # GH#41168 + dti = pd.date_range("2012", periods=3, tz="UTC") + blk = api.make_block(dti, placement=[0]) + + assert blk.shape == (1, 3) + assert blk.values.shape == (1, 3) diff --git 
a/.local/lib/python3.8/site-packages/pandas/tests/internals/test_internals.py b/.local/lib/python3.8/site-packages/pandas/tests/internals/test_internals.py new file mode 100644 index 0000000..26e46e9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/internals/test_internals.py @@ -0,0 +1,1410 @@ +from datetime import ( + date, + datetime, +) +import itertools +import re + +import numpy as np +import pytest + +from pandas._libs.internals import BlockPlacement +import pandas.util._test_decorators as td + +from pandas.core.dtypes.common import is_scalar + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + DatetimeIndex, + Index, + IntervalIndex, + Series, + Timedelta, + Timestamp, + period_range, +) +import pandas._testing as tm +import pandas.core.algorithms as algos +from pandas.core.arrays import ( + DatetimeArray, + SparseArray, + TimedeltaArray, +) +from pandas.core.internals import ( + BlockManager, + SingleBlockManager, + make_block, +) +from pandas.core.internals.blocks import ( + ensure_block_shape, + new_block, +) + +# this file contains BlockManager specific tests +# TODO(ArrayManager) factor out interleave_dtype tests +pytestmark = td.skip_array_manager_invalid_test + + +@pytest.fixture(params=[new_block, make_block]) +def block_maker(request): + """ + Fixture to test both the internal new_block and pseudo-public make_block. + """ + return request.param + + +@pytest.fixture +def mgr(): + return create_mgr( + "a: f8; b: object; c: f8; d: object; e: f8;" + "f: bool; g: i8; h: complex; i: datetime-1; j: datetime-2;" + "k: M8[ns, US/Eastern]; l: M8[ns, CET];" + ) + + +def assert_block_equal(left, right): + tm.assert_numpy_array_equal(left.values, right.values) + assert left.dtype == right.dtype + assert isinstance(left.mgr_locs, BlockPlacement) + assert isinstance(right.mgr_locs, BlockPlacement) + tm.assert_numpy_array_equal(left.mgr_locs.as_array, right.mgr_locs.as_array) + + +def get_numeric_mat(shape): + arr = np.arange(shape[0]) + return np.lib.stride_tricks.as_strided( + x=arr, shape=shape, strides=(arr.itemsize,) + (0,) * (len(shape) - 1) + ).copy() + + +N = 10 + + +def create_block(typestr, placement, item_shape=None, num_offset=0, maker=new_block): + """ + Supported typestr: + + * float, f8, f4, f2 + * int, i8, i4, i2, i1 + * uint, u8, u4, u2, u1 + * complex, c16, c8 + * bool + * object, string, O + * datetime, dt, M8[ns], M8[ns, tz] + * timedelta, td, m8[ns] + * sparse (SparseArray with fill_value=0.0) + * sparse_na (SparseArray with fill_value=np.nan) + * category, category2 + + """ + placement = BlockPlacement(placement) + num_items = len(placement) + + if item_shape is None: + item_shape = (N,) + + shape = (num_items,) + item_shape + + mat = get_numeric_mat(shape) + + if typestr in ( + "float", + "f8", + "f4", + "f2", + "int", + "i8", + "i4", + "i2", + "i1", + "uint", + "u8", + "u4", + "u2", + "u1", + ): + values = mat.astype(typestr) + num_offset + elif typestr in ("complex", "c16", "c8"): + values = 1.0j * (mat.astype(typestr) + num_offset) + elif typestr in ("object", "string", "O"): + values = np.reshape([f"A{i:d}" for i in mat.ravel() + num_offset], shape) + elif typestr in ("b", "bool"): + values = np.ones(shape, dtype=np.bool_) + elif typestr in ("datetime", "dt", "M8[ns]"): + values = (mat * 1e9).astype("M8[ns]") + elif typestr.startswith("M8[ns"): + # datetime with tz + m = re.search(r"M8\[ns,\s*(\w+\/?\w*)\]", typestr) + assert m is not None, f"incompatible typestr -> {typestr}" + tz = m.groups()[0] + assert num_items == 1, 
"must have only 1 num items for a tz-aware" + values = DatetimeIndex(np.arange(N) * 1e9, tz=tz)._data + values = ensure_block_shape(values, ndim=len(shape)) + elif typestr in ("timedelta", "td", "m8[ns]"): + values = (mat * 1).astype("m8[ns]") + elif typestr in ("category",): + values = Categorical([1, 1, 2, 2, 3, 3, 3, 3, 4, 4]) + elif typestr in ("category2",): + values = Categorical(["a", "a", "a", "a", "b", "b", "c", "c", "c", "d"]) + elif typestr in ("sparse", "sparse_na"): + if shape[-1] != 10: + # We also are implicitly assuming this in the category cases above + raise NotImplementedError + + assert all(s == 1 for s in shape[:-1]) + if typestr.endswith("_na"): + fill_value = np.nan + else: + fill_value = 0.0 + values = SparseArray( + [fill_value, fill_value, 1, 2, 3, fill_value, 4, 5, fill_value, 6], + fill_value=fill_value, + ) + arr = values.sp_values.view() + arr += num_offset - 1 + else: + raise ValueError(f'Unsupported typestr: "{typestr}"') + + return maker(values, placement=placement, ndim=len(shape)) + + +def create_single_mgr(typestr, num_rows=None): + if num_rows is None: + num_rows = N + + return SingleBlockManager( + create_block(typestr, placement=slice(0, num_rows), item_shape=()), + Index(np.arange(num_rows)), + ) + + +def create_mgr(descr, item_shape=None): + """ + Construct BlockManager from string description. + + String description syntax looks similar to np.matrix initializer. It looks + like this:: + + a,b,c: f8; d,e,f: i8 + + Rules are rather simple: + + * see list of supported datatypes in `create_block` method + * components are semicolon-separated + * each component is `NAME,NAME,NAME: DTYPE_ID` + * whitespace around colons & semicolons are removed + * components with same DTYPE_ID are combined into single block + * to force multiple blocks with same dtype, use '-SUFFIX':: + + 'a:f8-1; b:f8-2; c:f8-foobar' + + """ + if item_shape is None: + item_shape = (N,) + + offset = 0 + mgr_items = [] + block_placements = {} + for d in descr.split(";"): + d = d.strip() + if not len(d): + continue + names, blockstr = d.partition(":")[::2] + blockstr = blockstr.strip() + names = names.strip().split(",") + + mgr_items.extend(names) + placement = list(np.arange(len(names)) + offset) + try: + block_placements[blockstr].extend(placement) + except KeyError: + block_placements[blockstr] = placement + offset += len(names) + + mgr_items = Index(mgr_items) + + blocks = [] + num_offset = 0 + for blockstr, placement in block_placements.items(): + typestr = blockstr.split("-")[0] + blocks.append( + create_block( + typestr, placement, item_shape=item_shape, num_offset=num_offset + ) + ) + num_offset += len(placement) + + sblocks = sorted(blocks, key=lambda b: b.mgr_locs[0]) + return BlockManager( + tuple(sblocks), + [mgr_items] + [Index(np.arange(n)) for n in item_shape], + ) + + +class TestBlock: + def setup_method(self, method): + self.fblock = create_block("float", [0, 2, 4]) + self.cblock = create_block("complex", [7]) + self.oblock = create_block("object", [1, 3]) + self.bool_block = create_block("bool", [5]) + + def test_constructor(self): + int32block = create_block("i4", [0]) + assert int32block.dtype == np.int32 + + @pytest.mark.parametrize( + "typ, data", + [ + ["float", [0, 2, 4]], + ["complex", [7]], + ["object", [1, 3]], + ["bool", [5]], + ], + ) + def test_pickle(self, typ, data): + blk = create_block(typ, data) + assert_block_equal(tm.round_trip_pickle(blk), blk) + + def test_mgr_locs(self): + assert isinstance(self.fblock.mgr_locs, BlockPlacement) + 
tm.assert_numpy_array_equal( + self.fblock.mgr_locs.as_array, np.array([0, 2, 4], dtype=np.intp) + ) + + def test_attrs(self): + assert self.fblock.shape == self.fblock.values.shape + assert self.fblock.dtype == self.fblock.values.dtype + assert len(self.fblock) == len(self.fblock.values) + + def test_copy(self): + cop = self.fblock.copy() + assert cop is not self.fblock + assert_block_equal(self.fblock, cop) + + def test_delete(self): + newb = self.fblock.copy() + newb.delete(0) + assert isinstance(newb.mgr_locs, BlockPlacement) + tm.assert_numpy_array_equal( + newb.mgr_locs.as_array, np.array([2, 4], dtype=np.intp) + ) + assert (newb.values[0] == 1).all() + + newb = self.fblock.copy() + newb.delete(1) + assert isinstance(newb.mgr_locs, BlockPlacement) + tm.assert_numpy_array_equal( + newb.mgr_locs.as_array, np.array([0, 4], dtype=np.intp) + ) + assert (newb.values[1] == 2).all() + + newb = self.fblock.copy() + newb.delete(2) + tm.assert_numpy_array_equal( + newb.mgr_locs.as_array, np.array([0, 2], dtype=np.intp) + ) + assert (newb.values[1] == 1).all() + + newb = self.fblock.copy() + + with pytest.raises(IndexError, match=None): + newb.delete(3) + + def test_delete_datetimelike(self): + # dont use np.delete on values, as that will coerce from DTA/TDA to ndarray + arr = np.arange(20, dtype="i8").reshape(5, 4).view("m8[ns]") + df = DataFrame(arr) + blk = df._mgr.blocks[0] + assert isinstance(blk.values, TimedeltaArray) + + blk.delete(1) + assert isinstance(blk.values, TimedeltaArray) + + df = DataFrame(arr.view("M8[ns]")) + blk = df._mgr.blocks[0] + assert isinstance(blk.values, DatetimeArray) + + blk.delete([1, 3]) + assert isinstance(blk.values, DatetimeArray) + + def test_split(self): + # GH#37799 + values = np.random.randn(3, 4) + blk = new_block(values, placement=[3, 1, 6], ndim=2) + result = blk._split() + + # check that we get views, not copies + values[:] = -9999 + assert (blk.values == -9999).all() + + assert len(result) == 3 + expected = [ + new_block(values[[0]], placement=[3], ndim=2), + new_block(values[[1]], placement=[1], ndim=2), + new_block(values[[2]], placement=[6], ndim=2), + ] + for res, exp in zip(result, expected): + assert_block_equal(res, exp) + + def test_is_categorical_deprecated(self): + # GH#40571 + blk = self.fblock + with tm.assert_produces_warning(DeprecationWarning): + blk.is_categorical + + +class TestBlockManager: + def test_attrs(self): + mgr = create_mgr("a,b,c: f8-1; d,e,f: f8-2") + assert mgr.nblocks == 2 + assert len(mgr) == 6 + + def test_duplicate_ref_loc_failure(self): + tmp_mgr = create_mgr("a:bool; a: f8") + + axes, blocks = tmp_mgr.axes, tmp_mgr.blocks + + blocks[0].mgr_locs = BlockPlacement(np.array([0])) + blocks[1].mgr_locs = BlockPlacement(np.array([0])) + + # test trying to create block manager with overlapping ref locs + + msg = "Gaps in blk ref_locs" + + with pytest.raises(AssertionError, match=msg): + mgr = BlockManager(blocks, axes) + mgr._rebuild_blknos_and_blklocs() + + blocks[0].mgr_locs = BlockPlacement(np.array([0])) + blocks[1].mgr_locs = BlockPlacement(np.array([1])) + mgr = BlockManager(blocks, axes) + mgr.iget(1) + + def test_pickle(self, mgr): + + mgr2 = tm.round_trip_pickle(mgr) + tm.assert_frame_equal(DataFrame(mgr), DataFrame(mgr2)) + + # GH2431 + assert hasattr(mgr2, "_is_consolidated") + assert hasattr(mgr2, "_known_consolidated") + + # reset to False on load + assert not mgr2._is_consolidated + assert not mgr2._known_consolidated + + @pytest.mark.parametrize("mgr_string", ["a,a,a:f8", "a: f8; a: i8"]) + def 
test_non_unique_pickle(self, mgr_string): + mgr = create_mgr(mgr_string) + mgr2 = tm.round_trip_pickle(mgr) + tm.assert_frame_equal(DataFrame(mgr), DataFrame(mgr2)) + + def test_categorical_block_pickle(self): + mgr = create_mgr("a: category") + mgr2 = tm.round_trip_pickle(mgr) + tm.assert_frame_equal(DataFrame(mgr), DataFrame(mgr2)) + + smgr = create_single_mgr("category") + smgr2 = tm.round_trip_pickle(smgr) + tm.assert_series_equal(Series(smgr), Series(smgr2)) + + def test_iget(self): + cols = Index(list("abc")) + values = np.random.rand(3, 3) + block = new_block( + values=values.copy(), + placement=np.arange(3, dtype=np.intp), + ndim=values.ndim, + ) + mgr = BlockManager(blocks=(block,), axes=[cols, Index(np.arange(3))]) + + tm.assert_almost_equal(mgr.iget(0).internal_values(), values[0]) + tm.assert_almost_equal(mgr.iget(1).internal_values(), values[1]) + tm.assert_almost_equal(mgr.iget(2).internal_values(), values[2]) + + def test_set(self): + mgr = create_mgr("a,b,c: int", item_shape=(3,)) + + mgr.insert(len(mgr.items), "d", np.array(["foo"] * 3)) + mgr.iset(1, np.array(["bar"] * 3)) + tm.assert_numpy_array_equal(mgr.iget(0).internal_values(), np.array([0] * 3)) + tm.assert_numpy_array_equal( + mgr.iget(1).internal_values(), np.array(["bar"] * 3, dtype=np.object_) + ) + tm.assert_numpy_array_equal(mgr.iget(2).internal_values(), np.array([2] * 3)) + tm.assert_numpy_array_equal( + mgr.iget(3).internal_values(), np.array(["foo"] * 3, dtype=np.object_) + ) + + def test_set_change_dtype(self, mgr): + mgr.insert(len(mgr.items), "baz", np.zeros(N, dtype=bool)) + + mgr.iset(mgr.items.get_loc("baz"), np.repeat("foo", N)) + idx = mgr.items.get_loc("baz") + assert mgr.iget(idx).dtype == np.object_ + + mgr2 = mgr.consolidate() + mgr2.iset(mgr2.items.get_loc("baz"), np.repeat("foo", N)) + idx = mgr2.items.get_loc("baz") + assert mgr2.iget(idx).dtype == np.object_ + + mgr2.insert(len(mgr2.items), "quux", np.random.randn(N).astype(int)) + idx = mgr2.items.get_loc("quux") + assert mgr2.iget(idx).dtype == np.int_ + + mgr2.iset(mgr2.items.get_loc("quux"), np.random.randn(N)) + assert mgr2.iget(idx).dtype == np.float_ + + def test_copy(self, mgr): + cp = mgr.copy(deep=False) + for blk, cp_blk in zip(mgr.blocks, cp.blocks): + + # view assertion + tm.assert_equal(cp_blk.values, blk.values) + if isinstance(blk.values, np.ndarray): + assert cp_blk.values.base is blk.values.base + else: + # DatetimeTZBlock has DatetimeIndex values + assert cp_blk.values._data.base is blk.values._data.base + + # copy(deep=True) consolidates, so the block-wise assertions will + # fail if mgr is not consolidated + mgr._consolidate_inplace() + cp = mgr.copy(deep=True) + for blk, cp_blk in zip(mgr.blocks, cp.blocks): + + bvals = blk.values + cpvals = cp_blk.values + + tm.assert_equal(cpvals, bvals) + + if isinstance(cpvals, np.ndarray): + lbase = cpvals.base + rbase = bvals.base + else: + lbase = cpvals._ndarray.base + rbase = bvals._ndarray.base + + # copy assertion: either the base is None, or for some blocks + # it is an array (e.g. 
datetimetz), but was copied + if isinstance(cpvals, DatetimeArray): + assert (lbase is None and rbase is None) or (lbase is not rbase) + elif not isinstance(cpvals, np.ndarray): + assert lbase is not rbase + else: + assert lbase is None and rbase is None + + def test_sparse(self): + mgr = create_mgr("a: sparse-1; b: sparse-2") + assert mgr.as_array().dtype == np.float64 + + def test_sparse_mixed(self): + mgr = create_mgr("a: sparse-1; b: sparse-2; c: f8") + assert len(mgr.blocks) == 3 + assert isinstance(mgr, BlockManager) + + @pytest.mark.parametrize( + "mgr_string, dtype", + [("c: f4; d: f2", np.float32), ("c: f4; d: f2; e: f8", np.float64)], + ) + def test_as_array_float(self, mgr_string, dtype): + mgr = create_mgr(mgr_string) + assert mgr.as_array().dtype == dtype + + @pytest.mark.parametrize( + "mgr_string, dtype", + [ + ("a: bool-1; b: bool-2", np.bool_), + ("a: i8-1; b: i8-2; c: i4; d: i2; e: u1", np.int64), + ("c: i4; d: i2; e: u1", np.int32), + ], + ) + def test_as_array_int_bool(self, mgr_string, dtype): + mgr = create_mgr(mgr_string) + assert mgr.as_array().dtype == dtype + + def test_as_array_datetime(self): + mgr = create_mgr("h: datetime-1; g: datetime-2") + assert mgr.as_array().dtype == "M8[ns]" + + def test_as_array_datetime_tz(self): + mgr = create_mgr("h: M8[ns, US/Eastern]; g: M8[ns, CET]") + assert mgr.iget(0).dtype == "datetime64[ns, US/Eastern]" + assert mgr.iget(1).dtype == "datetime64[ns, CET]" + assert mgr.as_array().dtype == "object" + + @pytest.mark.parametrize("t", ["float16", "float32", "float64", "int32", "int64"]) + def test_astype(self, t): + # coerce all + mgr = create_mgr("c: f4; d: f2; e: f8") + + t = np.dtype(t) + tmgr = mgr.astype(t) + assert tmgr.iget(0).dtype.type == t + assert tmgr.iget(1).dtype.type == t + assert tmgr.iget(2).dtype.type == t + + # mixed + mgr = create_mgr("a,b: object; c: bool; d: datetime; e: f4; f: f2; g: f8") + + t = np.dtype(t) + tmgr = mgr.astype(t, errors="ignore") + assert tmgr.iget(2).dtype.type == t + assert tmgr.iget(4).dtype.type == t + assert tmgr.iget(5).dtype.type == t + assert tmgr.iget(6).dtype.type == t + + assert tmgr.iget(0).dtype.type == np.object_ + assert tmgr.iget(1).dtype.type == np.object_ + if t != np.int64: + assert tmgr.iget(3).dtype.type == np.datetime64 + else: + assert tmgr.iget(3).dtype.type == t + + def test_convert(self): + def _compare(old_mgr, new_mgr): + """compare the blocks, numeric compare ==, object don't""" + old_blocks = set(old_mgr.blocks) + new_blocks = set(new_mgr.blocks) + assert len(old_blocks) == len(new_blocks) + + # compare non-numeric + for b in old_blocks: + found = False + for nb in new_blocks: + if (b.values == nb.values).all(): + found = True + break + assert found + + for b in new_blocks: + found = False + for ob in old_blocks: + if (b.values == ob.values).all(): + found = True + break + assert found + + # noops + mgr = create_mgr("f: i8; g: f8") + new_mgr = mgr.convert() + _compare(mgr, new_mgr) + + # convert + mgr = create_mgr("a,b,foo: object; f: i8; g: f8") + mgr.iset(0, np.array(["1"] * N, dtype=np.object_)) + mgr.iset(1, np.array(["2."] * N, dtype=np.object_)) + mgr.iset(2, np.array(["foo."] * N, dtype=np.object_)) + new_mgr = mgr.convert(numeric=True) + assert new_mgr.iget(0).dtype == np.int64 + assert new_mgr.iget(1).dtype == np.float64 + assert new_mgr.iget(2).dtype == np.object_ + assert new_mgr.iget(3).dtype == np.int64 + assert new_mgr.iget(4).dtype == np.float64 + + mgr = create_mgr( + "a,b,foo: object; f: i4; bool: bool; dt: datetime; i: i8; g: f8; h: f2" + ) + 
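# as create_mgr's docstring notes: comma-separated names share one dtype and are + # combined into a single block ("a,b,foo: object" -> three object columns), while + # each semicolon-separated component contributes its own block + 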
mgr.iset(0, np.array(["1"] * N, dtype=np.object_)) + mgr.iset(1, np.array(["2."] * N, dtype=np.object_)) + mgr.iset(2, np.array(["foo."] * N, dtype=np.object_)) + new_mgr = mgr.convert(numeric=True) + assert new_mgr.iget(0).dtype == np.int64 + assert new_mgr.iget(1).dtype == np.float64 + assert new_mgr.iget(2).dtype == np.object_ + assert new_mgr.iget(3).dtype == np.int32 + assert new_mgr.iget(4).dtype == np.bool_ + assert new_mgr.iget(5).dtype.type, np.datetime64 + assert new_mgr.iget(6).dtype == np.int64 + assert new_mgr.iget(7).dtype == np.float64 + assert new_mgr.iget(8).dtype == np.float16 + + def test_invalid_ea_block(self): + with pytest.raises(ValueError, match="need to split"): + create_mgr("a: category; b: category") + + with pytest.raises(ValueError, match="need to split"): + create_mgr("a: category2; b: category2") + + def test_interleave(self): + # self + for dtype in ["f8", "i8", "object", "bool", "complex", "M8[ns]", "m8[ns]"]: + mgr = create_mgr(f"a: {dtype}") + assert mgr.as_array().dtype == dtype + mgr = create_mgr(f"a: {dtype}; b: {dtype}") + assert mgr.as_array().dtype == dtype + + @pytest.mark.parametrize( + "mgr_string, dtype", + [ + ("a: category", "i8"), + ("a: category; b: category", "i8"), + ("a: category; b: category2", "object"), + ("a: category2", "object"), + ("a: category2; b: category2", "object"), + ("a: f8", "f8"), + ("a: f8; b: i8", "f8"), + ("a: f4; b: i8", "f8"), + ("a: f4; b: i8; d: object", "object"), + ("a: bool; b: i8", "object"), + ("a: complex", "complex"), + ("a: f8; b: category", "object"), + ("a: M8[ns]; b: category", "object"), + ("a: M8[ns]; b: bool", "object"), + ("a: M8[ns]; b: i8", "object"), + ("a: m8[ns]; b: bool", "object"), + ("a: m8[ns]; b: i8", "object"), + ("a: M8[ns]; b: m8[ns]", "object"), + ], + ) + def test_interleave_dtype(self, mgr_string, dtype): + # will be converted according the actual dtype of the underlying + mgr = create_mgr("a: category") + assert mgr.as_array().dtype == "i8" + mgr = create_mgr("a: category; b: category2") + assert mgr.as_array().dtype == "object" + mgr = create_mgr("a: category2") + assert mgr.as_array().dtype == "object" + + # combinations + mgr = create_mgr("a: f8") + assert mgr.as_array().dtype == "f8" + mgr = create_mgr("a: f8; b: i8") + assert mgr.as_array().dtype == "f8" + mgr = create_mgr("a: f4; b: i8") + assert mgr.as_array().dtype == "f8" + mgr = create_mgr("a: f4; b: i8; d: object") + assert mgr.as_array().dtype == "object" + mgr = create_mgr("a: bool; b: i8") + assert mgr.as_array().dtype == "object" + mgr = create_mgr("a: complex") + assert mgr.as_array().dtype == "complex" + mgr = create_mgr("a: f8; b: category") + assert mgr.as_array().dtype == "f8" + mgr = create_mgr("a: M8[ns]; b: category") + assert mgr.as_array().dtype == "object" + mgr = create_mgr("a: M8[ns]; b: bool") + assert mgr.as_array().dtype == "object" + mgr = create_mgr("a: M8[ns]; b: i8") + assert mgr.as_array().dtype == "object" + mgr = create_mgr("a: m8[ns]; b: bool") + assert mgr.as_array().dtype == "object" + mgr = create_mgr("a: m8[ns]; b: i8") + assert mgr.as_array().dtype == "object" + mgr = create_mgr("a: M8[ns]; b: m8[ns]") + assert mgr.as_array().dtype == "object" + + def test_consolidate_ordering_issues(self, mgr): + mgr.iset(mgr.items.get_loc("f"), np.random.randn(N)) + mgr.iset(mgr.items.get_loc("d"), np.random.randn(N)) + mgr.iset(mgr.items.get_loc("b"), np.random.randn(N)) + mgr.iset(mgr.items.get_loc("g"), np.random.randn(N)) + mgr.iset(mgr.items.get_loc("h"), np.random.randn(N)) + + # we have datetime/tz 
blocks in mgr + cons = mgr.consolidate() + assert cons.nblocks == 4 + cons = mgr.consolidate().get_numeric_data() + assert cons.nblocks == 1 + assert isinstance(cons.blocks[0].mgr_locs, BlockPlacement) + tm.assert_numpy_array_equal( + cons.blocks[0].mgr_locs.as_array, np.arange(len(cons.items), dtype=np.intp) + ) + + def test_reindex_items(self): + # mgr is not consolidated, f8 & f8-2 blocks + mgr = create_mgr("a: f8; b: i8; c: f8; d: i8; e: f8; f: bool; g: f8-2") + + reindexed = mgr.reindex_axis(["g", "c", "a", "d"], axis=0) + assert reindexed.nblocks == 2 + tm.assert_index_equal(reindexed.items, Index(["g", "c", "a", "d"])) + tm.assert_almost_equal( + mgr.iget(6).internal_values(), reindexed.iget(0).internal_values() + ) + tm.assert_almost_equal( + mgr.iget(2).internal_values(), reindexed.iget(1).internal_values() + ) + tm.assert_almost_equal( + mgr.iget(0).internal_values(), reindexed.iget(2).internal_values() + ) + tm.assert_almost_equal( + mgr.iget(3).internal_values(), reindexed.iget(3).internal_values() + ) + + def test_get_numeric_data(self): + mgr = create_mgr( + "int: int; float: float; complex: complex;" + "str: object; bool: bool; obj: object; dt: datetime", + item_shape=(3,), + ) + mgr.iset(5, np.array([1, 2, 3], dtype=np.object_)) + + numeric = mgr.get_numeric_data() + tm.assert_index_equal(numeric.items, Index(["int", "float", "complex", "bool"])) + tm.assert_almost_equal( + mgr.iget(mgr.items.get_loc("float")).internal_values(), + numeric.iget(numeric.items.get_loc("float")).internal_values(), + ) + + # Check sharing + numeric.iset( + numeric.items.get_loc("float"), + np.array([100.0, 200.0, 300.0]), + inplace=True, + ) + tm.assert_almost_equal( + mgr.iget(mgr.items.get_loc("float")).internal_values(), + np.array([100.0, 200.0, 300.0]), + ) + + numeric2 = mgr.get_numeric_data(copy=True) + tm.assert_index_equal(numeric.items, Index(["int", "float", "complex", "bool"])) + numeric2.iset( + numeric2.items.get_loc("float"), + np.array([1000.0, 2000.0, 3000.0]), + inplace=True, + ) + tm.assert_almost_equal( + mgr.iget(mgr.items.get_loc("float")).internal_values(), + np.array([100.0, 200.0, 300.0]), + ) + + def test_get_bool_data(self): + mgr = create_mgr( + "int: int; float: float; complex: complex;" + "str: object; bool: bool; obj: object; dt: datetime", + item_shape=(3,), + ) + mgr.iset(6, np.array([True, False, True], dtype=np.object_)) + + bools = mgr.get_bool_data() + tm.assert_index_equal(bools.items, Index(["bool", "dt"])) + tm.assert_almost_equal( + mgr.iget(mgr.items.get_loc("bool")).internal_values(), + bools.iget(bools.items.get_loc("bool")).internal_values(), + ) + + bools.iset(0, np.array([True, False, True]), inplace=True) + tm.assert_numpy_array_equal( + mgr.iget(mgr.items.get_loc("bool")).internal_values(), + np.array([True, False, True]), + ) + + # Check sharing + bools2 = mgr.get_bool_data(copy=True) + bools2.iset(0, np.array([False, True, False])) + tm.assert_numpy_array_equal( + mgr.iget(mgr.items.get_loc("bool")).internal_values(), + np.array([True, False, True]), + ) + + def test_unicode_repr_doesnt_raise(self): + repr(create_mgr("b,\u05d0: object")) + + @pytest.mark.parametrize( + "mgr_string", ["a,b,c: i8-1; d,e,f: i8-2", "a,a,a: i8-1; b,b,b: i8-2"] + ) + def test_equals(self, mgr_string): + # unique items + bm1 = create_mgr(mgr_string) + bm2 = BlockManager(bm1.blocks[::-1], bm1.axes) + assert bm1.equals(bm2) + + @pytest.mark.parametrize( + "mgr_string", + [ + "a:i8;b:f8", # basic case + "a:i8;b:f8;c:c8;d:b", # many types + "a:i8;e:dt;f:td;g:string", # 
more types + "a:i8;b:category;c:category2", # categories + "c:sparse;d:sparse_na;b:f8", # sparse + ], + ) + def test_equals_block_order_different_dtypes(self, mgr_string): + # GH 9330 + bm = create_mgr(mgr_string) + block_perms = itertools.permutations(bm.blocks) + for bm_perm in block_perms: + bm_this = BlockManager(tuple(bm_perm), bm.axes) + assert bm.equals(bm_this) + assert bm_this.equals(bm) + + def test_single_mgr_ctor(self): + mgr = create_single_mgr("f8", num_rows=5) + assert mgr.external_values().tolist() == [0.0, 1.0, 2.0, 3.0, 4.0] + + @pytest.mark.parametrize("value", [1, "True", [1, 2, 3], 5.0]) + def test_validate_bool_args(self, value): + bm1 = create_mgr("a,b,c: i8-1; d,e,f: i8-2") + + msg = ( + 'For argument "inplace" expected type bool, ' + f"received type {type(value).__name__}." + ) + with pytest.raises(ValueError, match=msg): + bm1.replace_list([1], [2], inplace=value) + + +def _as_array(mgr): + if mgr.ndim == 1: + return mgr.external_values() + return mgr.as_array().T + + +class TestIndexing: + # Nosetests-style data-driven tests. + # + # This test applies different indexing routines to block managers and + # compares the outcome to the result of same operations on np.ndarray. + # + # NOTE: sparse (SparseBlock with fill_value != np.nan) fail a lot of tests + # and are disabled. + + MANAGERS = [ + create_single_mgr("f8", N), + create_single_mgr("i8", N), + # 2-dim + create_mgr("a,b,c,d,e,f: f8", item_shape=(N,)), + create_mgr("a,b,c,d,e,f: i8", item_shape=(N,)), + create_mgr("a,b: f8; c,d: i8; e,f: string", item_shape=(N,)), + create_mgr("a,b: f8; c,d: i8; e,f: f8", item_shape=(N,)), + ] + + @pytest.mark.parametrize("mgr", MANAGERS) + def test_get_slice(self, mgr): + def assert_slice_ok(mgr, axis, slobj): + mat = _as_array(mgr) + + # we maybe using an ndarray to test slicing and + # might not be the full length of the axis + if isinstance(slobj, np.ndarray): + ax = mgr.axes[axis] + if len(ax) and len(slobj) and len(slobj) != len(ax): + slobj = np.concatenate( + [slobj, np.zeros(len(ax) - len(slobj), dtype=bool)] + ) + + if isinstance(slobj, slice): + sliced = mgr.get_slice(slobj, axis=axis) + elif mgr.ndim == 1 and axis == 0: + sliced = mgr.getitem_mgr(slobj) + else: + # BlockManager doesn't support non-slice, SingleBlockManager + # doesn't support axis > 0 + return + + mat_slobj = (slice(None),) * axis + (slobj,) + tm.assert_numpy_array_equal( + mat[mat_slobj], _as_array(sliced), check_dtype=False + ) + tm.assert_index_equal(mgr.axes[axis][slobj], sliced.axes[axis]) + + assert mgr.ndim <= 2, mgr.ndim + for ax in range(mgr.ndim): + # slice + assert_slice_ok(mgr, ax, slice(None)) + assert_slice_ok(mgr, ax, slice(3)) + assert_slice_ok(mgr, ax, slice(100)) + assert_slice_ok(mgr, ax, slice(1, 4)) + assert_slice_ok(mgr, ax, slice(3, 0, -2)) + + if mgr.ndim < 2: + # 2D only support slice objects + + # boolean mask + assert_slice_ok(mgr, ax, np.array([], dtype=np.bool_)) + assert_slice_ok(mgr, ax, np.ones(mgr.shape[ax], dtype=np.bool_)) + assert_slice_ok(mgr, ax, np.zeros(mgr.shape[ax], dtype=np.bool_)) + + if mgr.shape[ax] >= 3: + assert_slice_ok(mgr, ax, np.arange(mgr.shape[ax]) % 3 == 0) + assert_slice_ok( + mgr, ax, np.array([True, True, False], dtype=np.bool_) + ) + + # fancy indexer + assert_slice_ok(mgr, ax, []) + assert_slice_ok(mgr, ax, list(range(mgr.shape[ax]))) + + if mgr.shape[ax] >= 3: + assert_slice_ok(mgr, ax, [0, 1, 2]) + assert_slice_ok(mgr, ax, [-1, -2, -3]) + + @pytest.mark.parametrize("mgr", MANAGERS) + def test_take(self, mgr): + def assert_take_ok(mgr, 
axis, indexer): + mat = _as_array(mgr) + taken = mgr.take(indexer, axis) + tm.assert_numpy_array_equal( + np.take(mat, indexer, axis), _as_array(taken), check_dtype=False + ) + tm.assert_index_equal(mgr.axes[axis].take(indexer), taken.axes[axis]) + + for ax in range(mgr.ndim): + # take/fancy indexer + assert_take_ok(mgr, ax, indexer=[]) + assert_take_ok(mgr, ax, indexer=[0, 0, 0]) + assert_take_ok(mgr, ax, indexer=list(range(mgr.shape[ax]))) + + if mgr.shape[ax] >= 3: + assert_take_ok(mgr, ax, indexer=[0, 1, 2]) + assert_take_ok(mgr, ax, indexer=[-1, -2, -3]) + + @pytest.mark.parametrize("mgr", MANAGERS) + @pytest.mark.parametrize("fill_value", [None, np.nan, 100.0]) + def test_reindex_axis(self, fill_value, mgr): + def assert_reindex_axis_is_ok(mgr, axis, new_labels, fill_value): + mat = _as_array(mgr) + indexer = mgr.axes[axis].get_indexer_for(new_labels) + + reindexed = mgr.reindex_axis(new_labels, axis, fill_value=fill_value) + tm.assert_numpy_array_equal( + algos.take_nd(mat, indexer, axis, fill_value=fill_value), + _as_array(reindexed), + check_dtype=False, + ) + tm.assert_index_equal(reindexed.axes[axis], new_labels) + + for ax in range(mgr.ndim): + assert_reindex_axis_is_ok(mgr, ax, Index([]), fill_value) + assert_reindex_axis_is_ok(mgr, ax, mgr.axes[ax], fill_value) + assert_reindex_axis_is_ok(mgr, ax, mgr.axes[ax][[0, 0, 0]], fill_value) + assert_reindex_axis_is_ok(mgr, ax, Index(["foo", "bar", "baz"]), fill_value) + assert_reindex_axis_is_ok( + mgr, ax, Index(["foo", mgr.axes[ax][0], "baz"]), fill_value + ) + + if mgr.shape[ax] >= 3: + assert_reindex_axis_is_ok(mgr, ax, mgr.axes[ax][:-3], fill_value) + assert_reindex_axis_is_ok(mgr, ax, mgr.axes[ax][-3::-1], fill_value) + assert_reindex_axis_is_ok( + mgr, ax, mgr.axes[ax][[0, 1, 2, 0, 1, 2]], fill_value + ) + + @pytest.mark.parametrize("mgr", MANAGERS) + @pytest.mark.parametrize("fill_value", [None, np.nan, 100.0]) + def test_reindex_indexer(self, fill_value, mgr): + def assert_reindex_indexer_is_ok(mgr, axis, new_labels, indexer, fill_value): + mat = _as_array(mgr) + reindexed_mat = algos.take_nd(mat, indexer, axis, fill_value=fill_value) + reindexed = mgr.reindex_indexer( + new_labels, indexer, axis, fill_value=fill_value + ) + tm.assert_numpy_array_equal( + reindexed_mat, _as_array(reindexed), check_dtype=False + ) + tm.assert_index_equal(reindexed.axes[axis], new_labels) + + for ax in range(mgr.ndim): + assert_reindex_indexer_is_ok( + mgr, ax, Index([]), np.array([], dtype=np.intp), fill_value + ) + assert_reindex_indexer_is_ok( + mgr, ax, mgr.axes[ax], np.arange(mgr.shape[ax]), fill_value + ) + assert_reindex_indexer_is_ok( + mgr, + ax, + Index(["foo"] * mgr.shape[ax]), + np.arange(mgr.shape[ax]), + fill_value, + ) + assert_reindex_indexer_is_ok( + mgr, ax, mgr.axes[ax][::-1], np.arange(mgr.shape[ax]), fill_value + ) + assert_reindex_indexer_is_ok( + mgr, ax, mgr.axes[ax], np.arange(mgr.shape[ax])[::-1], fill_value + ) + assert_reindex_indexer_is_ok( + mgr, ax, Index(["foo", "bar", "baz"]), np.array([0, 0, 0]), fill_value + ) + assert_reindex_indexer_is_ok( + mgr, ax, Index(["foo", "bar", "baz"]), np.array([-1, 0, -1]), fill_value + ) + assert_reindex_indexer_is_ok( + mgr, + ax, + Index(["foo", mgr.axes[ax][0], "baz"]), + np.array([-1, -1, -1]), + fill_value, + ) + + if mgr.shape[ax] >= 3: + assert_reindex_indexer_is_ok( + mgr, + ax, + Index(["foo", "bar", "baz"]), + np.array([0, 1, 2]), + fill_value, + ) + + +class TestBlockPlacement: + @pytest.mark.parametrize( + "slc, expected", + [ + (slice(0, 4), 4), + (slice(0, 4, 2), 
2), + (slice(0, 3, 2), 2), + (slice(0, 1, 2), 1), + (slice(1, 0, -1), 1), + ], + ) + def test_slice_len(self, slc, expected): + assert len(BlockPlacement(slc)) == expected + + @pytest.mark.parametrize("slc", [slice(1, 1, 0), slice(1, 2, 0)]) + def test_zero_step_raises(self, slc): + msg = "slice step cannot be zero" + with pytest.raises(ValueError, match=msg): + BlockPlacement(slc) + + def test_slice_canonize_negative_stop(self): + # GH#37524 negative stop is OK with negative step and positive start + slc = slice(3, -1, -2) + + bp = BlockPlacement(slc) + assert bp.indexer == slice(3, None, -2) + + @pytest.mark.parametrize( + "slc", + [ + slice(None, None), + slice(10, None), + slice(None, None, -1), + slice(None, 10, -1), + # These are "unbounded" because negative index will + # change depending on container shape. + slice(-1, None), + slice(None, -1), + slice(-1, -1), + slice(-1, None, -1), + slice(None, -1, -1), + slice(-1, -1, -1), + ], + ) + def test_unbounded_slice_raises(self, slc): + msg = "unbounded slice" + with pytest.raises(ValueError, match=msg): + BlockPlacement(slc) + + @pytest.mark.parametrize( + "slc", + [ + slice(0, 0), + slice(100, 0), + slice(100, 100), + slice(100, 100, -1), + slice(0, 100, -1), + ], + ) + def test_not_slice_like_slices(self, slc): + assert not BlockPlacement(slc).is_slice_like + + @pytest.mark.parametrize( + "arr, slc", + [ + ([0], slice(0, 1, 1)), + ([100], slice(100, 101, 1)), + ([0, 1, 2], slice(0, 3, 1)), + ([0, 5, 10], slice(0, 15, 5)), + ([0, 100], slice(0, 200, 100)), + ([2, 1], slice(2, 0, -1)), + ], + ) + def test_array_to_slice_conversion(self, arr, slc): + assert BlockPlacement(arr).as_slice == slc + + @pytest.mark.parametrize( + "arr", + [ + [], + [-1], + [-1, -2, -3], + [-10], + [-1], + [-1, 0, 1, 2], + [-2, 0, 2, 4], + [1, 0, -1], + [1, 1, 1], + ], + ) + def test_not_slice_like_arrays(self, arr): + assert not BlockPlacement(arr).is_slice_like + + @pytest.mark.parametrize( + "slc, expected", + [(slice(0, 3), [0, 1, 2]), (slice(0, 0), []), (slice(3, 0), [])], + ) + def test_slice_iter(self, slc, expected): + assert list(BlockPlacement(slc)) == expected + + @pytest.mark.parametrize( + "slc, arr", + [ + (slice(0, 3), [0, 1, 2]), + (slice(0, 0), []), + (slice(3, 0), []), + (slice(3, 0, -1), [3, 2, 1]), + ], + ) + def test_slice_to_array_conversion(self, slc, arr): + tm.assert_numpy_array_equal( + BlockPlacement(slc).as_array, np.asarray(arr, dtype=np.intp) + ) + + def test_blockplacement_add(self): + bpl = BlockPlacement(slice(0, 5)) + assert bpl.add(1).as_slice == slice(1, 6, 1) + assert bpl.add(np.arange(5)).as_slice == slice(0, 10, 2) + assert list(bpl.add(np.arange(5, 0, -1))) == [5, 5, 5, 5, 5] + + @pytest.mark.parametrize( + "val, inc, expected", + [ + (slice(0, 0), 0, []), + (slice(1, 4), 0, [1, 2, 3]), + (slice(3, 0, -1), 0, [3, 2, 1]), + ([1, 2, 4], 0, [1, 2, 4]), + (slice(0, 0), 10, []), + (slice(1, 4), 10, [11, 12, 13]), + (slice(3, 0, -1), 10, [13, 12, 11]), + ([1, 2, 4], 10, [11, 12, 14]), + (slice(0, 0), -1, []), + (slice(1, 4), -1, [0, 1, 2]), + ([1, 2, 4], -1, [0, 1, 3]), + ], + ) + def test_blockplacement_add_int(self, val, inc, expected): + assert list(BlockPlacement(val).add(inc)) == expected + + @pytest.mark.parametrize("val", [slice(1, 4), [1, 2, 4]]) + def test_blockplacement_add_int_raises(self, val): + msg = "iadd causes length change" + with pytest.raises(ValueError, match=msg): + BlockPlacement(val).add(-10) + + +class TestCanHoldElement: + @pytest.fixture( + params=[ + lambda x: x, + lambda x: x.to_series(), + lambda 
x: x._data, + lambda x: list(x), + lambda x: x.astype(object), + lambda x: np.asarray(x), + lambda x: x[0], + lambda x: x[:0], + ] + ) + def element(self, request): + """ + Functions that take an Index and return an element that should have + blk._can_hold_element(element) for a Block with this index's dtype. + """ + return request.param + + def test_datetime_block_can_hold_element(self): + block = create_block("datetime", [0]) + + assert block._can_hold_element([]) + + # We will check that block._can_hold_element iff arr.__setitem__ works + arr = pd.array(block.values.ravel()) + + # coerce None + assert block._can_hold_element(None) + arr[0] = None + assert arr[0] is pd.NaT + + # coerce different types of datetime objects + vals = [np.datetime64("2010-10-10"), datetime(2010, 10, 10)] + for val in vals: + assert block._can_hold_element(val) + arr[0] = val + + val = date(2010, 10, 10) + assert not block._can_hold_element(val) + + msg = ( + "value should be a 'Timestamp', 'NaT', " + "or array of those. Got 'date' instead." + ) + with pytest.raises(TypeError, match=msg): + arr[0] = val + + @pytest.mark.parametrize("dtype", [np.int64, np.uint64, np.float64]) + def test_interval_can_hold_element_emptylist(self, dtype, element): + arr = np.array([1, 3, 4], dtype=dtype) + ii = IntervalIndex.from_breaks(arr) + blk = new_block(ii._data, [1], ndim=2) + + assert blk._can_hold_element([]) + # TODO: check this holds for all blocks + + @pytest.mark.parametrize("dtype", [np.int64, np.uint64, np.float64]) + def test_interval_can_hold_element(self, dtype, element): + arr = np.array([1, 3, 4, 9], dtype=dtype) + ii = IntervalIndex.from_breaks(arr) + blk = new_block(ii._data, [1], ndim=2) + + elem = element(ii) + self.check_series_setitem(elem, ii, True) + assert blk._can_hold_element(elem) + + # Careful: to get the expected Series-inplace behavior we need + # `elem` to not have the same length as `arr` + ii2 = IntervalIndex.from_breaks(arr[:-1], closed="neither") + elem = element(ii2) + self.check_series_setitem(elem, ii, False) + assert not blk._can_hold_element(elem) + + ii3 = IntervalIndex.from_breaks([Timestamp(1), Timestamp(3), Timestamp(4)]) + elem = element(ii3) + self.check_series_setitem(elem, ii, False) + assert not blk._can_hold_element(elem) + + ii4 = IntervalIndex.from_breaks([Timedelta(1), Timedelta(3), Timedelta(4)]) + elem = element(ii4) + self.check_series_setitem(elem, ii, False) + assert not blk._can_hold_element(elem) + + def test_period_can_hold_element_emptylist(self): + pi = period_range("2016", periods=3, freq="A") + blk = new_block(pi._data.reshape(1, 3), [1], ndim=2) + + assert blk._can_hold_element([]) + + def test_period_can_hold_element(self, element): + pi = period_range("2016", periods=3, freq="A") + + elem = element(pi) + self.check_series_setitem(elem, pi, True) + + # Careful: to get the expected Series-inplace behavior we need + # `elem` to not have the same length as `arr` + pi2 = pi.asfreq("D")[:-1] + elem = element(pi2) + self.check_series_setitem(elem, pi, False) + + dti = pi.to_timestamp("S")[:-1] + elem = element(dti) + self.check_series_setitem(elem, pi, False) + + def check_setting(self, elem, index: Index, inplace: bool): + self.check_series_setitem(elem, index, inplace) + self.check_frame_setitem(elem, index, inplace) + + def check_can_hold_element(self, obj, elem, inplace: bool): + blk = obj._mgr.blocks[0] + if inplace: + assert blk._can_hold_element(elem) + else: + assert not blk._can_hold_element(elem) + + def check_series_setitem(self, elem, index: Index, 
inplace: bool): + arr = index._data.copy() + ser = Series(arr) + + self.check_can_hold_element(ser, elem, inplace) + + if is_scalar(elem): + ser[0] = elem + else: + ser[: len(elem)] = elem + + if inplace: + assert ser.array is arr # i.e. setting was done inplace + else: + assert ser.dtype == object + + def check_frame_setitem(self, elem, index: Index, inplace: bool): + arr = index._data.copy() + df = DataFrame(arr) + + self.check_can_hold_element(df, elem, inplace) + + if is_scalar(elem): + df.iloc[0, 0] = elem + else: + df.iloc[: len(elem), 0] = elem + + if inplace: + # assertion here implies setting was done inplace + + # error: Item "ArrayManager" of "Union[ArrayManager, BlockManager]" has no + # attribute "blocks" + assert df._mgr.blocks[0].values is arr # type:ignore[union-attr] + else: + assert df.dtypes[0] == object + + +class TestShouldStore: + def test_should_store_categorical(self): + cat = Categorical(["A", "B", "C"]) + df = DataFrame(cat) + blk = df._mgr.blocks[0] + + # matching dtype + assert blk.should_store(cat) + assert blk.should_store(cat[:-1]) + + # different dtype + assert not blk.should_store(cat.as_ordered()) + + # ndarray instead of Categorical + assert not blk.should_store(np.asarray(cat)) + + +def test_validate_ndim(block_maker): + values = np.array([1.0, 2.0]) + placement = slice(2) + msg = r"Wrong number of dimensions. values.ndim != ndim \[1 != 2\]" + + with pytest.raises(ValueError, match=msg): + block_maker(values, placement, ndim=2) + + +def test_block_shape(): + idx = Index([0, 1, 2, 3, 4]) + a = Series([1, 2, 3]).reindex(idx) + b = Series(Categorical([1, 2, 3])).reindex(idx) + + assert a._mgr.blocks[0].mgr_locs.indexer == b._mgr.blocks[0].mgr_locs.indexer + + +def test_make_block_no_pandas_array(block_maker): + # https://github.com/pandas-dev/pandas/pull/24866 + arr = pd.arrays.PandasArray(np.array([1, 2])) + + # PandasArray, no dtype + result = block_maker(arr, slice(len(arr)), ndim=arr.ndim) + assert result.dtype.kind in ["i", "u"] + + if block_maker is make_block: + # new_block requires caller to unwrap PandasArray + assert result.is_extension is False + + # PandasArray, PandasDtype + result = block_maker(arr, slice(len(arr)), dtype=arr.dtype, ndim=arr.ndim) + assert result.dtype.kind in ["i", "u"] + assert result.is_extension is False + + # new_block no longer takes a dtype keyword + # ndarray, PandasDtype + result = block_maker( + arr.to_numpy(), slice(len(arr)), dtype=arr.dtype, ndim=arr.ndim + ) + assert result.dtype.kind in ["i", "u"] + assert result.is_extension is False + + +def test_single_block_manager_fastpath_deprecated(): + # GH#33092 + ser = Series(range(3)) + blk = ser._data.blocks[0] + with tm.assert_produces_warning(FutureWarning): + SingleBlockManager(blk, ser.index, fastpath=True) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/internals/test_managers.py b/.local/lib/python3.8/site-packages/pandas/tests/internals/test_managers.py new file mode 100644 index 0000000..045c3cb --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/internals/test_managers.py @@ -0,0 +1,72 @@ +""" +Testing interaction between the different managers (BlockManager, ArrayManager) +""" +from pandas.core.dtypes.missing import array_equivalent + +import pandas as pd +import pandas._testing as tm +from pandas.core.internals import ( + ArrayManager, + BlockManager, + SingleArrayManager, + SingleBlockManager, +) + + +def test_dataframe_creation(): + + with pd.option_context("mode.data_manager", "block"): + df_block = pd.DataFrame({"a": [1, 2, 
3], "b": [0.1, 0.2, 0.3], "c": [4, 5, 6]}) + assert isinstance(df_block._mgr, BlockManager) + + with pd.option_context("mode.data_manager", "array"): + df_array = pd.DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": [4, 5, 6]}) + assert isinstance(df_array._mgr, ArrayManager) + + # also ensure both are seen as equal + tm.assert_frame_equal(df_block, df_array) + + # conversion from one manager to the other + result = df_block._as_manager("block") + assert isinstance(result._mgr, BlockManager) + result = df_block._as_manager("array") + assert isinstance(result._mgr, ArrayManager) + tm.assert_frame_equal(result, df_block) + assert all( + array_equivalent(left, right) + for left, right in zip(result._mgr.arrays, df_array._mgr.arrays) + ) + + result = df_array._as_manager("array") + assert isinstance(result._mgr, ArrayManager) + result = df_array._as_manager("block") + assert isinstance(result._mgr, BlockManager) + tm.assert_frame_equal(result, df_array) + assert len(result._mgr.blocks) == 2 + + +def test_series_creation(): + + with pd.option_context("mode.data_manager", "block"): + s_block = pd.Series([1, 2, 3], name="A", index=["a", "b", "c"]) + assert isinstance(s_block._mgr, SingleBlockManager) + + with pd.option_context("mode.data_manager", "array"): + s_array = pd.Series([1, 2, 3], name="A", index=["a", "b", "c"]) + assert isinstance(s_array._mgr, SingleArrayManager) + + # also ensure both are seen as equal + tm.assert_series_equal(s_block, s_array) + + # conversion from one manager to the other + result = s_block._as_manager("block") + assert isinstance(result._mgr, SingleBlockManager) + result = s_block._as_manager("array") + assert isinstance(result._mgr, SingleArrayManager) + tm.assert_series_equal(result, s_block) + + result = s_array._as_manager("array") + assert isinstance(result._mgr, SingleArrayManager) + result = s_array._as_manager("block") + assert isinstance(result._mgr, SingleBlockManager) + tm.assert_series_equal(result, s_array) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/io/__init__.py new file mode 100644 index 0000000..3231e38 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/__init__.py @@ -0,0 +1,27 @@ +import pytest + +pytestmark = [ + # fastparquet + pytest.mark.filterwarnings( + "ignore:PY_SSIZE_T_CLEAN will be required.*:DeprecationWarning" + ), + pytest.mark.filterwarnings( + "ignore:Block.is_categorical is deprecated:DeprecationWarning" + ), + pytest.mark.filterwarnings( + r"ignore:`np\.bool` is a deprecated alias:DeprecationWarning" + ), + # xlrd + pytest.mark.filterwarnings( + "ignore:This method will be removed in future versions:DeprecationWarning" + ), + pytest.mark.filterwarnings( + "ignore:This method will be removed in future versions. " + r"Use 'tree.iter\(\)' or 'list\(tree.iter\(\)\)' instead." 
+ ":PendingDeprecationWarning" + ), + # GH 26552 + pytest.mark.filterwarnings( + "ignore:As the xlwt package is no longer maintained:FutureWarning" + ), +] diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..b83cd5f Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/conftest.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/conftest.cpython-38.pyc new file mode 100644 index 0000000..277a25b Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/conftest.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/generate_legacy_storage_files.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/generate_legacy_storage_files.cpython-38.pyc new file mode 100644 index 0000000..0bebd27 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/generate_legacy_storage_files.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_clipboard.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_clipboard.cpython-38.pyc new file mode 100644 index 0000000..a16f4ca Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_clipboard.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_common.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_common.cpython-38.pyc new file mode 100644 index 0000000..97293f0 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_common.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_compression.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_compression.cpython-38.pyc new file mode 100644 index 0000000..5969307 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_compression.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_date_converters.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_date_converters.cpython-38.pyc new file mode 100644 index 0000000..4dad927 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_date_converters.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_feather.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_feather.cpython-38.pyc new file mode 100644 index 0000000..ff51f09 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_feather.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_fsspec.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_fsspec.cpython-38.pyc new file mode 100644 index 0000000..2c5236e Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_fsspec.cpython-38.pyc differ diff --git 
a/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_gcs.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_gcs.cpython-38.pyc new file mode 100644 index 0000000..e55d079 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_gcs.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_html.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_html.cpython-38.pyc new file mode 100644 index 0000000..9ba4945 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_html.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_orc.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_orc.cpython-38.pyc new file mode 100644 index 0000000..59ba183 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_orc.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_parquet.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_parquet.cpython-38.pyc new file mode 100644 index 0000000..8ba38f8 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_parquet.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_pickle.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_pickle.cpython-38.pyc new file mode 100644 index 0000000..45e6074 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_pickle.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_s3.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_s3.cpython-38.pyc new file mode 100644 index 0000000..416c4bb Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_s3.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_spss.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_spss.cpython-38.pyc new file mode 100644 index 0000000..8ec230b Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_spss.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_sql.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_sql.cpython-38.pyc new file mode 100644 index 0000000..5a05e08 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_sql.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_stata.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_stata.cpython-38.pyc new file mode 100644 index 0000000..4f49f3c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_stata.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_user_agent.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_user_agent.cpython-38.pyc new file mode 100644 index 0000000..358001a Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/pandas/tests/io/__pycache__/test_user_agent.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/conftest.py b/.local/lib/python3.8/site-packages/pandas/tests/io/conftest.py new file mode 100644 index 0000000..ff31d93 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/conftest.py @@ -0,0 +1,214 @@ +import os +import shlex +import subprocess +import time + +import pytest + +from pandas.compat import ( + is_ci_environment, + is_platform_arm, + is_platform_mac, + is_platform_windows, +) +import pandas.util._test_decorators as td + +import pandas._testing as tm + +from pandas.io.parsers import read_csv + + +@pytest.fixture +def tips_file(datapath): + """Path to the tips dataset""" + return datapath("io", "data", "csv", "tips.csv") + + +@pytest.fixture +def jsonl_file(datapath): + """Path to a JSONL dataset""" + return datapath("io", "parser", "data", "items.jsonl") + + +@pytest.fixture +def salaries_table(datapath): + """DataFrame with the salaries dataset""" + return read_csv(datapath("io", "parser", "data", "salaries.csv"), sep="\t") + + +@pytest.fixture +def feather_file(datapath): + return datapath("io", "data", "feather", "feather-0_3_1.feather") + + +@pytest.fixture +def s3so(worker_id): + if is_ci_environment(): + url = "http://localhost:5000/" + else: + worker_id = "5" if worker_id == "master" else worker_id.lstrip("gw") + url = f"http://127.0.0.1:555{worker_id}/" + return {"client_kwargs": {"endpoint_url": url}} + + +@pytest.fixture(scope="session") +def s3_base(worker_id): + """ + Fixture for mocking S3 interaction. + + Sets up a moto server in a separate process locally. + Returns the url for the motoserver/moto CI service. + """ + pytest.importorskip("s3fs") + pytest.importorskip("boto3") + + with tm.ensure_safe_environment_variables(): + # temporary workaround as moto fails for botocore >= 1.11 otherwise, + # see https://github.com/spulec/moto/issues/1924 & 1952 + os.environ.setdefault("AWS_ACCESS_KEY_ID", "foobar_key") + os.environ.setdefault("AWS_SECRET_ACCESS_KEY", "foobar_secret") + if is_ci_environment(): + if is_platform_arm() or is_platform_mac() or is_platform_windows(): + # NOT RUN on Windows/MacOS/ARM, only Ubuntu + # - subprocess in CI can cause timeouts + # - Azure pipelines/Github Actions do not support + # container services for the above OSs + # - CircleCI will probably hit the Docker pull rate limit + pytest.skip( + "S3 tests do not have a corresponding service in " + "Windows, MacOS or ARM platforms" + ) + else: + yield "http://localhost:5000" + else: + requests = pytest.importorskip("requests") + pytest.importorskip("moto", minversion="1.3.14") + pytest.importorskip("flask") # server mode needs flask too + + # Launching moto in server mode, i.e., as a separate process + # with an S3 endpoint on localhost + + worker_id = "5" if worker_id == "master" else worker_id.lstrip("gw") + endpoint_port = f"555{worker_id}" + endpoint_uri = f"http://127.0.0.1:{endpoint_port}/" + + # pipe to null to avoid logging in terminal + with subprocess.Popen( + shlex.split(f"moto_server s3 -p {endpoint_port}"), + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) as proc: + + timeout = 5 + while timeout > 0: + try: + # OK to go once server is accepting connections + r = requests.get(endpoint_uri) + if r.ok: + break + except Exception: + pass + timeout -= 0.1 + time.sleep(0.1) + yield endpoint_uri + + proc.terminate() + + +@pytest.fixture +def s3_resource(s3_base, tips_file, jsonl_file, feather_file): +
""" + Sets up S3 bucket with contents + + The primary bucket name is "pandas-test". The following datasets + are loaded. + + - tips.csv + - tips.csv.gz + - tips.csv.bz2 + - items.jsonl + + A private bucket "cant_get_it" is also created. The boto3 s3 resource + is yielded by the fixture. + """ + import boto3 + import s3fs + + test_s3_files = [ + ("tips#1.csv", tips_file), + ("tips.csv", tips_file), + ("tips.csv.gz", tips_file + ".gz"), + ("tips.csv.bz2", tips_file + ".bz2"), + ("items.jsonl", jsonl_file), + ("simple_dataset.feather", feather_file), + ] + + def add_tips_files(bucket_name): + for s3_key, file_name in test_s3_files: + with open(file_name, "rb") as f: + cli.put_object(Bucket=bucket_name, Key=s3_key, Body=f) + + bucket = "pandas-test" + conn = boto3.resource("s3", endpoint_url=s3_base) + cli = boto3.client("s3", endpoint_url=s3_base) + + try: + cli.create_bucket(Bucket=bucket) + except Exception: + # OK is bucket already exists + pass + try: + cli.create_bucket(Bucket="cant_get_it", ACL="private") + except Exception: + # OK is bucket already exists + pass + timeout = 2 + while not cli.list_buckets()["Buckets"] and timeout > 0: + time.sleep(0.1) + timeout -= 0.1 + + add_tips_files(bucket) + add_tips_files("cant_get_it") + s3fs.S3FileSystem.clear_instance_cache() + yield conn + + s3 = s3fs.S3FileSystem(client_kwargs={"endpoint_url": s3_base}) + + try: + s3.rm(bucket, recursive=True) + except Exception: + pass + try: + s3.rm("cant_get_it", recursive=True) + except Exception: + pass + timeout = 2 + while cli.list_buckets()["Buckets"] and timeout > 0: + time.sleep(0.1) + timeout -= 0.1 + + +_compression_formats_params = [ + (".no_compress", None), + ("", None), + (".gz", "gzip"), + (".GZ", "gzip"), + (".bz2", "bz2"), + (".BZ2", "bz2"), + (".zip", "zip"), + (".ZIP", "zip"), + (".xz", "xz"), + (".XZ", "xz"), + pytest.param((".zst", "zstd"), marks=td.skip_if_no("zstandard")), + pytest.param((".ZST", "zstd"), marks=td.skip_if_no("zstandard")), +] + + +@pytest.fixture(params=_compression_formats_params[1:]) +def compression_format(request): + return request.param + + +@pytest.fixture(params=_compression_formats_params) +def compression_ext(request): + return request.param[0] diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/data/fixed_width/fixed_width_format.txt b/.local/lib/python3.8/site-packages/pandas/tests/io/data/fixed_width/fixed_width_format.txt new file mode 100644 index 0000000..bb487d8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/data/fixed_width/fixed_width_format.txt @@ -0,0 +1,3 @@ +A B C +1 2 3 +4 5 6 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/data/gbq_fake_job.txt b/.local/lib/python3.8/site-packages/pandas/tests/io/data/gbq_fake_job.txt new file mode 100644 index 0000000..b099522 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/data/gbq_fake_job.txt @@ -0,0 +1 @@ +{'status': {'state': 'DONE'}, 'kind': 'bigquery#job', 'statistics': {'query': {'cacheHit': True, 'totalBytesProcessed': '0'}, 'endTime': '1377668744674', 'totalBytesProcessed': '0', 'startTime': '1377668744466'}, 'jobReference': {'projectId': '57288129629', 'jobId': 'bqjob_r5f956972f0190bdf_00000140c374bf42_2'}, 'etag': '"4PTsVxg68bQkQs1RJ1Ndewqkgg4/oO4VmgFrAku4N6FWci9s7iFIftc"', 'configuration': {'query': {'createDisposition': 'CREATE_IF_NEEDED', 'query': 'SELECT * FROM [publicdata:samples.shakespeare]', 'writeDisposition': 'WRITE_TRUNCATE', 'destinationTable': {'projectId': '57288129629', 'tableId': 
'anonb5ec450da88eeeb78a27784ea482ee75a146d442', 'datasetId': '_d0b4f5f0d50dc68a3eb0fa6cba66a9a8687d9253'}}}, 'id': '57288129629:bqjob_r5f956972f0190bdf_00000140c374bf42_2', 'selfLink': 'https://www.googleapis.com/bigquery/v2/projects/57288129629/jobs/bqjob_r5f956972f0190bdf_00000140c374bf42_2'} \ No newline at end of file diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/data/legacy_pickle/1.2.4/empty_frame_v1_2_4-GH#42345.pkl b/.local/lib/python3.8/site-packages/pandas/tests/io/data/legacy_pickle/1.2.4/empty_frame_v1_2_4-GH#42345.pkl new file mode 100644 index 0000000..255a745 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/data/legacy_pickle/1.2.4/empty_frame_v1_2_4-GH#42345.pkl differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/data/parquet/simple.parquet b/.local/lib/python3.8/site-packages/pandas/tests/io/data/parquet/simple.parquet new file mode 100644 index 0000000..2862a91 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/data/parquet/simple.parquet differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/data/pickle/test_mi_py27.pkl b/.local/lib/python3.8/site-packages/pandas/tests/io/data/pickle/test_mi_py27.pkl new file mode 100644 index 0000000..89021dd Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/data/pickle/test_mi_py27.pkl differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/data/pickle/test_py27.pkl b/.local/lib/python3.8/site-packages/pandas/tests/io/data/pickle/test_py27.pkl new file mode 100644 index 0000000..5308b86 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/data/pickle/test_py27.pkl differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/data/xml/baby_names.xml b/.local/lib/python3.8/site-packages/pandas/tests/io/data/xml/baby_names.xml new file mode 100644 index 0000000..b4797b7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/data/xml/baby_names.xml @@ -0,0 +1,53 @@ [XML element markup lost in extraction; recoverable values:] 1 José Sofía | 2 Luis Valentina | 3 Carlos Isabella | 4 Juan Camila | 5 Jorge Valeria | 6 Pedro Mariana | 7 Jesús Gabriela | 8 Manuel Sara | 9 Santiago Daniella | 10 Sebastián María José diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/data/xml/books.xml b/.local/lib/python3.8/site-packages/pandas/tests/io/data/xml/books.xml new file mode 100644 index 0000000..666ce60 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/data/xml/books.xml @@ -0,0 +1,21 @@ [XML element markup lost in extraction; recoverable values:] Everyday Italian, Giada De Laurentiis, 2005, 30.00 | Harry Potter, J K. Rowling, 2005, 29.99 | Learning XML, Erik T. Ray, 2003, 39.95 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/data/xml/cta_rail_lines.kml b/.local/lib/python3.8/site-packages/pandas/tests/io/data/xml/cta_rail_lines.kml new file mode 100644 index 0000000..c031137 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/data/xml/cta_rail_lines.kml @@ -0,0 +1,92 @@ [KML element markup lost in extraction; recoverable content follows.] name: CTA_RailLines | name: CTA_RailLines | Placemark: Blue Line (Forest Park)
description: Blue Line (Forest Park) | OBJECTID_1 1 | ASSET_ID 21100001 | LINES Blue Line (Forest Park) | DESCRIPTIO Oak Park to Austin | TYPE Elevated or at Grade | LEGEND BL | ALT_LEGEND BL | BRANCH Blue Line Forest Park | SHAPE.LEN 4060.368778
style #LineStyle01, tessellate 0, altitudeMode clampedToGround
coordinates: -87.77678526964958,41.8708863930319,0 -87.77826234150609,41.87097820122218,0 -87.78251583439344,41.87130129991005,0 -87.78418294588424,41.87145055520308,0 -87.7872369165933,41.8717239119163,0 -87.79160214925886,41.87210797280065,0

Placemark: Red, Purple Line
description: Red, Purple Line | OBJECTID_1 2 | ASSET_ID 21100002 | LINES Red, Purple Line | DESCRIPTIO Lawrence to Wilson | TYPE Elevated or at Grade | LEGEND RD | ALT_LEGEND RDPR | BRANCH Red Line North Side | SHAPE.LEN 1800.132896
style #LineStyle01, tessellate 0, altitudeMode clampedToGround
coordinates: -87.65758750947528,41.96427269188822,0 -87.65802133507393,41.96581929055245,0 -87.65819033925305,41.96621846093642,0 -87.6583189819129,41.96650362897086,0 -87.65835858701473,41.96669002089185,0 -87.65838428411853,41.96688150295095,0 -87.65842208882658,41.96745896091846,0 -87.65846556843937,41.9683761425439,0 -87.65849296214573,41.96913893870342,0

Placemark: Red, Purple Line
description: Red, Purple Line | OBJECTID_1 3 | ASSET_ID 21100003 | LINES Red, Purple Line | DESCRIPTIO Wilson to Sheridan | TYPE Elevated or at Grade | LEGEND RD | ALT_LEGEND RDPR | BRANCH Red Line North Side | SHAPE.LEN 4256.243677
style #LineStyle01, tessellate 0, altitudeMode clampedToGround
coordinates: -87.65492939166126,41.95377494531437,0 -87.65557043199591,41.95376544118533,0 -87.65606302030132,41.95376391658746,0 -87.65623502146268,41.95377379126367,0 -87.65634748981634,41.95380103566435,0 -87.65646537904269,41.95387703994676,0 -87.65656532461145,41.95396622645799,0 -87.65664760856414,41.95404201996044,0 -87.65671750555913,41.95416647054043,0 -87.65673983607117,41.95429949810849,0 -87.65673866475777,41.95441024240925,0 -87.6567690255541,41.95490657227902,0 -87.65683672482363,41.95692259283837,0 -87.6568900886376,41.95861070983142,0 -87.65699865558875,41.96181418669004,0 -87.65756347177603,41.96397045777844,0 -87.65758750947528,41.96427269188822,0

Placemark: Red, Purple Line
description: Red, Purple Line | OBJECTID_1 4 | ASSET_ID 21100004 | LINES Red, Purple Line | DESCRIPTIO Sheridan to Addison | TYPE Elevated or at Grade | LEGEND RD | ALT_LEGEND RDPR | BRANCH Red Line North Side | SHAPE.LEN 2581.713736
style #LineStyle01, tessellate 0, altitudeMode clampedToGround
coordinates: -87.65362593118043,41.94742799535678,0 -87.65363554415794,41.94819886386848,0 -87.6536456393239,41.95059994675451,0 -87.65365831235026,41.95108288489359,0 -87.6536604873874,41.9519954657554,0 -87.65362592053201,41.95245597302328,0 -87.65367158496069,41.95311153649393,0 -87.65368468595476,41.9533202828916,0 -87.65369271253692,41.95343095587119,0 -87.65373335834569,41.95351536301472,0 -87.65378605844126,41.95358212680591,0 -87.65385067928185,41.95364452823767,0 -87.6539390793817,41.95370263886964,0 -87.6540786298351,41.95373403675265,0 -87.65430648647626,41.9537535411832,0 -87.65492939166126,41.95377494531437,0

Placemark: Red, Purple Line
description: Red, Purple Line | OBJECTID_1 5 | ASSET_ID 21100005 | LINES Red, Purple Line | DESCRIPTIO Addison to Clark Junction | TYPE Elevated or at Grade | LEGEND RD | ALT_LEGEND RDPR | BRANCH Red Line North Side | SHAPE.LEN 1918.716686
style #LineStyle01, tessellate 0, altitudeMode clampedToGround
coordinates: -87.65345391792157,41.94217681262115,0 -87.65342448305786,41.94237224420864,0 -87.65339745703922,41.94268217746244,0 -87.65337753982941,41.94288140770284,0 -87.65336256753105,41.94317369618263,0 -87.65338799707138,41.94357253961736,0 -87.65340240886648,41.94389158188269,0 -87.65341837392448,41.94406444407721,0 -87.65342275247338,41.94421065714904,0 -87.65347469646018,41.94434829382345,0 -87.65351486483024,41.94447699917548,0 -87.65353483605053,41.9453896864472,0 -87.65361975532807,41.94689193720703,0 -87.65362593118043,41.94742799535678,0
[end of cta_rail_lines.kml]
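The XML, KML, and XSL files above are test fixtures for pandas' read_xml. As a rough sketch of how such fixtures are typically consumed (illustrative only, not part of this diff; the local file paths and the "k" namespace prefix are assumptions):

import pandas as pd

# One row per Placemark from the KML fixture. KML documents live in the
# standard OGC KML 2.2 namespace, so an explicit namespace mapping is
# needed for the xpath to match.
df = pd.read_xml(
    "cta_rail_lines.kml",
    xpath=".//k:Placemark",
    namespaces={"k": "http://www.opengis.net/kml/2.2"},
)

# With the default lxml parser, read_xml can also apply an XSLT stylesheet
# (such as flatten_doc.xsl above) to flatten nested nodes before parsing.
df_flat = pd.read_xml("cta_rail_lines.kml", stylesheet="flatten_doc.xsl")

print(df.head())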
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/data/xml/flatten_doc.xsl b/.local/lib/python3.8/site-packages/pandas/tests/io/data/xml/flatten_doc.xsl new file mode 100644 index 0000000..a9d62d1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/data/xml/flatten_doc.xsl @@ -0,0 +1,18 @@ [XSLT stylesheet markup lost in extraction] diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/data/xml/row_field_output.xsl b/.local/lib/python3.8/site-packages/pandas/tests/io/data/xml/row_field_output.xsl new file mode 100644 index 0000000..5a0f0e6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/data/xml/row_field_output.xsl @@ -0,0 +1,19 @@ [XSLT stylesheet markup lost in extraction] diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/excel/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/__init__.py new file mode 100644 index 0000000..c434349 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/__init__.py @@ -0,0 +1,35 @@ +import pytest + +from pandas.compat._optional import ( + get_version, + import_optional_dependency, +) + +from pandas.util.version import Version + +pytestmark = [ + pytest.mark.filterwarnings( + # Looks like tree.getiterator is deprecated in favor of tree.iter + "ignore:This method will be removed in future versions:" + "PendingDeprecationWarning" + ), + pytest.mark.filterwarnings( + "ignore:This method will be removed in future versions:DeprecationWarning" + ), + # GH 26552 + pytest.mark.filterwarnings( + "ignore:As the xlwt package is no longer maintained:FutureWarning" + ), + # GH 38571 + pytest.mark.filterwarnings( + "ignore:.*In xlrd >= 2.0, only the xls format is supported:FutureWarning" + ), +] + + +if import_optional_dependency("xlrd", errors="ignore") is None: + xlrd_version = None +else: + import xlrd + + xlrd_version = Version(get_version(xlrd)) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/excel/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..ddf0c99 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/excel/__pycache__/conftest.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/__pycache__/conftest.cpython-38.pyc new file mode 100644 index 0000000..3db3967 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/__pycache__/conftest.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/excel/__pycache__/test_odf.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/__pycache__/test_odf.cpython-38.pyc new file mode 100644 index 0000000..c607200 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/__pycache__/test_odf.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/excel/__pycache__/test_odswriter.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/__pycache__/test_odswriter.cpython-38.pyc new file mode 100644 index 0000000..a47bc17 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/__pycache__/test_odswriter.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/excel/__pycache__/test_openpyxl.cpython-38.pyc
b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/__pycache__/test_openpyxl.cpython-38.pyc new file mode 100644 index 0000000..3f117f3 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/__pycache__/test_openpyxl.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/excel/__pycache__/test_readers.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/__pycache__/test_readers.cpython-38.pyc new file mode 100644 index 0000000..f89f8f7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/__pycache__/test_readers.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/excel/__pycache__/test_style.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/__pycache__/test_style.cpython-38.pyc new file mode 100644 index 0000000..76a3729 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/__pycache__/test_style.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/excel/__pycache__/test_writers.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/__pycache__/test_writers.cpython-38.pyc new file mode 100644 index 0000000..c583d4f Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/__pycache__/test_writers.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/excel/__pycache__/test_xlrd.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/__pycache__/test_xlrd.cpython-38.pyc new file mode 100644 index 0000000..54a156b Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/__pycache__/test_xlrd.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/excel/__pycache__/test_xlsxwriter.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/__pycache__/test_xlsxwriter.cpython-38.pyc new file mode 100644 index 0000000..47ebcd9 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/__pycache__/test_xlsxwriter.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/excel/__pycache__/test_xlwt.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/__pycache__/test_xlwt.cpython-38.pyc new file mode 100644 index 0000000..477163e Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/__pycache__/test_xlwt.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/excel/conftest.py b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/conftest.py new file mode 100644 index 0000000..4ce06c0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/conftest.py @@ -0,0 +1,67 @@ +import pytest + +from pandas.compat import is_platform_windows +import pandas.util._test_decorators as td + +import pandas._testing as tm + +from pandas.io.parsers import read_csv + + +@pytest.fixture +def frame(float_frame): + """ + Returns the first ten items in fixture "float_frame". + """ + return float_frame[:10] + + +@pytest.fixture +def tsframe(): + return tm.makeTimeDataFrame()[:5] + + +@pytest.fixture(params=[True, False]) +def merge_cells(request): + return request.param + + +@pytest.fixture +def df_ref(datapath): + """ + Obtain the reference data from read_csv with the Python engine. 
+ """ + filepath = datapath("io", "data", "csv", "test1.csv") + df_ref = read_csv(filepath, index_col=0, parse_dates=True, engine="python") + return df_ref + + +@pytest.fixture(params=[".xls", ".xlsx", ".xlsm", ".ods", ".xlsb"]) +def read_ext(request): + """ + Valid extensions for reading Excel files. + """ + return request.param + + +# Checking for file leaks can hang on Windows CI +@pytest.fixture(autouse=not is_platform_windows()) +def check_for_file_leaks(): + """ + Fixture to run around every test to ensure that we are not leaking files. + + See also + -------- + _test_decorators.check_file_leaks + """ + # GH#30162 + psutil = td.safe_import("psutil") + if not psutil: + yield + + else: + proc = psutil.Process() + flist = proc.open_files() + yield + flist2 = proc.open_files() + assert flist == flist2 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/excel/test_odf.py b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/test_odf.py new file mode 100644 index 0000000..ddc3c42 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/test_odf.py @@ -0,0 +1,38 @@ +import functools + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + +pytest.importorskip("odf") + + +@pytest.fixture(autouse=True) +def cd_and_set_engine(monkeypatch, datapath): + func = functools.partial(pd.read_excel, engine="odf") + monkeypatch.setattr(pd, "read_excel", func) + monkeypatch.chdir(datapath("io", "data", "excel")) + + +def test_read_invalid_types_raises(): + # the invalid_value_type.ods required manually editing + # of the included content.xml file + with pytest.raises(ValueError, match="Unrecognized type awesome_new_type"): + pd.read_excel("invalid_value_type.ods") + + +def test_read_writer_table(): + # Also test reading tables from an text OpenDocument file + # (.odt) + index = pd.Index(["Row 1", "Row 2", "Row 3"], name="Header") + expected = pd.DataFrame( + [[1, np.nan, 7], [2, np.nan, 8], [3, np.nan, 9]], + index=index, + columns=["Column 1", "Unnamed: 2", "Column 3"], + ) + + result = pd.read_excel("writertable.odt", sheet_name="Table1", index_col=0) + + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/excel/test_odswriter.py b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/test_odswriter.py new file mode 100644 index 0000000..0e6d1da --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/test_odswriter.py @@ -0,0 +1,58 @@ +import re + +import pytest + +import pandas._testing as tm + +from pandas.io.excel import ExcelWriter + +odf = pytest.importorskip("odf") + +pytestmark = pytest.mark.parametrize("ext", [".ods"]) + + +def test_write_append_mode_raises(ext): + msg = "Append mode is not supported with odf!" 
+ + with tm.ensure_clean(ext) as f: + with pytest.raises(ValueError, match=msg): + ExcelWriter(f, engine="odf", mode="a") + + +def test_kwargs(ext): + # GH 42286 + # GH 43445 + # test for error: OpenDocumentSpreadsheet does not accept any arguments + kwargs = {"kwarg": 1} + with tm.ensure_clean(ext) as f: + msg = re.escape("Use of **kwargs is deprecated") + error = re.escape( + "OpenDocumentSpreadsheet() got an unexpected keyword argument 'kwarg'" + ) + with pytest.raises( + TypeError, + match=error, + ): + with tm.assert_produces_warning(FutureWarning, match=msg): + with ExcelWriter(f, engine="odf", **kwargs) as _: + pass + + +@pytest.mark.parametrize("engine_kwargs", [None, {"kwarg": 1}]) +def test_engine_kwargs(ext, engine_kwargs): + # GH 42286 + # GH 43445 + # test for error: OpenDocumentSpreadsheet does not accept any arguments + with tm.ensure_clean(ext) as f: + if engine_kwargs is not None: + error = re.escape( + "OpenDocumentSpreadsheet() got an unexpected keyword argument 'kwarg'" + ) + with pytest.raises( + TypeError, + match=error, + ): + ExcelWriter(f, engine="odf", engine_kwargs=engine_kwargs) + else: + with ExcelWriter(f, engine="odf", engine_kwargs=engine_kwargs) as _: + pass diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/excel/test_openpyxl.py b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/test_openpyxl.py new file mode 100644 index 0000000..e0d4a0c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/test_openpyxl.py @@ -0,0 +1,377 @@ +from pathlib import Path +import re + +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame +import pandas._testing as tm + +from pandas.io.excel import ( + ExcelWriter, + _OpenpyxlWriter, +) + +openpyxl = pytest.importorskip("openpyxl") + +pytestmark = pytest.mark.parametrize("ext", [".xlsx"]) + + +def test_to_excel_styleconverter(ext): + from openpyxl import styles + + hstyle = { + "font": {"color": "00FF0000", "bold": True}, + "borders": {"top": "thin", "right": "thin", "bottom": "thin", "left": "thin"}, + "alignment": {"horizontal": "center", "vertical": "top"}, + "fill": {"patternType": "solid", "fgColor": {"rgb": "006666FF", "tint": 0.3}}, + "number_format": {"format_code": "0.00"}, + "protection": {"locked": True, "hidden": False}, + } + + font_color = styles.Color("00FF0000") + font = styles.Font(bold=True, color=font_color) + side = styles.Side(style=styles.borders.BORDER_THIN) + border = styles.Border(top=side, right=side, bottom=side, left=side) + alignment = styles.Alignment(horizontal="center", vertical="top") + fill_color = styles.Color(rgb="006666FF", tint=0.3) + fill = styles.PatternFill(patternType="solid", fgColor=fill_color) + + number_format = "0.00" + + protection = styles.Protection(locked=True, hidden=False) + + kw = _OpenpyxlWriter._convert_to_style_kwargs(hstyle) + assert kw["font"] == font + assert kw["border"] == border + assert kw["alignment"] == alignment + assert kw["fill"] == fill + assert kw["number_format"] == number_format + assert kw["protection"] == protection + + +def test_write_cells_merge_styled(ext): + from pandas.io.formats.excel import ExcelCell + + sheet_name = "merge_styled" + + sty_b1 = {"font": {"color": "00FF0000"}} + sty_a2 = {"font": {"color": "0000FF00"}} + + initial_cells = [ + ExcelCell(col=1, row=0, val=42, style=sty_b1), + ExcelCell(col=0, row=1, val=99, style=sty_a2), + ] + + sty_merged = {"font": {"color": "000000FF", "bold": True}} + sty_kwargs = _OpenpyxlWriter._convert_to_style_kwargs(sty_merged) + 
openpyxl_sty_merged = sty_kwargs["font"] + merge_cells = [ + ExcelCell( + col=0, row=0, val="pandas", mergestart=1, mergeend=1, style=sty_merged + ) + ] + + with tm.ensure_clean(ext) as path: + with _OpenpyxlWriter(path) as writer: + writer.write_cells(initial_cells, sheet_name=sheet_name) + writer.write_cells(merge_cells, sheet_name=sheet_name) + + wks = writer.sheets[sheet_name] + xcell_b1 = wks["B1"] + xcell_a2 = wks["A2"] + assert xcell_b1.font == openpyxl_sty_merged + assert xcell_a2.font == openpyxl_sty_merged + + +@pytest.mark.parametrize("iso_dates", [True, False]) +def test_kwargs(ext, iso_dates): + # GH 42286 GH 43445 + kwargs = {"iso_dates": iso_dates} + with tm.ensure_clean(ext) as f: + msg = re.escape("Use of **kwargs is deprecated") + with tm.assert_produces_warning(FutureWarning, match=msg): + with ExcelWriter(f, engine="openpyxl", **kwargs) as writer: + assert writer.book.iso_dates == iso_dates + # ExcelWriter won't allow us to close without writing something + DataFrame().to_excel(writer) + + +@pytest.mark.parametrize("iso_dates", [True, False]) +def test_engine_kwargs_write(ext, iso_dates): + # GH 42286 GH 43445 + engine_kwargs = {"iso_dates": iso_dates} + with tm.ensure_clean(ext) as f: + with ExcelWriter(f, engine="openpyxl", engine_kwargs=engine_kwargs) as writer: + assert writer.book.iso_dates == iso_dates + # ExcelWriter won't allow us to close without writing something + DataFrame().to_excel(writer) + + +def test_engine_kwargs_append_invalid(ext): + # GH 43445 + # test whether an invalid engine kwarg actually raises + with tm.ensure_clean(ext) as f: + DataFrame(["hello", "world"]).to_excel(f) + with pytest.raises( + TypeError, + match=re.escape( + "load_workbook() got an unexpected keyword argument 'apple_banana'" + ), + ): + with ExcelWriter( + f, engine="openpyxl", mode="a", engine_kwargs={"apple_banana": "fruit"} + ) as writer: + # ExcelWriter needs us to write something to close properly + DataFrame(["good"]).to_excel(writer, sheet_name="Sheet2") + + +@pytest.mark.parametrize("data_only, expected", [(True, 0), (False, "=1+1")]) +def test_engine_kwargs_append_data_only(ext, data_only, expected): + # GH 43445 + # tests whether the data_only engine_kwarg actually works well for + # openpyxl's load_workbook + with tm.ensure_clean(ext) as f: + DataFrame(["=1+1"]).to_excel(f) + with ExcelWriter( + f, engine="openpyxl", mode="a", engine_kwargs={"data_only": data_only} + ) as writer: + assert writer.sheets["Sheet1"]["B2"].value == expected + # ExcelWriter needs us to write something to close properly
+ DataFrame().to_excel(writer, sheet_name="Sheet2") + + +@pytest.mark.parametrize( + "mode,expected", [("w", ["baz"]), ("a", ["foo", "bar", "baz"])] +) +def test_write_append_mode(ext, mode, expected): + df = DataFrame([1], columns=["baz"]) + + with tm.ensure_clean(ext) as f: + wb = openpyxl.Workbook() + wb.worksheets[0].title = "foo" + wb.worksheets[0]["A1"].value = "foo" + wb.create_sheet("bar") + wb.worksheets[1]["A1"].value = "bar" + wb.save(f) + + with ExcelWriter(f, engine="openpyxl", mode=mode) as writer: + df.to_excel(writer, sheet_name="baz", index=False) + + wb2 = openpyxl.load_workbook(f) + result = [sheet.title for sheet in wb2.worksheets] + assert result == expected + + for index, cell_value in enumerate(expected): + assert wb2.worksheets[index]["A1"].value == cell_value + + +@pytest.mark.parametrize( + "if_sheet_exists,num_sheets,expected", + [ + ("new", 2, ["apple", "banana"]), + ("replace", 1, ["pear"]), + ("overlay", 1, ["pear", "banana"]), + ], +) +def test_if_sheet_exists_append_modes(ext, if_sheet_exists, num_sheets, expected): + # GH 40230 + df1 = DataFrame({"fruit": ["apple", "banana"]}) + df2 = DataFrame({"fruit": ["pear"]}) + + with tm.ensure_clean(ext) as f: + df1.to_excel(f, engine="openpyxl", sheet_name="foo", index=False) + with ExcelWriter( + f, engine="openpyxl", mode="a", if_sheet_exists=if_sheet_exists + ) as writer: + df2.to_excel(writer, sheet_name="foo", index=False) + + wb = openpyxl.load_workbook(f) + assert len(wb.sheetnames) == num_sheets + assert wb.sheetnames[0] == "foo" + result = pd.read_excel(wb, "foo", engine="openpyxl") + assert list(result["fruit"]) == expected + if len(wb.sheetnames) == 2: + result = pd.read_excel(wb, wb.sheetnames[1], engine="openpyxl") + tm.assert_frame_equal(result, df2) + wb.close() + + +@pytest.mark.parametrize( + "startrow, startcol, greeting, goodbye", + [ + (0, 0, ["poop", "world"], ["goodbye", "people"]), + (0, 1, ["hello", "world"], ["poop", "people"]), + (1, 0, ["hello", "poop"], ["goodbye", "people"]), + (1, 1, ["hello", "world"], ["goodbye", "poop"]), + ], +) +def test_append_overlay_startrow_startcol(ext, startrow, startcol, greeting, goodbye): + df1 = DataFrame({"greeting": ["hello", "world"], "goodbye": ["goodbye", "people"]}) + df2 = DataFrame(["poop"]) + + with tm.ensure_clean(ext) as f: + df1.to_excel(f, engine="openpyxl", sheet_name="poo", index=False) + with ExcelWriter( + f, engine="openpyxl", mode="a", if_sheet_exists="overlay" + ) as writer: + # use startrow+1 because we don't have a header + df2.to_excel( + writer, + index=False, + header=False, + startrow=startrow + 1, + startcol=startcol, + sheet_name="poo", + ) + + result = pd.read_excel(f, sheet_name="poo", engine="openpyxl") + expected = DataFrame({"greeting": greeting, "goodbye": goodbye}) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "if_sheet_exists,msg", + [ + ( + "invalid", + "'invalid' is not valid for if_sheet_exists. 
Valid options " + "are 'error', 'new', 'replace' and 'overlay'.", + ), + ( + "error", + "Sheet 'foo' already exists and if_sheet_exists is set to 'error'.", + ), + ( + None, + "Sheet 'foo' already exists and if_sheet_exists is set to 'error'.", + ), + ], +) +def test_if_sheet_exists_raises(ext, if_sheet_exists, msg): + # GH 40230 + df = DataFrame({"fruit": ["pear"]}) + with tm.ensure_clean(ext) as f: + with pytest.raises(ValueError, match=re.escape(msg)): + df.to_excel(f, "foo", engine="openpyxl") + with ExcelWriter( + f, engine="openpyxl", mode="a", if_sheet_exists=if_sheet_exists + ) as writer: + df.to_excel(writer, sheet_name="foo") + + +def test_to_excel_with_openpyxl_engine(ext): + # GH 29854 + with tm.ensure_clean(ext) as filename: + + df1 = DataFrame({"A": np.linspace(1, 10, 10)}) + df2 = DataFrame({"B": np.linspace(1, 20, 10)}) + df = pd.concat([df1, df2], axis=1) + styled = df.style.applymap( + lambda val: "color: %s" % ("red" if val < 0 else "black") + ).highlight_max() + + styled.to_excel(filename, engine="openpyxl") + + +@pytest.mark.parametrize("read_only", [True, False]) +def test_read_workbook(datapath, ext, read_only): + # GH 39528 + filename = datapath("io", "data", "excel", "test1" + ext) + wb = openpyxl.load_workbook(filename, read_only=read_only) + result = pd.read_excel(wb, engine="openpyxl") + wb.close() + expected = pd.read_excel(filename) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "header, expected_data", + [ + ( + 0, + { + "Title": [np.nan, "A", 1, 2, 3], + "Unnamed: 1": [np.nan, "B", 4, 5, 6], + "Unnamed: 2": [np.nan, "C", 7, 8, 9], + }, + ), + (2, {"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}), + ], +) +@pytest.mark.parametrize( + "filename", ["dimension_missing", "dimension_small", "dimension_large"] +) +# When read_only is None, use read_excel instead of a workbook +@pytest.mark.parametrize("read_only", [True, False, None]) +def test_read_with_bad_dimension( + datapath, ext, header, expected_data, filename, read_only, request +): + # GH 38956, 39001 - no/incorrect dimension information + path = datapath("io", "data", "excel", f"{filename}{ext}") + if read_only is None: + result = pd.read_excel(path, header=header) + else: + wb = openpyxl.load_workbook(path, read_only=read_only) + result = pd.read_excel(wb, engine="openpyxl", header=header) + wb.close() + expected = DataFrame(expected_data) + tm.assert_frame_equal(result, expected) + + +def test_append_mode_file(ext): + # GH 39576 + df = DataFrame() + + with tm.ensure_clean(ext) as f: + df.to_excel(f, engine="openpyxl") + + with ExcelWriter( + f, mode="a", engine="openpyxl", if_sheet_exists="new" + ) as writer: + df.to_excel(writer) + + # make sure that zip files are not concatenated by making sure that + # "docProps/app.xml" only occurs twice in the file + data = Path(f).read_bytes() + first = data.find(b"docProps/app.xml") + second = data.find(b"docProps/app.xml", first + 1) + third = data.find(b"docProps/app.xml", second + 1) + assert second != -1 and third == -1 + + +# When read_only is None, use read_excel instead of a workbook +@pytest.mark.parametrize("read_only", [True, False, None]) +def test_read_with_empty_trailing_rows(datapath, ext, read_only, request): + # GH 39181 + path = datapath("io", "data", "excel", f"empty_trailing_rows{ext}") + if read_only is None: + result = pd.read_excel(path) + else: + wb = openpyxl.load_workbook(path, read_only=read_only) + result = pd.read_excel(wb, engine="openpyxl") + wb.close() + expected = DataFrame( + { + "Title": [np.nan, "A", 
1, 2, 3], + "Unnamed: 1": [np.nan, "B", 4, 5, 6], + "Unnamed: 2": [np.nan, "C", 7, 8, 9], + } + ) + tm.assert_frame_equal(result, expected) + + +# When read_only is None, use read_excel instead of a workbook +@pytest.mark.parametrize("read_only", [True, False, None]) +def test_read_empty_with_blank_row(datapath, ext, read_only): + # GH 39547 - empty excel file with a row that has no data + path = datapath("io", "data", "excel", f"empty_with_blank_row{ext}") + if read_only is None: + result = pd.read_excel(path) + else: + wb = openpyxl.load_workbook(path, read_only=read_only) + result = pd.read_excel(wb, engine="openpyxl") + wb.close() + expected = DataFrame() + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/excel/test_readers.py b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/test_readers.py new file mode 100644 index 0000000..6d533d6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/test_readers.py @@ -0,0 +1,1578 @@ +from datetime import ( + datetime, + time, +) +from functools import partial +import os +from pathlib import Path +from urllib.error import URLError +from zipfile import BadZipFile + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, +) +import pandas._testing as tm +from pandas.tests.io.excel import xlrd_version +from pandas.util.version import Version + +read_ext_params = [".xls", ".xlsx", ".xlsm", ".xlsb", ".ods"] +engine_params = [ + # Add any engines to test here + # When defusedxml is installed it triggers deprecation warnings for + # xlrd and openpyxl, so catch those here + pytest.param( + "xlrd", + marks=[ + td.skip_if_no("xlrd"), + ], + ), + pytest.param( + "openpyxl", + marks=[ + td.skip_if_no("openpyxl"), + pytest.mark.filterwarnings("ignore:.*html argument"), + ], + ), + pytest.param( + None, + marks=[ + td.skip_if_no("xlrd"), + ], + ), + pytest.param("pyxlsb", marks=td.skip_if_no("pyxlsb")), + pytest.param("odf", marks=td.skip_if_no("odf")), +] + + +def _is_valid_engine_ext_pair(engine, read_ext: str) -> bool: + """ + Filter out invalid (engine, ext) pairs instead of skipping, as that + produces 500+ pytest.skips. + """ + engine = engine.values[0] + if engine == "openpyxl" and read_ext == ".xls": + return False + if engine == "odf" and read_ext != ".ods": + return False + if read_ext == ".ods" and engine != "odf": + return False + if engine == "pyxlsb" and read_ext != ".xlsb": + return False + if read_ext == ".xlsb" and engine != "pyxlsb": + return False + if ( + engine == "xlrd" + and xlrd_version is not None + and xlrd_version >= Version("2") + and read_ext != ".xls" + ): + return False + return True + + +def _transfer_marks(engine, read_ext): + """ + engine gives us a pytest.param object with some marks, read_ext is just + a string. We need to generate a new pytest.param inheriting the marks. + """ + values = engine.values + (read_ext,) + new_param = pytest.param(values, marks=engine.marks) + return new_param + + +@pytest.fixture( + params=[ + _transfer_marks(eng, ext) + for eng in engine_params + for ext in read_ext_params + if _is_valid_engine_ext_pair(eng, ext) + ], +) +def engine_and_read_ext(request): + """ + Fixture for Excel reader engine and read_ext, only including valid pairs. 
+ """ + return request.param + + +@pytest.fixture +def engine(engine_and_read_ext): + engine, read_ext = engine_and_read_ext + return engine + + +@pytest.fixture +def read_ext(engine_and_read_ext): + engine, read_ext = engine_and_read_ext + return read_ext + + +class TestReaders: + @pytest.fixture(autouse=True) + def cd_and_set_engine(self, engine, datapath, monkeypatch): + """ + Change directory and set engine for read_excel calls. + """ + func = partial(pd.read_excel, engine=engine) + monkeypatch.chdir(datapath("io", "data", "excel")) + monkeypatch.setattr(pd, "read_excel", func) + + def test_engine_used(self, read_ext, engine, monkeypatch): + # GH 38884 + def parser(self, *args, **kwargs): + return self.engine + + monkeypatch.setattr(pd.ExcelFile, "parse", parser) + + expected_defaults = { + "xlsx": "openpyxl", + "xlsm": "openpyxl", + "xlsb": "pyxlsb", + "xls": "xlrd", + "ods": "odf", + } + + with open("test1" + read_ext, "rb") as f: + result = pd.read_excel(f) + + if engine is not None: + expected = engine + else: + expected = expected_defaults[read_ext[1:]] + assert result == expected + + def test_usecols_int(self, read_ext): + # usecols as int + msg = "Passing an integer for `usecols`" + with pytest.raises(ValueError, match=msg): + pd.read_excel( + "test1" + read_ext, sheet_name="Sheet1", index_col=0, usecols=3 + ) + + # usecols as int + with pytest.raises(ValueError, match=msg): + pd.read_excel( + "test1" + read_ext, + sheet_name="Sheet2", + skiprows=[1], + index_col=0, + usecols=3, + ) + + def test_usecols_list(self, request, read_ext, df_ref): + if read_ext == ".xlsb": + request.node.add_marker( + pytest.mark.xfail( + reason="Sheets containing datetimes not supported by pyxlsb" + ) + ) + + df_ref = df_ref.reindex(columns=["B", "C"]) + df1 = pd.read_excel( + "test1" + read_ext, sheet_name="Sheet1", index_col=0, usecols=[0, 2, 3] + ) + df2 = pd.read_excel( + "test1" + read_ext, + sheet_name="Sheet2", + skiprows=[1], + index_col=0, + usecols=[0, 2, 3], + ) + + # TODO add index to xls file) + tm.assert_frame_equal(df1, df_ref, check_names=False) + tm.assert_frame_equal(df2, df_ref, check_names=False) + + def test_usecols_str(self, request, read_ext, df_ref): + if read_ext == ".xlsb": + request.node.add_marker( + pytest.mark.xfail( + reason="Sheets containing datetimes not supported by pyxlsb" + ) + ) + + df1 = df_ref.reindex(columns=["A", "B", "C"]) + df2 = pd.read_excel( + "test1" + read_ext, sheet_name="Sheet1", index_col=0, usecols="A:D" + ) + df3 = pd.read_excel( + "test1" + read_ext, + sheet_name="Sheet2", + skiprows=[1], + index_col=0, + usecols="A:D", + ) + + # TODO add index to xls, read xls ignores index name ? 
+ tm.assert_frame_equal(df2, df1, check_names=False) + tm.assert_frame_equal(df3, df1, check_names=False) + + df1 = df_ref.reindex(columns=["B", "C"]) + df2 = pd.read_excel( + "test1" + read_ext, sheet_name="Sheet1", index_col=0, usecols="A,C,D" + ) + df3 = pd.read_excel( + "test1" + read_ext, + sheet_name="Sheet2", + skiprows=[1], + index_col=0, + usecols="A,C,D", + ) + # TODO add index to xls file + tm.assert_frame_equal(df2, df1, check_names=False) + tm.assert_frame_equal(df3, df1, check_names=False) + + df1 = df_ref.reindex(columns=["B", "C"]) + df2 = pd.read_excel( + "test1" + read_ext, sheet_name="Sheet1", index_col=0, usecols="A,C:D" + ) + df3 = pd.read_excel( + "test1" + read_ext, + sheet_name="Sheet2", + skiprows=[1], + index_col=0, + usecols="A,C:D", + ) + tm.assert_frame_equal(df2, df1, check_names=False) + tm.assert_frame_equal(df3, df1, check_names=False) + + @pytest.mark.parametrize( + "usecols", [[0, 1, 3], [0, 3, 1], [1, 0, 3], [1, 3, 0], [3, 0, 1], [3, 1, 0]] + ) + def test_usecols_diff_positional_int_columns_order( + self, request, read_ext, usecols, df_ref + ): + if read_ext == ".xlsb": + request.node.add_marker( + pytest.mark.xfail( + reason="Sheets containing datetimes not supported by pyxlsb" + ) + ) + + expected = df_ref[["A", "C"]] + result = pd.read_excel( + "test1" + read_ext, sheet_name="Sheet1", index_col=0, usecols=usecols + ) + tm.assert_frame_equal(result, expected, check_names=False) + + @pytest.mark.parametrize("usecols", [["B", "D"], ["D", "B"]]) + def test_usecols_diff_positional_str_columns_order(self, read_ext, usecols, df_ref): + expected = df_ref[["B", "D"]] + expected.index = range(len(expected)) + + result = pd.read_excel("test1" + read_ext, sheet_name="Sheet1", usecols=usecols) + tm.assert_frame_equal(result, expected, check_names=False) + + def test_read_excel_without_slicing(self, request, read_ext, df_ref): + if read_ext == ".xlsb": + request.node.add_marker( + pytest.mark.xfail( + reason="Sheets containing datetimes not supported by pyxlsb" + ) + ) + + expected = df_ref + result = pd.read_excel("test1" + read_ext, sheet_name="Sheet1", index_col=0) + tm.assert_frame_equal(result, expected, check_names=False) + + def test_usecols_excel_range_str(self, request, read_ext, df_ref): + if read_ext == ".xlsb": + request.node.add_marker( + pytest.mark.xfail( + reason="Sheets containing datetimes not supported by pyxlsb" + ) + ) + + expected = df_ref[["C", "D"]] + result = pd.read_excel( + "test1" + read_ext, sheet_name="Sheet1", index_col=0, usecols="A,D:E" + ) + tm.assert_frame_equal(result, expected, check_names=False) + + def test_usecols_excel_range_str_invalid(self, read_ext): + msg = "Invalid column name: E1" + + with pytest.raises(ValueError, match=msg): + pd.read_excel("test1" + read_ext, sheet_name="Sheet1", usecols="D:E1") + + def test_index_col_label_error(self, read_ext): + msg = "list indices must be integers.*, not str" + + with pytest.raises(TypeError, match=msg): + pd.read_excel( + "test1" + read_ext, + sheet_name="Sheet1", + index_col=["A"], + usecols=["A", "C"], + ) + + def test_index_col_empty(self, read_ext): + # see gh-9208 + result = pd.read_excel( + "test1" + read_ext, sheet_name="Sheet3", index_col=["A", "B", "C"] + ) + expected = DataFrame( + columns=["D", "E", "F"], + index=MultiIndex(levels=[[]] * 3, codes=[[]] * 3, names=["A", "B", "C"]), + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("index_col", [None, 2]) + def test_index_col_with_unnamed(self, read_ext, index_col): + # see gh-18792 + result = 
pd.read_excel( + "test1" + read_ext, sheet_name="Sheet4", index_col=index_col + ) + expected = DataFrame( + [["i1", "a", "x"], ["i2", "b", "y"]], columns=["Unnamed: 0", "col1", "col2"] + ) + if index_col: + expected = expected.set_index(expected.columns[index_col]) + + tm.assert_frame_equal(result, expected) + + def test_usecols_pass_non_existent_column(self, read_ext): + msg = ( + "Usecols do not match columns, " + "columns expected but not found: " + r"\['E'\]" + ) + + with pytest.raises(ValueError, match=msg): + pd.read_excel("test1" + read_ext, usecols=["E"]) + + def test_usecols_wrong_type(self, read_ext): + msg = ( + "'usecols' must either be list-like of " + "all strings, all unicode, all integers or a callable." + ) + + with pytest.raises(ValueError, match=msg): + pd.read_excel("test1" + read_ext, usecols=["E1", 0]) + + def test_excel_stop_iterator(self, read_ext): + + parsed = pd.read_excel("test2" + read_ext, sheet_name="Sheet1") + expected = DataFrame([["aaaa", "bbbbb"]], columns=["Test", "Test1"]) + tm.assert_frame_equal(parsed, expected) + + def test_excel_cell_error_na(self, request, read_ext): + if read_ext == ".xlsb": + request.node.add_marker( + pytest.mark.xfail( + reason="Sheets containing datetimes not supported by pyxlsb" + ) + ) + + parsed = pd.read_excel("test3" + read_ext, sheet_name="Sheet1") + expected = DataFrame([[np.nan]], columns=["Test"]) + tm.assert_frame_equal(parsed, expected) + + def test_excel_table(self, request, read_ext, df_ref): + if read_ext == ".xlsb": + request.node.add_marker( + pytest.mark.xfail( + reason="Sheets containing datetimes not supported by pyxlsb" + ) + ) + + df1 = pd.read_excel("test1" + read_ext, sheet_name="Sheet1", index_col=0) + df2 = pd.read_excel( + "test1" + read_ext, sheet_name="Sheet2", skiprows=[1], index_col=0 + ) + # TODO add index to file + tm.assert_frame_equal(df1, df_ref, check_names=False) + tm.assert_frame_equal(df2, df_ref, check_names=False) + + df3 = pd.read_excel( + "test1" + read_ext, sheet_name="Sheet1", index_col=0, skipfooter=1 + ) + tm.assert_frame_equal(df3, df1.iloc[:-1]) + + def test_reader_special_dtypes(self, request, read_ext): + if read_ext == ".xlsb": + request.node.add_marker( + pytest.mark.xfail( + reason="Sheets containing datetimes not supported by pyxlsb" + ) + ) + + expected = DataFrame.from_dict( + { + "IntCol": [1, 2, -3, 4, 0], + "FloatCol": [1.25, 2.25, 1.83, 1.92, 0.0000000005], + "BoolCol": [True, False, True, True, False], + "StrCol": [1, 2, 3, 4, 5], + # GH5394 - this is why convert_float isn't vectorized + "Str2Col": ["a", 3, "c", "d", "e"], + "DateCol": [ + datetime(2013, 10, 30), + datetime(2013, 10, 31), + datetime(1905, 1, 1), + datetime(2013, 12, 14), + datetime(2015, 3, 14), + ], + }, + ) + basename = "test_types" + + # should read in correctly and infer types + actual = pd.read_excel(basename + read_ext, sheet_name="Sheet1") + tm.assert_frame_equal(actual, expected) + + # if not coercing number, then int comes in as float + float_expected = expected.copy() + float_expected["IntCol"] = float_expected["IntCol"].astype(float) + float_expected.loc[float_expected.index[1], "Str2Col"] = 3.0 + with tm.assert_produces_warning( + FutureWarning, + match="convert_float is deprecated", + raise_on_extra_warnings=False, + ): + # raise_on_extra_warnings because xlrd raises a PendingDeprecationWarning + # on database job Linux_py37_IO (ci/deps/actions-37-db.yaml) + # See GH#41176 + actual = pd.read_excel( + basename + read_ext, sheet_name="Sheet1", convert_float=False + ) + 
tm.assert_frame_equal(actual, float_expected) + + # check setting Index (assuming xls and xlsx are the same here) + for icol, name in enumerate(expected.columns): + actual = pd.read_excel( + basename + read_ext, sheet_name="Sheet1", index_col=icol + ) + exp = expected.set_index(name) + tm.assert_frame_equal(actual, exp) + + # convert_float and converters should be different but both accepted + expected["StrCol"] = expected["StrCol"].apply(str) + actual = pd.read_excel( + basename + read_ext, sheet_name="Sheet1", converters={"StrCol": str} + ) + tm.assert_frame_equal(actual, expected) + + no_convert_float = float_expected.copy() + no_convert_float["StrCol"] = no_convert_float["StrCol"].apply(str) + with tm.assert_produces_warning( + FutureWarning, + match="convert_float is deprecated", + raise_on_extra_warnings=False, + ): + # raise_on_extra_warnings because xlrd raises a PendingDeprecationWarning + # on database job Linux_py37_IO (ci/deps/actions-37-db.yaml) + # See GH#41176 + actual = pd.read_excel( + basename + read_ext, + sheet_name="Sheet1", + convert_float=False, + converters={"StrCol": str}, + ) + tm.assert_frame_equal(actual, no_convert_float) + + # GH8212 - support for converters and missing values + def test_reader_converters(self, read_ext): + + basename = "test_converters" + + expected = DataFrame.from_dict( + { + "IntCol": [1, 2, -3, -1000, 0], + "FloatCol": [12.5, np.nan, 18.3, 19.2, 0.000000005], + "BoolCol": ["Found", "Found", "Found", "Not found", "Found"], + "StrCol": ["1", np.nan, "3", "4", "5"], + } + ) + + converters = { + "IntCol": lambda x: int(x) if x != "" else -1000, + "FloatCol": lambda x: 10 * x if x else np.nan, + 2: lambda x: "Found" if x != "" else "Not found", + 3: lambda x: str(x) if x else "", + } + + # should read in correctly and set types of single cells (not array + # dtypes) + actual = pd.read_excel( + basename + read_ext, sheet_name="Sheet1", converters=converters + ) + tm.assert_frame_equal(actual, expected) + + def test_reader_dtype(self, read_ext): + # GH 8212 + basename = "testdtype" + actual = pd.read_excel(basename + read_ext) + + expected = DataFrame( + { + "a": [1, 2, 3, 4], + "b": [2.5, 3.5, 4.5, 5.5], + "c": [1, 2, 3, 4], + "d": [1.0, 2.0, np.nan, 4.0], + } + ).reindex(columns=["a", "b", "c", "d"]) + + tm.assert_frame_equal(actual, expected) + + actual = pd.read_excel( + basename + read_ext, dtype={"a": "float64", "b": "float32", "c": str} + ) + + expected["a"] = expected["a"].astype("float64") + expected["b"] = expected["b"].astype("float32") + expected["c"] = ["001", "002", "003", "004"] + tm.assert_frame_equal(actual, expected) + + msg = "Unable to convert column d to type int64" + with pytest.raises(ValueError, match=msg): + pd.read_excel(basename + read_ext, dtype={"d": "int64"}) + + @pytest.mark.parametrize( + "dtype,expected", + [ + ( + None, + DataFrame( + { + "a": [1, 2, 3, 4], + "b": [2.5, 3.5, 4.5, 5.5], + "c": [1, 2, 3, 4], + "d": [1.0, 2.0, np.nan, 4.0], + } + ), + ), + ( + {"a": "float64", "b": "float32", "c": str, "d": str}, + DataFrame( + { + "a": Series([1, 2, 3, 4], dtype="float64"), + "b": Series([2.5, 3.5, 4.5, 5.5], dtype="float32"), + "c": ["001", "002", "003", "004"], + "d": ["1", "2", np.nan, "4"], + } + ), + ), + ], + ) + def test_reader_dtype_str(self, read_ext, dtype, expected): + # see gh-20377 + basename = "testdtype" + + actual = pd.read_excel(basename + read_ext, dtype=dtype) + tm.assert_frame_equal(actual, expected) + + @pytest.mark.parametrize("dtypes, exp_value", [({}, "1"), ({"a.1": "int64"}, 1)]) + def 
test_dtype_mangle_dup_cols(self, read_ext, dtypes, exp_value): + # GH#35211 + basename = "df_mangle_dup_col_dtypes" + dtype_dict = {"a": str, **dtypes} + dtype_dict_copy = dtype_dict.copy() + # GH#42462 + result = pd.read_excel(basename + read_ext, dtype=dtype_dict) + expected = DataFrame({"a": ["1"], "a.1": [exp_value]}) + assert dtype_dict == dtype_dict_copy, "dtype dict changed" + tm.assert_frame_equal(result, expected) + + def test_reader_spaces(self, read_ext): + # see gh-32207 + basename = "test_spaces" + + actual = pd.read_excel(basename + read_ext) + expected = DataFrame( + { + "testcol": [ + "this is great", + "4 spaces", + "1 trailing ", + " 1 leading", + "2 spaces multiple times", + ] + } + ) + tm.assert_frame_equal(actual, expected) + + # gh-36122, gh-35802 + @pytest.mark.parametrize( + "basename,expected", + [ + ("gh-35802", DataFrame({"COLUMN": ["Test (1)"]})), + ("gh-36122", DataFrame(columns=["got 2nd sa"])), + ], + ) + def test_read_excel_ods_nested_xml(self, engine, read_ext, basename, expected): + # see gh-35802 + if engine != "odf": + pytest.skip(f"Skipped for engine: {engine}") + + actual = pd.read_excel(basename + read_ext) + tm.assert_frame_equal(actual, expected) + + def test_reading_all_sheets(self, read_ext): + # Test reading all sheet names by setting sheet_name to None, + # Ensure a dict is returned. + # See PR #9450 + basename = "test_multisheet" + dfs = pd.read_excel(basename + read_ext, sheet_name=None) + # ensure this is not alphabetical to test order preservation + expected_keys = ["Charlie", "Alpha", "Beta"] + tm.assert_contains_all(expected_keys, dfs.keys()) + # Issue 9930 + # Ensure sheet order is preserved + assert expected_keys == list(dfs.keys()) + + def test_reading_multiple_specific_sheets(self, read_ext): + # Test reading specific sheet names by specifying a mixed list + # of integers and strings, and confirm that duplicated sheet + # references (positions/names) are removed properly. + # Ensure a dict is returned + # See PR #9450 + basename = "test_multisheet" + # Explicitly request duplicates. Only the set should be returned. + expected_keys = [2, "Charlie", "Charlie"] + dfs = pd.read_excel(basename + read_ext, sheet_name=expected_keys) + expected_keys = list(set(expected_keys)) + tm.assert_contains_all(expected_keys, dfs.keys()) + assert len(expected_keys) == len(dfs.keys()) + + def test_reading_all_sheets_with_blank(self, read_ext): + # Test reading all sheet names by setting sheet_name to None, + # In the case where some sheets are blank. 
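+        # A fully blank sheet should still show up as a key, mapped to an
+        # empty DataFrame (cf. test_read_excel_blank below).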
+ # Issue #11711 + basename = "blank_with_header" + dfs = pd.read_excel(basename + read_ext, sheet_name=None) + expected_keys = ["Sheet1", "Sheet2", "Sheet3"] + tm.assert_contains_all(expected_keys, dfs.keys()) + + # GH6403 + def test_read_excel_blank(self, read_ext): + actual = pd.read_excel("blank" + read_ext, sheet_name="Sheet1") + tm.assert_frame_equal(actual, DataFrame()) + + def test_read_excel_blank_with_header(self, read_ext): + expected = DataFrame(columns=["col_1", "col_2"]) + actual = pd.read_excel("blank_with_header" + read_ext, sheet_name="Sheet1") + tm.assert_frame_equal(actual, expected) + + def test_date_conversion_overflow(self, request, engine, read_ext): + # GH 10001 : pandas.ExcelFile ignore parse_dates=False + if engine == "pyxlsb": + request.node.add_marker( + pytest.mark.xfail( + reason="Sheets containing datetimes not supported by pyxlsb" + ) + ) + + expected = DataFrame( + [ + [pd.Timestamp("2016-03-12"), "Marc Johnson"], + [pd.Timestamp("2016-03-16"), "Jack Black"], + [1e20, "Timothy Brown"], + ], + columns=["DateColWithBigInt", "StringCol"], + ) + + if engine == "openpyxl": + request.node.add_marker( + pytest.mark.xfail(reason="Maybe not supported by openpyxl") + ) + + if engine is None and read_ext in (".xlsx", ".xlsm"): + # GH 35029 + request.node.add_marker( + pytest.mark.xfail(reason="Defaults to openpyxl, maybe not supported") + ) + + result = pd.read_excel("testdateoverflow" + read_ext) + tm.assert_frame_equal(result, expected) + + def test_sheet_name(self, request, read_ext, df_ref): + if read_ext == ".xlsb": + request.node.add_marker( + pytest.mark.xfail( + reason="Sheets containing datetimes not supported by pyxlsb" + ) + ) + filename = "test1" + sheet_name = "Sheet1" + + df1 = pd.read_excel( + filename + read_ext, sheet_name=sheet_name, index_col=0 + ) # doc + df2 = pd.read_excel(filename + read_ext, index_col=0, sheet_name=sheet_name) + + tm.assert_frame_equal(df1, df_ref, check_names=False) + tm.assert_frame_equal(df2, df_ref, check_names=False) + + def test_excel_read_buffer(self, read_ext): + + pth = "test1" + read_ext + expected = pd.read_excel(pth, sheet_name="Sheet1", index_col=0) + with open(pth, "rb") as f: + actual = pd.read_excel(f, sheet_name="Sheet1", index_col=0) + tm.assert_frame_equal(expected, actual) + + def test_bad_engine_raises(self, read_ext): + bad_engine = "foo" + with pytest.raises(ValueError, match="Unknown engine: foo"): + pd.read_excel("", engine=bad_engine) + + @pytest.mark.parametrize( + "sheet_name", + [3, [0, 3], [3, 0], "Sheet4", ["Sheet1", "Sheet4"], ["Sheet4", "Sheet1"]], + ) + def test_bad_sheetname_raises(self, read_ext, sheet_name): + # GH 39250 + msg = "Worksheet index 3 is invalid|Worksheet named 'Sheet4' not found" + with pytest.raises(ValueError, match=msg): + pd.read_excel("blank" + read_ext, sheet_name=sheet_name) + + def test_missing_file_raises(self, read_ext): + bad_file = f"foo{read_ext}" + # CI tests with other languages, translates to "No such file or directory" + match = r"(No such file or directory|没有那个文件或目录|File o directory non esistente)" + with pytest.raises(FileNotFoundError, match=match): + pd.read_excel(bad_file) + + def test_corrupt_bytes_raises(self, read_ext, engine): + bad_stream = b"foo" + if engine is None: + error = ValueError + msg = ( + "Excel file format cannot be determined, you must " + "specify an engine manually." 
+ ) + elif engine == "xlrd": + from xlrd import XLRDError + + error = XLRDError + msg = ( + "Unsupported format, or corrupt file: Expected BOF " + "record; found b'foo'" + ) + else: + error = BadZipFile + msg = "File is not a zip file" + with pytest.raises(error, match=msg): + pd.read_excel(bad_stream) + + @pytest.mark.network + @tm.network + def test_read_from_http_url(self, read_ext): + url = ( + "https://raw.githubusercontent.com/pandas-dev/pandas/main/" + "pandas/tests/io/data/excel/test1" + read_ext + ) + url_table = pd.read_excel(url) + local_table = pd.read_excel("test1" + read_ext) + tm.assert_frame_equal(url_table, local_table) + + @td.skip_if_not_us_locale + def test_read_from_s3_url(self, read_ext, s3_resource, s3so): + # Bucket "pandas-test" created in tests/io/conftest.py + with open("test1" + read_ext, "rb") as f: + s3_resource.Bucket("pandas-test").put_object(Key="test1" + read_ext, Body=f) + + url = "s3://pandas-test/test1" + read_ext + + url_table = pd.read_excel(url, storage_options=s3so) + local_table = pd.read_excel("test1" + read_ext) + tm.assert_frame_equal(url_table, local_table) + + def test_read_from_s3_object(self, read_ext, s3_resource, s3so): + # GH 38788 + # Bucket "pandas-test" created in tests/io/conftest.py + with open("test1" + read_ext, "rb") as f: + s3_resource.Bucket("pandas-test").put_object(Key="test1" + read_ext, Body=f) + + import s3fs + + s3 = s3fs.S3FileSystem(**s3so) + + with s3.open("s3://pandas-test/test1" + read_ext) as f: + url_table = pd.read_excel(f) + + local_table = pd.read_excel("test1" + read_ext) + tm.assert_frame_equal(url_table, local_table) + + @pytest.mark.slow + def test_read_from_file_url(self, read_ext, datapath): + + # FILE + localtable = os.path.join(datapath("io", "data", "excel"), "test1" + read_ext) + local_table = pd.read_excel(localtable) + + try: + url_table = pd.read_excel("file://localhost/" + localtable) + except URLError: + # fails on some systems + import platform + + platform_info = " ".join(platform.uname()).strip() + pytest.skip(f"failing on {platform_info}") + + tm.assert_frame_equal(url_table, local_table) + + def test_read_from_pathlib_path(self, read_ext): + + # GH12655 + from pathlib import Path + + str_path = "test1" + read_ext + expected = pd.read_excel(str_path, sheet_name="Sheet1", index_col=0) + + path_obj = Path("test1" + read_ext) + actual = pd.read_excel(path_obj, sheet_name="Sheet1", index_col=0) + + tm.assert_frame_equal(expected, actual) + + @td.skip_if_no("py.path") + @td.check_file_leaks + def test_read_from_py_localpath(self, read_ext): + + # GH12655 + from py.path import local as LocalPath + + str_path = os.path.join("test1" + read_ext) + expected = pd.read_excel(str_path, sheet_name="Sheet1", index_col=0) + + path_obj = LocalPath().join("test1" + read_ext) + actual = pd.read_excel(path_obj, sheet_name="Sheet1", index_col=0) + + tm.assert_frame_equal(expected, actual) + + @td.check_file_leaks + def test_close_from_py_localpath(self, read_ext): + + # GH31467 + str_path = os.path.join("test1" + read_ext) + with open(str_path, "rb") as f: + x = pd.read_excel(f, sheet_name="Sheet1", index_col=0) + del x + # should not throw an exception because the passed file was closed + f.read() + + def test_reader_seconds(self, request, engine, read_ext): + if engine == "pyxlsb": + request.node.add_marker( + pytest.mark.xfail( + reason="Sheets containing datetimes not supported by pyxlsb" + ) + ) + + # Test reading times with and without milliseconds. GH5945. 
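+        # The 1900 and 1904 Excel date systems differ only in their epoch,
+        # so both files below should parse to the same set of times.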
+        expected = DataFrame.from_dict(
+            {
+                "Time": [
+                    time(1, 2, 3),
+                    time(2, 45, 56, 100000),
+                    time(4, 29, 49, 200000),
+                    time(6, 13, 42, 300000),
+                    time(7, 57, 35, 400000),
+                    time(9, 41, 28, 500000),
+                    time(11, 25, 21, 600000),
+                    time(13, 9, 14, 700000),
+                    time(14, 53, 7, 800000),
+                    time(16, 37, 0, 900000),
+                    time(18, 20, 54),
+                ]
+            }
+        )
+
+        actual = pd.read_excel("times_1900" + read_ext, sheet_name="Sheet1")
+        tm.assert_frame_equal(actual, expected)
+
+        actual = pd.read_excel("times_1904" + read_ext, sheet_name="Sheet1")
+        tm.assert_frame_equal(actual, expected)
+
+    def test_read_excel_multiindex(self, request, read_ext):
+        # see gh-4679
+        if read_ext == ".xlsb":
+            request.node.add_marker(
+                pytest.mark.xfail(
+                    reason="Sheets containing datetimes not supported by pyxlsb"
+                )
+            )
+
+        mi = MultiIndex.from_product([["foo", "bar"], ["a", "b"]])
+        mi_file = "testmultiindex" + read_ext
+
+        # "mi_column" sheet
+        expected = DataFrame(
+            [
+                [1, 2.5, pd.Timestamp("2015-01-01"), True],
+                [2, 3.5, pd.Timestamp("2015-01-02"), False],
+                [3, 4.5, pd.Timestamp("2015-01-03"), False],
+                [4, 5.5, pd.Timestamp("2015-01-04"), True],
+            ],
+            columns=mi,
+        )
+
+        actual = pd.read_excel(
+            mi_file, sheet_name="mi_column", header=[0, 1], index_col=0
+        )
+        tm.assert_frame_equal(actual, expected)
+
+        # "mi_index" sheet
+        expected.index = mi
+        expected.columns = ["a", "b", "c", "d"]
+
+        actual = pd.read_excel(mi_file, sheet_name="mi_index", index_col=[0, 1])
+        tm.assert_frame_equal(actual, expected, check_names=False)
+
+        # "both" sheet
+        expected.columns = mi
+
+        actual = pd.read_excel(
+            mi_file, sheet_name="both", index_col=[0, 1], header=[0, 1]
+        )
+        tm.assert_frame_equal(actual, expected, check_names=False)
+
+        # "mi_index_name" sheet
+        expected.columns = ["a", "b", "c", "d"]
+        expected.index = mi.set_names(["ilvl1", "ilvl2"])
+
+        actual = pd.read_excel(mi_file, sheet_name="mi_index_name", index_col=[0, 1])
+        tm.assert_frame_equal(actual, expected)
+
+        # "mi_column_name" sheet
+        expected.index = list(range(4))
+        expected.columns = mi.set_names(["c1", "c2"])
+        actual = pd.read_excel(
+            mi_file, sheet_name="mi_column_name", header=[0, 1], index_col=0
+        )
+        tm.assert_frame_equal(actual, expected)
+
+        # see gh-11317
+        # "name_with_int" sheet
+        expected.columns = mi.set_levels([1, 2], level=1).set_names(["c1", "c2"])
+
+        actual = pd.read_excel(
+            mi_file, sheet_name="name_with_int", index_col=0, header=[0, 1]
+        )
+        tm.assert_frame_equal(actual, expected)
+
+        # "both_name" sheet
+        expected.columns = mi.set_names(["c1", "c2"])
+        expected.index = mi.set_names(["ilvl1", "ilvl2"])
+
+        actual = pd.read_excel(
+            mi_file, sheet_name="both_name", index_col=[0, 1], header=[0, 1]
+        )
+        tm.assert_frame_equal(actual, expected)
+
+        # "both_skiprows" sheet
+        actual = pd.read_excel(
+            mi_file,
+            sheet_name="both_name_skiprows",
+            index_col=[0, 1],
+            header=[0, 1],
+            skiprows=2,
+        )
+        tm.assert_frame_equal(actual, expected)
+
+    @pytest.mark.parametrize(
+        "sheet_name,idx_lvl2",
+        [
+            ("both_name_blank_after_mi_name", [np.nan, "b", "a", "b"]),
+            ("both_name_multiple_blanks", [np.nan] * 4),
+        ],
+    )
+    def test_read_excel_multiindex_blank_after_name(
+        self, request, read_ext, sheet_name, idx_lvl2
+    ):
+        # GH34673
+        if read_ext == ".xlsb":
+            request.node.add_marker(
+                pytest.mark.xfail(
+                    reason="Sheets containing datetimes not supported by pyxlsb (GH4679)"
+                )
+            )
+
+        mi_file = "testmultiindex" + read_ext
+        mi = MultiIndex.from_product([["foo", "bar"], ["a", "b"]], names=["c1", "c2"])
+        expected = DataFrame(
+            [
+                [1, 2.5,
pd.Timestamp("2015-01-01"), True], + [2, 3.5, pd.Timestamp("2015-01-02"), False], + [3, 4.5, pd.Timestamp("2015-01-03"), False], + [4, 5.5, pd.Timestamp("2015-01-04"), True], + ], + columns=mi, + index=MultiIndex.from_arrays( + (["foo", "foo", "bar", "bar"], idx_lvl2), + names=["ilvl1", "ilvl2"], + ), + ) + result = pd.read_excel( + mi_file, + sheet_name=sheet_name, + index_col=[0, 1], + header=[0, 1], + ) + tm.assert_frame_equal(result, expected) + + def test_read_excel_multiindex_header_only(self, read_ext): + # see gh-11733. + # + # Don't try to parse a header name if there isn't one. + mi_file = "testmultiindex" + read_ext + result = pd.read_excel(mi_file, sheet_name="index_col_none", header=[0, 1]) + + exp_columns = MultiIndex.from_product([("A", "B"), ("key", "val")]) + expected = DataFrame([[1, 2, 3, 4]] * 2, columns=exp_columns) + tm.assert_frame_equal(result, expected) + + def test_excel_old_index_format(self, read_ext): + # see gh-4679 + filename = "test_index_name_pre17" + read_ext + + # We detect headers to determine if index names exist, so + # that "index" name in the "names" version of the data will + # now be interpreted as rows that include null data. + data = np.array( + [ + [None, None, None, None, None], + ["R0C0", "R0C1", "R0C2", "R0C3", "R0C4"], + ["R1C0", "R1C1", "R1C2", "R1C3", "R1C4"], + ["R2C0", "R2C1", "R2C2", "R2C3", "R2C4"], + ["R3C0", "R3C1", "R3C2", "R3C3", "R3C4"], + ["R4C0", "R4C1", "R4C2", "R4C3", "R4C4"], + ] + ) + columns = ["C_l0_g0", "C_l0_g1", "C_l0_g2", "C_l0_g3", "C_l0_g4"] + mi = MultiIndex( + levels=[ + ["R0", "R_l0_g0", "R_l0_g1", "R_l0_g2", "R_l0_g3", "R_l0_g4"], + ["R1", "R_l1_g0", "R_l1_g1", "R_l1_g2", "R_l1_g3", "R_l1_g4"], + ], + codes=[[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], + names=[None, None], + ) + si = Index( + ["R0", "R_l0_g0", "R_l0_g1", "R_l0_g2", "R_l0_g3", "R_l0_g4"], name=None + ) + + expected = DataFrame(data, index=si, columns=columns) + + actual = pd.read_excel(filename, sheet_name="single_names", index_col=0) + tm.assert_frame_equal(actual, expected) + + expected.index = mi + + actual = pd.read_excel(filename, sheet_name="multi_names", index_col=[0, 1]) + tm.assert_frame_equal(actual, expected) + + # The analogous versions of the "names" version data + # where there are explicitly no names for the indices. 
+ data = np.array( + [ + ["R0C0", "R0C1", "R0C2", "R0C3", "R0C4"], + ["R1C0", "R1C1", "R1C2", "R1C3", "R1C4"], + ["R2C0", "R2C1", "R2C2", "R2C3", "R2C4"], + ["R3C0", "R3C1", "R3C2", "R3C3", "R3C4"], + ["R4C0", "R4C1", "R4C2", "R4C3", "R4C4"], + ] + ) + columns = ["C_l0_g0", "C_l0_g1", "C_l0_g2", "C_l0_g3", "C_l0_g4"] + mi = MultiIndex( + levels=[ + ["R_l0_g0", "R_l0_g1", "R_l0_g2", "R_l0_g3", "R_l0_g4"], + ["R_l1_g0", "R_l1_g1", "R_l1_g2", "R_l1_g3", "R_l1_g4"], + ], + codes=[[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]], + names=[None, None], + ) + si = Index(["R_l0_g0", "R_l0_g1", "R_l0_g2", "R_l0_g3", "R_l0_g4"], name=None) + + expected = DataFrame(data, index=si, columns=columns) + + actual = pd.read_excel(filename, sheet_name="single_no_names", index_col=0) + tm.assert_frame_equal(actual, expected) + + expected.index = mi + + actual = pd.read_excel(filename, sheet_name="multi_no_names", index_col=[0, 1]) + tm.assert_frame_equal(actual, expected, check_names=False) + + def test_read_excel_bool_header_arg(self, read_ext): + # GH 6114 + msg = "Passing a bool to header is invalid" + for arg in [True, False]: + with pytest.raises(TypeError, match=msg): + pd.read_excel("test1" + read_ext, header=arg) + + def test_read_excel_skiprows(self, request, read_ext): + # GH 4903 + if read_ext == ".xlsb": + request.node.add_marker( + pytest.mark.xfail( + reason="Sheets containing datetimes not supported by pyxlsb" + ) + ) + + actual = pd.read_excel( + "testskiprows" + read_ext, sheet_name="skiprows_list", skiprows=[0, 2] + ) + expected = DataFrame( + [ + [1, 2.5, pd.Timestamp("2015-01-01"), True], + [2, 3.5, pd.Timestamp("2015-01-02"), False], + [3, 4.5, pd.Timestamp("2015-01-03"), False], + [4, 5.5, pd.Timestamp("2015-01-04"), True], + ], + columns=["a", "b", "c", "d"], + ) + tm.assert_frame_equal(actual, expected) + + actual = pd.read_excel( + "testskiprows" + read_ext, + sheet_name="skiprows_list", + skiprows=np.array([0, 2]), + ) + tm.assert_frame_equal(actual, expected) + + # GH36435 + actual = pd.read_excel( + "testskiprows" + read_ext, + sheet_name="skiprows_list", + skiprows=lambda x: x in [0, 2], + ) + tm.assert_frame_equal(actual, expected) + + actual = pd.read_excel( + "testskiprows" + read_ext, + sheet_name="skiprows_list", + skiprows=3, + names=["a", "b", "c", "d"], + ) + expected = DataFrame( + [ + # [1, 2.5, pd.Timestamp("2015-01-01"), True], + [2, 3.5, pd.Timestamp("2015-01-02"), False], + [3, 4.5, pd.Timestamp("2015-01-03"), False], + [4, 5.5, pd.Timestamp("2015-01-04"), True], + ], + columns=["a", "b", "c", "d"], + ) + tm.assert_frame_equal(actual, expected) + + def test_read_excel_nrows(self, read_ext): + # GH 16645 + num_rows_to_pull = 5 + actual = pd.read_excel("test1" + read_ext, nrows=num_rows_to_pull) + expected = pd.read_excel("test1" + read_ext) + expected = expected[:num_rows_to_pull] + tm.assert_frame_equal(actual, expected) + + def test_read_excel_nrows_greater_than_nrows_in_file(self, read_ext): + # GH 16645 + expected = pd.read_excel("test1" + read_ext) + num_records_in_file = len(expected) + num_rows_to_pull = num_records_in_file + 10 + actual = pd.read_excel("test1" + read_ext, nrows=num_rows_to_pull) + tm.assert_frame_equal(actual, expected) + + def test_read_excel_nrows_non_integer_parameter(self, read_ext): + # GH 16645 + msg = "'nrows' must be an integer >=0" + with pytest.raises(ValueError, match=msg): + pd.read_excel("test1" + read_ext, nrows="5") + + def test_read_excel_squeeze(self, read_ext): + # GH 12157 + f = "test_squeeze" + read_ext + + with 
tm.assert_produces_warning( + FutureWarning, + match="The squeeze argument has been deprecated " + "and will be removed in a future version. " + 'Append .squeeze\\("columns"\\) to the call to squeeze.\n\n', + ): + actual = pd.read_excel( + f, sheet_name="two_columns", index_col=0, squeeze=True + ) + expected = Series([2, 3, 4], [4, 5, 6], name="b") + expected.index.name = "a" + tm.assert_series_equal(actual, expected) + + actual = pd.read_excel(f, sheet_name="two_columns", squeeze=True) + expected = DataFrame({"a": [4, 5, 6], "b": [2, 3, 4]}) + tm.assert_frame_equal(actual, expected) + + actual = pd.read_excel(f, sheet_name="one_column", squeeze=True) + expected = Series([1, 2, 3], name="a") + tm.assert_series_equal(actual, expected) + + def test_deprecated_kwargs(self, read_ext): + with tm.assert_produces_warning(FutureWarning, raise_on_extra_warnings=False): + pd.read_excel("test1" + read_ext, "Sheet1", 0) + + pd.read_excel("test1" + read_ext) + + def test_no_header_with_list_index_col(self, read_ext): + # GH 31783 + file_name = "testmultiindex" + read_ext + data = [("B", "B"), ("key", "val"), (3, 4), (3, 4)] + idx = MultiIndex.from_tuples( + [("A", "A"), ("key", "val"), (1, 2), (1, 2)], names=(0, 1) + ) + expected = DataFrame(data, index=idx, columns=(2, 3)) + result = pd.read_excel( + file_name, sheet_name="index_col_none", index_col=[0, 1], header=None + ) + tm.assert_frame_equal(expected, result) + + def test_one_col_noskip_blank_line(self, read_ext): + # GH 39808 + file_name = "one_col_blank_line" + read_ext + data = [0.5, np.nan, 1, 2] + expected = DataFrame(data, columns=["numbers"]) + result = pd.read_excel(file_name) + tm.assert_frame_equal(result, expected) + + def test_multiheader_two_blank_lines(self, read_ext): + # GH 40442 + file_name = "testmultiindex" + read_ext + columns = MultiIndex.from_tuples([("a", "A"), ("b", "B")]) + data = [[np.nan, np.nan], [np.nan, np.nan], [1, 3], [2, 4]] + expected = DataFrame(data, columns=columns) + result = pd.read_excel( + file_name, sheet_name="mi_column_empty_rows", header=[0, 1] + ) + tm.assert_frame_equal(result, expected) + + def test_trailing_blanks(self, read_ext): + """ + Sheets can contain blank cells with no data. 
Some of our readers + were including those cells, creating many empty rows and columns + """ + file_name = "trailing_blanks" + read_ext + result = pd.read_excel(file_name) + assert result.shape == (3, 3) + + def test_ignore_chartsheets_by_str(self, request, engine, read_ext): + # GH 41448 + if engine == "odf": + pytest.skip("chartsheets do not exist in the ODF format") + if engine == "pyxlsb": + request.node.add_marker( + pytest.mark.xfail( + reason="pyxlsb can't distinguish chartsheets from worksheets" + ) + ) + with pytest.raises(ValueError, match="Worksheet named 'Chart1' not found"): + pd.read_excel("chartsheet" + read_ext, sheet_name="Chart1") + + def test_ignore_chartsheets_by_int(self, request, engine, read_ext): + # GH 41448 + if engine == "odf": + pytest.skip("chartsheets do not exist in the ODF format") + if engine == "pyxlsb": + request.node.add_marker( + pytest.mark.xfail( + reason="pyxlsb can't distinguish chartsheets from worksheets" + ) + ) + with pytest.raises( + ValueError, match="Worksheet index 1 is invalid, 1 worksheets found" + ): + pd.read_excel("chartsheet" + read_ext, sheet_name=1) + + def test_euro_decimal_format(self, request, read_ext): + # copied from read_csv + result = pd.read_excel("test_decimal" + read_ext, decimal=",", skiprows=1) + expected = DataFrame( + [ + [1, 1521.1541, 187101.9543, "ABC", "poi", 4.738797819], + [2, 121.12, 14897.76, "DEF", "uyt", 0.377320872], + [3, 878.158, 108013.434, "GHI", "rez", 2.735694704], + ], + columns=["Id", "Number1", "Number2", "Text1", "Text2", "Number3"], + ) + tm.assert_frame_equal(result, expected) + + +class TestExcelFileRead: + @pytest.fixture(autouse=True) + def cd_and_set_engine(self, engine, datapath, monkeypatch): + """ + Change directory and set engine for ExcelFile objects. 
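+        The monkeypatch chdirs into the excel data directory and pins the
+        ``engine`` fixture onto ``pd.ExcelFile``, so each test can open data
+        files by bare name without passing an engine explicitly.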
+ """ + func = partial(pd.ExcelFile, engine=engine) + monkeypatch.chdir(datapath("io", "data", "excel")) + monkeypatch.setattr(pd, "ExcelFile", func) + + def test_engine_used(self, read_ext, engine, monkeypatch): + expected_defaults = { + "xlsx": "openpyxl", + "xlsm": "openpyxl", + "xlsb": "pyxlsb", + "xls": "xlrd", + "ods": "odf", + } + + with pd.ExcelFile("test1" + read_ext) as excel: + result = excel.engine + + if engine is not None: + expected = engine + else: + expected = expected_defaults[read_ext[1:]] + assert result == expected + + def test_excel_passes_na(self, read_ext): + with pd.ExcelFile("test4" + read_ext) as excel: + parsed = pd.read_excel( + excel, sheet_name="Sheet1", keep_default_na=False, na_values=["apple"] + ) + expected = DataFrame( + [["NA"], [1], ["NA"], [np.nan], ["rabbit"]], columns=["Test"] + ) + tm.assert_frame_equal(parsed, expected) + + with pd.ExcelFile("test4" + read_ext) as excel: + parsed = pd.read_excel( + excel, sheet_name="Sheet1", keep_default_na=True, na_values=["apple"] + ) + expected = DataFrame( + [[np.nan], [1], [np.nan], [np.nan], ["rabbit"]], columns=["Test"] + ) + tm.assert_frame_equal(parsed, expected) + + # 13967 + with pd.ExcelFile("test5" + read_ext) as excel: + parsed = pd.read_excel( + excel, sheet_name="Sheet1", keep_default_na=False, na_values=["apple"] + ) + expected = DataFrame( + [["1.#QNAN"], [1], ["nan"], [np.nan], ["rabbit"]], columns=["Test"] + ) + tm.assert_frame_equal(parsed, expected) + + with pd.ExcelFile("test5" + read_ext) as excel: + parsed = pd.read_excel( + excel, sheet_name="Sheet1", keep_default_na=True, na_values=["apple"] + ) + expected = DataFrame( + [[np.nan], [1], [np.nan], [np.nan], ["rabbit"]], columns=["Test"] + ) + tm.assert_frame_equal(parsed, expected) + + @pytest.mark.parametrize("na_filter", [None, True, False]) + def test_excel_passes_na_filter(self, read_ext, na_filter): + # gh-25453 + kwargs = {} + + if na_filter is not None: + kwargs["na_filter"] = na_filter + + with pd.ExcelFile("test5" + read_ext) as excel: + parsed = pd.read_excel( + excel, + sheet_name="Sheet1", + keep_default_na=True, + na_values=["apple"], + **kwargs, + ) + + if na_filter is False: + expected = [["1.#QNAN"], [1], ["nan"], ["apple"], ["rabbit"]] + else: + expected = [[np.nan], [1], [np.nan], [np.nan], ["rabbit"]] + + expected = DataFrame(expected, columns=["Test"]) + tm.assert_frame_equal(parsed, expected) + + def test_excel_table_sheet_by_index(self, request, read_ext, df_ref): + if read_ext == ".xlsb": + request.node.add_marker( + pytest.mark.xfail( + reason="Sheets containing datetimes not supported by pyxlsb" + ) + ) + + with pd.ExcelFile("test1" + read_ext) as excel: + df1 = pd.read_excel(excel, sheet_name=0, index_col=0) + df2 = pd.read_excel(excel, sheet_name=1, skiprows=[1], index_col=0) + tm.assert_frame_equal(df1, df_ref, check_names=False) + tm.assert_frame_equal(df2, df_ref, check_names=False) + + with pd.ExcelFile("test1" + read_ext) as excel: + df1 = excel.parse(0, index_col=0) + df2 = excel.parse(1, skiprows=[1], index_col=0) + tm.assert_frame_equal(df1, df_ref, check_names=False) + tm.assert_frame_equal(df2, df_ref, check_names=False) + + with pd.ExcelFile("test1" + read_ext) as excel: + df3 = pd.read_excel(excel, sheet_name=0, index_col=0, skipfooter=1) + tm.assert_frame_equal(df3, df1.iloc[:-1]) + + with pd.ExcelFile("test1" + read_ext) as excel: + df3 = excel.parse(0, index_col=0, skipfooter=1) + + tm.assert_frame_equal(df3, df1.iloc[:-1]) + + def test_sheet_name(self, request, read_ext, df_ref): + if read_ext 
== ".xlsb": + request.node.add_marker( + pytest.mark.xfail( + reason="Sheets containing datetimes not supported by pyxlsb" + ) + ) + + filename = "test1" + sheet_name = "Sheet1" + + with pd.ExcelFile(filename + read_ext) as excel: + df1_parse = excel.parse(sheet_name=sheet_name, index_col=0) # doc + + with pd.ExcelFile(filename + read_ext) as excel: + df2_parse = excel.parse(index_col=0, sheet_name=sheet_name) + + tm.assert_frame_equal(df1_parse, df_ref, check_names=False) + tm.assert_frame_equal(df2_parse, df_ref, check_names=False) + + @pytest.mark.parametrize( + "sheet_name", + [3, [0, 3], [3, 0], "Sheet4", ["Sheet1", "Sheet4"], ["Sheet4", "Sheet1"]], + ) + def test_bad_sheetname_raises(self, read_ext, sheet_name): + # GH 39250 + msg = "Worksheet index 3 is invalid|Worksheet named 'Sheet4' not found" + with pytest.raises(ValueError, match=msg): + with pd.ExcelFile("blank" + read_ext) as excel: + excel.parse(sheet_name=sheet_name) + + def test_excel_read_buffer(self, engine, read_ext): + pth = "test1" + read_ext + expected = pd.read_excel(pth, sheet_name="Sheet1", index_col=0, engine=engine) + + with open(pth, "rb") as f: + with pd.ExcelFile(f) as xls: + actual = pd.read_excel(xls, sheet_name="Sheet1", index_col=0) + + tm.assert_frame_equal(expected, actual) + + def test_reader_closes_file(self, engine, read_ext): + with open("test1" + read_ext, "rb") as f: + with pd.ExcelFile(f) as xlsx: + # parses okay + pd.read_excel(xlsx, sheet_name="Sheet1", index_col=0, engine=engine) + + assert f.closed + + def test_conflicting_excel_engines(self, read_ext): + # GH 26566 + msg = "Engine should not be specified when passing an ExcelFile" + + with pd.ExcelFile("test1" + read_ext) as xl: + with pytest.raises(ValueError, match=msg): + pd.read_excel(xl, engine="foo") + + def test_excel_read_binary(self, engine, read_ext): + # GH 15914 + expected = pd.read_excel("test1" + read_ext, engine=engine) + + with open("test1" + read_ext, "rb") as f: + data = f.read() + + actual = pd.read_excel(data, engine=engine) + tm.assert_frame_equal(expected, actual) + + def test_excel_read_binary_via_read_excel(self, read_ext, engine): + # GH 38424 + with open("test1" + read_ext, "rb") as f: + result = pd.read_excel(f) + expected = pd.read_excel("test1" + read_ext, engine=engine) + tm.assert_frame_equal(result, expected) + + @pytest.mark.skipif( + xlrd_version is not None and xlrd_version >= Version("2"), + reason="xlrd no longer supports xlsx", + ) + def test_excel_high_surrogate(self): + # GH 23809 + expected = DataFrame(["\udc88"], columns=["Column1"]) + + # should not produce a segmentation violation + actual = pd.read_excel("high_surrogate.xlsx", engine="xlrd") + tm.assert_frame_equal(expected, actual) + + @pytest.mark.parametrize("filename", ["df_empty.xlsx", "df_equals.xlsx"]) + def test_header_with_index_col(self, filename): + # GH 33476 + idx = Index(["Z"], name="I2") + cols = MultiIndex.from_tuples([("A", "B"), ("A", "B.1")], names=["I11", "I12"]) + expected = DataFrame([[1, 3]], index=idx, columns=cols, dtype="int64") + result = pd.read_excel( + filename, sheet_name="Sheet1", index_col=0, header=[0, 1] + ) + tm.assert_frame_equal(expected, result) + + def test_read_datetime_multiindex(self, request, engine, read_ext): + # GH 34748 + if engine == "pyxlsb": + request.node.add_marker( + pytest.mark.xfail( + reason="Sheets containing datetimes not supported by pyxlsb" + ) + ) + + f = "test_datetime_mi" + read_ext + with pd.ExcelFile(f) as excel: + actual = pd.read_excel(excel, header=[0, 1], index_col=0, 
engine=engine)
+        expected_column_index = MultiIndex.from_tuples(
+            [(pd.to_datetime("02/29/2020"), pd.to_datetime("03/01/2020"))],
+            names=[
+                pd.to_datetime("02/29/2020").to_pydatetime(),
+                pd.to_datetime("03/01/2020").to_pydatetime(),
+            ],
+        )
+        expected = DataFrame([], columns=expected_column_index)
+
+        tm.assert_frame_equal(expected, actual)
+
+    def test_engine_invalid_option(self, read_ext):
+        # read_ext includes the '.' hence the weird formatting
+        with pytest.raises(ValueError, match="Value must be one of *"):
+            with pd.option_context(f"io.excel{read_ext}.reader", "abc"):
+                pass
+
+    def test_ignore_chartsheets(self, request, engine, read_ext):
+        # GH 41448
+        if engine == "odf":
+            pytest.skip("chartsheets do not exist in the ODF format")
+        if engine == "pyxlsb":
+            request.node.add_marker(
+                pytest.mark.xfail(
+                    reason="pyxlsb can't distinguish chartsheets from worksheets"
+                )
+            )
+        with pd.ExcelFile("chartsheet" + read_ext) as excel:
+            assert excel.sheet_names == ["Sheet1"]
+
+    def test_corrupt_files_closed(self, engine, read_ext):
+        # GH41778
+        errors = (BadZipFile,)
+        if engine is None:
+            pytest.skip()
+        elif engine == "xlrd":
+            import xlrd
+
+            errors = (BadZipFile, xlrd.biffh.XLRDError)
+
+        with tm.ensure_clean(f"corrupt{read_ext}") as file:
+            Path(file).write_text("corrupt")
+            with tm.assert_produces_warning(False):
+                try:
+                    pd.ExcelFile(file, engine=engine)
+                except errors:
+                    pass
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/excel/test_style.py b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/test_style.py
new file mode 100644
index 0000000..8a142ae
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/test_style.py
@@ -0,0 +1,167 @@
+import numpy as np
+import pytest
+
+from pandas import DataFrame
+import pandas._testing as tm
+
+from pandas.io.excel import ExcelWriter
+from pandas.io.formats.excel import ExcelFormatter
+
+pytest.importorskip("jinja2")
+# jinja2 is currently required for Styler.__init__(). Technically Styler.to_excel
+# could compute styles and render to excel without jinja2, since there is no
+# 'template' file, but this needs the import error to be delayed until render time.
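+# A minimal sketch of the pattern these tests follow (the output file name is
+# illustrative, not a fixture):
+#
+#     df = DataFrame(np.random.randn(2, 2))
+#     with ExcelWriter("out.xlsx", engine="openpyxl") as writer:
+#         df.style.applymap(lambda v: "font-weight: bold;").to_excel(
+#             writer, sheet_name="styled"
+#         )
+#
+# The CSS returned by the callable is translated into engine-level cell
+# formats by pandas.io.formats.excel.ExcelFormatter.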
+ + +def assert_equal_cell_styles(cell1, cell2): + # TODO: should find a better way to check equality + assert cell1.alignment.__dict__ == cell2.alignment.__dict__ + assert cell1.border.__dict__ == cell2.border.__dict__ + assert cell1.fill.__dict__ == cell2.fill.__dict__ + assert cell1.font.__dict__ == cell2.font.__dict__ + assert cell1.number_format == cell2.number_format + assert cell1.protection.__dict__ == cell2.protection.__dict__ + + +@pytest.mark.parametrize( + "engine", + ["xlsxwriter", "openpyxl"], +) +def test_styler_to_excel_unstyled(engine): + # compare DataFrame.to_excel and Styler.to_excel when no styles applied + pytest.importorskip(engine) + df = DataFrame(np.random.randn(2, 2)) + with tm.ensure_clean(".xlsx") as path: + with ExcelWriter(path, engine=engine) as writer: + df.to_excel(writer, sheet_name="dataframe") + df.style.to_excel(writer, sheet_name="unstyled") + + openpyxl = pytest.importorskip("openpyxl") # test loading only with openpyxl + wb = openpyxl.load_workbook(path) + + for col1, col2 in zip(wb["dataframe"].columns, wb["unstyled"].columns): + assert len(col1) == len(col2) + for cell1, cell2 in zip(col1, col2): + assert cell1.value == cell2.value + assert_equal_cell_styles(cell1, cell2) + + +shared_style_params = [ + ( + "background-color: #111222", + ["fill", "fgColor", "rgb"], + {"xlsxwriter": "FF111222", "openpyxl": "00111222"}, + ), + ( + "color: #111222", + ["font", "color", "value"], + {"xlsxwriter": "FF111222", "openpyxl": "00111222"}, + ), + ("font-family: Arial;", ["font", "name"], "arial"), + ("font-weight: bold;", ["font", "b"], True), + ("font-style: italic;", ["font", "i"], True), + ("text-decoration: underline;", ["font", "u"], "single"), + ("number-format: $??,???.00;", ["number_format"], "$??,???.00"), + ("text-align: left;", ["alignment", "horizontal"], "left"), + ( + "vertical-align: bottom;", + ["alignment", "vertical"], + {"xlsxwriter": None, "openpyxl": "bottom"}, # xlsxwriter Fails + ), +] + + +@pytest.mark.parametrize( + "engine", + ["xlsxwriter", "openpyxl"], +) +@pytest.mark.parametrize("css, attrs, expected", shared_style_params) +def test_styler_to_excel_basic(engine, css, attrs, expected): + pytest.importorskip(engine) + df = DataFrame(np.random.randn(1, 1)) + styler = df.style.applymap(lambda x: css) + + with tm.ensure_clean(".xlsx") as path: + with ExcelWriter(path, engine=engine) as writer: + df.to_excel(writer, sheet_name="dataframe") + styler.to_excel(writer, sheet_name="styled") + + openpyxl = pytest.importorskip("openpyxl") # test loading only with openpyxl + wb = openpyxl.load_workbook(path) + + # test unstyled data cell does not have expected styles + # test styled cell has expected styles + u_cell, s_cell = wb["dataframe"].cell(2, 2), wb["styled"].cell(2, 2) + for attr in attrs: + u_cell, s_cell = getattr(u_cell, attr), getattr(s_cell, attr) + + if isinstance(expected, dict): + assert u_cell is None or u_cell != expected[engine] + assert s_cell == expected[engine] + else: + assert u_cell is None or u_cell != expected + assert s_cell == expected + + +@pytest.mark.parametrize( + "engine", + ["xlsxwriter", "openpyxl"], +) +@pytest.mark.parametrize("css, attrs, expected", shared_style_params) +def test_styler_to_excel_basic_indexes(engine, css, attrs, expected): + pytest.importorskip(engine) + df = DataFrame(np.random.randn(1, 1)) + + styler = df.style + styler.applymap_index(lambda x: css, axis=0) + styler.applymap_index(lambda x: css, axis=1) + + null_styler = df.style + null_styler.applymap(lambda x: "null: css;") + 
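+    # "null: css;" is not a recognized CSS declaration, so the null styler
+    # leaves default formatting in place and acts as the unstyled baseline.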
null_styler.applymap_index(lambda x: "null: css;", axis=0) + null_styler.applymap_index(lambda x: "null: css;", axis=1) + + with tm.ensure_clean(".xlsx") as path: + with ExcelWriter(path, engine=engine) as writer: + null_styler.to_excel(writer, sheet_name="null_styled") + styler.to_excel(writer, sheet_name="styled") + + openpyxl = pytest.importorskip("openpyxl") # test loading only with openpyxl + wb = openpyxl.load_workbook(path) + + # test null styled index cells does not have expected styles + # test styled cell has expected styles + ui_cell, si_cell = wb["null_styled"].cell(2, 1), wb["styled"].cell(2, 1) + uc_cell, sc_cell = wb["null_styled"].cell(1, 2), wb["styled"].cell(1, 2) + for attr in attrs: + ui_cell, si_cell = getattr(ui_cell, attr), getattr(si_cell, attr) + uc_cell, sc_cell = getattr(uc_cell, attr), getattr(sc_cell, attr) + + if isinstance(expected, dict): + assert ui_cell is None or ui_cell != expected[engine] + assert si_cell == expected[engine] + assert uc_cell is None or uc_cell != expected[engine] + assert sc_cell == expected[engine] + else: + assert ui_cell is None or ui_cell != expected + assert si_cell == expected + assert uc_cell is None or uc_cell != expected + assert sc_cell == expected + + +def test_styler_custom_converter(): + openpyxl = pytest.importorskip("openpyxl") + + def custom_converter(css): + return {"font": {"color": {"rgb": "111222"}}} + + df = DataFrame(np.random.randn(1, 1)) + styler = df.style.applymap(lambda x: "color: #888999") + with tm.ensure_clean(".xlsx") as path: + with ExcelWriter(path, engine="openpyxl") as writer: + ExcelFormatter(styler, style_converter=custom_converter).write( + writer, sheet_name="custom" + ) + + wb = openpyxl.load_workbook(path) + assert wb["custom"].cell(2, 2).font.color.value == "00111222" diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/excel/test_writers.py b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/test_writers.py new file mode 100644 index 0000000..0315783 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/test_writers.py @@ -0,0 +1,1338 @@ +from datetime import ( + date, + datetime, + timedelta, +) +from functools import partial +from io import BytesIO +import os +import re + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + get_option, + set_option, +) +import pandas._testing as tm + +from pandas.io.excel import ( + ExcelFile, + ExcelWriter, + _OpenpyxlWriter, + _XlsxWriter, + _XlwtWriter, + register_writer, +) + + +@pytest.fixture +def path(ext): + """ + Fixture to open file for use in each test case. + """ + with tm.ensure_clean(ext) as file_path: + yield file_path + + +@pytest.fixture +def set_engine(engine, ext): + """ + Fixture to set engine for use in each test case. + + Rather than requiring `engine=...` to be provided explicitly as an + argument in each test, this fixture sets a global option to dictate + which engine should be used to write Excel files. After executing + the test it rolls back said change to the global option. 
+ """ + option_name = f"io.excel.{ext.strip('.')}.writer" + prev_engine = get_option(option_name) + set_option(option_name, engine) + yield + set_option(option_name, prev_engine) # Roll back option change + + +@pytest.mark.parametrize( + "ext", + [ + pytest.param(".xlsx", marks=[td.skip_if_no("openpyxl"), td.skip_if_no("xlrd")]), + pytest.param(".xlsm", marks=[td.skip_if_no("openpyxl"), td.skip_if_no("xlrd")]), + pytest.param(".xls", marks=[td.skip_if_no("xlwt"), td.skip_if_no("xlrd")]), + pytest.param( + ".xlsx", marks=[td.skip_if_no("xlsxwriter"), td.skip_if_no("xlrd")] + ), + pytest.param(".ods", marks=td.skip_if_no("odf")), + ], +) +class TestRoundTrip: + @pytest.mark.parametrize( + "header,expected", + [(None, DataFrame([np.nan] * 4)), (0, DataFrame({"Unnamed: 0": [np.nan] * 3}))], + ) + def test_read_one_empty_col_no_header(self, ext, header, expected): + # xref gh-12292 + filename = "no_header" + df = DataFrame([["", 1, 100], ["", 2, 200], ["", 3, 300], ["", 4, 400]]) + + with tm.ensure_clean(ext) as path: + df.to_excel(path, filename, index=False, header=False) + result = pd.read_excel( + path, sheet_name=filename, usecols=[0], header=header + ) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "header,expected", + [(None, DataFrame([0] + [np.nan] * 4)), (0, DataFrame([np.nan] * 4))], + ) + def test_read_one_empty_col_with_header(self, ext, header, expected): + filename = "with_header" + df = DataFrame([["", 1, 100], ["", 2, 200], ["", 3, 300], ["", 4, 400]]) + + with tm.ensure_clean(ext) as path: + df.to_excel(path, "with_header", index=False, header=True) + result = pd.read_excel( + path, sheet_name=filename, usecols=[0], header=header + ) + + tm.assert_frame_equal(result, expected) + + def test_set_column_names_in_parameter(self, ext): + # GH 12870 : pass down column names associated with + # keyword argument names + refdf = DataFrame([[1, "foo"], [2, "bar"], [3, "baz"]], columns=["a", "b"]) + + with tm.ensure_clean(ext) as pth: + with ExcelWriter(pth) as writer: + refdf.to_excel(writer, "Data_no_head", header=False, index=False) + refdf.to_excel(writer, "Data_with_head", index=False) + + refdf.columns = ["A", "B"] + + with ExcelFile(pth) as reader: + xlsdf_no_head = pd.read_excel( + reader, sheet_name="Data_no_head", header=None, names=["A", "B"] + ) + xlsdf_with_head = pd.read_excel( + reader, + sheet_name="Data_with_head", + index_col=None, + names=["A", "B"], + ) + + tm.assert_frame_equal(xlsdf_no_head, refdf) + tm.assert_frame_equal(xlsdf_with_head, refdf) + + def test_creating_and_reading_multiple_sheets(self, ext): + # see gh-9450 + # + # Test reading multiple sheets, from a runtime + # created Excel file with multiple sheets. 
+        def tdf(col_sheet_name):
+            d, i = [11, 22, 33], [1, 2, 3]
+            return DataFrame(d, i, columns=[col_sheet_name])
+
+        sheets = ["AAA", "BBB", "CCC"]
+
+        dfs = [tdf(s) for s in sheets]
+        dfs = dict(zip(sheets, dfs))
+
+        with tm.ensure_clean(ext) as pth:
+            with ExcelWriter(pth) as ew:
+                for sheetname, df in dfs.items():
+                    df.to_excel(ew, sheetname)
+
+            dfs_returned = pd.read_excel(pth, sheet_name=sheets, index_col=0)
+
+            for s in sheets:
+                tm.assert_frame_equal(dfs[s], dfs_returned[s])
+
+    def test_read_excel_multiindex_empty_level(self, ext):
+        # see gh-12453
+        with tm.ensure_clean(ext) as path:
+            df = DataFrame(
+                {
+                    ("One", "x"): {0: 1},
+                    ("Two", "X"): {0: 3},
+                    ("Two", "Y"): {0: 7},
+                    ("Zero", ""): {0: 0},
+                }
+            )
+
+            expected = DataFrame(
+                {
+                    ("One", "x"): {0: 1},
+                    ("Two", "X"): {0: 3},
+                    ("Two", "Y"): {0: 7},
+                    ("Zero", "Unnamed: 4_level_1"): {0: 0},
+                }
+            )
+
+            df.to_excel(path)
+            actual = pd.read_excel(path, header=[0, 1], index_col=0)
+            tm.assert_frame_equal(actual, expected)
+
+            df = DataFrame(
+                {
+                    ("Beg", ""): {0: 0},
+                    ("Middle", "x"): {0: 1},
+                    ("Tail", "X"): {0: 3},
+                    ("Tail", "Y"): {0: 7},
+                }
+            )
+
+            expected = DataFrame(
+                {
+                    ("Beg", "Unnamed: 1_level_1"): {0: 0},
+                    ("Middle", "x"): {0: 1},
+                    ("Tail", "X"): {0: 3},
+                    ("Tail", "Y"): {0: 7},
+                }
+            )
+
+            df.to_excel(path)
+            actual = pd.read_excel(path, header=[0, 1], index_col=0)
+            tm.assert_frame_equal(actual, expected)
+
+    @pytest.mark.parametrize("c_idx_names", [True, False])
+    @pytest.mark.parametrize("r_idx_names", [True, False])
+    @pytest.mark.parametrize("c_idx_levels", [1, 3])
+    @pytest.mark.parametrize("r_idx_levels", [1, 3])
+    def test_excel_multindex_roundtrip(
+        self, ext, c_idx_names, r_idx_names, c_idx_levels, r_idx_levels, request
+    ):
+        # see gh-4679
+        with tm.ensure_clean(ext) as pth:
+            if (c_idx_levels == 1 and c_idx_names) and not (
+                r_idx_levels == 3 and not r_idx_names
+            ):
+                mark = pytest.mark.xfail(
+                    reason="Column index name cannot be serialized unless "
+                    "it's a MultiIndex"
+                )
+                request.node.add_marker(mark)
+
+            # Empty name case is currently read in as
+            # unnamed levels, not Nones.
+ check_names = r_idx_names or r_idx_levels <= 1 + + df = tm.makeCustomDataframe( + 5, 5, c_idx_names, r_idx_names, c_idx_levels, r_idx_levels + ) + df.to_excel(pth) + + act = pd.read_excel( + pth, + index_col=list(range(r_idx_levels)), + header=list(range(c_idx_levels)), + ) + tm.assert_frame_equal(df, act, check_names=check_names) + + df.iloc[0, :] = np.nan + df.to_excel(pth) + + act = pd.read_excel( + pth, + index_col=list(range(r_idx_levels)), + header=list(range(c_idx_levels)), + ) + tm.assert_frame_equal(df, act, check_names=check_names) + + df.iloc[-1, :] = np.nan + df.to_excel(pth) + act = pd.read_excel( + pth, + index_col=list(range(r_idx_levels)), + header=list(range(c_idx_levels)), + ) + tm.assert_frame_equal(df, act, check_names=check_names) + + def test_read_excel_parse_dates(self, ext): + # see gh-11544, gh-12051 + df = DataFrame( + {"col": [1, 2, 3], "date_strings": pd.date_range("2012-01-01", periods=3)} + ) + df2 = df.copy() + df2["date_strings"] = df2["date_strings"].dt.strftime("%m/%d/%Y") + + with tm.ensure_clean(ext) as pth: + df2.to_excel(pth) + + res = pd.read_excel(pth, index_col=0) + tm.assert_frame_equal(df2, res) + + res = pd.read_excel(pth, parse_dates=["date_strings"], index_col=0) + tm.assert_frame_equal(df, res) + + date_parser = lambda x: datetime.strptime(x, "%m/%d/%Y") + res = pd.read_excel( + pth, parse_dates=["date_strings"], date_parser=date_parser, index_col=0 + ) + tm.assert_frame_equal(df, res) + + def test_multiindex_interval_datetimes(self, ext): + # GH 30986 + midx = MultiIndex.from_arrays( + [ + range(4), + pd.interval_range( + start=pd.Timestamp("2020-01-01"), periods=4, freq="6M" + ), + ] + ) + df = DataFrame(range(4), index=midx) + with tm.ensure_clean(ext) as pth: + df.to_excel(pth) + result = pd.read_excel(pth, index_col=[0, 1]) + expected = DataFrame( + range(4), + MultiIndex.from_arrays( + [ + range(4), + [ + "(2020-01-31, 2020-07-31]", + "(2020-07-31, 2021-01-31]", + "(2021-01-31, 2021-07-31]", + "(2021-07-31, 2022-01-31]", + ], + ] + ), + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "engine,ext", + [ + pytest.param( + "openpyxl", + ".xlsx", + marks=[td.skip_if_no("openpyxl"), td.skip_if_no("xlrd")], + ), + pytest.param( + "openpyxl", + ".xlsm", + marks=[td.skip_if_no("openpyxl"), td.skip_if_no("xlrd")], + ), + pytest.param( + "xlwt", ".xls", marks=[td.skip_if_no("xlwt"), td.skip_if_no("xlrd")] + ), + pytest.param( + "xlsxwriter", + ".xlsx", + marks=[td.skip_if_no("xlsxwriter"), td.skip_if_no("xlrd")], + ), + pytest.param("odf", ".ods", marks=td.skip_if_no("odf")), + ], +) +@pytest.mark.usefixtures("set_engine") +class TestExcelWriter: + def test_excel_sheet_size(self, path): + + # GH 26080 + breaking_row_count = 2 ** 20 + 1 + breaking_col_count = 2 ** 14 + 1 + # purposely using two arrays to prevent memory issues while testing + row_arr = np.zeros(shape=(breaking_row_count, 1)) + col_arr = np.zeros(shape=(1, breaking_col_count)) + row_df = DataFrame(row_arr) + col_df = DataFrame(col_arr) + + msg = "sheet is too large" + with pytest.raises(ValueError, match=msg): + row_df.to_excel(path) + + with pytest.raises(ValueError, match=msg): + col_df.to_excel(path) + + def test_excel_sheet_by_name_raise(self, path, engine): + gt = DataFrame(np.random.randn(10, 2)) + gt.to_excel(path) + + with ExcelFile(path) as xl: + df = pd.read_excel(xl, sheet_name=0, index_col=0) + + tm.assert_frame_equal(gt, df) + + msg = "Worksheet named '0' not found" + with pytest.raises(ValueError, match=msg): + pd.read_excel(xl, "0") + + 
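+    # Pattern exercised by the next tests (one workbook, several sheets):
+    #
+    #     with ExcelWriter(path) as writer:
+    #         frame.to_excel(writer, "Data1")
+    #         frame2.to_excel(writer, "Data2")
+    #
+    # Leaving the block closes the writer and flushes all sheets to disk.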
def test_excel_writer_context_manager(self, frame, path): + with ExcelWriter(path) as writer: + frame.to_excel(writer, "Data1") + frame2 = frame.copy() + frame2.columns = frame.columns[::-1] + frame2.to_excel(writer, "Data2") + + with ExcelFile(path) as reader: + found_df = pd.read_excel(reader, sheet_name="Data1", index_col=0) + found_df2 = pd.read_excel(reader, sheet_name="Data2", index_col=0) + + tm.assert_frame_equal(found_df, frame) + tm.assert_frame_equal(found_df2, frame2) + + def test_roundtrip(self, frame, path): + frame = frame.copy() + frame["A"][:5] = np.nan + + frame.to_excel(path, "test1") + frame.to_excel(path, "test1", columns=["A", "B"]) + frame.to_excel(path, "test1", header=False) + frame.to_excel(path, "test1", index=False) + + # test roundtrip + frame.to_excel(path, "test1") + recons = pd.read_excel(path, sheet_name="test1", index_col=0) + tm.assert_frame_equal(frame, recons) + + frame.to_excel(path, "test1", index=False) + recons = pd.read_excel(path, sheet_name="test1", index_col=None) + recons.index = frame.index + tm.assert_frame_equal(frame, recons) + + frame.to_excel(path, "test1", na_rep="NA") + recons = pd.read_excel(path, sheet_name="test1", index_col=0, na_values=["NA"]) + tm.assert_frame_equal(frame, recons) + + # GH 3611 + frame.to_excel(path, "test1", na_rep="88") + recons = pd.read_excel(path, sheet_name="test1", index_col=0, na_values=["88"]) + tm.assert_frame_equal(frame, recons) + + frame.to_excel(path, "test1", na_rep="88") + recons = pd.read_excel( + path, sheet_name="test1", index_col=0, na_values=[88, 88.0] + ) + tm.assert_frame_equal(frame, recons) + + # GH 6573 + frame.to_excel(path, "Sheet1") + recons = pd.read_excel(path, index_col=0) + tm.assert_frame_equal(frame, recons) + + frame.to_excel(path, "0") + recons = pd.read_excel(path, index_col=0) + tm.assert_frame_equal(frame, recons) + + # GH 8825 Pandas Series should provide to_excel method + s = frame["A"] + s.to_excel(path) + recons = pd.read_excel(path, index_col=0) + tm.assert_frame_equal(s.to_frame(), recons) + + def test_mixed(self, frame, path): + mixed_frame = frame.copy() + mixed_frame["foo"] = "bar" + + mixed_frame.to_excel(path, "test1") + with ExcelFile(path) as reader: + recons = pd.read_excel(reader, sheet_name="test1", index_col=0) + tm.assert_frame_equal(mixed_frame, recons) + + def test_ts_frame(self, tsframe, path): + df = tsframe + + # freq doesn't round-trip + index = pd.DatetimeIndex(np.asarray(df.index), freq=None) + df.index = index + + df.to_excel(path, "test1") + with ExcelFile(path) as reader: + recons = pd.read_excel(reader, sheet_name="test1", index_col=0) + tm.assert_frame_equal(df, recons) + + def test_basics_with_nan(self, frame, path): + frame = frame.copy() + frame["A"][:5] = np.nan + frame.to_excel(path, "test1") + frame.to_excel(path, "test1", columns=["A", "B"]) + frame.to_excel(path, "test1", header=False) + frame.to_excel(path, "test1", index=False) + + @pytest.mark.parametrize("np_type", [np.int8, np.int16, np.int32, np.int64]) + def test_int_types(self, np_type, path): + # Test np.int values read come back as int + # (rather than float which is Excel's format). 
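+        # Excel stores every number as an IEEE 754 double, so recovering
+        # int64 here relies on pandas converting integral floats on read.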
+ df = DataFrame(np.random.randint(-10, 10, size=(10, 2)), dtype=np_type) + df.to_excel(path, "test1") + + with ExcelFile(path) as reader: + recons = pd.read_excel(reader, sheet_name="test1", index_col=0) + + int_frame = df.astype(np.int64) + tm.assert_frame_equal(int_frame, recons) + + recons2 = pd.read_excel(path, sheet_name="test1", index_col=0) + tm.assert_frame_equal(int_frame, recons2) + + # Test with convert_float=False comes back as float. + float_frame = df.astype(float) + float_frame.columns = float_frame.columns.astype(float) + float_frame.index = float_frame.index.astype(float) + with tm.assert_produces_warning( + FutureWarning, match="convert_float is deprecated" + ): + recons = pd.read_excel( + path, sheet_name="test1", convert_float=False, index_col=0 + ) + tm.assert_frame_equal(recons, float_frame) + + @pytest.mark.parametrize("np_type", [np.float16, np.float32, np.float64]) + def test_float_types(self, np_type, path): + # Test np.float values read come back as float. + df = DataFrame(np.random.random_sample(10), dtype=np_type) + df.to_excel(path, "test1") + + with ExcelFile(path) as reader: + recons = pd.read_excel(reader, sheet_name="test1", index_col=0).astype( + np_type + ) + + tm.assert_frame_equal(df, recons) + + @pytest.mark.parametrize("np_type", [np.bool8, np.bool_]) + def test_bool_types(self, np_type, path): + # Test np.bool8 and np.bool_ values read come back as float. + df = DataFrame([1, 0, True, False], dtype=np_type) + df.to_excel(path, "test1") + + with ExcelFile(path) as reader: + recons = pd.read_excel(reader, sheet_name="test1", index_col=0).astype( + np_type + ) + + tm.assert_frame_equal(df, recons) + + def test_inf_roundtrip(self, path): + df = DataFrame([(1, np.inf), (2, 3), (5, -np.inf)]) + df.to_excel(path, "test1") + + with ExcelFile(path) as reader: + recons = pd.read_excel(reader, sheet_name="test1", index_col=0) + + tm.assert_frame_equal(df, recons) + + def test_sheets(self, frame, tsframe, path): + + # freq doesn't round-trip + index = pd.DatetimeIndex(np.asarray(tsframe.index), freq=None) + tsframe.index = index + + frame = frame.copy() + frame["A"][:5] = np.nan + + frame.to_excel(path, "test1") + frame.to_excel(path, "test1", columns=["A", "B"]) + frame.to_excel(path, "test1", header=False) + frame.to_excel(path, "test1", index=False) + + # Test writing to separate sheets + with ExcelWriter(path) as writer: + frame.to_excel(writer, "test1") + tsframe.to_excel(writer, "test2") + with ExcelFile(path) as reader: + recons = pd.read_excel(reader, sheet_name="test1", index_col=0) + tm.assert_frame_equal(frame, recons) + recons = pd.read_excel(reader, sheet_name="test2", index_col=0) + tm.assert_frame_equal(tsframe, recons) + assert 2 == len(reader.sheet_names) + assert "test1" == reader.sheet_names[0] + assert "test2" == reader.sheet_names[1] + + def test_colaliases(self, frame, path): + frame = frame.copy() + frame["A"][:5] = np.nan + + frame.to_excel(path, "test1") + frame.to_excel(path, "test1", columns=["A", "B"]) + frame.to_excel(path, "test1", header=False) + frame.to_excel(path, "test1", index=False) + + # column aliases + col_aliases = Index(["AA", "X", "Y", "Z"]) + frame.to_excel(path, "test1", header=col_aliases) + with ExcelFile(path) as reader: + rs = pd.read_excel(reader, sheet_name="test1", index_col=0) + xp = frame.copy() + xp.columns = col_aliases + tm.assert_frame_equal(xp, rs) + + def test_roundtrip_indexlabels(self, merge_cells, frame, path): + frame = frame.copy() + frame["A"][:5] = np.nan + + frame.to_excel(path, "test1") + 
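+        # These calls each overwrite the file from scratch; they smoke-test
+        # the argument combinations before the index_label round trips below.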
frame.to_excel(path, "test1", columns=["A", "B"]) + frame.to_excel(path, "test1", header=False) + frame.to_excel(path, "test1", index=False) + + # test index_label + df = DataFrame(np.random.randn(10, 2)) >= 0 + df.to_excel(path, "test1", index_label=["test"], merge_cells=merge_cells) + with ExcelFile(path) as reader: + recons = pd.read_excel(reader, sheet_name="test1", index_col=0).astype( + np.int64 + ) + df.index.names = ["test"] + assert df.index.names == recons.index.names + + df = DataFrame(np.random.randn(10, 2)) >= 0 + df.to_excel( + path, + "test1", + index_label=["test", "dummy", "dummy2"], + merge_cells=merge_cells, + ) + with ExcelFile(path) as reader: + recons = pd.read_excel(reader, sheet_name="test1", index_col=0).astype( + np.int64 + ) + df.index.names = ["test"] + assert df.index.names == recons.index.names + + df = DataFrame(np.random.randn(10, 2)) >= 0 + df.to_excel(path, "test1", index_label="test", merge_cells=merge_cells) + with ExcelFile(path) as reader: + recons = pd.read_excel(reader, sheet_name="test1", index_col=0).astype( + np.int64 + ) + df.index.names = ["test"] + tm.assert_frame_equal(df, recons.astype(bool)) + + frame.to_excel( + path, + "test1", + columns=["A", "B", "C", "D"], + index=False, + merge_cells=merge_cells, + ) + # take 'A' and 'B' as indexes (same row as cols 'C', 'D') + df = frame.copy() + df = df.set_index(["A", "B"]) + + with ExcelFile(path) as reader: + recons = pd.read_excel(reader, sheet_name="test1", index_col=[0, 1]) + tm.assert_frame_equal(df, recons) + + def test_excel_roundtrip_indexname(self, merge_cells, path): + df = DataFrame(np.random.randn(10, 4)) + df.index.name = "foo" + + df.to_excel(path, merge_cells=merge_cells) + + with ExcelFile(path) as xf: + result = pd.read_excel(xf, sheet_name=xf.sheet_names[0], index_col=0) + + tm.assert_frame_equal(result, df) + assert result.index.name == "foo" + + def test_excel_roundtrip_datetime(self, merge_cells, tsframe, path): + # datetime.date, not sure what to test here exactly + + # freq does not round-trip + index = pd.DatetimeIndex(np.asarray(tsframe.index), freq=None) + tsframe.index = index + + tsf = tsframe.copy() + + tsf.index = [x.date() for x in tsframe.index] + tsf.to_excel(path, "test1", merge_cells=merge_cells) + + with ExcelFile(path) as reader: + recons = pd.read_excel(reader, sheet_name="test1", index_col=0) + + tm.assert_frame_equal(tsframe, recons) + + def test_excel_date_datetime_format(self, engine, ext, path): + # see gh-4133 + # + # Excel output format strings + df = DataFrame( + [ + [date(2014, 1, 31), date(1999, 9, 24)], + [datetime(1998, 5, 26, 23, 33, 4), datetime(2014, 2, 28, 13, 5, 13)], + ], + index=["DATE", "DATETIME"], + columns=["X", "Y"], + ) + df_expected = DataFrame( + [ + [datetime(2014, 1, 31), datetime(1999, 9, 24)], + [datetime(1998, 5, 26, 23, 33, 4), datetime(2014, 2, 28, 13, 5, 13)], + ], + index=["DATE", "DATETIME"], + columns=["X", "Y"], + ) + + with tm.ensure_clean(ext) as filename2: + with ExcelWriter(path) as writer1: + df.to_excel(writer1, "test1") + + with ExcelWriter( + filename2, + date_format="DD.MM.YYYY", + datetime_format="DD.MM.YYYY HH-MM-SS", + ) as writer2: + df.to_excel(writer2, "test1") + + with ExcelFile(path) as reader1: + rs1 = pd.read_excel(reader1, sheet_name="test1", index_col=0) + + with ExcelFile(filename2) as reader2: + rs2 = pd.read_excel(reader2, sheet_name="test1", index_col=0) + + tm.assert_frame_equal(rs1, rs2) + + # Since the reader returns a datetime object for dates, + # we need to use df_expected to check the 
result. + tm.assert_frame_equal(rs2, df_expected) + + def test_to_excel_interval_no_labels(self, path): + # see gh-19242 + # + # Test writing Interval without labels. + df = DataFrame(np.random.randint(-10, 10, size=(20, 1)), dtype=np.int64) + expected = df.copy() + + df["new"] = pd.cut(df[0], 10) + expected["new"] = pd.cut(expected[0], 10).astype(str) + + df.to_excel(path, "test1") + with ExcelFile(path) as reader: + recons = pd.read_excel(reader, sheet_name="test1", index_col=0) + tm.assert_frame_equal(expected, recons) + + def test_to_excel_interval_labels(self, path): + # see gh-19242 + # + # Test writing Interval with labels. + df = DataFrame(np.random.randint(-10, 10, size=(20, 1)), dtype=np.int64) + expected = df.copy() + intervals = pd.cut( + df[0], 10, labels=["A", "B", "C", "D", "E", "F", "G", "H", "I", "J"] + ) + df["new"] = intervals + expected["new"] = pd.Series(list(intervals)) + + df.to_excel(path, "test1") + with ExcelFile(path) as reader: + recons = pd.read_excel(reader, sheet_name="test1", index_col=0) + tm.assert_frame_equal(expected, recons) + + def test_to_excel_timedelta(self, path): + # see gh-19242, gh-9155 + # + # Test writing timedelta to xls. + df = DataFrame( + np.random.randint(-10, 10, size=(20, 1)), columns=["A"], dtype=np.int64 + ) + expected = df.copy() + + df["new"] = df["A"].apply(lambda x: timedelta(seconds=x)) + expected["new"] = expected["A"].apply( + lambda x: timedelta(seconds=x).total_seconds() / 86400 + ) + + df.to_excel(path, "test1") + with ExcelFile(path) as reader: + recons = pd.read_excel(reader, sheet_name="test1", index_col=0) + tm.assert_frame_equal(expected, recons) + + def test_to_excel_periodindex(self, tsframe, path): + xp = tsframe.resample("M", kind="period").mean() + + xp.to_excel(path, "sht1") + + with ExcelFile(path) as reader: + rs = pd.read_excel(reader, sheet_name="sht1", index_col=0) + tm.assert_frame_equal(xp, rs.to_period("M")) + + def test_to_excel_multiindex(self, merge_cells, frame, path): + arrays = np.arange(len(frame.index) * 2).reshape(2, -1) + new_index = MultiIndex.from_arrays(arrays, names=["first", "second"]) + frame.index = new_index + + frame.to_excel(path, "test1", header=False) + frame.to_excel(path, "test1", columns=["A", "B"]) + + # round trip + frame.to_excel(path, "test1", merge_cells=merge_cells) + with ExcelFile(path) as reader: + df = pd.read_excel(reader, sheet_name="test1", index_col=[0, 1]) + tm.assert_frame_equal(frame, df) + + # GH13511 + def test_to_excel_multiindex_nan_label(self, merge_cells, path): + df = DataFrame({"A": [None, 2, 3], "B": [10, 20, 30], "C": np.random.sample(3)}) + df = df.set_index(["A", "B"]) + + df.to_excel(path, merge_cells=merge_cells) + df1 = pd.read_excel(path, index_col=[0, 1]) + tm.assert_frame_equal(df, df1) + + # Test for Issue 11328. 
If column indices are integers, make + # sure they are handled correctly for either setting of + # merge_cells + def test_to_excel_multiindex_cols(self, merge_cells, frame, path): + arrays = np.arange(len(frame.index) * 2).reshape(2, -1) + new_index = MultiIndex.from_arrays(arrays, names=["first", "second"]) + frame.index = new_index + + new_cols_index = MultiIndex.from_tuples([(40, 1), (40, 2), (50, 1), (50, 2)]) + frame.columns = new_cols_index + header = [0, 1] + if not merge_cells: + header = 0 + + # round trip + frame.to_excel(path, "test1", merge_cells=merge_cells) + with ExcelFile(path) as reader: + df = pd.read_excel( + reader, sheet_name="test1", header=header, index_col=[0, 1] + ) + if not merge_cells: + fm = frame.columns.format(sparsify=False, adjoin=False, names=False) + frame.columns = [".".join(map(str, q)) for q in zip(*fm)] + tm.assert_frame_equal(frame, df) + + def test_to_excel_multiindex_dates(self, merge_cells, tsframe, path): + # try multiindex with dates + new_index = [tsframe.index, np.arange(len(tsframe.index))] + tsframe.index = MultiIndex.from_arrays(new_index) + + tsframe.index.names = ["time", "foo"] + tsframe.to_excel(path, "test1", merge_cells=merge_cells) + with ExcelFile(path) as reader: + recons = pd.read_excel(reader, sheet_name="test1", index_col=[0, 1]) + + tm.assert_frame_equal(tsframe, recons) + assert recons.index.names == ("time", "foo") + + def test_to_excel_multiindex_no_write_index(self, path): + # Test writing and re-reading a MI without the index. GH 5616. + + # Initial non-MI frame. + frame1 = DataFrame({"a": [10, 20], "b": [30, 40], "c": [50, 60]}) + + # Add a MI. + frame2 = frame1.copy() + multi_index = MultiIndex.from_tuples([(70, 80), (90, 100)]) + frame2.index = multi_index + + # Write out to Excel without the index. + frame2.to_excel(path, "test1", index=False) + + # Read it back in. + with ExcelFile(path) as reader: + frame3 = pd.read_excel(reader, sheet_name="test1") + + # Test that it is the same as the initial frame. + tm.assert_frame_equal(frame1, frame3) + + def test_to_excel_float_format(self, path): + df = DataFrame( + [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], + index=["A", "B"], + columns=["X", "Y", "Z"], + ) + df.to_excel(path, "test1", float_format="%.2f") + + with ExcelFile(path) as reader: + result = pd.read_excel(reader, sheet_name="test1", index_col=0) + + expected = DataFrame( + [[0.12, 0.23, 0.57], [12.32, 123123.20, 321321.20]], + index=["A", "B"], + columns=["X", "Y", "Z"], + ) + tm.assert_frame_equal(result, expected) + + def test_to_excel_output_encoding(self, ext): + # Avoid mixed inferred_type. + df = DataFrame( + [["\u0192", "\u0193", "\u0194"], ["\u0195", "\u0196", "\u0197"]], + index=["A\u0192", "B"], + columns=["X\u0193", "Y", "Z"], + ) + + with tm.ensure_clean("__tmp_to_excel_float_format__." + ext) as filename: + df.to_excel(filename, sheet_name="TestSheet", encoding="utf8") + result = pd.read_excel(filename, sheet_name="TestSheet", index_col=0) + tm.assert_frame_equal(result, df) + + def test_to_excel_unicode_filename(self, ext, path): + with tm.ensure_clean("\u0192u." 
+ ext) as filename:
+            try:
+                f = open(filename, "wb")
+            except UnicodeEncodeError:
+                pytest.skip("No unicode file names on this system")
+            else:
+                # close in "else", not "finally": if open() failed, f is
+                # unbound and f.close() would raise NameError, masking the skip
+                f.close()
+
+        df = DataFrame(
+            [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]],
+            index=["A", "B"],
+            columns=["X", "Y", "Z"],
+        )
+        df.to_excel(filename, "test1", float_format="%.2f")
+
+        with ExcelFile(filename) as reader:
+            result = pd.read_excel(reader, sheet_name="test1", index_col=0)
+
+        expected = DataFrame(
+            [[0.12, 0.23, 0.57], [12.32, 123123.20, 321321.20]],
+            index=["A", "B"],
+            columns=["X", "Y", "Z"],
+        )
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize("use_headers", [True, False])
+    @pytest.mark.parametrize("r_idx_nlevels", [1, 2, 3])
+    @pytest.mark.parametrize("c_idx_nlevels", [1, 2, 3])
+    def test_excel_010_hemstring(
+        self, merge_cells, c_idx_nlevels, r_idx_nlevels, use_headers, path
+    ):
+        def roundtrip(data, header=True, parser_hdr=0, index=True):
+            data.to_excel(path, header=header, merge_cells=merge_cells, index=index)
+
+            with ExcelFile(path) as xf:
+                return pd.read_excel(
+                    xf, sheet_name=xf.sheet_names[0], header=parser_hdr
+                )
+
+        # Basic test.
+        parser_header = 0 if use_headers else None
+        res = roundtrip(DataFrame([0]), use_headers, parser_header)
+
+        assert res.shape == (1, 2)
+        assert res.iloc[0, 0] is not np.nan
+
+        # More complex tests with multi-index.
+        nrows = 5
+        ncols = 3
+
+        # ensure limited functionality in 0.10
+        # override of gh-2370 until sorted out in 0.11
+
+        df = tm.makeCustomDataframe(
+            nrows, ncols, r_idx_nlevels=r_idx_nlevels, c_idx_nlevels=c_idx_nlevels
+        )
+
+        # This if will be removed once multi-column Excel writing
+        # is implemented. For now fixing gh-9794.
+        if c_idx_nlevels > 1:
+            msg = (
+                "Writing to Excel with MultiIndex columns and no index "
+                "\\('index'=False\\) is not yet implemented."
+            )
+            with pytest.raises(NotImplementedError, match=msg):
+                roundtrip(df, use_headers, index=False)
+        else:
+            res = roundtrip(df, use_headers)
+
+            if use_headers:
+                assert res.shape == (nrows, ncols + r_idx_nlevels)
+            else:
+                # First row taken as columns.
+                assert res.shape == (nrows - 1, ncols + r_idx_nlevels)
+
+            # No NaNs.
+            for r in range(len(res.index)):
+                for c in range(len(res.columns)):
+                    assert res.iloc[r, c] is not np.nan
+
+    def test_duplicated_columns(self, path):
+        # see gh-5235
+        df = DataFrame([[1, 2, 3], [1, 2, 3], [1, 2, 3]], columns=["A", "B", "B"])
+        df.to_excel(path, "test1")
+        expected = DataFrame(
+            [[1, 2, 3], [1, 2, 3], [1, 2, 3]], columns=["A", "B", "B.1"]
+        )
+
+        # By default, we mangle.
+        result = pd.read_excel(path, sheet_name="test1", index_col=0)
+        tm.assert_frame_equal(result, expected)
+
+        # Explicitly, we pass in the parameter.
+ result = pd.read_excel( + path, sheet_name="test1", index_col=0, mangle_dupe_cols=True + ) + tm.assert_frame_equal(result, expected) + + # see gh-11007, gh-10970 + df = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=["A", "B", "A", "B"]) + df.to_excel(path, "test1") + + result = pd.read_excel(path, sheet_name="test1", index_col=0) + expected = DataFrame( + [[1, 2, 3, 4], [5, 6, 7, 8]], columns=["A", "B", "A.1", "B.1"] + ) + tm.assert_frame_equal(result, expected) + + # see gh-10982 + df.to_excel(path, "test1", index=False, header=False) + result = pd.read_excel(path, sheet_name="test1", header=None) + + expected = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]]) + tm.assert_frame_equal(result, expected) + + msg = "Setting mangle_dupe_cols=False is not supported yet" + with pytest.raises(ValueError, match=msg): + pd.read_excel(path, sheet_name="test1", header=None, mangle_dupe_cols=False) + + def test_swapped_columns(self, path): + # Test for issue #5427. + write_frame = DataFrame({"A": [1, 1, 1], "B": [2, 2, 2]}) + write_frame.to_excel(path, "test1", columns=["B", "A"]) + + read_frame = pd.read_excel(path, sheet_name="test1", header=0) + + tm.assert_series_equal(write_frame["A"], read_frame["A"]) + tm.assert_series_equal(write_frame["B"], read_frame["B"]) + + def test_invalid_columns(self, path): + # see gh-10982 + write_frame = DataFrame({"A": [1, 1, 1], "B": [2, 2, 2]}) + + with pytest.raises(KeyError, match="Not all names specified"): + write_frame.to_excel(path, "test1", columns=["B", "C"]) + + with pytest.raises( + KeyError, match="'passes columns are not ALL present dataframe'" + ): + write_frame.to_excel(path, "test1", columns=["C", "D"]) + + @pytest.mark.parametrize( + "to_excel_index,read_excel_index_col", + [ + (True, 0), # Include index in write to file + (False, None), # Dont include index in write to file + ], + ) + def test_write_subset_columns(self, path, to_excel_index, read_excel_index_col): + # GH 31677 + write_frame = DataFrame({"A": [1, 1, 1], "B": [2, 2, 2], "C": [3, 3, 3]}) + write_frame.to_excel( + path, "col_subset_bug", columns=["A", "B"], index=to_excel_index + ) + + expected = write_frame[["A", "B"]] + read_frame = pd.read_excel( + path, sheet_name="col_subset_bug", index_col=read_excel_index_col + ) + + tm.assert_frame_equal(expected, read_frame) + + def test_comment_arg(self, path): + # see gh-18735 + # + # Test the comment argument functionality to pd.read_excel. + + # Create file to read in. + df = DataFrame({"A": ["one", "#one", "one"], "B": ["two", "two", "#two"]}) + df.to_excel(path, "test_c") + + # Read file without comment arg. + result1 = pd.read_excel(path, sheet_name="test_c", index_col=0) + + result1.iloc[1, 0] = None + result1.iloc[1, 1] = None + result1.iloc[2, 1] = None + + result2 = pd.read_excel(path, sheet_name="test_c", comment="#", index_col=0) + tm.assert_frame_equal(result1, result2) + + def test_comment_default(self, path): + # Re issue #18735 + # Test the comment argument default to pd.read_excel + + # Create file to read in + df = DataFrame({"A": ["one", "#one", "one"], "B": ["two", "two", "#two"]}) + df.to_excel(path, "test_c") + + # Read file with default and explicit comment=None + result1 = pd.read_excel(path, sheet_name="test_c") + result2 = pd.read_excel(path, sheet_name="test_c", comment=None) + tm.assert_frame_equal(result1, result2) + + def test_comment_used(self, path): + # see gh-18735 + # + # Test the comment argument is working as expected when used. + + # Create file to read in. 
+ df = DataFrame({"A": ["one", "#one", "one"], "B": ["two", "two", "#two"]}) + df.to_excel(path, "test_c") + + # Test read_frame_comment against manually produced expected output. + expected = DataFrame({"A": ["one", None, "one"], "B": ["two", None, None]}) + result = pd.read_excel(path, sheet_name="test_c", comment="#", index_col=0) + tm.assert_frame_equal(result, expected) + + def test_comment_empty_line(self, path): + # Re issue #18735 + # Test that pd.read_excel ignores commented lines at the end of file + + df = DataFrame({"a": ["1", "#2"], "b": ["2", "3"]}) + df.to_excel(path, index=False) + + # Test that all-comment lines at EoF are ignored + expected = DataFrame({"a": [1], "b": [2]}) + result = pd.read_excel(path, comment="#") + tm.assert_frame_equal(result, expected) + + def test_datetimes(self, path): + + # Test writing and reading datetimes. For issue #9139. (xref #9185) + datetimes = [ + datetime(2013, 1, 13, 1, 2, 3), + datetime(2013, 1, 13, 2, 45, 56), + datetime(2013, 1, 13, 4, 29, 49), + datetime(2013, 1, 13, 6, 13, 42), + datetime(2013, 1, 13, 7, 57, 35), + datetime(2013, 1, 13, 9, 41, 28), + datetime(2013, 1, 13, 11, 25, 21), + datetime(2013, 1, 13, 13, 9, 14), + datetime(2013, 1, 13, 14, 53, 7), + datetime(2013, 1, 13, 16, 37, 0), + datetime(2013, 1, 13, 18, 20, 52), + ] + + write_frame = DataFrame({"A": datetimes}) + write_frame.to_excel(path, "Sheet1") + if path.endswith("xlsx") or path.endswith("xlsm"): + pytest.skip( + "Defaults to openpyxl and fails with floating point error on " + "datetimes; may be fixed on newer versions of openpyxl - GH #38644" + ) + read_frame = pd.read_excel(path, sheet_name="Sheet1", header=0) + + tm.assert_series_equal(write_frame["A"], read_frame["A"]) + + def test_bytes_io(self, engine): + # see gh-7074 + with BytesIO() as bio: + df = DataFrame(np.random.randn(10, 2)) + + # Pass engine explicitly, as there is no file path to infer from. + with ExcelWriter(bio, engine=engine) as writer: + df.to_excel(writer) + + bio.seek(0) + reread_df = pd.read_excel(bio, index_col=0) + tm.assert_frame_equal(df, reread_df) + + def test_write_lists_dict(self, path): + # see gh-8188. 
+ df = DataFrame( + { + "mixed": ["a", ["b", "c"], {"d": "e", "f": 2}], + "numeric": [1, 2, 3.0], + "str": ["apple", "banana", "cherry"], + } + ) + df.to_excel(path, "Sheet1") + read = pd.read_excel(path, sheet_name="Sheet1", header=0, index_col=0) + + expected = df.copy() + expected.mixed = expected.mixed.apply(str) + expected.numeric = expected.numeric.astype("int64") + + tm.assert_frame_equal(read, expected) + + def test_render_as_column_name(self, path): + # see gh-34331 + df = DataFrame({"render": [1, 2], "data": [3, 4]}) + df.to_excel(path, "Sheet1") + read = pd.read_excel(path, "Sheet1", index_col=0) + expected = df + tm.assert_frame_equal(read, expected) + + def test_true_and_false_value_options(self, path): + # see gh-13347 + df = DataFrame([["foo", "bar"]], columns=["col1", "col2"]) + expected = df.replace({"foo": True, "bar": False}) + + df.to_excel(path) + read_frame = pd.read_excel( + path, true_values=["foo"], false_values=["bar"], index_col=0 + ) + tm.assert_frame_equal(read_frame, expected) + + def test_freeze_panes(self, path): + # see gh-15160 + expected = DataFrame([[1, 2], [3, 4]], columns=["col1", "col2"]) + expected.to_excel(path, "Sheet1", freeze_panes=(1, 1)) + + result = pd.read_excel(path, index_col=0) + tm.assert_frame_equal(result, expected) + + def test_path_path_lib(self, engine, ext): + df = tm.makeDataFrame() + writer = partial(df.to_excel, engine=engine) + + reader = partial(pd.read_excel, index_col=0) + result = tm.round_trip_pathlib(writer, reader, path=f"foo{ext}") + tm.assert_frame_equal(result, df) + + def test_path_local_path(self, engine, ext): + df = tm.makeDataFrame() + writer = partial(df.to_excel, engine=engine) + + reader = partial(pd.read_excel, index_col=0) + result = tm.round_trip_localpath(writer, reader, path=f"foo{ext}") + tm.assert_frame_equal(result, df) + + def test_merged_cell_custom_objects(self, merge_cells, path): + # see GH-27006 + mi = MultiIndex.from_tuples( + [ + (pd.Period("2018"), pd.Period("2018Q1")), + (pd.Period("2018"), pd.Period("2018Q2")), + ] + ) + expected = DataFrame(np.ones((2, 2)), columns=mi) + expected.to_excel(path) + with tm.assert_produces_warning( + FutureWarning, match="convert_float is deprecated" + ): + result = pd.read_excel( + path, header=[0, 1], index_col=0, convert_float=False + ) + # need to convert PeriodIndexes to standard Indexes for assert equal + expected.columns = expected.columns.set_levels( + [[str(i) for i in mi.levels[0]], [str(i) for i in mi.levels[1]]], + level=[0, 1], + ) + expected.index = expected.index.astype(np.float64) + tm.assert_frame_equal(expected, result) + + @pytest.mark.parametrize("dtype", [None, object]) + def test_raise_when_saving_timezones(self, dtype, tz_aware_fixture, path): + # GH 27008, GH 7056 + tz = tz_aware_fixture + data = pd.Timestamp("2019", tz=tz) + df = DataFrame([data], dtype=dtype) + with pytest.raises(ValueError, match="Excel does not support"): + df.to_excel(path) + + data = data.to_pydatetime() + df = DataFrame([data], dtype=dtype) + with pytest.raises(ValueError, match="Excel does not support"): + df.to_excel(path) + + def test_excel_duplicate_columns_with_names(self, path): + # GH#39695 + df = DataFrame({"A": [0, 1], "B": [10, 11]}) + df.to_excel(path, columns=["A", "B", "A"], index=False) + + result = pd.read_excel(path) + expected = DataFrame([[0, 10, 0], [1, 11, 1]], columns=["A", "B", "A.1"]) + tm.assert_frame_equal(result, expected) + + def test_if_sheet_exists_raises(self, ext): + # GH 40230 + msg = "if_sheet_exists is only valid in append mode 
(mode='a')" + + with tm.ensure_clean(ext) as f: + with pytest.raises(ValueError, match=re.escape(msg)): + ExcelWriter(f, if_sheet_exists="replace") + + +class TestExcelWriterEngineTests: + @pytest.mark.parametrize( + "klass,ext", + [ + pytest.param(_XlsxWriter, ".xlsx", marks=td.skip_if_no("xlsxwriter")), + pytest.param(_OpenpyxlWriter, ".xlsx", marks=td.skip_if_no("openpyxl")), + pytest.param(_XlwtWriter, ".xls", marks=td.skip_if_no("xlwt")), + ], + ) + def test_ExcelWriter_dispatch(self, klass, ext): + with tm.ensure_clean(ext) as path: + with ExcelWriter(path) as writer: + if ext == ".xlsx" and td.safe_import("xlsxwriter"): + # xlsxwriter has preference over openpyxl if both installed + assert isinstance(writer, _XlsxWriter) + else: + assert isinstance(writer, klass) + + def test_ExcelWriter_dispatch_raises(self): + with pytest.raises(ValueError, match="No engine"): + ExcelWriter("nothing") + + def test_register_writer(self): + # some awkward mocking to test out dispatch and such actually works + called_save = [] + called_write_cells = [] + + class DummyClass(ExcelWriter): + called_save = False + called_write_cells = False + supported_extensions = ["xlsx", "xls"] + engine = "dummy" + + def save(self): + called_save.append(True) + + def write_cells(self, *args, **kwargs): + called_write_cells.append(True) + + def check_called(func): + func() + assert len(called_save) >= 1 + assert len(called_write_cells) >= 1 + del called_save[:] + del called_write_cells[:] + + with pd.option_context("io.excel.xlsx.writer", "dummy"): + path = "something.xlsx" + with tm.ensure_clean(path) as filepath: + register_writer(DummyClass) + with ExcelWriter(filepath) as writer: + assert isinstance(writer, DummyClass) + df = tm.makeCustomDataframe(1, 1) + check_called(lambda: df.to_excel(filepath)) + with tm.ensure_clean("something.xls") as filepath: + check_called(lambda: df.to_excel(filepath, engine="dummy")) + + @pytest.mark.parametrize( + "ext", + [ + pytest.param(".xlsx", marks=td.skip_if_no("xlsxwriter")), + pytest.param(".xlsx", marks=td.skip_if_no("openpyxl")), + pytest.param(".ods", marks=td.skip_if_no("odf")), + ], + ) + def test_engine_kwargs_and_kwargs_raises(self, ext): + # GH 40430 + msg = re.escape("Cannot use both engine_kwargs and **kwargs") + with pytest.raises(ValueError, match=msg): + with ExcelWriter("", engine_kwargs={"a": 1}, b=2): + pass + + +@td.skip_if_no("xlrd") +@td.skip_if_no("openpyxl") +class TestFSPath: + def test_excelfile_fspath(self): + with tm.ensure_clean("foo.xlsx") as path: + df = DataFrame({"A": [1, 2]}) + df.to_excel(path) + with ExcelFile(path) as xl: + result = os.fspath(xl) + assert result == path + + def test_excelwriter_fspath(self): + with tm.ensure_clean("foo.xlsx") as path: + with ExcelWriter(path) as writer: + assert os.fspath(writer) == str(path) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/excel/test_xlrd.py b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/test_xlrd.py new file mode 100644 index 0000000..2309187 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/test_xlrd.py @@ -0,0 +1,109 @@ +import io + +import pytest + +from pandas.compat._optional import import_optional_dependency + +import pandas as pd +import pandas._testing as tm +from pandas.tests.io.excel import xlrd_version +from pandas.util.version import Version + +from pandas.io.excel import ExcelFile +from pandas.io.excel._base import inspect_excel_format + +xlrd = pytest.importorskip("xlrd") +xlwt = pytest.importorskip("xlwt") + +pytestmark = 
pytest.mark.filterwarnings( + "ignore:As the xlwt package is no longer maintained:FutureWarning" +) + + +# error: Unsupported operand types for <= ("Version" and "None") +if xlrd_version >= Version("2"): # type: ignore[operator] + exts = [".xls"] +else: + exts = [".xls", ".xlsx", ".xlsm"] + + +@pytest.fixture(params=exts) +def read_ext_xlrd(request): + """ + Valid extensions for reading Excel files with xlrd. + + Similar to read_ext, but excludes .ods, .xlsb, and for xlrd>2 .xlsx, .xlsm + """ + return request.param + + +def test_read_xlrd_book(read_ext_xlrd, frame): + df = frame + + engine = "xlrd" + sheet_name = "SheetA" + + with tm.ensure_clean(read_ext_xlrd) as pth: + df.to_excel(pth, sheet_name) + book = xlrd.open_workbook(pth) + + with ExcelFile(book, engine=engine) as xl: + result = pd.read_excel(xl, sheet_name=sheet_name, index_col=0) + tm.assert_frame_equal(df, result) + + result = pd.read_excel(book, sheet_name=sheet_name, engine=engine, index_col=0) + tm.assert_frame_equal(df, result) + + +def test_excel_file_warning_with_xlsx_file(datapath): + # GH 29375 + path = datapath("io", "data", "excel", "test1.xlsx") + has_openpyxl = import_optional_dependency("openpyxl", errors="ignore") is not None + if not has_openpyxl: + with tm.assert_produces_warning( + FutureWarning, + raise_on_extra_warnings=False, + match="The xlrd engine is no longer maintained", + ): + ExcelFile(path, engine=None) + else: + with tm.assert_produces_warning(None): + pd.read_excel(path, "Sheet1", engine=None) + + +def test_read_excel_warning_with_xlsx_file(datapath): + # GH 29375 + path = datapath("io", "data", "excel", "test1.xlsx") + has_openpyxl = import_optional_dependency("openpyxl", errors="ignore") is not None + if not has_openpyxl: + if xlrd_version >= Version("2"): + with pytest.raises( + ValueError, + match="Your version of xlrd is ", + ): + pd.read_excel(path, "Sheet1", engine=None) + else: + with tm.assert_produces_warning( + FutureWarning, + raise_on_extra_warnings=False, + match="The xlrd engine is no longer maintained", + ): + pd.read_excel(path, "Sheet1", engine=None) + else: + with tm.assert_produces_warning(None): + pd.read_excel(path, "Sheet1", engine=None) + + +@pytest.mark.parametrize( + "file_header", + [ + b"\x09\x00\x04\x00\x07\x00\x10\x00", + b"\x09\x02\x06\x00\x00\x00\x10\x00", + b"\x09\x04\x06\x00\x00\x00\x10\x00", + b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1", + ], +) +def test_read_old_xls_files(file_header): + # GH 41226 + f = io.BytesIO(file_header) + assert inspect_excel_format(f) == "xls" diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/excel/test_xlsxwriter.py b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/test_xlsxwriter.py new file mode 100644 index 0000000..79d2f55 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/test_xlsxwriter.py @@ -0,0 +1,84 @@ +import re +import warnings + +import pytest + +from pandas import DataFrame +import pandas._testing as tm + +from pandas.io.excel import ExcelWriter + +xlsxwriter = pytest.importorskip("xlsxwriter") + +pytestmark = pytest.mark.parametrize("ext", [".xlsx"]) + + +def test_column_format(ext): + # Test that column formats are applied to cells. Test for issue #9167. + # Applicable to xlsxwriter only. + with warnings.catch_warnings(): + # Ignore the openpyxl lxml warning. 
+ warnings.simplefilter("ignore") + openpyxl = pytest.importorskip("openpyxl") + + with tm.ensure_clean(ext) as path: + frame = DataFrame({"A": [123456, 123456], "B": [123456, 123456]}) + + with ExcelWriter(path) as writer: + frame.to_excel(writer) + + # Add a number format to col B and ensure it is applied to cells. + num_format = "#,##0" + write_workbook = writer.book + write_worksheet = write_workbook.worksheets()[0] + col_format = write_workbook.add_format({"num_format": num_format}) + write_worksheet.set_column("B:B", None, col_format) + + read_workbook = openpyxl.load_workbook(path) + try: + read_worksheet = read_workbook["Sheet1"] + except TypeError: + # compat + read_worksheet = read_workbook.get_sheet_by_name(name="Sheet1") + + # Get the number format from the cell. + try: + cell = read_worksheet["B2"] + except TypeError: + # compat + cell = read_worksheet.cell("B2") + + try: + read_num_format = cell.number_format + except AttributeError: + read_num_format = cell.style.number_format._format_code + + assert read_num_format == num_format + + +def test_write_append_mode_raises(ext): + msg = "Append mode is not supported with xlsxwriter!" + + with tm.ensure_clean(ext) as f: + with pytest.raises(ValueError, match=msg): + ExcelWriter(f, engine="xlsxwriter", mode="a") + + +@pytest.mark.parametrize("nan_inf_to_errors", [True, False]) +def test_kwargs(ext, nan_inf_to_errors): + # GH 42286 + kwargs = {"options": {"nan_inf_to_errors": nan_inf_to_errors}} + with tm.ensure_clean(ext) as f: + msg = re.escape("Use of **kwargs is deprecated") + with tm.assert_produces_warning(FutureWarning, match=msg): + with ExcelWriter(f, engine="xlsxwriter", **kwargs) as writer: + assert writer.book.nan_inf_to_errors == nan_inf_to_errors + + +@pytest.mark.parametrize("nan_inf_to_errors", [True, False]) +def test_engine_kwargs(ext, nan_inf_to_errors): + # GH 42286 + engine_kwargs = {"options": {"nan_inf_to_errors": nan_inf_to_errors}} + with tm.ensure_clean(ext) as f: + with ExcelWriter(f, engine="xlsxwriter", engine_kwargs=engine_kwargs) as writer: + assert writer.book.nan_inf_to_errors == nan_inf_to_errors diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/excel/test_xlwt.py b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/test_xlwt.py new file mode 100644 index 0000000..ec333de --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/excel/test_xlwt.py @@ -0,0 +1,127 @@ +import re + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + MultiIndex, + options, +) +import pandas._testing as tm + +from pandas.io.excel import ( + ExcelWriter, + _XlwtWriter, +) + +xlwt = pytest.importorskip("xlwt") + +pytestmark = pytest.mark.parametrize("ext,", [".xls"]) + + +def test_excel_raise_error_on_multiindex_columns_and_no_index(ext): + # MultiIndex as columns is not yet implemented 9794 + cols = MultiIndex.from_tuples( + [("site", ""), ("2014", "height"), ("2014", "weight")] + ) + df = DataFrame(np.random.randn(10, 3), columns=cols) + + msg = ( + "Writing to Excel with MultiIndex columns and no index " + "\\('index'=False\\) is not yet implemented." 
+ ) + with pytest.raises(NotImplementedError, match=msg): + with tm.ensure_clean(ext) as path: + df.to_excel(path, index=False) + + +def test_excel_multiindex_columns_and_index_true(ext): + cols = MultiIndex.from_tuples( + [("site", ""), ("2014", "height"), ("2014", "weight")] + ) + df = DataFrame(np.random.randn(10, 3), columns=cols) + with tm.ensure_clean(ext) as path: + df.to_excel(path, index=True) + + +def test_excel_multiindex_index(ext): + # MultiIndex as index works so assert no error #9794 + cols = MultiIndex.from_tuples( + [("site", ""), ("2014", "height"), ("2014", "weight")] + ) + df = DataFrame(np.random.randn(3, 10), index=cols) + with tm.ensure_clean(ext) as path: + df.to_excel(path, index=False) + + +def test_to_excel_styleconverter(ext): + hstyle = { + "font": {"bold": True}, + "borders": {"top": "thin", "right": "thin", "bottom": "thin", "left": "thin"}, + "alignment": {"horizontal": "center", "vertical": "top"}, + } + + xls_style = _XlwtWriter._convert_to_style(hstyle) + assert xls_style.font.bold + assert xlwt.Borders.THIN == xls_style.borders.top + assert xlwt.Borders.THIN == xls_style.borders.right + assert xlwt.Borders.THIN == xls_style.borders.bottom + assert xlwt.Borders.THIN == xls_style.borders.left + assert xlwt.Alignment.HORZ_CENTER == xls_style.alignment.horz + assert xlwt.Alignment.VERT_TOP == xls_style.alignment.vert + + +def test_write_append_mode_raises(ext): + msg = "Append mode is not supported with xlwt!" + + with tm.ensure_clean(ext) as f: + with pytest.raises(ValueError, match=msg): + ExcelWriter(f, engine="xlwt", mode="a") + + +def test_to_excel_xlwt_warning(ext): + # GH 26552 + df = DataFrame(np.random.randn(3, 10)) + with tm.ensure_clean(ext) as path: + with tm.assert_produces_warning( + FutureWarning, + match="As the xlwt package is no longer maintained", + ): + df.to_excel(path) + + +def test_option_xls_writer_deprecated(ext): + # GH 26552 + with tm.assert_produces_warning( + FutureWarning, + match="As the xlwt package is no longer maintained", + check_stacklevel=False, + ): + options.io.excel.xls.writer = "xlwt" + + +@pytest.mark.parametrize("style_compression", [0, 2]) +def test_kwargs(ext, style_compression): + # GH 42286 + kwargs = {"style_compression": style_compression} + with tm.ensure_clean(ext) as f: + msg = re.escape("Use of **kwargs is deprecated") + with tm.assert_produces_warning(FutureWarning, match=msg): + with ExcelWriter(f, engine="xlwt", **kwargs) as writer: + assert ( + writer.book._Workbook__styles.style_compression == style_compression + ) + # xlwt won't allow us to close without writing something + DataFrame().to_excel(writer) + + +@pytest.mark.parametrize("style_compression", [0, 2]) +def test_engine_kwargs(ext, style_compression): + # GH 42286 + engine_kwargs = {"style_compression": style_compression} + with tm.ensure_clean(ext) as f: + with ExcelWriter(f, engine="xlwt", engine_kwargs=engine_kwargs) as writer: + assert writer.book._Workbook__styles.style_compression == style_compression + # xlwt won't allow us to close without writing something + DataFrame().to_excel(writer) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..ff259c2 Binary 
files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/test_console.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/test_console.cpython-38.pyc new file mode 100644 index 0000000..b3917ed Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/test_console.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/test_css.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/test_css.cpython-38.pyc new file mode 100644 index 0000000..d365996 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/test_css.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/test_eng_formatting.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/test_eng_formatting.cpython-38.pyc new file mode 100644 index 0000000..6b5480a Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/test_eng_formatting.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/test_format.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/test_format.cpython-38.pyc new file mode 100644 index 0000000..057649c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/test_format.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/test_info.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/test_info.cpython-38.pyc new file mode 100644 index 0000000..5b3809a Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/test_info.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/test_printing.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/test_printing.cpython-38.pyc new file mode 100644 index 0000000..6f99264 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/test_printing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/test_series_info.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/test_series_info.cpython-38.pyc new file mode 100644 index 0000000..61ee4d7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/test_series_info.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/test_to_csv.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/test_to_csv.cpython-38.pyc new file mode 100644 index 0000000..13571d3 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/test_to_csv.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/test_to_excel.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/test_to_excel.cpython-38.pyc new file mode 100644 index 0000000..b88021a Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/test_to_excel.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/test_to_html.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/test_to_html.cpython-38.pyc new file mode 100644 index 0000000..f0b42c6 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/test_to_html.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/test_to_latex.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/test_to_latex.cpython-38.pyc new file mode 100644 index 0000000..3a2d395 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/test_to_latex.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/test_to_markdown.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/test_to_markdown.cpython-38.pyc new file mode 100644 index 0000000..0a97f29 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/test_to_markdown.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/test_to_string.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/test_to_string.cpython-38.pyc new file mode 100644 index 0000000..d450a4e Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/__pycache__/test_to_string.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..6ff4508 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/__pycache__/test_bar.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/__pycache__/test_bar.cpython-38.pyc new file mode 100644 index 0000000..38a02a5 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/__pycache__/test_bar.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/__pycache__/test_deprecated.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/__pycache__/test_deprecated.cpython-38.pyc new file mode 100644 index 0000000..07dd3c4 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/__pycache__/test_deprecated.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/__pycache__/test_format.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/__pycache__/test_format.cpython-38.pyc new file mode 100644 index 0000000..796ef94 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/__pycache__/test_format.cpython-38.pyc differ diff --git 
a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/__pycache__/test_highlight.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/__pycache__/test_highlight.cpython-38.pyc new file mode 100644 index 0000000..c3c88ba Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/__pycache__/test_highlight.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/__pycache__/test_html.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/__pycache__/test_html.cpython-38.pyc new file mode 100644 index 0000000..b93ba74 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/__pycache__/test_html.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/__pycache__/test_matplotlib.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/__pycache__/test_matplotlib.cpython-38.pyc new file mode 100644 index 0000000..57d3310 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/__pycache__/test_matplotlib.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/__pycache__/test_non_unique.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/__pycache__/test_non_unique.cpython-38.pyc new file mode 100644 index 0000000..36b8ecb Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/__pycache__/test_non_unique.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/__pycache__/test_style.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/__pycache__/test_style.cpython-38.pyc new file mode 100644 index 0000000..4fe1bd7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/__pycache__/test_style.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/__pycache__/test_to_latex.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/__pycache__/test_to_latex.cpython-38.pyc new file mode 100644 index 0000000..daebc75 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/__pycache__/test_to_latex.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/__pycache__/test_tooltip.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/__pycache__/test_tooltip.cpython-38.pyc new file mode 100644 index 0000000..7d56a87 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/__pycache__/test_tooltip.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/test_bar.py b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/test_bar.py new file mode 100644 index 0000000..19884aa --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/test_bar.py @@ -0,0 +1,307 @@ +import numpy as np +import pytest + +from pandas import DataFrame + +pytest.importorskip("jinja2") + + +def bar_grad(a=None, b=None, c=None, d=None): + """Used in multiple tests to simplify formatting of expected result""" + ret = [("width", "10em")] + if all(x is None for x in [a, b, c, d]): + return ret + return ret + [ + ( + "background", + 
f"linear-gradient(90deg,{','.join([x for x in [a, b, c, d] if x])})", + ) + ] + + +def no_bar(): + return bar_grad() + + +def bar_to(x, color="#d65f5f"): + return bar_grad(f" {color} {x:.1f}%", f" transparent {x:.1f}%") + + +def bar_from_to(x, y, color="#d65f5f"): + return bar_grad( + f" transparent {x:.1f}%", + f" {color} {x:.1f}%", + f" {color} {y:.1f}%", + f" transparent {y:.1f}%", + ) + + +@pytest.fixture +def df_pos(): + return DataFrame([[1], [2], [3]]) + + +@pytest.fixture +def df_neg(): + return DataFrame([[-1], [-2], [-3]]) + + +@pytest.fixture +def df_mix(): + return DataFrame([[-3], [1], [2]]) + + +@pytest.mark.parametrize( + "align, exp", + [ + ("left", [no_bar(), bar_to(50), bar_to(100)]), + ("right", [bar_to(100), bar_from_to(50, 100), no_bar()]), + ("mid", [bar_to(33.33), bar_to(66.66), bar_to(100)]), + ("zero", [bar_from_to(50, 66.7), bar_from_to(50, 83.3), bar_from_to(50, 100)]), + ("mean", [bar_to(50), no_bar(), bar_from_to(50, 100)]), + (2.0, [bar_to(50), no_bar(), bar_from_to(50, 100)]), + (np.median, [bar_to(50), no_bar(), bar_from_to(50, 100)]), + ], +) +def test_align_positive_cases(df_pos, align, exp): + # test different align cases for all positive values + result = df_pos.style.bar(align=align)._compute().ctx + expected = {(0, 0): exp[0], (1, 0): exp[1], (2, 0): exp[2]} + assert result == expected + + +@pytest.mark.parametrize( + "align, exp", + [ + ("left", [bar_to(100), bar_to(50), no_bar()]), + ("right", [no_bar(), bar_from_to(50, 100), bar_to(100)]), + ("mid", [bar_from_to(66.66, 100), bar_from_to(33.33, 100), bar_to(100)]), + ("zero", [bar_from_to(33.33, 50), bar_from_to(16.66, 50), bar_to(50)]), + ("mean", [bar_from_to(50, 100), no_bar(), bar_to(50)]), + (-2.0, [bar_from_to(50, 100), no_bar(), bar_to(50)]), + (np.median, [bar_from_to(50, 100), no_bar(), bar_to(50)]), + ], +) +def test_align_negative_cases(df_neg, align, exp): + # test different align cases for all negative values + result = df_neg.style.bar(align=align)._compute().ctx + expected = {(0, 0): exp[0], (1, 0): exp[1], (2, 0): exp[2]} + assert result == expected + + +@pytest.mark.parametrize( + "align, exp", + [ + ("left", [no_bar(), bar_to(80), bar_to(100)]), + ("right", [bar_to(100), bar_from_to(80, 100), no_bar()]), + ("mid", [bar_to(60), bar_from_to(60, 80), bar_from_to(60, 100)]), + ("zero", [bar_to(50), bar_from_to(50, 66.66), bar_from_to(50, 83.33)]), + ("mean", [bar_to(50), bar_from_to(50, 66.66), bar_from_to(50, 83.33)]), + (-0.0, [bar_to(50), bar_from_to(50, 66.66), bar_from_to(50, 83.33)]), + (np.nanmedian, [bar_to(50), no_bar(), bar_from_to(50, 62.5)]), + ], +) +@pytest.mark.parametrize("nans", [True, False]) +def test_align_mixed_cases(df_mix, align, exp, nans): + # test different align cases for mixed positive and negative values + # also test no impact of NaNs and no_bar + expected = {(0, 0): exp[0], (1, 0): exp[1], (2, 0): exp[2]} + if nans: + df_mix.loc[3, :] = np.nan + expected.update({(3, 0): no_bar()}) + result = df_mix.style.bar(align=align)._compute().ctx + assert result == expected + + +@pytest.mark.parametrize( + "align, exp", + [ + ( + "left", + { + "index": [[no_bar(), no_bar()], [bar_to(100), bar_to(100)]], + "columns": [[no_bar(), bar_to(100)], [no_bar(), bar_to(100)]], + "none": [[no_bar(), bar_to(33.33)], [bar_to(66.66), bar_to(100)]], + }, + ), + ( + "mid", + { + "index": [[bar_to(33.33), bar_to(50)], [bar_to(100), bar_to(100)]], + "columns": [[bar_to(50), bar_to(100)], [bar_to(75), bar_to(100)]], + "none": [[bar_to(25), bar_to(50)], [bar_to(75), bar_to(100)]], + }, 
+ ), + ( + "zero", + { + "index": [ + [bar_from_to(50, 66.66), bar_from_to(50, 75)], + [bar_from_to(50, 100), bar_from_to(50, 100)], + ], + "columns": [ + [bar_from_to(50, 75), bar_from_to(50, 100)], + [bar_from_to(50, 87.5), bar_from_to(50, 100)], + ], + "none": [ + [bar_from_to(50, 62.5), bar_from_to(50, 75)], + [bar_from_to(50, 87.5), bar_from_to(50, 100)], + ], + }, + ), + ( + 2, + { + "index": [ + [bar_to(50), no_bar()], + [bar_from_to(50, 100), bar_from_to(50, 100)], + ], + "columns": [ + [bar_to(50), no_bar()], + [bar_from_to(50, 75), bar_from_to(50, 100)], + ], + "none": [ + [bar_from_to(25, 50), no_bar()], + [bar_from_to(50, 75), bar_from_to(50, 100)], + ], + }, + ), + ], +) +@pytest.mark.parametrize("axis", ["index", "columns", "none"]) +def test_align_axis(align, exp, axis): + # test all axis combinations with positive values and different aligns + data = DataFrame([[1, 2], [3, 4]]) + result = ( + data.style.bar(align=align, axis=None if axis == "none" else axis) + ._compute() + .ctx + ) + expected = { + (0, 0): exp[axis][0][0], + (0, 1): exp[axis][0][1], + (1, 0): exp[axis][1][0], + (1, 1): exp[axis][1][1], + } + assert result == expected + + +@pytest.mark.parametrize( + "values, vmin, vmax", + [ + ("positive", 1.5, 2.5), + ("negative", -2.5, -1.5), + ("mixed", -2.5, 1.5), + ], +) +@pytest.mark.parametrize("nullify", [None, "vmin", "vmax"]) # test min/max separately +@pytest.mark.parametrize("align", ["left", "right", "zero", "mid"]) +def test_vmin_vmax_clipping(df_pos, df_neg, df_mix, values, vmin, vmax, nullify, align): + # test that clipping occurs if any vmin > data_values or vmax < data_values + if align == "mid": # mid acts as left or right in each case + if values == "positive": + align = "left" + elif values == "negative": + align = "right" + df = {"positive": df_pos, "negative": df_neg, "mixed": df_mix}[values] + vmin = None if nullify == "vmin" else vmin + vmax = None if nullify == "vmax" else vmax + + clip_df = df.where(df <= (vmax if vmax else 999), other=vmax) + clip_df = clip_df.where(clip_df >= (vmin if vmin else -999), other=vmin) + + result = ( + df.style.bar(align=align, vmin=vmin, vmax=vmax, color=["red", "green"]) + ._compute() + .ctx + ) + expected = clip_df.style.bar(align=align, color=["red", "green"])._compute().ctx + assert result == expected + + +@pytest.mark.parametrize( + "values, vmin, vmax", + [ + ("positive", 0.5, 4.5), + ("negative", -4.5, -0.5), + ("mixed", -4.5, 4.5), + ], +) +@pytest.mark.parametrize("nullify", [None, "vmin", "vmax"]) # test min/max separately +@pytest.mark.parametrize("align", ["left", "right", "zero", "mid"]) +def test_vmin_vmax_widening(df_pos, df_neg, df_mix, values, vmin, vmax, nullify, align): + # test that widening occurs if any vmax > data_values or vmin < data_values + if align == "mid": # mid acts as left or right in each case + if values == "positive": + align = "left" + elif values == "negative": + align = "right" + df = {"positive": df_pos, "negative": df_neg, "mixed": df_mix}[values] + vmin = None if nullify == "vmin" else vmin + vmax = None if nullify == "vmax" else vmax + + expand_df = df.copy() + expand_df.loc[3, :], expand_df.loc[4, :] = vmin, vmax + + result = ( + df.style.bar(align=align, vmin=vmin, vmax=vmax, color=["red", "green"]) + ._compute() + .ctx + ) + expected = expand_df.style.bar(align=align, color=["red", "green"])._compute().ctx + assert result.items() <= expected.items() + + +def test_numerics(): + # test data is pre-selected for numeric values + data = DataFrame([[1, "a"], [2, "b"]]) + result 
= data.style.bar()._compute().ctx + assert (0, 1) not in result + assert (1, 1) not in result + + +@pytest.mark.parametrize( + "align, exp", + [ + ("left", [no_bar(), bar_to(100, "green")]), + ("right", [bar_to(100, "red"), no_bar()]), + ("mid", [bar_to(25, "red"), bar_from_to(25, 100, "green")]), + ("zero", [bar_from_to(33.33, 50, "red"), bar_from_to(50, 100, "green")]), + ], +) +def test_colors_mixed(align, exp): + data = DataFrame([[-1], [3]]) + result = data.style.bar(align=align, color=["red", "green"])._compute().ctx + assert result == {(0, 0): exp[0], (1, 0): exp[1]} + + +def test_bar_align_height(): + # test when keyword height is used 'no-repeat center' and 'background-size' present + data = DataFrame([[1], [2]]) + result = data.style.bar(align="left", height=50)._compute().ctx + bg_s = "linear-gradient(90deg, #d65f5f 100.0%, transparent 100.0%) no-repeat center" + expected = { + (0, 0): [("width", "10em")], + (1, 0): [ + ("width", "10em"), + ("background", bg_s), + ("background-size", "100% 50.0%"), + ], + } + assert result == expected + + +def test_bar_value_error_raises(): + df = DataFrame({"A": [-100, -60, -30, -20]}) + + msg = "`align` should be in {'left', 'right', 'mid', 'mean', 'zero'} or" + with pytest.raises(ValueError, match=msg): + df.style.bar(align="poorly", color=["#d65f5f", "#5fba7d"]).to_html() + + msg = r"`width` must be a value in \[0, 100\]" + with pytest.raises(ValueError, match=msg): + df.style.bar(width=200).to_html() + + msg = r"`height` must be a value in \[0, 100\]" + with pytest.raises(ValueError, match=msg): + df.style.bar(height=200).to_html() diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/test_deprecated.py b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/test_deprecated.py new file mode 100644 index 0000000..9c96e3c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/test_deprecated.py @@ -0,0 +1,165 @@ +""" +modules collects tests for Styler methods which have been deprecated +""" +import numpy as np +import pytest + +jinja2 = pytest.importorskip("jinja2") + +from pandas import ( + DataFrame, + IndexSlice, + NaT, + Timestamp, +) +import pandas._testing as tm + + +@pytest.fixture +def df(): + return DataFrame({"A": [0, 1], "B": np.random.randn(2)}) + + +@pytest.mark.parametrize("axis", ["index", "columns"]) +def test_hide_index_columns(df, axis): + with tm.assert_produces_warning(FutureWarning): + getattr(df.style, "hide_" + axis)() + + +def test_set_non_numeric_na(): + # GH 21527 28358 + df = DataFrame( + { + "object": [None, np.nan, "foo"], + "datetime": [None, NaT, Timestamp("20120101")], + } + ) + + with tm.assert_produces_warning(FutureWarning): + ctx = df.style.set_na_rep("NA")._translate(True, True) + assert ctx["body"][0][1]["display_value"] == "NA" + assert ctx["body"][0][2]["display_value"] == "NA" + assert ctx["body"][1][1]["display_value"] == "NA" + assert ctx["body"][1][2]["display_value"] == "NA" + + +def test_where_with_one_style(df): + # GH 17474 + def f(x): + return x > 0.5 + + style1 = "foo: bar" + + with tm.assert_produces_warning(FutureWarning): + result = df.style.where(f, style1)._compute().ctx + expected = { + (r, c): [("foo", "bar")] + for r, row in enumerate(df.index) + for c, col in enumerate(df.columns) + if f(df.loc[row, col]) + } + assert result == expected + + +@pytest.mark.parametrize( + "slice_", + [ + IndexSlice[:], + IndexSlice[:, ["A"]], + IndexSlice[[1], :], + IndexSlice[[1], ["A"]], + IndexSlice[:2, ["A", "B"]], + ], +) +def 
test_where_subset(df, slice_): + # GH 17474 + def f(x): + return x > 0.5 + + style1 = "foo: bar" + style2 = "baz: foo" + + with tm.assert_produces_warning(FutureWarning): + res = df.style.where(f, style1, style2, subset=slice_)._compute().ctx + expected = { + (r, c): [("foo", "bar") if f(df.loc[row, col]) else ("baz", "foo")] + for r, row in enumerate(df.index) + for c, col in enumerate(df.columns) + if row in df.loc[slice_].index and col in df.loc[slice_].columns + } + assert res == expected + + +def test_where_subset_compare_with_applymap(df): + # GH 17474 + def f(x): + return x > 0.5 + + style1 = "foo: bar" + style2 = "baz: foo" + + def g(x): + return style1 if f(x) else style2 + + slices = [ + IndexSlice[:], + IndexSlice[:, ["A"]], + IndexSlice[[1], :], + IndexSlice[[1], ["A"]], + IndexSlice[:2, ["A", "B"]], + ] + + for slice_ in slices: + with tm.assert_produces_warning(FutureWarning): + result = df.style.where(f, style1, style2, subset=slice_)._compute().ctx + expected = df.style.applymap(g, subset=slice_)._compute().ctx + assert result == expected + + +def test_where_kwargs(): + df = DataFrame([[1, 2], [3, 4]]) + + def f(x, val): + return x > val + + with tm.assert_produces_warning(FutureWarning): + res = df.style.where(f, "color:green;", "color:red;", val=2)._compute().ctx + expected = { + (0, 0): [("color", "red")], + (0, 1): [("color", "red")], + (1, 0): [("color", "green")], + (1, 1): [("color", "green")], + } + assert res == expected + + +def test_set_na_rep(): + # GH 21527 28358 + df = DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"]) + + with tm.assert_produces_warning(FutureWarning): + ctx = df.style.set_na_rep("NA")._translate(True, True) + assert ctx["body"][0][1]["display_value"] == "NA" + assert ctx["body"][0][2]["display_value"] == "NA" + + with tm.assert_produces_warning(FutureWarning): + ctx = ( + df.style.set_na_rep("NA") + .format(None, na_rep="-", subset=["B"]) + ._translate(True, True) + ) + assert ctx["body"][0][1]["display_value"] == "NA" + assert ctx["body"][0][2]["display_value"] == "-" + + +def test_precision(df): + styler = df.style + with tm.assert_produces_warning(FutureWarning): + s2 = styler.set_precision(1) + assert styler is s2 + assert styler.precision == 1 + + +def test_render(df): + with tm.assert_produces_warning(FutureWarning): + df.style.render() diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/test_format.py b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/test_format.py new file mode 100644 index 0000000..5207be9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/test_format.py @@ -0,0 +1,436 @@ +import numpy as np +import pytest + +from pandas import ( + NA, + DataFrame, + IndexSlice, + MultiIndex, + NaT, + Timestamp, + option_context, +) + +pytest.importorskip("jinja2") +from pandas.io.formats.style import Styler +from pandas.io.formats.style_render import _str_escape + + +@pytest.fixture +def df(): + return DataFrame( + data=[[0, -0.609], [1, -1.228]], + columns=["A", "B"], + index=["x", "y"], + ) + + +@pytest.fixture +def styler(df): + return Styler(df, uuid_len=0) + + +def test_display_format(styler): + ctx = styler.format("{:0.1f}")._translate(True, True) + assert all(["display_value" in c for c in row] for row in ctx["body"]) + assert all([len(c["display_value"]) <= 3 for c in row[1:]] for row in ctx["body"]) + assert len(ctx["body"][0][1]["display_value"].lstrip("-")) <= 3 + + +@pytest.mark.parametrize("index", [True, False]) 
+@pytest.mark.parametrize("columns", [True, False]) +def test_display_format_index(styler, index, columns): + exp_index = ["x", "y"] + if index: + styler.format_index(lambda v: v.upper(), axis=0) # test callable + exp_index = ["X", "Y"] + + exp_columns = ["A", "B"] + if columns: + styler.format_index("*{}*", axis=1) # test string + exp_columns = ["*A*", "*B*"] + + ctx = styler._translate(True, True) + + for r, row in enumerate(ctx["body"]): + assert row[0]["display_value"] == exp_index[r] + + for c, col in enumerate(ctx["head"][1:]): + assert col["display_value"] == exp_columns[c] + + +def test_format_dict(styler): + ctx = styler.format({"A": "{:0.1f}", "B": "{0:.2%}"})._translate(True, True) + assert ctx["body"][0][1]["display_value"] == "0.0" + assert ctx["body"][0][2]["display_value"] == "-60.90%" + + +def test_format_index_dict(styler): + ctx = styler.format_index({0: lambda v: v.upper()})._translate(True, True) + for i, val in enumerate(["X", "Y"]): + assert ctx["body"][i][0]["display_value"] == val + + +def test_format_string(styler): + ctx = styler.format("{:.2f}")._translate(True, True) + assert ctx["body"][0][1]["display_value"] == "0.00" + assert ctx["body"][0][2]["display_value"] == "-0.61" + assert ctx["body"][1][1]["display_value"] == "1.00" + assert ctx["body"][1][2]["display_value"] == "-1.23" + + +def test_format_callable(styler): + ctx = styler.format(lambda v: "neg" if v < 0 else "pos")._translate(True, True) + assert ctx["body"][0][1]["display_value"] == "pos" + assert ctx["body"][0][2]["display_value"] == "neg" + assert ctx["body"][1][1]["display_value"] == "pos" + assert ctx["body"][1][2]["display_value"] == "neg" + + +def test_format_with_na_rep(): + # GH 21527 28358 + df = DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"]) + + ctx = df.style.format(None, na_rep="-")._translate(True, True) + assert ctx["body"][0][1]["display_value"] == "-" + assert ctx["body"][0][2]["display_value"] == "-" + + ctx = df.style.format("{:.2%}", na_rep="-")._translate(True, True) + assert ctx["body"][0][1]["display_value"] == "-" + assert ctx["body"][0][2]["display_value"] == "-" + assert ctx["body"][1][1]["display_value"] == "110.00%" + assert ctx["body"][1][2]["display_value"] == "120.00%" + + ctx = df.style.format("{:.2%}", na_rep="-", subset=["B"])._translate(True, True) + assert ctx["body"][0][2]["display_value"] == "-" + assert ctx["body"][1][2]["display_value"] == "120.00%" + + +def test_format_index_with_na_rep(): + df = DataFrame([[1, 2, 3, 4, 5]], columns=["A", None, np.nan, NaT, NA]) + ctx = df.style.format_index(None, na_rep="--", axis=1)._translate(True, True) + assert ctx["head"][0][1]["display_value"] == "A" + for i in [2, 3, 4, 5]: + assert ctx["head"][0][i]["display_value"] == "--" + + +def test_format_non_numeric_na(): + # GH 21527 28358 + df = DataFrame( + { + "object": [None, np.nan, "foo"], + "datetime": [None, NaT, Timestamp("20120101")], + } + ) + ctx = df.style.format(None, na_rep="-")._translate(True, True) + assert ctx["body"][0][1]["display_value"] == "-" + assert ctx["body"][0][2]["display_value"] == "-" + assert ctx["body"][1][1]["display_value"] == "-" + assert ctx["body"][1][2]["display_value"] == "-" + + +@pytest.mark.parametrize( + "func, attr, kwargs", + [ + ("format", "_display_funcs", {}), + ("format_index", "_display_funcs_index", {"axis": 0}), + ("format_index", "_display_funcs_columns", {"axis": 1}), + ], +) +def test_format_clear(styler, func, attr, kwargs): + assert (0, 0) not in getattr(styler, attr) # using default + getattr(styler, 
func)("{:.2f}", **kwargs) + assert (0, 0) in getattr(styler, attr) # formatter is specified + getattr(styler, func)(**kwargs) + assert (0, 0) not in getattr(styler, attr) # formatter cleared to default + + +@pytest.mark.parametrize( + "escape, exp", + [ + ("html", "<>&"%$#_{}~^\\~ ^ \\ "), + ( + "latex", + '<>\\&"\\%\\$\\#\\_\\{\\}\\textasciitilde \\textasciicircum ' + "\\textbackslash \\textasciitilde \\space \\textasciicircum \\space " + "\\textbackslash \\space ", + ), + ], +) +def test_format_escape_html(escape, exp): + chars = '<>&"%$#_{}~^\\~ ^ \\ ' + df = DataFrame([[chars]]) + + s = Styler(df, uuid_len=0).format("&{0}&", escape=None) + expected = f'
<td id="T__row0_col0" class="data row0 col0" >&{chars}&</td>' + assert expected in s.to_html() + + # [... lost from this dump: the rest of test_format_escape_html and the remaining test_format.py tests (an escape-with-na_rep variant survives only as a short fragment), the diff header for the Styler HTML test module, and its opening lines; the next test's expected HTML survives only as cell text ...] + expected = dedent( + """\ + [expected HTML table stripped to text: header "&nbsp; | A", rows "a | 2.610000" and "b | 2.690000"]
+ + + """ + ) + assert result == expected + + +def test_w3_html_format(styler): + styler.set_uuid("").set_table_styles( + [{"selector": "th", "props": "att2:v2;"}] + ).applymap(lambda x: "att1:v1;").set_table_attributes( + 'class="my-cls1" style="attr3:v3;"' + ).set_td_classes( + DataFrame(["my-cls2"], index=["a"], columns=["A"]) + ).format( + "{:.1f}" + ).set_caption( + "A comprehensive test" + ) + expected = dedent( + """\ + + + + + + + + + + + + + + + + + + + +
[expected HTML stripped to text; surviving fragments: caption "A comprehensive test", header "&nbsp; | A", rows "a | 2.6" and "b | 2.7"]
+ """ + ) + assert expected == styler.to_html() + + +def test_colspan_w3(): + # GH 36223 + df = DataFrame(data=[[1, 2]], columns=[["l0", "l0"], ["l1a", "l1b"]]) + styler = Styler(df, uuid="_", cell_ids=False) + assert '
colspan="2">l0</th>' in styler.to_html() # [the <th ...> attribute text was stripped from this dump] + + +# [... lost here: the matching rowspan test (its "l0" header text also survives) and the opening of the next test, whose expected HTML survives only as cell text ...] + expected = dedent( + """\ + [expected HTML table stripped to text: header "&nbsp; | A", rows "a | 2.610000" and "b | 2.690000"]
+ + + """ + ) + assert result == expected + + +def test_doctype(styler): + result = styler.to_html(doctype_html=False) + assert "" not in result + assert "" not in result + assert "" not in result + assert "" not in result + + +def test_doctype_encoding(styler): + with option_context("styler.render.encoding", "ASCII"): + result = styler.to_html(doctype_html=True) + assert '' in result + result = styler.to_html(doctype_html=True, encoding="ANSI") + assert '' in result + + +def test_bold_headers_arg(styler): + result = styler.to_html(bold_headers=True) + assert "th {\n font-weight: bold;\n}" in result + result = styler.to_html() + assert "th {\n font-weight: bold;\n}" not in result + + +def test_caption_arg(styler): + result = styler.to_html(caption="foo bar") + assert "
+ + +# [... lost from this dump: the tests between test_caption_arg and the MultiIndex rendering test below (surviving cell texts: "2.610000", "2.690000", "a", "b", "A"), the styler_mi fixture, and that test's opening lines ...] + expected = dedent( + """\ + [expected HTML table stripped to text: header rows "&nbsp; | n1 | a" and "&nbsp; | n2 | c", index-name row "n1 n2", body row "a c | 0"]
+ """ + ) + result = styler_mi.to_html() + assert result == expected + + +def test_include_css_style_rules_only_for_visible_cells(styler_mi): + # GH 43619 + result = ( + styler_mi.set_uuid("") + .applymap(lambda v: "color: blue;") + .hide(styler_mi.data.columns[1:], axis="columns") + .hide(styler_mi.data.index[1:], axis="index") + .to_html() + ) + expected_styles = dedent( + """\ + + """ + ) + assert expected_styles in result + + +def test_include_css_style_rules_only_for_visible_index_labels(styler_mi): + # GH 43619 + result = ( + styler_mi.set_uuid("") + .applymap_index(lambda v: "color: blue;", axis="index") + .hide(styler_mi.data.columns, axis="columns") + .hide(styler_mi.data.index[1:], axis="index") + .to_html() + ) + expected_styles = dedent( + """\ + + """ + ) + assert expected_styles in result + + +def test_include_css_style_rules_only_for_visible_column_labels(styler_mi): + # GH 43619 + result = ( + styler_mi.set_uuid("") + .applymap_index(lambda v: "color: blue;", axis="columns") + .hide(styler_mi.data.columns[1:], axis="columns") + .hide(styler_mi.data.index, axis="index") + .to_html() + ) + expected_styles = dedent( + """\ + + """ + ) + assert expected_styles in result + + +def test_hiding_index_columns_multiindex_alignment(): + # gh 43644 + midx = MultiIndex.from_product( + [["i0", "j0"], ["i1"], ["i2", "j2"]], names=["i-0", "i-1", "i-2"] + ) + cidx = MultiIndex.from_product( + [["c0"], ["c1", "d1"], ["c2", "d2"]], names=["c-0", "c-1", "c-2"] + ) + df = DataFrame(np.arange(16).reshape(4, 4), index=midx, columns=cidx) + styler = Styler(df, uuid_len=0) + styler.hide(level=1, axis=0).hide(level=0, axis=1) + styler.hide([("j0", "i1", "j2")], axis=0) + styler.hide([("c0", "d1", "d2")], axis=1) + result = styler.to_html() + expected = dedent( + """\ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 c-1c1d1
 c-2c2d2c2
i-0i-2   
i0i2012
j2456
j0i28910
+ """ + ) + assert result == expected + + +def test_hiding_index_columns_multiindex_trimming(): + # gh 44272 + df = DataFrame(np.arange(64).reshape(8, 8)) + df.columns = MultiIndex.from_product([[0, 1, 2, 3], [0, 1]]) + df.index = MultiIndex.from_product([[0, 1, 2, 3], [0, 1]]) + df.index.names, df.columns.names = ["a", "b"], ["c", "d"] + styler = Styler(df, cell_ids=False, uuid_len=0) + styler.hide([(0, 0), (0, 1), (1, 0)], axis=1).hide([(0, 0), (0, 1), (1, 0)], axis=0) + with option_context("styler.render.max_rows", 4, "styler.render.max_columns", 4): + result = styler.to_html() + + expected = dedent( + """\ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 c123
 d1010...
ab     
1127282930...
2035363738...
143444546...
3051525354...
.....................
+ """ + ) + + assert result == expected + + +@pytest.mark.parametrize("type", ["data", "index"]) +@pytest.mark.parametrize( + "text, exp, found", + [ + ("no link, just text", False, ""), + ("subdomain not www: sub.web.com", False, ""), + ("www subdomain: www.web.com other", True, "www.web.com"), + ("scheme full structure: http://www.web.com", True, "http://www.web.com"), + ("scheme no top-level: http://www.web", True, "http://www.web"), + ("no scheme, no top-level: www.web", False, "www.web"), + ("https scheme: https://www.web.com", True, "https://www.web.com"), + ("ftp scheme: ftp://www.web", True, "ftp://www.web"), + ("subdirectories: www.web.com/directory", True, "www.web.com/directory"), + ("Multiple domains: www.1.2.3.4", True, "www.1.2.3.4"), + ], +) +def test_rendered_links(type, text, exp, found): + if type == "data": + df = DataFrame([text]) + styler = df.style.format(hyperlinks="html") + else: + df = DataFrame([0], index=[text]) + styler = df.style.format_index(hyperlinks="html") + + rendered = '{0}'.format(found) + result = styler.to_html() + assert (rendered in result) is exp + assert (text in result) is not exp # test conversion done when expected and not + + +def test_multiple_rendered_links(): + links = ("www.a.b", "http://a.c", "https://a.d", "ftp://a.e") + df = DataFrame(["text {} {} text {} {}".format(*links)]) + result = df.style.format(hyperlinks="html").to_html() + href = '{0}' + for link in links: + assert href.format(link) in result + assert href.format("text") not in result diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/test_matplotlib.py b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/test_matplotlib.py new file mode 100644 index 0000000..a350b6f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/test_matplotlib.py @@ -0,0 +1,286 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + IndexSlice, + Series, +) + +pytest.importorskip("matplotlib") +pytest.importorskip("jinja2") + +import matplotlib as mpl + +from pandas.io.formats.style import Styler + + +@pytest.fixture +def df(): + return DataFrame([[1, 2], [2, 4]], columns=["A", "B"]) + + +@pytest.fixture +def styler(df): + return Styler(df, uuid_len=0) + + +@pytest.fixture +def df_blank(): + return DataFrame([[0, 0], [0, 0]], columns=["A", "B"], index=["X", "Y"]) + + +@pytest.fixture +def styler_blank(df_blank): + return Styler(df_blank, uuid_len=0) + + +@pytest.mark.parametrize("f", ["background_gradient", "text_gradient"]) +def test_function_gradient(styler, f): + for c_map in [None, "YlOrRd"]: + result = getattr(styler, f)(cmap=c_map)._compute().ctx + assert all("#" in x[0][1] for x in result.values()) + assert result[(0, 0)] == result[(0, 1)] + assert result[(1, 0)] == result[(1, 1)] + + +@pytest.mark.parametrize("f", ["background_gradient", "text_gradient"]) +def test_background_gradient_color(styler, f): + result = getattr(styler, f)(subset=IndexSlice[1, "A"])._compute().ctx + if f == "background_gradient": + assert result[(1, 0)] == [("background-color", "#fff7fb"), ("color", "#000000")] + elif f == "text_gradient": + assert result[(1, 0)] == [("color", "#fff7fb")] + + +@pytest.mark.parametrize( + "axis, expected", + [ + (0, ["low", "low", "high", "high"]), + (1, ["low", "high", "low", "high"]), + (None, ["low", "mid", "mid", "high"]), + ], +) +@pytest.mark.parametrize("f", ["background_gradient", "text_gradient"]) +def test_background_gradient_axis(styler, axis, expected, f): + if f == 
"background_gradient": + colors = { + "low": [("background-color", "#f7fbff"), ("color", "#000000")], + "mid": [("background-color", "#abd0e6"), ("color", "#000000")], + "high": [("background-color", "#08306b"), ("color", "#f1f1f1")], + } + elif f == "text_gradient": + colors = { + "low": [("color", "#f7fbff")], + "mid": [("color", "#abd0e6")], + "high": [("color", "#08306b")], + } + result = getattr(styler, f)(cmap="Blues", axis=axis)._compute().ctx + for i, cell in enumerate([(0, 0), (0, 1), (1, 0), (1, 1)]): + assert result[cell] == colors[expected[i]] + + +@pytest.mark.parametrize( + "cmap, expected", + [ + ( + "PuBu", + { + (4, 5): [("background-color", "#86b0d3"), ("color", "#000000")], + (4, 6): [("background-color", "#83afd3"), ("color", "#f1f1f1")], + }, + ), + ( + "YlOrRd", + { + (4, 8): [("background-color", "#fd913e"), ("color", "#000000")], + (4, 9): [("background-color", "#fd8f3d"), ("color", "#f1f1f1")], + }, + ), + ( + None, + { + (7, 0): [("background-color", "#48c16e"), ("color", "#f1f1f1")], + (7, 1): [("background-color", "#4cc26c"), ("color", "#000000")], + }, + ), + ], +) +def test_text_color_threshold(cmap, expected): + # GH 39888 + df = DataFrame(np.arange(100).reshape(10, 10)) + result = df.style.background_gradient(cmap=cmap, axis=None)._compute().ctx + for k in expected.keys(): + assert result[k] == expected[k] + + +def test_background_gradient_vmin_vmax(): + # GH 12145 + df = DataFrame(range(5)) + ctx = df.style.background_gradient(vmin=1, vmax=3)._compute().ctx + assert ctx[(0, 0)] == ctx[(1, 0)] + assert ctx[(4, 0)] == ctx[(3, 0)] + + +def test_background_gradient_int64(): + # GH 28869 + df1 = Series(range(3)).to_frame() + df2 = Series(range(3), dtype="Int64").to_frame() + ctx1 = df1.style.background_gradient()._compute().ctx + ctx2 = df2.style.background_gradient()._compute().ctx + assert ctx2[(0, 0)] == ctx1[(0, 0)] + assert ctx2[(1, 0)] == ctx1[(1, 0)] + assert ctx2[(2, 0)] == ctx1[(2, 0)] + + +@pytest.mark.parametrize( + "axis, gmap, expected", + [ + ( + 0, + [1, 2], + { + (0, 0): [("background-color", "#fff7fb"), ("color", "#000000")], + (1, 0): [("background-color", "#023858"), ("color", "#f1f1f1")], + (0, 1): [("background-color", "#fff7fb"), ("color", "#000000")], + (1, 1): [("background-color", "#023858"), ("color", "#f1f1f1")], + }, + ), + ( + 1, + [1, 2], + { + (0, 0): [("background-color", "#fff7fb"), ("color", "#000000")], + (1, 0): [("background-color", "#fff7fb"), ("color", "#000000")], + (0, 1): [("background-color", "#023858"), ("color", "#f1f1f1")], + (1, 1): [("background-color", "#023858"), ("color", "#f1f1f1")], + }, + ), + ( + None, + np.array([[2, 1], [1, 2]]), + { + (0, 0): [("background-color", "#023858"), ("color", "#f1f1f1")], + (1, 0): [("background-color", "#fff7fb"), ("color", "#000000")], + (0, 1): [("background-color", "#fff7fb"), ("color", "#000000")], + (1, 1): [("background-color", "#023858"), ("color", "#f1f1f1")], + }, + ), + ], +) +def test_background_gradient_gmap_array(styler_blank, axis, gmap, expected): + # tests when gmap is given as a sequence and converted to ndarray + result = styler_blank.background_gradient(axis=axis, gmap=gmap)._compute().ctx + assert result == expected + + +@pytest.mark.parametrize( + "gmap, axis", [([1, 2, 3], 0), ([1, 2], 1), (np.array([[1, 2], [1, 2]]), None)] +) +def test_background_gradient_gmap_array_raises(gmap, axis): + # test when gmap as converted ndarray is bad shape + df = DataFrame([[0, 0, 0], [0, 0, 0]]) + msg = "supplied 'gmap' is not correct shape" + with 
pytest.raises(ValueError, match=msg): + df.style.background_gradient(gmap=gmap, axis=axis)._compute() + + +@pytest.mark.parametrize( + "gmap", + [ + DataFrame( # reverse the columns + [[2, 1], [1, 2]], columns=["B", "A"], index=["X", "Y"] + ), + DataFrame( # reverse the index + [[2, 1], [1, 2]], columns=["A", "B"], index=["Y", "X"] + ), + DataFrame( # reverse the index and columns + [[1, 2], [2, 1]], columns=["B", "A"], index=["Y", "X"] + ), + DataFrame( # add unnecessary columns + [[1, 2, 3], [2, 1, 3]], columns=["A", "B", "C"], index=["X", "Y"] + ), + DataFrame( # add unnecessary index + [[1, 2], [2, 1], [3, 3]], columns=["A", "B"], index=["X", "Y", "Z"] + ), + ], +) +@pytest.mark.parametrize( + "subset, exp_gmap", # exp_gmap is underlying map DataFrame should conform to + [ + (None, [[1, 2], [2, 1]]), + (["A"], [[1], [2]]), # slice only column "A" in data and gmap + (["B", "A"], [[2, 1], [1, 2]]), # reverse the columns in data + (IndexSlice["X", :], [[1, 2]]), # slice only index "X" in data and gmap + (IndexSlice[["Y", "X"], :], [[2, 1], [1, 2]]), # reverse the index in data + ], +) +def test_background_gradient_gmap_dataframe_align(styler_blank, gmap, subset, exp_gmap): + # test that a gmap given as a DataFrame aligns to the data, including subset + expected = styler_blank.background_gradient(axis=None, gmap=exp_gmap, subset=subset) + result = styler_blank.background_gradient(axis=None, gmap=gmap, subset=subset) + assert expected._compute().ctx == result._compute().ctx + + +@pytest.mark.parametrize( + "gmap, axis, exp_gmap", + [ + (Series([2, 1], index=["Y", "X"]), 0, [[1, 1], [2, 2]]), # reverse the index + (Series([2, 1], index=["B", "A"]), 1, [[1, 2], [1, 2]]), # reverse the cols + (Series([1, 2, 3], index=["X", "Y", "Z"]), 0, [[1, 1], [2, 2]]), # add idx + (Series([1, 2, 3], index=["A", "B", "C"]), 1, [[1, 2], [1, 2]]), # add col + ], +) +def test_background_gradient_gmap_series_align(styler_blank, gmap, axis, exp_gmap): + # test that a gmap given as a Series aligns to the data along the given axis + expected = styler_blank.background_gradient(axis=None, gmap=exp_gmap)._compute() + result = styler_blank.background_gradient(axis=axis, gmap=gmap)._compute() + assert expected.ctx == result.ctx + + +@pytest.mark.parametrize( + "gmap, axis", + [ + (DataFrame([[1, 2], [2, 1]], columns=["A", "B"], index=["X", "Y"]), 1), + (DataFrame([[1, 2], [2, 1]], columns=["A", "B"], index=["X", "Y"]), 0), + ], +) +def test_background_gradient_gmap_wrong_dataframe(styler_blank, gmap, axis): + # test giving a gmap as a DataFrame but with the wrong axis + msg = "'gmap' is a DataFrame but underlying data for operations is a Series" + with pytest.raises(ValueError, match=msg): + styler_blank.background_gradient(gmap=gmap, axis=axis)._compute() + + +def test_background_gradient_gmap_wrong_series(styler_blank): + # test giving a gmap as a Series but with the wrong axis + msg = "'gmap' is a Series but underlying data for operations is a DataFrame" + gmap = Series([1, 2], index=["X", "Y"]) + with pytest.raises(ValueError, match=msg): + styler_blank.background_gradient(gmap=gmap, axis=None)._compute() + + +@pytest.mark.parametrize("cmap", ["PuBu", mpl.cm.get_cmap("PuBu")]) +def test_bar_colormap(cmap): + data = DataFrame([[1, 2], [3, 4]]) + ctx = data.style.bar(cmap=cmap, axis=None)._compute().ctx + pubu_colors = { + (0, 0): "#d0d1e6", + (1, 0): "#056faf", + (0, 1): "#73a9cf", + (1, 1): "#023858", + } + for k, v in pubu_colors.items(): + assert v in ctx[k][1][1] + + +def test_bar_color_raises(df): + msg = "`color` must 
be string or list or tuple of 2 strings" + with pytest.raises(ValueError, match=msg): + df.style.bar(color={"a", "b"}).to_html() + with pytest.raises(ValueError, match=msg): + df.style.bar(color=["a", "b", "c"]).to_html() + + msg = "`color` and `cmap` cannot both be given" + with pytest.raises(ValueError, match=msg): + df.style.bar(color="something", cmap="something else").to_html() diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/test_non_unique.py b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/test_non_unique.py new file mode 100644 index 0000000..b719bf3 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/test_non_unique.py @@ -0,0 +1,140 @@ +from textwrap import dedent + +import pytest + +from pandas import ( + DataFrame, + IndexSlice, +) + +pytest.importorskip("jinja2") + +from pandas.io.formats.style import Styler + + +@pytest.fixture +def df(): + return DataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + index=["i", "j", "j"], + columns=["c", "d", "d"], + dtype=float, + ) + + +@pytest.fixture +def styler(df): + return Styler(df, uuid_len=0) + + +def test_format_non_unique(df): + # GH 41269 + + # test dict + html = df.style.format({"d": "{:.1f}"}).to_html() + for val in ["1.000000<", "4.000000<", "7.000000<"]: + assert val in html + for val in ["2.0<", "3.0<", "5.0<", "6.0<", "8.0<", "9.0<"]: + assert val in html + + # test subset + html = df.style.format(precision=1, subset=IndexSlice["j", "d"]).to_html() + for val in ["1.000000<", "4.000000<", "7.000000<", "2.000000<", "3.000000<"]: + assert val in html + for val in ["5.0<", "6.0<", "8.0<", "9.0<"]: + assert val in html + + +@pytest.mark.parametrize("func", ["apply", "applymap"]) +def test_apply_applymap_non_unique_raises(df, func): + # GH 41269 + if func == "apply": + op = lambda s: ["color: red;"] * len(s) + else: + op = lambda v: "color: red;" + + with pytest.raises(KeyError, match="`Styler.apply` and `.applymap` are not"): + getattr(df.style, func)(op)._compute() + + +def test_table_styles_dict_non_unique_index(styler): + styles = styler.set_table_styles( + {"j": [{"selector": "td", "props": "a: v;"}]}, axis=1 + ).table_styles + assert styles == [ + {"selector": "td.row1", "props": [("a", "v")]}, + {"selector": "td.row2", "props": [("a", "v")]}, + ] + + +def test_table_styles_dict_non_unique_columns(styler): + styles = styler.set_table_styles( + {"d": [{"selector": "td", "props": "a: v;"}]}, axis=0 + ).table_styles + assert styles == [ + {"selector": "td.col1", "props": [("a", "v")]}, + {"selector": "td.col2", "props": [("a", "v")]}, + ] + + +def test_tooltips_non_unique_raises(styler): + # ttips has unique keys + ttips = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "d"], index=["a", "b"]) + styler.set_tooltips(ttips=ttips) # OK + + # ttips has non-unique columns + ttips = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "c"], index=["a", "b"]) + with pytest.raises(KeyError, match="Tooltips render only if `ttips` has unique"): + styler.set_tooltips(ttips=ttips) + + # ttips has non-unique index + ttips = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "d"], index=["a", "a"]) + with pytest.raises(KeyError, match="Tooltips render only if `ttips` has unique"): + styler.set_tooltips(ttips=ttips) + + +def test_set_td_classes_non_unique_raises(styler): + # classes has unique keys + classes = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "d"], index=["a", "b"]) + styler.set_td_classes(classes=classes) # OK + + # classes has non-unique columns + 
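+    # (editor's note: ``set_td_classes`` attaches the strings in ``classes`` to
+    # matching <td> cells by index/column label, so duplicate labels would make
+    # that lookup ambiguous, hence the KeyError checked below)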
classes = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "c"], index=["a", "b"]) + with pytest.raises(KeyError, match="Classes render only if `classes` has unique"): + styler.set_td_classes(classes=classes) + + # classes has non-unique index + classes = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "d"], index=["a", "a"]) + with pytest.raises(KeyError, match="Classes render only if `classes` has unique"): + styler.set_td_classes(classes=classes) + + +def test_hide_columns_non_unique(styler): + ctx = styler.hide(["d"], axis="columns")._translate(True, True) + + assert ctx["head"][0][1]["display_value"] == "c" + assert ctx["head"][0][1]["is_visible"] is True + + assert ctx["head"][0][2]["display_value"] == "d" + assert ctx["head"][0][2]["is_visible"] is False + + assert ctx["head"][0][3]["display_value"] == "d" + assert ctx["head"][0][3]["is_visible"] is False + + assert ctx["body"][0][1]["is_visible"] is True + assert ctx["body"][0][2]["is_visible"] is False + assert ctx["body"][0][3]["is_visible"] is False + + +def test_latex_non_unique(styler): + result = styler.to_latex() + assert result == dedent( + """\ + \\begin{tabular}{lrrr} + & c & d & d \\\\ + i & 1.000000 & 2.000000 & 3.000000 \\\\ + j & 4.000000 & 5.000000 & 6.000000 \\\\ + j & 7.000000 & 8.000000 & 9.000000 \\\\ + \\end{tabular} + """ + ) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/test_style.py b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/test_style.py new file mode 100644 index 0000000..fa054ff --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/style/test_style.py @@ -0,0 +1,1549 @@ +import copy +import re +from textwrap import dedent + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + MultiIndex, +) +import pandas._testing as tm + +jinja2 = pytest.importorskip("jinja2") +from pandas.io.formats.style import ( # isort:skip + Styler, +) +from pandas.io.formats.style_render import ( + _get_level_lengths, + _get_trimming_maximums, + maybe_convert_css_to_tuples, + non_reducing_slice, +) + + +@pytest.fixture +def mi_df(): + return DataFrame( + [[1, 2], [3, 4]], + index=MultiIndex.from_product([["i0"], ["i1_a", "i1_b"]]), + columns=MultiIndex.from_product([["c0"], ["c1_a", "c1_b"]]), + dtype=int, + ) + + +@pytest.fixture +def mi_styler(mi_df): + return Styler(mi_df, uuid_len=0) + + +@pytest.fixture +def mi_styler_comp(mi_styler): + # comprehensively add features to mi_styler + mi_styler = mi_styler._copy(deepcopy=True) + mi_styler.css = {**mi_styler.css, **{"row": "ROW", "col": "COL"}} + mi_styler.uuid_len = 5 + mi_styler.uuid = "abcde" + mi_styler.set_caption("capt") + mi_styler.set_table_styles([{"selector": "a", "props": "a:v;"}]) + mi_styler.hide(axis="columns") + mi_styler.hide([("c0", "c1_a")], axis="columns", names=True) + mi_styler.hide(axis="index") + mi_styler.hide([("i0", "i1_a")], axis="index", names=True) + mi_styler.set_table_attributes('class="box"') + mi_styler.format(na_rep="MISSING", precision=3) + mi_styler.format_index(precision=2, axis=0) + mi_styler.format_index(precision=4, axis=1) + mi_styler.highlight_max(axis=None) + mi_styler.applymap_index(lambda x: "color: white;", axis=0) + mi_styler.applymap_index(lambda x: "color: black;", axis=1) + mi_styler.set_td_classes( + DataFrame( + [["a", "b"], ["a", "c"]], index=mi_styler.index, columns=mi_styler.columns + ) + ) + mi_styler.set_tooltips( + DataFrame( + [["a2", "b2"], ["a2", "c2"]], + index=mi_styler.index, + 
columns=mi_styler.columns, + ) + ) + return mi_styler + + +@pytest.mark.parametrize( + "sparse_columns, exp_cols", + [ + ( + True, + [ + {"is_visible": True, "attributes": 'colspan="2"', "value": "c0"}, + {"is_visible": False, "attributes": "", "value": "c0"}, + ], + ), + ( + False, + [ + {"is_visible": True, "attributes": "", "value": "c0"}, + {"is_visible": True, "attributes": "", "value": "c0"}, + ], + ), + ], +) +def test_mi_styler_sparsify_columns(mi_styler, sparse_columns, exp_cols): + exp_l1_c0 = {"is_visible": True, "attributes": "", "display_value": "c1_a"} + exp_l1_c1 = {"is_visible": True, "attributes": "", "display_value": "c1_b"} + + ctx = mi_styler._translate(True, sparse_columns) + + assert exp_cols[0].items() <= ctx["head"][0][2].items() + assert exp_cols[1].items() <= ctx["head"][0][3].items() + assert exp_l1_c0.items() <= ctx["head"][1][2].items() + assert exp_l1_c1.items() <= ctx["head"][1][3].items() + + +@pytest.mark.parametrize( + "sparse_index, exp_rows", + [ + ( + True, + [ + {"is_visible": True, "attributes": 'rowspan="2"', "value": "i0"}, + {"is_visible": False, "attributes": "", "value": "i0"}, + ], + ), + ( + False, + [ + {"is_visible": True, "attributes": "", "value": "i0"}, + {"is_visible": True, "attributes": "", "value": "i0"}, + ], + ), + ], +) +def test_mi_styler_sparsify_index(mi_styler, sparse_index, exp_rows): + exp_l1_r0 = {"is_visible": True, "attributes": "", "display_value": "i1_a"} + exp_l1_r1 = {"is_visible": True, "attributes": "", "display_value": "i1_b"} + + ctx = mi_styler._translate(sparse_index, True) + + assert exp_rows[0].items() <= ctx["body"][0][0].items() + assert exp_rows[1].items() <= ctx["body"][1][0].items() + assert exp_l1_r0.items() <= ctx["body"][0][1].items() + assert exp_l1_r1.items() <= ctx["body"][1][1].items() + + +def test_mi_styler_sparsify_options(mi_styler): + with pd.option_context("styler.sparse.index", False): + html1 = mi_styler.to_html() + with pd.option_context("styler.sparse.index", True): + html2 = mi_styler.to_html() + + assert html1 != html2 + + with pd.option_context("styler.sparse.columns", False): + html1 = mi_styler.to_html() + with pd.option_context("styler.sparse.columns", True): + html2 = mi_styler.to_html() + + assert html1 != html2 + + +@pytest.mark.parametrize( + "rn, cn, max_els, max_rows, max_cols, exp_rn, exp_cn", + [ + (100, 100, 100, None, None, 12, 6), # reduce to (12, 6) < 100 elements + (1000, 3, 750, None, None, 250, 3), # dynamically reduce rows to 250, keep cols + (4, 1000, 500, None, None, 4, 125), # dynamically reduce cols to 125, keep rows + (1000, 3, 750, 10, None, 10, 3), # overwrite above dynamics with max_row + (4, 1000, 500, None, 5, 4, 5), # overwrite above dynamics with max_col + (100, 100, 700, 50, 50, 25, 25), # rows cols below given maxes so < 700 elmts + ], +) +def test_trimming_maximum(rn, cn, max_els, max_rows, max_cols, exp_rn, exp_cn): + rn, cn = _get_trimming_maximums( + rn, cn, max_els, max_rows, max_cols, scaling_factor=0.5 + ) + assert (rn, cn) == (exp_rn, exp_cn) + + +@pytest.mark.parametrize( + "option, val", + [ + ("styler.render.max_elements", 6), + ("styler.render.max_rows", 3), + ], +) +def test_render_trimming_rows(option, val): + # test auto and specific trimming of rows + df = DataFrame(np.arange(120).reshape(60, 2)) + with pd.option_context(option, val): + ctx = df.style._translate(True, True) + assert len(ctx["head"][0]) == 3 # index + 2 data cols + assert len(ctx["body"]) == 4 # 3 data rows + trimming row + assert len(ctx["body"][0]) == 3 # index + 2 data 
cols + + +@pytest.mark.parametrize( + "option, val", + [ + ("styler.render.max_elements", 6), + ("styler.render.max_columns", 2), + ], +) +def test_render_trimming_cols(option, val): + # test auto and specific trimming of cols + df = DataFrame(np.arange(30).reshape(3, 10)) + with pd.option_context(option, val): + ctx = df.style._translate(True, True) + assert len(ctx["head"][0]) == 4 # index + 2 data cols + trimming col + assert len(ctx["body"]) == 3 # 3 data rows + assert len(ctx["body"][0]) == 4 # index + 2 data cols + trimming col + + +def test_render_trimming_mi(): + midx = MultiIndex.from_product([[1, 2], [1, 2, 3]]) + df = DataFrame(np.arange(36).reshape(6, 6), columns=midx, index=midx) + with pd.option_context("styler.render.max_elements", 4): + ctx = df.style._translate(True, True) + + assert len(ctx["body"][0]) == 5 # 2 indexes + 2 data cols + trimming row + assert {"attributes": 'rowspan="2"'}.items() <= ctx["body"][0][0].items() + assert {"class": "data row0 col_trim"}.items() <= ctx["body"][0][4].items() + assert {"class": "data row_trim col_trim"}.items() <= ctx["body"][2][4].items() + assert len(ctx["body"]) == 3 # 2 data rows + trimming row + + +def test_render_empty_mi(): + # GH 43305 + df = DataFrame(index=MultiIndex.from_product([["A"], [0, 1]], names=[None, "one"])) + expected = dedent( + """\ + > +
[rest of this expected fragment stripped; its two header cells read "&nbsp;" and "one"] + """ + ) + assert expected in df.style.to_html() + + +# [... a long run of test_style.py was lost from this dump here, down to the middle of the class-based TestStyler suite ...]
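+    # Editor's descriptive note, not upstream text: ``set_td_classes`` maps a
+    # DataFrame of CSS class strings onto the rendered <td> cells by
+    # index/column label; empty strings, NaN/None entries, and labels missing
+    # from the data (like column "C" below) are simply ignored, which is what
+    # the parametrized cases exercise.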
+ @pytest.mark.parametrize( + "classes", + [ + DataFrame( + data=[["", "test-class"], [np.nan, None]], + columns=["A", "B"], + index=["a", "b"], + ), + DataFrame(data=[["test-class"]], columns=["B"], index=["a"]), + DataFrame(data=[["test-class", "unused"]], columns=["B", "C"], index=["a"]), + ], + ) + def test_set_data_classes(self, classes): + # GH 36159 + df = DataFrame(data=[[0, 1], [2, 3]], columns=["A", "B"], index=["a", "b"]) + s = Styler(df, uuid_len=0, cell_ids=False).set_td_classes(classes).to_html() + # [... the <td> assertions of this test survive only as their cell text "0", "1", "2", "3"; also lost from this dump: the remaining TestStyler tests, the end of test_style.py, and the diff header and opening tests of the DataFrame.to_html formatting test module ...] + + +def test_html_repr_min_rows_default(datapath): + # gh-27991 + + # default setting no truncation even if above min_rows + df = DataFrame({"a": range(20)}) + result = df._repr_html_() + expected = expected_html(datapath, "html_repr_min_rows_default_no_truncation") + assert result == expected + + # default of max_rows 60 triggers truncation if above + df = DataFrame({"a": range(61)}) + result = df._repr_html_() + expected = expected_html(datapath, "html_repr_min_rows_default_truncated") + assert result == expected + + +@pytest.mark.parametrize( + "max_rows,min_rows,expected", + [ + # truncated after first two rows + (10, 4, "html_repr_max_rows_10_min_rows_4"), + # when set to None, follow value of max_rows + (12, None, "html_repr_max_rows_12_min_rows_None"), + # when min_rows is set higher than max_rows, the smaller max_rows wins + (10, 12, "html_repr_max_rows_10_min_rows_12"), + # max_rows of None -> never truncate + (None, 12, "html_repr_max_rows_None_min_rows_12"), + ], +) +def test_html_repr_min_rows(datapath, max_rows, min_rows, expected): + # gh-27991 + + df = DataFrame({"a": range(61)}) + expected = expected_html(datapath, expected) + with option_context("display.max_rows", max_rows, "display.min_rows", min_rows): + result = df._repr_html_() + assert result == expected + + +def test_to_html_multilevel(multiindex_year_month_day_dataframe_random_data): + ymd = multiindex_year_month_day_dataframe_random_data + + ymd.columns.name = "foo" + ymd.to_html() + ymd.T.to_html() + + +@pytest.mark.parametrize("na_rep", ["NaN", "Ted"]) +def test_to_html_na_rep_and_float_format(na_rep, datapath): + # https://github.com/pandas-dev/pandas/issues/13828 + df = DataFrame( + [ + ["A", 1.2225], + ["A", None], + ], + columns=["Group", "Data"], + ) + result = df.to_html(na_rep=na_rep, float_format="{:.2f}".format) + expected = expected_html(datapath, "gh13828_expected_output") + expected = expected.format(na_rep=na_rep) + assert result == expected + + +def test_to_html_float_format_object_col(datapath): + # GH#40024 + df = DataFrame(data={"x": [1000.0, "test"]}) + result = df.to_html(float_format=lambda x: f"{x:,.0f}") + expected = expected_html(datapath, "gh40024_expected_output") + assert result == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/test_to_latex.py b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/test_to_latex.py new file mode 100644 index 0000000..01bc94b --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/test_to_latex.py @@ -0,0 +1,1530 @@ +import codecs +from datetime import datetime +from textwrap import dedent + +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Series, +) +import pandas._testing as tm + +from pandas.io.formats.format import DataFrameFormatter +from pandas.io.formats.latex import ( + RegularTableBuilder, + RowBodyIterator, + RowHeaderIterator, + RowStringConverter, +) + +pytestmark = 
pytest.mark.filterwarnings("ignore::FutureWarning") + + +def _dedent(string): + """Dedent without new line in the beginning. + + Built-in textwrap.dedent would keep new line character in the beginning + of multi-line string starting from the new line. + This version drops the leading new line character. + """ + return dedent(string).lstrip() + + +@pytest.fixture +def df_short(): + """Short dataframe for testing table/tabular/longtable LaTeX env.""" + return DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + + +class TestToLatex: + def test_to_latex_to_file(self, float_frame): + with tm.ensure_clean("test.tex") as path: + float_frame.to_latex(path) + with open(path) as f: + assert float_frame.to_latex() == f.read() + + def test_to_latex_to_file_utf8_with_encoding(self): + # test with utf-8 and encoding option (GH 7061) + df = DataFrame([["au\xdfgangen"]]) + with tm.ensure_clean("test.tex") as path: + df.to_latex(path, encoding="utf-8") + with codecs.open(path, "r", encoding="utf-8") as f: + assert df.to_latex() == f.read() + + def test_to_latex_to_file_utf8_without_encoding(self): + # test with utf-8 without encoding option + df = DataFrame([["au\xdfgangen"]]) + with tm.ensure_clean("test.tex") as path: + df.to_latex(path) + with codecs.open(path, "r", encoding="utf-8") as f: + assert df.to_latex() == f.read() + + def test_to_latex_tabular_with_index(self): + df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + result = df.to_latex() + expected = _dedent( + r""" + \begin{tabular}{lrl} + \toprule + {} & a & b \\ + \midrule + 0 & 1 & b1 \\ + 1 & 2 & b2 \\ + \bottomrule + \end{tabular} + """ + ) + assert result == expected + + def test_to_latex_tabular_without_index(self): + df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + result = df.to_latex(index=False) + expected = _dedent( + r""" + \begin{tabular}{rl} + \toprule + a & b \\ + \midrule + 1 & b1 \\ + 2 & b2 \\ + \bottomrule + \end{tabular} + """ + ) + assert result == expected + + @pytest.mark.parametrize( + "bad_column_format", + [5, 1.2, ["l", "r"], ("r", "c"), {"r", "c", "l"}, {"a": "r", "b": "l"}], + ) + def test_to_latex_bad_column_format(self, bad_column_format): + df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + msg = r"column_format must be str or unicode" + with pytest.raises(ValueError, match=msg): + df.to_latex(column_format=bad_column_format) + + def test_to_latex_column_format_just_works(self, float_frame): + # GH Bug #9402 + float_frame.to_latex(column_format="lcr") + + def test_to_latex_column_format(self): + df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + result = df.to_latex(column_format="lcr") + expected = _dedent( + r""" + \begin{tabular}{lcr} + \toprule + {} & a & b \\ + \midrule + 0 & 1 & b1 \\ + 1 & 2 & b2 \\ + \bottomrule + \end{tabular} + """ + ) + assert result == expected + + def test_to_latex_float_format_object_col(self): + # GH#40024 + ser = Series([1000.0, "test"]) + result = ser.to_latex(float_format="{:,.0f}".format) + expected = _dedent( + r""" + \begin{tabular}{ll} + \toprule + {} & 0 \\ + \midrule + 0 & 1,000 \\ + 1 & test \\ + \bottomrule + \end{tabular} + """ + ) + assert result == expected + + def test_to_latex_empty_tabular(self): + df = DataFrame() + result = df.to_latex() + expected = _dedent( + r""" + \begin{tabular}{l} + \toprule + Empty DataFrame + Columns: Index([], dtype='object') + Index: Index([], dtype='object') \\ + \bottomrule + \end{tabular} + """ + ) + assert result == expected + + def test_to_latex_series(self): + s = Series(["a", "b", "c"]) + result = s.to_latex() + expected = _dedent( 
+ r""" + \begin{tabular}{ll} + \toprule + {} & 0 \\ + \midrule + 0 & a \\ + 1 & b \\ + 2 & c \\ + \bottomrule + \end{tabular} + """ + ) + assert result == expected + + def test_to_latex_midrule_location(self): + # GH 18326 + df = DataFrame({"a": [1, 2]}) + df.index.name = "foo" + result = df.to_latex(index_names=False) + expected = _dedent( + r""" + \begin{tabular}{lr} + \toprule + {} & a \\ + \midrule + 0 & 1 \\ + 1 & 2 \\ + \bottomrule + \end{tabular} + """ + ) + assert result == expected + + +class TestToLatexLongtable: + def test_to_latex_empty_longtable(self): + df = DataFrame() + result = df.to_latex(longtable=True) + expected = _dedent( + r""" + \begin{longtable}{l} + \toprule + Empty DataFrame + Columns: Index([], dtype='object') + Index: Index([], dtype='object') \\ + \end{longtable} + """ + ) + assert result == expected + + def test_to_latex_longtable_with_index(self): + df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + result = df.to_latex(longtable=True) + expected = _dedent( + r""" + \begin{longtable}{lrl} + \toprule + {} & a & b \\ + \midrule + \endfirsthead + + \toprule + {} & a & b \\ + \midrule + \endhead + \midrule + \multicolumn{3}{r}{{Continued on next page}} \\ + \midrule + \endfoot + + \bottomrule + \endlastfoot + 0 & 1 & b1 \\ + 1 & 2 & b2 \\ + \end{longtable} + """ + ) + assert result == expected + + def test_to_latex_longtable_without_index(self): + df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + result = df.to_latex(index=False, longtable=True) + expected = _dedent( + r""" + \begin{longtable}{rl} + \toprule + a & b \\ + \midrule + \endfirsthead + + \toprule + a & b \\ + \midrule + \endhead + \midrule + \multicolumn{2}{r}{{Continued on next page}} \\ + \midrule + \endfoot + + \bottomrule + \endlastfoot + 1 & b1 \\ + 2 & b2 \\ + \end{longtable} + """ + ) + assert result == expected + + @pytest.mark.parametrize( + "df, expected_number", + [ + (DataFrame({"a": [1, 2]}), 1), + (DataFrame({"a": [1, 2], "b": [3, 4]}), 2), + (DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}), 3), + ], + ) + def test_to_latex_longtable_continued_on_next_page(self, df, expected_number): + result = df.to_latex(index=False, longtable=True) + assert fr"\multicolumn{{{expected_number}}}" in result + + +class TestToLatexHeader: + def test_to_latex_no_header_with_index(self): + # GH 7124 + df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + result = df.to_latex(header=False) + expected = _dedent( + r""" + \begin{tabular}{lrl} + \toprule + 0 & 1 & b1 \\ + 1 & 2 & b2 \\ + \bottomrule + \end{tabular} + """ + ) + assert result == expected + + def test_to_latex_no_header_without_index(self): + # GH 7124 + df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + result = df.to_latex(index=False, header=False) + expected = _dedent( + r""" + \begin{tabular}{rl} + \toprule + 1 & b1 \\ + 2 & b2 \\ + \bottomrule + \end{tabular} + """ + ) + assert result == expected + + def test_to_latex_specified_header_with_index(self): + # GH 7124 + df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + result = df.to_latex(header=["AA", "BB"]) + expected = _dedent( + r""" + \begin{tabular}{lrl} + \toprule + {} & AA & BB \\ + \midrule + 0 & 1 & b1 \\ + 1 & 2 & b2 \\ + \bottomrule + \end{tabular} + """ + ) + assert result == expected + + def test_to_latex_specified_header_without_index(self): + # GH 7124 + df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + result = df.to_latex(header=["AA", "BB"], index=False) + expected = _dedent( + r""" + \begin{tabular}{rl} + \toprule + AA & BB \\ + \midrule + 1 & b1 \\ + 2 & b2 \\ + 
\bottomrule + \end{tabular} + """ + ) + assert result == expected + + @pytest.mark.parametrize( + "header, num_aliases", + [ + (["A"], 1), + (("B",), 1), + (("Col1", "Col2", "Col3"), 3), + (("Col1", "Col2", "Col3", "Col4"), 4), + ], + ) + def test_to_latex_number_of_items_in_header_missmatch_raises( + self, + header, + num_aliases, + ): + # GH 7124 + df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + msg = f"Writing 2 cols but got {num_aliases} aliases" + with pytest.raises(ValueError, match=msg): + df.to_latex(header=header) + + def test_to_latex_decimal(self): + # GH 12031 + df = DataFrame({"a": [1.0, 2.1], "b": ["b1", "b2"]}) + result = df.to_latex(decimal=",") + expected = _dedent( + r""" + \begin{tabular}{lrl} + \toprule + {} & a & b \\ + \midrule + 0 & 1,0 & b1 \\ + 1 & 2,1 & b2 \\ + \bottomrule + \end{tabular} + """ + ) + assert result == expected + + +class TestToLatexBold: + def test_to_latex_bold_rows(self): + # GH 16707 + df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + result = df.to_latex(bold_rows=True) + expected = _dedent( + r""" + \begin{tabular}{lrl} + \toprule + {} & a & b \\ + \midrule + \textbf{0} & 1 & b1 \\ + \textbf{1} & 2 & b2 \\ + \bottomrule + \end{tabular} + """ + ) + assert result == expected + + def test_to_latex_no_bold_rows(self): + # GH 16707 + df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + result = df.to_latex(bold_rows=False) + expected = _dedent( + r""" + \begin{tabular}{lrl} + \toprule + {} & a & b \\ + \midrule + 0 & 1 & b1 \\ + 1 & 2 & b2 \\ + \bottomrule + \end{tabular} + """ + ) + assert result == expected + + +class TestToLatexCaptionLabel: + @pytest.fixture + def caption_table(self): + """Caption for table/tabular LaTeX environment.""" + return "a table in a \\texttt{table/tabular} environment" + + @pytest.fixture + def short_caption(self): + """Short caption for testing \\caption[short_caption]{full_caption}.""" + return "a table" + + @pytest.fixture + def label_table(self): + """Label for table/tabular LaTeX environment.""" + return "tab:table_tabular" + + @pytest.fixture + def caption_longtable(self): + """Caption for longtable LaTeX environment.""" + return "a table in a \\texttt{longtable} environment" + + @pytest.fixture + def label_longtable(self): + """Label for longtable LaTeX environment.""" + return "tab:longtable" + + def test_to_latex_caption_only(self, df_short, caption_table): + # GH 25436 + result = df_short.to_latex(caption=caption_table) + expected = _dedent( + r""" + \begin{table} + \centering + \caption{a table in a \texttt{table/tabular} environment} + \begin{tabular}{lrl} + \toprule + {} & a & b \\ + \midrule + 0 & 1 & b1 \\ + 1 & 2 & b2 \\ + \bottomrule + \end{tabular} + \end{table} + """ + ) + assert result == expected + + def test_to_latex_label_only(self, df_short, label_table): + # GH 25436 + result = df_short.to_latex(label=label_table) + expected = _dedent( + r""" + \begin{table} + \centering + \label{tab:table_tabular} + \begin{tabular}{lrl} + \toprule + {} & a & b \\ + \midrule + 0 & 1 & b1 \\ + 1 & 2 & b2 \\ + \bottomrule + \end{tabular} + \end{table} + """ + ) + assert result == expected + + def test_to_latex_caption_and_label(self, df_short, caption_table, label_table): + # GH 25436 + result = df_short.to_latex(caption=caption_table, label=label_table) + expected = _dedent( + r""" + \begin{table} + \centering + \caption{a table in a \texttt{table/tabular} environment} + \label{tab:table_tabular} + \begin{tabular}{lrl} + \toprule + {} & a & b \\ + \midrule + 0 & 1 & b1 \\ + 1 & 2 & b2 \\ + \bottomrule + 
\end{tabular} + \end{table} + """ + ) + assert result == expected + + def test_to_latex_caption_and_shortcaption( + self, + df_short, + caption_table, + short_caption, + ): + result = df_short.to_latex(caption=(caption_table, short_caption)) + expected = _dedent( + r""" + \begin{table} + \centering + \caption[a table]{a table in a \texttt{table/tabular} environment} + \begin{tabular}{lrl} + \toprule + {} & a & b \\ + \midrule + 0 & 1 & b1 \\ + 1 & 2 & b2 \\ + \bottomrule + \end{tabular} + \end{table} + """ + ) + assert result == expected + + def test_to_latex_caption_and_shortcaption_list_is_ok(self, df_short): + caption = ("Long-long-caption", "Short") + result_tuple = df_short.to_latex(caption=caption) + result_list = df_short.to_latex(caption=list(caption)) + assert result_tuple == result_list + + def test_to_latex_caption_shortcaption_and_label( + self, + df_short, + caption_table, + short_caption, + label_table, + ): + # test when the short_caption is provided alongside caption and label + result = df_short.to_latex( + caption=(caption_table, short_caption), + label=label_table, + ) + expected = _dedent( + r""" + \begin{table} + \centering + \caption[a table]{a table in a \texttt{table/tabular} environment} + \label{tab:table_tabular} + \begin{tabular}{lrl} + \toprule + {} & a & b \\ + \midrule + 0 & 1 & b1 \\ + 1 & 2 & b2 \\ + \bottomrule + \end{tabular} + \end{table} + """ + ) + assert result == expected + + @pytest.mark.parametrize( + "bad_caption", + [ + ("full_caption", "short_caption", "extra_string"), + ("full_caption", "short_caption", 1), + ("full_caption", "short_caption", None), + ("full_caption",), + (None,), + ], + ) + def test_to_latex_bad_caption_raises(self, bad_caption): + # test that wrong number of params is raised + df = DataFrame({"a": [1]}) + msg = "caption must be either a string or a tuple of two strings" + with pytest.raises(ValueError, match=msg): + df.to_latex(caption=bad_caption) + + def test_to_latex_two_chars_caption(self, df_short): + # test that two chars caption is handled correctly + # it must not be unpacked into long_caption, short_caption. 
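+        # (only a 2-element tuple or list is unpacked as
+        # (full_caption, short_caption); a plain two-character string such as
+        # "xy" must stay a single full caption)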
+ result = df_short.to_latex(caption="xy") + expected = _dedent( + r""" + \begin{table} + \centering + \caption{xy} + \begin{tabular}{lrl} + \toprule + {} & a & b \\ + \midrule + 0 & 1 & b1 \\ + 1 & 2 & b2 \\ + \bottomrule + \end{tabular} + \end{table} + """ + ) + assert result == expected + + def test_to_latex_longtable_caption_only(self, df_short, caption_longtable): + # GH 25436 + # test when no caption and no label is provided + # is performed by test_to_latex_longtable() + result = df_short.to_latex(longtable=True, caption=caption_longtable) + expected = _dedent( + r""" + \begin{longtable}{lrl} + \caption{a table in a \texttt{longtable} environment}\\ + \toprule + {} & a & b \\ + \midrule + \endfirsthead + \caption[]{a table in a \texttt{longtable} environment} \\ + \toprule + {} & a & b \\ + \midrule + \endhead + \midrule + \multicolumn{3}{r}{{Continued on next page}} \\ + \midrule + \endfoot + + \bottomrule + \endlastfoot + 0 & 1 & b1 \\ + 1 & 2 & b2 \\ + \end{longtable} + """ + ) + assert result == expected + + def test_to_latex_longtable_label_only(self, df_short, label_longtable): + # GH 25436 + result = df_short.to_latex(longtable=True, label=label_longtable) + expected = _dedent( + r""" + \begin{longtable}{lrl} + \label{tab:longtable}\\ + \toprule + {} & a & b \\ + \midrule + \endfirsthead + + \toprule + {} & a & b \\ + \midrule + \endhead + \midrule + \multicolumn{3}{r}{{Continued on next page}} \\ + \midrule + \endfoot + + \bottomrule + \endlastfoot + 0 & 1 & b1 \\ + 1 & 2 & b2 \\ + \end{longtable} + """ + ) + assert result == expected + + def test_to_latex_longtable_caption_and_label( + self, + df_short, + caption_longtable, + label_longtable, + ): + # GH 25436 + result = df_short.to_latex( + longtable=True, + caption=caption_longtable, + label=label_longtable, + ) + expected = _dedent( + r""" + \begin{longtable}{lrl} + \caption{a table in a \texttt{longtable} environment} + \label{tab:longtable}\\ + \toprule + {} & a & b \\ + \midrule + \endfirsthead + \caption[]{a table in a \texttt{longtable} environment} \\ + \toprule + {} & a & b \\ + \midrule + \endhead + \midrule + \multicolumn{3}{r}{{Continued on next page}} \\ + \midrule + \endfoot + + \bottomrule + \endlastfoot + 0 & 1 & b1 \\ + 1 & 2 & b2 \\ + \end{longtable} + """ + ) + assert result == expected + + def test_to_latex_longtable_caption_shortcaption_and_label( + self, + df_short, + caption_longtable, + short_caption, + label_longtable, + ): + # test when the caption, the short_caption and the label are provided + result = df_short.to_latex( + longtable=True, + caption=(caption_longtable, short_caption), + label=label_longtable, + ) + expected = _dedent( + r""" + \begin{longtable}{lrl} + \caption[a table]{a table in a \texttt{longtable} environment} + \label{tab:longtable}\\ + \toprule + {} & a & b \\ + \midrule + \endfirsthead + \caption[]{a table in a \texttt{longtable} environment} \\ + \toprule + {} & a & b \\ + \midrule + \endhead + \midrule + \multicolumn{3}{r}{{Continued on next page}} \\ + \midrule + \endfoot + + \bottomrule + \endlastfoot + 0 & 1 & b1 \\ + 1 & 2 & b2 \\ + \end{longtable} + """ + ) + assert result == expected + + +class TestToLatexEscape: + @pytest.fixture + def df_with_symbols(self): + """Dataframe with special characters for testing chars escaping.""" + a = "a" + b = "b" + yield DataFrame({"co$e^x$": {a: "a", b: "b"}, "co^l1": {a: "a", b: "b"}}) + + def test_to_latex_escape_false(self, df_with_symbols): + result = df_with_symbols.to_latex(escape=False) + expected = _dedent( + r""" + 
\begin{tabular}{lll} + \toprule + {} & co$e^x$ & co^l1 \\ + \midrule + a & a & a \\ + b & b & b \\ + \bottomrule + \end{tabular} + """ + ) + assert result == expected + + def test_to_latex_escape_default(self, df_with_symbols): + result = df_with_symbols.to_latex() # default: escape=True + expected = _dedent( + r""" + \begin{tabular}{lll} + \toprule + {} & co\$e\textasciicircum x\$ & co\textasciicircum l1 \\ + \midrule + a & a & a \\ + b & b & b \\ + \bottomrule + \end{tabular} + """ + ) + assert result == expected + + def test_to_latex_special_escape(self): + df = DataFrame([r"a\b\c", r"^a^b^c", r"~a~b~c"]) + result = df.to_latex() + expected = _dedent( + r""" + \begin{tabular}{ll} + \toprule + {} & 0 \\ + \midrule + 0 & a\textbackslash b\textbackslash c \\ + 1 & \textasciicircum a\textasciicircum b\textasciicircum c \\ + 2 & \textasciitilde a\textasciitilde b\textasciitilde c \\ + \bottomrule + \end{tabular} + """ + ) + assert result == expected + + def test_to_latex_escape_special_chars(self): + special_characters = ["&", "%", "$", "#", "_", "{", "}", "~", "^", "\\"] + df = DataFrame(data=special_characters) + result = df.to_latex() + expected = _dedent( + r""" + \begin{tabular}{ll} + \toprule + {} & 0 \\ + \midrule + 0 & \& \\ + 1 & \% \\ + 2 & \$ \\ + 3 & \# \\ + 4 & \_ \\ + 5 & \{ \\ + 6 & \} \\ + 7 & \textasciitilde \\ + 8 & \textasciicircum \\ + 9 & \textbackslash \\ + \bottomrule + \end{tabular} + """ + ) + assert result == expected + + def test_to_latex_specified_header_special_chars_without_escape(self): + # GH 7124 + df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + result = df.to_latex(header=["$A$", "$B$"], escape=False) + expected = _dedent( + r""" + \begin{tabular}{lrl} + \toprule + {} & $A$ & $B$ \\ + \midrule + 0 & 1 & b1 \\ + 1 & 2 & b2 \\ + \bottomrule + \end{tabular} + """ + ) + assert result == expected + + +class TestToLatexPosition: + def test_to_latex_position(self): + the_position = "h" + df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + result = df.to_latex(position=the_position) + expected = _dedent( + r""" + \begin{table}[h] + \centering + \begin{tabular}{lrl} + \toprule + {} & a & b \\ + \midrule + 0 & 1 & b1 \\ + 1 & 2 & b2 \\ + \bottomrule + \end{tabular} + \end{table} + """ + ) + assert result == expected + + def test_to_latex_longtable_position(self): + the_position = "t" + df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + result = df.to_latex(longtable=True, position=the_position) + expected = _dedent( + r""" + \begin{longtable}[t]{lrl} + \toprule + {} & a & b \\ + \midrule + \endfirsthead + + \toprule + {} & a & b \\ + \midrule + \endhead + \midrule + \multicolumn{3}{r}{{Continued on next page}} \\ + \midrule + \endfoot + + \bottomrule + \endlastfoot + 0 & 1 & b1 \\ + 1 & 2 & b2 \\ + \end{longtable} + """ + ) + assert result == expected + + +class TestToLatexFormatters: + def test_to_latex_with_formatters(self): + df = DataFrame( + { + "datetime64": [ + datetime(2016, 1, 1), + datetime(2016, 2, 5), + datetime(2016, 3, 3), + ], + "float": [1.0, 2.0, 3.0], + "int": [1, 2, 3], + "object": [(1, 2), True, False], + } + ) + + formatters = { + "datetime64": lambda x: x.strftime("%Y-%m"), + "float": lambda x: f"[{x: 4.1f}]", + "int": lambda x: f"0x{x:x}", + "object": lambda x: f"-{x!s}-", + "__index__": lambda x: f"index: {x}", + } + result = df.to_latex(formatters=dict(formatters)) + + expected = _dedent( + r""" + \begin{tabular}{llrrl} + \toprule + {} & datetime64 & float & int & object \\ + \midrule + index: 0 & 2016-01 & [ 1.0] & 0x1 & -(1, 2)- \\ + 
index: 1 & 2016-02 & [ 2.0] & 0x2 & -True- \\ + index: 2 & 2016-03 & [ 3.0] & 0x3 & -False- \\ + \bottomrule + \end{tabular} + """ + ) + assert result == expected + + def test_to_latex_float_format_no_fixed_width_3decimals(self): + # GH 21625 + df = DataFrame({"x": [0.19999]}) + result = df.to_latex(float_format="%.3f") + expected = _dedent( + r""" + \begin{tabular}{lr} + \toprule + {} & x \\ + \midrule + 0 & 0.200 \\ + \bottomrule + \end{tabular} + """ + ) + assert result == expected + + def test_to_latex_float_format_no_fixed_width_integer(self): + # GH 22270 + df = DataFrame({"x": [100.0]}) + result = df.to_latex(float_format="%.0f") + expected = _dedent( + r""" + \begin{tabular}{lr} + \toprule + {} & x \\ + \midrule + 0 & 100 \\ + \bottomrule + \end{tabular} + """ + ) + assert result == expected + + @pytest.mark.parametrize("na_rep", ["NaN", "Ted"]) + def test_to_latex_na_rep_and_float_format(self, na_rep): + df = DataFrame( + [ + ["A", 1.2225], + ["A", None], + ], + columns=["Group", "Data"], + ) + result = df.to_latex(na_rep=na_rep, float_format="{:.2f}".format) + expected = _dedent( + fr""" + \begin{{tabular}}{{llr}} + \toprule + {{}} & Group & Data \\ + \midrule + 0 & A & 1.22 \\ + 1 & A & {na_rep} \\ + \bottomrule + \end{{tabular}} + """ + ) + assert result == expected + + +class TestToLatexMultiindex: + @pytest.fixture + def multiindex_frame(self): + """Multiindex dataframe for testing multirow LaTeX macros.""" + yield DataFrame.from_dict( + { + ("c1", 0): Series({x: x for x in range(4)}), + ("c1", 1): Series({x: x + 4 for x in range(4)}), + ("c2", 0): Series({x: x for x in range(4)}), + ("c2", 1): Series({x: x + 4 for x in range(4)}), + ("c3", 0): Series({x: x for x in range(4)}), + } + ).T + + @pytest.fixture + def multicolumn_frame(self): + """Multicolumn dataframe for testing multicolumn LaTeX macros.""" + yield DataFrame( + { + ("c1", 0): {x: x for x in range(5)}, + ("c1", 1): {x: x + 5 for x in range(5)}, + ("c2", 0): {x: x for x in range(5)}, + ("c2", 1): {x: x + 5 for x in range(5)}, + ("c3", 0): {x: x for x in range(5)}, + } + ) + + def test_to_latex_multindex_header(self): + # GH 16718 + df = DataFrame({"a": [0], "b": [1], "c": [2], "d": [3]}) + df = df.set_index(["a", "b"]) + observed = df.to_latex(header=["r1", "r2"]) + expected = _dedent( + r""" + \begin{tabular}{llrr} + \toprule + & & r1 & r2 \\ + a & b & & \\ + \midrule + 0 & 1 & 2 & 3 \\ + \bottomrule + \end{tabular} + """ + ) + assert observed == expected + + def test_to_latex_multiindex_empty_name(self): + # GH 18669 + mi = pd.MultiIndex.from_product([[1, 2]], names=[""]) + df = DataFrame(-1, index=mi, columns=range(4)) + observed = df.to_latex() + expected = _dedent( + r""" + \begin{tabular}{lrrrr} + \toprule + & 0 & 1 & 2 & 3 \\ + {} & & & & \\ + \midrule + 1 & -1 & -1 & -1 & -1 \\ + 2 & -1 & -1 & -1 & -1 \\ + \bottomrule + \end{tabular} + """ + ) + assert observed == expected + + def test_to_latex_multiindex_column_tabular(self): + df = DataFrame({("x", "y"): ["a"]}) + result = df.to_latex() + expected = _dedent( + r""" + \begin{tabular}{ll} + \toprule + {} & x \\ + {} & y \\ + \midrule + 0 & a \\ + \bottomrule + \end{tabular} + """ + ) + assert result == expected + + def test_to_latex_multiindex_small_tabular(self): + df = DataFrame({("x", "y"): ["a"]}).T + result = df.to_latex() + expected = _dedent( + r""" + \begin{tabular}{lll} + \toprule + & & 0 \\ + \midrule + x & y & a \\ + \bottomrule + \end{tabular} + """ + ) + assert result == expected + + def test_to_latex_multiindex_tabular(self, 
multiindex_frame): + result = multiindex_frame.to_latex() + expected = _dedent( + r""" + \begin{tabular}{llrrrr} + \toprule + & & 0 & 1 & 2 & 3 \\ + \midrule + c1 & 0 & 0 & 1 & 2 & 3 \\ + & 1 & 4 & 5 & 6 & 7 \\ + c2 & 0 & 0 & 1 & 2 & 3 \\ + & 1 & 4 & 5 & 6 & 7 \\ + c3 & 0 & 0 & 1 & 2 & 3 \\ + \bottomrule + \end{tabular} + """ + ) + assert result == expected + + def test_to_latex_multicolumn_tabular(self, multiindex_frame): + # GH 14184 + df = multiindex_frame.T + df.columns.names = ["a", "b"] + result = df.to_latex() + expected = _dedent( + r""" + \begin{tabular}{lrrrrr} + \toprule + a & \multicolumn{2}{l}{c1} & \multicolumn{2}{l}{c2} & c3 \\ + b & 0 & 1 & 0 & 1 & 0 \\ + \midrule + 0 & 0 & 4 & 0 & 4 & 0 \\ + 1 & 1 & 5 & 1 & 5 & 1 \\ + 2 & 2 & 6 & 2 & 6 & 2 \\ + 3 & 3 & 7 & 3 & 7 & 3 \\ + \bottomrule + \end{tabular} + """ + ) + assert result == expected + + def test_to_latex_index_has_name_tabular(self): + # GH 10660 + df = DataFrame({"a": [0, 0, 1, 1], "b": list("abab"), "c": [1, 2, 3, 4]}) + result = df.set_index(["a", "b"]).to_latex() + expected = _dedent( + r""" + \begin{tabular}{llr} + \toprule + & & c \\ + a & b & \\ + \midrule + 0 & a & 1 \\ + & b & 2 \\ + 1 & a & 3 \\ + & b & 4 \\ + \bottomrule + \end{tabular} + """ + ) + assert result == expected + + def test_to_latex_groupby_tabular(self): + # GH 10660 + df = DataFrame({"a": [0, 0, 1, 1], "b": list("abab"), "c": [1, 2, 3, 4]}) + result = df.groupby("a").describe().to_latex() + expected = _dedent( + r""" + \begin{tabular}{lrrrrrrrr} + \toprule + {} & \multicolumn{8}{l}{c} \\ + {} & count & mean & std & min & 25\% & 50\% & 75\% & max \\ + a & & & & & & & & \\ + \midrule + 0 & 2.0 & 1.5 & 0.707107 & 1.0 & 1.25 & 1.5 & 1.75 & 2.0 \\ + 1 & 2.0 & 3.5 & 0.707107 & 3.0 & 3.25 & 3.5 & 3.75 & 4.0 \\ + \bottomrule + \end{tabular} + """ + ) + assert result == expected + + def test_to_latex_multiindex_dupe_level(self): + # see gh-14484 + # + # If an index is repeated in subsequent rows, it should be + # replaced with a blank in the created table. This should + # ONLY happen if all higher order indices (to the left) are + # equal too. In this test, 'c' has to be printed both times + # because the higher order index 'A' != 'B'. 
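+        # For instance, a repeated ('A', 'c') row would have its labels
+        # blanked, but here the rows are ('A', 'c') and ('B', 'c'): the level-0
+        # labels differ, so both 'c' cells must be printed.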
+ df = DataFrame( + index=pd.MultiIndex.from_tuples([("A", "c"), ("B", "c")]), columns=["col"] + ) + result = df.to_latex() + expected = _dedent( + r""" + \begin{tabular}{lll} + \toprule + & & col \\ + \midrule + A & c & NaN \\ + B & c & NaN \\ + \bottomrule + \end{tabular} + """ + ) + assert result == expected + + def test_to_latex_multicolumn_default(self, multicolumn_frame): + result = multicolumn_frame.to_latex() + expected = _dedent( + r""" + \begin{tabular}{lrrrrr} + \toprule + {} & \multicolumn{2}{l}{c1} & \multicolumn{2}{l}{c2} & c3 \\ + {} & 0 & 1 & 0 & 1 & 0 \\ + \midrule + 0 & 0 & 5 & 0 & 5 & 0 \\ + 1 & 1 & 6 & 1 & 6 & 1 \\ + 2 & 2 & 7 & 2 & 7 & 2 \\ + 3 & 3 & 8 & 3 & 8 & 3 \\ + 4 & 4 & 9 & 4 & 9 & 4 \\ + \bottomrule + \end{tabular} + """ + ) + assert result == expected + + def test_to_latex_multicolumn_false(self, multicolumn_frame): + result = multicolumn_frame.to_latex(multicolumn=False) + expected = _dedent( + r""" + \begin{tabular}{lrrrrr} + \toprule + {} & c1 & & c2 & & c3 \\ + {} & 0 & 1 & 0 & 1 & 0 \\ + \midrule + 0 & 0 & 5 & 0 & 5 & 0 \\ + 1 & 1 & 6 & 1 & 6 & 1 \\ + 2 & 2 & 7 & 2 & 7 & 2 \\ + 3 & 3 & 8 & 3 & 8 & 3 \\ + 4 & 4 & 9 & 4 & 9 & 4 \\ + \bottomrule + \end{tabular} + """ + ) + assert result == expected + + def test_to_latex_multirow_true(self, multicolumn_frame): + result = multicolumn_frame.T.to_latex(multirow=True) + expected = _dedent( + r""" + \begin{tabular}{llrrrrr} + \toprule + & & 0 & 1 & 2 & 3 & 4 \\ + \midrule + \multirow{2}{*}{c1} & 0 & 0 & 1 & 2 & 3 & 4 \\ + & 1 & 5 & 6 & 7 & 8 & 9 \\ + \cline{1-7} + \multirow{2}{*}{c2} & 0 & 0 & 1 & 2 & 3 & 4 \\ + & 1 & 5 & 6 & 7 & 8 & 9 \\ + \cline{1-7} + c3 & 0 & 0 & 1 & 2 & 3 & 4 \\ + \bottomrule + \end{tabular} + """ + ) + assert result == expected + + def test_to_latex_multicolumnrow_with_multicol_format(self, multicolumn_frame): + multicolumn_frame.index = multicolumn_frame.T.index + result = multicolumn_frame.T.to_latex( + multirow=True, + multicolumn=True, + multicolumn_format="c", + ) + expected = _dedent( + r""" + \begin{tabular}{llrrrrr} + \toprule + & & \multicolumn{2}{c}{c1} & \multicolumn{2}{c}{c2} & c3 \\ + & & 0 & 1 & 0 & 1 & 0 \\ + \midrule + \multirow{2}{*}{c1} & 0 & 0 & 1 & 2 & 3 & 4 \\ + & 1 & 5 & 6 & 7 & 8 & 9 \\ + \cline{1-7} + \multirow{2}{*}{c2} & 0 & 0 & 1 & 2 & 3 & 4 \\ + & 1 & 5 & 6 & 7 & 8 & 9 \\ + \cline{1-7} + c3 & 0 & 0 & 1 & 2 & 3 & 4 \\ + \bottomrule + \end{tabular} + """ + ) + assert result == expected + + @pytest.mark.parametrize("name0", [None, "named0"]) + @pytest.mark.parametrize("name1", [None, "named1"]) + @pytest.mark.parametrize("axes", [[0], [1], [0, 1]]) + def test_to_latex_multiindex_names(self, name0, name1, axes): + # GH 18667 + names = [name0, name1] + mi = pd.MultiIndex.from_product([[1, 2], [3, 4]]) + df = DataFrame(-1, index=mi.copy(), columns=mi.copy()) + for idx in axes: + df.axes[idx].names = names + + idx_names = tuple(n or "{}" for n in names) + idx_names_row = ( + f"{idx_names[0]} & {idx_names[1]} & & & & \\\\\n" + if (0 in axes and any(names)) + else "" + ) + placeholder = "{}" if any(names) and 1 in axes else " " + col_names = [n if (bool(n) and 1 in axes) else placeholder for n in names] + observed = df.to_latex() + expected = r"""\begin{tabular}{llrrrr} +\toprule + & %s & \multicolumn{2}{l}{1} & \multicolumn{2}{l}{2} \\ + & %s & 3 & 4 & 3 & 4 \\ +%s\midrule +1 & 3 & -1 & -1 & -1 & -1 \\ + & 4 & -1 & -1 & -1 & -1 \\ +2 & 3 & -1 & -1 & -1 & -1 \\ + & 4 & -1 & -1 & -1 & -1 \\ +\bottomrule +\end{tabular} +""" % tuple( + list(col_names) + [idx_names_row] + ) + 
assert observed == expected + + @pytest.mark.parametrize("one_row", [True, False]) + def test_to_latex_multiindex_nans(self, one_row): + # GH 14249 + df = DataFrame({"a": [None, 1], "b": [2, 3], "c": [4, 5]}) + if one_row: + df = df.iloc[[0]] + observed = df.set_index(["a", "b"]).to_latex() + expected = _dedent( + r""" + \begin{tabular}{llr} + \toprule + & & c \\ + a & b & \\ + \midrule + NaN & 2 & 4 \\ + """ + ) + if not one_row: + expected += r"""1.0 & 3 & 5 \\ +""" + expected += r"""\bottomrule +\end{tabular} +""" + assert observed == expected + + def test_to_latex_non_string_index(self): + # GH 19981 + df = DataFrame([[1, 2, 3]] * 2).set_index([0, 1]) + result = df.to_latex() + expected = _dedent( + r""" + \begin{tabular}{llr} + \toprule + & & 2 \\ + 0 & 1 & \\ + \midrule + 1 & 2 & 3 \\ + & 2 & 3 \\ + \bottomrule + \end{tabular} + """ + ) + assert result == expected + + def test_to_latex_multiindex_multirow(self): + # GH 16719 + mi = pd.MultiIndex.from_product( + [[0.0, 1.0], [3.0, 2.0, 1.0], ["0", "1"]], names=["i", "val0", "val1"] + ) + df = DataFrame(index=mi) + result = df.to_latex(multirow=True, escape=False) + expected = _dedent( + r""" + \begin{tabular}{lll} + \toprule + & & \\ + i & val0 & val1 \\ + \midrule + \multirow{6}{*}{0.0} & \multirow{2}{*}{3.0} & 0 \\ + & & 1 \\ + \cline{2-3} + & \multirow{2}{*}{2.0} & 0 \\ + & & 1 \\ + \cline{2-3} + & \multirow{2}{*}{1.0} & 0 \\ + & & 1 \\ + \cline{1-3} + \cline{2-3} + \multirow{6}{*}{1.0} & \multirow{2}{*}{3.0} & 0 \\ + & & 1 \\ + \cline{2-3} + & \multirow{2}{*}{2.0} & 0 \\ + & & 1 \\ + \cline{2-3} + & \multirow{2}{*}{1.0} & 0 \\ + & & 1 \\ + \bottomrule + \end{tabular} + """ + ) + assert result == expected + + +class TestTableBuilder: + @pytest.fixture + def dataframe(self): + return DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + + @pytest.fixture + def table_builder(self, dataframe): + return RegularTableBuilder(formatter=DataFrameFormatter(dataframe)) + + def test_create_row_iterator(self, table_builder): + iterator = table_builder._create_row_iterator(over="header") + assert isinstance(iterator, RowHeaderIterator) + + def test_create_body_iterator(self, table_builder): + iterator = table_builder._create_row_iterator(over="body") + assert isinstance(iterator, RowBodyIterator) + + def test_create_body_wrong_kwarg_raises(self, table_builder): + with pytest.raises(ValueError, match="must be either 'header' or 'body'"): + table_builder._create_row_iterator(over="SOMETHING BAD") + + +class TestRowStringConverter: + @pytest.mark.parametrize( + "row_num, expected", + [ + (0, r"{} & Design & ratio & xy \\"), + (1, r"0 & 1 & 4 & 10 \\"), + (2, r"1 & 2 & 5 & 11 \\"), + ], + ) + def test_get_strrow_normal_without_escape(self, row_num, expected): + df = DataFrame({r"Design": [1, 2, 3], r"ratio": [4, 5, 6], r"xy": [10, 11, 12]}) + row_string_converter = RowStringConverter( + formatter=DataFrameFormatter(df, escape=True), + ) + assert row_string_converter.get_strrow(row_num=row_num) == expected + + @pytest.mark.parametrize( + "row_num, expected", + [ + (0, r"{} & Design \# & ratio, \% & x\&y \\"), + (1, r"0 & 1 & 4 & 10 \\"), + (2, r"1 & 2 & 5 & 11 \\"), + ], + ) + def test_get_strrow_normal_with_escape(self, row_num, expected): + df = DataFrame( + {r"Design #": [1, 2, 3], r"ratio, %": [4, 5, 6], r"x&y": [10, 11, 12]} + ) + row_string_converter = RowStringConverter( + formatter=DataFrameFormatter(df, escape=True), + ) + assert row_string_converter.get_strrow(row_num=row_num) == expected + + @pytest.mark.parametrize( + "row_num, expected", + [ + 
(0, r"{} & \multicolumn{2}{r}{c1} & \multicolumn{2}{r}{c2} & c3 \\"), + (1, r"{} & 0 & 1 & 0 & 1 & 0 \\"), + (2, r"0 & 0 & 5 & 0 & 5 & 0 \\"), + ], + ) + def test_get_strrow_multindex_multicolumn(self, row_num, expected): + df = DataFrame( + { + ("c1", 0): {x: x for x in range(5)}, + ("c1", 1): {x: x + 5 for x in range(5)}, + ("c2", 0): {x: x for x in range(5)}, + ("c2", 1): {x: x + 5 for x in range(5)}, + ("c3", 0): {x: x for x in range(5)}, + } + ) + + row_string_converter = RowStringConverter( + formatter=DataFrameFormatter(df), + multicolumn=True, + multicolumn_format="r", + multirow=True, + ) + + assert row_string_converter.get_strrow(row_num=row_num) == expected + + def test_future_warning(self): + df = DataFrame([[1]]) + msg = ( + "In future versions `DataFrame.to_latex` is expected to utilise the base " + "implementation of `Styler.to_latex` for formatting and rendering. " + "The arguments signature may therefore change. It is recommended instead " + "to use `DataFrame.style.to_latex` which also contains additional " + "functionality." + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + df.to_latex() diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/test_to_markdown.py b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/test_to_markdown.py new file mode 100644 index 0000000..2bd0d11 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/test_to_markdown.py @@ -0,0 +1,101 @@ +from io import StringIO + +import pytest + +import pandas as pd +import pandas._testing as tm + +pytest.importorskip("tabulate") + + +def test_simple(): + buf = StringIO() + df = pd.DataFrame([1, 2, 3]) + df.to_markdown(buf=buf) + result = buf.getvalue() + assert ( + result == "| | 0 |\n|---:|----:|\n| 0 | 1 |\n| 1 | 2 |\n| 2 | 3 |" + ) + + +def test_empty_frame(): + buf = StringIO() + df = pd.DataFrame({"id": [], "first_name": [], "last_name": []}).set_index("id") + df.to_markdown(buf=buf) + result = buf.getvalue() + assert result == ( + "| id | first_name | last_name |\n" + "|------|--------------|-------------|" + ) + + +def test_other_tablefmt(): + buf = StringIO() + df = pd.DataFrame([1, 2, 3]) + df.to_markdown(buf=buf, tablefmt="jira") + result = buf.getvalue() + assert result == "|| || 0 ||\n| 0 | 1 |\n| 1 | 2 |\n| 2 | 3 |" + + +def test_other_headers(): + buf = StringIO() + df = pd.DataFrame([1, 2, 3]) + df.to_markdown(buf=buf, headers=["foo", "bar"]) + result = buf.getvalue() + assert result == ( + "| foo | bar |\n|------:|------:|\n| 0 " + "| 1 |\n| 1 | 2 |\n| 2 | 3 |" + ) + + +def test_series(): + buf = StringIO() + s = pd.Series([1, 2, 3], name="foo") + s.to_markdown(buf=buf) + result = buf.getvalue() + assert result == ( + "| | foo |\n|---:|------:|\n| 0 | 1 " + "|\n| 1 | 2 |\n| 2 | 3 |" + ) + + +def test_no_buf(capsys): + df = pd.DataFrame([1, 2, 3]) + result = df.to_markdown() + assert ( + result == "| | 0 |\n|---:|----:|\n| 0 | 1 |\n| 1 | 2 |\n| 2 | 3 |" + ) + + +@pytest.mark.parametrize("index", [True, False, None]) +@pytest.mark.parametrize("showindex", [True, False, None]) +def test_index(index, showindex): + # GH 32667 + kwargs = {} + if index is not None: + kwargs["index"] = index + if showindex is not None: + kwargs["showindex"] = showindex + + df = pd.DataFrame([1, 2, 3]) + yes_index_result = ( + "| | 0 |\n|---:|----:|\n| 0 | 1 |\n| 1 | 2 |\n| 2 | 3 |" + ) + no_index_result = "| 0 |\n|----:|\n| 1 |\n| 2 |\n| 3 |" + + warning = FutureWarning if "showindex" in kwargs else None + with tm.assert_produces_warning(warning): + 
result = df.to_markdown(**kwargs) + + if "showindex" in kwargs: + # give showindex higher priority if specified + if showindex: + expected = yes_index_result + else: + expected = no_index_result + else: + if index in [True, None]: + expected = yes_index_result + else: + expected = no_index_result + assert result == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/formats/test_to_string.py b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/test_to_string.py new file mode 100644 index 0000000..5e7aeb7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/formats/test_to_string.py @@ -0,0 +1,340 @@ +from datetime import datetime +from io import StringIO +from textwrap import dedent + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Series, + option_context, + to_datetime, +) + + +def test_repr_embedded_ndarray(): + arr = np.empty(10, dtype=[("err", object)]) + for i in range(len(arr)): + arr["err"][i] = np.random.randn(i) + + df = DataFrame(arr) + repr(df["err"]) + repr(df) + df.to_string() + + +def test_repr_tuples(): + buf = StringIO() + + df = DataFrame({"tups": list(zip(range(10), range(10)))}) + repr(df) + df.to_string(col_space=10, buf=buf) + + +def test_to_string_truncate(): + # GH 9784 - dont truncate when calling DataFrame.to_string + df = DataFrame( + [ + { + "a": "foo", + "b": "bar", + "c": "let's make this a very VERY long line that is longer " + "than the default 50 character limit", + "d": 1, + }, + {"a": "foo", "b": "bar", "c": "stuff", "d": 1}, + ] + ) + df.set_index(["a", "b", "c"]) + assert df.to_string() == ( + " a b " + " c d\n" + "0 foo bar let's make this a very VERY long line t" + "hat is longer than the default 50 character limit 1\n" + "1 foo bar " + " stuff 1" + ) + with option_context("max_colwidth", 20): + # the display option has no effect on the to_string method + assert df.to_string() == ( + " a b " + " c d\n" + "0 foo bar let's make this a very VERY long line t" + "hat is longer than the default 50 character limit 1\n" + "1 foo bar " + " stuff 1" + ) + assert df.to_string(max_colwidth=20) == ( + " a b c d\n" + "0 foo bar let's make this ... 1\n" + "1 foo bar stuff 1" + ) + + +@pytest.mark.parametrize( + "input_array, expected", + [ + ("a", "a"), + (["a", "b"], "a\nb"), + ([1, "a"], "1\na"), + (1, "1"), + ([0, -1], " 0\n-1"), + (1.0, "1.0"), + ([" a", " b"], " a\n b"), + ([".1", "1"], ".1\n 1"), + (["10", "-10"], " 10\n-10"), + ], +) +def test_format_remove_leading_space_series(input_array, expected): + # GH: 24980 + s = Series(input_array).to_string(index=False) + assert s == expected + + +@pytest.mark.parametrize( + "input_array, expected", + [ + ({"A": ["a"]}, "A\na"), + ({"A": ["a", "b"], "B": ["c", "dd"]}, "A B\na c\nb dd"), + ({"A": ["a", 1], "B": ["aa", 1]}, "A B\na aa\n1 1"), + ], +) +def test_format_remove_leading_space_dataframe(input_array, expected): + # GH: 24980 + df = DataFrame(input_array).to_string(index=False) + assert df == expected + + +@pytest.mark.parametrize( + "max_cols, max_rows, expected", + [ + ( + 10, + None, + " 0 1 2 3 4 ... 6 7 8 9 10\n" + " 0 0 0 0 0 ... 0 0 0 0 0\n" + " 0 0 0 0 0 ... 0 0 0 0 0\n" + " 0 0 0 0 0 ... 0 0 0 0 0\n" + " 0 0 0 0 0 ... 0 0 0 0 0", + ), + ( + None, + 2, + " 0 1 2 3 4 5 6 7 8 9 10\n" + " 0 0 0 0 0 0 0 0 0 0 0\n" + " .. .. .. .. .. .. .. .. .. .. ..\n" + " 0 0 0 0 0 0 0 0 0 0 0", + ), + ( + 10, + 2, + " 0 1 2 3 4 ... 6 7 8 9 10\n" + " 0 0 0 0 0 ... 0 0 0 0 0\n" + " .. .. .. .. .. ... .. .. .. .. ..\n" + " 0 0 0 0 0 ... 
0 0 0 0 0", + ), + ( + 9, + 2, + " 0 1 2 3 ... 7 8 9 10\n" + " 0 0 0 0 ... 0 0 0 0\n" + " .. .. .. .. ... .. .. .. ..\n" + " 0 0 0 0 ... 0 0 0 0", + ), + ( + 1, + 1, + " 0 ...\n 0 ...\n.. ...", + ), + ], +) +def test_truncation_no_index(max_cols, max_rows, expected): + df = DataFrame([[0] * 11] * 4) + assert df.to_string(index=False, max_cols=max_cols, max_rows=max_rows) == expected + + +def test_to_string_unicode_columns(float_frame): + df = DataFrame({"\u03c3": np.arange(10.0)}) + + buf = StringIO() + df.to_string(buf=buf) + buf.getvalue() + + buf = StringIO() + df.info(buf=buf) + buf.getvalue() + + result = float_frame.to_string() + assert isinstance(result, str) + + +def test_to_string_utf8_columns(): + n = "\u05d0".encode() + + with option_context("display.max_rows", 1): + df = DataFrame([1, 2], columns=[n]) + repr(df) + + +def test_to_string_unicode_two(): + dm = DataFrame({"c/\u03c3": []}) + buf = StringIO() + dm.to_string(buf) + + +def test_to_string_unicode_three(): + dm = DataFrame(["\xc2"]) + buf = StringIO() + dm.to_string(buf) + + +def test_to_string_with_formatters(): + df = DataFrame( + { + "int": [1, 2, 3], + "float": [1.0, 2.0, 3.0], + "object": [(1, 2), True, False], + }, + columns=["int", "float", "object"], + ) + + formatters = [ + ("int", lambda x: f"0x{x:x}"), + ("float", lambda x: f"[{x: 4.1f}]"), + ("object", lambda x: f"-{x!s}-"), + ] + result = df.to_string(formatters=dict(formatters)) + result2 = df.to_string(formatters=list(zip(*formatters))[1]) + assert result == ( + " int float object\n" + "0 0x1 [ 1.0] -(1, 2)-\n" + "1 0x2 [ 2.0] -True-\n" + "2 0x3 [ 3.0] -False-" + ) + assert result == result2 + + +def test_to_string_with_datetime64_monthformatter(): + months = [datetime(2016, 1, 1), datetime(2016, 2, 2)] + x = DataFrame({"months": months}) + + def format_func(x): + return x.strftime("%Y-%m") + + result = x.to_string(formatters={"months": format_func}) + expected = dedent( + """\ + months + 0 2016-01 + 1 2016-02""" + ) + assert result.strip() == expected + + +def test_to_string_with_datetime64_hourformatter(): + x = DataFrame( + {"hod": to_datetime(["10:10:10.100", "12:12:12.120"], format="%H:%M:%S.%f")} + ) + + def format_func(x): + return x.strftime("%H:%M") + + result = x.to_string(formatters={"hod": format_func}) + expected = dedent( + """\ + hod + 0 10:10 + 1 12:12""" + ) + assert result.strip() == expected + + +def test_to_string_with_formatters_unicode(): + df = DataFrame({"c/\u03c3": [1, 2, 3]}) + result = df.to_string(formatters={"c/\u03c3": str}) + expected = dedent( + """\ + c/\u03c3 + 0 1 + 1 2 + 2 3""" + ) + assert result == expected + + +def test_to_string_complex_number_trims_zeros(): + s = Series([1.000000 + 1.000000j, 1.0 + 1.0j, 1.05 + 1.0j]) + result = s.to_string() + expected = dedent( + """\ + 0 1.00+1.00j + 1 1.00+1.00j + 2 1.05+1.00j""" + ) + assert result == expected + + +def test_nullable_float_to_string(float_ea_dtype): + # https://github.com/pandas-dev/pandas/issues/36775 + dtype = float_ea_dtype + s = Series([0.0, 1.0, None], dtype=dtype) + result = s.to_string() + expected = dedent( + """\ + 0 0.0 + 1 1.0 + 2 """ + ) + assert result == expected + + +def test_nullable_int_to_string(any_int_ea_dtype): + # https://github.com/pandas-dev/pandas/issues/36775 + dtype = any_int_ea_dtype + s = Series([0, 1, None], dtype=dtype) + result = s.to_string() + expected = dedent( + """\ + 0 0 + 1 1 + 2 """ + ) + assert result == expected + + +@pytest.mark.parametrize("na_rep", ["NaN", "Ted"]) +def test_to_string_na_rep_and_float_format(na_rep): + 
# GH 13828
+    df = DataFrame([["A", 1.2225], ["A", None]], columns=["Group", "Data"])
+    result = df.to_string(na_rep=na_rep, float_format="{:.2f}".format)
+    expected = dedent(
+        f"""\
+        Group Data
+        0 A 1.22
+        1 A {na_rep}"""
+    )
+    assert result == expected
+
+
+@pytest.mark.parametrize(
+    "data,expected",
+    [
+        (
+            {"col1": [1, 2], "col2": [3, 4]},
+            " col1 col2\n0 1 3\n1 2 4",
+        ),
+        (
+            {"col1": ["Abc", 0.756], "col2": [np.nan, 4.5435]},
+            " col1 col2\n0 Abc NaN\n1 0.756 4.5435",
+        ),
+        (
+            {"col1": [np.nan, "a"], "col2": [0.009, 3.543], "col3": ["Abc", 23]},
+            " col1 col2 col3\n0 NaN 0.009 Abc\n1 a 3.543 23",
+        ),
+    ],
+)
+def test_to_string_max_rows_zero(data, expected):
+    # GH35394
+    result = DataFrame(data=data).to_string(max_rows=0)
+    assert result == expected
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/generate_legacy_storage_files.py b/.local/lib/python3.8/site-packages/pandas/tests/io/generate_legacy_storage_files.py
new file mode 100644
index 0000000..8f03655
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/tests/io/generate_legacy_storage_files.py
@@ -0,0 +1,345 @@
+"""
+Self-contained script to write legacy storage pickle files.
+
+To use this script, create an environment where you want to
+generate pickles, say it is for 0.20.3, with your pandas clone
+in ~/pandas
+
+. activate pandas_0.20.3
+cd ~/pandas/pandas
+
+$ python -m tests.io.generate_legacy_storage_files \
+    tests/io/data/legacy_pickle/0.20.3/ pickle
+
+This script generates a storage file for the current arch, system,
+and python version
+  pandas version: 0.20.3
+  output dir : pandas/pandas/tests/io/data/legacy_pickle/0.20.3/
+  storage format: pickle
+created pickle file: 0.20.3_x86_64_darwin_3.5.2.pickle
+
+The idea here is that you are using the *current* version of
+generate_legacy_storage_files with an *older* version of pandas to
+generate a pickle file. We will then check this file into a current
+branch, and test using test_pickle.py. This will load the *older*
+pickles and test versus the current data that is generated
+(with main). These are then compared.
+
+If we have cases where we changed the signature (e.g. we renamed
+offset -> freq in Timestamp), then we have to conditionally execute
+in generate_legacy_storage_files.py to make it
+run under both the older AND the newer version.
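+
+A generated file can be sanity-checked by loading it back under the
+same pandas version that wrote it (an illustrative snippet, not part
+of the original workflow):
+
+    import pickle
+    with open("0.20.3_x86_64_darwin_3.5.2.pickle", "rb") as fh:
+        data = pickle.load(fh)
+    # `data` is the dict built by create_pickle_data() below, keyed by
+    # kind: "series", "frame", "index", "scalars", "offsets", ...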
+ +""" + +from datetime import timedelta +import os +import pickle +import platform as pl +import sys + +import numpy as np + +import pandas +from pandas import ( + Categorical, + DataFrame, + Index, + MultiIndex, + NaT, + Period, + RangeIndex, + Series, + Timestamp, + bdate_range, + date_range, + interval_range, + period_range, + timedelta_range, +) +from pandas.arrays import SparseArray + +from pandas.tseries.offsets import ( + FY5253, + BusinessDay, + BusinessHour, + CustomBusinessDay, + DateOffset, + Day, + Easter, + Hour, + LastWeekOfMonth, + Minute, + MonthBegin, + MonthEnd, + QuarterBegin, + QuarterEnd, + SemiMonthBegin, + SemiMonthEnd, + Week, + WeekOfMonth, + YearBegin, + YearEnd, +) + + +def _create_sp_series(): + nan = np.nan + + # nan-based + arr = np.arange(15, dtype=np.float64) + arr[7:12] = nan + arr[-1:] = nan + + bseries = Series(SparseArray(arr, kind="block")) + bseries.name = "bseries" + return bseries + + +def _create_sp_tsseries(): + nan = np.nan + + # nan-based + arr = np.arange(15, dtype=np.float64) + arr[7:12] = nan + arr[-1:] = nan + + date_index = bdate_range("1/1/2011", periods=len(arr)) + bseries = Series(SparseArray(arr, kind="block"), index=date_index) + bseries.name = "btsseries" + return bseries + + +def _create_sp_frame(): + nan = np.nan + + data = { + "A": [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], + "B": [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], + "C": np.arange(10).astype(np.int64), + "D": [0, 1, 2, 3, 4, 5, nan, nan, nan, nan], + } + + dates = bdate_range("1/1/2011", periods=10) + return DataFrame(data, index=dates).apply(SparseArray) + + +def create_data(): + """create the pickle data""" + data = { + "A": [0.0, 1.0, 2.0, 3.0, np.nan], + "B": [0, 1, 0, 1, 0], + "C": ["foo1", "foo2", "foo3", "foo4", "foo5"], + "D": date_range("1/1/2009", periods=5), + "E": [0.0, 1, Timestamp("20100101"), "foo", 2.0], + } + + scalars = {"timestamp": Timestamp("20130101"), "period": Period("2012", "M")} + + index = { + "int": Index(np.arange(10)), + "date": date_range("20130101", periods=10), + "period": period_range("2013-01-01", freq="M", periods=10), + "float": Index(np.arange(10, dtype=np.float64)), + "uint": Index(np.arange(10, dtype=np.uint64)), + "timedelta": timedelta_range("00:00:00", freq="30T", periods=10), + } + + index["range"] = RangeIndex(10) + + index["interval"] = interval_range(0, periods=10) + + mi = { + "reg2": MultiIndex.from_tuples( + tuple( + zip( + *[ + ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + ) + ), + names=["first", "second"], + ) + } + + series = { + "float": Series(data["A"]), + "int": Series(data["B"]), + "mixed": Series(data["E"]), + "ts": Series( + np.arange(10).astype(np.int64), index=date_range("20130101", periods=10) + ), + "mi": Series( + np.arange(5).astype(np.float64), + index=MultiIndex.from_tuples( + tuple(zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])), names=["one", "two"] + ), + ), + "dup": Series(np.arange(5).astype(np.float64), index=["A", "B", "C", "D", "A"]), + "cat": Series(Categorical(["foo", "bar", "baz"])), + "dt": Series(date_range("20130101", periods=5)), + "dt_tz": Series(date_range("20130101", periods=5, tz="US/Eastern")), + "period": Series([Period("2000Q1")] * 5), + } + + mixed_dup_df = DataFrame(data) + mixed_dup_df.columns = list("ABCDA") + frame = { + "float": DataFrame({"A": series["float"], "B": series["float"] + 1}), + "int": DataFrame({"A": series["int"], "B": series["int"] + 1}), + "mixed": DataFrame({k: data[k] for k in ["A", "B", "C", 
"D"]}), + "mi": DataFrame( + {"A": np.arange(5).astype(np.float64), "B": np.arange(5).astype(np.int64)}, + index=MultiIndex.from_tuples( + tuple( + zip( + *[ + ["bar", "bar", "baz", "baz", "baz"], + ["one", "two", "one", "two", "three"], + ] + ) + ), + names=["first", "second"], + ), + ), + "dup": DataFrame( + np.arange(15).reshape(5, 3).astype(np.float64), columns=["A", "B", "A"] + ), + "cat_onecol": DataFrame({"A": Categorical(["foo", "bar"])}), + "cat_and_float": DataFrame( + { + "A": Categorical(["foo", "bar", "baz"]), + "B": np.arange(3).astype(np.int64), + } + ), + "mixed_dup": mixed_dup_df, + "dt_mixed_tzs": DataFrame( + { + "A": Timestamp("20130102", tz="US/Eastern"), + "B": Timestamp("20130603", tz="CET"), + }, + index=range(5), + ), + "dt_mixed2_tzs": DataFrame( + { + "A": Timestamp("20130102", tz="US/Eastern"), + "B": Timestamp("20130603", tz="CET"), + "C": Timestamp("20130603", tz="UTC"), + }, + index=range(5), + ), + } + + cat = { + "int8": Categorical(list("abcdefg")), + "int16": Categorical(np.arange(1000)), + "int32": Categorical(np.arange(10000)), + } + + timestamp = { + "normal": Timestamp("2011-01-01"), + "nat": NaT, + "tz": Timestamp("2011-01-01", tz="US/Eastern"), + } + + timestamp["freq"] = Timestamp("2011-01-01", freq="D") + timestamp["both"] = Timestamp("2011-01-01", tz="Asia/Tokyo", freq="M") + + off = { + "DateOffset": DateOffset(years=1), + "DateOffset_h_ns": DateOffset(hour=6, nanoseconds=5824), + "BusinessDay": BusinessDay(offset=timedelta(seconds=9)), + "BusinessHour": BusinessHour(normalize=True, n=6, end="15:14"), + "CustomBusinessDay": CustomBusinessDay(weekmask="Mon Fri"), + "SemiMonthBegin": SemiMonthBegin(day_of_month=9), + "SemiMonthEnd": SemiMonthEnd(day_of_month=24), + "MonthBegin": MonthBegin(1), + "MonthEnd": MonthEnd(1), + "QuarterBegin": QuarterBegin(1), + "QuarterEnd": QuarterEnd(1), + "Day": Day(1), + "YearBegin": YearBegin(1), + "YearEnd": YearEnd(1), + "Week": Week(1), + "Week_Tues": Week(2, normalize=False, weekday=1), + "WeekOfMonth": WeekOfMonth(week=3, weekday=4), + "LastWeekOfMonth": LastWeekOfMonth(n=1, weekday=3), + "FY5253": FY5253(n=2, weekday=6, startingMonth=7, variation="last"), + "Easter": Easter(), + "Hour": Hour(1), + "Minute": Minute(1), + } + + return { + "series": series, + "frame": frame, + "index": index, + "scalars": scalars, + "mi": mi, + "sp_series": {"float": _create_sp_series(), "ts": _create_sp_tsseries()}, + "sp_frame": {"float": _create_sp_frame()}, + "cat": cat, + "timestamp": timestamp, + "offsets": off, + } + + +def create_pickle_data(): + data = create_data() + + return data + + +def platform_name(): + return "_".join( + [ + str(pandas.__version__), + str(pl.machine()), + str(pl.system().lower()), + str(pl.python_version()), + ] + ) + + +def write_legacy_pickles(output_dir): + + version = pandas.__version__ + + print( + "This script generates a storage file for the current arch, system, " + "and python version" + ) + print(f" pandas version: {version}") + print(f" output dir : {output_dir}") + print(" storage format: pickle") + + pth = f"{platform_name()}.pickle" + + with open(os.path.join(output_dir, pth), "wb") as fh: + pickle.dump(create_pickle_data(), fh, pickle.DEFAULT_PROTOCOL) + + print(f"created pickle file: {pth}") + + +def write_legacy_file(): + # force our cwd to be the first searched + sys.path.insert(0, ".") + + if not (3 <= len(sys.argv) <= 4): + exit( + "Specify output directory and storage type: generate_legacy_" + "storage_files.py " + ) + + output_dir = str(sys.argv[1]) + storage_type = 
str(sys.argv[2]) + + if storage_type == "pickle": + write_legacy_pickles(output_dir=output_dir) + else: + exit("storage_type must be one of {'pickle'}") + + +if __name__ == "__main__": + write_legacy_file() diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/json/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/io/json/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/json/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/json/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..63b04d2 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/json/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/json/__pycache__/conftest.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/json/__pycache__/conftest.cpython-38.pyc new file mode 100644 index 0000000..31e00ea Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/json/__pycache__/conftest.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/json/__pycache__/test_compression.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/json/__pycache__/test_compression.cpython-38.pyc new file mode 100644 index 0000000..0aa70bc Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/json/__pycache__/test_compression.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/json/__pycache__/test_deprecated_kwargs.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/json/__pycache__/test_deprecated_kwargs.cpython-38.pyc new file mode 100644 index 0000000..3fff26c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/json/__pycache__/test_deprecated_kwargs.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/json/__pycache__/test_json_table_schema.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/json/__pycache__/test_json_table_schema.cpython-38.pyc new file mode 100644 index 0000000..c89ce9a Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/json/__pycache__/test_json_table_schema.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/json/__pycache__/test_json_table_schema_ext_dtype.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/json/__pycache__/test_json_table_schema_ext_dtype.cpython-38.pyc new file mode 100644 index 0000000..3bcb811 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/json/__pycache__/test_json_table_schema_ext_dtype.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/json/__pycache__/test_normalize.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/json/__pycache__/test_normalize.cpython-38.pyc new file mode 100644 index 0000000..57ff946 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/json/__pycache__/test_normalize.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/json/__pycache__/test_pandas.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/json/__pycache__/test_pandas.cpython-38.pyc new file mode 100644 index 0000000..6d9a32a Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/pandas/tests/io/json/__pycache__/test_pandas.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/json/__pycache__/test_readlines.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/json/__pycache__/test_readlines.cpython-38.pyc new file mode 100644 index 0000000..9b166eb Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/json/__pycache__/test_readlines.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/json/__pycache__/test_ujson.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/json/__pycache__/test_ujson.cpython-38.pyc new file mode 100644 index 0000000..6d2c05e Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/json/__pycache__/test_ujson.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/json/conftest.py b/.local/lib/python3.8/site-packages/pandas/tests/io/json/conftest.py new file mode 100644 index 0000000..4e848cd --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/json/conftest.py @@ -0,0 +1,9 @@ +import pytest + + +@pytest.fixture(params=["split", "records", "index", "columns", "values"]) +def orient(request): + """ + Fixture for orients excluding the table format. + """ + return request.param diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/json/test_compression.py b/.local/lib/python3.8/site-packages/pandas/tests/io/json/test_compression.py new file mode 100644 index 0000000..a752c93 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/json/test_compression.py @@ -0,0 +1,122 @@ +from io import BytesIO + +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +import pandas._testing as tm + +import pandas.io.common as icom + + +def test_compression_roundtrip(compression): + df = pd.DataFrame( + [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], + index=["A", "B"], + columns=["X", "Y", "Z"], + ) + + with tm.ensure_clean() as path: + df.to_json(path, compression=compression) + tm.assert_frame_equal(df, pd.read_json(path, compression=compression)) + + # explicitly ensure file was compressed. 
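+        # (reading the raw bytes back through tm.decompress_file and
+        # re-parsing them proves that the on-disk payload really was
+        # compressed, not merely written under a compressed filename)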
+ with tm.decompress_file(path, compression) as fh: + result = fh.read().decode("utf8") + tm.assert_frame_equal(df, pd.read_json(result)) + + +def test_read_zipped_json(datapath): + uncompressed_path = datapath("io", "json", "data", "tsframe_v012.json") + uncompressed_df = pd.read_json(uncompressed_path) + + compressed_path = datapath("io", "json", "data", "tsframe_v012.json.zip") + compressed_df = pd.read_json(compressed_path, compression="zip") + + tm.assert_frame_equal(uncompressed_df, compressed_df) + + +@td.skip_if_not_us_locale +def test_with_s3_url(compression, s3_resource, s3so): + # Bucket "pandas-test" created in tests/io/conftest.py + + df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}') + + with tm.ensure_clean() as path: + df.to_json(path, compression=compression) + with open(path, "rb") as f: + s3_resource.Bucket("pandas-test").put_object(Key="test-1", Body=f) + + roundtripped_df = pd.read_json( + "s3://pandas-test/test-1", compression=compression, storage_options=s3so + ) + tm.assert_frame_equal(df, roundtripped_df) + + +def test_lines_with_compression(compression): + + with tm.ensure_clean() as path: + df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}') + df.to_json(path, orient="records", lines=True, compression=compression) + roundtripped_df = pd.read_json(path, lines=True, compression=compression) + tm.assert_frame_equal(df, roundtripped_df) + + +def test_chunksize_with_compression(compression): + + with tm.ensure_clean() as path: + df = pd.read_json('{"a": ["foo", "bar", "baz"], "b": [4, 5, 6]}') + df.to_json(path, orient="records", lines=True, compression=compression) + + with pd.read_json( + path, lines=True, chunksize=1, compression=compression + ) as res: + roundtripped_df = pd.concat(res) + tm.assert_frame_equal(df, roundtripped_df) + + +def test_write_unsupported_compression_type(): + df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}') + with tm.ensure_clean() as path: + msg = "Unrecognized compression type: unsupported" + with pytest.raises(ValueError, match=msg): + df.to_json(path, compression="unsupported") + + +def test_read_unsupported_compression_type(): + with tm.ensure_clean() as path: + msg = "Unrecognized compression type: unsupported" + with pytest.raises(ValueError, match=msg): + pd.read_json(path, compression="unsupported") + + +@pytest.mark.parametrize("to_infer", [True, False]) +@pytest.mark.parametrize("read_infer", [True, False]) +def test_to_json_compression(compression_only, read_infer, to_infer): + # see gh-15008 + compression = compression_only + + # We'll complete file extension subsequently. + filename = "test." 
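+    # e.g. the "gzip" codec maps to the ".gz" suffix here, so the
+    # "infer" mode exercised below can recover the compression type
+    # from the filename alone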
+ filename += icom._compression_to_extension[compression] + + df = pd.DataFrame({"A": [1]}) + + to_compression = "infer" if to_infer else compression + read_compression = "infer" if read_infer else compression + + with tm.ensure_clean(filename) as path: + df.to_json(path, compression=to_compression) + result = pd.read_json(path, compression=read_compression) + tm.assert_frame_equal(result, df) + + +def test_to_json_compression_mode(compression): + # GH 39985 (read_json does not support user-provided binary files) + expected = pd.DataFrame({"A": [1]}) + + with BytesIO() as buffer: + expected.to_json(buffer, compression=compression) + # df = pd.read_json(buffer, compression=compression) + # tm.assert_frame_equal(expected, df) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/json/test_deprecated_kwargs.py b/.local/lib/python3.8/site-packages/pandas/tests/io/json/test_deprecated_kwargs.py new file mode 100644 index 0000000..79245bc --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/json/test_deprecated_kwargs.py @@ -0,0 +1,31 @@ +""" +Tests for the deprecated keyword arguments for `read_json`. +""" + +import pandas as pd +import pandas._testing as tm + +from pandas.io.json import read_json + + +def test_deprecated_kwargs(): + df = pd.DataFrame({"A": [2, 4, 6], "B": [3, 6, 9]}, index=[0, 1, 2]) + buf = df.to_json(orient="split") + with tm.assert_produces_warning(FutureWarning): + tm.assert_frame_equal(df, read_json(buf, "split")) + buf = df.to_json(orient="columns") + with tm.assert_produces_warning(FutureWarning): + tm.assert_frame_equal(df, read_json(buf, "columns")) + buf = df.to_json(orient="index") + with tm.assert_produces_warning(FutureWarning): + tm.assert_frame_equal(df, read_json(buf, "index")) + + +def test_good_kwargs(): + df = pd.DataFrame({"A": [2, 4, 6], "B": [3, 6, 9]}, index=[0, 1, 2]) + with tm.assert_produces_warning(None): + tm.assert_frame_equal(df, read_json(df.to_json(orient="split"), orient="split")) + tm.assert_frame_equal( + df, read_json(df.to_json(orient="columns"), orient="columns") + ) + tm.assert_frame_equal(df, read_json(df.to_json(orient="index"), orient="index")) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/json/test_json_table_schema.py b/.local/lib/python3.8/site-packages/pandas/tests/io/json/test_json_table_schema.py new file mode 100644 index 0000000..f7aade1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/json/test_json_table_schema.py @@ -0,0 +1,813 @@ +"""Tests for Table Schema integration.""" +from collections import OrderedDict +import json + +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + DatetimeTZDtype, + PeriodDtype, +) + +import pandas as pd +from pandas import DataFrame +import pandas._testing as tm + +from pandas.io.json._table_schema import ( + as_json_table_type, + build_table_schema, + convert_json_field_to_pandas_type, + convert_pandas_type_to_json_field, + set_default_names, +) + + +class TestBuildSchema: + def setup_method(self, method): + self.df = DataFrame( + { + "A": [1, 2, 3, 4], + "B": ["a", "b", "c", "c"], + "C": pd.date_range("2016-01-01", freq="d", periods=4), + "D": pd.timedelta_range("1H", periods=4, freq="T"), + }, + index=pd.Index(range(4), name="idx"), + ) + + def test_build_table_schema(self): + result = build_table_schema(self.df, version=False) + expected = { + "fields": [ + {"name": "idx", "type": "integer"}, + {"name": "A", "type": "integer"}, + {"name": "B", "type": "string"}, + {"name": "C", 
"type": "datetime"}, + {"name": "D", "type": "duration"}, + ], + "primaryKey": ["idx"], + } + assert result == expected + result = build_table_schema(self.df) + assert "pandas_version" in result + + def test_series(self): + s = pd.Series([1, 2, 3], name="foo") + result = build_table_schema(s, version=False) + expected = { + "fields": [ + {"name": "index", "type": "integer"}, + {"name": "foo", "type": "integer"}, + ], + "primaryKey": ["index"], + } + assert result == expected + result = build_table_schema(s) + assert "pandas_version" in result + + def test_series_unnamed(self): + result = build_table_schema(pd.Series([1, 2, 3]), version=False) + expected = { + "fields": [ + {"name": "index", "type": "integer"}, + {"name": "values", "type": "integer"}, + ], + "primaryKey": ["index"], + } + assert result == expected + + def test_multiindex(self): + df = self.df.copy() + idx = pd.MultiIndex.from_product([("a", "b"), (1, 2)]) + df.index = idx + + result = build_table_schema(df, version=False) + expected = { + "fields": [ + {"name": "level_0", "type": "string"}, + {"name": "level_1", "type": "integer"}, + {"name": "A", "type": "integer"}, + {"name": "B", "type": "string"}, + {"name": "C", "type": "datetime"}, + {"name": "D", "type": "duration"}, + ], + "primaryKey": ["level_0", "level_1"], + } + assert result == expected + + df.index.names = ["idx0", None] + expected["fields"][0]["name"] = "idx0" + expected["primaryKey"] = ["idx0", "level_1"] + result = build_table_schema(df, version=False) + assert result == expected + + +class TestTableSchemaType: + @pytest.mark.parametrize("int_type", [int, np.int16, np.int32, np.int64]) + def test_as_json_table_type_int_data(self, int_type): + int_data = [1, 2, 3] + assert as_json_table_type(np.array(int_data, dtype=int_type).dtype) == "integer" + + @pytest.mark.parametrize("float_type", [float, np.float16, np.float32, np.float64]) + def test_as_json_table_type_float_data(self, float_type): + float_data = [1.0, 2.0, 3.0] + assert ( + as_json_table_type(np.array(float_data, dtype=float_type).dtype) == "number" + ) + + @pytest.mark.parametrize("bool_type", [bool, np.bool_]) + def test_as_json_table_type_bool_data(self, bool_type): + bool_data = [True, False] + assert ( + as_json_table_type(np.array(bool_data, dtype=bool_type).dtype) == "boolean" + ) + + @pytest.mark.parametrize( + "date_data", + [ + pd.to_datetime(["2016"]), + pd.to_datetime(["2016"], utc=True), + pd.Series(pd.to_datetime(["2016"])), + pd.Series(pd.to_datetime(["2016"], utc=True)), + pd.period_range("2016", freq="A", periods=3), + ], + ) + def test_as_json_table_type_date_data(self, date_data): + assert as_json_table_type(date_data.dtype) == "datetime" + + @pytest.mark.parametrize("str_data", [pd.Series(["a", "b"]), pd.Index(["a", "b"])]) + def test_as_json_table_type_string_data(self, str_data): + assert as_json_table_type(str_data.dtype) == "string" + + @pytest.mark.parametrize( + "cat_data", + [ + pd.Categorical(["a"]), + pd.Categorical([1]), + pd.Series(pd.Categorical([1])), + pd.CategoricalIndex([1]), + pd.Categorical([1]), + ], + ) + def test_as_json_table_type_categorical_data(self, cat_data): + assert as_json_table_type(cat_data.dtype) == "any" + + # ------ + # dtypes + # ------ + @pytest.mark.parametrize("int_dtype", [int, np.int16, np.int32, np.int64]) + def test_as_json_table_type_int_dtypes(self, int_dtype): + assert as_json_table_type(int_dtype) == "integer" + + @pytest.mark.parametrize("float_dtype", [float, np.float16, np.float32, np.float64]) + def 
test_as_json_table_type_float_dtypes(self, float_dtype): + assert as_json_table_type(float_dtype) == "number" + + @pytest.mark.parametrize("bool_dtype", [bool, np.bool_]) + def test_as_json_table_type_bool_dtypes(self, bool_dtype): + assert as_json_table_type(bool_dtype) == "boolean" + + @pytest.mark.parametrize( + "date_dtype", + [ + np.datetime64, + np.dtype(" str: + return self.hexed + + hexed = "574b4454ba8c5eb4f98a8f45" + binthing = BinaryThing(hexed) + + # verify the proper conversion of printable content + df_printable = DataFrame({"A": [binthing.hexed]}) + assert df_printable.to_json() == f'{{"A":{{"0":"{hexed}"}}}}' + + # check if non-printable content throws appropriate Exception + df_nonprintable = DataFrame({"A": [binthing]}) + msg = "Unsupported UTF-8 sequence length when encoding string" + with pytest.raises(OverflowError, match=msg): + df_nonprintable.to_json() + + # the same with multiple columns threw segfaults + df_mixed = DataFrame({"A": [binthing], "B": [1]}, columns=["A", "B"]) + with pytest.raises(OverflowError, match=msg): + df_mixed.to_json() + + # default_handler should resolve exceptions for non-string types + result = df_nonprintable.to_json(default_handler=str) + expected = f'{{"A":{{"0":"{hexed}"}}}}' + assert result == expected + assert ( + df_mixed.to_json(default_handler=str) + == f'{{"A":{{"0":"{hexed}"}},"B":{{"0":1}}}}' + ) + + def test_label_overflow(self): + # GH14256: buffer length not checked when writing label + result = DataFrame({"bar" * 100000: [1], "foo": [1337]}).to_json() + expected = f'{{"{"bar" * 100000}":{{"0":1}},"foo":{{"0":1337}}}}' + assert result == expected + + def test_series_non_unique_index(self): + s = Series(["a", "b"], index=[1, 1]) + + msg = "Series index must be unique for orient='index'" + with pytest.raises(ValueError, match=msg): + s.to_json(orient="index") + + tm.assert_series_equal( + s, read_json(s.to_json(orient="split"), orient="split", typ="series") + ) + unserialized = read_json( + s.to_json(orient="records"), orient="records", typ="series" + ) + tm.assert_numpy_array_equal(s.values, unserialized.values) + + def test_series_default_orient(self, string_series): + assert string_series.to_json() == string_series.to_json(orient="index") + + @pytest.mark.parametrize("numpy", [True, False]) + def test_series_roundtrip_simple(self, orient, numpy, string_series): + data = string_series.to_json(orient=orient) + result = read_json(data, typ="series", orient=orient, numpy=numpy) + + expected = string_series + if orient in ("values", "records"): + expected = expected.reset_index(drop=True) + if orient != "split": + expected.name = None + + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("dtype", [False, None]) + @pytest.mark.parametrize("numpy", [True, False]) + def test_series_roundtrip_object(self, orient, numpy, dtype, object_series): + data = object_series.to_json(orient=orient) + result = read_json(data, typ="series", orient=orient, numpy=numpy, dtype=dtype) + + expected = object_series + if orient in ("values", "records"): + expected = expected.reset_index(drop=True) + if orient != "split": + expected.name = None + + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("numpy", [True, False]) + def test_series_roundtrip_empty(self, orient, numpy): + empty_series = Series([], index=[], dtype=np.float64) + data = empty_series.to_json(orient=orient) + result = read_json(data, typ="series", orient=orient, numpy=numpy) + + expected = empty_series + if orient in ("values", "records"): + 
expected = expected.reset_index(drop=True) + else: + expected.index = expected.index.astype(float) + + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("numpy", [True, False]) + def test_series_roundtrip_timeseries(self, orient, numpy, datetime_series): + data = datetime_series.to_json(orient=orient) + result = read_json(data, typ="series", orient=orient, numpy=numpy) + + expected = datetime_series + if orient in ("values", "records"): + expected = expected.reset_index(drop=True) + if orient != "split": + expected.name = None + + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("dtype", [np.float64, int]) + @pytest.mark.parametrize("numpy", [True, False]) + def test_series_roundtrip_numeric(self, orient, numpy, dtype): + s = Series(range(6), index=["a", "b", "c", "d", "e", "f"]) + data = s.to_json(orient=orient) + result = read_json(data, typ="series", orient=orient, numpy=numpy) + + expected = s.copy() + if orient in ("values", "records"): + expected = expected.reset_index(drop=True) + + tm.assert_series_equal(result, expected) + + def test_series_to_json_except(self): + s = Series([1, 2, 3]) + msg = "Invalid value 'garbage' for option 'orient'" + with pytest.raises(ValueError, match=msg): + s.to_json(orient="garbage") + + def test_series_from_json_precise_float(self): + s = Series([4.56, 4.56, 4.56]) + result = read_json(s.to_json(), typ="series", precise_float=True) + tm.assert_series_equal(result, s, check_index_type=False) + + def test_series_with_dtype(self): + # GH 21986 + s = Series([4.56, 4.56, 4.56]) + result = read_json(s.to_json(), typ="series", dtype=np.int64) + expected = Series([4] * 3) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "dtype,expected", + [ + (True, Series(["2000-01-01"], dtype="datetime64[ns]")), + (False, Series([946684800000])), + ], + ) + def test_series_with_dtype_datetime(self, dtype, expected): + s = Series(["2000-01-01"], dtype="datetime64[ns]") + data = s.to_json() + result = read_json(data, typ="series", dtype=dtype) + tm.assert_series_equal(result, expected) + + def test_frame_from_json_precise_float(self): + df = DataFrame([[4.56, 4.56, 4.56], [4.56, 4.56, 4.56]]) + result = read_json(df.to_json(), precise_float=True) + tm.assert_frame_equal(result, df) + + def test_typ(self): + + s = Series(range(6), index=["a", "b", "c", "d", "e", "f"], dtype="int64") + result = read_json(s.to_json(), typ=None) + tm.assert_series_equal(result, s) + + def test_reconstruction_index(self): + + df = DataFrame([[1, 2, 3], [4, 5, 6]]) + result = read_json(df.to_json()) + + tm.assert_frame_equal(result, df) + + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["A", "B", "C"]) + result = read_json(df.to_json()) + tm.assert_frame_equal(result, df) + + def test_path(self, float_frame, int_frame, datetime_frame): + with tm.ensure_clean("test.json") as path: + for df in [float_frame, int_frame, datetime_frame]: + df.to_json(path) + read_json(path) + + def test_axis_dates(self, datetime_series, datetime_frame): + + # frame + json = datetime_frame.to_json() + result = read_json(json) + tm.assert_frame_equal(result, datetime_frame) + + # series + json = datetime_series.to_json() + result = read_json(json, typ="series") + tm.assert_series_equal(result, datetime_series, check_names=False) + assert result.name is None + + def test_convert_dates(self, datetime_series, datetime_frame): + + # frame + df = datetime_frame + df["date"] = Timestamp("20130101") + + json = df.to_json() + result = read_json(json) + 
tm.assert_frame_equal(result, df) + + df["foo"] = 1.0 + json = df.to_json(date_unit="ns") + + result = read_json(json, convert_dates=False) + expected = df.copy() + expected["date"] = expected["date"].values.view("i8") + expected["foo"] = expected["foo"].astype("int64") + tm.assert_frame_equal(result, expected) + + # series + ts = Series(Timestamp("20130101"), index=datetime_series.index) + json = ts.to_json() + result = read_json(json, typ="series") + tm.assert_series_equal(result, ts) + + @pytest.mark.parametrize("date_format", ["epoch", "iso"]) + @pytest.mark.parametrize("as_object", [True, False]) + @pytest.mark.parametrize("date_typ", [datetime.date, datetime.datetime, Timestamp]) + def test_date_index_and_values(self, date_format, as_object, date_typ): + data = [date_typ(year=2020, month=1, day=1), pd.NaT] + if as_object: + data.append("a") + + ser = Series(data, index=data) + result = ser.to_json(date_format=date_format) + + if date_format == "epoch": + expected = '{"1577836800000":1577836800000,"null":null}' + else: + expected = ( + '{"2020-01-01T00:00:00.000Z":"2020-01-01T00:00:00.000Z","null":null}' + ) + + if as_object: + expected = expected.replace("}", ',"a":"a"}') + + assert result == expected + + @pytest.mark.parametrize( + "infer_word", + [ + "trade_time", + "date", + "datetime", + "sold_at", + "modified", + "timestamp", + "timestamps", + ], + ) + def test_convert_dates_infer(self, infer_word): + # GH10747 + from pandas.io.json import dumps + + data = [{"id": 1, infer_word: 1036713600000}, {"id": 2}] + expected = DataFrame( + [[1, Timestamp("2002-11-08")], [2, pd.NaT]], columns=["id", infer_word] + ) + result = read_json(dumps(data))[["id", infer_word]] + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "date,date_unit", + [ + ("20130101 20:43:42.123", None), + ("20130101 20:43:42", "s"), + ("20130101 20:43:42.123", "ms"), + ("20130101 20:43:42.123456", "us"), + ("20130101 20:43:42.123456789", "ns"), + ], + ) + def test_date_format_frame(self, date, date_unit, datetime_frame): + df = datetime_frame + + df["date"] = Timestamp(date) + df.iloc[1, df.columns.get_loc("date")] = pd.NaT + df.iloc[5, df.columns.get_loc("date")] = pd.NaT + if date_unit: + json = df.to_json(date_format="iso", date_unit=date_unit) + else: + json = df.to_json(date_format="iso") + result = read_json(json) + expected = df.copy() + expected.index = expected.index.tz_localize("UTC") + expected["date"] = expected["date"].dt.tz_localize("UTC") + tm.assert_frame_equal(result, expected) + + def test_date_format_frame_raises(self, datetime_frame): + df = datetime_frame + msg = "Invalid value 'foo' for option 'date_unit'" + with pytest.raises(ValueError, match=msg): + df.to_json(date_format="iso", date_unit="foo") + + @pytest.mark.parametrize( + "date,date_unit", + [ + ("20130101 20:43:42.123", None), + ("20130101 20:43:42", "s"), + ("20130101 20:43:42.123", "ms"), + ("20130101 20:43:42.123456", "us"), + ("20130101 20:43:42.123456789", "ns"), + ], + ) + def test_date_format_series(self, date, date_unit, datetime_series): + ts = Series(Timestamp(date), index=datetime_series.index) + ts.iloc[1] = pd.NaT + ts.iloc[5] = pd.NaT + if date_unit: + json = ts.to_json(date_format="iso", date_unit=date_unit) + else: + json = ts.to_json(date_format="iso") + result = read_json(json, typ="series") + expected = ts.copy() + expected.index = expected.index.tz_localize("UTC") + expected = expected.dt.tz_localize("UTC") + tm.assert_series_equal(result, expected) + + def test_date_format_series_raises(self, 
datetime_series): + ts = Series(Timestamp("20130101 20:43:42.123"), index=datetime_series.index) + msg = "Invalid value 'foo' for option 'date_unit'" + with pytest.raises(ValueError, match=msg): + ts.to_json(date_format="iso", date_unit="foo") + + @pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"]) + def test_date_unit(self, unit, datetime_frame): + df = datetime_frame + df["date"] = Timestamp("20130101 20:43:42") + dl = df.columns.get_loc("date") + df.iloc[1, dl] = Timestamp("19710101 20:43:42") + df.iloc[2, dl] = Timestamp("21460101 20:43:42") + df.iloc[4, dl] = pd.NaT + + json = df.to_json(date_format="epoch", date_unit=unit) + + # force date unit + result = read_json(json, date_unit=unit) + tm.assert_frame_equal(result, df) + + # detect date unit + result = read_json(json, date_unit=None) + tm.assert_frame_equal(result, df) + + def test_weird_nested_json(self): + # this used to core dump the parser + s = r"""{ + "status": "success", + "data": { + "posts": [ + { + "id": 1, + "title": "A blog post", + "body": "Some useful content" + }, + { + "id": 2, + "title": "Another blog post", + "body": "More content" + } + ] + } + }""" + + read_json(s) + + def test_doc_example(self): + dfj2 = DataFrame(np.random.randn(5, 2), columns=list("AB")) + dfj2["date"] = Timestamp("20130101") + dfj2["ints"] = range(5) + dfj2["bools"] = True + dfj2.index = pd.date_range("20130101", periods=5) + + json = dfj2.to_json() + result = read_json(json, dtype={"ints": np.int64, "bools": np.bool_}) + tm.assert_frame_equal(result, result) + + def test_misc_example(self): + + # parsing unordered input fails + result = read_json('[{"a": 1, "b": 2}, {"b":2, "a" :1}]', numpy=True) + expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) + + error_msg = """DataFrame\\.index are different + +DataFrame\\.index values are different \\(100\\.0 %\\) +\\[left\\]: Index\\(\\['a', 'b'\\], dtype='object'\\) +\\[right\\]: RangeIndex\\(start=0, stop=2, step=1\\)""" + with pytest.raises(AssertionError, match=error_msg): + tm.assert_frame_equal(result, expected, check_index_type=False) + + result = read_json('[{"a": 1, "b": 2}, {"b":2, "a" :1}]') + expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) + tm.assert_frame_equal(result, expected) + + def test_round_trip_exception_(self, datapath): + # GH 3867 + path = datapath("io", "json", "data", "teams.csv") + df = pd.read_csv(path) + s = df.to_json() + result = read_json(s) + tm.assert_frame_equal(result.reindex(index=df.index, columns=df.columns), df) + + @pytest.mark.network + @tm.network + @pytest.mark.parametrize( + "field,dtype", + [ + ["created_at", pd.DatetimeTZDtype(tz="UTC")], + ["closed_at", "datetime64[ns]"], + ["updated_at", pd.DatetimeTZDtype(tz="UTC")], + ], + ) + def test_url(self, field, dtype): + url = "https://api.github.com/repos/pandas-dev/pandas/issues?per_page=5" + result = read_json(url, convert_dates=True) + assert result[field].dtype == dtype + + def test_timedelta(self): + converter = lambda x: pd.to_timedelta(x, unit="ms") + + s = Series([timedelta(23), timedelta(seconds=5)]) + assert s.dtype == "timedelta64[ns]" + + result = read_json(s.to_json(), typ="series").apply(converter) + tm.assert_series_equal(result, s) + + s = Series([timedelta(23), timedelta(seconds=5)], index=pd.Index([0, 1])) + assert s.dtype == "timedelta64[ns]" + result = read_json(s.to_json(), typ="series").apply(converter) + tm.assert_series_equal(result, s) + + frame = DataFrame([timedelta(23), timedelta(seconds=5)]) + assert frame[0].dtype == "timedelta64[ns]" + 
tm.assert_frame_equal(frame, read_json(frame.to_json()).apply(converter)) + + frame = DataFrame( + { + "a": [timedelta(days=23), timedelta(seconds=5)], + "b": [1, 2], + "c": pd.date_range(start="20130101", periods=2), + } + ) + + result = read_json(frame.to_json(date_unit="ns")) + result["a"] = pd.to_timedelta(result.a, unit="ns") + result["c"] = pd.to_datetime(result.c) + tm.assert_frame_equal(frame, result) + + def test_mixed_timedelta_datetime(self): + frame = DataFrame({"a": [timedelta(23), Timestamp("20130101")]}, dtype=object) + + expected = DataFrame( + {"a": [pd.Timedelta(frame.a[0]).value, Timestamp(frame.a[1]).value]} + ) + result = read_json(frame.to_json(date_unit="ns"), dtype={"a": "int64"}) + tm.assert_frame_equal(result, expected, check_index_type=False) + + @pytest.mark.parametrize("as_object", [True, False]) + @pytest.mark.parametrize("date_format", ["iso", "epoch"]) + @pytest.mark.parametrize("timedelta_typ", [pd.Timedelta, timedelta]) + def test_timedelta_to_json(self, as_object, date_format, timedelta_typ): + # GH28156: to_json not correctly formatting Timedelta + data = [timedelta_typ(days=1), timedelta_typ(days=2), pd.NaT] + if as_object: + data.append("a") + + ser = Series(data, index=data) + if date_format == "iso": + expected = ( + '{"P1DT0H0M0S":"P1DT0H0M0S","P2DT0H0M0S":"P2DT0H0M0S","null":null}' + ) + else: + expected = '{"86400000":86400000,"172800000":172800000,"null":null}' + + if as_object: + expected = expected.replace("}", ',"a":"a"}') + + result = ser.to_json(date_format=date_format) + assert result == expected + + def test_default_handler(self): + value = object() + frame = DataFrame({"a": [7, value]}) + expected = DataFrame({"a": [7, str(value)]}) + result = read_json(frame.to_json(default_handler=str)) + tm.assert_frame_equal(expected, result, check_index_type=False) + + def test_default_handler_indirect(self): + from pandas.io.json import dumps + + def default(obj): + if isinstance(obj, complex): + return [("mathjs", "Complex"), ("re", obj.real), ("im", obj.imag)] + return str(obj) + + df_list = [ + 9, + DataFrame( + {"a": [1, "STR", complex(4, -5)], "b": [float("nan"), None, "N/A"]}, + columns=["a", "b"], + ), + ] + expected = ( + '[9,[[1,null],["STR",null],[[["mathjs","Complex"],' + '["re",4.0],["im",-5.0]],"N\\/A"]]]' + ) + assert dumps(df_list, default_handler=default, orient="values") == expected + + def test_default_handler_numpy_unsupported_dtype(self): + # GH12554 to_json raises 'Unhandled numpy dtype 15' + df = DataFrame( + {"a": [1, 2.3, complex(4, -5)], "b": [float("nan"), None, complex(1.2, 0)]}, + columns=["a", "b"], + ) + expected = ( + '[["(1+0j)","(nan+0j)"],' + '["(2.3+0j)","(nan+0j)"],' + '["(4-5j)","(1.2+0j)"]]' + ) + assert df.to_json(default_handler=str, orient="values") == expected + + def test_default_handler_raises(self): + msg = "raisin" + + def my_handler_raises(obj): + raise TypeError(msg) + + with pytest.raises(TypeError, match=msg): + DataFrame({"a": [1, 2, object()]}).to_json( + default_handler=my_handler_raises + ) + with pytest.raises(TypeError, match=msg): + DataFrame({"a": [1, 2, complex(4, -5)]}).to_json( + default_handler=my_handler_raises + ) + + def test_categorical(self): + # GH4377 df.to_json segfaults with non-ndarray blocks + df = DataFrame({"A": ["a", "b", "c", "a", "b", "b", "a"]}) + df["B"] = df["A"] + expected = df.to_json() + + df["B"] = df["A"].astype("category") + assert expected == df.to_json() + + s = df["A"] + sc = df["B"] + assert s.to_json() == sc.to_json() + + def test_datetime_tz(self): + # 
GH4377 df.to_json segfaults with non-ndarray blocks + tz_range = pd.date_range("20130101", periods=3, tz="US/Eastern") + tz_naive = tz_range.tz_convert("utc").tz_localize(None) + + df = DataFrame({"A": tz_range, "B": pd.date_range("20130101", periods=3)}) + + df_naive = df.copy() + df_naive["A"] = tz_naive + expected = df_naive.to_json() + assert expected == df.to_json() + + stz = Series(tz_range) + s_naive = Series(tz_naive) + assert stz.to_json() == s_naive.to_json() + + def test_sparse(self): + # GH4377 df.to_json segfaults with non-ndarray blocks + df = DataFrame(np.random.randn(10, 4)) + df.loc[:8] = np.nan + + sdf = df.astype("Sparse") + expected = df.to_json() + assert expected == sdf.to_json() + + s = Series(np.random.randn(10)) + s.loc[:8] = np.nan + ss = s.astype("Sparse") + + expected = s.to_json() + assert expected == ss.to_json() + + @pytest.mark.parametrize( + "ts", + [ + Timestamp("2013-01-10 05:00:00Z"), + Timestamp("2013-01-10 00:00:00", tz="US/Eastern"), + Timestamp("2013-01-10 00:00:00-0500"), + ], + ) + def test_tz_is_utc(self, ts): + from pandas.io.json import dumps + + exp = '"2013-01-10T05:00:00.000Z"' + + assert dumps(ts, iso_dates=True) == exp + dt = ts.to_pydatetime() + assert dumps(dt, iso_dates=True) == exp + + @pytest.mark.parametrize( + "tz_range", + [ + pd.date_range("2013-01-01 05:00:00Z", periods=2), + pd.date_range("2013-01-01 00:00:00", periods=2, tz="US/Eastern"), + pd.date_range("2013-01-01 00:00:00-0500", periods=2), + ], + ) + def test_tz_range_is_utc(self, tz_range): + from pandas.io.json import dumps + + exp = '["2013-01-01T05:00:00.000Z","2013-01-02T05:00:00.000Z"]' + dfexp = ( + '{"DT":{' + '"0":"2013-01-01T05:00:00.000Z",' + '"1":"2013-01-02T05:00:00.000Z"}}' + ) + + assert dumps(tz_range, iso_dates=True) == exp + dti = DatetimeIndex(tz_range) + assert dumps(dti, iso_dates=True) == exp + df = DataFrame({"DT": dti}) + result = dumps(df, iso_dates=True) + assert result == dfexp + + def test_read_inline_jsonl(self): + # GH9180 + result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True) + expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) + tm.assert_frame_equal(result, expected) + + @td.skip_if_not_us_locale + def test_read_s3_jsonl(self, s3_resource, s3so): + # GH17200 + + result = read_json( + "s3n://pandas-test/items.jsonl", lines=True, storage_options=s3so + ) + expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) + tm.assert_frame_equal(result, expected) + + def test_read_local_jsonl(self): + # GH17200 + with tm.ensure_clean("tmp_items.json") as path: + with open(path, "w") as infile: + infile.write('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n') + result = read_json(path, lines=True) + expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) + tm.assert_frame_equal(result, expected) + + def test_read_jsonl_unicode_chars(self): + # GH15132: non-ascii unicode characters + # \u201d == RIGHT DOUBLE QUOTATION MARK + + # simulate file handle + json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n' + json = StringIO(json) + result = read_json(json, lines=True) + expected = DataFrame([["foo\u201d", "bar"], ["foo", "bar"]], columns=["a", "b"]) + tm.assert_frame_equal(result, expected) + + # simulate string + json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n' + result = read_json(json, lines=True) + expected = DataFrame([["foo\u201d", "bar"], ["foo", "bar"]], columns=["a", "b"]) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("bigNum", [sys.maxsize + 1, -(sys.maxsize + 2)]) + def 
test_to_json_large_numbers(self, bigNum):
+        # GH34473
+        series = Series(bigNum, dtype=object, index=["articleId"])
+        json = series.to_json()
+        expected = '{"articleId":' + str(bigNum) + "}"
+        assert json == expected
+
+        df = DataFrame(bigNum, dtype=object, index=["articleId"], columns=[0])
+        json = df.to_json()
+        expected = '{"0":{"articleId":' + str(bigNum) + "}}"
+        assert json == expected
+
+    @pytest.mark.parametrize("bigNum", [-(2 ** 63) - 1, 2 ** 64])
+    def test_read_json_large_numbers(self, bigNum):
+        # GH20599, 26068
+        json = StringIO('{"articleId":' + str(bigNum) + "}")
+        msg = r"Value is too small|Value is too big"
+        with pytest.raises(ValueError, match=msg):
+            read_json(json)
+
+        json = StringIO('{"0":{"articleId":' + str(bigNum) + "}}")
+        with pytest.raises(ValueError, match=msg):
+            read_json(json)
+
+    def test_read_json_large_numbers2(self):
+        # GH18842
+        json = '{"articleId": "1404366058080022500245"}'
+        json = StringIO(json)
+        result = read_json(json, typ="series")
+        expected = Series(1.404366e21, index=["articleId"])
+        tm.assert_series_equal(result, expected)
+
+        json = '{"0": {"articleId": "1404366058080022500245"}}'
+        json = StringIO(json)
+        result = read_json(json)
+        expected = DataFrame(1.404366e21, index=["articleId"], columns=[0])
+        tm.assert_frame_equal(result, expected)
+
+    def test_to_jsonl(self):
+        # GH9180
+        df = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
+        result = df.to_json(orient="records", lines=True)
+        expected = '{"a":1,"b":2}\n{"a":1,"b":2}\n'
+        assert result == expected
+
+        df = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=["a", "b"])
+        result = df.to_json(orient="records", lines=True)
+        expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}\n'
+        assert result == expected
+        tm.assert_frame_equal(read_json(result, lines=True), df)
+
+        # GH15096: escaped characters in columns and data
+        df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]], columns=["a\\", "b"])
+        result = df.to_json(orient="records", lines=True)
+        expected = '{"a\\\\":"foo\\\\","b":"bar"}\n{"a\\\\":"foo\\"","b":"bar"}\n'
+        assert result == expected
+        tm.assert_frame_equal(read_json(result, lines=True), df)
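+
+    def test_to_jsonl_roundtrip_sketch(self):
+        # Minimal sketch of the jsonl round trip relied on above (an
+        # illustrative addition, not an upstream test; it assumes only the
+        # public API already used in this module): orient="records" with
+        # lines=True writes one newline-terminated JSON object per row, and
+        # read_json(..., lines=True) inverts it.
+        df = DataFrame([[1, 2]], columns=["a", "b"])
+        jsonl = df.to_json(orient="records", lines=True)
+        assert jsonl == '{"a":1,"b":2}\n'
+        tm.assert_frame_equal(read_json(jsonl, lines=True), df)
+
+    # TODO: there is a near-identical test for pytables; can we share?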
+ @pytest.mark.xfail(reason="GH#13774 encoding kwarg not supported", raises=TypeError) + def test_latin_encoding(self): + # GH 13774 + values = [ + [b"E\xc9, 17", b"", b"a", b"b", b"c"], + [b"E\xc9, 17", b"a", b"b", b"c"], + [b"EE, 17", b"", b"a", b"b", b"c"], + [b"E\xc9, 17", b"\xf8\xfc", b"a", b"b", b"c"], + [b"", b"a", b"b", b"c"], + [b"\xf8\xfc", b"a", b"b", b"c"], + [b"A\xf8\xfc", b"", b"a", b"b", b"c"], + [np.nan, b"", b"b", b"c"], + [b"A\xf8\xfc", np.nan, b"", b"b", b"c"], + ] + + values = [ + [x.decode("latin-1") if isinstance(x, bytes) else x for x in y] + for y in values + ] + + examples = [] + for dtype in ["category", object]: + for val in values: + examples.append(Series(val, dtype=dtype)) + + def roundtrip(s, encoding="latin-1"): + with tm.ensure_clean("test.json") as path: + s.to_json(path, encoding=encoding) + retr = read_json(path, encoding=encoding) + tm.assert_series_equal(s, retr, check_categorical=False) + + for s in examples: + roundtrip(s) + + def test_data_frame_size_after_to_json(self): + # GH15344 + df = DataFrame({"a": [str(1)]}) + + size_before = df.memory_usage(index=True, deep=True).sum() + df.to_json() + size_after = df.memory_usage(index=True, deep=True).sum() + + assert size_before == size_after + + @pytest.mark.parametrize( + "index", [None, [1, 2], [1.0, 2.0], ["a", "b"], ["1", "2"], ["1.", "2."]] + ) + @pytest.mark.parametrize("columns", [["a", "b"], ["1", "2"], ["1.", "2."]]) + def test_from_json_to_json_table_index_and_columns(self, index, columns): + # GH25433 GH25435 + expected = DataFrame([[1, 2], [3, 4]], index=index, columns=columns) + dfjson = expected.to_json(orient="table") + result = read_json(dfjson, orient="table") + tm.assert_frame_equal(result, expected) + + def test_from_json_to_json_table_dtypes(self): + # GH21345 + expected = DataFrame({"a": [1, 2], "b": [3.0, 4.0], "c": ["5", "6"]}) + dfjson = expected.to_json(orient="table") + result = read_json(dfjson, orient="table") + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("orient", ["split", "records", "index", "columns"]) + def test_to_json_from_json_columns_dtypes(self, orient): + # GH21892 GH33205 + expected = DataFrame.from_dict( + { + "Integer": Series([1, 2, 3], dtype="int64"), + "Float": Series([None, 2.0, 3.0], dtype="float64"), + "Object": Series([None, "", "c"], dtype="object"), + "Bool": Series([True, False, True], dtype="bool"), + "Category": Series(["a", "b", None], dtype="category"), + "Datetime": Series( + ["2020-01-01", None, "2020-01-03"], dtype="datetime64[ns]" + ), + } + ) + dfjson = expected.to_json(orient=orient) + result = read_json( + dfjson, + orient=orient, + dtype={ + "Integer": "int64", + "Float": "float64", + "Object": "object", + "Bool": "bool", + "Category": "category", + "Datetime": "datetime64[ns]", + }, + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("dtype", [True, {"b": int, "c": int}]) + def test_read_json_table_dtype_raises(self, dtype): + # GH21345 + df = DataFrame({"a": [1, 2], "b": [3.0, 4.0], "c": ["5", "6"]}) + dfjson = df.to_json(orient="table") + msg = "cannot pass both dtype and orient='table'" + with pytest.raises(ValueError, match=msg): + read_json(dfjson, orient="table", dtype=dtype) + + def test_read_json_table_convert_axes_raises(self): + # GH25433 GH25435 + df = DataFrame([[1, 2], [3, 4]], index=[1.0, 2.0], columns=["1.", "2."]) + dfjson = df.to_json(orient="table") + msg = "cannot pass both convert_axes and orient='table'" + with pytest.raises(ValueError, match=msg): + read_json(dfjson, 
orient="table", convert_axes=True) + + @pytest.mark.parametrize( + "data, expected", + [ + ( + DataFrame([[1, 2], [4, 5]], columns=["a", "b"]), + {"columns": ["a", "b"], "data": [[1, 2], [4, 5]]}, + ), + ( + DataFrame([[1, 2], [4, 5]], columns=["a", "b"]).rename_axis("foo"), + {"columns": ["a", "b"], "data": [[1, 2], [4, 5]]}, + ), + ( + DataFrame( + [[1, 2], [4, 5]], columns=["a", "b"], index=[["a", "b"], ["c", "d"]] + ), + {"columns": ["a", "b"], "data": [[1, 2], [4, 5]]}, + ), + (Series([1, 2, 3], name="A"), {"name": "A", "data": [1, 2, 3]}), + ( + Series([1, 2, 3], name="A").rename_axis("foo"), + {"name": "A", "data": [1, 2, 3]}, + ), + ( + Series([1, 2], name="A", index=[["a", "b"], ["c", "d"]]), + {"name": "A", "data": [1, 2]}, + ), + ], + ) + def test_index_false_to_json_split(self, data, expected): + # GH 17394 + # Testing index=False in to_json with orient='split' + + result = data.to_json(orient="split", index=False) + result = json.loads(result) + + assert result == expected + + @pytest.mark.parametrize( + "data", + [ + (DataFrame([[1, 2], [4, 5]], columns=["a", "b"])), + (DataFrame([[1, 2], [4, 5]], columns=["a", "b"]).rename_axis("foo")), + ( + DataFrame( + [[1, 2], [4, 5]], columns=["a", "b"], index=[["a", "b"], ["c", "d"]] + ) + ), + (Series([1, 2, 3], name="A")), + (Series([1, 2, 3], name="A").rename_axis("foo")), + (Series([1, 2], name="A", index=[["a", "b"], ["c", "d"]])), + ], + ) + def test_index_false_to_json_table(self, data): + # GH 17394 + # Testing index=False in to_json with orient='table' + + result = data.to_json(orient="table", index=False) + result = json.loads(result) + + expected = { + "schema": pd.io.json.build_table_schema(data, index=False), + "data": DataFrame(data).to_dict(orient="records"), + } + + assert result == expected + + @pytest.mark.parametrize("orient", ["records", "index", "columns", "values"]) + def test_index_false_error_to_json(self, orient): + # GH 17394 + # Testing error message from to_json with index=False + + df = DataFrame([[1, 2], [4, 5]], columns=["a", "b"]) + + msg = "'index=False' is only valid when 'orient' is 'split' or 'table'" + with pytest.raises(ValueError, match=msg): + df.to_json(orient=orient, index=False) + + @pytest.mark.parametrize("orient", ["split", "table"]) + @pytest.mark.parametrize("index", [True, False]) + def test_index_false_from_json_to_json(self, orient, index): + # GH25170 + # Test index=False in from_json to_json + expected = DataFrame({"a": [1, 2], "b": [3, 4]}) + dfjson = expected.to_json(orient=orient, index=index) + result = read_json(dfjson, orient=orient) + tm.assert_frame_equal(result, expected) + + def test_read_timezone_information(self): + # GH 25546 + result = read_json( + '{"2019-01-01T11:00:00.000Z":88}', typ="series", orient="index" + ) + expected = Series([88], index=DatetimeIndex(["2019-01-01 11:00:00"], tz="UTC")) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "url", + [ + "s3://example-fsspec/", + "gcs://another-fsspec/file.json", + "https://example-site.com/data", + "some-protocol://data.txt", + ], + ) + def test_read_json_with_url_value(self, url): + # GH 36271 + result = read_json(f'{{"url":{{"0":"{url}"}}}}') + expected = DataFrame({"url": [url]}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "date_format,key", [("epoch", 86400000), ("iso", "P1DT0H0M0S")] + ) + def test_timedelta_as_label(self, date_format, key): + df = DataFrame([[1]], columns=[pd.Timedelta("1D")]) + expected = f'{{"{key}":{{"0":1}}}}' + result = 
df.to_json(date_format=date_format) + + assert result == expected + + @pytest.mark.parametrize( + "orient,expected", + [ + ("index", "{\"('a', 'b')\":{\"('c', 'd')\":1}}"), + ("columns", "{\"('c', 'd')\":{\"('a', 'b')\":1}}"), + # TODO: the below have separate encoding procedures + # They produce JSON but not in a consistent manner + pytest.param("split", "", marks=pytest.mark.skip), + pytest.param("table", "", marks=pytest.mark.skip), + ], + ) + def test_tuple_labels(self, orient, expected): + # GH 20500 + df = DataFrame([[1]], index=[("a", "b")], columns=[("c", "d")]) + result = df.to_json(orient=orient) + assert result == expected + + @pytest.mark.parametrize("indent", [1, 2, 4]) + def test_to_json_indent(self, indent): + # GH 12004 + df = DataFrame([["foo", "bar"], ["baz", "qux"]], columns=["a", "b"]) + + result = df.to_json(indent=indent) + spaces = " " * indent + expected = f"""{{ +{spaces}"a":{{ +{spaces}{spaces}"0":"foo", +{spaces}{spaces}"1":"baz" +{spaces}}}, +{spaces}"b":{{ +{spaces}{spaces}"0":"bar", +{spaces}{spaces}"1":"qux" +{spaces}}} +}}""" + + assert result == expected + + @pytest.mark.parametrize( + "orient,expected", + [ + ( + "split", + """{ + "columns":[ + "a", + "b" + ], + "index":[ + 0, + 1 + ], + "data":[ + [ + "foo", + "bar" + ], + [ + "baz", + "qux" + ] + ] +}""", + ), + ( + "records", + """[ + { + "a":"foo", + "b":"bar" + }, + { + "a":"baz", + "b":"qux" + } +]""", + ), + ( + "index", + """{ + "0":{ + "a":"foo", + "b":"bar" + }, + "1":{ + "a":"baz", + "b":"qux" + } +}""", + ), + ( + "columns", + """{ + "a":{ + "0":"foo", + "1":"baz" + }, + "b":{ + "0":"bar", + "1":"qux" + } +}""", + ), + ( + "values", + """[ + [ + "foo", + "bar" + ], + [ + "baz", + "qux" + ] +]""", + ), + ( + "table", + """{ + "schema":{ + "fields":[ + { + "name":"index", + "type":"integer" + }, + { + "name":"a", + "type":"string" + }, + { + "name":"b", + "type":"string" + } + ], + "primaryKey":[ + "index" + ], + "pandas_version":"1.4.0" + }, + "data":[ + { + "index":0, + "a":"foo", + "b":"bar" + }, + { + "index":1, + "a":"baz", + "b":"qux" + } + ] +}""", + ), + ], + ) + def test_json_indent_all_orients(self, orient, expected): + # GH 12004 + df = DataFrame([["foo", "bar"], ["baz", "qux"]], columns=["a", "b"]) + result = df.to_json(orient=orient, indent=4) + assert result == expected + + def test_json_negative_indent_raises(self): + with pytest.raises(ValueError, match="must be a nonnegative integer"): + DataFrame().to_json(indent=-1) + + def test_emca_262_nan_inf_support(self): + # GH 12213 + data = '["a", NaN, "NaN", Infinity, "Infinity", -Infinity, "-Infinity"]' + result = read_json(data) + expected = DataFrame( + ["a", np.nan, "NaN", np.inf, "Infinity", -np.inf, "-Infinity"] + ) + tm.assert_frame_equal(result, expected) + + def test_deprecate_numpy_argument_read_json(self): + # GH 28512 + expected = DataFrame([1, 2, 3]) + with tm.assert_produces_warning(FutureWarning): + result = read_json(expected.to_json(), numpy=True) + tm.assert_frame_equal(result, expected) + + def test_frame_int_overflow(self): + # GH 30320 + encoded_json = json.dumps([{"col": "31900441201190696999"}, {"col": "Text"}]) + expected = DataFrame({"col": ["31900441201190696999", "Text"]}) + result = read_json(encoded_json) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "dataframe,expected", + [ + ( + DataFrame({"x": [1, 2, 3], "y": ["a", "b", "c"]}), + '{"(0, \'x\')":1,"(0, \'y\')":"a","(1, \'x\')":2,' + '"(1, \'y\')":"b","(2, \'x\')":3,"(2, \'y\')":"c"}', + ) + ], + ) + def 
test_json_multiindex(self, dataframe, expected): + series = dataframe.stack() + result = series.to_json(orient="index") + assert result == expected + + def test_to_s3(self, s3_resource, s3so): + import time + + # GH 28375 + mock_bucket_name, target_file = "pandas-test", "test.json" + df = DataFrame({"x": [1, 2, 3], "y": [2, 4, 6]}) + df.to_json(f"s3://{mock_bucket_name}/{target_file}", storage_options=s3so) + timeout = 5 + while True: + if target_file in ( + obj.key for obj in s3_resource.Bucket("pandas-test").objects.all() + ): + break + time.sleep(0.1) + timeout -= 0.1 + assert timeout > 0, "Timed out waiting for file to appear on moto" + + def test_json_pandas_nulls(self, nulls_fixture, request): + # GH 31615 + if isinstance(nulls_fixture, Decimal): + mark = pytest.mark.xfail(reason="not implemented") + request.node.add_marker(mark) + + result = DataFrame([[nulls_fixture]]).to_json() + assert result == '{"0":{"0":null}}' + + def test_readjson_bool_series(self): + # GH31464 + result = read_json("[true, true, false]", typ="series") + expected = Series([True, True, False]) + tm.assert_series_equal(result, expected) + + def test_to_json_multiindex_escape(self): + # GH 15273 + df = DataFrame( + True, + index=pd.date_range("2017-01-20", "2017-01-23"), + columns=["foo", "bar"], + ).stack() + result = df.to_json() + expected = ( + "{\"(Timestamp('2017-01-20 00:00:00'), 'foo')\":true," + "\"(Timestamp('2017-01-20 00:00:00'), 'bar')\":true," + "\"(Timestamp('2017-01-21 00:00:00'), 'foo')\":true," + "\"(Timestamp('2017-01-21 00:00:00'), 'bar')\":true," + "\"(Timestamp('2017-01-22 00:00:00'), 'foo')\":true," + "\"(Timestamp('2017-01-22 00:00:00'), 'bar')\":true," + "\"(Timestamp('2017-01-23 00:00:00'), 'foo')\":true," + "\"(Timestamp('2017-01-23 00:00:00'), 'bar')\":true}" + ) + assert result == expected + + def test_to_json_series_of_objects(self): + class _TestObject: + def __init__(self, a, b, _c, d): + self.a = a + self.b = b + self._c = _c + self.d = d + + def e(self): + return 5 + + # JSON keys should be all non-callable non-underscore attributes, see GH-42768 + series = Series([_TestObject(a=1, b=2, _c=3, d=4)]) + assert json.loads(series.to_json()) == {"0": {"a": 1, "b": 2, "d": 4}} + + @pytest.mark.parametrize( + "data,expected", + [ + ( + Series({0: -6 + 8j, 1: 0 + 1j, 2: 9 - 5j}), + '{"0":{"imag":8.0,"real":-6.0},' + '"1":{"imag":1.0,"real":0.0},' + '"2":{"imag":-5.0,"real":9.0}}', + ), + ( + Series({0: -9.39 + 0.66j, 1: 3.95 + 9.32j, 2: 4.03 - 0.17j}), + '{"0":{"imag":0.66,"real":-9.39},' + '"1":{"imag":9.32,"real":3.95},' + '"2":{"imag":-0.17,"real":4.03}}', + ), + ( + DataFrame([[-2 + 3j, -1 - 0j], [4 - 3j, -0 - 10j]]), + '{"0":{"0":{"imag":3.0,"real":-2.0},' + '"1":{"imag":-3.0,"real":4.0}},' + '"1":{"0":{"imag":0.0,"real":-1.0},' + '"1":{"imag":-10.0,"real":0.0}}}', + ), + ( + DataFrame( + [[-0.28 + 0.34j, -1.08 - 0.39j], [0.41 - 0.34j, -0.78 - 1.35j]] + ), + '{"0":{"0":{"imag":0.34,"real":-0.28},' + '"1":{"imag":-0.34,"real":0.41}},' + '"1":{"0":{"imag":-0.39,"real":-1.08},' + '"1":{"imag":-1.35,"real":-0.78}}}', + ), + ], + ) + def test_complex_data_tojson(self, data, expected): + # GH41174 + result = data.to_json() + assert result == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/json/test_readlines.py b/.local/lib/python3.8/site-packages/pandas/tests/io/json/test_readlines.py new file mode 100644 index 0000000..4ba9f48 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/json/test_readlines.py @@ -0,0 +1,298 @@ +from io import StringIO 
+from pathlib import Path
+
+import pytest
+
+import pandas as pd
+from pandas import (
+    DataFrame,
+    read_json,
+)
+import pandas._testing as tm
+
+from pandas.io.json._json import JsonReader
+
+
+@pytest.fixture
+def lines_json_df():
+    df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
+    return df.to_json(lines=True, orient="records")
+
+
+def test_read_jsonl():
+    # GH9180
+    result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True)
+    expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
+    tm.assert_frame_equal(result, expected)
+
+
+def test_read_datetime():
+    # GH33787
+    df = DataFrame(
+        [([1, 2], ["2020-03-05", "2020-04-08T09:58:49+00:00"], "hector")],
+        columns=["accounts", "date", "name"],
+    )
+    json_line = df.to_json(lines=True, orient="records")
+    result = read_json(json_line)
+    expected = DataFrame(
+        [[1, "2020-03-05", "hector"], [2, "2020-04-08T09:58:49+00:00", "hector"]],
+        columns=["accounts", "date", "name"],
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+def test_read_jsonl_unicode_chars():
+    # GH15132: non-ascii unicode characters
+    # \u201d == RIGHT DOUBLE QUOTATION MARK
+
+    # simulate file handle
+    json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
+    json = StringIO(json)
+    result = read_json(json, lines=True)
+    expected = DataFrame([["foo\u201d", "bar"], ["foo", "bar"]], columns=["a", "b"])
+    tm.assert_frame_equal(result, expected)
+
+    # simulate string
+    json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
+    result = read_json(json, lines=True)
+    expected = DataFrame([["foo\u201d", "bar"], ["foo", "bar"]], columns=["a", "b"])
+    tm.assert_frame_equal(result, expected)
+
+
+def test_to_jsonl():
+    # GH9180
+    df = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
+    result = df.to_json(orient="records", lines=True)
+    expected = '{"a":1,"b":2}\n{"a":1,"b":2}\n'
+    assert result == expected
+
+    df = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=["a", "b"])
+    result = df.to_json(orient="records", lines=True)
+    expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}\n'
+    assert result == expected
+    tm.assert_frame_equal(read_json(result, lines=True), df)
+
+    # GH15096: escaped characters in columns and data
+    df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]], columns=["a\\", "b"])
+    result = df.to_json(orient="records", lines=True)
+    expected = '{"a\\\\":"foo\\\\","b":"bar"}\n{"a\\\\":"foo\\"","b":"bar"}\n'
+    assert result == expected
+    tm.assert_frame_equal(read_json(result, lines=True), df)
+
+
+def test_to_jsonl_count_new_lines():
+    # GH36888
+    df = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
+    actual_new_lines_count = df.to_json(orient="records", lines=True).count("\n")
+    expected_new_lines_count = 2
+    assert actual_new_lines_count == expected_new_lines_count
+
+
+@pytest.mark.parametrize("chunksize", [1, 1.0])
+def test_readjson_chunks(lines_json_df, chunksize):
+    # Basic test that read_json(..., chunksize=n) gives the same result as
+    # read_json() without a chunksize
+    # GH17048: memory usage when lines=True
+
+    unchunked = read_json(StringIO(lines_json_df), lines=True)
+    with read_json(StringIO(lines_json_df), lines=True, chunksize=chunksize) as reader:
+        chunked = pd.concat(reader)
+
+    tm.assert_frame_equal(chunked, unchunked)
+
+
+def test_readjson_chunksize_requires_lines(lines_json_df):
+    msg = "chunksize can only be passed if lines=True"
+    with pytest.raises(ValueError, match=msg):
+        with read_json(StringIO(lines_json_df), lines=False, chunksize=2) as _:
+            pass
+
+
+def test_readjson_chunks_series():
+    # Test reading 
line-format JSON to Series with chunksize param
+    s = pd.Series({"A": 1, "B": 2})
+
+    strio = StringIO(s.to_json(lines=True, orient="records"))
+    unchunked = read_json(strio, lines=True, typ="Series")
+
+    strio = StringIO(s.to_json(lines=True, orient="records"))
+    with read_json(strio, lines=True, typ="Series", chunksize=1) as reader:
+        chunked = pd.concat(reader)
+
+    tm.assert_series_equal(chunked, unchunked)
+
+
+def test_readjson_each_chunk(lines_json_df):
+    # Other tests check that the final result of read_json(..., chunksize=n)
+    # is correct. This checks the intermediate chunks.
+    with read_json(StringIO(lines_json_df), lines=True, chunksize=2) as reader:
+        chunks = list(reader)
+    assert chunks[0].shape == (2, 2)
+    assert chunks[1].shape == (1, 2)
+
+
+def test_readjson_chunks_from_file():
+    with tm.ensure_clean("test.json") as path:
+        df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
+        df.to_json(path, lines=True, orient="records")
+        with read_json(path, lines=True, chunksize=1) as reader:
+            chunked = pd.concat(reader)
+        unchunked = read_json(path, lines=True)
+        tm.assert_frame_equal(unchunked, chunked)
+
+
+@pytest.mark.parametrize("chunksize", [None, 1])
+def test_readjson_chunks_closes(chunksize):
+    with tm.ensure_clean("test.json") as path:
+        df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
+        df.to_json(path, lines=True, orient="records")
+        reader = JsonReader(
+            path,
+            orient=None,
+            typ="frame",
+            dtype=True,
+            convert_axes=True,
+            convert_dates=True,
+            keep_default_dates=True,
+            numpy=False,
+            precise_float=False,
+            date_unit=None,
+            encoding=None,
+            lines=True,
+            chunksize=chunksize,
+            compression=None,
+            nrows=None,
+        )
+        with reader:
+            reader.read()
+        assert (
+            reader.handles.handle.closed
+        ), f"didn't close stream with chunksize = {chunksize}"
+
+
+@pytest.mark.parametrize("chunksize", [0, -1, 2.2, "foo"])
+def test_readjson_invalid_chunksize(lines_json_df, chunksize):
+    msg = r"'chunksize' must be an integer >=1"
+
+    with pytest.raises(ValueError, match=msg):
+        with read_json(StringIO(lines_json_df), lines=True, chunksize=chunksize) as _:
+            pass
+
+
+@pytest.mark.parametrize("chunksize", [None, 1, 2])
+def test_readjson_chunks_multiple_empty_lines(chunksize):
+    j = """
+
+    {"A":1,"B":4}
+
+
+
+    {"A":2,"B":5}
+
+
+
+
+
+
+
+    {"A":3,"B":6}
+    """
+    orig = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
+    test = read_json(j, lines=True, chunksize=chunksize)
+    if chunksize is not None:
+        with test:
+            test = pd.concat(test)
+    tm.assert_frame_equal(orig, test, obj=f"chunksize: {chunksize}")
+
+
+def test_readjson_unicode(monkeypatch):
+    with tm.ensure_clean("test.json") as path:
+        monkeypatch.setattr("locale.getpreferredencoding", lambda l: "cp949")
+        with open(path, "w", encoding="utf-8") as f:
+            f.write('{"£©µÀÆÖÞßéöÿ":["АБВГДабвгд가"]}')
+
+        result = read_json(path)
+        expected = DataFrame({"£©µÀÆÖÞßéöÿ": ["АБВГДабвгд가"]})
+        tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("nrows", [1, 2])
+def test_readjson_nrows(nrows):
+    # GH 33916
+    # Test reading line-format JSON to a DataFrame with the nrows param
+    jsonl = """{"a": 1, "b": 2}
+    {"a": 3, "b": 4}
+    {"a": 5, "b": 6}
+    {"a": 7, "b": 8}"""
+    result = read_json(jsonl, lines=True, nrows=nrows)
+    expected = DataFrame({"a": [1, 3, 5, 7], "b": [2, 4, 6, 8]}).iloc[:nrows]
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("nrows,chunksize", [(2, 2), (4, 2)])
+def test_readjson_nrows_chunks(nrows, chunksize):
+    # GH 33916
+    # Test reading line-format JSON to a DataFrame with nrows and chunksize
+    jsonl 
= """{"a": 1, "b": 2} + {"a": 3, "b": 4} + {"a": 5, "b": 6} + {"a": 7, "b": 8}""" + with read_json(jsonl, lines=True, nrows=nrows, chunksize=chunksize) as reader: + chunked = pd.concat(reader) + expected = DataFrame({"a": [1, 3, 5, 7], "b": [2, 4, 6, 8]}).iloc[:nrows] + tm.assert_frame_equal(chunked, expected) + + +def test_readjson_nrows_requires_lines(): + # GH 33916 + # Test ValuError raised if nrows is set without setting lines in read_json + jsonl = """{"a": 1, "b": 2} + {"a": 3, "b": 4} + {"a": 5, "b": 6} + {"a": 7, "b": 8}""" + msg = "nrows can only be passed if lines=True" + with pytest.raises(ValueError, match=msg): + read_json(jsonl, lines=False, nrows=2) + + +def test_readjson_lines_chunks_fileurl(datapath): + # GH 27135 + # Test reading line-format JSON from file url + df_list_expected = [ + DataFrame([[1, 2]], columns=["a", "b"], index=[0]), + DataFrame([[3, 4]], columns=["a", "b"], index=[1]), + DataFrame([[5, 6]], columns=["a", "b"], index=[2]), + ] + os_path = datapath("io", "json", "data", "line_delimited.json") + file_url = Path(os_path).as_uri() + with read_json(file_url, lines=True, chunksize=1) as url_reader: + for index, chuck in enumerate(url_reader): + tm.assert_frame_equal(chuck, df_list_expected[index]) + + +def test_chunksize_is_incremental(): + # See https://github.com/pandas-dev/pandas/issues/34548 + jsonl = ( + """{"a": 1, "b": 2} + {"a": 3, "b": 4} + {"a": 5, "b": 6} + {"a": 7, "b": 8}\n""" + * 1000 + ) + + class MyReader: + def __init__(self, contents): + self.read_count = 0 + self.stringio = StringIO(contents) + + def read(self, *args): + self.read_count += 1 + return self.stringio.read(*args) + + def __iter__(self): + self.read_count += 1 + return iter(self.stringio) + + reader = MyReader(jsonl) + assert len(list(read_json(reader, lines=True, chunksize=100))) > 1 + assert reader.read_count > 10 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/json/test_ujson.py b/.local/lib/python3.8/site-packages/pandas/tests/io/json/test_ujson.py new file mode 100644 index 0000000..b4ae54d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/json/test_ujson.py @@ -0,0 +1,1244 @@ +import calendar +import datetime +import decimal +import json +import locale +import math +import re +import time + +import dateutil +import numpy as np +import pytest +import pytz + +import pandas._libs.json as ujson +from pandas.compat import ( + IS64, + PY310, + is_platform_windows, +) + +from pandas import ( + DataFrame, + DatetimeIndex, + Index, + NaT, + Series, + Timedelta, + Timestamp, + date_range, +) +import pandas._testing as tm + + +def _clean_dict(d): + """ + Sanitize dictionary for JSON by converting all keys to strings. + + Parameters + ---------- + d : dict + The dictionary to convert. + + Returns + ------- + cleaned_dict : dict + """ + return {str(k): v for k, v in d.items()} + + +@pytest.fixture( + params=[None, "split", "records", "values", "index"] # Column indexed by default. 
+) +def orient(request): + return request.param + + +@pytest.fixture(params=[None, True]) +def numpy(request): + return request.param + + +def get_int32_compat_dtype(numpy, orient): + # See GH#32527 + dtype = np.int64 + if not ((numpy is None or orient == "index") or (numpy is True and orient is None)): + if is_platform_windows(): + dtype = np.int32 + else: + dtype = np.intp + + return dtype + + +class TestUltraJSONTests: + @pytest.mark.skipif(not IS64, reason="not compliant on 32-bit, xref #15865") + def test_encode_decimal(self): + sut = decimal.Decimal("1337.1337") + encoded = ujson.encode(sut, double_precision=15) + decoded = ujson.decode(encoded) + assert decoded == 1337.1337 + + sut = decimal.Decimal("0.95") + encoded = ujson.encode(sut, double_precision=1) + assert encoded == "1.0" + + decoded = ujson.decode(encoded) + assert decoded == 1.0 + + sut = decimal.Decimal("0.94") + encoded = ujson.encode(sut, double_precision=1) + assert encoded == "0.9" + + decoded = ujson.decode(encoded) + assert decoded == 0.9 + + sut = decimal.Decimal("1.95") + encoded = ujson.encode(sut, double_precision=1) + assert encoded == "2.0" + + decoded = ujson.decode(encoded) + assert decoded == 2.0 + + sut = decimal.Decimal("-1.95") + encoded = ujson.encode(sut, double_precision=1) + assert encoded == "-2.0" + + decoded = ujson.decode(encoded) + assert decoded == -2.0 + + sut = decimal.Decimal("0.995") + encoded = ujson.encode(sut, double_precision=2) + assert encoded == "1.0" + + decoded = ujson.decode(encoded) + assert decoded == 1.0 + + sut = decimal.Decimal("0.9995") + encoded = ujson.encode(sut, double_precision=3) + assert encoded == "1.0" + + decoded = ujson.decode(encoded) + assert decoded == 1.0 + + sut = decimal.Decimal("0.99999999999999944") + encoded = ujson.encode(sut, double_precision=15) + assert encoded == "1.0" + + decoded = ujson.decode(encoded) + assert decoded == 1.0 + + @pytest.mark.parametrize("ensure_ascii", [True, False]) + def test_encode_string_conversion(self, ensure_ascii): + string_input = "A string \\ / \b \f \n \r \t &" + not_html_encoded = '"A string \\\\ \\/ \\b \\f \\n \\r \\t <\\/script> &"' + html_encoded = ( + '"A string \\\\ \\/ \\b \\f \\n \\r \\t \\u003c\\/script\\u003e \\u0026"' + ) + + def helper(expected_output, **encode_kwargs): + output = ujson.encode( + string_input, ensure_ascii=ensure_ascii, **encode_kwargs + ) + + assert output == expected_output + assert string_input == json.loads(output) + assert string_input == ujson.decode(output) + + # Default behavior assumes encode_html_chars=False. + helper(not_html_encoded) + + # Make sure explicit encode_html_chars=False works. + helper(not_html_encoded, encode_html_chars=False) + + # Make sure explicit encode_html_chars=True does the encoding. + helper(html_encoded, encode_html_chars=True) + + @pytest.mark.parametrize( + "long_number", [-4342969734183514, -12345678901234.56789012, -528656961.4399388] + ) + def test_double_long_numbers(self, long_number): + sut = {"a": long_number} + encoded = ujson.encode(sut, double_precision=15) + + decoded = ujson.decode(encoded) + assert sut == decoded + + def test_encode_non_c_locale(self): + lc_category = locale.LC_NUMERIC + + # We just need one of these locales to work. 
+ for new_locale in ("it_IT.UTF-8", "Italian_Italy"): + if tm.can_set_locale(new_locale, lc_category): + with tm.set_locale(new_locale, lc_category): + assert ujson.loads(ujson.dumps(4.78e60)) == 4.78e60 + assert ujson.loads("4.78", precise_float=True) == 4.78 + break + + def test_decimal_decode_test_precise(self): + sut = {"a": 4.56} + encoded = ujson.encode(sut) + decoded = ujson.decode(encoded, precise_float=True) + assert sut == decoded + + def test_encode_double_tiny_exponential(self): + num = 1e-40 + assert num == ujson.decode(ujson.encode(num)) + num = 1e-100 + assert num == ujson.decode(ujson.encode(num)) + num = -1e-45 + assert num == ujson.decode(ujson.encode(num)) + num = -1e-145 + assert np.allclose(num, ujson.decode(ujson.encode(num))) + + @pytest.mark.parametrize("unicode_key", ["key1", "بن"]) + def test_encode_dict_with_unicode_keys(self, unicode_key): + unicode_dict = {unicode_key: "value1"} + assert unicode_dict == ujson.decode(ujson.encode(unicode_dict)) + + @pytest.mark.parametrize( + "double_input", [math.pi, -math.pi] # Should work with negatives too. + ) + def test_encode_double_conversion(self, double_input): + output = ujson.encode(double_input) + assert round(double_input, 5) == round(json.loads(output), 5) + assert round(double_input, 5) == round(ujson.decode(output), 5) + + def test_encode_with_decimal(self): + decimal_input = 1.0 + output = ujson.encode(decimal_input) + + assert output == "1.0" + + def test_encode_array_of_nested_arrays(self): + nested_input = [[[[]]]] * 20 + output = ujson.encode(nested_input) + + assert nested_input == json.loads(output) + assert nested_input == ujson.decode(output) + + nested_input = np.array(nested_input) + tm.assert_numpy_array_equal( + nested_input, ujson.decode(output, numpy=True, dtype=nested_input.dtype) + ) + + def test_encode_array_of_doubles(self): + doubles_input = [31337.31337, 31337.31337, 31337.31337, 31337.31337] * 10 + output = ujson.encode(doubles_input) + + assert doubles_input == json.loads(output) + assert doubles_input == ujson.decode(output) + + tm.assert_numpy_array_equal( + np.array(doubles_input), ujson.decode(output, numpy=True) + ) + + def test_double_precision(self): + double_input = 30.012345678901234 + output = ujson.encode(double_input, double_precision=15) + + assert double_input == json.loads(output) + assert double_input == ujson.decode(output) + + for double_precision in (3, 9): + output = ujson.encode(double_input, double_precision=double_precision) + rounded_input = round(double_input, double_precision) + + assert rounded_input == json.loads(output) + assert rounded_input == ujson.decode(output) + + @pytest.mark.parametrize( + "invalid_val", + [ + 20, + -1, + pytest.param( + "9", + marks=pytest.mark.xfail(PY310, reason="Failing on Python 3.10 GH41940"), + ), + pytest.param( + None, + marks=pytest.mark.xfail(PY310, reason="Failing on Python 3.10 GH41940"), + ), + ], + ) + def test_invalid_double_precision(self, invalid_val): + double_input = 30.12345678901234567890 + expected_exception = ValueError if isinstance(invalid_val, int) else TypeError + msg = ( + r"Invalid value '.*' for option 'double_precision', max is '15'|" + r"an integer is required \(got type " + ) + with pytest.raises(expected_exception, match=msg): + ujson.encode(double_input, double_precision=invalid_val) + + def test_encode_string_conversion2(self): + string_input = "A string \\ / \b \f \n \r \t" + output = ujson.encode(string_input) + + assert string_input == json.loads(output) + assert string_input == 
ujson.decode(output) + assert output == '"A string \\\\ \\/ \\b \\f \\n \\r \\t"' + + @pytest.mark.parametrize( + "unicode_input", + ["Räksmörgås اسامة بن محمد بن عوض بن لادن", "\xe6\x97\xa5\xd1\x88"], + ) + def test_encode_unicode_conversion(self, unicode_input): + enc = ujson.encode(unicode_input) + dec = ujson.decode(enc) + + assert enc == json.dumps(unicode_input) + assert dec == json.loads(enc) + + def test_encode_control_escaping(self): + escaped_input = "\x19" + enc = ujson.encode(escaped_input) + dec = ujson.decode(enc) + + assert escaped_input == dec + assert enc == json.dumps(escaped_input) + + def test_encode_unicode_surrogate_pair(self): + surrogate_input = "\xf0\x90\x8d\x86" + enc = ujson.encode(surrogate_input) + dec = ujson.decode(enc) + + assert enc == json.dumps(surrogate_input) + assert dec == json.loads(enc) + + def test_encode_unicode_4bytes_utf8(self): + four_bytes_input = "\xf0\x91\x80\xb0TRAILINGNORMAL" + enc = ujson.encode(four_bytes_input) + dec = ujson.decode(enc) + + assert enc == json.dumps(four_bytes_input) + assert dec == json.loads(enc) + + def test_encode_unicode_4bytes_utf8highest(self): + four_bytes_input = "\xf3\xbf\xbf\xbfTRAILINGNORMAL" + enc = ujson.encode(four_bytes_input) + + dec = ujson.decode(enc) + + assert enc == json.dumps(four_bytes_input) + assert dec == json.loads(enc) + + def test_encode_array_in_array(self): + arr_in_arr_input = [[[[]]]] + output = ujson.encode(arr_in_arr_input) + + assert arr_in_arr_input == json.loads(output) + assert output == json.dumps(arr_in_arr_input) + assert arr_in_arr_input == ujson.decode(output) + + tm.assert_numpy_array_equal( + np.array(arr_in_arr_input), ujson.decode(output, numpy=True) + ) + + @pytest.mark.parametrize( + "num_input", + [ + 31337, + -31337, # Negative number. + -9223372036854775808, # Large negative number. 
+ ], + ) + def test_encode_num_conversion(self, num_input): + output = ujson.encode(num_input) + assert num_input == json.loads(output) + assert output == json.dumps(num_input) + assert num_input == ujson.decode(output) + + def test_encode_list_conversion(self): + list_input = [1, 2, 3, 4] + output = ujson.encode(list_input) + + assert list_input == json.loads(output) + assert list_input == ujson.decode(output) + + tm.assert_numpy_array_equal( + np.array(list_input), ujson.decode(output, numpy=True) + ) + + def test_encode_dict_conversion(self): + dict_input = {"k1": 1, "k2": 2, "k3": 3, "k4": 4} + output = ujson.encode(dict_input) + + assert dict_input == json.loads(output) + assert dict_input == ujson.decode(output) + + @pytest.mark.parametrize("builtin_value", [None, True, False]) + def test_encode_builtin_values_conversion(self, builtin_value): + output = ujson.encode(builtin_value) + assert builtin_value == json.loads(output) + assert output == json.dumps(builtin_value) + assert builtin_value == ujson.decode(output) + + def test_encode_datetime_conversion(self): + datetime_input = datetime.datetime.fromtimestamp(time.time()) + output = ujson.encode(datetime_input, date_unit="s") + expected = calendar.timegm(datetime_input.utctimetuple()) + + assert int(expected) == json.loads(output) + assert int(expected) == ujson.decode(output) + + def test_encode_date_conversion(self): + date_input = datetime.date.fromtimestamp(time.time()) + output = ujson.encode(date_input, date_unit="s") + + tup = (date_input.year, date_input.month, date_input.day, 0, 0, 0) + expected = calendar.timegm(tup) + + assert int(expected) == json.loads(output) + assert int(expected) == ujson.decode(output) + + @pytest.mark.parametrize( + "test", + [datetime.time(), datetime.time(1, 2, 3), datetime.time(10, 12, 15, 343243)], + ) + def test_encode_time_conversion_basic(self, test): + output = ujson.encode(test) + expected = f'"{test.isoformat()}"' + assert expected == output + + def test_encode_time_conversion_pytz(self): + # see gh-11473: to_json segfaults with timezone-aware datetimes + test = datetime.time(10, 12, 15, 343243, pytz.utc) + output = ujson.encode(test) + expected = f'"{test.isoformat()}"' + assert expected == output + + def test_encode_time_conversion_dateutil(self): + # see gh-11473: to_json segfaults with timezone-aware datetimes + test = datetime.time(10, 12, 15, 343243, dateutil.tz.tzutc()) + output = ujson.encode(test) + expected = f'"{test.isoformat()}"' + assert expected == output + + @pytest.mark.parametrize( + "decoded_input", [NaT, np.datetime64("NaT"), np.nan, np.inf, -np.inf] + ) + def test_encode_as_null(self, decoded_input): + assert ujson.encode(decoded_input) == "null", "Expected null" + + def test_datetime_units(self): + val = datetime.datetime(2013, 8, 17, 21, 17, 12, 215504) + stamp = Timestamp(val) + + roundtrip = ujson.decode(ujson.encode(val, date_unit="s")) + assert roundtrip == stamp.value // 10 ** 9 + + roundtrip = ujson.decode(ujson.encode(val, date_unit="ms")) + assert roundtrip == stamp.value // 10 ** 6 + + roundtrip = ujson.decode(ujson.encode(val, date_unit="us")) + assert roundtrip == stamp.value // 10 ** 3 + + roundtrip = ujson.decode(ujson.encode(val, date_unit="ns")) + assert roundtrip == stamp.value + + msg = "Invalid value 'foo' for option 'date_unit'" + with pytest.raises(ValueError, match=msg): + ujson.encode(val, date_unit="foo") + + def test_encode_to_utf8(self): + unencoded = "\xe6\x97\xa5\xd1\x88" + + enc = ujson.encode(unencoded, ensure_ascii=False) + dec = 
ujson.decode(enc) + + assert enc == json.dumps(unencoded, ensure_ascii=False) + assert dec == json.loads(enc) + + def test_decode_from_unicode(self): + unicode_input = '{"obj": 31337}' + + dec1 = ujson.decode(unicode_input) + dec2 = ujson.decode(str(unicode_input)) + + assert dec1 == dec2 + + def test_encode_recursion_max(self): + # 8 is the max recursion depth + + class O2: + member = 0 + pass + + class O1: + member = 0 + pass + + decoded_input = O1() + decoded_input.member = O2() + decoded_input.member.member = decoded_input + + with pytest.raises(OverflowError, match="Maximum recursion level reached"): + ujson.encode(decoded_input) + + def test_decode_jibberish(self): + jibberish = "fdsa sda v9sa fdsa" + msg = "Unexpected character found when decoding 'false'" + with pytest.raises(ValueError, match=msg): + ujson.decode(jibberish) + + @pytest.mark.parametrize( + "broken_json", + [ + "[", # Broken array start. + "{", # Broken object start. + "]", # Broken array end. + "}", # Broken object end. + ], + ) + def test_decode_broken_json(self, broken_json): + msg = "Expected object or value" + with pytest.raises(ValueError, match=msg): + ujson.decode(broken_json) + + @pytest.mark.parametrize("too_big_char", ["[", "{"]) + def test_decode_depth_too_big(self, too_big_char): + with pytest.raises(ValueError, match="Reached object decoding depth limit"): + ujson.decode(too_big_char * (1024 * 1024)) + + @pytest.mark.parametrize( + "bad_string", + [ + '"TESTING', # Unterminated. + '"TESTING\\"', # Unterminated escape. + "tru", # Broken True. + "fa", # Broken False. + "n", # Broken None. + ], + ) + def test_decode_bad_string(self, bad_string): + msg = ( + "Unexpected character found when decoding|" + "Unmatched ''\"' when when decoding 'string'" + ) + with pytest.raises(ValueError, match=msg): + ujson.decode(bad_string) + + @pytest.mark.parametrize( + "broken_json, err_msg", + [ + ( + '{{1337:""}}', + "Key name of object must be 'string' when decoding 'object'", + ), + ('{{"key":"}', "Unmatched ''\"' when when decoding 'string'"), + ("[[[true", "Unexpected character found when decoding array value (2)"), + ], + ) + def test_decode_broken_json_leak(self, broken_json, err_msg): + for _ in range(1000): + with pytest.raises(ValueError, match=re.escape(err_msg)): + ujson.decode(broken_json) + + @pytest.mark.parametrize( + "invalid_dict", + [ + "{{{{31337}}}}", # No key. + '{{{{"key":}}}}', # No value. + '{{{{"key"}}}}', # No colon or value. + ], + ) + def test_decode_invalid_dict(self, invalid_dict): + msg = ( + "Key name of object must be 'string' when decoding 'object'|" + "No ':' found when decoding object value|" + "Expected object or value" + ) + with pytest.raises(ValueError, match=msg): + ujson.decode(invalid_dict) + + @pytest.mark.parametrize( + "numeric_int_as_str", ["31337", "-31337"] # Should work with negatives. 
+ ) + def test_decode_numeric_int(self, numeric_int_as_str): + assert int(numeric_int_as_str) == ujson.decode(numeric_int_as_str) + + def test_encode_null_character(self): + wrapped_input = "31337 \x00 1337" + output = ujson.encode(wrapped_input) + + assert wrapped_input == json.loads(output) + assert output == json.dumps(wrapped_input) + assert wrapped_input == ujson.decode(output) + + alone_input = "\x00" + output = ujson.encode(alone_input) + + assert alone_input == json.loads(output) + assert output == json.dumps(alone_input) + assert alone_input == ujson.decode(output) + assert '" \\u0000\\r\\n "' == ujson.dumps(" \u0000\r\n ") + + def test_decode_null_character(self): + wrapped_input = '"31337 \\u0000 31337"' + assert ujson.decode(wrapped_input) == json.loads(wrapped_input) + + def test_encode_list_long_conversion(self): + long_input = [ + 9223372036854775807, + 9223372036854775807, + 9223372036854775807, + 9223372036854775807, + 9223372036854775807, + 9223372036854775807, + ] + output = ujson.encode(long_input) + + assert long_input == json.loads(output) + assert long_input == ujson.decode(output) + + tm.assert_numpy_array_equal( + np.array(long_input), ujson.decode(output, numpy=True, dtype=np.int64) + ) + + @pytest.mark.parametrize("long_input", [9223372036854775807, 18446744073709551615]) + def test_encode_long_conversion(self, long_input): + output = ujson.encode(long_input) + + assert long_input == json.loads(output) + assert output == json.dumps(long_input) + assert long_input == ujson.decode(output) + + @pytest.mark.parametrize("bigNum", [2 ** 64, -(2 ** 63) - 1]) + def test_dumps_ints_larger_than_maxsize(self, bigNum): + encoding = ujson.encode(bigNum) + assert str(bigNum) == encoding + + with pytest.raises( + ValueError, + match="Value is too big|Value is too small", + ): + assert ujson.loads(encoding) == bigNum + + @pytest.mark.parametrize( + "int_exp", ["1337E40", "1.337E40", "1337E+9", "1.337e+40", "1.337E-4"] + ) + def test_decode_numeric_int_exp(self, int_exp): + assert ujson.decode(int_exp) == json.loads(int_exp) + + def test_loads_non_str_bytes_raises(self): + msg = "Expected 'str' or 'bytes'" + with pytest.raises(TypeError, match=msg): + ujson.loads(None) + + @pytest.mark.parametrize("val", [3590016419, 2 ** 31, 2 ** 32, (2 ** 32) - 1]) + def test_decode_number_with_32bit_sign_bit(self, val): + # Test that numbers that fit within 32 bits but would have the + # sign bit set (2**31 <= x < 2**32) are decoded properly. + doc = f'{{"id": {val}}}' + assert ujson.decode(doc)["id"] == val + + def test_encode_big_escape(self): + # Make sure no Exception is raised. + for _ in range(10): + base = "\u00e5".encode() + escape_input = base * 1024 * 1024 * 2 + ujson.encode(escape_input) + + def test_decode_big_escape(self): + # Make sure no Exception is raised. 
+ for _ in range(10): + base = "\u00e5".encode() + quote = b'"' + + escape_input = quote + (base * 1024 * 1024 * 2) + quote + ujson.decode(escape_input) + + def test_to_dict(self): + d = {"key": 31337} + + class DictTest: + def toDict(self): + return d + + o = DictTest() + output = ujson.encode(o) + + dec = ujson.decode(output) + assert dec == d + + def test_default_handler(self): + class _TestObject: + def __init__(self, val): + self.val = val + + @property + def recursive_attr(self): + return _TestObject("recursive_attr") + + def __str__(self) -> str: + return str(self.val) + + msg = "Maximum recursion level reached" + with pytest.raises(OverflowError, match=msg): + ujson.encode(_TestObject("foo")) + assert '"foo"' == ujson.encode(_TestObject("foo"), default_handler=str) + + def my_handler(_): + return "foobar" + + assert '"foobar"' == ujson.encode( + _TestObject("foo"), default_handler=my_handler + ) + + def my_handler_raises(_): + raise TypeError("I raise for anything") + + with pytest.raises(TypeError, match="I raise for anything"): + ujson.encode(_TestObject("foo"), default_handler=my_handler_raises) + + def my_int_handler(_): + return 42 + + assert ( + ujson.decode( + ujson.encode(_TestObject("foo"), default_handler=my_int_handler) + ) + == 42 + ) + + def my_obj_handler(_): + return datetime.datetime(2013, 2, 3) + + assert ujson.decode( + ujson.encode(datetime.datetime(2013, 2, 3)) + ) == ujson.decode( + ujson.encode(_TestObject("foo"), default_handler=my_obj_handler) + ) + + obj_list = [_TestObject("foo"), _TestObject("bar")] + assert json.loads(json.dumps(obj_list, default=str)) == ujson.decode( + ujson.encode(obj_list, default_handler=str) + ) + + def test_encode_object(self): + class _TestObject: + def __init__(self, a, b, _c, d): + self.a = a + self.b = b + self._c = _c + self.d = d + + def e(self): + return 5 + + # JSON keys should be all non-callable non-underscore attributes, see GH-42768 + test_object = _TestObject(a=1, b=2, _c=3, d=4) + assert ujson.decode(ujson.encode(test_object)) == {"a": 1, "b": 2, "d": 4} + + +class TestNumpyJSONTests: + @pytest.mark.parametrize("bool_input", [True, False]) + def test_bool(self, bool_input): + b = bool(bool_input) + assert ujson.decode(ujson.encode(b)) == b + + def test_bool_array(self): + bool_array = np.array( + [True, False, True, True, False, True, False, False], dtype=bool + ) + output = np.array(ujson.decode(ujson.encode(bool_array)), dtype=bool) + tm.assert_numpy_array_equal(bool_array, output) + + def test_int(self, any_int_numpy_dtype): + klass = np.dtype(any_int_numpy_dtype).type + num = klass(1) + + assert klass(ujson.decode(ujson.encode(num))) == num + + def test_int_array(self, any_int_numpy_dtype): + arr = np.arange(100, dtype=int) + arr_input = arr.astype(any_int_numpy_dtype) + + arr_output = np.array( + ujson.decode(ujson.encode(arr_input)), dtype=any_int_numpy_dtype + ) + tm.assert_numpy_array_equal(arr_input, arr_output) + + def test_int_max(self, any_int_numpy_dtype): + if any_int_numpy_dtype in ("int64", "uint64") and not IS64: + pytest.skip("Cannot test 64-bit integer on 32-bit platform") + + klass = np.dtype(any_int_numpy_dtype).type + + # uint64 max will always overflow, + # as it's encoded to signed. 
+ if any_int_numpy_dtype == "uint64": + num = np.iinfo("int64").max + else: + num = np.iinfo(any_int_numpy_dtype).max + + assert klass(ujson.decode(ujson.encode(num))) == num + + def test_float(self, float_numpy_dtype): + klass = np.dtype(float_numpy_dtype).type + num = klass(256.2013) + + assert klass(ujson.decode(ujson.encode(num))) == num + + def test_float_array(self, float_numpy_dtype): + arr = np.arange(12.5, 185.72, 1.7322, dtype=float) + float_input = arr.astype(float_numpy_dtype) + + float_output = np.array( + ujson.decode(ujson.encode(float_input, double_precision=15)), + dtype=float_numpy_dtype, + ) + tm.assert_almost_equal(float_input, float_output) + + def test_float_max(self, float_numpy_dtype): + klass = np.dtype(float_numpy_dtype).type + num = klass(np.finfo(float_numpy_dtype).max / 10) + + tm.assert_almost_equal( + klass(ujson.decode(ujson.encode(num, double_precision=15))), num + ) + + def test_array_basic(self): + arr = np.arange(96) + arr = arr.reshape((2, 2, 2, 2, 3, 2)) + + tm.assert_numpy_array_equal(np.array(ujson.decode(ujson.encode(arr))), arr) + tm.assert_numpy_array_equal(ujson.decode(ujson.encode(arr), numpy=True), arr) + + @pytest.mark.parametrize("shape", [(10, 10), (5, 5, 4), (100, 1)]) + def test_array_reshaped(self, shape): + arr = np.arange(100) + arr = arr.reshape(shape) + + tm.assert_numpy_array_equal(np.array(ujson.decode(ujson.encode(arr))), arr) + tm.assert_numpy_array_equal(ujson.decode(ujson.encode(arr), numpy=True), arr) + + def test_array_list(self): + arr_list = [ + "a", + [], + {}, + {}, + [], + 42, + 97.8, + ["a", "b"], + {"key": "val"}, + ] + arr = np.array(arr_list, dtype=object) + result = np.array(ujson.decode(ujson.encode(arr)), dtype=object) + tm.assert_numpy_array_equal(result, arr) + + def test_array_float(self): + dtype = np.float32 + + arr = np.arange(100.202, 200.202, 1, dtype=dtype) + arr = arr.reshape((5, 5, 4)) + + arr_out = np.array(ujson.decode(ujson.encode(arr)), dtype=dtype) + tm.assert_almost_equal(arr, arr_out) + + arr_out = ujson.decode(ujson.encode(arr), numpy=True, dtype=dtype) + tm.assert_almost_equal(arr, arr_out) + + def test_0d_array(self): + # gh-18878 + msg = re.escape("array(1) (0d array) is not JSON serializable at the moment") + with pytest.raises(TypeError, match=msg): + ujson.encode(np.array(1)) + + @pytest.mark.parametrize( + "bad_input,exc_type,err_msg,kwargs", + [ + ( + [{}, []], + ValueError, + r"nesting not supported for object or variable length dtypes", + {}, + ), + ( + [42, None], + TypeError, + r"int\(\) argument must be a string, a bytes-like object or a( real)? " + r"number, not 'NoneType'", + {}, + ), + ( + [["a"], 42], + ValueError, + r"Cannot decode multidimensional arrays with variable length elements " + r"to numpy", + {}, + ), + ( + [42, {}, "a"], + TypeError, + r"int\(\) argument must be a string, a bytes-like object or a( real)? 
" + r"number, not 'dict'", + {}, + ), + ( + [42, ["a"], 42], + ValueError, + r"invalid literal for int\(\) with base 10: 'a'", + {}, + ), + ( + ["a", "b", [], "c"], + ValueError, + r"nesting not supported for object or variable length dtypes", + {}, + ), + ( + [{"a": "b"}], + ValueError, + r"Cannot decode multidimensional arrays with variable length elements " + r"to numpy", + {"labelled": True}, + ), + ( + {"a": {"b": {"c": 42}}}, + ValueError, + r"labels only supported up to 2 dimensions", + {"labelled": True}, + ), + ( + [{"a": 42, "b": 23}, {"c": 17}], + ValueError, + r"cannot reshape array of size 3 into shape \(2,1\)", + {"labelled": True}, + ), + ], + ) + def test_array_numpy_except(self, bad_input, exc_type, err_msg, kwargs): + with pytest.raises(exc_type, match=err_msg): + ujson.decode(ujson.dumps(bad_input), numpy=True, **kwargs) + + def test_array_numpy_labelled(self): + labelled_input = {"a": []} + output = ujson.loads(ujson.dumps(labelled_input), numpy=True, labelled=True) + assert (np.empty((1, 0)) == output[0]).all() + assert (np.array(["a"]) == output[1]).all() + assert output[2] is None + + labelled_input = [{"a": 42}] + output = ujson.loads(ujson.dumps(labelled_input), numpy=True, labelled=True) + assert (np.array(["a"]) == output[2]).all() + assert (np.array([42]) == output[0]).all() + assert output[1] is None + + # see gh-10837: write out the dump explicitly + # so there is no dependency on iteration order + input_dumps = '[{"a": 42, "b":31}, {"a": 24, "c": 99}, {"a": 2.4, "b": 78}]' + output = ujson.loads(input_dumps, numpy=True, labelled=True) + expected_vals = np.array([42, 31, 24, 99, 2.4, 78], dtype=int).reshape((3, 2)) + assert (expected_vals == output[0]).all() + assert output[1] is None + assert (np.array(["a", "b"]) == output[2]).all() + + input_dumps = ( + '{"1": {"a": 42, "b":31}, "2": {"a": 24, "c": 99}, ' + '"3": {"a": 2.4, "b": 78}}' + ) + output = ujson.loads(input_dumps, numpy=True, labelled=True) + expected_vals = np.array([42, 31, 24, 99, 2.4, 78], dtype=int).reshape((3, 2)) + assert (expected_vals == output[0]).all() + assert (np.array(["1", "2", "3"]) == output[1]).all() + assert (np.array(["a", "b"]) == output[2]).all() + + +class TestPandasJSONTests: + def test_dataframe(self, orient, numpy): + if orient == "records" and numpy: + pytest.skip("Not idiomatic pandas") + + dtype = get_int32_compat_dtype(numpy, orient) + + df = DataFrame( + [[1, 2, 3], [4, 5, 6]], + index=["a", "b"], + columns=["x", "y", "z"], + dtype=dtype, + ) + encode_kwargs = {} if orient is None else {"orient": orient} + decode_kwargs = {} if numpy is None else {"numpy": numpy} + assert (df.dtypes == dtype).all() + + output = ujson.decode(ujson.encode(df, **encode_kwargs), **decode_kwargs) + assert (df.dtypes == dtype).all() + + # Ensure proper DataFrame initialization. + if orient == "split": + dec = _clean_dict(output) + output = DataFrame(**dec) + else: + output = DataFrame(output) + + # Corrections to enable DataFrame comparison. 
+ if orient == "values": + df.columns = [0, 1, 2] + df.index = [0, 1] + elif orient == "records": + df.index = [0, 1] + elif orient == "index": + df = df.transpose() + + assert (df.dtypes == dtype).all() + tm.assert_frame_equal(output, df) + + def test_dataframe_nested(self, orient): + df = DataFrame( + [[1, 2, 3], [4, 5, 6]], index=["a", "b"], columns=["x", "y", "z"] + ) + + nested = {"df1": df, "df2": df.copy()} + kwargs = {} if orient is None else {"orient": orient} + + exp = { + "df1": ujson.decode(ujson.encode(df, **kwargs)), + "df2": ujson.decode(ujson.encode(df, **kwargs)), + } + assert ujson.decode(ujson.encode(nested, **kwargs)) == exp + + def test_dataframe_numpy_labelled(self, orient): + if orient in ("split", "values"): + pytest.skip("Incompatible with labelled=True") + + df = DataFrame( + [[1, 2, 3], [4, 5, 6]], + index=["a", "b"], + columns=["x", "y", "z"], + dtype=int, + ) + kwargs = {} if orient is None else {"orient": orient} + + output = DataFrame( + *ujson.decode(ujson.encode(df, **kwargs), numpy=True, labelled=True) + ) + + if orient is None: + df = df.T + elif orient == "records": + df.index = [0, 1] + + tm.assert_frame_equal(output, df) + + def test_series(self, orient, numpy): + dtype = get_int32_compat_dtype(numpy, orient) + s = Series( + [10, 20, 30, 40, 50, 60], + name="series", + index=[6, 7, 8, 9, 10, 15], + dtype=dtype, + ).sort_values() + assert s.dtype == dtype + + encode_kwargs = {} if orient is None else {"orient": orient} + decode_kwargs = {} if numpy is None else {"numpy": numpy} + + output = ujson.decode(ujson.encode(s, **encode_kwargs), **decode_kwargs) + assert s.dtype == dtype + + if orient == "split": + dec = _clean_dict(output) + output = Series(**dec) + else: + output = Series(output) + + if orient in (None, "index"): + s.name = None + output = output.sort_values() + s.index = ["6", "7", "8", "9", "10", "15"] + elif orient in ("records", "values"): + s.name = None + s.index = [0, 1, 2, 3, 4, 5] + + assert s.dtype == dtype + tm.assert_series_equal(output, s) + + def test_series_nested(self, orient): + s = Series( + [10, 20, 30, 40, 50, 60], name="series", index=[6, 7, 8, 9, 10, 15] + ).sort_values() + nested = {"s1": s, "s2": s.copy()} + kwargs = {} if orient is None else {"orient": orient} + + exp = { + "s1": ujson.decode(ujson.encode(s, **kwargs)), + "s2": ujson.decode(ujson.encode(s, **kwargs)), + } + assert ujson.decode(ujson.encode(nested, **kwargs)) == exp + + def test_index(self): + i = Index([23, 45, 18, 98, 43, 11], name="index") + + # Column indexed. 
+ output = Index(ujson.decode(ujson.encode(i)), name="index") + tm.assert_index_equal(i, output) + + output = Index(ujson.decode(ujson.encode(i), numpy=True), name="index") + tm.assert_index_equal(i, output) + + dec = _clean_dict(ujson.decode(ujson.encode(i, orient="split"))) + output = Index(**dec) + + tm.assert_index_equal(i, output) + assert i.name == output.name + + dec = _clean_dict(ujson.decode(ujson.encode(i, orient="split"), numpy=True)) + output = Index(**dec) + + tm.assert_index_equal(i, output) + assert i.name == output.name + + output = Index(ujson.decode(ujson.encode(i, orient="values")), name="index") + tm.assert_index_equal(i, output) + + output = Index( + ujson.decode(ujson.encode(i, orient="values"), numpy=True), name="index" + ) + tm.assert_index_equal(i, output) + + output = Index(ujson.decode(ujson.encode(i, orient="records")), name="index") + tm.assert_index_equal(i, output) + + output = Index( + ujson.decode(ujson.encode(i, orient="records"), numpy=True), name="index" + ) + tm.assert_index_equal(i, output) + + output = Index(ujson.decode(ujson.encode(i, orient="index")), name="index") + tm.assert_index_equal(i, output) + + output = Index( + ujson.decode(ujson.encode(i, orient="index"), numpy=True), name="index" + ) + tm.assert_index_equal(i, output) + + def test_datetime_index(self): + date_unit = "ns" + + # freq doesn't round-trip + rng = DatetimeIndex(list(date_range("1/1/2000", periods=20)), freq=None) + encoded = ujson.encode(rng, date_unit=date_unit) + + decoded = DatetimeIndex(np.array(ujson.decode(encoded))) + tm.assert_index_equal(rng, decoded) + + ts = Series(np.random.randn(len(rng)), index=rng) + decoded = Series(ujson.decode(ujson.encode(ts, date_unit=date_unit))) + + idx_values = decoded.index.values.astype(np.int64) + decoded.index = DatetimeIndex(idx_values) + tm.assert_series_equal(ts, decoded) + + @pytest.mark.parametrize( + "invalid_arr", + [ + "[31337,]", # Trailing comma. + "[,31337]", # Leading comma. + "[]]", # Unmatched bracket. + "[,]", # Only comma. 
+ ], + ) + def test_decode_invalid_array(self, invalid_arr): + msg = ( + "Expected object or value|Trailing data|" + "Unexpected character found when decoding array value" + ) + with pytest.raises(ValueError, match=msg): + ujson.decode(invalid_arr) + + @pytest.mark.parametrize("arr", [[], [31337]]) + def test_decode_array(self, arr): + assert arr == ujson.decode(str(arr)) + + @pytest.mark.parametrize("extreme_num", [9223372036854775807, -9223372036854775808]) + def test_decode_extreme_numbers(self, extreme_num): + assert extreme_num == ujson.decode(str(extreme_num)) + + @pytest.mark.parametrize("too_extreme_num", [f"{2**64}", f"{-2**63-1}"]) + def test_decode_too_extreme_numbers(self, too_extreme_num): + with pytest.raises( + ValueError, + match="Value is too big|Value is too small", + ): + ujson.decode(too_extreme_num) + + def test_decode_with_trailing_whitespaces(self): + assert {} == ujson.decode("{}\n\t ") + + def test_decode_with_trailing_non_whitespaces(self): + with pytest.raises(ValueError, match="Trailing data"): + ujson.decode("{}\n\t a") + + @pytest.mark.parametrize("value", [f"{2**64}", f"{-2**63-1}"]) + def test_decode_array_with_big_int(self, value): + with pytest.raises( + ValueError, + match="Value is too big|Value is too small", + ): + ujson.loads(value) + + @pytest.mark.parametrize( + "float_number", + [ + 1.1234567893, + 1.234567893, + 1.34567893, + 1.4567893, + 1.567893, + 1.67893, + 1.7893, + 1.893, + 1.3, + ], + ) + @pytest.mark.parametrize("sign", [-1, 1]) + def test_decode_floating_point(self, sign, float_number): + float_number *= sign + tm.assert_almost_equal(float_number, ujson.loads(str(float_number)), rtol=1e-15) + + def test_encode_big_set(self): + s = set() + + for x in range(0, 100000): + s.add(x) + + # Make sure no Exception is raised. 
+ ujson.encode(s) + + def test_encode_empty_set(self): + assert "[]" == ujson.encode(set()) + + def test_encode_set(self): + s = {1, 2, 3, 4, 5, 6, 7, 8, 9} + enc = ujson.encode(s) + dec = ujson.decode(enc) + + for v in dec: + assert v in s + + @pytest.mark.parametrize( + "td", + [ + Timedelta(days=366), + Timedelta(days=-1), + Timedelta(hours=13, minutes=5, seconds=5), + Timedelta(hours=13, minutes=20, seconds=30), + Timedelta(days=-1, nanoseconds=5), + Timedelta(nanoseconds=1), + Timedelta(microseconds=1, nanoseconds=1), + Timedelta(milliseconds=1, microseconds=1, nanoseconds=1), + Timedelta(milliseconds=999, microseconds=999, nanoseconds=999), + ], + ) + def test_encode_timedelta_iso(self, td): + # GH 28256 + result = ujson.encode(td, iso_dates=True) + expected = f'"{td.isoformat()}"' + + assert result == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..ad8bf8e Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/conftest.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/conftest.cpython-38.pyc new file mode 100644 index 0000000..c471a0a Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/conftest.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_c_parser_only.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_c_parser_only.cpython-38.pyc new file mode 100644 index 0000000..8b685a4 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_c_parser_only.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_comment.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_comment.cpython-38.pyc new file mode 100644 index 0000000..2b25c37 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_comment.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_compression.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_compression.cpython-38.pyc new file mode 100644 index 0000000..84d0da1 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_compression.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_converters.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_converters.cpython-38.pyc new file mode 100644 index 0000000..f9f9c26 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_converters.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_dialect.cpython-38.pyc 
b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_dialect.cpython-38.pyc new file mode 100644 index 0000000..3d1a44f Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_dialect.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_encoding.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_encoding.cpython-38.pyc new file mode 100644 index 0000000..d210eb0 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_encoding.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_header.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_header.cpython-38.pyc new file mode 100644 index 0000000..223571c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_header.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_index_col.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_index_col.cpython-38.pyc new file mode 100644 index 0000000..db6d94b Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_index_col.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_mangle_dupes.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_mangle_dupes.cpython-38.pyc new file mode 100644 index 0000000..0407459 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_mangle_dupes.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_multi_thread.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_multi_thread.cpython-38.pyc new file mode 100644 index 0000000..071a5d2 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_multi_thread.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_na_values.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_na_values.cpython-38.pyc new file mode 100644 index 0000000..0990cd0 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_na_values.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_network.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_network.cpython-38.pyc new file mode 100644 index 0000000..4eea26f Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_network.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_parse_dates.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_parse_dates.cpython-38.pyc new file mode 100644 index 0000000..7af863c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_parse_dates.cpython-38.pyc differ diff --git 
a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_python_parser_only.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_python_parser_only.cpython-38.pyc new file mode 100644 index 0000000..dbc1b3c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_python_parser_only.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_quoting.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_quoting.cpython-38.pyc new file mode 100644 index 0000000..40bbb4c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_quoting.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_read_fwf.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_read_fwf.cpython-38.pyc new file mode 100644 index 0000000..7b28601 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_read_fwf.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_skiprows.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_skiprows.cpython-38.pyc new file mode 100644 index 0000000..d97a77c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_skiprows.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_textreader.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_textreader.cpython-38.pyc new file mode 100644 index 0000000..dc16bb5 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_textreader.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_unsupported.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_unsupported.cpython-38.pyc new file mode 100644 index 0000000..75792bf Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/__pycache__/test_unsupported.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..5ae20ce Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/__pycache__/test_chunksize.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/__pycache__/test_chunksize.cpython-38.pyc new file mode 100644 index 0000000..54d0665 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/__pycache__/test_chunksize.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/__pycache__/test_common_basic.cpython-38.pyc 
b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/__pycache__/test_common_basic.cpython-38.pyc new file mode 100644 index 0000000..64b1432 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/__pycache__/test_common_basic.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/__pycache__/test_data_list.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/__pycache__/test_data_list.cpython-38.pyc new file mode 100644 index 0000000..0960960 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/__pycache__/test_data_list.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/__pycache__/test_decimal.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/__pycache__/test_decimal.cpython-38.pyc new file mode 100644 index 0000000..9e70ac0 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/__pycache__/test_decimal.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/__pycache__/test_file_buffer_url.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/__pycache__/test_file_buffer_url.cpython-38.pyc new file mode 100644 index 0000000..ad02c49 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/__pycache__/test_file_buffer_url.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/__pycache__/test_float.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/__pycache__/test_float.cpython-38.pyc new file mode 100644 index 0000000..8600007 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/__pycache__/test_float.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/__pycache__/test_index.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/__pycache__/test_index.cpython-38.pyc new file mode 100644 index 0000000..a7a1b87 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/__pycache__/test_index.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/__pycache__/test_inf.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/__pycache__/test_inf.cpython-38.pyc new file mode 100644 index 0000000..4a2f6ba Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/__pycache__/test_inf.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/__pycache__/test_ints.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/__pycache__/test_ints.cpython-38.pyc new file mode 100644 index 0000000..bfaaa27 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/__pycache__/test_ints.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/__pycache__/test_iterator.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/__pycache__/test_iterator.cpython-38.pyc new file mode 100644 index 0000000..89a9231 Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/__pycache__/test_iterator.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/__pycache__/test_read_errors.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/__pycache__/test_read_errors.cpython-38.pyc new file mode 100644 index 0000000..9e114ad Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/__pycache__/test_read_errors.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/__pycache__/test_verbose.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/__pycache__/test_verbose.cpython-38.pyc new file mode 100644 index 0000000..c1addfd Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/__pycache__/test_verbose.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/test_chunksize.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/test_chunksize.py new file mode 100644 index 0000000..8a3f878 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/test_chunksize.py @@ -0,0 +1,278 @@ +""" +Tests that work on both the Python and C engines but do not have a +specific classification into the other test modules. +""" +from io import StringIO + +import numpy as np +import pytest + +from pandas.errors import DtypeWarning + +from pandas import ( + DataFrame, + concat, +) +import pandas._testing as tm + +pytestmark = pytest.mark.usefixtures("pyarrow_skip") + + +@pytest.mark.parametrize("index_col", [0, "index"]) +def test_read_chunksize_with_index(all_parsers, index_col): + parser = all_parsers + data = """index,A,B,C,D +foo,2,3,4,5 +bar,7,8,9,10 +baz,12,13,14,15 +qux,12,13,14,15 +foo2,12,13,14,15 +bar2,12,13,14,15 +""" + + expected = DataFrame( + [ + ["foo", 2, 3, 4, 5], + ["bar", 7, 8, 9, 10], + ["baz", 12, 13, 14, 15], + ["qux", 12, 13, 14, 15], + ["foo2", 12, 13, 14, 15], + ["bar2", 12, 13, 14, 15], + ], + columns=["index", "A", "B", "C", "D"], + ) + expected = expected.set_index("index") + + with parser.read_csv(StringIO(data), index_col=0, chunksize=2) as reader: + chunks = list(reader) + tm.assert_frame_equal(chunks[0], expected[:2]) + tm.assert_frame_equal(chunks[1], expected[2:4]) + tm.assert_frame_equal(chunks[2], expected[4:]) + + +@pytest.mark.parametrize("chunksize", [1.3, "foo", 0]) +def test_read_chunksize_bad(all_parsers, chunksize): + data = """index,A,B,C,D +foo,2,3,4,5 +bar,7,8,9,10 +baz,12,13,14,15 +qux,12,13,14,15 +foo2,12,13,14,15 +bar2,12,13,14,15 +""" + parser = all_parsers + msg = r"'chunksize' must be an integer >=1" + + with pytest.raises(ValueError, match=msg): + with parser.read_csv(StringIO(data), chunksize=chunksize) as _: + pass + + +@pytest.mark.parametrize("chunksize", [2, 8]) +def test_read_chunksize_and_nrows(all_parsers, chunksize): + # see gh-15755 + data = """index,A,B,C,D +foo,2,3,4,5 +bar,7,8,9,10 +baz,12,13,14,15 +qux,12,13,14,15 +foo2,12,13,14,15 +bar2,12,13,14,15 +""" + parser = all_parsers + kwargs = {"index_col": 0, "nrows": 5} + + expected = parser.read_csv(StringIO(data), **kwargs) + with parser.read_csv(StringIO(data), chunksize=chunksize, **kwargs) as reader: + tm.assert_frame_equal(concat(reader), expected) + + +def test_read_chunksize_and_nrows_changing_size(all_parsers): + data = """index,A,B,C,D +foo,2,3,4,5 +bar,7,8,9,10 +baz,12,13,14,15 +qux,12,13,14,15 
+foo2,12,13,14,15 +bar2,12,13,14,15 +""" + parser = all_parsers + kwargs = {"index_col": 0, "nrows": 5} + + expected = parser.read_csv(StringIO(data), **kwargs) + with parser.read_csv(StringIO(data), chunksize=8, **kwargs) as reader: + tm.assert_frame_equal(reader.get_chunk(size=2), expected.iloc[:2]) + tm.assert_frame_equal(reader.get_chunk(size=4), expected.iloc[2:5]) + + with pytest.raises(StopIteration, match=""): + reader.get_chunk(size=3) + + +def test_get_chunk_passed_chunksize(all_parsers): + parser = all_parsers + data = """A,B,C +1,2,3 +4,5,6 +7,8,9 +1,2,3""" + + with parser.read_csv(StringIO(data), chunksize=2) as reader: + result = reader.get_chunk() + + expected = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["A", "B", "C"]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("kwargs", [{}, {"index_col": 0}]) +def test_read_chunksize_compat(all_parsers, kwargs): + # see gh-12185 + data = """index,A,B,C,D +foo,2,3,4,5 +bar,7,8,9,10 +baz,12,13,14,15 +qux,12,13,14,15 +foo2,12,13,14,15 +bar2,12,13,14,15 +""" + parser = all_parsers + result = parser.read_csv(StringIO(data), **kwargs) + with parser.read_csv(StringIO(data), chunksize=2, **kwargs) as reader: + tm.assert_frame_equal(concat(reader), result) + + +def test_read_chunksize_jagged_names(all_parsers): + # see gh-23509 + parser = all_parsers + data = "\n".join(["0"] * 7 + [",".join(["0"] * 10)]) + + expected = DataFrame([[0] + [np.nan] * 9] * 7 + [[0] * 10]) + with parser.read_csv(StringIO(data), names=range(10), chunksize=4) as reader: + result = concat(reader) + tm.assert_frame_equal(result, expected) + + +def test_chunk_begins_with_newline_whitespace(all_parsers): + # see gh-10022 + parser = all_parsers + data = "\n hello\nworld\n" + + result = parser.read_csv(StringIO(data), header=None) + expected = DataFrame([" hello", "world"]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.slow +def test_chunks_have_consistent_numerical_type(all_parsers): + parser = all_parsers + integers = [str(i) for i in range(499999)] + data = "a\n" + "\n".join(integers + ["1.0", "2.0"] + integers) + + # Coercions should work without warnings. + with tm.assert_produces_warning(None): + result = parser.read_csv(StringIO(data)) + + assert type(result.a[0]) is np.float64 + assert result.a.dtype == float + + +def test_warn_if_chunks_have_mismatched_type(all_parsers): + warning_type = None + parser = all_parsers + size = 10000 + + # see gh-3866: if chunks are different types and can't + # be coerced using numerical types, then issue warning. + if parser.engine == "c" and parser.low_memory: + warning_type = DtypeWarning + # Use larger size to hit warning path + size = 499999 + + integers = [str(i) for i in range(size)] + data = "a\n" + "\n".join(integers + ["a", "b"] + integers) + + buf = StringIO(data) + + with tm.assert_produces_warning(warning_type): + df = parser.read_csv(buf) + + assert df.a.dtype == object + + +@pytest.mark.parametrize("iterator", [True, False]) +def test_empty_with_nrows_chunksize(all_parsers, iterator): + # see gh-9535 + parser = all_parsers + expected = DataFrame(columns=["foo", "bar"]) + + nrows = 10 + data = StringIO("foo,bar\n") + + if iterator: + with parser.read_csv(data, chunksize=nrows) as reader: + result = next(iter(reader)) + else: + result = parser.read_csv(data, nrows=nrows) + + tm.assert_frame_equal(result, expected) + + +def test_read_csv_memory_growth_chunksize(all_parsers): + # see gh-24805 + # + # Let's just make sure that we don't crash + # as we iteratively process all chunks. 
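+ # The access pattern being exercised, sketched with the same names as
+ # the body below: read_csv(path, chunksize=20) returns a TextFileReader
+ # used as a context manager; iterating it yields successive DataFrames
+ # of at most 20 rows each, and memory use should stay flat across
+ # chunks rather than grow with the file.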
+ parser = all_parsers + + with tm.ensure_clean() as path: + with open(path, "w") as f: + for i in range(1000): + f.write(str(i) + "\n") + + with parser.read_csv(path, chunksize=20) as result: + for _ in result: + pass + + +def test_chunksize_with_usecols_second_block_shorter(all_parsers): + # GH#21211 + parser = all_parsers + data = """1,2,3,4 +5,6,7,8 +9,10,11 +""" + + result_chunks = parser.read_csv( + StringIO(data), + names=["a", "b"], + chunksize=2, + usecols=[0, 1], + header=None, + ) + + expected_frames = [ + DataFrame({"a": [1, 5], "b": [2, 6]}), + DataFrame({"a": [9], "b": [10]}, index=[2]), + ] + + for i, result in enumerate(result_chunks): + tm.assert_frame_equal(result, expected_frames[i]) + + +def test_chunksize_second_block_shorter(all_parsers): + # GH#21211 + parser = all_parsers + data = """a,b,c,d +1,2,3,4 +5,6,7,8 +9,10,11 +""" + + result_chunks = parser.read_csv(StringIO(data), chunksize=2) + + expected_frames = [ + DataFrame({"a": [1, 5], "b": [2, 6], "c": [3, 7], "d": [4, 8]}), + DataFrame({"a": [9], "b": [10], "c": [11], "d": [np.nan]}, index=[2]), + ] + + for i, result in enumerate(result_chunks): + tm.assert_frame_equal(result, expected_frames[i]) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/test_common_basic.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/test_common_basic.py new file mode 100644 index 0000000..bde69e3 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/test_common_basic.py @@ -0,0 +1,931 @@ +""" +Tests that work on both the Python and C engines but do not have a +specific classification into the other test modules. +""" +from datetime import datetime +from inspect import signature +from io import StringIO +import os +from pathlib import Path +import sys + +import numpy as np +import pytest + +from pandas.compat import PY310 +from pandas.errors import ( + EmptyDataError, + ParserError, + ParserWarning, +) + +from pandas import ( + DataFrame, + Index, + Series, + Timestamp, + compat, +) +import pandas._testing as tm + +from pandas.io.parsers import TextFileReader +from pandas.io.parsers.c_parser_wrapper import CParserWrapper + +xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") +skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") + + +def test_override_set_noconvert_columns(): + # see gh-17351 + # + # Usecols needs to be sorted in _set_noconvert_columns based + # on the test_usecols_with_parse_dates test from test_usecols.py + class MyTextFileReader(TextFileReader): + def __init__(self): + self._currow = 0 + self.squeeze = False + + class MyCParserWrapper(CParserWrapper): + def _set_noconvert_columns(self): + if self.usecols_dtype == "integer": + # self.usecols is a set, which is documented as unordered + # but in practice, a CPython set of integers is sorted. + # In other implementations this assumption does not hold. 
+ # The following code simulates a different order, which + # before GH 17351 would cause the wrong columns to be + # converted via the parse_dates parameter + self.usecols = list(self.usecols) + self.usecols.reverse() + return CParserWrapper._set_noconvert_columns(self) + + data = """a,b,c,d,e +0,1,20140101,0900,4 +0,1,20140102,1000,4""" + + parse_dates = [[1, 2]] + cols = { + "a": [0, 0], + "c_d": [Timestamp("2014-01-01 09:00:00"), Timestamp("2014-01-02 10:00:00")], + } + expected = DataFrame(cols, columns=["c_d", "a"]) + + parser = MyTextFileReader() + parser.options = { + "usecols": [0, 2, 3], + "parse_dates": parse_dates, + "delimiter": ",", + } + parser.engine = "c" + parser._engine = MyCParserWrapper(StringIO(data), **parser.options) + + result = parser.read() + tm.assert_frame_equal(result, expected) + + +def test_read_csv_local(all_parsers, csv1): + prefix = "file:///" if compat.is_platform_windows() else "file://" + parser = all_parsers + + fname = prefix + str(os.path.abspath(csv1)) + result = parser.read_csv(fname, index_col=0, parse_dates=True) + + expected = DataFrame( + [ + [0.980269, 3.685731, -0.364216805298, -1.159738], + [1.047916, -0.041232, -0.16181208307, 0.212549], + [0.498581, 0.731168, -0.537677223318, 1.346270], + [1.120202, 1.567621, 0.00364077397681, 0.675253], + [-0.487094, 0.571455, -1.6116394093, 0.103469], + [0.836649, 0.246462, 0.588542635376, 1.062782], + [-0.157161, 1.340307, 1.1957779562, -1.097007], + ], + columns=["A", "B", "C", "D"], + index=Index( + [ + datetime(2000, 1, 3), + datetime(2000, 1, 4), + datetime(2000, 1, 5), + datetime(2000, 1, 6), + datetime(2000, 1, 7), + datetime(2000, 1, 10), + datetime(2000, 1, 11), + ], + name="index", + ), + ) + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +def test_1000_sep(all_parsers): + parser = all_parsers + data = """A|B|C +1|2,334|5 +10|13|10. +""" + expected = DataFrame({"A": [1, 10], "B": [2334, 13], "C": [5, 10.0]}) + + result = parser.read_csv(StringIO(data), sep="|", thousands=",") + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("squeeze", [True, False]) +def test_squeeze(all_parsers, squeeze): + data = """\ +a,1 +b,2 +c,3 +""" + parser = all_parsers + index = Index(["a", "b", "c"], name=0) + expected = Series([1, 2, 3], name=1, index=index) + + result = parser.read_csv_check_warnings( + FutureWarning, + "The squeeze argument has been deprecated " + "and will be removed in a future version. " + 'Append .squeeze\\("columns"\\) to the call to squeeze.\n\n', + StringIO(data), + index_col=0, + header=None, + squeeze=squeeze, + ) + if not squeeze: + expected = DataFrame(expected) + tm.assert_frame_equal(result, expected) + else: + tm.assert_series_equal(result, expected) + + # see gh-8217 + # + # Series should not be a view. 
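+ # _is_view is a pandas-internal flag: it is True when a Series shares
+ # its underlying buffer with another object. The squeezed result must
+ # own its data, so mutating it cannot write back into parser state.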
+ assert not result._is_view + + +@xfail_pyarrow +def test_unnamed_columns(all_parsers): + data = """A,B,C,, +1,2,3,4,5 +6,7,8,9,10 +11,12,13,14,15 +""" + parser = all_parsers + expected = DataFrame( + [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]], + dtype=np.int64, + columns=["A", "B", "C", "Unnamed: 3", "Unnamed: 4"], + ) + result = parser.read_csv(StringIO(data)) + tm.assert_frame_equal(result, expected) + + +def test_csv_mixed_type(all_parsers): + data = """A,B,C +a,1,2 +b,3,4 +c,4,5 +""" + parser = all_parsers + expected = DataFrame({"A": ["a", "b", "c"], "B": [1, 3, 4], "C": [2, 4, 5]}) + result = parser.read_csv(StringIO(data)) + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +def test_read_csv_low_memory_no_rows_with_index(all_parsers): + # see gh-21141 + parser = all_parsers + + if not parser.low_memory: + pytest.skip("This is a low-memory specific test") + + data = """A,B,C +1,1,1,2 +2,2,3,4 +3,3,4,5 +""" + result = parser.read_csv(StringIO(data), low_memory=True, index_col=0, nrows=0) + expected = DataFrame(columns=["A", "B", "C"]) + tm.assert_frame_equal(result, expected) + + +def test_read_csv_dataframe(all_parsers, csv1): + parser = all_parsers + result = parser.read_csv(csv1, index_col=0, parse_dates=True) + + expected = DataFrame( + [ + [0.980269, 3.685731, -0.364216805298, -1.159738], + [1.047916, -0.041232, -0.16181208307, 0.212549], + [0.498581, 0.731168, -0.537677223318, 1.346270], + [1.120202, 1.567621, 0.00364077397681, 0.675253], + [-0.487094, 0.571455, -1.6116394093, 0.103469], + [0.836649, 0.246462, 0.588542635376, 1.062782], + [-0.157161, 1.340307, 1.1957779562, -1.097007], + ], + columns=["A", "B", "C", "D"], + index=Index( + [ + datetime(2000, 1, 3), + datetime(2000, 1, 4), + datetime(2000, 1, 5), + datetime(2000, 1, 6), + datetime(2000, 1, 7), + datetime(2000, 1, 10), + datetime(2000, 1, 11), + ], + name="index", + ), + ) + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +@pytest.mark.parametrize("nrows", [3, 3.0]) +def test_read_nrows(all_parsers, nrows): + # see gh-10476 + data = """index,A,B,C,D +foo,2,3,4,5 +bar,7,8,9,10 +baz,12,13,14,15 +qux,12,13,14,15 +foo2,12,13,14,15 +bar2,12,13,14,15 +""" + expected = DataFrame( + [["foo", 2, 3, 4, 5], ["bar", 7, 8, 9, 10], ["baz", 12, 13, 14, 15]], + columns=["index", "A", "B", "C", "D"], + ) + parser = all_parsers + + result = parser.read_csv(StringIO(data), nrows=nrows) + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +@pytest.mark.parametrize("nrows", [1.2, "foo", -1]) +def test_read_nrows_bad(all_parsers, nrows): + data = """index,A,B,C,D +foo,2,3,4,5 +bar,7,8,9,10 +baz,12,13,14,15 +qux,12,13,14,15 +foo2,12,13,14,15 +bar2,12,13,14,15 +""" + msg = r"'nrows' must be an integer >=0" + parser = all_parsers + + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), nrows=nrows) + + +def test_nrows_skipfooter_errors(all_parsers): + msg = "'skipfooter' not supported with 'nrows'" + data = "a\n1\n2\n3\n4\n5\n6" + parser = all_parsers + + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), skipfooter=1, nrows=5) + + +@xfail_pyarrow +def test_missing_trailing_delimiters(all_parsers): + parser = all_parsers + data = """A,B,C,D +1,2,3,4 +1,3,3, +1,4,5""" + + result = parser.read_csv(StringIO(data)) + expected = DataFrame( + [[1, 2, 3, 4], [1, 3, 3, np.nan], [1, 4, 5, np.nan]], + columns=["A", "B", "C", "D"], + ) + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +def test_skip_initial_space(all_parsers): + data = ( + 
'"09-Apr-2012", "01:10:18.300", 2456026.548822908, 12849, ' + "1.00361, 1.12551, 330.65659, 0355626618.16711, 73.48821, " + "314.11625, 1917.09447, 179.71425, 80.000, 240.000, -350, " + "70.06056, 344.98370, 1, 1, -0.689265, -0.692787, " + "0.212036, 14.7674, 41.605, -9999.0, -9999.0, " + "-9999.0, -9999.0, -9999.0, -9999.0, 000, 012, 128" + ) + parser = all_parsers + + result = parser.read_csv( + StringIO(data), + names=list(range(33)), + header=None, + na_values=["-9999.0"], + skipinitialspace=True, + ) + expected = DataFrame( + [ + [ + "09-Apr-2012", + "01:10:18.300", + 2456026.548822908, + 12849, + 1.00361, + 1.12551, + 330.65659, + 355626618.16711, + 73.48821, + 314.11625, + 1917.09447, + 179.71425, + 80.0, + 240.0, + -350, + 70.06056, + 344.9837, + 1, + 1, + -0.689265, + -0.692787, + 0.212036, + 14.7674, + 41.605, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + 0, + 12, + 128, + ] + ] + ) + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +def test_trailing_delimiters(all_parsers): + # see gh-2442 + data = """A,B,C +1,2,3, +4,5,6, +7,8,9,""" + parser = all_parsers + result = parser.read_csv(StringIO(data), index_col=False) + + expected = DataFrame({"A": [1, 4, 7], "B": [2, 5, 8], "C": [3, 6, 9]}) + tm.assert_frame_equal(result, expected) + + +def test_escapechar(all_parsers): + # https://stackoverflow.com/questions/13824840/feature-request-for- + # pandas-read-csv + data = '''SEARCH_TERM,ACTUAL_URL +"bra tv board","http://www.ikea.com/se/sv/catalog/categories/departments/living_room/10475/?se%7cps%7cnonbranded%7cvardagsrum%7cgoogle%7ctv_bord" +"tv p\xc3\xa5 hjul","http://www.ikea.com/se/sv/catalog/categories/departments/living_room/10475/?se%7cps%7cnonbranded%7cvardagsrum%7cgoogle%7ctv_bord" +"SLAGBORD, \\"Bergslagen\\", IKEA:s 1700-tals series","http://www.ikea.com/se/sv/catalog/categories/departments/living_room/10475/?se%7cps%7cnonbranded%7cvardagsrum%7cgoogle%7ctv_bord"''' # noqa:E501 + + parser = all_parsers + result = parser.read_csv( + StringIO(data), escapechar="\\", quotechar='"', encoding="utf-8" + ) + + assert result["SEARCH_TERM"][2] == 'SLAGBORD, "Bergslagen", IKEA:s 1700-tals series' + + tm.assert_index_equal(result.columns, Index(["SEARCH_TERM", "ACTUAL_URL"])) + + +@xfail_pyarrow +def test_ignore_leading_whitespace(all_parsers): + # see gh-3374, gh-6607 + parser = all_parsers + data = " a b c\n 1 2 3\n 4 5 6\n 7 8 9" + result = parser.read_csv(StringIO(data), sep=r"\s+") + + expected = DataFrame({"a": [1, 4, 7], "b": [2, 5, 8], "c": [3, 6, 9]}) + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +@pytest.mark.parametrize("usecols", [None, [0, 1], ["a", "b"]]) +def test_uneven_lines_with_usecols(all_parsers, usecols): + # see gh-12203 + parser = all_parsers + data = r"""a,b,c +0,1,2 +3,4,5,6,7 +8,9,10""" + + if usecols is None: + # Make sure that an error is still raised + # when the "usecols" parameter is not provided. + msg = r"Expected \d+ fields in line \d+, saw \d+" + with pytest.raises(ParserError, match=msg): + parser.read_csv(StringIO(data)) + else: + expected = DataFrame({"a": [0, 3, 8], "b": [1, 4, 9]}) + + result = parser.read_csv(StringIO(data), usecols=usecols) + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +@pytest.mark.parametrize( + "data,kwargs,expected", + [ + # First, check to see that the response of parser when faced with no + # provided columns raises the correct error, with or without usecols. 
+ ("", {}, None), + ("", {"usecols": ["X"]}, None), + ( + ",,", + {"names": ["Dummy", "X", "Dummy_2"], "usecols": ["X"]}, + DataFrame(columns=["X"], index=[0], dtype=np.float64), + ), + ( + "", + {"names": ["Dummy", "X", "Dummy_2"], "usecols": ["X"]}, + DataFrame(columns=["X"]), + ), + ], +) +def test_read_empty_with_usecols(all_parsers, data, kwargs, expected): + # see gh-12493 + parser = all_parsers + + if expected is None: + msg = "No columns to parse from file" + with pytest.raises(EmptyDataError, match=msg): + parser.read_csv(StringIO(data), **kwargs) + else: + result = parser.read_csv(StringIO(data), **kwargs) + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +@pytest.mark.parametrize( + "kwargs,expected", + [ + # gh-8661, gh-8679: this should ignore six lines, including + # lines with trailing whitespace and blank lines. + ( + { + "header": None, + "delim_whitespace": True, + "skiprows": [0, 1, 2, 3, 5, 6], + "skip_blank_lines": True, + }, + DataFrame([[1.0, 2.0, 4.0], [5.1, np.nan, 10.0]]), + ), + # gh-8983: test skipping set of rows after a row with trailing spaces. + ( + { + "delim_whitespace": True, + "skiprows": [1, 2, 3, 5, 6], + "skip_blank_lines": True, + }, + DataFrame({"A": [1.0, 5.1], "B": [2.0, np.nan], "C": [4.0, 10]}), + ), + ], +) +def test_trailing_spaces(all_parsers, kwargs, expected): + data = "A B C \nrandom line with trailing spaces \nskip\n1,2,3\n1,2.,4.\nrandom line with trailing tabs\t\t\t\n \n5.1,NaN,10.0\n" # noqa:E501 + parser = all_parsers + + result = parser.read_csv(StringIO(data.replace(",", " ")), **kwargs) + tm.assert_frame_equal(result, expected) + + +def test_raise_on_sep_with_delim_whitespace(all_parsers): + # see gh-6607 + data = "a b c\n1 2 3" + parser = all_parsers + + with pytest.raises(ValueError, match="you can only specify one"): + parser.read_csv(StringIO(data), sep=r"\s", delim_whitespace=True) + + +def test_read_filepath_or_buffer(all_parsers): + # see gh-43366 + parser = all_parsers + + with pytest.raises(TypeError, match="Expected file path name or file-like"): + parser.read_csv(filepath_or_buffer=b"input") + + +@xfail_pyarrow +@pytest.mark.parametrize("delim_whitespace", [True, False]) +def test_single_char_leading_whitespace(all_parsers, delim_whitespace): + # see gh-9710 + parser = all_parsers + data = """\ +MyColumn +a +b +a +b\n""" + + expected = DataFrame({"MyColumn": list("abab")}) + result = parser.read_csv( + StringIO(data), skipinitialspace=True, delim_whitespace=delim_whitespace + ) + tm.assert_frame_equal(result, expected) + + +# Skip for now, actually only one test fails though, but its tricky to xfail +@skip_pyarrow +@pytest.mark.parametrize( + "sep,skip_blank_lines,exp_data", + [ + (",", True, [[1.0, 2.0, 4.0], [5.0, np.nan, 10.0], [-70.0, 0.4, 1.0]]), + (r"\s+", True, [[1.0, 2.0, 4.0], [5.0, np.nan, 10.0], [-70.0, 0.4, 1.0]]), + ( + ",", + False, + [ + [1.0, 2.0, 4.0], + [np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan], + [5.0, np.nan, 10.0], + [np.nan, np.nan, np.nan], + [-70.0, 0.4, 1.0], + ], + ), + ], +) +def test_empty_lines(all_parsers, sep, skip_blank_lines, exp_data): + parser = all_parsers + data = """\ +A,B,C +1,2.,4. 
+ + +5.,NaN,10.0 + +-70,.4,1 +""" + + if sep == r"\s+": + data = data.replace(",", " ") + + result = parser.read_csv(StringIO(data), sep=sep, skip_blank_lines=skip_blank_lines) + expected = DataFrame(exp_data, columns=["A", "B", "C"]) + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +def test_whitespace_lines(all_parsers): + parser = all_parsers + data = """ + +\t \t\t +\t +A,B,C +\t 1,2.,4. +5.,NaN,10.0 +""" + expected = DataFrame([[1, 2.0, 4.0], [5.0, np.nan, 10.0]], columns=["A", "B", "C"]) + result = parser.read_csv(StringIO(data)) + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +@pytest.mark.parametrize( + "data,expected", + [ + ( + """ A B C D +a 1 2 3 4 +b 1 2 3 4 +c 1 2 3 4 +""", + DataFrame( + [[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]], + columns=["A", "B", "C", "D"], + index=["a", "b", "c"], + ), + ), + ( + " a b c\n1 2 3 \n4 5 6\n 7 8 9", + DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["a", "b", "c"]), + ), + ], +) +def test_whitespace_regex_separator(all_parsers, data, expected): + # see gh-6607 + parser = all_parsers + result = parser.read_csv(StringIO(data), sep=r"\s+") + tm.assert_frame_equal(result, expected) + + +def test_sub_character(all_parsers, csv_dir_path): + # see gh-16893 + filename = os.path.join(csv_dir_path, "sub_char.csv") + expected = DataFrame([[1, 2, 3]], columns=["a", "\x1ab", "c"]) + + parser = all_parsers + result = parser.read_csv(filename) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("filename", ["sé-es-vé.csv", "ru-sй.csv", "中文文件名.csv"]) +def test_filename_with_special_chars(all_parsers, filename): + # see gh-15086. + parser = all_parsers + df = DataFrame({"a": [1, 2, 3]}) + + with tm.ensure_clean(filename) as path: + df.to_csv(path, index=False) + + result = parser.read_csv(path) + tm.assert_frame_equal(result, df) + + +def test_read_table_same_signature_as_read_csv(all_parsers): + # GH-34976 + parser = all_parsers + + table_sign = signature(parser.read_table) + csv_sign = signature(parser.read_csv) + + assert table_sign.parameters.keys() == csv_sign.parameters.keys() + assert table_sign.return_annotation == csv_sign.return_annotation + + for key, csv_param in csv_sign.parameters.items(): + table_param = table_sign.parameters[key] + if key == "sep": + assert csv_param.default == "," + assert table_param.default == "\t" + assert table_param.annotation == csv_param.annotation + assert table_param.kind == csv_param.kind + continue + else: + assert table_param == csv_param + + +def test_read_table_equivalency_to_read_csv(all_parsers): + # see gh-21948 + # As of 0.25.0, read_table is undeprecated + parser = all_parsers + data = "a\tb\n1\t2\n3\t4" + expected = parser.read_csv(StringIO(data), sep="\t") + result = parser.read_table(StringIO(data)) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.skipif( + PY310, + reason="GH41935 This test is leaking only on Python 3.10," + "causing other tests to fail with a cryptic error.", +) +@pytest.mark.parametrize("read_func", ["read_csv", "read_table"]) +def test_read_csv_and_table_sys_setprofile(all_parsers, read_func): + # GH#41069 + parser = all_parsers + data = "a b\n0 1" + + sys.setprofile(lambda *a, **k: None) + result = getattr(parser, read_func)(StringIO(data)) + sys.setprofile(None) + + expected = DataFrame({"a b": ["0 1"]}) + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +def test_first_row_bom(all_parsers): + # see gh-26545 + parser = all_parsers + data = '''\ufeff"Head1" "Head2" "Head3"''' + + result = 
parser.read_csv(StringIO(data), delimiter="\t") + expected = DataFrame(columns=["Head1", "Head2", "Head3"]) + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +def test_first_row_bom_unquoted(all_parsers): + # see gh-36343 + parser = all_parsers + data = """\ufeffHead1 Head2 Head3""" + + result = parser.read_csv(StringIO(data), delimiter="\t") + expected = DataFrame(columns=["Head1", "Head2", "Head3"]) + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +@pytest.mark.parametrize("nrows", range(1, 6)) +def test_blank_lines_between_header_and_data_rows(all_parsers, nrows): + # GH 28071 + ref = DataFrame( + [[np.nan, np.nan], [np.nan, np.nan], [1, 2], [np.nan, np.nan], [3, 4]], + columns=list("ab"), + ) + csv = "\nheader\n\na,b\n\n\n1,2\n\n3,4" + parser = all_parsers + df = parser.read_csv(StringIO(csv), header=3, nrows=nrows, skip_blank_lines=False) + tm.assert_frame_equal(df, ref[:nrows]) + + +@xfail_pyarrow +def test_no_header_two_extra_columns(all_parsers): + # GH 26218 + column_names = ["one", "two", "three"] + ref = DataFrame([["foo", "bar", "baz"]], columns=column_names) + stream = StringIO("foo,bar,baz,bam,blah") + parser = all_parsers + with tm.assert_produces_warning(ParserWarning): + df = parser.read_csv(stream, header=None, names=column_names, index_col=False) + tm.assert_frame_equal(df, ref) + + +def test_read_csv_names_not_accepting_sets(all_parsers): + # GH 34946 + data = """\ + 1,2,3 + 4,5,6\n""" + parser = all_parsers + with pytest.raises(ValueError, match="Names should be an ordered collection."): + parser.read_csv(StringIO(data), names=set("QAZ")) + + +@xfail_pyarrow +def test_read_table_delim_whitespace_default_sep(all_parsers): + # GH: 35958 + f = StringIO("a b c\n1 -2 -3\n4 5 6") + parser = all_parsers + result = parser.read_table(f, delim_whitespace=True) + expected = DataFrame({"a": [1, 4], "b": [-2, 5], "c": [-3, 6]}) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("delimiter", [",", "\t"]) +def test_read_csv_delim_whitespace_non_default_sep(all_parsers, delimiter): + # GH: 35958 + f = StringIO("a b c\n1 -2 -3\n4 5 6") + parser = all_parsers + msg = ( + "Specified a delimiter with both sep and " + "delim_whitespace=True; you can only specify one." + ) + with pytest.raises(ValueError, match=msg): + parser.read_csv(f, delim_whitespace=True, sep=delimiter) + + with pytest.raises(ValueError, match=msg): + parser.read_csv(f, delim_whitespace=True, delimiter=delimiter) + + +def test_read_csv_delimiter_and_sep_no_default(all_parsers): + # GH#39823 + f = StringIO("a,b\n1,2") + parser = all_parsers + msg = "Specified a sep and a delimiter; you can only specify one." + with pytest.raises(ValueError, match=msg): + parser.read_csv(f, sep=" ", delimiter=".") + + +@pytest.mark.parametrize("kwargs", [{"delimiter": "\n"}, {"sep": "\n"}]) +def test_read_csv_line_break_as_separator(kwargs, all_parsers): + # GH#43528 + parser = all_parsers + data = """a,b,c +1,2,3 + """ + msg = ( + r"Specified \\n as separator or delimiter. This forces the python engine " + r"which does not accept a line terminator. Hence it is not allowed to use " + r"the line terminator as separator." 
+ ) + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), **kwargs) + + +def test_read_csv_posargs_deprecation(all_parsers): + # GH 41485 + f = StringIO("a,b\n1,2") + parser = all_parsers + msg = ( + "In a future version of pandas all arguments of read_csv " + "except for the argument 'filepath_or_buffer' will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + parser.read_csv(f, " ") + + +@pytest.mark.parametrize("delimiter", [",", "\t"]) +def test_read_table_delim_whitespace_non_default_sep(all_parsers, delimiter): + # GH: 35958 + f = StringIO("a b c\n1 -2 -3\n4 5 6") + parser = all_parsers + msg = ( + "Specified a delimiter with both sep and " + "delim_whitespace=True; you can only specify one." + ) + with pytest.raises(ValueError, match=msg): + parser.read_table(f, delim_whitespace=True, sep=delimiter) + + with pytest.raises(ValueError, match=msg): + parser.read_table(f, delim_whitespace=True, delimiter=delimiter) + + +@pytest.mark.parametrize("func", ["read_csv", "read_table"]) +def test_names_and_prefix_not_None_raises(all_parsers, func): + # GH#39123 + f = StringIO("a,b\n1,2") + parser = all_parsers + msg = "Specified named and prefix; you can only specify one." + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(FutureWarning): + getattr(parser, func)(f, names=["a", "b"], prefix="x") + + +@pytest.mark.parametrize("func", ["read_csv", "read_table"]) +@pytest.mark.parametrize("prefix, names", [(None, ["x0", "x1"]), ("x", None)]) +def test_names_and_prefix_explicit_None(all_parsers, names, prefix, func): + # GH42387 + f = StringIO("a,b\n1,2") + expected = DataFrame({"x0": ["a", "1"], "x1": ["b", "2"]}) + parser = all_parsers + if prefix is not None: + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = getattr(parser, func)( + f, names=names, sep=",", prefix=prefix, header=None + ) + else: + result = getattr(parser, func)( + f, names=names, sep=",", prefix=prefix, header=None + ) + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +def test_dict_keys_as_names(all_parsers): + # GH: 36928 + data = "1,2" + + keys = {"a": int, "b": int}.keys() + parser = all_parsers + + result = parser.read_csv(StringIO(data), names=keys) + expected = DataFrame({"a": [1], "b": [2]}) + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +def test_encoding_surrogatepass(all_parsers): + # GH39017 + parser = all_parsers + content = b"\xed\xbd\xbf" + decoded = content.decode("utf-8", errors="surrogatepass") + expected = DataFrame({decoded: [decoded]}, index=[decoded * 2]) + expected.index.name = decoded * 2 + + with tm.ensure_clean() as path: + Path(path).write_bytes( + content * 2 + b"," + content + b"\n" + content * 2 + b"," + content + ) + df = parser.read_csv(path, encoding_errors="surrogatepass", index_col=0) + tm.assert_frame_equal(df, expected) + with pytest.raises(UnicodeDecodeError, match="'utf-8' codec can't decode byte"): + parser.read_csv(path) + + +@xfail_pyarrow +@pytest.mark.parametrize("on_bad_lines", ["error", "warn"]) +def test_deprecated_bad_lines_warns(all_parsers, csv1, on_bad_lines): + # GH 15122 + parser = all_parsers + kwds = {f"{on_bad_lines}_bad_lines": False} + parser.read_csv_check_warnings( + FutureWarning, + f"The {on_bad_lines}_bad_lines argument has been deprecated " + "and will be removed in a future version. 
" + "Use on_bad_lines in the future.\n\n", + csv1, + **kwds, + ) + + +def test_malformed_second_line(all_parsers): + # see GH14782 + parser = all_parsers + data = "\na\nb\n" + result = parser.read_csv(StringIO(data), skip_blank_lines=False, header=1) + expected = DataFrame({"a": ["b"]}) + tm.assert_frame_equal(result, expected) + + +def test_read_table_posargs_deprecation(all_parsers): + # https://github.com/pandas-dev/pandas/issues/41485 + data = StringIO("a\tb\n1\t2") + parser = all_parsers + msg = ( + "In a future version of pandas all arguments of read_table " + "except for the argument 'filepath_or_buffer' will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + parser.read_table(data, " ") diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/test_data_list.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/test_data_list.py new file mode 100644 index 0000000..8d484bb --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/test_data_list.py @@ -0,0 +1,87 @@ +""" +Tests that work on both the Python and C engines but do not have a +specific classification into the other test modules. +""" +import csv +from io import StringIO + +import pytest + +from pandas import DataFrame +import pandas._testing as tm + +from pandas.io.parsers import TextParser + +xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") + + +@xfail_pyarrow +def test_read_data_list(all_parsers): + parser = all_parsers + kwargs = {"index_col": 0} + data = "A,B,C\nfoo,1,2,3\nbar,4,5,6" + + data_list = [["A", "B", "C"], ["foo", "1", "2", "3"], ["bar", "4", "5", "6"]] + expected = parser.read_csv(StringIO(data), **kwargs) + + with TextParser(data_list, chunksize=2, **kwargs) as parser: + result = parser.read() + + tm.assert_frame_equal(result, expected) + + +def test_reader_list(all_parsers): + data = """index,A,B,C,D +foo,2,3,4,5 +bar,7,8,9,10 +baz,12,13,14,15 +qux,12,13,14,15 +foo2,12,13,14,15 +bar2,12,13,14,15 +""" + parser = all_parsers + kwargs = {"index_col": 0} + + lines = list(csv.reader(StringIO(data))) + with TextParser(lines, chunksize=2, **kwargs) as reader: + chunks = list(reader) + + expected = parser.read_csv(StringIO(data), **kwargs) + + tm.assert_frame_equal(chunks[0], expected[:2]) + tm.assert_frame_equal(chunks[1], expected[2:4]) + tm.assert_frame_equal(chunks[2], expected[4:]) + + +def test_reader_list_skiprows(all_parsers): + data = """index,A,B,C,D +foo,2,3,4,5 +bar,7,8,9,10 +baz,12,13,14,15 +qux,12,13,14,15 +foo2,12,13,14,15 +bar2,12,13,14,15 +""" + parser = all_parsers + kwargs = {"index_col": 0} + + lines = list(csv.reader(StringIO(data))) + with TextParser(lines, chunksize=2, skiprows=[1], **kwargs) as reader: + chunks = list(reader) + + expected = parser.read_csv(StringIO(data), **kwargs) + + tm.assert_frame_equal(chunks[0], expected[1:3]) + + +def test_read_csv_parse_simple_list(all_parsers): + parser = all_parsers + data = """foo +bar baz +qux foo +foo +bar""" + + result = parser.read_csv(StringIO(data), header=None) + expected = DataFrame(["foo", "bar baz", "qux foo", "foo", "bar"]) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/test_decimal.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/test_decimal.py new file mode 100644 index 0000000..ab58ddf --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/test_decimal.py @@ -0,0 +1,62 @@ +""" +Tests that work on both the 
Python and C engines but do not have a +specific classification into the other test modules. +""" +from io import StringIO + +import pytest + +from pandas import DataFrame +import pandas._testing as tm + +pytestmark = pytest.mark.usefixtures("pyarrow_skip") + + +@pytest.mark.parametrize( + "data,thousands,decimal", + [ + ( + """A|B|C +1|2,334.01|5 +10|13|10. +""", + ",", + ".", + ), + ( + """A|B|C +1|2.334,01|5 +10|13|10, +""", + ".", + ",", + ), + ], +) +def test_1000_sep_with_decimal(all_parsers, data, thousands, decimal): + parser = all_parsers + expected = DataFrame({"A": [1, 10], "B": [2334.01, 13], "C": [5, 10.0]}) + + result = parser.read_csv( + StringIO(data), sep="|", thousands=thousands, decimal=decimal + ) + tm.assert_frame_equal(result, expected) + + +def test_euro_decimal_format(all_parsers): + parser = all_parsers + data = """Id;Number1;Number2;Text1;Text2;Number3 +1;1521,1541;187101,9543;ABC;poi;4,738797819 +2;121,12;14897,76;DEF;uyt;0,377320872 +3;878,158;108013,434;GHI;rez;2,735694704""" + + result = parser.read_csv(StringIO(data), sep=";", decimal=",") + expected = DataFrame( + [ + [1, 1521.1541, 187101.9543, "ABC", "poi", 4.738797819], + [2, 121.12, 14897.76, "DEF", "uyt", 0.377320872], + [3, 878.158, 108013.434, "GHI", "rez", 2.735694704], + ], + columns=["Id", "Number1", "Number2", "Text1", "Text2", "Number3"], + ) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/test_file_buffer_url.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/test_file_buffer_url.py new file mode 100644 index 0000000..b5c3e98 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/test_file_buffer_url.py @@ -0,0 +1,423 @@ +""" +Tests that work on both the Python and C engines but do not have a +specific classification into the other test modules. +""" +from io import ( + BytesIO, + StringIO, +) +import os +import platform +from urllib.error import URLError + +import pytest + +from pandas.errors import ( + EmptyDataError, + ParserError, +) +import pandas.util._test_decorators as td + +from pandas import DataFrame +import pandas._testing as tm + +# TODO(1.4) Please xfail individual tests at release time +# instead of skip +pytestmark = pytest.mark.usefixtures("pyarrow_skip") + + +@pytest.mark.network +@tm.network +def test_url(all_parsers, csv_dir_path): + parser = all_parsers + kwargs = {"sep": "\t"} + + url = ( + "https://raw.github.com/pandas-dev/pandas/main/" + "pandas/tests/io/parser/data/salaries.csv" + ) + url_result = parser.read_csv(url, **kwargs) + + local_path = os.path.join(csv_dir_path, "salaries.csv") + local_result = parser.read_csv(local_path, **kwargs) + tm.assert_frame_equal(url_result, local_result) + + +@pytest.mark.slow +def test_local_file(all_parsers, csv_dir_path): + parser = all_parsers + kwargs = {"sep": "\t"} + + local_path = os.path.join(csv_dir_path, "salaries.csv") + local_result = parser.read_csv(local_path, **kwargs) + url = "file://localhost/" + local_path + + try: + url_result = parser.read_csv(url, **kwargs) + tm.assert_frame_equal(url_result, local_result) + except URLError: + # Fails on some systems. 
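+        # Reading through a "file://" URL goes via urllib rather than the
+        # plain filesystem, and not every platform wires that scheme up for
+        # local paths, so a URLError is treated as an environment limitation
+        # rather than a parser bug.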
+ pytest.skip("Failing on: " + " ".join(platform.uname())) + + +def test_path_path_lib(all_parsers): + parser = all_parsers + df = tm.makeDataFrame() + result = tm.round_trip_pathlib(df.to_csv, lambda p: parser.read_csv(p, index_col=0)) + tm.assert_frame_equal(df, result) + + +def test_path_local_path(all_parsers): + parser = all_parsers + df = tm.makeDataFrame() + result = tm.round_trip_localpath( + df.to_csv, lambda p: parser.read_csv(p, index_col=0) + ) + tm.assert_frame_equal(df, result) + + +def test_nonexistent_path(all_parsers): + # gh-2428: pls no segfault + # gh-14086: raise more helpful FileNotFoundError + # GH#29233 "File foo" instead of "File b'foo'" + parser = all_parsers + path = f"{tm.rands(10)}.csv" + + msg = r"\[Errno 2\]" + with pytest.raises(FileNotFoundError, match=msg) as e: + parser.read_csv(path) + assert path == e.value.filename + + +@td.skip_if_windows # os.chmod does not work in windows +def test_no_permission(all_parsers): + # GH 23784 + parser = all_parsers + + msg = r"\[Errno 13\]" + with tm.ensure_clean() as path: + os.chmod(path, 0) # make file unreadable + + # verify that this process cannot open the file (not running as sudo) + try: + with open(path): + pass + pytest.skip("Running as sudo.") + except PermissionError: + pass + + with pytest.raises(PermissionError, match=msg) as e: + parser.read_csv(path) + assert path == e.value.filename + + +@pytest.mark.parametrize( + "data,kwargs,expected,msg", + [ + # gh-10728: WHITESPACE_LINE + ( + "a,b,c\n4,5,6\n ", + {}, + DataFrame([[4, 5, 6]], columns=["a", "b", "c"]), + None, + ), + # gh-10548: EAT_LINE_COMMENT + ( + "a,b,c\n4,5,6\n#comment", + {"comment": "#"}, + DataFrame([[4, 5, 6]], columns=["a", "b", "c"]), + None, + ), + # EAT_CRNL_NOP + ( + "a,b,c\n4,5,6\n\r", + {}, + DataFrame([[4, 5, 6]], columns=["a", "b", "c"]), + None, + ), + # EAT_COMMENT + ( + "a,b,c\n4,5,6#comment", + {"comment": "#"}, + DataFrame([[4, 5, 6]], columns=["a", "b", "c"]), + None, + ), + # SKIP_LINE + ( + "a,b,c\n4,5,6\nskipme", + {"skiprows": [2]}, + DataFrame([[4, 5, 6]], columns=["a", "b", "c"]), + None, + ), + # EAT_LINE_COMMENT + ( + "a,b,c\n4,5,6\n#comment", + {"comment": "#", "skip_blank_lines": False}, + DataFrame([[4, 5, 6]], columns=["a", "b", "c"]), + None, + ), + # IN_FIELD + ( + "a,b,c\n4,5,6\n ", + {"skip_blank_lines": False}, + DataFrame([["4", 5, 6], [" ", None, None]], columns=["a", "b", "c"]), + None, + ), + # EAT_CRNL + ( + "a,b,c\n4,5,6\n\r", + {"skip_blank_lines": False}, + DataFrame([[4, 5, 6], [None, None, None]], columns=["a", "b", "c"]), + None, + ), + # ESCAPED_CHAR + ( + "a,b,c\n4,5,6\n\\", + {"escapechar": "\\"}, + None, + "(EOF following escape character)|(unexpected end of data)", + ), + # ESCAPE_IN_QUOTED_FIELD + ( + 'a,b,c\n4,5,6\n"\\', + {"escapechar": "\\"}, + None, + "(EOF inside string starting at row 2)|(unexpected end of data)", + ), + # IN_QUOTED_FIELD + ( + 'a,b,c\n4,5,6\n"', + {"escapechar": "\\"}, + None, + "(EOF inside string starting at row 2)|(unexpected end of data)", + ), + ], + ids=[ + "whitespace-line", + "eat-line-comment", + "eat-crnl-nop", + "eat-comment", + "skip-line", + "eat-line-comment", + "in-field", + "eat-crnl", + "escaped-char", + "escape-in-quoted-field", + "in-quoted-field", + ], +) +def test_eof_states(all_parsers, data, kwargs, expected, msg): + # see gh-10728, gh-10548 + parser = all_parsers + + if expected is None: + with pytest.raises(ParserError, match=msg): + parser.read_csv(StringIO(data), **kwargs) + else: + result = parser.read_csv(StringIO(data), **kwargs) + 
tm.assert_frame_equal(result, expected) + + +def test_temporary_file(all_parsers): + # see gh-13398 + parser = all_parsers + data = "0 0" + + with tm.ensure_clean(mode="w+", return_filelike=True) as new_file: + new_file.write(data) + new_file.flush() + new_file.seek(0) + + result = parser.read_csv(new_file, sep=r"\s+", header=None) + + expected = DataFrame([[0, 0]]) + tm.assert_frame_equal(result, expected) + + +def test_internal_eof_byte(all_parsers): + # see gh-5500 + parser = all_parsers + data = "a,b\n1\x1a,2" + + expected = DataFrame([["1\x1a", 2]], columns=["a", "b"]) + result = parser.read_csv(StringIO(data)) + tm.assert_frame_equal(result, expected) + + +def test_internal_eof_byte_to_file(all_parsers): + # see gh-16559 + parser = all_parsers + data = b'c1,c2\r\n"test \x1a test", test\r\n' + expected = DataFrame([["test \x1a test", " test"]], columns=["c1", "c2"]) + path = f"__{tm.rands(10)}__.csv" + + with tm.ensure_clean(path) as path: + with open(path, "wb") as f: + f.write(data) + + result = parser.read_csv(path) + tm.assert_frame_equal(result, expected) + + +def test_file_handle_string_io(all_parsers): + # gh-14418 + # + # Don't close user provided file handles. + parser = all_parsers + data = "a,b\n1,2" + + fh = StringIO(data) + parser.read_csv(fh) + assert not fh.closed + + +def test_file_handles_with_open(all_parsers, csv1): + # gh-14418 + # + # Don't close user provided file handles. + parser = all_parsers + + for mode in ["r", "rb"]: + with open(csv1, mode) as f: + parser.read_csv(f) + assert not f.closed + + +def test_invalid_file_buffer_class(all_parsers): + # see gh-15337 + class InvalidBuffer: + pass + + parser = all_parsers + msg = "Invalid file path or buffer object type" + + with pytest.raises(ValueError, match=msg): + parser.read_csv(InvalidBuffer()) + + +def test_invalid_file_buffer_mock(all_parsers): + # see gh-15337 + parser = all_parsers + msg = "Invalid file path or buffer object type" + + class Foo: + pass + + with pytest.raises(ValueError, match=msg): + parser.read_csv(Foo()) + + +def test_valid_file_buffer_seems_invalid(all_parsers): + # gh-16135: we want to ensure that "tell" and "seek" + # aren't actually being used when we call `read_csv` + # + # Thus, while the object may look "invalid" (these + # methods are attributes of the `StringIO` class), + # it is still a valid file-object for our purposes. + class NoSeekTellBuffer(StringIO): + def tell(self): + raise AttributeError("No tell method") + + def seek(self, pos, whence=0): + raise AttributeError("No seek method") + + data = "a\n1" + parser = all_parsers + expected = DataFrame({"a": [1]}) + + result = parser.read_csv(NoSeekTellBuffer(data)) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("io_class", [StringIO, BytesIO]) +@pytest.mark.parametrize("encoding", [None, "utf-8"]) +def test_read_csv_file_handle(all_parsers, io_class, encoding): + """ + Test whether read_csv does not close user-provided file handles. + + GH 36980 + """ + parser = all_parsers + expected = DataFrame({"a": [1], "b": [2]}) + + content = "a,b\n1,2" + handle = io_class(content.encode("utf-8") if io_class == BytesIO else content) + + tm.assert_frame_equal(parser.read_csv(handle, encoding=encoding), expected) + assert not handle.closed + + +def test_memory_map_compression(all_parsers, compression): + """ + Support memory map for compressed files. 
+ + GH 37621 + """ + parser = all_parsers + expected = DataFrame({"a": [1], "b": [2]}) + + with tm.ensure_clean() as path: + expected.to_csv(path, index=False, compression=compression) + + tm.assert_frame_equal( + parser.read_csv(path, memory_map=True, compression=compression), + expected, + ) + + +def test_context_manager(all_parsers, datapath): + # make sure that opened files are closed + parser = all_parsers + + path = datapath("io", "data", "csv", "iris.csv") + + reader = parser.read_csv(path, chunksize=1) + assert not reader.handles.handle.closed + try: + with reader: + next(reader) + assert False + except AssertionError: + assert reader.handles.handle.closed + + +def test_context_manageri_user_provided(all_parsers, datapath): + # make sure that user-provided handles are not closed + parser = all_parsers + + with open(datapath("io", "data", "csv", "iris.csv")) as path: + + reader = parser.read_csv(path, chunksize=1) + assert not reader.handles.handle.closed + try: + with reader: + next(reader) + assert False + except AssertionError: + assert not reader.handles.handle.closed + + +def test_file_descriptor_leak(all_parsers): + # GH 31488 + + parser = all_parsers + with tm.ensure_clean() as path: + + def test(): + with pytest.raises(EmptyDataError, match="No columns to parse from file"): + parser.read_csv(path) + + td.check_file_leaks(test)() + + +@td.check_file_leaks +def test_memory_map(all_parsers, csv_dir_path): + mmap_file = os.path.join(csv_dir_path, "test_mmap.csv") + parser = all_parsers + + expected = DataFrame( + {"a": [1, 2, 3], "b": ["one", "two", "three"], "c": ["I", "II", "III"]} + ) + + result = parser.read_csv(mmap_file, memory_map=True) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/test_float.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/test_float.py new file mode 100644 index 0000000..2ca98de --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/test_float.py @@ -0,0 +1,65 @@ +""" +Tests that work on both the Python and C engines but do not have a +specific classification into the other test modules. 
+""" +from io import StringIO + +import numpy as np +import pytest + +from pandas.compat import is_platform_linux + +from pandas import DataFrame +import pandas._testing as tm + +pytestmark = pytest.mark.usefixtures("pyarrow_skip") + + +def test_float_parser(all_parsers): + # see gh-9565 + parser = all_parsers + data = "45e-1,4.5,45.,inf,-inf" + result = parser.read_csv(StringIO(data), header=None) + + expected = DataFrame([[float(s) for s in data.split(",")]]) + tm.assert_frame_equal(result, expected) + + +def test_scientific_no_exponent(all_parsers_all_precisions): + # see gh-12215 + df = DataFrame.from_dict({"w": ["2e"], "x": ["3E"], "y": ["42e"], "z": ["632E"]}) + data = df.to_csv(index=False) + parser, precision = all_parsers_all_precisions + + df_roundtrip = parser.read_csv(StringIO(data), float_precision=precision) + tm.assert_frame_equal(df_roundtrip, df) + + +@pytest.mark.parametrize("neg_exp", [-617, -100000, -99999999999999999]) +def test_very_negative_exponent(all_parsers_all_precisions, neg_exp): + # GH#38753 + parser, precision = all_parsers_all_precisions + + data = f"data\n10E{neg_exp}" + result = parser.read_csv(StringIO(data), float_precision=precision) + expected = DataFrame({"data": [0.0]}) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("exp", [999999999999999999, -999999999999999999]) +def test_too_many_exponent_digits(all_parsers_all_precisions, exp, request): + # GH#38753 + parser, precision = all_parsers_all_precisions + data = f"data\n10E{exp}" + result = parser.read_csv(StringIO(data), float_precision=precision) + if precision == "round_trip": + if exp == 999999999999999999 and is_platform_linux(): + mark = pytest.mark.xfail(reason="GH38794, on Linux gives object result") + request.node.add_marker(mark) + + value = np.inf if exp > 0 else 0.0 + expected = DataFrame({"data": [value]}) + else: + expected = DataFrame({"data": [f"10E{exp}"]}) + + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/test_index.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/test_index.py new file mode 100644 index 0000000..69afb9f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/test_index.py @@ -0,0 +1,299 @@ +""" +Tests that work on both the Python and C engines but do not have a +specific classification into the other test modules. 
+""" +from datetime import datetime +from io import StringIO +import os + +import pytest + +from pandas import ( + DataFrame, + Index, + MultiIndex, +) +import pandas._testing as tm + +xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") + +# GH#43650: Some expected failures with the pyarrow engine can occasionally +# cause a deadlock instead, so we skip these instead of xfailing +skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") + + +@pytest.mark.parametrize( + "data,kwargs,expected", + [ + ( + """foo,2,3,4,5 +bar,7,8,9,10 +baz,12,13,14,15 +qux,12,13,14,15 +foo2,12,13,14,15 +bar2,12,13,14,15 +""", + {"index_col": 0, "names": ["index", "A", "B", "C", "D"]}, + DataFrame( + [ + [2, 3, 4, 5], + [7, 8, 9, 10], + [12, 13, 14, 15], + [12, 13, 14, 15], + [12, 13, 14, 15], + [12, 13, 14, 15], + ], + index=Index(["foo", "bar", "baz", "qux", "foo2", "bar2"], name="index"), + columns=["A", "B", "C", "D"], + ), + ), + ( + """foo,one,2,3,4,5 +foo,two,7,8,9,10 +foo,three,12,13,14,15 +bar,one,12,13,14,15 +bar,two,12,13,14,15 +""", + {"index_col": [0, 1], "names": ["index1", "index2", "A", "B", "C", "D"]}, + DataFrame( + [ + [2, 3, 4, 5], + [7, 8, 9, 10], + [12, 13, 14, 15], + [12, 13, 14, 15], + [12, 13, 14, 15], + ], + index=MultiIndex.from_tuples( + [ + ("foo", "one"), + ("foo", "two"), + ("foo", "three"), + ("bar", "one"), + ("bar", "two"), + ], + names=["index1", "index2"], + ), + columns=["A", "B", "C", "D"], + ), + ), + ], +) +def test_pass_names_with_index(all_parsers, data, kwargs, expected): + parser = all_parsers + result = parser.read_csv(StringIO(data), **kwargs) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("index_col", [[0, 1], [1, 0]]) +def test_multi_index_no_level_names(all_parsers, index_col): + data = """index1,index2,A,B,C,D +foo,one,2,3,4,5 +foo,two,7,8,9,10 +foo,three,12,13,14,15 +bar,one,12,13,14,15 +bar,two,12,13,14,15 +""" + headless_data = "\n".join(data.split("\n")[1:]) + + names = ["A", "B", "C", "D"] + parser = all_parsers + + result = parser.read_csv( + StringIO(headless_data), index_col=index_col, header=None, names=names + ) + expected = parser.read_csv(StringIO(data), index_col=index_col) + + # No index names in headless data. 
+ expected.index.names = [None] * 2 + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +def test_multi_index_no_level_names_implicit(all_parsers): + parser = all_parsers + data = """A,B,C,D +foo,one,2,3,4,5 +foo,two,7,8,9,10 +foo,three,12,13,14,15 +bar,one,12,13,14,15 +bar,two,12,13,14,15 +""" + + result = parser.read_csv(StringIO(data)) + expected = DataFrame( + [ + [2, 3, 4, 5], + [7, 8, 9, 10], + [12, 13, 14, 15], + [12, 13, 14, 15], + [12, 13, 14, 15], + ], + columns=["A", "B", "C", "D"], + index=MultiIndex.from_tuples( + [ + ("foo", "one"), + ("foo", "two"), + ("foo", "three"), + ("bar", "one"), + ("bar", "two"), + ] + ), + ) + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +@pytest.mark.parametrize( + "data,expected,header", + [ + ("a,b", DataFrame(columns=["a", "b"]), [0]), + ( + "a,b\nc,d", + DataFrame(columns=MultiIndex.from_tuples([("a", "c"), ("b", "d")])), + [0, 1], + ), + ], +) +@pytest.mark.parametrize("round_trip", [True, False]) +def test_multi_index_blank_df(all_parsers, data, expected, header, round_trip): + # see gh-14545 + parser = all_parsers + data = expected.to_csv(index=False) if round_trip else data + + result = parser.read_csv(StringIO(data), header=header) + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +def test_no_unnamed_index(all_parsers): + parser = all_parsers + data = """ id c0 c1 c2 +0 1 0 a b +1 2 0 c d +2 2 2 e f +""" + result = parser.read_csv(StringIO(data), sep=" ") + expected = DataFrame( + [[0, 1, 0, "a", "b"], [1, 2, 0, "c", "d"], [2, 2, 2, "e", "f"]], + columns=["Unnamed: 0", "id", "c0", "c1", "c2"], + ) + tm.assert_frame_equal(result, expected) + + +def test_read_duplicate_index_explicit(all_parsers): + data = """index,A,B,C,D +foo,2,3,4,5 +bar,7,8,9,10 +baz,12,13,14,15 +qux,12,13,14,15 +foo,12,13,14,15 +bar,12,13,14,15 +""" + parser = all_parsers + result = parser.read_csv(StringIO(data), index_col=0) + + expected = DataFrame( + [ + [2, 3, 4, 5], + [7, 8, 9, 10], + [12, 13, 14, 15], + [12, 13, 14, 15], + [12, 13, 14, 15], + [12, 13, 14, 15], + ], + columns=["A", "B", "C", "D"], + index=Index(["foo", "bar", "baz", "qux", "foo", "bar"], name="index"), + ) + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +def test_read_duplicate_index_implicit(all_parsers): + data = """A,B,C,D +foo,2,3,4,5 +bar,7,8,9,10 +baz,12,13,14,15 +qux,12,13,14,15 +foo,12,13,14,15 +bar,12,13,14,15 +""" + parser = all_parsers + result = parser.read_csv(StringIO(data)) + + expected = DataFrame( + [ + [2, 3, 4, 5], + [7, 8, 9, 10], + [12, 13, 14, 15], + [12, 13, 14, 15], + [12, 13, 14, 15], + [12, 13, 14, 15], + ], + columns=["A", "B", "C", "D"], + index=Index(["foo", "bar", "baz", "qux", "foo", "bar"]), + ) + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +def test_read_csv_no_index_name(all_parsers, csv_dir_path): + parser = all_parsers + csv2 = os.path.join(csv_dir_path, "test2.csv") + result = parser.read_csv(csv2, index_col=0, parse_dates=True) + + expected = DataFrame( + [ + [0.980269, 3.685731, -0.364216805298, -1.159738, "foo"], + [1.047916, -0.041232, -0.16181208307, 0.212549, "bar"], + [0.498581, 0.731168, -0.537677223318, 1.346270, "baz"], + [1.120202, 1.567621, 0.00364077397681, 0.675253, "qux"], + [-0.487094, 0.571455, -1.6116394093, 0.103469, "foo2"], + ], + columns=["A", "B", "C", "D", "E"], + index=Index( + [ + datetime(2000, 1, 3), + datetime(2000, 1, 4), + datetime(2000, 1, 5), + datetime(2000, 1, 6), + datetime(2000, 1, 7), + ] + ), + ) + tm.assert_frame_equal(result, expected) + + 
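+# Illustrative sketch, not part of the vendored pandas suite: the index_col
+# behaviour the tests above assert on can be reproduced with the public
+# read_csv API. The helper name is hypothetical and deliberately does not
+# start with "test", so pytest will not collect it.
+def _example_index_col_multiindex():
+    from io import StringIO
+
+    import pandas as pd
+
+    data = "i1,i2,A\nfoo,one,2\nfoo,two,7"
+    df = pd.read_csv(StringIO(data), index_col=[0, 1])
+    # A list of column positions yields a two-level MultiIndex whose level
+    # names are taken from the header row.
+    assert list(df.index.names) == ["i1", "i2"]
+    assert df.loc[("foo", "two"), "A"] == 7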
+@xfail_pyarrow +def test_empty_with_index(all_parsers): + # see gh-10184 + data = "x,y" + parser = all_parsers + result = parser.read_csv(StringIO(data), index_col=0) + + expected = DataFrame(columns=["y"], index=Index([], name="x")) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +def test_empty_with_multi_index(all_parsers): + # see gh-10467 + data = "x,y,z" + parser = all_parsers + result = parser.read_csv(StringIO(data), index_col=["x", "y"]) + + expected = DataFrame( + columns=["z"], index=MultiIndex.from_arrays([[]] * 2, names=["x", "y"]) + ) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +def test_empty_with_reversed_multi_index(all_parsers): + data = "x,y,z" + parser = all_parsers + result = parser.read_csv(StringIO(data), index_col=[1, 0]) + + expected = DataFrame( + columns=["z"], index=MultiIndex.from_arrays([[]] * 2, names=["y", "x"]) + ) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/test_inf.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/test_inf.py new file mode 100644 index 0000000..d43fb2f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/test_inf.py @@ -0,0 +1,68 @@ +""" +Tests that work on both the Python and C engines but do not have a +specific classification into the other test modules. +""" +from io import StringIO + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + option_context, +) +import pandas._testing as tm + +xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") + + +@xfail_pyarrow +@pytest.mark.parametrize("na_filter", [True, False]) +def test_inf_parsing(all_parsers, na_filter): + parser = all_parsers + data = """\ +,A +a,inf +b,-inf +c,+Inf +d,-Inf +e,INF +f,-INF +g,+INf +h,-INf +i,inF +j,-inF""" + expected = DataFrame( + {"A": [float("inf"), float("-inf")] * 5}, + index=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"], + ) + result = parser.read_csv(StringIO(data), index_col=0, na_filter=na_filter) + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +@pytest.mark.parametrize("na_filter", [True, False]) +def test_infinity_parsing(all_parsers, na_filter): + parser = all_parsers + data = """\ +,A +a,Infinity +b,-Infinity +c,+Infinity +""" + expected = DataFrame( + {"A": [float("infinity"), float("-infinity"), float("+infinity")]}, + index=["a", "b", "c"], + ) + result = parser.read_csv(StringIO(data), index_col=0, na_filter=na_filter) + tm.assert_frame_equal(result, expected) + + +def test_read_csv_with_use_inf_as_na(all_parsers): + # https://github.com/pandas-dev/pandas/issues/35493 + parser = all_parsers + data = "1.0\nNaN\n3.0" + with option_context("use_inf_as_na", True): + result = parser.read_csv(StringIO(data), header=None) + expected = DataFrame([1.0, np.nan, 3.0]) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/test_ints.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/test_ints.py new file mode 100644 index 0000000..aef2020 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/test_ints.py @@ -0,0 +1,215 @@ +""" +Tests that work on both the Python and C engines but do not have a +specific classification into the other test modules. 
+""" +from io import StringIO + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Series, +) +import pandas._testing as tm + +# GH#43650: Some expected failures with the pyarrow engine can occasionally +# cause a deadlock instead, so we skip these instead of xfailing +skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") + + +def test_int_conversion(all_parsers): + data = """A,B +1.0,1 +2.0,2 +3.0,3 +""" + parser = all_parsers + result = parser.read_csv(StringIO(data)) + + expected = DataFrame([[1.0, 1], [2.0, 2], [3.0, 3]], columns=["A", "B"]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "data,kwargs,expected", + [ + ( + "A,B\nTrue,1\nFalse,2\nTrue,3", + {}, + DataFrame([[True, 1], [False, 2], [True, 3]], columns=["A", "B"]), + ), + ( + "A,B\nYES,1\nno,2\nyes,3\nNo,3\nYes,3", + {"true_values": ["yes", "Yes", "YES"], "false_values": ["no", "NO", "No"]}, + DataFrame( + [[True, 1], [False, 2], [True, 3], [False, 3], [True, 3]], + columns=["A", "B"], + ), + ), + ( + "A,B\nTRUE,1\nFALSE,2\nTRUE,3", + {}, + DataFrame([[True, 1], [False, 2], [True, 3]], columns=["A", "B"]), + ), + ( + "A,B\nfoo,bar\nbar,foo", + {"true_values": ["foo"], "false_values": ["bar"]}, + DataFrame([[True, False], [False, True]], columns=["A", "B"]), + ), + ], +) +def test_parse_bool(all_parsers, data, kwargs, expected): + parser = all_parsers + result = parser.read_csv(StringIO(data), **kwargs) + tm.assert_frame_equal(result, expected) + + +def test_parse_integers_above_fp_precision(all_parsers): + data = """Numbers +17007000002000191 +17007000002000191 +17007000002000191 +17007000002000191 +17007000002000192 +17007000002000192 +17007000002000192 +17007000002000192 +17007000002000192 +17007000002000194""" + parser = all_parsers + result = parser.read_csv(StringIO(data)) + expected = DataFrame( + { + "Numbers": [ + 17007000002000191, + 17007000002000191, + 17007000002000191, + 17007000002000191, + 17007000002000192, + 17007000002000192, + 17007000002000192, + 17007000002000192, + 17007000002000192, + 17007000002000194, + ] + } + ) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow # Flaky +@pytest.mark.parametrize("sep", [" ", r"\s+"]) +def test_integer_overflow_bug(all_parsers, sep): + # see gh-2601 + data = "65248E10 11\n55555E55 22\n" + parser = all_parsers + + result = parser.read_csv(StringIO(data), header=None, sep=sep) + expected = DataFrame([[6.5248e14, 11], [5.5555e59, 22]]) + tm.assert_frame_equal(result, expected) + + +def test_int64_min_issues(all_parsers): + # see gh-2599 + parser = all_parsers + data = "A,B\n0,0\n0," + result = parser.read_csv(StringIO(data)) + + expected = DataFrame({"A": [0, 0], "B": [0, np.nan]}) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +@pytest.mark.parametrize("conv", [None, np.int64, np.uint64]) +def test_int64_overflow(all_parsers, conv): + data = """ID +00013007854817840016671868 +00013007854817840016749251 +00013007854817840016754630 +00013007854817840016781876 +00013007854817840017028824 +00013007854817840017963235 +00013007854817840018860166""" + parser = all_parsers + + if conv is None: + # 13007854817840016671868 > UINT64_MAX, so this + # will overflow and return object as the dtype. 
+ result = parser.read_csv(StringIO(data)) + expected = DataFrame( + [ + "00013007854817840016671868", + "00013007854817840016749251", + "00013007854817840016754630", + "00013007854817840016781876", + "00013007854817840017028824", + "00013007854817840017963235", + "00013007854817840018860166", + ], + columns=["ID"], + ) + tm.assert_frame_equal(result, expected) + else: + # 13007854817840016671868 > UINT64_MAX, so attempts + # to cast to either int64 or uint64 will result in + # an OverflowError being raised. + msg = ( + "(Python int too large to convert to C long)|" + "(long too big to convert)|" + "(int too big to convert)" + ) + + with pytest.raises(OverflowError, match=msg): + parser.read_csv(StringIO(data), converters={"ID": conv}) + + +@skip_pyarrow +@pytest.mark.parametrize( + "val", [np.iinfo(np.uint64).max, np.iinfo(np.int64).max, np.iinfo(np.int64).min] +) +def test_int64_uint64_range(all_parsers, val): + # These numbers fall right inside the int64-uint64 + # range, so they should be parsed as string. + parser = all_parsers + result = parser.read_csv(StringIO(str(val)), header=None) + + expected = DataFrame([val]) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +@pytest.mark.parametrize( + "val", [np.iinfo(np.uint64).max + 1, np.iinfo(np.int64).min - 1] +) +def test_outside_int64_uint64_range(all_parsers, val): + # These numbers fall just outside the int64-uint64 + # range, so they should be parsed as string. + parser = all_parsers + result = parser.read_csv(StringIO(str(val)), header=None) + + expected = DataFrame([str(val)]) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +@pytest.mark.parametrize("exp_data", [[str(-1), str(2 ** 63)], [str(2 ** 63), str(-1)]]) +def test_numeric_range_too_wide(all_parsers, exp_data): + # No numerical dtype can hold both negative and uint64 + # values, so they should be cast as string. + parser = all_parsers + data = "\n".join(exp_data) + expected = DataFrame(exp_data) + + result = parser.read_csv(StringIO(data), header=None) + tm.assert_frame_equal(result, expected) + + +def test_integer_precision(all_parsers): + # Gh 7072 + s = """1,1;0;0;0;1;1;3844;3844;3844;1;1;1;1;1;1;0;0;1;1;0;0,,,4321583677327450765 +5,1;0;0;0;1;1;843;843;843;1;1;1;1;1;1;0;0;1;1;0;0,64.0,;,4321113141090630389""" + parser = all_parsers + result = parser.read_csv(StringIO(s), header=None)[4] + expected = Series([4321583677327450765, 4321113141090630389], name=4) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/test_iterator.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/test_iterator.py new file mode 100644 index 0000000..5966a2f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/test_iterator.py @@ -0,0 +1,109 @@ +""" +Tests that work on both the Python and C engines but do not have a +specific classification into the other test modules. 
+""" +from io import StringIO + +import pytest + +from pandas import ( + DataFrame, + concat, +) +import pandas._testing as tm + +pytestmark = pytest.mark.usefixtures("pyarrow_skip") + + +def test_iterator(all_parsers): + # see gh-6607 + data = """index,A,B,C,D +foo,2,3,4,5 +bar,7,8,9,10 +baz,12,13,14,15 +qux,12,13,14,15 +foo2,12,13,14,15 +bar2,12,13,14,15 +""" + parser = all_parsers + kwargs = {"index_col": 0} + + expected = parser.read_csv(StringIO(data), **kwargs) + with parser.read_csv(StringIO(data), iterator=True, **kwargs) as reader: + + first_chunk = reader.read(3) + tm.assert_frame_equal(first_chunk, expected[:3]) + + last_chunk = reader.read(5) + tm.assert_frame_equal(last_chunk, expected[3:]) + + +def test_iterator2(all_parsers): + parser = all_parsers + data = """A,B,C +foo,1,2,3 +bar,4,5,6 +baz,7,8,9 +""" + + with parser.read_csv(StringIO(data), iterator=True) as reader: + result = list(reader) + + expected = DataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + index=["foo", "bar", "baz"], + columns=["A", "B", "C"], + ) + tm.assert_frame_equal(result[0], expected) + + +def test_iterator_stop_on_chunksize(all_parsers): + # gh-3967: stopping iteration when chunksize is specified + parser = all_parsers + data = """A,B,C +foo,1,2,3 +bar,4,5,6 +baz,7,8,9 +""" + + with parser.read_csv(StringIO(data), chunksize=1) as reader: + result = list(reader) + + assert len(result) == 3 + expected = DataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + index=["foo", "bar", "baz"], + columns=["A", "B", "C"], + ) + tm.assert_frame_equal(concat(result), expected) + + +@pytest.mark.parametrize( + "kwargs", [{"iterator": True, "chunksize": 1}, {"iterator": True}, {"chunksize": 1}] +) +def test_iterator_skipfooter_errors(all_parsers, kwargs): + msg = "'skipfooter' not supported for iteration" + parser = all_parsers + data = "a\n1\n2" + + with pytest.raises(ValueError, match=msg): + with parser.read_csv(StringIO(data), skipfooter=1, **kwargs) as _: + pass + + +def test_iteration_open_handle(all_parsers): + parser = all_parsers + kwargs = {"header": None} + + with tm.ensure_clean() as path: + with open(path, "w") as f: + f.write("AAA\nBBB\nCCC\nDDD\nEEE\nFFF\nGGG") + + with open(path) as f: + for line in f: + if "CCC" in line: + break + + result = parser.read_csv(f, **kwargs) + expected = DataFrame({0: ["DDD", "EEE", "FFF", "GGG"]}) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/test_read_errors.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/test_read_errors.py new file mode 100644 index 0000000..fe00afb --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/test_read_errors.py @@ -0,0 +1,280 @@ +""" +Tests that work on both the Python and C engines but do not have a +specific classification into the other test modules. +""" +import codecs +import csv +from io import StringIO +import os +from pathlib import Path +import warnings + +import numpy as np +import pytest + +from pandas.errors import ( + EmptyDataError, + ParserError, +) +import pandas.util._test_decorators as td + +from pandas import DataFrame +import pandas._testing as tm + +pytestmark = pytest.mark.usefixtures("pyarrow_skip") + + +def test_empty_decimal_marker(all_parsers): + data = """A|B|C +1|2,334|5 +10|13|10. 
+""" + # Parsers support only length-1 decimals + msg = "Only length-1 decimal markers supported" + parser = all_parsers + + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), decimal="") + + +def test_bad_stream_exception(all_parsers, csv_dir_path): + # see gh-13652 + # + # This test validates that both the Python engine and C engine will + # raise UnicodeDecodeError instead of C engine raising ParserError + # and swallowing the exception that caused read to fail. + path = os.path.join(csv_dir_path, "sauron.SHIFT_JIS.csv") + codec = codecs.lookup("utf-8") + utf8 = codecs.lookup("utf-8") + parser = all_parsers + msg = "'utf-8' codec can't decode byte" + + # Stream must be binary UTF8. + with open(path, "rb") as handle, codecs.StreamRecoder( + handle, utf8.encode, utf8.decode, codec.streamreader, codec.streamwriter + ) as stream: + + with pytest.raises(UnicodeDecodeError, match=msg): + parser.read_csv(stream) + + +def test_malformed(all_parsers): + # see gh-6607 + parser = all_parsers + data = """ignore +A,B,C +1,2,3 # comment +1,2,3,4,5 +2,3,4 +""" + msg = "Expected 3 fields in line 4, saw 5" + with pytest.raises(ParserError, match=msg): + parser.read_csv(StringIO(data), header=1, comment="#") + + +@pytest.mark.parametrize("nrows", [5, 3, None]) +def test_malformed_chunks(all_parsers, nrows): + data = """ignore +A,B,C +skip +1,2,3 +3,5,10 # comment +1,2,3,4,5 +2,3,4 +""" + parser = all_parsers + msg = "Expected 3 fields in line 6, saw 5" + with parser.read_csv( + StringIO(data), header=1, comment="#", iterator=True, chunksize=1, skiprows=[2] + ) as reader: + with pytest.raises(ParserError, match=msg): + reader.read(nrows) + + +def test_catch_too_many_names(all_parsers): + # see gh-5156 + data = """\ +1,2,3 +4,,6 +7,8,9 +10,11,12\n""" + parser = all_parsers + msg = ( + "Too many columns specified: expected 4 and found 3" + if parser.engine == "c" + else "Number of passed names did not match " + "number of header fields in the file" + ) + + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), header=0, names=["a", "b", "c", "d"]) + + +@pytest.mark.parametrize("nrows", [0, 1, 2, 3, 4, 5]) +def test_raise_on_no_columns(all_parsers, nrows): + parser = all_parsers + data = "\n" * nrows + + msg = "No columns to parse from file" + with pytest.raises(EmptyDataError, match=msg): + parser.read_csv(StringIO(data)) + + +def test_read_csv_raises_on_header_prefix(all_parsers): + # gh-27394 + parser = all_parsers + msg = "Argument prefix must be None if argument header is not None" + + s = StringIO("0,1\n2,3") + + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(FutureWarning): + parser.read_csv(s, header=0, prefix="_X") + + +def test_unexpected_keyword_parameter_exception(all_parsers): + # GH-34976 + parser = all_parsers + + msg = "{}\\(\\) got an unexpected keyword argument 'foo'" + with pytest.raises(TypeError, match=msg.format("read_csv")): + parser.read_csv("foo.csv", foo=1) + with pytest.raises(TypeError, match=msg.format("read_table")): + parser.read_table("foo.tsv", foo=1) + + +@pytest.mark.parametrize( + "kwargs", + [ + pytest.param( + {"error_bad_lines": False, "warn_bad_lines": False}, + marks=pytest.mark.filterwarnings("ignore"), + ), + {"on_bad_lines": "skip"}, + ], +) +def test_suppress_error_output(all_parsers, capsys, kwargs): + # see gh-15925 + parser = all_parsers + data = "a\n1\n1,2,3\n4\n5,6,7" + expected = DataFrame({"a": [1, 4]}) + + result = parser.read_csv(StringIO(data), **kwargs) + 
tm.assert_frame_equal(result, expected) + + captured = capsys.readouterr() + assert captured.err == "" + + +@pytest.mark.filterwarnings("ignore") +@pytest.mark.parametrize( + "kwargs", + [{}, {"error_bad_lines": True}], # Default is True. # Explicitly pass in. +) +@pytest.mark.parametrize( + "warn_kwargs", + [{}, {"warn_bad_lines": True}, {"warn_bad_lines": False}], +) +def test_error_bad_lines(all_parsers, kwargs, warn_kwargs): + # see gh-15925 + parser = all_parsers + kwargs.update(**warn_kwargs) + data = "a\n1\n1,2,3\n4\n5,6,7" + + msg = "Expected 1 fields in line 3, saw 3" + with pytest.raises(ParserError, match=msg): + parser.read_csv(StringIO(data), **kwargs) + + +@pytest.mark.parametrize( + "kwargs", + [ + pytest.param( + {"error_bad_lines": False, "warn_bad_lines": True}, + marks=pytest.mark.filterwarnings("ignore"), + ), + {"on_bad_lines": "warn"}, + ], +) +def test_warn_bad_lines(all_parsers, capsys, kwargs): + # see gh-15925 + parser = all_parsers + data = "a\n1\n1,2,3\n4\n5,6,7" + expected = DataFrame({"a": [1, 4]}) + + result = parser.read_csv(StringIO(data), **kwargs) + tm.assert_frame_equal(result, expected) + + captured = capsys.readouterr() + assert "Skipping line 3" in captured.err + assert "Skipping line 5" in captured.err + + +def test_read_csv_wrong_num_columns(all_parsers): + # Too few columns. + data = """A,B,C,D,E,F +1,2,3,4,5,6 +6,7,8,9,10,11,12 +11,12,13,14,15,16 +""" + parser = all_parsers + msg = "Expected 6 fields in line 3, saw 7" + + with pytest.raises(ParserError, match=msg): + parser.read_csv(StringIO(data)) + + +def test_null_byte_char(all_parsers): + # see gh-2741 + data = "\x00,foo" + names = ["a", "b"] + parser = all_parsers + + if parser.engine == "c": + expected = DataFrame([[np.nan, "foo"]], columns=names) + out = parser.read_csv(StringIO(data), names=names) + tm.assert_frame_equal(out, expected) + else: + msg = "NULL byte detected" + with pytest.raises(ParserError, match=msg): + parser.read_csv(StringIO(data), names=names) + + +@td.check_file_leaks +def test_open_file(all_parsers): + # GH 39024 + parser = all_parsers + if parser.engine == "c": + pytest.skip("'c' engine does not support sep=None with delim_whitespace=False") + + with tm.ensure_clean() as path: + file = Path(path) + file.write_bytes(b"\xe4\na\n1") + + # should not trigger a ResourceWarning + warnings.simplefilter("always", category=ResourceWarning) + with warnings.catch_warnings(record=True) as record: + with pytest.raises(csv.Error, match="Could not determine delimiter"): + parser.read_csv(file, sep=None, encoding_errors="replace") + assert len(record) == 0, record[0].message + + +def test_invalid_on_bad_line(all_parsers): + parser = all_parsers + data = "a\n1\n1,2,3\n4\n5,6,7" + with pytest.raises(ValueError, match="Argument abc is invalid for on_bad_lines"): + parser.read_csv(StringIO(data), on_bad_lines="abc") + + +@pytest.mark.parametrize("error_bad_lines", [True, False]) +@pytest.mark.parametrize("warn_bad_lines", [True, False]) +def test_conflict_on_bad_line(all_parsers, error_bad_lines, warn_bad_lines): + parser = all_parsers + data = "a\n1\n1,2,3\n4\n5,6,7" + kwds = {"error_bad_lines": error_bad_lines, "warn_bad_lines": warn_bad_lines} + with pytest.raises( + ValueError, + match="Both on_bad_lines and error_bad_lines/warn_bad_lines are set. 
" + "Please only set on_bad_lines.", + ): + parser.read_csv(StringIO(data), on_bad_lines="error", **kwds) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/test_verbose.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/test_verbose.py new file mode 100644 index 0000000..335065d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/common/test_verbose.py @@ -0,0 +1,55 @@ +""" +Tests that work on both the Python and C engines but do not have a +specific classification into the other test modules. +""" +from io import StringIO + +import pytest + +pytestmark = pytest.mark.usefixtures("pyarrow_skip") + + +def test_verbose_read(all_parsers, capsys): + parser = all_parsers + data = """a,b,c,d +one,1,2,3 +one,1,2,3 +,1,2,3 +one,1,2,3 +,1,2,3 +,1,2,3 +one,1,2,3 +two,1,2,3""" + + # Engines are verbose in different ways. + parser.read_csv(StringIO(data), verbose=True) + captured = capsys.readouterr() + + if parser.engine == "c": + assert "Tokenization took:" in captured.out + assert "Parser memory cleanup took:" in captured.out + else: # Python engine + assert captured.out == "Filled 3 NA values in column a\n" + + +def test_verbose_read2(all_parsers, capsys): + parser = all_parsers + data = """a,b,c,d +one,1,2,3 +two,1,2,3 +three,1,2,3 +four,1,2,3 +five,1,2,3 +,1,2,3 +seven,1,2,3 +eight,1,2,3""" + + parser.read_csv(StringIO(data), verbose=True, index_col=0) + captured = capsys.readouterr() + + # Engines are verbose in different ways. + if parser.engine == "c": + assert "Tokenization took:" in captured.out + assert "Parser memory cleanup took:" in captured.out + else: # Python engine + assert captured.out == "Filled 1 NA values in column a\n" diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/conftest.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/conftest.py new file mode 100644 index 0000000..b2d2be3 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/conftest.py @@ -0,0 +1,282 @@ +from __future__ import annotations + +import os + +import pytest + +from pandas.compat._optional import VERSIONS + +from pandas import ( + read_csv, + read_table, +) +import pandas._testing as tm + + +class BaseParser: + engine: str | None = None + low_memory = True + float_precision_choices: list[str | None] = [] + + def update_kwargs(self, kwargs): + kwargs = kwargs.copy() + kwargs.update({"engine": self.engine, "low_memory": self.low_memory}) + + return kwargs + + def read_csv(self, *args, **kwargs): + kwargs = self.update_kwargs(kwargs) + return read_csv(*args, **kwargs) + + def read_csv_check_warnings( + self, warn_type: type[Warning], warn_msg: str, *args, **kwargs + ): + # We need to check the stacklevel here instead of in the tests + # since this is where read_csv is called and where the warning + # should point to. 
+ kwargs = self.update_kwargs(kwargs) + with tm.assert_produces_warning(warn_type, match=warn_msg): + return read_csv(*args, **kwargs) + + def read_table(self, *args, **kwargs): + kwargs = self.update_kwargs(kwargs) + return read_table(*args, **kwargs) + + +class CParser(BaseParser): + engine = "c" + float_precision_choices = [None, "high", "round_trip"] + + +class CParserHighMemory(CParser): + low_memory = False + + +class CParserLowMemory(CParser): + low_memory = True + + +class PythonParser(BaseParser): + engine = "python" + float_precision_choices = [None] + + +class PyArrowParser(BaseParser): + engine = "pyarrow" + float_precision_choices = [None] + + +@pytest.fixture +def csv_dir_path(datapath): + """ + The directory path to the data files needed for parser tests. + """ + return datapath("io", "parser", "data") + + +@pytest.fixture +def csv1(datapath): + """ + The path to the data file "test1.csv" needed for parser tests. + """ + return os.path.join(datapath("io", "data", "csv"), "test1.csv") + + +_cParserHighMemory = CParserHighMemory +_cParserLowMemory = CParserLowMemory +_pythonParser = PythonParser +_pyarrowParser = PyArrowParser + +_py_parsers_only = [_pythonParser] +_c_parsers_only = [_cParserHighMemory, _cParserLowMemory] +_pyarrow_parsers_only = [_pyarrowParser] + +_all_parsers = [*_c_parsers_only, *_py_parsers_only, *_pyarrow_parsers_only] + +_py_parser_ids = ["python"] +_c_parser_ids = ["c_high", "c_low"] +_pyarrow_parsers_ids = ["pyarrow"] + +_all_parser_ids = [*_c_parser_ids, *_py_parser_ids, *_pyarrow_parsers_ids] + + +@pytest.fixture(params=_all_parsers, ids=_all_parser_ids) +def all_parsers(request): + """ + Fixture all of the CSV parsers. + """ + parser = request.param() + if parser.engine == "pyarrow": + pytest.importorskip("pyarrow", VERSIONS["pyarrow"]) + # Try setting num cpus to 1 to avoid hangs on Azure MacOS/Windows builds + # or better yet find a way to disable threads + # TODO(GH#44584) pytest.mark.single these tests + import pyarrow + + pyarrow.set_cpu_count(1) + return parser + + +@pytest.fixture(params=_c_parsers_only, ids=_c_parser_ids) +def c_parser_only(request): + """ + Fixture all of the CSV parsers using the C engine. + """ + return request.param() + + +@pytest.fixture(params=_py_parsers_only, ids=_py_parser_ids) +def python_parser_only(request): + """ + Fixture all of the CSV parsers using the Python engine. + """ + return request.param() + + +@pytest.fixture(params=_pyarrow_parsers_only, ids=_pyarrow_parsers_ids) +def pyarrow_parser_only(request): + """ + Fixture all of the CSV parsers using the Pyarrow engine. + """ + return request.param() + + +def _get_all_parser_float_precision_combinations(): + """ + Return all allowable parser and float precision + combinations and corresponding ids. 
+ """ + params = [] + ids = [] + for parser, parser_id in zip(_all_parsers, _all_parser_ids): + for precision in parser.float_precision_choices: + params.append((parser(), precision)) + ids.append(f"{parser_id}-{precision}") + + return {"params": params, "ids": ids} + + +@pytest.fixture( + params=_get_all_parser_float_precision_combinations()["params"], + ids=_get_all_parser_float_precision_combinations()["ids"], +) +def all_parsers_all_precisions(request): + """ + Fixture for all allowable combinations of parser + and float precision + """ + return request.param + + +_utf_values = [8, 16, 32] + +_encoding_seps = ["", "-", "_"] +_encoding_prefixes = ["utf", "UTF"] + +_encoding_fmts = [ + f"{prefix}{sep}" + "{0}" for sep in _encoding_seps for prefix in _encoding_prefixes +] + + +@pytest.fixture(params=_utf_values) +def utf_value(request): + """ + Fixture for all possible integer values for a UTF encoding. + """ + return request.param + + +@pytest.fixture(params=_encoding_fmts) +def encoding_fmt(request): + """ + Fixture for all possible string formats of a UTF encoding. + """ + return request.param + + +@pytest.fixture( + params=[ + ("-1,0", -1.0), + ("-1,2e0", -1.2), + ("-1e0", -1.0), + ("+1e0", 1.0), + ("+1e+0", 1.0), + ("+1e-1", 0.1), + ("+,1e1", 1.0), + ("+1,e0", 1.0), + ("-,1e1", -1.0), + ("-1,e0", -1.0), + ("0,1", 0.1), + ("1,", 1.0), + (",1", 0.1), + ("-,1", -0.1), + ("1_,", 1.0), + ("1_234,56", 1234.56), + ("1_234,56e0", 1234.56), + # negative cases; must not parse as float + ("_", "_"), + ("-_", "-_"), + ("-_1", "-_1"), + ("-_1e0", "-_1e0"), + ("_1", "_1"), + ("_1,", "_1,"), + ("_1,_", "_1,_"), + ("_1e0", "_1e0"), + ("1,2e_1", "1,2e_1"), + ("1,2e1_0", "1,2e1_0"), + ("1,_2", "1,_2"), + (",1__2", ",1__2"), + (",1e", ",1e"), + ("-,1e", "-,1e"), + ("1_000,000_000", "1_000,000_000"), + ("1,e1_2", "1,e1_2"), + ("e11,2", "e11,2"), + ("1e11,2", "1e11,2"), + ("1,2,2", "1,2,2"), + ("1,2_1", "1,2_1"), + ("1,2e-10e1", "1,2e-10e1"), + ("--1,2", "--1,2"), + ("1a_2,1", "1a_2,1"), + ("1,2E-1", 0.12), + ("1,2E1", 12.0), + ] +) +def numeric_decimal(request): + """ + Fixture for all numeric formats which should get recognized. The first entry + represents the value to read while the second represents the expected result. + """ + return request.param + + +@pytest.fixture +def pyarrow_xfail(request): + """ + Fixture that xfails a test if the engine is pyarrow. + """ + if "all_parsers" in request.fixturenames: + parser = request.getfixturevalue("all_parsers") + elif "all_parsers_all_precisions" in request.fixturenames: + # Return value is tuple of (engine, precision) + parser = request.getfixturevalue("all_parsers_all_precisions")[0] + else: + return + if parser.engine == "pyarrow": + mark = pytest.mark.xfail(reason="pyarrow doesn't support this.") + request.node.add_marker(mark) + + +@pytest.fixture +def pyarrow_skip(request): + """ + Fixture that skips a test if the engine is pyarrow. 
+ """ + if "all_parsers" in request.fixturenames: + parser = request.getfixturevalue("all_parsers") + elif "all_parsers_all_precisions" in request.fixturenames: + # Return value is tuple of (engine, precision) + parser = request.getfixturevalue("all_parsers_all_precisions")[0] + else: + return + if parser.engine == "pyarrow": + pytest.skip("pyarrow doesn't support this.") diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/dtypes/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/dtypes/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/dtypes/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/dtypes/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..aa33321 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/dtypes/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/dtypes/__pycache__/test_categorical.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/dtypes/__pycache__/test_categorical.cpython-38.pyc new file mode 100644 index 0000000..c5df7f7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/dtypes/__pycache__/test_categorical.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/dtypes/__pycache__/test_dtypes_basic.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/dtypes/__pycache__/test_dtypes_basic.cpython-38.pyc new file mode 100644 index 0000000..4959fbf Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/dtypes/__pycache__/test_dtypes_basic.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/dtypes/__pycache__/test_empty.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/dtypes/__pycache__/test_empty.cpython-38.pyc new file mode 100644 index 0000000..7f1f573 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/dtypes/__pycache__/test_empty.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/dtypes/test_categorical.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/dtypes/test_categorical.py new file mode 100644 index 0000000..3b8c520 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/dtypes/test_categorical.py @@ -0,0 +1,310 @@ +""" +Tests dtype specification during parsing +for all of the parsers defined in parsers.py +""" +from io import StringIO +import os + +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import CategoricalDtype + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + Timestamp, +) +import pandas._testing as tm + +xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") +skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") + + +@xfail_pyarrow +@pytest.mark.parametrize( + "dtype", + [ + "category", + CategoricalDtype(), + {"a": "category", "b": "category", "c": CategoricalDtype()}, + ], +) +def test_categorical_dtype(all_parsers, dtype): + # see gh-10153 + parser = all_parsers + data = """a,b,c +1,a,3.4 +1,a,3.4 +2,b,4.5""" + expected = DataFrame( + { + "a": Categorical(["1", "1", "2"]), + "b": Categorical(["a", "a", "b"]), + "c": Categorical(["3.4", "3.4", "4.5"]), + } + ) + actual = parser.read_csv(StringIO(data), 
dtype=dtype) + tm.assert_frame_equal(actual, expected) + + +@skip_pyarrow # Flaky +@pytest.mark.parametrize("dtype", [{"b": "category"}, {1: "category"}]) +def test_categorical_dtype_single(all_parsers, dtype): + # see gh-10153 + parser = all_parsers + data = """a,b,c +1,a,3.4 +1,a,3.4 +2,b,4.5""" + expected = DataFrame( + {"a": [1, 1, 2], "b": Categorical(["a", "a", "b"]), "c": [3.4, 3.4, 4.5]} + ) + actual = parser.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(actual, expected) + + +@xfail_pyarrow +def test_categorical_dtype_unsorted(all_parsers): + # see gh-10153 + parser = all_parsers + data = """a,b,c +1,b,3.4 +1,b,3.4 +2,a,4.5""" + expected = DataFrame( + { + "a": Categorical(["1", "1", "2"]), + "b": Categorical(["b", "b", "a"]), + "c": Categorical(["3.4", "3.4", "4.5"]), + } + ) + actual = parser.read_csv(StringIO(data), dtype="category") + tm.assert_frame_equal(actual, expected) + + +@xfail_pyarrow +def test_categorical_dtype_missing(all_parsers): + # see gh-10153 + parser = all_parsers + data = """a,b,c +1,b,3.4 +1,nan,3.4 +2,a,4.5""" + expected = DataFrame( + { + "a": Categorical(["1", "1", "2"]), + "b": Categorical(["b", np.nan, "a"]), + "c": Categorical(["3.4", "3.4", "4.5"]), + } + ) + actual = parser.read_csv(StringIO(data), dtype="category") + tm.assert_frame_equal(actual, expected) + + +@xfail_pyarrow +@pytest.mark.slow +def test_categorical_dtype_high_cardinality_numeric(all_parsers): + # see gh-18186 + parser = all_parsers + data = np.sort([str(i) for i in range(524289)]) + expected = DataFrame({"a": Categorical(data, ordered=True)}) + + actual = parser.read_csv(StringIO("a\n" + "\n".join(data)), dtype="category") + actual["a"] = actual["a"].cat.reorder_categories( + np.sort(actual.a.cat.categories), ordered=True + ) + tm.assert_frame_equal(actual, expected) + + +@xfail_pyarrow +def test_categorical_dtype_utf16(all_parsers, csv_dir_path): + # see gh-10153 + pth = os.path.join(csv_dir_path, "utf16_ex.txt") + parser = all_parsers + encoding = "utf-16" + sep = "\t" + + expected = parser.read_csv(pth, sep=sep, encoding=encoding) + expected = expected.apply(Categorical) + + actual = parser.read_csv(pth, sep=sep, encoding=encoding, dtype="category") + tm.assert_frame_equal(actual, expected) + + +@xfail_pyarrow +def test_categorical_dtype_chunksize_infer_categories(all_parsers): + # see gh-10153 + parser = all_parsers + data = """a,b +1,a +1,b +1,b +2,c""" + expecteds = [ + DataFrame({"a": [1, 1], "b": Categorical(["a", "b"])}), + DataFrame({"a": [1, 2], "b": Categorical(["b", "c"])}, index=[2, 3]), + ] + with parser.read_csv( + StringIO(data), dtype={"b": "category"}, chunksize=2 + ) as actuals: + for actual, expected in zip(actuals, expecteds): + tm.assert_frame_equal(actual, expected) + + +@xfail_pyarrow +def test_categorical_dtype_chunksize_explicit_categories(all_parsers): + # see gh-10153 + parser = all_parsers + data = """a,b +1,a +1,b +1,b +2,c""" + cats = ["a", "b", "c"] + expecteds = [ + DataFrame({"a": [1, 1], "b": Categorical(["a", "b"], categories=cats)}), + DataFrame( + {"a": [1, 2], "b": Categorical(["b", "c"], categories=cats)}, + index=[2, 3], + ), + ] + dtype = CategoricalDtype(cats) + with parser.read_csv(StringIO(data), dtype={"b": dtype}, chunksize=2) as actuals: + for actual, expected in zip(actuals, expecteds): + tm.assert_frame_equal(actual, expected) + + +def test_categorical_dtype_latin1(all_parsers, csv_dir_path): + # see gh-10153 + pth = os.path.join(csv_dir_path, "unicode_series.csv") + parser = all_parsers + encoding = "latin-1" + + 
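+    # Build the expected frame by reading without a dtype and casting the
+    # second column to Categorical afterwards; the dtype={1: "category"}
+    # path below must match that round trip.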
expected = parser.read_csv(pth, header=None, encoding=encoding) + expected[1] = Categorical(expected[1]) + + actual = parser.read_csv(pth, header=None, encoding=encoding, dtype={1: "category"}) + tm.assert_frame_equal(actual, expected) + + +@pytest.mark.parametrize("ordered", [False, True]) +@pytest.mark.parametrize( + "categories", + [["a", "b", "c"], ["a", "c", "b"], ["a", "b", "c", "d"], ["c", "b", "a"]], +) +def test_categorical_category_dtype(all_parsers, categories, ordered): + parser = all_parsers + data = """a,b +1,a +1,b +1,b +2,c""" + expected = DataFrame( + { + "a": [1, 1, 1, 2], + "b": Categorical( + ["a", "b", "b", "c"], categories=categories, ordered=ordered + ), + } + ) + + dtype = {"b": CategoricalDtype(categories=categories, ordered=ordered)} + result = parser.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(result, expected) + + +def test_categorical_category_dtype_unsorted(all_parsers): + parser = all_parsers + data = """a,b +1,a +1,b +1,b +2,c""" + dtype = CategoricalDtype(["c", "b", "a"]) + expected = DataFrame( + { + "a": [1, 1, 1, 2], + "b": Categorical(["a", "b", "b", "c"], categories=["c", "b", "a"]), + } + ) + + result = parser.read_csv(StringIO(data), dtype={"b": dtype}) + tm.assert_frame_equal(result, expected) + + +def test_categorical_coerces_numeric(all_parsers): + parser = all_parsers + dtype = {"b": CategoricalDtype([1, 2, 3])} + + data = "b\n1\n1\n2\n3" + expected = DataFrame({"b": Categorical([1, 1, 2, 3])}) + + result = parser.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow # Flaky +def test_categorical_coerces_datetime(all_parsers): + parser = all_parsers + dti = pd.DatetimeIndex(["2017-01-01", "2018-01-01", "2019-01-01"], freq=None) + dtype = {"b": CategoricalDtype(dti)} + + data = "b\n2017-01-01\n2018-01-01\n2019-01-01" + expected = DataFrame({"b": Categorical(dtype["b"].categories)}) + + result = parser.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(result, expected) + + +def test_categorical_coerces_timestamp(all_parsers): + parser = all_parsers + dtype = {"b": CategoricalDtype([Timestamp("2014")])} + + data = "b\n2014-01-01\n2014-01-01T00:00:00" + expected = DataFrame({"b": Categorical([Timestamp("2014")] * 2)}) + + result = parser.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(result, expected) + + +def test_categorical_coerces_timedelta(all_parsers): + parser = all_parsers + dtype = {"b": CategoricalDtype(pd.to_timedelta(["1H", "2H", "3H"]))} + + data = "b\n1H\n2H\n3H" + expected = DataFrame({"b": Categorical(dtype["b"].categories)}) + + result = parser.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "data", + [ + "b\nTrue\nFalse\nNA\nFalse", + "b\ntrue\nfalse\nNA\nfalse", + "b\nTRUE\nFALSE\nNA\nFALSE", + "b\nTrue\nFalse\nNA\nFALSE", + ], +) +def test_categorical_dtype_coerces_boolean(all_parsers, data): + # see gh-20498 + parser = all_parsers + dtype = {"b": CategoricalDtype([False, True])} + expected = DataFrame({"b": Categorical([True, False, None, False])}) + + result = parser.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(result, expected) + + +def test_categorical_unexpected_categories(all_parsers): + parser = all_parsers + dtype = {"b": CategoricalDtype(["a", "b", "d", "e"])} + + data = "b\nd\na\nc\nd" # Unexpected c + expected = DataFrame({"b": Categorical(list("dacd"), dtype=dtype["b"])}) + + result = parser.read_csv(StringIO(data), dtype=dtype) + 
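+    # "c" is absent from the declared categories, so the Categorical
+    # constructor coerces it to NaN in both `result` and `expected`.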
tm.assert_frame_equal(result, expected)
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/dtypes/test_dtypes_basic.py
new file mode 100644
index 0000000..cdf9c0a
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/dtypes/test_dtypes_basic.py
@@ -0,0 +1,336 @@
+"""
+Tests dtype specification during parsing
+for all of the parsers defined in parsers.py
+"""
+from io import StringIO
+
+import numpy as np
+import pytest
+
+from pandas.errors import ParserWarning
+
+import pandas as pd
+from pandas import (
+    DataFrame,
+    Timestamp,
+)
+import pandas._testing as tm
+
+# TODO(1.4): Change me into xfail at release time
+# and xfail individual tests
+pytestmark = pytest.mark.usefixtures("pyarrow_skip")
+
+
+@pytest.mark.parametrize("dtype", [str, object])
+@pytest.mark.parametrize("check_orig", [True, False])
+def test_dtype_all_columns(all_parsers, dtype, check_orig):
+    # see gh-3795, gh-6607
+    parser = all_parsers
+
+    df = DataFrame(
+        np.random.rand(5, 2).round(4),
+        columns=list("AB"),
+        index=["1A", "1B", "1C", "1D", "1E"],
+    )
+
+    with tm.ensure_clean("__passing_str_as_dtype__.csv") as path:
+        df.to_csv(path)
+
+        result = parser.read_csv(path, dtype=dtype, index_col=0)
+
+        if check_orig:
+            expected = df.copy()
+            result = result.astype(float)
+        else:
+            expected = df.astype(str)
+
+        tm.assert_frame_equal(result, expected)
+
+
+def test_dtype_per_column(all_parsers):
+    parser = all_parsers
+    data = """\
+one,two
+1,2.5
+2,3.5
+3,4.5
+4,5.5"""
+    expected = DataFrame(
+        [[1, "2.5"], [2, "3.5"], [3, "4.5"], [4, "5.5"]], columns=["one", "two"]
+    )
+    expected["one"] = expected["one"].astype(np.float64)
+    expected["two"] = expected["two"].astype(object)
+
+    result = parser.read_csv(StringIO(data), dtype={"one": np.float64, 1: str})
+    tm.assert_frame_equal(result, expected)
+
+
+def test_invalid_dtype_per_column(all_parsers):
+    parser = all_parsers
+    data = """\
+one,two
+1,2.5
+2,3.5
+3,4.5
+4,5.5"""
+
+    with pytest.raises(TypeError, match="data type [\"']foo[\"'] not understood"):
+        parser.read_csv(StringIO(data), dtype={"one": "foo", 1: "int"})
+
+
+def test_raise_on_passed_int_dtype_with_nas(all_parsers):
+    # see gh-2631
+    parser = all_parsers
+    data = """YEAR, DOY, a
+2001,106380451,10
+2001,,11
+2001,106380451,67"""
+
+    msg = (
+        "Integer column has NA values"
+        if parser.engine == "c"
+        else "Unable to convert column DOY"
+    )
+    with pytest.raises(ValueError, match=msg):
+        parser.read_csv(StringIO(data), dtype={"DOY": np.int64}, skipinitialspace=True)
+
+
+def test_dtype_with_converters(all_parsers):
+    parser = all_parsers
+    data = """a,b
+1.1,2.2
+1.2,2.3"""
+
+    # Dtype spec ignored if converters specified.
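+    # A ParserWarning is emitted because both a dtype and a converter target
+    # column "a"; the converter wins, so "a" is parsed as str, not i8.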
+ with tm.assert_produces_warning(ParserWarning): + result = parser.read_csv( + StringIO(data), dtype={"a": "i8"}, converters={"a": lambda x: str(x)} + ) + expected = DataFrame({"a": ["1.1", "1.2"], "b": [2.2, 2.3]}) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "dtype", list(np.typecodes["AllInteger"] + np.typecodes["Float"]) +) +def test_numeric_dtype(all_parsers, dtype): + data = "0\n1" + parser = all_parsers + expected = DataFrame([0, 1], dtype=dtype) + + result = parser.read_csv(StringIO(data), header=None, dtype=dtype) + tm.assert_frame_equal(expected, result) + + +def test_boolean_dtype(all_parsers): + parser = all_parsers + data = "\n".join( + [ + "a", + "True", + "TRUE", + "true", + "1", + "1.0", + "False", + "FALSE", + "false", + "0", + "0.0", + "NaN", + "nan", + "NA", + "null", + "NULL", + ] + ) + + result = parser.read_csv(StringIO(data), dtype="boolean") + expected = DataFrame( + { + "a": pd.array( + [ + True, + True, + True, + True, + True, + False, + False, + False, + False, + False, + None, + None, + None, + None, + None, + ], + dtype="boolean", + ) + } + ) + + tm.assert_frame_equal(result, expected) + + +def test_delimiter_with_usecols_and_parse_dates(all_parsers): + # GH#35873 + result = all_parsers.read_csv( + StringIO('"dump","-9,1","-9,1",20101010'), + engine="python", + names=["col", "col1", "col2", "col3"], + usecols=["col1", "col2", "col3"], + parse_dates=["col3"], + decimal=",", + ) + expected = DataFrame( + {"col1": [-9.1], "col2": [-9.1], "col3": [Timestamp("2010-10-10")]} + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("thousands", ["_", None]) +def test_decimal_and_exponential(python_parser_only, numeric_decimal, thousands): + # GH#31920 + decimal_number_check(python_parser_only, numeric_decimal, thousands, None) + + +@pytest.mark.parametrize("thousands", ["_", None]) +@pytest.mark.parametrize("float_precision", [None, "legacy", "high", "round_trip"]) +def test_1000_sep_decimal_float_precision( + c_parser_only, numeric_decimal, float_precision, thousands +): + # test decimal and thousand sep handling in across 'float_precision' + # parsers + decimal_number_check(c_parser_only, numeric_decimal, thousands, float_precision) + text, value = numeric_decimal + text = " " + text + " " + if isinstance(value, str): # the negative cases (parse as text) + value = " " + value + " " + decimal_number_check(c_parser_only, (text, value), thousands, float_precision) + + +def decimal_number_check(parser, numeric_decimal, thousands, float_precision): + # GH#31920 + value = numeric_decimal[0] + if thousands is None and "_" in value: + pytest.skip("Skip test if no thousands sep is defined and sep is in value") + df = parser.read_csv( + StringIO(value), + sep="|", + thousands=thousands, + decimal=",", + header=None, + ) + val = df.iloc[0, 0] + assert val == numeric_decimal[1] + + +@pytest.mark.parametrize("float_precision", [None, "legacy", "high", "round_trip"]) +def test_skip_whitespace(c_parser_only, float_precision): + DATA = """id\tnum\t +1\t1.2 \t +1\t 2.1\t +2\t 1\t +2\t 1.2 \t +""" + df = c_parser_only.read_csv( + StringIO(DATA), + float_precision=float_precision, + sep="\t", + header=0, + dtype={1: np.float64}, + ) + tm.assert_series_equal(df.iloc[:, 1], pd.Series([1.2, 2.1, 1.0, 1.2], name="num")) + + +def test_true_values_cast_to_bool(all_parsers): + # GH#34655 + text = """a,b +yes,xxx +no,yyy +1,zzz +0,aaa + """ + parser = all_parsers + result = parser.read_csv( + StringIO(text), + true_values=["yes"], + 
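+        # "yes"/"no" are mapped to True/False first; the remaining "1"/"0"
+        # strings are then handled by the "boolean" dtype cast below.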
false_values=["no"], + dtype={"a": "boolean"}, + ) + expected = DataFrame( + {"a": [True, False, True, False], "b": ["xxx", "yyy", "zzz", "aaa"]} + ) + expected["a"] = expected["a"].astype("boolean") + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("dtypes, exp_value", [({}, "1"), ({"a.1": "int64"}, 1)]) +def test_dtype_mangle_dup_cols(all_parsers, dtypes, exp_value): + # GH#35211 + parser = all_parsers + data = """a,a\n1,1""" + dtype_dict = {"a": str, **dtypes} + # GH#42462 + dtype_dict_copy = dtype_dict.copy() + result = parser.read_csv(StringIO(data), dtype=dtype_dict) + expected = DataFrame({"a": ["1"], "a.1": [exp_value]}) + assert dtype_dict == dtype_dict_copy, "dtype dict changed" + tm.assert_frame_equal(result, expected) + + +def test_dtype_mangle_dup_cols_single_dtype(all_parsers): + # GH#42022 + parser = all_parsers + data = """a,a\n1,1""" + result = parser.read_csv(StringIO(data), dtype=str) + expected = DataFrame({"a": ["1"], "a.1": ["1"]}) + tm.assert_frame_equal(result, expected) + + +def test_dtype_multi_index(all_parsers): + # GH 42446 + parser = all_parsers + data = "A,B,B\nX,Y,Z\n1,2,3" + + result = parser.read_csv( + StringIO(data), + header=list(range(2)), + dtype={ + ("A", "X"): np.int32, + ("B", "Y"): np.int32, + ("B", "Z"): np.float32, + }, + ) + + expected = DataFrame( + { + ("A", "X"): np.int32([1]), + ("B", "Y"): np.int32([2]), + ("B", "Z"): np.float32([3]), + } + ) + + tm.assert_frame_equal(result, expected) + + +def test_nullable_int_dtype(all_parsers, any_int_ea_dtype): + # GH 25472 + parser = all_parsers + dtype = any_int_ea_dtype + + data = """a,b,c +,3,5 +1,,6 +2,4,""" + expected = DataFrame( + { + "a": pd.array([pd.NA, 1, 2], dtype=dtype), + "b": pd.array([3, pd.NA, 4], dtype=dtype), + "c": pd.array([5, 6, pd.NA], dtype=dtype), + } + ) + actual = parser.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(actual, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/dtypes/test_empty.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/dtypes/test_empty.py new file mode 100644 index 0000000..ee02af7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/dtypes/test_empty.py @@ -0,0 +1,182 @@ +""" +Tests dtype specification during parsing +for all of the parsers defined in parsers.py +""" +from io import StringIO + +import numpy as np +import pytest + +from pandas import ( + Categorical, + DataFrame, + Index, + MultiIndex, + Series, + concat, +) +import pandas._testing as tm + +# TODO(1.4): Change me into individual xfails at release time +pytestmark = pytest.mark.usefixtures("pyarrow_skip") + + +def test_dtype_all_columns_empty(all_parsers): + # see gh-12048 + parser = all_parsers + result = parser.read_csv(StringIO("A,B"), dtype=str) + + expected = DataFrame({"A": [], "B": []}, index=[], dtype=str) + tm.assert_frame_equal(result, expected) + + +def test_empty_pass_dtype(all_parsers): + parser = all_parsers + + data = "one,two" + result = parser.read_csv(StringIO(data), dtype={"one": "u1"}) + + expected = DataFrame( + {"one": np.empty(0, dtype="u1"), "two": np.empty(0, dtype=object)}, + index=Index([], dtype=object), + ) + tm.assert_frame_equal(result, expected) + + +def test_empty_with_index_pass_dtype(all_parsers): + parser = all_parsers + + data = "one,two" + result = parser.read_csv( + StringIO(data), index_col=["one"], dtype={"one": "u1", 1: "f"} + ) + + expected = DataFrame( + {"two": np.empty(0, dtype="f")}, index=Index([], dtype="u1", name="one") + ) + 
tm.assert_frame_equal(result, expected) + + +def test_empty_with_multi_index_pass_dtype(all_parsers): + parser = all_parsers + + data = "one,two,three" + result = parser.read_csv( + StringIO(data), index_col=["one", "two"], dtype={"one": "u1", 1: "f8"} + ) + + exp_idx = MultiIndex.from_arrays( + [np.empty(0, dtype="u1"), np.empty(0, dtype=np.float64)], + names=["one", "two"], + ) + expected = DataFrame({"three": np.empty(0, dtype=object)}, index=exp_idx) + tm.assert_frame_equal(result, expected) + + +def test_empty_with_mangled_column_pass_dtype_by_names(all_parsers): + parser = all_parsers + + data = "one,one" + result = parser.read_csv(StringIO(data), dtype={"one": "u1", "one.1": "f"}) + + expected = DataFrame( + {"one": np.empty(0, dtype="u1"), "one.1": np.empty(0, dtype="f")}, + index=Index([], dtype=object), + ) + tm.assert_frame_equal(result, expected) + + +def test_empty_with_mangled_column_pass_dtype_by_indexes(all_parsers): + parser = all_parsers + + data = "one,one" + result = parser.read_csv(StringIO(data), dtype={0: "u1", 1: "f"}) + + expected = DataFrame( + {"one": np.empty(0, dtype="u1"), "one.1": np.empty(0, dtype="f")}, + index=Index([], dtype=object), + ) + tm.assert_frame_equal(result, expected) + + +def test_empty_with_dup_column_pass_dtype_by_indexes(all_parsers): + # see gh-9424 + parser = all_parsers + expected = concat( + [Series([], name="one", dtype="u1"), Series([], name="one.1", dtype="f")], + axis=1, + ) + expected.index = expected.index.astype(object) + + data = "one,one" + result = parser.read_csv(StringIO(data), dtype={0: "u1", 1: "f"}) + tm.assert_frame_equal(result, expected) + + +def test_empty_with_dup_column_pass_dtype_by_indexes_raises(all_parsers): + # see gh-9424 + parser = all_parsers + expected = concat( + [Series([], name="one", dtype="u1"), Series([], name="one.1", dtype="f")], + axis=1, + ) + expected.index = expected.index.astype(object) + + with pytest.raises(ValueError, match="Duplicate names"): + data = "" + parser.read_csv(StringIO(data), names=["one", "one"], dtype={0: "u1", 1: "f"}) + + +@pytest.mark.parametrize( + "dtype,expected", + [ + (np.float64, DataFrame(columns=["a", "b"], dtype=np.float64)), + ( + "category", + DataFrame({"a": Categorical([]), "b": Categorical([])}, index=[]), + ), + ( + {"a": "category", "b": "category"}, + DataFrame({"a": Categorical([]), "b": Categorical([])}, index=[]), + ), + ("datetime64[ns]", DataFrame(columns=["a", "b"], dtype="datetime64[ns]")), + ( + "timedelta64[ns]", + DataFrame( + { + "a": Series([], dtype="timedelta64[ns]"), + "b": Series([], dtype="timedelta64[ns]"), + }, + index=[], + ), + ), + ( + {"a": np.int64, "b": np.int32}, + DataFrame( + {"a": Series([], dtype=np.int64), "b": Series([], dtype=np.int32)}, + index=[], + ), + ), + ( + {0: np.int64, 1: np.int32}, + DataFrame( + {"a": Series([], dtype=np.int64), "b": Series([], dtype=np.int32)}, + index=[], + ), + ), + ( + {"a": np.int64, 1: np.int32}, + DataFrame( + {"a": Series([], dtype=np.int64), "b": Series([], dtype=np.int32)}, + index=[], + ), + ), + ], +) +def test_empty_dtype(all_parsers, dtype, expected): + # see gh-14712 + parser = all_parsers + data = "a,b" + + result = parser.read_csv(StringIO(data), header=0, dtype=dtype) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_c_parser_only.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_c_parser_only.py new file mode 100644 index 0000000..5df4470 --- /dev/null +++ 
b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_c_parser_only.py @@ -0,0 +1,684 @@ +""" +Tests that apply specifically to the CParser. Unless specifically stated +as a CParser-specific issue, the goal is to eventually move as many of +these tests out of this module as soon as the Python parser can accept +further arguments when parsing. +""" + +from io import ( + BytesIO, + StringIO, + TextIOWrapper, +) +import mmap +import os +import tarfile + +import numpy as np +import pytest + +from pandas.compat import IS64 +from pandas.errors import ParserError +import pandas.util._test_decorators as td + +from pandas import ( + DataFrame, + concat, +) +import pandas._testing as tm + + +@pytest.mark.parametrize( + "malformed", + ["1\r1\r1\r 1\r 1\r", "1\r1\r1\r 1\r 1\r11\r", "1\r1\r1\r 1\r 1\r11\r1\r"], + ids=["words pointer", "stream pointer", "lines pointer"], +) +def test_buffer_overflow(c_parser_only, malformed): + # see gh-9205: test certain malformed input files that cause + # buffer overflows in tokenizer.c + msg = "Buffer overflow caught - possible malformed input file." + parser = c_parser_only + + with pytest.raises(ParserError, match=msg): + parser.read_csv(StringIO(malformed)) + + +def test_buffer_rd_bytes(c_parser_only): + # see gh-12098: src->buffer in the C parser can be freed twice leading + # to a segfault if a corrupt gzip file is read with 'read_csv', and the + # buffer is filled more than once before gzip raises an Exception. + + data = ( + "\x1F\x8B\x08\x00\x00\x00\x00\x00\x00\x03\xED\xC3\x41\x09" + "\x00\x00\x08\x00\xB1\xB7\xB6\xBA\xFE\xA5\xCC\x21\x6C\xB0" + "\xA6\x4D" + "\x55" * 267 + "\x7D\xF7\x00\x91\xE0\x47\x97\x14\x38\x04\x00" + "\x1f\x8b\x08\x00VT\x97V\x00\x03\xed]\xefO" + ) + parser = c_parser_only + + with tm.assert_produces_warning(RuntimeWarning): + # compression has no effect when passing a non-binary object as input + for _ in range(100): + try: + parser.read_csv( + StringIO(data), compression="gzip", delim_whitespace=True + ) + except Exception: + pass + + +def test_delim_whitespace_custom_terminator(c_parser_only): + # See gh-12912 + data = "a b c~1 2 3~4 5 6~7 8 9" + parser = c_parser_only + + df = parser.read_csv(StringIO(data), lineterminator="~", delim_whitespace=True) + expected = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["a", "b", "c"]) + tm.assert_frame_equal(df, expected) + + +def test_dtype_and_names_error(c_parser_only): + # see gh-8833: passing both dtype and names + # resulting in an error reporting issue + parser = c_parser_only + data = """ +1.0 1 +2.0 2 +3.0 3 +""" + # base cases + result = parser.read_csv(StringIO(data), sep=r"\s+", header=None) + expected = DataFrame([[1.0, 1], [2.0, 2], [3.0, 3]]) + tm.assert_frame_equal(result, expected) + + result = parser.read_csv(StringIO(data), sep=r"\s+", header=None, names=["a", "b"]) + expected = DataFrame([[1.0, 1], [2.0, 2], [3.0, 3]], columns=["a", "b"]) + tm.assert_frame_equal(result, expected) + + # fallback casting + result = parser.read_csv( + StringIO(data), sep=r"\s+", header=None, names=["a", "b"], dtype={"a": np.int32} + ) + expected = DataFrame([[1, 1], [2, 2], [3, 3]], columns=["a", "b"]) + expected["a"] = expected["a"].astype(np.int32) + tm.assert_frame_equal(result, expected) + + data = """ +1.0 1 +nan 2 +3.0 3 +""" + # fallback casting, but not castable + with pytest.raises(ValueError, match="cannot safely convert"): + parser.read_csv( + StringIO(data), + sep=r"\s+", + header=None, + names=["a", "b"], + dtype={"a": np.int32}, + ) + + +@pytest.mark.parametrize( + 
"match,kwargs", + [ + # For each of these cases, all of the dtypes are valid, just unsupported. + ( + ( + "the dtype datetime64 is not supported for parsing, " + "pass this column using parse_dates instead" + ), + {"dtype": {"A": "datetime64", "B": "float64"}}, + ), + ( + ( + "the dtype datetime64 is not supported for parsing, " + "pass this column using parse_dates instead" + ), + {"dtype": {"A": "datetime64", "B": "float64"}, "parse_dates": ["B"]}, + ), + ( + "the dtype timedelta64 is not supported for parsing", + {"dtype": {"A": "timedelta64", "B": "float64"}}, + ), + ("the dtype 262144b) + parser = c_parser_only + header_narrow = "\t".join(["COL_HEADER_" + str(i) for i in range(10)]) + "\n" + data_narrow = "\t".join(["somedatasomedatasomedata1" for _ in range(10)]) + "\n" + header_wide = "\t".join(["COL_HEADER_" + str(i) for i in range(15)]) + "\n" + data_wide = "\t".join(["somedatasomedatasomedata2" for _ in range(15)]) + "\n" + test_input = header_narrow + data_narrow * 1050 + header_wide + data_wide * 2 + + df = parser.read_csv(StringIO(test_input), sep="\t", nrows=1010) + + assert df.size == 1010 * 10 + + +def test_float_precision_round_trip_with_text(c_parser_only): + # see gh-15140 + parser = c_parser_only + df = parser.read_csv(StringIO("a"), header=None, float_precision="round_trip") + tm.assert_frame_equal(df, DataFrame({0: ["a"]})) + + +def test_large_difference_in_columns(c_parser_only): + # see gh-14125 + parser = c_parser_only + + count = 10000 + large_row = ("X," * count)[:-1] + "\n" + normal_row = "XXXXXX XXXXXX,111111111111111\n" + test_input = (large_row + normal_row * 6)[:-1] + + result = parser.read_csv(StringIO(test_input), header=None, usecols=[0]) + rows = test_input.split("\n") + + expected = DataFrame([row.split(",")[0] for row in rows]) + tm.assert_frame_equal(result, expected) + + +def test_data_after_quote(c_parser_only): + # see gh-15910 + parser = c_parser_only + + data = 'a\n1\n"b"a' + result = parser.read_csv(StringIO(data)) + + expected = DataFrame({"a": ["1", "ba"]}) + tm.assert_frame_equal(result, expected) + + +def test_comment_whitespace_delimited(c_parser_only, capsys): + parser = c_parser_only + test_input = """\ +1 2 +2 2 3 +3 2 3 # 3 fields +4 2 3# 3 fields +5 2 # 2 fields +6 2# 2 fields +7 # 1 field, NaN +8# 1 field, NaN +9 2 3 # skipped line +# comment""" + df = parser.read_csv( + StringIO(test_input), + comment="#", + header=None, + delimiter="\\s+", + skiprows=0, + on_bad_lines="warn", + ) + captured = capsys.readouterr() + # skipped lines 2, 3, 4, 9 + for line_num in (2, 3, 4, 9): + assert f"Skipping line {line_num}" in captured.err + expected = DataFrame([[1, 2], [5, 2], [6, 2], [7, np.nan], [8, np.nan]]) + tm.assert_frame_equal(df, expected) + + +def test_file_like_no_next(c_parser_only): + # gh-16530: the file-like need not have a "next" or "__next__" + # attribute despite having an "__iter__" attribute. + # + # NOTE: This is only true for the C engine, not Python engine. 
+ class NoNextBuffer(StringIO): + def __next__(self): + raise AttributeError("No next method") + + next = __next__ + + parser = c_parser_only + data = "a\n1" + + expected = DataFrame({"a": [1]}) + result = parser.read_csv(NoNextBuffer(data)) + + tm.assert_frame_equal(result, expected) + + +def test_buffer_rd_bytes_bad_unicode(c_parser_only): + # see gh-22748 + t = BytesIO(b"\xB0") + t = TextIOWrapper(t, encoding="ascii", errors="surrogateescape") + msg = "'utf-8' codec can't encode character" + with pytest.raises(UnicodeError, match=msg): + c_parser_only.read_csv(t, encoding="UTF-8") + + +@pytest.mark.parametrize("tar_suffix", [".tar", ".tar.gz"]) +def test_read_tarfile(c_parser_only, csv_dir_path, tar_suffix): + # see gh-16530 + # + # Unfortunately, Python's CSV library can't handle + # tarfile objects (expects string, not bytes when + # iterating through a file-like). + parser = c_parser_only + tar_path = os.path.join(csv_dir_path, "tar_csv" + tar_suffix) + + with tarfile.open(tar_path, "r") as tar: + data_file = tar.extractfile("tar_data.csv") + + out = parser.read_csv(data_file) + expected = DataFrame({"a": [1]}) + tm.assert_frame_equal(out, expected) + + +@pytest.mark.high_memory +def test_bytes_exceed_2gb(c_parser_only): + # see gh-16798 + # + # Read from a "CSV" that has a column larger than 2GB. + parser = c_parser_only + + if parser.low_memory: + pytest.skip("not a high_memory test") + + csv = StringIO("strings\n" + "\n".join(["x" * (1 << 20) for _ in range(2100)])) + df = parser.read_csv(csv) + assert not df.empty + + +def test_chunk_whitespace_on_boundary(c_parser_only): + # see gh-9735: this issue is C parser-specific (bug when + # parsing whitespace and characters at chunk boundary) + # + # This test case has a field too large for the Python parser / CSV library. + parser = c_parser_only + + chunk1 = "a" * (1024 * 256 - 2) + "\na" + chunk2 = "\n a" + result = parser.read_csv(StringIO(chunk1 + chunk2), header=None) + + expected = DataFrame(["a" * (1024 * 256 - 2), "a", " a"]) + tm.assert_frame_equal(result, expected) + + +def test_file_handles_mmap(c_parser_only, csv1): + # gh-14418 + # + # Don't close user provided file handles. + parser = c_parser_only + + with open(csv1) as f: + m = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) + parser.read_csv(m) + + assert not m.closed + m.close() + + +def test_file_binary_mode(c_parser_only): + # see gh-23779 + parser = c_parser_only + expected = DataFrame([[1, 2, 3], [4, 5, 6]]) + + with tm.ensure_clean() as path: + with open(path, "w") as f: + f.write("1,2,3\n4,5,6") + + with open(path, "rb") as f: + result = parser.read_csv(f, header=None) + tm.assert_frame_equal(result, expected) + + +def test_unix_style_breaks(c_parser_only): + # GH 11020 + parser = c_parser_only + with tm.ensure_clean() as path: + with open(path, "w", newline="\n") as f: + f.write("blah\n\ncol_1,col_2,col_3\n\n") + result = parser.read_csv(path, skiprows=2, encoding="utf-8", engine="c") + expected = DataFrame(columns=["col_1", "col_2", "col_3"]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("float_precision", [None, "legacy", "high", "round_trip"]) +@pytest.mark.parametrize( + "data,thousands,decimal", + [ + ( + """A|B|C +1|2,334.01|5 +10|13|10. 
+""", + ",", + ".", + ), + ( + """A|B|C +1|2.334,01|5 +10|13|10, +""", + ".", + ",", + ), + ], +) +def test_1000_sep_with_decimal( + c_parser_only, data, thousands, decimal, float_precision +): + parser = c_parser_only + expected = DataFrame({"A": [1, 10], "B": [2334.01, 13], "C": [5, 10.0]}) + + result = parser.read_csv( + StringIO(data), + sep="|", + thousands=thousands, + decimal=decimal, + float_precision=float_precision, + ) + tm.assert_frame_equal(result, expected) + + +def test_float_precision_options(c_parser_only): + # GH 17154, 36228 + parser = c_parser_only + s = "foo\n243.164\n" + df = parser.read_csv(StringIO(s)) + df2 = parser.read_csv(StringIO(s), float_precision="high") + + tm.assert_frame_equal(df, df2) + + df3 = parser.read_csv(StringIO(s), float_precision="legacy") + + if IS64: + assert not df.iloc[0, 0] == df3.iloc[0, 0] + else: + assert df.iloc[0, 0] == df3.iloc[0, 0] + + msg = "Unrecognized float_precision option: junk" + + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(s), float_precision="junk") diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_comment.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_comment.py new file mode 100644 index 0000000..9a14e67 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_comment.py @@ -0,0 +1,168 @@ +""" +Tests that comments are properly handled during parsing +for all of the parsers defined in parsers.py +""" +from io import StringIO + +import numpy as np +import pytest + +from pandas import DataFrame +import pandas._testing as tm + +pytestmark = pytest.mark.usefixtures("pyarrow_skip") + + +@pytest.mark.parametrize("na_values", [None, ["NaN"]]) +def test_comment(all_parsers, na_values): + parser = all_parsers + data = """A,B,C +1,2.,4.#hello world +5.,NaN,10.0 +""" + expected = DataFrame( + [[1.0, 2.0, 4.0], [5.0, np.nan, 10.0]], columns=["A", "B", "C"] + ) + result = parser.read_csv(StringIO(data), comment="#", na_values=na_values) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "read_kwargs", [{}, {"lineterminator": "*"}, {"delim_whitespace": True}] +) +def test_line_comment(all_parsers, read_kwargs, request): + parser = all_parsers + data = """# empty +A,B,C +1,2.,4.#hello world +#ignore this line +5.,NaN,10.0 +""" + if read_kwargs.get("delim_whitespace"): + data = data.replace(",", " ") + elif read_kwargs.get("lineterminator"): + if parser.engine != "c": + mark = pytest.mark.xfail( + reason="Custom terminator not supported with Python engine" + ) + request.node.add_marker(mark) + + data = data.replace("\n", read_kwargs.get("lineterminator")) + + read_kwargs["comment"] = "#" + result = parser.read_csv(StringIO(data), **read_kwargs) + + expected = DataFrame( + [[1.0, 2.0, 4.0], [5.0, np.nan, 10.0]], columns=["A", "B", "C"] + ) + tm.assert_frame_equal(result, expected) + + +def test_comment_skiprows(all_parsers): + parser = all_parsers + data = """# empty +random line +# second empty line +1,2,3 +A,B,C +1,2.,4. +5.,NaN,10.0 +""" + # This should ignore the first four lines (including comments). + expected = DataFrame( + [[1.0, 2.0, 4.0], [5.0, np.nan, 10.0]], columns=["A", "B", "C"] + ) + result = parser.read_csv(StringIO(data), comment="#", skiprows=4) + tm.assert_frame_equal(result, expected) + + +def test_comment_header(all_parsers): + parser = all_parsers + data = """# empty +# second empty line +1,2,3 +A,B,C +1,2.,4. +5.,NaN,10.0 +""" + # Header should begin at the second non-comment line. 
+ expected = DataFrame( + [[1.0, 2.0, 4.0], [5.0, np.nan, 10.0]], columns=["A", "B", "C"] + ) + result = parser.read_csv(StringIO(data), comment="#", header=1) + tm.assert_frame_equal(result, expected) + + +def test_comment_skiprows_header(all_parsers): + parser = all_parsers + data = """# empty +# second empty line +# third empty line +X,Y,Z +1,2,3 +A,B,C +1,2.,4. +5.,NaN,10.0 +""" + # Skiprows should skip the first 4 lines (including comments), + # while header should start from the second non-commented line, + # starting with line 5. + expected = DataFrame( + [[1.0, 2.0, 4.0], [5.0, np.nan, 10.0]], columns=["A", "B", "C"] + ) + result = parser.read_csv(StringIO(data), comment="#", skiprows=4, header=1) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("comment_char", ["#", "~", "&", "^", "*", "@"]) +def test_custom_comment_char(all_parsers, comment_char): + parser = all_parsers + data = "a,b,c\n1,2,3#ignore this!\n4,5,6#ignorethistoo" + result = parser.read_csv( + StringIO(data.replace("#", comment_char)), comment=comment_char + ) + + expected = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "b", "c"]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("header", ["infer", None]) +def test_comment_first_line(all_parsers, header): + # see gh-4623 + parser = all_parsers + data = "# notes\na,b,c\n# more notes\n1,2,3" + + if header is None: + expected = DataFrame({0: ["a", "1"], 1: ["b", "2"], 2: ["c", "3"]}) + else: + expected = DataFrame([[1, 2, 3]], columns=["a", "b", "c"]) + + result = parser.read_csv(StringIO(data), comment="#", header=header) + tm.assert_frame_equal(result, expected) + + +def test_comment_char_in_default_value(all_parsers, request): + # GH#34002 + if all_parsers.engine == "c": + reason = "see gh-34002: works on the python engine but not the c engine" + # NA value containing comment char is interpreted as comment + request.node.add_marker(pytest.mark.xfail(reason=reason, raises=AssertionError)) + parser = all_parsers + + data = ( + "# this is a comment\n" + "col1,col2,col3,col4\n" + "1,2,3,4#inline comment\n" + "4,5#,6,10\n" + "7,8,#N/A,11\n" + ) + result = parser.read_csv(StringIO(data), comment="#", na_values="#N/A") + expected = DataFrame( + { + "col1": [1, 4, 7], + "col2": [2, 5, 8], + "col3": [3.0, np.nan, np.nan], + "col4": [4.0, np.nan, 11.0], + } + ) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_compression.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_compression.py new file mode 100644 index 0000000..d97b594 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_compression.py @@ -0,0 +1,176 @@ +""" +Tests compressed data parsing functionality for all +of the parsers defined in parsers.py +""" + +import os +from pathlib import Path +import zipfile + +import pytest + +from pandas import DataFrame +import pandas._testing as tm + +import pandas.io.common as icom + +skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") + + +@pytest.fixture(params=[True, False]) +def buffer(request): + return request.param + + +@pytest.fixture +def parser_and_data(all_parsers, csv1): + parser = all_parsers + + with open(csv1, "rb") as f: + data = f.read() + expected = parser.read_csv(csv1) + + return parser, data, expected + + +@skip_pyarrow +@pytest.mark.parametrize("compression", ["zip", "infer", "zip2"]) +def test_zip(parser_and_data, compression): + parser, data, expected = parser_and_data + + with 
tm.ensure_clean("test_file.zip") as path: + with zipfile.ZipFile(path, mode="w") as tmp: + tmp.writestr("test_file", data) + + if compression == "zip2": + with open(path, "rb") as f: + result = parser.read_csv(f, compression="zip") + else: + result = parser.read_csv(path, compression=compression) + + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +@pytest.mark.parametrize("compression", ["zip", "infer"]) +def test_zip_error_multiple_files(parser_and_data, compression): + parser, data, expected = parser_and_data + + with tm.ensure_clean("combined_zip.zip") as path: + inner_file_names = ["test_file", "second_file"] + + with zipfile.ZipFile(path, mode="w") as tmp: + for file_name in inner_file_names: + tmp.writestr(file_name, data) + + with pytest.raises(ValueError, match="Multiple files"): + parser.read_csv(path, compression=compression) + + +@skip_pyarrow +def test_zip_error_no_files(parser_and_data): + parser, _, _ = parser_and_data + + with tm.ensure_clean() as path: + with zipfile.ZipFile(path, mode="w"): + pass + + with pytest.raises(ValueError, match="Zero files"): + parser.read_csv(path, compression="zip") + + +@skip_pyarrow +def test_zip_error_invalid_zip(parser_and_data): + parser, _, _ = parser_and_data + + with tm.ensure_clean() as path: + with open(path, "rb") as f: + with pytest.raises(zipfile.BadZipfile, match="File is not a zip file"): + parser.read_csv(f, compression="zip") + + +@skip_pyarrow +@pytest.mark.parametrize("filename", [None, "test.{ext}"]) +def test_compression(parser_and_data, compression_only, buffer, filename): + parser, data, expected = parser_and_data + compress_type = compression_only + + ext = icom._compression_to_extension[compress_type] + filename = filename if filename is None else filename.format(ext=ext) + + if filename and buffer: + pytest.skip("Cannot deduce compression from buffer of compressed data.") + + with tm.ensure_clean(filename=filename) as path: + tm.write_to_compressed(compress_type, path, data) + compression = "infer" if filename else compress_type + + if buffer: + with open(path, "rb") as f: + result = parser.read_csv(f, compression=compression) + else: + result = parser.read_csv(path, compression=compression) + + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +@pytest.mark.parametrize("ext", [None, "gz", "bz2"]) +def test_infer_compression(all_parsers, csv1, buffer, ext): + # see gh-9770 + parser = all_parsers + kwargs = {"index_col": 0, "parse_dates": True} + + expected = parser.read_csv(csv1, **kwargs) + kwargs["compression"] = "infer" + + if buffer: + with open(csv1) as f: + result = parser.read_csv(f, **kwargs) + else: + ext = "." 
+ ext if ext else "" + result = parser.read_csv(csv1 + ext, **kwargs) + + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +def test_compression_utf_encoding(all_parsers, csv_dir_path, utf_value, encoding_fmt): + # see gh-18071, gh-24130 + parser = all_parsers + encoding = encoding_fmt.format(utf_value) + path = os.path.join(csv_dir_path, f"utf{utf_value}_ex_small.zip") + + result = parser.read_csv(path, encoding=encoding, compression="zip", sep="\t") + expected = DataFrame( + { + "Country": ["Venezuela", "Venezuela"], + "Twitter": ["Hugo Chávez Frías", "Henrique Capriles R."], + } + ) + + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +@pytest.mark.parametrize("invalid_compression", ["sfark", "bz3", "zipper"]) +def test_invalid_compression(all_parsers, invalid_compression): + parser = all_parsers + compress_kwargs = {"compression": invalid_compression} + + msg = f"Unrecognized compression type: {invalid_compression}" + + with pytest.raises(ValueError, match=msg): + parser.read_csv("test_file.zip", **compress_kwargs) + + +def test_ignore_compression_extension(all_parsers): + parser = all_parsers + df = DataFrame({"a": [0, 1]}) + with tm.ensure_clean("test.csv") as path_csv: + with tm.ensure_clean("test.csv.zip") as path_zip: + # make sure to create un-compressed file with zip extension + df.to_csv(path_csv, index=False) + Path(path_zip).write_text(Path(path_csv).read_text()) + + tm.assert_frame_equal(parser.read_csv(path_zip, compression=None), df) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_converters.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_converters.py new file mode 100644 index 0000000..21933d8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_converters.py @@ -0,0 +1,191 @@ +""" +Tests column conversion functionality during parsing +for all of the parsers defined in parsers.py +""" +from io import StringIO + +from dateutil.parser import parse +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Index, +) +import pandas._testing as tm + +pytestmark = pytest.mark.usefixtures("pyarrow_skip") + + +def test_converters_type_must_be_dict(all_parsers): + parser = all_parsers + data = """index,A,B,C,D +foo,2,3,4,5 +""" + + with pytest.raises(TypeError, match="Type converters.+"): + parser.read_csv(StringIO(data), converters=0) + + +@pytest.mark.parametrize("column", [3, "D"]) +@pytest.mark.parametrize( + "converter", [parse, lambda x: int(x.split("/")[2])] # Produce integer. +) +def test_converters(all_parsers, column, converter): + parser = all_parsers + data = """A,B,C,D +a,1,2,01/01/2009 +b,3,4,01/02/2009 +c,4,5,01/03/2009 +""" + result = parser.read_csv(StringIO(data), converters={column: converter}) + + expected = parser.read_csv(StringIO(data)) + expected["D"] = expected["D"].map(converter) + + tm.assert_frame_equal(result, expected) + + +def test_converters_no_implicit_conv(all_parsers): + # see gh-2184 + parser = all_parsers + data = """000102,1.2,A\n001245,2,B""" + + converters = {0: lambda x: x.strip()} + result = parser.read_csv(StringIO(data), header=None, converters=converters) + + # Column 0 should not be casted to numeric and should remain as object. 
+ expected = DataFrame([["000102", 1.2, "A"], ["001245", 2, "B"]]) + tm.assert_frame_equal(result, expected) + + +def test_converters_euro_decimal_format(all_parsers): + # see gh-583 + converters = {} + parser = all_parsers + + data = """Id;Number1;Number2;Text1;Text2;Number3 +1;1521,1541;187101,9543;ABC;poi;4,7387 +2;121,12;14897,76;DEF;uyt;0,3773 +3;878,158;108013,434;GHI;rez;2,7356""" + converters["Number1"] = converters["Number2"] = converters[ + "Number3" + ] = lambda x: float(x.replace(",", ".")) + + result = parser.read_csv(StringIO(data), sep=";", converters=converters) + expected = DataFrame( + [ + [1, 1521.1541, 187101.9543, "ABC", "poi", 4.7387], + [2, 121.12, 14897.76, "DEF", "uyt", 0.3773], + [3, 878.158, 108013.434, "GHI", "rez", 2.7356], + ], + columns=["Id", "Number1", "Number2", "Text1", "Text2", "Number3"], + ) + tm.assert_frame_equal(result, expected) + + +def test_converters_corner_with_nans(all_parsers): + parser = all_parsers + data = """id,score,days +1,2,12 +2,2-5, +3,,14+ +4,6-12,2""" + + # Example converters. + def convert_days(x): + x = x.strip() + + if not x: + return np.nan + + is_plus = x.endswith("+") + + if is_plus: + x = int(x[:-1]) + 1 + else: + x = int(x) + + return x + + def convert_days_sentinel(x): + x = x.strip() + + if not x: + return np.nan + + is_plus = x.endswith("+") + + if is_plus: + x = int(x[:-1]) + 1 + else: + x = int(x) + + return x + + def convert_score(x): + x = x.strip() + + if not x: + return np.nan + + if x.find("-") > 0: + val_min, val_max = map(int, x.split("-")) + val = 0.5 * (val_min + val_max) + else: + val = float(x) + + return val + + results = [] + + for day_converter in [convert_days, convert_days_sentinel]: + result = parser.read_csv( + StringIO(data), + converters={"score": convert_score, "days": day_converter}, + na_values=["", None], + ) + assert pd.isna(result["days"][1]) + results.append(result) + + tm.assert_frame_equal(results[0], results[1]) + + +def test_converter_index_col_bug(all_parsers): + # see gh-1835 + parser = all_parsers + data = "A;B\n1;2\n3;4" + + rs = parser.read_csv( + StringIO(data), sep=";", index_col="A", converters={"A": lambda x: x} + ) + + xp = DataFrame({"B": [2, 4]}, index=Index([1, 3], name="A")) + tm.assert_frame_equal(rs, xp) + + +def test_converter_multi_index(all_parsers): + # GH 42446 + parser = all_parsers + data = "A,B,B\nX,Y,Z\n1,2,3" + + result = parser.read_csv( + StringIO(data), + header=list(range(2)), + converters={ + ("A", "X"): np.int32, + ("B", "Y"): np.int32, + ("B", "Z"): np.float32, + }, + ) + + expected = DataFrame( + { + ("A", "X"): np.int32([1]), + ("B", "Y"): np.int32([2]), + ("B", "Z"): np.float32([3]), + } + ) + + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_dialect.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_dialect.py new file mode 100644 index 0000000..55b1939 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_dialect.py @@ -0,0 +1,146 @@ +""" +Tests that dialects are properly handled during parsing +for all of the parsers defined in parsers.py +""" + +import csv +from io import StringIO + +import pytest + +from pandas.errors import ParserWarning + +from pandas import DataFrame +import pandas._testing as tm + +pytestmark = pytest.mark.usefixtures("pyarrow_skip") + + +@pytest.fixture +def custom_dialect(): + dialect_name = "weird" + dialect_kwargs = { + "doublequote": False, + "escapechar": "~", + "delimiter": ":", + "skipinitialspace": False, + 
"quotechar": "~", + "quoting": 3, + } + return dialect_name, dialect_kwargs + + +def test_dialect(all_parsers): + parser = all_parsers + data = """\ +label1,label2,label3 +index1,"a,c,e +index2,b,d,f +""" + + dia = csv.excel() + dia.quoting = csv.QUOTE_NONE + df = parser.read_csv(StringIO(data), dialect=dia) + + data = """\ +label1,label2,label3 +index1,a,c,e +index2,b,d,f +""" + exp = parser.read_csv(StringIO(data)) + exp.replace("a", '"a', inplace=True) + tm.assert_frame_equal(df, exp) + + +def test_dialect_str(all_parsers): + dialect_name = "mydialect" + parser = all_parsers + data = """\ +fruit:vegetable +apple:broccoli +pear:tomato +""" + exp = DataFrame({"fruit": ["apple", "pear"], "vegetable": ["broccoli", "tomato"]}) + + with tm.with_csv_dialect(dialect_name, delimiter=":"): + df = parser.read_csv(StringIO(data), dialect=dialect_name) + tm.assert_frame_equal(df, exp) + + +def test_invalid_dialect(all_parsers): + class InvalidDialect: + pass + + data = "a\n1" + parser = all_parsers + msg = "Invalid dialect" + + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), dialect=InvalidDialect) + + +@pytest.mark.parametrize( + "arg", + [None, "doublequote", "escapechar", "skipinitialspace", "quotechar", "quoting"], +) +@pytest.mark.parametrize("value", ["dialect", "default", "other"]) +def test_dialect_conflict_except_delimiter(all_parsers, custom_dialect, arg, value): + # see gh-23761. + dialect_name, dialect_kwargs = custom_dialect + parser = all_parsers + + expected = DataFrame({"a": [1], "b": [2]}) + data = "a:b\n1:2" + + warning_klass = None + kwds = {} + + # arg=None tests when we pass in the dialect without any other arguments. + if arg is not None: + if "value" == "dialect": # No conflict --> no warning. + kwds[arg] = dialect_kwargs[arg] + elif "value" == "default": # Default --> no warning. + from pandas.io.parsers.base_parser import parser_defaults + + kwds[arg] = parser_defaults[arg] + else: # Non-default + conflict with dialect --> warning. + warning_klass = ParserWarning + kwds[arg] = "blah" + + with tm.with_csv_dialect(dialect_name, **dialect_kwargs): + with tm.assert_produces_warning(warning_klass): + result = parser.read_csv(StringIO(data), dialect=dialect_name, **kwds) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "kwargs,warning_klass", + [ + ({"sep": ","}, None), # sep is default --> sep_override=True + ({"sep": "."}, ParserWarning), # sep isn't default --> sep_override=False + ({"delimiter": ":"}, None), # No conflict + ({"delimiter": None}, None), # Default arguments --> sep_override=True + ({"delimiter": ","}, ParserWarning), # Conflict + ({"delimiter": "."}, ParserWarning), # Conflict + ], + ids=[ + "sep-override-true", + "sep-override-false", + "delimiter-no-conflict", + "delimiter-default-arg", + "delimiter-conflict", + "delimiter-conflict2", + ], +) +def test_dialect_conflict_delimiter(all_parsers, custom_dialect, kwargs, warning_klass): + # see gh-23761. 
+ dialect_name, dialect_kwargs = custom_dialect + parser = all_parsers + + expected = DataFrame({"a": [1], "b": [2]}) + data = "a:b\n1:2" + + with tm.with_csv_dialect(dialect_name, **dialect_kwargs): + with tm.assert_produces_warning(warning_klass): + result = parser.read_csv(StringIO(data), dialect=dialect_name, **kwargs) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_encoding.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_encoding.py new file mode 100644 index 0000000..2b27332 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_encoding.py @@ -0,0 +1,315 @@ +""" +Tests encoding functionality during parsing +for all of the parsers defined in parsers.py +""" + +from io import BytesIO +import os +import tempfile + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + read_csv, +) +import pandas._testing as tm + +skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") + + +@skip_pyarrow +def test_bytes_io_input(all_parsers): + encoding = "cp1255" + parser = all_parsers + + data = BytesIO("שלום:1234\n562:123".encode(encoding)) + result = parser.read_csv(data, sep=":", encoding=encoding) + + expected = DataFrame([[562, 123]], columns=["שלום", "1234"]) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +def test_read_csv_unicode(all_parsers): + parser = all_parsers + data = BytesIO("\u0141aski, Jan;1".encode()) + + result = parser.read_csv(data, sep=";", encoding="utf-8", header=None) + expected = DataFrame([["\u0141aski, Jan", 1]]) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +@pytest.mark.parametrize("sep", [",", "\t"]) +@pytest.mark.parametrize("encoding", ["utf-16", "utf-16le", "utf-16be"]) +def test_utf16_bom_skiprows(all_parsers, sep, encoding): + # see gh-2298 + parser = all_parsers + data = """skip this +skip this too +A,B,C +1,2,3 +4,5,6""".replace( + ",", sep + ) + path = f"__{tm.rands(10)}__.csv" + kwargs = {"sep": sep, "skiprows": 2} + utf8 = "utf-8" + + with tm.ensure_clean(path) as path: + from io import TextIOWrapper + + bytes_data = data.encode(encoding) + + with open(path, "wb") as f: + f.write(bytes_data) + + bytes_buffer = BytesIO(data.encode(utf8)) + bytes_buffer = TextIOWrapper(bytes_buffer, encoding=utf8) + + result = parser.read_csv(path, encoding=encoding, **kwargs) + expected = parser.read_csv(bytes_buffer, encoding=utf8, **kwargs) + + bytes_buffer.close() + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +def test_utf16_example(all_parsers, csv_dir_path): + path = os.path.join(csv_dir_path, "utf16_ex.txt") + parser = all_parsers + result = parser.read_csv(path, encoding="utf-16", sep="\t") + assert len(result) == 50 + + +@skip_pyarrow +def test_unicode_encoding(all_parsers, csv_dir_path): + path = os.path.join(csv_dir_path, "unicode_series.csv") + parser = all_parsers + + result = parser.read_csv(path, header=None, encoding="latin-1") + result = result.set_index(0) + got = result[1][1632] + + expected = "\xc1 k\xf6ldum klaka (Cold Fever) (1994)" + assert got == expected + + +@skip_pyarrow +@pytest.mark.parametrize( + "data,kwargs,expected", + [ + # Basic test + ("a\n1", {}, DataFrame({"a": [1]})), + # "Regular" quoting + ('"a"\n1', {"quotechar": '"'}, DataFrame({"a": [1]})), + # Test in a data row instead of header + ("b\n1", {"names": ["a"]}, DataFrame({"a": ["b", "1"]})), + # Test in empty data row with skipping + ("\n1", {"names": ["a"], "skip_blank_lines": True}, DataFrame({"a": [1]})), + 
# Test in empty data row without skipping + ( + "\n1", + {"names": ["a"], "skip_blank_lines": False}, + DataFrame({"a": [np.nan, 1]}), + ), + ], +) +def test_utf8_bom(all_parsers, data, kwargs, expected): + # see gh-4793 + parser = all_parsers + bom = "\ufeff" + utf8 = "utf-8" + + def _encode_data_with_bom(_data): + bom_data = (bom + _data).encode(utf8) + return BytesIO(bom_data) + + result = parser.read_csv(_encode_data_with_bom(data), encoding=utf8, **kwargs) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +def test_read_csv_utf_aliases(all_parsers, utf_value, encoding_fmt): + # see gh-13549 + expected = DataFrame({"mb_num": [4.8], "multibyte": ["test"]}) + parser = all_parsers + + encoding = encoding_fmt.format(utf_value) + data = "mb_num,multibyte\n4.8,test".encode(encoding) + + result = parser.read_csv(BytesIO(data), encoding=encoding) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +@pytest.mark.parametrize( + "file_path,encoding", + [ + (("io", "data", "csv", "test1.csv"), "utf-8"), + (("io", "parser", "data", "unicode_series.csv"), "latin-1"), + (("io", "parser", "data", "sauron.SHIFT_JIS.csv"), "shiftjis"), + ], +) +def test_binary_mode_file_buffers( + all_parsers, csv_dir_path, file_path, encoding, datapath +): + # gh-23779: Python csv engine shouldn't error on files opened in binary. + # gh-31575: Python csv engine shouldn't error on files opened in raw binary. + parser = all_parsers + + fpath = datapath(*file_path) + expected = parser.read_csv(fpath, encoding=encoding) + + with open(fpath, encoding=encoding) as fa: + result = parser.read_csv(fa) + assert not fa.closed + tm.assert_frame_equal(expected, result) + + with open(fpath, mode="rb") as fb: + result = parser.read_csv(fb, encoding=encoding) + assert not fb.closed + tm.assert_frame_equal(expected, result) + + with open(fpath, mode="rb", buffering=0) as fb: + result = parser.read_csv(fb, encoding=encoding) + assert not fb.closed + tm.assert_frame_equal(expected, result) + + +@skip_pyarrow +@pytest.mark.parametrize("pass_encoding", [True, False]) +def test_encoding_temp_file(all_parsers, utf_value, encoding_fmt, pass_encoding): + # see gh-24130 + parser = all_parsers + encoding = encoding_fmt.format(utf_value) + + expected = DataFrame({"foo": ["bar"]}) + + with tm.ensure_clean(mode="w+", encoding=encoding, return_filelike=True) as f: + f.write("foo\nbar") + f.seek(0) + + result = parser.read_csv(f, encoding=encoding if pass_encoding else None) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +def test_encoding_named_temp_file(all_parsers): + # see gh-31819 + parser = all_parsers + encoding = "shift-jis" + + title = "てすと" + data = "こむ" + + expected = DataFrame({title: [data]}) + + with tempfile.NamedTemporaryFile() as f: + f.write(f"{title}\n{data}".encode(encoding)) + + f.seek(0) + + result = parser.read_csv(f, encoding=encoding) + tm.assert_frame_equal(result, expected) + assert not f.closed + + +@pytest.mark.parametrize( + "encoding", ["utf-8", "utf-16", "utf-16-be", "utf-16-le", "utf-32"] +) +def test_parse_encoded_special_characters(encoding): + # GH16218 Verify parsing of data with encoded special characters + # Data contains a Unicode 'FULLWIDTH COLON' (U+FF1A) at position (0,"a") + data = "a\tb\n:foo\t0\nbar\t1\nbaz\t2" + encoded_data = BytesIO(data.encode(encoding)) + result = read_csv(encoded_data, delimiter="\t", encoding=encoding) + + expected = DataFrame(data=[[":foo", 0], ["bar", 1], ["baz", 2]], columns=["a", "b"]) + tm.assert_frame_equal(result, expected) + + 
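+# A minimal standalone sketch (not part of the upstream suite; the helper
+# name is invented) of the pattern exercised above: encode text with an
+# explicit codec, then hand read_csv a binary buffer plus the matching
+# "encoding" argument.
+def _example_explicit_encoding_roundtrip():
+    raw = "a\tb\n\uff1afoo\t0\nbar\t1"  # note the fullwidth colon U+FF1A
+    buf = BytesIO(raw.encode("utf-16"))
+    # read_csv decodes the buffer with the same codec used to encode it
+    return read_csv(buf, delimiter="\t", encoding="utf-16")
+
+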
+@skip_pyarrow +@pytest.mark.parametrize("encoding", ["utf-8", None, "utf-16", "cp1255", "latin-1"]) +def test_encoding_memory_map(all_parsers, encoding): + # GH40986 + parser = all_parsers + expected = DataFrame( + { + "name": ["Raphael", "Donatello", "Miguel Angel", "Leonardo"], + "mask": ["red", "purple", "orange", "blue"], + "weapon": ["sai", "bo staff", "nunchunk", "katana"], + } + ) + with tm.ensure_clean() as file: + expected.to_csv(file, index=False, encoding=encoding) + df = parser.read_csv(file, encoding=encoding, memory_map=True) + tm.assert_frame_equal(df, expected) + + +@skip_pyarrow +def test_chunk_splits_multibyte_char(all_parsers): + """ + Chunk splits a multibyte character with memory_map=True + + GH 43540 + """ + parser = all_parsers + # DEFAULT_CHUNKSIZE = 262144, defined in parsers.pyx + df = DataFrame(data=["a" * 127] * 2048) + + # Put two-bytes utf-8 encoded character "ą" at the end of chunk + # utf-8 encoding of "ą" is b'\xc4\x85' + df.iloc[2047] = "a" * 127 + "ą" + with tm.ensure_clean("bug-gh43540.csv") as fname: + df.to_csv(fname, index=False, header=False, encoding="utf-8") + dfr = parser.read_csv(fname, header=None, memory_map=True, engine="c") + tm.assert_frame_equal(dfr, df) + + +@skip_pyarrow +def test_readcsv_memmap_utf8(all_parsers): + """ + GH 43787 + + Test correct handling of UTF-8 chars when memory_map=True and encoding is UTF-8 + """ + lines = [] + line_length = 128 + start_char = " " + end_char = "\U00010080" + # This for loop creates a list of 128-char strings + # consisting of consecutive Unicode chars + for lnum in range(ord(start_char), ord(end_char), line_length): + line = "".join([chr(c) for c in range(lnum, lnum + 0x80)]) + "\n" + try: + line.encode("utf-8") + except UnicodeEncodeError: + continue + lines.append(line) + parser = all_parsers + df = DataFrame(lines) + with tm.ensure_clean("utf8test.csv") as fname: + df.to_csv(fname, index=False, header=False, encoding="utf-8") + dfr = parser.read_csv( + fname, header=None, memory_map=True, engine="c", encoding="utf-8" + ) + tm.assert_frame_equal(df, dfr) + + +@pytest.mark.usefixtures("pyarrow_xfail") +@pytest.mark.parametrize("mode", ["w+b", "w+t"]) +def test_not_readable(all_parsers, mode): + # GH43439 + parser = all_parsers + content = b"abcd" + if "t" in mode: + content = "abcd" + with tempfile.SpooledTemporaryFile(mode=mode) as handle: + handle.write(content) + handle.seek(0) + df = parser.read_csv(handle) + expected = DataFrame([], columns=["abcd"]) + tm.assert_frame_equal(df, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_header.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_header.py new file mode 100644 index 0000000..3fc2352 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_header.py @@ -0,0 +1,668 @@ +""" +Tests that the file header is properly handled or inferred +during parsing for all of the parsers defined in parsers.py +""" + +from collections import namedtuple +from io import StringIO + +import numpy as np +import pytest + +from pandas.errors import ParserError + +from pandas import ( + DataFrame, + Index, + MultiIndex, +) +import pandas._testing as tm + +# TODO(1.4): Change me to xfails at release time +skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") + + +@skip_pyarrow +def test_read_with_bad_header(all_parsers): + parser = all_parsers + msg = r"but only \d+ lines in file" + + with pytest.raises(ValueError, match=msg): + s = StringIO(",,") + parser.read_csv(s, header=[10]) + + +def 
test_negative_header(all_parsers): + # see gh-27779 + parser = all_parsers + data = """1,2,3,4,5 +6,7,8,9,10 +11,12,13,14,15 +""" + with pytest.raises( + ValueError, + match="Passing negative integer to header is invalid. " + "For no header, use header=None instead", + ): + parser.read_csv(StringIO(data), header=-1) + + +@pytest.mark.parametrize("header", [([-1, 2, 4]), ([-5, 0])]) +def test_negative_multi_index_header(all_parsers, header): + # see gh-27779 + parser = all_parsers + data = """1,2,3,4,5 + 6,7,8,9,10 + 11,12,13,14,15 + """ + with pytest.raises( + ValueError, match="cannot specify multi-index header with negative integers" + ): + parser.read_csv(StringIO(data), header=header) + + +@pytest.mark.parametrize("header", [True, False]) +def test_bool_header_arg(all_parsers, header): + # see gh-6114 + parser = all_parsers + data = """\ +MyColumn +a +b +a +b""" + msg = "Passing a bool to header is invalid" + with pytest.raises(TypeError, match=msg): + parser.read_csv(StringIO(data), header=header) + + +def test_no_header_prefix(all_parsers): + parser = all_parsers + data = """1,2,3,4,5 +6,7,8,9,10 +11,12,13,14,15 +""" + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = parser.read_csv(StringIO(data), prefix="Field", header=None) + expected = DataFrame( + [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]], + columns=["Field0", "Field1", "Field2", "Field3", "Field4"], + ) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +def test_header_with_index_col(all_parsers): + parser = all_parsers + data = """foo,1,2,3 +bar,4,5,6 +baz,7,8,9 +""" + names = ["A", "B", "C"] + result = parser.read_csv(StringIO(data), names=names) + + expected = DataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + index=["foo", "bar", "baz"], + columns=["A", "B", "C"], + ) + tm.assert_frame_equal(result, expected) + + +def test_header_not_first_line(all_parsers): + parser = all_parsers + data = """got,to,ignore,this,line +got,to,ignore,this,line +index,A,B,C,D +foo,2,3,4,5 +bar,7,8,9,10 +baz,12,13,14,15 +""" + data2 = """index,A,B,C,D +foo,2,3,4,5 +bar,7,8,9,10 +baz,12,13,14,15 +""" + + result = parser.read_csv(StringIO(data), header=2, index_col=0) + expected = parser.read_csv(StringIO(data2), header=0, index_col=0) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +def test_header_multi_index(all_parsers): + parser = all_parsers + expected = tm.makeCustomDataframe(5, 3, r_idx_nlevels=2, c_idx_nlevels=4) + + data = """\ +C0,,C_l0_g0,C_l0_g1,C_l0_g2 + +C1,,C_l1_g0,C_l1_g1,C_l1_g2 +C2,,C_l2_g0,C_l2_g1,C_l2_g2 +C3,,C_l3_g0,C_l3_g1,C_l3_g2 +R0,R1,,, +R_l0_g0,R_l1_g0,R0C0,R0C1,R0C2 +R_l0_g1,R_l1_g1,R1C0,R1C1,R1C2 +R_l0_g2,R_l1_g2,R2C0,R2C1,R2C2 +R_l0_g3,R_l1_g3,R3C0,R3C1,R3C2 +R_l0_g4,R_l1_g4,R4C0,R4C1,R4C2 +""" + result = parser.read_csv(StringIO(data), header=[0, 1, 2, 3], index_col=[0, 1]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "kwargs,msg", + [ + ( + {"index_col": ["foo", "bar"]}, + ( + "index_col must only contain " + "row numbers when specifying " + "a multi-index header" + ), + ), + ( + {"index_col": [0, 1], "names": ["foo", "bar"]}, + ("cannot specify names when specifying a multi-index header"), + ), + ( + {"index_col": [0, 1], "usecols": ["foo", "bar"]}, + ("cannot specify usecols when specifying a multi-index header"), + ), + ], +) +def test_header_multi_index_invalid(all_parsers, kwargs, msg): + data = """\ +C0,,C_l0_g0,C_l0_g1,C_l0_g2 + +C1,,C_l1_g0,C_l1_g1,C_l1_g2 +C2,,C_l2_g0,C_l2_g1,C_l2_g2 
+C3,,C_l3_g0,C_l3_g1,C_l3_g2 +R0,R1,,, +R_l0_g0,R_l1_g0,R0C0,R0C1,R0C2 +R_l0_g1,R_l1_g1,R1C0,R1C1,R1C2 +R_l0_g2,R_l1_g2,R2C0,R2C1,R2C2 +R_l0_g3,R_l1_g3,R3C0,R3C1,R3C2 +R_l0_g4,R_l1_g4,R4C0,R4C1,R4C2 +""" + parser = all_parsers + + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), header=[0, 1, 2, 3], **kwargs) + + +_TestTuple = namedtuple("_TestTuple", ["first", "second"]) + + +@skip_pyarrow +@pytest.mark.parametrize( + "kwargs", + [ + {"header": [0, 1]}, + { + "skiprows": 3, + "names": [ + ("a", "q"), + ("a", "r"), + ("a", "s"), + ("b", "t"), + ("c", "u"), + ("c", "v"), + ], + }, + { + "skiprows": 3, + "names": [ + _TestTuple("a", "q"), + _TestTuple("a", "r"), + _TestTuple("a", "s"), + _TestTuple("b", "t"), + _TestTuple("c", "u"), + _TestTuple("c", "v"), + ], + }, + ], +) +def test_header_multi_index_common_format1(all_parsers, kwargs): + parser = all_parsers + expected = DataFrame( + [[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]], + index=["one", "two"], + columns=MultiIndex.from_tuples( + [("a", "q"), ("a", "r"), ("a", "s"), ("b", "t"), ("c", "u"), ("c", "v")] + ), + ) + data = """,a,a,a,b,c,c +,q,r,s,t,u,v +,,,,,, +one,1,2,3,4,5,6 +two,7,8,9,10,11,12""" + + result = parser.read_csv(StringIO(data), index_col=0, **kwargs) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +@pytest.mark.parametrize( + "kwargs", + [ + {"header": [0, 1]}, + { + "skiprows": 2, + "names": [ + ("a", "q"), + ("a", "r"), + ("a", "s"), + ("b", "t"), + ("c", "u"), + ("c", "v"), + ], + }, + { + "skiprows": 2, + "names": [ + _TestTuple("a", "q"), + _TestTuple("a", "r"), + _TestTuple("a", "s"), + _TestTuple("b", "t"), + _TestTuple("c", "u"), + _TestTuple("c", "v"), + ], + }, + ], +) +def test_header_multi_index_common_format2(all_parsers, kwargs): + parser = all_parsers + expected = DataFrame( + [[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]], + index=["one", "two"], + columns=MultiIndex.from_tuples( + [("a", "q"), ("a", "r"), ("a", "s"), ("b", "t"), ("c", "u"), ("c", "v")] + ), + ) + data = """,a,a,a,b,c,c +,q,r,s,t,u,v +one,1,2,3,4,5,6 +two,7,8,9,10,11,12""" + + result = parser.read_csv(StringIO(data), index_col=0, **kwargs) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +@pytest.mark.parametrize( + "kwargs", + [ + {"header": [0, 1]}, + { + "skiprows": 2, + "names": [ + ("a", "q"), + ("a", "r"), + ("a", "s"), + ("b", "t"), + ("c", "u"), + ("c", "v"), + ], + }, + { + "skiprows": 2, + "names": [ + _TestTuple("a", "q"), + _TestTuple("a", "r"), + _TestTuple("a", "s"), + _TestTuple("b", "t"), + _TestTuple("c", "u"), + _TestTuple("c", "v"), + ], + }, + ], +) +def test_header_multi_index_common_format3(all_parsers, kwargs): + parser = all_parsers + expected = DataFrame( + [[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]], + index=["one", "two"], + columns=MultiIndex.from_tuples( + [("a", "q"), ("a", "r"), ("a", "s"), ("b", "t"), ("c", "u"), ("c", "v")] + ), + ) + expected = expected.reset_index(drop=True) + data = """a,a,a,b,c,c +q,r,s,t,u,v +1,2,3,4,5,6 +7,8,9,10,11,12""" + + result = parser.read_csv(StringIO(data), index_col=None, **kwargs) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +def test_header_multi_index_common_format_malformed1(all_parsers): + parser = all_parsers + expected = DataFrame( + np.array([[2, 3, 4, 5, 6], [8, 9, 10, 11, 12]], dtype="int64"), + index=Index([1, 7]), + columns=MultiIndex( + levels=[["a", "b", "c"], ["r", "s", "t", "u", "v"]], + codes=[[0, 0, 1, 2, 2], [0, 1, 2, 3, 4]], + names=["a", "q"], + ), + ) + data = """a,a,a,b,c,c 
+q,r,s,t,u,v +1,2,3,4,5,6 +7,8,9,10,11,12""" + + result = parser.read_csv(StringIO(data), header=[0, 1], index_col=0) + tm.assert_frame_equal(expected, result) + + +@skip_pyarrow +def test_header_multi_index_common_format_malformed2(all_parsers): + parser = all_parsers + expected = DataFrame( + np.array([[2, 3, 4, 5, 6], [8, 9, 10, 11, 12]], dtype="int64"), + index=Index([1, 7]), + columns=MultiIndex( + levels=[["a", "b", "c"], ["r", "s", "t", "u", "v"]], + codes=[[0, 0, 1, 2, 2], [0, 1, 2, 3, 4]], + names=[None, "q"], + ), + ) + + data = """,a,a,b,c,c +q,r,s,t,u,v +1,2,3,4,5,6 +7,8,9,10,11,12""" + + result = parser.read_csv(StringIO(data), header=[0, 1], index_col=0) + tm.assert_frame_equal(expected, result) + + +@skip_pyarrow +def test_header_multi_index_common_format_malformed3(all_parsers): + parser = all_parsers + expected = DataFrame( + np.array([[3, 4, 5, 6], [9, 10, 11, 12]], dtype="int64"), + index=MultiIndex(levels=[[1, 7], [2, 8]], codes=[[0, 1], [0, 1]]), + columns=MultiIndex( + levels=[["a", "b", "c"], ["s", "t", "u", "v"]], + codes=[[0, 1, 2, 2], [0, 1, 2, 3]], + names=[None, "q"], + ), + ) + data = """,a,a,b,c,c +q,r,s,t,u,v +1,2,3,4,5,6 +7,8,9,10,11,12""" + + result = parser.read_csv(StringIO(data), header=[0, 1], index_col=[0, 1]) + tm.assert_frame_equal(expected, result) + + +@skip_pyarrow +def test_header_multi_index_blank_line(all_parsers): + # GH 40442 + parser = all_parsers + data = [[None, None], [1, 2], [3, 4]] + columns = MultiIndex.from_tuples([("a", "A"), ("b", "B")]) + expected = DataFrame(data, columns=columns) + data = "a,b\nA,B\n,\n1,2\n3,4" + result = parser.read_csv(StringIO(data), header=[0, 1]) + tm.assert_frame_equal(expected, result) + + +@skip_pyarrow +@pytest.mark.parametrize( + "data,header", [("1,2,3\n4,5,6", None), ("foo,bar,baz\n1,2,3\n4,5,6", 0)] +) +def test_header_names_backward_compat(all_parsers, data, header): + # see gh-2539 + parser = all_parsers + expected = parser.read_csv(StringIO("1,2,3\n4,5,6"), names=["a", "b", "c"]) + + result = parser.read_csv(StringIO(data), names=["a", "b", "c"], header=header) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +@pytest.mark.parametrize("kwargs", [{}, {"index_col": False}]) +def test_read_only_header_no_rows(all_parsers, kwargs): + # See gh-7773 + parser = all_parsers + expected = DataFrame(columns=["a", "b", "c"]) + + result = parser.read_csv(StringIO("a,b,c"), **kwargs) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "kwargs,names", + [ + ({}, [0, 1, 2, 3, 4]), + ({"prefix": "X"}, ["X0", "X1", "X2", "X3", "X4"]), + ( + {"names": ["foo", "bar", "baz", "quux", "panda"]}, + ["foo", "bar", "baz", "quux", "panda"], + ), + ], +) +def test_no_header(all_parsers, kwargs, names): + parser = all_parsers + data = """1,2,3,4,5 +6,7,8,9,10 +11,12,13,14,15 +""" + expected = DataFrame( + [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]], columns=names + ) + if "prefix" in kwargs.keys(): + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = parser.read_csv(StringIO(data), header=None, **kwargs) + else: + result = parser.read_csv(StringIO(data), header=None, **kwargs) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("header", [["a", "b"], "string_header"]) +def test_non_int_header(all_parsers, header): + # see gh-16338 + msg = "header must be integer or list of integers" + data = """1,2\n3,4""" + parser = all_parsers + + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), header=header) + 
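+
+# Hedged aside, not part of the upstream pandas suite: a minimal runnable
+# sketch of the header-argument semantics the surrounding tests rely on.
+# The _header_semantics_sketch name is ours; only public read_csv behaviour
+# is assumed.
+def _header_semantics_sketch():
+    import pandas as pd
+
+    data = "a,b\n1,2\n3,4"
+    # header=0 (inferred by default here): the first row supplies the labels.
+    assert list(pd.read_csv(StringIO(data)).columns) == ["a", "b"]
+    # header=None: positional integer labels; the first row stays as data.
+    assert list(pd.read_csv(StringIO(data), header=None).columns) == [0, 1]
+    # header=[0]: a one-row "multi-row" header still yields a plain Index,
+    # not a MultiIndex (see test_singleton_header below).
+    assert list(pd.read_csv(StringIO(data), header=[0]).columns) == ["a", "b"]
+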
+ +@skip_pyarrow +def test_singleton_header(all_parsers): + # see gh-7757 + data = """a,b,c\n0,1,2\n1,2,3""" + parser = all_parsers + + expected = DataFrame({"a": [0, 1], "b": [1, 2], "c": [2, 3]}) + result = parser.read_csv(StringIO(data), header=[0]) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +@pytest.mark.parametrize( + "data,expected", + [ + ( + "A,A,A,B\none,one,one,two\n0,40,34,0.1", + DataFrame( + [[0, 40, 34, 0.1]], + columns=MultiIndex.from_tuples( + [("A", "one"), ("A", "one.1"), ("A", "one.2"), ("B", "two")] + ), + ), + ), + ( + "A,A,A,B\none,one,one.1,two\n0,40,34,0.1", + DataFrame( + [[0, 40, 34, 0.1]], + columns=MultiIndex.from_tuples( + [("A", "one"), ("A", "one.1"), ("A", "one.1.1"), ("B", "two")] + ), + ), + ), + ( + "A,A,A,B,B\none,one,one.1,two,two\n0,40,34,0.1,0.1", + DataFrame( + [[0, 40, 34, 0.1, 0.1]], + columns=MultiIndex.from_tuples( + [ + ("A", "one"), + ("A", "one.1"), + ("A", "one.1.1"), + ("B", "two"), + ("B", "two.1"), + ] + ), + ), + ), + ], +) +def test_mangles_multi_index(all_parsers, data, expected): + # see gh-18062 + parser = all_parsers + + result = parser.read_csv(StringIO(data), header=[0, 1]) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +@pytest.mark.parametrize("index_col", [None, [0]]) +@pytest.mark.parametrize( + "columns", [None, (["", "Unnamed"]), (["Unnamed", ""]), (["Unnamed", "NotUnnamed"])] +) +def test_multi_index_unnamed(all_parsers, index_col, columns): + # see gh-23687 + # + # When specifying a multi-index header, make sure that + # we don't error just because one of the rows in our header + # has ALL column names containing the string "Unnamed". The + # correct condition to check is whether the row contains + # ALL columns that did not have names (and instead were given + # placeholder ones). + parser = all_parsers + header = [0, 1] + + if index_col is None: + data = ",".join(columns or ["", ""]) + "\n0,1\n2,3\n4,5\n" + else: + data = ",".join([""] + (columns or ["", ""])) + "\n,0,1\n0,2,3\n1,4,5\n" + + result = parser.read_csv(StringIO(data), header=header, index_col=index_col) + exp_columns = [] + + if columns is None: + columns = ["", "", ""] + + for i, col in enumerate(columns): + if not col: # Unnamed. 
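+            # Reconstruct the placeholder the parser generates. With an index
+            # column present, the data columns sit one field to the right in
+            # the file, hence the i + 1 offset below.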
+ col = f"Unnamed: {i if index_col is None else i + 1}_level_0" + + exp_columns.append(col) + + columns = MultiIndex.from_tuples(zip(exp_columns, ["0", "1"])) + expected = DataFrame([[2, 3], [4, 5]], columns=columns) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +def test_names_longer_than_header_but_equal_with_data_rows(all_parsers): + # GH#38453 + parser = all_parsers + data = """a, b +1,2,3 +5,6,4 +""" + result = parser.read_csv(StringIO(data), header=0, names=["A", "B", "C"]) + expected = DataFrame({"A": [1, 5], "B": [2, 6], "C": [3, 4]}) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +def test_read_csv_multiindex_columns(all_parsers): + # GH#6051 + parser = all_parsers + + s1 = "Male, Male, Male, Female, Female\nR, R, L, R, R\n.86, .67, .88, .78, .81" + s2 = ( + "Male, Male, Male, Female, Female\n" + "R, R, L, R, R\n" + ".86, .67, .88, .78, .81\n" + ".86, .67, .88, .78, .82" + ) + + mi = MultiIndex.from_tuples( + [ + ("Male", "R"), + (" Male", " R"), + (" Male", " L"), + (" Female", " R"), + (" Female", " R.1"), + ] + ) + expected = DataFrame( + [[0.86, 0.67, 0.88, 0.78, 0.81], [0.86, 0.67, 0.88, 0.78, 0.82]], columns=mi + ) + + df1 = parser.read_csv(StringIO(s1), header=[0, 1]) + tm.assert_frame_equal(df1, expected.iloc[:1]) + df2 = parser.read_csv(StringIO(s2), header=[0, 1]) + tm.assert_frame_equal(df2, expected) + + +@skip_pyarrow +def test_read_csv_multi_header_length_check(all_parsers): + # GH#43102 + parser = all_parsers + + case = """row11,row12,row13 +row21,row22, row23 +row31,row32 +""" + + with pytest.raises( + ParserError, match="Header rows must have an equal number of columns." + ): + parser.read_csv(StringIO(case), header=[0, 2]) + + +@skip_pyarrow +def test_header_none_and_implicit_index(all_parsers): + # GH#22144 + parser = all_parsers + data = "x,1,5\ny,2\nz,3\n" + result = parser.read_csv(StringIO(data), names=["a", "b"], header=None) + expected = DataFrame( + {"a": [1, 2, 3], "b": [5, np.nan, np.nan]}, index=["x", "y", "z"] + ) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +def test_header_none_and_implicit_index_in_second_row(all_parsers): + # GH#22144 + parser = all_parsers + data = "x,1\ny,2,5\nz,3\n" + with pytest.raises(ParserError, match="Expected 2 fields in line 2, saw 3"): + parser.read_csv(StringIO(data), names=["a", "b"], header=None) + + +@skip_pyarrow +def test_header_none_and_on_bad_lines_skip(all_parsers): + # GH#22144 + parser = all_parsers + data = "x,1\ny,2,5\nz,3\n" + result = parser.read_csv( + StringIO(data), names=["a", "b"], header=None, on_bad_lines="skip" + ) + expected = DataFrame({"a": ["x", "z"], "b": [1, 3]}) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_index_col.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_index_col.py new file mode 100644 index 0000000..f30aba3 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_index_col.py @@ -0,0 +1,354 @@ +""" +Tests that the specified index column (a.k.a "index_col") +is properly handled or inferred during parsing for all of +the parsers defined in parsers.py +""" +from io import StringIO + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + MultiIndex, +) +import pandas._testing as tm + +# TODO(1.4): Change me to xfails at release time +skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") + + +@pytest.mark.parametrize("with_header", [True, False]) +def test_index_col_named(all_parsers, 
with_header): + parser = all_parsers + no_header = """\ +KORD1,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000 +KORD2,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000 +KORD3,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000 +KORD4,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000 +KORD5,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 +KORD6,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000""" + header = "ID,date,NominalTime,ActualTime,TDew,TAir,Windspeed,Precip,WindDir\n" + + if with_header: + data = header + no_header + + result = parser.read_csv(StringIO(data), index_col="ID") + expected = parser.read_csv(StringIO(data), header=0).set_index("ID") + tm.assert_frame_equal(result, expected) + else: + data = no_header + msg = "Index ID invalid" + + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), index_col="ID") + + +def test_index_col_named2(all_parsers): + parser = all_parsers + data = """\ +1,2,3,4,hello +5,6,7,8,world +9,10,11,12,foo +""" + + expected = DataFrame( + {"a": [1, 5, 9], "b": [2, 6, 10], "c": [3, 7, 11], "d": [4, 8, 12]}, + index=Index(["hello", "world", "foo"], name="message"), + ) + names = ["a", "b", "c", "d", "message"] + + result = parser.read_csv(StringIO(data), names=names, index_col=["message"]) + tm.assert_frame_equal(result, expected) + + +def test_index_col_is_true(all_parsers): + # see gh-9798 + data = "a,b\n1,2" + parser = all_parsers + + msg = "The value of index_col couldn't be 'True'" + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), index_col=True) + + +@skip_pyarrow +def test_infer_index_col(all_parsers): + data = """A,B,C +foo,1,2,3 +bar,4,5,6 +baz,7,8,9 +""" + parser = all_parsers + result = parser.read_csv(StringIO(data)) + + expected = DataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + index=["foo", "bar", "baz"], + columns=["A", "B", "C"], + ) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +@pytest.mark.parametrize( + "index_col,kwargs", + [ + (None, {"columns": ["x", "y", "z"]}), + (False, {"columns": ["x", "y", "z"]}), + (0, {"columns": ["y", "z"], "index": Index([], name="x")}), + (1, {"columns": ["x", "z"], "index": Index([], name="y")}), + ("x", {"columns": ["y", "z"], "index": Index([], name="x")}), + ("y", {"columns": ["x", "z"], "index": Index([], name="y")}), + ( + [0, 1], + { + "columns": ["z"], + "index": MultiIndex.from_arrays([[]] * 2, names=["x", "y"]), + }, + ), + ( + ["x", "y"], + { + "columns": ["z"], + "index": MultiIndex.from_arrays([[]] * 2, names=["x", "y"]), + }, + ), + ( + [1, 0], + { + "columns": ["z"], + "index": MultiIndex.from_arrays([[]] * 2, names=["y", "x"]), + }, + ), + ( + ["y", "x"], + { + "columns": ["z"], + "index": MultiIndex.from_arrays([[]] * 2, names=["y", "x"]), + }, + ), + ], +) +def test_index_col_empty_data(all_parsers, index_col, kwargs): + data = "x,y,z" + parser = all_parsers + result = parser.read_csv(StringIO(data), index_col=index_col) + + expected = DataFrame(**kwargs) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +def test_empty_with_index_col_false(all_parsers): + # see gh-10413 + data = "x,y" + parser = all_parsers + result = parser.read_csv(StringIO(data), index_col=False) + + expected = DataFrame(columns=["x", "y"]) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +@pytest.mark.parametrize( + "index_names", + [ + ["", ""], + ["foo", ""], + ["", "bar"], + ["foo", "bar"], + 
["NotReallyUnnamed", "Unnamed: 0"], + ], +) +def test_multi_index_naming(all_parsers, index_names): + parser = all_parsers + + # We don't want empty index names being replaced with "Unnamed: 0" + data = ",".join(index_names + ["col\na,c,1\na,d,2\nb,c,3\nb,d,4"]) + result = parser.read_csv(StringIO(data), index_col=[0, 1]) + + expected = DataFrame( + {"col": [1, 2, 3, 4]}, index=MultiIndex.from_product([["a", "b"], ["c", "d"]]) + ) + expected.index.names = [name if name else None for name in index_names] + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +def test_multi_index_naming_not_all_at_beginning(all_parsers): + parser = all_parsers + data = ",Unnamed: 2,\na,c,1\na,d,2\nb,c,3\nb,d,4" + result = parser.read_csv(StringIO(data), index_col=[0, 2]) + + expected = DataFrame( + {"Unnamed: 2": ["c", "d", "c", "d"]}, + index=MultiIndex( + levels=[["a", "b"], [1, 2, 3, 4]], codes=[[0, 0, 1, 1], [0, 1, 2, 3]] + ), + ) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +def test_no_multi_index_level_names_empty(all_parsers): + # GH 10984 + parser = all_parsers + midx = MultiIndex.from_tuples([("A", 1, 2), ("A", 1, 2), ("B", 1, 2)]) + expected = DataFrame(np.random.randn(3, 3), index=midx, columns=["x", "y", "z"]) + with tm.ensure_clean() as path: + expected.to_csv(path) + result = parser.read_csv(path, index_col=[0, 1, 2]) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +def test_header_with_index_col(all_parsers): + # GH 33476 + parser = all_parsers + data = """ +I11,A,A +I12,B,B +I2,1,3 +""" + midx = MultiIndex.from_tuples([("A", "B"), ("A", "B.1")], names=["I11", "I12"]) + idx = Index(["I2"]) + expected = DataFrame([[1, 3]], index=idx, columns=midx) + + result = parser.read_csv(StringIO(data), index_col=0, header=[0, 1]) + tm.assert_frame_equal(result, expected) + + col_idx = Index(["A", "A.1"]) + idx = Index(["I12", "I2"], name="I11") + expected = DataFrame([["B", "B"], ["1", "3"]], index=idx, columns=col_idx) + + result = parser.read_csv(StringIO(data), index_col="I11", header=0) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.slow +def test_index_col_large_csv(all_parsers): + # https://github.com/pandas-dev/pandas/issues/37094 + parser = all_parsers + + N = 1_000_001 + df = DataFrame({"a": range(N), "b": np.random.randn(N)}) + + with tm.ensure_clean() as path: + df.to_csv(path, index=False) + result = parser.read_csv(path, index_col=[0]) + + tm.assert_frame_equal(result, df.set_index("a")) + + +@skip_pyarrow +def test_index_col_multiindex_columns_no_data(all_parsers): + # GH#38292 + parser = all_parsers + result = parser.read_csv( + StringIO("a0,a1,a2\nb0,b1,b2\n"), header=[0, 1], index_col=0 + ) + expected = DataFrame( + [], + columns=MultiIndex.from_arrays( + [["a1", "a2"], ["b1", "b2"]], names=["a0", "b0"] + ), + ) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +def test_index_col_header_no_data(all_parsers): + # GH#38292 + parser = all_parsers + result = parser.read_csv(StringIO("a0,a1,a2\n"), header=[0], index_col=0) + expected = DataFrame( + [], + columns=["a1", "a2"], + index=Index([], name="a0"), + ) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +def test_multiindex_columns_no_data(all_parsers): + # GH#38292 + parser = all_parsers + result = parser.read_csv(StringIO("a0,a1,a2\nb0,b1,b2\n"), header=[0, 1]) + expected = DataFrame( + [], columns=MultiIndex.from_arrays([["a0", "a1", "a2"], ["b0", "b1", "b2"]]) + ) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +def 
test_multiindex_columns_index_col_with_data(all_parsers):
+    # GH#38292
+    parser = all_parsers
+    result = parser.read_csv(
+        StringIO("a0,a1,a2\nb0,b1,b2\ndata,data,data"), header=[0, 1], index_col=0
+    )
+    expected = DataFrame(
+        [["data", "data"]],
+        columns=MultiIndex.from_arrays(
+            [["a1", "a2"], ["b1", "b2"]], names=["a0", "b0"]
+        ),
+        index=Index(["data"]),
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+@skip_pyarrow
+def test_infer_types_boolean_sum(all_parsers):
+    # GH#44079
+    parser = all_parsers
+    result = parser.read_csv(
+        StringIO("0,1"),
+        names=["a", "b"],
+        index_col=["a"],
+        dtype={"a": "UInt8"},
+    )
+    expected = DataFrame(
+        data={
+            "a": [
+                0,
+            ],
+            "b": [1],
+        }
+    ).set_index("a")
+    # Not checking index type now, because the C parser will return an
+    # index column of dtype 'object', and the Python parser will return an
+    # index column of dtype 'int64'.
+    tm.assert_frame_equal(result, expected, check_index_type=False)
+
+
+@skip_pyarrow
+@pytest.mark.parametrize("dtype, val", [(object, "01"), ("int64", 1)])
+def test_specify_dtype_for_index_col(all_parsers, dtype, val):
+    # GH#9435
+    data = "a,b\n01,2"
+    parser = all_parsers
+    result = parser.read_csv(StringIO(data), index_col="a", dtype={"a": dtype})
+    expected = DataFrame({"b": [2]}, index=Index([val], name="a"))
+    tm.assert_frame_equal(result, expected)
+
+
+@skip_pyarrow
+def test_multiindex_columns_not_leading_index_col(all_parsers):
+    # GH#38549
+    parser = all_parsers
+    data = """a,b,c,d
+e,f,g,h
+x,y,1,2
+"""
+    result = parser.read_csv(
+        StringIO(data),
+        header=[0, 1],
+        index_col=1,
+    )
+    cols = MultiIndex.from_tuples(
+        [("a", "e"), ("c", "g"), ("d", "h")], names=["b", "f"]
+    )
+    expected = DataFrame([["x", 1, 2]], columns=cols, index=["y"])
+    tm.assert_frame_equal(result, expected)
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_mangle_dupes.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_mangle_dupes.py
new file mode 100644
index 0000000..3f7b1b5
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_mangle_dupes.py
@@ -0,0 +1,168 @@
+"""
+Tests that duplicate columns are handled appropriately when parsed by the
+CSV engine. In general, the expected result is that they are either thoroughly
+de-duplicated (if mangling requested) or ignored otherwise.
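+Mangling renames later duplicates by appending ".1", ".2", and so on; when a
+candidate name already exists, the counter keeps incrementing until the name
+is unique.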
+""" +from io import StringIO + +import pytest + +from pandas import DataFrame +import pandas._testing as tm + +skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") + + +@skip_pyarrow +@pytest.mark.parametrize("kwargs", [{}, {"mangle_dupe_cols": True}]) +def test_basic(all_parsers, kwargs): + # TODO: add test for condition "mangle_dupe_cols=False" + # once it is actually supported (gh-12935) + parser = all_parsers + + data = "a,a,b,b,b\n1,2,3,4,5" + result = parser.read_csv(StringIO(data), sep=",", **kwargs) + + expected = DataFrame([[1, 2, 3, 4, 5]], columns=["a", "a.1", "b", "b.1", "b.2"]) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +def test_basic_names(all_parsers): + # See gh-7160 + parser = all_parsers + + data = "a,b,a\n0,1,2\n3,4,5" + expected = DataFrame([[0, 1, 2], [3, 4, 5]], columns=["a", "b", "a.1"]) + + result = parser.read_csv(StringIO(data)) + tm.assert_frame_equal(result, expected) + + +def test_basic_names_raise(all_parsers): + # See gh-7160 + parser = all_parsers + + data = "0,1,2\n3,4,5" + with pytest.raises(ValueError, match="Duplicate names"): + parser.read_csv(StringIO(data), names=["a", "b", "a"]) + + +@skip_pyarrow +@pytest.mark.parametrize( + "data,expected", + [ + ("a,a,a.1\n1,2,3", DataFrame([[1, 2, 3]], columns=["a", "a.2", "a.1"])), + ( + "a,a,a.1,a.1.1,a.1.1.1,a.1.1.1.1\n1,2,3,4,5,6", + DataFrame( + [[1, 2, 3, 4, 5, 6]], + columns=["a", "a.2", "a.1", "a.1.1", "a.1.1.1", "a.1.1.1.1"], + ), + ), + ( + "a,a,a.3,a.1,a.2,a,a\n1,2,3,4,5,6,7", + DataFrame( + [[1, 2, 3, 4, 5, 6, 7]], + columns=["a", "a.4", "a.3", "a.1", "a.2", "a.5", "a.6"], + ), + ), + ], +) +def test_thorough_mangle_columns(all_parsers, data, expected): + # see gh-17060 + parser = all_parsers + + result = parser.read_csv(StringIO(data)) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +@pytest.mark.parametrize( + "data,names,expected", + [ + ( + "a,b,b\n1,2,3", + ["a.1", "a.1", "a.1.1"], + DataFrame( + [["a", "b", "b"], ["1", "2", "3"]], columns=["a.1", "a.1.1", "a.1.1.1"] + ), + ), + ( + "a,b,c,d,e,f\n1,2,3,4,5,6", + ["a", "a", "a.1", "a.1.1", "a.1.1.1", "a.1.1.1.1"], + DataFrame( + [["a", "b", "c", "d", "e", "f"], ["1", "2", "3", "4", "5", "6"]], + columns=["a", "a.1", "a.1.1", "a.1.1.1", "a.1.1.1.1", "a.1.1.1.1.1"], + ), + ), + ( + "a,b,c,d,e,f,g\n1,2,3,4,5,6,7", + ["a", "a", "a.3", "a.1", "a.2", "a", "a"], + DataFrame( + [ + ["a", "b", "c", "d", "e", "f", "g"], + ["1", "2", "3", "4", "5", "6", "7"], + ], + columns=["a", "a.1", "a.3", "a.1.1", "a.2", "a.2.1", "a.3.1"], + ), + ), + ], +) +def test_thorough_mangle_names(all_parsers, data, names, expected): + # see gh-17095 + parser = all_parsers + + with pytest.raises(ValueError, match="Duplicate names"): + parser.read_csv(StringIO(data), names=names) + + +@skip_pyarrow +def test_mangled_unnamed_placeholders(all_parsers): + # xref gh-13017 + orig_key = "0" + parser = all_parsers + + orig_value = [1, 2, 3] + df = DataFrame({orig_key: orig_value}) + + # This test recursively updates `df`. 
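+    # Each to_csv/read_csv round trip writes the unnamed index out again; its
+    # "Unnamed: 0" placeholder then collides with the columns produced by the
+    # earlier passes and is mangled to "Unnamed: 0.1", "Unnamed: 0.2", ...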
+ for i in range(3): + expected = DataFrame() + + for j in range(i + 1): + col_name = "Unnamed: 0" + f".{1*j}" * min(j, 1) + expected.insert(loc=0, column=col_name, value=[0, 1, 2]) + + expected[orig_key] = orig_value + df = parser.read_csv(StringIO(df.to_csv())) + + tm.assert_frame_equal(df, expected) + + +@skip_pyarrow +def test_mangle_dupe_cols_already_exists(all_parsers): + # GH#14704 + parser = all_parsers + + data = "a,a,a.1,a,a.3,a.1,a.1.1\n1,2,3,4,5,6,7" + result = parser.read_csv(StringIO(data)) + expected = DataFrame( + [[1, 2, 3, 4, 5, 6, 7]], + columns=["a", "a.2", "a.1", "a.4", "a.3", "a.1.2", "a.1.1"], + ) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +def test_mangle_dupe_cols_already_exists_unnamed_col(all_parsers): + # GH#14704 + parser = all_parsers + + data = ",Unnamed: 0,,Unnamed: 2\n1,2,3,4" + result = parser.read_csv(StringIO(data)) + expected = DataFrame( + [[1, 2, 3, 4]], + columns=["Unnamed: 0.1", "Unnamed: 0", "Unnamed: 2.1", "Unnamed: 2"], + ) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_multi_thread.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_multi_thread.py new file mode 100644 index 0000000..ab27847 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_multi_thread.py @@ -0,0 +1,154 @@ +""" +Tests multithreading behaviour for reading and +parsing files for each parser defined in parsers.py +""" +from contextlib import ExitStack +from io import BytesIO +from multiprocessing.pool import ThreadPool + +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame +import pandas._testing as tm + +# We'll probably always skip these for pyarrow +# Maybe we'll add our own tests for pyarrow too +pytestmark = pytest.mark.usefixtures("pyarrow_skip") + + +def _construct_dataframe(num_rows): + """ + Construct a DataFrame for testing. + + Parameters + ---------- + num_rows : int + The number of rows for our DataFrame. + + Returns + ------- + df : DataFrame + """ + df = DataFrame(np.random.rand(num_rows, 5), columns=list("abcde")) + df["foo"] = "foo" + df["bar"] = "bar" + df["baz"] = "baz" + df["date"] = pd.date_range("20000101 09:00:00", periods=num_rows, freq="s") + df["int"] = np.arange(num_rows, dtype="int64") + return df + + +@pytest.mark.slow +def test_multi_thread_string_io_read_csv(all_parsers): + # see gh-11786 + parser = all_parsers + max_row_range = 10000 + num_files = 100 + + bytes_to_df = [ + "\n".join([f"{i:d},{i:d},{i:d}" for i in range(max_row_range)]).encode() + for _ in range(num_files) + ] + + # Read all files in many threads. + with ExitStack() as stack: + files = [stack.enter_context(BytesIO(b)) for b in bytes_to_df] + + pool = stack.enter_context(ThreadPool(8)) + + results = pool.map(parser.read_csv, files) + first_result = results[0] + + for result in results: + tm.assert_frame_equal(first_result, result) + + +def _generate_multi_thread_dataframe(parser, path, num_rows, num_tasks): + """ + Generate a DataFrame via multi-thread. + + Parameters + ---------- + parser : BaseParser + The parser object to use for reading the data. + path : str + The location of the CSV file to read. + num_rows : int + The number of rows to read per task. + num_tasks : int + The number of tasks to use for reading this DataFrame. + + Returns + ------- + df : DataFrame + """ + + def reader(arg): + """ + Create a reader for part of the CSV. 
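+
+        Only the first task parses the header row; the remaining tasks skip
+        past their starting offset and read headerless, and the caller copies
+        the column names from task 0 onto them afterwards.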
+
+        Parameters
+        ----------
+        arg : tuple
+            A tuple of the following:
+
+            * start : int
+                The starting row for parsing the CSV.
+            * nrows : int
+                The number of rows to read.
+
+        Returns
+        -------
+        df : DataFrame
+        """
+        start, nrows = arg
+
+        if not start:
+            return parser.read_csv(
+                path, index_col=0, header=0, nrows=nrows, parse_dates=["date"]
+            )
+
+        return parser.read_csv(
+            path,
+            index_col=0,
+            header=None,
+            skiprows=int(start) + 1,
+            nrows=nrows,
+            parse_dates=[9],
+        )
+
+    tasks = [
+        (num_rows * i // num_tasks, num_rows // num_tasks) for i in range(num_tasks)
+    ]
+
+    with ThreadPool(processes=num_tasks) as pool:
+        results = pool.map(reader, tasks)
+
+    header = results[0].columns
+
+    for r in results[1:]:
+        r.columns = header
+
+    final_dataframe = pd.concat(results)
+    return final_dataframe
+
+
+@pytest.mark.slow
+def test_multi_thread_path_multipart_read_csv(all_parsers):
+    # see gh-11786
+    num_tasks = 4
+    num_rows = 100000
+
+    parser = all_parsers
+    file_name = "__thread_pool_reader__.csv"
+    df = _construct_dataframe(num_rows)
+
+    with tm.ensure_clean(file_name) as path:
+        df.to_csv(path)
+
+        final_dataframe = _generate_multi_thread_dataframe(
+            parser, path, num_rows, num_tasks
+        )
+        tm.assert_frame_equal(df, final_dataframe)
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_na_values.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_na_values.py
new file mode 100644
index 0000000..f9356df
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_na_values.py
@@ -0,0 +1,656 @@
+"""
+Tests that NA values are properly handled during
+parsing for all of the parsers defined in parsers.py
+"""
+from io import StringIO
+
+import numpy as np
+import pytest
+
+from pandas._libs.parsers import STR_NA_VALUES
+
+from pandas import (
+    DataFrame,
+    Index,
+    MultiIndex,
+)
+import pandas._testing as tm
+
+skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
+xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
+
+
+@skip_pyarrow
+def test_string_nas(all_parsers):
+    parser = all_parsers
+    data = """A,B,C
+a,b,c
+d,,f
+,g,h
+"""
+    result = parser.read_csv(StringIO(data))
+    expected = DataFrame(
+        [["a", "b", "c"], ["d", np.nan, "f"], [np.nan, "g", "h"]],
+        columns=["A", "B", "C"],
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+@skip_pyarrow
+def test_detect_string_na(all_parsers):
+    parser = all_parsers
+    data = """A,B
+foo,bar
+NA,baz
+NaN,nan
+"""
+    expected = DataFrame(
+        [["foo", "bar"], [np.nan, "baz"], [np.nan, np.nan]], columns=["A", "B"]
+    )
+    result = parser.read_csv(StringIO(data))
+    tm.assert_frame_equal(result, expected)
+
+
+@skip_pyarrow
+@pytest.mark.parametrize(
+    "na_values",
+    [
+        ["-999.0", "-999"],
+        [-999, -999.0],
+        [-999.0, -999],
+        ["-999.0"],
+        ["-999"],
+        [-999.0],
+        [-999],
+    ],
+)
+@pytest.mark.parametrize(
+    "data",
+    [
+        """A,B
+-999,1.2
+2,-999
+3,4.5
+""",
+        """A,B
+-999,1.200
+2,-999.000
+3,4.500
+""",
+    ],
+)
+def test_non_string_na_values(all_parsers, data, na_values):
+    # see gh-3611: with an odd float format, we can't match
+    # the string "999.0" exactly but still need float matching
+    parser = all_parsers
+    expected = DataFrame([[np.nan, 1.2], [2.0, np.nan], [3.0, 4.5]], columns=["A", "B"])
+
+    result = parser.read_csv(StringIO(data), na_values=na_values)
+    tm.assert_frame_equal(result, expected)
+
+
+@skip_pyarrow
+def test_default_na_values(all_parsers):
+    _NA_VALUES = {
+        "-1.#IND",
+        "1.#QNAN",
+        "1.#IND",
+        "-1.#QNAN",
+        "#N/A",
+        "N/A",
+        "n/a",
+        "NA",
+        "<NA>",
+        
"#NA", + "NULL", + "null", + "NaN", + "nan", + "-NaN", + "-nan", + "#N/A N/A", + "", + } + assert _NA_VALUES == STR_NA_VALUES + + parser = all_parsers + nv = len(_NA_VALUES) + + def f(i, v): + if i == 0: + buf = "" + elif i > 0: + buf = "".join([","] * i) + + buf = f"{buf}{v}" + + if i < nv - 1: + joined = "".join([","] * (nv - i - 1)) + buf = f"{buf}{joined}" + + return buf + + data = StringIO("\n".join([f(i, v) for i, v in enumerate(_NA_VALUES)])) + expected = DataFrame(np.nan, columns=range(nv), index=range(nv)) + + result = parser.read_csv(data, header=None) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +@pytest.mark.parametrize("na_values", ["baz", ["baz"]]) +def test_custom_na_values(all_parsers, na_values): + parser = all_parsers + data = """A,B,C +ignore,this,row +1,NA,3 +-1.#IND,5,baz +7,8,NaN +""" + expected = DataFrame( + [[1.0, np.nan, 3], [np.nan, 5, np.nan], [7, 8, np.nan]], columns=["A", "B", "C"] + ) + result = parser.read_csv(StringIO(data), na_values=na_values, skiprows=[1]) + tm.assert_frame_equal(result, expected) + + +def test_bool_na_values(all_parsers): + data = """A,B,C +True,False,True +NA,True,False +False,NA,True""" + parser = all_parsers + result = parser.read_csv(StringIO(data)) + expected = DataFrame( + { + "A": np.array([True, np.nan, False], dtype=object), + "B": np.array([False, True, np.nan], dtype=object), + "C": [True, False, True], + } + ) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +def test_na_value_dict(all_parsers): + data = """A,B,C +foo,bar,NA +bar,foo,foo +foo,bar,NA +bar,foo,foo""" + parser = all_parsers + df = parser.read_csv(StringIO(data), na_values={"A": ["foo"], "B": ["bar"]}) + expected = DataFrame( + { + "A": [np.nan, "bar", np.nan, "bar"], + "B": [np.nan, "foo", np.nan, "foo"], + "C": [np.nan, "foo", np.nan, "foo"], + } + ) + tm.assert_frame_equal(df, expected) + + +@skip_pyarrow +@pytest.mark.parametrize( + "index_col,expected", + [ + ( + [0], + DataFrame({"b": [np.nan], "c": [1], "d": [5]}, index=Index([0], name="a")), + ), + ( + [0, 2], + DataFrame( + {"b": [np.nan], "d": [5]}, + index=MultiIndex.from_tuples([(0, 1)], names=["a", "c"]), + ), + ), + ( + ["a", "c"], + DataFrame( + {"b": [np.nan], "d": [5]}, + index=MultiIndex.from_tuples([(0, 1)], names=["a", "c"]), + ), + ), + ], +) +def test_na_value_dict_multi_index(all_parsers, index_col, expected): + data = """\ +a,b,c,d +0,NA,1,5 +""" + parser = all_parsers + result = parser.read_csv(StringIO(data), na_values=set(), index_col=index_col) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +@pytest.mark.parametrize( + "kwargs,expected", + [ + ( + {}, + DataFrame( + { + "A": ["a", "b", np.nan, "d", "e", np.nan, "g"], + "B": [1, 2, 3, 4, 5, 6, 7], + "C": ["one", "two", "three", np.nan, "five", np.nan, "seven"], + } + ), + ), + ( + {"na_values": {"A": [], "C": []}, "keep_default_na": False}, + DataFrame( + { + "A": ["a", "b", "", "d", "e", "nan", "g"], + "B": [1, 2, 3, 4, 5, 6, 7], + "C": ["one", "two", "three", "nan", "five", "", "seven"], + } + ), + ), + ( + {"na_values": ["a"], "keep_default_na": False}, + DataFrame( + { + "A": [np.nan, "b", "", "d", "e", "nan", "g"], + "B": [1, 2, 3, 4, 5, 6, 7], + "C": ["one", "two", "three", "nan", "five", "", "seven"], + } + ), + ), + ( + {"na_values": {"A": [], "C": []}}, + DataFrame( + { + "A": ["a", "b", np.nan, "d", "e", np.nan, "g"], + "B": [1, 2, 3, 4, 5, 6, 7], + "C": ["one", "two", "three", np.nan, "five", np.nan, "seven"], + } + ), + ), + ], +) +def test_na_values_keep_default(all_parsers, 
kwargs, expected): + data = """\ +A,B,C +a,1,one +b,2,two +,3,three +d,4,nan +e,5,five +nan,6, +g,7,seven +""" + parser = all_parsers + result = parser.read_csv(StringIO(data), **kwargs) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +def test_no_na_values_no_keep_default(all_parsers): + # see gh-4318: passing na_values=None and + # keep_default_na=False yields 'None" as a na_value + data = """\ +A,B,C +a,1,None +b,2,two +,3,None +d,4,nan +e,5,five +nan,6, +g,7,seven +""" + parser = all_parsers + result = parser.read_csv(StringIO(data), keep_default_na=False) + + expected = DataFrame( + { + "A": ["a", "b", "", "d", "e", "nan", "g"], + "B": [1, 2, 3, 4, 5, 6, 7], + "C": ["None", "two", "None", "nan", "five", "", "seven"], + } + ) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +def test_no_keep_default_na_dict_na_values(all_parsers): + # see gh-19227 + data = "a,b\n,2" + parser = all_parsers + result = parser.read_csv( + StringIO(data), na_values={"b": ["2"]}, keep_default_na=False + ) + expected = DataFrame({"a": [""], "b": [np.nan]}) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +def test_no_keep_default_na_dict_na_scalar_values(all_parsers): + # see gh-19227 + # + # Scalar values shouldn't cause the parsing to crash or fail. + data = "a,b\n1,2" + parser = all_parsers + df = parser.read_csv(StringIO(data), na_values={"b": 2}, keep_default_na=False) + expected = DataFrame({"a": [1], "b": [np.nan]}) + tm.assert_frame_equal(df, expected) + + +@skip_pyarrow +@pytest.mark.parametrize("col_zero_na_values", [113125, "113125"]) +def test_no_keep_default_na_dict_na_values_diff_reprs(all_parsers, col_zero_na_values): + # see gh-19227 + data = """\ +113125,"blah","/blaha",kjsdkj,412.166,225.874,214.008 +729639,"qwer","",asdfkj,466.681,,252.373 +""" + parser = all_parsers + expected = DataFrame( + { + 0: [np.nan, 729639.0], + 1: [np.nan, "qwer"], + 2: ["/blaha", np.nan], + 3: ["kjsdkj", "asdfkj"], + 4: [412.166, 466.681], + 5: ["225.874", ""], + 6: [np.nan, 252.373], + } + ) + + result = parser.read_csv( + StringIO(data), + header=None, + keep_default_na=False, + na_values={2: "", 6: "214.008", 1: "blah", 0: col_zero_na_values}, + ) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +@pytest.mark.parametrize( + "na_filter,row_data", + [ + (True, [[1, "A"], [np.nan, np.nan], [3, "C"]]), + (False, [["1", "A"], ["nan", "B"], ["3", "C"]]), + ], +) +def test_na_values_na_filter_override(all_parsers, na_filter, row_data): + data = """\ +A,B +1,A +nan,B +3,C +""" + parser = all_parsers + result = parser.read_csv(StringIO(data), na_values=["B"], na_filter=na_filter) + + expected = DataFrame(row_data, columns=["A", "B"]) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +def test_na_trailing_columns(all_parsers): + parser = all_parsers + data = """Date,Currency,Symbol,Type,Units,UnitPrice,Cost,Tax +2012-03-14,USD,AAPL,BUY,1000 +2012-05-12,USD,SBUX,SELL,500""" + + # Trailing columns should be all NaN. 
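+    # The header names eight columns but every data row supplies only five
+    # fields, so UnitPrice, Cost and Tax must be filled in as NaN.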
+ result = parser.read_csv(StringIO(data)) + expected = DataFrame( + [ + ["2012-03-14", "USD", "AAPL", "BUY", 1000, np.nan, np.nan, np.nan], + ["2012-05-12", "USD", "SBUX", "SELL", 500, np.nan, np.nan, np.nan], + ], + columns=[ + "Date", + "Currency", + "Symbol", + "Type", + "Units", + "UnitPrice", + "Cost", + "Tax", + ], + ) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +@pytest.mark.parametrize( + "na_values,row_data", + [ + (1, [[np.nan, 2.0], [2.0, np.nan]]), + ({"a": 2, "b": 1}, [[1.0, 2.0], [np.nan, np.nan]]), + ], +) +def test_na_values_scalar(all_parsers, na_values, row_data): + # see gh-12224 + parser = all_parsers + names = ["a", "b"] + data = "1,2\n2,1" + + result = parser.read_csv(StringIO(data), names=names, na_values=na_values) + expected = DataFrame(row_data, columns=names) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +def test_na_values_dict_aliasing(all_parsers): + parser = all_parsers + na_values = {"a": 2, "b": 1} + na_values_copy = na_values.copy() + + names = ["a", "b"] + data = "1,2\n2,1" + + expected = DataFrame([[1.0, 2.0], [np.nan, np.nan]], columns=names) + result = parser.read_csv(StringIO(data), names=names, na_values=na_values) + + tm.assert_frame_equal(result, expected) + tm.assert_dict_equal(na_values, na_values_copy) + + +@skip_pyarrow +def test_na_values_dict_col_index(all_parsers): + # see gh-14203 + data = "a\nfoo\n1" + parser = all_parsers + na_values = {0: "foo"} + + result = parser.read_csv(StringIO(data), na_values=na_values) + expected = DataFrame({"a": [np.nan, 1]}) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +@pytest.mark.parametrize( + "data,kwargs,expected", + [ + ( + str(2 ** 63) + "\n" + str(2 ** 63 + 1), + {"na_values": [2 ** 63]}, + DataFrame([str(2 ** 63), str(2 ** 63 + 1)]), + ), + (str(2 ** 63) + ",1" + "\n,2", {}, DataFrame([[str(2 ** 63), 1], ["", 2]])), + (str(2 ** 63) + "\n1", {"na_values": [2 ** 63]}, DataFrame([np.nan, 1])), + ], +) +def test_na_values_uint64(all_parsers, data, kwargs, expected): + # see gh-14983 + parser = all_parsers + result = parser.read_csv(StringIO(data), header=None, **kwargs) + tm.assert_frame_equal(result, expected) + + +def test_empty_na_values_no_default_with_index(all_parsers): + # see gh-15835 + data = "a,1\nb,2" + parser = all_parsers + expected = DataFrame({"1": [2]}, index=Index(["b"], name="a")) + + result = parser.read_csv(StringIO(data), index_col=0, keep_default_na=False) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +@pytest.mark.parametrize( + "na_filter,index_data", [(False, ["", "5"]), (True, [np.nan, 5.0])] +) +def test_no_na_filter_on_index(all_parsers, na_filter, index_data): + # see gh-5239 + # + # Don't parse NA-values in index unless na_filter=True + parser = all_parsers + data = "a,b,c\n1,,3\n4,5,6" + + expected = DataFrame({"a": [1, 4], "c": [3, 6]}, index=Index(index_data, name="b")) + result = parser.read_csv(StringIO(data), index_col=[1], na_filter=na_filter) + tm.assert_frame_equal(result, expected) + + +def test_inf_na_values_with_int_index(all_parsers): + # see gh-17128 + parser = all_parsers + data = "idx,col1,col2\n1,3,4\n2,inf,-inf" + + # Don't fail with OverflowError with inf's and integer index column. 
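+    # The "inf"/"-inf" strings only have to be matched against the float data
+    # columns; the int64 index column must pass through untouched.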
+ out = parser.read_csv(StringIO(data), index_col=[0], na_values=["inf", "-inf"]) + expected = DataFrame( + {"col1": [3, np.nan], "col2": [4, np.nan]}, index=Index([1, 2], name="idx") + ) + tm.assert_frame_equal(out, expected) + + +@skip_pyarrow +@pytest.mark.parametrize("na_filter", [True, False]) +def test_na_values_with_dtype_str_and_na_filter(all_parsers, na_filter): + # see gh-20377 + parser = all_parsers + data = "a,b,c\n1,,3\n4,5,6" + + # na_filter=True --> missing value becomes NaN. + # na_filter=False --> missing value remains empty string. + empty = np.nan if na_filter else "" + expected = DataFrame({"a": ["1", "4"], "b": [empty, "5"], "c": ["3", "6"]}) + + result = parser.read_csv(StringIO(data), na_filter=na_filter, dtype=str) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +@pytest.mark.parametrize( + "data, na_values", + [ + ("false,1\n,1\ntrue", None), + ("false,1\nnull,1\ntrue", None), + ("false,1\nnan,1\ntrue", None), + ("false,1\nfoo,1\ntrue", "foo"), + ("false,1\nfoo,1\ntrue", ["foo"]), + ("false,1\nfoo,1\ntrue", {"a": "foo"}), + ], +) +def test_cast_NA_to_bool_raises_error(all_parsers, data, na_values): + parser = all_parsers + msg = ( + "(Bool column has NA values in column [0a])|" + "(cannot safely convert passed user dtype of " + "bool for object dtyped data in column 0)" + ) + with pytest.raises(ValueError, match=msg): + parser.read_csv( + StringIO(data), + header=None, + names=["a", "b"], + dtype={"a": "bool"}, + na_values=na_values, + ) + + +@skip_pyarrow +def test_str_nan_dropped(all_parsers): + # see gh-21131 + parser = all_parsers + + data = """File: small.csv,, +10010010233,0123,654 +foo,,bar +01001000155,4530,898""" + + result = parser.read_csv( + StringIO(data), + header=None, + names=["col1", "col2", "col3"], + dtype={"col1": str, "col2": str, "col3": str}, + ).dropna() + + expected = DataFrame( + { + "col1": ["10010010233", "01001000155"], + "col2": ["0123", "4530"], + "col3": ["654", "898"], + }, + index=[1, 3], + ) + + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +def test_nan_multi_index(all_parsers): + # GH 42446 + parser = all_parsers + data = "A,B,B\nX,Y,Z\n1,2,inf" + + result = parser.read_csv( + StringIO(data), header=list(range(2)), na_values={("B", "Z"): "inf"} + ) + + expected = DataFrame( + { + ("A", "X"): [1], + ("B", "Y"): [2], + ("B", "Z"): [np.nan], + } + ) + + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +def test_bool_and_nan_to_bool(all_parsers): + # GH#42808 + parser = all_parsers + data = """0 +NaN +True +False +""" + with pytest.raises(ValueError, match="NA values"): + parser.read_csv(StringIO(data), dtype="bool") + + +def test_bool_and_nan_to_int(all_parsers): + # GH#42808 + parser = all_parsers + data = """0 +NaN +True +False +""" + with pytest.raises(ValueError, match="convert|NoneType"): + parser.read_csv(StringIO(data), dtype="int") + + +def test_bool_and_nan_to_float(all_parsers): + # GH#42808 + parser = all_parsers + data = """0 +NaN +True +False +""" + result = parser.read_csv(StringIO(data), dtype="float") + expected = DataFrame.from_dict({"0": [np.nan, 1.0, 0.0]}) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_network.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_network.py new file mode 100644 index 0000000..f5470b8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_network.py @@ -0,0 +1,302 @@ +""" +Tests parsers ability to read and parse non-local files +and 
hence require a network connection.
+"""
+from io import (
+    BytesIO,
+    StringIO,
+)
+import logging
+
+import numpy as np
+import pytest
+
+from pandas.compat import is_ci_environment
+import pandas.util._test_decorators as td
+
+from pandas import DataFrame
+import pandas._testing as tm
+
+import pandas.io.common as icom
+from pandas.io.feather_format import read_feather
+from pandas.io.parsers import read_csv
+
+
+@pytest.mark.network
+@tm.network
+@pytest.mark.parametrize("mode", ["explicit", "infer"])
+@pytest.mark.parametrize("engine", ["python", "c"])
+def test_compressed_urls(salaries_table, mode, engine, compression_only):
+    # test reading compressed urls with various engines and
+    # extension inference
+    extension = icom._compression_to_extension[compression_only]
+    base_url = (
+        "https://github.com/pandas-dev/pandas/raw/main/"
+        "pandas/tests/io/parser/data/salaries.csv"
+    )
+
+    url = base_url + extension
+
+    if mode != "explicit":
+        compression_only = mode
+
+    url_table = read_csv(url, sep="\t", compression=compression_only, engine=engine)
+    tm.assert_frame_equal(url_table, salaries_table)
+
+
+@pytest.mark.network
+@tm.network
+def test_url_encoding_csv():
+    """
+    read_csv should honor the requested encoding for URLs.
+
+    GH 10424
+    """
+    path = (
+        "https://raw.githubusercontent.com/pandas-dev/pandas/main/"
+        + "pandas/tests/io/parser/data/unicode_series.csv"
+    )
+    df = read_csv(path, encoding="latin-1", header=None)
+    assert df.loc[15, 1] == "Á köldum klaka (Cold Fever) (1994)"
+
+
+@pytest.fixture
+def tips_df(datapath):
+    """DataFrame with the tips dataset."""
+    return read_csv(datapath("io", "data", "csv", "tips.csv"))
+
+
+@pytest.mark.usefixtures("s3_resource")
+@td.skip_if_not_us_locale()
+class TestS3:
+    @td.skip_if_no("s3fs")
+    def test_parse_public_s3_bucket(self, tips_df, s3so):
+
+        # More of an integration test because of the read from a not-public
+        # bucket below; that part could probably be mocked instead.
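+        # The same public object is read once per compression extension and
+        # must round-trip to a frame identical to the local tips fixture.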
+ for ext, comp in [("", None), (".gz", "gzip"), (".bz2", "bz2")]: + df = read_csv( + "s3://pandas-test/tips.csv" + ext, + compression=comp, + storage_options=s3so, + ) + assert isinstance(df, DataFrame) + assert not df.empty + tm.assert_frame_equal(df, tips_df) + + # Read public file from bucket with not-public contents + df = read_csv("s3://cant_get_it/tips.csv", storage_options=s3so) + assert isinstance(df, DataFrame) + assert not df.empty + tm.assert_frame_equal(df, tips_df) + + def test_parse_public_s3n_bucket(self, tips_df, s3so): + + # Read from AWS s3 as "s3n" URL + df = read_csv("s3n://pandas-test/tips.csv", nrows=10, storage_options=s3so) + assert isinstance(df, DataFrame) + assert not df.empty + tm.assert_frame_equal(tips_df.iloc[:10], df) + + def test_parse_public_s3a_bucket(self, tips_df, s3so): + # Read from AWS s3 as "s3a" URL + df = read_csv("s3a://pandas-test/tips.csv", nrows=10, storage_options=s3so) + assert isinstance(df, DataFrame) + assert not df.empty + tm.assert_frame_equal(tips_df.iloc[:10], df) + + def test_parse_public_s3_bucket_nrows(self, tips_df, s3so): + for ext, comp in [("", None), (".gz", "gzip"), (".bz2", "bz2")]: + df = read_csv( + "s3://pandas-test/tips.csv" + ext, + nrows=10, + compression=comp, + storage_options=s3so, + ) + assert isinstance(df, DataFrame) + assert not df.empty + tm.assert_frame_equal(tips_df.iloc[:10], df) + + def test_parse_public_s3_bucket_chunked(self, tips_df, s3so): + # Read with a chunksize + chunksize = 5 + for ext, comp in [("", None), (".gz", "gzip"), (".bz2", "bz2")]: + with read_csv( + "s3://pandas-test/tips.csv" + ext, + chunksize=chunksize, + compression=comp, + storage_options=s3so, + ) as df_reader: + assert df_reader.chunksize == chunksize + for i_chunk in [0, 1, 2]: + # Read a couple of chunks and make sure we see them + # properly. + df = df_reader.get_chunk() + assert isinstance(df, DataFrame) + assert not df.empty + true_df = tips_df.iloc[ + chunksize * i_chunk : chunksize * (i_chunk + 1) + ] + tm.assert_frame_equal(true_df, df) + + def test_parse_public_s3_bucket_chunked_python(self, tips_df, s3so): + # Read with a chunksize using the Python parser + chunksize = 5 + for ext, comp in [("", None), (".gz", "gzip"), (".bz2", "bz2")]: + with read_csv( + "s3://pandas-test/tips.csv" + ext, + chunksize=chunksize, + compression=comp, + engine="python", + storage_options=s3so, + ) as df_reader: + assert df_reader.chunksize == chunksize + for i_chunk in [0, 1, 2]: + # Read a couple of chunks and make sure we see them properly. 
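+                    # get_chunk() advances the reader, so chunk i must match
+                    # rows [i * chunksize, (i + 1) * chunksize) of the fixture.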
+ df = df_reader.get_chunk() + assert isinstance(df, DataFrame) + assert not df.empty + true_df = tips_df.iloc[ + chunksize * i_chunk : chunksize * (i_chunk + 1) + ] + tm.assert_frame_equal(true_df, df) + + def test_parse_public_s3_bucket_python(self, tips_df, s3so): + for ext, comp in [("", None), (".gz", "gzip"), (".bz2", "bz2")]: + df = read_csv( + "s3://pandas-test/tips.csv" + ext, + engine="python", + compression=comp, + storage_options=s3so, + ) + assert isinstance(df, DataFrame) + assert not df.empty + tm.assert_frame_equal(df, tips_df) + + def test_infer_s3_compression(self, tips_df, s3so): + for ext in ["", ".gz", ".bz2"]: + df = read_csv( + "s3://pandas-test/tips.csv" + ext, + engine="python", + compression="infer", + storage_options=s3so, + ) + assert isinstance(df, DataFrame) + assert not df.empty + tm.assert_frame_equal(df, tips_df) + + def test_parse_public_s3_bucket_nrows_python(self, tips_df, s3so): + for ext, comp in [("", None), (".gz", "gzip"), (".bz2", "bz2")]: + df = read_csv( + "s3://pandas-test/tips.csv" + ext, + engine="python", + nrows=10, + compression=comp, + storage_options=s3so, + ) + assert isinstance(df, DataFrame) + assert not df.empty + tm.assert_frame_equal(tips_df.iloc[:10], df) + + def test_read_s3_fails(self, s3so): + msg = "The specified bucket does not exist" + with pytest.raises(OSError, match=msg): + read_csv("s3://nyqpug/asdf.csv", storage_options=s3so) + + # Receive a permission error when trying to read a private bucket. + # It's irrelevant here that this isn't actually a table. + with pytest.raises(OSError, match=msg): + read_csv("s3://cant_get_it/file.csv") + + @pytest.mark.xfail(reason="GH#39155 s3fs upgrade", strict=False) + def test_write_s3_csv_fails(self, tips_df, s3so): + # GH 32486 + # Attempting to write to an invalid S3 path should raise + import botocore + + # GH 34087 + # https://boto3.amazonaws.com/v1/documentation/api/latest/guide/error-handling.html + # Catch a ClientError since AWS Service Errors are defined dynamically + error = (FileNotFoundError, botocore.exceptions.ClientError) + + with pytest.raises(error, match="The specified bucket does not exist"): + tips_df.to_csv( + "s3://an_s3_bucket_data_doesnt_exit/not_real.csv", storage_options=s3so + ) + + @pytest.mark.xfail(reason="GH#39155 s3fs upgrade", strict=False) + @td.skip_if_no("pyarrow") + def test_write_s3_parquet_fails(self, tips_df, s3so): + # GH 27679 + # Attempting to write to an invalid S3 path should raise + import botocore + + # GH 34087 + # https://boto3.amazonaws.com/v1/documentation/api/latest/guide/error-handling.html + # Catch a ClientError since AWS Service Errors are defined dynamically + error = (FileNotFoundError, botocore.exceptions.ClientError) + + with pytest.raises(error, match="The specified bucket does not exist"): + tips_df.to_parquet( + "s3://an_s3_bucket_data_doesnt_exit/not_real.parquet", + storage_options=s3so, + ) + + def test_read_csv_handles_boto_s3_object(self, s3_resource, tips_file): + # see gh-16135 + + s3_object = s3_resource.meta.client.get_object( + Bucket="pandas-test", Key="tips.csv" + ) + + with BytesIO(s3_object["Body"].read()) as buffer: + result = read_csv(buffer, encoding="utf8") + assert isinstance(result, DataFrame) + assert not result.empty + + expected = read_csv(tips_file) + tm.assert_frame_equal(result, expected) + + @pytest.mark.skipif( + is_ci_environment(), + reason="This test can hang in our CI min_versions build " + "and leads to '##[error]The runner has " + "received a shutdown signal...' in GHA. 
GH: 45651", + ) + def test_read_csv_chunked_download(self, s3_resource, caplog, s3so): + # 8 MB, S3FS uses 5MB chunks + import s3fs + + df = DataFrame(np.random.randn(100000, 4), columns=list("abcd")) + str_buf = StringIO() + + df.to_csv(str_buf) + + buf = BytesIO(str_buf.getvalue().encode("utf-8")) + + s3_resource.Bucket("pandas-test").put_object(Key="large-file.csv", Body=buf) + + # Possibly some state leaking in between tests. + # If we don't clear this cache, we saw `GetObject operation: Forbidden`. + # Presumably the s3fs instance is being cached, with the directory listing + # from *before* we add the large-file.csv in the pandas-test bucket. + s3fs.S3FileSystem.clear_instance_cache() + + with caplog.at_level(logging.DEBUG, logger="s3fs"): + read_csv("s3://pandas-test/large-file.csv", nrows=5, storage_options=s3so) + # log of fetch_range (start, stop) + assert (0, 5505024) in (x.args[-2:] for x in caplog.records) + + def test_read_s3_with_hash_in_key(self, tips_df, s3so): + # GH 25945 + result = read_csv("s3://pandas-test/tips#1.csv", storage_options=s3so) + tm.assert_frame_equal(tips_df, result) + + @td.skip_if_no("pyarrow") + def test_read_feather_s3_file_path(self, feather_file, s3so): + # GH 29055 + expected = read_feather(feather_file) + res = read_feather( + "s3://pandas-test/simple_dataset.feather", storage_options=s3so + ) + tm.assert_frame_equal(expected, res) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_parse_dates.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_parse_dates.py new file mode 100644 index 0000000..4777bed --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_parse_dates.py @@ -0,0 +1,1983 @@ +""" +Tests date parsing functionality for all of the +parsers defined in parsers.py +""" + +from datetime import ( + date, + datetime, +) +from io import StringIO +import warnings + +from dateutil.parser import parse as du_parse +from hypothesis import given +import numpy as np +import pytest +import pytz + +from pandas._libs.tslibs import parsing +from pandas._libs.tslibs.parsing import parse_datetime_string +from pandas.compat.pyarrow import pa_version_under6p0 + +import pandas as pd +from pandas import ( + DataFrame, + DatetimeIndex, + Index, + MultiIndex, + Series, + Timestamp, +) +import pandas._testing as tm +from pandas._testing._hypothesis import DATETIME_NO_TZ +from pandas.core.indexes.datetimes import date_range + +import pandas.io.date_converters as conv +from pandas.io.parsers import read_csv + +xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") + +# GH#43650: Some expected failures with the pyarrow engine can occasionally +# cause a deadlock instead, so we skip these instead of xfailing +skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") + +# constant +_DEFAULT_DATETIME = datetime(1, 1, 1) + + +@xfail_pyarrow +def test_read_csv_with_custom_date_parser(all_parsers): + # GH36111 + def __custom_date_parser(time): + time = time.astype(np.float_) + time = time.astype(np.int_) # convert float seconds to int type + return pd.to_timedelta(time, unit="s") + + testdata = StringIO( + """time e n h + 41047.00 -98573.7297 871458.0640 389.0089 + 41048.00 -98573.7299 871458.0640 389.0089 + 41049.00 -98573.7300 871458.0642 389.0088 + 41050.00 -98573.7299 871458.0643 389.0088 + 41051.00 -98573.7302 871458.0640 389.0086 + """ + ) + result = all_parsers.read_csv( + testdata, + delim_whitespace=True, + parse_dates=True, + date_parser=__custom_date_parser, + index_col="time", + ) + time = 
[41047, 41048, 41049, 41050, 41051] + time = pd.TimedeltaIndex([pd.to_timedelta(i, unit="s") for i in time], name="time") + expected = DataFrame( + { + "e": [-98573.7297, -98573.7299, -98573.7300, -98573.7299, -98573.7302], + "n": [871458.0640, 871458.0640, 871458.0642, 871458.0643, 871458.0640], + "h": [389.0089, 389.0089, 389.0088, 389.0088, 389.0086], + }, + index=time, + ) + + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +def test_read_csv_with_custom_date_parser_parse_dates_false(all_parsers): + # GH44366 + def __custom_date_parser(time): + time = time.astype(np.float_) + time = time.astype(np.int_) # convert float seconds to int type + return pd.to_timedelta(time, unit="s") + + testdata = StringIO( + """time e + 41047.00 -93.77 + 41048.00 -95.79 + 41049.00 -98.73 + 41050.00 -93.99 + 41051.00 -97.72 + """ + ) + result = all_parsers.read_csv( + testdata, + delim_whitespace=True, + parse_dates=False, + date_parser=__custom_date_parser, + index_col="time", + ) + time = Series([41047.00, 41048.00, 41049.00, 41050.00, 41051.00], name="time") + expected = DataFrame( + {"e": [-93.77, -95.79, -98.73, -93.99, -97.72]}, + index=time, + ) + + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +def test_separator_date_conflict(all_parsers): + # Regression test for gh-4678 + # + # Make sure thousands separator and + # date parsing do not conflict. + parser = all_parsers + data = "06-02-2013;13:00;1-000.215" + expected = DataFrame( + [[datetime(2013, 6, 2, 13, 0, 0), 1000.215]], columns=["Date", 2] + ) + + df = parser.read_csv( + StringIO(data), + sep=";", + thousands="-", + parse_dates={"Date": [0, 1]}, + header=None, + ) + tm.assert_frame_equal(df, expected) + + +@xfail_pyarrow +@pytest.mark.parametrize("keep_date_col", [True, False]) +def test_multiple_date_col_custom(all_parsers, keep_date_col): + data = """\ +KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000 +KORD,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000 +KORD,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000 +KORD,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000 +KORD,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 +KORD,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000 +""" + parser = all_parsers + + def date_parser(*date_cols): + """ + Test date parser. + + Parameters + ---------- + date_cols : args + The list of data columns to parse. + + Returns + ------- + parsed : Series + """ + return parsing.try_parse_dates(parsing.concat_date_cols(date_cols)) + + kwds = { + "header": None, + "date_parser": date_parser, + "prefix": "X", + "parse_dates": {"actual": [1, 2], "nominal": [1, 3]}, + "keep_date_col": keep_date_col, + } + result = parser.read_csv_check_warnings( + FutureWarning, + "The prefix argument has been deprecated " + "and will be removed in a future version. 
.*\n\n", + StringIO(data), + **kwds, + ) + + expected = DataFrame( + [ + [ + datetime(1999, 1, 27, 19, 0), + datetime(1999, 1, 27, 18, 56), + "KORD", + "19990127", + " 19:00:00", + " 18:56:00", + 0.81, + 2.81, + 7.2, + 0.0, + 280.0, + ], + [ + datetime(1999, 1, 27, 20, 0), + datetime(1999, 1, 27, 19, 56), + "KORD", + "19990127", + " 20:00:00", + " 19:56:00", + 0.01, + 2.21, + 7.2, + 0.0, + 260.0, + ], + [ + datetime(1999, 1, 27, 21, 0), + datetime(1999, 1, 27, 20, 56), + "KORD", + "19990127", + " 21:00:00", + " 20:56:00", + -0.59, + 2.21, + 5.7, + 0.0, + 280.0, + ], + [ + datetime(1999, 1, 27, 21, 0), + datetime(1999, 1, 27, 21, 18), + "KORD", + "19990127", + " 21:00:00", + " 21:18:00", + -0.99, + 2.01, + 3.6, + 0.0, + 270.0, + ], + [ + datetime(1999, 1, 27, 22, 0), + datetime(1999, 1, 27, 21, 56), + "KORD", + "19990127", + " 22:00:00", + " 21:56:00", + -0.59, + 1.71, + 5.1, + 0.0, + 290.0, + ], + [ + datetime(1999, 1, 27, 23, 0), + datetime(1999, 1, 27, 22, 56), + "KORD", + "19990127", + " 23:00:00", + " 22:56:00", + -0.59, + 1.71, + 4.6, + 0.0, + 280.0, + ], + ], + columns=[ + "actual", + "nominal", + "X0", + "X1", + "X2", + "X3", + "X4", + "X5", + "X6", + "X7", + "X8", + ], + ) + + if not keep_date_col: + expected = expected.drop(["X1", "X2", "X3"], axis=1) + + # Python can sometimes be flaky about how + # the aggregated columns are entered, so + # this standardizes the order. + result = result[expected.columns] + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("container", [list, tuple, Index, Series]) +@pytest.mark.parametrize("dim", [1, 2]) +def test_concat_date_col_fail(container, dim): + msg = "not all elements from date_cols are numpy arrays" + value = "19990127" + + date_cols = tuple(container([value]) for _ in range(dim)) + + with pytest.raises(ValueError, match=msg): + parsing.concat_date_cols(date_cols) + + +@xfail_pyarrow +@pytest.mark.parametrize("keep_date_col", [True, False]) +def test_multiple_date_col(all_parsers, keep_date_col): + data = """\ +KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000 +KORD,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000 +KORD,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000 +KORD,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000 +KORD,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 +KORD,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000 +""" + parser = all_parsers + kwds = { + "header": None, + "prefix": "X", + "parse_dates": [[1, 2], [1, 3]], + "keep_date_col": keep_date_col, + } + result = parser.read_csv_check_warnings( + FutureWarning, + "The prefix argument has been deprecated " + "and will be removed in a future version. 
.*\n\n", + StringIO(data), + **kwds, + ) + + expected = DataFrame( + [ + [ + datetime(1999, 1, 27, 19, 0), + datetime(1999, 1, 27, 18, 56), + "KORD", + "19990127", + " 19:00:00", + " 18:56:00", + 0.81, + 2.81, + 7.2, + 0.0, + 280.0, + ], + [ + datetime(1999, 1, 27, 20, 0), + datetime(1999, 1, 27, 19, 56), + "KORD", + "19990127", + " 20:00:00", + " 19:56:00", + 0.01, + 2.21, + 7.2, + 0.0, + 260.0, + ], + [ + datetime(1999, 1, 27, 21, 0), + datetime(1999, 1, 27, 20, 56), + "KORD", + "19990127", + " 21:00:00", + " 20:56:00", + -0.59, + 2.21, + 5.7, + 0.0, + 280.0, + ], + [ + datetime(1999, 1, 27, 21, 0), + datetime(1999, 1, 27, 21, 18), + "KORD", + "19990127", + " 21:00:00", + " 21:18:00", + -0.99, + 2.01, + 3.6, + 0.0, + 270.0, + ], + [ + datetime(1999, 1, 27, 22, 0), + datetime(1999, 1, 27, 21, 56), + "KORD", + "19990127", + " 22:00:00", + " 21:56:00", + -0.59, + 1.71, + 5.1, + 0.0, + 290.0, + ], + [ + datetime(1999, 1, 27, 23, 0), + datetime(1999, 1, 27, 22, 56), + "KORD", + "19990127", + " 23:00:00", + " 22:56:00", + -0.59, + 1.71, + 4.6, + 0.0, + 280.0, + ], + ], + columns=[ + "X1_X2", + "X1_X3", + "X0", + "X1", + "X2", + "X3", + "X4", + "X5", + "X6", + "X7", + "X8", + ], + ) + + if not keep_date_col: + expected = expected.drop(["X1", "X2", "X3"], axis=1) + + tm.assert_frame_equal(result, expected) + + +def test_date_col_as_index_col(all_parsers): + data = """\ +KORD,19990127 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000 +KORD,19990127 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000 +KORD,19990127 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000 +KORD,19990127 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000 +KORD,19990127 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 +""" + parser = all_parsers + kwds = {"header": None, "prefix": "X", "parse_dates": [1], "index_col": 1} + result = parser.read_csv_check_warnings( + FutureWarning, + "The prefix argument has been deprecated " + "and will be removed in a future version. 
.*\n\n", + StringIO(data), + **kwds, + ) + + index = Index( + [ + datetime(1999, 1, 27, 19, 0), + datetime(1999, 1, 27, 20, 0), + datetime(1999, 1, 27, 21, 0), + datetime(1999, 1, 27, 21, 0), + datetime(1999, 1, 27, 22, 0), + ], + name="X1", + ) + expected = DataFrame( + [ + ["KORD", " 18:56:00", 0.81, 2.81, 7.2, 0.0, 280.0], + ["KORD", " 19:56:00", 0.01, 2.21, 7.2, 0.0, 260.0], + ["KORD", " 20:56:00", -0.59, 2.21, 5.7, 0.0, 280.0], + ["KORD", " 21:18:00", -0.99, 2.01, 3.6, 0.0, 270.0], + ["KORD", " 21:56:00", -0.59, 1.71, 5.1, 0.0, 290.0], + ], + columns=["X0", "X2", "X3", "X4", "X5", "X6", "X7"], + index=index, + ) + if parser.engine == "pyarrow" and not pa_version_under6p0: + # https://github.com/pandas-dev/pandas/issues/44231 + # pyarrow 6.0 starts to infer time type + expected["X2"] = pd.to_datetime("1970-01-01" + expected["X2"]).dt.time + + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +@pytest.mark.parametrize( + "date_parser, warning", + ([conv.parse_date_time, FutureWarning], [pd.to_datetime, None]), +) +def test_multiple_date_cols_int_cast(all_parsers, date_parser, warning): + data = ( + "KORD,19990127, 19:00:00, 18:56:00, 0.8100\n" + "KORD,19990127, 20:00:00, 19:56:00, 0.0100\n" + "KORD,19990127, 21:00:00, 20:56:00, -0.5900\n" + "KORD,19990127, 21:00:00, 21:18:00, -0.9900\n" + "KORD,19990127, 22:00:00, 21:56:00, -0.5900\n" + "KORD,19990127, 23:00:00, 22:56:00, -0.5900" + ) + parse_dates = {"actual": [1, 2], "nominal": [1, 3]} + parser = all_parsers + + kwds = { + "header": None, + "prefix": "X", + "parse_dates": parse_dates, + "date_parser": date_parser, + } + result = parser.read_csv_check_warnings( + FutureWarning, + "The prefix argument has been deprecated " + "and will be removed in a future version. .*\n\n", + StringIO(data), + **kwds, + ) + + expected = DataFrame( + [ + [datetime(1999, 1, 27, 19, 0), datetime(1999, 1, 27, 18, 56), "KORD", 0.81], + [datetime(1999, 1, 27, 20, 0), datetime(1999, 1, 27, 19, 56), "KORD", 0.01], + [ + datetime(1999, 1, 27, 21, 0), + datetime(1999, 1, 27, 20, 56), + "KORD", + -0.59, + ], + [ + datetime(1999, 1, 27, 21, 0), + datetime(1999, 1, 27, 21, 18), + "KORD", + -0.99, + ], + [ + datetime(1999, 1, 27, 22, 0), + datetime(1999, 1, 27, 21, 56), + "KORD", + -0.59, + ], + [ + datetime(1999, 1, 27, 23, 0), + datetime(1999, 1, 27, 22, 56), + "KORD", + -0.59, + ], + ], + columns=["actual", "nominal", "X0", "X4"], + ) + + # Python can sometimes be flaky about how + # the aggregated columns are entered, so + # this standardizes the order. 
+ result = result[expected.columns] + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +def test_multiple_date_col_timestamp_parse(all_parsers): + parser = all_parsers + data = """05/31/2012,15:30:00.029,1306.25,1,E,0,,1306.25 +05/31/2012,15:30:00.029,1306.25,8,E,0,,1306.25""" + + result = parser.read_csv( + StringIO(data), parse_dates=[[0, 1]], header=None, date_parser=Timestamp + ) + expected = DataFrame( + [ + [ + Timestamp("05/31/2012, 15:30:00.029"), + 1306.25, + 1, + "E", + 0, + np.nan, + 1306.25, + ], + [ + Timestamp("05/31/2012, 15:30:00.029"), + 1306.25, + 8, + "E", + 0, + np.nan, + 1306.25, + ], + ], + columns=["0_1", 2, 3, 4, 5, 6, 7], + ) + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +def test_multiple_date_cols_with_header(all_parsers): + parser = all_parsers + data = """\ +ID,date,NominalTime,ActualTime,TDew,TAir,Windspeed,Precip,WindDir +KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000 +KORD,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000 +KORD,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000 +KORD,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000 +KORD,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 +KORD,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000""" + + result = parser.read_csv(StringIO(data), parse_dates={"nominal": [1, 2]}) + expected = DataFrame( + [ + [ + datetime(1999, 1, 27, 19, 0), + "KORD", + " 18:56:00", + 0.81, + 2.81, + 7.2, + 0.0, + 280.0, + ], + [ + datetime(1999, 1, 27, 20, 0), + "KORD", + " 19:56:00", + 0.01, + 2.21, + 7.2, + 0.0, + 260.0, + ], + [ + datetime(1999, 1, 27, 21, 0), + "KORD", + " 20:56:00", + -0.59, + 2.21, + 5.7, + 0.0, + 280.0, + ], + [ + datetime(1999, 1, 27, 21, 0), + "KORD", + " 21:18:00", + -0.99, + 2.01, + 3.6, + 0.0, + 270.0, + ], + [ + datetime(1999, 1, 27, 22, 0), + "KORD", + " 21:56:00", + -0.59, + 1.71, + 5.1, + 0.0, + 290.0, + ], + [ + datetime(1999, 1, 27, 23, 0), + "KORD", + " 22:56:00", + -0.59, + 1.71, + 4.6, + 0.0, + 280.0, + ], + ], + columns=[ + "nominal", + "ID", + "ActualTime", + "TDew", + "TAir", + "Windspeed", + "Precip", + "WindDir", + ], + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "data,parse_dates,msg", + [ + ( + """\ +date_NominalTime,date,NominalTime +KORD1,19990127, 19:00:00 +KORD2,19990127, 20:00:00""", + [[1, 2]], + ("New date column already in dict date_NominalTime"), + ), + ( + """\ +ID,date,nominalTime +KORD,19990127, 19:00:00 +KORD,19990127, 20:00:00""", + {"ID": [1, 2]}, + "Date column ID already in dict", + ), + ], +) +def test_multiple_date_col_name_collision(all_parsers, data, parse_dates, msg): + parser = all_parsers + + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), parse_dates=parse_dates) + + +def test_date_parser_int_bug(all_parsers): + # see gh-3071 + parser = all_parsers + data = ( + "posix_timestamp,elapsed,sys,user,queries,query_time,rows," + "accountid,userid,contactid,level,silo,method\n" + "1343103150,0.062353,0,4,6,0.01690,3," + "12345,1,-1,3,invoice_InvoiceResource,search\n" + ) + + result = parser.read_csv( + StringIO(data), + index_col=0, + parse_dates=[0], + date_parser=lambda x: datetime.utcfromtimestamp(int(x)), + ) + expected = DataFrame( + [ + [ + 0.062353, + 0, + 4, + 6, + 0.01690, + 3, + 12345, + 1, + -1, + 3, + "invoice_InvoiceResource", + "search", + ] + ], + columns=[ + "elapsed", + "sys", + "user", + "queries", + "query_time", + "rows", + 
"accountid", + "userid", + "contactid", + "level", + "silo", + "method", + ], + index=Index([Timestamp("2012-07-24 04:12:30")], name="posix_timestamp"), + ) + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +def test_nat_parse(all_parsers): + # see gh-3062 + parser = all_parsers + df = DataFrame( + dict({"A": np.arange(10, dtype="float64"), "B": Timestamp("20010101")}) + ) + df.iloc[3:6, :] = np.nan + + with tm.ensure_clean("__nat_parse_.csv") as path: + df.to_csv(path) + + result = parser.read_csv(path, index_col=0, parse_dates=["B"]) + tm.assert_frame_equal(result, df) + + +@xfail_pyarrow +def test_csv_custom_parser(all_parsers): + data = """A,B,C +20090101,a,1,2 +20090102,b,3,4 +20090103,c,4,5 +""" + parser = all_parsers + result = parser.read_csv( + StringIO(data), date_parser=lambda x: datetime.strptime(x, "%Y%m%d") + ) + expected = parser.read_csv(StringIO(data), parse_dates=True) + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +def test_parse_dates_implicit_first_col(all_parsers): + data = """A,B,C +20090101,a,1,2 +20090102,b,3,4 +20090103,c,4,5 +""" + parser = all_parsers + result = parser.read_csv(StringIO(data), parse_dates=True) + + expected = parser.read_csv(StringIO(data), index_col=0, parse_dates=True) + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +def test_parse_dates_string(all_parsers): + data = """date,A,B,C +20090101,a,1,2 +20090102,b,3,4 +20090103,c,4,5 +""" + parser = all_parsers + result = parser.read_csv(StringIO(data), index_col="date", parse_dates=["date"]) + # freq doesn't round-trip + index = DatetimeIndex( + list(date_range("1/1/2009", periods=3)), name="date", freq=None + ) + + expected = DataFrame( + {"A": ["a", "b", "c"], "B": [1, 3, 4], "C": [2, 4, 5]}, index=index + ) + tm.assert_frame_equal(result, expected) + + +# Bug in https://github.com/dateutil/dateutil/issues/217 +# has been addressed, but we just don't pass in the `yearfirst` +@pytest.mark.xfail(reason="yearfirst is not surfaced in read_*") +@pytest.mark.parametrize("parse_dates", [[["date", "time"]], [[0, 1]]]) +def test_yy_format_with_year_first(all_parsers, parse_dates): + data = """date,time,B,C +090131,0010,1,2 +090228,1020,3,4 +090331,0830,5,6 +""" + parser = all_parsers + result = parser.read_csv(StringIO(data), index_col=0, parse_dates=parse_dates) + index = DatetimeIndex( + [ + datetime(2009, 1, 31, 0, 10, 0), + datetime(2009, 2, 28, 10, 20, 0), + datetime(2009, 3, 31, 8, 30, 0), + ], + dtype=object, + name="date_time", + ) + expected = DataFrame({"B": [1, 3, 5], "C": [2, 4, 6]}, index=index) + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +@pytest.mark.parametrize("parse_dates", [[0, 2], ["a", "c"]]) +def test_parse_dates_column_list(all_parsers, parse_dates): + data = "a,b,c\n01/01/2010,1,15/02/2010" + parser = all_parsers + + expected = DataFrame( + {"a": [datetime(2010, 1, 1)], "b": [1], "c": [datetime(2010, 2, 15)]} + ) + expected = expected.set_index(["a", "b"]) + + result = parser.read_csv( + StringIO(data), index_col=[0, 1], parse_dates=parse_dates, dayfirst=True + ) + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +@pytest.mark.parametrize("index_col", [[0, 1], [1, 0]]) +def test_multi_index_parse_dates(all_parsers, index_col): + data = """index1,index2,A,B,C +20090101,one,a,1,2 +20090101,two,b,3,4 +20090101,three,c,4,5 +20090102,one,a,1,2 +20090102,two,b,3,4 +20090102,three,c,4,5 +20090103,one,a,1,2 +20090103,two,b,3,4 +20090103,three,c,4,5 +""" + parser = all_parsers + index = MultiIndex.from_product( + [ + 
(datetime(2009, 1, 1), datetime(2009, 1, 2), datetime(2009, 1, 3)), + ("one", "two", "three"), + ], + names=["index1", "index2"], + ) + + # Out of order. + if index_col == [1, 0]: + index = index.swaplevel(0, 1) + + expected = DataFrame( + [ + ["a", 1, 2], + ["b", 3, 4], + ["c", 4, 5], + ["a", 1, 2], + ["b", 3, 4], + ["c", 4, 5], + ["a", 1, 2], + ["b", 3, 4], + ["c", 4, 5], + ], + columns=["A", "B", "C"], + index=index, + ) + result = parser.read_csv(StringIO(data), index_col=index_col, parse_dates=True) + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +@pytest.mark.parametrize("kwargs", [{"dayfirst": True}, {"day_first": True}]) +def test_parse_dates_custom_euro_format(all_parsers, kwargs): + parser = all_parsers + data = """foo,bar,baz +31/01/2010,1,2 +01/02/2010,1,NA +02/02/2010,1,2 +""" + if "dayfirst" in kwargs: + df = parser.read_csv( + StringIO(data), + names=["time", "Q", "NTU"], + date_parser=lambda d: du_parse(d, **kwargs), + header=0, + index_col=0, + parse_dates=True, + na_values=["NA"], + ) + exp_index = Index( + [datetime(2010, 1, 31), datetime(2010, 2, 1), datetime(2010, 2, 2)], + name="time", + ) + expected = DataFrame( + {"Q": [1, 1, 1], "NTU": [2, np.nan, 2]}, + index=exp_index, + columns=["Q", "NTU"], + ) + tm.assert_frame_equal(df, expected) + else: + msg = "got an unexpected keyword argument 'day_first'" + with pytest.raises(TypeError, match=msg), tm.assert_produces_warning( + FutureWarning + ): + parser.read_csv( + StringIO(data), + names=["time", "Q", "NTU"], + date_parser=lambda d: du_parse(d, **kwargs), + skiprows=[0], + index_col=0, + parse_dates=True, + na_values=["NA"], + ) + + +@xfail_pyarrow +def test_parse_tz_aware(all_parsers): + # See gh-1693 + parser = all_parsers + data = "Date,x\n2012-06-13T01:39:00Z,0.5" + + result = parser.read_csv(StringIO(data), index_col=0, parse_dates=True) + expected = DataFrame( + {"x": [0.5]}, index=Index([Timestamp("2012-06-13 01:39:00+00:00")], name="Date") + ) + tm.assert_frame_equal(result, expected) + assert result.index.tz is pytz.utc + + +@xfail_pyarrow +@pytest.mark.parametrize( + "parse_dates,index_col", + [({"nominal": [1, 2]}, "nominal"), ({"nominal": [1, 2]}, 0), ([[1, 2]], 0)], +) +def test_multiple_date_cols_index(all_parsers, parse_dates, index_col): + parser = all_parsers + data = """ +ID,date,NominalTime,ActualTime,TDew,TAir,Windspeed,Precip,WindDir +KORD1,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000 +KORD2,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000 +KORD3,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000 +KORD4,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000 +KORD5,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 +KORD6,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000 +""" + expected = DataFrame( + [ + [ + datetime(1999, 1, 27, 19, 0), + "KORD1", + " 18:56:00", + 0.81, + 2.81, + 7.2, + 0.0, + 280.0, + ], + [ + datetime(1999, 1, 27, 20, 0), + "KORD2", + " 19:56:00", + 0.01, + 2.21, + 7.2, + 0.0, + 260.0, + ], + [ + datetime(1999, 1, 27, 21, 0), + "KORD3", + " 20:56:00", + -0.59, + 2.21, + 5.7, + 0.0, + 280.0, + ], + [ + datetime(1999, 1, 27, 21, 0), + "KORD4", + " 21:18:00", + -0.99, + 2.01, + 3.6, + 0.0, + 270.0, + ], + [ + datetime(1999, 1, 27, 22, 0), + "KORD5", + " 21:56:00", + -0.59, + 1.71, + 5.1, + 0.0, + 290.0, + ], + [ + datetime(1999, 1, 27, 23, 0), + "KORD6", + " 22:56:00", + -0.59, + 1.71, + 4.6, + 0.0, + 280.0, + ], + ], + columns=[ + 
"nominal", + "ID", + "ActualTime", + "TDew", + "TAir", + "Windspeed", + "Precip", + "WindDir", + ], + ) + expected = expected.set_index("nominal") + + if not isinstance(parse_dates, dict): + expected.index.name = "date_NominalTime" + + result = parser.read_csv( + StringIO(data), parse_dates=parse_dates, index_col=index_col + ) + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +def test_multiple_date_cols_chunked(all_parsers): + parser = all_parsers + data = """\ +ID,date,nominalTime,actualTime,A,B,C,D,E +KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000 +KORD,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000 +KORD,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000 +KORD,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000 +KORD,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 +KORD,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000 +""" + + expected = DataFrame( + [ + [ + datetime(1999, 1, 27, 19, 0), + "KORD", + " 18:56:00", + 0.81, + 2.81, + 7.2, + 0.0, + 280.0, + ], + [ + datetime(1999, 1, 27, 20, 0), + "KORD", + " 19:56:00", + 0.01, + 2.21, + 7.2, + 0.0, + 260.0, + ], + [ + datetime(1999, 1, 27, 21, 0), + "KORD", + " 20:56:00", + -0.59, + 2.21, + 5.7, + 0.0, + 280.0, + ], + [ + datetime(1999, 1, 27, 21, 0), + "KORD", + " 21:18:00", + -0.99, + 2.01, + 3.6, + 0.0, + 270.0, + ], + [ + datetime(1999, 1, 27, 22, 0), + "KORD", + " 21:56:00", + -0.59, + 1.71, + 5.1, + 0.0, + 290.0, + ], + [ + datetime(1999, 1, 27, 23, 0), + "KORD", + " 22:56:00", + -0.59, + 1.71, + 4.6, + 0.0, + 280.0, + ], + ], + columns=["nominal", "ID", "actualTime", "A", "B", "C", "D", "E"], + ) + expected = expected.set_index("nominal") + + with parser.read_csv( + StringIO(data), + parse_dates={"nominal": [1, 2]}, + index_col="nominal", + chunksize=2, + ) as reader: + chunks = list(reader) + + tm.assert_frame_equal(chunks[0], expected[:2]) + tm.assert_frame_equal(chunks[1], expected[2:4]) + tm.assert_frame_equal(chunks[2], expected[4:]) + + +@xfail_pyarrow +def test_multiple_date_col_named_index_compat(all_parsers): + parser = all_parsers + data = """\ +ID,date,nominalTime,actualTime,A,B,C,D,E +KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000 +KORD,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000 +KORD,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000 +KORD,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000 +KORD,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 +KORD,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000 +""" + + with_indices = parser.read_csv( + StringIO(data), parse_dates={"nominal": [1, 2]}, index_col="nominal" + ) + with_names = parser.read_csv( + StringIO(data), + index_col="nominal", + parse_dates={"nominal": ["date", "nominalTime"]}, + ) + tm.assert_frame_equal(with_indices, with_names) + + +@xfail_pyarrow +def test_multiple_date_col_multiple_index_compat(all_parsers): + parser = all_parsers + data = """\ +ID,date,nominalTime,actualTime,A,B,C,D,E +KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000 +KORD,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000 +KORD,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000 +KORD,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000 +KORD,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 
+KORD,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000 +""" + result = parser.read_csv( + StringIO(data), index_col=["nominal", "ID"], parse_dates={"nominal": [1, 2]} + ) + expected = parser.read_csv(StringIO(data), parse_dates={"nominal": [1, 2]}) + + expected = expected.set_index(["nominal", "ID"]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("kwargs", [{}, {"index_col": "C"}]) +def test_read_with_parse_dates_scalar_non_bool(all_parsers, kwargs): + # see gh-5636 + parser = all_parsers + msg = ( + "Only booleans, lists, and dictionaries " + "are accepted for the 'parse_dates' parameter" + ) + data = """A,B,C + 1,2,2003-11-1""" + + with pytest.raises(TypeError, match=msg): + parser.read_csv(StringIO(data), parse_dates="C", **kwargs) + + +@pytest.mark.parametrize("parse_dates", [(1,), np.array([4, 5]), {1, 3, 3}]) +def test_read_with_parse_dates_invalid_type(all_parsers, parse_dates): + parser = all_parsers + msg = ( + "Only booleans, lists, and dictionaries " + "are accepted for the 'parse_dates' parameter" + ) + data = """A,B,C + 1,2,2003-11-1""" + + with pytest.raises(TypeError, match=msg): + parser.read_csv(StringIO(data), parse_dates=(1,)) + + +@pytest.mark.parametrize("cache_dates", [True, False]) +@pytest.mark.parametrize("value", ["nan", "0", ""]) +def test_bad_date_parse(all_parsers, cache_dates, value): + # if we have an invalid date make sure that we handle this with + # and w/o the cache properly + parser = all_parsers + s = StringIO((f"{value},\n") * 50000) + + parser.read_csv( + s, + header=None, + names=["foo", "bar"], + parse_dates=["foo"], + infer_datetime_format=False, + cache_dates=cache_dates, + ) + + +@xfail_pyarrow +def test_parse_dates_empty_string(all_parsers): + # see gh-2263 + parser = all_parsers + data = "Date,test\n2012-01-01,1\n,2" + result = parser.read_csv(StringIO(data), parse_dates=["Date"], na_filter=False) + + expected = DataFrame( + [[datetime(2012, 1, 1), 1], [pd.NaT, 2]], columns=["Date", "test"] + ) + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +@pytest.mark.parametrize( + "data,kwargs,expected", + [ + ( + "a\n04.15.2016", + {"parse_dates": ["a"]}, + DataFrame([datetime(2016, 4, 15)], columns=["a"]), + ), + ( + "a\n04.15.2016", + {"parse_dates": True, "index_col": 0}, + DataFrame(index=DatetimeIndex(["2016-04-15"], name="a")), + ), + ( + "a,b\n04.15.2016,09.16.2013", + {"parse_dates": ["a", "b"]}, + DataFrame( + [[datetime(2016, 4, 15), datetime(2013, 9, 16)]], columns=["a", "b"] + ), + ), + ( + "a,b\n04.15.2016,09.16.2013", + {"parse_dates": True, "index_col": [0, 1]}, + DataFrame( + index=MultiIndex.from_tuples( + [(datetime(2016, 4, 15), datetime(2013, 9, 16))], names=["a", "b"] + ) + ), + ), + ], +) +def test_parse_dates_no_convert_thousands(all_parsers, data, kwargs, expected): + # see gh-14066 + parser = all_parsers + + result = parser.read_csv(StringIO(data), thousands=".", **kwargs) + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +@pytest.mark.parametrize( + "date_parser, warning", + ([conv.parse_date_time, FutureWarning], [pd.to_datetime, None]), +) +def test_parse_date_time_multi_level_column_name(all_parsers, date_parser, warning): + data = """\ +D,T,A,B +date, time,a,b +2001-01-05, 09:00:00, 0.0, 10. +2001-01-06, 00:00:00, 1.0, 11. 
+""" + parser = all_parsers + with tm.assert_produces_warning(warning, check_stacklevel=False): + result = parser.read_csv( + StringIO(data), + header=[0, 1], + parse_dates={"date_time": [0, 1]}, + date_parser=date_parser, + ) + + expected_data = [ + [datetime(2001, 1, 5, 9, 0, 0), 0.0, 10.0], + [datetime(2001, 1, 6, 0, 0, 0), 1.0, 11.0], + ] + expected = DataFrame(expected_data, columns=["date_time", ("A", "a"), ("B", "b")]) + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +@pytest.mark.parametrize( + "date_parser, warning", + ([conv.parse_date_time, FutureWarning], [pd.to_datetime, None]), +) +@pytest.mark.parametrize( + "data,kwargs,expected", + [ + ( + """\ +date,time,a,b +2001-01-05, 10:00:00, 0.0, 10. +2001-01-05, 00:00:00, 1., 11. +""", + {"header": 0, "parse_dates": {"date_time": [0, 1]}}, + DataFrame( + [ + [datetime(2001, 1, 5, 10, 0, 0), 0.0, 10], + [datetime(2001, 1, 5, 0, 0, 0), 1.0, 11.0], + ], + columns=["date_time", "a", "b"], + ), + ), + ( + ( + "KORD,19990127, 19:00:00, 18:56:00, 0.8100\n" + "KORD,19990127, 20:00:00, 19:56:00, 0.0100\n" + "KORD,19990127, 21:00:00, 20:56:00, -0.5900\n" + "KORD,19990127, 21:00:00, 21:18:00, -0.9900\n" + "KORD,19990127, 22:00:00, 21:56:00, -0.5900\n" + "KORD,19990127, 23:00:00, 22:56:00, -0.5900" + ), + {"header": None, "parse_dates": {"actual": [1, 2], "nominal": [1, 3]}}, + DataFrame( + [ + [ + datetime(1999, 1, 27, 19, 0), + datetime(1999, 1, 27, 18, 56), + "KORD", + 0.81, + ], + [ + datetime(1999, 1, 27, 20, 0), + datetime(1999, 1, 27, 19, 56), + "KORD", + 0.01, + ], + [ + datetime(1999, 1, 27, 21, 0), + datetime(1999, 1, 27, 20, 56), + "KORD", + -0.59, + ], + [ + datetime(1999, 1, 27, 21, 0), + datetime(1999, 1, 27, 21, 18), + "KORD", + -0.99, + ], + [ + datetime(1999, 1, 27, 22, 0), + datetime(1999, 1, 27, 21, 56), + "KORD", + -0.59, + ], + [ + datetime(1999, 1, 27, 23, 0), + datetime(1999, 1, 27, 22, 56), + "KORD", + -0.59, + ], + ], + columns=["actual", "nominal", 0, 4], + ), + ), + ], +) +def test_parse_date_time(all_parsers, data, kwargs, expected, date_parser, warning): + parser = all_parsers + with tm.assert_produces_warning(warning, check_stacklevel=False): + result = parser.read_csv(StringIO(data), date_parser=date_parser, **kwargs) + + # Python can sometimes be flaky about how + # the aggregated columns are entered, so + # this standardizes the order. + result = result[expected.columns] + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +@pytest.mark.parametrize( + "date_parser, warning", + ([conv.parse_date_fields, FutureWarning], [pd.to_datetime, None]), +) +def test_parse_date_fields(all_parsers, date_parser, warning): + parser = all_parsers + data = "year,month,day,a\n2001,01,10,10.\n2001,02,1,11." + with tm.assert_produces_warning(warning, check_stacklevel=False): + result = parser.read_csv( + StringIO(data), + header=0, + parse_dates={"ymd": [0, 1, 2]}, + date_parser=date_parser, + ) + + expected = DataFrame( + [[datetime(2001, 1, 10), 10.0], [datetime(2001, 2, 1), 11.0]], + columns=["ymd", "a"], + ) + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +@pytest.mark.parametrize( + "date_parser, warning", + ( + [conv.parse_all_fields, FutureWarning], + [lambda x: pd.to_datetime(x, format="%Y %m %d %H %M %S"), None], + ), +) +def test_parse_date_all_fields(all_parsers, date_parser, warning): + parser = all_parsers + data = """\ +year,month,day,hour,minute,second,a,b +2001,01,05,10,00,0,0.0,10. +2001,01,5,10,0,00,1.,11. 
+""" + with tm.assert_produces_warning(warning, check_stacklevel=False): + result = parser.read_csv( + StringIO(data), + header=0, + date_parser=date_parser, + parse_dates={"ymdHMS": [0, 1, 2, 3, 4, 5]}, + ) + expected = DataFrame( + [ + [datetime(2001, 1, 5, 10, 0, 0), 0.0, 10.0], + [datetime(2001, 1, 5, 10, 0, 0), 1.0, 11.0], + ], + columns=["ymdHMS", "a", "b"], + ) + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +@pytest.mark.parametrize( + "date_parser, warning", + ( + [conv.parse_all_fields, FutureWarning], + [lambda x: pd.to_datetime(x, format="%Y %m %d %H %M %S.%f"), None], + ), +) +def test_datetime_fractional_seconds(all_parsers, date_parser, warning): + parser = all_parsers + data = """\ +year,month,day,hour,minute,second,a,b +2001,01,05,10,00,0.123456,0.0,10. +2001,01,5,10,0,0.500000,1.,11. +""" + with tm.assert_produces_warning(warning, check_stacklevel=False): + result = parser.read_csv( + StringIO(data), + header=0, + date_parser=date_parser, + parse_dates={"ymdHMS": [0, 1, 2, 3, 4, 5]}, + ) + expected = DataFrame( + [ + [datetime(2001, 1, 5, 10, 0, 0, microsecond=123456), 0.0, 10.0], + [datetime(2001, 1, 5, 10, 0, 0, microsecond=500000), 1.0, 11.0], + ], + columns=["ymdHMS", "a", "b"], + ) + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +def test_generic(all_parsers): + parser = all_parsers + data = "year,month,day,a\n2001,01,10,10.\n2001,02,1,11." + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = parser.read_csv( + StringIO(data), + header=0, + parse_dates={"ym": [0, 1]}, + date_parser=lambda y, m: date(year=int(y), month=int(m), day=1), + ) + expected = DataFrame( + [[date(2001, 1, 1), 10, 10.0], [date(2001, 2, 1), 1, 11.0]], + columns=["ym", "day", "a"], + ) + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +def test_date_parser_resolution_if_not_ns(all_parsers): + # see gh-10245 + parser = all_parsers + data = """\ +date,time,prn,rxstatus +2013-11-03,19:00:00,126,00E80000 +2013-11-03,19:00:00,23,00E80000 +2013-11-03,19:00:00,13,00E80000 +""" + + def date_parser(dt, time): + return np.array(dt + "T" + time, dtype="datetime64[s]") + + result = parser.read_csv( + StringIO(data), + date_parser=date_parser, + parse_dates={"datetime": ["date", "time"]}, + index_col=["datetime", "prn"], + ) + + datetimes = np.array(["2013-11-03T19:00:00"] * 3, dtype="datetime64[s]") + expected = DataFrame( + data={"rxstatus": ["00E80000"] * 3}, + index=MultiIndex.from_tuples( + [(datetimes[0], 126), (datetimes[1], 23), (datetimes[2], 13)], + names=["datetime", "prn"], + ), + ) + tm.assert_frame_equal(result, expected) + + +def test_parse_date_column_with_empty_string(all_parsers): + # see gh-6428 + parser = all_parsers + data = "case,opdate\n7,10/18/2006\n7,10/18/2008\n621, " + result = parser.read_csv(StringIO(data), parse_dates=["opdate"]) + + expected_data = [[7, "10/18/2006"], [7, "10/18/2008"], [621, " "]] + expected = DataFrame(expected_data, columns=["case", "opdate"]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "data,expected", + [ + ( + "a\n135217135789158401\n1352171357E+5", + DataFrame({"a": [135217135789158401, 135217135700000]}, dtype="float64"), + ), + ( + "a\n99999999999\n123456789012345\n1234E+0", + DataFrame({"a": [99999999999, 123456789012345, 1234]}, dtype="float64"), + ), + ], +) +@pytest.mark.parametrize("parse_dates", [True, False]) +def test_parse_date_float(all_parsers, data, expected, parse_dates): + # see gh-2697 + # + # Date parsing should fail, so we leave the 
data untouched + # (i.e. float precision should remain unchanged). + parser = all_parsers + + result = parser.read_csv(StringIO(data), parse_dates=parse_dates) + tm.assert_frame_equal(result, expected) + + +def test_parse_timezone(all_parsers): + # see gh-22256 + parser = all_parsers + data = """dt,val + 2018-01-04 09:01:00+09:00,23350 + 2018-01-04 09:02:00+09:00,23400 + 2018-01-04 09:03:00+09:00,23400 + 2018-01-04 09:04:00+09:00,23400 + 2018-01-04 09:05:00+09:00,23400""" + result = parser.read_csv(StringIO(data), parse_dates=["dt"]) + + dti = DatetimeIndex( + list( + date_range( + start="2018-01-04 09:01:00", + end="2018-01-04 09:05:00", + freq="1min", + tz=pytz.FixedOffset(540), + ) + ), + freq=None, + ) + expected_data = {"dt": dti, "val": [23350, 23400, 23400, 23400, 23400]} + + expected = DataFrame(expected_data) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +@pytest.mark.parametrize( + "date_string", + ["32/32/2019", "02/30/2019", "13/13/2019", "13/2019", "a3/11/2018", "10/11/2o17"], +) +def test_invalid_parse_delimited_date(all_parsers, date_string): + parser = all_parsers + expected = DataFrame({0: [date_string]}, dtype="object") + result = parser.read_csv(StringIO(date_string), header=None, parse_dates=[0]) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +@pytest.mark.parametrize( + "date_string,dayfirst,expected", + [ + # %d/%m/%Y; month > 12 thus replacement + ("13/02/2019", True, datetime(2019, 2, 13)), + # %m/%d/%Y; day > 12 thus there will be no replacement + ("02/13/2019", False, datetime(2019, 2, 13)), + # %d/%m/%Y; dayfirst==True thus replacement + ("04/02/2019", True, datetime(2019, 2, 4)), + ], +) +def test_parse_delimited_date_swap_no_warning( + all_parsers, date_string, dayfirst, expected +): + parser = all_parsers + expected = DataFrame({0: [expected]}, dtype="datetime64[ns]") + result = parser.read_csv( + StringIO(date_string), header=None, dayfirst=dayfirst, parse_dates=[0] + ) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +@pytest.mark.parametrize( + "date_string,dayfirst,expected", + [ + # %d/%m/%Y; month > 12 thus replacement + ("13/02/2019", False, datetime(2019, 2, 13)), + # %m/%d/%Y; day > 12 thus there will be no replacement + ("02/13/2019", True, datetime(2019, 2, 13)), + ], +) +def test_parse_delimited_date_swap_with_warning( + all_parsers, date_string, dayfirst, expected +): + parser = all_parsers + expected = DataFrame({0: [expected]}, dtype="datetime64[ns]") + warning_msg = ( + "Provide format or specify infer_datetime_format=True for consistent parsing" + ) + with tm.assert_produces_warning(UserWarning, match=warning_msg): + result = parser.read_csv( + StringIO(date_string), header=None, dayfirst=dayfirst, parse_dates=[0] + ) + tm.assert_frame_equal(result, expected) + + +def _helper_hypothesis_delimited_date(call, date_string, **kwargs): + msg, result = None, None + try: + result = call(date_string, **kwargs) + except ValueError as er: + msg = str(er) + pass + return msg, result + + +@skip_pyarrow +@given(DATETIME_NO_TZ) +@pytest.mark.parametrize("delimiter", list(" -./")) +@pytest.mark.parametrize("dayfirst", [True, False]) +@pytest.mark.parametrize( + "date_format", + ["%d %m %Y", "%m %d %Y", "%m %Y", "%Y %m %d", "%y %m %d", "%Y%m%d", "%y%m%d"], +) +def test_hypothesis_delimited_date(date_format, dayfirst, delimiter, test_datetime): + if date_format == "%m %Y" and delimiter == ".": + pytest.skip( + "parse_datetime_string cannot reliably tell whether " + "e.g. 
%m.%Y is a float or a date, thus we skip it" + ) + result, expected = None, None + except_in_dateutil, except_out_dateutil = None, None + date_string = test_datetime.strftime(date_format.replace(" ", delimiter)) + + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=UserWarning) + except_out_dateutil, result = _helper_hypothesis_delimited_date( + parse_datetime_string, date_string, dayfirst=dayfirst + ) + except_in_dateutil, expected = _helper_hypothesis_delimited_date( + du_parse, + date_string, + default=_DEFAULT_DATETIME, + dayfirst=dayfirst, + yearfirst=False, + ) + + assert except_out_dateutil == except_in_dateutil + assert result == expected + + +@skip_pyarrow +@pytest.mark.parametrize( + "names, usecols, parse_dates, missing_cols", + [ + (None, ["val"], ["date", "time"], "date, time"), + (None, ["val"], [0, "time"], "time"), + (None, ["val"], [["date", "time"]], "date, time"), + (None, ["val"], [[0, "time"]], "time"), + (None, ["val"], {"date": [0, "time"]}, "time"), + (None, ["val"], {"date": ["date", "time"]}, "date, time"), + (None, ["val"], [["date", "time"], "date"], "date, time"), + (["date1", "time1", "temperature"], None, ["date", "time"], "date, time"), + ( + ["date1", "time1", "temperature"], + ["date1", "temperature"], + ["date1", "time"], + "time", + ), + ], +) +def test_missing_parse_dates_column_raises( + all_parsers, names, usecols, parse_dates, missing_cols +): + # gh-31251 column names provided in parse_dates could be missing. + parser = all_parsers + content = StringIO("date,time,val\n2020-01-31,04:20:32,32\n") + msg = f"Missing column provided to 'parse_dates': '{missing_cols}'" + with pytest.raises(ValueError, match=msg): + parser.read_csv( + content, sep=",", names=names, usecols=usecols, parse_dates=parse_dates + ) + + +@skip_pyarrow +def test_date_parser_and_names(all_parsers): + # GH#33699 + parser = all_parsers + data = StringIO("""x,y\n1,2""") + result = parser.read_csv(data, parse_dates=["B"], names=["B"]) + expected = DataFrame({"B": ["y", "2"]}, index=["x", "1"]) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +def test_date_parser_multiindex_columns(all_parsers): + parser = all_parsers + data = """a,b +1,2 +2019-12-31,6""" + result = parser.read_csv(StringIO(data), parse_dates=[("a", "1")], header=[0, 1]) + expected = DataFrame({("a", "1"): Timestamp("2019-12-31"), ("b", "2"): [6]}) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +@pytest.mark.parametrize( + "parse_spec, col_name", + [ + ([[("a", "1"), ("b", "2")]], ("a_b", "1_2")), + ({("foo", "1"): [("a", "1"), ("b", "2")]}, ("foo", "1")), + ], +) +def test_date_parser_multiindex_columns_combine_cols(all_parsers, parse_spec, col_name): + parser = all_parsers + data = """a,b,c +1,2,3 +2019-12,-31,6""" + result = parser.read_csv( + StringIO(data), + parse_dates=parse_spec, + header=[0, 1], + ) + expected = DataFrame({col_name: Timestamp("2019-12-31"), ("c", "3"): [6]}) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +def test_date_parser_usecols_thousands(all_parsers): + # GH#39365 + data = """A,B,C + 1,3,20-09-01-01 + 2,4,20-09-01-01 + """ + + parser = all_parsers + result = parser.read_csv( + StringIO(data), + parse_dates=[1], + usecols=[1, 2], + thousands="-", + ) + expected = DataFrame({"B": [3, 4], "C": [Timestamp("20-09-2001 01:00:00")] * 2}) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +def test_parse_dates_and_keep_orgin_column(all_parsers): + # GH#13378 + parser = all_parsers + data = """A +20150908 +20150909 +""" 
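+    # parse_dates={"date": ["A"]} builds a new "date" column from "A", and
+    # keep_date_col=True retains the source column as well, so both end up
+    # holding the parsed timestamps (see the expected frame below). With
+    # the default keep_date_col=False, only "date" would remain, roughly:
+    #
+    #     parser.read_csv(StringIO(data), parse_dates={"date": ["A"]})
+    #     # -> single column "date"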
+ result = parser.read_csv( + StringIO(data), parse_dates={"date": ["A"]}, keep_date_col=True + ) + expected_data = [Timestamp("2015-09-08"), Timestamp("2015-09-09")] + expected = DataFrame({"date": expected_data, "A": expected_data}) + tm.assert_frame_equal(result, expected) + + +def test_dayfirst_warnings(): + # GH 12585 + warning_msg_day_first = ( + "Parsing '31/12/2014' in DD/MM/YYYY format. Provide " + "format or specify infer_datetime_format=True for consistent parsing." + ) + warning_msg_month_first = ( + "Parsing '03/30/2011' in MM/DD/YYYY format. Provide " + "format or specify infer_datetime_format=True for consistent parsing." + ) + + # CASE 1: valid input + input = "date\n31/12/2014\n10/03/2011" + expected_consistent = DatetimeIndex( + ["2014-12-31", "2011-03-10"], dtype="datetime64[ns]", freq=None, name="date" + ) + expected_inconsistent = DatetimeIndex( + ["2014-12-31", "2011-10-03"], dtype="datetime64[ns]", freq=None, name="date" + ) + + # A. dayfirst arg correct, no warning + res1 = read_csv( + StringIO(input), parse_dates=["date"], dayfirst=True, index_col="date" + ).index + tm.assert_index_equal(expected_consistent, res1) + + # B. dayfirst arg incorrect, warning + incorrect output + with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first): + res2 = read_csv( + StringIO(input), parse_dates=["date"], dayfirst=False, index_col="date" + ).index + tm.assert_index_equal(expected_inconsistent, res2) + + # C. dayfirst default arg, same as B + with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first): + res3 = read_csv( + StringIO(input), parse_dates=["date"], dayfirst=False, index_col="date" + ).index + tm.assert_index_equal(expected_inconsistent, res3) + + # D. infer_datetime_format=True overrides dayfirst default + # no warning + correct result + res4 = read_csv( + StringIO(input), + parse_dates=["date"], + infer_datetime_format=True, + index_col="date", + ).index + tm.assert_index_equal(expected_consistent, res4) + + # CASE 2: invalid input + # cannot consistently process with single format + # warnings *always* raised + + # first in DD/MM/YYYY, second in MM/DD/YYYY + input = "date\n31/12/2014\n03/30/2011" + expected = DatetimeIndex( + ["2014-12-31", "2011-03-30"], dtype="datetime64[ns]", freq=None, name="date" + ) + + # A. use dayfirst=True + with tm.assert_produces_warning(UserWarning, match=warning_msg_month_first): + res5 = read_csv( + StringIO(input), parse_dates=["date"], dayfirst=True, index_col="date" + ).index + tm.assert_index_equal(expected, res5) + + # B. use dayfirst=False + with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first): + res6 = read_csv( + StringIO(input), parse_dates=["date"], dayfirst=False, index_col="date" + ).index + tm.assert_index_equal(expected, res6) + + # C. use dayfirst default arg, same as B + with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first): + res7 = read_csv( + StringIO(input), parse_dates=["date"], dayfirst=False, index_col="date" + ).index + tm.assert_index_equal(expected, res7) + + # D. 
use infer_datetime_format=True + with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first): + res8 = read_csv( + StringIO(input), + parse_dates=["date"], + infer_datetime_format=True, + index_col="date", + ).index + tm.assert_index_equal(expected, res8) + + +@skip_pyarrow +def test_infer_first_column_as_index(all_parsers): + # GH#11019 + parser = all_parsers + data = "a,b,c\n1970-01-01,2,3,4" + result = parser.read_csv(StringIO(data), parse_dates=["a"]) + expected = DataFrame({"a": "2", "b": 3, "c": 4}, index=["1970-01-01"]) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +def test_replace_nans_before_parsing_dates(all_parsers): + # GH#26203 + parser = all_parsers + data = """Test +2012-10-01 +0 +2015-05-15 +# +2017-09-09 +""" + result = parser.read_csv( + StringIO(data), + na_values={"Test": ["#", "0"]}, + parse_dates=["Test"], + date_parser=lambda x: pd.to_datetime(x, format="%Y-%m-%d"), + ) + expected = DataFrame( + { + "Test": [ + Timestamp("2012-10-01"), + pd.NaT, + Timestamp("2015-05-15"), + pd.NaT, + Timestamp("2017-09-09"), + ] + } + ) + tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +def test_parse_dates_and_string_dtype(all_parsers): + # GH#34066 + parser = all_parsers + data = """a,b +1,2019-12-31 +""" + result = parser.read_csv(StringIO(data), dtype="string", parse_dates=["b"]) + expected = DataFrame({"a": ["1"], "b": [Timestamp("2019-12-31")]}) + expected["a"] = expected["a"].astype("string") + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_python_parser_only.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_python_parser_only.py new file mode 100644 index 0000000..73a6c82 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_python_parser_only.py @@ -0,0 +1,460 @@ +""" +Tests that apply specifically to the Python parser. Unless specifically +stated as a Python-specific issue, the goal is to eventually move as many of +these tests out of this module as soon as the C parser can accept further +arguments when parsing. +""" +from __future__ import annotations + +import csv +from io import ( + BytesIO, + StringIO, +) + +import pytest + +from pandas.errors import ( + ParserError, + ParserWarning, +) + +from pandas import ( + DataFrame, + Index, + MultiIndex, +) +import pandas._testing as tm + + +def test_default_separator(python_parser_only): + # see gh-17333 + # + # csv.Sniffer in Python treats "o" as separator. 
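+    # sep=None makes the Python engine sniff the delimiter from a sample
+    # via csv.Sniffer, which falls back to character-frequency analysis:
+    # "o" occurs exactly once on every row here, so it is picked. A
+    # stdlib-only sketch of the same inference:
+    #
+    #     import csv
+    #     csv.Sniffer().sniff("aob\n1o2\n3o4").delimiter  # -> "o"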
+ data = "aob\n1o2\n3o4" + parser = python_parser_only + expected = DataFrame({"a": [1, 3], "b": [2, 4]}) + + result = parser.read_csv(StringIO(data), sep=None) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("skipfooter", ["foo", 1.5, True]) +def test_invalid_skipfooter_non_int(python_parser_only, skipfooter): + # see gh-15925 (comment) + data = "a\n1\n2" + parser = python_parser_only + msg = "skipfooter must be an integer" + + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), skipfooter=skipfooter) + + +def test_invalid_skipfooter_negative(python_parser_only): + # see gh-15925 (comment) + data = "a\n1\n2" + parser = python_parser_only + msg = "skipfooter cannot be negative" + + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), skipfooter=-1) + + +@pytest.mark.parametrize("kwargs", [{"sep": None}, {"delimiter": "|"}]) +def test_sniff_delimiter(python_parser_only, kwargs): + data = """index|A|B|C +foo|1|2|3 +bar|4|5|6 +baz|7|8|9 +""" + parser = python_parser_only + result = parser.read_csv(StringIO(data), index_col=0, **kwargs) + expected = DataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + columns=["A", "B", "C"], + index=Index(["foo", "bar", "baz"], name="index"), + ) + tm.assert_frame_equal(result, expected) + + +def test_sniff_delimiter_comment(python_parser_only): + data = """# comment line +index|A|B|C +# comment line +foo|1|2|3 # ignore | this +bar|4|5|6 +baz|7|8|9 +""" + parser = python_parser_only + result = parser.read_csv(StringIO(data), index_col=0, sep=None, comment="#") + expected = DataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + columns=["A", "B", "C"], + index=Index(["foo", "bar", "baz"], name="index"), + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("encoding", [None, "utf-8"]) +def test_sniff_delimiter_encoding(python_parser_only, encoding): + parser = python_parser_only + data = """ignore this +ignore this too +index|A|B|C +foo|1|2|3 +bar|4|5|6 +baz|7|8|9 +""" + + if encoding is not None: + from io import TextIOWrapper + + data = data.encode(encoding) + data = BytesIO(data) + data = TextIOWrapper(data, encoding=encoding) + else: + data = StringIO(data) + + result = parser.read_csv(data, index_col=0, sep=None, skiprows=2, encoding=encoding) + expected = DataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + columns=["A", "B", "C"], + index=Index(["foo", "bar", "baz"], name="index"), + ) + tm.assert_frame_equal(result, expected) + + +def test_single_line(python_parser_only): + # see gh-6607: sniff separator + parser = python_parser_only + result = parser.read_csv(StringIO("1,2"), names=["a", "b"], header=None, sep=None) + + expected = DataFrame({"a": [1], "b": [2]}) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("kwargs", [{"skipfooter": 2}, {"nrows": 3}]) +def test_skipfooter(python_parser_only, kwargs): + # see gh-6607 + data = """A,B,C +1,2,3 +4,5,6 +7,8,9 +want to skip this +also also skip this +""" + parser = python_parser_only + result = parser.read_csv(StringIO(data), **kwargs) + + expected = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["A", "B", "C"]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "compression,klass", [("gzip", "GzipFile"), ("bz2", "BZ2File")] +) +def test_decompression_regex_sep(python_parser_only, csv1, compression, klass): + # see gh-6607 + parser = python_parser_only + + with open(csv1, "rb") as f: + data = f.read() + + data = data.replace(b",", b"::") + expected = 
parser.read_csv(csv1) + + module = pytest.importorskip(compression) + klass = getattr(module, klass) + + with tm.ensure_clean() as path: + tmp = klass(path, mode="wb") + tmp.write(data) + tmp.close() + + result = parser.read_csv(path, sep="::", compression=compression) + tm.assert_frame_equal(result, expected) + + +def test_read_csv_buglet_4x_multi_index(python_parser_only): + # see gh-6607 + data = """ A B C D E +one two three four +a b 10.0032 5 -0.5109 -2.3358 -0.4645 0.05076 0.3640 +a q 20 4 0.4473 1.4152 0.2834 1.00661 0.1744 +x q 30 3 -0.6662 -0.5243 -0.3580 0.89145 2.5838""" + parser = python_parser_only + + expected = DataFrame( + [ + [-0.5109, -2.3358, -0.4645, 0.05076, 0.3640], + [0.4473, 1.4152, 0.2834, 1.00661, 0.1744], + [-0.6662, -0.5243, -0.3580, 0.89145, 2.5838], + ], + columns=["A", "B", "C", "D", "E"], + index=MultiIndex.from_tuples( + [("a", "b", 10.0032, 5), ("a", "q", 20, 4), ("x", "q", 30, 3)], + names=["one", "two", "three", "four"], + ), + ) + result = parser.read_csv(StringIO(data), sep=r"\s+") + tm.assert_frame_equal(result, expected) + + +def test_read_csv_buglet_4x_multi_index2(python_parser_only): + # see gh-6893 + data = " A B C\na b c\n1 3 7 0 3 6\n3 1 4 1 5 9" + parser = python_parser_only + + expected = DataFrame.from_records( + [(1, 3, 7, 0, 3, 6), (3, 1, 4, 1, 5, 9)], + columns=list("abcABC"), + index=list("abc"), + ) + result = parser.read_csv(StringIO(data), sep=r"\s+") + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("add_footer", [True, False]) +def test_skipfooter_with_decimal(python_parser_only, add_footer): + # see gh-6971 + data = "1#2\n3#4" + parser = python_parser_only + expected = DataFrame({"a": [1.2, 3.4]}) + + if add_footer: + # The stray footer line should not mess with the + # casting of the first two lines if we skip it. 
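+        # Without skipfooter, the literal "Footer" row cannot be parsed as
+        # a "#"-decimal float, so the column would fall back to object
+        # dtype instead of float64, roughly:
+        #
+        #     parser.read_csv(StringIO(data + "\nFooter"), names=["a"], decimal="#")
+        #     # -> "a" is object, not [1.2, 3.4]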
+ kwargs = {"skipfooter": 1} + data += "\nFooter" + else: + kwargs = {} + + result = parser.read_csv(StringIO(data), names=["a"], decimal="#", **kwargs) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "sep", ["::", "#####", "!!!", "123", "#1!c5", "%!c!d", "@@#4:2", "_!pd#_"] +) +@pytest.mark.parametrize( + "encoding", ["utf-16", "utf-16-be", "utf-16-le", "utf-32", "cp037"] +) +def test_encoding_non_utf8_multichar_sep(python_parser_only, sep, encoding): + # see gh-3404 + expected = DataFrame({"a": [1], "b": [2]}) + parser = python_parser_only + + data = "1" + sep + "2" + encoded_data = data.encode(encoding) + + result = parser.read_csv( + BytesIO(encoded_data), sep=sep, names=["a", "b"], encoding=encoding + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("quoting", [csv.QUOTE_MINIMAL, csv.QUOTE_NONE]) +def test_multi_char_sep_quotes(python_parser_only, quoting): + # see gh-13374 + kwargs = {"sep": ",,"} + parser = python_parser_only + + data = 'a,,b\n1,,a\n2,,"2,,b"' + + if quoting == csv.QUOTE_NONE: + msg = "Expected 2 fields in line 3, saw 3" + with pytest.raises(ParserError, match=msg): + parser.read_csv(StringIO(data), quoting=quoting, **kwargs) + else: + msg = "ignored when a multi-char delimiter is used" + with pytest.raises(ParserError, match=msg): + parser.read_csv(StringIO(data), quoting=quoting, **kwargs) + + +def test_none_delimiter(python_parser_only, capsys): + # see gh-13374 and gh-17465 + parser = python_parser_only + data = "a,b,c\n0,1,2\n3,4,5,6\n7,8,9" + expected = DataFrame({"a": [0, 7], "b": [1, 8], "c": [2, 9]}) + + # We expect the third line in the data to be + # skipped because it is malformed, but we do + # not expect any errors to occur. + result = parser.read_csv(StringIO(data), header=0, sep=None, on_bad_lines="warn") + tm.assert_frame_equal(result, expected) + + captured = capsys.readouterr() + assert "Skipping line 3" in captured.err + + +@pytest.mark.parametrize("data", ['a\n1\n"b"a', 'a,b,c\ncat,foo,bar\ndog,foo,"baz']) +@pytest.mark.parametrize("skipfooter", [0, 1]) +def test_skipfooter_bad_row(python_parser_only, data, skipfooter): + # see gh-13879 and gh-15910 + parser = python_parser_only + if skipfooter: + msg = "parsing errors in the skipped footer rows" + with pytest.raises(ParserError, match=msg): + parser.read_csv(StringIO(data), skipfooter=skipfooter) + else: + msg = "unexpected end of data|expected after" + with pytest.raises(ParserError, match=msg): + parser.read_csv(StringIO(data), skipfooter=skipfooter) + + +def test_malformed_skipfooter(python_parser_only): + parser = python_parser_only + data = """ignore +A,B,C +1,2,3 # comment +1,2,3,4,5 +2,3,4 +footer +""" + msg = "Expected 3 fields in line 4, saw 5" + with pytest.raises(ParserError, match=msg): + parser.read_csv(StringIO(data), header=1, comment="#", skipfooter=1) + + +def test_python_engine_file_no_next(python_parser_only): + parser = python_parser_only + + class NoNextBuffer: + def __init__(self, csv_data): + self.data = csv_data + + def __iter__(self): + return self.data.__iter__() + + def read(self): + return self.data + + def readline(self): + return self.data + + parser.read_csv(NoNextBuffer("a\n1")) + + +@pytest.mark.parametrize("bad_line_func", [lambda x: ["2", "3"], lambda x: x[:2]]) +def test_on_bad_lines_callable(python_parser_only, bad_line_func): + # GH 5686 + parser = python_parser_only + data = """a,b +1,2 +2,3,4,5,6 +3,4 +""" + bad_sio = StringIO(data) + result = parser.read_csv(bad_sio, on_bad_lines=bad_line_func) + 
expected = DataFrame({"a": [1, 2, 3], "b": [2, 3, 4]}) + tm.assert_frame_equal(result, expected) + + +def test_on_bad_lines_callable_write_to_external_list(python_parser_only): + # GH 5686 + parser = python_parser_only + data = """a,b +1,2 +2,3,4,5,6 +3,4 +""" + bad_sio = StringIO(data) + lst = [] + + def bad_line_func(bad_line: list[str]) -> list[str]: + lst.append(bad_line) + return ["2", "3"] + + result = parser.read_csv(bad_sio, on_bad_lines=bad_line_func) + expected = DataFrame({"a": [1, 2, 3], "b": [2, 3, 4]}) + tm.assert_frame_equal(result, expected) + assert lst == [["2", "3", "4", "5", "6"]] + + +@pytest.mark.parametrize("bad_line_func", [lambda x: ["foo", "bar"], lambda x: x[:2]]) +@pytest.mark.parametrize("sep", [",", "111"]) +def test_on_bad_lines_callable_iterator_true(python_parser_only, bad_line_func, sep): + # GH 5686 + # iterator=True has a separate code path than iterator=False + parser = python_parser_only + data = f""" +0{sep}1 +hi{sep}there +foo{sep}bar{sep}baz +good{sep}bye +""" + bad_sio = StringIO(data) + result_iter = parser.read_csv( + bad_sio, on_bad_lines=bad_line_func, chunksize=1, iterator=True, sep=sep + ) + expecteds = [ + {"0": "hi", "1": "there"}, + {"0": "foo", "1": "bar"}, + {"0": "good", "1": "bye"}, + ] + for i, (result, expected) in enumerate(zip(result_iter, expecteds)): + expected = DataFrame(expected, index=range(i, i + 1)) + tm.assert_frame_equal(result, expected) + + +def test_on_bad_lines_callable_dont_swallow_errors(python_parser_only): + # GH 5686 + parser = python_parser_only + data = """a,b +1,2 +2,3,4,5,6 +3,4 +""" + bad_sio = StringIO(data) + msg = "This function is buggy." + + def bad_line_func(bad_line): + raise ValueError(msg) + + with pytest.raises(ValueError, match=msg): + parser.read_csv(bad_sio, on_bad_lines=bad_line_func) + + +def test_on_bad_lines_callable_not_expected_length(python_parser_only): + # GH 5686 + parser = python_parser_only + data = """a,b +1,2 +2,3,4,5,6 +3,4 +""" + bad_sio = StringIO(data) + + with tm.assert_produces_warning(ParserWarning, match="Length of header or names"): + result = parser.read_csv(bad_sio, on_bad_lines=lambda x: x) + expected = DataFrame({"a": [1, 2, 3], "b": [2, 3, 4]}) + tm.assert_frame_equal(result, expected) + + +def test_on_bad_lines_callable_returns_none(python_parser_only): + # GH 5686 + parser = python_parser_only + data = """a,b +1,2 +2,3,4,5,6 +3,4 +""" + bad_sio = StringIO(data) + + result = parser.read_csv(bad_sio, on_bad_lines=lambda x: None) + expected = DataFrame({"a": [1, 3], "b": [2, 4]}) + tm.assert_frame_equal(result, expected) + + +def test_on_bad_lines_index_col_inferred(python_parser_only): + # GH 5686 + parser = python_parser_only + data = """a,b +1,2,3 +4,5,6 +""" + bad_sio = StringIO(data) + + result = parser.read_csv(bad_sio, on_bad_lines=lambda x: ["99", "99"]) + expected = DataFrame({"a": [2, 5], "b": [3, 6]}, index=[1, 4]) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_quoting.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_quoting.py new file mode 100644 index 0000000..456dd04 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_quoting.py @@ -0,0 +1,161 @@ +""" +Tests that quoting specifications are properly handled +during parsing for all of the parsers defined in parsers.py +""" + +import csv +from io import StringIO + +import pytest + +from pandas.errors import ParserError + +from pandas import DataFrame +import pandas._testing as tm + 
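+# Applying the "pyarrow_skip" fixture module-wide skips every test in this
+# file for the pyarrow engine; the quoting options are exercised only
+# against the c and python engines. The per-test equivalent would be:
+#
+#     @pytest.mark.usefixtures("pyarrow_skip")
+#     def test_quote_char_basic(all_parsers):
+#         ...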
+pytestmark = pytest.mark.usefixtures("pyarrow_skip") + + +@pytest.mark.parametrize( + "kwargs,msg", + [ + ({"quotechar": "foo"}, '"quotechar" must be a(n)? 1-character string'), + ( + {"quotechar": None, "quoting": csv.QUOTE_MINIMAL}, + "quotechar must be set if quoting enabled", + ), + ({"quotechar": 2}, '"quotechar" must be string( or None)?, not int'), + ], +) +def test_bad_quote_char(all_parsers, kwargs, msg): + data = "1,2,3" + parser = all_parsers + + with pytest.raises(TypeError, match=msg): + parser.read_csv(StringIO(data), **kwargs) + + +@pytest.mark.parametrize( + "quoting,msg", + [ + ("foo", '"quoting" must be an integer'), + (5, 'bad "quoting" value'), # quoting must be in the range [0, 3] + ], +) +def test_bad_quoting(all_parsers, quoting, msg): + data = "1,2,3" + parser = all_parsers + + with pytest.raises(TypeError, match=msg): + parser.read_csv(StringIO(data), quoting=quoting) + + +def test_quote_char_basic(all_parsers): + parser = all_parsers + data = 'a,b,c\n1,2,"cat"' + expected = DataFrame([[1, 2, "cat"]], columns=["a", "b", "c"]) + + result = parser.read_csv(StringIO(data), quotechar='"') + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("quote_char", ["~", "*", "%", "$", "@", "P"]) +def test_quote_char_various(all_parsers, quote_char): + parser = all_parsers + expected = DataFrame([[1, 2, "cat"]], columns=["a", "b", "c"]) + + data = 'a,b,c\n1,2,"cat"' + new_data = data.replace('"', quote_char) + + result = parser.read_csv(StringIO(new_data), quotechar=quote_char) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("quoting", [csv.QUOTE_MINIMAL, csv.QUOTE_NONE]) +@pytest.mark.parametrize("quote_char", ["", None]) +def test_null_quote_char(all_parsers, quoting, quote_char): + kwargs = {"quotechar": quote_char, "quoting": quoting} + data = "a,b,c\n1,2,3" + parser = all_parsers + + if quoting != csv.QUOTE_NONE: + # Sanity checking. + msg = "quotechar must be set if quoting enabled" + + with pytest.raises(TypeError, match=msg): + parser.read_csv(StringIO(data), **kwargs) + else: + expected = DataFrame([[1, 2, 3]], columns=["a", "b", "c"]) + result = parser.read_csv(StringIO(data), **kwargs) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "kwargs,exp_data", + [ + ({}, [[1, 2, "foo"]]), # Test default. + # QUOTE_MINIMAL only applies to CSV writing, so no effect on reading. + ({"quotechar": '"', "quoting": csv.QUOTE_MINIMAL}, [[1, 2, "foo"]]), + # QUOTE_MINIMAL only applies to CSV writing, so no effect on reading. + ({"quotechar": '"', "quoting": csv.QUOTE_ALL}, [[1, 2, "foo"]]), + # QUOTE_NONE tells the reader to do no special handling + # of quote characters and leave them alone. 
+ ({"quotechar": '"', "quoting": csv.QUOTE_NONE}, [[1, 2, '"foo"']]), + # QUOTE_NONNUMERIC tells the reader to cast + # all non-quoted fields to float + ({"quotechar": '"', "quoting": csv.QUOTE_NONNUMERIC}, [[1.0, 2.0, "foo"]]), + ], +) +def test_quoting_various(all_parsers, kwargs, exp_data): + data = '1,2,"foo"' + parser = all_parsers + columns = ["a", "b", "c"] + + result = parser.read_csv(StringIO(data), names=columns, **kwargs) + expected = DataFrame(exp_data, columns=columns) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "doublequote,exp_data", [(True, [[3, '4 " 5']]), (False, [[3, '4 " 5"']])] +) +def test_double_quote(all_parsers, doublequote, exp_data): + parser = all_parsers + data = 'a,b\n3,"4 "" 5"' + + result = parser.read_csv(StringIO(data), quotechar='"', doublequote=doublequote) + expected = DataFrame(exp_data, columns=["a", "b"]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("quotechar", ['"', "\u0001"]) +def test_quotechar_unicode(all_parsers, quotechar): + # see gh-14477 + data = "a\n1" + parser = all_parsers + expected = DataFrame({"a": [1]}) + + result = parser.read_csv(StringIO(data), quotechar=quotechar) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("balanced", [True, False]) +def test_unbalanced_quoting(all_parsers, balanced): + # see gh-22789. + parser = all_parsers + data = 'a,b,c\n1,2,"3' + + if balanced: + # Re-balance the quoting and read in without errors. + expected = DataFrame([[1, 2, 3]], columns=["a", "b", "c"]) + result = parser.read_csv(StringIO(data + '"')) + tm.assert_frame_equal(result, expected) + else: + msg = ( + "EOF inside string starting at row 1" + if parser.engine == "c" + else "unexpected end of data" + ) + + with pytest.raises(ParserError, match=msg): + parser.read_csv(StringIO(data)) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_read_fwf.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_read_fwf.py new file mode 100644 index 0000000..3de73e5 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_read_fwf.py @@ -0,0 +1,932 @@ +""" +Tests the 'read_fwf' function in parsers.py. This +test suite is independent of the others because the +engine is set to 'python-fwf' internally. 
+""" + +from datetime import datetime +from io import ( + BytesIO, + StringIO, +) +from pathlib import Path + +import numpy as np +import pytest + +from pandas.errors import EmptyDataError + +from pandas import ( + DataFrame, + DatetimeIndex, +) +import pandas._testing as tm + +import pandas.io.common as icom +from pandas.io.parsers import ( + read_csv, + read_fwf, +) + + +def test_basic(): + data = """\ +A B C D +201158 360.242940 149.910199 11950.7 +201159 444.953632 166.985655 11788.4 +201160 364.136849 183.628767 11806.2 +201161 413.836124 184.375703 11916.8 +201162 502.953953 173.237159 12468.3 +""" + result = read_fwf(StringIO(data)) + expected = DataFrame( + [ + [201158, 360.242940, 149.910199, 11950.7], + [201159, 444.953632, 166.985655, 11788.4], + [201160, 364.136849, 183.628767, 11806.2], + [201161, 413.836124, 184.375703, 11916.8], + [201162, 502.953953, 173.237159, 12468.3], + ], + columns=["A", "B", "C", "D"], + ) + tm.assert_frame_equal(result, expected) + + +def test_colspecs(): + data = """\ +A B C D E +201158 360.242940 149.910199 11950.7 +201159 444.953632 166.985655 11788.4 +201160 364.136849 183.628767 11806.2 +201161 413.836124 184.375703 11916.8 +201162 502.953953 173.237159 12468.3 +""" + colspecs = [(0, 4), (4, 8), (8, 20), (21, 33), (34, 43)] + result = read_fwf(StringIO(data), colspecs=colspecs) + + expected = DataFrame( + [ + [2011, 58, 360.242940, 149.910199, 11950.7], + [2011, 59, 444.953632, 166.985655, 11788.4], + [2011, 60, 364.136849, 183.628767, 11806.2], + [2011, 61, 413.836124, 184.375703, 11916.8], + [2011, 62, 502.953953, 173.237159, 12468.3], + ], + columns=["A", "B", "C", "D", "E"], + ) + tm.assert_frame_equal(result, expected) + + +def test_widths(): + data = """\ +A B C D E +2011 58 360.242940 149.910199 11950.7 +2011 59 444.953632 166.985655 11788.4 +2011 60 364.136849 183.628767 11806.2 +2011 61 413.836124 184.375703 11916.8 +2011 62 502.953953 173.237159 12468.3 +""" + result = read_fwf(StringIO(data), widths=[5, 5, 13, 13, 7]) + + expected = DataFrame( + [ + [2011, 58, 360.242940, 149.910199, 11950.7], + [2011, 59, 444.953632, 166.985655, 11788.4], + [2011, 60, 364.136849, 183.628767, 11806.2], + [2011, 61, 413.836124, 184.375703, 11916.8], + [2011, 62, 502.953953, 173.237159, 12468.3], + ], + columns=["A", "B", "C", "D", "E"], + ) + tm.assert_frame_equal(result, expected) + + +def test_non_space_filler(): + # From Thomas Kluyver: + # + # Apparently, some non-space filler characters can be seen, this is + # supported by specifying the 'delimiter' character: + # + # http://publib.boulder.ibm.com/infocenter/dmndhelp/v6r1mx/index.jsp?topic=/com.ibm.wbit.612.help.config.doc/topics/rfixwidth.html + data = """\ +A~~~~B~~~~C~~~~~~~~~~~~D~~~~~~~~~~~~E +201158~~~~360.242940~~~149.910199~~~11950.7 +201159~~~~444.953632~~~166.985655~~~11788.4 +201160~~~~364.136849~~~183.628767~~~11806.2 +201161~~~~413.836124~~~184.375703~~~11916.8 +201162~~~~502.953953~~~173.237159~~~12468.3 +""" + colspecs = [(0, 4), (4, 8), (8, 20), (21, 33), (34, 43)] + result = read_fwf(StringIO(data), colspecs=colspecs, delimiter="~") + + expected = DataFrame( + [ + [2011, 58, 360.242940, 149.910199, 11950.7], + [2011, 59, 444.953632, 166.985655, 11788.4], + [2011, 60, 364.136849, 183.628767, 11806.2], + [2011, 61, 413.836124, 184.375703, 11916.8], + [2011, 62, 502.953953, 173.237159, 12468.3], + ], + columns=["A", "B", "C", "D", "E"], + ) + tm.assert_frame_equal(result, expected) + + +def test_over_specified(): + data = """\ +A B C D E +201158 360.242940 149.910199 11950.7 +201159 
444.953632 166.985655 11788.4 +201160 364.136849 183.628767 11806.2 +201161 413.836124 184.375703 11916.8 +201162 502.953953 173.237159 12468.3 +""" + colspecs = [(0, 4), (4, 8), (8, 20), (21, 33), (34, 43)] + + with pytest.raises(ValueError, match="must specify only one of"): + read_fwf(StringIO(data), colspecs=colspecs, widths=[6, 10, 10, 7]) + + +def test_under_specified(): + data = """\ +A B C D E +201158 360.242940 149.910199 11950.7 +201159 444.953632 166.985655 11788.4 +201160 364.136849 183.628767 11806.2 +201161 413.836124 184.375703 11916.8 +201162 502.953953 173.237159 12468.3 +""" + with pytest.raises(ValueError, match="Must specify either"): + read_fwf(StringIO(data), colspecs=None, widths=None) + + +def test_read_csv_compat(): + csv_data = """\ +A,B,C,D,E +2011,58,360.242940,149.910199,11950.7 +2011,59,444.953632,166.985655,11788.4 +2011,60,364.136849,183.628767,11806.2 +2011,61,413.836124,184.375703,11916.8 +2011,62,502.953953,173.237159,12468.3 +""" + expected = read_csv(StringIO(csv_data), engine="python") + + fwf_data = """\ +A B C D E +201158 360.242940 149.910199 11950.7 +201159 444.953632 166.985655 11788.4 +201160 364.136849 183.628767 11806.2 +201161 413.836124 184.375703 11916.8 +201162 502.953953 173.237159 12468.3 +""" + colspecs = [(0, 4), (4, 8), (8, 20), (21, 33), (34, 43)] + result = read_fwf(StringIO(fwf_data), colspecs=colspecs) + tm.assert_frame_equal(result, expected) + + +def test_bytes_io_input(): + result = read_fwf(BytesIO("שלום\nשלום".encode()), widths=[2, 2], encoding="utf8") + expected = DataFrame([["של", "ום"]], columns=["של", "ום"]) + tm.assert_frame_equal(result, expected) + + +def test_fwf_colspecs_is_list_or_tuple(): + data = """index,A,B,C,D +foo,2,3,4,5 +bar,7,8,9,10 +baz,12,13,14,15 +qux,12,13,14,15 +foo2,12,13,14,15 +bar2,12,13,14,15 +""" + + msg = "column specifications must be a list or tuple.+" + + with pytest.raises(TypeError, match=msg): + read_fwf(StringIO(data), colspecs={"a": 1}, delimiter=",") + + +def test_fwf_colspecs_is_list_or_tuple_of_two_element_tuples(): + data = """index,A,B,C,D +foo,2,3,4,5 +bar,7,8,9,10 +baz,12,13,14,15 +qux,12,13,14,15 +foo2,12,13,14,15 +bar2,12,13,14,15 +""" + + msg = "Each column specification must be.+" + + with pytest.raises(TypeError, match=msg): + read_fwf(StringIO(data), colspecs=[("a", 1)]) + + +@pytest.mark.parametrize( + "colspecs,exp_data", + [ + ([(0, 3), (3, None)], [[123, 456], [456, 789]]), + ([(None, 3), (3, 6)], [[123, 456], [456, 789]]), + ([(0, None), (3, None)], [[123456, 456], [456789, 789]]), + ([(None, None), (3, 6)], [[123456, 456], [456789, 789]]), + ], +) +def test_fwf_colspecs_none(colspecs, exp_data): + # see gh-7079 + data = """\ +123456 +456789 +""" + expected = DataFrame(exp_data) + + result = read_fwf(StringIO(data), colspecs=colspecs, header=None) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "infer_nrows,exp_data", + [ + # infer_nrows --> colspec == [(2, 3), (5, 6)] + (1, [[1, 2], [3, 8]]), + # infer_nrows > number of rows + (10, [[1, 2], [123, 98]]), + ], +) +def test_fwf_colspecs_infer_nrows(infer_nrows, exp_data): + # see gh-15138 + data = """\ + 1 2 +123 98 +""" + expected = DataFrame(exp_data) + + result = read_fwf(StringIO(data), infer_nrows=infer_nrows, header=None) + tm.assert_frame_equal(result, expected) + + +def test_fwf_regression(): + # see gh-3594 + # + # Turns out "T060" is parsable as a datetime slice! 
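# Editorial aside, not part of the vendored test: the stamps below pack
# year, day-of-year and time into a single field, hence the custom
# date_parser. A quick stdlib check of the "%Y%j%H%M%S" format used here:
from datetime import datetime as _dt

_stamp = _dt.strptime("2009164202000", "%Y%j%H%M%S")
assert _stamp == _dt(2009, 6, 13, 20, 20)  # day 164 of 2009 is 13 June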
+ tz_list = [1, 10, 20, 30, 60, 80, 100] + widths = [16] + [8] * len(tz_list) + names = ["SST"] + [f"T{z:03d}" for z in tz_list[1:]] + + data = """ 2009164202000 9.5403 9.4105 8.6571 7.8372 6.0612 5.8843 5.5192 +2009164203000 9.5435 9.2010 8.6167 7.8176 6.0804 5.8728 5.4869 +2009164204000 9.5873 9.1326 8.4694 7.5889 6.0422 5.8526 5.4657 +2009164205000 9.5810 9.0896 8.4009 7.4652 6.0322 5.8189 5.4379 +2009164210000 9.6034 9.0897 8.3822 7.4905 6.0908 5.7904 5.4039 +""" + + result = read_fwf( + StringIO(data), + index_col=0, + header=None, + names=names, + widths=widths, + parse_dates=True, + date_parser=lambda s: datetime.strptime(s, "%Y%j%H%M%S"), + ) + expected = DataFrame( + [ + [9.5403, 9.4105, 8.6571, 7.8372, 6.0612, 5.8843, 5.5192], + [9.5435, 9.2010, 8.6167, 7.8176, 6.0804, 5.8728, 5.4869], + [9.5873, 9.1326, 8.4694, 7.5889, 6.0422, 5.8526, 5.4657], + [9.5810, 9.0896, 8.4009, 7.4652, 6.0322, 5.8189, 5.4379], + [9.6034, 9.0897, 8.3822, 7.4905, 6.0908, 5.7904, 5.4039], + ], + index=DatetimeIndex( + [ + "2009-06-13 20:20:00", + "2009-06-13 20:30:00", + "2009-06-13 20:40:00", + "2009-06-13 20:50:00", + "2009-06-13 21:00:00", + ] + ), + columns=["SST", "T010", "T020", "T030", "T060", "T080", "T100"], + ) + tm.assert_frame_equal(result, expected) + + +def test_fwf_for_uint8(): + data = """1421302965.213420 PRI=3 PGN=0xef00 DST=0x17 SRC=0x28 04 154 00 00 00 00 00 127 +1421302964.226776 PRI=6 PGN=0xf002 SRC=0x47 243 00 00 255 247 00 00 71""" # noqa:E501 + df = read_fwf( + StringIO(data), + colspecs=[(0, 17), (25, 26), (33, 37), (49, 51), (58, 62), (63, 1000)], + names=["time", "pri", "pgn", "dst", "src", "data"], + converters={ + "pgn": lambda x: int(x, 16), + "src": lambda x: int(x, 16), + "dst": lambda x: int(x, 16), + "data": lambda x: len(x.split(" ")), + }, + ) + + expected = DataFrame( + [ + [1421302965.213420, 3, 61184, 23, 40, 8], + [1421302964.226776, 6, 61442, None, 71, 8], + ], + columns=["time", "pri", "pgn", "dst", "src", "data"], + ) + expected["dst"] = expected["dst"].astype(object) + tm.assert_frame_equal(df, expected) + + +@pytest.mark.parametrize("comment", ["#", "~", "!"]) +def test_fwf_comment(comment): + data = """\ + 1 2. 
4 #hello world + 5 NaN 10.0 +""" + data = data.replace("#", comment) + + colspecs = [(0, 3), (4, 9), (9, 25)] + expected = DataFrame([[1, 2.0, 4], [5, np.nan, 10.0]]) + + result = read_fwf(StringIO(data), colspecs=colspecs, header=None, comment=comment) + tm.assert_almost_equal(result, expected) + + +def test_fwf_skip_blank_lines(): + data = """ + +A B C D + +201158 360.242940 149.910199 11950.7 +201159 444.953632 166.985655 11788.4 + + +201162 502.953953 173.237159 12468.3 + +""" + result = read_fwf(StringIO(data), skip_blank_lines=True) + expected = DataFrame( + [ + [201158, 360.242940, 149.910199, 11950.7], + [201159, 444.953632, 166.985655, 11788.4], + [201162, 502.953953, 173.237159, 12468.3], + ], + columns=["A", "B", "C", "D"], + ) + tm.assert_frame_equal(result, expected) + + data = """\ +A B C D +201158 360.242940 149.910199 11950.7 +201159 444.953632 166.985655 11788.4 + + +201162 502.953953 173.237159 12468.3 +""" + result = read_fwf(StringIO(data), skip_blank_lines=False) + expected = DataFrame( + [ + [201158, 360.242940, 149.910199, 11950.7], + [201159, 444.953632, 166.985655, 11788.4], + [np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan], + [201162, 502.953953, 173.237159, 12468.3], + ], + columns=["A", "B", "C", "D"], + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("thousands", [",", "#", "~"]) +def test_fwf_thousands(thousands): + data = """\ + 1 2,334.0 5 +10 13 10. +""" + data = data.replace(",", thousands) + + colspecs = [(0, 3), (3, 11), (12, 16)] + expected = DataFrame([[1, 2334.0, 5], [10, 13, 10.0]]) + + result = read_fwf( + StringIO(data), header=None, colspecs=colspecs, thousands=thousands + ) + tm.assert_almost_equal(result, expected) + + +@pytest.mark.parametrize("header", [True, False]) +def test_bool_header_arg(header): + # see gh-6114 + data = """\ +MyColumn + a + b + a + b""" + + msg = "Passing a bool to header is invalid" + with pytest.raises(TypeError, match=msg): + read_fwf(StringIO(data), header=header) + + +def test_full_file(): + # File with all values. + test = """index A B C +2000-01-03T00:00:00 0.980268513777 3 foo +2000-01-04T00:00:00 1.04791624281 -4 bar +2000-01-05T00:00:00 0.498580885705 73 baz +2000-01-06T00:00:00 1.12020151869 1 foo +2000-01-07T00:00:00 0.487094399463 0 bar +2000-01-10T00:00:00 0.836648671666 2 baz +2000-01-11T00:00:00 0.157160753327 34 foo""" + colspecs = ((0, 19), (21, 35), (38, 40), (42, 45)) + expected = read_fwf(StringIO(test), colspecs=colspecs) + + result = read_fwf(StringIO(test)) + tm.assert_frame_equal(result, expected) + + +def test_full_file_with_missing(): + # File with missing values. + test = """index A B C +2000-01-03T00:00:00 0.980268513777 3 foo +2000-01-04T00:00:00 1.04791624281 -4 bar + 0.498580885705 73 baz +2000-01-06T00:00:00 1.12020151869 1 foo +2000-01-07T00:00:00 0 bar +2000-01-10T00:00:00 0.836648671666 2 baz + 34""" + colspecs = ((0, 19), (21, 35), (38, 40), (42, 45)) + expected = read_fwf(StringIO(test), colspecs=colspecs) + + result = read_fwf(StringIO(test)) + tm.assert_frame_equal(result, expected) + + +def test_full_file_with_spaces(): + # File with spaces in columns. 
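# Editorial aside, not part of the vendored test: read_fwf only places a
# column break where *every* sampled row has whitespace, which is what
# keeps a multi-word value such as "Jennifer Love Hewitt" inside a single
# column below. A hedged sketch with made-up rows:
from io import StringIO as _SIO
from pandas import read_fwf as _read_fwf

_sample = (
    "Account  Name            Balance\n"
    "101      Keanu Reeves    9315.45\n"
    "312      Gerard Butler     90.00\n"
)
_read_fwf(_SIO(_sample))  # "Keanu Reeves" stays one Name value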
+ test = """ +Account Name Balance CreditLimit AccountCreated +101 Keanu Reeves 9315.45 10000.00 1/17/1998 +312 Gerard Butler 90.00 1000.00 8/6/2003 +868 Jennifer Love Hewitt 0 17000.00 5/25/1985 +761 Jada Pinkett-Smith 49654.87 100000.00 12/5/2006 +317 Bill Murray 789.65 5000.00 2/5/2007 +""".strip( + "\r\n" + ) + colspecs = ((0, 7), (8, 28), (30, 38), (42, 53), (56, 70)) + expected = read_fwf(StringIO(test), colspecs=colspecs) + + result = read_fwf(StringIO(test)) + tm.assert_frame_equal(result, expected) + + +def test_full_file_with_spaces_and_missing(): + # File with spaces and missing values in columns. + test = """ +Account Name Balance CreditLimit AccountCreated +101 10000.00 1/17/1998 +312 Gerard Butler 90.00 1000.00 8/6/2003 +868 5/25/1985 +761 Jada Pinkett-Smith 49654.87 100000.00 12/5/2006 +317 Bill Murray 789.65 +""".strip( + "\r\n" + ) + colspecs = ((0, 7), (8, 28), (30, 38), (42, 53), (56, 70)) + expected = read_fwf(StringIO(test), colspecs=colspecs) + + result = read_fwf(StringIO(test)) + tm.assert_frame_equal(result, expected) + + +def test_messed_up_data(): + # Completely messed up file. + test = """ + Account Name Balance Credit Limit Account Created + 101 10000.00 1/17/1998 + 312 Gerard Butler 90.00 1000.00 + + 761 Jada Pinkett-Smith 49654.87 100000.00 12/5/2006 + 317 Bill Murray 789.65 +""".strip( + "\r\n" + ) + colspecs = ((2, 10), (15, 33), (37, 45), (49, 61), (64, 79)) + expected = read_fwf(StringIO(test), colspecs=colspecs) + + result = read_fwf(StringIO(test)) + tm.assert_frame_equal(result, expected) + + +def test_multiple_delimiters(): + test = r""" +col1~~~~~col2 col3++++++++++++++++++col4 +~~22.....11.0+++foo~~~~~~~~~~Keanu Reeves + 33+++122.33\\\bar.........Gerard Butler +++44~~~~12.01 baz~~Jennifer Love Hewitt +~~55 11+++foo++++Jada Pinkett-Smith +..66++++++.03~~~bar Bill Murray +""".strip( + "\r\n" + ) + delimiter = " +~.\\" + colspecs = ((0, 4), (7, 13), (15, 19), (21, 41)) + expected = read_fwf(StringIO(test), colspecs=colspecs, delimiter=delimiter) + + result = read_fwf(StringIO(test), delimiter=delimiter) + tm.assert_frame_equal(result, expected) + + +def test_variable_width_unicode(): + data = """ +שלום שלום +ום שלל +של ום +""".strip( + "\r\n" + ) + encoding = "utf8" + kwargs = {"header": None, "encoding": encoding} + + expected = read_fwf( + BytesIO(data.encode(encoding)), colspecs=[(0, 4), (5, 9)], **kwargs + ) + result = read_fwf(BytesIO(data.encode(encoding)), **kwargs) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("dtype", [{}, {"a": "float64", "b": str, "c": "int32"}]) +def test_dtype(dtype): + data = """ a b c +1 2 3.2 +3 4 5.2 +""" + colspecs = [(0, 5), (5, 10), (10, None)] + result = read_fwf(StringIO(data), colspecs=colspecs, dtype=dtype) + + expected = DataFrame( + {"a": [1, 3], "b": [2, 4], "c": [3.2, 5.2]}, columns=["a", "b", "c"] + ) + + for col, dt in dtype.items(): + expected[col] = expected[col].astype(dt) + + tm.assert_frame_equal(result, expected) + + +def test_skiprows_inference(): + # see gh-11256 + data = """ +Text contained in the file header + +DataCol1 DataCol2 + 0.0 1.0 + 101.6 956.1 +""".strip() + skiprows = 2 + expected = read_csv(StringIO(data), skiprows=skiprows, delim_whitespace=True) + + result = read_fwf(StringIO(data), skiprows=skiprows) + tm.assert_frame_equal(result, expected) + + +def test_skiprows_by_index_inference(): + data = """ +To be skipped +Not To Be Skipped +Once more to be skipped +123 34 8 123 +456 78 9 456 +""".strip() + skiprows = [0, 2] + expected = read_csv(StringIO(data), 
skiprows=skiprows, delim_whitespace=True) + + result = read_fwf(StringIO(data), skiprows=skiprows) + tm.assert_frame_equal(result, expected) + + +def test_skiprows_inference_empty(): + data = """ +AA BBB C +12 345 6 +78 901 2 +""".strip() + + msg = "No rows from which to infer column width" + with pytest.raises(EmptyDataError, match=msg): + read_fwf(StringIO(data), skiprows=3) + + +def test_whitespace_preservation(): + # see gh-16772 + header = None + csv_data = """ + a ,bbb + cc,dd """ + + fwf_data = """ + a bbb + ccdd """ + result = read_fwf( + StringIO(fwf_data), widths=[3, 3], header=header, skiprows=[0], delimiter="\n\t" + ) + expected = read_csv(StringIO(csv_data), header=header) + tm.assert_frame_equal(result, expected) + + +def test_default_delimiter(): + header = None + csv_data = """ +a,bbb +cc,dd""" + + fwf_data = """ +a \tbbb +cc\tdd """ + result = read_fwf(StringIO(fwf_data), widths=[3, 3], header=header, skiprows=[0]) + expected = read_csv(StringIO(csv_data), header=header) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("infer", [True, False]) +def test_fwf_compression(compression_only, infer): + data = """1111111111 + 2222222222 + 3333333333""".strip() + + compression = compression_only + extension = icom._compression_to_extension[compression] + + kwargs = {"widths": [5, 5], "names": ["one", "two"]} + expected = read_fwf(StringIO(data), **kwargs) + + data = bytes(data, encoding="utf-8") + + with tm.ensure_clean(filename="tmp." + extension) as path: + tm.write_to_compressed(compression, path, data) + + if infer is not None: + kwargs["compression"] = "infer" if infer else compression + + result = read_fwf(path, **kwargs) + tm.assert_frame_equal(result, expected) + + +def test_binary_mode(): + """ + read_fwf supports opening files in binary mode. + + GH 18035. + """ + data = """aas aas aas +bba bab b a""" + df_reference = DataFrame( + [["bba", "bab", "b a"]], columns=["aas", "aas.1", "aas.2"], index=[0] + ) + with tm.ensure_clean() as path: + Path(path).write_text(data) + with open(path, "rb") as file: + df = read_fwf(file) + file.seek(0) + tm.assert_frame_equal(df, df_reference) + + +@pytest.mark.parametrize("memory_map", [True, False]) +def test_encoding_mmap(memory_map): + """ + encoding should be working, even when using a memory-mapped file. + + GH 23254. 
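An editorial aside, not part of the original docstring: memory_map and
encoding go through the same file-opening path, so a non-UTF-8 file can
be memory-mapped and still decoded. A hedged sketch (the filename is
hypothetical):

    import pandas as pd

    pd.read_fwf("data_latin1.txt", encoding="iso8859_1",
                widths=[2, 2, 2, 2], header=None, memory_map=True)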
+ """ + encoding = "iso8859_1" + with tm.ensure_clean() as path: + Path(path).write_bytes(" 1 A Ä 2\n".encode(encoding)) + df = read_fwf( + path, + header=None, + widths=[2, 2, 2, 2], + encoding=encoding, + memory_map=memory_map, + ) + df_reference = DataFrame([[1, "A", "Ä", 2]]) + tm.assert_frame_equal(df, df_reference) + + +@pytest.mark.parametrize( + "colspecs, names, widths, index_col", + [ + ( + [(0, 6), (6, 12), (12, 18), (18, None)], + list("abcde"), + None, + None, + ), + ( + None, + list("abcde"), + [6] * 4, + None, + ), + ( + [(0, 6), (6, 12), (12, 18), (18, None)], + list("abcde"), + None, + True, + ), + ( + None, + list("abcde"), + [6] * 4, + False, + ), + ( + None, + list("abcde"), + [6] * 4, + True, + ), + ( + [(0, 6), (6, 12), (12, 18), (18, None)], + list("abcde"), + None, + False, + ), + ], +) +def test_len_colspecs_len_names(colspecs, names, widths, index_col): + # GH#40830 + data = """col1 col2 col3 col4 + bab ba 2""" + msg = "Length of colspecs must match length of names" + with pytest.raises(ValueError, match=msg): + read_fwf( + StringIO(data), + colspecs=colspecs, + names=names, + widths=widths, + index_col=index_col, + ) + + +@pytest.mark.parametrize( + "colspecs, names, widths, index_col, expected", + [ + ( + [(0, 6), (6, 12), (12, 18), (18, None)], + list("abc"), + None, + 0, + DataFrame( + index=["col1", "ba"], + columns=["a", "b", "c"], + data=[["col2", "col3", "col4"], ["b ba", "2", np.nan]], + ), + ), + ( + [(0, 6), (6, 12), (12, 18), (18, None)], + list("ab"), + None, + [0, 1], + DataFrame( + index=[["col1", "ba"], ["col2", "b ba"]], + columns=["a", "b"], + data=[["col3", "col4"], ["2", np.nan]], + ), + ), + ( + [(0, 6), (6, 12), (12, 18), (18, None)], + list("a"), + None, + [0, 1, 2], + DataFrame( + index=[["col1", "ba"], ["col2", "b ba"], ["col3", "2"]], + columns=["a"], + data=[["col4"], [np.nan]], + ), + ), + ( + None, + list("abc"), + [6] * 4, + 0, + DataFrame( + index=["col1", "ba"], + columns=["a", "b", "c"], + data=[["col2", "col3", "col4"], ["b ba", "2", np.nan]], + ), + ), + ( + None, + list("ab"), + [6] * 4, + [0, 1], + DataFrame( + index=[["col1", "ba"], ["col2", "b ba"]], + columns=["a", "b"], + data=[["col3", "col4"], ["2", np.nan]], + ), + ), + ( + None, + list("a"), + [6] * 4, + [0, 1, 2], + DataFrame( + index=[["col1", "ba"], ["col2", "b ba"], ["col3", "2"]], + columns=["a"], + data=[["col4"], [np.nan]], + ), + ), + ], +) +def test_len_colspecs_len_names_with_index_col( + colspecs, names, widths, index_col, expected +): + # GH#40830 + data = """col1 col2 col3 col4 + bab ba 2""" + result = read_fwf( + StringIO(data), + colspecs=colspecs, + names=names, + widths=widths, + index_col=index_col, + ) + tm.assert_frame_equal(result, expected) + + +def test_colspecs_with_comment(): + # GH 14135 + result = read_fwf( + StringIO("#\nA1K\n"), colspecs=[(1, 2), (2, 3)], comment="#", header=None + ) + expected = DataFrame([[1, "K"]], columns=[0, 1]) + tm.assert_frame_equal(result, expected) + + +def test_skip_rows_and_n_rows(): + # GH#44021 + data = """a\tb +1\t a +2\t b +3\t c +4\t d +5\t e +6\t f + """ + result = read_fwf(StringIO(data), nrows=4, skiprows=[2, 4]) + expected = DataFrame({"a": [1, 3, 5, 6], "b": ["a", "c", "e", "f"]}) + tm.assert_frame_equal(result, expected) + + +def test_skiprows_with_iterator(): + # GH#10261 + data = """0 +1 +2 +3 +4 +5 +6 +7 +8 +9 + """ + df_iter = read_fwf( + StringIO(data), + colspecs=[(0, 2)], + names=["a"], + iterator=True, + chunksize=2, + skiprows=[0, 1, 2, 6, 9], + ) + expected_frames = [ + DataFrame({"a": [3, 
4]}), + DataFrame({"a": [5, 7, 8]}, index=[2, 3, 4]), + DataFrame({"a": []}, index=[], dtype="object"), + ] + for i, result in enumerate(df_iter): + tm.assert_frame_equal(result, expected_frames[i]) + + +def test_skiprows_passing_as_positional_deprecated(): + # GH#41485 + data = """0 +1 +2 +""" + with tm.assert_produces_warning(FutureWarning, match="keyword-only"): + result = read_fwf(StringIO(data), [(0, 2)]) + expected = DataFrame({"0": [1, 2]}) + tm.assert_frame_equal(result, expected) + + +def test_names_and_infer_colspecs(): + # GH#45337 + data = """X Y Z + 959.0 345 22.2 + """ + result = read_fwf(StringIO(data), skiprows=1, usecols=[0, 2], names=["a", "b"]) + expected = DataFrame({"a": [959.0], "b": 22.2}) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_skiprows.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_skiprows.py new file mode 100644 index 0000000..5b722b5 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_skiprows.py @@ -0,0 +1,277 @@ +""" +Tests that skipped rows are properly handled during +parsing for all of the parsers defined in parsers.py +""" + +from datetime import datetime +from io import StringIO + +import numpy as np +import pytest + +from pandas.errors import EmptyDataError + +from pandas import ( + DataFrame, + Index, +) +import pandas._testing as tm + +# XFAIL ME PLS once hanging tests issues identified +pytestmark = pytest.mark.usefixtures("pyarrow_skip") + + +@pytest.mark.parametrize("skiprows", [list(range(6)), 6]) +def test_skip_rows_bug(all_parsers, skiprows): + # see gh-505 + parser = all_parsers + text = """#foo,a,b,c +#foo,a,b,c +#foo,a,b,c +#foo,a,b,c +#foo,a,b,c +#foo,a,b,c +1/1/2000,1.,2.,3. +1/2/2000,4,5,6 +1/3/2000,7,8,9 +""" + result = parser.read_csv( + StringIO(text), skiprows=skiprows, header=None, index_col=0, parse_dates=True + ) + index = Index( + [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], name=0 + ) + + expected = DataFrame( + np.arange(1.0, 10.0).reshape((3, 3)), columns=[1, 2, 3], index=index + ) + tm.assert_frame_equal(result, expected) + + +def test_deep_skip_rows(all_parsers): + # see gh-4382 + parser = all_parsers + data = "a,b,c\n" + "\n".join( + [",".join([str(i), str(i + 1), str(i + 2)]) for i in range(10)] + ) + condensed_data = "a,b,c\n" + "\n".join( + [",".join([str(i), str(i + 1), str(i + 2)]) for i in [0, 1, 2, 3, 4, 6, 8, 9]] + ) + + result = parser.read_csv(StringIO(data), skiprows=[6, 8]) + condensed_result = parser.read_csv(StringIO(condensed_data)) + tm.assert_frame_equal(result, condensed_result) + + +def test_skip_rows_blank(all_parsers): + # see gh-9832 + parser = all_parsers + text = """#foo,a,b,c +#foo,a,b,c + +#foo,a,b,c +#foo,a,b,c + +1/1/2000,1.,2.,3. 
+1/2/2000,4,5,6 +1/3/2000,7,8,9 +""" + data = parser.read_csv( + StringIO(text), skiprows=6, header=None, index_col=0, parse_dates=True + ) + index = Index( + [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], name=0 + ) + + expected = DataFrame( + np.arange(1.0, 10.0).reshape((3, 3)), columns=[1, 2, 3], index=index + ) + tm.assert_frame_equal(data, expected) + + +@pytest.mark.parametrize( + "data,kwargs,expected", + [ + ( + """id,text,num_lines +1,"line 11 +line 12",2 +2,"line 21 +line 22",2 +3,"line 31",1""", + {"skiprows": [1]}, + DataFrame( + [[2, "line 21\nline 22", 2], [3, "line 31", 1]], + columns=["id", "text", "num_lines"], + ), + ), + ( + "a,b,c\n~a\n b~,~e\n d~,~f\n f~\n1,2,~12\n 13\n 14~", + {"quotechar": "~", "skiprows": [2]}, + DataFrame([["a\n b", "e\n d", "f\n f"]], columns=["a", "b", "c"]), + ), + ( + ( + "Text,url\n~example\n " + "sentence\n one~,url1\n~" + "example\n sentence\n two~,url2\n~" + "example\n sentence\n three~,url3" + ), + {"quotechar": "~", "skiprows": [1, 3]}, + DataFrame([["example\n sentence\n two", "url2"]], columns=["Text", "url"]), + ), + ], +) +def test_skip_row_with_newline(all_parsers, data, kwargs, expected): + # see gh-12775 and gh-10911 + parser = all_parsers + result = parser.read_csv(StringIO(data), **kwargs) + tm.assert_frame_equal(result, expected) + + +def test_skip_row_with_quote(all_parsers): + # see gh-12775 and gh-10911 + parser = all_parsers + data = """id,text,num_lines +1,"line '11' line 12",2 +2,"line '21' line 22",2 +3,"line '31' line 32",1""" + + exp_data = [[2, "line '21' line 22", 2], [3, "line '31' line 32", 1]] + expected = DataFrame(exp_data, columns=["id", "text", "num_lines"]) + + result = parser.read_csv(StringIO(data), skiprows=[1]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "data,exp_data", + [ + ( + """id,text,num_lines +1,"line \n'11' line 12",2 +2,"line \n'21' line 22",2 +3,"line \n'31' line 32",1""", + [[2, "line \n'21' line 22", 2], [3, "line \n'31' line 32", 1]], + ), + ( + """id,text,num_lines +1,"line '11\n' line 12",2 +2,"line '21\n' line 22",2 +3,"line '31\n' line 32",1""", + [[2, "line '21\n' line 22", 2], [3, "line '31\n' line 32", 1]], + ), + ( + """id,text,num_lines +1,"line '11\n' \r\tline 12",2 +2,"line '21\n' \r\tline 22",2 +3,"line '31\n' \r\tline 32",1""", + [[2, "line '21\n' \r\tline 22", 2], [3, "line '31\n' \r\tline 32", 1]], + ), + ], +) +def test_skip_row_with_newline_and_quote(all_parsers, data, exp_data): + # see gh-12775 and gh-10911 + parser = all_parsers + result = parser.read_csv(StringIO(data), skiprows=[1]) + + expected = DataFrame(exp_data, columns=["id", "text", "num_lines"]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "line_terminator", ["\n", "\r\n", "\r"] # "LF" # "CRLF" # "CR" +) +def test_skiprows_lineterminator(all_parsers, line_terminator, request): + # see gh-9079 + parser = all_parsers + data = "\n".join( + [ + "SMOSMANIA ThetaProbe-ML2X ", + "2007/01/01 01:00 0.2140 U M ", + "2007/01/01 02:00 0.2141 M O ", + "2007/01/01 04:00 0.2142 D M ", + ] + ) + expected = DataFrame( + [ + ["2007/01/01", "01:00", 0.2140, "U", "M"], + ["2007/01/01", "02:00", 0.2141, "M", "O"], + ["2007/01/01", "04:00", 0.2142, "D", "M"], + ], + columns=["date", "time", "var", "flag", "oflag"], + ) + + if parser.engine == "python" and line_terminator == "\r": + mark = pytest.mark.xfail(reason="'CR' not respect with the Python parser yet") + request.node.add_marker(mark) + + data = data.replace("\n", line_terminator) + result = 
parser.read_csv( + StringIO(data), + skiprows=1, + delim_whitespace=True, + names=["date", "time", "var", "flag", "oflag"], + ) + tm.assert_frame_equal(result, expected) + + +def test_skiprows_infield_quote(all_parsers): + # see gh-14459 + parser = all_parsers + data = 'a"\nb"\na\n1' + expected = DataFrame({"a": [1]}) + + result = parser.read_csv(StringIO(data), skiprows=2) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "kwargs,expected", + [ + ({}, DataFrame({"1": [3, 5]})), + ({"header": 0, "names": ["foo"]}, DataFrame({"foo": [3, 5]})), + ], +) +def test_skip_rows_callable(all_parsers, kwargs, expected): + parser = all_parsers + data = "a\n1\n2\n3\n4\n5" + + result = parser.read_csv(StringIO(data), skiprows=lambda x: x % 2 == 0, **kwargs) + tm.assert_frame_equal(result, expected) + + +def test_skip_rows_skip_all(all_parsers): + parser = all_parsers + data = "a\n1\n2\n3\n4\n5" + msg = "No columns to parse from file" + + with pytest.raises(EmptyDataError, match=msg): + parser.read_csv(StringIO(data), skiprows=lambda x: True) + + +def test_skip_rows_bad_callable(all_parsers): + msg = "by zero" + parser = all_parsers + data = "a\n1\n2\n3\n4\n5" + + with pytest.raises(ZeroDivisionError, match=msg): + parser.read_csv(StringIO(data), skiprows=lambda x: 1 / 0) + + +def test_skip_rows_and_n_rows(all_parsers): + # GH#44021 + data = """a,b +1,a +2,b +3,c +4,d +5,e +6,f +7,g +8,h +""" + parser = all_parsers + result = parser.read_csv(StringIO(data), nrows=5, skiprows=[2, 4, 6]) + expected = DataFrame({"a": [1, 3, 5, 7, 8], "b": ["a", "c", "e", "g", "h"]}) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_textreader.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_textreader.py new file mode 100644 index 0000000..d594bf8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_textreader.py @@ -0,0 +1,350 @@ +""" +Tests the TextReader class in parsers.pyx, which +is integral to the C engine in parsers.py +""" +from io import ( + BytesIO, + StringIO, +) +import os + +import numpy as np +import pytest + +import pandas._libs.parsers as parser +from pandas._libs.parsers import TextReader + +from pandas import DataFrame +import pandas._testing as tm + +from pandas.io.parsers import ( + TextFileReader, + read_csv, +) +from pandas.io.parsers.c_parser_wrapper import ensure_dtype_objs + + +class TestTextReader: + @pytest.fixture(autouse=True) + def setup_method(self, datapath): + self.dirpath = datapath("io", "parser", "data") + csv1_dirpath = datapath("io", "data", "csv") + self.csv1 = os.path.join(csv1_dirpath, "test1.csv") + self.csv2 = os.path.join(self.dirpath, "test2.csv") + self.xls1 = os.path.join(self.dirpath, "test.xls") + + def test_file_handle(self): + with open(self.csv1, "rb") as f: + reader = TextReader(f) + reader.read() + + def test_file_handle_mmap(self): + # this was never using memory_map=True + with open(self.csv1, "rb") as f: + reader = TextReader(f, header=None) + reader.read() + + def test_StringIO(self): + with open(self.csv1, "rb") as f: + text = f.read() + src = BytesIO(text) + reader = TextReader(src, header=None) + reader.read() + + def test_string_factorize(self): + # should this be optional? 
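# Editorial aside, not part of the vendored test: "factorize" here means
# the C reader interns repeated string values, so equal cells share one
# Python object -- exactly what the id()-based assertion below relies on.
# The same observation as a standalone sketch:
from io import StringIO as _SIO
from pandas._libs.parsers import TextReader as _TextReader

_cols = _TextReader(_SIO("a\nb\na\nb\na"), header=None).read()
assert len({id(v) for v in _cols[0]}) == 2  # five cells, two objects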
+ data = "a\nb\na\nb\na" + reader = TextReader(StringIO(data), header=None) + result = reader.read() + assert len(set(map(id, result[0]))) == 2 + + def test_skipinitialspace(self): + data = "a, b\na, b\na, b\na, b" + + reader = TextReader(StringIO(data), skipinitialspace=True, header=None) + result = reader.read() + + tm.assert_numpy_array_equal( + result[0], np.array(["a", "a", "a", "a"], dtype=np.object_) + ) + tm.assert_numpy_array_equal( + result[1], np.array(["b", "b", "b", "b"], dtype=np.object_) + ) + + def test_parse_booleans(self): + data = "True\nFalse\nTrue\nTrue" + + reader = TextReader(StringIO(data), header=None) + result = reader.read() + + assert result[0].dtype == np.bool_ + + def test_delimit_whitespace(self): + data = 'a b\na\t\t "b"\n"a"\t \t b' + + reader = TextReader(StringIO(data), delim_whitespace=True, header=None) + result = reader.read() + + tm.assert_numpy_array_equal( + result[0], np.array(["a", "a", "a"], dtype=np.object_) + ) + tm.assert_numpy_array_equal( + result[1], np.array(["b", "b", "b"], dtype=np.object_) + ) + + def test_embedded_newline(self): + data = 'a\n"hello\nthere"\nthis' + + reader = TextReader(StringIO(data), header=None) + result = reader.read() + + expected = np.array(["a", "hello\nthere", "this"], dtype=np.object_) + tm.assert_numpy_array_equal(result[0], expected) + + def test_euro_decimal(self): + data = "12345,67\n345,678" + + reader = TextReader(StringIO(data), delimiter=":", decimal=",", header=None) + result = reader.read() + + expected = np.array([12345.67, 345.678]) + tm.assert_almost_equal(result[0], expected) + + def test_integer_thousands(self): + data = "123,456\n12,500" + + reader = TextReader(StringIO(data), delimiter=":", thousands=",", header=None) + result = reader.read() + + expected = np.array([123456, 12500], dtype=np.int64) + tm.assert_almost_equal(result[0], expected) + + def test_integer_thousands_alt(self): + data = "123.456\n12.500" + + reader = TextFileReader( + StringIO(data), delimiter=":", thousands=".", header=None + ) + result = reader.read() + + expected = DataFrame([123456, 12500]) + tm.assert_frame_equal(result, expected) + + def test_skip_bad_lines(self, capsys): + # too many lines, see #2430 for why + data = "a:b:c\nd:e:f\ng:h:i\nj:k:l:m\nl:m:n\no:p:q:r" + + reader = TextReader(StringIO(data), delimiter=":", header=None) + msg = r"Error tokenizing data\. 
C error: Expected 3 fields in line 4, saw 4" + with pytest.raises(parser.ParserError, match=msg): + reader.read() + + reader = TextReader( + StringIO(data), delimiter=":", header=None, on_bad_lines=2 # Skip + ) + result = reader.read() + expected = { + 0: np.array(["a", "d", "g", "l"], dtype=object), + 1: np.array(["b", "e", "h", "m"], dtype=object), + 2: np.array(["c", "f", "i", "n"], dtype=object), + } + assert_array_dicts_equal(result, expected) + + reader = TextReader( + StringIO(data), delimiter=":", header=None, on_bad_lines=1 # Warn + ) + reader.read() + captured = capsys.readouterr() + + assert "Skipping line 4" in captured.err + assert "Skipping line 6" in captured.err + + def test_header_not_enough_lines(self): + data = "skip this\nskip this\na,b,c\n1,2,3\n4,5,6" + + reader = TextReader(StringIO(data), delimiter=",", header=2) + header = reader.header + expected = [["a", "b", "c"]] + assert header == expected + + recs = reader.read() + expected = { + 0: np.array([1, 4], dtype=np.int64), + 1: np.array([2, 5], dtype=np.int64), + 2: np.array([3, 6], dtype=np.int64), + } + assert_array_dicts_equal(recs, expected) + + def test_escapechar(self): + data = '\\"hello world"\n\\"hello world"\n\\"hello world"' + + reader = TextReader(StringIO(data), delimiter=",", header=None, escapechar="\\") + result = reader.read() + expected = {0: np.array(['"hello world"'] * 3, dtype=object)} + assert_array_dicts_equal(result, expected) + + def test_eof_has_eol(self): + # handling of new line at EOF + pass + + def test_na_substitution(self): + pass + + def test_numpy_string_dtype(self): + data = """\ +a,1 +aa,2 +aaa,3 +aaaa,4 +aaaaa,5""" + + def _make_reader(**kwds): + if "dtype" in kwds: + kwds["dtype"] = ensure_dtype_objs(kwds["dtype"]) + return TextReader(StringIO(data), delimiter=",", header=None, **kwds) + + reader = _make_reader(dtype="S5,i4") + result = reader.read() + + assert result[0].dtype == "S5" + + ex_values = np.array(["a", "aa", "aaa", "aaaa", "aaaaa"], dtype="S5") + assert (result[0] == ex_values).all() + assert result[1].dtype == "i4" + + reader = _make_reader(dtype="S4") + result = reader.read() + assert result[0].dtype == "S4" + ex_values = np.array(["a", "aa", "aaa", "aaaa", "aaaa"], dtype="S4") + assert (result[0] == ex_values).all() + assert result[1].dtype == "S4" + + def test_pass_dtype(self): + data = """\ +one,two +1,a +2,b +3,c +4,d""" + + def _make_reader(**kwds): + if "dtype" in kwds: + kwds["dtype"] = ensure_dtype_objs(kwds["dtype"]) + return TextReader(StringIO(data), delimiter=",", **kwds) + + reader = _make_reader(dtype={"one": "u1", 1: "S1"}) + result = reader.read() + assert result[0].dtype == "u1" + assert result[1].dtype == "S1" + + reader = _make_reader(dtype={"one": np.uint8, 1: object}) + result = reader.read() + assert result[0].dtype == "u1" + assert result[1].dtype == "O" + + reader = _make_reader(dtype={"one": np.dtype("u1"), 1: np.dtype("O")}) + result = reader.read() + assert result[0].dtype == "u1" + assert result[1].dtype == "O" + + def test_usecols(self): + data = """\ +a,b,c +1,2,3 +4,5,6 +7,8,9 +10,11,12""" + + def _make_reader(**kwds): + return TextReader(StringIO(data), delimiter=",", **kwds) + + reader = _make_reader(usecols=(1, 2)) + result = reader.read() + + exp = _make_reader().read() + assert len(result) == 2 + assert (result[1] == exp[1]).all() + assert (result[2] == exp[2]).all() + + def test_cr_delimited(self): + def _test(text, **kwargs): + nice_text = text.replace("\r", "\r\n") + result = TextReader(StringIO(text), **kwargs).read() + 
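# Editorial aside, not part of the vendored test: TextReader.read()
# returns a dict keyed by column position with NumPy arrays as values,
# which is why the "\r"-terminated parse can be compared column-by-column
# against its "\r\n" rewrite below. A hedged sketch of that return shape:
from io import StringIO as _SIO
from pandas._libs.parsers import TextReader as _TR

_demo = _TR(_SIO("a,b\r1,2\r3,4"), delimiter=",").read()
# _demo == {0: array([1, 3]), 1: array([2, 4])}; the header row is consumed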
expected = TextReader(StringIO(nice_text), **kwargs).read() + assert_array_dicts_equal(result, expected) + + data = "a,b,c\r1,2,3\r4,5,6\r7,8,9\r10,11,12" + _test(data, delimiter=",") + + data = "a b c\r1 2 3\r4 5 6\r7 8 9\r10 11 12" + _test(data, delim_whitespace=True) + + data = "a,b,c\r1,2,3\r4,5,6\r,88,9\r10,11,12" + _test(data, delimiter=",") + + sample = ( + "A,B,C,D,E,F,G,H,I,J,K,L,M,N,O\r" + "AAAAA,BBBBB,0,0,0,0,0,0,0,0,0,0,0,0,0\r" + ",BBBBB,0,0,0,0,0,0,0,0,0,0,0,0,0" + ) + _test(sample, delimiter=",") + + data = "A B C\r 2 3\r4 5 6" + _test(data, delim_whitespace=True) + + data = "A B C\r2 3\r4 5 6" + _test(data, delim_whitespace=True) + + def test_empty_field_eof(self): + data = "a,b,c\n1,2,3\n4,," + + result = TextReader(StringIO(data), delimiter=",").read() + + expected = { + 0: np.array([1, 4], dtype=np.int64), + 1: np.array(["2", ""], dtype=object), + 2: np.array(["3", ""], dtype=object), + } + assert_array_dicts_equal(result, expected) + + # GH5664 + a = DataFrame([["b"], [np.nan]], columns=["a"], index=["a", "c"]) + b = DataFrame([[1, 1, 1, 0], [1, 1, 1, 0]], columns=list("abcd"), index=[1, 1]) + c = DataFrame( + [ + [1, 2, 3, 4], + [6, np.nan, np.nan, np.nan], + [8, 9, 10, 11], + [13, 14, np.nan, np.nan], + ], + columns=list("abcd"), + index=[0, 5, 7, 12], + ) + + for _ in range(100): + df = read_csv(StringIO("a,b\nc\n"), skiprows=0, names=["a"], engine="c") + tm.assert_frame_equal(df, a) + + df = read_csv( + StringIO("1,1,1,1,0\n" * 2 + "\n" * 2), names=list("abcd"), engine="c" + ) + tm.assert_frame_equal(df, b) + + df = read_csv( + StringIO("0,1,2,3,4\n5,6\n7,8,9,10,11\n12,13,14"), + names=list("abcd"), + engine="c", + ) + tm.assert_frame_equal(df, c) + + def test_empty_csv_input(self): + # GH14867 + with read_csv( + StringIO(), chunksize=20, header=None, names=["a", "b", "c"] + ) as df: + assert isinstance(df, TextFileReader) + + +def assert_array_dicts_equal(left, right): + for k, v in left.items(): + tm.assert_numpy_array_equal(np.asarray(v), np.asarray(right[k])) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_unsupported.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_unsupported.py new file mode 100644 index 0000000..fad5c3d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/test_unsupported.py @@ -0,0 +1,191 @@ +""" +Tests that features that are currently unsupported in +either the Python or C parser are actually enforced +and are clearly communicated to the user. + +Ultimately, the goal is to remove test cases from this +test suite as new feature support is added to the parsers. 
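An editorial aside, not part of the original docstring: the recurring
pattern below is that pairing an engine with an option it cannot honour
raises ValueError before any parsing happens. A hedged sketch:

    from io import StringIO
    import pandas as pd

    pd.read_csv(StringIO("a b c\n1 2 3"), engine="c", skipfooter=1)
    # raises ValueError (message includes "does not support")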
+""" +from io import StringIO +import os +from pathlib import Path + +import pytest + +from pandas.compat import ( + is_ci_environment, + is_platform_mac, + is_platform_windows, +) +from pandas.errors import ParserError + +import pandas._testing as tm + +from pandas.io.parsers import read_csv +import pandas.io.parsers.readers as parsers + + +@pytest.fixture(params=["python", "python-fwf"], ids=lambda val: val) +def python_engine(request): + return request.param + + +class TestUnsupportedFeatures: + def test_mangle_dupe_cols_false(self): + # see gh-12935 + data = "a b c\n1 2 3" + msg = "is not supported" + + for engine in ("c", "python"): + with pytest.raises(ValueError, match=msg): + read_csv(StringIO(data), engine=engine, mangle_dupe_cols=False) + + def test_c_engine(self): + # see gh-6607 + data = "a b c\n1 2 3" + msg = "does not support" + + # specify C engine with unsupported options (raise) + with pytest.raises(ValueError, match=msg): + read_csv(StringIO(data), engine="c", sep=None, delim_whitespace=False) + with pytest.raises(ValueError, match=msg): + read_csv(StringIO(data), engine="c", sep=r"\s") + with pytest.raises(ValueError, match=msg): + read_csv(StringIO(data), engine="c", sep="\t", quotechar=chr(128)) + with pytest.raises(ValueError, match=msg): + read_csv(StringIO(data), engine="c", skipfooter=1) + + # specify C-unsupported options without python-unsupported options + with tm.assert_produces_warning(parsers.ParserWarning): + read_csv(StringIO(data), sep=None, delim_whitespace=False) + with tm.assert_produces_warning(parsers.ParserWarning): + read_csv(StringIO(data), sep=r"\s") + with tm.assert_produces_warning(parsers.ParserWarning): + read_csv(StringIO(data), sep="\t", quotechar=chr(128)) + with tm.assert_produces_warning(parsers.ParserWarning): + read_csv(StringIO(data), skipfooter=1) + + text = """ A B C D E +one two three four +a b 10.0032 5 -0.5109 -2.3358 -0.4645 0.05076 0.3640 +a q 20 4 0.4473 1.4152 0.2834 1.00661 0.1744 +x q 30 3 -0.6662 -0.5243 -0.3580 0.89145 2.5838""" + msg = "Error tokenizing data" + + with pytest.raises(ParserError, match=msg): + read_csv(StringIO(text), sep="\\s+") + with pytest.raises(ParserError, match=msg): + read_csv(StringIO(text), engine="c", sep="\\s+") + + msg = "Only length-1 thousands markers supported" + data = """A|B|C +1|2,334|5 +10|13|10. 
+""" + with pytest.raises(ValueError, match=msg): + read_csv(StringIO(data), thousands=",,") + with pytest.raises(ValueError, match=msg): + read_csv(StringIO(data), thousands="") + + msg = "Only length-1 line terminators supported" + data = "a,b,c~~1,2,3~~4,5,6" + with pytest.raises(ValueError, match=msg): + read_csv(StringIO(data), lineterminator="~~") + + def test_python_engine(self, python_engine): + from pandas.io.parsers.readers import _python_unsupported as py_unsupported + + data = """1,2,3,, +1,2,3,4, +1,2,3,4,5 +1,2,,, +1,2,3,4,""" + + for default in py_unsupported: + msg = ( + f"The {repr(default)} option is not " + f"supported with the {repr(python_engine)} engine" + ) + + kwargs = {default: object()} + with pytest.raises(ValueError, match=msg): + read_csv(StringIO(data), engine=python_engine, **kwargs) + + def test_python_engine_file_no_iter(self, python_engine): + # see gh-16530 + class NoNextBuffer: + def __init__(self, csv_data): + self.data = csv_data + + def __next__(self): + return self.data.__next__() + + def read(self): + return self.data + + def readline(self): + return self.data + + data = "a\n1" + msg = "'NoNextBuffer' object is not iterable|argument 1 must be an iterator" + + with pytest.raises(TypeError, match=msg): + read_csv(NoNextBuffer(data), engine=python_engine) + + def test_pyarrow_engine(self): + from pandas.io.parsers.readers import _pyarrow_unsupported as pa_unsupported + + data = """1,2,3,, + 1,2,3,4, + 1,2,3,4,5 + 1,2,,, + 1,2,3,4,""" + + for default in pa_unsupported: + msg = ( + f"The {repr(default)} option is not " + f"supported with the 'pyarrow' engine" + ) + kwargs = {default: object()} + default_needs_bool = {"warn_bad_lines", "error_bad_lines"} + if default == "dialect": + kwargs[default] = "excel" # test a random dialect + elif default in default_needs_bool: + kwargs[default] = True + elif default == "on_bad_lines": + kwargs[default] = "warn" + with pytest.raises(ValueError, match=msg): + read_csv(StringIO(data), engine="pyarrow", **kwargs) + + def test_on_bad_lines_callable_python_only(self, all_parsers): + # GH 5686 + sio = StringIO("a,b\n1,2") + bad_lines_func = lambda x: x + parser = all_parsers + if all_parsers.engine != "python": + msg = "on_bad_line can only be a callable function if engine='python'" + with pytest.raises(ValueError, match=msg): + parser.read_csv(sio, on_bad_lines=bad_lines_func) + else: + parser.read_csv(sio, on_bad_lines=bad_lines_func) + + +def test_close_file_handle_on_invalide_usecols(all_parsers): + # GH 45384 + parser = all_parsers + + error = ValueError + if parser.engine == "pyarrow": + pyarrow = pytest.importorskip("pyarrow") + error = pyarrow.lib.ArrowKeyError + if is_ci_environment() and (is_platform_windows() or is_platform_mac()): + # GH#45547 causes timeouts on windows/mac builds + pytest.skip("GH#45547 causing timeouts on windows/mac builds 2022-01-22") + + with tm.ensure_clean("test.csv") as fname: + Path(fname).write_text("col1,col2\na,b\n1,2") + with tm.assert_produces_warning(False): + with pytest.raises(error, match="col3"): + parser.read_csv(fname, usecols=["col1", "col2", "col3"]) + # unlink fails on windows if file handles still point to it + os.unlink(fname) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/usecols/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/usecols/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/usecols/__pycache__/__init__.cpython-38.pyc 
b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/usecols/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..8bb4e14 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/usecols/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/usecols/__pycache__/test_parse_dates.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/usecols/__pycache__/test_parse_dates.cpython-38.pyc new file mode 100644 index 0000000..ff237bc Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/usecols/__pycache__/test_parse_dates.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/usecols/__pycache__/test_strings.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/usecols/__pycache__/test_strings.cpython-38.pyc new file mode 100644 index 0000000..d65028a Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/usecols/__pycache__/test_strings.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/usecols/__pycache__/test_usecols_basic.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/usecols/__pycache__/test_usecols_basic.cpython-38.pyc new file mode 100644 index 0000000..d0243e5 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/usecols/__pycache__/test_usecols_basic.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/usecols/test_parse_dates.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/usecols/test_parse_dates.py new file mode 100644 index 0000000..50000da --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/usecols/test_parse_dates.py @@ -0,0 +1,155 @@ +""" +Tests the usecols functionality during parsing +for all of the parsers defined in parsers.py +""" +from io import StringIO + +import pytest + +from pandas import ( + DataFrame, + Index, + Timestamp, +) +import pandas._testing as tm + +_msg_validate_usecols_arg = ( + "'usecols' must either be list-like " + "of all strings, all unicode, all " + "integers or a callable." 
+) +_msg_validate_usecols_names = ( + "Usecols do not match columns, columns expected but not found: {0}" +) + +# TODO(1.4): Change these to xfails whenever parse_dates support(which was +# intentionally disable to keep small PR sizes) is added back +pytestmark = pytest.mark.usefixtures("pyarrow_skip") + + +@pytest.mark.parametrize("usecols", [[0, 2, 3], [3, 0, 2]]) +def test_usecols_with_parse_dates(all_parsers, usecols): + # see gh-9755 + data = """a,b,c,d,e +0,1,20140101,0900,4 +0,1,20140102,1000,4""" + parser = all_parsers + parse_dates = [[1, 2]] + + cols = { + "a": [0, 0], + "c_d": [Timestamp("2014-01-01 09:00:00"), Timestamp("2014-01-02 10:00:00")], + } + expected = DataFrame(cols, columns=["c_d", "a"]) + result = parser.read_csv(StringIO(data), usecols=usecols, parse_dates=parse_dates) + tm.assert_frame_equal(result, expected) + + +def test_usecols_with_parse_dates2(all_parsers): + # see gh-13604 + parser = all_parsers + data = """2008-02-07 09:40,1032.43 +2008-02-07 09:50,1042.54 +2008-02-07 10:00,1051.65""" + + names = ["date", "values"] + usecols = names[:] + parse_dates = [0] + + index = Index( + [ + Timestamp("2008-02-07 09:40"), + Timestamp("2008-02-07 09:50"), + Timestamp("2008-02-07 10:00"), + ], + name="date", + ) + cols = {"values": [1032.43, 1042.54, 1051.65]} + expected = DataFrame(cols, index=index) + + result = parser.read_csv( + StringIO(data), + parse_dates=parse_dates, + index_col=0, + usecols=usecols, + header=None, + names=names, + ) + tm.assert_frame_equal(result, expected) + + +def test_usecols_with_parse_dates3(all_parsers): + # see gh-14792 + parser = all_parsers + data = """a,b,c,d,e,f,g,h,i,j +2016/09/21,1,1,2,3,4,5,6,7,8""" + + usecols = list("abcdefghij") + parse_dates = [0] + + cols = { + "a": Timestamp("2016-09-21"), + "b": [1], + "c": [1], + "d": [2], + "e": [3], + "f": [4], + "g": [5], + "h": [6], + "i": [7], + "j": [8], + } + expected = DataFrame(cols, columns=usecols) + + result = parser.read_csv(StringIO(data), usecols=usecols, parse_dates=parse_dates) + tm.assert_frame_equal(result, expected) + + +def test_usecols_with_parse_dates4(all_parsers): + data = "a,b,c,d,e,f,g,h,i,j\n2016/09/21,1,1,2,3,4,5,6,7,8" + usecols = list("abcdefghij") + parse_dates = [[0, 1]] + parser = all_parsers + + cols = { + "a_b": "2016/09/21 1", + "c": [1], + "d": [2], + "e": [3], + "f": [4], + "g": [5], + "h": [6], + "i": [7], + "j": [8], + } + expected = DataFrame(cols, columns=["a_b"] + list("cdefghij")) + + result = parser.read_csv(StringIO(data), usecols=usecols, parse_dates=parse_dates) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("usecols", [[0, 2, 3], [3, 0, 2]]) +@pytest.mark.parametrize( + "names", + [ + list("abcde"), # Names span all columns in original data. + list("acd"), # Names span only the selected columns. 
+ ], +) +def test_usecols_with_parse_dates_and_names(all_parsers, usecols, names): + # see gh-9755 + s = """0,1,20140101,0900,4 +0,1,20140102,1000,4""" + parse_dates = [[1, 2]] + parser = all_parsers + + cols = { + "a": [0, 0], + "c_d": [Timestamp("2014-01-01 09:00:00"), Timestamp("2014-01-02 10:00:00")], + } + expected = DataFrame(cols, columns=["c_d", "a"]) + + result = parser.read_csv( + StringIO(s), names=names, parse_dates=parse_dates, usecols=usecols + ) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/usecols/test_strings.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/usecols/test_strings.py new file mode 100644 index 0000000..8cecf1f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/usecols/test_strings.py @@ -0,0 +1,97 @@ +""" +Tests the usecols functionality during parsing +for all of the parsers defined in parsers.py +""" +from io import StringIO + +import pytest + +from pandas import DataFrame +import pandas._testing as tm + +_msg_validate_usecols_arg = ( + "'usecols' must either be list-like " + "of all strings, all unicode, all " + "integers or a callable." +) +_msg_validate_usecols_names = ( + "Usecols do not match columns, columns expected but not found: {0}" +) + + +def test_usecols_with_unicode_strings(all_parsers): + # see gh-13219 + data = """AAA,BBB,CCC,DDD +0.056674973,8,True,a +2.613230982,2,False,b +3.568935038,7,False,a""" + parser = all_parsers + + exp_data = { + "AAA": { + 0: 0.056674972999999997, + 1: 2.6132309819999997, + 2: 3.5689350380000002, + }, + "BBB": {0: 8, 1: 2, 2: 7}, + } + expected = DataFrame(exp_data) + + result = parser.read_csv(StringIO(data), usecols=["AAA", "BBB"]) + tm.assert_frame_equal(result, expected) + + +def test_usecols_with_single_byte_unicode_strings(all_parsers): + # see gh-13219 + data = """A,B,C,D +0.056674973,8,True,a +2.613230982,2,False,b +3.568935038,7,False,a""" + parser = all_parsers + + exp_data = { + "A": { + 0: 0.056674972999999997, + 1: 2.6132309819999997, + 2: 3.5689350380000002, + }, + "B": {0: 8, 1: 2, 2: 7}, + } + expected = DataFrame(exp_data) + + result = parser.read_csv(StringIO(data), usecols=["A", "B"]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("usecols", [["AAA", b"BBB"], [b"AAA", "BBB"]]) +def test_usecols_with_mixed_encoding_strings(all_parsers, usecols): + data = """AAA,BBB,CCC,DDD +0.056674973,8,True,a +2.613230982,2,False,b +3.568935038,7,False,a""" + parser = all_parsers + + with pytest.raises(ValueError, match=_msg_validate_usecols_arg): + parser.read_csv(StringIO(data), usecols=usecols) + + +@pytest.mark.parametrize("usecols", [["あああ", "いい"], ["あああ", "いい"]]) +def test_usecols_with_multi_byte_characters(all_parsers, usecols): + data = """あああ,いい,ううう,ええええ +0.056674973,8,True,a +2.613230982,2,False,b +3.568935038,7,False,a""" + parser = all_parsers + + exp_data = { + "あああ": { + 0: 0.056674972999999997, + 1: 2.6132309819999997, + 2: 3.5689350380000002, + }, + "いい": {0: 8, 1: 2, 2: 7}, + } + expected = DataFrame(exp_data) + + result = parser.read_csv(StringIO(data), usecols=usecols) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/parser/usecols/test_usecols_basic.py b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/usecols/test_usecols_basic.py new file mode 100644 index 0000000..f35caf3 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/parser/usecols/test_usecols_basic.py @@ -0,0 +1,418 
@@ +""" +Tests the usecols functionality during parsing +for all of the parsers defined in parsers.py +""" +from io import StringIO + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, +) +import pandas._testing as tm + +_msg_validate_usecols_arg = ( + "'usecols' must either be list-like " + "of all strings, all unicode, all " + "integers or a callable." +) +_msg_validate_usecols_names = ( + "Usecols do not match columns, columns expected but not found: {0}" +) + +# TODO(1.4): Change to xfails at release time +pytestmark = pytest.mark.usefixtures("pyarrow_skip") + + +def test_raise_on_mixed_dtype_usecols(all_parsers): + # See gh-12678 + data = """a,b,c + 1000,2000,3000 + 4000,5000,6000 + """ + usecols = [0, "b", 2] + parser = all_parsers + + with pytest.raises(ValueError, match=_msg_validate_usecols_arg): + parser.read_csv(StringIO(data), usecols=usecols) + + +@pytest.mark.parametrize("usecols", [(1, 2), ("b", "c")]) +def test_usecols(all_parsers, usecols): + data = """\ +a,b,c +1,2,3 +4,5,6 +7,8,9 +10,11,12""" + parser = all_parsers + result = parser.read_csv(StringIO(data), usecols=usecols) + + expected = DataFrame([[2, 3], [5, 6], [8, 9], [11, 12]], columns=["b", "c"]) + tm.assert_frame_equal(result, expected) + + +def test_usecols_with_names(all_parsers): + data = """\ +a,b,c +1,2,3 +4,5,6 +7,8,9 +10,11,12""" + parser = all_parsers + names = ["foo", "bar"] + result = parser.read_csv(StringIO(data), names=names, usecols=[1, 2], header=0) + + expected = DataFrame([[2, 3], [5, 6], [8, 9], [11, 12]], columns=names) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "names,usecols", [(["b", "c"], [1, 2]), (["a", "b", "c"], ["b", "c"])] +) +def test_usecols_relative_to_names(all_parsers, names, usecols): + data = """\ +1,2,3 +4,5,6 +7,8,9 +10,11,12""" + parser = all_parsers + result = parser.read_csv(StringIO(data), names=names, header=None, usecols=usecols) + + expected = DataFrame([[2, 3], [5, 6], [8, 9], [11, 12]], columns=["b", "c"]) + tm.assert_frame_equal(result, expected) + + +def test_usecols_relative_to_names2(all_parsers): + # see gh-5766 + data = """\ +1,2,3 +4,5,6 +7,8,9 +10,11,12""" + parser = all_parsers + result = parser.read_csv( + StringIO(data), names=["a", "b"], header=None, usecols=[0, 1] + ) + + expected = DataFrame([[1, 2], [4, 5], [7, 8], [10, 11]], columns=["a", "b"]) + tm.assert_frame_equal(result, expected) + + +def test_usecols_name_length_conflict(all_parsers): + data = """\ +1,2,3 +4,5,6 +7,8,9 +10,11,12""" + parser = all_parsers + msg = "Number of passed names did not match number of header fields in the file" + + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), names=["a", "b"], header=None, usecols=[1]) + + +def test_usecols_single_string(all_parsers): + # see gh-20558 + parser = all_parsers + data = """foo, bar, baz +1000, 2000, 3000 +4000, 5000, 6000""" + + with pytest.raises(ValueError, match=_msg_validate_usecols_arg): + parser.read_csv(StringIO(data), usecols="foo") + + +@pytest.mark.parametrize( + "data", ["a,b,c,d\n1,2,3,4\n5,6,7,8", "a,b,c,d\n1,2,3,4,\n5,6,7,8,"] +) +def test_usecols_index_col_false(all_parsers, data): + # see gh-9082 + parser = all_parsers + usecols = ["a", "c", "d"] + expected = DataFrame({"a": [1, 5], "c": [3, 7], "d": [4, 8]}) + + result = parser.read_csv(StringIO(data), usecols=usecols, index_col=False) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("index_col", ["b", 0]) +@pytest.mark.parametrize("usecols", [["b", "c"], [1, 
2]]) +def test_usecols_index_col_conflict(all_parsers, usecols, index_col): + # see gh-4201: test that index_col as integer reflects usecols + parser = all_parsers + data = "a,b,c,d\nA,a,1,one\nB,b,2,two" + expected = DataFrame({"c": [1, 2]}, index=Index(["a", "b"], name="b")) + + result = parser.read_csv(StringIO(data), usecols=usecols, index_col=index_col) + tm.assert_frame_equal(result, expected) + + +def test_usecols_index_col_conflict2(all_parsers): + # see gh-4201: test that index_col as integer reflects usecols + parser = all_parsers + data = "a,b,c,d\nA,a,1,one\nB,b,2,two" + + expected = DataFrame({"b": ["a", "b"], "c": [1, 2], "d": ("one", "two")}) + expected = expected.set_index(["b", "c"]) + + result = parser.read_csv( + StringIO(data), usecols=["b", "c", "d"], index_col=["b", "c"] + ) + tm.assert_frame_equal(result, expected) + + +def test_usecols_implicit_index_col(all_parsers): + # see gh-2654 + parser = all_parsers + data = "a,b,c\n4,apple,bat,5.7\n8,orange,cow,10" + + result = parser.read_csv(StringIO(data), usecols=["a", "b"]) + expected = DataFrame({"a": ["apple", "orange"], "b": ["bat", "cow"]}, index=[4, 8]) + tm.assert_frame_equal(result, expected) + + +def test_usecols_index_col_middle(all_parsers): + # GH#9098 + parser = all_parsers + data = """a,b,c,d +1,2,3,4 +""" + result = parser.read_csv(StringIO(data), usecols=["b", "c", "d"], index_col="c") + expected = DataFrame({"b": [2], "d": [4]}, index=Index([3], name="c")) + tm.assert_frame_equal(result, expected) + + +def test_usecols_index_col_end(all_parsers): + # GH#9098 + parser = all_parsers + data = """a,b,c,d +1,2,3,4 +""" + result = parser.read_csv(StringIO(data), usecols=["b", "c", "d"], index_col="d") + expected = DataFrame({"b": [2], "c": [3]}, index=Index([4], name="d")) + tm.assert_frame_equal(result, expected) + + +def test_usecols_regex_sep(all_parsers): + # see gh-2733 + parser = all_parsers + data = "a b c\n4 apple bat 5.7\n8 orange cow 10" + result = parser.read_csv(StringIO(data), sep=r"\s+", usecols=("a", "b")) + + expected = DataFrame({"a": ["apple", "orange"], "b": ["bat", "cow"]}, index=[4, 8]) + tm.assert_frame_equal(result, expected) + + +def test_usecols_with_whitespace(all_parsers): + parser = all_parsers + data = "a b c\n4 apple bat 5.7\n8 orange cow 10" + + result = parser.read_csv(StringIO(data), delim_whitespace=True, usecols=("a", "b")) + expected = DataFrame({"a": ["apple", "orange"], "b": ["bat", "cow"]}, index=[4, 8]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "usecols,expected", + [ + # Column selection by index. + ([0, 1], DataFrame(data=[[1000, 2000], [4000, 5000]], columns=["2", "0"])), + # Column selection by name. 
+ ( + ["0", "1"], + DataFrame(data=[[2000, 3000], [5000, 6000]], columns=["0", "1"]), + ), + ], +) +def test_usecols_with_integer_like_header(all_parsers, usecols, expected): + parser = all_parsers + data = """2,0,1 +1000,2000,3000 +4000,5000,6000""" + + result = parser.read_csv(StringIO(data), usecols=usecols) + tm.assert_frame_equal(result, expected) + + +def test_empty_usecols(all_parsers): + data = "a,b,c\n1,2,3\n4,5,6" + expected = DataFrame() + parser = all_parsers + + result = parser.read_csv(StringIO(data), usecols=set()) + tm.assert_frame_equal(result, expected) + + +def test_np_array_usecols(all_parsers): + # see gh-12546 + parser = all_parsers + data = "a,b,c\n1,2,3" + usecols = np.array(["a", "b"]) + + expected = DataFrame([[1, 2]], columns=usecols) + result = parser.read_csv(StringIO(data), usecols=usecols) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "usecols,expected", + [ + ( + lambda x: x.upper() in ["AAA", "BBB", "DDD"], + DataFrame( + { + "AaA": { + 0: 0.056674972999999997, + 1: 2.6132309819999997, + 2: 3.5689350380000002, + }, + "bBb": {0: 8, 1: 2, 2: 7}, + "ddd": {0: "a", 1: "b", 2: "a"}, + } + ), + ), + (lambda x: False, DataFrame()), + ], +) +def test_callable_usecols(all_parsers, usecols, expected): + # see gh-14154 + data = """AaA,bBb,CCC,ddd +0.056674973,8,True,a +2.613230982,2,False,b +3.568935038,7,False,a""" + parser = all_parsers + + result = parser.read_csv(StringIO(data), usecols=usecols) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("usecols", [["a", "c"], lambda x: x in ["a", "c"]]) +def test_incomplete_first_row(all_parsers, usecols): + # see gh-6710 + data = "1,2\n1,2,3" + parser = all_parsers + names = ["a", "b", "c"] + expected = DataFrame({"a": [1, 1], "c": [np.nan, 3]}) + + result = parser.read_csv(StringIO(data), names=names, usecols=usecols) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "data,usecols,kwargs,expected", + [ + # see gh-8985 + ( + "19,29,39\n" * 2 + "10,20,30,40", + [0, 1, 2], + {"header": None}, + DataFrame([[19, 29, 39], [19, 29, 39], [10, 20, 30]]), + ), + # see gh-9549 + ( + ("A,B,C\n1,2,3\n3,4,5\n1,2,4,5,1,6\n1,2,3,,,1,\n1,2,3\n5,6,7"), + ["A", "B", "C"], + {}, + DataFrame( + { + "A": [1, 3, 1, 1, 1, 5], + "B": [2, 4, 2, 2, 2, 6], + "C": [3, 5, 4, 3, 3, 7], + } + ), + ), + ], +) +def test_uneven_length_cols(all_parsers, data, usecols, kwargs, expected): + # see gh-8985 + parser = all_parsers + result = parser.read_csv(StringIO(data), usecols=usecols, **kwargs) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "usecols,kwargs,expected,msg", + [ + ( + ["a", "b", "c", "d"], + {}, + DataFrame({"a": [1, 5], "b": [2, 6], "c": [3, 7], "d": [4, 8]}), + None, + ), + ( + ["a", "b", "c", "f"], + {}, + None, + _msg_validate_usecols_names.format(r"\['f'\]"), + ), + (["a", "b", "f"], {}, None, _msg_validate_usecols_names.format(r"\['f'\]")), + ( + ["a", "b", "f", "g"], + {}, + None, + _msg_validate_usecols_names.format(r"\[('f', 'g'|'g', 'f')\]"), + ), + # see gh-14671 + ( + None, + {"header": 0, "names": ["A", "B", "C", "D"]}, + DataFrame({"A": [1, 5], "B": [2, 6], "C": [3, 7], "D": [4, 8]}), + None, + ), + ( + ["A", "B", "C", "f"], + {"header": 0, "names": ["A", "B", "C", "D"]}, + None, + _msg_validate_usecols_names.format(r"\['f'\]"), + ), + ( + ["A", "B", "f"], + {"names": ["A", "B", "C", "D"]}, + None, + _msg_validate_usecols_names.format(r"\['f'\]"), + ), + ], +) +def test_raises_on_usecols_names_mismatch(all_parsers, usecols, 
kwargs, expected, msg):
+    data = "a,b,c,d\n1,2,3,4\n5,6,7,8"
+    kwargs.update(usecols=usecols)
+    parser = all_parsers
+
+    if expected is None:
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(StringIO(data), **kwargs)
+    else:
+        result = parser.read_csv(StringIO(data), **kwargs)
+        tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("usecols", [["A", "C"], [0, 2]])
+def test_usecols_subset_names_mismatch_orig_columns(all_parsers, usecols):
+    data = "a,b,c,d\n1,2,3,4\n5,6,7,8"
+    names = ["A", "B", "C", "D"]
+    parser = all_parsers
+
+    result = parser.read_csv(StringIO(data), header=0, names=names, usecols=usecols)
+    expected = DataFrame({"A": [1, 5], "C": [3, 7]})
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("names", [None, ["a", "b"]])
+def test_usecols_indices_out_of_bounds(all_parsers, names):
+    # GH#25623
+    parser = all_parsers
+    data = """
+a,b
+1,2
+    """
+    with tm.assert_produces_warning(
+        FutureWarning, check_stacklevel=False, raise_on_extra_warnings=False
+    ):
+        result = parser.read_csv(StringIO(data), usecols=[0, 2], names=names, header=0)
+    expected = DataFrame({"a": [1], "b": [None]})
+    if names is None and parser.engine == "python":
+        expected = DataFrame({"a": [1]})
+    tm.assert_frame_equal(result, expected)
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/__init__.py
new file mode 100644
index 0000000..cbf848a
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/__init__.py
@@ -0,0 +1,15 @@
+import pytest
+
+pytestmark = [
+    # pytables https://github.com/PyTables/PyTables/issues/822
+    pytest.mark.filterwarnings(
+        "ignore:a closed node found in the registry:UserWarning"
+    ),
+    pytest.mark.filterwarnings(r"ignore:tostring\(\) is deprecated:DeprecationWarning"),
+    pytest.mark.filterwarnings(
+        r"ignore:`np\.object` is a deprecated alias:DeprecationWarning"
+    ),
+    pytest.mark.filterwarnings(
+        r"ignore:`np\.bool` is a deprecated alias:DeprecationWarning"
+    ),
+]
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/common.py b/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/common.py
new file mode 100644
index 0000000..67c3a29
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/common.py
@@ -0,0 +1,84 @@
+from contextlib import contextmanager
+import os
+import tempfile
+
+import pytest
+
+from pandas.io.pytables import HDFStore
+
+tables = pytest.importorskip("tables")
+# set these parameters so we don't have file sharing
+tables.parameters.MAX_NUMEXPR_THREADS = 1
+tables.parameters.MAX_BLOSC_THREADS = 1
+tables.parameters.MAX_THREADS = 1
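+
+# Typical usage of the helpers below (a sketch; `df` stands in for whatever
+# frame a test wants to round-trip):
+#
+#     with ensure_clean_store("tmp.h5") as store:
+#         store.append("df", df)
+#         result = store.select("df")
+#
+# ensure_clean_store yields an open HDFStore backed by a temporary file and
+# removes that file afterwards, so individual tests cannot leak state.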
+
+
+def safe_remove(path):
+    if path is not None:
+        try:
+            os.remove(path)  # noqa: PDF008
+        except OSError:
+            pass
+
+
+def safe_close(store):
+    try:
+        if store is not None:
+            store.close()
+    except OSError:
+        pass
+
+
+def create_tempfile(path):
+    """create an unopened named temporary file"""
+    return os.path.join(tempfile.gettempdir(), path)
+
+
+# contextmanager to ensure the file is cleaned up
+@contextmanager
+def ensure_clean_store(path, mode="a", complevel=None, complib=None, fletcher32=False):
+
+    try:
+
+        # put in the temporary path if we don't have one already
+        if not len(os.path.dirname(path)):
+            path = create_tempfile(path)
+
+        store = HDFStore(
+            path, mode=mode, complevel=complevel, complib=complib, fletcher32=fletcher32
+        )
+        yield store
+    finally:
+        safe_close(store)
+        if mode == "w" or mode == "a":
+            safe_remove(path)
+
+
+@contextmanager
+def ensure_clean_path(path):
+    """
+    Return essentially a named temporary file that is not opened and is
+    deleted on exiting; if path is a list, then create and return a list
+    of filenames.
+    """
+    try:
+        if isinstance(path, list):
+            filenames = [create_tempfile(p) for p in path]
+            yield filenames
+        else:
+            filenames = [create_tempfile(path)]
+            yield filenames[0]
+    finally:
+        for f in filenames:
+            safe_remove(f)
+
+
+def _maybe_remove(store, key):
+    """
+    For tests using tables, try removing the table to be sure there is
+    no content from previous tests using the same table name.
+    """
+    try:
+        store.remove(key)
+    except (ValueError, KeyError):
+        pass
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/conftest.py b/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/conftest.py
new file mode 100644
index 0000000..988f78c
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/conftest.py
@@ -0,0 +1,17 @@
+import pytest
+
+import pandas._testing as tm
+
+
+@pytest.fixture
+def setup_path():
+    """Fixture for setup path"""
+    return f"tmp.__{tm.rands(10)}__.h5"
+
+
+@pytest.fixture(scope="module", autouse=True)
+def setup_mode():
+    """Reset testing mode fixture"""
+    tm.reset_testing_mode()
+    yield
+    tm.set_testing_mode()
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_append.py b/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_append.py
new file mode 100644
index 0000000..5544b81
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_append.py
@@ -0,0 +1,940 @@
+import datetime
+from datetime import timedelta
+import re
+from warnings import catch_warnings
+
+import numpy as np
+import pytest
+
+from pandas._libs.tslibs import Timestamp
+import pandas.util._test_decorators as td
+
+import pandas as pd
+from pandas import (
+    DataFrame,
+    Series,
+    _testing as tm,
+    concat,
+    date_range,
+    read_hdf,
+)
+from pandas.tests.io.pytables.common import (
+    _maybe_remove,
+    ensure_clean_path,
+    ensure_clean_store,
+)
+
+pytestmark = pytest.mark.single
+
+
+@pytest.mark.filterwarnings("ignore:object name:tables.exceptions.NaturalNameWarning")
+def test_append(setup_path):
+
+    with ensure_clean_store(setup_path) as store:
+
+        # this is allowed, but you almost always don't want to do it
+        # (tables.NaturalNameWarning)
+        with catch_warnings(record=True):
+
+            df = tm.makeTimeDataFrame()
+            _maybe_remove(store, "df1")
+            store.append("df1", df[:10])
+            store.append("df1", df[10:])
+            tm.assert_frame_equal(store["df1"], df)
+
+            _maybe_remove(store, "df2")
+            store.put("df2", df[:10], format="table")
+            store.append("df2", df[10:])
+            tm.assert_frame_equal(store["df2"], df)
+
+            _maybe_remove(store, "df3")
+            store.append("/df3", df[:10])
+            store.append("/df3", df[10:])
+            tm.assert_frame_equal(store["df3"], df)
+
+            # this is allowed, but you almost always don't want to do it
+            # tables.NaturalNameWarning
+            _maybe_remove(store, "/df3 foo")
+            store.append("/df3 foo", df[:10])
+            store.append("/df3 foo", df[10:])
+            tm.assert_frame_equal(store["df3 foo"], df)
+
+        # dtype issues - mixed type in a single object column
+        df = DataFrame(data=[[1, 2], [0, 1], [1, 2], [0, 0]])
+        df["mixed_column"] = "testing"
+        df.loc[2, "mixed_column"] = np.nan
+        _maybe_remove(store, "df")
+        store.append("df", df)
+        tm.assert_frame_equal(store["df"], df)
+
+        # uints - test storage of uints
+        uint_data = DataFrame(
+            {
+                "u08": Series(
+                    np.random.randint(0, high=255, size=5), dtype=np.uint8
+                ),
+                "u16": Series(
+                    np.random.randint(0, high=65535, size=5), dtype=np.uint16
+                ),
+                "u32": Series(
+                    np.random.randint(0, high=2 ** 30, size=5), dtype=np.uint32
+                ),
+                "u64": Series(
+                    [2 ** 58, 2 ** 59, 2 ** 60, 2 ** 61, 2 ** 62],
+                    dtype=np.uint64,
+                ),
+            },
+            index=np.arange(5),
+        )
+        _maybe_remove(store, "uints")
+        store.append("uints", uint_data)
+        tm.assert_frame_equal(store["uints"], uint_data)
+
+        # uints - test storage of uints in indexable columns
+        _maybe_remove(store, "uints")
+        # 64-bit indices not yet supported
+        store.append("uints", uint_data, data_columns=["u08", "u16", "u32"])
+        tm.assert_frame_equal(store["uints"], uint_data)
+
+
+def test_append_series(setup_path):
+
+    with ensure_clean_store(setup_path) as store:
+
+        # basic
+        ss = tm.makeStringSeries()
+        ts = tm.makeTimeSeries()
+        ns = Series(np.arange(100))
+
+        store.append("ss", ss)
+        result = store["ss"]
+        tm.assert_series_equal(result, ss)
+        assert result.name is None
+
+        store.append("ts", ts)
+        result = store["ts"]
+        tm.assert_series_equal(result, ts)
+        assert result.name is None
+
+        ns.name = "foo"
+        store.append("ns", ns)
+        result = store["ns"]
+        tm.assert_series_equal(result, ns)
+        assert result.name == ns.name
+
+        # select on the values
+        expected = ns[ns > 60]
+        result = store.select("ns", "foo>60")
+        tm.assert_series_equal(result, expected)
+
+        # select on the index and values
+        expected = ns[(ns > 70) & (ns.index < 90)]
+        result = store.select("ns", "foo>70 and index<90")
+        tm.assert_series_equal(result, expected)
+
+        # multi-index
+        mi = DataFrame(np.random.randn(5, 1), columns=["A"])
+        mi["B"] = np.arange(len(mi))
+        mi["C"] = "foo"
+        mi.loc[3:5, "C"] = "bar"
+        mi.set_index(["C", "B"], inplace=True)
+        s = mi.stack()
+        s.index = s.index.droplevel(2)
+        store.append("mi", s)
+        tm.assert_series_equal(store["mi"], s)
+
+
+def test_append_some_nans(setup_path):
+
+    with ensure_clean_store(setup_path) as store:
+        df = DataFrame(
+            {
+                "A": Series(np.random.randn(20)).astype("int32"),
+                "A1": np.random.randn(20),
+                "A2": np.random.randn(20),
+                "B": "foo",
+                "C": "bar",
+                "D": Timestamp("20010101"),
+                "E": datetime.datetime(2001, 1, 2, 0, 0),
+            },
+            index=np.arange(20),
+        )
+        # some nans
+        _maybe_remove(store, "df1")
+        df.loc[0:15, ["A1", "B", "D", "E"]] = np.nan
+        store.append("df1", df[:10])
+        store.append("df1", df[10:])
+        tm.assert_frame_equal(store["df1"], df)
+
+        # first column
+        df1 = df.copy()
+        df1.loc[:, "A1"] = np.nan
+        _maybe_remove(store, "df1")
+        store.append("df1", df1[:10])
+        store.append("df1", df1[10:])
+        tm.assert_frame_equal(store["df1"], df1)
+
+        # 2nd column
+        df2 = df.copy()
+        df2.loc[:, "A2"] = np.nan
+        _maybe_remove(store, "df2")
+        store.append("df2", df2[:10])
+        store.append("df2", df2[10:])
+        tm.assert_frame_equal(store["df2"], df2)
+
+        # datetimes
+        df3 = df.copy()
+        df3.loc[:, "E"] = np.nan
+        _maybe_remove(store, "df3")
+        store.append("df3", df3[:10])
+        store.append("df3", df3[10:])
+        tm.assert_frame_equal(store["df3"], df3)
+
+
+def test_append_all_nans(setup_path):
+
+    with ensure_clean_store(setup_path) as store:
+
+        df = DataFrame(
+            {"A1": np.random.randn(20), "A2": np.random.randn(20)},
+            index=np.arange(20),
+        )
+        df.loc[0:15, :] = np.nan
+
+        # nan some entire rows (dropna=True)
+        _maybe_remove(store, "df")
+        store.append("df", df[:10], dropna=True)
+        store.append("df", df[10:], dropna=True)
+        tm.assert_frame_equal(store["df"], df[-4:])
+
+        # nan some entire rows (dropna=False)
+        _maybe_remove(store, "df2")
+        store.append("df2", df[:10], dropna=False)
+        store.append("df2", df[10:], dropna=False)
+        tm.assert_frame_equal(store["df2"], df)
+
+        # tests the option io.hdf.dropna_table
+        pd.set_option("io.hdf.dropna_table", False)
+        _maybe_remove(store, "df3")
+        store.append("df3", df[:10])
+        store.append("df3", df[10:])
+        tm.assert_frame_equal(store["df3"], df)
+
+        pd.set_option("io.hdf.dropna_table", True)
+        _maybe_remove(store, "df4")
+        store.append("df4", df[:10])
+        store.append("df4", df[10:])
+        tm.assert_frame_equal(store["df4"], df[-4:])
+
+        # nan some entire rows (strings are still written!)
+        df = DataFrame(
+            {
+                "A1": np.random.randn(20),
+                "A2": np.random.randn(20),
+                "B": "foo",
+                "C": "bar",
+            },
+            index=np.arange(20),
+        )
+
+        df.loc[0:15, :] = np.nan
+
+        _maybe_remove(store, "df")
+        store.append("df", df[:10], dropna=True)
+        store.append("df", df[10:], dropna=True)
+        tm.assert_frame_equal(store["df"], df)
+
+        _maybe_remove(store, "df2")
+        store.append("df2", df[:10], dropna=False)
+        store.append("df2", df[10:], dropna=False)
+        tm.assert_frame_equal(store["df2"], df)
+
+        # nan some entire rows (but since we have dates they are still
+        # written!)
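+        # (dropna=True only skips rows in which every column is NaN; the
+        # Timestamp/datetime columns in the frame below are always populated,
+        # so every row is written regardless of the dropna setting)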
+ df = DataFrame( + { + "A1": np.random.randn(20), + "A2": np.random.randn(20), + "B": "foo", + "C": "bar", + "D": Timestamp("20010101"), + "E": datetime.datetime(2001, 1, 2, 0, 0), + }, + index=np.arange(20), + ) + + df.loc[0:15, :] = np.nan + + _maybe_remove(store, "df") + store.append("df", df[:10], dropna=True) + store.append("df", df[10:], dropna=True) + tm.assert_frame_equal(store["df"], df) + + _maybe_remove(store, "df2") + store.append("df2", df[:10], dropna=False) + store.append("df2", df[10:], dropna=False) + tm.assert_frame_equal(store["df2"], df) + + +def test_append_frame_column_oriented(setup_path): + with ensure_clean_store(setup_path) as store: + + # column oriented + df = tm.makeTimeDataFrame() + df.index = df.index._with_freq(None) # freq doesn't round-trip + + _maybe_remove(store, "df1") + store.append("df1", df.iloc[:, :2], axes=["columns"]) + store.append("df1", df.iloc[:, 2:]) + tm.assert_frame_equal(store["df1"], df) + + result = store.select("df1", "columns=A") + expected = df.reindex(columns=["A"]) + tm.assert_frame_equal(expected, result) + + # selection on the non-indexable + result = store.select("df1", ("columns=A", "index=df.index[0:4]")) + expected = df.reindex(columns=["A"], index=df.index[0:4]) + tm.assert_frame_equal(expected, result) + + # this isn't supported + msg = re.escape( + "passing a filterable condition to a non-table indexer " + "[Filter: Not Initialized]" + ) + with pytest.raises(TypeError, match=msg): + store.select("df1", "columns=A and index>df.index[4]") + + +def test_append_with_different_block_ordering(setup_path): + + # GH 4096; using same frames, but different block orderings + with ensure_clean_store(setup_path) as store: + + for i in range(10): + + df = DataFrame(np.random.randn(10, 2), columns=list("AB")) + df["index"] = range(10) + df["index"] += i * 10 + df["int64"] = Series([1] * len(df), dtype="int64") + df["int16"] = Series([1] * len(df), dtype="int16") + + if i % 2 == 0: + del df["int64"] + df["int64"] = Series([1] * len(df), dtype="int64") + if i % 3 == 0: + a = df.pop("A") + df["A"] = a + + df.set_index("index", inplace=True) + + store.append("df", df) + + # test a different ordering but with more fields (like invalid + # combinations) + with ensure_clean_store(setup_path) as store: + + df = DataFrame(np.random.randn(10, 2), columns=list("AB"), dtype="float64") + df["int64"] = Series([1] * len(df), dtype="int64") + df["int16"] = Series([1] * len(df), dtype="int16") + store.append("df", df) + + # store additional fields in different blocks + df["int16_2"] = Series([1] * len(df), dtype="int16") + msg = re.escape( + "cannot match existing table structure for [int16] on appending data" + ) + with pytest.raises(ValueError, match=msg): + store.append("df", df) + + # store multiple additional fields in different blocks + df["float_3"] = Series([1.0] * len(df), dtype="float64") + msg = re.escape( + "cannot match existing table structure for [A,B] on appending data" + ) + with pytest.raises(ValueError, match=msg): + store.append("df", df) + + +def test_append_with_strings(setup_path): + + with ensure_clean_store(setup_path) as store: + with catch_warnings(record=True): + + def check_col(key, name, size): + assert ( + getattr(store.get_storer(key).table.description, name).itemsize + == size + ) + + # avoid truncation on elements + df = DataFrame([[123, "asdqwerty"], [345, "dggnhebbsdfbdfb"]]) + store.append("df_big", df) + tm.assert_frame_equal(store.select("df_big"), df) + check_col("df_big", "values_block_1", 15) + + # appending 
smaller string ok
+            df2 = DataFrame([[124, "asdqy"], [346, "dggnhefbdfb"]])
+            store.append("df_big", df2)
+            expected = concat([df, df2])
+            tm.assert_frame_equal(store.select("df_big"), expected)
+            check_col("df_big", "values_block_1", 15)
+
+            # avoid truncation on elements
+            df = DataFrame([[123, "asdqwerty"], [345, "dggnhebbsdfbdfb"]])
+            store.append("df_big2", df, min_itemsize={"values": 50})
+            tm.assert_frame_equal(store.select("df_big2"), df)
+            check_col("df_big2", "values_block_1", 50)
+
+            # bigger string on next append
+            store.append("df_new", df)
+            df_new = DataFrame(
+                [[124, "abcdefqhij"], [346, "abcdefghijklmnopqrtsuvwxyz"]]
+            )
+            msg = (
+                r"Trying to store a string with len \[26\] in "
+                r"\[values_block_1\] column but\n"
+                r"this column has a limit of \[15\]!\n"
+                "Consider using min_itemsize to preset the sizes on these "
+                "columns"
+            )
+            with pytest.raises(ValueError, match=msg):
+                store.append("df_new", df_new)
+
+            # min_itemsize on Series index (GH 11412)
+            df = tm.makeMixedDataFrame().set_index("C")
+            store.append("ss", df["B"], min_itemsize={"index": 4})
+            tm.assert_series_equal(store.select("ss"), df["B"])
+
+            # same as above, with data_columns=True
+            store.append("ss2", df["B"], data_columns=True, min_itemsize={"index": 4})
+            tm.assert_series_equal(store.select("ss2"), df["B"])
+
+            # min_itemsize in index without appending (GH 10381)
+            store.put("ss3", df, format="table", min_itemsize={"index": 6})
+            # just make sure there is a longer string:
+            df2 = df.copy().reset_index().assign(C="longer").set_index("C")
+            store.append("ss3", df2)
+            tm.assert_frame_equal(store.select("ss3"), concat([df, df2]))
+
+            # same as above, with a Series
+            store.put("ss4", df["B"], format="table", min_itemsize={"index": 6})
+            store.append("ss4", df2["B"])
+            tm.assert_series_equal(store.select("ss4"), concat([df["B"], df2["B"]]))
+
+            # with nans
+            _maybe_remove(store, "df")
+            df = tm.makeTimeDataFrame()
+            df["string"] = "foo"
+            df.loc[df.index[1:4], "string"] = np.nan
+            df["string2"] = "bar"
+            df.loc[df.index[4:8], "string2"] = np.nan
+            df["string3"] = "bah"
+            df.loc[df.index[1:], "string3"] = np.nan
+            store.append("df", df)
+            result = store.select("df")
+            tm.assert_frame_equal(result, df)
+
+    with ensure_clean_store(setup_path) as store:
+
+        def check_col(key, name, size):
+            assert (
+                getattr(store.get_storer(key).table.description, name).itemsize == size
+            )
+
+        df = DataFrame({"A": "foo", "B": "bar"}, index=range(10))
+
+        # a min_itemsize that creates a data_column
+        _maybe_remove(store, "df")
+        store.append("df", df, min_itemsize={"A": 200})
+        check_col("df", "A", 200)
+        assert store.get_storer("df").data_columns == ["A"]
+
+        # a min_itemsize that creates a data_column2
+        _maybe_remove(store, "df")
+        store.append("df", df, data_columns=["B"], min_itemsize={"A": 200})
+        check_col("df", "A", 200)
+        assert store.get_storer("df").data_columns == ["B", "A"]
+
+        # a min_itemsize that creates a data_column2
+        _maybe_remove(store, "df")
+        store.append("df", df, data_columns=["B"], min_itemsize={"values": 200})
+        check_col("df", "B", 200)
+        check_col("df", "values_block_0", 200)
+        assert store.get_storer("df").data_columns == ["B"]
+
+        # infer the .typ on subsequent appends
+        _maybe_remove(store, "df")
+        store.append("df", df[:5], min_itemsize=200)
+        store.append("df", df[5:], min_itemsize=200)
+        tm.assert_frame_equal(store["df"], df)
+
+        # invalid min_itemsize keys
+        df = DataFrame(["foo", "foo", "foo", "barh", "barh", "barh"], columns=["A"])
+        _maybe_remove(store, "df")
+        msg = re.escape(
+            "min_itemsize 
has the key [foo] which is not an axis or data_column" + ) + with pytest.raises(ValueError, match=msg): + store.append("df", df, min_itemsize={"foo": 20, "foobar": 20}) + + +def test_append_with_empty_string(setup_path): + + with ensure_clean_store(setup_path) as store: + + # with all empty strings (GH 12242) + df = DataFrame({"x": ["a", "b", "c", "d", "e", "f", ""]}) + store.append("df", df[:-1], min_itemsize={"x": 1}) + store.append("df", df[-1:], min_itemsize={"x": 1}) + tm.assert_frame_equal(store.select("df"), df) + + +def test_append_with_data_columns(setup_path): + + with ensure_clean_store(setup_path) as store: + df = tm.makeTimeDataFrame() + df.iloc[0, df.columns.get_loc("B")] = 1.0 + _maybe_remove(store, "df") + store.append("df", df[:2], data_columns=["B"]) + store.append("df", df[2:]) + tm.assert_frame_equal(store["df"], df) + + # check that we have indices created + assert store._handle.root.df.table.cols.index.is_indexed is True + assert store._handle.root.df.table.cols.B.is_indexed is True + + # data column searching + result = store.select("df", "B>0") + expected = df[df.B > 0] + tm.assert_frame_equal(result, expected) + + # data column searching (with an indexable and a data_columns) + result = store.select("df", "B>0 and index>df.index[3]") + df_new = df.reindex(index=df.index[4:]) + expected = df_new[df_new.B > 0] + tm.assert_frame_equal(result, expected) + + # data column selection with a string data_column + df_new = df.copy() + df_new["string"] = "foo" + df_new.loc[df_new.index[1:4], "string"] = np.nan + df_new.loc[df_new.index[5:6], "string"] = "bar" + _maybe_remove(store, "df") + store.append("df", df_new, data_columns=["string"]) + result = store.select("df", "string='foo'") + expected = df_new[df_new.string == "foo"] + tm.assert_frame_equal(result, expected) + + # using min_itemsize and a data column + def check_col(key, name, size): + assert ( + getattr(store.get_storer(key).table.description, name).itemsize == size + ) + + with ensure_clean_store(setup_path) as store: + _maybe_remove(store, "df") + store.append("df", df_new, data_columns=["string"], min_itemsize={"string": 30}) + check_col("df", "string", 30) + _maybe_remove(store, "df") + store.append("df", df_new, data_columns=["string"], min_itemsize=30) + check_col("df", "string", 30) + _maybe_remove(store, "df") + store.append("df", df_new, data_columns=["string"], min_itemsize={"values": 30}) + check_col("df", "string", 30) + + with ensure_clean_store(setup_path) as store: + df_new["string2"] = "foobarbah" + df_new["string_block1"] = "foobarbah1" + df_new["string_block2"] = "foobarbah2" + _maybe_remove(store, "df") + store.append( + "df", + df_new, + data_columns=["string", "string2"], + min_itemsize={"string": 30, "string2": 40, "values": 50}, + ) + check_col("df", "string", 30) + check_col("df", "string2", 40) + check_col("df", "values_block_1", 50) + + with ensure_clean_store(setup_path) as store: + # multiple data columns + df_new = df.copy() + df_new.iloc[0, df_new.columns.get_loc("A")] = 1.0 + df_new.iloc[0, df_new.columns.get_loc("B")] = -1.0 + df_new["string"] = "foo" + + sl = df_new.columns.get_loc("string") + df_new.iloc[1:4, sl] = np.nan + df_new.iloc[5:6, sl] = "bar" + + df_new["string2"] = "foo" + sl = df_new.columns.get_loc("string2") + df_new.iloc[2:5, sl] = np.nan + df_new.iloc[7:8, sl] = "bar" + _maybe_remove(store, "df") + store.append("df", df_new, data_columns=["A", "B", "string", "string2"]) + result = store.select("df", "string='foo' and string2='foo' and A>0 and B<0") + expected = 
df_new[ + (df_new.string == "foo") + & (df_new.string2 == "foo") + & (df_new.A > 0) + & (df_new.B < 0) + ] + tm.assert_frame_equal(result, expected, check_freq=False) + # FIXME: 2020-05-07 freq check randomly fails in the CI + + # yield an empty frame + result = store.select("df", "string='foo' and string2='cool'") + expected = df_new[(df_new.string == "foo") & (df_new.string2 == "cool")] + tm.assert_frame_equal(result, expected) + + with ensure_clean_store(setup_path) as store: + # doc example + df_dc = df.copy() + df_dc["string"] = "foo" + df_dc.loc[df_dc.index[4:6], "string"] = np.nan + df_dc.loc[df_dc.index[7:9], "string"] = "bar" + df_dc["string2"] = "cool" + df_dc["datetime"] = Timestamp("20010102") + df_dc = df_dc._convert(datetime=True) + df_dc.loc[df_dc.index[3:5], ["A", "B", "datetime"]] = np.nan + + _maybe_remove(store, "df_dc") + store.append( + "df_dc", df_dc, data_columns=["B", "C", "string", "string2", "datetime"] + ) + result = store.select("df_dc", "B>0") + + expected = df_dc[df_dc.B > 0] + tm.assert_frame_equal(result, expected) + + result = store.select("df_dc", ["B > 0", "C > 0", "string == foo"]) + expected = df_dc[(df_dc.B > 0) & (df_dc.C > 0) & (df_dc.string == "foo")] + tm.assert_frame_equal(result, expected, check_freq=False) + # FIXME: 2020-12-07 intermittent build failures here with freq of + # None instead of BDay(4) + + with ensure_clean_store(setup_path) as store: + # doc example part 2 + np.random.seed(1234) + index = date_range("1/1/2000", periods=8) + df_dc = DataFrame(np.random.randn(8, 3), index=index, columns=["A", "B", "C"]) + df_dc["string"] = "foo" + df_dc.loc[df_dc.index[4:6], "string"] = np.nan + df_dc.loc[df_dc.index[7:9], "string"] = "bar" + df_dc.loc[:, ["B", "C"]] = df_dc.loc[:, ["B", "C"]].abs() + df_dc["string2"] = "cool" + + # on-disk operations + store.append("df_dc", df_dc, data_columns=["B", "C", "string", "string2"]) + + result = store.select("df_dc", "B>0") + expected = df_dc[df_dc.B > 0] + tm.assert_frame_equal(result, expected) + + result = store.select("df_dc", ["B > 0", "C > 0", 'string == "foo"']) + expected = df_dc[(df_dc.B > 0) & (df_dc.C > 0) & (df_dc.string == "foo")] + tm.assert_frame_equal(result, expected) + + +def test_append_hierarchical(setup_path, multiindex_dataframe_random_data): + df = multiindex_dataframe_random_data + df.columns.name = None + + with ensure_clean_store(setup_path) as store: + store.append("mi", df) + result = store.select("mi") + tm.assert_frame_equal(result, df) + + # GH 3748 + result = store.select("mi", columns=["A", "B"]) + expected = df.reindex(columns=["A", "B"]) + tm.assert_frame_equal(result, expected) + + with ensure_clean_path("test.hdf") as path: + df.to_hdf(path, "df", format="table") + result = read_hdf(path, "df", columns=["A", "B"]) + expected = df.reindex(columns=["A", "B"]) + tm.assert_frame_equal(result, expected) + + +def test_append_misc(setup_path): + + with ensure_clean_store(setup_path) as store: + df = tm.makeDataFrame() + store.append("df", df, chunksize=1) + result = store.select("df") + tm.assert_frame_equal(result, df) + + store.append("df1", df, expectedrows=10) + result = store.select("df1") + tm.assert_frame_equal(result, df) + + +@pytest.mark.parametrize("chunksize", [10, 200, 1000]) +def test_append_misc_chunksize(setup_path, chunksize): + # more chunksize in append tests + df = tm.makeDataFrame() + df["string"] = "foo" + df["float322"] = 1.0 + df["float322"] = df["float322"].astype("float32") + df["bool"] = df["float322"] > 0 + df["time1"] = Timestamp("20130101") + 
df["time2"] = Timestamp("20130102") + with ensure_clean_store(setup_path, mode="w") as store: + store.append("obj", df, chunksize=chunksize) + result = store.select("obj") + tm.assert_frame_equal(result, df) + + +def test_append_misc_empty_frame(setup_path): + # empty frame, GH4273 + with ensure_clean_store(setup_path) as store: + + # 0 len + df_empty = DataFrame(columns=list("ABC")) + store.append("df", df_empty) + with pytest.raises(KeyError, match="'No object named df in the file'"): + store.select("df") + + # repeated append of 0/non-zero frames + df = DataFrame(np.random.rand(10, 3), columns=list("ABC")) + store.append("df", df) + tm.assert_frame_equal(store.select("df"), df) + store.append("df", df_empty) + tm.assert_frame_equal(store.select("df"), df) + + # store + df = DataFrame(columns=list("ABC")) + store.put("df2", df) + tm.assert_frame_equal(store.select("df2"), df) + + +# TODO(ArrayManager) currently we rely on falling back to BlockManager, but +# the conversion from AM->BM converts the invalid object dtype column into +# a datetime64 column no longer raising an error +@td.skip_array_manager_not_yet_implemented +def test_append_raise(setup_path): + + with ensure_clean_store(setup_path) as store: + + # test append with invalid input to get good error messages + + # list in column + df = tm.makeDataFrame() + df["invalid"] = [["a"]] * len(df) + assert df.dtypes["invalid"] == np.object_ + msg = re.escape( + """Cannot serialize the column [invalid] +because its data contents are not [string] but [mixed] object dtype""" + ) + with pytest.raises(TypeError, match=msg): + store.append("df", df) + + # multiple invalid columns + df["invalid2"] = [["a"]] * len(df) + df["invalid3"] = [["a"]] * len(df) + with pytest.raises(TypeError, match=msg): + store.append("df", df) + + # datetime with embedded nans as object + df = tm.makeDataFrame() + s = Series(datetime.datetime(2001, 1, 2), index=df.index) + s = s.astype(object) + s[0:5] = np.nan + df["invalid"] = s + assert df.dtypes["invalid"] == np.object_ + msg = "too many timezones in this block, create separate data columns" + with pytest.raises(TypeError, match=msg): + store.append("df", df) + + # directly ndarray + msg = "value must be None, Series, or DataFrame" + with pytest.raises(TypeError, match=msg): + store.append("df", np.arange(10)) + + # series directly + msg = re.escape( + "cannot properly create the storer for: " + "[group->df,value->]" + ) + with pytest.raises(TypeError, match=msg): + store.append("df", Series(np.arange(10))) + + # appending an incompatible table + df = tm.makeDataFrame() + store.append("df", df) + + df["foo"] = "foo" + msg = re.escape( + "invalid combination of [non_index_axes] on appending data " + "[(1, ['A', 'B', 'C', 'D', 'foo'])] vs current table " + "[(1, ['A', 'B', 'C', 'D'])]" + ) + with pytest.raises(ValueError, match=msg): + store.append("df", df) + + # incompatible type (GH 41897) + _maybe_remove(store, "df") + df["foo"] = Timestamp("20130101") + store.append("df", df) + df["foo"] = "bar" + msg = re.escape( + "invalid combination of [values_axes] on appending data " + "[name->values_block_1,cname->values_block_1," + "dtype->bytes24,kind->string,shape->(1, 30)] " + "vs current table " + "[name->values_block_1,cname->values_block_1," + "dtype->datetime64,kind->datetime64,shape->None]" + ) + with pytest.raises(ValueError, match=msg): + store.append("df", df) + + +def test_append_with_timedelta(setup_path): + # GH 3577 + # append timedelta + + df = DataFrame( + { + "A": Timestamp("20130101"), + "B": [ + 
Timestamp("20130101") + timedelta(days=i, seconds=10) for i in range(10) + ], + } + ) + df["C"] = df["A"] - df["B"] + df.loc[3:5, "C"] = np.nan + + with ensure_clean_store(setup_path) as store: + + # table + _maybe_remove(store, "df") + store.append("df", df, data_columns=True) + result = store.select("df") + tm.assert_frame_equal(result, df) + + result = store.select("df", where="C<100000") + tm.assert_frame_equal(result, df) + + result = store.select("df", where="C0", "B>0"], selector="df1" + ) + expected = df[(df.A > 0) & (df.B > 0)] + tm.assert_frame_equal(result, expected) + + +def test_append_to_multiple_dropna(setup_path): + df1 = tm.makeTimeDataFrame() + df2 = tm.makeTimeDataFrame().rename(columns="{}_2".format) + df1.iloc[1, df1.columns.get_indexer(["A", "B"])] = np.nan + df = concat([df1, df2], axis=1) + + with ensure_clean_store(setup_path) as store: + + # dropna=True should guarantee rows are synchronized + store.append_to_multiple( + {"df1": ["A", "B"], "df2": None}, df, selector="df1", dropna=True + ) + result = store.select_as_multiple(["df1", "df2"]) + expected = df.dropna() + tm.assert_frame_equal(result, expected) + tm.assert_index_equal(store.select("df1").index, store.select("df2").index) + + +def test_append_to_multiple_dropna_false(setup_path): + df1 = tm.makeTimeDataFrame() + df2 = tm.makeTimeDataFrame().rename(columns="{}_2".format) + df1.iloc[1, df1.columns.get_indexer(["A", "B"])] = np.nan + df = concat([df1, df2], axis=1) + + with ensure_clean_store(setup_path) as store: + + # dropna=False shouldn't synchronize row indexes + store.append_to_multiple( + {"df1a": ["A", "B"], "df2a": None}, df, selector="df1a", dropna=False + ) + + msg = "all tables must have exactly the same nrows!" + with pytest.raises(ValueError, match=msg): + store.select_as_multiple(["df1a", "df2a"]) + + assert not store.select("df1a").index.equals(store.select("df2a").index) + + +def test_append_to_multiple_min_itemsize(setup_path): + # GH 11238 + df = DataFrame( + { + "IX": np.arange(1, 21), + "Num": np.arange(1, 21), + "BigNum": np.arange(1, 21) * 88, + "Str": ["a" for _ in range(20)], + "LongStr": ["abcde" for _ in range(20)], + } + ) + expected = df.iloc[[0]] + + with ensure_clean_store(setup_path) as store: + store.append_to_multiple( + { + "index": ["IX"], + "nums": ["Num", "BigNum"], + "strs": ["Str", "LongStr"], + }, + df.iloc[[0]], + "index", + min_itemsize={"Str": 10, "LongStr": 100, "Num": 2}, + ) + result = store.select_as_multiple(["index", "nums", "strs"]) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_categorical.py b/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_categorical.py new file mode 100644 index 0000000..dee3730 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_categorical.py @@ -0,0 +1,222 @@ +import numpy as np +import pytest + +from pandas import ( + Categorical, + DataFrame, + Series, + _testing as tm, + concat, + read_hdf, +) +from pandas.tests.io.pytables.common import ( + _maybe_remove, + ensure_clean_path, + ensure_clean_store, +) + +pytestmark = [ + pytest.mark.single, + # pytables https://github.com/PyTables/PyTables/issues/822 + pytest.mark.filterwarnings( + "ignore:a closed node found in the registry:UserWarning" + ), +] + + +def test_categorical(setup_path): + + with ensure_clean_store(setup_path) as store: + + # Basic + _maybe_remove(store, "s") + s = Series( + Categorical( + ["a", "b", "b", "a", "a", "c"], + categories=["a", "b", 
"c", "d"], + ordered=False, + ) + ) + store.append("s", s, format="table") + result = store.select("s") + tm.assert_series_equal(s, result) + + _maybe_remove(store, "s_ordered") + s = Series( + Categorical( + ["a", "b", "b", "a", "a", "c"], + categories=["a", "b", "c", "d"], + ordered=True, + ) + ) + store.append("s_ordered", s, format="table") + result = store.select("s_ordered") + tm.assert_series_equal(s, result) + + _maybe_remove(store, "df") + df = DataFrame({"s": s, "vals": [1, 2, 3, 4, 5, 6]}) + store.append("df", df, format="table") + result = store.select("df") + tm.assert_frame_equal(result, df) + + # Dtypes + _maybe_remove(store, "si") + s = Series([1, 1, 2, 2, 3, 4, 5]).astype("category") + store.append("si", s) + result = store.select("si") + tm.assert_series_equal(result, s) + + _maybe_remove(store, "si2") + s = Series([1, 1, np.nan, 2, 3, 4, 5]).astype("category") + store.append("si2", s) + result = store.select("si2") + tm.assert_series_equal(result, s) + + # Multiple + _maybe_remove(store, "df2") + df2 = df.copy() + df2["s2"] = Series(list("abcdefg")).astype("category") + store.append("df2", df2) + result = store.select("df2") + tm.assert_frame_equal(result, df2) + + # Make sure the metadata is OK + info = store.info() + assert "/df2 " in info + # df2._mgr.blocks[0] and df2._mgr.blocks[2] are Categorical + assert "/df2/meta/values_block_0/meta" in info + assert "/df2/meta/values_block_2/meta" in info + + # unordered + _maybe_remove(store, "s2") + s = Series( + Categorical( + ["a", "b", "b", "a", "a", "c"], + categories=["a", "b", "c", "d"], + ordered=False, + ) + ) + store.append("s2", s, format="table") + result = store.select("s2") + tm.assert_series_equal(result, s) + + # Query + _maybe_remove(store, "df3") + store.append("df3", df, data_columns=["s"]) + expected = df[df.s.isin(["b", "c"])] + result = store.select("df3", where=['s in ["b","c"]']) + tm.assert_frame_equal(result, expected) + + expected = df[df.s.isin(["b", "c"])] + result = store.select("df3", where=['s = ["b","c"]']) + tm.assert_frame_equal(result, expected) + + expected = df[df.s.isin(["d"])] + result = store.select("df3", where=['s in ["d"]']) + tm.assert_frame_equal(result, expected) + + expected = df[df.s.isin(["f"])] + result = store.select("df3", where=['s in ["f"]']) + tm.assert_frame_equal(result, expected) + + # Appending with same categories is ok + store.append("df3", df) + + df = concat([df, df]) + expected = df[df.s.isin(["b", "c"])] + result = store.select("df3", where=['s in ["b","c"]']) + tm.assert_frame_equal(result, expected) + + # Appending must have the same categories + df3 = df.copy() + df3["s"] = df3["s"].cat.remove_unused_categories() + + msg = "cannot append a categorical with different categories to the existing" + with pytest.raises(ValueError, match=msg): + store.append("df3", df3) + + # Remove, and make sure meta data is removed (its a recursive + # removal so should be). + result = store.select("df3/meta/s/meta") + assert result is not None + store.remove("df3") + + with pytest.raises( + KeyError, match="'No object named df3/meta/s/meta in the file'" + ): + store.select("df3/meta/s/meta") + + +def test_categorical_conversion(setup_path): + + # GH13322 + # Check that read_hdf with categorical columns doesn't return rows if + # where criteria isn't met. 
+    obsids = ["ESP_012345_6789", "ESP_987654_3210"]
+    imgids = ["APF00006np", "APF0001imm"]
+    data = [4.3, 9.8]
+
+    # Test without categories
+    df = DataFrame({"obsids": obsids, "imgids": imgids, "data": data})
+
+    # We are expecting an empty DataFrame matching types of df
+    expected = df.iloc[[], :]
+    with ensure_clean_path(setup_path) as path:
+        df.to_hdf(path, "df", format="table", data_columns=True)
+        result = read_hdf(path, "df", where="obsids=B")
+        tm.assert_frame_equal(result, expected)
+
+    # Test with categories
+    df.obsids = df.obsids.astype("category")
+    df.imgids = df.imgids.astype("category")
+
+    # We are expecting an empty DataFrame matching types of df
+    expected = df.iloc[[], :]
+    with ensure_clean_path(setup_path) as path:
+        df.to_hdf(path, "df", format="table", data_columns=True)
+        result = read_hdf(path, "df", where="obsids=B")
+        tm.assert_frame_equal(result, expected)
+
+
+def test_categorical_nan_only_columns(setup_path):
+    # GH18413
+    # Check that read_hdf with categorical columns with NaN-only values can
+    # be read back.
+    df = DataFrame(
+        {
+            "a": ["a", "b", "c", np.nan],
+            "b": [np.nan, np.nan, np.nan, np.nan],
+            "c": [1, 2, 3, 4],
+            "d": Series([None] * 4, dtype=object),
+        }
+    )
+    df["a"] = df.a.astype("category")
+    df["b"] = df.b.astype("category")
+    df["d"] = df.d.astype("category")
+    expected = df
+    with ensure_clean_path(setup_path) as path:
+        df.to_hdf(path, "df", format="table", data_columns=True)
+        result = read_hdf(path, "df")
+        tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "where, df, expected",
+    [
+        ('col=="q"', DataFrame({"col": ["a", "b", "s"]}), DataFrame({"col": []})),
+        ('col=="a"', DataFrame({"col": ["a", "b", "s"]}), DataFrame({"col": ["a"]})),
+    ],
+)
+def test_convert_value(setup_path, where: str, df: DataFrame, expected: DataFrame):
+    # GH39420
+    # Check that read_hdf with categorical columns can filter by where condition.
+    df.col = df.col.astype("category")
+    max_widths = {"col": 1}
+    categorical_values = sorted(df.col.unique())
+    expected.col = expected.col.astype("category")
+    expected.col = expected.col.cat.set_categories(categorical_values)
+
+    with ensure_clean_path(setup_path) as path:
+        df.to_hdf(path, "df", format="table", min_itemsize=max_widths)
+        result = read_hdf(path, where=where)
+        tm.assert_frame_equal(result, expected)
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_compat.py b/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_compat.py
new file mode 100644
index 0000000..c720038
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_compat.py
@@ -0,0 +1,77 @@
+import pytest
+
+import pandas as pd
+import pandas._testing as tm
+from pandas.tests.io.pytables.common import ensure_clean_path
+
+tables = pytest.importorskip("tables")
+
+
+@pytest.fixture
+def pytables_hdf5_file():
+    """
+    Use PyTables to create a simple HDF5 file.
+    """
+    table_schema = {
+        "c0": tables.Time64Col(pos=0),
+        "c1": tables.StringCol(5, pos=1),
+        "c2": tables.Int64Col(pos=2),
+    }
+
+    t0 = 1_561_105_000.0
+
+    testsamples = [
+        {"c0": t0, "c1": "aaaaa", "c2": 1},
+        {"c0": t0 + 1, "c1": "bbbbb", "c2": 2},
+        {"c0": t0 + 2, "c1": "ccccc", "c2": 10 ** 5},
+        {"c0": t0 + 3, "c1": "ddddd", "c2": 4_294_967_295},
+    ]
+
+    objname = "pandas_test_timeseries"
+
+    with ensure_clean_path("written_with_pytables.h5") as path:
+        # The `ensure_clean_path` context mgr removes the temp file upon exit.
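+        # (plain PyTables idiom: assign each field on the table's row buffer,
+        # then commit it with t.row.append(); pandas is deliberately not
+        # involved in the writing path, which is the point of this fixture)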
+ with tables.open_file(path, mode="w") as f: + t = f.create_table("/", name=objname, description=table_schema) + for sample in testsamples: + for key, value in sample.items(): + t.row[key] = value + t.row.append() + + yield path, objname, pd.DataFrame(testsamples) + + +class TestReadPyTablesHDF5: + """ + A group of tests which covers reading HDF5 files written by plain PyTables + (not written by pandas). + + Was introduced for regression-testing issue 11188. + """ + + def test_read_complete(self, pytables_hdf5_file): + path, objname, df = pytables_hdf5_file + result = pd.read_hdf(path, key=objname) + expected = df + tm.assert_frame_equal(result, expected) + + def test_read_with_start(self, pytables_hdf5_file): + path, objname, df = pytables_hdf5_file + # This is a regression test for pandas-dev/pandas/issues/11188 + result = pd.read_hdf(path, key=objname, start=1) + expected = df[1:].reset_index(drop=True) + tm.assert_frame_equal(result, expected) + + def test_read_with_stop(self, pytables_hdf5_file): + path, objname, df = pytables_hdf5_file + # This is a regression test for pandas-dev/pandas/issues/11188 + result = pd.read_hdf(path, key=objname, stop=1) + expected = df[:1].reset_index(drop=True) + tm.assert_frame_equal(result, expected) + + def test_read_with_startstop(self, pytables_hdf5_file): + path, objname, df = pytables_hdf5_file + # This is a regression test for pandas-dev/pandas/issues/11188 + result = pd.read_hdf(path, key=objname, start=1, stop=2) + expected = df[1:2].reset_index(drop=True) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_complex.py b/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_complex.py new file mode 100644 index 0000000..f3a43f6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_complex.py @@ -0,0 +1,203 @@ +from warnings import catch_warnings + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Series, +) +import pandas._testing as tm +from pandas.tests.io.pytables.common import ( + ensure_clean_path, + ensure_clean_store, +) + +from pandas.io.pytables import read_hdf + + +def test_complex_fixed(setup_path): + df = DataFrame( + np.random.rand(4, 5).astype(np.complex64), + index=list("abcd"), + columns=list("ABCDE"), + ) + + with ensure_clean_path(setup_path) as path: + df.to_hdf(path, "df") + reread = read_hdf(path, "df") + tm.assert_frame_equal(df, reread) + + df = DataFrame( + np.random.rand(4, 5).astype(np.complex128), + index=list("abcd"), + columns=list("ABCDE"), + ) + with ensure_clean_path(setup_path) as path: + df.to_hdf(path, "df") + reread = read_hdf(path, "df") + tm.assert_frame_equal(df, reread) + + +def test_complex_table(setup_path): + df = DataFrame( + np.random.rand(4, 5).astype(np.complex64), + index=list("abcd"), + columns=list("ABCDE"), + ) + + with ensure_clean_path(setup_path) as path: + df.to_hdf(path, "df", format="table") + reread = read_hdf(path, "df") + tm.assert_frame_equal(df, reread) + + df = DataFrame( + np.random.rand(4, 5).astype(np.complex128), + index=list("abcd"), + columns=list("ABCDE"), + ) + + with ensure_clean_path(setup_path) as path: + df.to_hdf(path, "df", format="table", mode="w") + reread = read_hdf(path, "df") + tm.assert_frame_equal(df, reread) + + +def test_complex_mixed_fixed(setup_path): + complex64 = np.array( + [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64 + ) + complex128 = np.array( + [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 
1.0j, 1.0 + 1.0j], dtype=np.complex128 + ) + df = DataFrame( + { + "A": [1, 2, 3, 4], + "B": ["a", "b", "c", "d"], + "C": complex64, + "D": complex128, + "E": [1.0, 2.0, 3.0, 4.0], + }, + index=list("abcd"), + ) + with ensure_clean_path(setup_path) as path: + df.to_hdf(path, "df") + reread = read_hdf(path, "df") + tm.assert_frame_equal(df, reread) + + +def test_complex_mixed_table(setup_path): + complex64 = np.array( + [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64 + ) + complex128 = np.array( + [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex128 + ) + df = DataFrame( + { + "A": [1, 2, 3, 4], + "B": ["a", "b", "c", "d"], + "C": complex64, + "D": complex128, + "E": [1.0, 2.0, 3.0, 4.0], + }, + index=list("abcd"), + ) + + with ensure_clean_store(setup_path) as store: + store.append("df", df, data_columns=["A", "B"]) + result = store.select("df", where="A>2") + tm.assert_frame_equal(df.loc[df.A > 2], result) + + with ensure_clean_path(setup_path) as path: + df.to_hdf(path, "df", format="table") + reread = read_hdf(path, "df") + tm.assert_frame_equal(df, reread) + + +def test_complex_across_dimensions_fixed(setup_path): + with catch_warnings(record=True): + complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j]) + s = Series(complex128, index=list("abcd")) + df = DataFrame({"A": s, "B": s}) + + objs = [s, df] + comps = [tm.assert_series_equal, tm.assert_frame_equal] + for obj, comp in zip(objs, comps): + with ensure_clean_path(setup_path) as path: + obj.to_hdf(path, "obj", format="fixed") + reread = read_hdf(path, "obj") + comp(obj, reread) + + +def test_complex_across_dimensions(setup_path): + complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j]) + s = Series(complex128, index=list("abcd")) + df = DataFrame({"A": s, "B": s}) + + with catch_warnings(record=True): + + objs = [df] + comps = [tm.assert_frame_equal] + for obj, comp in zip(objs, comps): + with ensure_clean_path(setup_path) as path: + obj.to_hdf(path, "obj", format="table") + reread = read_hdf(path, "obj") + comp(obj, reread) + + +def test_complex_indexing_error(setup_path): + complex128 = np.array( + [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex128 + ) + df = DataFrame( + {"A": [1, 2, 3, 4], "B": ["a", "b", "c", "d"], "C": complex128}, + index=list("abcd"), + ) + + msg = ( + "Columns containing complex values can be stored " + "but cannot be indexed when using table format. " + "Either use fixed format, set index=False, " + "or do not include the columns containing complex " + "values to data_columns when initializing the table." + ) + + with ensure_clean_store(setup_path) as store: + with pytest.raises(TypeError, match=msg): + store.append("df", df, data_columns=["C"]) + + +def test_complex_series_error(setup_path): + complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j]) + s = Series(complex128, index=list("abcd")) + + msg = ( + "Columns containing complex values can be stored " + "but cannot be indexed when using table format. " + "Either use fixed format, set index=False, " + "or do not include the columns containing complex " + "values to data_columns when initializing the table." 
+ ) + + with ensure_clean_path(setup_path) as path: + with pytest.raises(TypeError, match=msg): + s.to_hdf(path, "obj", format="t") + + with ensure_clean_path(setup_path) as path: + s.to_hdf(path, "obj", format="t", index=False) + reread = read_hdf(path, "obj") + tm.assert_series_equal(s, reread) + + +def test_complex_append(setup_path): + df = DataFrame( + {"a": np.random.randn(100).astype(np.complex128), "b": np.random.randn(100)} + ) + + with ensure_clean_store(setup_path) as store: + store.append("df", df, data_columns=["b"]) + store.append("df", df) + result = store.select("df") + tm.assert_frame_equal(pd.concat([df, df], axis=0), result) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_errors.py b/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_errors.py new file mode 100644 index 0000000..dbcd112 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_errors.py @@ -0,0 +1,239 @@ +import datetime +from io import BytesIO +import re +from warnings import catch_warnings + +import numpy as np +import pytest + +from pandas import ( + CategoricalIndex, + DataFrame, + HDFStore, + MultiIndex, + _testing as tm, + date_range, + read_hdf, +) +from pandas.tests.io.pytables.common import ( + ensure_clean_path, + ensure_clean_store, +) + +from pandas.io.pytables import ( + Term, + _maybe_adjust_name, +) + +pytestmark = pytest.mark.single + + +def test_pass_spec_to_storer(setup_path): + + df = tm.makeDataFrame() + + with ensure_clean_store(setup_path) as store: + store.put("df", df) + msg = ( + "cannot pass a column specification when reading a Fixed format " + "store. this store must be selected in its entirety" + ) + with pytest.raises(TypeError, match=msg): + store.select("df", columns=["A"]) + msg = ( + "cannot pass a where specification when reading from a Fixed " + "format store. this store must be selected in its entirety" + ) + with pytest.raises(TypeError, match=msg): + store.select("df", where=[("columns=A")]) + + +def test_table_index_incompatible_dtypes(setup_path): + df1 = DataFrame({"a": [1, 2, 3]}) + df2 = DataFrame({"a": [4, 5, 6]}, index=date_range("1/1/2000", periods=3)) + + with ensure_clean_store(setup_path) as store: + store.put("frame", df1, format="table") + msg = re.escape("incompatible kind in col [integer - datetime64]") + with pytest.raises(TypeError, match=msg): + store.put("frame", df2, format="table", append=True) + + +def test_unimplemented_dtypes_table_columns(setup_path): + + with ensure_clean_store(setup_path) as store: + + dtypes = [("date", datetime.date(2001, 1, 2))] + + # currently not supported dtypes #### + for n, f in dtypes: + df = tm.makeDataFrame() + df[n] = f + msg = re.escape(f"[{n}] is not implemented as a table column") + with pytest.raises(TypeError, match=msg): + store.append(f"df1_{n}", df) + + # frame + df = tm.makeDataFrame() + df["obj1"] = "foo" + df["obj2"] = "bar" + df["datetime1"] = datetime.date(2001, 1, 2) + df = df._consolidate()._convert(datetime=True) + + with ensure_clean_store(setup_path) as store: + # this fails because we have a date in the object block...... 
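+ # (datetime.date values sit in an object block, and object columns are + # only serializable as strings. A plausible workaround, not exercised + # here: cast to datetime64 first, e.g. pd.to_datetime(df["datetime1"]).)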
+ msg = re.escape( + """Cannot serialize the column [datetime1] +because its data contents are not [string] but [date] object dtype""" + ) + with pytest.raises(TypeError, match=msg): + store.append("df_unimplemented", df) + + +def test_invalid_terms(setup_path): + + with ensure_clean_store(setup_path) as store: + + with catch_warnings(record=True): + + df = tm.makeTimeDataFrame() + df["string"] = "foo" + df.loc[df.index[0:4], "string"] = "bar" + + store.put("df", df, format="table") + + # some invalid terms + msg = re.escape( + "__init__() missing 1 required positional argument: 'where'" + ) + with pytest.raises(TypeError, match=msg): + Term() + + # more invalid + msg = re.escape( + "cannot process expression [df.index[3]], " + "[2000-01-06 00:00:00] is not a valid condition" + ) + with pytest.raises(ValueError, match=msg): + store.select("df", "df.index[3]") + + msg = "invalid syntax" + with pytest.raises(SyntaxError, match=msg): + store.select("df", "index>") + + # from the docs + with ensure_clean_path(setup_path) as path: + dfq = DataFrame( + np.random.randn(10, 4), + columns=list("ABCD"), + index=date_range("20130101", periods=10), + ) + dfq.to_hdf(path, "dfq", format="table", data_columns=True) + + # check ok + read_hdf(path, "dfq", where="index>Timestamp('20130104') & columns=['A', 'B']") + read_hdf(path, "dfq", where="A>0 or C>0") + + # catch the invalid reference + with ensure_clean_path(setup_path) as path: + dfq = DataFrame( + np.random.randn(10, 4), + columns=list("ABCD"), + index=date_range("20130101", periods=10), + ) + dfq.to_hdf(path, "dfq", format="table") + + msg = ( + r"The passed where expression: A>0 or C>0\n\s*" + r"contains an invalid variable reference\n\s*" + r"all of the variable references must be a reference to\n\s*" + r"an axis \(e.g. 'index' or 'columns'\), or a data_column\n\s*" + r"The currently defined references are: index,columns\n" + ) + with pytest.raises(ValueError, match=msg): + read_hdf(path, "dfq", where="A>0 or C>0") + + +def test_append_with_diff_col_name_types_raises_value_error(setup_path): + df = DataFrame(np.random.randn(10, 1)) + df2 = DataFrame({"a": np.random.randn(10)}) + df3 = DataFrame({(1, 2): np.random.randn(10)}) + df4 = DataFrame({("1", 2): np.random.randn(10)}) + df5 = DataFrame({("1", 2, object): np.random.randn(10)}) + + with ensure_clean_store(setup_path) as store: + name = f"df_{tm.rands(10)}" + store.append(name, df) + + for d in (df2, df3, df4, df5): + msg = re.escape( + "cannot match existing table structure for [0] on appending data" + ) + with pytest.raises(ValueError, match=msg): + store.append(name, d) + + +def test_invalid_complib(setup_path): + df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")) + with tm.ensure_clean(setup_path) as path: + msg = r"complib only supports \[.*\] compression." 
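+ # (The accepted names come from PyTables, tables.filters.all_complibs: + # typically "zlib", "lzo", "bzip2" and the "blosc" variants; anything + # else is rejected before any file is written, as checked below.)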
+ with pytest.raises(ValueError, match=msg): + df.to_hdf(path, "df", complib="foolib") + + +@pytest.mark.parametrize( + "idx", + [ + date_range("2019", freq="D", periods=3, tz="UTC"), + CategoricalIndex(list("abc")), + ], +) +def test_to_hdf_multiindex_extension_dtype(idx, setup_path): + # GH 7775 + mi = MultiIndex.from_arrays([idx, idx]) + df = DataFrame(0, index=mi, columns=["a"]) + with ensure_clean_path(setup_path) as path: + with pytest.raises(NotImplementedError, match="Saving a MultiIndex"): + df.to_hdf(path, "df") + + +def test_unsupported_hdf_file_error(datapath): + # GH 9539 + data_path = datapath("io", "data", "legacy_hdf/incompatible_dataset.h5") + message = ( + r"Dataset\(s\) incompatible with Pandas data types, " + "not table, or no datasets found in HDF5 file." + ) + + with pytest.raises(ValueError, match=message): + read_hdf(data_path) + + +def test_read_hdf_errors(setup_path): + df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")) + + with ensure_clean_path(setup_path) as path: + msg = r"File [\S]* does not exist" + with pytest.raises(OSError, match=msg): + read_hdf(path, "key") + + df.to_hdf(path, "df") + store = HDFStore(path, mode="r") + store.close() + + msg = "The HDFStore must be open for reading." + with pytest.raises(OSError, match=msg): + read_hdf(store, "df") + + +def test_read_hdf_generic_buffer_errors(): + msg = "Support for generic buffers has not been implemented." + with pytest.raises(NotImplementedError, match=msg): + read_hdf(BytesIO(b""), "df") + + +@pytest.mark.parametrize("bad_version", [(1, 2), (1,), [], "12", "123"]) +def test_maybe_adjust_name_bad_version_raises(bad_version): + msg = "Version is incorrect, expected sequence of 3 integers" + with pytest.raises(ValueError, match=msg): + _maybe_adjust_name("values_block_0", version=bad_version) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_file_handling.py b/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_file_handling.py new file mode 100644 index 0000000..027c3d0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_file_handling.py @@ -0,0 +1,447 @@ +import os + +import numpy as np +import pytest + +from pandas.compat import is_platform_little_endian + +from pandas import ( + DataFrame, + HDFStore, + Series, + _testing as tm, + read_hdf, +) +from pandas.tests.io.pytables.common import ( + _maybe_remove, + ensure_clean_path, + ensure_clean_store, + tables, +) + +from pandas.io import pytables as pytables +from pandas.io.pytables import ( + ClosedFileError, + PossibleDataLossError, + Term, +) + +pytestmark = pytest.mark.single + + +@pytest.mark.parametrize("mode", ["r", "r+", "a", "w"]) +def test_mode(setup_path, mode): + + df = tm.makeTimeDataFrame() + msg = r"[\S]* does not exist" + with ensure_clean_path(setup_path) as path: + + # constructor + if mode in ["r", "r+"]: + with pytest.raises(OSError, match=msg): + HDFStore(path, mode=mode) + + else: + store = HDFStore(path, mode=mode) + assert store._handle.mode == mode + store.close() + + with ensure_clean_path(setup_path) as path: + + # context + if mode in ["r", "r+"]: + with pytest.raises(OSError, match=msg): + with HDFStore(path, mode=mode) as store: + pass + else: + with HDFStore(path, mode=mode) as store: + assert store._handle.mode == mode + + with ensure_clean_path(setup_path) as path: + + # conv write + if mode in ["r", "r+"]: + with pytest.raises(OSError, match=msg): + df.to_hdf(path, "df", mode=mode) + df.to_hdf(path, "df", mode="w") + 
else: + df.to_hdf(path, "df", mode=mode) + + # conv read + if mode in ["w"]: + msg = ( + "mode w is not allowed while performing a read. " + r"Allowed modes are r, r\+ and a." + ) + with pytest.raises(ValueError, match=msg): + read_hdf(path, "df", mode=mode) + else: + result = read_hdf(path, "df", mode=mode) + tm.assert_frame_equal(result, df) + + +def test_default_mode(setup_path): + # read_hdf uses default mode + df = tm.makeTimeDataFrame() + with ensure_clean_path(setup_path) as path: + df.to_hdf(path, "df", mode="w") + result = read_hdf(path, "df") + tm.assert_frame_equal(result, df) + + +def test_reopen_handle(setup_path): + + with ensure_clean_path(setup_path) as path: + + store = HDFStore(path, mode="a") + store["a"] = tm.makeTimeSeries() + + msg = ( + r"Re-opening the file \[[\S]*\] with mode \[a\] will delete the " + "current file!" + ) + # invalid mode change + with pytest.raises(PossibleDataLossError, match=msg): + store.open("w") + + store.close() + assert not store.is_open + + # truncation ok here + store.open("w") + assert store.is_open + assert len(store) == 0 + store.close() + assert not store.is_open + + store = HDFStore(path, mode="a") + store["a"] = tm.makeTimeSeries() + + # reopen as read + store.open("r") + assert store.is_open + assert len(store) == 1 + assert store._mode == "r" + store.close() + assert not store.is_open + + # reopen as append + store.open("a") + assert store.is_open + assert len(store) == 1 + assert store._mode == "a" + store.close() + assert not store.is_open + + # reopen as append (again) + store.open("a") + assert store.is_open + assert len(store) == 1 + assert store._mode == "a" + store.close() + assert not store.is_open + + +def test_open_args(setup_path): + + with tm.ensure_clean(setup_path) as path: + + df = tm.makeDataFrame() + + # create an in memory store + store = HDFStore( + path, mode="a", driver="H5FD_CORE", driver_core_backing_store=0 + ) + store["df"] = df + store.append("df2", df) + + tm.assert_frame_equal(store["df"], df) + tm.assert_frame_equal(store["df2"], df) + + store.close() + + # the file should not have actually been written + assert not os.path.exists(path) + + +def test_flush(setup_path): + + with ensure_clean_store(setup_path) as store: + store["a"] = tm.makeTimeSeries() + store.flush() + store.flush(fsync=True) + + +def test_complibs_default_settings(setup_path): + # GH15943 + df = tm.makeDataFrame() + + # Set complevel and check if complib is automatically set to + # default value + with ensure_clean_path(setup_path) as tmpfile: + df.to_hdf(tmpfile, "df", complevel=9) + result = read_hdf(tmpfile, "df") + tm.assert_frame_equal(result, df) + + with tables.open_file(tmpfile, mode="r") as h5file: + for node in h5file.walk_nodes(where="/df", classname="Leaf"): + assert node.filters.complevel == 9 + assert node.filters.complib == "zlib" + + # Set complib and check to see if compression is disabled + with ensure_clean_path(setup_path) as tmpfile: + df.to_hdf(tmpfile, "df", complib="zlib") + result = read_hdf(tmpfile, "df") + tm.assert_frame_equal(result, df) + + with tables.open_file(tmpfile, mode="r") as h5file: + for node in h5file.walk_nodes(where="/df", classname="Leaf"): + assert node.filters.complevel == 0 + assert node.filters.complib is None + + # Check if not setting complib or complevel results in no compression + with ensure_clean_path(setup_path) as tmpfile: + df.to_hdf(tmpfile, "df") + result = read_hdf(tmpfile, "df") + tm.assert_frame_equal(result, df) + + with tables.open_file(tmpfile, mode="r") as h5file: + for 
node in h5file.walk_nodes(where="/df", classname="Leaf"): + assert node.filters.complevel == 0 + assert node.filters.complib is None + + # Check if file-defaults can be overridden on a per-table basis + with ensure_clean_path(setup_path) as tmpfile: + store = HDFStore(tmpfile) + store.append("dfc", df, complevel=9, complib="blosc") + store.append("df", df) + store.close() + + with tables.open_file(tmpfile, mode="r") as h5file: + for node in h5file.walk_nodes(where="/df", classname="Leaf"): + assert node.filters.complevel == 0 + assert node.filters.complib is None + for node in h5file.walk_nodes(where="/dfc", classname="Leaf"): + assert node.filters.complevel == 9 + assert node.filters.complib == "blosc" + + +def test_complibs(setup_path): + # GH14478 + df = tm.makeDataFrame() + + # Build the list of all (complib, complevel) tuples + all_complibs = tables.filters.all_complibs + # Remove lzo if it's not available on this platform + if not tables.which_lib_version("lzo"): + all_complibs.remove("lzo") + # Remove bzip2 if it's not available on this platform + if not tables.which_lib_version("bzip2"): + all_complibs.remove("bzip2") + + all_levels = range(0, 10) + all_tests = [(lib, lvl) for lib in all_complibs for lvl in all_levels] + + for (lib, lvl) in all_tests: + with ensure_clean_path(setup_path) as tmpfile: + gname = "foo" + + # Write and read file to see if data is consistent + df.to_hdf(tmpfile, gname, complib=lib, complevel=lvl) + result = read_hdf(tmpfile, gname) + tm.assert_frame_equal(result, df) + + # Open file and check metadata + # for correct amount of compression + h5table = tables.open_file(tmpfile, mode="r") + for node in h5table.walk_nodes(where="/" + gname, classname="Leaf"): + assert node.filters.complevel == lvl + if lvl == 0: + assert node.filters.complib is None + else: + assert node.filters.complib == lib + h5table.close() + + +@pytest.mark.skipif( + not is_platform_little_endian(), reason="platform is not little endian" +) +def test_encoding(setup_path): + + with ensure_clean_store(setup_path) as store: + df = DataFrame({"A": "foo", "B": "bar"}, index=range(5)) + df.loc[2, "A"] = np.nan + df.loc[3, "B"] = np.nan + _maybe_remove(store, "df") + store.append("df", df, encoding="ascii") + tm.assert_frame_equal(store["df"], df) + + expected = df.reindex(columns=["A"]) + result = store.select("df", Term("columns=A", encoding="ascii")) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "val", + [ + [b"E\xc9, 17", b"", b"a", b"b", b"c"], + [b"E\xc9, 17", b"a", b"b", b"c"], + [b"EE, 17", b"", b"a", b"b", b"c"], + [b"E\xc9, 17", b"\xf8\xfc", b"a", b"b", b"c"], + [b"", b"a", b"b", b"c"], + [b"\xf8\xfc", b"a", b"b", b"c"], + [b"A\xf8\xfc", b"", b"a", b"b", b"c"], + [np.nan, b"", b"b", b"c"], + [b"A\xf8\xfc", np.nan, b"", b"b", b"c"], + ], +) +@pytest.mark.parametrize("dtype", ["category", object]) +def test_latin_encoding(setup_path, dtype, val): + enc = "latin-1" + nan_rep = "" + key = "data" + + val = [x.decode(enc) if isinstance(x, bytes) else x for x in val] + ser = Series(val, dtype=dtype) + + with ensure_clean_path(setup_path) as store: + ser.to_hdf(store, key, format="table", encoding=enc, nan_rep=nan_rep) + retr = read_hdf(store, key) + + s_nan = ser.replace(nan_rep, np.nan) + + tm.assert_series_equal(s_nan, retr) + + +def test_multiple_open_close(setup_path): + # gh-4409: open & close multiple times + + with ensure_clean_path(setup_path) as path: + + df = tm.makeDataFrame() + df.to_hdf(path, "df", mode="w", format="table") + + # single + 
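+ # (HDFStore.info() embeds the literal text "CLOSED" once the underlying + # handle has been closed; that marker is what the assertions below look for.)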
store = HDFStore(path) + assert "CLOSED" not in store.info() + assert store.is_open + + store.close() + assert "CLOSED" in store.info() + assert not store.is_open + + with ensure_clean_path(setup_path) as path: + + if pytables._table_file_open_policy_is_strict: + # multiples + store1 = HDFStore(path) + msg = ( + r"The file [\S]* is already opened\. Please close it before " + r"reopening in write mode\." + ) + with pytest.raises(ValueError, match=msg): + HDFStore(path) + + store1.close() + else: + + # multiples + store1 = HDFStore(path) + store2 = HDFStore(path) + + assert "CLOSED" not in store1.info() + assert "CLOSED" not in store2.info() + assert store1.is_open + assert store2.is_open + + store1.close() + assert "CLOSED" in store1.info() + assert not store1.is_open + assert "CLOSED" not in store2.info() + assert store2.is_open + + store2.close() + assert "CLOSED" in store1.info() + assert "CLOSED" in store2.info() + assert not store1.is_open + assert not store2.is_open + + # nested close + store = HDFStore(path, mode="w") + store.append("df", df) + + store2 = HDFStore(path) + store2.append("df2", df) + store2.close() + assert "CLOSED" in store2.info() + assert not store2.is_open + + store.close() + assert "CLOSED" in store.info() + assert not store.is_open + + # double closing + store = HDFStore(path, mode="w") + store.append("df", df) + + store2 = HDFStore(path) + store.close() + assert "CLOSED" in store.info() + assert not store.is_open + + store2.close() + assert "CLOSED" in store2.info() + assert not store2.is_open + + # ops on a closed store + with ensure_clean_path(setup_path) as path: + + df = tm.makeDataFrame() + df.to_hdf(path, "df", mode="w", format="table") + + store = HDFStore(path) + store.close() + + msg = r"[\S]* file is not open!" + with pytest.raises(ClosedFileError, match=msg): + store.keys() + + with pytest.raises(ClosedFileError, match=msg): + "df" in store + + with pytest.raises(ClosedFileError, match=msg): + len(store) + + with pytest.raises(ClosedFileError, match=msg): + store["df"] + + with pytest.raises(ClosedFileError, match=msg): + store.select("df") + + with pytest.raises(ClosedFileError, match=msg): + store.get("df") + + with pytest.raises(ClosedFileError, match=msg): + store.append("df2", df) + + with pytest.raises(ClosedFileError, match=msg): + store.put("df3", df) + + with pytest.raises(ClosedFileError, match=msg): + store.get_storer("df2") + + with pytest.raises(ClosedFileError, match=msg): + store.remove("df2") + + with pytest.raises(ClosedFileError, match=msg): + store.select("df") + + msg = "'HDFStore' object has no attribute 'df'" + with pytest.raises(AttributeError, match=msg): + store.df + + +def test_fspath(): + with tm.ensure_clean("foo.h5") as path: + with HDFStore(path) as store: + assert os.fspath(store) == str(path) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_keys.py b/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_keys.py new file mode 100644 index 0000000..02b79bd --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_keys.py @@ -0,0 +1,80 @@ +import pytest + +from pandas import ( + DataFrame, + HDFStore, + _testing as tm, +) +from pandas.tests.io.pytables.common import ( + ensure_clean_path, + ensure_clean_store, + tables, +) + +pytestmark = pytest.mark.single + + +def test_keys(setup_path): + + with ensure_clean_store(setup_path) as store: + store["a"] = tm.makeTimeSeries() + store["b"] = tm.makeStringSeries() + store["c"] = tm.makeDataFrame() + + assert 
len(store) == 3 + expected = {"/a", "/b", "/c"} + assert set(store.keys()) == expected + assert set(store) == expected + + +def test_non_pandas_keys(setup_path): + class Table1(tables.IsDescription): + value1 = tables.Float32Col() + + class Table2(tables.IsDescription): + value2 = tables.Float32Col() + + class Table3(tables.IsDescription): + value3 = tables.Float32Col() + + with ensure_clean_path(setup_path) as path: + with tables.open_file(path, mode="w") as h5file: + group = h5file.create_group("/", "group") + h5file.create_table(group, "table1", Table1, "Table 1") + h5file.create_table(group, "table2", Table2, "Table 2") + h5file.create_table(group, "table3", Table3, "Table 3") + with HDFStore(path) as store: + assert len(store.keys(include="native")) == 3 + expected = {"/group/table1", "/group/table2", "/group/table3"} + assert set(store.keys(include="native")) == expected + assert set(store.keys(include="pandas")) == set() + for name in expected: + df = store.get(name) + assert len(df.columns) == 1 + + +def test_keys_illegal_include_keyword_value(setup_path): + with ensure_clean_store(setup_path) as store: + with pytest.raises( + ValueError, + match="`include` should be either 'pandas' or 'native' but is 'illegal'", + ): + store.keys(include="illegal") + + +def test_keys_ignore_hdf_softlink(setup_path): + + # GH 20523 + # Puts a softlink into HDF file and rereads + + with ensure_clean_store(setup_path) as store: + + df = DataFrame({"A": range(5), "B": range(5)}) + store.put("df", df) + + assert store.keys() == ["/df"] + + store._handle.create_soft_link(store._handle.root, "symlink", "df") + + # Should ignore the softlink + assert store.keys() == ["/df"] diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_put.py b/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_put.py new file mode 100644 index 0000000..8e7b31b --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_put.py @@ -0,0 +1,374 @@ +import datetime +import re +from warnings import ( + catch_warnings, + simplefilter, +) + +import numpy as np +import pytest + +from pandas._libs.tslibs import Timestamp + +import pandas as pd +from pandas import ( + DataFrame, + HDFStore, + Index, + MultiIndex, + RangeIndex, + Series, + _testing as tm, + concat, +) +from pandas.core.api import Int64Index +from pandas.tests.io.pytables.common import ( + _maybe_remove, + ensure_clean_path, + ensure_clean_store, +) +from pandas.util import _test_decorators as td + +pytestmark = pytest.mark.single + + +def test_format_type(setup_path): + df = DataFrame({"A": [1, 2]}) + with ensure_clean_path(setup_path) as path: + with HDFStore(path) as store: + store.put("a", df, format="fixed") + store.put("b", df, format="table") + + assert store.get_storer("a").format_type == "fixed" + assert store.get_storer("b").format_type == "table" + + +def test_format_kwarg_in_constructor(setup_path): + # GH 13291 + + msg = "format is not a defined argument for HDFStore" + + with tm.ensure_clean(setup_path) as path: + with pytest.raises(ValueError, match=msg): + HDFStore(path, format="table") + + +def test_api_default_format(setup_path): + + # default_format option + with ensure_clean_store(setup_path) as store: + df = tm.makeDataFrame() + + pd.set_option("io.hdf.default_format", "fixed") + _maybe_remove(store, "df") + store.put("df", df) + assert not store.get_storer("df").is_table + + msg = "Can only append to Tables" + + with pytest.raises(ValueError, match=msg): + store.append("df2", df) + + 
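+ # With the option flipped to "table", the same put() below lands as a + # Table node and append() becomes legal; the option is reset to None + # further down so later tests see the default again.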
pd.set_option("io.hdf.default_format", "table") + _maybe_remove(store, "df") + store.put("df", df) + assert store.get_storer("df").is_table + _maybe_remove(store, "df2") + store.append("df2", df) + assert store.get_storer("df").is_table + + pd.set_option("io.hdf.default_format", None) + + with ensure_clean_path(setup_path) as path: + + df = tm.makeDataFrame() + + pd.set_option("io.hdf.default_format", "fixed") + df.to_hdf(path, "df") + with HDFStore(path) as store: + assert not store.get_storer("df").is_table + with pytest.raises(ValueError, match=msg): + df.to_hdf(path, "df2", append=True) + + pd.set_option("io.hdf.default_format", "table") + df.to_hdf(path, "df3") + with HDFStore(path) as store: + assert store.get_storer("df3").is_table + df.to_hdf(path, "df4", append=True) + with HDFStore(path) as store: + assert store.get_storer("df4").is_table + + pd.set_option("io.hdf.default_format", None) + + +def test_put(setup_path): + + with ensure_clean_store(setup_path) as store: + + ts = tm.makeTimeSeries() + df = tm.makeTimeDataFrame() + store["a"] = ts + store["b"] = df[:10] + store["foo/bar/bah"] = df[:10] + store["foo"] = df[:10] + store["/foo"] = df[:10] + store.put("c", df[:10], format="table") + + # not OK, not a table + msg = "Can only append to Tables" + with pytest.raises(ValueError, match=msg): + store.put("b", df[10:], append=True) + + # node does not currently exist, test _is_table_type returns False + # in this case + _maybe_remove(store, "f") + with pytest.raises(ValueError, match=msg): + store.put("f", df[10:], append=True) + + # can't put to a table (use append instead) + with pytest.raises(ValueError, match=msg): + store.put("c", df[10:], append=True) + + # overwrite table + store.put("c", df[:10], format="table", append=False) + tm.assert_frame_equal(df[:10], store["c"]) + + +def test_put_string_index(setup_path): + + with ensure_clean_store(setup_path) as store: + + index = Index([f"I am a very long string index: {i}" for i in range(20)]) + s = Series(np.arange(20), index=index) + df = DataFrame({"A": s, "B": s}) + + store["a"] = s + tm.assert_series_equal(store["a"], s) + + store["b"] = df + tm.assert_frame_equal(store["b"], df) + + # mixed length + index = Index( + ["abcdefghijklmnopqrstuvwxyz1234567890"] + + [f"I am a very long string index: {i}" for i in range(20)] + ) + s = Series(np.arange(21), index=index) + df = DataFrame({"A": s, "B": s}) + store["a"] = s + tm.assert_series_equal(store["a"], s) + + store["b"] = df + tm.assert_frame_equal(store["b"], df) + + +def test_put_compression(setup_path): + + with ensure_clean_store(setup_path) as store: + df = tm.makeTimeDataFrame() + + store.put("c", df, format="table", complib="zlib") + tm.assert_frame_equal(store["c"], df) + + # can't compress if format='fixed' + msg = "Compression not supported on Fixed format stores" + with pytest.raises(ValueError, match=msg): + store.put("b", df, format="fixed", complib="zlib") + + +@td.skip_if_windows +def test_put_compression_blosc(setup_path): + df = tm.makeTimeDataFrame() + + with ensure_clean_store(setup_path) as store: + + # can't compress if format='fixed' + msg = "Compression not supported on Fixed format stores" + with pytest.raises(ValueError, match=msg): + store.put("b", df, format="fixed", complib="blosc") + + store.put("c", df, format="table", complib="blosc") + tm.assert_frame_equal(store["c"], df) + + +def test_put_mixed_type(setup_path): + df = tm.makeTimeDataFrame() + df["obj1"] = "foo" + df["obj2"] = "bar" + df["bool1"] = df["A"] > 0 + df["bool2"] = df["B"] > 0 + 
df["bool3"] = True + df["int1"] = 1 + df["int2"] = 2 + df["timestamp1"] = Timestamp("20010102") + df["timestamp2"] = Timestamp("20010103") + df["datetime1"] = datetime.datetime(2001, 1, 2, 0, 0) + df["datetime2"] = datetime.datetime(2001, 1, 3, 0, 0) + df.loc[df.index[3:6], ["obj1"]] = np.nan + df = df._consolidate()._convert(datetime=True) + + with ensure_clean_store(setup_path) as store: + _maybe_remove(store, "df") + + # PerformanceWarning + with catch_warnings(record=True): + simplefilter("ignore", pd.errors.PerformanceWarning) + store.put("df", df) + + expected = store.get("df") + tm.assert_frame_equal(expected, df) + + +@pytest.mark.parametrize( + "format, index", + [ + ["table", tm.makeFloatIndex], + ["table", tm.makeStringIndex], + ["table", tm.makeIntIndex], + ["table", tm.makeDateIndex], + ["fixed", tm.makeFloatIndex], + ["fixed", tm.makeStringIndex], + ["fixed", tm.makeIntIndex], + ["fixed", tm.makeDateIndex], + ["table", tm.makePeriodIndex], # GH#7796 + ["fixed", tm.makePeriodIndex], + ["table", tm.makeUnicodeIndex], + ["fixed", tm.makeUnicodeIndex], + ], +) +def test_store_index_types(setup_path, format, index): + # GH5386 + # test storing various index types + + with ensure_clean_store(setup_path) as store: + + df = DataFrame(np.random.randn(10, 2), columns=list("AB")) + df.index = index(len(df)) + + _maybe_remove(store, "df") + store.put("df", df, format=format) + tm.assert_frame_equal(df, store["df"]) + + +def test_column_multiindex(setup_path): + # GH 4710 + # recreate multi-indexes properly + + index = MultiIndex.from_tuples( + [("A", "a"), ("A", "b"), ("B", "a"), ("B", "b")], names=["first", "second"] + ) + df = DataFrame(np.arange(12).reshape(3, 4), columns=index) + expected = df.copy() + if isinstance(expected.index, RangeIndex): + expected.index = Int64Index(expected.index) + + with ensure_clean_store(setup_path) as store: + + store.put("df", df) + tm.assert_frame_equal( + store["df"], expected, check_index_type=True, check_column_type=True + ) + + store.put("df1", df, format="table") + tm.assert_frame_equal( + store["df1"], expected, check_index_type=True, check_column_type=True + ) + + msg = re.escape("cannot use a multi-index on axis [1] with data_columns ['A']") + with pytest.raises(ValueError, match=msg): + store.put("df2", df, format="table", data_columns=["A"]) + msg = re.escape("cannot use a multi-index on axis [1] with data_columns True") + with pytest.raises(ValueError, match=msg): + store.put("df3", df, format="table", data_columns=True) + + # appending multi-column on existing table (see GH 6167) + with ensure_clean_store(setup_path) as store: + store.append("df2", df) + store.append("df2", df) + + tm.assert_frame_equal(store["df2"], concat((df, df))) + + # non_index_axes name + df = DataFrame(np.arange(12).reshape(3, 4), columns=Index(list("ABCD"), name="foo")) + expected = df.copy() + if isinstance(expected.index, RangeIndex): + expected.index = Int64Index(expected.index) + + with ensure_clean_store(setup_path) as store: + + store.put("df1", df, format="table") + tm.assert_frame_equal( + store["df1"], expected, check_index_type=True, check_column_type=True + ) + + +def test_store_multiindex(setup_path): + + # validate multi-index names + # GH 5527 + with ensure_clean_store(setup_path) as store: + + def make_index(names=None): + return MultiIndex.from_tuples( + [ + (datetime.datetime(2013, 12, d), s, t) + for d in range(1, 3) + for s in range(2) + for t in range(3) + ], + names=names, + ) + + # no names + _maybe_remove(store, "df") + df = 
DataFrame(np.zeros((12, 2)), columns=["a", "b"], index=make_index()) + store.append("df", df) + tm.assert_frame_equal(store.select("df"), df) + + # partial names + _maybe_remove(store, "df") + df = DataFrame( + np.zeros((12, 2)), + columns=["a", "b"], + index=make_index(["date", None, None]), + ) + store.append("df", df) + tm.assert_frame_equal(store.select("df"), df) + + # series + _maybe_remove(store, "s") + s = Series(np.zeros(12), index=make_index(["date", None, None])) + store.append("s", s) + xp = Series(np.zeros(12), index=make_index(["date", "level_1", "level_2"])) + tm.assert_series_equal(store.select("s"), xp) + + # dup with column + _maybe_remove(store, "df") + df = DataFrame( + np.zeros((12, 2)), + columns=["a", "b"], + index=make_index(["date", "a", "t"]), + ) + msg = "duplicate names/columns in the multi-index when storing as a table" + with pytest.raises(ValueError, match=msg): + store.append("df", df) + + # dup within level + _maybe_remove(store, "df") + df = DataFrame( + np.zeros((12, 2)), + columns=["a", "b"], + index=make_index(["date", "date", "date"]), + ) + with pytest.raises(ValueError, match=msg): + store.append("df", df) + + # fully names + _maybe_remove(store, "df") + df = DataFrame( + np.zeros((12, 2)), + columns=["a", "b"], + index=make_index(["date", "s", "t"]), + ) + store.append("df", df) + tm.assert_frame_equal(store.select("df"), df) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_pytables_missing.py b/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_pytables_missing.py new file mode 100644 index 0000000..9adb0a6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_pytables_missing.py @@ -0,0 +1,14 @@ +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +import pandas._testing as tm + + +@td.skip_if_installed("tables") +def test_pytables_raises(): + df = pd.DataFrame({"A": [1, 2]}) + with pytest.raises(ImportError, match="tables"): + with tm.ensure_clean("foo.h5") as path: + df.to_hdf(path, "df") diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_read.py b/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_read.py new file mode 100644 index 0000000..1c9e63c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_read.py @@ -0,0 +1,345 @@ +from pathlib import Path +import re + +import numpy as np +import pytest + +from pandas._libs.tslibs import Timestamp +from pandas.compat import is_platform_windows + +import pandas as pd +from pandas import ( + DataFrame, + HDFStore, + Index, + Series, + _testing as tm, + read_hdf, +) +from pandas.tests.io.pytables.common import ( + _maybe_remove, + ensure_clean_path, + ensure_clean_store, +) +from pandas.util import _test_decorators as td + +from pandas.io.pytables import TableIterator + +pytestmark = pytest.mark.single + + +def test_read_missing_key_close_store(setup_path): + # GH 25766 + with ensure_clean_path(setup_path) as path: + df = DataFrame({"a": range(2), "b": range(2)}) + df.to_hdf(path, "k1") + + with pytest.raises(KeyError, match="'No object named k2 in the file'"): + read_hdf(path, "k2") + + # smoke test to test that file is properly closed after + # read with KeyError before another write + df.to_hdf(path, "k2") + + +def test_read_missing_key_opened_store(setup_path): + # GH 28699 + with ensure_clean_path(setup_path) as path: + df = DataFrame({"a": range(2), "b": range(2)}) + df.to_hdf(path, "k1") + + with HDFStore(path, "r") as 
store: + + with pytest.raises(KeyError, match="'No object named k2 in the file'"): + read_hdf(store, "k2") + + # Test that the file is still open after a KeyError and that we can + # still read from it. + read_hdf(store, "k1") + + +def test_read_column(setup_path): + + df = tm.makeTimeDataFrame() + + with ensure_clean_store(setup_path) as store: + _maybe_remove(store, "df") + + # GH 17912 + # HDFStore.select_column should raise a KeyError + # exception if the key is not a valid store + with pytest.raises(KeyError, match="No object named df in the file"): + store.select_column("df", "index") + + store.append("df", df) + # error + with pytest.raises( + KeyError, match=re.escape("'column [foo] not found in the table'") + ): + store.select_column("df", "foo") + + msg = re.escape("select_column() got an unexpected keyword argument 'where'") + with pytest.raises(TypeError, match=msg): + store.select_column("df", "index", where=["index>5"]) + + # valid + result = store.select_column("df", "index") + tm.assert_almost_equal(result.values, Series(df.index).values) + assert isinstance(result, Series) + + # not a data indexable column + msg = re.escape( + "column [values_block_0] can not be extracted individually; " + "it is not data indexable" + ) + with pytest.raises(ValueError, match=msg): + store.select_column("df", "values_block_0") + + # a data column + df2 = df.copy() + df2["string"] = "foo" + store.append("df2", df2, data_columns=["string"]) + result = store.select_column("df2", "string") + tm.assert_almost_equal(result.values, df2["string"].values) + + # a data column with NaNs, result excludes the NaNs + df3 = df.copy() + df3["string"] = "foo" + df3.loc[df3.index[4:6], "string"] = np.nan + store.append("df3", df3, data_columns=["string"]) + result = store.select_column("df3", "string") + tm.assert_almost_equal(result.values, df3["string"].values) + + # start/stop + result = store.select_column("df3", "string", start=2) + tm.assert_almost_equal(result.values, df3["string"].values[2:]) + + result = store.select_column("df3", "string", start=-2) + tm.assert_almost_equal(result.values, df3["string"].values[-2:]) + + result = store.select_column("df3", "string", stop=2) + tm.assert_almost_equal(result.values, df3["string"].values[:2]) + + result = store.select_column("df3", "string", stop=-2) + tm.assert_almost_equal(result.values, df3["string"].values[:-2]) + + result = store.select_column("df3", "string", start=2, stop=-2) + tm.assert_almost_equal(result.values, df3["string"].values[2:-2]) + + result = store.select_column("df3", "string", start=-2, stop=2) + tm.assert_almost_equal(result.values, df3["string"].values[-2:2]) + + # GH 10392 - make sure column name is preserved + df4 = DataFrame({"A": np.random.randn(10), "B": "foo"}) + store.append("df4", df4, data_columns=True) + expected = df4["B"] + result = store.select_column("df4", "B") + tm.assert_series_equal(result, expected) + + +def test_pytables_native_read(datapath, setup_path): + with ensure_clean_store( + datapath("io", "data", "legacy_hdf/pytables_native.h5"), mode="r" + ) as store: + d2 = store["detector/readout"] + assert isinstance(d2, DataFrame) + + +@pytest.mark.skipif(is_platform_windows(), reason="native2 read fails oddly on windows") +def test_pytables_native2_read(datapath, setup_path): + with ensure_clean_store( + datapath("io", "data", "legacy_hdf", "pytables_native2.h5"), mode="r" + ) as store: + str(store) + d1 = store["detector"] + assert isinstance(d1, DataFrame) + + +def 
test_legacy_table_fixed_format_read_py2(datapath, setup_path): + # GH 24510 + # legacy table with fixed format written in Python 2 + with ensure_clean_store( + datapath("io", "data", "legacy_hdf", "legacy_table_fixed_py2.h5"), mode="r" + ) as store: + result = store.select("df") + expected = DataFrame( + [[1, 2, 3, "D"]], + columns=["A", "B", "C", "D"], + index=Index(["ABC"], name="INDEX_NAME"), + ) + tm.assert_frame_equal(expected, result) + + +def test_legacy_table_fixed_format_read_datetime_py2(datapath, setup_path): + # GH 31750 + # legacy table with fixed format and datetime64 column written in Python 2 + with ensure_clean_store( + datapath("io", "data", "legacy_hdf", "legacy_table_fixed_datetime_py2.h5"), + mode="r", + ) as store: + result = store.select("df") + expected = DataFrame( + [[Timestamp("2020-02-06T18:00")]], + columns=["A"], + index=Index(["date"]), + ) + tm.assert_frame_equal(expected, result) + + +def test_legacy_table_read_py2(datapath, setup_path): + # GH 24925 + # legacy table written in Python 2 + with ensure_clean_store( + datapath("io", "data", "legacy_hdf", "legacy_table_py2.h5"), mode="r" + ) as store: + result = store.select("table") + + expected = DataFrame({"a": ["a", "b"], "b": [2, 3]}) + tm.assert_frame_equal(expected, result) + + +def test_read_hdf_open_store(setup_path): + # GH10330 + # No check for non-string path_or_buf, and no test of open store + df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")) + df.index.name = "letters" + df = df.set_index(keys="E", append=True) + + with ensure_clean_path(setup_path) as path: + df.to_hdf(path, "df", mode="w") + direct = read_hdf(path, "df") + store = HDFStore(path, mode="r") + indirect = read_hdf(store, "df") + tm.assert_frame_equal(direct, indirect) + assert store.is_open + store.close() + + +def test_read_hdf_iterator(setup_path): + df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")) + df.index.name = "letters" + df = df.set_index(keys="E", append=True) + + with ensure_clean_path(setup_path) as path: + df.to_hdf(path, "df", mode="w", format="t") + direct = read_hdf(path, "df") + iterator = read_hdf(path, "df", iterator=True) + assert isinstance(iterator, TableIterator) + indirect = next(iterator.__iter__()) + tm.assert_frame_equal(direct, indirect) + iterator.store.close() + + +def test_read_nokey(setup_path): + # GH10443 + df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")) + + # Categorical dtype is not supported for "fixed" format, so there is + # no need to test with that dtype in the dataframe here. + with ensure_clean_path(setup_path) as path: + df.to_hdf(path, "df", mode="a") + reread = read_hdf(path) + tm.assert_frame_equal(df, reread) + df.to_hdf(path, "df2", mode="a") + + msg = "key must be provided when HDF5 file contains multiple datasets." + with pytest.raises(ValueError, match=msg): + read_hdf(path) + + +def test_read_nokey_table(setup_path): + # GH13231 + df = DataFrame({"i": range(5), "c": Series(list("abacd"), dtype="category")}) + + with ensure_clean_path(setup_path) as path: + df.to_hdf(path, "df", mode="a", format="table") + reread = read_hdf(path) + tm.assert_frame_equal(df, reread) + df.to_hdf(path, "df2", mode="a", format="table") + + msg = "key must be provided when HDF5 file contains multiple datasets." 
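+ # (read_hdf can only infer the key when the file holds a single pandas + # object; with both "df" and "df2" present it has to raise.)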
+ with pytest.raises(ValueError, match=msg): + read_hdf(path) + + +def test_read_nokey_empty(setup_path): + with ensure_clean_path(setup_path) as path: + store = HDFStore(path) + store.close() + msg = re.escape( + "Dataset(s) incompatible with Pandas data types, not table, or no " + "datasets found in HDF5 file." + ) + with pytest.raises(ValueError, match=msg): + read_hdf(path) + + +def test_read_from_pathlib_path(setup_path): + + # GH11773 + expected = DataFrame( + np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE") + ) + with ensure_clean_path(setup_path) as filename: + path_obj = Path(filename) + + expected.to_hdf(path_obj, "df", mode="a") + actual = read_hdf(path_obj, "df") + + tm.assert_frame_equal(expected, actual) + + +@td.skip_if_no("py.path") +def test_read_from_py_localpath(setup_path): + + # GH11773 + from py.path import local as LocalPath + + expected = DataFrame( + np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE") + ) + with ensure_clean_path(setup_path) as filename: + path_obj = LocalPath(filename) + + expected.to_hdf(path_obj, "df", mode="a") + actual = read_hdf(path_obj, "df") + + tm.assert_frame_equal(expected, actual) + + +@pytest.mark.parametrize("format", ["fixed", "table"]) +def test_read_hdf_series_mode_r(format, setup_path): + # GH 16583 + # Tests that reading a Series saved to an HDF file + # still works if a mode='r' argument is supplied + series = tm.makeFloatSeries() + with ensure_clean_path(setup_path) as path: + series.to_hdf(path, key="data", format=format) + result = read_hdf(path, key="data", mode="r") + tm.assert_series_equal(result, series) + + +def test_read_py2_hdf_file_in_py3(datapath): + # GH 16781 + + # tests reading a PeriodIndex DataFrame written in Python2 in Python3 + + # the file was generated in Python 2.7 like so: + # + # df = DataFrame([1.,2,3], index=pd.PeriodIndex( + # ['2015-01-01', '2015-01-02', '2015-01-05'], freq='B')) + # df.to_hdf('periodindex_0.20.1_x86_64_darwin_2.7.13.h5', 'p') + + expected = DataFrame( + [1.0, 2, 3], + index=pd.PeriodIndex(["2015-01-01", "2015-01-02", "2015-01-05"], freq="B"), + ) + + with ensure_clean_store( + datapath( + "io", "data", "legacy_hdf", "periodindex_0.20.1_x86_64_darwin_2.7.13.h5" + ), + mode="r", + ) as store: + result = store["p"] + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_retain_attributes.py b/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_retain_attributes.py new file mode 100644 index 0000000..16772d0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_retain_attributes.py @@ -0,0 +1,117 @@ +from warnings import catch_warnings + +import pytest + +from pandas._libs.tslibs import Timestamp + +from pandas import ( + DataFrame, + Series, + _testing as tm, + date_range, + read_hdf, +) +from pandas.tests.io.pytables.common import ( + _maybe_remove, + ensure_clean_path, + ensure_clean_store, +) + +pytestmark = pytest.mark.single + + +def test_retain_index_attributes(setup_path): + + # GH 3499, losing frequency info on index recreation + df = DataFrame( + {"A": Series(range(3), index=date_range("2000-1-1", periods=3, freq="H"))} + ) + + with ensure_clean_store(setup_path) as store: + _maybe_remove(store, "data") + store.put("data", df, format="table") + + result = store.get("data") + tm.assert_frame_equal(df, result) + + for attr in ["freq", "tz", "name"]: + for idx in ["index", "columns"]: + assert getattr(getattr(df, idx), attr, None) == getattr( + 
getattr(result, idx), attr, None + ) + + # try to append a table with a different frequency + with catch_warnings(record=True): + df2 = DataFrame( + { + "A": Series( + range(3), index=date_range("2002-1-1", periods=3, freq="D") + ) + } + ) + store.append("data", df2) + + assert store.get_storer("data").info["index"]["freq"] is None + + # this is ok + _maybe_remove(store, "df2") + df2 = DataFrame( + { + "A": Series( + range(3), + index=[ + Timestamp("20010101"), + Timestamp("20010102"), + Timestamp("20020101"), + ], + ) + } + ) + store.append("df2", df2) + df3 = DataFrame( + {"A": Series(range(3), index=date_range("2002-1-1", periods=3, freq="D"))} + ) + store.append("df2", df3) + + +@pytest.mark.filterwarnings( + "ignore:\\nthe :pandas.io.pytables.AttributeConflictWarning" +) +def test_retain_index_attributes2(setup_path): + with ensure_clean_path(setup_path) as path: + + with catch_warnings(record=True): + + df = DataFrame( + { + "A": Series( + range(3), index=date_range("2000-1-1", periods=3, freq="H") + ) + } + ) + df.to_hdf(path, "data", mode="w", append=True) + df2 = DataFrame( + { + "A": Series( + range(3), index=date_range("2002-1-1", periods=3, freq="D") + ) + } + ) + + df2.to_hdf(path, "data", append=True) + + idx = date_range("2000-1-1", periods=3, freq="H") + idx.name = "foo" + df = DataFrame({"A": Series(range(3), index=idx)}) + df.to_hdf(path, "data", mode="w", append=True) + + assert read_hdf(path, "data").index.name == "foo" + + with catch_warnings(record=True): + + idx2 = date_range("2001-1-1", periods=3, freq="H") + idx2.name = "bar" + df2 = DataFrame({"A": Series(range(3), index=idx2)}) + df2.to_hdf(path, "data", append=True) + + assert read_hdf(path, "data").index.name is None diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_round_trip.py b/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_round_trip.py new file mode 100644 index 0000000..867525a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_round_trip.py @@ -0,0 +1,564 @@ +import datetime +import re +from warnings import ( + catch_warnings, + simplefilter, +) + +import numpy as np +import pytest + +from pandas._libs.tslibs import Timestamp +from pandas.compat import is_platform_windows + +import pandas as pd +from pandas import ( + DataFrame, + Index, + Series, + _testing as tm, + bdate_range, + read_hdf, +) +from pandas.tests.io.pytables.common import ( + _maybe_remove, + ensure_clean_path, + ensure_clean_store, +) +from pandas.util import _test_decorators as td + +_default_compressor = "blosc" + + +pytestmark = pytest.mark.single + + +def test_conv_read_write(setup_path): + with tm.ensure_clean() as path: + + def roundtrip(key, obj, **kwargs): + obj.to_hdf(path, key, **kwargs) + return read_hdf(path, key) + + o = tm.makeTimeSeries() + tm.assert_series_equal(o, roundtrip("series", o)) + + o = tm.makeStringSeries() + tm.assert_series_equal(o, roundtrip("string_series", o)) + + o = tm.makeDataFrame() + tm.assert_frame_equal(o, roundtrip("frame", o)) + + # table + df = DataFrame({"A": range(5), "B": range(5)}) + df.to_hdf(path, "table", append=True) + result = read_hdf(path, "table", where=["index>2"]) + tm.assert_frame_equal(df[df.index > 2], result) + + +def test_long_strings(setup_path): + + # GH6166 + df = DataFrame( + {"a": tm.rands_array(100, size=10)}, index=tm.rands_array(100, size=10) + ) + + with ensure_clean_store(setup_path) as store: + store.append("df", df, data_columns=["a"]) + + result = store.select("df") + 
tm.assert_frame_equal(df, result) + + +def test_api(setup_path): + + # GH4584 + # API issue when to_hdf doesn't accept append AND format args + with ensure_clean_path(setup_path) as path: + + df = tm.makeDataFrame() + df.iloc[:10].to_hdf(path, "df", append=True, format="table") + df.iloc[10:].to_hdf(path, "df", append=True, format="table") + tm.assert_frame_equal(read_hdf(path, "df"), df) + + # append to False + df.iloc[:10].to_hdf(path, "df", append=False, format="table") + df.iloc[10:].to_hdf(path, "df", append=True, format="table") + tm.assert_frame_equal(read_hdf(path, "df"), df) + + with ensure_clean_path(setup_path) as path: + + df = tm.makeDataFrame() + df.iloc[:10].to_hdf(path, "df", append=True) + df.iloc[10:].to_hdf(path, "df", append=True, format="table") + tm.assert_frame_equal(read_hdf(path, "df"), df) + + # append to False + df.iloc[:10].to_hdf(path, "df", append=False, format="table") + df.iloc[10:].to_hdf(path, "df", append=True) + tm.assert_frame_equal(read_hdf(path, "df"), df) + + with ensure_clean_path(setup_path) as path: + + df = tm.makeDataFrame() + df.to_hdf(path, "df", append=False, format="fixed") + tm.assert_frame_equal(read_hdf(path, "df"), df) + + df.to_hdf(path, "df", append=False, format="f") + tm.assert_frame_equal(read_hdf(path, "df"), df) + + df.to_hdf(path, "df", append=False) + tm.assert_frame_equal(read_hdf(path, "df"), df) + + df.to_hdf(path, "df") + tm.assert_frame_equal(read_hdf(path, "df"), df) + + with ensure_clean_store(setup_path) as store: + + df = tm.makeDataFrame() + + _maybe_remove(store, "df") + store.append("df", df.iloc[:10], append=True, format="table") + store.append("df", df.iloc[10:], append=True, format="table") + tm.assert_frame_equal(store.select("df"), df) + + # append to False + _maybe_remove(store, "df") + store.append("df", df.iloc[:10], append=False, format="table") + store.append("df", df.iloc[10:], append=True, format="table") + tm.assert_frame_equal(store.select("df"), df) + + # formats + _maybe_remove(store, "df") + store.append("df", df.iloc[:10], append=False, format="table") + store.append("df", df.iloc[10:], append=True, format="table") + tm.assert_frame_equal(store.select("df"), df) + + _maybe_remove(store, "df") + store.append("df", df.iloc[:10], append=False, format="table") + store.append("df", df.iloc[10:], append=True, format=None) + tm.assert_frame_equal(store.select("df"), df) + + with ensure_clean_path(setup_path) as path: + # Invalid. 
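+ # ("fixed"/"f" stores can never be appended to, and unrecognized format + # strings such as "foo" are rejected outright; both failure modes are + # exercised below.)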
+ df = tm.makeDataFrame() + + msg = "Can only append to Tables" + + with pytest.raises(ValueError, match=msg): + df.to_hdf(path, "df", append=True, format="f") + + with pytest.raises(ValueError, match=msg): + df.to_hdf(path, "df", append=True, format="fixed") + + msg = r"invalid HDFStore format specified \[foo\]" + + with pytest.raises(TypeError, match=msg): + df.to_hdf(path, "df", append=True, format="foo") + + with pytest.raises(TypeError, match=msg): + df.to_hdf(path, "df", append=False, format="foo") + + # File path doesn't exist + path = "" + msg = f"File {path} does not exist" + + with pytest.raises(FileNotFoundError, match=msg): + read_hdf(path, "df") + + +def test_get(setup_path): + + with ensure_clean_store(setup_path) as store: + store["a"] = tm.makeTimeSeries() + left = store.get("a") + right = store["a"] + tm.assert_series_equal(left, right) + + left = store.get("/a") + right = store["/a"] + tm.assert_series_equal(left, right) + + with pytest.raises(KeyError, match="'No object named b in the file'"): + store.get("b") + + +def test_put_integer(setup_path): + # non-date, non-string index + df = DataFrame(np.random.randn(50, 100)) + _check_roundtrip(df, tm.assert_frame_equal, setup_path) + + +def test_table_values_dtypes_roundtrip(setup_path): + + with ensure_clean_store(setup_path) as store: + df1 = DataFrame({"a": [1, 2, 3]}, dtype="f8") + store.append("df_f8", df1) + tm.assert_series_equal(df1.dtypes, store["df_f8"].dtypes) + + df2 = DataFrame({"a": [1, 2, 3]}, dtype="i8") + store.append("df_i8", df2) + tm.assert_series_equal(df2.dtypes, store["df_i8"].dtypes) + + # incompatible dtype + msg = re.escape( + "invalid combination of [values_axes] on appending data " + "[name->values_block_0,cname->values_block_0," + "dtype->float64,kind->float,shape->(1, 3)] vs " + "current table [name->values_block_0," + "cname->values_block_0,dtype->int64,kind->integer," + "shape->None]" + ) + with pytest.raises(ValueError, match=msg): + store.append("df_i8", df1) + + # check creation/storage/retrieval of float32 (a bit hacky to + # actually create them though) + df1 = DataFrame(np.array([[1], [2], [3]], dtype="f4"), columns=["A"]) + store.append("df_f4", df1) + tm.assert_series_equal(df1.dtypes, store["df_f4"].dtypes) + assert df1.dtypes[0] == "float32" + + # check with mixed dtypes + df1 = DataFrame( + { + c: Series(np.random.randint(5), dtype=c) + for c in ["float32", "float64", "int32", "int64", "int16", "int8"] + } + ) + df1["string"] = "foo" + df1["float322"] = 1.0 + df1["float322"] = df1["float322"].astype("float32") + df1["bool"] = df1["float32"] > 0 + df1["time1"] = Timestamp("20130101") + df1["time2"] = Timestamp("20130102") + + store.append("df_mixed_dtypes1", df1) + result = store.select("df_mixed_dtypes1").dtypes.value_counts() + result.index = [str(i) for i in result.index] + expected = Series( + { + "float32": 2, + "float64": 1, + "int32": 1, + "bool": 1, + "int16": 1, + "int8": 1, + "int64": 1, + "object": 1, + "datetime64[ns]": 2, + } + ) + result = result.sort_index() + expected = expected.sort_index() + tm.assert_series_equal(result, expected) + + +def test_series(setup_path): + + s = tm.makeStringSeries() + _check_roundtrip(s, tm.assert_series_equal, path=setup_path) + + ts = tm.makeTimeSeries() + _check_roundtrip(ts, tm.assert_series_equal, path=setup_path) + + ts2 = Series(ts.index, Index(ts.index, dtype=object)) + _check_roundtrip(ts2, tm.assert_series_equal, path=setup_path) + + ts3 = Series(ts.values, Index(np.asarray(ts.index, dtype=object), dtype=object)) + 
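+ # the object-dtype index of Timestamps may deserialize as a DatetimeIndex, hence check_index_type=False below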
_check_roundtrip( + ts3, tm.assert_series_equal, path=setup_path, check_index_type=False + ) + + +def test_float_index(setup_path): + + # GH #454 + index = np.random.randn(10) + s = Series(np.random.randn(10), index=index) + _check_roundtrip(s, tm.assert_series_equal, path=setup_path) + + +def test_tuple_index(setup_path): + + # GH #492 + col = np.arange(10) + idx = [(0.0, 1.0), (2.0, 3.0), (4.0, 5.0)] + data = np.random.randn(30).reshape((3, 10)) + DF = DataFrame(data, index=idx, columns=col) + + with catch_warnings(record=True): + simplefilter("ignore", pd.errors.PerformanceWarning) + _check_roundtrip(DF, tm.assert_frame_equal, path=setup_path) + + +@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning") +def test_index_types(setup_path): + with catch_warnings(record=True): + values = np.random.randn(2) + + func = lambda l, r: tm.assert_series_equal(l, r, check_index_type=True) + + with catch_warnings(record=True): + ser = Series(values, [0, "y"]) + _check_roundtrip(ser, func, path=setup_path) + + with catch_warnings(record=True): + ser = Series(values, [datetime.datetime.today(), 0]) + _check_roundtrip(ser, func, path=setup_path) + + with catch_warnings(record=True): + ser = Series(values, ["y", 0]) + _check_roundtrip(ser, func, path=setup_path) + + with catch_warnings(record=True): + ser = Series(values, [datetime.date.today(), "a"]) + _check_roundtrip(ser, func, path=setup_path) + + with catch_warnings(record=True): + ser = Series(values, [0, "y"]) + _check_roundtrip(ser, func, path=setup_path) + + ser = Series(values, [datetime.datetime.today(), 0]) + _check_roundtrip(ser, func, path=setup_path) + + ser = Series(values, ["y", 0]) + _check_roundtrip(ser, func, path=setup_path) + + ser = Series(values, [datetime.date.today(), "a"]) + _check_roundtrip(ser, func, path=setup_path) + + ser = Series(values, [1.23, "b"]) + _check_roundtrip(ser, func, path=setup_path) + + ser = Series(values, [1, 1.53]) + _check_roundtrip(ser, func, path=setup_path) + + ser = Series(values, [1, 5]) + _check_roundtrip(ser, func, path=setup_path) + + ser = Series( + values, [datetime.datetime(2012, 1, 1), datetime.datetime(2012, 1, 2)] + ) + _check_roundtrip(ser, func, path=setup_path) + + +def test_timeseries_preepoch(setup_path): + + dr = bdate_range("1/1/1940", "1/1/1960") + ts = Series(np.random.randn(len(dr)), index=dr) + try: + _check_roundtrip(ts, tm.assert_series_equal, path=setup_path) + except OverflowError: + if is_platform_windows(): + pytest.xfail("known failure on some windows platforms") + else: + raise + + +@pytest.mark.parametrize( + "compression", [False, pytest.param(True, marks=td.skip_if_windows)] +) +def test_frame(compression, setup_path): + + df = tm.makeDataFrame() + + # put in some random NAs + df.values[0, 0] = np.nan + df.values[5, 3] = np.nan + + _check_roundtrip_table( + df, tm.assert_frame_equal, path=setup_path, compression=compression + ) + _check_roundtrip( + df, tm.assert_frame_equal, path=setup_path, compression=compression + ) + + tdf = tm.makeTimeDataFrame() + _check_roundtrip( + tdf, tm.assert_frame_equal, path=setup_path, compression=compression + ) + + with ensure_clean_store(setup_path) as store: + # not consolidated + df["foo"] = np.random.randn(len(df)) + store["df"] = df + recons = store["df"] + assert recons._mgr.is_consolidated() + + # empty + _check_roundtrip(df[:0], tm.assert_frame_equal, path=setup_path) + + +def test_empty_series_frame(setup_path): + s0 = Series(dtype=object) + s1 = Series(name="myseries", dtype=object) + df0 = DataFrame() + df1 
= DataFrame(index=["a", "b", "c"]) + df2 = DataFrame(columns=["d", "e", "f"]) + + _check_roundtrip(s0, tm.assert_series_equal, path=setup_path) + _check_roundtrip(s1, tm.assert_series_equal, path=setup_path) + _check_roundtrip(df0, tm.assert_frame_equal, path=setup_path) + _check_roundtrip(df1, tm.assert_frame_equal, path=setup_path) + _check_roundtrip(df2, tm.assert_frame_equal, path=setup_path) + + +@pytest.mark.parametrize("dtype", [np.int64, np.float64, object, "m8[ns]", "M8[ns]"]) +def test_empty_series(dtype, setup_path): + s = Series(dtype=dtype) + _check_roundtrip(s, tm.assert_series_equal, path=setup_path) + + +def test_can_serialize_dates(setup_path): + + rng = [x.date() for x in bdate_range("1/1/2000", "1/30/2000")] + frame = DataFrame(np.random.randn(len(rng), 4), index=rng) + + _check_roundtrip(frame, tm.assert_frame_equal, path=setup_path) + + +def test_store_hierarchical(setup_path, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + + _check_roundtrip(frame, tm.assert_frame_equal, path=setup_path) + _check_roundtrip(frame.T, tm.assert_frame_equal, path=setup_path) + _check_roundtrip(frame["A"], tm.assert_series_equal, path=setup_path) + + # check that the names are stored + with ensure_clean_store(setup_path) as store: + store["frame"] = frame + recons = store["frame"] + tm.assert_frame_equal(recons, frame) + + +@pytest.mark.parametrize( + "compression", [False, pytest.param(True, marks=td.skip_if_windows)] +) +def test_store_mixed(compression, setup_path): + def _make_one(): + df = tm.makeDataFrame() + df["obj1"] = "foo" + df["obj2"] = "bar" + df["bool1"] = df["A"] > 0 + df["bool2"] = df["B"] > 0 + df["int1"] = 1 + df["int2"] = 2 + return df._consolidate() + + df1 = _make_one() + df2 = _make_one() + + _check_roundtrip(df1, tm.assert_frame_equal, path=setup_path) + _check_roundtrip(df2, tm.assert_frame_equal, path=setup_path) + + with ensure_clean_store(setup_path) as store: + store["obj"] = df1 + tm.assert_frame_equal(store["obj"], df1) + store["obj"] = df2 + tm.assert_frame_equal(store["obj"], df2) + + # check that Series of all of these types can be stored + _check_roundtrip( + df1["obj1"], + tm.assert_series_equal, + path=setup_path, + compression=compression, + ) + _check_roundtrip( + df1["bool1"], + tm.assert_series_equal, + path=setup_path, + compression=compression, + ) + _check_roundtrip( + df1["int1"], + tm.assert_series_equal, + path=setup_path, + compression=compression, + ) + + +def _check_roundtrip(obj, comparator, path, compression=False, **kwargs): + + options = {} + if compression: + options["complib"] = _default_compressor + + with ensure_clean_store(path, "w", **options) as store: + store["obj"] = obj + retrieved = store["obj"] + comparator(retrieved, obj, **kwargs) + + +def _check_double_roundtrip(obj, comparator, path, compression=False, **kwargs): + # write, read back, write the retrieved object again, and re-check + options = {} + if compression: + options["complib"] = _default_compressor + + with ensure_clean_store(path, "w", **options) as store: + store["obj"] = obj + retrieved = store["obj"] + comparator(retrieved, obj, **kwargs) + store["obj"] = retrieved + again = store["obj"] + comparator(again, obj, **kwargs) + + +def _check_roundtrip_table(obj, comparator, path, compression=False): + options = {} + if compression: + options["complib"] = _default_compressor + + with ensure_clean_store(path, "w", **options) as store: + store.put("obj", obj, format="table") + retrieved = store["obj"] + + comparator(retrieved, obj) + + +def test_unicode_index(setup_path): + + 
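+ # round-trip a Series indexed by non-ASCII (Greek sigma) strings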
unicode_values = ["\u03c3", "\u03c3\u03c3"] + + # PerformanceWarning + with catch_warnings(record=True): + simplefilter("ignore", pd.errors.PerformanceWarning) + s = Series(np.random.randn(len(unicode_values)), unicode_values) + _check_roundtrip(s, tm.assert_series_equal, path=setup_path) + + +def test_unicode_longer_encoded(setup_path): + # GH 11234 + char = "\u0394" + df = DataFrame({"A": [char]}) + with ensure_clean_store(setup_path) as store: + store.put("df", df, format="table", encoding="utf-8") + result = store.get("df") + tm.assert_frame_equal(result, df) + + df = DataFrame({"A": ["a", char], "B": ["b", "b"]}) + with ensure_clean_store(setup_path) as store: + store.put("df", df, format="table", encoding="utf-8") + result = store.get("df") + tm.assert_frame_equal(result, df) + + +def test_store_datetime_mixed(setup_path): + + df = DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0], "c": ["a", "b", "c"]}) + ts = tm.makeTimeSeries() + df["d"] = ts.index[:3] + _check_roundtrip(df, tm.assert_frame_equal, path=setup_path) + + +def test_round_trip_equals(setup_path): + # GH 9330 + df = DataFrame({"B": [1, 2], "A": ["x", "y"]}) + + with ensure_clean_path(setup_path) as path: + df.to_hdf(path, "df", format="table") + other = read_hdf(path, "df") + tm.assert_frame_equal(df, other) + assert df.equals(other) + assert other.equals(df) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_select.py b/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_select.py new file mode 100644 index 0000000..b644c34 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_select.py @@ -0,0 +1,976 @@ +from warnings import catch_warnings + +import numpy as np +import pytest + +from pandas._libs.tslibs import Timestamp + +import pandas as pd +from pandas import ( + DataFrame, + HDFStore, + Index, + MultiIndex, + Series, + _testing as tm, + bdate_range, + concat, + date_range, + isna, + read_hdf, +) +from pandas.tests.io.pytables.common import ( + _maybe_remove, + ensure_clean_path, + ensure_clean_store, +) + +from pandas.io.pytables import Term + +pytestmark = pytest.mark.single + + +def test_select_columns_in_where(setup_path): + + # GH 6169 + # recreate multi-indexes when columns is passed + # in the `where` argument + index = MultiIndex( + levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=["foo_name", "bar_name"], + ) + + # With a DataFrame + df = DataFrame(np.random.randn(10, 3), index=index, columns=["A", "B", "C"]) + + with ensure_clean_store(setup_path) as store: + store.put("df", df, format="table") + expected = df[["A"]] + + tm.assert_frame_equal(store.select("df", columns=["A"]), expected) + + tm.assert_frame_equal(store.select("df", where="columns=['A']"), expected) + + # With a Series + s = Series(np.random.randn(10), index=index, name="A") + with ensure_clean_store(setup_path) as store: + store.put("s", s, format="table") + tm.assert_series_equal(store.select("s", where="columns=['A']"), s) + + +def test_select_with_dups(setup_path): + + # single dtypes + df = DataFrame(np.random.randn(10, 4), columns=["A", "A", "B", "B"]) + df.index = date_range("20130101 9:30", periods=10, freq="T") + + with ensure_clean_store(setup_path) as store: + store.append("df", df) + + result = store.select("df") + expected = df + tm.assert_frame_equal(result, expected, by_blocks=True) + + result = store.select("df", columns=df.columns) + expected = df + 
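+ # by_blocks=True compares the underlying dtype blocks, which handles the duplicate column labels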
tm.assert_frame_equal(result, expected, by_blocks=True) + + result = store.select("df", columns=["A"]) + expected = df.loc[:, ["A"]] + tm.assert_frame_equal(result, expected) + + # dups across dtypes + df = concat( + [ + DataFrame(np.random.randn(10, 4), columns=["A", "A", "B", "B"]), + DataFrame( + np.random.randint(0, 10, size=20).reshape(10, 2), columns=["A", "C"] + ), + ], + axis=1, + ) + df.index = date_range("20130101 9:30", periods=10, freq="T") + + with ensure_clean_store(setup_path) as store: + store.append("df", df) + + result = store.select("df") + expected = df + tm.assert_frame_equal(result, expected, by_blocks=True) + + result = store.select("df", columns=df.columns) + expected = df + tm.assert_frame_equal(result, expected, by_blocks=True) + + expected = df.loc[:, ["A"]] + result = store.select("df", columns=["A"]) + tm.assert_frame_equal(result, expected, by_blocks=True) + + expected = df.loc[:, ["B", "A"]] + result = store.select("df", columns=["B", "A"]) + tm.assert_frame_equal(result, expected, by_blocks=True) + + # duplicates on both index and columns + with ensure_clean_store(setup_path) as store: + store.append("df", df) + store.append("df", df) + + expected = df.loc[:, ["B", "A"]] + expected = concat([expected, expected]) + result = store.select("df", columns=["B", "A"]) + tm.assert_frame_equal(result, expected, by_blocks=True) + + +def test_select(setup_path): + + with ensure_clean_store(setup_path) as store: + + with catch_warnings(record=True): + + # select with columns= + df = tm.makeTimeDataFrame() + _maybe_remove(store, "df") + store.append("df", df) + result = store.select("df", columns=["A", "B"]) + expected = df.reindex(columns=["A", "B"]) + tm.assert_frame_equal(expected, result) + + # equivalently + result = store.select("df", [("columns=['A', 'B']")]) + expected = df.reindex(columns=["A", "B"]) + tm.assert_frame_equal(expected, result) + + # with a data column + _maybe_remove(store, "df") + store.append("df", df, data_columns=["A"]) + result = store.select("df", ["A > 0"], columns=["A", "B"]) + expected = df[df.A > 0].reindex(columns=["A", "B"]) + tm.assert_frame_equal(expected, result) + + # all columns as data columns + _maybe_remove(store, "df") + store.append("df", df, data_columns=True) + result = store.select("df", ["A > 0"], columns=["A", "B"]) + expected = df[df.A > 0].reindex(columns=["A", "B"]) + tm.assert_frame_equal(expected, result) + + # with a data column, but different columns + _maybe_remove(store, "df") + store.append("df", df, data_columns=["A"]) + result = store.select("df", ["A > 0"], columns=["C", "D"]) + expected = df[df.A > 0].reindex(columns=["C", "D"]) + tm.assert_frame_equal(expected, result) + + +def test_select_dtypes(setup_path): + + with ensure_clean_store(setup_path) as store: + # with a Timestamp data column (GH #2637) + df = DataFrame( + { + "ts": bdate_range("2012-01-01", periods=300), + "A": np.random.randn(300), + } + ) + _maybe_remove(store, "df") + store.append("df", df, data_columns=["ts", "A"]) + + result = store.select("df", "ts>=Timestamp('2012-02-01')") + expected = df[df.ts >= Timestamp("2012-02-01")] + tm.assert_frame_equal(expected, result) + + # bool columns (GH #2849) + df = DataFrame(np.random.randn(5, 2), columns=["A", "B"]) + df["object"] = "foo" + df.loc[4:5, "object"] = "bar" + df["boolv"] = df["A"] > 0 + _maybe_remove(store, "df") + store.append("df", df, data_columns=True) + + expected = df[df.boolv == True].reindex(columns=["A", "boolv"]) # noqa:E712 + for v in [True, "true", 1]: + result = store.select("df", 
f"boolv == {v}", columns=["A", "boolv"]) + tm.assert_frame_equal(expected, result) + + expected = df[df.boolv == False].reindex(columns=["A", "boolv"]) # noqa:E712 + for v in [False, "false", 0]: + result = store.select("df", f"boolv == {v}", columns=["A", "boolv"]) + tm.assert_frame_equal(expected, result) + + # integer index + df = DataFrame({"A": np.random.rand(20), "B": np.random.rand(20)}) + _maybe_remove(store, "df_int") + store.append("df_int", df) + result = store.select("df_int", "index<10 and columns=['A']") + expected = df.reindex(index=list(df.index)[0:10], columns=["A"]) + tm.assert_frame_equal(expected, result) + + # float index + df = DataFrame( + { + "A": np.random.rand(20), + "B": np.random.rand(20), + "index": np.arange(20, dtype="f8"), + } + ) + _maybe_remove(store, "df_float") + store.append("df_float", df) + result = store.select("df_float", "index<10.0 and columns=['A']") + expected = df.reindex(index=list(df.index)[0:10], columns=["A"]) + tm.assert_frame_equal(expected, result) + + with ensure_clean_store(setup_path) as store: + + # floats w/o NaN + df = DataFrame({"cols": range(11), "values": range(11)}, dtype="float64") + df["cols"] = (df["cols"] + 10).apply(str) + + store.append("df1", df, data_columns=True) + result = store.select("df1", where="values>2.0") + expected = df[df["values"] > 2.0] + tm.assert_frame_equal(expected, result) + + # floats with NaN + df.iloc[0] = np.nan + expected = df[df["values"] > 2.0] + + store.append("df2", df, data_columns=True, index=False) + result = store.select("df2", where="values>2.0") + tm.assert_frame_equal(expected, result) + + # https://github.com/PyTables/PyTables/issues/282 + # bug in selection when 0th row has a np.nan and an index + # store.append('df3',df,data_columns=True) + # result = store.select( + # 'df3', where='values>2.0') + # tm.assert_frame_equal(expected, result) + + # not in first position float with NaN ok too + df = DataFrame({"cols": range(11), "values": range(11)}, dtype="float64") + df["cols"] = (df["cols"] + 10).apply(str) + + df.iloc[1] = np.nan + expected = df[df["values"] > 2.0] + + store.append("df4", df, data_columns=True) + result = store.select("df4", where="values>2.0") + tm.assert_frame_equal(expected, result) + + # test selection with comparison against numpy scalar + # GH 11283 + with ensure_clean_store(setup_path) as store: + df = tm.makeDataFrame() + + expected = df[df["A"] > 0] + + store.append("df", df, data_columns=True) + np_zero = np.float64(0) # noqa:F841 + result = store.select("df", where=["A>np_zero"]) + tm.assert_frame_equal(expected, result) + + +def test_select_with_many_inputs(setup_path): + + with ensure_clean_store(setup_path) as store: + + df = DataFrame( + { + "ts": bdate_range("2012-01-01", periods=300), + "A": np.random.randn(300), + "B": range(300), + "users": ["a"] * 50 + + ["b"] * 50 + + ["c"] * 100 + + [f"a{i:03d}" for i in range(100)], + } + ) + _maybe_remove(store, "df") + store.append("df", df, data_columns=["ts", "A", "B", "users"]) + + # regular select + result = store.select("df", "ts>=Timestamp('2012-02-01')") + expected = df[df.ts >= Timestamp("2012-02-01")] + tm.assert_frame_equal(expected, result) + + # small selector + result = store.select("df", "ts>=Timestamp('2012-02-01') & users=['a','b','c']") + expected = df[ + (df.ts >= Timestamp("2012-02-01")) & df.users.isin(["a", "b", "c"]) + ] + tm.assert_frame_equal(expected, result) + + # big selector along the columns + selector = ["a", "b", "c"] + [f"a{i:03d}" for i in range(60)] + result = 
store.select("df", "ts>=Timestamp('2012-02-01') and users=selector") + expected = df[(df.ts >= Timestamp("2012-02-01")) & df.users.isin(selector)] + tm.assert_frame_equal(expected, result) + + selector = range(100, 200) + result = store.select("df", "B=selector") + expected = df[df.B.isin(selector)] + tm.assert_frame_equal(expected, result) + assert len(result) == 100 + + # big selector along the index + selector = Index(df.ts[0:100].values) + result = store.select("df", "ts=selector") + expected = df[df.ts.isin(selector.values)] + tm.assert_frame_equal(expected, result) + assert len(result) == 100 + + +def test_select_iterator(setup_path): + + # single table + with ensure_clean_store(setup_path) as store: + + df = tm.makeTimeDataFrame(500) + _maybe_remove(store, "df") + store.append("df", df) + + expected = store.select("df") + + results = list(store.select("df", iterator=True)) + result = concat(results) + tm.assert_frame_equal(expected, result) + + results = list(store.select("df", chunksize=100)) + assert len(results) == 5 + result = concat(results) + tm.assert_frame_equal(expected, result) + + results = list(store.select("df", chunksize=150)) + result = concat(results) + tm.assert_frame_equal(result, expected) + + with ensure_clean_path(setup_path) as path: + + df = tm.makeTimeDataFrame(500) + df.to_hdf(path, "df_non_table") + + msg = "can only use an iterator or chunksize on a table" + with pytest.raises(TypeError, match=msg): + read_hdf(path, "df_non_table", chunksize=100) + + with pytest.raises(TypeError, match=msg): + read_hdf(path, "df_non_table", iterator=True) + + with ensure_clean_path(setup_path) as path: + + df = tm.makeTimeDataFrame(500) + df.to_hdf(path, "df", format="table") + + results = list(read_hdf(path, "df", chunksize=100)) + result = concat(results) + + assert len(results) == 5 + tm.assert_frame_equal(result, df) + tm.assert_frame_equal(result, read_hdf(path, "df")) + + # multiple + + with ensure_clean_store(setup_path) as store: + + df1 = tm.makeTimeDataFrame(500) + store.append("df1", df1, data_columns=True) + df2 = tm.makeTimeDataFrame(500).rename(columns="{}_2".format) + df2["foo"] = "bar" + store.append("df2", df2) + + df = concat([df1, df2], axis=1) + + # full selection + expected = store.select_as_multiple(["df1", "df2"], selector="df1") + results = list( + store.select_as_multiple(["df1", "df2"], selector="df1", chunksize=150) + ) + result = concat(results) + tm.assert_frame_equal(expected, result) + + +def test_select_iterator_complete_8014(setup_path): + + # GH 8014 + # using iterator and where clause + chunksize = 1e4 + + # no iterator + with ensure_clean_store(setup_path) as store: + + expected = tm.makeTimeDataFrame(100064, "S") + _maybe_remove(store, "df") + store.append("df", expected) + + beg_dt = expected.index[0] + end_dt = expected.index[-1] + + # select w/o iteration and no where clause works + result = store.select("df") + tm.assert_frame_equal(expected, result) + + # select w/o iterator and where clause, single term, begin + # of range, works + where = f"index >= '{beg_dt}'" + result = store.select("df", where=where) + tm.assert_frame_equal(expected, result) + + # select w/o iterator and where clause, single term, end + # of range, works + where = f"index <= '{end_dt}'" + result = store.select("df", where=where) + tm.assert_frame_equal(expected, result) + + # select w/o iterator and where clause, inclusive range, + # works + where = f"index >= '{beg_dt}' & index <= '{end_dt}'" + result = store.select("df", where=where) + 
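+ # the inclusive range spans the whole index, so the full frame should come back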
tm.assert_frame_equal(expected, result) + + # with iterator, full range + with ensure_clean_store(setup_path) as store: + + expected = tm.makeTimeDataFrame(100064, "S") + _maybe_remove(store, "df") + store.append("df", expected) + + beg_dt = expected.index[0] + end_dt = expected.index[-1] + + # select w/iterator and no where clause works + results = list(store.select("df", chunksize=chunksize)) + result = concat(results) + tm.assert_frame_equal(expected, result) + + # select w/iterator and where clause, single term, begin of range + where = f"index >= '{beg_dt}'" + results = list(store.select("df", where=where, chunksize=chunksize)) + result = concat(results) + tm.assert_frame_equal(expected, result) + + # select w/iterator and where clause, single term, end of range + where = f"index <= '{end_dt}'" + results = list(store.select("df", where=where, chunksize=chunksize)) + result = concat(results) + tm.assert_frame_equal(expected, result) + + # select w/iterator and where clause, inclusive range + where = f"index >= '{beg_dt}' & index <= '{end_dt}'" + results = list(store.select("df", where=where, chunksize=chunksize)) + result = concat(results) + tm.assert_frame_equal(expected, result) + + +def test_select_iterator_non_complete_8014(setup_path): + + # GH 8014 + # using iterator and where clause + chunksize = 1e4 + + # with iterator, non complete range + with ensure_clean_store(setup_path) as store: + + expected = tm.makeTimeDataFrame(100064, "S") + _maybe_remove(store, "df") + store.append("df", expected) + + beg_dt = expected.index[1] + end_dt = expected.index[-2] + + # select w/iterator and where clause, single term, begin of range + where = f"index >= '{beg_dt}'" + results = list(store.select("df", where=where, chunksize=chunksize)) + result = concat(results) + rexpected = expected[expected.index >= beg_dt] + tm.assert_frame_equal(rexpected, result) + + # select w/iterator and where clause, single term, end of range + where = f"index <= '{end_dt}'" + results = list(store.select("df", where=where, chunksize=chunksize)) + result = concat(results) + rexpected = expected[expected.index <= end_dt] + tm.assert_frame_equal(rexpected, result) + + # select w/iterator and where clause, inclusive range + where = f"index >= '{beg_dt}' & index <= '{end_dt}'" + results = list(store.select("df", where=where, chunksize=chunksize)) + result = concat(results) + rexpected = expected[(expected.index >= beg_dt) & (expected.index <= end_dt)] + tm.assert_frame_equal(rexpected, result) + + # with iterator, empty where + with ensure_clean_store(setup_path) as store: + + expected = tm.makeTimeDataFrame(100064, "S") + _maybe_remove(store, "df") + store.append("df", expected) + + end_dt = expected.index[-1] + + # select w/iterator and where clause, single term, begin of range + where = f"index > '{end_dt}'" + results = list(store.select("df", where=where, chunksize=chunksize)) + assert 0 == len(results) + + +def test_select_iterator_many_empty_frames(setup_path): + + # GH 8014 + # using iterator and where clause can return many empty + # frames. 
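+ # The asserts below check that chunks with no matching rows are skipped rather than yielded empty.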
+ chunksize = 10_000 + + # with iterator, range limited to the first chunk + with ensure_clean_store(setup_path) as store: + + expected = tm.makeTimeDataFrame(100000, "S") + _maybe_remove(store, "df") + store.append("df", expected) + + beg_dt = expected.index[0] + end_dt = expected.index[chunksize - 1] + + # select w/iterator and where clause, single term, begin of range + where = f"index >= '{beg_dt}'" + results = list(store.select("df", where=where, chunksize=chunksize)) + result = concat(results) + rexpected = expected[expected.index >= beg_dt] + tm.assert_frame_equal(rexpected, result) + + # select w/iterator and where clause, single term, end of range + where = f"index <= '{end_dt}'" + results = list(store.select("df", where=where, chunksize=chunksize)) + + assert len(results) == 1 + result = concat(results) + rexpected = expected[expected.index <= end_dt] + tm.assert_frame_equal(rexpected, result) + + # select w/iterator and where clause, inclusive range + where = f"index >= '{beg_dt}' & index <= '{end_dt}'" + results = list(store.select("df", where=where, chunksize=chunksize)) + + # only the single chunk overlapping the range should be yielded + assert len(results) == 1 + result = concat(results) + rexpected = expected[(expected.index >= beg_dt) & (expected.index <= end_dt)] + tm.assert_frame_equal(rexpected, result) + + # select w/iterator and where clause which selects + # *nothing*. + # + # To be consistent with Python idiom I suggest this should + # return [] e.g. `for e in []: print True` never prints + # True. + + where = f"index <= '{beg_dt}' & index >= '{end_dt}'" + results = list(store.select("df", where=where, chunksize=chunksize)) + + # should be [] + assert len(results) == 0 + + +def test_frame_select(setup_path): + + df = tm.makeTimeDataFrame() + + with ensure_clean_store(setup_path) as store: + store.put("frame", df, format="table") + date = df.index[len(df) // 2] + + crit1 = Term("index>=date") + assert crit1.env.scope["date"] == date + + crit2 = "columns=['A', 'D']" + crit3 = "columns=A" + + result = store.select("frame", [crit1, crit2]) + expected = df.loc[date:, ["A", "D"]] + tm.assert_frame_equal(result, expected) + + result = store.select("frame", [crit3]) + expected = df.loc[:, ["A"]] + tm.assert_frame_equal(result, expected) + + # invalid terms + df = tm.makeTimeDataFrame() + store.append("df_time", df) + msg = "could not convert string to Timestamp" + with pytest.raises(ValueError, match=msg): + store.select("df_time", "index>0") + + # can't select if not written as table + # store['frame'] = df + # with pytest.raises(ValueError): + # store.select('frame', [crit1, crit2]) + + +def test_frame_select_complex(setup_path): + # select via complex criteria + + df = tm.makeTimeDataFrame() + df["string"] = "foo" + df.loc[df.index[0:4], "string"] = "bar" + + with ensure_clean_store(setup_path) as store: + store.put("df", df, format="table", data_columns=["string"]) + + # empty + result = store.select("df", 'index>df.index[3] & string="bar"') + expected = df.loc[(df.index > df.index[3]) & (df.string == "bar")] + tm.assert_frame_equal(result, expected) + + result = store.select("df", 'index>df.index[3] & string="foo"') + expected = df.loc[(df.index > df.index[3]) & (df.string == "foo")] + tm.assert_frame_equal(result, expected) + + # or + result = store.select("df", 'index>df.index[3] | string="bar"') + expected = df.loc[(df.index > df.index[3]) | (df.string == "bar")] + tm.assert_frame_equal(result, expected) + + result = store.select( + "df", '(index>df.index[3] & index<=df.index[6]) | string="bar"' + ) + 
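+ # the parenthesized conjunction binds before the OR, mirroring the boolean mask below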
expected = df.loc[ + ((df.index > df.index[3]) & (df.index <= df.index[6])) + | (df.string == "bar") + ] + tm.assert_frame_equal(result, expected) + + # invert + result = store.select("df", 'string!="bar"') + expected = df.loc[df.string != "bar"] + tm.assert_frame_equal(result, expected) + + # invert not implemented in numexpr :( + msg = "cannot use an invert condition when passing to numexpr" + with pytest.raises(NotImplementedError, match=msg): + store.select("df", '~(string="bar")') + + # invert ok for filters + result = store.select("df", "~(columns=['A','B'])") + expected = df.loc[:, df.columns.difference(["A", "B"])] + tm.assert_frame_equal(result, expected) + + # in + result = store.select("df", "index>df.index[3] & columns in ['A','B']") + expected = df.loc[df.index > df.index[3]].reindex(columns=["A", "B"]) + tm.assert_frame_equal(result, expected) + + +def test_frame_select_complex2(setup_path): + + with ensure_clean_path(["params.hdf", "hist.hdf"]) as paths: + + pp, hh = paths + + # use non-trivial selection criteria + params = DataFrame({"A": [1, 1, 2, 2, 3]}) + params.to_hdf(pp, "df", mode="w", format="table", data_columns=["A"]) + + selection = read_hdf(pp, "df", where="A=[2,3]") + hist = DataFrame( + np.random.randn(25, 1), + columns=["data"], + index=MultiIndex.from_tuples( + [(i, j) for i in range(5) for j in range(5)], names=["l1", "l2"] + ), + ) + + hist.to_hdf(hh, "df", mode="w", format="table") + + expected = read_hdf(hh, "df", where="l1=[2, 3, 4]") + + # scope with list like + l0 = selection.index.tolist() # noqa:F841 + store = HDFStore(hh) + result = store.select("df", where="l1=l0") + tm.assert_frame_equal(result, expected) + store.close() + + result = read_hdf(hh, "df", where="l1=l0") + tm.assert_frame_equal(result, expected) + + # index + index = selection.index # noqa:F841 + result = read_hdf(hh, "df", where="l1=index") + tm.assert_frame_equal(result, expected) + + result = read_hdf(hh, "df", where="l1=selection.index") + tm.assert_frame_equal(result, expected) + + result = read_hdf(hh, "df", where="l1=selection.index.tolist()") + tm.assert_frame_equal(result, expected) + + result = read_hdf(hh, "df", where="l1=list(selection.index)") + tm.assert_frame_equal(result, expected) + + # scope with index + store = HDFStore(hh) + + result = store.select("df", where="l1=index") + tm.assert_frame_equal(result, expected) + + result = store.select("df", where="l1=selection.index") + tm.assert_frame_equal(result, expected) + + result = store.select("df", where="l1=selection.index.tolist()") + tm.assert_frame_equal(result, expected) + + result = store.select("df", where="l1=list(selection.index)") + tm.assert_frame_equal(result, expected) + + store.close() + + +def test_invalid_filtering(setup_path): + + # can't use more than one filter (atm) + + df = tm.makeTimeDataFrame() + + with ensure_clean_store(setup_path) as store: + store.put("df", df, format="table") + + msg = "unable to collapse Joint Filters" + # not implemented + with pytest.raises(NotImplementedError, match=msg): + store.select("df", "columns=['A'] | columns=['B']") + + # in theory we could deal with this + with pytest.raises(NotImplementedError, match=msg): + store.select("df", "columns=['A','B'] & columns=['C']") + + +def test_string_select(setup_path): + # GH 2973 + with ensure_clean_store(setup_path) as store: + + df = tm.makeTimeDataFrame() + + # test string ==/!= + df["x"] = "none" + df.loc[df.index[2:7], "x"] = "" + + store.append("df", df, data_columns=["x"]) + + result = store.select("df", "x=none") + 
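+ # an unquoted none in the where clause is interpreted as the string "none"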
expected = df[df.x == "none"] + tm.assert_frame_equal(result, expected) + + result = store.select("df", "x!=none") + expected = df[df.x != "none"] + tm.assert_frame_equal(result, expected) + + df2 = df.copy() + df2.loc[df2.x == "", "x"] = np.nan + + store.append("df2", df2, data_columns=["x"]) + result = store.select("df2", "x!=none") + expected = df2[isna(df2.x)] + tm.assert_frame_equal(result, expected) + + # int ==/!= + df["int"] = 1 + df.loc[df.index[2:7], "int"] = 2 + + store.append("df3", df, data_columns=["int"]) + + result = store.select("df3", "int=2") + expected = df[df.int == 2] + tm.assert_frame_equal(result, expected) + + result = store.select("df3", "int!=2") + expected = df[df.int != 2] + tm.assert_frame_equal(result, expected) + + +def test_select_as_multiple(setup_path): + + df1 = tm.makeTimeDataFrame() + df2 = tm.makeTimeDataFrame().rename(columns="{}_2".format) + df2["foo"] = "bar" + + with ensure_clean_store(setup_path) as store: + + msg = "keys must be a list/tuple" + # no tables stored + with pytest.raises(TypeError, match=msg): + store.select_as_multiple(None, where=["A>0", "B>0"], selector="df1") + + store.append("df1", df1, data_columns=["A", "B"]) + store.append("df2", df2) + + # exceptions + with pytest.raises(TypeError, match=msg): + store.select_as_multiple(None, where=["A>0", "B>0"], selector="df1") + + with pytest.raises(TypeError, match=msg): + store.select_as_multiple([None], where=["A>0", "B>0"], selector="df1") + + msg = "'No object named df3 in the file'" + with pytest.raises(KeyError, match=msg): + store.select_as_multiple( + ["df1", "df3"], where=["A>0", "B>0"], selector="df1" + ) + + with pytest.raises(KeyError, match=msg): + store.select_as_multiple(["df3"], where=["A>0", "B>0"], selector="df1") + + with pytest.raises(KeyError, match="'No object named df4 in the file'"): + store.select_as_multiple( + ["df1", "df2"], where=["A>0", "B>0"], selector="df4" + ) + + # default select + result = store.select("df1", ["A>0", "B>0"]) + expected = store.select_as_multiple( + ["df1"], where=["A>0", "B>0"], selector="df1" + ) + tm.assert_frame_equal(result, expected) + expected = store.select_as_multiple("df1", where=["A>0", "B>0"], selector="df1") + tm.assert_frame_equal(result, expected) + + # multiple + result = store.select_as_multiple( + ["df1", "df2"], where=["A>0", "B>0"], selector="df1" + ) + expected = concat([df1, df2], axis=1) + expected = expected[(expected.A > 0) & (expected.B > 0)] + tm.assert_frame_equal(result, expected, check_freq=False) + # FIXME: 2021-01-20 this is failing with freq None vs 4B on some builds + + # multiple (diff selector) + result = store.select_as_multiple( + ["df1", "df2"], where="index>df2.index[4]", selector="df2" + ) + expected = concat([df1, df2], axis=1) + expected = expected[5:] + tm.assert_frame_equal(result, expected) + + # test exception for diff rows + store.append("df3", tm.makeTimeDataFrame(nper=50)) + msg = "all tables must have exactly the same nrows!" 
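+ # df3 was written with a different number of rows, so the tables cannot be aligned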
+ with pytest.raises(ValueError, match=msg): + store.select_as_multiple( + ["df1", "df3"], where=["A>0", "B>0"], selector="df1" + ) + + +def test_nan_selection_bug_4858(setup_path): + + with ensure_clean_store(setup_path) as store: + + df = DataFrame({"cols": range(6), "values": range(6)}, dtype="float64") + df["cols"] = (df["cols"] + 10).apply(str) + df.iloc[0] = np.nan + + expected = DataFrame( + {"cols": ["13.0", "14.0", "15.0"], "values": [3.0, 4.0, 5.0]}, + index=[3, 4, 5], + ) + + # write w/o the index on that particular column + store.append("df", df, data_columns=True, index=["cols"]) + result = store.select("df", where="values>2.0") + tm.assert_frame_equal(result, expected) + + +def test_query_with_nested_special_character(setup_path): + df = DataFrame( + { + "a": ["a", "a", "c", "b", "test & test", "c", "b", "e"], + "b": [1, 2, 3, 4, 5, 6, 7, 8], + } + ) + expected = df[df.a == "test & test"] + with ensure_clean_store(setup_path) as store: + store.append("test", df, format="table", data_columns=True) + result = store.select("test", 'a = "test & test"') + tm.assert_frame_equal(expected, result) + + +def test_query_long_float_literal(setup_path): + # GH 14241 + df = DataFrame({"A": [1000000000.0009, 1000000000.0011, 1000000000.0015]}) + + with ensure_clean_store(setup_path) as store: + store.append("test", df, format="table", data_columns=True) + + cutoff = 1000000000.0006 + result = store.select("test", f"A < {cutoff:.4f}") + assert result.empty + + cutoff = 1000000000.0010 + result = store.select("test", f"A > {cutoff:.4f}") + expected = df.loc[[1, 2], :] + tm.assert_frame_equal(expected, result) + + exact = 1000000000.0011 + result = store.select("test", f"A == {exact:.4f}") + expected = df.loc[[1], :] + tm.assert_frame_equal(expected, result) + + +def test_query_compare_column_type(setup_path): + # GH 15492 + df = DataFrame( + { + "date": ["2014-01-01", "2014-01-02"], + "real_date": date_range("2014-01-01", periods=2), + "float": [1.1, 1.2], + "int": [1, 2], + }, + columns=["date", "real_date", "float", "int"], + ) + + with ensure_clean_store(setup_path) as store: + store.append("test", df, format="table", data_columns=True) + + ts = Timestamp("2014-01-01") # noqa:F841 + result = store.select("test", where="real_date > ts") + expected = df.loc[[1], :] + tm.assert_frame_equal(expected, result) + + for op in ["<", ">", "=="]: + # non strings to string column always fail + for v in [2.1, True, Timestamp("2014-01-01"), pd.Timedelta(1, "s")]: + query = f"date {op} v" + msg = f"Cannot compare {v} of type {type(v)} to string column" + with pytest.raises(TypeError, match=msg): + store.select("test", where=query) + + # strings to other columns must be convertible to type + v = "a" + for col in ["int", "float", "real_date"]: + query = f"{col} {op} v" + msg = "could not convert string to " + with pytest.raises(ValueError, match=msg): + store.select("test", where=query) + + for v, col in zip( + ["1", "1.1", "2014-01-01"], ["int", "float", "real_date"] + ): + query = f"{col} {op} v" + result = store.select("test", where=query) + + if op == "==": + expected = df.loc[[0], :] + elif op == ">": + expected = df.loc[[1], :] + else: + expected = df.loc[[], :] + tm.assert_frame_equal(expected, result) + + +@pytest.mark.parametrize("where", ["", (), (None,), [], [None]]) +def test_select_empty_where(where): + # GH26610 + + df = DataFrame([1, 2, 3]) + with ensure_clean_path("empty_where.h5") as path: + with HDFStore(path) as store: + store.put("df", df, "t") + result = read_hdf(store, "df", 
where=where) + tm.assert_frame_equal(result, df) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_store.py b/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_store.py new file mode 100644 index 0000000..cbca8bb --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_store.py @@ -0,0 +1,1013 @@ +import datetime +import hashlib +import os +import time +from warnings import ( + catch_warnings, + simplefilter, +) + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + DatetimeIndex, + Index, + MultiIndex, + Series, + Timestamp, + concat, + date_range, + timedelta_range, +) +import pandas._testing as tm +from pandas.tests.io.pytables.common import ( + _maybe_remove, + ensure_clean_path, + ensure_clean_store, + safe_close, +) + +_default_compressor = "blosc" +ignore_natural_naming_warning = pytest.mark.filterwarnings( + "ignore:object name:tables.exceptions.NaturalNameWarning" +) + +from pandas.io.pytables import ( + HDFStore, + read_hdf, +) + +pytestmark = pytest.mark.single + + +def test_context(setup_path): + with tm.ensure_clean(setup_path) as path: + try: + with HDFStore(path) as tbl: + raise ValueError("blah") + except ValueError: + pass + with tm.ensure_clean(setup_path) as path: + with HDFStore(path) as tbl: + tbl["a"] = tm.makeDataFrame() + assert len(tbl) == 1 + assert type(tbl["a"]) == DataFrame + + +def test_no_track_times(setup_path): + + # GH 32682 + # allows setting track_times (see `pytables` `create_table` documentation) + + def checksum(filename, hash_factory=hashlib.md5, chunk_num_blocks=128): + h = hash_factory() + with open(filename, "rb") as f: + for chunk in iter(lambda: f.read(chunk_num_blocks * h.block_size), b""): + h.update(chunk) + return h.digest() + + def create_h5_and_return_checksum(track_times): + with ensure_clean_path(setup_path) as path: + df = DataFrame({"a": [1]}) + + with HDFStore(path, mode="w") as hdf: + hdf.put( + "table", + df, + format="table", + data_columns=True, + index=None, + track_times=track_times, + ) + + return checksum(path) + + checksum_0_tt_false = create_h5_and_return_checksum(track_times=False) + checksum_0_tt_true = create_h5_and_return_checksum(track_times=True) + + # sleep is necessary to create h5 files with different creation times + time.sleep(1) + + checksum_1_tt_false = create_h5_and_return_checksum(track_times=False) + checksum_1_tt_true = create_h5_and_return_checksum(track_times=True) + + # checksums are the same if track_times=False + assert checksum_0_tt_false == checksum_1_tt_false + + # checksums are NOT the same if track_times=True + assert checksum_0_tt_true != checksum_1_tt_true + + +def test_iter_empty(setup_path): + + with ensure_clean_store(setup_path) as store: + # GH 12221 + assert list(store) == [] + + +def test_repr(setup_path): + + with ensure_clean_store(setup_path) as store: + repr(store) + store.info() + store["a"] = tm.makeTimeSeries() + store["b"] = tm.makeStringSeries() + store["c"] = tm.makeDataFrame() + + df = tm.makeDataFrame() + df["obj1"] = "foo" + df["obj2"] = "bar" + df["bool1"] = df["A"] > 0 + df["bool2"] = df["B"] > 0 + df["bool3"] = True + df["int1"] = 1 + df["int2"] = 2 + df["timestamp1"] = Timestamp("20010102") + df["timestamp2"] = Timestamp("20010103") + df["datetime1"] = datetime.datetime(2001, 1, 2, 0, 0) + df["datetime2"] = datetime.datetime(2001, 1, 3, 0, 0) + df.loc[df.index[3:6], ["obj1"]] = np.nan + df = df._consolidate()._convert(datetime=True) + + with catch_warnings(record=True):
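+ # storing the mixed object-dtype frame in fixed format can raise a PerformanceWarning, silenced here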
+ simplefilter("ignore", pd.errors.PerformanceWarning) + store["df"] = df + + # make a random group in hdf space + store._handle.create_group(store._handle.root, "bah") + + assert store.filename in repr(store) + assert store.filename in str(store) + store.info() + + # storers + with ensure_clean_store(setup_path) as store: + + df = tm.makeDataFrame() + store.append("df", df) + + s = store.get_storer("df") + repr(s) + str(s) + + +@pytest.mark.filterwarnings("ignore:object name:tables.exceptions.NaturalNameWarning") +def test_contains(setup_path): + + with ensure_clean_store(setup_path) as store: + store["a"] = tm.makeTimeSeries() + store["b"] = tm.makeDataFrame() + store["foo/bar"] = tm.makeDataFrame() + assert "a" in store + assert "b" in store + assert "c" not in store + assert "foo/bar" in store + assert "/foo/bar" in store + assert "/foo/b" not in store + assert "bar" not in store + + # gh-2694: tables.NaturalNameWarning + with catch_warnings(record=True): + store["node())"] = tm.makeDataFrame() + assert "node())" in store + + +def test_versioning(setup_path): + + with ensure_clean_store(setup_path) as store: + store["a"] = tm.makeTimeSeries() + store["b"] = tm.makeDataFrame() + df = tm.makeTimeDataFrame() + _maybe_remove(store, "df1") + store.append("df1", df[:10]) + store.append("df1", df[10:]) + assert store.root.a._v_attrs.pandas_version == "0.15.2" + assert store.root.b._v_attrs.pandas_version == "0.15.2" + assert store.root.df1._v_attrs.pandas_version == "0.15.2" + + # write a file and wipe its versioning + _maybe_remove(store, "df2") + store.append("df2", df) + + # this is an error because its table_type is appendable, but no + # version info + store.get_node("df2")._v_attrs.pandas_version = None + + msg = "'NoneType' object has no attribute 'startswith'" + + with pytest.raises(Exception, match=msg): + store.select("df2") + + +@pytest.mark.parametrize( + "where, expected", + [ + ( + "/", + { + "": ({"first_group", "second_group"}, set()), + "/first_group": (set(), {"df1", "df2"}), + "/second_group": ({"third_group"}, {"df3", "s1"}), + "/second_group/third_group": (set(), {"df4"}), + }, + ), + ( + "/second_group", + { + "/second_group": ({"third_group"}, {"df3", "s1"}), + "/second_group/third_group": (set(), {"df4"}), + }, + ), + ], +) +def test_walk(where, expected, setup_path): + # GH10143 + objs = { + "df1": DataFrame([1, 2, 3]), + "df2": DataFrame([4, 5, 6]), + "df3": DataFrame([6, 7, 8]), + "df4": DataFrame([9, 10, 11]), + "s1": Series([10, 9, 8]), + # Next 3 items aren't pandas objects and should be ignored + "a1": np.array([[1, 2, 3], [4, 5, 6]]), + "tb1": np.array([(1, 2, 3), (4, 5, 6)], dtype="i,i,i"), + "tb2": np.array([(7, 8, 9), (10, 11, 12)], dtype="i,i,i"), + } + + with ensure_clean_store("walk_groups.hdf", mode="w") as store: + store.put("/first_group/df1", objs["df1"]) + store.put("/first_group/df2", objs["df2"]) + store.put("/second_group/df3", objs["df3"]) + store.put("/second_group/s1", objs["s1"]) + store.put("/second_group/third_group/df4", objs["df4"]) + # Create non-pandas objects + store._handle.create_array("/first_group", "a1", objs["a1"]) + store._handle.create_table("/first_group", "tb1", obj=objs["tb1"]) + store._handle.create_table("/second_group", "tb2", obj=objs["tb2"]) + + assert len(list(store.walk(where=where))) == len(expected) + for path, groups, leaves in store.walk(where=where): + assert path in expected + expected_groups, expected_frames = expected[path] + assert expected_groups == set(groups) + assert expected_frames == set(leaves) + for 
leaf in leaves: + frame_path = "/".join([path, leaf]) + obj = store.get(frame_path) + if "df" in leaf: + tm.assert_frame_equal(obj, objs[leaf]) + else: + tm.assert_series_equal(obj, objs[leaf]) + + +def test_getattr(setup_path): + + with ensure_clean_store(setup_path) as store: + + s = tm.makeTimeSeries() + store["a"] = s + + # test attribute access + result = store.a + tm.assert_series_equal(result, s) + result = getattr(store, "a") + tm.assert_series_equal(result, s) + + df = tm.makeTimeDataFrame() + store["df"] = df + result = store.df + tm.assert_frame_equal(result, df) + + # errors + for x in ["d", "mode", "path", "handle", "complib"]: + msg = f"'HDFStore' object has no attribute '{x}'" + with pytest.raises(AttributeError, match=msg): + getattr(store, x) + + # not stores + for x in ["mode", "path", "handle", "complib"]: + getattr(store, f"_{x}") + + +def test_store_dropna(setup_path): + df_with_missing = DataFrame( + {"col1": [0.0, np.nan, 2.0], "col2": [1.0, np.nan, np.nan]}, + index=list("abc"), + ) + df_without_missing = DataFrame( + {"col1": [0.0, 2.0], "col2": [1.0, np.nan]}, index=list("ac") + ) + + # # Test to make sure defaults are to not drop. + # # Corresponding to Issue 9382 + with ensure_clean_path(setup_path) as path: + df_with_missing.to_hdf(path, "df", format="table") + reloaded = read_hdf(path, "df") + tm.assert_frame_equal(df_with_missing, reloaded) + + with ensure_clean_path(setup_path) as path: + df_with_missing.to_hdf(path, "df", format="table", dropna=False) + reloaded = read_hdf(path, "df") + tm.assert_frame_equal(df_with_missing, reloaded) + + with ensure_clean_path(setup_path) as path: + df_with_missing.to_hdf(path, "df", format="table", dropna=True) + reloaded = read_hdf(path, "df") + tm.assert_frame_equal(df_without_missing, reloaded) + + +def test_to_hdf_with_min_itemsize(setup_path): + + with ensure_clean_path(setup_path) as path: + + # min_itemsize in index with to_hdf (GH 10381) + df = tm.makeMixedDataFrame().set_index("C") + df.to_hdf(path, "ss3", format="table", min_itemsize={"index": 6}) + # just make sure there is a longer string: + df2 = df.copy().reset_index().assign(C="longer").set_index("C") + df2.to_hdf(path, "ss3", append=True, format="table") + tm.assert_frame_equal(read_hdf(path, "ss3"), concat([df, df2])) + + # same as above, with a Series + df["B"].to_hdf(path, "ss4", format="table", min_itemsize={"index": 6}) + df2["B"].to_hdf(path, "ss4", append=True, format="table") + tm.assert_series_equal(read_hdf(path, "ss4"), concat([df["B"], df2["B"]])) + + +@pytest.mark.parametrize("format", ["fixed", "table"]) +def test_to_hdf_errors(format, setup_path): + + data = ["\ud800foo"] + ser = Series(data, index=Index(data)) + with ensure_clean_path(setup_path) as path: + # GH 20835 + ser.to_hdf(path, "table", format=format, errors="surrogatepass") + + result = read_hdf(path, "table", errors="surrogatepass") + tm.assert_series_equal(result, ser) + + +def test_create_table_index(setup_path): + + with ensure_clean_store(setup_path) as store: + + with catch_warnings(record=True): + + def col(t, column): + return getattr(store.get_storer(t).table.cols, column) + + # data columns + df = tm.makeTimeDataFrame() + df["string"] = "foo" + df["string2"] = "bar" + store.append("f", df, data_columns=["string", "string2"]) + assert col("f", "index").is_indexed is True + assert col("f", "string").is_indexed is True + assert col("f", "string2").is_indexed is True + + # specify index=columns + store.append("f2", df, index=["string"], data_columns=["string", "string2"]) + 
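+ # only the column named in index= gets a PyTables index; the axis index and string2 stay unindexed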
assert col("f2", "index").is_indexed is False + assert col("f2", "string").is_indexed is True + assert col("f2", "string2").is_indexed is False + + # try to index a non-table + _maybe_remove(store, "f2") + store.put("f2", df) + msg = "cannot create table index on a Fixed format store" + with pytest.raises(TypeError, match=msg): + store.create_table_index("f2") + + +def test_create_table_index_data_columns_argument(setup_path): + # GH 28156 + + with ensure_clean_store(setup_path) as store: + + with catch_warnings(record=True): + + def col(t, column): + return getattr(store.get_storer(t).table.cols, column) + + # data columns + df = tm.makeTimeDataFrame() + df["string"] = "foo" + df["string2"] = "bar" + store.append("f", df, data_columns=["string"]) + assert col("f", "index").is_indexed is True + assert col("f", "string").is_indexed is True + + msg = "'Cols' object has no attribute 'string2'" + with pytest.raises(AttributeError, match=msg): + col("f", "string2").is_indexed + + # try to index a col which isn't a data_column + msg = ( + "column string2 is not a data_column.\n" + "In order to read column string2 you must reload the dataframe \n" + "into HDFStore and include string2 with the data_columns argument." + ) + with pytest.raises(AttributeError, match=msg): + store.create_table_index("f", columns=["string2"]) + + +def test_mi_data_columns(setup_path): + # GH 14435 + idx = MultiIndex.from_arrays( + [date_range("2000-01-01", periods=5), range(5)], names=["date", "id"] + ) + df = DataFrame({"a": [1.1, 1.2, 1.3, 1.4, 1.5]}, index=idx) + + with ensure_clean_store(setup_path) as store: + store.append("df", df, data_columns=True) + + actual = store.select("df", where="id == 1") + expected = df.iloc[[1], :] + tm.assert_frame_equal(actual, expected) + + +def test_table_mixed_dtypes(setup_path): + + # frame + df = tm.makeDataFrame() + df["obj1"] = "foo" + df["obj2"] = "bar" + df["bool1"] = df["A"] > 0 + df["bool2"] = df["B"] > 0 + df["bool3"] = True + df["int1"] = 1 + df["int2"] = 2 + df["timestamp1"] = Timestamp("20010102") + df["timestamp2"] = Timestamp("20010103") + df["datetime1"] = datetime.datetime(2001, 1, 2, 0, 0) + df["datetime2"] = datetime.datetime(2001, 1, 3, 0, 0) + df.loc[df.index[3:6], ["obj1"]] = np.nan + df = df._consolidate()._convert(datetime=True) + + with ensure_clean_store(setup_path) as store: + store.append("df1_mixed", df) + tm.assert_frame_equal(store.select("df1_mixed"), df) + + +def test_calendar_roundtrip_issue(setup_path): + + # 8591 + # doc example from tseries holiday section + weekmask_egypt = "Sun Mon Tue Wed Thu" + holidays = [ + "2012-05-01", + datetime.datetime(2013, 5, 1), + np.datetime64("2014-05-01"), + ] + bday_egypt = pd.offsets.CustomBusinessDay( + holidays=holidays, weekmask=weekmask_egypt + ) + dt = datetime.datetime(2013, 4, 30) + dts = date_range(dt, periods=5, freq=bday_egypt) + + s = Series(dts.weekday, dts).map(Series("Mon Tue Wed Thu Fri Sat Sun".split())) + + with ensure_clean_store(setup_path) as store: + + store.put("fixed", s) + result = store.select("fixed") + tm.assert_series_equal(result, s) + + store.append("table", s) + result = store.select("table") + tm.assert_series_equal(result, s) + + +def test_remove(setup_path): + + with ensure_clean_store(setup_path) as store: + + ts = tm.makeTimeSeries() + df = tm.makeDataFrame() + store["a"] = ts + store["b"] = df + _maybe_remove(store, "a") + assert len(store) == 1 + tm.assert_frame_equal(df, store["b"]) + + _maybe_remove(store, "b") + assert len(store) == 0 + + # nonexistence + with 
pytest.raises( + KeyError, match="'No object named a_nonexistent_store in the file'" + ): + store.remove("a_nonexistent_store") + + # pathing + store["a"] = ts + store["b/foo"] = df + _maybe_remove(store, "foo") + _maybe_remove(store, "b/foo") + assert len(store) == 1 + + store["a"] = ts + store["b/foo"] = df + _maybe_remove(store, "b") + assert len(store) == 1 + + # __delitem__ + store["a"] = ts + store["b"] = df + del store["a"] + del store["b"] + assert len(store) == 0 + + +def test_same_name_scoping(setup_path): + + with ensure_clean_store(setup_path) as store: + + df = DataFrame(np.random.randn(20, 2), index=date_range("20130101", periods=20)) + store.put("df", df, format="table") + expected = df[df.index > Timestamp("20130105")] + + result = store.select("df", "index>datetime.datetime(2013,1,5)") + tm.assert_frame_equal(result, expected) + + # changes what 'datetime' points to in the namespace where + # 'select' does the lookup + from datetime import datetime # noqa:F401 + + # technically an error, but allow it + result = store.select("df", "index>datetime.datetime(2013,1,5)") + tm.assert_frame_equal(result, expected) + + result = store.select("df", "index>datetime(2013,1,5)") + tm.assert_frame_equal(result, expected) + + +def test_store_index_name(setup_path): + df = tm.makeDataFrame() + df.index.name = "foo" + + with ensure_clean_store(setup_path) as store: + store["frame"] = df + recons = store["frame"] + tm.assert_frame_equal(recons, df) + + +@pytest.mark.parametrize("table_format", ["table", "fixed"]) +def test_store_index_name_numpy_str(table_format, setup_path): + # GH #13492 + idx = Index( + pd.to_datetime([datetime.date(2000, 1, 1), datetime.date(2000, 1, 2)]), + name="cols\u05d2", + ) + idx1 = Index( + pd.to_datetime([datetime.date(2010, 1, 1), datetime.date(2010, 1, 2)]), + name="rows\u05d0", + ) + df = DataFrame(np.arange(4).reshape(2, 2), columns=idx, index=idx1) + + # This used to fail, returning numpy strings instead of python strings. 
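+ # the type asserts below confirm the names come back as plain Python str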
+ with ensure_clean_path(setup_path) as path: + df.to_hdf(path, "df", format=table_format) + df2 = read_hdf(path, "df") + + tm.assert_frame_equal(df, df2, check_names=True) + + assert type(df2.index.name) == str + assert type(df2.columns.name) == str + + +def test_store_series_name(setup_path): + df = tm.makeDataFrame() + series = df["A"] + + with ensure_clean_store(setup_path) as store: + store["series"] = series + recons = store["series"] + tm.assert_series_equal(recons, series) + + +@pytest.mark.filterwarnings("ignore:\\nduplicate:pandas.io.pytables.DuplicateWarning") +def test_overwrite_node(setup_path): + + with ensure_clean_store(setup_path) as store: + store["a"] = tm.makeTimeDataFrame() + ts = tm.makeTimeSeries() + store["a"] = ts + + tm.assert_series_equal(store["a"], ts) + + +@pytest.mark.filterwarnings( + "ignore:\\nthe :pandas.io.pytables.AttributeConflictWarning" +) +def test_coordinates(setup_path): + df = tm.makeTimeDataFrame() + + with ensure_clean_store(setup_path) as store: + + _maybe_remove(store, "df") + store.append("df", df) + + # all + c = store.select_as_coordinates("df") + assert (c.values == np.arange(len(df.index))).all() + + # get coordinates back & test vs frame + _maybe_remove(store, "df") + + df = DataFrame({"A": range(5), "B": range(5)}) + store.append("df", df) + c = store.select_as_coordinates("df", ["index<3"]) + assert (c.values == np.arange(3)).all() + result = store.select("df", where=c) + expected = df.loc[0:2, :] + tm.assert_frame_equal(result, expected) + + c = store.select_as_coordinates("df", ["index>=3", "index<=4"]) + assert (c.values == np.arange(2) + 3).all() + result = store.select("df", where=c) + expected = df.loc[3:4, :] + tm.assert_frame_equal(result, expected) + assert isinstance(c, Index) + + # multiple tables + _maybe_remove(store, "df1") + _maybe_remove(store, "df2") + df1 = tm.makeTimeDataFrame() + df2 = tm.makeTimeDataFrame().rename(columns="{}_2".format) + store.append("df1", df1, data_columns=["A", "B"]) + store.append("df2", df2) + + c = store.select_as_coordinates("df1", ["A>0", "B>0"]) + df1_result = store.select("df1", c) + df2_result = store.select("df2", c) + result = concat([df1_result, df2_result], axis=1) + + expected = concat([df1, df2], axis=1) + expected = expected[(expected.A > 0) & (expected.B > 0)] + tm.assert_frame_equal(result, expected, check_freq=False) + # FIXME: 2021-01-18 on some (mostly windows) builds we get freq=None + # but expect freq="18B" + + # pass array/mask as the coordinates + with ensure_clean_store(setup_path) as store: + + df = DataFrame( + np.random.randn(1000, 2), index=date_range("20000101", periods=1000) + ) + store.append("df", df) + c = store.select_column("df", "index") + where = c[DatetimeIndex(c).month == 5].index + expected = df.iloc[where] + + # locations + result = store.select("df", where=where) + tm.assert_frame_equal(result, expected) + + # boolean + result = store.select("df", where=where) + tm.assert_frame_equal(result, expected) + + # invalid + msg = ( + "where must be passed as a string, PyTablesExpr, " + "or list-like of PyTablesExpr" + ) + with pytest.raises(TypeError, match=msg): + store.select("df", where=np.arange(len(df), dtype="float64")) + + with pytest.raises(TypeError, match=msg): + store.select("df", where=np.arange(len(df) + 1)) + + with pytest.raises(TypeError, match=msg): + store.select("df", where=np.arange(len(df)), start=5) + + with pytest.raises(TypeError, match=msg): + store.select("df", where=np.arange(len(df)), start=5, stop=10) + + # selection with filter 
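+        # in-memory equivalent of the HDF-side "index in selection" filter
+        # applied below (sketch): df[df.index.isin(selection)]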
+ selection = date_range("20000101", periods=500) + result = store.select("df", where="index in selection") + expected = df[df.index.isin(selection)] + tm.assert_frame_equal(result, expected) + + # list + df = DataFrame(np.random.randn(10, 2)) + store.append("df2", df) + result = store.select("df2", where=[0, 3, 5]) + expected = df.iloc[[0, 3, 5]] + tm.assert_frame_equal(result, expected) + + # boolean + where = [True] * 10 + where[-2] = False + result = store.select("df2", where=where) + expected = df.loc[where] + tm.assert_frame_equal(result, expected) + + # start/stop + result = store.select("df2", start=5, stop=10) + expected = df[5:10] + tm.assert_frame_equal(result, expected) + + +def test_start_stop_table(setup_path): + + with ensure_clean_store(setup_path) as store: + + # table + df = DataFrame({"A": np.random.rand(20), "B": np.random.rand(20)}) + store.append("df", df) + + result = store.select("df", "columns=['A']", start=0, stop=5) + expected = df.loc[0:4, ["A"]] + tm.assert_frame_equal(result, expected) + + # out of range + result = store.select("df", "columns=['A']", start=30, stop=40) + assert len(result) == 0 + expected = df.loc[30:40, ["A"]] + tm.assert_frame_equal(result, expected) + + +def test_start_stop_multiple(setup_path): + + # GH 16209 + with ensure_clean_store(setup_path) as store: + + df = DataFrame({"foo": [1, 2], "bar": [1, 2]}) + + store.append_to_multiple( + {"selector": ["foo"], "data": None}, df, selector="selector" + ) + result = store.select_as_multiple( + ["selector", "data"], selector="selector", start=0, stop=1 + ) + expected = df.loc[[0], ["foo", "bar"]] + tm.assert_frame_equal(result, expected) + + +def test_start_stop_fixed(setup_path): + + with ensure_clean_store(setup_path) as store: + + # fixed, GH 8287 + df = DataFrame( + {"A": np.random.rand(20), "B": np.random.rand(20)}, + index=date_range("20130101", periods=20), + ) + store.put("df", df) + + result = store.select("df", start=0, stop=5) + expected = df.iloc[0:5, :] + tm.assert_frame_equal(result, expected) + + result = store.select("df", start=5, stop=10) + expected = df.iloc[5:10, :] + tm.assert_frame_equal(result, expected) + + # out of range + result = store.select("df", start=30, stop=40) + expected = df.iloc[30:40, :] + tm.assert_frame_equal(result, expected) + + # series + s = df.A + store.put("s", s) + result = store.select("s", start=0, stop=5) + expected = s.iloc[0:5] + tm.assert_series_equal(result, expected) + + result = store.select("s", start=5, stop=10) + expected = s.iloc[5:10] + tm.assert_series_equal(result, expected) + + # sparse; not implemented + df = tm.makeDataFrame() + df.iloc[3:5, 1:3] = np.nan + df.iloc[8:10, -2] = np.nan + + +def test_select_filter_corner(setup_path): + + df = DataFrame(np.random.randn(50, 100)) + df.index = [f"{c:3d}" for c in df.index] + df.columns = [f"{c:3d}" for c in df.columns] + + with ensure_clean_store(setup_path) as store: + store.put("frame", df, format="table") + + crit = "columns=df.columns[:75]" + result = store.select("frame", [crit]) + tm.assert_frame_equal(result, df.loc[:, df.columns[:75]]) + + crit = "columns=df.columns[:75:2]" + result = store.select("frame", [crit]) + tm.assert_frame_equal(result, df.loc[:, df.columns[:75:2]]) + + +def test_path_pathlib(setup_path): + df = tm.makeDataFrame() + + result = tm.round_trip_pathlib( + lambda p: df.to_hdf(p, "df"), lambda p: read_hdf(p, "df") + ) + tm.assert_frame_equal(df, result) + + +@pytest.mark.parametrize("start, stop", [(0, 2), (1, 2), (None, None)]) +def 
test_contiguous_mixed_data_table(start, stop, setup_path):
+    # GH 17021
+    df = DataFrame(
+        {
+            "a": Series([20111010, 20111011, 20111012]),
+            "b": Series(["ab", "cd", "ab"]),
+        }
+    )
+
+    with ensure_clean_store(setup_path) as store:
+        store.append("test_dataset", df)
+
+        result = store.select("test_dataset", start=start, stop=stop)
+        tm.assert_frame_equal(df[start:stop], result)
+
+
+def test_path_pathlib_hdfstore(setup_path):
+    df = tm.makeDataFrame()
+
+    def writer(path):
+        with HDFStore(path) as store:
+            df.to_hdf(store, "df")
+
+    def reader(path):
+        with HDFStore(path) as store:
+            return read_hdf(store, "df")
+
+    result = tm.round_trip_pathlib(writer, reader)
+    tm.assert_frame_equal(df, result)
+
+
+def test_pickle_path_localpath(setup_path):
+    df = tm.makeDataFrame()
+    result = tm.round_trip_pathlib(
+        lambda p: df.to_hdf(p, "df"), lambda p: read_hdf(p, "df")
+    )
+    tm.assert_frame_equal(df, result)
+
+
+def test_path_localpath_hdfstore(setup_path):
+    df = tm.makeDataFrame()
+
+    def writer(path):
+        with HDFStore(path) as store:
+            df.to_hdf(store, "df")
+
+    def reader(path):
+        with HDFStore(path) as store:
+            return read_hdf(store, "df")
+
+    result = tm.round_trip_localpath(writer, reader)
+    tm.assert_frame_equal(df, result)
+
+
+def test_copy(setup_path):
+
+    with catch_warnings(record=True):
+
+        def do_copy(f, new_f=None, keys=None, propindexes=True, **kwargs):
+            # initialize up front so the finally-block cannot trip over
+            # unbound names if HDFStore/copy raises or new_f is passed in
+            store = None
+            tstore = None
+            fd = None
+            try:
+                store = HDFStore(f, "r")
+
+                if new_f is None:
+                    import tempfile
+
+                    fd, new_f = tempfile.mkstemp()
+                tstore = store.copy(new_f, keys=keys, propindexes=propindexes, **kwargs)
+
+                # check keys
+                if keys is None:
+                    keys = store.keys()
+                assert set(keys) == set(tstore.keys())
+
+                # check indices & nrows
+                for k in tstore.keys():
+                    if tstore.get_storer(k).is_table:
+                        new_t = tstore.get_storer(k)
+                        orig_t = store.get_storer(k)
+
+                        assert orig_t.nrows == new_t.nrows
+
+                        # check propindexes
+                        if propindexes:
+                            for a in orig_t.axes:
+                                if a.is_indexed:
+                                    assert new_t[a.name].is_indexed
+
+            finally:
+                safe_close(store)
+                safe_close(tstore)
+                if fd is not None:
+                    try:
+                        os.close(fd)
+                    except (OSError, ValueError):
+                        pass
+                os.remove(new_f)  # noqa: PDF008
+
+        # new table
+        df = tm.makeDataFrame()
+
+        with tm.ensure_clean() as path:
+            st = HDFStore(path)
+            st.append("df", df, data_columns=["A"])
+            st.close()
+            do_copy(f=path)
+            do_copy(f=path, propindexes=False)
+
+
+def test_duplicate_column_name(setup_path):
+    df = DataFrame(columns=["a", "a"], data=[[0, 0]])
+
+    with ensure_clean_path(setup_path) as path:
+        msg = "Columns index has to be unique for fixed format"
+        with pytest.raises(ValueError, match=msg):
+            df.to_hdf(path, "df", format="fixed")
+
+        df.to_hdf(path, "df", format="table")
+        other = read_hdf(path, "df")
+
+        tm.assert_frame_equal(df, other)
+        assert df.equals(other)
+        assert other.equals(df)
+
+
+def test_preserve_timedeltaindex_type(setup_path):
+    # GH9635
+    df = DataFrame(np.random.normal(size=(10, 5)))
+    df.index = timedelta_range(start="0s", periods=10, freq="1s", name="example")
+
+    with ensure_clean_store(setup_path) as store:
+
+        store["df"] = df
+        tm.assert_frame_equal(store["df"], df)
+
+
+def test_columns_multiindex_modified(setup_path):
+    # BUG: 7212
+
+    df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE"))
+    df.index.name = "letters"
+    df = df.set_index(keys="E", append=True)
+
+    data_columns = df.index.names + df.columns.tolist()
+    with ensure_clean_path(setup_path) as path:
+        df.to_hdf(
+            path,
+            "df",
+            mode="a",
+            append=True,
+            data_columns=data_columns,
+            index=False,
+        )
+        cols2load = list("BCD")
+        cols2load_original = list(cols2load)
+        # GH#10055 make sure read_hdf call does not alter cols2load inplace
+        read_hdf(path, "df", columns=cols2load)
+        assert cols2load_original == cols2load
+
+
+@pytest.mark.filterwarnings("ignore:object name:tables.exceptions.NaturalNameWarning")
+def test_to_hdf_with_object_column_names(setup_path):
+    # GH9057
+
+    types_should_fail = [
+        tm.makeIntIndex,
+        tm.makeFloatIndex,
+        tm.makeDateIndex,
+        tm.makeTimedeltaIndex,
+        tm.makePeriodIndex,
+    ]
+    types_should_run = [
+        tm.makeStringIndex,
+        tm.makeCategoricalIndex,
+        tm.makeUnicodeIndex,
+    ]
+
+    for index in types_should_fail:
+        df = DataFrame(np.random.randn(10, 2), columns=index(2))
+        with ensure_clean_path(setup_path) as path:
+            with catch_warnings(record=True):
+                msg = "cannot have non-object label DataIndexableCol"
+                with pytest.raises(ValueError, match=msg):
+                    df.to_hdf(path, "df", format="table", data_columns=True)
+
+    for index in types_should_run:
+        df = DataFrame(np.random.randn(10, 2), columns=index(2))
+        with ensure_clean_path(setup_path) as path:
+            with catch_warnings(record=True):
+                df.to_hdf(path, "df", format="table", data_columns=True)
+                result = read_hdf(path, "df", where=f"index = [{df.index[0]}]")
+                assert len(result)
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_subclass.py b/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_subclass.py
new file mode 100644
index 0000000..75b04f3
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_subclass.py
@@ -0,0 +1,50 @@
+import numpy as np
+
+from pandas import (
+    DataFrame,
+    Series,
+)
+import pandas._testing as tm
+from pandas.tests.io.pytables.common import ensure_clean_path
+
+from pandas.io.pytables import (
+    HDFStore,
+    read_hdf,
+)
+
+
+class TestHDFStoreSubclass:
+    # GH 33748
+    def test_supported_for_subclass_dataframe(self):
+        data = {"a": [1, 2], "b": [3, 4]}
+        sdf = tm.SubclassedDataFrame(data, dtype=np.intp)
+
+        expected = DataFrame(data, dtype=np.intp)
+
+        with ensure_clean_path("temp.h5") as path:
+            sdf.to_hdf(path, "df")
+            result = read_hdf(path, "df")
+            tm.assert_frame_equal(result, expected)
+
+        with ensure_clean_path("temp.h5") as path:
+            with HDFStore(path) as store:
+                store.put("df", sdf)
+            result = read_hdf(path, "df")
+            tm.assert_frame_equal(result, expected)
+
+    def test_supported_for_subclass_series(self):
+        data = [1, 2, 3]
+        sser = tm.SubclassedSeries(data, dtype=np.intp)
+
+        expected = Series(data, dtype=np.intp)
+
+        with ensure_clean_path("temp.h5") as path:
+            sser.to_hdf(path, "ser")
+            result = read_hdf(path, "ser")
+            tm.assert_series_equal(result, expected)
+
+        with ensure_clean_path("temp.h5") as path:
+            with HDFStore(path) as store:
+                store.put("ser", sser)
+            result = read_hdf(path, "ser")
+            tm.assert_series_equal(result, expected)
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_time_series.py b/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_time_series.py
new file mode 100644
index 0000000..5e42dbd
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_time_series.py
@@ -0,0 +1,66 @@
+import datetime
+
+import numpy as np
+import pytest
+
+from pandas import (
+    DataFrame,
+    Series,
+    _testing as tm,
+)
+from pandas.tests.io.pytables.common import ensure_clean_store
+
+pytestmark = pytest.mark.single
+
+
+def test_store_datetime_fractional_secs(setup_path):
+
+    with ensure_clean_store(setup_path) as store:
+        dt = datetime.datetime(2012, 
1, 2, 3, 4, 5, 123456) + series = Series([0], [dt]) + store["a"] = series + assert store["a"].index[0] == dt + + +def test_tseries_indices_series(setup_path): + + with ensure_clean_store(setup_path) as store: + idx = tm.makeDateIndex(10) + ser = Series(np.random.randn(len(idx)), idx) + store["a"] = ser + result = store["a"] + + tm.assert_series_equal(result, ser) + assert result.index.freq == ser.index.freq + tm.assert_class_equal(result.index, ser.index, obj="series index") + + idx = tm.makePeriodIndex(10) + ser = Series(np.random.randn(len(idx)), idx) + store["a"] = ser + result = store["a"] + + tm.assert_series_equal(result, ser) + assert result.index.freq == ser.index.freq + tm.assert_class_equal(result.index, ser.index, obj="series index") + + +def test_tseries_indices_frame(setup_path): + + with ensure_clean_store(setup_path) as store: + idx = tm.makeDateIndex(10) + df = DataFrame(np.random.randn(len(idx), 3), index=idx) + store["a"] = df + result = store["a"] + + tm.assert_frame_equal(result, df) + assert result.index.freq == df.index.freq + tm.assert_class_equal(result.index, df.index, obj="dataframe index") + + idx = tm.makePeriodIndex(10) + df = DataFrame(np.random.randn(len(idx), 3), idx) + store["a"] = df + result = store["a"] + + tm.assert_frame_equal(result, df) + assert result.index.freq == df.index.freq + tm.assert_class_equal(result.index, df.index, obj="dataframe index") diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_timezones.py b/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_timezones.py new file mode 100644 index 0000000..36fa79d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/pytables/test_timezones.py @@ -0,0 +1,370 @@ +from datetime import ( + date, + timedelta, +) + +import numpy as np +import pytest + +from pandas._libs.tslibs.timezones import maybe_get_tz +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + DataFrame, + DatetimeIndex, + Series, + Timestamp, + date_range, +) +import pandas._testing as tm +from pandas.tests.io.pytables.common import ( + _maybe_remove, + ensure_clean_path, + ensure_clean_store, +) + + +def _compare_with_tz(a, b): + tm.assert_frame_equal(a, b) + + # compare the zones on each element + for c in a.columns: + for i in a.index: + a_e = a.loc[i, c] + b_e = b.loc[i, c] + if not (a_e == b_e and a_e.tz == b_e.tz): + raise AssertionError(f"invalid tz comparison [{a_e}] [{b_e}]") + + +# use maybe_get_tz instead of dateutil.tz.gettz to handle the windows +# filename issues. 
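+# illustrative (assumed) behavior of the two factories defined next:
+#   gettz_dateutil("US/Eastern") -> dateutil tz resolved via "dateutil/US/Eastern"
+#   gettz_pytz("US/Eastern")     -> the bare zone name, resolved later by pandas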
+gettz_dateutil = lambda x: maybe_get_tz("dateutil/" + x) +gettz_pytz = lambda x: x + + +@pytest.mark.parametrize("gettz", [gettz_dateutil, gettz_pytz]) +def test_append_with_timezones(setup_path, gettz): + # as columns + + # Single-tzinfo, no DST transition + df_est = DataFrame( + { + "A": [ + Timestamp("20130102 2:00:00", tz=gettz("US/Eastern")) + + timedelta(hours=1) * i + for i in range(5) + ] + } + ) + + # frame with all columns having same tzinfo, but different sides + # of DST transition + df_crosses_dst = DataFrame( + { + "A": Timestamp("20130102", tz=gettz("US/Eastern")), + "B": Timestamp("20130603", tz=gettz("US/Eastern")), + }, + index=range(5), + ) + + df_mixed_tz = DataFrame( + { + "A": Timestamp("20130102", tz=gettz("US/Eastern")), + "B": Timestamp("20130102", tz=gettz("EET")), + }, + index=range(5), + ) + + df_different_tz = DataFrame( + { + "A": Timestamp("20130102", tz=gettz("US/Eastern")), + "B": Timestamp("20130102", tz=gettz("CET")), + }, + index=range(5), + ) + + with ensure_clean_store(setup_path) as store: + + _maybe_remove(store, "df_tz") + store.append("df_tz", df_est, data_columns=["A"]) + result = store["df_tz"] + _compare_with_tz(result, df_est) + tm.assert_frame_equal(result, df_est) + + # select with tz aware + expected = df_est[df_est.A >= df_est.A[3]] + result = store.select("df_tz", where="A>=df_est.A[3]") + _compare_with_tz(result, expected) + + # ensure we include dates in DST and STD time here. + _maybe_remove(store, "df_tz") + store.append("df_tz", df_crosses_dst) + result = store["df_tz"] + _compare_with_tz(result, df_crosses_dst) + tm.assert_frame_equal(result, df_crosses_dst) + + msg = ( + r"invalid info for \[values_block_1\] for \[tz\], " + r"existing_value \[(dateutil/.*)?US/Eastern\] " + r"conflicts with new value \[(dateutil/.*)?EET\]" + ) + with pytest.raises(ValueError, match=msg): + store.append("df_tz", df_mixed_tz) + + # this is ok + _maybe_remove(store, "df_tz") + store.append("df_tz", df_mixed_tz, data_columns=["A", "B"]) + result = store["df_tz"] + _compare_with_tz(result, df_mixed_tz) + tm.assert_frame_equal(result, df_mixed_tz) + + # can't append with diff timezone + msg = ( + r"invalid info for \[B\] for \[tz\], " + r"existing_value \[(dateutil/.*)?EET\] " + r"conflicts with new value \[(dateutil/.*)?CET\]" + ) + with pytest.raises(ValueError, match=msg): + store.append("df_tz", df_different_tz) + + +@pytest.mark.parametrize("gettz", [gettz_dateutil, gettz_pytz]) +def test_append_with_timezones_as_index(setup_path, gettz): + # GH#4098 example + + dti = date_range("2000-1-1", periods=3, freq="H", tz=gettz("US/Eastern")) + dti = dti._with_freq(None) # freq doesn't round-trip + + df = DataFrame({"A": Series(range(3), index=dti)}) + + with ensure_clean_store(setup_path) as store: + + _maybe_remove(store, "df") + store.put("df", df) + result = store.select("df") + tm.assert_frame_equal(result, df) + + _maybe_remove(store, "df") + store.append("df", df) + result = store.select("df") + tm.assert_frame_equal(result, df) + + +def test_roundtrip_tz_aware_index(setup_path): + # GH 17618 + time = Timestamp("2000-01-01 01:00:00", tz="US/Eastern") + df = DataFrame(data=[0], index=[time]) + + with ensure_clean_store(setup_path) as store: + store.put("frame", df, format="fixed") + recons = store["frame"] + tm.assert_frame_equal(recons, df) + assert recons.index[0].value == 946706400000000000 + + +def test_store_index_name_with_tz(setup_path): + # GH 13884 + df = DataFrame({"A": [1, 2]}) + df.index = DatetimeIndex([1234567890123456787, 
1234567890123456788]) + df.index = df.index.tz_localize("UTC") + df.index.name = "foo" + + with ensure_clean_store(setup_path) as store: + store.put("frame", df, format="table") + recons = store["frame"] + tm.assert_frame_equal(recons, df) + + +def test_tseries_select_index_column(setup_path): + # GH7777 + # selecting a UTC datetimeindex column did + # not preserve UTC tzinfo set before storing + + # check that no tz still works + rng = date_range("1/1/2000", "1/30/2000") + frame = DataFrame(np.random.randn(len(rng), 4), index=rng) + + with ensure_clean_store(setup_path) as store: + store.append("frame", frame) + result = store.select_column("frame", "index") + assert rng.tz == DatetimeIndex(result.values).tz + + # check utc + rng = date_range("1/1/2000", "1/30/2000", tz="UTC") + frame = DataFrame(np.random.randn(len(rng), 4), index=rng) + + with ensure_clean_store(setup_path) as store: + store.append("frame", frame) + result = store.select_column("frame", "index") + assert rng.tz == result.dt.tz + + # double check non-utc + rng = date_range("1/1/2000", "1/30/2000", tz="US/Eastern") + frame = DataFrame(np.random.randn(len(rng), 4), index=rng) + + with ensure_clean_store(setup_path) as store: + store.append("frame", frame) + result = store.select_column("frame", "index") + assert rng.tz == result.dt.tz + + +def test_timezones_fixed_format_frame_non_empty(setup_path): + with ensure_clean_store(setup_path) as store: + + # index + rng = date_range("1/1/2000", "1/30/2000", tz="US/Eastern") + rng = rng._with_freq(None) # freq doesn't round-trip + df = DataFrame(np.random.randn(len(rng), 4), index=rng) + store["df"] = df + result = store["df"] + tm.assert_frame_equal(result, df) + + # as data + # GH11411 + _maybe_remove(store, "df") + df = DataFrame( + { + "A": rng, + "B": rng.tz_convert("UTC").tz_localize(None), + "C": rng.tz_convert("CET"), + "D": range(len(rng)), + }, + index=rng, + ) + store["df"] = df + result = store["df"] + tm.assert_frame_equal(result, df) + + +def test_timezones_fixed_format_empty(setup_path, tz_aware_fixture, frame_or_series): + # GH 20594 + + dtype = pd.DatetimeTZDtype(tz=tz_aware_fixture) + + obj = Series(dtype=dtype, name="A") + if frame_or_series is DataFrame: + obj = obj.to_frame() + + with ensure_clean_store(setup_path) as store: + store["obj"] = obj + result = store["obj"] + tm.assert_equal(result, obj) + + +def test_timezones_fixed_format_series_nonempty(setup_path, tz_aware_fixture): + # GH 20594 + + dtype = pd.DatetimeTZDtype(tz=tz_aware_fixture) + + with ensure_clean_store(setup_path) as store: + s = Series([0], dtype=dtype) + store["s"] = s + result = store["s"] + tm.assert_series_equal(result, s) + + +def test_fixed_offset_tz(setup_path): + rng = date_range("1/1/2000 00:00:00-07:00", "1/30/2000 00:00:00-07:00") + frame = DataFrame(np.random.randn(len(rng), 4), index=rng) + + with ensure_clean_store(setup_path) as store: + store["frame"] = frame + recons = store["frame"] + tm.assert_index_equal(recons.index, rng) + assert rng.tz == recons.index.tz + + +@td.skip_if_windows +def test_store_timezone(setup_path): + # GH2852 + # issue storing datetime.date with a timezone as it resets when read + # back in a new timezone + + # original method + with ensure_clean_store(setup_path) as store: + + today = date(2013, 9, 10) + df = DataFrame([1, 2, 3], index=[today, today, today]) + store["obj1"] = df + result = store["obj1"] + tm.assert_frame_equal(result, df) + + # with tz setting + with ensure_clean_store(setup_path) as store: + + with tm.set_timezone("EST5EDT"): + 
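+            # the write happens under EST5EDT and the read below under
+            # CST6CDT; date objects carry no tz, so the values must not shift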
today = date(2013, 9, 10) + df = DataFrame([1, 2, 3], index=[today, today, today]) + store["obj1"] = df + + with tm.set_timezone("CST6CDT"): + result = store["obj1"] + + tm.assert_frame_equal(result, df) + + +def test_legacy_datetimetz_object(datapath, setup_path): + # legacy from < 0.17.0 + # 8260 + expected = DataFrame( + { + "A": Timestamp("20130102", tz="US/Eastern"), + "B": Timestamp("20130603", tz="CET"), + }, + index=range(5), + ) + with ensure_clean_store( + datapath("io", "data", "legacy_hdf", "datetimetz_object.h5"), mode="r" + ) as store: + result = store["df"] + tm.assert_frame_equal(result, expected) + + +def test_dst_transitions(setup_path): + # make sure we are not failing on transitions + with ensure_clean_store(setup_path) as store: + times = date_range( + "2013-10-26 23:00", + "2013-10-27 01:00", + tz="Europe/London", + freq="H", + ambiguous="infer", + ) + times = times._with_freq(None) # freq doesn't round-trip + + for i in [times, times + pd.Timedelta("10min")]: + _maybe_remove(store, "df") + df = DataFrame({"A": range(len(i)), "B": i}, index=i) + store.append("df", df) + result = store.select("df") + tm.assert_frame_equal(result, df) + + +def test_read_with_where_tz_aware_index(setup_path): + # GH 11926 + periods = 10 + dts = date_range("20151201", periods=periods, freq="D", tz="UTC") + mi = pd.MultiIndex.from_arrays([dts, range(periods)], names=["DATE", "NO"]) + expected = DataFrame({"MYCOL": 0}, index=mi) + + key = "mykey" + with ensure_clean_path(setup_path) as path: + with pd.HDFStore(path) as store: + store.append(key, expected, format="table", append=True) + result = pd.read_hdf(path, key, where="DATE > 20151130") + tm.assert_frame_equal(result, expected) + + +def test_py2_created_with_datetimez(datapath, setup_path): + # The test HDF5 file was created in Python 2, but could not be read in + # Python 3. 
+ # + # GH26443 + index = [Timestamp("2019-01-01T18:00").tz_localize("America/New_York")] + expected = DataFrame({"data": 123}, index=index) + with ensure_clean_store( + datapath("io", "data", "legacy_hdf", "gh26443.h5"), mode="r" + ) as store: + result = store["key"] + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/sas/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/io/sas/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/sas/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/sas/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..799700d Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/sas/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/sas/__pycache__/test_sas.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/sas/__pycache__/test_sas.cpython-38.pyc new file mode 100644 index 0000000..787e03d Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/sas/__pycache__/test_sas.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/sas/__pycache__/test_sas7bdat.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/sas/__pycache__/test_sas7bdat.cpython-38.pyc new file mode 100644 index 0000000..ae0b044 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/sas/__pycache__/test_sas7bdat.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/sas/__pycache__/test_xport.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/sas/__pycache__/test_xport.cpython-38.pyc new file mode 100644 index 0000000..7a3eae8 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/sas/__pycache__/test_xport.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/sas/test_sas.py b/.local/lib/python3.8/site-packages/pandas/tests/io/sas/test_sas.py new file mode 100644 index 0000000..5d2643c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/sas/test_sas.py @@ -0,0 +1,26 @@ +from io import StringIO + +import pytest + +from pandas import read_sas +import pandas._testing as tm + + +class TestSas: + def test_sas_buffer_format(self): + # see gh-14947 + b = StringIO("") + + msg = ( + "If this is a buffer object rather than a string " + "name, you must specify a format string" + ) + with pytest.raises(ValueError, match=msg): + read_sas(b) + + def test_sas_read_no_format_or_extension(self): + # see gh-24548 + msg = "unable to infer format of SAS file" + with tm.ensure_clean("test_file_no_extension") as path: + with pytest.raises(ValueError, match=msg): + read_sas(path) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/sas/test_sas7bdat.py b/.local/lib/python3.8/site-packages/pandas/tests/io/sas/test_sas7bdat.py new file mode 100644 index 0000000..5477559 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/sas/test_sas7bdat.py @@ -0,0 +1,343 @@ +from datetime import datetime +import io +import os +from pathlib import Path + +import dateutil.parser +import numpy as np +import pytest + +from pandas.errors import EmptyDataError +import pandas.util._test_decorators as td + +import pandas as pd +import pandas._testing as tm + + +# https://github.com/cython/cython/issues/1720 
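+# (the filter below silences the "can't resolve package" ImportWarning
+# emitted by Cython-built extensions on import; see the issue linked above)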
+@pytest.mark.filterwarnings("ignore:can't resolve package:ImportWarning") +class TestSAS7BDAT: + @pytest.fixture(autouse=True) + def setup_method(self, datapath): + self.dirpath = datapath("io", "sas", "data") + self.data = [] + self.test_ix = [list(range(1, 16)), [16]] + for j in 1, 2: + fname = os.path.join(self.dirpath, f"test_sas7bdat_{j}.csv") + df = pd.read_csv(fname) + epoch = datetime(1960, 1, 1) + t1 = pd.to_timedelta(df["Column4"], unit="d") + df["Column4"] = epoch + t1 + t2 = pd.to_timedelta(df["Column12"], unit="d") + df["Column12"] = epoch + t2 + for k in range(df.shape[1]): + col = df.iloc[:, k] + if col.dtype == np.int64: + df.iloc[:, k] = df.iloc[:, k].astype(np.float64) + self.data.append(df) + + @pytest.mark.slow + def test_from_file(self): + for j in 0, 1: + df0 = self.data[j] + for k in self.test_ix[j]: + fname = os.path.join(self.dirpath, f"test{k}.sas7bdat") + df = pd.read_sas(fname, encoding="utf-8") + tm.assert_frame_equal(df, df0) + + @pytest.mark.slow + def test_from_buffer(self): + for j in 0, 1: + df0 = self.data[j] + for k in self.test_ix[j]: + fname = os.path.join(self.dirpath, f"test{k}.sas7bdat") + with open(fname, "rb") as f: + byts = f.read() + buf = io.BytesIO(byts) + with pd.read_sas( + buf, format="sas7bdat", iterator=True, encoding="utf-8" + ) as rdr: + df = rdr.read() + tm.assert_frame_equal(df, df0, check_exact=False) + + @pytest.mark.slow + def test_from_iterator(self): + for j in 0, 1: + df0 = self.data[j] + for k in self.test_ix[j]: + fname = os.path.join(self.dirpath, f"test{k}.sas7bdat") + with pd.read_sas(fname, iterator=True, encoding="utf-8") as rdr: + df = rdr.read(2) + tm.assert_frame_equal(df, df0.iloc[0:2, :]) + df = rdr.read(3) + tm.assert_frame_equal(df, df0.iloc[2:5, :]) + + @pytest.mark.slow + def test_path_pathlib(self): + for j in 0, 1: + df0 = self.data[j] + for k in self.test_ix[j]: + fname = Path(os.path.join(self.dirpath, f"test{k}.sas7bdat")) + df = pd.read_sas(fname, encoding="utf-8") + tm.assert_frame_equal(df, df0) + + @td.skip_if_no("py.path") + @pytest.mark.slow + def test_path_localpath(self): + from py.path import local as LocalPath + + for j in 0, 1: + df0 = self.data[j] + for k in self.test_ix[j]: + fname = LocalPath(os.path.join(self.dirpath, f"test{k}.sas7bdat")) + df = pd.read_sas(fname, encoding="utf-8") + tm.assert_frame_equal(df, df0) + + @pytest.mark.slow + def test_iterator_loop(self): + # github #13654 + for j in 0, 1: + for k in self.test_ix[j]: + for chunksize in (3, 5, 10, 11): + fname = os.path.join(self.dirpath, f"test{k}.sas7bdat") + with pd.read_sas( + fname, chunksize=chunksize, encoding="utf-8" + ) as rdr: + y = 0 + for x in rdr: + y += x.shape[0] + assert y == rdr.row_count + + def test_iterator_read_too_much(self): + # github #14734 + k = self.test_ix[0][0] + fname = os.path.join(self.dirpath, f"test{k}.sas7bdat") + with pd.read_sas( + fname, format="sas7bdat", iterator=True, encoding="utf-8" + ) as rdr: + d1 = rdr.read(rdr.row_count + 20) + + with pd.read_sas(fname, iterator=True, encoding="utf-8") as rdr: + d2 = rdr.read(rdr.row_count + 20) + tm.assert_frame_equal(d1, d2) + + +def test_encoding_options(datapath): + fname = datapath("io", "sas", "data", "test1.sas7bdat") + df1 = pd.read_sas(fname) + df2 = pd.read_sas(fname, encoding="utf-8") + for col in df1.columns: + try: + df1[col] = df1[col].str.decode("utf-8") + except AttributeError: + pass + tm.assert_frame_equal(df1, df2) + + from pandas.io.sas.sas7bdat import SAS7BDATReader + + rdr = SAS7BDATReader(fname, convert_header_text=False) + df3 
= rdr.read() + rdr.close() + for x, y in zip(df1.columns, df3.columns): + assert x == y.decode() + + +def test_productsales(datapath): + fname = datapath("io", "sas", "data", "productsales.sas7bdat") + df = pd.read_sas(fname, encoding="utf-8") + fname = datapath("io", "sas", "data", "productsales.csv") + df0 = pd.read_csv(fname, parse_dates=["MONTH"]) + vn = ["ACTUAL", "PREDICT", "QUARTER", "YEAR"] + df0[vn] = df0[vn].astype(np.float64) + tm.assert_frame_equal(df, df0) + + +def test_12659(datapath): + fname = datapath("io", "sas", "data", "test_12659.sas7bdat") + df = pd.read_sas(fname) + fname = datapath("io", "sas", "data", "test_12659.csv") + df0 = pd.read_csv(fname) + df0 = df0.astype(np.float64) + tm.assert_frame_equal(df, df0) + + +def test_airline(datapath): + fname = datapath("io", "sas", "data", "airline.sas7bdat") + df = pd.read_sas(fname) + fname = datapath("io", "sas", "data", "airline.csv") + df0 = pd.read_csv(fname) + df0 = df0.astype(np.float64) + tm.assert_frame_equal(df, df0, check_exact=False) + + +def test_date_time(datapath): + # Support of different SAS date/datetime formats (PR #15871) + fname = datapath("io", "sas", "data", "datetime.sas7bdat") + df = pd.read_sas(fname) + fname = datapath("io", "sas", "data", "datetime.csv") + df0 = pd.read_csv( + fname, parse_dates=["Date1", "Date2", "DateTime", "DateTimeHi", "Taiw"] + ) + # GH 19732: Timestamps imported from sas will incur floating point errors + df.iloc[:, 3] = df.iloc[:, 3].dt.round("us") + tm.assert_frame_equal(df, df0) + + +def test_compact_numerical_values(datapath): + # Regression test for #21616 + fname = datapath("io", "sas", "data", "cars.sas7bdat") + df = pd.read_sas(fname, encoding="latin-1") + # The two columns CYL and WGT in cars.sas7bdat have column + # width < 8 and only contain integral values. + # Test that pandas doesn't corrupt the numbers by adding + # decimals. + result = df["WGT"] + expected = df["WGT"].round() + tm.assert_series_equal(result, expected, check_exact=True) + result = df["CYL"] + expected = df["CYL"].round() + tm.assert_series_equal(result, expected, check_exact=True) + + +def test_many_columns(datapath): + # Test for looking for column information in more places (PR #22628) + fname = datapath("io", "sas", "data", "many_columns.sas7bdat") + + df = pd.read_sas(fname, encoding="latin-1") + + fname = datapath("io", "sas", "data", "many_columns.csv") + df0 = pd.read_csv(fname, encoding="latin-1") + tm.assert_frame_equal(df, df0) + + +def test_inconsistent_number_of_rows(datapath): + # Regression test for issue #16615. 
(PR #22628) + fname = datapath("io", "sas", "data", "load_log.sas7bdat") + df = pd.read_sas(fname, encoding="latin-1") + assert len(df) == 2097 + + +def test_zero_variables(datapath): + # Check if the SAS file has zero variables (PR #18184) + fname = datapath("io", "sas", "data", "zero_variables.sas7bdat") + with pytest.raises(EmptyDataError, match="No columns to parse from file"): + pd.read_sas(fname) + + +def test_corrupt_read(datapath): + # We don't really care about the exact failure, the important thing is + # that the resource should be cleaned up afterwards (BUG #35566) + fname = datapath("io", "sas", "data", "corrupt.sas7bdat") + msg = "'SAS7BDATReader' object has no attribute 'row_count'" + with pytest.raises(AttributeError, match=msg): + pd.read_sas(fname) + + +def round_datetime_to_ms(ts): + if isinstance(ts, datetime): + return ts.replace(microsecond=int(round(ts.microsecond, -3) / 1000) * 1000) + elif isinstance(ts, str): + _ts = dateutil.parser.parse(timestr=ts) + return _ts.replace(microsecond=int(round(_ts.microsecond, -3) / 1000) * 1000) + else: + return ts + + +def test_max_sas_date(datapath): + # GH 20927 + # NB. max datetime in SAS dataset is 31DEC9999:23:59:59.999 + # but this is read as 29DEC9999:23:59:59.998993 by a buggy + # sas7bdat module + fname = datapath("io", "sas", "data", "max_sas_date.sas7bdat") + df = pd.read_sas(fname, encoding="iso-8859-1") + + # SAS likes to left pad strings with spaces - lstrip before comparing + df = df.applymap(lambda x: x.lstrip() if isinstance(x, str) else x) + # GH 19732: Timestamps imported from sas will incur floating point errors + try: + df["dt_as_dt"] = df["dt_as_dt"].dt.round("us") + except pd._libs.tslibs.np_datetime.OutOfBoundsDatetime: + df = df.applymap(round_datetime_to_ms) + except AttributeError: + df["dt_as_dt"] = df["dt_as_dt"].apply(round_datetime_to_ms) + # if there are any date/times > pandas.Timestamp.max then ALL in that chunk + # are returned as datetime.datetime + expected = pd.DataFrame( + { + "text": ["max", "normal"], + "dt_as_float": [253717747199.999, 1880323199.999], + "dt_as_dt": [ + datetime(9999, 12, 29, 23, 59, 59, 999000), + datetime(2019, 8, 1, 23, 59, 59, 999000), + ], + "date_as_float": [2936547.0, 21762.0], + "date_as_date": [datetime(9999, 12, 29), datetime(2019, 8, 1)], + }, + columns=["text", "dt_as_float", "dt_as_dt", "date_as_float", "date_as_date"], + ) + tm.assert_frame_equal(df, expected) + + +def test_max_sas_date_iterator(datapath): + # GH 20927 + # when called as an iterator, only those chunks with a date > pd.Timestamp.max + # are returned as datetime.datetime, if this happens that whole chunk is returned + # as datetime.datetime + col_order = ["text", "dt_as_float", "dt_as_dt", "date_as_float", "date_as_date"] + fname = datapath("io", "sas", "data", "max_sas_date.sas7bdat") + results = [] + for df in pd.read_sas(fname, encoding="iso-8859-1", chunksize=1): + # SAS likes to left pad strings with spaces - lstrip before comparing + df = df.applymap(lambda x: x.lstrip() if isinstance(x, str) else x) + # GH 19732: Timestamps imported from sas will incur floating point errors + try: + df["dt_as_dt"] = df["dt_as_dt"].dt.round("us") + except pd._libs.tslibs.np_datetime.OutOfBoundsDatetime: + df = df.applymap(round_datetime_to_ms) + except AttributeError: + df["dt_as_dt"] = df["dt_as_dt"].apply(round_datetime_to_ms) + df.reset_index(inplace=True, drop=True) + results.append(df) + expected = [ + pd.DataFrame( + { + "text": ["max"], + "dt_as_float": [253717747199.999], + "dt_as_dt": 
[datetime(9999, 12, 29, 23, 59, 59, 999000)], + "date_as_float": [2936547.0], + "date_as_date": [datetime(9999, 12, 29)], + }, + columns=col_order, + ), + pd.DataFrame( + { + "text": ["normal"], + "dt_as_float": [1880323199.999], + "dt_as_dt": [np.datetime64("2019-08-01 23:59:59.999")], + "date_as_float": [21762.0], + "date_as_date": [np.datetime64("2019-08-01")], + }, + columns=col_order, + ), + ] + for result, expected in zip(results, expected): + tm.assert_frame_equal(result, expected) + + +def test_null_date(datapath): + fname = datapath("io", "sas", "data", "dates_null.sas7bdat") + df = pd.read_sas(fname, encoding="utf-8") + + expected = pd.DataFrame( + { + "datecol": [ + datetime(9999, 12, 29), + pd.NaT, + ], + "datetimecol": [ + datetime(9999, 12, 29, 23, 59, 59, 998993), + pd.NaT, + ], + }, + ) + tm.assert_frame_equal(df, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/sas/test_xport.py b/.local/lib/python3.8/site-packages/pandas/tests/io/sas/test_xport.py new file mode 100644 index 0000000..9232ea8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/sas/test_xport.py @@ -0,0 +1,168 @@ +import os + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +import pandas._testing as tm + +from pandas.io.sas.sasreader import read_sas + +# CSV versions of test xpt files were obtained using the R foreign library + +# Numbers in a SAS xport file are always float64, so need to convert +# before making comparisons. + + +def numeric_as_float(data): + for v in data.columns: + if data[v].dtype is np.dtype("int64"): + data[v] = data[v].astype(np.float64) + + +class TestXport: + @pytest.fixture(autouse=True) + def setup_method(self, datapath): + self.dirpath = datapath("io", "sas", "data") + self.file01 = os.path.join(self.dirpath, "DEMO_G.xpt") + self.file02 = os.path.join(self.dirpath, "SSHSV1_A.xpt") + self.file03 = os.path.join(self.dirpath, "DRXFCD_G.xpt") + self.file04 = os.path.join(self.dirpath, "paxraw_d_short.xpt") + self.file05 = os.path.join(self.dirpath, "DEMO_PUF.cpt") + + with td.file_leak_context(): + yield + + @pytest.mark.slow + def test1_basic(self): + # Tests with DEMO_G.xpt (all numeric file) + + # Compare to this + data_csv = pd.read_csv(self.file01.replace(".xpt", ".csv")) + numeric_as_float(data_csv) + + # Read full file + data = read_sas(self.file01, format="xport") + tm.assert_frame_equal(data, data_csv) + num_rows = data.shape[0] + + # Test reading beyond end of file + with read_sas(self.file01, format="xport", iterator=True) as reader: + data = reader.read(num_rows + 100) + assert data.shape[0] == num_rows + + # Test incremental read with `read` method. + with read_sas(self.file01, format="xport", iterator=True) as reader: + data = reader.read(10) + tm.assert_frame_equal(data, data_csv.iloc[0:10, :]) + + # Test incremental read with `get_chunk` method. 
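+        # chunked-reader sketch (assumed to mirror read_csv's TextFileReader):
+        # get_chunk() returns the next `chunksize` rows, and the reader is
+        # itself iterable, which the "read in loop" block below relies on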
+ with read_sas(self.file01, format="xport", chunksize=10) as reader: + data = reader.get_chunk() + tm.assert_frame_equal(data, data_csv.iloc[0:10, :]) + + # Test read in loop + m = 0 + with read_sas(self.file01, format="xport", chunksize=100) as reader: + for x in reader: + m += x.shape[0] + assert m == num_rows + + # Read full file with `read_sas` method + data = read_sas(self.file01) + tm.assert_frame_equal(data, data_csv) + + def test1_index(self): + # Tests with DEMO_G.xpt using index (all numeric file) + + # Compare to this + data_csv = pd.read_csv(self.file01.replace(".xpt", ".csv")) + data_csv = data_csv.set_index("SEQN") + numeric_as_float(data_csv) + + # Read full file + data = read_sas(self.file01, index="SEQN", format="xport") + tm.assert_frame_equal(data, data_csv, check_index_type=False) + + # Test incremental read with `read` method. + with read_sas( + self.file01, index="SEQN", format="xport", iterator=True + ) as reader: + data = reader.read(10) + tm.assert_frame_equal(data, data_csv.iloc[0:10, :], check_index_type=False) + + # Test incremental read with `get_chunk` method. + with read_sas( + self.file01, index="SEQN", format="xport", chunksize=10 + ) as reader: + data = reader.get_chunk() + tm.assert_frame_equal(data, data_csv.iloc[0:10, :], check_index_type=False) + + def test1_incremental(self): + # Test with DEMO_G.xpt, reading full file incrementally + + data_csv = pd.read_csv(self.file01.replace(".xpt", ".csv")) + data_csv = data_csv.set_index("SEQN") + numeric_as_float(data_csv) + + with read_sas(self.file01, index="SEQN", chunksize=1000) as reader: + all_data = list(reader) + data = pd.concat(all_data, axis=0) + + tm.assert_frame_equal(data, data_csv, check_index_type=False) + + def test2(self): + # Test with SSHSV1_A.xpt + + # Compare to this + data_csv = pd.read_csv(self.file02.replace(".xpt", ".csv")) + numeric_as_float(data_csv) + + data = read_sas(self.file02) + tm.assert_frame_equal(data, data_csv) + + def test2_binary(self): + # Test with SSHSV1_A.xpt, read as a binary file + + # Compare to this + data_csv = pd.read_csv(self.file02.replace(".xpt", ".csv")) + numeric_as_float(data_csv) + + with open(self.file02, "rb") as fd: + with td.file_leak_context(): + # GH#35693 ensure that if we pass an open file, we + # dont incorrectly close it in read_sas + data = read_sas(fd, format="xport") + + tm.assert_frame_equal(data, data_csv) + + def test_multiple_types(self): + # Test with DRXFCD_G.xpt (contains text and numeric variables) + + # Compare to this + data_csv = pd.read_csv(self.file03.replace(".xpt", ".csv")) + + data = read_sas(self.file03, encoding="utf-8") + tm.assert_frame_equal(data, data_csv) + + def test_truncated_float_support(self): + # Test with paxraw_d_short.xpt, a shortened version of: + # http://wwwn.cdc.gov/Nchs/Nhanes/2005-2006/PAXRAW_D.ZIP + # This file has truncated floats (5 bytes in this case). + + # GH 11713 + + data_csv = pd.read_csv(self.file04.replace(".xpt", ".csv")) + + data = read_sas(self.file04, format="xport") + tm.assert_frame_equal(data.astype("int64"), data_csv) + + def test_cport_header_found_raises(self): + # Test with DEMO_PUF.cpt, the beginning of puf2019_1_fall.xpt + # from https://www.cms.gov/files/zip/puf2019.zip + # (despite the extension, it's a cpt file) + msg = "Header record indicates a CPORT file, which is not readable." 
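+        # read_sas sniffs the transport header record; a CPORT header is
+        # rejected outright rather than mis-parsed (inferred from `msg` above)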
+ with pytest.raises(ValueError, match=msg): + read_sas(self.file05, format="xport") diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/test_clipboard.py b/.local/lib/python3.8/site-packages/pandas/tests/io/test_clipboard.py new file mode 100644 index 0000000..aacfccd --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/test_clipboard.py @@ -0,0 +1,315 @@ +from textwrap import dedent + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + get_option, + read_clipboard, +) +import pandas._testing as tm + +from pandas.io.clipboard import ( + clipboard_get, + clipboard_set, +) + + +def build_kwargs(sep, excel): + kwargs = {} + if excel != "default": + kwargs["excel"] = excel + if sep != "default": + kwargs["sep"] = sep + return kwargs + + +@pytest.fixture( + params=[ + "delims", + "utf8", + "utf16", + "string", + "long", + "nonascii", + "colwidth", + "mixed", + "float", + "int", + ] +) +def df(request): + data_type = request.param + + if data_type == "delims": + return DataFrame({"a": ['"a,\t"b|c', "d\tef´"], "b": ["hi'j", "k''lm"]}) + elif data_type == "utf8": + return DataFrame({"a": ["µasd", "Ωœ∑´"], "b": ["øπ∆˚¬", "œ∑´®"]}) + elif data_type == "utf16": + return DataFrame( + {"a": ["\U0001f44d\U0001f44d", "\U0001f44d\U0001f44d"], "b": ["abc", "def"]} + ) + elif data_type == "string": + return tm.makeCustomDataframe( + 5, 3, c_idx_type="s", r_idx_type="i", c_idx_names=[None], r_idx_names=[None] + ) + elif data_type == "long": + max_rows = get_option("display.max_rows") + return tm.makeCustomDataframe( + max_rows + 1, + 3, + data_gen_f=lambda *args: np.random.randint(2), + c_idx_type="s", + r_idx_type="i", + c_idx_names=[None], + r_idx_names=[None], + ) + elif data_type == "nonascii": + return DataFrame({"en": "in English".split(), "es": "en español".split()}) + elif data_type == "colwidth": + _cw = get_option("display.max_colwidth") + 1 + return tm.makeCustomDataframe( + 5, + 3, + data_gen_f=lambda *args: "x" * _cw, + c_idx_type="s", + r_idx_type="i", + c_idx_names=[None], + r_idx_names=[None], + ) + elif data_type == "mixed": + return DataFrame( + { + "a": np.arange(1.0, 6.0) + 0.01, + "b": np.arange(1, 6).astype(np.int64), + "c": list("abcde"), + } + ) + elif data_type == "float": + return tm.makeCustomDataframe( + 5, + 3, + data_gen_f=lambda r, c: float(r) + 0.01, + c_idx_type="s", + r_idx_type="i", + c_idx_names=[None], + r_idx_names=[None], + ) + elif data_type == "int": + return tm.makeCustomDataframe( + 5, + 3, + data_gen_f=lambda *args: np.random.randint(2), + c_idx_type="s", + r_idx_type="i", + c_idx_names=[None], + r_idx_names=[None], + ) + else: + raise ValueError + + +@pytest.fixture +def mock_clipboard(monkeypatch, request): + """Fixture mocking clipboard IO. + + This mocks pandas.io.clipboard.clipboard_get and + pandas.io.clipboard.clipboard_set. + + This uses a local dict for storing data. The dictionary + key used is the test ID, available with ``request.node.name``. + + This returns the local dictionary, for direct manipulation by + tests. 
+ """ + # our local clipboard for tests + _mock_data = {} + + def _mock_set(data): + _mock_data[request.node.name] = data + + def _mock_get(): + return _mock_data[request.node.name] + + monkeypatch.setattr("pandas.io.clipboard.clipboard_set", _mock_set) + monkeypatch.setattr("pandas.io.clipboard.clipboard_get", _mock_get) + + yield _mock_data + + +@pytest.mark.clipboard +def test_mock_clipboard(mock_clipboard): + import pandas.io.clipboard + + pandas.io.clipboard.clipboard_set("abc") + assert "abc" in set(mock_clipboard.values()) + result = pandas.io.clipboard.clipboard_get() + assert result == "abc" + + +@pytest.mark.single +@pytest.mark.clipboard +@pytest.mark.usefixtures("mock_clipboard") +class TestClipboard: + def check_round_trip_frame(self, data, excel=None, sep=None, encoding=None): + data.to_clipboard(excel=excel, sep=sep, encoding=encoding) + result = read_clipboard(sep=sep or "\t", index_col=0, encoding=encoding) + tm.assert_frame_equal(data, result) + + # Test that default arguments copy as tab delimited + def test_round_trip_frame(self, df): + self.check_round_trip_frame(df) + + # Test that explicit delimiters are respected + @pytest.mark.parametrize("sep", ["\t", ",", "|"]) + def test_round_trip_frame_sep(self, df, sep): + self.check_round_trip_frame(df, sep=sep) + + # Test white space separator + def test_round_trip_frame_string(self, df): + df.to_clipboard(excel=False, sep=None) + result = read_clipboard() + assert df.to_string() == result.to_string() + assert df.shape == result.shape + + # Two character separator is not supported in to_clipboard + # Test that multi-character separators are not silently passed + def test_excel_sep_warning(self, df): + with tm.assert_produces_warning(): + df.to_clipboard(excel=True, sep=r"\t") + + # Separator is ignored when excel=False and should produce a warning + def test_copy_delim_warning(self, df): + with tm.assert_produces_warning(): + df.to_clipboard(excel=False, sep="\t") + + # Tests that the default behavior of to_clipboard is tab + # delimited and excel="True" + @pytest.mark.parametrize("sep", ["\t", None, "default"]) + @pytest.mark.parametrize("excel", [True, None, "default"]) + def test_clipboard_copy_tabs_default(self, sep, excel, df, request, mock_clipboard): + kwargs = build_kwargs(sep, excel) + df.to_clipboard(**kwargs) + assert mock_clipboard[request.node.name] == df.to_csv(sep="\t") + + # Tests reading of white space separated tables + @pytest.mark.parametrize("sep", [None, "default"]) + @pytest.mark.parametrize("excel", [False]) + def test_clipboard_copy_strings(self, sep, excel, df): + kwargs = build_kwargs(sep, excel) + df.to_clipboard(**kwargs) + result = read_clipboard(sep=r"\s+") + assert result.to_string() == df.to_string() + assert df.shape == result.shape + + def test_read_clipboard_infer_excel(self, request, mock_clipboard): + # gh-19010: avoid warnings + clip_kwargs = {"engine": "python"} + + text = dedent( + """ + John James Charlie Mingus + 1 2 + 4 Harry Carney + """.strip() + ) + mock_clipboard[request.node.name] = text + df = read_clipboard(**clip_kwargs) + + # excel data is parsed correctly + assert df.iloc[1][1] == "Harry Carney" + + # having diff tab counts doesn't trigger it + text = dedent( + """ + a\t b + 1 2 + 3 4 + """.strip() + ) + mock_clipboard[request.node.name] = text + res = read_clipboard(**clip_kwargs) + + text = dedent( + """ + a b + 1 2 + 3 4 + """.strip() + ) + mock_clipboard[request.node.name] = text + exp = read_clipboard(**clip_kwargs) + + tm.assert_frame_equal(res, exp) + + def 
test_infer_excel_with_nulls(self, request, mock_clipboard): + # GH41108 + text = "col1\tcol2\n1\tred\n\tblue\n2\tgreen" + + mock_clipboard[request.node.name] = text + df = read_clipboard() + df_expected = DataFrame( + data={"col1": [1, None, 2], "col2": ["red", "blue", "green"]} + ) + + # excel data is parsed correctly + tm.assert_frame_equal(df, df_expected) + + @pytest.mark.parametrize( + "multiindex", + [ + ( # Can't use `dedent` here as it will remove the leading `\t` + "\n".join( + [ + "\t\t\tcol1\tcol2", + "A\t0\tTrue\t1\tred", + "A\t1\tTrue\t\tblue", + "B\t0\tFalse\t2\tgreen", + ] + ), + [["A", "A", "B"], [0, 1, 0], [True, True, False]], + ), + ( + "\n".join( + ["\t\tcol1\tcol2", "A\t0\t1\tred", "A\t1\t\tblue", "B\t0\t2\tgreen"] + ), + [["A", "A", "B"], [0, 1, 0]], + ), + ], + ) + def test_infer_excel_with_multiindex(self, request, mock_clipboard, multiindex): + # GH41108 + + mock_clipboard[request.node.name] = multiindex[0] + df = read_clipboard() + df_expected = DataFrame( + data={"col1": [1, None, 2], "col2": ["red", "blue", "green"]}, + index=multiindex[1], + ) + + # excel data is parsed correctly + tm.assert_frame_equal(df, df_expected) + + def test_invalid_encoding(self, df): + msg = "clipboard only supports utf-8 encoding" + # test case for testing invalid encoding + with pytest.raises(ValueError, match=msg): + df.to_clipboard(encoding="ascii") + with pytest.raises(NotImplementedError, match=msg): + read_clipboard(encoding="ascii") + + @pytest.mark.parametrize("enc", ["UTF-8", "utf-8", "utf8"]) + def test_round_trip_valid_encodings(self, enc, df): + self.check_round_trip_frame(df, encoding=enc) + + @pytest.mark.parametrize("data", ["\U0001f44d...", "Ωœ∑´...", "abcd..."]) + @pytest.mark.xfail( + reason="Flaky test in multi-process CI environment: GH 44584", + raises=AssertionError, + strict=False, + ) + def test_raw_roundtrip(self, data): + # PR #25040 wide unicode wasn't copied correctly on PY3 on windows + clipboard_set(data) + assert data == clipboard_get() diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/test_common.py b/.local/lib/python3.8/site-packages/pandas/tests/io/test_common.py new file mode 100644 index 0000000..b458f33 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/test_common.py @@ -0,0 +1,602 @@ +""" +Tests for the pandas.io.common functionalities +""" +import codecs +import errno +from functools import partial +from io import ( + BytesIO, + StringIO, + UnsupportedOperation, +) +import mmap +import os +from pathlib import Path +import tempfile + +import pytest + +from pandas.compat import is_platform_windows +import pandas.util._test_decorators as td + +import pandas as pd +import pandas._testing as tm + +import pandas.io.common as icom + + +class CustomFSPath: + """For testing fspath on unknown objects""" + + def __init__(self, path): + self.path = path + + def __fspath__(self): + return self.path + + +# Functions that consume a string path and return a string or path-like object +path_types = [str, CustomFSPath, Path] + +try: + from py.path import local as LocalPath + + path_types.append(LocalPath) +except ImportError: + pass + +HERE = os.path.abspath(os.path.dirname(__file__)) + + +# https://github.com/cython/cython/issues/1720 +@pytest.mark.filterwarnings("ignore:can't resolve package:ImportWarning") +class TestCommonIOCapabilities: + data1 = """index,A,B,C,D +foo,2,3,4,5 +bar,7,8,9,10 +baz,12,13,14,15 +qux,12,13,14,15 +foo2,12,13,14,15 +bar2,12,13,14,15 +""" + + def test_expand_user(self): + filename = "~/sometest" + 
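+        # _expand_user defers to os.path.expanduser for str input (non-str
+        # passes through unchanged), hence the three properties asserted below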
expanded_name = icom._expand_user(filename) + + assert expanded_name != filename + assert os.path.isabs(expanded_name) + assert os.path.expanduser(filename) == expanded_name + + def test_expand_user_normal_path(self): + filename = "/somefolder/sometest" + expanded_name = icom._expand_user(filename) + + assert expanded_name == filename + assert os.path.expanduser(filename) == expanded_name + + def test_stringify_path_pathlib(self): + rel_path = icom.stringify_path(Path(".")) + assert rel_path == "." + redundant_path = icom.stringify_path(Path("foo//bar")) + assert redundant_path == os.path.join("foo", "bar") + + @td.skip_if_no("py.path") + def test_stringify_path_localpath(self): + path = os.path.join("foo", "bar") + abs_path = os.path.abspath(path) + lpath = LocalPath(path) + assert icom.stringify_path(lpath) == abs_path + + def test_stringify_path_fspath(self): + p = CustomFSPath("foo/bar.csv") + result = icom.stringify_path(p) + assert result == "foo/bar.csv" + + def test_stringify_file_and_path_like(self): + # GH 38125: do not stringify file objects that are also path-like + fsspec = pytest.importorskip("fsspec") + with tm.ensure_clean() as path: + with fsspec.open(f"file://{path}", mode="wb") as fsspec_obj: + assert fsspec_obj == icom.stringify_path(fsspec_obj) + + @pytest.mark.parametrize("path_type", path_types) + def test_infer_compression_from_path(self, compression_format, path_type): + extension, expected = compression_format + path = path_type("foo/bar.csv" + extension) + compression = icom.infer_compression(path, compression="infer") + assert compression == expected + + @pytest.mark.parametrize("path_type", [str, CustomFSPath, Path]) + def test_get_handle_with_path(self, path_type): + # ignore LocalPath: it creates strange paths: /absolute/~/sometest + with tempfile.TemporaryDirectory(dir=Path.home()) as tmp: + filename = path_type("~/" + Path(tmp).name + "/sometest") + with icom.get_handle(filename, "w") as handles: + assert Path(handles.handle.name).is_absolute() + assert os.path.expanduser(filename) == handles.handle.name + + def test_get_handle_with_buffer(self): + input_buffer = StringIO() + with icom.get_handle(input_buffer, "r") as handles: + assert handles.handle == input_buffer + assert not input_buffer.closed + input_buffer.close() + + # Test that BytesIOWrapper(get_handle) returns correct amount of bytes every time + def test_bytesiowrapper_returns_correct_bytes(self): + # Test latin1, ucs-2, and ucs-4 chars + data = """a,b,c +1,2,3 +©,®,® +Look,a snake,🐍""" + with icom.get_handle(StringIO(data), "rb", is_text=False) as handles: + result = b"" + chunksize = 5 + while True: + chunk = handles.handle.read(chunksize) + # Make sure each chunk is correct amount of bytes + assert len(chunk) <= chunksize + if len(chunk) < chunksize: + # Can be less amount of bytes, but only at EOF + # which happens when read returns empty + assert len(handles.handle.read()) == 0 + result += chunk + break + result += chunk + assert result == data.encode("utf-8") + + # Test that pyarrow can handle a file opened with get_handle + @td.skip_if_no("pyarrow", min_version="0.15.0") + def test_get_handle_pyarrow_compat(self): + from pyarrow import csv + + # Test latin1, ucs-2, and ucs-4 chars + data = """a,b,c +1,2,3 +©,®,® +Look,a snake,🐍""" + expected = pd.DataFrame( + {"a": ["1", "©", "Look"], "b": ["2", "®", "a snake"], "c": ["3", "®", "🐍"]} + ) + s = StringIO(data) + with icom.get_handle(s, "rb", is_text=False) as handles: + df = csv.read_csv(handles.handle).to_pandas() + 
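+            # get_handle(is_text=False) exposes the str-producing StringIO as
+            # a bytes stream so pyarrow's csv.read_csv can consume it, and it
+            # must leave the caller's buffer open (asserted below)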
tm.assert_frame_equal(df, expected) + assert not s.closed + + def test_iterator(self): + with pd.read_csv(StringIO(self.data1), chunksize=1) as reader: + result = pd.concat(reader, ignore_index=True) + expected = pd.read_csv(StringIO(self.data1)) + tm.assert_frame_equal(result, expected) + + # GH12153 + with pd.read_csv(StringIO(self.data1), chunksize=1) as it: + first = next(it) + tm.assert_frame_equal(first, expected.iloc[[0]]) + tm.assert_frame_equal(pd.concat(it), expected.iloc[1:]) + + @pytest.mark.parametrize( + "reader, module, error_class, fn_ext", + [ + (pd.read_csv, "os", FileNotFoundError, "csv"), + (pd.read_fwf, "os", FileNotFoundError, "txt"), + (pd.read_excel, "xlrd", FileNotFoundError, "xlsx"), + (pd.read_feather, "pyarrow", OSError, "feather"), + (pd.read_hdf, "tables", FileNotFoundError, "h5"), + (pd.read_stata, "os", FileNotFoundError, "dta"), + (pd.read_sas, "os", FileNotFoundError, "sas7bdat"), + (pd.read_json, "os", ValueError, "json"), + (pd.read_pickle, "os", FileNotFoundError, "pickle"), + ], + ) + def test_read_non_existent(self, reader, module, error_class, fn_ext): + pytest.importorskip(module) + + path = os.path.join(HERE, "data", "does_not_exist." + fn_ext) + msg1 = fr"File (b')?.+does_not_exist\.{fn_ext}'? does not exist" + msg2 = fr"\[Errno 2\] No such file or directory: '.+does_not_exist\.{fn_ext}'" + msg3 = "Expected object or value" + msg4 = "path_or_buf needs to be a string file path or file-like" + msg5 = ( + fr"\[Errno 2\] File .+does_not_exist\.{fn_ext} does not exist: " + fr"'.+does_not_exist\.{fn_ext}'" + ) + msg6 = fr"\[Errno 2\] 没有那个文件或目录: '.+does_not_exist\.{fn_ext}'" + msg7 = ( + fr"\[Errno 2\] File o directory non esistente: '.+does_not_exist\.{fn_ext}'" + ) + msg8 = fr"Failed to open local file.+does_not_exist\.{fn_ext}" + + with pytest.raises( + error_class, + match=fr"({msg1}|{msg2}|{msg3}|{msg4}|{msg5}|{msg6}|{msg7}|{msg8})", + ): + reader(path) + + @pytest.mark.parametrize( + "method, module, error_class, fn_ext", + [ + (pd.DataFrame.to_csv, "os", OSError, "csv"), + (pd.DataFrame.to_html, "os", OSError, "html"), + (pd.DataFrame.to_excel, "xlrd", OSError, "xlsx"), + (pd.DataFrame.to_feather, "pyarrow", OSError, "feather"), + (pd.DataFrame.to_parquet, "pyarrow", OSError, "parquet"), + (pd.DataFrame.to_stata, "os", OSError, "dta"), + (pd.DataFrame.to_json, "os", OSError, "json"), + (pd.DataFrame.to_pickle, "os", OSError, "pickle"), + ], + ) + # NOTE: Missing parent directory for pd.DataFrame.to_hdf is handled by PyTables + def test_write_missing_parent_directory(self, method, module, error_class, fn_ext): + pytest.importorskip(module) + + dummy_frame = pd.DataFrame({"a": [1, 2, 3], "b": [2, 3, 4], "c": [3, 4, 5]}) + + path = os.path.join(HERE, "data", "missing_folder", "does_not_exist." 
+ fn_ext) + + with pytest.raises( + error_class, + match=r"Cannot save file into a non-existent directory: .*missing_folder", + ): + method(dummy_frame, path) + + @pytest.mark.parametrize( + "reader, module, error_class, fn_ext", + [ + (pd.read_csv, "os", FileNotFoundError, "csv"), + (pd.read_table, "os", FileNotFoundError, "csv"), + (pd.read_fwf, "os", FileNotFoundError, "txt"), + (pd.read_excel, "xlrd", FileNotFoundError, "xlsx"), + (pd.read_feather, "pyarrow", OSError, "feather"), + (pd.read_hdf, "tables", FileNotFoundError, "h5"), + (pd.read_stata, "os", FileNotFoundError, "dta"), + (pd.read_sas, "os", FileNotFoundError, "sas7bdat"), + (pd.read_json, "os", ValueError, "json"), + (pd.read_pickle, "os", FileNotFoundError, "pickle"), + ], + ) + def test_read_expands_user_home_dir( + self, reader, module, error_class, fn_ext, monkeypatch + ): + pytest.importorskip(module) + + path = os.path.join("~", "does_not_exist." + fn_ext) + monkeypatch.setattr(icom, "_expand_user", lambda x: os.path.join("foo", x)) + + msg1 = fr"File (b')?.+does_not_exist\.{fn_ext}'? does not exist" + msg2 = fr"\[Errno 2\] No such file or directory: '.+does_not_exist\.{fn_ext}'" + msg3 = "Unexpected character found when decoding 'false'" + msg4 = "path_or_buf needs to be a string file path or file-like" + msg5 = ( + fr"\[Errno 2\] File .+does_not_exist\.{fn_ext} does not exist: " + fr"'.+does_not_exist\.{fn_ext}'" + ) + msg6 = fr"\[Errno 2\] 没有那个文件或目录: '.+does_not_exist\.{fn_ext}'" + msg7 = ( + fr"\[Errno 2\] File o directory non esistente: '.+does_not_exist\.{fn_ext}'" + ) + msg8 = fr"Failed to open local file.+does_not_exist\.{fn_ext}" + + with pytest.raises( + error_class, + match=fr"({msg1}|{msg2}|{msg3}|{msg4}|{msg5}|{msg6}|{msg7}|{msg8})", + ): + reader(path) + + @pytest.mark.parametrize( + "reader, module, path", + [ + (pd.read_csv, "os", ("io", "data", "csv", "iris.csv")), + (pd.read_table, "os", ("io", "data", "csv", "iris.csv")), + ( + pd.read_fwf, + "os", + ("io", "data", "fixed_width", "fixed_width_format.txt"), + ), + (pd.read_excel, "xlrd", ("io", "data", "excel", "test1.xlsx")), + ( + pd.read_feather, + "pyarrow", + ("io", "data", "feather", "feather-0_3_1.feather"), + ), + ( + pd.read_hdf, + "tables", + ("io", "data", "legacy_hdf", "datetimetz_object.h5"), + ), + (pd.read_stata, "os", ("io", "data", "stata", "stata10_115.dta")), + (pd.read_sas, "os", ("io", "sas", "data", "test1.sas7bdat")), + (pd.read_json, "os", ("io", "json", "data", "tsframe_v012.json")), + ( + pd.read_pickle, + "os", + ("io", "data", "pickle", "categorical.0.25.0.pickle"), + ), + ], + ) + @pytest.mark.filterwarnings( + "ignore:CategoricalBlock is deprecated:DeprecationWarning" + ) + @pytest.mark.filterwarnings( # pytables np.object usage + "ignore:`np.object` is a deprecated alias:DeprecationWarning" + ) + def test_read_fspath_all(self, reader, module, path, datapath): + pytest.importorskip(module) + path = datapath(*path) + + mypath = CustomFSPath(path) + result = reader(mypath) + expected = reader(path) + + if path.endswith(".pickle"): + # categorical + tm.assert_categorical_equal(result, expected) + else: + tm.assert_frame_equal(result, expected) + + @pytest.mark.filterwarnings("ignore:In future versions `DataFrame.to_latex`") + @pytest.mark.parametrize( + "writer_name, writer_kwargs, module", + [ + ("to_csv", {}, "os"), + ("to_excel", {"engine": "xlwt"}, "xlwt"), + ("to_feather", {}, "pyarrow"), + ("to_html", {}, "os"), + ("to_json", {}, "os"), + ("to_latex", {}, "os"), + ("to_pickle", {}, "os"), + ("to_stata", {"time_stamp": 
pd.to_datetime("2019-01-01 00:00")}, "os"), + ], + ) + def test_write_fspath_all(self, writer_name, writer_kwargs, module): + p1 = tm.ensure_clean("string") + p2 = tm.ensure_clean("fspath") + df = pd.DataFrame({"A": [1, 2]}) + + with p1 as string, p2 as fspath: + pytest.importorskip(module) + mypath = CustomFSPath(fspath) + writer = getattr(df, writer_name) + + writer(string, **writer_kwargs) + with open(string, "rb") as f: + expected = f.read() + + writer(mypath, **writer_kwargs) + with open(fspath, "rb") as f: + result = f.read() + + assert result == expected + + @pytest.mark.filterwarnings( # pytables np.object usage + "ignore:`np.object` is a deprecated alias:DeprecationWarning" + ) + def test_write_fspath_hdf5(self): + # Same test as write_fspath_all, except HDF5 files aren't + # necessarily byte-for-byte identical for a given dataframe, so we'll + # have to read and compare equality + pytest.importorskip("tables") + + df = pd.DataFrame({"A": [1, 2]}) + p1 = tm.ensure_clean("string") + p2 = tm.ensure_clean("fspath") + + with p1 as string, p2 as fspath: + mypath = CustomFSPath(fspath) + df.to_hdf(mypath, key="bar") + df.to_hdf(string, key="bar") + + result = pd.read_hdf(fspath, key="bar") + expected = pd.read_hdf(string, key="bar") + + tm.assert_frame_equal(result, expected) + + +@pytest.fixture +def mmap_file(datapath): + return datapath("io", "data", "csv", "test_mmap.csv") + + +class TestMMapWrapper: + def test_constructor_bad_file(self, mmap_file): + non_file = StringIO("I am not a file") + non_file.fileno = lambda: -1 + + # the error raised is different on Windows + if is_platform_windows(): + msg = "The parameter is incorrect" + err = OSError + else: + msg = "[Errno 22]" + err = mmap.error + + with pytest.raises(err, match=msg): + icom._MMapWrapper(non_file) + + target = open(mmap_file) + target.close() + + msg = "I/O operation on closed file" + with pytest.raises(ValueError, match=msg): + icom._MMapWrapper(target) + + def test_get_attr(self, mmap_file): + with open(mmap_file) as target: + wrapper = icom._MMapWrapper(target) + + attrs = dir(wrapper.mmap) + attrs = [attr for attr in attrs if not attr.startswith("__")] + attrs.append("__next__") + + for attr in attrs: + assert hasattr(wrapper, attr) + + assert not hasattr(wrapper, "foo") + + def test_next(self, mmap_file): + with open(mmap_file) as target: + wrapper = icom._MMapWrapper(target) + lines = target.readlines() + + for line in lines: + next_line = next(wrapper) + assert next_line.strip() == line.strip() + + with pytest.raises(StopIteration, match=r"^$"): + next(wrapper) + + def test_unknown_engine(self): + with tm.ensure_clean() as path: + df = tm.makeDataFrame() + df.to_csv(path) + with pytest.raises(ValueError, match="Unknown engine"): + pd.read_csv(path, engine="pyt") + + def test_binary_mode(self): + """ + 'encoding' shouldn't be passed to 'open' in binary mode. + + GH 35058 + """ + with tm.ensure_clean() as path: + df = tm.makeDataFrame() + df.to_csv(path, mode="w+b") + tm.assert_frame_equal(df, pd.read_csv(path, index_col=0)) + + @pytest.mark.parametrize("encoding", ["utf-16", "utf-32"]) + @pytest.mark.parametrize("compression_", ["bz2", "xz"]) + def test_warning_missing_utf_bom(self, encoding, compression_): + """ + bz2 and xz do not write the byte order mark (BOM) for utf-16/32. 
+ + https://stackoverflow.com/questions/55171439 + + GH 35681 + """ + df = tm.makeDataFrame() + with tm.ensure_clean() as path: + with tm.assert_produces_warning(UnicodeWarning): + df.to_csv(path, compression=compression_, encoding=encoding) + + # reading should fail (otherwise we wouldn't need the warning) + msg = r"UTF-\d+ stream does not start with BOM" + with pytest.raises(UnicodeError, match=msg): + pd.read_csv(path, compression=compression_, encoding=encoding) + + +def test_is_fsspec_url(): + assert icom.is_fsspec_url("gcs://pandas/somethingelse.com") + assert icom.is_fsspec_url("gs://pandas/somethingelse.com") + # the following is the only remote URL that is handled without fsspec + assert not icom.is_fsspec_url("http://pandas/somethingelse.com") + assert not icom.is_fsspec_url("random:pandas/somethingelse.com") + assert not icom.is_fsspec_url("/local/path") + assert not icom.is_fsspec_url("relative/local/path") + # fsspec URL in string should not be recognized + assert not icom.is_fsspec_url("this is not fsspec://url") + assert not icom.is_fsspec_url("{'url': 'gs://pandas/somethingelse.com'}") + # accept everything that conforms to RFC 3986 schema + assert icom.is_fsspec_url("RFC-3986+compliant.spec://something") + + +@pytest.mark.parametrize("encoding", [None, "utf-8"]) +@pytest.mark.parametrize("format", ["csv", "json"]) +def test_codecs_encoding(encoding, format): + # GH39247 + expected = tm.makeDataFrame() + with tm.ensure_clean() as path: + with codecs.open(path, mode="w", encoding=encoding) as handle: + getattr(expected, f"to_{format}")(handle) + with codecs.open(path, mode="r", encoding=encoding) as handle: + if format == "csv": + df = pd.read_csv(handle, index_col=0) + else: + df = pd.read_json(handle) + tm.assert_frame_equal(expected, df) + + +def test_codecs_get_writer_reader(): + # GH39247 + expected = tm.makeDataFrame() + with tm.ensure_clean() as path: + with open(path, "wb") as handle: + with codecs.getwriter("utf-8")(handle) as encoded: + expected.to_csv(encoded) + with open(path, "rb") as handle: + with codecs.getreader("utf-8")(handle) as encoded: + df = pd.read_csv(encoded, index_col=0) + tm.assert_frame_equal(expected, df) + + +@pytest.mark.parametrize( + "io_class,mode,msg", + [ + (BytesIO, "t", "a bytes-like object is required, not 'str'"), + (StringIO, "b", "string argument expected, got 'bytes'"), + ], +) +def test_explicit_encoding(io_class, mode, msg): + # GH39247; this test makes sure that if a user provides mode="*t" or "*b", + # it is used. 
In this test it leads to an error because the + # wrong mode is intentionally requested + expected = tm.makeDataFrame() + with io_class() as buffer: + with pytest.raises(TypeError, match=msg): + expected.to_csv(buffer, mode=f"w{mode}") + + +@pytest.mark.parametrize("encoding_errors", [None, "strict", "replace"]) +@pytest.mark.parametrize("format", ["csv", "json"]) +def test_encoding_errors(encoding_errors, format): + # GH39450 + msg = "'utf-8' codec can't decode byte" + bad_encoding = b"\xe4" + + if format == "csv": + content = b"," + bad_encoding + b"\n" + bad_encoding * 2 + b"," + bad_encoding + reader = partial(pd.read_csv, index_col=0) + else: + content = ( + b'{"' + + bad_encoding * 2 + + b'": {"' + + bad_encoding + + b'":"' + + bad_encoding + + b'"}}' + ) + reader = partial(pd.read_json, orient="index") + with tm.ensure_clean() as path: + file = Path(path) + file.write_bytes(content) + + if encoding_errors != "replace": + with pytest.raises(UnicodeDecodeError, match=msg): + reader(path, encoding_errors=encoding_errors) + else: + df = reader(path, encoding_errors=encoding_errors) + decoded = bad_encoding.decode(errors=encoding_errors) + expected = pd.DataFrame({decoded: [decoded]}, index=[decoded * 2]) + tm.assert_frame_equal(df, expected) + + +def test_bad_encoding_errors(): + # GH 39777 + with tm.ensure_clean() as path: + with pytest.raises(LookupError, match="unknown error handler name"): + icom.get_handle(path, "w", errors="bad") + + +def test_errno_attribute(): + # GH 13872 + with pytest.raises(FileNotFoundError, match="\\[Errno 2\\]") as err: + pd.read_csv("doesnt_exist") + assert err.errno == errno.ENOENT + + +def test_fail_mmap(): + with pytest.raises(UnsupportedOperation, match="fileno"): + with BytesIO() as buffer: + icom.get_handle(buffer, "rb", memory_map=True) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/test_compression.py b/.local/lib/python3.8/site-packages/pandas/tests/io/test_compression.py new file mode 100644 index 0000000..3c278cb --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/test_compression.py @@ -0,0 +1,257 @@ +import io +import os +from pathlib import Path +import subprocess +import sys +import textwrap +import time + +import pytest + +import pandas as pd +import pandas._testing as tm + +import pandas.io.common as icom + + +@pytest.mark.parametrize( + "obj", + [ + pd.DataFrame( + 100 * [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], + columns=["X", "Y", "Z"], + ), + pd.Series(100 * [0.123456, 0.234567, 0.567567], name="X"), + ], +) +@pytest.mark.parametrize("method", ["to_pickle", "to_json", "to_csv"]) +def test_compression_size(obj, method, compression_only): + with tm.ensure_clean() as path: + getattr(obj, method)(path, compression=compression_only) + compressed_size = os.path.getsize(path) + getattr(obj, method)(path, compression=None) + uncompressed_size = os.path.getsize(path) + assert uncompressed_size > compressed_size + + +@pytest.mark.parametrize( + "obj", + [ + pd.DataFrame( + 100 * [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], + columns=["X", "Y", "Z"], + ), + pd.Series(100 * [0.123456, 0.234567, 0.567567], name="X"), + ], +) +@pytest.mark.parametrize("method", ["to_csv", "to_json"]) +def test_compression_size_fh(obj, method, compression_only): + with tm.ensure_clean() as path: + with icom.get_handle(path, "w", compression=compression_only) as handles: + getattr(obj, method)(handles.handle) + assert not handles.handle.closed + compressed_size =
os.path.getsize(path) + with tm.ensure_clean() as path: + with icom.get_handle(path, "w", compression=None) as handles: + getattr(obj, method)(handles.handle) + assert not handles.handle.closed + uncompressed_size = os.path.getsize(path) + assert uncompressed_size > compressed_size + + +@pytest.mark.parametrize( + "write_method, write_kwargs, read_method", + [ + ("to_csv", {"index": False}, pd.read_csv), + ("to_json", {}, pd.read_json), + ("to_pickle", {}, pd.read_pickle), + ], +) +def test_dataframe_compression_defaults_to_infer( + write_method, write_kwargs, read_method, compression_only +): + # GH22004 + input = pd.DataFrame([[1.0, 0, -4], [3.4, 5, 2]], columns=["X", "Y", "Z"]) + extension = icom._compression_to_extension[compression_only] + with tm.ensure_clean("compressed" + extension) as path: + getattr(input, write_method)(path, **write_kwargs) + output = read_method(path, compression=compression_only) + tm.assert_frame_equal(output, input) + + +@pytest.mark.parametrize( + "write_method,write_kwargs,read_method,read_kwargs", + [ + ("to_csv", {"index": False, "header": True}, pd.read_csv, {"squeeze": True}), + ("to_json", {}, pd.read_json, {"typ": "series"}), + ("to_pickle", {}, pd.read_pickle, {}), + ], +) +def test_series_compression_defaults_to_infer( + write_method, write_kwargs, read_method, read_kwargs, compression_only +): + # GH22004 + input = pd.Series([0, 5, -2, 10], name="X") + extension = icom._compression_to_extension[compression_only] + with tm.ensure_clean("compressed" + extension) as path: + getattr(input, write_method)(path, **write_kwargs) + if "squeeze" in read_kwargs: + kwargs = read_kwargs.copy() + del kwargs["squeeze"] + output = read_method(path, compression=compression_only, **kwargs).squeeze( + "columns" + ) + else: + output = read_method(path, compression=compression_only, **read_kwargs) + tm.assert_series_equal(output, input, check_names=False) + + +def test_compression_warning(compression_only): + # Assert that passing a file object to to_csv while explicitly specifying a + # compression protocol triggers a RuntimeWarning, as per GH21227. + df = pd.DataFrame( + 100 * [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], + columns=["X", "Y", "Z"], + ) + with tm.ensure_clean() as path: + with icom.get_handle(path, "w", compression=compression_only) as handles: + with tm.assert_produces_warning(RuntimeWarning): + df.to_csv(handles.handle, compression=compression_only) + + +def test_compression_binary(compression_only): + """ + Binary file handles support compression. + + GH22555 + """ + df = tm.makeDataFrame() + + # with a file + with tm.ensure_clean() as path: + with open(path, mode="wb") as file: + df.to_csv(file, mode="wb", compression=compression_only) + file.seek(0) # file shouldn't be closed + tm.assert_frame_equal( + df, pd.read_csv(path, index_col=0, compression=compression_only) + ) + + # with BytesIO + file = io.BytesIO() + df.to_csv(file, mode="wb", compression=compression_only) + file.seek(0) # file shouldn't be closed + tm.assert_frame_equal( + df, pd.read_csv(file, index_col=0, compression=compression_only) + ) + + +def test_gzip_reproducibility_file_name(): + """ + Gzip should create reproducible archives with mtime. + + Note: Archives created with different filenames will still be different! 
+ + GH 28103 + """ + df = tm.makeDataFrame() + compression_options = {"method": "gzip", "mtime": 1} + + # test for filename + with tm.ensure_clean() as path: + path = Path(path) + df.to_csv(path, compression=compression_options) + time.sleep(2) + output = path.read_bytes() + df.to_csv(path, compression=compression_options) + assert output == path.read_bytes() + + +def test_gzip_reproducibility_file_object(): + """ + Gzip should create reproducible archives with mtime. + + GH 28103 + """ + df = tm.makeDataFrame() + compression_options = {"method": "gzip", "mtime": 1} + + # test for file object + buffer = io.BytesIO() + df.to_csv(buffer, compression=compression_options, mode="wb") + output = buffer.getvalue() + time.sleep(2) + buffer = io.BytesIO() + df.to_csv(buffer, compression=compression_options, mode="wb") + assert output == buffer.getvalue() + + +def test_with_missing_lzma(): + """Tests if import pandas works when lzma is not present.""" + # https://github.com/pandas-dev/pandas/issues/27575 + code = textwrap.dedent( + """\ + import sys + sys.modules['lzma'] = None + import pandas + """ + ) + subprocess.check_output([sys.executable, "-c", code], stderr=subprocess.PIPE) + + +def test_with_missing_lzma_runtime(): + """Tests if RuntimeError is hit when calling lzma without + having the module available. + """ + code = textwrap.dedent( + """ + import sys + import pytest + sys.modules['lzma'] = None + import pandas as pd + df = pd.DataFrame() + with pytest.raises(RuntimeError, match='lzma module'): + df.to_csv('foo.csv', compression='xz') + """ + ) + subprocess.check_output([sys.executable, "-c", code], stderr=subprocess.PIPE) + + +@pytest.mark.parametrize( + "obj", + [ + pd.DataFrame( + 100 * [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], + columns=["X", "Y", "Z"], + ), + pd.Series(100 * [0.123456, 0.234567, 0.567567], name="X"), + ], +) +@pytest.mark.parametrize("method", ["to_pickle", "to_json", "to_csv"]) +def test_gzip_compression_level(obj, method): + # GH33196 + with tm.ensure_clean() as path: + getattr(obj, method)(path, compression="gzip") + compressed_size_default = os.path.getsize(path) + getattr(obj, method)(path, compression={"method": "gzip", "compresslevel": 1}) + compressed_size_fast = os.path.getsize(path) + assert compressed_size_default < compressed_size_fast + + +@pytest.mark.parametrize( + "obj", + [ + pd.DataFrame( + 100 * [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], + columns=["X", "Y", "Z"], + ), + pd.Series(100 * [0.123456, 0.234567, 0.567567], name="X"), + ], +) +@pytest.mark.parametrize("method", ["to_pickle", "to_json", "to_csv"]) +def test_bzip_compression_level(obj, method): + """GH33196 bzip needs file size > 100k to show a size difference between + compression levels, so here we just check if the call works when + compression is passed as a dict. 
+ """ + with tm.ensure_clean() as path: + getattr(obj, method)(path, compression={"method": "bz2", "compresslevel": 1}) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/test_date_converters.py b/.local/lib/python3.8/site-packages/pandas/tests/io/test_date_converters.py new file mode 100644 index 0000000..a9fa27e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/test_date_converters.py @@ -0,0 +1,43 @@ +from datetime import datetime + +import numpy as np + +import pandas._testing as tm + +import pandas.io.date_converters as conv + + +def test_parse_date_time(): + + dates = np.array(["2007/1/3", "2008/2/4"], dtype=object) + times = np.array(["05:07:09", "06:08:00"], dtype=object) + expected = np.array([datetime(2007, 1, 3, 5, 7, 9), datetime(2008, 2, 4, 6, 8, 0)]) + with tm.assert_produces_warning(FutureWarning): + result = conv.parse_date_time(dates, times) + tm.assert_numpy_array_equal(result, expected) + + +def test_parse_date_fields(): + days = np.array([3, 4]) + months = np.array([1, 2]) + years = np.array([2007, 2008]) + expected = np.array([datetime(2007, 1, 3), datetime(2008, 2, 4)]) + + with tm.assert_produces_warning(FutureWarning): + result = conv.parse_date_fields(years, months, days) + tm.assert_numpy_array_equal(result, expected) + + +def test_parse_all_fields(): + hours = np.array([5, 6]) + minutes = np.array([7, 8]) + seconds = np.array([9, 0]) + + days = np.array([3, 4]) + years = np.array([2007, 2008]) + months = np.array([1, 2]) + expected = np.array([datetime(2007, 1, 3, 5, 7, 9), datetime(2008, 2, 4, 6, 8, 0)]) + + with tm.assert_produces_warning(FutureWarning): + result = conv.parse_all_fields(years, months, days, hours, minutes, seconds) + tm.assert_numpy_array_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/test_feather.py b/.local/lib/python3.8/site-packages/pandas/tests/io/test_feather.py new file mode 100644 index 0000000..bc1a526 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/test_feather.py @@ -0,0 +1,194 @@ +""" test feather-format compat """ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + +from pandas.io.feather_format import read_feather, to_feather # isort:skip + +pyarrow = pytest.importorskip("pyarrow", minversion="1.0.1") + + +filter_sparse = pytest.mark.filterwarnings("ignore:The Sparse") + + +@filter_sparse +@pytest.mark.single +@pytest.mark.filterwarnings("ignore:CategoricalBlock is deprecated:DeprecationWarning") +class TestFeather: + def check_error_on_write(self, df, exc, err_msg): + # check that we are raising the exception + # on writing + + with pytest.raises(exc, match=err_msg): + with tm.ensure_clean() as path: + to_feather(df, path) + + def check_external_error_on_write(self, df): + # check that we are raising the exception + # on writing + + with tm.external_error_raised(Exception): + with tm.ensure_clean() as path: + to_feather(df, path) + + def check_round_trip(self, df, expected=None, write_kwargs={}, **read_kwargs): + + if expected is None: + expected = df + + with tm.ensure_clean() as path: + to_feather(df, path, **write_kwargs) + + result = read_feather(path, **read_kwargs) + tm.assert_frame_equal(result, expected) + + def test_error(self): + + msg = "feather only support IO with DataFrames" + for obj in [ + pd.Series([1, 2, 3]), + 1, + "foo", + pd.Timestamp("20130101"), + np.array([1, 2, 3]), + ]: + self.check_error_on_write(obj, ValueError, msg) + + def test_basic(self): + + df = pd.DataFrame( + { 
+ "string": list("abc"), + "int": list(range(1, 4)), + "uint": np.arange(3, 6).astype("u1"), + "float": np.arange(4.0, 7.0, dtype="float64"), + "float_with_null": [1.0, np.nan, 3], + "bool": [True, False, True], + "bool_with_null": [True, np.nan, False], + "cat": pd.Categorical(list("abc")), + "dt": pd.DatetimeIndex( + list(pd.date_range("20130101", periods=3)), freq=None + ), + "dttz": pd.DatetimeIndex( + list(pd.date_range("20130101", periods=3, tz="US/Eastern")), + freq=None, + ), + "dt_with_null": [ + pd.Timestamp("20130101"), + pd.NaT, + pd.Timestamp("20130103"), + ], + "dtns": pd.DatetimeIndex( + list(pd.date_range("20130101", periods=3, freq="ns")), freq=None + ), + } + ) + df["periods"] = pd.period_range("2013", freq="M", periods=3) + df["timedeltas"] = pd.timedelta_range("1 day", periods=3) + df["intervals"] = pd.interval_range(0, 3, 3) + + assert df.dttz.dtype.tz.zone == "US/Eastern" + self.check_round_trip(df) + + def test_duplicate_columns(self): + + # https://github.com/wesm/feather/issues/53 + # not currently able to handle duplicate columns + df = pd.DataFrame(np.arange(12).reshape(4, 3), columns=list("aaa")).copy() + self.check_external_error_on_write(df) + + def test_stringify_columns(self): + + df = pd.DataFrame(np.arange(12).reshape(4, 3)).copy() + msg = "feather must have string column names" + self.check_error_on_write(df, ValueError, msg) + + def test_read_columns(self): + # GH 24025 + df = pd.DataFrame( + { + "col1": list("abc"), + "col2": list(range(1, 4)), + "col3": list("xyz"), + "col4": list(range(4, 7)), + } + ) + columns = ["col1", "col3"] + self.check_round_trip(df, expected=df[columns], columns=columns) + + def read_columns_different_order(self): + # GH 33878 + df = pd.DataFrame({"A": [1, 2], "B": ["x", "y"], "C": [True, False]}) + self.check_round_trip(df, columns=["B", "A"]) + + def test_unsupported_other(self): + + # mixed python objects + df = pd.DataFrame({"a": ["a", 1, 2.0]}) + self.check_external_error_on_write(df) + + def test_rw_use_threads(self): + df = pd.DataFrame({"A": np.arange(100000)}) + self.check_round_trip(df, use_threads=True) + self.check_round_trip(df, use_threads=False) + + def test_write_with_index(self): + + df = pd.DataFrame({"A": [1, 2, 3]}) + self.check_round_trip(df) + + msg = ( + r"feather does not support serializing .* for the index; " + r"you can \.reset_index\(\) to make the index into column\(s\)" + ) + # non-default index + for index in [ + [2, 3, 4], + pd.date_range("20130101", periods=3), + list("abc"), + [1, 3, 4], + pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1)]), + ]: + + df.index = index + self.check_error_on_write(df, ValueError, msg) + + # index with meta-data + df.index = [0, 1, 2] + df.index.name = "foo" + msg = "feather does not serialize index meta-data on a default index" + self.check_error_on_write(df, ValueError, msg) + + # column multi-index + df.index = [0, 1, 2] + df.columns = pd.MultiIndex.from_tuples([("a", 1)]) + msg = "feather must have string column names" + self.check_error_on_write(df, ValueError, msg) + + def test_path_pathlib(self): + df = tm.makeDataFrame().reset_index() + result = tm.round_trip_pathlib(df.to_feather, read_feather) + tm.assert_frame_equal(df, result) + + def test_path_localpath(self): + df = tm.makeDataFrame().reset_index() + result = tm.round_trip_localpath(df.to_feather, read_feather) + tm.assert_frame_equal(df, result) + + def test_passthrough_keywords(self): + df = tm.makeDataFrame().reset_index() + self.check_round_trip(df, write_kwargs={"version": 1}) + + 
@pytest.mark.network + @tm.network + def test_http_path(self, feather_file): + # GH 29055 + url = ( + "https://raw.githubusercontent.com/pandas-dev/pandas/main/" + "pandas/tests/io/data/feather/feather-0_3_1.feather" + ) + expected = read_feather(feather_file) + res = read_feather(url) + tm.assert_frame_equal(expected, res) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/test_fsspec.py b/.local/lib/python3.8/site-packages/pandas/tests/io/test_fsspec.py new file mode 100644 index 0000000..f1040c0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/test_fsspec.py @@ -0,0 +1,329 @@ +import io + +import numpy as np +import pytest + +from pandas.compat._optional import VERSIONS + +from pandas import ( + DataFrame, + date_range, + read_csv, + read_excel, + read_feather, + read_json, + read_parquet, + read_pickle, + read_stata, + read_table, +) +import pandas._testing as tm +from pandas.util import _test_decorators as td + +df1 = DataFrame( + { + "int": [1, 3], + "float": [2.0, np.nan], + "str": ["t", "s"], + "dt": date_range("2018-06-18", periods=2), + } +) +text = str(df1.to_csv(index=False)).encode() + + +@pytest.fixture +def cleared_fs(): + fsspec = pytest.importorskip("fsspec") + + memfs = fsspec.filesystem("memory") + yield memfs + memfs.store.clear() + + +def test_read_csv(cleared_fs): + with cleared_fs.open("test/test.csv", "wb") as w: + w.write(text) + df2 = read_csv("memory://test/test.csv", parse_dates=["dt"]) + + tm.assert_frame_equal(df1, df2) + + +def test_reasonable_error(monkeypatch, cleared_fs): + from fsspec import registry + from fsspec.registry import known_implementations + + registry.target.clear() + with pytest.raises(ValueError, match="nosuchprotocol"): + read_csv("nosuchprotocol://test/test.csv") + err_msg = "test error message" + monkeypatch.setitem( + known_implementations, + "couldexist", + {"class": "unimportable.CouldExist", "err": err_msg}, + ) + with pytest.raises(ImportError, match=err_msg): + read_csv("couldexist://test/test.csv") + + +def test_to_csv(cleared_fs): + df1.to_csv("memory://test/test.csv", index=True) + + df2 = read_csv("memory://test/test.csv", parse_dates=["dt"], index_col=0) + + tm.assert_frame_equal(df1, df2) + + +@pytest.mark.parametrize("ext", ["xls", "xlsx"]) +def test_to_excel(cleared_fs, ext): + if ext == "xls": + pytest.importorskip("xlwt") + else: + pytest.importorskip("openpyxl") + + path = f"memory://test/test.{ext}" + df1.to_excel(path, index=True) + + df2 = read_excel(path, parse_dates=["dt"], index_col=0) + + tm.assert_frame_equal(df1, df2) + + +@pytest.mark.parametrize("binary_mode", [False, True]) +def test_to_csv_fsspec_object(cleared_fs, binary_mode): + fsspec = pytest.importorskip("fsspec") + + path = "memory://test/test.csv" + mode = "wb" if binary_mode else "w" + fsspec_object = fsspec.open(path, mode=mode).open() + + df1.to_csv(fsspec_object, index=True) + assert not fsspec_object.closed + fsspec_object.close() + + mode = mode.replace("w", "r") + fsspec_object = fsspec.open(path, mode=mode).open() + + df2 = read_csv( + fsspec_object, + parse_dates=["dt"], + index_col=0, + ) + assert not fsspec_object.closed + fsspec_object.close() + + tm.assert_frame_equal(df1, df2) + + +def test_csv_options(fsspectest): + df = DataFrame({"a": [0]}) + df.to_csv( + "testmem://test/test.csv", storage_options={"test": "csv_write"}, index=False + ) + assert fsspectest.test[0] == "csv_write" + read_csv("testmem://test/test.csv", storage_options={"test": "csv_read"}) + assert fsspectest.test[0] == "csv_read" + + 
+def test_read_table_options(fsspectest): + # GH #39167 + df = DataFrame({"a": [0]}) + df.to_csv( + "testmem://test/test.csv", storage_options={"test": "csv_write"}, index=False + ) + assert fsspectest.test[0] == "csv_write" + read_table("testmem://test/test.csv", storage_options={"test": "csv_read"}) + assert fsspectest.test[0] == "csv_read" + + +@pytest.mark.parametrize("extension", ["xlsx", "xls"]) +def test_excel_options(fsspectest, extension): + if extension == "xls": + pytest.importorskip("xlwt") + else: + pytest.importorskip("openpyxl") + + df = DataFrame({"a": [0]}) + + path = f"testmem://test/test.{extension}" + + df.to_excel(path, storage_options={"test": "write"}, index=False) + assert fsspectest.test[0] == "write" + read_excel(path, storage_options={"test": "read"}) + assert fsspectest.test[0] == "read" + + +@td.skip_if_no("fastparquet") +def test_to_parquet_new_file(monkeypatch, cleared_fs): + """Regression test for writing to a not-yet-existent GCS Parquet file.""" + df1.to_parquet( + "memory://test/test.csv", index=True, engine="fastparquet", compression=None + ) + + +@td.skip_if_no("pyarrow", min_version="2") +def test_arrowparquet_options(fsspectest): + """Regression test for writing to a not-yet-existent GCS Parquet file.""" + df = DataFrame({"a": [0]}) + df.to_parquet( + "testmem://test/test.csv", + engine="pyarrow", + compression=None, + storage_options={"test": "parquet_write"}, + ) + assert fsspectest.test[0] == "parquet_write" + read_parquet( + "testmem://test/test.csv", + engine="pyarrow", + storage_options={"test": "parquet_read"}, + ) + assert fsspectest.test[0] == "parquet_read" + + +@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) fastparquet +@td.skip_if_no("fastparquet") +def test_fastparquet_options(fsspectest): + """Regression test for writing to a not-yet-existent GCS Parquet file.""" + df = DataFrame({"a": [0]}) + df.to_parquet( + "testmem://test/test.csv", + engine="fastparquet", + compression=None, + storage_options={"test": "parquet_write"}, + ) + assert fsspectest.test[0] == "parquet_write" + read_parquet( + "testmem://test/test.csv", + engine="fastparquet", + storage_options={"test": "parquet_read"}, + ) + assert fsspectest.test[0] == "parquet_read" + + +@td.skip_if_no("s3fs") +def test_from_s3_csv(s3_resource, tips_file, s3so): + tm.assert_equal( + read_csv("s3://pandas-test/tips.csv", storage_options=s3so), read_csv(tips_file) + ) + # the following are decompressed by pandas, not fsspec + tm.assert_equal( + read_csv("s3://pandas-test/tips.csv.gz", storage_options=s3so), + read_csv(tips_file), + ) + tm.assert_equal( + read_csv("s3://pandas-test/tips.csv.bz2", storage_options=s3so), + read_csv(tips_file), + ) + + +@pytest.mark.parametrize("protocol", ["s3", "s3a", "s3n"]) +@td.skip_if_no("s3fs") +def test_s3_protocols(s3_resource, tips_file, protocol, s3so): + tm.assert_equal( + read_csv("%s://pandas-test/tips.csv" % protocol, storage_options=s3so), + read_csv(tips_file), + ) + + +@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) fastparquet +@td.skip_if_no("s3fs") +@td.skip_if_no("fastparquet") +def test_s3_parquet(s3_resource, s3so): + fn = "s3://pandas-test/test.parquet" + df1.to_parquet( + fn, index=False, engine="fastparquet", compression=None, storage_options=s3so + ) + df2 = read_parquet(fn, engine="fastparquet", storage_options=s3so) + tm.assert_equal(df1, df2) + + +@td.skip_if_installed("fsspec") +def test_not_present_exception(): + msg = "Missing optional dependency 'fsspec'|fsspec library is required" + with 
pytest.raises(ImportError, match=msg): + read_csv("memory://test/test.csv") + + +@td.skip_if_no("pyarrow") +def test_feather_options(fsspectest): + df = DataFrame({"a": [0]}) + df.to_feather("testmem://afile", storage_options={"test": "feather_write"}) + assert fsspectest.test[0] == "feather_write" + out = read_feather("testmem://afile", storage_options={"test": "feather_read"}) + assert fsspectest.test[0] == "feather_read" + tm.assert_frame_equal(df, out) + + +def test_pickle_options(fsspectest): + df = DataFrame({"a": [0]}) + df.to_pickle("testmem://afile", storage_options={"test": "pickle_write"}) + assert fsspectest.test[0] == "pickle_write" + out = read_pickle("testmem://afile", storage_options={"test": "pickle_read"}) + assert fsspectest.test[0] == "pickle_read" + tm.assert_frame_equal(df, out) + + +def test_json_options(fsspectest, compression): + df = DataFrame({"a": [0]}) + df.to_json( + "testmem://afile", + compression=compression, + storage_options={"test": "json_write"}, + ) + assert fsspectest.test[0] == "json_write" + out = read_json( + "testmem://afile", + compression=compression, + storage_options={"test": "json_read"}, + ) + assert fsspectest.test[0] == "json_read" + tm.assert_frame_equal(df, out) + + +def test_stata_options(fsspectest): + df = DataFrame({"a": [0]}) + df.to_stata( + "testmem://afile", storage_options={"test": "stata_write"}, write_index=False + ) + assert fsspectest.test[0] == "stata_write" + out = read_stata("testmem://afile", storage_options={"test": "stata_read"}) + assert fsspectest.test[0] == "stata_read" + tm.assert_frame_equal(df, out.astype("int64")) + + +@td.skip_if_no("tabulate") +def test_markdown_options(request, fsspectest): + import fsspec + + # error: Library stubs not installed for "tabulate" + # (or incompatible with Python 3.8) + import tabulate # type: ignore[import] + + request.node.add_marker( + pytest.mark.xfail( + fsspec.__version__ == VERSIONS["fsspec"] + and tabulate.__version__ == VERSIONS["tabulate"], + reason="Fails on the min version build", + raises=FileNotFoundError, + ) + ) + df = DataFrame({"a": [0]}) + df.to_markdown("testmem://afile", storage_options={"test": "md_write"}) + assert fsspectest.test[0] == "md_write" + assert fsspectest.cat("testmem://afile") + + +@td.skip_if_no("pyarrow") +def test_non_fsspec_options(): + with pytest.raises(ValueError, match="storage_options"): + read_csv("localfile", storage_options={"a": True}) + with pytest.raises(ValueError, match="storage_options"): + # separate test for parquet, which has a different code path + read_parquet("localfile", storage_options={"a": True}) + by = io.BytesIO() + + with pytest.raises(ValueError, match="storage_options"): + read_csv(by, storage_options={"a": True}) + + df = DataFrame({"a": [0]}) + with pytest.raises(ValueError, match="storage_options"): + df.to_parquet("nonfsspecpath", storage_options={"a": True}) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/test_gcs.py b/.local/lib/python3.8/site-packages/pandas/tests/io/test_gcs.py new file mode 100644 index 0000000..5e0da6f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/test_gcs.py @@ -0,0 +1,194 @@ +from io import BytesIO +import os +import zipfile + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + date_range, + read_csv, + read_excel, + read_json, + read_parquet, +) +import pandas._testing as tm +from pandas.util import _test_decorators as td + +import pandas.io.common as icom + + +@pytest.fixture +def gcs_buffer(monkeypatch): + """Emulate 
GCS using a binary buffer.""" + from fsspec import ( + AbstractFileSystem, + registry, + ) + + registry.target.clear() # remove state + + gcs_buffer = BytesIO() + gcs_buffer.close = lambda: True + + class MockGCSFileSystem(AbstractFileSystem): + def open(*args, **kwargs): + gcs_buffer.seek(0) + return gcs_buffer + + def ls(self, path, **kwargs): + # needed for pyarrow + return [{"name": path, "type": "file"}] + + monkeypatch.setattr("gcsfs.GCSFileSystem", MockGCSFileSystem) + + return gcs_buffer + + +@td.skip_if_no("gcsfs") +@pytest.mark.parametrize("format", ["csv", "json", "parquet", "excel", "markdown"]) +def test_to_read_gcs(gcs_buffer, format): + """ + Test that many to/read functions support GCS. + + GH 33987 + """ + from fsspec import registry + + registry.target.clear() # remove state + + df1 = DataFrame( + { + "int": [1, 3], + "float": [2.0, np.nan], + "str": ["t", "s"], + "dt": date_range("2018-06-18", periods=2), + } + ) + + path = f"gs://test/test.{format}" + + if format == "csv": + df1.to_csv(path, index=True) + df2 = read_csv(path, parse_dates=["dt"], index_col=0) + elif format == "excel": + path = "gs://test/test.xls" + df1.to_excel(path) + df2 = read_excel(path, parse_dates=["dt"], index_col=0) + elif format == "json": + df1.to_json(path) + df2 = read_json(path, convert_dates=["dt"]) + elif format == "parquet": + pytest.importorskip("pyarrow") + df1.to_parquet(path) + df2 = read_parquet(path) + elif format == "markdown": + pytest.importorskip("tabulate") + df1.to_markdown(path) + df2 = df1 + + tm.assert_frame_equal(df1, df2) + + +def assert_equal_zip_safe(result: bytes, expected: bytes, compression: str): + """ + For zip compression, only compare the CRC-32 checksum of the file contents + to avoid checking the time-dependent last-modified timestamp which + in some CI builds is off-by-one + + See https://en.wikipedia.org/wiki/ZIP_(file_format)#File_headers + """ + if compression == "zip": + # Only compare the CRC checksum of the file contents + with zipfile.ZipFile(BytesIO(result)) as exp, zipfile.ZipFile( + BytesIO(expected) + ) as res: + for res_info, exp_info in zip(res.infolist(), exp.infolist()): + assert res_info.CRC == exp_info.CRC + else: + assert result == expected + + +@td.skip_if_no("gcsfs") +@pytest.mark.parametrize("encoding", ["utf-8", "cp1251"]) +def test_to_csv_compression_encoding_gcs(gcs_buffer, compression_only, encoding): + """ + Compression and encoding should work with GCS.
+ + GH 35677 (to_csv, compression), GH 26124 (to_csv, encoding), and + GH 32392 (read_csv, encoding) + """ + from fsspec import registry + + registry.target.clear() # remove state + df = tm.makeDataFrame() + + # reference of compressed and encoded file + compression = {"method": compression_only} + if compression_only == "gzip": + compression["mtime"] = 1 # be reproducible + buffer = BytesIO() + df.to_csv(buffer, compression=compression, encoding=encoding, mode="wb") + + # write compressed file with explicit compression + path_gcs = "gs://test/test.csv" + df.to_csv(path_gcs, compression=compression, encoding=encoding) + res = gcs_buffer.getvalue() + expected = buffer.getvalue() + assert_equal_zip_safe(res, expected, compression_only) + + read_df = read_csv( + path_gcs, index_col=0, compression=compression_only, encoding=encoding + ) + tm.assert_frame_equal(df, read_df) + + # write compressed file with implicit compression + file_ext = icom._compression_to_extension[compression_only] + compression["method"] = "infer" + path_gcs += f".{file_ext}" + df.to_csv(path_gcs, compression=compression, encoding=encoding) + + res = gcs_buffer.getvalue() + expected = buffer.getvalue() + assert_equal_zip_safe(res, expected, compression_only) + + read_df = read_csv(path_gcs, index_col=0, compression="infer", encoding=encoding) + tm.assert_frame_equal(df, read_df) + + +@td.skip_if_no("fastparquet") +@td.skip_if_no("gcsfs") +def test_to_parquet_gcs_new_file(monkeypatch, tmpdir): + """Regression test for writing to a not-yet-existent GCS Parquet file.""" + from fsspec import ( + AbstractFileSystem, + registry, + ) + + registry.target.clear() # remove state + df1 = DataFrame( + { + "int": [1, 3], + "float": [2.0, np.nan], + "str": ["t", "s"], + "dt": date_range("2018-06-18", periods=2), + } + ) + + class MockGCSFileSystem(AbstractFileSystem): + def open(self, path, mode="r", *args): + if "w" not in mode: + raise FileNotFoundError + return open(os.path.join(tmpdir, "test.parquet"), mode) + + monkeypatch.setattr("gcsfs.GCSFileSystem", MockGCSFileSystem) + df1.to_parquet( + "gs://test/test.csv", index=True, engine="fastparquet", compression=None + ) + + +@td.skip_if_installed("gcsfs") +def test_gcs_not_present_exception(): + with tm.external_error_raised(ImportError): + read_csv("gs://test/test.csv") diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/test_html.py b/.local/lib/python3.8/site-packages/pandas/tests/io/test_html.py new file mode 100644 index 0000000..16c04c9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/test_html.py @@ -0,0 +1,1291 @@ +from functools import partial +from importlib import reload +from io import ( + BytesIO, + StringIO, +) +import os +from pathlib import Path +import re +import threading +from urllib.error import URLError + +import numpy as np +import pytest + +from pandas.compat import is_platform_windows +import pandas.util._test_decorators as td + +from pandas import ( + DataFrame, + MultiIndex, + Series, + Timestamp, + date_range, + read_csv, + to_datetime, +) +import pandas._testing as tm + +from pandas.io.common import file_path_to_url +import pandas.io.html +from pandas.io.html import read_html + +HERE = os.path.dirname(__file__) + + +@pytest.fixture( + params=[ + "chinese_utf-16.html", + "chinese_utf-32.html", + "chinese_utf-8.html", + "letz_latin1.html", + ] +) +def html_encoding_file(request, datapath): + """Parametrized fixture for HTML encoding test filenames.""" + return datapath("io", "data", "html_encoding", request.param) + + +def 
assert_framelist_equal(list1, list2, *args, **kwargs): + assert len(list1) == len(list2), ( + "lists are not of equal size " + f"len(list1) == {len(list1)}, " + f"len(list2) == {len(list2)}" + ) + msg = "not all list elements are DataFrames" + both_frames = all( + map( + lambda x, y: isinstance(x, DataFrame) and isinstance(y, DataFrame), + list1, + list2, + ) + ) + assert both_frames, msg + for frame_i, frame_j in zip(list1, list2): + tm.assert_frame_equal(frame_i, frame_j, *args, **kwargs) + assert not frame_i.empty, "frames are both empty" + + +@td.skip_if_no("bs4") +@td.skip_if_no("html5lib") +def test_bs4_version_fails(monkeypatch, datapath): + import bs4 + + monkeypatch.setattr(bs4, "__version__", "4.2") + with pytest.raises(ImportError, match="Pandas requires version"): + read_html(datapath("io", "data", "html", "spam.html"), flavor="bs4") + + +def test_invalid_flavor(): + url = "google.com" + flavor = "invalid flavor" + msg = r"\{" + flavor + r"\} is not a valid set of flavors" + + with pytest.raises(ValueError, match=msg): + read_html(url, match="google", flavor=flavor) + + +@td.skip_if_no("bs4") +@td.skip_if_no("lxml") +@td.skip_if_no("html5lib") +def test_same_ordering(datapath): + filename = datapath("io", "data", "html", "valid_markup.html") + dfs_lxml = read_html(filename, index_col=0, flavor=["lxml"]) + dfs_bs4 = read_html(filename, index_col=0, flavor=["bs4"]) + assert_framelist_equal(dfs_lxml, dfs_bs4) + + +@pytest.mark.parametrize( + "flavor", + [ + pytest.param("bs4", marks=[td.skip_if_no("bs4"), td.skip_if_no("html5lib")]), + pytest.param("lxml", marks=td.skip_if_no("lxml")), + ], + scope="class", +) +class TestReadHtml: + @pytest.fixture(autouse=True) + def set_files(self, datapath): + self.spam_data = datapath("io", "data", "html", "spam.html") + self.spam_data_kwargs = {} + self.spam_data_kwargs["encoding"] = "UTF-8" + self.banklist_data = datapath("io", "data", "html", "banklist.html") + + @pytest.fixture(autouse=True, scope="function") + def set_defaults(self, flavor, request): + self.read_html = partial(read_html, flavor=flavor) + yield + + def test_to_html_compat(self): + df = ( + tm.makeCustomDataframe( + 4, + 3, + data_gen_f=lambda *args: np.random.rand(), + c_idx_names=False, + r_idx_names=False, + ) + .applymap("{:.3f}".format) + .astype(float) + ) + out = df.to_html() + res = self.read_html(out, attrs={"class": "dataframe"}, index_col=0)[0] + tm.assert_frame_equal(res, df) + + @pytest.mark.network + @tm.network + def test_banklist_url_positional_match(self): + url = "http://www.fdic.gov/resources/resolutions/bank-failures/failed-bank-list/index.html" # noqa E501 + # Passing match argument as positional should cause a FutureWarning. 
+ with tm.assert_produces_warning(FutureWarning): + df1 = self.read_html( + # lxml cannot find attrs leave out for now + url, + "First Federal Bank of Florida", # attrs={"class": "dataTable"} + ) + with tm.assert_produces_warning(FutureWarning): + # lxml cannot find attrs leave out for now + df2 = self.read_html( + url, + "Metcalf Bank", + ) # attrs={"class": "dataTable"}) + + assert_framelist_equal(df1, df2) + + @pytest.mark.network + @tm.network + def test_banklist_url(self): + url = "http://www.fdic.gov/resources/resolutions/bank-failures/failed-bank-list/index.html" # noqa E501 + df1 = self.read_html( + # lxml cannot find attrs leave out for now + url, + match="First Federal Bank of Florida", # attrs={"class": "dataTable"} + ) + # lxml cannot find attrs leave out for now + df2 = self.read_html( + url, + match="Metcalf Bank", + ) # attrs={"class": "dataTable"}) + + assert_framelist_equal(df1, df2) + + @pytest.mark.network + @tm.network + def test_spam_url(self): + url = ( + "https://raw.githubusercontent.com/pandas-dev/pandas/main/" + "pandas/tests/io/data/html/spam.html" + ) + df1 = self.read_html(url, match=".*Water.*") + df2 = self.read_html(url, match="Unit") + + assert_framelist_equal(df1, df2) + + @pytest.mark.slow + def test_banklist(self): + df1 = self.read_html( + self.banklist_data, match=".*Florida.*", attrs={"id": "table"} + ) + df2 = self.read_html( + self.banklist_data, match="Metcalf Bank", attrs={"id": "table"} + ) + + assert_framelist_equal(df1, df2) + + def test_spam(self): + df1 = self.read_html(self.spam_data, match=".*Water.*") + df2 = self.read_html(self.spam_data, match="Unit") + assert_framelist_equal(df1, df2) + + assert df1[0].iloc[0, 0] == "Proximates" + assert df1[0].columns[0] == "Nutrient" + + def test_spam_no_match(self): + dfs = self.read_html(self.spam_data) + for df in dfs: + assert isinstance(df, DataFrame) + + def test_banklist_no_match(self): + dfs = self.read_html(self.banklist_data, attrs={"id": "table"}) + for df in dfs: + assert isinstance(df, DataFrame) + + def test_spam_header(self): + df = self.read_html(self.spam_data, match=".*Water.*", header=2)[0] + assert df.columns[0] == "Proximates" + assert not df.empty + + def test_skiprows_int(self): + df1 = self.read_html(self.spam_data, match=".*Water.*", skiprows=1) + df2 = self.read_html(self.spam_data, match="Unit", skiprows=1) + + assert_framelist_equal(df1, df2) + + def test_skiprows_range(self): + df1 = self.read_html(self.spam_data, match=".*Water.*", skiprows=range(2)) + df2 = self.read_html(self.spam_data, match="Unit", skiprows=range(2)) + + assert_framelist_equal(df1, df2) + + def test_skiprows_list(self): + df1 = self.read_html(self.spam_data, match=".*Water.*", skiprows=[1, 2]) + df2 = self.read_html(self.spam_data, match="Unit", skiprows=[2, 1]) + + assert_framelist_equal(df1, df2) + + def test_skiprows_set(self): + df1 = self.read_html(self.spam_data, match=".*Water.*", skiprows={1, 2}) + df2 = self.read_html(self.spam_data, match="Unit", skiprows={2, 1}) + + assert_framelist_equal(df1, df2) + + def test_skiprows_slice(self): + df1 = self.read_html(self.spam_data, match=".*Water.*", skiprows=1) + df2 = self.read_html(self.spam_data, match="Unit", skiprows=1) + + assert_framelist_equal(df1, df2) + + def test_skiprows_slice_short(self): + df1 = self.read_html(self.spam_data, match=".*Water.*", skiprows=slice(2)) + df2 = self.read_html(self.spam_data, match="Unit", skiprows=slice(2)) + + assert_framelist_equal(df1, df2) + + def test_skiprows_slice_long(self): + df1 = 
self.read_html(self.spam_data, match=".*Water.*", skiprows=slice(2, 5)) + df2 = self.read_html(self.spam_data, match="Unit", skiprows=slice(4, 1, -1)) + + assert_framelist_equal(df1, df2) + + def test_skiprows_ndarray(self): + df1 = self.read_html(self.spam_data, match=".*Water.*", skiprows=np.arange(2)) + df2 = self.read_html(self.spam_data, match="Unit", skiprows=np.arange(2)) + + assert_framelist_equal(df1, df2) + + def test_skiprows_invalid(self): + with pytest.raises(TypeError, match=("is not a valid type for skipping rows")): + self.read_html(self.spam_data, match=".*Water.*", skiprows="asdf") + + def test_index(self): + df1 = self.read_html(self.spam_data, match=".*Water.*", index_col=0) + df2 = self.read_html(self.spam_data, match="Unit", index_col=0) + assert_framelist_equal(df1, df2) + + def test_header_and_index_no_types(self): + df1 = self.read_html(self.spam_data, match=".*Water.*", header=1, index_col=0) + df2 = self.read_html(self.spam_data, match="Unit", header=1, index_col=0) + assert_framelist_equal(df1, df2) + + def test_header_and_index_with_types(self): + df1 = self.read_html(self.spam_data, match=".*Water.*", header=1, index_col=0) + df2 = self.read_html(self.spam_data, match="Unit", header=1, index_col=0) + assert_framelist_equal(df1, df2) + + def test_infer_types(self): + + # 10892 infer_types removed + df1 = self.read_html(self.spam_data, match=".*Water.*", index_col=0) + df2 = self.read_html(self.spam_data, match="Unit", index_col=0) + assert_framelist_equal(df1, df2) + + def test_string_io(self): + with open(self.spam_data, **self.spam_data_kwargs) as f: + data1 = StringIO(f.read()) + + with open(self.spam_data, **self.spam_data_kwargs) as f: + data2 = StringIO(f.read()) + + df1 = self.read_html(data1, match=".*Water.*") + df2 = self.read_html(data2, match="Unit") + assert_framelist_equal(df1, df2) + + def test_string(self): + with open(self.spam_data, **self.spam_data_kwargs) as f: + data = f.read() + + df1 = self.read_html(data, match=".*Water.*") + df2 = self.read_html(data, match="Unit") + + assert_framelist_equal(df1, df2) + + def test_file_like(self): + with open(self.spam_data, **self.spam_data_kwargs) as f: + df1 = self.read_html(f, match=".*Water.*") + + with open(self.spam_data, **self.spam_data_kwargs) as f: + df2 = self.read_html(f, match="Unit") + + assert_framelist_equal(df1, df2) + + @pytest.mark.network + @tm.network + def test_bad_url_protocol(self): + with pytest.raises(URLError, match="urlopen error unknown url type: git"): + self.read_html("git://github.com", match=".*Water.*") + + @pytest.mark.slow + @pytest.mark.network + @tm.network + def test_invalid_url(self): + msg = ( + "Name or service not known|Temporary failure in name resolution|" + "No tables found" + ) + with pytest.raises((URLError, ValueError), match=msg): + self.read_html("http://www.a23950sdfa908sd.com", match=".*Water.*") + + @pytest.mark.slow + def test_file_url(self): + url = self.banklist_data + dfs = self.read_html( + file_path_to_url(os.path.abspath(url)), match="First", attrs={"id": "table"} + ) + assert isinstance(dfs, list) + for df in dfs: + assert isinstance(df, DataFrame) + + @pytest.mark.slow + def test_invalid_table_attrs(self): + url = self.banklist_data + with pytest.raises(ValueError, match="No tables found"): + self.read_html( + url, match="First Federal Bank of Florida", attrs={"id": "tasdfable"} + ) + + def _bank_data(self, *args, **kwargs): + return self.read_html( + self.banklist_data, match="Metcalf", attrs={"id": "table"}, *args, **kwargs + ) + + 
@pytest.mark.slow + def test_multiindex_header(self): + df = self._bank_data(header=[0, 1])[0] + assert isinstance(df.columns, MultiIndex) + + @pytest.mark.slow + def test_multiindex_index(self): + df = self._bank_data(index_col=[0, 1])[0] + assert isinstance(df.index, MultiIndex) + + @pytest.mark.slow + def test_multiindex_header_index(self): + df = self._bank_data(header=[0, 1], index_col=[0, 1])[0] + assert isinstance(df.columns, MultiIndex) + assert isinstance(df.index, MultiIndex) + + @pytest.mark.slow + def test_multiindex_header_skiprows_tuples(self): + df = self._bank_data(header=[0, 1], skiprows=1)[0] + assert isinstance(df.columns, MultiIndex) + + @pytest.mark.slow + def test_multiindex_header_skiprows(self): + df = self._bank_data(header=[0, 1], skiprows=1)[0] + assert isinstance(df.columns, MultiIndex) + + @pytest.mark.slow + def test_multiindex_header_index_skiprows(self): + df = self._bank_data(header=[0, 1], index_col=[0, 1], skiprows=1)[0] + assert isinstance(df.index, MultiIndex) + assert isinstance(df.columns, MultiIndex) + + @pytest.mark.slow + def test_regex_idempotency(self): + url = self.banklist_data + dfs = self.read_html( + file_path_to_url(os.path.abspath(url)), + match=re.compile(re.compile("Florida")), + attrs={"id": "table"}, + ) + assert isinstance(dfs, list) + for df in dfs: + assert isinstance(df, DataFrame) + + def test_negative_skiprows(self): + msg = r"\(you passed a negative value\)" + with pytest.raises(ValueError, match=msg): + self.read_html(self.spam_data, match="Water", skiprows=-1) + + @pytest.mark.network + @tm.network + def test_multiple_matches(self): + url = "https://docs.python.org/2/" + dfs = self.read_html(url, match="Python") + assert len(dfs) > 1 + + @pytest.mark.network + @tm.network + def test_python_docs_table(self): + url = "https://docs.python.org/2/" + dfs = self.read_html(url, match="Python") + zz = [df.iloc[0, 0][0:4] for df in dfs] + assert sorted(zz) == sorted(["Repo", "What"]) + + def test_empty_tables(self): + """ + Make sure that read_html ignores empty tables. + """ + html = """ + + + + + + + + + + + + + +
<table> + <thead> + <tr> + <th>A</th> + <th>B</th> + </tr> + </thead> + <tbody> + <tr> + <td>1</td> + <td>2</td> + </tr> + </tbody> + </table> + <table> + <tbody> + </tbody> + </table>
+ """ + result = self.read_html(html) + assert len(result) == 1 + + def test_multiple_tbody(self): + # GH-20690 + # Read all tbody tags within a single table. + result = self.read_html( + """ + + + + + + + + + + + + + + + + + + +
<table> + <thead> + <tr> + <th>A</th> + <th>B</th> + </tr> + </thead> + <tbody> + <tr> + <td>1</td> + <td>2</td> + </tr> + </tbody> + <tbody> + <tr> + <td>3</td> + <td>4</td> + </tr> + </tbody> + </table>
""" + )[0] + + expected = DataFrame(data=[[1, 2], [3, 4]], columns=["A", "B"]) + + tm.assert_frame_equal(result, expected) + + def test_header_and_one_column(self): + """ + Don't fail with bs4 when there is a header and only one column + as described in issue #9178 + """ + result = self.read_html( + """ + + + + + + + + + + +
<table> + <thead> + <tr> + <th>Header</th> + </tr> + </thead> + <tbody> + <tr> + <td>first</td> + </tr> + </tbody> + </table>
""" + )[0] + + expected = DataFrame(data={"Header": "first"}, index=[0]) + + tm.assert_frame_equal(result, expected) + + def test_thead_without_tr(self): + """ + Ensure parser adds
+ + + + + + + + + + + + + + +
CountryMunicipalityYear
UkraineOdessa1944
""" + )[0] + + expected = DataFrame( + data=[["Ukraine", "Odessa", 1944]], + columns=["Country", "Municipality", "Year"], + ) + + tm.assert_frame_equal(result, expected) + + def test_tfoot_read(self): + """ + Make sure that read_html reads tfoot, containing td or th. + Ignores empty tfoot + """ + data_template = """ + + + + + + + + + + + + + + {footer} + +
AB
bodyAbodyB
""" + + expected1 = DataFrame(data=[["bodyA", "bodyB"]], columns=["A", "B"]) + + expected2 = DataFrame( + data=[["bodyA", "bodyB"], ["footA", "footB"]], columns=["A", "B"] + ) + + data1 = data_template.format(footer="") + data2 = data_template.format(footer="footAfootB") + + result1 = self.read_html(data1)[0] + result2 = self.read_html(data2)[0] + + tm.assert_frame_equal(result1, expected1) + tm.assert_frame_equal(result2, expected2) + + def test_parse_header_of_non_string_column(self): + # GH5048: if header is specified explicitly, an int column should be + # parsed as int while its header is parsed as str + result = self.read_html( + """ + + + + + + + + + +
SI
text1944
+ """, + header=0, + )[0] + + expected = DataFrame([["text", 1944]], columns=("S", "I")) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.slow + def test_banklist_header(self, datapath): + from pandas.io.html import _remove_whitespace + + def try_remove_ws(x): + try: + return _remove_whitespace(x) + except AttributeError: + return x + + df = self.read_html(self.banklist_data, match="Metcalf", attrs={"id": "table"})[ + 0 + ] + ground_truth = read_csv( + datapath("io", "data", "csv", "banklist.csv"), + converters={"Updated Date": Timestamp, "Closing Date": Timestamp}, + ) + assert df.shape == ground_truth.shape + old = [ + "First Vietnamese American BankIn Vietnamese", + "Westernbank Puerto RicoEn Espanol", + "R-G Premier Bank of Puerto RicoEn Espanol", + "EurobankEn Espanol", + "Sanderson State BankEn Espanol", + "Washington Mutual Bank(Including its subsidiary Washington " + "Mutual Bank FSB)", + "Silver State BankEn Espanol", + "AmTrade International BankEn Espanol", + "Hamilton Bank, NAEn Espanol", + "The Citizens Savings BankPioneer Community Bank, Inc.", + ] + new = [ + "First Vietnamese American Bank", + "Westernbank Puerto Rico", + "R-G Premier Bank of Puerto Rico", + "Eurobank", + "Sanderson State Bank", + "Washington Mutual Bank", + "Silver State Bank", + "AmTrade International Bank", + "Hamilton Bank, NA", + "The Citizens Savings Bank", + ] + dfnew = df.applymap(try_remove_ws).replace(old, new) + gtnew = ground_truth.applymap(try_remove_ws) + converted = dfnew._convert(datetime=True, numeric=True) + date_cols = ["Closing Date", "Updated Date"] + converted[date_cols] = converted[date_cols].apply(to_datetime) + tm.assert_frame_equal(converted, gtnew) + + @pytest.mark.slow + def test_gold_canyon(self): + gc = "Gold Canyon" + with open(self.banklist_data) as f: + raw_text = f.read() + + assert gc in raw_text + df = self.read_html( + self.banklist_data, match="Gold Canyon", attrs={"id": "table"} + )[0] + assert gc in df.to_string() + + def test_different_number_of_cols(self): + expected = self.read_html( + """ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
<table> + <thead> + <tr style="text-align: right;"> + <th></th> + <th>C_l0_g0</th> + <th>C_l0_g1</th> + <th>C_l0_g2</th> + <th>C_l0_g3</th> + <th>C_l0_g4</th> + </tr> + </thead> + <tbody> + <tr> + <th>R_l0_g0</th> + <td> 0.763</td> + <td> 0.233</td> + <td> nan</td> + <td> nan</td> + <td> nan</td> + </tr> + <tr> + <th>R_l0_g1</th> + <td> 0.244</td> + <td> 0.285</td> + <td> 0.392</td> + <td> 0.137</td> + <td> 0.222</td> + </tr> + </tbody> + </table>
""", + index_col=0, + )[0] + + result = self.read_html( + """ + + + + + + + + + + + + + + + + + + + + + + + + + +
<table> + <thead> + <tr style="text-align: right;"> + <th></th> + <th>C_l0_g0</th> + <th>C_l0_g1</th> + <th>C_l0_g2</th> + <th>C_l0_g3</th> + <th>C_l0_g4</th> + </tr> + </thead> + <tbody> + <tr> + <th>R_l0_g0</th> + <td> 0.763</td> + <td> 0.233</td> + </tr> + <tr> + <th>R_l0_g1</th> + <td> 0.244</td> + <td> 0.285</td> + <td> 0.392</td> + <td> 0.137</td> + <td> 0.222</td> + </tr> + </tbody> + </table>
""", + index_col=0, + )[0] + + tm.assert_frame_equal(result, expected) + + def test_colspan_rowspan_1(self): + # GH17054 + result = self.read_html( + """ + + + + + + + + + + + +
<table> + <tr> + <th>A</th> + <th colspan="1">B</th> + <th rowspan="1">C</th> + </tr> + <tr> + <td>a</td> + <td>b</td> + <td>c</td> + </tr> + </table>
+ """ + )[0] + + expected = DataFrame([["a", "b", "c"]], columns=["A", "B", "C"]) + + tm.assert_frame_equal(result, expected) + + def test_colspan_rowspan_copy_values(self): + # GH17054 + + # In ASCII, with lowercase letters being copies: + # + # X x Y Z W + # A B b z C + + result = self.read_html( + """ + + + + + + + + + + + + +
<table> + <tr> + <td colspan="2">X</td> + <td>Y</td> + <td rowspan="2">Z</td> + <td>W</td> + </tr> + <tr> + <td>A</td> + <td colspan="2">B</td> + <td>C</td> + </tr> + </table>
+ """, + header=0, + )[0] + + expected = DataFrame( + data=[["A", "B", "B", "Z", "C"]], columns=["X", "X.1", "Y", "Z", "W"] + ) + + tm.assert_frame_equal(result, expected) + + def test_colspan_rowspan_both_not_1(self): + # GH17054 + + # In ASCII, with lowercase letters being copies: + # + # A B b b C + # a b b b D + + result = self.read_html( + """ + + + + + + + + + +
<table> + <tr> + <td rowspan="2">A</td> + <td rowspan="2" colspan="3">B</td> + <td>C</td> + </tr> + <tr> + <td>D</td> + </tr> + </table>
+ """, + header=0, + )[0] + + expected = DataFrame( + data=[["A", "B", "B", "B", "D"]], columns=["A", "B", "B.1", "B.2", "C"] + ) + + tm.assert_frame_equal(result, expected) + + def test_rowspan_at_end_of_row(self): + # GH17054 + + # In ASCII, with lowercase letters being copies: + # + # A B + # C b + + result = self.read_html( + """ + + + + + + + + +
<table> + <tr> + <td>A</td> + <td rowspan="2">B</td> + </tr> + <tr> + <td>C</td> + </tr> + </table>
+ """, + header=0, + )[0] + + expected = DataFrame(data=[["C", "B"]], columns=["A", "B"]) + + tm.assert_frame_equal(result, expected) + + def test_rowspan_only_rows(self): + # GH17054 + + result = self.read_html( + """ + + + + + +
<table> + <tr> + <td rowspan="3">A</td> + <td rowspan="3">B</td> + </tr> + </table>
+ """, + header=0, + )[0] + + expected = DataFrame(data=[["A", "B"], ["A", "B"]], columns=["A", "B"]) + + tm.assert_frame_equal(result, expected) + + def test_header_inferred_from_rows_with_only_th(self): + # GH17054 + result = self.read_html( + """ + + + + + + + + + + + + + +
<table> + <tr> + <th>A</th> + <th>B</th> + </tr> + <tr> + <th>a</th> + <th>b</th> + </tr> + <tr> + <td>1</td> + <td>2</td> + </tr> + </table>
+ """ + )[0] + + columns = MultiIndex(levels=[["A", "B"], ["a", "b"]], codes=[[0, 1], [0, 1]]) + expected = DataFrame(data=[[1, 2]], columns=columns) + + tm.assert_frame_equal(result, expected) + + def test_parse_dates_list(self): + df = DataFrame({"date": date_range("1/1/2001", periods=10)}) + expected = df.to_html() + res = self.read_html(expected, parse_dates=[1], index_col=0) + tm.assert_frame_equal(df, res[0]) + res = self.read_html(expected, parse_dates=["date"], index_col=0) + tm.assert_frame_equal(df, res[0]) + + def test_parse_dates_combine(self): + raw_dates = Series(date_range("1/1/2001", periods=10)) + df = DataFrame( + { + "date": raw_dates.map(lambda x: str(x.date())), + "time": raw_dates.map(lambda x: str(x.time())), + } + ) + res = self.read_html( + df.to_html(), parse_dates={"datetime": [1, 2]}, index_col=1 + ) + newdf = DataFrame({"datetime": raw_dates}) + tm.assert_frame_equal(newdf, res[0]) + + def test_wikipedia_states_table(self, datapath): + data = datapath("io", "data", "html", "wikipedia_states.html") + assert os.path.isfile(data), f"{repr(data)} is not a file" + assert os.path.getsize(data), f"{repr(data)} is an empty file" + result = self.read_html(data, match="Arizona", header=1)[0] + assert result.shape == (60, 12) + assert "Unnamed" in result.columns[-1] + assert result["sq mi"].dtype == np.dtype("float64") + assert np.allclose(result.loc[0, "sq mi"], 665384.04) + + def test_wikipedia_states_multiindex(self, datapath): + data = datapath("io", "data", "html", "wikipedia_states.html") + result = self.read_html(data, match="Arizona", index_col=0)[0] + assert result.shape == (60, 11) + assert "Unnamed" in result.columns[-1][1] + assert result.columns.nlevels == 2 + assert np.allclose(result.loc["Alaska", ("Total area[2]", "sq mi")], 665384.04) + + def test_parser_error_on_empty_header_row(self): + result = self.read_html( + """ + + + + + + + + +
<table> + <thead> + <tr><th></th><th></th></tr> + <tr><th>A</th><th>B</th></tr> + </thead> + <tbody> + <tr><td>a</td><td>b</td></tr> + </tbody> + </table>
+ """, + header=[0, 1], + ) + expected = DataFrame( + [["a", "b"]], + columns=MultiIndex.from_tuples( + [("Unnamed: 0_level_0", "A"), ("Unnamed: 1_level_0", "B")] + ), + ) + tm.assert_frame_equal(result[0], expected) + + def test_decimal_rows(self): + # GH 12907 + result = self.read_html( + """ + + + + + + + + + + + + +
<table> + <thead> + <tr> + <th>Header</th> + </tr> + </thead> + <tbody> + <tr> + <td>1100#101</td> + </tr> + </tbody> + </table>
+ + """, + decimal="#", + )[0] + + expected = DataFrame(data={"Header": 1100.101}, index=[0]) + + assert result["Header"].dtype == np.dtype("float64") + tm.assert_frame_equal(result, expected) + + def test_bool_header_arg(self): + # GH 6114 + msg = re.escape( + "Passing a bool to header is invalid. Use header=None for no header or " + "header=int or list-like of ints to specify the row(s) making up the " + "column names" + ) + for arg in [True, False]: + with pytest.raises(TypeError, match=msg): + self.read_html(self.spam_data, header=arg) + + def test_converters(self): + # GH 13461 + result = self.read_html( + """ + + + + + + + + + + + + + +
<table> + <thead> + <tr> + <th>a</th> + </tr> + </thead> + <tbody> + <tr> + <td>0.763</td> + </tr> + <tr> + <td>0.244</td> + </tr> + </tbody> + </table>
""", + converters={"a": str}, + )[0] + + expected = DataFrame({"a": ["0.763", "0.244"]}) + + tm.assert_frame_equal(result, expected) + + def test_na_values(self): + # GH 13461 + result = self.read_html( + """ + + + + + + + + + + + + + +
<table> + <thead> + <tr> + <th>a</th> + </tr> + </thead> + <tbody> + <tr> + <td>0.763</td> + </tr> + <tr> + <td>0.244</td> + </tr> + </tbody> + </table>
""", + na_values=[0.244], + )[0] + + expected = DataFrame({"a": [0.763, np.nan]}) + + tm.assert_frame_equal(result, expected) + + def test_keep_default_na(self): + html_data = """ + + + + + + + + + + + + + +
<table> + <thead> + <tr> + <th>a</th> + </tr> + </thead> + <tbody> + <tr> + <td>N/A</td> + </tr> + <tr> + <td>NA</td> + </tr> + </tbody> + </table>
""" + + expected_df = DataFrame({"a": ["N/A", "NA"]}) + html_df = self.read_html(html_data, keep_default_na=False)[0] + tm.assert_frame_equal(expected_df, html_df) + + expected_df = DataFrame({"a": [np.nan, np.nan]}) + html_df = self.read_html(html_data, keep_default_na=True)[0] + tm.assert_frame_equal(expected_df, html_df) + + def test_preserve_empty_rows(self): + result = self.read_html( + """ + + + + + + + + + + + + + +
<table> + <tr> + <th>A</th> + <th>B</th> + </tr> + <tr> + <td>a</td> + <td>b</td> + </tr> + <tr> + <td></td> + <td></td> + </tr> + </table>
+ """ + )[0] + + expected = DataFrame(data=[["a", "b"], [np.nan, np.nan]], columns=["A", "B"]) + + tm.assert_frame_equal(result, expected) + + def test_ignore_empty_rows_when_inferring_header(self): + result = self.read_html( + """ + + + + + + + + + +
<table> + <thead> + <tr><th></th><th></th></tr> + <tr><th>A</th><th>B</th></tr> + <tr><th>a</th><th>b</th></tr> + </thead> + <tbody> + <tr><td>1</td><td>2</td></tr> + </tbody> + </table>
+ """ + )[0] + + columns = MultiIndex(levels=[["A", "B"], ["a", "b"]], codes=[[0, 1], [0, 1]]) + expected = DataFrame(data=[[1, 2]], columns=columns) + + tm.assert_frame_equal(result, expected) + + def test_multiple_header_rows(self): + # Issue #13434 + expected_df = DataFrame( + data=[("Hillary", 68, "D"), ("Bernie", 74, "D"), ("Donald", 69, "R")] + ) + expected_df.columns = [ + ["Unnamed: 0_level_0", "Age", "Party"], + ["Name", "Unnamed: 1_level_1", "Unnamed: 2_level_1"], + ] + html = expected_df.to_html(index=False) + html_df = self.read_html(html)[0] + tm.assert_frame_equal(expected_df, html_df) + + def test_works_on_valid_markup(self, datapath): + filename = datapath("io", "data", "html", "valid_markup.html") + dfs = self.read_html(filename, index_col=0) + assert isinstance(dfs, list) + assert isinstance(dfs[0], DataFrame) + + @pytest.mark.slow + def test_fallback_success(self, datapath): + banklist_data = datapath("io", "data", "html", "banklist.html") + self.read_html(banklist_data, match=".*Water.*", flavor=["lxml", "html5lib"]) + + def test_to_html_timestamp(self): + rng = date_range("2000-01-01", periods=10) + df = DataFrame(np.random.randn(10, 4), index=rng) + + result = df.to_html() + assert "2000-01-01" in result + + @pytest.mark.parametrize( + "displayed_only,exp0,exp1", + [ + (True, DataFrame(["foo"]), None), + (False, DataFrame(["foo bar baz qux"]), DataFrame(["foo"])), + ], + ) + def test_displayed_only(self, displayed_only, exp0, exp1): + # GH 20027 + data = StringIO( + """ + + + + + +
<html> + <body> + <table> + <tr> + <td> + foo + <span style="display:none;text-align:center">bar</span> + <span style="display:none">baz</span> + <span style="display: none">qux</span> + </td> + </tr> + </table> + <table style="display: none"> + <tr> + <td>foo</td> + </tr> + </table> + </body> + </html>
+ + """ + ) + + dfs = self.read_html(data, displayed_only=displayed_only) + tm.assert_frame_equal(dfs[0], exp0) + + if exp1 is not None: + tm.assert_frame_equal(dfs[1], exp1) + else: + assert len(dfs) == 1 # Should not parse hidden table + + @pytest.mark.filterwarnings( + "ignore:You provided Unicode markup but also provided a value for " + "from_encoding.*:UserWarning" + ) + def test_encode(self, html_encoding_file): + base_path = os.path.basename(html_encoding_file) + root = os.path.splitext(base_path)[0] + _, encoding = root.split("_") + + try: + with open(html_encoding_file, "rb") as fobj: + from_string = self.read_html( + fobj.read(), encoding=encoding, index_col=0 + ).pop() + + with open(html_encoding_file, "rb") as fobj: + from_file_like = self.read_html( + BytesIO(fobj.read()), encoding=encoding, index_col=0 + ).pop() + + from_filename = self.read_html( + html_encoding_file, encoding=encoding, index_col=0 + ).pop() + tm.assert_frame_equal(from_string, from_file_like) + tm.assert_frame_equal(from_string, from_filename) + except Exception: + # seems utf-16/32 fail on windows + if is_platform_windows(): + if "16" in encoding or "32" in encoding: + pytest.skip() + raise + + def test_parse_failure_unseekable(self): + # Issue #17975 + + if self.read_html.keywords.get("flavor") == "lxml": + pytest.skip("Not applicable for lxml") + + class UnseekableStringIO(StringIO): + def seekable(self): + return False + + bad = UnseekableStringIO( + """ +
<table><tr><td>spam<foobr />eggs</td></tr></table>
""" + ) + + assert self.read_html(bad) + + with pytest.raises(ValueError, match="passed a non-rewindable file object"): + self.read_html(bad) + + def test_parse_failure_rewinds(self): + # Issue #17975 + + class MockFile: + def __init__(self, data): + self.data = data + self.at_end = False + + def read(self, size=None): + data = "" if self.at_end else self.data + self.at_end = True + return data + + def seek(self, offset): + self.at_end = False + + def seekable(self): + return True + + def __iter__(self): + # to fool `is_file_like`, should never end up here + assert False + + good = MockFile("
<table><tr><td>spam<br />eggs</td></tr></table>") + bad = MockFile("<table><tr><td>spam<foobr />eggs</td></tr></table>
") + + assert self.read_html(good) + assert self.read_html(bad) + + @pytest.mark.slow + def test_importcheck_thread_safety(self, datapath): + # see gh-16928 + + class ErrorThread(threading.Thread): + def run(self): + try: + super().run() + except Exception as err: + self.err = err + else: + self.err = None + + # force import check by reinitalising global vars in html.py + reload(pandas.io.html) + + filename = datapath("io", "data", "html", "valid_markup.html") + helper_thread1 = ErrorThread(target=self.read_html, args=(filename,)) + helper_thread2 = ErrorThread(target=self.read_html, args=(filename,)) + + helper_thread1.start() + helper_thread2.start() + + while helper_thread1.is_alive() or helper_thread2.is_alive(): + pass + assert None is helper_thread1.err is helper_thread2.err + + def test_parse_path_object(self, datapath): + # GH 37705 + file_path_string = datapath("io", "data", "html", "spam.html") + file_path = Path(file_path_string) + df1 = self.read_html(file_path_string)[0] + df2 = self.read_html(file_path)[0] + tm.assert_frame_equal(df1, df2) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/test_orc.py b/.local/lib/python3.8/site-packages/pandas/tests/io/test_orc.py new file mode 100644 index 0000000..f34e9b9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/test_orc.py @@ -0,0 +1,226 @@ +""" test orc compat """ +import datetime +import os + +import numpy as np +import pytest + +import pandas as pd +from pandas import read_orc +import pandas._testing as tm + +pytest.importorskip("pyarrow.orc") + +pytestmark = pytest.mark.filterwarnings( + "ignore:RangeIndex.* is deprecated:DeprecationWarning" +) + + +@pytest.fixture +def dirpath(datapath): + return datapath("io", "data", "orc") + + +def test_orc_reader_empty(dirpath): + columns = [ + "boolean1", + "byte1", + "short1", + "int1", + "long1", + "float1", + "double1", + "bytes1", + "string1", + ] + dtypes = [ + "bool", + "int8", + "int16", + "int32", + "int64", + "float32", + "float64", + "object", + "object", + ] + expected = pd.DataFrame(index=pd.RangeIndex(0)) + for colname, dtype in zip(columns, dtypes): + expected[colname] = pd.Series(dtype=dtype) + + inputfile = os.path.join(dirpath, "TestOrcFile.emptyFile.orc") + got = read_orc(inputfile, columns=columns) + + tm.assert_equal(expected, got) + + +def test_orc_reader_basic(dirpath): + data = { + "boolean1": np.array([False, True], dtype="bool"), + "byte1": np.array([1, 100], dtype="int8"), + "short1": np.array([1024, 2048], dtype="int16"), + "int1": np.array([65536, 65536], dtype="int32"), + "long1": np.array([9223372036854775807, 9223372036854775807], dtype="int64"), + "float1": np.array([1.0, 2.0], dtype="float32"), + "double1": np.array([-15.0, -5.0], dtype="float64"), + "bytes1": np.array([b"\x00\x01\x02\x03\x04", b""], dtype="object"), + "string1": np.array(["hi", "bye"], dtype="object"), + } + expected = pd.DataFrame.from_dict(data) + + inputfile = os.path.join(dirpath, "TestOrcFile.test1.orc") + got = read_orc(inputfile, columns=data.keys()) + + tm.assert_equal(expected, got) + + +def test_orc_reader_decimal(dirpath): + from decimal import Decimal + + # Only testing the first 10 rows of data + data = { + "_col0": np.array( + [ + Decimal("-1000.50000"), + Decimal("-999.60000"), + Decimal("-998.70000"), + Decimal("-997.80000"), + Decimal("-996.90000"), + Decimal("-995.10000"), + Decimal("-994.11000"), + Decimal("-993.12000"), + Decimal("-992.13000"), + Decimal("-991.14000"), + ], + dtype="object", + ) + } + expected = 
pd.DataFrame.from_dict(data) + + inputfile = os.path.join(dirpath, "TestOrcFile.decimal.orc") + got = read_orc(inputfile).iloc[:10] + + tm.assert_equal(expected, got) + + +def test_orc_reader_date_low(dirpath): + data = { + "time": np.array( + [ + "1900-05-05 12:34:56.100000", + "1900-05-05 12:34:56.100100", + "1900-05-05 12:34:56.100200", + "1900-05-05 12:34:56.100300", + "1900-05-05 12:34:56.100400", + "1900-05-05 12:34:56.100500", + "1900-05-05 12:34:56.100600", + "1900-05-05 12:34:56.100700", + "1900-05-05 12:34:56.100800", + "1900-05-05 12:34:56.100900", + ], + dtype="datetime64[ns]", + ), + "date": np.array( + [ + datetime.date(1900, 12, 25), + datetime.date(1900, 12, 25), + datetime.date(1900, 12, 25), + datetime.date(1900, 12, 25), + datetime.date(1900, 12, 25), + datetime.date(1900, 12, 25), + datetime.date(1900, 12, 25), + datetime.date(1900, 12, 25), + datetime.date(1900, 12, 25), + datetime.date(1900, 12, 25), + ], + dtype="object", + ), + } + expected = pd.DataFrame.from_dict(data) + + inputfile = os.path.join(dirpath, "TestOrcFile.testDate1900.orc") + got = read_orc(inputfile).iloc[:10] + + tm.assert_equal(expected, got) + + +def test_orc_reader_date_high(dirpath): + data = { + "time": np.array( + [ + "2038-05-05 12:34:56.100000", + "2038-05-05 12:34:56.100100", + "2038-05-05 12:34:56.100200", + "2038-05-05 12:34:56.100300", + "2038-05-05 12:34:56.100400", + "2038-05-05 12:34:56.100500", + "2038-05-05 12:34:56.100600", + "2038-05-05 12:34:56.100700", + "2038-05-05 12:34:56.100800", + "2038-05-05 12:34:56.100900", + ], + dtype="datetime64[ns]", + ), + "date": np.array( + [ + datetime.date(2038, 12, 25), + datetime.date(2038, 12, 25), + datetime.date(2038, 12, 25), + datetime.date(2038, 12, 25), + datetime.date(2038, 12, 25), + datetime.date(2038, 12, 25), + datetime.date(2038, 12, 25), + datetime.date(2038, 12, 25), + datetime.date(2038, 12, 25), + datetime.date(2038, 12, 25), + ], + dtype="object", + ), + } + expected = pd.DataFrame.from_dict(data) + + inputfile = os.path.join(dirpath, "TestOrcFile.testDate2038.orc") + got = read_orc(inputfile).iloc[:10] + + tm.assert_equal(expected, got) + + +def test_orc_reader_snappy_compressed(dirpath): + data = { + "int1": np.array( + [ + -1160101563, + 1181413113, + 2065821249, + -267157795, + 172111193, + 1752363137, + 1406072123, + 1911809390, + -1308542224, + -467100286, + ], + dtype="int32", + ), + "string1": np.array( + [ + "f50dcb8", + "382fdaaa", + "90758c6", + "9e8caf3f", + "ee97332b", + "d634da1", + "2bea4396", + "d67d89e8", + "ad71007e", + "e8c82066", + ], + dtype="object", + ), + } + expected = pd.DataFrame.from_dict(data) + + inputfile = os.path.join(dirpath, "TestOrcFile.testSnappy.orc") + got = read_orc(inputfile).iloc[:10] + + tm.assert_equal(expected, got) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/test_parquet.py b/.local/lib/python3.8/site-packages/pandas/tests/io/test_parquet.py new file mode 100644 index 0000000..e6dbcef --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/test_parquet.py @@ -0,0 +1,1110 @@ +""" test parquet compat """ +import datetime +from io import BytesIO +import os +import pathlib +from warnings import ( + catch_warnings, + filterwarnings, +) + +import numpy as np +import pytest + +from pandas._config import get_option + +from pandas.compat.pyarrow import ( + pa_version_under2p0, + pa_version_under5p0, + pa_version_under6p0, +) +import pandas.util._test_decorators as td + +import pandas as pd +import pandas._testing as tm +from pandas.util.version import 
Version + +from pandas.io.parquet import ( + FastParquetImpl, + PyArrowImpl, + get_engine, + read_parquet, + to_parquet, +) + +try: + import pyarrow + + _HAVE_PYARROW = True +except ImportError: + _HAVE_PYARROW = False + +try: + with catch_warnings(): + # `np.bool` is a deprecated alias... + filterwarnings("ignore", "`np.bool`", category=DeprecationWarning) + # accessing pd.Int64Index in pd namespace + filterwarnings("ignore", ".*Int64Index.*", category=FutureWarning) + + import fastparquet + + _HAVE_FASTPARQUET = True +except ImportError: + _HAVE_FASTPARQUET = False + + +pytestmark = pytest.mark.filterwarnings( + "ignore:RangeIndex.* is deprecated:DeprecationWarning" +) + + +# TODO(ArrayManager) fastparquet relies on BlockManager internals + +# setup engines & skips +@pytest.fixture( + params=[ + pytest.param( + "fastparquet", + marks=pytest.mark.skipif( + not _HAVE_FASTPARQUET or get_option("mode.data_manager") == "array", + reason="fastparquet is not installed or ArrayManager is used", + ), + ), + pytest.param( + "pyarrow", + marks=pytest.mark.skipif( + not _HAVE_PYARROW, reason="pyarrow is not installed" + ), + ), + ] +) +def engine(request): + return request.param + + +@pytest.fixture +def pa(): + if not _HAVE_PYARROW: + pytest.skip("pyarrow is not installed") + return "pyarrow" + + +@pytest.fixture +def fp(): + if not _HAVE_FASTPARQUET: + pytest.skip("fastparquet is not installed") + elif get_option("mode.data_manager") == "array": + pytest.skip("ArrayManager is not supported with fastparquet") + return "fastparquet" + + +@pytest.fixture +def df_compat(): + return pd.DataFrame({"A": [1, 2, 3], "B": "foo"}) + + +@pytest.fixture +def df_cross_compat(): + df = pd.DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + # 'c': np.arange(3, 6).astype('u1'), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.date_range("20130101", periods=3), + # 'g': pd.date_range('20130101', periods=3, + # tz='US/Eastern'), + # 'h': pd.date_range('20130101', periods=3, freq='ns') + } + ) + return df + + +@pytest.fixture +def df_full(): + return pd.DataFrame( + { + "string": list("abc"), + "string_with_nan": ["a", np.nan, "c"], + "string_with_none": ["a", None, "c"], + "bytes": [b"foo", b"bar", b"baz"], + "unicode": ["foo", "bar", "baz"], + "int": list(range(1, 4)), + "uint": np.arange(3, 6).astype("u1"), + "float": np.arange(4.0, 7.0, dtype="float64"), + "float_with_nan": [2.0, np.nan, 3.0], + "bool": [True, False, True], + "datetime": pd.date_range("20130101", periods=3), + "datetime_with_nat": [ + pd.Timestamp("20130101"), + pd.NaT, + pd.Timestamp("20130103"), + ], + } + ) + + +@pytest.fixture( + params=[ + datetime.datetime.now(datetime.timezone.utc), + datetime.datetime.now(datetime.timezone.min), + datetime.datetime.now(datetime.timezone.max), + datetime.datetime.strptime("2019-01-04T16:41:24+0200", "%Y-%m-%dT%H:%M:%S%z"), + datetime.datetime.strptime("2019-01-04T16:41:24+0215", "%Y-%m-%dT%H:%M:%S%z"), + datetime.datetime.strptime("2019-01-04T16:41:24-0200", "%Y-%m-%dT%H:%M:%S%z"), + datetime.datetime.strptime("2019-01-04T16:41:24-0215", "%Y-%m-%dT%H:%M:%S%z"), + ] +) +def timezone_aware_date_list(request): + return request.param + + +def check_round_trip( + df, + engine=None, + path=None, + write_kwargs=None, + read_kwargs=None, + expected=None, + check_names=True, + check_like=False, + check_dtype=True, + repeat=2, +): + """Verify parquet serializer and deserializer produce the same results. 
+ + Performs a pandas to disk and disk to pandas round trip, + then compares the 2 resulting DataFrames to verify equality. + + Parameters + ---------- + df: Dataframe + engine: str, optional + 'pyarrow' or 'fastparquet' + path: str, optional + write_kwargs: dict of str:str, optional + read_kwargs: dict of str:str, optional + expected: DataFrame, optional + Expected deserialization result, otherwise will be equal to `df` + check_names: list of str, optional + Closed set of column names to be compared + check_like: bool, optional + If True, ignore the order of index & columns. + repeat: int, optional + How many times to repeat the test + """ + write_kwargs = write_kwargs or {"compression": None} + read_kwargs = read_kwargs or {} + + if expected is None: + expected = df + + if engine: + write_kwargs["engine"] = engine + read_kwargs["engine"] = engine + + def compare(repeat): + for _ in range(repeat): + df.to_parquet(path, **write_kwargs) + with catch_warnings(record=True): + actual = read_parquet(path, **read_kwargs) + + tm.assert_frame_equal( + expected, + actual, + check_names=check_names, + check_like=check_like, + check_dtype=check_dtype, + ) + + if path is None: + with tm.ensure_clean() as path: + compare(repeat) + else: + compare(repeat) + + +def check_partition_names(path, expected): + """Check partitions of a parquet file are as expected. + + Parameters + ---------- + path: str + Path of the dataset. + expected: iterable of str + Expected partition names. + """ + if pa_version_under5p0: + import pyarrow.parquet as pq + + dataset = pq.ParquetDataset(path, validate_schema=False) + assert len(dataset.partitions.partition_names) == len(expected) + assert dataset.partitions.partition_names == set(expected) + else: + import pyarrow.dataset as ds + + dataset = ds.dataset(path, partitioning="hive") + assert dataset.partitioning.schema.names == expected + + +def test_invalid_engine(df_compat): + msg = "engine must be one of 'pyarrow', 'fastparquet'" + with pytest.raises(ValueError, match=msg): + check_round_trip(df_compat, "foo", "bar") + + +def test_options_py(df_compat, pa): + # use the set option + + with pd.option_context("io.parquet.engine", "pyarrow"): + check_round_trip(df_compat) + + +def test_options_fp(df_compat, fp): + # use the set option + + with pd.option_context("io.parquet.engine", "fastparquet"): + check_round_trip(df_compat) + + +def test_options_auto(df_compat, fp, pa): + # use the set option + + with pd.option_context("io.parquet.engine", "auto"): + check_round_trip(df_compat) + + +def test_options_get_engine(fp, pa): + assert isinstance(get_engine("pyarrow"), PyArrowImpl) + assert isinstance(get_engine("fastparquet"), FastParquetImpl) + + with pd.option_context("io.parquet.engine", "pyarrow"): + assert isinstance(get_engine("auto"), PyArrowImpl) + assert isinstance(get_engine("pyarrow"), PyArrowImpl) + assert isinstance(get_engine("fastparquet"), FastParquetImpl) + + with pd.option_context("io.parquet.engine", "fastparquet"): + assert isinstance(get_engine("auto"), FastParquetImpl) + assert isinstance(get_engine("pyarrow"), PyArrowImpl) + assert isinstance(get_engine("fastparquet"), FastParquetImpl) + + with pd.option_context("io.parquet.engine", "auto"): + assert isinstance(get_engine("auto"), PyArrowImpl) + assert isinstance(get_engine("pyarrow"), PyArrowImpl) + assert isinstance(get_engine("fastparquet"), FastParquetImpl) + + +def test_get_engine_auto_error_message(): + # Expect different error messages from get_engine(engine="auto") + # if engines aren't installed vs. 
are installed but bad version + from pandas.compat._optional import VERSIONS + + # Do we have engines installed, but a bad version of them? + pa_min_ver = VERSIONS.get("pyarrow") + fp_min_ver = VERSIONS.get("fastparquet") + have_pa_bad_version = ( + False + if not _HAVE_PYARROW + else Version(pyarrow.__version__) < Version(pa_min_ver) + ) + have_fp_bad_version = ( + False + if not _HAVE_FASTPARQUET + else Version(fastparquet.__version__) < Version(fp_min_ver) + ) + # Do we have usable engines installed? + have_usable_pa = _HAVE_PYARROW and not have_pa_bad_version + have_usable_fp = _HAVE_FASTPARQUET and not have_fp_bad_version + + if not have_usable_pa and not have_usable_fp: + # No usable engines found. + if have_pa_bad_version: + match = f"Pandas requires version .{pa_min_ver}. or newer of .pyarrow." + with pytest.raises(ImportError, match=match): + get_engine("auto") + else: + match = "Missing optional dependency .pyarrow." + with pytest.raises(ImportError, match=match): + get_engine("auto") + + if have_fp_bad_version: + match = f"Pandas requires version .{fp_min_ver}. or newer of .fastparquet." + with pytest.raises(ImportError, match=match): + get_engine("auto") + else: + match = "Missing optional dependency .fastparquet." + with pytest.raises(ImportError, match=match): + get_engine("auto") + + +def test_cross_engine_pa_fp(df_cross_compat, pa, fp): + # cross-compat with differing reading/writing engines + + df = df_cross_compat + with tm.ensure_clean() as path: + df.to_parquet(path, engine=pa, compression=None) + + result = read_parquet(path, engine=fp) + tm.assert_frame_equal(result, df) + + result = read_parquet(path, engine=fp, columns=["a", "d"]) + tm.assert_frame_equal(result, df[["a", "d"]]) + + +def test_cross_engine_fp_pa(request, df_cross_compat, pa, fp): + # cross-compat with differing reading/writing engines + df = df_cross_compat + with tm.ensure_clean() as path: + df.to_parquet(path, engine=fp, compression=None) + + with catch_warnings(record=True): + result = read_parquet(path, engine=pa) + tm.assert_frame_equal(result, df) + + result = read_parquet(path, engine=pa, columns=["a", "d"]) + tm.assert_frame_equal(result, df[["a", "d"]]) + + +class Base: + def check_error_on_write(self, df, engine, exc, err_msg): + # check that we are raising the exception on writing + with tm.ensure_clean() as path: + with pytest.raises(exc, match=err_msg): + to_parquet(df, path, engine, compression=None) + + def check_external_error_on_write(self, df, engine, exc): + # check that an external library is raising the exception on writing + with tm.ensure_clean() as path: + with tm.external_error_raised(exc): + to_parquet(df, path, engine, compression=None) + + @pytest.mark.network + @tm.network + def test_parquet_read_from_url(self, df_compat, engine): + if engine != "auto": + pytest.importorskip(engine) + url = ( + "https://raw.githubusercontent.com/pandas-dev/pandas/" + "main/pandas/tests/io/data/parquet/simple.parquet" + ) + df = read_parquet(url) + tm.assert_frame_equal(df, df_compat) + + +class TestBasic(Base): + def test_error(self, engine): + for obj in [ + pd.Series([1, 2, 3]), + 1, + "foo", + pd.Timestamp("20130101"), + np.array([1, 2, 3]), + ]: + msg = "to_parquet only supports IO with DataFrames" + self.check_error_on_write(obj, engine, ValueError, msg) + + def test_columns_dtypes(self, engine): + df = pd.DataFrame({"string": list("abc"), "int": list(range(1, 4))}) + + # unicode + df.columns = ["foo", "bar"] + check_round_trip(df, engine) + + def test_columns_dtypes_invalid(self, 
engine): + df = pd.DataFrame({"string": list("abc"), "int": list(range(1, 4))}) + + msg = "parquet must have string column names" + # numeric + df.columns = [0, 1] + self.check_error_on_write(df, engine, ValueError, msg) + + # bytes + df.columns = [b"foo", b"bar"] + self.check_error_on_write(df, engine, ValueError, msg) + + # python object + df.columns = [ + datetime.datetime(2011, 1, 1, 0, 0), + datetime.datetime(2011, 1, 1, 1, 1), + ] + self.check_error_on_write(df, engine, ValueError, msg) + + @pytest.mark.parametrize("compression", [None, "gzip", "snappy", "brotli"]) + def test_compression(self, engine, compression): + + if compression == "snappy": + pytest.importorskip("snappy") + + elif compression == "brotli": + pytest.importorskip("brotli") + + df = pd.DataFrame({"A": [1, 2, 3]}) + check_round_trip(df, engine, write_kwargs={"compression": compression}) + + def test_read_columns(self, engine): + # GH18154 + df = pd.DataFrame({"string": list("abc"), "int": list(range(1, 4))}) + + expected = pd.DataFrame({"string": list("abc")}) + check_round_trip( + df, engine, expected=expected, read_kwargs={"columns": ["string"]} + ) + + def test_write_index(self, engine): + check_names = engine != "fastparquet" + + df = pd.DataFrame({"A": [1, 2, 3]}) + check_round_trip(df, engine) + + indexes = [ + [2, 3, 4], + pd.date_range("20130101", periods=3), + list("abc"), + [1, 3, 4], + ] + # non-default index + for index in indexes: + df.index = index + if isinstance(index, pd.DatetimeIndex): + df.index = df.index._with_freq(None) # freq doesn't round-trip + check_round_trip(df, engine, check_names=check_names) + + # index with meta-data + df.index = [0, 1, 2] + df.index.name = "foo" + check_round_trip(df, engine) + + def test_write_multiindex(self, pa): + # Not supported in fastparquet as of 0.1.3 or older pyarrow version + engine = pa + + df = pd.DataFrame({"A": [1, 2, 3]}) + index = pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1)]) + df.index = index + check_round_trip(df, engine) + + def test_multiindex_with_columns(self, pa): + engine = pa + dates = pd.date_range("01-Jan-2018", "01-Dec-2018", freq="MS") + df = pd.DataFrame(np.random.randn(2 * len(dates), 3), columns=list("ABC")) + index1 = pd.MultiIndex.from_product( + [["Level1", "Level2"], dates], names=["level", "date"] + ) + index2 = index1.copy(names=None) + for index in [index1, index2]: + df.index = index + + check_round_trip(df, engine) + check_round_trip( + df, engine, read_kwargs={"columns": ["A", "B"]}, expected=df[["A", "B"]] + ) + + def test_write_ignoring_index(self, engine): + # ENH 20768 + # Ensure index=False omits the index from the written Parquet file. + df = pd.DataFrame({"a": [1, 2, 3], "b": ["q", "r", "s"]}) + + write_kwargs = {"compression": None, "index": False} + + # Because we're dropping the index, we expect the loaded dataframe to + # have the default integer index. + expected = df.reset_index(drop=True) + + check_round_trip(df, engine, write_kwargs=write_kwargs, expected=expected) + + # Ignore custom index + df = pd.DataFrame( + {"a": [1, 2, 3], "b": ["q", "r", "s"]}, index=["zyx", "wvu", "tsr"] + ) + + check_round_trip(df, engine, write_kwargs=write_kwargs, expected=expected) + + # Ignore multi-indexes as well. 
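+ # Sketch of what index=False means on disk, assuming a parquet engine is + # installed; the path is hypothetical. No index column is written, so the + # file reads back with a fresh RangeIndex; hence "expected" above is + # df.reset_index(drop=True): + # + # df.to_parquet("out.parquet", index=False) + # pd.read_parquet("out.parquet") # RangeIndex; original index gone +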
+ arrays = [ + ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + df = pd.DataFrame( + {"one": list(range(8)), "two": [-i for i in range(8)]}, index=arrays + ) + + expected = df.reset_index(drop=True) + check_round_trip(df, engine, write_kwargs=write_kwargs, expected=expected) + + def test_write_column_multiindex(self, engine): + # Not able to write column multi-indexes with non-string column names. + mi_columns = pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1)]) + df = pd.DataFrame(np.random.randn(4, 3), columns=mi_columns) + msg = ( + r"\s*parquet must have string column names for all values in\s*" + "each level of the MultiIndex" + ) + self.check_error_on_write(df, engine, ValueError, msg) + + def test_write_column_multiindex_nonstring(self, pa): + # GH #34777 + # Not supported in fastparquet as of 0.1.3 + engine = pa + + # Not able to write column multi-indexes with non-string column names + arrays = [ + ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], + [1, 2, 1, 2, 1, 2, 1, 2], + ] + df = pd.DataFrame(np.random.randn(8, 8), columns=arrays) + df.columns.names = ["Level1", "Level2"] + msg = ( + r"\s*parquet must have string column names for all values in\s*" + "each level of the MultiIndex" + ) + self.check_error_on_write(df, engine, ValueError, msg) + + def test_write_column_multiindex_string(self, pa): + # GH #34777 + # Not supported in fastparquet as of 0.1.3 + engine = pa + + # Write column multi-indexes with string column names + arrays = [ + ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + df = pd.DataFrame(np.random.randn(8, 8), columns=arrays) + df.columns.names = ["ColLevel1", "ColLevel2"] + + check_round_trip(df, engine) + + def test_write_column_index_string(self, pa): + # GH #34777 + # Not supported in fastparquet as of 0.1.3 + engine = pa + + # Write column indexes with string column names + arrays = ["bar", "baz", "foo", "qux"] + df = pd.DataFrame(np.random.randn(8, 4), columns=arrays) + df.columns.name = "StringCol" + + check_round_trip(df, engine) + + def test_write_column_index_nonstring(self, pa): + # GH #34777 + # Not supported in fastparquet as of 0.1.3 + engine = pa + + # Write column indexes with string column names + arrays = [1, 2, 3, 4] + df = pd.DataFrame(np.random.randn(8, 4), columns=arrays) + df.columns.name = "NonStringCol" + msg = r"parquet must have string column names" + self.check_error_on_write(df, engine, ValueError, msg) + + def test_use_nullable_dtypes(self, engine, request): + import pyarrow.parquet as pq + + if engine == "fastparquet": + # We are manually disabling fastparquet's + # nullable dtype support pending discussion + mark = pytest.mark.xfail( + reason="Fastparquet nullable dtype support is disabled" + ) + request.node.add_marker(mark) + + table = pyarrow.table( + { + "a": pyarrow.array([1, 2, 3, None], "int64"), + "b": pyarrow.array([1, 2, 3, None], "uint8"), + "c": pyarrow.array(["a", "b", "c", None]), + "d": pyarrow.array([True, False, True, None]), + # Test that nullable dtypes used even in absence of nulls + "e": pyarrow.array([1, 2, 3, 4], "int64"), + } + ) + with tm.ensure_clean() as path: + # write manually with pyarrow to write integers + pq.write_table(table, path) + result1 = read_parquet(path, engine=engine) + result2 = read_parquet(path, engine=engine, use_nullable_dtypes=True) + + assert result1["a"].dtype == np.dtype("float64") + expected = pd.DataFrame( + { + 
"a": pd.array([1, 2, 3, None], dtype="Int64"), + "b": pd.array([1, 2, 3, None], dtype="UInt8"), + "c": pd.array(["a", "b", "c", None], dtype="string"), + "d": pd.array([True, False, True, None], dtype="boolean"), + "e": pd.array([1, 2, 3, 4], dtype="Int64"), + } + ) + if engine == "fastparquet": + # Fastparquet doesn't support string columns yet + # Only int and boolean + result2 = result2.drop("c", axis=1) + expected = expected.drop("c", axis=1) + tm.assert_frame_equal(result2, expected) + + @pytest.mark.parametrize( + "dtype", + [ + "Int64", + "UInt8", + "boolean", + "object", + "datetime64[ns, UTC]", + "float", + "period[D]", + "Float64", + "string", + ], + ) + def test_read_empty_array(self, pa, dtype): + # GH #41241 + df = pd.DataFrame( + { + "value": pd.array([], dtype=dtype), + } + ) + check_round_trip(df, pa, read_kwargs={"use_nullable_dtypes": True}) + + +@pytest.mark.filterwarnings("ignore:CategoricalBlock is deprecated:DeprecationWarning") +class TestParquetPyArrow(Base): + def test_basic(self, pa, df_full): + + df = df_full + + # additional supported types for pyarrow + dti = pd.date_range("20130101", periods=3, tz="Europe/Brussels") + dti = dti._with_freq(None) # freq doesn't round-trip + df["datetime_tz"] = dti + df["bool_with_none"] = [True, None, True] + + check_round_trip(df, pa) + + def test_basic_subset_columns(self, pa, df_full): + # GH18628 + + df = df_full + # additional supported types for pyarrow + df["datetime_tz"] = pd.date_range("20130101", periods=3, tz="Europe/Brussels") + + check_round_trip( + df, + pa, + expected=df[["string", "int"]], + read_kwargs={"columns": ["string", "int"]}, + ) + + def test_to_bytes_without_path_or_buf_provided(self, pa, df_full): + # GH 37105 + + buf_bytes = df_full.to_parquet(engine=pa) + assert isinstance(buf_bytes, bytes) + + buf_stream = BytesIO(buf_bytes) + res = read_parquet(buf_stream) + + tm.assert_frame_equal(df_full, res) + + def test_duplicate_columns(self, pa): + # not currently able to handle duplicate columns + df = pd.DataFrame(np.arange(12).reshape(4, 3), columns=list("aaa")).copy() + self.check_error_on_write(df, pa, ValueError, "Duplicate column names found") + + def test_unsupported(self, pa): + # timedelta + df = pd.DataFrame({"a": pd.timedelta_range("1 day", periods=3)}) + self.check_external_error_on_write(df, pa, NotImplementedError) + + # mixed python objects + df = pd.DataFrame({"a": ["a", 1, 2.0]}) + # pyarrow 0.11 raises ArrowTypeError + # older pyarrows raise ArrowInvalid + self.check_external_error_on_write(df, pa, pyarrow.ArrowException) + + def test_categorical(self, pa): + + # supported in >= 0.7.0 + df = pd.DataFrame() + df["a"] = pd.Categorical(list("abcdef")) + + # test for null, out-of-order values, and unobserved category + df["b"] = pd.Categorical( + ["bar", "foo", "foo", "bar", None, "bar"], + dtype=pd.CategoricalDtype(["foo", "bar", "baz"]), + ) + + # test for ordered flag + df["c"] = pd.Categorical( + ["a", "b", "c", "a", "c", "b"], categories=["b", "c", "d"], ordered=True + ) + + check_round_trip(df, pa) + + def test_s3_roundtrip_explicit_fs(self, df_compat, s3_resource, pa, s3so): + s3fs = pytest.importorskip("s3fs") + s3 = s3fs.S3FileSystem(**s3so) + kw = {"filesystem": s3} + check_round_trip( + df_compat, + pa, + path="pandas-test/pyarrow.parquet", + read_kwargs=kw, + write_kwargs=kw, + ) + + def test_s3_roundtrip(self, df_compat, s3_resource, pa, s3so): + # GH #19134 + s3so = {"storage_options": s3so} + check_round_trip( + df_compat, + pa, + path="s3://pandas-test/pyarrow.parquet", + 
read_kwargs=s3so, + write_kwargs=s3so, + ) + + @td.skip_if_no("s3fs") # also requires flask + @pytest.mark.parametrize( + "partition_col", + [ + ["A"], + [], + ], + ) + def test_s3_roundtrip_for_dir( + self, df_compat, s3_resource, pa, partition_col, s3so + ): + # GH #26388 + expected_df = df_compat.copy() + + # GH #35791 + # read_table uses the new Arrow Datasets API since pyarrow 1.0.0 + # Previous behaviour was pyarrow partitioned columns become 'category' dtypes + # These are added to back of dataframe on read. In new API category dtype is + # only used if partition field is string, but this changed again to use + # category dtype for all types (not only strings) in pyarrow 2.0.0 + if partition_col: + partition_col_type = "int32" if pa_version_under2p0 else "category" + + expected_df[partition_col] = expected_df[partition_col].astype( + partition_col_type + ) + + check_round_trip( + df_compat, + pa, + expected=expected_df, + path="s3://pandas-test/parquet_dir", + read_kwargs={"storage_options": s3so}, + write_kwargs={ + "partition_cols": partition_col, + "compression": None, + "storage_options": s3so, + }, + check_like=True, + repeat=1, + ) + + @td.skip_if_no("pyarrow") + def test_read_file_like_obj_support(self, df_compat): + buffer = BytesIO() + df_compat.to_parquet(buffer) + df_from_buf = read_parquet(buffer) + tm.assert_frame_equal(df_compat, df_from_buf) + + @td.skip_if_no("pyarrow") + def test_expand_user(self, df_compat, monkeypatch): + monkeypatch.setenv("HOME", "TestingUser") + monkeypatch.setenv("USERPROFILE", "TestingUser") + with pytest.raises(OSError, match=r".*TestingUser.*"): + read_parquet("~/file.parquet") + with pytest.raises(OSError, match=r".*TestingUser.*"): + df_compat.to_parquet("~/file.parquet") + + def test_partition_cols_supported(self, pa, df_full): + # GH #23283 + partition_cols = ["bool", "int"] + df = df_full + with tm.ensure_clean_dir() as path: + df.to_parquet(path, partition_cols=partition_cols, compression=None) + check_partition_names(path, partition_cols) + assert read_parquet(path).shape == df.shape + + def test_partition_cols_string(self, pa, df_full): + # GH #27117 + partition_cols = "bool" + partition_cols_list = [partition_cols] + df = df_full + with tm.ensure_clean_dir() as path: + df.to_parquet(path, partition_cols=partition_cols, compression=None) + check_partition_names(path, partition_cols_list) + assert read_parquet(path).shape == df.shape + + @pytest.mark.parametrize("path_type", [str, pathlib.Path]) + def test_partition_cols_pathlib(self, pa, df_compat, path_type): + # GH 35902 + + partition_cols = "B" + partition_cols_list = [partition_cols] + df = df_compat + + with tm.ensure_clean_dir() as path_str: + path = path_type(path_str) + df.to_parquet(path, partition_cols=partition_cols_list) + assert read_parquet(path).shape == df.shape + + def test_empty_dataframe(self, pa): + # GH #27339 + df = pd.DataFrame() + check_round_trip(df, pa) + + def test_write_with_schema(self, pa): + import pyarrow + + df = pd.DataFrame({"x": [0, 1]}) + schema = pyarrow.schema([pyarrow.field("x", type=pyarrow.bool_())]) + out_df = df.astype(bool) + check_round_trip(df, pa, write_kwargs={"schema": schema}, expected=out_df) + + @td.skip_if_no("pyarrow") + def test_additional_extension_arrays(self, pa): + # test additional ExtensionArrays that are supported through the + # __arrow_array__ protocol + df = pd.DataFrame( + { + "a": pd.Series([1, 2, 3], dtype="Int64"), + "b": pd.Series([1, 2, 3], dtype="UInt32"), + "c": pd.Series(["a", None, "c"], dtype="string"), + } 
+ ) + check_round_trip(df, pa) + + df = pd.DataFrame({"a": pd.Series([1, 2, 3, None], dtype="Int64")}) + check_round_trip(df, pa) + + @td.skip_if_no("pyarrow", min_version="1.0.0") + def test_pyarrow_backed_string_array(self, pa, string_storage): + # test ArrowStringArray supported through the __arrow_array__ protocol + df = pd.DataFrame({"a": pd.Series(["a", None, "c"], dtype="string[pyarrow]")}) + with pd.option_context("string_storage", string_storage): + check_round_trip(df, pa, expected=df.astype(f"string[{string_storage}]")) + + @td.skip_if_no("pyarrow") + def test_additional_extension_types(self, pa): + # test additional ExtensionArrays that are supported through the + # __arrow_array__ protocol + by defining a custom ExtensionType + df = pd.DataFrame( + { + # Arrow does not yet support struct in writing to Parquet (ARROW-1644) + # "c": pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2), (3, 4)]), + "d": pd.period_range("2012-01-01", periods=3, freq="D"), + } + ) + check_round_trip(df, pa) + + def test_timestamp_nanoseconds(self, pa): + # with version 2.6, pyarrow defaults to writing the nanoseconds, so + # this should work without error + # Note in previous pyarrows(<6.0.0), only the pseudo-version 2.0 was available + if not pa_version_under6p0: + ver = "2.6" + else: + ver = "2.0" + df = pd.DataFrame({"a": pd.date_range("2017-01-01", freq="1n", periods=10)}) + check_round_trip(df, pa, write_kwargs={"version": ver}) + + def test_timezone_aware_index(self, pa, timezone_aware_date_list): + if not pa_version_under2p0: + # temporary skip this test until it is properly resolved + # https://github.com/pandas-dev/pandas/issues/37286 + pytest.skip() + idx = 5 * [timezone_aware_date_list] + df = pd.DataFrame(index=idx, data={"index_as_col": idx}) + + # see gh-36004 + # compare time(zone) values only, skip their class: + # pyarrow always creates fixed offset timezones using pytz.FixedOffset() + # even if it was datetime.timezone() originally + # + # technically they are the same: + # they both implement datetime.tzinfo + # they both wrap datetime.timedelta() + # this use-case sets the resolution to 1 minute + check_round_trip(df, pa, check_dtype=False) + + @td.skip_if_no("pyarrow", min_version="1.0.0") + def test_filter_row_groups(self, pa): + # https://github.com/pandas-dev/pandas/issues/26551 + df = pd.DataFrame({"a": list(range(0, 3))}) + with tm.ensure_clean() as path: + df.to_parquet(path, pa) + result = read_parquet( + path, pa, filters=[("a", "==", 0)], use_legacy_dataset=False + ) + assert len(result) == 1 + + def test_read_parquet_manager(self, pa, using_array_manager): + # ensure that read_parquet honors the pandas.options.mode.data_manager option + df = pd.DataFrame(np.random.randn(10, 3), columns=["A", "B", "C"]) + + with tm.ensure_clean() as path: + df.to_parquet(path, pa) + result = read_parquet(path, pa) + if using_array_manager: + assert isinstance(result._mgr, pd.core.internals.ArrayManager) + else: + assert isinstance(result._mgr, pd.core.internals.BlockManager) + + +class TestParquetFastParquet(Base): + def test_basic(self, fp, df_full): + df = df_full + + dti = pd.date_range("20130101", periods=3, tz="US/Eastern") + dti = dti._with_freq(None) # freq doesn't round-trip + df["datetime_tz"] = dti + df["timedelta"] = pd.timedelta_range("1 day", periods=3) + check_round_trip(df, fp) + + @pytest.mark.skip(reason="not supported") + def test_duplicate_columns(self, fp): + + # not currently able to handle duplicate columns + df = pd.DataFrame(np.arange(12).reshape(4, 3), 
columns=list("aaa")).copy() + msg = "Cannot create parquet dataset with duplicate column names" + self.check_error_on_write(df, fp, ValueError, msg) + + def test_bool_with_none(self, fp): + df = pd.DataFrame({"a": [True, None, False]}) + expected = pd.DataFrame({"a": [1.0, np.nan, 0.0]}, dtype="float16") + # Fastparquet bug in 0.7.1 makes it so that this dtype becomes + # float64 + check_round_trip(df, fp, expected=expected, check_dtype=False) + + def test_unsupported(self, fp): + + # period + df = pd.DataFrame({"a": pd.period_range("2013", freq="M", periods=3)}) + # error from fastparquet -> don't check exact error message + self.check_error_on_write(df, fp, ValueError, None) + + # mixed + df = pd.DataFrame({"a": ["a", 1, 2.0]}) + msg = "Can't infer object conversion type" + self.check_error_on_write(df, fp, ValueError, msg) + + def test_categorical(self, fp): + df = pd.DataFrame({"a": pd.Categorical(list("abc"))}) + check_round_trip(df, fp) + + def test_filter_row_groups(self, fp): + d = {"a": list(range(0, 3))} + df = pd.DataFrame(d) + with tm.ensure_clean() as path: + df.to_parquet(path, fp, compression=None, row_group_offsets=1) + result = read_parquet(path, fp, filters=[("a", "==", 0)]) + assert len(result) == 1 + + def test_s3_roundtrip(self, df_compat, s3_resource, fp, s3so): + # GH #19134 + check_round_trip( + df_compat, + fp, + path="s3://pandas-test/fastparquet.parquet", + read_kwargs={"storage_options": s3so}, + write_kwargs={"compression": None, "storage_options": s3so}, + ) + + def test_partition_cols_supported(self, fp, df_full): + # GH #23283 + partition_cols = ["bool", "int"] + df = df_full + with tm.ensure_clean_dir() as path: + df.to_parquet( + path, + engine="fastparquet", + partition_cols=partition_cols, + compression=None, + ) + assert os.path.exists(path) + import fastparquet + + actual_partition_cols = fastparquet.ParquetFile(path, False).cats + assert len(actual_partition_cols) == 2 + + def test_partition_cols_string(self, fp, df_full): + # GH #27117 + partition_cols = "bool" + df = df_full + with tm.ensure_clean_dir() as path: + df.to_parquet( + path, + engine="fastparquet", + partition_cols=partition_cols, + compression=None, + ) + assert os.path.exists(path) + import fastparquet + + actual_partition_cols = fastparquet.ParquetFile(path, False).cats + assert len(actual_partition_cols) == 1 + + def test_partition_on_supported(self, fp, df_full): + # GH #23283 + partition_cols = ["bool", "int"] + df = df_full + with tm.ensure_clean_dir() as path: + df.to_parquet( + path, + engine="fastparquet", + compression=None, + partition_on=partition_cols, + ) + assert os.path.exists(path) + import fastparquet + + actual_partition_cols = fastparquet.ParquetFile(path, False).cats + assert len(actual_partition_cols) == 2 + + def test_error_on_using_partition_cols_and_partition_on(self, fp, df_full): + # GH #23283 + partition_cols = ["bool", "int"] + df = df_full + msg = ( + "Cannot use both partition_on and partition_cols. 
Use partition_cols for " + "partitioning data" + ) + with pytest.raises(ValueError, match=msg): + with tm.ensure_clean_dir() as path: + df.to_parquet( + path, + engine="fastparquet", + compression=None, + partition_on=partition_cols, + partition_cols=partition_cols, + ) + + def test_empty_dataframe(self, fp): + # GH #27339 + df = pd.DataFrame() + expected = df.copy() + expected.index.name = "index" + check_round_trip(df, fp, expected=expected) + + def test_timezone_aware_index(self, fp, timezone_aware_date_list): + idx = 5 * [timezone_aware_date_list] + + df = pd.DataFrame(index=idx, data={"index_as_col": idx}) + + expected = df.copy() + expected.index.name = "index" + check_round_trip(df, fp, expected=expected) + + def test_use_nullable_dtypes_not_supported(self, monkeypatch, fp): + df = pd.DataFrame({"a": [1, 2]}) + + with tm.ensure_clean() as path: + df.to_parquet(path) + with pytest.raises(ValueError, match="not supported for the fastparquet"): + read_parquet(path, engine="fastparquet", use_nullable_dtypes=True) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/test_pickle.py b/.local/lib/python3.8/site-packages/pandas/tests/io/test_pickle.py new file mode 100644 index 0000000..4700e30 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/test_pickle.py @@ -0,0 +1,630 @@ +""" +manage legacy pickle tests + +How to add pickle tests: + +1. Install pandas version intended to output the pickle. + +2. Execute "generate_legacy_storage_files.py" to create the pickle. +$ python generate_legacy_storage_files.py pickle + +3. Move the created pickle to "data/legacy_pickle/" directory. +""" +import bz2 +import datetime +import functools +from functools import partial +import glob +import gzip +import io +import os +from pathlib import Path +import pickle +import shutil +from warnings import ( + catch_warnings, + filterwarnings, + simplefilter, +) +import zipfile + +import numpy as np +import pytest + +from pandas.compat import ( + get_lzma_file, + is_platform_little_endian, +) +from pandas.compat._optional import import_optional_dependency +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + Index, + Series, + period_range, +) +import pandas._testing as tm + +import pandas.io.common as icom +from pandas.tseries.offsets import ( + Day, + MonthEnd, +) + +pytestmark = pytest.mark.filterwarnings( + "ignore:Timestamp.freq is deprecated:FutureWarning" +) + + +@pytest.fixture(scope="module") +def current_pickle_data(): + # our current version pickle data + from pandas.tests.io.generate_legacy_storage_files import create_pickle_data + + with catch_warnings(): + filterwarnings( + "ignore", "The 'freq' argument in Timestamp", category=FutureWarning + ) + + return create_pickle_data() + + +# --------------------- +# comparison functions +# --------------------- +def compare_element(result, expected, typ, version=None): + if isinstance(expected, Index): + tm.assert_index_equal(expected, result) + return + + if typ.startswith("sp_"): + comparator = tm.assert_equal + comparator(result, expected) + elif typ == "timestamp": + if expected is pd.NaT: + assert result is pd.NaT + else: + assert result == expected + assert result.freq == expected.freq + else: + comparator = getattr(tm, f"assert_{typ}_equal", tm.assert_almost_equal) + comparator(result, expected) + + +def compare(data, vf, version): + + data = pd.read_pickle(vf) + + m = globals() + for typ, dv in data.items(): + for dt, result in dv.items(): + expected = data[typ][dt] + + # use a 
specific comparator + # if available + comparator = f"compare_{typ}_{dt}" + + comparator = m.get(comparator, m["compare_element"]) + comparator(result, expected, typ, version) + return data + + +def compare_series_ts(result, expected, typ, version): + # GH 7748 + tm.assert_series_equal(result, expected) + assert result.index.freq == expected.index.freq + assert not result.index.freq.normalize + tm.assert_series_equal(result > 0, expected > 0) + + # GH 9291 + freq = result.index.freq + assert freq + Day(1) == Day(2) + + res = freq + pd.Timedelta(hours=1) + assert isinstance(res, pd.Timedelta) + assert res == pd.Timedelta(days=1, hours=1) + + res = freq + pd.Timedelta(nanoseconds=1) + assert isinstance(res, pd.Timedelta) + assert res == pd.Timedelta(days=1, nanoseconds=1) + + +def compare_series_dt_tz(result, expected, typ, version): + tm.assert_series_equal(result, expected) + + +def compare_series_cat(result, expected, typ, version): + tm.assert_series_equal(result, expected) + + +def compare_frame_dt_mixed_tzs(result, expected, typ, version): + tm.assert_frame_equal(result, expected) + + +def compare_frame_cat_onecol(result, expected, typ, version): + tm.assert_frame_equal(result, expected) + + +def compare_frame_cat_and_float(result, expected, typ, version): + compare_frame_cat_onecol(result, expected, typ, version) + + +def compare_index_period(result, expected, typ, version): + tm.assert_index_equal(result, expected) + assert isinstance(result.freq, MonthEnd) + assert result.freq == MonthEnd() + assert result.freqstr == "M" + tm.assert_index_equal(result.shift(2), expected.shift(2)) + + +here = os.path.dirname(__file__) +legacy_dirname = os.path.join(here, "data", "legacy_pickle") +files = glob.glob(os.path.join(legacy_dirname, "*", "*.pickle")) + + +@pytest.fixture(params=files) +def legacy_pickle(request, datapath): + return datapath(request.param) + + +# --------------------- +# tests +# --------------------- +def test_pickles(current_pickle_data, legacy_pickle): + if not is_platform_little_endian(): + pytest.skip("known failure on non-little endian") + + version = os.path.basename(os.path.dirname(legacy_pickle)) + with catch_warnings(record=True): + simplefilter("ignore") + compare(current_pickle_data, legacy_pickle, version) + + +def python_pickler(obj, path): + with open(path, "wb") as fh: + pickle.dump(obj, fh, protocol=-1) + + +def python_unpickler(path): + with open(path, "rb") as fh: + fh.seek(0) + return pickle.load(fh) + + +@pytest.mark.parametrize( + "pickle_writer", + [ + pytest.param(python_pickler, id="python"), + pytest.param(pd.to_pickle, id="pandas_proto_default"), + pytest.param( + functools.partial(pd.to_pickle, protocol=pickle.HIGHEST_PROTOCOL), + id="pandas_proto_highest", + ), + pytest.param(functools.partial(pd.to_pickle, protocol=4), id="pandas_proto_4"), + pytest.param( + functools.partial(pd.to_pickle, protocol=5), + id="pandas_proto_5", + ), + ], +) +@pytest.mark.filterwarnings("ignore:The 'freq' argument in Timestamp:FutureWarning") +def test_round_trip_current(current_pickle_data, pickle_writer): + data = current_pickle_data + for typ, dv in data.items(): + for dt, expected in dv.items(): + + for writer in [pd.to_pickle, python_pickler]: + with tm.ensure_clean() as path: + # test writing with each pickler + pickle_writer(expected, path) + + # test reading with each unpickler + result = pd.read_pickle(path) + compare_element(result, expected, typ) + + result = python_unpickler(path) + compare_element(result, expected, typ) + + # and the same for file 
objects (GH 35679) + with open(path, mode="wb") as handle: + writer(expected, path) + handle.seek(0) # shouldn't close file handle + with open(path, mode="rb") as handle: + result = pd.read_pickle(handle) + handle.seek(0) # shouldn't close file handle + compare_element(result, expected, typ) + + +def test_pickle_path_pathlib(): + df = tm.makeDataFrame() + result = tm.round_trip_pathlib(df.to_pickle, pd.read_pickle) + tm.assert_frame_equal(df, result) + + +def test_pickle_path_localpath(): + df = tm.makeDataFrame() + result = tm.round_trip_localpath(df.to_pickle, pd.read_pickle) + tm.assert_frame_equal(df, result) + + +def test_legacy_sparse_warning(datapath): + """ + + Generated with + + >>> df = pd.DataFrame({"A": [1, 2, 3, 4], "B": [0, 0, 1, 1]}).to_sparse() + >>> df.to_pickle("pandas/tests/io/data/pickle/sparseframe-0.20.3.pickle.gz", + ... compression="gzip") + + >>> s = df['B'] + >>> s.to_pickle("pandas/tests/io/data/pickle/sparseseries-0.20.3.pickle.gz", + ... compression="gzip") + """ + with tm.assert_produces_warning(FutureWarning): + simplefilter("ignore", DeprecationWarning) # from boto + pd.read_pickle( + datapath("io", "data", "pickle", "sparseseries-0.20.3.pickle.gz"), + compression="gzip", + ) + + with tm.assert_produces_warning(FutureWarning): + simplefilter("ignore", DeprecationWarning) # from boto + pd.read_pickle( + datapath("io", "data", "pickle", "sparseframe-0.20.3.pickle.gz"), + compression="gzip", + ) + + +# --------------------- +# test pickle compression +# --------------------- + + +@pytest.fixture +def get_random_path(): + return f"__{tm.rands(10)}__.pickle" + + +class TestCompression: + + _extension_to_compression = { + ext: compression for compression, ext in icom._compression_to_extension.items() + } + + def compress_file(self, src_path, dest_path, compression): + if compression is None: + shutil.copyfile(src_path, dest_path) + return + + if compression == "gzip": + f = gzip.open(dest_path, "w") + elif compression == "bz2": + f = bz2.BZ2File(dest_path, "w") + elif compression == "zip": + with zipfile.ZipFile(dest_path, "w", compression=zipfile.ZIP_DEFLATED) as f: + f.write(src_path, os.path.basename(src_path)) + elif compression == "xz": + f = get_lzma_file()(dest_path, "w") + elif compression == "zstd": + f = import_optional_dependency("zstandard").open(dest_path, "wb") + else: + msg = f"Unrecognized compression type: {compression}" + raise ValueError(msg) + + if compression != "zip": + with open(src_path, "rb") as fh, f: + f.write(fh.read()) + + def test_write_explicit(self, compression, get_random_path): + base = get_random_path + path1 = base + ".compressed" + path2 = base + ".raw" + + with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2: + df = tm.makeDataFrame() + + # write to compressed file + df.to_pickle(p1, compression=compression) + + # decompress + with tm.decompress_file(p1, compression=compression) as f: + with open(p2, "wb") as fh: + fh.write(f.read()) + + # read decompressed file + df2 = pd.read_pickle(p2, compression=None) + + tm.assert_frame_equal(df, df2) + + @pytest.mark.parametrize("compression", ["", "None", "bad", "7z"]) + def test_write_explicit_bad(self, compression, get_random_path): + with pytest.raises(ValueError, match="Unrecognized compression type"): + with tm.ensure_clean(get_random_path) as path: + df = tm.makeDataFrame() + df.to_pickle(path, compression=compression) + + def test_write_infer(self, compression_ext, get_random_path): + base = get_random_path + path1 = base + compression_ext + path2 = base + ".raw" + 
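+        # infer the expected compression from the file extension
+        # (e.g. ".gz" -> "gzip"), matching what to_pickle chose when writing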
compression = self._extension_to_compression.get(compression_ext.lower()) + + with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2: + df = tm.makeDataFrame() + + # write to compressed file by inferred compression method + df.to_pickle(p1) + + # decompress + with tm.decompress_file(p1, compression=compression) as f: + with open(p2, "wb") as fh: + fh.write(f.read()) + + # read decompressed file + df2 = pd.read_pickle(p2, compression=None) + + tm.assert_frame_equal(df, df2) + + def test_read_explicit(self, compression, get_random_path): + base = get_random_path + path1 = base + ".raw" + path2 = base + ".compressed" + + with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2: + df = tm.makeDataFrame() + + # write to uncompressed file + df.to_pickle(p1, compression=None) + + # compress + self.compress_file(p1, p2, compression=compression) + + # read compressed file + df2 = pd.read_pickle(p2, compression=compression) + + tm.assert_frame_equal(df, df2) + + def test_read_infer(self, compression_ext, get_random_path): + base = get_random_path + path1 = base + ".raw" + path2 = base + compression_ext + compression = self._extension_to_compression.get(compression_ext.lower()) + + with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2: + df = tm.makeDataFrame() + + # write to uncompressed file + df.to_pickle(p1, compression=None) + + # compress + self.compress_file(p1, p2, compression=compression) + + # read compressed file by inferred compression method + df2 = pd.read_pickle(p2) + + tm.assert_frame_equal(df, df2) + + +# --------------------- +# test pickle compression +# --------------------- + + +class TestProtocol: + @pytest.mark.parametrize("protocol", [-1, 0, 1, 2]) + def test_read(self, protocol, get_random_path): + with tm.ensure_clean(get_random_path) as path: + df = tm.makeDataFrame() + df.to_pickle(path, protocol=protocol) + df2 = pd.read_pickle(path) + tm.assert_frame_equal(df, df2) + + +@pytest.mark.parametrize( + ["pickle_file", "excols"], + [ + ("test_py27.pkl", Index(["a", "b", "c"])), + ( + "test_mi_py27.pkl", + pd.MultiIndex.from_arrays([["a", "b", "c"], ["A", "B", "C"]]), + ), + ], +) +def test_unicode_decode_error(datapath, pickle_file, excols): + # pickle file written with py27, should be readable without raising + # UnicodeDecodeError, see GH#28645 and GH#31988 + path = datapath("io", "data", "pickle", pickle_file) + df = pd.read_pickle(path) + + # just test the columns are correct since the values are random + tm.assert_index_equal(df.columns, excols) + + +# --------------------- +# tests for buffer I/O +# --------------------- + + +def test_pickle_buffer_roundtrip(): + with tm.ensure_clean() as path: + df = tm.makeDataFrame() + with open(path, "wb") as fh: + df.to_pickle(fh) + with open(path, "rb") as fh: + result = pd.read_pickle(fh) + tm.assert_frame_equal(df, result) + + +# --------------------- +# tests for URL I/O +# --------------------- + + +@pytest.mark.parametrize( + "mockurl", ["http://url.com", "ftp://test.com", "http://gzip.com"] +) +def test_pickle_generalurl_read(monkeypatch, mockurl): + def python_pickler(obj, path): + with open(path, "wb") as fh: + pickle.dump(obj, fh, protocol=-1) + + class MockReadResponse: + def __init__(self, path): + self.file = open(path, "rb") + if "gzip" in path: + self.headers = {"Content-Encoding": "gzip"} + else: + self.headers = {"Content-Encoding": None} + + def __enter__(self): + return self + + def __exit__(self, *args): + self.close() + + def read(self): + return self.file.read() + + def close(self): + 
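+            # hand the close through to the real file object opened in __init__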
return self.file.close()
+
+    with tm.ensure_clean() as path:
+
+        def mock_urlopen_read(*args, **kwargs):
+            return MockReadResponse(path)
+
+        df = tm.makeDataFrame()
+        python_pickler(df, path)
+        monkeypatch.setattr("urllib.request.urlopen", mock_urlopen_read)
+        result = pd.read_pickle(mockurl)
+        tm.assert_frame_equal(df, result)
+
+
+@td.skip_if_no("fsspec")
+def test_pickle_fsspec_roundtrip():
+    with tm.ensure_clean():
+        mockurl = "memory://afile"
+        df = tm.makeDataFrame()
+        df.to_pickle(mockurl)
+        result = pd.read_pickle(mockurl)
+        tm.assert_frame_equal(df, result)
+
+
+class MyTz(datetime.tzinfo):
+    def __init__(self):
+        pass
+
+
+def test_read_pickle_with_subclass():
+    # GH 12163
+    expected = Series(dtype=object), MyTz()
+    result = tm.round_trip_pickle(expected)
+
+    tm.assert_series_equal(result[0], expected[0])
+    assert isinstance(result[1], MyTz)
+
+
+def test_pickle_binary_object_compression(compression):
+    """
+    Read/write from binary file-objects w/wo compression.
+
+    GH 26237, GH 29054, and GH 29570
+    """
+    df = tm.makeDataFrame()
+
+    # reference for compression
+    with tm.ensure_clean() as path:
+        df.to_pickle(path, compression=compression)
+        reference = Path(path).read_bytes()
+
+    # write
+    buffer = io.BytesIO()
+    df.to_pickle(buffer, compression=compression)
+    buffer.seek(0)
+
+    # gzip and zip save the filename: cannot compare the compressed content
+    assert buffer.getvalue() == reference or compression in ("gzip", "zip")
+
+    # read
+    read_df = pd.read_pickle(buffer, compression=compression)
+    buffer.seek(0)
+    tm.assert_frame_equal(df, read_df)
+
+
+def test_pickle_dataframe_with_multilevel_index(
+    multiindex_year_month_day_dataframe_random_data,
+    multiindex_dataframe_random_data,
+):
+    ymd = multiindex_year_month_day_dataframe_random_data
+    frame = multiindex_dataframe_random_data
+
+    def _test_roundtrip(frame):
+        unpickled = tm.round_trip_pickle(frame)
+        tm.assert_frame_equal(frame, unpickled)
+
+    _test_roundtrip(frame)
+    _test_roundtrip(frame.T)
+    _test_roundtrip(ymd)
+    _test_roundtrip(ymd.T)
+
+
+def test_pickle_timeseries_periodindex():
+    # GH#2891
+    prng = period_range("1/1/2011", "1/1/2012", freq="M")
+    ts = Series(np.random.randn(len(prng)), prng)
+    new_ts = tm.round_trip_pickle(ts)
+    assert new_ts.index.freq == "M"
+
+
+@pytest.mark.parametrize(
+    "name", [777, 777.0, "name", datetime.datetime(2001, 11, 11), (1, 2)]
+)
+def test_pickle_preserve_name(name):
+
+    unpickled = tm.round_trip_pickle(tm.makeTimeSeries(name=name))
+    assert unpickled.name == name
+
+
+def test_pickle_datetimes(datetime_series):
+    unp_ts = tm.round_trip_pickle(datetime_series)
+    tm.assert_series_equal(unp_ts, datetime_series)
+
+
+def test_pickle_strings(string_series):
+    unp_series = tm.round_trip_pickle(string_series)
+    tm.assert_series_equal(unp_series, string_series)
+
+
+@td.skip_array_manager_invalid_test
+def test_pickle_preserves_block_ndim():
+    # GH#37631
+    ser = Series(list("abc")).astype("category").iloc[[0]]
+    res = tm.round_trip_pickle(ser)
+
+    assert res._mgr.blocks[0].ndim == 1
+    assert res._mgr.blocks[0].shape == (1,)
+
+    # GH#37631 OP issue was about indexing, underlying problem was pickle
+    tm.assert_series_equal(res[[True]], ser)
+
+
+@pytest.mark.parametrize("protocol", [pickle.DEFAULT_PROTOCOL, pickle.HIGHEST_PROTOCOL])
+def test_pickle_big_dataframe_compression(protocol, compression):
+    # GH#39002
+    df = pd.DataFrame(range(100000))
+    result = tm.round_trip_pathlib(
+        partial(df.to_pickle, protocol=protocol, compression=compression),
+        partial(pd.read_pickle,
compression=compression), + ) + tm.assert_frame_equal(df, result) + + +def test_pickle_frame_v124_unpickle_130(): + # GH#42345 DataFrame created in 1.2.x, unpickle in 1.3.x + path = os.path.join(legacy_dirname, "1.2.4", "empty_frame_v1_2_4-GH#42345.pkl") + with open(path, "rb") as fd: + df = pickle.load(fd) + + expected = pd.DataFrame() + tm.assert_frame_equal(df, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/test_s3.py b/.local/lib/python3.8/site-packages/pandas/tests/io/test_s3.py new file mode 100644 index 0000000..6702d58 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/test_s3.py @@ -0,0 +1,50 @@ +from io import BytesIO +import os + +import pytest + +import pandas.util._test_decorators as td + +from pandas import read_csv +import pandas._testing as tm + + +def test_streaming_s3_objects(): + # GH17135 + # botocore gained iteration support in 1.10.47, can now be used in read_* + pytest.importorskip("botocore", minversion="1.10.47") + from botocore.response import StreamingBody + + data = [b"foo,bar,baz\n1,2,3\n4,5,6\n", b"just,the,header\n"] + for el in data: + body = StreamingBody(BytesIO(el), content_length=len(el)) + read_csv(body) + + +@td.skip_if_no("s3fs") +@pytest.mark.network +@tm.network +def test_read_without_creds_from_pub_bucket(): + # GH 34626 + # Use Amazon Open Data Registry - https://registry.opendata.aws/gdelt + result = read_csv("s3://gdelt-open-data/events/1981.csv", nrows=3) + assert len(result) == 3 + + +@td.skip_if_no("s3fs") +@pytest.mark.network +@tm.network +def test_read_with_creds_from_pub_bucket(): + # Ensure we can read from a public bucket with credentials + # GH 34626 + # Use Amazon Open Data Registry - https://registry.opendata.aws/gdelt + + with tm.ensure_safe_environment_variables(): + # temporary workaround as moto fails for botocore >= 1.11 otherwise, + # see https://github.com/spulec/moto/issues/1924 & 1952 + os.environ.setdefault("AWS_ACCESS_KEY_ID", "foobar_key") + os.environ.setdefault("AWS_SECRET_ACCESS_KEY", "foobar_secret") + df = read_csv( + "s3://gdelt-open-data/events/1981.csv", nrows=5, sep="\t", header=None + ) + assert len(df) == 5 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/test_spss.py b/.local/lib/python3.8/site-packages/pandas/tests/io/test_spss.py new file mode 100644 index 0000000..a4894ff --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/test_spss.py @@ -0,0 +1,76 @@ +from pathlib import Path + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + +pyreadstat = pytest.importorskip("pyreadstat") + + +@pytest.mark.parametrize("path_klass", [lambda p: p, Path]) +def test_spss_labelled_num(path_klass, datapath): + # test file from the Haven project (https://haven.tidyverse.org/) + fname = path_klass(datapath("io", "data", "spss", "labelled-num.sav")) + + df = pd.read_spss(fname, convert_categoricals=True) + expected = pd.DataFrame({"VAR00002": "This is one"}, index=[0]) + expected["VAR00002"] = pd.Categorical(expected["VAR00002"]) + tm.assert_frame_equal(df, expected) + + df = pd.read_spss(fname, convert_categoricals=False) + expected = pd.DataFrame({"VAR00002": 1.0}, index=[0]) + tm.assert_frame_equal(df, expected) + + +def test_spss_labelled_num_na(datapath): + # test file from the Haven project (https://haven.tidyverse.org/) + fname = datapath("io", "data", "spss", "labelled-num-na.sav") + + df = pd.read_spss(fname, convert_categoricals=True) + expected = pd.DataFrame({"VAR00002": ["This is one", None]}) + 
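+    # the unlabelled missing value round-trips as None inside the categorical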
expected["VAR00002"] = pd.Categorical(expected["VAR00002"]) + tm.assert_frame_equal(df, expected) + + df = pd.read_spss(fname, convert_categoricals=False) + expected = pd.DataFrame({"VAR00002": [1.0, np.nan]}) + tm.assert_frame_equal(df, expected) + + +def test_spss_labelled_str(datapath): + # test file from the Haven project (https://haven.tidyverse.org/) + fname = datapath("io", "data", "spss", "labelled-str.sav") + + df = pd.read_spss(fname, convert_categoricals=True) + expected = pd.DataFrame({"gender": ["Male", "Female"]}) + expected["gender"] = pd.Categorical(expected["gender"]) + tm.assert_frame_equal(df, expected) + + df = pd.read_spss(fname, convert_categoricals=False) + expected = pd.DataFrame({"gender": ["M", "F"]}) + tm.assert_frame_equal(df, expected) + + +def test_spss_umlauts(datapath): + # test file from the Haven project (https://haven.tidyverse.org/) + fname = datapath("io", "data", "spss", "umlauts.sav") + + df = pd.read_spss(fname, convert_categoricals=True) + expected = pd.DataFrame( + {"var1": ["the ä umlaut", "the ü umlaut", "the ä umlaut", "the ö umlaut"]} + ) + expected["var1"] = pd.Categorical(expected["var1"]) + tm.assert_frame_equal(df, expected) + + df = pd.read_spss(fname, convert_categoricals=False) + expected = pd.DataFrame({"var1": [1.0, 2.0, 1.0, 3.0]}) + tm.assert_frame_equal(df, expected) + + +def test_spss_usecols(datapath): + # usecols must be list-like + fname = datapath("io", "data", "spss", "labelled-num.sav") + + with pytest.raises(TypeError, match="usecols must be list-like."): + pd.read_spss(fname, usecols="VAR00002") diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/test_sql.py b/.local/lib/python3.8/site-packages/pandas/tests/io/test_sql.py new file mode 100644 index 0000000..1731495 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/test_sql.py @@ -0,0 +1,2999 @@ +"""SQL io tests + +The SQL tests are broken down in different classes: + +- `PandasSQLTest`: base class with common methods for all test classes +- Tests for the public API (only tests with sqlite3) + - `_TestSQLApi` base class + - `TestSQLApi`: test the public API with sqlalchemy engine + - `TestSQLiteFallbackApi`: test the public API with a sqlite DBAPI + connection +- Tests for the different SQL flavors (flavor specific type conversions) + - Tests for the sqlalchemy mode: `_TestSQLAlchemy` is the base class with + common methods, `_TestSQLAlchemyConn` tests the API with a SQLAlchemy + Connection object. 
The different tested flavors (sqlite3, MySQL, + PostgreSQL) derive from the base class + - Tests for the fallback mode (`TestSQLiteFallback`) + +""" +from __future__ import annotations + +import csv +from datetime import ( + date, + datetime, + time, +) +from io import StringIO +from pathlib import Path +import sqlite3 + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas.core.dtypes.common import ( + is_datetime64_dtype, + is_datetime64tz_dtype, +) + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + Timestamp, + concat, + date_range, + isna, + to_datetime, + to_timedelta, +) +import pandas._testing as tm + +import pandas.io.sql as sql +from pandas.io.sql import ( + SQLAlchemyEngine, + SQLDatabase, + SQLiteDatabase, + _gt14, + get_engine, + pandasSQL_builder, + read_sql_query, + read_sql_table, +) + +try: + import sqlalchemy + + SQLALCHEMY_INSTALLED = True +except ImportError: + SQLALCHEMY_INSTALLED = False + +SQL_STRINGS = { + "read_parameters": { + "sqlite": "SELECT * FROM iris WHERE Name=? AND SepalLength=?", + "mysql": "SELECT * FROM iris WHERE `Name`=%s AND `SepalLength`=%s", + "postgresql": 'SELECT * FROM iris WHERE "Name"=%s AND "SepalLength"=%s', + }, + "read_named_parameters": { + "sqlite": """ + SELECT * FROM iris WHERE Name=:name AND SepalLength=:length + """, + "mysql": """ + SELECT * FROM iris WHERE + `Name`=%(name)s AND `SepalLength`=%(length)s + """, + "postgresql": """ + SELECT * FROM iris WHERE + "Name"=%(name)s AND "SepalLength"=%(length)s + """, + }, + "read_no_parameters_with_percent": { + "sqlite": "SELECT * FROM iris WHERE Name LIKE '%'", + "mysql": "SELECT * FROM iris WHERE `Name` LIKE '%'", + "postgresql": "SELECT * FROM iris WHERE \"Name\" LIKE '%'", + }, +} + + +def iris_table_metadata(dialect: str): + from sqlalchemy import ( + REAL, + Column, + Float, + MetaData, + String, + Table, + ) + + dtype = Float if dialect == "postgresql" else REAL + metadata = MetaData() + iris = Table( + "iris", + metadata, + Column("SepalLength", dtype), + Column("SepalWidth", dtype), + Column("PetalLength", dtype), + Column("PetalWidth", dtype), + Column("Name", String(200)), + ) + return iris + + +def create_and_load_iris_sqlite3(conn: sqlite3.Connection, iris_file: Path): + cur = conn.cursor() + stmt = """CREATE TABLE iris ( + "SepalLength" REAL, + "SepalWidth" REAL, + "PetalLength" REAL, + "PetalWidth" REAL, + "Name" TEXT + )""" + cur.execute(stmt) + with iris_file.open(newline=None) as csvfile: + reader = csv.reader(csvfile) + next(reader) + stmt = "INSERT INTO iris VALUES(?, ?, ?, ?, ?)" + cur.executemany(stmt, reader) + + +def create_and_load_iris(conn, iris_file: Path, dialect: str): + from sqlalchemy import insert + from sqlalchemy.engine import Engine + + iris = iris_table_metadata(dialect) + iris.drop(conn, checkfirst=True) + iris.create(bind=conn) + + with iris_file.open(newline=None) as csvfile: + reader = csv.reader(csvfile) + header = next(reader) + params = [{key: value for key, value in zip(header, row)} for row in reader] + stmt = insert(iris).values(params) + if isinstance(conn, Engine): + with conn.connect() as conn: + with conn.begin(): + conn.execute(stmt) + else: + conn.execute(stmt) + + +def create_and_load_iris_view(conn): + stmt = "CREATE VIEW iris_view AS SELECT * FROM iris" + if isinstance(conn, sqlite3.Connection): + cur = conn.cursor() + cur.execute(stmt) + else: + from sqlalchemy import text + from sqlalchemy.engine import Engine + + stmt = text(stmt) + if 
isinstance(conn, Engine): + with conn.connect() as conn: + with conn.begin(): + conn.execute(stmt) + else: + conn.execute(stmt) + + +def types_table_metadata(dialect: str): + from sqlalchemy import ( + TEXT, + Boolean, + Column, + DateTime, + Float, + Integer, + MetaData, + Table, + ) + + date_type = TEXT if dialect == "sqlite" else DateTime + bool_type = Integer if dialect == "sqlite" else Boolean + metadata = MetaData() + types = Table( + "types", + metadata, + Column("TextCol", TEXT), + Column("DateCol", date_type), + Column("IntDateCol", Integer), + Column("IntDateOnlyCol", Integer), + Column("FloatCol", Float), + Column("IntCol", Integer), + Column("BoolCol", bool_type), + Column("IntColWithNull", Integer), + Column("BoolColWithNull", bool_type), + ) + if dialect == "postgresql": + types.append_column(Column("DateColWithTz", DateTime(timezone=True))) + return types + + +def create_and_load_types_sqlite3(conn: sqlite3.Connection, types_data: list[dict]): + cur = conn.cursor() + stmt = """CREATE TABLE types ( + "TextCol" TEXT, + "DateCol" TEXT, + "IntDateCol" INTEGER, + "IntDateOnlyCol" INTEGER, + "FloatCol" REAL, + "IntCol" INTEGER, + "BoolCol" INTEGER, + "IntColWithNull" INTEGER, + "BoolColWithNull" INTEGER + )""" + cur.execute(stmt) + + stmt = """ + INSERT INTO types + VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?) + """ + cur.executemany(stmt, types_data) + + +def create_and_load_types(conn, types_data: list[dict], dialect: str): + from sqlalchemy import insert + from sqlalchemy.engine import Engine + + types = types_table_metadata(dialect) + types.drop(conn, checkfirst=True) + types.create(bind=conn) + + stmt = insert(types).values(types_data) + if isinstance(conn, Engine): + with conn.connect() as conn: + with conn.begin(): + conn.execute(stmt) + else: + conn.execute(stmt) + + +def check_iris_frame(frame: DataFrame): + pytype = frame.dtypes[0].type + row = frame.iloc[0] + assert issubclass(pytype, np.floating) + tm.equalContents(row.values, [5.1, 3.5, 1.4, 0.2, "Iris-setosa"]) + + +def count_rows(conn, table_name: str): + stmt = f"SELECT count(*) AS count_1 FROM {table_name}" + if isinstance(conn, sqlite3.Connection): + cur = conn.cursor() + result = cur.execute(stmt) + else: + from sqlalchemy import text + from sqlalchemy.engine import Engine + + stmt = text(stmt) + if isinstance(conn, Engine): + with conn.connect() as conn: + result = conn.execute(stmt) + else: + result = conn.execute(stmt) + return result.fetchone()[0] + + +@pytest.fixture +def iris_path(datapath): + iris_path = datapath("io", "data", "csv", "iris.csv") + return Path(iris_path) + + +@pytest.fixture +def types_data(): + return [ + { + "TextCol": "first", + "DateCol": "2000-01-03 00:00:00", + "IntDateCol": 535852800, + "IntDateOnlyCol": 20101010, + "FloatCol": 10.10, + "IntCol": 1, + "BoolCol": False, + "IntColWithNull": 1, + "BoolColWithNull": False, + "DateColWithTz": "2000-01-01 00:00:00-08:00", + }, + { + "TextCol": "first", + "DateCol": "2000-01-04 00:00:00", + "IntDateCol": 1356998400, + "IntDateOnlyCol": 20101212, + "FloatCol": 10.10, + "IntCol": 1, + "BoolCol": False, + "IntColWithNull": None, + "BoolColWithNull": None, + "DateColWithTz": "2000-06-01 00:00:00-07:00", + }, + ] + + +@pytest.fixture +def types_data_frame(types_data): + dtypes = { + "TextCol": "str", + "DateCol": "str", + "IntDateCol": "int64", + "IntDateOnlyCol": "int64", + "FloatCol": "float", + "IntCol": "int64", + "BoolCol": "int64", + "IntColWithNull": "float", + "BoolColWithNull": "float", + } + df = DataFrame(types_data) + return 
df[dtypes.keys()].astype(dtypes) + + +@pytest.fixture +def test_frame1(): + columns = ["index", "A", "B", "C", "D"] + data = [ + ( + "2000-01-03 00:00:00", + 0.980268513777, + 3.68573087906, + -0.364216805298, + -1.15973806169, + ), + ( + "2000-01-04 00:00:00", + 1.04791624281, + -0.0412318367011, + -0.16181208307, + 0.212549316967, + ), + ( + "2000-01-05 00:00:00", + 0.498580885705, + 0.731167677815, + -0.537677223318, + 1.34627041952, + ), + ( + "2000-01-06 00:00:00", + 1.12020151869, + 1.56762092543, + 0.00364077397681, + 0.67525259227, + ), + ] + return DataFrame(data, columns=columns) + + +@pytest.fixture +def test_frame3(): + columns = ["index", "A", "B"] + data = [ + ("2000-01-03 00:00:00", 2 ** 31 - 1, -1.987670), + ("2000-01-04 00:00:00", -29, -0.0412318367011), + ("2000-01-05 00:00:00", 20000, 0.731167677815), + ("2000-01-06 00:00:00", -290867, 1.56762092543), + ] + return DataFrame(data, columns=columns) + + +@pytest.fixture +def mysql_pymysql_engine(iris_path, types_data): + sqlalchemy = pytest.importorskip("sqlalchemy") + pymysql = pytest.importorskip("pymysql") + engine = sqlalchemy.create_engine( + "mysql+pymysql://root@localhost:3306/pandas", + connect_args={"client_flag": pymysql.constants.CLIENT.MULTI_STATEMENTS}, + ) + check_target = sqlalchemy.inspect(engine) if _gt14() else engine + if not check_target.has_table("iris"): + create_and_load_iris(engine, iris_path, "mysql") + if not check_target.has_table("types"): + for entry in types_data: + entry.pop("DateColWithTz") + create_and_load_types(engine, types_data, "mysql") + yield engine + with engine.connect() as conn: + with conn.begin(): + stmt = sqlalchemy.text("DROP TABLE IF EXISTS test_frame;") + conn.execute(stmt) + engine.dispose() + + +@pytest.fixture +def mysql_pymysql_conn(mysql_pymysql_engine): + yield mysql_pymysql_engine.connect() + + +@pytest.fixture +def postgresql_psycopg2_engine(iris_path, types_data): + sqlalchemy = pytest.importorskip("sqlalchemy") + pytest.importorskip("psycopg2") + engine = sqlalchemy.create_engine( + "postgresql+psycopg2://postgres:postgres@localhost:5432/pandas" + ) + check_target = sqlalchemy.inspect(engine) if _gt14() else engine + if not check_target.has_table("iris"): + create_and_load_iris(engine, iris_path, "postgresql") + if not check_target.has_table("types"): + create_and_load_types(engine, types_data, "postgresql") + yield engine + with engine.connect() as conn: + with conn.begin(): + stmt = sqlalchemy.text("DROP TABLE IF EXISTS test_frame;") + conn.execute(stmt) + engine.dispose() + + +@pytest.fixture +def postgresql_psycopg2_conn(postgresql_psycopg2_engine): + yield postgresql_psycopg2_engine.connect() + + +@pytest.fixture +def sqlite_engine(): + sqlalchemy = pytest.importorskip("sqlalchemy") + engine = sqlalchemy.create_engine("sqlite://") + yield engine + engine.dispose() + + +@pytest.fixture +def sqlite_conn(sqlite_engine): + yield sqlite_engine.connect() + + +@pytest.fixture +def sqlite_iris_engine(sqlite_engine, iris_path): + create_and_load_iris(sqlite_engine, iris_path, "sqlite") + return sqlite_engine + + +@pytest.fixture +def sqlite_iris_conn(sqlite_iris_engine): + yield sqlite_iris_engine.connect() + + +@pytest.fixture +def sqlite_buildin(): + conn = sqlite3.connect(":memory:") + yield conn + conn.close() + + +@pytest.fixture +def sqlite_buildin_iris(sqlite_buildin, iris_path): + create_and_load_iris_sqlite3(sqlite_buildin, iris_path) + return sqlite_buildin + + +mysql_connectable = [ + "mysql_pymysql_engine", + "mysql_pymysql_conn", +] + + 
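+# These *_connectable lists hold fixture *names*: tests parametrize over the
+# strings and resolve them lazily with request.getfixturevalue, so only the
+# backend a given test asks for is ever constructed, e.g.:
+#
+#   @pytest.mark.parametrize("conn", all_connectable)
+#   def test_example(conn, request):
+#       conn = request.getfixturevalue(conn)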
+postgresql_connectable = [ + "postgresql_psycopg2_engine", + "postgresql_psycopg2_conn", +] + +sqlite_connectable = [ + "sqlite_engine", + "sqlite_conn", +] + +sqlite_iris_connectable = [ + "sqlite_iris_engine", + "sqlite_iris_conn", +] + +sqlalchemy_connectable = mysql_connectable + postgresql_connectable + sqlite_connectable + +sqlalchemy_connectable_iris = ( + mysql_connectable + postgresql_connectable + sqlite_iris_connectable +) + +all_connectable = sqlalchemy_connectable + ["sqlite_buildin"] + +all_connectable_iris = sqlalchemy_connectable_iris + ["sqlite_buildin_iris"] + + +@pytest.mark.db +@pytest.mark.parametrize("conn", all_connectable) +@pytest.mark.parametrize("method", [None, "multi"]) +def test_to_sql(conn, method, test_frame1, request): + conn = request.getfixturevalue(conn) + pandasSQL = pandasSQL_builder(conn) + pandasSQL.to_sql(test_frame1, "test_frame", method=method) + assert pandasSQL.has_table("test_frame") + assert count_rows(conn, "test_frame") == len(test_frame1) + + +@pytest.mark.db +@pytest.mark.parametrize("conn", all_connectable) +@pytest.mark.parametrize("mode, num_row_coef", [("replace", 1), ("append", 2)]) +def test_to_sql_exist(conn, mode, num_row_coef, test_frame1, request): + conn = request.getfixturevalue(conn) + pandasSQL = pandasSQL_builder(conn) + pandasSQL.to_sql(test_frame1, "test_frame", if_exists="fail") + pandasSQL.to_sql(test_frame1, "test_frame", if_exists=mode) + assert pandasSQL.has_table("test_frame") + assert count_rows(conn, "test_frame") == num_row_coef * len(test_frame1) + + +@pytest.mark.db +@pytest.mark.parametrize("conn", all_connectable) +def test_to_sql_exist_fail(conn, test_frame1, request): + conn = request.getfixturevalue(conn) + pandasSQL = pandasSQL_builder(conn) + pandasSQL.to_sql(test_frame1, "test_frame", if_exists="fail") + assert pandasSQL.has_table("test_frame") + + msg = "Table 'test_frame' already exists" + with pytest.raises(ValueError, match=msg): + pandasSQL.to_sql(test_frame1, "test_frame", if_exists="fail") + + +@pytest.mark.db +@pytest.mark.parametrize("conn", all_connectable_iris) +def test_read_iris(conn, request): + conn = request.getfixturevalue(conn) + pandasSQL = pandasSQL_builder(conn) + iris_frame = pandasSQL.read_query("SELECT * FROM iris") + check_iris_frame(iris_frame) + + +@pytest.mark.db +@pytest.mark.parametrize("conn", sqlalchemy_connectable) +def test_to_sql_callable(conn, test_frame1, request): + conn = request.getfixturevalue(conn) + pandasSQL = pandasSQL_builder(conn) + + check = [] # used to double check function below is really being used + + def sample(pd_table, conn, keys, data_iter): + check.append(1) + data = [dict(zip(keys, row)) for row in data_iter] + conn.execute(pd_table.table.insert(), data) + + pandasSQL.to_sql(test_frame1, "test_frame", method=sample) + assert pandasSQL.has_table("test_frame") + assert check == [1] + assert count_rows(conn, "test_frame") == len(test_frame1) + + +@pytest.mark.db +@pytest.mark.parametrize("conn", mysql_connectable) +def test_default_type_conversion(conn, request): + conn = request.getfixturevalue(conn) + df = sql.read_sql_table("types", conn) + + assert issubclass(df.FloatCol.dtype.type, np.floating) + assert issubclass(df.IntCol.dtype.type, np.integer) + + # MySQL has no real BOOL type (it's an alias for TINYINT) + assert issubclass(df.BoolCol.dtype.type, np.integer) + + # Int column with NA values stays as float + assert issubclass(df.IntColWithNull.dtype.type, np.floating) + + # Bool column with NA = int column with NA values => becomes float + 
assert issubclass(df.BoolColWithNull.dtype.type, np.floating) + + +@pytest.mark.db +@pytest.mark.parametrize("conn", mysql_connectable) +def test_read_procedure(conn, request): + conn = request.getfixturevalue(conn) + + # GH 7324 + # Although it is more an api test, it is added to the + # mysql tests as sqlite does not have stored procedures + from sqlalchemy import text + from sqlalchemy.engine import Engine + + df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]}) + df.to_sql("test_frame", conn, index=False) + + proc = """DROP PROCEDURE IF EXISTS get_testdb; + + CREATE PROCEDURE get_testdb () + + BEGIN + SELECT * FROM test_frame; + END""" + proc = text(proc) + if isinstance(conn, Engine): + with conn.connect() as engine_conn: + with engine_conn.begin(): + engine_conn.execute(proc) + else: + conn.execute(proc) + + res1 = sql.read_sql_query("CALL get_testdb();", conn) + tm.assert_frame_equal(df, res1) + + # test delegation to read_sql_query + res2 = sql.read_sql("CALL get_testdb();", conn) + tm.assert_frame_equal(df, res2) + + +@pytest.mark.db +@pytest.mark.parametrize("conn", postgresql_connectable) +def test_copy_from_callable_insertion_method(conn, request): + # GH 8953 + # Example in io.rst found under _io.sql.method + # not available in sqlite, mysql + def psql_insert_copy(table, conn, keys, data_iter): + # gets a DBAPI connection that can provide a cursor + dbapi_conn = conn.connection + with dbapi_conn.cursor() as cur: + s_buf = StringIO() + writer = csv.writer(s_buf) + writer.writerows(data_iter) + s_buf.seek(0) + + columns = ", ".join([f'"{k}"' for k in keys]) + if table.schema: + table_name = f"{table.schema}.{table.name}" + else: + table_name = table.name + + sql_query = f"COPY {table_name} ({columns}) FROM STDIN WITH CSV" + cur.copy_expert(sql=sql_query, file=s_buf) + + conn = request.getfixturevalue(conn) + expected = DataFrame({"col1": [1, 2], "col2": [0.1, 0.2], "col3": ["a", "n"]}) + expected.to_sql("test_frame", conn, index=False, method=psql_insert_copy) + result = sql.read_sql_table("test_frame", conn) + tm.assert_frame_equal(result, expected) + + +class MixInBase: + def teardown_method(self, method): + # if setup fails, there may not be a connection to close. + if hasattr(self, "conn"): + for tbl in self._get_all_tables(): + self.drop_table(tbl) + self._close_conn() + + +class SQLiteMixIn(MixInBase): + def drop_table(self, table_name): + self.conn.execute( + f"DROP TABLE IF EXISTS {sql._get_valid_sqlite_name(table_name)}" + ) + self.conn.commit() + + def _get_all_tables(self): + c = self.conn.execute("SELECT name FROM sqlite_master WHERE type='table'") + return [table[0] for table in c.fetchall()] + + def _close_conn(self): + self.conn.close() + + +class SQLAlchemyMixIn(MixInBase): + def drop_table(self, table_name): + sql.SQLDatabase(self.conn).drop_table(table_name) + + def _get_all_tables(self): + from sqlalchemy import inspect + + return inspect(self.conn).get_table_names() + + def _close_conn(self): + # https://docs.sqlalchemy.org/en/13/core/connections.html#engine-disposal + self.conn.dispose() + + +class PandasSQLTest: + """ + Base class with common private methods for SQLAlchemy and fallback cases. 
+ + """ + + @pytest.fixture + def load_iris_data(self, iris_path): + if not hasattr(self, "conn"): + self.setup_connect() + self.drop_table("iris") + if isinstance(self.conn, sqlite3.Connection): + create_and_load_iris_sqlite3(self.conn, iris_path) + else: + create_and_load_iris(self.conn, iris_path, self.flavor) + + @pytest.fixture + def load_types_data(self, types_data): + if not hasattr(self, "conn"): + self.setup_connect() + if self.flavor != "postgresql": + for entry in types_data: + entry.pop("DateColWithTz") + if isinstance(self.conn, sqlite3.Connection): + types_data = [tuple(entry.values()) for entry in types_data] + create_and_load_types_sqlite3(self.conn, types_data) + else: + create_and_load_types(self.conn, types_data, self.flavor) + + def _read_sql_iris_parameter(self): + query = SQL_STRINGS["read_parameters"][self.flavor] + params = ["Iris-setosa", 5.1] + iris_frame = self.pandasSQL.read_query(query, params=params) + check_iris_frame(iris_frame) + + def _read_sql_iris_named_parameter(self): + query = SQL_STRINGS["read_named_parameters"][self.flavor] + params = {"name": "Iris-setosa", "length": 5.1} + iris_frame = self.pandasSQL.read_query(query, params=params) + check_iris_frame(iris_frame) + + def _read_sql_iris_no_parameter_with_percent(self): + query = SQL_STRINGS["read_no_parameters_with_percent"][self.flavor] + iris_frame = self.pandasSQL.read_query(query, params=None) + check_iris_frame(iris_frame) + + def _to_sql_empty(self, test_frame1): + self.drop_table("test_frame1") + assert self.pandasSQL.to_sql(test_frame1.iloc[:0], "test_frame1") == 0 + + def _to_sql_with_sql_engine(self, test_frame1, engine="auto", **engine_kwargs): + """`to_sql` with the `engine` param""" + # mostly copied from this class's `_to_sql()` method + self.drop_table("test_frame1") + + assert ( + self.pandasSQL.to_sql( + test_frame1, "test_frame1", engine=engine, **engine_kwargs + ) + == 4 + ) + assert self.pandasSQL.has_table("test_frame1") + + num_entries = len(test_frame1) + num_rows = count_rows(self.conn, "test_frame1") + assert num_rows == num_entries + + # Nuke table + self.drop_table("test_frame1") + + def _roundtrip(self, test_frame1): + self.drop_table("test_frame_roundtrip") + assert self.pandasSQL.to_sql(test_frame1, "test_frame_roundtrip") == 4 + result = self.pandasSQL.read_query("SELECT * FROM test_frame_roundtrip") + + result.set_index("level_0", inplace=True) + # result.index.astype(int) + + result.index.name = None + + tm.assert_frame_equal(result, test_frame1) + + def _execute_sql(self): + # drop_sql = "DROP TABLE IF EXISTS test" # should already be done + iris_results = self.pandasSQL.execute("SELECT * FROM iris") + row = iris_results.fetchone() + tm.equalContents(row, [5.1, 3.5, 1.4, 0.2, "Iris-setosa"]) + + def _to_sql_save_index(self): + df = DataFrame.from_records( + [(1, 2.1, "line1"), (2, 1.5, "line2")], columns=["A", "B", "C"], index=["A"] + ) + assert self.pandasSQL.to_sql(df, "test_to_sql_saves_index") == 2 + ix_cols = self._get_index_columns("test_to_sql_saves_index") + assert ix_cols == [["A"]] + + def _transaction_test(self): + with self.pandasSQL.run_transaction() as trans: + stmt = "CREATE TABLE test_trans (A INT, B TEXT)" + if isinstance(self.pandasSQL, SQLiteDatabase): + trans.execute(stmt) + else: + from sqlalchemy import text + + stmt = text(stmt) + trans.execute(stmt) + + class DummyException(Exception): + pass + + # Make sure when transaction is rolled back, no rows get inserted + ins_sql = "INSERT INTO test_trans (A,B) VALUES (1, 'blah')" + if 
isinstance(self.pandasSQL, SQLDatabase): + from sqlalchemy import text + + ins_sql = text(ins_sql) + try: + with self.pandasSQL.run_transaction() as trans: + trans.execute(ins_sql) + raise DummyException("error") + except DummyException: + # ignore raised exception + pass + res = self.pandasSQL.read_query("SELECT * FROM test_trans") + assert len(res) == 0 + + # Make sure when transaction is committed, rows do get inserted + with self.pandasSQL.run_transaction() as trans: + trans.execute(ins_sql) + res2 = self.pandasSQL.read_query("SELECT * FROM test_trans") + assert len(res2) == 1 + + +# ----------------------------------------------------------------------------- +# -- Testing the public API + + +class _TestSQLApi(PandasSQLTest): + """ + Base class to test the public API. + + From this two classes are derived to run these tests for both the + sqlalchemy mode (`TestSQLApi`) and the fallback mode + (`TestSQLiteFallbackApi`). These tests are run with sqlite3. Specific + tests for the different sql flavours are included in `_TestSQLAlchemy`. + + Notes: + flavor can always be passed even in SQLAlchemy mode, + should be correctly ignored. + + we don't use drop_table because that isn't part of the public api + + """ + + flavor = "sqlite" + mode: str + + def setup_connect(self): + self.conn = self.connect() + + @pytest.fixture(autouse=True) + def setup_method(self, load_iris_data, load_types_data): + self.load_test_data_and_sql() + + def load_test_data_and_sql(self): + create_and_load_iris_view(self.conn) + + def test_read_sql_view(self): + iris_frame = sql.read_sql_query("SELECT * FROM iris_view", self.conn) + check_iris_frame(iris_frame) + + def test_read_sql_with_chunksize_no_result(self): + query = "SELECT * FROM iris_view WHERE SepalLength < 0.0" + with_batch = sql.read_sql_query(query, self.conn, chunksize=5) + without_batch = sql.read_sql_query(query, self.conn) + tm.assert_frame_equal(concat(with_batch), without_batch) + + def test_to_sql(self, test_frame1): + sql.to_sql(test_frame1, "test_frame1", self.conn) + assert sql.has_table("test_frame1", self.conn) + + def test_to_sql_fail(self, test_frame1): + sql.to_sql(test_frame1, "test_frame2", self.conn, if_exists="fail") + assert sql.has_table("test_frame2", self.conn) + + msg = "Table 'test_frame2' already exists" + with pytest.raises(ValueError, match=msg): + sql.to_sql(test_frame1, "test_frame2", self.conn, if_exists="fail") + + def test_to_sql_replace(self, test_frame1): + sql.to_sql(test_frame1, "test_frame3", self.conn, if_exists="fail") + # Add to table again + sql.to_sql(test_frame1, "test_frame3", self.conn, if_exists="replace") + assert sql.has_table("test_frame3", self.conn) + + num_entries = len(test_frame1) + num_rows = count_rows(self.conn, "test_frame3") + + assert num_rows == num_entries + + def test_to_sql_append(self, test_frame1): + assert sql.to_sql(test_frame1, "test_frame4", self.conn, if_exists="fail") == 4 + + # Add to table again + assert ( + sql.to_sql(test_frame1, "test_frame4", self.conn, if_exists="append") == 4 + ) + assert sql.has_table("test_frame4", self.conn) + + num_entries = 2 * len(test_frame1) + num_rows = count_rows(self.conn, "test_frame4") + + assert num_rows == num_entries + + def test_to_sql_type_mapping(self, test_frame3): + sql.to_sql(test_frame3, "test_frame5", self.conn, index=False) + result = sql.read_sql("SELECT * FROM test_frame5", self.conn) + + tm.assert_frame_equal(test_frame3, result) + + def test_to_sql_series(self): + s = Series(np.arange(5, dtype="int64"), name="series") + 
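+        # a Series is stored as a single-column table, so it reads back as
+        # its DataFrame form; hence the to_frame() comparison below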
sql.to_sql(s, "test_series", self.conn, index=False) + s2 = sql.read_sql_query("SELECT * FROM test_series", self.conn) + tm.assert_frame_equal(s.to_frame(), s2) + + def test_roundtrip(self, test_frame1): + sql.to_sql(test_frame1, "test_frame_roundtrip", con=self.conn) + result = sql.read_sql_query("SELECT * FROM test_frame_roundtrip", con=self.conn) + + # HACK! + result.index = test_frame1.index + result.set_index("level_0", inplace=True) + result.index.astype(int) + result.index.name = None + tm.assert_frame_equal(result, test_frame1) + + def test_roundtrip_chunksize(self, test_frame1): + sql.to_sql( + test_frame1, + "test_frame_roundtrip", + con=self.conn, + index=False, + chunksize=2, + ) + result = sql.read_sql_query("SELECT * FROM test_frame_roundtrip", con=self.conn) + tm.assert_frame_equal(result, test_frame1) + + def test_execute_sql(self): + # drop_sql = "DROP TABLE IF EXISTS test" # should already be done + iris_results = sql.execute("SELECT * FROM iris", con=self.conn) + row = iris_results.fetchone() + tm.equalContents(row, [5.1, 3.5, 1.4, 0.2, "Iris-setosa"]) + + def test_date_parsing(self): + # Test date parsing in read_sql + # No Parsing + df = sql.read_sql_query("SELECT * FROM types", self.conn) + assert not issubclass(df.DateCol.dtype.type, np.datetime64) + + df = sql.read_sql_query( + "SELECT * FROM types", self.conn, parse_dates=["DateCol"] + ) + assert issubclass(df.DateCol.dtype.type, np.datetime64) + assert df.DateCol.tolist() == [ + Timestamp(2000, 1, 3, 0, 0, 0), + Timestamp(2000, 1, 4, 0, 0, 0), + ] + + df = sql.read_sql_query( + "SELECT * FROM types", + self.conn, + parse_dates={"DateCol": "%Y-%m-%d %H:%M:%S"}, + ) + assert issubclass(df.DateCol.dtype.type, np.datetime64) + assert df.DateCol.tolist() == [ + Timestamp(2000, 1, 3, 0, 0, 0), + Timestamp(2000, 1, 4, 0, 0, 0), + ] + + df = sql.read_sql_query( + "SELECT * FROM types", self.conn, parse_dates=["IntDateCol"] + ) + assert issubclass(df.IntDateCol.dtype.type, np.datetime64) + assert df.IntDateCol.tolist() == [ + Timestamp(1986, 12, 25, 0, 0, 0), + Timestamp(2013, 1, 1, 0, 0, 0), + ] + + df = sql.read_sql_query( + "SELECT * FROM types", self.conn, parse_dates={"IntDateCol": "s"} + ) + assert issubclass(df.IntDateCol.dtype.type, np.datetime64) + assert df.IntDateCol.tolist() == [ + Timestamp(1986, 12, 25, 0, 0, 0), + Timestamp(2013, 1, 1, 0, 0, 0), + ] + + df = sql.read_sql_query( + "SELECT * FROM types", + self.conn, + parse_dates={"IntDateOnlyCol": "%Y%m%d"}, + ) + assert issubclass(df.IntDateOnlyCol.dtype.type, np.datetime64) + assert df.IntDateOnlyCol.tolist() == [ + Timestamp("2010-10-10"), + Timestamp("2010-12-12"), + ] + + @pytest.mark.parametrize("error", ["ignore", "raise", "coerce"]) + @pytest.mark.parametrize( + "read_sql, text, mode", + [ + (sql.read_sql, "SELECT * FROM types", ("sqlalchemy", "fallback")), + (sql.read_sql, "types", ("sqlalchemy")), + ( + sql.read_sql_query, + "SELECT * FROM types", + ("sqlalchemy", "fallback"), + ), + (sql.read_sql_table, "types", ("sqlalchemy")), + ], + ) + def test_custom_dateparsing_error( + self, read_sql, text, mode, error, types_data_frame + ): + if self.mode in mode: + expected = types_data_frame.astype({"DateCol": "datetime64[ns]"}) + + result = read_sql( + text, + con=self.conn, + parse_dates={ + "DateCol": {"errors": error}, + }, + ) + + tm.assert_frame_equal(result, expected) + + def test_date_and_index(self): + # Test case where same column appears in parse_date and index_col + + df = sql.read_sql_query( + "SELECT * FROM types", + self.conn, + 
index_col="DateCol", + parse_dates=["DateCol", "IntDateCol"], + ) + + assert issubclass(df.index.dtype.type, np.datetime64) + assert issubclass(df.IntDateCol.dtype.type, np.datetime64) + + def test_timedelta(self): + + # see #6921 + df = to_timedelta(Series(["00:00:01", "00:00:03"], name="foo")).to_frame() + with tm.assert_produces_warning(UserWarning): + result_count = df.to_sql("test_timedelta", self.conn) + assert result_count == 2 + result = sql.read_sql_query("SELECT * FROM test_timedelta", self.conn) + tm.assert_series_equal(result["foo"], df["foo"].view("int64")) + + def test_complex_raises(self): + df = DataFrame({"a": [1 + 1j, 2j]}) + msg = "Complex datatypes not supported" + with pytest.raises(ValueError, match=msg): + assert df.to_sql("test_complex", self.conn) is None + + @pytest.mark.parametrize( + "index_name,index_label,expected", + [ + # no index name, defaults to 'index' + (None, None, "index"), + # specifying index_label + (None, "other_label", "other_label"), + # using the index name + ("index_name", None, "index_name"), + # has index name, but specifying index_label + ("index_name", "other_label", "other_label"), + # index name is integer + (0, None, "0"), + # index name is None but index label is integer + (None, 0, "0"), + ], + ) + def test_to_sql_index_label(self, index_name, index_label, expected): + temp_frame = DataFrame({"col1": range(4)}) + temp_frame.index.name = index_name + query = "SELECT * FROM test_index_label" + sql.to_sql(temp_frame, "test_index_label", self.conn, index_label=index_label) + frame = sql.read_sql_query(query, self.conn) + assert frame.columns[0] == expected + + def test_to_sql_index_label_multiindex(self): + expected_row_count = 4 + temp_frame = DataFrame( + {"col1": range(4)}, + index=MultiIndex.from_product([("A0", "A1"), ("B0", "B1")]), + ) + + # no index name, defaults to 'level_0' and 'level_1' + result = sql.to_sql(temp_frame, "test_index_label", self.conn) + assert result == expected_row_count + frame = sql.read_sql_query("SELECT * FROM test_index_label", self.conn) + assert frame.columns[0] == "level_0" + assert frame.columns[1] == "level_1" + + # specifying index_label + result = sql.to_sql( + temp_frame, + "test_index_label", + self.conn, + if_exists="replace", + index_label=["A", "B"], + ) + assert result == expected_row_count + frame = sql.read_sql_query("SELECT * FROM test_index_label", self.conn) + assert frame.columns[:2].tolist() == ["A", "B"] + + # using the index name + temp_frame.index.names = ["A", "B"] + result = sql.to_sql( + temp_frame, "test_index_label", self.conn, if_exists="replace" + ) + assert result == expected_row_count + frame = sql.read_sql_query("SELECT * FROM test_index_label", self.conn) + assert frame.columns[:2].tolist() == ["A", "B"] + + # has index name, but specifying index_label + result = sql.to_sql( + temp_frame, + "test_index_label", + self.conn, + if_exists="replace", + index_label=["C", "D"], + ) + assert result == expected_row_count + frame = sql.read_sql_query("SELECT * FROM test_index_label", self.conn) + assert frame.columns[:2].tolist() == ["C", "D"] + + msg = "Length of 'index_label' should match number of levels, which is 2" + with pytest.raises(ValueError, match=msg): + sql.to_sql( + temp_frame, + "test_index_label", + self.conn, + if_exists="replace", + index_label="C", + ) + + def test_multiindex_roundtrip(self): + df = DataFrame.from_records( + [(1, 2.1, "line1"), (2, 1.5, "line2")], + columns=["A", "B", "C"], + index=["A", "B"], + ) + + df.to_sql("test_multiindex_roundtrip", 
self.conn) + result = sql.read_sql_query( + "SELECT * FROM test_multiindex_roundtrip", self.conn, index_col=["A", "B"] + ) + tm.assert_frame_equal(df, result, check_index_type=True) + + @pytest.mark.parametrize( + "dtype", + [ + None, + int, + float, + {"A": int, "B": float}, + ], + ) + def test_dtype_argument(self, dtype): + # GH10285 Add dtype argument to read_sql_query + df = DataFrame([[1.2, 3.4], [5.6, 7.8]], columns=["A", "B"]) + assert df.to_sql("test_dtype_argument", self.conn) == 2 + + expected = df.astype(dtype) + result = sql.read_sql_query( + "SELECT A, B FROM test_dtype_argument", con=self.conn, dtype=dtype + ) + + tm.assert_frame_equal(result, expected) + + def test_integer_col_names(self): + df = DataFrame([[1, 2], [3, 4]], columns=[0, 1]) + sql.to_sql(df, "test_frame_integer_col_names", self.conn, if_exists="replace") + + def test_get_schema(self, test_frame1): + create_sql = sql.get_schema(test_frame1, "test", con=self.conn) + assert "CREATE" in create_sql + + def test_get_schema_with_schema(self, test_frame1): + # GH28486 + create_sql = sql.get_schema(test_frame1, "test", con=self.conn, schema="pypi") + assert "CREATE TABLE pypi." in create_sql + + def test_get_schema_dtypes(self): + if self.mode == "sqlalchemy": + from sqlalchemy import Integer + + dtype = Integer + else: + dtype = "INTEGER" + + float_frame = DataFrame({"a": [1.1, 1.2], "b": [2.1, 2.2]}) + create_sql = sql.get_schema( + float_frame, "test", con=self.conn, dtype={"b": dtype} + ) + assert "CREATE" in create_sql + assert "INTEGER" in create_sql + + def test_get_schema_keys(self, test_frame1): + frame = DataFrame({"Col1": [1.1, 1.2], "Col2": [2.1, 2.2]}) + create_sql = sql.get_schema(frame, "test", con=self.conn, keys="Col1") + constraint_sentence = 'CONSTRAINT test_pk PRIMARY KEY ("Col1")' + assert constraint_sentence in create_sql + + # multiple columns as key (GH10385) + create_sql = sql.get_schema(test_frame1, "test", con=self.conn, keys=["A", "B"]) + constraint_sentence = 'CONSTRAINT test_pk PRIMARY KEY ("A", "B")' + assert constraint_sentence in create_sql + + def test_chunksize_read(self): + df = DataFrame(np.random.randn(22, 5), columns=list("abcde")) + df.to_sql("test_chunksize", self.conn, index=False) + + # reading the query in one time + res1 = sql.read_sql_query("select * from test_chunksize", self.conn) + + # reading the query in chunks with read_sql_query + res2 = DataFrame() + i = 0 + sizes = [5, 5, 5, 5, 2] + + for chunk in sql.read_sql_query( + "select * from test_chunksize", self.conn, chunksize=5 + ): + res2 = concat([res2, chunk], ignore_index=True) + assert len(chunk) == sizes[i] + i += 1 + + tm.assert_frame_equal(res1, res2) + + # reading the query in chunks with read_sql_query + if self.mode == "sqlalchemy": + res3 = DataFrame() + i = 0 + sizes = [5, 5, 5, 5, 2] + + for chunk in sql.read_sql_table("test_chunksize", self.conn, chunksize=5): + res3 = concat([res3, chunk], ignore_index=True) + assert len(chunk) == sizes[i] + i += 1 + + tm.assert_frame_equal(res1, res3) + + def test_categorical(self): + # GH8624 + # test that categorical gets written correctly as dense column + df = DataFrame( + { + "person_id": [1, 2, 3], + "person_name": ["John P. Doe", "Jane Dove", "John P. 
Doe"], + } + ) + df2 = df.copy() + df2["person_name"] = df2["person_name"].astype("category") + + df2.to_sql("test_categorical", self.conn, index=False) + res = sql.read_sql_query("SELECT * FROM test_categorical", self.conn) + + tm.assert_frame_equal(res, df) + + def test_unicode_column_name(self): + # GH 11431 + df = DataFrame([[1, 2], [3, 4]], columns=["\xe9", "b"]) + df.to_sql("test_unicode", self.conn, index=False) + + def test_escaped_table_name(self): + # GH 13206 + df = DataFrame({"A": [0, 1, 2], "B": [0.2, np.nan, 5.6]}) + df.to_sql("d1187b08-4943-4c8d-a7f6", self.conn, index=False) + + res = sql.read_sql_query("SELECT * FROM `d1187b08-4943-4c8d-a7f6`", self.conn) + + tm.assert_frame_equal(res, df) + + +@pytest.mark.single +@pytest.mark.skipif(not SQLALCHEMY_INSTALLED, reason="SQLAlchemy not installed") +class TestSQLApi(SQLAlchemyMixIn, _TestSQLApi): + """ + Test the public API as it would be used directly + + Tests for `read_sql_table` are included here, as this is specific for the + sqlalchemy mode. + + """ + + flavor = "sqlite" + mode = "sqlalchemy" + + def connect(self): + return sqlalchemy.create_engine("sqlite:///:memory:") + + def test_read_table_columns(self, test_frame1): + # test columns argument in read_table + sql.to_sql(test_frame1, "test_frame", self.conn) + + cols = ["A", "B"] + result = sql.read_sql_table("test_frame", self.conn, columns=cols) + assert result.columns.tolist() == cols + + def test_read_table_index_col(self, test_frame1): + # test columns argument in read_table + sql.to_sql(test_frame1, "test_frame", self.conn) + + result = sql.read_sql_table("test_frame", self.conn, index_col="index") + assert result.index.names == ["index"] + + result = sql.read_sql_table("test_frame", self.conn, index_col=["A", "B"]) + assert result.index.names == ["A", "B"] + + result = sql.read_sql_table( + "test_frame", self.conn, index_col=["A", "B"], columns=["C", "D"] + ) + assert result.index.names == ["A", "B"] + assert result.columns.tolist() == ["C", "D"] + + def test_read_sql_delegate(self): + iris_frame1 = sql.read_sql_query("SELECT * FROM iris", self.conn) + iris_frame2 = sql.read_sql("SELECT * FROM iris", self.conn) + tm.assert_frame_equal(iris_frame1, iris_frame2) + + iris_frame1 = sql.read_sql_table("iris", self.conn) + iris_frame2 = sql.read_sql("iris", self.conn) + tm.assert_frame_equal(iris_frame1, iris_frame2) + + def test_not_reflect_all_tables(self): + from sqlalchemy import text + from sqlalchemy.engine import Engine + + # create invalid table + query_list = [ + text("CREATE TABLE invalid (x INTEGER, y UNKNOWN);"), + text("CREATE TABLE other_table (x INTEGER, y INTEGER);"), + ] + for query in query_list: + if isinstance(self.conn, Engine): + with self.conn.connect() as conn: + with conn.begin(): + conn.execute(query) + else: + self.conn.execute(query) + + with tm.assert_produces_warning(None): + sql.read_sql_table("other_table", self.conn) + sql.read_sql_query("SELECT * FROM other_table", self.conn) + + def test_warning_case_insensitive_table_name(self, test_frame1): + # see gh-7815 + # + # We can't test that this warning is triggered, a the database + # configuration would have to be altered. But here we test that + # the warning is certainly NOT triggered in a normal case. 
+ with tm.assert_produces_warning(None): + test_frame1.to_sql("CaseSensitive", self.conn) + + def _get_index_columns(self, tbl_name): + from sqlalchemy.engine import reflection + + insp = reflection.Inspector.from_engine(self.conn) + ixs = insp.get_indexes("test_index_saved") + ixs = [i["column_names"] for i in ixs] + return ixs + + def test_sqlalchemy_type_mapping(self): + from sqlalchemy import TIMESTAMP + + # Test Timestamp objects (no datetime64 because of timezone) (GH9085) + df = DataFrame( + {"time": to_datetime(["201412120154", "201412110254"], utc=True)} + ) + db = sql.SQLDatabase(self.conn) + table = sql.SQLTable("test_type", db, frame=df) + # GH 9086: TIMESTAMP is the suggested type for datetimes with timezones + assert isinstance(table.table.c["time"].type, TIMESTAMP) + + @pytest.mark.parametrize( + "integer, expected", + [ + ("int8", "SMALLINT"), + ("Int8", "SMALLINT"), + ("uint8", "SMALLINT"), + ("UInt8", "SMALLINT"), + ("int16", "SMALLINT"), + ("Int16", "SMALLINT"), + ("uint16", "INTEGER"), + ("UInt16", "INTEGER"), + ("int32", "INTEGER"), + ("Int32", "INTEGER"), + ("uint32", "BIGINT"), + ("UInt32", "BIGINT"), + ("int64", "BIGINT"), + ("Int64", "BIGINT"), + (int, "BIGINT" if np.dtype(int).name == "int64" else "INTEGER"), + ], + ) + def test_sqlalchemy_integer_mapping(self, integer, expected): + # GH35076 Map pandas integer to optimal SQLAlchemy integer type + df = DataFrame([0, 1], columns=["a"], dtype=integer) + db = sql.SQLDatabase(self.conn) + table = sql.SQLTable("test_type", db, frame=df) + + result = str(table.table.c.a.type) + assert result == expected + + @pytest.mark.parametrize("integer", ["uint64", "UInt64"]) + def test_sqlalchemy_integer_overload_mapping(self, integer): + # GH35076 Map pandas integer to optimal SQLAlchemy integer type + df = DataFrame([0, 1], columns=["a"], dtype=integer) + db = sql.SQLDatabase(self.conn) + with pytest.raises( + ValueError, match="Unsigned 64 bit integer datatype is not supported" + ): + sql.SQLTable("test_type", db, frame=df) + + def test_database_uri_string(self, test_frame1): + # Test read_sql and .to_sql method with a database URI (GH10654) + # db_uri = 'sqlite:///:memory:' # raises + # sqlalchemy.exc.OperationalError: (sqlite3.OperationalError) near + # "iris": syntax error [SQL: 'iris'] + with tm.ensure_clean() as name: + db_uri = "sqlite:///" + name + table = "iris" + test_frame1.to_sql(table, db_uri, if_exists="replace", index=False) + test_frame2 = sql.read_sql(table, db_uri) + test_frame3 = sql.read_sql_table(table, db_uri) + query = "SELECT * FROM iris" + test_frame4 = sql.read_sql_query(query, db_uri) + tm.assert_frame_equal(test_frame1, test_frame2) + tm.assert_frame_equal(test_frame1, test_frame3) + tm.assert_frame_equal(test_frame1, test_frame4) + + @td.skip_if_installed("pg8000") + def test_pg8000_sqlalchemy_passthrough_error(self): + # using driver that will not be installed on CI to trigger error + # in sqlalchemy.create_engine -> test passing of this error to user + db_uri = "postgresql+pg8000://user:pass@host/dbname" + with pytest.raises(ImportError, match="pg8000"): + sql.read_sql("select * from table", db_uri) + + def test_query_by_text_obj(self): + # WIP : GH10846 + from sqlalchemy import text + + name_text = text("select * from iris where name=:name") + iris_df = sql.read_sql(name_text, self.conn, params={"name": "Iris-versicolor"}) + all_names = set(iris_df["Name"]) + assert all_names == {"Iris-versicolor"} + + def test_query_by_select_obj(self): + # WIP : GH10846 + from sqlalchemy import ( + bindparam, + 
select,
+        )
+
+        iris = iris_table_metadata(self.flavor)
+        iris_select = iris if _gt14() else [iris]
+        name_select = select(iris_select).where(iris.c.Name == bindparam("name"))
+        iris_df = sql.read_sql(name_select, self.conn, params={"name": "Iris-setosa"})
+        all_names = set(iris_df["Name"])
+        assert all_names == {"Iris-setosa"}
+
+    def test_column_with_percentage(self):
+        # GH 37157
+        df = DataFrame({"A": [0, 1, 2], "%_variation": [3, 4, 5]})
+        df.to_sql("test_column_percentage", self.conn, index=False)
+
+        res = sql.read_sql_table("test_column_percentage", self.conn)
+
+        tm.assert_frame_equal(res, df)
+
+
+class _EngineToConnMixin:
+    """
+    A mixin that causes setup_connect to create a conn rather than an engine.
+    """
+
+    @pytest.fixture(autouse=True)
+    def setup_method(self, load_iris_data, load_types_data):
+        super().load_test_data_and_sql()
+        engine = self.conn
+        conn = engine.connect()
+        self.__tx = conn.begin()
+        self.pandasSQL = sql.SQLDatabase(conn)
+        self.__engine = engine
+        self.conn = conn
+
+        yield
+
+        self.__tx.rollback()
+        self.conn.close()
+        self.conn = self.__engine
+        self.pandasSQL = sql.SQLDatabase(self.__engine)
+
+
+@pytest.mark.single
+class TestSQLApiConn(_EngineToConnMixin, TestSQLApi):
+    pass
+
+
+@pytest.mark.single
+class TestSQLiteFallbackApi(SQLiteMixIn, _TestSQLApi):
+    """
+    Test the public sqlite connection fallback API
+
+    """
+
+    flavor = "sqlite"
+    mode = "fallback"
+
+    def connect(self, database=":memory:"):
+        return sqlite3.connect(database)
+
+    def test_sql_open_close(self, test_frame3):
+        # Test whether the IO in the database still works if the connection is
+        # closed between the writing and reading (as in many real situations).
+
+        with tm.ensure_clean() as name:
+
+            conn = self.connect(name)
+            assert sql.to_sql(test_frame3, "test_frame3_legacy", conn, index=False) == 4
+            conn.close()
+
+            conn = self.connect(name)
+            result = sql.read_sql_query("SELECT * FROM test_frame3_legacy;", conn)
+            conn.close()
+
+            tm.assert_frame_equal(test_frame3, result)
+
+    @pytest.mark.skipif(SQLALCHEMY_INSTALLED, reason="SQLAlchemy is installed")
+    def test_con_string_import_error(self):
+        conn = "mysql://root@localhost/pandas"
+        msg = "Using URI string without sqlalchemy installed"
+        with pytest.raises(ImportError, match=msg):
+            sql.read_sql("SELECT * FROM iris", conn)
+
+    @pytest.mark.skipif(SQLALCHEMY_INSTALLED, reason="SQLAlchemy is installed")
+    def test_con_unknown_dbapi2_class_does_not_error_without_sql_alchemy_installed(
+        self,
+    ):
+        class MockSqliteConnection:
+            def __init__(self, *args, **kwargs):
+                self.conn = sqlite3.Connection(*args, **kwargs)
+
+            def __getattr__(self, name):
+                return getattr(self.conn, name)
+
+        conn = MockSqliteConnection(":memory:")
+        with tm.assert_produces_warning(UserWarning):
+            sql.read_sql("SELECT 1", conn)
+
+    def test_read_sql_delegate(self):
+        iris_frame1 = sql.read_sql_query("SELECT * FROM iris", self.conn)
+        iris_frame2 = sql.read_sql("SELECT * FROM iris", self.conn)
+        tm.assert_frame_equal(iris_frame1, iris_frame2)
+
+        msg = "Execution failed on sql 'iris': near \"iris\": syntax error"
+        with pytest.raises(sql.DatabaseError, match=msg):
+            sql.read_sql("iris", self.conn)
+
+    def test_get_schema2(self, test_frame1):
+        # without providing a connection object (available for backwards comp)
+        create_sql = sql.get_schema(test_frame1, "test")
+        assert "CREATE" in create_sql
+
+    def _get_sqlite_column_type(self, schema, column):
+
+        for col in schema.split("\n"):
+            if col.split()[0].strip('""') == column:
+                return col.split()[1]
+        raise
ValueError(f"Column {column} not found") + + def test_sqlite_type_mapping(self): + + # Test Timestamp objects (no datetime64 because of timezone) (GH9085) + df = DataFrame( + {"time": to_datetime(["201412120154", "201412110254"], utc=True)} + ) + db = sql.SQLiteDatabase(self.conn) + table = sql.SQLiteTable("test_type", db, frame=df) + schema = table.sql_schema() + assert self._get_sqlite_column_type(schema, "time") == "TIMESTAMP" + + +# ----------------------------------------------------------------------------- +# -- Database flavor specific tests + + +class _TestSQLAlchemy(SQLAlchemyMixIn, PandasSQLTest): + """ + Base class for testing the sqlalchemy backend. + + Subclasses for specific database types are created below. Tests that + deviate for each flavor are overwritten there. + + """ + + flavor: str + + @pytest.fixture(autouse=True, scope="class") + def setup_class(cls): + cls.setup_import() + cls.setup_driver() + conn = cls.conn = cls.connect() + conn.connect() + + def load_test_data_and_sql(self): + pass + + @pytest.fixture(autouse=True) + def setup_method(self, load_iris_data, load_types_data): + pass + + @classmethod + def setup_import(cls): + # Skip this test if SQLAlchemy not available + if not SQLALCHEMY_INSTALLED: + pytest.skip("SQLAlchemy not installed") + + @classmethod + def setup_driver(cls): + raise NotImplementedError() + + @classmethod + def connect(cls): + raise NotImplementedError() + + def setup_connect(self): + try: + self.conn = self.connect() + self.pandasSQL = sql.SQLDatabase(self.conn) + # to test if connection can be made: + self.conn.connect() + except sqlalchemy.exc.OperationalError: + pytest.skip(f"Can't connect to {self.flavor} server") + + def test_read_sql_parameter(self): + self._read_sql_iris_parameter() + + def test_read_sql_named_parameter(self): + self._read_sql_iris_named_parameter() + + def test_to_sql_empty(self, test_frame1): + self._to_sql_empty(test_frame1) + + def test_create_table(self): + temp_conn = self.connect() + temp_frame = DataFrame( + {"one": [1.0, 2.0, 3.0, 4.0], "two": [4.0, 3.0, 2.0, 1.0]} + ) + + pandasSQL = sql.SQLDatabase(temp_conn) + assert pandasSQL.to_sql(temp_frame, "temp_frame") == 4 + + if _gt14(): + from sqlalchemy import inspect + + insp = inspect(temp_conn) + assert insp.has_table("temp_frame") + else: + assert temp_conn.has_table("temp_frame") + + def test_drop_table(self): + temp_conn = self.connect() + + temp_frame = DataFrame( + {"one": [1.0, 2.0, 3.0, 4.0], "two": [4.0, 3.0, 2.0, 1.0]} + ) + + pandasSQL = sql.SQLDatabase(temp_conn) + assert pandasSQL.to_sql(temp_frame, "temp_frame") == 4 + + if _gt14(): + from sqlalchemy import inspect + + insp = inspect(temp_conn) + assert insp.has_table("temp_frame") + else: + assert temp_conn.has_table("temp_frame") + + pandasSQL.drop_table("temp_frame") + + if _gt14(): + assert not insp.has_table("temp_frame") + else: + assert not temp_conn.has_table("temp_frame") + + def test_roundtrip(self, test_frame1): + self._roundtrip(test_frame1) + + def test_execute_sql(self): + self._execute_sql() + + def test_read_table(self): + iris_frame = sql.read_sql_table("iris", con=self.conn) + check_iris_frame(iris_frame) + + def test_read_table_columns(self): + iris_frame = sql.read_sql_table( + "iris", con=self.conn, columns=["SepalLength", "SepalLength"] + ) + tm.equalContents(iris_frame.columns.values, ["SepalLength", "SepalLength"]) + + def test_read_table_absent_raises(self): + msg = "Table this_doesnt_exist not found" + with pytest.raises(ValueError, match=msg): + 
sql.read_sql_table("this_doesnt_exist", con=self.conn) + + def test_default_type_conversion(self): + df = sql.read_sql_table("types", self.conn) + + assert issubclass(df.FloatCol.dtype.type, np.floating) + assert issubclass(df.IntCol.dtype.type, np.integer) + assert issubclass(df.BoolCol.dtype.type, np.bool_) + + # Int column with NA values stays as float + assert issubclass(df.IntColWithNull.dtype.type, np.floating) + # Bool column with NA values becomes object + assert issubclass(df.BoolColWithNull.dtype.type, object) + + def test_bigint(self): + # int64 should be converted to BigInteger, GH7433 + df = DataFrame(data={"i64": [2 ** 62]}) + assert df.to_sql("test_bigint", self.conn, index=False) == 1 + result = sql.read_sql_table("test_bigint", self.conn) + + tm.assert_frame_equal(df, result) + + def test_default_date_load(self): + df = sql.read_sql_table("types", self.conn) + + # IMPORTANT - sqlite has no native date type, so shouldn't parse, but + # MySQL SHOULD be converted. + assert issubclass(df.DateCol.dtype.type, np.datetime64) + + def test_datetime_with_timezone(self): + # edge case that converts postgresql datetime with time zone types + # to datetime64[ns,psycopg2.tz.FixedOffsetTimezone..], which is ok + # but should be more natural, so coerce to datetime64[ns] for now + + def check(col): + # check that a column is either datetime64[ns] + # or datetime64[ns, UTC] + if is_datetime64_dtype(col.dtype): + + # "2000-01-01 00:00:00-08:00" should convert to + # "2000-01-01 08:00:00" + assert col[0] == Timestamp("2000-01-01 08:00:00") + + # "2000-06-01 00:00:00-07:00" should convert to + # "2000-06-01 07:00:00" + assert col[1] == Timestamp("2000-06-01 07:00:00") + + elif is_datetime64tz_dtype(col.dtype): + assert str(col.dt.tz) == "UTC" + + # "2000-01-01 00:00:00-08:00" should convert to + # "2000-01-01 08:00:00" + # "2000-06-01 00:00:00-07:00" should convert to + # "2000-06-01 07:00:00" + # GH 6415 + expected_data = [ + Timestamp("2000-01-01 08:00:00", tz="UTC"), + Timestamp("2000-06-01 07:00:00", tz="UTC"), + ] + expected = Series(expected_data, name=col.name) + tm.assert_series_equal(col, expected) + + else: + raise AssertionError( + f"DateCol loaded with incorrect type -> {col.dtype}" + ) + + # GH11216 + df = read_sql_query("select * from types", self.conn) + if not hasattr(df, "DateColWithTz"): + pytest.skip("no column with datetime with time zone") + + # this is parsed on Travis (linux), but not on macosx for some reason + # even with the same versions of psycopg2 & sqlalchemy, possibly a + # Postgresql server version difference + col = df.DateColWithTz + assert is_datetime64tz_dtype(col.dtype) + + df = read_sql_query( + "select * from types", self.conn, parse_dates=["DateColWithTz"] + ) + if not hasattr(df, "DateColWithTz"): + pytest.skip("no column with datetime with time zone") + col = df.DateColWithTz + assert is_datetime64tz_dtype(col.dtype) + assert str(col.dt.tz) == "UTC" + check(df.DateColWithTz) + + df = concat( + list(read_sql_query("select * from types", self.conn, chunksize=1)), + ignore_index=True, + ) + col = df.DateColWithTz + assert is_datetime64tz_dtype(col.dtype) + assert str(col.dt.tz) == "UTC" + expected = sql.read_sql_table("types", self.conn) + col = expected.DateColWithTz + assert is_datetime64tz_dtype(col.dtype) + tm.assert_series_equal(df.DateColWithTz, expected.DateColWithTz) + + # xref #7139 + # this might or might not be converted depending on the postgres driver + df = sql.read_sql_table("types", self.conn) + check(df.DateColWithTz) + + def 
test_datetime_with_timezone_roundtrip(self):
+        # GH 9086
+        # Write datetimetz data to a db and read it back
+        # For dbs that support timestamps with timezones, should get back UTC
+        # otherwise naive data should be returned
+        expected = DataFrame(
+            {"A": date_range("2013-01-01 09:00:00", periods=3, tz="US/Pacific")}
+        )
+        assert expected.to_sql("test_datetime_tz", self.conn, index=False) == 3
+
+        if self.flavor == "postgresql":
+            # SQLAlchemy "timezones" (i.e. offsets) are coerced to UTC
+            expected["A"] = expected["A"].dt.tz_convert("UTC")
+        else:
+            # Otherwise, timestamps are returned as local, naive
+            expected["A"] = expected["A"].dt.tz_localize(None)
+
+        result = sql.read_sql_table("test_datetime_tz", self.conn)
+        tm.assert_frame_equal(result, expected)
+
+        result = sql.read_sql_query("SELECT * FROM test_datetime_tz", self.conn)
+        if self.flavor == "sqlite":
+            # read_sql_query does not return datetime type like read_sql_table
+            assert isinstance(result.loc[0, "A"], str)
+            result["A"] = to_datetime(result["A"])
+        tm.assert_frame_equal(result, expected)
+
+    def test_out_of_bounds_datetime(self):
+        # GH 26761
+        data = DataFrame({"date": datetime(9999, 1, 1)}, index=[0])
+        assert data.to_sql("test_datetime_obb", self.conn, index=False) == 1
+        result = sql.read_sql_table("test_datetime_obb", self.conn)
+        expected = DataFrame([pd.NaT], columns=["date"])
+        tm.assert_frame_equal(result, expected)
+
+    def test_naive_datetimeindex_roundtrip(self):
+        # GH 23510
+        # Ensure that a naive DatetimeIndex isn't converted to UTC
+        dates = date_range("2018-01-01", periods=5, freq="6H")._with_freq(None)
+        expected = DataFrame({"nums": range(5)}, index=dates)
+        assert expected.to_sql("foo_table", self.conn, index_label="info_date") == 5
+        result = sql.read_sql_table("foo_table", self.conn, index_col="info_date")
+        # the result index gains a name from the set_index operation inside
+        # read_sql_table, while the expected index is unnamed, so skip the
+        # name check
+        tm.assert_frame_equal(result, expected, check_names=False)
+
+    def test_date_parsing(self):
+        # No Parsing
+        df = sql.read_sql_table("types", self.conn)
+        expected_type = object if self.flavor == "sqlite" else np.datetime64
+        assert issubclass(df.DateCol.dtype.type, expected_type)
+
+        df = sql.read_sql_table("types", self.conn, parse_dates=["DateCol"])
+        assert issubclass(df.DateCol.dtype.type, np.datetime64)
+
+        df = sql.read_sql_table(
+            "types", self.conn, parse_dates={"DateCol": "%Y-%m-%d %H:%M:%S"}
+        )
+        assert issubclass(df.DateCol.dtype.type, np.datetime64)
+
+        df = sql.read_sql_table(
+            "types",
+            self.conn,
+            parse_dates={"DateCol": {"format": "%Y-%m-%d %H:%M:%S"}},
+        )
+        assert issubclass(df.DateCol.dtype.type, np.datetime64)
+
+        df = sql.read_sql_table("types", self.conn, parse_dates=["IntDateCol"])
+        assert issubclass(df.IntDateCol.dtype.type, np.datetime64)
+
+        df = sql.read_sql_table("types", self.conn, parse_dates={"IntDateCol": "s"})
+        assert issubclass(df.IntDateCol.dtype.type, np.datetime64)
+
+        df = sql.read_sql_table(
+            "types", self.conn, parse_dates={"IntDateCol": {"unit": "s"}}
+        )
+        assert issubclass(df.IntDateCol.dtype.type, np.datetime64)
+
+    def test_datetime(self):
+        df = DataFrame(
+            {"A": date_range("2013-01-01 09:00:00", periods=3), "B": np.arange(3.0)}
+        )
+        assert df.to_sql("test_datetime", self.conn) == 3
+
+        # with read_table -> type information from schema used
+        result = sql.read_sql_table("test_datetime", self.conn)
+        result = result.drop("index", axis=1)
+        tm.assert_frame_equal(result, df)
+
+        # with read_sql -> no type information -> sqlite has no native
+        result =
sql.read_sql_query("SELECT * FROM test_datetime", self.conn) + result = result.drop("index", axis=1) + if self.flavor == "sqlite": + assert isinstance(result.loc[0, "A"], str) + result["A"] = to_datetime(result["A"]) + tm.assert_frame_equal(result, df) + else: + tm.assert_frame_equal(result, df) + + def test_datetime_NaT(self): + df = DataFrame( + {"A": date_range("2013-01-01 09:00:00", periods=3), "B": np.arange(3.0)} + ) + df.loc[1, "A"] = np.nan + assert df.to_sql("test_datetime", self.conn, index=False) == 3 + + # with read_table -> type information from schema used + result = sql.read_sql_table("test_datetime", self.conn) + tm.assert_frame_equal(result, df) + + # with read_sql -> no type information -> sqlite has no native + result = sql.read_sql_query("SELECT * FROM test_datetime", self.conn) + if self.flavor == "sqlite": + assert isinstance(result.loc[0, "A"], str) + result["A"] = to_datetime(result["A"], errors="coerce") + tm.assert_frame_equal(result, df) + else: + tm.assert_frame_equal(result, df) + + def test_datetime_date(self): + # test support for datetime.date + df = DataFrame([date(2014, 1, 1), date(2014, 1, 2)], columns=["a"]) + assert df.to_sql("test_date", self.conn, index=False) == 2 + res = read_sql_table("test_date", self.conn) + result = res["a"] + expected = to_datetime(df["a"]) + # comes back as datetime64 + tm.assert_series_equal(result, expected) + + def test_datetime_time(self): + # test support for datetime.time + df = DataFrame([time(9, 0, 0), time(9, 1, 30)], columns=["a"]) + assert df.to_sql("test_time", self.conn, index=False) == 2 + res = read_sql_table("test_time", self.conn) + tm.assert_frame_equal(res, df) + + # GH8341 + # first, use the fallback to have the sqlite adapter put in place + sqlite_conn = TestSQLiteFallback.connect() + assert sql.to_sql(df, "test_time2", sqlite_conn, index=False) == 2 + res = sql.read_sql_query("SELECT * FROM test_time2", sqlite_conn) + ref = df.applymap(lambda _: _.strftime("%H:%M:%S.%f")) + tm.assert_frame_equal(ref, res) # check if adapter is in place + # then test if sqlalchemy is unaffected by the sqlite adapter + assert sql.to_sql(df, "test_time3", self.conn, index=False) == 2 + if self.flavor == "sqlite": + res = sql.read_sql_query("SELECT * FROM test_time3", self.conn) + ref = df.applymap(lambda _: _.strftime("%H:%M:%S.%f")) + tm.assert_frame_equal(ref, res) + res = sql.read_sql_table("test_time3", self.conn) + tm.assert_frame_equal(df, res) + + def test_mixed_dtype_insert(self): + # see GH6509 + s1 = Series(2 ** 25 + 1, dtype=np.int32) + s2 = Series(0.0, dtype=np.float32) + df = DataFrame({"s1": s1, "s2": s2}) + + # write and read again + assert df.to_sql("test_read_write", self.conn, index=False) == 1 + df2 = sql.read_sql_table("test_read_write", self.conn) + + tm.assert_frame_equal(df, df2, check_dtype=False, check_exact=True) + + def test_nan_numeric(self): + # NaNs in numeric float column + df = DataFrame({"A": [0, 1, 2], "B": [0.2, np.nan, 5.6]}) + assert df.to_sql("test_nan", self.conn, index=False) == 3 + + # with read_table + result = sql.read_sql_table("test_nan", self.conn) + tm.assert_frame_equal(result, df) + + # with read_sql + result = sql.read_sql_query("SELECT * FROM test_nan", self.conn) + tm.assert_frame_equal(result, df) + + def test_nan_fullcolumn(self): + # full NaN column (numeric float column) + df = DataFrame({"A": [0, 1, 2], "B": [np.nan, np.nan, np.nan]}) + assert df.to_sql("test_nan", self.conn, index=False) == 3 + + # with read_table + result = sql.read_sql_table("test_nan", self.conn) + 
tm.assert_frame_equal(result, df) + + # with read_sql -> not type info from table -> stays None + df["B"] = df["B"].astype("object") + df["B"] = None + result = sql.read_sql_query("SELECT * FROM test_nan", self.conn) + tm.assert_frame_equal(result, df) + + def test_nan_string(self): + # NaNs in string column + df = DataFrame({"A": [0, 1, 2], "B": ["a", "b", np.nan]}) + assert df.to_sql("test_nan", self.conn, index=False) == 3 + + # NaNs are coming back as None + df.loc[2, "B"] = None + + # with read_table + result = sql.read_sql_table("test_nan", self.conn) + tm.assert_frame_equal(result, df) + + # with read_sql + result = sql.read_sql_query("SELECT * FROM test_nan", self.conn) + tm.assert_frame_equal(result, df) + + def _get_index_columns(self, tbl_name): + from sqlalchemy import inspect + + insp = inspect(self.conn) + + ixs = insp.get_indexes(tbl_name) + ixs = [i["column_names"] for i in ixs] + return ixs + + def test_to_sql_save_index(self): + self._to_sql_save_index() + + def test_transactions(self): + self._transaction_test() + + def test_get_schema_create_table(self, test_frame3): + # Use a dataframe without a bool column, since MySQL converts bool to + # TINYINT (which read_sql_table returns as an int and causes a dtype + # mismatch) + from sqlalchemy import text + from sqlalchemy.engine import Engine + + tbl = "test_get_schema_create_table" + create_sql = sql.get_schema(test_frame3, tbl, con=self.conn) + blank_test_df = test_frame3.iloc[:0] + + self.drop_table(tbl) + create_sql = text(create_sql) + if isinstance(self.conn, Engine): + with self.conn.connect() as conn: + with conn.begin(): + conn.execute(create_sql) + else: + self.conn.execute(create_sql) + returned_df = sql.read_sql_table(tbl, self.conn) + tm.assert_frame_equal(returned_df, blank_test_df, check_index_type=False) + self.drop_table(tbl) + + def test_dtype(self): + from sqlalchemy import ( + TEXT, + String, + ) + from sqlalchemy.schema import MetaData + + cols = ["A", "B"] + data = [(0.8, True), (0.9, None)] + df = DataFrame(data, columns=cols) + assert df.to_sql("dtype_test", self.conn) == 2 + assert df.to_sql("dtype_test2", self.conn, dtype={"B": TEXT}) == 2 + meta = MetaData() + meta.reflect(bind=self.conn) + sqltype = meta.tables["dtype_test2"].columns["B"].type + assert isinstance(sqltype, TEXT) + msg = "The type of B is not a SQLAlchemy type" + with pytest.raises(ValueError, match=msg): + df.to_sql("error", self.conn, dtype={"B": str}) + + # GH9083 + assert df.to_sql("dtype_test3", self.conn, dtype={"B": String(10)}) == 2 + meta.reflect(bind=self.conn) + sqltype = meta.tables["dtype_test3"].columns["B"].type + assert isinstance(sqltype, String) + assert sqltype.length == 10 + + # single dtype + assert df.to_sql("single_dtype_test", self.conn, dtype=TEXT) == 2 + meta.reflect(bind=self.conn) + sqltypea = meta.tables["single_dtype_test"].columns["A"].type + sqltypeb = meta.tables["single_dtype_test"].columns["B"].type + assert isinstance(sqltypea, TEXT) + assert isinstance(sqltypeb, TEXT) + + def test_notna_dtype(self): + from sqlalchemy import ( + Boolean, + DateTime, + Float, + Integer, + ) + from sqlalchemy.schema import MetaData + + cols = { + "Bool": Series([True, None]), + "Date": Series([datetime(2012, 5, 1), None]), + "Int": Series([1, None], dtype="object"), + "Float": Series([1.1, None]), + } + df = DataFrame(cols) + + tbl = "notna_dtype_test" + assert df.to_sql(tbl, self.conn) == 2 + _ = sql.read_sql_table(tbl, self.conn) + meta = MetaData() + meta.reflect(bind=self.conn) + my_type = Integer if 
self.flavor == "mysql" else Boolean + col_dict = meta.tables[tbl].columns + assert isinstance(col_dict["Bool"].type, my_type) + assert isinstance(col_dict["Date"].type, DateTime) + assert isinstance(col_dict["Int"].type, Integer) + assert isinstance(col_dict["Float"].type, Float) + + def test_double_precision(self): + from sqlalchemy import ( + BigInteger, + Float, + Integer, + ) + from sqlalchemy.schema import MetaData + + V = 1.23456789101112131415 + + df = DataFrame( + { + "f32": Series([V], dtype="float32"), + "f64": Series([V], dtype="float64"), + "f64_as_f32": Series([V], dtype="float64"), + "i32": Series([5], dtype="int32"), + "i64": Series([5], dtype="int64"), + } + ) + + assert ( + df.to_sql( + "test_dtypes", + self.conn, + index=False, + if_exists="replace", + dtype={"f64_as_f32": Float(precision=23)}, + ) + == 1 + ) + res = sql.read_sql_table("test_dtypes", self.conn) + + # check precision of float64 + assert np.round(df["f64"].iloc[0], 14) == np.round(res["f64"].iloc[0], 14) + + # check sql types + meta = MetaData() + meta.reflect(bind=self.conn) + col_dict = meta.tables["test_dtypes"].columns + assert str(col_dict["f32"].type) == str(col_dict["f64_as_f32"].type) + assert isinstance(col_dict["f32"].type, Float) + assert isinstance(col_dict["f64"].type, Float) + assert isinstance(col_dict["i32"].type, Integer) + assert isinstance(col_dict["i64"].type, BigInteger) + + def test_connectable_issue_example(self): + # This tests the example raised in issue + # https://github.com/pandas-dev/pandas/issues/10104 + from sqlalchemy.engine import Engine + + def foo(connection): + query = "SELECT test_foo_data FROM test_foo_data" + return sql.read_sql_query(query, con=connection) + + def bar(connection, data): + data.to_sql(name="test_foo_data", con=connection, if_exists="append") + + def baz(conn): + if _gt14(): + # https://github.com/sqlalchemy/sqlalchemy/commit/ + # 00b5c10846e800304caa86549ab9da373b42fa5d#r48323973 + foo_data = foo(conn) + bar(conn, foo_data) + else: + foo_data = conn.run_callable(foo) + conn.run_callable(bar, foo_data) + + def main(connectable): + if isinstance(connectable, Engine): + with connectable.connect() as conn: + with conn.begin(): + baz(conn) + else: + baz(connectable) + + assert ( + DataFrame({"test_foo_data": [0, 1, 2]}).to_sql("test_foo_data", self.conn) + == 3 + ) + main(self.conn) + + @pytest.mark.parametrize( + "input", + [{"foo": [np.inf]}, {"foo": [-np.inf]}, {"foo": [-np.inf], "infe0": ["bar"]}], + ) + def test_to_sql_with_negative_npinf(self, input, request): + # GH 34431 + + df = DataFrame(input) + + if self.flavor == "mysql": + # GH 36465 + # The input {"foo": [-np.inf], "infe0": ["bar"]} does not raise any error + # for pymysql version >= 0.10 + # TODO(GH#36465): remove this version check after GH 36465 is fixed + import pymysql + + if pymysql.VERSION[0:3] >= (0, 10, 0) and "infe0" in df.columns: + mark = pytest.mark.xfail(reason="GH 36465") + request.node.add_marker(mark) + + msg = "inf cannot be used with MySQL" + with pytest.raises(ValueError, match=msg): + df.to_sql("foobar", self.conn, index=False) + else: + assert df.to_sql("foobar", self.conn, index=False) == 1 + res = sql.read_sql_table("foobar", self.conn) + tm.assert_equal(df, res) + + def test_temporary_table(self): + from sqlalchemy import ( + Column, + Integer, + Unicode, + select, + ) + from sqlalchemy.orm import ( + Session, + sessionmaker, + ) + + if _gt14(): + from sqlalchemy.orm import declarative_base + else: + from sqlalchemy.ext.declarative import declarative_base + + 
test_data = "Hello, World!" + expected = DataFrame({"spam": [test_data]}) + Base = declarative_base() + + class Temporary(Base): + __tablename__ = "temp_test" + __table_args__ = {"prefixes": ["TEMPORARY"]} + id = Column(Integer, primary_key=True) + spam = Column(Unicode(30), nullable=False) + + if _gt14(): + with Session(self.conn) as session: + with session.begin(): + conn = session.connection() + Temporary.__table__.create(conn) + session.add(Temporary(spam=test_data)) + session.flush() + df = sql.read_sql_query(sql=select(Temporary.spam), con=conn) + else: + Session = sessionmaker() + session = Session(bind=self.conn) + with session.transaction: + conn = session.connection() + Temporary.__table__.create(conn) + session.add(Temporary(spam=test_data)) + session.flush() + df = sql.read_sql_query(sql=select([Temporary.spam]), con=conn) + + tm.assert_frame_equal(df, expected) + + # -- SQL Engine tests (in the base class for now) + def test_invalid_engine(self, test_frame1): + msg = "engine must be one of 'auto', 'sqlalchemy'" + with pytest.raises(ValueError, match=msg): + self._to_sql_with_sql_engine(test_frame1, "bad_engine") + + def test_options_sqlalchemy(self, test_frame1): + # use the set option + with pd.option_context("io.sql.engine", "sqlalchemy"): + self._to_sql_with_sql_engine(test_frame1) + + def test_options_auto(self, test_frame1): + # use the set option + with pd.option_context("io.sql.engine", "auto"): + self._to_sql_with_sql_engine(test_frame1) + + def test_options_get_engine(self): + assert isinstance(get_engine("sqlalchemy"), SQLAlchemyEngine) + + with pd.option_context("io.sql.engine", "sqlalchemy"): + assert isinstance(get_engine("auto"), SQLAlchemyEngine) + assert isinstance(get_engine("sqlalchemy"), SQLAlchemyEngine) + + with pd.option_context("io.sql.engine", "auto"): + assert isinstance(get_engine("auto"), SQLAlchemyEngine) + assert isinstance(get_engine("sqlalchemy"), SQLAlchemyEngine) + + def test_get_engine_auto_error_message(self): + # Expect different error messages from get_engine(engine="auto") + # if engines aren't installed vs. are installed but bad version + pass + # TODO(GH#36893) fill this in when we add more engines + + +class _TestSQLAlchemyConn(_EngineToConnMixin, _TestSQLAlchemy): + def test_transactions(self): + pytest.skip("Nested transactions rollbacks don't work with Pandas") + + +class _TestSQLiteAlchemy: + """ + Test the sqlalchemy backend against an in-memory sqlite database. 
+
+    """
+
+    flavor = "sqlite"
+
+    @classmethod
+    def connect(cls):
+        return sqlalchemy.create_engine("sqlite:///:memory:")
+
+    @classmethod
+    def setup_driver(cls):
+        # sqlite3 is built-in
+        cls.driver = None
+
+    def test_default_type_conversion(self):
+        df = sql.read_sql_table("types", self.conn)
+
+        assert issubclass(df.FloatCol.dtype.type, np.floating)
+        assert issubclass(df.IntCol.dtype.type, np.integer)
+
+        # sqlite has no boolean type, so integer type is returned
+        assert issubclass(df.BoolCol.dtype.type, np.integer)
+
+        # Int column with NA values stays as float
+        assert issubclass(df.IntColWithNull.dtype.type, np.floating)
+
+        # Non-native Bool column with NA values stays as float
+        assert issubclass(df.BoolColWithNull.dtype.type, np.floating)
+
+    def test_default_date_load(self):
+        df = sql.read_sql_table("types", self.conn)
+
+        # IMPORTANT - sqlite has no native date type, so shouldn't parse, but
+        assert not issubclass(df.DateCol.dtype.type, np.datetime64)
+
+    def test_bigint_warning(self):
+        # test that no warning is raised for BIGINT, which is used to
+        # support int64 (GH7433)
+        df = DataFrame({"a": [1, 2]}, dtype="int64")
+        assert df.to_sql("test_bigintwarning", self.conn, index=False) == 2
+
+        with tm.assert_produces_warning(None):
+            sql.read_sql_table("test_bigintwarning", self.conn)
+
+    def test_row_object_is_named_tuple(self):
+        # GH 40682
+        # Test for the is_named_tuple() function
+        # Placed here due to its usage of sqlalchemy
+
+        from sqlalchemy import (
+            Column,
+            Integer,
+            String,
+        )
+        from sqlalchemy.orm import sessionmaker
+
+        if _gt14():
+            from sqlalchemy.orm import declarative_base
+        else:
+            from sqlalchemy.ext.declarative import declarative_base
+
+        BaseModel = declarative_base()
+
+        class Test(BaseModel):
+            __tablename__ = "test_frame"
+            id = Column(Integer, primary_key=True)
+            foo = Column(String(50))
+
+        BaseModel.metadata.create_all(self.conn)
+        Session = sessionmaker(bind=self.conn)
+        session = Session()
+
+        df = DataFrame({"id": [0, 1], "foo": ["hello", "world"]})
+        assert (
+            df.to_sql("test_frame", con=self.conn, index=False, if_exists="replace")
+            == 2
+        )
+
+        session.commit()
+        foo = session.query(Test.id, Test.foo)
+        df = DataFrame(foo)
+        session.close()
+
+        assert list(df.columns) == ["id", "foo"]
+
+
+class _TestMySQLAlchemy:
+    """
+    Test the sqlalchemy backend against a MySQL database.
+
+    """
+
+    flavor = "mysql"
+    port = 3306
+
+    @classmethod
+    def connect(cls):
+        return sqlalchemy.create_engine(
+            f"mysql+{cls.driver}://root@localhost:{cls.port}/pandas",
+            connect_args=cls.connect_args,
+        )
+
+    @classmethod
+    def setup_driver(cls):
+        pymysql = pytest.importorskip("pymysql")
+        cls.driver = "pymysql"
+        cls.connect_args = {"client_flag": pymysql.constants.CLIENT.MULTI_STATEMENTS}
+
+    def test_default_type_conversion(self):
+        pass
+
+
+class _TestPostgreSQLAlchemy:
+    """
+    Test the sqlalchemy backend against a PostgreSQL database.
+ + """ + + flavor = "postgresql" + port = 5432 + + @classmethod + def connect(cls): + return sqlalchemy.create_engine( + f"postgresql+{cls.driver}://postgres:postgres@localhost:{cls.port}/pandas" + ) + + @classmethod + def setup_driver(cls): + pytest.importorskip("psycopg2") + cls.driver = "psycopg2" + + def test_schema_support(self): + from sqlalchemy.engine import Engine + + # only test this for postgresql (schema's not supported in + # mysql/sqlite) + df = DataFrame({"col1": [1, 2], "col2": [0.1, 0.2], "col3": ["a", "n"]}) + + # create a schema + self.conn.execute("DROP SCHEMA IF EXISTS other CASCADE;") + self.conn.execute("CREATE SCHEMA other;") + + # write dataframe to different schema's + assert df.to_sql("test_schema_public", self.conn, index=False) == 2 + assert ( + df.to_sql( + "test_schema_public_explicit", self.conn, index=False, schema="public" + ) + == 2 + ) + assert ( + df.to_sql("test_schema_other", self.conn, index=False, schema="other") == 2 + ) + + # read dataframes back in + res1 = sql.read_sql_table("test_schema_public", self.conn) + tm.assert_frame_equal(df, res1) + res2 = sql.read_sql_table("test_schema_public_explicit", self.conn) + tm.assert_frame_equal(df, res2) + res3 = sql.read_sql_table( + "test_schema_public_explicit", self.conn, schema="public" + ) + tm.assert_frame_equal(df, res3) + res4 = sql.read_sql_table("test_schema_other", self.conn, schema="other") + tm.assert_frame_equal(df, res4) + msg = "Table test_schema_other not found" + with pytest.raises(ValueError, match=msg): + sql.read_sql_table("test_schema_other", self.conn, schema="public") + + # different if_exists options + + # create a schema + self.conn.execute("DROP SCHEMA IF EXISTS other CASCADE;") + self.conn.execute("CREATE SCHEMA other;") + + # write dataframe with different if_exists options + assert ( + df.to_sql("test_schema_other", self.conn, schema="other", index=False) == 2 + ) + df.to_sql( + "test_schema_other", + self.conn, + schema="other", + index=False, + if_exists="replace", + ) + assert ( + df.to_sql( + "test_schema_other", + self.conn, + schema="other", + index=False, + if_exists="append", + ) + == 2 + ) + res = sql.read_sql_table("test_schema_other", self.conn, schema="other") + tm.assert_frame_equal(concat([df, df], ignore_index=True), res) + + # specifying schema in user-provided meta + + # The schema won't be applied on another Connection + # because of transactional schemas + if isinstance(self.conn, Engine): + engine2 = self.connect() + pdsql = sql.SQLDatabase(engine2, schema="other") + assert pdsql.to_sql(df, "test_schema_other2", index=False) == 2 + assert ( + pdsql.to_sql(df, "test_schema_other2", index=False, if_exists="replace") + == 2 + ) + assert ( + pdsql.to_sql(df, "test_schema_other2", index=False, if_exists="append") + == 2 + ) + res1 = sql.read_sql_table("test_schema_other2", self.conn, schema="other") + res2 = pdsql.read_table("test_schema_other2") + tm.assert_frame_equal(res1, res2) + + +@pytest.mark.single +@pytest.mark.db +class TestMySQLAlchemy(_TestMySQLAlchemy, _TestSQLAlchemy): + pass + + +@pytest.mark.single +@pytest.mark.db +class TestMySQLAlchemyConn(_TestMySQLAlchemy, _TestSQLAlchemyConn): + pass + + +@pytest.mark.single +@pytest.mark.db +class TestPostgreSQLAlchemy(_TestPostgreSQLAlchemy, _TestSQLAlchemy): + pass + + +@pytest.mark.single +@pytest.mark.db +class TestPostgreSQLAlchemyConn(_TestPostgreSQLAlchemy, _TestSQLAlchemyConn): + pass + + +@pytest.mark.single +class TestSQLiteAlchemy(_TestSQLiteAlchemy, _TestSQLAlchemy): + pass + + 
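+# The concrete test classes in this module are assembled by mixin composition:
+# a flavor mixin (_TestSQLiteAlchemy, _TestMySQLAlchemy, _TestPostgreSQLAlchemy)
+# contributes connect() / setup_driver() plus any flavor-specific overrides,
+# while _TestSQLAlchemy supplies the shared test bodies and _TestSQLAlchemyConn
+# (via _EngineToConnMixin) re-runs them against a Connection instead of an
+# Engine. A new backend could plug in the same way; the snippet below is a
+# sketch only, and the "duckdb" dialect name is a hypothetical assumption:
+#
+#     class _TestDuckDBAlchemy:
+#         flavor = "duckdb"
+#
+#         @classmethod
+#         def connect(cls):
+#             return sqlalchemy.create_engine("duckdb:///:memory:")
+#
+#         @classmethod
+#         def setup_driver(cls):
+#             cls.driver = None
+#
+#     @pytest.mark.single
+#     class TestDuckDBAlchemy(_TestDuckDBAlchemy, _TestSQLAlchemy):
+#         pass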
+@pytest.mark.single
+class TestSQLiteAlchemyConn(_TestSQLiteAlchemy, _TestSQLAlchemyConn):
+    pass
+
+
+# -----------------------------------------------------------------------------
+# -- Test Sqlite / MySQL fallback
+
+
+@pytest.mark.single
+class TestSQLiteFallback(SQLiteMixIn, PandasSQLTest):
+    """
+    Test the fallback mode against an in-memory sqlite database.
+
+    """
+
+    flavor = "sqlite"
+
+    @classmethod
+    def connect(cls):
+        return sqlite3.connect(":memory:")
+
+    def setup_connect(self):
+        self.conn = self.connect()
+
+    @pytest.fixture(autouse=True)
+    def setup_method(self, load_iris_data, load_types_data):
+        self.pandasSQL = sql.SQLiteDatabase(self.conn)
+
+    def test_read_sql_parameter(self):
+        self._read_sql_iris_parameter()
+
+    def test_read_sql_named_parameter(self):
+        self._read_sql_iris_named_parameter()
+
+    def test_to_sql_empty(self, test_frame1):
+        self._to_sql_empty(test_frame1)
+
+    def test_create_and_drop_table(self):
+        temp_frame = DataFrame(
+            {"one": [1.0, 2.0, 3.0, 4.0], "two": [4.0, 3.0, 2.0, 1.0]}
+        )
+
+        assert self.pandasSQL.to_sql(temp_frame, "drop_test_frame") == 4
+
+        assert self.pandasSQL.has_table("drop_test_frame")
+
+        self.pandasSQL.drop_table("drop_test_frame")
+
+        assert not self.pandasSQL.has_table("drop_test_frame")
+
+    def test_roundtrip(self, test_frame1):
+        self._roundtrip(test_frame1)
+
+    def test_execute_sql(self):
+        self._execute_sql()
+
+    def test_datetime_date(self):
+        # test support for datetime.date
+        df = DataFrame([date(2014, 1, 1), date(2014, 1, 2)], columns=["a"])
+        assert df.to_sql("test_date", self.conn, index=False) == 2
+        res = read_sql_query("SELECT * FROM test_date", self.conn)
+        if self.flavor == "sqlite":
+            # comes back as strings
+            tm.assert_frame_equal(res, df.astype(str))
+        elif self.flavor == "mysql":
+            tm.assert_frame_equal(res, df)
+
+    def test_datetime_time(self):
+        # test support for datetime.time, GH #8341
+        df = DataFrame([time(9, 0, 0), time(9, 1, 30)], columns=["a"])
+        assert df.to_sql("test_time", self.conn, index=False) == 2
+        res = read_sql_query("SELECT * FROM test_time", self.conn)
+        if self.flavor == "sqlite":
+            # comes back as strings
+            expected = df.applymap(lambda _: _.strftime("%H:%M:%S.%f"))
+            tm.assert_frame_equal(res, expected)
+
+    def _get_index_columns(self, tbl_name):
+        ixs = sql.read_sql_query(
+            "SELECT * FROM sqlite_master WHERE type = 'index' "
+            + f"AND tbl_name = '{tbl_name}'",
+            self.conn,
+        )
+        ix_cols = []
+        for ix_name in ixs.name:
+            ix_info = sql.read_sql_query(f"PRAGMA index_info({ix_name})", self.conn)
+            ix_cols.append(ix_info.name.tolist())
+        return ix_cols
+
+    def test_to_sql_save_index(self):
+        self._to_sql_save_index()
+
+    def test_transactions(self):
+        self._transaction_test()
+
+    def _get_sqlite_column_type(self, table, column):
+        recs = self.conn.execute(f"PRAGMA table_info({table})")
+        for cid, name, ctype, not_null, default, pk in recs:
+            if name == column:
+                return ctype
+        raise ValueError(f"Table {table}, column {column} not found")
+
+    def test_dtype(self):
+        if self.flavor == "mysql":
+            pytest.skip("Not applicable to MySQL legacy")
+        cols = ["A", "B"]
+        data = [(0.8, True), (0.9, None)]
+        df = DataFrame(data, columns=cols)
+        assert df.to_sql("dtype_test", self.conn) == 2
+        assert df.to_sql("dtype_test2", self.conn, dtype={"B": "STRING"}) == 2
+
+        # sqlite stores Boolean values as INTEGER
+        assert self._get_sqlite_column_type("dtype_test", "B") == "INTEGER"
+
+        assert self._get_sqlite_column_type("dtype_test2", "B") == "STRING"
+        msg = r"B \(<class 'bool'>\) not a string"
+        with
pytest.raises(ValueError, match=msg): + df.to_sql("error", self.conn, dtype={"B": bool}) + + # single dtype + assert df.to_sql("single_dtype_test", self.conn, dtype="STRING") == 2 + assert self._get_sqlite_column_type("single_dtype_test", "A") == "STRING" + assert self._get_sqlite_column_type("single_dtype_test", "B") == "STRING" + + def test_notna_dtype(self): + if self.flavor == "mysql": + pytest.skip("Not applicable to MySQL legacy") + + cols = { + "Bool": Series([True, None]), + "Date": Series([datetime(2012, 5, 1), None]), + "Int": Series([1, None], dtype="object"), + "Float": Series([1.1, None]), + } + df = DataFrame(cols) + + tbl = "notna_dtype_test" + assert df.to_sql(tbl, self.conn) == 2 + + assert self._get_sqlite_column_type(tbl, "Bool") == "INTEGER" + assert self._get_sqlite_column_type(tbl, "Date") == "TIMESTAMP" + assert self._get_sqlite_column_type(tbl, "Int") == "INTEGER" + assert self._get_sqlite_column_type(tbl, "Float") == "REAL" + + def test_illegal_names(self): + # For sqlite, these should work fine + df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"]) + + msg = "Empty table or column name specified" + with pytest.raises(ValueError, match=msg): + df.to_sql("", self.conn) + + for ndx, weird_name in enumerate( + [ + "test_weird_name]", + "test_weird_name[", + "test_weird_name`", + 'test_weird_name"', + "test_weird_name'", + "_b.test_weird_name_01-30", + '"_b.test_weird_name_01-30"', + "99beginswithnumber", + "12345", + "\xe9", + ] + ): + assert df.to_sql(weird_name, self.conn) == 2 + sql.table_exists(weird_name, self.conn) + + df2 = DataFrame([[1, 2], [3, 4]], columns=["a", weird_name]) + c_tbl = f"test_weird_col_name{ndx:d}" + assert df2.to_sql(c_tbl, self.conn) == 2 + sql.table_exists(c_tbl, self.conn) + + +# ----------------------------------------------------------------------------- +# -- Old tests from 0.13.1 (before refactor using sqlalchemy) + + +_formatters = { + datetime: "'{}'".format, + str: "'{}'".format, + np.str_: "'{}'".format, + bytes: "'{}'".format, + float: "{:.8f}".format, + int: "{:d}".format, + type(None): lambda x: "NULL", + np.float64: "{:.10f}".format, + bool: "'{!s}'".format, +} + + +def format_query(sql, *args): + processed_args = [] + for arg in args: + if isinstance(arg, float) and isna(arg): + arg = None + + formatter = _formatters[type(arg)] + processed_args.append(formatter(arg)) + + return sql % tuple(processed_args) + + +def tquery(query, con=None): + """Replace removed sql.tquery function""" + res = sql.execute(query, con=con).fetchall() + return None if res is None else list(res) + + +class TestXSQLite: + def setup_method(self): + self.conn = sqlite3.connect(":memory:") + + def teardown_method(self): + self.conn.close() + + def drop_table(self, table_name): + cur = self.conn.cursor() + cur.execute(f"DROP TABLE IF EXISTS {sql._get_valid_sqlite_name(table_name)}") + self.conn.commit() + + def test_basic(self): + frame = tm.makeTimeDataFrame() + assert sql.to_sql(frame, name="test_table", con=self.conn, index=False) == 30 + result = sql.read_sql("select * from test_table", self.conn) + + # HACK! Change this once indexes are handled properly. 
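+        # to_sql was called with index=False above, so the DatetimeIndex was
+        # never written; reattach the original index before comparing frames.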
+ result.index = frame.index + + expected = frame + tm.assert_frame_equal(result, frame) + + frame["txt"] = ["a"] * len(frame) + frame2 = frame.copy() + new_idx = Index(np.arange(len(frame2))) + 10 + frame2["Idx"] = new_idx.copy() + assert sql.to_sql(frame2, name="test_table2", con=self.conn, index=False) == 30 + result = sql.read_sql("select * from test_table2", self.conn, index_col="Idx") + expected = frame.copy() + expected.index = new_idx + expected.index.name = "Idx" + tm.assert_frame_equal(expected, result) + + def test_write_row_by_row(self): + frame = tm.makeTimeDataFrame() + frame.iloc[0, 0] = np.nan + create_sql = sql.get_schema(frame, "test") + cur = self.conn.cursor() + cur.execute(create_sql) + + ins = "INSERT INTO test VALUES (%s, %s, %s, %s)" + for _, row in frame.iterrows(): + fmt_sql = format_query(ins, *row) + tquery(fmt_sql, con=self.conn) + + self.conn.commit() + + result = sql.read_sql("select * from test", con=self.conn) + result.index = frame.index + tm.assert_frame_equal(result, frame, rtol=1e-3) + + def test_execute(self): + frame = tm.makeTimeDataFrame() + create_sql = sql.get_schema(frame, "test") + cur = self.conn.cursor() + cur.execute(create_sql) + ins = "INSERT INTO test VALUES (?, ?, ?, ?)" + + row = frame.iloc[0] + sql.execute(ins, self.conn, params=tuple(row)) + self.conn.commit() + + result = sql.read_sql("select * from test", self.conn) + result.index = frame.index[:1] + tm.assert_frame_equal(result, frame[:1]) + + def test_schema(self): + frame = tm.makeTimeDataFrame() + create_sql = sql.get_schema(frame, "test") + lines = create_sql.splitlines() + for line in lines: + tokens = line.split(" ") + if len(tokens) == 2 and tokens[0] == "A": + assert tokens[1] == "DATETIME" + + create_sql = sql.get_schema(frame, "test", keys=["A", "B"]) + lines = create_sql.splitlines() + assert 'PRIMARY KEY ("A", "B")' in create_sql + cur = self.conn.cursor() + cur.execute(create_sql) + + def test_execute_fail(self): + create_sql = """ + CREATE TABLE test + ( + a TEXT, + b TEXT, + c REAL, + PRIMARY KEY (a, b) + ); + """ + cur = self.conn.cursor() + cur.execute(create_sql) + + sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.conn) + sql.execute('INSERT INTO test VALUES("foo", "baz", 2.567)', self.conn) + + with pytest.raises(sql.DatabaseError, match="Execution failed on sql"): + sql.execute('INSERT INTO test VALUES("foo", "bar", 7)', self.conn) + + def test_execute_closed_connection(self): + create_sql = """ + CREATE TABLE test + ( + a TEXT, + b TEXT, + c REAL, + PRIMARY KEY (a, b) + ); + """ + cur = self.conn.cursor() + cur.execute(create_sql) + + sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.conn) + self.conn.close() + + msg = "Cannot operate on a closed database." 
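+        # sqlite3 itself raises ProgrammingError with this exact message when
+        # a statement is executed on an already-closed connection.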
+ with pytest.raises(sqlite3.ProgrammingError, match=msg): + tquery("select * from test", con=self.conn) + + def test_keyword_as_column_names(self): + df = DataFrame({"From": np.ones(5)}) + assert sql.to_sql(df, con=self.conn, name="testkeywords", index=False) == 5 + + def test_onecolumn_of_integer(self): + # GH 3628 + # a column_of_integers dataframe should transfer well to sql + + mono_df = DataFrame([1, 2], columns=["c0"]) + assert sql.to_sql(mono_df, con=self.conn, name="mono_df", index=False) == 2 + # computing the sum via sql + con_x = self.conn + the_sum = sum(my_c0[0] for my_c0 in con_x.execute("select * from mono_df")) + # it should not fail, and gives 3 ( Issue #3628 ) + assert the_sum == 3 + + result = sql.read_sql("select * from mono_df", con_x) + tm.assert_frame_equal(result, mono_df) + + def test_if_exists(self): + df_if_exists_1 = DataFrame({"col1": [1, 2], "col2": ["A", "B"]}) + df_if_exists_2 = DataFrame({"col1": [3, 4, 5], "col2": ["C", "D", "E"]}) + table_name = "table_if_exists" + sql_select = f"SELECT * FROM {table_name}" + + msg = "'notvalidvalue' is not valid for if_exists" + with pytest.raises(ValueError, match=msg): + sql.to_sql( + frame=df_if_exists_1, + con=self.conn, + name=table_name, + if_exists="notvalidvalue", + ) + self.drop_table(table_name) + + # test if_exists='fail' + sql.to_sql( + frame=df_if_exists_1, con=self.conn, name=table_name, if_exists="fail" + ) + msg = "Table 'table_if_exists' already exists" + with pytest.raises(ValueError, match=msg): + sql.to_sql( + frame=df_if_exists_1, con=self.conn, name=table_name, if_exists="fail" + ) + # test if_exists='replace' + sql.to_sql( + frame=df_if_exists_1, + con=self.conn, + name=table_name, + if_exists="replace", + index=False, + ) + assert tquery(sql_select, con=self.conn) == [(1, "A"), (2, "B")] + assert ( + sql.to_sql( + frame=df_if_exists_2, + con=self.conn, + name=table_name, + if_exists="replace", + index=False, + ) + == 3 + ) + assert tquery(sql_select, con=self.conn) == [(3, "C"), (4, "D"), (5, "E")] + self.drop_table(table_name) + + # test if_exists='append' + assert ( + sql.to_sql( + frame=df_if_exists_1, + con=self.conn, + name=table_name, + if_exists="fail", + index=False, + ) + == 2 + ) + assert tquery(sql_select, con=self.conn) == [(1, "A"), (2, "B")] + assert ( + sql.to_sql( + frame=df_if_exists_2, + con=self.conn, + name=table_name, + if_exists="append", + index=False, + ) + == 3 + ) + assert tquery(sql_select, con=self.conn) == [ + (1, "A"), + (2, "B"), + (3, "C"), + (4, "D"), + (5, "E"), + ] + self.drop_table(table_name) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/test_stata.py b/.local/lib/python3.8/site-packages/pandas/tests/io/test_stata.py new file mode 100644 index 0000000..f0fd391 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/test_stata.py @@ -0,0 +1,2214 @@ +import bz2 +import datetime as dt +from datetime import datetime +import gzip +import io +import os +import struct +import warnings +import zipfile + +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_categorical_dtype + +import pandas as pd +import pandas._testing as tm +from pandas.core.frame import ( + DataFrame, + Series, +) +from pandas.core.indexes.api import ensure_index + +import pandas.io.common as icom +from pandas.io.parsers import read_csv +from pandas.io.stata import ( + CategoricalConversionWarning, + InvalidColumnName, + PossiblePrecisionLoss, + StataMissingValue, + StataReader, + StataWriter, + StataWriterUTF8, + ValueLabelTypeMismatch, + 
read_stata, +) + + +@pytest.fixture() +def mixed_frame(): + return DataFrame( + { + "a": [1, 2, 3, 4], + "b": [1.0, 3.0, 27.0, 81.0], + "c": ["Atlanta", "Birmingham", "Cincinnati", "Detroit"], + } + ) + + +@pytest.fixture +def dirpath(datapath): + return datapath("io", "data", "stata") + + +@pytest.fixture +def parsed_114(dirpath): + dta14_114 = os.path.join(dirpath, "stata5_114.dta") + parsed_114 = read_stata(dta14_114, convert_dates=True) + parsed_114.index.name = "index" + return parsed_114 + + +class TestStata: + @pytest.fixture(autouse=True) + def setup_method(self, datapath): + self.dirpath = datapath("io", "data", "stata") + self.dta1_114 = os.path.join(self.dirpath, "stata1_114.dta") + self.dta1_117 = os.path.join(self.dirpath, "stata1_117.dta") + + self.dta2_113 = os.path.join(self.dirpath, "stata2_113.dta") + self.dta2_114 = os.path.join(self.dirpath, "stata2_114.dta") + self.dta2_115 = os.path.join(self.dirpath, "stata2_115.dta") + self.dta2_117 = os.path.join(self.dirpath, "stata2_117.dta") + + self.dta3_113 = os.path.join(self.dirpath, "stata3_113.dta") + self.dta3_114 = os.path.join(self.dirpath, "stata3_114.dta") + self.dta3_115 = os.path.join(self.dirpath, "stata3_115.dta") + self.dta3_117 = os.path.join(self.dirpath, "stata3_117.dta") + self.csv3 = os.path.join(self.dirpath, "stata3.csv") + + self.dta4_113 = os.path.join(self.dirpath, "stata4_113.dta") + self.dta4_114 = os.path.join(self.dirpath, "stata4_114.dta") + self.dta4_115 = os.path.join(self.dirpath, "stata4_115.dta") + self.dta4_117 = os.path.join(self.dirpath, "stata4_117.dta") + + self.dta_encoding = os.path.join(self.dirpath, "stata1_encoding.dta") + self.dta_encoding_118 = os.path.join(self.dirpath, "stata1_encoding_118.dta") + + self.csv14 = os.path.join(self.dirpath, "stata5.csv") + self.dta14_113 = os.path.join(self.dirpath, "stata5_113.dta") + self.dta14_114 = os.path.join(self.dirpath, "stata5_114.dta") + self.dta14_115 = os.path.join(self.dirpath, "stata5_115.dta") + self.dta14_117 = os.path.join(self.dirpath, "stata5_117.dta") + + self.csv15 = os.path.join(self.dirpath, "stata6.csv") + self.dta15_113 = os.path.join(self.dirpath, "stata6_113.dta") + self.dta15_114 = os.path.join(self.dirpath, "stata6_114.dta") + self.dta15_115 = os.path.join(self.dirpath, "stata6_115.dta") + self.dta15_117 = os.path.join(self.dirpath, "stata6_117.dta") + + self.dta16_115 = os.path.join(self.dirpath, "stata7_115.dta") + self.dta16_117 = os.path.join(self.dirpath, "stata7_117.dta") + + self.dta17_113 = os.path.join(self.dirpath, "stata8_113.dta") + self.dta17_115 = os.path.join(self.dirpath, "stata8_115.dta") + self.dta17_117 = os.path.join(self.dirpath, "stata8_117.dta") + + self.dta18_115 = os.path.join(self.dirpath, "stata9_115.dta") + self.dta18_117 = os.path.join(self.dirpath, "stata9_117.dta") + + self.dta19_115 = os.path.join(self.dirpath, "stata10_115.dta") + self.dta19_117 = os.path.join(self.dirpath, "stata10_117.dta") + + self.dta20_115 = os.path.join(self.dirpath, "stata11_115.dta") + self.dta20_117 = os.path.join(self.dirpath, "stata11_117.dta") + + self.dta21_117 = os.path.join(self.dirpath, "stata12_117.dta") + + self.dta22_118 = os.path.join(self.dirpath, "stata14_118.dta") + self.dta23 = os.path.join(self.dirpath, "stata15.dta") + + self.dta24_111 = os.path.join(self.dirpath, "stata7_111.dta") + self.dta25_118 = os.path.join(self.dirpath, "stata16_118.dta") + + self.dta26_119 = os.path.join(self.dirpath, "stata1_119.dta.gz") + + self.stata_dates = os.path.join(self.dirpath, "stata13_dates.dta") + + def 
read_dta(self, file): + # Legacy default reader configuration + return read_stata(file, convert_dates=True) + + def read_csv(self, file): + return read_csv(file, parse_dates=True) + + @pytest.mark.parametrize("version", [114, 117, 118, 119, None]) + def test_read_empty_dta(self, version): + empty_ds = DataFrame(columns=["unit"]) + # GH 7369, make sure can read a 0-obs dta file + with tm.ensure_clean() as path: + empty_ds.to_stata(path, write_index=False, version=version) + empty_ds2 = read_stata(path) + tm.assert_frame_equal(empty_ds, empty_ds2) + + @pytest.mark.parametrize("file", ["dta1_114", "dta1_117"]) + def test_read_dta1(self, file): + + file = getattr(self, file) + parsed = self.read_dta(file) + + # Pandas uses np.nan as missing value. + # Thus, all columns will be of type float, regardless of their name. + expected = DataFrame( + [(np.nan, np.nan, np.nan, np.nan, np.nan)], + columns=["float_miss", "double_miss", "byte_miss", "int_miss", "long_miss"], + ) + + # this is an oddity as really the nan should be float64, but + # the casting doesn't fail so need to match stata here + expected["float_miss"] = expected["float_miss"].astype(np.float32) + + tm.assert_frame_equal(parsed, expected) + + def test_read_dta2(self): + + expected = DataFrame.from_records( + [ + ( + datetime(2006, 11, 19, 23, 13, 20), + 1479596223000, + datetime(2010, 1, 20), + datetime(2010, 1, 8), + datetime(2010, 1, 1), + datetime(1974, 7, 1), + datetime(2010, 1, 1), + datetime(2010, 1, 1), + ), + ( + datetime(1959, 12, 31, 20, 3, 20), + -1479590, + datetime(1953, 10, 2), + datetime(1948, 6, 10), + datetime(1955, 1, 1), + datetime(1955, 7, 1), + datetime(1955, 1, 1), + datetime(2, 1, 1), + ), + (pd.NaT, pd.NaT, pd.NaT, pd.NaT, pd.NaT, pd.NaT, pd.NaT, pd.NaT), + ], + columns=[ + "datetime_c", + "datetime_big_c", + "date", + "weekly_date", + "monthly_date", + "quarterly_date", + "half_yearly_date", + "yearly_date", + ], + ) + expected["yearly_date"] = expected["yearly_date"].astype("O") + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + parsed_114 = self.read_dta(self.dta2_114) + parsed_115 = self.read_dta(self.dta2_115) + parsed_117 = self.read_dta(self.dta2_117) + # 113 is buggy due to limits of date format support in Stata + # parsed_113 = self.read_dta(self.dta2_113) + + # Remove resource warnings + w = [x for x in w if x.category is UserWarning] + + # should get warning for each call to read_dta + assert len(w) == 3 + + # buggy test because of the NaT comparison on certain platforms + # Format 113 test fails since it does not support tc and tC formats + # tm.assert_frame_equal(parsed_113, expected) + tm.assert_frame_equal(parsed_114, expected, check_datetimelike_compat=True) + tm.assert_frame_equal(parsed_115, expected, check_datetimelike_compat=True) + tm.assert_frame_equal(parsed_117, expected, check_datetimelike_compat=True) + + @pytest.mark.parametrize("file", ["dta3_113", "dta3_114", "dta3_115", "dta3_117"]) + def test_read_dta3(self, file): + + file = getattr(self, file) + parsed = self.read_dta(file) + + # match stata here + expected = self.read_csv(self.csv3) + expected = expected.astype(np.float32) + expected["year"] = expected["year"].astype(np.int16) + expected["quarter"] = expected["quarter"].astype(np.int8) + + tm.assert_frame_equal(parsed, expected) + + @pytest.mark.parametrize("file", ["dta4_113", "dta4_114", "dta4_115", "dta4_117"]) + def test_read_dta4(self, file): + + file = getattr(self, file) + parsed = self.read_dta(file) + + expected = 
DataFrame.from_records( + [ + ["one", "ten", "one", "one", "one"], + ["two", "nine", "two", "two", "two"], + ["three", "eight", "three", "three", "three"], + ["four", "seven", 4, "four", "four"], + ["five", "six", 5, np.nan, "five"], + ["six", "five", 6, np.nan, "six"], + ["seven", "four", 7, np.nan, "seven"], + ["eight", "three", 8, np.nan, "eight"], + ["nine", "two", 9, np.nan, "nine"], + ["ten", "one", "ten", np.nan, "ten"], + ], + columns=[ + "fully_labeled", + "fully_labeled2", + "incompletely_labeled", + "labeled_with_missings", + "float_labelled", + ], + ) + + # these are all categoricals + for col in expected: + orig = expected[col].copy() + + categories = np.asarray(expected["fully_labeled"][orig.notna()]) + if col == "incompletely_labeled": + categories = orig + + cat = orig.astype("category")._values + cat = cat.set_categories(categories, ordered=True) + cat.categories.rename(None, inplace=True) + + expected[col] = cat + + # stata doesn't save .category metadata + tm.assert_frame_equal(parsed, expected) + + # File containing strls + def test_read_dta12(self): + parsed_117 = self.read_dta(self.dta21_117) + expected = DataFrame.from_records( + [ + [1, "abc", "abcdefghi"], + [3, "cba", "qwertywertyqwerty"], + [93, "", "strl"], + ], + columns=["x", "y", "z"], + ) + + tm.assert_frame_equal(parsed_117, expected, check_dtype=False) + + def test_read_dta18(self): + parsed_118 = self.read_dta(self.dta22_118) + parsed_118["Bytes"] = parsed_118["Bytes"].astype("O") + expected = DataFrame.from_records( + [ + ["Cat", "Bogota", "Bogotá", 1, 1.0, "option b Ünicode", 1.0], + ["Dog", "Boston", "Uzunköprü", np.nan, np.nan, np.nan, np.nan], + ["Plane", "Rome", "Tromsø", 0, 0.0, "option a", 0.0], + ["Potato", "Tokyo", "Elâzığ", -4, 4.0, 4, 4], + ["", "", "", 0, 0.3332999, "option a", 1 / 3.0], + ], + columns=[ + "Things", + "Cities", + "Unicode_Cities_Strl", + "Ints", + "Floats", + "Bytes", + "Longs", + ], + ) + expected["Floats"] = expected["Floats"].astype(np.float32) + for col in parsed_118.columns: + tm.assert_almost_equal(parsed_118[col], expected[col]) + + with StataReader(self.dta22_118) as rdr: + vl = rdr.variable_labels() + vl_expected = { + "Unicode_Cities_Strl": "Here are some strls with Ünicode chars", + "Longs": "long data", + "Things": "Here are some things", + "Bytes": "byte data", + "Ints": "int data", + "Cities": "Here are some cities", + "Floats": "float data", + } + tm.assert_dict_equal(vl, vl_expected) + + assert rdr.data_label == "This is a Ünicode data label" + + def test_read_write_dta5(self): + original = DataFrame( + [(np.nan, np.nan, np.nan, np.nan, np.nan)], + columns=["float_miss", "double_miss", "byte_miss", "int_miss", "long_miss"], + ) + original.index.name = "index" + + with tm.ensure_clean() as path: + original.to_stata(path, None) + written_and_read_again = self.read_dta(path) + tm.assert_frame_equal(written_and_read_again.set_index("index"), original) + + def test_write_dta6(self): + original = self.read_csv(self.csv3) + original.index.name = "index" + original.index = original.index.astype(np.int32) + original["year"] = original["year"].astype(np.int32) + original["quarter"] = original["quarter"].astype(np.int32) + + with tm.ensure_clean() as path: + original.to_stata(path, None) + written_and_read_again = self.read_dta(path) + tm.assert_frame_equal( + written_and_read_again.set_index("index"), + original, + check_index_type=False, + ) + + @pytest.mark.parametrize("version", [114, 117, 118, 119, None]) + def test_read_write_dta10(self, version): + original = 
DataFrame( + data=[["string", "object", 1, 1.1, np.datetime64("2003-12-25")]], + columns=["string", "object", "integer", "floating", "datetime"], + ) + original["object"] = Series(original["object"], dtype=object) + original.index.name = "index" + original.index = original.index.astype(np.int32) + original["integer"] = original["integer"].astype(np.int32) + + with tm.ensure_clean() as path: + original.to_stata(path, {"datetime": "tc"}, version=version) + written_and_read_again = self.read_dta(path) + # original.index is np.int32, read index is np.int64 + tm.assert_frame_equal( + written_and_read_again.set_index("index"), + original, + check_index_type=False, + ) + + def test_stata_doc_examples(self): + with tm.ensure_clean() as path: + df = DataFrame(np.random.randn(10, 2), columns=list("AB")) + df.to_stata(path) + + def test_write_preserves_original(self): + # 9795 + np.random.seed(423) + df = DataFrame(np.random.randn(5, 4), columns=list("abcd")) + df.loc[2, "a":"c"] = np.nan + df_copy = df.copy() + with tm.ensure_clean() as path: + df.to_stata(path, write_index=False) + tm.assert_frame_equal(df, df_copy) + + @pytest.mark.parametrize("version", [114, 117, 118, 119, None]) + def test_encoding(self, version): + + # GH 4626, proper encoding handling + raw = read_stata(self.dta_encoding) + encoded = read_stata(self.dta_encoding) + result = encoded.kreis1849[0] + + expected = raw.kreis1849[0] + assert result == expected + assert isinstance(result, str) + + with tm.ensure_clean() as path: + encoded.to_stata(path, write_index=False, version=version) + reread_encoded = read_stata(path) + tm.assert_frame_equal(encoded, reread_encoded) + + def test_read_write_dta11(self): + original = DataFrame( + [(1, 2, 3, 4)], + columns=[ + "good", + "b\u00E4d", + "8number", + "astringwithmorethan32characters______", + ], + ) + formatted = DataFrame( + [(1, 2, 3, 4)], + columns=["good", "b_d", "_8number", "astringwithmorethan32characters_"], + ) + formatted.index.name = "index" + formatted = formatted.astype(np.int32) + + with tm.ensure_clean() as path: + with tm.assert_produces_warning(InvalidColumnName): + original.to_stata(path, None) + + written_and_read_again = self.read_dta(path) + tm.assert_frame_equal(written_and_read_again.set_index("index"), formatted) + + @pytest.mark.parametrize("version", [114, 117, 118, 119, None]) + def test_read_write_dta12(self, version): + original = DataFrame( + [(1, 2, 3, 4, 5, 6)], + columns=[ + "astringwithmorethan32characters_1", + "astringwithmorethan32characters_2", + "+", + "-", + "short", + "delete", + ], + ) + formatted = DataFrame( + [(1, 2, 3, 4, 5, 6)], + columns=[ + "astringwithmorethan32characters_", + "_0astringwithmorethan32character", + "_", + "_1_", + "_short", + "_delete", + ], + ) + formatted.index.name = "index" + formatted = formatted.astype(np.int32) + + with tm.ensure_clean() as path: + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always", InvalidColumnName) + original.to_stata(path, None, version=version) + # should get a warning for that format. 
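+                # Six columns are renamed here (the over-long names are
+                # truncated and de-duplicated, "+" and "-" are replaced,
+                # and the reserved words "short" and "delete" get a leading
+                # underscore), yet only one InvalidColumnName warning is
+                # expected: the writer reports all renames in one warning.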
+ assert len(w) == 1 + + written_and_read_again = self.read_dta(path) + tm.assert_frame_equal(written_and_read_again.set_index("index"), formatted) + + def test_read_write_dta13(self): + s1 = Series(2 ** 9, dtype=np.int16) + s2 = Series(2 ** 17, dtype=np.int32) + s3 = Series(2 ** 33, dtype=np.int64) + original = DataFrame({"int16": s1, "int32": s2, "int64": s3}) + original.index.name = "index" + + formatted = original + formatted["int64"] = formatted["int64"].astype(np.float64) + + with tm.ensure_clean() as path: + original.to_stata(path) + written_and_read_again = self.read_dta(path) + tm.assert_frame_equal(written_and_read_again.set_index("index"), formatted) + + @pytest.mark.parametrize("version", [114, 117, 118, 119, None]) + @pytest.mark.parametrize( + "file", ["dta14_113", "dta14_114", "dta14_115", "dta14_117"] + ) + def test_read_write_reread_dta14(self, file, parsed_114, version): + file = getattr(self, file) + parsed = self.read_dta(file) + parsed.index.name = "index" + + expected = self.read_csv(self.csv14) + cols = ["byte_", "int_", "long_", "float_", "double_"] + for col in cols: + expected[col] = expected[col]._convert(datetime=True, numeric=True) + expected["float_"] = expected["float_"].astype(np.float32) + expected["date_td"] = pd.to_datetime(expected["date_td"], errors="coerce") + + tm.assert_frame_equal(parsed_114, parsed) + + with tm.ensure_clean() as path: + parsed_114.to_stata(path, {"date_td": "td"}, version=version) + written_and_read_again = self.read_dta(path) + tm.assert_frame_equal(written_and_read_again.set_index("index"), parsed_114) + + @pytest.mark.parametrize( + "file", ["dta15_113", "dta15_114", "dta15_115", "dta15_117"] + ) + def test_read_write_reread_dta15(self, file): + + expected = self.read_csv(self.csv15) + expected["byte_"] = expected["byte_"].astype(np.int8) + expected["int_"] = expected["int_"].astype(np.int16) + expected["long_"] = expected["long_"].astype(np.int32) + expected["float_"] = expected["float_"].astype(np.float32) + expected["double_"] = expected["double_"].astype(np.float64) + expected["date_td"] = expected["date_td"].apply( + datetime.strptime, args=("%Y-%m-%d",) + ) + + file = getattr(self, file) + parsed = self.read_dta(file) + + tm.assert_frame_equal(expected, parsed) + + @pytest.mark.parametrize("version", [114, 117, 118, 119, None]) + def test_timestamp_and_label(self, version): + original = DataFrame([(1,)], columns=["variable"]) + time_stamp = datetime(2000, 2, 29, 14, 21) + data_label = "This is a data file." + with tm.ensure_clean() as path: + original.to_stata( + path, time_stamp=time_stamp, data_label=data_label, version=version + ) + + with StataReader(path) as reader: + assert reader.time_stamp == "29 Feb 2000 14:21" + assert reader.data_label == data_label + + @pytest.mark.parametrize("version", [114, 117, 118, 119, None]) + def test_invalid_timestamp(self, version): + original = DataFrame([(1,)], columns=["variable"]) + time_stamp = "01 Jan 2000, 00:00:00" + with tm.ensure_clean() as path: + msg = "time_stamp should be datetime type" + with pytest.raises(ValueError, match=msg): + original.to_stata(path, time_stamp=time_stamp, version=version) + assert not os.path.isfile(path) + + def test_numeric_column_names(self): + original = DataFrame(np.reshape(np.arange(25.0), (5, 5))) + original.index.name = "index" + with tm.ensure_clean() as path: + # should get a warning for that format. 
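+        # Stata variable names cannot begin with a digit, so the integer
+        # column labels 0..4 are written out as _0.._4; the int(x[1])
+        # mapping below recovers the numeric labels when reading back.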
+ with tm.assert_produces_warning(InvalidColumnName): + original.to_stata(path) + + written_and_read_again = self.read_dta(path) + written_and_read_again = written_and_read_again.set_index("index") + columns = list(written_and_read_again.columns) + convert_col_name = lambda x: int(x[1]) + written_and_read_again.columns = map(convert_col_name, columns) + tm.assert_frame_equal(original, written_and_read_again) + + @pytest.mark.parametrize("version", [114, 117, 118, 119, None]) + def test_nan_to_missing_value(self, version): + s1 = Series(np.arange(4.0), dtype=np.float32) + s2 = Series(np.arange(4.0), dtype=np.float64) + s1[::2] = np.nan + s2[1::2] = np.nan + original = DataFrame({"s1": s1, "s2": s2}) + original.index.name = "index" + with tm.ensure_clean() as path: + original.to_stata(path, version=version) + written_and_read_again = self.read_dta(path) + written_and_read_again = written_and_read_again.set_index("index") + tm.assert_frame_equal(written_and_read_again, original) + + def test_no_index(self): + columns = ["x", "y"] + original = DataFrame(np.reshape(np.arange(10.0), (5, 2)), columns=columns) + original.index.name = "index_not_written" + with tm.ensure_clean() as path: + original.to_stata(path, write_index=False) + written_and_read_again = self.read_dta(path) + with pytest.raises(KeyError, match=original.index.name): + written_and_read_again["index_not_written"] + + def test_string_no_dates(self): + s1 = Series(["a", "A longer string"]) + s2 = Series([1.0, 2.0], dtype=np.float64) + original = DataFrame({"s1": s1, "s2": s2}) + original.index.name = "index" + with tm.ensure_clean() as path: + original.to_stata(path) + written_and_read_again = self.read_dta(path) + tm.assert_frame_equal(written_and_read_again.set_index("index"), original) + + def test_large_value_conversion(self): + s0 = Series([1, 99], dtype=np.int8) + s1 = Series([1, 127], dtype=np.int8) + s2 = Series([1, 2 ** 15 - 1], dtype=np.int16) + s3 = Series([1, 2 ** 63 - 1], dtype=np.int64) + original = DataFrame({"s0": s0, "s1": s1, "s2": s2, "s3": s3}) + original.index.name = "index" + with tm.ensure_clean() as path: + with tm.assert_produces_warning(PossiblePrecisionLoss): + original.to_stata(path) + + written_and_read_again = self.read_dta(path) + modified = original.copy() + modified["s1"] = Series(modified["s1"], dtype=np.int16) + modified["s2"] = Series(modified["s2"], dtype=np.int32) + modified["s3"] = Series(modified["s3"], dtype=np.float64) + tm.assert_frame_equal(written_and_read_again.set_index("index"), modified) + + def test_dates_invalid_column(self): + original = DataFrame([datetime(2006, 11, 19, 23, 13, 20)]) + original.index.name = "index" + with tm.ensure_clean() as path: + with tm.assert_produces_warning(InvalidColumnName): + original.to_stata(path, {0: "tc"}) + + written_and_read_again = self.read_dta(path) + modified = original.copy() + modified.columns = ["_0"] + tm.assert_frame_equal(written_and_read_again.set_index("index"), modified) + + def test_105(self): + # Data obtained from: + # http://go.worldbank.org/ZXY29PVJ21 + dpath = os.path.join(self.dirpath, "S4_EDUC1.dta") + df = read_stata(dpath) + df0 = [[1, 1, 3, -2], [2, 1, 2, -2], [4, 1, 1, -2]] + df0 = DataFrame(df0) + df0.columns = ["clustnum", "pri_schl", "psch_num", "psch_dis"] + df0["clustnum"] = df0["clustnum"].astype(np.int16) + df0["pri_schl"] = df0["pri_schl"].astype(np.int8) + df0["psch_num"] = df0["psch_num"].astype(np.int8) + df0["psch_dis"] = df0["psch_dis"].astype(np.float32) + tm.assert_frame_equal(df.head(3), df0) + + def 
test_value_labels_old_format(self): + # GH 19417 + # + # Test that value_labels() returns an empty dict if the file format + # predates supporting value labels. + dpath = os.path.join(self.dirpath, "S4_EDUC1.dta") + reader = StataReader(dpath) + assert reader.value_labels() == {} + reader.close() + + def test_date_export_formats(self): + columns = ["tc", "td", "tw", "tm", "tq", "th", "ty"] + conversions = {c: c for c in columns} + data = [datetime(2006, 11, 20, 23, 13, 20)] * len(columns) + original = DataFrame([data], columns=columns) + original.index.name = "index" + expected_values = [ + datetime(2006, 11, 20, 23, 13, 20), # Time + datetime(2006, 11, 20), # Day + datetime(2006, 11, 19), # Week + datetime(2006, 11, 1), # Month + datetime(2006, 10, 1), # Quarter year + datetime(2006, 7, 1), # Half year + datetime(2006, 1, 1), + ] # Year + + expected = DataFrame([expected_values], columns=columns) + expected.index.name = "index" + with tm.ensure_clean() as path: + original.to_stata(path, conversions) + written_and_read_again = self.read_dta(path) + tm.assert_frame_equal(written_and_read_again.set_index("index"), expected) + + def test_write_missing_strings(self): + original = DataFrame([["1"], [None]], columns=["foo"]) + expected = DataFrame([["1"], [""]], columns=["foo"]) + expected.index.name = "index" + with tm.ensure_clean() as path: + original.to_stata(path) + written_and_read_again = self.read_dta(path) + tm.assert_frame_equal(written_and_read_again.set_index("index"), expected) + + @pytest.mark.parametrize("version", [114, 117, 118, 119, None]) + @pytest.mark.parametrize("byteorder", [">", "<"]) + def test_bool_uint(self, byteorder, version): + s0 = Series([0, 1, True], dtype=np.bool_) + s1 = Series([0, 1, 100], dtype=np.uint8) + s2 = Series([0, 1, 255], dtype=np.uint8) + s3 = Series([0, 1, 2 ** 15 - 100], dtype=np.uint16) + s4 = Series([0, 1, 2 ** 16 - 1], dtype=np.uint16) + s5 = Series([0, 1, 2 ** 31 - 100], dtype=np.uint32) + s6 = Series([0, 1, 2 ** 32 - 1], dtype=np.uint32) + + original = DataFrame( + {"s0": s0, "s1": s1, "s2": s2, "s3": s3, "s4": s4, "s5": s5, "s6": s6} + ) + original.index.name = "index" + expected = original.copy() + expected_types = ( + np.int8, + np.int8, + np.int16, + np.int16, + np.int32, + np.int32, + np.float64, + ) + for c, t in zip(expected.columns, expected_types): + expected[c] = expected[c].astype(t) + + with tm.ensure_clean() as path: + original.to_stata(path, byteorder=byteorder, version=version) + written_and_read_again = self.read_dta(path) + written_and_read_again = written_and_read_again.set_index("index") + tm.assert_frame_equal(written_and_read_again, expected) + + def test_variable_labels(self): + with StataReader(self.dta16_115) as rdr: + sr_115 = rdr.variable_labels() + with StataReader(self.dta16_117) as rdr: + sr_117 = rdr.variable_labels() + keys = ("var1", "var2", "var3") + labels = ("label1", "label2", "label3") + for k, v in sr_115.items(): + assert k in sr_117 + assert v == sr_117[k] + assert k in keys + assert v in labels + + def test_minimal_size_col(self): + str_lens = (1, 100, 244) + s = {} + for str_len in str_lens: + s["s" + str(str_len)] = Series( + ["a" * str_len, "b" * str_len, "c" * str_len] + ) + original = DataFrame(s) + with tm.ensure_clean() as path: + original.to_stata(path, write_index=False) + + with StataReader(path) as sr: + typlist = sr.typlist + variables = sr.varlist + formats = sr.fmtlist + for variable, fmt, typ in zip(variables, formats, typlist): + assert int(variable[1:]) == int(fmt[1:-1]) + assert 
int(variable[1:]) == typ
+
+    def test_excessively_long_string(self):
+        str_lens = (1, 244, 500)
+        s = {}
+        for str_len in str_lens:
+            s["s" + str(str_len)] = Series(
+                ["a" * str_len, "b" * str_len, "c" * str_len]
+            )
+        original = DataFrame(s)
+        msg = (
+            r"Fixed width strings in Stata \.dta files are limited to 244 "
+            r"\(or fewer\)\ncharacters\. Column 's500' does not satisfy "
+            r"this restriction\. Use the\n'version=117' parameter to write "
+            r"the newer \(Stata 13 and later\) format\."
+        )
+        with pytest.raises(ValueError, match=msg):
+            with tm.ensure_clean() as path:
+                original.to_stata(path)
+
+    def test_missing_value_generator(self):
+        types = ("b", "h", "l")
+        df = DataFrame([[0.0]], columns=["float_"])
+        with tm.ensure_clean() as path:
+            df.to_stata(path)
+            with StataReader(path) as rdr:
+                valid_range = rdr.VALID_RANGE
+        expected_values = ["." + chr(97 + i) for i in range(26)]
+        expected_values.insert(0, ".")
+        for t in types:
+            offset = valid_range[t][1]
+            for i in range(0, 27):
+                val = StataMissingValue(offset + 1 + i)
+                assert val.string == expected_values[i]
+
+        # Test extremes for floats
+        val = StataMissingValue(struct.unpack("<f", b"\x00\x00\x00\x7f")[0])
+        assert val.string == "."
+        val = StataMissingValue(struct.unpack("<f", b"\x00\xd0\x00\x7f")[0])
+        assert val.string == ".z"
+
+        # Test extremes for doubles
+        val = StataMissingValue(
+            struct.unpack("<d", b"\x00\x00\x00\x00\x00\x00\xe0\x7f")[0]
+        )
+        assert val.string == "."
+        val = StataMissingValue(
+            struct.unpack("<d", b"\x00\x00\x00\x00\x00\x1a\xe0\x7f")[0]
+        )
+        assert val.string == ".z"
+
+    @staticmethod
+    def _convert_categorical(from_frame: DataFrame) -> DataFrame:
+        """
+        Emulate the categorical casting behavior we expect from roundtripping.
+        """
+        for col in from_frame:
+            ser = from_frame[col]
+            if is_categorical_dtype(ser.dtype):
+                cat = ser._values.remove_unused_categories()
+                if cat.categories.dtype == object:
+                    categories = ensure_index(cat.categories._values)
+                    cat = cat.set_categories(categories)
+                from_frame[col] = cat
+        return from_frame
+
+    def test_iterator(self):
+
+        fname = self.dta3_117
+
+        parsed = read_stata(fname)
+
+        with read_stata(fname, iterator=True) as itr:
+            chunk = itr.read(5)
+            tm.assert_frame_equal(parsed.iloc[0:5, :], chunk)
+
+        with read_stata(fname, chunksize=5) as itr:
+            chunk = list(itr)
+            tm.assert_frame_equal(parsed.iloc[0:5, :], chunk[0])
+
+        with read_stata(fname, iterator=True) as itr:
+            chunk = itr.get_chunk(5)
+            tm.assert_frame_equal(parsed.iloc[0:5, :], chunk)
+
+        with read_stata(fname, chunksize=5) as itr:
+            chunk = itr.get_chunk()
+            tm.assert_frame_equal(parsed.iloc[0:5, :], chunk)
+
+        # GH12153
+        with read_stata(fname, chunksize=4) as itr:
+            from_chunks = pd.concat(itr)
+            tm.assert_frame_equal(parsed, from_chunks)
+
+    @pytest.mark.parametrize(
+        "file",
+        [
+            "dta2_115",
+            "dta3_115",
+            "dta4_115",
+            "dta14_115",
+            "dta15_115",
+            "dta16_115",
+            "dta17_115",
+            "dta18_115",
+            "dta19_115",
+            "dta20_115",
+        ],
+    )
+    @pytest.mark.parametrize("chunksize", [1, 2])
+    @pytest.mark.parametrize("convert_categoricals", [False, True])
+    @pytest.mark.parametrize("convert_dates", [False, True])
+    def test_read_chunks_115(
+        self, file, chunksize, convert_categoricals, convert_dates
+    ):
+        fname = getattr(self, file)
+
+        # Read the whole file
+        with warnings.catch_warnings(record=True):
+            warnings.simplefilter("always")
+            parsed = read_stata(
+                fname,
+                convert_categoricals=convert_categoricals,
+                convert_dates=convert_dates,
+            )
+
+        # Compare to what we get when reading by chunk
+        itr = read_stata(
+            fname,
+            iterator=True,
+            convert_dates=convert_dates,
+            convert_categoricals=convert_categoricals,
+        )
+        pos = 0
+        for j in range(5):
+            with warnings.catch_warnings(record=True):
+                warnings.simplefilter("always")
+                try:
+                    chunk = itr.read(chunksize)
+                except StopIteration:
+                    break
+                from_frame = parsed.iloc[pos : pos + chunksize, :].copy()
+                from_frame = self._convert_categorical(from_frame)
+                tm.assert_frame_equal(
+                    from_frame, chunk, check_dtype=False,
check_datetimelike_compat=True + ) + + pos += chunksize + itr.close() + + def test_read_chunks_columns(self): + fname = self.dta3_117 + columns = ["quarter", "cpi", "m1"] + chunksize = 2 + + parsed = read_stata(fname, columns=columns) + with read_stata(fname, iterator=True) as itr: + pos = 0 + for j in range(5): + chunk = itr.read(chunksize, columns=columns) + if chunk is None: + break + from_frame = parsed.iloc[pos : pos + chunksize, :] + tm.assert_frame_equal(from_frame, chunk, check_dtype=False) + pos += chunksize + + @pytest.mark.parametrize("version", [114, 117, 118, 119, None]) + def test_write_variable_labels(self, version, mixed_frame): + # GH 13631, add support for writing variable labels + mixed_frame.index.name = "index" + variable_labels = {"a": "City Rank", "b": "City Exponent", "c": "City"} + with tm.ensure_clean() as path: + mixed_frame.to_stata(path, variable_labels=variable_labels, version=version) + with StataReader(path) as sr: + read_labels = sr.variable_labels() + expected_labels = { + "index": "", + "a": "City Rank", + "b": "City Exponent", + "c": "City", + } + assert read_labels == expected_labels + + variable_labels["index"] = "The Index" + with tm.ensure_clean() as path: + mixed_frame.to_stata(path, variable_labels=variable_labels, version=version) + with StataReader(path) as sr: + read_labels = sr.variable_labels() + assert read_labels == variable_labels + + @pytest.mark.parametrize("version", [114, 117, 118, 119, None]) + def test_invalid_variable_labels(self, version, mixed_frame): + mixed_frame.index.name = "index" + variable_labels = {"a": "very long" * 10, "b": "City Exponent", "c": "City"} + with tm.ensure_clean() as path: + msg = "Variable labels must be 80 characters or fewer" + with pytest.raises(ValueError, match=msg): + mixed_frame.to_stata( + path, variable_labels=variable_labels, version=version + ) + + @pytest.mark.parametrize("version", [114, 117]) + def test_invalid_variable_label_encoding(self, version, mixed_frame): + mixed_frame.index.name = "index" + variable_labels = {"a": "very long" * 10, "b": "City Exponent", "c": "City"} + variable_labels["a"] = "invalid character Œ" + with tm.ensure_clean() as path: + with pytest.raises( + ValueError, match="Variable labels must contain only characters" + ): + mixed_frame.to_stata( + path, variable_labels=variable_labels, version=version + ) + + def test_write_variable_label_errors(self, mixed_frame): + values = ["\u03A1", "\u0391", "\u039D", "\u0394", "\u0391", "\u03A3"] + + variable_labels_utf8 = { + "a": "City Rank", + "b": "City Exponent", + "c": "".join(values), + } + + msg = ( + "Variable labels must contain only characters that can be " + "encoded in Latin-1" + ) + with pytest.raises(ValueError, match=msg): + with tm.ensure_clean() as path: + mixed_frame.to_stata(path, variable_labels=variable_labels_utf8) + + variable_labels_long = { + "a": "City Rank", + "b": "City Exponent", + "c": "A very, very, very long variable label " + "that is too long for Stata which means " + "that it has more than 80 characters", + } + + msg = "Variable labels must be 80 characters or fewer" + with pytest.raises(ValueError, match=msg): + with tm.ensure_clean() as path: + mixed_frame.to_stata(path, variable_labels=variable_labels_long) + + def test_default_date_conversion(self): + # GH 12259 + dates = [ + dt.datetime(1999, 12, 31, 12, 12, 12, 12000), + dt.datetime(2012, 12, 21, 12, 21, 12, 21000), + dt.datetime(1776, 7, 4, 7, 4, 7, 4000), + ] + original = DataFrame( + { + "nums": [1.0, 2.0, 3.0], + "strs": ["apple", 
"banana", "cherry"], + "dates": dates, + } + ) + + with tm.ensure_clean() as path: + original.to_stata(path, write_index=False) + reread = read_stata(path, convert_dates=True) + tm.assert_frame_equal(original, reread) + + original.to_stata(path, write_index=False, convert_dates={"dates": "tc"}) + direct = read_stata(path, convert_dates=True) + tm.assert_frame_equal(reread, direct) + + dates_idx = original.columns.tolist().index("dates") + original.to_stata(path, write_index=False, convert_dates={dates_idx: "tc"}) + direct = read_stata(path, convert_dates=True) + tm.assert_frame_equal(reread, direct) + + def test_unsupported_type(self): + original = DataFrame({"a": [1 + 2j, 2 + 4j]}) + + msg = "Data type complex128 not supported" + with pytest.raises(NotImplementedError, match=msg): + with tm.ensure_clean() as path: + original.to_stata(path) + + def test_unsupported_datetype(self): + dates = [ + dt.datetime(1999, 12, 31, 12, 12, 12, 12000), + dt.datetime(2012, 12, 21, 12, 21, 12, 21000), + dt.datetime(1776, 7, 4, 7, 4, 7, 4000), + ] + original = DataFrame( + { + "nums": [1.0, 2.0, 3.0], + "strs": ["apple", "banana", "cherry"], + "dates": dates, + } + ) + + msg = "Format %tC not implemented" + with pytest.raises(NotImplementedError, match=msg): + with tm.ensure_clean() as path: + original.to_stata(path, convert_dates={"dates": "tC"}) + + dates = pd.date_range("1-1-1990", periods=3, tz="Asia/Hong_Kong") + original = DataFrame( + { + "nums": [1.0, 2.0, 3.0], + "strs": ["apple", "banana", "cherry"], + "dates": dates, + } + ) + with pytest.raises(NotImplementedError, match="Data type datetime64"): + with tm.ensure_clean() as path: + original.to_stata(path) + + def test_repeated_column_labels(self): + # GH 13923, 25772 + msg = """ +Value labels for column ethnicsn are not unique. These cannot be converted to +pandas categoricals. + +Either read the file with `convert_categoricals` set to False or use the +low level interface in `StataReader` to separately read the values and the +value_labels. + +The repeated labels are:\n-+\nwolof +""" + with pytest.raises(ValueError, match=msg): + read_stata(self.dta23, convert_categoricals=True) + + def test_stata_111(self): + # 111 is an old version but still used by current versions of + # SAS when exporting to Stata format. We do not know of any + # on-line documentation for this version. 
+ df = read_stata(self.dta24_111) + original = DataFrame( + { + "y": [1, 1, 1, 1, 1, 0, 0, np.NaN, 0, 0], + "x": [1, 2, 1, 3, np.NaN, 4, 3, 5, 1, 6], + "w": [2, np.NaN, 5, 2, 4, 4, 3, 1, 2, 3], + "z": ["a", "b", "c", "d", "e", "", "g", "h", "i", "j"], + } + ) + original = original[["y", "x", "w", "z"]] + tm.assert_frame_equal(original, df) + + def test_out_of_range_double(self): + # GH 14618 + df = DataFrame( + { + "ColumnOk": [0.0, np.finfo(np.double).eps, 4.49423283715579e307], + "ColumnTooBig": [0.0, np.finfo(np.double).eps, np.finfo(np.double).max], + } + ) + msg = ( + r"Column ColumnTooBig has a maximum value \(.+\) outside the range " + r"supported by Stata \(.+\)" + ) + with pytest.raises(ValueError, match=msg): + with tm.ensure_clean() as path: + df.to_stata(path) + + df.loc[2, "ColumnTooBig"] = np.inf + msg = ( + "Column ColumnTooBig has a maximum value of infinity which is outside " + "the range supported by Stata" + ) + with pytest.raises(ValueError, match=msg): + with tm.ensure_clean() as path: + df.to_stata(path) + + def test_out_of_range_float(self): + original = DataFrame( + { + "ColumnOk": [ + 0.0, + np.finfo(np.float32).eps, + np.finfo(np.float32).max / 10.0, + ], + "ColumnTooBig": [ + 0.0, + np.finfo(np.float32).eps, + np.finfo(np.float32).max, + ], + } + ) + original.index.name = "index" + for col in original: + original[col] = original[col].astype(np.float32) + + with tm.ensure_clean() as path: + original.to_stata(path) + reread = read_stata(path) + original["ColumnTooBig"] = original["ColumnTooBig"].astype(np.float64) + tm.assert_frame_equal(original, reread.set_index("index")) + + original.loc[2, "ColumnTooBig"] = np.inf + msg = ( + "Column ColumnTooBig has a maximum value of infinity which " + "is outside the range supported by Stata" + ) + with pytest.raises(ValueError, match=msg): + with tm.ensure_clean() as path: + original.to_stata(path) + + def test_path_pathlib(self): + df = tm.makeDataFrame() + df.index.name = "index" + reader = lambda x: read_stata(x).set_index("index") + result = tm.round_trip_pathlib(df.to_stata, reader) + tm.assert_frame_equal(df, result) + + def test_pickle_path_localpath(self): + df = tm.makeDataFrame() + df.index.name = "index" + reader = lambda x: read_stata(x).set_index("index") + result = tm.round_trip_localpath(df.to_stata, reader) + tm.assert_frame_equal(df, result) + + @pytest.mark.parametrize("write_index", [True, False]) + def test_value_labels_iterator(self, write_index): + # GH 16923 + d = {"A": ["B", "E", "C", "A", "E"]} + df = DataFrame(data=d) + df["A"] = df["A"].astype("category") + with tm.ensure_clean() as path: + df.to_stata(path, write_index=write_index) + + with read_stata(path, iterator=True) as dta_iter: + value_labels = dta_iter.value_labels() + assert value_labels == {"A": {0: "A", 1: "B", 2: "C", 3: "E"}} + + def test_set_index(self): + # GH 17328 + df = tm.makeDataFrame() + df.index.name = "index" + with tm.ensure_clean() as path: + df.to_stata(path) + reread = read_stata(path, index_col="index") + tm.assert_frame_equal(df, reread) + + @pytest.mark.parametrize( + "column", ["ms", "day", "week", "month", "qtr", "half", "yr"] + ) + def test_date_parsing_ignores_format_details(self, column): + # GH 17797 + # + # Test that display formats are ignored when determining if a numeric + # column is a date value. + # + # All date types are stored as numbers and format associated with the + # column denotes both the type of the date and the display format. + # + # STATA supports 9 date types which each have distinct units. 
We test 7 + # of the 9 types, ignoring %tC and %tb. %tC is a variant of %tc that + # accounts for leap seconds and %tb relies on STATAs business calendar. + df = read_stata(self.stata_dates) + unformatted = df.loc[0, column] + formatted = df.loc[0, column + "_fmt"] + assert unformatted == formatted + + def test_writer_117(self): + original = DataFrame( + data=[ + [ + "string", + "object", + 1, + 1, + 1, + 1.1, + 1.1, + np.datetime64("2003-12-25"), + "a", + "a" * 2045, + "a" * 5000, + "a", + ], + [ + "string-1", + "object-1", + 1, + 1, + 1, + 1.1, + 1.1, + np.datetime64("2003-12-26"), + "b", + "b" * 2045, + "", + "", + ], + ], + columns=[ + "string", + "object", + "int8", + "int16", + "int32", + "float32", + "float64", + "datetime", + "s1", + "s2045", + "srtl", + "forced_strl", + ], + ) + original["object"] = Series(original["object"], dtype=object) + original["int8"] = Series(original["int8"], dtype=np.int8) + original["int16"] = Series(original["int16"], dtype=np.int16) + original["int32"] = original["int32"].astype(np.int32) + original["float32"] = Series(original["float32"], dtype=np.float32) + original.index.name = "index" + original.index = original.index.astype(np.int32) + copy = original.copy() + with tm.ensure_clean() as path: + original.to_stata( + path, + convert_dates={"datetime": "tc"}, + convert_strl=["forced_strl"], + version=117, + ) + written_and_read_again = self.read_dta(path) + # original.index is np.int32, read index is np.int64 + tm.assert_frame_equal( + written_and_read_again.set_index("index"), + original, + check_index_type=False, + ) + tm.assert_frame_equal(original, copy) + + def test_convert_strl_name_swap(self): + original = DataFrame( + [["a" * 3000, "A", "apple"], ["b" * 1000, "B", "banana"]], + columns=["long1" * 10, "long", 1], + ) + original.index.name = "index" + + with tm.assert_produces_warning(InvalidColumnName): + with tm.ensure_clean() as path: + original.to_stata(path, convert_strl=["long", 1], version=117) + reread = self.read_dta(path) + reread = reread.set_index("index") + reread.columns = original.columns + tm.assert_frame_equal(reread, original, check_index_type=False) + + def test_invalid_date_conversion(self): + # GH 12259 + dates = [ + dt.datetime(1999, 12, 31, 12, 12, 12, 12000), + dt.datetime(2012, 12, 21, 12, 21, 12, 21000), + dt.datetime(1776, 7, 4, 7, 4, 7, 4000), + ] + original = DataFrame( + { + "nums": [1.0, 2.0, 3.0], + "strs": ["apple", "banana", "cherry"], + "dates": dates, + } + ) + + with tm.ensure_clean() as path: + msg = "convert_dates key must be a column or an integer" + with pytest.raises(ValueError, match=msg): + original.to_stata(path, convert_dates={"wrong_name": "tc"}) + + @pytest.mark.parametrize("version", [114, 117, 118, 119, None]) + def test_nonfile_writing(self, version): + # GH 21041 + bio = io.BytesIO() + df = tm.makeDataFrame() + df.index.name = "index" + with tm.ensure_clean() as path: + df.to_stata(bio, version=version) + bio.seek(0) + with open(path, "wb") as dta: + dta.write(bio.read()) + reread = read_stata(path, index_col="index") + tm.assert_frame_equal(df, reread) + + def test_gzip_writing(self): + # writing version 117 requires seek and cannot be used with gzip + df = tm.makeDataFrame() + df.index.name = "index" + with tm.ensure_clean() as path: + with gzip.GzipFile(path, "wb") as gz: + df.to_stata(gz, version=114) + with gzip.GzipFile(path, "rb") as gz: + reread = read_stata(gz, index_col="index") + tm.assert_frame_equal(df, reread) + + def test_unicode_dta_118(self): + unicode_df = 
self.read_dta(self.dta25_118) + + columns = ["utf8", "latin1", "ascii", "utf8_strl", "ascii_strl"] + values = [ + ["ραηδας", "PÄNDÄS", "p", "ραηδας", "p"], + ["ƤĀńĐąŜ", "Ö", "a", "ƤĀńĐąŜ", "a"], + ["ᴘᴀᴎᴅᴀS", "Ü", "n", "ᴘᴀᴎᴅᴀS", "n"], + [" ", " ", "d", " ", "d"], + [" ", "", "a", " ", "a"], + ["", "", "s", "", "s"], + ["", "", " ", "", " "], + ] + expected = DataFrame(values, columns=columns) + + tm.assert_frame_equal(unicode_df, expected) + + def test_mixed_string_strl(self): + # GH 23633 + output = [{"mixed": "string" * 500, "number": 0}, {"mixed": None, "number": 1}] + output = DataFrame(output) + output.number = output.number.astype("int32") + + with tm.ensure_clean() as path: + output.to_stata(path, write_index=False, version=117) + reread = read_stata(path) + expected = output.fillna("") + tm.assert_frame_equal(reread, expected) + + # Check strl supports all None (null) + output.loc[:, "mixed"] = None + output.to_stata( + path, write_index=False, convert_strl=["mixed"], version=117 + ) + reread = read_stata(path) + expected = output.fillna("") + tm.assert_frame_equal(reread, expected) + + @pytest.mark.parametrize("version", [114, 117, 118, 119, None]) + def test_all_none_exception(self, version): + output = [{"none": "none", "number": 0}, {"none": None, "number": 1}] + output = DataFrame(output) + output.loc[:, "none"] = None + with tm.ensure_clean() as path: + with pytest.raises(ValueError, match="Column `none` cannot be exported"): + output.to_stata(path, version=version) + + @pytest.mark.parametrize("version", [114, 117, 118, 119, None]) + def test_invalid_file_not_written(self, version): + content = "Here is one __�__ Another one __·__ Another one __½__" + df = DataFrame([content], columns=["invalid"]) + with tm.ensure_clean() as path: + msg1 = ( + r"'latin-1' codec can't encode character '\\ufffd' " + r"in position 14: ordinal not in range\(256\)" + ) + msg2 = ( + "'ascii' codec can't decode byte 0xef in position 14: " + r"ordinal not in range\(128\)" + ) + with pytest.raises(UnicodeEncodeError, match=f"{msg1}|{msg2}"): + with tm.assert_produces_warning(ResourceWarning): + df.to_stata(path) + + def test_strl_latin1(self): + # GH 23573, correct GSO data to reflect correct size + output = DataFrame( + [["pandas"] * 2, ["þâÑÐŧ"] * 2], columns=["var_str", "var_strl"] + ) + + with tm.ensure_clean() as path: + output.to_stata(path, version=117, convert_strl=["var_strl"]) + with open(path, "rb") as reread: + content = reread.read() + expected = "þâÑÐŧ" + assert expected.encode("latin-1") in content + assert expected.encode("utf-8") in content + gsos = content.split(b"strls")[1][1:-2] + for gso in gsos.split(b"GSO")[1:]: + val = gso.split(b"\x00")[-2] + size = gso[gso.find(b"\x82") + 1] + assert len(val) == size - 1 + + def test_encoding_latin1_118(self): + # GH 25960 + msg = """ +One or more strings in the dta file could not be decoded using utf-8, and +so the fallback encoding of latin-1 is being used. This can happen when a file +has been incorrectly encoded by Stata or some other software. 
You should verify +the string values returned are correct.""" + with tm.assert_produces_warning(UnicodeWarning) as w: + encoded = read_stata(self.dta_encoding_118) + assert len(w) == 151 + assert w[0].message.args[0] == msg + + expected = DataFrame([["Düsseldorf"]] * 151, columns=["kreis1849"]) + tm.assert_frame_equal(encoded, expected) + + @pytest.mark.slow + def test_stata_119(self): + # Gzipped since contains 32,999 variables and uncompressed is 20MiB + with gzip.open(self.dta26_119, "rb") as gz: + df = read_stata(gz) + assert df.shape == (1, 32999) + assert df.iloc[0, 6] == "A" * 3000 + assert df.iloc[0, 7] == 3.14 + assert df.iloc[0, -1] == 1 + assert df.iloc[0, 0] == pd.Timestamp(datetime(2012, 12, 21, 21, 12, 21)) + + @pytest.mark.parametrize("version", [118, 119, None]) + def test_utf8_writer(self, version): + cat = pd.Categorical(["a", "β", "ĉ"], ordered=True) + data = DataFrame( + [ + [1.0, 1, "ᴬ", "ᴀ relatively long ŝtring"], + [2.0, 2, "ᴮ", ""], + [3.0, 3, "ᴰ", None], + ], + columns=["a", "β", "ĉ", "strls"], + ) + data["ᴐᴬᵀ"] = cat + variable_labels = { + "a": "apple", + "β": "ᵈᵉᵊ", + "ĉ": "ᴎტჄႲႳႴႶႺ", + "strls": "Long Strings", + "ᴐᴬᵀ": "", + } + data_label = "ᴅaᵀa-label" + data["β"] = data["β"].astype(np.int32) + with tm.ensure_clean() as path: + writer = StataWriterUTF8( + path, + data, + data_label=data_label, + convert_strl=["strls"], + variable_labels=variable_labels, + write_index=False, + version=version, + ) + writer.write_file() + reread_encoded = read_stata(path) + # Missing is intentionally converted to empty strl + data["strls"] = data["strls"].fillna("") + tm.assert_frame_equal(data, reread_encoded) + reader = StataReader(path) + assert reader.data_label == data_label + assert reader.variable_labels() == variable_labels + + data.to_stata(path, version=version, write_index=False) + reread_to_stata = read_stata(path) + tm.assert_frame_equal(data, reread_to_stata) + + def test_writer_118_exceptions(self): + df = DataFrame(np.zeros((1, 33000), dtype=np.int8)) + with tm.ensure_clean() as path: + with pytest.raises(ValueError, match="version must be either 118 or 119."): + StataWriterUTF8(path, df, version=117) + with tm.ensure_clean() as path: + with pytest.raises(ValueError, match="You must use version 119"): + StataWriterUTF8(path, df, version=118) + + +@pytest.mark.parametrize("version", [105, 108, 111, 113, 114]) +def test_backward_compat(version, datapath): + data_base = datapath("io", "data", "stata") + ref = os.path.join(data_base, "stata-compat-118.dta") + old = os.path.join(data_base, f"stata-compat-{version}.dta") + expected = read_stata(ref) + old_dta = read_stata(old) + tm.assert_frame_equal(old_dta, expected, check_dtype=False) + + +@pytest.mark.parametrize("version", [114, 117, 118, 119, None]) +@pytest.mark.parametrize("use_dict", [True, False]) +@pytest.mark.parametrize("infer", [True, False]) +def test_compression(compression, version, use_dict, infer): + file_name = "dta_inferred_compression.dta" + if compression: + if use_dict: + file_ext = compression + else: + file_ext = icom._compression_to_extension[compression] + file_name += f".{file_ext}" + compression_arg = compression + if infer: + compression_arg = "infer" + if use_dict: + compression_arg = {"method": compression} + + df = DataFrame(np.random.randn(10, 2), columns=list("AB")) + df.index.name = "index" + with tm.ensure_clean(file_name) as path: + df.to_stata(path, version=version, compression=compression_arg) + if compression == "gzip": + with gzip.open(path, "rb") as comp: + fp = 
io.BytesIO(comp.read()) + elif compression == "zip": + with zipfile.ZipFile(path, "r") as comp: + fp = io.BytesIO(comp.read(comp.filelist[0])) + elif compression == "bz2": + with bz2.open(path, "rb") as comp: + fp = io.BytesIO(comp.read()) + elif compression == "zstd": + zstd = pytest.importorskip("zstandard") + with zstd.open(path, "rb") as comp: + fp = io.BytesIO(comp.read()) + elif compression == "xz": + lzma = pytest.importorskip("lzma") + with lzma.open(path, "rb") as comp: + fp = io.BytesIO(comp.read()) + elif compression is None: + fp = path + reread = read_stata(fp, index_col="index") + tm.assert_frame_equal(reread, df) + + +@pytest.mark.parametrize("method", ["zip", "infer"]) +@pytest.mark.parametrize("file_ext", [None, "dta", "zip"]) +def test_compression_dict(method, file_ext): + file_name = f"test.{file_ext}" + archive_name = "test.dta" + df = DataFrame(np.random.randn(10, 2), columns=list("AB")) + df.index.name = "index" + with tm.ensure_clean(file_name) as path: + compression = {"method": method, "archive_name": archive_name} + df.to_stata(path, compression=compression) + if method == "zip" or file_ext == "zip": + with zipfile.ZipFile(path, "r") as zp: + assert len(zp.filelist) == 1 + assert zp.filelist[0].filename == archive_name + fp = io.BytesIO(zp.read(zp.filelist[0])) + else: + fp = path + reread = read_stata(fp, index_col="index") + tm.assert_frame_equal(reread, df) + + +@pytest.mark.parametrize("version", [114, 117, 118, 119, None]) +def test_chunked_categorical(version): + df = DataFrame({"cats": Series(["a", "b", "a", "b", "c"], dtype="category")}) + df.index.name = "index" + with tm.ensure_clean() as path: + df.to_stata(path, version=version) + reader = StataReader(path, chunksize=2, order_categoricals=False) + for i, block in enumerate(reader): + block = block.set_index("index") + assert "cats" in block + tm.assert_series_equal(block.cats, df.cats.iloc[2 * i : 2 * (i + 1)]) + + +def test_chunked_categorical_partial(dirpath): + dta_file = os.path.join(dirpath, "stata-dta-partially-labeled.dta") + values = ["a", "b", "a", "b", 3.0] + with StataReader(dta_file, chunksize=2) as reader: + with tm.assert_produces_warning(CategoricalConversionWarning): + for i, block in enumerate(reader): + assert list(block.cats) == values[2 * i : 2 * (i + 1)] + if i < 2: + idx = pd.Index(["a", "b"]) + else: + idx = pd.Index([3.0], dtype="float64") + tm.assert_index_equal(block.cats.cat.categories, idx) + with tm.assert_produces_warning(CategoricalConversionWarning): + with StataReader(dta_file, chunksize=5) as reader: + large_chunk = reader.__next__() + direct = read_stata(dta_file) + tm.assert_frame_equal(direct, large_chunk) + + +def test_iterator_errors(dirpath): + dta_file = os.path.join(dirpath, "stata-dta-partially-labeled.dta") + with pytest.raises(ValueError, match="chunksize must be a positive"): + StataReader(dta_file, chunksize=-1) + with pytest.raises(ValueError, match="chunksize must be a positive"): + StataReader(dta_file, chunksize=0) + with pytest.raises(ValueError, match="chunksize must be a positive"): + StataReader(dta_file, chunksize="apple") + + +def test_iterator_value_labels(): + # GH 31544 + values = ["c_label", "b_label"] + ["a_label"] * 500 + df = DataFrame({f"col{k}": pd.Categorical(values, ordered=True) for k in range(2)}) + with tm.ensure_clean() as path: + df.to_stata(path, write_index=False) + expected = pd.Index(["a_label", "b_label", "c_label"], dtype="object") + with read_stata(path, chunksize=100) as reader: + for j, chunk in enumerate(reader): + for i 
in range(2): + tm.assert_index_equal(chunk.dtypes[i].categories, expected) + tm.assert_frame_equal(chunk, df.iloc[j * 100 : (j + 1) * 100]) + + +def test_precision_loss(): + df = DataFrame( + [[sum(2 ** i for i in range(60)), sum(2 ** i for i in range(52))]], + columns=["big", "little"], + ) + with tm.ensure_clean() as path: + with tm.assert_produces_warning( + PossiblePrecisionLoss, match="Column converted from int64 to float64" + ): + df.to_stata(path, write_index=False) + reread = read_stata(path) + expected_dt = Series([np.float64, np.float64], index=["big", "little"]) + tm.assert_series_equal(reread.dtypes, expected_dt) + assert reread.loc[0, "little"] == df.loc[0, "little"] + assert reread.loc[0, "big"] == float(df.loc[0, "big"]) + + +def test_compression_roundtrip(compression): + df = DataFrame( + [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], + index=["A", "B"], + columns=["X", "Y", "Z"], + ) + df.index.name = "index" + + with tm.ensure_clean() as path: + + df.to_stata(path, compression=compression) + reread = read_stata(path, compression=compression, index_col="index") + tm.assert_frame_equal(df, reread) + + # explicitly ensure file was compressed. + with tm.decompress_file(path, compression) as fh: + contents = io.BytesIO(fh.read()) + reread = read_stata(contents, index_col="index") + tm.assert_frame_equal(df, reread) + + +@pytest.mark.parametrize("to_infer", [True, False]) +@pytest.mark.parametrize("read_infer", [True, False]) +def test_stata_compression(compression_only, read_infer, to_infer): + compression = compression_only + + ext = icom._compression_to_extension[compression] + filename = f"test.{ext}" + + df = DataFrame( + [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], + index=["A", "B"], + columns=["X", "Y", "Z"], + ) + df.index.name = "index" + + to_compression = "infer" if to_infer else compression + read_compression = "infer" if read_infer else compression + + with tm.ensure_clean(filename) as path: + df.to_stata(path, compression=to_compression) + result = read_stata(path, compression=read_compression, index_col="index") + tm.assert_frame_equal(result, df) + + +def test_non_categorical_value_labels(): + data = DataFrame( + { + "fully_labelled": [1, 2, 3, 3, 1], + "partially_labelled": [1.0, 2.0, np.nan, 9.0, np.nan], + "Y": [7, 7, 9, 8, 10], + "Z": pd.Categorical(["j", "k", "l", "k", "j"]), + } + ) + + with tm.ensure_clean() as path: + value_labels = { + "fully_labelled": {1: "one", 2: "two", 3: "three"}, + "partially_labelled": {1.0: "one", 2.0: "two"}, + } + expected = {**value_labels, "Z": {0: "j", 1: "k", 2: "l"}} + + writer = StataWriter(path, data, value_labels=value_labels) + writer.write_file() + + reader = StataReader(path) + reader_value_labels = reader.value_labels() + assert reader_value_labels == expected + + msg = "Can't create value labels for notY, it wasn't found in the dataset." + with pytest.raises(KeyError, match=msg): + value_labels = {"notY": {7: "label1", 8: "label2"}} + writer = StataWriter(path, data, value_labels=value_labels) + + msg = ( + "Can't create value labels for Z, value labels " + "can only be applied to numeric columns." 
+ ) + with pytest.raises(ValueError, match=msg): + value_labels = {"Z": {1: "a", 2: "k", 3: "j", 4: "i"}} + writer = StataWriter(path, data, value_labels=value_labels) + + +def test_non_categorical_value_label_name_conversion(): + # Check conversion of invalid variable names + data = DataFrame( + { + "invalid~!": [1, 1, 2, 3, 5, 8], # Only alphanumeric and _ + "6_invalid": [1, 1, 2, 3, 5, 8], # Must start with letter or _ + "invalid_name_longer_than_32_characters": [8, 8, 9, 9, 8, 8], # Too long + "aggregate": [2, 5, 5, 6, 6, 9], # Reserved words + (1, 2): [1, 2, 3, 4, 5, 6], # Hashable non-string + } + ) + + value_labels = { + "invalid~!": {1: "label1", 2: "label2"}, + "6_invalid": {1: "label1", 2: "label2"}, + "invalid_name_longer_than_32_characters": {8: "eight", 9: "nine"}, + "aggregate": {5: "five"}, + (1, 2): {3: "three"}, + } + + expected = { + "invalid__": {1: "label1", 2: "label2"}, + "_6_invalid": {1: "label1", 2: "label2"}, + "invalid_name_longer_than_32_char": {8: "eight", 9: "nine"}, + "_aggregate": {5: "five"}, + "_1__2_": {3: "three"}, + } + + with tm.ensure_clean() as path: + with tm.assert_produces_warning(InvalidColumnName): + data.to_stata(path, value_labels=value_labels) + + reader = StataReader(path) + reader_value_labels = reader.value_labels() + assert reader_value_labels == expected + + +def test_non_categorical_value_label_convert_categoricals_error(): + # Mapping more than one value to the same label is valid for Stata + # labels, but can't be read with convert_categoricals=True + value_labels = { + "repeated_labels": {10: "Ten", 20: "More than ten", 40: "More than ten"} + } + + data = DataFrame( + { + "repeated_labels": [10, 10, 20, 20, 40, 40], + } + ) + + with tm.ensure_clean() as path: + data.to_stata(path, value_labels=value_labels) + + reader = StataReader(path, convert_categoricals=False) + reader_value_labels = reader.value_labels() + assert reader_value_labels == value_labels + + col = "repeated_labels" + repeats = "-" * 80 + "\n" + "\n".join(["More than ten"]) + + msg = f""" +Value labels for column {col} are not unique. These cannot be converted to +pandas categoricals. + +Either read the file with `convert_categoricals` set to False or use the +low level interface in `StataReader` to separately read the values and the +value_labels. 
+ +The repeated labels are: +{repeats} +""" + with pytest.raises(ValueError, match=msg): + read_stata(path, convert_categoricals=True) + + +@pytest.mark.parametrize("version", [114, 117, 118, 119, None]) +@pytest.mark.parametrize( + "dtype", + [ + pd.BooleanDtype, + pd.Int8Dtype, + pd.Int16Dtype, + pd.Int32Dtype, + pd.Int64Dtype, + pd.UInt8Dtype, + pd.UInt16Dtype, + pd.UInt32Dtype, + pd.UInt64Dtype, + ], +) +def test_nullable_support(dtype, version): + df = DataFrame( + { + "a": Series([1.0, 2.0, 3.0]), + "b": Series([1, pd.NA, pd.NA], dtype=dtype.name), + "c": Series(["a", "b", None]), + } + ) + dtype_name = df.b.dtype.numpy_dtype.name + # Only use supported names: no uint, bool or int64 + dtype_name = dtype_name.replace("u", "") + if dtype_name == "int64": + dtype_name = "int32" + elif dtype_name == "bool": + dtype_name = "int8" + value = StataMissingValue.BASE_MISSING_VALUES[dtype_name] + smv = StataMissingValue(value) + expected_b = Series([1, smv, smv], dtype=object, name="b") + expected_c = Series(["a", "b", ""], name="c") + with tm.ensure_clean() as path: + df.to_stata(path, write_index=False, version=version) + reread = read_stata(path, convert_missing=True) + tm.assert_series_equal(df.a, reread.a) + tm.assert_series_equal(reread.b, expected_b) + tm.assert_series_equal(reread.c, expected_c) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/test_user_agent.py b/.local/lib/python3.8/site-packages/pandas/tests/io/test_user_agent.py new file mode 100644 index 0000000..ac4ca5d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/test_user_agent.py @@ -0,0 +1,379 @@ +""" +Tests for the pandas custom headers in http(s) requests +""" +import gzip +import http.server +from io import BytesIO +import multiprocessing +import socket +import time +import urllib.error + +import pytest + +from pandas.compat import is_ci_environment +import pandas.util._test_decorators as td + +import pandas as pd +import pandas._testing as tm + +pytestmark = pytest.mark.skipif( + is_ci_environment(), + reason="This test can hang in our CI min_versions build " + "and leads to '##[error]The runner has " + "received a shutdown signal...' in GHA. GH 45651", +) + + +class BaseUserAgentResponder(http.server.BaseHTTPRequestHandler): + """ + Base class for setting up a server that can be set up to respond + with a particular file format with accompanying content-type headers. + The interfaces on the different io methods are different enough + that this seemed logical to do. 
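+
+    Subclasses override only do_GET; each one echoes the request's
+    User-Agent header back in a one-column DataFrame serialized in its own
+    format (CSV, JSON, parquet, pickle, Stata), so the tests can assert on
+    the headers pandas sent.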
+ """ + + def start_processing_headers(self): + """ + shared logic at the start of a GET request + """ + self.send_response(200) + self.requested_from_user_agent = self.headers["User-Agent"] + response_df = pd.DataFrame( + { + "header": [self.requested_from_user_agent], + } + ) + return response_df + + def gzip_bytes(self, response_bytes): + """ + some web servers will send back gzipped files to save bandwidth + """ + with BytesIO() as bio: + with gzip.GzipFile(fileobj=bio, mode="w") as zipper: + zipper.write(response_bytes) + response_bytes = bio.getvalue() + return response_bytes + + def write_back_bytes(self, response_bytes): + """ + shared logic at the end of a GET request + """ + self.wfile.write(response_bytes) + + +class CSVUserAgentResponder(BaseUserAgentResponder): + def do_GET(self): + response_df = self.start_processing_headers() + + self.send_header("Content-Type", "text/csv") + self.end_headers() + + response_bytes = response_df.to_csv(index=False).encode("utf-8") + self.write_back_bytes(response_bytes) + + +class GzippedCSVUserAgentResponder(BaseUserAgentResponder): + def do_GET(self): + response_df = self.start_processing_headers() + self.send_header("Content-Type", "text/csv") + self.send_header("Content-Encoding", "gzip") + self.end_headers() + + response_bytes = response_df.to_csv(index=False).encode("utf-8") + response_bytes = self.gzip_bytes(response_bytes) + + self.write_back_bytes(response_bytes) + + +class JSONUserAgentResponder(BaseUserAgentResponder): + def do_GET(self): + response_df = self.start_processing_headers() + self.send_header("Content-Type", "application/json") + self.end_headers() + + response_bytes = response_df.to_json().encode("utf-8") + + self.write_back_bytes(response_bytes) + + +class GzippedJSONUserAgentResponder(BaseUserAgentResponder): + def do_GET(self): + response_df = self.start_processing_headers() + self.send_header("Content-Type", "application/json") + self.send_header("Content-Encoding", "gzip") + self.end_headers() + + response_bytes = response_df.to_json().encode("utf-8") + response_bytes = self.gzip_bytes(response_bytes) + + self.write_back_bytes(response_bytes) + + +class ParquetPyArrowUserAgentResponder(BaseUserAgentResponder): + def do_GET(self): + response_df = self.start_processing_headers() + self.send_header("Content-Type", "application/octet-stream") + self.end_headers() + + response_bytes = response_df.to_parquet(index=False, engine="pyarrow") + + self.write_back_bytes(response_bytes) + + +class ParquetFastParquetUserAgentResponder(BaseUserAgentResponder): + def do_GET(self): + response_df = self.start_processing_headers() + self.send_header("Content-Type", "application/octet-stream") + self.end_headers() + + # the fastparquet engine doesn't like to write to a buffer + # it can do it via the open_with function being set appropriately + # however it automatically calls the close method and wipes the buffer + # so just overwrite that attribute on this instance to not do that + + # protected by an importorskip in the respective test + import fsspec + + response_df.to_parquet( + "memory://fastparquet_user_agent.parquet", + index=False, + engine="fastparquet", + compression=None, + ) + with fsspec.open("memory://fastparquet_user_agent.parquet", "rb") as f: + response_bytes = f.read() + + self.write_back_bytes(response_bytes) + + +class PickleUserAgentResponder(BaseUserAgentResponder): + def do_GET(self): + response_df = self.start_processing_headers() + self.send_header("Content-Type", "application/octet-stream") + 
self.end_headers() + + bio = BytesIO() + response_df.to_pickle(bio) + response_bytes = bio.getvalue() + + self.write_back_bytes(response_bytes) + + +class StataUserAgentResponder(BaseUserAgentResponder): + def do_GET(self): + response_df = self.start_processing_headers() + self.send_header("Content-Type", "application/octet-stream") + self.end_headers() + + bio = BytesIO() + response_df.to_stata(bio, write_index=False) + response_bytes = bio.getvalue() + + self.write_back_bytes(response_bytes) + + +class AllHeaderCSVResponder(http.server.BaseHTTPRequestHandler): + """ + Send all request headers back for checking round trip + """ + + def do_GET(self): + response_df = pd.DataFrame(self.headers.items()) + self.send_response(200) + self.send_header("Content-Type", "text/csv") + self.end_headers() + response_bytes = response_df.to_csv(index=False).encode("utf-8") + self.wfile.write(response_bytes) + + +def wait_until_ready(func, *args, **kwargs): + def inner(*args, **kwargs): + while True: + try: + return func(*args, **kwargs) + except urllib.error.URLError: + # Connection refused as http server is starting + time.sleep(0.1) + + return inner + + +def process_server(responder, port): + with http.server.HTTPServer(("localhost", port), responder) as server: + server.handle_request() + server.server_close() + + +@pytest.fixture +def responder(request): + """ + Fixture that starts a local http server in a separate process on localhost + and returns the port. + + Running in a separate process instead of a thread to allow termination/killing + of http server upon cleanup. + """ + # Find an available port + with socket.socket() as sock: + sock.bind(("localhost", 0)) + port = sock.getsockname()[1] + + server_process = multiprocessing.Process( + target=process_server, args=(request.param, port) + ) + server_process.start() + yield port + server_process.join(10) + server_process.terminate() + kill_time = 5 + wait_time = 0 + while server_process.is_alive(): + if wait_time > kill_time: + server_process.kill() + break + else: + wait_time += 0.1 + time.sleep(0.1) + server_process.close() + + +@pytest.mark.parametrize( + "responder, read_method, parquet_engine", + [ + (CSVUserAgentResponder, pd.read_csv, None), + (JSONUserAgentResponder, pd.read_json, None), + (ParquetPyArrowUserAgentResponder, pd.read_parquet, "pyarrow"), + pytest.param( + ParquetFastParquetUserAgentResponder, + pd.read_parquet, + "fastparquet", + # TODO(ArrayManager) fastparquet + marks=[ + td.skip_array_manager_not_yet_implemented, + ], + ), + (PickleUserAgentResponder, pd.read_pickle, None), + (StataUserAgentResponder, pd.read_stata, None), + (GzippedCSVUserAgentResponder, pd.read_csv, None), + (GzippedJSONUserAgentResponder, pd.read_json, None), + ], + indirect=["responder"], +) +def test_server_and_default_headers(responder, read_method, parquet_engine): + if parquet_engine is not None: + pytest.importorskip(parquet_engine) + if parquet_engine == "fastparquet": + pytest.importorskip("fsspec") + + read_method = wait_until_ready(read_method) + if parquet_engine is None: + df_http = read_method(f"http://localhost:{responder}") + else: + df_http = read_method(f"http://localhost:{responder}", engine=parquet_engine) + + assert not df_http.empty + + +@pytest.mark.parametrize( + "responder, read_method, parquet_engine", + [ + (CSVUserAgentResponder, pd.read_csv, None), + (JSONUserAgentResponder, pd.read_json, None), + (ParquetPyArrowUserAgentResponder, pd.read_parquet, "pyarrow"), + pytest.param( + ParquetFastParquetUserAgentResponder, + 
pd.read_parquet, + "fastparquet", + # TODO(ArrayManager) fastparquet + marks=[ + td.skip_array_manager_not_yet_implemented, + ], + ), + (PickleUserAgentResponder, pd.read_pickle, None), + (StataUserAgentResponder, pd.read_stata, None), + (GzippedCSVUserAgentResponder, pd.read_csv, None), + (GzippedJSONUserAgentResponder, pd.read_json, None), + ], + indirect=["responder"], +) +def test_server_and_custom_headers(responder, read_method, parquet_engine): + if parquet_engine is not None: + pytest.importorskip(parquet_engine) + if parquet_engine == "fastparquet": + pytest.importorskip("fsspec") + + custom_user_agent = "Super Cool One" + df_true = pd.DataFrame({"header": [custom_user_agent]}) + + read_method = wait_until_ready(read_method) + if parquet_engine is None: + df_http = read_method( + f"http://localhost:{responder}", + storage_options={"User-Agent": custom_user_agent}, + ) + else: + df_http = read_method( + f"http://localhost:{responder}", + storage_options={"User-Agent": custom_user_agent}, + engine=parquet_engine, + ) + + tm.assert_frame_equal(df_true, df_http) + + +@pytest.mark.parametrize( + "responder, read_method", + [ + (AllHeaderCSVResponder, pd.read_csv), + ], + indirect=["responder"], +) +def test_server_and_all_custom_headers(responder, read_method): + custom_user_agent = "Super Cool One" + custom_auth_token = "Super Secret One" + storage_options = { + "User-Agent": custom_user_agent, + "Auth": custom_auth_token, + } + read_method = wait_until_ready(read_method) + df_http = read_method( + f"http://localhost:{responder}", + storage_options=storage_options, + ) + + df_http = df_http[df_http["0"].isin(storage_options.keys())] + df_http = df_http.sort_values(["0"]).reset_index() + df_http = df_http[["0", "1"]] + + keys = list(storage_options.keys()) + df_true = pd.DataFrame({"0": keys, "1": [storage_options[k] for k in keys]}) + df_true = df_true.sort_values(["0"]) + df_true = df_true.reset_index().drop(["index"], axis=1) + + tm.assert_frame_equal(df_true, df_http) + + +@pytest.mark.parametrize( + "engine", + [ + "pyarrow", + "fastparquet", + ], +) +def test_to_parquet_to_disk_with_storage_options(engine): + headers = { + "User-Agent": "custom", + "Auth": "other_custom", + } + + pytest.importorskip(engine) + + true_df = pd.DataFrame({"column_name": ["column_value"]}) + msg = ( + "storage_options passed with file object or non-fsspec file path|" + "storage_options passed with buffer, or non-supported URL" + ) + with pytest.raises(ValueError, match=msg): + true_df.to_parquet("/tmp/junk.parquet", storage_options=headers, engine=engine) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/xml/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/io/xml/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/xml/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/xml/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..cf5769f Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/xml/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/xml/__pycache__/test_to_xml.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/xml/__pycache__/test_to_xml.cpython-38.pyc new file mode 100644 index 0000000..304d6f2 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/xml/__pycache__/test_to_xml.cpython-38.pyc differ diff --git 
a/.local/lib/python3.8/site-packages/pandas/tests/io/xml/__pycache__/test_xml.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/io/xml/__pycache__/test_xml.cpython-38.pyc new file mode 100644 index 0000000..6d93005 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/io/xml/__pycache__/test_xml.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/xml/test_to_xml.py b/.local/lib/python3.8/site-packages/pandas/tests/io/xml/test_to_xml.py new file mode 100644 index 0000000..beaa6d6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/xml/test_to_xml.py @@ -0,0 +1,1344 @@ +from __future__ import annotations + +from io import ( + BytesIO, + StringIO, +) +import os + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas import ( + NA, + DataFrame, + Index, +) +import pandas._testing as tm + +import pandas.io.common as icom +from pandas.io.common import get_handle +from pandas.io.xml import read_xml + +""" +CHECKLIST + +[x] - ValueError: "Values for parser can only be lxml or etree." + +etree +[x] - ImportError: "lxml not found, please install or use the etree parser." +[X] - TypeError: "...is not a valid type for attr_cols" +[X] - TypeError: "...is not a valid type for elem_cols" +[X] - LookupError: "unknown encoding" +[X] - KeyError: "...is not included in namespaces" +[X] - KeyError: "no valid column" +[X] - ValueError: "To use stylesheet, you need lxml installed..." +[] - OSError: (NEED PERMISSION ISSUE, DISK FULL, ETC.) +[X] - FileNotFoundError: "No such file or directory" +[X] - PermissionError: "Forbidden" + +lxml +[X] - TypeError: "...is not a valid type for attr_cols" +[X] - TypeError: "...is not a valid type for elem_cols" +[X] - LookupError: "unknown encoding" +[] - OSError: (NEED PERMISSION ISSUE, DISK FULL, ETC.) +[X] - FileNotFoundError: "No such file or directory" +[X] - KeyError: "...is not included in namespaces" +[X] - KeyError: "no valid column" +[X] - ValueError: "stylesheet is not a url, file, or xml string." +[] - LookupError: (NEED WRONG ENCODING FOR FILE OUTPUT) +[] - URLError: (USUALLY DUE TO NETWORKING) +[] - HTTPError: (NEED AN ONLINE STYLESHEET) +[X] - OSError: "failed to load external entity" +[X] - XMLSyntaxError: "Opening and ending tag mismatch" +[X] - XSLTApplyError: "Cannot resolve URI" +[X] - XSLTParseError: "failed to compile" +[X] - PermissionError: "Forbidden" +""" + +geom_df = DataFrame( + { + "shape": ["square", "circle", "triangle"], + "degrees": [360, 360, 180], + "sides": [4, np.nan, 3], + } +) + +planet_df = DataFrame( + { + "planet": [ + "Mercury", + "Venus", + "Earth", + "Mars", + "Jupiter", + "Saturn", + "Uranus", + "Neptune", + ], + "type": [ + "terrestrial", + "terrestrial", + "terrestrial", + "terrestrial", + "gas giant", + "gas giant", + "ice giant", + "ice giant", + ], + "location": [ + "inner", + "inner", + "inner", + "inner", + "outer", + "outer", + "outer", + "outer", + ], + "mass": [ + 0.330114, + 4.86747, + 5.97237, + 0.641712, + 1898.187, + 568.3174, + 86.8127, + 102.4126, + ], + } +) + +from_file_expected = """\ + + + + 0 + cooking + Everyday Italian + Giada De Laurentiis + 2005 + 30.0 + + + 1 + children + Harry Potter + J K. Rowling + 2005 + 29.99 + + + 2 + web + Learning XML + Erik T.
Ray + 2003 + 39.95 + +""" + + +def equalize_decl(doc): + # etree and lxml differ on quotes and case in xml declaration + if doc is not None: + doc = doc.replace( + ' + + + cooking + Everyday Italian + Giada De Laurentiis + 2005 + 30.0 + + + children + Harry Potter + J K. Rowling + 2005 + 29.99 + + + web + Learning XML + Erik T. Ray + 2003 + 39.95 + +""" + + filename = datapath("io", "data", "xml", "books.xml") + df_file = read_xml(filename, parser=parser) + + with tm.ensure_clean("test.xml") as path: + df_file.to_xml(path, index=False, parser=parser) + with open(path, "rb") as f: + output = f.read().decode("utf-8").strip() + + output = equalize_decl(output) + + assert output == expected + + +def test_index_false_rename_row_root(datapath, parser): + expected = """\ + + + + cooking + Everyday Italian + Giada De Laurentiis + 2005 + 30.0 + + + children + Harry Potter + J K. Rowling + 2005 + 29.99 + + + web + Learning XML + Erik T. Ray + 2003 + 39.95 + +""" + + filename = datapath("io", "data", "xml", "books.xml") + df_file = read_xml(filename, parser=parser) + + with tm.ensure_clean("test.xml") as path: + df_file.to_xml( + path, index=False, root_name="books", row_name="book", parser=parser + ) + with open(path, "rb") as f: + output = f.read().decode("utf-8").strip() + + output = equalize_decl(output) + + assert output == expected + + +@pytest.mark.parametrize( + "offset_index", [list(range(10, 13)), [str(i) for i in range(10, 13)]] +) +def test_index_false_with_offset_input_index(parser, offset_index): + """ + Tests that the output does not contain the `` field when the index of the + input Dataframe has an offset. + + This is a regression test for issue #42458. + """ + + expected = """\ + + + + square + 360 + 4.0 + + + circle + 360 + + + + triangle + 180 + 3.0 + +""" + + offset_geom_df = geom_df.copy() + offset_geom_df.index = Index(offset_index) + output = offset_geom_df.to_xml(index=False, parser=parser) + output = equalize_decl(output) + + assert output == expected + + +# NA_REP + +na_expected = """\ + + + + 0 + square + 360 + 4.0 + + + 1 + circle + 360 + + + + 2 + triangle + 180 + 3.0 + +""" + + +def test_na_elem_output(datapath, parser): + output = geom_df.to_xml(parser=parser) + output = equalize_decl(output) + + assert output == na_expected + + +def test_na_empty_str_elem_option(datapath, parser): + output = geom_df.to_xml(na_rep="", parser=parser) + output = equalize_decl(output) + + assert output == na_expected + + +def test_na_empty_elem_option(datapath, parser): + expected = """\ + + + + 0 + square + 360 + 4.0 + + + 1 + circle + 360 + 0.0 + + + 2 + triangle + 180 + 3.0 + +""" + + output = geom_df.to_xml(na_rep="0.0", parser=parser) + output = equalize_decl(output) + + assert output == expected + + +# ATTR_COLS + + +def test_attrs_cols_nan_output(datapath, parser): + expected = """\ + + + + + +""" + + output = geom_df.to_xml(attr_cols=["shape", "degrees", "sides"], parser=parser) + output = equalize_decl(output) + + assert output == expected + + +def test_attrs_cols_prefix(datapath, parser): + expected = """\ + + + + + +""" + + output = geom_df.to_xml( + attr_cols=["index", "shape", "degrees", "sides"], + namespaces={"doc": "http://example.xom"}, + prefix="doc", + parser=parser, + ) + output = equalize_decl(output) + + assert output == expected + + +def test_attrs_unknown_column(parser): + with pytest.raises(KeyError, match=("no valid column")): + geom_df.to_xml(attr_cols=["shape", "degree", "sides"], parser=parser) + + +def test_attrs_wrong_type(parser): + with 
pytest.raises(TypeError, match=("is not a valid type for attr_cols")): + geom_df.to_xml(attr_cols='"shape", "degree", "sides"', parser=parser) + + +# ELEM_COLS + + +def test_elems_cols_nan_output(datapath, parser): + elems_cols_expected = """\ + + + + 360 + 4.0 + square + + + 360 + + circle + + + 180 + 3.0 + triangle + +""" + + output = geom_df.to_xml( + index=False, elem_cols=["degrees", "sides", "shape"], parser=parser + ) + output = equalize_decl(output) + + assert output == elems_cols_expected + + +def test_elems_unknown_column(parser): + with pytest.raises(KeyError, match=("no valid column")): + geom_df.to_xml(elem_cols=["shape", "degree", "sides"], parser=parser) + + +def test_elems_wrong_type(parser): + with pytest.raises(TypeError, match=("is not a valid type for elem_cols")): + geom_df.to_xml(elem_cols='"shape", "degree", "sides"', parser=parser) + + +def test_elems_and_attrs_cols(datapath, parser): + elems_cols_expected = """\ + + + + 360 + 4.0 + + + 360 + + + + 180 + 3.0 + +""" + + output = geom_df.to_xml( + index=False, + elem_cols=["degrees", "sides"], + attr_cols=["shape"], + parser=parser, + ) + output = equalize_decl(output) + + assert output == elems_cols_expected + + +# HIERARCHICAL COLUMNS + + +def test_hierarchical_columns(datapath, parser): + expected = """\ + + + + inner + terrestrial + 4 + 11.81 + 2.95 + + + outer + gas giant + 2 + 2466.5 + 1233.25 + + + outer + ice giant + 2 + 189.23 + 94.61 + + + All + + 8 + 2667.54 + 333.44 + +""" + + pvt = planet_df.pivot_table( + index=["location", "type"], + values="mass", + aggfunc=["count", "sum", "mean"], + margins=True, + ).round(2) + + output = pvt.to_xml(parser=parser) + output = equalize_decl(output) + + assert output == expected + + +def test_hierarchical_attrs_columns(datapath, parser): + expected = """\ + + + + + + +""" + + pvt = planet_df.pivot_table( + index=["location", "type"], + values="mass", + aggfunc=["count", "sum", "mean"], + margins=True, + ).round(2) + + output = pvt.to_xml(attr_cols=list(pvt.reset_index().columns.values), parser=parser) + output = equalize_decl(output) + + assert output == expected + + +# MULTIINDEX + + +def test_multi_index(datapath, parser): + expected = """\ + + + + inner + terrestrial + 4 + 11.81 + 2.95 + + + outer + gas giant + 2 + 2466.5 + 1233.25 + + + outer + ice giant + 2 + 189.23 + 94.61 + +""" + + agg = ( + planet_df.groupby(["location", "type"])["mass"] + .agg(["count", "sum", "mean"]) + .round(2) + ) + + output = agg.to_xml(parser=parser) + output = equalize_decl(output) + + assert output == expected + + +def test_multi_index_attrs_cols(datapath, parser): + expected = """\ + + + + + +""" + + agg = ( + planet_df.groupby(["location", "type"])["mass"] + .agg(["count", "sum", "mean"]) + .round(2) + ) + output = agg.to_xml(attr_cols=list(agg.reset_index().columns.values), parser=parser) + output = equalize_decl(output) + + assert output == expected + + +# NAMESPACE + + +def test_default_namespace(parser): + expected = """\ + + + + 0 + square + 360 + 4.0 + + + 1 + circle + 360 + + + + 2 + triangle + 180 + 3.0 + +""" + + output = geom_df.to_xml(namespaces={"": "http://example.com"}, parser=parser) + output = equalize_decl(output) + + assert output == expected + + +# PREFIX + + +def test_namespace_prefix(parser): + expected = """\ + + + + 0 + square + 360 + 4.0 + + + 1 + circle + 360 + + + + 2 + triangle + 180 + 3.0 + +""" + + output = geom_df.to_xml( + namespaces={"doc": "http://example.com"}, prefix="doc", parser=parser + ) + output = equalize_decl(output) + + assert output == 
expected + + +def test_missing_prefix_in_nmsp(parser): + with pytest.raises(KeyError, match=("doc is not included in namespaces")): + + geom_df.to_xml( + namespaces={"": "http://example.com"}, prefix="doc", parser=parser + ) + + +def test_namespace_prefix_and_default(parser): + expected = """\ + + + + 0 + square + 360 + 4.0 + + + 1 + circle + 360 + + + + 2 + triangle + 180 + 3.0 + +""" + + output = geom_df.to_xml( + namespaces={"": "http://example.com", "doc": "http://other.org"}, + prefix="doc", + parser=parser, + ) + output = equalize_decl(output) + + if output is not None: + # etree and lxml differs on order of namespace prefixes + output = output.replace( + 'xmlns:doc="http://other.org" xmlns="http://example.com"', + 'xmlns="http://example.com" xmlns:doc="http://other.org"', + ) + + assert output == expected + + +# ENCODING + +encoding_expected = """\ + + + + 0 + 1 + José + Sofía + + + 1 + 2 + Luis + Valentina + + + 2 + 3 + Carlos + Isabella + + + 3 + 4 + Juan + Camila + + + 4 + 5 + Jorge + Valeria + +""" + + +def test_encoding_option_str(datapath, parser): + filename = datapath("io", "data", "xml", "baby_names.xml") + df_file = read_xml(filename, parser=parser, encoding="ISO-8859-1").head(5) + + output = df_file.to_xml(encoding="ISO-8859-1", parser=parser) + + if output is not None: + # etree and lxml differ on quotes and case in xml declaration + output = output.replace( + ' + + 0 + square + 360 + 4.0 + + + 1 + circle + 360 + + + + 2 + triangle + 180 + 3.0 + +""" + + output = geom_df.to_xml(xml_declaration=False) + + assert output == expected + + +def test_no_pretty_print_with_decl(parser): + expected = ( + "\n" + "0square" + "3604.0" + "1circle360" + "2" + "triangle1803.0" + "" + ) + + output = geom_df.to_xml(pretty_print=False, parser=parser) + output = equalize_decl(output) + + # etree adds space for closed tags + if output is not None: + output = output.replace(" />", "/>") + + assert output == expected + + +def test_no_pretty_print_no_decl(parser): + expected = ( + "0square" + "3604.0" + "1circle360" + "2" + "triangle1803.0" + "" + ) + + output = geom_df.to_xml(xml_declaration=False, pretty_print=False, parser=parser) + + # etree adds space for closed tags + if output is not None: + output = output.replace(" />", "/>") + + assert output == expected + + +# PARSER + + +@td.skip_if_installed("lxml") +def test_default_parser_no_lxml(): + with pytest.raises( + ImportError, match=("lxml not found, please install or use the etree parser.") + ): + geom_df.to_xml() + + +def test_unknown_parser(): + with pytest.raises( + ValueError, match=("Values for parser can only be lxml or etree.") + ): + geom_df.to_xml(parser="bs4") + + +# STYLESHEET + +xsl_expected = """\ + + + + 0 + square + 360 + 4.0 + + + 1 + circle + 360 + + + + 2 + triangle + 180 + 3.0 + +""" + + +@td.skip_if_no("lxml") +def test_stylesheet_file_like(datapath, mode): + xsl = datapath("io", "data", "xml", "row_field_output.xsl") + + with open(xsl, mode) as f: + assert geom_df.to_xml(stylesheet=f) == xsl_expected + + +@td.skip_if_no("lxml") +def test_stylesheet_io(datapath, mode): + xsl_path = datapath("io", "data", "xml", "row_field_output.xsl") + + xsl_obj: BytesIO | StringIO + + with open(xsl_path, mode) as f: + if mode == "rb": + xsl_obj = BytesIO(f.read()) + else: + xsl_obj = StringIO(f.read()) + + output = geom_df.to_xml(stylesheet=xsl_obj) + + assert output == xsl_expected + + +@td.skip_if_no("lxml") +def test_stylesheet_buffered_reader(datapath, mode): + xsl = datapath("io", "data", "xml", "row_field_output.xsl") + + 
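+    # Added note (not in the original test): despite the "buffered_reader"
+    # name, f.read() below yields a plain str or bytes object, so the
+    # stylesheet reaches to_xml as an XML string rather than as an open
+    # handle (the handle path is covered by test_stylesheet_file_like above).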
with open(xsl, mode) as f: + xsl_obj = f.read() + + output = geom_df.to_xml(stylesheet=xsl_obj) + + assert output == xsl_expected + + +@td.skip_if_no("lxml") +def test_stylesheet_wrong_path(datapath): + from lxml.etree import XMLSyntaxError + + xsl = os.path.join("data", "xml", "row_field_output.xslt") + + with pytest.raises( + XMLSyntaxError, + match=("Start tag expected, '<' not found"), + ): + geom_df.to_xml(stylesheet=xsl) + + +@td.skip_if_no("lxml") +@pytest.mark.parametrize("val", ["", b""]) +def test_empty_string_stylesheet(val): + from lxml.etree import XMLSyntaxError + + with pytest.raises( + XMLSyntaxError, match=("Document is empty|Start tag expected, '<' not found") + ): + geom_df.to_xml(stylesheet=val) + + +@td.skip_if_no("lxml") +def test_incorrect_xsl_syntax(): + from lxml.etree import XMLSyntaxError + + xsl = """\ + + + + + + + + + + + + + + + + + + +""" + + with pytest.raises(XMLSyntaxError, match=("Opening and ending tag mismatch")): + geom_df.to_xml(stylesheet=xsl) + + +@td.skip_if_no("lxml") +def test_incorrect_xsl_eval(): + from lxml.etree import XSLTParseError + + xsl = """\ + + + + + + + + + + + + + + + + + + +""" + + with pytest.raises(XSLTParseError, match=("failed to compile")): + geom_df.to_xml(stylesheet=xsl) + + +@td.skip_if_no("lxml") +def test_incorrect_xsl_apply(parser): + from lxml.etree import XSLTApplyError + + xsl = """\ + + + + + + + + + +""" + + with pytest.raises(XSLTApplyError, match=("Cannot resolve URI")): + with tm.ensure_clean("test.xml") as path: + geom_df.to_xml(path, stylesheet=xsl) + + +def test_stylesheet_with_etree(datapath): + xsl = """\ + + + + + + + + + """ + + with pytest.raises( + ValueError, match=("To use stylesheet, you need lxml installed") + ): + geom_df.to_xml(parser="etree", stylesheet=xsl) + + +@td.skip_if_no("lxml") +def test_style_to_csv(): + xsl = """\ + + + + + , + + ,shape,degrees,sides + + + + + + + +""" + + out_csv = geom_df.to_csv(line_terminator="\n") + + if out_csv is not None: + out_csv = out_csv.strip() + out_xml = geom_df.to_xml(stylesheet=xsl) + + assert out_csv == out_xml + + +@td.skip_if_no("lxml") +def test_style_to_string(): + xsl = """\ + + + + + + + shape degrees sides + + + + + + + +""" + + out_str = geom_df.to_string() + out_xml = geom_df.to_xml(na_rep="NaN", stylesheet=xsl) + + assert out_xml == out_str + + +@td.skip_if_no("lxml") +def test_style_to_json(): + xsl = """\ + + + + + " + + + {"shape":{ + + },"degrees":{ + + },"sides":{ + + }} + + + + + + + + + + + + + + + + + , + + +""" + + out_json = geom_df.to_json() + out_xml = geom_df.to_xml(stylesheet=xsl) + + assert out_json == out_xml + + +# COMPRESSION + + +geom_xml = """\ + + + + 0 + square + 360 + 4.0 + + + 1 + circle + 360 + + + + 2 + triangle + 180 + 3.0 + +""" + + +def test_compression_output(parser, compression_only): + with tm.ensure_clean() as path: + geom_df.to_xml(path, parser=parser, compression=compression_only) + + with get_handle( + path, + "r", + compression=compression_only, + ) as handle_obj: + output = handle_obj.handle.read() + + output = equalize_decl(output) + + assert geom_xml == output.strip() + + +def test_filename_and_suffix_comp(parser, compression_only): + compfile = "xml." 
+ icom._compression_to_extension[compression_only] + with tm.ensure_clean(filename=compfile) as path: + geom_df.to_xml(path, parser=parser, compression=compression_only) + + with get_handle( + path, + "r", + compression=compression_only, + ) as handle_obj: + output = handle_obj.handle.read() + + output = equalize_decl(output) + + assert geom_xml == output.strip() + + +def test_ea_dtypes(any_numeric_ea_dtype, parser): + # GH#43903 + expected = """ + + + 0 + + +""" + df = DataFrame({"a": [NA]}).astype(any_numeric_ea_dtype) + result = df.to_xml(parser=parser) + assert equalize_decl(result).strip() == expected + + +def test_unsuported_compression(datapath, parser): + with pytest.raises(ValueError, match="Unrecognized compression type"): + with tm.ensure_clean() as path: + geom_df.to_xml(path, parser=parser, compression="7z") + + +# STORAGE OPTIONS + + +@td.skip_if_no("s3fs") +@td.skip_if_no("lxml") +def test_s3_permission_output(parser, s3_resource): + # s3_resource hosts pandas-test + import s3fs + + with pytest.raises(PermissionError, match="Access Denied"): + fs = s3fs.S3FileSystem(anon=True) + fs.ls("pandas-test") + + geom_df.to_xml("s3://pandas-test/geom.xml", compression="zip", parser=parser) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/io/xml/test_xml.py b/.local/lib/python3.8/site-packages/pandas/tests/io/xml/test_xml.py new file mode 100644 index 0000000..809324c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/io/xml/test_xml.py @@ -0,0 +1,1135 @@ +from __future__ import annotations + +from io import ( + BytesIO, + StringIO, +) +from lzma import LZMAError +import os +from urllib.error import HTTPError +from zipfile import BadZipFile + +import numpy as np +import pytest + +from pandas.compat import is_ci_environment +from pandas.compat._optional import import_optional_dependency +import pandas.util._test_decorators as td + +from pandas import DataFrame +import pandas._testing as tm + +from pandas.io.xml import read_xml + +""" +CHECK LIST + +[x] - ValueError: "Values for parser can only be lxml or etree." + +etree +[X] - ImportError: "lxml not found, please install or use the etree parser." +[X] - TypeError: "expected str, bytes or os.PathLike object, not NoneType" +[X] - ValueError: "Either element or attributes can be parsed not both." +[X] - ValueError: "xpath does not return any nodes..." +[X] - SyntaxError: "You have used an incorrect or unsupported XPath" +[X] - ValueError: "names does not match length of child elements in xpath." +[X] - TypeError: "...is not a valid type for names" +[X] - ValueError: "To use stylesheet, you need lxml installed..." +[] - URLError: (GENERAL ERROR WITH HTTPError AS SUBCLASS) +[X] - HTTPError: "HTTP Error 404: Not Found" +[] - OSError: (GENERAL ERROR WITH FileNotFoundError AS SUBCLASS) +[X] - FileNotFoundError: "No such file or directory" +[] - ParseError (FAILSAFE CATCH ALL FOR VERY COMPLEX XML) +[X] - UnicodeDecodeError: "'utf-8' codec can't decode byte 0xe9..." +[X] - UnicodeError: "UTF-16 stream does not start with BOM" +[X] - BadZipFile: "File is not a zip file" +[X] - OSError: "Invalid data stream" +[X] - LZMAError: "Input format not supported by decoder" +[X] - ValueError: "Unrecognized compression type" +[X] - PermissionError: "Forbidden" + +lxml +[X] - ValueError: "Either element or attributes can be parsed not both." +[X] - AttributeError: "__enter__" +[X] - XSLTApplyError: "Cannot resolve URI" +[X] - XSLTParseError: "document is not a stylesheet" +[X] - ValueError: "xpath does not return any nodes." 
+[X] - XPathEvalError: "Invalid expression" +[] - XPathSyntaxError: (OLD VERSION IN lxml FOR XPATH ERRORS) +[X] - TypeError: "empty namespace prefix is not supported in XPath" +[X] - ValueError: "names does not match length of child elements in xpath." +[X] - TypeError: "...is not a valid type for names" +[X] - LookupError: "unknown encoding" +[] - URLError: (USUALLY DUE TO NETWORKING) +[X] - HTTPError: "HTTP Error 404: Not Found" +[X] - OSError: "failed to load external entity" +[X] - XMLSyntaxError: "Start tag expected, '<' not found" +[] - ParserError: (FAILSAFE CATCH ALL FOR VERY COMPLEX XML) +[X] - ValueError: "Values for parser can only be lxml or etree." +[X] - UnicodeDecodeError: "'utf-8' codec can't decode byte 0xe9..." +[X] - UnicodeError: "UTF-16 stream does not start with BOM" +[X] - BadZipFile: "File is not a zip file" +[X] - OSError: "Invalid data stream" +[X] - LZMAError: "Input format not supported by decoder" +[X] - ValueError: "Unrecognized compression type" +[X] - PermissionError: "Forbidden" +""" + +geom_df = DataFrame( + { + "shape": ["square", "circle", "triangle"], + "degrees": [360, 360, 180], + "sides": [4, np.nan, 3], + } +) + +xml_default_nmsp = """\ + + + + square + 360 + 4 + + + circle + 360 + + + + triangle + 180 + 3 + +""" + +xml_prefix_nmsp = """\ + + + + square + 360 + 4.0 + + + circle + 360 + + + + triangle + 180 + 3.0 + +""" + + +df_kml = DataFrame( + { + "id": { + 0: "ID_00001", + 1: "ID_00002", + 2: "ID_00003", + 3: "ID_00004", + 4: "ID_00005", + }, + "name": { + 0: "Blue Line (Forest Park)", + 1: "Red, Purple Line", + 2: "Red, Purple Line", + 3: "Red, Purple Line", + 4: "Red, Purple Line", + }, + "styleUrl": { + 0: "#LineStyle01", + 1: "#LineStyle01", + 2: "#LineStyle01", + 3: "#LineStyle01", + 4: "#LineStyle01", + }, + "extrude": {0: 0, 1: 0, 2: 0, 3: 0, 4: 0}, + "altitudeMode": { + 0: "clampedToGround", + 1: "clampedToGround", + 2: "clampedToGround", + 3: "clampedToGround", + 4: "clampedToGround", + }, + "coordinates": { + 0: ( + "-87.77678526964958,41.8708863930319,0 " + "-87.77826234150609,41.87097820122218,0 " + "-87.78251583439344,41.87130129991005,0 " + "-87.78418294588424,41.87145055520308,0 " + "-87.7872369165933,41.8717239119163,0 " + "-87.79160214925886,41.87210797280065,0" + ), + 1: ( + "-87.65758750947528,41.96427269188822,0 " + "-87.65802133507393,41.96581929055245,0 " + "-87.65819033925305,41.96621846093642,0 " + "-87.6583189819129,41.96650362897086,0 " + "-87.65835858701473,41.96669002089185,0 " + "-87.65838428411853,41.96688150295095,0 " + "-87.65842208882658,41.96745896091846,0 " + "-87.65846556843937,41.9683761425439,0 " + "-87.65849296214573,41.96913893870342,0" + ), + 2: ( + "-87.65492939166126,41.95377494531437,0 " + "-87.65557043199591,41.95376544118533,0 " + "-87.65606302030132,41.95376391658746,0 " + "-87.65623502146268,41.95377379126367,0 " + "-87.65634748981634,41.95380103566435,0 " + "-87.65646537904269,41.95387703994676,0 " + "-87.65656532461145,41.95396622645799,0 " + "-87.65664760856414,41.95404201996044,0 " + "-87.65671750555913,41.95416647054043,0 " + "-87.65673983607117,41.95429949810849,0 " + "-87.65673866475777,41.95441024240925,0 " + "-87.6567690255541,41.95490657227902,0 " + "-87.65683672482363,41.95692259283837,0 " + "-87.6568900886376,41.95861070983142,0 " + "-87.65699865558875,41.96181418669004,0 " + "-87.65756347177603,41.96397045777844,0 " + "-87.65758750947528,41.96427269188822,0" + ), + 3: ( + "-87.65362593118043,41.94742799535678,0 " + "-87.65363554415794,41.94819886386848,0 " +
"-87.6536456393239,41.95059994675451,0 " + "-87.65365831235026,41.95108288489359,0 " + "-87.6536604873874,41.9519954657554,0 " + "-87.65362592053201,41.95245597302328,0 " + "-87.65367158496069,41.95311153649393,0 " + "-87.65368468595476,41.9533202828916,0 " + "-87.65369271253692,41.95343095587119,0 " + "-87.65373335834569,41.95351536301472,0 " + "-87.65378605844126,41.95358212680591,0 " + "-87.65385067928185,41.95364452823767,0 " + "-87.6539390793817,41.95370263886964,0 " + "-87.6540786298351,41.95373403675265,0 " + "-87.65430648647626,41.9537535411832,0 " + "-87.65492939166126,41.95377494531437,0" + ), + 4: ( + "-87.65345391792157,41.94217681262115,0 " + "-87.65342448305786,41.94237224420864,0 " + "-87.65339745703922,41.94268217746244,0 " + "-87.65337753982941,41.94288140770284,0 " + "-87.65336256753105,41.94317369618263,0 " + "-87.65338799707138,41.94357253961736,0 " + "-87.65340240886648,41.94389158188269,0 " + "-87.65341837392448,41.94406444407721,0 " + "-87.65342275247338,41.94421065714904,0 " + "-87.65347469646018,41.94434829382345,0 " + "-87.65351486483024,41.94447699917548,0 " + "-87.65353483605053,41.9453896864472,0 " + "-87.65361975532807,41.94689193720703,0 " + "-87.65362593118043,41.94742799535678,0" + ), + }, + } +) + + +@pytest.fixture(params=["rb", "r"]) +def mode(request): + return request.param + + +@pytest.fixture(params=[pytest.param("lxml", marks=td.skip_if_no("lxml")), "etree"]) +def parser(request): + return request.param + + +# FILE / URL + + +@td.skip_if_no("lxml") +def test_parser_consistency_file(datapath): + filename = datapath("io", "data", "xml", "books.xml") + df_file_lxml = read_xml(filename, parser="lxml") + df_file_etree = read_xml(filename, parser="etree") + + tm.assert_frame_equal(df_file_lxml, df_file_etree) + + +@pytest.mark.network +@pytest.mark.slow +@td.skip_if_no("lxml") +@tm.network +def test_parser_consistency_url(): + url = ( + "https://data.cityofchicago.org/api/views/" + "8pix-ypme/rows.xml?accessType=DOWNLOAD" + ) + df_url_lxml = read_xml(url, xpath=".//row/row", parser="lxml") + df_url_etree = read_xml(url, xpath=".//row/row", parser="etree") + + tm.assert_frame_equal(df_url_lxml, df_url_etree) + + +def test_file_like(datapath, parser, mode): + filename = datapath("io", "data", "xml", "books.xml") + with open(filename, mode) as f: + df_file = read_xml(f, parser=parser) + + df_expected = DataFrame( + { + "category": ["cooking", "children", "web"], + "title": ["Everyday Italian", "Harry Potter", "Learning XML"], + "author": ["Giada De Laurentiis", "J K. Rowling", "Erik T. Ray"], + "year": [2005, 2005, 2003], + "price": [30.00, 29.99, 39.95], + } + ) + + tm.assert_frame_equal(df_file, df_expected) + + +def test_file_io(datapath, parser, mode): + filename = datapath("io", "data", "xml", "books.xml") + with open(filename, mode) as f: + xml_obj = f.read() + + df_io = read_xml( + (BytesIO(xml_obj) if isinstance(xml_obj, bytes) else StringIO(xml_obj)), + parser=parser, + ) + + df_expected = DataFrame( + { + "category": ["cooking", "children", "web"], + "title": ["Everyday Italian", "Harry Potter", "Learning XML"], + "author": ["Giada De Laurentiis", "J K. Rowling", "Erik T. 
Ray"], + "year": [2005, 2005, 2003], + "price": [30.00, 29.99, 39.95], + } + ) + + tm.assert_frame_equal(df_io, df_expected) + + +def test_file_buffered_reader_string(datapath, parser, mode): + filename = datapath("io", "data", "xml", "books.xml") + with open(filename, mode) as f: + xml_obj = f.read() + + df_str = read_xml(xml_obj, parser=parser) + + df_expected = DataFrame( + { + "category": ["cooking", "children", "web"], + "title": ["Everyday Italian", "Harry Potter", "Learning XML"], + "author": ["Giada De Laurentiis", "J K. Rowling", "Erik T. Ray"], + "year": [2005, 2005, 2003], + "price": [30.00, 29.99, 39.95], + } + ) + + tm.assert_frame_equal(df_str, df_expected) + + +def test_file_buffered_reader_no_xml_declaration(datapath, parser, mode): + filename = datapath("io", "data", "xml", "books.xml") + with open(filename, mode) as f: + next(f) + xml_obj = f.read() + + df_str = read_xml(xml_obj, parser=parser) + + df_expected = DataFrame( + { + "category": ["cooking", "children", "web"], + "title": ["Everyday Italian", "Harry Potter", "Learning XML"], + "author": ["Giada De Laurentiis", "J K. Rowling", "Erik T. Ray"], + "year": [2005, 2005, 2003], + "price": [30.00, 29.99, 39.95], + } + ) + + tm.assert_frame_equal(df_str, df_expected) + + +def test_file_handle_close(datapath, parser): + xml_file = datapath("io", "data", "xml", "books.xml") + + with open(xml_file, "rb") as f: + read_xml(BytesIO(f.read()), parser=parser) + + assert not f.closed + + +@td.skip_if_no("lxml") +@pytest.mark.parametrize("val", ["", b""]) +def test_empty_string_lxml(val): + from lxml.etree import XMLSyntaxError + + with pytest.raises(XMLSyntaxError, match="Document is empty"): + read_xml(val, parser="lxml") + + +@pytest.mark.parametrize("val", ["", b""]) +def test_empty_string_etree(val): + from xml.etree.ElementTree import ParseError + + with pytest.raises(ParseError, match="no element found"): + read_xml(val, parser="etree") + + +@td.skip_if_no("lxml") +def test_wrong_file_path_lxml(): + from lxml.etree import XMLSyntaxError + + filename = os.path.join("data", "html", "books.xml") + + with pytest.raises( + XMLSyntaxError, + match=("Start tag expected, '<' not found"), + ): + read_xml(filename, parser="lxml") + + +def test_wrong_file_path_etree(): + from xml.etree.ElementTree import ParseError + + filename = os.path.join("data", "html", "books.xml") + + with pytest.raises( + ParseError, + match=("not well-formed"), + ): + read_xml(filename, parser="etree") + + +@pytest.mark.network +@tm.network +@td.skip_if_no("lxml") +def test_url(): + url = "https://www.w3schools.com/xml/books.xml" + df_url = read_xml(url, xpath=".//book[count(*)=4]") + + df_expected = DataFrame( + { + "category": ["cooking", "children", "web"], + "title": ["Everyday Italian", "Harry Potter", "Learning XML"], + "author": ["Giada De Laurentiis", "J K. Rowling", "Erik T. 
Ray"], + "year": [2005, 2005, 2003], + "price": [30.00, 29.99, 39.95], + "cover": [None, None, "paperback"], + } + ) + + tm.assert_frame_equal(df_url, df_expected) + + +@pytest.mark.network +@tm.network +def test_wrong_url(parser): + with pytest.raises(HTTPError, match=("HTTP Error 404: Not Found")): + url = "https://www.w3schools.com/xml/python.xml" + read_xml(url, xpath=".//book[count(*)=4]", parser=parser) + + +# XPATH + + +@td.skip_if_no("lxml") +def test_empty_xpath_lxml(datapath): + filename = datapath("io", "data", "xml", "books.xml") + with pytest.raises(ValueError, match=("xpath does not return any nodes")): + read_xml(filename, xpath=".//python", parser="lxml") + + +def test_bad_xpath_etree(datapath): + filename = datapath("io", "data", "xml", "books.xml") + with pytest.raises( + SyntaxError, match=("You have used an incorrect or unsupported XPath") + ): + read_xml(filename, xpath=".//[book]", parser="etree") + + +@td.skip_if_no("lxml") +def test_bad_xpath_lxml(datapath): + from lxml.etree import XPathEvalError + + filename = datapath("io", "data", "xml", "books.xml") + with pytest.raises(XPathEvalError, match=("Invalid expression")): + read_xml(filename, xpath=".//[book]", parser="lxml") + + +# NAMESPACE + + +def test_default_namespace(parser): + df_nmsp = read_xml( + xml_default_nmsp, + xpath=".//ns:row", + namespaces={"ns": "http://example.com"}, + parser=parser, + ) + + df_expected = DataFrame( + { + "shape": ["square", "circle", "triangle"], + "degrees": [360, 360, 180], + "sides": [4.0, float("nan"), 3.0], + } + ) + + tm.assert_frame_equal(df_nmsp, df_expected) + + +def test_prefix_namespace(parser): + df_nmsp = read_xml( + xml_prefix_nmsp, + xpath=".//doc:row", + namespaces={"doc": "http://example.com"}, + parser=parser, + ) + + df_expected = DataFrame( + { + "shape": ["square", "circle", "triangle"], + "degrees": [360, 360, 180], + "sides": [4.0, float("nan"), 3.0], + } + ) + + tm.assert_frame_equal(df_nmsp, df_expected) + + +@td.skip_if_no("lxml") +def test_consistency_default_namespace(): + df_lxml = read_xml( + xml_default_nmsp, + xpath=".//ns:row", + namespaces={"ns": "http://example.com"}, + parser="lxml", + ) + + df_etree = read_xml( + xml_default_nmsp, + xpath=".//doc:row", + namespaces={"doc": "http://example.com"}, + parser="etree", + ) + + tm.assert_frame_equal(df_lxml, df_etree) + + +@td.skip_if_no("lxml") +def test_consistency_prefix_namespace(): + df_lxml = read_xml( + xml_prefix_nmsp, + xpath=".//doc:row", + namespaces={"doc": "http://example.com"}, + parser="lxml", + ) + + df_etree = read_xml( + xml_prefix_nmsp, + xpath=".//doc:row", + namespaces={"doc": "http://example.com"}, + parser="etree", + ) + + tm.assert_frame_equal(df_lxml, df_etree) + + +# PREFIX + + +def test_missing_prefix_with_default_namespace(datapath, parser): + filename = datapath("io", "data", "xml", "books.xml") + with pytest.raises(ValueError, match=("xpath does not return any nodes")): + read_xml(filename, xpath=".//Placemark", parser=parser) + + +def test_missing_prefix_definition_etree(datapath): + filename = datapath("io", "data", "xml", "cta_rail_lines.kml") + with pytest.raises(SyntaxError, match=("you used an undeclared namespace prefix")): + read_xml(filename, xpath=".//kml:Placemark", parser="etree") + + +@td.skip_if_no("lxml") +def test_missing_prefix_definition_lxml(datapath): + from lxml.etree import XPathEvalError + + filename = datapath("io", "data", "xml", "cta_rail_lines.kml") + with pytest.raises(XPathEvalError, match=("Undefined namespace prefix")): + read_xml(filename, 
xpath=".//kml:Placemark", parser="lxml") + + +@td.skip_if_no("lxml") +@pytest.mark.parametrize("key", ["", None]) +def test_none_namespace_prefix(key): + with pytest.raises( + TypeError, match=("empty namespace prefix is not supported in XPath") + ): + read_xml( + xml_default_nmsp, + xpath=".//kml:Placemark", + namespaces={key: "http://www.opengis.net/kml/2.2"}, + parser="lxml", + ) + + +# ELEMS AND ATTRS + + +def test_file_elems_and_attrs(datapath, parser): + filename = datapath("io", "data", "xml", "books.xml") + df_file = read_xml(filename, parser=parser) + df_expected = DataFrame( + { + "category": ["cooking", "children", "web"], + "title": ["Everyday Italian", "Harry Potter", "Learning XML"], + "author": ["Giada De Laurentiis", "J K. Rowling", "Erik T. Ray"], + "year": [2005, 2005, 2003], + "price": [30.00, 29.99, 39.95], + } + ) + + tm.assert_frame_equal(df_file, df_expected) + + +def test_file_only_attrs(datapath, parser): + filename = datapath("io", "data", "xml", "books.xml") + df_file = read_xml(filename, attrs_only=True, parser=parser) + df_expected = DataFrame({"category": ["cooking", "children", "web"]}) + + tm.assert_frame_equal(df_file, df_expected) + + +def test_file_only_elems(datapath, parser): + filename = datapath("io", "data", "xml", "books.xml") + df_file = read_xml(filename, elems_only=True, parser=parser) + df_expected = DataFrame( + { + "title": ["Everyday Italian", "Harry Potter", "Learning XML"], + "author": ["Giada De Laurentiis", "J K. Rowling", "Erik T. Ray"], + "year": [2005, 2005, 2003], + "price": [30.00, 29.99, 39.95], + } + ) + + tm.assert_frame_equal(df_file, df_expected) + + +def test_elem_and_attrs_only(datapath, parser): + filename = datapath("io", "data", "xml", "cta_rail_lines.kml") + with pytest.raises( + ValueError, + match=("Either element or attributes can be parsed not both"), + ): + read_xml(filename, elems_only=True, attrs_only=True, parser=parser) + + +@td.skip_if_no("lxml") +def test_attribute_centric_xml(): + xml = """\ + + + + + + + + + + + + + + + + + +""" + + df_lxml = read_xml(xml, xpath=".//station") + df_etree = read_xml(xml, xpath=".//station", parser="etree") + + tm.assert_frame_equal(df_lxml, df_etree) + + +# NAMES + + +def test_names_option_output(datapath, parser): + filename = datapath("io", "data", "xml", "books.xml") + df_file = read_xml( + filename, names=["Col1", "Col2", "Col3", "Col4", "Col5"], parser=parser + ) + + df_expected = DataFrame( + { + "Col1": ["cooking", "children", "web"], + "Col2": ["Everyday Italian", "Harry Potter", "Learning XML"], + "Col3": ["Giada De Laurentiis", "J K. Rowling", "Erik T. 
Ray"], + "Col4": [2005, 2005, 2003], + "Col5": [30.00, 29.99, 39.95], + } + ) + + tm.assert_frame_equal(df_file, df_expected) + + +def test_names_option_wrong_length(datapath, parser): + filename = datapath("io", "data", "xml", "books.xml") + + with pytest.raises(ValueError, match=("names does not match length")): + read_xml(filename, names=["Col1", "Col2", "Col3"], parser=parser) + + +def test_names_option_wrong_type(datapath, parser): + filename = datapath("io", "data", "xml", "books.xml") + + with pytest.raises(TypeError, match=("is not a valid type for names")): + read_xml(filename, names="Col1, Col2, Col3", parser=parser) + + +# ENCODING + + +def test_wrong_encoding(datapath, parser): + filename = datapath("io", "data", "xml", "baby_names.xml") + with pytest.raises(UnicodeDecodeError, match=("'utf-8' codec can't decode")): + read_xml(filename, parser=parser) + + +def test_utf16_encoding(datapath, parser): + filename = datapath("io", "data", "xml", "baby_names.xml") + with pytest.raises( + UnicodeError, + match=( + "UTF-16 stream does not start with BOM|" + "'utf-16-le' codec can't decode byte" + ), + ): + read_xml(filename, encoding="UTF-16", parser=parser) + + +def test_unknown_encoding(datapath, parser): + filename = datapath("io", "data", "xml", "baby_names.xml") + with pytest.raises(LookupError, match=("unknown encoding: UFT-8")): + read_xml(filename, encoding="UFT-8", parser=parser) + + +def test_ascii_encoding(datapath, parser): + filename = datapath("io", "data", "xml", "baby_names.xml") + with pytest.raises(UnicodeDecodeError, match=("'ascii' codec can't decode byte")): + read_xml(filename, encoding="ascii", parser=parser) + + +@td.skip_if_no("lxml") +def test_parser_consistency_with_encoding(datapath): + filename = datapath("io", "data", "xml", "baby_names.xml") + df_lxml = read_xml(filename, parser="lxml", encoding="ISO-8859-1") + df_etree = read_xml(filename, parser="etree", encoding="iso-8859-1") + + tm.assert_frame_equal(df_lxml, df_etree) + + +@td.skip_if_no("lxml") +def test_wrong_encoding_for_lxml(): + # GH#45133 + data = """ + + c + + +""" + with pytest.raises(TypeError, match="encoding None"): + read_xml(StringIO(data), parser="lxml", encoding=None) + + +def test_none_encoding_etree(): + # GH#45133 + data = """ + + c + + +""" + result = read_xml(StringIO(data), parser="etree", encoding=None) + expected = DataFrame({"a": ["c"]}) + tm.assert_frame_equal(result, expected) + + +# PARSER + + +@td.skip_if_installed("lxml") +def test_default_parser_no_lxml(datapath): + filename = datapath("io", "data", "xml", "books.xml") + + with pytest.raises( + ImportError, match=("lxml not found, please install or use the etree parser.") + ): + read_xml(filename) + + +def test_wrong_parser(datapath): + filename = datapath("io", "data", "xml", "books.xml") + + with pytest.raises( + ValueError, match=("Values for parser can only be lxml or etree.") + ): + read_xml(filename, parser="bs4") + + +# STYLESHEET + + +@td.skip_if_no("lxml") +def test_stylesheet_file(datapath): + kml = datapath("io", "data", "xml", "cta_rail_lines.kml") + xsl = datapath("io", "data", "xml", "flatten_doc.xsl") + + df_style = read_xml( + kml, + xpath=".//k:Placemark", + namespaces={"k": "http://www.opengis.net/kml/2.2"}, + stylesheet=xsl, + ) + + tm.assert_frame_equal(df_kml, df_style) + + +def test_read_xml_passing_as_positional_deprecated(datapath, parser): + # GH#45133 + kml = datapath("io", "data", "xml", "cta_rail_lines.kml") + + with tm.assert_produces_warning(FutureWarning, match="keyword-only"): + read_xml( 
+ kml, + ".//k:Placemark", + namespaces={"k": "http://www.opengis.net/kml/2.2"}, + parser=parser, + ) + + +@td.skip_if_no("lxml") +def test_stylesheet_file_like(datapath, mode): + kml = datapath("io", "data", "xml", "cta_rail_lines.kml") + xsl = datapath("io", "data", "xml", "flatten_doc.xsl") + + with open(xsl, mode) as f: + df_style = read_xml( + kml, + xpath=".//k:Placemark", + namespaces={"k": "http://www.opengis.net/kml/2.2"}, + stylesheet=f, + ) + + tm.assert_frame_equal(df_kml, df_style) + + +@td.skip_if_no("lxml") +def test_stylesheet_io(datapath, mode): + kml = datapath("io", "data", "xml", "cta_rail_lines.kml") + xsl = datapath("io", "data", "xml", "flatten_doc.xsl") + + xsl_obj: BytesIO | StringIO + + with open(xsl, mode) as f: + if mode == "rb": + xsl_obj = BytesIO(f.read()) + else: + xsl_obj = StringIO(f.read()) + + df_style = read_xml( + kml, + xpath=".//k:Placemark", + namespaces={"k": "http://www.opengis.net/kml/2.2"}, + stylesheet=xsl_obj, + ) + + tm.assert_frame_equal(df_kml, df_style) + + +@td.skip_if_no("lxml") +def test_stylesheet_buffered_reader(datapath, mode): + kml = datapath("io", "data", "xml", "cta_rail_lines.kml") + xsl = datapath("io", "data", "xml", "flatten_doc.xsl") + + with open(xsl, mode) as f: + xsl_obj = f.read() + + df_style = read_xml( + kml, + xpath=".//k:Placemark", + namespaces={"k": "http://www.opengis.net/kml/2.2"}, + stylesheet=xsl_obj, + ) + + tm.assert_frame_equal(df_kml, df_style) + + +@td.skip_if_no("lxml") +def test_not_stylesheet(datapath): + from lxml.etree import XSLTParseError + + kml = datapath("io", "data", "xml", "cta_rail_lines.kml") + xsl = datapath("io", "data", "xml", "books.xml") + + with pytest.raises(XSLTParseError, match=("document is not a stylesheet")): + read_xml(kml, stylesheet=xsl) + + +@td.skip_if_no("lxml") +def test_incorrect_xsl_syntax(datapath): + from lxml.etree import XMLSyntaxError + + xsl = """\ + + + + + + + + + + + + + + + +""" + + kml = datapath("io", "data", "xml", "cta_rail_lines.kml") + + with pytest.raises( + XMLSyntaxError, match=("Extra content at the end of the document") + ): + read_xml(kml, stylesheet=xsl) + + +@td.skip_if_no("lxml") +def test_incorrect_xsl_eval(datapath): + from lxml.etree import XSLTParseError + + xsl = """\ + + + + + + + + + + + + + + + +""" + + kml = datapath("io", "data", "xml", "cta_rail_lines.kml") + + with pytest.raises(XSLTParseError, match=("failed to compile")): + read_xml(kml, stylesheet=xsl) + + +@td.skip_if_no("lxml") +def test_incorrect_xsl_apply(datapath): + from lxml.etree import XSLTApplyError + + xsl = """\ + + + + + + + + + +""" + + kml = datapath("io", "data", "xml", "cta_rail_lines.kml") + + with pytest.raises(XSLTApplyError, match=("Cannot resolve URI")): + read_xml(kml, stylesheet=xsl) + + +@td.skip_if_no("lxml") +def test_wrong_stylesheet(): + from lxml.etree import XMLSyntaxError + + kml = os.path.join("data", "xml", "cta_rail_lines.kml") + xsl = os.path.join("data", "xml", "flatten.xsl") + + with pytest.raises( + XMLSyntaxError, + match=("Start tag expected, '<' not found"), + ): + read_xml(kml, stylesheet=xsl) + + +@td.skip_if_no("lxml") +def test_stylesheet_file_close(datapath, mode): + kml = datapath("io", "data", "xml", "cta_rail_lines.kml") + xsl = datapath("io", "data", "xml", "flatten_doc.xsl") + + xsl_obj: BytesIO | StringIO + + with open(xsl, mode) as f: + if mode == "rb": + xsl_obj = BytesIO(f.read()) + else: + xsl_obj = StringIO(f.read()) + + read_xml(kml, stylesheet=xsl_obj) + + assert not f.closed + + +@td.skip_if_no("lxml") +def 
test_stylesheet_with_etree(datapath): + kml = os.path.join("data", "xml", "cta_rail_lines.kml") + xsl = os.path.join("data", "xml", "flatten_doc.xsl") + + with pytest.raises( + ValueError, match=("To use stylesheet, you need lxml installed") + ): + read_xml(kml, parser="etree", stylesheet=xsl) + + +@td.skip_if_no("lxml") +@pytest.mark.parametrize("val", ["", b""]) +def test_empty_stylesheet(val): + from lxml.etree import XMLSyntaxError + + kml = os.path.join("data", "xml", "cta_rail_lines.kml") + + with pytest.raises( + XMLSyntaxError, match=("Document is empty|Start tag expected, '<' not found") + ): + read_xml(kml, stylesheet=val) + + +@pytest.mark.network +@td.skip_if_no("lxml") +@tm.network +def test_online_stylesheet(): + xml = "https://www.w3schools.com/xml/cdcatalog_with_xsl.xml" + xsl = "https://www.w3schools.com/xml/cdcatalog.xsl" + + df_xsl = read_xml( + xml, + xpath=".//tr[td and position() <= 6]", + names=["title", "artist"], + stylesheet=xsl, + ) + + df_expected = DataFrame( + { + "title": { + 0: "Empire Burlesque", + 1: "Hide your heart", + 2: "Greatest Hits", + 3: "Still got the blues", + 4: "Eros", + }, + "artist": { + 0: "Bob Dylan", + 1: "Bonnie Tyler", + 2: "Dolly Parton", + 3: "Gary Moore", + 4: "Eros Ramazzotti", + }, + } + ) + + tm.assert_frame_equal(df_expected, df_xsl) + + +# COMPRESSION + + +def test_compression_read(parser, compression_only): + with tm.ensure_clean() as path: + geom_df.to_xml(path, index=False, parser=parser, compression=compression_only) + + xml_df = read_xml(path, parser=parser, compression=compression_only) + + tm.assert_frame_equal(xml_df, geom_df) + + +def test_wrong_compression(parser, compression, compression_only): + actual_compression = compression + attempted_compression = compression_only + + if actual_compression == attempted_compression: + return + + errors = { + "bz2": (OSError, "Invalid data stream"), + "gzip": (OSError, "Not a gzipped file"), + "zip": (BadZipFile, "File is not a zip file"), + } + zstd = import_optional_dependency("zstandard", errors="ignore") + if zstd is not None: + errors["zstd"] = (zstd.ZstdError, "Unknown frame descriptor") + lzma = import_optional_dependency("lzma", errors="ignore") + if lzma is not None: + errors["xz"] = (LZMAError, "Input format not supported by decoder") + error_cls, error_str = errors[attempted_compression] + + with tm.ensure_clean() as path: + geom_df.to_xml(path, parser=parser, compression=actual_compression) + + with pytest.raises(error_cls, match=error_str): + read_xml(path, parser=parser, compression=attempted_compression) + + +def test_unsuported_compression(datapath, parser): + with pytest.raises(ValueError, match="Unrecognized compression type"): + with tm.ensure_clean() as path: + read_xml(path, parser=parser, compression="7z") + + +# STORAGE OPTIONS + + +@pytest.mark.network +@td.skip_if_no("s3fs") +@td.skip_if_no("lxml") +@pytest.mark.skipif( + is_ci_environment(), + reason="2022.1.17: Hanging on the CI min versions build.", +) +@tm.network +def test_s3_parser_consistency(): + # Python Software Foundation (2019 IRS-990 RETURN) + s3 = "s3://irs-form-990/201923199349319487_public.xml" + + df_lxml = read_xml( + s3, + xpath=".//irs:Form990PartVIISectionAGrp", + namespaces={"irs": "http://www.irs.gov/efile"}, + parser="lxml", + storage_options={"anon": True}, + ) + + df_etree = read_xml( + s3, + xpath=".//irs:Form990PartVIISectionAGrp", + namespaces={"irs": "http://www.irs.gov/efile"}, + parser="etree", + storage_options={"anon": True}, + ) + + tm.assert_frame_equal(df_lxml, 
df_etree) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/libs/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/libs/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/libs/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/libs/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..884dcd4 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/libs/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/libs/__pycache__/test_hashtable.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/libs/__pycache__/test_hashtable.cpython-38.pyc new file mode 100644 index 0000000..d508f27 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/libs/__pycache__/test_hashtable.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/libs/__pycache__/test_join.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/libs/__pycache__/test_join.cpython-38.pyc new file mode 100644 index 0000000..5b1bf6f Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/libs/__pycache__/test_join.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/libs/__pycache__/test_lib.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/libs/__pycache__/test_lib.cpython-38.pyc new file mode 100644 index 0000000..5ab6494 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/libs/__pycache__/test_lib.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/libs/test_hashtable.py b/.local/lib/python3.8/site-packages/pandas/tests/libs/test_hashtable.py new file mode 100644 index 0000000..937eccf --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/libs/test_hashtable.py @@ -0,0 +1,529 @@ +from contextlib import contextmanager +import tracemalloc + +import numpy as np +import pytest + +from pandas._libs import hashtable as ht + +import pandas as pd +import pandas._testing as tm +from pandas.core.algorithms import isin + + +@contextmanager +def activated_tracemalloc(): + tracemalloc.start() + try: + yield + finally: + tracemalloc.stop() + + +def get_allocated_khash_memory(): + snapshot = tracemalloc.take_snapshot() + snapshot = snapshot.filter_traces( + (tracemalloc.DomainFilter(True, ht.get_hashtable_trace_domain()),) + ) + return sum(map(lambda x: x.size, snapshot.traces)) + + +@pytest.mark.parametrize( + "table_type, dtype", + [ + (ht.PyObjectHashTable, np.object_), + (ht.Complex128HashTable, np.complex128), + (ht.Int64HashTable, np.int64), + (ht.UInt64HashTable, np.uint64), + (ht.Float64HashTable, np.float64), + (ht.Complex64HashTable, np.complex64), + (ht.Int32HashTable, np.int32), + (ht.UInt32HashTable, np.uint32), + (ht.Float32HashTable, np.float32), + (ht.Int16HashTable, np.int16), + (ht.UInt16HashTable, np.uint16), + (ht.Int8HashTable, np.int8), + (ht.UInt8HashTable, np.uint8), + (ht.IntpHashTable, np.intp), + ], +) +class TestHashTable: + def test_get_set_contains_len(self, table_type, dtype): + index = 5 + table = table_type(55) + assert len(table) == 0 + assert index not in table + + table.set_item(index, 42) + assert len(table) == 1 + assert index in table + assert table.get_item(index) == 42 + + table.set_item(index + 1, 41) + assert index in table + assert index + 1 in table + assert len(table) == 2 + assert 
table.get_item(index) == 42 + assert table.get_item(index + 1) == 41 + + table.set_item(index, 21) + assert index in table + assert index + 1 in table + assert len(table) == 2 + assert table.get_item(index) == 21 + assert table.get_item(index + 1) == 41 + assert index + 2 not in table + + with pytest.raises(KeyError, match=str(index + 2)): + table.get_item(index + 2) + + def test_map(self, table_type, dtype, writable): + # PyObjectHashTable has no map-method + if table_type != ht.PyObjectHashTable: + N = 77 + table = table_type() + keys = np.arange(N).astype(dtype) + vals = np.arange(N).astype(np.int64) + N + keys.flags.writeable = writable + vals.flags.writeable = writable + table.map(keys, vals) + for i in range(N): + assert table.get_item(keys[i]) == i + N + + def test_map_locations(self, table_type, dtype, writable): + N = 8 + table = table_type() + keys = (np.arange(N) + N).astype(dtype) + keys.flags.writeable = writable + table.map_locations(keys) + for i in range(N): + assert table.get_item(keys[i]) == i + + def test_lookup(self, table_type, dtype, writable): + N = 3 + table = table_type() + keys = (np.arange(N) + N).astype(dtype) + keys.flags.writeable = writable + table.map_locations(keys) + result = table.lookup(keys) + expected = np.arange(N) + tm.assert_numpy_array_equal(result.astype(np.int64), expected.astype(np.int64)) + + def test_lookup_wrong(self, table_type, dtype): + if dtype in (np.int8, np.uint8): + N = 100 + else: + N = 512 + table = table_type() + keys = (np.arange(N) + N).astype(dtype) + table.map_locations(keys) + wrong_keys = np.arange(N).astype(dtype) + result = table.lookup(wrong_keys) + assert np.all(result == -1) + + def test_unique(self, table_type, dtype, writable): + if dtype in (np.int8, np.uint8): + N = 88 + else: + N = 1000 + table = table_type() + expected = (np.arange(N) + N).astype(dtype) + keys = np.repeat(expected, 5) + keys.flags.writeable = writable + unique = table.unique(keys) + tm.assert_numpy_array_equal(unique, expected) + + def test_tracemalloc_works(self, table_type, dtype): + if dtype in (np.int8, np.uint8): + N = 256 + else: + N = 30000 + keys = np.arange(N).astype(dtype) + with activated_tracemalloc(): + table = table_type() + table.map_locations(keys) + used = get_allocated_khash_memory() + my_size = table.sizeof() + assert used == my_size + del table + assert get_allocated_khash_memory() == 0 + + def test_tracemalloc_for_empty(self, table_type, dtype): + with activated_tracemalloc(): + table = table_type() + used = get_allocated_khash_memory() + my_size = table.sizeof() + assert used == my_size + del table + assert get_allocated_khash_memory() == 0 + + def test_get_state(self, table_type, dtype): + table = table_type(1000) + state = table.get_state() + assert state["size"] == 0 + assert state["n_occupied"] == 0 + assert "n_buckets" in state + assert "upper_bound" in state + + def test_no_reallocation(self, table_type, dtype): + for N in range(1, 110): + keys = np.arange(N).astype(dtype) + preallocated_table = table_type(N) + n_buckets_start = preallocated_table.get_state()["n_buckets"] + preallocated_table.map_locations(keys) + n_buckets_end = preallocated_table.get_state()["n_buckets"] + # original number of buckets was enough: + assert n_buckets_start == n_buckets_end + # check with clean table (not too much preallocated) + clean_table = table_type() + clean_table.map_locations(keys) + assert n_buckets_start == clean_table.get_state()["n_buckets"] + + +class TestPyObjectHashTableWithNans: + def test_nan_float(self): + nan1 = 
float("nan") + nan2 = float("nan") + assert nan1 is not nan2 + table = ht.PyObjectHashTable() + table.set_item(nan1, 42) + assert table.get_item(nan2) == 42 + + def test_nan_complex_both(self): + nan1 = complex(float("nan"), float("nan")) + nan2 = complex(float("nan"), float("nan")) + assert nan1 is not nan2 + table = ht.PyObjectHashTable() + table.set_item(nan1, 42) + assert table.get_item(nan2) == 42 + + def test_nan_complex_real(self): + nan1 = complex(float("nan"), 1) + nan2 = complex(float("nan"), 1) + other = complex(float("nan"), 2) + assert nan1 is not nan2 + table = ht.PyObjectHashTable() + table.set_item(nan1, 42) + assert table.get_item(nan2) == 42 + with pytest.raises(KeyError, match=None) as error: + table.get_item(other) + assert str(error.value) == str(other) + + def test_nan_complex_imag(self): + nan1 = complex(1, float("nan")) + nan2 = complex(1, float("nan")) + other = complex(2, float("nan")) + assert nan1 is not nan2 + table = ht.PyObjectHashTable() + table.set_item(nan1, 42) + assert table.get_item(nan2) == 42 + with pytest.raises(KeyError, match=None) as error: + table.get_item(other) + assert str(error.value) == str(other) + + def test_nan_in_tuple(self): + nan1 = (float("nan"),) + nan2 = (float("nan"),) + assert nan1[0] is not nan2[0] + table = ht.PyObjectHashTable() + table.set_item(nan1, 42) + assert table.get_item(nan2) == 42 + + def test_nan_in_nested_tuple(self): + nan1 = (1, (2, (float("nan"),))) + nan2 = (1, (2, (float("nan"),))) + other = (1, 2) + table = ht.PyObjectHashTable() + table.set_item(nan1, 42) + assert table.get_item(nan2) == 42 + with pytest.raises(KeyError, match=None) as error: + table.get_item(other) + assert str(error.value) == str(other) + + +def test_hash_equal_tuple_with_nans(): + a = (float("nan"), (float("nan"), float("nan"))) + b = (float("nan"), (float("nan"), float("nan"))) + assert ht.object_hash(a) == ht.object_hash(b) + assert ht.objects_are_equal(a, b) + + +def test_get_labels_groupby_for_Int64(writable): + table = ht.Int64HashTable() + vals = np.array([1, 2, -1, 2, 1, -1], dtype=np.int64) + vals.flags.writeable = writable + arr, unique = table.get_labels_groupby(vals) + expected_arr = np.array([0, 1, -1, 1, 0, -1], dtype=np.intp) + expected_unique = np.array([1, 2], dtype=np.int64) + tm.assert_numpy_array_equal(arr, expected_arr) + tm.assert_numpy_array_equal(unique, expected_unique) + + +def test_tracemalloc_works_for_StringHashTable(): + N = 1000 + keys = np.arange(N).astype(np.compat.unicode).astype(np.object_) + with activated_tracemalloc(): + table = ht.StringHashTable() + table.map_locations(keys) + used = get_allocated_khash_memory() + my_size = table.sizeof() + assert used == my_size + del table + assert get_allocated_khash_memory() == 0 + + +def test_tracemalloc_for_empty_StringHashTable(): + with activated_tracemalloc(): + table = ht.StringHashTable() + used = get_allocated_khash_memory() + my_size = table.sizeof() + assert used == my_size + del table + assert get_allocated_khash_memory() == 0 + + +def test_no_reallocation_StringHashTable(): + for N in range(1, 110): + keys = np.arange(N).astype(np.compat.unicode).astype(np.object_) + preallocated_table = ht.StringHashTable(N) + n_buckets_start = preallocated_table.get_state()["n_buckets"] + preallocated_table.map_locations(keys) + n_buckets_end = preallocated_table.get_state()["n_buckets"] + # original number of buckets was enough: + assert n_buckets_start == n_buckets_end + # check with clean table (not too much preallocated) + clean_table = ht.StringHashTable() + 
clean_table.map_locations(keys) + assert n_buckets_start == clean_table.get_state()["n_buckets"] + + +@pytest.mark.parametrize( + "table_type, dtype", + [ + (ht.Float64HashTable, np.float64), + (ht.Float32HashTable, np.float32), + (ht.Complex128HashTable, np.complex128), + (ht.Complex64HashTable, np.complex64), + ], +) +class TestHashTableWithNans: + def test_get_set_contains_len(self, table_type, dtype): + index = float("nan") + table = table_type() + assert index not in table + + table.set_item(index, 42) + assert len(table) == 1 + assert index in table + assert table.get_item(index) == 42 + + table.set_item(index, 41) + assert len(table) == 1 + assert index in table + assert table.get_item(index) == 41 + + def test_map(self, table_type, dtype): + N = 332 + table = table_type() + keys = np.full(N, np.nan, dtype=dtype) + vals = (np.arange(N) + N).astype(np.int64) + table.map(keys, vals) + assert len(table) == 1 + assert table.get_item(np.nan) == 2 * N - 1 + + def test_map_locations(self, table_type, dtype): + N = 10 + table = table_type() + keys = np.full(N, np.nan, dtype=dtype) + table.map_locations(keys) + assert len(table) == 1 + assert table.get_item(np.nan) == N - 1 + + def test_unique(self, table_type, dtype): + N = 1020 + table = table_type() + keys = np.full(N, np.nan, dtype=dtype) + unique = table.unique(keys) + assert np.all(np.isnan(unique)) and len(unique) == 1 + + +def test_unique_for_nan_objects_floats(): + table = ht.PyObjectHashTable() + keys = np.array([float("nan") for i in range(50)], dtype=np.object_) + unique = table.unique(keys) + assert len(unique) == 1 + + +def test_unique_for_nan_objects_complex(): + table = ht.PyObjectHashTable() + keys = np.array([complex(float("nan"), 1.0) for i in range(50)], dtype=np.object_) + unique = table.unique(keys) + assert len(unique) == 1 + + +def test_unique_for_nan_objects_tuple(): + table = ht.PyObjectHashTable() + keys = np.array( + [1] + [(1.0, (float("nan"), 1.0)) for i in range(50)], dtype=np.object_ + ) + unique = table.unique(keys) + assert len(unique) == 2 + + +@pytest.mark.parametrize( + "dtype", + [ + np.object_, + np.complex128, + np.int64, + np.uint64, + np.float64, + np.complex64, + np.int32, + np.uint32, + np.float32, + np.int16, + np.uint16, + np.int8, + np.uint8, + np.intp, + ], +) +class TestHelpFunctions: + def test_value_count(self, dtype, writable): + N = 43 + expected = (np.arange(N) + N).astype(dtype) + values = np.repeat(expected, 5) + values.flags.writeable = writable + keys, counts = ht.value_count(values, False) + tm.assert_numpy_array_equal(np.sort(keys), expected) + assert np.all(counts == 5) + + def test_value_count_stable(self, dtype, writable): + # GH12679 + values = np.array([2, 1, 5, 22, 3, -1, 8]).astype(dtype) + values.flags.writeable = writable + keys, counts = ht.value_count(values, False) + tm.assert_numpy_array_equal(keys, values) + assert np.all(counts == 1) + + def test_duplicated_first(self, dtype, writable): + N = 100 + values = np.repeat(np.arange(N).astype(dtype), 5) + values.flags.writeable = writable + result = ht.duplicated(values) + expected = np.ones_like(values, dtype=np.bool_) + expected[::5] = False + tm.assert_numpy_array_equal(result, expected) + + def test_ismember_yes(self, dtype, writable): + N = 127 + arr = np.arange(N).astype(dtype) + values = np.arange(N).astype(dtype) + arr.flags.writeable = writable + values.flags.writeable = writable + result = ht.ismember(arr, values) + expected = np.ones_like(values, dtype=np.bool_) + tm.assert_numpy_array_equal(result, expected) + + 
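+    # A minimal usage sketch of the ismember helper exercised above, assuming
+    # the ``ht`` alias for pandas._libs.hashtable used in this module; the
+    # literal values are illustrative only:
+    #
+    #     arr = np.array([1, 2, 3], dtype=np.int64)
+    #     comps = np.array([2, 4], dtype=np.int64)
+    #     ht.ismember(arr, comps)  # -> array([False,  True, False])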
def test_ismember_no(self, dtype): + N = 17 + arr = np.arange(N).astype(dtype) + values = (np.arange(N) + N).astype(dtype) + result = ht.ismember(arr, values) + expected = np.zeros_like(values, dtype=np.bool_) + tm.assert_numpy_array_equal(result, expected) + + def test_mode(self, dtype, writable): + if dtype in (np.int8, np.uint8): + N = 53 + else: + N = 11111 + values = np.repeat(np.arange(N).astype(dtype), 5) + values[0] = 42 + values.flags.writeable = writable + result = ht.mode(values, False) + assert result == 42 + + def test_mode_stable(self, dtype, writable): + values = np.array([2, 1, 5, 22, 3, -1, 8]).astype(dtype) + values.flags.writeable = writable + keys = ht.mode(values, False) + tm.assert_numpy_array_equal(keys, values) + + +def test_modes_with_nans(): + # GH42688, nans aren't mangled + nulls = [pd.NA, np.nan, pd.NaT, None] + values = np.array([True] + nulls * 2, dtype=np.object_) + modes = ht.mode(values, False) + assert modes.size == len(nulls) + + +def test_unique_label_indices_intp(writable): + keys = np.array([1, 2, 2, 2, 1, 3], dtype=np.intp) + keys.flags.writeable = writable + result = ht.unique_label_indices(keys) + expected = np.array([0, 1, 5], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize( + "dtype", + [ + np.float64, + np.float32, + np.complex128, + np.complex64, + ], +) +class TestHelpFunctionsWithNans: + def test_value_count(self, dtype): + values = np.array([np.nan, np.nan, np.nan], dtype=dtype) + keys, counts = ht.value_count(values, True) + assert len(keys) == 0 + keys, counts = ht.value_count(values, False) + assert len(keys) == 1 and np.all(np.isnan(keys)) + assert counts[0] == 3 + + def test_duplicated_first(self, dtype): + values = np.array([np.nan, np.nan, np.nan], dtype=dtype) + result = ht.duplicated(values) + expected = np.array([False, True, True]) + tm.assert_numpy_array_equal(result, expected) + + def test_ismember_yes(self, dtype): + arr = np.array([np.nan, np.nan, np.nan], dtype=dtype) + values = np.array([np.nan, np.nan], dtype=dtype) + result = ht.ismember(arr, values) + expected = np.array([True, True, True], dtype=np.bool_) + tm.assert_numpy_array_equal(result, expected) + + def test_ismember_no(self, dtype): + arr = np.array([np.nan, np.nan, np.nan], dtype=dtype) + values = np.array([1], dtype=dtype) + result = ht.ismember(arr, values) + expected = np.array([False, False, False], dtype=np.bool_) + tm.assert_numpy_array_equal(result, expected) + + def test_mode(self, dtype): + values = np.array([42, np.nan, np.nan, np.nan], dtype=dtype) + assert ht.mode(values, True) == 42 + assert np.isnan(ht.mode(values, False)) + + +def test_ismember_tuple_with_nans(): + # GH-41836 + values = [("a", float("nan")), ("b", 1)] + comps = [("a", float("nan"))] + result = isin(values, comps) + expected = np.array([True, False], dtype=np.bool_) + tm.assert_numpy_array_equal(result, expected) + + +def test_float_complex_int_are_equal_as_objects(): + values = ["a", 5, 5.0, 5.0 + 0j] + comps = list(range(129)) + result = isin(values, comps) + expected = np.array([False, True, True, True], dtype=np.bool_) + tm.assert_numpy_array_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/libs/test_join.py b/.local/lib/python3.8/site-packages/pandas/tests/libs/test_join.py new file mode 100644 index 0000000..ba2e6e7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/libs/test_join.py @@ -0,0 +1,390 @@ +import numpy as np +import pytest + +from pandas._libs import join as libjoin 
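+# The routines imported below operate on integer group ids rather than raw
+# keys: left_outer_join(left, right, max_groups) returns a pair of position
+# indexers with -1 marking unmatched rows. A minimal sketch, values assumed:
+#
+#     ls, rs = left_outer_join(np.array([0, 1], dtype=np.intp),
+#                              np.array([1], dtype=np.intp), 2)
+#     # ls -> [0, 1]; rs -> [-1, 0]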
+from pandas._libs.join import ( + inner_join, + left_outer_join, +) + +import pandas._testing as tm + + +class TestIndexer: + @pytest.mark.parametrize( + "dtype", ["int32", "int64", "float32", "float64", "object"] + ) + def test_outer_join_indexer(self, dtype): + indexer = libjoin.outer_join_indexer + + left = np.arange(3, dtype=dtype) + right = np.arange(2, 5, dtype=dtype) + empty = np.array([], dtype=dtype) + + result, lindexer, rindexer = indexer(left, right) + assert isinstance(result, np.ndarray) + assert isinstance(lindexer, np.ndarray) + assert isinstance(rindexer, np.ndarray) + tm.assert_numpy_array_equal(result, np.arange(5, dtype=dtype)) + exp = np.array([0, 1, 2, -1, -1], dtype=np.intp) + tm.assert_numpy_array_equal(lindexer, exp) + exp = np.array([-1, -1, 0, 1, 2], dtype=np.intp) + tm.assert_numpy_array_equal(rindexer, exp) + + result, lindexer, rindexer = indexer(empty, right) + tm.assert_numpy_array_equal(result, right) + exp = np.array([-1, -1, -1], dtype=np.intp) + tm.assert_numpy_array_equal(lindexer, exp) + exp = np.array([0, 1, 2], dtype=np.intp) + tm.assert_numpy_array_equal(rindexer, exp) + + result, lindexer, rindexer = indexer(left, empty) + tm.assert_numpy_array_equal(result, left) + exp = np.array([0, 1, 2], dtype=np.intp) + tm.assert_numpy_array_equal(lindexer, exp) + exp = np.array([-1, -1, -1], dtype=np.intp) + tm.assert_numpy_array_equal(rindexer, exp) + + def test_cython_left_outer_join(self): + left = np.array([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.intp) + right = np.array([1, 1, 0, 4, 2, 2, 1], dtype=np.intp) + max_group = 5 + + ls, rs = left_outer_join(left, right, max_group) + + exp_ls = left.argsort(kind="mergesort") + exp_rs = right.argsort(kind="mergesort") + + exp_li = np.array([0, 1, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 10]) + exp_ri = np.array( + [0, 0, 0, 1, 2, 3, 1, 2, 3, 1, 2, 3, 4, 5, 4, 5, 4, 5, -1, -1] + ) + + exp_ls = exp_ls.take(exp_li) + exp_ls[exp_li == -1] = -1 + + exp_rs = exp_rs.take(exp_ri) + exp_rs[exp_ri == -1] = -1 + + tm.assert_numpy_array_equal(ls, exp_ls, check_dtype=False) + tm.assert_numpy_array_equal(rs, exp_rs, check_dtype=False) + + def test_cython_right_outer_join(self): + left = np.array([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.intp) + right = np.array([1, 1, 0, 4, 2, 2, 1], dtype=np.intp) + max_group = 5 + + rs, ls = left_outer_join(right, left, max_group) + + exp_ls = left.argsort(kind="mergesort") + exp_rs = right.argsort(kind="mergesort") + + # 0 1 1 1 + exp_li = np.array( + [ + 0, + 1, + 2, + 3, + 4, + 5, + 3, + 4, + 5, + 3, + 4, + 5, + # 2 2 4 + 6, + 7, + 8, + 6, + 7, + 8, + -1, + ] + ) + exp_ri = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6]) + + exp_ls = exp_ls.take(exp_li) + exp_ls[exp_li == -1] = -1 + + exp_rs = exp_rs.take(exp_ri) + exp_rs[exp_ri == -1] = -1 + + tm.assert_numpy_array_equal(ls, exp_ls) + tm.assert_numpy_array_equal(rs, exp_rs) + + def test_cython_inner_join(self): + left = np.array([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.intp) + right = np.array([1, 1, 0, 4, 2, 2, 1, 4], dtype=np.intp) + max_group = 5 + + ls, rs = inner_join(left, right, max_group) + + exp_ls = left.argsort(kind="mergesort") + exp_rs = right.argsort(kind="mergesort") + + exp_li = np.array([0, 1, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 8, 8]) + exp_ri = np.array([0, 0, 0, 1, 2, 3, 1, 2, 3, 1, 2, 3, 4, 5, 4, 5, 4, 5]) + + exp_ls = exp_ls.take(exp_li) + exp_ls[exp_li == -1] = -1 + + exp_rs = exp_rs.take(exp_ri) + exp_rs[exp_ri == -1] = -1 + + tm.assert_numpy_array_equal(ls, exp_ls) + 
tm.assert_numpy_array_equal(rs, exp_rs) + + +@pytest.mark.parametrize("readonly", [True, False]) +def test_left_join_indexer_unique(readonly): + a = np.array([1, 2, 3, 4, 5], dtype=np.int64) + b = np.array([2, 2, 3, 4, 4], dtype=np.int64) + if readonly: + # GH#37312, GH#37264 + a.setflags(write=False) + b.setflags(write=False) + + result = libjoin.left_join_indexer_unique(b, a) + expected = np.array([1, 1, 2, 3, 3], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + +def test_left_outer_join_bug(): + left = np.array( + [ + 0, + 1, + 0, + 1, + 1, + 2, + 3, + 1, + 0, + 2, + 1, + 2, + 0, + 1, + 1, + 2, + 3, + 2, + 3, + 2, + 1, + 1, + 3, + 0, + 3, + 2, + 3, + 0, + 0, + 2, + 3, + 2, + 0, + 3, + 1, + 3, + 0, + 1, + 3, + 0, + 0, + 1, + 0, + 3, + 1, + 0, + 1, + 0, + 1, + 1, + 0, + 2, + 2, + 2, + 2, + 2, + 0, + 3, + 1, + 2, + 0, + 0, + 3, + 1, + 3, + 2, + 2, + 0, + 1, + 3, + 0, + 2, + 3, + 2, + 3, + 3, + 2, + 3, + 3, + 1, + 3, + 2, + 0, + 0, + 3, + 1, + 1, + 1, + 0, + 2, + 3, + 3, + 1, + 2, + 0, + 3, + 1, + 2, + 0, + 2, + ], + dtype=np.intp, + ) + + right = np.array([3, 1], dtype=np.intp) + max_groups = 4 + + lidx, ridx = libjoin.left_outer_join(left, right, max_groups, sort=False) + + exp_lidx = np.arange(len(left), dtype=np.intp) + exp_ridx = -np.ones(len(left), dtype=np.intp) + + exp_ridx[left == 1] = 1 + exp_ridx[left == 3] = 0 + + tm.assert_numpy_array_equal(lidx, exp_lidx) + tm.assert_numpy_array_equal(ridx, exp_ridx) + + +def test_inner_join_indexer(): + a = np.array([1, 2, 3, 4, 5], dtype=np.int64) + b = np.array([0, 3, 5, 7, 9], dtype=np.int64) + + index, ares, bres = libjoin.inner_join_indexer(a, b) + + index_exp = np.array([3, 5], dtype=np.int64) + tm.assert_almost_equal(index, index_exp) + + aexp = np.array([2, 4], dtype=np.intp) + bexp = np.array([1, 2], dtype=np.intp) + tm.assert_almost_equal(ares, aexp) + tm.assert_almost_equal(bres, bexp) + + a = np.array([5], dtype=np.int64) + b = np.array([5], dtype=np.int64) + + index, ares, bres = libjoin.inner_join_indexer(a, b) + tm.assert_numpy_array_equal(index, np.array([5], dtype=np.int64)) + tm.assert_numpy_array_equal(ares, np.array([0], dtype=np.intp)) + tm.assert_numpy_array_equal(bres, np.array([0], dtype=np.intp)) + + +def test_outer_join_indexer(): + a = np.array([1, 2, 3, 4, 5], dtype=np.int64) + b = np.array([0, 3, 5, 7, 9], dtype=np.int64) + + index, ares, bres = libjoin.outer_join_indexer(a, b) + + index_exp = np.array([0, 1, 2, 3, 4, 5, 7, 9], dtype=np.int64) + tm.assert_almost_equal(index, index_exp) + + aexp = np.array([-1, 0, 1, 2, 3, 4, -1, -1], dtype=np.intp) + bexp = np.array([0, -1, -1, 1, -1, 2, 3, 4], dtype=np.intp) + tm.assert_almost_equal(ares, aexp) + tm.assert_almost_equal(bres, bexp) + + a = np.array([5], dtype=np.int64) + b = np.array([5], dtype=np.int64) + + index, ares, bres = libjoin.outer_join_indexer(a, b) + tm.assert_numpy_array_equal(index, np.array([5], dtype=np.int64)) + tm.assert_numpy_array_equal(ares, np.array([0], dtype=np.intp)) + tm.assert_numpy_array_equal(bres, np.array([0], dtype=np.intp)) + + +def test_left_join_indexer(): + a = np.array([1, 2, 3, 4, 5], dtype=np.int64) + b = np.array([0, 3, 5, 7, 9], dtype=np.int64) + + index, ares, bres = libjoin.left_join_indexer(a, b) + + tm.assert_almost_equal(index, a) + + aexp = np.array([0, 1, 2, 3, 4], dtype=np.intp) + bexp = np.array([-1, -1, 1, -1, 2], dtype=np.intp) + tm.assert_almost_equal(ares, aexp) + tm.assert_almost_equal(bres, bexp) + + a = np.array([5], dtype=np.int64) + b = np.array([5], dtype=np.int64) + + index, ares, bres 
= libjoin.left_join_indexer(a, b) + tm.assert_numpy_array_equal(index, np.array([5], dtype=np.int64)) + tm.assert_numpy_array_equal(ares, np.array([0], dtype=np.intp)) + tm.assert_numpy_array_equal(bres, np.array([0], dtype=np.intp)) + + +def test_left_join_indexer2(): + idx = np.array([1, 1, 2, 5], dtype=np.int64) + idx2 = np.array([1, 2, 5, 7, 9], dtype=np.int64) + + res, lidx, ridx = libjoin.left_join_indexer(idx2, idx) + + exp_res = np.array([1, 1, 2, 5, 7, 9], dtype=np.int64) + tm.assert_almost_equal(res, exp_res) + + exp_lidx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp) + tm.assert_almost_equal(lidx, exp_lidx) + + exp_ridx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp) + tm.assert_almost_equal(ridx, exp_ridx) + + +def test_outer_join_indexer2(): + idx = np.array([1, 1, 2, 5], dtype=np.int64) + idx2 = np.array([1, 2, 5, 7, 9], dtype=np.int64) + + res, lidx, ridx = libjoin.outer_join_indexer(idx2, idx) + + exp_res = np.array([1, 1, 2, 5, 7, 9], dtype=np.int64) + tm.assert_almost_equal(res, exp_res) + + exp_lidx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp) + tm.assert_almost_equal(lidx, exp_lidx) + + exp_ridx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp) + tm.assert_almost_equal(ridx, exp_ridx) + + +def test_inner_join_indexer2(): + idx = np.array([1, 1, 2, 5], dtype=np.int64) + idx2 = np.array([1, 2, 5, 7, 9], dtype=np.int64) + + res, lidx, ridx = libjoin.inner_join_indexer(idx2, idx) + + exp_res = np.array([1, 1, 2, 5], dtype=np.int64) + tm.assert_almost_equal(res, exp_res) + + exp_lidx = np.array([0, 0, 1, 2], dtype=np.intp) + tm.assert_almost_equal(lidx, exp_lidx) + + exp_ridx = np.array([0, 1, 2, 3], dtype=np.intp) + tm.assert_almost_equal(ridx, exp_ridx) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/libs/test_lib.py b/.local/lib/python3.8/site-packages/pandas/tests/libs/test_lib.py new file mode 100644 index 0000000..5b7e90f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/libs/test_lib.py @@ -0,0 +1,208 @@ +import numpy as np +import pytest + +from pandas._libs import ( + lib, + writers as libwriters, +) + +from pandas import Index +import pandas._testing as tm + + +class TestMisc: + def test_max_len_string_array(self): + + arr = a = np.array(["foo", "b", np.nan], dtype="object") + assert libwriters.max_len_string_array(arr) == 3 + + # unicode + arr = a.astype("U").astype(object) + assert libwriters.max_len_string_array(arr) == 3 + + # bytes for python3 + arr = a.astype("S").astype(object) + assert libwriters.max_len_string_array(arr) == 3 + + # raises + msg = "No matching signature found" + with pytest.raises(TypeError, match=msg): + libwriters.max_len_string_array(arr.astype("U")) + + def test_fast_unique_multiple_list_gen_sort(self): + keys = [["p", "a"], ["n", "d"], ["a", "s"]] + + gen = (key for key in keys) + expected = np.array(["a", "d", "n", "p", "s"]) + out = lib.fast_unique_multiple_list_gen(gen, sort=True) + tm.assert_numpy_array_equal(np.array(out), expected) + + gen = (key for key in keys) + expected = np.array(["p", "a", "n", "d", "s"]) + out = lib.fast_unique_multiple_list_gen(gen, sort=False) + tm.assert_numpy_array_equal(np.array(out), expected) + + +class TestIndexing: + def test_maybe_indices_to_slice_left_edge(self): + target = np.arange(100) + + # slice + indices = np.array([], dtype=np.intp) + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + + assert isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + for end in [1, 2, 5, 20, 99]: + for step in [1, 2, 
4]: + indices = np.arange(0, end, step, dtype=np.intp) + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + + assert isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + # reverse + indices = indices[::-1] + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + + assert isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + # not slice + for case in [[2, 1, 2, 0], [2, 2, 1, 0], [0, 1, 2, 1], [-2, 0, 2], [2, 0, -2]]: + indices = np.array(case, dtype=np.intp) + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + + assert not isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(maybe_slice, indices) + tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + def test_maybe_indices_to_slice_right_edge(self): + target = np.arange(100) + + # slice + for start in [0, 2, 5, 20, 97, 98]: + for step in [1, 2, 4]: + indices = np.arange(start, 99, step, dtype=np.intp) + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + + assert isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + # reverse + indices = indices[::-1] + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + + assert isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + # not slice + indices = np.array([97, 98, 99, 100], dtype=np.intp) + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + + assert not isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(maybe_slice, indices) + + msg = "index 100 is out of bounds for axis (0|1) with size 100" + + with pytest.raises(IndexError, match=msg): + target[indices] + with pytest.raises(IndexError, match=msg): + target[maybe_slice] + + indices = np.array([100, 99, 98, 97], dtype=np.intp) + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + + assert not isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(maybe_slice, indices) + + with pytest.raises(IndexError, match=msg): + target[indices] + with pytest.raises(IndexError, match=msg): + target[maybe_slice] + + for case in [[99, 97, 99, 96], [99, 99, 98, 97], [98, 98, 97, 96]]: + indices = np.array(case, dtype=np.intp) + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + + assert not isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(maybe_slice, indices) + tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + def test_maybe_indices_to_slice_both_edges(self): + target = np.arange(10) + + # slice + for step in [1, 2, 4, 5, 8, 9]: + indices = np.arange(0, 9, step, dtype=np.intp) + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + assert isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + # reverse + indices = indices[::-1] + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + assert isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + # not slice + for case in [[4, 2, 0, -2], [2, 2, 1, 0], [0, 1, 2, 1]]: + indices = np.array(case, dtype=np.intp) + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + assert not isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(maybe_slice, indices) + tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + def test_maybe_indices_to_slice_middle(self): + target = np.arange(100) + + # slice + for start, 
end in [(2, 10), (5, 25), (65, 97)]: + for step in [1, 2, 4, 20]: + indices = np.arange(start, end, step, dtype=np.intp) + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + + assert isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + # reverse + indices = indices[::-1] + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + + assert isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + # not slice + for case in [[14, 12, 10, 12], [12, 12, 11, 10], [10, 11, 12, 11]]: + indices = np.array(case, dtype=np.intp) + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + + assert not isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(maybe_slice, indices) + tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + def test_maybe_booleans_to_slice(self): + arr = np.array([0, 0, 1, 1, 1, 0, 1], dtype=np.uint8) + result = lib.maybe_booleans_to_slice(arr) + assert result.dtype == np.bool_ + + result = lib.maybe_booleans_to_slice(arr[:0]) + assert result == slice(0, 0) + + def test_get_reverse_indexer(self): + indexer = np.array([-1, -1, 1, 2, 0, -1, 3, 4], dtype=np.intp) + result = lib.get_reverse_indexer(indexer, 5) + expected = np.array([4, 2, 3, 6, 7], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + +def test_cache_readonly_preserve_docstrings(): + # GH18197 + assert Index.hasnans.__doc__ is not None + + +def test_no_default_pickle(): + # GH#40397 + obj = tm.round_trip_pickle(lib.no_default) + assert obj is lib.no_default diff --git a/.local/lib/python3.8/site-packages/pandas/tests/plotting/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/plotting/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/plotting/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/plotting/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..50caecc Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/plotting/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/plotting/__pycache__/common.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/plotting/__pycache__/common.cpython-38.pyc new file mode 100644 index 0000000..1c747ff Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/plotting/__pycache__/common.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/plotting/__pycache__/test_backend.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/plotting/__pycache__/test_backend.cpython-38.pyc new file mode 100644 index 0000000..3b8be5e Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/plotting/__pycache__/test_backend.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/plotting/__pycache__/test_boxplot_method.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/plotting/__pycache__/test_boxplot_method.cpython-38.pyc new file mode 100644 index 0000000..d36a943 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/plotting/__pycache__/test_boxplot_method.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/plotting/__pycache__/test_common.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/plotting/__pycache__/test_common.cpython-38.pyc 
new file mode 100644 index 0000000..caa9d44 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/plotting/__pycache__/test_common.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/plotting/__pycache__/test_converter.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/plotting/__pycache__/test_converter.cpython-38.pyc new file mode 100644 index 0000000..fd5d0b7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/plotting/__pycache__/test_converter.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/plotting/__pycache__/test_datetimelike.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/plotting/__pycache__/test_datetimelike.cpython-38.pyc new file mode 100644 index 0000000..9e12ccf Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/plotting/__pycache__/test_datetimelike.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/plotting/__pycache__/test_groupby.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/plotting/__pycache__/test_groupby.cpython-38.pyc new file mode 100644 index 0000000..04e23cb Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/plotting/__pycache__/test_groupby.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/plotting/__pycache__/test_hist_method.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/plotting/__pycache__/test_hist_method.cpython-38.pyc new file mode 100644 index 0000000..d49802c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/plotting/__pycache__/test_hist_method.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/plotting/__pycache__/test_misc.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/plotting/__pycache__/test_misc.cpython-38.pyc new file mode 100644 index 0000000..bb566f7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/plotting/__pycache__/test_misc.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/plotting/__pycache__/test_series.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/plotting/__pycache__/test_series.cpython-38.pyc new file mode 100644 index 0000000..eb81b8c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/plotting/__pycache__/test_series.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/plotting/__pycache__/test_style.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/plotting/__pycache__/test_style.cpython-38.pyc new file mode 100644 index 0000000..78b14da Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/plotting/__pycache__/test_style.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/plotting/common.py b/.local/lib/python3.8/site-packages/pandas/tests/plotting/common.py new file mode 100644 index 0000000..ae9db5e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/plotting/common.py @@ -0,0 +1,663 @@ +""" +Module consolidating common testing functions for checking plotting. + +Currently all plotting tests are marked as slow via +``pytestmark = pytest.mark.slow`` at the module level. 
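+
+The helpers assume matplotlib is importable; ``TestPlotBase`` is decorated
+with ``td.skip_if_no_mpl``, so the whole suite is skipped otherwise.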
+""" + +from __future__ import annotations + +import os +from typing import ( + TYPE_CHECKING, + Sequence, +) +import warnings + +import numpy as np + +from pandas.util._decorators import cache_readonly +import pandas.util._test_decorators as td + +from pandas.core.dtypes.api import is_list_like + +import pandas as pd +from pandas import ( + DataFrame, + Series, + to_datetime, +) +import pandas._testing as tm + +if TYPE_CHECKING: + from matplotlib.axes import Axes + + +@td.skip_if_no_mpl +class TestPlotBase: + """ + This is a common base class used for various plotting tests + """ + + def setup_method(self, method): + + import matplotlib as mpl + + from pandas.plotting._matplotlib import compat + + self.compat = compat + + mpl.rcdefaults() + + self.start_date_to_int64 = 812419200000000000 + self.end_date_to_int64 = 819331200000000000 + + self.mpl_ge_2_2_3 = compat.mpl_ge_2_2_3() + self.mpl_ge_3_0_0 = compat.mpl_ge_3_0_0() + self.mpl_ge_3_1_0 = compat.mpl_ge_3_1_0() + self.mpl_ge_3_2_0 = compat.mpl_ge_3_2_0() + + self.bp_n_objects = 7 + self.polycollection_factor = 2 + self.default_figsize = (6.4, 4.8) + self.default_tick_position = "left" + + n = 100 + with tm.RNGContext(42): + gender = np.random.choice(["Male", "Female"], size=n) + classroom = np.random.choice(["A", "B", "C"], size=n) + + self.hist_df = DataFrame( + { + "gender": gender, + "classroom": classroom, + "height": np.random.normal(66, 4, size=n), + "weight": np.random.normal(161, 32, size=n), + "category": np.random.randint(4, size=n), + "datetime": to_datetime( + np.random.randint( + self.start_date_to_int64, + self.end_date_to_int64, + size=n, + dtype=np.int64, + ) + ), + } + ) + + self.tdf = tm.makeTimeDataFrame() + self.hexbin_df = DataFrame( + { + "A": np.random.uniform(size=20), + "B": np.random.uniform(size=20), + "C": np.arange(20) + np.random.uniform(size=20), + } + ) + + def teardown_method(self, method): + tm.close() + + @cache_readonly + def plt(self): + import matplotlib.pyplot as plt + + return plt + + @cache_readonly + def colorconverter(self): + import matplotlib.colors as colors + + return colors.colorConverter + + def _check_legend_labels(self, axes, labels=None, visible=True): + """ + Check each axes has expected legend labels + + Parameters + ---------- + axes : matplotlib Axes object, or its list-like + labels : list-like + expected legend labels + visible : bool + expected legend visibility. labels are checked only when visible is + True + """ + if visible and (labels is None): + raise ValueError("labels must be specified when visible is True") + axes = self._flatten_visible(axes) + for ax in axes: + if visible: + assert ax.get_legend() is not None + self._check_text_labels(ax.get_legend().get_texts(), labels) + else: + assert ax.get_legend() is None + + def _check_legend_marker(self, ax, expected_markers=None, visible=True): + """ + Check ax has expected legend markers + + Parameters + ---------- + ax : matplotlib Axes object + expected_markers : list-like + expected legend markers + visible : bool + expected legend visibility. 
labels are checked only when visible is + True + """ + if visible and (expected_markers is None): + raise ValueError("Markers must be specified when visible is True") + if visible: + handles, _ = ax.get_legend_handles_labels() + markers = [handle.get_marker() for handle in handles] + assert markers == expected_markers + else: + assert ax.get_legend() is None + + def _check_data(self, xp, rs): + """ + Check each axes has identical lines + + Parameters + ---------- + xp : matplotlib Axes object + rs : matplotlib Axes object + """ + xp_lines = xp.get_lines() + rs_lines = rs.get_lines() + + def check_line(xpl, rsl): + xpdata = xpl.get_xydata() + rsdata = rsl.get_xydata() + tm.assert_almost_equal(xpdata, rsdata) + + assert len(xp_lines) == len(rs_lines) + [check_line(xpl, rsl) for xpl, rsl in zip(xp_lines, rs_lines)] + tm.close() + + def _check_visible(self, collections, visible=True): + """ + Check each artist is visible or not + + Parameters + ---------- + collections : matplotlib Artist or its list-like + target Artist or its list or collection + visible : bool + expected visibility + """ + from matplotlib.collections import Collection + + if not isinstance(collections, Collection) and not is_list_like(collections): + collections = [collections] + + for patch in collections: + assert patch.get_visible() == visible + + def _check_patches_all_filled( + self, axes: Axes | Sequence[Axes], filled: bool = True + ) -> None: + """ + Check for each artist whether it is filled or not + + Parameters + ---------- + axes : matplotlib Axes object, or its list-like + filled : bool + expected filling + """ + + axes = self._flatten_visible(axes) + for ax in axes: + for patch in ax.patches: + assert patch.fill == filled + + def _get_colors_mapped(self, series, colors): + unique = series.unique() + # unique and colors length can be differed + # depending on slice value + mapped = dict(zip(unique, colors)) + return [mapped[v] for v in series.values] + + def _check_colors( + self, collections, linecolors=None, facecolors=None, mapping=None + ): + """ + Check each artist has expected line colors and face colors + + Parameters + ---------- + collections : list-like + list or collection of target artist + linecolors : list-like which has the same length as collections + list of expected line colors + facecolors : list-like which has the same length as collections + list of expected face colors + mapping : Series + Series used for color grouping key + used for andrew_curves, parallel_coordinates, radviz test + """ + from matplotlib.collections import ( + Collection, + LineCollection, + PolyCollection, + ) + from matplotlib.lines import Line2D + + conv = self.colorconverter + if linecolors is not None: + + if mapping is not None: + linecolors = self._get_colors_mapped(mapping, linecolors) + linecolors = linecolors[: len(collections)] + + assert len(collections) == len(linecolors) + for patch, color in zip(collections, linecolors): + if isinstance(patch, Line2D): + result = patch.get_color() + # Line2D may contains string color expression + result = conv.to_rgba(result) + elif isinstance(patch, (PolyCollection, LineCollection)): + result = tuple(patch.get_edgecolor()[0]) + else: + result = patch.get_edgecolor() + + expected = conv.to_rgba(color) + assert result == expected + + if facecolors is not None: + + if mapping is not None: + facecolors = self._get_colors_mapped(mapping, facecolors) + facecolors = facecolors[: len(collections)] + + assert len(collections) == len(facecolors) + for patch, color in 
zip(collections, facecolors): + if isinstance(patch, Collection): + # returned as list of np.array + result = patch.get_facecolor()[0] + else: + result = patch.get_facecolor() + + if isinstance(result, np.ndarray): + result = tuple(result) + + expected = conv.to_rgba(color) + assert result == expected + + def _check_text_labels(self, texts, expected): + """ + Check each text has expected labels + + Parameters + ---------- + texts : matplotlib Text object, or its list-like + target text, or its list + expected : str or list-like which has the same length as texts + expected text label, or its list + """ + if not is_list_like(texts): + assert texts.get_text() == expected + else: + labels = [t.get_text() for t in texts] + assert len(labels) == len(expected) + for label, e in zip(labels, expected): + assert label == e + + def _check_ticks_props( + self, axes, xlabelsize=None, xrot=None, ylabelsize=None, yrot=None + ): + """ + Check each axes has expected tick properties + + Parameters + ---------- + axes : matplotlib Axes object, or its list-like + xlabelsize : number + expected xticks font size + xrot : number + expected xticks rotation + ylabelsize : number + expected yticks font size + yrot : number + expected yticks rotation + """ + from matplotlib.ticker import NullFormatter + + axes = self._flatten_visible(axes) + for ax in axes: + if xlabelsize is not None or xrot is not None: + if isinstance(ax.xaxis.get_minor_formatter(), NullFormatter): + # If minor ticks has NullFormatter, rot / fontsize are not + # retained + labels = ax.get_xticklabels() + else: + labels = ax.get_xticklabels() + ax.get_xticklabels(minor=True) + + for label in labels: + if xlabelsize is not None: + tm.assert_almost_equal(label.get_fontsize(), xlabelsize) + if xrot is not None: + tm.assert_almost_equal(label.get_rotation(), xrot) + + if ylabelsize is not None or yrot is not None: + if isinstance(ax.yaxis.get_minor_formatter(), NullFormatter): + labels = ax.get_yticklabels() + else: + labels = ax.get_yticklabels() + ax.get_yticklabels(minor=True) + + for label in labels: + if ylabelsize is not None: + tm.assert_almost_equal(label.get_fontsize(), ylabelsize) + if yrot is not None: + tm.assert_almost_equal(label.get_rotation(), yrot) + + def _check_ax_scales(self, axes, xaxis="linear", yaxis="linear"): + """ + Check each axes has expected scales + + Parameters + ---------- + axes : matplotlib Axes object, or its list-like + xaxis : {'linear', 'log'} + expected xaxis scale + yaxis : {'linear', 'log'} + expected yaxis scale + """ + axes = self._flatten_visible(axes) + for ax in axes: + assert ax.xaxis.get_scale() == xaxis + assert ax.yaxis.get_scale() == yaxis + + def _check_axes_shape(self, axes, axes_num=None, layout=None, figsize=None): + """ + Check expected number of axes is drawn in expected layout + + Parameters + ---------- + axes : matplotlib Axes object, or its list-like + axes_num : number + expected number of axes. Unnecessary axes should be set to + invisible. + layout : tuple + expected layout, (expected number of rows , columns) + figsize : tuple + expected figsize. 
default is matplotlib default + """ + from pandas.plotting._matplotlib.tools import flatten_axes + + if figsize is None: + figsize = self.default_figsize + visible_axes = self._flatten_visible(axes) + + if axes_num is not None: + assert len(visible_axes) == axes_num + for ax in visible_axes: + # check something drawn on visible axes + assert len(ax.get_children()) > 0 + + if layout is not None: + result = self._get_axes_layout(flatten_axes(axes)) + assert result == layout + + tm.assert_numpy_array_equal( + visible_axes[0].figure.get_size_inches(), + np.array(figsize, dtype=np.float64), + ) + + def _get_axes_layout(self, axes): + x_set = set() + y_set = set() + for ax in axes: + # check axes coordinates to estimate layout + points = ax.get_position().get_points() + x_set.add(points[0][0]) + y_set.add(points[0][1]) + return (len(y_set), len(x_set)) + + def _flatten_visible(self, axes): + """ + Flatten axes, and filter only visible + + Parameters + ---------- + axes : matplotlib Axes object, or its list-like + + """ + from pandas.plotting._matplotlib.tools import flatten_axes + + axes = flatten_axes(axes) + axes = [ax for ax in axes if ax.get_visible()] + return axes + + def _check_has_errorbars(self, axes, xerr=0, yerr=0): + """ + Check axes has expected number of errorbars + + Parameters + ---------- + axes : matplotlib Axes object, or its list-like + xerr : number + expected number of x errorbar + yerr : number + expected number of y errorbar + """ + axes = self._flatten_visible(axes) + for ax in axes: + containers = ax.containers + xerr_count = 0 + yerr_count = 0 + for c in containers: + has_xerr = getattr(c, "has_xerr", False) + has_yerr = getattr(c, "has_yerr", False) + if has_xerr: + xerr_count += 1 + if has_yerr: + yerr_count += 1 + assert xerr == xerr_count + assert yerr == yerr_count + + def _check_box_return_type( + self, returned, return_type, expected_keys=None, check_ax_title=True + ): + """ + Check box returned type is correct + + Parameters + ---------- + returned : object to be tested, returned from boxplot + return_type : str + return_type passed to boxplot + expected_keys : list-like, optional + group labels in subplot case. If not passed, + the function checks assuming boxplot uses single ax + check_ax_title : bool + Whether to check the ax.title is the same as expected_key + Intended to be checked by calling from ``boxplot``. + Normal ``plot`` doesn't attach ``ax.title``, it must be disabled. 
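+        The accepted ``return_type`` values are "dict", "axes" and "both",
+        mapped to ``dict``, matplotlib ``Axes`` and ``tuple`` respectively
+        (see ``types`` below).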
+ """ + from matplotlib.axes import Axes + + types = {"dict": dict, "axes": Axes, "both": tuple} + if expected_keys is None: + # should be fixed when the returning default is changed + if return_type is None: + return_type = "dict" + + assert isinstance(returned, types[return_type]) + if return_type == "both": + assert isinstance(returned.ax, Axes) + assert isinstance(returned.lines, dict) + else: + # should be fixed when the returning default is changed + if return_type is None: + for r in self._flatten_visible(returned): + assert isinstance(r, Axes) + return + + assert isinstance(returned, Series) + + assert sorted(returned.keys()) == sorted(expected_keys) + for key, value in returned.items(): + assert isinstance(value, types[return_type]) + # check returned dict has correct mapping + if return_type == "axes": + if check_ax_title: + assert value.get_title() == key + elif return_type == "both": + if check_ax_title: + assert value.ax.get_title() == key + assert isinstance(value.ax, Axes) + assert isinstance(value.lines, dict) + elif return_type == "dict": + line = value["medians"][0] + axes = line.axes + if check_ax_title: + assert axes.get_title() == key + else: + raise AssertionError + + def _check_grid_settings(self, obj, kinds, kws={}): + # Make sure plot defaults to rcParams['axes.grid'] setting, GH 9792 + + import matplotlib as mpl + + def is_grid_on(): + xticks = self.plt.gca().xaxis.get_major_ticks() + yticks = self.plt.gca().yaxis.get_major_ticks() + # for mpl 2.2.2, gridOn and gridline.get_visible disagree. + # for new MPL, they are the same. + + if self.mpl_ge_3_1_0: + xoff = all(not g.gridline.get_visible() for g in xticks) + yoff = all(not g.gridline.get_visible() for g in yticks) + else: + xoff = all(not g.gridOn for g in xticks) + yoff = all(not g.gridOn for g in yticks) + + return not (xoff and yoff) + + spndx = 1 + for kind in kinds: + + self.plt.subplot(1, 4 * len(kinds), spndx) + spndx += 1 + mpl.rc("axes", grid=False) + obj.plot(kind=kind, **kws) + assert not is_grid_on() + + self.plt.subplot(1, 4 * len(kinds), spndx) + spndx += 1 + mpl.rc("axes", grid=True) + obj.plot(kind=kind, grid=False, **kws) + assert not is_grid_on() + + if kind not in ["pie", "hexbin", "scatter"]: + self.plt.subplot(1, 4 * len(kinds), spndx) + spndx += 1 + mpl.rc("axes", grid=True) + obj.plot(kind=kind, **kws) + assert is_grid_on() + + self.plt.subplot(1, 4 * len(kinds), spndx) + spndx += 1 + mpl.rc("axes", grid=False) + obj.plot(kind=kind, grid=True, **kws) + assert is_grid_on() + + def _unpack_cycler(self, rcParams, field="color"): + """ + Auxiliary function for correctly unpacking cycler after MPL >= 1.5 + """ + return [v[field] for v in rcParams["axes.prop_cycle"]] + + def get_x_axis(self, ax): + return ax._shared_axes["x"] if self.compat.mpl_ge_3_5_0() else ax._shared_x_axes + + def get_y_axis(self, ax): + return ax._shared_axes["y"] if self.compat.mpl_ge_3_5_0() else ax._shared_y_axes + + +def _check_plot_works(f, filterwarnings="always", default_axes=False, **kwargs): + """ + Create plot and ensure that plot return object is valid. + + Parameters + ---------- + f : func + Plotting function. + filterwarnings : str + Warnings filter. 
+ See https://docs.python.org/3/library/warnings.html#warning-filter + default_axes : bool, optional + If False (default): + - If `ax` not in `kwargs`, then create subplot(211) and plot there + - Create new subplot(212) and plot there as well + - Mind special corner case for bootstrap_plot (see `_gen_two_subplots`) + If True: + - Simply run plotting function with kwargs provided + - All required axes instances will be created automatically + - It is recommended to use it when the plotting function + creates multiple axes itself. It helps avoid warnings like + 'UserWarning: To output multiple subplots, + the figure containing the passed axes is being cleared' + **kwargs + Keyword arguments passed to the plotting function. + + Returns + ------- + Plot object returned by the last plotting. + """ + import matplotlib.pyplot as plt + + if default_axes: + gen_plots = _gen_default_plot + else: + gen_plots = _gen_two_subplots + + ret = None + with warnings.catch_warnings(): + warnings.simplefilter(filterwarnings) + try: + fig = kwargs.get("figure", plt.gcf()) + plt.clf() + + for ret in gen_plots(f, fig, **kwargs): + tm.assert_is_valid_plot_return_object(ret) + + with tm.ensure_clean(return_filelike=True) as path: + plt.savefig(path) + + except Exception as err: + raise err + finally: + tm.close(fig) + + return ret + + +def _gen_default_plot(f, fig, **kwargs): + """ + Create plot in a default way. + """ + yield f(**kwargs) + + +def _gen_two_subplots(f, fig, **kwargs): + """ + Create plot on two subplots forcefully created. + """ + if "ax" not in kwargs: + fig.add_subplot(211) + yield f(**kwargs) + + if f is pd.plotting.bootstrap_plot: + assert "ax" not in kwargs + else: + kwargs["ax"] = fig.add_subplot(212) + yield f(**kwargs) + + +def curpath(): + pth, _ = os.path.split(os.path.abspath(__file__)) + return pth diff --git a/.local/lib/python3.8/site-packages/pandas/tests/plotting/frame/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/plotting/frame/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/plotting/frame/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/plotting/frame/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..1fc4a8e Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/plotting/frame/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/plotting/frame/__pycache__/test_frame.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/plotting/frame/__pycache__/test_frame.cpython-38.pyc new file mode 100644 index 0000000..0556378 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/plotting/frame/__pycache__/test_frame.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/plotting/frame/__pycache__/test_frame_color.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/plotting/frame/__pycache__/test_frame_color.cpython-38.pyc new file mode 100644 index 0000000..30e9362 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/plotting/frame/__pycache__/test_frame_color.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/plotting/frame/__pycache__/test_frame_groupby.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/plotting/frame/__pycache__/test_frame_groupby.cpython-38.pyc new file mode 100644 index 0000000..7198483 Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/pandas/tests/plotting/frame/__pycache__/test_frame_groupby.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/plotting/frame/__pycache__/test_frame_legend.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/plotting/frame/__pycache__/test_frame_legend.cpython-38.pyc new file mode 100644 index 0000000..0bff37b Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/plotting/frame/__pycache__/test_frame_legend.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/plotting/frame/__pycache__/test_frame_subplots.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/plotting/frame/__pycache__/test_frame_subplots.cpython-38.pyc new file mode 100644 index 0000000..b108cd3 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/plotting/frame/__pycache__/test_frame_subplots.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/plotting/frame/__pycache__/test_hist_box_by.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/plotting/frame/__pycache__/test_hist_box_by.cpython-38.pyc new file mode 100644 index 0000000..1dcb187 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/plotting/frame/__pycache__/test_hist_box_by.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/plotting/frame/test_frame.py b/.local/lib/python3.8/site-packages/pandas/tests/plotting/frame/test_frame.py new file mode 100644 index 0000000..ff24734 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/plotting/frame/test_frame.py @@ -0,0 +1,2103 @@ +""" Test cases for DataFrame.plot """ +from datetime import ( + date, + datetime, +) +import itertools +import re +import string +import warnings + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas.core.dtypes.api import is_list_like + +import pandas as pd +from pandas import ( + DataFrame, + MultiIndex, + PeriodIndex, + Series, + bdate_range, + date_range, +) +import pandas._testing as tm +from pandas.tests.plotting.common import ( + TestPlotBase, + _check_plot_works, +) + +from pandas.io.formats.printing import pprint_thing +import pandas.plotting as plotting + +pytestmark = pytest.mark.slow + + +@td.skip_if_no_mpl +class TestDataFramePlots(TestPlotBase): + def setup_method(self, method): + TestPlotBase.setup_method(self, method) + import matplotlib as mpl + + mpl.rcdefaults() + + self.tdf = tm.makeTimeDataFrame() + self.hexbin_df = DataFrame( + { + "A": np.random.uniform(size=20), + "B": np.random.uniform(size=20), + "C": np.arange(20) + np.random.uniform(size=20), + } + ) + + def test_plot(self): + from pandas.plotting._matplotlib.compat import mpl_ge_3_1_0 + + df = self.tdf + _check_plot_works(df.plot, grid=False) + + # _check_plot_works adds an ax so use default_axes=True to avoid warning + axes = _check_plot_works(df.plot, default_axes=True, subplots=True) + self._check_axes_shape(axes, axes_num=4, layout=(4, 1)) + + axes = _check_plot_works( + df.plot, + default_axes=True, + subplots=True, + layout=(-1, 2), + ) + self._check_axes_shape(axes, axes_num=4, layout=(2, 2)) + + axes = _check_plot_works( + df.plot, + default_axes=True, + subplots=True, + use_index=False, + ) + self._check_ticks_props(axes, xrot=0) + self._check_axes_shape(axes, axes_num=4, layout=(4, 1)) + + df = DataFrame({"x": [1, 2], "y": [3, 4]}) + if mpl_ge_3_1_0(): + msg = "'Line2D' object has no 
property 'blarg'" + else: + msg = "Unknown property blarg" + with pytest.raises(AttributeError, match=msg): + df.plot.line(blarg=True) + + df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) + + ax = _check_plot_works(df.plot, use_index=True) + self._check_ticks_props(ax, xrot=0) + _check_plot_works(df.plot, sort_columns=False) + _check_plot_works(df.plot, yticks=[1, 5, 10]) + _check_plot_works(df.plot, xticks=[1, 5, 10]) + _check_plot_works(df.plot, ylim=(-100, 100), xlim=(-100, 100)) + + _check_plot_works(df.plot, default_axes=True, subplots=True, title="blah") + + # We have to redo it here because _check_plot_works does two plots, + # once without an ax kwarg and once with an ax kwarg and the new sharex + # behaviour does not remove the visibility of the latter axis (as ax is + # present). see: https://github.com/pandas-dev/pandas/issues/9737 + + axes = df.plot(subplots=True, title="blah") + self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) + # axes[0].figure.savefig("test.png") + for ax in axes[:2]: + self._check_visible(ax.xaxis) # xaxis must be visible for grid + self._check_visible(ax.get_xticklabels(), visible=False) + self._check_visible(ax.get_xticklabels(minor=True), visible=False) + self._check_visible([ax.xaxis.get_label()], visible=False) + for ax in [axes[2]]: + self._check_visible(ax.xaxis) + self._check_visible(ax.get_xticklabels()) + self._check_visible([ax.xaxis.get_label()]) + self._check_ticks_props(ax, xrot=0) + + _check_plot_works(df.plot, title="blah") + + tuples = zip(string.ascii_letters[:10], range(10)) + df = DataFrame(np.random.rand(10, 3), index=MultiIndex.from_tuples(tuples)) + ax = _check_plot_works(df.plot, use_index=True) + self._check_ticks_props(ax, xrot=0) + + # unicode + index = MultiIndex.from_tuples( + [ + ("\u03b1", 0), + ("\u03b1", 1), + ("\u03b2", 2), + ("\u03b2", 3), + ("\u03b3", 4), + ("\u03b3", 5), + ("\u03b4", 6), + ("\u03b4", 7), + ], + names=["i0", "i1"], + ) + columns = MultiIndex.from_tuples( + [("bar", "\u0394"), ("bar", "\u0395")], names=["c0", "c1"] + ) + df = DataFrame(np.random.randint(0, 10, (8, 2)), columns=columns, index=index) + _check_plot_works(df.plot, title="\u03A3") + + # GH 6951 + # Test with single column + df = DataFrame({"x": np.random.rand(10)}) + axes = _check_plot_works(df.plot.bar, subplots=True) + self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) + + axes = _check_plot_works(df.plot.bar, subplots=True, layout=(-1, 1)) + self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) + # When ax is supplied and required number of axes is 1, + # passed ax should be used: + fig, ax = self.plt.subplots() + axes = df.plot.bar(subplots=True, ax=ax) + assert len(axes) == 1 + result = ax.axes + assert result is axes[0] + + def test_nullable_int_plot(self): + # GH 32073 + dates = ["2008", "2009", None, "2011", "2012"] + df = DataFrame( + { + "A": [1, 2, 3, 4, 5], + "B": [1.0, 2.0, 3.0, 4.0, 5.0], + "C": [7, 5, np.nan, 3, 2], + "D": pd.to_datetime(dates, format="%Y").view("i8"), + "E": pd.to_datetime(dates, format="%Y", utc=True).view("i8"), + }, + dtype=np.int64, + ) + + _check_plot_works(df.plot, x="A", y="B") + _check_plot_works(df[["A", "B"]].plot, x="A", y="B") + _check_plot_works(df[["C", "A"]].plot, x="C", y="A") # nullable value on x-axis + _check_plot_works(df[["A", "C"]].plot, x="A", y="C") + _check_plot_works(df[["B", "C"]].plot, x="B", y="C") + _check_plot_works(df[["A", "D"]].plot, x="A", y="D") + _check_plot_works(df[["A", "E"]].plot, x="A", y="E") + + def 
test_integer_array_plot(self): + # GH 25587 + arr = pd.array([1, 2, 3, 4], dtype="UInt32") + + s = Series(arr) + _check_plot_works(s.plot.line) + _check_plot_works(s.plot.bar) + _check_plot_works(s.plot.hist) + _check_plot_works(s.plot.pie) + + df = DataFrame({"x": arr, "y": arr}) + _check_plot_works(df.plot.line) + _check_plot_works(df.plot.bar) + _check_plot_works(df.plot.hist) + _check_plot_works(df.plot.pie, y="y") + _check_plot_works(df.plot.scatter, x="x", y="y") + _check_plot_works(df.plot.hexbin, x="x", y="y") + + def test_nonnumeric_exclude(self): + df = DataFrame({"A": ["x", "y", "z"], "B": [1, 2, 3]}) + ax = df.plot() + assert len(ax.get_lines()) == 1 # B was plotted + + def test_implicit_label(self): + df = DataFrame(np.random.randn(10, 3), columns=["a", "b", "c"]) + ax = df.plot(x="a", y="b") + self._check_text_labels(ax.xaxis.get_label(), "a") + + def test_donot_overwrite_index_name(self): + # GH 8494 + df = DataFrame(np.random.randn(2, 2), columns=["a", "b"]) + df.index.name = "NAME" + df.plot(y="b", label="LABEL") + assert df.index.name == "NAME" + + def test_plot_xy(self): + # columns.inferred_type == 'string' + df = self.tdf + self._check_data(df.plot(x=0, y=1), df.set_index("A")["B"].plot()) + self._check_data(df.plot(x=0), df.set_index("A").plot()) + self._check_data(df.plot(y=0), df.B.plot()) + self._check_data(df.plot(x="A", y="B"), df.set_index("A").B.plot()) + self._check_data(df.plot(x="A"), df.set_index("A").plot()) + self._check_data(df.plot(y="B"), df.B.plot()) + + # columns.inferred_type == 'integer' + df.columns = np.arange(1, len(df.columns) + 1) + self._check_data(df.plot(x=1, y=2), df.set_index(1)[2].plot()) + self._check_data(df.plot(x=1), df.set_index(1).plot()) + self._check_data(df.plot(y=1), df[1].plot()) + + # figsize and title + ax = df.plot(x=1, y=2, title="Test", figsize=(16, 8)) + self._check_text_labels(ax.title, "Test") + self._check_axes_shape(ax, axes_num=1, layout=(1, 1), figsize=(16.0, 8.0)) + + # columns.inferred_type == 'mixed' + # TODO add MultiIndex test + + @pytest.mark.parametrize( + "input_log, expected_log", [(True, "log"), ("sym", "symlog")] + ) + def test_logscales(self, input_log, expected_log): + df = DataFrame({"a": np.arange(100)}, index=np.arange(100)) + + ax = df.plot(logy=input_log) + self._check_ax_scales(ax, yaxis=expected_log) + assert ax.get_yscale() == expected_log + + ax = df.plot(logx=input_log) + self._check_ax_scales(ax, xaxis=expected_log) + assert ax.get_xscale() == expected_log + + ax = df.plot(loglog=input_log) + self._check_ax_scales(ax, xaxis=expected_log, yaxis=expected_log) + assert ax.get_xscale() == expected_log + assert ax.get_yscale() == expected_log + + @pytest.mark.parametrize("input_param", ["logx", "logy", "loglog"]) + def test_invalid_logscale(self, input_param): + # GH: 24867 + df = DataFrame({"a": np.arange(100)}, index=np.arange(100)) + + msg = "Boolean, None and 'sym' are valid options, 'sm' is given." 
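+        # pytest.raises(..., match=...) applies re.search, so the literal
+        # message above doubles as the pattern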
+ with pytest.raises(ValueError, match=msg): + df.plot(**{input_param: "sm"}) + + def test_xcompat(self): + + df = self.tdf + ax = df.plot(x_compat=True) + lines = ax.get_lines() + assert not isinstance(lines[0].get_xdata(), PeriodIndex) + self._check_ticks_props(ax, xrot=30) + + tm.close() + plotting.plot_params["xaxis.compat"] = True + ax = df.plot() + lines = ax.get_lines() + assert not isinstance(lines[0].get_xdata(), PeriodIndex) + self._check_ticks_props(ax, xrot=30) + + tm.close() + plotting.plot_params["x_compat"] = False + + ax = df.plot() + lines = ax.get_lines() + assert not isinstance(lines[0].get_xdata(), PeriodIndex) + assert isinstance(PeriodIndex(lines[0].get_xdata()), PeriodIndex) + + tm.close() + # useful if you're plotting a bunch together + with plotting.plot_params.use("x_compat", True): + ax = df.plot() + lines = ax.get_lines() + assert not isinstance(lines[0].get_xdata(), PeriodIndex) + self._check_ticks_props(ax, xrot=30) + + tm.close() + ax = df.plot() + lines = ax.get_lines() + assert not isinstance(lines[0].get_xdata(), PeriodIndex) + assert isinstance(PeriodIndex(lines[0].get_xdata()), PeriodIndex) + self._check_ticks_props(ax, xrot=0) + + def test_period_compat(self): + # GH 9012 + # period-array conversions + df = DataFrame( + np.random.rand(21, 2), + index=bdate_range(datetime(2000, 1, 1), datetime(2000, 1, 31)), + columns=["a", "b"], + ) + + df.plot() + self.plt.axhline(y=0) + tm.close() + + def test_unsorted_index(self): + df = DataFrame( + {"y": np.arange(100)}, index=np.arange(99, -1, -1), dtype=np.int64 + ) + ax = df.plot() + lines = ax.get_lines()[0] + rs = lines.get_xydata() + rs = Series(rs[:, 1], rs[:, 0], dtype=np.int64, name="y") + tm.assert_series_equal(rs, df.y, check_index_type=False) + tm.close() + + df.index = pd.Index(np.arange(99, -1, -1), dtype=np.float64) + ax = df.plot() + lines = ax.get_lines()[0] + rs = lines.get_xydata() + rs = Series(rs[:, 1], rs[:, 0], dtype=np.int64, name="y") + tm.assert_series_equal(rs, df.y) + + def test_unsorted_index_lims(self): + df = DataFrame({"y": [0.0, 1.0, 2.0, 3.0]}, index=[1.0, 0.0, 3.0, 2.0]) + ax = df.plot() + xmin, xmax = ax.get_xlim() + lines = ax.get_lines() + assert xmin <= np.nanmin(lines[0].get_data()[0]) + assert xmax >= np.nanmax(lines[0].get_data()[0]) + + df = DataFrame( + {"y": [0.0, 1.0, np.nan, 3.0, 4.0, 5.0, 6.0]}, + index=[1.0, 0.0, 3.0, 2.0, np.nan, 3.0, 2.0], + ) + ax = df.plot() + xmin, xmax = ax.get_xlim() + lines = ax.get_lines() + assert xmin <= np.nanmin(lines[0].get_data()[0]) + assert xmax >= np.nanmax(lines[0].get_data()[0]) + + df = DataFrame({"y": [0.0, 1.0, 2.0, 3.0], "z": [91.0, 90.0, 93.0, 92.0]}) + ax = df.plot(x="z", y="y") + xmin, xmax = ax.get_xlim() + lines = ax.get_lines() + assert xmin <= np.nanmin(lines[0].get_data()[0]) + assert xmax >= np.nanmax(lines[0].get_data()[0]) + + def test_negative_log(self): + df = -DataFrame( + np.random.rand(6, 4), + index=list(string.ascii_letters[:6]), + columns=["x", "y", "z", "four"], + ) + msg = "Log-y scales are not supported in area plot" + with pytest.raises(ValueError, match=msg): + df.plot.area(logy=True) + with pytest.raises(ValueError, match=msg): + df.plot.area(loglog=True) + + def _compare_stacked_y_cood(self, normal_lines, stacked_lines): + base = np.zeros(len(normal_lines[0].get_data()[1])) + for nl, sl in zip(normal_lines, stacked_lines): + base += nl.get_data()[1] # get y coordinates + sy = sl.get_data()[1] + tm.assert_numpy_array_equal(base, sy) + + def test_line_area_stacked(self): + with tm.RNGContext(42): + df = 
DataFrame(np.random.rand(6, 4), columns=["w", "x", "y", "z"]) + neg_df = -df + # each column has either positive or negative value + sep_df = DataFrame( + { + "w": np.random.rand(6), + "x": np.random.rand(6), + "y": -np.random.rand(6), + "z": -np.random.rand(6), + } + ) + # each column has positive-negative mixed value + mixed_df = DataFrame( + np.random.randn(6, 4), + index=list(string.ascii_letters[:6]), + columns=["w", "x", "y", "z"], + ) + + for kind in ["line", "area"]: + ax1 = _check_plot_works(df.plot, kind=kind, stacked=False) + ax2 = _check_plot_works(df.plot, kind=kind, stacked=True) + self._compare_stacked_y_cood(ax1.lines, ax2.lines) + + ax1 = _check_plot_works(neg_df.plot, kind=kind, stacked=False) + ax2 = _check_plot_works(neg_df.plot, kind=kind, stacked=True) + self._compare_stacked_y_cood(ax1.lines, ax2.lines) + + ax1 = _check_plot_works(sep_df.plot, kind=kind, stacked=False) + ax2 = _check_plot_works(sep_df.plot, kind=kind, stacked=True) + self._compare_stacked_y_cood(ax1.lines[:2], ax2.lines[:2]) + self._compare_stacked_y_cood(ax1.lines[2:], ax2.lines[2:]) + + _check_plot_works(mixed_df.plot, stacked=False) + msg = ( + "When stacked is True, each column must be either all positive or " + "all negative. Column 'w' contains both positive and negative " + "values" + ) + with pytest.raises(ValueError, match=msg): + mixed_df.plot(stacked=True) + + # Use an index with strictly positive values, preventing + # matplotlib from warning about ignoring xlim + df2 = df.set_index(df.index + 1) + _check_plot_works(df2.plot, kind=kind, logx=True, stacked=True) + + def test_line_area_nan_df(self): + values1 = [1, 2, np.nan, 3] + values2 = [3, np.nan, 2, 1] + df = DataFrame({"a": values1, "b": values2}) + tdf = DataFrame({"a": values1, "b": values2}, index=tm.makeDateIndex(k=4)) + + for d in [df, tdf]: + ax = _check_plot_works(d.plot) + masked1 = ax.lines[0].get_ydata() + masked2 = ax.lines[1].get_ydata() + # remove nan for comparison purpose + + exp = np.array([1, 2, 3], dtype=np.float64) + tm.assert_numpy_array_equal(np.delete(masked1.data, 2), exp) + + exp = np.array([3, 2, 1], dtype=np.float64) + tm.assert_numpy_array_equal(np.delete(masked2.data, 1), exp) + tm.assert_numpy_array_equal( + masked1.mask, np.array([False, False, True, False]) + ) + tm.assert_numpy_array_equal( + masked2.mask, np.array([False, True, False, False]) + ) + + expected1 = np.array([1, 2, 0, 3], dtype=np.float64) + expected2 = np.array([3, 0, 2, 1], dtype=np.float64) + + ax = _check_plot_works(d.plot, stacked=True) + tm.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected1) + tm.assert_numpy_array_equal(ax.lines[1].get_ydata(), expected1 + expected2) + + ax = _check_plot_works(d.plot.area) + tm.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected1) + tm.assert_numpy_array_equal(ax.lines[1].get_ydata(), expected1 + expected2) + + ax = _check_plot_works(d.plot.area, stacked=False) + tm.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected1) + tm.assert_numpy_array_equal(ax.lines[1].get_ydata(), expected2) + + def test_line_lim(self): + df = DataFrame(np.random.rand(6, 3), columns=["x", "y", "z"]) + ax = df.plot() + xmin, xmax = ax.get_xlim() + lines = ax.get_lines() + assert xmin <= lines[0].get_data()[0][0] + assert xmax >= lines[0].get_data()[0][-1] + + ax = df.plot(secondary_y=True) + xmin, xmax = ax.get_xlim() + lines = ax.get_lines() + assert xmin <= lines[0].get_data()[0][0] + assert xmax >= lines[0].get_data()[0][-1] + + axes = df.plot(secondary_y=True, subplots=True) + 
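+ # [editor's note] Clarifying comment added: with subplots=True pandas draws
+ # one Axes per column, and secondary_y=True turns each of them into a
+ # right-hand axis, so pandas attaches the original primary axis as `left_ax`
+ # and sets no `right_ax`. These are pandas-added attributes, not matplotlib
+ # API, which is what the hasattr checks below rely on.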
self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) + for ax in axes: + assert hasattr(ax, "left_ax") + assert not hasattr(ax, "right_ax") + xmin, xmax = ax.get_xlim() + lines = ax.get_lines() + assert xmin <= lines[0].get_data()[0][0] + assert xmax >= lines[0].get_data()[0][-1] + + @pytest.mark.xfail( + strict=False, + reason="2020-12-01 this has been failing periodically on the " + "ymin==0 assertion for a week or so.", + ) + @pytest.mark.parametrize("stacked", [True, False]) + def test_area_lim(self, stacked): + df = DataFrame(np.random.rand(6, 4), columns=["x", "y", "z", "four"]) + + neg_df = -df + + ax = _check_plot_works(df.plot.area, stacked=stacked) + xmin, xmax = ax.get_xlim() + ymin, ymax = ax.get_ylim() + lines = ax.get_lines() + assert xmin <= lines[0].get_data()[0][0] + assert xmax >= lines[0].get_data()[0][-1] + assert ymin == 0 + + ax = _check_plot_works(neg_df.plot.area, stacked=stacked) + ymin, ymax = ax.get_ylim() + assert ymax == 0 + + def test_area_sharey_dont_overwrite(self): + # GH37942 + df = DataFrame(np.random.rand(4, 2), columns=["x", "y"]) + fig, (ax1, ax2) = self.plt.subplots(1, 2, sharey=True) + + df.plot(ax=ax1, kind="area") + df.plot(ax=ax2, kind="area") + + assert self.get_y_axis(ax1).joined(ax1, ax2) + assert self.get_y_axis(ax2).joined(ax1, ax2) + + def test_bar_linewidth(self): + df = DataFrame(np.random.randn(5, 5)) + + # regular + ax = df.plot.bar(linewidth=2) + for r in ax.patches: + assert r.get_linewidth() == 2 + + # stacked + ax = df.plot.bar(stacked=True, linewidth=2) + for r in ax.patches: + assert r.get_linewidth() == 2 + + # subplots + axes = df.plot.bar(linewidth=2, subplots=True) + self._check_axes_shape(axes, axes_num=5, layout=(5, 1)) + for ax in axes: + for r in ax.patches: + assert r.get_linewidth() == 2 + + def test_bar_barwidth(self): + df = DataFrame(np.random.randn(5, 5)) + + width = 0.9 + + # regular + ax = df.plot.bar(width=width) + for r in ax.patches: + assert r.get_width() == width / len(df.columns) + + # stacked + ax = df.plot.bar(stacked=True, width=width) + for r in ax.patches: + assert r.get_width() == width + + # horizontal regular + ax = df.plot.barh(width=width) + for r in ax.patches: + assert r.get_height() == width / len(df.columns) + + # horizontal stacked + ax = df.plot.barh(stacked=True, width=width) + for r in ax.patches: + assert r.get_height() == width + + # subplots + axes = df.plot.bar(width=width, subplots=True) + for ax in axes: + for r in ax.patches: + assert r.get_width() == width + + # horizontal subplots + axes = df.plot.barh(width=width, subplots=True) + for ax in axes: + for r in ax.patches: + assert r.get_height() == width + + def test_bar_bottom_left(self): + df = DataFrame(np.random.rand(5, 5)) + ax = df.plot.bar(stacked=False, bottom=1) + result = [p.get_y() for p in ax.patches] + assert result == [1] * 25 + + ax = df.plot.bar(stacked=True, bottom=[-1, -2, -3, -4, -5]) + result = [p.get_y() for p in ax.patches[:5]] + assert result == [-1, -2, -3, -4, -5] + + ax = df.plot.barh(stacked=False, left=np.array([1, 1, 1, 1, 1])) + result = [p.get_x() for p in ax.patches] + assert result == [1] * 25 + + ax = df.plot.barh(stacked=True, left=[1, 2, 3, 4, 5]) + result = [p.get_x() for p in ax.patches[:5]] + assert result == [1, 2, 3, 4, 5] + + axes = df.plot.bar(subplots=True, bottom=-1) + for ax in axes: + result = [p.get_y() for p in ax.patches] + assert result == [-1] * 5 + + axes = df.plot.barh(subplots=True, left=np.array([1, 1, 1, 1, 1])) + for ax in axes: + result = [p.get_x() for p in ax.patches] + 
assert result == [1] * 5 + + def test_bar_nan(self): + df = DataFrame({"A": [10, np.nan, 20], "B": [5, 10, 20], "C": [1, 2, 3]}) + ax = df.plot.bar() + expected = [10, 0, 20, 5, 10, 20, 1, 2, 3] + result = [p.get_height() for p in ax.patches] + assert result == expected + + ax = df.plot.bar(stacked=True) + result = [p.get_height() for p in ax.patches] + assert result == expected + + result = [p.get_y() for p in ax.patches] + expected = [0.0, 0.0, 0.0, 10.0, 0.0, 20.0, 15.0, 10.0, 40.0] + assert result == expected + + def test_bar_categorical(self): + # GH 13019 + df1 = DataFrame( + np.random.randn(6, 5), + index=pd.Index(list("ABCDEF")), + columns=pd.Index(list("abcde")), + ) + # categorical index must behave the same + df2 = DataFrame( + np.random.randn(6, 5), + index=pd.CategoricalIndex(list("ABCDEF")), + columns=pd.CategoricalIndex(list("abcde")), + ) + + for df in [df1, df2]: + ax = df.plot.bar() + ticks = ax.xaxis.get_ticklocs() + tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4, 5])) + assert ax.get_xlim() == (-0.5, 5.5) + # check left-edge of bars + assert ax.patches[0].get_x() == -0.25 + assert ax.patches[-1].get_x() == 5.15 + + ax = df.plot.bar(stacked=True) + tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4, 5])) + assert ax.get_xlim() == (-0.5, 5.5) + assert ax.patches[0].get_x() == -0.25 + assert ax.patches[-1].get_x() == 4.75 + + def test_plot_scatter(self): + df = DataFrame( + np.random.randn(6, 4), + index=list(string.ascii_letters[:6]), + columns=["x", "y", "z", "four"], + ) + + _check_plot_works(df.plot.scatter, x="x", y="y") + _check_plot_works(df.plot.scatter, x=1, y=2) + + msg = re.escape("scatter() missing 1 required positional argument: 'y'") + with pytest.raises(TypeError, match=msg): + df.plot.scatter(x="x") + msg = re.escape("scatter() missing 1 required positional argument: 'x'") + with pytest.raises(TypeError, match=msg): + df.plot.scatter(y="y") + + # GH 6951 + axes = df.plot(x="x", y="y", kind="scatter", subplots=True) + self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) + + def test_raise_error_on_datetime_time_data(self): + # GH 8113, datetime.time type is not supported by matplotlib in scatter + df = DataFrame(np.random.randn(10), columns=["a"]) + df["dtime"] = date_range(start="2014-01-01", freq="h", periods=10).time + msg = "must be a string or a (real )?number, not 'datetime.time'" + + with pytest.raises(TypeError, match=msg): + df.plot(kind="scatter", x="dtime", y="a") + + def test_scatterplot_datetime_data(self): + # GH 30391 + dates = date_range(start=date(2019, 1, 1), periods=12, freq="W") + vals = np.random.normal(0, 1, len(dates)) + df = DataFrame({"dates": dates, "vals": vals}) + + _check_plot_works(df.plot.scatter, x="dates", y="vals") + _check_plot_works(df.plot.scatter, x=0, y=1) + + def test_scatterplot_object_data(self): + # GH 18755 + df = DataFrame({"a": ["A", "B", "C"], "b": [2, 3, 4]}) + + _check_plot_works(df.plot.scatter, x="a", y="b") + _check_plot_works(df.plot.scatter, x=0, y=1) + + df = DataFrame({"a": ["A", "B", "C"], "b": ["a", "b", "c"]}) + + _check_plot_works(df.plot.scatter, x="a", y="b") + _check_plot_works(df.plot.scatter, x=0, y=1) + + @pytest.mark.parametrize("ordered", [True, False]) + @pytest.mark.parametrize( + "categories", + (["setosa", "versicolor", "virginica"], ["versicolor", "virginica", "setosa"]), + ) + def test_scatterplot_color_by_categorical(self, ordered, categories): + df = DataFrame( + [[5.1, 3.5], [4.9, 3.0], [7.0, 3.2], [6.4, 3.2], [5.9, 3.0]], + columns=["length", "width"], + ) + 
df["species"] = pd.Categorical( + ["setosa", "setosa", "virginica", "virginica", "versicolor"], + ordered=ordered, + categories=categories, + ) + ax = df.plot.scatter(x=0, y=1, c="species") + (colorbar_collection,) = ax.collections + colorbar = colorbar_collection.colorbar + + expected_ticks = np.array([0.5, 1.5, 2.5]) + result_ticks = colorbar.get_ticks() + tm.assert_numpy_array_equal(result_ticks, expected_ticks) + + expected_boundaries = np.array([0.0, 1.0, 2.0, 3.0]) + result_boundaries = colorbar._boundaries + tm.assert_numpy_array_equal(result_boundaries, expected_boundaries) + + expected_yticklabels = categories + result_yticklabels = [i.get_text() for i in colorbar.ax.get_ymajorticklabels()] + assert all(i == j for i, j in zip(result_yticklabels, expected_yticklabels)) + + @pytest.mark.parametrize("x, y", [("x", "y"), ("y", "x"), ("y", "y")]) + def test_plot_scatter_with_categorical_data(self, x, y): + # after fixing GH 18755, should be able to plot categorical data + df = DataFrame({"x": [1, 2, 3, 4], "y": pd.Categorical(["a", "b", "a", "c"])}) + + _check_plot_works(df.plot.scatter, x=x, y=y) + + def test_plot_scatter_with_c(self, request): + from pandas.plotting._matplotlib.compat import mpl_ge_3_4_0 + + df = DataFrame( + np.random.randn(6, 4), + index=list(string.ascii_letters[:6]), + columns=["x", "y", "z", "four"], + ) + + axes = [df.plot.scatter(x="x", y="y", c="z"), df.plot.scatter(x=0, y=1, c=2)] + for ax in axes: + # default to Greys + assert ax.collections[0].cmap.name == "Greys" + + if mpl_ge_3_4_0(): + assert ax.collections[0].colorbar.ax.get_ylabel() == "z" + else: + assert ax.collections[0].colorbar._label == "z" + + cm = "cubehelix" + ax = df.plot.scatter(x="x", y="y", c="z", colormap=cm) + assert ax.collections[0].cmap.name == cm + + # verify turning off colorbar works + ax = df.plot.scatter(x="x", y="y", c="z", colorbar=False) + assert ax.collections[0].colorbar is None + + # verify that we can still plot a solid color + ax = df.plot.scatter(x=0, y=1, c="red") + assert ax.collections[0].colorbar is None + self._check_colors(ax.collections, facecolors=["r"]) + + # Ensure that we can pass an np.array straight through to matplotlib, + # this functionality was accidentally removed previously. + # See https://github.com/pandas-dev/pandas/issues/8852 for bug report + # + # Exercise colormap path and non-colormap path as they are independent + # + df = DataFrame({"A": [1, 2], "B": [3, 4]}) + red_rgba = [1.0, 0.0, 0.0, 1.0] + green_rgba = [0.0, 1.0, 0.0, 1.0] + rgba_array = np.array([red_rgba, green_rgba]) + ax = df.plot.scatter(x="A", y="B", c=rgba_array) + # expect the face colors of the points in the non-colormap path to be + # identical to the values we supplied, normally we'd be on shaky ground + # comparing floats for equality but here we expect them to be + # identical. + tm.assert_numpy_array_equal(ax.collections[0].get_facecolor(), rgba_array) + # we don't test the colors of the faces in this next plot because they + # are dependent on the spring colormap, which may change its colors + # later. 
+ float_array = np.array([0.0, 1.0]) + df.plot.scatter(x="A", y="B", c=float_array, cmap="spring") + + def test_plot_scatter_with_s(self): + # this refers to GH 32904 + df = DataFrame(np.random.random((10, 3)) * 100, columns=["a", "b", "c"]) + + ax = df.plot.scatter(x="a", y="b", s="c") + tm.assert_numpy_array_equal(df["c"].values, right=ax.collections[0].get_sizes()) + + def test_plot_bar(self): + df = DataFrame( + np.random.randn(6, 4), + index=list(string.ascii_letters[:6]), + columns=["one", "two", "three", "four"], + ) + + _check_plot_works(df.plot.bar) + _check_plot_works(df.plot.bar, legend=False) + _check_plot_works(df.plot.bar, default_axes=True, subplots=True) + _check_plot_works(df.plot.bar, stacked=True) + + df = DataFrame( + np.random.randn(10, 15), + index=list(string.ascii_letters[:10]), + columns=range(15), + ) + _check_plot_works(df.plot.bar) + + df = DataFrame({"a": [0, 1], "b": [1, 0]}) + ax = _check_plot_works(df.plot.bar) + self._check_ticks_props(ax, xrot=90) + + ax = df.plot.bar(rot=35, fontsize=10) + self._check_ticks_props(ax, xrot=35, xlabelsize=10, ylabelsize=10) + + ax = _check_plot_works(df.plot.barh) + self._check_ticks_props(ax, yrot=0) + + ax = df.plot.barh(rot=55, fontsize=11) + self._check_ticks_props(ax, yrot=55, ylabelsize=11, xlabelsize=11) + + def test_boxplot(self): + df = self.hist_df + series = df["height"] + numeric_cols = df._get_numeric_data().columns + labels = [pprint_thing(c) for c in numeric_cols] + + ax = _check_plot_works(df.plot.box) + self._check_text_labels(ax.get_xticklabels(), labels) + tm.assert_numpy_array_equal( + ax.xaxis.get_ticklocs(), np.arange(1, len(numeric_cols) + 1) + ) + assert len(ax.lines) == self.bp_n_objects * len(numeric_cols) + tm.close() + + axes = series.plot.box(rot=40) + self._check_ticks_props(axes, xrot=40, yrot=0) + tm.close() + + ax = _check_plot_works(series.plot.box) + + positions = np.array([1, 6, 7]) + ax = df.plot.box(positions=positions) + numeric_cols = df._get_numeric_data().columns + labels = [pprint_thing(c) for c in numeric_cols] + self._check_text_labels(ax.get_xticklabels(), labels) + tm.assert_numpy_array_equal(ax.xaxis.get_ticklocs(), positions) + assert len(ax.lines) == self.bp_n_objects * len(numeric_cols) + + def test_boxplot_vertical(self): + df = self.hist_df + numeric_cols = df._get_numeric_data().columns + labels = [pprint_thing(c) for c in numeric_cols] + + # if horizontal, yticklabels are rotated + ax = df.plot.box(rot=50, fontsize=8, vert=False) + self._check_ticks_props(ax, xrot=0, yrot=50, ylabelsize=8) + self._check_text_labels(ax.get_yticklabels(), labels) + assert len(ax.lines) == self.bp_n_objects * len(numeric_cols) + + axes = _check_plot_works( + df.plot.box, + default_axes=True, + subplots=True, + vert=False, + logx=True, + ) + self._check_axes_shape(axes, axes_num=3, layout=(1, 3)) + self._check_ax_scales(axes, xaxis="log") + for ax, label in zip(axes, labels): + self._check_text_labels(ax.get_yticklabels(), [label]) + assert len(ax.lines) == self.bp_n_objects + + positions = np.array([3, 2, 8]) + ax = df.plot.box(positions=positions, vert=False) + self._check_text_labels(ax.get_yticklabels(), labels) + tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), positions) + assert len(ax.lines) == self.bp_n_objects * len(numeric_cols) + + def test_boxplot_return_type(self): + df = DataFrame( + np.random.randn(6, 4), + index=list(string.ascii_letters[:6]), + columns=["one", "two", "three", "four"], + ) + msg = "return_type must be {None, 'axes', 'dict', 'both'}" + with 
pytest.raises(ValueError, match=msg): + df.plot.box(return_type="not_a_type") + + result = df.plot.box(return_type="dict") + self._check_box_return_type(result, "dict") + + result = df.plot.box(return_type="axes") + self._check_box_return_type(result, "axes") + + result = df.plot.box() # default axes + self._check_box_return_type(result, "axes") + + result = df.plot.box(return_type="both") + self._check_box_return_type(result, "both") + + @td.skip_if_no_scipy + def test_kde_df(self): + df = DataFrame(np.random.randn(100, 4)) + ax = _check_plot_works(df.plot, kind="kde") + expected = [pprint_thing(c) for c in df.columns] + self._check_legend_labels(ax, labels=expected) + self._check_ticks_props(ax, xrot=0) + + ax = df.plot(kind="kde", rot=20, fontsize=5) + self._check_ticks_props(ax, xrot=20, xlabelsize=5, ylabelsize=5) + + axes = _check_plot_works( + df.plot, + default_axes=True, + kind="kde", + subplots=True, + ) + self._check_axes_shape(axes, axes_num=4, layout=(4, 1)) + + axes = df.plot(kind="kde", logy=True, subplots=True) + self._check_ax_scales(axes, yaxis="log") + + @td.skip_if_no_scipy + def test_kde_missing_vals(self): + df = DataFrame(np.random.uniform(size=(100, 4))) + df.loc[0, 0] = np.nan + _check_plot_works(df.plot, kind="kde") + + def test_hist_df(self): + from matplotlib.patches import Rectangle + + df = DataFrame(np.random.randn(100, 4)) + series = df[0] + + ax = _check_plot_works(df.plot.hist) + expected = [pprint_thing(c) for c in df.columns] + self._check_legend_labels(ax, labels=expected) + + axes = _check_plot_works( + df.plot.hist, + default_axes=True, + subplots=True, + logy=True, + ) + self._check_axes_shape(axes, axes_num=4, layout=(4, 1)) + self._check_ax_scales(axes, yaxis="log") + + axes = series.plot.hist(rot=40) + self._check_ticks_props(axes, xrot=40, yrot=0) + tm.close() + + ax = series.plot.hist(cumulative=True, bins=4, density=True) + # height of last bin (index 5) must be 1.0 + rects = [x for x in ax.get_children() if isinstance(x, Rectangle)] + tm.assert_almost_equal(rects[-1].get_height(), 1.0) + tm.close() + + ax = series.plot.hist(cumulative=True, bins=4) + rects = [x for x in ax.get_children() if isinstance(x, Rectangle)] + + tm.assert_almost_equal(rects[-2].get_height(), 100.0) + tm.close() + + # if horizontal, yticklabels are rotated + axes = df.plot.hist(rot=50, fontsize=8, orientation="horizontal") + self._check_ticks_props(axes, xrot=0, yrot=50, ylabelsize=8) + + @pytest.mark.parametrize( + "weights", [0.1 * np.ones(shape=(100,)), 0.1 * np.ones(shape=(100, 2))] + ) + def test_hist_weights(self, weights): + # GH 33173 + np.random.seed(0) + df = DataFrame(dict(zip(["A", "B"], np.random.randn(2, 100)))) + + ax1 = _check_plot_works(df.plot, kind="hist", weights=weights) + ax2 = _check_plot_works(df.plot, kind="hist") + + patch_height_with_weights = [patch.get_height() for patch in ax1.patches] + + # original heights with no weights, and we manually multiply with example + # weights, so after multiplication, they should be almost same + expected_patch_height = [0.1 * patch.get_height() for patch in ax2.patches] + + tm.assert_almost_equal(patch_height_with_weights, expected_patch_height) + + def _check_box_coord( + self, + patches, + expected_y=None, + expected_h=None, + expected_x=None, + expected_w=None, + ): + result_y = np.array([p.get_y() for p in patches]) + result_height = np.array([p.get_height() for p in patches]) + result_x = np.array([p.get_x() for p in patches]) + result_width = np.array([p.get_width() for p in patches]) + # dtype is 
depending on above values, no need to check + + if expected_y is not None: + tm.assert_numpy_array_equal(result_y, expected_y, check_dtype=False) + if expected_h is not None: + tm.assert_numpy_array_equal(result_height, expected_h, check_dtype=False) + if expected_x is not None: + tm.assert_numpy_array_equal(result_x, expected_x, check_dtype=False) + if expected_w is not None: + tm.assert_numpy_array_equal(result_width, expected_w, check_dtype=False) + + def test_hist_df_coord(self): + normal_df = DataFrame( + { + "A": np.repeat(np.array([1, 2, 3, 4, 5]), np.array([10, 9, 8, 7, 6])), + "B": np.repeat(np.array([1, 2, 3, 4, 5]), np.array([8, 8, 8, 8, 8])), + "C": np.repeat(np.array([1, 2, 3, 4, 5]), np.array([6, 7, 8, 9, 10])), + }, + columns=["A", "B", "C"], + ) + + nan_df = DataFrame( + { + "A": np.repeat( + np.array([np.nan, 1, 2, 3, 4, 5]), np.array([3, 10, 9, 8, 7, 6]) + ), + "B": np.repeat( + np.array([1, np.nan, 2, 3, 4, 5]), np.array([8, 3, 8, 8, 8, 8]) + ), + "C": np.repeat( + np.array([1, 2, 3, np.nan, 4, 5]), np.array([6, 7, 8, 3, 9, 10]) + ), + }, + columns=["A", "B", "C"], + ) + + for df in [normal_df, nan_df]: + ax = df.plot.hist(bins=5) + self._check_box_coord( + ax.patches[:5], + expected_y=np.array([0, 0, 0, 0, 0]), + expected_h=np.array([10, 9, 8, 7, 6]), + ) + self._check_box_coord( + ax.patches[5:10], + expected_y=np.array([0, 0, 0, 0, 0]), + expected_h=np.array([8, 8, 8, 8, 8]), + ) + self._check_box_coord( + ax.patches[10:], + expected_y=np.array([0, 0, 0, 0, 0]), + expected_h=np.array([6, 7, 8, 9, 10]), + ) + + ax = df.plot.hist(bins=5, stacked=True) + self._check_box_coord( + ax.patches[:5], + expected_y=np.array([0, 0, 0, 0, 0]), + expected_h=np.array([10, 9, 8, 7, 6]), + ) + self._check_box_coord( + ax.patches[5:10], + expected_y=np.array([10, 9, 8, 7, 6]), + expected_h=np.array([8, 8, 8, 8, 8]), + ) + self._check_box_coord( + ax.patches[10:], + expected_y=np.array([18, 17, 16, 15, 14]), + expected_h=np.array([6, 7, 8, 9, 10]), + ) + + axes = df.plot.hist(bins=5, stacked=True, subplots=True) + self._check_box_coord( + axes[0].patches, + expected_y=np.array([0, 0, 0, 0, 0]), + expected_h=np.array([10, 9, 8, 7, 6]), + ) + self._check_box_coord( + axes[1].patches, + expected_y=np.array([0, 0, 0, 0, 0]), + expected_h=np.array([8, 8, 8, 8, 8]), + ) + self._check_box_coord( + axes[2].patches, + expected_y=np.array([0, 0, 0, 0, 0]), + expected_h=np.array([6, 7, 8, 9, 10]), + ) + + # horizontal + ax = df.plot.hist(bins=5, orientation="horizontal") + self._check_box_coord( + ax.patches[:5], + expected_x=np.array([0, 0, 0, 0, 0]), + expected_w=np.array([10, 9, 8, 7, 6]), + ) + self._check_box_coord( + ax.patches[5:10], + expected_x=np.array([0, 0, 0, 0, 0]), + expected_w=np.array([8, 8, 8, 8, 8]), + ) + self._check_box_coord( + ax.patches[10:], + expected_x=np.array([0, 0, 0, 0, 0]), + expected_w=np.array([6, 7, 8, 9, 10]), + ) + + ax = df.plot.hist(bins=5, stacked=True, orientation="horizontal") + self._check_box_coord( + ax.patches[:5], + expected_x=np.array([0, 0, 0, 0, 0]), + expected_w=np.array([10, 9, 8, 7, 6]), + ) + self._check_box_coord( + ax.patches[5:10], + expected_x=np.array([10, 9, 8, 7, 6]), + expected_w=np.array([8, 8, 8, 8, 8]), + ) + self._check_box_coord( + ax.patches[10:], + expected_x=np.array([18, 17, 16, 15, 14]), + expected_w=np.array([6, 7, 8, 9, 10]), + ) + + axes = df.plot.hist( + bins=5, stacked=True, subplots=True, orientation="horizontal" + ) + self._check_box_coord( + axes[0].patches, + expected_x=np.array([0, 0, 0, 0, 0]), + 
expected_w=np.array([10, 9, 8, 7, 6]), + ) + self._check_box_coord( + axes[1].patches, + expected_x=np.array([0, 0, 0, 0, 0]), + expected_w=np.array([8, 8, 8, 8, 8]), + ) + self._check_box_coord( + axes[2].patches, + expected_x=np.array([0, 0, 0, 0, 0]), + expected_w=np.array([6, 7, 8, 9, 10]), + ) + + def test_plot_int_columns(self): + df = DataFrame(np.random.randn(100, 4)).cumsum() + _check_plot_works(df.plot, legend=True) + + def test_style_by_column(self): + import matplotlib.pyplot as plt + + fig = plt.gcf() + + df = DataFrame(np.random.randn(100, 3)) + for markers in [ + {0: "^", 1: "+", 2: "o"}, + {0: "^", 1: "+"}, + ["^", "+", "o"], + ["^", "+"], + ]: + fig.clf() + fig.add_subplot(111) + ax = df.plot(style=markers) + for idx, line in enumerate(ax.get_lines()[: len(markers)]): + assert line.get_marker() == markers[idx] + + def test_line_label_none(self): + s = Series([1, 2]) + ax = s.plot() + assert ax.get_legend() is None + + ax = s.plot(legend=True) + assert ax.get_legend().get_texts()[0].get_text() == "None" + + @pytest.mark.parametrize( + "props, expected", + [ + ("boxprops", "boxes"), + ("whiskerprops", "whiskers"), + ("capprops", "caps"), + ("medianprops", "medians"), + ], + ) + def test_specified_props_kwd_plot_box(self, props, expected): + # GH 30346 + df = DataFrame({k: np.random.random(100) for k in "ABC"}) + kwd = {props: {"color": "C1"}} + result = df.plot.box(return_type="dict", **kwd) + + assert result[expected][0].get_color() == "C1" + + def test_unordered_ts(self): + df = DataFrame( + np.array([3.0, 2.0, 1.0]), + index=[date(2012, 10, 1), date(2012, 9, 1), date(2012, 8, 1)], + columns=["test"], + ) + ax = df.plot() + xticks = ax.lines[0].get_xdata() + assert xticks[0] < xticks[1] + ydata = ax.lines[0].get_ydata() + tm.assert_numpy_array_equal(ydata, np.array([1.0, 2.0, 3.0])) + + @td.skip_if_no_scipy + def test_kind_both_ways(self): + df = DataFrame({"x": [1, 2, 3]}) + for kind in plotting.PlotAccessor._common_kinds: + + df.plot(kind=kind) + getattr(df.plot, kind)() + for kind in ["scatter", "hexbin"]: + df.plot("x", "x", kind=kind) + getattr(df.plot, kind)("x", "x") + + def test_all_invalid_plot_data(self): + df = DataFrame(list("abcd")) + for kind in plotting.PlotAccessor._common_kinds: + + msg = "no numeric data to plot" + with pytest.raises(TypeError, match=msg): + df.plot(kind=kind) + + def test_partially_invalid_plot_data(self): + with tm.RNGContext(42): + df = DataFrame(np.random.randn(10, 2), dtype=object) + df[np.random.rand(df.shape[0]) > 0.5] = "a" + for kind in plotting.PlotAccessor._common_kinds: + msg = "no numeric data to plot" + with pytest.raises(TypeError, match=msg): + df.plot(kind=kind) + + with tm.RNGContext(42): + # area plot doesn't support positive/negative mixed data + df = DataFrame(np.random.rand(10, 2), dtype=object) + df[np.random.rand(df.shape[0]) > 0.5] = "a" + with pytest.raises(TypeError, match="no numeric data to plot"): + df.plot(kind="area") + + def test_invalid_kind(self): + df = DataFrame(np.random.randn(10, 2)) + msg = "invalid_plot_kind is not a valid plot kind" + with pytest.raises(ValueError, match=msg): + df.plot(kind="invalid_plot_kind") + + @pytest.mark.parametrize( + "x,y,lbl", + [ + (["B", "C"], "A", "a"), + (["A"], ["B", "C"], ["b", "c"]), + ], + ) + def test_invalid_xy_args(self, x, y, lbl): + # GH 18671, 19699 allows y to be list-like but not x + df = DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]}) + with pytest.raises(ValueError, match="x must be a label or position"): + df.plot(x=x, y=y, label=lbl) + + def 
test_bad_label(self): + df = DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]}) + msg = "label should be list-like and same length as y" + with pytest.raises(ValueError, match=msg): + df.plot(x="A", y=["B", "C"], label="bad_label") + + @pytest.mark.parametrize("x,y", [("A", "B"), (["A"], "B")]) + def test_invalid_xy_args_dup_cols(self, x, y): + # GH 18671, 19699 allows y to be list-like but not x + df = DataFrame([[1, 3, 5], [2, 4, 6]], columns=list("AAB")) + with pytest.raises(ValueError, match="x must be a label or position"): + df.plot(x=x, y=y) + + @pytest.mark.parametrize( + "x,y,lbl,colors", + [ + ("A", ["B"], ["b"], ["red"]), + ("A", ["B", "C"], ["b", "c"], ["red", "blue"]), + (0, [1, 2], ["bokeh", "cython"], ["green", "yellow"]), + ], + ) + def test_y_listlike(self, x, y, lbl, colors): + # GH 19699: tests list-like y and verifies lbls & colors + df = DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]}) + _check_plot_works(df.plot, x="A", y=y, label=lbl) + + ax = df.plot(x=x, y=y, label=lbl, color=colors) + assert len(ax.lines) == len(y) + self._check_colors(ax.get_lines(), linecolors=colors) + + @pytest.mark.parametrize("x,y,colnames", [(0, 1, ["A", "B"]), (1, 0, [0, 1])]) + def test_xy_args_integer(self, x, y, colnames): + # GH 20056: tests integer args for xy and checks col names + df = DataFrame({"A": [1, 2], "B": [3, 4]}) + df.columns = colnames + _check_plot_works(df.plot, x=x, y=y) + + def test_hexbin_basic(self): + df = self.hexbin_df + + ax = df.plot.hexbin(x="A", y="B", gridsize=10) + # TODO: need better way to test. This just does existence. + assert len(ax.collections) == 1 + + # GH 6951 + axes = df.plot.hexbin(x="A", y="B", subplots=True) + # hexbin should have 2 axes in the figure, 1 for plotting and another + # is colorbar + assert len(axes[0].figure.axes) == 2 + # return value is single axes + self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) + + def test_hexbin_with_c(self): + df = self.hexbin_df + + ax = df.plot.hexbin(x="A", y="B", C="C") + assert len(ax.collections) == 1 + + ax = df.plot.hexbin(x="A", y="B", C="C", reduce_C_function=np.std) + assert len(ax.collections) == 1 + + @pytest.mark.parametrize( + "kwargs, expected", + [ + ({}, "BuGn"), # default cmap + ({"colormap": "cubehelix"}, "cubehelix"), + ({"cmap": "YlGn"}, "YlGn"), + ], + ) + def test_hexbin_cmap(self, kwargs, expected): + df = self.hexbin_df + ax = df.plot.hexbin(x="A", y="B", **kwargs) + assert ax.collections[0].cmap.name == expected + + def test_pie_df(self): + df = DataFrame( + np.random.rand(5, 3), + columns=["X", "Y", "Z"], + index=["a", "b", "c", "d", "e"], + ) + msg = "pie requires either y column or 'subplots=True'" + with pytest.raises(ValueError, match=msg): + df.plot.pie() + + ax = _check_plot_works(df.plot.pie, y="Y") + self._check_text_labels(ax.texts, df.index) + + ax = _check_plot_works(df.plot.pie, y=2) + self._check_text_labels(ax.texts, df.index) + + axes = _check_plot_works( + df.plot.pie, + default_axes=True, + subplots=True, + ) + assert len(axes) == len(df.columns) + for ax in axes: + self._check_text_labels(ax.texts, df.index) + for ax, ylabel in zip(axes, df.columns): + assert ax.get_ylabel() == ylabel + + labels = ["A", "B", "C", "D", "E"] + color_args = ["r", "g", "b", "c", "m"] + axes = _check_plot_works( + df.plot.pie, + default_axes=True, + subplots=True, + labels=labels, + colors=color_args, + ) + assert len(axes) == len(df.columns) + + for ax in axes: + self._check_text_labels(ax.texts, labels) + self._check_colors(ax.patches, facecolors=color_args) + + def 
test_pie_df_nan(self): + import matplotlib as mpl + + df = DataFrame(np.random.rand(4, 4)) + for i in range(4): + df.iloc[i, i] = np.nan + fig, axes = self.plt.subplots(ncols=4) + + # GH 37668 + kwargs = {} + if mpl.__version__ >= "3.3": + kwargs = {"normalize": True} + + with tm.assert_produces_warning(None): + df.plot.pie(subplots=True, ax=axes, legend=True, **kwargs) + + base_expected = ["0", "1", "2", "3"] + for i, ax in enumerate(axes): + expected = list(base_expected) # force copy + expected[i] = "" + result = [x.get_text() for x in ax.texts] + assert result == expected + + # legend labels + # NaN's not included in legend with subplots + # see https://github.com/pandas-dev/pandas/issues/8390 + result_labels = [x.get_text() for x in ax.get_legend().get_texts()] + expected_labels = base_expected[:i] + base_expected[i + 1 :] + assert result_labels == expected_labels + + def test_errorbar_plot(self): + d = {"x": np.arange(12), "y": np.arange(12, 0, -1)} + df = DataFrame(d) + d_err = {"x": np.ones(12) * 0.2, "y": np.ones(12) * 0.4} + df_err = DataFrame(d_err) + + # check line plots + ax = _check_plot_works(df.plot, yerr=df_err, logy=True) + self._check_has_errorbars(ax, xerr=0, yerr=2) + + ax = _check_plot_works(df.plot, yerr=df_err, logx=True, logy=True) + self._check_has_errorbars(ax, xerr=0, yerr=2) + + ax = _check_plot_works(df.plot, yerr=df_err, loglog=True) + self._check_has_errorbars(ax, xerr=0, yerr=2) + + ax = _check_plot_works( + (df + 1).plot, yerr=df_err, xerr=df_err, kind="bar", log=True + ) + self._check_has_errorbars(ax, xerr=2, yerr=2) + + # yerr is raw error values + ax = _check_plot_works(df["y"].plot, yerr=np.ones(12) * 0.4) + self._check_has_errorbars(ax, xerr=0, yerr=1) + + ax = _check_plot_works(df.plot, yerr=np.ones((2, 12)) * 0.4) + self._check_has_errorbars(ax, xerr=0, yerr=2) + + # yerr is column name + for yerr in ["yerr", "誤差"]: + s_df = df.copy() + s_df[yerr] = np.ones(12) * 0.2 + + ax = _check_plot_works(s_df.plot, yerr=yerr) + self._check_has_errorbars(ax, xerr=0, yerr=2) + + ax = _check_plot_works(s_df.plot, y="y", x="x", yerr=yerr) + self._check_has_errorbars(ax, xerr=0, yerr=1) + + with tm.external_error_raised(ValueError): + df.plot(yerr=np.random.randn(11)) + + df_err = DataFrame({"x": ["zzz"] * 12, "y": ["zzz"] * 12}) + with tm.external_error_raised(TypeError): + df.plot(yerr=df_err) + + @pytest.mark.parametrize("kind", ["line", "bar", "barh"]) + def test_errorbar_plot_different_kinds(self, kind): + d = {"x": np.arange(12), "y": np.arange(12, 0, -1)} + df = DataFrame(d) + d_err = {"x": np.ones(12) * 0.2, "y": np.ones(12) * 0.4} + df_err = DataFrame(d_err) + + ax = _check_plot_works(df.plot, yerr=df_err["x"], kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=2) + + ax = _check_plot_works(df.plot, yerr=d_err, kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=2) + + ax = _check_plot_works(df.plot, yerr=df_err, xerr=df_err, kind=kind) + self._check_has_errorbars(ax, xerr=2, yerr=2) + + ax = _check_plot_works(df.plot, yerr=df_err["x"], xerr=df_err["x"], kind=kind) + self._check_has_errorbars(ax, xerr=2, yerr=2) + + ax = _check_plot_works(df.plot, xerr=0.2, yerr=0.2, kind=kind) + self._check_has_errorbars(ax, xerr=2, yerr=2) + + axes = _check_plot_works( + df.plot, + default_axes=True, + yerr=df_err, + xerr=df_err, + subplots=True, + kind=kind, + ) + self._check_has_errorbars(axes, xerr=1, yerr=1) + + @pytest.mark.xfail(reason="Iterator is consumed", raises=ValueError) + def test_errorbar_plot_iterator(self): + with warnings.catch_warnings(): + d 
= {"x": np.arange(12), "y": np.arange(12, 0, -1)} + df = DataFrame(d) + + # yerr is iterator + ax = _check_plot_works(df.plot, yerr=itertools.repeat(0.1, len(df))) + self._check_has_errorbars(ax, xerr=0, yerr=2) + + def test_errorbar_with_integer_column_names(self): + # test with integer column names + df = DataFrame(np.random.randn(10, 2)) + df_err = DataFrame(np.random.randn(10, 2)) + ax = _check_plot_works(df.plot, yerr=df_err) + self._check_has_errorbars(ax, xerr=0, yerr=2) + ax = _check_plot_works(df.plot, y=0, yerr=1) + self._check_has_errorbars(ax, xerr=0, yerr=1) + + def test_errorbar_with_partial_columns(self): + df = DataFrame(np.random.randn(10, 3)) + df_err = DataFrame(np.random.randn(10, 2), columns=[0, 2]) + kinds = ["line", "bar"] + for kind in kinds: + ax = _check_plot_works(df.plot, yerr=df_err, kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=2) + + ix = date_range("1/1/2000", periods=10, freq="M") + df.set_index(ix, inplace=True) + df_err.set_index(ix, inplace=True) + ax = _check_plot_works(df.plot, yerr=df_err, kind="line") + self._check_has_errorbars(ax, xerr=0, yerr=2) + + d = {"x": np.arange(12), "y": np.arange(12, 0, -1)} + df = DataFrame(d) + d_err = {"x": np.ones(12) * 0.2, "z": np.ones(12) * 0.4} + df_err = DataFrame(d_err) + for err in [d_err, df_err]: + ax = _check_plot_works(df.plot, yerr=err) + self._check_has_errorbars(ax, xerr=0, yerr=1) + + @pytest.mark.parametrize("kind", ["line", "bar", "barh"]) + def test_errorbar_timeseries(self, kind): + d = {"x": np.arange(12), "y": np.arange(12, 0, -1)} + d_err = {"x": np.ones(12) * 0.2, "y": np.ones(12) * 0.4} + + # check time-series plots + ix = date_range("1/1/2000", "1/1/2001", freq="M") + tdf = DataFrame(d, index=ix) + tdf_err = DataFrame(d_err, index=ix) + + ax = _check_plot_works(tdf.plot, yerr=tdf_err, kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=2) + + ax = _check_plot_works(tdf.plot, yerr=d_err, kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=2) + + ax = _check_plot_works(tdf.plot, y="y", yerr=tdf_err["x"], kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=1) + + ax = _check_plot_works(tdf.plot, y="y", yerr="x", kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=1) + + ax = _check_plot_works(tdf.plot, yerr=tdf_err, kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=2) + + axes = _check_plot_works( + tdf.plot, + default_axes=True, + kind=kind, + yerr=tdf_err, + subplots=True, + ) + self._check_has_errorbars(axes, xerr=0, yerr=1) + + def test_errorbar_asymmetrical(self): + np.random.seed(0) + err = np.random.rand(3, 2, 5) + + # each column is [0, 1, 2, 3, 4], [3, 4, 5, 6, 7]... 
+ df = DataFrame(np.arange(15).reshape(3, 5)).T + + ax = df.plot(yerr=err, xerr=err / 2) + + yerr_0_0 = ax.collections[1].get_paths()[0].vertices[:, 1] + expected_0_0 = err[0, :, 0] * np.array([-1, 1]) + tm.assert_almost_equal(yerr_0_0, expected_0_0) + + msg = re.escape( + "Asymmetrical error bars should be provided with the shape (3, 2, 5)" + ) + with pytest.raises(ValueError, match=msg): + df.plot(yerr=err.T) + + tm.close() + + def test_table(self): + df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) + _check_plot_works(df.plot, table=True) + _check_plot_works(df.plot, table=df) + + # GH 35945 UserWarning + with tm.assert_produces_warning(None): + ax = df.plot() + assert len(ax.tables) == 0 + plotting.table(ax, df.T) + assert len(ax.tables) == 1 + + def test_errorbar_scatter(self): + df = DataFrame(np.random.randn(5, 2), index=range(5), columns=["x", "y"]) + df_err = DataFrame( + np.random.randn(5, 2) / 5, index=range(5), columns=["x", "y"] + ) + + ax = _check_plot_works(df.plot.scatter, x="x", y="y") + self._check_has_errorbars(ax, xerr=0, yerr=0) + ax = _check_plot_works(df.plot.scatter, x="x", y="y", xerr=df_err) + self._check_has_errorbars(ax, xerr=1, yerr=0) + + ax = _check_plot_works(df.plot.scatter, x="x", y="y", yerr=df_err) + self._check_has_errorbars(ax, xerr=0, yerr=1) + ax = _check_plot_works(df.plot.scatter, x="x", y="y", xerr=df_err, yerr=df_err) + self._check_has_errorbars(ax, xerr=1, yerr=1) + + def _check_errorbar_color(containers, expected, has_err="has_xerr"): + lines = [] + errs = [c.lines for c in ax.containers if getattr(c, has_err, False)][0] + for el in errs: + if is_list_like(el): + lines.extend(el) + else: + lines.append(el) + err_lines = [x for x in lines if x in ax.collections] + self._check_colors( + err_lines, linecolors=np.array([expected] * len(err_lines)) + ) + + # GH 8081 + df = DataFrame(np.random.randn(10, 5), columns=["a", "b", "c", "d", "e"]) + ax = df.plot.scatter(x="a", y="b", xerr="d", yerr="e", c="red") + self._check_has_errorbars(ax, xerr=1, yerr=1) + _check_errorbar_color(ax.containers, "red", has_err="has_xerr") + _check_errorbar_color(ax.containers, "red", has_err="has_yerr") + + ax = df.plot.scatter(x="a", y="b", yerr="e", color="green") + self._check_has_errorbars(ax, xerr=0, yerr=1) + _check_errorbar_color(ax.containers, "green", has_err="has_yerr") + + def test_sharex_and_ax(self): + # https://github.com/pandas-dev/pandas/issues/9737 using gridspec, + # the axis in fig.get_axis() are sorted differently than pandas + # expected them, so make sure that only the right ones are removed + import matplotlib.pyplot as plt + + plt.close("all") + gs, axes = _generate_4_axes_via_gridspec() + + df = DataFrame( + { + "a": [1, 2, 3, 4, 5, 6], + "b": [1, 2, 3, 4, 5, 6], + "c": [1, 2, 3, 4, 5, 6], + "d": [1, 2, 3, 4, 5, 6], + } + ) + + def _check(axes): + for ax in axes: + assert len(ax.lines) == 1 + self._check_visible(ax.get_yticklabels(), visible=True) + for ax in [axes[0], axes[2]]: + self._check_visible(ax.get_xticklabels(), visible=False) + self._check_visible(ax.get_xticklabels(minor=True), visible=False) + for ax in [axes[1], axes[3]]: + self._check_visible(ax.get_xticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(minor=True), visible=True) + + for ax in axes: + df.plot(x="a", y="b", title="title", ax=ax, sharex=True) + gs.tight_layout(plt.gcf()) + _check(axes) + tm.close() + + gs, axes = _generate_4_axes_via_gridspec() + with tm.assert_produces_warning(UserWarning): + axes = df.plot(subplots=True, 
ax=axes, sharex=True) + _check(axes) + tm.close() + + gs, axes = _generate_4_axes_via_gridspec() + # without sharex, no labels should be touched! + for ax in axes: + df.plot(x="a", y="b", title="title", ax=ax) + + gs.tight_layout(plt.gcf()) + for ax in axes: + assert len(ax.lines) == 1 + self._check_visible(ax.get_yticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(minor=True), visible=True) + tm.close() + + def test_sharey_and_ax(self): + # https://github.com/pandas-dev/pandas/issues/9737 using gridspec, + # the axis in fig.get_axis() are sorted differently than pandas + # expected them, so make sure that only the right ones are removed + import matplotlib.pyplot as plt + + gs, axes = _generate_4_axes_via_gridspec() + + df = DataFrame( + { + "a": [1, 2, 3, 4, 5, 6], + "b": [1, 2, 3, 4, 5, 6], + "c": [1, 2, 3, 4, 5, 6], + "d": [1, 2, 3, 4, 5, 6], + } + ) + + def _check(axes): + for ax in axes: + assert len(ax.lines) == 1 + self._check_visible(ax.get_xticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(minor=True), visible=True) + for ax in [axes[0], axes[1]]: + self._check_visible(ax.get_yticklabels(), visible=True) + for ax in [axes[2], axes[3]]: + self._check_visible(ax.get_yticklabels(), visible=False) + + for ax in axes: + df.plot(x="a", y="b", title="title", ax=ax, sharey=True) + gs.tight_layout(plt.gcf()) + _check(axes) + tm.close() + + gs, axes = _generate_4_axes_via_gridspec() + with tm.assert_produces_warning(UserWarning): + axes = df.plot(subplots=True, ax=axes, sharey=True) + + gs.tight_layout(plt.gcf()) + _check(axes) + tm.close() + + gs, axes = _generate_4_axes_via_gridspec() + # without sharex, no labels should be touched! + for ax in axes: + df.plot(x="a", y="b", title="title", ax=ax) + + gs.tight_layout(plt.gcf()) + for ax in axes: + assert len(ax.lines) == 1 + self._check_visible(ax.get_yticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(minor=True), visible=True) + + @td.skip_if_no_scipy + def test_memory_leak(self): + """Check that every plot type gets properly collected.""" + import gc + import weakref + + results = {} + for kind in plotting.PlotAccessor._all_kinds: + + args = {} + if kind in ["hexbin", "scatter", "pie"]: + df = self.hexbin_df + args = {"x": "A", "y": "B"} + elif kind == "area": + df = self.tdf.abs() + else: + df = self.tdf + + # Use a weakref so we can see if the object gets collected without + # also preventing it from being collected + results[kind] = weakref.proxy(df.plot(kind=kind, **args)) + + # have matplotlib delete all the figures + tm.close() + # force a garbage collection + gc.collect() + msg = "weakly-referenced object no longer exists" + for key in results: + # check that every plot was collected + with pytest.raises(ReferenceError, match=msg): + # need to actually access something to get an error + results[key].lines + + def test_df_gridspec_patterns(self): + # GH 10819 + import matplotlib.gridspec as gridspec + import matplotlib.pyplot as plt + + ts = Series(np.random.randn(10), index=date_range("1/1/2000", periods=10)) + + df = DataFrame(np.random.randn(10, 2), index=ts.index, columns=list("AB")) + + def _get_vertical_grid(): + gs = gridspec.GridSpec(3, 1) + fig = plt.figure() + ax1 = fig.add_subplot(gs[:2, :]) + ax2 = fig.add_subplot(gs[2, :]) + return ax1, ax2 + + def _get_horizontal_grid(): + gs = gridspec.GridSpec(1, 3) + fig = plt.figure() + ax1 = 
fig.add_subplot(gs[:, :2]) + ax2 = fig.add_subplot(gs[:, 2]) + return ax1, ax2 + + for ax1, ax2 in [_get_vertical_grid(), _get_horizontal_grid()]: + ax1 = ts.plot(ax=ax1) + assert len(ax1.lines) == 1 + ax2 = df.plot(ax=ax2) + assert len(ax2.lines) == 2 + for ax in [ax1, ax2]: + self._check_visible(ax.get_yticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(minor=True), visible=True) + tm.close() + + # subplots=True + for ax1, ax2 in [_get_vertical_grid(), _get_horizontal_grid()]: + axes = df.plot(subplots=True, ax=[ax1, ax2]) + assert len(ax1.lines) == 1 + assert len(ax2.lines) == 1 + for ax in axes: + self._check_visible(ax.get_yticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(minor=True), visible=True) + tm.close() + + # vertical / subplots / sharex=True / sharey=True + ax1, ax2 = _get_vertical_grid() + with tm.assert_produces_warning(UserWarning): + axes = df.plot(subplots=True, ax=[ax1, ax2], sharex=True, sharey=True) + assert len(axes[0].lines) == 1 + assert len(axes[1].lines) == 1 + for ax in [ax1, ax2]: + # yaxis are visible because there is only one column + self._check_visible(ax.get_yticklabels(), visible=True) + # xaxis of axes0 (top) are hidden + self._check_visible(axes[0].get_xticklabels(), visible=False) + self._check_visible(axes[0].get_xticklabels(minor=True), visible=False) + self._check_visible(axes[1].get_xticklabels(), visible=True) + self._check_visible(axes[1].get_xticklabels(minor=True), visible=True) + tm.close() + + # horizontal / subplots / sharex=True / sharey=True + ax1, ax2 = _get_horizontal_grid() + with tm.assert_produces_warning(UserWarning): + axes = df.plot(subplots=True, ax=[ax1, ax2], sharex=True, sharey=True) + assert len(axes[0].lines) == 1 + assert len(axes[1].lines) == 1 + self._check_visible(axes[0].get_yticklabels(), visible=True) + # yaxis of axes1 (right) are hidden + self._check_visible(axes[1].get_yticklabels(), visible=False) + for ax in [ax1, ax2]: + # xaxis are visible because there is only one column + self._check_visible(ax.get_xticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(minor=True), visible=True) + tm.close() + + # boxed + def _get_boxed_grid(): + gs = gridspec.GridSpec(3, 3) + fig = plt.figure() + ax1 = fig.add_subplot(gs[:2, :2]) + ax2 = fig.add_subplot(gs[:2, 2]) + ax3 = fig.add_subplot(gs[2, :2]) + ax4 = fig.add_subplot(gs[2, 2]) + return ax1, ax2, ax3, ax4 + + axes = _get_boxed_grid() + df = DataFrame(np.random.randn(10, 4), index=ts.index, columns=list("ABCD")) + axes = df.plot(subplots=True, ax=axes) + for ax in axes: + assert len(ax.lines) == 1 + # axis are visible because these are not shared + self._check_visible(ax.get_yticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(minor=True), visible=True) + tm.close() + + # subplots / sharex=True / sharey=True + axes = _get_boxed_grid() + with tm.assert_produces_warning(UserWarning): + axes = df.plot(subplots=True, ax=axes, sharex=True, sharey=True) + for ax in axes: + assert len(ax.lines) == 1 + for ax in [axes[0], axes[2]]: # left column + self._check_visible(ax.get_yticklabels(), visible=True) + for ax in [axes[1], axes[3]]: # right column + self._check_visible(ax.get_yticklabels(), visible=False) + for ax in [axes[0], axes[1]]: # top row + self._check_visible(ax.get_xticklabels(), visible=False) + 
self._check_visible(ax.get_xticklabels(minor=True), visible=False) + for ax in [axes[2], axes[3]]: # bottom row + self._check_visible(ax.get_xticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(minor=True), visible=True) + tm.close() + + def test_df_grid_settings(self): + # Make sure plot defaults to rcParams['axes.grid'] setting, GH 9792 + self._check_grid_settings( + DataFrame({"a": [1, 2, 3], "b": [2, 3, 4]}), + plotting.PlotAccessor._dataframe_kinds, + kws={"x": "a", "y": "b"}, + ) + + def test_plain_axes(self): + + # supplied ax itself is a SubplotAxes, but figure contains also + # a plain Axes object (GH11556) + fig, ax = self.plt.subplots() + fig.add_axes([0.2, 0.2, 0.2, 0.2]) + Series(np.random.rand(10)).plot(ax=ax) + + # supplied ax itself is a plain Axes, but because the cmap keyword + # a new ax is created for the colorbar -> also multiples axes (GH11520) + df = DataFrame({"a": np.random.randn(8), "b": np.random.randn(8)}) + fig = self.plt.figure() + ax = fig.add_axes((0, 0, 1, 1)) + df.plot(kind="scatter", ax=ax, x="a", y="b", c="a", cmap="hsv") + + # other examples + fig, ax = self.plt.subplots() + from mpl_toolkits.axes_grid1 import make_axes_locatable + + divider = make_axes_locatable(ax) + cax = divider.append_axes("right", size="5%", pad=0.05) + Series(np.random.rand(10)).plot(ax=ax) + Series(np.random.rand(10)).plot(ax=cax) + + fig, ax = self.plt.subplots() + from mpl_toolkits.axes_grid1.inset_locator import inset_axes + + iax = inset_axes(ax, width="30%", height=1.0, loc=3) + Series(np.random.rand(10)).plot(ax=ax) + Series(np.random.rand(10)).plot(ax=iax) + + @pytest.mark.parametrize("method", ["line", "barh", "bar"]) + def test_secondary_axis_font_size(self, method): + # GH: 12565 + df = ( + DataFrame(np.random.randn(15, 2), columns=list("AB")) + .assign(C=lambda df: df.B.cumsum()) + .assign(D=lambda df: df.C * 1.1) + ) + + fontsize = 20 + sy = ["C", "D"] + + kwargs = {"secondary_y": sy, "fontsize": fontsize, "mark_right": True} + ax = getattr(df.plot, method)(**kwargs) + self._check_ticks_props(axes=ax.right_ax, ylabelsize=fontsize) + + def test_x_string_values_ticks(self): + # Test if string plot index have a fixed xtick position + # GH: 7612, GH: 22334 + df = DataFrame( + { + "sales": [3, 2, 3], + "visits": [20, 42, 28], + "day": ["Monday", "Tuesday", "Wednesday"], + } + ) + ax = df.plot.area(x="day") + ax.set_xlim(-1, 3) + xticklabels = [t.get_text() for t in ax.get_xticklabels()] + labels_position = dict(zip(xticklabels, ax.get_xticks())) + # Testing if the label stayed at the right position + assert labels_position["Monday"] == 0.0 + assert labels_position["Tuesday"] == 1.0 + assert labels_position["Wednesday"] == 2.0 + + def test_x_multiindex_values_ticks(self): + # Test if multiindex plot index have a fixed xtick position + # GH: 15912 + index = MultiIndex.from_product([[2012, 2013], [1, 2]]) + df = DataFrame(np.random.randn(4, 2), columns=["A", "B"], index=index) + ax = df.plot() + ax.set_xlim(-1, 4) + xticklabels = [t.get_text() for t in ax.get_xticklabels()] + labels_position = dict(zip(xticklabels, ax.get_xticks())) + # Testing if the label stayed at the right position + assert labels_position["(2012, 1)"] == 0.0 + assert labels_position["(2012, 2)"] == 1.0 + assert labels_position["(2013, 1)"] == 2.0 + assert labels_position["(2013, 2)"] == 3.0 + + @pytest.mark.parametrize("kind", ["line", "area"]) + def test_xlim_plot_line(self, kind): + # test if xlim is set correctly in plot.line and plot.area + # GH 27686 + df = DataFrame([2, 4], 
index=[1, 2]) + ax = df.plot(kind=kind) + xlims = ax.get_xlim() + assert xlims[0] < 1 + assert xlims[1] > 2 + + def test_xlim_plot_line_correctly_in_mixed_plot_type(self): + # test if xlim is set correctly when ax contains multiple different kinds + # of plots, GH 27686 + fig, ax = self.plt.subplots() + + indexes = ["k1", "k2", "k3", "k4"] + df = DataFrame( + { + "s1": [1000, 2000, 1500, 2000], + "s2": [900, 1400, 2000, 3000], + "s3": [1500, 1500, 1600, 1200], + "secondary_y": [1, 3, 4, 3], + }, + index=indexes, + ) + df[["s1", "s2", "s3"]].plot.bar(ax=ax, stacked=False) + df[["secondary_y"]].plot(ax=ax, secondary_y=True) + + xlims = ax.get_xlim() + assert xlims[0] < 0 + assert xlims[1] > 3 + + # make sure axis labels are plotted correctly as well + xticklabels = [t.get_text() for t in ax.get_xticklabels()] + assert xticklabels == indexes + + def test_plot_no_rows(self): + # GH 27758 + df = DataFrame(columns=["foo"], dtype=int) + assert df.empty + ax = df.plot() + assert len(ax.get_lines()) == 1 + line = ax.get_lines()[0] + assert len(line.get_xdata()) == 0 + assert len(line.get_ydata()) == 0 + + def test_plot_no_numeric_data(self): + df = DataFrame(["a", "b", "c"]) + with pytest.raises(TypeError, match="no numeric data to plot"): + df.plot() + + @pytest.mark.parametrize( + "index_name, old_label, new_label", + [ + (None, "", "new"), + ("old", "old", "new"), + (None, "", ""), + (None, "", 1), + (None, "", [1, 2]), + ], + ) + @pytest.mark.parametrize("kind", ["line", "area", "bar"]) + def test_xlabel_ylabel_dataframe_single_plot( + self, kind, index_name, old_label, new_label + ): + # GH 9093 + df = DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) + df.index.name = index_name + + # default is the ylabel is not shown and xlabel is index name + ax = df.plot(kind=kind) + assert ax.get_xlabel() == old_label + assert ax.get_ylabel() == "" + + # old xlabel will be overridden and assigned ylabel will be used as ylabel + ax = df.plot(kind=kind, ylabel=new_label, xlabel=new_label) + assert ax.get_ylabel() == str(new_label) + assert ax.get_xlabel() == str(new_label) + + @pytest.mark.parametrize( + "xlabel, ylabel", + [ + (None, None), + ("X Label", None), + (None, "Y Label"), + ("X Label", "Y Label"), + ], + ) + @pytest.mark.parametrize("kind", ["scatter", "hexbin"]) + def test_xlabel_ylabel_dataframe_plane_plot(self, kind, xlabel, ylabel): + # GH 37001 + xcol = "Type A" + ycol = "Type B" + df = DataFrame([[1, 2], [2, 5]], columns=[xcol, ycol]) + + # default is the labels are column names + ax = df.plot(kind=kind, x=xcol, y=ycol, xlabel=xlabel, ylabel=ylabel) + assert ax.get_xlabel() == (xcol if xlabel is None else xlabel) + assert ax.get_ylabel() == (ycol if ylabel is None else ylabel) + + +def _generate_4_axes_via_gridspec(): + import matplotlib as mpl + import matplotlib.gridspec + import matplotlib.pyplot as plt + + gs = mpl.gridspec.GridSpec(2, 2) + ax_tl = plt.subplot(gs[0, 0]) + ax_ll = plt.subplot(gs[1, 0]) + ax_tr = plt.subplot(gs[0, 1]) + ax_lr = plt.subplot(gs[1, 1]) + + return gs, [ax_tl, ax_ll, ax_tr, ax_lr] diff --git a/.local/lib/python3.8/site-packages/pandas/tests/plotting/frame/test_frame_color.py b/.local/lib/python3.8/site-packages/pandas/tests/plotting/frame/test_frame_color.py new file mode 100644 index 0000000..b46bb95 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/plotting/frame/test_frame_color.py @@ -0,0 +1,664 @@ +""" Test cases for DataFrame.plot """ +import re +import warnings + +import numpy as np +import pytest + +import 
pandas.util._test_decorators as td + +import pandas as pd +from pandas import DataFrame +import pandas._testing as tm +from pandas.tests.plotting.common import ( + TestPlotBase, + _check_plot_works, +) + +pytestmark = pytest.mark.slow + + +@td.skip_if_no_mpl +class TestDataFrameColor(TestPlotBase): + def setup_method(self, method): + TestPlotBase.setup_method(self, method) + import matplotlib as mpl + + mpl.rcdefaults() + + self.tdf = tm.makeTimeDataFrame() + self.hexbin_df = DataFrame( + { + "A": np.random.uniform(size=20), + "B": np.random.uniform(size=20), + "C": np.arange(20) + np.random.uniform(size=20), + } + ) + + def test_mpl2_color_cycle_str(self): + # GH 15516 + df = DataFrame(np.random.randn(10, 3), columns=["a", "b", "c"]) + colors = ["C0", "C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8", "C9"] + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always", "MatplotlibDeprecationWarning") + + for color in colors: + _check_plot_works(df.plot, color=color) + + # if warning is raised, check that it is the exact problematic one + # GH 36972 + if w: + match = "Support for uppercase single-letter colors is deprecated" + warning_message = str(w[0].message) + msg = "MatplotlibDeprecationWarning related to CN colors was raised" + assert match not in warning_message, msg + + def test_color_single_series_list(self): + # GH 3486 + df = DataFrame({"A": [1, 2, 3]}) + _check_plot_works(df.plot, color=["red"]) + + @pytest.mark.parametrize("color", [(1, 0, 0), (1, 0, 0, 0.5)]) + def test_rgb_tuple_color(self, color): + # GH 16695 + df = DataFrame({"x": [1, 2], "y": [3, 4]}) + _check_plot_works(df.plot, x="x", y="y", color=color) + + def test_color_empty_string(self): + df = DataFrame(np.random.randn(10, 2)) + with pytest.raises(ValueError, match="Invalid color argument:"): + df.plot(color="") + + def test_color_and_style_arguments(self): + df = DataFrame({"x": [1, 2], "y": [3, 4]}) + # passing both 'color' and 'style' arguments should be allowed + # if there is no color symbol in the style strings: + ax = df.plot(color=["red", "black"], style=["-", "--"]) + # check that the linestyles are correctly set: + linestyle = [line.get_linestyle() for line in ax.lines] + assert linestyle == ["-", "--"] + # check that the colors are correctly set: + color = [line.get_color() for line in ax.lines] + assert color == ["red", "black"] + # passing both 'color' and 'style' arguments should not be allowed + # if there is a color symbol in the style strings: + msg = ( + "Cannot pass 'style' string with a color symbol and 'color' keyword " + "argument. 
Please use one or the other or pass 'style' without a color "
+            "symbol"
+        )
+        with pytest.raises(ValueError, match=msg):
+            df.plot(color=["red", "black"], style=["k-", "r--"])
+
+    @pytest.mark.parametrize(
+        "color, expected",
+        [
+            ("green", ["green"] * 4),
+            (["yellow", "red", "green", "blue"], ["yellow", "red", "green", "blue"]),
+        ],
+    )
+    def test_color_and_marker(self, color, expected):
+        # GH 21003
+        df = DataFrame(np.random.random((7, 4)))
+        ax = df.plot(color=color, style="d--")
+        # check colors
+        result = [i.get_color() for i in ax.lines]
+        assert result == expected
+        # check markers and linestyles
+        assert all(i.get_linestyle() == "--" for i in ax.lines)
+        assert all(i.get_marker() == "d" for i in ax.lines)
+
+    def test_bar_colors(self):
+        import matplotlib.pyplot as plt
+
+        default_colors = self._unpack_cycler(plt.rcParams)
+
+        df = DataFrame(np.random.randn(5, 5))
+        ax = df.plot.bar()
+        self._check_colors(ax.patches[::5], facecolors=default_colors[:5])
+        tm.close()
+
+        custom_colors = "rgcby"
+        ax = df.plot.bar(color=custom_colors)
+        self._check_colors(ax.patches[::5], facecolors=custom_colors)
+        tm.close()
+
+        from matplotlib import cm
+
+        # Test str -> colormap functionality
+        ax = df.plot.bar(colormap="jet")
+        rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)]
+        self._check_colors(ax.patches[::5], facecolors=rgba_colors)
+        tm.close()
+
+        # Test colormap functionality
+        ax = df.plot.bar(colormap=cm.jet)
+        rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)]
+        self._check_colors(ax.patches[::5], facecolors=rgba_colors)
+        tm.close()
+
+        ax = df.loc[:, [0]].plot.bar(color="DodgerBlue")
+        self._check_colors([ax.patches[0]], facecolors=["DodgerBlue"])
+        tm.close()
+
+        ax = df.plot(kind="bar", color="green")
+        self._check_colors(ax.patches[::5], facecolors=["green"] * 5)
+        tm.close()
+
+    def test_bar_user_colors(self):
+        df = DataFrame(
+            {"A": range(4), "B": range(1, 5), "color": ["red", "blue", "blue", "red"]}
+        )
+        # This should *only* work when `y` is specified, else
+        # we use one color per column
+        ax = df.plot.bar(y="A", color=df["color"])
+        result = [p.get_facecolor() for p in ax.patches]
+        expected = [
+            (1.0, 0.0, 0.0, 1.0),
+            (0.0, 0.0, 1.0, 1.0),
+            (0.0, 0.0, 1.0, 1.0),
+            (1.0, 0.0, 0.0, 1.0),
+        ]
+        assert result == expected
+
+    def test_if_scatterplot_colorbar_affects_xaxis_visibility(self):
+        # addressing issue #10611, to ensure the colorbar does not
+        # interfere with the x-axis label and ticklabels with the
+        # ipython inline backend.
+        random_array = np.random.random((1000, 3))
+        df = DataFrame(random_array, columns=["A label", "B label", "C label"])
+
+        ax1 = df.plot.scatter(x="A label", y="B label")
+        ax2 = df.plot.scatter(x="A label", y="B label", c="C label")
+
+        vis1 = [vis.get_visible() for vis in ax1.xaxis.get_minorticklabels()]
+        vis2 = [vis.get_visible() for vis in ax2.xaxis.get_minorticklabels()]
+        assert vis1 == vis2
+
+        vis1 = [vis.get_visible() for vis in ax1.xaxis.get_majorticklabels()]
+        vis2 = [vis.get_visible() for vis in ax2.xaxis.get_majorticklabels()]
+        assert vis1 == vis2
+
+        assert (
+            ax1.xaxis.get_label().get_visible() == ax2.xaxis.get_label().get_visible()
+        )
+
+    def test_if_hexbin_xaxis_label_is_visible(self):
+        # addressing issue #10678, to ensure the colorbar does not
+        # interfere with the x-axis label and ticklabels with the
+        # ipython inline backend.
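+        # Note: a colorbar is drawn by taking space from the parent axes, so
+        # a regression in these two tests typically showed up as the parent's
+        # x-axis label and tick labels being hidden when a colorbar was added
+        # under the inline backend.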
+ random_array = np.random.random((1000, 3)) + df = DataFrame(random_array, columns=["A label", "B label", "C label"]) + + ax = df.plot.hexbin("A label", "B label", gridsize=12) + assert all(vis.get_visible() for vis in ax.xaxis.get_minorticklabels()) + assert all(vis.get_visible() for vis in ax.xaxis.get_majorticklabels()) + assert ax.xaxis.get_label().get_visible() + + def test_if_scatterplot_colorbars_are_next_to_parent_axes(self): + import matplotlib.pyplot as plt + + random_array = np.random.random((1000, 3)) + df = DataFrame(random_array, columns=["A label", "B label", "C label"]) + + fig, axes = plt.subplots(1, 2) + df.plot.scatter("A label", "B label", c="C label", ax=axes[0]) + df.plot.scatter("A label", "B label", c="C label", ax=axes[1]) + plt.tight_layout() + + points = np.array([ax.get_position().get_points() for ax in fig.axes]) + axes_x_coords = points[:, :, 0] + parent_distance = axes_x_coords[1, :] - axes_x_coords[0, :] + colorbar_distance = axes_x_coords[3, :] - axes_x_coords[2, :] + assert np.isclose(parent_distance, colorbar_distance, atol=1e-7).all() + + @pytest.mark.parametrize("cmap", [None, "Greys"]) + def test_scatter_with_c_column_name_with_colors(self, cmap): + # https://github.com/pandas-dev/pandas/issues/34316 + df = DataFrame( + [[5.1, 3.5], [4.9, 3.0], [7.0, 3.2], [6.4, 3.2], [5.9, 3.0]], + columns=["length", "width"], + ) + df["species"] = ["r", "r", "g", "g", "b"] + ax = df.plot.scatter(x=0, y=1, c="species", cmap=cmap) + assert ax.collections[0].colorbar is None + + def test_scatter_colors(self): + df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]}) + with pytest.raises(TypeError, match="Specify exactly one of `c` and `color`"): + df.plot.scatter(x="a", y="b", c="c", color="green") + + default_colors = self._unpack_cycler(self.plt.rcParams) + + ax = df.plot.scatter(x="a", y="b", c="c") + tm.assert_numpy_array_equal( + ax.collections[0].get_facecolor()[0], + np.array(self.colorconverter.to_rgba(default_colors[0])), + ) + + ax = df.plot.scatter(x="a", y="b", color="white") + tm.assert_numpy_array_equal( + ax.collections[0].get_facecolor()[0], + np.array([1, 1, 1, 1], dtype=np.float64), + ) + + def test_scatter_colorbar_different_cmap(self): + # GH 33389 + import matplotlib.pyplot as plt + + df = DataFrame({"x": [1, 2, 3], "y": [1, 3, 2], "c": [1, 2, 3]}) + df["x2"] = df["x"] + 1 + + fig, ax = plt.subplots() + df.plot("x", "y", c="c", kind="scatter", cmap="cividis", ax=ax) + df.plot("x2", "y", c="c", kind="scatter", cmap="magma", ax=ax) + + assert ax.collections[0].cmap.name == "cividis" + assert ax.collections[1].cmap.name == "magma" + + def test_line_colors(self): + from matplotlib import cm + + custom_colors = "rgcby" + df = DataFrame(np.random.randn(5, 5)) + + ax = df.plot(color=custom_colors) + self._check_colors(ax.get_lines(), linecolors=custom_colors) + + tm.close() + + ax2 = df.plot(color=custom_colors) + lines2 = ax2.get_lines() + + for l1, l2 in zip(ax.get_lines(), lines2): + assert l1.get_color() == l2.get_color() + + tm.close() + + ax = df.plot(colormap="jet") + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] + self._check_colors(ax.get_lines(), linecolors=rgba_colors) + tm.close() + + ax = df.plot(colormap=cm.jet) + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] + self._check_colors(ax.get_lines(), linecolors=rgba_colors) + tm.close() + + # make color a list if plotting one column frame + # handles cases like df.plot(color='DodgerBlue') + ax = df.loc[:, [0]].plot(color="DodgerBlue") + 
self._check_colors(ax.lines, linecolors=["DodgerBlue"]) + + ax = df.plot(color="red") + self._check_colors(ax.get_lines(), linecolors=["red"] * 5) + tm.close() + + # GH 10299 + custom_colors = ["#FF0000", "#0000FF", "#FFFF00", "#000000", "#FFFFFF"] + ax = df.plot(color=custom_colors) + self._check_colors(ax.get_lines(), linecolors=custom_colors) + tm.close() + + def test_dont_modify_colors(self): + colors = ["r", "g", "b"] + DataFrame(np.random.rand(10, 2)).plot(color=colors) + assert len(colors) == 3 + + def test_line_colors_and_styles_subplots(self): + # GH 9894 + from matplotlib import cm + + default_colors = self._unpack_cycler(self.plt.rcParams) + + df = DataFrame(np.random.randn(5, 5)) + + axes = df.plot(subplots=True) + for ax, c in zip(axes, list(default_colors)): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + # single color char + axes = df.plot(subplots=True, color="k") + for ax in axes: + self._check_colors(ax.get_lines(), linecolors=["k"]) + tm.close() + + # single color str + axes = df.plot(subplots=True, color="green") + for ax in axes: + self._check_colors(ax.get_lines(), linecolors=["green"]) + tm.close() + + custom_colors = "rgcby" + axes = df.plot(color=custom_colors, subplots=True) + for ax, c in zip(axes, list(custom_colors)): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + axes = df.plot(color=list(custom_colors), subplots=True) + for ax, c in zip(axes, list(custom_colors)): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + # GH 10299 + custom_colors = ["#FF0000", "#0000FF", "#FFFF00", "#000000", "#FFFFFF"] + axes = df.plot(color=custom_colors, subplots=True) + for ax, c in zip(axes, list(custom_colors)): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] + for cmap in ["jet", cm.jet]: + axes = df.plot(colormap=cmap, subplots=True) + for ax, c in zip(axes, rgba_colors): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + # make color a list if plotting one column frame + # handles cases like df.plot(color='DodgerBlue') + axes = df.loc[:, [0]].plot(color="DodgerBlue", subplots=True) + self._check_colors(axes[0].lines, linecolors=["DodgerBlue"]) + + # single character style + axes = df.plot(style="r", subplots=True) + for ax in axes: + self._check_colors(ax.get_lines(), linecolors=["r"]) + tm.close() + + # list of styles + styles = list("rgcby") + axes = df.plot(style=styles, subplots=True) + for ax, c in zip(axes, styles): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + def test_area_colors(self): + from matplotlib import cm + from matplotlib.collections import PolyCollection + + custom_colors = "rgcby" + df = DataFrame(np.random.rand(5, 5)) + + ax = df.plot.area(color=custom_colors) + self._check_colors(ax.get_lines(), linecolors=custom_colors) + poly = [o for o in ax.get_children() if isinstance(o, PolyCollection)] + self._check_colors(poly, facecolors=custom_colors) + + handles, labels = ax.get_legend_handles_labels() + self._check_colors(handles, facecolors=custom_colors) + + for h in handles: + assert h.get_alpha() is None + tm.close() + + ax = df.plot.area(colormap="jet") + jet_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] + self._check_colors(ax.get_lines(), linecolors=jet_colors) + poly = [o for o in ax.get_children() if isinstance(o, PolyCollection)] + self._check_colors(poly, facecolors=jet_colors) + + handles, labels = ax.get_legend_handles_labels() + 
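+        # The legend handles of an area plot are filled, patch-like proxy
+        # artists, so the check below compares their facecolors rather than
+        # line colors.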
self._check_colors(handles, facecolors=jet_colors) + for h in handles: + assert h.get_alpha() is None + tm.close() + + # When stacked=False, alpha is set to 0.5 + ax = df.plot.area(colormap=cm.jet, stacked=False) + self._check_colors(ax.get_lines(), linecolors=jet_colors) + poly = [o for o in ax.get_children() if isinstance(o, PolyCollection)] + jet_with_alpha = [(c[0], c[1], c[2], 0.5) for c in jet_colors] + self._check_colors(poly, facecolors=jet_with_alpha) + + handles, labels = ax.get_legend_handles_labels() + linecolors = jet_with_alpha + self._check_colors(handles[: len(jet_colors)], linecolors=linecolors) + for h in handles: + assert h.get_alpha() == 0.5 + + def test_hist_colors(self): + default_colors = self._unpack_cycler(self.plt.rcParams) + + df = DataFrame(np.random.randn(5, 5)) + ax = df.plot.hist() + self._check_colors(ax.patches[::10], facecolors=default_colors[:5]) + tm.close() + + custom_colors = "rgcby" + ax = df.plot.hist(color=custom_colors) + self._check_colors(ax.patches[::10], facecolors=custom_colors) + tm.close() + + from matplotlib import cm + + # Test str -> colormap functionality + ax = df.plot.hist(colormap="jet") + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)] + self._check_colors(ax.patches[::10], facecolors=rgba_colors) + tm.close() + + # Test colormap functionality + ax = df.plot.hist(colormap=cm.jet) + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)] + self._check_colors(ax.patches[::10], facecolors=rgba_colors) + tm.close() + + ax = df.loc[:, [0]].plot.hist(color="DodgerBlue") + self._check_colors([ax.patches[0]], facecolors=["DodgerBlue"]) + + ax = df.plot(kind="hist", color="green") + self._check_colors(ax.patches[::10], facecolors=["green"] * 5) + tm.close() + + @td.skip_if_no_scipy + def test_kde_colors(self): + from matplotlib import cm + + custom_colors = "rgcby" + df = DataFrame(np.random.rand(5, 5)) + + ax = df.plot.kde(color=custom_colors) + self._check_colors(ax.get_lines(), linecolors=custom_colors) + tm.close() + + ax = df.plot.kde(colormap="jet") + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] + self._check_colors(ax.get_lines(), linecolors=rgba_colors) + tm.close() + + ax = df.plot.kde(colormap=cm.jet) + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] + self._check_colors(ax.get_lines(), linecolors=rgba_colors) + + @td.skip_if_no_scipy + def test_kde_colors_and_styles_subplots(self): + from matplotlib import cm + + default_colors = self._unpack_cycler(self.plt.rcParams) + + df = DataFrame(np.random.randn(5, 5)) + + axes = df.plot(kind="kde", subplots=True) + for ax, c in zip(axes, list(default_colors)): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + # single color char + axes = df.plot(kind="kde", color="k", subplots=True) + for ax in axes: + self._check_colors(ax.get_lines(), linecolors=["k"]) + tm.close() + + # single color str + axes = df.plot(kind="kde", color="red", subplots=True) + for ax in axes: + self._check_colors(ax.get_lines(), linecolors=["red"]) + tm.close() + + custom_colors = "rgcby" + axes = df.plot(kind="kde", color=custom_colors, subplots=True) + for ax, c in zip(axes, list(custom_colors)): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] + for cmap in ["jet", cm.jet]: + axes = df.plot(kind="kde", colormap=cmap, subplots=True) + for ax, c in zip(axes, rgba_colors): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + # make color a list if plotting one 
column frame + # handles cases like df.plot(color='DodgerBlue') + axes = df.loc[:, [0]].plot(kind="kde", color="DodgerBlue", subplots=True) + self._check_colors(axes[0].lines, linecolors=["DodgerBlue"]) + + # single character style + axes = df.plot(kind="kde", style="r", subplots=True) + for ax in axes: + self._check_colors(ax.get_lines(), linecolors=["r"]) + tm.close() + + # list of styles + styles = list("rgcby") + axes = df.plot(kind="kde", style=styles, subplots=True) + for ax, c in zip(axes, styles): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + def test_boxplot_colors(self): + def _check_colors(bp, box_c, whiskers_c, medians_c, caps_c="k", fliers_c=None): + # TODO: outside this func? + if fliers_c is None: + fliers_c = "k" + self._check_colors(bp["boxes"], linecolors=[box_c] * len(bp["boxes"])) + self._check_colors( + bp["whiskers"], linecolors=[whiskers_c] * len(bp["whiskers"]) + ) + self._check_colors( + bp["medians"], linecolors=[medians_c] * len(bp["medians"]) + ) + self._check_colors(bp["fliers"], linecolors=[fliers_c] * len(bp["fliers"])) + self._check_colors(bp["caps"], linecolors=[caps_c] * len(bp["caps"])) + + default_colors = self._unpack_cycler(self.plt.rcParams) + + df = DataFrame(np.random.randn(5, 5)) + bp = df.plot.box(return_type="dict") + _check_colors( + bp, + default_colors[0], + default_colors[0], + default_colors[2], + default_colors[0], + ) + tm.close() + + dict_colors = { + "boxes": "#572923", + "whiskers": "#982042", + "medians": "#804823", + "caps": "#123456", + } + bp = df.plot.box(color=dict_colors, sym="r+", return_type="dict") + _check_colors( + bp, + dict_colors["boxes"], + dict_colors["whiskers"], + dict_colors["medians"], + dict_colors["caps"], + "r", + ) + tm.close() + + # partial colors + dict_colors = {"whiskers": "c", "medians": "m"} + bp = df.plot.box(color=dict_colors, return_type="dict") + _check_colors(bp, default_colors[0], "c", "m", default_colors[0]) + tm.close() + + from matplotlib import cm + + # Test str -> colormap functionality + bp = df.plot.box(colormap="jet", return_type="dict") + jet_colors = [cm.jet(n) for n in np.linspace(0, 1, 3)] + _check_colors(bp, jet_colors[0], jet_colors[0], jet_colors[2], jet_colors[0]) + tm.close() + + # Test colormap functionality + bp = df.plot.box(colormap=cm.jet, return_type="dict") + _check_colors(bp, jet_colors[0], jet_colors[0], jet_colors[2], jet_colors[0]) + tm.close() + + # string color is applied to all artists except fliers + bp = df.plot.box(color="DodgerBlue", return_type="dict") + _check_colors(bp, "DodgerBlue", "DodgerBlue", "DodgerBlue", "DodgerBlue") + + # tuple is also applied to all artists except fliers + bp = df.plot.box(color=(0, 1, 0), sym="#123456", return_type="dict") + _check_colors(bp, (0, 1, 0), (0, 1, 0), (0, 1, 0), (0, 1, 0), "#123456") + + msg = re.escape( + "color dict contains invalid key 'xxxx'. 
The key must be either " + "['boxes', 'whiskers', 'medians', 'caps']" + ) + with pytest.raises(ValueError, match=msg): + # Color contains invalid key results in ValueError + df.plot.box(color={"boxes": "red", "xxxx": "blue"}) + + def test_default_color_cycle(self): + import cycler + import matplotlib.pyplot as plt + + colors = list("rgbk") + plt.rcParams["axes.prop_cycle"] = cycler.cycler("color", colors) + + df = DataFrame(np.random.randn(5, 3)) + ax = df.plot() + + expected = self._unpack_cycler(plt.rcParams)[:3] + self._check_colors(ax.get_lines(), linecolors=expected) + + def test_no_color_bar(self): + df = self.hexbin_df + ax = df.plot.hexbin(x="A", y="B", colorbar=None) + assert ax.collections[0].colorbar is None + + def test_mixing_cmap_and_colormap_raises(self): + df = self.hexbin_df + msg = "Only specify one of `cmap` and `colormap`" + with pytest.raises(TypeError, match=msg): + df.plot.hexbin(x="A", y="B", cmap="YlGn", colormap="BuGn") + + def test_passed_bar_colors(self): + import matplotlib as mpl + + color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] + colormap = mpl.colors.ListedColormap(color_tuples) + barplot = DataFrame([[1, 2, 3]]).plot(kind="bar", cmap=colormap) + assert color_tuples == [c.get_facecolor() for c in barplot.patches] + + def test_rcParams_bar_colors(self): + import matplotlib as mpl + + color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] + with mpl.rc_context(rc={"axes.prop_cycle": mpl.cycler("color", color_tuples)}): + barplot = DataFrame([[1, 2, 3]]).plot(kind="bar") + assert color_tuples == [c.get_facecolor() for c in barplot.patches] + + def test_colors_of_columns_with_same_name(self): + # ISSUE 11136 -> https://github.com/pandas-dev/pandas/issues/11136 + # Creating a DataFrame with duplicate column labels and testing colors of them. 
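+        # A minimal sketch of the behaviour under test (illustrative only,
+        # not part of the original file): concatenating frames that share a
+        # column name yields duplicate labels, and each duplicate should
+        # still receive its own cycle color, matching its legend entry.
+        #
+        #     dup = pd.concat(
+        #         [DataFrame({"a": [1, 2, 3]}), DataFrame({"a": [2, 4, 6]})],
+        #         axis=1,
+        #     )  # columns are ['a', 'a']
+        #     ax = dup.plot()
+        #     [h.get_color() for h in ax.get_legend().legendHandles]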
+ df = DataFrame({"b": [0, 1, 0], "a": [1, 2, 3]}) + df1 = DataFrame({"a": [2, 4, 6]}) + df_concat = pd.concat([df, df1], axis=1) + result = df_concat.plot() + for legend, line in zip(result.get_legend().legendHandles, result.lines): + assert legend.get_color() == line.get_color() + + def test_invalid_colormap(self): + df = DataFrame(np.random.randn(3, 2), columns=["A", "B"]) + msg = "'invalid_colormap' is not a valid value for name; supported values are " + with pytest.raises(ValueError, match=msg): + df.plot(colormap="invalid_colormap") diff --git a/.local/lib/python3.8/site-packages/pandas/tests/plotting/frame/test_frame_groupby.py b/.local/lib/python3.8/site-packages/pandas/tests/plotting/frame/test_frame_groupby.py new file mode 100644 index 0000000..bc35e02 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/plotting/frame/test_frame_groupby.py @@ -0,0 +1,92 @@ +""" Test cases for DataFrame.plot """ + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas import DataFrame +import pandas._testing as tm +from pandas.tests.plotting.common import TestPlotBase + +pytestmark = pytest.mark.slow + + +@td.skip_if_no_mpl +class TestDataFramePlotsGroupby(TestPlotBase): + def setup_method(self, method): + TestPlotBase.setup_method(self, method) + import matplotlib as mpl + + mpl.rcdefaults() + + self.tdf = tm.makeTimeDataFrame() + self.hexbin_df = DataFrame( + { + "A": np.random.uniform(size=20), + "B": np.random.uniform(size=20), + "C": np.arange(20) + np.random.uniform(size=20), + } + ) + + def _assert_ytickslabels_visibility(self, axes, expected): + for ax, exp in zip(axes, expected): + self._check_visible(ax.get_yticklabels(), visible=exp) + + def _assert_xtickslabels_visibility(self, axes, expected): + for ax, exp in zip(axes, expected): + self._check_visible(ax.get_xticklabels(), visible=exp) + + @pytest.mark.parametrize( + "kwargs, expected", + [ + # behavior without keyword + ({}, [True, False, True, False]), + # set sharey=True should be identical + ({"sharey": True}, [True, False, True, False]), + # sharey=False, all yticklabels should be visible + ({"sharey": False}, [True, True, True, True]), + ], + ) + def test_groupby_boxplot_sharey(self, kwargs, expected): + # https://github.com/pandas-dev/pandas/issues/20968 + # sharey can now be switched check whether the right + # pair of axes is turned on or off + df = DataFrame( + { + "a": [-1.43, -0.15, -3.70, -1.43, -0.14], + "b": [0.56, 0.84, 0.29, 0.56, 0.85], + "c": [0, 1, 2, 3, 1], + }, + index=[0, 1, 2, 3, 4], + ) + axes = df.groupby("c").boxplot(**kwargs) + self._assert_ytickslabels_visibility(axes, expected) + + @pytest.mark.parametrize( + "kwargs, expected", + [ + # behavior without keyword + ({}, [True, True, True, True]), + # set sharex=False should be identical + ({"sharex": False}, [True, True, True, True]), + # sharex=True, xticklabels should be visible + # only for bottom plots + ({"sharex": True}, [False, False, True, True]), + ], + ) + def test_groupby_boxplot_sharex(self, kwargs, expected): + # https://github.com/pandas-dev/pandas/issues/20968 + # sharex can now be switched check whether the right + # pair of axes is turned on or off + + df = DataFrame( + { + "a": [-1.43, -0.15, -3.70, -1.43, -0.14], + "b": [0.56, 0.84, 0.29, 0.56, 0.85], + "c": [0, 1, 2, 3, 1], + }, + index=[0, 1, 2, 3, 4], + ) + axes = df.groupby("c").boxplot(**kwargs) + self._assert_xtickslabels_visibility(axes, expected) diff --git 
a/.local/lib/python3.8/site-packages/pandas/tests/plotting/frame/test_frame_legend.py b/.local/lib/python3.8/site-packages/pandas/tests/plotting/frame/test_frame_legend.py new file mode 100644 index 0000000..9501047 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/plotting/frame/test_frame_legend.py @@ -0,0 +1,195 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + date_range, +) +from pandas.tests.plotting.common import TestPlotBase + +pytestmark = pytest.mark.slow + + +class TestFrameLegend(TestPlotBase): + @pytest.mark.xfail( + reason=( + "Open bug in matplotlib " + "https://github.com/matplotlib/matplotlib/issues/11357" + ) + ) + def test_mixed_yerr(self): + # https://github.com/pandas-dev/pandas/issues/39522 + from matplotlib.collections import LineCollection + from matplotlib.lines import Line2D + + df = DataFrame([{"x": 1, "a": 1, "b": 1}, {"x": 2, "a": 2, "b": 3}]) + + ax = df.plot("x", "a", c="orange", yerr=0.1, label="orange") + df.plot("x", "b", c="blue", yerr=None, ax=ax, label="blue") + + legend = ax.get_legend() + result_handles = legend.legendHandles + + assert isinstance(result_handles[0], LineCollection) + assert isinstance(result_handles[1], Line2D) + + def test_legend_false(self): + # https://github.com/pandas-dev/pandas/issues/40044 + df = DataFrame({"a": [1, 1], "b": [2, 3]}) + df2 = DataFrame({"d": [2.5, 2.5]}) + + ax = df.plot(legend=True, color={"a": "blue", "b": "green"}, secondary_y="b") + df2.plot(legend=True, color={"d": "red"}, ax=ax) + legend = ax.get_legend() + result = [handle.get_color() for handle in legend.legendHandles] + expected = ["blue", "green", "red"] + assert result == expected + + def test_df_legend_labels(self): + kinds = ["line", "bar", "barh", "kde", "area", "hist"] + df = DataFrame(np.random.rand(3, 3), columns=["a", "b", "c"]) + df2 = DataFrame(np.random.rand(3, 3), columns=["d", "e", "f"]) + df3 = DataFrame(np.random.rand(3, 3), columns=["g", "h", "i"]) + df4 = DataFrame(np.random.rand(3, 3), columns=["j", "k", "l"]) + + for kind in kinds: + + ax = df.plot(kind=kind, legend=True) + self._check_legend_labels(ax, labels=df.columns) + + ax = df2.plot(kind=kind, legend=False, ax=ax) + self._check_legend_labels(ax, labels=df.columns) + + ax = df3.plot(kind=kind, legend=True, ax=ax) + self._check_legend_labels(ax, labels=df.columns.union(df3.columns)) + + ax = df4.plot(kind=kind, legend="reverse", ax=ax) + expected = list(df.columns.union(df3.columns)) + list(reversed(df4.columns)) + self._check_legend_labels(ax, labels=expected) + + # Secondary Y + ax = df.plot(legend=True, secondary_y="b") + self._check_legend_labels(ax, labels=["a", "b (right)", "c"]) + ax = df2.plot(legend=False, ax=ax) + self._check_legend_labels(ax, labels=["a", "b (right)", "c"]) + ax = df3.plot(kind="bar", legend=True, secondary_y="h", ax=ax) + self._check_legend_labels( + ax, labels=["a", "b (right)", "c", "g", "h (right)", "i"] + ) + + # Time Series + ind = date_range("1/1/2014", periods=3) + df = DataFrame(np.random.randn(3, 3), columns=["a", "b", "c"], index=ind) + df2 = DataFrame(np.random.randn(3, 3), columns=["d", "e", "f"], index=ind) + df3 = DataFrame(np.random.randn(3, 3), columns=["g", "h", "i"], index=ind) + ax = df.plot(legend=True, secondary_y="b") + self._check_legend_labels(ax, labels=["a", "b (right)", "c"]) + ax = df2.plot(legend=False, ax=ax) + self._check_legend_labels(ax, labels=["a", "b (right)", "c"]) + ax = df3.plot(legend=True, ax=ax) + self._check_legend_labels(ax, labels=["a", "b (right)", "c", "g", 
"h", "i"]) + + # scatter + ax = df.plot.scatter(x="a", y="b", label="data1") + self._check_legend_labels(ax, labels=["data1"]) + ax = df2.plot.scatter(x="d", y="e", legend=False, label="data2", ax=ax) + self._check_legend_labels(ax, labels=["data1"]) + ax = df3.plot.scatter(x="g", y="h", label="data3", ax=ax) + self._check_legend_labels(ax, labels=["data1", "data3"]) + + # ensure label args pass through and + # index name does not mutate + # column names don't mutate + df5 = df.set_index("a") + ax = df5.plot(y="b") + self._check_legend_labels(ax, labels=["b"]) + ax = df5.plot(y="b", label="LABEL_b") + self._check_legend_labels(ax, labels=["LABEL_b"]) + self._check_text_labels(ax.xaxis.get_label(), "a") + ax = df5.plot(y="c", label="LABEL_c", ax=ax) + self._check_legend_labels(ax, labels=["LABEL_b", "LABEL_c"]) + assert df5.columns.tolist() == ["b", "c"] + + def test_missing_marker_multi_plots_on_same_ax(self): + # GH 18222 + df = DataFrame(data=[[1, 1, 1, 1], [2, 2, 4, 8]], columns=["x", "r", "g", "b"]) + fig, ax = self.plt.subplots(nrows=1, ncols=3) + # Left plot + df.plot(x="x", y="r", linewidth=0, marker="o", color="r", ax=ax[0]) + df.plot(x="x", y="g", linewidth=1, marker="x", color="g", ax=ax[0]) + df.plot(x="x", y="b", linewidth=1, marker="o", color="b", ax=ax[0]) + self._check_legend_labels(ax[0], labels=["r", "g", "b"]) + self._check_legend_marker(ax[0], expected_markers=["o", "x", "o"]) + # Center plot + df.plot(x="x", y="b", linewidth=1, marker="o", color="b", ax=ax[1]) + df.plot(x="x", y="r", linewidth=0, marker="o", color="r", ax=ax[1]) + df.plot(x="x", y="g", linewidth=1, marker="x", color="g", ax=ax[1]) + self._check_legend_labels(ax[1], labels=["b", "r", "g"]) + self._check_legend_marker(ax[1], expected_markers=["o", "o", "x"]) + # Right plot + df.plot(x="x", y="g", linewidth=1, marker="x", color="g", ax=ax[2]) + df.plot(x="x", y="b", linewidth=1, marker="o", color="b", ax=ax[2]) + df.plot(x="x", y="r", linewidth=0, marker="o", color="r", ax=ax[2]) + self._check_legend_labels(ax[2], labels=["g", "b", "r"]) + self._check_legend_marker(ax[2], expected_markers=["x", "o", "o"]) + + def test_legend_name(self): + multi = DataFrame( + np.random.randn(4, 4), + columns=[np.array(["a", "a", "b", "b"]), np.array(["x", "y", "x", "y"])], + ) + multi.columns.names = ["group", "individual"] + + ax = multi.plot() + leg_title = ax.legend_.get_title() + self._check_text_labels(leg_title, "group,individual") + + df = DataFrame(np.random.randn(5, 5)) + ax = df.plot(legend=True, ax=ax) + leg_title = ax.legend_.get_title() + self._check_text_labels(leg_title, "group,individual") + + df.columns.name = "new" + ax = df.plot(legend=False, ax=ax) + leg_title = ax.legend_.get_title() + self._check_text_labels(leg_title, "group,individual") + + ax = df.plot(legend=True, ax=ax) + leg_title = ax.legend_.get_title() + self._check_text_labels(leg_title, "new") + + def test_no_legend(self): + kinds = ["line", "bar", "barh", "kde", "area", "hist"] + df = DataFrame(np.random.rand(3, 3), columns=["a", "b", "c"]) + + for kind in kinds: + ax = df.plot(kind=kind, legend=False) + self._check_legend_labels(ax, visible=False) + + def test_missing_markers_legend(self): + # 14958 + df = DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"]) + ax = df.plot(y=["A"], marker="x", linestyle="solid") + df.plot(y=["B"], marker="o", linestyle="dotted", ax=ax) + df.plot(y=["C"], marker="<", linestyle="dotted", ax=ax) + + self._check_legend_labels(ax, labels=["A", "B", "C"]) + self._check_legend_marker(ax, 
expected_markers=["x", "o", "<"]) + + def test_missing_markers_legend_using_style(self): + # 14563 + df = DataFrame( + { + "A": [1, 2, 3, 4, 5, 6], + "B": [2, 4, 1, 3, 2, 4], + "C": [3, 3, 2, 6, 4, 2], + "X": [1, 2, 3, 4, 5, 6], + } + ) + + fig, ax = self.plt.subplots() + for kind in "ABC": + df.plot("X", kind, label=kind, ax=ax, style=".") + + self._check_legend_labels(ax, labels=["A", "B", "C"]) + self._check_legend_marker(ax, expected_markers=[".", ".", "."]) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/plotting/frame/test_frame_subplots.py b/.local/lib/python3.8/site-packages/pandas/tests/plotting/frame/test_frame_subplots.py new file mode 100644 index 0000000..fa4a132 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/plotting/frame/test_frame_subplots.py @@ -0,0 +1,687 @@ +""" Test cases for DataFrame.plot """ + +import string +import warnings + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + DataFrame, + Series, + date_range, +) +import pandas._testing as tm +from pandas.tests.plotting.common import TestPlotBase + +from pandas.io.formats.printing import pprint_thing + +pytestmark = pytest.mark.slow + + +@td.skip_if_no_mpl +class TestDataFramePlotsSubplots(TestPlotBase): + def setup_method(self, method): + TestPlotBase.setup_method(self, method) + import matplotlib as mpl + + mpl.rcdefaults() + + self.tdf = tm.makeTimeDataFrame() + self.hexbin_df = DataFrame( + { + "A": np.random.uniform(size=20), + "B": np.random.uniform(size=20), + "C": np.arange(20) + np.random.uniform(size=20), + } + ) + + def test_subplots(self): + df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) + + for kind in ["bar", "barh", "line", "area"]: + axes = df.plot(kind=kind, subplots=True, sharex=True, legend=True) + self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) + assert axes.shape == (3,) + + for ax, column in zip(axes, df.columns): + self._check_legend_labels(ax, labels=[pprint_thing(column)]) + + for ax in axes[:-2]: + self._check_visible(ax.xaxis) # xaxis must be visible for grid + self._check_visible(ax.get_xticklabels(), visible=False) + if not (kind == "bar" and self.mpl_ge_3_1_0): + # change https://github.com/pandas-dev/pandas/issues/26714 + self._check_visible(ax.get_xticklabels(minor=True), visible=False) + self._check_visible(ax.xaxis.get_label(), visible=False) + self._check_visible(ax.get_yticklabels()) + + self._check_visible(axes[-1].xaxis) + self._check_visible(axes[-1].get_xticklabels()) + self._check_visible(axes[-1].get_xticklabels(minor=True)) + self._check_visible(axes[-1].xaxis.get_label()) + self._check_visible(axes[-1].get_yticklabels()) + + axes = df.plot(kind=kind, subplots=True, sharex=False) + for ax in axes: + self._check_visible(ax.xaxis) + self._check_visible(ax.get_xticklabels()) + self._check_visible(ax.get_xticklabels(minor=True)) + self._check_visible(ax.xaxis.get_label()) + self._check_visible(ax.get_yticklabels()) + + axes = df.plot(kind=kind, subplots=True, legend=False) + for ax in axes: + assert ax.get_legend() is None + + def test_subplots_timeseries(self): + idx = date_range(start="2014-07-01", freq="M", periods=10) + df = DataFrame(np.random.rand(10, 3), index=idx) + + for kind in ["line", "area"]: + axes = df.plot(kind=kind, subplots=True, sharex=True) + self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) + + for ax in axes[:-2]: + # GH 7801 + self._check_visible(ax.xaxis) # xaxis must be visible for grid + 
self._check_visible(ax.get_xticklabels(), visible=False) + self._check_visible(ax.get_xticklabels(minor=True), visible=False) + self._check_visible(ax.xaxis.get_label(), visible=False) + self._check_visible(ax.get_yticklabels()) + + self._check_visible(axes[-1].xaxis) + self._check_visible(axes[-1].get_xticklabels()) + self._check_visible(axes[-1].get_xticklabels(minor=True)) + self._check_visible(axes[-1].xaxis.get_label()) + self._check_visible(axes[-1].get_yticklabels()) + self._check_ticks_props(axes, xrot=0) + + axes = df.plot(kind=kind, subplots=True, sharex=False, rot=45, fontsize=7) + for ax in axes: + self._check_visible(ax.xaxis) + self._check_visible(ax.get_xticklabels()) + self._check_visible(ax.get_xticklabels(minor=True)) + self._check_visible(ax.xaxis.get_label()) + self._check_visible(ax.get_yticklabels()) + self._check_ticks_props(ax, xlabelsize=7, xrot=45, ylabelsize=7) + + def test_subplots_timeseries_y_axis(self): + # GH16953 + data = { + "numeric": np.array([1, 2, 5]), + "timedelta": [ + pd.Timedelta(-10, unit="s"), + pd.Timedelta(10, unit="m"), + pd.Timedelta(10, unit="h"), + ], + "datetime_no_tz": [ + pd.to_datetime("2017-08-01 00:00:00"), + pd.to_datetime("2017-08-01 02:00:00"), + pd.to_datetime("2017-08-02 00:00:00"), + ], + "datetime_all_tz": [ + pd.to_datetime("2017-08-01 00:00:00", utc=True), + pd.to_datetime("2017-08-01 02:00:00", utc=True), + pd.to_datetime("2017-08-02 00:00:00", utc=True), + ], + "text": ["This", "should", "fail"], + } + testdata = DataFrame(data) + + y_cols = ["numeric", "timedelta", "datetime_no_tz", "datetime_all_tz"] + for col in y_cols: + ax = testdata.plot(y=col) + result = ax.get_lines()[0].get_data()[1] + expected = testdata[col].values + assert (result == expected).all() + + msg = "no numeric data to plot" + with pytest.raises(TypeError, match=msg): + testdata.plot(y="text") + + @pytest.mark.xfail(reason="not support for period, categorical, datetime_mixed_tz") + def test_subplots_timeseries_y_axis_not_supported(self): + """ + This test will fail for: + period: + since period isn't yet implemented in ``select_dtypes`` + and because it will need a custom value converter + + tick formatter (as was done for x-axis plots) + + categorical: + because it will need a custom value converter + + tick formatter (also doesn't work for x-axis, as of now) + + datetime_mixed_tz: + because of the way how pandas handles ``Series`` of + ``datetime`` objects with different timezone, + generally converting ``datetime`` objects in a tz-aware + form could help with this problem + """ + data = { + "numeric": np.array([1, 2, 5]), + "period": [ + pd.Period("2017-08-01 00:00:00", freq="H"), + pd.Period("2017-08-01 02:00", freq="H"), + pd.Period("2017-08-02 00:00:00", freq="H"), + ], + "categorical": pd.Categorical( + ["c", "b", "a"], categories=["a", "b", "c"], ordered=False + ), + "datetime_mixed_tz": [ + pd.to_datetime("2017-08-01 00:00:00", utc=True), + pd.to_datetime("2017-08-01 02:00:00"), + pd.to_datetime("2017-08-02 00:00:00"), + ], + } + testdata = DataFrame(data) + ax_period = testdata.plot(x="numeric", y="period") + assert ( + ax_period.get_lines()[0].get_data()[1] == testdata["period"].values + ).all() + ax_categorical = testdata.plot(x="numeric", y="categorical") + assert ( + ax_categorical.get_lines()[0].get_data()[1] + == testdata["categorical"].values + ).all() + ax_datetime_mixed_tz = testdata.plot(x="numeric", y="datetime_mixed_tz") + assert ( + ax_datetime_mixed_tz.get_lines()[0].get_data()[1] + == testdata["datetime_mixed_tz"].values + 
).all()
+
+    def test_subplots_layout_multi_column(self):
+        # GH 6667
+        df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10]))
+
+        axes = df.plot(subplots=True, layout=(2, 2))
+        self._check_axes_shape(axes, axes_num=3, layout=(2, 2))
+        assert axes.shape == (2, 2)
+
+        axes = df.plot(subplots=True, layout=(-1, 2))
+        self._check_axes_shape(axes, axes_num=3, layout=(2, 2))
+        assert axes.shape == (2, 2)
+
+        axes = df.plot(subplots=True, layout=(2, -1))
+        self._check_axes_shape(axes, axes_num=3, layout=(2, 2))
+        assert axes.shape == (2, 2)
+
+        axes = df.plot(subplots=True, layout=(1, 4))
+        self._check_axes_shape(axes, axes_num=3, layout=(1, 4))
+        assert axes.shape == (1, 4)
+
+        axes = df.plot(subplots=True, layout=(-1, 4))
+        self._check_axes_shape(axes, axes_num=3, layout=(1, 4))
+        assert axes.shape == (1, 4)
+
+        axes = df.plot(subplots=True, layout=(4, -1))
+        self._check_axes_shape(axes, axes_num=3, layout=(4, 1))
+        assert axes.shape == (4, 1)
+
+        msg = "Layout of 1x1 must be larger than required size 3"
+
+        with pytest.raises(ValueError, match=msg):
+            df.plot(subplots=True, layout=(1, 1))
+
+        msg = "At least one dimension of layout must be positive"
+        with pytest.raises(ValueError, match=msg):
+            df.plot(subplots=True, layout=(-1, -1))
+
+    @pytest.mark.parametrize(
+        "kwargs, expected_axes_num, expected_layout, expected_shape",
+        [
+            ({}, 1, (1, 1), (1,)),
+            ({"layout": (3, 3)}, 1, (3, 3), (3, 3)),
+        ],
+    )
+    def test_subplots_layout_single_column(
+        self, kwargs, expected_axes_num, expected_layout, expected_shape
+    ):
+
+        # GH 6667
+        df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10]))
+        axes = df.plot(subplots=True, **kwargs)
+        self._check_axes_shape(
+            axes,
+            axes_num=expected_axes_num,
+            layout=expected_layout,
+        )
+        assert axes.shape == expected_shape
+
+    def test_subplots_warnings(self):
+        # GH 9464
+        with tm.assert_produces_warning(None):
+            df = DataFrame(np.random.randn(100, 4))
+            df.plot(subplots=True, layout=(3, 2))
+
+            df = DataFrame(
+                np.random.randn(100, 4), index=date_range("1/1/2000", periods=100)
+            )
+            df.plot(subplots=True, layout=(3, 2))
+
+    def test_subplots_multiple_axes(self):
+        # GH 5353, 6970, GH 7069
+        fig, axes = self.plt.subplots(2, 3)
+        df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10]))
+
+        returned = df.plot(subplots=True, ax=axes[0], sharex=False, sharey=False)
+        self._check_axes_shape(returned, axes_num=3, layout=(1, 3))
+        assert returned.shape == (3,)
+        assert returned[0].figure is fig
+        # draw on second row
+        returned = df.plot(subplots=True, ax=axes[1], sharex=False, sharey=False)
+        self._check_axes_shape(returned, axes_num=3, layout=(1, 3))
+        assert returned.shape == (3,)
+        assert returned[0].figure is fig
+        self._check_axes_shape(axes, axes_num=6, layout=(2, 3))
+        tm.close()
+
+        msg = "The number of passed axes must be 3, the same as the output plot"
+
+        with pytest.raises(ValueError, match=msg):
+            fig, axes = self.plt.subplots(2, 3)
+            # pass different number of axes from required
+            df.plot(subplots=True, ax=axes)
+
+        # pass 2-dim axes and an invalid layout
+        # an invalid layout should not affect the input and return value
+        # (the warning is tested in
+        # TestDataFrameGroupByPlots.test_grouped_box_multiple_axes)
+        fig, axes = self.plt.subplots(2, 2)
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore", UserWarning)
+            df = DataFrame(np.random.rand(10, 4), index=list(string.ascii_letters[:10]))
+
+            returned = df.plot(
+                subplots=True, ax=axes, layout=(2, 1), sharex=False,
sharey=False + ) + self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) + assert returned.shape == (4,) + + returned = df.plot( + subplots=True, ax=axes, layout=(2, -1), sharex=False, sharey=False + ) + self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) + assert returned.shape == (4,) + + returned = df.plot( + subplots=True, ax=axes, layout=(-1, 2), sharex=False, sharey=False + ) + self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) + assert returned.shape == (4,) + + # single column + fig, axes = self.plt.subplots(1, 1) + df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10])) + + axes = df.plot(subplots=True, ax=[axes], sharex=False, sharey=False) + self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) + assert axes.shape == (1,) + + def test_subplots_ts_share_axes(self): + # GH 3964 + fig, axes = self.plt.subplots(3, 3, sharex=True, sharey=True) + self.plt.subplots_adjust(left=0.05, right=0.95, hspace=0.3, wspace=0.3) + df = DataFrame( + np.random.randn(10, 9), + index=date_range(start="2014-07-01", freq="M", periods=10), + ) + for i, ax in enumerate(axes.ravel()): + df[i].plot(ax=ax, fontsize=5) + + # Rows other than bottom should not be visible + for ax in axes[0:-1].ravel(): + self._check_visible(ax.get_xticklabels(), visible=False) + + # Bottom row should be visible + for ax in axes[-1].ravel(): + self._check_visible(ax.get_xticklabels(), visible=True) + + # First column should be visible + for ax in axes[[0, 1, 2], [0]].ravel(): + self._check_visible(ax.get_yticklabels(), visible=True) + + # Other columns should not be visible + for ax in axes[[0, 1, 2], [1]].ravel(): + self._check_visible(ax.get_yticklabels(), visible=False) + for ax in axes[[0, 1, 2], [2]].ravel(): + self._check_visible(ax.get_yticklabels(), visible=False) + + def test_subplots_sharex_axes_existing_axes(self): + # GH 9158 + d = {"A": [1.0, 2.0, 3.0, 4.0], "B": [4.0, 3.0, 2.0, 1.0], "C": [5, 1, 3, 4]} + df = DataFrame(d, index=date_range("2014 10 11", "2014 10 14")) + + axes = df[["A", "B"]].plot(subplots=True) + df["C"].plot(ax=axes[0], secondary_y=True) + + self._check_visible(axes[0].get_xticklabels(), visible=False) + self._check_visible(axes[1].get_xticklabels(), visible=True) + for ax in axes.ravel(): + self._check_visible(ax.get_yticklabels(), visible=True) + + def test_subplots_dup_columns(self): + # GH 10962 + df = DataFrame(np.random.rand(5, 5), columns=list("aaaaa")) + axes = df.plot(subplots=True) + for ax in axes: + self._check_legend_labels(ax, labels=["a"]) + assert len(ax.lines) == 1 + tm.close() + + axes = df.plot(subplots=True, secondary_y="a") + for ax in axes: + # (right) is only attached when subplots=False + self._check_legend_labels(ax, labels=["a"]) + assert len(ax.lines) == 1 + tm.close() + + ax = df.plot(secondary_y="a") + self._check_legend_labels(ax, labels=["a (right)"] * 5) + assert len(ax.lines) == 0 + assert len(ax.right_ax.lines) == 5 + + def test_bar_log_no_subplots(self): + # GH3254, GH3298 matplotlib/matplotlib#1882, #1892 + # regressions in 1.2.1 + expected = np.array([0.1, 1.0, 10.0, 100]) + + # no subplots + df = DataFrame({"A": [3] * 5, "B": list(range(1, 6))}, index=range(5)) + ax = df.plot.bar(grid=True, log=True) + tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), expected) + + def test_bar_log_subplots(self): + expected = np.array([0.1, 1.0, 10.0, 100.0, 1000.0, 1e4]) + + ax = DataFrame([Series([200, 300]), Series([300, 500])]).plot.bar( + log=True, subplots=True + ) + + 
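+        # log=True places every subplot on a logarithmic y-axis, so the major
+        # ticks checked below are the decades spanning the plotted values.
+        # A stand-alone sketch of the same behaviour (illustrative only, not
+        # part of the original file):
+        #
+        #     axes = DataFrame({"x": [1, 10, 100]}).plot.bar(
+        #         log=True, subplots=True
+        #     )
+        #     assert axes[0].get_yscale() == "log"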
tm.assert_numpy_array_equal(ax[0].yaxis.get_ticklocs(), expected) + tm.assert_numpy_array_equal(ax[1].yaxis.get_ticklocs(), expected) + + def test_boxplot_subplots_return_type(self): + df = self.hist_df + + # normal style: return_type=None + result = df.plot.box(subplots=True) + assert isinstance(result, Series) + self._check_box_return_type( + result, None, expected_keys=["height", "weight", "category"] + ) + + for t in ["dict", "axes", "both"]: + returned = df.plot.box(return_type=t, subplots=True) + self._check_box_return_type( + returned, + t, + expected_keys=["height", "weight", "category"], + check_ax_title=False, + ) + + def test_df_subplots_patterns_minorticks(self): + # GH 10657 + import matplotlib.pyplot as plt + + df = DataFrame( + np.random.randn(10, 2), + index=date_range("1/1/2000", periods=10), + columns=list("AB"), + ) + + # shared subplots + fig, axes = plt.subplots(2, 1, sharex=True) + axes = df.plot(subplots=True, ax=axes) + for ax in axes: + assert len(ax.lines) == 1 + self._check_visible(ax.get_yticklabels(), visible=True) + # xaxis of 1st ax must be hidden + self._check_visible(axes[0].get_xticklabels(), visible=False) + self._check_visible(axes[0].get_xticklabels(minor=True), visible=False) + self._check_visible(axes[1].get_xticklabels(), visible=True) + self._check_visible(axes[1].get_xticklabels(minor=True), visible=True) + tm.close() + + fig, axes = plt.subplots(2, 1) + with tm.assert_produces_warning(UserWarning): + axes = df.plot(subplots=True, ax=axes, sharex=True) + for ax in axes: + assert len(ax.lines) == 1 + self._check_visible(ax.get_yticklabels(), visible=True) + # xaxis of 1st ax must be hidden + self._check_visible(axes[0].get_xticklabels(), visible=False) + self._check_visible(axes[0].get_xticklabels(minor=True), visible=False) + self._check_visible(axes[1].get_xticklabels(), visible=True) + self._check_visible(axes[1].get_xticklabels(minor=True), visible=True) + tm.close() + + # not shared + fig, axes = plt.subplots(2, 1) + axes = df.plot(subplots=True, ax=axes) + for ax in axes: + assert len(ax.lines) == 1 + self._check_visible(ax.get_yticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(minor=True), visible=True) + tm.close() + + def test_subplots_sharex_false(self): + # test when sharex is set to False, two plots should have different + # labels, GH 25160 + df = DataFrame(np.random.rand(10, 2)) + df.iloc[5:, 1] = np.nan + df.iloc[:5, 0] = np.nan + + figs, axs = self.plt.subplots(2, 1) + df.plot.line(ax=axs, subplots=True, sharex=False) + + expected_ax1 = np.arange(4.5, 10, 0.5) + expected_ax2 = np.arange(-0.5, 5, 0.5) + + tm.assert_numpy_array_equal(axs[0].get_xticks(), expected_ax1) + tm.assert_numpy_array_equal(axs[1].get_xticks(), expected_ax2) + + def test_subplots_constrained_layout(self): + # GH 25261 + idx = date_range(start="now", periods=10) + df = DataFrame(np.random.rand(10, 3), index=idx) + kwargs = {} + if hasattr(self.plt.Figure, "get_constrained_layout"): + kwargs["constrained_layout"] = True + fig, axes = self.plt.subplots(2, **kwargs) + with tm.assert_produces_warning(None): + df.plot(ax=axes[0]) + with tm.ensure_clean(return_filelike=True) as path: + self.plt.savefig(path) + + @pytest.mark.parametrize( + "index_name, old_label, new_label", + [ + (None, "", "new"), + ("old", "old", "new"), + (None, "", ""), + (None, "", 1), + (None, "", [1, 2]), + ], + ) + @pytest.mark.parametrize("kind", ["line", "area", "bar"]) + def test_xlabel_ylabel_dataframe_subplots( + 
self, kind, index_name, old_label, new_label + ): + # GH 9093 + df = DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) + df.index.name = index_name + + # default is the ylabel is not shown and xlabel is index name + axes = df.plot(kind=kind, subplots=True) + assert all(ax.get_ylabel() == "" for ax in axes) + assert all(ax.get_xlabel() == old_label for ax in axes) + + # old xlabel will be overridden and assigned ylabel will be used as ylabel + axes = df.plot(kind=kind, ylabel=new_label, xlabel=new_label, subplots=True) + assert all(ax.get_ylabel() == str(new_label) for ax in axes) + assert all(ax.get_xlabel() == str(new_label) for ax in axes) + + @pytest.mark.parametrize( + "kwargs", + [ + # stacked center + {"kind": "bar", "stacked": True}, + {"kind": "bar", "stacked": True, "width": 0.9}, + {"kind": "barh", "stacked": True}, + {"kind": "barh", "stacked": True, "width": 0.9}, + # center + {"kind": "bar", "stacked": False}, + {"kind": "bar", "stacked": False, "width": 0.9}, + {"kind": "barh", "stacked": False}, + {"kind": "barh", "stacked": False, "width": 0.9}, + # subplots center + {"kind": "bar", "subplots": True}, + {"kind": "bar", "subplots": True, "width": 0.9}, + {"kind": "barh", "subplots": True}, + {"kind": "barh", "subplots": True, "width": 0.9}, + # align edge + {"kind": "bar", "stacked": True, "align": "edge"}, + {"kind": "bar", "stacked": True, "width": 0.9, "align": "edge"}, + {"kind": "barh", "stacked": True, "align": "edge"}, + {"kind": "barh", "stacked": True, "width": 0.9, "align": "edge"}, + {"kind": "bar", "stacked": False, "align": "edge"}, + {"kind": "bar", "stacked": False, "width": 0.9, "align": "edge"}, + {"kind": "barh", "stacked": False, "align": "edge"}, + {"kind": "barh", "stacked": False, "width": 0.9, "align": "edge"}, + {"kind": "bar", "subplots": True, "align": "edge"}, + {"kind": "bar", "subplots": True, "width": 0.9, "align": "edge"}, + {"kind": "barh", "subplots": True, "align": "edge"}, + {"kind": "barh", "subplots": True, "width": 0.9, "align": "edge"}, + ], + ) + def test_bar_align_multiple_columns(self, kwargs): + # GH2157 + df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) + self._check_bar_alignment(df, **kwargs) + + @pytest.mark.parametrize( + "kwargs", + [ + {"kind": "bar", "stacked": False}, + {"kind": "bar", "stacked": True}, + {"kind": "barh", "stacked": False}, + {"kind": "barh", "stacked": True}, + {"kind": "bar", "subplots": True}, + {"kind": "barh", "subplots": True}, + ], + ) + def test_bar_align_single_column(self, kwargs): + df = DataFrame(np.random.randn(5)) + self._check_bar_alignment(df, **kwargs) + + @pytest.mark.parametrize( + "kwargs", + [ + {"kind": "bar", "stacked": False}, + {"kind": "bar", "stacked": True}, + {"kind": "barh", "stacked": False}, + {"kind": "barh", "stacked": True}, + {"kind": "bar", "subplots": True}, + {"kind": "barh", "subplots": True}, + ], + ) + def test_bar_barwidth_position(self, kwargs): + df = DataFrame(np.random.randn(5, 5)) + self._check_bar_alignment(df, width=0.9, position=0.2, **kwargs) + + def test_bar_barwidth_position_int(self): + # GH 12979 + df = DataFrame(np.random.randn(5, 5)) + + for w in [1, 1.0]: + ax = df.plot.bar(stacked=True, width=w) + ticks = ax.xaxis.get_ticklocs() + tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4])) + assert ax.get_xlim() == (-0.75, 4.75) + # check left-edge of bars + assert ax.patches[0].get_x() == -0.5 + assert ax.patches[-1].get_x() == 3.5 + + self._check_bar_alignment(df, kind="bar", stacked=True, width=1) + 
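+        # With width=1 neighbouring bars touch; the edge assertions in the
+        # loop above land exactly halfway between the integer ticks (-0.5 and
+        # 3.5), and these alignment checks verify the same geometry for the
+        # remaining bar kinds.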
self._check_bar_alignment(df, kind="barh", stacked=False, width=1) + self._check_bar_alignment(df, kind="barh", stacked=True, width=1) + self._check_bar_alignment(df, kind="bar", subplots=True, width=1) + self._check_bar_alignment(df, kind="barh", subplots=True, width=1) + + def _check_bar_alignment( + self, + df, + kind="bar", + stacked=False, + subplots=False, + align="center", + width=0.5, + position=0.5, + ): + + axes = df.plot( + kind=kind, + stacked=stacked, + subplots=subplots, + align=align, + width=width, + position=position, + grid=True, + ) + + axes = self._flatten_visible(axes) + + for ax in axes: + if kind == "bar": + axis = ax.xaxis + ax_min, ax_max = ax.get_xlim() + min_edge = min(p.get_x() for p in ax.patches) + max_edge = max(p.get_x() + p.get_width() for p in ax.patches) + elif kind == "barh": + axis = ax.yaxis + ax_min, ax_max = ax.get_ylim() + min_edge = min(p.get_y() for p in ax.patches) + max_edge = max(p.get_y() + p.get_height() for p in ax.patches) + else: + raise ValueError + + # GH 7498 + # compare margins between lim and bar edges + tm.assert_almost_equal(ax_min, min_edge - 0.25) + tm.assert_almost_equal(ax_max, max_edge + 0.25) + + p = ax.patches[0] + if kind == "bar" and (stacked is True or subplots is True): + edge = p.get_x() + center = edge + p.get_width() * position + elif kind == "bar" and stacked is False: + center = p.get_x() + p.get_width() * len(df.columns) * position + edge = p.get_x() + elif kind == "barh" and (stacked is True or subplots is True): + center = p.get_y() + p.get_height() * position + edge = p.get_y() + elif kind == "barh" and stacked is False: + center = p.get_y() + p.get_height() * len(df.columns) * position + edge = p.get_y() + else: + raise ValueError + + # Check the ticks locates on integer + assert (axis.get_ticklocs() == np.arange(len(df))).all() + + if align == "center": + # Check whether the bar locates on center + tm.assert_almost_equal(axis.get_ticklocs()[0], center) + elif align == "edge": + # Check whether the bar's edge starts from the tick + tm.assert_almost_equal(axis.get_ticklocs()[0], edge) + else: + raise ValueError + + return axes diff --git a/.local/lib/python3.8/site-packages/pandas/tests/plotting/frame/test_hist_box_by.py b/.local/lib/python3.8/site-packages/pandas/tests/plotting/frame/test_hist_box_by.py new file mode 100644 index 0000000..c92d952 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/plotting/frame/test_hist_box_by.py @@ -0,0 +1,389 @@ +import re + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas import DataFrame +import pandas._testing as tm +from pandas.tests.plotting.common import ( + TestPlotBase, + _check_plot_works, +) + + +def _create_hist_box_with_by_df(): + np.random.seed(0) + df = DataFrame(np.random.randn(30, 2), columns=["A", "B"]) + df["C"] = np.random.choice(["a", "b", "c"], 30) + df["D"] = np.random.choice(["a", "b", "c"], 30) + return df + + +@td.skip_if_no_mpl +class TestHistWithBy(TestPlotBase): + def setup_method(self, method): + TestPlotBase.setup_method(self, method) + import matplotlib as mpl + + mpl.rcdefaults() + self.hist_df = _create_hist_box_with_by_df() + + @pytest.mark.parametrize( + "by, column, titles, legends", + [ + ("C", "A", ["a", "b", "c"], [["A"]] * 3), + ("C", ["A", "B"], ["a", "b", "c"], [["A", "B"]] * 3), + ("C", None, ["a", "b", "c"], [["A", "B"]] * 3), + ( + ["C", "D"], + "A", + [ + "(a, a)", + "(a, b)", + "(a, c)", + "(b, a)", + "(b, b)", + "(b, c)", + "(c, a)", + "(c, b)", + "(c, c)", + ], + 
[["A"]] * 9, + ), + ( + ["C", "D"], + ["A", "B"], + [ + "(a, a)", + "(a, b)", + "(a, c)", + "(b, a)", + "(b, b)", + "(b, c)", + "(c, a)", + "(c, b)", + "(c, c)", + ], + [["A", "B"]] * 9, + ), + ( + ["C", "D"], + None, + [ + "(a, a)", + "(a, b)", + "(a, c)", + "(b, a)", + "(b, b)", + "(b, c)", + "(c, a)", + "(c, b)", + "(c, c)", + ], + [["A", "B"]] * 9, + ), + ], + ) + def test_hist_plot_by_argument(self, by, column, titles, legends): + # GH 15079 + axes = _check_plot_works(self.hist_df.plot.hist, column=column, by=by) + result_titles = [ax.get_title() for ax in axes] + result_legends = [ + [legend.get_text() for legend in ax.get_legend().texts] for ax in axes + ] + + assert result_legends == legends + assert result_titles == titles + + @pytest.mark.parametrize( + "by, column, titles, legends", + [ + (0, "A", ["a", "b", "c"], [["A"]] * 3), + (0, None, ["a", "b", "c"], [["A", "B"]] * 3), + ( + [0, "D"], + "A", + [ + "(a, a)", + "(a, b)", + "(a, c)", + "(b, a)", + "(b, b)", + "(b, c)", + "(c, a)", + "(c, b)", + "(c, c)", + ], + [["A"]] * 9, + ), + ], + ) + def test_hist_plot_by_0(self, by, column, titles, legends): + # GH 15079 + df = self.hist_df.copy() + df = df.rename(columns={"C": 0}) + + axes = _check_plot_works(df.plot.hist, column=column, by=by) + result_titles = [ax.get_title() for ax in axes] + result_legends = [ + [legend.get_text() for legend in ax.get_legend().texts] for ax in axes + ] + + assert result_legends == legends + assert result_titles == titles + + @pytest.mark.parametrize( + "by, column", + [ + ([], ["A"]), + ([], ["A", "B"]), + ((), None), + ((), ["A", "B"]), + ], + ) + def test_hist_plot_empty_list_string_tuple_by(self, by, column): + # GH 15079 + msg = "No group keys passed" + with pytest.raises(ValueError, match=msg): + _check_plot_works(self.hist_df.plot.hist, column=column, by=by) + + @pytest.mark.slow + @pytest.mark.parametrize( + "by, column, layout, axes_num", + [ + (["C"], "A", (2, 2), 3), + ("C", "A", (2, 2), 3), + (["C"], ["A"], (1, 3), 3), + ("C", None, (3, 1), 3), + ("C", ["A", "B"], (3, 1), 3), + (["C", "D"], "A", (9, 1), 9), + (["C", "D"], "A", (3, 3), 9), + (["C", "D"], ["A"], (5, 2), 9), + (["C", "D"], ["A", "B"], (9, 1), 9), + (["C", "D"], None, (9, 1), 9), + (["C", "D"], ["A", "B"], (5, 2), 9), + ], + ) + def test_hist_plot_layout_with_by(self, by, column, layout, axes_num): + # GH 15079 + # _check_plot_works adds an ax so catch warning. 
see GH #13188 + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works( + self.hist_df.plot.hist, column=column, by=by, layout=layout + ) + self._check_axes_shape(axes, axes_num=axes_num, layout=layout) + + @pytest.mark.parametrize( + "msg, by, layout", + [ + ("larger than required size", ["C", "D"], (1, 1)), + (re.escape("Layout must be a tuple of (rows, columns)"), "C", (1,)), + ("At least one dimension of layout must be positive", "C", (-1, -1)), + ], + ) + def test_hist_plot_invalid_layout_with_by_raises(self, msg, by, layout): + # GH 15079, test if error is raised when invalid layout is given + + with pytest.raises(ValueError, match=msg): + self.hist_df.plot.hist(column=["A", "B"], by=by, layout=layout) + + @pytest.mark.slow + def test_axis_share_x_with_by(self): + # GH 15079 + ax1, ax2, ax3 = self.hist_df.plot.hist(column="A", by="C", sharex=True) + + # share x + assert self.get_x_axis(ax1).joined(ax1, ax2) + assert self.get_x_axis(ax2).joined(ax1, ax2) + assert self.get_x_axis(ax3).joined(ax1, ax3) + assert self.get_x_axis(ax3).joined(ax2, ax3) + + # don't share y + assert not self.get_y_axis(ax1).joined(ax1, ax2) + assert not self.get_y_axis(ax2).joined(ax1, ax2) + assert not self.get_y_axis(ax3).joined(ax1, ax3) + assert not self.get_y_axis(ax3).joined(ax2, ax3) + + @pytest.mark.slow + def test_axis_share_y_with_by(self): + # GH 15079 + ax1, ax2, ax3 = self.hist_df.plot.hist(column="A", by="C", sharey=True) + + # share y + assert self.get_y_axis(ax1).joined(ax1, ax2) + assert self.get_y_axis(ax2).joined(ax1, ax2) + assert self.get_y_axis(ax3).joined(ax1, ax3) + assert self.get_y_axis(ax3).joined(ax2, ax3) + + # don't share x + assert not self.get_x_axis(ax1).joined(ax1, ax2) + assert not self.get_x_axis(ax2).joined(ax1, ax2) + assert not self.get_x_axis(ax3).joined(ax1, ax3) + assert not self.get_x_axis(ax3).joined(ax2, ax3) + + @pytest.mark.parametrize("figsize", [(12, 8), (20, 10)]) + def test_figure_shape_hist_with_by(self, figsize): + # GH 15079 + axes = self.hist_df.plot.hist(column="A", by="C", figsize=figsize) + self._check_axes_shape(axes, axes_num=3, figsize=figsize) + + +@td.skip_if_no_mpl +class TestBoxWithBy(TestPlotBase): + def setup_method(self, method): + TestPlotBase.setup_method(self, method) + import matplotlib as mpl + + mpl.rcdefaults() + self.box_df = _create_hist_box_with_by_df() + + @pytest.mark.parametrize( + "by, column, titles, xticklabels", + [ + ("C", "A", ["A"], [["a", "b", "c"]]), + ( + ["C", "D"], + "A", + ["A"], + [ + [ + "(a, a)", + "(a, b)", + "(a, c)", + "(b, a)", + "(b, b)", + "(b, c)", + "(c, a)", + "(c, b)", + "(c, c)", + ] + ], + ), + ("C", ["A", "B"], ["A", "B"], [["a", "b", "c"]] * 2), + ( + ["C", "D"], + ["A", "B"], + ["A", "B"], + [ + [ + "(a, a)", + "(a, b)", + "(a, c)", + "(b, a)", + "(b, b)", + "(b, c)", + "(c, a)", + "(c, b)", + "(c, c)", + ] + ] + * 2, + ), + (["C"], None, ["A", "B"], [["a", "b", "c"]] * 2), + ], + ) + def test_box_plot_by_argument(self, by, column, titles, xticklabels): + # GH 15079 + axes = _check_plot_works(self.box_df.plot.box, column=column, by=by) + result_titles = [ax.get_title() for ax in axes] + result_xticklabels = [ + [label.get_text() for label in ax.get_xticklabels()] for ax in axes + ] + + assert result_xticklabels == xticklabels + assert result_titles == titles + + @pytest.mark.parametrize( + "by, column, titles, xticklabels", + [ + (0, "A", ["A"], [["a", "b", "c"]]), + ( + [0, "D"], + "A", + ["A"], + [ + [ + "(a, a)", + "(a, b)", + "(a, c)", + "(b, a)", + "(b, b)", + "(b, c)", + "(c, 
a)", + "(c, b)", + "(c, c)", + ] + ], + ), + (0, None, ["A", "B"], [["a", "b", "c"]] * 2), + ], + ) + def test_box_plot_by_0(self, by, column, titles, xticklabels): + # GH 15079 + df = self.box_df.copy() + df = df.rename(columns={"C": 0}) + + axes = _check_plot_works(df.plot.box, column=column, by=by) + result_titles = [ax.get_title() for ax in axes] + result_xticklabels = [ + [label.get_text() for label in ax.get_xticklabels()] for ax in axes + ] + + assert result_xticklabels == xticklabels + assert result_titles == titles + + @pytest.mark.parametrize( + "by, column", + [ + ([], ["A"]), + ((), "A"), + ([], None), + ((), ["A", "B"]), + ], + ) + def test_box_plot_with_none_empty_list_by(self, by, column): + # GH 15079 + msg = "No group keys passed" + with pytest.raises(ValueError, match=msg): + _check_plot_works(self.box_df.plot.box, column=column, by=by) + + @pytest.mark.slow + @pytest.mark.parametrize( + "by, column, layout, axes_num", + [ + (["C"], "A", (1, 1), 1), + ("C", "A", (1, 1), 1), + ("C", None, (2, 1), 2), + ("C", ["A", "B"], (1, 2), 2), + (["C", "D"], "A", (1, 1), 1), + (["C", "D"], None, (1, 2), 2), + ], + ) + def test_box_plot_layout_with_by(self, by, column, layout, axes_num): + # GH 15079 + axes = _check_plot_works( + self.box_df.plot.box, column=column, by=by, layout=layout + ) + self._check_axes_shape(axes, axes_num=axes_num, layout=layout) + + @pytest.mark.parametrize( + "msg, by, layout", + [ + ("larger than required size", ["C", "D"], (1, 1)), + (re.escape("Layout must be a tuple of (rows, columns)"), "C", (1,)), + ("At least one dimension of layout must be positive", "C", (-1, -1)), + ], + ) + def test_box_plot_invalid_layout_with_by_raises(self, msg, by, layout): + # GH 15079, test if error is raised when invalid layout is given + + with pytest.raises(ValueError, match=msg): + self.box_df.plot.box(column=["A", "B"], by=by, layout=layout) + + @pytest.mark.parametrize("figsize", [(12, 8), (20, 10)]) + def test_figure_shape_hist_with_by(self, figsize): + # GH 15079 + axes = self.box_df.plot.box(column="A", by="C", figsize=figsize) + self._check_axes_shape(axes, axes_num=1, figsize=figsize) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/plotting/test_backend.py b/.local/lib/python3.8/site-packages/pandas/tests/plotting/test_backend.py new file mode 100644 index 0000000..be053a8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/plotting/test_backend.py @@ -0,0 +1,111 @@ +import sys +import types + +import pkg_resources +import pytest + +import pandas.util._test_decorators as td + +import pandas + +dummy_backend = types.ModuleType("pandas_dummy_backend") +setattr(dummy_backend, "plot", lambda *args, **kwargs: "used_dummy") + + +pytestmark = pytest.mark.slow + + +@pytest.fixture +def restore_backend(): + """Restore the plotting backend to matplotlib""" + pandas.set_option("plotting.backend", "matplotlib") + yield + pandas.set_option("plotting.backend", "matplotlib") + + +def test_backend_is_not_module(): + msg = "Could not find plotting backend 'not_an_existing_module'." 
+ with pytest.raises(ValueError, match=msg): + pandas.set_option("plotting.backend", "not_an_existing_module") + + assert pandas.options.plotting.backend == "matplotlib" + + +def test_backend_is_correct(monkeypatch, restore_backend): + monkeypatch.setitem(sys.modules, "pandas_dummy_backend", dummy_backend) + + pandas.set_option("plotting.backend", "pandas_dummy_backend") + assert pandas.get_option("plotting.backend") == "pandas_dummy_backend" + assert ( + pandas.plotting._core._get_plot_backend("pandas_dummy_backend") is dummy_backend + ) + + +def test_backend_can_be_set_in_plot_call(monkeypatch, restore_backend): + monkeypatch.setitem(sys.modules, "pandas_dummy_backend", dummy_backend) + df = pandas.DataFrame([1, 2, 3]) + + assert pandas.get_option("plotting.backend") == "matplotlib" + assert df.plot(backend="pandas_dummy_backend") == "used_dummy" + + +@td.skip_if_no_mpl +def test_register_entrypoint(restore_backend): + + dist = pkg_resources.get_distribution("pandas") + if dist.module_path not in pandas.__file__: + # We are running from a non-installed pandas, and this test is invalid + pytest.skip("Testing a non-installed pandas") + + mod = types.ModuleType("my_backend") + mod.plot = lambda *args, **kwargs: 1 + + backends = pkg_resources.get_entry_map("pandas") + my_entrypoint = pkg_resources.EntryPoint( + "pandas_plotting_backend", mod.__name__, dist=dist + ) + backends["pandas_plotting_backends"]["my_backend"] = my_entrypoint + # TODO: the docs recommend importlib.util.module_from_spec. But this works for now. + sys.modules["my_backend"] = mod + + result = pandas.plotting._core._get_plot_backend("my_backend") + assert result is mod + + # TODO(GH#27517): https://github.com/pandas-dev/pandas/issues/27517 + # Remove the td.skip_if_no_mpl + with pandas.option_context("plotting.backend", "my_backend"): + result = pandas.plotting._core._get_plot_backend() + + assert result is mod + + +def test_setting_backend_without_plot_raises(): + # GH-28163 + module = types.ModuleType("pandas_plot_backend") + sys.modules["pandas_plot_backend"] = module + + assert pandas.options.plotting.backend == "matplotlib" + with pytest.raises( + ValueError, match="Could not find plotting backend 'pandas_plot_backend'." + ): + pandas.set_option("plotting.backend", "pandas_plot_backend") + + assert pandas.options.plotting.backend == "matplotlib" + + +@td.skip_if_mpl +def test_no_matplotlib_ok(): + msg = ( + 'matplotlib is required for plotting when the default backend "matplotlib" is ' + "selected." 
+ ) + with pytest.raises(ImportError, match=msg): + pandas.plotting._core._get_plot_backend("matplotlib") + + +def test_extra_kinds_ok(monkeypatch, restore_backend): + # https://github.com/pandas-dev/pandas/pull/28647 + monkeypatch.setitem(sys.modules, "pandas_dummy_backend", dummy_backend) + pandas.set_option("plotting.backend", "pandas_dummy_backend") + df = pandas.DataFrame({"A": [1, 2, 3]}) + df.plot(kind="not a real kind") diff --git a/.local/lib/python3.8/site-packages/pandas/tests/plotting/test_boxplot_method.py b/.local/lib/python3.8/site-packages/pandas/tests/plotting/test_boxplot_method.py new file mode 100644 index 0000000..5c543f9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/plotting/test_boxplot_method.py @@ -0,0 +1,568 @@ +""" Test cases for .boxplot method """ + +import itertools +import string + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas import ( + DataFrame, + MultiIndex, + Series, + date_range, + timedelta_range, +) +import pandas._testing as tm +from pandas.tests.plotting.common import ( + TestPlotBase, + _check_plot_works, +) + +import pandas.plotting as plotting + +pytestmark = pytest.mark.slow + + +@td.skip_if_no_mpl +class TestDataFramePlots(TestPlotBase): + def test_boxplot_legacy1(self): + df = DataFrame( + np.random.randn(6, 4), + index=list(string.ascii_letters[:6]), + columns=["one", "two", "three", "four"], + ) + df["indic"] = ["foo", "bar"] * 3 + df["indic2"] = ["foo", "bar", "foo"] * 2 + + _check_plot_works(df.boxplot, return_type="dict") + _check_plot_works(df.boxplot, column=["one", "two"], return_type="dict") + # _check_plot_works adds an ax so catch warning. see GH #13188 + with tm.assert_produces_warning(UserWarning): + _check_plot_works(df.boxplot, column=["one", "two"], by="indic") + _check_plot_works(df.boxplot, column="one", by=["indic", "indic2"]) + with tm.assert_produces_warning(UserWarning): + _check_plot_works(df.boxplot, by="indic") + with tm.assert_produces_warning(UserWarning): + _check_plot_works(df.boxplot, by=["indic", "indic2"]) + _check_plot_works(plotting._core.boxplot, data=df["one"], return_type="dict") + _check_plot_works(df.boxplot, notch=1, return_type="dict") + with tm.assert_produces_warning(UserWarning): + _check_plot_works(df.boxplot, by="indic", notch=1) + + def test_boxplot_legacy2(self): + df = DataFrame(np.random.rand(10, 2), columns=["Col1", "Col2"]) + df["X"] = Series(["A", "A", "A", "A", "A", "B", "B", "B", "B", "B"]) + df["Y"] = Series(["A"] * 10) + with tm.assert_produces_warning(UserWarning): + _check_plot_works(df.boxplot, by="X") + + # When ax is supplied and required number of axes is 1, + # passed ax should be used: + fig, ax = self.plt.subplots() + axes = df.boxplot("Col1", by="X", ax=ax) + ax_axes = ax.axes + assert ax_axes is axes + + fig, ax = self.plt.subplots() + axes = df.groupby("Y").boxplot(ax=ax, return_type="axes") + ax_axes = ax.axes + assert ax_axes is axes["A"] + + # Multiple columns with an ax argument should use same figure + fig, ax = self.plt.subplots() + with tm.assert_produces_warning(UserWarning): + axes = df.boxplot( + column=["Col1", "Col2"], by="X", ax=ax, return_type="axes" + ) + assert axes["Col1"].get_figure() is fig + + # When by is None, check that all relevant lines are present in the + # dict + fig, ax = self.plt.subplots() + d = df.boxplot(ax=ax, return_type="dict") + lines = list(itertools.chain.from_iterable(d.values())) + assert len(ax.get_lines()) == len(lines) + + def test_boxplot_return_type_none(self): 
+ # GH 12216; return_type=None & by=None -> axes + result = self.hist_df.boxplot() + assert isinstance(result, self.plt.Axes) + + def test_boxplot_return_type_legacy(self): + # API change in https://github.com/pandas-dev/pandas/pull/7096 + + df = DataFrame( + np.random.randn(6, 4), + index=list(string.ascii_letters[:6]), + columns=["one", "two", "three", "four"], + ) + msg = "return_type must be {'axes', 'dict', 'both'}" + with pytest.raises(ValueError, match=msg): + df.boxplot(return_type="NOT_A_TYPE") + + result = df.boxplot() + self._check_box_return_type(result, "axes") + + with tm.assert_produces_warning(False): + result = df.boxplot(return_type="dict") + self._check_box_return_type(result, "dict") + + with tm.assert_produces_warning(False): + result = df.boxplot(return_type="axes") + self._check_box_return_type(result, "axes") + + with tm.assert_produces_warning(False): + result = df.boxplot(return_type="both") + self._check_box_return_type(result, "both") + + def test_boxplot_axis_limits(self): + def _check_ax_limits(col, ax): + y_min, y_max = ax.get_ylim() + assert y_min <= col.min() + assert y_max >= col.max() + + df = self.hist_df.copy() + df["age"] = np.random.randint(1, 20, df.shape[0]) + # One full row + height_ax, weight_ax = df.boxplot(["height", "weight"], by="category") + _check_ax_limits(df["height"], height_ax) + _check_ax_limits(df["weight"], weight_ax) + assert weight_ax._sharey == height_ax + + # Two rows, one partial + p = df.boxplot(["height", "weight", "age"], by="category") + height_ax, weight_ax, age_ax = p[0, 0], p[0, 1], p[1, 0] + dummy_ax = p[1, 1] + + _check_ax_limits(df["height"], height_ax) + _check_ax_limits(df["weight"], weight_ax) + _check_ax_limits(df["age"], age_ax) + assert weight_ax._sharey == height_ax + assert age_ax._sharey == height_ax + assert dummy_ax._sharey is None + + def test_boxplot_empty_column(self): + df = DataFrame(np.random.randn(20, 4)) + df.loc[:, 0] = np.nan + _check_plot_works(df.boxplot, return_type="axes") + + def test_figsize(self): + df = DataFrame(np.random.rand(10, 5), columns=["A", "B", "C", "D", "E"]) + result = df.boxplot(return_type="axes", figsize=(12, 8)) + assert result.figure.bbox_inches.width == 12 + assert result.figure.bbox_inches.height == 8 + + def test_fontsize(self): + df = DataFrame({"a": [1, 2, 3, 4, 5, 6]}) + self._check_ticks_props( + df.boxplot("a", fontsize=16), xlabelsize=16, ylabelsize=16 + ) + + def test_boxplot_numeric_data(self): + # GH 22799 + df = DataFrame( + { + "a": date_range("2012-01-01", periods=100), + "b": np.random.randn(100), + "c": np.random.randn(100) + 2, + "d": date_range("2012-01-01", periods=100).astype(str), + "e": date_range("2012-01-01", periods=100, tz="UTC"), + "f": timedelta_range("1 days", periods=100), + } + ) + ax = df.plot(kind="box") + assert [x.get_text() for x in ax.get_xticklabels()] == ["b", "c"] + + @pytest.mark.parametrize( + "colors_kwd, expected", + [ + ( + {"boxes": "r", "whiskers": "b", "medians": "g", "caps": "c"}, + {"boxes": "r", "whiskers": "b", "medians": "g", "caps": "c"}, + ), + ({"boxes": "r"}, {"boxes": "r"}), + ("r", {"boxes": "r", "whiskers": "r", "medians": "r", "caps": "r"}), + ], + ) + def test_color_kwd(self, colors_kwd, expected): + # GH: 26214 + df = DataFrame(np.random.rand(10, 2)) + result = df.boxplot(color=colors_kwd, return_type="dict") + for k, v in expected.items(): + assert result[k][0].get_color() == v + + @pytest.mark.parametrize( + "scheme,expected", + [ + ( + "dark_background", + { + "boxes": "#8dd3c7", + "whiskers": "#8dd3c7", + 
"medians": "#bfbbd9", + "caps": "#8dd3c7", + }, + ), + ( + "default", + { + "boxes": "#1f77b4", + "whiskers": "#1f77b4", + "medians": "#2ca02c", + "caps": "#1f77b4", + }, + ), + ], + ) + def test_colors_in_theme(self, scheme, expected): + # GH: 40769 + df = DataFrame(np.random.rand(10, 2)) + import matplotlib.pyplot as plt + + plt.style.use(scheme) + result = df.plot.box(return_type="dict") + for k, v in expected.items(): + assert result[k][0].get_color() == v + + @pytest.mark.parametrize( + "dict_colors, msg", + [({"boxes": "r", "invalid_key": "r"}, "invalid key 'invalid_key'")], + ) + def test_color_kwd_errors(self, dict_colors, msg): + # GH: 26214 + df = DataFrame(np.random.rand(10, 2)) + with pytest.raises(ValueError, match=msg): + df.boxplot(color=dict_colors, return_type="dict") + + @pytest.mark.parametrize( + "props, expected", + [ + ("boxprops", "boxes"), + ("whiskerprops", "whiskers"), + ("capprops", "caps"), + ("medianprops", "medians"), + ], + ) + def test_specified_props_kwd(self, props, expected): + # GH 30346 + df = DataFrame({k: np.random.random(100) for k in "ABC"}) + kwd = {props: {"color": "C1"}} + result = df.boxplot(return_type="dict", **kwd) + + assert result[expected][0].get_color() == "C1" + + +@td.skip_if_no_mpl +class TestDataFrameGroupByPlots(TestPlotBase): + def test_boxplot_legacy1(self): + grouped = self.hist_df.groupby(by="gender") + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works(grouped.boxplot, return_type="axes") + self._check_axes_shape(list(axes.values), axes_num=2, layout=(1, 2)) + axes = _check_plot_works(grouped.boxplot, subplots=False, return_type="axes") + self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) + + def test_boxplot_legacy2(self): + tuples = zip(string.ascii_letters[:10], range(10)) + df = DataFrame(np.random.rand(10, 3), index=MultiIndex.from_tuples(tuples)) + grouped = df.groupby(level=1) + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works(grouped.boxplot, return_type="axes") + self._check_axes_shape(list(axes.values), axes_num=10, layout=(4, 3)) + + axes = _check_plot_works(grouped.boxplot, subplots=False, return_type="axes") + self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) + + def test_boxplot_legacy3(self): + tuples = zip(string.ascii_letters[:10], range(10)) + df = DataFrame(np.random.rand(10, 3), index=MultiIndex.from_tuples(tuples)) + grouped = df.unstack(level=1).groupby(level=0, axis=1) + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works(grouped.boxplot, return_type="axes") + self._check_axes_shape(list(axes.values), axes_num=3, layout=(2, 2)) + axes = _check_plot_works(grouped.boxplot, subplots=False, return_type="axes") + self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) + + def test_grouped_plot_fignums(self): + n = 10 + weight = Series(np.random.normal(166, 20, size=n)) + height = Series(np.random.normal(60, 10, size=n)) + with tm.RNGContext(42): + gender = np.random.choice(["male", "female"], size=n) + df = DataFrame({"height": height, "weight": weight, "gender": gender}) + gb = df.groupby("gender") + + res = gb.plot() + assert len(self.plt.get_fignums()) == 2 + assert len(res) == 2 + tm.close() + + res = gb.boxplot(return_type="axes") + assert len(self.plt.get_fignums()) == 1 + assert len(res) == 2 + tm.close() + + # now works with GH 5610 as gender is excluded + res = df.groupby("gender").hist() + tm.close() + + def test_grouped_box_return_type(self): + df = self.hist_df + + # old style: return_type=None + result = 
df.boxplot(by="gender") + assert isinstance(result, np.ndarray) + self._check_box_return_type( + result, None, expected_keys=["height", "weight", "category"] + ) + + # now for groupby + result = df.groupby("gender").boxplot(return_type="dict") + self._check_box_return_type(result, "dict", expected_keys=["Male", "Female"]) + + columns2 = "X B C D A G Y N Q O".split() + df2 = DataFrame(np.random.randn(50, 10), columns=columns2) + categories2 = "A B C D E F G H I J".split() + df2["category"] = categories2 * 5 + + for t in ["dict", "axes", "both"]: + returned = df.groupby("classroom").boxplot(return_type=t) + self._check_box_return_type(returned, t, expected_keys=["A", "B", "C"]) + + returned = df.boxplot(by="classroom", return_type=t) + self._check_box_return_type( + returned, t, expected_keys=["height", "weight", "category"] + ) + + returned = df2.groupby("category").boxplot(return_type=t) + self._check_box_return_type(returned, t, expected_keys=categories2) + + returned = df2.boxplot(by="category", return_type=t) + self._check_box_return_type(returned, t, expected_keys=columns2) + + def test_grouped_box_layout(self): + df = self.hist_df + + msg = "Layout of 1x1 must be larger than required size 2" + with pytest.raises(ValueError, match=msg): + df.boxplot(column=["weight", "height"], by=df.gender, layout=(1, 1)) + + msg = "The 'layout' keyword is not supported when 'by' is None" + with pytest.raises(ValueError, match=msg): + df.boxplot( + column=["height", "weight", "category"], + layout=(2, 1), + return_type="dict", + ) + + msg = "At least one dimension of layout must be positive" + with pytest.raises(ValueError, match=msg): + df.boxplot(column=["weight", "height"], by=df.gender, layout=(-1, -1)) + + # _check_plot_works adds an ax so catch warning. see GH #13188 + with tm.assert_produces_warning(UserWarning): + box = _check_plot_works( + df.groupby("gender").boxplot, column="height", return_type="dict" + ) + self._check_axes_shape(self.plt.gcf().axes, axes_num=2, layout=(1, 2)) + + with tm.assert_produces_warning(UserWarning): + box = _check_plot_works( + df.groupby("category").boxplot, column="height", return_type="dict" + ) + self._check_axes_shape(self.plt.gcf().axes, axes_num=4, layout=(2, 2)) + + # GH 6769 + with tm.assert_produces_warning(UserWarning): + box = _check_plot_works( + df.groupby("classroom").boxplot, column="height", return_type="dict" + ) + self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(2, 2)) + + # GH 5897 + axes = df.boxplot( + column=["height", "weight", "category"], by="gender", return_type="axes" + ) + self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(2, 2)) + for ax in [axes["height"]]: + self._check_visible(ax.get_xticklabels(), visible=False) + self._check_visible([ax.xaxis.get_label()], visible=False) + for ax in [axes["weight"], axes["category"]]: + self._check_visible(ax.get_xticklabels()) + self._check_visible([ax.xaxis.get_label()]) + + box = df.groupby("classroom").boxplot( + column=["height", "weight", "category"], return_type="dict" + ) + self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(2, 2)) + + with tm.assert_produces_warning(UserWarning): + box = _check_plot_works( + df.groupby("category").boxplot, + column="height", + layout=(3, 2), + return_type="dict", + ) + self._check_axes_shape(self.plt.gcf().axes, axes_num=4, layout=(3, 2)) + with tm.assert_produces_warning(UserWarning): + box = _check_plot_works( + df.groupby("category").boxplot, + column="height", + layout=(3, -1), + return_type="dict", + ) + 
self._check_axes_shape(self.plt.gcf().axes, axes_num=4, layout=(3, 2)) + + box = df.boxplot( + column=["height", "weight", "category"], by="gender", layout=(4, 1) + ) + self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(4, 1)) + + box = df.boxplot( + column=["height", "weight", "category"], by="gender", layout=(-1, 1) + ) + self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(3, 1)) + + box = df.groupby("classroom").boxplot( + column=["height", "weight", "category"], layout=(1, 4), return_type="dict" + ) + self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(1, 4)) + + box = df.groupby("classroom").boxplot( # noqa + column=["height", "weight", "category"], layout=(1, -1), return_type="dict" + ) + self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(1, 3)) + + def test_grouped_box_multiple_axes(self): + # GH 6970, GH 7069 + df = self.hist_df + + # check warning to ignore sharex / sharey + # this check should be done in the first function which + # passes multiple axes to plot, hist or boxplot + # location should be changed if other test is added + # which has earlier alphabetical order + with tm.assert_produces_warning(UserWarning): + fig, axes = self.plt.subplots(2, 2) + df.groupby("category").boxplot(column="height", return_type="axes", ax=axes) + self._check_axes_shape(self.plt.gcf().axes, axes_num=4, layout=(2, 2)) + + fig, axes = self.plt.subplots(2, 3) + with tm.assert_produces_warning(UserWarning): + returned = df.boxplot( + column=["height", "weight", "category"], + by="gender", + return_type="axes", + ax=axes[0], + ) + returned = np.array(list(returned.values)) + self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) + tm.assert_numpy_array_equal(returned, axes[0]) + assert returned[0].figure is fig + + # draw on second row + with tm.assert_produces_warning(UserWarning): + returned = df.groupby("classroom").boxplot( + column=["height", "weight", "category"], return_type="axes", ax=axes[1] + ) + returned = np.array(list(returned.values)) + self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) + tm.assert_numpy_array_equal(returned, axes[1]) + assert returned[0].figure is fig + + msg = "The number of passed axes must be 3, the same as the output plot" + with pytest.raises(ValueError, match=msg): + fig, axes = self.plt.subplots(2, 3) + # pass different number of axes from required + with tm.assert_produces_warning(UserWarning): + axes = df.groupby("classroom").boxplot(ax=axes) + + def test_fontsize(self): + df = DataFrame({"a": [1, 2, 3, 4, 5, 6], "b": [0, 0, 0, 1, 1, 1]}) + self._check_ticks_props( + df.boxplot("a", by="b", fontsize=16), xlabelsize=16, ylabelsize=16 + ) + + @pytest.mark.parametrize( + "col, expected_xticklabel", + [ + ("v", ["(a, v)", "(b, v)", "(c, v)", "(d, v)", "(e, v)"]), + (["v"], ["(a, v)", "(b, v)", "(c, v)", "(d, v)", "(e, v)"]), + ("v1", ["(a, v1)", "(b, v1)", "(c, v1)", "(d, v1)", "(e, v1)"]), + ( + ["v", "v1"], + [ + "(a, v)", + "(a, v1)", + "(b, v)", + "(b, v1)", + "(c, v)", + "(c, v1)", + "(d, v)", + "(d, v1)", + "(e, v)", + "(e, v1)", + ], + ), + ( + None, + [ + "(a, v)", + "(a, v1)", + "(b, v)", + "(b, v1)", + "(c, v)", + "(c, v1)", + "(d, v)", + "(d, v1)", + "(e, v)", + "(e, v1)", + ], + ), + ], + ) + def test_groupby_boxplot_subplots_false(self, col, expected_xticklabel): + # GH 16748 + df = DataFrame( + { + "cat": np.random.choice(list("abcde"), 100), + "v": np.random.rand(100), + "v1": np.random.rand(100), + } + ) + grouped = df.groupby("cat") + + axes = _check_plot_works( + grouped.boxplot, 
subplots=False, column=col, return_type="axes" + ) + + result_xticklabel = [x.get_text() for x in axes.get_xticklabels()] + assert expected_xticklabel == result_xticklabel + + def test_groupby_boxplot_object(self): + # GH 43480 + df = self.hist_df.astype("object") + grouped = df.groupby("gender") + msg = "boxplot method requires numerical columns, nothing to plot" + with pytest.raises(ValueError, match=msg): + _check_plot_works(grouped.boxplot, subplots=False) + + def test_boxplot_multiindex_column(self): + # GH 16748 + arrays = [ + ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + tuples = list(zip(*arrays)) + index = MultiIndex.from_tuples(tuples, names=["first", "second"]) + df = DataFrame(np.random.randn(3, 8), index=["A", "B", "C"], columns=index) + + col = [("bar", "one"), ("bar", "two")] + axes = _check_plot_works(df.boxplot, column=col, return_type="axes") + + expected_xticklabel = ["(bar, one)", "(bar, two)"] + result_xticklabel = [x.get_text() for x in axes.get_xticklabels()] + assert expected_xticklabel == result_xticklabel diff --git a/.local/lib/python3.8/site-packages/pandas/tests/plotting/test_common.py b/.local/lib/python3.8/site-packages/pandas/tests/plotting/test_common.py new file mode 100644 index 0000000..6eebf0c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/plotting/test_common.py @@ -0,0 +1,44 @@ +import pytest + +import pandas.util._test_decorators as td + +from pandas import DataFrame +from pandas.tests.plotting.common import ( + TestPlotBase, + _check_plot_works, + _gen_two_subplots, +) + +pytestmark = pytest.mark.slow + + +@td.skip_if_no_mpl +class TestCommon(TestPlotBase): + def test__check_ticks_props(self): + # GH 34768 + df = DataFrame({"b": [0, 1, 0], "a": [1, 2, 3]}) + ax = _check_plot_works(df.plot, rot=30) + ax.yaxis.set_tick_params(rotation=30) + msg = "expected 0.00000 but got " + with pytest.raises(AssertionError, match=msg): + self._check_ticks_props(ax, xrot=0) + with pytest.raises(AssertionError, match=msg): + self._check_ticks_props(ax, xlabelsize=0) + with pytest.raises(AssertionError, match=msg): + self._check_ticks_props(ax, yrot=0) + with pytest.raises(AssertionError, match=msg): + self._check_ticks_props(ax, ylabelsize=0) + + def test__gen_two_subplots_with_ax(self): + fig = self.plt.gcf() + gen = _gen_two_subplots(f=lambda **kwargs: None, fig=fig, ax="test") + # On the first yield, no subplot should be added since ax was passed + next(gen) + assert fig.get_axes() == [] + # On the second, the one axis should match fig.subplot(2, 1, 2) + next(gen) + axes = fig.get_axes() + assert len(axes) == 1 + subplot_geometry = list(axes[0].get_subplotspec().get_geometry()[:-1]) + subplot_geometry[-1] += 1 + assert subplot_geometry == [2, 1, 2] diff --git a/.local/lib/python3.8/site-packages/pandas/tests/plotting/test_converter.py b/.local/lib/python3.8/site-packages/pandas/tests/plotting/test_converter.py new file mode 100644 index 0000000..fe8620e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/plotting/test_converter.py @@ -0,0 +1,384 @@ +from datetime import ( + date, + datetime, +) +import subprocess +import sys + +import numpy as np +import pytest + +import pandas._config.config as cf + +import pandas.util._test_decorators as td + +from pandas import ( + Index, + Period, + Series, + Timestamp, + date_range, +) +import pandas._testing as tm + +from pandas.plotting import ( + deregister_matplotlib_converters, + 
register_matplotlib_converters, +) +from pandas.tseries.offsets import ( + Day, + Micro, + Milli, + Second, +) + +try: + from pandas.plotting._matplotlib import converter +except ImportError: + # try / except, rather than skip, to avoid internal refactoring + # causing an improper skip + pass + +pytest.importorskip("matplotlib.pyplot") +dates = pytest.importorskip("matplotlib.dates") + + +pytestmark = pytest.mark.slow + + +def test_registry_mpl_resets(): + # Check that Matplotlib converters are properly reset (see issue #27481) + code = ( + "import matplotlib.units as units; " + "import matplotlib.dates as mdates; " + "n_conv = len(units.registry); " + "import pandas as pd; " + "pd.plotting.register_matplotlib_converters(); " + "pd.plotting.deregister_matplotlib_converters(); " + "assert len(units.registry) == n_conv" + ) + call = [sys.executable, "-c", code] + subprocess.check_output(call) + + +def test_time2num_accepts_unicode(): + assert converter.time2num("00:01") == converter.time2num("00:01") + + +class TestRegistration: + def test_dont_register_by_default(self): + # Run in subprocess to ensure a clean state + code = ( + "import matplotlib.units; " + "import pandas as pd; " + "units = dict(matplotlib.units.registry); " + "assert pd.Timestamp not in units" + ) + call = [sys.executable, "-c", code] + assert subprocess.check_call(call) == 0 + + @td.skip_if_no("matplotlib", min_version="3.1.3") + def test_registering_no_warning(self): + plt = pytest.importorskip("matplotlib.pyplot") + s = Series(range(12), index=date_range("2017", periods=12)) + _, ax = plt.subplots() + + # Set to the "warn" state, in case this isn't the first test run + register_matplotlib_converters() + ax.plot(s.index, s.values) + plt.close() + + def test_pandas_plots_register(self): + plt = pytest.importorskip("matplotlib.pyplot") + s = Series(range(12), index=date_range("2017", periods=12)) + # Set to the "warn" state, in case this isn't the first test run + with tm.assert_produces_warning(None) as w: + s.plot() + + try: + assert len(w) == 0 + finally: + plt.close() + + def test_matplotlib_formatters(self): + units = pytest.importorskip("matplotlib.units") + + # Can't make any assertion about the start state. + # We check that toggling converters off removes it, and toggling it + # on restores it. 
+ + with cf.option_context("plotting.matplotlib.register_converters", True): + with cf.option_context("plotting.matplotlib.register_converters", False): + assert Timestamp not in units.registry + assert Timestamp in units.registry + + @td.skip_if_no("matplotlib", min_version="3.1.3") + def test_option_no_warning(self): + pytest.importorskip("matplotlib.pyplot") + ctx = cf.option_context("plotting.matplotlib.register_converters", False) + plt = pytest.importorskip("matplotlib.pyplot") + s = Series(range(12), index=date_range("2017", periods=12)) + _, ax = plt.subplots() + + # Test without registering first, no warning + with ctx: + ax.plot(s.index, s.values) + + # Now test with registering + register_matplotlib_converters() + with ctx: + ax.plot(s.index, s.values) + plt.close() + + def test_registry_resets(self): + units = pytest.importorskip("matplotlib.units") + dates = pytest.importorskip("matplotlib.dates") + + # make a copy, to reset to + original = dict(units.registry) + + try: + # get to a known state + units.registry.clear() + date_converter = dates.DateConverter() + units.registry[datetime] = date_converter + units.registry[date] = date_converter + + register_matplotlib_converters() + assert units.registry[date] is not date_converter + deregister_matplotlib_converters() + assert units.registry[date] is date_converter + + finally: + # restore original state + units.registry.clear() + for k, v in original.items(): + units.registry[k] = v + + +class TestDateTimeConverter: + def setup_method(self, method): + self.dtc = converter.DatetimeConverter() + self.tc = converter.TimeFormatter(None) + + def test_convert_accepts_unicode(self): + r1 = self.dtc.convert("12:22", None, None) + r2 = self.dtc.convert("12:22", None, None) + assert r1 == r2, "DatetimeConverter.convert should accept unicode" + + def test_conversion(self): + rs = self.dtc.convert(["2012-1-1"], None, None)[0] + xp = dates.date2num(datetime(2012, 1, 1)) + assert rs == xp + + rs = self.dtc.convert("2012-1-1", None, None) + assert rs == xp + + rs = self.dtc.convert(date(2012, 1, 1), None, None) + assert rs == xp + + rs = self.dtc.convert("2012-1-1", None, None) + assert rs == xp + + rs = self.dtc.convert(Timestamp("2012-1-1"), None, None) + assert rs == xp + + # also testing datetime64 dtype (GH8614) + rs = self.dtc.convert("2012-01-01", None, None) + assert rs == xp + + rs = self.dtc.convert("2012-01-01 00:00:00+0000", None, None) + assert rs == xp + + rs = self.dtc.convert( + np.array(["2012-01-01 00:00:00+0000", "2012-01-02 00:00:00+0000"]), + None, + None, + ) + assert rs[0] == xp + + # we have a tz-aware date (constructed so that when we turn to utc it + # is the same as our sample) + ts = Timestamp("2012-01-01").tz_localize("UTC").tz_convert("US/Eastern") + rs = self.dtc.convert(ts, None, None) + assert rs == xp + + rs = self.dtc.convert(ts.to_pydatetime(), None, None) + assert rs == xp + + rs = self.dtc.convert(Index([ts - Day(1), ts]), None, None) + assert rs[1] == xp + + rs = self.dtc.convert(Index([ts - Day(1), ts]).to_pydatetime(), None, None) + assert rs[1] == xp + + def test_conversion_float(self): + rtol = 0.5 * 10 ** -9 + + rs = self.dtc.convert(Timestamp("2012-1-1 01:02:03", tz="UTC"), None, None) + xp = converter.dates.date2num(Timestamp("2012-1-1 01:02:03", tz="UTC")) + tm.assert_almost_equal(rs, xp, rtol=rtol) + + rs = self.dtc.convert( + Timestamp("2012-1-1 09:02:03", tz="Asia/Hong_Kong"), None, None + ) + tm.assert_almost_equal(rs, xp, rtol=rtol) + + rs = self.dtc.convert(datetime(2012, 1, 1, 1, 2, 3), 
None, None) + tm.assert_almost_equal(rs, xp, rtol=rtol) + + def test_conversion_outofbounds_datetime(self): + # 2579 + values = [date(1677, 1, 1), date(1677, 1, 2)] + rs = self.dtc.convert(values, None, None) + xp = converter.dates.date2num(values) + tm.assert_numpy_array_equal(rs, xp) + rs = self.dtc.convert(values[0], None, None) + xp = converter.dates.date2num(values[0]) + assert rs == xp + + values = [datetime(1677, 1, 1, 12), datetime(1677, 1, 2, 12)] + rs = self.dtc.convert(values, None, None) + xp = converter.dates.date2num(values) + tm.assert_numpy_array_equal(rs, xp) + rs = self.dtc.convert(values[0], None, None) + xp = converter.dates.date2num(values[0]) + assert rs == xp + + @pytest.mark.parametrize( + "time,format_expected", + [ + (0, "00:00"), # time2num(datetime.time.min) + (86399.999999, "23:59:59.999999"), # time2num(datetime.time.max) + (90000, "01:00"), + (3723, "01:02:03"), + (39723.2, "11:02:03.200"), + ], + ) + def test_time_formatter(self, time, format_expected): + # issue 18478 + result = self.tc(time) + assert result == format_expected + + def test_dateindex_conversion(self): + rtol = 10 ** -9 + + for freq in ("B", "L", "S"): + dateindex = tm.makeDateIndex(k=10, freq=freq) + rs = self.dtc.convert(dateindex, None, None) + xp = converter.dates.date2num(dateindex._mpl_repr()) + tm.assert_almost_equal(rs, xp, rtol=rtol) + + def test_resolution(self): + def _assert_less(ts1, ts2): + val1 = self.dtc.convert(ts1, None, None) + val2 = self.dtc.convert(ts2, None, None) + if not val1 < val2: + raise AssertionError(f"{val1} is not less than {val2}.") + + # Matplotlib's time representation using floats cannot distinguish + # intervals smaller than ~10 microsecond in the common range of years. + ts = Timestamp("2012-1-1") + _assert_less(ts, ts + Second()) + _assert_less(ts, ts + Milli()) + _assert_less(ts, ts + Micro(50)) + + def test_convert_nested(self): + inner = [Timestamp("2017-01-01"), Timestamp("2017-01-02")] + data = [inner, inner] + result = self.dtc.convert(data, None, None) + expected = [self.dtc.convert(x, None, None) for x in data] + assert (np.array(result) == expected).all() + + +class TestPeriodConverter: + def setup_method(self, method): + self.pc = converter.PeriodConverter() + + class Axis: + pass + + self.axis = Axis() + self.axis.freq = "D" + + def test_convert_accepts_unicode(self): + r1 = self.pc.convert("2012-1-1", None, self.axis) + r2 = self.pc.convert("2012-1-1", None, self.axis) + assert r1 == r2 + + def test_conversion(self): + rs = self.pc.convert(["2012-1-1"], None, self.axis)[0] + xp = Period("2012-1-1").ordinal + assert rs == xp + + rs = self.pc.convert("2012-1-1", None, self.axis) + assert rs == xp + + rs = self.pc.convert([date(2012, 1, 1)], None, self.axis)[0] + assert rs == xp + + rs = self.pc.convert(date(2012, 1, 1), None, self.axis) + assert rs == xp + + rs = self.pc.convert([Timestamp("2012-1-1")], None, self.axis)[0] + assert rs == xp + + rs = self.pc.convert(Timestamp("2012-1-1"), None, self.axis) + assert rs == xp + + rs = self.pc.convert("2012-01-01", None, self.axis) + assert rs == xp + + rs = self.pc.convert("2012-01-01 00:00:00+0000", None, self.axis) + assert rs == xp + + rs = self.pc.convert( + np.array( + ["2012-01-01 00:00:00+0000", "2012-01-02 00:00:00+0000"], + dtype="datetime64[ns]", + ), + None, + self.axis, + ) + assert rs[0] == xp + + def test_integer_passthrough(self): + # GH9012 + rs = self.pc.convert([0, 1], None, self.axis) + xp = [0, 1] + assert rs == xp + + def test_convert_nested(self): + data = ["2012-1-1", 
"2012-1-2"] + r1 = self.pc.convert([data, data], None, self.axis) + r2 = [self.pc.convert(data, None, self.axis) for _ in range(2)] + assert r1 == r2 + + +class TestTimeDeltaConverter: + """Test timedelta converter""" + + @pytest.mark.parametrize( + "x, decimal, format_expected", + [ + (0.0, 0, "00:00:00"), + (3972320000000, 1, "01:06:12.3"), + (713233432000000, 2, "8 days 06:07:13.43"), + (32423432000000, 4, "09:00:23.4320"), + ], + ) + def test_format_timedelta_ticks(self, x, decimal, format_expected): + tdc = converter.TimeSeries_TimedeltaFormatter + result = tdc.format_timedelta_ticks(x, pos=None, n_decimals=decimal) + assert result == format_expected + + @pytest.mark.parametrize("view_interval", [(1, 2), (2, 1)]) + def test_call_w_different_view_intervals(self, view_interval, monkeypatch): + # previously broke on reversed xlmits; see GH37454 + class mock_axis: + def get_view_interval(self): + return view_interval + + tdc = converter.TimeSeries_TimedeltaFormatter() + monkeypatch.setattr(tdc, "axis", mock_axis()) + tdc(0.0, 0) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/plotting/test_datetimelike.py b/.local/lib/python3.8/site-packages/pandas/tests/plotting/test_datetimelike.py new file mode 100644 index 0000000..8fcd028 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/plotting/test_datetimelike.py @@ -0,0 +1,1528 @@ +""" Test cases for time series specific (freq conversion, etc) """ +from datetime import ( + date, + datetime, + time, + timedelta, +) +import pickle + +import numpy as np +import pytest + +from pandas._libs.tslibs import ( + BaseOffset, + to_offset, +) +import pandas.util._test_decorators as td + +from pandas import ( + DataFrame, + Index, + NaT, + Series, + concat, + isna, + to_datetime, +) +import pandas._testing as tm +from pandas.core.indexes.datetimes import ( + DatetimeIndex, + bdate_range, + date_range, +) +from pandas.core.indexes.period import ( + Period, + PeriodIndex, + period_range, +) +from pandas.core.indexes.timedeltas import timedelta_range +from pandas.tests.plotting.common import TestPlotBase + +from pandas.tseries.offsets import WeekOfMonth + +pytestmark = pytest.mark.slow + + +@td.skip_if_no_mpl +class TestTSPlot(TestPlotBase): + def setup_method(self, method): + TestPlotBase.setup_method(self, method) + + self.freq = ["S", "T", "H", "D", "W", "M", "Q", "A"] + idx = [period_range("12/31/1999", freq=x, periods=100) for x in self.freq] + self.period_ser = [Series(np.random.randn(len(x)), x) for x in idx] + self.period_df = [ + DataFrame(np.random.randn(len(x), 3), index=x, columns=["A", "B", "C"]) + for x in idx + ] + + freq = ["S", "T", "H", "D", "W", "M", "Q-DEC", "A", "1B30Min"] + idx = [date_range("12/31/1999", freq=x, periods=100) for x in freq] + self.datetime_ser = [Series(np.random.randn(len(x)), x) for x in idx] + self.datetime_df = [ + DataFrame(np.random.randn(len(x), 3), index=x, columns=["A", "B", "C"]) + for x in idx + ] + + def teardown_method(self, method): + tm.close() + + def test_ts_plot_with_tz(self, tz_aware_fixture): + # GH2877, GH17173, GH31205, GH31580 + tz = tz_aware_fixture + index = date_range("1/1/2011", periods=2, freq="H", tz=tz) + ts = Series([188.5, 328.25], index=index) + with tm.assert_produces_warning(None): + _check_plot_works(ts.plot) + ax = ts.plot() + xdata = list(ax.get_lines())[0].get_xdata() + # Check first and last points' labels are correct + assert (xdata[0].hour, xdata[0].minute) == (0, 0) + assert (xdata[-1].hour, xdata[-1].minute) == (1, 0) + + def 
test_fontsize_set_correctly(self): + # For issue #8765 + df = DataFrame(np.random.randn(10, 9), index=range(10)) + fig, ax = self.plt.subplots() + df.plot(fontsize=2, ax=ax) + for label in ax.get_xticklabels() + ax.get_yticklabels(): + assert label.get_fontsize() == 2 + + def test_frame_inferred(self): + # inferred freq + idx = date_range("1/1/1987", freq="MS", periods=100) + idx = DatetimeIndex(idx.values, freq=None) + + df = DataFrame(np.random.randn(len(idx), 3), index=idx) + _check_plot_works(df.plot) + + # axes freq + idx = idx[0:40].union(idx[45:99]) + df2 = DataFrame(np.random.randn(len(idx), 3), index=idx) + _check_plot_works(df2.plot) + + # N > 1 + idx = date_range("2008-1-1 00:15:00", freq="15T", periods=10) + idx = DatetimeIndex(idx.values, freq=None) + df = DataFrame(np.random.randn(len(idx), 3), index=idx) + _check_plot_works(df.plot) + + def test_is_error_nozeroindex(self): + # GH11858 + i = np.array([1, 2, 3]) + a = DataFrame(i, index=i) + _check_plot_works(a.plot, xerr=a) + _check_plot_works(a.plot, yerr=a) + + def test_nonnumeric_exclude(self): + idx = date_range("1/1/1987", freq="A", periods=3) + df = DataFrame({"A": ["x", "y", "z"], "B": [1, 2, 3]}, idx) + + fig, ax = self.plt.subplots() + df.plot(ax=ax) # it works + assert len(ax.get_lines()) == 1 # B was plotted + self.plt.close(fig) + + msg = "no numeric data to plot" + with pytest.raises(TypeError, match=msg): + df["A"].plot() + + def test_tsplot(self): + + _, ax = self.plt.subplots() + ts = tm.makeTimeSeries() + + for s in self.period_ser: + _check_plot_works(s.plot, ax=ax) + + for s in self.datetime_ser: + _check_plot_works(s.plot, ax=ax) + + _, ax = self.plt.subplots() + ts.plot(style="k", ax=ax) + color = (0.0, 0.0, 0.0, 1) + assert color == ax.get_lines()[0].get_color() + + def test_both_style_and_color(self): + + ts = tm.makeTimeSeries() + msg = ( + "Cannot pass 'style' string with a color symbol and 'color' " + "keyword argument. 
Please use one or the other or pass 'style' " + "without a color symbol" + ) + with pytest.raises(ValueError, match=msg): + ts.plot(style="b-", color="#000099") + + s = ts.reset_index(drop=True) + with pytest.raises(ValueError, match=msg): + s.plot(style="b-", color="#000099") + + def test_high_freq(self): + freaks = ["ms", "us"] + for freq in freaks: + _, ax = self.plt.subplots() + rng = date_range("1/1/2012", periods=100, freq=freq) + ser = Series(np.random.randn(len(rng)), rng) + _check_plot_works(ser.plot, ax=ax) + + def test_get_datevalue(self): + from pandas.plotting._matplotlib.converter import get_datevalue + + assert get_datevalue(None, "D") is None + assert get_datevalue(1987, "A") == 1987 + assert get_datevalue(Period(1987, "A"), "M") == Period("1987-12", "M").ordinal + assert get_datevalue("1/1/1987", "D") == Period("1987-1-1", "D").ordinal + + def test_ts_plot_format_coord(self): + def check_format_of_first_point(ax, expected_string): + first_line = ax.get_lines()[0] + first_x = first_line.get_xdata()[0].ordinal + first_y = first_line.get_ydata()[0] + assert expected_string == ax.format_coord(first_x, first_y) + + annual = Series(1, index=date_range("2014-01-01", periods=3, freq="A-DEC")) + _, ax = self.plt.subplots() + annual.plot(ax=ax) + check_format_of_first_point(ax, "t = 2014 y = 1.000000") + + # note this is added to the annual plot already in existence, and + # changes its freq field + daily = Series(1, index=date_range("2014-01-01", periods=3, freq="D")) + daily.plot(ax=ax) + check_format_of_first_point(ax, "t = 2014-01-01 y = 1.000000") + tm.close() + + def test_line_plot_period_series(self): + for s in self.period_ser: + _check_plot_works(s.plot, s.index.freq) + + @pytest.mark.parametrize( + "frqncy", ["1S", "3S", "5T", "7H", "4D", "8W", "11M", "3A"] + ) + def test_line_plot_period_mlt_series(self, frqncy): + # test period index line plot for series with multiples (`mlt`) of the + # frequency (`frqncy`) rule code. tests resolution of issue #14763 + idx = period_range("12/31/1999", freq=frqncy, periods=100) + s = Series(np.random.randn(len(idx)), idx) + _check_plot_works(s.plot, s.index.freq.rule_code) + + def test_line_plot_datetime_series(self): + for s in self.datetime_ser: + _check_plot_works(s.plot, s.index.freq.rule_code) + + def test_line_plot_period_frame(self): + for df in self.period_df: + _check_plot_works(df.plot, df.index.freq) + + @pytest.mark.parametrize( + "frqncy", ["1S", "3S", "5T", "7H", "4D", "8W", "11M", "3A"] + ) + def test_line_plot_period_mlt_frame(self, frqncy): + # test period index line plot for DataFrames with multiples (`mlt`) + # of the frequency (`frqncy`) rule code. 
tests resolution of issue + # #14763 + idx = period_range("12/31/1999", freq=frqncy, periods=100) + df = DataFrame(np.random.randn(len(idx), 3), index=idx, columns=["A", "B", "C"]) + freq = df.index.asfreq(df.index.freq.rule_code).freq + _check_plot_works(df.plot, freq) + + def test_line_plot_datetime_frame(self): + for df in self.datetime_df: + freq = df.index.to_period(df.index.freq.rule_code).freq + _check_plot_works(df.plot, freq) + + def test_line_plot_inferred_freq(self): + for ser in self.datetime_ser: + ser = Series(ser.values, Index(np.asarray(ser.index))) + _check_plot_works(ser.plot, ser.index.inferred_freq) + + ser = ser[[0, 3, 5, 6]] + _check_plot_works(ser.plot) + + def test_fake_inferred_business(self): + _, ax = self.plt.subplots() + rng = date_range("2001-1-1", "2001-1-10") + ts = Series(range(len(rng)), index=rng) + ts = concat([ts[:3], ts[5:]]) + ts.plot(ax=ax) + assert not hasattr(ax, "freq") + + def test_plot_offset_freq(self): + ser = tm.makeTimeSeries() + _check_plot_works(ser.plot) + + dr = date_range(ser.index[0], freq="BQS", periods=10) + ser = Series(np.random.randn(len(dr)), index=dr) + _check_plot_works(ser.plot) + + def test_plot_multiple_inferred_freq(self): + dr = Index([datetime(2000, 1, 1), datetime(2000, 1, 6), datetime(2000, 1, 11)]) + ser = Series(np.random.randn(len(dr)), index=dr) + _check_plot_works(ser.plot) + + def test_uhf(self): + import pandas.plotting._matplotlib.converter as conv + + idx = date_range("2012-6-22 21:59:51.960928", freq="L", periods=500) + df = DataFrame(np.random.randn(len(idx), 2), index=idx) + + _, ax = self.plt.subplots() + df.plot(ax=ax) + axis = ax.get_xaxis() + + tlocs = axis.get_ticklocs() + tlabels = axis.get_ticklabels() + for loc, label in zip(tlocs, tlabels): + xp = conv._from_ordinal(loc).strftime("%H:%M:%S.%f") + rs = str(label.get_text()) + if len(rs): + assert xp == rs + + def test_irreg_hf(self): + idx = date_range("2012-6-22 21:59:51", freq="S", periods=100) + df = DataFrame(np.random.randn(len(idx), 2), index=idx) + + irreg = df.iloc[[0, 1, 3, 4]] + _, ax = self.plt.subplots() + irreg.plot(ax=ax) + diffs = Series(ax.get_lines()[0].get_xydata()[:, 0]).diff() + + sec = 1.0 / 24 / 60 / 60 + assert (np.fabs(diffs[1:] - [sec, sec * 2, sec]) < 1e-8).all() + + _, ax = self.plt.subplots() + df2 = df.copy() + df2.index = df.index.astype(object) + df2.plot(ax=ax) + diffs = Series(ax.get_lines()[0].get_xydata()[:, 0]).diff() + assert (np.fabs(diffs[1:] - sec) < 1e-8).all() + + def test_irregular_datetime64_repr_bug(self): + ser = tm.makeTimeSeries() + ser = ser[[0, 1, 2, 7]] + + _, ax = self.plt.subplots() + + ret = ser.plot(ax=ax) + assert ret is not None + + for rs, xp in zip(ax.get_lines()[0].get_xdata(), ser.index): + assert rs == xp + + def test_business_freq(self): + bts = tm.makePeriodSeries() + _, ax = self.plt.subplots() + bts.plot(ax=ax) + assert ax.get_lines()[0].get_xydata()[0, 0] == bts.index[0].ordinal + idx = ax.get_lines()[0].get_xdata() + assert PeriodIndex(data=idx).freqstr == "B" + + def test_business_freq_convert(self): + bts = tm.makeTimeSeries(300).asfreq("BM") + ts = bts.to_period("M") + _, ax = self.plt.subplots() + bts.plot(ax=ax) + assert ax.get_lines()[0].get_xydata()[0, 0] == ts.index[0].ordinal + idx = ax.get_lines()[0].get_xdata() + assert PeriodIndex(data=idx).freqstr == "M" + + def test_freq_with_no_period_alias(self): + # GH34487 + freq = WeekOfMonth() + bts = tm.makeTimeSeries(5).asfreq(freq) + _, ax = self.plt.subplots() + bts.plot(ax=ax) + + idx = ax.get_lines()[0].get_xdata() + msg = 
"freq not specified and cannot be inferred" + with pytest.raises(ValueError, match=msg): + PeriodIndex(data=idx) + + def test_nonzero_base(self): + # GH2571 + idx = date_range("2012-12-20", periods=24, freq="H") + timedelta(minutes=30) + df = DataFrame(np.arange(24), index=idx) + _, ax = self.plt.subplots() + df.plot(ax=ax) + rs = ax.get_lines()[0].get_xdata() + assert not Index(rs).is_normalized + + def test_dataframe(self): + bts = DataFrame({"a": tm.makeTimeSeries()}) + _, ax = self.plt.subplots() + bts.plot(ax=ax) + idx = ax.get_lines()[0].get_xdata() + tm.assert_index_equal(bts.index.to_period(), PeriodIndex(idx)) + + def test_axis_limits(self): + def _test(ax): + xlim = ax.get_xlim() + ax.set_xlim(xlim[0] - 5, xlim[1] + 10) + result = ax.get_xlim() + assert result[0] == xlim[0] - 5 + assert result[1] == xlim[1] + 10 + + # string + expected = (Period("1/1/2000", ax.freq), Period("4/1/2000", ax.freq)) + ax.set_xlim("1/1/2000", "4/1/2000") + result = ax.get_xlim() + assert int(result[0]) == expected[0].ordinal + assert int(result[1]) == expected[1].ordinal + + # datetime + expected = (Period("1/1/2000", ax.freq), Period("4/1/2000", ax.freq)) + ax.set_xlim(datetime(2000, 1, 1), datetime(2000, 4, 1)) + result = ax.get_xlim() + assert int(result[0]) == expected[0].ordinal + assert int(result[1]) == expected[1].ordinal + fig = ax.get_figure() + self.plt.close(fig) + + ser = tm.makeTimeSeries() + _, ax = self.plt.subplots() + ser.plot(ax=ax) + _test(ax) + + _, ax = self.plt.subplots() + df = DataFrame({"a": ser, "b": ser + 1}) + df.plot(ax=ax) + _test(ax) + + df = DataFrame({"a": ser, "b": ser + 1}) + axes = df.plot(subplots=True) + + for ax in axes: + _test(ax) + + def test_get_finder(self): + import pandas.plotting._matplotlib.converter as conv + + assert conv.get_finder(to_offset("B")) == conv._daily_finder + assert conv.get_finder(to_offset("D")) == conv._daily_finder + assert conv.get_finder(to_offset("M")) == conv._monthly_finder + assert conv.get_finder(to_offset("Q")) == conv._quarterly_finder + assert conv.get_finder(to_offset("A")) == conv._annual_finder + assert conv.get_finder(to_offset("W")) == conv._daily_finder + + def test_finder_daily(self): + day_lst = [10, 40, 252, 400, 950, 2750, 10000] + + xpl1 = xpl2 = [Period("1999-1-1", freq="B").ordinal] * len(day_lst) + rs1 = [] + rs2 = [] + for n in day_lst: + rng = bdate_range("1999-1-1", periods=n) + ser = Series(np.random.randn(len(rng)), rng) + _, ax = self.plt.subplots() + ser.plot(ax=ax) + xaxis = ax.get_xaxis() + rs1.append(xaxis.get_majorticklocs()[0]) + + vmin, vmax = ax.get_xlim() + ax.set_xlim(vmin + 0.9, vmax) + rs2.append(xaxis.get_majorticklocs()[0]) + self.plt.close(ax.get_figure()) + + assert rs1 == xpl1 + assert rs2 == xpl2 + + def test_finder_quarterly(self): + yrs = [3.5, 11] + + xpl1 = xpl2 = [Period("1988Q1").ordinal] * len(yrs) + rs1 = [] + rs2 = [] + for n in yrs: + rng = period_range("1987Q2", periods=int(n * 4), freq="Q") + ser = Series(np.random.randn(len(rng)), rng) + _, ax = self.plt.subplots() + ser.plot(ax=ax) + xaxis = ax.get_xaxis() + rs1.append(xaxis.get_majorticklocs()[0]) + + (vmin, vmax) = ax.get_xlim() + ax.set_xlim(vmin + 0.9, vmax) + rs2.append(xaxis.get_majorticklocs()[0]) + self.plt.close(ax.get_figure()) + + assert rs1 == xpl1 + assert rs2 == xpl2 + + def test_finder_monthly(self): + yrs = [1.15, 2.5, 4, 11] + + xpl1 = xpl2 = [Period("Jan 1988").ordinal] * len(yrs) + rs1 = [] + rs2 = [] + for n in yrs: + rng = period_range("1987Q2", periods=int(n * 12), freq="M") + ser = 
Series(np.random.randn(len(rng)), rng) + _, ax = self.plt.subplots() + ser.plot(ax=ax) + xaxis = ax.get_xaxis() + rs1.append(xaxis.get_majorticklocs()[0]) + + vmin, vmax = ax.get_xlim() + ax.set_xlim(vmin + 0.9, vmax) + rs2.append(xaxis.get_majorticklocs()[0]) + self.plt.close(ax.get_figure()) + + assert rs1 == xpl1 + assert rs2 == xpl2 + + def test_finder_monthly_long(self): + rng = period_range("1988Q1", periods=24 * 12, freq="M") + ser = Series(np.random.randn(len(rng)), rng) + _, ax = self.plt.subplots() + ser.plot(ax=ax) + xaxis = ax.get_xaxis() + rs = xaxis.get_majorticklocs()[0] + xp = Period("1989Q1", "M").ordinal + assert rs == xp + + def test_finder_annual(self): + xp = [1987, 1988, 1990, 1990, 1995, 2020, 2070, 2170] + xp = [Period(x, freq="A").ordinal for x in xp] + rs = [] + for nyears in [5, 10, 19, 49, 99, 199, 599, 1001]: + rng = period_range("1987", periods=nyears, freq="A") + ser = Series(np.random.randn(len(rng)), rng) + _, ax = self.plt.subplots() + ser.plot(ax=ax) + xaxis = ax.get_xaxis() + rs.append(xaxis.get_majorticklocs()[0]) + self.plt.close(ax.get_figure()) + + assert rs == xp + + def test_finder_minutely(self): + nminutes = 50 * 24 * 60 + rng = date_range("1/1/1999", freq="Min", periods=nminutes) + ser = Series(np.random.randn(len(rng)), rng) + _, ax = self.plt.subplots() + ser.plot(ax=ax) + xaxis = ax.get_xaxis() + rs = xaxis.get_majorticklocs()[0] + xp = Period("1/1/1999", freq="Min").ordinal + + assert rs == xp + + def test_finder_hourly(self): + nhours = 23 + rng = date_range("1/1/1999", freq="H", periods=nhours) + ser = Series(np.random.randn(len(rng)), rng) + _, ax = self.plt.subplots() + ser.plot(ax=ax) + xaxis = ax.get_xaxis() + rs = xaxis.get_majorticklocs()[0] + xp = Period("1/1/1999", freq="H").ordinal + + assert rs == xp + + def test_gaps(self): + ts = tm.makeTimeSeries() + ts[5:25] = np.nan + _, ax = self.plt.subplots() + ts.plot(ax=ax) + lines = ax.get_lines() + assert len(lines) == 1 + line = lines[0] + data = line.get_xydata() + + if self.mpl_ge_3_0_0 or not self.mpl_ge_2_2_3: + data = np.ma.MaskedArray(data, mask=isna(data), fill_value=np.nan) + + assert isinstance(data, np.ma.core.MaskedArray) + mask = data.mask + assert mask[5:25, 1].all() + self.plt.close(ax.get_figure()) + + # irregular + ts = tm.makeTimeSeries() + ts = ts[[0, 1, 2, 5, 7, 9, 12, 15, 20]] + ts[2:5] = np.nan + _, ax = self.plt.subplots() + ax = ts.plot(ax=ax) + lines = ax.get_lines() + assert len(lines) == 1 + line = lines[0] + data = line.get_xydata() + + if self.mpl_ge_3_0_0 or not self.mpl_ge_2_2_3: + data = np.ma.MaskedArray(data, mask=isna(data), fill_value=np.nan) + + assert isinstance(data, np.ma.core.MaskedArray) + mask = data.mask + assert mask[2:5, 1].all() + self.plt.close(ax.get_figure()) + + # non-ts + idx = [0, 1, 2, 5, 7, 9, 12, 15, 20] + ser = Series(np.random.randn(len(idx)), idx) + ser[2:5] = np.nan + _, ax = self.plt.subplots() + ser.plot(ax=ax) + lines = ax.get_lines() + assert len(lines) == 1 + line = lines[0] + data = line.get_xydata() + if self.mpl_ge_3_0_0 or not self.mpl_ge_2_2_3: + data = np.ma.MaskedArray(data, mask=isna(data), fill_value=np.nan) + + assert isinstance(data, np.ma.core.MaskedArray) + mask = data.mask + assert mask[2:5, 1].all() + + def test_gap_upsample(self): + low = tm.makeTimeSeries() + low[5:25] = np.nan + _, ax = self.plt.subplots() + low.plot(ax=ax) + + idxh = date_range(low.index[0], low.index[-1], freq="12h") + s = Series(np.random.randn(len(idxh)), idxh) + s.plot(secondary_y=True) + lines = ax.get_lines() + assert len(lines) 
== 1 + assert len(ax.right_ax.get_lines()) == 1 + + line = lines[0] + data = line.get_xydata() + if self.mpl_ge_3_0_0 or not self.mpl_ge_2_2_3: + data = np.ma.MaskedArray(data, mask=isna(data), fill_value=np.nan) + + assert isinstance(data, np.ma.core.MaskedArray) + mask = data.mask + assert mask[5:25, 1].all() + + def test_secondary_y(self): + ser = Series(np.random.randn(10)) + ser2 = Series(np.random.randn(10)) + fig, _ = self.plt.subplots() + ax = ser.plot(secondary_y=True) + assert hasattr(ax, "left_ax") + assert not hasattr(ax, "right_ax") + axes = fig.get_axes() + line = ax.get_lines()[0] + xp = Series(line.get_ydata(), line.get_xdata()) + tm.assert_series_equal(ser, xp) + assert ax.get_yaxis().get_ticks_position() == "right" + assert not axes[0].get_yaxis().get_visible() + self.plt.close(fig) + + _, ax2 = self.plt.subplots() + ser2.plot(ax=ax2) + assert ax2.get_yaxis().get_ticks_position() == self.default_tick_position + self.plt.close(ax2.get_figure()) + + ax = ser2.plot() + ax2 = ser.plot(secondary_y=True) + assert ax.get_yaxis().get_visible() + assert not hasattr(ax, "left_ax") + assert hasattr(ax, "right_ax") + assert hasattr(ax2, "left_ax") + assert not hasattr(ax2, "right_ax") + + def test_secondary_y_ts(self): + idx = date_range("1/1/2000", periods=10) + ser = Series(np.random.randn(10), idx) + ser2 = Series(np.random.randn(10), idx) + fig, _ = self.plt.subplots() + ax = ser.plot(secondary_y=True) + assert hasattr(ax, "left_ax") + assert not hasattr(ax, "right_ax") + axes = fig.get_axes() + line = ax.get_lines()[0] + xp = Series(line.get_ydata(), line.get_xdata()).to_timestamp() + tm.assert_series_equal(ser, xp) + assert ax.get_yaxis().get_ticks_position() == "right" + assert not axes[0].get_yaxis().get_visible() + self.plt.close(fig) + + _, ax2 = self.plt.subplots() + ser2.plot(ax=ax2) + assert ax2.get_yaxis().get_ticks_position() == self.default_tick_position + self.plt.close(ax2.get_figure()) + + ax = ser2.plot() + ax2 = ser.plot(secondary_y=True) + assert ax.get_yaxis().get_visible() + + @td.skip_if_no_scipy + def test_secondary_kde(self): + + ser = Series(np.random.randn(10)) + fig, ax = self.plt.subplots() + ax = ser.plot(secondary_y=True, kind="density", ax=ax) + assert hasattr(ax, "left_ax") + assert not hasattr(ax, "right_ax") + axes = fig.get_axes() + assert axes[1].get_yaxis().get_ticks_position() == "right" + + def test_secondary_bar(self): + ser = Series(np.random.randn(10)) + fig, ax = self.plt.subplots() + ser.plot(secondary_y=True, kind="bar", ax=ax) + axes = fig.get_axes() + assert axes[1].get_yaxis().get_ticks_position() == "right" + + def test_secondary_frame(self): + df = DataFrame(np.random.randn(5, 3), columns=["a", "b", "c"]) + axes = df.plot(secondary_y=["a", "c"], subplots=True) + assert axes[0].get_yaxis().get_ticks_position() == "right" + assert axes[1].get_yaxis().get_ticks_position() == self.default_tick_position + assert axes[2].get_yaxis().get_ticks_position() == "right" + + def test_secondary_bar_frame(self): + df = DataFrame(np.random.randn(5, 3), columns=["a", "b", "c"]) + axes = df.plot(kind="bar", secondary_y=["a", "c"], subplots=True) + assert axes[0].get_yaxis().get_ticks_position() == "right" + assert axes[1].get_yaxis().get_ticks_position() == self.default_tick_position + assert axes[2].get_yaxis().get_ticks_position() == "right" + + def test_mixed_freq_regular_first(self): + # TODO + s1 = tm.makeTimeSeries() + s2 = s1[[0, 5, 10, 11, 12, 13, 14, 15]] + + # it works! 
+ _, ax = self.plt.subplots() + s1.plot(ax=ax) + + ax2 = s2.plot(style="g", ax=ax) + lines = ax2.get_lines() + idx1 = PeriodIndex(lines[0].get_xdata()) + idx2 = PeriodIndex(lines[1].get_xdata()) + + tm.assert_index_equal(idx1, s1.index.to_period("B")) + tm.assert_index_equal(idx2, s2.index.to_period("B")) + + left, right = ax2.get_xlim() + pidx = s1.index.to_period() + assert left <= pidx[0].ordinal + assert right >= pidx[-1].ordinal + + def test_mixed_freq_irregular_first(self): + s1 = tm.makeTimeSeries() + s2 = s1[[0, 5, 10, 11, 12, 13, 14, 15]] + _, ax = self.plt.subplots() + s2.plot(style="g", ax=ax) + s1.plot(ax=ax) + assert not hasattr(ax, "freq") + lines = ax.get_lines() + x1 = lines[0].get_xdata() + tm.assert_numpy_array_equal(x1, s2.index.astype(object).values) + x2 = lines[1].get_xdata() + tm.assert_numpy_array_equal(x2, s1.index.astype(object).values) + + def test_mixed_freq_regular_first_df(self): + # GH 9852 + s1 = tm.makeTimeSeries().to_frame() + s2 = s1.iloc[[0, 5, 10, 11, 12, 13, 14, 15], :] + _, ax = self.plt.subplots() + s1.plot(ax=ax) + ax2 = s2.plot(style="g", ax=ax) + lines = ax2.get_lines() + idx1 = PeriodIndex(lines[0].get_xdata()) + idx2 = PeriodIndex(lines[1].get_xdata()) + assert idx1.equals(s1.index.to_period("B")) + assert idx2.equals(s2.index.to_period("B")) + left, right = ax2.get_xlim() + pidx = s1.index.to_period() + assert left <= pidx[0].ordinal + assert right >= pidx[-1].ordinal + + def test_mixed_freq_irregular_first_df(self): + # GH 9852 + s1 = tm.makeTimeSeries().to_frame() + s2 = s1.iloc[[0, 5, 10, 11, 12, 13, 14, 15], :] + _, ax = self.plt.subplots() + s2.plot(style="g", ax=ax) + s1.plot(ax=ax) + assert not hasattr(ax, "freq") + lines = ax.get_lines() + x1 = lines[0].get_xdata() + tm.assert_numpy_array_equal(x1, s2.index.astype(object).values) + x2 = lines[1].get_xdata() + tm.assert_numpy_array_equal(x2, s1.index.astype(object).values) + + def test_mixed_freq_hf_first(self): + idxh = date_range("1/1/1999", periods=365, freq="D") + idxl = date_range("1/1/1999", periods=12, freq="M") + high = Series(np.random.randn(len(idxh)), idxh) + low = Series(np.random.randn(len(idxl)), idxl) + _, ax = self.plt.subplots() + high.plot(ax=ax) + low.plot(ax=ax) + for line in ax.get_lines(): + assert PeriodIndex(data=line.get_xdata()).freq == "D" + + def test_mixed_freq_alignment(self): + ts_ind = date_range("2012-01-01 13:00", "2012-01-02", freq="H") + ts_data = np.random.randn(12) + + ts = Series(ts_data, index=ts_ind) + ts2 = ts.asfreq("T").interpolate() + + _, ax = self.plt.subplots() + ax = ts.plot(ax=ax) + ts2.plot(style="r", ax=ax) + + assert ax.lines[0].get_xdata()[0] == ax.lines[1].get_xdata()[0] + + def test_mixed_freq_lf_first(self): + + idxh = date_range("1/1/1999", periods=365, freq="D") + idxl = date_range("1/1/1999", periods=12, freq="M") + high = Series(np.random.randn(len(idxh)), idxh) + low = Series(np.random.randn(len(idxl)), idxl) + _, ax = self.plt.subplots() + low.plot(legend=True, ax=ax) + high.plot(legend=True, ax=ax) + for line in ax.get_lines(): + assert PeriodIndex(data=line.get_xdata()).freq == "D" + leg = ax.get_legend() + assert len(leg.texts) == 2 + self.plt.close(ax.get_figure()) + + idxh = date_range("1/1/1999", periods=240, freq="T") + idxl = date_range("1/1/1999", periods=4, freq="H") + high = Series(np.random.randn(len(idxh)), idxh) + low = Series(np.random.randn(len(idxl)), idxl) + _, ax = self.plt.subplots() + low.plot(ax=ax) + high.plot(ax=ax) + for line in ax.get_lines(): + assert PeriodIndex(data=line.get_xdata()).freq == "T" + 
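+    # Note on the mixed-frequency tests above: when series with different
+    # time frequencies share an Axes, pandas converts the x-data of every
+    # line to a PeriodIndex at the higher frequency ("D" for daily vs.
+    # monthly, "T" for minutely vs. hourly), which is what the freq
+    # assertions check.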
+ def test_mixed_freq_irreg_period(self): + ts = tm.makeTimeSeries() + irreg = ts[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 16, 17, 18, 29]] + rng = period_range("1/3/2000", periods=30, freq="B") + ps = Series(np.random.randn(len(rng)), rng) + _, ax = self.plt.subplots() + irreg.plot(ax=ax) + ps.plot(ax=ax) + + def test_mixed_freq_shared_ax(self): + + # GH13341, using sharex=True + idx1 = date_range("2015-01-01", periods=3, freq="M") + idx2 = idx1[:1].union(idx1[2:]) + s1 = Series(range(len(idx1)), idx1) + s2 = Series(range(len(idx2)), idx2) + + fig, (ax1, ax2) = self.plt.subplots(nrows=2, sharex=True) + s1.plot(ax=ax1) + s2.plot(ax=ax2) + + assert ax1.freq == "M" + assert ax2.freq == "M" + assert ax1.lines[0].get_xydata()[0, 0] == ax2.lines[0].get_xydata()[0, 0] + + # using twinx + fig, ax1 = self.plt.subplots() + ax2 = ax1.twinx() + s1.plot(ax=ax1) + s2.plot(ax=ax2) + + assert ax1.lines[0].get_xydata()[0, 0] == ax2.lines[0].get_xydata()[0, 0] + + # TODO (GH14330, GH14322) + # plotting the irregular first does not yet work + # fig, ax1 = plt.subplots() + # ax2 = ax1.twinx() + # s2.plot(ax=ax1) + # s1.plot(ax=ax2) + # assert (ax1.lines[0].get_xydata()[0, 0] == + # ax2.lines[0].get_xydata()[0, 0]) + + def test_nat_handling(self): + + _, ax = self.plt.subplots() + + dti = DatetimeIndex(["2015-01-01", NaT, "2015-01-03"]) + s = Series(range(len(dti)), dti) + s.plot(ax=ax) + xdata = ax.get_lines()[0].get_xdata() + # plot x data is bounded by index values + assert s.index.min() <= Series(xdata).min() + assert Series(xdata).max() <= s.index.max() + + def test_to_weekly_resampling(self): + idxh = date_range("1/1/1999", periods=52, freq="W") + idxl = date_range("1/1/1999", periods=12, freq="M") + high = Series(np.random.randn(len(idxh)), idxh) + low = Series(np.random.randn(len(idxl)), idxl) + _, ax = self.plt.subplots() + high.plot(ax=ax) + low.plot(ax=ax) + for line in ax.get_lines(): + assert PeriodIndex(data=line.get_xdata()).freq == idxh.freq + + def test_from_weekly_resampling(self): + idxh = date_range("1/1/1999", periods=52, freq="W") + idxl = date_range("1/1/1999", periods=12, freq="M") + high = Series(np.random.randn(len(idxh)), idxh) + low = Series(np.random.randn(len(idxl)), idxl) + _, ax = self.plt.subplots() + low.plot(ax=ax) + high.plot(ax=ax) + + expected_h = idxh.to_period().asi8.astype(np.float64) + expected_l = np.array( + [1514, 1519, 1523, 1527, 1531, 1536, 1540, 1544, 1549, 1553, 1558, 1562], + dtype=np.float64, + ) + for line in ax.get_lines(): + assert PeriodIndex(data=line.get_xdata()).freq == idxh.freq + xdata = line.get_xdata(orig=False) + if len(xdata) == 12: # idxl lines + tm.assert_numpy_array_equal(xdata, expected_l) + else: + tm.assert_numpy_array_equal(xdata, expected_h) + tm.close() + + def test_from_resampling_area_line_mixed(self): + idxh = date_range("1/1/1999", periods=52, freq="W") + idxl = date_range("1/1/1999", periods=12, freq="M") + high = DataFrame(np.random.rand(len(idxh), 3), index=idxh, columns=[0, 1, 2]) + low = DataFrame(np.random.rand(len(idxl), 3), index=idxl, columns=[0, 1, 2]) + + # low to high + for kind1, kind2 in [("line", "area"), ("area", "line")]: + _, ax = self.plt.subplots() + low.plot(kind=kind1, stacked=True, ax=ax) + high.plot(kind=kind2, stacked=True, ax=ax) + + # check low dataframe result + expected_x = np.array( + [ + 1514, + 1519, + 1523, + 1527, + 1531, + 1536, + 1540, + 1544, + 1549, + 1553, + 1558, + 1562, + ], + dtype=np.float64, + ) + expected_y = np.zeros(len(expected_x), dtype=np.float64) + for i in range(3): + line = 
ax.lines[i] + assert PeriodIndex(line.get_xdata()).freq == idxh.freq + tm.assert_numpy_array_equal(line.get_xdata(orig=False), expected_x) + # check stacked values are correct + expected_y += low[i].values + tm.assert_numpy_array_equal(line.get_ydata(orig=False), expected_y) + + # check high dataframe result + expected_x = idxh.to_period().asi8.astype(np.float64) + expected_y = np.zeros(len(expected_x), dtype=np.float64) + for i in range(3): + line = ax.lines[3 + i] + assert PeriodIndex(data=line.get_xdata()).freq == idxh.freq + tm.assert_numpy_array_equal(line.get_xdata(orig=False), expected_x) + expected_y += high[i].values + tm.assert_numpy_array_equal(line.get_ydata(orig=False), expected_y) + + # high to low + for kind1, kind2 in [("line", "area"), ("area", "line")]: + _, ax = self.plt.subplots() + high.plot(kind=kind1, stacked=True, ax=ax) + low.plot(kind=kind2, stacked=True, ax=ax) + + # check high dataframe result + expected_x = idxh.to_period().asi8.astype(np.float64) + expected_y = np.zeros(len(expected_x), dtype=np.float64) + for i in range(3): + line = ax.lines[i] + assert PeriodIndex(data=line.get_xdata()).freq == idxh.freq + tm.assert_numpy_array_equal(line.get_xdata(orig=False), expected_x) + expected_y += high[i].values + tm.assert_numpy_array_equal(line.get_ydata(orig=False), expected_y) + + # check low dataframe result + expected_x = np.array( + [ + 1514, + 1519, + 1523, + 1527, + 1531, + 1536, + 1540, + 1544, + 1549, + 1553, + 1558, + 1562, + ], + dtype=np.float64, + ) + expected_y = np.zeros(len(expected_x), dtype=np.float64) + for i in range(3): + lines = ax.lines[3 + i] + assert PeriodIndex(data=lines.get_xdata()).freq == idxh.freq + tm.assert_numpy_array_equal(lines.get_xdata(orig=False), expected_x) + expected_y += low[i].values + tm.assert_numpy_array_equal(lines.get_ydata(orig=False), expected_y) + + def test_mixed_freq_second_millisecond(self): + # GH 7772, GH 7760 + idxh = date_range("2014-07-01 09:00", freq="S", periods=50) + idxl = date_range("2014-07-01 09:00", freq="100L", periods=500) + high = Series(np.random.randn(len(idxh)), idxh) + low = Series(np.random.randn(len(idxl)), idxl) + # high to low + _, ax = self.plt.subplots() + high.plot(ax=ax) + low.plot(ax=ax) + assert len(ax.get_lines()) == 2 + for line in ax.get_lines(): + assert PeriodIndex(data=line.get_xdata()).freq == "L" + tm.close() + + # low to high + _, ax = self.plt.subplots() + low.plot(ax=ax) + high.plot(ax=ax) + assert len(ax.get_lines()) == 2 + for line in ax.get_lines(): + assert PeriodIndex(data=line.get_xdata()).freq == "L" + + def test_irreg_dtypes(self): + # date + idx = [date(2000, 1, 1), date(2000, 1, 5), date(2000, 1, 20)] + df = DataFrame(np.random.randn(len(idx), 3), Index(idx, dtype=object)) + _check_plot_works(df.plot) + + # np.datetime64 + idx = date_range("1/1/2000", periods=10) + idx = idx[[0, 2, 5, 9]].astype(object) + df = DataFrame(np.random.randn(len(idx), 3), idx) + _, ax = self.plt.subplots() + _check_plot_works(df.plot, ax=ax) + + def test_time(self): + t = datetime(1, 1, 1, 3, 30, 0) + deltas = np.random.randint(1, 20, 3).cumsum() + ts = np.array([(t + timedelta(minutes=int(x))).time() for x in deltas]) + df = DataFrame( + {"a": np.random.randn(len(ts)), "b": np.random.randn(len(ts))}, index=ts + ) + fig, ax = self.plt.subplots() + df.plot(ax=ax) + + # verify tick labels + ticks = ax.get_xticks() + labels = ax.get_xticklabels() + for t, l in zip(ticks, labels): + m, s = divmod(int(t), 60) + h, m = divmod(m, 60) + rs = l.get_text() + if len(rs) > 0: + if s != 0: + xp = 
time(h, m, s).strftime("%H:%M:%S") + else: + xp = time(h, m, s).strftime("%H:%M") + assert xp == rs + + def test_time_change_xlim(self): + t = datetime(1, 1, 1, 3, 30, 0) + deltas = np.random.randint(1, 20, 3).cumsum() + ts = np.array([(t + timedelta(minutes=int(x))).time() for x in deltas]) + df = DataFrame( + {"a": np.random.randn(len(ts)), "b": np.random.randn(len(ts))}, index=ts + ) + fig, ax = self.plt.subplots() + df.plot(ax=ax) + + # verify tick labels + ticks = ax.get_xticks() + labels = ax.get_xticklabels() + for t, l in zip(ticks, labels): + m, s = divmod(int(t), 60) + h, m = divmod(m, 60) + rs = l.get_text() + if len(rs) > 0: + if s != 0: + xp = time(h, m, s).strftime("%H:%M:%S") + else: + xp = time(h, m, s).strftime("%H:%M") + assert xp == rs + + # change xlim + ax.set_xlim("1:30", "5:00") + + # check tick labels again + ticks = ax.get_xticks() + labels = ax.get_xticklabels() + for t, l in zip(ticks, labels): + m, s = divmod(int(t), 60) + h, m = divmod(m, 60) + rs = l.get_text() + if len(rs) > 0: + if s != 0: + xp = time(h, m, s).strftime("%H:%M:%S") + else: + xp = time(h, m, s).strftime("%H:%M") + assert xp == rs + + def test_time_musec(self): + t = datetime(1, 1, 1, 3, 30, 0) + deltas = np.random.randint(1, 20, 3).cumsum() + ts = np.array([(t + timedelta(microseconds=int(x))).time() for x in deltas]) + df = DataFrame( + {"a": np.random.randn(len(ts)), "b": np.random.randn(len(ts))}, index=ts + ) + fig, ax = self.plt.subplots() + ax = df.plot(ax=ax) + + # verify tick labels + ticks = ax.get_xticks() + labels = ax.get_xticklabels() + for t, l in zip(ticks, labels): + m, s = divmod(int(t), 60) + + us = round((t - int(t)) * 1e6) + + h, m = divmod(m, 60) + rs = l.get_text() + if len(rs) > 0: + if (us % 1000) != 0: + xp = time(h, m, s, us).strftime("%H:%M:%S.%f") + elif (us // 1000) != 0: + xp = time(h, m, s, us).strftime("%H:%M:%S.%f")[:-3] + elif s != 0: + xp = time(h, m, s, us).strftime("%H:%M:%S") + else: + xp = time(h, m, s, us).strftime("%H:%M") + assert xp == rs + + def test_secondary_upsample(self): + idxh = date_range("1/1/1999", periods=365, freq="D") + idxl = date_range("1/1/1999", periods=12, freq="M") + high = Series(np.random.randn(len(idxh)), idxh) + low = Series(np.random.randn(len(idxl)), idxl) + _, ax = self.plt.subplots() + low.plot(ax=ax) + ax = high.plot(secondary_y=True, ax=ax) + for line in ax.get_lines(): + assert PeriodIndex(line.get_xdata()).freq == "D" + assert hasattr(ax, "left_ax") + assert not hasattr(ax, "right_ax") + for line in ax.left_ax.get_lines(): + assert PeriodIndex(line.get_xdata()).freq == "D" + + def test_secondary_legend(self): + fig = self.plt.figure() + ax = fig.add_subplot(211) + + # ts + df = tm.makeTimeDataFrame() + df.plot(secondary_y=["A", "B"], ax=ax) + leg = ax.get_legend() + assert len(leg.get_lines()) == 4 + assert leg.get_texts()[0].get_text() == "A (right)" + assert leg.get_texts()[1].get_text() == "B (right)" + assert leg.get_texts()[2].get_text() == "C" + assert leg.get_texts()[3].get_text() == "D" + assert ax.right_ax.get_legend() is None + colors = set() + for line in leg.get_lines(): + colors.add(line.get_color()) + + # TODO: color cycle problems + assert len(colors) == 4 + self.plt.close(fig) + + fig = self.plt.figure() + ax = fig.add_subplot(211) + df.plot(secondary_y=["A", "C"], mark_right=False, ax=ax) + leg = ax.get_legend() + assert len(leg.get_lines()) == 4 + assert leg.get_texts()[0].get_text() == "A" + assert leg.get_texts()[1].get_text() == "B" + assert leg.get_texts()[2].get_text() == "C" + assert 
leg.get_texts()[3].get_text() == "D" + self.plt.close(fig) + + fig, ax = self.plt.subplots() + df.plot(kind="bar", secondary_y=["A"], ax=ax) + leg = ax.get_legend() + assert leg.get_texts()[0].get_text() == "A (right)" + assert leg.get_texts()[1].get_text() == "B" + self.plt.close(fig) + + fig, ax = self.plt.subplots() + df.plot(kind="bar", secondary_y=["A"], mark_right=False, ax=ax) + leg = ax.get_legend() + assert leg.get_texts()[0].get_text() == "A" + assert leg.get_texts()[1].get_text() == "B" + self.plt.close(fig) + + fig = self.plt.figure() + ax = fig.add_subplot(211) + df = tm.makeTimeDataFrame() + ax = df.plot(secondary_y=["C", "D"], ax=ax) + leg = ax.get_legend() + assert len(leg.get_lines()) == 4 + assert ax.right_ax.get_legend() is None + colors = set() + for line in leg.get_lines(): + colors.add(line.get_color()) + + # TODO: color cycle problems + assert len(colors) == 4 + self.plt.close(fig) + + # non-ts + df = tm.makeDataFrame() + fig = self.plt.figure() + ax = fig.add_subplot(211) + ax = df.plot(secondary_y=["A", "B"], ax=ax) + leg = ax.get_legend() + assert len(leg.get_lines()) == 4 + assert ax.right_ax.get_legend() is None + colors = set() + for line in leg.get_lines(): + colors.add(line.get_color()) + + # TODO: color cycle problems + assert len(colors) == 4 + self.plt.close() + + fig = self.plt.figure() + ax = fig.add_subplot(211) + ax = df.plot(secondary_y=["C", "D"], ax=ax) + leg = ax.get_legend() + assert len(leg.get_lines()) == 4 + assert ax.right_ax.get_legend() is None + colors = set() + for line in leg.get_lines(): + colors.add(line.get_color()) + + # TODO: color cycle problems + assert len(colors) == 4 + + def test_format_date_axis(self): + rng = date_range("1/1/2012", periods=12, freq="M") + df = DataFrame(np.random.randn(len(rng), 3), rng) + _, ax = self.plt.subplots() + ax = df.plot(ax=ax) + xaxis = ax.get_xaxis() + for line in xaxis.get_ticklabels(): + if len(line.get_text()) > 0: + assert line.get_rotation() == 30 + + def test_ax_plot(self): + x = date_range(start="2012-01-02", periods=10, freq="D") + y = list(range(len(x))) + _, ax = self.plt.subplots() + lines = ax.plot(x, y, label="Y") + tm.assert_index_equal(DatetimeIndex(lines[0].get_xdata()), x) + + def test_mpl_nopandas(self): + dates = [date(2008, 12, 31), date(2009, 1, 31)] + values1 = np.arange(10.0, 11.0, 0.5) + values2 = np.arange(11.0, 12.0, 0.5) + + kw = {"fmt": "-", "lw": 4} + + _, ax = self.plt.subplots() + ax.plot_date([x.toordinal() for x in dates], values1, **kw) + ax.plot_date([x.toordinal() for x in dates], values2, **kw) + + line1, line2 = ax.get_lines() + + exp = np.array([x.toordinal() for x in dates], dtype=np.float64) + tm.assert_numpy_array_equal(line1.get_xydata()[:, 0], exp) + exp = np.array([x.toordinal() for x in dates], dtype=np.float64) + tm.assert_numpy_array_equal(line2.get_xydata()[:, 0], exp) + + def test_irregular_ts_shared_ax_xlim(self): + # GH 2960 + from pandas.plotting._matplotlib.converter import DatetimeConverter + + ts = tm.makeTimeSeries()[:20] + ts_irregular = ts[[1, 4, 5, 6, 8, 9, 10, 12, 13, 14, 15, 17, 18]] + + # plot the left section of the irregular series, then the right section + _, ax = self.plt.subplots() + ts_irregular[:5].plot(ax=ax) + ts_irregular[5:].plot(ax=ax) + + # check that axis limits are correct + left, right = ax.get_xlim() + assert left <= DatetimeConverter.convert(ts_irregular.index.min(), "", ax) + assert right >= DatetimeConverter.convert(ts_irregular.index.max(), "", ax) + + def test_secondary_y_non_ts_xlim(self): + # GH 3490 - 
non-timeseries with secondary y + index_1 = [1, 2, 3, 4] + index_2 = [5, 6, 7, 8] + s1 = Series(1, index=index_1) + s2 = Series(2, index=index_2) + + _, ax = self.plt.subplots() + s1.plot(ax=ax) + left_before, right_before = ax.get_xlim() + s2.plot(secondary_y=True, ax=ax) + left_after, right_after = ax.get_xlim() + + assert left_before >= left_after + assert right_before < right_after + + def test_secondary_y_regular_ts_xlim(self): + # GH 3490 - regular-timeseries with secondary y + index_1 = date_range(start="2000-01-01", periods=4, freq="D") + index_2 = date_range(start="2000-01-05", periods=4, freq="D") + s1 = Series(1, index=index_1) + s2 = Series(2, index=index_2) + + _, ax = self.plt.subplots() + s1.plot(ax=ax) + left_before, right_before = ax.get_xlim() + s2.plot(secondary_y=True, ax=ax) + left_after, right_after = ax.get_xlim() + + assert left_before >= left_after + assert right_before < right_after + + def test_secondary_y_mixed_freq_ts_xlim(self): + # GH 3490 - mixed frequency timeseries with secondary y + rng = date_range("2000-01-01", periods=10000, freq="min") + ts = Series(1, index=rng) + + _, ax = self.plt.subplots() + ts.plot(ax=ax) + left_before, right_before = ax.get_xlim() + ts.resample("D").mean().plot(secondary_y=True, ax=ax) + left_after, right_after = ax.get_xlim() + + # a downsample should not have changed either limit + assert left_before == left_after + assert right_before == right_after + + def test_secondary_y_irregular_ts_xlim(self): + # GH 3490 - irregular-timeseries with secondary y + from pandas.plotting._matplotlib.converter import DatetimeConverter + + ts = tm.makeTimeSeries()[:20] + ts_irregular = ts[[1, 4, 5, 6, 8, 9, 10, 12, 13, 14, 15, 17, 18]] + + _, ax = self.plt.subplots() + ts_irregular[:5].plot(ax=ax) + # plot higher-x values on secondary axis + ts_irregular[5:].plot(secondary_y=True, ax=ax) + # ensure secondary limits aren't overwritten by plot on primary + ts_irregular[:5].plot(ax=ax) + + left, right = ax.get_xlim() + assert left <= DatetimeConverter.convert(ts_irregular.index.min(), "", ax) + assert right >= DatetimeConverter.convert(ts_irregular.index.max(), "", ax) + + def test_plot_outofbounds_datetime(self): + # 2579 - checking this does not raise + values = [date(1677, 1, 1), date(1677, 1, 2)] + _, ax = self.plt.subplots() + ax.plot(values) + + values = [datetime(1677, 1, 1, 12), datetime(1677, 1, 2, 12)] + ax.plot(values) + + def test_format_timedelta_ticks_narrow(self): + + expected_labels = [f"00:00:00.0000000{i:0>2d}" for i in np.arange(10)] + + rng = timedelta_range("0", periods=10, freq="ns") + df = DataFrame(np.random.randn(len(rng), 3), rng) + fig, ax = self.plt.subplots() + df.plot(fontsize=2, ax=ax) + self.plt.draw() + labels = ax.get_xticklabels() + + result_labels = [x.get_text() for x in labels] + assert len(result_labels) == len(expected_labels) + assert result_labels == expected_labels + + def test_format_timedelta_ticks_wide(self): + expected_labels = [ + "00:00:00", + "1 days 03:46:40", + "2 days 07:33:20", + "3 days 11:20:00", + "4 days 15:06:40", + "5 days 18:53:20", + "6 days 22:40:00", + "8 days 02:26:40", + "9 days 06:13:20", + ] + + rng = timedelta_range("0", periods=10, freq="1 d") + df = DataFrame(np.random.randn(len(rng), 3), rng) + fig, ax = self.plt.subplots() + ax = df.plot(fontsize=2, ax=ax) + self.plt.draw() + labels = ax.get_xticklabels() + + result_labels = [x.get_text() for x in labels] + assert len(result_labels) == len(expected_labels) + assert result_labels == expected_labels + + def 
test_timedelta_plot(self):
+        # test issue #8711
+        s = Series(range(5), timedelta_range("1day", periods=5))
+        _, ax = self.plt.subplots()
+        _check_plot_works(s.plot, ax=ax)
+
+        # test long period
+        index = timedelta_range("1 day 2 hr 30 min 10 s", periods=10, freq="1 d")
+        s = Series(np.random.randn(len(index)), index)
+        _, ax = self.plt.subplots()
+        _check_plot_works(s.plot, ax=ax)
+
+        # test short period
+        index = timedelta_range("1 day 2 hr 30 min 10 s", periods=10, freq="1 ns")
+        s = Series(np.random.randn(len(index)), index)
+        _, ax = self.plt.subplots()
+        _check_plot_works(s.plot, ax=ax)
+
+    def test_hist(self):
+        # https://github.com/matplotlib/matplotlib/issues/8459
+        rng = date_range("1/1/2011", periods=10, freq="H")
+        x = rng
+        w1 = np.arange(0, 1, 0.1)
+        w2 = np.arange(0, 1, 0.1)[::-1]
+        _, ax = self.plt.subplots()
+        ax.hist([x, x], weights=[w1, w2])
+
+    def test_overlapping_datetime(self):
+        # GH 6608
+        s1 = Series(
+            [1, 2, 3],
+            index=[
+                datetime(1995, 12, 31),
+                datetime(2000, 12, 31),
+                datetime(2005, 12, 31),
+            ],
+        )
+        s2 = Series(
+            [1, 2, 3],
+            index=[
+                datetime(1997, 12, 31),
+                datetime(2003, 12, 31),
+                datetime(2008, 12, 31),
+            ],
+        )
+
+        # plot first series, then add the second series to those axes,
+        # then try adding the first series again
+        _, ax = self.plt.subplots()
+        s1.plot(ax=ax)
+        s2.plot(ax=ax)
+        s1.plot(ax=ax)
+
+    @pytest.mark.xfail(reason="GH9053 matplotlib does not use ax.xaxis.converter")
+    def test_add_matplotlib_datetime64(self):
+        # GH9053 - ensure that a plot with PeriodConverter still understands
+        # datetime64 data. This still fails because matplotlib overrides the
+        # ax.xaxis.converter with a DatetimeConverter
+        s = Series(np.random.randn(10), index=date_range("1970-01-02", periods=10))
+        ax = s.plot()
+        with tm.assert_produces_warning(DeprecationWarning):
+            # multi-dimensional indexing
+            ax.plot(s.index, s.values, color="g")
+        l1, l2 = ax.lines
+        tm.assert_numpy_array_equal(l1.get_xydata(), l2.get_xydata())
+
+    def test_matplotlib_scatter_datetime64(self):
+        # https://github.com/matplotlib/matplotlib/issues/11391
+        df = DataFrame(np.random.RandomState(0).rand(10, 2), columns=["x", "y"])
+        df["time"] = date_range("2018-01-01", periods=10, freq="D")
+        fig, ax = self.plt.subplots()
+        ax.scatter(x="time", y="y", data=df)
+        self.plt.draw()
+        label = ax.get_xticklabels()[0]
+        if self.mpl_ge_3_2_0:
+            expected = "2018-01-01"
+        elif self.mpl_ge_3_0_0:
+            expected = "2017-12-08"
+        else:
+            expected = "2017-12-12"
+        assert label.get_text() == expected
+
+    def test_check_xticks_rot(self):
+        # https://github.com/pandas-dev/pandas/issues/29460
+        # regular time series
+        x = to_datetime(["2020-05-01", "2020-05-02", "2020-05-03"])
+        df = DataFrame({"x": x, "y": [1, 2, 3]})
+        axes = df.plot(x="x", y="y")
+        self._check_ticks_props(axes, xrot=0)
+
+        # irregular time series
+        x = to_datetime(["2020-05-01", "2020-05-02", "2020-05-04"])
+        df = DataFrame({"x": x, "y": [1, 2, 3]})
+        axes = df.plot(x="x", y="y")
+        self._check_ticks_props(axes, xrot=30)
+
+        # use timeseries index or not
+        axes = df.set_index("x").plot(y="y", use_index=True)
+        self._check_ticks_props(axes, xrot=30)
+        axes = df.set_index("x").plot(y="y", use_index=False)
+        self._check_ticks_props(axes, xrot=0)
+
+        # separate subplots
+        axes = df.plot(x="x", y="y", subplots=True, sharex=True)
+        self._check_ticks_props(axes, xrot=30)
+        axes = df.plot(x="x", y="y", subplots=True, sharex=False)
+        self._check_ticks_props(axes, xrot=0)
+
+
+def _check_plot_works(f, freq=None, series=None, *args, **kwargs):
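+    # Shared helper for the tests above: calls the plotting callable twice
+    # (on subplot 211, then 212), checks the axis frequency it leaves
+    # behind against ``freq``/``series``, then round-trips the figure
+    # through savefig and pickle (GH18439, GH#24088).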
+    import matplotlib.pyplot as plt
+
+    fig = plt.gcf()
+
+    try:
+        plt.clf()
+        ax = fig.add_subplot(211)
+        orig_ax = kwargs.pop("ax", plt.gca())
+        orig_axfreq = getattr(orig_ax, "freq", None)
+
+        ret = f(*args, **kwargs)
+        assert ret is not None  # do something more intelligent
+
+        ax = kwargs.pop("ax", plt.gca())
+        if series is not None:
+            dfreq = series.index.freq
+            if isinstance(dfreq, BaseOffset):
+                dfreq = dfreq.rule_code
+            if orig_axfreq is None:
+                assert ax.freq == dfreq
+
+        if freq is not None and orig_axfreq is None:
+            assert ax.freq == freq
+
+        ax = fig.add_subplot(212)
+        kwargs["ax"] = ax
+        ret = f(*args, **kwargs)
+        assert ret is not None  # TODO: do something more intelligent
+
+        with tm.ensure_clean(return_filelike=True) as path:
+            plt.savefig(path)
+
+        # GH18439, GH#24088, statsmodels#4772
+        with tm.ensure_clean(return_filelike=True) as path:
+            pickle.dump(fig, path)
+    finally:
+        plt.close(fig)
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/plotting/test_groupby.py b/.local/lib/python3.8/site-packages/pandas/tests/plotting/test_groupby.py
new file mode 100644
index 0000000..997f5ab
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/tests/plotting/test_groupby.py
@@ -0,0 +1,120 @@
+""" Test cases for GroupBy.plot """
+
+
+import numpy as np
+import pytest
+
+import pandas.util._test_decorators as td
+
+from pandas import (
+    DataFrame,
+    Index,
+    Series,
+)
+import pandas._testing as tm
+from pandas.tests.plotting.common import TestPlotBase
+
+pytestmark = pytest.mark.slow
+
+
+@td.skip_if_no_mpl
+class TestDataFrameGroupByPlots(TestPlotBase):
+    def test_series_groupby_plotting_nominally_works(self):
+        n = 10
+        weight = Series(np.random.normal(166, 20, size=n))
+        height = Series(np.random.normal(60, 10, size=n))
+        with tm.RNGContext(42):
+            gender = np.random.choice(["male", "female"], size=n)
+
+        weight.groupby(gender).plot()
+        tm.close()
+        height.groupby(gender).hist()
+        tm.close()
+        # Regression test for GH8733
+        height.groupby(gender).plot(alpha=0.5)
+        tm.close()
+
+    def test_plotting_with_float_index_works(self):
+        # GH 7025
+        df = DataFrame(
+            {"def": [1, 1, 1, 2, 2, 2, 3, 3, 3], "val": np.random.randn(9)},
+            index=[1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0],
+        )
+
+        df.groupby("def")["val"].plot()
+        tm.close()
+        df.groupby("def")["val"].apply(lambda x: x.plot())
+        tm.close()
+
+    def test_hist_single_row(self):
+        # GH10214
+        bins = np.arange(80, 100 + 2, 1)
+        df = DataFrame({"Name": ["AAA", "BBB"], "ByCol": [1, 2], "Mark": [85, 89]})
+        df["Mark"].hist(by=df["ByCol"], bins=bins)
+        df = DataFrame({"Name": ["AAA"], "ByCol": [1], "Mark": [85]})
+        df["Mark"].hist(by=df["ByCol"], bins=bins)
+
+    def test_plot_submethod_works(self):
+        df = DataFrame({"x": [1, 2, 3, 4, 5], "y": [1, 2, 3, 2, 1], "z": list("ababa")})
+        df.groupby("z").plot.scatter("x", "y")
+        tm.close()
+        df.groupby("z")["x"].plot.line()
+        tm.close()
+
+    def test_plot_kwargs(self):
+
+        df = DataFrame({"x": [1, 2, 3, 4, 5], "y": [1, 2, 3, 2, 1], "z": list("ababa")})
+
+        res = df.groupby("z").plot(kind="scatter", x="x", y="y")
+        # check that a scatter plot is effectively plotted: the axes should
+        # contain a PathCollection from the scatter plot (GH11805)
+        assert len(res["a"].collections) == 1
+
+        res = df.groupby("z").plot.scatter(x="x", y="y")
+        assert len(res["a"].collections) == 1
+
+    @pytest.mark.parametrize("column, expected_axes_num", [(None, 2), ("b", 1)])
+    def test_groupby_hist_frame_with_legend(self, column, expected_axes_num):
+        # GH 6279 - DataFrameGroupBy histogram can have a
legend + expected_layout = (1, expected_axes_num) + expected_labels = column or [["a"], ["b"]] + + index = Index(15 * ["1"] + 15 * ["2"], name="c") + df = DataFrame(np.random.randn(30, 2), index=index, columns=["a", "b"]) + g = df.groupby("c") + + for axes in g.hist(legend=True, column=column): + self._check_axes_shape( + axes, axes_num=expected_axes_num, layout=expected_layout + ) + for ax, expected_label in zip(axes[0], expected_labels): + self._check_legend_labels(ax, expected_label) + + @pytest.mark.parametrize("column", [None, "b"]) + def test_groupby_hist_frame_with_legend_raises(self, column): + # GH 6279 - DataFrameGroupBy histogram with legend and label raises + index = Index(15 * ["1"] + 15 * ["2"], name="c") + df = DataFrame(np.random.randn(30, 2), index=index, columns=["a", "b"]) + g = df.groupby("c") + + with pytest.raises(ValueError, match="Cannot use both legend and label"): + g.hist(legend=True, column=column, label="d") + + def test_groupby_hist_series_with_legend(self): + # GH 6279 - SeriesGroupBy histogram can have a legend + index = Index(15 * ["1"] + 15 * ["2"], name="c") + df = DataFrame(np.random.randn(30, 2), index=index, columns=["a", "b"]) + g = df.groupby("c") + + for ax in g["a"].hist(legend=True): + self._check_axes_shape(ax, axes_num=1, layout=(1, 1)) + self._check_legend_labels(ax, ["1", "2"]) + + def test_groupby_hist_series_with_legend_raises(self): + # GH 6279 - SeriesGroupBy histogram with legend and label raises + index = Index(15 * ["1"] + 15 * ["2"], name="c") + df = DataFrame(np.random.randn(30, 2), index=index, columns=["a", "b"]) + g = df.groupby("c") + + with pytest.raises(ValueError, match="Cannot use both legend and label"): + g.hist(legend=True, label="d") diff --git a/.local/lib/python3.8/site-packages/pandas/tests/plotting/test_hist_method.py b/.local/lib/python3.8/site-packages/pandas/tests/plotting/test_hist_method.py new file mode 100644 index 0000000..403f4a2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/plotting/test_hist_method.py @@ -0,0 +1,774 @@ +""" Test cases for .hist method """ +import re + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas import ( + DataFrame, + Index, + Series, + to_datetime, +) +import pandas._testing as tm +from pandas.tests.plotting.common import ( + TestPlotBase, + _check_plot_works, +) + +pytestmark = pytest.mark.slow + + +@td.skip_if_no_mpl +class TestSeriesPlots(TestPlotBase): + def setup_method(self, method): + TestPlotBase.setup_method(self, method) + import matplotlib as mpl + + mpl.rcdefaults() + + self.ts = tm.makeTimeSeries() + self.ts.name = "ts" + + def test_hist_legacy(self): + _check_plot_works(self.ts.hist) + _check_plot_works(self.ts.hist, grid=False) + _check_plot_works(self.ts.hist, figsize=(8, 10)) + # _check_plot_works adds an ax so catch warning. 
see GH #13188
+        with tm.assert_produces_warning(UserWarning):
+            _check_plot_works(self.ts.hist, by=self.ts.index.month)
+        with tm.assert_produces_warning(UserWarning):
+            _check_plot_works(self.ts.hist, by=self.ts.index.month, bins=5)
+
+        fig, ax = self.plt.subplots(1, 1)
+        _check_plot_works(self.ts.hist, ax=ax, default_axes=True)
+        _check_plot_works(self.ts.hist, ax=ax, figure=fig, default_axes=True)
+        _check_plot_works(self.ts.hist, figure=fig, default_axes=True)
+        tm.close()
+
+        fig, (ax1, ax2) = self.plt.subplots(1, 2)
+        _check_plot_works(self.ts.hist, figure=fig, ax=ax1, default_axes=True)
+        _check_plot_works(self.ts.hist, figure=fig, ax=ax2, default_axes=True)
+
+        msg = (
+            "Cannot pass 'figure' when using the 'by' argument, since a new 'Figure' "
+            "instance will be created"
+        )
+        with pytest.raises(ValueError, match=msg):
+            self.ts.hist(by=self.ts.index, figure=fig)
+
+    def test_hist_bins_legacy(self):
+        df = DataFrame(np.random.randn(10, 2))
+        ax = df.hist(bins=2)[0][0]
+        assert len(ax.patches) == 2
+
+    def test_hist_layout(self):
+        df = self.hist_df
+        msg = "The 'layout' keyword is not supported when 'by' is None"
+        with pytest.raises(ValueError, match=msg):
+            df.height.hist(layout=(1, 1))
+
+        with pytest.raises(ValueError, match=msg):
+            df.height.hist(layout=[1, 1])
+
+    def test_hist_layout_with_by(self):
+        df = self.hist_df
+
+        # _check_plot_works adds an `ax` kwarg to the method call
+        # so we get a warning about an axis being cleared, even
+        # though we don't explicitly pass one, see GH #13188
+        with tm.assert_produces_warning(UserWarning):
+            axes = _check_plot_works(df.height.hist, by=df.gender, layout=(2, 1))
+        self._check_axes_shape(axes, axes_num=2, layout=(2, 1))
+
+        with tm.assert_produces_warning(UserWarning):
+            axes = _check_plot_works(df.height.hist, by=df.gender, layout=(3, -1))
+        self._check_axes_shape(axes, axes_num=2, layout=(3, 1))
+
+        with tm.assert_produces_warning(UserWarning):
+            axes = _check_plot_works(df.height.hist, by=df.category, layout=(4, 1))
+        self._check_axes_shape(axes, axes_num=4, layout=(4, 1))
+
+        with tm.assert_produces_warning(UserWarning):
+            axes = _check_plot_works(df.height.hist, by=df.category, layout=(2, -1))
+        self._check_axes_shape(axes, axes_num=4, layout=(2, 2))
+
+        with tm.assert_produces_warning(UserWarning):
+            axes = _check_plot_works(df.height.hist, by=df.category, layout=(3, -1))
+        self._check_axes_shape(axes, axes_num=4, layout=(3, 2))
+
+        with tm.assert_produces_warning(UserWarning):
+            axes = _check_plot_works(df.height.hist, by=df.category, layout=(-1, 4))
+        self._check_axes_shape(axes, axes_num=4, layout=(1, 4))
+
+        with tm.assert_produces_warning(UserWarning):
+            axes = _check_plot_works(df.height.hist, by=df.classroom, layout=(2, 2))
+        self._check_axes_shape(axes, axes_num=3, layout=(2, 2))
+
+        axes = df.height.hist(by=df.category, layout=(4, 2), figsize=(12, 7))
+        self._check_axes_shape(axes, axes_num=4, layout=(4, 2), figsize=(12, 7))
+
+    def test_hist_no_overlap(self):
+        from matplotlib.pyplot import (
+            gcf,
+            subplot,
+        )
+
+        x = Series(np.random.randn(2))
+        y = Series(np.random.randn(2))
+        subplot(121)
+        x.hist()
+        subplot(122)
+        y.hist()
+        fig = gcf()
+        axes = fig.axes
+        assert len(axes) == 2
+
+    def test_hist_by_no_extra_plots(self):
+        df = self.hist_df
+        axes = df.height.hist(by=df.gender)  # noqa
+        assert len(self.plt.get_fignums()) == 1
+
+    def test_plot_fails_when_ax_differs_from_figure(self):
+        from pylab import figure
+
+        fig1 = figure()
+        fig2 = figure()
+        ax1 = fig1.add_subplot(111)
+        msg = "passed axis not
bound to passed figure" + with pytest.raises(AssertionError, match=msg): + self.ts.hist(ax=ax1, figure=fig2) + + @pytest.mark.parametrize( + "histtype, expected", + [ + ("bar", True), + ("barstacked", True), + ("step", False), + ("stepfilled", True), + ], + ) + def test_histtype_argument(self, histtype, expected): + # GH23992 Verify functioning of histtype argument + ser = Series(np.random.randint(1, 10)) + ax = ser.hist(histtype=histtype) + self._check_patches_all_filled(ax, filled=expected) + + @pytest.mark.parametrize( + "by, expected_axes_num, expected_layout", [(None, 1, (1, 1)), ("b", 2, (1, 2))] + ) + def test_hist_with_legend(self, by, expected_axes_num, expected_layout): + # GH 6279 - Series histogram can have a legend + index = 15 * ["1"] + 15 * ["2"] + s = Series(np.random.randn(30), index=index, name="a") + s.index.name = "b" + + # Use default_axes=True when plotting method generate subplots itself + axes = _check_plot_works(s.hist, default_axes=True, legend=True, by=by) + self._check_axes_shape(axes, axes_num=expected_axes_num, layout=expected_layout) + self._check_legend_labels(axes, "a") + + @pytest.mark.parametrize("by", [None, "b"]) + def test_hist_with_legend_raises(self, by): + # GH 6279 - Series histogram with legend and label raises + index = 15 * ["1"] + 15 * ["2"] + s = Series(np.random.randn(30), index=index, name="a") + s.index.name = "b" + + with pytest.raises(ValueError, match="Cannot use both legend and label"): + s.hist(legend=True, by=by, label="c") + + def test_hist_kwargs(self): + _, ax = self.plt.subplots() + ax = self.ts.plot.hist(bins=5, ax=ax) + assert len(ax.patches) == 5 + self._check_text_labels(ax.yaxis.get_label(), "Frequency") + tm.close() + + _, ax = self.plt.subplots() + ax = self.ts.plot.hist(orientation="horizontal", ax=ax) + self._check_text_labels(ax.xaxis.get_label(), "Frequency") + tm.close() + + _, ax = self.plt.subplots() + ax = self.ts.plot.hist(align="left", stacked=True, ax=ax) + tm.close() + + @td.skip_if_no_scipy + def test_hist_kde(self): + + _, ax = self.plt.subplots() + ax = self.ts.plot.hist(logy=True, ax=ax) + self._check_ax_scales(ax, yaxis="log") + xlabels = ax.get_xticklabels() + # ticks are values, thus ticklabels are blank + self._check_text_labels(xlabels, [""] * len(xlabels)) + ylabels = ax.get_yticklabels() + self._check_text_labels(ylabels, [""] * len(ylabels)) + + _check_plot_works(self.ts.plot.kde) + _check_plot_works(self.ts.plot.density) + _, ax = self.plt.subplots() + ax = self.ts.plot.kde(logy=True, ax=ax) + self._check_ax_scales(ax, yaxis="log") + xlabels = ax.get_xticklabels() + self._check_text_labels(xlabels, [""] * len(xlabels)) + ylabels = ax.get_yticklabels() + self._check_text_labels(ylabels, [""] * len(ylabels)) + + @td.skip_if_no_scipy + def test_hist_kde_color(self): + _, ax = self.plt.subplots() + ax = self.ts.plot.hist(logy=True, bins=10, color="b", ax=ax) + self._check_ax_scales(ax, yaxis="log") + assert len(ax.patches) == 10 + self._check_colors(ax.patches, facecolors=["b"] * 10) + + _, ax = self.plt.subplots() + ax = self.ts.plot.kde(logy=True, color="r", ax=ax) + self._check_ax_scales(ax, yaxis="log") + lines = ax.get_lines() + assert len(lines) == 1 + self._check_colors(lines, ["r"]) + + +@td.skip_if_no_mpl +class TestDataFramePlots(TestPlotBase): + def test_hist_df_legacy(self): + from matplotlib.patches import Rectangle + + with tm.assert_produces_warning(UserWarning): + _check_plot_works(self.hist_df.hist) + + # make sure layout is handled + df = DataFrame(np.random.randn(100, 2)) + df[2] = 
to_datetime( + np.random.randint( + self.start_date_to_int64, + self.end_date_to_int64, + size=100, + dtype=np.int64, + ) + ) + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works(df.hist, grid=False) + self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) + assert not axes[1, 1].get_visible() + + _check_plot_works(df[[2]].hist) + df = DataFrame(np.random.randn(100, 1)) + _check_plot_works(df.hist) + + # make sure layout is handled + df = DataFrame(np.random.randn(100, 5)) + df[5] = to_datetime( + np.random.randint( + self.start_date_to_int64, + self.end_date_to_int64, + size=100, + dtype=np.int64, + ) + ) + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works(df.hist, layout=(4, 2)) + self._check_axes_shape(axes, axes_num=6, layout=(4, 2)) + + # make sure sharex, sharey is handled + with tm.assert_produces_warning(UserWarning): + _check_plot_works(df.hist, sharex=True, sharey=True) + + # handle figsize arg + with tm.assert_produces_warning(UserWarning): + _check_plot_works(df.hist, figsize=(8, 10)) + + # check bins argument + with tm.assert_produces_warning(UserWarning): + _check_plot_works(df.hist, bins=5) + + # make sure xlabelsize and xrot are handled + ser = df[0] + xf, yf = 20, 18 + xrot, yrot = 30, 40 + axes = ser.hist(xlabelsize=xf, xrot=xrot, ylabelsize=yf, yrot=yrot) + self._check_ticks_props( + axes, xlabelsize=xf, xrot=xrot, ylabelsize=yf, yrot=yrot + ) + + xf, yf = 20, 18 + xrot, yrot = 30, 40 + axes = df.hist(xlabelsize=xf, xrot=xrot, ylabelsize=yf, yrot=yrot) + self._check_ticks_props( + axes, xlabelsize=xf, xrot=xrot, ylabelsize=yf, yrot=yrot + ) + + tm.close() + + ax = ser.hist(cumulative=True, bins=4, density=True) + # height of last bin (index 5) must be 1.0 + rects = [x for x in ax.get_children() if isinstance(x, Rectangle)] + tm.assert_almost_equal(rects[-1].get_height(), 1.0) + + tm.close() + ax = ser.hist(log=True) + # scale of y must be 'log' + self._check_ax_scales(ax, yaxis="log") + + tm.close() + + # propagate attr exception from matplotlib.Axes.hist + with tm.external_error_raised(AttributeError): + ser.hist(foo="bar") + + def test_hist_non_numerical_or_datetime_raises(self): + # gh-10444, GH32590 + df = DataFrame( + { + "a": np.random.rand(10), + "b": np.random.randint(0, 10, 10), + "c": to_datetime( + np.random.randint( + 1582800000000000000, 1583500000000000000, 10, dtype=np.int64 + ) + ), + "d": to_datetime( + np.random.randint( + 1582800000000000000, 1583500000000000000, 10, dtype=np.int64 + ), + utc=True, + ), + } + ) + df_o = df.astype(object) + + msg = "hist method requires numerical or datetime columns, nothing to plot." 
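+        # every column of df_o was cast to object above, so none counts as
+        # numerical or datetime and DataFrame.hist has nothing to plot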
+ with pytest.raises(ValueError, match=msg): + df_o.hist() + + def test_hist_layout(self): + df = DataFrame(np.random.randn(100, 2)) + df[2] = to_datetime( + np.random.randint( + self.start_date_to_int64, + self.end_date_to_int64, + size=100, + dtype=np.int64, + ) + ) + + layout_to_expected_size = ( + {"layout": None, "expected_size": (2, 2)}, # default is 2x2 + {"layout": (2, 2), "expected_size": (2, 2)}, + {"layout": (4, 1), "expected_size": (4, 1)}, + {"layout": (1, 4), "expected_size": (1, 4)}, + {"layout": (3, 3), "expected_size": (3, 3)}, + {"layout": (-1, 4), "expected_size": (1, 4)}, + {"layout": (4, -1), "expected_size": (4, 1)}, + {"layout": (-1, 2), "expected_size": (2, 2)}, + {"layout": (2, -1), "expected_size": (2, 2)}, + ) + + for layout_test in layout_to_expected_size: + axes = df.hist(layout=layout_test["layout"]) + expected = layout_test["expected_size"] + self._check_axes_shape(axes, axes_num=3, layout=expected) + + # layout too small for all 4 plots + msg = "Layout of 1x1 must be larger than required size 3" + with pytest.raises(ValueError, match=msg): + df.hist(layout=(1, 1)) + + # invalid format for layout + msg = re.escape("Layout must be a tuple of (rows, columns)") + with pytest.raises(ValueError, match=msg): + df.hist(layout=(1,)) + msg = "At least one dimension of layout must be positive" + with pytest.raises(ValueError, match=msg): + df.hist(layout=(-1, -1)) + + # GH 9351 + def test_tight_layout(self): + df = DataFrame(np.random.randn(100, 2)) + df[2] = to_datetime( + np.random.randint( + self.start_date_to_int64, + self.end_date_to_int64, + size=100, + dtype=np.int64, + ) + ) + # Use default_axes=True when plotting method generate subplots itself + _check_plot_works(df.hist, default_axes=True) + self.plt.tight_layout() + + tm.close() + + def test_hist_subplot_xrot(self): + # GH 30288 + df = DataFrame( + { + "length": [1.5, 0.5, 1.2, 0.9, 3], + "animal": ["pig", "rabbit", "pig", "pig", "rabbit"], + } + ) + # Use default_axes=True when plotting method generate subplots itself + axes = _check_plot_works( + df.hist, + default_axes=True, + filterwarnings="always", + column="length", + by="animal", + bins=5, + xrot=0, + ) + self._check_ticks_props(axes, xrot=0) + + @pytest.mark.parametrize( + "column, expected", + [ + (None, ["width", "length", "height"]), + (["length", "width", "height"], ["length", "width", "height"]), + ], + ) + def test_hist_column_order_unchanged(self, column, expected): + # GH29235 + + df = DataFrame( + { + "width": [0.7, 0.2, 0.15, 0.2, 1.1], + "length": [1.5, 0.5, 1.2, 0.9, 3], + "height": [3, 0.5, 3.4, 2, 1], + }, + index=["pig", "rabbit", "duck", "chicken", "horse"], + ) + + # Use default_axes=True when plotting method generate subplots itself + axes = _check_plot_works( + df.hist, + default_axes=True, + column=column, + layout=(1, 3), + ) + result = [axes[0, i].get_title() for i in range(3)] + assert result == expected + + @pytest.mark.parametrize( + "histtype, expected", + [ + ("bar", True), + ("barstacked", True), + ("step", False), + ("stepfilled", True), + ], + ) + def test_histtype_argument(self, histtype, expected): + # GH23992 Verify functioning of histtype argument + df = DataFrame(np.random.randint(1, 10, size=(100, 2)), columns=["a", "b"]) + ax = df.hist(histtype=histtype) + self._check_patches_all_filled(ax, filled=expected) + + @pytest.mark.parametrize("by", [None, "c"]) + @pytest.mark.parametrize("column", [None, "b"]) + def test_hist_with_legend(self, by, column): + # GH 6279 - DataFrame histogram can have a legend + 
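+        # a single subplot only when there is no grouping and one column is
+        # selected; otherwise two (one per column a/b, or per group of "c")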
expected_axes_num = 1 if by is None and column is not None else 2 + expected_layout = (1, expected_axes_num) + expected_labels = column or ["a", "b"] + if by is not None: + expected_labels = [expected_labels] * 2 + + index = Index(15 * ["1"] + 15 * ["2"], name="c") + df = DataFrame(np.random.randn(30, 2), index=index, columns=["a", "b"]) + + # Use default_axes=True when plotting method generate subplots itself + axes = _check_plot_works( + df.hist, + default_axes=True, + legend=True, + by=by, + column=column, + ) + + self._check_axes_shape(axes, axes_num=expected_axes_num, layout=expected_layout) + if by is None and column is None: + axes = axes[0] + for expected_label, ax in zip(expected_labels, axes): + self._check_legend_labels(ax, expected_label) + + @pytest.mark.parametrize("by", [None, "c"]) + @pytest.mark.parametrize("column", [None, "b"]) + def test_hist_with_legend_raises(self, by, column): + # GH 6279 - DataFrame histogram with legend and label raises + index = Index(15 * ["1"] + 15 * ["2"], name="c") + df = DataFrame(np.random.randn(30, 2), index=index, columns=["a", "b"]) + + with pytest.raises(ValueError, match="Cannot use both legend and label"): + df.hist(legend=True, by=by, column=column, label="d") + + def test_hist_df_kwargs(self): + df = DataFrame(np.random.randn(10, 2)) + _, ax = self.plt.subplots() + ax = df.plot.hist(bins=5, ax=ax) + assert len(ax.patches) == 10 + + def test_hist_df_with_nonnumerics(self): + # GH 9853 + with tm.RNGContext(1): + df = DataFrame(np.random.randn(10, 4), columns=["A", "B", "C", "D"]) + df["E"] = ["x", "y"] * 5 + _, ax = self.plt.subplots() + ax = df.plot.hist(bins=5, ax=ax) + assert len(ax.patches) == 20 + + _, ax = self.plt.subplots() + ax = df.plot.hist(ax=ax) # bins=10 + assert len(ax.patches) == 40 + + def test_hist_secondary_legend(self): + # GH 9610 + df = DataFrame(np.random.randn(30, 4), columns=list("abcd")) + + # primary -> secondary + _, ax = self.plt.subplots() + ax = df["a"].plot.hist(legend=True, ax=ax) + df["b"].plot.hist(ax=ax, legend=True, secondary_y=True) + # both legends are drawn on left ax + # left and right axis must be visible + self._check_legend_labels(ax, labels=["a", "b (right)"]) + assert ax.get_yaxis().get_visible() + assert ax.right_ax.get_yaxis().get_visible() + tm.close() + + # secondary -> secondary + _, ax = self.plt.subplots() + ax = df["a"].plot.hist(legend=True, secondary_y=True, ax=ax) + df["b"].plot.hist(ax=ax, legend=True, secondary_y=True) + # both legends are draw on left ax + # left axis must be invisible, right axis must be visible + self._check_legend_labels(ax.left_ax, labels=["a (right)", "b (right)"]) + assert not ax.left_ax.get_yaxis().get_visible() + assert ax.get_yaxis().get_visible() + tm.close() + + # secondary -> primary + _, ax = self.plt.subplots() + ax = df["a"].plot.hist(legend=True, secondary_y=True, ax=ax) + # right axes is returned + df["b"].plot.hist(ax=ax, legend=True) + # both legends are draw on left ax + # left and right axis must be visible + self._check_legend_labels(ax.left_ax, labels=["a (right)", "b"]) + assert ax.left_ax.get_yaxis().get_visible() + assert ax.get_yaxis().get_visible() + tm.close() + + +@td.skip_if_no_mpl +class TestDataFrameGroupByPlots(TestPlotBase): + def test_grouped_hist_legacy(self): + from matplotlib.patches import Rectangle + + from pandas.plotting._matplotlib.hist import _grouped_hist + + df = DataFrame(np.random.randn(500, 1), columns=["A"]) + df["B"] = to_datetime( + np.random.randint( + self.start_date_to_int64, + self.end_date_to_int64, + 
size=500, + dtype=np.int64, + ) + ) + df["C"] = np.random.randint(0, 4, 500) + df["D"] = ["X"] * 500 + + axes = _grouped_hist(df.A, by=df.C) + self._check_axes_shape(axes, axes_num=4, layout=(2, 2)) + + tm.close() + axes = df.hist(by=df.C) + self._check_axes_shape(axes, axes_num=4, layout=(2, 2)) + + tm.close() + # group by a key with single value + axes = df.hist(by="D", rot=30) + self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) + self._check_ticks_props(axes, xrot=30) + + tm.close() + # make sure kwargs to hist are handled + xf, yf = 20, 18 + xrot, yrot = 30, 40 + + axes = _grouped_hist( + df.A, + by=df.C, + cumulative=True, + bins=4, + xlabelsize=xf, + xrot=xrot, + ylabelsize=yf, + yrot=yrot, + density=True, + ) + # height of last bin (index 5) must be 1.0 + for ax in axes.ravel(): + rects = [x for x in ax.get_children() if isinstance(x, Rectangle)] + height = rects[-1].get_height() + tm.assert_almost_equal(height, 1.0) + self._check_ticks_props( + axes, xlabelsize=xf, xrot=xrot, ylabelsize=yf, yrot=yrot + ) + + tm.close() + axes = _grouped_hist(df.A, by=df.C, log=True) + # scale of y must be 'log' + self._check_ax_scales(axes, yaxis="log") + + tm.close() + # propagate attr exception from matplotlib.Axes.hist + with tm.external_error_raised(AttributeError): + _grouped_hist(df.A, by=df.C, foo="bar") + + msg = "Specify figure size by tuple instead" + with pytest.raises(ValueError, match=msg): + df.hist(by="C", figsize="default") + + def test_grouped_hist_legacy2(self): + n = 10 + weight = Series(np.random.normal(166, 20, size=n)) + height = Series(np.random.normal(60, 10, size=n)) + with tm.RNGContext(42): + gender_int = np.random.choice([0, 1], size=n) + df_int = DataFrame({"height": height, "weight": weight, "gender": gender_int}) + gb = df_int.groupby("gender") + axes = gb.hist() + assert len(axes) == 2 + assert len(self.plt.get_fignums()) == 2 + tm.close() + + def test_grouped_hist_layout(self): + df = self.hist_df + msg = "Layout of 1x1 must be larger than required size 2" + with pytest.raises(ValueError, match=msg): + df.hist(column="weight", by=df.gender, layout=(1, 1)) + + msg = "Layout of 1x3 must be larger than required size 4" + with pytest.raises(ValueError, match=msg): + df.hist(column="height", by=df.category, layout=(1, 3)) + + msg = "At least one dimension of layout must be positive" + with pytest.raises(ValueError, match=msg): + df.hist(column="height", by=df.category, layout=(-1, -1)) + + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works( + df.hist, column="height", by=df.gender, layout=(2, 1) + ) + self._check_axes_shape(axes, axes_num=2, layout=(2, 1)) + + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works( + df.hist, column="height", by=df.gender, layout=(2, -1) + ) + self._check_axes_shape(axes, axes_num=2, layout=(2, 1)) + + axes = df.hist(column="height", by=df.category, layout=(4, 1)) + self._check_axes_shape(axes, axes_num=4, layout=(4, 1)) + + axes = df.hist(column="height", by=df.category, layout=(-1, 1)) + self._check_axes_shape(axes, axes_num=4, layout=(4, 1)) + + axes = df.hist(column="height", by=df.category, layout=(4, 2), figsize=(12, 8)) + self._check_axes_shape(axes, axes_num=4, layout=(4, 2), figsize=(12, 8)) + tm.close() + + # GH 6769 + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works( + df.hist, column="height", by="classroom", layout=(2, 2) + ) + self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) + + # without column + with tm.assert_produces_warning(UserWarning): + 
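+            # the UserWarning is the cleared-axis warning that
+            # _check_plot_works triggers by passing its own ax, see GH #13188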
axes = _check_plot_works(df.hist, by="classroom") + self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) + + axes = df.hist(by="gender", layout=(3, 5)) + self._check_axes_shape(axes, axes_num=2, layout=(3, 5)) + + axes = df.hist(column=["height", "weight", "category"]) + self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) + + def test_grouped_hist_multiple_axes(self): + # GH 6970, GH 7069 + df = self.hist_df + + fig, axes = self.plt.subplots(2, 3) + returned = df.hist(column=["height", "weight", "category"], ax=axes[0]) + self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) + tm.assert_numpy_array_equal(returned, axes[0]) + assert returned[0].figure is fig + returned = df.hist(by="classroom", ax=axes[1]) + self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) + tm.assert_numpy_array_equal(returned, axes[1]) + assert returned[0].figure is fig + + fig, axes = self.plt.subplots(2, 3) + # pass different number of axes from required + msg = "The number of passed axes must be 1, the same as the output plot" + with pytest.raises(ValueError, match=msg): + axes = df.hist(column="height", ax=axes) + + def test_axis_share_x(self): + df = self.hist_df + # GH4089 + ax1, ax2 = df.hist(column="height", by=df.gender, sharex=True) + + # share x + assert self.get_x_axis(ax1).joined(ax1, ax2) + assert self.get_x_axis(ax2).joined(ax1, ax2) + + # don't share y + assert not self.get_y_axis(ax1).joined(ax1, ax2) + assert not self.get_y_axis(ax2).joined(ax1, ax2) + + def test_axis_share_y(self): + df = self.hist_df + ax1, ax2 = df.hist(column="height", by=df.gender, sharey=True) + + # share y + assert self.get_y_axis(ax1).joined(ax1, ax2) + assert self.get_y_axis(ax2).joined(ax1, ax2) + + # don't share x + assert not self.get_x_axis(ax1).joined(ax1, ax2) + assert not self.get_x_axis(ax2).joined(ax1, ax2) + + def test_axis_share_xy(self): + df = self.hist_df + ax1, ax2 = df.hist(column="height", by=df.gender, sharex=True, sharey=True) + + # share both x and y + assert self.get_x_axis(ax1).joined(ax1, ax2) + assert self.get_x_axis(ax2).joined(ax1, ax2) + + assert self.get_y_axis(ax1).joined(ax1, ax2) + assert self.get_y_axis(ax2).joined(ax1, ax2) + + @pytest.mark.parametrize( + "histtype, expected", + [ + ("bar", True), + ("barstacked", True), + ("step", False), + ("stepfilled", True), + ], + ) + def test_histtype_argument(self, histtype, expected): + # GH23992 Verify functioning of histtype argument + df = DataFrame(np.random.randint(1, 10, size=(100, 2)), columns=["a", "b"]) + ax = df.hist(by="a", histtype=histtype) + self._check_patches_all_filled(ax, filled=expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/plotting/test_misc.py b/.local/lib/python3.8/site-packages/pandas/tests/plotting/test_misc.py new file mode 100644 index 0000000..adda95f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/plotting/test_misc.py @@ -0,0 +1,561 @@ +""" Test cases for misc plot functions """ + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas import ( + DataFrame, + Series, +) +import pandas._testing as tm +from pandas.tests.plotting.common import ( + TestPlotBase, + _check_plot_works, +) + +import pandas.plotting as plotting + +pytestmark = pytest.mark.slow + + +@td.skip_if_mpl +def test_import_error_message(): + # GH-19810 + df = DataFrame({"A": [1, 2]}) + + with pytest.raises(ImportError, match="matplotlib is required for plotting"): + df.plot() + + +def test_get_accessor_args(): + func = 
plotting._core.PlotAccessor._get_call_args + + msg = "Called plot accessor for type list, expected Series or DataFrame" + with pytest.raises(TypeError, match=msg): + func(backend_name="", data=[], args=[], kwargs={}) + + msg = "should not be called with positional arguments" + with pytest.raises(TypeError, match=msg): + func(backend_name="", data=Series(dtype=object), args=["line", None], kwargs={}) + + x, y, kind, kwargs = func( + backend_name="", + data=DataFrame(), + args=["x"], + kwargs={"y": "y", "kind": "bar", "grid": False}, + ) + assert x == "x" + assert y == "y" + assert kind == "bar" + assert kwargs == {"grid": False} + + x, y, kind, kwargs = func( + backend_name="pandas.plotting._matplotlib", + data=Series(dtype=object), + args=[], + kwargs={}, + ) + assert x is None + assert y is None + assert kind == "line" + assert len(kwargs) == 24 + + +@td.skip_if_no_mpl +class TestSeriesPlots(TestPlotBase): + def setup_method(self, method): + TestPlotBase.setup_method(self, method) + import matplotlib as mpl + + mpl.rcdefaults() + + self.ts = tm.makeTimeSeries() + self.ts.name = "ts" + + def test_autocorrelation_plot(self): + from pandas.plotting import autocorrelation_plot + + # Ensure no UserWarning when making plot + with tm.assert_produces_warning(None): + _check_plot_works(autocorrelation_plot, series=self.ts) + _check_plot_works(autocorrelation_plot, series=self.ts.values) + + ax = autocorrelation_plot(self.ts, label="Test") + self._check_legend_labels(ax, labels=["Test"]) + + def test_lag_plot(self): + from pandas.plotting import lag_plot + + _check_plot_works(lag_plot, series=self.ts) + _check_plot_works(lag_plot, series=self.ts, lag=5) + + def test_bootstrap_plot(self): + from pandas.plotting import bootstrap_plot + + _check_plot_works(bootstrap_plot, series=self.ts, size=10) + + +@td.skip_if_no_mpl +class TestDataFramePlots(TestPlotBase): + @td.skip_if_no_scipy + @pytest.mark.parametrize("pass_axis", [False, True]) + def test_scatter_matrix_axis(self, pass_axis): + from pandas.plotting._matplotlib.compat import mpl_ge_3_0_0 + + scatter_matrix = plotting.scatter_matrix + + ax = None + if pass_axis: + _, ax = self.plt.subplots(3, 3) + + with tm.RNGContext(42): + df = DataFrame(np.random.randn(100, 3)) + + # we are plotting multiples on a sub-plot + with tm.assert_produces_warning( + UserWarning, raise_on_extra_warnings=mpl_ge_3_0_0() + ): + axes = _check_plot_works( + scatter_matrix, + filterwarnings="always", + frame=df, + range_padding=0.1, + ax=ax, + ) + axes0_labels = axes[0][0].yaxis.get_majorticklabels() + + # GH 5662 + expected = ["-2", "0", "2"] + self._check_text_labels(axes0_labels, expected) + self._check_ticks_props(axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0) + + df[0] = (df[0] - 2) / 3 + + # we are plotting multiples on a sub-plot + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works( + scatter_matrix, + filterwarnings="always", + frame=df, + range_padding=0.1, + ax=ax, + ) + axes0_labels = axes[0][0].yaxis.get_majorticklabels() + expected = ["-1.0", "-0.5", "0.0"] + self._check_text_labels(axes0_labels, expected) + self._check_ticks_props(axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0) + + def test_andrews_curves(self, iris): + from matplotlib import cm + + from pandas.plotting import andrews_curves + + df = iris + # Ensure no UserWarning when making plot + with tm.assert_produces_warning(None): + _check_plot_works(andrews_curves, frame=df, class_column="Name") + + rgba = ("#556270", "#4ECDC4", "#C7F464") + ax = _check_plot_works( + 
andrews_curves, frame=df, class_column="Name", color=rgba + ) + self._check_colors( + ax.get_lines()[:10], linecolors=rgba, mapping=df["Name"][:10] + ) + + cnames = ["dodgerblue", "aquamarine", "seagreen"] + ax = _check_plot_works( + andrews_curves, frame=df, class_column="Name", color=cnames + ) + self._check_colors( + ax.get_lines()[:10], linecolors=cnames, mapping=df["Name"][:10] + ) + + ax = _check_plot_works( + andrews_curves, frame=df, class_column="Name", colormap=cm.jet + ) + cmaps = [cm.jet(n) for n in np.linspace(0, 1, df["Name"].nunique())] + self._check_colors( + ax.get_lines()[:10], linecolors=cmaps, mapping=df["Name"][:10] + ) + + length = 10 + df = DataFrame( + { + "A": np.random.rand(length), + "B": np.random.rand(length), + "C": np.random.rand(length), + "Name": ["A"] * length, + } + ) + + _check_plot_works(andrews_curves, frame=df, class_column="Name") + + rgba = ("#556270", "#4ECDC4", "#C7F464") + ax = _check_plot_works( + andrews_curves, frame=df, class_column="Name", color=rgba + ) + self._check_colors( + ax.get_lines()[:10], linecolors=rgba, mapping=df["Name"][:10] + ) + + cnames = ["dodgerblue", "aquamarine", "seagreen"] + ax = _check_plot_works( + andrews_curves, frame=df, class_column="Name", color=cnames + ) + self._check_colors( + ax.get_lines()[:10], linecolors=cnames, mapping=df["Name"][:10] + ) + + ax = _check_plot_works( + andrews_curves, frame=df, class_column="Name", colormap=cm.jet + ) + cmaps = [cm.jet(n) for n in np.linspace(0, 1, df["Name"].nunique())] + self._check_colors( + ax.get_lines()[:10], linecolors=cmaps, mapping=df["Name"][:10] + ) + + colors = ["b", "g", "r"] + df = DataFrame({"A": [1, 2, 3], "B": [1, 2, 3], "C": [1, 2, 3], "Name": colors}) + ax = andrews_curves(df, "Name", color=colors) + handles, labels = ax.get_legend_handles_labels() + self._check_colors(handles, linecolors=colors) + + def test_parallel_coordinates(self, iris): + from matplotlib import cm + + from pandas.plotting import parallel_coordinates + + df = iris + + ax = _check_plot_works(parallel_coordinates, frame=df, class_column="Name") + nlines = len(ax.get_lines()) + nxticks = len(ax.xaxis.get_ticklabels()) + + rgba = ("#556270", "#4ECDC4", "#C7F464") + ax = _check_plot_works( + parallel_coordinates, frame=df, class_column="Name", color=rgba + ) + self._check_colors( + ax.get_lines()[:10], linecolors=rgba, mapping=df["Name"][:10] + ) + + cnames = ["dodgerblue", "aquamarine", "seagreen"] + ax = _check_plot_works( + parallel_coordinates, frame=df, class_column="Name", color=cnames + ) + self._check_colors( + ax.get_lines()[:10], linecolors=cnames, mapping=df["Name"][:10] + ) + + ax = _check_plot_works( + parallel_coordinates, frame=df, class_column="Name", colormap=cm.jet + ) + cmaps = [cm.jet(n) for n in np.linspace(0, 1, df["Name"].nunique())] + self._check_colors( + ax.get_lines()[:10], linecolors=cmaps, mapping=df["Name"][:10] + ) + + ax = _check_plot_works( + parallel_coordinates, frame=df, class_column="Name", axvlines=False + ) + assert len(ax.get_lines()) == (nlines - nxticks) + + colors = ["b", "g", "r"] + df = DataFrame({"A": [1, 2, 3], "B": [1, 2, 3], "C": [1, 2, 3], "Name": colors}) + ax = parallel_coordinates(df, "Name", color=colors) + handles, labels = ax.get_legend_handles_labels() + self._check_colors(handles, linecolors=colors) + + # not sure if this is indicative of a problem + @pytest.mark.filterwarnings("ignore:Attempting to set:UserWarning") + def test_parallel_coordinates_with_sorted_labels(self): + """For #15908""" + from pandas.plotting import 
parallel_coordinates
+
+        df = DataFrame(
+            {
+                "feat": list(range(30)),
+                "class": [2 for _ in range(10)]
+                + [3 for _ in range(10)]
+                + [1 for _ in range(10)],
+            }
+        )
+        ax = parallel_coordinates(df, "class", sort_labels=True)
+        polylines, labels = ax.get_legend_handles_labels()
+        color_label_tuples = zip(
+            [polyline.get_color() for polyline in polylines], labels
+        )
+        ordered_color_label_tuples = sorted(color_label_tuples, key=lambda x: x[1])
+        prev_next_tuples = zip(
+            list(ordered_color_label_tuples[0:-1]), list(ordered_color_label_tuples[1:])
+        )
+        for prev, nxt in prev_next_tuples:
+            # labels and colors are ordered strictly increasing
+            assert prev[1] < nxt[1] and prev[0] < nxt[0]
+
+    def test_radviz(self, iris):
+        from matplotlib import cm
+
+        from pandas.plotting import radviz
+
+        df = iris
+        # Ensure no UserWarning when making plot
+        with tm.assert_produces_warning(None):
+            _check_plot_works(radviz, frame=df, class_column="Name")
+
+        rgba = ("#556270", "#4ECDC4", "#C7F464")
+        ax = _check_plot_works(radviz, frame=df, class_column="Name", color=rgba)
+        # skip Circle drawn as ticks
+        patches = [p for p in ax.patches[:20] if p.get_label() != ""]
+        self._check_colors(patches[:10], facecolors=rgba, mapping=df["Name"][:10])
+
+        cnames = ["dodgerblue", "aquamarine", "seagreen"]
+        _check_plot_works(radviz, frame=df, class_column="Name", color=cnames)
+        patches = [p for p in ax.patches[:20] if p.get_label() != ""]
+        self._check_colors(patches, facecolors=cnames, mapping=df["Name"][:10])
+
+        _check_plot_works(radviz, frame=df, class_column="Name", colormap=cm.jet)
+        cmaps = [cm.jet(n) for n in np.linspace(0, 1, df["Name"].nunique())]
+        patches = [p for p in ax.patches[:20] if p.get_label() != ""]
+        self._check_colors(patches, facecolors=cmaps, mapping=df["Name"][:10])
+
+        colors = [[0.0, 0.0, 1.0, 1.0], [0.0, 0.5, 1.0, 1.0], [1.0, 0.0, 0.0, 1.0]]
+        df = DataFrame(
+            {"A": [1, 2, 3], "B": [2, 1, 3], "C": [3, 2, 1], "Name": ["b", "g", "r"]}
+        )
+        ax = radviz(df, "Name", color=colors)
+        handles, labels = ax.get_legend_handles_labels()
+        self._check_colors(handles, facecolors=colors)
+
+    def test_subplot_titles(self, iris):
+        df = iris.drop("Name", axis=1).head()
+        # Use the column names as the subplot titles
+        title = list(df.columns)
+
+        # Case len(title) == len(df)
+        plot = df.plot(subplots=True, title=title)
+        assert [p.get_title() for p in plot] == title
+
+        # Case len(title) > len(df)
+        msg = (
+            "The length of `title` must equal the number of columns if "
+            "using `title` of type `list` and `subplots=True`"
+        )
+        with pytest.raises(ValueError, match=msg):
+            df.plot(subplots=True, title=title + ["kittens > puppies"])
+
+        # Case len(title) < len(df)
+        with pytest.raises(ValueError, match=msg):
+            df.plot(subplots=True, title=title[:2])
+
+        # Case subplots=False and title is of type list
+        msg = (
+            "Using `title` of type `list` is not supported unless "
+            "`subplots=True` is passed"
+        )
+        with pytest.raises(ValueError, match=msg):
+            df.plot(subplots=False, title=title)
+
+        # Case df with 3 numeric columns but layout of (2,2)
+        plot = df.drop("SepalWidth", axis=1).plot(
+            subplots=True, layout=(2, 2), title=title[:-1]
+        )
+        title_list = [ax.get_title() for sublist in plot for ax in sublist]
+        assert title_list == title[:3] + [""]
+
+    def test_get_standard_colors_random_seed(self):
+        # GH17525
+        df = DataFrame(np.zeros((10, 10)))
+
+        # Make sure that the np.random.seed isn't reset by get_standard_colors
+        plotting.parallel_coordinates(df, 0)
+        rand1 = np.random.random()
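+        # if get_standard_colors had reseeded the global RNG, the second sample
+        # below would repeat rand1 instead of advancing the stream
+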
plotting.parallel_coordinates(df, 0) + rand2 = np.random.random() + assert rand1 != rand2 + + # Make sure it produces the same colors every time it's called + from pandas.plotting._matplotlib.style import get_standard_colors + + color1 = get_standard_colors(1, color_type="random") + color2 = get_standard_colors(1, color_type="random") + assert color1 == color2 + + def test_get_standard_colors_default_num_colors(self): + from pandas.plotting._matplotlib.style import get_standard_colors + + # Make sure the default color_types returns the specified amount + color1 = get_standard_colors(1, color_type="default") + color2 = get_standard_colors(9, color_type="default") + color3 = get_standard_colors(20, color_type="default") + assert len(color1) == 1 + assert len(color2) == 9 + assert len(color3) == 20 + + def test_plot_single_color(self): + # Example from #20585. All 3 bars should have the same color + df = DataFrame( + { + "account-start": ["2017-02-03", "2017-03-03", "2017-01-01"], + "client": ["Alice Anders", "Bob Baker", "Charlie Chaplin"], + "balance": [-1432.32, 10.43, 30000.00], + "db-id": [1234, 2424, 251], + "proxy-id": [525, 1525, 2542], + "rank": [52, 525, 32], + } + ) + ax = df.client.value_counts().plot.bar() + colors = [rect.get_facecolor() for rect in ax.get_children()[0:3]] + assert all(color == colors[0] for color in colors) + + def test_get_standard_colors_no_appending(self): + # GH20726 + + # Make sure not to add more colors so that matplotlib can cycle + # correctly. + from matplotlib import cm + + from pandas.plotting._matplotlib.style import get_standard_colors + + color_before = cm.gnuplot(range(5)) + color_after = get_standard_colors(1, color=color_before) + assert len(color_after) == len(color_before) + + df = DataFrame(np.random.randn(48, 4), columns=list("ABCD")) + + color_list = cm.gnuplot(np.linspace(0, 1, 16)) + p = df.A.plot.bar(figsize=(16, 7), color=color_list) + assert p.patches[1].get_facecolor() == p.patches[17].get_facecolor() + + def test_dictionary_color(self): + # issue-8193 + # Test plot color dictionary format + data_files = ["a", "b"] + + expected = [(0.5, 0.24, 0.6), (0.3, 0.7, 0.7)] + + df1 = DataFrame(np.random.rand(2, 2), columns=data_files) + dic_color = {"b": (0.3, 0.7, 0.7), "a": (0.5, 0.24, 0.6)} + + # Bar color test + ax = df1.plot(kind="bar", color=dic_color) + colors = [rect.get_facecolor()[0:-1] for rect in ax.get_children()[0:3:2]] + assert all(color == expected[index] for index, color in enumerate(colors)) + + # Line color test + ax = df1.plot(kind="line", color=dic_color) + colors = [rect.get_color() for rect in ax.get_lines()[0:2]] + assert all(color == expected[index] for index, color in enumerate(colors)) + + def test_has_externally_shared_axis_x_axis(self): + # GH33819 + # Test _has_externally_shared_axis() works for x-axis + func = plotting._matplotlib.tools._has_externally_shared_axis + + fig = self.plt.figure() + plots = fig.subplots(2, 4) + + # Create *externally* shared axes for first and third columns + plots[0][0] = fig.add_subplot(231, sharex=plots[1][0]) + plots[0][2] = fig.add_subplot(233, sharex=plots[1][2]) + + # Create *internally* shared axes for second and third columns + plots[0][1].twinx() + plots[0][2].twinx() + + # First column is only externally shared + # Second column is only internally shared + # Third column is both + # Fourth column is neither + assert func(plots[0][0], "x") + assert not func(plots[0][1], "x") + assert func(plots[0][2], "x") + assert not func(plots[0][3], "x") + + def 
test_has_externally_shared_axis_y_axis(self): + # GH33819 + # Test _has_externally_shared_axis() works for y-axis + func = plotting._matplotlib.tools._has_externally_shared_axis + + fig = self.plt.figure() + plots = fig.subplots(4, 2) + + # Create *externally* shared axes for first and third rows + plots[0][0] = fig.add_subplot(321, sharey=plots[0][1]) + plots[2][0] = fig.add_subplot(325, sharey=plots[2][1]) + + # Create *internally* shared axes for second and third rows + plots[1][0].twiny() + plots[2][0].twiny() + + # First row is only externally shared + # Second row is only internally shared + # Third row is both + # Fourth row is neither + assert func(plots[0][0], "y") + assert not func(plots[1][0], "y") + assert func(plots[2][0], "y") + assert not func(plots[3][0], "y") + + def test_has_externally_shared_axis_invalid_compare_axis(self): + # GH33819 + # Test _has_externally_shared_axis() raises an exception when + # passed an invalid value as compare_axis parameter + func = plotting._matplotlib.tools._has_externally_shared_axis + + fig = self.plt.figure() + plots = fig.subplots(4, 2) + + # Create arbitrary axes + plots[0][0] = fig.add_subplot(321, sharey=plots[0][1]) + + # Check that an invalid compare_axis value triggers the expected exception + msg = "needs 'x' or 'y' as a second parameter" + with pytest.raises(ValueError, match=msg): + func(plots[0][0], "z") + + def test_externally_shared_axes(self): + # Example from GH33819 + # Create data + df = DataFrame({"a": np.random.randn(1000), "b": np.random.randn(1000)}) + + # Create figure + fig = self.plt.figure() + plots = fig.subplots(2, 3) + + # Create *externally* shared axes + plots[0][0] = fig.add_subplot(231, sharex=plots[1][0]) + # note: no plots[0][1] that's the twin only case + plots[0][2] = fig.add_subplot(233, sharex=plots[1][2]) + + # Create *internally* shared axes + # note: no plots[0][0] that's the external only case + twin_ax1 = plots[0][1].twinx() + twin_ax2 = plots[0][2].twinx() + + # Plot data to primary axes + df["a"].plot(ax=plots[0][0], title="External share only").set_xlabel( + "this label should never be visible" + ) + df["a"].plot(ax=plots[1][0]) + + df["a"].plot(ax=plots[0][1], title="Internal share (twin) only").set_xlabel( + "this label should always be visible" + ) + df["a"].plot(ax=plots[1][1]) + + df["a"].plot(ax=plots[0][2], title="Both").set_xlabel( + "this label should never be visible" + ) + df["a"].plot(ax=plots[1][2]) + + # Plot data to twinned axes + df["b"].plot(ax=twin_ax1, color="green") + df["b"].plot(ax=twin_ax2, color="yellow") + + assert not plots[0][0].xaxis.get_label().get_visible() + assert plots[0][1].xaxis.get_label().get_visible() + assert not plots[0][2].xaxis.get_label().get_visible() diff --git a/.local/lib/python3.8/site-packages/pandas/tests/plotting/test_series.py b/.local/lib/python3.8/site-packages/pandas/tests/plotting/test_series.py new file mode 100644 index 0000000..44fc604 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/plotting/test_series.py @@ -0,0 +1,817 @@ +""" Test cases for Series.plot """ + + +from datetime import datetime +from itertools import chain + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + DataFrame, + Series, + date_range, +) +import pandas._testing as tm +from pandas.tests.plotting.common import ( + TestPlotBase, + _check_plot_works, +) + +import pandas.plotting as plotting + +pytestmark = pytest.mark.slow + + +@td.skip_if_no_mpl +class 
TestSeriesPlots(TestPlotBase): + def setup_method(self, method): + TestPlotBase.setup_method(self, method) + import matplotlib as mpl + + mpl.rcdefaults() + + self.ts = tm.makeTimeSeries() + self.ts.name = "ts" + + self.series = tm.makeStringSeries() + self.series.name = "series" + + self.iseries = tm.makePeriodSeries() + self.iseries.name = "iseries" + + def test_plot(self): + _check_plot_works(self.ts.plot, label="foo") + _check_plot_works(self.ts.plot, use_index=False) + axes = _check_plot_works(self.ts.plot, rot=0) + self._check_ticks_props(axes, xrot=0) + + ax = _check_plot_works(self.ts.plot, style=".", logy=True) + self._check_ax_scales(ax, yaxis="log") + + ax = _check_plot_works(self.ts.plot, style=".", logx=True) + self._check_ax_scales(ax, xaxis="log") + + ax = _check_plot_works(self.ts.plot, style=".", loglog=True) + self._check_ax_scales(ax, xaxis="log", yaxis="log") + + _check_plot_works(self.ts[:10].plot.bar) + _check_plot_works(self.ts.plot.area, stacked=False) + _check_plot_works(self.iseries.plot) + + for kind in ["line", "bar", "barh", "kde", "hist", "box"]: + _check_plot_works(self.series[:5].plot, kind=kind) + + _check_plot_works(self.series[:10].plot.barh) + ax = _check_plot_works(Series(np.random.randn(10)).plot.bar, color="black") + self._check_colors([ax.patches[0]], facecolors=["black"]) + + # GH 6951 + ax = _check_plot_works(self.ts.plot, subplots=True) + self._check_axes_shape(ax, axes_num=1, layout=(1, 1)) + + ax = _check_plot_works(self.ts.plot, subplots=True, layout=(-1, 1)) + self._check_axes_shape(ax, axes_num=1, layout=(1, 1)) + ax = _check_plot_works(self.ts.plot, subplots=True, layout=(1, -1)) + self._check_axes_shape(ax, axes_num=1, layout=(1, 1)) + + def test_plot_figsize_and_title(self): + # figsize and title + _, ax = self.plt.subplots() + ax = self.series.plot(title="Test", figsize=(16, 8), ax=ax) + self._check_text_labels(ax.title, "Test") + self._check_axes_shape(ax, axes_num=1, layout=(1, 1), figsize=(16, 8)) + + def test_dont_modify_rcParams(self): + # GH 8242 + key = "axes.prop_cycle" + colors = self.plt.rcParams[key] + _, ax = self.plt.subplots() + Series([1, 2, 3]).plot(ax=ax) + assert colors == self.plt.rcParams[key] + + def test_ts_line_lim(self): + fig, ax = self.plt.subplots() + ax = self.ts.plot(ax=ax) + xmin, xmax = ax.get_xlim() + lines = ax.get_lines() + assert xmin <= lines[0].get_data(orig=False)[0][0] + assert xmax >= lines[0].get_data(orig=False)[0][-1] + tm.close() + + ax = self.ts.plot(secondary_y=True, ax=ax) + xmin, xmax = ax.get_xlim() + lines = ax.get_lines() + assert xmin <= lines[0].get_data(orig=False)[0][0] + assert xmax >= lines[0].get_data(orig=False)[0][-1] + + def test_ts_area_lim(self): + _, ax = self.plt.subplots() + ax = self.ts.plot.area(stacked=False, ax=ax) + xmin, xmax = ax.get_xlim() + line = ax.get_lines()[0].get_data(orig=False)[0] + assert xmin <= line[0] + assert xmax >= line[-1] + self._check_ticks_props(ax, xrot=0) + tm.close() + + # GH 7471 + _, ax = self.plt.subplots() + ax = self.ts.plot.area(stacked=False, x_compat=True, ax=ax) + xmin, xmax = ax.get_xlim() + line = ax.get_lines()[0].get_data(orig=False)[0] + assert xmin <= line[0] + assert xmax >= line[-1] + self._check_ticks_props(ax, xrot=30) + tm.close() + + tz_ts = self.ts.copy() + tz_ts.index = tz_ts.tz_localize("GMT").tz_convert("CET") + _, ax = self.plt.subplots() + ax = tz_ts.plot.area(stacked=False, x_compat=True, ax=ax) + xmin, xmax = ax.get_xlim() + line = ax.get_lines()[0].get_data(orig=False)[0] + assert xmin <= line[0] + assert xmax >= 
line[-1]
+        self._check_ticks_props(ax, xrot=0)
+        tm.close()
+
+        _, ax = self.plt.subplots()
+        ax = tz_ts.plot.area(stacked=False, secondary_y=True, ax=ax)
+        xmin, xmax = ax.get_xlim()
+        line = ax.get_lines()[0].get_data(orig=False)[0]
+        assert xmin <= line[0]
+        assert xmax >= line[-1]
+        self._check_ticks_props(ax, xrot=0)
+
+    def test_area_sharey_dont_overwrite(self):
+        # GH37942
+        fig, (ax1, ax2) = self.plt.subplots(1, 2, sharey=True)
+
+        abs(self.ts).plot(ax=ax1, kind="area")
+        abs(self.ts).plot(ax=ax2, kind="area")
+
+        assert self.get_y_axis(ax1).joined(ax1, ax2)
+        assert self.get_y_axis(ax2).joined(ax1, ax2)
+
+    def test_label(self):
+        s = Series([1, 2])
+        _, ax = self.plt.subplots()
+        ax = s.plot(label="LABEL", legend=True, ax=ax)
+        self._check_legend_labels(ax, labels=["LABEL"])
+        self.plt.close()
+        _, ax = self.plt.subplots()
+        ax = s.plot(legend=True, ax=ax)
+        self._check_legend_labels(ax, labels=["None"])
+        self.plt.close()
+        # get name from index
+        s.name = "NAME"
+        _, ax = self.plt.subplots()
+        ax = s.plot(legend=True, ax=ax)
+        self._check_legend_labels(ax, labels=["NAME"])
+        self.plt.close()
+        # override the default
+        _, ax = self.plt.subplots()
+        ax = s.plot(legend=True, label="LABEL", ax=ax)
+        self._check_legend_labels(ax, labels=["LABEL"])
+        self.plt.close()
+        # Add label info, but don't draw
+        _, ax = self.plt.subplots()
+        ax = s.plot(legend=False, label="LABEL", ax=ax)
+        assert ax.get_legend() is None  # Hasn't been drawn
+        ax.legend()  # draw it
+        self._check_legend_labels(ax, labels=["LABEL"])
+
+    def test_boolean(self):
+        # GH 23719
+        s = Series([False, False, True])
+        _check_plot_works(s.plot, include_bool=True)
+
+        msg = "no numeric data to plot"
+        with pytest.raises(TypeError, match=msg):
+            _check_plot_works(s.plot)
+
+    def test_line_area_nan_series(self):
+        values = [1, 2, np.nan, 3]
+        s = Series(values)
+        ts = Series(values, index=tm.makeDateIndex(k=4))
+
+        for d in [s, ts]:
+            ax = _check_plot_works(d.plot)
+            masked = ax.lines[0].get_ydata()
+            # remove nan for comparison purpose
+            exp = np.array([1, 2, 3], dtype=np.float64)
+            tm.assert_numpy_array_equal(np.delete(masked.data, 2), exp)
+            tm.assert_numpy_array_equal(
+                masked.mask, np.array([False, False, True, False])
+            )
+
+            expected = np.array([1, 2, 0, 3], dtype=np.float64)
+            ax = _check_plot_works(d.plot, stacked=True)
+            tm.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected)
+            ax = _check_plot_works(d.plot.area)
+            tm.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected)
+            ax = _check_plot_works(d.plot.area, stacked=False)
+            tm.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected)
+
+    def test_line_use_index_false(self):
+        s = Series([1, 2, 3], index=["a", "b", "c"])
+        s.index.name = "The Index"
+        _, ax = self.plt.subplots()
+        ax = s.plot(use_index=False, ax=ax)
+        label = ax.get_xlabel()
+        assert label == ""
+        _, ax = self.plt.subplots()
+        ax2 = s.plot.bar(use_index=False, ax=ax)
+        label2 = ax2.get_xlabel()
+        assert label2 == ""
+
+    def test_bar_log(self):
+        expected = np.array([1e-1, 1e0, 1e1, 1e2, 1e3, 1e4])
+
+        _, ax = self.plt.subplots()
+        ax = Series([200, 500]).plot.bar(log=True, ax=ax)
+        tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), expected)
+        tm.close()
+
+        _, ax = self.plt.subplots()
+        ax = Series([200, 500]).plot.barh(log=True, ax=ax)
+        tm.assert_numpy_array_equal(ax.xaxis.get_ticklocs(), expected)
+        tm.close()
+
+        # GH 9905
+        expected = np.array([1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1])
+
+        _, ax = self.plt.subplots()
+        ax = Series([0.1, 0.01, 0.001]).plot(log=True, kind="bar", ax=ax)
+        ymin = 0.0007943282347242822
+        ymax = 0.12589254117941673
+        res = ax.get_ylim()
+        tm.assert_almost_equal(res[0], ymin)
+        tm.assert_almost_equal(res[1], ymax)
+        tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), expected)
+        tm.close()
+
+        _, ax = self.plt.subplots()
+        ax = Series([0.1, 0.01, 0.001]).plot(log=True, kind="barh", ax=ax)
+        res = ax.get_xlim()
+        tm.assert_almost_equal(res[0], ymin)
+        tm.assert_almost_equal(res[1], ymax)
+        tm.assert_numpy_array_equal(ax.xaxis.get_ticklocs(), expected)
+
+    def test_bar_ignore_index(self):
+        df = Series([1, 2, 3, 4], index=["a", "b", "c", "d"])
+        _, ax = self.plt.subplots()
+        ax = df.plot.bar(use_index=False, ax=ax)
+        self._check_text_labels(ax.get_xticklabels(), ["0", "1", "2", "3"])
+
+    def test_bar_user_colors(self):
+        s = Series([1, 2, 3, 4])
+        ax = s.plot.bar(color=["red", "blue", "blue", "red"])
+        result = [p.get_facecolor() for p in ax.patches]
+        expected = [
+            (1.0, 0.0, 0.0, 1.0),
+            (0.0, 0.0, 1.0, 1.0),
+            (0.0, 0.0, 1.0, 1.0),
+            (1.0, 0.0, 0.0, 1.0),
+        ]
+        assert result == expected
+
+    def test_rotation(self):
+        df = DataFrame(np.random.randn(5, 5))
+        # Default rot 0
+        _, ax = self.plt.subplots()
+        axes = df.plot(ax=ax)
+        self._check_ticks_props(axes, xrot=0)
+
+        _, ax = self.plt.subplots()
+        axes = df.plot(rot=30, ax=ax)
+        self._check_ticks_props(axes, xrot=30)
+
+    def test_irregular_datetime(self):
+        from pandas.plotting._matplotlib.converter import DatetimeConverter
+
+        rng = date_range("1/1/2000", "3/1/2000")
+        rng = rng[[0, 1, 2, 3, 5, 9, 10, 11, 12]]
+        ser = Series(np.random.randn(len(rng)), rng)
+        _, ax = self.plt.subplots()
+        ax = ser.plot(ax=ax)
+        xp = DatetimeConverter.convert(datetime(1999, 1, 1), "", ax)
+        ax.set_xlim("1/1/1999", "1/1/2001")
+        assert xp == ax.get_xlim()[0]
+        self._check_ticks_props(ax, xrot=30)
+
+    def test_unsorted_index_xlim(self):
+        ser = Series(
+            [0.0, 1.0, np.nan, 3.0, 4.0, 5.0, 6.0],
+            index=[1.0, 0.0, 3.0, 2.0, np.nan, 3.0, 2.0],
+        )
+        _, ax = self.plt.subplots()
+        ax = ser.plot(ax=ax)
+        xmin, xmax = ax.get_xlim()
+        lines = ax.get_lines()
+        assert xmin <= np.nanmin(lines[0].get_data(orig=False)[0])
+        assert xmax >= np.nanmax(lines[0].get_data(orig=False)[0])
+
+    def test_pie_series(self):
+        # if the sum of the values is less than 1.0, pie handles them as rates
+        # and draws a semicircle
+ series = Series( + np.random.randint(1, 5), index=["a", "b", "c", "d", "e"], name="YLABEL" + ) + ax = _check_plot_works(series.plot.pie) + self._check_text_labels(ax.texts, series.index) + assert ax.get_ylabel() == "YLABEL" + + # without wedge labels + ax = _check_plot_works(series.plot.pie, labels=None) + self._check_text_labels(ax.texts, [""] * 5) + + # with less colors than elements + color_args = ["r", "g", "b"] + ax = _check_plot_works(series.plot.pie, colors=color_args) + + color_expected = ["r", "g", "b", "r", "g"] + self._check_colors(ax.patches, facecolors=color_expected) + + # with labels and colors + labels = ["A", "B", "C", "D", "E"] + color_args = ["r", "g", "b", "c", "m"] + ax = _check_plot_works(series.plot.pie, labels=labels, colors=color_args) + self._check_text_labels(ax.texts, labels) + self._check_colors(ax.patches, facecolors=color_args) + + # with autopct and fontsize + ax = _check_plot_works( + series.plot.pie, colors=color_args, autopct="%.2f", fontsize=7 + ) + pcts = [f"{s*100:.2f}" for s in series.values / series.sum()] + expected_texts = list(chain.from_iterable(zip(series.index, pcts))) + self._check_text_labels(ax.texts, expected_texts) + for t in ax.texts: + assert t.get_fontsize() == 7 + + # includes negative value + series = Series([1, 2, 0, 4, -1], index=["a", "b", "c", "d", "e"]) + with pytest.raises(ValueError, match="pie plot doesn't allow negative values"): + series.plot.pie() + + # includes nan + series = Series([1, 2, np.nan, 4], index=["a", "b", "c", "d"], name="YLABEL") + ax = _check_plot_works(series.plot.pie) + self._check_text_labels(ax.texts, ["a", "b", "", "d"]) + + def test_pie_nan(self): + s = Series([1, np.nan, 1, 1]) + _, ax = self.plt.subplots() + ax = s.plot.pie(legend=True, ax=ax) + expected = ["0", "", "2", "3"] + result = [x.get_text() for x in ax.texts] + assert result == expected + + def test_df_series_secondary_legend(self): + # GH 9779 + df = DataFrame(np.random.randn(30, 3), columns=list("abc")) + s = Series(np.random.randn(30), name="x") + + # primary -> secondary (without passing ax) + _, ax = self.plt.subplots() + ax = df.plot(ax=ax) + s.plot(legend=True, secondary_y=True, ax=ax) + # both legends are drawn on left ax + # left and right axis must be visible + self._check_legend_labels(ax, labels=["a", "b", "c", "x (right)"]) + assert ax.get_yaxis().get_visible() + assert ax.right_ax.get_yaxis().get_visible() + tm.close() + + # primary -> secondary (with passing ax) + _, ax = self.plt.subplots() + ax = df.plot(ax=ax) + s.plot(ax=ax, legend=True, secondary_y=True) + # both legends are drawn on left ax + # left and right axis must be visible + self._check_legend_labels(ax, labels=["a", "b", "c", "x (right)"]) + assert ax.get_yaxis().get_visible() + assert ax.right_ax.get_yaxis().get_visible() + tm.close() + + # secondary -> secondary (without passing ax) + _, ax = self.plt.subplots() + ax = df.plot(secondary_y=True, ax=ax) + s.plot(legend=True, secondary_y=True, ax=ax) + # both legends are drawn on left ax + # left axis must be invisible and right axis must be visible + expected = ["a (right)", "b (right)", "c (right)", "x (right)"] + self._check_legend_labels(ax.left_ax, labels=expected) + assert not ax.left_ax.get_yaxis().get_visible() + assert ax.get_yaxis().get_visible() + tm.close() + + # secondary -> secondary (with passing ax) + _, ax = self.plt.subplots() + ax = df.plot(secondary_y=True, ax=ax) + s.plot(ax=ax, legend=True, secondary_y=True) + # both legends are drawn on left ax + # left axis must be invisible and right 
axis must be visible + expected = ["a (right)", "b (right)", "c (right)", "x (right)"] + self._check_legend_labels(ax.left_ax, expected) + assert not ax.left_ax.get_yaxis().get_visible() + assert ax.get_yaxis().get_visible() + tm.close() + + # secondary -> secondary (with passing ax) + _, ax = self.plt.subplots() + ax = df.plot(secondary_y=True, mark_right=False, ax=ax) + s.plot(ax=ax, legend=True, secondary_y=True) + # both legends are drawn on left ax + # left axis must be invisible and right axis must be visible + expected = ["a", "b", "c", "x (right)"] + self._check_legend_labels(ax.left_ax, expected) + assert not ax.left_ax.get_yaxis().get_visible() + assert ax.get_yaxis().get_visible() + tm.close() + + @pytest.mark.parametrize( + "input_logy, expected_scale", [(True, "log"), ("sym", "symlog")] + ) + def test_secondary_logy(self, input_logy, expected_scale): + # GH 25545 + s1 = Series(np.random.randn(30)) + s2 = Series(np.random.randn(30)) + + # GH 24980 + ax1 = s1.plot(logy=input_logy) + ax2 = s2.plot(secondary_y=True, logy=input_logy) + + assert ax1.get_yscale() == expected_scale + assert ax2.get_yscale() == expected_scale + + def test_plot_fails_with_dupe_color_and_style(self): + x = Series(np.random.randn(2)) + _, ax = self.plt.subplots() + msg = ( + "Cannot pass 'style' string with a color symbol and 'color' keyword " + "argument. Please use one or the other or pass 'style' without a color " + "symbol" + ) + with pytest.raises(ValueError, match=msg): + x.plot(style="k--", color="k", ax=ax) + + @td.skip_if_no_scipy + def test_kde_kwargs(self): + sample_points = np.linspace(-100, 100, 20) + _check_plot_works(self.ts.plot.kde, bw_method="scott", ind=20) + _check_plot_works(self.ts.plot.kde, bw_method=None, ind=20) + _check_plot_works(self.ts.plot.kde, bw_method=None, ind=np.int_(20)) + _check_plot_works(self.ts.plot.kde, bw_method=0.5, ind=sample_points) + _check_plot_works(self.ts.plot.density, bw_method=0.5, ind=sample_points) + _, ax = self.plt.subplots() + ax = self.ts.plot.kde(logy=True, bw_method=0.5, ind=sample_points, ax=ax) + self._check_ax_scales(ax, yaxis="log") + self._check_text_labels(ax.yaxis.get_label(), "Density") + + @td.skip_if_no_scipy + def test_kde_missing_vals(self): + s = Series(np.random.uniform(size=50)) + s[0] = np.nan + axes = _check_plot_works(s.plot.kde) + + # gh-14821: check if the values have any missing values + assert any(~np.isnan(axes.lines[0].get_xdata())) + + def test_boxplot_series(self): + _, ax = self.plt.subplots() + ax = self.ts.plot.box(logy=True, ax=ax) + self._check_ax_scales(ax, yaxis="log") + xlabels = ax.get_xticklabels() + self._check_text_labels(xlabels, [self.ts.name]) + ylabels = ax.get_yticklabels() + self._check_text_labels(ylabels, [""] * len(ylabels)) + + def test_kind_both_ways(self): + s = Series(range(3)) + kinds = ( + plotting.PlotAccessor._common_kinds + plotting.PlotAccessor._series_kinds + ) + for kind in kinds: + _, ax = self.plt.subplots() + s.plot(kind=kind, ax=ax) + self.plt.close() + _, ax = self.plt.subplots() + getattr(s.plot, kind)() + self.plt.close() + + def test_invalid_plot_data(self): + s = Series(list("abcd")) + _, ax = self.plt.subplots() + for kind in plotting.PlotAccessor._common_kinds: + + msg = "no numeric data to plot" + with pytest.raises(TypeError, match=msg): + s.plot(kind=kind, ax=ax) + + def test_valid_object_plot(self): + s = Series(range(10), dtype=object) + for kind in plotting.PlotAccessor._common_kinds: + _check_plot_works(s.plot, kind=kind) + + def test_partially_invalid_plot_data(self): + 
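# mixed object data (strings and numbers together) is rejected just like the
+        # all-string Series in test_invalid_plot_data above
+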
s = Series(["a", "b", 1.0, 2]) + _, ax = self.plt.subplots() + for kind in plotting.PlotAccessor._common_kinds: + + msg = "no numeric data to plot" + with pytest.raises(TypeError, match=msg): + s.plot(kind=kind, ax=ax) + + def test_invalid_kind(self): + s = Series([1, 2]) + with pytest.raises(ValueError, match="invalid_kind is not a valid plot kind"): + s.plot(kind="invalid_kind") + + def test_dup_datetime_index_plot(self): + dr1 = date_range("1/1/2009", periods=4) + dr2 = date_range("1/2/2009", periods=4) + index = dr1.append(dr2) + values = np.random.randn(index.size) + s = Series(values, index=index) + _check_plot_works(s.plot) + + def test_errorbar_asymmetrical(self): + # GH9536 + s = Series(np.arange(10), name="x") + err = np.random.rand(2, 10) + + ax = s.plot(yerr=err, xerr=err) + + result = np.vstack([i.vertices[:, 1] for i in ax.collections[1].get_paths()]) + expected = (err.T * np.array([-1, 1])) + s.to_numpy().reshape(-1, 1) + tm.assert_numpy_array_equal(result, expected) + + msg = ( + "Asymmetrical error bars should be provided " + f"with the shape \\(2, {len(s)}\\)" + ) + with pytest.raises(ValueError, match=msg): + s.plot(yerr=np.random.rand(2, 11)) + + tm.close() + + def test_errorbar_plot(self): + + s = Series(np.arange(10), name="x") + s_err = np.random.randn(10) + d_err = DataFrame(np.random.randn(10, 2), index=s.index, columns=["x", "y"]) + # test line and bar plots + kinds = ["line", "bar"] + for kind in kinds: + ax = _check_plot_works(s.plot, yerr=Series(s_err), kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=1) + ax = _check_plot_works(s.plot, yerr=s_err, kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=1) + ax = _check_plot_works(s.plot, yerr=s_err.tolist(), kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=1) + ax = _check_plot_works(s.plot, yerr=d_err, kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=1) + ax = _check_plot_works(s.plot, xerr=0.2, yerr=0.2, kind=kind) + self._check_has_errorbars(ax, xerr=1, yerr=1) + + ax = _check_plot_works(s.plot, xerr=s_err) + self._check_has_errorbars(ax, xerr=1, yerr=0) + + # test time series plotting + ix = date_range("1/1/2000", "1/1/2001", freq="M") + ts = Series(np.arange(12), index=ix, name="x") + ts_err = Series(np.random.randn(12), index=ix) + td_err = DataFrame(np.random.randn(12, 2), index=ix, columns=["x", "y"]) + + ax = _check_plot_works(ts.plot, yerr=ts_err) + self._check_has_errorbars(ax, xerr=0, yerr=1) + ax = _check_plot_works(ts.plot, yerr=td_err) + self._check_has_errorbars(ax, xerr=0, yerr=1) + + # check incorrect lengths and types + with tm.external_error_raised(ValueError): + s.plot(yerr=np.arange(11)) + + s_err = ["zzz"] * 10 + with tm.external_error_raised(TypeError): + s.plot(yerr=s_err) + + def test_table(self): + _check_plot_works(self.series.plot, table=True) + _check_plot_works(self.series.plot, table=self.series) + + def test_series_grid_settings(self): + # Make sure plot defaults to rcParams['axes.grid'] setting, GH 9792 + self._check_grid_settings( + Series([1, 2, 3]), + plotting.PlotAccessor._series_kinds + plotting.PlotAccessor._common_kinds, + ) + + def test_standard_colors(self): + from pandas.plotting._matplotlib.style import get_standard_colors + + for c in ["r", "red", "green", "#FF0000"]: + result = get_standard_colors(1, color=c) + assert result == [c] + + result = get_standard_colors(1, color=[c]) + assert result == [c] + + result = get_standard_colors(3, color=c) + assert result == [c] * 3 + + result = get_standard_colors(3, color=[c]) + assert result == [c] * 3 
+ + def test_standard_colors_all(self): + import matplotlib.colors as colors + + from pandas.plotting._matplotlib.style import get_standard_colors + + # multiple colors like mediumaquamarine + for c in colors.cnames: + result = get_standard_colors(num_colors=1, color=c) + assert result == [c] + + result = get_standard_colors(num_colors=1, color=[c]) + assert result == [c] + + result = get_standard_colors(num_colors=3, color=c) + assert result == [c] * 3 + + result = get_standard_colors(num_colors=3, color=[c]) + assert result == [c] * 3 + + # single letter colors like k + for c in colors.ColorConverter.colors: + result = get_standard_colors(num_colors=1, color=c) + assert result == [c] + + result = get_standard_colors(num_colors=1, color=[c]) + assert result == [c] + + result = get_standard_colors(num_colors=3, color=c) + assert result == [c] * 3 + + result = get_standard_colors(num_colors=3, color=[c]) + assert result == [c] * 3 + + def test_series_plot_color_kwargs(self): + # GH1890 + _, ax = self.plt.subplots() + ax = Series(np.arange(12) + 1).plot(color="green", ax=ax) + self._check_colors(ax.get_lines(), linecolors=["green"]) + + def test_time_series_plot_color_kwargs(self): + # #1890 + _, ax = self.plt.subplots() + ax = Series(np.arange(12) + 1, index=date_range("1/1/2000", periods=12)).plot( + color="green", ax=ax + ) + self._check_colors(ax.get_lines(), linecolors=["green"]) + + def test_time_series_plot_color_with_empty_kwargs(self): + import matplotlib as mpl + + def_colors = self._unpack_cycler(mpl.rcParams) + index = date_range("1/1/2000", periods=12) + s = Series(np.arange(1, 13), index=index) + + ncolors = 3 + + _, ax = self.plt.subplots() + for i in range(ncolors): + ax = s.plot(ax=ax) + self._check_colors(ax.get_lines(), linecolors=def_colors[:ncolors]) + + def test_xticklabels(self): + # GH11529 + s = Series(np.arange(10), index=[f"P{i:02d}" for i in range(10)]) + _, ax = self.plt.subplots() + ax = s.plot(xticks=[0, 3, 5, 9], ax=ax) + exp = [f"P{i:02d}" for i in [0, 3, 5, 9]] + self._check_text_labels(ax.get_xticklabels(), exp) + + def test_xtick_barPlot(self): + # GH28172 + s = Series(range(10), index=[f"P{i:02d}" for i in range(10)]) + ax = s.plot.bar(xticks=range(0, 11, 2)) + exp = np.array(list(range(0, 11, 2))) + tm.assert_numpy_array_equal(exp, ax.get_xticks()) + + def test_custom_business_day_freq(self): + # GH7222 + from pandas.tseries.offsets import CustomBusinessDay + + s = Series( + range(100, 121), + index=pd.bdate_range( + start="2014-05-01", + end="2014-06-01", + freq=CustomBusinessDay(holidays=["2014-05-26"]), + ), + ) + + _check_plot_works(s.plot) + + @pytest.mark.xfail(reason="GH#24426") + def test_plot_accessor_updates_on_inplace(self): + ser = Series([1, 2, 3, 4]) + _, ax = self.plt.subplots() + ax = ser.plot(ax=ax) + before = ax.xaxis.get_ticklocs() + + ser.drop([0, 1], inplace=True) + _, ax = self.plt.subplots() + after = ax.xaxis.get_ticklocs() + tm.assert_numpy_array_equal(before, after) + + @pytest.mark.parametrize("kind", ["line", "area"]) + def test_plot_xlim_for_series(self, kind): + # test if xlim is also correctly plotted in Series for line and area + # GH 27686 + s = Series([2, 3]) + _, ax = self.plt.subplots() + s.plot(kind=kind, ax=ax) + xlims = ax.get_xlim() + + assert xlims[0] < 0 + assert xlims[1] > 1 + + def test_plot_no_rows(self): + # GH 27758 + df = Series(dtype=int) + assert df.empty + ax = df.plot() + assert len(ax.get_lines()) == 1 + line = ax.get_lines()[0] + assert len(line.get_xdata()) == 0 + assert len(line.get_ydata()) == 0 + 
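# an empty Series still produces a single Line2D artist, just with no points
+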
+ def test_plot_no_numeric_data(self): + df = Series(["a", "b", "c"]) + with pytest.raises(TypeError, match="no numeric data to plot"): + df.plot() + + @pytest.mark.parametrize( + "data, index", + [ + ([1, 2, 3, 4], [3, 2, 1, 0]), + ([10, 50, 20, 30], [1910, 1920, 1980, 1950]), + ], + ) + def test_plot_order(self, data, index): + # GH38865 Verify plot order of a Series + ser = Series(data=data, index=index) + ax = ser.plot(kind="bar") + + expected = ser.tolist() + result = [ + patch.get_bbox().ymax + for patch in sorted(ax.patches, key=lambda patch: patch.get_bbox().xmax) + ] + assert expected == result + + def test_style_single_ok(self): + s = Series([1, 2]) + ax = s.plot(style="s", color="C3") + assert ax.lines[0].get_color() == "C3" + + @pytest.mark.parametrize( + "index_name, old_label, new_label", + [(None, "", "new"), ("old", "old", "new"), (None, "", "")], + ) + @pytest.mark.parametrize("kind", ["line", "area", "bar"]) + def test_xlabel_ylabel_series(self, kind, index_name, old_label, new_label): + # GH 9093 + ser = Series([1, 2, 3, 4]) + ser.index.name = index_name + + # default is the ylabel is not shown and xlabel is index name + ax = ser.plot(kind=kind) + assert ax.get_ylabel() == "" + assert ax.get_xlabel() == old_label + + # old xlabel will be overridden and assigned ylabel will be used as ylabel + ax = ser.plot(kind=kind, ylabel=new_label, xlabel=new_label) + assert ax.get_ylabel() == new_label + assert ax.get_xlabel() == new_label + + @pytest.mark.parametrize( + "index", + [ + pd.timedelta_range(start=0, periods=2, freq="D"), + [pd.Timedelta(days=1), pd.Timedelta(days=2)], + ], + ) + def test_timedelta_index(self, index): + # GH37454 + xlims = (3, 1) + ax = Series([1, 2], index=index).plot(xlim=(xlims)) + assert ax.get_xlim() == (3, 1) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/plotting/test_style.py b/.local/lib/python3.8/site-packages/pandas/tests/plotting/test_style.py new file mode 100644 index 0000000..3c48eea --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/plotting/test_style.py @@ -0,0 +1,159 @@ +import pytest + +from pandas import Series + +pytest.importorskip("matplotlib") +from pandas.plotting._matplotlib.style import get_standard_colors + +pytestmark = pytest.mark.slow + + +class TestGetStandardColors: + @pytest.mark.parametrize( + "num_colors, expected", + [ + (3, ["red", "green", "blue"]), + (5, ["red", "green", "blue", "red", "green"]), + (7, ["red", "green", "blue", "red", "green", "blue", "red"]), + (2, ["red", "green"]), + (1, ["red"]), + ], + ) + def test_default_colors_named_from_prop_cycle(self, num_colors, expected): + import matplotlib as mpl + from matplotlib.pyplot import cycler + + mpl_params = { + "axes.prop_cycle": cycler(color=["red", "green", "blue"]), + } + with mpl.rc_context(rc=mpl_params): + result = get_standard_colors(num_colors=num_colors) + assert result == expected + + @pytest.mark.parametrize( + "num_colors, expected", + [ + (1, ["b"]), + (3, ["b", "g", "r"]), + (4, ["b", "g", "r", "y"]), + (5, ["b", "g", "r", "y", "b"]), + (7, ["b", "g", "r", "y", "b", "g", "r"]), + ], + ) + def test_default_colors_named_from_prop_cycle_string(self, num_colors, expected): + import matplotlib as mpl + from matplotlib.pyplot import cycler + + mpl_params = { + "axes.prop_cycle": cycler(color="bgry"), + } + with mpl.rc_context(rc=mpl_params): + result = get_standard_colors(num_colors=num_colors) + assert result == expected + + @pytest.mark.parametrize( + "num_colors, expected_name", + [ + (1, ["C0"]), + (3, ["C0", 
"C1", "C2"]), + ( + 12, + [ + "C0", + "C1", + "C2", + "C3", + "C4", + "C5", + "C6", + "C7", + "C8", + "C9", + "C0", + "C1", + ], + ), + ], + ) + def test_default_colors_named_undefined_prop_cycle(self, num_colors, expected_name): + import matplotlib as mpl + import matplotlib.colors as mcolors + + with mpl.rc_context(rc={}): + expected = [mcolors.to_hex(x) for x in expected_name] + result = get_standard_colors(num_colors=num_colors) + assert result == expected + + @pytest.mark.parametrize( + "num_colors, expected", + [ + (1, ["red", "green", (0.1, 0.2, 0.3)]), + (2, ["red", "green", (0.1, 0.2, 0.3)]), + (3, ["red", "green", (0.1, 0.2, 0.3)]), + (4, ["red", "green", (0.1, 0.2, 0.3), "red"]), + ], + ) + def test_user_input_color_sequence(self, num_colors, expected): + color = ["red", "green", (0.1, 0.2, 0.3)] + result = get_standard_colors(color=color, num_colors=num_colors) + assert result == expected + + @pytest.mark.parametrize( + "num_colors, expected", + [ + (1, ["r", "g", "b", "k"]), + (2, ["r", "g", "b", "k"]), + (3, ["r", "g", "b", "k"]), + (4, ["r", "g", "b", "k"]), + (5, ["r", "g", "b", "k", "r"]), + (6, ["r", "g", "b", "k", "r", "g"]), + ], + ) + def test_user_input_color_string(self, num_colors, expected): + color = "rgbk" + result = get_standard_colors(color=color, num_colors=num_colors) + assert result == expected + + @pytest.mark.parametrize( + "num_colors, expected", + [ + (1, [(0.1, 0.2, 0.3)]), + (2, [(0.1, 0.2, 0.3), (0.1, 0.2, 0.3)]), + (3, [(0.1, 0.2, 0.3), (0.1, 0.2, 0.3), (0.1, 0.2, 0.3)]), + ], + ) + def test_user_input_color_floats(self, num_colors, expected): + color = (0.1, 0.2, 0.3) + result = get_standard_colors(color=color, num_colors=num_colors) + assert result == expected + + @pytest.mark.parametrize( + "color, num_colors, expected", + [ + ("Crimson", 1, ["Crimson"]), + ("DodgerBlue", 2, ["DodgerBlue", "DodgerBlue"]), + ("firebrick", 3, ["firebrick", "firebrick", "firebrick"]), + ], + ) + def test_user_input_named_color_string(self, color, num_colors, expected): + result = get_standard_colors(color=color, num_colors=num_colors) + assert result == expected + + @pytest.mark.parametrize("color", ["", [], (), Series([], dtype="object")]) + def test_empty_color_raises(self, color): + with pytest.raises(ValueError, match="Invalid color argument"): + get_standard_colors(color=color, num_colors=1) + + @pytest.mark.parametrize( + "color", + [ + "bad_color", + ("red", "green", "bad_color"), + (0.1,), + (0.1, 0.2), + (0.1, 0.2, 0.3, 0.4, 0.5), # must be either 3 or 4 floats + ], + ) + def test_bad_color_raises(self, color): + with pytest.raises(ValueError, match="Invalid color"): + get_standard_colors(color=color, num_colors=5) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reductions/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/reductions/__init__.py new file mode 100644 index 0000000..e385175 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/reductions/__init__.py @@ -0,0 +1,4 @@ +""" +Tests for reductions where we want to test for matching behavior across +Array, Index, Series, and DataFrame methods. 
+""" diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reductions/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/reductions/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..6e370e6 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/reductions/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reductions/__pycache__/test_reductions.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/reductions/__pycache__/test_reductions.cpython-38.pyc new file mode 100644 index 0000000..926eb5c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/reductions/__pycache__/test_reductions.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reductions/__pycache__/test_stat_reductions.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/reductions/__pycache__/test_stat_reductions.cpython-38.pyc new file mode 100644 index 0000000..2c2fdc4 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/reductions/__pycache__/test_stat_reductions.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reductions/test_reductions.py b/.local/lib/python3.8/site-packages/pandas/tests/reductions/test_reductions.py new file mode 100644 index 0000000..70e739d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/reductions/test_reductions.py @@ -0,0 +1,1533 @@ +from datetime import ( + datetime, + timedelta, +) + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + DatetimeIndex, + Index, + NaT, + Period, + PeriodIndex, + RangeIndex, + Series, + Timedelta, + TimedeltaIndex, + Timestamp, + date_range, + isna, + timedelta_range, + to_timedelta, +) +import pandas._testing as tm +from pandas.core import nanops + + +def get_objs(): + indexes = [ + tm.makeBoolIndex(10, name="a"), + tm.makeIntIndex(10, name="a"), + tm.makeFloatIndex(10, name="a"), + tm.makeDateIndex(10, name="a"), + tm.makeDateIndex(10, name="a").tz_localize(tz="US/Eastern"), + tm.makePeriodIndex(10, name="a"), + tm.makeStringIndex(10, name="a"), + tm.makeUnicodeIndex(10, name="a"), + ] + + arr = np.random.randn(10) + series = [Series(arr, index=idx, name="a") for idx in indexes] + + objs = indexes + series + return objs + + +objs = get_objs() + + +class TestReductions: + @pytest.mark.parametrize("opname", ["max", "min"]) + @pytest.mark.parametrize("obj", objs) + def test_ops(self, opname, obj): + result = getattr(obj, opname)() + if not isinstance(obj, PeriodIndex): + expected = getattr(obj.values, opname)() + else: + expected = Period(ordinal=getattr(obj.asi8, opname)(), freq=obj.freq) + + if getattr(obj, "tz", None) is not None: + # We need to de-localize before comparing to the numpy-produced result + expected = expected.astype("M8[ns]").astype("int64") + assert result.value == expected + else: + assert result == expected + + @pytest.mark.parametrize("opname", ["max", "min"]) + @pytest.mark.parametrize( + "dtype, val", + [ + ("object", 2.0), + ("float64", 2.0), + ("datetime64[ns]", datetime(2011, 11, 1)), + ("Int64", 2), + ("boolean", True), + ], + ) + def test_nanminmax(self, opname, dtype, val, index_or_series): + # GH#7261 + klass = index_or_series + + def check_missing(res): + if dtype == "datetime64[ns]": + return res is NaT + elif dtype in ["Int64", "boolean"]: + return res is pd.NA + else: + return isna(res) + + 
obj = klass([None], dtype=dtype) + assert check_missing(getattr(obj, opname)()) + assert check_missing(getattr(obj, opname)(skipna=False)) + + obj = klass([], dtype=dtype) + assert check_missing(getattr(obj, opname)()) + assert check_missing(getattr(obj, opname)(skipna=False)) + + if dtype == "object": + # generic test with object only works for empty / all NaN + return + + obj = klass([None, val], dtype=dtype) + assert getattr(obj, opname)() == val + assert check_missing(getattr(obj, opname)(skipna=False)) + + obj = klass([None, val, None], dtype=dtype) + assert getattr(obj, opname)() == val + assert check_missing(getattr(obj, opname)(skipna=False)) + + @pytest.mark.parametrize("opname", ["max", "min"]) + def test_nanargminmax(self, opname, index_or_series): + # GH#7261 + klass = index_or_series + arg_op = "arg" + opname if klass is Index else "idx" + opname + + obj = klass([NaT, datetime(2011, 11, 1)]) + assert getattr(obj, arg_op)() == 1 + result = getattr(obj, arg_op)(skipna=False) + if klass is Series: + assert np.isnan(result) + else: + assert result == -1 + + obj = klass([NaT, datetime(2011, 11, 1), NaT]) + # check DatetimeIndex non-monotonic path + assert getattr(obj, arg_op)() == 1 + result = getattr(obj, arg_op)(skipna=False) + if klass is Series: + assert np.isnan(result) + else: + assert result == -1 + + @pytest.mark.parametrize("opname", ["max", "min"]) + @pytest.mark.parametrize("dtype", ["M8[ns]", "datetime64[ns, UTC]"]) + def test_nanops_empty_object(self, opname, index_or_series, dtype): + klass = index_or_series + arg_op = "arg" + opname if klass is Index else "idx" + opname + + obj = klass([], dtype=dtype) + + assert getattr(obj, opname)() is NaT + assert getattr(obj, opname)(skipna=False) is NaT + + with pytest.raises(ValueError, match="empty sequence"): + getattr(obj, arg_op)() + with pytest.raises(ValueError, match="empty sequence"): + getattr(obj, arg_op)(skipna=False) + + def test_argminmax(self): + obj = Index(np.arange(5, dtype="int64")) + assert obj.argmin() == 0 + assert obj.argmax() == 4 + + obj = Index([np.nan, 1, np.nan, 2]) + assert obj.argmin() == 1 + assert obj.argmax() == 3 + assert obj.argmin(skipna=False) == -1 + assert obj.argmax(skipna=False) == -1 + + obj = Index([np.nan]) + assert obj.argmin() == -1 + assert obj.argmax() == -1 + assert obj.argmin(skipna=False) == -1 + assert obj.argmax(skipna=False) == -1 + + obj = Index([NaT, datetime(2011, 11, 1), datetime(2011, 11, 2), NaT]) + assert obj.argmin() == 1 + assert obj.argmax() == 2 + assert obj.argmin(skipna=False) == -1 + assert obj.argmax(skipna=False) == -1 + + obj = Index([NaT]) + assert obj.argmin() == -1 + assert obj.argmax() == -1 + assert obj.argmin(skipna=False) == -1 + assert obj.argmax(skipna=False) == -1 + + @pytest.mark.parametrize("op, expected_col", [["max", "a"], ["min", "b"]]) + def test_same_tz_min_max_axis_1(self, op, expected_col): + # GH 10390 + df = DataFrame( + date_range("2016-01-01 00:00:00", periods=3, tz="UTC"), columns=["a"] + ) + df["b"] = df.a.subtract(Timedelta(seconds=3600)) + result = getattr(df, op)(axis=1) + expected = df[expected_col].rename(None) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("func", ["maximum", "minimum"]) + def test_numpy_reduction_with_tz_aware_dtype(self, tz_aware_fixture, func): + # GH 15552 + tz = tz_aware_fixture + arg = pd.to_datetime(["2019"]).tz_localize(tz) + expected = Series(arg) + result = getattr(np, func)(expected, expected) + tm.assert_series_equal(result, expected) + + +class TestIndexReductions: + # 
Note: the name TestIndexReductions indicates these tests + # were moved from a Index-specific test file, _not_ that these tests are + # intended long-term to be Index-specific + + @pytest.mark.parametrize( + "start,stop,step", + [ + (0, 400, 3), + (500, 0, -6), + (-(10 ** 6), 10 ** 6, 4), + (10 ** 6, -(10 ** 6), -4), + (0, 10, 20), + ], + ) + def test_max_min_range(self, start, stop, step): + # GH#17607 + idx = RangeIndex(start, stop, step) + expected = idx._values.max() + result = idx.max() + assert result == expected + + # skipna should be irrelevant since RangeIndex should never have NAs + result2 = idx.max(skipna=False) + assert result2 == expected + + expected = idx._values.min() + result = idx.min() + assert result == expected + + # skipna should be irrelevant since RangeIndex should never have NAs + result2 = idx.min(skipna=False) + assert result2 == expected + + # empty + idx = RangeIndex(start, stop, -step) + assert isna(idx.max()) + assert isna(idx.min()) + + def test_minmax_timedelta64(self): + + # monotonic + idx1 = TimedeltaIndex(["1 days", "2 days", "3 days"]) + assert idx1.is_monotonic + + # non-monotonic + idx2 = TimedeltaIndex(["1 days", np.nan, "3 days", "NaT"]) + assert not idx2.is_monotonic + + for idx in [idx1, idx2]: + assert idx.min() == Timedelta("1 days") + assert idx.max() == Timedelta("3 days") + assert idx.argmin() == 0 + assert idx.argmax() == 2 + + @pytest.mark.parametrize("op", ["min", "max"]) + def test_minmax_timedelta_empty_or_na(self, op): + # Return NaT + obj = TimedeltaIndex([]) + assert getattr(obj, op)() is NaT + + obj = TimedeltaIndex([NaT]) + assert getattr(obj, op)() is NaT + + obj = TimedeltaIndex([NaT, NaT, NaT]) + assert getattr(obj, op)() is NaT + + def test_numpy_minmax_timedelta64(self): + td = timedelta_range("16815 days", "16820 days", freq="D") + + assert np.min(td) == Timedelta("16815 days") + assert np.max(td) == Timedelta("16820 days") + + errmsg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=errmsg): + np.min(td, out=0) + with pytest.raises(ValueError, match=errmsg): + np.max(td, out=0) + + assert np.argmin(td) == 0 + assert np.argmax(td) == 5 + + errmsg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=errmsg): + np.argmin(td, out=0) + with pytest.raises(ValueError, match=errmsg): + np.argmax(td, out=0) + + def test_timedelta_ops(self): + # GH#4984 + # make sure ops return Timedelta + s = Series( + [Timestamp("20130101") + timedelta(seconds=i * i) for i in range(10)] + ) + td = s.diff() + + result = td.mean() + expected = to_timedelta(timedelta(seconds=9)) + assert result == expected + + result = td.to_frame().mean() + assert result[0] == expected + + result = td.quantile(0.1) + expected = Timedelta(np.timedelta64(2600, "ms")) + assert result == expected + + result = td.median() + expected = to_timedelta("00:00:09") + assert result == expected + + result = td.to_frame().median() + assert result[0] == expected + + # GH#6462 + # consistency in returned values for sum + result = td.sum() + expected = to_timedelta("00:01:21") + assert result == expected + + result = td.to_frame().sum() + assert result[0] == expected + + # std + result = td.std() + expected = to_timedelta(Series(td.dropna().values).std()) + assert result == expected + + result = td.to_frame().std() + assert result[0] == expected + + # GH#10040 + # make sure NaT is properly handled by median() + s = Series([Timestamp("2015-02-03"), Timestamp("2015-02-07")]) + assert s.diff().median() == timedelta(days=4) + + s = 
Series( + [Timestamp("2015-02-03"), Timestamp("2015-02-07"), Timestamp("2015-02-15")] + ) + assert s.diff().median() == timedelta(days=6) + + @pytest.mark.parametrize("opname", ["skew", "kurt", "sem", "prod", "var"]) + def test_invalid_td64_reductions(self, opname): + s = Series( + [Timestamp("20130101") + timedelta(seconds=i * i) for i in range(10)] + ) + td = s.diff() + + msg = "|".join( + [ + f"reduction operation '{opname}' not allowed for this dtype", + rf"cannot perform {opname} with type timedelta64\[ns\]", + f"does not support reduction '{opname}'", + ] + ) + + with pytest.raises(TypeError, match=msg): + getattr(td, opname)() + + with pytest.raises(TypeError, match=msg): + getattr(td.to_frame(), opname)(numeric_only=False) + + def test_minmax_tz(self, tz_naive_fixture): + tz = tz_naive_fixture + # monotonic + idx1 = DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], tz=tz) + assert idx1.is_monotonic + + # non-monotonic + idx2 = DatetimeIndex( + ["2011-01-01", NaT, "2011-01-03", "2011-01-02", NaT], tz=tz + ) + assert not idx2.is_monotonic + + for idx in [idx1, idx2]: + assert idx.min() == Timestamp("2011-01-01", tz=tz) + assert idx.max() == Timestamp("2011-01-03", tz=tz) + assert idx.argmin() == 0 + assert idx.argmax() == 2 + + @pytest.mark.parametrize("op", ["min", "max"]) + def test_minmax_nat_datetime64(self, op): + # Return NaT + obj = DatetimeIndex([]) + assert isna(getattr(obj, op)()) + + obj = DatetimeIndex([NaT]) + assert isna(getattr(obj, op)()) + + obj = DatetimeIndex([NaT, NaT, NaT]) + assert isna(getattr(obj, op)()) + + def test_numpy_minmax_integer(self): + # GH#26125 + idx = Index([1, 2, 3]) + + expected = idx.values.max() + result = np.max(idx) + assert result == expected + + expected = idx.values.min() + result = np.min(idx) + assert result == expected + + errmsg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=errmsg): + np.min(idx, out=0) + with pytest.raises(ValueError, match=errmsg): + np.max(idx, out=0) + + expected = idx.values.argmax() + result = np.argmax(idx) + assert result == expected + + expected = idx.values.argmin() + result = np.argmin(idx) + assert result == expected + + errmsg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=errmsg): + np.argmin(idx, out=0) + with pytest.raises(ValueError, match=errmsg): + np.argmax(idx, out=0) + + def test_numpy_minmax_range(self): + # GH#26125 + idx = RangeIndex(0, 10, 3) + + result = np.max(idx) + assert result == 9 + + result = np.min(idx) + assert result == 0 + + errmsg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=errmsg): + np.min(idx, out=0) + with pytest.raises(ValueError, match=errmsg): + np.max(idx, out=0) + + # No need to test again argmax/argmin compat since the implementation + # is the same as basic integer index + + def test_numpy_minmax_datetime64(self): + dr = date_range(start="2016-01-15", end="2016-01-20") + + assert np.min(dr) == Timestamp("2016-01-15 00:00:00") + assert np.max(dr) == Timestamp("2016-01-20 00:00:00") + + errmsg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=errmsg): + np.min(dr, out=0) + + with pytest.raises(ValueError, match=errmsg): + np.max(dr, out=0) + + assert np.argmin(dr) == 0 + assert np.argmax(dr) == 5 + + errmsg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=errmsg): + np.argmin(dr, out=0) + + with pytest.raises(ValueError, match=errmsg): + np.argmax(dr, out=0) + + def test_minmax_period(self): + + # 
monotonic + idx1 = PeriodIndex([NaT, "2011-01-01", "2011-01-02", "2011-01-03"], freq="D") + assert not idx1.is_monotonic + assert idx1[1:].is_monotonic + + # non-monotonic + idx2 = PeriodIndex( + ["2011-01-01", NaT, "2011-01-03", "2011-01-02", NaT], freq="D" + ) + assert not idx2.is_monotonic + + for idx in [idx1, idx2]: + assert idx.min() == Period("2011-01-01", freq="D") + assert idx.max() == Period("2011-01-03", freq="D") + assert idx1.argmin() == 1 + assert idx2.argmin() == 0 + assert idx1.argmax() == 3 + assert idx2.argmax() == 2 + + for op in ["min", "max"]: + # Return NaT + obj = PeriodIndex([], freq="M") + result = getattr(obj, op)() + assert result is NaT + + obj = PeriodIndex([NaT], freq="M") + result = getattr(obj, op)() + assert result is NaT + + obj = PeriodIndex([NaT, NaT, NaT], freq="M") + result = getattr(obj, op)() + assert result is NaT + + def test_numpy_minmax_period(self): + pr = pd.period_range(start="2016-01-15", end="2016-01-20") + + assert np.min(pr) == Period("2016-01-15", freq="D") + assert np.max(pr) == Period("2016-01-20", freq="D") + + errmsg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=errmsg): + np.min(pr, out=0) + with pytest.raises(ValueError, match=errmsg): + np.max(pr, out=0) + + assert np.argmin(pr) == 0 + assert np.argmax(pr) == 5 + + errmsg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=errmsg): + np.argmin(pr, out=0) + with pytest.raises(ValueError, match=errmsg): + np.argmax(pr, out=0) + + def test_min_max_categorical(self): + + ci = pd.CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False) + msg = ( + r"Categorical is not ordered for operation min\n" + r"you can use .as_ordered\(\) to change the Categorical to an ordered one\n" + ) + with pytest.raises(TypeError, match=msg): + ci.min() + msg = ( + r"Categorical is not ordered for operation max\n" + r"you can use .as_ordered\(\) to change the Categorical to an ordered one\n" + ) + with pytest.raises(TypeError, match=msg): + ci.max() + + ci = pd.CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=True) + assert ci.min() == "c" + assert ci.max() == "b" + + +class TestSeriesReductions: + # Note: the name TestSeriesReductions indicates these tests + # were moved from a series-specific test file, _not_ that these tests are + # intended long-term to be series-specific + + def test_sum_inf(self): + s = Series(np.random.randn(10)) + s2 = s.copy() + + s[5:8] = np.inf + s2[5:8] = np.nan + + assert np.isinf(s.sum()) + + arr = np.random.randn(100, 100).astype("f4") + arr[:, 2] = np.inf + + with pd.option_context("mode.use_inf_as_na", True): + tm.assert_almost_equal(s.sum(), s2.sum()) + + res = nanops.nansum(arr, axis=1) + assert np.isinf(res).all() + + @pytest.mark.parametrize( + "dtype", ["float64", "Float32", "Int64", "boolean", "object"] + ) + @pytest.mark.parametrize("use_bottleneck", [True, False]) + @pytest.mark.parametrize("method, unit", [("sum", 0.0), ("prod", 1.0)]) + def test_empty(self, method, unit, use_bottleneck, dtype): + with pd.option_context("use_bottleneck", use_bottleneck): + # GH#9422 / GH#18921 + # Entirely empty + s = Series([], dtype=dtype) + # NA by default + result = getattr(s, method)() + assert result == unit + + # Explicit + result = getattr(s, method)(min_count=0) + assert result == unit + + result = getattr(s, method)(min_count=1) + assert isna(result) + + # Skipna, default + result = getattr(s, method)(skipna=True) + assert result == unit + + # Skipna, explicit + result = getattr(s, 
method)(skipna=True, min_count=0) + assert result == unit + + result = getattr(s, method)(skipna=True, min_count=1) + assert isna(result) + + result = getattr(s, method)(skipna=False, min_count=0) + assert result == unit + + result = getattr(s, method)(skipna=False, min_count=1) + assert isna(result) + + # All-NA + s = Series([np.nan], dtype=dtype) + # NA by default + result = getattr(s, method)() + assert result == unit + + # Explicit + result = getattr(s, method)(min_count=0) + assert result == unit + + result = getattr(s, method)(min_count=1) + assert isna(result) + + # Skipna, default + result = getattr(s, method)(skipna=True) + assert result == unit + + # Skipna, explicit + result = getattr(s, method)(skipna=True, min_count=0) + assert result == unit + + result = getattr(s, method)(skipna=True, min_count=1) + assert isna(result) + + # Mix of valid, empty + s = Series([np.nan, 1], dtype=dtype) + # Default + result = getattr(s, method)() + assert result == 1.0 + + # Explicit + result = getattr(s, method)(min_count=0) + assert result == 1.0 + + result = getattr(s, method)(min_count=1) + assert result == 1.0 + + # Skipna + result = getattr(s, method)(skipna=True) + assert result == 1.0 + + result = getattr(s, method)(skipna=True, min_count=0) + assert result == 1.0 + + # GH#844 (changed in GH#9422) + df = DataFrame(np.empty((10, 0)), dtype=dtype) + assert (getattr(df, method)(1) == unit).all() + + s = Series([1], dtype=dtype) + result = getattr(s, method)(min_count=2) + assert isna(result) + + result = getattr(s, method)(skipna=False, min_count=2) + assert isna(result) + + s = Series([np.nan], dtype=dtype) + result = getattr(s, method)(min_count=2) + assert isna(result) + + s = Series([np.nan, 1], dtype=dtype) + result = getattr(s, method)(min_count=2) + assert isna(result) + + @pytest.mark.parametrize("method, unit", [("sum", 0.0), ("prod", 1.0)]) + def test_empty_multi(self, method, unit): + s = Series( + [1, np.nan, np.nan, np.nan], + index=pd.MultiIndex.from_product([("a", "b"), (0, 1)]), + ) + # 1 / 0 by default + with tm.assert_produces_warning(FutureWarning): + result = getattr(s, method)(level=0) + expected = Series([1, unit], index=["a", "b"]) + tm.assert_series_equal(result, expected) + + # min_count=0 + with tm.assert_produces_warning(FutureWarning): + result = getattr(s, method)(level=0, min_count=0) + expected = Series([1, unit], index=["a", "b"]) + tm.assert_series_equal(result, expected) + + # min_count=1 + with tm.assert_produces_warning(FutureWarning): + result = getattr(s, method)(level=0, min_count=1) + expected = Series([1, np.nan], index=["a", "b"]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("method", ["mean"]) + @pytest.mark.parametrize("dtype", ["Float64", "Int64", "boolean"]) + def test_ops_consistency_on_empty_nullable(self, method, dtype): + + # GH#34814 + # consistency for nullable dtypes on empty or ALL-NA mean + + # empty series + eser = Series([], dtype=dtype) + result = getattr(eser, method)() + assert result is pd.NA + + # ALL-NA series + nser = Series([np.nan], dtype=dtype) + result = getattr(nser, method)() + assert result is pd.NA + + @pytest.mark.parametrize("method", ["mean", "median", "std", "var"]) + def test_ops_consistency_on_empty(self, method): + + # GH#7869 + # consistency on empty + + # float + result = getattr(Series(dtype=float), method)() + assert isna(result) + + # timedelta64[ns] + tdser = Series([], dtype="m8[ns]") + if method == "var": + msg = "|".join( + [ + "operation 'var' not allowed", + r"cannot perform var 
with type timedelta64\[ns\]", + "does not support reduction 'var'", + ] + ) + with pytest.raises(TypeError, match=msg): + getattr(tdser, method)() + else: + result = getattr(tdser, method)() + assert result is NaT + + def test_nansum_buglet(self): + ser = Series([1.0, np.nan], index=[0, 1]) + result = np.nansum(ser) + tm.assert_almost_equal(result, 1) + + @pytest.mark.parametrize("use_bottleneck", [True, False]) + def test_sum_overflow(self, use_bottleneck): + + with pd.option_context("use_bottleneck", use_bottleneck): + # GH#6915 + # overflowing on the smaller int dtypes + for dtype in ["int32", "int64"]: + v = np.arange(5000000, dtype=dtype) + s = Series(v) + + result = s.sum(skipna=False) + assert int(result) == v.sum(dtype="int64") + result = s.min(skipna=False) + assert int(result) == 0 + result = s.max(skipna=False) + assert int(result) == v[-1] + + for dtype in ["float32", "float64"]: + v = np.arange(5000000, dtype=dtype) + s = Series(v) + + result = s.sum(skipna=False) + assert result == v.sum(dtype=dtype) + result = s.min(skipna=False) + assert np.allclose(float(result), 0.0) + result = s.max(skipna=False) + assert np.allclose(float(result), v[-1]) + + def test_empty_timeseries_reductions_return_nat(self): + # covers GH#11245 + for dtype in ("m8[ns]", "m8[ns]", "M8[ns]", "M8[ns, UTC]"): + assert Series([], dtype=dtype).min() is NaT + assert Series([], dtype=dtype).max() is NaT + assert Series([], dtype=dtype).min(skipna=False) is NaT + assert Series([], dtype=dtype).max(skipna=False) is NaT + + def test_numpy_argmin(self): + # See GH#16830 + data = np.arange(1, 11) + + s = Series(data, index=data) + result = np.argmin(s) + + expected = np.argmin(data) + assert result == expected + + result = s.argmin() + + assert result == expected + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.argmin(s, out=data) + + def test_numpy_argmax(self): + # See GH#16830 + data = np.arange(1, 11) + + s = Series(data, index=data) + result = np.argmax(s) + expected = np.argmax(data) + assert result == expected + + result = s.argmax() + + assert result == expected + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.argmax(s, out=data) + + def test_idxmin(self): + # test idxmin + # _check_stat_op approach can not be used here because of isna check. + string_series = tm.makeStringSeries().rename("series") + + # add some NaNs + string_series[5:15] = np.NaN + + # skipna or no + assert string_series[string_series.idxmin()] == string_series.min() + assert isna(string_series.idxmin(skipna=False)) + + # no NaNs + nona = string_series.dropna() + assert nona[nona.idxmin()] == nona.min() + assert nona.index.values.tolist().index(nona.idxmin()) == nona.values.argmin() + + # all NaNs + allna = string_series * np.nan + assert isna(allna.idxmin()) + + # datetime64[ns] + s = Series(date_range("20130102", periods=6)) + result = s.idxmin() + assert result == 0 + + s[0] = np.nan + result = s.idxmin() + assert result == 1 + + def test_idxmax(self): + # test idxmax + # _check_stat_op approach can not be used here because of isna check. 
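+ # idxmax/idxmin return the *index label* of the extremum, while argmax/argmin + # return the integer position; with skipna=False a single NaN in the values + # makes the label result NaN, as asserted below.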
+ string_series = tm.makeStringSeries().rename("series") + + # add some NaNs + string_series[5:15] = np.NaN + + # skipna or no + assert string_series[string_series.idxmax()] == string_series.max() + assert isna(string_series.idxmax(skipna=False)) + + # no NaNs + nona = string_series.dropna() + assert nona[nona.idxmax()] == nona.max() + assert nona.index.values.tolist().index(nona.idxmax()) == nona.values.argmax() + + # all NaNs + allna = string_series * np.nan + assert isna(allna.idxmax()) + + from pandas import date_range + + s = Series(date_range("20130102", periods=6)) + result = s.idxmax() + assert result == 5 + + s[5] = np.nan + result = s.idxmax() + assert result == 4 + + # Float64Index + # GH#5914 + s = Series([1, 2, 3], [1.1, 2.1, 3.1]) + result = s.idxmax() + assert result == 3.1 + result = s.idxmin() + assert result == 1.1 + + s = Series(s.index, s.index) + result = s.idxmax() + assert result == 3.1 + result = s.idxmin() + assert result == 1.1 + + def test_all_any(self): + ts = tm.makeTimeSeries() + bool_series = ts > 0 + assert not bool_series.all() + assert bool_series.any() + + # Alternative types, with implicit 'object' dtype. + s = Series(["abc", True]) + assert s.any() + + @pytest.mark.parametrize("klass", [Index, Series]) + def test_numpy_all_any(self, klass): + # GH#40180 + idx = klass([0, 1, 2]) + assert not np.all(idx) + assert np.any(idx) + idx = Index([1, 2, 3]) + assert np.all(idx) + + def test_all_any_params(self): + # Check skipna, with implicit 'object' dtype. + s1 = Series([np.nan, True]) + s2 = Series([np.nan, False]) + assert s1.all(skipna=False) # nan && True => True + assert s1.all(skipna=True) + assert s2.any(skipna=False) + assert not s2.any(skipna=True) + + # Check level. + s = Series([False, False, True, True, False, True], index=[0, 0, 1, 1, 2, 2]) + with tm.assert_produces_warning(FutureWarning): + tm.assert_series_equal(s.all(level=0), Series([False, True, False])) + with tm.assert_produces_warning(FutureWarning): + tm.assert_series_equal(s.any(level=0), Series([False, True, True])) + + msg = "Option bool_only is not implemented with option level" + with pytest.raises(NotImplementedError, match=msg): + with tm.assert_produces_warning(FutureWarning): + s.any(bool_only=True, level=0) + with pytest.raises(NotImplementedError, match=msg): + with tm.assert_produces_warning(FutureWarning): + s.all(bool_only=True, level=0) + + # GH#38810 bool_only is not implemented alone. + msg = "Series.any does not implement bool_only" + with pytest.raises(NotImplementedError, match=msg): + s.any(bool_only=True) + msg = "Series.all does not implement bool_only." 
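+ # (bool_only filters to boolean columns and so only makes sense on a DataFrame; + # Series.any/all reject it outright rather than silently ignoring it)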
+ with pytest.raises(NotImplementedError, match=msg): + s.all(bool_only=True) + + @pytest.mark.parametrize("bool_agg_func", ["any", "all"]) + @pytest.mark.parametrize("skipna", [True, False]) + def test_any_all_object_dtype(self, bool_agg_func, skipna): + # GH#12863 + ser = Series(["a", "b", "c", "d", "e"], dtype=object) + result = getattr(ser, bool_agg_func)(skipna=skipna) + expected = True + + assert result == expected + + @pytest.mark.parametrize("bool_agg_func", ["any", "all"]) + @pytest.mark.parametrize( + "data", [[False, None], [None, False], [False, np.nan], [np.nan, False]] + ) + def test_any_all_object_dtype_missing(self, data, bool_agg_func): + # GH#27709 + ser = Series(data) + result = getattr(ser, bool_agg_func)(skipna=False) + + # None is treated as False, but np.nan is treated as True + expected = bool_agg_func == "any" and None not in data + assert result == expected + + @pytest.mark.parametrize("dtype", ["boolean", "Int64", "UInt64", "Float64"]) + @pytest.mark.parametrize("bool_agg_func", ["any", "all"]) + @pytest.mark.parametrize("skipna", [True, False]) + @pytest.mark.parametrize( + # expected_data indexed as [[skipna=False/any, skipna=False/all], + # [skipna=True/any, skipna=True/all]] + "data,expected_data", + [ + ([0, 0, 0], [[False, False], [False, False]]), + ([1, 1, 1], [[True, True], [True, True]]), + ([pd.NA, pd.NA, pd.NA], [[pd.NA, pd.NA], [False, True]]), + ([0, pd.NA, 0], [[pd.NA, False], [False, False]]), + ([1, pd.NA, 1], [[True, pd.NA], [True, True]]), + ([1, pd.NA, 0], [[True, False], [True, False]]), + ], + ) + def test_any_all_nullable_kleene_logic( + self, bool_agg_func, skipna, data, dtype, expected_data + ): + # GH-37506, GH-41967 + ser = Series(data, dtype=dtype) + expected = expected_data[skipna][bool_agg_func == "all"] + + result = getattr(ser, bool_agg_func)(skipna=skipna) + assert (result is pd.NA and expected is pd.NA) or result == expected + + @pytest.mark.parametrize( + "bool_agg_func,expected", + [("all", [False, True, False]), ("any", [False, True, True])], + ) + def test_any_all_boolean_level(self, bool_agg_func, expected): + # GH#33449 + ser = Series( + [False, False, True, True, False, True], + index=[0, 0, 1, 1, 2, 2], + dtype="boolean", + ) + with tm.assert_produces_warning(FutureWarning): + result = getattr(ser, bool_agg_func)(level=0) + expected = Series(expected, dtype="boolean") + tm.assert_series_equal(result, expected) + + def test_any_axis1_bool_only(self): + # GH#32432 + df = DataFrame({"A": [True, False], "B": [1, 2]}) + result = df.any(axis=1, bool_only=True) + expected = Series([True, False]) + tm.assert_series_equal(result, expected) + + def test_any_all_datetimelike(self): + # GH#38723 these may not be the desired long-term behavior (GH#34479) + # but in the interim should be internally consistent + dta = date_range("1995-01-02", periods=3)._data + ser = Series(dta) + df = DataFrame(ser) + + assert dta.all() + assert dta.any() + + assert ser.all() + assert ser.any() + + assert df.any().all() + assert df.all().all() + + dta = dta.tz_localize("UTC") + ser = Series(dta) + df = DataFrame(ser) + + assert dta.all() + assert dta.any() + + assert ser.all() + assert ser.any() + + assert df.any().all() + assert df.all().all() + + tda = dta - dta[0] + ser = Series(tda) + df = DataFrame(ser) + + assert tda.any() + assert not tda.all() + + assert ser.any() + assert not ser.all() + + assert df.any().all() + assert not df.all().any() + + def test_timedelta64_analytics(self): + + # index min/max + dti = date_range("2012-1-1", periods=3, 
freq="D") + td = Series(dti) - Timestamp("20120101") + + result = td.idxmin() + assert result == 0 + + result = td.idxmax() + assert result == 2 + + # GH#2982 + # with NaT + td[0] = np.nan + + result = td.idxmin() + assert result == 1 + + result = td.idxmax() + assert result == 2 + + # abs + s1 = Series(date_range("20120101", periods=3)) + s2 = Series(date_range("20120102", periods=3)) + expected = Series(s2 - s1) + + result = np.abs(s1 - s2) + tm.assert_series_equal(result, expected) + + result = (s1 - s2).abs() + tm.assert_series_equal(result, expected) + + # max/min + result = td.max() + expected = Timedelta("2 days") + assert result == expected + + result = td.min() + expected = Timedelta("1 days") + assert result == expected + + @pytest.mark.parametrize( + "test_input,error_type", + [ + (Series([], dtype="float64"), ValueError), + # For strings, or any Series with dtype 'O' + (Series(["foo", "bar", "baz"]), TypeError), + (Series([(1,), (2,)]), TypeError), + # For mixed data types + (Series(["foo", "foo", "bar", "bar", None, np.nan, "baz"]), TypeError), + ], + ) + def test_assert_idxminmax_raises(self, test_input, error_type): + """ + Cases where ``Series.argmax`` and related should raise an exception + """ + msg = ( + "reduction operation 'argmin' not allowed for this dtype|" + "attempt to get argmin of an empty sequence" + ) + with pytest.raises(error_type, match=msg): + test_input.idxmin() + with pytest.raises(error_type, match=msg): + test_input.idxmin(skipna=False) + msg = ( + "reduction operation 'argmax' not allowed for this dtype|" + "attempt to get argmax of an empty sequence" + ) + with pytest.raises(error_type, match=msg): + test_input.idxmax() + with pytest.raises(error_type, match=msg): + test_input.idxmax(skipna=False) + + def test_idxminmax_with_inf(self): + # For numeric data with NA and Inf (GH #13595) + s = Series([0, -np.inf, np.inf, np.nan]) + + assert s.idxmin() == 1 + assert np.isnan(s.idxmin(skipna=False)) + + assert s.idxmax() == 2 + assert np.isnan(s.idxmax(skipna=False)) + + # Using old-style behavior that treats floating point nan, -inf, and + # +inf as missing + with pd.option_context("mode.use_inf_as_na", True): + assert s.idxmin() == 0 + assert np.isnan(s.idxmin(skipna=False)) + assert s.idxmax() == 0 + np.isnan(s.idxmax(skipna=False)) + + +class TestDatetime64SeriesReductions: + # Note: the name TestDatetime64SeriesReductions indicates these tests + # were moved from a series-specific test file, _not_ that these tests are + # intended long-term to be series-specific + + @pytest.mark.parametrize( + "nat_ser", + [ + Series([NaT, NaT]), + Series([NaT, Timedelta("nat")]), + Series([Timedelta("nat"), Timedelta("nat")]), + ], + ) + def test_minmax_nat_series(self, nat_ser): + # GH#23282 + assert nat_ser.min() is NaT + assert nat_ser.max() is NaT + assert nat_ser.min(skipna=False) is NaT + assert nat_ser.max(skipna=False) is NaT + + @pytest.mark.parametrize( + "nat_df", + [ + DataFrame([NaT, NaT]), + DataFrame([NaT, Timedelta("nat")]), + DataFrame([Timedelta("nat"), Timedelta("nat")]), + ], + ) + def test_minmax_nat_dataframe(self, nat_df): + # GH#23282 + assert nat_df.min()[0] is NaT + assert nat_df.max()[0] is NaT + assert nat_df.min(skipna=False)[0] is NaT + assert nat_df.max(skipna=False)[0] is NaT + + def test_min_max(self): + rng = date_range("1/1/2000", "12/31/2000") + rng2 = rng.take(np.random.permutation(len(rng))) + + the_min = rng2.min() + the_max = rng2.max() + assert isinstance(the_min, Timestamp) + assert isinstance(the_max, Timestamp) + assert 
the_min == rng[0] + assert the_max == rng[-1] + + assert rng.min() == rng[0] + assert rng.max() == rng[-1] + + def test_min_max_series(self): + rng = date_range("1/1/2000", periods=10, freq="4h") + lvls = ["A", "A", "A", "B", "B", "B", "C", "C", "C", "C"] + df = DataFrame({"TS": rng, "V": np.random.randn(len(rng)), "L": lvls}) + + result = df.TS.max() + exp = Timestamp(df.TS.iat[-1]) + assert isinstance(result, Timestamp) + assert result == exp + + result = df.TS.min() + exp = Timestamp(df.TS.iat[0]) + assert isinstance(result, Timestamp) + assert result == exp + + +class TestCategoricalSeriesReductions: + # Note: the name TestCategoricalSeriesReductions indicates these tests + # were moved from a series-specific test file, _not_ that these tests are + # intended long-term to be series-specific + + @pytest.mark.parametrize("function", ["min", "max"]) + def test_min_max_unordered_raises(self, function): + # unordered cats have no min/max + cat = Series(Categorical(["a", "b", "c", "d"], ordered=False)) + msg = f"Categorical is not ordered for operation {function}" + with pytest.raises(TypeError, match=msg): + getattr(cat, function)() + + @pytest.mark.parametrize( + "values, categories", + [ + (list("abc"), list("abc")), + (list("abc"), list("cba")), + (list("abc") + [np.nan], list("cba")), + ([1, 2, 3], [3, 2, 1]), + ([1, 2, 3, np.nan], [3, 2, 1]), + ], + ) + @pytest.mark.parametrize("function", ["min", "max"]) + def test_min_max_ordered(self, values, categories, function): + # GH 25303 + cat = Series(Categorical(values, categories=categories, ordered=True)) + result = getattr(cat, function)(skipna=True) + expected = categories[0] if function == "min" else categories[2] + assert result == expected + + @pytest.mark.parametrize("function", ["min", "max"]) + @pytest.mark.parametrize("skipna", [True, False]) + def test_min_max_ordered_with_nan_only(self, function, skipna): + # https://github.com/pandas-dev/pandas/issues/33450 + cat = Series(Categorical([np.nan], categories=[1, 2], ordered=True)) + result = getattr(cat, function)(skipna=skipna) + assert result is np.nan + + @pytest.mark.parametrize("function", ["min", "max"]) + @pytest.mark.parametrize("skipna", [True, False]) + def test_min_max_skipna(self, function, skipna): + cat = Series( + Categorical(["a", "b", np.nan, "a"], categories=["b", "a"], ordered=True) + ) + result = getattr(cat, function)(skipna=skipna) + + if skipna is True: + expected = "b" if function == "min" else "a" + assert result == expected + else: + assert result is np.nan + + +class TestSeriesMode: + # Note: the name TestSeriesMode indicates these tests + # were moved from a series-specific test file, _not_ that these tests are + # intended long-term to be series-specific + + @pytest.mark.parametrize( + "dropna, expected", + [(True, Series([], dtype=np.float64)), (False, Series([], dtype=np.float64))], + ) + def test_mode_empty(self, dropna, expected): + s = Series([], dtype=np.float64) + result = s.mode(dropna) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "dropna, data, expected", + [ + (True, [1, 1, 1, 2], [1]), + (True, [1, 1, 1, 2, 3, 3, 3], [1, 3]), + (False, [1, 1, 1, 2], [1]), + (False, [1, 1, 1, 2, 3, 3, 3], [1, 3]), + ], + ) + @pytest.mark.parametrize( + "dt", list(np.typecodes["AllInteger"] + np.typecodes["Float"]) + ) + def test_mode_numerical(self, dropna, data, expected, dt): + s = Series(data, dtype=dt) + result = s.mode(dropna) + expected = Series(expected, dtype=dt) + tm.assert_series_equal(result, expected) + + 
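# mode() always returns a Series, since ties can produce several modes; the + # dropna flag exercised below decides whether NaN itself may count as a mode. + 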
@pytest.mark.parametrize("dropna, expected", [(True, [1.0]), (False, [1, np.nan])]) + def test_mode_numerical_nan(self, dropna, expected): + s = Series([1, 1, 2, np.nan, np.nan]) + result = s.mode(dropna) + expected = Series(expected) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "dropna, expected1, expected2, expected3", + [(True, ["b"], ["bar"], ["nan"]), (False, ["b"], [np.nan], ["nan"])], + ) + def test_mode_str_obj(self, dropna, expected1, expected2, expected3): + # Test string and object types. + data = ["a"] * 2 + ["b"] * 3 + + s = Series(data, dtype="c") + result = s.mode(dropna) + expected1 = Series(expected1, dtype="c") + tm.assert_series_equal(result, expected1) + + data = ["foo", "bar", "bar", np.nan, np.nan, np.nan] + + s = Series(data, dtype=object) + result = s.mode(dropna) + expected2 = Series(expected2, dtype=object) + tm.assert_series_equal(result, expected2) + + data = ["foo", "bar", "bar", np.nan, np.nan, np.nan] + + s = Series(data, dtype=object).astype(str) + result = s.mode(dropna) + expected3 = Series(expected3, dtype=str) + tm.assert_series_equal(result, expected3) + + @pytest.mark.parametrize( + "dropna, expected1, expected2", + [(True, ["foo"], ["foo"]), (False, ["foo"], [np.nan])], + ) + def test_mode_mixeddtype(self, dropna, expected1, expected2): + s = Series([1, "foo", "foo"]) + result = s.mode(dropna) + expected = Series(expected1) + tm.assert_series_equal(result, expected) + + s = Series([1, "foo", "foo", np.nan, np.nan, np.nan]) + result = s.mode(dropna) + expected = Series(expected2, dtype=object) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "dropna, expected1, expected2", + [ + ( + True, + ["1900-05-03", "2011-01-03", "2013-01-02"], + ["2011-01-03", "2013-01-02"], + ), + (False, [np.nan], [np.nan, "2011-01-03", "2013-01-02"]), + ], + ) + def test_mode_datetime(self, dropna, expected1, expected2): + s = Series( + ["2011-01-03", "2013-01-02", "1900-05-03", "nan", "nan"], dtype="M8[ns]" + ) + result = s.mode(dropna) + expected1 = Series(expected1, dtype="M8[ns]") + tm.assert_series_equal(result, expected1) + + s = Series( + [ + "2011-01-03", + "2013-01-02", + "1900-05-03", + "2011-01-03", + "2013-01-02", + "nan", + "nan", + ], + dtype="M8[ns]", + ) + result = s.mode(dropna) + expected2 = Series(expected2, dtype="M8[ns]") + tm.assert_series_equal(result, expected2) + + @pytest.mark.parametrize( + "dropna, expected1, expected2", + [ + (True, ["-1 days", "0 days", "1 days"], ["2 min", "1 day"]), + (False, [np.nan], [np.nan, "2 min", "1 day"]), + ], + ) + def test_mode_timedelta(self, dropna, expected1, expected2): + # gh-5986: Test timedelta types. 
+ + s = Series( + ["1 days", "-1 days", "0 days", "nan", "nan"], dtype="timedelta64[ns]" + ) + result = s.mode(dropna) + expected1 = Series(expected1, dtype="timedelta64[ns]") + tm.assert_series_equal(result, expected1) + + s = Series( + [ + "1 day", + "1 day", + "-1 day", + "-1 day 2 min", + "2 min", + "2 min", + "nan", + "nan", + ], + dtype="timedelta64[ns]", + ) + result = s.mode(dropna) + expected2 = Series(expected2, dtype="timedelta64[ns]") + tm.assert_series_equal(result, expected2) + + @pytest.mark.parametrize( + "dropna, expected1, expected2, expected3", + [ + ( + True, + Categorical([1, 2], categories=[1, 2]), + Categorical(["a"], categories=[1, "a"]), + Categorical([3, 1], categories=[3, 2, 1], ordered=True), + ), + ( + False, + Categorical([np.nan], categories=[1, 2]), + Categorical([np.nan, "a"], categories=[1, "a"]), + Categorical([np.nan, 3, 1], categories=[3, 2, 1], ordered=True), + ), + ], + ) + def test_mode_category(self, dropna, expected1, expected2, expected3): + s = Series(Categorical([1, 2, np.nan, np.nan])) + result = s.mode(dropna) + expected1 = Series(expected1, dtype="category") + tm.assert_series_equal(result, expected1) + + s = Series(Categorical([1, "a", "a", np.nan, np.nan])) + result = s.mode(dropna) + expected2 = Series(expected2, dtype="category") + tm.assert_series_equal(result, expected2) + + s = Series( + Categorical( + [1, 1, 2, 3, 3, np.nan, np.nan], categories=[3, 2, 1], ordered=True + ) + ) + result = s.mode(dropna) + expected3 = Series(expected3, dtype="category") + tm.assert_series_equal(result, expected3) + + @pytest.mark.parametrize( + "dropna, expected1, expected2", + [(True, [2 ** 63], [1, 2 ** 63]), (False, [2 ** 63], [1, 2 ** 63])], + ) + def test_mode_intoverflow(self, dropna, expected1, expected2): + # Test for uint64 overflow. 
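+ # 2 ** 63 exceeds the int64 maximum (2 ** 63 - 1), so these values must stay + # uint64 end-to-end; the checks below also confirm mode() preserves that dtype.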
+ s = Series([1, 2 ** 63, 2 ** 63], dtype=np.uint64) + result = s.mode(dropna) + expected1 = Series(expected1, dtype=np.uint64) + tm.assert_series_equal(result, expected1) + + s = Series([1, 2 ** 63], dtype=np.uint64) + result = s.mode(dropna) + expected2 = Series(expected2, dtype=np.uint64) + tm.assert_series_equal(result, expected2) + + def test_mode_sortwarning(self): + # Check for the warning that is raised when the mode + # results cannot be sorted + + expected = Series(["foo", np.nan]) + s = Series([1, "foo", "foo", np.nan, np.nan]) + + with tm.assert_produces_warning(UserWarning): + result = s.mode(dropna=False) + result = result.sort_values().reset_index(drop=True) + + tm.assert_series_equal(result, expected) + + def test_mode_boolean_with_na(self): + # GH#42107 + ser = Series([True, False, True, pd.NA], dtype="boolean") + result = ser.mode() + expected = Series({0: True}, dtype="boolean") + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "array,expected,dtype", + [ + ( + [0, 1j, 1, 1, 1 + 1j, 1 + 2j], + Series([1], dtype=np.complex128), + np.complex128, + ), + ( + [0, 1j, 1, 1, 1 + 1j, 1 + 2j], + Series([1], dtype=np.complex64), + np.complex64, + ), + ( + [1 + 1j, 2j, 1 + 1j], + Series([1 + 1j], dtype=np.complex128), + np.complex128, + ), + ], + ) + def test_single_mode_value_complex(self, array, expected, dtype): + result = Series(array, dtype=dtype).mode() + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "array,expected,dtype", + [ + ( + # no modes + [0, 1j, 1, 1 + 1j, 1 + 2j], + Series([0j, 1j, 1 + 0j, 1 + 1j, 1 + 2j], dtype=np.complex128), + np.complex128, + ), + ( + [1 + 1j, 2j, 1 + 1j, 2j, 3], + Series([2j, 1 + 1j], dtype=np.complex64), + np.complex64, + ), + ], + ) + def test_multimode_complex(self, array, expected, dtype): + # GH 17927 + # mode tries to sort multimodal series. + # Complex numbers are sorted by their magnitude + result = Series(array, dtype=dtype).mode() + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reductions/test_stat_reductions.py b/.local/lib/python3.8/site-packages/pandas/tests/reductions/test_stat_reductions.py new file mode 100644 index 0000000..2f1ae5d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/reductions/test_stat_reductions.py @@ -0,0 +1,279 @@ +""" +Tests for statistical reductions of 2nd moment or higher: var, skew, kurt, ... 
+""" +import inspect + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + DataFrame, + Series, +) +import pandas._testing as tm +from pandas.core.arrays import ( + DatetimeArray, + PeriodArray, + TimedeltaArray, +) + + +class TestDatetimeLikeStatReductions: + @pytest.mark.parametrize("box", [Series, pd.Index, DatetimeArray]) + def test_dt64_mean(self, tz_naive_fixture, box): + tz = tz_naive_fixture + + dti = pd.date_range("2001-01-01", periods=11, tz=tz) + # shuffle so that we are not just working with monotone-increasing + dti = dti.take([4, 1, 3, 10, 9, 7, 8, 5, 0, 2, 6]) + dtarr = dti._data + + obj = box(dtarr) + assert obj.mean() == pd.Timestamp("2001-01-06", tz=tz) + assert obj.mean(skipna=False) == pd.Timestamp("2001-01-06", tz=tz) + + # dtarr[-2] will be the first date 2001-01-1 + dtarr[-2] = pd.NaT + + obj = box(dtarr) + assert obj.mean() == pd.Timestamp("2001-01-06 07:12:00", tz=tz) + assert obj.mean(skipna=False) is pd.NaT + + @pytest.mark.parametrize("box", [Series, pd.Index, PeriodArray]) + @pytest.mark.parametrize("freq", ["S", "H", "D", "W", "B"]) + def test_period_mean(self, box, freq): + # GH#24757 + dti = pd.date_range("2001-01-01", periods=11) + # shuffle so that we are not just working with monotone-increasing + dti = dti.take([4, 1, 3, 10, 9, 7, 8, 5, 0, 2, 6]) + + parr = dti._data.to_period(freq) + obj = box(parr) + with pytest.raises(TypeError, match="ambiguous"): + obj.mean() + with pytest.raises(TypeError, match="ambiguous"): + obj.mean(skipna=True) + + # parr[-2] will be the first date 2001-01-1 + parr[-2] = pd.NaT + + with pytest.raises(TypeError, match="ambiguous"): + obj.mean() + with pytest.raises(TypeError, match="ambiguous"): + obj.mean(skipna=True) + + @pytest.mark.parametrize("box", [Series, pd.Index, TimedeltaArray]) + def test_td64_mean(self, box): + tdi = pd.TimedeltaIndex([0, 3, -2, -7, 1, 2, -1, 3, 5, -2, 4], unit="D") + + tdarr = tdi._data + obj = box(tdarr) + + result = obj.mean() + expected = np.array(tdarr).mean() + assert result == expected + + tdarr[0] = pd.NaT + assert obj.mean(skipna=False) is pd.NaT + + result2 = obj.mean(skipna=True) + assert result2 == tdi[1:].mean() + + # exact equality fails by 1 nanosecond + assert result2.round("us") == (result * 11.0 / 10).round("us") + + +class TestSeriesStatReductions: + # Note: the name TestSeriesStatReductions indicates these tests + # were moved from a series-specific test file, _not_ that these tests are + # intended long-term to be series-specific + + def _check_stat_op( + self, name, alternate, string_series_, check_objects=False, check_allna=False + ): + + with pd.option_context("use_bottleneck", False): + f = getattr(Series, name) + + # add some NaNs + string_series_[5:15] = np.NaN + + # mean, idxmax, idxmin, min, and max are valid for dates + if name not in ["max", "min", "mean", "median", "std"]: + ds = Series(pd.date_range("1/1/2001", periods=10)) + msg = f"does not support reduction '{name}'" + with pytest.raises(TypeError, match=msg): + f(ds) + + # skipna or no + assert pd.notna(f(string_series_)) + assert pd.isna(f(string_series_, skipna=False)) + + # check the result is correct + nona = string_series_.dropna() + tm.assert_almost_equal(f(nona), alternate(nona.values)) + tm.assert_almost_equal(f(string_series_), alternate(nona.values)) + + allna = string_series_ * np.nan + + if check_allna: + assert np.isnan(f(allna)) + + # dtype=object with None, it works! 
+ s = Series([1, 2, 3, None, 5]) + f(s) + + # GH#2888 + items = [0] + items.extend(range(2 ** 40, 2 ** 40 + 1000)) + s = Series(items, dtype="int64") + tm.assert_almost_equal(float(f(s)), float(alternate(s.values))) + + # check date range + if check_objects: + s = Series(pd.bdate_range("1/1/2000", periods=10)) + res = f(s) + exp = alternate(s) + assert res == exp + + # check on string data + if name not in ["sum", "min", "max"]: + with pytest.raises(TypeError, match=None): + f(Series(list("abc"))) + + # Invalid axis. + msg = "No axis named 1 for object type Series" + with pytest.raises(ValueError, match=msg): + f(string_series_, axis=1) + + # Unimplemented numeric_only parameter. + if "numeric_only" in inspect.getfullargspec(f).args: + with pytest.raises(NotImplementedError, match=name): + f(string_series_, numeric_only=True) + + def test_sum(self): + string_series = tm.makeStringSeries().rename("series") + self._check_stat_op("sum", np.sum, string_series, check_allna=False) + + def test_mean(self): + string_series = tm.makeStringSeries().rename("series") + self._check_stat_op("mean", np.mean, string_series) + + def test_median(self): + string_series = tm.makeStringSeries().rename("series") + self._check_stat_op("median", np.median, string_series) + + # test with integers, test failure + int_ts = Series(np.ones(10, dtype=int), index=range(10)) + tm.assert_almost_equal(np.median(int_ts), int_ts.median()) + + def test_prod(self): + string_series = tm.makeStringSeries().rename("series") + self._check_stat_op("prod", np.prod, string_series) + + def test_min(self): + string_series = tm.makeStringSeries().rename("series") + self._check_stat_op("min", np.min, string_series, check_objects=True) + + def test_max(self): + string_series = tm.makeStringSeries().rename("series") + self._check_stat_op("max", np.max, string_series, check_objects=True) + + def test_var_std(self): + string_series = tm.makeStringSeries().rename("series") + datetime_series = tm.makeTimeSeries().rename("ts") + + alt = lambda x: np.std(x, ddof=1) + self._check_stat_op("std", alt, string_series) + + alt = lambda x: np.var(x, ddof=1) + self._check_stat_op("var", alt, string_series) + + result = datetime_series.std(ddof=4) + expected = np.std(datetime_series.values, ddof=4) + tm.assert_almost_equal(result, expected) + + result = datetime_series.var(ddof=4) + expected = np.var(datetime_series.values, ddof=4) + tm.assert_almost_equal(result, expected) + + # 1 - element series with ddof=1 + s = datetime_series.iloc[[0]] + result = s.var(ddof=1) + assert pd.isna(result) + + result = s.std(ddof=1) + assert pd.isna(result) + + def test_sem(self): + string_series = tm.makeStringSeries().rename("series") + datetime_series = tm.makeTimeSeries().rename("ts") + + alt = lambda x: np.std(x, ddof=1) / np.sqrt(len(x)) + self._check_stat_op("sem", alt, string_series) + + result = datetime_series.sem(ddof=4) + expected = np.std(datetime_series.values, ddof=4) / np.sqrt( + len(datetime_series.values) + ) + tm.assert_almost_equal(result, expected) + + # 1 - element series with ddof=1 + s = datetime_series.iloc[[0]] + result = s.sem(ddof=1) + assert pd.isna(result) + + @td.skip_if_no_scipy + def test_skew(self): + from scipy.stats import skew + + string_series = tm.makeStringSeries().rename("series") + + alt = lambda x: skew(x, bias=False) + self._check_stat_op("skew", alt, string_series) + + # test corner cases, skew() returns NaN unless there's at least 3 + # values + min_N = 3 + for i in range(1, min_N + 1): + s = Series(np.ones(i)) + df = 
DataFrame(np.ones((i, i))) + if i < min_N: + assert np.isnan(s.skew()) + assert np.isnan(df.skew()).all() + else: + assert 0 == s.skew() + assert (df.skew() == 0).all() + + @td.skip_if_no_scipy + def test_kurt(self): + from scipy.stats import kurtosis + + string_series = tm.makeStringSeries().rename("series") + + alt = lambda x: kurtosis(x, bias=False) + self._check_stat_op("kurt", alt, string_series) + + index = pd.MultiIndex( + levels=[["bar"], ["one", "two", "three"], [0, 1]], + codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], + ) + s = Series(np.random.randn(6), index=index) + with tm.assert_produces_warning(FutureWarning): + tm.assert_almost_equal(s.kurt(), s.kurt(level=0)["bar"]) + + # test corner cases, kurt() returns NaN unless there's at least 4 + # values + min_N = 4 + for i in range(1, min_N + 1): + s = Series(np.ones(i)) + df = DataFrame(np.ones((i, i))) + if i < min_N: + assert np.isnan(s.kurt()) + assert np.isnan(df.kurt()).all() + else: + assert 0 == s.kurt() + assert (df.kurt() == 0).all() diff --git a/.local/lib/python3.8/site-packages/pandas/tests/resample/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/resample/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/resample/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/resample/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..be51e53 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/resample/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/resample/__pycache__/conftest.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/resample/__pycache__/conftest.cpython-38.pyc new file mode 100644 index 0000000..7811486 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/resample/__pycache__/conftest.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/resample/__pycache__/test_base.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/resample/__pycache__/test_base.cpython-38.pyc new file mode 100644 index 0000000..0c9f081 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/resample/__pycache__/test_base.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/resample/__pycache__/test_datetime_index.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/resample/__pycache__/test_datetime_index.cpython-38.pyc new file mode 100644 index 0000000..2e9099d Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/resample/__pycache__/test_datetime_index.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/resample/__pycache__/test_deprecated.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/resample/__pycache__/test_deprecated.cpython-38.pyc new file mode 100644 index 0000000..11c0b02 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/resample/__pycache__/test_deprecated.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/resample/__pycache__/test_period_index.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/resample/__pycache__/test_period_index.cpython-38.pyc new file mode 100644 index 0000000..d889145 Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/pandas/tests/resample/__pycache__/test_period_index.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/resample/__pycache__/test_resample_api.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/resample/__pycache__/test_resample_api.cpython-38.pyc new file mode 100644 index 0000000..368e06a Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/resample/__pycache__/test_resample_api.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/resample/__pycache__/test_resampler_grouper.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/resample/__pycache__/test_resampler_grouper.cpython-38.pyc new file mode 100644 index 0000000..0e84a4c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/resample/__pycache__/test_resampler_grouper.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/resample/__pycache__/test_time_grouper.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/resample/__pycache__/test_time_grouper.cpython-38.pyc new file mode 100644 index 0000000..d48d4c8 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/resample/__pycache__/test_time_grouper.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/resample/__pycache__/test_timedelta.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/resample/__pycache__/test_timedelta.cpython-38.pyc new file mode 100644 index 0000000..c409924 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/resample/__pycache__/test_timedelta.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/resample/conftest.py b/.local/lib/python3.8/site-packages/pandas/tests/resample/conftest.py new file mode 100644 index 0000000..420c302 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/resample/conftest.py @@ -0,0 +1,171 @@ +from datetime import datetime + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Series, +) +from pandas.core.indexes.datetimes import date_range +from pandas.core.indexes.period import period_range + +# The various methods we support +downsample_methods = [ + "min", + "max", + "first", + "last", + "sum", + "mean", + "sem", + "median", + "prod", + "var", + "std", + "ohlc", + "quantile", +] +upsample_methods = ["count", "size"] +series_methods = ["nunique"] +resample_methods = downsample_methods + upsample_methods + series_methods + + +@pytest.fixture(params=downsample_methods) +def downsample_method(request): + """Fixture for parametrization of Grouper downsample methods.""" + return request.param + + +@pytest.fixture(params=resample_methods) +def resample_method(request): + """Fixture for parametrization of Grouper resample methods.""" + return request.param + + +@pytest.fixture +def simple_date_range_series(): + """ + Series with date range index and random data for test purposes. + """ + + def _simple_date_range_series(start, end, freq="D"): + rng = date_range(start, end, freq=freq) + return Series(np.random.randn(len(rng)), index=rng) + + return _simple_date_range_series + + +@pytest.fixture +def simple_period_range_series(): + """ + Series with period range index and random data for test purposes. 
+ """ + + def _simple_period_range_series(start, end, freq="D"): + rng = period_range(start, end, freq=freq) + return Series(np.random.randn(len(rng)), index=rng) + + return _simple_period_range_series + + +@pytest.fixture +def _index_start(): + """Fixture for parametrization of index, series and frame.""" + return datetime(2005, 1, 1) + + +@pytest.fixture +def _index_end(): + """Fixture for parametrization of index, series and frame.""" + return datetime(2005, 1, 10) + + +@pytest.fixture +def _index_freq(): + """Fixture for parametrization of index, series and frame.""" + return "D" + + +@pytest.fixture +def _index_name(): + """Fixture for parametrization of index, series and frame.""" + return None + + +@pytest.fixture +def index(_index_factory, _index_start, _index_end, _index_freq, _index_name): + """ + Fixture for parametrization of date_range, period_range and + timedelta_range indexes + """ + return _index_factory(_index_start, _index_end, freq=_index_freq, name=_index_name) + + +@pytest.fixture +def _static_values(index): + """ + Fixture for parametrization of values used in parametrization of + Series and DataFrames with date_range, period_range and + timedelta_range indexes + """ + return np.arange(len(index)) + + +@pytest.fixture +def _series_name(): + """ + Fixture for parametrization of Series name for Series used with + date_range, period_range and timedelta_range indexes + """ + return None + + +@pytest.fixture +def series(index, _series_name, _static_values): + """ + Fixture for parametrization of Series with date_range, period_range and + timedelta_range indexes + """ + return Series(_static_values, index=index, name=_series_name) + + +@pytest.fixture +def empty_series_dti(series): + """ + Fixture for parametrization of empty Series with date_range, + period_range and timedelta_range indexes + """ + return series[:0] + + +@pytest.fixture +def frame(index, _series_name, _static_values): + """ + Fixture for parametrization of DataFrame with date_range, period_range + and timedelta_range indexes + """ + # _series_name is intentionally unused + return DataFrame({"value": _static_values}, index=index) + + +@pytest.fixture +def empty_frame_dti(series): + """ + Fixture for parametrization of empty DataFrame with date_range, + period_range and timedelta_range indexes + """ + index = series.index[:0] + return DataFrame(index=index) + + +@pytest.fixture(params=[Series, DataFrame]) +def series_and_frame(request, series, frame): + """ + Fixture for parametrization of Series and DataFrame with date_range, + period_range and timedelta_range indexes + """ + if request.param == Series: + return series + if request.param == DataFrame: + return frame diff --git a/.local/lib/python3.8/site-packages/pandas/tests/resample/test_base.py b/.local/lib/python3.8/site-packages/pandas/tests/resample/test_base.py new file mode 100644 index 0000000..2dae9ee --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/resample/test_base.py @@ -0,0 +1,256 @@ +from datetime import datetime + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + NaT, + PeriodIndex, + Series, +) +import pandas._testing as tm +from pandas.core.groupby.groupby import DataError +from pandas.core.groupby.grouper import Grouper +from pandas.core.indexes.datetimes import date_range +from pandas.core.indexes.period import period_range +from pandas.core.indexes.timedeltas import timedelta_range +from pandas.core.resample import _asfreq_compat + +# a fixture value can be overridden by the test parameter 
value. Note that the +# value of the fixture can be overridden this way even if the test doesn't use +# it directly (doesn't mention it in the function prototype). +# see https://docs.pytest.org/en/latest/fixture.html#override-a-fixture-with-direct-test-parametrization # noqa:E501 +# in this module we override the fixture values defined in conftest.py +# tuples of '_index_factory,_series_name,_index_start,_index_end' +DATE_RANGE = (date_range, "dti", datetime(2005, 1, 1), datetime(2005, 1, 10)) +PERIOD_RANGE = (period_range, "pi", datetime(2005, 1, 1), datetime(2005, 1, 10)) +TIMEDELTA_RANGE = (timedelta_range, "tdi", "1 day", "10 day") + +all_ts = pytest.mark.parametrize( + "_index_factory,_series_name,_index_start,_index_end", + [DATE_RANGE, PERIOD_RANGE, TIMEDELTA_RANGE], +) + + +@pytest.fixture +def create_index(_index_factory): + def _create_index(*args, **kwargs): + """return the _index_factory created using the args, kwargs""" + return _index_factory(*args, **kwargs) + + return _create_index + + +@pytest.mark.parametrize("freq", ["2D", "1H"]) +@pytest.mark.parametrize( + "_index_factory,_series_name,_index_start,_index_end", [DATE_RANGE, TIMEDELTA_RANGE] +) +def test_asfreq(series_and_frame, freq, create_index): + obj = series_and_frame + + result = obj.resample(freq).asfreq() + new_index = create_index(obj.index[0], obj.index[-1], freq=freq) + expected = obj.reindex(new_index) + tm.assert_almost_equal(result, expected) + + +@pytest.mark.parametrize( + "_index_factory,_series_name,_index_start,_index_end", [DATE_RANGE, TIMEDELTA_RANGE] +) +def test_asfreq_fill_value(series, create_index): + # test for fill value during resampling, issue 3715 + + s = series + + result = s.resample("1H").asfreq() + new_index = create_index(s.index[0], s.index[-1], freq="1H") + expected = s.reindex(new_index) + tm.assert_series_equal(result, expected) + + frame = s.to_frame("value") + frame.iloc[1] = None + result = frame.resample("1H").asfreq(fill_value=4.0) + new_index = create_index(frame.index[0], frame.index[-1], freq="1H") + expected = frame.reindex(new_index, fill_value=4.0) + tm.assert_frame_equal(result, expected) + + +@all_ts +def test_resample_interpolate(frame): + # # 12925 + df = frame + tm.assert_frame_equal( + df.resample("1T").asfreq().interpolate(), df.resample("1T").interpolate() + ) + + +def test_raises_on_non_datetimelike_index(): + # this is a non datetimelike index + xp = DataFrame() + msg = ( + "Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, " + "but got an instance of 'Index'" + ) + with pytest.raises(TypeError, match=msg): + xp.resample("A").mean() + + +@all_ts +@pytest.mark.parametrize("freq", ["M", "D", "H"]) +def test_resample_empty_series(freq, empty_series_dti, resample_method): + # GH12771 & GH12868 + + if resample_method == "ohlc": + pytest.skip("need to test for ohlc from GH13083") + + s = empty_series_dti + result = getattr(s.resample(freq), resample_method)() + + expected = s.copy() + expected.index = _asfreq_compat(s.index, freq) + + tm.assert_index_equal(result.index, expected.index) + assert result.index.freq == expected.index.freq + tm.assert_series_equal(result, expected, check_dtype=False) + + +@all_ts +@pytest.mark.parametrize("freq", ["M", "D", "H"]) +def test_resample_nat_index_series(request, freq, series, resample_method): + # GH39227 + + if freq == "M": + request.node.add_marker(pytest.mark.xfail(reason="Don't know why this fails")) + + s = series.copy() + s.index = PeriodIndex([NaT] * len(s), freq=freq) + result = getattr(s.resample(freq), 
resample_method)() + + if resample_method == "ohlc": + expected = DataFrame( + [], index=s.index[:0].copy(), columns=["open", "high", "low", "close"] + ) + tm.assert_frame_equal(result, expected, check_dtype=False) + else: + expected = s[:0].copy() + tm.assert_series_equal(result, expected, check_dtype=False) + tm.assert_index_equal(result.index, expected.index) + assert result.index.freq == expected.index.freq + + +@all_ts +@pytest.mark.parametrize("freq", ["M", "D", "H"]) +@pytest.mark.parametrize("resample_method", ["count", "size"]) +def test_resample_count_empty_series(freq, empty_series_dti, resample_method): + # GH28427 + result = getattr(empty_series_dti.resample(freq), resample_method)() + + index = _asfreq_compat(empty_series_dti.index, freq) + + expected = Series([], dtype="int64", index=index, name=empty_series_dti.name) + + tm.assert_series_equal(result, expected) + + +@all_ts +@pytest.mark.parametrize("freq", ["M", "D", "H"]) +def test_resample_empty_dataframe(empty_frame_dti, freq, resample_method): + # GH13212 + df = empty_frame_dti + # count retains dimensions too + result = getattr(df.resample(freq), resample_method)() + if resample_method != "size": + expected = df.copy() + else: + # GH14962 + expected = Series([], dtype=object) + + expected.index = _asfreq_compat(df.index, freq) + + tm.assert_index_equal(result.index, expected.index) + assert result.index.freq == expected.index.freq + tm.assert_almost_equal(result, expected, check_dtype=False) + + # test size for GH13212 (currently stays as df) + + +@all_ts +@pytest.mark.parametrize("freq", ["M", "D", "H"]) +def test_resample_count_empty_dataframe(freq, empty_frame_dti): + # GH28427 + + empty_frame_dti["a"] = [] + + result = empty_frame_dti.resample(freq).count() + + index = _asfreq_compat(empty_frame_dti.index, freq) + + expected = DataFrame({"a": []}, dtype="int64", index=index) + + tm.assert_frame_equal(result, expected) + + +@all_ts +@pytest.mark.parametrize("freq", ["M", "D", "H"]) +def test_resample_size_empty_dataframe(freq, empty_frame_dti): + # GH28427 + + empty_frame_dti["a"] = [] + + result = empty_frame_dti.resample(freq).size() + + index = _asfreq_compat(empty_frame_dti.index, freq) + + expected = Series([], dtype="int64", index=index) + + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("index", tm.all_timeseries_index_generator(0)) +@pytest.mark.parametrize("dtype", [float, int, object, "datetime64[ns]"]) +def test_resample_empty_dtypes(index, dtype, resample_method): + # Empty series were sometimes causing a segfault (for the functions + # with Cython bounds-checking disabled) or an IndexError. We just run + # them to ensure they no longer do. 
(GH #10228) + empty_series_dti = Series([], index, dtype) + try: + getattr(empty_series_dti.resample("d"), resample_method)() + except DataError: + # Ignore these since some combinations are invalid + # (ex: doing mean with dtype of np.object_) + pass + + +@all_ts +def test_apply_to_empty_series(empty_series_dti): + # GH 14313 + s = empty_series_dti + for freq in ["M", "D", "H"]: + result = s.resample(freq).apply(lambda x: 1) + expected = s.resample(freq).apply(np.sum) + + tm.assert_series_equal(result, expected, check_dtype=False) + + +@all_ts +def test_resampler_is_iterable(series): + # GH 15314 + freq = "H" + tg = Grouper(freq=freq, convention="start") + grouped = series.groupby(tg) + resampled = series.resample(freq) + for (rk, rv), (gk, gv) in zip(resampled, grouped): + assert rk == gk + tm.assert_series_equal(rv, gv) + + +@all_ts +def test_resample_quantile(series): + # GH 15023 + s = series + q = 0.75 + freq = "H" + result = s.resample(freq).quantile(q) + expected = s.resample(freq).agg(lambda x: x.quantile(q)).rename(s.name) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/resample/test_datetime_index.py b/.local/lib/python3.8/site-packages/pandas/tests/resample/test_datetime_index.py new file mode 100644 index 0000000..8b4fc4e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/resample/test_datetime_index.py @@ -0,0 +1,1854 @@ +from datetime import datetime +from functools import partial +from io import StringIO + +import numpy as np +import pytest +import pytz + +from pandas._libs import lib +from pandas.errors import UnsupportedFunctionCall + +import pandas as pd +from pandas import ( + DataFrame, + Series, + Timedelta, + Timestamp, + isna, + notna, +) +import pandas._testing as tm +from pandas.core.groupby.grouper import Grouper +from pandas.core.indexes.datetimes import date_range +from pandas.core.indexes.period import ( + Period, + period_range, +) +from pandas.core.resample import ( + DatetimeIndex, + _get_timestamp_range_edges, +) + +import pandas.tseries.offsets as offsets +from pandas.tseries.offsets import Minute + + +@pytest.fixture() +def _index_factory(): + return date_range + + +@pytest.fixture +def _index_freq(): + return "Min" + + +@pytest.fixture +def _static_values(index): + return np.random.rand(len(index)) + + +def test_custom_grouper(index): + + dti = index + s = Series(np.array([1] * len(dti)), index=dti, dtype="int64") + + b = Grouper(freq=Minute(5)) + g = s.groupby(b) + + # check all cython functions work + g.ohlc() # doesn't use _cython_agg_general + funcs = ["add", "mean", "prod", "min", "max", "var"] + for f in funcs: + g._cython_agg_general(f, alt=None, numeric_only=True) + + b = Grouper(freq=Minute(5), closed="right", label="right") + g = s.groupby(b) + # check all cython functions work + g.ohlc() # doesn't use _cython_agg_general + funcs = ["add", "mean", "prod", "min", "max", "var"] + for f in funcs: + g._cython_agg_general(f, alt=None, numeric_only=True) + + assert g.ngroups == 2593 + assert notna(g.mean()).all() + + # construct expected val + arr = [1] + [5] * 2592 + idx = dti[0:-1:5] + idx = idx.append(dti[-1:]) + idx = DatetimeIndex(idx, freq="5T") + expect = Series(arr, index=idx) + + # GH2763 - return input dtype if we can + result = g.agg(np.sum) + tm.assert_series_equal(result, expect) + + df = DataFrame(np.random.rand(len(dti), 10), index=dti, dtype="float64") + r = df.groupby(b).agg(np.sum) + + assert len(r.columns) == 10 + assert len(r.index) == 2593 + + 
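+# [Editor's note, not part of the upstream pandas file] The next test
+# exercises resample()'s `closed`/`label` arguments. A minimal runnable
+# sketch of the binning it asserts, using only names imported above; the
+# helper name `_editor_sketch_closed` is ours, not upstream's, and pytest
+# will not collect it:
+def _editor_sketch_closed():
+    # one value per minute starting at midnight, like the fixture below
+    toy = Series(range(14), index=date_range("1/1/2000", periods=14, freq="T"))
+    # closed="left": bins [00:00, 00:05), ... -> groups toy[:5], toy[5:10], toy[10:]
+    left = toy.resample("5min", closed="left").mean()
+    # closed="right": bins (23:55, 00:00], ... -> toy[0] alone, then toy[1:6], ...
+    right = toy.resample("5min", closed="right").mean()
+    assert left.iloc[0] == toy[:5].mean()
+    assert right.iloc[0] == toy[0]
+# This is exactly the split the `expected` lambdas below encode
+# (s[0], then s[1:6], s[6:11], s[11:] for closed="right").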
+@pytest.mark.parametrize( + "_index_start,_index_end,_index_name", + [("1/1/2000 00:00:00", "1/1/2000 00:13:00", "index")], +) +@pytest.mark.parametrize( + "closed, expected", + [ + ( + "right", + lambda s: Series( + [s[0], s[1:6].mean(), s[6:11].mean(), s[11:].mean()], + index=date_range("1/1/2000", periods=4, freq="5min", name="index"), + ), + ), + ( + "left", + lambda s: Series( + [s[:5].mean(), s[5:10].mean(), s[10:].mean()], + index=date_range( + "1/1/2000 00:05", periods=3, freq="5min", name="index" + ), + ), + ), + ], +) +def test_resample_basic(series, closed, expected): + s = series + expected = expected(s) + result = s.resample("5min", closed=closed, label="right").mean() + tm.assert_series_equal(result, expected) + + +def test_resample_integerarray(): + # GH 25580, resample on IntegerArray + ts = Series( + range(9), index=date_range("1/1/2000", periods=9, freq="T"), dtype="Int64" + ) + result = ts.resample("3T").sum() + expected = Series( + [3, 12, 21], + index=date_range("1/1/2000", periods=3, freq="3T"), + dtype="Int64", + ) + tm.assert_series_equal(result, expected) + + result = ts.resample("3T").mean() + expected = Series( + [1, 4, 7], + index=date_range("1/1/2000", periods=3, freq="3T"), + dtype="Float64", + ) + tm.assert_series_equal(result, expected) + + +def test_resample_basic_grouper(series): + s = series + result = s.resample("5Min").last() + grouper = Grouper(freq=Minute(5), closed="left", label="left") + expected = s.groupby(grouper).agg(lambda x: x[-1]) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "_index_start,_index_end,_index_name", + [("1/1/2000 00:00:00", "1/1/2000 00:13:00", "index")], +) +@pytest.mark.parametrize( + "keyword,value", + [("label", "righttt"), ("closed", "righttt"), ("convention", "starttt")], +) +def test_resample_string_kwargs(series, keyword, value): + # see gh-19303 + # Check that wrong keyword argument strings raise an error + msg = f"Unsupported value {value} for `{keyword}`" + with pytest.raises(ValueError, match=msg): + series.resample("5min", **({keyword: value})) + + +@pytest.mark.parametrize( + "_index_start,_index_end,_index_name", + [("1/1/2000 00:00:00", "1/1/2000 00:13:00", "index")], +) +def test_resample_how(series, downsample_method): + if downsample_method == "ohlc": + pytest.skip("covered by test_resample_how_ohlc") + + s = series + grouplist = np.ones_like(s) + grouplist[0] = 0 + grouplist[1:6] = 1 + grouplist[6:11] = 2 + grouplist[11:] = 3 + expected = s.groupby(grouplist).agg(downsample_method) + expected.index = date_range("1/1/2000", periods=4, freq="5min", name="index") + + result = getattr( + s.resample("5min", closed="right", label="right"), downsample_method + )() + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "_index_start,_index_end,_index_name", + [("1/1/2000 00:00:00", "1/1/2000 00:13:00", "index")], +) +def test_resample_how_ohlc(series): + s = series + grouplist = np.ones_like(s) + grouplist[0] = 0 + grouplist[1:6] = 1 + grouplist[6:11] = 2 + grouplist[11:] = 3 + + def _ohlc(group): + if isna(group).all(): + return np.repeat(np.nan, 4) + return [group[0], group.max(), group.min(), group[-1]] + + expected = DataFrame( + s.groupby(grouplist).agg(_ohlc).values.tolist(), + index=date_range("1/1/2000", periods=4, freq="5min", name="index"), + columns=["open", "high", "low", "close"], + ) + + result = s.resample("5min", closed="right", label="right").ohlc() + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("func", ["min", "max", 
"sum", "prod", "mean", "var", "std"]) +def test_numpy_compat(func): + # see gh-12811 + s = Series([1, 2, 3, 4, 5], index=date_range("20130101", periods=5, freq="s")) + r = s.resample("2s") + + msg = "numpy operations are not valid with resample" + + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(r, func)(func, 1, 2, 3) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(r, func)(axis=1) + + +def test_resample_how_callables(): + # GH#7929 + data = np.arange(5, dtype=np.int64) + ind = date_range(start="2014-01-01", periods=len(data), freq="d") + df = DataFrame({"A": data, "B": data}, index=ind) + + def fn(x, a=1): + return str(type(x)) + + class FnClass: + def __call__(self, x): + return str(type(x)) + + df_standard = df.resample("M").apply(fn) + df_lambda = df.resample("M").apply(lambda x: str(type(x))) + df_partial = df.resample("M").apply(partial(fn)) + df_partial2 = df.resample("M").apply(partial(fn, a=2)) + df_class = df.resample("M").apply(FnClass()) + + tm.assert_frame_equal(df_standard, df_lambda) + tm.assert_frame_equal(df_standard, df_partial) + tm.assert_frame_equal(df_standard, df_partial2) + tm.assert_frame_equal(df_standard, df_class) + + +def test_resample_rounding(): + # GH 8371 + # odd results when rounding is needed + + data = """date,time,value +11-08-2014,00:00:01.093,1 +11-08-2014,00:00:02.159,1 +11-08-2014,00:00:02.667,1 +11-08-2014,00:00:03.175,1 +11-08-2014,00:00:07.058,1 +11-08-2014,00:00:07.362,1 +11-08-2014,00:00:08.324,1 +11-08-2014,00:00:08.830,1 +11-08-2014,00:00:08.982,1 +11-08-2014,00:00:09.815,1 +11-08-2014,00:00:10.540,1 +11-08-2014,00:00:11.061,1 +11-08-2014,00:00:11.617,1 +11-08-2014,00:00:13.607,1 +11-08-2014,00:00:14.535,1 +11-08-2014,00:00:15.525,1 +11-08-2014,00:00:17.960,1 +11-08-2014,00:00:20.674,1 +11-08-2014,00:00:21.191,1""" + + df = pd.read_csv( + StringIO(data), + parse_dates={"timestamp": ["date", "time"]}, + index_col="timestamp", + ) + df.index.name = None + result = df.resample("6s").sum() + expected = DataFrame( + {"value": [4, 9, 4, 2]}, index=date_range("2014-11-08", freq="6s", periods=4) + ) + tm.assert_frame_equal(result, expected) + + result = df.resample("7s").sum() + expected = DataFrame( + {"value": [4, 10, 4, 1]}, index=date_range("2014-11-08", freq="7s", periods=4) + ) + tm.assert_frame_equal(result, expected) + + result = df.resample("11s").sum() + expected = DataFrame( + {"value": [11, 8]}, index=date_range("2014-11-08", freq="11s", periods=2) + ) + tm.assert_frame_equal(result, expected) + + result = df.resample("13s").sum() + expected = DataFrame( + {"value": [13, 6]}, index=date_range("2014-11-08", freq="13s", periods=2) + ) + tm.assert_frame_equal(result, expected) + + result = df.resample("17s").sum() + expected = DataFrame( + {"value": [16, 3]}, index=date_range("2014-11-08", freq="17s", periods=2) + ) + tm.assert_frame_equal(result, expected) + + +def test_resample_basic_from_daily(): + # from daily + dti = date_range( + start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="D", name="index" + ) + + s = Series(np.random.rand(len(dti)), dti) + + # to weekly + result = s.resample("w-sun").last() + + assert len(result) == 3 + assert (result.index.dayofweek == [6, 6, 6]).all() + assert result.iloc[0] == s["1/2/2005"] + assert result.iloc[1] == s["1/9/2005"] + assert result.iloc[2] == s.iloc[-1] + + result = s.resample("W-MON").last() + assert len(result) == 2 + assert (result.index.dayofweek == [0, 0]).all() + assert result.iloc[0] == s["1/3/2005"] + assert result.iloc[1] == 
s["1/10/2005"] + + result = s.resample("W-TUE").last() + assert len(result) == 2 + assert (result.index.dayofweek == [1, 1]).all() + assert result.iloc[0] == s["1/4/2005"] + assert result.iloc[1] == s["1/10/2005"] + + result = s.resample("W-WED").last() + assert len(result) == 2 + assert (result.index.dayofweek == [2, 2]).all() + assert result.iloc[0] == s["1/5/2005"] + assert result.iloc[1] == s["1/10/2005"] + + result = s.resample("W-THU").last() + assert len(result) == 2 + assert (result.index.dayofweek == [3, 3]).all() + assert result.iloc[0] == s["1/6/2005"] + assert result.iloc[1] == s["1/10/2005"] + + result = s.resample("W-FRI").last() + assert len(result) == 2 + assert (result.index.dayofweek == [4, 4]).all() + assert result.iloc[0] == s["1/7/2005"] + assert result.iloc[1] == s["1/10/2005"] + + # to biz day + result = s.resample("B").last() + assert len(result) == 7 + assert (result.index.dayofweek == [4, 0, 1, 2, 3, 4, 0]).all() + + assert result.iloc[0] == s["1/2/2005"] + assert result.iloc[1] == s["1/3/2005"] + assert result.iloc[5] == s["1/9/2005"] + assert result.index.name == "index" + + +def test_resample_upsampling_picked_but_not_correct(): + + # Test for issue #3020 + dates = date_range("01-Jan-2014", "05-Jan-2014", freq="D") + series = Series(1, index=dates) + + result = series.resample("D").mean() + assert result.index[0] == dates[0] + + # GH 5955 + # incorrect deciding to upsample when the axis frequency matches the + # resample frequency + + s = Series( + np.arange(1.0, 6), index=[datetime(1975, 1, i, 12, 0) for i in range(1, 6)] + ) + expected = Series( + np.arange(1.0, 6), index=date_range("19750101", periods=5, freq="D") + ) + + result = s.resample("D").count() + tm.assert_series_equal(result, Series(1, index=expected.index)) + + result1 = s.resample("D").sum() + result2 = s.resample("D").mean() + tm.assert_series_equal(result1, expected) + tm.assert_series_equal(result2, expected) + + +def test_resample_frame_basic(): + df = tm.makeTimeDataFrame() + + b = Grouper(freq="M") + g = df.groupby(b) + + # check all cython functions work + funcs = ["add", "mean", "prod", "min", "max", "var"] + for f in funcs: + g._cython_agg_general(f, alt=None, numeric_only=True) + + result = df.resample("A").mean() + tm.assert_series_equal(result["A"], df["A"].resample("A").mean()) + + result = df.resample("M").mean() + tm.assert_series_equal(result["A"], df["A"].resample("M").mean()) + + df.resample("M", kind="period").mean() + df.resample("W-WED", kind="period").mean() + + +def test_resample_upsample(): + # from daily + dti = date_range( + start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="D", name="index" + ) + + s = Series(np.random.rand(len(dti)), dti) + + # to minutely, by padding + result = s.resample("Min").ffill() + assert len(result) == 12961 + assert result[0] == s[0] + assert result[-1] == s[-1] + + assert result.index.name == "index" + + +def test_resample_how_method(): + # GH9915 + s = Series( + [11, 22], + index=[ + Timestamp("2015-03-31 21:48:52.672000"), + Timestamp("2015-03-31 21:49:52.739000"), + ], + ) + expected = Series( + [11, np.NaN, np.NaN, np.NaN, np.NaN, np.NaN, 22], + index=DatetimeIndex( + [ + Timestamp("2015-03-31 21:48:50"), + Timestamp("2015-03-31 21:49:00"), + Timestamp("2015-03-31 21:49:10"), + Timestamp("2015-03-31 21:49:20"), + Timestamp("2015-03-31 21:49:30"), + Timestamp("2015-03-31 21:49:40"), + Timestamp("2015-03-31 21:49:50"), + ], + freq="10s", + ), + ) + tm.assert_series_equal(s.resample("10S").mean(), expected) + + +def 
test_resample_extra_index_point(): + # GH#9756 + index = date_range(start="20150101", end="20150331", freq="BM") + expected = DataFrame({"A": Series([21, 41, 63], index=index)}) + + index = date_range(start="20150101", end="20150331", freq="B") + df = DataFrame({"A": Series(range(len(index)), index=index)}, dtype="int64") + result = df.resample("BM").last() + tm.assert_frame_equal(result, expected) + + +def test_upsample_with_limit(): + rng = date_range("1/1/2000", periods=3, freq="5t") + ts = Series(np.random.randn(len(rng)), rng) + + result = ts.resample("t").ffill(limit=2) + expected = ts.reindex(result.index, method="ffill", limit=2) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("freq", ["5D", "10H", "5Min", "10S"]) +@pytest.mark.parametrize("rule", ["Y", "3M", "15D", "30H", "15Min", "30S"]) +def test_nearest_upsample_with_limit(tz_aware_fixture, freq, rule): + # GH 33939 + rng = date_range("1/1/2000", periods=3, freq=freq, tz=tz_aware_fixture) + ts = Series(np.random.randn(len(rng)), rng) + + result = ts.resample(rule).nearest(limit=2) + expected = ts.reindex(result.index, method="nearest", limit=2) + tm.assert_series_equal(result, expected) + + +def test_resample_ohlc(series): + s = series + + grouper = Grouper(freq=Minute(5)) + expect = s.groupby(grouper).agg(lambda x: x[-1]) + result = s.resample("5Min").ohlc() + + assert len(result) == len(expect) + assert len(result.columns) == 4 + + xs = result.iloc[-2] + assert xs["open"] == s[-6] + assert xs["high"] == s[-6:-1].max() + assert xs["low"] == s[-6:-1].min() + assert xs["close"] == s[-2] + + xs = result.iloc[0] + assert xs["open"] == s[0] + assert xs["high"] == s[:5].max() + assert xs["low"] == s[:5].min() + assert xs["close"] == s[4] + + +def test_resample_ohlc_result(): + + # GH 12332 + index = date_range("1-1-2000", "2-15-2000", freq="h") + index = index.union(date_range("4-15-2000", "5-15-2000", freq="h")) + s = Series(range(len(index)), index=index) + + a = s.loc[:"4-15-2000"].resample("30T").ohlc() + assert isinstance(a, DataFrame) + + b = s.loc[:"4-14-2000"].resample("30T").ohlc() + assert isinstance(b, DataFrame) + + # GH12348 + # raising on odd period + rng = date_range("2013-12-30", "2014-01-07") + index = rng.drop( + [ + Timestamp("2014-01-01"), + Timestamp("2013-12-31"), + Timestamp("2014-01-04"), + Timestamp("2014-01-05"), + ] + ) + df = DataFrame(data=np.arange(len(index)), index=index) + result = df.resample("B").mean() + expected = df.reindex(index=date_range(rng[0], rng[-1], freq="B")) + tm.assert_frame_equal(result, expected) + + +def test_resample_ohlc_dataframe(): + df = ( + DataFrame( + { + "PRICE": { + Timestamp("2011-01-06 10:59:05", tz=None): 24990, + Timestamp("2011-01-06 12:43:33", tz=None): 25499, + Timestamp("2011-01-06 12:54:09", tz=None): 25499, + }, + "VOLUME": { + Timestamp("2011-01-06 10:59:05", tz=None): 1500000000, + Timestamp("2011-01-06 12:43:33", tz=None): 5000000000, + Timestamp("2011-01-06 12:54:09", tz=None): 100000000, + }, + } + ) + ).reindex(["VOLUME", "PRICE"], axis=1) + df.columns.name = "Cols" + res = df.resample("H").ohlc() + exp = pd.concat( + [df["VOLUME"].resample("H").ohlc(), df["PRICE"].resample("H").ohlc()], + axis=1, + keys=df.columns, + ) + assert exp.columns.names[0] == "Cols" + tm.assert_frame_equal(exp, res) + + df.columns = [["a", "b"], ["c", "d"]] + res = df.resample("H").ohlc() + exp.columns = pd.MultiIndex.from_tuples( + [ + ("a", "c", "open"), + ("a", "c", "high"), + ("a", "c", "low"), + ("a", "c", "close"), + ("b", "d", "open"), + ("b", "d", 
"high"), + ("b", "d", "low"), + ("b", "d", "close"), + ] + ) + tm.assert_frame_equal(exp, res) + + # dupe columns fail atm + # df.columns = ['PRICE', 'PRICE'] + + +def test_resample_dup_index(): + + # GH 4812 + # dup columns with resample raising + df = DataFrame( + np.random.randn(4, 12), + index=[2000, 2000, 2000, 2000], + columns=[Period(year=2000, month=i + 1, freq="M") for i in range(12)], + ) + df.iloc[3, :] = np.nan + result = df.resample("Q", axis=1).mean() + expected = df.groupby(lambda x: int((x.month - 1) / 3), axis=1).mean() + expected.columns = [Period(year=2000, quarter=i + 1, freq="Q") for i in range(4)] + tm.assert_frame_equal(result, expected) + + +def test_resample_reresample(): + dti = date_range(start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="D") + s = Series(np.random.rand(len(dti)), dti) + bs = s.resample("B", closed="right", label="right").mean() + result = bs.resample("8H").mean() + assert len(result) == 22 + assert isinstance(result.index.freq, offsets.DateOffset) + assert result.index.freq == offsets.Hour(8) + + +def test_resample_timestamp_to_period(simple_date_range_series): + ts = simple_date_range_series("1/1/1990", "1/1/2000") + + result = ts.resample("A-DEC", kind="period").mean() + expected = ts.resample("A-DEC").mean() + expected.index = period_range("1990", "2000", freq="a-dec") + tm.assert_series_equal(result, expected) + + result = ts.resample("A-JUN", kind="period").mean() + expected = ts.resample("A-JUN").mean() + expected.index = period_range("1990", "2000", freq="a-jun") + tm.assert_series_equal(result, expected) + + result = ts.resample("M", kind="period").mean() + expected = ts.resample("M").mean() + expected.index = period_range("1990-01", "2000-01", freq="M") + tm.assert_series_equal(result, expected) + + result = ts.resample("M", kind="period").mean() + expected = ts.resample("M").mean() + expected.index = period_range("1990-01", "2000-01", freq="M") + tm.assert_series_equal(result, expected) + + +def test_ohlc_5min(): + def _ohlc(group): + if isna(group).all(): + return np.repeat(np.nan, 4) + return [group[0], group.max(), group.min(), group[-1]] + + rng = date_range("1/1/2000 00:00:00", "1/1/2000 5:59:50", freq="10s") + ts = Series(np.random.randn(len(rng)), index=rng) + + resampled = ts.resample("5min", closed="right", label="right").ohlc() + + assert (resampled.loc["1/1/2000 00:00"] == ts[0]).all() + + exp = _ohlc(ts[1:31]) + assert (resampled.loc["1/1/2000 00:05"] == exp).all() + + exp = _ohlc(ts["1/1/2000 5:55:01":]) + assert (resampled.loc["1/1/2000 6:00:00"] == exp).all() + + +def test_downsample_non_unique(): + rng = date_range("1/1/2000", "2/29/2000") + rng2 = rng.repeat(5).values + ts = Series(np.random.randn(len(rng2)), index=rng2) + + result = ts.resample("M").mean() + + expected = ts.groupby(lambda x: x.month).mean() + assert len(result) == 2 + tm.assert_almost_equal(result[0], expected[1]) + tm.assert_almost_equal(result[1], expected[2]) + + +def test_asfreq_non_unique(): + # GH #1077 + rng = date_range("1/1/2000", "2/29/2000") + rng2 = rng.repeat(2).values + ts = Series(np.random.randn(len(rng2)), index=rng2) + + msg = "cannot reindex on an axis with duplicate labels" + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + ts.asfreq("B") + + +def test_resample_axis1(): + rng = date_range("1/1/2000", "2/29/2000") + df = DataFrame(np.random.randn(3, len(rng)), columns=rng, index=["a", "b", "c"]) + + result = df.resample("M", axis=1).mean() + expected = 
df.T.resample("M").mean().T + tm.assert_frame_equal(result, expected) + + +def test_resample_anchored_ticks(): + # If a fixed delta (5 minute, 4 hour) evenly divides a day, we should + # "anchor" the origin at midnight so we get regular intervals rather + # than starting from the first timestamp which might start in the + # middle of a desired interval + + rng = date_range("1/1/2000 04:00:00", periods=86400, freq="s") + ts = Series(np.random.randn(len(rng)), index=rng) + ts[:2] = np.nan # so results are the same + + freqs = ["t", "5t", "15t", "30t", "4h", "12h"] + for freq in freqs: + result = ts[2:].resample(freq, closed="left", label="left").mean() + expected = ts.resample(freq, closed="left", label="left").mean() + tm.assert_series_equal(result, expected) + + +def test_resample_single_group(): + mysum = lambda x: x.sum() + + rng = date_range("2000-1-1", "2000-2-10", freq="D") + ts = Series(np.random.randn(len(rng)), index=rng) + tm.assert_series_equal(ts.resample("M").sum(), ts.resample("M").apply(mysum)) + + rng = date_range("2000-1-1", "2000-1-10", freq="D") + ts = Series(np.random.randn(len(rng)), index=rng) + tm.assert_series_equal(ts.resample("M").sum(), ts.resample("M").apply(mysum)) + + # GH 3849 + s = Series( + [30.1, 31.6], + index=[Timestamp("20070915 15:30:00"), Timestamp("20070915 15:40:00")], + ) + expected = Series([0.75], index=DatetimeIndex([Timestamp("20070915")], freq="D")) + result = s.resample("D").apply(lambda x: np.std(x)) + tm.assert_series_equal(result, expected) + + +def test_resample_offset(): + # GH 31809 + + rng = date_range("1/1/2000 00:00:00", "1/1/2000 02:00", freq="s") + ts = Series(np.random.randn(len(rng)), index=rng) + + resampled = ts.resample("5min", offset="2min").mean() + exp_rng = date_range("12/31/1999 23:57:00", "1/1/2000 01:57", freq="5min") + tm.assert_index_equal(resampled.index, exp_rng) + + +def test_resample_origin(): + # GH 31809 + rng = date_range("2000-01-01 00:00:00", "2000-01-01 02:00", freq="s") + ts = Series(np.random.randn(len(rng)), index=rng) + + exp_rng = date_range("1999-12-31 23:57:00", "2000-01-01 01:57", freq="5min") + + resampled = ts.resample("5min", origin="1999-12-31 23:57:00").mean() + tm.assert_index_equal(resampled.index, exp_rng) + + offset_timestamp = Timestamp(0) + Timedelta("2min") + resampled = ts.resample("5min", origin=offset_timestamp).mean() + tm.assert_index_equal(resampled.index, exp_rng) + + resampled = ts.resample("5min", origin="epoch", offset="2m").mean() + tm.assert_index_equal(resampled.index, exp_rng) + + # origin of '1999-12-31 12:02:00' should be equivalent for this case + resampled = ts.resample("5min", origin="1999-12-31 12:02:00").mean() + tm.assert_index_equal(resampled.index, exp_rng) + + resampled = ts.resample("5min", offset="-3m").mean() + tm.assert_index_equal(resampled.index, exp_rng) + + +@pytest.mark.parametrize( + "origin", ["invalid_value", "epch", "startday", "startt", "2000-30-30", object()] +) +def test_resample_bad_origin(origin): + rng = date_range("2000-01-01 00:00:00", "2000-01-01 02:00", freq="s") + ts = Series(np.random.randn(len(rng)), index=rng) + msg = ( + "'origin' should be equal to 'epoch', 'start', 'start_day', " + "'end', 'end_day' or should be a Timestamp convertible type. Got " + f"'{origin}' instead." 
+ ) + with pytest.raises(ValueError, match=msg): + ts.resample("5min", origin=origin) + + +@pytest.mark.parametrize("offset", ["invalid_value", "12dayys", "2000-30-30", object()]) +def test_resample_bad_offset(offset): + rng = date_range("2000-01-01 00:00:00", "2000-01-01 02:00", freq="s") + ts = Series(np.random.randn(len(rng)), index=rng) + msg = f"'offset' should be a Timedelta convertible type. Got '{offset}' instead." + with pytest.raises(ValueError, match=msg): + ts.resample("5min", offset=offset) + + +def test_resample_origin_prime_freq(): + # GH 31809 + start, end = "2000-10-01 23:30:00", "2000-10-02 00:30:00" + rng = date_range(start, end, freq="7min") + ts = Series(np.random.randn(len(rng)), index=rng) + + exp_rng = date_range("2000-10-01 23:14:00", "2000-10-02 00:22:00", freq="17min") + resampled = ts.resample("17min").mean() + tm.assert_index_equal(resampled.index, exp_rng) + resampled = ts.resample("17min", origin="start_day").mean() + tm.assert_index_equal(resampled.index, exp_rng) + + exp_rng = date_range("2000-10-01 23:30:00", "2000-10-02 00:21:00", freq="17min") + resampled = ts.resample("17min", origin="start").mean() + tm.assert_index_equal(resampled.index, exp_rng) + resampled = ts.resample("17min", offset="23h30min").mean() + tm.assert_index_equal(resampled.index, exp_rng) + resampled = ts.resample("17min", origin="start_day", offset="23h30min").mean() + tm.assert_index_equal(resampled.index, exp_rng) + + exp_rng = date_range("2000-10-01 23:18:00", "2000-10-02 00:26:00", freq="17min") + resampled = ts.resample("17min", origin="epoch").mean() + tm.assert_index_equal(resampled.index, exp_rng) + + exp_rng = date_range("2000-10-01 23:24:00", "2000-10-02 00:15:00", freq="17min") + resampled = ts.resample("17min", origin="2000-01-01").mean() + tm.assert_index_equal(resampled.index, exp_rng) + + +def test_resample_origin_with_tz(): + # GH 31809 + msg = "The origin must have the same timezone as the index." 
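+ # [Editor's note, not part of the upstream file] Sketch of the rule this
+ # test asserts: a tz-aware origin in a *different* zone is accepted (it is
+ # simply converted), but mixing a tz-aware index with a tz-naive origin,
+ # or a tz-naive index with a tz-aware origin, raises ValueError(msg).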
+ + tz = "Europe/Paris" + rng = date_range("2000-01-01 00:00:00", "2000-01-01 02:00", freq="s", tz=tz) + ts = Series(np.random.randn(len(rng)), index=rng) + + exp_rng = date_range("1999-12-31 23:57:00", "2000-01-01 01:57", freq="5min", tz=tz) + resampled = ts.resample("5min", origin="1999-12-31 23:57:00+00:00").mean() + tm.assert_index_equal(resampled.index, exp_rng) + + # origin of '1999-31-12 12:02:00+03:00' should be equivalent for this case + resampled = ts.resample("5min", origin="1999-12-31 12:02:00+03:00").mean() + tm.assert_index_equal(resampled.index, exp_rng) + + resampled = ts.resample("5min", origin="epoch", offset="2m").mean() + tm.assert_index_equal(resampled.index, exp_rng) + + with pytest.raises(ValueError, match=msg): + ts.resample("5min", origin="12/31/1999 23:57:00").mean() + + # if the series is not tz aware, origin should not be tz aware + rng = date_range("2000-01-01 00:00:00", "2000-01-01 02:00", freq="s") + ts = Series(np.random.randn(len(rng)), index=rng) + with pytest.raises(ValueError, match=msg): + ts.resample("5min", origin="12/31/1999 23:57:00+03:00").mean() + + +def test_resample_origin_epoch_with_tz_day_vs_24h(): + # GH 34474 + start, end = "2000-10-01 23:30:00+0500", "2000-12-02 00:30:00+0500" + rng = date_range(start, end, freq="7min") + random_values = np.random.randn(len(rng)) + ts_1 = Series(random_values, index=rng) + + result_1 = ts_1.resample("D", origin="epoch").mean() + result_2 = ts_1.resample("24H", origin="epoch").mean() + tm.assert_series_equal(result_1, result_2) + + # check that we have the same behavior with epoch even if we are not timezone aware + ts_no_tz = ts_1.tz_localize(None) + result_3 = ts_no_tz.resample("D", origin="epoch").mean() + result_4 = ts_no_tz.resample("24H", origin="epoch").mean() + tm.assert_series_equal(result_1, result_3.tz_localize(rng.tz), check_freq=False) + tm.assert_series_equal(result_1, result_4.tz_localize(rng.tz), check_freq=False) + + # check that we have the similar results with two different timezones (+2H and +5H) + start, end = "2000-10-01 23:30:00+0200", "2000-12-02 00:30:00+0200" + rng = date_range(start, end, freq="7min") + ts_2 = Series(random_values, index=rng) + result_5 = ts_2.resample("D", origin="epoch").mean() + result_6 = ts_2.resample("24H", origin="epoch").mean() + tm.assert_series_equal(result_1.tz_localize(None), result_5.tz_localize(None)) + tm.assert_series_equal(result_1.tz_localize(None), result_6.tz_localize(None)) + + +def test_resample_origin_with_day_freq_on_dst(): + # GH 31809 + tz = "America/Chicago" + + def _create_series(values, timestamps, freq="D"): + return Series( + values, + index=DatetimeIndex( + [Timestamp(t, tz=tz) for t in timestamps], freq=freq, ambiguous=True + ), + ) + + # test classical behavior of origin in a DST context + start = Timestamp("2013-11-02", tz=tz) + end = Timestamp("2013-11-03 23:59", tz=tz) + rng = date_range(start, end, freq="1h") + ts = Series(np.ones(len(rng)), index=rng) + + expected = _create_series([24.0, 25.0], ["2013-11-02", "2013-11-03"]) + for origin in ["epoch", "start", "start_day", start, None]: + result = ts.resample("D", origin=origin).sum() + tm.assert_series_equal(result, expected) + + # test complex behavior of origin/offset in a DST context + start = Timestamp("2013-11-03", tz=tz) + end = Timestamp("2013-11-03 23:59", tz=tz) + rng = date_range(start, end, freq="1h") + ts = Series(np.ones(len(rng)), index=rng) + + expected_ts = ["2013-11-02 22:00-05:00", "2013-11-03 22:00-06:00"] + expected = _create_series([23.0, 2.0], expected_ts) 
+ result = ts.resample("D", origin="start", offset="-2H").sum() + tm.assert_series_equal(result, expected) + + expected_ts = ["2013-11-02 22:00-05:00", "2013-11-03 21:00-06:00"] + expected = _create_series([22.0, 3.0], expected_ts, freq="24H") + result = ts.resample("24H", origin="start", offset="-2H").sum() + tm.assert_series_equal(result, expected) + + expected_ts = ["2013-11-02 02:00-05:00", "2013-11-03 02:00-06:00"] + expected = _create_series([3.0, 22.0], expected_ts) + result = ts.resample("D", origin="start", offset="2H").sum() + tm.assert_series_equal(result, expected) + + expected_ts = ["2013-11-02 23:00-05:00", "2013-11-03 23:00-06:00"] + expected = _create_series([24.0, 1.0], expected_ts) + result = ts.resample("D", origin="start", offset="-1H").sum() + tm.assert_series_equal(result, expected) + + expected_ts = ["2013-11-02 01:00-05:00", "2013-11-03 01:00:00-0500"] + expected = _create_series([1.0, 24.0], expected_ts) + result = ts.resample("D", origin="start", offset="1H").sum() + tm.assert_series_equal(result, expected) + + +def test_resample_daily_anchored(): + rng = date_range("1/1/2000 0:00:00", periods=10000, freq="T") + ts = Series(np.random.randn(len(rng)), index=rng) + ts[:2] = np.nan # so results are the same + + result = ts[2:].resample("D", closed="left", label="left").mean() + expected = ts.resample("D", closed="left", label="left").mean() + tm.assert_series_equal(result, expected) + + +def test_resample_to_period_monthly_buglet(): + # GH #1259 + + rng = date_range("1/1/2000", "12/31/2000") + ts = Series(np.random.randn(len(rng)), index=rng) + + result = ts.resample("M", kind="period").mean() + exp_index = period_range("Jan-2000", "Dec-2000", freq="M") + tm.assert_index_equal(result.index, exp_index) + + +def test_period_with_agg(): + + # aggregate a period resampler with a lambda + s2 = Series( + np.random.randint(0, 5, 50), + index=period_range("2012-01-01", freq="H", periods=50), + dtype="float64", + ) + + expected = s2.to_timestamp().resample("D").mean().to_period() + result = s2.resample("D").agg(lambda x: x.mean()) + tm.assert_series_equal(result, expected) + + +def test_resample_segfault(): + # GH 8573 + # segfaulting in older versions + all_wins_and_wagers = [ + (1, datetime(2013, 10, 1, 16, 20), 1, 0), + (2, datetime(2013, 10, 1, 16, 10), 1, 0), + (2, datetime(2013, 10, 1, 18, 15), 1, 0), + (2, datetime(2013, 10, 1, 16, 10, 31), 1, 0), + ] + + df = DataFrame.from_records( + all_wins_and_wagers, columns=("ID", "timestamp", "A", "B") + ).set_index("timestamp") + result = df.groupby("ID").resample("5min").sum() + expected = df.groupby("ID").apply(lambda x: x.resample("5min").sum()) + tm.assert_frame_equal(result, expected) + + +def test_resample_dtype_preservation(): + + # GH 12202 + # validation tests for dtype preservation + + df = DataFrame( + { + "date": date_range(start="2016-01-01", periods=4, freq="W"), + "group": [1, 1, 2, 2], + "val": Series([5, 6, 7, 8], dtype="int32"), + } + ).set_index("date") + + result = df.resample("1D").ffill() + assert result.val.dtype == np.int32 + + result = df.groupby("group").resample("1D").ffill() + assert result.val.dtype == np.int32 + + +def test_resample_dtype_coercion(): + + pytest.importorskip("scipy.interpolate") + + # GH 16361 + df = {"a": [1, 3, 1, 4]} + df = DataFrame(df, index=date_range("2017-01-01", "2017-01-04")) + + expected = df.astype("float64").resample("H").mean()["a"].interpolate("cubic") + + result = df.resample("H")["a"].mean().interpolate("cubic") + tm.assert_series_equal(result, expected) + + result 
= df.resample("H").mean()["a"].interpolate("cubic") + tm.assert_series_equal(result, expected) + + +def test_weekly_resample_buglet(): + # #1327 + rng = date_range("1/1/2000", freq="B", periods=20) + ts = Series(np.random.randn(len(rng)), index=rng) + + resampled = ts.resample("W").mean() + expected = ts.resample("W-SUN").mean() + tm.assert_series_equal(resampled, expected) + + +def test_monthly_resample_error(): + # #1451 + dates = date_range("4/16/2012 20:00", periods=5000, freq="h") + ts = Series(np.random.randn(len(dates)), index=dates) + # it works! + ts.resample("M") + + +def test_nanosecond_resample_error(): + # GH 12307 - Values falls after last bin when + # Resampling using pd.tseries.offsets.Nano as period + start = 1443707890427 + exp_start = 1443707890400 + indx = date_range(start=pd.to_datetime(start), periods=10, freq="100n") + ts = Series(range(len(indx)), index=indx) + r = ts.resample(pd.tseries.offsets.Nano(100)) + result = r.agg("mean") + + exp_indx = date_range(start=pd.to_datetime(exp_start), periods=10, freq="100n") + exp = Series(range(len(exp_indx)), index=exp_indx, dtype=float) + + tm.assert_series_equal(result, exp) + + +def test_resample_anchored_intraday(simple_date_range_series): + # #1471, #1458 + + rng = date_range("1/1/2012", "4/1/2012", freq="100min") + df = DataFrame(rng.month, index=rng) + + result = df.resample("M").mean() + expected = df.resample("M", kind="period").mean().to_timestamp(how="end") + expected.index += Timedelta(1, "ns") - Timedelta(1, "D") + expected.index = expected.index._with_freq("infer") + assert expected.index.freq == "M" + tm.assert_frame_equal(result, expected) + + result = df.resample("M", closed="left").mean() + exp = df.shift(1, freq="D").resample("M", kind="period").mean() + exp = exp.to_timestamp(how="end") + + exp.index = exp.index + Timedelta(1, "ns") - Timedelta(1, "D") + exp.index = exp.index._with_freq("infer") + assert exp.index.freq == "M" + tm.assert_frame_equal(result, exp) + + rng = date_range("1/1/2012", "4/1/2012", freq="100min") + df = DataFrame(rng.month, index=rng) + + result = df.resample("Q").mean() + expected = df.resample("Q", kind="period").mean().to_timestamp(how="end") + expected.index += Timedelta(1, "ns") - Timedelta(1, "D") + expected.index._data.freq = "Q" + expected.index._freq = lib.no_default + tm.assert_frame_equal(result, expected) + + result = df.resample("Q", closed="left").mean() + expected = df.shift(1, freq="D").resample("Q", kind="period", closed="left").mean() + expected = expected.to_timestamp(how="end") + expected.index += Timedelta(1, "ns") - Timedelta(1, "D") + expected.index._data.freq = "Q" + expected.index._freq = lib.no_default + tm.assert_frame_equal(result, expected) + + ts = simple_date_range_series("2012-04-29 23:00", "2012-04-30 5:00", freq="h") + resampled = ts.resample("M").mean() + assert len(resampled) == 1 + + +def test_resample_anchored_monthstart(simple_date_range_series): + ts = simple_date_range_series("1/1/2000", "12/31/2002") + + freqs = ["MS", "BMS", "QS-MAR", "AS-DEC", "AS-JUN"] + + for freq in freqs: + ts.resample(freq).mean() + + +def test_resample_anchored_multiday(): + # When resampling a range spanning multiple days, ensure that the + # start date gets used to determine the offset. Fixes issue where + # a one day period is not a multiple of the frequency. 
+ # + # See: https://github.com/pandas-dev/pandas/issues/8683 + + index1 = date_range("2014-10-14 23:06:23.206", periods=3, freq="400L") + index2 = date_range("2014-10-15 23:00:00", periods=2, freq="2200L") + index = index1.union(index2) + + s = Series(np.random.randn(5), index=index) + + # Ensure left closing works + result = s.resample("2200L").mean() + assert result.index[-1] == Timestamp("2014-10-15 23:00:02.000") + + # Ensure right closing works + result = s.resample("2200L", label="right").mean() + assert result.index[-1] == Timestamp("2014-10-15 23:00:04.200") + + +def test_corner_cases(simple_period_range_series, simple_date_range_series): + # miscellaneous test coverage + + rng = date_range("1/1/2000", periods=12, freq="t") + ts = Series(np.random.randn(len(rng)), index=rng) + + result = ts.resample("5t", closed="right", label="left").mean() + ex_index = date_range("1999-12-31 23:55", periods=4, freq="5t") + tm.assert_index_equal(result.index, ex_index) + + len0pts = simple_period_range_series("2007-01", "2010-05", freq="M")[:0] + # it works + result = len0pts.resample("A-DEC").mean() + assert len(result) == 0 + + # resample to periods + ts = simple_date_range_series("2000-04-28", "2000-04-30 11:00", freq="h") + result = ts.resample("M", kind="period").mean() + assert len(result) == 1 + assert result.index[0] == Period("2000-04", freq="M") + + +def test_anchored_lowercase_buglet(): + dates = date_range("4/16/2012 20:00", periods=50000, freq="s") + ts = Series(np.random.randn(len(dates)), index=dates) + # it works! + ts.resample("d").mean() + + +def test_upsample_apply_functions(): + # #1596 + rng = date_range("2012-06-12", periods=4, freq="h") + + ts = Series(np.random.randn(len(rng)), index=rng) + + result = ts.resample("20min").aggregate(["mean", "sum"]) + assert isinstance(result, DataFrame) + + +def test_resample_not_monotonic(): + rng = date_range("2012-06-12", periods=200, freq="h") + ts = Series(np.random.randn(len(rng)), index=rng) + + ts = ts.take(np.random.permutation(len(ts))) + + result = ts.resample("D").sum() + exp = ts.sort_index().resample("D").sum() + tm.assert_series_equal(result, exp) + + +def test_resample_median_bug_1688(): + + for dtype in ["int64", "int32", "float64", "float32"]: + df = DataFrame( + [1, 2], + index=[datetime(2012, 1, 1, 0, 0, 0), datetime(2012, 1, 1, 0, 5, 0)], + dtype=dtype, + ) + + result = df.resample("T").apply(lambda x: x.mean()) + exp = df.asfreq("T") + if dtype == "float32": + # TODO: Empty groups cause x.mean() to return float64 + exp = exp.astype("float64") + tm.assert_frame_equal(result, exp) + + result = df.resample("T").median() + exp = df.asfreq("T") + tm.assert_frame_equal(result, exp) + + +def test_how_lambda_functions(simple_date_range_series): + + ts = simple_date_range_series("1/1/2000", "4/1/2000") + + result = ts.resample("M").apply(lambda x: x.mean()) + exp = ts.resample("M").mean() + tm.assert_series_equal(result, exp) + + foo_exp = ts.resample("M").mean() + foo_exp.name = "foo" + bar_exp = ts.resample("M").std() + bar_exp.name = "bar" + + result = ts.resample("M").apply([lambda x: x.mean(), lambda x: x.std(ddof=1)]) + result.columns = ["foo", "bar"] + tm.assert_series_equal(result["foo"], foo_exp) + tm.assert_series_equal(result["bar"], bar_exp) + + # this is a MI Series, so comparing the names of the results + # doesn't make sense + result = ts.resample("M").aggregate( + {"foo": lambda x: x.mean(), "bar": lambda x: x.std(ddof=1)} + ) + tm.assert_series_equal(result["foo"], foo_exp, check_names=False) + 
tm.assert_series_equal(result["bar"], bar_exp, check_names=False) + + +def test_resample_unequal_times(): + # #1772 + start = datetime(1999, 3, 1, 5) + # end hour is less than start + end = datetime(2012, 7, 31, 4) + bad_ind = date_range(start, end, freq="30min") + df = DataFrame({"close": 1}, index=bad_ind) + + # it works! + df.resample("AS").sum() + + +def test_resample_consistency(): + + # GH 6418 + # resample with bfill / limit / reindex consistency + + i30 = date_range("2002-02-02", periods=4, freq="30T") + s = Series(np.arange(4.0), index=i30) + s[2] = np.NaN + + # Upsample by factor 3 with reindex() and resample() methods: + i10 = date_range(i30[0], i30[-1], freq="10T") + + s10 = s.reindex(index=i10, method="bfill") + s10_2 = s.reindex(index=i10, method="bfill", limit=2) + rl = s.reindex_like(s10, method="bfill", limit=2) + r10_2 = s.resample("10Min").bfill(limit=2) + r10 = s.resample("10Min").bfill() + + # s10_2, r10, r10_2, rl should all be equal + tm.assert_series_equal(s10_2, r10) + tm.assert_series_equal(s10_2, r10_2) + tm.assert_series_equal(s10_2, rl) + + +def test_resample_timegrouper(): + # GH 7227 + dates1 = [ + datetime(2014, 10, 1), + datetime(2014, 9, 3), + datetime(2014, 11, 5), + datetime(2014, 9, 5), + datetime(2014, 10, 8), + datetime(2014, 7, 15), + ] + + dates2 = dates1[:2] + [pd.NaT] + dates1[2:4] + [pd.NaT] + dates1[4:] + dates3 = [pd.NaT] + dates1 + [pd.NaT] + + for dates in [dates1, dates2, dates3]: + df = DataFrame({"A": dates, "B": np.arange(len(dates))}) + result = df.set_index("A").resample("M").count() + exp_idx = DatetimeIndex( + ["2014-07-31", "2014-08-31", "2014-09-30", "2014-10-31", "2014-11-30"], + freq="M", + name="A", + ) + expected = DataFrame({"B": [1, 0, 2, 2, 1]}, index=exp_idx) + if df["A"].isna().any(): + expected.index = expected.index._with_freq(None) + tm.assert_frame_equal(result, expected) + + result = df.groupby(Grouper(freq="M", key="A")).count() + tm.assert_frame_equal(result, expected) + + df = DataFrame( + {"A": dates, "B": np.arange(len(dates)), "C": np.arange(len(dates))} + ) + result = df.set_index("A").resample("M").count() + expected = DataFrame( + {"B": [1, 0, 2, 2, 1], "C": [1, 0, 2, 2, 1]}, + index=exp_idx, + columns=["B", "C"], + ) + if df["A"].isna().any(): + expected.index = expected.index._with_freq(None) + tm.assert_frame_equal(result, expected) + + result = df.groupby(Grouper(freq="M", key="A")).count() + tm.assert_frame_equal(result, expected) + + +def test_resample_nunique(): + + # GH 12352 + df = DataFrame( + { + "ID": { + Timestamp("2015-06-05 00:00:00"): "0010100903", + Timestamp("2015-06-08 00:00:00"): "0010150847", + }, + "DATE": { + Timestamp("2015-06-05 00:00:00"): "2015-06-05", + Timestamp("2015-06-08 00:00:00"): "2015-06-08", + }, + } + ) + r = df.resample("D") + g = df.groupby(Grouper(freq="D")) + expected = df.groupby(Grouper(freq="D")).ID.apply(lambda x: x.nunique()) + assert expected.name == "ID" + + for t in [r, g]: + result = t.ID.nunique() + tm.assert_series_equal(result, expected) + + result = df.ID.resample("D").nunique() + tm.assert_series_equal(result, expected) + + result = df.ID.groupby(Grouper(freq="D")).nunique() + tm.assert_series_equal(result, expected) + + +def test_resample_nunique_preserves_column_level_names(): + # see gh-23222 + df = tm.makeTimeDataFrame(freq="1D").abs() + df.columns = pd.MultiIndex.from_arrays( + [df.columns.tolist()] * 2, names=["lev0", "lev1"] + ) + result = df.resample("1h").nunique() + tm.assert_index_equal(df.columns, result.columns) + + +def 
test_resample_nunique_with_date_gap(): + # GH 13453 + index = date_range("1-1-2000", "2-15-2000", freq="h") + index2 = date_range("4-15-2000", "5-15-2000", freq="h") + index3 = index.append(index2) + s = Series(range(len(index3)), index=index3, dtype="int64") + r = s.resample("M") + + # Since all elements are unique, these should all be the same + results = [r.count(), r.nunique(), r.agg(Series.nunique), r.agg("nunique")] + + tm.assert_series_equal(results[0], results[1]) + tm.assert_series_equal(results[0], results[2]) + tm.assert_series_equal(results[0], results[3]) + + +@pytest.mark.parametrize("n", [10000, 100000]) +@pytest.mark.parametrize("k", [10, 100, 1000]) +def test_resample_group_info(n, k): + # GH10914 + + # use a fixed seed to always have the same uniques + prng = np.random.RandomState(1234) + + dr = date_range(start="2015-08-27", periods=n // 10, freq="T") + ts = Series(prng.randint(0, n // k, n).astype("int64"), index=prng.choice(dr, n)) + + left = ts.resample("30T").nunique() + ix = date_range(start=ts.index.min(), end=ts.index.max(), freq="30T") + + vals = ts.values + bins = np.searchsorted(ix.values, ts.index, side="right") + + sorter = np.lexsort((vals, bins)) + vals, bins = vals[sorter], bins[sorter] + + mask = np.r_[True, vals[1:] != vals[:-1]] + mask |= np.r_[True, bins[1:] != bins[:-1]] + + arr = np.bincount(bins[mask] - 1, minlength=len(ix)).astype("int64", copy=False) + right = Series(arr, index=ix) + + tm.assert_series_equal(left, right) + + +def test_resample_size(): + n = 10000 + dr = date_range("2015-09-19", periods=n, freq="T") + ts = Series(np.random.randn(n), index=np.random.choice(dr, n)) + + left = ts.resample("7T").size() + ix = date_range(start=left.index.min(), end=ts.index.max(), freq="7T") + + bins = np.searchsorted(ix.values, ts.index.values, side="right") + val = np.bincount(bins, minlength=len(ix) + 1)[1:].astype("int64", copy=False) + + right = Series(val, index=ix) + tm.assert_series_equal(left, right) + + +def test_resample_across_dst(): + # The test resamples a DatetimeIndex with values before and after a + # DST change + # Issue: 14682 + + # The DatetimeIndex we will start with + # (note that DST happens at 03:00+02:00 -> 02:00+01:00) + # 2016-10-30 02:23:00+02:00, 2016-10-30 02:23:00+01:00 + df1 = DataFrame([1477786980, 1477790580], columns=["ts"]) + dti1 = DatetimeIndex( + pd.to_datetime(df1.ts, unit="s") + .dt.tz_localize("UTC") + .dt.tz_convert("Europe/Madrid") + ) + + # The expected DatetimeIndex after resampling. 
+ # 2016-10-30 02:00:00+02:00, 2016-10-30 02:00:00+01:00 + df2 = DataFrame([1477785600, 1477789200], columns=["ts"]) + dti2 = DatetimeIndex( + pd.to_datetime(df2.ts, unit="s") + .dt.tz_localize("UTC") + .dt.tz_convert("Europe/Madrid"), + freq="H", + ) + df = DataFrame([5, 5], index=dti1) + + result = df.resample(rule="H").sum() + expected = DataFrame([5, 5], index=dti2) + + tm.assert_frame_equal(result, expected) + + +def test_groupby_with_dst_time_change(): + # GH 24972 + index = DatetimeIndex( + [1478064900001000000, 1480037118776792000], tz="UTC" + ).tz_convert("America/Chicago") + + df = DataFrame([1, 2], index=index) + result = df.groupby(Grouper(freq="1d")).last() + expected_index_values = date_range( + "2016-11-02", "2016-11-24", freq="d", tz="America/Chicago" + ) + + index = DatetimeIndex(expected_index_values) + expected = DataFrame([1.0] + ([np.nan] * 21) + [2.0], index=index) + tm.assert_frame_equal(result, expected) + + +def test_resample_dst_anchor(): + # 5172 + dti = DatetimeIndex([datetime(2012, 11, 4, 23)], tz="US/Eastern") + df = DataFrame([5], index=dti) + + dti = DatetimeIndex(df.index.normalize(), freq="D") + expected = DataFrame([5], index=dti) + tm.assert_frame_equal(df.resample(rule="D").sum(), expected) + df.resample(rule="MS").sum() + tm.assert_frame_equal( + df.resample(rule="MS").sum(), + DataFrame( + [5], + index=DatetimeIndex([datetime(2012, 11, 1)], tz="US/Eastern", freq="MS"), + ), + ) + + dti = date_range("2013-09-30", "2013-11-02", freq="30Min", tz="Europe/Paris") + values = range(dti.size) + df = DataFrame({"a": values, "b": values, "c": values}, index=dti, dtype="int64") + how = {"a": "min", "b": "max", "c": "count"} + + tm.assert_frame_equal( + df.resample("W-MON").agg(how)[["a", "b", "c"]], + DataFrame( + { + "a": [0, 48, 384, 720, 1056, 1394], + "b": [47, 383, 719, 1055, 1393, 1586], + "c": [48, 336, 336, 336, 338, 193], + }, + index=date_range("9/30/2013", "11/4/2013", freq="W-MON", tz="Europe/Paris"), + ), + "W-MON Frequency", + ) + + tm.assert_frame_equal( + df.resample("2W-MON").agg(how)[["a", "b", "c"]], + DataFrame( + { + "a": [0, 48, 720, 1394], + "b": [47, 719, 1393, 1586], + "c": [48, 672, 674, 193], + }, + index=date_range( + "9/30/2013", "11/11/2013", freq="2W-MON", tz="Europe/Paris" + ), + ), + "2W-MON Frequency", + ) + + tm.assert_frame_equal( + df.resample("MS").agg(how)[["a", "b", "c"]], + DataFrame( + {"a": [0, 48, 1538], "b": [47, 1537, 1586], "c": [48, 1490, 49]}, + index=date_range("9/1/2013", "11/1/2013", freq="MS", tz="Europe/Paris"), + ), + "MS Frequency", + ) + + tm.assert_frame_equal( + df.resample("2MS").agg(how)[["a", "b", "c"]], + DataFrame( + {"a": [0, 1538], "b": [1537, 1586], "c": [1538, 49]}, + index=date_range("9/1/2013", "11/1/2013", freq="2MS", tz="Europe/Paris"), + ), + "2MS Frequency", + ) + + df_daily = df["10/26/2013":"10/29/2013"] + tm.assert_frame_equal( + df_daily.resample("D").agg({"a": "min", "b": "max", "c": "count"})[ + ["a", "b", "c"] + ], + DataFrame( + { + "a": [1248, 1296, 1346, 1394], + "b": [1295, 1345, 1393, 1441], + "c": [48, 50, 48, 48], + }, + index=date_range("10/26/2013", "10/29/2013", freq="D", tz="Europe/Paris"), + ), + "D Frequency", + ) + + +def test_downsample_across_dst(): + # GH 8531 + tz = pytz.timezone("Europe/Berlin") + dt = datetime(2014, 10, 26) + dates = date_range(tz.localize(dt), periods=4, freq="2H") + result = Series(5, index=dates).resample("H").mean() + expected = Series( + [5.0, np.nan] * 3 + [5.0], + index=date_range(tz.localize(dt), periods=7, freq="H"), + ) + 
tm.assert_series_equal(result, expected) + + +def test_downsample_across_dst_weekly(): + # GH 9119, GH 21459 + df = DataFrame( + index=DatetimeIndex( + ["2017-03-25", "2017-03-26", "2017-03-27", "2017-03-28", "2017-03-29"], + tz="Europe/Amsterdam", + ), + data=[11, 12, 13, 14, 15], + ) + result = df.resample("1W").sum() + expected = DataFrame( + [23, 42], + index=DatetimeIndex( + ["2017-03-26", "2017-04-02"], tz="Europe/Amsterdam", freq="W" + ), + ) + tm.assert_frame_equal(result, expected) + + idx = date_range("2013-04-01", "2013-05-01", tz="Europe/London", freq="H") + s = Series(index=idx, dtype=np.float64) + result = s.resample("W").mean() + expected = Series( + index=date_range("2013-04-07", freq="W", periods=5, tz="Europe/London"), + dtype=np.float64, + ) + tm.assert_series_equal(result, expected) + + +def test_downsample_dst_at_midnight(): + # GH 25758 + start = datetime(2018, 11, 3, 12) + end = datetime(2018, 11, 5, 12) + index = date_range(start, end, freq="1H") + index = index.tz_localize("UTC").tz_convert("America/Havana") + data = list(range(len(index))) + dataframe = DataFrame(data, index=index) + result = dataframe.groupby(Grouper(freq="1D")).mean() + + dti = date_range("2018-11-03", periods=3).tz_localize( + "America/Havana", ambiguous=True + ) + dti = DatetimeIndex(dti, freq="D") + expected = DataFrame([7.5, 28.0, 44.5], index=dti) + tm.assert_frame_equal(result, expected) + + +def test_resample_with_nat(): + # GH 13020 + index = DatetimeIndex( + [ + pd.NaT, + "1970-01-01 00:00:00", + pd.NaT, + "1970-01-01 00:00:01", + "1970-01-01 00:00:02", + ] + ) + frame = DataFrame([2, 3, 5, 7, 11], index=index) + + index_1s = DatetimeIndex( + ["1970-01-01 00:00:00", "1970-01-01 00:00:01", "1970-01-01 00:00:02"] + ) + frame_1s = DataFrame([3.0, 7.0, 11.0], index=index_1s) + tm.assert_frame_equal(frame.resample("1s").mean(), frame_1s) + + index_2s = DatetimeIndex(["1970-01-01 00:00:00", "1970-01-01 00:00:02"]) + frame_2s = DataFrame([5.0, 11.0], index=index_2s) + tm.assert_frame_equal(frame.resample("2s").mean(), frame_2s) + + index_3s = DatetimeIndex(["1970-01-01 00:00:00"]) + frame_3s = DataFrame([7.0], index=index_3s) + tm.assert_frame_equal(frame.resample("3s").mean(), frame_3s) + + tm.assert_frame_equal(frame.resample("60s").mean(), frame_3s) + + +def test_resample_datetime_values(): + # GH 13119 + # check that datetime dtype is preserved when NaT values are + # introduced by the resampling + + dates = [datetime(2016, 1, 15), datetime(2016, 1, 19)] + df = DataFrame({"timestamp": dates}, index=dates) + + exp = Series( + [datetime(2016, 1, 15), pd.NaT, datetime(2016, 1, 19)], + index=date_range("2016-01-15", periods=3, freq="2D"), + name="timestamp", + ) + + res = df.resample("2D").first()["timestamp"] + tm.assert_series_equal(res, exp) + res = df["timestamp"].resample("2D").first() + tm.assert_series_equal(res, exp) + + +def test_resample_apply_with_additional_args(series): + # GH 14615 + def f(data, add_arg): + return np.mean(data) * add_arg + + multiplier = 10 + result = series.resample("D").apply(f, multiplier) + expected = series.resample("D").mean().multiply(multiplier) + tm.assert_series_equal(result, expected) + + # Testing as kwarg + result = series.resample("D").apply(f, add_arg=multiplier) + expected = series.resample("D").mean().multiply(multiplier) + tm.assert_series_equal(result, expected) + + # Testing dataframe + df = DataFrame({"A": 1, "B": 2}, index=date_range("2017", periods=10)) + result = df.groupby("A").resample("D").agg(f, multiplier).astype(float) + expected = 
df.groupby("A").resample("D").mean().multiply(multiplier) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("k", [1, 2, 3]) +@pytest.mark.parametrize( + "n1, freq1, n2, freq2", + [ + (30, "S", 0.5, "Min"), + (60, "S", 1, "Min"), + (3600, "S", 1, "H"), + (60, "Min", 1, "H"), + (21600, "S", 0.25, "D"), + (86400, "S", 1, "D"), + (43200, "S", 0.5, "D"), + (1440, "Min", 1, "D"), + (12, "H", 0.5, "D"), + (24, "H", 1, "D"), + ], +) +def test_resample_equivalent_offsets(n1, freq1, n2, freq2, k): + # GH 24127 + n1_ = n1 * k + n2_ = n2 * k + s = Series(0, index=date_range("19910905 13:00", "19911005 07:00", freq=freq1)) + s = s + range(len(s)) + + result1 = s.resample(str(n1_) + freq1).mean() + result2 = s.resample(str(n2_) + freq2).mean() + tm.assert_series_equal(result1, result2) + + +@pytest.mark.parametrize( + "first,last,freq,exp_first,exp_last", + [ + ("19910905", "19920406", "D", "19910905", "19920407"), + ("19910905 00:00", "19920406 06:00", "D", "19910905", "19920407"), + ("19910905 06:00", "19920406 06:00", "H", "19910905 06:00", "19920406 07:00"), + ("19910906", "19920406", "M", "19910831", "19920430"), + ("19910831", "19920430", "M", "19910831", "19920531"), + ("1991-08", "1992-04", "M", "19910831", "19920531"), + ], +) +def test_get_timestamp_range_edges(first, last, freq, exp_first, exp_last): + first = Period(first) + first = first.to_timestamp(first.freq) + last = Period(last) + last = last.to_timestamp(last.freq) + + exp_first = Timestamp(exp_first) + exp_last = Timestamp(exp_last) + + freq = pd.tseries.frequencies.to_offset(freq) + result = _get_timestamp_range_edges(first, last, freq) + expected = (exp_first, exp_last) + assert result == expected + + +@pytest.mark.parametrize("duplicates", [True, False]) +def test_resample_apply_product(duplicates): + # GH 5586 + index = date_range(start="2012-01-31", freq="M", periods=12) + + ts = Series(range(12), index=index) + df = DataFrame({"A": ts, "B": ts + 2}) + if duplicates: + df.columns = ["A", "A"] + + result = df.resample("Q").apply(np.product) + expected = DataFrame( + np.array([[0, 24], [60, 210], [336, 720], [990, 1716]], dtype=np.int64), + index=DatetimeIndex( + ["2012-03-31", "2012-06-30", "2012-09-30", "2012-12-31"], freq="Q-DEC" + ), + columns=df.columns, + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "first,last,freq_in,freq_out,exp_last", + [ + ( + "2020-03-28", + "2020-03-31", + "D", + "24H", + "2020-03-30 01:00", + ), # includes transition into DST + ( + "2020-03-28", + "2020-10-27", + "D", + "24H", + "2020-10-27 00:00", + ), # includes transition into and out of DST + ( + "2020-10-25", + "2020-10-27", + "D", + "24H", + "2020-10-26 23:00", + ), # includes transition out of DST + ( + "2020-03-28", + "2020-03-31", + "24H", + "D", + "2020-03-30 00:00", + ), # same as above, but from 24H to D + ("2020-03-28", "2020-10-27", "24H", "D", "2020-10-27 00:00"), + ("2020-10-25", "2020-10-27", "24H", "D", "2020-10-26 00:00"), + ], +) +def test_resample_calendar_day_with_dst( + first: str, last: str, freq_in: str, freq_out: str, exp_last: str +): + # GH 35219 + ts = Series(1.0, date_range(first, last, freq=freq_in, tz="Europe/Amsterdam")) + result = ts.resample(freq_out).ffill() + expected = Series( + 1.0, date_range(first, exp_last, freq=freq_out, tz="Europe/Amsterdam") + ) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("func", ["min", "max", "first", "last"]) +def test_resample_aggregate_functions_min_count(func): + # GH#37768 + index = 
date_range(start="2020", freq="M", periods=3) + ser = Series([1, np.nan, np.nan], index) + result = getattr(ser.resample("Q"), func)(min_count=2) + expected = Series( + [np.nan], + index=DatetimeIndex(["2020-03-31"], dtype="datetime64[ns]", freq="Q-DEC"), + ) + tm.assert_series_equal(result, expected) + + +def test_resample_unsigned_int(any_unsigned_int_numpy_dtype): + # gh-43329 + df = DataFrame( + index=date_range(start="2000-01-01", end="2000-01-03 23", freq="12H"), + columns=["x"], + data=[0, 1, 0] * 2, + dtype=any_unsigned_int_numpy_dtype, + ) + df = df.loc[(df.index < "2000-01-02") | (df.index > "2000-01-03"), :] + + if any_unsigned_int_numpy_dtype == "uint64": + with pytest.raises(RuntimeError, match="empty group with uint64_t"): + result = df.resample("D").max() + else: + result = df.resample("D").max() + + expected = DataFrame( + [1, np.nan, 0], + columns=["x"], + index=date_range(start="2000-01-01", end="2000-01-03 23", freq="D"), + ) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/resample/test_deprecated.py b/.local/lib/python3.8/site-packages/pandas/tests/resample/test_deprecated.py new file mode 100644 index 0000000..126ca05 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/resample/test_deprecated.py @@ -0,0 +1,316 @@ +from datetime import ( + datetime, + timedelta, +) + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Series, +) +import pandas._testing as tm +from pandas.core.indexes.datetimes import date_range +from pandas.core.indexes.period import ( + PeriodIndex, + period_range, +) +from pandas.core.indexes.timedeltas import timedelta_range + +from pandas.tseries.offsets import ( + BDay, + Minute, +) + +DATE_RANGE = (date_range, "dti", datetime(2005, 1, 1), datetime(2005, 1, 10)) +PERIOD_RANGE = (period_range, "pi", datetime(2005, 1, 1), datetime(2005, 1, 10)) +TIMEDELTA_RANGE = (timedelta_range, "tdi", "1 day", "10 day") + +all_ts = pytest.mark.parametrize( + "_index_factory,_series_name,_index_start,_index_end", + [DATE_RANGE, PERIOD_RANGE, TIMEDELTA_RANGE], +) + + +@pytest.fixture() +def _index_factory(): + return period_range + + +@pytest.fixture +def create_index(_index_factory): + def _create_index(*args, **kwargs): + """return the _index_factory created using the args, kwargs""" + return _index_factory(*args, **kwargs) + + return _create_index + + +# new test to check that all FutureWarning are triggered +def test_deprecating_on_loffset_and_base(): + # GH 31809 + + idx = date_range("2001-01-01", periods=4, freq="T") + df = DataFrame(data=4 * [range(2)], index=idx, columns=["a", "b"]) + + with tm.assert_produces_warning(FutureWarning): + pd.Grouper(freq="10s", base=0) + with tm.assert_produces_warning(FutureWarning): + pd.Grouper(freq="10s", loffset="0s") + + # not checking the stacklevel for .groupby().resample() because it's complicated to + # reconcile it with the stacklevel for Series.resample() and DataFrame.resample(); + # see GH #37603 + with tm.assert_produces_warning(FutureWarning): + df.groupby("a").resample("3T", base=0).sum() + with tm.assert_produces_warning(FutureWarning): + df.groupby("a").resample("3T", loffset="0s").sum() + msg = "'offset' and 'base' cannot be present at the same time" + with tm.assert_produces_warning(FutureWarning): + with pytest.raises(ValueError, match=msg): + df.groupby("a").resample("3T", base=0, offset=0).sum() + + with tm.assert_produces_warning(FutureWarning): + df.resample("3T", base=0).sum() + with 
tm.assert_produces_warning(FutureWarning): + df.resample("3T", loffset="0s").sum() + + +@all_ts +@pytest.mark.parametrize("arg", ["mean", {"value": "mean"}, ["mean"]]) +def test_resample_loffset_arg_type(frame, create_index, arg): + # GH 13218, 15002 + df = frame + expected_means = [df.values[i : i + 2].mean() for i in range(0, len(df.values), 2)] + expected_index = create_index(df.index[0], periods=len(df.index) / 2, freq="2D") + + # loffset coerces PeriodIndex to DateTimeIndex + if isinstance(expected_index, PeriodIndex): + expected_index = expected_index.to_timestamp() + + expected_index += timedelta(hours=2) + expected = DataFrame({"value": expected_means}, index=expected_index) + + with tm.assert_produces_warning(FutureWarning): + result_agg = df.resample("2D", loffset="2H").agg(arg) + + if isinstance(arg, list): + expected.columns = pd.MultiIndex.from_tuples([("value", "mean")]) + + tm.assert_frame_equal(result_agg, expected) + + +@pytest.mark.parametrize( + "loffset", [timedelta(minutes=1), "1min", Minute(1), np.timedelta64(1, "m")] +) +def test_resample_loffset(loffset): + # GH 7687 + rng = date_range("1/1/2000 00:00:00", "1/1/2000 00:13:00", freq="min") + s = Series(np.random.randn(14), index=rng) + + with tm.assert_produces_warning(FutureWarning): + result = s.resample( + "5min", closed="right", label="right", loffset=loffset + ).mean() + idx = date_range("1/1/2000", periods=4, freq="5min") + expected = Series( + [s[0], s[1:6].mean(), s[6:11].mean(), s[11:].mean()], + index=idx + timedelta(minutes=1), + ) + tm.assert_series_equal(result, expected) + assert result.index.freq == Minute(5) + + # from daily + dti = date_range(start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="D") + ser = Series(np.random.rand(len(dti)), dti) + + # to weekly + result = ser.resample("w-sun").last() + business_day_offset = BDay() + with tm.assert_produces_warning(FutureWarning): + expected = ser.resample("w-sun", loffset=-business_day_offset).last() + assert result.index[0] - business_day_offset == expected.index[0] + + +def test_resample_loffset_upsample(): + # GH 20744 + rng = date_range("1/1/2000 00:00:00", "1/1/2000 00:13:00", freq="min") + s = Series(np.random.randn(14), index=rng) + + with tm.assert_produces_warning(FutureWarning): + result = s.resample( + "5min", closed="right", label="right", loffset=timedelta(minutes=1) + ).ffill() + idx = date_range("1/1/2000", periods=4, freq="5min") + expected = Series([s[0], s[5], s[10], s[-1]], index=idx + timedelta(minutes=1)) + + tm.assert_series_equal(result, expected) + + +def test_resample_loffset_count(): + # GH 12725 + start_time = "1/1/2000 00:00:00" + rng = date_range(start_time, periods=100, freq="S") + ts = Series(np.random.randn(len(rng)), index=rng) + + with tm.assert_produces_warning(FutureWarning): + result = ts.resample("10S", loffset="1s").count() + + expected_index = date_range(start_time, periods=10, freq="10S") + timedelta( + seconds=1 + ) + expected = Series(10, index=expected_index) + + tm.assert_series_equal(result, expected) + + # Same issue should apply to .size() since it goes through + # same code path + with tm.assert_produces_warning(FutureWarning): + result = ts.resample("10S", loffset="1s").size() + + tm.assert_series_equal(result, expected) + + +def test_resample_base(): + rng = date_range("1/1/2000 00:00:00", "1/1/2000 02:00", freq="s") + ts = Series(np.random.randn(len(rng)), index=rng) + + with tm.assert_produces_warning(FutureWarning): + resampled = ts.resample("5min", base=2).mean() + exp_rng = 
date_range("12/31/1999 23:57:00", "1/1/2000 01:57", freq="5min") + tm.assert_index_equal(resampled.index, exp_rng) + + +def test_resample_float_base(): + # GH25161 + dt = pd.to_datetime( + ["2018-11-26 16:17:43.51", "2018-11-26 16:17:44.51", "2018-11-26 16:17:45.51"] + ) + s = Series(np.arange(3), index=dt) + + base = 17 + 43.51 / 60 + with tm.assert_produces_warning(FutureWarning): + result = s.resample("3min", base=base).size() + expected = Series( + 3, index=pd.DatetimeIndex(["2018-11-26 16:17:43.51"], freq="3min") + ) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("kind", ["period", None, "timestamp"]) +@pytest.mark.parametrize("agg_arg", ["mean", {"value": "mean"}, ["mean"]]) +def test_loffset_returns_datetimeindex(frame, kind, agg_arg): + # make sure passing loffset returns DatetimeIndex in all cases + # basic method taken from Base.test_resample_loffset_arg_type() + df = frame + expected_means = [df.values[i : i + 2].mean() for i in range(0, len(df.values), 2)] + expected_index = period_range(df.index[0], periods=len(df.index) / 2, freq="2D") + + # loffset coerces PeriodIndex to DateTimeIndex + expected_index = expected_index.to_timestamp() + expected_index += timedelta(hours=2) + expected = DataFrame({"value": expected_means}, index=expected_index) + + with tm.assert_produces_warning(FutureWarning): + result_agg = df.resample("2D", loffset="2H", kind=kind).agg(agg_arg) + if isinstance(agg_arg, list): + expected.columns = pd.MultiIndex.from_tuples([("value", "mean")]) + tm.assert_frame_equal(result_agg, expected) + + +@pytest.mark.parametrize( + "start,end,start_freq,end_freq,base,offset", + [ + ("19910905", "19910909 03:00", "H", "24H", 10, "10H"), + ("19910905", "19910909 12:00", "H", "24H", 10, "10H"), + ("19910905", "19910909 23:00", "H", "24H", 10, "10H"), + ("19910905 10:00", "19910909", "H", "24H", 10, "10H"), + ("19910905 10:00", "19910909 10:00", "H", "24H", 10, "10H"), + ("19910905", "19910909 10:00", "H", "24H", 10, "10H"), + ("19910905 12:00", "19910909", "H", "24H", 10, "10H"), + ("19910905 12:00", "19910909 03:00", "H", "24H", 10, "10H"), + ("19910905 12:00", "19910909 12:00", "H", "24H", 10, "10H"), + ("19910905 12:00", "19910909 12:00", "H", "24H", 34, "34H"), + ("19910905 12:00", "19910909 12:00", "H", "17H", 10, "10H"), + ("19910905 12:00", "19910909 12:00", "H", "17H", 3, "3H"), + ("19910905 12:00", "19910909 1:00", "H", "M", 3, "3H"), + ("19910905", "19910913 06:00", "2H", "24H", 10, "10H"), + ("19910905", "19910905 01:39", "Min", "5Min", 3, "3Min"), + ("19910905", "19910905 03:18", "2Min", "5Min", 3, "3Min"), + ], +) +def test_resample_with_non_zero_base(start, end, start_freq, end_freq, base, offset): + # GH 23882 + s = Series(0, index=period_range(start, end, freq=start_freq)) + s = s + np.arange(len(s)) + with tm.assert_produces_warning(FutureWarning): + result = s.resample(end_freq, base=base).mean() + result = result.to_timestamp(end_freq) + + # test that the replacement argument 'offset' works + result_offset = s.resample(end_freq, offset=offset).mean() + result_offset = result_offset.to_timestamp(end_freq) + tm.assert_series_equal(result, result_offset) + + # to_timestamp casts 24H -> D + result = result.asfreq(end_freq) if end_freq == "24H" else result + with tm.assert_produces_warning(FutureWarning): + expected = s.to_timestamp().resample(end_freq, base=base).mean() + if end_freq == "M": + # TODO: is non-tick the relevant characteristic? 
(GH 33815) + expected.index = expected.index._with_freq(None) + tm.assert_series_equal(result, expected) + + +def test_resample_base_with_timedeltaindex(): + # GH 10530 + rng = timedelta_range(start="0s", periods=25, freq="s") + ts = Series(np.random.randn(len(rng)), index=rng) + + with tm.assert_produces_warning(FutureWarning): + with_base = ts.resample("2s", base=5).mean() + without_base = ts.resample("2s").mean() + + exp_without_base = timedelta_range(start="0s", end="25s", freq="2s") + exp_with_base = timedelta_range(start="5s", end="29s", freq="2s") + + tm.assert_index_equal(without_base.index, exp_without_base) + tm.assert_index_equal(with_base.index, exp_with_base) + + +def test_interpolate_posargs_deprecation(): + # GH 41485 + idx = pd.to_datetime(["1992-08-27 07:46:48", "1992-08-27 07:46:59"]) + s = Series([1, 4], index=idx) + + msg = ( + r"In a future version of pandas all arguments of Resampler\.interpolate " + r"except for the argument 'method' will be keyword-only" + ) + + with tm.assert_produces_warning(FutureWarning, match=msg): + result = s.resample("3s").interpolate("linear", 0) + + idx = pd.to_datetime( + [ + "1992-08-27 07:46:48", + "1992-08-27 07:46:51", + "1992-08-27 07:46:54", + "1992-08-27 07:46:57", + ] + ) + expected = Series([1.0, 1.0, 1.0, 1.0], index=idx) + + expected.index._data.freq = "3s" + tm.assert_series_equal(result, expected) + + +def test_pad_backfill_deprecation(): + # GH 33396 + s = Series([1, 2, 3], index=date_range("20180101", periods=3, freq="h")) + with tm.assert_produces_warning(FutureWarning, match="backfill"): + s.resample("30min").backfill() + with tm.assert_produces_warning(FutureWarning, match="pad"): + s.resample("30min").pad() diff --git a/.local/lib/python3.8/site-packages/pandas/tests/resample/test_period_index.py b/.local/lib/python3.8/site-packages/pandas/tests/resample/test_period_index.py new file mode 100644 index 0000000..70d37f8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/resample/test_period_index.py @@ -0,0 +1,878 @@ +from datetime import datetime + +import dateutil +import numpy as np +import pytest +import pytz + +from pandas._libs.tslibs.ccalendar import ( + DAYS, + MONTHS, +) +from pandas._libs.tslibs.period import IncompatibleFrequency +from pandas.errors import InvalidIndexError + +import pandas as pd +from pandas import ( + DataFrame, + Series, + Timestamp, +) +import pandas._testing as tm +from pandas.core.indexes.datetimes import date_range +from pandas.core.indexes.period import ( + Period, + PeriodIndex, + period_range, +) +from pandas.core.resample import _get_period_range_edges + +import pandas.tseries.offsets as offsets + + +@pytest.fixture() +def _index_factory(): + return period_range + + +@pytest.fixture +def _series_name(): + return "pi" + + +class TestPeriodIndex: + @pytest.mark.parametrize("freq", ["2D", "1H", "2H"]) + @pytest.mark.parametrize("kind", ["period", None, "timestamp"]) + def test_asfreq(self, series_and_frame, freq, kind): + # GH 12884, 15944 + # make sure .asfreq() returns PeriodIndex (except kind='timestamp') + + obj = series_and_frame + if kind == "timestamp": + expected = obj.to_timestamp().resample(freq).asfreq() + else: + start = obj.index[0].to_timestamp(how="start") + end = (obj.index[-1] + obj.index.freq).to_timestamp(how="start") + new_index = date_range(start=start, end=end, freq=freq, inclusive="left") + expected = obj.to_timestamp().reindex(new_index).to_period(freq) + result = obj.resample(freq, kind=kind).asfreq() + tm.assert_almost_equal(result, expected) + 
+ def test_asfreq_fill_value(self, series): + # test for fill value during resampling, issue 3715 + + s = series + new_index = date_range( + s.index[0].to_timestamp(how="start"), + (s.index[-1]).to_timestamp(how="start"), + freq="1H", + ) + expected = s.to_timestamp().reindex(new_index, fill_value=4.0) + result = s.resample("1H", kind="timestamp").asfreq(fill_value=4.0) + tm.assert_series_equal(result, expected) + + frame = s.to_frame("value") + new_index = date_range( + frame.index[0].to_timestamp(how="start"), + (frame.index[-1]).to_timestamp(how="start"), + freq="1H", + ) + expected = frame.to_timestamp().reindex(new_index, fill_value=3.0) + result = frame.resample("1H", kind="timestamp").asfreq(fill_value=3.0) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("freq", ["H", "12H", "2D", "W"]) + @pytest.mark.parametrize("kind", [None, "period", "timestamp"]) + @pytest.mark.parametrize("kwargs", [{"on": "date"}, {"level": "d"}]) + def test_selection(self, index, freq, kind, kwargs): + # This is a bug, these should be implemented + # GH 14008 + rng = np.arange(len(index), dtype=np.int64) + df = DataFrame( + {"date": index, "a": rng}, + index=pd.MultiIndex.from_arrays([rng, index], names=["v", "d"]), + ) + msg = ( + "Resampling from level= or on= selection with a PeriodIndex is " + r"not currently supported, use \.set_index\(\.\.\.\) to " + "explicitly set index" + ) + with pytest.raises(NotImplementedError, match=msg): + df.resample(freq, kind=kind, **kwargs) + + @pytest.mark.parametrize("month", MONTHS) + @pytest.mark.parametrize("meth", ["ffill", "bfill"]) + @pytest.mark.parametrize("conv", ["start", "end"]) + @pytest.mark.parametrize("targ", ["D", "B", "M"]) + def test_annual_upsample_cases( + self, targ, conv, meth, month, simple_period_range_series + ): + ts = simple_period_range_series("1/1/1990", "12/31/1991", freq=f"A-{month}") + + result = getattr(ts.resample(targ, convention=conv), meth)() + expected = result.to_timestamp(targ, how=conv) + expected = expected.asfreq(targ, meth).to_period() + tm.assert_series_equal(result, expected) + + def test_basic_downsample(self, simple_period_range_series): + ts = simple_period_range_series("1/1/1990", "6/30/1995", freq="M") + result = ts.resample("a-dec").mean() + + expected = ts.groupby(ts.index.year).mean() + expected.index = period_range("1/1/1990", "6/30/1995", freq="a-dec") + tm.assert_series_equal(result, expected) + + # this is ok + tm.assert_series_equal(ts.resample("a-dec").mean(), result) + tm.assert_series_equal(ts.resample("a").mean(), result) + + @pytest.mark.parametrize( + "rule,expected_error_msg", + [ + ("a-dec", "<YearEnd: month=12>"), + ("q-mar", "<QuarterEnd: startingMonth=3>"), + ("M", "<MonthEnd>"), + ("w-thu", "<Week: weekday=3>"), + ], + ) + def test_not_subperiod(self, simple_period_range_series, rule, expected_error_msg): + # These are incompatible period rules for resampling + ts = simple_period_range_series("1/1/1990", "6/30/1995", freq="w-wed") + msg = ( + "Frequency <Week: weekday=2> cannot be resampled to " + f"{expected_error_msg}, as they are not sub or super periods" + ) + with pytest.raises(IncompatibleFrequency, match=msg): + ts.resample(rule).mean() + + @pytest.mark.parametrize("freq", ["D", "2D"]) + def test_basic_upsample(self, freq, simple_period_range_series): + ts = simple_period_range_series("1/1/1990", "6/30/1995", freq="M") + result = ts.resample("a-dec").mean() + + resampled = result.resample(freq, convention="end").ffill() + expected = result.to_timestamp(freq, how="end") + expected = expected.asfreq(freq, "ffill").to_period(freq) + tm.assert_series_equal(resampled, 
expected) + + def test_upsample_with_limit(self): + rng = period_range("1/1/2000", periods=5, freq="A") + ts = Series(np.random.randn(len(rng)), rng) + + result = ts.resample("M", convention="end").ffill(limit=2) + expected = ts.asfreq("M").reindex(result.index, method="ffill", limit=2) + tm.assert_series_equal(result, expected) + + def test_annual_upsample(self, simple_period_range_series): + ts = simple_period_range_series("1/1/1990", "12/31/1995", freq="A-DEC") + df = DataFrame({"a": ts}) + rdf = df.resample("D").ffill() + exp = df["a"].resample("D").ffill() + tm.assert_series_equal(rdf["a"], exp) + + rng = period_range("2000", "2003", freq="A-DEC") + ts = Series([1, 2, 3, 4], index=rng) + + result = ts.resample("M").ffill() + ex_index = period_range("2000-01", "2003-12", freq="M") + + expected = ts.asfreq("M", how="start").reindex(ex_index, method="ffill") + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("month", MONTHS) + @pytest.mark.parametrize("target", ["D", "B", "M"]) + @pytest.mark.parametrize("convention", ["start", "end"]) + def test_quarterly_upsample( + self, month, target, convention, simple_period_range_series + ): + freq = f"Q-{month}" + ts = simple_period_range_series("1/1/1990", "12/31/1995", freq=freq) + result = ts.resample(target, convention=convention).ffill() + expected = result.to_timestamp(target, how=convention) + expected = expected.asfreq(target, "ffill").to_period() + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("target", ["D", "B"]) + @pytest.mark.parametrize("convention", ["start", "end"]) + def test_monthly_upsample(self, target, convention, simple_period_range_series): + ts = simple_period_range_series("1/1/1990", "12/31/1995", freq="M") + result = ts.resample(target, convention=convention).ffill() + expected = result.to_timestamp(target, how=convention) + expected = expected.asfreq(target, "ffill").to_period() + tm.assert_series_equal(result, expected) + + def test_resample_basic(self): + # GH3609 + s = Series( + range(100), + index=date_range("20130101", freq="s", periods=100, name="idx"), + dtype="float", + ) + s[10:30] = np.nan + index = PeriodIndex( + [Period("2013-01-01 00:00", "T"), Period("2013-01-01 00:01", "T")], + name="idx", + ) + expected = Series([34.5, 79.5], index=index) + result = s.to_period().resample("T", kind="period").mean() + tm.assert_series_equal(result, expected) + result2 = s.resample("T", kind="period").mean() + tm.assert_series_equal(result2, expected) + + @pytest.mark.parametrize( + "freq,expected_vals", [("M", [31, 29, 31, 9]), ("2M", [31 + 29, 31 + 9])] + ) + def test_resample_count(self, freq, expected_vals): + # GH12774 + series = Series(1, index=period_range(start="2000", periods=100)) + result = series.resample(freq).count() + expected_index = period_range( + start="2000", freq=freq, periods=len(expected_vals) + ) + expected = Series(expected_vals, index=expected_index) + tm.assert_series_equal(result, expected) + + def test_resample_same_freq(self, resample_method): + + # GH12770 + series = Series(range(3), index=period_range(start="2000", periods=3, freq="M")) + expected = series + + result = getattr(series.resample("M"), resample_method)() + tm.assert_series_equal(result, expected) + + def test_resample_incompat_freq(self): + msg = ( + "Frequency <MonthEnd> cannot be resampled to <Week: weekday=6>, " + "as they are not sub or super periods" + ) + with pytest.raises(IncompatibleFrequency, match=msg): + Series( + range(3), index=period_range(start="2000", periods=3, freq="M") + 
).resample("W").mean() + + def test_with_local_timezone_pytz(self): + # see gh-5430 + local_timezone = pytz.timezone("America/Los_Angeles") + + start = datetime(year=2013, month=11, day=1, hour=0, minute=0, tzinfo=pytz.utc) + # 1 day later + end = datetime(year=2013, month=11, day=2, hour=0, minute=0, tzinfo=pytz.utc) + + index = date_range(start, end, freq="H") + + series = Series(1, index=index) + series = series.tz_convert(local_timezone) + result = series.resample("D", kind="period").mean() + + # Create the expected series + # Index is moved back a day with the timezone conversion from UTC to + # Pacific + expected_index = period_range(start=start, end=end, freq="D") - offsets.Day() + expected = Series(1.0, index=expected_index) + tm.assert_series_equal(result, expected) + + def test_resample_with_pytz(self): + # GH 13238 + s = Series( + 2, index=date_range("2017-01-01", periods=48, freq="H", tz="US/Eastern") + ) + result = s.resample("D").mean() + expected = Series( + 2.0, + index=pd.DatetimeIndex( + ["2017-01-01", "2017-01-02"], tz="US/Eastern", freq="D" + ), + ) + tm.assert_series_equal(result, expected) + # Especially assert that the timezone is LMT for pytz + assert result.index.tz == pytz.timezone("US/Eastern") + + def test_with_local_timezone_dateutil(self): + # see gh-5430 + local_timezone = "dateutil/America/Los_Angeles" + + start = datetime( + year=2013, month=11, day=1, hour=0, minute=0, tzinfo=dateutil.tz.tzutc() + ) + # 1 day later + end = datetime( + year=2013, month=11, day=2, hour=0, minute=0, tzinfo=dateutil.tz.tzutc() + ) + + index = date_range(start, end, freq="H", name="idx") + + series = Series(1, index=index) + series = series.tz_convert(local_timezone) + result = series.resample("D", kind="period").mean() + + # Create the expected series + # Index is moved back a day with the timezone conversion from UTC to + # Pacific + expected_index = ( + period_range(start=start, end=end, freq="D", name="idx") - offsets.Day() + ) + expected = Series(1.0, index=expected_index) + tm.assert_series_equal(result, expected) + + def test_resample_nonexistent_time_bin_edge(self): + # GH 19375 + index = date_range("2017-03-12", "2017-03-12 1:45:00", freq="15T") + s = Series(np.zeros(len(index)), index=index) + expected = s.tz_localize("US/Pacific") + expected.index = pd.DatetimeIndex(expected.index, freq="900S") + result = expected.resample("900S").mean() + tm.assert_series_equal(result, expected) + + # GH 23742 + index = date_range(start="2017-10-10", end="2017-10-20", freq="1H") + index = index.tz_localize("UTC").tz_convert("America/Sao_Paulo") + df = DataFrame(data=list(range(len(index))), index=index) + result = df.groupby(pd.Grouper(freq="1D")).count() + expected = date_range( + start="2017-10-09", + end="2017-10-20", + freq="D", + tz="America/Sao_Paulo", + nonexistent="shift_forward", + inclusive="left", + ) + tm.assert_index_equal(result.index, expected) + + def test_resample_ambiguous_time_bin_edge(self): + # GH 10117 + idx = date_range( + "2014-10-25 22:00:00", "2014-10-26 00:30:00", freq="30T", tz="Europe/London" + ) + expected = Series(np.zeros(len(idx)), index=idx) + result = expected.resample("30T").mean() + tm.assert_series_equal(result, expected) + + def test_fill_method_and_how_upsample(self): + # GH2073 + s = Series( + np.arange(9, dtype="int64"), + index=date_range("2010-01-01", periods=9, freq="Q"), + ) + last = s.resample("M").ffill() + both = s.resample("M").ffill().resample("M").last().astype("int64") + tm.assert_series_equal(last, both) + + 
@pytest.mark.parametrize("day", DAYS) + @pytest.mark.parametrize("target", ["D", "B"]) + @pytest.mark.parametrize("convention", ["start", "end"]) + def test_weekly_upsample(self, day, target, convention, simple_period_range_series): + freq = f"W-{day}" + ts = simple_period_range_series("1/1/1990", "12/31/1995", freq=freq) + result = ts.resample(target, convention=convention).ffill() + expected = result.to_timestamp(target, how=convention) + expected = expected.asfreq(target, "ffill").to_period() + tm.assert_series_equal(result, expected) + + def test_resample_to_timestamps(self, simple_period_range_series): + ts = simple_period_range_series("1/1/1990", "12/31/1995", freq="M") + + result = ts.resample("A-DEC", kind="timestamp").mean() + expected = ts.to_timestamp(how="start").resample("A-DEC").mean() + tm.assert_series_equal(result, expected) + + def test_resample_to_quarterly(self, simple_period_range_series): + for month in MONTHS: + ts = simple_period_range_series("1990", "1992", freq=f"A-{month}") + quar_ts = ts.resample(f"Q-{month}").ffill() + + stamps = ts.to_timestamp("D", how="start") + qdates = period_range( + ts.index[0].asfreq("D", "start"), + ts.index[-1].asfreq("D", "end"), + freq=f"Q-{month}", + ) + + expected = stamps.reindex(qdates.to_timestamp("D", "s"), method="ffill") + expected.index = qdates + + tm.assert_series_equal(quar_ts, expected) + + # conforms, but different month + ts = simple_period_range_series("1990", "1992", freq="A-JUN") + + for how in ["start", "end"]: + result = ts.resample("Q-MAR", convention=how).ffill() + expected = ts.asfreq("Q-MAR", how=how) + expected = expected.reindex(result.index, method="ffill") + + # .to_timestamp('D') + # expected = expected.resample('Q-MAR').ffill() + + tm.assert_series_equal(result, expected) + + def test_resample_fill_missing(self): + rng = PeriodIndex([2000, 2005, 2007, 2009], freq="A") + + s = Series(np.random.randn(4), index=rng) + + stamps = s.to_timestamp() + filled = s.resample("A").ffill() + expected = stamps.resample("A").ffill().to_period("A") + tm.assert_series_equal(filled, expected) + + def test_cant_fill_missing_dups(self): + rng = PeriodIndex([2000, 2005, 2005, 2007, 2007], freq="A") + s = Series(np.random.randn(5), index=rng) + msg = "Reindexing only valid with uniquely valued Index objects" + with pytest.raises(InvalidIndexError, match=msg): + s.resample("A").ffill() + + @pytest.mark.parametrize("freq", ["5min"]) + @pytest.mark.parametrize("kind", ["period", None, "timestamp"]) + def test_resample_5minute(self, freq, kind): + rng = period_range("1/1/2000", "1/5/2000", freq="T") + ts = Series(np.random.randn(len(rng)), index=rng) + expected = ts.to_timestamp().resample(freq).mean() + if kind != "timestamp": + expected = expected.to_period(freq) + result = ts.resample(freq, kind=kind).mean() + tm.assert_series_equal(result, expected) + + def test_upsample_daily_business_daily(self, simple_period_range_series): + ts = simple_period_range_series("1/1/2000", "2/1/2000", freq="B") + + result = ts.resample("D").asfreq() + expected = ts.asfreq("D").reindex(period_range("1/3/2000", "2/1/2000")) + tm.assert_series_equal(result, expected) + + ts = simple_period_range_series("1/1/2000", "2/1/2000") + result = ts.resample("H", convention="s").asfreq() + exp_rng = period_range("1/1/2000", "2/1/2000 23:00", freq="H") + expected = ts.asfreq("H", how="s").reindex(exp_rng) + tm.assert_series_equal(result, expected) + + def test_resample_irregular_sparse(self): + dr = date_range(start="1/1/2012", freq="5min", periods=1000) + 
s = Series(np.array(100), index=dr) + # subset the data. + subset = s[:"2012-01-04 06:55"] + + result = subset.resample("10min").apply(len) + expected = s.resample("10min").apply(len).loc[result.index] + tm.assert_series_equal(result, expected) + + def test_resample_weekly_all_na(self): + rng = date_range("1/1/2000", periods=10, freq="W-WED") + ts = Series(np.random.randn(len(rng)), index=rng) + + result = ts.resample("W-THU").asfreq() + + assert result.isna().all() + + result = ts.resample("W-THU").asfreq().ffill()[:-1] + expected = ts.asfreq("W-THU").ffill() + tm.assert_series_equal(result, expected) + + def test_resample_tz_localized(self): + dr = date_range(start="2012-4-13", end="2012-5-1") + ts = Series(range(len(dr)), index=dr) + + ts_utc = ts.tz_localize("UTC") + ts_local = ts_utc.tz_convert("America/Los_Angeles") + + result = ts_local.resample("W").mean() + + ts_local_naive = ts_local.copy() + ts_local_naive.index = [ + x.replace(tzinfo=None) for x in ts_local_naive.index.to_pydatetime() + ] + + exp = ts_local_naive.resample("W").mean().tz_localize("America/Los_Angeles") + exp.index = pd.DatetimeIndex(exp.index, freq="W") + + tm.assert_series_equal(result, exp) + + # it works + result = ts_local.resample("D").mean() + + # #2245 + idx = date_range( + "2001-09-20 15:59", "2001-09-20 16:00", freq="T", tz="Australia/Sydney" + ) + s = Series([1, 2], index=idx) + + result = s.resample("D", closed="right", label="right").mean() + ex_index = date_range("2001-09-21", periods=1, freq="D", tz="Australia/Sydney") + expected = Series([1.5], index=ex_index) + + tm.assert_series_equal(result, expected) + + # for good measure + result = s.resample("D", kind="period").mean() + ex_index = period_range("2001-09-20", periods=1, freq="D") + expected = Series([1.5], index=ex_index) + tm.assert_series_equal(result, expected) + + # GH 6397 + # comparing an offset that doesn't propagate tz's + rng = date_range("1/1/2011", periods=20000, freq="H") + rng = rng.tz_localize("EST") + ts = DataFrame(index=rng) + ts["first"] = np.random.randn(len(rng)) + ts["second"] = np.cumsum(np.random.randn(len(rng))) + expected = DataFrame( + { + "first": ts.resample("A").sum()["first"], + "second": ts.resample("A").mean()["second"], + }, + columns=["first", "second"], + ) + result = ( + ts.resample("A") + .agg({"first": np.sum, "second": np.mean}) + .reindex(columns=["first", "second"]) + ) + tm.assert_frame_equal(result, expected) + + def test_closed_left_corner(self): + # #1465 + s = Series( + np.random.randn(21), + index=date_range(start="1/1/2012 9:30", freq="1min", periods=21), + ) + s[0] = np.nan + + result = s.resample("10min", closed="left", label="right").mean() + exp = s[1:].resample("10min", closed="left", label="right").mean() + tm.assert_series_equal(result, exp) + + result = s.resample("10min", closed="left", label="left").mean() + exp = s[1:].resample("10min", closed="left", label="left").mean() + + ex_index = date_range(start="1/1/2012 9:30", freq="10min", periods=3) + + tm.assert_index_equal(result.index, ex_index) + tm.assert_series_equal(result, exp) + + def test_quarterly_resampling(self): + rng = period_range("2000Q1", periods=10, freq="Q-DEC") + ts = Series(np.arange(10), index=rng) + + result = ts.resample("A").mean() + exp = ts.to_timestamp().resample("A").mean().to_period() + tm.assert_series_equal(result, exp) + + def test_resample_weekly_bug_1726(self): + # 8/6/12 is a Monday + ind = date_range(start="8/6/2012", end="8/26/2012", freq="D") + n = len(ind) + data = [[x] * 5 for x in range(n)] + df = 
DataFrame(data, columns=["open", "high", "low", "close", "vol"], index=ind) + + # it works! + df.resample("W-MON", closed="left", label="left").first() + + def test_resample_with_dst_time_change(self): + # GH 15549 + index = ( + pd.DatetimeIndex([1457537600000000000, 1458059600000000000]) + .tz_localize("UTC") + .tz_convert("America/Chicago") + ) + df = DataFrame([1, 2], index=index) + result = df.resample("12h", closed="right", label="right").last().ffill() + + expected_index_values = [ + "2016-03-09 12:00:00-06:00", + "2016-03-10 00:00:00-06:00", + "2016-03-10 12:00:00-06:00", + "2016-03-11 00:00:00-06:00", + "2016-03-11 12:00:00-06:00", + "2016-03-12 00:00:00-06:00", + "2016-03-12 12:00:00-06:00", + "2016-03-13 00:00:00-06:00", + "2016-03-13 13:00:00-05:00", + "2016-03-14 01:00:00-05:00", + "2016-03-14 13:00:00-05:00", + "2016-03-15 01:00:00-05:00", + "2016-03-15 13:00:00-05:00", + ] + index = pd.to_datetime(expected_index_values, utc=True).tz_convert( + "America/Chicago" + ) + index = pd.DatetimeIndex(index, freq="12h") + expected = DataFrame( + [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0], + index=index, + ) + tm.assert_frame_equal(result, expected) + + def test_resample_bms_2752(self): + # GH2753 + foo = Series(index=pd.bdate_range("20000101", "20000201"), dtype=np.float64) + res1 = foo.resample("BMS").mean() + res2 = foo.resample("BMS").mean().resample("B").mean() + assert res1.index[0] == Timestamp("20000103") + assert res1.index[0] == res2.index[0] + + # def test_monthly_convention_span(self): + # rng = period_range('2000-01', periods=3, freq='M') + # ts = Series(np.arange(3), index=rng) + + # # hacky way to get same thing + # exp_index = period_range('2000-01-01', '2000-03-31', freq='D') + # expected = ts.asfreq('D', how='end').reindex(exp_index) + # expected = expected.fillna(method='bfill') + + # result = ts.resample('D', convention='span').mean() + + # tm.assert_series_equal(result, expected) + + def test_default_right_closed_label(self): + end_freq = ["D", "Q", "M", "D"] + end_types = ["M", "A", "Q", "W"] + + for from_freq, to_freq in zip(end_freq, end_types): + idx = date_range(start="8/15/2012", periods=100, freq=from_freq) + df = DataFrame(np.random.randn(len(idx), 2), idx) + + resampled = df.resample(to_freq).mean() + tm.assert_frame_equal( + resampled, df.resample(to_freq, closed="right", label="right").mean() + ) + + def test_default_left_closed_label(self): + others = ["MS", "AS", "QS", "D", "H"] + others_freq = ["D", "Q", "M", "H", "T"] + + for from_freq, to_freq in zip(others_freq, others): + idx = date_range(start="8/15/2012", periods=100, freq=from_freq) + df = DataFrame(np.random.randn(len(idx), 2), idx) + + resampled = df.resample(to_freq).mean() + tm.assert_frame_equal( + resampled, df.resample(to_freq, closed="left", label="left").mean() + ) + + def test_all_values_single_bin(self): + # 2070 + index = period_range(start="2012-01-01", end="2012-12-31", freq="M") + s = Series(np.random.randn(len(index)), index=index) + + result = s.resample("A").mean() + tm.assert_almost_equal(result[0], s.mean()) + + def test_evenly_divisible_with_no_extra_bins(self): + # 4076 + # when the frequency is evenly divisible, sometimes extra bins + + df = DataFrame(np.random.randn(9, 3), index=date_range("2000-1-1", periods=9)) + result = df.resample("5D").mean() + expected = pd.concat([df.iloc[0:5].mean(), df.iloc[5:].mean()], axis=1).T + expected.index = pd.DatetimeIndex( + [Timestamp("2000-1-1"), Timestamp("2000-1-6")], freq="5D" + ) + 
tm.assert_frame_equal(result, expected) + + index = date_range(start="2001-5-4", periods=28) + df = DataFrame( + [ + { + "REST_KEY": 1, + "DLY_TRN_QT": 80, + "DLY_SLS_AMT": 90, + "COOP_DLY_TRN_QT": 30, + "COOP_DLY_SLS_AMT": 20, + } + ] + * 28 + + [ + { + "REST_KEY": 2, + "DLY_TRN_QT": 70, + "DLY_SLS_AMT": 10, + "COOP_DLY_TRN_QT": 50, + "COOP_DLY_SLS_AMT": 20, + } + ] + * 28, + index=index.append(index), + ).sort_index() + + index = date_range("2001-5-4", periods=4, freq="7D") + expected = DataFrame( + [ + { + "REST_KEY": 14, + "DLY_TRN_QT": 14, + "DLY_SLS_AMT": 14, + "COOP_DLY_TRN_QT": 14, + "COOP_DLY_SLS_AMT": 14, + } + ] + * 4, + index=index, + ) + result = df.resample("7D").count() + tm.assert_frame_equal(result, expected) + + expected = DataFrame( + [ + { + "REST_KEY": 21, + "DLY_TRN_QT": 1050, + "DLY_SLS_AMT": 700, + "COOP_DLY_TRN_QT": 560, + "COOP_DLY_SLS_AMT": 280, + } + ] + * 4, + index=index, + ) + result = df.resample("7D").sum() + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("freq, period_mult", [("H", 24), ("12H", 2)]) + @pytest.mark.parametrize("kind", [None, "period"]) + def test_upsampling_ohlc(self, freq, period_mult, kind): + # GH 13083 + pi = period_range(start="2000", freq="D", periods=10) + s = Series(range(len(pi)), index=pi) + expected = s.to_timestamp().resample(freq).ohlc().to_period(freq) + + # timestamp-based resampling doesn't include all sub-periods + # of the last original period, so extend accordingly: + new_index = period_range(start="2000", freq=freq, periods=period_mult * len(pi)) + expected = expected.reindex(new_index) + result = s.resample(freq, kind=kind).ohlc() + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "periods, values", + [ + ( + [ + pd.NaT, + "1970-01-01 00:00:00", + pd.NaT, + "1970-01-01 00:00:02", + "1970-01-01 00:00:03", + ], + [2, 3, 5, 7, 11], + ), + ( + [ + pd.NaT, + pd.NaT, + "1970-01-01 00:00:00", + pd.NaT, + pd.NaT, + pd.NaT, + "1970-01-01 00:00:02", + "1970-01-01 00:00:03", + pd.NaT, + pd.NaT, + ], + [1, 2, 3, 5, 6, 8, 7, 11, 12, 13], + ), + ], + ) + @pytest.mark.parametrize( + "freq, expected_values", + [ + ("1s", [3, np.NaN, 7, 11]), + ("2s", [3, (7 + 11) / 2]), + ("3s", [(3 + 7) / 2, 11]), + ], + ) + def test_resample_with_nat(self, periods, values, freq, expected_values): + # GH 13224 + index = PeriodIndex(periods, freq="S") + frame = DataFrame(values, index=index) + + expected_index = period_range( + "1970-01-01 00:00:00", periods=len(expected_values), freq=freq + ) + expected = DataFrame(expected_values, index=expected_index) + result = frame.resample(freq).mean() + tm.assert_frame_equal(result, expected) + + def test_resample_with_only_nat(self): + # GH 13224 + pi = PeriodIndex([pd.NaT] * 3, freq="S") + frame = DataFrame([2, 3, 5], index=pi, columns=["a"]) + expected_index = PeriodIndex(data=[], freq=pi.freq) + expected = DataFrame(index=expected_index, columns=["a"], dtype="float64") + result = frame.resample("1s").mean() + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "start,end,start_freq,end_freq,offset", + [ + ("19910905", "19910909 03:00", "H", "24H", "10H"), + ("19910905", "19910909 12:00", "H", "24H", "10H"), + ("19910905", "19910909 23:00", "H", "24H", "10H"), + ("19910905 10:00", "19910909", "H", "24H", "10H"), + ("19910905 10:00", "19910909 10:00", "H", "24H", "10H"), + ("19910905", "19910909 10:00", "H", "24H", "10H"), + ("19910905 12:00", "19910909", "H", "24H", "10H"), + ("19910905 12:00", "19910909 03:00", "H", "24H", "10H"), + ("19910905 
12:00", "19910909 12:00", "H", "24H", "10H"), + ("19910905 12:00", "19910909 12:00", "H", "24H", "34H"), + ("19910905 12:00", "19910909 12:00", "H", "17H", "10H"), + ("19910905 12:00", "19910909 12:00", "H", "17H", "3H"), + ("19910905 12:00", "19910909 1:00", "H", "M", "3H"), + ("19910905", "19910913 06:00", "2H", "24H", "10H"), + ("19910905", "19910905 01:39", "Min", "5Min", "3Min"), + ("19910905", "19910905 03:18", "2Min", "5Min", "3Min"), + ], + ) + def test_resample_with_offset(self, start, end, start_freq, end_freq, offset): + # GH 23882 & 31809 + s = Series(0, index=period_range(start, end, freq=start_freq)) + s = s + np.arange(len(s)) + result = s.resample(end_freq, offset=offset).mean() + result = result.to_timestamp(end_freq) + + expected = s.to_timestamp().resample(end_freq, offset=offset).mean() + if end_freq == "M": + # TODO: is non-tick the relevant characteristic? (GH 33815) + expected.index = expected.index._with_freq(None) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "first,last,freq,exp_first,exp_last", + [ + ("19910905", "19920406", "D", "19910905", "19920406"), + ("19910905 00:00", "19920406 06:00", "D", "19910905", "19920406"), + ( + "19910905 06:00", + "19920406 06:00", + "H", + "19910905 06:00", + "19920406 06:00", + ), + ("19910906", "19920406", "M", "1991-09", "1992-04"), + ("19910831", "19920430", "M", "1991-08", "1992-04"), + ("1991-08", "1992-04", "M", "1991-08", "1992-04"), + ], + ) + def test_get_period_range_edges(self, first, last, freq, exp_first, exp_last): + first = Period(first) + last = Period(last) + + exp_first = Period(exp_first, freq=freq) + exp_last = Period(exp_last, freq=freq) + + freq = pd.tseries.frequencies.to_offset(freq) + result = _get_period_range_edges(first, last, freq) + expected = (exp_first, exp_last) + assert result == expected + + def test_sum_min_count(self): + # GH 19974 + index = date_range(start="2018", freq="M", periods=6) + data = np.ones(6) + data[3:6] = np.nan + s = Series(data, index).to_period() + result = s.resample("Q").sum(min_count=1) + expected = Series( + [3.0, np.nan], index=PeriodIndex(["2018Q1", "2018Q2"], freq="Q-DEC") + ) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/resample/test_resample_api.py b/.local/lib/python3.8/site-packages/pandas/tests/resample/test_resample_api.py new file mode 100644 index 0000000..bb49450 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/resample/test_resample_api.py @@ -0,0 +1,738 @@ +from datetime import datetime + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + NamedAgg, + Series, +) +import pandas._testing as tm +from pandas.core.indexes.datetimes import date_range + +dti = date_range(start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="Min") + +test_series = Series(np.random.rand(len(dti)), dti) +_test_frame = DataFrame({"A": test_series, "B": test_series, "C": np.arange(len(dti))}) + + +@pytest.fixture +def test_frame(): + return _test_frame.copy() + + +def test_str(): + + r = test_series.resample("H") + assert ( + "DatetimeIndexResampler [freq=<Hour>, axis=0, closed=left, " + "label=left, convention=start, origin=start_day]" in str(r) + ) + + r = test_series.resample("H", origin="2000-01-01") + assert ( + "DatetimeIndexResampler [freq=<Hour>, axis=0, closed=left, " + "label=left, convention=start, origin=2000-01-01 00:00:00]" in str(r) + ) + + +def test_api(): + + r = test_series.resample("H") + result = r.mean() + assert 
isinstance(result, Series) + assert len(result) == 217 + + r = test_series.to_frame().resample("H") + result = r.mean() + assert isinstance(result, DataFrame) + assert len(result) == 217 + + +def test_groupby_resample_api(): + + # GH 12448 + # .groupby(...).resample(...) hitting warnings + # when appropriate + df = DataFrame( + { + "date": date_range(start="2016-01-01", periods=4, freq="W"), + "group": [1, 1, 2, 2], + "val": [5, 6, 7, 8], + } + ).set_index("date") + + # replication step + i = ( + date_range("2016-01-03", periods=8).tolist() + + date_range("2016-01-17", periods=8).tolist() + ) + index = pd.MultiIndex.from_arrays([[1] * 8 + [2] * 8, i], names=["group", "date"]) + expected = DataFrame({"val": [5] * 7 + [6] + [7] * 7 + [8]}, index=index) + result = df.groupby("group").apply(lambda x: x.resample("1D").ffill())[["val"]] + tm.assert_frame_equal(result, expected) + + +def test_groupby_resample_on_api(): + + # GH 15021 + # .groupby(...).resample(on=...) results in an unexpected + # keyword warning. + df = DataFrame( + { + "key": ["A", "B"] * 5, + "dates": date_range("2016-01-01", periods=10), + "values": np.random.randn(10), + } + ) + + expected = df.set_index("dates").groupby("key").resample("D").mean() + + result = df.groupby("key").resample("D", on="dates").mean() + tm.assert_frame_equal(result, expected) + + +def test_pipe(test_frame): + # GH17905 + + # series + r = test_series.resample("H") + expected = r.max() - r.mean() + result = r.pipe(lambda x: x.max() - x.mean()) + tm.assert_series_equal(result, expected) + + # dataframe + r = test_frame.resample("H") + expected = r.max() - r.mean() + result = r.pipe(lambda x: x.max() - x.mean()) + tm.assert_frame_equal(result, expected) + + +def test_getitem(test_frame): + + r = test_frame.resample("H") + tm.assert_index_equal(r._selected_obj.columns, test_frame.columns) + + r = test_frame.resample("H")["B"] + assert r._selected_obj.name == test_frame.columns[1] + + # technically this is allowed + r = test_frame.resample("H")["A", "B"] + tm.assert_index_equal(r._selected_obj.columns, test_frame.columns[[0, 1]]) + + r = test_frame.resample("H")["A", "B"] + tm.assert_index_equal(r._selected_obj.columns, test_frame.columns[[0, 1]]) + + +@pytest.mark.parametrize("key", [["D"], ["A", "D"]]) +def test_select_bad_cols(key, test_frame): + g = test_frame.resample("H") + # 'A' should not be referenced as a bad column... + # will have to rethink regex if you change message! 
+ msg = r"^\"Columns not found: 'D'\"$" + with pytest.raises(KeyError, match=msg): + g[key] + + +def test_attribute_access(test_frame): + + r = test_frame.resample("H") + tm.assert_series_equal(r.A.sum(), r["A"].sum()) + + +def test_api_compat_before_use(): + + # make sure that we are setting the binner + # on these attributes + for attr in ["groups", "ngroups", "indices"]: + rng = date_range("1/1/2012", periods=100, freq="S") + ts = Series(np.arange(len(rng)), index=rng) + rs = ts.resample("30s") + + # before use + getattr(rs, attr) + + # after grouper is initialized is ok + rs.mean() + getattr(rs, attr) + + +def tests_skip_nuisance(test_frame): + + df = test_frame + df["D"] = "foo" + r = df.resample("H") + result = r[["A", "B"]].sum() + expected = pd.concat([r.A.sum(), r.B.sum()], axis=1) + tm.assert_frame_equal(result, expected) + + expected = r[["A", "B", "C"]].sum() + result = r.sum() + tm.assert_frame_equal(result, expected) + + +def test_downsample_but_actually_upsampling(): + + # this is reindex / asfreq + rng = date_range("1/1/2012", periods=100, freq="S") + ts = Series(np.arange(len(rng), dtype="int64"), index=rng) + result = ts.resample("20s").asfreq() + expected = Series( + [0, 20, 40, 60, 80], + index=date_range("2012-01-01 00:00:00", freq="20s", periods=5), + ) + tm.assert_series_equal(result, expected) + + +def test_combined_up_downsampling_of_irregular(): + + # since we are really doing an operation like this + # ts2.resample('2s').mean().ffill() + # preserve these semantics + + rng = date_range("1/1/2012", periods=100, freq="S") + ts = Series(np.arange(len(rng)), index=rng) + ts2 = ts.iloc[[0, 1, 2, 3, 5, 7, 11, 15, 16, 25, 30]] + + result = ts2.resample("2s").mean().ffill() + expected = Series( + [ + 0.5, + 2.5, + 5.0, + 7.0, + 7.0, + 11.0, + 11.0, + 15.0, + 16.0, + 16.0, + 16.0, + 16.0, + 25.0, + 25.0, + 25.0, + 30.0, + ], + index=pd.DatetimeIndex( + [ + "2012-01-01 00:00:00", + "2012-01-01 00:00:02", + "2012-01-01 00:00:04", + "2012-01-01 00:00:06", + "2012-01-01 00:00:08", + "2012-01-01 00:00:10", + "2012-01-01 00:00:12", + "2012-01-01 00:00:14", + "2012-01-01 00:00:16", + "2012-01-01 00:00:18", + "2012-01-01 00:00:20", + "2012-01-01 00:00:22", + "2012-01-01 00:00:24", + "2012-01-01 00:00:26", + "2012-01-01 00:00:28", + "2012-01-01 00:00:30", + ], + dtype="datetime64[ns]", + freq="2S", + ), + ) + tm.assert_series_equal(result, expected) + + +def test_transform(): + + r = test_series.resample("20min") + expected = test_series.groupby(pd.Grouper(freq="20min")).transform("mean") + result = r.transform("mean") + tm.assert_series_equal(result, expected) + + +def test_fillna(): + + # need to upsample here + rng = date_range("1/1/2012", periods=10, freq="2S") + ts = Series(np.arange(len(rng), dtype="int64"), index=rng) + r = ts.resample("s") + + expected = r.ffill() + result = r.fillna(method="ffill") + tm.assert_series_equal(result, expected) + + expected = r.bfill() + result = r.fillna(method="bfill") + tm.assert_series_equal(result, expected) + + msg = ( + r"Invalid fill method\. Expecting pad \(ffill\), backfill " + r"\(bfill\) or nearest\. 
Got 0" + ) + with pytest.raises(ValueError, match=msg): + r.fillna(0) + + +def test_apply_without_aggregation(): + + # both resample and groupby should work w/o aggregation + r = test_series.resample("20min") + g = test_series.groupby(pd.Grouper(freq="20min")) + + for t in [g, r]: + result = t.apply(lambda x: x) + tm.assert_series_equal(result, test_series) + + +def test_agg_consistency(): + + # make sure that we are consistent across + # similar aggregations with and w/o selection list + df = DataFrame( + np.random.randn(1000, 3), + index=date_range("1/1/2012", freq="S", periods=1000), + columns=["A", "B", "C"], + ) + + r = df.resample("3T") + + msg = r"Column\(s\) \['r1', 'r2'\] do not exist" + with pytest.raises(KeyError, match=msg): + r.agg({"r1": "mean", "r2": "sum"}) + + +def test_agg_consistency_int_str_column_mix(): + # GH#39025 + df = DataFrame( + np.random.randn(1000, 2), + index=date_range("1/1/2012", freq="S", periods=1000), + columns=[1, "a"], + ) + + r = df.resample("3T") + + msg = r"Column\(s\) \[2, 'b'\] do not exist" + with pytest.raises(KeyError, match=msg): + r.agg({2: "mean", "b": "sum"}) + + +# TODO(GH#14008): once GH 14008 is fixed, move these tests into +# `Base` test class + + +def test_agg(): + # test with all three Resampler apis and TimeGrouper + + np.random.seed(1234) + index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D") + index.name = "date" + df = DataFrame(np.random.rand(10, 2), columns=list("AB"), index=index) + df_col = df.reset_index() + df_mult = df_col.copy() + df_mult.index = pd.MultiIndex.from_arrays( + [range(10), df.index], names=["index", "date"] + ) + r = df.resample("2D") + cases = [ + r, + df_col.resample("2D", on="date"), + df_mult.resample("2D", level="date"), + df.groupby(pd.Grouper(freq="2D")), + ] + + a_mean = r["A"].mean() + a_std = r["A"].std() + a_sum = r["A"].sum() + b_mean = r["B"].mean() + b_std = r["B"].std() + b_sum = r["B"].sum() + + expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1) + expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]]) + for t in cases: + warn = FutureWarning if t in cases[1:3] else None + with tm.assert_produces_warning( + warn, + match=r"\['date'\] did not aggregate successfully", + ): + # .var on dt64 column raises and is dropped + result = t.aggregate([np.mean, np.std]) + tm.assert_frame_equal(result, expected) + + expected = pd.concat([a_mean, b_std], axis=1) + for t in cases: + result = t.aggregate({"A": np.mean, "B": np.std}) + tm.assert_frame_equal(result, expected, check_like=True) + + result = t.aggregate(A=("A", np.mean), B=("B", np.std)) + tm.assert_frame_equal(result, expected, check_like=True) + + result = t.aggregate(A=NamedAgg("A", np.mean), B=NamedAgg("B", np.std)) + tm.assert_frame_equal(result, expected, check_like=True) + + expected = pd.concat([a_mean, a_std], axis=1) + expected.columns = pd.MultiIndex.from_tuples([("A", "mean"), ("A", "std")]) + for t in cases: + result = t.aggregate({"A": ["mean", "std"]}) + tm.assert_frame_equal(result, expected) + + expected = pd.concat([a_mean, a_sum], axis=1) + expected.columns = ["mean", "sum"] + for t in cases: + result = t["A"].aggregate(["mean", "sum"]) + tm.assert_frame_equal(result, expected) + + result = t["A"].aggregate(mean="mean", sum="sum") + tm.assert_frame_equal(result, expected) + + msg = "nested renamer is not supported" + for t in cases: + with pytest.raises(pd.core.base.SpecificationError, match=msg): + t.aggregate({"A": {"mean": "mean", "sum": "sum"}}) + + expected = 
pd.concat([a_mean, a_sum, b_mean, b_sum], axis=1) + expected.columns = pd.MultiIndex.from_tuples( + [("A", "mean"), ("A", "sum"), ("B", "mean2"), ("B", "sum2")] + ) + for t in cases: + with pytest.raises(pd.core.base.SpecificationError, match=msg): + t.aggregate( + { + "A": {"mean": "mean", "sum": "sum"}, + "B": {"mean2": "mean", "sum2": "sum"}, + } + ) + + expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1) + expected.columns = pd.MultiIndex.from_tuples( + [("A", "mean"), ("A", "std"), ("B", "mean"), ("B", "std")] + ) + for t in cases: + result = t.aggregate({"A": ["mean", "std"], "B": ["mean", "std"]}) + tm.assert_frame_equal(result, expected, check_like=True) + + expected = pd.concat([a_mean, a_sum, b_mean, b_sum], axis=1) + expected.columns = pd.MultiIndex.from_tuples( + [ + ("r1", "A", "mean"), + ("r1", "A", "sum"), + ("r2", "B", "mean"), + ("r2", "B", "sum"), + ] + ) + + +def test_agg_misc(): + # test with all three Resampler apis and TimeGrouper + + np.random.seed(1234) + index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D") + index.name = "date" + df = DataFrame(np.random.rand(10, 2), columns=list("AB"), index=index) + df_col = df.reset_index() + df_mult = df_col.copy() + df_mult.index = pd.MultiIndex.from_arrays( + [range(10), df.index], names=["index", "date"] + ) + + r = df.resample("2D") + cases = [ + r, + df_col.resample("2D", on="date"), + df_mult.resample("2D", level="date"), + df.groupby(pd.Grouper(freq="2D")), + ] + + # passed lambda + for t in cases: + result = t.agg({"A": np.sum, "B": lambda x: np.std(x, ddof=1)}) + rcustom = t["B"].apply(lambda x: np.std(x, ddof=1)) + expected = pd.concat([r["A"].sum(), rcustom], axis=1) + tm.assert_frame_equal(result, expected, check_like=True) + + result = t.agg(A=("A", np.sum), B=("B", lambda x: np.std(x, ddof=1))) + tm.assert_frame_equal(result, expected, check_like=True) + + result = t.agg( + A=NamedAgg("A", np.sum), B=NamedAgg("B", lambda x: np.std(x, ddof=1)) + ) + tm.assert_frame_equal(result, expected, check_like=True) + + # agg with renamers + expected = pd.concat( + [t["A"].sum(), t["B"].sum(), t["A"].mean(), t["B"].mean()], axis=1 + ) + expected.columns = pd.MultiIndex.from_tuples( + [("result1", "A"), ("result1", "B"), ("result2", "A"), ("result2", "B")] + ) + + msg = r"Column\(s\) \['result1', 'result2'\] do not exist" + for t in cases: + with pytest.raises(KeyError, match=msg): + t[["A", "B"]].agg({"result1": np.sum, "result2": np.mean}) + + with pytest.raises(KeyError, match=msg): + t[["A", "B"]].agg(A=("result1", np.sum), B=("result2", np.mean)) + + with pytest.raises(KeyError, match=msg): + t[["A", "B"]].agg( + A=NamedAgg("result1", np.sum), B=NamedAgg("result2", np.mean) + ) + + # agg with different hows + expected = pd.concat( + [t["A"].sum(), t["A"].std(), t["B"].mean(), t["B"].std()], axis=1 + ) + expected.columns = pd.MultiIndex.from_tuples( + [("A", "sum"), ("A", "std"), ("B", "mean"), ("B", "std")] + ) + for t in cases: + result = t.agg({"A": ["sum", "std"], "B": ["mean", "std"]}) + tm.assert_frame_equal(result, expected, check_like=True) + + # equivalent of using a selection list / or not + for t in cases: + result = t[["A", "B"]].agg({"A": ["sum", "std"], "B": ["mean", "std"]}) + tm.assert_frame_equal(result, expected, check_like=True) + + msg = "nested renamer is not supported" + + # series like aggs + for t in cases: + with pytest.raises(pd.core.base.SpecificationError, match=msg): + t["A"].agg({"A": ["sum", "std"]}) + + with pytest.raises(pd.core.base.SpecificationError, 
match=msg): + t["A"].agg({"A": ["sum", "std"], "B": ["mean", "std"]}) + + # errors + # invalid names in the agg specification + msg = r"Column\(s\) \['B'\] do not exist" + for t in cases: + with pytest.raises(KeyError, match=msg): + t[["A"]].agg({"A": ["sum", "std"], "B": ["mean", "std"]}) + + +def test_agg_nested_dicts(): + + np.random.seed(1234) + index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D") + index.name = "date" + df = DataFrame(np.random.rand(10, 2), columns=list("AB"), index=index) + df_col = df.reset_index() + df_mult = df_col.copy() + df_mult.index = pd.MultiIndex.from_arrays( + [range(10), df.index], names=["index", "date"] + ) + r = df.resample("2D") + cases = [ + r, + df_col.resample("2D", on="date"), + df_mult.resample("2D", level="date"), + df.groupby(pd.Grouper(freq="2D")), + ] + + msg = "nested renamer is not supported" + for t in cases: + with pytest.raises(pd.core.base.SpecificationError, match=msg): + t.aggregate({"r1": {"A": ["mean", "sum"]}, "r2": {"B": ["mean", "sum"]}}) + + for t in cases: + + with pytest.raises(pd.core.base.SpecificationError, match=msg): + t[["A", "B"]].agg( + {"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}} + ) + + with pytest.raises(pd.core.base.SpecificationError, match=msg): + t.agg({"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}}) + + +def test_try_aggregate_non_existing_column(): + # GH 16766 + data = [ + {"dt": datetime(2017, 6, 1, 0), "x": 1.0, "y": 2.0}, + {"dt": datetime(2017, 6, 1, 1), "x": 2.0, "y": 2.0}, + {"dt": datetime(2017, 6, 1, 2), "x": 3.0, "y": 1.5}, + ] + df = DataFrame(data).set_index("dt") + + # Error as we don't have 'z' column + msg = r"Column\(s\) \['z'\] do not exist" + with pytest.raises(KeyError, match=msg): + df.resample("30T").agg({"x": ["mean"], "y": ["median"], "z": ["sum"]}) + + +def test_selection_api_validation(): + # GH 13500 + index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D") + + rng = np.arange(len(index), dtype=np.int64) + df = DataFrame( + {"date": index, "a": rng}, + index=pd.MultiIndex.from_arrays([rng, index], names=["v", "d"]), + ) + df_exp = DataFrame({"a": rng}, index=index) + + # non DatetimeIndex + msg = ( + "Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, " + "but got an instance of 'Int64Index'" + ) + with pytest.raises(TypeError, match=msg): + df.resample("2D", level="v") + + msg = "The Grouper cannot specify both a key and a level!" 
+ with pytest.raises(ValueError, match=msg): + df.resample("2D", on="date", level="d") + + msg = "unhashable type: 'list'" + with pytest.raises(TypeError, match=msg): + df.resample("2D", on=["a", "date"]) + + msg = r"\"Level \['a', 'date'\] not found\"" + with pytest.raises(KeyError, match=msg): + df.resample("2D", level=["a", "date"]) + + # upsampling not allowed + msg = ( + "Upsampling from level= or on= selection is not supported, use " + r"\.set_index\(\.\.\.\) to explicitly set index to datetime-like" + ) + with pytest.raises(ValueError, match=msg): + df.resample("2D", level="d").asfreq() + with pytest.raises(ValueError, match=msg): + df.resample("2D", on="date").asfreq() + + exp = df_exp.resample("2D").sum() + exp.index.name = "date" + tm.assert_frame_equal(exp, df.resample("2D", on="date").sum()) + + exp.index.name = "d" + tm.assert_frame_equal(exp, df.resample("2D", level="d").sum()) + + +@pytest.mark.parametrize( + "col_name", ["t2", "t2x", "t2q", "T_2M", "t2p", "t2m", "t2m1", "T2M"] +) +def test_agg_with_datetime_index_list_agg_func(col_name): + # GH 22660 + # The parametrized column names would get converted to dates by our + # date parser. Some would result in OutOfBoundsError (ValueError) while + # others would result in OverflowError when passed into Timestamp. + # We catch these errors and move on to the correct branch. + df = DataFrame( + list(range(200)), + index=date_range( + start="2017-01-01", freq="15min", periods=200, tz="Europe/Berlin" + ), + columns=[col_name], + ) + result = df.resample("1d").aggregate(["mean"]) + expected = DataFrame( + [47.5, 143.5, 195.5], + index=date_range(start="2017-01-01", freq="D", periods=3, tz="Europe/Berlin"), + columns=pd.MultiIndex(levels=[[col_name], ["mean"]], codes=[[0], [0]]), + ) + tm.assert_frame_equal(result, expected) + + +def test_resample_agg_readonly(): + # GH#31710 cython needs to allow readonly data + index = date_range("2020-01-01", "2020-01-02", freq="1h") + arr = np.zeros_like(index) + arr.setflags(write=False) + + ser = Series(arr, index=index) + rs = ser.resample("1D") + + expected = Series([pd.Timestamp(0), pd.Timestamp(0)], index=index[::24]) + + result = rs.agg("last") + tm.assert_series_equal(result, expected) + + result = rs.agg("first") + tm.assert_series_equal(result, expected) + + result = rs.agg("max") + tm.assert_series_equal(result, expected) + + result = rs.agg("min") + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "start,end,freq,data,resample_freq,origin,closed,exp_data,exp_end,exp_periods", + [ + ( + "2000-10-01 23:30:00", + "2000-10-02 00:26:00", + "7min", + [0, 3, 6, 9, 12, 15, 18, 21, 24], + "17min", + "end", + None, + [0, 18, 27, 63], + "20001002 00:26:00", + 4, + ), + ( + "20200101 8:26:35", + "20200101 9:31:58", + "77s", + [1] * 51, + "7min", + "end", + "right", + [1, 6, 5, 6, 5, 6, 5, 6, 5, 6], + "2020-01-01 09:30:45", + 10, + ), + ( + "2000-10-01 23:30:00", + "2000-10-02 00:26:00", + "7min", + [0, 3, 6, 9, 12, 15, 18, 21, 24], + "17min", + "end", + "left", + [0, 18, 27, 39, 24], + "20001002 00:43:00", + 5, + ), + ( + "2000-10-01 23:30:00", + "2000-10-02 00:26:00", + "7min", + [0, 3, 6, 9, 12, 15, 18, 21, 24], + "17min", + "end_day", + None, + [3, 15, 45, 45], + "2000-10-02 00:29:00", + 4, + ), + ], +) +def test_end_and_end_day_origin( + start, + end, + freq, + data, + resample_freq, + origin, + closed, + exp_data, + exp_end, + exp_periods, +): + rng = date_range(start, end, freq=freq) + ts = Series(data, index=rng) + + res = ts.resample(resample_freq, origin=origin, 
closed=closed).sum() + expected = Series( + exp_data, + index=date_range(end=exp_end, freq=resample_freq, periods=exp_periods), + ) + + tm.assert_series_equal(res, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/resample/test_resampler_grouper.py b/.local/lib/python3.8/site-packages/pandas/tests/resample/test_resampler_grouper.py new file mode 100644 index 0000000..dab3404 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/resample/test_resampler_grouper.py @@ -0,0 +1,464 @@ +from textwrap import dedent + +import numpy as np +import pytest + +import pandas.util._test_decorators as td +from pandas.util._test_decorators import async_mark + +import pandas as pd +from pandas import ( + DataFrame, + Index, + Series, + TimedeltaIndex, + Timestamp, +) +import pandas._testing as tm +from pandas.core.api import Int64Index +from pandas.core.indexes.datetimes import date_range + +test_frame = DataFrame( + {"A": [1] * 20 + [2] * 12 + [3] * 8, "B": np.arange(40)}, + index=date_range("1/1/2000", freq="s", periods=40), +) + + +@async_mark() +@td.check_file_leaks +async def test_tab_complete_ipython6_warning(ip): + from IPython.core.completer import provisionalcompleter + + code = dedent( + """\ + import pandas._testing as tm + s = tm.makeTimeSeries() + rs = s.resample("D") + """ + ) + await ip.run_code(code) + + # GH 31324 newer jedi version raises Deprecation warning; + # appears resolved 2021-02-02 + with tm.assert_produces_warning(None): + with provisionalcompleter("ignore"): + list(ip.Completer.completions("rs.", 1)) + + +def test_deferred_with_groupby(): + + # GH 12486 + # support deferred resample ops with groupby + data = [ + ["2010-01-01", "A", 2], + ["2010-01-02", "A", 3], + ["2010-01-05", "A", 8], + ["2010-01-10", "A", 7], + ["2010-01-13", "A", 3], + ["2010-01-01", "B", 5], + ["2010-01-03", "B", 2], + ["2010-01-04", "B", 1], + ["2010-01-11", "B", 7], + ["2010-01-14", "B", 3], + ] + + df = DataFrame(data, columns=["date", "id", "score"]) + df.date = pd.to_datetime(df.date) + + def f(x): + return x.set_index("date").resample("D").asfreq() + + expected = df.groupby("id").apply(f) + result = df.set_index("date").groupby("id").resample("D").asfreq() + tm.assert_frame_equal(result, expected) + + df = DataFrame( + { + "date": date_range(start="2016-01-01", periods=4, freq="W"), + "group": [1, 1, 2, 2], + "val": [5, 6, 7, 8], + } + ).set_index("date") + + def f(x): + return x.resample("1D").ffill() + + expected = df.groupby("group").apply(f) + result = df.groupby("group").resample("1D").ffill() + tm.assert_frame_equal(result, expected) + + +def test_getitem(): + g = test_frame.groupby("A") + + expected = g.B.apply(lambda x: x.resample("2s").mean()) + + result = g.resample("2s").B.mean() + tm.assert_series_equal(result, expected) + + result = g.B.resample("2s").mean() + tm.assert_series_equal(result, expected) + + result = g.resample("2s").mean().B + tm.assert_series_equal(result, expected) + + +def test_getitem_multiple(): + + # GH 13174 + # multiple calls after selection causing an issue with aliasing + data = [{"id": 1, "buyer": "A"}, {"id": 2, "buyer": "B"}] + df = DataFrame(data, index=date_range("2016-01-01", periods=2)) + r = df.groupby("id").resample("1D") + result = r["buyer"].count() + expected = Series( + [1, 1], + index=pd.MultiIndex.from_tuples( + [(1, Timestamp("2016-01-01")), (2, Timestamp("2016-01-02"))], + names=["id", None], + ), + name="buyer", + ) + tm.assert_series_equal(result, expected) + + result = r["buyer"].count() + 
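(For orientation on the getitem pattern exercised here, a hedged sketch with toy data: selecting a column from a groupby-resample object yields a resampled selection, and repeated calls should keep returning the same result.)

    import pandas as pd

    df = pd.DataFrame(
        {"id": [1, 2], "buyer": ["A", "B"]},
        index=pd.date_range("2016-01-01", periods=2),
    )
    r = df.groupby("id").resample("1D")
    r["buyer"].count()  # Series indexed by an (id, date) MultiIndex
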
tm.assert_series_equal(result, expected) + + +def test_groupby_resample_on_api_with_getitem(): + # GH 17813 + df = DataFrame( + {"id": list("aabbb"), "date": date_range("1-1-2016", periods=5), "data": 1} + ) + exp = df.set_index("date").groupby("id").resample("2D")["data"].sum() + result = df.groupby("id").resample("2D", on="date")["data"].sum() + tm.assert_series_equal(result, exp) + + +def test_groupby_with_origin(): + # GH 31809 + + freq = "1399min" # prime number that is smaller than 24h + start, end = "1/1/2000 00:00:00", "1/31/2000 00:00" + middle = "1/15/2000 00:00:00" + + rng = date_range(start, end, freq="1231min") # prime number + ts = Series(np.random.randn(len(rng)), index=rng) + ts2 = ts[middle:end] + + # proves that a grouper without a fixed origin does not work + # when dealing with unusual frequencies + simple_grouper = pd.Grouper(freq=freq) + count_ts = ts.groupby(simple_grouper).agg("count") + count_ts = count_ts[middle:end] + count_ts2 = ts2.groupby(simple_grouper).agg("count") + with pytest.raises(AssertionError, match="Index are different"): + tm.assert_index_equal(count_ts.index, count_ts2.index) + + # test origin on 1970-01-01 00:00:00 + origin = Timestamp(0) + adjusted_grouper = pd.Grouper(freq=freq, origin=origin) + adjusted_count_ts = ts.groupby(adjusted_grouper).agg("count") + adjusted_count_ts = adjusted_count_ts[middle:end] + adjusted_count_ts2 = ts2.groupby(adjusted_grouper).agg("count") + tm.assert_series_equal(adjusted_count_ts, adjusted_count_ts2) + + # test origin on 2049-10-18 20:00:00 + origin_future = Timestamp(0) + pd.Timedelta("1399min") * 30_000 + adjusted_grouper2 = pd.Grouper(freq=freq, origin=origin_future) + adjusted2_count_ts = ts.groupby(adjusted_grouper2).agg("count") + adjusted2_count_ts = adjusted2_count_ts[middle:end] + adjusted2_count_ts2 = ts2.groupby(adjusted_grouper2).agg("count") + tm.assert_series_equal(adjusted2_count_ts, adjusted2_count_ts2) + + # both groupers use an adjusted timestamp that is a multiple of 1399 min + # they should be equal even if the adjusted timestamp is in the future + tm.assert_series_equal(adjusted_count_ts, adjusted2_count_ts2) + + +def test_nearest(): + + # GH 17496 + # Resample nearest + index = date_range("1/1/2000", periods=3, freq="T") + result = Series(range(3), index=index).resample("20s").nearest() + + expected = Series( + [0, 0, 1, 1, 1, 2, 2], + index=pd.DatetimeIndex( + [ + "2000-01-01 00:00:00", + "2000-01-01 00:00:20", + "2000-01-01 00:00:40", + "2000-01-01 00:01:00", + "2000-01-01 00:01:20", + "2000-01-01 00:01:40", + "2000-01-01 00:02:00", + ], + dtype="datetime64[ns]", + freq="20S", + ), + ) + tm.assert_series_equal(result, expected) + + +def test_methods(): + g = test_frame.groupby("A") + r = g.resample("2s") + + for f in ["first", "last", "median", "sem", "sum", "mean", "min", "max"]: + result = getattr(r, f)() + expected = g.apply(lambda x: getattr(x.resample("2s"), f)()) + tm.assert_frame_equal(result, expected) + + for f in ["size"]: + result = getattr(r, f)() + expected = g.apply(lambda x: getattr(x.resample("2s"), f)()) + tm.assert_series_equal(result, expected) + + for f in ["count"]: + result = getattr(r, f)() + expected = g.apply(lambda x: getattr(x.resample("2s"), f)()) + tm.assert_frame_equal(result, expected) + + # series only + for f in ["nunique"]: + result = getattr(r.B, f)() + expected = g.B.apply(lambda x: getattr(x.resample("2s"), f)()) + tm.assert_series_equal(result, expected) + + for f in ["nearest", "bfill", "ffill", "asfreq"]: + result = getattr(r, f)() + expected = 
g.apply(lambda x: getattr(x.resample("2s"), f)()) + tm.assert_frame_equal(result, expected) + + result = r.ohlc() + expected = g.apply(lambda x: x.resample("2s").ohlc()) + tm.assert_frame_equal(result, expected) + + for f in ["std", "var"]: + result = getattr(r, f)(ddof=1) + expected = g.apply(lambda x: getattr(x.resample("2s"), f)(ddof=1)) + tm.assert_frame_equal(result, expected) + + +def test_apply(): + + g = test_frame.groupby("A") + r = g.resample("2s") + + # reduction + expected = g.resample("2s").sum() + + def f(x): + return x.resample("2s").sum() + + result = r.apply(f) + tm.assert_frame_equal(result, expected) + + def f(x): + return x.resample("2s").apply(lambda y: y.sum()) + + result = g.apply(f) + # y.sum() results in int64 instead of int32 on 32-bit architectures + expected = expected.astype("int64") + tm.assert_frame_equal(result, expected) + + +def test_apply_with_mutated_index(): + # GH 15169 + index = date_range("1-1-2015", "12-31-15", freq="D") + df = DataFrame(data={"col1": np.random.rand(len(index))}, index=index) + + def f(x): + s = Series([1, 2], index=["a", "b"]) + return s + + expected = df.groupby(pd.Grouper(freq="M")).apply(f) + + result = df.resample("M").apply(f) + tm.assert_frame_equal(result, expected) + + # A case for series + expected = df["col1"].groupby(pd.Grouper(freq="M")).apply(f) + result = df["col1"].resample("M").apply(f) + tm.assert_series_equal(result, expected) + + +def test_apply_columns_multilevel(): + # GH 16231 + cols = pd.MultiIndex.from_tuples([("A", "a", "", "one"), ("B", "b", "i", "two")]) + ind = date_range(start="2017-01-01", freq="15Min", periods=8) + df = DataFrame(np.array([0] * 16).reshape(8, 2), index=ind, columns=cols) + agg_dict = {col: (np.sum if col[3] == "one" else np.mean) for col in df.columns} + result = df.resample("H").apply(lambda x: agg_dict[x.name](x)) + expected = DataFrame( + 2 * [[0, 0.0]], + index=date_range(start="2017-01-01", freq="1H", periods=2), + columns=pd.MultiIndex.from_tuples( + [("A", "a", "", "one"), ("B", "b", "i", "two")] + ), + ) + tm.assert_frame_equal(result, expected) + + +def test_resample_groupby_with_label(): + # GH 13235 + index = date_range("2000-01-01", freq="2D", periods=5) + df = DataFrame(index=index, data={"col0": [0, 0, 1, 1, 2], "col1": [1, 1, 1, 1, 1]}) + result = df.groupby("col0").resample("1W", label="left").sum() + + mi = [ + np.array([0, 0, 1, 2]), + pd.to_datetime( + np.array(["1999-12-26", "2000-01-02", "2000-01-02", "2000-01-02"]) + ), + ] + mindex = pd.MultiIndex.from_arrays(mi, names=["col0", None]) + expected = DataFrame( + data={"col0": [0, 0, 2, 2], "col1": [1, 1, 2, 1]}, index=mindex + ) + + tm.assert_frame_equal(result, expected) + + +def test_consistency_with_window(): + + # consistent return values with window + df = test_frame + expected = Int64Index([1, 2, 3], name="A") + result = df.groupby("A").resample("2s").mean() + assert result.index.nlevels == 2 + tm.assert_index_equal(result.index.levels[0], expected) + + result = df.groupby("A").rolling(20).mean() + assert result.index.nlevels == 2 + tm.assert_index_equal(result.index.levels[0], expected) + + +def test_median_duplicate_columns(): + # GH 14233 + + df = DataFrame( + np.random.randn(20, 3), + columns=list("aaa"), + index=date_range("2012-01-01", periods=20, freq="s"), + ) + df2 = df.copy() + df2.columns = ["a", "b", "c"] + expected = df2.resample("5s").median() + result = df.resample("5s").median() + expected.columns = result.columns + tm.assert_frame_equal(result, expected) + + +def 
test_apply_to_one_column_of_df(): + # GH: 36951 + df = DataFrame( + {"col": range(10), "col1": range(10, 20)}, + index=date_range("2012-01-01", periods=10, freq="20min"), + ) + + # access "col" via getattr -> make sure we handle AttributeError + result = df.resample("H").apply(lambda group: group.col.sum()) + expected = Series( + [3, 12, 21, 9], index=date_range("2012-01-01", periods=4, freq="H") + ) + tm.assert_series_equal(result, expected) + + # access "col" via __getitem__ -> make sure we handle KeyError + result = df.resample("H").apply(lambda group: group["col"].sum()) + tm.assert_series_equal(result, expected) + + +def test_resample_groupby_agg(): + # GH: 33548 + df = DataFrame( + { + "cat": [ + "cat_1", + "cat_1", + "cat_2", + "cat_1", + "cat_2", + "cat_1", + "cat_2", + "cat_1", + ], + "num": [5, 20, 22, 3, 4, 30, 10, 50], + "date": [ + "2019-2-1", + "2018-02-03", + "2020-3-11", + "2019-2-2", + "2019-2-2", + "2018-12-4", + "2020-3-11", + "2020-12-12", + ], + } + ) + df["date"] = pd.to_datetime(df["date"]) + + resampled = df.groupby("cat").resample("Y", on="date") + expected = resampled.sum() + result = resampled.agg({"num": "sum"}) + + tm.assert_frame_equal(result, expected) + + +def test_resample_groupby_agg_listlike(): + # GH 42905 + ts = Timestamp("2021-02-28 00:00:00") + df = DataFrame({"class": ["beta"], "value": [69]}, index=Index([ts], name="date")) + resampled = df.groupby("class").resample("M")["value"] + result = resampled.agg(["sum", "size"]) + expected = DataFrame( + [[69, 1]], + index=pd.MultiIndex.from_tuples([("beta", ts)], names=["class", "date"]), + columns=["sum", "size"], + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("keys", [["a"], ["a", "b"]]) +def test_empty(keys): + # GH 26411 + df = DataFrame([], columns=["a", "b"], index=TimedeltaIndex([])) + result = df.groupby(keys).resample(rule=pd.to_timedelta("00:00:01")).mean() + expected = DataFrame(columns=["a", "b"]).set_index(keys, drop=False) + if len(keys) == 1: + expected.index.name = keys[0] + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("consolidate", [True, False]) +def test_resample_groupby_agg_object_dtype_all_nan(consolidate): + # https://github.com/pandas-dev/pandas/issues/39329 + + dates = date_range("2020-01-01", periods=15, freq="D") + df1 = DataFrame({"key": "A", "date": dates, "col1": range(15), "col_object": "val"}) + df2 = DataFrame({"key": "B", "date": dates, "col1": range(15)}) + df = pd.concat([df1, df2], ignore_index=True) + if consolidate: + df = df._consolidate() + + result = df.groupby(["key"]).resample("W", on="date").min() + idx = pd.MultiIndex.from_arrays( + [ + ["A"] * 3 + ["B"] * 3, + pd.to_datetime(["2020-01-05", "2020-01-12", "2020-01-19"] * 2), + ], + names=["key", "date"], + ) + expected = DataFrame( + { + "key": ["A"] * 3 + ["B"] * 3, + "date": pd.to_datetime(["2020-01-01", "2020-01-06", "2020-01-13"] * 2), + "col1": [0, 5, 12] * 2, + "col_object": ["val"] * 3 + [np.nan] * 3, + }, + index=idx, + ) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/resample/test_time_grouper.py b/.local/lib/python3.8/site-packages/pandas/tests/resample/test_time_grouper.py new file mode 100644 index 0000000..2e512d7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/resample/test_time_grouper.py @@ -0,0 +1,357 @@ +from datetime import datetime +from operator import methodcaller + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + 
Series, + Timestamp, +) +import pandas._testing as tm +from pandas.core.groupby.grouper import Grouper +from pandas.core.indexes.datetimes import date_range + +test_series = Series(np.random.randn(1000), index=date_range("1/1/2000", periods=1000)) + + +def test_apply(): + grouper = Grouper(freq="A", label="right", closed="right") + + grouped = test_series.groupby(grouper) + + def f(x): + return x.sort_values()[-3:] + + applied = grouped.apply(f) + expected = test_series.groupby(lambda x: x.year).apply(f) + + applied.index = applied.index.droplevel(0) + expected.index = expected.index.droplevel(0) + tm.assert_series_equal(applied, expected) + + +def test_count(): + test_series[::3] = np.nan + + expected = test_series.groupby(lambda x: x.year).count() + + grouper = Grouper(freq="A", label="right", closed="right") + result = test_series.groupby(grouper).count() + expected.index = result.index + tm.assert_series_equal(result, expected) + + result = test_series.resample("A").count() + expected.index = result.index + tm.assert_series_equal(result, expected) + + +def test_numpy_reduction(): + result = test_series.resample("A", closed="right").prod() + + expected = test_series.groupby(lambda x: x.year).agg(np.prod) + expected.index = result.index + + tm.assert_series_equal(result, expected) + + +def test_apply_iteration(): + # #2300 + N = 1000 + ind = date_range(start="2000-01-01", freq="D", periods=N) + df = DataFrame({"open": 1, "close": 2}, index=ind) + tg = Grouper(freq="M") + + _, grouper, _ = tg._get_grouper(df) + + # Errors + grouped = df.groupby(grouper, group_keys=False) + + def f(df): + return df["close"] / df["open"] + + # it works! + result = grouped.apply(f) + tm.assert_index_equal(result.index, df.index) + + +@pytest.mark.parametrize( + "name, func", + [ + ("Int64Index", tm.makeIntIndex), + ("Index", tm.makeUnicodeIndex), + ("Float64Index", tm.makeFloatIndex), + ("MultiIndex", lambda m: tm.makeCustomIndex(m, 2)), + ], +) +def test_fails_on_no_datetime_index(name, func): + n = 2 + index = func(n) + df = DataFrame({"a": np.random.randn(n)}, index=index) + + msg = ( + "Only valid with DatetimeIndex, TimedeltaIndex " + f"or PeriodIndex, but got an instance of '{name}'" + ) + with pytest.raises(TypeError, match=msg): + df.groupby(Grouper(freq="D")) + + +def test_aaa_group_order(): + # GH 12840 + # check that TimeGrouper performs stable sorts + n = 20 + data = np.random.randn(n, 4) + df = DataFrame(data, columns=["A", "B", "C", "D"]) + df["key"] = [ + datetime(2013, 1, 1), + datetime(2013, 1, 2), + datetime(2013, 1, 3), + datetime(2013, 1, 4), + datetime(2013, 1, 5), + ] * 4 + grouped = df.groupby(Grouper(key="key", freq="D")) + + tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 1)), df[::5]) + tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 2)), df[1::5]) + tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 3)), df[2::5]) + tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 4)), df[3::5]) + tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 5)), df[4::5]) + + +def test_aggregate_normal(resample_method): + """Check that TimeGrouper's aggregation is identical to a normal groupby.""" + + data = np.random.randn(20, 4) + normal_df = DataFrame(data, columns=["A", "B", "C", "D"]) + normal_df["key"] = [1, 2, 3, 4, 5] * 4 + + dt_df = DataFrame(data, columns=["A", "B", "C", "D"]) + dt_df["key"] = [ + datetime(2013, 1, 1), + datetime(2013, 1, 2), + datetime(2013, 1, 3), + datetime(2013, 1, 4), + datetime(2013, 1, 5), + ] * 4 + + normal_grouped = normal_df.groupby("key") + 
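(The equivalence this test relies on, sketched self-contained with illustrative names and toy data: grouping a datetime key through Grouper(key=..., freq="D") should produce the same aggregates as an ordinary groupby when each key already falls in its own daily bin.)

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"val": np.arange(6.0)})
    df["key"] = pd.to_datetime(["2013-01-01", "2013-01-02", "2013-01-03"] * 2)
    by_time = df.groupby(pd.Grouper(key="key", freq="D"))["val"].sum()
    by_plain = df.groupby("key")["val"].sum()
    assert (by_time.values == by_plain.values).all()  # same aggregates
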
dt_grouped = dt_df.groupby(Grouper(key="key", freq="D")) + + expected = getattr(normal_grouped, resample_method)() + dt_result = getattr(dt_grouped, resample_method)() + expected.index = date_range(start="2013-01-01", freq="D", periods=5, name="key") + tm.assert_equal(expected, dt_result) + + # if TimeGrouper is used, 'nth' doesn't work yet + + """ + for func in ['nth']: + expected = getattr(normal_grouped, func)(3) + expected.index = date_range(start='2013-01-01', + freq='D', periods=5, name='key') + dt_result = getattr(dt_grouped, func)(3) + tm.assert_frame_equal(expected, dt_result) + """ + + +@pytest.mark.parametrize( + "method, method_args, unit", + [ + ("sum", {}, 0), + ("sum", {"min_count": 0}, 0), + ("sum", {"min_count": 1}, np.nan), + ("prod", {}, 1), + ("prod", {"min_count": 0}, 1), + ("prod", {"min_count": 1}, np.nan), + ], +) +def test_resample_entirely_nat_window(method, method_args, unit): + s = Series([0] * 2 + [np.nan] * 2, index=date_range("2017", periods=4)) + result = methodcaller(method, **method_args)(s.resample("2d")) + expected = Series( + [0.0, unit], index=pd.DatetimeIndex(["2017-01-01", "2017-01-03"], freq="2D") + ) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "func, fill_value", + [("min", np.nan), ("max", np.nan), ("sum", 0), ("prod", 1), ("count", 0)], +) +def test_aggregate_with_nat(func, fill_value): + # check that TimeGrouper's aggregation is identical to a normal groupby; + # if NaT is included, 'var', 'std', 'mean', 'first', 'last' + # and 'nth' don't work yet + + n = 20 + data = np.random.randn(n, 4).astype("int64") + normal_df = DataFrame(data, columns=["A", "B", "C", "D"]) + normal_df["key"] = [1, 2, np.nan, 4, 5] * 4 + + dt_df = DataFrame(data, columns=["A", "B", "C", "D"]) + dt_df["key"] = [ + datetime(2013, 1, 1), + datetime(2013, 1, 2), + pd.NaT, + datetime(2013, 1, 4), + datetime(2013, 1, 5), + ] * 4 + + normal_grouped = normal_df.groupby("key") + dt_grouped = dt_df.groupby(Grouper(key="key", freq="D")) + + normal_result = getattr(normal_grouped, func)() + dt_result = getattr(dt_grouped, func)() + + pad = DataFrame([[fill_value] * 4], index=[3], columns=["A", "B", "C", "D"]) + expected = pd.concat([normal_result, pad]) + expected = expected.sort_index() + dti = date_range(start="2013-01-01", freq="D", periods=5, name="key") + expected.index = dti._with_freq(None) # TODO: is this desired? 
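(What the pad above compensates for, in a minimal sketch with toy data: groupby drops rows whose key is NaT, and the daily Grouper additionally emits an empty bin for the missing date, so the plain-groupby result has to be padded before comparison.)

    import pandas as pd

    df = pd.DataFrame({"v": [1, 2, 3]})
    df["key"] = [pd.Timestamp("2013-01-01"), pd.NaT, pd.Timestamp("2013-01-03")]
    # the NaT row is excluded; 2013-01-02 shows up as an empty bin
    df.groupby(pd.Grouper(key="key", freq="D"))["v"].count()
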
+ tm.assert_frame_equal(expected, dt_result) + assert dt_result.index.name == "key" + + +def test_aggregate_with_nat_size(): + # GH 9925 + n = 20 + data = np.random.randn(n, 4).astype("int64") + normal_df = DataFrame(data, columns=["A", "B", "C", "D"]) + normal_df["key"] = [1, 2, np.nan, 4, 5] * 4 + + dt_df = DataFrame(data, columns=["A", "B", "C", "D"]) + dt_df["key"] = [ + datetime(2013, 1, 1), + datetime(2013, 1, 2), + pd.NaT, + datetime(2013, 1, 4), + datetime(2013, 1, 5), + ] * 4 + + normal_grouped = normal_df.groupby("key") + dt_grouped = dt_df.groupby(Grouper(key="key", freq="D")) + + normal_result = normal_grouped.size() + dt_result = dt_grouped.size() + + pad = Series([0], index=[3]) + expected = pd.concat([normal_result, pad]) + expected = expected.sort_index() + expected.index = date_range( + start="2013-01-01", freq="D", periods=5, name="key" + )._with_freq(None) + tm.assert_series_equal(expected, dt_result) + assert dt_result.index.name == "key" + + +def test_repr(): + # GH18203 + result = repr(Grouper(key="A", freq="H")) + expected = ( + "TimeGrouper(key='A', freq=<Hour>, axis=0, sort=True, " + "closed='left', label='left', how='mean', " + "convention='e', origin='start_day')" + ) + assert result == expected + + result = repr(Grouper(key="A", freq="H", origin="2000-01-01")) + expected = ( + "TimeGrouper(key='A', freq=<Hour>, axis=0, sort=True, " + "closed='left', label='left', how='mean', " + "convention='e', origin=Timestamp('2000-01-01 00:00:00'))" + ) + assert result == expected + + +@pytest.mark.parametrize( + "method, method_args, expected_values", + [ + ("sum", {}, [1, 0, 1]), + ("sum", {"min_count": 0}, [1, 0, 1]), + ("sum", {"min_count": 1}, [1, np.nan, 1]), + ("sum", {"min_count": 2}, [np.nan, np.nan, np.nan]), + ("prod", {}, [1, 1, 1]), + ("prod", {"min_count": 0}, [1, 1, 1]), + ("prod", {"min_count": 1}, [1, np.nan, 1]), + ("prod", {"min_count": 2}, [np.nan, np.nan, np.nan]), + ], +) +def test_upsample_sum(method, method_args, expected_values): + s = Series(1, index=date_range("2017", periods=2, freq="H")) + resampled = s.resample("30T") + index = pd.DatetimeIndex( + ["2017-01-01T00:00:00", "2017-01-01T00:30:00", "2017-01-01T01:00:00"], + freq="30T", + ) + result = methodcaller(method, **method_args)(resampled) + expected = Series(expected_values, index=index) + tm.assert_series_equal(result, expected) + + +def test_groupby_resample_interpolate(): + # GH 35325 + d = {"price": [10, 11, 9], "volume": [50, 60, 50]} + + df = DataFrame(d) + + df["week_starting"] = date_range("01/01/2018", periods=3, freq="W") + + result = ( + df.set_index("week_starting") + .groupby("volume") + .resample("1D") + .interpolate(method="linear") + ) + + msg = "containing strings is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + expected_ind = pd.MultiIndex.from_tuples( + [ + (50, "2018-01-07"), + (50, Timestamp("2018-01-08")), + (50, Timestamp("2018-01-09")), + (50, Timestamp("2018-01-10")), + (50, Timestamp("2018-01-11")), + (50, Timestamp("2018-01-12")), + (50, Timestamp("2018-01-13")), + (50, Timestamp("2018-01-14")), + (50, Timestamp("2018-01-15")), + (50, Timestamp("2018-01-16")), + (50, Timestamp("2018-01-17")), + (50, Timestamp("2018-01-18")), + (50, Timestamp("2018-01-19")), + (50, Timestamp("2018-01-20")), + (50, Timestamp("2018-01-21")), + (60, Timestamp("2018-01-14")), + ], + names=["volume", "week_starting"], + ) + + expected = DataFrame( + data={ + "price": [ + 10.0, + 9.928571428571429, + 9.857142857142858, + 9.785714285714286, + 9.714285714285714, + 
9.642857142857142, + 9.571428571428571, + 9.5, + 9.428571428571429, + 9.357142857142858, + 9.285714285714286, + 9.214285714285714, + 9.142857142857142, + 9.071428571428571, + 9.0, + 11.0, + ], + "volume": [50.0] * 15 + [60], + }, + index=expected_ind, + ) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/resample/test_timedelta.py b/.local/lib/python3.8/site-packages/pandas/tests/resample/test_timedelta.py new file mode 100644 index 0000000..d55dbfc --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/resample/test_timedelta.py @@ -0,0 +1,193 @@ +from datetime import timedelta + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Series, +) +import pandas._testing as tm +from pandas.core.indexes.timedeltas import timedelta_range + + +def test_asfreq_bug(): + df = DataFrame(data=[1, 3], index=[timedelta(), timedelta(minutes=3)]) + result = df.resample("1T").asfreq() + expected = DataFrame( + data=[1, np.nan, np.nan, 3], + index=timedelta_range("0 day", periods=4, freq="1T"), + ) + tm.assert_frame_equal(result, expected) + + +def test_resample_with_nat(): + # GH 13223 + index = pd.to_timedelta(["0s", pd.NaT, "2s"]) + result = DataFrame({"value": [2, 3, 5]}, index).resample("1s").mean() + expected = DataFrame( + {"value": [2.5, np.nan, 5.0]}, + index=timedelta_range("0 day", periods=3, freq="1S"), + ) + tm.assert_frame_equal(result, expected) + + +def test_resample_as_freq_with_subperiod(): + # GH 13022 + index = timedelta_range("00:00:00", "00:10:00", freq="5T") + df = DataFrame(data={"value": [1, 5, 10]}, index=index) + result = df.resample("2T").asfreq() + expected_data = {"value": [1, np.nan, np.nan, np.nan, np.nan, 10]} + expected = DataFrame( + data=expected_data, index=timedelta_range("00:00:00", "00:10:00", freq="2T") + ) + tm.assert_frame_equal(result, expected) + + +def test_resample_with_timedeltas(): + + expected = DataFrame({"A": np.arange(1480)}) + expected = expected.groupby(expected.index // 30).sum() + expected.index = timedelta_range("0 days", freq="30T", periods=50) + + df = DataFrame( + {"A": np.arange(1480)}, index=pd.to_timedelta(np.arange(1480), unit="T") + ) + result = df.resample("30T").sum() + + tm.assert_frame_equal(result, expected) + + s = df["A"] + result = s.resample("30T").sum() + tm.assert_series_equal(result, expected["A"]) + + +def test_resample_single_period_timedelta(): + + s = Series(list(range(5)), index=timedelta_range("1 day", freq="s", periods=5)) + result = s.resample("2s").sum() + expected = Series([1, 5, 4], index=timedelta_range("1 day", freq="2s", periods=3)) + tm.assert_series_equal(result, expected) + + +def test_resample_timedelta_idempotency(): + + # GH 12072 + index = timedelta_range("0", periods=9, freq="10L") + series = Series(range(9), index=index) + result = series.resample("10L").mean() + expected = series.astype(float) + tm.assert_series_equal(result, expected) + + +def test_resample_offset_with_timedeltaindex(): + # GH 10530 & 31809 + rng = timedelta_range(start="0s", periods=25, freq="s") + ts = Series(np.random.randn(len(rng)), index=rng) + + with_base = ts.resample("2s", offset="5s").mean() + without_base = ts.resample("2s").mean() + + exp_without_base = timedelta_range(start="0s", end="25s", freq="2s") + exp_with_base = timedelta_range(start="5s", end="29s", freq="2s") + + tm.assert_index_equal(without_base.index, exp_without_base) + tm.assert_index_equal(with_base.index, exp_with_base) + + +def 
test_resample_categorical_data_with_timedeltaindex(): + # GH #12169 + df = DataFrame({"Group_obj": "A"}, index=pd.to_timedelta(list(range(20)), unit="s")) + df["Group"] = df["Group_obj"].astype("category") + result = df.resample("10s").agg(lambda x: (x.value_counts().index[0])) + expected = DataFrame( + {"Group_obj": ["A", "A"], "Group": ["A", "A"]}, + index=pd.TimedeltaIndex([0, 10], unit="s", freq="10s"), + ) + expected = expected.reindex(["Group_obj", "Group"], axis=1) + expected["Group"] = expected["Group_obj"] + tm.assert_frame_equal(result, expected) + + +def test_resample_timedelta_values(): + # GH 13119 + # check that timedelta dtype is preserved when NaT values are + # introduced by the resampling + + times = timedelta_range("1 day", "6 day", freq="4D") + df = DataFrame({"time": times}, index=times) + + times2 = timedelta_range("1 day", "6 day", freq="2D") + exp = Series(times2, index=times2, name="time") + exp.iloc[1] = pd.NaT + + res = df.resample("2D").first()["time"] + tm.assert_series_equal(res, exp) + res = df["time"].resample("2D").first() + tm.assert_series_equal(res, exp) + + +@pytest.mark.parametrize( + "start, end, freq, resample_freq", + [ + ("8H", "21h59min50s", "10S", "3H"), # GH 30353 example + ("3H", "22H", "1H", "5H"), + ("527D", "5006D", "3D", "10D"), + ("1D", "10D", "1D", "2D"), # GH 13022 example + # tests that worked before GH 33498: + ("8H", "21h59min50s", "10S", "2H"), + ("0H", "21h59min50s", "10S", "3H"), + ("10D", "85D", "D", "2D"), + ], +) +def test_resample_timedelta_edge_case(start, end, freq, resample_freq): + # GH 33498 + # check that the timedelta bins do not contain an extra bin + idx = timedelta_range(start=start, end=end, freq=freq) + s = Series(np.arange(len(idx)), index=idx) + result = s.resample(resample_freq).min() + expected_index = timedelta_range(freq=resample_freq, start=start, end=end) + tm.assert_index_equal(result.index, expected_index) + assert result.index.freq == expected_index.freq + assert not np.isnan(result[-1]) + + +@pytest.mark.parametrize("duplicates", [True, False]) +def test_resample_with_timedelta_yields_no_empty_groups(duplicates): + # GH 10603 + df = DataFrame( + np.random.normal(size=(10000, 4)), + index=timedelta_range(start="0s", periods=10000, freq="3906250n"), + ) + if duplicates: + # case with non-unique columns + df.columns = ["A", "B", "A", "C"] + + result = df.loc["1s":, :].resample("3s").apply(lambda x: len(x)) + + expected = DataFrame( + [[768] * 4] * 12 + [[528] * 4], + index=timedelta_range(start="1s", periods=13, freq="3s"), + ) + expected.columns = df.columns + tm.assert_frame_equal(result, expected) + + +def test_resample_quantile_timedelta(): + # GH: 29485 + df = DataFrame( + {"value": pd.to_timedelta(np.arange(4), unit="s")}, + index=pd.date_range("20200101", periods=4, tz="UTC"), + ) + result = df.resample("2D").quantile(0.99) + expected = DataFrame( + { + "value": [ + pd.Timedelta("0 days 00:00:00.990000"), + pd.Timedelta("0 days 00:00:02.990000"), + ] + }, + index=pd.date_range("20200101", periods=2, tz="UTC", freq="2D"), + ) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/reshape/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/reshape/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..6e88542 
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/reshape/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/__pycache__/test_crosstab.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/reshape/__pycache__/test_crosstab.cpython-38.pyc new file mode 100644 index 0000000..f5ff647 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/reshape/__pycache__/test_crosstab.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/__pycache__/test_cut.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/reshape/__pycache__/test_cut.cpython-38.pyc new file mode 100644 index 0000000..79a84b3 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/reshape/__pycache__/test_cut.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/__pycache__/test_get_dummies.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/reshape/__pycache__/test_get_dummies.cpython-38.pyc new file mode 100644 index 0000000..22eb561 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/reshape/__pycache__/test_get_dummies.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/__pycache__/test_melt.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/reshape/__pycache__/test_melt.cpython-38.pyc new file mode 100644 index 0000000..bfec67d Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/reshape/__pycache__/test_melt.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/__pycache__/test_pivot.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/reshape/__pycache__/test_pivot.cpython-38.pyc new file mode 100644 index 0000000..019bab7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/reshape/__pycache__/test_pivot.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/__pycache__/test_pivot_multilevel.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/reshape/__pycache__/test_pivot_multilevel.cpython-38.pyc new file mode 100644 index 0000000..78ab319 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/reshape/__pycache__/test_pivot_multilevel.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/__pycache__/test_qcut.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/reshape/__pycache__/test_qcut.cpython-38.pyc new file mode 100644 index 0000000..7c811fb Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/reshape/__pycache__/test_qcut.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/__pycache__/test_union_categoricals.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/reshape/__pycache__/test_union_categoricals.cpython-38.pyc new file mode 100644 index 0000000..8e2f18c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/reshape/__pycache__/test_union_categoricals.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/__pycache__/test_util.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/reshape/__pycache__/test_util.cpython-38.pyc new file mode 100644 index 0000000..6bca155 Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/pandas/tests/reshape/__pycache__/test_util.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..8d63233 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/__pycache__/conftest.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/__pycache__/conftest.cpython-38.pyc new file mode 100644 index 0000000..19950df Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/__pycache__/conftest.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/__pycache__/test_append.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/__pycache__/test_append.cpython-38.pyc new file mode 100644 index 0000000..54bbbd5 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/__pycache__/test_append.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/__pycache__/test_append_common.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/__pycache__/test_append_common.cpython-38.pyc new file mode 100644 index 0000000..944bf73 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/__pycache__/test_append_common.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/__pycache__/test_categorical.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/__pycache__/test_categorical.cpython-38.pyc new file mode 100644 index 0000000..f937d9d Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/__pycache__/test_categorical.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/__pycache__/test_concat.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/__pycache__/test_concat.cpython-38.pyc new file mode 100644 index 0000000..77a893c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/__pycache__/test_concat.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/__pycache__/test_dataframe.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/__pycache__/test_dataframe.cpython-38.pyc new file mode 100644 index 0000000..c948ef2 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/__pycache__/test_dataframe.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/__pycache__/test_datetimes.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/__pycache__/test_datetimes.cpython-38.pyc new file mode 100644 index 0000000..ff9e994 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/__pycache__/test_datetimes.cpython-38.pyc differ 
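(Recapping the TimedeltaIndex resampling that test_timedelta.py above exercises, as a minimal self-contained sketch with made-up data: resampling works on timedelta-indexed data just as on datetime-indexed data, binning rows by elapsed time.)

    import numpy as np
    import pandas as pd

    idx = pd.timedelta_range("0s", periods=6, freq="30s")
    s = pd.Series(np.arange(6), index=idx)
    s.resample("1min").sum()  # bins at 0min, 1min, 2min -> sums 1, 5, 9
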
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/__pycache__/test_empty.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/__pycache__/test_empty.cpython-38.pyc new file mode 100644 index 0000000..83d620d Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/__pycache__/test_empty.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/__pycache__/test_index.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/__pycache__/test_index.cpython-38.pyc new file mode 100644 index 0000000..57bd81e Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/__pycache__/test_index.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/__pycache__/test_invalid.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/__pycache__/test_invalid.cpython-38.pyc new file mode 100644 index 0000000..0df6730 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/__pycache__/test_invalid.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/__pycache__/test_series.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/__pycache__/test_series.cpython-38.pyc new file mode 100644 index 0000000..16a109c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/__pycache__/test_series.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/__pycache__/test_sort.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/__pycache__/test_sort.cpython-38.pyc new file mode 100644 index 0000000..82772d6 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/__pycache__/test_sort.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/conftest.py b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/conftest.py new file mode 100644 index 0000000..62b8c59 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/conftest.py @@ -0,0 +1,7 @@ +import pytest + + +@pytest.fixture(params=[True, False]) +def sort(request): + """Boolean sort keyword for concat and DataFrame.append.""" + return request.param diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/test_append.py b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/test_append.py new file mode 100644 index 0000000..0b1d1c4 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/test_append.py @@ -0,0 +1,378 @@ +import datetime as dt +from itertools import combinations + +import dateutil +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Index, + Series, + Timestamp, + concat, + isna, +) +import pandas._testing as tm + + +class TestAppend: + def test_append(self, sort, float_frame): + mixed_frame = float_frame.copy() + mixed_frame["foo"] = "bar" + + begin_index = float_frame.index[:5] + end_index = float_frame.index[5:] + + begin_frame = float_frame.reindex(begin_index) + end_frame = float_frame.reindex(end_index) + + appended = begin_frame._append(end_frame) + tm.assert_almost_equal(appended["A"], float_frame["A"]) + + del end_frame["A"] + partial_appended = begin_frame._append(end_frame, 
sort=sort) + assert "A" in partial_appended + + partial_appended = end_frame._append(begin_frame, sort=sort) + assert "A" in partial_appended + + # mixed type handling + appended = mixed_frame[:5]._append(mixed_frame[5:]) + tm.assert_frame_equal(appended, mixed_frame) + + # what to test here + mixed_appended = mixed_frame[:5]._append(float_frame[5:], sort=sort) + mixed_appended2 = float_frame[:5]._append(mixed_frame[5:], sort=sort) + + # all equal except 'foo' column + tm.assert_frame_equal( + mixed_appended.reindex(columns=["A", "B", "C", "D"]), + mixed_appended2.reindex(columns=["A", "B", "C", "D"]), + ) + + def test_append_empty(self, float_frame): + empty = DataFrame() + + appended = float_frame._append(empty) + tm.assert_frame_equal(float_frame, appended) + assert appended is not float_frame + + appended = empty._append(float_frame) + tm.assert_frame_equal(float_frame, appended) + assert appended is not float_frame + + def test_append_overlap_raises(self, float_frame): + msg = "Indexes have overlapping values" + with pytest.raises(ValueError, match=msg): + float_frame._append(float_frame, verify_integrity=True) + + def test_append_new_columns(self): + # see gh-6129: new columns + df = DataFrame({"a": {"x": 1, "y": 2}, "b": {"x": 3, "y": 4}}) + row = Series([5, 6, 7], index=["a", "b", "c"], name="z") + expected = DataFrame( + { + "a": {"x": 1, "y": 2, "z": 5}, + "b": {"x": 3, "y": 4, "z": 6}, + "c": {"z": 7}, + } + ) + result = df._append(row) + tm.assert_frame_equal(result, expected) + + def test_append_length0_frame(self, sort): + df = DataFrame(columns=["A", "B", "C"]) + df3 = DataFrame(index=[0, 1], columns=["A", "B"]) + df5 = df._append(df3, sort=sort) + + expected = DataFrame(index=[0, 1], columns=["A", "B", "C"]) + tm.assert_frame_equal(df5, expected) + + def test_append_records(self): + arr1 = np.zeros((2,), dtype=("i4,f4,a10")) + arr1[:] = [(1, 2.0, "Hello"), (2, 3.0, "World")] + + arr2 = np.zeros((3,), dtype=("i4,f4,a10")) + arr2[:] = [(3, 4.0, "foo"), (5, 6.0, "bar"), (7.0, 8.0, "baz")] + + df1 = DataFrame(arr1) + df2 = DataFrame(arr2) + + result = df1._append(df2, ignore_index=True) + expected = DataFrame(np.concatenate((arr1, arr2))) + tm.assert_frame_equal(result, expected) + + # rewrite sort fixture, since we also want to test default of None + def test_append_sorts(self, sort): + df1 = DataFrame({"a": [1, 2], "b": [1, 2]}, columns=["b", "a"]) + df2 = DataFrame({"a": [1, 2], "c": [3, 4]}, index=[2, 3]) + + result = df1._append(df2, sort=sort) + + # for None / True + expected = DataFrame( + {"b": [1, 2, None, None], "a": [1, 2, 1, 2], "c": [None, None, 3, 4]}, + columns=["a", "b", "c"], + ) + if sort is False: + expected = expected[["b", "a", "c"]] + tm.assert_frame_equal(result, expected) + + def test_append_different_columns(self, sort): + df = DataFrame( + { + "bools": np.random.randn(10) > 0, + "ints": np.random.randint(0, 10, 10), + "floats": np.random.randn(10), + "strings": ["foo", "bar"] * 5, + } + ) + + a = df[:5].loc[:, ["bools", "ints", "floats"]] + b = df[5:].loc[:, ["strings", "ints", "floats"]] + + appended = a._append(b, sort=sort) + assert isna(appended["strings"][0:4]).all() + assert isna(appended["bools"][5:]).all() + + def test_append_many(self, sort, float_frame): + chunks = [ + float_frame[:5], + float_frame[5:10], + float_frame[10:15], + float_frame[15:], + ] + + result = chunks[0]._append(chunks[1:]) + tm.assert_frame_equal(result, float_frame) + + chunks[-1] = chunks[-1].copy() + chunks[-1]["foo"] = "bar" + result = chunks[0]._append(chunks[1:], 
sort=sort) + tm.assert_frame_equal(result.loc[:, float_frame.columns], float_frame) + assert (result["foo"][15:] == "bar").all() + assert result["foo"][:15].isna().all() + + def test_append_preserve_index_name(self): + # #980 + df1 = DataFrame(columns=["A", "B", "C"]) + df1 = df1.set_index(["A"]) + df2 = DataFrame(data=[[1, 4, 7], [2, 5, 8], [3, 6, 9]], columns=["A", "B", "C"]) + df2 = df2.set_index(["A"]) + + result = df1._append(df2) + assert result.index.name == "A" + + indexes_can_append = [ + pd.RangeIndex(3), + Index([4, 5, 6]), + Index([4.5, 5.5, 6.5]), + Index(list("abc")), + pd.CategoricalIndex("A B C".split()), + pd.CategoricalIndex("D E F".split(), ordered=True), + pd.IntervalIndex.from_breaks([7, 8, 9, 10]), + pd.DatetimeIndex( + [ + dt.datetime(2013, 1, 3, 0, 0), + dt.datetime(2013, 1, 3, 6, 10), + dt.datetime(2013, 1, 3, 7, 12), + ] + ), + pd.MultiIndex.from_arrays(["A B C".split(), "D E F".split()]), + ] + + @pytest.mark.parametrize( + "index", indexes_can_append, ids=lambda x: type(x).__name__ + ) + def test_append_same_columns_type(self, index): + # GH18359 + + # df wider than ser + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=index) + ser_index = index[:2] + ser = Series([7, 8], index=ser_index, name=2) + result = df._append(ser) + expected = DataFrame( + [[1, 2, 3.0], [4, 5, 6], [7, 8, np.nan]], index=[0, 1, 2], columns=index + ) + # integer dtype is preserved for columns present in ser.index + assert expected.dtypes.iloc[0].kind == "i" + assert expected.dtypes.iloc[1].kind == "i" + + tm.assert_frame_equal(result, expected) + + # ser wider than df + ser_index = index + index = index[:2] + df = DataFrame([[1, 2], [4, 5]], columns=index) + ser = Series([7, 8, 9], index=ser_index, name=2) + result = df._append(ser) + expected = DataFrame( + [[1, 2, np.nan], [4, 5, np.nan], [7, 8, 9]], + index=[0, 1, 2], + columns=ser_index, + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "df_columns, series_index", + combinations(indexes_can_append, r=2), + ids=lambda x: type(x).__name__, + ) + def test_append_different_columns_types(self, df_columns, series_index): + # GH18359 + # See also test 'test_append_different_columns_types_raises' below + # for errors raised when appending + + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=df_columns) + ser = Series([7, 8, 9], index=series_index, name=2) + + result = df._append(ser) + idx_diff = ser.index.difference(df_columns) + combined_columns = Index(df_columns.tolist()).append(idx_diff) + expected = DataFrame( + [ + [1.0, 2.0, 3.0, np.nan, np.nan, np.nan], + [4, 5, 6, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, 7, 8, 9], + ], + index=[0, 1, 2], + columns=combined_columns, + ) + tm.assert_frame_equal(result, expected) + + def test_append_dtype_coerce(self, sort): + + # GH 4993 + # appending with datetime will incorrectly convert datetime64 + + df1 = DataFrame( + index=[1, 2], + data=[dt.datetime(2013, 1, 1, 0, 0), dt.datetime(2013, 1, 2, 0, 0)], + columns=["start_time"], + ) + df2 = DataFrame( + index=[4, 5], + data=[ + [dt.datetime(2013, 1, 3, 0, 0), dt.datetime(2013, 1, 3, 6, 10)], + [dt.datetime(2013, 1, 4, 0, 0), dt.datetime(2013, 1, 4, 7, 10)], + ], + columns=["start_time", "end_time"], + ) + + expected = concat( + [ + Series( + [ + pd.NaT, + pd.NaT, + dt.datetime(2013, 1, 3, 6, 10), + dt.datetime(2013, 1, 4, 7, 10), + ], + name="end_time", + ), + Series( + [ + dt.datetime(2013, 1, 1, 0, 0), + dt.datetime(2013, 1, 2, 0, 0), + dt.datetime(2013, 1, 3, 0, 0), + dt.datetime(2013, 1, 4, 0, 0), + ], + 
name="start_time", + ), + ], + axis=1, + sort=sort, + ) + result = df1._append(df2, ignore_index=True, sort=sort) + if sort: + expected = expected[["end_time", "start_time"]] + else: + expected = expected[["start_time", "end_time"]] + + tm.assert_frame_equal(result, expected) + + def test_append_missing_column_proper_upcast(self, sort): + df1 = DataFrame({"A": np.array([1, 2, 3, 4], dtype="i8")}) + df2 = DataFrame({"B": np.array([True, False, True, False], dtype=bool)}) + + appended = df1._append(df2, ignore_index=True, sort=sort) + assert appended["A"].dtype == "f8" + assert appended["B"].dtype == "O" + + def test_append_empty_frame_to_series_with_dateutil_tz(self): + # GH 23682 + date = Timestamp("2018-10-24 07:30:00", tz=dateutil.tz.tzutc()) + ser = Series({"a": 1.0, "b": 2.0, "date": date}) + df = DataFrame(columns=["c", "d"]) + result_a = df._append(ser, ignore_index=True) + expected = DataFrame( + [[np.nan, np.nan, 1.0, 2.0, date]], columns=["c", "d", "a", "b", "date"] + ) + # These columns get cast to object after append + expected["c"] = expected["c"].astype(object) + expected["d"] = expected["d"].astype(object) + tm.assert_frame_equal(result_a, expected) + + expected = DataFrame( + [[np.nan, np.nan, 1.0, 2.0, date]] * 2, columns=["c", "d", "a", "b", "date"] + ) + expected["c"] = expected["c"].astype(object) + expected["d"] = expected["d"].astype(object) + result_b = result_a._append(ser, ignore_index=True) + tm.assert_frame_equal(result_b, expected) + + result = df._append([ser, ser], ignore_index=True) + tm.assert_frame_equal(result, expected) + + def test_append_empty_tz_frame_with_datetime64ns(self): + # https://github.com/pandas-dev/pandas/issues/35460 + df = DataFrame(columns=["a"]).astype("datetime64[ns, UTC]") + + # pd.NaT gets inferred as tz-naive, so append result is tz-naive + result = df._append({"a": pd.NaT}, ignore_index=True) + expected = DataFrame({"a": [pd.NaT]}).astype(object) + tm.assert_frame_equal(result, expected) + + # also test with typed value to append + df = DataFrame(columns=["a"]).astype("datetime64[ns, UTC]") + other = Series({"a": pd.NaT}, dtype="datetime64[ns]") + result = df._append(other, ignore_index=True) + expected = DataFrame({"a": [pd.NaT]}).astype(object) + tm.assert_frame_equal(result, expected) + + # mismatched tz + other = Series({"a": pd.NaT}, dtype="datetime64[ns, US/Pacific]") + result = df._append(other, ignore_index=True) + expected = DataFrame({"a": [pd.NaT]}).astype(object) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "dtype_str", ["datetime64[ns, UTC]", "datetime64[ns]", "Int64", "int64"] + ) + @pytest.mark.parametrize("val", [1, "NaT"]) + def test_append_empty_frame_with_timedelta64ns_nat(self, dtype_str, val): + # https://github.com/pandas-dev/pandas/issues/35460 + df = DataFrame(columns=["a"]).astype(dtype_str) + + other = DataFrame({"a": [np.timedelta64(val, "ns")]}) + result = df._append(other, ignore_index=True) + + expected = other.astype(object) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "dtype_str", ["datetime64[ns, UTC]", "datetime64[ns]", "Int64", "int64"] + ) + @pytest.mark.parametrize("val", [1, "NaT"]) + def test_append_frame_with_timedelta64ns_nat(self, dtype_str, val): + # https://github.com/pandas-dev/pandas/issues/35460 + df = DataFrame({"a": pd.array([1], dtype=dtype_str)}) + + other = DataFrame({"a": [np.timedelta64(val, "ns")]}) + result = df._append(other, ignore_index=True) + + expected = DataFrame({"a": [df.iloc[0, 0], other.iloc[0, 0]]}, 
dtype=object) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/test_append_common.py b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/test_append_common.py new file mode 100644 index 0000000..f36bee9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/test_append_common.py @@ -0,0 +1,765 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + Index, + Series, +) +import pandas._testing as tm + +dt_data = [ + pd.Timestamp("2011-01-01"), + pd.Timestamp("2011-01-02"), + pd.Timestamp("2011-01-03"), +] +tz_data = [ + pd.Timestamp("2011-01-01", tz="US/Eastern"), + pd.Timestamp("2011-01-02", tz="US/Eastern"), + pd.Timestamp("2011-01-03", tz="US/Eastern"), +] +td_data = [ + pd.Timedelta("1 days"), + pd.Timedelta("2 days"), + pd.Timedelta("3 days"), +] +period_data = [ + pd.Period("2011-01", freq="M"), + pd.Period("2011-02", freq="M"), + pd.Period("2011-03", freq="M"), +] +data_dict = { + "bool": [True, False, True], + "int64": [1, 2, 3], + "float64": [1.1, np.nan, 3.3], + "category": Categorical(["X", "Y", "Z"]), + "object": ["a", "b", "c"], + "datetime64[ns]": dt_data, + "datetime64[ns, US/Eastern]": tz_data, + "timedelta64[ns]": td_data, + "period[M]": period_data, +} + + +class TestConcatAppendCommon: + """ + Test common dtype coercion rules between concat and append. + """ + + @pytest.fixture(params=sorted(data_dict.keys())) + def item(self, request): + key = request.param + return key, data_dict[key] + + item2 = item + + def _check_expected_dtype(self, obj, label): + """ + Check whether obj has expected dtype depending on label + considering not-supported dtypes + """ + if isinstance(obj, Index): + if label == "bool": + assert obj.dtype == "object" + else: + assert obj.dtype == label + elif isinstance(obj, Series): + if label.startswith("period"): + assert obj.dtype == "Period[M]" + else: + assert obj.dtype == label + else: + raise ValueError + + def test_dtypes(self, item): + # to confirm test case covers intended dtypes + typ, vals = item + self._check_expected_dtype(Index(vals), typ) + self._check_expected_dtype(Series(vals), typ) + + def test_concatlike_same_dtypes(self, item): + # GH 13660 + typ1, vals1 = item + + vals2 = vals1 + vals3 = vals1 + + if typ1 == "category": + exp_data = Categorical(list(vals1) + list(vals2)) + exp_data3 = Categorical(list(vals1) + list(vals2) + list(vals3)) + else: + exp_data = vals1 + vals2 + exp_data3 = vals1 + vals2 + vals3 + + # ----- Index ----- # + + # index.append + res = Index(vals1).append(Index(vals2)) + exp = Index(exp_data) + tm.assert_index_equal(res, exp) + + # 3 elements + res = Index(vals1).append([Index(vals2), Index(vals3)]) + exp = Index(exp_data3) + tm.assert_index_equal(res, exp) + + # index.append name mismatch + i1 = Index(vals1, name="x") + i2 = Index(vals2, name="y") + res = i1.append(i2) + exp = Index(exp_data) + tm.assert_index_equal(res, exp) + + # index.append name match + i1 = Index(vals1, name="x") + i2 = Index(vals2, name="x") + res = i1.append(i2) + exp = Index(exp_data, name="x") + tm.assert_index_equal(res, exp) + + # cannot append non-index + with pytest.raises(TypeError, match="all inputs must be Index"): + Index(vals1).append(vals2) + + with pytest.raises(TypeError, match="all inputs must be Index"): + Index(vals1).append([Index(vals2), vals3]) + + # ----- Series ----- # + + # series.append + res = Series(vals1)._append(Series(vals2), 
ignore_index=True) + exp = Series(exp_data) + tm.assert_series_equal(res, exp, check_index_type=True) + + # concat + res = pd.concat([Series(vals1), Series(vals2)], ignore_index=True) + tm.assert_series_equal(res, exp, check_index_type=True) + + # 3 elements + res = Series(vals1)._append([Series(vals2), Series(vals3)], ignore_index=True) + exp = Series(exp_data3) + tm.assert_series_equal(res, exp) + + res = pd.concat( + [Series(vals1), Series(vals2), Series(vals3)], + ignore_index=True, + ) + tm.assert_series_equal(res, exp) + + # name mismatch + s1 = Series(vals1, name="x") + s2 = Series(vals2, name="y") + res = s1._append(s2, ignore_index=True) + exp = Series(exp_data) + tm.assert_series_equal(res, exp, check_index_type=True) + + res = pd.concat([s1, s2], ignore_index=True) + tm.assert_series_equal(res, exp, check_index_type=True) + + # name match + s1 = Series(vals1, name="x") + s2 = Series(vals2, name="x") + res = s1._append(s2, ignore_index=True) + exp = Series(exp_data, name="x") + tm.assert_series_equal(res, exp, check_index_type=True) + + res = pd.concat([s1, s2], ignore_index=True) + tm.assert_series_equal(res, exp, check_index_type=True) + + # cannot append non-Series/DataFrame objects + msg = ( + r"cannot concatenate object of type '.+'; " + "only Series and DataFrame objs are valid" + ) + with pytest.raises(TypeError, match=msg): + Series(vals1)._append(vals2) + + with pytest.raises(TypeError, match=msg): + Series(vals1)._append([Series(vals2), vals3]) + + with pytest.raises(TypeError, match=msg): + pd.concat([Series(vals1), vals2]) + + with pytest.raises(TypeError, match=msg): + pd.concat([Series(vals1), Series(vals2), vals3]) + + def test_concatlike_dtypes_coercion(self, item, item2): + # GH 13660 + typ1, vals1 = item + typ2, vals2 = item2 + + vals3 = vals2 + + # by default, let the result dtype be inferred + exp_index_dtype = None + exp_series_dtype = None + + if typ1 == typ2: + # same dtype is tested in test_concatlike_same_dtypes + return + elif typ1 == "category" or typ2 == "category": + # The `vals1 + vals2` below fails because one of these is a Categorical + # instead of a list; we have separate dedicated tests for categorical + return + + warn = None + # specify expected dtype + if typ1 == "bool" and typ2 in ("int64", "float64"): + # series coerces to numeric based on numpy rule + # index doesn't because bool is object dtype + exp_series_dtype = typ2 + warn = FutureWarning + elif typ2 == "bool" and typ1 in ("int64", "float64"): + exp_series_dtype = typ1 + warn = FutureWarning + elif ( + typ1 == "datetime64[ns, US/Eastern]" + or typ2 == "datetime64[ns, US/Eastern]" + or typ1 == "timedelta64[ns]" + or typ2 == "timedelta64[ns]" + ): + exp_index_dtype = object + exp_series_dtype = object + + exp_data = vals1 + vals2 + exp_data3 = vals1 + vals2 + vals3 + + # ----- Index ----- # + + # index.append + res = Index(vals1).append(Index(vals2)) + exp = Index(exp_data, dtype=exp_index_dtype) + tm.assert_index_equal(res, exp) + + # 3 elements + res = Index(vals1).append([Index(vals2), Index(vals3)]) + exp = Index(exp_data3, dtype=exp_index_dtype) + tm.assert_index_equal(res, exp) + + # ----- Series ----- # + + # series._append + with tm.assert_produces_warning(warn, match="concatenating bool-dtype"): + # GH#39817 + res = Series(vals1)._append(Series(vals2), ignore_index=True) + exp = Series(exp_data, dtype=exp_series_dtype) + tm.assert_series_equal(res, exp, check_index_type=True) + + # concat + with tm.assert_produces_warning(warn, match="concatenating bool-dtype"): + # GH#39817 + res = pd.concat([Series(vals1), Series(vals2)], 
ignore_index=True) + tm.assert_series_equal(res, exp, check_index_type=True) + + # 3 elements + with tm.assert_produces_warning(warn, match="concatenating bool-dtype"): + # GH#39817 + res = Series(vals1)._append( + [Series(vals2), Series(vals3)], ignore_index=True + ) + exp = Series(exp_data3, dtype=exp_series_dtype) + tm.assert_series_equal(res, exp) + + with tm.assert_produces_warning(warn, match="concatenating bool-dtype"): + # GH#39817 + res = pd.concat( + [Series(vals1), Series(vals2), Series(vals3)], + ignore_index=True, + ) + tm.assert_series_equal(res, exp) + + def test_concatlike_common_coerce_to_pandas_object(self): + # GH 13626 + # result must be Timestamp/Timedelta, not datetime.datetime/timedelta + dti = pd.DatetimeIndex(["2011-01-01", "2011-01-02"]) + tdi = pd.TimedeltaIndex(["1 days", "2 days"]) + + exp = Index( + [ + pd.Timestamp("2011-01-01"), + pd.Timestamp("2011-01-02"), + pd.Timedelta("1 days"), + pd.Timedelta("2 days"), + ] + ) + + res = dti.append(tdi) + tm.assert_index_equal(res, exp) + assert isinstance(res[0], pd.Timestamp) + assert isinstance(res[-1], pd.Timedelta) + + dts = Series(dti) + tds = Series(tdi) + res = dts._append(tds) + tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1])) + assert isinstance(res.iloc[0], pd.Timestamp) + assert isinstance(res.iloc[-1], pd.Timedelta) + + res = pd.concat([dts, tds]) + tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1])) + assert isinstance(res.iloc[0], pd.Timestamp) + assert isinstance(res.iloc[-1], pd.Timedelta) + + def test_concatlike_datetimetz(self, tz_aware_fixture): + tz = tz_aware_fixture + # GH 7795 + dti1 = pd.DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz) + dti2 = pd.DatetimeIndex(["2012-01-01", "2012-01-02"], tz=tz) + + exp = pd.DatetimeIndex( + ["2011-01-01", "2011-01-02", "2012-01-01", "2012-01-02"], tz=tz + ) + + res = dti1.append(dti2) + tm.assert_index_equal(res, exp) + + dts1 = Series(dti1) + dts2 = Series(dti2) + res = dts1._append(dts2) + tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1])) + + res = pd.concat([dts1, dts2]) + tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1])) + + @pytest.mark.parametrize("tz", ["UTC", "US/Eastern", "Asia/Tokyo", "EST5EDT"]) + def test_concatlike_datetimetz_short(self, tz): + # GH#7795 + ix1 = pd.date_range(start="2014-07-15", end="2014-07-17", freq="D", tz=tz) + ix2 = pd.DatetimeIndex(["2014-07-11", "2014-07-21"], tz=tz) + df1 = DataFrame(0, index=ix1, columns=["A", "B"]) + df2 = DataFrame(0, index=ix2, columns=["A", "B"]) + + exp_idx = pd.DatetimeIndex( + ["2014-07-15", "2014-07-16", "2014-07-17", "2014-07-11", "2014-07-21"], + tz=tz, + ) + exp = DataFrame(0, index=exp_idx, columns=["A", "B"]) + + tm.assert_frame_equal(df1._append(df2), exp) + tm.assert_frame_equal(pd.concat([df1, df2]), exp) + + def test_concatlike_datetimetz_to_object(self, tz_aware_fixture): + tz = tz_aware_fixture + # GH 13660 + + # different tz coerces to object + dti1 = pd.DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz) + dti2 = pd.DatetimeIndex(["2012-01-01", "2012-01-02"]) + + exp = Index( + [ + pd.Timestamp("2011-01-01", tz=tz), + pd.Timestamp("2011-01-02", tz=tz), + pd.Timestamp("2012-01-01"), + pd.Timestamp("2012-01-02"), + ], + dtype=object, + ) + + res = dti1.append(dti2) + tm.assert_index_equal(res, exp) + + dts1 = Series(dti1) + dts2 = Series(dti2) + res = dts1._append(dts2) + tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1])) + + res = pd.concat([dts1, dts2]) + tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1])) + + # 
different tz + dti3 = pd.DatetimeIndex(["2012-01-01", "2012-01-02"], tz="US/Pacific") + + exp = Index( + [ + pd.Timestamp("2011-01-01", tz=tz), + pd.Timestamp("2011-01-02", tz=tz), + pd.Timestamp("2012-01-01", tz="US/Pacific"), + pd.Timestamp("2012-01-02", tz="US/Pacific"), + ], + dtype=object, + ) + + res = dti1.append(dti3) + tm.assert_index_equal(res, exp) + + dts1 = Series(dti1) + dts3 = Series(dti3) + res = dts1._append(dts3) + tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1])) + + res = pd.concat([dts1, dts3]) + tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1])) + + def test_concatlike_common_period(self): + # GH 13660 + pi1 = pd.PeriodIndex(["2011-01", "2011-02"], freq="M") + pi2 = pd.PeriodIndex(["2012-01", "2012-02"], freq="M") + + exp = pd.PeriodIndex(["2011-01", "2011-02", "2012-01", "2012-02"], freq="M") + + res = pi1.append(pi2) + tm.assert_index_equal(res, exp) + + ps1 = Series(pi1) + ps2 = Series(pi2) + res = ps1._append(ps2) + tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1])) + + res = pd.concat([ps1, ps2]) + tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1])) + + def test_concatlike_common_period_diff_freq_to_object(self): + # GH 13221 + pi1 = pd.PeriodIndex(["2011-01", "2011-02"], freq="M") + pi2 = pd.PeriodIndex(["2012-01-01", "2012-02-01"], freq="D") + + exp = Index( + [ + pd.Period("2011-01", freq="M"), + pd.Period("2011-02", freq="M"), + pd.Period("2012-01-01", freq="D"), + pd.Period("2012-02-01", freq="D"), + ], + dtype=object, + ) + + res = pi1.append(pi2) + tm.assert_index_equal(res, exp) + + ps1 = Series(pi1) + ps2 = Series(pi2) + res = ps1._append(ps2) + tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1])) + + res = pd.concat([ps1, ps2]) + tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1])) + + def test_concatlike_common_period_mixed_dt_to_object(self): + # GH 13221 + # different datetimelike + pi1 = pd.PeriodIndex(["2011-01", "2011-02"], freq="M") + tdi = pd.TimedeltaIndex(["1 days", "2 days"]) + exp = Index( + [ + pd.Period("2011-01", freq="M"), + pd.Period("2011-02", freq="M"), + pd.Timedelta("1 days"), + pd.Timedelta("2 days"), + ], + dtype=object, + ) + + res = pi1.append(tdi) + tm.assert_index_equal(res, exp) + + ps1 = Series(pi1) + tds = Series(tdi) + res = ps1._append(tds) + tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1])) + + res = pd.concat([ps1, tds]) + tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1])) + + # inverse + exp = Index( + [ + pd.Timedelta("1 days"), + pd.Timedelta("2 days"), + pd.Period("2011-01", freq="M"), + pd.Period("2011-02", freq="M"), + ], + dtype=object, + ) + + res = tdi.append(pi1) + tm.assert_index_equal(res, exp) + + ps1 = Series(pi1) + tds = Series(tdi) + res = tds._append(ps1) + tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1])) + + res = pd.concat([tds, ps1]) + tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1])) + + def test_concat_categorical(self): + # GH 13524 + + # same categories -> category + s1 = Series([1, 2, np.nan], dtype="category") + s2 = Series([2, 1, 2], dtype="category") + + exp = Series([1, 2, np.nan, 2, 1, 2], dtype="category") + tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) + tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) + + # partially different categories => not-category + s1 = Series([3, 2], dtype="category") + s2 = Series([2, 1], dtype="category") + + exp = Series([3, 2, 2, 1]) + tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) + 
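# _append is the private successor of the deprecated Series.append; it + # must coerce exactly like pd.concat, so partially overlapping category + # sets fall back to the values' own dtype in both code paths +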
tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) + + # completely different categories (same dtype) => not-category + s1 = Series([10, 11, np.nan], dtype="category") + s2 = Series([np.nan, 1, 3, 2], dtype="category") + + exp = Series([10, 11, np.nan, np.nan, 1, 3, 2], dtype="object") + tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) + tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) + + def test_union_categorical_same_categories_different_order(self): + # https://github.com/pandas-dev/pandas/issues/19096 + a = Series(Categorical(["a", "b", "c"], categories=["a", "b", "c"])) + b = Series(Categorical(["a", "b", "c"], categories=["b", "a", "c"])) + result = pd.concat([a, b], ignore_index=True) + expected = Series( + Categorical(["a", "b", "c", "a", "b", "c"], categories=["a", "b", "c"]) + ) + tm.assert_series_equal(result, expected) + + def test_concat_categorical_coercion(self): + # GH 13524 + + # category + not-category => not-category + s1 = Series([1, 2, np.nan], dtype="category") + s2 = Series([2, 1, 2]) + + exp = Series([1, 2, np.nan, 2, 1, 2], dtype="object") + tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) + tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) + + # result shouldn't be affected by 1st elem dtype + exp = Series([2, 1, 2, 1, 2, np.nan], dtype="object") + tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) + tm.assert_series_equal(s2._append(s1, ignore_index=True), exp) + + # all values are not in category => not-category + s1 = Series([3, 2], dtype="category") + s2 = Series([2, 1]) + + exp = Series([3, 2, 2, 1]) + tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) + tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) + + exp = Series([2, 1, 3, 2]) + tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) + tm.assert_series_equal(s2._append(s1, ignore_index=True), exp) + + # completely different categories => not-category + s1 = Series([10, 11, np.nan], dtype="category") + s2 = Series([1, 3, 2]) + + exp = Series([10, 11, np.nan, 1, 3, 2], dtype="object") + tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) + tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) + + exp = Series([1, 3, 2, 10, 11, np.nan], dtype="object") + tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) + tm.assert_series_equal(s2._append(s1, ignore_index=True), exp) + + # different dtype => not-category + s1 = Series([10, 11, np.nan], dtype="category") + s2 = Series(["a", "b", "c"]) + + exp = Series([10, 11, np.nan, "a", "b", "c"]) + tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) + tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) + + exp = Series(["a", "b", "c", 10, 11, np.nan]) + tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) + tm.assert_series_equal(s2._append(s1, ignore_index=True), exp) + + # if normal series only contains NaN-likes => not-category + s1 = Series([10, 11], dtype="category") + s2 = Series([np.nan, np.nan, np.nan]) + + exp = Series([10, 11, np.nan, np.nan, np.nan]) + tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) + tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) + + exp = Series([np.nan, np.nan, np.nan, 10, 11]) + tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) + tm.assert_series_equal(s2._append(s1, ignore_index=True), exp) + + def test_concat_categorical_3elem_coercion(self): + # GH 13524 + + # 
mixed dtypes => not-category + s1 = Series([1, 2, np.nan], dtype="category") + s2 = Series([2, 1, 2], dtype="category") + s3 = Series([1, 2, 1, 2, np.nan]) + + exp = Series([1, 2, np.nan, 2, 1, 2, 1, 2, 1, 2, np.nan], dtype="float") + tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp) + tm.assert_series_equal(s1._append([s2, s3], ignore_index=True), exp) + + exp = Series([1, 2, 1, 2, np.nan, 1, 2, np.nan, 2, 1, 2], dtype="float") + tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp) + tm.assert_series_equal(s3._append([s1, s2], ignore_index=True), exp) + + # values are all in either category => not-category + s1 = Series([4, 5, 6], dtype="category") + s2 = Series([1, 2, 3], dtype="category") + s3 = Series([1, 3, 4]) + + exp = Series([4, 5, 6, 1, 2, 3, 1, 3, 4]) + tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp) + tm.assert_series_equal(s1._append([s2, s3], ignore_index=True), exp) + + exp = Series([1, 3, 4, 4, 5, 6, 1, 2, 3]) + tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp) + tm.assert_series_equal(s3._append([s1, s2], ignore_index=True), exp) + + # values are not in either category => not-category + s1 = Series([4, 5, 6], dtype="category") + s2 = Series([1, 2, 3], dtype="category") + s3 = Series([10, 11, 12]) + + exp = Series([4, 5, 6, 1, 2, 3, 10, 11, 12]) + tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp) + tm.assert_series_equal(s1._append([s2, s3], ignore_index=True), exp) + + exp = Series([10, 11, 12, 4, 5, 6, 1, 2, 3]) + tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp) + tm.assert_series_equal(s3._append([s1, s2], ignore_index=True), exp) + + def test_concat_categorical_multi_coercion(self): + # GH 13524 + + s1 = Series([1, 3], dtype="category") + s2 = Series([3, 4], dtype="category") + s3 = Series([2, 3]) + s4 = Series([2, 2], dtype="category") + s5 = Series([1, np.nan]) + s6 = Series([1, 3, 2], dtype="category") + + # mixed dtype, values are all in categories => not-category + exp = Series([1, 3, 3, 4, 2, 3, 2, 2, 1, np.nan, 1, 3, 2]) + res = pd.concat([s1, s2, s3, s4, s5, s6], ignore_index=True) + tm.assert_series_equal(res, exp) + res = s1._append([s2, s3, s4, s5, s6], ignore_index=True) + tm.assert_series_equal(res, exp) + + exp = Series([1, 3, 2, 1, np.nan, 2, 2, 2, 3, 3, 4, 1, 3]) + res = pd.concat([s6, s5, s4, s3, s2, s1], ignore_index=True) + tm.assert_series_equal(res, exp) + res = s6._append([s5, s4, s3, s2, s1], ignore_index=True) + tm.assert_series_equal(res, exp) + + def test_concat_categorical_ordered(self): + # GH 13524 + + s1 = Series(Categorical([1, 2, np.nan], ordered=True)) + s2 = Series(Categorical([2, 1, 2], ordered=True)) + + exp = Series(Categorical([1, 2, np.nan, 2, 1, 2], ordered=True)) + tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) + tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) + + exp = Series(Categorical([1, 2, np.nan, 2, 1, 2, 1, 2, np.nan], ordered=True)) + tm.assert_series_equal(pd.concat([s1, s2, s1], ignore_index=True), exp) + tm.assert_series_equal(s1._append([s2, s1], ignore_index=True), exp) + + def test_concat_categorical_coercion_nan(self): + # GH 13524 + + # some edge cases + # category + not-category => not-category + s1 = Series(np.array([np.nan, np.nan], dtype=np.float64), dtype="category") + s2 = Series([np.nan, 1]) + + exp = Series([np.nan, np.nan, np.nan, 1]) + tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) + 
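# an all-NaN categorical carries no categories to preserve, so the + # result falls back to plain float64; again _append must match concat +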
tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) + + s1 = Series([1, np.nan], dtype="category") + s2 = Series([np.nan, np.nan]) + + exp = Series([1, np.nan, np.nan, np.nan], dtype="float") + tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) + tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) + + # mixed dtype, all nan-likes => not-category + s1 = Series([np.nan, np.nan], dtype="category") + s2 = Series([np.nan, np.nan]) + + exp = Series([np.nan, np.nan, np.nan, np.nan]) + tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) + tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) + tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) + tm.assert_series_equal(s2._append(s1, ignore_index=True), exp) + + # all category nan-likes => category + s1 = Series([np.nan, np.nan], dtype="category") + s2 = Series([np.nan, np.nan], dtype="category") + + exp = Series([np.nan, np.nan, np.nan, np.nan], dtype="category") + + tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) + tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) + + def test_concat_categorical_empty(self): + # GH 13524 + + s1 = Series([], dtype="category") + s2 = Series([1, 2], dtype="category") + + tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2) + tm.assert_series_equal(s1._append(s2, ignore_index=True), s2) + + tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), s2) + tm.assert_series_equal(s2._append(s1, ignore_index=True), s2) + + s1 = Series([], dtype="category") + s2 = Series([], dtype="category") + + tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2) + tm.assert_series_equal(s1._append(s2, ignore_index=True), s2) + + s1 = Series([], dtype="category") + s2 = Series([], dtype="object") + + # different dtype => not-category + tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2) + tm.assert_series_equal(s1._append(s2, ignore_index=True), s2) + tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), s2) + tm.assert_series_equal(s2._append(s1, ignore_index=True), s2) + + s1 = Series([], dtype="category") + s2 = Series([np.nan, np.nan]) + + # empty Series is ignored + exp = Series([np.nan, np.nan]) + tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) + tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) + + tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) + tm.assert_series_equal(s2._append(s1, ignore_index=True), exp) + + def test_categorical_concat_append(self): + cat = Categorical(["a", "b"], categories=["a", "b"]) + vals = [1, 2] + df = DataFrame({"cats": cat, "vals": vals}) + cat2 = Categorical(["a", "b", "a", "b"], categories=["a", "b"]) + vals2 = [1, 2, 1, 2] + exp = DataFrame({"cats": cat2, "vals": vals2}, index=Index([0, 1, 0, 1])) + + tm.assert_frame_equal(pd.concat([df, df]), exp) + tm.assert_frame_equal(df._append(df), exp) + + # GH 13524 can concat different categories + cat3 = Categorical(["a", "b"], categories=["a", "b", "c"]) + vals3 = [1, 2] + df_different_categories = DataFrame({"cats": cat3, "vals": vals3}) + + res = pd.concat([df, df_different_categories], ignore_index=True) + exp = DataFrame({"cats": list("abab"), "vals": [1, 2, 1, 2]}) + tm.assert_frame_equal(res, exp) + + res = df._append(df_different_categories, ignore_index=True) + tm.assert_frame_equal(res, exp) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/test_categorical.py 
b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/test_categorical.py new file mode 100644 index 0000000..5bafd2e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/test_categorical.py @@ -0,0 +1,240 @@ +import numpy as np + +from pandas.core.dtypes.dtypes import CategoricalDtype + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + Series, +) +import pandas._testing as tm + + +class TestCategoricalConcat: + def test_categorical_concat(self, sort): + # See GH 10177 + df1 = DataFrame( + np.arange(18, dtype="int64").reshape(6, 3), columns=["a", "b", "c"] + ) + + df2 = DataFrame(np.arange(14, dtype="int64").reshape(7, 2), columns=["a", "c"]) + + cat_values = ["one", "one", "two", "one", "two", "two", "one"] + df2["h"] = Series(Categorical(cat_values)) + + res = pd.concat((df1, df2), axis=0, ignore_index=True, sort=sort) + exp = DataFrame( + { + "a": [0, 3, 6, 9, 12, 15, 0, 2, 4, 6, 8, 10, 12], + "b": [ + 1, + 4, + 7, + 10, + 13, + 16, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + ], + "c": [2, 5, 8, 11, 14, 17, 1, 3, 5, 7, 9, 11, 13], + "h": [None] * 6 + cat_values, + } + ) + exp["h"] = exp["h"].astype(df2["h"].dtype) + tm.assert_frame_equal(res, exp) + + def test_categorical_concat_dtypes(self): + + # GH8143 + index = ["cat", "obj", "num"] + cat = Categorical(["a", "b", "c"]) + obj = Series(["a", "b", "c"]) + num = Series([1, 2, 3]) + df = pd.concat([Series(cat), obj, num], axis=1, keys=index) + + result = df.dtypes == "object" + expected = Series([False, True, False], index=index) + tm.assert_series_equal(result, expected) + + result = df.dtypes == "int64" + expected = Series([False, False, True], index=index) + tm.assert_series_equal(result, expected) + + result = df.dtypes == "category" + expected = Series([True, False, False], index=index) + tm.assert_series_equal(result, expected) + + def test_concat_categoricalindex(self): + # GH 16111, categories that aren't lexsorted + categories = [9, 0, 1, 2, 3] + + a = Series(1, index=pd.CategoricalIndex([9, 0], categories=categories)) + b = Series(2, index=pd.CategoricalIndex([0, 1], categories=categories)) + c = Series(3, index=pd.CategoricalIndex([1, 2], categories=categories)) + + result = pd.concat([a, b, c], axis=1) + + exp_idx = pd.CategoricalIndex([9, 0, 1, 2], categories=categories) + exp = DataFrame( + { + 0: [1, 1, np.nan, np.nan], + 1: [np.nan, 2, 2, np.nan], + 2: [np.nan, np.nan, 3, 3], + }, + columns=[0, 1, 2], + index=exp_idx, + ) + tm.assert_frame_equal(result, exp) + + def test_categorical_concat_preserve(self): + + # GH 8641 series concat not preserving category dtype + # GH 13524 can concat different categories + s = Series(list("abc"), dtype="category") + s2 = Series(list("abd"), dtype="category") + + exp = Series(list("abcabd")) + res = pd.concat([s, s2], ignore_index=True) + tm.assert_series_equal(res, exp) + + exp = Series(list("abcabc"), dtype="category") + res = pd.concat([s, s], ignore_index=True) + tm.assert_series_equal(res, exp) + + exp = Series(list("abcabc"), index=[0, 1, 2, 0, 1, 2], dtype="category") + res = pd.concat([s, s]) + tm.assert_series_equal(res, exp) + + a = Series(np.arange(6, dtype="int64")) + b = Series(list("aabbca")) + + df2 = DataFrame({"A": a, "B": b.astype(CategoricalDtype(list("cab")))}) + res = pd.concat([df2, df2]) + exp = DataFrame( + { + "A": pd.concat([a, a]), + "B": pd.concat([b, b]).astype(CategoricalDtype(list("cab"))), + } + ) + tm.assert_frame_equal(res, exp) + + def 
test_categorical_index_preserver(self): + + a = Series(np.arange(6, dtype="int64")) + b = Series(list("aabbca")) + + df2 = DataFrame( + {"A": a, "B": b.astype(CategoricalDtype(list("cab")))} + ).set_index("B") + result = pd.concat([df2, df2]) + expected = DataFrame( + { + "A": pd.concat([a, a]), + "B": pd.concat([b, b]).astype(CategoricalDtype(list("cab"))), + } + ).set_index("B") + tm.assert_frame_equal(result, expected) + + # wrong categories -> uses concat_compat, which casts to object + df3 = DataFrame( + {"A": a, "B": Categorical(b, categories=list("abe"))} + ).set_index("B") + result = pd.concat([df2, df3]) + expected = pd.concat( + [ + df2.set_axis(df2.index.astype(object), axis=0), + df3.set_axis(df3.index.astype(object), axis=0), + ] + ) + tm.assert_frame_equal(result, expected) + + def test_concat_categorical_tz(self): + # GH-23816 + a = Series(pd.date_range("2017-01-01", periods=2, tz="US/Pacific")) + b = Series(["a", "b"], dtype="category") + result = pd.concat([a, b], ignore_index=True) + expected = Series( + [ + pd.Timestamp("2017-01-01", tz="US/Pacific"), + pd.Timestamp("2017-01-02", tz="US/Pacific"), + "a", + "b", + ] + ) + tm.assert_series_equal(result, expected) + + def test_concat_categorical_unchanged(self): + # GH-12007 + # test fix for when concat on categorical and float + # coerces dtype categorical -> float + df = DataFrame(Series(["a", "b", "c"], dtype="category", name="A")) + ser = Series([0, 1, 2], index=[0, 1, 3], name="B") + result = pd.concat([df, ser], axis=1) + expected = DataFrame( + { + "A": Series(["a", "b", "c", np.nan], dtype="category"), + "B": Series([0, 1, np.nan, 2], dtype="float"), + } + ) + tm.assert_equal(result, expected) + + def test_categorical_concat_gh7864(self): + # GH 7864 + # make sure ordering is preserved + df = DataFrame({"id": [1, 2, 3, 4, 5, 6], "raw_grade": list("abbaae")}) + df["grade"] = Categorical(df["raw_grade"]) + df["grade"].cat.set_categories(["e", "a", "b"]) + + df1 = df[0:3] + df2 = df[3:] + + tm.assert_index_equal(df["grade"].cat.categories, df1["grade"].cat.categories) + tm.assert_index_equal(df["grade"].cat.categories, df2["grade"].cat.categories) + + dfx = pd.concat([df1, df2]) + tm.assert_index_equal(df["grade"].cat.categories, dfx["grade"].cat.categories) + + dfa = df1._append(df2) + tm.assert_index_equal(df["grade"].cat.categories, dfa["grade"].cat.categories) + + def test_categorical_index_upcast(self): + # GH 17629 + # test upcasting to object when concatenating on categorical indexes + # with non-identical categories + + a = DataFrame({"foo": [1, 2]}, index=Categorical(["foo", "bar"])) + b = DataFrame({"foo": [4, 3]}, index=Categorical(["baz", "bar"])) + + res = pd.concat([a, b]) + exp = DataFrame({"foo": [1, 2, 4, 3]}, index=["foo", "bar", "baz", "bar"]) + + tm.assert_equal(res, exp) + + a = Series([1, 2], index=Categorical(["foo", "bar"])) + b = Series([4, 3], index=Categorical(["baz", "bar"])) + + res = pd.concat([a, b]) + exp = Series([1, 2, 4, 3], index=["foo", "bar", "baz", "bar"]) + + tm.assert_equal(res, exp) + + def test_categorical_missing_from_one_frame(self): + # GH 25412 + df1 = DataFrame({"f1": [1, 2, 3]}) + df2 = DataFrame({"f1": [2, 3, 1], "f2": Series([4, 4, 4]).astype("category")}) + result = pd.concat([df1, df2], sort=True) + dtype = CategoricalDtype([4]) + expected = DataFrame( + { + "f1": [1, 2, 3, 2, 3, 1], + "f2": Categorical.from_codes([-1, -1, -1, 0, 0, 0], dtype=dtype), + }, + index=[0, 1, 2, 0, 1, 2], + ) + tm.assert_frame_equal(result, expected) diff --git 
a/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/test_concat.py b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/test_concat.py new file mode 100644 index 0000000..a7b3c77 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/test_concat.py @@ -0,0 +1,746 @@ +from collections import ( + abc, + deque, +) +from decimal import Decimal +from warnings import ( + catch_warnings, + simplefilter, +) + +import numpy as np +import pytest + +from pandas.errors import PerformanceWarning + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + PeriodIndex, + Series, + concat, + date_range, +) +import pandas._testing as tm +from pandas.core.arrays import SparseArray +from pandas.core.construction import create_series_with_explicit_dtype +from pandas.tests.extension.decimal import to_decimal + + +class TestConcatenate: + def test_append_concat(self): + # GH#1815 + d1 = date_range("12/31/1990", "12/31/1999", freq="A-DEC") + d2 = date_range("12/31/2000", "12/31/2009", freq="A-DEC") + + s1 = Series(np.random.randn(10), d1) + s2 = Series(np.random.randn(10), d2) + + s1 = s1.to_period() + s2 = s2.to_period() + + # drops index + result = concat([s1, s2]) + assert isinstance(result.index, PeriodIndex) + assert result.index[0] == s1.index[0] + + def test_concat_copy(self, using_array_manager): + df = DataFrame(np.random.randn(4, 3)) + df2 = DataFrame(np.random.randint(0, 10, size=4).reshape(4, 1)) + df3 = DataFrame({5: "foo"}, index=range(4)) + + # These are actual copies. + result = concat([df, df2, df3], axis=1, copy=True) + + for arr in result._mgr.arrays: + assert arr.base is None + + # These are the same. + result = concat([df, df2, df3], axis=1, copy=False) + + for arr in result._mgr.arrays: + if arr.dtype.kind == "f": + assert arr.base is df._mgr.arrays[0].base + elif arr.dtype.kind in ["i", "u"]: + assert arr.base is df2._mgr.arrays[0].base + elif arr.dtype == object: + if using_array_manager: + # we get the same array object, which has no base + assert arr is df3._mgr.arrays[0] + else: + assert arr.base is not None + + # Float block was consolidated. 
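+ # Note: with a second float frame added, the default BlockManager + # consolidates both float columns into a single block, so the data is + # copied even though copy=False was requested.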
+ df4 = DataFrame(np.random.randn(4, 1)) + result = concat([df, df2, df3, df4], axis=1, copy=False) + for arr in result._mgr.arrays: + if arr.dtype.kind == "f": + if using_array_manager: + # this is a view on some array in either df or df4 + assert any( + np.shares_memory(arr, other) + for other in df._mgr.arrays + df4._mgr.arrays + ) + else: + # the block was consolidated, so we got a copy anyway + assert arr.base is None + elif arr.dtype.kind in ["i", "u"]: + assert arr.base is df2._mgr.arrays[0].base + elif arr.dtype == object: + # this is a view on df3 + assert any(np.shares_memory(arr, other) for other in df3._mgr.arrays) + + def test_concat_with_group_keys(self): + # axis=0 + df = DataFrame(np.random.randn(3, 4)) + df2 = DataFrame(np.random.randn(4, 4)) + + result = concat([df, df2], keys=[0, 1]) + exp_index = MultiIndex.from_arrays( + [[0, 0, 0, 1, 1, 1, 1], [0, 1, 2, 0, 1, 2, 3]] + ) + expected = DataFrame(np.r_[df.values, df2.values], index=exp_index) + tm.assert_frame_equal(result, expected) + + result = concat([df, df], keys=[0, 1]) + exp_index2 = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]) + expected = DataFrame(np.r_[df.values, df.values], index=exp_index2) + tm.assert_frame_equal(result, expected) + + # axis=1 + df = DataFrame(np.random.randn(4, 3)) + df2 = DataFrame(np.random.randn(4, 4)) + + result = concat([df, df2], keys=[0, 1], axis=1) + expected = DataFrame(np.c_[df.values, df2.values], columns=exp_index) + tm.assert_frame_equal(result, expected) + + result = concat([df, df], keys=[0, 1], axis=1) + expected = DataFrame(np.c_[df.values, df.values], columns=exp_index2) + tm.assert_frame_equal(result, expected) + + def test_concat_keys_specific_levels(self): + df = DataFrame(np.random.randn(10, 4)) + pieces = [df.iloc[:, [0, 1]], df.iloc[:, [2]], df.iloc[:, [3]]] + level = ["three", "two", "one", "zero"] + result = concat( + pieces, + axis=1, + keys=["one", "two", "three"], + levels=[level], + names=["group_key"], + ) + + tm.assert_index_equal(result.columns.levels[0], Index(level, name="group_key")) + tm.assert_index_equal(result.columns.levels[1], Index([0, 1, 2, 3])) + + assert result.columns.names == ["group_key", None] + + @pytest.mark.parametrize("mapping", ["mapping", "dict"]) + def test_concat_mapping(self, mapping, non_dict_mapping_subclass): + constructor = dict if mapping == "dict" else non_dict_mapping_subclass + frames = constructor( + { + "foo": DataFrame(np.random.randn(4, 3)), + "bar": DataFrame(np.random.randn(4, 3)), + "baz": DataFrame(np.random.randn(4, 3)), + "qux": DataFrame(np.random.randn(4, 3)), + } + ) + + sorted_keys = list(frames.keys()) + + result = concat(frames) + expected = concat([frames[k] for k in sorted_keys], keys=sorted_keys) + tm.assert_frame_equal(result, expected) + + result = concat(frames, axis=1) + expected = concat([frames[k] for k in sorted_keys], keys=sorted_keys, axis=1) + tm.assert_frame_equal(result, expected) + + keys = ["baz", "foo", "bar"] + result = concat(frames, keys=keys) + expected = concat([frames[k] for k in keys], keys=keys) + tm.assert_frame_equal(result, expected) + + def test_concat_keys_and_levels(self): + df = DataFrame(np.random.randn(1, 3)) + df2 = DataFrame(np.random.randn(1, 4)) + + levels = [["foo", "baz"], ["one", "two"]] + names = ["first", "second"] + result = concat( + [df, df2, df, df2], + keys=[("foo", "one"), ("foo", "two"), ("baz", "one"), ("baz", "two")], + levels=levels, + names=names, + ) + expected = concat([df, df2, df, df2]) + exp_index = MultiIndex( + levels=levels + 
[[0]], + codes=[[0, 0, 1, 1], [0, 1, 0, 1], [0, 0, 0, 0]], + names=names + [None], + ) + expected.index = exp_index + + tm.assert_frame_equal(result, expected) + + # no names + result = concat( + [df, df2, df, df2], + keys=[("foo", "one"), ("foo", "two"), ("baz", "one"), ("baz", "two")], + levels=levels, + ) + assert result.index.names == (None,) * 3 + + # no levels + result = concat( + [df, df2, df, df2], + keys=[("foo", "one"), ("foo", "two"), ("baz", "one"), ("baz", "two")], + names=["first", "second"], + ) + assert result.index.names == ("first", "second", None) + tm.assert_index_equal( + result.index.levels[0], Index(["baz", "foo"], name="first") + ) + + def test_concat_keys_levels_no_overlap(self): + # GH #1406 + df = DataFrame(np.random.randn(1, 3), index=["a"]) + df2 = DataFrame(np.random.randn(1, 4), index=["b"]) + + msg = "Values not found in passed level" + with pytest.raises(ValueError, match=msg): + concat([df, df], keys=["one", "two"], levels=[["foo", "bar", "baz"]]) + + msg = "Key one not in level" + with pytest.raises(ValueError, match=msg): + concat([df, df2], keys=["one", "two"], levels=[["foo", "bar", "baz"]]) + + def test_crossed_dtypes_weird_corner(self): + columns = ["A", "B", "C", "D"] + df1 = DataFrame( + { + "A": np.array([1, 2, 3, 4], dtype="f8"), + "B": np.array([1, 2, 3, 4], dtype="i8"), + "C": np.array([1, 2, 3, 4], dtype="f8"), + "D": np.array([1, 2, 3, 4], dtype="i8"), + }, + columns=columns, + ) + + df2 = DataFrame( + { + "A": np.array([1, 2, 3, 4], dtype="i8"), + "B": np.array([1, 2, 3, 4], dtype="f8"), + "C": np.array([1, 2, 3, 4], dtype="i8"), + "D": np.array([1, 2, 3, 4], dtype="f8"), + }, + columns=columns, + ) + + appended = concat([df1, df2], ignore_index=True) + expected = DataFrame( + np.concatenate([df1.values, df2.values], axis=0), columns=columns + ) + tm.assert_frame_equal(appended, expected) + + df = DataFrame(np.random.randn(1, 3), index=["a"]) + df2 = DataFrame(np.random.randn(1, 4), index=["b"]) + result = concat([df, df2], keys=["one", "two"], names=["first", "second"]) + assert result.index.names == ("first", "second") + + def test_with_mixed_tuples(self, sort): + # 10697 + # columns have mixed tuples, so handle properly + df1 = DataFrame({"A": "foo", ("B", 1): "bar"}, index=range(2)) + df2 = DataFrame({"B": "foo", ("B", 1): "bar"}, index=range(2)) + + # it works + concat([df1, df2], sort=sort) + + def test_concat_mixed_objs(self): + + # concat mixed series/frames + # G2385 + + # axis 1 + index = date_range("01-Jan-2013", periods=10, freq="H") + arr = np.arange(10, dtype="int64") + s1 = Series(arr, index=index) + s2 = Series(arr, index=index) + df = DataFrame(arr.reshape(-1, 1), index=index) + + expected = DataFrame( + np.repeat(arr, 2).reshape(-1, 2), index=index, columns=[0, 0] + ) + result = concat([df, df], axis=1) + tm.assert_frame_equal(result, expected) + + expected = DataFrame( + np.repeat(arr, 2).reshape(-1, 2), index=index, columns=[0, 1] + ) + result = concat([s1, s2], axis=1) + tm.assert_frame_equal(result, expected) + + expected = DataFrame( + np.repeat(arr, 3).reshape(-1, 3), index=index, columns=[0, 1, 2] + ) + result = concat([s1, s2, s1], axis=1) + tm.assert_frame_equal(result, expected) + + expected = DataFrame( + np.repeat(arr, 5).reshape(-1, 5), index=index, columns=[0, 0, 1, 2, 3] + ) + result = concat([s1, df, s2, s2, s1], axis=1) + tm.assert_frame_equal(result, expected) + + # with names + s1.name = "foo" + expected = DataFrame( + np.repeat(arr, 3).reshape(-1, 3), index=index, columns=["foo", 0, 0] + ) + result = 
concat([s1, df, s2], axis=1) + tm.assert_frame_equal(result, expected) + + s2.name = "bar" + expected = DataFrame( + np.repeat(arr, 3).reshape(-1, 3), index=index, columns=["foo", 0, "bar"] + ) + result = concat([s1, df, s2], axis=1) + tm.assert_frame_equal(result, expected) + + # ignore index + expected = DataFrame( + np.repeat(arr, 3).reshape(-1, 3), index=index, columns=[0, 1, 2] + ) + result = concat([s1, df, s2], axis=1, ignore_index=True) + tm.assert_frame_equal(result, expected) + + # axis 0 + expected = DataFrame( + np.tile(arr, 3).reshape(-1, 1), index=index.tolist() * 3, columns=[0] + ) + result = concat([s1, df, s2]) + tm.assert_frame_equal(result, expected) + + expected = DataFrame(np.tile(arr, 3).reshape(-1, 1), columns=[0]) + result = concat([s1, df, s2], ignore_index=True) + tm.assert_frame_equal(result, expected) + + def test_dtype_coerceion(self): + + # 12411 + df = DataFrame({"date": [pd.Timestamp("20130101").tz_localize("UTC"), pd.NaT]}) + + result = concat([df.iloc[[0]], df.iloc[[1]]]) + tm.assert_series_equal(result.dtypes, df.dtypes) + + # 12045 + import datetime + + df = DataFrame( + {"date": [datetime.datetime(2012, 1, 1), datetime.datetime(1012, 1, 2)]} + ) + result = concat([df.iloc[[0]], df.iloc[[1]]]) + tm.assert_series_equal(result.dtypes, df.dtypes) + + # 11594 + df = DataFrame({"text": ["some words"] + [None] * 9}) + result = concat([df.iloc[[0]], df.iloc[[1]]]) + tm.assert_series_equal(result.dtypes, df.dtypes) + + def test_concat_single_with_key(self): + df = DataFrame(np.random.randn(10, 4)) + + result = concat([df], keys=["foo"]) + expected = concat([df, df], keys=["foo", "bar"]) + tm.assert_frame_equal(result, expected[:10]) + + def test_concat_no_items_raises(self): + with pytest.raises(ValueError, match="No objects to concatenate"): + concat([]) + + def test_concat_exclude_none(self): + df = DataFrame(np.random.randn(10, 4)) + + pieces = [df[:5], None, None, df[5:]] + result = concat(pieces) + tm.assert_frame_equal(result, df) + with pytest.raises(ValueError, match="All objects passed were None"): + concat([None, None]) + + def test_concat_keys_with_none(self): + # #1649 + df0 = DataFrame([[10, 20, 30], [10, 20, 30], [10, 20, 30]]) + + result = concat({"a": None, "b": df0, "c": df0[:2], "d": df0[:1], "e": df0}) + expected = concat({"b": df0, "c": df0[:2], "d": df0[:1], "e": df0}) + tm.assert_frame_equal(result, expected) + + result = concat( + [None, df0, df0[:2], df0[:1], df0], keys=["a", "b", "c", "d", "e"] + ) + expected = concat([df0, df0[:2], df0[:1], df0], keys=["b", "c", "d", "e"]) + tm.assert_frame_equal(result, expected) + + def test_concat_bug_1719(self): + ts1 = tm.makeTimeSeries() + ts2 = tm.makeTimeSeries()[::2] + + # to join with union + # these two are of different length! 
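+ # the outer join reindexes both operands to the union of the two date + # indexes (ts2 covers every other date of ts1), so both orders must + # produce a frame of the same length.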
+ left = concat([ts1, ts2], join="outer", axis=1) + right = concat([ts2, ts1], join="outer", axis=1) + + assert len(left) == len(right) + + def test_concat_bug_2972(self): + ts0 = Series(np.zeros(5)) + ts1 = Series(np.ones(5)) + ts0.name = ts1.name = "same name" + result = concat([ts0, ts1], axis=1) + + expected = DataFrame({0: ts0, 1: ts1}) + expected.columns = ["same name", "same name"] + tm.assert_frame_equal(result, expected) + + def test_concat_bug_3602(self): + + # GH 3602, duplicate columns + df1 = DataFrame( + { + "firmNo": [0, 0, 0, 0], + "prc": [6, 6, 6, 6], + "stringvar": ["rrr", "rrr", "rrr", "rrr"], + } + ) + df2 = DataFrame( + {"C": [9, 10, 11, 12], "misc": [1, 2, 3, 4], "prc": [6, 6, 6, 6]} + ) + expected = DataFrame( + [ + [0, 6, "rrr", 9, 1, 6], + [0, 6, "rrr", 10, 2, 6], + [0, 6, "rrr", 11, 3, 6], + [0, 6, "rrr", 12, 4, 6], + ] + ) + expected.columns = ["firmNo", "prc", "stringvar", "C", "misc", "prc"] + + result = concat([df1, df2], axis=1) + tm.assert_frame_equal(result, expected) + + def test_concat_iterables(self): + # GH8645 check concat works with tuples, list, generators, and weird + # stuff like deque and custom iterables + df1 = DataFrame([1, 2, 3]) + df2 = DataFrame([4, 5, 6]) + expected = DataFrame([1, 2, 3, 4, 5, 6]) + tm.assert_frame_equal(concat((df1, df2), ignore_index=True), expected) + tm.assert_frame_equal(concat([df1, df2], ignore_index=True), expected) + tm.assert_frame_equal( + concat((df for df in (df1, df2)), ignore_index=True), expected + ) + tm.assert_frame_equal(concat(deque((df1, df2)), ignore_index=True), expected) + + class CustomIterator1: + def __len__(self) -> int: + return 2 + + def __getitem__(self, index): + try: + return {0: df1, 1: df2}[index] + except KeyError as err: + raise IndexError from err + + tm.assert_frame_equal(concat(CustomIterator1(), ignore_index=True), expected) + + class CustomIterator2(abc.Iterable): + def __iter__(self): + yield df1 + yield df2 + + tm.assert_frame_equal(concat(CustomIterator2(), ignore_index=True), expected) + + def test_concat_order(self): + # GH 17344 + dfs = [DataFrame(index=range(3), columns=["a", 1, None])] + dfs += [DataFrame(index=range(3), columns=[None, 1, "a"]) for i in range(100)] + + result = concat(dfs, sort=True).columns + expected = dfs[0].columns + tm.assert_index_equal(result, expected) + + def test_concat_different_extension_dtypes_upcasts(self): + a = Series(pd.array([1, 2], dtype="Int64")) + b = Series(to_decimal([1, 2])) + + result = concat([a, b], ignore_index=True) + expected = Series([1, 2, Decimal(1), Decimal(2)], dtype=object) + tm.assert_series_equal(result, expected) + + def test_concat_ordered_dict(self): + # GH 21510 + expected = concat( + [Series(range(3)), Series(range(4))], keys=["First", "Another"] + ) + result = concat({"First": Series(range(3)), "Another": Series(range(4))}) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("pdt", [Series, DataFrame]) +@pytest.mark.parametrize("dt", np.sctypes["float"]) +def test_concat_no_unnecessary_upcast(dt, pdt): + # GH 13247 + dims = pdt(dtype=object).ndim + + dfs = [ + pdt(np.array([1], dtype=dt, ndmin=dims)), + pdt(np.array([np.nan], dtype=dt, ndmin=dims)), + pdt(np.array([5], dtype=dt, ndmin=dims)), + ] + x = concat(dfs) + assert x.values.dtype == dt + + +@pytest.mark.parametrize("pdt", [create_series_with_explicit_dtype, DataFrame]) +@pytest.mark.parametrize("dt", np.sctypes["int"]) +def test_concat_will_upcast(dt, pdt): + with catch_warnings(record=True): + dims = pdt().ndim + dfs = [ + 
pdt(np.array([1], dtype=dt, ndmin=dims)), + pdt(np.array([np.nan], ndmin=dims)), + pdt(np.array([5], dtype=dt, ndmin=dims)), + ] + x = concat(dfs) + assert x.values.dtype == "float64" + + +def test_concat_empty_and_non_empty_frame_regression(): + # GH 18178 regression test + df1 = DataFrame({"foo": [1]}) + df2 = DataFrame({"foo": []}) + expected = DataFrame({"foo": [1.0]}) + result = concat([df1, df2]) + tm.assert_frame_equal(result, expected) + + +def test_concat_sparse(): + # GH 23557 + a = Series(SparseArray([0, 1, 2])) + expected = DataFrame(data=[[0, 0], [1, 1], [2, 2]]).astype( + pd.SparseDtype(np.int64, 0) + ) + result = concat([a, a], axis=1) + tm.assert_frame_equal(result, expected) + + +def test_concat_dense_sparse(): + # GH 30668 + a = Series(pd.arrays.SparseArray([1, None]), dtype=float) + b = Series([1], dtype=float) + expected = Series(data=[1, None, 1], index=[0, 1, 0]).astype( + pd.SparseDtype(np.float64, None) + ) + result = concat([a, b], axis=0) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("keys", [["e", "f", "f"], ["f", "e", "f"]]) +def test_duplicate_keys(keys): + # GH 33654 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + s1 = Series([7, 8, 9], name="c") + s2 = Series([10, 11, 12], name="d") + result = concat([df, s1, s2], axis=1, keys=keys) + expected_values = [[1, 4, 7, 10], [2, 5, 8, 11], [3, 6, 9, 12]] + expected_columns = MultiIndex.from_tuples( + [(keys[0], "a"), (keys[0], "b"), (keys[1], "c"), (keys[2], "d")] + ) + expected = DataFrame(expected_values, columns=expected_columns) + tm.assert_frame_equal(result, expected) + + +def test_duplicate_keys_same_frame(): + # GH 43595 + keys = ["e", "e"] + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + result = concat([df, df], axis=1, keys=keys) + expected_values = [[1, 4, 1, 4], [2, 5, 2, 5], [3, 6, 3, 6]] + expected_columns = MultiIndex.from_tuples( + [(keys[0], "a"), (keys[0], "b"), (keys[1], "a"), (keys[1], "b")] + ) + expected = DataFrame(expected_values, columns=expected_columns) + with catch_warnings(): + # result.columns not sorted, resulting in performance warning + simplefilter("ignore", PerformanceWarning) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "obj", + [ + tm.SubclassedDataFrame({"A": np.arange(0, 10)}), + tm.SubclassedSeries(np.arange(0, 10), name="A"), + ], +) +def test_concat_preserves_subclass(obj): + # GH28330 -- preserve subclass + + result = concat([obj, obj]) + assert isinstance(result, type(obj)) + + +def test_concat_frame_axis0_extension_dtypes(): + # preserve extension dtype (through common_dtype mechanism) + df1 = DataFrame({"a": pd.array([1, 2, 3], dtype="Int64")}) + df2 = DataFrame({"a": np.array([4, 5, 6])}) + + result = concat([df1, df2], ignore_index=True) + expected = DataFrame({"a": [1, 2, 3, 4, 5, 6]}, dtype="Int64") + tm.assert_frame_equal(result, expected) + + result = concat([df2, df1], ignore_index=True) + expected = DataFrame({"a": [4, 5, 6, 1, 2, 3]}, dtype="Int64") + tm.assert_frame_equal(result, expected) + + +def test_concat_preserves_extension_int64_dtype(): + # GH 24768 + df_a = DataFrame({"a": [-1]}, dtype="Int64") + df_b = DataFrame({"b": [1]}, dtype="Int64") + result = concat([df_a, df_b], ignore_index=True) + expected = DataFrame({"a": [-1, None], "b": [None, 1]}, dtype="Int64") + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "dtype1,dtype2,expected_dtype", + [ + ("bool", "bool", "bool"), + ("boolean", "bool", "boolean"), + ("bool", "boolean", "boolean"), + ("boolean", 
"boolean", "boolean"), + ], +) +def test_concat_bool_types(dtype1, dtype2, expected_dtype): + # GH 42800 + ser1 = Series([True, False], dtype=dtype1) + ser2 = Series([False, True], dtype=dtype2) + result = concat([ser1, ser2], ignore_index=True) + expected = Series([True, False, False, True], dtype=expected_dtype) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + ("keys", "integrity"), + [ + (["red"] * 3, True), + (["red"] * 3, False), + (["red", "blue", "red"], False), + (["red", "blue", "red"], True), + ], +) +def test_concat_repeated_keys(keys, integrity): + # GH: 20816 + series_list = [Series({"a": 1}), Series({"b": 2}), Series({"c": 3})] + result = concat(series_list, keys=keys, verify_integrity=integrity) + tuples = list(zip(keys, ["a", "b", "c"])) + expected = Series([1, 2, 3], index=MultiIndex.from_tuples(tuples)) + tm.assert_series_equal(result, expected) + + +def test_concat_null_object_with_dti(): + # GH#40841 + dti = pd.DatetimeIndex( + ["2021-04-08 21:21:14+00:00"], dtype="datetime64[ns, UTC]", name="Time (UTC)" + ) + right = DataFrame(data={"C": [0.5274]}, index=dti) + + idx = Index([None], dtype="object", name="Maybe Time (UTC)") + left = DataFrame(data={"A": [None], "B": [np.nan]}, index=idx) + + result = concat([left, right], axis="columns") + + exp_index = Index([None, dti[0]], dtype=object) + expected = DataFrame( + {"A": [None, None], "B": [np.nan, np.nan], "C": [np.nan, 0.5274]}, + index=exp_index, + ) + tm.assert_frame_equal(result, expected) + + +def test_concat_multiindex_with_empty_rangeindex(): + # GH#41234 + mi = MultiIndex.from_tuples([("B", 1), ("C", 1)]) + df1 = DataFrame([[1, 2]], columns=mi) + df2 = DataFrame(index=[1], columns=pd.RangeIndex(0)) + + result = concat([df1, df2]) + expected = DataFrame([[1, 2], [np.nan, np.nan]], columns=mi) + tm.assert_frame_equal(result, expected) + + +def test_concat_posargs_deprecation(): + # https://github.com/pandas-dev/pandas/issues/41485 + df = DataFrame([[1, 2, 3]], index=["a"]) + df2 = DataFrame([[4, 5, 6]], index=["b"]) + + msg = ( + "In a future version of pandas all arguments of concat " + "except for the argument 'objs' will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = concat([df, df2], 0) + expected = DataFrame([[1, 2, 3], [4, 5, 6]], index=["a", "b"]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "data", + [ + Series(data=[1, 2]), + DataFrame( + data={ + "col1": [1, 2], + } + ), + DataFrame(dtype=float), + Series(dtype=float), + ], +) +def test_concat_drop_attrs(data): + # GH#41828 + df1 = data.copy() + df1.attrs = {1: 1} + df2 = data.copy() + df2.attrs = {1: 2} + df = concat([df1, df2]) + assert len(df.attrs) == 0 + + +@pytest.mark.parametrize( + "data", + [ + Series(data=[1, 2]), + DataFrame( + data={ + "col1": [1, 2], + } + ), + DataFrame(dtype=float), + Series(dtype=float), + ], +) +def test_concat_retain_attrs(data): + # GH#41828 + df1 = data.copy() + df1.attrs = {1: 1} + df2 = data.copy() + df2.attrs = {1: 1} + df = concat([df1, df2]) + assert df.attrs[1] == 1 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/test_dataframe.py b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/test_dataframe.py new file mode 100644 index 0000000..0176392 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/test_dataframe.py @@ -0,0 +1,207 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Index, + Series, + 
concat, +) +import pandas._testing as tm + + +class TestDataFrameConcat: + def test_concat_multiple_frames_dtypes(self): + + # GH#2759 + df1 = DataFrame(data=np.ones((10, 2)), columns=["foo", "bar"], dtype=np.float64) + df2 = DataFrame(data=np.ones((10, 2)), dtype=np.float32) + results = concat((df1, df2), axis=1).dtypes + expected = Series( + [np.dtype("float64")] * 2 + [np.dtype("float32")] * 2, + index=["foo", "bar", 0, 1], + ) + tm.assert_series_equal(results, expected) + + def test_concat_tuple_keys(self): + # GH#14438 + df1 = DataFrame(np.ones((2, 2)), columns=list("AB")) + df2 = DataFrame(np.ones((3, 2)) * 2, columns=list("AB")) + results = concat((df1, df2), keys=[("bee", "bah"), ("bee", "boo")]) + expected = DataFrame( + { + "A": { + ("bee", "bah", 0): 1.0, + ("bee", "bah", 1): 1.0, + ("bee", "boo", 0): 2.0, + ("bee", "boo", 1): 2.0, + ("bee", "boo", 2): 2.0, + }, + "B": { + ("bee", "bah", 0): 1.0, + ("bee", "bah", 1): 1.0, + ("bee", "boo", 0): 2.0, + ("bee", "boo", 1): 2.0, + ("bee", "boo", 2): 2.0, + }, + } + ) + tm.assert_frame_equal(results, expected) + + def test_concat_named_keys(self): + # GH#14252 + df = DataFrame({"foo": [1, 2], "bar": [0.1, 0.2]}) + index = Index(["a", "b"], name="baz") + concatted_named_from_keys = concat([df, df], keys=index) + expected_named = DataFrame( + {"foo": [1, 2, 1, 2], "bar": [0.1, 0.2, 0.1, 0.2]}, + index=pd.MultiIndex.from_product((["a", "b"], [0, 1]), names=["baz", None]), + ) + tm.assert_frame_equal(concatted_named_from_keys, expected_named) + + index_no_name = Index(["a", "b"], name=None) + concatted_named_from_names = concat([df, df], keys=index_no_name, names=["baz"]) + tm.assert_frame_equal(concatted_named_from_names, expected_named) + + concatted_unnamed = concat([df, df], keys=index_no_name) + expected_unnamed = DataFrame( + {"foo": [1, 2, 1, 2], "bar": [0.1, 0.2, 0.1, 0.2]}, + index=pd.MultiIndex.from_product((["a", "b"], [0, 1]), names=[None, None]), + ) + tm.assert_frame_equal(concatted_unnamed, expected_unnamed) + + def test_concat_axis_parameter(self): + # GH#14369 + df1 = DataFrame({"A": [0.1, 0.2]}, index=range(2)) + df2 = DataFrame({"A": [0.3, 0.4]}, index=range(2)) + + # Index/row/0 DataFrame + expected_index = DataFrame({"A": [0.1, 0.2, 0.3, 0.4]}, index=[0, 1, 0, 1]) + + concatted_index = concat([df1, df2], axis="index") + tm.assert_frame_equal(concatted_index, expected_index) + + concatted_row = concat([df1, df2], axis="rows") + tm.assert_frame_equal(concatted_row, expected_index) + + concatted_0 = concat([df1, df2], axis=0) + tm.assert_frame_equal(concatted_0, expected_index) + + # Columns/1 DataFrame + expected_columns = DataFrame( + [[0.1, 0.3], [0.2, 0.4]], index=[0, 1], columns=["A", "A"] + ) + + concatted_columns = concat([df1, df2], axis="columns") + tm.assert_frame_equal(concatted_columns, expected_columns) + + concatted_1 = concat([df1, df2], axis=1) + tm.assert_frame_equal(concatted_1, expected_columns) + + series1 = Series([0.1, 0.2]) + series2 = Series([0.3, 0.4]) + + # Index/row/0 Series + expected_index_series = Series([0.1, 0.2, 0.3, 0.4], index=[0, 1, 0, 1]) + + concatted_index_series = concat([series1, series2], axis="index") + tm.assert_series_equal(concatted_index_series, expected_index_series) + + concatted_row_series = concat([series1, series2], axis="rows") + tm.assert_series_equal(concatted_row_series, expected_index_series) + + concatted_0_series = concat([series1, series2], axis=0) + tm.assert_series_equal(concatted_0_series, expected_index_series) + + # Columns/1 Series + expected_columns_series 
= DataFrame( + [[0.1, 0.3], [0.2, 0.4]], index=[0, 1], columns=[0, 1] + ) + + concatted_columns_series = concat([series1, series2], axis="columns") + tm.assert_frame_equal(concatted_columns_series, expected_columns_series) + + concatted_1_series = concat([series1, series2], axis=1) + tm.assert_frame_equal(concatted_1_series, expected_columns_series) + + # Testing ValueError + with pytest.raises(ValueError, match="No axis named"): + concat([series1, series2], axis="something") + + def test_concat_numerical_names(self): + # GH#15262, GH#12223 + df = DataFrame( + {"col": range(9)}, + dtype="int32", + index=( + pd.MultiIndex.from_product( + [["A0", "A1", "A2"], ["B0", "B1", "B2"]], names=[1, 2] + ) + ), + ) + result = concat((df.iloc[:2, :], df.iloc[-2:, :])) + expected = DataFrame( + {"col": [0, 1, 7, 8]}, + dtype="int32", + index=pd.MultiIndex.from_tuples( + [("A0", "B0"), ("A0", "B1"), ("A2", "B1"), ("A2", "B2")], names=[1, 2] + ), + ) + tm.assert_frame_equal(result, expected) + + def test_concat_astype_dup_col(self): + # GH#23049 + df = DataFrame([{"a": "b"}]) + df = concat([df, df], axis=1) + + result = df.astype("category") + expected = DataFrame( + np.array(["b", "b"]).reshape(1, 2), columns=["a", "a"] + ).astype("category") + tm.assert_frame_equal(result, expected) + + def test_concat_dataframe_keys_bug(self, sort): + t1 = DataFrame( + {"value": Series([1, 2, 3], index=Index(["a", "b", "c"], name="id"))} + ) + t2 = DataFrame({"value": Series([7, 8], index=Index(["a", "b"], name="id"))}) + + # it works + result = concat([t1, t2], axis=1, keys=["t1", "t2"], sort=sort) + assert list(result.columns) == [("t1", "value"), ("t2", "value")] + + def test_concat_bool_with_int(self): + # GH#42092 we may want to change this to return object, but that + # would need a deprecation + df1 = DataFrame(Series([True, False, True, True], dtype="bool")) + df2 = DataFrame(Series([1, 0, 1], dtype="int64")) + + result = concat([df1, df2]) + expected = concat([df1.astype("int64"), df2]) + tm.assert_frame_equal(result, expected) + + def test_concat_duplicates_in_index_with_keys(self): + # GH#42651 + index = [1, 1, 3] + data = [1, 2, 3] + + df = DataFrame(data=data, index=index) + result = concat([df], keys=["A"], names=["ID", "date"]) + mi = pd.MultiIndex.from_product([["A"], index], names=["ID", "date"]) + expected = DataFrame(data=data, index=mi) + tm.assert_frame_equal(result, expected) + tm.assert_index_equal(result.index.levels[1], Index([1, 3], name="date")) + + @pytest.mark.parametrize("ignore_index", [True, False]) + @pytest.mark.parametrize("order", ["C", "F"]) + @pytest.mark.parametrize("axis", [0, 1]) + def test_concat_copies(self, axis, order, ignore_index): + # based on asv ConcatDataFrames + df = DataFrame(np.zeros((10000, 200), dtype=np.float32, order=order)) + + res = concat([df] * 5, axis=axis, ignore_index=ignore_index, copy=True) + + for arr in res._iter_column_arrays(): + for arr2 in df._iter_column_arrays(): + assert not np.shares_memory(arr, arr2) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/test_datetimes.py b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/test_datetimes.py new file mode 100644 index 0000000..1af54a1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/test_datetimes.py @@ -0,0 +1,543 @@ +import datetime as dt +from datetime import datetime + +import dateutil +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + DatetimeIndex, + Index, + MultiIndex, + 
Series, + Timestamp, + concat, + date_range, + to_timedelta, +) +import pandas._testing as tm + + +class TestDatetimeConcat: + def test_concat_datetime64_block(self): + from pandas.core.indexes.datetimes import date_range + + rng = date_range("1/1/2000", periods=10) + + df = DataFrame({"time": rng}) + + result = concat([df, df]) + assert (result.iloc[:10]["time"] == rng).all() + assert (result.iloc[10:]["time"] == rng).all() + + def test_concat_datetime_datetime64_frame(self): + # GH#2624 + rows = [] + rows.append([datetime(2010, 1, 1), 1]) + rows.append([datetime(2010, 1, 2), "hi"]) + + df2_obj = DataFrame.from_records(rows, columns=["date", "test"]) + + ind = date_range(start="2000/1/1", freq="D", periods=10) + df1 = DataFrame({"date": ind, "test": range(10)}) + + # it works! + concat([df1, df2_obj]) + + def test_concat_datetime_timezone(self): + # GH 18523 + idx1 = date_range("2011-01-01", periods=3, freq="H", tz="Europe/Paris") + idx2 = date_range(start=idx1[0], end=idx1[-1], freq="H") + df1 = DataFrame({"a": [1, 2, 3]}, index=idx1) + df2 = DataFrame({"b": [1, 2, 3]}, index=idx2) + result = concat([df1, df2], axis=1) + + exp_idx = ( + DatetimeIndex( + [ + "2011-01-01 00:00:00+01:00", + "2011-01-01 01:00:00+01:00", + "2011-01-01 02:00:00+01:00", + ], + freq="H", + ) + .tz_convert("UTC") + .tz_convert("Europe/Paris") + ) + + expected = DataFrame( + [[1, 1], [2, 2], [3, 3]], index=exp_idx, columns=["a", "b"] + ) + + tm.assert_frame_equal(result, expected) + + idx3 = date_range("2011-01-01", periods=3, freq="H", tz="Asia/Tokyo") + df3 = DataFrame({"b": [1, 2, 3]}, index=idx3) + result = concat([df1, df3], axis=1) + + exp_idx = DatetimeIndex( + [ + "2010-12-31 15:00:00+00:00", + "2010-12-31 16:00:00+00:00", + "2010-12-31 17:00:00+00:00", + "2010-12-31 23:00:00+00:00", + "2011-01-01 00:00:00+00:00", + "2011-01-01 01:00:00+00:00", + ] + ) + + expected = DataFrame( + [ + [np.nan, 1], + [np.nan, 2], + [np.nan, 3], + [1, np.nan], + [2, np.nan], + [3, np.nan], + ], + index=exp_idx, + columns=["a", "b"], + ) + + tm.assert_frame_equal(result, expected) + + # GH 13783: Concat after resample + result = concat([df1.resample("H").mean(), df2.resample("H").mean()], sort=True) + expected = DataFrame( + {"a": [1, 2, 3] + [np.nan] * 3, "b": [np.nan] * 3 + [1, 2, 3]}, + index=idx1.append(idx1), + ) + tm.assert_frame_equal(result, expected) + + def test_concat_datetimeindex_freq(self): + # GH 3232 + # Monotonic index result + dr = date_range("01-Jan-2013", periods=100, freq="50L", tz="UTC") + data = list(range(100)) + expected = DataFrame(data, index=dr) + result = concat([expected[:50], expected[50:]]) + tm.assert_frame_equal(result, expected) + + # Non-monotonic index result + result = concat([expected[50:], expected[:50]]) + expected = DataFrame(data[50:] + data[:50], index=dr[50:].append(dr[:50])) + expected.index._data.freq = None + tm.assert_frame_equal(result, expected) + + def test_concat_multiindex_datetime_object_index(self): + # https://github.com/pandas-dev/pandas/issues/11058 + idx = Index( + [dt.date(2013, 1, 1), dt.date(2014, 1, 1), dt.date(2015, 1, 1)], + dtype="object", + ) + + s = Series( + ["a", "b"], + index=MultiIndex.from_arrays( + [ + [1, 2], + idx[:-1], + ], + names=["first", "second"], + ), + ) + s2 = Series( + ["a", "b"], + index=MultiIndex.from_arrays( + [[1, 2], idx[::2]], + names=["first", "second"], + ), + ) + mi = MultiIndex.from_arrays( + [[1, 2, 2], idx], + names=["first", "second"], + ) + assert mi.levels[1].dtype == object + + expected = DataFrame( + [["a", "a"], ["b", 
np.nan], [np.nan, "b"]], + index=mi, + ) + result = concat([s, s2], axis=1) + tm.assert_frame_equal(result, expected) + + def test_concat_NaT_series(self): + # GH 11693 + # test for merging NaT series with datetime series. + x = Series( + date_range("20151124 08:00", "20151124 09:00", freq="1h", tz="US/Eastern") + ) + y = Series(pd.NaT, index=[0, 1], dtype="datetime64[ns, US/Eastern]") + expected = Series([x[0], x[1], pd.NaT, pd.NaT]) + + result = concat([x, y], ignore_index=True) + tm.assert_series_equal(result, expected) + + # all NaT with tz + expected = Series(pd.NaT, index=range(4), dtype="datetime64[ns, US/Eastern]") + result = concat([y, y], ignore_index=True) + tm.assert_series_equal(result, expected) + + # without tz + x = Series(date_range("20151124 08:00", "20151124 09:00", freq="1h")) + y = Series(date_range("20151124 10:00", "20151124 11:00", freq="1h")) + y[:] = pd.NaT + expected = Series([x[0], x[1], pd.NaT, pd.NaT]) + result = concat([x, y], ignore_index=True) + tm.assert_series_equal(result, expected) + + # all NaT without tz + x[:] = pd.NaT + expected = Series(pd.NaT, index=range(4), dtype="datetime64[ns]") + result = concat([x, y], ignore_index=True) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("tz", [None, "UTC"]) + def test_concat_NaT_dataframes(self, tz): + # GH 12396 + + first = DataFrame([[pd.NaT], [pd.NaT]]) + first = first.apply(lambda x: x.dt.tz_localize(tz)) + second = DataFrame( + [[Timestamp("2015/01/01", tz=tz)], [Timestamp("2016/01/01", tz=tz)]], + index=[2, 3], + ) + expected = DataFrame( + [ + pd.NaT, + pd.NaT, + Timestamp("2015/01/01", tz=tz), + Timestamp("2016/01/01", tz=tz), + ] + ) + + result = concat([first, second], axis=0) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("tz1", [None, "UTC"]) + @pytest.mark.parametrize("tz2", [None, "UTC"]) + @pytest.mark.parametrize("s", [pd.NaT, Timestamp("20150101")]) + def test_concat_NaT_dataframes_all_NaT_axis_0(self, tz1, tz2, s): + # GH 12396 + + # tz-naive + first = DataFrame([[pd.NaT], [pd.NaT]]).apply(lambda x: x.dt.tz_localize(tz1)) + second = DataFrame([s]).apply(lambda x: x.dt.tz_localize(tz2)) + + result = concat([first, second], axis=0) + expected = DataFrame(Series([pd.NaT, pd.NaT, s], index=[0, 1, 0])) + expected = expected.apply(lambda x: x.dt.tz_localize(tz2)) + if tz1 != tz2: + expected = expected.astype(object) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("tz1", [None, "UTC"]) + @pytest.mark.parametrize("tz2", [None, "UTC"]) + def test_concat_NaT_dataframes_all_NaT_axis_1(self, tz1, tz2): + # GH 12396 + + first = DataFrame(Series([pd.NaT, pd.NaT]).dt.tz_localize(tz1)) + second = DataFrame(Series([pd.NaT]).dt.tz_localize(tz2), columns=[1]) + expected = DataFrame( + { + 0: Series([pd.NaT, pd.NaT]).dt.tz_localize(tz1), + 1: Series([pd.NaT, pd.NaT]).dt.tz_localize(tz2), + } + ) + result = concat([first, second], axis=1) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("tz1", [None, "UTC"]) + @pytest.mark.parametrize("tz2", [None, "UTC"]) + def test_concat_NaT_series_dataframe_all_NaT(self, tz1, tz2): + # GH 12396 + + # tz-naive + first = Series([pd.NaT, pd.NaT]).dt.tz_localize(tz1) + second = DataFrame( + [ + [Timestamp("2015/01/01", tz=tz2)], + [Timestamp("2016/01/01", tz=tz2)], + ], + index=[2, 3], + ) + + expected = DataFrame( + [ + pd.NaT, + pd.NaT, + Timestamp("2015/01/01", tz=tz2), + Timestamp("2016/01/01", tz=tz2), + ] + ) + if tz1 != tz2: + expected = expected.astype(object) + + result = 
concat([first, second]) + tm.assert_frame_equal(result, expected) + + +class TestTimezoneConcat: + def test_concat_tz_series(self): + # gh-11755: tz and no tz + x = Series(date_range("20151124 08:00", "20151124 09:00", freq="1h", tz="UTC")) + y = Series(date_range("2012-01-01", "2012-01-02")) + expected = Series([x[0], x[1], y[0], y[1]], dtype="object") + result = concat([x, y], ignore_index=True) + tm.assert_series_equal(result, expected) + + # gh-11887: concat tz and object + x = Series(date_range("20151124 08:00", "20151124 09:00", freq="1h", tz="UTC")) + y = Series(["a", "b"]) + expected = Series([x[0], x[1], y[0], y[1]], dtype="object") + result = concat([x, y], ignore_index=True) + tm.assert_series_equal(result, expected) + + # see gh-12217 and gh-12306 + # Concatenating two UTC times + first = DataFrame([[datetime(2016, 1, 1)]]) + first[0] = first[0].dt.tz_localize("UTC") + + second = DataFrame([[datetime(2016, 1, 2)]]) + second[0] = second[0].dt.tz_localize("UTC") + + result = concat([first, second]) + assert result[0].dtype == "datetime64[ns, UTC]" + + # Concatenating two London times + first = DataFrame([[datetime(2016, 1, 1)]]) + first[0] = first[0].dt.tz_localize("Europe/London") + + second = DataFrame([[datetime(2016, 1, 2)]]) + second[0] = second[0].dt.tz_localize("Europe/London") + + result = concat([first, second]) + assert result[0].dtype == "datetime64[ns, Europe/London]" + + # Concatenating 2+1 London times + first = DataFrame([[datetime(2016, 1, 1)], [datetime(2016, 1, 2)]]) + first[0] = first[0].dt.tz_localize("Europe/London") + + second = DataFrame([[datetime(2016, 1, 3)]]) + second[0] = second[0].dt.tz_localize("Europe/London") + + result = concat([first, second]) + assert result[0].dtype == "datetime64[ns, Europe/London]" + + # Concat'ing 1+2 London times + first = DataFrame([[datetime(2016, 1, 1)]]) + first[0] = first[0].dt.tz_localize("Europe/London") + + second = DataFrame([[datetime(2016, 1, 2)], [datetime(2016, 1, 3)]]) + second[0] = second[0].dt.tz_localize("Europe/London") + + result = concat([first, second]) + assert result[0].dtype == "datetime64[ns, Europe/London]" + + def test_concat_tz_series_tzlocal(self): + # see gh-13583 + x = [ + Timestamp("2011-01-01", tz=dateutil.tz.tzlocal()), + Timestamp("2011-02-01", tz=dateutil.tz.tzlocal()), + ] + y = [ + Timestamp("2012-01-01", tz=dateutil.tz.tzlocal()), + Timestamp("2012-02-01", tz=dateutil.tz.tzlocal()), + ] + + result = concat([Series(x), Series(y)], ignore_index=True) + tm.assert_series_equal(result, Series(x + y)) + assert result.dtype == "datetime64[ns, tzlocal()]" + + def test_concat_tz_series_with_datetimelike(self): + # see gh-12620: tz and timedelta + x = [ + Timestamp("2011-01-01", tz="US/Eastern"), + Timestamp("2011-02-01", tz="US/Eastern"), + ] + y = [pd.Timedelta("1 day"), pd.Timedelta("2 day")] + result = concat([Series(x), Series(y)], ignore_index=True) + tm.assert_series_equal(result, Series(x + y, dtype="object")) + + # tz and period + y = [pd.Period("2011-03", freq="M"), pd.Period("2011-04", freq="M")] + result = concat([Series(x), Series(y)], ignore_index=True) + tm.assert_series_equal(result, Series(x + y, dtype="object")) + + def test_concat_tz_frame(self): + df2 = DataFrame( + { + "A": Timestamp("20130102", tz="US/Eastern"), + "B": Timestamp("20130603", tz="CET"), + }, + index=range(5), + ) + + # concat + df3 = concat([df2.A.to_frame(), df2.B.to_frame()], axis=1) + tm.assert_frame_equal(df2, df3) + + def test_concat_multiple_tzs(self): + # GH#12467 + # combining datetime tz-aware and 
naive DataFrames + ts1 = Timestamp("2015-01-01", tz=None) + ts2 = Timestamp("2015-01-01", tz="UTC") + ts3 = Timestamp("2015-01-01", tz="EST") + + df1 = DataFrame({"time": [ts1]}) + df2 = DataFrame({"time": [ts2]}) + df3 = DataFrame({"time": [ts3]}) + + results = concat([df1, df2]).reset_index(drop=True) + expected = DataFrame({"time": [ts1, ts2]}, dtype=object) + tm.assert_frame_equal(results, expected) + + results = concat([df1, df3]).reset_index(drop=True) + expected = DataFrame({"time": [ts1, ts3]}, dtype=object) + tm.assert_frame_equal(results, expected) + + results = concat([df2, df3]).reset_index(drop=True) + expected = DataFrame({"time": [ts2, ts3]}) + tm.assert_frame_equal(results, expected) + + @pytest.mark.filterwarnings("ignore:Timestamp.freq is deprecated:FutureWarning") + def test_concat_multiindex_with_tz(self): + # GH 6606 + df = DataFrame( + { + "dt": [ + datetime(2014, 1, 1), + datetime(2014, 1, 2), + datetime(2014, 1, 3), + ], + "b": ["A", "B", "C"], + "c": [1, 2, 3], + "d": [4, 5, 6], + } + ) + df["dt"] = df["dt"].apply(lambda d: Timestamp(d, tz="US/Pacific")) + df = df.set_index(["dt", "b"]) + + exp_idx1 = DatetimeIndex( + ["2014-01-01", "2014-01-02", "2014-01-03"] * 2, tz="US/Pacific", name="dt" + ) + exp_idx2 = Index(["A", "B", "C"] * 2, name="b") + exp_idx = MultiIndex.from_arrays([exp_idx1, exp_idx2]) + expected = DataFrame( + {"c": [1, 2, 3] * 2, "d": [4, 5, 6] * 2}, index=exp_idx, columns=["c", "d"] + ) + + result = concat([df, df]) + tm.assert_frame_equal(result, expected) + + def test_concat_tz_not_aligned(self): + # GH#22796 + ts = pd.to_datetime([1, 2]).tz_localize("UTC") + a = DataFrame({"A": ts}) + b = DataFrame({"A": ts, "B": ts}) + result = concat([a, b], sort=True, ignore_index=True) + expected = DataFrame( + {"A": list(ts) + list(ts), "B": [pd.NaT, pd.NaT] + list(ts)} + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "t1", + [ + "2015-01-01", + pytest.param( + pd.NaT, + marks=pytest.mark.xfail( + reason="GH23037 incorrect dtype when concatenating" + ), + ), + ], + ) + def test_concat_tz_NaT(self, t1): + # GH#22796 + # Concatenating tz-aware multicolumn DataFrames + ts1 = Timestamp(t1, tz="UTC") + ts2 = Timestamp("2015-01-01", tz="UTC") + ts3 = Timestamp("2015-01-01", tz="UTC") + + df1 = DataFrame([[ts1, ts2]]) + df2 = DataFrame([[ts3]]) + + result = concat([df1, df2]) + expected = DataFrame([[ts1, ts2], [ts3, pd.NaT]], index=[0, 0]) + + tm.assert_frame_equal(result, expected) + + def test_concat_tz_with_empty(self): + # GH 9188 + result = concat( + [DataFrame(date_range("2000", periods=1, tz="UTC")), DataFrame()] + ) + expected = DataFrame(date_range("2000", periods=1, tz="UTC")) + tm.assert_frame_equal(result, expected) + + +class TestPeriodConcat: + def test_concat_period_series(self): + x = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="D")) + y = Series(pd.PeriodIndex(["2015-10-01", "2016-01-01"], freq="D")) + expected = Series([x[0], x[1], y[0], y[1]], dtype="Period[D]") + result = concat([x, y], ignore_index=True) + tm.assert_series_equal(result, expected) + + def test_concat_period_multiple_freq_series(self): + x = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="D")) + y = Series(pd.PeriodIndex(["2015-10-01", "2016-01-01"], freq="M")) + expected = Series([x[0], x[1], y[0], y[1]], dtype="object") + result = concat([x, y], ignore_index=True) + tm.assert_series_equal(result, expected) + assert result.dtype == "object" + + def test_concat_period_other_series(self): + x = 
Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="D")) + y = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="M")) + expected = Series([x[0], x[1], y[0], y[1]], dtype="object") + result = concat([x, y], ignore_index=True) + tm.assert_series_equal(result, expected) + assert result.dtype == "object" + + # non-period + x = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="D")) + y = Series(DatetimeIndex(["2015-11-01", "2015-12-01"])) + expected = Series([x[0], x[1], y[0], y[1]], dtype="object") + result = concat([x, y], ignore_index=True) + tm.assert_series_equal(result, expected) + assert result.dtype == "object" + + x = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="D")) + y = Series(["A", "B"]) + expected = Series([x[0], x[1], y[0], y[1]], dtype="object") + result = concat([x, y], ignore_index=True) + tm.assert_series_equal(result, expected) + assert result.dtype == "object" + + +def test_concat_timedelta64_block(): + rng = to_timedelta(np.arange(10), unit="s") + + df = DataFrame({"time": rng}) + + result = concat([df, df]) + tm.assert_frame_equal(result.iloc[:10], df) + tm.assert_frame_equal(result.iloc[10:], df) + + +def test_concat_multiindex_datetime_nat(): + # GH#44900 + left = DataFrame({"a": 1}, index=MultiIndex.from_tuples([(1, pd.NaT)])) + right = DataFrame( + {"b": 2}, index=MultiIndex.from_tuples([(1, pd.NaT), (2, pd.NaT)]) + ) + result = concat([left, right], axis="columns") + expected = DataFrame( + {"a": [1.0, np.nan], "b": 2}, MultiIndex.from_tuples([(1, pd.NaT), (2, pd.NaT)]) + ) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/test_empty.py b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/test_empty.py new file mode 100644 index 0000000..82d2a8a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/test_empty.py @@ -0,0 +1,283 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Index, + Series, + concat, + date_range, +) +import pandas._testing as tm + + +class TestEmptyConcat: + def test_handle_empty_objects(self, sort): + df = DataFrame(np.random.randn(10, 4), columns=list("abcd")) + + baz = df[:5].copy() + baz["foo"] = "bar" + empty = df[5:5] + + frames = [baz, empty, empty, df[5:]] + concatted = concat(frames, axis=0, sort=sort) + + expected = df.reindex(columns=["a", "b", "c", "d", "foo"]) + expected["foo"] = expected["foo"].astype("O") + expected.loc[0:4, "foo"] = "bar" + + tm.assert_frame_equal(concatted, expected) + + # empty as first element with time series + # GH3259 + df = DataFrame( + {"A": range(10000)}, index=date_range("20130101", periods=10000, freq="s") + ) + empty = DataFrame() + result = concat([df, empty], axis=1) + tm.assert_frame_equal(result, df) + result = concat([empty, df], axis=1) + tm.assert_frame_equal(result, df) + + result = concat([df, empty]) + tm.assert_frame_equal(result, df) + result = concat([empty, df]) + tm.assert_frame_equal(result, df) + + def test_concat_empty_series(self): + # GH 11082 + s1 = Series([1, 2, 3], name="x") + s2 = Series(name="y", dtype="float64") + res = concat([s1, s2], axis=1) + exp = DataFrame( + {"x": [1, 2, 3], "y": [np.nan, np.nan, np.nan]}, + index=Index([0, 1, 2], dtype="O"), + ) + tm.assert_frame_equal(res, exp) + + s1 = Series([1, 2, 3], name="x") + s2 = Series(name="y", dtype="float64") + res = concat([s1, s2], axis=0) + # name will be reset + exp = Series([1, 2, 3]) + tm.assert_series_equal(res, exp) + + # empty 
Series with no name + s1 = Series([1, 2, 3], name="x") + s2 = Series(name=None, dtype="float64") + res = concat([s1, s2], axis=1) + exp = DataFrame( + {"x": [1, 2, 3], 0: [np.nan, np.nan, np.nan]}, + columns=["x", 0], + index=Index([0, 1, 2], dtype="O"), + ) + tm.assert_frame_equal(res, exp) + + @pytest.mark.parametrize("tz", [None, "UTC"]) + @pytest.mark.parametrize("values", [[], [1, 2, 3]]) + def test_concat_empty_series_timelike(self, tz, values): + # GH 18447 + + first = Series([], dtype="M8[ns]").dt.tz_localize(tz) + dtype = None if values else np.float64 + second = Series(values, dtype=dtype) + + expected = DataFrame( + { + 0: Series([pd.NaT] * len(values), dtype="M8[ns]").dt.tz_localize(tz), + 1: values, + } + ) + result = concat([first, second], axis=1) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "left,right,expected", + [ + # booleans + (np.bool_, np.int32, np.int32), + (np.bool_, np.float32, np.object_), + # datetime-like + ("m8[ns]", np.bool_, np.object_), + ("m8[ns]", np.int64, np.object_), + ("M8[ns]", np.bool_, np.object_), + ("M8[ns]", np.int64, np.object_), + # categorical + ("category", "category", "category"), + ("category", "object", "object"), + ], + ) + def test_concat_empty_series_dtypes(self, left, right, expected): + warn = None + if (left is np.bool_ or right is np.bool_) and expected is not np.object_: + warn = FutureWarning + with tm.assert_produces_warning(warn, match="concatenating bool-dtype"): + # GH#39817 + result = concat([Series(dtype=left), Series(dtype=right)]) + assert result.dtype == expected + + @pytest.mark.parametrize( + "dtype", ["float64", "int8", "uint8", "bool", "m8[ns]", "M8[ns]"] + ) + def test_concat_empty_series_dtypes_match_roundtrips(self, dtype): + dtype = np.dtype(dtype) + + result = concat([Series(dtype=dtype)]) + assert result.dtype == dtype + + result = concat([Series(dtype=dtype), Series(dtype=dtype)]) + assert result.dtype == dtype + + def test_concat_empty_series_dtypes_roundtrips(self): + + # round-tripping with self & like self + dtypes = map(np.dtype, ["float64", "int8", "uint8", "bool", "m8[ns]", "M8[ns]"]) + + def int_result_type(dtype, dtype2): + typs = {dtype.kind, dtype2.kind} + if not len(typs - {"i", "u", "b"}) and ( + dtype.kind == "i" or dtype2.kind == "i" + ): + return "i" + elif not len(typs - {"u", "b"}) and ( + dtype.kind == "u" or dtype2.kind == "u" + ): + return "u" + return None + + def float_result_type(dtype, dtype2): + typs = {dtype.kind, dtype2.kind} + if not len(typs - {"f", "i", "u"}) and ( + dtype.kind == "f" or dtype2.kind == "f" + ): + return "f" + return None + + def get_result_type(dtype, dtype2): + result = float_result_type(dtype, dtype2) + if result is not None: + return result + result = int_result_type(dtype, dtype2) + if result is not None: + return result + return "O" + + for dtype in dtypes: + for dtype2 in dtypes: + if dtype == dtype2: + continue + + expected = get_result_type(dtype, dtype2) + result = concat([Series(dtype=dtype), Series(dtype=dtype2)]).dtype + assert result.kind == expected + + def test_concat_empty_series_dtypes_triple(self): + + assert ( + concat( + [Series(dtype="M8[ns]"), Series(dtype=np.bool_), Series(dtype=np.int64)] + ).dtype + == np.object_ + ) + + def test_concat_empty_series_dtype_category_with_array(self): + # GH#18515 + assert ( + concat( + [Series(np.array([]), dtype="category"), Series(dtype="float64")] + ).dtype + == "float64" + ) + + def test_concat_empty_series_dtypes_sparse(self): + result = concat( + [ + 
Series(dtype="float64").astype("Sparse"), + Series(dtype="float64").astype("Sparse"), + ] + ) + assert result.dtype == "Sparse[float64]" + + result = concat( + [Series(dtype="float64").astype("Sparse"), Series(dtype="float64")] + ) + expected = pd.SparseDtype(np.float64) + assert result.dtype == expected + + result = concat( + [Series(dtype="float64").astype("Sparse"), Series(dtype="object")] + ) + expected = pd.SparseDtype("object") + assert result.dtype == expected + + def test_concat_empty_df_object_dtype(self): + # GH 9149 + df_1 = DataFrame({"Row": [0, 1, 1], "EmptyCol": np.nan, "NumberCol": [1, 2, 3]}) + df_2 = DataFrame(columns=df_1.columns) + result = concat([df_1, df_2], axis=0) + expected = df_1.astype(object) + tm.assert_frame_equal(result, expected) + + def test_concat_empty_dataframe_dtypes(self): + df = DataFrame(columns=list("abc")) + df["a"] = df["a"].astype(np.bool_) + df["b"] = df["b"].astype(np.int32) + df["c"] = df["c"].astype(np.float64) + + result = concat([df, df]) + assert result["a"].dtype == np.bool_ + assert result["b"].dtype == np.int32 + assert result["c"].dtype == np.float64 + + result = concat([df, df.astype(np.float64)]) + assert result["a"].dtype == np.object_ + assert result["b"].dtype == np.float64 + assert result["c"].dtype == np.float64 + + def test_concat_inner_join_empty(self): + # GH 15328 + df_empty = DataFrame() + df_a = DataFrame({"a": [1, 2]}, index=[0, 1], dtype="int64") + df_expected = DataFrame({"a": []}, index=[], dtype="int64") + + for how, expected in [("inner", df_expected), ("outer", df_a)]: + result = concat([df_a, df_empty], axis=1, join=how) + tm.assert_frame_equal(result, expected) + + def test_empty_dtype_coerce(self): + + # xref to #12411 + # xref to #12045 + # xref to #11594 + # see below + + # 10571 + df1 = DataFrame(data=[[1, None], [2, None]], columns=["a", "b"]) + df2 = DataFrame(data=[[3, None], [4, None]], columns=["a", "b"]) + result = concat([df1, df2]) + expected = df1.dtypes + tm.assert_series_equal(result.dtypes, expected) + + def test_concat_empty_dataframe(self): + # 39037 + df1 = DataFrame(columns=["a", "b"]) + df2 = DataFrame(columns=["b", "c"]) + result = concat([df1, df2, df1]) + expected = DataFrame(columns=["a", "b", "c"]) + tm.assert_frame_equal(result, expected) + + df3 = DataFrame(columns=["a", "b"]) + df4 = DataFrame(columns=["b"]) + result = concat([df3, df4]) + expected = DataFrame(columns=["a", "b"]) + tm.assert_frame_equal(result, expected) + + def test_concat_empty_dataframe_different_dtypes(self): + # 39037 + df1 = DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]}) + df2 = DataFrame({"a": [1, 2, 3]}) + + result = concat([df1[:0], df2[:0]]) + assert result["a"].dtype == np.int64 + assert result["b"].dtype == np.object_ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/test_index.py b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/test_index.py new file mode 100644 index 0000000..1692446 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/test_index.py @@ -0,0 +1,308 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + concat, +) +import pandas._testing as tm + + +class TestIndexConcat: + def test_concat_ignore_index(self, sort): + frame1 = DataFrame( + {"test1": ["a", "b", "c"], "test2": [1, 2, 3], "test3": [4.5, 3.2, 1.2]} + ) + frame2 = DataFrame({"test3": [5.2, 2.2, 4.3]}) + frame1.index = Index(["x", "y", "z"]) + frame2.index = Index(["x", "y", "q"]) + 
+ v1 = concat([frame1, frame2], axis=1, ignore_index=True, sort=sort) + + nan = np.nan + expected = DataFrame( + [ + [nan, nan, nan, 4.3], + ["a", 1, 4.5, 5.2], + ["b", 2, 3.2, 2.2], + ["c", 3, 1.2, nan], + ], + index=Index(["q", "x", "y", "z"]), + ) + if not sort: + expected = expected.loc[["x", "y", "z", "q"]] + + tm.assert_frame_equal(v1, expected) + + @pytest.mark.parametrize( + "name_in1,name_in2,name_in3,name_out", + [ + ("idx", "idx", "idx", "idx"), + ("idx", "idx", None, None), + ("idx", None, None, None), + ("idx1", "idx2", None, None), + ("idx1", "idx1", "idx2", None), + ("idx1", "idx2", "idx3", None), + (None, None, None, None), + ], + ) + def test_concat_same_index_names(self, name_in1, name_in2, name_in3, name_out): + # GH13475 + indices = [ + Index(["a", "b", "c"], name=name_in1), + Index(["b", "c", "d"], name=name_in2), + Index(["c", "d", "e"], name=name_in3), + ] + frames = [ + DataFrame({c: [0, 1, 2]}, index=i) for i, c in zip(indices, ["x", "y", "z"]) + ] + result = concat(frames, axis=1) + + exp_ind = Index(["a", "b", "c", "d", "e"], name=name_out) + expected = DataFrame( + { + "x": [0, 1, 2, np.nan, np.nan], + "y": [np.nan, 0, 1, 2, np.nan], + "z": [np.nan, np.nan, 0, 1, 2], + }, + index=exp_ind, + ) + + tm.assert_frame_equal(result, expected) + + def test_concat_rename_index(self): + a = DataFrame( + np.random.rand(3, 3), + columns=list("ABC"), + index=Index(list("abc"), name="index_a"), + ) + b = DataFrame( + np.random.rand(3, 3), + columns=list("ABC"), + index=Index(list("abc"), name="index_b"), + ) + + result = concat([a, b], keys=["key0", "key1"], names=["lvl0", "lvl1"]) + + exp = concat([a, b], keys=["key0", "key1"], names=["lvl0"]) + names = list(exp.index.names) + names[1] = "lvl1" + exp.index.set_names(names, inplace=True) + + tm.assert_frame_equal(result, exp) + assert result.index.names == exp.index.names + + def test_concat_copy_index_series(self, axis): + # GH 29879 + ser = Series([1, 2]) + comb = concat([ser, ser], axis=axis, copy=True) + assert comb.index is not ser.index + + def test_concat_copy_index_frame(self, axis): + # GH 29879 + df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"]) + comb = concat([df, df], axis=axis, copy=True) + assert comb.index is not df.index + assert comb.columns is not df.columns + + def test_default_index(self): + # is_series and ignore_index + s1 = Series([1, 2, 3], name="x") + s2 = Series([4, 5, 6], name="y") + res = concat([s1, s2], axis=1, ignore_index=True) + assert isinstance(res.columns, pd.RangeIndex) + exp = DataFrame([[1, 4], [2, 5], [3, 6]]) + # use check_index_type=True to check the result have + # RangeIndex (default index) + tm.assert_frame_equal(res, exp, check_index_type=True, check_column_type=True) + + # is_series and all inputs have no names + s1 = Series([1, 2, 3]) + s2 = Series([4, 5, 6]) + res = concat([s1, s2], axis=1, ignore_index=False) + assert isinstance(res.columns, pd.RangeIndex) + exp = DataFrame([[1, 4], [2, 5], [3, 6]]) + exp.columns = pd.RangeIndex(2) + tm.assert_frame_equal(res, exp, check_index_type=True, check_column_type=True) + + # is_dataframe and ignore_index + df1 = DataFrame({"A": [1, 2], "B": [5, 6]}) + df2 = DataFrame({"A": [3, 4], "B": [7, 8]}) + + res = concat([df1, df2], axis=0, ignore_index=True) + exp = DataFrame([[1, 5], [2, 6], [3, 7], [4, 8]], columns=["A", "B"]) + tm.assert_frame_equal(res, exp, check_index_type=True, check_column_type=True) + + res = concat([df1, df2], axis=1, ignore_index=True) + exp = DataFrame([[1, 5, 3, 7], [2, 6, 4, 8]]) + tm.assert_frame_equal(res, 
exp, check_index_type=True, check_column_type=True) + + def test_dups_index(self): + # GH 4771 + + # single dtypes + df = DataFrame( + np.random.randint(0, 10, size=40).reshape(10, 4), + columns=["A", "A", "C", "C"], + ) + + result = concat([df, df], axis=1) + tm.assert_frame_equal(result.iloc[:, :4], df) + tm.assert_frame_equal(result.iloc[:, 4:], df) + + result = concat([df, df], axis=0) + tm.assert_frame_equal(result.iloc[:10], df) + tm.assert_frame_equal(result.iloc[10:], df) + + # multi dtypes + df = concat( + [ + DataFrame(np.random.randn(10, 4), columns=["A", "A", "B", "B"]), + DataFrame( + np.random.randint(0, 10, size=20).reshape(10, 2), columns=["A", "C"] + ), + ], + axis=1, + ) + + result = concat([df, df], axis=1) + tm.assert_frame_equal(result.iloc[:, :6], df) + tm.assert_frame_equal(result.iloc[:, 6:], df) + + result = concat([df, df], axis=0) + tm.assert_frame_equal(result.iloc[:10], df) + tm.assert_frame_equal(result.iloc[10:], df) + + # append + result = df.iloc[0:8, :]._append(df.iloc[8:]) + tm.assert_frame_equal(result, df) + + result = df.iloc[0:8, :]._append(df.iloc[8:9])._append(df.iloc[9:10]) + tm.assert_frame_equal(result, df) + + expected = concat([df, df], axis=0) + result = df._append(df) + tm.assert_frame_equal(result, expected) + + +class TestMultiIndexConcat: + def test_concat_multiindex_with_keys(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + index = frame.index + result = concat([frame, frame], keys=[0, 1], names=["iteration"]) + + assert result.index.names == ("iteration",) + index.names + tm.assert_frame_equal(result.loc[0], frame) + tm.assert_frame_equal(result.loc[1], frame) + assert result.index.nlevels == 3 + + def test_concat_multiindex_with_none_in_index_names(self): + # GH 15787 + index = MultiIndex.from_product([[1], range(5)], names=["level1", None]) + df = DataFrame({"col": range(5)}, index=index, dtype=np.int32) + + result = concat([df, df], keys=[1, 2], names=["level2"]) + index = MultiIndex.from_product( + [[1, 2], [1], range(5)], names=["level2", "level1", None] + ) + expected = DataFrame({"col": list(range(5)) * 2}, index=index, dtype=np.int32) + tm.assert_frame_equal(result, expected) + + result = concat([df, df[:2]], keys=[1, 2], names=["level2"]) + level2 = [1] * 5 + [2] * 2 + level1 = [1] * 7 + no_name = list(range(5)) + list(range(2)) + tuples = list(zip(level2, level1, no_name)) + index = MultiIndex.from_tuples(tuples, names=["level2", "level1", None]) + expected = DataFrame({"col": no_name}, index=index, dtype=np.int32) + tm.assert_frame_equal(result, expected) + + def test_concat_multiindex_rangeindex(self): + # GH13542 + # when multi-index levels are RangeIndex objects + # there is a bug in concat with objects of len 1 + + df = DataFrame(np.random.randn(9, 2)) + df.index = MultiIndex( + levels=[pd.RangeIndex(3), pd.RangeIndex(3)], + codes=[np.repeat(np.arange(3), 3), np.tile(np.arange(3), 3)], + ) + + res = concat([df.iloc[[2, 3, 4], :], df.iloc[[5], :]]) + exp = df.iloc[[2, 3, 4, 5], :] + tm.assert_frame_equal(res, exp) + + def test_concat_multiindex_dfs_with_deepcopy(self): + # GH 9967 + from copy import deepcopy + + example_multiindex1 = MultiIndex.from_product([["a"], ["b"]]) + example_dataframe1 = DataFrame([0], index=example_multiindex1) + + example_multiindex2 = MultiIndex.from_product([["a"], ["c"]]) + example_dataframe2 = DataFrame([1], index=example_multiindex2) + + example_dict = {"s1": example_dataframe1, "s2": example_dataframe2} + expected_index = MultiIndex( + levels=[["s1", 
"s2"], ["a"], ["b", "c"]], + codes=[[0, 1], [0, 0], [0, 1]], + names=["testname", None, None], + ) + expected = DataFrame([[0], [1]], index=expected_index) + result_copy = concat(deepcopy(example_dict), names=["testname"]) + tm.assert_frame_equal(result_copy, expected) + result_no_copy = concat(example_dict, names=["testname"]) + tm.assert_frame_equal(result_no_copy, expected) + + @pytest.mark.parametrize( + "mi1_list", + [ + [["a"], range(2)], + [["b"], np.arange(2.0, 4.0)], + [["c"], ["A", "B"]], + [["d"], pd.date_range(start="2017", end="2018", periods=2)], + ], + ) + @pytest.mark.parametrize( + "mi2_list", + [ + [["a"], range(2)], + [["b"], np.arange(2.0, 4.0)], + [["c"], ["A", "B"]], + [["d"], pd.date_range(start="2017", end="2018", periods=2)], + ], + ) + def test_concat_with_various_multiindex_dtypes( + self, mi1_list: list, mi2_list: list + ): + # GitHub #23478 + mi1 = MultiIndex.from_product(mi1_list) + mi2 = MultiIndex.from_product(mi2_list) + + df1 = DataFrame(np.zeros((1, len(mi1))), columns=mi1) + df2 = DataFrame(np.zeros((1, len(mi2))), columns=mi2) + + if mi1_list[0] == mi2_list[0]: + expected_mi = MultiIndex( + levels=[mi1_list[0], list(mi1_list[1])], + codes=[[0, 0, 0, 0], [0, 1, 0, 1]], + ) + else: + expected_mi = MultiIndex( + levels=[ + mi1_list[0] + mi2_list[0], + list(mi1_list[1]) + list(mi2_list[1]), + ], + codes=[[0, 0, 1, 1], [0, 1, 2, 3]], + ) + + expected_df = DataFrame(np.zeros((1, len(expected_mi))), columns=expected_mi) + + with tm.assert_produces_warning(None): + result_df = concat((df1, df2), axis=1) + + tm.assert_frame_equal(expected_df, result_df) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/test_invalid.py b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/test_invalid.py new file mode 100644 index 0000000..920d31d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/test_invalid.py @@ -0,0 +1,56 @@ +from io import StringIO + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + concat, + read_csv, +) +import pandas._testing as tm + + +class TestInvalidConcat: + def test_concat_invalid(self): + + # trying to concat a ndframe with a non-ndframe + df1 = tm.makeCustomDataframe(10, 2) + for obj in [1, {}, [1, 2], (1, 2)]: + + msg = ( + f"cannot concatenate object of type '{type(obj)}'; " + "only Series and DataFrame objs are valid" + ) + with pytest.raises(TypeError, match=msg): + concat([df1, obj]) + + def test_concat_invalid_first_argument(self): + df1 = tm.makeCustomDataframe(10, 2) + msg = ( + "first argument must be an iterable of pandas " + 'objects, you passed an object of type "DataFrame"' + ) + with pytest.raises(TypeError, match=msg): + concat(df1) + + def test_concat_generator_obj(self): + # generator ok though + concat(DataFrame(np.random.rand(5, 5)) for _ in range(3)) + + def test_concat_textreader_obj(self): + # text reader ok + # GH6583 + data = """index,A,B,C,D + foo,2,3,4,5 + bar,7,8,9,10 + baz,12,13,14,15 + qux,12,13,14,15 + foo2,12,13,14,15 + bar2,12,13,14,15 + """ + + with read_csv(StringIO(data), chunksize=1) as reader: + result = concat(reader, ignore_index=True) + expected = read_csv(StringIO(data)) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/test_series.py b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/test_series.py new file mode 100644 index 0000000..8fa5988 --- /dev/null +++ 
b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/test_series.py @@ -0,0 +1,148 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + DatetimeIndex, + Index, + MultiIndex, + Series, + concat, + date_range, +) +import pandas._testing as tm + + +class TestSeriesConcat: + def test_concat_series(self): + + ts = tm.makeTimeSeries() + ts.name = "foo" + + pieces = [ts[:5], ts[5:15], ts[15:]] + + result = concat(pieces) + tm.assert_series_equal(result, ts) + assert result.name == ts.name + + result = concat(pieces, keys=[0, 1, 2]) + expected = ts.copy() + + ts.index = DatetimeIndex(np.array(ts.index.values, dtype="M8[ns]")) + + exp_codes = [np.repeat([0, 1, 2], [len(x) for x in pieces]), np.arange(len(ts))] + exp_index = MultiIndex(levels=[[0, 1, 2], ts.index], codes=exp_codes) + expected.index = exp_index + tm.assert_series_equal(result, expected) + + def test_concat_empty_and_non_empty_series_regression(self): + # GH 18187 regression test + s1 = Series([1]) + s2 = Series([], dtype=object) + + expected = s1 + result = concat([s1, s2]) + tm.assert_series_equal(result, expected) + + def test_concat_series_axis1(self): + ts = tm.makeTimeSeries() + + pieces = [ts[:-2], ts[2:], ts[2:-2]] + + result = concat(pieces, axis=1) + expected = DataFrame(pieces).T + tm.assert_frame_equal(result, expected) + + result = concat(pieces, keys=["A", "B", "C"], axis=1) + expected = DataFrame(pieces, index=["A", "B", "C"]).T + tm.assert_frame_equal(result, expected) + + def test_concat_series_axis1_preserves_series_names(self): + # preserve series names, #2489 + s = Series(np.random.randn(5), name="A") + s2 = Series(np.random.randn(5), name="B") + + result = concat([s, s2], axis=1) + expected = DataFrame({"A": s, "B": s2}) + tm.assert_frame_equal(result, expected) + + s2.name = None + result = concat([s, s2], axis=1) + tm.assert_index_equal(result.columns, Index(["A", 0], dtype="object")) + + def test_concat_series_axis1_with_reindex(self, sort): + # must reindex, #2603 + s = Series(np.random.randn(3), index=["c", "a", "b"], name="A") + s2 = Series(np.random.randn(4), index=["d", "a", "b", "c"], name="B") + result = concat([s, s2], axis=1, sort=sort) + expected = DataFrame({"A": s, "B": s2}, index=["c", "a", "b", "d"]) + if sort: + expected = expected.sort_index() + tm.assert_frame_equal(result, expected) + + def test_concat_series_axis1_names_applied(self): + # ensure names argument is not ignored on axis=1, #23490 + s = Series([1, 2, 3]) + s2 = Series([4, 5, 6]) + result = concat([s, s2], axis=1, keys=["a", "b"], names=["A"]) + expected = DataFrame( + [[1, 4], [2, 5], [3, 6]], columns=Index(["a", "b"], name="A") + ) + tm.assert_frame_equal(result, expected) + + result = concat([s, s2], axis=1, keys=[("a", 1), ("b", 2)], names=["A", "B"]) + expected = DataFrame( + [[1, 4], [2, 5], [3, 6]], + columns=MultiIndex.from_tuples([("a", 1), ("b", 2)], names=["A", "B"]), + ) + tm.assert_frame_equal(result, expected) + + def test_concat_series_axis1_same_names_ignore_index(self): + dates = date_range("01-Jan-2013", "01-Jan-2014", freq="MS")[0:-1] + s1 = Series(np.random.randn(len(dates)), index=dates, name="value") + s2 = Series(np.random.randn(len(dates)), index=dates, name="value") + + result = concat([s1, s2], axis=1, ignore_index=True) + expected = Index(range(2)) + + tm.assert_index_equal(result.columns, expected, exact=True) + + @pytest.mark.parametrize( + "s1name,s2name", [(np.int64(190), (43, 0)), (190, (43, 0))] + ) + def test_concat_series_name_npscalar_tuple(self, s1name, 
s2name): + # GH21015 + s1 = Series({"a": 1, "b": 2}, name=s1name) + s2 = Series({"c": 5, "d": 6}, name=s2name) + result = concat([s1, s2]) + expected = Series({"a": 1, "b": 2, "c": 5, "d": 6}) + tm.assert_series_equal(result, expected) + + def test_concat_series_partial_columns_names(self): + # GH10698 + foo = Series([1, 2], name="foo") + bar = Series([1, 2]) + baz = Series([4, 5]) + + result = concat([foo, bar, baz], axis=1) + expected = DataFrame( + {"foo": [1, 2], 0: [1, 2], 1: [4, 5]}, columns=["foo", 0, 1] + ) + tm.assert_frame_equal(result, expected) + + result = concat([foo, bar, baz], axis=1, keys=["red", "blue", "yellow"]) + expected = DataFrame( + {"red": [1, 2], "blue": [1, 2], "yellow": [4, 5]}, + columns=["red", "blue", "yellow"], + ) + tm.assert_frame_equal(result, expected) + + result = concat([foo, bar, baz], axis=1, ignore_index=True) + expected = DataFrame({0: [1, 2], 1: [1, 2], 2: [4, 5]}) + tm.assert_frame_equal(result, expected) + + def test_concat_series_length_one_reversed(self, frame_or_series): + # GH39401 + obj = frame_or_series([100]) + result = concat([obj.iloc[::-1]]) + tm.assert_equal(result, obj) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/test_sort.py b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/test_sort.py new file mode 100644 index 0000000..a789dc0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/reshape/concat/test_sort.py @@ -0,0 +1,100 @@ +import numpy as np + +import pandas as pd +from pandas import DataFrame +import pandas._testing as tm + + +class TestConcatSort: + def test_concat_sorts_columns(self, sort): + # GH-4588 + df1 = DataFrame({"a": [1, 2], "b": [1, 2]}, columns=["b", "a"]) + df2 = DataFrame({"a": [3, 4], "c": [5, 6]}) + + # for sort=True/None + expected = DataFrame( + {"a": [1, 2, 3, 4], "b": [1, 2, None, None], "c": [None, None, 5, 6]}, + columns=["a", "b", "c"], + ) + + if sort is False: + expected = expected[["b", "a", "c"]] + + # default + with tm.assert_produces_warning(None): + result = pd.concat([df1, df2], ignore_index=True, sort=sort) + tm.assert_frame_equal(result, expected) + + def test_concat_sorts_index(self, sort): + df1 = DataFrame({"a": [1, 2, 3]}, index=["c", "a", "b"]) + df2 = DataFrame({"b": [1, 2]}, index=["a", "b"]) + + # For True/None + expected = DataFrame( + {"a": [2, 3, 1], "b": [1, 2, None]}, + index=["a", "b", "c"], + columns=["a", "b"], + ) + if sort is False: + expected = expected.loc[["c", "a", "b"]] + + # Warn and sort by default + with tm.assert_produces_warning(None): + result = pd.concat([df1, df2], axis=1, sort=sort) + tm.assert_frame_equal(result, expected) + + def test_concat_inner_sort(self, sort): + # https://github.com/pandas-dev/pandas/pull/20613 + df1 = DataFrame( + {"a": [1, 2], "b": [1, 2], "c": [1, 2]}, columns=["b", "a", "c"] + ) + df2 = DataFrame({"a": [1, 2], "b": [3, 4]}, index=[3, 4]) + + with tm.assert_produces_warning(None): + # unset sort should *not* warn for inner join + # since that never sorted + result = pd.concat([df1, df2], sort=sort, join="inner", ignore_index=True) + + expected = DataFrame({"b": [1, 2, 3, 4], "a": [1, 2, 1, 2]}, columns=["b", "a"]) + if sort is True: + expected = expected[["a", "b"]] + tm.assert_frame_equal(result, expected) + + def test_concat_aligned_sort(self): + # GH-4588 + df = DataFrame({"c": [1, 2], "b": [3, 4], "a": [5, 6]}, columns=["c", "b", "a"]) + result = pd.concat([df, df], sort=True, ignore_index=True) + expected = DataFrame( + {"a": [5, 6, 5, 6], "b": [3, 4, 3, 4], "c": [1, 
2, 1, 2]}, + columns=["a", "b", "c"], + ) + tm.assert_frame_equal(result, expected) + + result = pd.concat( + [df, df[["c", "b"]]], join="inner", sort=True, ignore_index=True + ) + expected = expected[["b", "c"]] + tm.assert_frame_equal(result, expected) + + def test_concat_aligned_sort_does_not_raise(self): + # GH-4588 + # We catch TypeErrors from sorting internally and do not re-raise. + df = DataFrame({1: [1, 2], "a": [3, 4]}, columns=[1, "a"]) + expected = DataFrame({1: [1, 2, 1, 2], "a": [3, 4, 3, 4]}, columns=[1, "a"]) + result = pd.concat([df, df], ignore_index=True, sort=True) + tm.assert_frame_equal(result, expected) + + def test_concat_frame_with_sort_false(self): + # GH 43375 + result = pd.concat( + [DataFrame({i: i}, index=[i]) for i in range(2, 0, -1)], sort=False + ) + expected = DataFrame([[2, np.nan], [np.nan, 1]], index=[2, 1], columns=[2, 1]) + + tm.assert_frame_equal(result, expected) + + def test_concat_sort_none_warning(self): + # GH#41518 + df = DataFrame({1: [1, 2], "a": [3, 4]}) + with tm.assert_produces_warning(FutureWarning, match="sort"): + pd.concat([df, df], sort=None) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..b718060 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/__pycache__/test_join.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/__pycache__/test_join.cpython-38.pyc new file mode 100644 index 0000000..7320fb2 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/__pycache__/test_join.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/__pycache__/test_merge.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/__pycache__/test_merge.cpython-38.pyc new file mode 100644 index 0000000..c6d0df6 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/__pycache__/test_merge.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/__pycache__/test_merge_asof.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/__pycache__/test_merge_asof.cpython-38.pyc new file mode 100644 index 0000000..7e5675a Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/__pycache__/test_merge_asof.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/__pycache__/test_merge_cross.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/__pycache__/test_merge_cross.cpython-38.pyc new file mode 100644 index 0000000..55b22dd Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/__pycache__/test_merge_cross.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/__pycache__/test_merge_index_as_string.cpython-38.pyc 
b/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/__pycache__/test_merge_index_as_string.cpython-38.pyc new file mode 100644 index 0000000..68f54b1 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/__pycache__/test_merge_index_as_string.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/__pycache__/test_merge_ordered.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/__pycache__/test_merge_ordered.cpython-38.pyc new file mode 100644 index 0000000..3dffb71 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/__pycache__/test_merge_ordered.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/__pycache__/test_multi.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/__pycache__/test_multi.cpython-38.pyc new file mode 100644 index 0000000..b3601e8 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/__pycache__/test_multi.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/test_join.py b/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/test_join.py new file mode 100644 index 0000000..a091d07 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/test_join.py @@ -0,0 +1,883 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + Index, + MultiIndex, + Series, + Timestamp, + concat, + merge, +) +import pandas._testing as tm +from pandas.tests.reshape.merge.test_merge import ( + NGROUPS, + N, + get_test_data, +) + +a_ = np.array + + +class TestJoin: + def setup_method(self, method): + # aggregate multiple columns + self.df = DataFrame( + { + "key1": get_test_data(), + "key2": get_test_data(), + "data1": np.random.randn(N), + "data2": np.random.randn(N), + } + ) + + # exclude a couple keys for fun + self.df = self.df[self.df["key2"] > 1] + + self.df2 = DataFrame( + { + "key1": get_test_data(n=N // 5), + "key2": get_test_data(ngroups=NGROUPS // 2, n=N // 5), + "value": np.random.randn(N // 5), + } + ) + + index, data = tm.getMixedTypeDict() + self.target = DataFrame(data, index=index) + + # Join on string value + self.source = DataFrame( + {"MergedA": data["A"], "MergedD": data["D"]}, index=data["C"] + ) + + def test_left_outer_join(self): + joined_key2 = merge(self.df, self.df2, on="key2") + _check_join(self.df, self.df2, joined_key2, ["key2"], how="left") + + joined_both = merge(self.df, self.df2) + _check_join(self.df, self.df2, joined_both, ["key1", "key2"], how="left") + + def test_right_outer_join(self): + joined_key2 = merge(self.df, self.df2, on="key2", how="right") + _check_join(self.df, self.df2, joined_key2, ["key2"], how="right") + + joined_both = merge(self.df, self.df2, how="right") + _check_join(self.df, self.df2, joined_both, ["key1", "key2"], how="right") + + def test_full_outer_join(self): + joined_key2 = merge(self.df, self.df2, on="key2", how="outer") + _check_join(self.df, self.df2, joined_key2, ["key2"], how="outer") + + joined_both = merge(self.df, self.df2, how="outer") + _check_join(self.df, self.df2, joined_both, ["key1", "key2"], how="outer") + + def test_inner_join(self): + joined_key2 = merge(self.df, self.df2, on="key2", how="inner") + _check_join(self.df, self.df2, joined_key2, ["key2"], how="inner") + + joined_both = merge(self.df, self.df2, how="inner") + 
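# note: with "on" omitted, merge() joins on the columns the two frames
+ # have in common (here "key1" and "key2")
+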
_check_join(self.df, self.df2, joined_both, ["key1", "key2"], how="inner") + + def test_handle_overlap(self): + joined = merge(self.df, self.df2, on="key2", suffixes=(".foo", ".bar")) + + assert "key1.foo" in joined + assert "key1.bar" in joined + + def test_handle_overlap_arbitrary_key(self): + joined = merge( + self.df, + self.df2, + left_on="key2", + right_on="key1", + suffixes=(".foo", ".bar"), + ) + assert "key1.foo" in joined + assert "key2.bar" in joined + + def test_join_on(self): + target = self.target + source = self.source + + merged = target.join(source, on="C") + tm.assert_series_equal(merged["MergedA"], target["A"], check_names=False) + tm.assert_series_equal(merged["MergedD"], target["D"], check_names=False) + + # join with duplicates (fix regression from DataFrame/Matrix merge) + df = DataFrame({"key": ["a", "a", "b", "b", "c"]}) + df2 = DataFrame({"value": [0, 1, 2]}, index=["a", "b", "c"]) + joined = df.join(df2, on="key") + expected = DataFrame( + {"key": ["a", "a", "b", "b", "c"], "value": [0, 0, 1, 1, 2]} + ) + tm.assert_frame_equal(joined, expected) + + # Test when some are missing + df_a = DataFrame([[1], [2], [3]], index=["a", "b", "c"], columns=["one"]) + df_b = DataFrame([["foo"], ["bar"]], index=[1, 2], columns=["two"]) + df_c = DataFrame([[1], [2]], index=[1, 2], columns=["three"]) + joined = df_a.join(df_b, on="one") + joined = joined.join(df_c, on="one") + assert np.isnan(joined["two"]["c"]) + assert np.isnan(joined["three"]["c"]) + + # merge column not present + with pytest.raises(KeyError, match="^'E'$"): + target.join(source, on="E") + + # overlap + source_copy = source.copy() + source_copy["A"] = 0 + msg = ( + "You are trying to merge on float64 and object columns. If " + "you wish to proceed you should use pd.concat" + ) + with pytest.raises(ValueError, match=msg): + target.join(source_copy, on="A") + + def test_join_on_fails_with_different_right_index(self): + df = DataFrame( + {"a": np.random.choice(["m", "f"], size=3), "b": np.random.randn(3)} + ) + df2 = DataFrame( + {"a": np.random.choice(["m", "f"], size=10), "b": np.random.randn(10)}, + index=tm.makeCustomIndex(10, 2), + ) + msg = r'len\(left_on\) must equal the number of levels in the index of "right"' + with pytest.raises(ValueError, match=msg): + merge(df, df2, left_on="a", right_index=True) + + def test_join_on_fails_with_different_left_index(self): + df = DataFrame( + {"a": np.random.choice(["m", "f"], size=3), "b": np.random.randn(3)}, + index=tm.makeCustomIndex(3, 2), + ) + df2 = DataFrame( + {"a": np.random.choice(["m", "f"], size=10), "b": np.random.randn(10)} + ) + msg = r'len\(right_on\) must equal the number of levels in the index of "left"' + with pytest.raises(ValueError, match=msg): + merge(df, df2, right_on="b", left_index=True) + + def test_join_on_fails_with_different_column_counts(self): + df = DataFrame( + {"a": np.random.choice(["m", "f"], size=3), "b": np.random.randn(3)} + ) + df2 = DataFrame( + {"a": np.random.choice(["m", "f"], size=10), "b": np.random.randn(10)}, + index=tm.makeCustomIndex(10, 2), + ) + msg = r"len\(right_on\) must equal len\(left_on\)" + with pytest.raises(ValueError, match=msg): + merge(df, df2, right_on="a", left_on=["a", "b"]) + + @pytest.mark.parametrize("wrong_type", [2, "str", None, np.array([0, 1])]) + def test_join_on_fails_with_wrong_object_type(self, wrong_type): + # GH12081 - original issue + + # GH21220 - merging of Series and DataFrame is now allowed + # Edited test to remove the Series object from test parameters + + df = DataFrame({"a": 
[1, 1]}) + msg = ( + "Can only merge Series or DataFrame objects, " + f"a {type(wrong_type)} was passed" + ) + with pytest.raises(TypeError, match=msg): + merge(wrong_type, df, left_on="a", right_on="a") + with pytest.raises(TypeError, match=msg): + merge(df, wrong_type, left_on="a", right_on="a") + + def test_join_on_pass_vector(self): + expected = self.target.join(self.source, on="C") + del expected["C"] + + join_col = self.target.pop("C") + result = self.target.join(self.source, on=join_col) + tm.assert_frame_equal(result, expected) + + def test_join_with_len0(self): + # nothing to merge + merged = self.target.join(self.source.reindex([]), on="C") + for col in self.source: + assert col in merged + assert merged[col].isna().all() + + merged2 = self.target.join(self.source.reindex([]), on="C", how="inner") + tm.assert_index_equal(merged2.columns, merged.columns) + assert len(merged2) == 0 + + def test_join_on_inner(self): + df = DataFrame({"key": ["a", "a", "d", "b", "b", "c"]}) + df2 = DataFrame({"value": [0, 1]}, index=["a", "b"]) + + joined = df.join(df2, on="key", how="inner") + + expected = df.join(df2, on="key") + expected = expected[expected["value"].notna()] + tm.assert_series_equal(joined["key"], expected["key"]) + tm.assert_series_equal(joined["value"], expected["value"], check_dtype=False) + tm.assert_index_equal(joined.index, expected.index) + + def test_join_on_singlekey_list(self): + df = DataFrame({"key": ["a", "a", "b", "b", "c"]}) + df2 = DataFrame({"value": [0, 1, 2]}, index=["a", "b", "c"]) + + # corner cases + joined = df.join(df2, on=["key"]) + expected = df.join(df2, on="key") + + tm.assert_frame_equal(joined, expected) + + def test_join_on_series(self): + result = self.target.join(self.source["MergedA"], on="C") + expected = self.target.join(self.source[["MergedA"]], on="C") + tm.assert_frame_equal(result, expected) + + def test_join_on_series_buglet(self): + # GH #638 + df = DataFrame({"a": [1, 1]}) + ds = Series([2], index=[1], name="b") + result = df.join(ds, on="a") + expected = DataFrame({"a": [1, 1], "b": [2, 2]}, index=df.index) + tm.assert_frame_equal(result, expected) + + def test_join_index_mixed(self, join_type): + # no overlapping blocks + df1 = DataFrame(index=np.arange(10)) + df1["bool"] = True + df1["string"] = "foo" + + df2 = DataFrame(index=np.arange(5, 15)) + df2["int"] = 1 + df2["float"] = 1.0 + + joined = df1.join(df2, how=join_type) + expected = _join_by_hand(df1, df2, how=join_type) + tm.assert_frame_equal(joined, expected) + + joined = df2.join(df1, how=join_type) + expected = _join_by_hand(df2, df1, how=join_type) + tm.assert_frame_equal(joined, expected) + + def test_join_index_mixed_overlap(self): + df1 = DataFrame( + {"A": 1.0, "B": 2, "C": "foo", "D": True}, + index=np.arange(10), + columns=["A", "B", "C", "D"], + ) + assert df1["B"].dtype == np.int64 + assert df1["D"].dtype == np.bool_ + + df2 = DataFrame( + {"A": 1.0, "B": 2, "C": "foo", "D": True}, + index=np.arange(0, 10, 2), + columns=["A", "B", "C", "D"], + ) + + # overlap + joined = df1.join(df2, lsuffix="_one", rsuffix="_two") + expected_columns = [ + "A_one", + "B_one", + "C_one", + "D_one", + "A_two", + "B_two", + "C_two", + "D_two", + ] + df1.columns = expected_columns[:4] + df2.columns = expected_columns[4:] + expected = _join_by_hand(df1, df2) + tm.assert_frame_equal(joined, expected) + + def test_join_empty_bug(self): + # generated an exception in 0.4.3 + x = DataFrame() + x.join(DataFrame([3], index=[0], columns=["A"]), how="outer") + + def test_join_unconsolidated(self): + 
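# note: "unconsolidated" refers to the internal block layout; assigning
+ # a new column after construction leaves two separate float64 blocks
+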
# GH #331 + a = DataFrame(np.random.randn(30, 2), columns=["a", "b"]) + c = Series(np.random.randn(30)) + a["c"] = c + d = DataFrame(np.random.randn(30, 1), columns=["q"]) + + # it works! + a.join(d) + d.join(a) + + def test_join_multiindex(self): + index1 = MultiIndex.from_arrays( + [["a", "a", "a", "b", "b", "b"], [1, 2, 3, 1, 2, 3]], + names=["first", "second"], + ) + + index2 = MultiIndex.from_arrays( + [["b", "b", "b", "c", "c", "c"], [1, 2, 3, 1, 2, 3]], + names=["first", "second"], + ) + + df1 = DataFrame(data=np.random.randn(6), index=index1, columns=["var X"]) + df2 = DataFrame(data=np.random.randn(6), index=index2, columns=["var Y"]) + + df1 = df1.sort_index(level=0) + df2 = df2.sort_index(level=0) + + joined = df1.join(df2, how="outer") + ex_index = Index(index1.values).union(Index(index2.values)) + expected = df1.reindex(ex_index).join(df2.reindex(ex_index)) + expected.index.names = index1.names + tm.assert_frame_equal(joined, expected) + assert joined.index.names == index1.names + + df1 = df1.sort_index(level=1) + df2 = df2.sort_index(level=1) + + joined = df1.join(df2, how="outer").sort_index(level=0) + ex_index = Index(index1.values).union(Index(index2.values)) + expected = df1.reindex(ex_index).join(df2.reindex(ex_index)) + expected.index.names = index1.names + + tm.assert_frame_equal(joined, expected) + assert joined.index.names == index1.names + + def test_join_inner_multiindex(self, lexsorted_two_level_string_multiindex): + key1 = ["bar", "bar", "bar", "foo", "foo", "baz", "baz", "qux", "qux", "snap"] + key2 = [ + "two", + "one", + "three", + "one", + "two", + "one", + "two", + "two", + "three", + "one", + ] + + data = np.random.randn(len(key1)) + data = DataFrame({"key1": key1, "key2": key2, "data": data}) + + index = lexsorted_two_level_string_multiindex + to_join = DataFrame( + np.random.randn(10, 3), index=index, columns=["j_one", "j_two", "j_three"] + ) + + joined = data.join(to_join, on=["key1", "key2"], how="inner") + expected = merge( + data, + to_join.reset_index(), + left_on=["key1", "key2"], + right_on=["first", "second"], + how="inner", + sort=False, + ) + + expected2 = merge( + to_join, + data, + right_on=["key1", "key2"], + left_index=True, + how="inner", + sort=False, + ) + tm.assert_frame_equal(joined, expected2.reindex_like(joined)) + + expected2 = merge( + to_join, + data, + right_on=["key1", "key2"], + left_index=True, + how="inner", + sort=False, + ) + + expected = expected.drop(["first", "second"], axis=1) + expected.index = joined.index + + assert joined.index.is_monotonic + tm.assert_frame_equal(joined, expected) + + # _assert_same_contents(expected, expected2.loc[:, expected.columns]) + + def test_join_hierarchical_mixed(self): + # GH 2024 + df = DataFrame([(1, 2, 3), (4, 5, 6)], columns=["a", "b", "c"]) + new_df = df.groupby(["a"]).agg({"b": [np.mean, np.sum]}) + other_df = DataFrame([(1, 2, 3), (7, 10, 6)], columns=["a", "b", "d"]) + other_df.set_index("a", inplace=True) + # GH 9455, 12219 + msg = "merging between different levels is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = merge(new_df, other_df, left_index=True, right_index=True) + assert ("b", "mean") in result + assert "b" in result + + def test_join_float64_float32(self): + + a = DataFrame(np.random.randn(10, 2), columns=["a", "b"], dtype=np.float64) + b = DataFrame(np.random.randn(10, 1), columns=["c"], dtype=np.float32) + joined = a.join(b) + assert joined.dtypes["a"] == "float64" + assert joined.dtypes["b"] == "float64" + assert 
joined.dtypes["c"] == "float32" + + a = np.random.randint(0, 5, 100).astype("int64") + b = np.random.random(100).astype("float64") + c = np.random.random(100).astype("float32") + df = DataFrame({"a": a, "b": b, "c": c}) + xpdf = DataFrame({"a": a, "b": b, "c": c}) + s = DataFrame(np.random.random(5).astype("float32"), columns=["md"]) + rs = df.merge(s, left_on="a", right_index=True) + assert rs.dtypes["a"] == "int64" + assert rs.dtypes["b"] == "float64" + assert rs.dtypes["c"] == "float32" + assert rs.dtypes["md"] == "float32" + + xp = xpdf.merge(s, left_on="a", right_index=True) + tm.assert_frame_equal(rs, xp) + + def test_join_many_non_unique_index(self): + df1 = DataFrame({"a": [1, 1], "b": [1, 1], "c": [10, 20]}) + df2 = DataFrame({"a": [1, 1], "b": [1, 2], "d": [100, 200]}) + df3 = DataFrame({"a": [1, 1], "b": [1, 2], "e": [1000, 2000]}) + idf1 = df1.set_index(["a", "b"]) + idf2 = df2.set_index(["a", "b"]) + idf3 = df3.set_index(["a", "b"]) + + result = idf1.join([idf2, idf3], how="outer") + + df_partially_merged = merge(df1, df2, on=["a", "b"], how="outer") + expected = merge(df_partially_merged, df3, on=["a", "b"], how="outer") + + result = result.reset_index() + expected = expected[result.columns] + expected["a"] = expected.a.astype("int64") + expected["b"] = expected.b.astype("int64") + tm.assert_frame_equal(result, expected) + + df1 = DataFrame({"a": [1, 1, 1], "b": [1, 1, 1], "c": [10, 20, 30]}) + df2 = DataFrame({"a": [1, 1, 1], "b": [1, 1, 2], "d": [100, 200, 300]}) + df3 = DataFrame({"a": [1, 1, 1], "b": [1, 1, 2], "e": [1000, 2000, 3000]}) + idf1 = df1.set_index(["a", "b"]) + idf2 = df2.set_index(["a", "b"]) + idf3 = df3.set_index(["a", "b"]) + result = idf1.join([idf2, idf3], how="inner") + + df_partially_merged = merge(df1, df2, on=["a", "b"], how="inner") + expected = merge(df_partially_merged, df3, on=["a", "b"], how="inner") + + result = result.reset_index() + + tm.assert_frame_equal(result, expected.loc[:, result.columns]) + + # GH 11519 + df = DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": ["one", "one", "two", "three", "two", "two", "one", "three"], + "C": np.random.randn(8), + "D": np.random.randn(8), + } + ) + s = Series( + np.repeat(np.arange(8), 2), index=np.repeat(np.arange(8), 2), name="TEST" + ) + inner = df.join(s, how="inner") + outer = df.join(s, how="outer") + left = df.join(s, how="left") + right = df.join(s, how="right") + tm.assert_frame_equal(inner, outer) + tm.assert_frame_equal(inner, left) + tm.assert_frame_equal(inner, right) + + def test_join_sort(self): + left = DataFrame({"key": ["foo", "bar", "baz", "foo"], "value": [1, 2, 3, 4]}) + right = DataFrame({"value2": ["a", "b", "c"]}, index=["bar", "baz", "foo"]) + + joined = left.join(right, on="key", sort=True) + expected = DataFrame( + { + "key": ["bar", "baz", "foo", "foo"], + "value": [2, 3, 1, 4], + "value2": ["a", "b", "c", "c"], + }, + index=[1, 2, 0, 3], + ) + tm.assert_frame_equal(joined, expected) + + # smoke test + joined = left.join(right, on="key", sort=False) + tm.assert_index_equal(joined.index, Index(range(4)), exact=True) + + def test_join_mixed_non_unique_index(self): + # GH 12814, unorderable types in py3 with a non-unique index + df1 = DataFrame({"a": [1, 2, 3, 4]}, index=[1, 2, 3, "a"]) + df2 = DataFrame({"b": [5, 6, 7, 8]}, index=[1, 3, 3, 4]) + result = df1.join(df2) + expected = DataFrame( + {"a": [1, 2, 3, 3, 4], "b": [5, np.nan, 6, 7, np.nan]}, + index=[1, 2, 3, 3, "a"], + ) + tm.assert_frame_equal(result, expected) + + df3 = 
DataFrame({"a": [1, 2, 3, 4]}, index=[1, 2, 2, "a"]) + df4 = DataFrame({"b": [5, 6, 7, 8]}, index=[1, 2, 3, 4]) + result = df3.join(df4) + expected = DataFrame( + {"a": [1, 2, 3, 4], "b": [5, 6, 6, np.nan]}, index=[1, 2, 2, "a"] + ) + tm.assert_frame_equal(result, expected) + + def test_join_non_unique_period_index(self): + # GH #16871 + index = pd.period_range("2016-01-01", periods=16, freq="M") + df = DataFrame(list(range(len(index))), index=index, columns=["pnum"]) + df2 = concat([df, df]) + result = df.join(df2, how="inner", rsuffix="_df2") + expected = DataFrame( + np.tile(np.arange(16, dtype=np.int64).repeat(2).reshape(-1, 1), 2), + columns=["pnum", "pnum_df2"], + index=df2.sort_index().index, + ) + tm.assert_frame_equal(result, expected) + + def test_mixed_type_join_with_suffix(self): + # GH #916 + df = DataFrame(np.random.randn(20, 6), columns=["a", "b", "c", "d", "e", "f"]) + df.insert(0, "id", 0) + df.insert(5, "dt", "foo") + + grouped = df.groupby("id") + mn = grouped.mean() + cn = grouped.count() + + # it works! + mn.join(cn, rsuffix="_right") + + def test_join_many(self): + df = DataFrame(np.random.randn(10, 6), columns=list("abcdef")) + df_list = [df[["a", "b"]], df[["c", "d"]], df[["e", "f"]]] + + joined = df_list[0].join(df_list[1:]) + tm.assert_frame_equal(joined, df) + + df_list = [df[["a", "b"]][:-2], df[["c", "d"]][2:], df[["e", "f"]][1:9]] + + def _check_diff_index(df_list, result, exp_index): + reindexed = [x.reindex(exp_index) for x in df_list] + expected = reindexed[0].join(reindexed[1:]) + tm.assert_frame_equal(result, expected) + + # different join types + joined = df_list[0].join(df_list[1:], how="outer") + _check_diff_index(df_list, joined, df.index) + + joined = df_list[0].join(df_list[1:]) + _check_diff_index(df_list, joined, df_list[0].index) + + joined = df_list[0].join(df_list[1:], how="inner") + _check_diff_index(df_list, joined, df.index[2:8]) + + msg = "Joining multiple DataFrames only supported for joining on index" + with pytest.raises(ValueError, match=msg): + df_list[0].join(df_list[1:], on="a") + + def test_join_many_mixed(self): + df = DataFrame(np.random.randn(8, 4), columns=["A", "B", "C", "D"]) + df["key"] = ["foo", "bar"] * 4 + df1 = df.loc[:, ["A", "B"]] + df2 = df.loc[:, ["C", "D"]] + df3 = df.loc[:, ["key"]] + + result = df1.join([df2, df3]) + tm.assert_frame_equal(result, df) + + def test_join_dups(self): + + # joining dups + df = concat( + [ + DataFrame(np.random.randn(10, 4), columns=["A", "A", "B", "B"]), + DataFrame( + np.random.randint(0, 10, size=20).reshape(10, 2), columns=["A", "C"] + ), + ], + axis=1, + ) + + expected = concat([df, df], axis=1) + result = df.join(df, rsuffix="_2") + result.columns = expected.columns + tm.assert_frame_equal(result, expected) + + # GH 4975, invalid join on dups + w = DataFrame(np.random.randn(4, 2), columns=["x", "y"]) + x = DataFrame(np.random.randn(4, 2), columns=["x", "y"]) + y = DataFrame(np.random.randn(4, 2), columns=["x", "y"]) + z = DataFrame(np.random.randn(4, 2), columns=["x", "y"]) + + dta = x.merge(y, left_index=True, right_index=True).merge( + z, left_index=True, right_index=True, how="outer" + ) + with tm.assert_produces_warning(FutureWarning): + dta = dta.merge(w, left_index=True, right_index=True) + expected = concat([x, y, z, w], axis=1) + expected.columns = ["x_x", "y_x", "x_y", "y_y", "x_x", "y_x", "x_y", "y_y"] + tm.assert_frame_equal(dta, expected) + + def test_join_multi_to_multi(self, join_type): + # GH 20475 + leftindex = MultiIndex.from_product( + [list("abc"), list("xy"), 
[1, 2]], names=["abc", "xy", "num"] + ) + left = DataFrame({"v1": range(12)}, index=leftindex) + + rightindex = MultiIndex.from_product( + [list("abc"), list("xy")], names=["abc", "xy"] + ) + right = DataFrame({"v2": [100 * i for i in range(1, 7)]}, index=rightindex) + + result = left.join(right, on=["abc", "xy"], how=join_type) + expected = ( + left.reset_index() + .merge(right.reset_index(), on=["abc", "xy"], how=join_type) + .set_index(["abc", "xy", "num"]) + ) + tm.assert_frame_equal(expected, result) + + msg = r'len\(left_on\) must equal the number of levels in the index of "right"' + with pytest.raises(ValueError, match=msg): + left.join(right, on="xy", how=join_type) + + with pytest.raises(ValueError, match=msg): + right.join(left, on=["abc", "xy"], how=join_type) + + def test_join_on_tz_aware_datetimeindex(self): + # GH 23931, 26335 + df1 = DataFrame( + { + "date": pd.date_range( + start="2018-01-01", periods=5, tz="America/Chicago" + ), + "vals": list("abcde"), + } + ) + + df2 = DataFrame( + { + "date": pd.date_range( + start="2018-01-03", periods=5, tz="America/Chicago" + ), + "vals_2": list("tuvwx"), + } + ) + result = df1.join(df2.set_index("date"), on="date") + expected = df1.copy() + expected["vals_2"] = Series([np.nan] * 2 + list("tuv"), dtype=object) + tm.assert_frame_equal(result, expected) + + def test_join_datetime_string(self): + # GH 5647 + dfa = DataFrame( + [ + ["2012-08-02", "L", 10], + ["2012-08-02", "J", 15], + ["2013-04-06", "L", 20], + ["2013-04-06", "J", 25], + ], + columns=["x", "y", "a"], + ) + dfa["x"] = pd.to_datetime(dfa["x"]) + dfb = DataFrame( + [["2012-08-02", "J", 1], ["2013-04-06", "L", 2]], + columns=["x", "y", "z"], + index=[2, 4], + ) + dfb["x"] = pd.to_datetime(dfb["x"]) + result = dfb.join(dfa.set_index(["x", "y"]), on=["x", "y"]) + expected = DataFrame( + [ + [Timestamp("2012-08-02 00:00:00"), "J", 1, 15], + [Timestamp("2013-04-06 00:00:00"), "L", 2, 20], + ], + index=[2, 4], + columns=["x", "y", "z", "a"], + ) + tm.assert_frame_equal(result, expected) + + +def _check_join(left, right, result, join_col, how="left", lsuffix="_x", rsuffix="_y"): + + # some smoke tests + for c in join_col: + assert result[c].notna().all() + + left_grouped = left.groupby(join_col) + right_grouped = right.groupby(join_col) + + for group_key, group in result.groupby(join_col): + l_joined = _restrict_to_columns(group, left.columns, lsuffix) + r_joined = _restrict_to_columns(group, right.columns, rsuffix) + + try: + lgroup = left_grouped.get_group(group_key) + except KeyError as err: + if how in ("left", "inner"): + raise AssertionError( + f"key {group_key} should not have been in the join" + ) from err + + _assert_all_na(l_joined, left.columns, join_col) + else: + _assert_same_contents(l_joined, lgroup) + + try: + rgroup = right_grouped.get_group(group_key) + except KeyError as err: + if how in ("right", "inner"): + raise AssertionError( + f"key {group_key} should not have been in the join" + ) from err + + _assert_all_na(r_joined, right.columns, join_col) + else: + _assert_same_contents(r_joined, rgroup) + + +def _restrict_to_columns(group, columns, suffix): + found = [ + c for c in group.columns if c in columns or c.replace(suffix, "") in columns + ] + + # filter + group = group.loc[:, found] + + # get rid of suffixes, if any + group = group.rename(columns=lambda x: x.replace(suffix, "")) + + # put in the right order... 
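+ # (row comparison in _assert_same_contents is tuple-based and positional, + # so the column order must match the source frame exactly)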
+ group = group.loc[:, columns] + + return group + + +def _assert_same_contents(join_chunk, source): + NA_SENTINEL = -1234567 # drop_duplicates not so NA-friendly... + + jvalues = join_chunk.fillna(NA_SENTINEL).drop_duplicates().values + svalues = source.fillna(NA_SENTINEL).drop_duplicates().values + + rows = {tuple(row) for row in jvalues} + assert len(rows) == len(source) + assert all(tuple(row) in rows for row in svalues) + + +def _assert_all_na(join_chunk, source_columns, join_col): + for c in source_columns: + if c in join_col: + continue + assert join_chunk[c].isna().all() + + +def _join_by_hand(a, b, how="left"): + join_index = a.index.join(b.index, how=how) + + a_re = a.reindex(join_index) + b_re = b.reindex(join_index) + + result_columns = a.columns.append(b.columns) + + for col, s in b_re.items(): + a_re[col] = s + return a_re.reindex(columns=result_columns) + + +def test_join_inner_multiindex_deterministic_order(): + # GH: 36910 + left = DataFrame( + data={"e": 5}, + index=MultiIndex.from_tuples([(1, 2, 4)], names=("a", "b", "d")), + ) + right = DataFrame( + data={"f": 6}, index=MultiIndex.from_tuples([(2, 3)], names=("b", "c")) + ) + result = left.join(right, how="inner") + expected = DataFrame( + {"e": [5], "f": [6]}, + index=MultiIndex.from_tuples([(2, 1, 4, 3)], names=("b", "a", "d", "c")), + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + ("input_col", "output_cols"), [("b", ["a", "b"]), ("a", ["a_x", "a_y"])] +) +def test_join_cross(input_col, output_cols): + # GH#5401 + left = DataFrame({"a": [1, 3]}) + right = DataFrame({input_col: [3, 4]}) + result = left.join(right, how="cross", lsuffix="_x", rsuffix="_y") + expected = DataFrame({output_cols[0]: [1, 1, 3, 3], output_cols[1]: [3, 4, 3, 4]}) + tm.assert_frame_equal(result, expected) + + +def test_join_multiindex_one_level(join_type): + # GH#36909 + left = DataFrame( + data={"c": 3}, index=MultiIndex.from_tuples([(1, 2)], names=("a", "b")) + ) + right = DataFrame(data={"d": 4}, index=MultiIndex.from_tuples([(2,)], names=("b",))) + result = left.join(right, how=join_type) + expected = DataFrame( + {"c": [3], "d": [4]}, + index=MultiIndex.from_tuples([(2, 1)], names=["b", "a"]), + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "categories, values", + [ + (["Y", "X"], ["Y", "X", "X"]), + ([2, 1], [2, 1, 1]), + ([2.5, 1.5], [2.5, 1.5, 1.5]), + ( + [Timestamp("2020-12-31"), Timestamp("2019-12-31")], + [Timestamp("2020-12-31"), Timestamp("2019-12-31"), Timestamp("2019-12-31")], + ), + ], +) +def test_join_multiindex_not_alphabetical_categorical(categories, values): + # GH#38502 + left = DataFrame( + { + "first": ["A", "A"], + "second": Categorical(categories, categories=categories), + "value": [1, 2], + } + ).set_index(["first", "second"]) + right = DataFrame( + { + "first": ["A", "A", "B"], + "second": Categorical(values, categories=categories), + "value": [3, 4, 5], + } + ).set_index(["first", "second"]) + result = left.join(right, lsuffix="_left", rsuffix="_right") + + expected = DataFrame( + { + "first": ["A", "A"], + "second": Categorical(categories, categories=categories), + "value_left": [1, 2], + "value_right": [3, 4], + } + ).set_index(["first", "second"]) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/test_merge.py b/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/test_merge.py new file mode 100644 index 0000000..1249194 --- /dev/null +++ 
b/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/test_merge.py @@ -0,0 +1,2631 @@ +from datetime import ( + date, + datetime, + timedelta, +) +import random +import re + +import numpy as np +import pytest + +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_object_dtype, +) +from pandas.core.dtypes.dtypes import CategoricalDtype + +import pandas as pd +from pandas import ( + Categorical, + CategoricalIndex, + DataFrame, + DatetimeIndex, + IntervalIndex, + MultiIndex, + PeriodIndex, + RangeIndex, + Series, + TimedeltaIndex, +) +import pandas._testing as tm +from pandas.api.types import CategoricalDtype as CDT +from pandas.core.api import ( + Float64Index, + Int64Index, + UInt64Index, +) +from pandas.core.reshape.concat import concat +from pandas.core.reshape.merge import ( + MergeError, + merge, +) + +N = 50 +NGROUPS = 8 + + +def get_test_data(ngroups=NGROUPS, n=N): + unique_groups = list(range(ngroups)) + arr = np.asarray(np.tile(unique_groups, n // ngroups)) + + if len(arr) < n: + arr = np.asarray(list(arr) + unique_groups[: n - len(arr)]) + + random.shuffle(arr) + return arr + + +def get_series(): + return [ + Series([1], dtype="int64"), + Series([1], dtype="Int64"), + Series([1.23]), + Series(["foo"]), + Series([True]), + Series([pd.Timestamp("2018-01-01")]), + Series([pd.Timestamp("2018-01-01", tz="US/Eastern")]), + ] + + +def get_series_na(): + return [ + Series([np.nan], dtype="Int64"), + Series([np.nan], dtype="float"), + Series([np.nan], dtype="object"), + Series([pd.NaT]), + ] + + +@pytest.fixture(params=get_series(), ids=lambda x: x.dtype.name) +def series_of_dtype(request): + """ + A parametrized fixture returning a variety of Series of different + dtypes + """ + return request.param + + +@pytest.fixture(params=get_series(), ids=lambda x: x.dtype.name) +def series_of_dtype2(request): + """ + A duplicate of the series_of_dtype fixture, so that it can be used + twice by a single function + """ + return request.param + + +@pytest.fixture(params=get_series_na(), ids=lambda x: x.dtype.name) +def series_of_dtype_all_na(request): + """ + A parametrized fixture returning a variety of Series with all NA + values + """ + return request.param + + +@pytest.fixture +def dfs_for_indicator(): + df1 = DataFrame({"col1": [0, 1], "col_conflict": [1, 2], "col_left": ["a", "b"]}) + df2 = DataFrame( + { + "col1": [1, 2, 3, 4, 5], + "col_conflict": [1, 2, 3, 4, 5], + "col_right": [2, 2, 2, 2, 2], + } + ) + return df1, df2 + + +class TestMerge: + def setup_method(self, method): + # aggregate multiple columns + self.df = DataFrame( + { + "key1": get_test_data(), + "key2": get_test_data(), + "data1": np.random.randn(N), + "data2": np.random.randn(N), + } + ) + + # exclude a couple keys for fun + self.df = self.df[self.df["key2"] > 1] + + self.df2 = DataFrame( + { + "key1": get_test_data(n=N // 5), + "key2": get_test_data(ngroups=NGROUPS // 2, n=N // 5), + "value": np.random.randn(N // 5), + } + ) + + self.left = DataFrame( + {"key": ["a", "b", "c", "d", "e", "e", "a"], "v1": np.random.randn(7)} + ) + self.right = DataFrame({"v2": np.random.randn(4)}, index=["d", "b", "c", "a"]) + + def test_merge_inner_join_empty(self): + # GH 15328 + df_empty = DataFrame() + df_a = DataFrame({"a": [1, 2]}, index=[0, 1], dtype="int64") + result = merge(df_empty, df_a, left_index=True, right_index=True) + expected = DataFrame({"a": []}, index=[], dtype="int64") + tm.assert_frame_equal(result, expected) + + def test_merge_common(self): + joined = merge(self.df, self.df2) + exp = 
merge(self.df, self.df2, on=["key1", "key2"]) + tm.assert_frame_equal(joined, exp) + + def test_merge_non_string_columns(self): + # https://github.com/pandas-dev/pandas/issues/17962 + # Checks that method runs for non string column names + left = DataFrame( + {0: [1, 0, 1, 0], 1: [0, 1, 0, 0], 2: [0, 0, 2, 0], 3: [1, 0, 0, 3]} + ) + + right = left.astype(float) + expected = left + result = merge(left, right) + tm.assert_frame_equal(expected, result) + + def test_merge_index_as_on_arg(self): + # GH14355 + + left = self.df.set_index("key1") + right = self.df2.set_index("key1") + result = merge(left, right, on="key1") + expected = merge(self.df, self.df2, on="key1").set_index("key1") + tm.assert_frame_equal(result, expected) + + def test_merge_index_singlekey_right_vs_left(self): + left = DataFrame( + {"key": ["a", "b", "c", "d", "e", "e", "a"], "v1": np.random.randn(7)} + ) + right = DataFrame({"v2": np.random.randn(4)}, index=["d", "b", "c", "a"]) + + merged1 = merge( + left, right, left_on="key", right_index=True, how="left", sort=False + ) + merged2 = merge( + right, left, right_on="key", left_index=True, how="right", sort=False + ) + tm.assert_frame_equal(merged1, merged2.loc[:, merged1.columns]) + + merged1 = merge( + left, right, left_on="key", right_index=True, how="left", sort=True + ) + merged2 = merge( + right, left, right_on="key", left_index=True, how="right", sort=True + ) + tm.assert_frame_equal(merged1, merged2.loc[:, merged1.columns]) + + def test_merge_index_singlekey_inner(self): + left = DataFrame( + {"key": ["a", "b", "c", "d", "e", "e", "a"], "v1": np.random.randn(7)} + ) + right = DataFrame({"v2": np.random.randn(4)}, index=["d", "b", "c", "a"]) + + # inner join + result = merge(left, right, left_on="key", right_index=True, how="inner") + expected = left.join(right, on="key").loc[result.index] + tm.assert_frame_equal(result, expected) + + result = merge(right, left, right_on="key", left_index=True, how="inner") + expected = left.join(right, on="key").loc[result.index] + tm.assert_frame_equal(result, expected.loc[:, result.columns]) + + def test_merge_misspecified(self): + msg = "Must pass right_on or right_index=True" + with pytest.raises(pd.errors.MergeError, match=msg): + merge(self.left, self.right, left_index=True) + msg = "Must pass left_on or left_index=True" + with pytest.raises(pd.errors.MergeError, match=msg): + merge(self.left, self.right, right_index=True) + + msg = ( + 'Can only pass argument "on" OR "left_on" and "right_on", not ' + "a combination of both" + ) + with pytest.raises(pd.errors.MergeError, match=msg): + merge(self.left, self.left, left_on="key", on="key") + + msg = r"len\(right_on\) must equal len\(left_on\)" + with pytest.raises(ValueError, match=msg): + merge(self.df, self.df2, left_on=["key1"], right_on=["key1", "key2"]) + + def test_index_and_on_parameters_confusion(self): + msg = "right_index parameter must be of type bool, not " + with pytest.raises(ValueError, match=msg): + merge( + self.df, + self.df2, + how="left", + left_index=False, + right_index=["key1", "key2"], + ) + msg = "left_index parameter must be of type bool, not " + with pytest.raises(ValueError, match=msg): + merge( + self.df, + self.df2, + how="left", + left_index=["key1", "key2"], + right_index=False, + ) + with pytest.raises(ValueError, match=msg): + merge( + self.df, + self.df2, + how="left", + left_index=["key1", "key2"], + right_index=["key1", "key2"], + ) + + def test_merge_overlap(self): + merged = merge(self.left, self.left, on="key") + exp_len = 
(self.left["key"].value_counts() ** 2).sum() + assert len(merged) == exp_len + assert "v1_x" in merged + assert "v1_y" in merged + + def test_merge_different_column_key_names(self): + left = DataFrame({"lkey": ["foo", "bar", "baz", "foo"], "value": [1, 2, 3, 4]}) + right = DataFrame({"rkey": ["foo", "bar", "qux", "foo"], "value": [5, 6, 7, 8]}) + + merged = left.merge( + right, left_on="lkey", right_on="rkey", how="outer", sort=True + ) + + exp = Series(["bar", "baz", "foo", "foo", "foo", "foo", np.nan], name="lkey") + tm.assert_series_equal(merged["lkey"], exp) + + exp = Series(["bar", np.nan, "foo", "foo", "foo", "foo", "qux"], name="rkey") + tm.assert_series_equal(merged["rkey"], exp) + + exp = Series([2, 3, 1, 1, 4, 4, np.nan], name="value_x") + tm.assert_series_equal(merged["value_x"], exp) + + exp = Series([6, np.nan, 5, 8, 5, 8, 7], name="value_y") + tm.assert_series_equal(merged["value_y"], exp) + + def test_merge_copy(self): + left = DataFrame({"a": 0, "b": 1}, index=range(10)) + right = DataFrame({"c": "foo", "d": "bar"}, index=range(10)) + + merged = merge(left, right, left_index=True, right_index=True, copy=True) + + merged["a"] = 6 + assert (left["a"] == 0).all() + + merged["d"] = "peekaboo" + assert (right["d"] == "bar").all() + + def test_merge_nocopy(self, using_array_manager): + left = DataFrame({"a": 0, "b": 1}, index=range(10)) + right = DataFrame({"c": "foo", "d": "bar"}, index=range(10)) + + merged = merge(left, right, left_index=True, right_index=True, copy=False) + + assert np.shares_memory(merged["a"]._values, left["a"]._values) + assert np.shares_memory(merged["d"]._values, right["d"]._values) + + def test_intelligently_handle_join_key(self): + # #733, be a bit more 1337 about not returning unconsolidated DataFrame + + left = DataFrame( + {"key": [1, 1, 2, 2, 3], "value": list(range(5))}, columns=["value", "key"] + ) + right = DataFrame({"key": [1, 1, 2, 3, 4, 5], "rvalue": list(range(6))}) + + joined = merge(left, right, on="key", how="outer") + expected = DataFrame( + { + "key": [1, 1, 1, 1, 2, 2, 3, 4, 5], + "value": np.array([0, 0, 1, 1, 2, 3, 4, np.nan, np.nan]), + "rvalue": [0, 1, 0, 1, 2, 2, 3, 4, 5], + }, + columns=["value", "key", "rvalue"], + ) + tm.assert_frame_equal(joined, expected) + + def test_merge_join_key_dtype_cast(self): + # #8596 + + df1 = DataFrame({"key": [1], "v1": [10]}) + df2 = DataFrame({"key": [2], "v1": [20]}) + df = merge(df1, df2, how="outer") + assert df["key"].dtype == "int64" + + df1 = DataFrame({"key": [True], "v1": [1]}) + df2 = DataFrame({"key": [False], "v1": [0]}) + df = merge(df1, df2, how="outer") + + # GH13169 + # GH#40073 + assert df["key"].dtype == "bool" + + df1 = DataFrame({"val": [1]}) + df2 = DataFrame({"val": [2]}) + lkey = np.array([1]) + rkey = np.array([2]) + df = merge(df1, df2, left_on=lkey, right_on=rkey, how="outer") + assert df["key_0"].dtype == "int64" + + def test_handle_join_key_pass_array(self): + left = DataFrame( + {"key": [1, 1, 2, 2, 3], "value": np.arange(5)}, + columns=["value", "key"], + dtype="int64", + ) + right = DataFrame({"rvalue": np.arange(6)}, dtype="int64") + key = np.array([1, 1, 2, 3, 4, 5], dtype="int64") + + merged = merge(left, right, left_on="key", right_on=key, how="outer") + merged2 = merge(right, left, left_on=key, right_on="key", how="outer") + + tm.assert_series_equal(merged["key"], merged2["key"]) + assert merged["key"].notna().all() + assert merged2["key"].notna().all() + + left = DataFrame({"value": np.arange(5)}, columns=["value"]) + right = DataFrame({"rvalue": 
np.arange(6)}) + lkey = np.array([1, 1, 2, 2, 3]) + rkey = np.array([1, 1, 2, 3, 4, 5]) + + merged = merge(left, right, left_on=lkey, right_on=rkey, how="outer") + tm.assert_series_equal( + merged["key_0"], Series([1, 1, 1, 1, 2, 2, 3, 4, 5], name="key_0") + ) + + left = DataFrame({"value": np.arange(3)}) + right = DataFrame({"rvalue": np.arange(6)}) + + key = np.array([0, 1, 1, 2, 2, 3], dtype=np.int64) + merged = merge(left, right, left_index=True, right_on=key, how="outer") + tm.assert_series_equal(merged["key_0"], Series(key, name="key_0")) + + def test_no_overlap_more_informative_error(self): + dt = datetime.now() + df1 = DataFrame({"x": ["a"]}, index=[dt]) + + df2 = DataFrame({"y": ["b", "c"]}, index=[dt, dt]) + + msg = ( + "No common columns to perform merge on. " + f"Merge options: left_on={None}, right_on={None}, " + f"left_index={False}, right_index={False}" + ) + + with pytest.raises(MergeError, match=msg): + merge(df1, df2) + + def test_merge_non_unique_indexes(self): + + dt = datetime(2012, 5, 1) + dt2 = datetime(2012, 5, 2) + dt3 = datetime(2012, 5, 3) + dt4 = datetime(2012, 5, 4) + + df1 = DataFrame({"x": ["a"]}, index=[dt]) + df2 = DataFrame({"y": ["b", "c"]}, index=[dt, dt]) + _check_merge(df1, df2) + + # Not monotonic + df1 = DataFrame({"x": ["a", "b", "q"]}, index=[dt2, dt, dt4]) + df2 = DataFrame( + {"y": ["c", "d", "e", "f", "g", "h"]}, index=[dt3, dt3, dt2, dt2, dt, dt] + ) + _check_merge(df1, df2) + + df1 = DataFrame({"x": ["a", "b"]}, index=[dt, dt]) + df2 = DataFrame({"y": ["c", "d"]}, index=[dt, dt]) + _check_merge(df1, df2) + + def test_merge_non_unique_index_many_to_many(self): + dt = datetime(2012, 5, 1) + dt2 = datetime(2012, 5, 2) + dt3 = datetime(2012, 5, 3) + df1 = DataFrame({"x": ["a", "b", "c", "d"]}, index=[dt2, dt2, dt, dt]) + df2 = DataFrame( + {"y": ["e", "f", "g", " h", "i"]}, index=[dt2, dt2, dt3, dt, dt] + ) + _check_merge(df1, df2) + + def test_left_merge_empty_dataframe(self): + left = DataFrame({"key": [1], "value": [2]}) + right = DataFrame({"key": []}) + + result = merge(left, right, on="key", how="left") + tm.assert_frame_equal(result, left) + + result = merge(right, left, on="key", how="right") + tm.assert_frame_equal(result, left) + + @pytest.mark.parametrize( + "kwarg", + [ + {"left_index": True, "right_index": True}, + {"left_index": True, "right_on": "x"}, + {"left_on": "a", "right_index": True}, + {"left_on": "a", "right_on": "x"}, + ], + ) + def test_merge_left_empty_right_empty(self, join_type, kwarg): + # GH 10824 + left = DataFrame(columns=["a", "b", "c"]) + right = DataFrame(columns=["x", "y", "z"]) + + exp_in = DataFrame( + columns=["a", "b", "c", "x", "y", "z"], + index=pd.Index([], dtype=object), + dtype=object, + ) + + result = merge(left, right, how=join_type, **kwarg) + tm.assert_frame_equal(result, exp_in) + + def test_merge_left_empty_right_notempty(self): + # GH 10824 + left = DataFrame(columns=["a", "b", "c"]) + right = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["x", "y", "z"]) + + exp_out = DataFrame( + { + "a": np.array([np.nan] * 3, dtype=object), + "b": np.array([np.nan] * 3, dtype=object), + "c": np.array([np.nan] * 3, dtype=object), + "x": [1, 4, 7], + "y": [2, 5, 8], + "z": [3, 6, 9], + }, + columns=["a", "b", "c", "x", "y", "z"], + ) + exp_in = exp_out[0:0] # make empty DataFrame keeping dtype + # result will have object dtype + exp_in.index = exp_in.index.astype(object) + + def check1(exp, kwarg): + result = merge(left, right, how="inner", **kwarg) + tm.assert_frame_equal(result, exp) + result = 
merge(left, right, how="left", **kwarg) + tm.assert_frame_equal(result, exp) + + def check2(exp, kwarg): + result = merge(left, right, how="right", **kwarg) + tm.assert_frame_equal(result, exp) + result = merge(left, right, how="outer", **kwarg) + tm.assert_frame_equal(result, exp) + + for kwarg in [ + {"left_index": True, "right_index": True}, + {"left_index": True, "right_on": "x"}, + ]: + check1(exp_in, kwarg) + check2(exp_out, kwarg) + + kwarg = {"left_on": "a", "right_index": True} + check1(exp_in, kwarg) + exp_out["a"] = [0, 1, 2] + check2(exp_out, kwarg) + + kwarg = {"left_on": "a", "right_on": "x"} + check1(exp_in, kwarg) + exp_out["a"] = np.array([np.nan] * 3, dtype=object) + check2(exp_out, kwarg) + + def test_merge_left_notempty_right_empty(self): + # GH 10824 + left = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["a", "b", "c"]) + right = DataFrame(columns=["x", "y", "z"]) + + exp_out = DataFrame( + { + "a": [1, 4, 7], + "b": [2, 5, 8], + "c": [3, 6, 9], + "x": np.array([np.nan] * 3, dtype=object), + "y": np.array([np.nan] * 3, dtype=object), + "z": np.array([np.nan] * 3, dtype=object), + }, + columns=["a", "b", "c", "x", "y", "z"], + ) + exp_in = exp_out[0:0] # make empty DataFrame keeping dtype + # result will have object dtype + exp_in.index = exp_in.index.astype(object) + + def check1(exp, kwarg): + result = merge(left, right, how="inner", **kwarg) + tm.assert_frame_equal(result, exp) + result = merge(left, right, how="right", **kwarg) + tm.assert_frame_equal(result, exp) + + def check2(exp, kwarg): + result = merge(left, right, how="left", **kwarg) + tm.assert_frame_equal(result, exp) + result = merge(left, right, how="outer", **kwarg) + tm.assert_frame_equal(result, exp) + + # TODO: should the next loop be un-indented? doing so breaks this test + for kwarg in [ + {"left_index": True, "right_index": True}, + {"left_index": True, "right_on": "x"}, + {"left_on": "a", "right_index": True}, + {"left_on": "a", "right_on": "x"}, + ]: + check1(exp_in, kwarg) + check2(exp_out, kwarg) + + def test_merge_empty_frame(self, series_of_dtype, series_of_dtype2): + # GH 25183 + df = DataFrame( + {"key": series_of_dtype, "value": series_of_dtype2}, + columns=["key", "value"], + ) + df_empty = df[:0] + expected = DataFrame( + { + "value_x": Series(dtype=df.dtypes["value"]), + "key": Series(dtype=df.dtypes["key"]), + "value_y": Series(dtype=df.dtypes["value"]), + }, + columns=["value_x", "key", "value_y"], + ) + actual = df_empty.merge(df, on="key") + tm.assert_frame_equal(actual, expected) + + def test_merge_all_na_column(self, series_of_dtype, series_of_dtype_all_na): + # GH 25183 + df_left = DataFrame( + {"key": series_of_dtype, "value": series_of_dtype_all_na}, + columns=["key", "value"], + ) + df_right = DataFrame( + {"key": series_of_dtype, "value": series_of_dtype_all_na}, + columns=["key", "value"], + ) + expected = DataFrame( + { + "key": series_of_dtype, + "value_x": series_of_dtype_all_na, + "value_y": series_of_dtype_all_na, + }, + columns=["key", "value_x", "value_y"], + ) + actual = df_left.merge(df_right, on="key") + tm.assert_frame_equal(actual, expected) + + def test_merge_nosort(self): + # GH#2098 + + d = { + "var1": np.random.randint(0, 10, size=10), + "var2": np.random.randint(0, 10, size=10), + "var3": [ + datetime(2012, 1, 12), + datetime(2011, 2, 4), + datetime(2010, 2, 3), + datetime(2012, 1, 12), + datetime(2011, 2, 4), + datetime(2012, 4, 3), + datetime(2012, 3, 4), + datetime(2008, 5, 1), + datetime(2010, 2, 3), + datetime(2012, 2, 3), + ], + } + df = 
DataFrame.from_dict(d) + var3 = df.var3.unique() + var3.sort() + new = DataFrame.from_dict({"var3": var3, "var8": np.random.random(7)}) + + result = df.merge(new, on="var3", sort=False) + exp = merge(df, new, on="var3", sort=False) + tm.assert_frame_equal(result, exp) + + assert (df.var3.unique() == result.var3.unique()).all() + + @pytest.mark.parametrize( + ("sort", "values"), [(False, [1, 1, 0, 1, 1]), (True, [0, 1, 1, 1, 1])] + ) + @pytest.mark.parametrize("how", ["left", "right"]) + def test_merge_same_order_left_right(self, sort, values, how): + # GH#35382 + df = DataFrame({"a": [1, 0, 1]}) + + result = df.merge(df, on="a", how=how, sort=sort) + expected = DataFrame(values, columns=["a"]) + tm.assert_frame_equal(result, expected) + + def test_merge_nan_right(self): + df1 = DataFrame({"i1": [0, 1], "i2": [0, 1]}) + df2 = DataFrame({"i1": [0], "i3": [0]}) + result = df1.join(df2, on="i1", rsuffix="_") + expected = ( + DataFrame( + { + "i1": {0: 0.0, 1: 1}, + "i2": {0: 0, 1: 1}, + "i1_": {0: 0, 1: np.nan}, + "i3": {0: 0.0, 1: np.nan}, + None: {0: 0, 1: 0}, + } + ) + .set_index(None) + .reset_index()[["i1", "i2", "i1_", "i3"]] + ) + tm.assert_frame_equal(result, expected, check_dtype=False) + + def test_merge_nan_right2(self): + df1 = DataFrame({"i1": [0, 1], "i2": [0.5, 1.5]}) + df2 = DataFrame({"i1": [0], "i3": [0.7]}) + result = df1.join(df2, rsuffix="_", on="i1") + expected = DataFrame( + { + "i1": {0: 0, 1: 1}, + "i1_": {0: 0.0, 1: np.nan}, + "i2": {0: 0.5, 1: 1.5}, + "i3": {0: 0.69999999999999996, 1: np.nan}, + } + )[["i1", "i2", "i1_", "i3"]] + tm.assert_frame_equal(result, expected) + + def test_merge_type(self): + class NotADataFrame(DataFrame): + @property + def _constructor(self): + return NotADataFrame + + nad = NotADataFrame(self.df) + result = nad.merge(self.df2, on="key1") + + assert isinstance(result, NotADataFrame) + + def test_join_append_timedeltas(self): + # timedelta64 issues with join/merge + # GH 5695 + + d = DataFrame.from_dict( + {"d": [datetime(2013, 11, 5, 5, 56)], "t": [timedelta(0, 22500)]} + ) + df = DataFrame(columns=list("dt")) + df = concat([df, d], ignore_index=True) + result = concat([df, d], ignore_index=True) + expected = DataFrame( + { + "d": [datetime(2013, 11, 5, 5, 56), datetime(2013, 11, 5, 5, 56)], + "t": [timedelta(0, 22500), timedelta(0, 22500)], + }, + dtype=object, + ) + tm.assert_frame_equal(result, expected) + + def test_join_append_timedeltas2(self): + # timedelta64 issues with join/merge + # GH 5695 + td = np.timedelta64(300000000) + lhs = DataFrame(Series([td, td], index=["A", "B"])) + rhs = DataFrame(Series([td], index=["A"])) + + result = lhs.join(rhs, rsuffix="r", how="left") + expected = DataFrame( + { + "0": Series([td, td], index=list("AB")), + "0r": Series([td, pd.NaT], index=list("AB")), + } + ) + tm.assert_frame_equal(result, expected) + + def test_other_datetime_unit(self): + # GH 13389 + df1 = DataFrame({"entity_id": [101, 102]}) + s = Series([None, None], index=[101, 102], name="days") + + for dtype in [ + "datetime64[D]", + "datetime64[h]", + "datetime64[m]", + "datetime64[s]", + "datetime64[ms]", + "datetime64[us]", + "datetime64[ns]", + ]: + + df2 = s.astype(dtype).to_frame("days") + # coerces to datetime64[ns], thus should not be affected + assert df2["days"].dtype == "datetime64[ns]" + + result = df1.merge(df2, left_on="entity_id", right_index=True) + + exp = DataFrame( + { + "entity_id": [101, 102], + "days": np.array(["nat", "nat"], dtype="datetime64[ns]"), + }, + columns=["entity_id", "days"], + ) + 
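# the all-None source Series casts to all-NaT, so for every requested unit + # the merged "days" column comes back as missing datetime64[ns] values +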
tm.assert_frame_equal(result, exp) + + @pytest.mark.parametrize("unit", ["D", "h", "m", "s", "ms", "us", "ns"]) + def test_other_timedelta_unit(self, unit): + # GH 13389 + df1 = DataFrame({"entity_id": [101, 102]}) + s = Series([None, None], index=[101, 102], name="days") + + dtype = f"m8[{unit}]" + df2 = s.astype(dtype).to_frame("days") + assert df2["days"].dtype == "m8[ns]" + + result = df1.merge(df2, left_on="entity_id", right_index=True) + + exp = DataFrame( + {"entity_id": [101, 102], "days": np.array(["nat", "nat"], dtype=dtype)}, + columns=["entity_id", "days"], + ) + tm.assert_frame_equal(result, exp) + + def test_overlapping_columns_error_message(self): + df = DataFrame({"key": [1, 2, 3], "v1": [4, 5, 6], "v2": [7, 8, 9]}) + df2 = DataFrame({"key": [1, 2, 3], "v1": [4, 5, 6], "v2": [7, 8, 9]}) + + df.columns = ["key", "foo", "foo"] + df2.columns = ["key", "bar", "bar"] + expected = DataFrame( + { + "key": [1, 2, 3], + "v1": [4, 5, 6], + "v2": [7, 8, 9], + "v3": [4, 5, 6], + "v4": [7, 8, 9], + } + ) + expected.columns = ["key", "foo", "foo", "bar", "bar"] + tm.assert_frame_equal(merge(df, df2), expected) + + # #2649, #10639 + df2.columns = ["key1", "foo", "foo"] + msg = r"Data columns not unique: Index\(\['foo'\], dtype='object'\)" + with pytest.raises(MergeError, match=msg): + merge(df, df2) + + def test_merge_on_datetime64tz(self): + + # GH11405 + left = DataFrame( + { + "key": pd.date_range("20151010", periods=2, tz="US/Eastern"), + "value": [1, 2], + } + ) + right = DataFrame( + { + "key": pd.date_range("20151011", periods=3, tz="US/Eastern"), + "value": [1, 2, 3], + } + ) + + expected = DataFrame( + { + "key": pd.date_range("20151010", periods=4, tz="US/Eastern"), + "value_x": [1, 2, np.nan, np.nan], + "value_y": [np.nan, 1, 2, 3], + } + ) + result = merge(left, right, on="key", how="outer") + tm.assert_frame_equal(result, expected) + + def test_merge_datetime64tz_values(self): + left = DataFrame( + { + "key": [1, 2], + "value": pd.date_range("20151010", periods=2, tz="US/Eastern"), + } + ) + right = DataFrame( + { + "key": [2, 3], + "value": pd.date_range("20151011", periods=2, tz="US/Eastern"), + } + ) + expected = DataFrame( + { + "key": [1, 2, 3], + "value_x": list(pd.date_range("20151010", periods=2, tz="US/Eastern")) + + [pd.NaT], + "value_y": [pd.NaT] + + list(pd.date_range("20151011", periods=2, tz="US/Eastern")), + } + ) + result = merge(left, right, on="key", how="outer") + tm.assert_frame_equal(result, expected) + assert result["value_x"].dtype == "datetime64[ns, US/Eastern]" + assert result["value_y"].dtype == "datetime64[ns, US/Eastern]" + + def test_merge_on_datetime64tz_empty(self): + # https://github.com/pandas-dev/pandas/issues/25014 + dtz = pd.DatetimeTZDtype(tz="UTC") + right = DataFrame( + { + "date": [pd.Timestamp("2018", tz=dtz.tz)], + "value": [4.0], + "date2": [pd.Timestamp("2019", tz=dtz.tz)], + }, + columns=["date", "value", "date2"], + ) + left = right[:0] + result = left.merge(right, on="date") + expected = DataFrame( + { + "value_x": Series(dtype=float), + "date2_x": Series(dtype=dtz), + "date": Series(dtype=dtz), + "value_y": Series(dtype=float), + "date2_y": Series(dtype=dtz), + }, + columns=["value_x", "date2_x", "date", "value_y", "date2_y"], + ) + tm.assert_frame_equal(result, expected) + + def test_merge_datetime64tz_with_dst_transition(self): + # GH 18885 + df1 = DataFrame( + pd.date_range("2017-10-29 01:00", periods=4, freq="H", tz="Europe/Madrid"), + columns=["date"], + ) + df1["value"] = 1 + df2 = DataFrame( + { + "date": pd.to_datetime( 
+ [ + "2017-10-29 03:00:00", + "2017-10-29 04:00:00", + "2017-10-29 05:00:00", + ] + ), + "value": 2, + } + ) + df2["date"] = df2["date"].dt.tz_localize("UTC").dt.tz_convert("Europe/Madrid") + result = merge(df1, df2, how="outer", on="date") + expected = DataFrame( + { + "date": pd.date_range( + "2017-10-29 01:00", periods=7, freq="H", tz="Europe/Madrid" + ), + "value_x": [1] * 4 + [np.nan] * 3, + "value_y": [np.nan] * 4 + [2] * 3, + } + ) + tm.assert_frame_equal(result, expected) + + def test_merge_non_unique_period_index(self): + # GH #16871 + index = pd.period_range("2016-01-01", periods=16, freq="M") + df = DataFrame(list(range(len(index))), index=index, columns=["pnum"]) + df2 = concat([df, df]) + result = df.merge(df2, left_index=True, right_index=True, how="inner") + expected = DataFrame( + np.tile(np.arange(16, dtype=np.int64).repeat(2).reshape(-1, 1), 2), + columns=["pnum_x", "pnum_y"], + index=df2.sort_index().index, + ) + tm.assert_frame_equal(result, expected) + + def test_merge_on_periods(self): + left = DataFrame( + {"key": pd.period_range("20151010", periods=2, freq="D"), "value": [1, 2]} + ) + right = DataFrame( + { + "key": pd.period_range("20151011", periods=3, freq="D"), + "value": [1, 2, 3], + } + ) + + expected = DataFrame( + { + "key": pd.period_range("20151010", periods=4, freq="D"), + "value_x": [1, 2, np.nan, np.nan], + "value_y": [np.nan, 1, 2, 3], + } + ) + result = merge(left, right, on="key", how="outer") + tm.assert_frame_equal(result, expected) + + def test_merge_period_values(self): + left = DataFrame( + {"key": [1, 2], "value": pd.period_range("20151010", periods=2, freq="D")} + ) + right = DataFrame( + {"key": [2, 3], "value": pd.period_range("20151011", periods=2, freq="D")} + ) + + exp_x = pd.period_range("20151010", periods=2, freq="D") + exp_y = pd.period_range("20151011", periods=2, freq="D") + expected = DataFrame( + { + "key": [1, 2, 3], + "value_x": list(exp_x) + [pd.NaT], + "value_y": [pd.NaT] + list(exp_y), + } + ) + result = merge(left, right, on="key", how="outer") + tm.assert_frame_equal(result, expected) + assert result["value_x"].dtype == "Period[D]" + assert result["value_y"].dtype == "Period[D]" + + def test_indicator(self, dfs_for_indicator): + # PR #10054. xref #7412 and closes #8790. 
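+ # indicator=True appends a Categorical "_merge" column marking each row as + # "left_only", "right_only", or "both"; e.g. (illustrative): + # merge(DataFrame({"k": [0, 1]}), DataFrame({"k": [1, 2]}), on="k", + # how="outer", indicator=True)["_merge"] + # -> ["left_only", "both", "right_only"]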
+ df1, df2 = dfs_for_indicator + df1_copy = df1.copy() + + df2_copy = df2.copy() + + df_result = DataFrame( + { + "col1": [0, 1, 2, 3, 4, 5], + "col_conflict_x": [1, 2, np.nan, np.nan, np.nan, np.nan], + "col_left": ["a", "b", np.nan, np.nan, np.nan, np.nan], + "col_conflict_y": [np.nan, 1, 2, 3, 4, 5], + "col_right": [np.nan, 2, 2, 2, 2, 2], + } + ) + df_result["_merge"] = Categorical( + [ + "left_only", + "both", + "right_only", + "right_only", + "right_only", + "right_only", + ], + categories=["left_only", "right_only", "both"], + ) + + df_result = df_result[ + [ + "col1", + "col_conflict_x", + "col_left", + "col_conflict_y", + "col_right", + "_merge", + ] + ] + + test = merge(df1, df2, on="col1", how="outer", indicator=True) + tm.assert_frame_equal(test, df_result) + test = df1.merge(df2, on="col1", how="outer", indicator=True) + tm.assert_frame_equal(test, df_result) + + # No side effects + tm.assert_frame_equal(df1, df1_copy) + tm.assert_frame_equal(df2, df2_copy) + + # Check with custom name + df_result_custom_name = df_result + df_result_custom_name = df_result_custom_name.rename( + columns={"_merge": "custom_name"} + ) + + test_custom_name = merge( + df1, df2, on="col1", how="outer", indicator="custom_name" + ) + tm.assert_frame_equal(test_custom_name, df_result_custom_name) + test_custom_name = df1.merge( + df2, on="col1", how="outer", indicator="custom_name" + ) + tm.assert_frame_equal(test_custom_name, df_result_custom_name) + + def test_merge_indicator_arg_validation(self, dfs_for_indicator): + # Check only accepts strings and booleans + df1, df2 = dfs_for_indicator + + msg = "indicator option can only accept boolean or string arguments" + with pytest.raises(ValueError, match=msg): + merge(df1, df2, on="col1", how="outer", indicator=5) + with pytest.raises(ValueError, match=msg): + df1.merge(df2, on="col1", how="outer", indicator=5) + + def test_merge_indicator_result_integrity(self, dfs_for_indicator): + # Check result integrity + df1, df2 = dfs_for_indicator + + test2 = merge(df1, df2, on="col1", how="left", indicator=True) + assert (test2._merge != "right_only").all() + test2 = df1.merge(df2, on="col1", how="left", indicator=True) + assert (test2._merge != "right_only").all() + + test3 = merge(df1, df2, on="col1", how="right", indicator=True) + assert (test3._merge != "left_only").all() + test3 = df1.merge(df2, on="col1", how="right", indicator=True) + assert (test3._merge != "left_only").all() + + test4 = merge(df1, df2, on="col1", how="inner", indicator=True) + assert (test4._merge == "both").all() + test4 = df1.merge(df2, on="col1", how="inner", indicator=True) + assert (test4._merge == "both").all() + + def test_merge_indicator_invalid(self, dfs_for_indicator): + # Check if working name in df + df1, _ = dfs_for_indicator + + for i in ["_right_indicator", "_left_indicator", "_merge"]: + df_badcolumn = DataFrame({"col1": [1, 2], i: [2, 2]}) + + msg = ( + "Cannot use `indicator=True` option when data contains a " + f"column named {i}|" + "Cannot use name of an existing column for indicator column" + ) + with pytest.raises(ValueError, match=msg): + merge(df1, df_badcolumn, on="col1", how="outer", indicator=True) + with pytest.raises(ValueError, match=msg): + df1.merge(df_badcolumn, on="col1", how="outer", indicator=True) + + # Check for name conflict with custom name + df_badcolumn = DataFrame({"col1": [1, 2], "custom_column_name": [2, 2]}) + + msg = "Cannot use name of an existing column for indicator column" + with pytest.raises(ValueError, match=msg): + merge( + df1, + 
df_badcolumn, + on="col1", + how="outer", + indicator="custom_column_name", + ) + with pytest.raises(ValueError, match=msg): + df1.merge( + df_badcolumn, on="col1", how="outer", indicator="custom_column_name" + ) + + def test_merge_indicator_multiple_columns(self): + # Merge on multiple columns + df3 = DataFrame({"col1": [0, 1], "col2": ["a", "b"]}) + + df4 = DataFrame({"col1": [1, 1, 3], "col2": ["b", "x", "y"]}) + + hand_coded_result = DataFrame( + {"col1": [0, 1, 1, 3], "col2": ["a", "b", "x", "y"]} + ) + hand_coded_result["_merge"] = Categorical( + ["left_only", "both", "right_only", "right_only"], + categories=["left_only", "right_only", "both"], + ) + + test5 = merge(df3, df4, on=["col1", "col2"], how="outer", indicator=True) + tm.assert_frame_equal(test5, hand_coded_result) + test5 = df3.merge(df4, on=["col1", "col2"], how="outer", indicator=True) + tm.assert_frame_equal(test5, hand_coded_result) + + def test_validation(self): + left = DataFrame( + {"a": ["a", "b", "c", "d"], "b": ["cat", "dog", "weasel", "horse"]}, + index=range(4), + ) + + right = DataFrame( + { + "a": ["a", "b", "c", "d", "e"], + "c": ["meow", "bark", "um... weasel noise?", "nay", "chirp"], + }, + index=range(5), + ) + + # Make sure no side effects. + left_copy = left.copy() + right_copy = right.copy() + + result = merge(left, right, left_index=True, right_index=True, validate="1:1") + tm.assert_frame_equal(left, left_copy) + tm.assert_frame_equal(right, right_copy) + + # make sure merge still correct + expected = DataFrame( + { + "a_x": ["a", "b", "c", "d"], + "b": ["cat", "dog", "weasel", "horse"], + "a_y": ["a", "b", "c", "d"], + "c": ["meow", "bark", "um... weasel noise?", "nay"], + }, + index=range(4), + columns=["a_x", "b", "a_y", "c"], + ) + + result = merge( + left, right, left_index=True, right_index=True, validate="one_to_one" + ) + tm.assert_frame_equal(result, expected) + + expected_2 = DataFrame( + { + "a": ["a", "b", "c", "d"], + "b": ["cat", "dog", "weasel", "horse"], + "c": ["meow", "bark", "um... weasel noise?", "nay"], + }, + index=range(4), + ) + + result = merge(left, right, on="a", validate="1:1") + tm.assert_frame_equal(left, left_copy) + tm.assert_frame_equal(right, right_copy) + tm.assert_frame_equal(result, expected_2) + + result = merge(left, right, on="a", validate="one_to_one") + tm.assert_frame_equal(result, expected_2) + + # One index, one column + expected_3 = DataFrame( + { + "b": ["cat", "dog", "weasel", "horse"], + "a": ["a", "b", "c", "d"], + "c": ["meow", "bark", "um... 
weasel noise?", "nay"], + }, + columns=["b", "a", "c"], + index=range(4), + ) + + left_index_reset = left.set_index("a") + result = merge( + left_index_reset, + right, + left_index=True, + right_on="a", + validate="one_to_one", + ) + tm.assert_frame_equal(result, expected_3) + + # Dups on right + right_w_dups = concat([right, DataFrame({"a": ["e"], "c": ["moo"]}, index=[4])]) + merge( + left, + right_w_dups, + left_index=True, + right_index=True, + validate="one_to_many", + ) + + msg = "Merge keys are not unique in right dataset; not a one-to-one merge" + with pytest.raises(MergeError, match=msg): + merge( + left, + right_w_dups, + left_index=True, + right_index=True, + validate="one_to_one", + ) + + with pytest.raises(MergeError, match=msg): + merge(left, right_w_dups, on="a", validate="one_to_one") + + # Dups on left + left_w_dups = concat( + [left, DataFrame({"a": ["a"], "c": ["cow"]}, index=[3])], sort=True + ) + merge( + left_w_dups, + right, + left_index=True, + right_index=True, + validate="many_to_one", + ) + + msg = "Merge keys are not unique in left dataset; not a one-to-one merge" + with pytest.raises(MergeError, match=msg): + merge( + left_w_dups, + right, + left_index=True, + right_index=True, + validate="one_to_one", + ) + + with pytest.raises(MergeError, match=msg): + merge(left_w_dups, right, on="a", validate="one_to_one") + + # Dups on both + merge(left_w_dups, right_w_dups, on="a", validate="many_to_many") + + msg = "Merge keys are not unique in right dataset; not a many-to-one merge" + with pytest.raises(MergeError, match=msg): + merge( + left_w_dups, + right_w_dups, + left_index=True, + right_index=True, + validate="many_to_one", + ) + + msg = "Merge keys are not unique in left dataset; not a one-to-many merge" + with pytest.raises(MergeError, match=msg): + merge(left_w_dups, right_w_dups, on="a", validate="one_to_many") + + # Check invalid arguments + msg = "Not a valid argument for validate" + with pytest.raises(ValueError, match=msg): + merge(left, right, on="a", validate="jibberish") + + # Two column merge, dups in both, but jointly no dups. + left = DataFrame( + { + "a": ["a", "a", "b", "b"], + "b": [0, 1, 0, 1], + "c": ["cat", "dog", "weasel", "horse"], + }, + index=range(4), + ) + + right = DataFrame( + { + "a": ["a", "a", "b"], + "b": [0, 1, 0], + "d": ["meow", "bark", "um... weasel noise?"], + }, + index=range(3), + ) + + expected_multi = DataFrame( + { + "a": ["a", "a", "b"], + "b": [0, 1, 0], + "c": ["cat", "dog", "weasel"], + "d": ["meow", "bark", "um... 
weasel noise?"], + }, + index=range(3), + ) + + msg = ( + "Merge keys are not unique in either left or right dataset; " + "not a one-to-one merge" + ) + with pytest.raises(MergeError, match=msg): + merge(left, right, on="a", validate="1:1") + + result = merge(left, right, on=["a", "b"], validate="1:1") + tm.assert_frame_equal(result, expected_multi) + + def test_merge_two_empty_df_no_division_error(self): + # GH17776, PR #17846 + a = DataFrame({"a": [], "b": [], "c": []}) + with np.errstate(divide="raise"): + merge(a, a, on=("a", "b")) + + @pytest.mark.parametrize("how", ["right", "outer"]) + @pytest.mark.parametrize( + "index,expected_index", + [ + ( + CategoricalIndex([1, 2, 4]), + CategoricalIndex([1, 2, 4, None, None, None]), + ), + ( + DatetimeIndex(["2001-01-01", "2002-02-02", "2003-03-03"]), + DatetimeIndex( + ["2001-01-01", "2002-02-02", "2003-03-03", pd.NaT, pd.NaT, pd.NaT] + ), + ), + (Float64Index([1, 2, 3]), Float64Index([1, 2, 3, None, None, None])), + (Int64Index([1, 2, 3]), Float64Index([1, 2, 3, None, None, None])), + ( + IntervalIndex.from_tuples([(1, 2), (2, 3), (3, 4)]), + IntervalIndex.from_tuples( + [(1, 2), (2, 3), (3, 4), np.nan, np.nan, np.nan] + ), + ), + ( + PeriodIndex(["2001-01-01", "2001-01-02", "2001-01-03"], freq="D"), + PeriodIndex( + ["2001-01-01", "2001-01-02", "2001-01-03", pd.NaT, pd.NaT, pd.NaT], + freq="D", + ), + ), + ( + TimedeltaIndex(["1d", "2d", "3d"]), + TimedeltaIndex(["1d", "2d", "3d", pd.NaT, pd.NaT, pd.NaT]), + ), + ], + ) + def test_merge_on_index_with_more_values(self, how, index, expected_index): + # GH 24212 + # pd.merge gets [0, 1, 2, -1, -1, -1] as left_indexer, ensure that + # -1 is interpreted as a missing value instead of the last element + df1 = DataFrame({"a": [0, 1, 2], "key": [0, 1, 2]}, index=index) + df2 = DataFrame({"b": [0, 1, 2, 3, 4, 5]}) + result = df1.merge(df2, left_on="key", right_index=True, how=how) + expected = DataFrame( + [ + [0, 0, 0], + [1, 1, 1], + [2, 2, 2], + [np.nan, 3, 3], + [np.nan, 4, 4], + [np.nan, 5, 5], + ], + columns=["a", "key", "b"], + ) + expected.set_index(expected_index, inplace=True) + tm.assert_frame_equal(result, expected) + + def test_merge_right_index_right(self): + # Note: the expected output here is probably incorrect. + # See https://github.com/pandas-dev/pandas/issues/17257 for more. + # We include this as a regression test for GH-24897. 
+ left = DataFrame({"a": [1, 2, 3], "key": [0, 1, 1]}) + right = DataFrame({"b": [1, 2, 3]}) + + expected = DataFrame( + {"a": [1, 2, 3, None], "key": [0, 1, 1, 2], "b": [1, 2, 2, 3]}, + columns=["a", "key", "b"], + index=[0, 1, 2, np.nan], + ) + result = left.merge(right, left_on="key", right_index=True, how="right") + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("how", ["left", "right"]) + def test_merge_preserves_row_order(self, how): + # GH 27453 + left_df = DataFrame({"animal": ["dog", "pig"], "max_speed": [40, 11]}) + right_df = DataFrame({"animal": ["quetzal", "pig"], "max_speed": [80, 11]}) + result = left_df.merge(right_df, on=["animal", "max_speed"], how=how) + if how == "right": + expected = DataFrame({"animal": ["quetzal", "pig"], "max_speed": [80, 11]}) + else: + expected = DataFrame({"animal": ["dog", "pig"], "max_speed": [40, 11]}) + tm.assert_frame_equal(result, expected) + + def test_merge_take_missing_values_from_index_of_other_dtype(self): + # GH 24212 + left = DataFrame( + { + "a": [1, 2, 3], + "key": Categorical(["a", "a", "b"], categories=list("abc")), + } + ) + right = DataFrame({"b": [1, 2, 3]}, index=CategoricalIndex(["a", "b", "c"])) + result = left.merge(right, left_on="key", right_index=True, how="right") + expected = DataFrame( + { + "a": [1, 2, 3, None], + "key": Categorical(["a", "a", "b", "c"]), + "b": [1, 1, 2, 3], + }, + index=[0, 1, 2, np.nan], + ) + expected = expected.reindex(columns=["a", "key", "b"]) + tm.assert_frame_equal(result, expected) + + def test_merge_readonly(self): + # https://github.com/pandas-dev/pandas/issues/27943 + data1 = DataFrame( + np.arange(20).reshape((4, 5)) + 1, columns=["a", "b", "c", "d", "e"] + ) + data2 = DataFrame( + np.arange(20).reshape((5, 4)) + 1, columns=["a", "b", "x", "y"] + ) + + # make each underlying block array / column array read-only + for arr in data1._mgr.arrays: + arr.flags.writeable = False + + data1.merge(data2) # no error + + +def _check_merge(x, y): + for how in ["inner", "left", "outer"]: + result = x.join(y, how=how) + + expected = merge(x.reset_index(), y.reset_index(), how=how, sort=True) + expected = expected.set_index("index") + + # TODO check_names on merge? 
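+ # (join keeps the original unnamed index, while the merge-based expected is + # indexed by the reset "index" column, so the index names differ)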
+ tm.assert_frame_equal(result, expected, check_names=False) + + +class TestMergeDtypes: + @pytest.mark.parametrize( + "right_vals", [["foo", "bar"], Series(["foo", "bar"]).astype("category")] + ) + def test_different(self, right_vals): + + left = DataFrame( + { + "A": ["foo", "bar"], + "B": Series(["foo", "bar"]).astype("category"), + "C": [1, 2], + "D": [1.0, 2.0], + "E": Series([1, 2], dtype="uint64"), + "F": Series([1, 2], dtype="int32"), + } + ) + right = DataFrame({"A": right_vals}) + + # GH 9780 + # We allow merging on object and categorical cols and cast + # categorical cols to object + result = merge(left, right, on="A") + assert is_object_dtype(result.A.dtype) + + @pytest.mark.parametrize("d1", [np.int64, np.int32, np.int16, np.int8, np.uint8]) + @pytest.mark.parametrize("d2", [np.int64, np.float64, np.float32, np.float16]) + def test_join_multi_dtypes(self, d1, d2): + + dtype1 = np.dtype(d1) + dtype2 = np.dtype(d2) + + left = DataFrame( + { + "k1": np.array([0, 1, 2] * 8, dtype=dtype1), + "k2": ["foo", "bar"] * 12, + "v": np.array(np.arange(24), dtype=np.int64), + } + ) + + index = MultiIndex.from_tuples([(2, "bar"), (1, "foo")]) + right = DataFrame({"v2": np.array([5, 7], dtype=dtype2)}, index=index) + + result = left.join(right, on=["k1", "k2"]) + + expected = left.copy() + + if dtype2.kind == "i": + dtype2 = np.dtype("float64") + expected["v2"] = np.array(np.nan, dtype=dtype2) + expected.loc[(expected.k1 == 2) & (expected.k2 == "bar"), "v2"] = 5 + expected.loc[(expected.k1 == 1) & (expected.k2 == "foo"), "v2"] = 7 + + tm.assert_frame_equal(result, expected) + + result = left.join(right, on=["k1", "k2"], sort=True) + expected.sort_values(["k1", "k2"], kind="mergesort", inplace=True) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "int_vals, float_vals, exp_vals", + [ + ([1, 2, 3], [1.0, 2.0, 3.0], {"X": [1, 2, 3], "Y": [1.0, 2.0, 3.0]}), + ([1, 2, 3], [1.0, 3.0], {"X": [1, 3], "Y": [1.0, 3.0]}), + ([1, 2], [1.0, 2.0, 3.0], {"X": [1, 2], "Y": [1.0, 2.0]}), + ], + ) + def test_merge_on_ints_floats(self, int_vals, float_vals, exp_vals): + # GH 16572 + # Check that float column is not cast to object if + # merging on float and int columns + A = DataFrame({"X": int_vals}) + B = DataFrame({"Y": float_vals}) + expected = DataFrame(exp_vals) + + result = A.merge(B, left_on="X", right_on="Y") + tm.assert_frame_equal(result, expected) + + result = B.merge(A, left_on="Y", right_on="X") + tm.assert_frame_equal(result, expected[["Y", "X"]]) + + def test_merge_key_dtype_cast(self): + # GH 17044 + df1 = DataFrame({"key": [1.0, 2.0], "v1": [10, 20]}, columns=["key", "v1"]) + df2 = DataFrame({"key": [2], "v2": [200]}, columns=["key", "v2"]) + result = df1.merge(df2, on="key", how="left") + expected = DataFrame( + {"key": [1.0, 2.0], "v1": [10, 20], "v2": [np.nan, 200.0]}, + columns=["key", "v1", "v2"], + ) + tm.assert_frame_equal(result, expected) + + def test_merge_on_ints_floats_warning(self): + # GH 16572 + # merge will produce a warning when merging on int and + # float columns where the float values are not exactly + # equal to their int representation + A = DataFrame({"X": [1, 2, 3]}) + B = DataFrame({"Y": [1.1, 2.5, 3.0]}) + expected = DataFrame({"X": [3], "Y": [3.0]}) + + with tm.assert_produces_warning(UserWarning): + result = A.merge(B, left_on="X", right_on="Y") + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(UserWarning): + result = B.merge(A, left_on="Y", right_on="X") + tm.assert_frame_equal(result, expected[["Y", "X"]]) + + 
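# only the exactly-representable pair (3, 3.0) survives the merges above; + # the lossy float keys 1.1 and 2.5 are what trigger the UserWarning +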
# test no warning if float has NaNs + B = DataFrame({"Y": [np.nan, np.nan, 3.0]}) + + with tm.assert_produces_warning(None): + result = B.merge(A, left_on="Y", right_on="X") + tm.assert_frame_equal(result, expected[["Y", "X"]]) + + def test_merge_incompat_infer_boolean_object(self): + # GH21119: bool + object bool merge OK + df1 = DataFrame({"key": Series([True, False], dtype=object)}) + df2 = DataFrame({"key": [True, False]}) + + expected = DataFrame({"key": [True, False]}, dtype=object) + result = merge(df1, df2, on="key") + tm.assert_frame_equal(result, expected) + result = merge(df2, df1, on="key") + tm.assert_frame_equal(result, expected) + + def test_merge_incompat_infer_boolean_object_with_missing(self): + # GH21119: bool + object bool merge OK + # with missing value + df1 = DataFrame({"key": Series([True, False, np.nan], dtype=object)}) + df2 = DataFrame({"key": [True, False]}) + + expected = DataFrame({"key": [True, False]}, dtype=object) + result = merge(df1, df2, on="key") + tm.assert_frame_equal(result, expected) + result = merge(df2, df1, on="key") + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "df1_vals, df2_vals", + [ + # merge on category coerces to object + ([0, 1, 2], Series(["a", "b", "a"]).astype("category")), + ([0.0, 1.0, 2.0], Series(["a", "b", "a"]).astype("category")), + # do not infer + ([0, 1], Series([False, True], dtype=object)), + ([0, 1], Series([False, True], dtype=bool)), + ], + ) + def test_merge_incompat_dtypes_are_ok(self, df1_vals, df2_vals): + # these are explicitly allowed incompatible merges that pass through; + # the result type depends on whether the values on the rhs are + # inferred, otherwise they will be coerced to object + + df1 = DataFrame({"A": df1_vals}) + df2 = DataFrame({"A": df2_vals}) + + result = merge(df1, df2, on=["A"]) + assert is_object_dtype(result.A.dtype) + result = merge(df2, df1, on=["A"]) + assert is_object_dtype(result.A.dtype) + + @pytest.mark.parametrize( + "df1_vals, df2_vals", + [ + # do not infer to numeric + (Series([1, 2], dtype="uint64"), ["a", "b", "c"]), + (Series([1, 2], dtype="int32"), ["a", "b", "c"]), + ([0, 1, 2], ["0", "1", "2"]), + ([0.0, 1.0, 2.0], ["0", "1", "2"]), + ([0, 1, 2], ["0", "1", "2"]), + ( + pd.date_range("1/1/2011", periods=2, freq="D"), + ["2011-01-01", "2011-01-02"], + ), + (pd.date_range("1/1/2011", periods=2, freq="D"), [0, 1]), + (pd.date_range("1/1/2011", periods=2, freq="D"), [0.0, 1.0]), + ( + pd.date_range("20130101", periods=3), + pd.date_range("20130101", periods=3, tz="US/Eastern"), + ), + ], + ) + def test_merge_incompat_dtypes_error(self, df1_vals, df2_vals): + # GH 9780, GH 15800 + # Raise a ValueError when a user tries to merge on + # dtypes that are incompatible (e.g., obj and int/float) + + df1 = DataFrame({"A": df1_vals}) + df2 = DataFrame({"A": df2_vals}) + + msg = ( + f"You are trying to merge on {df1['A'].dtype} and " + f"{df2['A'].dtype} columns. If you wish to proceed " + "you should use pd.concat" + ) + msg = re.escape(msg) + with pytest.raises(ValueError, match=msg): + merge(df1, df2, on=["A"]) + + # Check that the error is still raised when swapping the order of dataframes + msg = ( + f"You are trying to merge on {df2['A'].dtype} and " + f"{df1['A'].dtype} columns. 
If you wish to proceed " + "you should use pd.concat" + ) + msg = re.escape(msg) + with pytest.raises(ValueError, match=msg): + merge(df2, df1, on=["A"]) + + @pytest.mark.parametrize( + "expected_data, how", + [ + ([1, 2], "outer"), + ([], "inner"), + ([2], "right"), + ([1], "left"), + ], + ) + def test_merge_EA_dtype(self, any_numeric_ea_dtype, how, expected_data): + # GH#40073 + d1 = DataFrame([(1,)], columns=["id"], dtype=any_numeric_ea_dtype) + d2 = DataFrame([(2,)], columns=["id"], dtype=any_numeric_ea_dtype) + result = merge(d1, d2, how=how) + expected = DataFrame(expected_data, columns=["id"], dtype=any_numeric_ea_dtype) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "expected_data, how", + [ + (["a", "b"], "outer"), + ([], "inner"), + (["b"], "right"), + (["a"], "left"), + ], + ) + def test_merge_string_dtype(self, how, expected_data, any_string_dtype): + # GH#40073 + d1 = DataFrame([("a",)], columns=["id"], dtype=any_string_dtype) + d2 = DataFrame([("b",)], columns=["id"], dtype=any_string_dtype) + result = merge(d1, d2, how=how) + expected = DataFrame(expected_data, columns=["id"], dtype=any_string_dtype) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "how, expected_data", + [ + ("inner", [[True, 1, 4], [False, 5, 3]]), + ("outer", [[True, 1, 4], [False, 5, 3]]), + ("left", [[True, 1, 4], [False, 5, 3]]), + ("right", [[False, 5, 3], [True, 1, 4]]), + ], + ) + def test_merge_bool_dtype(self, how, expected_data): + # GH#40073 + df1 = DataFrame({"A": [True, False], "B": [1, 5]}) + df2 = DataFrame({"A": [False, True], "C": [3, 4]}) + result = merge(df1, df2, how=how) + expected = DataFrame(expected_data, columns=["A", "B", "C"]) + tm.assert_frame_equal(result, expected) + + def test_merge_ea_with_string(self, join_type, string_dtype): + # GH 43734 Avoid the use of `assign` with multi-index + df1 = DataFrame( + data={ + ("lvl0", "lvl1-a"): ["1", "2", "3", "4", None], + ("lvl0", "lvl1-b"): ["4", "5", "6", "7", "8"], + }, + dtype=pd.StringDtype(), + ) + df1_copy = df1.copy() + df2 = DataFrame( + data={ + ("lvl0", "lvl1-a"): ["1", "2", "3", pd.NA, "5"], + ("lvl0", "lvl1-c"): ["7", "8", "9", pd.NA, "11"], + }, + dtype=string_dtype, + ) + df2_copy = df2.copy() + merged = merge(left=df1, right=df2, on=[("lvl0", "lvl1-a")], how=join_type) + + # No change in df1 and df2 + tm.assert_frame_equal(df1, df1_copy) + tm.assert_frame_equal(df2, df2_copy) + + # Check the expected types for the merged data frame + expected = Series( + [np.dtype("O"), pd.StringDtype(), np.dtype("O")], + index=MultiIndex.from_tuples( + [("lvl0", "lvl1-a"), ("lvl0", "lvl1-b"), ("lvl0", "lvl1-c")] + ), + ) + tm.assert_series_equal(merged.dtypes, expected) + + +@pytest.fixture +def left(): + np.random.seed(1234) + return DataFrame( + { + "X": Series(np.random.choice(["foo", "bar"], size=(10,))).astype( + CDT(["foo", "bar"]) + ), + "Y": np.random.choice(["one", "two", "three"], size=(10,)), + } + ) + + +@pytest.fixture +def right(): + np.random.seed(1234) + return DataFrame( + {"X": Series(["foo", "bar"]).astype(CDT(["foo", "bar"])), "Z": [1, 2]} + ) + + +class TestMergeCategorical: + def test_identical(self, left): + # merging on the same, should preserve dtypes + merged = merge(left, left, on="X") + result = merged.dtypes.sort_index() + expected = Series( + [CategoricalDtype(categories=["foo", "bar"]), np.dtype("O"), np.dtype("O")], + index=["X", "Y_x", "Y_y"], + ) + tm.assert_series_equal(result, expected) + + def test_basic(self, left, right): + # we have matching 
Categorical dtypes in X + # so should preserve the merged column + merged = merge(left, right, on="X") + result = merged.dtypes.sort_index() + expected = Series( + [ + CategoricalDtype(categories=["foo", "bar"]), + np.dtype("O"), + np.dtype("int64"), + ], + index=["X", "Y", "Z"], + ) + tm.assert_series_equal(result, expected) + + def test_merge_categorical(self): + # GH 9426 + + right = DataFrame( + { + "c": {0: "a", 1: "b", 2: "c", 3: "d", 4: "e"}, + "d": {0: "null", 1: "null", 2: "null", 3: "null", 4: "null"}, + } + ) + left = DataFrame( + { + "a": {0: "f", 1: "f", 2: "f", 3: "f", 4: "f"}, + "b": {0: "g", 1: "g", 2: "g", 3: "g", 4: "g"}, + } + ) + df = merge(left, right, how="left", left_on="b", right_on="c") + + # object-object + expected = df.copy() + + # object-cat + # note that we propagate the category + # because we don't have any matching rows + cright = right.copy() + cright["d"] = cright["d"].astype("category") + result = merge(left, cright, how="left", left_on="b", right_on="c") + expected["d"] = expected["d"].astype(CategoricalDtype(["null"])) + tm.assert_frame_equal(result, expected) + + # cat-object + cleft = left.copy() + cleft["b"] = cleft["b"].astype("category") + result = merge(cleft, cright, how="left", left_on="b", right_on="c") + tm.assert_frame_equal(result, expected) + + # cat-cat + cright = right.copy() + cright["d"] = cright["d"].astype("category") + cleft = left.copy() + cleft["b"] = cleft["b"].astype("category") + result = merge(cleft, cright, how="left", left_on="b", right_on="c") + tm.assert_frame_equal(result, expected) + + def tests_merge_categorical_unordered_equal(self): + # GH-19551 + df1 = DataFrame( + { + "Foo": Categorical(["A", "B", "C"], categories=["A", "B", "C"]), + "Left": ["A0", "B0", "C0"], + } + ) + + df2 = DataFrame( + { + "Foo": Categorical(["C", "B", "A"], categories=["C", "B", "A"]), + "Right": ["C1", "B1", "A1"], + } + ) + result = merge(df1, df2, on=["Foo"]) + expected = DataFrame( + { + "Foo": Categorical(["A", "B", "C"]), + "Left": ["A0", "B0", "C0"], + "Right": ["A1", "B1", "C1"], + } + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("ordered", [True, False]) + def test_multiindex_merge_with_unordered_categoricalindex(self, ordered): + # GH 36973 + pcat = CategoricalDtype(categories=["P2", "P1"], ordered=ordered) + df1 = DataFrame( + { + "id": ["C", "C", "D"], + "p": Categorical(["P2", "P1", "P2"], dtype=pcat), + "a": [0, 1, 2], + } + ).set_index(["id", "p"]) + df2 = DataFrame( + { + "id": ["A", "C", "C"], + "p": Categorical(["P2", "P2", "P1"], dtype=pcat), + "d1": [10, 11, 12], + } + ).set_index(["id", "p"]) + result = merge(df1, df2, how="left", left_index=True, right_index=True) + expected = DataFrame( + { + "id": ["C", "C", "D"], + "p": Categorical(["P2", "P1", "P2"], dtype=pcat), + "a": [0, 1, 2], + "d1": [11.0, 12.0, np.nan], + } + ).set_index(["id", "p"]) + tm.assert_frame_equal(result, expected) + + def test_other_columns(self, left, right): + # non-merge columns should preserve if possible + right = right.assign(Z=right.Z.astype("category")) + + merged = merge(left, right, on="X") + result = merged.dtypes.sort_index() + expected = Series( + [ + CategoricalDtype(categories=["foo", "bar"]), + np.dtype("O"), + CategoricalDtype(categories=[1, 2]), + ], + index=["X", "Y", "Z"], + ) + tm.assert_series_equal(result, expected) + + # categories are preserved + assert left.X.values._categories_match_up_to_permutation(merged.X.values) + assert right.Z.values._categories_match_up_to_permutation(merged.Z.values) + 
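+    # A short illustrative sketch added alongside the suite (hypothetical
+    # test, not from the original file): the merge key keeps its
+    # CategoricalDtype only when both sides carry the identical dtype,
+    # which is the behaviour test_identical and test_basic above rely on.
+    def test_sketch_identical_categorical_key_preserved(self):
+        dtype = CategoricalDtype(categories=["foo", "bar"])
+        a = DataFrame({"X": Series(["foo", "bar"]).astype(dtype), "L": [1, 2]})
+        b = DataFrame({"X": Series(["bar", "foo"]).astype(dtype), "R": [3, 4]})
+        merged = merge(a, b, on="X")
+        assert merged.X.dtype == dtype
+ 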
+ @pytest.mark.parametrize( + "change", + [ + lambda x: x, + lambda x: x.astype(CDT(["foo", "bar", "bah"])), + lambda x: x.astype(CDT(ordered=True)), + ], + ) + def test_dtype_on_merged_different(self, change, join_type, left, right): + # our merging column X now has 2 different dtypes, + # so the result must be object + + X = change(right.X.astype("object")) + right = right.assign(X=X) + assert is_categorical_dtype(left.X.values.dtype) + # assert not left.X.values._categories_match_up_to_permutation(right.X.values) + + merged = merge(left, right, on="X", how=join_type) + + result = merged.dtypes.sort_index() + expected = Series( + [np.dtype("O"), np.dtype("O"), np.dtype("int64")], index=["X", "Y", "Z"] + ) + tm.assert_series_equal(result, expected) + + def test_self_join_multiple_categories(self): + # GH 16767 + # non-duplicates should work with multiple categories + m = 5 + df = DataFrame( + { + "a": ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"] * m, + "b": ["t", "w", "x", "y", "z"] * 2 * m, + "c": [ + letter + for each in ["m", "n", "u", "p", "o"] + for letter in [each] * 2 * m + ], + "d": [ + letter + for each in [ + "aa", + "bb", + "cc", + "dd", + "ee", + "ff", + "gg", + "hh", + "ii", + "jj", + ] + for letter in [each] * m + ], + } + ) + + # change them all to categorical variables + df = df.apply(lambda x: x.astype("category")) + + # self-join should equal ourselves + result = merge(df, df, on=list(df.columns)) + + tm.assert_frame_equal(result, df) + + def test_dtype_on_categorical_dates(self): + # GH 16900 + # dates should not be coerced to ints + + df = DataFrame( + [[date(2001, 1, 1), 1.1], [date(2001, 1, 2), 1.3]], columns=["date", "num2"] + ) + df["date"] = df["date"].astype("category") + + df2 = DataFrame( + [[date(2001, 1, 1), 1.3], [date(2001, 1, 3), 1.4]], columns=["date", "num4"] + ) + df2["date"] = df2["date"].astype("category") + + expected_outer = DataFrame( + [ + [pd.Timestamp("2001-01-01").date(), 1.1, 1.3], + [pd.Timestamp("2001-01-02").date(), 1.3, np.nan], + [pd.Timestamp("2001-01-03").date(), np.nan, 1.4], + ], + columns=["date", "num2", "num4"], + ) + result_outer = merge(df, df2, how="outer", on=["date"]) + tm.assert_frame_equal(result_outer, expected_outer) + + expected_inner = DataFrame( + [[pd.Timestamp("2001-01-01").date(), 1.1, 1.3]], + columns=["date", "num2", "num4"], + ) + result_inner = merge(df, df2, how="inner", on=["date"]) + tm.assert_frame_equal(result_inner, expected_inner) + + @pytest.mark.parametrize("ordered", [True, False]) + @pytest.mark.parametrize( + "category_column,categories,expected_categories", + [ + ([False, True, True, False], [True, False], [True, False]), + ([2, 1, 1, 2], [1, 2], [1, 2]), + (["False", "True", "True", "False"], ["True", "False"], ["True", "False"]), + ], + ) + def test_merging_with_bool_or_int_categorical_column( + self, category_column, categories, expected_categories, ordered + ): + # GH 17187 + # merging with a boolean/int categorical column + df1 = DataFrame({"id": [1, 2, 3, 4], "cat": category_column}) + df1["cat"] = df1["cat"].astype(CDT(categories, ordered=ordered)) + df2 = DataFrame({"id": [2, 4], "num": [1, 9]}) + result = df1.merge(df2) + expected = DataFrame({"id": [2, 4], "cat": expected_categories, "num": [1, 9]}) + expected["cat"] = expected["cat"].astype(CDT(categories, ordered=ordered)) + tm.assert_frame_equal(expected, result) + + def test_merge_on_int_array(self): + # GH 23020 + df = DataFrame({"A": Series([1, 2, np.nan], dtype="Int64"), "B": 1}) + result = merge(df, df, on="A") + expected 
= DataFrame( + {"A": Series([1, 2, np.nan], dtype="Int64"), "B_x": 1, "B_y": 1} + ) + tm.assert_frame_equal(result, expected) + + +@pytest.fixture +def left_df(): + return DataFrame({"a": [20, 10, 0]}, index=[2, 1, 0]) + + +@pytest.fixture +def right_df(): + return DataFrame({"b": [300, 100, 200]}, index=[3, 1, 2]) + + +class TestMergeOnIndexes: + @pytest.mark.parametrize( + "how, sort, expected", + [ + ("inner", False, DataFrame({"a": [20, 10], "b": [200, 100]}, index=[2, 1])), + ("inner", True, DataFrame({"a": [10, 20], "b": [100, 200]}, index=[1, 2])), + ( + "left", + False, + DataFrame({"a": [20, 10, 0], "b": [200, 100, np.nan]}, index=[2, 1, 0]), + ), + ( + "left", + True, + DataFrame({"a": [0, 10, 20], "b": [np.nan, 100, 200]}, index=[0, 1, 2]), + ), + ( + "right", + False, + DataFrame( + {"a": [np.nan, 10, 20], "b": [300, 100, 200]}, index=[3, 1, 2] + ), + ), + ( + "right", + True, + DataFrame( + {"a": [10, 20, np.nan], "b": [100, 200, 300]}, index=[1, 2, 3] + ), + ), + ( + "outer", + False, + DataFrame( + {"a": [0, 10, 20, np.nan], "b": [np.nan, 100, 200, 300]}, + index=[0, 1, 2, 3], + ), + ), + ( + "outer", + True, + DataFrame( + {"a": [0, 10, 20, np.nan], "b": [np.nan, 100, 200, 300]}, + index=[0, 1, 2, 3], + ), + ), + ], + ) + def test_merge_on_indexes(self, left_df, right_df, how, sort, expected): + result = merge( + left_df, right_df, left_index=True, right_index=True, how=how, sort=sort + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "index", + [ + CategoricalIndex(["A", "B"], categories=["A", "B"], name="index_col"), + Float64Index([1.0, 2.0], name="index_col"), + Int64Index([1, 2], name="index_col"), + UInt64Index([1, 2], name="index_col"), + RangeIndex(start=0, stop=2, name="index_col"), + DatetimeIndex(["2018-01-01", "2018-01-02"], name="index_col"), + ], + ids=lambda x: type(x).__name__, +) +def test_merge_index_types(index): + # gh-20777 + # assert key access is consistent across index types + left = DataFrame({"left_data": [1, 2]}, index=index) + right = DataFrame({"right_data": [1.0, 2.0]}, index=index) + + result = left.merge(right, on=["index_col"]) + + expected = DataFrame({"left_data": [1, 2], "right_data": [1.0, 2.0]}, index=index) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "on,left_on,right_on,left_index,right_index,nm", + [ + (["outer", "inner"], None, None, False, False, "B"), + (None, None, None, True, True, "B"), + (None, ["outer", "inner"], None, False, True, "B"), + (None, None, ["outer", "inner"], True, False, "B"), + (["outer", "inner"], None, None, False, False, None), + (None, None, None, True, True, None), + (None, ["outer", "inner"], None, False, True, None), + (None, None, ["outer", "inner"], True, False, None), + ], +) +def test_merge_series(on, left_on, right_on, left_index, right_index, nm): + # GH 21220 + a = DataFrame( + {"A": [1, 2, 3, 4]}, + index=MultiIndex.from_product([["a", "b"], [0, 1]], names=["outer", "inner"]), + ) + b = Series( + [1, 2, 3, 4], + index=MultiIndex.from_product([["a", "b"], [1, 2]], names=["outer", "inner"]), + name=nm, + ) + expected = DataFrame( + {"A": [2, 4], "B": [1, 3]}, + index=MultiIndex.from_product([["a", "b"], [1]], names=["outer", "inner"]), + ) + if nm is not None: + result = merge( + a, + b, + on=on, + left_on=left_on, + right_on=right_on, + left_index=left_index, + right_index=right_index, + ) + tm.assert_frame_equal(result, expected) + else: + msg = "Cannot merge a Series without a name" + with pytest.raises(ValueError, match=msg): + result = 
merge( + a, + b, + on=on, + left_on=left_on, + right_on=right_on, + left_index=left_index, + right_index=right_index, + ) + + +@pytest.mark.parametrize( + "col1, col2, kwargs, expected_cols", + [ + (0, 0, {"suffixes": ("", "_dup")}, ["0", "0_dup"]), + (0, 0, {"suffixes": (None, "_dup")}, [0, "0_dup"]), + (0, 0, {"suffixes": ("_x", "_y")}, ["0_x", "0_y"]), + (0, 0, {"suffixes": ["_x", "_y"]}, ["0_x", "0_y"]), + ("a", 0, {"suffixes": (None, "_y")}, ["a", 0]), + (0.0, 0.0, {"suffixes": ("_x", None)}, ["0.0_x", 0.0]), + ("b", "b", {"suffixes": (None, "_y")}, ["b", "b_y"]), + ("a", "a", {"suffixes": ("_x", None)}, ["a_x", "a"]), + ("a", "b", {"suffixes": ("_x", None)}, ["a", "b"]), + ("a", "a", {"suffixes": (None, "_x")}, ["a", "a_x"]), + (0, 0, {"suffixes": ("_a", None)}, ["0_a", 0]), + ("a", "a", {}, ["a_x", "a_y"]), + (0, 0, {}, ["0_x", "0_y"]), + ], +) +def test_merge_suffix(col1, col2, kwargs, expected_cols): + # issue: 24782 + a = DataFrame({col1: [1, 2, 3]}) + b = DataFrame({col2: [4, 5, 6]}) + + expected = DataFrame([[1, 4], [2, 5], [3, 6]], columns=expected_cols) + + result = a.merge(b, left_index=True, right_index=True, **kwargs) + tm.assert_frame_equal(result, expected) + + result = merge(a, b, left_index=True, right_index=True, **kwargs) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "how,expected", + [ + ( + "right", + DataFrame( + {"A": [100, 200, 300], "B1": [60, 70, np.nan], "B2": [600, 700, 800]} + ), + ), + ( + "outer", + DataFrame( + { + "A": [100, 200, 1, 300], + "B1": [60, 70, 80, np.nan], + "B2": [600, 700, np.nan, 800], + } + ), + ), + ], +) +def test_merge_duplicate_suffix(how, expected): + left_df = DataFrame({"A": [100, 200, 1], "B": [60, 70, 80]}) + right_df = DataFrame({"A": [100, 200, 300], "B": [600, 700, 800]}) + result = merge(left_df, right_df, on="A", how=how, suffixes=("_x", "_x")) + expected.columns = ["A", "B_x", "B_x"] + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "col1, col2, suffixes", + [("a", "a", (None, None)), ("a", "a", ("", None)), (0, 0, (None, ""))], +) +def test_merge_suffix_error(col1, col2, suffixes): + # issue: 24782 + a = DataFrame({col1: [1, 2, 3]}) + b = DataFrame({col2: [3, 4, 5]}) + + # TODO: might reconsider current raise behaviour, see issue 24782 + msg = "columns overlap but no suffix specified" + with pytest.raises(ValueError, match=msg): + merge(a, b, left_index=True, right_index=True, suffixes=suffixes) + + +@pytest.mark.parametrize("suffixes", [{"left", "right"}, {"left": 0, "right": 0}]) +def test_merge_suffix_warns(suffixes): + a = DataFrame({"a": [1, 2, 3]}) + b = DataFrame({"b": [3, 4, 5]}) + + with tm.assert_produces_warning(FutureWarning): + # use the parametrized suffixes so the dict case is exercised too + merge(a, b, left_index=True, right_index=True, suffixes=suffixes) + + +@pytest.mark.parametrize( + "col1, col2, suffixes, msg", + [ + ("a", "a", ("a", "b", "c"), r"too many values to unpack \(expected 2\)"), + ("a", "a", tuple("a"), r"not enough values to unpack \(expected 2, got 1\)"), + ], +) +def test_merge_suffix_length_error(col1, col2, suffixes, msg): + a = DataFrame({col1: [1, 2, 3]}) + b = DataFrame({col2: [3, 4, 5]}) + + with pytest.raises(ValueError, match=msg): + merge(a, b, left_index=True, right_index=True, suffixes=suffixes) + + +@pytest.mark.parametrize("cat_dtype", ["one", "two"]) +@pytest.mark.parametrize("reverse", [True, False]) +def test_merge_equal_cat_dtypes(cat_dtype, reverse): + # see gh-22501 + cat_dtypes = { + "one": CategoricalDtype(categories=["a", "b", "c"], ordered=False), + "two": 
CategoricalDtype(categories=["a", "b", "c"], ordered=False), + } + + df1 = DataFrame( + {"foo": Series(["a", "b", "c"]).astype(cat_dtypes["one"]), "left": [1, 2, 3]} + ).set_index("foo") + + data_foo = ["a", "b", "c"] + data_right = [1, 2, 3] + + if reverse: + data_foo.reverse() + data_right.reverse() + + df2 = DataFrame( + {"foo": Series(data_foo).astype(cat_dtypes[cat_dtype]), "right": data_right} + ).set_index("foo") + + result = df1.merge(df2, left_index=True, right_index=True) + + expected = DataFrame( + { + "left": [1, 2, 3], + "right": [1, 2, 3], + "foo": Series(["a", "b", "c"]).astype(cat_dtypes["one"]), + } + ).set_index("foo") + + tm.assert_frame_equal(result, expected) + + +def test_merge_equal_cat_dtypes2(): + # see gh-22501 + cat_dtype = CategoricalDtype(categories=["a", "b", "c"], ordered=False) + + # Test Data + df1 = DataFrame( + {"foo": Series(["a", "b"]).astype(cat_dtype), "left": [1, 2]} + ).set_index("foo") + + df2 = DataFrame( + {"foo": Series(["a", "b", "c"]).astype(cat_dtype), "right": [3, 2, 1]} + ).set_index("foo") + + result = df1.merge(df2, left_index=True, right_index=True) + + expected = DataFrame( + {"left": [1, 2], "right": [3, 2], "foo": Series(["a", "b"]).astype(cat_dtype)} + ).set_index("foo") + + tm.assert_frame_equal(result, expected) + + +def test_merge_on_cat_and_ext_array(): + # GH 28668 + right = DataFrame( + {"a": Series([pd.Interval(0, 1), pd.Interval(1, 2)], dtype="interval")} + ) + left = right.copy() + left["a"] = left["a"].astype("category") + + result = merge(left, right, how="inner", on="a") + expected = right.copy() + + tm.assert_frame_equal(result, expected) + + +def test_merge_multiindex_columns(): + # Issue #28518 + # Verify that merging two dataframes gives the expected labels + # The original cause of this issue comes from a bug in lexsort_depth and is + # tested in test_lexsort_depth + + letters = ["a", "b", "c", "d"] + numbers = ["1", "2", "3"] + index = MultiIndex.from_product((letters, numbers), names=["outer", "inner"]) + + frame_x = DataFrame(columns=index) + frame_x["id"] = "" + frame_y = DataFrame(columns=index) + frame_y["id"] = "" + + l_suf = "_x" + r_suf = "_y" + result = frame_x.merge(frame_y, on="id", suffixes=((l_suf, r_suf))) + + # Constructing the expected results + expected_labels = [letter + l_suf for letter in letters] + [ + letter + r_suf for letter in letters + ] + expected_index = MultiIndex.from_product( + [expected_labels, numbers], names=["outer", "inner"] + ) + expected = DataFrame(columns=expected_index) + expected["id"] = "" + + tm.assert_frame_equal(result, expected) + + +def test_merge_datetime_upcast_dtype(): + # https://github.com/pandas-dev/pandas/issues/31208 + df1 = DataFrame({"x": ["a", "b", "c"], "y": ["1", "2", "4"]}) + df2 = DataFrame( + {"y": ["1", "2", "3"], "z": pd.to_datetime(["2000", "2001", "2002"])} + ) + result = merge(df1, df2, how="left", on="y") + expected = DataFrame( + { + "x": ["a", "b", "c"], + "y": ["1", "2", "4"], + "z": pd.to_datetime(["2000", "2001", "NaT"]), + } + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("n_categories", [5, 128]) +def test_categorical_non_unique_monotonic(n_categories): + # GH 28189 + # With n_categories as 5, we test the int8 case is hit in libjoin, + # with n_categories as 128 we test the int16 case. 
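+    # Added sketch of the assumption behind those two sizes (hypothetical
+    # check, based on pandas choosing the smallest signed integer dtype
+    # for Categorical codes: below 127 categories int8, then int16):
+    codes = Categorical(range(n_categories)).codes
+    assert codes.dtype == np.dtype("int8" if n_categories < 127 else "int16")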
+ left_index = CategoricalIndex([0] + list(range(n_categories))) + df1 = DataFrame(range(n_categories + 1), columns=["value"], index=left_index) + df2 = DataFrame( + [[6]], + columns=["value"], + index=CategoricalIndex([0], categories=np.arange(n_categories)), + ) + + result = merge(df1, df2, how="left", left_index=True, right_index=True) + expected = DataFrame( + [[i, 6.0] if i < 2 else [i, np.nan] for i in range(n_categories + 1)], + columns=["value_x", "value_y"], + index=left_index, + ) + tm.assert_frame_equal(expected, result) + + +def test_merge_join_categorical_multiindex(): + # From issue 16627 + a = { + "Cat1": Categorical(["a", "b", "a", "c", "a", "b"], ["a", "b", "c"]), + "Int1": [0, 1, 0, 1, 0, 0], + } + a = DataFrame(a) + + b = { + "Cat": Categorical(["a", "b", "c", "a", "b", "c"], ["a", "b", "c"]), + "Int": [0, 0, 0, 1, 1, 1], + "Factor": [1.1, 1.2, 1.3, 1.4, 1.5, 1.6], + } + b = DataFrame(b).set_index(["Cat", "Int"])["Factor"] + + expected = merge( + a, + b.reset_index(), + left_on=["Cat1", "Int1"], + right_on=["Cat", "Int"], + how="left", + ) + expected = expected.drop(["Cat", "Int"], axis=1) + result = a.join(b, on=["Cat1", "Int1"]) + tm.assert_frame_equal(expected, result) + + # Same test, but with ordered categorical + a = { + "Cat1": Categorical( + ["a", "b", "a", "c", "a", "b"], ["b", "a", "c"], ordered=True + ), + "Int1": [0, 1, 0, 1, 0, 0], + } + a = DataFrame(a) + + b = { + "Cat": Categorical( + ["a", "b", "c", "a", "b", "c"], ["b", "a", "c"], ordered=True + ), + "Int": [0, 0, 0, 1, 1, 1], + "Factor": [1.1, 1.2, 1.3, 1.4, 1.5, 1.6], + } + b = DataFrame(b).set_index(["Cat", "Int"])["Factor"] + + expected = merge( + a, + b.reset_index(), + left_on=["Cat1", "Int1"], + right_on=["Cat", "Int"], + how="left", + ) + expected = expected.drop(["Cat", "Int"], axis=1) + result = a.join(b, on=["Cat1", "Int1"]) + tm.assert_frame_equal(expected, result) + + +@pytest.mark.parametrize("func", ["merge", "merge_asof"]) +@pytest.mark.parametrize( + ("kwargs", "err_msg"), + [ + ({"left_on": "a", "left_index": True}, ["left_on", "left_index"]), + ({"right_on": "a", "right_index": True}, ["right_on", "right_index"]), + ], +) +def test_merge_join_cols_error_reporting_duplicates(func, kwargs, err_msg): + # GH: 16228 + left = DataFrame({"a": [1, 2], "b": [3, 4]}) + right = DataFrame({"a": [1, 1], "c": [5, 6]}) + msg = rf'Can only pass argument "{err_msg[0]}" OR "{err_msg[1]}" not both\.' + with pytest.raises(MergeError, match=msg): + getattr(pd, func)(left, right, **kwargs) + + +@pytest.mark.parametrize("func", ["merge", "merge_asof"]) +@pytest.mark.parametrize( + ("kwargs", "err_msg"), + [ + ({"left_on": "a"}, ["right_on", "right_index"]), + ({"right_on": "a"}, ["left_on", "left_index"]), + ], +) +def test_merge_join_cols_error_reporting_missing(func, kwargs, err_msg): + # GH: 16228 + left = DataFrame({"a": [1, 2], "b": [3, 4]}) + right = DataFrame({"a": [1, 1], "c": [5, 6]}) + msg = rf'Must pass "{err_msg[0]}" OR "{err_msg[1]}"\.' + with pytest.raises(MergeError, match=msg): + getattr(pd, func)(left, right, **kwargs) + + +@pytest.mark.parametrize("func", ["merge", "merge_asof"]) +@pytest.mark.parametrize( + "kwargs", + [ + {"right_index": True}, + {"left_index": True}, + ], +) +def test_merge_join_cols_error_reporting_on_and_index(func, kwargs): + # GH: 16228 + left = DataFrame({"a": [1, 2], "b": [3, 4]}) + right = DataFrame({"a": [1, 1], "c": [5, 6]}) + msg = ( + r'Can only pass argument "on" OR "left_index" ' + r'and "right_index", not a combination of both\.' 
+ ) + with pytest.raises(MergeError, match=msg): + getattr(pd, func)(left, right, on="a", **kwargs) + + +def test_merge_right_left_index(): + # GH#38616 + left = DataFrame({"x": [1, 1], "z": ["foo", "foo"]}) + right = DataFrame({"x": [1, 1], "z": ["foo", "foo"]}) + result = merge(left, right, how="right", left_index=True, right_on="x") + expected = DataFrame( + { + "x": [1, 1], + "x_x": [1, 1], + "z_x": ["foo", "foo"], + "x_y": [1, 1], + "z_y": ["foo", "foo"], + } + ) + tm.assert_frame_equal(result, expected) + + +def test_merge_result_empty_index_and_on(): + # GH#33814 + df1 = DataFrame({"a": [1], "b": [2]}).set_index(["a", "b"]) + df2 = DataFrame({"b": [1]}).set_index(["b"]) + expected = DataFrame({"a": [], "b": []}, dtype=np.int64).set_index(["a", "b"]) + result = merge(df1, df2, left_on=["b"], right_index=True) + tm.assert_frame_equal(result, expected) + + result = merge(df2, df1, left_index=True, right_on=["b"]) + tm.assert_frame_equal(result, expected) + + +def test_merge_suffixes_produce_dup_columns_warns(): + # GH#22818 + left = DataFrame({"a": [1, 2, 3], "b": 1, "b_x": 2}) + right = DataFrame({"a": [1, 2, 3], "b": 2}) + expected = DataFrame( + [[1, 1, 2, 2], [2, 1, 2, 2], [3, 1, 2, 2]], columns=["a", "b_x", "b_x", "b_y"] + ) + with tm.assert_produces_warning(FutureWarning): + result = merge(left, right, on="a") + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning): + merge(right, left, on="a", suffixes=("_y", "_x")) + tm.assert_frame_equal(result, expected) + + +def test_merge_duplicate_columns_with_suffix_no_warning(): + # GH#22818 + # Do not raise warning when duplicates are caused by duplicates in origin + left = DataFrame([[1, 1, 1], [2, 2, 2]], columns=["a", "b", "b"]) + right = DataFrame({"a": [1, 3], "b": 2}) + result = merge(left, right, on="a") + expected = DataFrame([[1, 1, 1, 2]], columns=["a", "b_x", "b_x", "b_y"]) + tm.assert_frame_equal(result, expected) + + +def test_merge_duplicate_columns_with_suffix_causing_another_duplicate(): + # GH#22818 + # This should raise warning because suffixes cause another collision + left = DataFrame([[1, 1, 1, 1], [2, 2, 2, 2]], columns=["a", "b", "b", "b_x"]) + right = DataFrame({"a": [1, 3], "b": 2}) + with tm.assert_produces_warning(FutureWarning): + result = merge(left, right, on="a") + expected = DataFrame([[1, 1, 1, 1, 2]], columns=["a", "b_x", "b_x", "b_x", "b_y"]) + tm.assert_frame_equal(result, expected) + + +def test_merge_string_float_column_result(): + # GH 13353 + df1 = DataFrame([[1, 2], [3, 4]], columns=pd.Index(["a", 114.0])) + df2 = DataFrame([[9, 10], [11, 12]], columns=["x", "y"]) + result = merge(df2, df1, how="inner", left_index=True, right_index=True) + expected = DataFrame( + [[9, 10, 1, 2], [11, 12, 3, 4]], columns=pd.Index(["x", "y", "a", 114.0]) + ) + tm.assert_frame_equal(result, expected) + + +def test_mergeerror_on_left_index_mismatched_dtypes(): + # GH 22449 + df_1 = DataFrame(data=["X"], columns=["C"], index=[22]) + df_2 = DataFrame(data=["X"], columns=["C"], index=[999]) + with pytest.raises(MergeError, match="Can only pass argument"): + merge(df_1, df_2, on=["C"], left_index=True) + + +@pytest.mark.parametrize("dtype", [None, "Int64"]) +def test_merge_outer_with_NaN(dtype): + # GH#43550 + left = DataFrame({"key": [1, 2], "col1": [1, 2]}, dtype=dtype) + right = DataFrame({"key": [np.nan, np.nan], "col2": [3, 4]}, dtype=dtype) + result = merge(left, right, on="key", how="outer") + expected = DataFrame( + { + "key": [1, 2, np.nan, np.nan], + "col1": [1, 2, np.nan, 
np.nan], + "col2": [np.nan, np.nan, 3, 4], + }, + dtype=dtype, + ) + tm.assert_frame_equal(result, expected) + + # switch left and right + result = merge(right, left, on="key", how="outer") + expected = DataFrame( + { + "key": [np.nan, np.nan, 1, 2], + "col2": [3, 4, np.nan, np.nan], + "col1": [np.nan, np.nan, 1, 2], + }, + dtype=dtype, + ) + tm.assert_frame_equal(result, expected) + + +def test_merge_different_index_names(): + # GH#45094 + left = DataFrame({"a": [1]}, index=pd.Index([1], name="c")) + right = DataFrame({"a": [1]}, index=pd.Index([1], name="d")) + result = merge(left, right, left_on="c", right_on="d") + expected = DataFrame({"a_x": [1], "a_y": 1}) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/test_merge_asof.py b/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/test_merge_asof.py new file mode 100644 index 0000000..f9310db --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/test_merge_asof.py @@ -0,0 +1,1527 @@ +import datetime + +import numpy as np +import pytest +import pytz + +import pandas as pd +from pandas import ( + Index, + Timedelta, + merge_asof, + read_csv, + to_datetime, +) +import pandas._testing as tm +from pandas.core.reshape.merge import MergeError + + +class TestAsOfMerge: + def read_data(self, datapath, name, dedupe=False): + path = datapath("reshape", "merge", "data", name) + x = read_csv(path) + if dedupe: + x = x.drop_duplicates(["time", "ticker"], keep="last").reset_index( + drop=True + ) + x.time = to_datetime(x.time) + return x + + @pytest.fixture(autouse=True) + def setup_method(self, datapath): + + self.trades = self.read_data(datapath, "trades.csv") + self.quotes = self.read_data(datapath, "quotes.csv", dedupe=True) + self.asof = self.read_data(datapath, "asof.csv") + self.tolerance = self.read_data(datapath, "tolerance.csv") + self.allow_exact_matches = self.read_data(datapath, "allow_exact_matches.csv") + self.allow_exact_matches_and_tolerance = self.read_data( + datapath, "allow_exact_matches_and_tolerance.csv" + ) + + def test_examples1(self): + """doc-string examples""" + left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]}) + right = pd.DataFrame({"a": [1, 2, 3, 6, 7], "right_val": [1, 2, 3, 6, 7]}) + + expected = pd.DataFrame( + {"a": [1, 5, 10], "left_val": ["a", "b", "c"], "right_val": [1, 3, 7]} + ) + + result = merge_asof(left, right, on="a") + tm.assert_frame_equal(result, expected) + + def test_examples2(self): + """doc-string examples""" + trades = pd.DataFrame( + { + "time": to_datetime( + [ + "20160525 13:30:00.023", + "20160525 13:30:00.038", + "20160525 13:30:00.048", + "20160525 13:30:00.048", + "20160525 13:30:00.048", + ] + ), + "ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"], + "price": [51.95, 51.95, 720.77, 720.92, 98.00], + "quantity": [75, 155, 100, 100, 100], + }, + columns=["time", "ticker", "price", "quantity"], + ) + + quotes = pd.DataFrame( + { + "time": to_datetime( + [ + "20160525 13:30:00.023", + "20160525 13:30:00.023", + "20160525 13:30:00.030", + "20160525 13:30:00.041", + "20160525 13:30:00.048", + "20160525 13:30:00.049", + "20160525 13:30:00.072", + "20160525 13:30:00.075", + ] + ), + "ticker": [ + "GOOG", + "MSFT", + "MSFT", + "MSFT", + "GOOG", + "AAPL", + "GOOG", + "MSFT", + ], + "bid": [720.50, 51.95, 51.97, 51.99, 720.50, 97.99, 720.50, 52.01], + "ask": [720.93, 51.96, 51.98, 52.00, 720.93, 98.01, 720.88, 52.03], + }, + columns=["time", "ticker", "bid", "ask"], + ) + + 
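+        # Exposition added here (semantics assumed from the expected
+        # frames below): merge_asof is a left join on the nearest key,
+        # not on equal keys. With the default direction="backward" each
+        # trade takes the last quote whose time is <= its own time,
+        # matched per ticker when by="ticker" is passed. For the first
+        # MSFT trade at 13:30:00.023 that is the quote at .023:
+        msft = quotes[quotes.ticker == "MSFT"]
+        assert msft.time[msft.time <= trades.time[0]].max() == trades.time[0]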
merge_asof(trades, quotes, on="time", by="ticker") + + merge_asof(trades, quotes, on="time", by="ticker", tolerance=Timedelta("2ms")) + + expected = pd.DataFrame( + { + "time": to_datetime( + [ + "20160525 13:30:00.023", + "20160525 13:30:00.038", + "20160525 13:30:00.048", + "20160525 13:30:00.048", + "20160525 13:30:00.048", + ] + ), + "ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"], + "price": [51.95, 51.95, 720.77, 720.92, 98.00], + "quantity": [75, 155, 100, 100, 100], + "bid": [np.nan, 51.97, np.nan, np.nan, np.nan], + "ask": [np.nan, 51.98, np.nan, np.nan, np.nan], + }, + columns=["time", "ticker", "price", "quantity", "bid", "ask"], + ) + + result = merge_asof( + trades, + quotes, + on="time", + by="ticker", + tolerance=Timedelta("10ms"), + allow_exact_matches=False, + ) + tm.assert_frame_equal(result, expected) + + def test_examples3(self): + """doc-string examples""" + # GH14887 + + left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]}) + right = pd.DataFrame({"a": [1, 2, 3, 6, 7], "right_val": [1, 2, 3, 6, 7]}) + + expected = pd.DataFrame( + {"a": [1, 5, 10], "left_val": ["a", "b", "c"], "right_val": [1, 6, np.nan]} + ) + + result = merge_asof(left, right, on="a", direction="forward") + tm.assert_frame_equal(result, expected) + + def test_examples4(self): + """doc-string examples""" + # GH14887 + + left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]}) + right = pd.DataFrame({"a": [1, 2, 3, 6, 7], "right_val": [1, 2, 3, 6, 7]}) + + expected = pd.DataFrame( + {"a": [1, 5, 10], "left_val": ["a", "b", "c"], "right_val": [1, 6, 7]} + ) + + result = merge_asof(left, right, on="a", direction="nearest") + tm.assert_frame_equal(result, expected) + + def test_basic(self): + + expected = self.asof + trades = self.trades + quotes = self.quotes + + result = merge_asof(trades, quotes, on="time", by="ticker") + tm.assert_frame_equal(result, expected) + + def test_basic_categorical(self): + + expected = self.asof + trades = self.trades.copy() + trades.ticker = trades.ticker.astype("category") + quotes = self.quotes.copy() + quotes.ticker = quotes.ticker.astype("category") + expected.ticker = expected.ticker.astype("category") + + result = merge_asof(trades, quotes, on="time", by="ticker") + tm.assert_frame_equal(result, expected) + + def test_basic_left_index(self): + + # GH14253 + expected = self.asof + trades = self.trades.set_index("time") + quotes = self.quotes + + result = merge_asof( + trades, quotes, left_index=True, right_on="time", by="ticker" + ) + # left-only index uses right's index, oddly + expected.index = result.index + # time column appears after left's columns + expected = expected[result.columns] + tm.assert_frame_equal(result, expected) + + def test_basic_right_index(self): + + expected = self.asof + trades = self.trades + quotes = self.quotes.set_index("time") + + result = merge_asof( + trades, quotes, left_on="time", right_index=True, by="ticker" + ) + tm.assert_frame_equal(result, expected) + + def test_basic_left_index_right_index(self): + + expected = self.asof.set_index("time") + trades = self.trades.set_index("time") + quotes = self.quotes.set_index("time") + + result = merge_asof( + trades, quotes, left_index=True, right_index=True, by="ticker" + ) + tm.assert_frame_equal(result, expected) + + def test_multi_index(self): + + # MultiIndex is prohibited + trades = self.trades.set_index(["time", "price"]) + quotes = self.quotes.set_index("time") + with pytest.raises(MergeError, match="left can only have one index"): + merge_asof(trades, 
quotes, left_index=True, right_index=True) + + trades = self.trades.set_index("time") + quotes = self.quotes.set_index(["time", "bid"]) + with pytest.raises(MergeError, match="right can only have one index"): + merge_asof(trades, quotes, left_index=True, right_index=True) + + def test_on_and_index(self): + + # "on" parameter and index together is prohibited + trades = self.trades.set_index("time") + quotes = self.quotes.set_index("time") + msg = 'Can only pass argument "left_on" OR "left_index" not both.' + with pytest.raises(MergeError, match=msg): + merge_asof( + trades, quotes, left_on="price", left_index=True, right_index=True + ) + + trades = self.trades.set_index("time") + quotes = self.quotes.set_index("time") + msg = 'Can only pass argument "right_on" OR "right_index" not both.' + with pytest.raises(MergeError, match=msg): + merge_asof( + trades, quotes, right_on="bid", left_index=True, right_index=True + ) + + def test_basic_left_by_right_by(self): + + # GH14253 + expected = self.asof + trades = self.trades + quotes = self.quotes + + result = merge_asof( + trades, quotes, on="time", left_by="ticker", right_by="ticker" + ) + tm.assert_frame_equal(result, expected) + + def test_missing_right_by(self): + + expected = self.asof + trades = self.trades + quotes = self.quotes + + q = quotes[quotes.ticker != "MSFT"] + result = merge_asof(trades, q, on="time", by="ticker") + expected.loc[expected.ticker == "MSFT", ["bid", "ask"]] = np.nan + tm.assert_frame_equal(result, expected) + + def test_multiby(self): + # GH13936 + trades = pd.DataFrame( + { + "time": to_datetime( + [ + "20160525 13:30:00.023", + "20160525 13:30:00.023", + "20160525 13:30:00.046", + "20160525 13:30:00.048", + "20160525 13:30:00.050", + ] + ), + "ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"], + "exch": ["ARCA", "NSDQ", "NSDQ", "BATS", "NSDQ"], + "price": [51.95, 51.95, 720.77, 720.92, 98.00], + "quantity": [75, 155, 100, 100, 100], + }, + columns=["time", "ticker", "exch", "price", "quantity"], + ) + + quotes = pd.DataFrame( + { + "time": to_datetime( + [ + "20160525 13:30:00.023", + "20160525 13:30:00.023", + "20160525 13:30:00.030", + "20160525 13:30:00.041", + "20160525 13:30:00.045", + "20160525 13:30:00.049", + ] + ), + "ticker": ["GOOG", "MSFT", "MSFT", "MSFT", "GOOG", "AAPL"], + "exch": ["BATS", "NSDQ", "ARCA", "ARCA", "NSDQ", "ARCA"], + "bid": [720.51, 51.95, 51.97, 51.99, 720.50, 97.99], + "ask": [720.92, 51.96, 51.98, 52.00, 720.93, 98.01], + }, + columns=["time", "ticker", "exch", "bid", "ask"], + ) + + expected = pd.DataFrame( + { + "time": to_datetime( + [ + "20160525 13:30:00.023", + "20160525 13:30:00.023", + "20160525 13:30:00.046", + "20160525 13:30:00.048", + "20160525 13:30:00.050", + ] + ), + "ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"], + "exch": ["ARCA", "NSDQ", "NSDQ", "BATS", "NSDQ"], + "price": [51.95, 51.95, 720.77, 720.92, 98.00], + "quantity": [75, 155, 100, 100, 100], + "bid": [np.nan, 51.95, 720.50, 720.51, np.nan], + "ask": [np.nan, 51.96, 720.93, 720.92, np.nan], + }, + columns=["time", "ticker", "exch", "price", "quantity", "bid", "ask"], + ) + + result = merge_asof(trades, quotes, on="time", by=["ticker", "exch"]) + tm.assert_frame_equal(result, expected) + + def test_multiby_heterogeneous_types(self): + # GH13936 + trades = pd.DataFrame( + { + "time": to_datetime( + [ + "20160525 13:30:00.023", + "20160525 13:30:00.023", + "20160525 13:30:00.046", + "20160525 13:30:00.048", + "20160525 13:30:00.050", + ] + ), + "ticker": [0, 0, 1, 1, 2], + "exch": ["ARCA", "NSDQ", "NSDQ", 
"BATS", "NSDQ"], + "price": [51.95, 51.95, 720.77, 720.92, 98.00], + "quantity": [75, 155, 100, 100, 100], + }, + columns=["time", "ticker", "exch", "price", "quantity"], + ) + + quotes = pd.DataFrame( + { + "time": to_datetime( + [ + "20160525 13:30:00.023", + "20160525 13:30:00.023", + "20160525 13:30:00.030", + "20160525 13:30:00.041", + "20160525 13:30:00.045", + "20160525 13:30:00.049", + ] + ), + "ticker": [1, 0, 0, 0, 1, 2], + "exch": ["BATS", "NSDQ", "ARCA", "ARCA", "NSDQ", "ARCA"], + "bid": [720.51, 51.95, 51.97, 51.99, 720.50, 97.99], + "ask": [720.92, 51.96, 51.98, 52.00, 720.93, 98.01], + }, + columns=["time", "ticker", "exch", "bid", "ask"], + ) + + expected = pd.DataFrame( + { + "time": to_datetime( + [ + "20160525 13:30:00.023", + "20160525 13:30:00.023", + "20160525 13:30:00.046", + "20160525 13:30:00.048", + "20160525 13:30:00.050", + ] + ), + "ticker": [0, 0, 1, 1, 2], + "exch": ["ARCA", "NSDQ", "NSDQ", "BATS", "NSDQ"], + "price": [51.95, 51.95, 720.77, 720.92, 98.00], + "quantity": [75, 155, 100, 100, 100], + "bid": [np.nan, 51.95, 720.50, 720.51, np.nan], + "ask": [np.nan, 51.96, 720.93, 720.92, np.nan], + }, + columns=["time", "ticker", "exch", "price", "quantity", "bid", "ask"], + ) + + result = merge_asof(trades, quotes, on="time", by=["ticker", "exch"]) + tm.assert_frame_equal(result, expected) + + def test_multiby_indexed(self): + # GH15676 + left = pd.DataFrame( + [ + [to_datetime("20160602"), 1, "a"], + [to_datetime("20160602"), 2, "a"], + [to_datetime("20160603"), 1, "b"], + [to_datetime("20160603"), 2, "b"], + ], + columns=["time", "k1", "k2"], + ).set_index("time") + + right = pd.DataFrame( + [ + [to_datetime("20160502"), 1, "a", 1.0], + [to_datetime("20160502"), 2, "a", 2.0], + [to_datetime("20160503"), 1, "b", 3.0], + [to_datetime("20160503"), 2, "b", 4.0], + ], + columns=["time", "k1", "k2", "value"], + ).set_index("time") + + expected = pd.DataFrame( + [ + [to_datetime("20160602"), 1, "a", 1.0], + [to_datetime("20160602"), 2, "a", 2.0], + [to_datetime("20160603"), 1, "b", 3.0], + [to_datetime("20160603"), 2, "b", 4.0], + ], + columns=["time", "k1", "k2", "value"], + ).set_index("time") + + result = merge_asof( + left, right, left_index=True, right_index=True, by=["k1", "k2"] + ) + + tm.assert_frame_equal(expected, result) + + with pytest.raises( + MergeError, match="left_by and right_by must be same length" + ): + merge_asof( + left, + right, + left_index=True, + right_index=True, + left_by=["k1", "k2"], + right_by=["k1"], + ) + + def test_basic2(self, datapath): + + expected = self.read_data(datapath, "asof2.csv") + trades = self.read_data(datapath, "trades2.csv") + quotes = self.read_data(datapath, "quotes2.csv", dedupe=True) + + result = merge_asof(trades, quotes, on="time", by="ticker") + tm.assert_frame_equal(result, expected) + + def test_basic_no_by(self): + f = ( + lambda x: x[x.ticker == "MSFT"] + .drop("ticker", axis=1) + .reset_index(drop=True) + ) + + # just use a single ticker + expected = f(self.asof) + trades = f(self.trades) + quotes = f(self.quotes) + + result = merge_asof(trades, quotes, on="time") + tm.assert_frame_equal(result, expected) + + def test_valid_join_keys(self): + + trades = self.trades + quotes = self.quotes + + msg = r"incompatible merge keys \[1\] .* must be the same type" + + with pytest.raises(MergeError, match=msg): + merge_asof(trades, quotes, left_on="time", right_on="bid", by="ticker") + + with pytest.raises(MergeError, match="can only asof on a key for left"): + merge_asof(trades, quotes, on=["time", "ticker"], 
by="ticker") + + with pytest.raises(MergeError, match="can only asof on a key for left"): + merge_asof(trades, quotes, by="ticker") + + def test_with_duplicates(self, datapath): + + q = ( + pd.concat([self.quotes, self.quotes]) + .sort_values(["time", "ticker"]) + .reset_index(drop=True) + ) + result = merge_asof(self.trades, q, on="time", by="ticker") + expected = self.read_data(datapath, "asof.csv") + tm.assert_frame_equal(result, expected) + + def test_with_duplicates_no_on(self): + + df1 = pd.DataFrame({"key": [1, 1, 3], "left_val": [1, 2, 3]}) + df2 = pd.DataFrame({"key": [1, 2, 2], "right_val": [1, 2, 3]}) + result = merge_asof(df1, df2, on="key") + expected = pd.DataFrame( + {"key": [1, 1, 3], "left_val": [1, 2, 3], "right_val": [1, 1, 3]} + ) + tm.assert_frame_equal(result, expected) + + def test_valid_allow_exact_matches(self): + + trades = self.trades + quotes = self.quotes + + msg = "allow_exact_matches must be boolean, passed foo" + + with pytest.raises(MergeError, match=msg): + merge_asof( + trades, quotes, on="time", by="ticker", allow_exact_matches="foo" + ) + + def test_valid_tolerance(self): + + trades = self.trades + quotes = self.quotes + + # dti + merge_asof(trades, quotes, on="time", by="ticker", tolerance=Timedelta("1s")) + + # integer + merge_asof( + trades.reset_index(), + quotes.reset_index(), + on="index", + by="ticker", + tolerance=1, + ) + + msg = r"incompatible tolerance .*, must be compat with type .*" + + # incompat + with pytest.raises(MergeError, match=msg): + merge_asof(trades, quotes, on="time", by="ticker", tolerance=1) + + # invalid + with pytest.raises(MergeError, match=msg): + merge_asof( + trades.reset_index(), + quotes.reset_index(), + on="index", + by="ticker", + tolerance=1.0, + ) + + msg = "tolerance must be positive" + + # invalid negative + with pytest.raises(MergeError, match=msg): + merge_asof( + trades, quotes, on="time", by="ticker", tolerance=-Timedelta("1s") + ) + + with pytest.raises(MergeError, match=msg): + merge_asof( + trades.reset_index(), + quotes.reset_index(), + on="index", + by="ticker", + tolerance=-1, + ) + + def test_non_sorted(self): + + trades = self.trades.sort_values("time", ascending=False) + quotes = self.quotes.sort_values("time", ascending=False) + + # we require that we are already sorted on time & quotes + assert not trades.time.is_monotonic + assert not quotes.time.is_monotonic + with pytest.raises(ValueError, match="left keys must be sorted"): + merge_asof(trades, quotes, on="time", by="ticker") + + trades = self.trades.sort_values("time") + assert trades.time.is_monotonic + assert not quotes.time.is_monotonic + with pytest.raises(ValueError, match="right keys must be sorted"): + merge_asof(trades, quotes, on="time", by="ticker") + + quotes = self.quotes.sort_values("time") + assert trades.time.is_monotonic + assert quotes.time.is_monotonic + + # ok, though has dupes + merge_asof(trades, self.quotes, on="time", by="ticker") + + @pytest.mark.parametrize( + "tolerance", + [Timedelta("1day"), datetime.timedelta(days=1)], + ids=["Timedelta", "datetime.timedelta"], + ) + def test_tolerance(self, tolerance): + + trades = self.trades + quotes = self.quotes + + result = merge_asof(trades, quotes, on="time", by="ticker", tolerance=tolerance) + expected = self.tolerance + tm.assert_frame_equal(result, expected) + + def test_tolerance_forward(self): + # GH14887 + + left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]}) + right = pd.DataFrame({"a": [1, 2, 3, 7, 11], "right_val": [1, 2, 3, 7, 11]}) + + expected = 
pd.DataFrame( + {"a": [1, 5, 10], "left_val": ["a", "b", "c"], "right_val": [1, np.nan, 11]} + ) + + result = merge_asof(left, right, on="a", direction="forward", tolerance=1) + tm.assert_frame_equal(result, expected) + + def test_tolerance_nearest(self): + # GH14887 + + left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]}) + right = pd.DataFrame({"a": [1, 2, 3, 7, 11], "right_val": [1, 2, 3, 7, 11]}) + + expected = pd.DataFrame( + {"a": [1, 5, 10], "left_val": ["a", "b", "c"], "right_val": [1, np.nan, 11]} + ) + + result = merge_asof(left, right, on="a", direction="nearest", tolerance=1) + tm.assert_frame_equal(result, expected) + + def test_tolerance_tz(self): + # GH 14844 + left = pd.DataFrame( + { + "date": pd.date_range( + start=to_datetime("2016-01-02"), + freq="D", + periods=5, + tz=pytz.timezone("UTC"), + ), + "value1": np.arange(5), + } + ) + right = pd.DataFrame( + { + "date": pd.date_range( + start=to_datetime("2016-01-01"), + freq="D", + periods=5, + tz=pytz.timezone("UTC"), + ), + "value2": list("ABCDE"), + } + ) + result = merge_asof(left, right, on="date", tolerance=Timedelta("1 day")) + + expected = pd.DataFrame( + { + "date": pd.date_range( + start=to_datetime("2016-01-02"), + freq="D", + periods=5, + tz=pytz.timezone("UTC"), + ), + "value1": np.arange(5), + "value2": list("BCDEE"), + } + ) + tm.assert_frame_equal(result, expected) + + def test_tolerance_float(self): + # GH22981 + left = pd.DataFrame({"a": [1.1, 3.5, 10.9], "left_val": ["a", "b", "c"]}) + right = pd.DataFrame( + {"a": [1.0, 2.5, 3.3, 7.5, 11.5], "right_val": [1.0, 2.5, 3.3, 7.5, 11.5]} + ) + + expected = pd.DataFrame( + { + "a": [1.1, 3.5, 10.9], + "left_val": ["a", "b", "c"], + "right_val": [1, 3.3, np.nan], + } + ) + + result = merge_asof(left, right, on="a", direction="nearest", tolerance=0.5) + tm.assert_frame_equal(result, expected) + + def test_index_tolerance(self): + # GH 15135 + expected = self.tolerance.set_index("time") + trades = self.trades.set_index("time") + quotes = self.quotes.set_index("time") + + result = merge_asof( + trades, + quotes, + left_index=True, + right_index=True, + by="ticker", + tolerance=Timedelta("1day"), + ) + tm.assert_frame_equal(result, expected) + + def test_allow_exact_matches(self): + + result = merge_asof( + self.trades, self.quotes, on="time", by="ticker", allow_exact_matches=False + ) + expected = self.allow_exact_matches + tm.assert_frame_equal(result, expected) + + def test_allow_exact_matches_forward(self): + # GH14887 + + left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]}) + right = pd.DataFrame({"a": [1, 2, 3, 7, 11], "right_val": [1, 2, 3, 7, 11]}) + + expected = pd.DataFrame( + {"a": [1, 5, 10], "left_val": ["a", "b", "c"], "right_val": [2, 7, 11]} + ) + + result = merge_asof( + left, right, on="a", direction="forward", allow_exact_matches=False + ) + tm.assert_frame_equal(result, expected) + + def test_allow_exact_matches_nearest(self): + # GH14887 + + left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]}) + right = pd.DataFrame({"a": [1, 2, 3, 7, 11], "right_val": [1, 2, 3, 7, 11]}) + + expected = pd.DataFrame( + {"a": [1, 5, 10], "left_val": ["a", "b", "c"], "right_val": [2, 3, 11]} + ) + + result = merge_asof( + left, right, on="a", direction="nearest", allow_exact_matches=False + ) + tm.assert_frame_equal(result, expected) + + def test_allow_exact_matches_and_tolerance(self): + + result = merge_asof( + self.trades, + self.quotes, + on="time", + by="ticker", + tolerance=Timedelta("100ms"), + 
allow_exact_matches=False, + ) + expected = self.allow_exact_matches_and_tolerance + tm.assert_frame_equal(result, expected) + + def test_allow_exact_matches_and_tolerance2(self): + # GH 13695 + df1 = pd.DataFrame( + {"time": to_datetime(["2016-07-15 13:30:00.030"]), "username": ["bob"]} + ) + df2 = pd.DataFrame( + { + "time": to_datetime( + ["2016-07-15 13:30:00.000", "2016-07-15 13:30:00.030"] + ), + "version": [1, 2], + } + ) + + result = merge_asof(df1, df2, on="time") + expected = pd.DataFrame( + { + "time": to_datetime(["2016-07-15 13:30:00.030"]), + "username": ["bob"], + "version": [2], + } + ) + tm.assert_frame_equal(result, expected) + + result = merge_asof(df1, df2, on="time", allow_exact_matches=False) + expected = pd.DataFrame( + { + "time": to_datetime(["2016-07-15 13:30:00.030"]), + "username": ["bob"], + "version": [1], + } + ) + tm.assert_frame_equal(result, expected) + + result = merge_asof( + df1, + df2, + on="time", + allow_exact_matches=False, + tolerance=Timedelta("10ms"), + ) + expected = pd.DataFrame( + { + "time": to_datetime(["2016-07-15 13:30:00.030"]), + "username": ["bob"], + "version": [np.nan], + } + ) + tm.assert_frame_equal(result, expected) + + def test_allow_exact_matches_and_tolerance3(self): + # GH 13709 + df1 = pd.DataFrame( + { + "time": to_datetime( + ["2016-07-15 13:30:00.030", "2016-07-15 13:30:00.030"] + ), + "username": ["bob", "charlie"], + } + ) + df2 = pd.DataFrame( + { + "time": to_datetime( + ["2016-07-15 13:30:00.000", "2016-07-15 13:30:00.030"] + ), + "version": [1, 2], + } + ) + + result = merge_asof( + df1, + df2, + on="time", + allow_exact_matches=False, + tolerance=Timedelta("10ms"), + ) + expected = pd.DataFrame( + { + "time": to_datetime( + ["2016-07-15 13:30:00.030", "2016-07-15 13:30:00.030"] + ), + "username": ["bob", "charlie"], + "version": [np.nan, np.nan], + } + ) + tm.assert_frame_equal(result, expected) + + def test_allow_exact_matches_and_tolerance_forward(self): + # GH14887 + + left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]}) + right = pd.DataFrame({"a": [1, 3, 4, 6, 11], "right_val": [1, 3, 4, 6, 11]}) + + expected = pd.DataFrame( + {"a": [1, 5, 10], "left_val": ["a", "b", "c"], "right_val": [np.nan, 6, 11]} + ) + + result = merge_asof( + left, + right, + on="a", + direction="forward", + allow_exact_matches=False, + tolerance=1, + ) + tm.assert_frame_equal(result, expected) + + def test_allow_exact_matches_and_tolerance_nearest(self): + # GH14887 + + left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]}) + right = pd.DataFrame({"a": [1, 3, 4, 6, 11], "right_val": [1, 3, 4, 7, 11]}) + + expected = pd.DataFrame( + {"a": [1, 5, 10], "left_val": ["a", "b", "c"], "right_val": [np.nan, 4, 11]} + ) + + result = merge_asof( + left, + right, + on="a", + direction="nearest", + allow_exact_matches=False, + tolerance=1, + ) + tm.assert_frame_equal(result, expected) + + def test_forward_by(self): + # GH14887 + + left = pd.DataFrame( + { + "a": [1, 5, 10, 12, 15], + "b": ["X", "X", "Y", "Z", "Y"], + "left_val": ["a", "b", "c", "d", "e"], + } + ) + right = pd.DataFrame( + { + "a": [1, 6, 11, 15, 16], + "b": ["X", "Z", "Y", "Z", "Y"], + "right_val": [1, 6, 11, 15, 16], + } + ) + + expected = pd.DataFrame( + { + "a": [1, 5, 10, 12, 15], + "b": ["X", "X", "Y", "Z", "Y"], + "left_val": ["a", "b", "c", "d", "e"], + "right_val": [1, np.nan, 11, 15, 16], + } + ) + + result = merge_asof(left, right, on="a", by="b", direction="forward") + tm.assert_frame_equal(result, expected) + + def test_nearest_by(self): + # 
GH14887 + + left = pd.DataFrame( + { + "a": [1, 5, 10, 12, 15], + "b": ["X", "X", "Z", "Z", "Y"], + "left_val": ["a", "b", "c", "d", "e"], + } + ) + right = pd.DataFrame( + { + "a": [1, 6, 11, 15, 16], + "b": ["X", "Z", "Z", "Z", "Y"], + "right_val": [1, 6, 11, 15, 16], + } + ) + + expected = pd.DataFrame( + { + "a": [1, 5, 10, 12, 15], + "b": ["X", "X", "Z", "Z", "Y"], + "left_val": ["a", "b", "c", "d", "e"], + "right_val": [1, 1, 11, 11, 16], + } + ) + + result = merge_asof(left, right, on="a", by="b", direction="nearest") + tm.assert_frame_equal(result, expected) + + def test_by_int(self): + # we specialize by type, so test that this is correct + df1 = pd.DataFrame( + { + "time": to_datetime( + [ + "20160525 13:30:00.020", + "20160525 13:30:00.030", + "20160525 13:30:00.040", + "20160525 13:30:00.050", + "20160525 13:30:00.060", + ] + ), + "key": [1, 2, 1, 3, 2], + "value1": [1.1, 1.2, 1.3, 1.4, 1.5], + }, + columns=["time", "key", "value1"], + ) + + df2 = pd.DataFrame( + { + "time": to_datetime( + [ + "20160525 13:30:00.015", + "20160525 13:30:00.020", + "20160525 13:30:00.025", + "20160525 13:30:00.035", + "20160525 13:30:00.040", + "20160525 13:30:00.055", + "20160525 13:30:00.060", + "20160525 13:30:00.065", + ] + ), + "key": [2, 1, 1, 3, 2, 1, 2, 3], + "value2": [2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8], + }, + columns=["time", "key", "value2"], + ) + + result = merge_asof(df1, df2, on="time", by="key") + + expected = pd.DataFrame( + { + "time": to_datetime( + [ + "20160525 13:30:00.020", + "20160525 13:30:00.030", + "20160525 13:30:00.040", + "20160525 13:30:00.050", + "20160525 13:30:00.060", + ] + ), + "key": [1, 2, 1, 3, 2], + "value1": [1.1, 1.2, 1.3, 1.4, 1.5], + "value2": [2.2, 2.1, 2.3, 2.4, 2.7], + }, + columns=["time", "key", "value1", "value2"], + ) + + tm.assert_frame_equal(result, expected) + + def test_on_float(self): + # mimics how to determine the minimum-price variation + df1 = pd.DataFrame( + { + "price": [5.01, 0.0023, 25.13, 340.05, 30.78, 1040.90, 0.0078], + "symbol": list("ABCDEFG"), + }, + columns=["symbol", "price"], + ) + + df2 = pd.DataFrame( + {"price": [0.0, 1.0, 100.0], "mpv": [0.0001, 0.01, 0.05]}, + columns=["price", "mpv"], + ) + + df1 = df1.sort_values("price").reset_index(drop=True) + + result = merge_asof(df1, df2, on="price") + + expected = pd.DataFrame( + { + "symbol": list("BGACEDF"), + "price": [0.0023, 0.0078, 5.01, 25.13, 30.78, 340.05, 1040.90], + "mpv": [0.0001, 0.0001, 0.01, 0.01, 0.01, 0.05, 0.05], + }, + columns=["symbol", "price", "mpv"], + ) + + tm.assert_frame_equal(result, expected) + + def test_on_specialized_type(self, any_real_numpy_dtype): + # see gh-13936 + dtype = np.dtype(any_real_numpy_dtype).type + + df1 = pd.DataFrame( + {"value": [5, 2, 25, 100, 78, 120, 79], "symbol": list("ABCDEFG")}, + columns=["symbol", "value"], + ) + df1.value = dtype(df1.value) + + df2 = pd.DataFrame( + {"value": [0, 80, 120, 125], "result": list("xyzw")}, + columns=["value", "result"], + ) + df2.value = dtype(df2.value) + + df1 = df1.sort_values("value").reset_index(drop=True) + result = merge_asof(df1, df2, on="value") + + expected = pd.DataFrame( + { + "symbol": list("BACEGDF"), + "value": [2, 5, 25, 78, 79, 100, 120], + "result": list("xxxxxyz"), + }, + columns=["symbol", "value", "result"], + ) + expected.value = dtype(expected.value) + + tm.assert_frame_equal(result, expected) + + def test_on_specialized_type_by_int(self, any_real_numpy_dtype): + # see gh-13936 + dtype = np.dtype(any_real_numpy_dtype).type + + df1 = pd.DataFrame( + { + 
"value": [5, 2, 25, 100, 78, 120, 79], + "key": [1, 2, 3, 2, 3, 1, 2], + "symbol": list("ABCDEFG"), + }, + columns=["symbol", "key", "value"], + ) + df1.value = dtype(df1.value) + + df2 = pd.DataFrame( + {"value": [0, 80, 120, 125], "key": [1, 2, 2, 3], "result": list("xyzw")}, + columns=["value", "key", "result"], + ) + df2.value = dtype(df2.value) + + df1 = df1.sort_values("value").reset_index(drop=True) + result = merge_asof(df1, df2, on="value", by="key") + + expected = pd.DataFrame( + { + "symbol": list("BACEGDF"), + "key": [2, 1, 3, 3, 2, 2, 1], + "value": [2, 5, 25, 78, 79, 100, 120], + "result": [np.nan, "x", np.nan, np.nan, np.nan, "y", "x"], + }, + columns=["symbol", "key", "value", "result"], + ) + expected.value = dtype(expected.value) + + tm.assert_frame_equal(result, expected) + + def test_on_float_by_int(self): + # type specialize both "by" and "on" parameters + df1 = pd.DataFrame( + { + "symbol": list("AAABBBCCC"), + "exch": [1, 2, 3, 1, 2, 3, 1, 2, 3], + "price": [ + 3.26, + 3.2599, + 3.2598, + 12.58, + 12.59, + 12.5, + 378.15, + 378.2, + 378.25, + ], + }, + columns=["symbol", "exch", "price"], + ) + + df2 = pd.DataFrame( + { + "exch": [1, 1, 1, 2, 2, 2, 3, 3, 3], + "price": [0.0, 1.0, 100.0, 0.0, 5.0, 100.0, 0.0, 5.0, 1000.0], + "mpv": [0.0001, 0.01, 0.05, 0.0001, 0.01, 0.1, 0.0001, 0.25, 1.0], + }, + columns=["exch", "price", "mpv"], + ) + + df1 = df1.sort_values("price").reset_index(drop=True) + df2 = df2.sort_values("price").reset_index(drop=True) + + result = merge_asof(df1, df2, on="price", by="exch") + + expected = pd.DataFrame( + { + "symbol": list("AAABBBCCC"), + "exch": [3, 2, 1, 3, 1, 2, 1, 2, 3], + "price": [ + 3.2598, + 3.2599, + 3.26, + 12.5, + 12.58, + 12.59, + 378.15, + 378.2, + 378.25, + ], + "mpv": [0.0001, 0.0001, 0.01, 0.25, 0.01, 0.01, 0.05, 0.1, 0.25], + }, + columns=["symbol", "exch", "price", "mpv"], + ) + + tm.assert_frame_equal(result, expected) + + def test_merge_datatype_error_raises(self): + msg = r"Incompatible merge dtype, .*, both sides must have numeric dtype" + + left = pd.DataFrame({"left_val": [1, 5, 10], "a": ["a", "b", "c"]}) + right = pd.DataFrame({"right_val": [1, 2, 3, 6, 7], "a": [1, 2, 3, 6, 7]}) + + with pytest.raises(MergeError, match=msg): + merge_asof(left, right, on="a") + + def test_merge_datatype_categorical_error_raises(self): + msg = ( + r"incompatible merge keys \[0\] .* both sides category, " + "but not equal ones" + ) + + left = pd.DataFrame( + {"left_val": [1, 5, 10], "a": pd.Categorical(["a", "b", "c"])} + ) + right = pd.DataFrame( + { + "right_val": [1, 2, 3, 6, 7], + "a": pd.Categorical(["a", "X", "c", "X", "b"]), + } + ) + + with pytest.raises(MergeError, match=msg): + merge_asof(left, right, on="a") + + def test_merge_groupby_multiple_column_with_categorical_column(self): + # GH 16454 + df = pd.DataFrame({"x": [0], "y": [0], "z": pd.Categorical([0])}) + result = merge_asof(df, df, on="x", by=["y", "z"]) + expected = pd.DataFrame({"x": [0], "y": [0], "z": pd.Categorical([0])}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "func", [lambda x: x, lambda x: to_datetime(x)], ids=["numeric", "datetime"] + ) + @pytest.mark.parametrize("side", ["left", "right"]) + def test_merge_on_nans(self, func, side): + # GH 23189 + msg = f"Merge keys contain null values on {side} side" + nulls = func([1.0, 5.0, np.nan]) + non_nulls = func([1.0, 5.0, 10.0]) + df_null = pd.DataFrame({"a": nulls, "left_val": ["a", "b", "c"]}) + df = pd.DataFrame({"a": non_nulls, "right_val": [1, 6, 11]}) + + with 
pytest.raises(ValueError, match=msg): + if side == "left": + merge_asof(df_null, df, on="a") + else: + merge_asof(df, df_null, on="a") + + def test_merge_by_col_tz_aware(self): + # GH 21184 + left = pd.DataFrame( + { + "by_col": pd.DatetimeIndex(["2018-01-01"]).tz_localize("UTC"), + "on_col": [2], + "values": ["a"], + } + ) + right = pd.DataFrame( + { + "by_col": pd.DatetimeIndex(["2018-01-01"]).tz_localize("UTC"), + "on_col": [1], + "values": ["b"], + } + ) + result = merge_asof(left, right, by="by_col", on="on_col") + expected = pd.DataFrame( + [[pd.Timestamp("2018-01-01", tz="UTC"), 2, "a", "b"]], + columns=["by_col", "on_col", "values_x", "values_y"], + ) + tm.assert_frame_equal(result, expected) + + def test_by_mixed_tz_aware(self): + # GH 26649 + left = pd.DataFrame( + { + "by_col1": pd.DatetimeIndex(["2018-01-01"]).tz_localize("UTC"), + "by_col2": ["HELLO"], + "on_col": [2], + "value": ["a"], + } + ) + right = pd.DataFrame( + { + "by_col1": pd.DatetimeIndex(["2018-01-01"]).tz_localize("UTC"), + "by_col2": ["WORLD"], + "on_col": [1], + "value": ["b"], + } + ) + result = merge_asof(left, right, by=["by_col1", "by_col2"], on="on_col") + expected = pd.DataFrame( + [[pd.Timestamp("2018-01-01", tz="UTC"), "HELLO", 2, "a"]], + columns=["by_col1", "by_col2", "on_col", "value_x"], + ) + expected["value_y"] = np.array([np.nan], dtype=object) + tm.assert_frame_equal(result, expected) + + def test_timedelta_tolerance_nearest(self): + # GH 27642 + + left = pd.DataFrame( + list(zip([0, 5, 10, 15, 20, 25], [0, 1, 2, 3, 4, 5])), + columns=["time", "left"], + ) + + left["time"] = pd.to_timedelta(left["time"], "ms") + + right = pd.DataFrame( + list(zip([0, 3, 9, 12, 15, 18], [0, 1, 2, 3, 4, 5])), + columns=["time", "right"], + ) + + right["time"] = pd.to_timedelta(right["time"], "ms") + + expected = pd.DataFrame( + list( + zip( + [0, 5, 10, 15, 20, 25], + [0, 1, 2, 3, 4, 5], + [0, np.nan, 2, 4, np.nan, np.nan], + ) + ), + columns=["time", "left", "right"], + ) + + expected["time"] = pd.to_timedelta(expected["time"], "ms") + + result = merge_asof( + left, right, on="time", tolerance=Timedelta("1ms"), direction="nearest" + ) + + tm.assert_frame_equal(result, expected) + + def test_int_type_tolerance(self, any_int_numpy_dtype): + # GH #28870 + + left = pd.DataFrame({"a": [0, 10, 20], "left_val": [1, 2, 3]}) + right = pd.DataFrame({"a": [5, 15, 25], "right_val": [1, 2, 3]}) + left["a"] = left["a"].astype(any_int_numpy_dtype) + right["a"] = right["a"].astype(any_int_numpy_dtype) + + expected = pd.DataFrame( + {"a": [0, 10, 20], "left_val": [1, 2, 3], "right_val": [np.nan, 1.0, 2.0]} + ) + expected["a"] = expected["a"].astype(any_int_numpy_dtype) + + result = merge_asof(left, right, on="a", tolerance=10) + tm.assert_frame_equal(result, expected) + + def test_merge_index_column_tz(self): + # GH 29864 + index = pd.date_range("2019-10-01", freq="30min", periods=5, tz="UTC") + left = pd.DataFrame([0.9, 0.8, 0.7, 0.6], columns=["xyz"], index=index[1:]) + right = pd.DataFrame({"from_date": index, "abc": [2.46] * 4 + [2.19]}) + result = merge_asof( + left=left, right=right, left_index=True, right_on=["from_date"] + ) + expected = pd.DataFrame( + { + "xyz": [0.9, 0.8, 0.7, 0.6], + "from_date": index[1:], + "abc": [2.46] * 3 + [2.19], + }, + index=pd.date_range( + "2019-10-01 00:30:00", freq="30min", periods=4, tz="UTC" + ), + ) + tm.assert_frame_equal(result, expected) + + result = merge_asof( + left=right, right=left, right_index=True, left_on=["from_date"] + ) + expected = pd.DataFrame( + { + "from_date": index, 
+ "abc": [2.46] * 4 + [2.19], + "xyz": [np.nan, 0.9, 0.8, 0.7, 0.6], + }, + index=Index([0, 1, 2, 3, 4]), + ) + tm.assert_frame_equal(result, expected) + + def test_left_index_right_index_tolerance(self): + # https://github.com/pandas-dev/pandas/issues/35558 + dr1 = pd.date_range(start="1/1/2020", end="1/20/2020", freq="2D") + Timedelta( + seconds=0.4 + ) + dr2 = pd.date_range(start="1/1/2020", end="2/1/2020") + + df1 = pd.DataFrame({"val1": "foo"}, index=pd.DatetimeIndex(dr1)) + df2 = pd.DataFrame({"val2": "bar"}, index=pd.DatetimeIndex(dr2)) + + expected = pd.DataFrame( + {"val1": "foo", "val2": "bar"}, index=pd.DatetimeIndex(dr1) + ) + result = merge_asof( + df1, + df2, + left_index=True, + right_index=True, + tolerance=Timedelta(seconds=0.5), + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "kwargs", [{"on": "x"}, {"left_index": True, "right_index": True}] +) +@pytest.mark.parametrize( + "data", + [["2019-06-01 00:09:12", "2019-06-01 00:10:29"], [1.0, "2019-06-01 00:10:29"]], +) +def test_merge_asof_non_numerical_dtype(kwargs, data): + # GH#29130 + left = pd.DataFrame({"x": data}, index=data) + right = pd.DataFrame({"x": data}, index=data) + with pytest.raises( + MergeError, + match=r"Incompatible merge dtype, .*, both sides must have numeric dtype", + ): + merge_asof(left, right, **kwargs) + + +def test_merge_asof_non_numerical_dtype_object(): + # GH#29130 + left = pd.DataFrame({"a": ["12", "13", "15"], "left_val1": ["a", "b", "c"]}) + right = pd.DataFrame({"a": ["a", "b", "c"], "left_val": ["d", "e", "f"]}) + with pytest.raises( + MergeError, + match=r"Incompatible merge dtype, .*, both sides must have numeric dtype", + ): + merge_asof( + left, + right, + left_on="left_val1", + right_on="a", + left_by="a", + right_by="left_val", + ) + + +@pytest.mark.parametrize( + "kwargs", + [ + {"right_index": True, "left_index": True}, + {"left_on": "left_time", "right_index": True}, + {"left_index": True, "right_on": "right"}, + ], +) +def test_merge_asof_index_behavior(kwargs): + # GH 33463 + index = Index([1, 5, 10], name="test") + left = pd.DataFrame({"left": ["a", "b", "c"], "left_time": [1, 4, 10]}, index=index) + right = pd.DataFrame({"right": [1, 2, 3, 6, 7]}, index=[1, 2, 3, 6, 7]) + result = merge_asof(left, right, **kwargs) + + expected = pd.DataFrame( + {"left": ["a", "b", "c"], "left_time": [1, 4, 10], "right": [1, 3, 7]}, + index=index, + ) + tm.assert_frame_equal(result, expected) + + +def test_merge_asof_numeri_column_in_index(): + # GH#34488 + left = pd.DataFrame({"b": [10, 11, 12]}, index=Index([1, 2, 3], name="a")) + right = pd.DataFrame({"c": [20, 21, 22]}, index=Index([0, 2, 3], name="a")) + + result = merge_asof(left, right, left_on="a", right_on="a") + expected = pd.DataFrame({"a": [1, 2, 3], "b": [10, 11, 12], "c": [20, 21, 22]}) + tm.assert_frame_equal(result, expected) + + +def test_merge_asof_numeri_column_in_multiindex(): + # GH#34488 + left = pd.DataFrame( + {"b": [10, 11, 12]}, + index=pd.MultiIndex.from_arrays([[1, 2, 3], ["a", "b", "c"]], names=["a", "z"]), + ) + right = pd.DataFrame( + {"c": [20, 21, 22]}, + index=pd.MultiIndex.from_arrays([[1, 2, 3], ["x", "y", "z"]], names=["a", "y"]), + ) + + result = merge_asof(left, right, left_on="a", right_on="a") + expected = pd.DataFrame({"a": [1, 2, 3], "b": [10, 11, 12], "c": [20, 21, 22]}) + tm.assert_frame_equal(result, expected) + + +def test_merge_asof_numeri_column_in_index_object_dtype(): + # GH#34488 + left = pd.DataFrame({"b": [10, 11, 12]}, index=Index(["1", "2", "3"], name="a")) + 
right = pd.DataFrame({"c": [20, 21, 22]}, index=Index(["m", "n", "o"], name="a")) + + with pytest.raises( + MergeError, + match=r"Incompatible merge dtype, .*, both sides must have numeric dtype", + ): + merge_asof(left, right, left_on="a", right_on="a") + + left = left.reset_index().set_index(["a", "b"]) + right = right.reset_index().set_index(["a", "c"]) + + with pytest.raises( + MergeError, + match=r"Incompatible merge dtype, .*, both sides must have numeric dtype", + ): + merge_asof(left, right, left_on="a", right_on="a") + + +def test_merge_asof_array_as_on(): + # GH#42844 + right = pd.DataFrame( + { + "a": [2, 6], + "ts": [pd.Timestamp("2021/01/01 00:37"), pd.Timestamp("2021/01/01 01:40")], + } + ) + ts_merge = pd.date_range( + start=pd.Timestamp("2021/01/01 00:00"), periods=3, freq="1h" + ) + left = pd.DataFrame({"b": [4, 8, 7]}) + result = merge_asof( + left, + right, + left_on=ts_merge, + right_on="ts", + allow_exact_matches=False, + direction="backward", + ) + expected = pd.DataFrame({"b": [4, 8, 7], "a": [np.nan, 2, 6], "ts": ts_merge}) + tm.assert_frame_equal(result, expected) + + result = merge_asof( + right, + left, + left_on="ts", + right_on=ts_merge, + allow_exact_matches=False, + direction="backward", + ) + expected = pd.DataFrame( + { + "a": [2, 6], + "ts": [pd.Timestamp("2021/01/01 00:37"), pd.Timestamp("2021/01/01 01:40")], + "b": [4, 8], + } + ) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/test_merge_cross.py b/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/test_merge_cross.py new file mode 100644 index 0000000..7e14b51 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/test_merge_cross.py @@ -0,0 +1,98 @@ +import pytest + +from pandas import DataFrame +import pandas._testing as tm +from pandas.core.reshape.merge import ( + MergeError, + merge, +) + + +@pytest.mark.parametrize( + ("input_col", "output_cols"), [("b", ["a", "b"]), ("a", ["a_x", "a_y"])] +) +def test_merge_cross(input_col, output_cols): + # GH#5401 + left = DataFrame({"a": [1, 3]}) + right = DataFrame({input_col: [3, 4]}) + left_copy = left.copy() + right_copy = right.copy() + result = merge(left, right, how="cross") + expected = DataFrame({output_cols[0]: [1, 1, 3, 3], output_cols[1]: [3, 4, 3, 4]}) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(left, left_copy) + tm.assert_frame_equal(right, right_copy) + + +@pytest.mark.parametrize( + "kwargs", + [ + {"left_index": True}, + {"right_index": True}, + {"on": "a"}, + {"left_on": "a"}, + {"right_on": "b"}, + ], +) +def test_merge_cross_error_reporting(kwargs): + # GH#5401 + left = DataFrame({"a": [1, 3]}) + right = DataFrame({"b": [3, 4]}) + msg = ( + "Can not pass on, right_on, left_on or set right_index=True or " + "left_index=True" + ) + with pytest.raises(MergeError, match=msg): + merge(left, right, how="cross", **kwargs) + + +def test_merge_cross_mixed_dtypes(): + # GH#5401 + left = DataFrame(["a", "b", "c"], columns=["A"]) + right = DataFrame(range(2), columns=["B"]) + result = merge(left, right, how="cross") + expected = DataFrame({"A": ["a", "a", "b", "b", "c", "c"], "B": [0, 1, 0, 1, 0, 1]}) + tm.assert_frame_equal(result, expected) + + +def test_merge_cross_more_than_one_column(): + # GH#5401 + left = DataFrame({"A": list("ab"), "B": [2, 1]}) + right = DataFrame({"C": range(2), "D": range(4, 6)}) + result = merge(left, right, how="cross") + expected = DataFrame( + { + "A": ["a", "a", "b", "b"], + "B": [2, 2, 1, 
1], + "C": [0, 1, 0, 1], + "D": [4, 5, 4, 5], + } + ) + tm.assert_frame_equal(result, expected) + + +def test_merge_cross_null_values(nulls_fixture): + # GH#5401 + left = DataFrame({"a": [1, nulls_fixture]}) + right = DataFrame({"b": ["a", "b"], "c": [1.0, 2.0]}) + result = merge(left, right, how="cross") + expected = DataFrame( + { + "a": [1, 1, nulls_fixture, nulls_fixture], + "b": ["a", "b", "a", "b"], + "c": [1.0, 2.0, 1.0, 2.0], + } + ) + tm.assert_frame_equal(result, expected) + + +def test_join_cross_error_reporting(): + # GH#5401 + left = DataFrame({"a": [1, 3]}) + right = DataFrame({"a": [3, 4]}) + msg = ( + "Can not pass on, right_on, left_on or set right_index=True or " + "left_index=True" + ) + with pytest.raises(MergeError, match=msg): + left.join(right, how="cross", on="a") diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/test_merge_index_as_string.py b/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/test_merge_index_as_string.py new file mode 100644 index 0000000..c3e0a92 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/test_merge_index_as_string.py @@ -0,0 +1,189 @@ +import numpy as np +import pytest + +from pandas import DataFrame +import pandas._testing as tm + + +@pytest.fixture +def df1(): + return DataFrame( + { + "outer": [1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 4], + "inner": [1, 2, 3, 1, 2, 3, 4, 1, 2, 1, 2], + "v1": np.linspace(0, 1, 11), + } + ) + + +@pytest.fixture +def df2(): + return DataFrame( + { + "outer": [1, 1, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3], + "inner": [1, 2, 2, 3, 3, 4, 2, 3, 1, 1, 2, 3], + "v2": np.linspace(10, 11, 12), + } + ) + + +@pytest.fixture(params=[[], ["outer"], ["outer", "inner"]]) +def left_df(request, df1): + """Construct left test DataFrame with specified levels + (any of 'outer', 'inner', and 'v1') + """ + levels = request.param + if levels: + df1 = df1.set_index(levels) + + return df1 + + +@pytest.fixture(params=[[], ["outer"], ["outer", "inner"]]) +def right_df(request, df2): + """Construct right test DataFrame with specified levels + (any of 'outer', 'inner', and 'v2') + """ + levels = request.param + + if levels: + df2 = df2.set_index(levels) + + return df2 + + +def compute_expected(df_left, df_right, on=None, left_on=None, right_on=None, how=None): + """ + Compute the expected merge result for the test case. + + This method computes the expected result of merging two DataFrames on + a combination of their columns and index levels. It does so by + explicitly dropping/resetting their named index levels, performing a + merge on their columns, and then finally restoring the appropriate + index in the result. 
+ + Parameters + ---------- + df_left : DataFrame + The left DataFrame (may have zero or more named index levels) + df_right : DataFrame + The right DataFrame (may have zero or more named index levels) + on : list of str + The on parameter to the merge operation + left_on : list of str + The left_on parameter to the merge operation + right_on : list of str + The right_on parameter to the merge operation + how : str + The how parameter to the merge operation + + Returns + ------- + DataFrame + The expected merge result + """ + # Handle on param if specified + if on is not None: + left_on, right_on = on, on + + # Compute input named index levels + left_levels = [n for n in df_left.index.names if n is not None] + right_levels = [n for n in df_right.index.names if n is not None] + + # Compute output named index levels + output_levels = [i for i in left_on if i in right_levels and i in left_levels] + + # Drop index levels that aren't involved in the merge + drop_left = [n for n in left_levels if n not in left_on] + if drop_left: + df_left = df_left.reset_index(drop_left, drop=True) + + drop_right = [n for n in right_levels if n not in right_on] + if drop_right: + df_right = df_right.reset_index(drop_right, drop=True) + + # Convert remaining index levels to columns + reset_left = [n for n in left_levels if n in left_on] + if reset_left: + df_left = df_left.reset_index(level=reset_left) + + reset_right = [n for n in right_levels if n in right_on] + if reset_right: + df_right = df_right.reset_index(level=reset_right) + + # Perform merge + expected = df_left.merge(df_right, left_on=left_on, right_on=right_on, how=how) + + # Restore index levels + if output_levels: + expected = expected.set_index(output_levels) + + return expected + + +@pytest.mark.parametrize( + "on,how", + [ + (["outer"], "inner"), + (["inner"], "left"), + (["outer", "inner"], "right"), + (["inner", "outer"], "outer"), + ], +) +def test_merge_indexes_and_columns_on(left_df, right_df, on, how): + + # Construct expected result + expected = compute_expected(left_df, right_df, on=on, how=how) + + # Perform merge + result = left_df.merge(right_df, on=on, how=how) + tm.assert_frame_equal(result, expected, check_like=True) + + +@pytest.mark.parametrize( + "left_on,right_on,how", + [ + (["outer"], ["outer"], "inner"), + (["inner"], ["inner"], "right"), + (["outer", "inner"], ["outer", "inner"], "left"), + (["inner", "outer"], ["inner", "outer"], "outer"), + ], +) +def test_merge_indexes_and_columns_lefton_righton( + left_df, right_df, left_on, right_on, how +): + + # Construct expected result + expected = compute_expected( + left_df, right_df, left_on=left_on, right_on=right_on, how=how + ) + + # Perform merge + result = left_df.merge(right_df, left_on=left_on, right_on=right_on, how=how) + tm.assert_frame_equal(result, expected, check_like=True) + + +@pytest.mark.parametrize("left_index", ["inner", ["inner", "outer"]]) +def test_join_indexes_and_columns_on(df1, df2, left_index, join_type): + + # Construct left_df + left_df = df1.set_index(left_index) + + # Construct right_df + right_df = df2.set_index(["outer", "inner"]) + + # Result + expected = ( + left_df.reset_index() + .join( + right_df, on=["outer", "inner"], how=join_type, lsuffix="_x", rsuffix="_y" + ) + .set_index(left_index) + ) + + # Perform join + result = left_df.join( + right_df, on=["outer", "inner"], how=join_type, lsuffix="_x", rsuffix="_y" + ) + + tm.assert_frame_equal(result, expected, check_like=True) diff --git 
a/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/test_merge_ordered.py b/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/test_merge_ordered.py new file mode 100644 index 0000000..0268801 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/test_merge_ordered.py @@ -0,0 +1,201 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + merge_ordered, +) +import pandas._testing as tm + + +class TestMergeOrdered: + def setup_method(self, method): + self.left = DataFrame({"key": ["a", "c", "e"], "lvalue": [1, 2.0, 3]}) + + self.right = DataFrame({"key": ["b", "c", "d", "f"], "rvalue": [1, 2, 3.0, 4]}) + + def test_basic(self): + result = merge_ordered(self.left, self.right, on="key") + expected = DataFrame( + { + "key": ["a", "b", "c", "d", "e", "f"], + "lvalue": [1, np.nan, 2, np.nan, 3, np.nan], + "rvalue": [np.nan, 1, 2, 3, np.nan, 4], + } + ) + + tm.assert_frame_equal(result, expected) + + def test_ffill(self): + result = merge_ordered(self.left, self.right, on="key", fill_method="ffill") + expected = DataFrame( + { + "key": ["a", "b", "c", "d", "e", "f"], + "lvalue": [1.0, 1, 2, 2, 3, 3.0], + "rvalue": [np.nan, 1, 2, 3, 3, 4], + } + ) + tm.assert_frame_equal(result, expected) + + def test_multigroup(self): + left = pd.concat([self.left, self.left], ignore_index=True) + + left["group"] = ["a"] * 3 + ["b"] * 3 + + result = merge_ordered( + left, self.right, on="key", left_by="group", fill_method="ffill" + ) + expected = DataFrame( + { + "key": ["a", "b", "c", "d", "e", "f"] * 2, + "lvalue": [1.0, 1, 2, 2, 3, 3.0] * 2, + "rvalue": [np.nan, 1, 2, 3, 3, 4] * 2, + } + ) + expected["group"] = ["a"] * 6 + ["b"] * 6 + + tm.assert_frame_equal(result, expected.loc[:, result.columns]) + + result2 = merge_ordered( + self.right, left, on="key", right_by="group", fill_method="ffill" + ) + tm.assert_frame_equal(result, result2.loc[:, result.columns]) + + result = merge_ordered(left, self.right, on="key", left_by="group") + assert result["group"].notna().all() + + def test_merge_type(self): + class NotADataFrame(DataFrame): + @property + def _constructor(self): + return NotADataFrame + + nad = NotADataFrame(self.left) + result = nad.merge(self.right, on="key") + + assert isinstance(result, NotADataFrame) + + def test_empty_sequence_concat(self): + # GH 9157 + empty_pat = "[Nn]o objects" + none_pat = "objects.*None" + test_cases = [ + ((), empty_pat), + ([], empty_pat), + ({}, empty_pat), + ([None], none_pat), + ([None, None], none_pat), + ] + for df_seq, pattern in test_cases: + with pytest.raises(ValueError, match=pattern): + pd.concat(df_seq) + + pd.concat([DataFrame()]) + pd.concat([None, DataFrame()]) + pd.concat([DataFrame(), None]) + + def test_doc_example(self): + left = DataFrame( + { + "group": list("aaabbb"), + "key": ["a", "c", "e", "a", "c", "e"], + "lvalue": [1, 2, 3] * 2, + } + ) + + right = DataFrame({"key": ["b", "c", "d"], "rvalue": [1, 2, 3]}) + + result = merge_ordered(left, right, fill_method="ffill", left_by="group") + + expected = DataFrame( + { + "group": list("aaaaabbbbb"), + "key": ["a", "b", "c", "d", "e"] * 2, + "lvalue": [1, 1, 2, 2, 3] * 2, + "rvalue": [np.nan, 1, 2, 3, 3] * 2, + } + ) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "left, right, on, left_by, right_by, expected", + [ + ( + DataFrame({"G": ["g", "g"], "H": ["h", "h"], "T": [1, 3]}), + DataFrame({"T": [2], "E": [1]}), + ["T"], + ["G", "H"], + None, + DataFrame( + { + "G": ["g"] * 3, + "H": 
["h"] * 3, + "T": [1, 2, 3], + "E": [np.nan, 1.0, np.nan], + } + ), + ), + ( + DataFrame({"G": ["g", "g"], "H": ["h", "h"], "T": [1, 3]}), + DataFrame({"T": [2], "E": [1]}), + "T", + ["G", "H"], + None, + DataFrame( + { + "G": ["g"] * 3, + "H": ["h"] * 3, + "T": [1, 2, 3], + "E": [np.nan, 1.0, np.nan], + } + ), + ), + ( + DataFrame({"T": [2], "E": [1]}), + DataFrame({"G": ["g", "g"], "H": ["h", "h"], "T": [1, 3]}), + ["T"], + None, + ["G", "H"], + DataFrame( + { + "T": [1, 2, 3], + "E": [np.nan, 1.0, np.nan], + "G": ["g"] * 3, + "H": ["h"] * 3, + } + ), + ), + ], + ) + def test_list_type_by(self, left, right, on, left_by, right_by, expected): + # GH 35269 + result = merge_ordered( + left=left, + right=right, + on=on, + left_by=left_by, + right_by=right_by, + ) + + tm.assert_frame_equal(result, expected) + + def test_left_by_length_equals_to_right_shape0(self): + # GH 38166 + left = DataFrame([["g", "h", 1], ["g", "h", 3]], columns=list("GHE")) + right = DataFrame([[2, 1]], columns=list("ET")) + result = merge_ordered(left, right, on="E", left_by=["G", "H"]) + expected = DataFrame( + {"G": ["g"] * 3, "H": ["h"] * 3, "E": [1, 2, 3], "T": [np.nan, 1.0, np.nan]} + ) + + tm.assert_frame_equal(result, expected) + + def test_elements_not_in_by_but_in_df(self): + # GH 38167 + left = DataFrame([["g", "h", 1], ["g", "h", 3]], columns=list("GHE")) + right = DataFrame([[2, 1]], columns=list("ET")) + msg = r"\{'h'\} not found in left columns" + with pytest.raises(KeyError, match=msg): + merge_ordered(left, right, on="E", left_by=["G", "h"]) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/test_multi.py b/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/test_multi.py new file mode 100644 index 0000000..b5945f7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/reshape/merge/test_multi.py @@ -0,0 +1,909 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + Timestamp, +) +import pandas._testing as tm +from pandas.core.reshape.concat import concat +from pandas.core.reshape.merge import merge + + +@pytest.fixture +def left(): + """left dataframe (not multi-indexed) for multi-index join tests""" + # a little relevant example with NAs + key1 = ["bar", "bar", "bar", "foo", "foo", "baz", "baz", "qux", "qux", "snap"] + key2 = ["two", "one", "three", "one", "two", "one", "two", "two", "three", "one"] + + data = np.random.randn(len(key1)) + return DataFrame({"key1": key1, "key2": key2, "data": data}) + + +@pytest.fixture +def right(multiindex_dataframe_random_data): + """right dataframe (multi-indexed) for multi-index join tests""" + df = multiindex_dataframe_random_data + df.index.names = ["key1", "key2"] + + df.columns = ["j_one", "j_two", "j_three"] + return df + + +@pytest.fixture +def left_multi(): + return DataFrame( + { + "Origin": ["A", "A", "B", "B", "C"], + "Destination": ["A", "B", "A", "C", "A"], + "Period": ["AM", "AM", "IP", "AM", "OP"], + "TripPurp": ["hbw", "nhb", "hbo", "nhb", "hbw"], + "Trips": [1987, 3647, 2470, 4296, 4444], + }, + columns=["Origin", "Destination", "Period", "TripPurp", "Trips"], + ).set_index(["Origin", "Destination", "Period", "TripPurp"]) + + +@pytest.fixture +def right_multi(): + return DataFrame( + { + "Origin": ["A", "A", "B", "B", "C", "C", "E"], + "Destination": ["A", "B", "A", "B", "A", "B", "F"], + "Period": ["AM", "AM", "IP", "AM", "OP", "IP", "AM"], + "LinkType": ["a", "b", "c", "b", "a", "b", "a"], + "Distance": [100, 80, 90, 80, 75, 
35, 55], + }, + columns=["Origin", "Destination", "Period", "LinkType", "Distance"], + ).set_index(["Origin", "Destination", "Period", "LinkType"]) + + +@pytest.fixture +def on_cols_multi(): + return ["Origin", "Destination", "Period"] + + +@pytest.fixture +def idx_cols_multi(): + return ["Origin", "Destination", "Period", "TripPurp", "LinkType"] + + +class TestMergeMulti: + def test_merge_on_multikey(self, left, right, join_type): + on_cols = ["key1", "key2"] + result = left.join(right, on=on_cols, how=join_type).reset_index(drop=True) + + expected = merge(left, right.reset_index(), on=on_cols, how=join_type) + + tm.assert_frame_equal(result, expected) + + result = left.join(right, on=on_cols, how=join_type, sort=True).reset_index( + drop=True + ) + + expected = merge( + left, right.reset_index(), on=on_cols, how=join_type, sort=True + ) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("sort", [False, True]) + def test_left_join_multi_index(self, left, right, sort): + icols = ["1st", "2nd", "3rd"] + + def bind_cols(df): + iord = lambda a: 0 if a != a else ord(a) + f = lambda ts: ts.map(iord) - ord("a") + return f(df["1st"]) + f(df["3rd"]) * 1e2 + df["2nd"].fillna(0) * 1e4 + + def run_asserts(left, right, sort): + res = left.join(right, on=icols, how="left", sort=sort) + + assert len(left) < len(res) + 1 + assert not res["4th"].isna().any() + assert not res["5th"].isna().any() + + tm.assert_series_equal(res["4th"], -res["5th"], check_names=False) + result = bind_cols(res.iloc[:, :-2]) + tm.assert_series_equal(res["4th"], result, check_names=False) + assert result.name is None + + if sort: + tm.assert_frame_equal(res, res.sort_values(icols, kind="mergesort")) + + out = merge(left, right.reset_index(), on=icols, sort=sort, how="left") + + res.index = np.arange(len(res)) + tm.assert_frame_equal(out, res) + + lc = list(map(chr, np.arange(ord("a"), ord("z") + 1))) + left = DataFrame(np.random.choice(lc, (5000, 2)), columns=["1st", "3rd"]) + left.insert(1, "2nd", np.random.randint(0, 1000, len(left))) + + i = np.random.permutation(len(left)) + right = left.iloc[i].copy() + + left["4th"] = bind_cols(left) + right["5th"] = -bind_cols(right) + right.set_index(icols, inplace=True) + + run_asserts(left, right, sort) + + # inject some nulls + left.loc[1::23, "1st"] = np.nan + left.loc[2::37, "2nd"] = np.nan + left.loc[3::43, "3rd"] = np.nan + left["4th"] = bind_cols(left) + + i = np.random.permutation(len(left)) + right = left.iloc[i, :-1] + right["5th"] = -bind_cols(right) + right.set_index(icols, inplace=True) + + run_asserts(left, right, sort) + + @pytest.mark.parametrize("sort", [False, True]) + def test_merge_right_vs_left(self, left, right, sort): + # compare left vs right merge with multikey + on_cols = ["key1", "key2"] + merged_left_right = left.merge( + right, left_on=on_cols, right_index=True, how="left", sort=sort + ) + + merge_right_left = right.merge( + left, right_on=on_cols, left_index=True, how="right", sort=sort + ) + + # Reorder columns + merge_right_left = merge_right_left[merged_left_right.columns] + + tm.assert_frame_equal(merged_left_right, merge_right_left) + + def test_merge_multiple_cols_with_mixed_cols_index(self): + # GH29522 + s = Series( + range(6), + MultiIndex.from_product([["A", "B"], [1, 2, 3]], names=["lev1", "lev2"]), + name="Amount", + ) + df = DataFrame({"lev1": list("AAABBB"), "lev2": [1, 2, 3, 1, 2, 3], "col": 0}) + result = merge(df, s.reset_index(), on=["lev1", "lev2"]) + expected = DataFrame( + { + "lev1": list("AAABBB"), + "lev2": [1, 
2, 3, 1, 2, 3], + "col": [0] * 6, + "Amount": range(6), + } + ) + tm.assert_frame_equal(result, expected) + + def test_compress_group_combinations(self): + + # ~ 40000000 possible unique groups + key1 = tm.rands_array(10, 10000) + key1 = np.tile(key1, 2) + key2 = key1[::-1] + + df = DataFrame({"key1": key1, "key2": key2, "value1": np.random.randn(20000)}) + + df2 = DataFrame( + {"key1": key1[::2], "key2": key2[::2], "value2": np.random.randn(10000)} + ) + + # just to hit the label compression code path + merge(df, df2, how="outer") + + def test_left_join_index_preserve_order(self): + + on_cols = ["k1", "k2"] + left = DataFrame( + { + "k1": [0, 1, 2] * 8, + "k2": ["foo", "bar"] * 12, + "v": np.array(np.arange(24), dtype=np.int64), + } + ) + + index = MultiIndex.from_tuples([(2, "bar"), (1, "foo")]) + right = DataFrame({"v2": [5, 7]}, index=index) + + result = left.join(right, on=on_cols) + + expected = left.copy() + expected["v2"] = np.nan + expected.loc[(expected.k1 == 2) & (expected.k2 == "bar"), "v2"] = 5 + expected.loc[(expected.k1 == 1) & (expected.k2 == "foo"), "v2"] = 7 + + tm.assert_frame_equal(result, expected) + + result.sort_values(on_cols, kind="mergesort", inplace=True) + expected = left.join(right, on=on_cols, sort=True) + + tm.assert_frame_equal(result, expected) + + # test join with multi dtypes blocks + left = DataFrame( + { + "k1": [0, 1, 2] * 8, + "k2": ["foo", "bar"] * 12, + "k3": np.array([0, 1, 2] * 8, dtype=np.float32), + "v": np.array(np.arange(24), dtype=np.int32), + } + ) + + index = MultiIndex.from_tuples([(2, "bar"), (1, "foo")]) + right = DataFrame({"v2": [5, 7]}, index=index) + + result = left.join(right, on=on_cols) + + expected = left.copy() + expected["v2"] = np.nan + expected.loc[(expected.k1 == 2) & (expected.k2 == "bar"), "v2"] = 5 + expected.loc[(expected.k1 == 1) & (expected.k2 == "foo"), "v2"] = 7 + + tm.assert_frame_equal(result, expected) + + result = result.sort_values(on_cols, kind="mergesort") + expected = left.join(right, on=on_cols, sort=True) + + tm.assert_frame_equal(result, expected) + + def test_left_join_index_multi_match_multiindex(self): + left = DataFrame( + [ + ["X", "Y", "C", "a"], + ["W", "Y", "C", "e"], + ["V", "Q", "A", "h"], + ["V", "R", "D", "i"], + ["X", "Y", "D", "b"], + ["X", "Y", "A", "c"], + ["W", "Q", "B", "f"], + ["W", "R", "C", "g"], + ["V", "Y", "C", "j"], + ["X", "Y", "B", "d"], + ], + columns=["cola", "colb", "colc", "tag"], + index=[3, 2, 0, 1, 7, 6, 4, 5, 9, 8], + ) + + right = DataFrame( + [ + ["W", "R", "C", 0], + ["W", "Q", "B", 3], + ["W", "Q", "B", 8], + ["X", "Y", "A", 1], + ["X", "Y", "A", 4], + ["X", "Y", "B", 5], + ["X", "Y", "C", 6], + ["X", "Y", "C", 9], + ["X", "Q", "C", -6], + ["X", "R", "C", -9], + ["V", "Y", "C", 7], + ["V", "R", "D", 2], + ["V", "R", "D", -1], + ["V", "Q", "A", -3], + ], + columns=["col1", "col2", "col3", "val"], + ).set_index(["col1", "col2", "col3"]) + + result = left.join(right, on=["cola", "colb", "colc"], how="left") + + expected = DataFrame( + [ + ["X", "Y", "C", "a", 6], + ["X", "Y", "C", "a", 9], + ["W", "Y", "C", "e", np.nan], + ["V", "Q", "A", "h", -3], + ["V", "R", "D", "i", 2], + ["V", "R", "D", "i", -1], + ["X", "Y", "D", "b", np.nan], + ["X", "Y", "A", "c", 1], + ["X", "Y", "A", "c", 4], + ["W", "Q", "B", "f", 3], + ["W", "Q", "B", "f", 8], + ["W", "R", "C", "g", 0], + ["V", "Y", "C", "j", 7], + ["X", "Y", "B", "d", 5], + ], + columns=["cola", "colb", "colc", "tag", "val"], + index=[3, 3, 2, 0, 1, 1, 7, 6, 6, 4, 4, 5, 9, 8], + ) + + tm.assert_frame_equal(result, expected) 
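+ # sort=True below should be equivalent to stably sorting the unsorted result on the join keys ["cola", "colb", "colc"], which is what the re-sorted expected frame checks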
+ + result = left.join(right, on=["cola", "colb", "colc"], how="left", sort=True) + + expected = expected.sort_values(["cola", "colb", "colc"], kind="mergesort") + + tm.assert_frame_equal(result, expected) + + def test_left_join_index_multi_match(self): + left = DataFrame( + [["c", 0], ["b", 1], ["a", 2], ["b", 3]], + columns=["tag", "val"], + index=[2, 0, 1, 3], + ) + + right = DataFrame( + [ + ["a", "v"], + ["c", "w"], + ["c", "x"], + ["d", "y"], + ["a", "z"], + ["c", "r"], + ["e", "q"], + ["c", "s"], + ], + columns=["tag", "char"], + ).set_index("tag") + + result = left.join(right, on="tag", how="left") + + expected = DataFrame( + [ + ["c", 0, "w"], + ["c", 0, "x"], + ["c", 0, "r"], + ["c", 0, "s"], + ["b", 1, np.nan], + ["a", 2, "v"], + ["a", 2, "z"], + ["b", 3, np.nan], + ], + columns=["tag", "val", "char"], + index=[2, 2, 2, 2, 0, 1, 1, 3], + ) + + tm.assert_frame_equal(result, expected) + + result = left.join(right, on="tag", how="left", sort=True) + expected2 = expected.sort_values("tag", kind="mergesort") + + tm.assert_frame_equal(result, expected2) + + # GH7331 - maintain left frame order in left merge + result = merge(left, right.reset_index(), how="left", on="tag") + expected.index = np.arange(len(expected)) + tm.assert_frame_equal(result, expected) + + def test_left_merge_na_buglet(self): + left = DataFrame( + { + "id": list("abcde"), + "v1": np.random.randn(5), + "v2": np.random.randn(5), + "dummy": list("abcde"), + "v3": np.random.randn(5), + }, + columns=["id", "v1", "v2", "dummy", "v3"], + ) + right = DataFrame( + { + "id": ["a", "b", np.nan, np.nan, np.nan], + "sv3": [1.234, 5.678, np.nan, np.nan, np.nan], + } + ) + + result = merge(left, right, on="id", how="left") + + rdf = right.drop(["id"], axis=1) + expected = left.join(rdf) + tm.assert_frame_equal(result, expected) + + def test_merge_na_keys(self): + data = [ + [1950, "A", 1.5], + [1950, "B", 1.5], + [1955, "B", 1.5], + [1960, "B", np.nan], + [1970, "B", 4.0], + [1950, "C", 4.0], + [1960, "C", np.nan], + [1965, "C", 3.0], + [1970, "C", 4.0], + ] + + frame = DataFrame(data, columns=["year", "panel", "data"]) + + other_data = [ + [1960, "A", np.nan], + [1970, "A", np.nan], + [1955, "A", np.nan], + [1965, "A", np.nan], + [1965, "B", np.nan], + [1955, "C", np.nan], + ] + other = DataFrame(other_data, columns=["year", "panel", "data"]) + + result = frame.merge(other, how="outer") + + expected = frame.fillna(-999).merge(other.fillna(-999), how="outer") + expected = expected.replace(-999, np.nan) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("klass", [None, np.asarray, Series, Index]) + def test_merge_datetime_index(self, klass): + # see gh-19038 + df = DataFrame( + [1, 2, 3], ["2016-01-01", "2017-01-01", "2018-01-01"], columns=["a"] + ) + df.index = pd.to_datetime(df.index) + on_vector = df.index.year + + if klass is not None: + on_vector = klass(on_vector) + + expected = DataFrame({"a": [1, 2, 3], "key_1": [2016, 2017, 2018]}) + + result = df.merge(df, on=["a", on_vector], how="inner") + tm.assert_frame_equal(result, expected) + + expected = DataFrame( + {"key_0": [2016, 2017, 2018], "a_x": [1, 2, 3], "a_y": [1, 2, 3]} + ) + + result = df.merge(df, on=[df.index.year], how="inner") + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("merge_type", ["left", "right"]) + def test_merge_datetime_multi_index_empty_df(self, merge_type): + # see gh-36895 + + left = DataFrame( + data={ + "data": [1.5, 1.5], + }, + index=MultiIndex.from_tuples( + [[Timestamp("1950-01-01"), "A"], 
[Timestamp("1950-01-02"), "B"]], + names=["date", "panel"], + ), + ) + + right = DataFrame( + index=MultiIndex.from_tuples([], names=["date", "panel"]), columns=["state"] + ) + + expected_index = MultiIndex.from_tuples( + [[Timestamp("1950-01-01"), "A"], [Timestamp("1950-01-02"), "B"]], + names=["date", "panel"], + ) + + if merge_type == "left": + expected = DataFrame( + data={ + "data": [1.5, 1.5], + "state": [None, None], + }, + index=expected_index, + ) + results_merge = left.merge(right, how="left", on=["date", "panel"]) + results_join = left.join(right, how="left") + else: + expected = DataFrame( + data={ + "state": [None, None], + "data": [1.5, 1.5], + }, + index=expected_index, + ) + results_merge = right.merge(left, how="right", on=["date", "panel"]) + results_join = right.join(left, how="right") + + tm.assert_frame_equal(results_merge, expected) + tm.assert_frame_equal(results_join, expected) + + @pytest.fixture + def household(self): + household = DataFrame( + { + "household_id": [1, 2, 3], + "male": [0, 1, 0], + "wealth": [196087.3, 316478.7, 294750], + }, + columns=["household_id", "male", "wealth"], + ).set_index("household_id") + return household + + @pytest.fixture + def portfolio(self): + portfolio = DataFrame( + { + "household_id": [1, 2, 2, 3, 3, 3, 4], + "asset_id": [ + "nl0000301109", + "nl0000289783", + "gb00b03mlx29", + "gb00b03mlx29", + "lu0197800237", + "nl0000289965", + np.nan, + ], + "name": [ + "ABN Amro", + "Robeco", + "Royal Dutch Shell", + "Royal Dutch Shell", + "AAB Eastern Europe Equity Fund", + "Postbank BioTech Fonds", + np.nan, + ], + "share": [1.0, 0.4, 0.6, 0.15, 0.6, 0.25, 1.0], + }, + columns=["household_id", "asset_id", "name", "share"], + ).set_index(["household_id", "asset_id"]) + return portfolio + + @pytest.fixture + def expected(self): + expected = ( + DataFrame( + { + "male": [0, 1, 1, 0, 0, 0], + "wealth": [ + 196087.3, + 316478.7, + 316478.7, + 294750.0, + 294750.0, + 294750.0, + ], + "name": [ + "ABN Amro", + "Robeco", + "Royal Dutch Shell", + "Royal Dutch Shell", + "AAB Eastern Europe Equity Fund", + "Postbank BioTech Fonds", + ], + "share": [1.00, 0.40, 0.60, 0.15, 0.60, 0.25], + "household_id": [1, 2, 2, 3, 3, 3], + "asset_id": [ + "nl0000301109", + "nl0000289783", + "gb00b03mlx29", + "gb00b03mlx29", + "lu0197800237", + "nl0000289965", + ], + } + ) + .set_index(["household_id", "asset_id"]) + .reindex(columns=["male", "wealth", "name", "share"]) + ) + return expected + + def test_join_multi_levels(self, portfolio, household, expected): + portfolio = portfolio.copy() + household = household.copy() + + # GH 3662 + # merge multi-levels + result = household.join(portfolio, how="inner") + tm.assert_frame_equal(result, expected) + + def test_join_multi_levels_merge_equivalence(self, portfolio, household, expected): + portfolio = portfolio.copy() + household = household.copy() + + # equivalency + result = merge( + household.reset_index(), + portfolio.reset_index(), + on=["household_id"], + how="inner", + ).set_index(["household_id", "asset_id"]) + tm.assert_frame_equal(result, expected) + + def test_join_multi_levels_outer(self, portfolio, household, expected): + portfolio = portfolio.copy() + household = household.copy() + + result = household.join(portfolio, how="outer") + expected = concat( + [ + expected, + ( + DataFrame( + {"share": [1.00]}, + index=MultiIndex.from_tuples( + [(4, np.nan)], names=["household_id", "asset_id"] + ), + ) + ), + ], + axis=0, + sort=True, + ).reindex(columns=expected.columns) + tm.assert_frame_equal(result, 
expected) + + def test_join_multi_levels_invalid(self, portfolio, household): + portfolio = portfolio.copy() + household = household.copy() + + # invalid cases + household.index.name = "foo" + + with pytest.raises( + ValueError, match="cannot join with no overlapping index names" + ): + household.join(portfolio, how="inner") + + portfolio2 = portfolio.copy() + portfolio2.index.set_names(["household_id", "foo"]) + + with pytest.raises(ValueError, match="columns overlap but no suffix specified"): + portfolio2.join(portfolio, how="inner") + + def test_join_multi_levels2(self): + + # some more advanced merges + # GH6360 + household = DataFrame( + { + "household_id": [1, 2, 2, 3, 3, 3, 4], + "asset_id": [ + "nl0000301109", + "nl0000301109", + "gb00b03mlx29", + "gb00b03mlx29", + "lu0197800237", + "nl0000289965", + np.nan, + ], + "share": [1.0, 0.4, 0.6, 0.15, 0.6, 0.25, 1.0], + }, + columns=["household_id", "asset_id", "share"], + ).set_index(["household_id", "asset_id"]) + + log_return = DataFrame( + { + "asset_id": [ + "gb00b03mlx29", + "gb00b03mlx29", + "gb00b03mlx29", + "lu0197800237", + "lu0197800237", + ], + "t": [233, 234, 235, 180, 181], + "log_return": [ + 0.09604978, + -0.06524096, + 0.03532373, + 0.03025441, + 0.036997, + ], + } + ).set_index(["asset_id", "t"]) + + expected = ( + DataFrame( + { + "household_id": [2, 2, 2, 3, 3, 3, 3, 3], + "asset_id": [ + "gb00b03mlx29", + "gb00b03mlx29", + "gb00b03mlx29", + "gb00b03mlx29", + "gb00b03mlx29", + "gb00b03mlx29", + "lu0197800237", + "lu0197800237", + ], + "t": [233, 234, 235, 233, 234, 235, 180, 181], + "share": [0.6, 0.6, 0.6, 0.15, 0.15, 0.15, 0.6, 0.6], + "log_return": [ + 0.09604978, + -0.06524096, + 0.03532373, + 0.09604978, + -0.06524096, + 0.03532373, + 0.03025441, + 0.036997, + ], + } + ) + .set_index(["household_id", "asset_id", "t"]) + .reindex(columns=["share", "log_return"]) + ) + + # this is the equivalency + result = merge( + household.reset_index(), + log_return.reset_index(), + on=["asset_id"], + how="inner", + ).set_index(["household_id", "asset_id", "t"]) + tm.assert_frame_equal(result, expected) + + expected = ( + DataFrame( + { + "household_id": [1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4], + "asset_id": [ + "nl0000301109", + "nl0000301109", + "gb00b03mlx29", + "gb00b03mlx29", + "gb00b03mlx29", + "gb00b03mlx29", + "gb00b03mlx29", + "gb00b03mlx29", + "lu0197800237", + "lu0197800237", + "nl0000289965", + None, + ], + "t": [ + None, + None, + 233, + 234, + 235, + 233, + 234, + 235, + 180, + 181, + None, + None, + ], + "share": [ + 1.0, + 0.4, + 0.6, + 0.6, + 0.6, + 0.15, + 0.15, + 0.15, + 0.6, + 0.6, + 0.25, + 1.0, + ], + "log_return": [ + None, + None, + 0.09604978, + -0.06524096, + 0.03532373, + 0.09604978, + -0.06524096, + 0.03532373, + 0.03025441, + 0.036997, + None, + None, + ], + } + ) + .set_index(["household_id", "asset_id", "t"]) + .reindex(columns=["share", "log_return"]) + ) + + result = merge( + household.reset_index(), + log_return.reset_index(), + on=["asset_id"], + how="outer", + ).set_index(["household_id", "asset_id", "t"]) + + tm.assert_frame_equal(result, expected) + + +class TestJoinMultiMulti: + def test_join_multi_multi( + self, left_multi, right_multi, join_type, on_cols_multi, idx_cols_multi + ): + # Multi-index join tests + expected = ( + merge( + left_multi.reset_index(), + right_multi.reset_index(), + how=join_type, + on=on_cols_multi, + ) + .set_index(idx_cols_multi) + .sort_index() + ) + + result = left_multi.join(right_multi, how=join_type).sort_index() + tm.assert_frame_equal(result, expected) + + 
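# the variant below repeats this merge/join equivalence with every data column dropped, so only the MultiIndex structure drives the result +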
def test_join_multi_empty_frames( + self, left_multi, right_multi, join_type, on_cols_multi, idx_cols_multi + ): + + left_multi = left_multi.drop(columns=left_multi.columns) + right_multi = right_multi.drop(columns=right_multi.columns) + + expected = ( + merge( + left_multi.reset_index(), + right_multi.reset_index(), + how=join_type, + on=on_cols_multi, + ) + .set_index(idx_cols_multi) + .sort_index() + ) + + result = left_multi.join(right_multi, how=join_type).sort_index() + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("box", [None, np.asarray, Series, Index]) + def test_merge_datetime_index(self, box): + # see gh-19038 + df = DataFrame( + [1, 2, 3], ["2016-01-01", "2017-01-01", "2018-01-01"], columns=["a"] + ) + df.index = pd.to_datetime(df.index) + on_vector = df.index.year + + if box is not None: + on_vector = box(on_vector) + + expected = DataFrame({"a": [1, 2, 3], "key_1": [2016, 2017, 2018]}) + + result = df.merge(df, on=["a", on_vector], how="inner") + tm.assert_frame_equal(result, expected) + + expected = DataFrame( + {"key_0": [2016, 2017, 2018], "a_x": [1, 2, 3], "a_y": [1, 2, 3]} + ) + + result = df.merge(df, on=[df.index.year], how="inner") + tm.assert_frame_equal(result, expected) + + def test_single_common_level(self): + index_left = MultiIndex.from_tuples( + [("K0", "X0"), ("K0", "X1"), ("K1", "X2")], names=["key", "X"] + ) + + left = DataFrame( + {"A": ["A0", "A1", "A2"], "B": ["B0", "B1", "B2"]}, index=index_left + ) + + index_right = MultiIndex.from_tuples( + [("K0", "Y0"), ("K1", "Y1"), ("K2", "Y2"), ("K2", "Y3")], names=["key", "Y"] + ) + + right = DataFrame( + {"C": ["C0", "C1", "C2", "C3"], "D": ["D0", "D1", "D2", "D3"]}, + index=index_right, + ) + + result = left.join(right) + expected = merge( + left.reset_index(), right.reset_index(), on=["key"], how="inner" + ).set_index(["key", "X", "Y"]) + + tm.assert_frame_equal(result, expected) + + def test_join_multi_wrong_order(self): + # GH 25760 + # GH 28956 + + midx1 = MultiIndex.from_product([[1, 2], [3, 4]], names=["a", "b"]) + midx3 = MultiIndex.from_tuples([(4, 1), (3, 2), (3, 1)], names=["b", "a"]) + + left = DataFrame(index=midx1, data={"x": [10, 20, 30, 40]}) + right = DataFrame(index=midx3, data={"y": ["foo", "bar", "fing"]}) + + result = left.join(right) + + expected = DataFrame( + index=midx1, + data={"x": [10, 20, 30, 40], "y": ["fing", "foo", "bar", np.nan]}, + ) + + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/test_crosstab.py b/.local/lib/python3.8/site-packages/pandas/tests/reshape/test_crosstab.py new file mode 100644 index 0000000..cc6eec6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/reshape/test_crosstab.py @@ -0,0 +1,827 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_categorical_dtype + +import pandas as pd +from pandas import ( + CategoricalIndex, + DataFrame, + Index, + MultiIndex, + Series, + crosstab, +) +import pandas._testing as tm + + +class TestCrosstab: + def setup_method(self, method): + df = DataFrame( + { + "A": [ + "foo", + "foo", + "foo", + "foo", + "bar", + "bar", + "bar", + "bar", + "foo", + "foo", + "foo", + ], + "B": [ + "one", + "one", + "one", + "two", + "one", + "one", + "one", + "two", + "two", + "two", + "one", + ], + "C": [ + "dull", + "dull", + "shiny", + "dull", + "dull", + "shiny", + "shiny", + "dull", + "shiny", + "shiny", + "shiny", + ], + "D": np.random.randn(11), + "E": np.random.randn(11), + "F": np.random.randn(11), + } + ) + 
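# concatenating two copies below doubles every (A, B, C) combination, so the group sizes counted by the crosstab tests are all even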
+ self.df = pd.concat([df, df], ignore_index=True) + + def test_crosstab_single(self): + df = self.df + result = crosstab(df["A"], df["C"]) + expected = df.groupby(["A", "C"]).size().unstack() + tm.assert_frame_equal(result, expected.fillna(0).astype(np.int64)) + + def test_crosstab_multiple(self): + df = self.df + + result = crosstab(df["A"], [df["B"], df["C"]]) + expected = df.groupby(["A", "B", "C"]).size() + expected = expected.unstack("B").unstack("C").fillna(0).astype(np.int64) + tm.assert_frame_equal(result, expected) + + result = crosstab([df["B"], df["C"]], df["A"]) + expected = df.groupby(["B", "C", "A"]).size() + expected = expected.unstack("A").fillna(0).astype(np.int64) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("box", [np.array, list, tuple]) + def test_crosstab_ndarray(self, box): + # GH 44076 + a = box(np.random.randint(0, 5, size=100)) + b = box(np.random.randint(0, 3, size=100)) + c = box(np.random.randint(0, 10, size=100)) + + df = DataFrame({"a": a, "b": b, "c": c}) + + result = crosstab(a, [b, c], rownames=["a"], colnames=("b", "c")) + expected = crosstab(df["a"], [df["b"], df["c"]]) + tm.assert_frame_equal(result, expected) + + result = crosstab([b, c], a, colnames=["a"], rownames=("b", "c")) + expected = crosstab([df["b"], df["c"]], df["a"]) + tm.assert_frame_equal(result, expected) + + # assign arbitrary names + result = crosstab(a, c) + expected = crosstab(df["a"], df["c"]) + expected.index.names = ["row_0"] + expected.columns.names = ["col_0"] + tm.assert_frame_equal(result, expected) + + def test_crosstab_non_aligned(self): + # GH 17005 + a = Series([0, 1, 1], index=["a", "b", "c"]) + b = Series([3, 4, 3, 4, 3], index=["a", "b", "c", "d", "f"]) + c = np.array([3, 4, 3]) + + expected = DataFrame( + [[1, 0], [1, 1]], + index=Index([0, 1], name="row_0"), + columns=Index([3, 4], name="col_0"), + ) + + result = crosstab(a, b) + tm.assert_frame_equal(result, expected) + + result = crosstab(a, c) + tm.assert_frame_equal(result, expected) + + def test_crosstab_margins(self): + a = np.random.randint(0, 7, size=100) + b = np.random.randint(0, 3, size=100) + c = np.random.randint(0, 5, size=100) + + df = DataFrame({"a": a, "b": b, "c": c}) + + result = crosstab(a, [b, c], rownames=["a"], colnames=("b", "c"), margins=True) + + assert result.index.names == ("a",) + assert result.columns.names == ["b", "c"] + + all_cols = result["All", ""] + exp_cols = df.groupby(["a"]).size().astype("i8") + # to keep index.name + exp_margin = Series([len(df)], index=Index(["All"], name="a")) + exp_cols = pd.concat([exp_cols, exp_margin]) + exp_cols.name = ("All", "") + + tm.assert_series_equal(all_cols, exp_cols) + + all_rows = result.loc["All"] + exp_rows = df.groupby(["b", "c"]).size().astype("i8") + exp_rows = pd.concat([exp_rows, Series([len(df)], index=[("All", "")])]) + exp_rows.name = "All" + + exp_rows = exp_rows.reindex(all_rows.index) + exp_rows = exp_rows.fillna(0).astype(np.int64) + tm.assert_series_equal(all_rows, exp_rows) + + def test_crosstab_margins_set_margin_name(self): + # GH 15972 + a = np.random.randint(0, 7, size=100) + b = np.random.randint(0, 3, size=100) + c = np.random.randint(0, 5, size=100) + + df = DataFrame({"a": a, "b": b, "c": c}) + + result = crosstab( + a, + [b, c], + rownames=["a"], + colnames=("b", "c"), + margins=True, + margins_name="TOTAL", + ) + + assert result.index.names == ("a",) + assert result.columns.names == ["b", "c"] + + all_cols = result["TOTAL", ""] + exp_cols = df.groupby(["a"]).size().astype("i8") + # to keep 
index.name + exp_margin = Series([len(df)], index=Index(["TOTAL"], name="a")) + exp_cols = pd.concat([exp_cols, exp_margin]) + exp_cols.name = ("TOTAL", "") + + tm.assert_series_equal(all_cols, exp_cols) + + all_rows = result.loc["TOTAL"] + exp_rows = df.groupby(["b", "c"]).size().astype("i8") + exp_rows = pd.concat([exp_rows, Series([len(df)], index=[("TOTAL", "")])]) + exp_rows.name = "TOTAL" + + exp_rows = exp_rows.reindex(all_rows.index) + exp_rows = exp_rows.fillna(0).astype(np.int64) + tm.assert_series_equal(all_rows, exp_rows) + + msg = "margins_name argument must be a string" + for margins_name in [666, None, ["a", "b"]]: + with pytest.raises(ValueError, match=msg): + crosstab( + a, + [b, c], + rownames=["a"], + colnames=("b", "c"), + margins=True, + margins_name=margins_name, + ) + + def test_crosstab_pass_values(self): + a = np.random.randint(0, 7, size=100) + b = np.random.randint(0, 3, size=100) + c = np.random.randint(0, 5, size=100) + values = np.random.randn(100) + + table = crosstab( + [a, b], c, values, aggfunc=np.sum, rownames=["foo", "bar"], colnames=["baz"] + ) + + df = DataFrame({"foo": a, "bar": b, "baz": c, "values": values}) + + expected = df.pivot_table( + "values", index=["foo", "bar"], columns="baz", aggfunc=np.sum + ) + tm.assert_frame_equal(table, expected) + + def test_crosstab_dropna(self): + # GH 3820 + a = np.array(["foo", "foo", "foo", "bar", "bar", "foo", "foo"], dtype=object) + b = np.array(["one", "one", "two", "one", "two", "two", "two"], dtype=object) + c = np.array( + ["dull", "dull", "dull", "dull", "dull", "shiny", "shiny"], dtype=object + ) + res = crosstab(a, [b, c], rownames=["a"], colnames=["b", "c"], dropna=False) + m = MultiIndex.from_tuples( + [("one", "dull"), ("one", "shiny"), ("two", "dull"), ("two", "shiny")], + names=["b", "c"], + ) + tm.assert_index_equal(res.columns, m) + + def test_crosstab_no_overlap(self): + # GH 10291 + + s1 = Series([1, 2, 3], index=[1, 2, 3]) + s2 = Series([4, 5, 6], index=[4, 5, 6]) + + actual = crosstab(s1, s2) + expected = DataFrame( + index=Index([], dtype="int64", name="row_0"), + columns=Index([], dtype="int64", name="col_0"), + ) + + tm.assert_frame_equal(actual, expected) + + def test_margin_dropna(self): + # GH 12577 + # pivot_table counts null into margin ('All') + # when margins=True and dropna=True + + df = DataFrame({"a": [1, 2, 2, 2, 2, np.nan], "b": [3, 3, 4, 4, 4, 4]}) + actual = crosstab(df.a, df.b, margins=True, dropna=True) + expected = DataFrame([[1, 0, 1], [1, 3, 4], [2, 3, 5]]) + expected.index = Index([1.0, 2.0, "All"], name="a") + expected.columns = Index([3, 4, "All"], name="b") + tm.assert_frame_equal(actual, expected) + + def test_margin_dropna2(self): + + df = DataFrame( + {"a": [1, np.nan, np.nan, np.nan, 2, np.nan], "b": [3, np.nan, 4, 4, 4, 4]} + ) + actual = crosstab(df.a, df.b, margins=True, dropna=True) + expected = DataFrame([[1, 0, 1], [0, 1, 1], [1, 1, 2]]) + expected.index = Index([1.0, 2.0, "All"], name="a") + expected.columns = Index([3.0, 4.0, "All"], name="b") + tm.assert_frame_equal(actual, expected) + + def test_margin_dropna3(self): + + df = DataFrame( + {"a": [1, np.nan, np.nan, np.nan, np.nan, 2], "b": [3, 3, 4, 4, 4, 4]} + ) + actual = crosstab(df.a, df.b, margins=True, dropna=True) + expected = DataFrame([[1, 0, 1], [0, 1, 1], [1, 1, 2]]) + expected.index = Index([1.0, 2.0, "All"], name="a") + expected.columns = Index([3, 4, "All"], name="b") + tm.assert_frame_equal(actual, expected) + + def test_margin_dropna4(self): + # GH 12642 + # _add_margins raises KeyError:
Level None not found + # when margins=True and dropna=False + df = DataFrame({"a": [1, 2, 2, 2, 2, np.nan], "b": [3, 3, 4, 4, 4, 4]}) + actual = crosstab(df.a, df.b, margins=True, dropna=False) + expected = DataFrame([[1, 0, 1], [1, 3, 4], [2, 4, 6]]) + expected.index = Index([1.0, 2.0, "All"], name="a") + expected.columns = Index([3, 4, "All"], name="b") + tm.assert_frame_equal(actual, expected) + + def test_margin_dropna5(self): + df = DataFrame( + {"a": [1, np.nan, np.nan, np.nan, 2, np.nan], "b": [3, np.nan, 4, 4, 4, 4]} + ) + actual = crosstab(df.a, df.b, margins=True, dropna=False) + expected = DataFrame([[1, 0, 1], [0, 1, 1], [1, 4, 6]]) + expected.index = Index([1.0, 2.0, "All"], name="a") + expected.columns = Index([3.0, 4.0, "All"], name="b") + tm.assert_frame_equal(actual, expected) + + def test_margin_dropna6(self): + a = np.array(["foo", "foo", "foo", "bar", "bar", "foo", "foo"], dtype=object) + b = np.array(["one", "one", "two", "one", "two", np.nan, "two"], dtype=object) + c = np.array( + ["dull", "dull", "dull", "dull", "dull", "shiny", "shiny"], dtype=object + ) + + actual = crosstab( + a, [b, c], rownames=["a"], colnames=["b", "c"], margins=True, dropna=False + ) + m = MultiIndex.from_arrays( + [ + ["one", "one", "two", "two", "All"], + ["dull", "shiny", "dull", "shiny", ""], + ], + names=["b", "c"], + ) + expected = DataFrame( + [[1, 0, 1, 0, 2], [2, 0, 1, 1, 5], [3, 0, 2, 1, 7]], columns=m + ) + expected.index = Index(["bar", "foo", "All"], name="a") + tm.assert_frame_equal(actual, expected) + + actual = crosstab( + [a, b], c, rownames=["a", "b"], colnames=["c"], margins=True, dropna=False + ) + m = MultiIndex.from_arrays( + [["bar", "bar", "foo", "foo", "All"], ["one", "two", "one", "two", ""]], + names=["a", "b"], + ) + expected = DataFrame( + [[1, 0, 1], [1, 0, 1], [2, 0, 2], [1, 1, 2], [5, 2, 7]], index=m + ) + expected.columns = Index(["dull", "shiny", "All"], name="c") + tm.assert_frame_equal(actual, expected) + + actual = crosstab( + [a, b], c, rownames=["a", "b"], colnames=["c"], margins=True, dropna=True + ) + m = MultiIndex.from_arrays( + [["bar", "bar", "foo", "foo", "All"], ["one", "two", "one", "two", ""]], + names=["a", "b"], + ) + expected = DataFrame( + [[1, 0, 1], [1, 0, 1], [2, 0, 2], [1, 1, 2], [5, 1, 6]], index=m + ) + expected.columns = Index(["dull", "shiny", "All"], name="c") + tm.assert_frame_equal(actual, expected) + + def test_crosstab_normalize(self): + # Issue 12578 + df = DataFrame( + {"a": [1, 2, 2, 2, 2], "b": [3, 3, 4, 4, 4], "c": [1, 1, np.nan, 1, 1]} + ) + + rindex = Index([1, 2], name="a") + cindex = Index([3, 4], name="b") + full_normal = DataFrame([[0.2, 0], [0.2, 0.6]], index=rindex, columns=cindex) + row_normal = DataFrame([[1.0, 0], [0.25, 0.75]], index=rindex, columns=cindex) + col_normal = DataFrame([[0.5, 0], [0.5, 1.0]], index=rindex, columns=cindex) + + # Check all normalize args + tm.assert_frame_equal(crosstab(df.a, df.b, normalize="all"), full_normal) + tm.assert_frame_equal(crosstab(df.a, df.b, normalize=True), full_normal) + tm.assert_frame_equal(crosstab(df.a, df.b, normalize="index"), row_normal) + tm.assert_frame_equal(crosstab(df.a, df.b, normalize="columns"), col_normal) + tm.assert_frame_equal( + crosstab(df.a, df.b, normalize=1), + crosstab(df.a, df.b, normalize="columns"), + ) + tm.assert_frame_equal( + crosstab(df.a, df.b, normalize=0), crosstab(df.a, df.b, normalize="index") + ) + + row_normal_margins = DataFrame( + [[1.0, 0], [0.25, 0.75], [0.4, 0.6]], + index=Index([1, 2, "All"], name="a", dtype="object"), + 
columns=Index([3, 4], name="b", dtype="object"), + ) + col_normal_margins = DataFrame( + [[0.5, 0, 0.2], [0.5, 1.0, 0.8]], + index=Index([1, 2], name="a", dtype="object"), + columns=Index([3, 4, "All"], name="b", dtype="object"), + ) + + all_normal_margins = DataFrame( + [[0.2, 0, 0.2], [0.2, 0.6, 0.8], [0.4, 0.6, 1]], + index=Index([1, 2, "All"], name="a", dtype="object"), + columns=Index([3, 4, "All"], name="b", dtype="object"), + ) + tm.assert_frame_equal( + crosstab(df.a, df.b, normalize="index", margins=True), row_normal_margins + ) + tm.assert_frame_equal( + crosstab(df.a, df.b, normalize="columns", margins=True), col_normal_margins + ) + tm.assert_frame_equal( + crosstab(df.a, df.b, normalize=True, margins=True), all_normal_margins + ) + + def test_crosstab_normalize_arrays(self): + # GH#12578 + df = DataFrame( + {"a": [1, 2, 2, 2, 2], "b": [3, 3, 4, 4, 4], "c": [1, 1, np.nan, 1, 1]} + ) + + # Test arrays + crosstab( + [np.array([1, 1, 2, 2]), np.array([1, 2, 1, 2])], np.array([1, 2, 1, 2]) + ) + + # Test with aggfunc + norm_counts = DataFrame( + [[0.25, 0, 0.25], [0.25, 0.5, 0.75], [0.5, 0.5, 1]], + index=Index([1, 2, "All"], name="a", dtype="object"), + columns=Index([3, 4, "All"], name="b"), + ) + test_case = crosstab( + df.a, df.b, df.c, aggfunc="count", normalize="all", margins=True + ) + tm.assert_frame_equal(test_case, norm_counts) + + df = DataFrame( + {"a": [1, 2, 2, 2, 2], "b": [3, 3, 4, 4, 4], "c": [0, 4, np.nan, 3, 3]} + ) + + norm_sum = DataFrame( + [[0, 0, 0.0], [0.4, 0.6, 1], [0.4, 0.6, 1]], + index=Index([1, 2, "All"], name="a", dtype="object"), + columns=Index([3, 4, "All"], name="b", dtype="object"), + ) + test_case = crosstab( + df.a, df.b, df.c, aggfunc=np.sum, normalize="all", margins=True + ) + tm.assert_frame_equal(test_case, norm_sum) + + def test_crosstab_with_empties(self, using_array_manager): + # Check handling of empties + df = DataFrame( + { + "a": [1, 2, 2, 2, 2], + "b": [3, 3, 4, 4, 4], + "c": [np.nan, np.nan, np.nan, np.nan, np.nan], + } + ) + + empty = DataFrame( + [[0.0, 0.0], [0.0, 0.0]], + index=Index([1, 2], name="a", dtype="int64"), + columns=Index([3, 4], name="b"), + ) + + for i in [True, "index", "columns"]: + calculated = crosstab(df.a, df.b, values=df.c, aggfunc="count", normalize=i) + tm.assert_frame_equal(empty, calculated) + + nans = DataFrame( + [[0.0, np.nan], [0.0, 0.0]], + index=Index([1, 2], name="a", dtype="int64"), + columns=Index([3, 4], name="b"), + ) + if using_array_manager: + # INFO(ArrayManager) column without NaNs can preserve int dtype + nans[3] = nans[3].astype("int64") + + calculated = crosstab(df.a, df.b, values=df.c, aggfunc="count", normalize=False) + tm.assert_frame_equal(nans, calculated) + + def test_crosstab_errors(self): + # Issue 12578 + + df = DataFrame( + {"a": [1, 2, 2, 2, 2], "b": [3, 3, 4, 4, 4], "c": [1, 1, np.nan, 1, 1]} + ) + + error = "values cannot be used without an aggfunc." 
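+ # values and aggfunc are only valid as a pair; passing either one
+ # alone raises the ValueErrors matched below.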
+ with pytest.raises(ValueError, match=error):
+ crosstab(df.a, df.b, values=df.c)
+
+ error = "aggfunc cannot be used without values"
+ with pytest.raises(ValueError, match=error):
+ crosstab(df.a, df.b, aggfunc=np.mean)
+
+ error = "Not a valid normalize argument"
+ with pytest.raises(ValueError, match=error):
+ crosstab(df.a, df.b, normalize="42")
+
+ with pytest.raises(ValueError, match=error):
+ crosstab(df.a, df.b, normalize=42)
+
+ error = "Not a valid margins argument"
+ with pytest.raises(ValueError, match=error):
+ crosstab(df.a, df.b, normalize="all", margins=42)
+
+ def test_crosstab_with_categorical_columns(self):
+ # GH 8860
+ df = DataFrame(
+ {
+ "MAKE": ["Honda", "Acura", "Tesla", "Honda", "Honda", "Acura"],
+ "MODEL": ["Sedan", "Sedan", "Electric", "Pickup", "Sedan", "Sedan"],
+ }
+ )
+ categories = ["Sedan", "Electric", "Pickup"]
+ df["MODEL"] = df["MODEL"].astype("category").cat.set_categories(categories)
+ result = crosstab(df["MAKE"], df["MODEL"])
+
+ expected_index = Index(["Acura", "Honda", "Tesla"], name="MAKE")
+ expected_columns = CategoricalIndex(
+ categories, categories=categories, ordered=False, name="MODEL"
+ )
+ expected_data = [[2, 0, 0], [2, 0, 1], [0, 1, 0]]
+ expected = DataFrame(
+ expected_data, index=expected_index, columns=expected_columns
+ )
+ tm.assert_frame_equal(result, expected)
+
+ def test_crosstab_with_numpy_size(self):
+ # GH 4003
+ df = DataFrame(
+ {
+ "A": ["one", "one", "two", "three"] * 6,
+ "B": ["A", "B", "C"] * 8,
+ "C": ["foo", "foo", "foo", "bar", "bar", "bar"] * 4,
+ "D": np.random.randn(24),
+ "E": np.random.randn(24),
+ }
+ )
+ result = crosstab(
+ index=[df["A"], df["B"]],
+ columns=[df["C"]],
+ margins=True,
+ aggfunc=np.size,
+ values=df["D"],
+ )
+ expected_index = MultiIndex(
+ levels=[["All", "one", "three", "two"], ["", "A", "B", "C"]],
+ codes=[[1, 1, 1, 2, 2, 2, 3, 3, 3, 0], [1, 2, 3, 1, 2, 3, 1, 2, 3, 0]],
+ names=["A", "B"],
+ )
+ expected_column = Index(["bar", "foo", "All"], dtype="object", name="C")
+ expected_data = np.array(
+ [
+ [2.0, 2.0, 4.0],
+ [2.0, 2.0, 4.0],
+ [2.0, 2.0, 4.0],
+ [2.0, np.nan, 2.0],
+ [np.nan, 2.0, 2.0],
+ [2.0, np.nan, 2.0],
+ [np.nan, 2.0, 2.0],
+ [2.0, np.nan, 2.0],
+ [np.nan, 2.0, 2.0],
+ [12.0, 12.0, 24.0],
+ ]
+ )
+ expected = DataFrame(
+ expected_data, index=expected_index, columns=expected_column
+ )
+ # aggfunc is np.size, resulting in integers
+ expected["All"] = expected["All"].astype("int64")
+ tm.assert_frame_equal(result, expected)
+
+ def test_crosstab_duplicate_names(self):
+ # GH 13279 / 22529
+
+ s1 = Series(range(3), name="foo")
+ s2_foo = Series(range(1, 4), name="foo")
+ s2_bar = Series(range(1, 4), name="bar")
+ s3 = Series(range(3), name="waldo")
+
+ # check result computed with duplicate labels against
+ # result computed with unique labels, then relabelled
+ mapper = {"bar": "foo"}
+
+ # duplicate row, column labels
+ result = crosstab(s1, s2_foo)
+ expected = crosstab(s1, s2_bar).rename_axis(columns=mapper, axis=1)
+ tm.assert_frame_equal(result, expected)
+
+ # duplicate row, unique column labels
+ result = crosstab([s1, s2_foo], s3)
+ expected = crosstab([s1, s2_bar], s3).rename_axis(index=mapper, axis=0)
+ tm.assert_frame_equal(result, expected)
+
+ # unique row, duplicate column labels
+ result = crosstab(s3, [s1, s2_foo])
+ expected = crosstab(s3, [s1, s2_bar]).rename_axis(columns=mapper, axis=1)
+
+ tm.assert_frame_equal(result, expected)
+
+ @pytest.mark.parametrize("names", [["a", ("b", "c")], [("a", "b"), "c"]])
+ def test_crosstab_tuple_name(self,
names): + s1 = Series(range(3), name=names[0]) + s2 = Series(range(1, 4), name=names[1]) + + mi = MultiIndex.from_arrays([range(3), range(1, 4)], names=names) + expected = Series(1, index=mi).unstack(1, fill_value=0) + + result = crosstab(s1, s2) + tm.assert_frame_equal(result, expected) + + def test_crosstab_both_tuple_names(self): + # GH 18321 + s1 = Series(range(3), name=("a", "b")) + s2 = Series(range(3), name=("c", "d")) + + expected = DataFrame( + np.eye(3, dtype="int64"), + index=Index(range(3), name=("a", "b")), + columns=Index(range(3), name=("c", "d")), + ) + result = crosstab(s1, s2) + tm.assert_frame_equal(result, expected) + + def test_crosstab_unsorted_order(self): + df = DataFrame({"b": [3, 1, 2], "a": [5, 4, 6]}, index=["C", "A", "B"]) + result = crosstab(df.index, [df.b, df.a]) + e_idx = Index(["A", "B", "C"], name="row_0") + e_columns = MultiIndex.from_tuples([(1, 4), (2, 6), (3, 5)], names=["b", "a"]) + expected = DataFrame( + [[1, 0, 0], [0, 1, 0], [0, 0, 1]], index=e_idx, columns=e_columns + ) + tm.assert_frame_equal(result, expected) + + def test_crosstab_normalize_multiple_columns(self): + # GH 15150 + df = DataFrame( + { + "A": ["one", "one", "two", "three"] * 6, + "B": ["A", "B", "C"] * 8, + "C": ["foo", "foo", "foo", "bar", "bar", "bar"] * 4, + "D": [0] * 24, + "E": [0] * 24, + } + ) + result = crosstab( + [df.A, df.B], + df.C, + values=df.D, + aggfunc=np.sum, + normalize=True, + margins=True, + ) + expected = DataFrame( + np.array([0] * 29 + [1], dtype=float).reshape(10, 3), + columns=Index(["bar", "foo", "All"], dtype="object", name="C"), + index=MultiIndex.from_tuples( + [ + ("one", "A"), + ("one", "B"), + ("one", "C"), + ("three", "A"), + ("three", "B"), + ("three", "C"), + ("two", "A"), + ("two", "B"), + ("two", "C"), + ("All", ""), + ], + names=["A", "B"], + ), + ) + tm.assert_frame_equal(result, expected) + + def test_margin_normalize(self): + # GH 27500 + df = DataFrame( + { + "A": ["foo", "foo", "foo", "foo", "foo", "bar", "bar", "bar", "bar"], + "B": ["one", "one", "one", "two", "two", "one", "one", "two", "two"], + "C": [ + "small", + "large", + "large", + "small", + "small", + "large", + "small", + "small", + "large", + ], + "D": [1, 2, 2, 3, 3, 4, 5, 6, 7], + "E": [2, 4, 5, 5, 6, 6, 8, 9, 9], + } + ) + # normalize on index + result = crosstab( + [df.A, df.B], df.C, margins=True, margins_name="Sub-Total", normalize=0 + ) + expected = DataFrame( + [[0.5, 0.5], [0.5, 0.5], [0.666667, 0.333333], [0, 1], [0.444444, 0.555556]] + ) + expected.index = MultiIndex( + levels=[["Sub-Total", "bar", "foo"], ["", "one", "two"]], + codes=[[1, 1, 2, 2, 0], [1, 2, 1, 2, 0]], + names=["A", "B"], + ) + expected.columns = Index(["large", "small"], dtype="object", name="C") + tm.assert_frame_equal(result, expected) + + # normalize on columns + result = crosstab( + [df.A, df.B], df.C, margins=True, margins_name="Sub-Total", normalize=1 + ) + expected = DataFrame( + [ + [0.25, 0.2, 0.222222], + [0.25, 0.2, 0.222222], + [0.5, 0.2, 0.333333], + [0, 0.4, 0.222222], + ] + ) + expected.columns = Index( + ["large", "small", "Sub-Total"], dtype="object", name="C" + ) + expected.index = MultiIndex( + levels=[["bar", "foo"], ["one", "two"]], + codes=[[0, 0, 1, 1], [0, 1, 0, 1]], + names=["A", "B"], + ) + tm.assert_frame_equal(result, expected) + + # normalize on both index and column + result = crosstab( + [df.A, df.B], df.C, margins=True, margins_name="Sub-Total", normalize=True + ) + expected = DataFrame( + [ + [0.111111, 0.111111, 0.222222], + [0.111111, 0.111111, 0.222222], + 
[0.222222, 0.111111, 0.333333], + [0.000000, 0.222222, 0.222222], + [0.444444, 0.555555, 1], + ] + ) + expected.columns = Index( + ["large", "small", "Sub-Total"], dtype="object", name="C" + ) + expected.index = MultiIndex( + levels=[["Sub-Total", "bar", "foo"], ["", "one", "two"]], + codes=[[1, 1, 2, 2, 0], [1, 2, 1, 2, 0]], + names=["A", "B"], + ) + tm.assert_frame_equal(result, expected) + + def test_margin_normalize_multiple_columns(self): + # GH 35144 + # use multiple columns with margins and normalization + df = DataFrame( + { + "A": ["foo", "foo", "foo", "foo", "foo", "bar", "bar", "bar", "bar"], + "B": ["one", "one", "one", "two", "two", "one", "one", "two", "two"], + "C": [ + "small", + "large", + "large", + "small", + "small", + "large", + "small", + "small", + "large", + ], + "D": [1, 2, 2, 3, 3, 4, 5, 6, 7], + "E": [2, 4, 5, 5, 6, 6, 8, 9, 9], + } + ) + result = crosstab( + index=df.C, + columns=[df.A, df.B], + margins=True, + margins_name="margin", + normalize=True, + ) + expected = DataFrame( + [ + [0.111111, 0.111111, 0.222222, 0.000000, 0.444444], + [0.111111, 0.111111, 0.111111, 0.222222, 0.555556], + [0.222222, 0.222222, 0.333333, 0.222222, 1.0], + ], + index=["large", "small", "margin"], + ) + expected.columns = MultiIndex( + levels=[["bar", "foo", "margin"], ["", "one", "two"]], + codes=[[0, 0, 1, 1, 2], [1, 2, 1, 2, 0]], + names=["A", "B"], + ) + expected.index.name = "C" + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("a_dtype", ["category", "int64"]) +@pytest.mark.parametrize("b_dtype", ["category", "int64"]) +def test_categoricals(a_dtype, b_dtype): + # https://github.com/pandas-dev/pandas/issues/37465 + g = np.random.RandomState(25982704) + a = Series(g.randint(0, 3, size=100)).astype(a_dtype) + b = Series(g.randint(0, 2, size=100)).astype(b_dtype) + result = crosstab(a, b, margins=True, dropna=False) + columns = Index([0, 1, "All"], dtype="object", name="col_0") + index = Index([0, 1, 2, "All"], dtype="object", name="row_0") + values = [[18, 16, 34], [18, 16, 34], [16, 16, 32], [52, 48, 100]] + expected = DataFrame(values, index, columns) + tm.assert_frame_equal(result, expected) + + # Verify when categorical does not have all values present + a.loc[a == 1] = 2 + a_is_cat = is_categorical_dtype(a.dtype) + assert not a_is_cat or a.value_counts().loc[1] == 0 + result = crosstab(a, b, margins=True, dropna=False) + values = [[18, 16, 34], [0, 0, 0], [34, 32, 66], [52, 48, 100]] + expected = DataFrame(values, index, columns) + if not a_is_cat: + expected = expected.loc[[0, 2, "All"]] + expected["All"] = expected["All"].astype("int64") + repr(result) + repr(expected) + repr(expected.loc[[0, 2, "All"]]) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/test_cut.py b/.local/lib/python3.8/site-packages/pandas/tests/reshape/test_cut.py new file mode 100644 index 0000000..1425686 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/reshape/test_cut.py @@ -0,0 +1,736 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + DatetimeIndex, + Index, + Interval, + IntervalIndex, + Series, + TimedeltaIndex, + Timestamp, + cut, + date_range, + isna, + qcut, + timedelta_range, + to_datetime, +) +import pandas._testing as tm +from pandas.api.types import CategoricalDtype as CDT +import pandas.core.reshape.tile as tmod + + +def test_simple(): + data = np.ones(5, dtype="int64") + result = cut(data, 4, labels=False) + + expected = 
np.array([1, 1, 1, 1, 1]) + tm.assert_numpy_array_equal(result, expected, check_dtype=False) + + +@pytest.mark.parametrize("func", [list, np.array]) +def test_bins(func): + data = func([0.2, 1.4, 2.5, 6.2, 9.7, 2.1]) + result, bins = cut(data, 3, retbins=True) + + intervals = IntervalIndex.from_breaks(bins.round(3)) + intervals = intervals.take([0, 0, 0, 1, 2, 0]) + expected = Categorical(intervals, ordered=True) + + tm.assert_categorical_equal(result, expected) + tm.assert_almost_equal(bins, np.array([0.1905, 3.36666667, 6.53333333, 9.7])) + + +def test_right(): + data = np.array([0.2, 1.4, 2.5, 6.2, 9.7, 2.1, 2.575]) + result, bins = cut(data, 4, right=True, retbins=True) + + intervals = IntervalIndex.from_breaks(bins.round(3)) + expected = Categorical(intervals, ordered=True) + expected = expected.take([0, 0, 0, 2, 3, 0, 0]) + + tm.assert_categorical_equal(result, expected) + tm.assert_almost_equal(bins, np.array([0.1905, 2.575, 4.95, 7.325, 9.7])) + + +def test_no_right(): + data = np.array([0.2, 1.4, 2.5, 6.2, 9.7, 2.1, 2.575]) + result, bins = cut(data, 4, right=False, retbins=True) + + intervals = IntervalIndex.from_breaks(bins.round(3), closed="left") + intervals = intervals.take([0, 0, 0, 2, 3, 0, 1]) + expected = Categorical(intervals, ordered=True) + + tm.assert_categorical_equal(result, expected) + tm.assert_almost_equal(bins, np.array([0.2, 2.575, 4.95, 7.325, 9.7095])) + + +def test_bins_from_interval_index(): + c = cut(range(5), 3) + expected = c + result = cut(range(5), bins=expected.categories) + tm.assert_categorical_equal(result, expected) + + expected = Categorical.from_codes( + np.append(c.codes, -1), categories=c.categories, ordered=True + ) + result = cut(range(6), bins=expected.categories) + tm.assert_categorical_equal(result, expected) + + +def test_bins_from_interval_index_doc_example(): + # Make sure we preserve the bins. 
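+ # cutting fresh data with c.categories reuses the original IntervalIndex
+ # edges verbatim instead of recomputing bins from the new values.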
+ ages = np.array([10, 15, 13, 12, 23, 25, 28, 59, 60]) + c = cut(ages, bins=[0, 18, 35, 70]) + expected = IntervalIndex.from_tuples([(0, 18), (18, 35), (35, 70)]) + tm.assert_index_equal(c.categories, expected) + + result = cut([25, 20, 50], bins=c.categories) + tm.assert_index_equal(result.categories, expected) + tm.assert_numpy_array_equal(result.codes, np.array([1, 1, 2], dtype="int8")) + + +def test_bins_not_overlapping_from_interval_index(): + # see gh-23980 + msg = "Overlapping IntervalIndex is not accepted" + ii = IntervalIndex.from_tuples([(0, 10), (2, 12), (4, 14)]) + + with pytest.raises(ValueError, match=msg): + cut([5, 6], bins=ii) + + +def test_bins_not_monotonic(): + msg = "bins must increase monotonically" + data = [0.2, 1.4, 2.5, 6.2, 9.7, 2.1] + + with pytest.raises(ValueError, match=msg): + cut(data, [0.1, 1.5, 1, 10]) + + +@pytest.mark.parametrize( + "x, bins, expected", + [ + ( + date_range("2017-12-31", periods=3), + [Timestamp.min, Timestamp("2018-01-01"), Timestamp.max], + IntervalIndex.from_tuples( + [ + (Timestamp.min, Timestamp("2018-01-01")), + (Timestamp("2018-01-01"), Timestamp.max), + ] + ), + ), + ( + [-1, 0, 1], + np.array( + [np.iinfo(np.int64).min, 0, np.iinfo(np.int64).max], dtype="int64" + ), + IntervalIndex.from_tuples( + [(np.iinfo(np.int64).min, 0), (0, np.iinfo(np.int64).max)] + ), + ), + ( + [ + np.timedelta64(-1, "ns"), + np.timedelta64(0, "ns"), + np.timedelta64(1, "ns"), + ], + np.array( + [ + np.timedelta64(-np.iinfo(np.int64).max, "ns"), + np.timedelta64(0, "ns"), + np.timedelta64(np.iinfo(np.int64).max, "ns"), + ] + ), + IntervalIndex.from_tuples( + [ + ( + np.timedelta64(-np.iinfo(np.int64).max, "ns"), + np.timedelta64(0, "ns"), + ), + ( + np.timedelta64(0, "ns"), + np.timedelta64(np.iinfo(np.int64).max, "ns"), + ), + ] + ), + ), + ], +) +def test_bins_monotonic_not_overflowing(x, bins, expected): + # GH 26045 + result = cut(x, bins) + tm.assert_index_equal(result.categories, expected) + + +def test_wrong_num_labels(): + msg = "Bin labels must be one fewer than the number of bin edges" + data = [0.2, 1.4, 2.5, 6.2, 9.7, 2.1] + + with pytest.raises(ValueError, match=msg): + cut(data, [0, 1, 10], labels=["foo", "bar", "baz"]) + + +@pytest.mark.parametrize( + "x,bins,msg", + [ + ([], 2, "Cannot cut empty array"), + ([1, 2, 3], 0.5, "`bins` should be a positive integer"), + ], +) +def test_cut_corner(x, bins, msg): + with pytest.raises(ValueError, match=msg): + cut(x, bins) + + +@pytest.mark.parametrize("arg", [2, np.eye(2), DataFrame(np.eye(2))]) +@pytest.mark.parametrize("cut_func", [cut, qcut]) +def test_cut_not_1d_arg(arg, cut_func): + msg = "Input array must be 1 dimensional" + with pytest.raises(ValueError, match=msg): + cut_func(arg, 2) + + +@pytest.mark.parametrize( + "data", + [ + [0, 1, 2, 3, 4, np.inf], + [-np.inf, 0, 1, 2, 3, 4], + [-np.inf, 0, 1, 2, 3, 4, np.inf], + ], +) +def test_int_bins_with_inf(data): + # GH 24314 + msg = "cannot specify integer `bins` when input data contains infinity" + with pytest.raises(ValueError, match=msg): + cut(data, bins=3) + + +def test_cut_out_of_range_more(): + # see gh-1511 + name = "x" + + ser = Series([0, -1, 0, 1, -3], name=name) + ind = cut(ser, [0, 1], labels=False) + + exp = Series([np.nan, np.nan, np.nan, 0, np.nan], name=name) + tm.assert_series_equal(ind, exp) + + +@pytest.mark.parametrize( + "right,breaks,closed", + [ + (True, [-1e-3, 0.25, 0.5, 0.75, 1], "right"), + (False, [0, 0.25, 0.5, 0.75, 1 + 1e-3], "left"), + ], +) +def test_labels(right, breaks, closed): + arr = 
np.tile(np.arange(0, 1.01, 0.1), 4) + + result, bins = cut(arr, 4, retbins=True, right=right) + ex_levels = IntervalIndex.from_breaks(breaks, closed=closed) + tm.assert_index_equal(result.categories, ex_levels) + + +def test_cut_pass_series_name_to_factor(): + name = "foo" + ser = Series(np.random.randn(100), name=name) + + factor = cut(ser, 4) + assert factor.name == name + + +def test_label_precision(): + arr = np.arange(0, 0.73, 0.01) + result = cut(arr, 4, precision=2) + + ex_levels = IntervalIndex.from_breaks([-0.00072, 0.18, 0.36, 0.54, 0.72]) + tm.assert_index_equal(result.categories, ex_levels) + + +@pytest.mark.parametrize("labels", [None, False]) +def test_na_handling(labels): + arr = np.arange(0, 0.75, 0.01) + arr[::3] = np.nan + + result = cut(arr, 4, labels=labels) + result = np.asarray(result) + + expected = np.where(isna(arr), np.nan, result) + tm.assert_almost_equal(result, expected) + + +def test_inf_handling(): + data = np.arange(6) + data_ser = Series(data, dtype="int64") + + bins = [-np.inf, 2, 4, np.inf] + result = cut(data, bins) + result_ser = cut(data_ser, bins) + + ex_uniques = IntervalIndex.from_breaks(bins) + tm.assert_index_equal(result.categories, ex_uniques) + + assert result[5] == Interval(4, np.inf) + assert result[0] == Interval(-np.inf, 2) + assert result_ser[5] == Interval(4, np.inf) + assert result_ser[0] == Interval(-np.inf, 2) + + +def test_cut_out_of_bounds(): + arr = np.random.randn(100) + result = cut(arr, [-1, 0, 1]) + + mask = isna(result) + ex_mask = (arr < -1) | (arr > 1) + tm.assert_numpy_array_equal(mask, ex_mask) + + +@pytest.mark.parametrize( + "get_labels,get_expected", + [ + ( + lambda labels: labels, + lambda labels: Categorical( + ["Medium"] + 4 * ["Small"] + ["Medium", "Large"], + categories=labels, + ordered=True, + ), + ), + ( + lambda labels: Categorical.from_codes([0, 1, 2], labels), + lambda labels: Categorical.from_codes([1] + 4 * [0] + [1, 2], labels), + ), + ], +) +def test_cut_pass_labels(get_labels, get_expected): + bins = [0, 25, 50, 100] + arr = [50, 5, 10, 15, 20, 30, 70] + labels = ["Small", "Medium", "Large"] + + result = cut(arr, bins, labels=get_labels(labels)) + tm.assert_categorical_equal(result, get_expected(labels)) + + +def test_cut_pass_labels_compat(): + # see gh-16459 + arr = [50, 5, 10, 15, 20, 30, 70] + labels = ["Good", "Medium", "Bad"] + + result = cut(arr, 3, labels=labels) + exp = cut(arr, 3, labels=Categorical(labels, categories=labels, ordered=True)) + tm.assert_categorical_equal(result, exp) + + +@pytest.mark.parametrize("x", [np.arange(11.0), np.arange(11.0) / 1e10]) +def test_round_frac_just_works(x): + # It works. 
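+ # smoke test only: cut must not raise when binning values at ordinary
+ # (1e0) or tiny (1e-10) magnitudes.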
+ cut(x, 2) + + +@pytest.mark.parametrize( + "val,precision,expected", + [ + (-117.9998, 3, -118), + (117.9998, 3, 118), + (117.9998, 2, 118), + (0.000123456, 2, 0.00012), + ], +) +def test_round_frac(val, precision, expected): + # see gh-1979 + result = tmod._round_frac(val, precision=precision) + assert result == expected + + +def test_cut_return_intervals(): + ser = Series([0, 1, 2, 3, 4, 5, 6, 7, 8]) + result = cut(ser, 3) + + exp_bins = np.linspace(0, 8, num=4).round(3) + exp_bins[0] -= 0.008 + + expected = Series( + IntervalIndex.from_breaks(exp_bins, closed="right").take( + [0, 0, 0, 1, 1, 1, 2, 2, 2] + ) + ).astype(CDT(ordered=True)) + tm.assert_series_equal(result, expected) + + +def test_series_ret_bins(): + # see gh-8589 + ser = Series(np.arange(4)) + result, bins = cut(ser, 2, retbins=True) + + expected = Series( + IntervalIndex.from_breaks([-0.003, 1.5, 3], closed="right").repeat(2) + ).astype(CDT(ordered=True)) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "kwargs,msg", + [ + ({"duplicates": "drop"}, None), + ({}, "Bin edges must be unique"), + ({"duplicates": "raise"}, "Bin edges must be unique"), + ({"duplicates": "foo"}, "invalid value for 'duplicates' parameter"), + ], +) +def test_cut_duplicates_bin(kwargs, msg): + # see gh-20947 + bins = [0, 2, 4, 6, 10, 10] + values = Series(np.array([1, 3, 5, 7, 9]), index=["a", "b", "c", "d", "e"]) + + if msg is not None: + with pytest.raises(ValueError, match=msg): + cut(values, bins, **kwargs) + else: + result = cut(values, bins, **kwargs) + expected = cut(values, pd.unique(bins)) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("data", [9.0, -9.0, 0.0]) +@pytest.mark.parametrize("length", [1, 2]) +def test_single_bin(data, length): + # see gh-14652, gh-15428 + ser = Series([data] * length) + result = cut(ser, 1, labels=False) + + expected = Series([0] * length, dtype=np.intp) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "array_1_writeable,array_2_writeable", [(True, True), (True, False), (False, False)] +) +def test_cut_read_only(array_1_writeable, array_2_writeable): + # issue 18773 + array_1 = np.arange(0, 100, 10) + array_1.flags.writeable = array_1_writeable + + array_2 = np.arange(0, 100, 10) + array_2.flags.writeable = array_2_writeable + + hundred_elements = np.arange(100) + tm.assert_categorical_equal( + cut(hundred_elements, array_1), cut(hundred_elements, array_2) + ) + + +@pytest.mark.parametrize( + "conv", + [ + lambda v: Timestamp(v), + lambda v: to_datetime(v), + lambda v: np.datetime64(v), + lambda v: Timestamp(v).to_pydatetime(), + ], +) +def test_datetime_bin(conv): + data = [np.datetime64("2012-12-13"), np.datetime64("2012-12-15")] + bin_data = ["2012-12-12", "2012-12-14", "2012-12-16"] + + expected = Series( + IntervalIndex( + [ + Interval(Timestamp(bin_data[0]), Timestamp(bin_data[1])), + Interval(Timestamp(bin_data[1]), Timestamp(bin_data[2])), + ] + ) + ).astype(CDT(ordered=True)) + + bins = [conv(v) for v in bin_data] + result = Series(cut(data, bins=bins)) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "data", + [ + to_datetime(Series(["2013-01-01", "2013-01-02", "2013-01-03"])), + [ + np.datetime64("2013-01-01"), + np.datetime64("2013-01-02"), + np.datetime64("2013-01-03"), + ], + np.array( + [ + np.datetime64("2013-01-01"), + np.datetime64("2013-01-02"), + np.datetime64("2013-01-03"), + ] + ), + DatetimeIndex(["2013-01-01", "2013-01-02", "2013-01-03"]), + ], +) +def test_datetime_cut(data): + 
# see gh-14714 + # + # Testing time data when it comes in various collection types. + result, _ = cut(data, 3, retbins=True) + expected = Series( + IntervalIndex( + [ + Interval( + Timestamp("2012-12-31 23:57:07.200000"), + Timestamp("2013-01-01 16:00:00"), + ), + Interval( + Timestamp("2013-01-01 16:00:00"), Timestamp("2013-01-02 08:00:00") + ), + Interval( + Timestamp("2013-01-02 08:00:00"), Timestamp("2013-01-03 00:00:00") + ), + ] + ) + ).astype(CDT(ordered=True)) + tm.assert_series_equal(Series(result), expected) + + +@pytest.mark.parametrize( + "bins", + [ + 3, + [ + Timestamp("2013-01-01 04:57:07.200000"), + Timestamp("2013-01-01 21:00:00"), + Timestamp("2013-01-02 13:00:00"), + Timestamp("2013-01-03 05:00:00"), + ], + ], +) +@pytest.mark.parametrize("box", [list, np.array, Index, Series]) +def test_datetime_tz_cut(bins, box): + # see gh-19872 + tz = "US/Eastern" + s = Series(date_range("20130101", periods=3, tz=tz)) + + if not isinstance(bins, int): + bins = box(bins) + + result = cut(s, bins) + expected = Series( + IntervalIndex( + [ + Interval( + Timestamp("2012-12-31 23:57:07.200000", tz=tz), + Timestamp("2013-01-01 16:00:00", tz=tz), + ), + Interval( + Timestamp("2013-01-01 16:00:00", tz=tz), + Timestamp("2013-01-02 08:00:00", tz=tz), + ), + Interval( + Timestamp("2013-01-02 08:00:00", tz=tz), + Timestamp("2013-01-03 00:00:00", tz=tz), + ), + ] + ) + ).astype(CDT(ordered=True)) + tm.assert_series_equal(result, expected) + + +def test_datetime_nan_error(): + msg = "bins must be of datetime64 dtype" + + with pytest.raises(ValueError, match=msg): + cut(date_range("20130101", periods=3), bins=[0, 2, 4]) + + +def test_datetime_nan_mask(): + result = cut( + date_range("20130102", periods=5), bins=date_range("20130101", periods=2) + ) + + mask = result.categories.isna() + tm.assert_numpy_array_equal(mask, np.array([False])) + + mask = result.isna() + tm.assert_numpy_array_equal(mask, np.array([False, True, True, True, True])) + + +@pytest.mark.parametrize("tz", [None, "UTC", "US/Pacific"]) +def test_datetime_cut_roundtrip(tz): + # see gh-19891 + ser = Series(date_range("20180101", periods=3, tz=tz)) + result, result_bins = cut(ser, 2, retbins=True) + + expected = cut(ser, result_bins) + tm.assert_series_equal(result, expected) + + expected_bins = DatetimeIndex( + ["2017-12-31 23:57:07.200000", "2018-01-02 00:00:00", "2018-01-03 00:00:00"] + ) + expected_bins = expected_bins.tz_localize(tz) + tm.assert_index_equal(result_bins, expected_bins) + + +def test_timedelta_cut_roundtrip(): + # see gh-19891 + ser = Series(timedelta_range("1day", periods=3)) + result, result_bins = cut(ser, 2, retbins=True) + + expected = cut(ser, result_bins) + tm.assert_series_equal(result, expected) + + expected_bins = TimedeltaIndex( + ["0 days 23:57:07.200000", "2 days 00:00:00", "3 days 00:00:00"] + ) + tm.assert_index_equal(result_bins, expected_bins) + + +@pytest.mark.parametrize("bins", [6, 7]) +@pytest.mark.parametrize( + "box, compare", + [ + (Series, tm.assert_series_equal), + (np.array, tm.assert_categorical_equal), + (list, tm.assert_equal), + ], +) +def test_cut_bool_coercion_to_int(bins, box, compare): + # issue 20303 + data_expected = box([0, 1, 1, 0, 1] * 10) + data_result = box([False, True, True, False, True] * 10) + expected = cut(data_expected, bins, duplicates="drop") + result = cut(data_result, bins, duplicates="drop") + compare(result, expected) + + +@pytest.mark.parametrize("labels", ["foo", 1, True]) +def test_cut_incorrect_labels(labels): + # GH 13318 + values = range(5) + msg = "Bin 
labels must either be False, None or passed in as a list-like argument" + with pytest.raises(ValueError, match=msg): + cut(values, 4, labels=labels) + + +@pytest.mark.parametrize("bins", [3, [0, 5, 15]]) +@pytest.mark.parametrize("right", [True, False]) +@pytest.mark.parametrize("include_lowest", [True, False]) +def test_cut_nullable_integer(bins, right, include_lowest): + a = np.random.randint(0, 10, size=50).astype(float) + a[::2] = np.nan + result = cut( + pd.array(a, dtype="Int64"), bins, right=right, include_lowest=include_lowest + ) + expected = cut(a, bins, right=right, include_lowest=include_lowest) + tm.assert_categorical_equal(result, expected) + + +@pytest.mark.parametrize( + "data, bins, labels, expected_codes, expected_labels", + [ + ([15, 17, 19], [14, 16, 18, 20], ["A", "B", "A"], [0, 1, 0], ["A", "B"]), + ([1, 3, 5], [0, 2, 4, 6, 8], [2, 0, 1, 2], [2, 0, 1], [0, 1, 2]), + ], +) +def test_cut_non_unique_labels(data, bins, labels, expected_codes, expected_labels): + # GH 33141 + result = cut(data, bins=bins, labels=labels, ordered=False) + expected = Categorical.from_codes( + expected_codes, categories=expected_labels, ordered=False + ) + tm.assert_categorical_equal(result, expected) + + +@pytest.mark.parametrize( + "data, bins, labels, expected_codes, expected_labels", + [ + ([15, 17, 19], [14, 16, 18, 20], ["C", "B", "A"], [0, 1, 2], ["C", "B", "A"]), + ([1, 3, 5], [0, 2, 4, 6, 8], [3, 0, 1, 2], [0, 1, 2], [3, 0, 1, 2]), + ], +) +def test_cut_unordered_labels(data, bins, labels, expected_codes, expected_labels): + # GH 33141 + result = cut(data, bins=bins, labels=labels, ordered=False) + expected = Categorical.from_codes( + expected_codes, categories=expected_labels, ordered=False + ) + tm.assert_categorical_equal(result, expected) + + +def test_cut_unordered_with_missing_labels_raises_error(): + # GH 33141 + msg = "'labels' must be provided if 'ordered = False'" + with pytest.raises(ValueError, match=msg): + cut([0.5, 3], bins=[0, 1, 2], ordered=False) + + +def test_cut_unordered_with_series_labels(): + # https://github.com/pandas-dev/pandas/issues/36603 + s = Series([1, 2, 3, 4, 5]) + bins = Series([0, 2, 4, 6]) + labels = Series(["a", "b", "c"]) + result = cut(s, bins=bins, labels=labels, ordered=False) + expected = Series(["a", "a", "b", "b", "c"], dtype="category") + tm.assert_series_equal(result, expected) + + +def test_cut_no_warnings(): + df = DataFrame({"value": np.random.randint(0, 100, 20)}) + labels = [f"{i} - {i + 9}" for i in range(0, 100, 10)] + with tm.assert_produces_warning(False): + df["group"] = cut(df.value, range(0, 105, 10), right=False, labels=labels) + + +def test_cut_with_duplicated_index_lowest_included(): + # GH 42185 + expected = Series( + [Interval(-0.001, 2, closed="right")] * 3 + + [Interval(2, 4, closed="right"), Interval(-0.001, 2, closed="right")], + index=[0, 1, 2, 3, 0], + dtype="category", + ).cat.as_ordered() + + s = Series([0, 1, 2, 3, 0], index=[0, 1, 2, 3, 0]) + result = cut(s, bins=[0, 2, 4], include_lowest=True) + tm.assert_series_equal(result, expected) + + +def test_cut_with_nonexact_categorical_indices(): + # GH 42424 + + ser = Series(range(0, 100)) + ser1 = cut(ser, 10).value_counts().head(5) + ser2 = cut(ser, 10).value_counts().tail(5) + result = DataFrame({"1": ser1, "2": ser2}) + + index = pd.CategoricalIndex( + [ + Interval(-0.099, 9.9, closed="right"), + Interval(9.9, 19.8, closed="right"), + Interval(19.8, 29.7, closed="right"), + Interval(29.7, 39.6, closed="right"), + Interval(39.6, 49.5, closed="right"), + 
Interval(49.5, 59.4, closed="right"), + Interval(59.4, 69.3, closed="right"), + Interval(69.3, 79.2, closed="right"), + Interval(79.2, 89.1, closed="right"), + Interval(89.1, 99, closed="right"), + ], + ordered=True, + ) + + expected = DataFrame( + {"1": [10] * 5 + [np.nan] * 5, "2": [np.nan] * 5 + [10] * 5}, index=index + ) + + tm.assert_frame_equal(expected, result) + + +def test_cut_with_timestamp_tuple_labels(): + # GH 40661 + labels = [(Timestamp(10),), (Timestamp(20),), (Timestamp(30),)] + result = cut([2, 4, 6], bins=[1, 3, 5, 7], labels=labels) + + expected = Categorical.from_codes([0, 1, 2], labels, ordered=True) + tm.assert_categorical_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/test_get_dummies.py b/.local/lib/python3.8/site-packages/pandas/tests/reshape/test_get_dummies.py new file mode 100644 index 0000000..653ea88 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/reshape/test_get_dummies.py @@ -0,0 +1,639 @@ +import re + +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_integer_dtype + +import pandas as pd +from pandas import ( + Categorical, + CategoricalIndex, + DataFrame, + Series, + get_dummies, +) +import pandas._testing as tm +from pandas.core.arrays.sparse import ( + SparseArray, + SparseDtype, +) + + +class TestGetDummies: + @pytest.fixture + def df(self): + return DataFrame({"A": ["a", "b", "a"], "B": ["b", "b", "c"], "C": [1, 2, 3]}) + + @pytest.fixture(params=["uint8", "i8", np.float64, bool, None]) + def dtype(self, request): + return np.dtype(request.param) + + @pytest.fixture(params=["dense", "sparse"]) + def sparse(self, request): + # params are strings to simplify reading test results, + # e.g. TestGetDummies::test_basic[uint8-sparse] instead of [uint8-True] + return request.param == "sparse" + + def effective_dtype(self, dtype): + if dtype is None: + return np.uint8 + return dtype + + def test_get_dummies_raises_on_dtype_object(self, df): + msg = "dtype=object is not a valid dtype for get_dummies" + with pytest.raises(ValueError, match=msg): + get_dummies(df, dtype="object") + + def test_get_dummies_basic(self, sparse, dtype): + s_list = list("abc") + s_series = Series(s_list) + s_series_index = Series(s_list, list("ABC")) + + expected = DataFrame( + {"a": [1, 0, 0], "b": [0, 1, 0], "c": [0, 0, 1]}, + dtype=self.effective_dtype(dtype), + ) + if sparse: + expected = expected.apply(SparseArray, fill_value=0.0) + result = get_dummies(s_list, sparse=sparse, dtype=dtype) + tm.assert_frame_equal(result, expected) + + result = get_dummies(s_series, sparse=sparse, dtype=dtype) + tm.assert_frame_equal(result, expected) + + expected.index = list("ABC") + result = get_dummies(s_series_index, sparse=sparse, dtype=dtype) + tm.assert_frame_equal(result, expected) + + def test_get_dummies_basic_types(self, sparse, dtype): + # GH 10531 + s_list = list("abc") + s_series = Series(s_list) + s_df = DataFrame( + {"a": [0, 1, 0, 1, 2], "b": ["A", "A", "B", "C", "C"], "c": [2, 3, 3, 3, 2]} + ) + + expected = DataFrame( + {"a": [1, 0, 0], "b": [0, 1, 0], "c": [0, 0, 1]}, + dtype=self.effective_dtype(dtype), + columns=list("abc"), + ) + if sparse: + if is_integer_dtype(dtype): + fill_value = 0 + elif dtype == bool: + fill_value = False + else: + fill_value = 0.0 + + expected = expected.apply(SparseArray, fill_value=fill_value) + result = get_dummies(s_list, sparse=sparse, dtype=dtype) + tm.assert_frame_equal(result, expected) + + result = get_dummies(s_series, sparse=sparse, dtype=dtype) + 
tm.assert_frame_equal(result, expected) + + result = get_dummies(s_df, columns=s_df.columns, sparse=sparse, dtype=dtype) + if sparse: + dtype_name = f"Sparse[{self.effective_dtype(dtype).name}, {fill_value}]" + else: + dtype_name = self.effective_dtype(dtype).name + + expected = Series({dtype_name: 8}) + result = result.dtypes.value_counts() + result.index = [str(i) for i in result.index] + tm.assert_series_equal(result, expected) + + result = get_dummies(s_df, columns=["a"], sparse=sparse, dtype=dtype) + + expected_counts = {"int64": 1, "object": 1} + expected_counts[dtype_name] = 3 + expected_counts.get(dtype_name, 0) + + expected = Series(expected_counts).sort_index() + result = result.dtypes.value_counts() + result.index = [str(i) for i in result.index] + result = result.sort_index() + tm.assert_series_equal(result, expected) + + def test_get_dummies_just_na(self, sparse): + just_na_list = [np.nan] + just_na_series = Series(just_na_list) + just_na_series_index = Series(just_na_list, index=["A"]) + + res_list = get_dummies(just_na_list, sparse=sparse) + res_series = get_dummies(just_na_series, sparse=sparse) + res_series_index = get_dummies(just_na_series_index, sparse=sparse) + + assert res_list.empty + assert res_series.empty + assert res_series_index.empty + + assert res_list.index.tolist() == [0] + assert res_series.index.tolist() == [0] + assert res_series_index.index.tolist() == ["A"] + + def test_get_dummies_include_na(self, sparse, dtype): + s = ["a", "b", np.nan] + res = get_dummies(s, sparse=sparse, dtype=dtype) + exp = DataFrame( + {"a": [1, 0, 0], "b": [0, 1, 0]}, dtype=self.effective_dtype(dtype) + ) + if sparse: + exp = exp.apply(SparseArray, fill_value=0.0) + tm.assert_frame_equal(res, exp) + + # Sparse dataframes do not allow nan labelled columns, see #GH8822 + res_na = get_dummies(s, dummy_na=True, sparse=sparse, dtype=dtype) + exp_na = DataFrame( + {np.nan: [0, 0, 1], "a": [1, 0, 0], "b": [0, 1, 0]}, + dtype=self.effective_dtype(dtype), + ) + exp_na = exp_na.reindex(["a", "b", np.nan], axis=1) + # hack (NaN handling in assert_index_equal) + exp_na.columns = res_na.columns + if sparse: + exp_na = exp_na.apply(SparseArray, fill_value=0.0) + tm.assert_frame_equal(res_na, exp_na) + + res_just_na = get_dummies([np.nan], dummy_na=True, sparse=sparse, dtype=dtype) + exp_just_na = DataFrame( + Series(1, index=[0]), columns=[np.nan], dtype=self.effective_dtype(dtype) + ) + tm.assert_numpy_array_equal(res_just_na.values, exp_just_na.values) + + def test_get_dummies_unicode(self, sparse): + # See GH 6885 - get_dummies chokes on unicode values + import unicodedata + + e = "e" + eacute = unicodedata.lookup("LATIN SMALL LETTER E WITH ACUTE") + s = [e, eacute, eacute] + res = get_dummies(s, prefix="letter", sparse=sparse) + exp = DataFrame( + {"letter_e": [1, 0, 0], f"letter_{eacute}": [0, 1, 1]}, dtype=np.uint8 + ) + if sparse: + exp = exp.apply(SparseArray, fill_value=0) + tm.assert_frame_equal(res, exp) + + def test_dataframe_dummies_all_obj(self, df, sparse): + df = df[["A", "B"]] + result = get_dummies(df, sparse=sparse) + expected = DataFrame( + {"A_a": [1, 0, 1], "A_b": [0, 1, 0], "B_b": [1, 1, 0], "B_c": [0, 0, 1]}, + dtype=np.uint8, + ) + if sparse: + expected = DataFrame( + { + "A_a": SparseArray([1, 0, 1], dtype="uint8"), + "A_b": SparseArray([0, 1, 0], dtype="uint8"), + "B_b": SparseArray([1, 1, 0], dtype="uint8"), + "B_c": SparseArray([0, 0, 1], dtype="uint8"), + } + ) + + tm.assert_frame_equal(result, expected) + + def test_dataframe_dummies_mix_default(self, df, 
sparse, dtype): + result = get_dummies(df, sparse=sparse, dtype=dtype) + if sparse: + arr = SparseArray + typ = SparseDtype(dtype, 0) + else: + arr = np.array + typ = dtype + expected = DataFrame( + { + "C": [1, 2, 3], + "A_a": arr([1, 0, 1], dtype=typ), + "A_b": arr([0, 1, 0], dtype=typ), + "B_b": arr([1, 1, 0], dtype=typ), + "B_c": arr([0, 0, 1], dtype=typ), + } + ) + expected = expected[["C", "A_a", "A_b", "B_b", "B_c"]] + tm.assert_frame_equal(result, expected) + + def test_dataframe_dummies_prefix_list(self, df, sparse): + prefixes = ["from_A", "from_B"] + result = get_dummies(df, prefix=prefixes, sparse=sparse) + expected = DataFrame( + { + "C": [1, 2, 3], + "from_A_a": [1, 0, 1], + "from_A_b": [0, 1, 0], + "from_B_b": [1, 1, 0], + "from_B_c": [0, 0, 1], + }, + dtype=np.uint8, + ) + expected[["C"]] = df[["C"]] + cols = ["from_A_a", "from_A_b", "from_B_b", "from_B_c"] + expected = expected[["C"] + cols] + + typ = SparseArray if sparse else Series + expected[cols] = expected[cols].apply(lambda x: typ(x)) + tm.assert_frame_equal(result, expected) + + def test_dataframe_dummies_prefix_str(self, df, sparse): + # not that you should do this... + result = get_dummies(df, prefix="bad", sparse=sparse) + bad_columns = ["bad_a", "bad_b", "bad_b", "bad_c"] + expected = DataFrame( + [[1, 1, 0, 1, 0], [2, 0, 1, 1, 0], [3, 1, 0, 0, 1]], + columns=["C"] + bad_columns, + dtype=np.uint8, + ) + expected = expected.astype({"C": np.int64}) + if sparse: + # work around astyping & assigning with duplicate columns + # https://github.com/pandas-dev/pandas/issues/14427 + expected = pd.concat( + [ + Series([1, 2, 3], name="C"), + Series([1, 0, 1], name="bad_a", dtype="Sparse[uint8]"), + Series([0, 1, 0], name="bad_b", dtype="Sparse[uint8]"), + Series([1, 1, 0], name="bad_b", dtype="Sparse[uint8]"), + Series([0, 0, 1], name="bad_c", dtype="Sparse[uint8]"), + ], + axis=1, + ) + + tm.assert_frame_equal(result, expected) + + def test_dataframe_dummies_subset(self, df, sparse): + result = get_dummies(df, prefix=["from_A"], columns=["A"], sparse=sparse) + expected = DataFrame( + { + "B": ["b", "b", "c"], + "C": [1, 2, 3], + "from_A_a": [1, 0, 1], + "from_A_b": [0, 1, 0], + }, + ) + cols = expected.columns + expected[cols[1:]] = expected[cols[1:]].astype(np.uint8) + expected[["C"]] = df[["C"]] + if sparse: + cols = ["from_A_a", "from_A_b"] + expected[cols] = expected[cols].astype(SparseDtype("uint8", 0)) + tm.assert_frame_equal(result, expected) + + def test_dataframe_dummies_prefix_sep(self, df, sparse): + result = get_dummies(df, prefix_sep="..", sparse=sparse) + expected = DataFrame( + { + "C": [1, 2, 3], + "A..a": [1, 0, 1], + "A..b": [0, 1, 0], + "B..b": [1, 1, 0], + "B..c": [0, 0, 1], + }, + dtype=np.uint8, + ) + expected[["C"]] = df[["C"]] + expected = expected[["C", "A..a", "A..b", "B..b", "B..c"]] + if sparse: + cols = ["A..a", "A..b", "B..b", "B..c"] + expected[cols] = expected[cols].astype(SparseDtype("uint8", 0)) + + tm.assert_frame_equal(result, expected) + + result = get_dummies(df, prefix_sep=["..", "__"], sparse=sparse) + expected = expected.rename(columns={"B..b": "B__b", "B..c": "B__c"}) + tm.assert_frame_equal(result, expected) + + result = get_dummies(df, prefix_sep={"A": "..", "B": "__"}, sparse=sparse) + tm.assert_frame_equal(result, expected) + + def test_dataframe_dummies_prefix_bad_length(self, df, sparse): + msg = re.escape( + "Length of 'prefix' (1) did not match the length of the columns being " + "encoded (2)" + ) + with pytest.raises(ValueError, match=msg): + get_dummies(df, prefix=["too 
few"], sparse=sparse) + + def test_dataframe_dummies_prefix_sep_bad_length(self, df, sparse): + msg = re.escape( + "Length of 'prefix_sep' (1) did not match the length of the columns being " + "encoded (2)" + ) + with pytest.raises(ValueError, match=msg): + get_dummies(df, prefix_sep=["bad"], sparse=sparse) + + def test_dataframe_dummies_prefix_dict(self, sparse): + prefixes = {"A": "from_A", "B": "from_B"} + df = DataFrame({"C": [1, 2, 3], "A": ["a", "b", "a"], "B": ["b", "b", "c"]}) + result = get_dummies(df, prefix=prefixes, sparse=sparse) + + expected = DataFrame( + { + "C": [1, 2, 3], + "from_A_a": [1, 0, 1], + "from_A_b": [0, 1, 0], + "from_B_b": [1, 1, 0], + "from_B_c": [0, 0, 1], + } + ) + + columns = ["from_A_a", "from_A_b", "from_B_b", "from_B_c"] + expected[columns] = expected[columns].astype(np.uint8) + if sparse: + expected[columns] = expected[columns].astype(SparseDtype("uint8", 0)) + + tm.assert_frame_equal(result, expected) + + def test_dataframe_dummies_with_na(self, df, sparse, dtype): + df.loc[3, :] = [np.nan, np.nan, np.nan] + result = get_dummies(df, dummy_na=True, sparse=sparse, dtype=dtype).sort_index( + axis=1 + ) + + if sparse: + arr = SparseArray + typ = SparseDtype(dtype, 0) + else: + arr = np.array + typ = dtype + + expected = DataFrame( + { + "C": [1, 2, 3, np.nan], + "A_a": arr([1, 0, 1, 0], dtype=typ), + "A_b": arr([0, 1, 0, 0], dtype=typ), + "A_nan": arr([0, 0, 0, 1], dtype=typ), + "B_b": arr([1, 1, 0, 0], dtype=typ), + "B_c": arr([0, 0, 1, 0], dtype=typ), + "B_nan": arr([0, 0, 0, 1], dtype=typ), + } + ).sort_index(axis=1) + + tm.assert_frame_equal(result, expected) + + result = get_dummies(df, dummy_na=False, sparse=sparse, dtype=dtype) + expected = expected[["C", "A_a", "A_b", "B_b", "B_c"]] + tm.assert_frame_equal(result, expected) + + def test_dataframe_dummies_with_categorical(self, df, sparse, dtype): + df["cat"] = Categorical(["x", "y", "y"]) + result = get_dummies(df, sparse=sparse, dtype=dtype).sort_index(axis=1) + if sparse: + arr = SparseArray + typ = SparseDtype(dtype, 0) + else: + arr = np.array + typ = dtype + + expected = DataFrame( + { + "C": [1, 2, 3], + "A_a": arr([1, 0, 1], dtype=typ), + "A_b": arr([0, 1, 0], dtype=typ), + "B_b": arr([1, 1, 0], dtype=typ), + "B_c": arr([0, 0, 1], dtype=typ), + "cat_x": arr([1, 0, 0], dtype=typ), + "cat_y": arr([0, 1, 1], dtype=typ), + } + ).sort_index(axis=1) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "get_dummies_kwargs,expected", + [ + ( + {"data": DataFrame({"ä": ["a"]})}, + DataFrame({"ä_a": [1]}, dtype=np.uint8), + ), + ( + {"data": DataFrame({"x": ["ä"]})}, + DataFrame({"x_ä": [1]}, dtype=np.uint8), + ), + ( + {"data": DataFrame({"x": ["a"]}), "prefix": "ä"}, + DataFrame({"ä_a": [1]}, dtype=np.uint8), + ), + ( + {"data": DataFrame({"x": ["a"]}), "prefix_sep": "ä"}, + DataFrame({"xäa": [1]}, dtype=np.uint8), + ), + ], + ) + def test_dataframe_dummies_unicode(self, get_dummies_kwargs, expected): + # GH22084 get_dummies incorrectly encodes unicode characters + # in dataframe column names + result = get_dummies(**get_dummies_kwargs) + tm.assert_frame_equal(result, expected) + + def test_get_dummies_basic_drop_first(self, sparse): + # GH12402 Add a new parameter `drop_first` to avoid collinearity + # Basic case + s_list = list("abc") + s_series = Series(s_list) + s_series_index = Series(s_list, list("ABC")) + + expected = DataFrame({"b": [0, 1, 0], "c": [0, 0, 1]}, dtype=np.uint8) + + result = get_dummies(s_list, drop_first=True, sparse=sparse) + if sparse: + expected = 
expected.apply(SparseArray, fill_value=0) + tm.assert_frame_equal(result, expected) + + result = get_dummies(s_series, drop_first=True, sparse=sparse) + tm.assert_frame_equal(result, expected) + + expected.index = list("ABC") + result = get_dummies(s_series_index, drop_first=True, sparse=sparse) + tm.assert_frame_equal(result, expected) + + def test_get_dummies_basic_drop_first_one_level(self, sparse): + # Test the case that categorical variable only has one level. + s_list = list("aaa") + s_series = Series(s_list) + s_series_index = Series(s_list, list("ABC")) + + expected = DataFrame(index=np.arange(3)) + + result = get_dummies(s_list, drop_first=True, sparse=sparse) + tm.assert_frame_equal(result, expected) + + result = get_dummies(s_series, drop_first=True, sparse=sparse) + tm.assert_frame_equal(result, expected) + + expected = DataFrame(index=list("ABC")) + result = get_dummies(s_series_index, drop_first=True, sparse=sparse) + tm.assert_frame_equal(result, expected) + + def test_get_dummies_basic_drop_first_NA(self, sparse): + # Test NA handling together with drop_first + s_NA = ["a", "b", np.nan] + res = get_dummies(s_NA, drop_first=True, sparse=sparse) + exp = DataFrame({"b": [0, 1, 0]}, dtype=np.uint8) + if sparse: + exp = exp.apply(SparseArray, fill_value=0) + + tm.assert_frame_equal(res, exp) + + res_na = get_dummies(s_NA, dummy_na=True, drop_first=True, sparse=sparse) + exp_na = DataFrame({"b": [0, 1, 0], np.nan: [0, 0, 1]}, dtype=np.uint8).reindex( + ["b", np.nan], axis=1 + ) + if sparse: + exp_na = exp_na.apply(SparseArray, fill_value=0) + tm.assert_frame_equal(res_na, exp_na) + + res_just_na = get_dummies( + [np.nan], dummy_na=True, drop_first=True, sparse=sparse + ) + exp_just_na = DataFrame(index=np.arange(1)) + tm.assert_frame_equal(res_just_na, exp_just_na) + + def test_dataframe_dummies_drop_first(self, df, sparse): + df = df[["A", "B"]] + result = get_dummies(df, drop_first=True, sparse=sparse) + expected = DataFrame({"A_b": [0, 1, 0], "B_c": [0, 0, 1]}, dtype=np.uint8) + if sparse: + expected = expected.apply(SparseArray, fill_value=0) + tm.assert_frame_equal(result, expected) + + def test_dataframe_dummies_drop_first_with_categorical(self, df, sparse, dtype): + df["cat"] = Categorical(["x", "y", "y"]) + result = get_dummies(df, drop_first=True, sparse=sparse) + expected = DataFrame( + {"C": [1, 2, 3], "A_b": [0, 1, 0], "B_c": [0, 0, 1], "cat_y": [0, 1, 1]} + ) + cols = ["A_b", "B_c", "cat_y"] + expected[cols] = expected[cols].astype(np.uint8) + expected = expected[["C", "A_b", "B_c", "cat_y"]] + if sparse: + for col in cols: + expected[col] = SparseArray(expected[col]) + tm.assert_frame_equal(result, expected) + + def test_dataframe_dummies_drop_first_with_na(self, df, sparse): + df.loc[3, :] = [np.nan, np.nan, np.nan] + result = get_dummies( + df, dummy_na=True, drop_first=True, sparse=sparse + ).sort_index(axis=1) + expected = DataFrame( + { + "C": [1, 2, 3, np.nan], + "A_b": [0, 1, 0, 0], + "A_nan": [0, 0, 0, 1], + "B_c": [0, 0, 1, 0], + "B_nan": [0, 0, 0, 1], + } + ) + cols = ["A_b", "A_nan", "B_c", "B_nan"] + expected[cols] = expected[cols].astype(np.uint8) + expected = expected.sort_index(axis=1) + if sparse: + for col in cols: + expected[col] = SparseArray(expected[col]) + + tm.assert_frame_equal(result, expected) + + result = get_dummies(df, dummy_na=False, drop_first=True, sparse=sparse) + expected = expected[["C", "A_b", "B_c"]] + tm.assert_frame_equal(result, expected) + + def test_get_dummies_int_int(self): + data = Series([1, 2, 1]) + result = 
get_dummies(data) + expected = DataFrame([[1, 0], [0, 1], [1, 0]], columns=[1, 2], dtype=np.uint8) + tm.assert_frame_equal(result, expected) + + data = Series(Categorical(["a", "b", "a"])) + result = get_dummies(data) + expected = DataFrame( + [[1, 0], [0, 1], [1, 0]], columns=Categorical(["a", "b"]), dtype=np.uint8 + ) + tm.assert_frame_equal(result, expected) + + def test_get_dummies_int_df(self, dtype): + data = DataFrame( + { + "A": [1, 2, 1], + "B": Categorical(["a", "b", "a"]), + "C": [1, 2, 1], + "D": [1.0, 2.0, 1.0], + } + ) + columns = ["C", "D", "A_1", "A_2", "B_a", "B_b"] + expected = DataFrame( + [[1, 1.0, 1, 0, 1, 0], [2, 2.0, 0, 1, 0, 1], [1, 1.0, 1, 0, 1, 0]], + columns=columns, + ) + expected[columns[2:]] = expected[columns[2:]].astype(dtype) + result = get_dummies(data, columns=["A", "B"], dtype=dtype) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("ordered", [True, False]) + def test_dataframe_dummies_preserve_categorical_dtype(self, dtype, ordered): + # GH13854 + cat = Categorical(list("xy"), categories=list("xyz"), ordered=ordered) + result = get_dummies(cat, dtype=dtype) + + data = np.array([[1, 0, 0], [0, 1, 0]], dtype=self.effective_dtype(dtype)) + cols = CategoricalIndex( + cat.categories, categories=cat.categories, ordered=ordered + ) + expected = DataFrame(data, columns=cols, dtype=self.effective_dtype(dtype)) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("sparse", [True, False]) + def test_get_dummies_dont_sparsify_all_columns(self, sparse): + # GH18914 + df = DataFrame.from_dict({"GDP": [1, 2], "Nation": ["AB", "CD"]}) + df = get_dummies(df, columns=["Nation"], sparse=sparse) + df2 = df.reindex(columns=["GDP"]) + + tm.assert_frame_equal(df[["GDP"]], df2) + + def test_get_dummies_duplicate_columns(self, df): + # GH20839 + df.columns = ["A", "A", "A"] + result = get_dummies(df).sort_index(axis=1) + + expected = DataFrame( + [[1, 1, 0, 1, 0], [2, 0, 1, 1, 0], [3, 1, 0, 0, 1]], + columns=["A", "A_a", "A_b", "A_b", "A_c"], + dtype=np.uint8, + ).sort_index(axis=1) + + expected = expected.astype({"A": np.int64}) + + tm.assert_frame_equal(result, expected) + + def test_get_dummies_all_sparse(self): + df = DataFrame({"A": [1, 2]}) + result = get_dummies(df, columns=["A"], sparse=True) + dtype = SparseDtype("uint8", 0) + expected = DataFrame( + { + "A_1": SparseArray([1, 0], dtype=dtype), + "A_2": SparseArray([0, 1], dtype=dtype), + } + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("values", ["baz"]) + def test_get_dummies_with_string_values(self, values): + # issue #28383 + df = DataFrame( + { + "bar": [1, 2, 3, 4, 5, 6], + "foo": ["one", "one", "one", "two", "two", "two"], + "baz": ["A", "B", "C", "A", "B", "C"], + "zoo": ["x", "y", "z", "q", "w", "t"], + } + ) + + msg = "Input must be a list-like for parameter `columns`" + + with pytest.raises(TypeError, match=msg): + get_dummies(df, columns=values) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/test_melt.py b/.local/lib/python3.8/site-packages/pandas/tests/reshape/test_melt.py new file mode 100644 index 0000000..8a83cdc --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/reshape/test_melt.py @@ -0,0 +1,1082 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + lreshape, + melt, + wide_to_long, +) +import pandas._testing as tm + + +class TestMelt: + def setup_method(self, method): + self.df = tm.makeTimeDataFrame()[:10] + self.df["id1"] = (self.df["A"] > 
0).astype(np.int64) + self.df["id2"] = (self.df["B"] > 0).astype(np.int64) + + self.var_name = "var" + self.value_name = "val" + + self.df1 = DataFrame( + [ + [1.067683, -1.110463, 0.20867], + [-1.321405, 0.368915, -1.055342], + [-0.807333, 0.08298, -0.873361], + ] + ) + self.df1.columns = [list("ABC"), list("abc")] + self.df1.columns.names = ["CAP", "low"] + + def test_top_level_method(self): + result = melt(self.df) + assert result.columns.tolist() == ["variable", "value"] + + def test_method_signatures(self): + tm.assert_frame_equal(self.df.melt(), melt(self.df)) + + tm.assert_frame_equal( + self.df.melt(id_vars=["id1", "id2"], value_vars=["A", "B"]), + melt(self.df, id_vars=["id1", "id2"], value_vars=["A", "B"]), + ) + + tm.assert_frame_equal( + self.df.melt(var_name=self.var_name, value_name=self.value_name), + melt(self.df, var_name=self.var_name, value_name=self.value_name), + ) + + tm.assert_frame_equal(self.df1.melt(col_level=0), melt(self.df1, col_level=0)) + + def test_default_col_names(self): + result = self.df.melt() + assert result.columns.tolist() == ["variable", "value"] + + result1 = self.df.melt(id_vars=["id1"]) + assert result1.columns.tolist() == ["id1", "variable", "value"] + + result2 = self.df.melt(id_vars=["id1", "id2"]) + assert result2.columns.tolist() == ["id1", "id2", "variable", "value"] + + def test_value_vars(self): + result3 = self.df.melt(id_vars=["id1", "id2"], value_vars="A") + assert len(result3) == 10 + + result4 = self.df.melt(id_vars=["id1", "id2"], value_vars=["A", "B"]) + expected4 = DataFrame( + { + "id1": self.df["id1"].tolist() * 2, + "id2": self.df["id2"].tolist() * 2, + "variable": ["A"] * 10 + ["B"] * 10, + "value": (self.df["A"].tolist() + self.df["B"].tolist()), + }, + columns=["id1", "id2", "variable", "value"], + ) + tm.assert_frame_equal(result4, expected4) + + def test_value_vars_types(self): + # GH 15348 + expected = DataFrame( + { + "id1": self.df["id1"].tolist() * 2, + "id2": self.df["id2"].tolist() * 2, + "variable": ["A"] * 10 + ["B"] * 10, + "value": (self.df["A"].tolist() + self.df["B"].tolist()), + }, + columns=["id1", "id2", "variable", "value"], + ) + + for type_ in (tuple, list, np.array): + result = self.df.melt(id_vars=["id1", "id2"], value_vars=type_(("A", "B"))) + tm.assert_frame_equal(result, expected) + + def test_vars_work_with_multiindex(self): + expected = DataFrame( + { + ("A", "a"): self.df1[("A", "a")], + "CAP": ["B"] * len(self.df1), + "low": ["b"] * len(self.df1), + "value": self.df1[("B", "b")], + }, + columns=[("A", "a"), "CAP", "low", "value"], + ) + + result = self.df1.melt(id_vars=[("A", "a")], value_vars=[("B", "b")]) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "id_vars, value_vars, col_level, expected", + [ + ( + ["A"], + ["B"], + 0, + DataFrame( + { + "A": {0: 1.067683, 1: -1.321405, 2: -0.807333}, + "CAP": {0: "B", 1: "B", 2: "B"}, + "value": {0: -1.110463, 1: 0.368915, 2: 0.08298}, + } + ), + ), + ( + ["a"], + ["b"], + 1, + DataFrame( + { + "a": {0: 1.067683, 1: -1.321405, 2: -0.807333}, + "low": {0: "b", 1: "b", 2: "b"}, + "value": {0: -1.110463, 1: 0.368915, 2: 0.08298}, + } + ), + ), + ], + ) + def test_single_vars_work_with_multiindex( + self, id_vars, value_vars, col_level, expected + ): + result = self.df1.melt(id_vars, value_vars, col_level=col_level) + tm.assert_frame_equal(result, expected) + + def test_tuple_vars_fail_with_multiindex(self): + # melt should fail with an informative error message if + # the columns have a MultiIndex and a tuple is passed + # for 
id_vars or value_vars. + tuple_a = ("A", "a") + list_a = [tuple_a] + tuple_b = ("B", "b") + list_b = [tuple_b] + + msg = r"(id|value)_vars must be a list of tuples when columns are a MultiIndex" + for id_vars, value_vars in ( + (tuple_a, list_b), + (list_a, tuple_b), + (tuple_a, tuple_b), + ): + with pytest.raises(ValueError, match=msg): + self.df1.melt(id_vars=id_vars, value_vars=value_vars) + + def test_custom_var_name(self): + result5 = self.df.melt(var_name=self.var_name) + assert result5.columns.tolist() == ["var", "value"] + + result6 = self.df.melt(id_vars=["id1"], var_name=self.var_name) + assert result6.columns.tolist() == ["id1", "var", "value"] + + result7 = self.df.melt(id_vars=["id1", "id2"], var_name=self.var_name) + assert result7.columns.tolist() == ["id1", "id2", "var", "value"] + + result8 = self.df.melt( + id_vars=["id1", "id2"], value_vars="A", var_name=self.var_name + ) + assert result8.columns.tolist() == ["id1", "id2", "var", "value"] + + result9 = self.df.melt( + id_vars=["id1", "id2"], value_vars=["A", "B"], var_name=self.var_name + ) + expected9 = DataFrame( + { + "id1": self.df["id1"].tolist() * 2, + "id2": self.df["id2"].tolist() * 2, + self.var_name: ["A"] * 10 + ["B"] * 10, + "value": (self.df["A"].tolist() + self.df["B"].tolist()), + }, + columns=["id1", "id2", self.var_name, "value"], + ) + tm.assert_frame_equal(result9, expected9) + + def test_custom_value_name(self): + result10 = self.df.melt(value_name=self.value_name) + assert result10.columns.tolist() == ["variable", "val"] + + result11 = self.df.melt(id_vars=["id1"], value_name=self.value_name) + assert result11.columns.tolist() == ["id1", "variable", "val"] + + result12 = self.df.melt(id_vars=["id1", "id2"], value_name=self.value_name) + assert result12.columns.tolist() == ["id1", "id2", "variable", "val"] + + result13 = self.df.melt( + id_vars=["id1", "id2"], value_vars="A", value_name=self.value_name + ) + assert result13.columns.tolist() == ["id1", "id2", "variable", "val"] + + result14 = self.df.melt( + id_vars=["id1", "id2"], value_vars=["A", "B"], value_name=self.value_name + ) + expected14 = DataFrame( + { + "id1": self.df["id1"].tolist() * 2, + "id2": self.df["id2"].tolist() * 2, + "variable": ["A"] * 10 + ["B"] * 10, + self.value_name: (self.df["A"].tolist() + self.df["B"].tolist()), + }, + columns=["id1", "id2", "variable", self.value_name], + ) + tm.assert_frame_equal(result14, expected14) + + def test_custom_var_and_value_name(self): + + result15 = self.df.melt(var_name=self.var_name, value_name=self.value_name) + assert result15.columns.tolist() == ["var", "val"] + + result16 = self.df.melt( + id_vars=["id1"], var_name=self.var_name, value_name=self.value_name + ) + assert result16.columns.tolist() == ["id1", "var", "val"] + + result17 = self.df.melt( + id_vars=["id1", "id2"], var_name=self.var_name, value_name=self.value_name + ) + assert result17.columns.tolist() == ["id1", "id2", "var", "val"] + + result18 = self.df.melt( + id_vars=["id1", "id2"], + value_vars="A", + var_name=self.var_name, + value_name=self.value_name, + ) + assert result18.columns.tolist() == ["id1", "id2", "var", "val"] + + result19 = self.df.melt( + id_vars=["id1", "id2"], + value_vars=["A", "B"], + var_name=self.var_name, + value_name=self.value_name, + ) + expected19 = DataFrame( + { + "id1": self.df["id1"].tolist() * 2, + "id2": self.df["id2"].tolist() * 2, + self.var_name: ["A"] * 10 + ["B"] * 10, + self.value_name: (self.df["A"].tolist() + self.df["B"].tolist()), + }, + columns=["id1", "id2", self.var_name, 
self.value_name], + ) + tm.assert_frame_equal(result19, expected19) + + df20 = self.df.copy() + df20.columns.name = "foo" + result20 = df20.melt() + assert result20.columns.tolist() == ["foo", "value"] + + def test_col_level(self): + res1 = self.df1.melt(col_level=0) + res2 = self.df1.melt(col_level="CAP") + assert res1.columns.tolist() == ["CAP", "value"] + assert res2.columns.tolist() == ["CAP", "value"] + + def test_multiindex(self): + res = self.df1.melt() + assert res.columns.tolist() == ["CAP", "low", "value"] + + @pytest.mark.parametrize( + "col", + [ + pd.Series(pd.date_range("2010", periods=5, tz="US/Pacific")), + pd.Series(["a", "b", "c", "a", "d"], dtype="category"), + pd.Series([0, 1, 0, 0, 0]), + ], + ) + def test_pandas_dtypes(self, col): + # GH 15785 + df = DataFrame( + {"klass": range(5), "col": col, "attr1": [1, 0, 0, 0, 0], "attr2": col} + ) + expected_value = pd.concat([pd.Series([1, 0, 0, 0, 0]), col], ignore_index=True) + result = melt( + df, id_vars=["klass", "col"], var_name="attribute", value_name="value" + ) + expected = DataFrame( + { + 0: list(range(5)) * 2, + 1: pd.concat([col] * 2, ignore_index=True), + 2: ["attr1"] * 5 + ["attr2"] * 5, + 3: expected_value, + } + ) + expected.columns = ["klass", "col", "attribute", "value"] + tm.assert_frame_equal(result, expected) + + def test_preserve_category(self): + # GH 15853 + data = DataFrame({"A": [1, 2], "B": pd.Categorical(["X", "Y"])}) + result = melt(data, ["B"], ["A"]) + expected = DataFrame( + {"B": pd.Categorical(["X", "Y"]), "variable": ["A", "A"], "value": [1, 2]} + ) + + tm.assert_frame_equal(result, expected) + + def test_melt_missing_columns_raises(self): + # GH-23575 + # This test is to ensure that pandas raises an error if melting is + # attempted with column names absent from the dataframe + + # Generate data + df = DataFrame(np.random.randn(5, 4), columns=list("abcd")) + + # Try to melt with missing `value_vars` column name + msg = "The following '{Var}' are not present in the DataFrame: {Col}" + with pytest.raises( + KeyError, match=msg.format(Var="value_vars", Col="\\['C'\\]") + ): + df.melt(["a", "b"], ["C", "d"]) + + # Try to melt with missing `id_vars` column name + with pytest.raises(KeyError, match=msg.format(Var="id_vars", Col="\\['A'\\]")): + df.melt(["A", "b"], ["c", "d"]) + + # Multiple missing + with pytest.raises( + KeyError, + match=msg.format(Var="id_vars", Col="\\['not_here', 'or_there'\\]"), + ): + df.melt(["a", "b", "not_here", "or_there"], ["c", "d"]) + + # Multiindex melt fails if column is missing from multilevel melt + multi = df.copy() + multi.columns = [list("ABCD"), list("abcd")] + with pytest.raises(KeyError, match=msg.format(Var="id_vars", Col="\\['E'\\]")): + multi.melt([("E", "a")], [("B", "b")]) + # Multiindex fails if column is missing from single level melt + with pytest.raises( + KeyError, match=msg.format(Var="value_vars", Col="\\['F'\\]") + ): + multi.melt(["A"], ["F"], col_level=0) + + def test_melt_mixed_int_str_id_vars(self): + # GH 29718 + df = DataFrame({0: ["foo"], "a": ["bar"], "b": [1], "d": [2]}) + result = melt(df, id_vars=[0, "a"], value_vars=["b", "d"]) + expected = DataFrame( + {0: ["foo"] * 2, "a": ["bar"] * 2, "variable": list("bd"), "value": [1, 2]} + ) + tm.assert_frame_equal(result, expected) + + def test_melt_mixed_int_str_value_vars(self): + # GH 29718 + df = DataFrame({0: ["foo"], "a": ["bar"]}) + result = melt(df, value_vars=[0, "a"]) + expected = DataFrame({"variable": [0, "a"], "value": ["foo", "bar"]}) + tm.assert_frame_equal(result, expected) 
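+
+    # Added example (editor's sketch, not part of the upstream pandas suite):
+    # a minimal illustration of the melt behaviour exercised above. Melting
+    # moves the value_vars labels into a "variable" column and their cells
+    # into a "value" column, repeating the id_vars for each melted column.
+    def test_melt_minimal_sketch(self):
+        df = DataFrame({"id": [1, 2], "A": [3, 4], "B": [5, 6]})
+        result = df.melt(id_vars=["id"], value_vars=["A", "B"])
+        expected = DataFrame(
+            {
+                "id": [1, 2, 1, 2],
+                "variable": ["A", "A", "B", "B"],
+                "value": [3, 4, 5, 6],
+            }
+        )
+        tm.assert_frame_equal(result, expected)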
+ + def test_ignore_index(self): + # GH 17440 + df = DataFrame({"foo": [0], "bar": [1]}, index=["first"]) + result = melt(df, ignore_index=False) + expected = DataFrame( + {"variable": ["foo", "bar"], "value": [0, 1]}, index=["first", "first"] + ) + tm.assert_frame_equal(result, expected) + + def test_ignore_multiindex(self): + # GH 17440 + index = pd.MultiIndex.from_tuples( + [("first", "second"), ("first", "third")], names=["baz", "foobar"] + ) + df = DataFrame({"foo": [0, 1], "bar": [2, 3]}, index=index) + result = melt(df, ignore_index=False) + + expected_index = pd.MultiIndex.from_tuples( + [("first", "second"), ("first", "third")] * 2, names=["baz", "foobar"] + ) + expected = DataFrame( + {"variable": ["foo"] * 2 + ["bar"] * 2, "value": [0, 1, 2, 3]}, + index=expected_index, + ) + + tm.assert_frame_equal(result, expected) + + def test_ignore_index_name_and_type(self): + # GH 17440 + index = pd.Index(["foo", "bar"], dtype="category", name="baz") + df = DataFrame({"x": [0, 1], "y": [2, 3]}, index=index) + result = melt(df, ignore_index=False) + + expected_index = pd.Index(["foo", "bar"] * 2, dtype="category", name="baz") + expected = DataFrame( + {"variable": ["x", "x", "y", "y"], "value": [0, 1, 2, 3]}, + index=expected_index, + ) + + tm.assert_frame_equal(result, expected) + + def test_melt_with_duplicate_columns(self): + # GH#41951 + df = DataFrame([["id", 2, 3]], columns=["a", "b", "b"]) + result = df.melt(id_vars=["a"], value_vars=["b"]) + expected = DataFrame( + [["id", "b", 2], ["id", "b", 3]], columns=["a", "variable", "value"] + ) + tm.assert_frame_equal(result, expected) + + +class TestLreshape: + def test_pairs(self): + data = { + "birthdt": [ + "08jan2009", + "20dec2008", + "30dec2008", + "21dec2008", + "11jan2009", + ], + "birthwt": [1766, 3301, 1454, 3139, 4133], + "id": [101, 102, 103, 104, 105], + "sex": ["Male", "Female", "Female", "Female", "Female"], + "visitdt1": [ + "11jan2009", + "22dec2008", + "04jan2009", + "29dec2008", + "20jan2009", + ], + "visitdt2": ["21jan2009", np.nan, "22jan2009", "31dec2008", "03feb2009"], + "visitdt3": ["05feb2009", np.nan, np.nan, "02jan2009", "15feb2009"], + "wt1": [1823, 3338, 1549, 3298, 4306], + "wt2": [2011.0, np.nan, 1892.0, 3338.0, 4575.0], + "wt3": [2293.0, np.nan, np.nan, 3377.0, 4805.0], + } + + df = DataFrame(data) + + spec = { + "visitdt": [f"visitdt{i:d}" for i in range(1, 4)], + "wt": [f"wt{i:d}" for i in range(1, 4)], + } + result = lreshape(df, spec) + + exp_data = { + "birthdt": [ + "08jan2009", + "20dec2008", + "30dec2008", + "21dec2008", + "11jan2009", + "08jan2009", + "30dec2008", + "21dec2008", + "11jan2009", + "08jan2009", + "21dec2008", + "11jan2009", + ], + "birthwt": [ + 1766, + 3301, + 1454, + 3139, + 4133, + 1766, + 1454, + 3139, + 4133, + 1766, + 3139, + 4133, + ], + "id": [101, 102, 103, 104, 105, 101, 103, 104, 105, 101, 104, 105], + "sex": [ + "Male", + "Female", + "Female", + "Female", + "Female", + "Male", + "Female", + "Female", + "Female", + "Male", + "Female", + "Female", + ], + "visitdt": [ + "11jan2009", + "22dec2008", + "04jan2009", + "29dec2008", + "20jan2009", + "21jan2009", + "22jan2009", + "31dec2008", + "03feb2009", + "05feb2009", + "02jan2009", + "15feb2009", + ], + "wt": [ + 1823.0, + 3338.0, + 1549.0, + 3298.0, + 4306.0, + 2011.0, + 1892.0, + 3338.0, + 4575.0, + 2293.0, + 3377.0, + 4805.0, + ], + } + exp = DataFrame(exp_data, columns=result.columns) + tm.assert_frame_equal(result, exp) + + result = lreshape(df, spec, dropna=False) + exp_data = { + "birthdt": [ + "08jan2009", + "20dec2008", 
+ "30dec2008", + "21dec2008", + "11jan2009", + "08jan2009", + "20dec2008", + "30dec2008", + "21dec2008", + "11jan2009", + "08jan2009", + "20dec2008", + "30dec2008", + "21dec2008", + "11jan2009", + ], + "birthwt": [ + 1766, + 3301, + 1454, + 3139, + 4133, + 1766, + 3301, + 1454, + 3139, + 4133, + 1766, + 3301, + 1454, + 3139, + 4133, + ], + "id": [ + 101, + 102, + 103, + 104, + 105, + 101, + 102, + 103, + 104, + 105, + 101, + 102, + 103, + 104, + 105, + ], + "sex": [ + "Male", + "Female", + "Female", + "Female", + "Female", + "Male", + "Female", + "Female", + "Female", + "Female", + "Male", + "Female", + "Female", + "Female", + "Female", + ], + "visitdt": [ + "11jan2009", + "22dec2008", + "04jan2009", + "29dec2008", + "20jan2009", + "21jan2009", + np.nan, + "22jan2009", + "31dec2008", + "03feb2009", + "05feb2009", + np.nan, + np.nan, + "02jan2009", + "15feb2009", + ], + "wt": [ + 1823.0, + 3338.0, + 1549.0, + 3298.0, + 4306.0, + 2011.0, + np.nan, + 1892.0, + 3338.0, + 4575.0, + 2293.0, + np.nan, + np.nan, + 3377.0, + 4805.0, + ], + } + exp = DataFrame(exp_data, columns=result.columns) + tm.assert_frame_equal(result, exp) + + with tm.assert_produces_warning(FutureWarning): + result = lreshape(df, spec, dropna=False, label="foo") + + spec = { + "visitdt": [f"visitdt{i:d}" for i in range(1, 3)], + "wt": [f"wt{i:d}" for i in range(1, 4)], + } + msg = "All column lists must be same length" + with pytest.raises(ValueError, match=msg): + lreshape(df, spec) + + +class TestWideToLong: + def test_simple(self): + np.random.seed(123) + x = np.random.randn(3) + df = DataFrame( + { + "A1970": {0: "a", 1: "b", 2: "c"}, + "A1980": {0: "d", 1: "e", 2: "f"}, + "B1970": {0: 2.5, 1: 1.2, 2: 0.7}, + "B1980": {0: 3.2, 1: 1.3, 2: 0.1}, + "X": dict(zip(range(3), x)), + } + ) + df["id"] = df.index + exp_data = { + "X": x.tolist() + x.tolist(), + "A": ["a", "b", "c", "d", "e", "f"], + "B": [2.5, 1.2, 0.7, 3.2, 1.3, 0.1], + "year": [1970, 1970, 1970, 1980, 1980, 1980], + "id": [0, 1, 2, 0, 1, 2], + } + expected = DataFrame(exp_data) + expected = expected.set_index(["id", "year"])[["X", "A", "B"]] + result = wide_to_long(df, ["A", "B"], i="id", j="year") + tm.assert_frame_equal(result, expected) + + def test_stubs(self): + # GH9204 wide_to_long call should not modify 'stubs' list + df = DataFrame([[0, 1, 2, 3, 8], [4, 5, 6, 7, 9]]) + df.columns = ["id", "inc1", "inc2", "edu1", "edu2"] + stubs = ["inc", "edu"] + + wide_to_long(df, stubs, i="id", j="age") + + assert stubs == ["inc", "edu"] + + def test_separating_character(self): + # GH14779 + np.random.seed(123) + x = np.random.randn(3) + df = DataFrame( + { + "A.1970": {0: "a", 1: "b", 2: "c"}, + "A.1980": {0: "d", 1: "e", 2: "f"}, + "B.1970": {0: 2.5, 1: 1.2, 2: 0.7}, + "B.1980": {0: 3.2, 1: 1.3, 2: 0.1}, + "X": dict(zip(range(3), x)), + } + ) + df["id"] = df.index + exp_data = { + "X": x.tolist() + x.tolist(), + "A": ["a", "b", "c", "d", "e", "f"], + "B": [2.5, 1.2, 0.7, 3.2, 1.3, 0.1], + "year": [1970, 1970, 1970, 1980, 1980, 1980], + "id": [0, 1, 2, 0, 1, 2], + } + expected = DataFrame(exp_data) + expected = expected.set_index(["id", "year"])[["X", "A", "B"]] + result = wide_to_long(df, ["A", "B"], i="id", j="year", sep=".") + tm.assert_frame_equal(result, expected) + + def test_escapable_characters(self): + np.random.seed(123) + x = np.random.randn(3) + df = DataFrame( + { + "A(quarterly)1970": {0: "a", 1: "b", 2: "c"}, + "A(quarterly)1980": {0: "d", 1: "e", 2: "f"}, + "B(quarterly)1970": {0: 2.5, 1: 1.2, 2: 0.7}, + "B(quarterly)1980": {0: 3.2, 1: 1.3, 2: 0.1}, + 
"X": dict(zip(range(3), x)), + } + ) + df["id"] = df.index + exp_data = { + "X": x.tolist() + x.tolist(), + "A(quarterly)": ["a", "b", "c", "d", "e", "f"], + "B(quarterly)": [2.5, 1.2, 0.7, 3.2, 1.3, 0.1], + "year": [1970, 1970, 1970, 1980, 1980, 1980], + "id": [0, 1, 2, 0, 1, 2], + } + expected = DataFrame(exp_data) + expected = expected.set_index(["id", "year"])[ + ["X", "A(quarterly)", "B(quarterly)"] + ] + result = wide_to_long(df, ["A(quarterly)", "B(quarterly)"], i="id", j="year") + tm.assert_frame_equal(result, expected) + + def test_unbalanced(self): + # test that we can have a varying amount of time variables + df = DataFrame( + { + "A2010": [1.0, 2.0], + "A2011": [3.0, 4.0], + "B2010": [5.0, 6.0], + "X": ["X1", "X2"], + } + ) + df["id"] = df.index + exp_data = { + "X": ["X1", "X2", "X1", "X2"], + "A": [1.0, 2.0, 3.0, 4.0], + "B": [5.0, 6.0, np.nan, np.nan], + "id": [0, 1, 0, 1], + "year": [2010, 2010, 2011, 2011], + } + expected = DataFrame(exp_data) + expected = expected.set_index(["id", "year"])[["X", "A", "B"]] + result = wide_to_long(df, ["A", "B"], i="id", j="year") + tm.assert_frame_equal(result, expected) + + def test_character_overlap(self): + # Test we handle overlapping characters in both id_vars and value_vars + df = DataFrame( + { + "A11": ["a11", "a22", "a33"], + "A12": ["a21", "a22", "a23"], + "B11": ["b11", "b12", "b13"], + "B12": ["b21", "b22", "b23"], + "BB11": [1, 2, 3], + "BB12": [4, 5, 6], + "BBBX": [91, 92, 93], + "BBBZ": [91, 92, 93], + } + ) + df["id"] = df.index + expected = DataFrame( + { + "BBBX": [91, 92, 93, 91, 92, 93], + "BBBZ": [91, 92, 93, 91, 92, 93], + "A": ["a11", "a22", "a33", "a21", "a22", "a23"], + "B": ["b11", "b12", "b13", "b21", "b22", "b23"], + "BB": [1, 2, 3, 4, 5, 6], + "id": [0, 1, 2, 0, 1, 2], + "year": [11, 11, 11, 12, 12, 12], + } + ) + expected = expected.set_index(["id", "year"])[["BBBX", "BBBZ", "A", "B", "BB"]] + result = wide_to_long(df, ["A", "B", "BB"], i="id", j="year") + tm.assert_frame_equal(result.sort_index(axis=1), expected.sort_index(axis=1)) + + def test_invalid_separator(self): + # if an invalid separator is supplied a empty data frame is returned + sep = "nope!" 
+ df = DataFrame( + { + "A2010": [1.0, 2.0], + "A2011": [3.0, 4.0], + "B2010": [5.0, 6.0], + "X": ["X1", "X2"], + } + ) + df["id"] = df.index + exp_data = { + "X": "", + "A2010": [], + "A2011": [], + "B2010": [], + "id": [], + "year": [], + "A": [], + "B": [], + } + expected = DataFrame(exp_data).astype({"year": "int"}) + expected = expected.set_index(["id", "year"])[ + ["X", "A2010", "A2011", "B2010", "A", "B"] + ] + expected.index = expected.index.set_levels([0, 1], level=0) + result = wide_to_long(df, ["A", "B"], i="id", j="year", sep=sep) + tm.assert_frame_equal(result.sort_index(axis=1), expected.sort_index(axis=1)) + + def test_num_string_disambiguation(self): + # Test that we can disambiguate number value_vars from + # string value_vars + df = DataFrame( + { + "A11": ["a11", "a22", "a33"], + "A12": ["a21", "a22", "a23"], + "B11": ["b11", "b12", "b13"], + "B12": ["b21", "b22", "b23"], + "BB11": [1, 2, 3], + "BB12": [4, 5, 6], + "Arating": [91, 92, 93], + "Arating_old": [91, 92, 93], + } + ) + df["id"] = df.index + expected = DataFrame( + { + "Arating": [91, 92, 93, 91, 92, 93], + "Arating_old": [91, 92, 93, 91, 92, 93], + "A": ["a11", "a22", "a33", "a21", "a22", "a23"], + "B": ["b11", "b12", "b13", "b21", "b22", "b23"], + "BB": [1, 2, 3, 4, 5, 6], + "id": [0, 1, 2, 0, 1, 2], + "year": [11, 11, 11, 12, 12, 12], + } + ) + expected = expected.set_index(["id", "year"])[ + ["Arating", "Arating_old", "A", "B", "BB"] + ] + result = wide_to_long(df, ["A", "B", "BB"], i="id", j="year") + tm.assert_frame_equal(result.sort_index(axis=1), expected.sort_index(axis=1)) + + def test_invalid_suffixtype(self): + # If all stub names end with a string, but a numeric suffix is + # assumed, an empty data frame is returned + df = DataFrame( + { + "Aone": [1.0, 2.0], + "Atwo": [3.0, 4.0], + "Bone": [5.0, 6.0], + "X": ["X1", "X2"], + } + ) + df["id"] = df.index + exp_data = { + "X": "", + "Aone": [], + "Atwo": [], + "Bone": [], + "id": [], + "year": [], + "A": [], + "B": [], + } + expected = DataFrame(exp_data).astype({"year": "int"}) + + expected = expected.set_index(["id", "year"]) + expected.index = expected.index.set_levels([0, 1], level=0) + result = wide_to_long(df, ["A", "B"], i="id", j="year") + tm.assert_frame_equal(result.sort_index(axis=1), expected.sort_index(axis=1)) + + def test_multiple_id_columns(self): + # Taken from http://www.ats.ucla.edu/stat/stata/modules/reshapel.htm + df = DataFrame( + { + "famid": [1, 1, 1, 2, 2, 2, 3, 3, 3], + "birth": [1, 2, 3, 1, 2, 3, 1, 2, 3], + "ht1": [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1], + "ht2": [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9], + } + ) + expected = DataFrame( + { + "ht": [ + 2.8, + 3.4, + 2.9, + 3.8, + 2.2, + 2.9, + 2.0, + 3.2, + 1.8, + 2.8, + 1.9, + 2.4, + 2.2, + 3.3, + 2.3, + 3.4, + 2.1, + 2.9, + ], + "famid": [1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3], + "birth": [1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3], + "age": [1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2], + } + ) + expected = expected.set_index(["famid", "birth", "age"])[["ht"]] + result = wide_to_long(df, "ht", i=["famid", "birth"], j="age") + tm.assert_frame_equal(result, expected) + + def test_non_unique_idvars(self): + # GH16382 + # Raise an error message if non-unique id vars (i) are passed + df = DataFrame( + {"A_A1": [1, 2, 3, 4, 5], "B_B1": [1, 2, 3, 4, 5], "x": [1, 1, 1, 1, 1]} + ) + msg = "the id variables need to uniquely identify each row" + with pytest.raises(ValueError, match=msg): + wide_to_long(df, ["A_A", "B_B"], i="x", 
j="colname") + + def test_cast_j_int(self): + df = DataFrame( + { + "actor_1": ["CCH Pounder", "Johnny Depp", "Christoph Waltz"], + "actor_2": ["Joel David Moore", "Orlando Bloom", "Rory Kinnear"], + "actor_fb_likes_1": [1000.0, 40000.0, 11000.0], + "actor_fb_likes_2": [936.0, 5000.0, 393.0], + "title": ["Avatar", "Pirates of the Caribbean", "Spectre"], + } + ) + + expected = DataFrame( + { + "actor": [ + "CCH Pounder", + "Johnny Depp", + "Christoph Waltz", + "Joel David Moore", + "Orlando Bloom", + "Rory Kinnear", + ], + "actor_fb_likes": [1000.0, 40000.0, 11000.0, 936.0, 5000.0, 393.0], + "num": [1, 1, 1, 2, 2, 2], + "title": [ + "Avatar", + "Pirates of the Caribbean", + "Spectre", + "Avatar", + "Pirates of the Caribbean", + "Spectre", + ], + } + ).set_index(["title", "num"]) + result = wide_to_long( + df, ["actor", "actor_fb_likes"], i="title", j="num", sep="_" + ) + + tm.assert_frame_equal(result, expected) + + def test_identical_stubnames(self): + df = DataFrame( + { + "A2010": [1.0, 2.0], + "A2011": [3.0, 4.0], + "B2010": [5.0, 6.0], + "A": ["X1", "X2"], + } + ) + msg = "stubname can't be identical to a column name" + with pytest.raises(ValueError, match=msg): + wide_to_long(df, ["A", "B"], i="A", j="colname") + + def test_nonnumeric_suffix(self): + df = DataFrame( + { + "treatment_placebo": [1.0, 2.0], + "treatment_test": [3.0, 4.0], + "result_placebo": [5.0, 6.0], + "A": ["X1", "X2"], + } + ) + expected = DataFrame( + { + "A": ["X1", "X2", "X1", "X2"], + "colname": ["placebo", "placebo", "test", "test"], + "result": [5.0, 6.0, np.nan, np.nan], + "treatment": [1.0, 2.0, 3.0, 4.0], + } + ) + expected = expected.set_index(["A", "colname"]) + result = wide_to_long( + df, ["result", "treatment"], i="A", j="colname", suffix="[a-z]+", sep="_" + ) + tm.assert_frame_equal(result, expected) + + def test_mixed_type_suffix(self): + df = DataFrame( + { + "A": ["X1", "X2"], + "result_1": [0, 9], + "result_foo": [5.0, 6.0], + "treatment_1": [1.0, 2.0], + "treatment_foo": [3.0, 4.0], + } + ) + expected = DataFrame( + { + "A": ["X1", "X2", "X1", "X2"], + "colname": ["1", "1", "foo", "foo"], + "result": [0.0, 9.0, 5.0, 6.0], + "treatment": [1.0, 2.0, 3.0, 4.0], + } + ).set_index(["A", "colname"]) + result = wide_to_long( + df, ["result", "treatment"], i="A", j="colname", suffix=".+", sep="_" + ) + tm.assert_frame_equal(result, expected) + + def test_float_suffix(self): + df = DataFrame( + { + "treatment_1.1": [1.0, 2.0], + "treatment_2.1": [3.0, 4.0], + "result_1.2": [5.0, 6.0], + "result_1": [0, 9], + "A": ["X1", "X2"], + } + ) + expected = DataFrame( + { + "A": ["X1", "X2", "X1", "X2", "X1", "X2", "X1", "X2"], + "colname": [1.2, 1.2, 1.0, 1.0, 1.1, 1.1, 2.1, 2.1], + "result": [5.0, 6.0, 0.0, 9.0, np.nan, np.nan, np.nan, np.nan], + "treatment": [np.nan, np.nan, np.nan, np.nan, 1.0, 2.0, 3.0, 4.0], + } + ) + expected = expected.set_index(["A", "colname"]) + result = wide_to_long( + df, ["result", "treatment"], i="A", j="colname", suffix="[0-9.]+", sep="_" + ) + tm.assert_frame_equal(result, expected) + + def test_col_substring_of_stubname(self): + # GH22468 + # Don't raise ValueError when a column name is a substring + # of a stubname that's been passed as a string + wide_data = { + "node_id": {0: 0, 1: 1, 2: 2, 3: 3, 4: 4}, + "A": {0: 0.80, 1: 0.0, 2: 0.25, 3: 1.0, 4: 0.81}, + "PA0": {0: 0.74, 1: 0.56, 2: 0.56, 3: 0.98, 4: 0.6}, + "PA1": {0: 0.77, 1: 0.64, 2: 0.52, 3: 0.98, 4: 0.67}, + "PA3": {0: 0.34, 1: 0.70, 2: 0.52, 3: 0.98, 4: 0.67}, + } + wide_df = DataFrame.from_dict(wide_data) + expected = 
wide_to_long(wide_df, stubnames=["PA"], i=["node_id", "A"], j="time") + result = wide_to_long(wide_df, stubnames="PA", i=["node_id", "A"], j="time") + tm.assert_frame_equal(result, expected) + + def test_warn_of_column_name_value(self): + # GH34731 + # raise a warning if the resultant value column name matches + # a name in the dataframe already (default name is "value") + df = DataFrame({"col": list("ABC"), "value": range(10, 16, 2)}) + expected = DataFrame( + [["A", "col", "A"], ["B", "col", "B"], ["C", "col", "C"]], + columns=["value", "variable", "value"], + ) + + with tm.assert_produces_warning(FutureWarning): + result = df.melt(id_vars="value") + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/test_pivot.py b/.local/lib/python3.8/site-packages/pandas/tests/reshape/test_pivot.py new file mode 100644 index 0000000..a023adf --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/reshape/test_pivot.py @@ -0,0 +1,2311 @@ +from datetime import ( + date, + datetime, + timedelta, +) +from itertools import product + +import numpy as np +import pytest + +from pandas.errors import PerformanceWarning + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + Grouper, + Index, + MultiIndex, + Series, + concat, + date_range, +) +import pandas._testing as tm +from pandas.api.types import CategoricalDtype as CDT +from pandas.core.reshape import reshape as reshape_lib +from pandas.core.reshape.pivot import pivot_table + + +@pytest.fixture(params=[True, False]) +def dropna(request): + return request.param + + +@pytest.fixture(params=[([0] * 4, [1] * 4), (range(0, 3), range(1, 4))]) +def interval_values(request, closed): + left, right = request.param + return Categorical(pd.IntervalIndex.from_arrays(left, right, closed)) + + +class TestPivotTable: + def setup_method(self, method): + self.data = DataFrame( + { + "A": [ + "foo", + "foo", + "foo", + "foo", + "bar", + "bar", + "bar", + "bar", + "foo", + "foo", + "foo", + ], + "B": [ + "one", + "one", + "one", + "two", + "one", + "one", + "one", + "two", + "two", + "two", + "one", + ], + "C": [ + "dull", + "dull", + "shiny", + "dull", + "dull", + "shiny", + "shiny", + "dull", + "shiny", + "shiny", + "shiny", + ], + "D": np.random.randn(11), + "E": np.random.randn(11), + "F": np.random.randn(11), + } + ) + + def test_pivot_table(self, observed): + index = ["A", "B"] + columns = "C" + table = pivot_table( + self.data, values="D", index=index, columns=columns, observed=observed + ) + + table2 = self.data.pivot_table( + values="D", index=index, columns=columns, observed=observed + ) + tm.assert_frame_equal(table, table2) + + # this works + pivot_table(self.data, values="D", index=index, observed=observed) + + if len(index) > 1: + assert table.index.names == tuple(index) + else: + assert table.index.name == index[0] + + if len(columns) > 1: + assert table.columns.names == columns + else: + assert table.columns.name == columns[0] + + expected = self.data.groupby(index + [columns])["D"].agg(np.mean).unstack() + tm.assert_frame_equal(table, expected) + + def test_pivot_table_categorical_observed_equal(self, observed): + # issue #24923 + df = DataFrame( + {"col1": list("abcde"), "col2": list("fghij"), "col3": [1, 2, 3, 4, 5]} + ) + + expected = df.pivot_table( + index="col1", values="col3", columns="col2", aggfunc=np.sum, fill_value=0 + ) + + expected.index = expected.index.astype("category") + expected.columns = expected.columns.astype("category") + + df.col1 = 
df.col1.astype("category") + df.col2 = df.col2.astype("category") + + result = df.pivot_table( + index="col1", + values="col3", + columns="col2", + aggfunc=np.sum, + fill_value=0, + observed=observed, + ) + + tm.assert_frame_equal(result, expected) + + def test_pivot_table_nocols(self): + df = DataFrame( + {"rows": ["a", "b", "c"], "cols": ["x", "y", "z"], "values": [1, 2, 3]} + ) + rs = df.pivot_table(columns="cols", aggfunc=np.sum) + xp = df.pivot_table(index="cols", aggfunc=np.sum).T + tm.assert_frame_equal(rs, xp) + + rs = df.pivot_table(columns="cols", aggfunc={"values": "mean"}) + xp = df.pivot_table(index="cols", aggfunc={"values": "mean"}).T + tm.assert_frame_equal(rs, xp) + + def test_pivot_table_dropna(self): + df = DataFrame( + { + "amount": {0: 60000, 1: 100000, 2: 50000, 3: 30000}, + "customer": {0: "A", 1: "A", 2: "B", 3: "C"}, + "month": {0: 201307, 1: 201309, 2: 201308, 3: 201310}, + "product": {0: "a", 1: "b", 2: "c", 3: "d"}, + "quantity": {0: 2000000, 1: 500000, 2: 1000000, 3: 1000000}, + } + ) + pv_col = df.pivot_table( + "quantity", "month", ["customer", "product"], dropna=False + ) + pv_ind = df.pivot_table( + "quantity", ["customer", "product"], "month", dropna=False + ) + + m = MultiIndex.from_tuples( + [ + ("A", "a"), + ("A", "b"), + ("A", "c"), + ("A", "d"), + ("B", "a"), + ("B", "b"), + ("B", "c"), + ("B", "d"), + ("C", "a"), + ("C", "b"), + ("C", "c"), + ("C", "d"), + ], + names=["customer", "product"], + ) + tm.assert_index_equal(pv_col.columns, m) + tm.assert_index_equal(pv_ind.index, m) + + def test_pivot_table_categorical(self): + + cat1 = Categorical( + ["a", "a", "b", "b"], categories=["a", "b", "z"], ordered=True + ) + cat2 = Categorical( + ["c", "d", "c", "d"], categories=["c", "d", "y"], ordered=True + ) + df = DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]}) + result = pivot_table(df, values="values", index=["A", "B"], dropna=True) + + exp_index = MultiIndex.from_arrays([cat1, cat2], names=["A", "B"]) + expected = DataFrame({"values": [1, 2, 3, 4]}, index=exp_index) + tm.assert_frame_equal(result, expected) + + def test_pivot_table_dropna_categoricals(self, dropna): + # GH 15193 + categories = ["a", "b", "c", "d"] + + df = DataFrame( + { + "A": ["a", "a", "a", "b", "b", "b", "c", "c", "c"], + "B": [1, 2, 3, 1, 2, 3, 1, 2, 3], + "C": range(0, 9), + } + ) + + df["A"] = df["A"].astype(CDT(categories, ordered=False)) + result = df.pivot_table(index="B", columns="A", values="C", dropna=dropna) + expected_columns = Series(["a", "b", "c"], name="A") + expected_columns = expected_columns.astype(CDT(categories, ordered=False)) + expected_index = Series([1, 2, 3], name="B") + expected = DataFrame( + [[0, 3, 6], [1, 4, 7], [2, 5, 8]], + index=expected_index, + columns=expected_columns, + ) + if not dropna: + # add back the non observed to compare + expected = expected.reindex(columns=Categorical(categories)).astype("float") + + tm.assert_frame_equal(result, expected) + + def test_pivot_with_non_observable_dropna(self, dropna): + # gh-21133 + df = DataFrame( + { + "A": Categorical( + [np.nan, "low", "high", "low", "high"], + categories=["low", "high"], + ordered=True, + ), + "B": [0.0, 1.0, 2.0, 3.0, 4.0], + } + ) + + result = df.pivot_table(index="A", values="B", dropna=dropna) + expected = DataFrame( + {"B": [2.0, 3.0]}, + index=Index( + Categorical.from_codes( + [0, 1], categories=["low", "high"], ordered=True + ), + name="A", + ), + ) + + tm.assert_frame_equal(result, expected) + + # gh-21378 + df = DataFrame( + { + "A": Categorical( + ["left", "low", 
"high", "low", "high"], + categories=["low", "high", "left"], + ordered=True, + ), + "B": range(5), + } + ) + + result = df.pivot_table(index="A", values="B", dropna=dropna) + expected = DataFrame( + {"B": [2, 3, 0]}, + index=Index( + Categorical.from_codes( + [0, 1, 2], categories=["low", "high", "left"], ordered=True + ), + name="A", + ), + ) + if not dropna: + expected["B"] = expected["B"].astype(float) + + tm.assert_frame_equal(result, expected) + + def test_pivot_with_interval_index(self, interval_values, dropna): + # GH 25814 + df = DataFrame({"A": interval_values, "B": 1}) + result = df.pivot_table(index="A", values="B", dropna=dropna) + expected = DataFrame({"B": 1}, index=Index(interval_values.unique(), name="A")) + if not dropna: + expected = expected.astype(float) + tm.assert_frame_equal(result, expected) + + def test_pivot_with_interval_index_margins(self): + # GH 25815 + ordered_cat = pd.IntervalIndex.from_arrays([0, 0, 1, 1], [1, 1, 2, 2]) + df = DataFrame( + { + "A": np.arange(4, 0, -1, dtype=np.intp), + "B": ["a", "b", "a", "b"], + "C": Categorical(ordered_cat, ordered=True).sort_values( + ascending=False + ), + } + ) + + pivot_tab = pivot_table( + df, index="C", columns="B", values="A", aggfunc="sum", margins=True + ) + + result = pivot_tab["All"] + expected = Series( + [3, 7, 10], + index=Index([pd.Interval(0, 1), pd.Interval(1, 2), "All"], name="C"), + name="All", + dtype=np.intp, + ) + tm.assert_series_equal(result, expected) + + def test_pass_array(self): + result = self.data.pivot_table("D", index=self.data.A, columns=self.data.C) + expected = self.data.pivot_table("D", index="A", columns="C") + tm.assert_frame_equal(result, expected) + + def test_pass_function(self): + result = self.data.pivot_table("D", index=lambda x: x // 5, columns=self.data.C) + expected = self.data.pivot_table("D", index=self.data.index // 5, columns="C") + tm.assert_frame_equal(result, expected) + + def test_pivot_table_multiple(self): + index = ["A", "B"] + columns = "C" + table = pivot_table(self.data, index=index, columns=columns) + expected = self.data.groupby(index + [columns]).agg(np.mean).unstack() + tm.assert_frame_equal(table, expected) + + def test_pivot_dtypes(self): + + # can convert dtypes + f = DataFrame( + { + "a": ["cat", "bat", "cat", "bat"], + "v": [1, 2, 3, 4], + "i": ["a", "b", "a", "b"], + } + ) + assert f.dtypes["v"] == "int64" + + z = pivot_table( + f, values="v", index=["a"], columns=["i"], fill_value=0, aggfunc=np.sum + ) + result = z.dtypes + expected = Series([np.dtype("int64")] * 2, index=Index(list("ab"), name="i")) + tm.assert_series_equal(result, expected) + + # cannot convert dtypes + f = DataFrame( + { + "a": ["cat", "bat", "cat", "bat"], + "v": [1.5, 2.5, 3.5, 4.5], + "i": ["a", "b", "a", "b"], + } + ) + assert f.dtypes["v"] == "float64" + + z = pivot_table( + f, values="v", index=["a"], columns=["i"], fill_value=0, aggfunc=np.mean + ) + result = z.dtypes + expected = Series([np.dtype("float64")] * 2, index=Index(list("ab"), name="i")) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "columns,values", + [ + ("bool1", ["float1", "float2"]), + ("bool1", ["float1", "float2", "bool1"]), + ("bool2", ["float1", "float2", "bool1"]), + ], + ) + def test_pivot_preserve_dtypes(self, columns, values): + # GH 7142 regression test + v = np.arange(5, dtype=np.float64) + df = DataFrame( + {"float1": v, "float2": v + 2.0, "bool1": v <= 2, "bool2": v <= 3} + ) + + df_res = df.reset_index().pivot_table( + index="index", columns=columns, values=values + 
) + + result = dict(df_res.dtypes) + expected = {col: np.dtype("float64") for col in df_res} + assert result == expected + + def test_pivot_no_values(self): + # GH 14380 + idx = pd.DatetimeIndex( + ["2011-01-01", "2011-02-01", "2011-01-02", "2011-01-01", "2011-01-02"] + ) + df = DataFrame({"A": [1, 2, 3, 4, 5]}, index=idx) + res = df.pivot_table(index=df.index.month, columns=df.index.day) + + exp_columns = MultiIndex.from_tuples([("A", 1), ("A", 2)]) + exp = DataFrame([[2.5, 4.0], [2.0, np.nan]], index=[1, 2], columns=exp_columns) + tm.assert_frame_equal(res, exp) + + df = DataFrame( + { + "A": [1, 2, 3, 4, 5], + "dt": date_range("2011-01-01", freq="D", periods=5), + }, + index=idx, + ) + res = df.pivot_table(index=df.index.month, columns=Grouper(key="dt", freq="M")) + exp_columns = MultiIndex.from_tuples([("A", pd.Timestamp("2011-01-31"))]) + exp_columns.names = [None, "dt"] + exp = DataFrame([3.25, 2.0], index=[1, 2], columns=exp_columns) + tm.assert_frame_equal(res, exp) + + res = df.pivot_table( + index=Grouper(freq="A"), columns=Grouper(key="dt", freq="M") + ) + exp = DataFrame( + [3], index=pd.DatetimeIndex(["2011-12-31"], freq="A"), columns=exp_columns + ) + tm.assert_frame_equal(res, exp) + + def test_pivot_multi_values(self): + result = pivot_table( + self.data, values=["D", "E"], index="A", columns=["B", "C"], fill_value=0 + ) + expected = pivot_table( + self.data.drop(["F"], axis=1), index="A", columns=["B", "C"], fill_value=0 + ) + tm.assert_frame_equal(result, expected) + + def test_pivot_multi_functions(self): + f = lambda func: pivot_table( + self.data, values=["D", "E"], index=["A", "B"], columns="C", aggfunc=func + ) + result = f([np.mean, np.std]) + means = f(np.mean) + stds = f(np.std) + expected = concat([means, stds], keys=["mean", "std"], axis=1) + tm.assert_frame_equal(result, expected) + + # margins not supported?? 
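+        # Added note (editor's comment): despite the question above,
+        # margins=True is exercised with a list of aggfuncs here; the
+        # expectation concatenates the per-function margin tables under a
+        # "mean"/"std" column level, mirroring the non-margins case above.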
+ f = lambda func: pivot_table( + self.data, + values=["D", "E"], + index=["A", "B"], + columns="C", + aggfunc=func, + margins=True, + ) + result = f([np.mean, np.std]) + means = f(np.mean) + stds = f(np.std) + expected = concat([means, stds], keys=["mean", "std"], axis=1) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("method", [True, False]) + def test_pivot_index_with_nan(self, method): + # GH 3588 + nan = np.nan + df = DataFrame( + { + "a": ["R1", "R2", nan, "R4"], + "b": ["C1", "C2", "C3", "C4"], + "c": [10, 15, 17, 20], + } + ) + if method: + result = df.pivot("a", "b", "c") + else: + result = pd.pivot(df, "a", "b", "c") + expected = DataFrame( + [ + [nan, nan, 17, nan], + [10, nan, nan, nan], + [nan, 15, nan, nan], + [nan, nan, nan, 20], + ], + index=Index([nan, "R1", "R2", "R4"], name="a"), + columns=Index(["C1", "C2", "C3", "C4"], name="b"), + ) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(df.pivot("b", "a", "c"), expected.T) + + # GH9491 + df = DataFrame( + { + "a": date_range("2014-02-01", periods=6, freq="D"), + "c": 100 + np.arange(6), + } + ) + df["b"] = df["a"] - pd.Timestamp("2014-02-02") + df.loc[1, "a"] = df.loc[3, "a"] = nan + df.loc[1, "b"] = df.loc[4, "b"] = nan + + if method: + pv = df.pivot("a", "b", "c") + else: + pv = pd.pivot(df, "a", "b", "c") + assert pv.notna().values.sum() == len(df) + + for _, row in df.iterrows(): + assert pv.loc[row["a"], row["b"]] == row["c"] + + if method: + result = df.pivot("b", "a", "c") + else: + result = pd.pivot(df, "b", "a", "c") + tm.assert_frame_equal(result, pv.T) + + @pytest.mark.filterwarnings("ignore:Timestamp.freq is deprecated:FutureWarning") + @pytest.mark.parametrize("method", [True, False]) + def test_pivot_with_tz(self, method): + # GH 5878 + df = DataFrame( + { + "dt1": [ + datetime(2013, 1, 1, 9, 0), + datetime(2013, 1, 2, 9, 0), + datetime(2013, 1, 1, 9, 0), + datetime(2013, 1, 2, 9, 0), + ], + "dt2": [ + datetime(2014, 1, 1, 9, 0), + datetime(2014, 1, 1, 9, 0), + datetime(2014, 1, 2, 9, 0), + datetime(2014, 1, 2, 9, 0), + ], + "data1": np.arange(4, dtype="int64"), + "data2": np.arange(4, dtype="int64"), + } + ) + + df["dt1"] = df["dt1"].apply(lambda d: pd.Timestamp(d, tz="US/Pacific")) + df["dt2"] = df["dt2"].apply(lambda d: pd.Timestamp(d, tz="Asia/Tokyo")) + + exp_col1 = Index(["data1", "data1", "data2", "data2"]) + exp_col2 = pd.DatetimeIndex( + ["2014/01/01 09:00", "2014/01/02 09:00"] * 2, name="dt2", tz="Asia/Tokyo" + ) + exp_col = MultiIndex.from_arrays([exp_col1, exp_col2]) + expected = DataFrame( + [[0, 2, 0, 2], [1, 3, 1, 3]], + index=pd.DatetimeIndex( + ["2013/01/01 09:00", "2013/01/02 09:00"], name="dt1", tz="US/Pacific" + ), + columns=exp_col, + ) + + if method: + pv = df.pivot(index="dt1", columns="dt2") + else: + pv = pd.pivot(df, index="dt1", columns="dt2") + tm.assert_frame_equal(pv, expected) + + expected = DataFrame( + [[0, 2], [1, 3]], + index=pd.DatetimeIndex( + ["2013/01/01 09:00", "2013/01/02 09:00"], name="dt1", tz="US/Pacific" + ), + columns=pd.DatetimeIndex( + ["2014/01/01 09:00", "2014/01/02 09:00"], name="dt2", tz="Asia/Tokyo" + ), + ) + + if method: + pv = df.pivot(index="dt1", columns="dt2", values="data1") + else: + pv = pd.pivot(df, index="dt1", columns="dt2", values="data1") + tm.assert_frame_equal(pv, expected) + + def test_pivot_tz_in_values(self): + # GH 14948 + df = DataFrame( + [ + { + "uid": "aa", + "ts": pd.Timestamp("2016-08-12 13:00:00-0700", tz="US/Pacific"), + }, + { + "uid": "aa", + "ts": pd.Timestamp("2016-08-12 08:00:00-0700", 
tz="US/Pacific"), + }, + { + "uid": "aa", + "ts": pd.Timestamp("2016-08-12 14:00:00-0700", tz="US/Pacific"), + }, + { + "uid": "aa", + "ts": pd.Timestamp("2016-08-25 11:00:00-0700", tz="US/Pacific"), + }, + { + "uid": "aa", + "ts": pd.Timestamp("2016-08-25 13:00:00-0700", tz="US/Pacific"), + }, + ] + ) + + df = df.set_index("ts").reset_index() + mins = df.ts.map(lambda x: x.replace(hour=0, minute=0, second=0, microsecond=0)) + + result = pivot_table( + df.set_index("ts").reset_index(), + values="ts", + index=["uid"], + columns=[mins], + aggfunc=np.min, + ) + expected = DataFrame( + [ + [ + pd.Timestamp("2016-08-12 08:00:00-0700", tz="US/Pacific"), + pd.Timestamp("2016-08-25 11:00:00-0700", tz="US/Pacific"), + ] + ], + index=Index(["aa"], name="uid"), + columns=pd.DatetimeIndex( + [ + pd.Timestamp("2016-08-12 00:00:00", tz="US/Pacific"), + pd.Timestamp("2016-08-25 00:00:00", tz="US/Pacific"), + ], + name="ts", + ), + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("method", [True, False]) + def test_pivot_periods(self, method): + df = DataFrame( + { + "p1": [ + pd.Period("2013-01-01", "D"), + pd.Period("2013-01-02", "D"), + pd.Period("2013-01-01", "D"), + pd.Period("2013-01-02", "D"), + ], + "p2": [ + pd.Period("2013-01", "M"), + pd.Period("2013-01", "M"), + pd.Period("2013-02", "M"), + pd.Period("2013-02", "M"), + ], + "data1": np.arange(4, dtype="int64"), + "data2": np.arange(4, dtype="int64"), + } + ) + + exp_col1 = Index(["data1", "data1", "data2", "data2"]) + exp_col2 = pd.PeriodIndex(["2013-01", "2013-02"] * 2, name="p2", freq="M") + exp_col = MultiIndex.from_arrays([exp_col1, exp_col2]) + expected = DataFrame( + [[0, 2, 0, 2], [1, 3, 1, 3]], + index=pd.PeriodIndex(["2013-01-01", "2013-01-02"], name="p1", freq="D"), + columns=exp_col, + ) + if method: + pv = df.pivot(index="p1", columns="p2") + else: + pv = pd.pivot(df, index="p1", columns="p2") + tm.assert_frame_equal(pv, expected) + + expected = DataFrame( + [[0, 2], [1, 3]], + index=pd.PeriodIndex(["2013-01-01", "2013-01-02"], name="p1", freq="D"), + columns=pd.PeriodIndex(["2013-01", "2013-02"], name="p2", freq="M"), + ) + if method: + pv = df.pivot(index="p1", columns="p2", values="data1") + else: + pv = pd.pivot(df, index="p1", columns="p2", values="data1") + tm.assert_frame_equal(pv, expected) + + def test_pivot_periods_with_margins(self): + # GH 28323 + df = DataFrame( + { + "a": [1, 1, 2, 2], + "b": [ + pd.Period("2019Q1"), + pd.Period("2019Q2"), + pd.Period("2019Q1"), + pd.Period("2019Q2"), + ], + "x": 1.0, + } + ) + + expected = DataFrame( + data=1.0, + index=Index([1, 2, "All"], name="a"), + columns=Index([pd.Period("2019Q1"), pd.Period("2019Q2"), "All"], name="b"), + ) + + result = df.pivot_table(index="a", columns="b", values="x", margins=True) + tm.assert_frame_equal(expected, result) + + @pytest.mark.parametrize( + "values", + [ + ["baz", "zoo"], + np.array(["baz", "zoo"]), + Series(["baz", "zoo"]), + Index(["baz", "zoo"]), + ], + ) + @pytest.mark.parametrize("method", [True, False]) + def test_pivot_with_list_like_values(self, values, method): + # issue #17160 + df = DataFrame( + { + "foo": ["one", "one", "one", "two", "two", "two"], + "bar": ["A", "B", "C", "A", "B", "C"], + "baz": [1, 2, 3, 4, 5, 6], + "zoo": ["x", "y", "z", "q", "w", "t"], + } + ) + + if method: + result = df.pivot(index="foo", columns="bar", values=values) + else: + result = pd.pivot(df, index="foo", columns="bar", values=values) + + data = [[1, 2, 3, "x", "y", "z"], [4, 5, 6, "q", "w", "t"]] + index = Index(data=["one", 
"two"], name="foo") + columns = MultiIndex( + levels=[["baz", "zoo"], ["A", "B", "C"]], + codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], + names=[None, "bar"], + ) + expected = DataFrame(data=data, index=index, columns=columns, dtype="object") + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "values", + [ + ["bar", "baz"], + np.array(["bar", "baz"]), + Series(["bar", "baz"]), + Index(["bar", "baz"]), + ], + ) + @pytest.mark.parametrize("method", [True, False]) + def test_pivot_with_list_like_values_nans(self, values, method): + # issue #17160 + df = DataFrame( + { + "foo": ["one", "one", "one", "two", "two", "two"], + "bar": ["A", "B", "C", "A", "B", "C"], + "baz": [1, 2, 3, 4, 5, 6], + "zoo": ["x", "y", "z", "q", "w", "t"], + } + ) + + if method: + result = df.pivot(index="zoo", columns="foo", values=values) + else: + result = pd.pivot(df, index="zoo", columns="foo", values=values) + + data = [ + [np.nan, "A", np.nan, 4], + [np.nan, "C", np.nan, 6], + [np.nan, "B", np.nan, 5], + ["A", np.nan, 1, np.nan], + ["B", np.nan, 2, np.nan], + ["C", np.nan, 3, np.nan], + ] + index = Index(data=["q", "t", "w", "x", "y", "z"], name="zoo") + columns = MultiIndex( + levels=[["bar", "baz"], ["one", "two"]], + codes=[[0, 0, 1, 1], [0, 1, 0, 1]], + names=[None, "foo"], + ) + expected = DataFrame(data=data, index=index, columns=columns, dtype="object") + tm.assert_frame_equal(result, expected) + + def test_pivot_columns_none_raise_error(self): + # GH 30924 + df = DataFrame({"col1": ["a", "b", "c"], "col2": [1, 2, 3], "col3": [1, 2, 3]}) + msg = r"pivot\(\) missing 1 required argument: 'columns'" + with pytest.raises(TypeError, match=msg): + df.pivot(index="col1", values="col3") + + @pytest.mark.xfail( + reason="MultiIndexed unstack with tuple names fails with KeyError GH#19966" + ) + @pytest.mark.parametrize("method", [True, False]) + def test_pivot_with_multiindex(self, method): + # issue #17160 + index = Index(data=[0, 1, 2, 3, 4, 5]) + data = [ + ["one", "A", 1, "x"], + ["one", "B", 2, "y"], + ["one", "C", 3, "z"], + ["two", "A", 4, "q"], + ["two", "B", 5, "w"], + ["two", "C", 6, "t"], + ] + columns = MultiIndex( + levels=[["bar", "baz"], ["first", "second"]], + codes=[[0, 0, 1, 1], [0, 1, 0, 1]], + ) + df = DataFrame(data=data, index=index, columns=columns, dtype="object") + if method: + result = df.pivot( + index=("bar", "first"), + columns=("bar", "second"), + values=("baz", "first"), + ) + else: + result = pd.pivot( + df, + index=("bar", "first"), + columns=("bar", "second"), + values=("baz", "first"), + ) + + data = { + "A": Series([1, 4], index=["one", "two"]), + "B": Series([2, 5], index=["one", "two"]), + "C": Series([3, 6], index=["one", "two"]), + } + expected = DataFrame(data) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("method", [True, False]) + def test_pivot_with_tuple_of_values(self, method): + # issue #17160 + df = DataFrame( + { + "foo": ["one", "one", "one", "two", "two", "two"], + "bar": ["A", "B", "C", "A", "B", "C"], + "baz": [1, 2, 3, 4, 5, 6], + "zoo": ["x", "y", "z", "q", "w", "t"], + } + ) + with pytest.raises(KeyError, match=r"^\('bar', 'baz'\)$"): + # tuple is seen as a single column name + if method: + df.pivot(index="zoo", columns="foo", values=("bar", "baz")) + else: + pd.pivot(df, index="zoo", columns="foo", values=("bar", "baz")) + + def test_margins(self): + def _check_output( + result, values_col, index=["A", "B"], columns=["C"], margins_col="All" + ): + col_margins = result.loc[result.index[:-1], margins_col] + 
expected_col_margins = self.data.groupby(index)[values_col].mean() + tm.assert_series_equal(col_margins, expected_col_margins, check_names=False) + assert col_margins.name == margins_col + + result = result.sort_index() + index_margins = result.loc[(margins_col, "")].iloc[:-1] + + expected_ix_margins = self.data.groupby(columns)[values_col].mean() + tm.assert_series_equal( + index_margins, expected_ix_margins, check_names=False + ) + assert index_margins.name == (margins_col, "") + + grand_total_margins = result.loc[(margins_col, ""), margins_col] + expected_total_margins = self.data[values_col].mean() + assert grand_total_margins == expected_total_margins + + # column specified + result = self.data.pivot_table( + values="D", index=["A", "B"], columns="C", margins=True, aggfunc=np.mean + ) + _check_output(result, "D") + + # Set a different margins_name (not 'All') + result = self.data.pivot_table( + values="D", + index=["A", "B"], + columns="C", + margins=True, + aggfunc=np.mean, + margins_name="Totals", + ) + _check_output(result, "D", margins_col="Totals") + + # no column specified + table = self.data.pivot_table( + index=["A", "B"], columns="C", margins=True, aggfunc=np.mean + ) + for value_col in table.columns.levels[0]: + _check_output(table[value_col], value_col) + + # no col + + # to help with a buglet + self.data.columns = [k * 2 for k in self.data.columns] + table = self.data.pivot_table(index=["AA", "BB"], margins=True, aggfunc=np.mean) + for value_col in table.columns: + totals = table.loc[("All", ""), value_col] + assert totals == self.data[value_col].mean() + + table = self.data.pivot_table(index=["AA", "BB"], margins=True, aggfunc="mean") + for item in ["DD", "EE", "FF"]: + totals = table.loc[("All", ""), item] + assert totals == self.data[item].mean() + + @pytest.mark.parametrize( + "columns, aggfunc, values, expected_columns", + [ + ( + "A", + np.mean, + [[5.5, 5.5, 2.2, 2.2], [8.0, 8.0, 4.4, 4.4]], + Index(["bar", "All", "foo", "All"], name="A"), + ), + ( + ["A", "B"], + "sum", + [[9, 13, 22, 5, 6, 11], [14, 18, 32, 11, 11, 22]], + MultiIndex.from_tuples( + [ + ("bar", "one"), + ("bar", "two"), + ("bar", "All"), + ("foo", "one"), + ("foo", "two"), + ("foo", "All"), + ], + names=["A", "B"], + ), + ), + ], + ) + def test_margin_with_only_columns_defined( + self, columns, aggfunc, values, expected_columns + ): + # GH 31016 + df = DataFrame( + { + "A": ["foo", "foo", "foo", "foo", "foo", "bar", "bar", "bar", "bar"], + "B": ["one", "one", "one", "two", "two", "one", "one", "two", "two"], + "C": [ + "small", + "large", + "large", + "small", + "small", + "large", + "small", + "small", + "large", + ], + "D": [1, 2, 2, 3, 3, 4, 5, 6, 7], + "E": [2, 4, 5, 5, 6, 6, 8, 9, 9], + } + ) + + result = df.pivot_table(columns=columns, margins=True, aggfunc=aggfunc) + expected = DataFrame(values, index=Index(["D", "E"]), columns=expected_columns) + + tm.assert_frame_equal(result, expected) + + def test_margins_dtype(self): + # GH 17013 + + df = self.data.copy() + df[["D", "E", "F"]] = np.arange(len(df) * 3).reshape(len(df), 3).astype("i8") + + mi_val = list(product(["bar", "foo"], ["one", "two"])) + [("All", "")] + mi = MultiIndex.from_tuples(mi_val, names=("A", "B")) + expected = DataFrame( + {"dull": [12, 21, 3, 9, 45], "shiny": [33, 0, 36, 51, 120]}, index=mi + ).rename_axis("C", axis=1) + expected["All"] = expected["dull"] + expected["shiny"] + + result = df.pivot_table( + values="D", + index=["A", "B"], + columns="C", + margins=True, + aggfunc=np.sum, + fill_value=0, + ) + + 
tm.assert_frame_equal(expected, result) + + def test_margins_dtype_len(self): + mi_val = list(product(["bar", "foo"], ["one", "two"])) + [("All", "")] + mi = MultiIndex.from_tuples(mi_val, names=("A", "B")) + expected = DataFrame( + {"dull": [1, 1, 2, 1, 5], "shiny": [2, 0, 2, 2, 6]}, index=mi + ).rename_axis("C", axis=1) + expected["All"] = expected["dull"] + expected["shiny"] + + result = self.data.pivot_table( + values="D", + index=["A", "B"], + columns="C", + margins=True, + aggfunc=len, + fill_value=0, + ) + + tm.assert_frame_equal(expected, result) + + @pytest.mark.parametrize("cols", [(1, 2), ("a", "b"), (1, "b"), ("a", 1)]) + def test_pivot_table_multiindex_only(self, cols): + # GH 17038 + df2 = DataFrame({cols[0]: [1, 2, 3], cols[1]: [1, 2, 3], "v": [4, 5, 6]}) + + result = df2.pivot_table(values="v", columns=cols) + expected = DataFrame( + [[4, 5, 6]], + columns=MultiIndex.from_tuples([(1, 1), (2, 2), (3, 3)], names=cols), + index=Index(["v"]), + ) + + tm.assert_frame_equal(result, expected) + + def test_pivot_table_retains_tz(self): + dti = date_range("2016-01-01", periods=3, tz="Europe/Amsterdam") + df = DataFrame({"A": np.random.randn(3), "B": np.random.randn(3), "C": dti}) + result = df.pivot_table(index=["B", "C"], dropna=False) + + # check tz retention + assert result.index.levels[1].equals(dti) + + def test_pivot_integer_columns(self): + # caused by upstream bug in unstack + + d = date.min + data = list( + product( + ["foo", "bar"], + ["A", "B", "C"], + ["x1", "x2"], + [d + timedelta(i) for i in range(20)], + [1.0], + ) + ) + df = DataFrame(data) + table = df.pivot_table(values=4, index=[0, 1, 3], columns=[2]) + + df2 = df.rename(columns=str) + table2 = df2.pivot_table(values="4", index=["0", "1", "3"], columns=["2"]) + + tm.assert_frame_equal(table, table2, check_names=False) + + def test_pivot_no_level_overlap(self): + # GH #1181 + + data = DataFrame( + { + "a": ["a", "a", "a", "a", "b", "b", "b", "b"] * 2, + "b": [0, 0, 0, 0, 1, 1, 1, 1] * 2, + "c": (["foo"] * 4 + ["bar"] * 4) * 2, + "value": np.random.randn(16), + } + ) + + table = data.pivot_table("value", index="a", columns=["b", "c"]) + + grouped = data.groupby(["a", "b", "c"])["value"].mean() + expected = grouped.unstack("b").unstack("c").dropna(axis=1, how="all") + tm.assert_frame_equal(table, expected) + + def test_pivot_columns_lexsorted(self): + + n = 10000 + + dtype = np.dtype( + [ + ("Index", object), + ("Symbol", object), + ("Year", int), + ("Month", int), + ("Day", int), + ("Quantity", int), + ("Price", float), + ] + ) + + products = np.array( + [ + ("SP500", "ADBE"), + ("SP500", "NVDA"), + ("SP500", "ORCL"), + ("NDQ100", "AAPL"), + ("NDQ100", "MSFT"), + ("NDQ100", "GOOG"), + ("FTSE", "DGE.L"), + ("FTSE", "TSCO.L"), + ("FTSE", "GSK.L"), + ], + dtype=[("Index", object), ("Symbol", object)], + ) + items = np.empty(n, dtype=dtype) + iproduct = np.random.randint(0, len(products), n) + items["Index"] = products["Index"][iproduct] + items["Symbol"] = products["Symbol"][iproduct] + dr = date_range(date(2000, 1, 1), date(2010, 12, 31)) + dates = dr[np.random.randint(0, len(dr), n)] + items["Year"] = dates.year + items["Month"] = dates.month + items["Day"] = dates.day + items["Price"] = np.random.lognormal(4.0, 2.0, n) + + df = DataFrame(items) + + pivoted = df.pivot_table( + "Price", + index=["Month", "Day"], + columns=["Index", "Symbol", "Year"], + aggfunc="mean", + ) + + assert pivoted.columns.is_monotonic + + def test_pivot_complex_aggfunc(self): + f = {"D": ["std"], "E": ["sum"]} + expected = 
self.data.groupby(["A", "B"]).agg(f).unstack("B") + result = self.data.pivot_table(index="A", columns="B", aggfunc=f) + + tm.assert_frame_equal(result, expected) + + def test_margins_no_values_no_cols(self): + # Regression test on pivot table: no values or cols passed. + result = self.data[["A", "B"]].pivot_table( + index=["A", "B"], aggfunc=len, margins=True + ) + result_list = result.tolist() + assert sum(result_list[:-1]) == result_list[-1] + + def test_margins_no_values_two_rows(self): + # Regression test on pivot table: no values passed but rows are a + # multi-index + result = self.data[["A", "B", "C"]].pivot_table( + index=["A", "B"], columns="C", aggfunc=len, margins=True + ) + assert result.All.tolist() == [3.0, 1.0, 4.0, 3.0, 11.0] + + def test_margins_no_values_one_row_one_col(self): + # Regression test on pivot table: no values passed but row and col + # defined + result = self.data[["A", "B"]].pivot_table( + index="A", columns="B", aggfunc=len, margins=True + ) + assert result.All.tolist() == [4.0, 7.0, 11.0] + + def test_margins_no_values_two_row_two_cols(self): + # Regression test on pivot table: no values passed but rows and cols + # are multi-indexed + self.data["D"] = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"] + result = self.data[["A", "B", "C", "D"]].pivot_table( + index=["A", "B"], columns=["C", "D"], aggfunc=len, margins=True + ) + assert result.All.tolist() == [3.0, 1.0, 4.0, 3.0, 11.0] + + @pytest.mark.parametrize("margin_name", ["foo", "one", 666, None, ["a", "b"]]) + def test_pivot_table_with_margins_set_margin_name(self, margin_name): + # see gh-3335 + msg = ( + f'Conflicting name "{margin_name}" in margins|' + "margins_name argument must be a string" + ) + with pytest.raises(ValueError, match=msg): + # multi-index index + pivot_table( + self.data, + values="D", + index=["A", "B"], + columns=["C"], + margins=True, + margins_name=margin_name, + ) + with pytest.raises(ValueError, match=msg): + # multi-index column + pivot_table( + self.data, + values="D", + index=["C"], + columns=["A", "B"], + margins=True, + margins_name=margin_name, + ) + with pytest.raises(ValueError, match=msg): + # non-multi-index index/column + pivot_table( + self.data, + values="D", + index=["A"], + columns=["B"], + margins=True, + margins_name=margin_name, + ) + + def test_pivot_timegrouper(self, using_array_manager): + df = DataFrame( + { + "Branch": "A A A A A A A B".split(), + "Buyer": "Carl Mark Carl Carl Joe Joe Joe Carl".split(), + "Quantity": [1, 3, 5, 1, 8, 1, 9, 3], + "Date": [ + datetime(2013, 1, 1), + datetime(2013, 1, 1), + datetime(2013, 10, 1), + datetime(2013, 10, 2), + datetime(2013, 10, 1), + datetime(2013, 10, 2), + datetime(2013, 12, 2), + datetime(2013, 12, 2), + ], + } + ).set_index("Date") + + expected = DataFrame( + np.array([10, 18, 3], dtype="int64").reshape(1, 3), + index=pd.DatetimeIndex([datetime(2013, 12, 31)], freq="A"), + columns="Carl Joe Mark".split(), + ) + expected.index.name = "Date" + expected.columns.name = "Buyer" + + result = pivot_table( + df, + index=Grouper(freq="A"), + columns="Buyer", + values="Quantity", + aggfunc=np.sum, + ) + tm.assert_frame_equal(result, expected) + + result = pivot_table( + df, + index="Buyer", + columns=Grouper(freq="A"), + values="Quantity", + aggfunc=np.sum, + ) + tm.assert_frame_equal(result, expected.T) + + expected = DataFrame( + np.array([1, np.nan, 3, 9, 18, np.nan]).reshape(2, 3), + index=pd.DatetimeIndex( + [datetime(2013, 1, 1), datetime(2013, 7, 1)], freq="6MS" + ), + columns="Carl Joe Mark".split(), 
+ ) + expected.index.name = "Date" + expected.columns.name = "Buyer" + if using_array_manager: + # INFO(ArrayManager) column without NaNs can preserve int dtype + expected["Carl"] = expected["Carl"].astype("int64") + + result = pivot_table( + df, + index=Grouper(freq="6MS"), + columns="Buyer", + values="Quantity", + aggfunc=np.sum, + ) + tm.assert_frame_equal(result, expected) + + result = pivot_table( + df, + index="Buyer", + columns=Grouper(freq="6MS"), + values="Quantity", + aggfunc=np.sum, + ) + tm.assert_frame_equal(result, expected.T) + + # passing the name + df = df.reset_index() + result = pivot_table( + df, + index=Grouper(freq="6MS", key="Date"), + columns="Buyer", + values="Quantity", + aggfunc=np.sum, + ) + tm.assert_frame_equal(result, expected) + + result = pivot_table( + df, + index="Buyer", + columns=Grouper(freq="6MS", key="Date"), + values="Quantity", + aggfunc=np.sum, + ) + tm.assert_frame_equal(result, expected.T) + + msg = "'The grouper name foo is not found'" + with pytest.raises(KeyError, match=msg): + pivot_table( + df, + index=Grouper(freq="6MS", key="foo"), + columns="Buyer", + values="Quantity", + aggfunc=np.sum, + ) + with pytest.raises(KeyError, match=msg): + pivot_table( + df, + index="Buyer", + columns=Grouper(freq="6MS", key="foo"), + values="Quantity", + aggfunc=np.sum, + ) + + # passing the level + df = df.set_index("Date") + result = pivot_table( + df, + index=Grouper(freq="6MS", level="Date"), + columns="Buyer", + values="Quantity", + aggfunc=np.sum, + ) + tm.assert_frame_equal(result, expected) + + result = pivot_table( + df, + index="Buyer", + columns=Grouper(freq="6MS", level="Date"), + values="Quantity", + aggfunc=np.sum, + ) + tm.assert_frame_equal(result, expected.T) + + msg = "The level foo is not valid" + with pytest.raises(ValueError, match=msg): + pivot_table( + df, + index=Grouper(freq="6MS", level="foo"), + columns="Buyer", + values="Quantity", + aggfunc=np.sum, + ) + with pytest.raises(ValueError, match=msg): + pivot_table( + df, + index="Buyer", + columns=Grouper(freq="6MS", level="foo"), + values="Quantity", + aggfunc=np.sum, + ) + + # double grouper + df = DataFrame( + { + "Branch": "A A A A A A A B".split(), + "Buyer": "Carl Mark Carl Carl Joe Joe Joe Carl".split(), + "Quantity": [1, 3, 5, 1, 8, 1, 9, 3], + "Date": [ + datetime(2013, 11, 1, 13, 0), + datetime(2013, 9, 1, 13, 5), + datetime(2013, 10, 1, 20, 0), + datetime(2013, 10, 2, 10, 0), + datetime(2013, 11, 1, 20, 0), + datetime(2013, 10, 2, 10, 0), + datetime(2013, 10, 2, 12, 0), + datetime(2013, 12, 5, 14, 0), + ], + "PayDay": [ + datetime(2013, 10, 4, 0, 0), + datetime(2013, 10, 15, 13, 5), + datetime(2013, 9, 5, 20, 0), + datetime(2013, 11, 2, 10, 0), + datetime(2013, 10, 7, 20, 0), + datetime(2013, 9, 5, 10, 0), + datetime(2013, 12, 30, 12, 0), + datetime(2013, 11, 20, 14, 0), + ], + } + ) + + result = pivot_table( + df, + index=Grouper(freq="M", key="Date"), + columns=Grouper(freq="M", key="PayDay"), + values="Quantity", + aggfunc=np.sum, + ) + expected = DataFrame( + np.array( + [ + np.nan, + 3, + np.nan, + np.nan, + 6, + np.nan, + 1, + 9, + np.nan, + 9, + np.nan, + np.nan, + np.nan, + np.nan, + 3, + np.nan, + ] + ).reshape(4, 4), + index=pd.DatetimeIndex( + [ + datetime(2013, 9, 30), + datetime(2013, 10, 31), + datetime(2013, 11, 30), + datetime(2013, 12, 31), + ], + freq="M", + ), + columns=pd.DatetimeIndex( + [ + datetime(2013, 9, 30), + datetime(2013, 10, 31), + datetime(2013, 11, 30), + datetime(2013, 12, 31), + ], + freq="M", + ), + ) + expected.index.name = "Date" + 
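+ # The pivot above buckets *both* axes with a month-end ("M") Grouper, so
+ # each cell sums Quantity over one (Date month, PayDay month) pair and
+ # unmatched month pairs surface as NaN.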
expected.columns.name = "PayDay" + + tm.assert_frame_equal(result, expected) + + result = pivot_table( + df, + index=Grouper(freq="M", key="PayDay"), + columns=Grouper(freq="M", key="Date"), + values="Quantity", + aggfunc=np.sum, + ) + tm.assert_frame_equal(result, expected.T) + + tuples = [ + (datetime(2013, 9, 30), datetime(2013, 10, 31)), + (datetime(2013, 10, 31), datetime(2013, 9, 30)), + (datetime(2013, 10, 31), datetime(2013, 11, 30)), + (datetime(2013, 10, 31), datetime(2013, 12, 31)), + (datetime(2013, 11, 30), datetime(2013, 10, 31)), + (datetime(2013, 12, 31), datetime(2013, 11, 30)), + ] + idx = MultiIndex.from_tuples(tuples, names=["Date", "PayDay"]) + expected = DataFrame( + np.array( + [3, np.nan, 6, np.nan, 1, np.nan, 9, np.nan, 9, np.nan, np.nan, 3] + ).reshape(6, 2), + index=idx, + columns=["A", "B"], + ) + expected.columns.name = "Branch" + + result = pivot_table( + df, + index=[Grouper(freq="M", key="Date"), Grouper(freq="M", key="PayDay")], + columns=["Branch"], + values="Quantity", + aggfunc=np.sum, + ) + tm.assert_frame_equal(result, expected) + + result = pivot_table( + df, + index=["Branch"], + columns=[Grouper(freq="M", key="Date"), Grouper(freq="M", key="PayDay")], + values="Quantity", + aggfunc=np.sum, + ) + tm.assert_frame_equal(result, expected.T) + + def test_pivot_datetime_tz(self): + dates1 = [ + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + ] + dates2 = [ + "2013-01-01 15:00:00", + "2013-01-01 15:00:00", + "2013-01-01 15:00:00", + "2013-02-01 15:00:00", + "2013-02-01 15:00:00", + "2013-02-01 15:00:00", + ] + df = DataFrame( + { + "label": ["a", "a", "a", "b", "b", "b"], + "dt1": dates1, + "dt2": dates2, + "value1": np.arange(6, dtype="int64"), + "value2": [1, 2] * 3, + } + ) + df["dt1"] = df["dt1"].apply(lambda d: pd.Timestamp(d, tz="US/Pacific")) + df["dt2"] = df["dt2"].apply(lambda d: pd.Timestamp(d, tz="Asia/Tokyo")) + + exp_idx = pd.DatetimeIndex( + ["2011-07-19 07:00:00", "2011-07-19 08:00:00", "2011-07-19 09:00:00"], + tz="US/Pacific", + name="dt1", + ) + exp_col1 = Index(["value1", "value1"]) + exp_col2 = Index(["a", "b"], name="label") + exp_col = MultiIndex.from_arrays([exp_col1, exp_col2]) + expected = DataFrame([[0, 3], [1, 4], [2, 5]], index=exp_idx, columns=exp_col) + result = pivot_table(df, index=["dt1"], columns=["label"], values=["value1"]) + tm.assert_frame_equal(result, expected) + + exp_col1 = Index(["sum", "sum", "sum", "sum", "mean", "mean", "mean", "mean"]) + exp_col2 = Index(["value1", "value1", "value2", "value2"] * 2) + exp_col3 = pd.DatetimeIndex( + ["2013-01-01 15:00:00", "2013-02-01 15:00:00"] * 4, + tz="Asia/Tokyo", + name="dt2", + ) + exp_col = MultiIndex.from_arrays([exp_col1, exp_col2, exp_col3]) + expected = DataFrame( + np.array( + [ + [0, 3, 1, 2, 0, 3, 1, 2], + [1, 4, 2, 1, 1, 4, 2, 1], + [2, 5, 1, 2, 2, 5, 1, 2], + ], + dtype="int64", + ), + index=exp_idx, + columns=exp_col, + ) + + result = pivot_table( + df, + index=["dt1"], + columns=["dt2"], + values=["value1", "value2"], + aggfunc=[np.sum, np.mean], + ) + tm.assert_frame_equal(result, expected) + + def test_pivot_dtaccessor(self): + # GH 8103 + dates1 = [ + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + ] + dates2 = [ + "2013-01-01 15:00:00", + "2013-01-01 15:00:00", + "2013-01-01 15:00:00", + "2013-02-01 15:00:00", + "2013-02-01 15:00:00", + "2013-02-01 
15:00:00", + ] + df = DataFrame( + { + "label": ["a", "a", "a", "b", "b", "b"], + "dt1": dates1, + "dt2": dates2, + "value1": np.arange(6, dtype="int64"), + "value2": [1, 2] * 3, + } + ) + df["dt1"] = df["dt1"].apply(lambda d: pd.Timestamp(d)) + df["dt2"] = df["dt2"].apply(lambda d: pd.Timestamp(d)) + + result = pivot_table( + df, index="label", columns=df["dt1"].dt.hour, values="value1" + ) + + exp_idx = Index(["a", "b"], name="label") + expected = DataFrame( + {7: [0, 3], 8: [1, 4], 9: [2, 5]}, + index=exp_idx, + columns=Index([7, 8, 9], name="dt1"), + ) + tm.assert_frame_equal(result, expected) + + result = pivot_table( + df, index=df["dt2"].dt.month, columns=df["dt1"].dt.hour, values="value1" + ) + + expected = DataFrame( + {7: [0, 3], 8: [1, 4], 9: [2, 5]}, + index=Index([1, 2], name="dt2"), + columns=Index([7, 8, 9], name="dt1"), + ) + tm.assert_frame_equal(result, expected) + + result = pivot_table( + df, + index=df["dt2"].dt.year.values, + columns=[df["dt1"].dt.hour, df["dt2"].dt.month], + values="value1", + ) + + exp_col = MultiIndex.from_arrays( + [[7, 7, 8, 8, 9, 9], [1, 2] * 3], names=["dt1", "dt2"] + ) + expected = DataFrame( + np.array([[0, 3, 1, 4, 2, 5]], dtype="int64"), index=[2013], columns=exp_col + ) + tm.assert_frame_equal(result, expected) + + result = pivot_table( + df, + index=np.array(["X", "X", "X", "X", "Y", "Y"]), + columns=[df["dt1"].dt.hour, df["dt2"].dt.month], + values="value1", + ) + expected = DataFrame( + np.array( + [[0, 3, 1, np.nan, 2, np.nan], [np.nan, np.nan, np.nan, 4, np.nan, 5]] + ), + index=["X", "Y"], + columns=exp_col, + ) + tm.assert_frame_equal(result, expected) + + def test_daily(self): + rng = date_range("1/1/2000", "12/31/2004", freq="D") + ts = Series(np.random.randn(len(rng)), index=rng) + + annual = pivot_table( + DataFrame(ts), index=ts.index.year, columns=ts.index.dayofyear + ) + annual.columns = annual.columns.droplevel(0) + + doy = np.asarray(ts.index.dayofyear) + + for i in range(1, 367): + subset = ts[doy == i] + subset.index = subset.index.year + + result = annual[i].dropna() + tm.assert_series_equal(result, subset, check_names=False) + assert result.name == i + + def test_monthly(self): + rng = date_range("1/1/2000", "12/31/2004", freq="M") + ts = Series(np.random.randn(len(rng)), index=rng) + + annual = pivot_table(DataFrame(ts), index=ts.index.year, columns=ts.index.month) + annual.columns = annual.columns.droplevel(0) + + month = ts.index.month + for i in range(1, 13): + subset = ts[month == i] + subset.index = subset.index.year + result = annual[i].dropna() + tm.assert_series_equal(result, subset, check_names=False) + assert result.name == i + + def test_pivot_table_with_iterator_values(self): + # GH 12017 + aggs = {"D": "sum", "E": "mean"} + + pivot_values_list = pivot_table( + self.data, index=["A"], values=list(aggs.keys()), aggfunc=aggs + ) + + pivot_values_keys = pivot_table( + self.data, index=["A"], values=aggs.keys(), aggfunc=aggs + ) + tm.assert_frame_equal(pivot_values_keys, pivot_values_list) + + agg_values_gen = (value for value in aggs.keys()) + pivot_values_gen = pivot_table( + self.data, index=["A"], values=agg_values_gen, aggfunc=aggs + ) + tm.assert_frame_equal(pivot_values_gen, pivot_values_list) + + def test_pivot_table_margins_name_with_aggfunc_list(self): + # GH 13354 + margins_name = "Weekly" + costs = DataFrame( + { + "item": ["bacon", "cheese", "bacon", "cheese"], + "cost": [2.5, 4.5, 3.2, 3.3], + "day": ["M", "M", "T", "T"], + } + ) + table = costs.pivot_table( + index="item", + columns="day", + 
margins=True, + margins_name=margins_name, + aggfunc=[np.mean, max], + ) + ix = Index(["bacon", "cheese", margins_name], dtype="object", name="item") + tups = [ + ("mean", "cost", "M"), + ("mean", "cost", "T"), + ("mean", "cost", margins_name), + ("max", "cost", "M"), + ("max", "cost", "T"), + ("max", "cost", margins_name), + ] + cols = MultiIndex.from_tuples(tups, names=[None, None, "day"]) + expected = DataFrame(table.values, index=ix, columns=cols) + tm.assert_frame_equal(table, expected) + + def test_categorical_margins(self, observed, request): + if observed: + request.node.add_marker( + pytest.mark.xfail( + reason="GH#17035 (np.mean of ints is casted back to ints)" + ) + ) + # GH 10989 + df = DataFrame( + {"x": np.arange(8), "y": np.arange(8) // 4, "z": np.arange(8) % 2} + ) + + expected = DataFrame([[1.0, 2.0, 1.5], [5, 6, 5.5], [3, 4, 3.5]]) + expected.index = Index([0, 1, "All"], name="y") + expected.columns = Index([0, 1, "All"], name="z") + + table = df.pivot_table("x", "y", "z", dropna=observed, margins=True) + tm.assert_frame_equal(table, expected) + + def test_categorical_margins_category(self, observed, request): + if observed: + request.node.add_marker( + pytest.mark.xfail( + reason="GH#17035 (np.mean of ints is casted back to ints)" + ) + ) + df = DataFrame( + {"x": np.arange(8), "y": np.arange(8) // 4, "z": np.arange(8) % 2} + ) + + expected = DataFrame([[1.0, 2.0, 1.5], [5, 6, 5.5], [3, 4, 3.5]]) + expected.index = Index([0, 1, "All"], name="y") + expected.columns = Index([0, 1, "All"], name="z") + + df.y = df.y.astype("category") + df.z = df.z.astype("category") + table = df.pivot_table("x", "y", "z", dropna=observed, margins=True) + tm.assert_frame_equal(table, expected) + + def test_margins_casted_to_float(self, observed): + # GH 24893 + df = DataFrame( + { + "A": [2, 4, 6, 8], + "B": [1, 4, 5, 8], + "C": [1, 3, 4, 6], + "D": ["X", "X", "Y", "Y"], + } + ) + + result = pivot_table(df, index="D", margins=True) + expected = DataFrame( + {"A": [3, 7, 5], "B": [2.5, 6.5, 4.5], "C": [2, 5, 3.5]}, + index=Index(["X", "Y", "All"], name="D"), + ) + tm.assert_frame_equal(result, expected) + + def test_pivot_with_categorical(self, observed, ordered): + # gh-21370 + idx = [np.nan, "low", "high", "low", np.nan] + col = [np.nan, "A", "B", np.nan, "A"] + df = DataFrame( + { + "In": Categorical(idx, categories=["low", "high"], ordered=ordered), + "Col": Categorical(col, categories=["A", "B"], ordered=ordered), + "Val": range(1, 6), + } + ) + # case with index/columns/value + result = df.pivot_table( + index="In", columns="Col", values="Val", observed=observed + ) + + expected_cols = pd.CategoricalIndex(["A", "B"], ordered=ordered, name="Col") + + expected = DataFrame(data=[[2.0, np.nan], [np.nan, 3.0]], columns=expected_cols) + expected.index = Index( + Categorical(["low", "high"], categories=["low", "high"], ordered=ordered), + name="In", + ) + + tm.assert_frame_equal(result, expected) + + # case with columns/value + result = df.pivot_table(columns="Col", values="Val", observed=observed) + + expected = DataFrame( + data=[[3.5, 3.0]], columns=expected_cols, index=Index(["Val"]) + ) + + tm.assert_frame_equal(result, expected) + + def test_categorical_aggfunc(self, observed): + # GH 9534 + df = DataFrame( + {"C1": ["A", "B", "C", "C"], "C2": ["a", "a", "b", "b"], "V": [1, 2, 3, 4]} + ) + df["C1"] = df["C1"].astype("category") + result = df.pivot_table( + "V", index="C1", columns="C2", dropna=observed, aggfunc="count" + ) + + expected_index = pd.CategoricalIndex( + ["A", "B", "C"], 
categories=["A", "B", "C"], ordered=False, name="C1" + ) + expected_columns = Index(["a", "b"], name="C2") + expected_data = np.array([[1, 0], [1, 0], [0, 2]], dtype=np.int64) + expected = DataFrame( + expected_data, index=expected_index, columns=expected_columns + ) + tm.assert_frame_equal(result, expected) + + def test_categorical_pivot_index_ordering(self, observed): + # GH 8731 + df = DataFrame( + { + "Sales": [100, 120, 220], + "Month": ["January", "January", "January"], + "Year": [2013, 2014, 2013], + } + ) + months = [ + "January", + "February", + "March", + "April", + "May", + "June", + "July", + "August", + "September", + "October", + "November", + "December", + ] + df["Month"] = df["Month"].astype("category").cat.set_categories(months) + result = df.pivot_table( + values="Sales", + index="Month", + columns="Year", + observed=observed, + aggfunc="sum", + ) + expected_columns = Index([2013, 2014], name="Year", dtype="int64") + expected_index = pd.CategoricalIndex( + months, categories=months, ordered=False, name="Month" + ) + expected_data = [[320, 120]] + [[0, 0]] * 11 + expected = DataFrame( + expected_data, index=expected_index, columns=expected_columns + ) + if observed: + expected = expected.loc[["January"]] + + tm.assert_frame_equal(result, expected) + + def test_pivot_table_not_series(self): + # GH 4386 + # pivot_table always returns a DataFrame + # when values is not list like and columns is None + # and aggfunc is not instance of list + df = DataFrame({"col1": [3, 4, 5], "col2": ["C", "D", "E"], "col3": [1, 3, 9]}) + + result = df.pivot_table("col1", index=["col3", "col2"], aggfunc=np.sum) + m = MultiIndex.from_arrays([[1, 3, 9], ["C", "D", "E"]], names=["col3", "col2"]) + expected = DataFrame([3, 4, 5], index=m, columns=["col1"]) + + tm.assert_frame_equal(result, expected) + + result = df.pivot_table("col1", index="col3", columns="col2", aggfunc=np.sum) + expected = DataFrame( + [[3, np.NaN, np.NaN], [np.NaN, 4, np.NaN], [np.NaN, np.NaN, 5]], + index=Index([1, 3, 9], name="col3"), + columns=Index(["C", "D", "E"], name="col2"), + ) + + tm.assert_frame_equal(result, expected) + + result = df.pivot_table("col1", index="col3", aggfunc=[np.sum]) + m = MultiIndex.from_arrays([["sum"], ["col1"]]) + expected = DataFrame([3, 4, 5], index=Index([1, 3, 9], name="col3"), columns=m) + + tm.assert_frame_equal(result, expected) + + def test_pivot_margins_name_unicode(self): + # issue #13292 + greek = "\u0394\u03bf\u03ba\u03b9\u03bc\u03ae" + frame = DataFrame({"foo": [1, 2, 3]}) + table = pivot_table( + frame, index=["foo"], aggfunc=len, margins=True, margins_name=greek + ) + index = Index([1, 2, 3, greek], dtype="object", name="foo") + expected = DataFrame(index=index) + tm.assert_frame_equal(table, expected) + + def test_pivot_string_as_func(self): + # GH #18713 + # for correctness purposes + data = DataFrame( + { + "A": [ + "foo", + "foo", + "foo", + "foo", + "bar", + "bar", + "bar", + "bar", + "foo", + "foo", + "foo", + ], + "B": [ + "one", + "one", + "one", + "two", + "one", + "one", + "one", + "two", + "two", + "two", + "one", + ], + "C": range(11), + } + ) + + result = pivot_table(data, index="A", columns="B", aggfunc="sum") + mi = MultiIndex( + levels=[["C"], ["one", "two"]], codes=[[0, 0], [0, 1]], names=[None, "B"] + ) + expected = DataFrame( + {("C", "one"): {"bar": 15, "foo": 13}, ("C", "two"): {"bar": 7, "foo": 20}}, + columns=mi, + ).rename_axis("A") + tm.assert_frame_equal(result, expected) + + result = pivot_table(data, index="A", columns="B", aggfunc=["sum", "mean"]) + 
mi = MultiIndex( + levels=[["sum", "mean"], ["C"], ["one", "two"]], + codes=[[0, 0, 1, 1], [0, 0, 0, 0], [0, 1, 0, 1]], + names=[None, None, "B"], + ) + expected = DataFrame( + { + ("mean", "C", "one"): {"bar": 5.0, "foo": 3.25}, + ("mean", "C", "two"): {"bar": 7.0, "foo": 6.666666666666667}, + ("sum", "C", "one"): {"bar": 15, "foo": 13}, + ("sum", "C", "two"): {"bar": 7, "foo": 20}, + }, + columns=mi, + ).rename_axis("A") + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "f, f_numpy", + [ + ("sum", np.sum), + ("mean", np.mean), + ("std", np.std), + (["sum", "mean"], [np.sum, np.mean]), + (["sum", "std"], [np.sum, np.std]), + (["std", "mean"], [np.std, np.mean]), + ], + ) + def test_pivot_string_func_vs_func(self, f, f_numpy): + # GH #18713 + # for consistency purposes + result = pivot_table(self.data, index="A", columns="B", aggfunc=f) + expected = pivot_table(self.data, index="A", columns="B", aggfunc=f_numpy) + tm.assert_frame_equal(result, expected) + + @pytest.mark.slow + def test_pivot_number_of_levels_larger_than_int32(self, monkeypatch): + # GH 20601 + # GH 26314: Change ValueError to PerformanceWarning + class MockUnstacker(reshape_lib._Unstacker): + def __init__(self, *args, **kwargs): + # __init__ will raise the warning + super().__init__(*args, **kwargs) + raise Exception("Don't compute final result.") + + with monkeypatch.context() as m: + m.setattr(reshape_lib, "_Unstacker", MockUnstacker) + df = DataFrame( + {"ind1": np.arange(2 ** 16), "ind2": np.arange(2 ** 16), "count": 0} + ) + + msg = "The following operation may generate" + with tm.assert_produces_warning(PerformanceWarning, match=msg): + with pytest.raises(Exception, match="Don't compute final result."): + df.pivot_table( + index="ind1", columns="ind2", values="count", aggfunc="count" + ) + + def test_pivot_table_aggfunc_dropna(self, dropna): + # GH 22159 + df = DataFrame( + { + "fruit": ["apple", "peach", "apple"], + "size": [1, 1, 2], + "taste": [7, 6, 6], + } + ) + + def ret_one(x): + return 1 + + def ret_sum(x): + return sum(x) + + def ret_none(x): + return np.nan + + result = pivot_table( + df, columns="fruit", aggfunc=[ret_sum, ret_none, ret_one], dropna=dropna + ) + + data = [[3, 1, np.nan, np.nan, 1, 1], [13, 6, np.nan, np.nan, 1, 1]] + col = MultiIndex.from_product( + [["ret_sum", "ret_none", "ret_one"], ["apple", "peach"]], + names=[None, "fruit"], + ) + expected = DataFrame(data, index=["size", "taste"], columns=col) + + if dropna: + expected = expected.dropna(axis="columns") + + tm.assert_frame_equal(result, expected) + + def test_pivot_table_aggfunc_scalar_dropna(self, dropna): + # GH 22159 + df = DataFrame( + {"A": ["one", "two", "one"], "x": [3, np.nan, 2], "y": [1, np.nan, np.nan]} + ) + + result = pivot_table(df, columns="A", aggfunc=np.mean, dropna=dropna) + + data = [[2.5, np.nan], [1, np.nan]] + col = Index(["one", "two"], name="A") + expected = DataFrame(data, index=["x", "y"], columns=col) + + if dropna: + expected = expected.dropna(axis="columns") + + tm.assert_frame_equal(result, expected) + + def test_pivot_table_empty_aggfunc(self): + # GH 9186 & GH 13483 + df = DataFrame( + { + "A": [2, 2, 3, 3, 2], + "id": [5, 6, 7, 8, 9], + "C": ["p", "q", "q", "p", "q"], + "D": [None, None, None, None, None], + } + ) + result = df.pivot_table(index="A", columns="D", values="id", aggfunc=np.size) + expected = DataFrame(index=Index([], dtype="int64", name="A")) + expected.columns.name = "D" + tm.assert_frame_equal(result, expected) + + def test_pivot_table_no_column_raises(self): + # 
GH 10326 + def agg(arr): + return np.mean(arr) + + foo = DataFrame({"X": [0, 0, 1, 1], "Y": [0, 1, 0, 1], "Z": [10, 20, 30, 40]}) + with pytest.raises(KeyError, match="notpresent"): + foo.pivot_table("notpresent", "X", "Y", aggfunc=agg) + + def test_pivot_table_multiindex_columns_doctest_case(self): + # The relevant characteristic is that the call + # to maybe_downcast_to_dtype(agged[v], data[v].dtype) in + # __internal_pivot_table has `agged[v]` a DataFrame instead of Series, + # In this case this is because agged.columns is a MultiIndex and 'v' + # is only indexing on its first level. + df = DataFrame( + { + "A": ["foo", "foo", "foo", "foo", "foo", "bar", "bar", "bar", "bar"], + "B": ["one", "one", "one", "two", "two", "one", "one", "two", "two"], + "C": [ + "small", + "large", + "large", + "small", + "small", + "large", + "small", + "small", + "large", + ], + "D": [1, 2, 2, 3, 3, 4, 5, 6, 7], + "E": [2, 4, 5, 5, 6, 6, 8, 9, 9], + } + ) + + table = pivot_table( + df, + values=["D", "E"], + index=["A", "C"], + aggfunc={"D": np.mean, "E": [min, max, np.mean]}, + ) + cols = MultiIndex.from_tuples( + [("D", "mean"), ("E", "max"), ("E", "mean"), ("E", "min")] + ) + index = MultiIndex.from_tuples( + [("bar", "large"), ("bar", "small"), ("foo", "large"), ("foo", "small")], + names=["A", "C"], + ) + vals = np.array( + [ + [5.5, 9.0, 7.5, 6.0], + [5.5, 9.0, 8.5, 8.0], + [2.0, 5.0, 4.5, 4.0], + [2.33333333, 6.0, 4.33333333, 2.0], + ] + ) + expected = DataFrame(vals, columns=cols, index=index) + expected[("E", "min")] = expected[("E", "min")].astype(np.int64) + expected[("E", "max")] = expected[("E", "max")].astype(np.int64) + tm.assert_frame_equal(table, expected) + + def test_pivot_table_sort_false(self): + # GH#39143 + df = DataFrame( + { + "a": ["d1", "d4", "d3"], + "col": ["a", "b", "c"], + "num": [23, 21, 34], + "year": ["2018", "2018", "2019"], + } + ) + result = df.pivot_table( + index=["a", "col"], columns="year", values="num", aggfunc="sum", sort=False + ) + expected = DataFrame( + [[23, np.nan], [21, np.nan], [np.nan, 34]], + columns=Index(["2018", "2019"], name="year"), + index=MultiIndex.from_arrays( + [["d1", "d4", "d3"], ["a", "b", "c"]], names=["a", "col"] + ), + ) + tm.assert_frame_equal(result, expected) + + def test_pivot_table_with_margins_and_numeric_columns(self): + # GH 26568 + df = DataFrame([["a", "x", 1], ["a", "y", 2], ["b", "y", 3], ["b", "z", 4]]) + df.columns = [10, 20, 30] + + result = df.pivot_table( + index=10, columns=20, values=30, aggfunc="sum", fill_value=0, margins=True + ) + + expected = DataFrame([[1, 2, 0, 3], [0, 3, 4, 7], [1, 5, 4, 10]]) + expected.columns = ["x", "y", "z", "All"] + expected.index = ["a", "b", "All"] + expected.columns.name = 20 + expected.index.name = 10 + + tm.assert_frame_equal(result, expected) + + +class TestPivot: + def test_pivot(self): + data = { + "index": ["A", "B", "C", "C", "B", "A"], + "columns": ["One", "One", "One", "Two", "Two", "Two"], + "values": [1.0, 2.0, 3.0, 3.0, 2.0, 1.0], + } + + frame = DataFrame(data) + pivoted = frame.pivot(index="index", columns="columns", values="values") + + expected = DataFrame( + { + "One": {"A": 1.0, "B": 2.0, "C": 3.0}, + "Two": {"A": 1.0, "B": 2.0, "C": 3.0}, + } + ) + + expected.index.name, expected.columns.name = "index", "columns" + tm.assert_frame_equal(pivoted, expected) + + # name tracking + assert pivoted.index.name == "index" + assert pivoted.columns.name == "columns" + + # don't specify values + pivoted = frame.pivot(index="index", columns="columns") + assert pivoted.index.name 
== "index" + assert pivoted.columns.names == (None, "columns") + + def test_pivot_duplicates(self): + data = DataFrame( + { + "a": ["bar", "bar", "foo", "foo", "foo"], + "b": ["one", "two", "one", "one", "two"], + "c": [1.0, 2.0, 3.0, 3.0, 4.0], + } + ) + with pytest.raises(ValueError, match="duplicate entries"): + data.pivot("a", "b", "c") + + def test_pivot_empty(self): + df = DataFrame(columns=["a", "b", "c"]) + result = df.pivot("a", "b", "c") + expected = DataFrame() + tm.assert_frame_equal(result, expected, check_names=False) + + def test_pivot_integer_bug(self): + df = DataFrame(data=[("A", "1", "A1"), ("B", "2", "B2")]) + + result = df.pivot(index=1, columns=0, values=2) + repr(result) + tm.assert_index_equal(result.columns, Index(["A", "B"], name=0)) + + def test_pivot_index_none(self): + # GH#3962 + data = { + "index": ["A", "B", "C", "C", "B", "A"], + "columns": ["One", "One", "One", "Two", "Two", "Two"], + "values": [1.0, 2.0, 3.0, 3.0, 2.0, 1.0], + } + + frame = DataFrame(data).set_index("index") + result = frame.pivot(columns="columns", values="values") + expected = DataFrame( + { + "One": {"A": 1.0, "B": 2.0, "C": 3.0}, + "Two": {"A": 1.0, "B": 2.0, "C": 3.0}, + } + ) + + expected.index.name, expected.columns.name = "index", "columns" + tm.assert_frame_equal(result, expected) + + # omit values + result = frame.pivot(columns="columns") + + expected.columns = MultiIndex.from_tuples( + [("values", "One"), ("values", "Two")], names=[None, "columns"] + ) + expected.index.name = "index" + tm.assert_frame_equal(result, expected, check_names=False) + assert result.index.name == "index" + assert result.columns.names == (None, "columns") + expected.columns = expected.columns.droplevel(0) + result = frame.pivot(columns="columns", values="values") + + expected.columns.name = "columns" + tm.assert_frame_equal(result, expected) + + def test_pivot_index_list_values_none_immutable_args(self): + # GH37635 + df = DataFrame( + { + "lev1": [1, 1, 1, 2, 2, 2], + "lev2": [1, 1, 2, 1, 1, 2], + "lev3": [1, 2, 1, 2, 1, 2], + "lev4": [1, 2, 3, 4, 5, 6], + "values": [0, 1, 2, 3, 4, 5], + } + ) + index = ["lev1", "lev2"] + columns = ["lev3"] + result = df.pivot(index=index, columns=columns, values=None) + + expected = DataFrame( + np.array( + [ + [1.0, 2.0, 0.0, 1.0], + [3.0, np.nan, 2.0, np.nan], + [5.0, 4.0, 4.0, 3.0], + [np.nan, 6.0, np.nan, 5.0], + ] + ), + index=MultiIndex.from_arrays( + [(1, 1, 2, 2), (1, 2, 1, 2)], names=["lev1", "lev2"] + ), + columns=MultiIndex.from_arrays( + [("lev4", "lev4", "values", "values"), (1, 2, 1, 2)], + names=[None, "lev3"], + ), + ) + + tm.assert_frame_equal(result, expected) + + assert index == ["lev1", "lev2"] + assert columns == ["lev3"] diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/test_pivot_multilevel.py b/.local/lib/python3.8/site-packages/pandas/tests/reshape/test_pivot_multilevel.py new file mode 100644 index 0000000..308f732 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/reshape/test_pivot_multilevel.py @@ -0,0 +1,252 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Index, + MultiIndex, +) +import pandas._testing as tm + + +@pytest.mark.parametrize( + "input_index, input_columns, input_values, " + "expected_values, expected_columns, expected_index", + [ + ( + ["lev4"], + "lev3", + "values", + [ + [0.0, np.nan], + [np.nan, 1.0], + [2.0, np.nan], + [np.nan, 3.0], + [4.0, np.nan], + [np.nan, 5.0], + [6.0, np.nan], + [np.nan, 7.0], + ], + Index([1, 2], name="lev3"), + Index([1, 2, 
3, 4, 5, 6, 7, 8], name="lev4"), + ), + ( + ["lev4"], + "lev3", + None, + [ + [1.0, np.nan, 1.0, np.nan, 0.0, np.nan], + [np.nan, 1.0, np.nan, 1.0, np.nan, 1.0], + [1.0, np.nan, 2.0, np.nan, 2.0, np.nan], + [np.nan, 1.0, np.nan, 2.0, np.nan, 3.0], + [2.0, np.nan, 1.0, np.nan, 4.0, np.nan], + [np.nan, 2.0, np.nan, 1.0, np.nan, 5.0], + [2.0, np.nan, 2.0, np.nan, 6.0, np.nan], + [np.nan, 2.0, np.nan, 2.0, np.nan, 7.0], + ], + MultiIndex.from_tuples( + [ + ("lev1", 1), + ("lev1", 2), + ("lev2", 1), + ("lev2", 2), + ("values", 1), + ("values", 2), + ], + names=[None, "lev3"], + ), + Index([1, 2, 3, 4, 5, 6, 7, 8], name="lev4"), + ), + ( + ["lev1", "lev2"], + "lev3", + "values", + [[0, 1], [2, 3], [4, 5], [6, 7]], + Index([1, 2], name="lev3"), + MultiIndex.from_tuples( + [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev1", "lev2"] + ), + ), + ( + ["lev1", "lev2"], + "lev3", + None, + [[1, 2, 0, 1], [3, 4, 2, 3], [5, 6, 4, 5], [7, 8, 6, 7]], + MultiIndex.from_tuples( + [("lev4", 1), ("lev4", 2), ("values", 1), ("values", 2)], + names=[None, "lev3"], + ), + MultiIndex.from_tuples( + [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev1", "lev2"] + ), + ), + ], +) +def test_pivot_list_like_index( + input_index, + input_columns, + input_values, + expected_values, + expected_columns, + expected_index, +): + # GH 21425, test when index is given a list + df = pd.DataFrame( + { + "lev1": [1, 1, 1, 1, 2, 2, 2, 2], + "lev2": [1, 1, 2, 2, 1, 1, 2, 2], + "lev3": [1, 2, 1, 2, 1, 2, 1, 2], + "lev4": [1, 2, 3, 4, 5, 6, 7, 8], + "values": [0, 1, 2, 3, 4, 5, 6, 7], + } + ) + + result = df.pivot(index=input_index, columns=input_columns, values=input_values) + expected = pd.DataFrame( + expected_values, columns=expected_columns, index=expected_index + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "input_index, input_columns, input_values, " + "expected_values, expected_columns, expected_index", + [ + ( + "lev4", + ["lev3"], + "values", + [ + [0.0, np.nan], + [np.nan, 1.0], + [2.0, np.nan], + [np.nan, 3.0], + [4.0, np.nan], + [np.nan, 5.0], + [6.0, np.nan], + [np.nan, 7.0], + ], + Index([1, 2], name="lev3"), + Index([1, 2, 3, 4, 5, 6, 7, 8], name="lev4"), + ), + ( + ["lev1", "lev2"], + ["lev3"], + "values", + [[0, 1], [2, 3], [4, 5], [6, 7]], + Index([1, 2], name="lev3"), + MultiIndex.from_tuples( + [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev1", "lev2"] + ), + ), + ( + ["lev1"], + ["lev2", "lev3"], + "values", + [[0, 1, 2, 3], [4, 5, 6, 7]], + MultiIndex.from_tuples( + [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev2", "lev3"] + ), + Index([1, 2], name="lev1"), + ), + ( + ["lev1", "lev2"], + ["lev3", "lev4"], + "values", + [ + [0.0, 1.0, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, 2.0, 3.0, np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan, 4.0, 5.0, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 6.0, 7.0], + ], + MultiIndex.from_tuples( + [(1, 1), (2, 2), (1, 3), (2, 4), (1, 5), (2, 6), (1, 7), (2, 8)], + names=["lev3", "lev4"], + ), + MultiIndex.from_tuples( + [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev1", "lev2"] + ), + ), + ], +) +def test_pivot_list_like_columns( + input_index, + input_columns, + input_values, + expected_values, + expected_columns, + expected_index, +): + # GH 21425, test when columns is given a list + df = pd.DataFrame( + { + "lev1": [1, 1, 1, 1, 2, 2, 2, 2], + "lev2": [1, 1, 2, 2, 1, 1, 2, 2], + "lev3": [1, 2, 1, 2, 1, 2, 1, 2], + "lev4": [1, 2, 3, 4, 5, 6, 7, 8], + "values": [0, 1, 2, 3, 4, 5, 6, 7], + } + ) 
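+ # Note: the fixture enumerates each (lev1, lev2, lev3) combination exactly
+ # once, so every parametrized pivot below is well defined (no duplicate
+ # index/column entries).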
+ + result = df.pivot(index=input_index, columns=input_columns, values=input_values) + expected = pd.DataFrame( + expected_values, columns=expected_columns, index=expected_index + ) + tm.assert_frame_equal(result, expected) + + +def test_pivot_multiindexed_rows_and_cols(using_array_manager): + # GH 36360 + + df = pd.DataFrame( + data=np.arange(12).reshape(4, 3), + columns=MultiIndex.from_tuples( + [(0, 0), (0, 1), (0, 2)], names=["col_L0", "col_L1"] + ), + index=MultiIndex.from_tuples( + [(0, 0, 0), (0, 0, 1), (1, 1, 1), (1, 0, 0)], + names=["idx_L0", "idx_L1", "idx_L2"], + ), + ) + + res = df.pivot_table( + index=["idx_L0"], + columns=["idx_L1"], + values=[(0, 1)], + aggfunc=lambda col: col.values.sum(), + ) + + expected = pd.DataFrame( + data=[[5, np.nan], [10, 7.0]], + columns=MultiIndex.from_tuples( + [(0, 1, 0), (0, 1, 1)], names=["col_L0", "col_L1", "idx_L1"] + ), + index=Index([0, 1], dtype="int64", name="idx_L0"), + ) + if not using_array_manager: + # BlockManager does not preserve the dtypes + expected = expected.astype("float64") + + tm.assert_frame_equal(res, expected) + + +def test_pivot_df_multiindex_index_none(): + # GH 23955 + df = pd.DataFrame( + [ + ["A", "A1", "label1", 1], + ["A", "A2", "label2", 2], + ["B", "A1", "label1", 3], + ["B", "A2", "label2", 4], + ], + columns=["index_1", "index_2", "label", "value"], + ) + df = df.set_index(["index_1", "index_2"]) + + result = df.pivot(index=None, columns="label", values="value") + expected = pd.DataFrame( + [[1.0, np.nan], [np.nan, 2.0], [3.0, np.nan], [np.nan, 4.0]], + index=df.index, + columns=Index(["label1", "label2"], name="label"), + ) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/test_qcut.py b/.local/lib/python3.8/site-packages/pandas/tests/reshape/test_qcut.py new file mode 100644 index 0000000..f7c7204 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/reshape/test_qcut.py @@ -0,0 +1,302 @@ +import os + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Categorical, + DatetimeIndex, + Interval, + IntervalIndex, + NaT, + Series, + TimedeltaIndex, + Timestamp, + cut, + date_range, + isna, + qcut, + timedelta_range, +) +import pandas._testing as tm +from pandas.api.types import CategoricalDtype as CDT + +from pandas.tseries.offsets import ( + Day, + Nano, +) + + +def test_qcut(): + arr = np.random.randn(1000) + + # We store the bins as an Index that has been + # rounded, so comparisons are a bit tricky.
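+ # Illustrative reading (approximate, since arr is random): the left/right
+ # edges of the four quartile intervals should land within ~1e-2 of
+ # np.quantile(arr, [0, 0.25, 0.5, 0.75, 1.0]).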
+ labels, _ = qcut(arr, 4, retbins=True) + ex_bins = np.quantile(arr, [0, 0.25, 0.5, 0.75, 1.0]) + + result = labels.categories.left.values + assert np.allclose(result, ex_bins[:-1], atol=1e-2) + + result = labels.categories.right.values + assert np.allclose(result, ex_bins[1:], atol=1e-2) + + ex_levels = cut(arr, ex_bins, include_lowest=True) + tm.assert_categorical_equal(labels, ex_levels) + + +def test_qcut_bounds(): + arr = np.random.randn(1000) + + factor = qcut(arr, 10, labels=False) + assert len(np.unique(factor)) == 10 + + +def test_qcut_specify_quantiles(): + arr = np.random.randn(100) + factor = qcut(arr, [0, 0.25, 0.5, 0.75, 1.0]) + + expected = qcut(arr, 4) + tm.assert_categorical_equal(factor, expected) + + +def test_qcut_all_bins_same(): + with pytest.raises(ValueError, match="edges.*unique"): + qcut([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 3) + + +def test_qcut_include_lowest(): + values = np.arange(10) + ii = qcut(values, 4) + + ex_levels = IntervalIndex( + [ + Interval(-0.001, 2.25), + Interval(2.25, 4.5), + Interval(4.5, 6.75), + Interval(6.75, 9), + ] + ) + tm.assert_index_equal(ii.categories, ex_levels) + + +def test_qcut_nas(): + arr = np.random.randn(100) + arr[:20] = np.nan + + result = qcut(arr, 4) + assert isna(result[:20]).all() + + +def test_qcut_index(): + result = qcut([0, 2], 2) + intervals = [Interval(-0.001, 1), Interval(1, 2)] + + expected = Categorical(intervals, ordered=True) + tm.assert_categorical_equal(result, expected) + + +def test_qcut_binning_issues(datapath): + # see gh-1978, gh-1979 + cut_file = datapath(os.path.join("reshape", "data", "cut_data.csv")) + arr = np.loadtxt(cut_file) + result = qcut(arr, 20) + + starts = [] + ends = [] + + for lev in np.unique(result): + s = lev.left + e = lev.right + assert s != e + + starts.append(float(s)) + ends.append(float(e)) + + for (sp, sn), (ep, en) in zip( + zip(starts[:-1], starts[1:]), zip(ends[:-1], ends[1:]) + ): + assert sp < sn + assert ep < en + assert ep <= sn + + +def test_qcut_return_intervals(): + ser = Series([0, 1, 2, 3, 4, 5, 6, 7, 8]) + res = qcut(ser, [0, 0.333, 0.666, 1]) + + exp_levels = np.array( + [Interval(-0.001, 2.664), Interval(2.664, 5.328), Interval(5.328, 8)] + ) + exp = Series(exp_levels.take([0, 0, 0, 1, 1, 1, 2, 2, 2])).astype(CDT(ordered=True)) + tm.assert_series_equal(res, exp) + + +@pytest.mark.parametrize("labels", ["foo", 1, True]) +def test_qcut_incorrect_labels(labels): + # GH 13318 + values = range(5) + msg = "Bin labels must either be False, None or passed in as a list-like argument" + with pytest.raises(ValueError, match=msg): + qcut(values, 4, labels=labels) + + +@pytest.mark.parametrize("labels", [["a", "b", "c"], list(range(3))]) +def test_qcut_wrong_length_labels(labels): + # GH 13318 + values = range(10) + msg = "Bin labels must be one fewer than the number of bin edges" + with pytest.raises(ValueError, match=msg): + qcut(values, 4, labels=labels) + + +@pytest.mark.parametrize( + "labels, expected", + [ + (["a", "b", "c"], Categorical(["a", "b", "c"], ordered=True)), + (list(range(3)), Categorical([0, 1, 2], ordered=True)), + ], +) +def test_qcut_list_like_labels(labels, expected): + # GH 13318 + values = range(3) + result = qcut(values, 3, labels=labels) + tm.assert_categorical_equal(result, expected) + + +@pytest.mark.parametrize( + "kwargs,msg", + [ + ({"duplicates": "drop"}, None), + ({}, "Bin edges must be unique"), + ({"duplicates": "raise"}, "Bin edges must be unique"), + ({"duplicates": "foo"}, "invalid value for 'duplicates' parameter"), + ], +) +def 
test_qcut_duplicates_bin(kwargs, msg): + # see gh-7751 + values = [0, 0, 0, 0, 1, 2, 3] + + if msg is not None: + with pytest.raises(ValueError, match=msg): + qcut(values, 3, **kwargs) + else: + result = qcut(values, 3, **kwargs) + expected = IntervalIndex([Interval(-0.001, 1), Interval(1, 3)]) + tm.assert_index_equal(result.categories, expected) + + +@pytest.mark.parametrize( + "data,start,end", [(9.0, 8.999, 9.0), (0.0, -0.001, 0.0), (-9.0, -9.001, -9.0)] +) +@pytest.mark.parametrize("length", [1, 2]) +@pytest.mark.parametrize("labels", [None, False]) +def test_single_quantile(data, start, end, length, labels): + # see gh-15431 + ser = Series([data] * length) + result = qcut(ser, 1, labels=labels) + + if labels is None: + intervals = IntervalIndex([Interval(start, end)] * length, closed="right") + expected = Series(intervals).astype(CDT(ordered=True)) + else: + expected = Series([0] * length, dtype=np.intp) + + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "ser", + [ + Series(DatetimeIndex(["20180101", NaT, "20180103"])), + Series(TimedeltaIndex(["0 days", NaT, "2 days"])), + ], + ids=lambda x: str(x.dtype), +) +def test_qcut_nat(ser): + # see gh-19768 + intervals = IntervalIndex.from_tuples( + [(ser[0] - Nano(), ser[2] - Day()), np.nan, (ser[2] - Day(), ser[2])] + ) + expected = Series(Categorical(intervals, ordered=True)) + + result = qcut(ser, 2) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("bins", [3, np.linspace(0, 1, 4)]) +def test_datetime_tz_qcut(bins): + # see gh-19872 + tz = "US/Eastern" + ser = Series(date_range("20130101", periods=3, tz=tz)) + + result = qcut(ser, bins) + expected = Series( + IntervalIndex( + [ + Interval( + Timestamp("2012-12-31 23:59:59.999999999", tz=tz), + Timestamp("2013-01-01 16:00:00", tz=tz), + ), + Interval( + Timestamp("2013-01-01 16:00:00", tz=tz), + Timestamp("2013-01-02 08:00:00", tz=tz), + ), + Interval( + Timestamp("2013-01-02 08:00:00", tz=tz), + Timestamp("2013-01-03 00:00:00", tz=tz), + ), + ] + ) + ).astype(CDT(ordered=True)) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "arg,expected_bins", + [ + [ + timedelta_range("1day", periods=3), + TimedeltaIndex(["1 days", "2 days", "3 days"]), + ], + [ + date_range("20180101", periods=3), + DatetimeIndex(["2018-01-01", "2018-01-02", "2018-01-03"]), + ], + ], +) +def test_date_like_qcut_bins(arg, expected_bins): + # see gh-19891 + ser = Series(arg) + result, result_bins = qcut(ser, 2, retbins=True) + tm.assert_index_equal(result_bins, expected_bins) + + +@pytest.mark.parametrize("bins", [6, 7]) +@pytest.mark.parametrize( + "box, compare", + [ + (Series, tm.assert_series_equal), + (np.array, tm.assert_categorical_equal), + (list, tm.assert_equal), + ], +) +def test_qcut_bool_coercion_to_int(bins, box, compare): + # issue 20303 + data_expected = box([0, 1, 1, 0, 1] * 10) + data_result = box([False, True, True, False, True] * 10) + expected = qcut(data_expected, bins, duplicates="drop") + result = qcut(data_result, bins, duplicates="drop") + compare(result, expected) + + +@pytest.mark.parametrize("q", [2, 5, 10]) +def test_qcut_nullable_integer(q, any_numeric_ea_dtype): + arr = pd.array(np.arange(100), dtype=any_numeric_ea_dtype) + arr[::2] = pd.NA + + result = qcut(arr, q) + expected = qcut(arr.astype(float), q) + + tm.assert_categorical_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/test_union_categoricals.py 
b/.local/lib/python3.8/site-packages/pandas/tests/reshape/test_union_categoricals.py new file mode 100644 index 0000000..f39b5de --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/reshape/test_union_categoricals.py @@ -0,0 +1,354 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.concat import union_categoricals + +import pandas as pd +from pandas import ( + Categorical, + CategoricalIndex, + Series, +) +import pandas._testing as tm + + +class TestUnionCategoricals: + def test_union_categorical(self): + # GH 13361 + data = [ + (list("abc"), list("abd"), list("abcabd")), + ([0, 1, 2], [2, 3, 4], [0, 1, 2, 2, 3, 4]), + ([0, 1.2, 2], [2, 3.4, 4], [0, 1.2, 2, 2, 3.4, 4]), + ( + ["b", "b", np.nan, "a"], + ["a", np.nan, "c"], + ["b", "b", np.nan, "a", "a", np.nan, "c"], + ), + ( + pd.date_range("2014-01-01", "2014-01-05"), + pd.date_range("2014-01-06", "2014-01-07"), + pd.date_range("2014-01-01", "2014-01-07"), + ), + ( + pd.date_range("2014-01-01", "2014-01-05", tz="US/Central"), + pd.date_range("2014-01-06", "2014-01-07", tz="US/Central"), + pd.date_range("2014-01-01", "2014-01-07", tz="US/Central"), + ), + ( + pd.period_range("2014-01-01", "2014-01-05"), + pd.period_range("2014-01-06", "2014-01-07"), + pd.period_range("2014-01-01", "2014-01-07"), + ), + ] + + for a, b, combined in data: + for box in [Categorical, CategoricalIndex, Series]: + result = union_categoricals([box(Categorical(a)), box(Categorical(b))]) + expected = Categorical(combined) + tm.assert_categorical_equal(result, expected) + + # new categories ordered by appearance + s = Categorical(["x", "y", "z"]) + s2 = Categorical(["a", "b", "c"]) + result = union_categoricals([s, s2]) + expected = Categorical( + ["x", "y", "z", "a", "b", "c"], categories=["x", "y", "z", "a", "b", "c"] + ) + tm.assert_categorical_equal(result, expected) + + s = Categorical([0, 1.2, 2], ordered=True) + s2 = Categorical([0, 1.2, 2], ordered=True) + result = union_categoricals([s, s2]) + expected = Categorical([0, 1.2, 2, 0, 1.2, 2], ordered=True) + tm.assert_categorical_equal(result, expected) + + # must exactly match types + s = Categorical([0, 1.2, 2]) + s2 = Categorical([2, 3, 4]) + msg = "dtype of categories must be the same" + with pytest.raises(TypeError, match=msg): + union_categoricals([s, s2]) + + msg = "No Categoricals to union" + with pytest.raises(ValueError, match=msg): + union_categoricals([]) + + def test_union_categoricals_nan(self): + # GH 13759 + res = union_categoricals( + [Categorical([1, 2, np.nan]), Categorical([3, 2, np.nan])] + ) + exp = Categorical([1, 2, np.nan, 3, 2, np.nan]) + tm.assert_categorical_equal(res, exp) + + res = union_categoricals( + [Categorical(["A", "B"]), Categorical(["B", "B", np.nan])] + ) + exp = Categorical(["A", "B", "B", "B", np.nan]) + tm.assert_categorical_equal(res, exp) + + val1 = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-03-01"), pd.NaT] + val2 = [pd.NaT, pd.Timestamp("2011-01-01"), pd.Timestamp("2011-02-01")] + + res = union_categoricals([Categorical(val1), Categorical(val2)]) + exp = Categorical( + val1 + val2, + categories=[ + pd.Timestamp("2011-01-01"), + pd.Timestamp("2011-03-01"), + pd.Timestamp("2011-02-01"), + ], + ) + tm.assert_categorical_equal(res, exp) + + # all NaN + res = union_categoricals( + [ + Categorical(np.array([np.nan, np.nan], dtype=object)), + Categorical(["X"]), + ] + ) + exp = Categorical([np.nan, np.nan, "X"]) + tm.assert_categorical_equal(res, exp) + + res = union_categoricals( + [Categorical([np.nan, np.nan]), Categorical([np.nan, 
np.nan])] + ) + exp = Categorical([np.nan, np.nan, np.nan, np.nan]) + tm.assert_categorical_equal(res, exp) + + def test_union_categoricals_empty(self): + # GH 13759 + res = union_categoricals([Categorical([]), Categorical([])]) + exp = Categorical([]) + tm.assert_categorical_equal(res, exp) + + res = union_categoricals([Categorical([]), Categorical(["1"])]) + exp = Categorical(["1"]) + tm.assert_categorical_equal(res, exp) + + def test_union_categorical_same_category(self): + # check fastpath + c1 = Categorical([1, 2, 3, 4], categories=[1, 2, 3, 4]) + c2 = Categorical([3, 2, 1, np.nan], categories=[1, 2, 3, 4]) + res = union_categoricals([c1, c2]) + exp = Categorical([1, 2, 3, 4, 3, 2, 1, np.nan], categories=[1, 2, 3, 4]) + tm.assert_categorical_equal(res, exp) + + c1 = Categorical(["z", "z", "z"], categories=["x", "y", "z"]) + c2 = Categorical(["x", "x", "x"], categories=["x", "y", "z"]) + res = union_categoricals([c1, c2]) + exp = Categorical(["z", "z", "z", "x", "x", "x"], categories=["x", "y", "z"]) + tm.assert_categorical_equal(res, exp) + + def test_union_categorical_same_categories_different_order(self): + # https://github.com/pandas-dev/pandas/issues/19096 + c1 = Categorical(["a", "b", "c"], categories=["a", "b", "c"]) + c2 = Categorical(["a", "b", "c"], categories=["b", "a", "c"]) + result = union_categoricals([c1, c2]) + expected = Categorical( + ["a", "b", "c", "a", "b", "c"], categories=["a", "b", "c"] + ) + tm.assert_categorical_equal(result, expected) + + def test_union_categoricals_ordered(self): + c1 = Categorical([1, 2, 3], ordered=True) + c2 = Categorical([1, 2, 3], ordered=False) + + msg = "Categorical.ordered must be the same" + with pytest.raises(TypeError, match=msg): + union_categoricals([c1, c2]) + + res = union_categoricals([c1, c1]) + exp = Categorical([1, 2, 3, 1, 2, 3], ordered=True) + tm.assert_categorical_equal(res, exp) + + c1 = Categorical([1, 2, 3, np.nan], ordered=True) + c2 = Categorical([3, 2], categories=[1, 2, 3], ordered=True) + + res = union_categoricals([c1, c2]) + exp = Categorical([1, 2, 3, np.nan, 3, 2], ordered=True) + tm.assert_categorical_equal(res, exp) + + c1 = Categorical([1, 2, 3], ordered=True) + c2 = Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True) + + msg = "to union ordered Categoricals, all categories must be the same" + with pytest.raises(TypeError, match=msg): + union_categoricals([c1, c2]) + + def test_union_categoricals_ignore_order(self): + # GH 15219 + c1 = Categorical([1, 2, 3], ordered=True) + c2 = Categorical([1, 2, 3], ordered=False) + + res = union_categoricals([c1, c2], ignore_order=True) + exp = Categorical([1, 2, 3, 1, 2, 3]) + tm.assert_categorical_equal(res, exp) + + msg = "Categorical.ordered must be the same" + with pytest.raises(TypeError, match=msg): + union_categoricals([c1, c2], ignore_order=False) + + res = union_categoricals([c1, c1], ignore_order=True) + exp = Categorical([1, 2, 3, 1, 2, 3]) + tm.assert_categorical_equal(res, exp) + + res = union_categoricals([c1, c1], ignore_order=False) + exp = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3], ordered=True) + tm.assert_categorical_equal(res, exp) + + c1 = Categorical([1, 2, 3, np.nan], ordered=True) + c2 = Categorical([3, 2], categories=[1, 2, 3], ordered=True) + + res = union_categoricals([c1, c2], ignore_order=True) + exp = Categorical([1, 2, 3, np.nan, 3, 2]) + tm.assert_categorical_equal(res, exp) + + c1 = Categorical([1, 2, 3], ordered=True) + c2 = Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True) + + res = 
union_categoricals([c1, c2], ignore_order=True) + exp = Categorical([1, 2, 3, 1, 2, 3]) + tm.assert_categorical_equal(res, exp) + + res = union_categoricals([c2, c1], ignore_order=True, sort_categories=True) + exp = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3]) + tm.assert_categorical_equal(res, exp) + + c1 = Categorical([1, 2, 3], ordered=True) + c2 = Categorical([4, 5, 6], ordered=True) + result = union_categoricals([c1, c2], ignore_order=True) + expected = Categorical([1, 2, 3, 4, 5, 6]) + tm.assert_categorical_equal(result, expected) + + msg = "to union ordered Categoricals, all categories must be the same" + with pytest.raises(TypeError, match=msg): + union_categoricals([c1, c2], ignore_order=False) + + with pytest.raises(TypeError, match=msg): + union_categoricals([c1, c2]) + + def test_union_categoricals_sort(self): + # GH 13846 + c1 = Categorical(["x", "y", "z"]) + c2 = Categorical(["a", "b", "c"]) + result = union_categoricals([c1, c2], sort_categories=True) + expected = Categorical( + ["x", "y", "z", "a", "b", "c"], categories=["a", "b", "c", "x", "y", "z"] + ) + tm.assert_categorical_equal(result, expected) + + # fastpath + c1 = Categorical(["a", "b"], categories=["b", "a", "c"]) + c2 = Categorical(["b", "c"], categories=["b", "a", "c"]) + result = union_categoricals([c1, c2], sort_categories=True) + expected = Categorical(["a", "b", "b", "c"], categories=["a", "b", "c"]) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical(["a", "b"], categories=["c", "a", "b"]) + c2 = Categorical(["b", "c"], categories=["c", "a", "b"]) + result = union_categoricals([c1, c2], sort_categories=True) + expected = Categorical(["a", "b", "b", "c"], categories=["a", "b", "c"]) + tm.assert_categorical_equal(result, expected) + + # fastpath - skip resort + c1 = Categorical(["a", "b"], categories=["a", "b", "c"]) + c2 = Categorical(["b", "c"], categories=["a", "b", "c"]) + result = union_categoricals([c1, c2], sort_categories=True) + expected = Categorical(["a", "b", "b", "c"], categories=["a", "b", "c"]) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical(["x", np.nan]) + c2 = Categorical([np.nan, "b"]) + result = union_categoricals([c1, c2], sort_categories=True) + expected = Categorical(["x", np.nan, np.nan, "b"], categories=["b", "x"]) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical([np.nan]) + c2 = Categorical([np.nan]) + result = union_categoricals([c1, c2], sort_categories=True) + expected = Categorical([np.nan, np.nan]) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical([]) + c2 = Categorical([]) + result = union_categoricals([c1, c2], sort_categories=True) + expected = Categorical([]) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical(["b", "a"], categories=["b", "a", "c"], ordered=True) + c2 = Categorical(["a", "c"], categories=["b", "a", "c"], ordered=True) + msg = "Cannot use sort_categories=True with ordered Categoricals" + with pytest.raises(TypeError, match=msg): + union_categoricals([c1, c2], sort_categories=True) + + def test_union_categoricals_sort_false(self): + # GH 13846 + c1 = Categorical(["x", "y", "z"]) + c2 = Categorical(["a", "b", "c"]) + result = union_categoricals([c1, c2], sort_categories=False) + expected = Categorical( + ["x", "y", "z", "a", "b", "c"], categories=["x", "y", "z", "a", "b", "c"] + ) + tm.assert_categorical_equal(result, expected) + + # fastpath + c1 = Categorical(["a", "b"], categories=["b", "a", "c"]) + c2 = Categorical(["b", "c"], categories=["b", "a", "c"]) + 
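+ # With sort_categories=False the shared category order ["b", "a", "c"] is
+ # kept as-is rather than sorted lexically.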
result = union_categoricals([c1, c2], sort_categories=False) + expected = Categorical(["a", "b", "b", "c"], categories=["b", "a", "c"]) + tm.assert_categorical_equal(result, expected) + + # fastpath - skip resort + c1 = Categorical(["a", "b"], categories=["a", "b", "c"]) + c2 = Categorical(["b", "c"], categories=["a", "b", "c"]) + result = union_categoricals([c1, c2], sort_categories=False) + expected = Categorical(["a", "b", "b", "c"], categories=["a", "b", "c"]) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical(["x", np.nan]) + c2 = Categorical([np.nan, "b"]) + result = union_categoricals([c1, c2], sort_categories=False) + expected = Categorical(["x", np.nan, np.nan, "b"], categories=["x", "b"]) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical([np.nan]) + c2 = Categorical([np.nan]) + result = union_categoricals([c1, c2], sort_categories=False) + expected = Categorical([np.nan, np.nan]) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical([]) + c2 = Categorical([]) + result = union_categoricals([c1, c2], sort_categories=False) + expected = Categorical([]) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical(["b", "a"], categories=["b", "a", "c"], ordered=True) + c2 = Categorical(["a", "c"], categories=["b", "a", "c"], ordered=True) + result = union_categoricals([c1, c2], sort_categories=False) + expected = Categorical( + ["b", "a", "a", "c"], categories=["b", "a", "c"], ordered=True + ) + tm.assert_categorical_equal(result, expected) + + def test_union_categorical_unwrap(self): + # GH 14173 + c1 = Categorical(["a", "b"]) + c2 = Series(["b", "c"], dtype="category") + result = union_categoricals([c1, c2]) + expected = Categorical(["a", "b", "b", "c"]) + tm.assert_categorical_equal(result, expected) + + c2 = CategoricalIndex(c2) + result = union_categoricals([c1, c2]) + tm.assert_categorical_equal(result, expected) + + c1 = Series(c1) + result = union_categoricals([c1, c2]) + tm.assert_categorical_equal(result, expected) + + msg = "all components to combine must be Categorical" + with pytest.raises(TypeError, match=msg): + union_categoricals([c1, ["a", "b", "c"]]) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/reshape/test_util.py b/.local/lib/python3.8/site-packages/pandas/tests/reshape/test_util.py new file mode 100644 index 0000000..1ebe96a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/reshape/test_util.py @@ -0,0 +1,80 @@ +import numpy as np +import pytest + +from pandas import ( + Index, + date_range, +) +import pandas._testing as tm +from pandas.core.reshape.util import cartesian_product + + +class TestCartesianProduct: + def test_simple(self): + x, y = list("ABC"), [1, 22] + result1, result2 = cartesian_product([x, y]) + expected1 = np.array(["A", "A", "B", "B", "C", "C"]) + expected2 = np.array([1, 22, 1, 22, 1, 22]) + tm.assert_numpy_array_equal(result1, expected1) + tm.assert_numpy_array_equal(result2, expected2) + + def test_datetimeindex(self): + # regression test for GitHub issue #6439 + # make sure that the ordering on datetimeindex is consistent + x = date_range("2000-01-01", periods=2) + result1, result2 = (Index(y).day for y in cartesian_product([x, x])) + expected1 = Index([1, 1, 2, 2]) + expected2 = Index([1, 2, 1, 2]) + tm.assert_index_equal(result1, expected1) + tm.assert_index_equal(result2, expected2) + + def test_tzaware_retained(self): + x = date_range("2000-01-01", periods=2, tz="US/Pacific") + y = np.array([3, 4]) + result1, result2 = cartesian_product([x, y]) 
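+ # cartesian_product should hand the tz-aware DatetimeIndex back intact
+ # (each timestamp repeated), not downcast it to naive values.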
+ + expected = x.repeat(2) + tm.assert_index_equal(result1, expected) + + def test_tzaware_retained_categorical(self): + x = date_range("2000-01-01", periods=2, tz="US/Pacific").astype("category") + y = np.array([3, 4]) + result1, result2 = cartesian_product([x, y]) + + expected = x.repeat(2) + tm.assert_index_equal(result1, expected) + + def test_empty(self): + # product of empty factors + X = [[], [0, 1], []] + Y = [[], [], ["a", "b", "c"]] + for x, y in zip(X, Y): + expected1 = np.array([], dtype=np.asarray(x).dtype) + expected2 = np.array([], dtype=np.asarray(y).dtype) + result1, result2 = cartesian_product([x, y]) + tm.assert_numpy_array_equal(result1, expected1) + tm.assert_numpy_array_equal(result2, expected2) + + # empty product (empty input): + result = cartesian_product([]) + expected = [] + assert result == expected + + @pytest.mark.parametrize( + "X", [1, [1], [1, 2], [[1], 2], "a", ["a"], ["a", "b"], [["a"], "b"]] + ) + def test_invalid_input(self, X): + msg = "Input must be a list-like of list-likes" + + with pytest.raises(TypeError, match=msg): + cartesian_product(X=X) + + def test_exceed_product_space(self): + # GH31355: raise useful error when product space is too large + msg = "Product space too large to allocate arrays!" + + with pytest.raises(ValueError, match=msg): + dims = [np.arange(0, 22, dtype=np.int16) for i in range(12)] + [ + (np.arange(15128, dtype=np.int16)), + ] + cartesian_product(X=dims) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/scalar/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/scalar/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..9d8c826 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/scalar/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/__pycache__/test_na_scalar.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/scalar/__pycache__/test_na_scalar.cpython-38.pyc new file mode 100644 index 0000000..2d3c77b Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/scalar/__pycache__/test_na_scalar.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/__pycache__/test_nat.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/scalar/__pycache__/test_nat.cpython-38.pyc new file mode 100644 index 0000000..9e4f244 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/scalar/__pycache__/test_nat.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/interval/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/scalar/interval/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/interval/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/scalar/interval/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..5e60d30 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/scalar/interval/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/interval/__pycache__/test_arithmetic.cpython-38.pyc
b/.local/lib/python3.8/site-packages/pandas/tests/scalar/interval/__pycache__/test_arithmetic.cpython-38.pyc new file mode 100644 index 0000000..116fb72 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/scalar/interval/__pycache__/test_arithmetic.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/interval/__pycache__/test_interval.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/scalar/interval/__pycache__/test_interval.cpython-38.pyc new file mode 100644 index 0000000..7b4d614 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/scalar/interval/__pycache__/test_interval.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/interval/__pycache__/test_ops.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/scalar/interval/__pycache__/test_ops.cpython-38.pyc new file mode 100644 index 0000000..6abcaa9 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/scalar/interval/__pycache__/test_ops.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/interval/test_arithmetic.py b/.local/lib/python3.8/site-packages/pandas/tests/scalar/interval/test_arithmetic.py new file mode 100644 index 0000000..987f7d5 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/scalar/interval/test_arithmetic.py @@ -0,0 +1,63 @@ +from datetime import timedelta + +import numpy as np +import pytest + +from pandas import ( + Interval, + Timedelta, + Timestamp, +) + + +@pytest.mark.parametrize("method", ["__add__", "__sub__"]) +@pytest.mark.parametrize( + "interval", + [ + Interval(Timestamp("2017-01-01 00:00:00"), Timestamp("2018-01-01 00:00:00")), + Interval(Timedelta(days=7), Timedelta(days=14)), + ], +) +@pytest.mark.parametrize( + "delta", [Timedelta(days=7), timedelta(7), np.timedelta64(7, "D")] +) +def test_time_interval_add_subtract_timedelta(interval, delta, method): + # https://github.com/pandas-dev/pandas/issues/32023 + result = getattr(interval, method)(delta) + left = getattr(interval.left, method)(delta) + right = getattr(interval.right, method)(delta) + expected = Interval(left, right) + + assert result == expected + + +@pytest.mark.parametrize("interval", [Interval(1, 2), Interval(1.0, 2.0)]) +@pytest.mark.parametrize( + "delta", [Timedelta(days=7), timedelta(7), np.timedelta64(7, "D")] ) +def test_numeric_interval_add_timedelta_raises(interval, delta): + # https://github.com/pandas-dev/pandas/issues/32023 + msg = "|".join( + [ + "unsupported operand", + "cannot use operands", + "Only numeric, Timestamp and Timedelta endpoints are allowed", + ] + ) + with pytest.raises((TypeError, ValueError), match=msg): + interval + delta + + with pytest.raises((TypeError, ValueError), match=msg): + delta + interval + + +@pytest.mark.parametrize("klass", [timedelta, np.timedelta64, Timedelta]) +def test_timedelta_add_timestamp_interval(klass): + delta = klass(0) + expected = Interval(Timestamp("2020-01-01"), Timestamp("2020-02-01")) + + result = delta + expected + assert result == expected + + result = expected + delta + assert result == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/interval/test_interval.py b/.local/lib/python3.8/site-packages/pandas/tests/scalar/interval/test_interval.py new file mode 100644 index 0000000..1f76a7d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/scalar/interval/test_interval.py @@ -0,0 +1,283 @@ +import numpy as np
+import pytest + +from pandas import ( + Interval, + Period, + Timedelta, + Timestamp, +) +import pandas._testing as tm +import pandas.core.common as com + + +@pytest.fixture +def interval(): + return Interval(0, 1) + + +class TestInterval: + def test_properties(self, interval): + assert interval.closed == "right" + assert interval.left == 0 + assert interval.right == 1 + assert interval.mid == 0.5 + + def test_repr(self, interval): + assert repr(interval) == "Interval(0, 1, closed='right')" + assert str(interval) == "(0, 1]" + + interval_left = Interval(0, 1, closed="left") + assert repr(interval_left) == "Interval(0, 1, closed='left')" + assert str(interval_left) == "[0, 1)" + + def test_contains(self, interval): + assert 0.5 in interval + assert 1 in interval + assert 0 not in interval + + msg = "__contains__ not defined for two intervals" + with pytest.raises(TypeError, match=msg): + interval in interval + + interval_both = Interval(0, 1, closed="both") + assert 0 in interval_both + assert 1 in interval_both + + interval_neither = Interval(0, 1, closed="neither") + assert 0 not in interval_neither + assert 0.5 in interval_neither + assert 1 not in interval_neither + + def test_equal(self): + assert Interval(0, 1) == Interval(0, 1, closed="right") + assert Interval(0, 1) != Interval(0, 1, closed="left") + assert Interval(0, 1) != 0 + + def test_comparison(self): + msg = ( + "'<' not supported between instances of " + "'pandas._libs.interval.Interval' and 'int'" + ) + with pytest.raises(TypeError, match=msg): + Interval(0, 1) < 2 + + assert Interval(0, 1) < Interval(1, 2) + assert Interval(0, 1) < Interval(0, 2) + assert Interval(0, 1) < Interval(0.5, 1.5) + assert Interval(0, 1) <= Interval(0, 1) + assert Interval(0, 1) > Interval(-1, 2) + assert Interval(0, 1) >= Interval(0, 1) + + def test_hash(self, interval): + # should not raise + hash(interval) + + @pytest.mark.parametrize( + "left, right, expected", + [ + (0, 5, 5), + (-2, 5.5, 7.5), + (10, 10, 0), + (10, np.inf, np.inf), + (-np.inf, -5, np.inf), + (-np.inf, np.inf, np.inf), + (Timedelta("0 days"), Timedelta("5 days"), Timedelta("5 days")), + (Timedelta("10 days"), Timedelta("10 days"), Timedelta("0 days")), + (Timedelta("1H10min"), Timedelta("5H5min"), Timedelta("3H55min")), + (Timedelta("5S"), Timedelta("1H"), Timedelta("59min55S")), + ], + ) + def test_length(self, left, right, expected): + # GH 18789 + iv = Interval(left, right) + result = iv.length + assert result == expected + + @pytest.mark.parametrize( + "left, right, expected", + [ + ("2017-01-01", "2017-01-06", "5 days"), + ("2017-01-01", "2017-01-01 12:00:00", "12 hours"), + ("2017-01-01 12:00", "2017-01-01 12:00:00", "0 days"), + ("2017-01-01 12:01", "2017-01-05 17:31:00", "4 days 5 hours 30 min"), + ], + ) + @pytest.mark.parametrize("tz", (None, "UTC", "CET", "US/Eastern")) + def test_length_timestamp(self, tz, left, right, expected): + # GH 18789 + iv = Interval(Timestamp(left, tz=tz), Timestamp(right, tz=tz)) + result = iv.length + expected = Timedelta(expected) + assert result == expected + + @pytest.mark.parametrize( + "left, right", + [ + (0, 1), + (Timedelta("0 days"), Timedelta("1 day")), + (Timestamp("2018-01-01"), Timestamp("2018-01-02")), + ( + Timestamp("2018-01-01", tz="US/Eastern"), + Timestamp("2018-01-02", tz="US/Eastern"), + ), + ], + ) + def test_is_empty(self, left, right, closed): + # GH27219 + # non-empty always return False + iv = Interval(left, right, closed) + assert iv.is_empty is False + + # same endpoint is empty except when closed='both' 
(contains one point) + iv = Interval(left, left, closed) + result = iv.is_empty + expected = closed != "both" + assert result is expected + + @pytest.mark.parametrize( + "left, right", + [ + ("a", "z"), + (("a", "b"), ("c", "d")), + (list("AB"), list("ab")), + (Interval(0, 1), Interval(1, 2)), + (Period("2018Q1", freq="Q"), Period("2018Q1", freq="Q")), + ], + ) + def test_construct_errors(self, left, right): + # GH 23013 + msg = "Only numeric, Timestamp and Timedelta endpoints are allowed" + with pytest.raises(ValueError, match=msg): + Interval(left, right) + + def test_math_add(self, closed): + interval = Interval(0, 1, closed=closed) + expected = Interval(1, 2, closed=closed) + + result = interval + 1 + assert result == expected + + result = 1 + interval + assert result == expected + + result = interval + result += 1 + assert result == expected + + msg = r"unsupported operand type\(s\) for \+" + with pytest.raises(TypeError, match=msg): + interval + interval + + with pytest.raises(TypeError, match=msg): + interval + "foo" + + def test_math_sub(self, closed): + interval = Interval(0, 1, closed=closed) + expected = Interval(-1, 0, closed=closed) + + result = interval - 1 + assert result == expected + + result = interval + result -= 1 + assert result == expected + + msg = r"unsupported operand type\(s\) for -" + with pytest.raises(TypeError, match=msg): + interval - interval + + with pytest.raises(TypeError, match=msg): + interval - "foo" + + def test_math_mult(self, closed): + interval = Interval(0, 1, closed=closed) + expected = Interval(0, 2, closed=closed) + + result = interval * 2 + assert result == expected + + result = 2 * interval + assert result == expected + + result = interval + result *= 2 + assert result == expected + + msg = r"unsupported operand type\(s\) for \*" + with pytest.raises(TypeError, match=msg): + interval * interval + + msg = r"can\'t multiply sequence by non-int" + with pytest.raises(TypeError, match=msg): + interval * "foo" + + def test_math_div(self, closed): + interval = Interval(0, 1, closed=closed) + expected = Interval(0, 0.5, closed=closed) + + result = interval / 2.0 + assert result == expected + + result = interval + result /= 2.0 + assert result == expected + + msg = r"unsupported operand type\(s\) for /" + with pytest.raises(TypeError, match=msg): + interval / interval + + with pytest.raises(TypeError, match=msg): + interval / "foo" + + def test_math_floordiv(self, closed): + interval = Interval(1, 2, closed=closed) + expected = Interval(0, 1, closed=closed) + + result = interval // 2 + assert result == expected + + result = interval + result //= 2 + assert result == expected + + msg = r"unsupported operand type\(s\) for //" + with pytest.raises(TypeError, match=msg): + interval // interval + + with pytest.raises(TypeError, match=msg): + interval // "foo" + + def test_constructor_errors(self): + msg = "invalid option for 'closed': foo" + with pytest.raises(ValueError, match=msg): + Interval(0, 1, closed="foo") + + msg = "left side of interval must be <= right side" + with pytest.raises(ValueError, match=msg): + Interval(1, 0) + + @pytest.mark.parametrize( + "tz_left, tz_right", [(None, "UTC"), ("UTC", None), ("UTC", "US/Eastern")] + ) + def test_constructor_errors_tz(self, tz_left, tz_right): + # GH 18538 + left = Timestamp("2017-01-01", tz=tz_left) + right = Timestamp("2017-01-02", tz=tz_right) + + if com.any_none(tz_left, tz_right): + error = TypeError + msg = "Cannot compare tz-naive and tz-aware timestamps" + else: + error = ValueError + msg = 
"left and right must have the same time zone" + with pytest.raises(error, match=msg): + Interval(left, right) + + def test_equality_comparison_broadcasts_over_array(self): + # https://github.com/pandas-dev/pandas/issues/35931 + interval = Interval(0, 1) + arr = np.array([interval, interval]) + result = interval == arr + expected = np.array([True, True]) + tm.assert_numpy_array_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/interval/test_ops.py b/.local/lib/python3.8/site-packages/pandas/tests/scalar/interval/test_ops.py new file mode 100644 index 0000000..9fe40c2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/scalar/interval/test_ops.py @@ -0,0 +1,68 @@ +"""Tests for Interval-Interval operations, such as overlaps, contains, etc.""" +import pytest + +from pandas import ( + Interval, + Timedelta, + Timestamp, +) + + +@pytest.fixture( + params=[ + (Timedelta("0 days"), Timedelta("1 day")), + (Timestamp("2018-01-01"), Timedelta("1 day")), + (0, 1), + ], + ids=lambda x: type(x[0]).__name__, +) +def start_shift(request): + """ + Fixture for generating intervals of types from a start value and a shift + value that can be added to start to generate an endpoint + """ + return request.param + + +class TestOverlaps: + def test_overlaps_self(self, start_shift, closed): + start, shift = start_shift + interval = Interval(start, start + shift, closed) + assert interval.overlaps(interval) + + def test_overlaps_nested(self, start_shift, closed, other_closed): + start, shift = start_shift + interval1 = Interval(start, start + 3 * shift, other_closed) + interval2 = Interval(start + shift, start + 2 * shift, closed) + + # nested intervals should always overlap + assert interval1.overlaps(interval2) + + def test_overlaps_disjoint(self, start_shift, closed, other_closed): + start, shift = start_shift + interval1 = Interval(start, start + shift, other_closed) + interval2 = Interval(start + 2 * shift, start + 3 * shift, closed) + + # disjoint intervals should never overlap + assert not interval1.overlaps(interval2) + + def test_overlaps_endpoint(self, start_shift, closed, other_closed): + start, shift = start_shift + interval1 = Interval(start, start + shift, other_closed) + interval2 = Interval(start + shift, start + 2 * shift, closed) + + # overlap if shared endpoint is closed for both (overlap at a point) + result = interval1.overlaps(interval2) + expected = interval1.closed_right and interval2.closed_left + assert result == expected + + @pytest.mark.parametrize( + "other", + [10, True, "foo", Timedelta("1 day"), Timestamp("2018-01-01")], + ids=lambda x: type(x).__name__, + ) + def test_overlaps_invalid_type(self, other): + interval = Interval(0, 1) + msg = f"`other` must be an Interval, got {type(other).__name__}" + with pytest.raises(TypeError, match=msg): + interval.overlaps(other) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/period/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/scalar/period/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/period/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/scalar/period/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..894be2f Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/scalar/period/__pycache__/__init__.cpython-38.pyc differ diff --git 
a/.local/lib/python3.8/site-packages/pandas/tests/scalar/period/__pycache__/test_asfreq.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/scalar/period/__pycache__/test_asfreq.cpython-38.pyc new file mode 100644 index 0000000..da3ba5b Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/scalar/period/__pycache__/test_asfreq.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/period/__pycache__/test_period.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/scalar/period/__pycache__/test_period.cpython-38.pyc new file mode 100644 index 0000000..bf1f4f1 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/scalar/period/__pycache__/test_period.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/period/test_asfreq.py b/.local/lib/python3.8/site-packages/pandas/tests/scalar/period/test_asfreq.py new file mode 100644 index 0000000..386ab41 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/scalar/period/test_asfreq.py @@ -0,0 +1,801 @@ +import pytest + +from pandas._libs.tslibs.dtypes import _period_code_map +from pandas._libs.tslibs.period import INVALID_FREQ_ERR_MSG +from pandas.errors import OutOfBoundsDatetime + +from pandas import ( + Period, + Timestamp, + offsets, +) + + +class TestFreqConversion: + """Test frequency conversion of date objects""" + + @pytest.mark.parametrize("freq", ["A", "Q", "M", "W", "B", "D"]) + def test_asfreq_near_zero(self, freq): + # GH#19643, GH#19650 + per = Period("0001-01-01", freq=freq) + tup1 = (per.year, per.month, per.day) + + prev = per - 1 + assert prev.ordinal == per.ordinal - 1 + tup2 = (prev.year, prev.month, prev.day) + assert tup2 < tup1 + + def test_asfreq_near_zero_weekly(self): + # GH#19834 + per1 = Period("0001-01-01", "D") + 6 + per2 = Period("0001-01-01", "D") - 6 + week1 = per1.asfreq("W") + week2 = per2.asfreq("W") + assert week1 != week2 + assert week1.asfreq("D", "E") >= per1 + assert week2.asfreq("D", "S") <= per2 + + def test_to_timestamp_out_of_bounds(self): + # GH#19643, used to incorrectly give Timestamp in 1754 + per = Period("0001-01-01", freq="B") + msg = "Out of bounds nanosecond timestamp" + with pytest.raises(OutOfBoundsDatetime, match=msg): + per.to_timestamp() + + def test_asfreq_corner(self): + val = Period(freq="A", year=2007) + result1 = val.asfreq("5t") + result2 = val.asfreq("t") + expected = Period("2007-12-31 23:59", freq="t") + assert result1.ordinal == expected.ordinal + assert result1.freqstr == "5T" + assert result2.ordinal == expected.ordinal + assert result2.freqstr == "T" + + def test_conv_annual(self): + # frequency conversion tests: from Annual Frequency + + ival_A = Period(freq="A", year=2007) + + ival_AJAN = Period(freq="A-JAN", year=2007) + ival_AJUN = Period(freq="A-JUN", year=2007) + ival_ANOV = Period(freq="A-NOV", year=2007) + + ival_A_to_Q_start = Period(freq="Q", year=2007, quarter=1) + ival_A_to_Q_end = Period(freq="Q", year=2007, quarter=4) + ival_A_to_M_start = Period(freq="M", year=2007, month=1) + ival_A_to_M_end = Period(freq="M", year=2007, month=12) + ival_A_to_W_start = Period(freq="W", year=2007, month=1, day=1) + ival_A_to_W_end = Period(freq="W", year=2007, month=12, day=31) + ival_A_to_B_start = Period(freq="B", year=2007, month=1, day=1) + ival_A_to_B_end = Period(freq="B", year=2007, month=12, day=31) + ival_A_to_D_start = Period(freq="D", year=2007, month=1, day=1) + ival_A_to_D_end = Period(freq="D", year=2007, month=12,
day=31) + ival_A_to_H_start = Period(freq="H", year=2007, month=1, day=1, hour=0) + ival_A_to_H_end = Period(freq="H", year=2007, month=12, day=31, hour=23) + ival_A_to_T_start = Period( + freq="Min", year=2007, month=1, day=1, hour=0, minute=0 + ) + ival_A_to_T_end = Period( + freq="Min", year=2007, month=12, day=31, hour=23, minute=59 + ) + ival_A_to_S_start = Period( + freq="S", year=2007, month=1, day=1, hour=0, minute=0, second=0 + ) + ival_A_to_S_end = Period( + freq="S", year=2007, month=12, day=31, hour=23, minute=59, second=59 + ) + + ival_AJAN_to_D_end = Period(freq="D", year=2007, month=1, day=31) + ival_AJAN_to_D_start = Period(freq="D", year=2006, month=2, day=1) + ival_AJUN_to_D_end = Period(freq="D", year=2007, month=6, day=30) + ival_AJUN_to_D_start = Period(freq="D", year=2006, month=7, day=1) + ival_ANOV_to_D_end = Period(freq="D", year=2007, month=11, day=30) + ival_ANOV_to_D_start = Period(freq="D", year=2006, month=12, day=1) + + assert ival_A.asfreq("Q", "S") == ival_A_to_Q_start + assert ival_A.asfreq("Q", "e") == ival_A_to_Q_end + assert ival_A.asfreq("M", "s") == ival_A_to_M_start + assert ival_A.asfreq("M", "E") == ival_A_to_M_end + assert ival_A.asfreq("W", "S") == ival_A_to_W_start + assert ival_A.asfreq("W", "E") == ival_A_to_W_end + assert ival_A.asfreq("B", "S") == ival_A_to_B_start + assert ival_A.asfreq("B", "E") == ival_A_to_B_end + assert ival_A.asfreq("D", "S") == ival_A_to_D_start + assert ival_A.asfreq("D", "E") == ival_A_to_D_end + assert ival_A.asfreq("H", "S") == ival_A_to_H_start + assert ival_A.asfreq("H", "E") == ival_A_to_H_end + assert ival_A.asfreq("min", "S") == ival_A_to_T_start + assert ival_A.asfreq("min", "E") == ival_A_to_T_end + assert ival_A.asfreq("T", "S") == ival_A_to_T_start + assert ival_A.asfreq("T", "E") == ival_A_to_T_end + assert ival_A.asfreq("S", "S") == ival_A_to_S_start + assert ival_A.asfreq("S", "E") == ival_A_to_S_end + + assert ival_AJAN.asfreq("D", "S") == ival_AJAN_to_D_start + assert ival_AJAN.asfreq("D", "E") == ival_AJAN_to_D_end + + assert ival_AJUN.asfreq("D", "S") == ival_AJUN_to_D_start + assert ival_AJUN.asfreq("D", "E") == ival_AJUN_to_D_end + + assert ival_ANOV.asfreq("D", "S") == ival_ANOV_to_D_start + assert ival_ANOV.asfreq("D", "E") == ival_ANOV_to_D_end + + assert ival_A.asfreq("A") == ival_A + + def test_conv_quarterly(self): + # frequency conversion tests: from Quarterly Frequency + + ival_Q = Period(freq="Q", year=2007, quarter=1) + ival_Q_end_of_year = Period(freq="Q", year=2007, quarter=4) + + ival_QEJAN = Period(freq="Q-JAN", year=2007, quarter=1) + ival_QEJUN = Period(freq="Q-JUN", year=2007, quarter=1) + + ival_Q_to_A = Period(freq="A", year=2007) + ival_Q_to_M_start = Period(freq="M", year=2007, month=1) + ival_Q_to_M_end = Period(freq="M", year=2007, month=3) + ival_Q_to_W_start = Period(freq="W", year=2007, month=1, day=1) + ival_Q_to_W_end = Period(freq="W", year=2007, month=3, day=31) + ival_Q_to_B_start = Period(freq="B", year=2007, month=1, day=1) + ival_Q_to_B_end = Period(freq="B", year=2007, month=3, day=30) + ival_Q_to_D_start = Period(freq="D", year=2007, month=1, day=1) + ival_Q_to_D_end = Period(freq="D", year=2007, month=3, day=31) + ival_Q_to_H_start = Period(freq="H", year=2007, month=1, day=1, hour=0) + ival_Q_to_H_end = Period(freq="H", year=2007, month=3, day=31, hour=23) + ival_Q_to_T_start = Period( + freq="Min", year=2007, month=1, day=1, hour=0, minute=0 + ) + ival_Q_to_T_end = Period( + freq="Min", year=2007, month=3, day=31, hour=23, minute=59 + ) + ival_Q_to_S_start = 
Period( + freq="S", year=2007, month=1, day=1, hour=0, minute=0, second=0 + ) + ival_Q_to_S_end = Period( + freq="S", year=2007, month=3, day=31, hour=23, minute=59, second=59 + ) + + ival_QEJAN_to_D_start = Period(freq="D", year=2006, month=2, day=1) + ival_QEJAN_to_D_end = Period(freq="D", year=2006, month=4, day=30) + + ival_QEJUN_to_D_start = Period(freq="D", year=2006, month=7, day=1) + ival_QEJUN_to_D_end = Period(freq="D", year=2006, month=9, day=30) + + assert ival_Q.asfreq("A") == ival_Q_to_A + assert ival_Q_end_of_year.asfreq("A") == ival_Q_to_A + + assert ival_Q.asfreq("M", "S") == ival_Q_to_M_start + assert ival_Q.asfreq("M", "E") == ival_Q_to_M_end + assert ival_Q.asfreq("W", "S") == ival_Q_to_W_start + assert ival_Q.asfreq("W", "E") == ival_Q_to_W_end + assert ival_Q.asfreq("B", "S") == ival_Q_to_B_start + assert ival_Q.asfreq("B", "E") == ival_Q_to_B_end + assert ival_Q.asfreq("D", "S") == ival_Q_to_D_start + assert ival_Q.asfreq("D", "E") == ival_Q_to_D_end + assert ival_Q.asfreq("H", "S") == ival_Q_to_H_start + assert ival_Q.asfreq("H", "E") == ival_Q_to_H_end + assert ival_Q.asfreq("Min", "S") == ival_Q_to_T_start + assert ival_Q.asfreq("Min", "E") == ival_Q_to_T_end + assert ival_Q.asfreq("S", "S") == ival_Q_to_S_start + assert ival_Q.asfreq("S", "E") == ival_Q_to_S_end + + assert ival_QEJAN.asfreq("D", "S") == ival_QEJAN_to_D_start + assert ival_QEJAN.asfreq("D", "E") == ival_QEJAN_to_D_end + assert ival_QEJUN.asfreq("D", "S") == ival_QEJUN_to_D_start + assert ival_QEJUN.asfreq("D", "E") == ival_QEJUN_to_D_end + + assert ival_Q.asfreq("Q") == ival_Q + + def test_conv_monthly(self): + # frequency conversion tests: from Monthly Frequency + + ival_M = Period(freq="M", year=2007, month=1) + ival_M_end_of_year = Period(freq="M", year=2007, month=12) + ival_M_end_of_quarter = Period(freq="M", year=2007, month=3) + ival_M_to_A = Period(freq="A", year=2007) + ival_M_to_Q = Period(freq="Q", year=2007, quarter=1) + ival_M_to_W_start = Period(freq="W", year=2007, month=1, day=1) + ival_M_to_W_end = Period(freq="W", year=2007, month=1, day=31) + ival_M_to_B_start = Period(freq="B", year=2007, month=1, day=1) + ival_M_to_B_end = Period(freq="B", year=2007, month=1, day=31) + ival_M_to_D_start = Period(freq="D", year=2007, month=1, day=1) + ival_M_to_D_end = Period(freq="D", year=2007, month=1, day=31) + ival_M_to_H_start = Period(freq="H", year=2007, month=1, day=1, hour=0) + ival_M_to_H_end = Period(freq="H", year=2007, month=1, day=31, hour=23) + ival_M_to_T_start = Period( + freq="Min", year=2007, month=1, day=1, hour=0, minute=0 + ) + ival_M_to_T_end = Period( + freq="Min", year=2007, month=1, day=31, hour=23, minute=59 + ) + ival_M_to_S_start = Period( + freq="S", year=2007, month=1, day=1, hour=0, minute=0, second=0 + ) + ival_M_to_S_end = Period( + freq="S", year=2007, month=1, day=31, hour=23, minute=59, second=59 + ) + + assert ival_M.asfreq("A") == ival_M_to_A + assert ival_M_end_of_year.asfreq("A") == ival_M_to_A + assert ival_M.asfreq("Q") == ival_M_to_Q + assert ival_M_end_of_quarter.asfreq("Q") == ival_M_to_Q + + assert ival_M.asfreq("W", "S") == ival_M_to_W_start + assert ival_M.asfreq("W", "E") == ival_M_to_W_end + assert ival_M.asfreq("B", "S") == ival_M_to_B_start + assert ival_M.asfreq("B", "E") == ival_M_to_B_end + assert ival_M.asfreq("D", "S") == ival_M_to_D_start + assert ival_M.asfreq("D", "E") == ival_M_to_D_end + assert ival_M.asfreq("H", "S") == ival_M_to_H_start + assert ival_M.asfreq("H", "E") == ival_M_to_H_end + assert ival_M.asfreq("Min", "S") == 
ival_M_to_T_start + assert ival_M.asfreq("Min", "E") == ival_M_to_T_end + assert ival_M.asfreq("S", "S") == ival_M_to_S_start + assert ival_M.asfreq("S", "E") == ival_M_to_S_end + + assert ival_M.asfreq("M") == ival_M + + def test_conv_weekly(self): + # frequency conversion tests: from Weekly Frequency + ival_W = Period(freq="W", year=2007, month=1, day=1) + + ival_WSUN = Period(freq="W", year=2007, month=1, day=7) + ival_WSAT = Period(freq="W-SAT", year=2007, month=1, day=6) + ival_WFRI = Period(freq="W-FRI", year=2007, month=1, day=5) + ival_WTHU = Period(freq="W-THU", year=2007, month=1, day=4) + ival_WWED = Period(freq="W-WED", year=2007, month=1, day=3) + ival_WTUE = Period(freq="W-TUE", year=2007, month=1, day=2) + ival_WMON = Period(freq="W-MON", year=2007, month=1, day=1) + + ival_WSUN_to_D_start = Period(freq="D", year=2007, month=1, day=1) + ival_WSUN_to_D_end = Period(freq="D", year=2007, month=1, day=7) + ival_WSAT_to_D_start = Period(freq="D", year=2006, month=12, day=31) + ival_WSAT_to_D_end = Period(freq="D", year=2007, month=1, day=6) + ival_WFRI_to_D_start = Period(freq="D", year=2006, month=12, day=30) + ival_WFRI_to_D_end = Period(freq="D", year=2007, month=1, day=5) + ival_WTHU_to_D_start = Period(freq="D", year=2006, month=12, day=29) + ival_WTHU_to_D_end = Period(freq="D", year=2007, month=1, day=4) + ival_WWED_to_D_start = Period(freq="D", year=2006, month=12, day=28) + ival_WWED_to_D_end = Period(freq="D", year=2007, month=1, day=3) + ival_WTUE_to_D_start = Period(freq="D", year=2006, month=12, day=27) + ival_WTUE_to_D_end = Period(freq="D", year=2007, month=1, day=2) + ival_WMON_to_D_start = Period(freq="D", year=2006, month=12, day=26) + ival_WMON_to_D_end = Period(freq="D", year=2007, month=1, day=1) + + ival_W_end_of_year = Period(freq="W", year=2007, month=12, day=31) + ival_W_end_of_quarter = Period(freq="W", year=2007, month=3, day=31) + ival_W_end_of_month = Period(freq="W", year=2007, month=1, day=31) + ival_W_to_A = Period(freq="A", year=2007) + ival_W_to_Q = Period(freq="Q", year=2007, quarter=1) + ival_W_to_M = Period(freq="M", year=2007, month=1) + + if Period(freq="D", year=2007, month=12, day=31).weekday == 6: + ival_W_to_A_end_of_year = Period(freq="A", year=2007) + else: + ival_W_to_A_end_of_year = Period(freq="A", year=2008) + + if Period(freq="D", year=2007, month=3, day=31).weekday == 6: + ival_W_to_Q_end_of_quarter = Period(freq="Q", year=2007, quarter=1) + else: + ival_W_to_Q_end_of_quarter = Period(freq="Q", year=2007, quarter=2) + + if Period(freq="D", year=2007, month=1, day=31).weekday == 6: + ival_W_to_M_end_of_month = Period(freq="M", year=2007, month=1) + else: + ival_W_to_M_end_of_month = Period(freq="M", year=2007, month=2) + + ival_W_to_B_start = Period(freq="B", year=2007, month=1, day=1) + ival_W_to_B_end = Period(freq="B", year=2007, month=1, day=5) + ival_W_to_D_start = Period(freq="D", year=2007, month=1, day=1) + ival_W_to_D_end = Period(freq="D", year=2007, month=1, day=7) + ival_W_to_H_start = Period(freq="H", year=2007, month=1, day=1, hour=0) + ival_W_to_H_end = Period(freq="H", year=2007, month=1, day=7, hour=23) + ival_W_to_T_start = Period( + freq="Min", year=2007, month=1, day=1, hour=0, minute=0 + ) + ival_W_to_T_end = Period( + freq="Min", year=2007, month=1, day=7, hour=23, minute=59 + ) + ival_W_to_S_start = Period( + freq="S", year=2007, month=1, day=1, hour=0, minute=0, second=0 + ) + ival_W_to_S_end = Period( + freq="S", year=2007, month=1, day=7, hour=23, minute=59, second=59 + ) + + assert ival_W.asfreq("A") == 
ival_W_to_A + assert ival_W_end_of_year.asfreq("A") == ival_W_to_A_end_of_year + + assert ival_W.asfreq("Q") == ival_W_to_Q + assert ival_W_end_of_quarter.asfreq("Q") == ival_W_to_Q_end_of_quarter + + assert ival_W.asfreq("M") == ival_W_to_M + assert ival_W_end_of_month.asfreq("M") == ival_W_to_M_end_of_month + + assert ival_W.asfreq("B", "S") == ival_W_to_B_start + assert ival_W.asfreq("B", "E") == ival_W_to_B_end + + assert ival_W.asfreq("D", "S") == ival_W_to_D_start + assert ival_W.asfreq("D", "E") == ival_W_to_D_end + + assert ival_WSUN.asfreq("D", "S") == ival_WSUN_to_D_start + assert ival_WSUN.asfreq("D", "E") == ival_WSUN_to_D_end + assert ival_WSAT.asfreq("D", "S") == ival_WSAT_to_D_start + assert ival_WSAT.asfreq("D", "E") == ival_WSAT_to_D_end + assert ival_WFRI.asfreq("D", "S") == ival_WFRI_to_D_start + assert ival_WFRI.asfreq("D", "E") == ival_WFRI_to_D_end + assert ival_WTHU.asfreq("D", "S") == ival_WTHU_to_D_start + assert ival_WTHU.asfreq("D", "E") == ival_WTHU_to_D_end + assert ival_WWED.asfreq("D", "S") == ival_WWED_to_D_start + assert ival_WWED.asfreq("D", "E") == ival_WWED_to_D_end + assert ival_WTUE.asfreq("D", "S") == ival_WTUE_to_D_start + assert ival_WTUE.asfreq("D", "E") == ival_WTUE_to_D_end + assert ival_WMON.asfreq("D", "S") == ival_WMON_to_D_start + assert ival_WMON.asfreq("D", "E") == ival_WMON_to_D_end + + assert ival_W.asfreq("H", "S") == ival_W_to_H_start + assert ival_W.asfreq("H", "E") == ival_W_to_H_end + assert ival_W.asfreq("Min", "S") == ival_W_to_T_start + assert ival_W.asfreq("Min", "E") == ival_W_to_T_end + assert ival_W.asfreq("S", "S") == ival_W_to_S_start + assert ival_W.asfreq("S", "E") == ival_W_to_S_end + + assert ival_W.asfreq("W") == ival_W + + msg = INVALID_FREQ_ERR_MSG + with pytest.raises(ValueError, match=msg): + ival_W.asfreq("WK") + + def test_conv_weekly_legacy(self): + # frequency conversion tests: from Weekly Frequency + msg = INVALID_FREQ_ERR_MSG + with pytest.raises(ValueError, match=msg): + Period(freq="WK", year=2007, month=1, day=1) + + with pytest.raises(ValueError, match=msg): + Period(freq="WK-SAT", year=2007, month=1, day=6) + with pytest.raises(ValueError, match=msg): + Period(freq="WK-FRI", year=2007, month=1, day=5) + with pytest.raises(ValueError, match=msg): + Period(freq="WK-THU", year=2007, month=1, day=4) + with pytest.raises(ValueError, match=msg): + Period(freq="WK-WED", year=2007, month=1, day=3) + with pytest.raises(ValueError, match=msg): + Period(freq="WK-TUE", year=2007, month=1, day=2) + with pytest.raises(ValueError, match=msg): + Period(freq="WK-MON", year=2007, month=1, day=1) + + def test_conv_business(self): + # frequency conversion tests: from Business Frequency + + ival_B = Period(freq="B", year=2007, month=1, day=1) + ival_B_end_of_year = Period(freq="B", year=2007, month=12, day=31) + ival_B_end_of_quarter = Period(freq="B", year=2007, month=3, day=30) + ival_B_end_of_month = Period(freq="B", year=2007, month=1, day=31) + ival_B_end_of_week = Period(freq="B", year=2007, month=1, day=5) + + ival_B_to_A = Period(freq="A", year=2007) + ival_B_to_Q = Period(freq="Q", year=2007, quarter=1) + ival_B_to_M = Period(freq="M", year=2007, month=1) + ival_B_to_W = Period(freq="W", year=2007, month=1, day=7) + ival_B_to_D = Period(freq="D", year=2007, month=1, day=1) + ival_B_to_H_start = Period(freq="H", year=2007, month=1, day=1, hour=0) + ival_B_to_H_end = Period(freq="H", year=2007, month=1, day=1, hour=23) + ival_B_to_T_start = Period( + freq="Min", year=2007, month=1, day=1, hour=0, minute=0 + ) +
ival_B_to_T_end = Period( + freq="Min", year=2007, month=1, day=1, hour=23, minute=59 + ) + ival_B_to_S_start = Period( + freq="S", year=2007, month=1, day=1, hour=0, minute=0, second=0 + ) + ival_B_to_S_end = Period( + freq="S", year=2007, month=1, day=1, hour=23, minute=59, second=59 + ) + + assert ival_B.asfreq("A") == ival_B_to_A + assert ival_B_end_of_year.asfreq("A") == ival_B_to_A + assert ival_B.asfreq("Q") == ival_B_to_Q + assert ival_B_end_of_quarter.asfreq("Q") == ival_B_to_Q + assert ival_B.asfreq("M") == ival_B_to_M + assert ival_B_end_of_month.asfreq("M") == ival_B_to_M + assert ival_B.asfreq("W") == ival_B_to_W + assert ival_B_end_of_week.asfreq("W") == ival_B_to_W + + assert ival_B.asfreq("D") == ival_B_to_D + + assert ival_B.asfreq("H", "S") == ival_B_to_H_start + assert ival_B.asfreq("H", "E") == ival_B_to_H_end + assert ival_B.asfreq("Min", "S") == ival_B_to_T_start + assert ival_B.asfreq("Min", "E") == ival_B_to_T_end + assert ival_B.asfreq("S", "S") == ival_B_to_S_start + assert ival_B.asfreq("S", "E") == ival_B_to_S_end + + assert ival_B.asfreq("B") == ival_B + + def test_conv_daily(self): + # frequency conversion tests: from Daily Frequency + + ival_D = Period(freq="D", year=2007, month=1, day=1) + ival_D_end_of_year = Period(freq="D", year=2007, month=12, day=31) + ival_D_end_of_quarter = Period(freq="D", year=2007, month=3, day=31) + ival_D_end_of_month = Period(freq="D", year=2007, month=1, day=31) + ival_D_end_of_week = Period(freq="D", year=2007, month=1, day=7) + + ival_D_friday = Period(freq="D", year=2007, month=1, day=5) + ival_D_saturday = Period(freq="D", year=2007, month=1, day=6) + ival_D_sunday = Period(freq="D", year=2007, month=1, day=7) + + ival_B_friday = Period(freq="B", year=2007, month=1, day=5) + ival_B_monday = Period(freq="B", year=2007, month=1, day=8) + + ival_D_to_A = Period(freq="A", year=2007) + + ival_Deoq_to_AJAN = Period(freq="A-JAN", year=2008) + ival_Deoq_to_AJUN = Period(freq="A-JUN", year=2007) + ival_Deoq_to_ADEC = Period(freq="A-DEC", year=2007) + + ival_D_to_QEJAN = Period(freq="Q-JAN", year=2007, quarter=4) + ival_D_to_QEJUN = Period(freq="Q-JUN", year=2007, quarter=3) + ival_D_to_QEDEC = Period(freq="Q-DEC", year=2007, quarter=1) + + ival_D_to_M = Period(freq="M", year=2007, month=1) + ival_D_to_W = Period(freq="W", year=2007, month=1, day=7) + + ival_D_to_H_start = Period(freq="H", year=2007, month=1, day=1, hour=0) + ival_D_to_H_end = Period(freq="H", year=2007, month=1, day=1, hour=23) + ival_D_to_T_start = Period( + freq="Min", year=2007, month=1, day=1, hour=0, minute=0 + ) + ival_D_to_T_end = Period( + freq="Min", year=2007, month=1, day=1, hour=23, minute=59 + ) + ival_D_to_S_start = Period( + freq="S", year=2007, month=1, day=1, hour=0, minute=0, second=0 + ) + ival_D_to_S_end = Period( + freq="S", year=2007, month=1, day=1, hour=23, minute=59, second=59 + ) + + assert ival_D.asfreq("A") == ival_D_to_A + + assert ival_D_end_of_quarter.asfreq("A-JAN") == ival_Deoq_to_AJAN + assert ival_D_end_of_quarter.asfreq("A-JUN") == ival_Deoq_to_AJUN + assert ival_D_end_of_quarter.asfreq("A-DEC") == ival_Deoq_to_ADEC + + assert ival_D_end_of_year.asfreq("A") == ival_D_to_A + assert ival_D_end_of_quarter.asfreq("Q") == ival_D_to_QEDEC + assert ival_D.asfreq("Q-JAN") == ival_D_to_QEJAN + assert ival_D.asfreq("Q-JUN") == ival_D_to_QEJUN + assert ival_D.asfreq("Q-DEC") == ival_D_to_QEDEC + assert ival_D.asfreq("M") == ival_D_to_M + assert ival_D_end_of_month.asfreq("M") == ival_D_to_M + assert ival_D.asfreq("W") == ival_D_to_W +
assert ival_D_end_of_week.asfreq("W") == ival_D_to_W + + assert ival_D_friday.asfreq("B") == ival_B_friday + assert ival_D_saturday.asfreq("B", "S") == ival_B_friday + assert ival_D_saturday.asfreq("B", "E") == ival_B_monday + assert ival_D_sunday.asfreq("B", "S") == ival_B_friday + assert ival_D_sunday.asfreq("B", "E") == ival_B_monday + + assert ival_D.asfreq("H", "S") == ival_D_to_H_start + assert ival_D.asfreq("H", "E") == ival_D_to_H_end + assert ival_D.asfreq("Min", "S") == ival_D_to_T_start + assert ival_D.asfreq("Min", "E") == ival_D_to_T_end + assert ival_D.asfreq("S", "S") == ival_D_to_S_start + assert ival_D.asfreq("S", "E") == ival_D_to_S_end + + assert ival_D.asfreq("D") == ival_D + + def test_conv_hourly(self): + # frequency conversion tests: from Hourly Frequency + + ival_H = Period(freq="H", year=2007, month=1, day=1, hour=0) + ival_H_end_of_year = Period(freq="H", year=2007, month=12, day=31, hour=23) + ival_H_end_of_quarter = Period(freq="H", year=2007, month=3, day=31, hour=23) + ival_H_end_of_month = Period(freq="H", year=2007, month=1, day=31, hour=23) + ival_H_end_of_week = Period(freq="H", year=2007, month=1, day=7, hour=23) + ival_H_end_of_day = Period(freq="H", year=2007, month=1, day=1, hour=23) + ival_H_end_of_bus = Period(freq="H", year=2007, month=1, day=1, hour=23) + + ival_H_to_A = Period(freq="A", year=2007) + ival_H_to_Q = Period(freq="Q", year=2007, quarter=1) + ival_H_to_M = Period(freq="M", year=2007, month=1) + ival_H_to_W = Period(freq="W", year=2007, month=1, day=7) + ival_H_to_D = Period(freq="D", year=2007, month=1, day=1) + ival_H_to_B = Period(freq="B", year=2007, month=1, day=1) + + ival_H_to_T_start = Period( + freq="Min", year=2007, month=1, day=1, hour=0, minute=0 + ) + ival_H_to_T_end = Period( + freq="Min", year=2007, month=1, day=1, hour=0, minute=59 + ) + ival_H_to_S_start = Period( + freq="S", year=2007, month=1, day=1, hour=0, minute=0, second=0 + ) + ival_H_to_S_end = Period( + freq="S", year=2007, month=1, day=1, hour=0, minute=59, second=59 + ) + + assert ival_H.asfreq("A") == ival_H_to_A + assert ival_H_end_of_year.asfreq("A") == ival_H_to_A + assert ival_H.asfreq("Q") == ival_H_to_Q + assert ival_H_end_of_quarter.asfreq("Q") == ival_H_to_Q + assert ival_H.asfreq("M") == ival_H_to_M + assert ival_H_end_of_month.asfreq("M") == ival_H_to_M + assert ival_H.asfreq("W") == ival_H_to_W + assert ival_H_end_of_week.asfreq("W") == ival_H_to_W + assert ival_H.asfreq("D") == ival_H_to_D + assert ival_H_end_of_day.asfreq("D") == ival_H_to_D + assert ival_H.asfreq("B") == ival_H_to_B + assert ival_H_end_of_bus.asfreq("B") == ival_H_to_B + + assert ival_H.asfreq("Min", "S") == ival_H_to_T_start + assert ival_H.asfreq("Min", "E") == ival_H_to_T_end + assert ival_H.asfreq("S", "S") == ival_H_to_S_start + assert ival_H.asfreq("S", "E") == ival_H_to_S_end + + assert ival_H.asfreq("H") == ival_H + + def test_conv_minutely(self): + # frequency conversion tests: from Minutely Frequency + + ival_T = Period(freq="Min", year=2007, month=1, day=1, hour=0, minute=0) + ival_T_end_of_year = Period( + freq="Min", year=2007, month=12, day=31, hour=23, minute=59 + ) + ival_T_end_of_quarter = Period( + freq="Min", year=2007, month=3, day=31, hour=23, minute=59 + ) + ival_T_end_of_month = Period( + freq="Min", year=2007, month=1, day=31, hour=23, minute=59 + ) + ival_T_end_of_week = Period( + freq="Min", year=2007, month=1, day=7, hour=23, minute=59 + ) + ival_T_end_of_day = Period( + freq="Min", year=2007, month=1, day=1, hour=23, minute=59 + ) +
ival_T_end_of_bus = Period( + freq="Min", year=2007, month=1, day=1, hour=23, minute=59 + ) + ival_T_end_of_hour = Period( + freq="Min", year=2007, month=1, day=1, hour=0, minute=59 + ) + + ival_T_to_A = Period(freq="A", year=2007) + ival_T_to_Q = Period(freq="Q", year=2007, quarter=1) + ival_T_to_M = Period(freq="M", year=2007, month=1) + ival_T_to_W = Period(freq="W", year=2007, month=1, day=7) + ival_T_to_D = Period(freq="D", year=2007, month=1, day=1) + ival_T_to_B = Period(freq="B", year=2007, month=1, day=1) + ival_T_to_H = Period(freq="H", year=2007, month=1, day=1, hour=0) + + ival_T_to_S_start = Period( + freq="S", year=2007, month=1, day=1, hour=0, minute=0, second=0 + ) + ival_T_to_S_end = Period( + freq="S", year=2007, month=1, day=1, hour=0, minute=0, second=59 + ) + + assert ival_T.asfreq("A") == ival_T_to_A + assert ival_T_end_of_year.asfreq("A") == ival_T_to_A + assert ival_T.asfreq("Q") == ival_T_to_Q + assert ival_T_end_of_quarter.asfreq("Q") == ival_T_to_Q + assert ival_T.asfreq("M") == ival_T_to_M + assert ival_T_end_of_month.asfreq("M") == ival_T_to_M + assert ival_T.asfreq("W") == ival_T_to_W + assert ival_T_end_of_week.asfreq("W") == ival_T_to_W + assert ival_T.asfreq("D") == ival_T_to_D + assert ival_T_end_of_day.asfreq("D") == ival_T_to_D + assert ival_T.asfreq("B") == ival_T_to_B + assert ival_T_end_of_bus.asfreq("B") == ival_T_to_B + assert ival_T.asfreq("H") == ival_T_to_H + assert ival_T_end_of_hour.asfreq("H") == ival_T_to_H + + assert ival_T.asfreq("S", "S") == ival_T_to_S_start + assert ival_T.asfreq("S", "E") == ival_T_to_S_end + + assert ival_T.asfreq("Min") == ival_T + + def test_conv_secondly(self): + # frequency conversion tests: from Secondly Frequency + + ival_S = Period(freq="S", year=2007, month=1, day=1, hour=0, minute=0, second=0) + ival_S_end_of_year = Period( + freq="S", year=2007, month=12, day=31, hour=23, minute=59, second=59 + ) + ival_S_end_of_quarter = Period( + freq="S", year=2007, month=3, day=31, hour=23, minute=59, second=59 + ) + ival_S_end_of_month = Period( + freq="S", year=2007, month=1, day=31, hour=23, minute=59, second=59 + ) + ival_S_end_of_week = Period( + freq="S", year=2007, month=1, day=7, hour=23, minute=59, second=59 + ) + ival_S_end_of_day = Period( + freq="S", year=2007, month=1, day=1, hour=23, minute=59, second=59 + ) + ival_S_end_of_bus = Period( + freq="S", year=2007, month=1, day=1, hour=23, minute=59, second=59 + ) + ival_S_end_of_hour = Period( + freq="S", year=2007, month=1, day=1, hour=0, minute=59, second=59 + ) + ival_S_end_of_minute = Period( + freq="S", year=2007, month=1, day=1, hour=0, minute=0, second=59 + ) + + ival_S_to_A = Period(freq="A", year=2007) + ival_S_to_Q = Period(freq="Q", year=2007, quarter=1) + ival_S_to_M = Period(freq="M", year=2007, month=1) + ival_S_to_W = Period(freq="W", year=2007, month=1, day=7) + ival_S_to_D = Period(freq="D", year=2007, month=1, day=1) + ival_S_to_B = Period(freq="B", year=2007, month=1, day=1) + ival_S_to_H = Period(freq="H", year=2007, month=1, day=1, hour=0) + ival_S_to_T = Period(freq="Min", year=2007, month=1, day=1, hour=0, minute=0) + + assert ival_S.asfreq("A") == ival_S_to_A + assert ival_S_end_of_year.asfreq("A") == ival_S_to_A + assert ival_S.asfreq("Q") == ival_S_to_Q + assert ival_S_end_of_quarter.asfreq("Q") == ival_S_to_Q + assert ival_S.asfreq("M") == ival_S_to_M + assert ival_S_end_of_month.asfreq("M") == ival_S_to_M + assert ival_S.asfreq("W") == ival_S_to_W + assert ival_S_end_of_week.asfreq("W") == ival_S_to_W + assert ival_S.asfreq("D") ==
ival_S_to_D + assert ival_S_end_of_day.asfreq("D") == ival_S_to_D + assert ival_S.asfreq("B") == ival_S_to_B + assert ival_S_end_of_bus.asfreq("B") == ival_S_to_B + assert ival_S.asfreq("H") == ival_S_to_H + assert ival_S_end_of_hour.asfreq("H") == ival_S_to_H + assert ival_S.asfreq("Min") == ival_S_to_T + assert ival_S_end_of_minute.asfreq("Min") == ival_S_to_T + + assert ival_S.asfreq("S") == ival_S + + def test_conv_microsecond(self): + # GH#31475 Avoid floating point errors dropping the start_time to + # before the beginning of the Period + per = Period("2020-01-30 15:57:27.576166", freq="U") + assert per.ordinal == 1580399847576166 + + start = per.start_time + expected = Timestamp("2020-01-30 15:57:27.576166") + assert start == expected + assert start.value == per.ordinal * 1000 + + per2 = Period("2300-01-01", "us") + msg = "2300-01-01" + with pytest.raises(OutOfBoundsDatetime, match=msg): + per2.start_time + with pytest.raises(OutOfBoundsDatetime, match=msg): + per2.end_time + + def test_asfreq_mult(self): + # normal freq to mult freq + p = Period(freq="A", year=2007) + # ordinal will not change + for freq in ["3A", offsets.YearEnd(3)]: + result = p.asfreq(freq) + expected = Period("2007", freq="3A") + + assert result == expected + assert result.ordinal == expected.ordinal + assert result.freq == expected.freq + # ordinal will not change + for freq in ["3A", offsets.YearEnd(3)]: + result = p.asfreq(freq, how="S") + expected = Period("2007", freq="3A") + + assert result == expected + assert result.ordinal == expected.ordinal + assert result.freq == expected.freq + + # mult freq to normal freq + p = Period(freq="3A", year=2007) + # ordinal will change because how=E is the default + for freq in ["A", offsets.YearEnd()]: + result = p.asfreq(freq) + expected = Period("2009", freq="A") + + assert result == expected + assert result.ordinal == expected.ordinal + assert result.freq == expected.freq + # ordinal will not change + for freq in ["A", offsets.YearEnd()]: + result = p.asfreq(freq, how="S") + expected = Period("2007", freq="A") + + assert result == expected + assert result.ordinal == expected.ordinal + assert result.freq == expected.freq + + p = Period(freq="A", year=2007) + for freq in ["2M", offsets.MonthEnd(2)]: + result = p.asfreq(freq) + expected = Period("2007-12", freq="2M") + + assert result == expected + assert result.ordinal == expected.ordinal + assert result.freq == expected.freq + for freq in ["2M", offsets.MonthEnd(2)]: + result = p.asfreq(freq, how="S") + expected = Period("2007-01", freq="2M") + + assert result == expected + assert result.ordinal == expected.ordinal + assert result.freq == expected.freq + + p = Period(freq="3A", year=2007) + for freq in ["2M", offsets.MonthEnd(2)]: + result = p.asfreq(freq) + expected = Period("2009-12", freq="2M") + + assert result == expected + assert result.ordinal == expected.ordinal + assert result.freq == expected.freq + for freq in ["2M", offsets.MonthEnd(2)]: + result = p.asfreq(freq, how="S") + expected = Period("2007-01", freq="2M") + + assert result == expected + assert result.ordinal == expected.ordinal + assert result.freq == expected.freq + + def test_asfreq_combined(self): + # normal freq to combined freq + p = Period("2007", freq="H") + + # ordinal will not change + expected = Period("2007", freq="25H") + for freq, how in zip(["1D1H", "1H1D"], ["E", "S"]): + result = p.asfreq(freq, how=how) + assert result == expected + assert result.ordinal == expected.ordinal + assert result.freq == expected.freq + + # combined freq 
to normal freq + p1 = Period(freq="1D1H", year=2007) + p2 = Period(freq="1H1D", year=2007) + + # ordinal will change because how=E is the default + result1 = p1.asfreq("H") + result2 = p2.asfreq("H") + expected = Period("2007-01-02", freq="H") + assert result1 == expected + assert result1.ordinal == expected.ordinal + assert result1.freq == expected.freq + assert result2 == expected + assert result2.ordinal == expected.ordinal + assert result2.freq == expected.freq + + # ordinal will not change + result1 = p1.asfreq("H", how="S") + result2 = p2.asfreq("H", how="S") + expected = Period("2007-01-01", freq="H") + assert result1 == expected + assert result1.ordinal == expected.ordinal + assert result1.freq == expected.freq + assert result2 == expected + assert result2.ordinal == expected.ordinal + assert result2.freq == expected.freq + + def test_asfreq_MS(self): + initial = Period("2013") + + assert initial.asfreq(freq="M", how="S") == Period("2013-01", "M") + + msg = INVALID_FREQ_ERR_MSG + with pytest.raises(ValueError, match=msg): + initial.asfreq(freq="MS", how="S") + + with pytest.raises(ValueError, match=msg): + Period("2013-01", "MS") + + assert _period_code_map.get("MS") is None diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/period/test_period.py b/.local/lib/python3.8/site-packages/pandas/tests/scalar/period/test_period.py new file mode 100644 index 0000000..7d437f3 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/scalar/period/test_period.py @@ -0,0 +1,1588 @@ +from datetime import ( + date, + datetime, + timedelta, +) + +import numpy as np +import pytest +import pytz + +from pandas._libs.tslibs import ( + iNaT, + period as libperiod, +) +from pandas._libs.tslibs.ccalendar import ( + DAYS, + MONTHS, +) +from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime +from pandas._libs.tslibs.parsing import DateParseError +from pandas._libs.tslibs.period import ( + INVALID_FREQ_ERR_MSG, + IncompatibleFrequency, +) +from pandas._libs.tslibs.timezones import ( + dateutil_gettz, + maybe_get_tz, +) + +import pandas as pd +from pandas import ( + NaT, + Period, + Timedelta, + Timestamp, + offsets, +) +import pandas._testing as tm + + +class TestPeriodConstruction: + def test_from_td64nat_raises(self): + # GH#44507 + td = NaT.to_numpy("m8[ns]") + + msg = "Value must be Period, string, integer, or datetime" + with pytest.raises(ValueError, match=msg): + Period(td) + + with pytest.raises(ValueError, match=msg): + Period(td, freq="D") + + def test_construction(self): + i1 = Period("1/1/2005", freq="M") + i2 = Period("Jan 2005") + + assert i1 == i2 + + i1 = Period("2005", freq="A") + i2 = Period("2005") + i3 = Period("2005", freq="a") + + assert i1 == i2 + assert i1 == i3 + + i4 = Period("2005", freq="M") + i5 = Period("2005", freq="m") + + assert i1 != i4 + assert i4 == i5 + + i1 = Period.now("Q") + i2 = Period(datetime.now(), freq="Q") + i3 = Period.now("q") + + assert i1 == i2 + assert i1 == i3 + + i1 = Period("1982", freq="min") + i2 = Period("1982", freq="MIN") + assert i1 == i2 + + i1 = Period(year=2005, month=3, day=1, freq="D") + i2 = Period("3/1/2005", freq="D") + assert i1 == i2 + + i3 = Period(year=2005, month=3, day=1, freq="d") + assert i1 == i3 + + i1 = Period("2007-01-01 09:00:00.001") + expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1000), freq="L") + assert i1 == expected + + expected = Period("2007-01-01 09:00:00.001", freq="L") + assert i1 == expected + + i1 = Period("2007-01-01 09:00:00.00101") + expected = Period(datetime(2007, 1, 1, 
9, 0, 0, 1010), freq="U") + assert i1 == expected + + expected = Period("2007-01-01 09:00:00.00101", freq="U") + assert i1 == expected + + msg = "Must supply freq for ordinal value" + with pytest.raises(ValueError, match=msg): + Period(ordinal=200701) + + msg = "Invalid frequency: X" + with pytest.raises(ValueError, match=msg): + Period("2007-1-1", freq="X") + + # GH#34703 tuple freq disallowed + with pytest.raises(TypeError, match="pass as a string instead"): + Period("1982", freq=("Min", 1)) + + def test_construction_bday(self): + + # Biz day construction, roll forward if non-weekday + i1 = Period("3/10/12", freq="B") + i2 = Period("3/10/12", freq="D") + assert i1 == i2.asfreq("B") + i2 = Period("3/11/12", freq="D") + assert i1 == i2.asfreq("B") + i2 = Period("3/12/12", freq="D") + assert i1 == i2.asfreq("B") + + i3 = Period("3/10/12", freq="b") + assert i1 == i3 + + i1 = Period(year=2012, month=3, day=10, freq="B") + i2 = Period("3/12/12", freq="B") + assert i1 == i2 + + def test_construction_quarter(self): + + i1 = Period(year=2005, quarter=1, freq="Q") + i2 = Period("1/1/2005", freq="Q") + assert i1 == i2 + + i1 = Period(year=2005, quarter=3, freq="Q") + i2 = Period("9/1/2005", freq="Q") + assert i1 == i2 + + i1 = Period("2005Q1") + i2 = Period(year=2005, quarter=1, freq="Q") + i3 = Period("2005q1") + assert i1 == i2 + assert i1 == i3 + + i1 = Period("05Q1") + assert i1 == i2 + lower = Period("05q1") + assert i1 == lower + + i1 = Period("1Q2005") + assert i1 == i2 + lower = Period("1q2005") + assert i1 == lower + + i1 = Period("1Q05") + assert i1 == i2 + lower = Period("1q05") + assert i1 == lower + + i1 = Period("4Q1984") + assert i1.year == 1984 + lower = Period("4q1984") + assert i1 == lower + + def test_construction_month(self): + + expected = Period("2007-01", freq="M") + i1 = Period("200701", freq="M") + assert i1 == expected + + i1 = Period("200701", freq="M") + assert i1 == expected + + i1 = Period(200701, freq="M") + assert i1 == expected + + i1 = Period(ordinal=200701, freq="M") + assert i1.year == 18695 + + i1 = Period(datetime(2007, 1, 1), freq="M") + i2 = Period("200701", freq="M") + assert i1 == i2 + + i1 = Period(date(2007, 1, 1), freq="M") + i2 = Period(datetime(2007, 1, 1), freq="M") + i3 = Period(np.datetime64("2007-01-01"), freq="M") + i4 = Period("2007-01-01 00:00:00", freq="M") + i5 = Period("2007-01-01 00:00:00.000", freq="M") + assert i1 == i2 + assert i1 == i3 + assert i1 == i4 + assert i1 == i5 + + def test_period_constructor_offsets(self): + assert Period("1/1/2005", freq=offsets.MonthEnd()) == Period( + "1/1/2005", freq="M" + ) + assert Period("2005", freq=offsets.YearEnd()) == Period("2005", freq="A") + assert Period("2005", freq=offsets.MonthEnd()) == Period("2005", freq="M") + assert Period("3/10/12", freq=offsets.BusinessDay()) == Period( + "3/10/12", freq="B" + ) + assert Period("3/10/12", freq=offsets.Day()) == Period("3/10/12", freq="D") + + assert Period( + year=2005, quarter=1, freq=offsets.QuarterEnd(startingMonth=12) + ) == Period(year=2005, quarter=1, freq="Q") + assert Period( + year=2005, quarter=2, freq=offsets.QuarterEnd(startingMonth=12) + ) == Period(year=2005, quarter=2, freq="Q") + + assert Period(year=2005, month=3, day=1, freq=offsets.Day()) == Period( + year=2005, month=3, day=1, freq="D" + ) + assert Period(year=2012, month=3, day=10, freq=offsets.BDay()) == Period( + year=2012, month=3, day=10, freq="B" + ) + + expected = Period("2005-03-01", freq="3D") + assert Period(year=2005, month=3, day=1, freq=offsets.Day(3)) == expected + 
assert Period(year=2005, month=3, day=1, freq="3D") == expected + + assert Period(year=2012, month=3, day=10, freq=offsets.BDay(3)) == Period( + year=2012, month=3, day=10, freq="3B" + ) + + assert Period(200701, freq=offsets.MonthEnd()) == Period(200701, freq="M") + + i1 = Period(ordinal=200701, freq=offsets.MonthEnd()) + i2 = Period(ordinal=200701, freq="M") + assert i1 == i2 + assert i1.year == 18695 + assert i2.year == 18695 + + i1 = Period(datetime(2007, 1, 1), freq="M") + i2 = Period("200701", freq="M") + assert i1 == i2 + + i1 = Period(date(2007, 1, 1), freq="M") + i2 = Period(datetime(2007, 1, 1), freq="M") + i3 = Period(np.datetime64("2007-01-01"), freq="M") + i4 = Period("2007-01-01 00:00:00", freq="M") + i5 = Period("2007-01-01 00:00:00.000", freq="M") + assert i1 == i2 + assert i1 == i3 + assert i1 == i4 + assert i1 == i5 + + i1 = Period("2007-01-01 09:00:00.001") + expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1000), freq="L") + assert i1 == expected + + expected = Period("2007-01-01 09:00:00.001", freq="L") + assert i1 == expected + + i1 = Period("2007-01-01 09:00:00.00101") + expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1010), freq="U") + assert i1 == expected + + expected = Period("2007-01-01 09:00:00.00101", freq="U") + assert i1 == expected + + def test_invalid_arguments(self): + msg = "Must supply freq for datetime value" + with pytest.raises(ValueError, match=msg): + Period(datetime.now()) + with pytest.raises(ValueError, match=msg): + Period(datetime.now().date()) + + msg = "Value must be Period, string, integer, or datetime" + with pytest.raises(ValueError, match=msg): + Period(1.6, freq="D") + msg = "Ordinal must be an integer" + with pytest.raises(ValueError, match=msg): + Period(ordinal=1.6, freq="D") + msg = "Only value or ordinal but not both should be given but not both" + with pytest.raises(ValueError, match=msg): + Period(ordinal=2, value=1, freq="D") + + msg = "If value is None, freq cannot be None" + with pytest.raises(ValueError, match=msg): + Period(month=1) + + msg = "Given date string not likely a datetime" + with pytest.raises(ValueError, match=msg): + Period("-2000", "A") + msg = "day is out of range for month" + with pytest.raises(DateParseError, match=msg): + Period("0", "A") + msg = "Unknown datetime string format, unable to parse" + with pytest.raises(DateParseError, match=msg): + Period("1/1/-2000", "A") + + def test_constructor_corner(self): + expected = Period("2007-01", freq="2M") + assert Period(year=2007, month=1, freq="2M") == expected + + assert Period(None) is NaT + + p = Period("2007-01-01", freq="D") + + result = Period(p, freq="A") + exp = Period("2007", freq="A") + assert result == exp + + def test_constructor_infer_freq(self): + p = Period("2007-01-01") + assert p.freq == "D" + + p = Period("2007-01-01 07") + assert p.freq == "H" + + p = Period("2007-01-01 07:10") + assert p.freq == "T" + + p = Period("2007-01-01 07:10:15") + assert p.freq == "S" + + p = Period("2007-01-01 07:10:15.123") + assert p.freq == "L" + + p = Period("2007-01-01 07:10:15.123000") + assert p.freq == "L" + + p = Period("2007-01-01 07:10:15.123400") + assert p.freq == "U" + + def test_multiples(self): + result1 = Period("1989", freq="2A") + result2 = Period("1989", freq="A") + assert result1.ordinal == result2.ordinal + assert result1.freqstr == "2A-DEC" + assert result2.freqstr == "A-DEC" + assert result1.freq == offsets.YearEnd(2) + assert result2.freq == offsets.YearEnd() + + assert (result1 + 1).ordinal == result1.ordinal + 2 + assert (1 + 
result1).ordinal == result1.ordinal + 2 + assert (result1 - 1).ordinal == result2.ordinal - 2 + assert (-1 + result1).ordinal == result2.ordinal - 2 + + @pytest.mark.parametrize("month", MONTHS) + def test_period_cons_quarterly(self, month): + # bugs in scikits.timeseries + freq = f"Q-{month}" + exp = Period("1989Q3", freq=freq) + assert "1989Q3" in str(exp) + stamp = exp.to_timestamp("D", how="end") + p = Period(stamp, freq=freq) + assert p == exp + + stamp = exp.to_timestamp("3D", how="end") + p = Period(stamp, freq=freq) + assert p == exp + + @pytest.mark.parametrize("month", MONTHS) + def test_period_cons_annual(self, month): + # bugs in scikits.timeseries + freq = f"A-{month}" + exp = Period("1989", freq=freq) + stamp = exp.to_timestamp("D", how="end") + timedelta(days=30) + p = Period(stamp, freq=freq) + + assert p == exp + 1 + assert isinstance(p, Period) + + @pytest.mark.parametrize("day", DAYS) + @pytest.mark.parametrize("num", range(10, 17)) + def test_period_cons_weekly(self, num, day): + daystr = f"2011-02-{num}" + freq = f"W-{day}" + + result = Period(daystr, freq=freq) + expected = Period(daystr, freq="D").asfreq(freq) + assert result == expected + assert isinstance(result, Period) + + def test_period_from_ordinal(self): + p = Period("2011-01", freq="M") + res = Period._from_ordinal(p.ordinal, freq="M") + assert p == res + assert isinstance(res, Period) + + @pytest.mark.parametrize("freq", ["A", "M", "D", "H"]) + def test_construct_from_nat_string_and_freq(self, freq): + per = Period("NaT", freq=freq) + assert per is NaT + + per = Period("NaT", freq="2" + freq) + assert per is NaT + + per = Period("NaT", freq="3" + freq) + assert per is NaT + + def test_period_cons_nat(self): + p = Period("nat", freq="W-SUN") + assert p is NaT + + p = Period(iNaT, freq="D") + assert p is NaT + + p = Period(iNaT, freq="3D") + assert p is NaT + + p = Period(iNaT, freq="1D1H") + assert p is NaT + + p = Period("NaT") + assert p is NaT + + p = Period(iNaT) + assert p is NaT + + def test_period_cons_mult(self): + p1 = Period("2011-01", freq="3M") + p2 = Period("2011-01", freq="M") + assert p1.ordinal == p2.ordinal + + assert p1.freq == offsets.MonthEnd(3) + assert p1.freqstr == "3M" + + assert p2.freq == offsets.MonthEnd() + assert p2.freqstr == "M" + + result = p1 + 1 + assert result.ordinal == (p2 + 3).ordinal + + assert result.freq == p1.freq + assert result.freqstr == "3M" + + result = p1 - 1 + assert result.ordinal == (p2 - 3).ordinal + assert result.freq == p1.freq + assert result.freqstr == "3M" + + msg = "Frequency must be positive, because it represents span: -3M" + with pytest.raises(ValueError, match=msg): + Period("2011-01", freq="-3M") + + msg = "Frequency must be positive, because it represents span: 0M" + with pytest.raises(ValueError, match=msg): + Period("2011-01", freq="0M") + + def test_period_cons_combined(self): + p = [ + ( + Period("2011-01", freq="1D1H"), + Period("2011-01", freq="1H1D"), + Period("2011-01", freq="H"), + ), + ( + Period(ordinal=1, freq="1D1H"), + Period(ordinal=1, freq="1H1D"), + Period(ordinal=1, freq="H"), + ), + ] + + for p1, p2, p3 in p: + assert p1.ordinal == p3.ordinal + assert p2.ordinal == p3.ordinal + + assert p1.freq == offsets.Hour(25) + assert p1.freqstr == "25H" + + assert p2.freq == offsets.Hour(25) + assert p2.freqstr == "25H" + + assert p3.freq == offsets.Hour() + assert p3.freqstr == "H" + + result = p1 + 1 + assert result.ordinal == (p3 + 25).ordinal + assert result.freq == p1.freq + assert result.freqstr == "25H" + + result = p2 + 1 + 
assert result.ordinal == (p3 + 25).ordinal + assert result.freq == p2.freq + assert result.freqstr == "25H" + + result = p1 - 1 + assert result.ordinal == (p3 - 25).ordinal + assert result.freq == p1.freq + assert result.freqstr == "25H" + + result = p2 - 1 + assert result.ordinal == (p3 - 25).ordinal + assert result.freq == p2.freq + assert result.freqstr == "25H" + + msg = "Frequency must be positive, because it represents span: -25H" + with pytest.raises(ValueError, match=msg): + Period("2011-01", freq="-1D1H") + with pytest.raises(ValueError, match=msg): + Period("2011-01", freq="-1H1D") + with pytest.raises(ValueError, match=msg): + Period(ordinal=1, freq="-1D1H") + with pytest.raises(ValueError, match=msg): + Period(ordinal=1, freq="-1H1D") + + msg = "Frequency must be positive, because it represents span: 0D" + with pytest.raises(ValueError, match=msg): + Period("2011-01", freq="0D0H") + with pytest.raises(ValueError, match=msg): + Period(ordinal=1, freq="0D0H") + + # You can only combine together day and intraday offsets + msg = "Invalid frequency: 1W1D" + with pytest.raises(ValueError, match=msg): + Period("2011-01", freq="1W1D") + msg = "Invalid frequency: 1D1W" + with pytest.raises(ValueError, match=msg): + Period("2011-01", freq="1D1W") + + @pytest.mark.parametrize("day", ["1970/01/01 ", "2020-12-31 ", "1981/09/13 "]) + @pytest.mark.parametrize("hour", ["00:00:00", "00:00:01", "23:59:59", "12:00:59"]) + @pytest.mark.parametrize( + "sec_float, expected", + [ + (".000000001", 1), + (".000000999", 999), + (".123456789", 789), + (".999999999", 999), + ], + ) + def test_period_constructor_nanosecond(self, day, hour, sec_float, expected): + # GH 34621 + + assert Period(day + hour + sec_float).start_time.nanosecond == expected + + @pytest.mark.parametrize("hour", range(24)) + def test_period_large_ordinal(self, hour): + # Issue #36430 + # Integer overflow for Period over the maximum timestamp + p = Period(ordinal=2562048 + hour, freq="1H") + assert p.hour == hour + + +class TestPeriodMethods: + def test_round_trip(self): + p = Period("2000Q1") + new_p = tm.round_trip_pickle(p) + assert new_p == p + + def test_hash(self): + assert hash(Period("2011-01", freq="M")) == hash(Period("2011-01", freq="M")) + + assert hash(Period("2011-01-01", freq="D")) != hash(Period("2011-01", freq="M")) + + assert hash(Period("2011-01", freq="3M")) != hash(Period("2011-01", freq="2M")) + + assert hash(Period("2011-01", freq="M")) != hash(Period("2011-02", freq="M")) + + # -------------------------------------------------------------- + # to_timestamp + + @pytest.mark.parametrize("tzstr", ["Europe/Brussels", "Asia/Tokyo", "US/Pacific"]) + def test_to_timestamp_tz_arg(self, tzstr): + # GH#34522 tz kwarg deprecated + with tm.assert_produces_warning(FutureWarning): + p = Period("1/1/2005", freq="M").to_timestamp(tz=tzstr) + exp = Timestamp("1/1/2005", tz="UTC").tz_convert(tzstr) + exp_zone = pytz.timezone(tzstr).normalize(p) + + assert p == exp + assert p.tz == exp_zone.tzinfo + assert p.tz == exp.tz + + with tm.assert_produces_warning(FutureWarning): + p = Period("1/1/2005", freq="3H").to_timestamp(tz=tzstr) + exp = Timestamp("1/1/2005", tz="UTC").tz_convert(tzstr) + exp_zone = pytz.timezone(tzstr).normalize(p) + + assert p == exp + assert p.tz == exp_zone.tzinfo + assert p.tz == exp.tz + + with tm.assert_produces_warning(FutureWarning): + p = Period("1/1/2005", freq="A").to_timestamp(freq="A", tz=tzstr) + exp = Timestamp(day=31, month=12, year=2005, tz="UTC").tz_convert(tzstr) + exp_zone = 
pytz.timezone(tzstr).normalize(p) + + assert p == exp + assert p.tz == exp_zone.tzinfo + assert p.tz == exp.tz + + with tm.assert_produces_warning(FutureWarning): + p = Period("1/1/2005", freq="A").to_timestamp(freq="3H", tz=tzstr) + exp = Timestamp("1/1/2005", tz="UTC").tz_convert(tzstr) + exp_zone = pytz.timezone(tzstr).normalize(p) + + assert p == exp + assert p.tz == exp_zone.tzinfo + assert p.tz == exp.tz + + @pytest.mark.parametrize( + "tzstr", + ["dateutil/Europe/Brussels", "dateutil/Asia/Tokyo", "dateutil/US/Pacific"], + ) + def test_to_timestamp_tz_arg_dateutil(self, tzstr): + tz = maybe_get_tz(tzstr) + with tm.assert_produces_warning(FutureWarning): + p = Period("1/1/2005", freq="M").to_timestamp(tz=tz) + exp = Timestamp("1/1/2005", tz="UTC").tz_convert(tzstr) + assert p == exp + assert p.tz == dateutil_gettz(tzstr.split("/", 1)[1]) + assert p.tz == exp.tz + + with tm.assert_produces_warning(FutureWarning): + p = Period("1/1/2005", freq="M").to_timestamp(freq="3H", tz=tz) + exp = Timestamp("1/1/2005", tz="UTC").tz_convert(tzstr) + assert p == exp + assert p.tz == dateutil_gettz(tzstr.split("/", 1)[1]) + assert p.tz == exp.tz + + def test_to_timestamp_tz_arg_dateutil_from_string(self): + with tm.assert_produces_warning(FutureWarning): + p = Period("1/1/2005", freq="M").to_timestamp(tz="dateutil/Europe/Brussels") + assert p.tz == dateutil_gettz("Europe/Brussels") + + def test_to_timestamp_mult(self): + p = Period("2011-01", freq="M") + assert p.to_timestamp(how="S") == Timestamp("2011-01-01") + expected = Timestamp("2011-02-01") - Timedelta(1, "ns") + assert p.to_timestamp(how="E") == expected + + p = Period("2011-01", freq="3M") + assert p.to_timestamp(how="S") == Timestamp("2011-01-01") + expected = Timestamp("2011-04-01") - Timedelta(1, "ns") + assert p.to_timestamp(how="E") == expected + + def test_to_timestamp(self): + p = Period("1982", freq="A") + start_ts = p.to_timestamp(how="S") + aliases = ["s", "StarT", "BEGIn"] + for a in aliases: + assert start_ts == p.to_timestamp("D", how=a) + # freq with mult should not affect the result + assert start_ts == p.to_timestamp("3D", how=a) + + end_ts = p.to_timestamp(how="E") + aliases = ["e", "end", "FINIsH"] + for a in aliases: + assert end_ts == p.to_timestamp("D", how=a) + assert end_ts == p.to_timestamp("3D", how=a) + + from_lst = ["A", "Q", "M", "W", "B", "D", "H", "Min", "S"] + + def _ex(p): + if p.freq == "B": + return p.start_time + Timedelta(days=1, nanoseconds=-1) + return Timestamp((p + p.freq).start_time.value - 1) + + for fcode in from_lst: + p = Period("1982", freq=fcode) + result = p.to_timestamp().to_period(fcode) + assert result == p + + assert p.start_time == p.to_timestamp(how="S") + + assert p.end_time == _ex(p) + + # Frequencies other than daily + + p = Period("1985", freq="A") + + result = p.to_timestamp("H", how="end") + expected = Timestamp(1986, 1, 1) - Timedelta(1, "ns") + assert result == expected + result = p.to_timestamp("3H", how="end") + assert result == expected + + result = p.to_timestamp("T", how="end") + expected = Timestamp(1986, 1, 1) - Timedelta(1, "ns") + assert result == expected + result = p.to_timestamp("2T", how="end") + assert result == expected + + result = p.to_timestamp(how="end") + expected = Timestamp(1986, 1, 1) - Timedelta(1, "ns") + assert result == expected + + expected = datetime(1985, 1, 1) + result = p.to_timestamp("H", how="start") + assert result == expected + result = p.to_timestamp("T", how="start") + assert result == expected + result = p.to_timestamp("S", how="start") + 
assert result == expected + result = p.to_timestamp("3H", how="start") + assert result == expected + result = p.to_timestamp("5S", how="start") + assert result == expected + + def test_to_timestamp_business_end(self): + per = Period("1990-01-05", "B") # Friday + result = per.to_timestamp("B", how="E") + + expected = Timestamp("1990-01-06") - Timedelta(nanoseconds=1) + assert result == expected + + @pytest.mark.parametrize( + "ts, expected", + [ + ("1970-01-01 00:00:00", 0), + ("1970-01-01 00:00:00.000001", 1), + ("1970-01-01 00:00:00.00001", 10), + ("1970-01-01 00:00:00.499", 499000), + ("1999-12-31 23:59:59.999", 999000), + ("1999-12-31 23:59:59.999999", 999999), + ("2050-12-31 23:59:59.5", 500000), + ("2050-12-31 23:59:59.500001", 500001), + ("2050-12-31 23:59:59.123456", 123456), + ], + ) + @pytest.mark.parametrize("freq", [None, "us", "ns"]) + def test_to_timestamp_microsecond(self, ts, expected, freq): + # GH 24444 + result = Period(ts).to_timestamp(freq=freq).microsecond + assert result == expected + + # -------------------------------------------------------------- + # Rendering: __repr__, strftime, etc + + def test_repr(self): + p = Period("Jan-2000") + assert "2000-01" in repr(p) + + p = Period("2000-12-15") + assert "2000-12-15" in repr(p) + + def test_repr_nat(self): + p = Period("nat", freq="M") + assert repr(NaT) in repr(p) + + def test_millisecond_repr(self): + p = Period("2000-01-01 12:15:02.123") + + assert repr(p) == "Period('2000-01-01 12:15:02.123', 'L')" + + def test_microsecond_repr(self): + p = Period("2000-01-01 12:15:02.123567") + + assert repr(p) == "Period('2000-01-01 12:15:02.123567', 'U')" + + def test_strftime(self): + # GH#3363 + p = Period("2000-1-1 12:34:12", freq="S") + res = p.strftime("%Y-%m-%d %H:%M:%S") + assert res == "2000-01-01 12:34:12" + assert isinstance(res, str) + + +class TestPeriodProperties: + """Test properties such as year, month, weekday, etc....""" + + @pytest.mark.parametrize("freq", ["A", "M", "D", "H"]) + def test_is_leap_year(self, freq): + # GH 13727 + p = Period("2000-01-01 00:00:00", freq=freq) + assert p.is_leap_year + assert isinstance(p.is_leap_year, bool) + + p = Period("1999-01-01 00:00:00", freq=freq) + assert not p.is_leap_year + + p = Period("2004-01-01 00:00:00", freq=freq) + assert p.is_leap_year + + p = Period("2100-01-01 00:00:00", freq=freq) + assert not p.is_leap_year + + def test_quarterly_negative_ordinals(self): + p = Period(ordinal=-1, freq="Q-DEC") + assert p.year == 1969 + assert p.quarter == 4 + assert isinstance(p, Period) + + p = Period(ordinal=-2, freq="Q-DEC") + assert p.year == 1969 + assert p.quarter == 3 + assert isinstance(p, Period) + + p = Period(ordinal=-2, freq="M") + assert p.year == 1969 + assert p.month == 11 + assert isinstance(p, Period) + + def test_freq_str(self): + i1 = Period("1982", freq="Min") + assert i1.freq == offsets.Minute() + assert i1.freqstr == "T" + + def test_period_deprecated_freq(self): + cases = { + "M": ["MTH", "MONTH", "MONTHLY", "Mth", "month", "monthly"], + "B": ["BUS", "BUSINESS", "BUSINESSLY", "WEEKDAY", "bus"], + "D": ["DAY", "DLY", "DAILY", "Day", "Dly", "Daily"], + "H": ["HR", "HOUR", "HRLY", "HOURLY", "hr", "Hour", "HRly"], + "T": ["minute", "MINUTE", "MINUTELY", "minutely"], + "S": ["sec", "SEC", "SECOND", "SECONDLY", "second"], + "L": ["MILLISECOND", "MILLISECONDLY", "millisecond"], + "U": ["MICROSECOND", "MICROSECONDLY", "microsecond"], + "N": ["NANOSECOND", "NANOSECONDLY", "nanosecond"], + } + + msg = INVALID_FREQ_ERR_MSG + for exp, freqs in cases.items(): + for 
freq in freqs: + with pytest.raises(ValueError, match=msg): + Period("2016-03-01 09:00", freq=freq) + with pytest.raises(ValueError, match=msg): + Period(ordinal=1, freq=freq) + + # check that supported freq aliases still work + p1 = Period("2016-03-01 09:00", freq=exp) + p2 = Period(ordinal=1, freq=exp) + assert isinstance(p1, Period) + assert isinstance(p2, Period) + + def _period_constructor(bound, offset): + return Period( + year=bound.year, + month=bound.month, + day=bound.day, + hour=bound.hour, + minute=bound.minute, + second=bound.second + offset, + freq="us", + ) + + @pytest.mark.parametrize("bound, offset", [(Timestamp.min, -1), (Timestamp.max, 1)]) + @pytest.mark.parametrize("period_property", ["start_time", "end_time"]) + def test_outer_bounds_start_and_end_time(self, bound, offset, period_property): + # GH #13346 + period = TestPeriodProperties._period_constructor(bound, offset) + with pytest.raises(OutOfBoundsDatetime, match="Out of bounds nanosecond"): + getattr(period, period_property) + + @pytest.mark.parametrize("bound, offset", [(Timestamp.min, -1), (Timestamp.max, 1)]) + @pytest.mark.parametrize("period_property", ["start_time", "end_time"]) + def test_inner_bounds_start_and_end_time(self, bound, offset, period_property): + # GH #13346 + period = TestPeriodProperties._period_constructor(bound, -offset) + expected = period.to_timestamp().round(freq="S") + assert getattr(period, period_property).round(freq="S") == expected + expected = (bound - offset * Timedelta(1, unit="S")).floor("S") + assert getattr(period, period_property).floor("S") == expected + + def test_start_time(self): + freq_lst = ["A", "Q", "M", "D", "H", "T", "S"] + xp = datetime(2012, 1, 1) + for f in freq_lst: + p = Period("2012", freq=f) + assert p.start_time == xp + assert Period("2012", freq="B").start_time == datetime(2012, 1, 2) + assert Period("2012", freq="W").start_time == datetime(2011, 12, 26) + + def test_end_time(self): + p = Period("2012", freq="A") + + def _ex(*args): + return Timestamp(Timestamp(datetime(*args)).value - 1) + + xp = _ex(2013, 1, 1) + assert xp == p.end_time + + p = Period("2012", freq="Q") + xp = _ex(2012, 4, 1) + assert xp == p.end_time + + p = Period("2012", freq="M") + xp = _ex(2012, 2, 1) + assert xp == p.end_time + + p = Period("2012", freq="D") + xp = _ex(2012, 1, 2) + assert xp == p.end_time + + p = Period("2012", freq="H") + xp = _ex(2012, 1, 1, 1) + assert xp == p.end_time + + p = Period("2012", freq="B") + xp = _ex(2012, 1, 3) + assert xp == p.end_time + + p = Period("2012", freq="W") + xp = _ex(2012, 1, 2) + assert xp == p.end_time + + # Test for GH 11738 + p = Period("2012", freq="15D") + xp = _ex(2012, 1, 16) + assert xp == p.end_time + + p = Period("2012", freq="1D1H") + xp = _ex(2012, 1, 2, 1) + assert xp == p.end_time + + p = Period("2012", freq="1H1D") + xp = _ex(2012, 1, 2, 1) + assert xp == p.end_time + + def test_end_time_business_friday(self): + # GH#34449 + per = Period("1990-01-05", "B") + result = per.end_time + + expected = Timestamp("1990-01-06") - Timedelta(nanoseconds=1) + assert result == expected + + def test_anchor_week_end_time(self): + def _ex(*args): + return Timestamp(Timestamp(datetime(*args)).value - 1) + + p = Period("2013-1-1", "W-SAT") + xp = _ex(2013, 1, 6) + assert p.end_time == xp + + def test_properties_annually(self): + # Test properties on Periods with annual frequency. + a_date = Period(freq="A", year=2007) + assert a_date.year == 2007 + + def test_properties_quarterly(self): + # Test properties on Periods with quarterly frequency. 
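+ # The loop below steps each anchored quarter forward and checks that + # .qyear stays in fiscal 2007 while .quarter advances from 1 to 3. + # Illustrative sketch (not part of the upstream test): for the JAN + # anchor, Q1 of fiscal 2007 ends in April 2006: + # >>> Period(freq="Q-JAN", year=2007, quarter=1).to_timestamp(how="end").month + # 4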
+ qedec_date = Period(freq="Q-DEC", year=2007, quarter=1) + qejan_date = Period(freq="Q-JAN", year=2007, quarter=1) + qejun_date = Period(freq="Q-JUN", year=2007, quarter=1) + # + for x in range(3): + for qd in (qedec_date, qejan_date, qejun_date): + assert (qd + x).qyear == 2007 + assert (qd + x).quarter == x + 1 + + def test_properties_monthly(self): + # Test properties on Periods with monthly frequency. + m_date = Period(freq="M", year=2007, month=1) + for x in range(11): + m_ival_x = m_date + x + assert m_ival_x.year == 2007 + if 1 <= x + 1 <= 3: + assert m_ival_x.quarter == 1 + elif 4 <= x + 1 <= 6: + assert m_ival_x.quarter == 2 + elif 7 <= x + 1 <= 9: + assert m_ival_x.quarter == 3 + elif 10 <= x + 1 <= 12: + assert m_ival_x.quarter == 4 + assert m_ival_x.month == x + 1 + + def test_properties_weekly(self): + # Test properties on Periods with weekly frequency. + w_date = Period(freq="W", year=2007, month=1, day=7) + # + assert w_date.year == 2007 + assert w_date.quarter == 1 + assert w_date.month == 1 + assert w_date.week == 1 + assert (w_date - 1).week == 52 + assert w_date.days_in_month == 31 + assert Period(freq="W", year=2012, month=2, day=1).days_in_month == 29 + + def test_properties_weekly_legacy(self): + # Test properties on Periods with weekly frequency. + w_date = Period(freq="W", year=2007, month=1, day=7) + assert w_date.year == 2007 + assert w_date.quarter == 1 + assert w_date.month == 1 + assert w_date.week == 1 + assert (w_date - 1).week == 52 + assert w_date.days_in_month == 31 + + exp = Period(freq="W", year=2012, month=2, day=1) + assert exp.days_in_month == 29 + + msg = INVALID_FREQ_ERR_MSG + with pytest.raises(ValueError, match=msg): + Period(freq="WK", year=2007, month=1, day=7) + + def test_properties_daily(self): + # Test properties on Periods with daily frequency. + b_date = Period(freq="B", year=2007, month=1, day=1) + # + assert b_date.year == 2007 + assert b_date.quarter == 1 + assert b_date.month == 1 + assert b_date.day == 1 + assert b_date.weekday == 0 + assert b_date.dayofyear == 1 + assert b_date.days_in_month == 31 + assert Period(freq="B", year=2012, month=2, day=1).days_in_month == 29 + + d_date = Period(freq="D", year=2007, month=1, day=1) + + assert d_date.year == 2007 + assert d_date.quarter == 1 + assert d_date.month == 1 + assert d_date.day == 1 + assert d_date.weekday == 0 + assert d_date.dayofyear == 1 + assert d_date.days_in_month == 31 + assert Period(freq="D", year=2012, month=2, day=1).days_in_month == 29 + + def test_properties_hourly(self): + # Test properties on Periods with hourly frequency. + h_date1 = Period(freq="H", year=2007, month=1, day=1, hour=0) + h_date2 = Period(freq="2H", year=2007, month=1, day=1, hour=0) + + for h_date in [h_date1, h_date2]: + assert h_date.year == 2007 + assert h_date.quarter == 1 + assert h_date.month == 1 + assert h_date.day == 1 + assert h_date.weekday == 0 + assert h_date.dayofyear == 1 + assert h_date.hour == 0 + assert h_date.days_in_month == 31 + assert ( + Period(freq="H", year=2012, month=2, day=1, hour=0).days_in_month == 29 + ) + + def test_properties_minutely(self): + # Test properties on Periods with minutely frequency. 
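+ # A Period exposes calendar fields down to its own resolution, so a + # minutely period answers .year through .minute. Illustrative sketch + # (not part of the upstream test): + # >>> Period("2007-01-01 00:00", freq="Min").minute + # 0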
+ t_date = Period(freq="Min", year=2007, month=1, day=1, hour=0, minute=0) + # + assert t_date.quarter == 1 + assert t_date.month == 1 + assert t_date.day == 1 + assert t_date.weekday == 0 + assert t_date.dayofyear == 1 + assert t_date.hour == 0 + assert t_date.minute == 0 + assert t_date.days_in_month == 31 + assert ( + Period(freq="D", year=2012, month=2, day=1, hour=0, minute=0).days_in_month + == 29 + ) + + def test_properties_secondly(self): + # Test properties on Periods with secondly frequency. + s_date = Period( + freq="Min", year=2007, month=1, day=1, hour=0, minute=0, second=0 + ) + # + assert s_date.year == 2007 + assert s_date.quarter == 1 + assert s_date.month == 1 + assert s_date.day == 1 + assert s_date.weekday == 0 + assert s_date.dayofyear == 1 + assert s_date.hour == 0 + assert s_date.minute == 0 + assert s_date.second == 0 + assert s_date.days_in_month == 31 + assert ( + Period( + freq="Min", year=2012, month=2, day=1, hour=0, minute=0, second=0 + ).days_in_month + == 29 + ) + + +class TestPeriodField: + def test_get_period_field_array_raises_on_out_of_range(self): + msg = "Buffer dtype mismatch, expected 'const int64_t' but got 'double'" + with pytest.raises(ValueError, match=msg): + libperiod.get_period_field_arr(-1, np.empty(1), 0) + + +class TestPeriodComparisons: + def test_comparison_same_period_different_object(self): + # Separate Period objects for the same period + left = Period("2000-01", "M") + right = Period("2000-01", "M") + + assert left == right + assert left >= right + assert left <= right + assert not left < right + assert not left > right + + def test_comparison_same_freq(self): + jan = Period("2000-01", "M") + feb = Period("2000-02", "M") + + assert not jan == feb + assert jan != feb + assert jan < feb + assert jan <= feb + assert not jan > feb + assert not jan >= feb + + def test_comparison_mismatched_freq(self): + jan = Period("2000-01", "M") + day = Period("2012-01-01", "D") + + assert not jan == day + assert jan != day + msg = r"Input has different freq=D from Period\(freq=M\)" + with pytest.raises(IncompatibleFrequency, match=msg): + jan < day + with pytest.raises(IncompatibleFrequency, match=msg): + jan <= day + with pytest.raises(IncompatibleFrequency, match=msg): + jan > day + with pytest.raises(IncompatibleFrequency, match=msg): + jan >= day + + def test_comparison_invalid_type(self): + jan = Period("2000-01", "M") + + assert not jan == 1 + assert jan != 1 + + int_or_per = "'(Period|int)'" + msg = f"not supported between instances of {int_or_per} and {int_or_per}" + for left, right in [(jan, 1), (1, jan)]: + + with pytest.raises(TypeError, match=msg): + left > right + with pytest.raises(TypeError, match=msg): + left >= right + with pytest.raises(TypeError, match=msg): + left < right + with pytest.raises(TypeError, match=msg): + left <= right + + def test_sort_periods(self): + jan = Period("2000-01", "M") + feb = Period("2000-02", "M") + mar = Period("2000-03", "M") + periods = [mar, jan, feb] + correctPeriods = [jan, feb, mar] + assert sorted(periods) == correctPeriods + + def test_period_cmp_nat(self): + p = Period("2011-01-01", freq="D") + + t = Timestamp("2011-01-01") + # confirm Period('NaT') works identically to Timestamp('NaT') + for left, right in [ + (NaT, p), + (p, NaT), + (NaT, t), + (t, NaT), + ]: + assert not left < right + assert not left > right + assert not left == right + assert left != right + assert not left <= right + assert not left >= right + + @pytest.mark.parametrize( + "zerodim_arr, expected", + ((np.array(0), False), 
(np.array(Period("2000-01", "M")), True)), + ) + def test_comparison_numpy_zerodim_arr(self, zerodim_arr, expected): + p = Period("2000-01", "M") + + assert (p == zerodim_arr) is expected + assert (zerodim_arr == p) is expected + + +class TestArithmetic: + def test_sub_delta(self): + left, right = Period("2011", freq="A"), Period("2007", freq="A") + result = left - right + assert result == 4 * right.freq + + msg = r"Input has different freq=M from Period\(freq=A-DEC\)" + with pytest.raises(IncompatibleFrequency, match=msg): + left - Period("2007-01", freq="M") + + def test_add_integer(self): + per1 = Period(freq="D", year=2008, month=1, day=1) + per2 = Period(freq="D", year=2008, month=1, day=2) + assert per1 + 1 == per2 + assert 1 + per1 == per2 + + def test_add_sub_nat(self): + # GH#13071 + p = Period("2011-01", freq="M") + assert p + NaT is NaT + assert NaT + p is NaT + assert p - NaT is NaT + assert NaT - p is NaT + + def test_add_invalid(self): + # GH#4731 + per1 = Period(freq="D", year=2008, month=1, day=1) + per2 = Period(freq="D", year=2008, month=1, day=2) + + msg = "|".join( + [ + r"unsupported operand type\(s\)", + "can only concatenate str", + "must be str, not Period", + ] + ) + with pytest.raises(TypeError, match=msg): + per1 + "str" + with pytest.raises(TypeError, match=msg): + "str" + per1 + with pytest.raises(TypeError, match=msg): + per1 + per2 + + boxes = [lambda x: x, lambda x: pd.Series([x]), lambda x: pd.Index([x])] + ids = ["identity", "Series", "Index"] + + @pytest.mark.parametrize("lbox", boxes, ids=ids) + @pytest.mark.parametrize("rbox", boxes, ids=ids) + def test_add_timestamp_raises(self, rbox, lbox): + # GH#17983 + ts = Timestamp("2017") + per = Period("2017", freq="M") + + # We may get a different message depending on which class raises + # the error. 
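+ # The "|".join below ORs the candidate messages into a single regex, + # so one pytest.raises check covers the scalar, Series and Index boxes.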
+ msg = "|".join( + [ + "cannot add", + "unsupported operand", + "can only operate on a", + "incompatible type", + "ufunc add cannot use operands", + ] + ) + with pytest.raises(TypeError, match=msg): + lbox(ts) + rbox(per) + + with pytest.raises(TypeError, match=msg): + lbox(per) + rbox(ts) + + with pytest.raises(TypeError, match=msg): + lbox(per) + rbox(per) + + def test_sub(self): + per1 = Period("2011-01-01", freq="D") + per2 = Period("2011-01-15", freq="D") + + off = per1.freq + assert per1 - per2 == -14 * off + assert per2 - per1 == 14 * off + + msg = r"Input has different freq=M from Period\(freq=D\)" + with pytest.raises(IncompatibleFrequency, match=msg): + per1 - Period("2011-02", freq="M") + + @pytest.mark.parametrize("n", [1, 2, 3, 4]) + def test_sub_n_gt_1_ticks(self, tick_classes, n): + # GH 23878 + p1 = Period("19910905", freq=tick_classes(n)) + p2 = Period("19920406", freq=tick_classes(n)) + + expected = Period(str(p2), freq=p2.freq.base) - Period( + str(p1), freq=p1.freq.base + ) + + assert (p2 - p1) == expected + + @pytest.mark.parametrize("normalize", [True, False]) + @pytest.mark.parametrize("n", [1, 2, 3, 4]) + @pytest.mark.parametrize( + "offset, kwd_name", + [ + (offsets.YearEnd, "month"), + (offsets.QuarterEnd, "startingMonth"), + (offsets.MonthEnd, None), + (offsets.Week, "weekday"), + ], + ) + def test_sub_n_gt_1_offsets(self, offset, kwd_name, n, normalize): + # GH 23878 + kwds = {kwd_name: 3} if kwd_name is not None else {} + p1_d = "19910905" + p2_d = "19920406" + p1 = Period(p1_d, freq=offset(n, normalize, **kwds)) + p2 = Period(p2_d, freq=offset(n, normalize, **kwds)) + + expected = Period(p2_d, freq=p2.freq.base) - Period(p1_d, freq=p1.freq.base) + + assert (p2 - p1) == expected + + def test_add_offset(self): + # freq is DateOffset + for freq in ["A", "2A", "3A"]: + p = Period("2011", freq=freq) + exp = Period("2013", freq=freq) + assert p + offsets.YearEnd(2) == exp + assert offsets.YearEnd(2) + p == exp + + for o in [ + offsets.YearBegin(2), + offsets.MonthBegin(1), + offsets.Minute(), + np.timedelta64(365, "D"), + timedelta(365), + ]: + msg = "Input has different freq|Input cannot be converted to Period" + with pytest.raises(IncompatibleFrequency, match=msg): + p + o + with pytest.raises(IncompatibleFrequency, match=msg): + o + p + + for freq in ["M", "2M", "3M"]: + p = Period("2011-03", freq=freq) + exp = Period("2011-05", freq=freq) + assert p + offsets.MonthEnd(2) == exp + assert offsets.MonthEnd(2) + p == exp + + exp = Period("2012-03", freq=freq) + assert p + offsets.MonthEnd(12) == exp + assert offsets.MonthEnd(12) + p == exp + + msg = "|".join( + [ + "Input has different freq", + "Input cannot be converted to Period", + ] + ) + + for o in [ + offsets.YearBegin(2), + offsets.MonthBegin(1), + offsets.Minute(), + np.timedelta64(365, "D"), + timedelta(365), + ]: + + with pytest.raises(IncompatibleFrequency, match=msg): + p + o + with pytest.raises(IncompatibleFrequency, match=msg): + o + p + + # freq is Tick + for freq in ["D", "2D", "3D"]: + p = Period("2011-04-01", freq=freq) + + exp = Period("2011-04-06", freq=freq) + assert p + offsets.Day(5) == exp + assert offsets.Day(5) + p == exp + + exp = Period("2011-04-02", freq=freq) + assert p + offsets.Hour(24) == exp + assert offsets.Hour(24) + p == exp + + exp = Period("2011-04-03", freq=freq) + assert p + np.timedelta64(2, "D") == exp + assert np.timedelta64(2, "D") + p == exp + + exp = Period("2011-04-02", freq=freq) + assert p + np.timedelta64(3600 * 24, "s") == exp + assert np.timedelta64(3600 * 24, 
"s") + p == exp + + exp = Period("2011-03-30", freq=freq) + assert p + timedelta(-2) == exp + assert timedelta(-2) + p == exp + + exp = Period("2011-04-03", freq=freq) + assert p + timedelta(hours=48) == exp + assert timedelta(hours=48) + p == exp + + msg = "|".join( + [ + "Input has different freq", + "Input cannot be converted to Period", + ] + ) + + for o in [ + offsets.YearBegin(2), + offsets.MonthBegin(1), + offsets.Minute(), + np.timedelta64(4, "h"), + timedelta(hours=23), + ]: + with pytest.raises(IncompatibleFrequency, match=msg): + p + o + with pytest.raises(IncompatibleFrequency, match=msg): + o + p + + for freq in ["H", "2H", "3H"]: + p = Period("2011-04-01 09:00", freq=freq) + + exp = Period("2011-04-03 09:00", freq=freq) + assert p + offsets.Day(2) == exp + assert offsets.Day(2) + p == exp + + exp = Period("2011-04-01 12:00", freq=freq) + assert p + offsets.Hour(3) == exp + assert offsets.Hour(3) + p == exp + + msg = "cannot use operands with types" + exp = Period("2011-04-01 12:00", freq=freq) + assert p + np.timedelta64(3, "h") == exp + assert np.timedelta64(3, "h") + p == exp + + exp = Period("2011-04-01 10:00", freq=freq) + assert p + np.timedelta64(3600, "s") == exp + assert np.timedelta64(3600, "s") + p == exp + + exp = Period("2011-04-01 11:00", freq=freq) + assert p + timedelta(minutes=120) == exp + assert timedelta(minutes=120) + p == exp + + exp = Period("2011-04-05 12:00", freq=freq) + assert p + timedelta(days=4, minutes=180) == exp + assert timedelta(days=4, minutes=180) + p == exp + + msg = "|".join( + [ + "Input has different freq", + "Input cannot be converted to Period", + ] + ) + + for o in [ + offsets.YearBegin(2), + offsets.MonthBegin(1), + offsets.Minute(), + np.timedelta64(3200, "s"), + timedelta(hours=23, minutes=30), + ]: + with pytest.raises(IncompatibleFrequency, match=msg): + p + o + with pytest.raises(IncompatibleFrequency, match=msg): + o + p + + def test_sub_offset(self): + # freq is DateOffset + msg = "|".join( + [ + "Input has different freq", + "Input cannot be converted to Period", + ] + ) + + for freq in ["A", "2A", "3A"]: + p = Period("2011", freq=freq) + assert p - offsets.YearEnd(2) == Period("2009", freq=freq) + + for o in [ + offsets.YearBegin(2), + offsets.MonthBegin(1), + offsets.Minute(), + np.timedelta64(365, "D"), + timedelta(365), + ]: + with pytest.raises(IncompatibleFrequency, match=msg): + p - o + + for freq in ["M", "2M", "3M"]: + p = Period("2011-03", freq=freq) + assert p - offsets.MonthEnd(2) == Period("2011-01", freq=freq) + assert p - offsets.MonthEnd(12) == Period("2010-03", freq=freq) + + for o in [ + offsets.YearBegin(2), + offsets.MonthBegin(1), + offsets.Minute(), + np.timedelta64(365, "D"), + timedelta(365), + ]: + with pytest.raises(IncompatibleFrequency, match=msg): + p - o + + # freq is Tick + for freq in ["D", "2D", "3D"]: + p = Period("2011-04-01", freq=freq) + assert p - offsets.Day(5) == Period("2011-03-27", freq=freq) + assert p - offsets.Hour(24) == Period("2011-03-31", freq=freq) + assert p - np.timedelta64(2, "D") == Period("2011-03-30", freq=freq) + assert p - np.timedelta64(3600 * 24, "s") == Period("2011-03-31", freq=freq) + assert p - timedelta(-2) == Period("2011-04-03", freq=freq) + assert p - timedelta(hours=48) == Period("2011-03-30", freq=freq) + + for o in [ + offsets.YearBegin(2), + offsets.MonthBegin(1), + offsets.Minute(), + np.timedelta64(4, "h"), + timedelta(hours=23), + ]: + with pytest.raises(IncompatibleFrequency, match=msg): + p - o + + for freq in ["H", "2H", "3H"]: + p = 
Period("2011-04-01 09:00", freq=freq) + assert p - offsets.Day(2) == Period("2011-03-30 09:00", freq=freq) + assert p - offsets.Hour(3) == Period("2011-04-01 06:00", freq=freq) + assert p - np.timedelta64(3, "h") == Period("2011-04-01 06:00", freq=freq) + assert p - np.timedelta64(3600, "s") == Period( + "2011-04-01 08:00", freq=freq + ) + assert p - timedelta(minutes=120) == Period("2011-04-01 07:00", freq=freq) + assert p - timedelta(days=4, minutes=180) == Period( + "2011-03-28 06:00", freq=freq + ) + + for o in [ + offsets.YearBegin(2), + offsets.MonthBegin(1), + offsets.Minute(), + np.timedelta64(3200, "s"), + timedelta(hours=23, minutes=30), + ]: + with pytest.raises(IncompatibleFrequency, match=msg): + p - o + + @pytest.mark.parametrize("freq", ["M", "2M", "3M"]) + def test_period_addsub_nat(self, freq): + per = Period("2011-01", freq=freq) + + # For subtraction, NaT is treated as another Period object + assert NaT - per is NaT + assert per - NaT is NaT + + # For addition, NaT is treated as offset-like + assert NaT + per is NaT + assert per + NaT is NaT + + def test_period_ops_offset(self): + p = Period("2011-04-01", freq="D") + result = p + offsets.Day() + exp = Period("2011-04-02", freq="D") + assert result == exp + + result = p - offsets.Day(2) + exp = Period("2011-03-30", freq="D") + assert result == exp + + msg = r"Input cannot be converted to Period\(freq=D\)" + with pytest.raises(IncompatibleFrequency, match=msg): + p + offsets.Hour(2) + + with pytest.raises(IncompatibleFrequency, match=msg): + p - offsets.Hour(2) + + +def test_period_immutable(): + # see gh-17116 + msg = "not writable" + + per = Period("2014Q1") + with pytest.raises(AttributeError, match=msg): + per.ordinal = 14 + + freq = per.freq + with pytest.raises(AttributeError, match=msg): + per.freq = 2 * freq + + +def test_small_year_parsing(): + per1 = Period("0001-01-07", "D") + assert per1.year == 1 + assert per1.day == 7 + + +def test_negone_ordinals(): + freqs = ["A", "M", "Q", "D", "H", "T", "S"] + + period = Period(ordinal=-1, freq="D") + for freq in freqs: + repr(period.asfreq(freq)) + + for freq in freqs: + period = Period(ordinal=-1, freq=freq) + repr(period) + assert period.year == 1969 + + period = Period(ordinal=-1, freq="B") + repr(period) + period = Period(ordinal=-1, freq="W") + repr(period) + + +def test_invalid_frequency_error_message(): + msg = "Invalid frequency: " + with pytest.raises(ValueError, match=msg): + Period("2012-01-02", freq="WOM-1MON") diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/test_na_scalar.py b/.local/lib/python3.8/site-packages/pandas/tests/scalar/test_na_scalar.py new file mode 100644 index 0000000..77265e8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/scalar/test_na_scalar.py @@ -0,0 +1,307 @@ +import pickle + +import numpy as np +import pytest + +from pandas._libs.missing import NA + +from pandas.core.dtypes.common import is_scalar + +import pandas as pd +import pandas._testing as tm + + +def test_singleton(): + assert NA is NA + new_NA = type(NA)() + assert new_NA is NA + + +def test_repr(): + assert repr(NA) == "<NA>" + assert str(NA) == "<NA>" + + +def test_format(): + # GH-34740 + assert format(NA) == "<NA>" + assert format(NA, ">10") == "      <NA>" + assert format(NA, "xxx") == "<NA>" # NA is flexible, accept any format spec + + assert f"{NA}" == "<NA>" + assert f"{NA:>10}" == "      <NA>" + assert f"{NA:xxx}" == "<NA>" + + +def test_truthiness(): + msg = "boolean value of NA is ambiguous" + + with pytest.raises(TypeError, match=msg): + bool(NA) + + with 
pytest.raises(TypeError, match=msg): + not NA + + +def test_hashable(): + assert hash(NA) == hash(NA) + d = {NA: "test"} + assert d[NA] == "test" + + +def test_arithmetic_ops(all_arithmetic_functions): + op = all_arithmetic_functions + + for other in [NA, 1, 1.0, "a", np.int64(1), np.nan]: + if op.__name__ in ("pow", "rpow", "rmod") and isinstance(other, str): + continue + if op.__name__ in ("divmod", "rdivmod"): + assert op(NA, other) is (NA, NA) + else: + if op.__name__ == "rpow": + # avoid special case + other += 1 + assert op(NA, other) is NA + + +def test_comparison_ops(): + + for other in [NA, 1, 1.0, "a", np.int64(1), np.nan, np.bool_(True)]: + assert (NA == other) is NA + assert (NA != other) is NA + assert (NA > other) is NA + assert (NA >= other) is NA + assert (NA < other) is NA + assert (NA <= other) is NA + assert (other == NA) is NA + assert (other != NA) is NA + assert (other > NA) is NA + assert (other >= NA) is NA + assert (other < NA) is NA + assert (other <= NA) is NA + + +@pytest.mark.parametrize( + "value", + [ + 0, + 0.0, + -0, + -0.0, + False, + np.bool_(False), + np.int_(0), + np.float_(0), + np.int_(-0), + np.float_(-0), + ], +) +@pytest.mark.parametrize("asarray", [True, False]) +def test_pow_special(value, asarray): + if asarray: + value = np.array([value]) + result = NA ** value + + if asarray: + result = result[0] + else: + # this assertion isn't possible for ndarray. + assert isinstance(result, type(value)) + assert result == 1 + + +@pytest.mark.parametrize( + "value", [1, 1.0, True, np.bool_(True), np.int_(1), np.float_(1)] +) +@pytest.mark.parametrize("asarray", [True, False]) +def test_rpow_special(value, asarray): + if asarray: + value = np.array([value]) + result = value ** NA + + if asarray: + result = result[0] + elif not isinstance(value, (np.float_, np.bool_, np.int_)): + # this assertion isn't possible with asarray=True + assert isinstance(result, type(value)) + + assert result == value + + +@pytest.mark.parametrize("value", [-1, -1.0, np.int_(-1), np.float_(-1)]) +@pytest.mark.parametrize("asarray", [True, False]) +def test_rpow_minus_one(value, asarray): + if asarray: + value = np.array([value]) + result = value ** NA + + if asarray: + result = result[0] + + assert pd.isna(result) + + +def test_unary_ops(): + assert +NA is NA + assert -NA is NA + assert abs(NA) is NA + assert ~NA is NA + + +def test_logical_and(): + + assert NA & True is NA + assert True & NA is NA + assert NA & False is False + assert False & NA is False + assert NA & NA is NA + + msg = "unsupported operand type" + with pytest.raises(TypeError, match=msg): + NA & 5 + + +def test_logical_or(): + + assert NA | True is True + assert True | NA is True + assert NA | False is NA + assert False | NA is NA + assert NA | NA is NA + + msg = "unsupported operand type" + with pytest.raises(TypeError, match=msg): + NA | 5 + + +def test_logical_xor(): + + assert NA ^ True is NA + assert True ^ NA is NA + assert NA ^ False is NA + assert False ^ NA is NA + assert NA ^ NA is NA + + msg = "unsupported operand type" + with pytest.raises(TypeError, match=msg): + NA ^ 5 + + +def test_logical_not(): + assert ~NA is NA + + +@pytest.mark.parametrize("shape", [(3,), (3, 3), (1, 2, 3)]) +def test_arithmetic_ndarray(shape, all_arithmetic_functions): + op = all_arithmetic_functions + a = np.zeros(shape) + if op.__name__ == "pow": + a += 5 + result = op(NA, a) + expected = np.full(a.shape, NA, dtype=object) + tm.assert_numpy_array_equal(result, expected) + + +def test_is_scalar(): + assert is_scalar(NA) is 
True + + +def test_isna(): + assert pd.isna(NA) is True + assert pd.notna(NA) is False + + +def test_series_isna(): + s = pd.Series([1, NA], dtype=object) + expected = pd.Series([False, True]) + tm.assert_series_equal(s.isna(), expected) + + +def test_ufunc(): + assert np.log(NA) is NA + assert np.add(NA, 1) is NA + result = np.divmod(NA, 1) + assert result[0] is NA and result[1] is NA + + result = np.frexp(NA) + assert result[0] is NA and result[1] is NA + + +def test_ufunc_raises(): + msg = "ufunc method 'at'" + with pytest.raises(ValueError, match=msg): + np.log.at(NA, 0) + + +def test_binary_input_not_dunder(): + a = np.array([1, 2, 3]) + expected = np.array([NA, NA, NA], dtype=object) + result = np.logaddexp(a, NA) + tm.assert_numpy_array_equal(result, expected) + + result = np.logaddexp(NA, a) + tm.assert_numpy_array_equal(result, expected) + + # all NA, multiple inputs + assert np.logaddexp(NA, NA) is NA + + result = np.modf(NA, NA) + assert len(result) == 2 + assert all(x is NA for x in result) + + +def test_divmod_ufunc(): + # binary in, binary out. + a = np.array([1, 2, 3]) + expected = np.array([NA, NA, NA], dtype=object) + + result = np.divmod(a, NA) + assert isinstance(result, tuple) + for arr in result: + tm.assert_numpy_array_equal(arr, expected) + tm.assert_numpy_array_equal(arr, expected) + + result = np.divmod(NA, a) + for arr in result: + tm.assert_numpy_array_equal(arr, expected) + tm.assert_numpy_array_equal(arr, expected) + + +def test_integer_hash_collision_dict(): + # GH 30013 + result = {NA: "foo", hash(NA): "bar"} + + assert result[NA] == "foo" + assert result[hash(NA)] == "bar" + + +def test_integer_hash_collision_set(): + # GH 30013 + result = {NA, hash(NA)} + + assert len(result) == 2 + assert NA in result + assert hash(NA) in result + + +def test_pickle_roundtrip(): + # https://github.com/pandas-dev/pandas/issues/31847 + result = pickle.loads(pickle.dumps(NA)) + assert result is NA + + +def test_pickle_roundtrip_pandas(): + result = tm.round_trip_pickle(NA) + assert result is NA + + +@pytest.mark.parametrize( + "values, dtype", [([1, 2, NA], "Int64"), (["A", "B", NA], "string")] +) +@pytest.mark.parametrize("as_frame", [True, False]) +def test_pickle_roundtrip_containers(as_frame, values, dtype): + s = pd.Series(pd.array(values, dtype=dtype)) + if as_frame: + s = s.to_frame(name="A") + result = tm.round_trip_pickle(s) + tm.assert_equal(result, s) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/test_nat.py b/.local/lib/python3.8/site-packages/pandas/tests/scalar/test_nat.py new file mode 100644 index 0000000..7850a20 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/scalar/test_nat.py @@ -0,0 +1,725 @@ +from datetime import ( + datetime, + timedelta, +) +import operator + +import numpy as np +import pytest +import pytz + +from pandas._libs.tslibs import iNaT + +from pandas.core.dtypes.common import is_datetime64_any_dtype + +from pandas import ( + DatetimeIndex, + DatetimeTZDtype, + Index, + NaT, + Period, + Series, + Timedelta, + TimedeltaIndex, + Timestamp, + isna, + offsets, +) +import pandas._testing as tm +from pandas.core.arrays import ( + DatetimeArray, + PeriodArray, + TimedeltaArray, +) +from pandas.core.ops import roperator + + +@pytest.mark.parametrize( + "nat,idx", + [ + (Timestamp("NaT"), DatetimeArray), + (Timedelta("NaT"), TimedeltaArray), + (Period("NaT", freq="M"), PeriodArray), + ], +) +def test_nat_fields(nat, idx): + + for field in idx._field_ops: + # weekday is a property of DTI, but a method + # on 
NaT/Timestamp for compat with datetime + if field == "weekday": + continue + + result = getattr(NaT, field) + assert np.isnan(result) + + result = getattr(nat, field) + assert np.isnan(result) + + for field in idx._bool_ops: + + result = getattr(NaT, field) + assert result is False + + result = getattr(nat, field) + assert result is False + + +def test_nat_vector_field_access(): + idx = DatetimeIndex(["1/1/2000", None, None, "1/4/2000"]) + + for field in DatetimeArray._field_ops: + # weekday is a property of DTI, but a method + # on NaT/Timestamp for compat with datetime + if field == "weekday": + continue + if field in ["week", "weekofyear"]: + # GH#33595 Deprecate week and weekofyear + continue + + result = getattr(idx, field) + expected = Index([getattr(x, field) for x in idx]) + tm.assert_index_equal(result, expected) + + ser = Series(idx) + + for field in DatetimeArray._field_ops: + # weekday is a property of DTI, but a method + # on NaT/Timestamp for compat with datetime + if field == "weekday": + continue + if field in ["week", "weekofyear"]: + # GH#33595 Deprecate week and weekofyear + continue + + result = getattr(ser.dt, field) + expected = [getattr(x, field) for x in idx] + tm.assert_series_equal(result, Series(expected)) + + for field in DatetimeArray._bool_ops: + result = getattr(ser.dt, field) + expected = [getattr(x, field) for x in idx] + tm.assert_series_equal(result, Series(expected)) + + +@pytest.mark.parametrize("klass", [Timestamp, Timedelta, Period]) +@pytest.mark.parametrize("value", [None, np.nan, iNaT, float("nan"), NaT, "NaT", "nat"]) +def test_identity(klass, value): + assert klass(value) is NaT + + +@pytest.mark.parametrize("klass", [Timestamp, Timedelta, Period]) +@pytest.mark.parametrize("value", ["", "nat", "NAT", None, np.nan]) +def test_equality(klass, value): + if klass is Period and value == "": + pytest.skip("Period cannot parse empty string") + + assert klass(value).value == iNaT + + +@pytest.mark.parametrize("klass", [Timestamp, Timedelta]) +@pytest.mark.parametrize("method", ["round", "floor", "ceil"]) +@pytest.mark.parametrize("freq", ["s", "5s", "min", "5min", "h", "5h"]) +def test_round_nat(klass, method, freq): + # see gh-14940 + ts = klass("nat") + + round_method = getattr(ts, method) + assert round_method(freq) is ts + + +@pytest.mark.parametrize( + "method", + [ + "astimezone", + "combine", + "ctime", + "dst", + "fromordinal", + "fromtimestamp", + "fromisocalendar", + "isocalendar", + "strftime", + "strptime", + "time", + "timestamp", + "timetuple", + "timetz", + "toordinal", + "tzname", + "utcfromtimestamp", + "utcnow", + "utcoffset", + "utctimetuple", + "timestamp", + ], +) +def test_nat_methods_raise(method): + # see gh-9513, gh-17329 + msg = f"NaTType does not support {method}" + + with pytest.raises(ValueError, match=msg): + getattr(NaT, method)() + + +@pytest.mark.parametrize("method", ["weekday", "isoweekday"]) +def test_nat_methods_nan(method): + # see gh-9513, gh-17329 + assert np.isnan(getattr(NaT, method)()) + + +@pytest.mark.parametrize( + "method", ["date", "now", "replace", "today", "tz_convert", "tz_localize"] +) +def test_nat_methods_nat(method): + # see gh-8254, gh-9513, gh-17329 + assert getattr(NaT, method)() is NaT + + +@pytest.mark.parametrize( + "get_nat", [lambda x: NaT, lambda x: Timedelta(x), lambda x: Timestamp(x)] +) +def test_nat_iso_format(get_nat): + # see gh-12300 + assert get_nat("NaT").isoformat() == "NaT" + assert get_nat("NaT").isoformat(timespec="nanoseconds") == "NaT" + + +@pytest.mark.parametrize( + 
"klass,expected", + [ + (Timestamp, ["freqstr", "normalize", "to_julian_date", "to_period", "tz"]), + ( + Timedelta, + [ + "components", + "delta", + "is_populated", + "resolution_string", + "to_pytimedelta", + "to_timedelta64", + "view", + ], + ), + ], +) +def test_missing_public_nat_methods(klass, expected): + # see gh-17327 + # + # NaT should have *most* of the Timestamp and Timedelta methods. + # Here, we check which public methods NaT does not have. We + # ignore any missing private methods. + nat_names = dir(NaT) + klass_names = dir(klass) + + missing = [x for x in klass_names if x not in nat_names and not x.startswith("_")] + missing.sort() + + assert missing == expected + + +def _get_overlap_public_nat_methods(klass, as_tuple=False): + """ + Get overlapping public methods between NaT and another class. + + Parameters + ---------- + klass : type + The class to compare with NaT + as_tuple : bool, default False + Whether to return a list of tuples of the form (klass, method). + + Returns + ------- + overlap : list + """ + nat_names = dir(NaT) + klass_names = dir(klass) + + overlap = [ + x + for x in nat_names + if x in klass_names and not x.startswith("_") and callable(getattr(klass, x)) + ] + + # Timestamp takes precedence over Timedelta in terms of overlap. + if klass is Timedelta: + ts_names = dir(Timestamp) + overlap = [x for x in overlap if x not in ts_names] + + if as_tuple: + overlap = [(klass, method) for method in overlap] + + overlap.sort() + return overlap + + +@pytest.mark.parametrize( + "klass,expected", + [ + ( + Timestamp, + [ + "astimezone", + "ceil", + "combine", + "ctime", + "date", + "day_name", + "dst", + "floor", + "fromisocalendar", + "fromisoformat", + "fromordinal", + "fromtimestamp", + "isocalendar", + "isoformat", + "isoweekday", + "month_name", + "now", + "replace", + "round", + "strftime", + "strptime", + "time", + "timestamp", + "timetuple", + "timetz", + "to_datetime64", + "to_numpy", + "to_pydatetime", + "today", + "toordinal", + "tz_convert", + "tz_localize", + "tzname", + "utcfromtimestamp", + "utcnow", + "utcoffset", + "utctimetuple", + "weekday", + ], + ), + (Timedelta, ["total_seconds"]), + ], +) +def test_overlap_public_nat_methods(klass, expected): + # see gh-17327 + # + # NaT should have *most* of the Timestamp and Timedelta methods. + # In case when Timestamp, Timedelta, and NaT are overlap, the overlap + # is considered to be with Timestamp and NaT, not Timedelta. + assert _get_overlap_public_nat_methods(klass) == expected + + +@pytest.mark.parametrize( + "compare", + ( + _get_overlap_public_nat_methods(Timestamp, True) + + _get_overlap_public_nat_methods(Timedelta, True) + ), +) +def test_nat_doc_strings(compare): + # see gh-17327 + # + # The docstrings for overlapping methods should match. 
+ klass, method = compare + klass_doc = getattr(klass, method).__doc__ + + # Ignore differences with Timestamp.isoformat() as they're intentional + if klass == Timestamp and method == "isoformat": + return + + if method == "to_numpy": + # GH#44460 can return either dt64 or td64 depending on dtype, + # different docstring is intentional + return + + nat_doc = getattr(NaT, method).__doc__ + assert klass_doc == nat_doc + + +_ops = { + "left_plus_right": lambda a, b: a + b, + "right_plus_left": lambda a, b: b + a, + "left_minus_right": lambda a, b: a - b, + "right_minus_left": lambda a, b: b - a, + "left_times_right": lambda a, b: a * b, + "right_times_left": lambda a, b: b * a, + "left_div_right": lambda a, b: a / b, + "right_div_left": lambda a, b: b / a, +} + + +@pytest.mark.parametrize("op_name", list(_ops.keys())) +@pytest.mark.parametrize( + "value,val_type", + [ + (2, "scalar"), + (1.5, "floating"), + (np.nan, "floating"), + ("foo", "str"), + (timedelta(3600), "timedelta"), + (Timedelta("5s"), "timedelta"), + (datetime(2014, 1, 1), "timestamp"), + (Timestamp("2014-01-01"), "timestamp"), + (Timestamp("2014-01-01", tz="UTC"), "timestamp"), + (Timestamp("2014-01-01", tz="US/Eastern"), "timestamp"), + (pytz.timezone("Asia/Tokyo").localize(datetime(2014, 1, 1)), "timestamp"), + ], +) +def test_nat_arithmetic_scalar(op_name, value, val_type): + # see gh-6873 + invalid_ops = { + "scalar": {"right_div_left"}, + "floating": { + "right_div_left", + "left_minus_right", + "right_minus_left", + "left_plus_right", + "right_plus_left", + }, + "str": set(_ops.keys()), + "timedelta": {"left_times_right", "right_times_left"}, + "timestamp": { + "left_times_right", + "right_times_left", + "left_div_right", + "right_div_left", + }, + } + + op = _ops[op_name] + + if op_name in invalid_ops.get(val_type, set()): + if ( + val_type == "timedelta" + and "times" in op_name + and isinstance(value, Timedelta) + ): + typs = "(Timedelta|NaTType)" + msg = rf"unsupported operand type\(s\) for \*: '{typs}' and '{typs}'" + elif val_type == "str": + # un-specific check here because the message comes from str + # and varies by method + msg = "|".join( + [ + "can only concatenate str", + "unsupported operand type", + "can't multiply sequence", + "Can't convert 'NaTType'", + "must be str, not NaTType", + ] + ) + else: + msg = "unsupported operand type" + + with pytest.raises(TypeError, match=msg): + op(NaT, value) + else: + if val_type == "timedelta" and "div" in op_name: + expected = np.nan + else: + expected = NaT + + assert op(NaT, value) is expected + + +@pytest.mark.parametrize( + "val,expected", [(np.nan, NaT), (NaT, np.nan), (np.timedelta64("NaT"), np.nan)] +) +def test_nat_rfloordiv_timedelta(val, expected): + # see gh-#18846 + # + # See also test_timedelta.TestTimedeltaArithmetic.test_floordiv + td = Timedelta(hours=3, minutes=4) + assert td // val is expected + + +@pytest.mark.parametrize( + "op_name", + ["left_plus_right", "right_plus_left", "left_minus_right", "right_minus_left"], +) +@pytest.mark.parametrize( + "value", + [ + DatetimeIndex(["2011-01-01", "2011-01-02"], name="x"), + DatetimeIndex(["2011-01-01", "2011-01-02"], tz="US/Eastern", name="x"), + DatetimeArray._from_sequence(["2011-01-01", "2011-01-02"]), + DatetimeArray._from_sequence( + ["2011-01-01", "2011-01-02"], dtype=DatetimeTZDtype(tz="US/Pacific") + ), + TimedeltaIndex(["1 day", "2 day"], name="x"), + ], +) +def test_nat_arithmetic_index(op_name, value): + # see gh-11718 + exp_name = "x" + exp_data = [NaT] * 2 + + if 
is_datetime64_any_dtype(value.dtype) and "plus" in op_name: + expected = DatetimeIndex(exp_data, tz=value.tz, name=exp_name) + else: + expected = TimedeltaIndex(exp_data, name=exp_name) + + if not isinstance(value, Index): + expected = expected.array + + op = _ops[op_name] + result = op(NaT, value) + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize( + "op_name", + ["left_plus_right", "right_plus_left", "left_minus_right", "right_minus_left"], +) +@pytest.mark.parametrize("box", [TimedeltaIndex, Series, TimedeltaArray._from_sequence]) +def test_nat_arithmetic_td64_vector(op_name, box): + # see gh-19124 + vec = box(["1 day", "2 day"], dtype="timedelta64[ns]") + box_nat = box([NaT, NaT], dtype="timedelta64[ns]") + tm.assert_equal(_ops[op_name](vec, NaT), box_nat) + + +@pytest.mark.parametrize( + "dtype,op,out_dtype", + [ + ("datetime64[ns]", operator.add, "datetime64[ns]"), + ("datetime64[ns]", roperator.radd, "datetime64[ns]"), + ("datetime64[ns]", operator.sub, "timedelta64[ns]"), + ("datetime64[ns]", roperator.rsub, "timedelta64[ns]"), + ("timedelta64[ns]", operator.add, "datetime64[ns]"), + ("timedelta64[ns]", roperator.radd, "datetime64[ns]"), + ("timedelta64[ns]", operator.sub, "datetime64[ns]"), + ("timedelta64[ns]", roperator.rsub, "timedelta64[ns]"), + ], +) +def test_nat_arithmetic_ndarray(dtype, op, out_dtype): + other = np.arange(10).astype(dtype) + result = op(NaT, other) + + expected = np.empty(other.shape, dtype=out_dtype) + expected.fill("NaT") + tm.assert_numpy_array_equal(result, expected) + + +def test_nat_pinned_docstrings(): + # see gh-17327 + assert NaT.ctime.__doc__ == datetime.ctime.__doc__ + + +def test_to_numpy_alias(): + # GH 24653: alias .to_numpy() for scalars + expected = NaT.to_datetime64() + result = NaT.to_numpy() + + assert isna(expected) and isna(result) + + # GH#44460 + result = NaT.to_numpy("M8[s]") + assert isinstance(result, np.datetime64) + assert result.dtype == "M8[s]" + + result = NaT.to_numpy("m8[ns]") + assert isinstance(result, np.timedelta64) + assert result.dtype == "m8[ns]" + + result = NaT.to_numpy("m8[s]") + assert isinstance(result, np.timedelta64) + assert result.dtype == "m8[s]" + + with pytest.raises(ValueError, match="NaT.to_numpy dtype must be a "): + NaT.to_numpy(np.int64) + + +@pytest.mark.parametrize( + "other", + [ + Timedelta(0), + Timedelta(0).to_pytimedelta(), + pytest.param( + Timedelta(0).to_timedelta64(), + marks=pytest.mark.xfail( + reason="td64 doesn't return NotImplemented, see numpy#17017" + ), + ), + Timestamp(0), + Timestamp(0).to_pydatetime(), + pytest.param( + Timestamp(0).to_datetime64(), + marks=pytest.mark.xfail( + reason="dt64 doesn't return NotImplemented, see numpy#17017" + ), + ), + Timestamp(0).tz_localize("UTC"), + NaT, + ], +) +def test_nat_comparisons(compare_operators_no_eq_ne, other): + # GH 26039 + opname = compare_operators_no_eq_ne + + assert getattr(NaT, opname)(other) is False + + op = getattr(operator, opname.strip("_")) + assert op(NaT, other) is False + assert op(other, NaT) is False + + +@pytest.mark.parametrize("other", [np.timedelta64(0, "ns"), np.datetime64("now", "ns")]) +def test_nat_comparisons_numpy(other): + # Once numpy#17017 is fixed and the xfailed cases in test_nat_comparisons + # pass, this test can be removed + assert not NaT == other + assert NaT != other + assert not NaT < other + assert not NaT > other + assert not NaT <= other + assert not NaT >= other + + +@pytest.mark.parametrize("other_and_type", [("foo", "str"), (2, "int"), (2.0, "float")]) 
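+# The stacked parametrize decorators multiply out: three invalid +# comparands x four ordering operators = twelve cases for the test below.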
+@pytest.mark.parametrize( + "symbol_and_op", + [("<=", operator.le), ("<", operator.lt), (">=", operator.ge), (">", operator.gt)], +) +def test_nat_comparisons_invalid(other_and_type, symbol_and_op): + # GH#35585 + other, other_type = other_and_type + symbol, op = symbol_and_op + + assert not NaT == other + assert not other == NaT + + assert NaT != other + assert other != NaT + + msg = f"'{symbol}' not supported between instances of 'NaTType' and '{other_type}'" + with pytest.raises(TypeError, match=msg): + op(NaT, other) + + msg = f"'{symbol}' not supported between instances of '{other_type}' and 'NaTType'" + with pytest.raises(TypeError, match=msg): + op(other, NaT) + + +@pytest.mark.parametrize( + "other", + [ + np.array(["foo"] * 2, dtype=object), + np.array([2, 3], dtype="int64"), + np.array([2.0, 3.5], dtype="float64"), + ], + ids=["str", "int", "float"], +) +def test_nat_comparisons_invalid_ndarray(other): + # GH#40722 + expected = np.array([False, False]) + result = NaT == other + tm.assert_numpy_array_equal(result, expected) + result = other == NaT + tm.assert_numpy_array_equal(result, expected) + + expected = np.array([True, True]) + result = NaT != other + tm.assert_numpy_array_equal(result, expected) + result = other != NaT + tm.assert_numpy_array_equal(result, expected) + + for symbol, op in [ + ("<=", operator.le), + ("<", operator.lt), + (">=", operator.ge), + (">", operator.gt), + ]: + msg = f"'{symbol}' not supported between" + + with pytest.raises(TypeError, match=msg): + op(NaT, other) + + if other.dtype == np.dtype("object"): + # uses the reverse operator, so symbol changes + msg = None + with pytest.raises(TypeError, match=msg): + op(other, NaT) + + +def test_compare_date(fixed_now_ts): + # GH#39151 comparing NaT with date object is deprecated + # See also: tests.scalar.timestamps.test_comparisons::test_compare_date + + dt = fixed_now_ts.to_pydatetime().date() + + for left, right in [(NaT, dt), (dt, NaT)]: + assert not left == right + assert left != right + + with tm.assert_produces_warning(FutureWarning): + assert not left < right + with tm.assert_produces_warning(FutureWarning): + assert not left <= right + with tm.assert_produces_warning(FutureWarning): + assert not left > right + with tm.assert_produces_warning(FutureWarning): + assert not left >= right + + # Once the deprecation is enforced, the following assertions + # can be enabled: + # assert not left == right + # assert left != right + # + # with pytest.raises(TypeError): + # left < right + # with pytest.raises(TypeError): + # left <= right + # with pytest.raises(TypeError): + # left > right + # with pytest.raises(TypeError): + # left >= right + + +@pytest.mark.parametrize( + "obj", + [ + offsets.YearEnd(2), + offsets.YearBegin(2), + offsets.MonthBegin(1), + offsets.MonthEnd(2), + offsets.MonthEnd(12), + offsets.Day(2), + offsets.Day(5), + offsets.Hour(24), + offsets.Hour(3), + offsets.Minute(), + np.timedelta64(3, "h"), + np.timedelta64(4, "h"), + np.timedelta64(3200, "s"), + np.timedelta64(3600, "s"), + np.timedelta64(3600 * 24, "s"), + np.timedelta64(2, "D"), + np.timedelta64(365, "D"), + timedelta(-2), + timedelta(365), + timedelta(minutes=120), + timedelta(days=4, minutes=180), + timedelta(hours=23), + timedelta(hours=23, minutes=30), + timedelta(hours=48), + ], +) +def test_nat_addsub_tdlike_scalar(obj): + assert NaT + obj is NaT + assert obj + NaT is NaT + assert NaT - obj is NaT + + +def test_pickle(): + # GH#4606 + p = tm.round_trip_pickle(NaT) + assert p is NaT + + +def test_freq_deprecated(): + 
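+ # Merely accessing NaT.freq should emit the FutureWarning matched + # below; the attribute is deprecated pending removal.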
with tm.assert_produces_warning(FutureWarning, match="deprecated"): + NaT.freq diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/timedelta/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timedelta/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/timedelta/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timedelta/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..c3f169f Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timedelta/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/timedelta/__pycache__/test_arithmetic.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timedelta/__pycache__/test_arithmetic.cpython-38.pyc new file mode 100644 index 0000000..c96d4fb Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timedelta/__pycache__/test_arithmetic.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/timedelta/__pycache__/test_constructors.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timedelta/__pycache__/test_constructors.cpython-38.pyc new file mode 100644 index 0000000..26b6ca1 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timedelta/__pycache__/test_constructors.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/timedelta/__pycache__/test_formats.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timedelta/__pycache__/test_formats.cpython-38.pyc new file mode 100644 index 0000000..16e1c65 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timedelta/__pycache__/test_formats.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/timedelta/__pycache__/test_timedelta.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timedelta/__pycache__/test_timedelta.cpython-38.pyc new file mode 100644 index 0000000..195f8d9 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timedelta/__pycache__/test_timedelta.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/timedelta/test_arithmetic.py b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timedelta/test_arithmetic.py new file mode 100644 index 0000000..36ad24f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -0,0 +1,1068 @@ +""" +Tests for scalar Timedelta arithmetic ops +""" +from datetime import ( + datetime, + timedelta, +) +import operator + +import numpy as np +import pytest + +from pandas.errors import OutOfBoundsTimedelta + +import pandas as pd +from pandas import ( + NaT, + Timedelta, + Timestamp, + offsets, +) +import pandas._testing as tm +from pandas.core import ops + + +class TestTimedeltaAdditionSubtraction: + """ + Tests for Timedelta methods: + + __add__, __radd__, + __sub__, __rsub__ + """ + + @pytest.mark.parametrize( + "ten_seconds", + [ + Timedelta(10, unit="s"), + timedelta(seconds=10), + np.timedelta64(10, "s"), + np.timedelta64(10000000000, "ns"), + offsets.Second(10), + ], + ) + def test_td_add_sub_ten_seconds(self, ten_seconds): + # GH#6808 + base = Timestamp("20130101 09:01:12.123456") + expected_add = Timestamp("20130101 
09:01:22.123456") + expected_sub = Timestamp("20130101 09:01:02.123456") + + result = base + ten_seconds + assert result == expected_add + + result = base - ten_seconds + assert result == expected_sub + + @pytest.mark.parametrize( + "one_day_ten_secs", + [ + Timedelta("1 day, 00:00:10"), + Timedelta("1 days, 00:00:10"), + timedelta(days=1, seconds=10), + np.timedelta64(1, "D") + np.timedelta64(10, "s"), + offsets.Day() + offsets.Second(10), + ], + ) + def test_td_add_sub_one_day_ten_seconds(self, one_day_ten_secs): + # GH#6808 + base = Timestamp("20130102 09:01:12.123456") + expected_add = Timestamp("20130103 09:01:22.123456") + expected_sub = Timestamp("20130101 09:01:02.123456") + + result = base + one_day_ten_secs + assert result == expected_add + + result = base - one_day_ten_secs + assert result == expected_sub + + @pytest.mark.parametrize("op", [operator.add, ops.radd]) + def test_td_add_datetimelike_scalar(self, op): + # GH#19738 + td = Timedelta(10, unit="d") + + result = op(td, datetime(2016, 1, 1)) + if op is operator.add: + # datetime + Timedelta does _not_ call Timedelta.__radd__, + # so we get a datetime back instead of a Timestamp + assert isinstance(result, Timestamp) + assert result == Timestamp(2016, 1, 11) + + result = op(td, Timestamp("2018-01-12 18:09")) + assert isinstance(result, Timestamp) + assert result == Timestamp("2018-01-22 18:09") + + result = op(td, np.datetime64("2018-01-12")) + assert isinstance(result, Timestamp) + assert result == Timestamp("2018-01-22") + + result = op(td, NaT) + assert result is NaT + + def test_td_add_timestamp_overflow(self): + msg = "int too (large|big) to convert" + with pytest.raises(OverflowError, match=msg): + Timestamp("1700-01-01") + Timedelta(13 * 19999, unit="D") + + with pytest.raises(OutOfBoundsTimedelta, match=msg): + Timestamp("1700-01-01") + timedelta(days=13 * 19999) + + @pytest.mark.parametrize("op", [operator.add, ops.radd]) + def test_td_add_td(self, op): + td = Timedelta(10, unit="d") + + result = op(td, Timedelta(days=10)) + assert isinstance(result, Timedelta) + assert result == Timedelta(days=20) + + @pytest.mark.parametrize("op", [operator.add, ops.radd]) + def test_td_add_pytimedelta(self, op): + td = Timedelta(10, unit="d") + result = op(td, timedelta(days=9)) + assert isinstance(result, Timedelta) + assert result == Timedelta(days=19) + + @pytest.mark.parametrize("op", [operator.add, ops.radd]) + def test_td_add_timedelta64(self, op): + td = Timedelta(10, unit="d") + result = op(td, np.timedelta64(-4, "D")) + assert isinstance(result, Timedelta) + assert result == Timedelta(days=6) + + @pytest.mark.parametrize("op", [operator.add, ops.radd]) + def test_td_add_offset(self, op): + td = Timedelta(10, unit="d") + + result = op(td, offsets.Hour(6)) + assert isinstance(result, Timedelta) + assert result == Timedelta(days=10, hours=6) + + def test_td_sub_td(self): + td = Timedelta(10, unit="d") + expected = Timedelta(0, unit="ns") + result = td - td + assert isinstance(result, Timedelta) + assert result == expected + + def test_td_sub_pytimedelta(self): + td = Timedelta(10, unit="d") + expected = Timedelta(0, unit="ns") + + result = td - td.to_pytimedelta() + assert isinstance(result, Timedelta) + assert result == expected + + result = td.to_pytimedelta() - td + assert isinstance(result, Timedelta) + assert result == expected + + def test_td_sub_timedelta64(self): + td = Timedelta(10, unit="d") + expected = Timedelta(0, unit="ns") + + result = td - td.to_timedelta64() + assert isinstance(result, Timedelta) + assert 
result == expected + + result = td.to_timedelta64() - td + assert isinstance(result, Timedelta) + assert result == expected + + def test_td_sub_nat(self): + # In this context pd.NaT is treated as timedelta-like + td = Timedelta(10, unit="d") + result = td - NaT + assert result is NaT + + def test_td_sub_td64_nat(self): + td = Timedelta(10, unit="d") + td_nat = np.timedelta64("NaT") + + result = td - td_nat + assert result is NaT + + result = td_nat - td + assert result is NaT + + def test_td_sub_offset(self): + td = Timedelta(10, unit="d") + result = td - offsets.Hour(1) + assert isinstance(result, Timedelta) + assert result == Timedelta(239, unit="h") + + def test_td_add_sub_numeric_raises(self): + td = Timedelta(10, unit="d") + msg = "unsupported operand type" + for other in [2, 2.0, np.int64(2), np.float64(2)]: + with pytest.raises(TypeError, match=msg): + td + other + with pytest.raises(TypeError, match=msg): + other + td + with pytest.raises(TypeError, match=msg): + td - other + with pytest.raises(TypeError, match=msg): + other - td + + def test_td_add_sub_int_ndarray(self): + td = Timedelta("1 day") + other = np.array([1]) + + msg = r"unsupported operand type\(s\) for \+: 'Timedelta' and 'int'" + with pytest.raises(TypeError, match=msg): + td + np.array([1]) + + msg = "|".join( + [ + ( + r"unsupported operand type\(s\) for \+: 'numpy.ndarray' " + "and 'Timedelta'" + ), + # This message goes on to say "Please do not rely on this error; + # it may not be given on all Python implementations" + "Concatenation operation is not implemented for NumPy arrays", + ] + ) + with pytest.raises(TypeError, match=msg): + other + td + msg = r"unsupported operand type\(s\) for -: 'Timedelta' and 'int'" + with pytest.raises(TypeError, match=msg): + td - other + msg = r"unsupported operand type\(s\) for -: 'numpy.ndarray' and 'Timedelta'" + with pytest.raises(TypeError, match=msg): + other - td + + def test_td_rsub_nat(self): + td = Timedelta(10, unit="d") + result = NaT - td + assert result is NaT + + result = np.datetime64("NaT") - td + assert result is NaT + + def test_td_rsub_offset(self): + result = offsets.Hour(1) - Timedelta(10, unit="d") + assert isinstance(result, Timedelta) + assert result == Timedelta(-239, unit="h") + + def test_td_sub_timedeltalike_object_dtype_array(self): + # GH#21980 + arr = np.array([Timestamp("20130101 9:01"), Timestamp("20121230 9:02")]) + exp = np.array([Timestamp("20121231 9:01"), Timestamp("20121229 9:02")]) + res = arr - Timedelta("1D") + tm.assert_numpy_array_equal(res, exp) + + def test_td_sub_mixed_most_timedeltalike_object_dtype_array(self): + # GH#21980 + now = Timestamp("2021-11-09 09:54:00") + arr = np.array([now, Timedelta("1D"), np.timedelta64(2, "h")]) + exp = np.array( + [ + now - Timedelta("1D"), + Timedelta("0D"), + np.timedelta64(2, "h") - Timedelta("1D"), + ] + ) + res = arr - Timedelta("1D") + tm.assert_numpy_array_equal(res, exp) + + def test_td_rsub_mixed_most_timedeltalike_object_dtype_array(self): + # GH#21980 + now = Timestamp("2021-11-09 09:54:00") + arr = np.array([now, Timedelta("1D"), np.timedelta64(2, "h")]) + msg = r"unsupported operand type\(s\) for \-: 'Timedelta' and 'Timestamp'" + with pytest.raises(TypeError, match=msg): + Timedelta("1D") - arr + + @pytest.mark.parametrize("op", [operator.add, ops.radd]) + def test_td_add_timedeltalike_object_dtype_array(self, op): + # GH#21980 + arr = np.array([Timestamp("20130101 9:01"), Timestamp("20121230 9:02")]) + exp = np.array([Timestamp("20130102 9:01"), Timestamp("20121231 9:02")]) + res = 
op(arr, Timedelta("1D")) + tm.assert_numpy_array_equal(res, exp) + + @pytest.mark.parametrize("op", [operator.add, ops.radd]) + def test_td_add_mixed_timedeltalike_object_dtype_array(self, op): + # GH#21980 + now = Timestamp("2021-11-09 09:54:00") + arr = np.array([now, Timedelta("1D")]) + exp = np.array([now + Timedelta("1D"), Timedelta("2D")]) + res = op(arr, Timedelta("1D")) + tm.assert_numpy_array_equal(res, exp) + + def test_td_add_sub_td64_ndarray(self): + td = Timedelta("1 day") + + other = np.array([td.to_timedelta64()]) + expected = np.array([Timedelta("2 Days").to_timedelta64()]) + + result = td + other + tm.assert_numpy_array_equal(result, expected) + result = other + td + tm.assert_numpy_array_equal(result, expected) + + result = td - other + tm.assert_numpy_array_equal(result, expected * 0) + result = other - td + tm.assert_numpy_array_equal(result, expected * 0) + + def test_td_add_sub_dt64_ndarray(self): + td = Timedelta("1 day") + other = pd.to_datetime(["2000-01-01"]).values + + expected = pd.to_datetime(["2000-01-02"]).values + tm.assert_numpy_array_equal(td + other, expected) + tm.assert_numpy_array_equal(other + td, expected) + + expected = pd.to_datetime(["1999-12-31"]).values + tm.assert_numpy_array_equal(-td + other, expected) + tm.assert_numpy_array_equal(other - td, expected) + + +class TestTimedeltaMultiplicationDivision: + """ + Tests for Timedelta methods: + + __mul__, __rmul__, + __div__, __rdiv__, + __truediv__, __rtruediv__, + __floordiv__, __rfloordiv__, + __mod__, __rmod__, + __divmod__, __rdivmod__ + """ + + # --------------------------------------------------------------- + # Timedelta.__mul__, __rmul__ + + @pytest.mark.parametrize( + "td_nat", [NaT, np.timedelta64("NaT", "ns"), np.timedelta64("NaT")] + ) + @pytest.mark.parametrize("op", [operator.mul, ops.rmul]) + def test_td_mul_nat(self, op, td_nat): + # GH#19819 + td = Timedelta(10, unit="d") + typs = "|".join(["numpy.timedelta64", "NaTType", "Timedelta"]) + msg = "|".join( + [ + rf"unsupported operand type\(s\) for \*: '{typs}' and '{typs}'", + r"ufunc '?multiply'? 
cannot use operands with types", + ] + ) + with pytest.raises(TypeError, match=msg): + op(td, td_nat) + + @pytest.mark.parametrize("nan", [np.nan, np.float64("NaN"), float("nan")]) + @pytest.mark.parametrize("op", [operator.mul, ops.rmul]) + def test_td_mul_nan(self, op, nan): + # np.float64('NaN') has a 'dtype' attr, avoid treating as array + td = Timedelta(10, unit="d") + result = op(td, nan) + assert result is NaT + + @pytest.mark.parametrize("op", [operator.mul, ops.rmul]) + def test_td_mul_scalar(self, op): + # GH#19738 + td = Timedelta(minutes=3) + + result = op(td, 2) + assert result == Timedelta(minutes=6) + + result = op(td, 1.5) + assert result == Timedelta(minutes=4, seconds=30) + + assert op(td, np.nan) is NaT + + assert op(-1, td).value == -1 * td.value + assert op(-1.0, td).value == -1.0 * td.value + + msg = "unsupported operand type" + with pytest.raises(TypeError, match=msg): + # timedelta * datetime is gibberish + op(td, Timestamp(2016, 1, 2)) + + with pytest.raises(TypeError, match=msg): + # invalid multiply with another timedelta + op(td, td) + + def test_td_mul_numeric_ndarray(self): + td = Timedelta("1 day") + other = np.array([2]) + expected = np.array([Timedelta("2 Days").to_timedelta64()]) + + result = td * other + tm.assert_numpy_array_equal(result, expected) + + result = other * td + tm.assert_numpy_array_equal(result, expected) + + def test_td_mul_td64_ndarray_invalid(self): + td = Timedelta("1 day") + other = np.array([Timedelta("2 Days").to_timedelta64()]) + + msg = ( + "ufunc '?multiply'? cannot use operands with types " + r"dtype\('= off + assert not td < off + assert not td > off + + assert not td == 2 * off + assert td != 2 * off + assert td <= 2 * off + assert td < 2 * off + assert not td >= 2 * off + assert not td > 2 * off + + def test_comparison_object_array(self): + # analogous to GH#15183 + td = Timedelta("2 days") + other = Timedelta("3 hours") + + arr = np.array([other, td], dtype=object) + res = arr == td + expected = np.array([False, True], dtype=bool) + assert (res == expected).all() + + # 2D case + arr = np.array([[other, td], [td, other]], dtype=object) + res = arr != td + expected = np.array([[True, False], [False, True]], dtype=bool) + assert res.shape == expected.shape + assert (res == expected).all() + + def test_compare_timedelta_ndarray(self): + # GH#11835 + periods = [Timedelta("0 days 01:00:00"), Timedelta("0 days 01:00:00")] + arr = np.array(periods) + result = arr[0] > arr + expected = np.array([False, False]) + tm.assert_numpy_array_equal(result, expected) + + def test_compare_td64_ndarray(self): + # GH#33441 + arr = np.arange(5).astype("timedelta64[ns]") + td = Timedelta(arr[1]) + + expected = np.array([False, True, False, False, False], dtype=bool) + + result = td == arr + tm.assert_numpy_array_equal(result, expected) + + result = arr == td + tm.assert_numpy_array_equal(result, expected) + + result = td != arr + tm.assert_numpy_array_equal(result, ~expected) + + result = arr != td + tm.assert_numpy_array_equal(result, ~expected) + + def test_compare_custom_object(self): + """ + Make sure unsupported operations on Timedelta return NotImplemented + and yield to the other operand (GH#20829).
+ """ + + class CustomClass: + def __init__(self, cmp_result=None): + self.cmp_result = cmp_result + + def generic_result(self): + if self.cmp_result is None: + return NotImplemented + else: + return self.cmp_result + + def __eq__(self, other): + return self.generic_result() + + def __gt__(self, other): + return self.generic_result() + + t = Timedelta("1s") + + assert not (t == "string") + assert not (t == 1) + assert not (t == CustomClass()) + assert not (t == CustomClass(cmp_result=False)) + + assert t < CustomClass(cmp_result=True) + assert not (t < CustomClass(cmp_result=False)) + + assert t == CustomClass(cmp_result=True) + + @pytest.mark.parametrize("val", ["string", 1]) + def test_compare_unknown_type(self, val): + # GH#20829 + t = Timedelta("1s") + msg = "not supported between instances of 'Timedelta' and '(int|str)'" + with pytest.raises(TypeError, match=msg): + t >= val + with pytest.raises(TypeError, match=msg): + t > val + with pytest.raises(TypeError, match=msg): + t <= val + with pytest.raises(TypeError, match=msg): + t < val + + +def test_ops_notimplemented(): + class Other: + pass + + other = Other() + + td = Timedelta("1 day") + assert td.__add__(other) is NotImplemented + assert td.__sub__(other) is NotImplemented + assert td.__truediv__(other) is NotImplemented + assert td.__mul__(other) is NotImplemented + assert td.__floordiv__(other) is NotImplemented + + +def test_ops_error_str(): + # GH#13624 + td = Timedelta("1 day") + + for left, right in [(td, "a"), ("a", td)]: + + msg = "|".join( + [ + "unsupported operand type", + r'can only concatenate str \(not "Timedelta"\) to str', + "must be str, not Timedelta", + ] + ) + with pytest.raises(TypeError, match=msg): + left + right + + msg = "not supported between instances of" + with pytest.raises(TypeError, match=msg): + left > right + + assert not left == right + assert left != right diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/timedelta/test_constructors.py b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timedelta/test_constructors.py new file mode 100644 index 0000000..34b725e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timedelta/test_constructors.py @@ -0,0 +1,379 @@ +from datetime import timedelta +from itertools import product + +import numpy as np +import pytest + +from pandas._libs.tslibs import OutOfBoundsTimedelta + +from pandas import ( + Timedelta, + offsets, + to_timedelta, +) + + +def test_construction(): + expected = np.timedelta64(10, "D").astype("m8[ns]").view("i8") + assert Timedelta(10, unit="d").value == expected + assert Timedelta(10.0, unit="d").value == expected + assert Timedelta("10 days").value == expected + assert Timedelta(days=10).value == expected + assert Timedelta(days=10.0).value == expected + + expected += np.timedelta64(10, "s").astype("m8[ns]").view("i8") + assert Timedelta("10 days 00:00:10").value == expected + assert Timedelta(days=10, seconds=10).value == expected + assert Timedelta(days=10, milliseconds=10 * 1000).value == expected + assert Timedelta(days=10, microseconds=10 * 1000 * 1000).value == expected + + # rounding cases + assert Timedelta(82739999850000).value == 82739999850000 + assert "0 days 22:58:59.999850" in str(Timedelta(82739999850000)) + assert Timedelta(123072001000000).value == 123072001000000 + assert "1 days 10:11:12.001" in str(Timedelta(123072001000000)) + + # string conversion with/without leading zero + # GH#9570 + assert Timedelta("0:00:00") == timedelta(hours=0) + assert Timedelta("00:00:00") == 
timedelta(hours=0) + assert Timedelta("-1:00:00") == -timedelta(hours=1) + assert Timedelta("-01:00:00") == -timedelta(hours=1) + + # more strings & abbrevs + # GH#8190 + assert Timedelta("1 h") == timedelta(hours=1) + assert Timedelta("1 hour") == timedelta(hours=1) + assert Timedelta("1 hr") == timedelta(hours=1) + assert Timedelta("1 hours") == timedelta(hours=1) + assert Timedelta("-1 hours") == -timedelta(hours=1) + assert Timedelta("1 m") == timedelta(minutes=1) + assert Timedelta("1.5 m") == timedelta(seconds=90) + assert Timedelta("1 minute") == timedelta(minutes=1) + assert Timedelta("1 minutes") == timedelta(minutes=1) + assert Timedelta("1 s") == timedelta(seconds=1) + assert Timedelta("1 second") == timedelta(seconds=1) + assert Timedelta("1 seconds") == timedelta(seconds=1) + assert Timedelta("1 ms") == timedelta(milliseconds=1) + assert Timedelta("1 milli") == timedelta(milliseconds=1) + assert Timedelta("1 millisecond") == timedelta(milliseconds=1) + assert Timedelta("1 us") == timedelta(microseconds=1) + assert Timedelta("1 µs") == timedelta(microseconds=1) + assert Timedelta("1 micros") == timedelta(microseconds=1) + assert Timedelta("1 microsecond") == timedelta(microseconds=1) + assert Timedelta("1.5 microsecond") == Timedelta("00:00:00.000001500") + assert Timedelta("1 ns") == Timedelta("00:00:00.000000001") + assert Timedelta("1 nano") == Timedelta("00:00:00.000000001") + assert Timedelta("1 nanosecond") == Timedelta("00:00:00.000000001") + + # combos + assert Timedelta("10 days 1 hour") == timedelta(days=10, hours=1) + assert Timedelta("10 days 1 h") == timedelta(days=10, hours=1) + assert Timedelta("10 days 1 h 1m 1s") == timedelta( + days=10, hours=1, minutes=1, seconds=1 + ) + assert Timedelta("-10 days 1 h 1m 1s") == -timedelta( + days=10, hours=1, minutes=1, seconds=1 + ) + assert Timedelta("-10 days 1 h 1m 1s") == -timedelta( + days=10, hours=1, minutes=1, seconds=1 + ) + assert Timedelta("-10 days 1 h 1m 1s 3us") == -timedelta( + days=10, hours=1, minutes=1, seconds=1, microseconds=3 + ) + assert Timedelta("-10 days 1 h 1.5m 1s 3us") == -timedelta( + days=10, hours=1, minutes=1, seconds=31, microseconds=3 + ) + + # Currently invalid as it has a - on the hh:mm:dd part + # (only allowed on the days) + msg = "only leading negative signs are allowed" + with pytest.raises(ValueError, match=msg): + Timedelta("-10 days -1 h 1.5m 1s 3us") + + # only leading neg signs are allowed + with pytest.raises(ValueError, match=msg): + Timedelta("10 days -1 h 1.5m 1s 3us") + + # no units specified + msg = "no units specified" + with pytest.raises(ValueError, match=msg): + Timedelta("3.1415") + + # invalid construction + msg = "cannot construct a Timedelta" + with pytest.raises(ValueError, match=msg): + Timedelta() + + msg = "unit abbreviation w/o a number" + with pytest.raises(ValueError, match=msg): + Timedelta("foo") + + msg = ( + "cannot construct a Timedelta from " + "the passed arguments, allowed keywords are " + ) + with pytest.raises(ValueError, match=msg): + Timedelta(day=10) + + # floats + expected = np.timedelta64(10, "s").astype("m8[ns]").view("i8") + np.timedelta64( + 500, "ms" + ).astype("m8[ns]").view("i8") + assert Timedelta(10.5, unit="s").value == expected + + # offset + assert to_timedelta(offsets.Hour(2)) == Timedelta(hours=2) + assert Timedelta(offsets.Hour(2)) == Timedelta(hours=2) + assert Timedelta(offsets.Second(2)) == Timedelta(seconds=2) + + # GH#11995: unicode + expected = Timedelta("1H") + result = Timedelta("1H") + assert result == expected + assert 
to_timedelta(offsets.Hour(2)) == Timedelta("0 days, 02:00:00") + + msg = "unit abbreviation w/o a number" + with pytest.raises(ValueError, match=msg): + Timedelta("foo bar") + + +@pytest.mark.parametrize( + "item", + list( + { + "days": "D", + "seconds": "s", + "microseconds": "us", + "milliseconds": "ms", + "minutes": "m", + "hours": "h", + "weeks": "W", + }.items() + ), +) +@pytest.mark.parametrize( + "npdtype", [np.int64, np.int32, np.int16, np.float64, np.float32, np.float16] +) +def test_td_construction_with_np_dtypes(npdtype, item): + # GH#8757: test construction with np dtypes + pykwarg, npkwarg = item + expected = np.timedelta64(1, npkwarg).astype("m8[ns]").view("i8") + assert Timedelta(**{pykwarg: npdtype(1)}).value == expected + + +@pytest.mark.parametrize( + "val", + [ + "1s", + "-1s", + "1us", + "-1us", + "1 day", + "-1 day", + "-23:59:59.999999", + "-1 days +23:59:59.999999", + "-1ns", + "1ns", + "-23:59:59.999999999", + ], +) +def test_td_from_repr_roundtrip(val): + # round-trip both for string and value + td = Timedelta(val) + assert Timedelta(td.value) == td + + assert Timedelta(str(td)) == td + assert Timedelta(td._repr_base(format="all")) == td + assert Timedelta(td._repr_base()) == td + + +def test_overflow_on_construction(): + msg = "int too (large|big) to convert" + + # GH#3374 + value = Timedelta("1day").value * 20169940 + with pytest.raises(OverflowError, match=msg): + Timedelta(value) + + # xref GH#17637 + with pytest.raises(OverflowError, match=msg): + Timedelta(7 * 19999, unit="D") + + with pytest.raises(OutOfBoundsTimedelta, match=msg): + Timedelta(timedelta(days=13 * 19999)) + + +@pytest.mark.parametrize( + "val, unit, name", + [ + (3508, "M", " months"), + (15251, "W", " weeks"), # 1 + (106752, "D", " days"), # change from previous: + (2562048, "h", " hours"), # 0 hours + (153722868, "m", " minutes"), # 13 minutes + (9223372037, "s", " seconds"), # 44 seconds + ], +) +def test_construction_out_of_bounds_td64(val, unit, name): + # TODO: parametrize over units just above/below the implementation bounds + # once GH#38964 is resolved + + # Timedelta.max is just under 106752 days + td64 = np.timedelta64(val, unit) + assert td64.astype("m8[ns]").view("i8") < 0 # i.e. naive astype will be wrong + + msg = str(val) + name + with pytest.raises(OutOfBoundsTimedelta, match=msg): + Timedelta(td64) + + # But just back in bounds and we are OK + assert Timedelta(td64 - 1) == td64 - 1 + + td64 *= -1 + assert td64.astype("m8[ns]").view("i8") > 0 # i.e. 
naive astype will be wrong + + with pytest.raises(OutOfBoundsTimedelta, match="-" + msg): + Timedelta(td64) + + # But just back in bounds and we are OK + assert Timedelta(td64 + 1) == td64 + 1 + + +@pytest.mark.parametrize( + "fmt,exp", + [ + ( + "P6DT0H50M3.010010012S", + Timedelta( + days=6, + minutes=50, + seconds=3, + milliseconds=10, + microseconds=10, + nanoseconds=12, + ), + ), + ( + "P-6DT0H50M3.010010012S", + Timedelta( + days=-6, + minutes=50, + seconds=3, + milliseconds=10, + microseconds=10, + nanoseconds=12, + ), + ), + ("P4DT12H30M5S", Timedelta(days=4, hours=12, minutes=30, seconds=5)), + ("P0DT0H0M0.000000123S", Timedelta(nanoseconds=123)), + ("P0DT0H0M0.00001S", Timedelta(microseconds=10)), + ("P0DT0H0M0.001S", Timedelta(milliseconds=1)), + ("P0DT0H1M0S", Timedelta(minutes=1)), + ("P1DT25H61M61S", Timedelta(days=1, hours=25, minutes=61, seconds=61)), + ("PT1S", Timedelta(seconds=1)), + ("PT0S", Timedelta(seconds=0)), + ("P1WT0S", Timedelta(days=7, seconds=0)), + ("P1D", Timedelta(days=1)), + ("P1DT1H", Timedelta(days=1, hours=1)), + ("P1W", Timedelta(days=7)), + ("PT300S", Timedelta(seconds=300)), + ("P1DT0H0M00000000000S", Timedelta(days=1)), + ("PT-6H3M", Timedelta(hours=-6, minutes=3)), + ("-PT6H3M", Timedelta(hours=-6, minutes=-3)), + ("-PT-6H+3M", Timedelta(hours=6, minutes=-3)), + ], +) +def test_iso_constructor(fmt, exp): + assert Timedelta(fmt) == exp + + +@pytest.mark.parametrize( + "fmt", + [ + "PPPPPPPPPPPP", + "PDTHMS", + "P0DT999H999M999S", + "P1DT0H0M0.0000000000000S", + "P1DT0H0M0.S", + "P", + "-P", + ], +) +def test_iso_constructor_raises(fmt): + msg = f"Invalid ISO 8601 Duration format - {fmt}" + with pytest.raises(ValueError, match=msg): + Timedelta(fmt) + + +@pytest.mark.parametrize( + "constructed_td, conversion", + [ + (Timedelta(nanoseconds=100), "100ns"), + ( + Timedelta( + days=1, + hours=1, + minutes=1, + weeks=1, + seconds=1, + milliseconds=1, + microseconds=1, + nanoseconds=1, + ), + 694861001001001, + ), + (Timedelta(microseconds=1) + Timedelta(nanoseconds=1), "1us1ns"), + (Timedelta(microseconds=1) - Timedelta(nanoseconds=1), "999ns"), + (Timedelta(microseconds=1) + 5 * Timedelta(nanoseconds=-2), "990ns"), + ], +) +def test_td_constructor_on_nanoseconds(constructed_td, conversion): + # GH#9273 + assert constructed_td == Timedelta(conversion) + + +def test_td_constructor_value_error(): + msg = "Invalid type <class 'str'>. Must be int or float."
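A note on the ISO 8601 cases above: `Timedelta` parses duration strings of the form `P…DT…H…M…S`, and malformed durations raise `ValueError` with the offending format echoed back. A minimal standalone sketch of the same behavior, assuming a pandas of roughly this vintage is importable:

```python
import pandas as pd

# ISO 8601 durations parse to nanosecond-resolution Timedeltas,
# mirroring a couple of the parametrized expectations above.
assert pd.Timedelta("P4DT12H30M5S") == pd.Timedelta(days=4, hours=12, minutes=30, seconds=5)
assert pd.Timedelta("P1W") == pd.Timedelta(days=7)

# Malformed durations raise, with the format echoed in the message.
try:
    pd.Timedelta("PDTHMS")
except ValueError as exc:
    print(exc)  # Invalid ISO 8601 Duration format - PDTHMS
```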
+ with pytest.raises(TypeError, match=msg): + Timedelta(nanoseconds="abc") + + +def test_timedelta_constructor_identity(): + # Test for #30543 + expected = Timedelta(np.timedelta64(1, "s")) + result = Timedelta(expected) + assert result is expected + + +@pytest.mark.parametrize( + "constructor, value, unit, expectation", + [ + (Timedelta, "10s", "ms", (ValueError, "unit must not be specified")), + (to_timedelta, "10s", "ms", (ValueError, "unit must not be specified")), + (to_timedelta, ["1", 2, 3], "s", (ValueError, "unit must not be specified")), + ], +) +def test_string_with_unit(constructor, value, unit, expectation): + exp, match = expectation + with pytest.raises(exp, match=match): + _ = constructor(value, unit=unit) + + +@pytest.mark.parametrize( + "value", + [ + "".join(elements) + for repetition in (1, 2) + for elements in product("+-, ", repeat=repetition) + ], +) +def test_string_without_numbers(value): + # GH39710 Timedelta input string with only symbols and no digits raises an error + msg = ( + "symbols w/o a number" + if value != "--" + else "only leading negative signs are allowed" + ) + with pytest.raises(ValueError, match=msg): + Timedelta(value) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/timedelta/test_formats.py b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timedelta/test_formats.py new file mode 100644 index 0000000..753186e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timedelta/test_formats.py @@ -0,0 +1,44 @@ +import pytest + +from pandas import Timedelta + + +@pytest.mark.parametrize( + "td, expected_repr", + [ + (Timedelta(10, unit="d"), "Timedelta('10 days 00:00:00')"), + (Timedelta(10, unit="s"), "Timedelta('0 days 00:00:10')"), + (Timedelta(10, unit="ms"), "Timedelta('0 days 00:00:00.010000')"), + (Timedelta(-10, unit="ms"), "Timedelta('-1 days +23:59:59.990000')"), + ], +) +def test_repr(td, expected_repr): + assert repr(td) == expected_repr + + +@pytest.mark.parametrize( + "td, expected_iso", + [ + ( + Timedelta( + days=6, + minutes=50, + seconds=3, + milliseconds=10, + microseconds=10, + nanoseconds=12, + ), + "P6DT0H50M3.010010012S", + ), + (Timedelta(days=4, hours=12, minutes=30, seconds=5), "P4DT12H30M5S"), + (Timedelta(nanoseconds=123), "P0DT0H0M0.000000123S"), + # trim nano + (Timedelta(microseconds=10), "P0DT0H0M0.00001S"), + # trim micro + (Timedelta(milliseconds=1), "P0DT0H0M0.001S"), + # don't strip every 0 + (Timedelta(minutes=1), "P0DT0H1M0S"), + ], +) +def test_isoformat(td, expected_iso): + assert td.isoformat() == expected_iso diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/timedelta/test_timedelta.py b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timedelta/test_timedelta.py new file mode 100644 index 0000000..7a32c93 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timedelta/test_timedelta.py @@ -0,0 +1,661 @@ +""" test the scalar Timedelta """ +from datetime import timedelta + +from hypothesis import ( + given, + strategies as st, +) +import numpy as np +import pytest + +from pandas._libs import lib +from pandas._libs.tslibs import ( + NaT, + iNaT, +) + +import pandas as pd +from pandas import ( + Timedelta, + TimedeltaIndex, + offsets, + to_timedelta, +) +import pandas._testing as tm + + +class TestTimedeltaUnaryOps: + def test_invert(self): + td = Timedelta(10, unit="d") + + msg = "bad operand type for unary ~" + with pytest.raises(TypeError, match=msg): + ~td + + # check this matches pytimedelta and timedelta64 + with 
pytest.raises(TypeError, match=msg): + ~(td.to_pytimedelta()) + + umsg = "ufunc 'invert' not supported for the input types" + with pytest.raises(TypeError, match=umsg): + ~(td.to_timedelta64()) + + def test_unary_ops(self): + td = Timedelta(10, unit="d") + + # __neg__, __pos__ + assert -td == Timedelta(-10, unit="d") + assert -td == Timedelta("-10d") + assert +td == Timedelta(10, unit="d") + + # __abs__, __abs__(__neg__) + assert abs(td) == td + assert abs(-td) == td + assert abs(-td) == Timedelta("10d") + + +class TestTimedeltas: + @pytest.mark.parametrize( + "unit, value, expected", + [ + ("us", 9.999, 9999), + ("ms", 9.999999, 9999999), + ("s", 9.999999999, 9999999999), + ], + ) + def test_rounding_on_int_unit_construction(self, unit, value, expected): + # GH 12690 + result = Timedelta(value, unit=unit) + assert result.value == expected + result = Timedelta(str(value) + unit) + assert result.value == expected + + def test_total_seconds_scalar(self): + # see gh-10939 + rng = Timedelta("1 days, 10:11:12.100123456") + expt = 1 * 86400 + 10 * 3600 + 11 * 60 + 12 + 100123456.0 / 1e9 + tm.assert_almost_equal(rng.total_seconds(), expt) + + rng = Timedelta(np.nan) + assert np.isnan(rng.total_seconds()) + + def test_conversion(self): + + for td in [Timedelta(10, unit="d"), Timedelta("1 days, 10:11:12.012345")]: + pydt = td.to_pytimedelta() + assert td == Timedelta(pydt) + assert td == pydt + assert isinstance(pydt, timedelta) and not isinstance(pydt, Timedelta) + + assert td == np.timedelta64(td.value, "ns") + td64 = td.to_timedelta64() + + assert td64 == np.timedelta64(td.value, "ns") + assert td == td64 + + assert isinstance(td64, np.timedelta64) + + # this is NOT equal and cannot be roundtripped (because of the nanos) + td = Timedelta("1 days, 10:11:12.012345678") + assert td != td.to_pytimedelta() + + def test_fields(self): + def check(value): + # that we are int + assert isinstance(value, int) + + # compat to datetime.timedelta + rng = to_timedelta("1 days, 10:11:12") + assert rng.days == 1 + assert rng.seconds == 10 * 3600 + 11 * 60 + 12 + assert rng.microseconds == 0 + assert rng.nanoseconds == 0 + + msg = "'Timedelta' object has no attribute '{}'" + with pytest.raises(AttributeError, match=msg.format("hours")): + rng.hours + with pytest.raises(AttributeError, match=msg.format("minutes")): + rng.minutes + with pytest.raises(AttributeError, match=msg.format("milliseconds")): + rng.milliseconds + + # GH 10050 + check(rng.days) + check(rng.seconds) + check(rng.microseconds) + check(rng.nanoseconds) + + td = Timedelta("-1 days, 10:11:12") + assert abs(td) == Timedelta("13:48:48") + assert str(td) == "-1 days +10:11:12" + assert -td == Timedelta("0 days 13:48:48") + assert -Timedelta("-1 days, 10:11:12").value == 49728000000000 + assert Timedelta("-1 days, 10:11:12").value == -49728000000000 + + rng = to_timedelta("-1 days, 10:11:12.100123456") + assert rng.days == -1 + assert rng.seconds == 10 * 3600 + 11 * 60 + 12 + assert rng.microseconds == 100 * 1000 + 123 + assert rng.nanoseconds == 456 + msg = "'Timedelta' object has no attribute '{}'" + with pytest.raises(AttributeError, match=msg.format("hours")): + rng.hours + with pytest.raises(AttributeError, match=msg.format("minutes")): + rng.minutes + with pytest.raises(AttributeError, match=msg.format("milliseconds")): + rng.milliseconds + + # components + tup = to_timedelta(-1, "us").components + assert tup.days == -1 + assert tup.hours == 23 + assert tup.minutes == 59 + assert tup.seconds == 59 + assert tup.milliseconds == 999 + assert 
tup.microseconds == 999 + assert tup.nanoseconds == 0 + + # GH 10050 + check(tup.days) + check(tup.hours) + check(tup.minutes) + check(tup.seconds) + check(tup.milliseconds) + check(tup.microseconds) + check(tup.nanoseconds) + + tup = Timedelta("-1 days 1 us").components + assert tup.days == -2 + assert tup.hours == 23 + assert tup.minutes == 59 + assert tup.seconds == 59 + assert tup.milliseconds == 999 + assert tup.microseconds == 999 + assert tup.nanoseconds == 0 + + def test_iso_conversion(self): + # GH #21877 + expected = Timedelta(1, unit="s") + assert to_timedelta("P0DT0H0M1S") == expected + + def test_nat_converters(self): + result = to_timedelta("nat").to_numpy() + assert result.dtype.kind == "M" + assert result.astype("int64") == iNaT + + result = to_timedelta("nan").to_numpy() + assert result.dtype.kind == "M" + assert result.astype("int64") == iNaT + + @pytest.mark.parametrize( + "unit, np_unit", + [(value, "W") for value in ["W", "w"]] + + [(value, "D") for value in ["D", "d", "days", "day", "Days", "Day"]] + + [ + (value, "m") + for value in [ + "m", + "minute", + "min", + "minutes", + "t", + "Minute", + "Min", + "Minutes", + "T", + ] + ] + + [ + (value, "s") + for value in [ + "s", + "seconds", + "sec", + "second", + "S", + "Seconds", + "Sec", + "Second", + ] + ] + + [ + (value, "ms") + for value in [ + "ms", + "milliseconds", + "millisecond", + "milli", + "millis", + "l", + "MS", + "Milliseconds", + "Millisecond", + "Milli", + "Millis", + "L", + ] + ] + + [ + (value, "us") + for value in [ + "us", + "microseconds", + "microsecond", + "micro", + "micros", + "u", + "US", + "Microseconds", + "Microsecond", + "Micro", + "Micros", + "U", + ] + ] + + [ + (value, "ns") + for value in [ + "ns", + "nanoseconds", + "nanosecond", + "nano", + "nanos", + "n", + "NS", + "Nanoseconds", + "Nanosecond", + "Nano", + "Nanos", + "N", + ] + ], + ) + @pytest.mark.parametrize("wrapper", [np.array, list, pd.Index]) + def test_unit_parser(self, unit, np_unit, wrapper): + # validate all units, GH 6855, GH 21762 + # array-likes + expected = TimedeltaIndex( + [np.timedelta64(i, np_unit) for i in np.arange(5).tolist()] + ) + result = to_timedelta(wrapper(range(5)), unit=unit) + tm.assert_index_equal(result, expected) + result = TimedeltaIndex(wrapper(range(5)), unit=unit) + tm.assert_index_equal(result, expected) + + str_repr = [f"{x}{unit}" for x in np.arange(5)] + result = to_timedelta(wrapper(str_repr)) + tm.assert_index_equal(result, expected) + result = to_timedelta(wrapper(str_repr)) + tm.assert_index_equal(result, expected) + + # scalar + expected = Timedelta(np.timedelta64(2, np_unit).astype("timedelta64[ns]")) + result = to_timedelta(2, unit=unit) + assert result == expected + result = Timedelta(2, unit=unit) + assert result == expected + + result = to_timedelta(f"2{unit}") + assert result == expected + result = Timedelta(f"2{unit}") + assert result == expected + + @pytest.mark.parametrize("unit", ["Y", "y", "M"]) + def test_unit_m_y_raises(self, unit): + msg = "Units 'M', 'Y', and 'y' are no longer supported" + with pytest.raises(ValueError, match=msg): + Timedelta(10, unit) + + with pytest.raises(ValueError, match=msg): + to_timedelta(10, unit) + + with pytest.raises(ValueError, match=msg): + to_timedelta([1, 2], unit) + + def test_numeric_conversions(self): + assert Timedelta(0) == np.timedelta64(0, "ns") + assert Timedelta(10) == np.timedelta64(10, "ns") + assert Timedelta(10, unit="ns") == np.timedelta64(10, "ns") + + assert Timedelta(10, unit="us") == np.timedelta64(10, "us") + assert 
Timedelta(10, unit="ms") == np.timedelta64(10, "ms") + assert Timedelta(10, unit="s") == np.timedelta64(10, "s") + assert Timedelta(10, unit="d") == np.timedelta64(10, "D") + + def test_timedelta_conversions(self): + assert Timedelta(timedelta(seconds=1)) == np.timedelta64(1, "s").astype( + "m8[ns]" + ) + assert Timedelta(timedelta(microseconds=1)) == np.timedelta64(1, "us").astype( + "m8[ns]" + ) + assert Timedelta(timedelta(days=1)) == np.timedelta64(1, "D").astype("m8[ns]") + + def test_to_numpy_alias(self): + # GH 24653: alias .to_numpy() for scalars + td = Timedelta("10m7s") + assert td.to_timedelta64() == td.to_numpy() + + # GH#44460 + msg = "dtype and copy arguments are ignored" + with pytest.raises(ValueError, match=msg): + td.to_numpy("m8[s]") + with pytest.raises(ValueError, match=msg): + td.to_numpy(copy=True) + + @pytest.mark.parametrize( + "freq,s1,s2", + [ + # This first case has s1, s2 being the same as t1,t2 below + ( + "N", + Timedelta("1 days 02:34:56.789123456"), + Timedelta("-1 days 02:34:56.789123456"), + ), + ( + "U", + Timedelta("1 days 02:34:56.789123000"), + Timedelta("-1 days 02:34:56.789123000"), + ), + ( + "L", + Timedelta("1 days 02:34:56.789000000"), + Timedelta("-1 days 02:34:56.789000000"), + ), + ("S", Timedelta("1 days 02:34:57"), Timedelta("-1 days 02:34:57")), + ("2S", Timedelta("1 days 02:34:56"), Timedelta("-1 days 02:34:56")), + ("5S", Timedelta("1 days 02:34:55"), Timedelta("-1 days 02:34:55")), + ("T", Timedelta("1 days 02:35:00"), Timedelta("-1 days 02:35:00")), + ("12T", Timedelta("1 days 02:36:00"), Timedelta("-1 days 02:36:00")), + ("H", Timedelta("1 days 03:00:00"), Timedelta("-1 days 03:00:00")), + ("d", Timedelta("1 days"), Timedelta("-1 days")), + ], + ) + def test_round(self, freq, s1, s2): + + t1 = Timedelta("1 days 02:34:56.789123456") + t2 = Timedelta("-1 days 02:34:56.789123456") + + r1 = t1.round(freq) + assert r1 == s1 + r2 = t2.round(freq) + assert r2 == s2 + + def test_round_invalid(self): + t1 = Timedelta("1 days 02:34:56.789123456") + + for freq, msg in [ + ("Y", " is a non-fixed frequency"), + ("M", " is a non-fixed frequency"), + ("foobar", "Invalid frequency: foobar"), + ]: + with pytest.raises(ValueError, match=msg): + t1.round(freq) + + def test_round_implementation_bounds(self): + # See also: analogous test for Timestamp + # GH#38964 + result = Timedelta.min.ceil("s") + expected = Timedelta.min + Timedelta(seconds=1) - Timedelta(145224193) + assert result == expected + + result = Timedelta.max.floor("s") + expected = Timedelta.max - Timedelta(854775807) + assert result == expected + + with pytest.raises(OverflowError, match="value too large"): + Timedelta.min.floor("s") + + # the second message here shows up in windows builds + msg = "|".join( + ["Python int too large to convert to C long", "int too big to convert"] + ) + with pytest.raises(OverflowError, match=msg): + Timedelta.max.ceil("s") + + @given(val=st.integers(min_value=iNaT + 1, max_value=lib.i8max)) + @pytest.mark.parametrize( + "method", [Timedelta.round, Timedelta.floor, Timedelta.ceil] + ) + def test_round_sanity(self, val, method): + val = np.int64(val) + td = Timedelta(val) + + assert method(td, "ns") == td + + res = method(td, "us") + nanos = 1000 + assert np.abs((res - td).value) < nanos + assert res.value % nanos == 0 + + res = method(td, "ms") + nanos = 1_000_000 + assert np.abs((res - td).value) < nanos + assert res.value % nanos == 0 + + res = method(td, "s") + nanos = 1_000_000_000 + assert np.abs((res - td).value) < nanos + assert res.value % nanos == 
0 + + res = method(td, "min") + nanos = 60 * 1_000_000_000 + assert np.abs((res - td).value) < nanos + assert res.value % nanos == 0 + + res = method(td, "h") + nanos = 60 * 60 * 1_000_000_000 + assert np.abs((res - td).value) < nanos + assert res.value % nanos == 0 + + res = method(td, "D") + nanos = 24 * 60 * 60 * 1_000_000_000 + assert np.abs((res - td).value) < nanos + assert res.value % nanos == 0 + + def test_contains(self): + # Checking for any NaT-like objects + # GH 13603 + td = to_timedelta(range(5), unit="d") + offsets.Hour(1) + for v in [NaT, None, float("nan"), np.nan]: + assert not (v in td) + + td = to_timedelta([NaT]) + for v in [NaT, None, float("nan"), np.nan]: + assert v in td + + def test_identity(self): + + td = Timedelta(10, unit="d") + assert isinstance(td, Timedelta) + assert isinstance(td, timedelta) + + def test_short_format_converters(self): + def conv(v): + return v.astype("m8[ns]") + + assert Timedelta("10") == np.timedelta64(10, "ns") + assert Timedelta("10ns") == np.timedelta64(10, "ns") + assert Timedelta("100") == np.timedelta64(100, "ns") + assert Timedelta("100ns") == np.timedelta64(100, "ns") + + assert Timedelta("1000") == np.timedelta64(1000, "ns") + assert Timedelta("1000ns") == np.timedelta64(1000, "ns") + assert Timedelta("1000NS") == np.timedelta64(1000, "ns") + + assert Timedelta("10us") == np.timedelta64(10000, "ns") + assert Timedelta("100us") == np.timedelta64(100000, "ns") + assert Timedelta("1000us") == np.timedelta64(1000000, "ns") + assert Timedelta("1000Us") == np.timedelta64(1000000, "ns") + assert Timedelta("1000uS") == np.timedelta64(1000000, "ns") + + assert Timedelta("1ms") == np.timedelta64(1000000, "ns") + assert Timedelta("10ms") == np.timedelta64(10000000, "ns") + assert Timedelta("100ms") == np.timedelta64(100000000, "ns") + assert Timedelta("1000ms") == np.timedelta64(1000000000, "ns") + + assert Timedelta("-1s") == -np.timedelta64(1000000000, "ns") + assert Timedelta("1s") == np.timedelta64(1000000000, "ns") + assert Timedelta("10s") == np.timedelta64(10000000000, "ns") + assert Timedelta("100s") == np.timedelta64(100000000000, "ns") + assert Timedelta("1000s") == np.timedelta64(1000000000000, "ns") + + assert Timedelta("1d") == conv(np.timedelta64(1, "D")) + assert Timedelta("-1d") == -conv(np.timedelta64(1, "D")) + assert Timedelta("1D") == conv(np.timedelta64(1, "D")) + assert Timedelta("10D") == conv(np.timedelta64(10, "D")) + assert Timedelta("100D") == conv(np.timedelta64(100, "D")) + assert Timedelta("1000D") == conv(np.timedelta64(1000, "D")) + assert Timedelta("10000D") == conv(np.timedelta64(10000, "D")) + + # space + assert Timedelta(" 10000D ") == conv(np.timedelta64(10000, "D")) + assert Timedelta(" - 10000D ") == -conv(np.timedelta64(10000, "D")) + + # invalid + msg = "invalid unit abbreviation" + with pytest.raises(ValueError, match=msg): + Timedelta("1foo") + msg = "unit abbreviation w/o a number" + with pytest.raises(ValueError, match=msg): + Timedelta("foo") + + def test_full_format_converters(self): + def conv(v): + return v.astype("m8[ns]") + + d1 = np.timedelta64(1, "D") + + assert Timedelta("1days") == conv(d1) + assert Timedelta("1days,") == conv(d1) + assert Timedelta("- 1days,") == -conv(d1) + + assert Timedelta("00:00:01") == conv(np.timedelta64(1, "s")) + assert Timedelta("06:00:01") == conv(np.timedelta64(6 * 3600 + 1, "s")) + assert Timedelta("06:00:01.0") == conv(np.timedelta64(6 * 3600 + 1, "s")) + assert Timedelta("06:00:01.01") == conv( + np.timedelta64(1000 * (6 * 3600 + 1) + 10, "ms") + ) + + 
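The short-format converter assertions above reduce to one invariant: a bare numeric string is read as nanoseconds, and a trailing unit abbreviation rescales it. A small illustration of that invariant, under the same assumption that pandas is importable as `pd`:

```python
import numpy as np
import pandas as pd

# Bare digits mean nanoseconds; a unit suffix rescales the number.
assert pd.Timedelta("10") == np.timedelta64(10, "ns")
assert pd.Timedelta("10ms") == np.timedelta64(10_000_000, "ns")
assert pd.Timedelta("1d") == np.timedelta64(1, "D").astype("m8[ns]")
```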
assert Timedelta("- 1days, 00:00:01") == conv(-d1 + np.timedelta64(1, "s")) + assert Timedelta("1days, 06:00:01") == conv( + d1 + np.timedelta64(6 * 3600 + 1, "s") + ) + assert Timedelta("1days, 06:00:01.01") == conv( + d1 + np.timedelta64(1000 * (6 * 3600 + 1) + 10, "ms") + ) + + # invalid + msg = "have leftover units" + with pytest.raises(ValueError, match=msg): + Timedelta("- 1days, 00") + + def test_pickle(self): + + v = Timedelta("1 days 10:11:12.0123456") + v_p = tm.round_trip_pickle(v) + assert v == v_p + + def test_timedelta_hash_equality(self): + # GH 11129 + v = Timedelta(1, "D") + td = timedelta(days=1) + assert hash(v) == hash(td) + + d = {td: 2} + assert d[v] == 2 + + tds = [Timedelta(seconds=1) + Timedelta(days=n) for n in range(20)] + assert all(hash(td) == hash(td.to_pytimedelta()) for td in tds) + + # python timedeltas drop ns resolution + ns_td = Timedelta(1, "ns") + assert hash(ns_td) != hash(ns_td.to_pytimedelta()) + + def test_implementation_limits(self): + min_td = Timedelta(Timedelta.min) + max_td = Timedelta(Timedelta.max) + + # GH 12727 + # timedelta limits correspond to int64 boundaries + assert min_td.value == iNaT + 1 + assert max_td.value == lib.i8max + + # Beyond lower limit, a NAT before the Overflow + assert (min_td - Timedelta(1, "ns")) is NaT + + msg = "int too (large|big) to convert" + with pytest.raises(OverflowError, match=msg): + min_td - Timedelta(2, "ns") + + with pytest.raises(OverflowError, match=msg): + max_td + Timedelta(1, "ns") + + # Same tests using the internal nanosecond values + td = Timedelta(min_td.value - 1, "ns") + assert td is NaT + + with pytest.raises(OverflowError, match=msg): + Timedelta(min_td.value - 2, "ns") + + with pytest.raises(OverflowError, match=msg): + Timedelta(max_td.value + 1, "ns") + + def test_total_seconds_precision(self): + # GH 19458 + assert Timedelta("30S").total_seconds() == 30.0 + assert Timedelta("0").total_seconds() == 0.0 + assert Timedelta("-2S").total_seconds() == -2.0 + assert Timedelta("5.324S").total_seconds() == 5.324 + assert (Timedelta("30S").total_seconds() - 30.0) < 1e-20 + assert (30.0 - Timedelta("30S").total_seconds()) < 1e-20 + + def test_resolution_string(self): + assert Timedelta(days=1).resolution_string == "D" + assert Timedelta(days=1, hours=6).resolution_string == "H" + assert Timedelta(days=1, minutes=6).resolution_string == "T" + assert Timedelta(days=1, seconds=6).resolution_string == "S" + assert Timedelta(days=1, milliseconds=6).resolution_string == "L" + assert Timedelta(days=1, microseconds=6).resolution_string == "U" + assert Timedelta(days=1, nanoseconds=6).resolution_string == "N" + + def test_resolution_deprecated(self): + # GH#21344 + td = Timedelta(days=4, hours=3) + result = td.resolution + assert result == Timedelta(nanoseconds=1) + + # Check that the attribute is available on the class, mirroring + # the stdlib timedelta behavior + result = Timedelta.resolution + assert result == Timedelta(nanoseconds=1) + + +@pytest.mark.parametrize( + "value, expected", + [ + (Timedelta("10S"), True), + (Timedelta("-10S"), True), + (Timedelta(10, unit="ns"), True), + (Timedelta(0, unit="ns"), False), + (Timedelta(-10, unit="ns"), True), + (Timedelta(None), True), + (NaT, True), + ], +) +def test_truthiness(value, expected): + # https://github.com/pandas-dev/pandas/issues/21484 + assert bool(value) is expected + + +def test_timedelta_attribute_precision(): + # GH 31354 + td = Timedelta(1552211999999999872, unit="ns") + result = td.days * 86400 + result += td.seconds + result *= 1000000 + 
result += td.microseconds + result *= 1000 + result += td.nanoseconds + expected = td.value + assert result == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..c633985 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/__pycache__/test_arithmetic.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/__pycache__/test_arithmetic.cpython-38.pyc new file mode 100644 index 0000000..3bc7783 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/__pycache__/test_arithmetic.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/__pycache__/test_comparisons.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/__pycache__/test_comparisons.cpython-38.pyc new file mode 100644 index 0000000..fa4f700 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/__pycache__/test_comparisons.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/__pycache__/test_constructors.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/__pycache__/test_constructors.cpython-38.pyc new file mode 100644 index 0000000..072d04b Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/__pycache__/test_constructors.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/__pycache__/test_formats.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/__pycache__/test_formats.cpython-38.pyc new file mode 100644 index 0000000..38484c8 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/__pycache__/test_formats.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/__pycache__/test_rendering.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/__pycache__/test_rendering.cpython-38.pyc new file mode 100644 index 0000000..7188757 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/__pycache__/test_rendering.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/__pycache__/test_timestamp.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/__pycache__/test_timestamp.cpython-38.pyc new file mode 100644 index 0000000..4062ff2 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/__pycache__/test_timestamp.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/__pycache__/test_timezones.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/__pycache__/test_timezones.cpython-38.pyc new file mode 100644 index 0000000..4435518 Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/__pycache__/test_timezones.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/__pycache__/test_unary_ops.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/__pycache__/test_unary_ops.cpython-38.pyc new file mode 100644 index 0000000..7bbfd71 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/__pycache__/test_unary_ops.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/test_arithmetic.py b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/test_arithmetic.py new file mode 100644 index 0000000..2ff1de5 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/test_arithmetic.py @@ -0,0 +1,321 @@ +from datetime import ( + datetime, + timedelta, + timezone, +) + +import numpy as np +import pytest + +from pandas._libs.tslibs import ( + OutOfBoundsDatetime, + Timedelta, + Timestamp, + offsets, + to_offset, +) + +import pandas._testing as tm + + +class TestTimestampArithmetic: + def test_overflow_offset(self): + # no overflow expected + + stamp = Timestamp("2000/1/1") + offset_no_overflow = to_offset("D") * 100 + + expected = Timestamp("2000/04/10") + assert stamp + offset_no_overflow == expected + + assert offset_no_overflow + stamp == expected + + expected = Timestamp("1999/09/23") + assert stamp - offset_no_overflow == expected + + def test_overflow_offset_raises(self): + # xref https://github.com/statsmodels/statsmodels/issues/3374 + # ends up multiplying really large numbers which overflow + + stamp = Timestamp("2017-01-13 00:00:00") + offset_overflow = 20169940 * offsets.Day(1) + msg = ( + "the add operation between " + r"\<-?\d+ \* Days\> and \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} " + "will overflow" + ) + lmsg = "|".join( + ["Python int too large to convert to C long", "int too big to convert"] + ) + + with pytest.raises(OverflowError, match=lmsg): + stamp + offset_overflow + + with pytest.raises(OverflowError, match=msg): + offset_overflow + stamp + + with pytest.raises(OverflowError, match=lmsg): + stamp - offset_overflow + + # xref https://github.com/pandas-dev/pandas/issues/14080 + # used to crash, so check for proper overflow exception + + stamp = Timestamp("2000/1/1") + offset_overflow = to_offset("D") * 100 ** 5 + + with pytest.raises(OverflowError, match=lmsg): + stamp + offset_overflow + + with pytest.raises(OverflowError, match=msg): + offset_overflow + stamp + + with pytest.raises(OverflowError, match=lmsg): + stamp - offset_overflow + + def test_overflow_timestamp_raises(self): + # https://github.com/pandas-dev/pandas/issues/31774 + msg = "Result is too large" + a = Timestamp("2101-01-01 00:00:00") + b = Timestamp("1688-01-01 00:00:00") + + with pytest.raises(OutOfBoundsDatetime, match=msg): + a - b + + # but we're OK for timestamp and datetime.datetime + assert (a - b.to_pydatetime()) == (a.to_pydatetime() - b) + + def test_delta_preserve_nanos(self): + val = Timestamp(1337299200000000123) + result = val + timedelta(1) + assert result.nanosecond == val.nanosecond + + def test_rsub_dtscalars(self, tz_naive_fixture): + # In particular, check that datetime64 - Timestamp works GH#28286 + td = Timedelta(1235345642000) + ts = Timestamp("2021-01-01", tz=tz_naive_fixture) + other = ts + td + + assert other - ts == td + assert other.to_pydatetime() - ts == td + if tz_naive_fixture is None: + assert 
other.to_datetime64() - ts == td + else: + msg = "Cannot subtract tz-naive and tz-aware datetime-like objects" + with pytest.raises(TypeError, match=msg): + other.to_datetime64() - ts + + def test_timestamp_sub_datetime(self): + dt = datetime(2013, 10, 12) + ts = Timestamp(datetime(2013, 10, 13)) + assert (ts - dt).days == 1 + assert (dt - ts).days == -1 + + def test_subtract_tzaware_datetime(self): + t1 = Timestamp("2020-10-22T22:00:00+00:00") + t2 = datetime(2020, 10, 22, 22, tzinfo=timezone.utc) + + result = t1 - t2 + + assert isinstance(result, Timedelta) + assert result == Timedelta("0 days") + + def test_subtract_timestamp_from_different_timezone(self): + t1 = Timestamp("20130101").tz_localize("US/Eastern") + t2 = Timestamp("20130101").tz_localize("CET") + + result = t1 - t2 + + assert isinstance(result, Timedelta) + assert result == Timedelta("0 days 06:00:00") + + def test_subtracting_involving_datetime_with_different_tz(self): + t1 = datetime(2013, 1, 1, tzinfo=timezone(timedelta(hours=-5))) + t2 = Timestamp("20130101").tz_localize("CET") + + result = t1 - t2 + + assert isinstance(result, Timedelta) + assert result == Timedelta("0 days 06:00:00") + + result = t2 - t1 + assert isinstance(result, Timedelta) + assert result == Timedelta("-1 days +18:00:00") + + def test_subtracting_different_timezones(self, tz_aware_fixture): + t_raw = Timestamp("20130101") + t_UTC = t_raw.tz_localize("UTC") + t_diff = t_UTC.tz_convert(tz_aware_fixture) + Timedelta("0 days 05:00:00") + + result = t_diff - t_UTC + + assert isinstance(result, Timedelta) + assert result == Timedelta("0 days 05:00:00") + + def test_addition_subtraction_types(self): + # Assert on the types resulting from Timestamp +/- various date/time + # objects + dt = datetime(2014, 3, 4) + td = timedelta(seconds=1) + # build a timestamp with a frequency, since then it supports + # addition/subtraction of integers + with tm.assert_produces_warning(FutureWarning, match="The 'freq' argument"): + # freq deprecated + ts = Timestamp(dt, freq="D") + + msg = "Addition/subtraction of integers" + with pytest.raises(TypeError, match=msg): + # GH#22535 add/sub with integers is deprecated + ts + 1 + with pytest.raises(TypeError, match=msg): + ts - 1 + + # Timestamp + datetime not supported, though subtraction is supported + # and yields timedelta more tests in tseries/base/tests/test_base.py + assert type(ts - dt) == Timedelta + assert type(ts + td) == Timestamp + assert type(ts - td) == Timestamp + + # Timestamp +/- datetime64 not supported, so not tested (could possibly + # assert error raised?) 
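As the comments in `test_addition_subtraction_types` note (the `td64` cases follow just below), subtracting a datetime from a `Timestamp` yields a `Timedelta`, while adding or subtracting timedelta-likes yields another `Timestamp`. A minimal sketch of those result types:

```python
from datetime import datetime, timedelta

import pandas as pd

ts = pd.Timestamp(datetime(2014, 3, 4))
assert type(ts - datetime(2014, 3, 3)) is pd.Timedelta  # subtracting a datetime -> Timedelta
assert type(ts + timedelta(seconds=1)) is pd.Timestamp  # shifting by a timedelta -> Timestamp
assert type(ts - timedelta(seconds=1)) is pd.Timestamp
```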
+ td64 = np.timedelta64(1, "D") + assert type(ts + td64) == Timestamp + assert type(ts - td64) == Timestamp + + @pytest.mark.parametrize( + "freq, td, td64", + [ + ("S", timedelta(seconds=1), np.timedelta64(1, "s")), + ("min", timedelta(minutes=1), np.timedelta64(1, "m")), + ("H", timedelta(hours=1), np.timedelta64(1, "h")), + ("D", timedelta(days=1), np.timedelta64(1, "D")), + ("W", timedelta(weeks=1), np.timedelta64(1, "W")), + ("M", None, np.timedelta64(1, "M")), + ], + ) + @pytest.mark.filterwarnings("ignore:Timestamp.freq is deprecated:FutureWarning") + @pytest.mark.filterwarnings("ignore:The 'freq' argument:FutureWarning") + def test_addition_subtraction_preserve_frequency(self, freq, td, td64): + ts = Timestamp("2014-03-05 00:00:00", freq=freq) + original_freq = ts.freq + + assert (ts + 1 * original_freq).freq == original_freq + assert (ts - 1 * original_freq).freq == original_freq + + if td is not None: + # timedelta does not support months as unit + assert (ts + td).freq == original_freq + assert (ts - td).freq == original_freq + + assert (ts + td64).freq == original_freq + assert (ts - td64).freq == original_freq + + @pytest.mark.parametrize( + "td", [Timedelta(hours=3), np.timedelta64(3, "h"), timedelta(hours=3)] + ) + def test_radd_tdscalar(self, td, fixed_now_ts): + # GH#24775 timedelta64+Timestamp should not raise + ts = fixed_now_ts + assert td + ts == ts + td + + @pytest.mark.parametrize( + "other,expected_difference", + [ + (np.timedelta64(-123, "ns"), -123), + (np.timedelta64(1234567898, "ns"), 1234567898), + (np.timedelta64(-123, "us"), -123000), + (np.timedelta64(-123, "ms"), -123000000), + ], + ) + def test_timestamp_add_timedelta64_unit(self, other, expected_difference): + ts = Timestamp(datetime.utcnow()) + result = ts + other + valdiff = result.value - ts.value + assert valdiff == expected_difference + + @pytest.mark.parametrize( + "ts", + [ + Timestamp("1776-07-04"), + Timestamp("1776-07-04", tz="UTC"), + ], + ) + @pytest.mark.parametrize( + "other", + [ + 1, + np.int64(1), + np.array([1, 2], dtype=np.int32), + np.array([3, 4], dtype=np.uint64), + ], + ) + def test_add_int_with_freq(self, ts, other): + msg = "Addition/subtraction of integers and integer-arrays" + with pytest.raises(TypeError, match=msg): + ts + other + with pytest.raises(TypeError, match=msg): + other + ts + + with pytest.raises(TypeError, match=msg): + ts - other + + msg = "unsupported operand type" + with pytest.raises(TypeError, match=msg): + other - ts + + @pytest.mark.parametrize("shape", [(6,), (2, 3)]) + def test_addsub_m8ndarray(self, shape): + # GH#33296 + ts = Timestamp("2020-04-04 15:45") + other = np.arange(6).astype("m8[h]").reshape(shape) + + result = ts + other + + ex_stamps = [ts + Timedelta(hours=n) for n in range(6)] + expected = np.array([x.asm8 for x in ex_stamps], dtype="M8[ns]").reshape(shape) + tm.assert_numpy_array_equal(result, expected) + + result = other + ts + tm.assert_numpy_array_equal(result, expected) + + result = ts - other + ex_stamps = [ts - Timedelta(hours=n) for n in range(6)] + expected = np.array([x.asm8 for x in ex_stamps], dtype="M8[ns]").reshape(shape) + tm.assert_numpy_array_equal(result, expected) + + msg = r"unsupported operand type\(s\) for -: 'numpy.ndarray' and 'Timestamp'" + with pytest.raises(TypeError, match=msg): + other - ts + + @pytest.mark.parametrize("shape", [(6,), (2, 3)]) + def test_addsub_m8ndarray_tzaware(self, shape): + # GH#33296 + ts = Timestamp("2020-04-04 15:45", tz="US/Pacific") + + other = 
np.arange(6).astype("m8[h]").reshape(shape) + + result = ts + other + + ex_stamps = [ts + Timedelta(hours=n) for n in range(6)] + expected = np.array(ex_stamps).reshape(shape) + tm.assert_numpy_array_equal(result, expected) + + result = other + ts + tm.assert_numpy_array_equal(result, expected) + + result = ts - other + ex_stamps = [ts - Timedelta(hours=n) for n in range(6)] + expected = np.array(ex_stamps).reshape(shape) + tm.assert_numpy_array_equal(result, expected) + + msg = r"unsupported operand type\(s\) for -: 'numpy.ndarray' and 'Timestamp'" + with pytest.raises(TypeError, match=msg): + other - ts + + def test_subtract_different_utc_objects(self, utc_fixture, utc_fixture2): + # GH 32619 + dt = datetime(2021, 1, 1) + ts1 = Timestamp(dt, tz=utc_fixture) + ts2 = Timestamp(dt, tz=utc_fixture2) + result = ts1 - ts2 + expected = Timedelta(0) + assert result == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/test_comparisons.py b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/test_comparisons.py new file mode 100644 index 0000000..7ed0a6a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/test_comparisons.py @@ -0,0 +1,321 @@ +from datetime import ( + datetime, + timedelta, +) +import operator + +import numpy as np +import pytest + +from pandas import Timestamp +import pandas._testing as tm + + +class TestTimestampComparison: + def test_comparison_dt64_ndarray(self, fixed_now_ts): + ts = Timestamp("2021-01-01") + ts2 = Timestamp("2019-04-05") + arr = np.array([[ts.asm8, ts2.asm8]], dtype="M8[ns]") + + result = ts == arr + expected = np.array([[True, False]], dtype=bool) + tm.assert_numpy_array_equal(result, expected) + + result = arr == ts + tm.assert_numpy_array_equal(result, expected) + + result = ts != arr + tm.assert_numpy_array_equal(result, ~expected) + + result = arr != ts + tm.assert_numpy_array_equal(result, ~expected) + + result = ts2 < arr + tm.assert_numpy_array_equal(result, expected) + + result = arr < ts2 + tm.assert_numpy_array_equal(result, np.array([[False, False]], dtype=bool)) + + result = ts2 <= arr + tm.assert_numpy_array_equal(result, np.array([[True, True]], dtype=bool)) + + result = arr <= ts2 + tm.assert_numpy_array_equal(result, ~expected) + + result = ts >= arr + tm.assert_numpy_array_equal(result, np.array([[True, True]], dtype=bool)) + + result = arr >= ts + tm.assert_numpy_array_equal(result, np.array([[True, False]], dtype=bool)) + + @pytest.mark.parametrize("reverse", [True, False]) + def test_comparison_dt64_ndarray_tzaware(self, reverse, comparison_op): + + ts = Timestamp("2021-01-01 00:00:00.00000", tz="UTC") + arr = np.array([ts.asm8, ts.asm8], dtype="M8[ns]") + + left, right = ts, arr + if reverse: + left, right = arr, ts + + if comparison_op is operator.eq: + expected = np.array([False, False], dtype=bool) + result = comparison_op(left, right) + tm.assert_numpy_array_equal(result, expected) + elif comparison_op is operator.ne: + expected = np.array([True, True], dtype=bool) + result = comparison_op(left, right) + tm.assert_numpy_array_equal(result, expected) + else: + msg = "Cannot compare tz-naive and tz-aware timestamps" + with pytest.raises(TypeError, match=msg): + comparison_op(left, right) + + def test_comparison_object_array(self): + # GH#15183 + ts = Timestamp("2011-01-03 00:00:00-0500", tz="US/Eastern") + other = Timestamp("2011-01-01 00:00:00-0500", tz="US/Eastern") + naive = Timestamp("2011-01-01 00:00:00") + + arr = np.array([other, ts], 
dtype=object) + res = arr == ts + expected = np.array([False, True], dtype=bool) + assert (res == expected).all() + + # 2D case + arr = np.array([[other, ts], [ts, other]], dtype=object) + res = arr != ts + expected = np.array([[True, False], [False, True]], dtype=bool) + assert res.shape == expected.shape + assert (res == expected).all() + + # tzaware mismatch + arr = np.array([naive], dtype=object) + msg = "Cannot compare tz-naive and tz-aware timestamps" + with pytest.raises(TypeError, match=msg): + arr < ts + + def test_comparison(self): + # 5-18-2012 00:00:00.000 + stamp = 1337299200000000000 + + val = Timestamp(stamp) + + assert val == val + assert not val != val + assert not val < val + assert val <= val + assert not val > val + assert val >= val + + other = datetime(2012, 5, 18) + assert val == other + assert not val != other + assert not val < other + assert val <= other + assert not val > other + assert val >= other + + other = Timestamp(stamp + 100) + + assert val != other + assert val != other + assert val < other + assert val <= other + assert other > val + assert other >= val + + def test_compare_invalid(self): + # GH#8058 + val = Timestamp("20130101 12:01:02") + assert not val == "foo" + assert not val == 10.0 + assert not val == 1 + assert not val == [] + assert not val == {"foo": 1} + assert not val == np.float64(1) + assert not val == np.int64(1) + + assert val != "foo" + assert val != 10.0 + assert val != 1 + assert val != [] + assert val != {"foo": 1} + assert val != np.float64(1) + assert val != np.int64(1) + + @pytest.mark.parametrize("tz", [None, "US/Pacific"]) + def test_compare_date(self, tz): + # GH#36131 comparing Timestamp with date object is deprecated + ts = Timestamp("2021-01-01 00:00:00.00000", tz=tz) + dt = ts.to_pydatetime().date() + # These are incorrectly considered as equal because they + # dispatch to the date comparisons which truncates ts + + for left, right in [(ts, dt), (dt, ts)]: + with tm.assert_produces_warning(FutureWarning): + assert left == right + with tm.assert_produces_warning(FutureWarning): + assert not left != right + with tm.assert_produces_warning(FutureWarning): + assert not left < right + with tm.assert_produces_warning(FutureWarning): + assert left <= right + with tm.assert_produces_warning(FutureWarning): + assert not left > right + with tm.assert_produces_warning(FutureWarning): + assert left >= right + + # Once the deprecation is enforced, the following assertions + # can be enabled: + # assert not left == right + # assert left != right + # + # with pytest.raises(TypeError): + # left < right + # with pytest.raises(TypeError): + # left <= right + # with pytest.raises(TypeError): + # left > right + # with pytest.raises(TypeError): + # left >= right + + def test_cant_compare_tz_naive_w_aware(self, utc_fixture): + # see GH#1404 + a = Timestamp("3/12/2012") + b = Timestamp("3/12/2012", tz=utc_fixture) + + msg = "Cannot compare tz-naive and tz-aware timestamps" + assert not a == b + assert a != b + with pytest.raises(TypeError, match=msg): + a < b + with pytest.raises(TypeError, match=msg): + a <= b + with pytest.raises(TypeError, match=msg): + a > b + with pytest.raises(TypeError, match=msg): + a >= b + + assert not b == a + assert b != a + with pytest.raises(TypeError, match=msg): + b < a + with pytest.raises(TypeError, match=msg): + b <= a + with pytest.raises(TypeError, match=msg): + b > a + with pytest.raises(TypeError, match=msg): + b >= a + + assert not a == b.to_pydatetime() + assert not a.to_pydatetime() == b + + def 
test_timestamp_compare_scalars(self): + # case where ndim == 0 + lhs = np.datetime64(datetime(2013, 12, 6)) + rhs = Timestamp("now") + nat = Timestamp("nat") + + ops = {"gt": "lt", "lt": "gt", "ge": "le", "le": "ge", "eq": "eq", "ne": "ne"} + + for left, right in ops.items(): + left_f = getattr(operator, left) + right_f = getattr(operator, right) + expected = left_f(lhs, rhs) + + result = right_f(rhs, lhs) + assert result == expected + + expected = left_f(rhs, nat) + result = right_f(nat, rhs) + assert result == expected + + def test_timestamp_compare_with_early_datetime(self): + # e.g. datetime.min + stamp = Timestamp("2012-01-01") + + assert not stamp == datetime.min + assert not stamp == datetime(1600, 1, 1) + assert not stamp == datetime(2700, 1, 1) + assert stamp != datetime.min + assert stamp != datetime(1600, 1, 1) + assert stamp != datetime(2700, 1, 1) + assert stamp > datetime(1600, 1, 1) + assert stamp >= datetime(1600, 1, 1) + assert stamp < datetime(2700, 1, 1) + assert stamp <= datetime(2700, 1, 1) + + other = Timestamp.min.to_pydatetime(warn=False) + assert other - timedelta(microseconds=1) < Timestamp.min + + def test_timestamp_compare_oob_dt64(self): + us = np.timedelta64(1, "us") + other = np.datetime64(Timestamp.min).astype("M8[us]") + + # This may change if the implementation bound is dropped to match + # DatetimeArray/DatetimeIndex GH#24124 + assert Timestamp.min > other + # Note: numpy gets the reversed comparison wrong + + other = np.datetime64(Timestamp.max).astype("M8[us]") + assert Timestamp.max > other # not actually OOB + assert other < Timestamp.max + + assert Timestamp.max < other + us + # Note: numpy gets the reversed comparison wrong + + # GH-42794 + other = datetime(9999, 9, 9) + assert Timestamp.min < other + assert other > Timestamp.min + assert Timestamp.max < other + assert other > Timestamp.max + + other = datetime(1, 1, 1) + assert Timestamp.max > other + assert other < Timestamp.max + assert Timestamp.min > other + assert other < Timestamp.min + + def test_compare_zerodim_array(self, fixed_now_ts): + # GH#26916 + ts = fixed_now_ts + dt64 = np.datetime64("2016-01-01", "ns") + arr = np.array(dt64) + assert arr.ndim == 0 + + result = arr < ts + assert result is np.bool_(True) + result = arr > ts + assert result is np.bool_(False) + + +def test_rich_comparison_with_unsupported_type(): + # Comparisons with unsupported objects should return NotImplemented + # (it previously raised TypeError, see #24011) + + class Inf: + def __lt__(self, o): + return False + + def __le__(self, o): + return isinstance(o, Inf) + + def __gt__(self, o): + return not isinstance(o, Inf) + + def __ge__(self, o): + return True + + def __eq__(self, other) -> bool: + return isinstance(other, Inf) + + inf = Inf() + timestamp = Timestamp("2018-11-30") + + for left, right in [(inf, timestamp), (timestamp, inf)]: + assert left > right or left < right + assert left >= right or left <= right + assert not (left == right) + assert left != right diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/test_constructors.py b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/test_constructors.py new file mode 100644 index 0000000..b3deb1a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/test_constructors.py @@ -0,0 +1,640 @@ +import calendar +from datetime import ( + datetime, + timedelta, +) + +import dateutil.tz +from dateutil.tz import tzutc +import numpy as np +import pytest +import pytz + +from pandas.compat import 
PY310 +from pandas.errors import OutOfBoundsDatetime + +from pandas import ( + Period, + Timedelta, + Timestamp, +) +import pandas._testing as tm + +from pandas.tseries import offsets + + +class TestTimestampConstructors: + def test_constructor_datetime64_with_tz(self): + # GH#42288, GH#24559 + dt = np.datetime64("1970-01-01 05:00:00") + tzstr = "UTC+05:00" + + msg = "interpreted as a wall time" + with tm.assert_produces_warning(FutureWarning, match=msg): + ts = Timestamp(dt, tz=tzstr) + + # Check that we match the old behavior + alt = Timestamp(dt).tz_localize("UTC").tz_convert(tzstr) + assert ts == alt + + # Check that we *don't* match the future behavior + assert ts.hour != 5 + expected_future = Timestamp(dt).tz_localize(tzstr) + assert ts != expected_future + + def test_constructor(self): + base_str = "2014-07-01 09:00" + base_dt = datetime(2014, 7, 1, 9) + base_expected = 1_404_205_200_000_000_000 + + # confirm base representation is correct + assert calendar.timegm(base_dt.timetuple()) * 1_000_000_000 == base_expected + + tests = [ + (base_str, base_dt, base_expected), + ( + "2014-07-01 10:00", + datetime(2014, 7, 1, 10), + base_expected + 3600 * 1_000_000_000, + ), + ( + "2014-07-01 09:00:00.000008000", + datetime(2014, 7, 1, 9, 0, 0, 8), + base_expected + 8000, + ), + ( + "2014-07-01 09:00:00.000000005", + Timestamp("2014-07-01 09:00:00.000000005"), + base_expected + 5, + ), + ] + + timezones = [ + (None, 0), + ("UTC", 0), + (pytz.utc, 0), + ("Asia/Tokyo", 9), + ("US/Eastern", -4), + ("dateutil/US/Pacific", -7), + (pytz.FixedOffset(-180), -3), + (dateutil.tz.tzoffset(None, 18000), 5), + ] + + for date_str, date, expected in tests: + for result in [Timestamp(date_str), Timestamp(date)]: + # only with timestring + assert result.value == expected + + # re-creation shouldn't affect to internal value + result = Timestamp(result) + assert result.value == expected + + # with timezone + for tz, offset in timezones: + for result in [Timestamp(date_str, tz=tz), Timestamp(date, tz=tz)]: + expected_tz = expected - offset * 3600 * 1_000_000_000 + assert result.value == expected_tz + + # should preserve tz + result = Timestamp(result) + assert result.value == expected_tz + + # should convert to UTC + if tz is not None: + result = Timestamp(result).tz_convert("UTC") + else: + result = Timestamp(result, tz="UTC") + expected_utc = expected - offset * 3600 * 1_000_000_000 + assert result.value == expected_utc + + def test_constructor_with_stringoffset(self): + # GH 7833 + base_str = "2014-07-01 11:00:00+02:00" + base_dt = datetime(2014, 7, 1, 9) + base_expected = 1_404_205_200_000_000_000 + + # confirm base representation is correct + assert calendar.timegm(base_dt.timetuple()) * 1_000_000_000 == base_expected + + tests = [ + (base_str, base_expected), + ("2014-07-01 12:00:00+02:00", base_expected + 3600 * 1_000_000_000), + ("2014-07-01 11:00:00.000008000+02:00", base_expected + 8000), + ("2014-07-01 11:00:00.000000005+02:00", base_expected + 5), + ] + + timezones = [ + (None, 0), + ("UTC", 0), + (pytz.utc, 0), + ("Asia/Tokyo", 9), + ("US/Eastern", -4), + ("dateutil/US/Pacific", -7), + (pytz.FixedOffset(-180), -3), + (dateutil.tz.tzoffset(None, 18000), 5), + ] + + for date_str, expected in tests: + for result in [Timestamp(date_str)]: + # only with timestring + assert result.value == expected + + # re-creation shouldn't affect to internal value + result = Timestamp(result) + assert result.value == expected + + # with timezone + for tz, offset in timezones: + result = Timestamp(date_str, tz=tz) + 
expected_tz = expected + assert result.value == expected_tz + + # should preserve tz + result = Timestamp(result) + assert result.value == expected_tz + + # should convert to UTC + result = Timestamp(result).tz_convert("UTC") + expected_utc = expected + assert result.value == expected_utc + + # This should be 2013-11-01 05:00 in UTC + # converted to Chicago tz + result = Timestamp("2013-11-01 00:00:00-0500", tz="America/Chicago") + assert result.value == Timestamp("2013-11-01 05:00").value + expected = "Timestamp('2013-11-01 00:00:00-0500', tz='America/Chicago')" + assert repr(result) == expected + assert result == eval(repr(result)) + + # This should be 2013-11-01 05:00 in UTC + # converted to Tokyo tz (+09:00) + result = Timestamp("2013-11-01 00:00:00-0500", tz="Asia/Tokyo") + assert result.value == Timestamp("2013-11-01 05:00").value + expected = "Timestamp('2013-11-01 14:00:00+0900', tz='Asia/Tokyo')" + assert repr(result) == expected + assert result == eval(repr(result)) + + # GH11708 + # This should be 2015-11-18 10:00 in UTC + # converted to Asia/Katmandu + result = Timestamp("2015-11-18 15:45:00+05:45", tz="Asia/Katmandu") + assert result.value == Timestamp("2015-11-18 10:00").value + expected = "Timestamp('2015-11-18 15:45:00+0545', tz='Asia/Katmandu')" + assert repr(result) == expected + assert result == eval(repr(result)) + + # This should be 2015-11-18 10:00 in UTC + # converted to Asia/Kolkata + result = Timestamp("2015-11-18 15:30:00+05:30", tz="Asia/Kolkata") + assert result.value == Timestamp("2015-11-18 10:00").value + expected = "Timestamp('2015-11-18 15:30:00+0530', tz='Asia/Kolkata')" + assert repr(result) == expected + assert result == eval(repr(result)) + + def test_constructor_invalid(self): + msg = "Cannot convert input" + with pytest.raises(TypeError, match=msg): + Timestamp(slice(2)) + msg = "Cannot convert Period" + with pytest.raises(ValueError, match=msg): + Timestamp(Period("1000-01-01")) + + def test_constructor_invalid_tz(self): + # GH#17690 + msg = ( + "Argument 'tzinfo' has incorrect type " + r"\(expected datetime.tzinfo, got str\)" + ) + with pytest.raises(TypeError, match=msg): + Timestamp("2017-10-22", tzinfo="US/Eastern") + + msg = "at most one of" + with pytest.raises(ValueError, match=msg): + Timestamp("2017-10-22", tzinfo=pytz.utc, tz="UTC") + + msg = "Invalid frequency:" + msg2 = "The 'freq' argument" + with pytest.raises(ValueError, match=msg): + # GH#5168 + # case where user tries to pass tz as an arg, not kwarg, gets + # interpreted as a `freq` + with tm.assert_produces_warning(FutureWarning, match=msg2): + Timestamp("2012-01-01", "US/Pacific") + + def test_constructor_strptime(self): + # GH25016 + # Test support for Timestamp.strptime + fmt = "%Y%m%d-%H%M%S-%f%z" + ts = "20190129-235348-000001+0000" + msg = r"Timestamp.strptime\(\) is not implemented" + with pytest.raises(NotImplementedError, match=msg): + Timestamp.strptime(ts, fmt) + + def test_constructor_tz_or_tzinfo(self): + # GH#17943, GH#17690, GH#5168 + stamps = [ + Timestamp(year=2017, month=10, day=22, tz="UTC"), + Timestamp(year=2017, month=10, day=22, tzinfo=pytz.utc), + Timestamp(year=2017, month=10, day=22, tz=pytz.utc), + Timestamp(datetime(2017, 10, 22), tzinfo=pytz.utc), + Timestamp(datetime(2017, 10, 22), tz="UTC"), + Timestamp(datetime(2017, 10, 22), tz=pytz.utc), + ] + assert all(ts == stamps[0] for ts in stamps) + + def test_constructor_positional(self): + # see gh-10758 + msg = ( + "'NoneType' object cannot be interpreted as an integer" + if PY310 + else "an integer is 
required" + ) + with pytest.raises(TypeError, match=msg): + Timestamp(2000, 1) + + msg = "month must be in 1..12" + with pytest.raises(ValueError, match=msg): + Timestamp(2000, 0, 1) + with pytest.raises(ValueError, match=msg): + Timestamp(2000, 13, 1) + + msg = "day is out of range for month" + with pytest.raises(ValueError, match=msg): + Timestamp(2000, 1, 0) + with pytest.raises(ValueError, match=msg): + Timestamp(2000, 1, 32) + + # see gh-11630 + assert repr(Timestamp(2015, 11, 12)) == repr(Timestamp("20151112")) + assert repr(Timestamp(2015, 11, 12, 1, 2, 3, 999999)) == repr( + Timestamp("2015-11-12 01:02:03.999999") + ) + + def test_constructor_keyword(self): + # GH 10758 + msg = "function missing required argument 'day'|Required argument 'day'" + with pytest.raises(TypeError, match=msg): + Timestamp(year=2000, month=1) + + msg = "month must be in 1..12" + with pytest.raises(ValueError, match=msg): + Timestamp(year=2000, month=0, day=1) + with pytest.raises(ValueError, match=msg): + Timestamp(year=2000, month=13, day=1) + + msg = "day is out of range for month" + with pytest.raises(ValueError, match=msg): + Timestamp(year=2000, month=1, day=0) + with pytest.raises(ValueError, match=msg): + Timestamp(year=2000, month=1, day=32) + + assert repr(Timestamp(year=2015, month=11, day=12)) == repr( + Timestamp("20151112") + ) + + assert ( + repr( + Timestamp( + year=2015, + month=11, + day=12, + hour=1, + minute=2, + second=3, + microsecond=999999, + ) + ) + == repr(Timestamp("2015-11-12 01:02:03.999999")) + ) + + @pytest.mark.filterwarnings("ignore:Timestamp.freq is:FutureWarning") + @pytest.mark.filterwarnings("ignore:The 'freq' argument:FutureWarning") + def test_constructor_fromordinal(self): + base = datetime(2000, 1, 1) + + ts = Timestamp.fromordinal(base.toordinal(), freq="D") + assert base == ts + assert ts.freq == "D" + assert base.toordinal() == ts.toordinal() + + ts = Timestamp.fromordinal(base.toordinal(), tz="US/Eastern") + assert Timestamp("2000-01-01", tz="US/Eastern") == ts + assert base.toordinal() == ts.toordinal() + + # GH#3042 + dt = datetime(2011, 4, 16, 0, 0) + ts = Timestamp.fromordinal(dt.toordinal()) + assert ts.to_pydatetime() == dt + + # with a tzinfo + stamp = Timestamp("2011-4-16", tz="US/Eastern") + dt_tz = stamp.to_pydatetime() + ts = Timestamp.fromordinal(dt_tz.toordinal(), tz="US/Eastern") + assert ts.to_pydatetime() == dt_tz + + @pytest.mark.parametrize( + "result", + [ + Timestamp(datetime(2000, 1, 2, 3, 4, 5, 6), nanosecond=1), + Timestamp( + year=2000, + month=1, + day=2, + hour=3, + minute=4, + second=5, + microsecond=6, + nanosecond=1, + ), + Timestamp( + year=2000, + month=1, + day=2, + hour=3, + minute=4, + second=5, + microsecond=6, + nanosecond=1, + tz="UTC", + ), + Timestamp(2000, 1, 2, 3, 4, 5, 6, 1, None), + # error: Argument 9 to "Timestamp" has incompatible type "_UTCclass"; + # expected "Optional[int]" + Timestamp(2000, 1, 2, 3, 4, 5, 6, 1, pytz.UTC), # type: ignore[arg-type] + ], + ) + def test_constructor_nanosecond(self, result): + # GH 18898 + expected = Timestamp(datetime(2000, 1, 2, 3, 4, 5, 6), tz=result.tz) + expected = expected + Timedelta(nanoseconds=1) + assert result == expected + + @pytest.mark.parametrize("z", ["Z0", "Z00"]) + def test_constructor_invalid_Z0_isostring(self, z): + # GH 8910 + msg = "could not convert string to Timestamp" + with pytest.raises(ValueError, match=msg): + Timestamp(f"2014-11-02 01:00{z}") + + @pytest.mark.parametrize( + "arg", + [ + "year", + "month", + "day", + "hour", + "minute", + "second", + 
"microsecond", + "nanosecond", + ], + ) + def test_invalid_date_kwarg_with_string_input(self, arg): + kwarg = {arg: 1} + msg = "Cannot pass a date attribute keyword argument" + with pytest.raises(ValueError, match=msg): + Timestamp("2010-10-10 12:59:59.999999999", **kwarg) + + def test_out_of_bounds_integer_value(self): + # GH#26651 check that we raise OutOfBoundsDatetime, not OverflowError + msg = str(Timestamp.max.value * 2) + with pytest.raises(OutOfBoundsDatetime, match=msg): + Timestamp(Timestamp.max.value * 2) + msg = str(Timestamp.min.value * 2) + with pytest.raises(OutOfBoundsDatetime, match=msg): + Timestamp(Timestamp.min.value * 2) + + def test_out_of_bounds_value(self): + one_us = np.timedelta64(1).astype("timedelta64[us]") + + # By definition we can't go out of bounds in [ns], so we + # convert the datetime64s to [us] so we can go out of bounds + min_ts_us = np.datetime64(Timestamp.min).astype("M8[us]") + one_us + max_ts_us = np.datetime64(Timestamp.max).astype("M8[us]") + + # No error for the min/max datetimes + Timestamp(min_ts_us) + Timestamp(max_ts_us) + + msg = "Out of bounds" + # One us less than the minimum is an error + with pytest.raises(ValueError, match=msg): + Timestamp(min_ts_us - one_us) + + # One us more than the maximum is an error + with pytest.raises(ValueError, match=msg): + Timestamp(max_ts_us + one_us) + + def test_out_of_bounds_string(self): + msg = "Out of bounds" + with pytest.raises(ValueError, match=msg): + Timestamp("1676-01-01") + with pytest.raises(ValueError, match=msg): + Timestamp("2263-01-01") + + def test_barely_out_of_bounds(self): + # GH#19529 + # GH#19382 close enough to bounds that dropping nanos would result + # in an in-bounds datetime + msg = "Out of bounds nanosecond timestamp: 2262-04-11 23:47:16" + with pytest.raises(OutOfBoundsDatetime, match=msg): + Timestamp("2262-04-11 23:47:16.854775808") + + def test_bounds_with_different_units(self): + out_of_bounds_dates = ("1677-09-21", "2262-04-12") + + time_units = ("D", "h", "m", "s", "ms", "us") + + for date_string in out_of_bounds_dates: + for unit in time_units: + dt64 = np.datetime64(date_string, unit) + msg = "Out of bounds" + with pytest.raises(OutOfBoundsDatetime, match=msg): + Timestamp(dt64) + + in_bounds_dates = ("1677-09-23", "2262-04-11") + + for date_string in in_bounds_dates: + for unit in time_units: + dt64 = np.datetime64(date_string, unit) + Timestamp(dt64) + + @pytest.mark.parametrize("arg", ["001-01-01", "0001-01-01"]) + def test_out_of_bounds_string_consistency(self, arg): + # GH 15829 + msg = "Out of bounds" + with pytest.raises(OutOfBoundsDatetime, match=msg): + Timestamp(arg) + + def test_min_valid(self): + # Ensure that Timestamp.min is a valid Timestamp + Timestamp(Timestamp.min) + + def test_max_valid(self): + # Ensure that Timestamp.max is a valid Timestamp + Timestamp(Timestamp.max) + + def test_now(self): + # GH#9000 + ts_from_string = Timestamp("now") + ts_from_method = Timestamp.now() + ts_datetime = datetime.now() + + ts_from_string_tz = Timestamp("now", tz="US/Eastern") + ts_from_method_tz = Timestamp.now(tz="US/Eastern") + + # Check that the delta between the times is less than 1s (arbitrarily + # small) + delta = Timedelta(seconds=1) + assert abs(ts_from_method - ts_from_string) < delta + assert abs(ts_datetime - ts_from_method) < delta + assert abs(ts_from_method_tz - ts_from_string_tz) < delta + assert ( + abs( + ts_from_string_tz.tz_localize(None) + - ts_from_method_tz.tz_localize(None) + ) + < delta + ) + + def test_today(self): + ts_from_string = 
Timestamp("today") + ts_from_method = Timestamp.today() + ts_datetime = datetime.today() + + ts_from_string_tz = Timestamp("today", tz="US/Eastern") + ts_from_method_tz = Timestamp.today(tz="US/Eastern") + + # Check that the delta between the times is less than 1s (arbitrarily + # small) + delta = Timedelta(seconds=1) + assert abs(ts_from_method - ts_from_string) < delta + assert abs(ts_datetime - ts_from_method) < delta + assert abs(ts_from_method_tz - ts_from_string_tz) < delta + assert ( + abs( + ts_from_string_tz.tz_localize(None) + - ts_from_method_tz.tz_localize(None) + ) + < delta + ) + + @pytest.mark.parametrize("tz", [None, pytz.timezone("US/Pacific")]) + def test_disallow_setting_tz(self, tz): + # GH 3746 + ts = Timestamp("2010") + msg = "Cannot directly set timezone" + with pytest.raises(AttributeError, match=msg): + ts.tz = tz + + @pytest.mark.parametrize("offset", ["+0300", "+0200"]) + def test_construct_timestamp_near_dst(self, offset): + # GH 20854 + expected = Timestamp(f"2016-10-30 03:00:00{offset}", tz="Europe/Helsinki") + result = Timestamp(expected).tz_convert("Europe/Helsinki") + assert result == expected + + @pytest.mark.parametrize( + "arg", ["2013/01/01 00:00:00+09:00", "2013-01-01 00:00:00+09:00"] + ) + def test_construct_with_different_string_format(self, arg): + # GH 12064 + result = Timestamp(arg) + expected = Timestamp(datetime(2013, 1, 1), tz=pytz.FixedOffset(540)) + assert result == expected + + def test_construct_timestamp_preserve_original_frequency(self): + # GH 22311 + with tm.assert_produces_warning(FutureWarning, match="The 'freq' argument"): + result = Timestamp(Timestamp("2010-08-08", freq="D")).freq + expected = offsets.Day() + assert result == expected + + def test_constructor_invalid_frequency(self): + # GH 22311 + msg = "Invalid frequency:" + msg2 = "The 'freq' argument" + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(FutureWarning, match=msg2): + Timestamp("2012-01-01", freq=[]) + + @pytest.mark.parametrize("box", [datetime, Timestamp]) + def test_raise_tz_and_tzinfo_in_datetime_input(self, box): + # GH 23579 + kwargs = {"year": 2018, "month": 1, "day": 1, "tzinfo": pytz.utc} + msg = "Cannot pass a datetime or Timestamp" + with pytest.raises(ValueError, match=msg): + Timestamp(box(**kwargs), tz="US/Pacific") + msg = "Cannot pass a datetime or Timestamp" + with pytest.raises(ValueError, match=msg): + Timestamp(box(**kwargs), tzinfo=pytz.timezone("US/Pacific")) + + def test_dont_convert_dateutil_utc_to_pytz_utc(self): + result = Timestamp(datetime(2018, 1, 1), tz=tzutc()) + expected = Timestamp(datetime(2018, 1, 1)).tz_localize(tzutc()) + assert result == expected + + def test_constructor_subclassed_datetime(self): + # GH 25851 + # ensure that subclassed datetime works for + # Timestamp creation + class SubDatetime(datetime): + pass + + data = SubDatetime(2000, 1, 1) + result = Timestamp(data) + expected = Timestamp(2000, 1, 1) + assert result == expected + + def test_constructor_fromisocalendar(self): + # GH 30395 + expected_timestamp = Timestamp("2000-01-03 00:00:00") + expected_stdlib = datetime.fromisocalendar(2000, 1, 1) + result = Timestamp.fromisocalendar(2000, 1, 1) + assert result == expected_timestamp + assert result == expected_stdlib + assert isinstance(result, Timestamp) + + +def test_constructor_ambigous_dst(): + # GH 24329 + # Make sure that calling Timestamp constructor + # on Timestamp created from ambiguous time + # doesn't change Timestamp.value + ts = Timestamp(1382835600000000000, 
tz="dateutil/Europe/London") + expected = ts.value + result = Timestamp(ts).value + assert result == expected + + +@pytest.mark.parametrize("epoch", [1552211999999999872, 1552211999999999999]) +def test_constructor_before_dst_switch(epoch): + # GH 31043 + # Make sure that calling Timestamp constructor + # on time just before DST switch doesn't lead to + # nonexistent time or value change + ts = Timestamp(epoch, tz="dateutil/America/Los_Angeles") + result = ts.tz.dst(ts) + expected = timedelta(seconds=0) + assert Timestamp(ts).value == epoch + assert result == expected + + +def test_timestamp_constructor_identity(): + # Test for #30543 + expected = Timestamp("2017-01-01T12") + result = Timestamp(expected) + assert result is expected + + +@pytest.mark.parametrize("kwargs", [{}, {"year": 2020}, {"year": 2020, "month": 1}]) +def test_constructor_missing_keyword(kwargs): + # GH 31200 + + # The exact error message of datetime() depends on its version + msg1 = r"function missing required argument '(year|month|day)' \(pos [123]\)" + msg2 = r"Required argument '(year|month|day)' \(pos [123]\) not found" + msg = "|".join([msg1, msg2]) + + with pytest.raises(TypeError, match=msg): + Timestamp(**kwargs) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/test_formats.py b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/test_formats.py new file mode 100644 index 0000000..71dbf35 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/test_formats.py @@ -0,0 +1,71 @@ +import pytest + +from pandas import Timestamp + +ts_no_ns = Timestamp( + year=2019, + month=5, + day=18, + hour=15, + minute=17, + second=8, + microsecond=132263, +) +ts_ns = Timestamp( + year=2019, + month=5, + day=18, + hour=15, + minute=17, + second=8, + microsecond=132263, + nanosecond=123, +) +ts_ns_tz = Timestamp( + year=2019, + month=5, + day=18, + hour=15, + minute=17, + second=8, + microsecond=132263, + nanosecond=123, + tz="UTC", +) +ts_no_us = Timestamp( + year=2019, + month=5, + day=18, + hour=15, + minute=17, + second=8, + microsecond=0, + nanosecond=123, +) + + +@pytest.mark.parametrize( + "ts, timespec, expected_iso", + [ + (ts_no_ns, "auto", "2019-05-18T15:17:08.132263"), + (ts_no_ns, "seconds", "2019-05-18T15:17:08"), + (ts_no_ns, "nanoseconds", "2019-05-18T15:17:08.132263000"), + (ts_ns, "auto", "2019-05-18T15:17:08.132263123"), + (ts_ns, "hours", "2019-05-18T15"), + (ts_ns, "minutes", "2019-05-18T15:17"), + (ts_ns, "seconds", "2019-05-18T15:17:08"), + (ts_ns, "milliseconds", "2019-05-18T15:17:08.132"), + (ts_ns, "microseconds", "2019-05-18T15:17:08.132263"), + (ts_ns, "nanoseconds", "2019-05-18T15:17:08.132263123"), + (ts_ns_tz, "auto", "2019-05-18T15:17:08.132263123+00:00"), + (ts_ns_tz, "hours", "2019-05-18T15+00:00"), + (ts_ns_tz, "minutes", "2019-05-18T15:17+00:00"), + (ts_ns_tz, "seconds", "2019-05-18T15:17:08+00:00"), + (ts_ns_tz, "milliseconds", "2019-05-18T15:17:08.132+00:00"), + (ts_ns_tz, "microseconds", "2019-05-18T15:17:08.132263+00:00"), + (ts_ns_tz, "nanoseconds", "2019-05-18T15:17:08.132263123+00:00"), + (ts_no_us, "auto", "2019-05-18T15:17:08.000000123"), + ], +) +def test_isoformat(ts, timespec, expected_iso): + assert ts.isoformat(timespec=timespec) == expected_iso diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/test_rendering.py b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/test_rendering.py new file mode 100644 index 0000000..2f88f96 --- /dev/null +++ 
b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/test_rendering.py @@ -0,0 +1,107 @@ +import pprint + +import pytest +import pytz # noqa # a test below uses pytz but only inside an `eval` call + +from pandas import Timestamp +import pandas._testing as tm + + +class TestTimestampRendering: + + timezones = ["UTC", "Asia/Tokyo", "US/Eastern", "dateutil/US/Pacific"] + + @pytest.mark.parametrize("tz", timezones) + @pytest.mark.parametrize("freq", ["D", "M", "S", "N"]) + @pytest.mark.parametrize( + "date", ["2014-03-07", "2014-01-01 09:00", "2014-01-01 00:00:00.000000001"] + ) + def test_repr(self, date, freq, tz): + # avoid matching the timezone name + freq_repr = f"'{freq}'" + if tz.startswith("dateutil"): + tz_repr = tz.replace("dateutil", "") + else: + tz_repr = tz + + date_only = Timestamp(date) + assert date in repr(date_only) + assert tz_repr not in repr(date_only) + assert freq_repr not in repr(date_only) + assert date_only == eval(repr(date_only)) + + date_tz = Timestamp(date, tz=tz) + assert date in repr(date_tz) + assert tz_repr in repr(date_tz) + assert freq_repr not in repr(date_tz) + assert date_tz == eval(repr(date_tz)) + + msg = "The 'freq' argument in Timestamp" + with tm.assert_produces_warning(FutureWarning, match=msg): + date_freq = Timestamp(date, freq=freq) + assert date in repr(date_freq) + assert tz_repr not in repr(date_freq) + assert freq_repr in repr(date_freq) + with tm.assert_produces_warning( + FutureWarning, match=msg, check_stacklevel=False + ): + assert date_freq == eval(repr(date_freq)) + + with tm.assert_produces_warning(FutureWarning, match=msg): + date_tz_freq = Timestamp(date, tz=tz, freq=freq) + assert date in repr(date_tz_freq) + assert tz_repr in repr(date_tz_freq) + assert freq_repr in repr(date_tz_freq) + with tm.assert_produces_warning( + FutureWarning, match=msg, check_stacklevel=False + ): + assert date_tz_freq == eval(repr(date_tz_freq)) + + def test_repr_utcoffset(self): + # This can cause the tz field to be populated, but it's redundant to + # include this information in the date-string. 
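+ # e.g. the fixed -0400 offset below is already visible in the date-string, + # while the tzinfo surfaces separately as pytz.FixedOffset(-240) in the repr.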
+ date_with_utc_offset = Timestamp("2014-03-13 00:00:00-0400", tz=None) + assert "2014-03-13 00:00:00-0400" in repr(date_with_utc_offset) + assert "tzoffset" not in repr(date_with_utc_offset) + assert "pytz.FixedOffset(-240)" in repr(date_with_utc_offset) + expr = repr(date_with_utc_offset).replace( + "'pytz.FixedOffset(-240)'", "pytz.FixedOffset(-240)" + ) + assert date_with_utc_offset == eval(expr) + + def test_timestamp_repr_pre1900(self): + # pre-1900 + stamp = Timestamp("1850-01-01", tz="US/Eastern") + repr(stamp) + + iso8601 = "1850-01-01 01:23:45.012345" + stamp = Timestamp(iso8601, tz="US/Eastern") + result = repr(stamp) + assert iso8601 in result + + def test_pprint(self): + # GH#12622 + nested_obj = {"foo": 1, "bar": [{"w": {"a": Timestamp("2011-01-01")}}] * 10} + result = pprint.pformat(nested_obj, width=50) + expected = r"""{'bar': [{'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}], + 'foo': 1}""" + assert result == expected + + def test_to_timestamp_repr_is_code(self): + zs = [ + Timestamp("99-04-17 00:00:00", tz="UTC"), + Timestamp("2001-04-17 00:00:00", tz="UTC"), + Timestamp("2001-04-17 00:00:00", tz="America/Los_Angeles"), + Timestamp("2001-04-17 00:00:00", tz=None), + ] + for z in zs: + assert eval(repr(z)) == z diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/test_timestamp.py b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/test_timestamp.py new file mode 100644 index 0000000..5f7cca9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/test_timestamp.py @@ -0,0 +1,694 @@ +""" test the scalar Timestamp """ + +import calendar +from datetime import ( + datetime, + timedelta, +) +import locale +import pickle +import unicodedata + +from dateutil.tz import tzutc +import numpy as np +import pytest +import pytz +from pytz import ( + timezone, + utc, +) + +from pandas._libs.tslibs.timezones import ( + dateutil_gettz as gettz, + get_timezone, +) +import pandas.util._test_decorators as td + +from pandas import ( + NaT, + Timedelta, + Timestamp, +) +import pandas._testing as tm + +from pandas.tseries import offsets + + +class TestTimestampProperties: + def test_freq_deprecation(self): + # GH#41586 + msg = "The 'freq' argument in Timestamp is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + # warning issued at construction + ts = Timestamp("2021-06-01", freq="D") + ts2 = Timestamp("2021-06-01", freq="B") + + msg = "Timestamp.freq is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + # warning issued at attribute lookup + ts.freq + + for per in ["month", "quarter", "year"]: + for side in ["start", "end"]: + attr = f"is_{per}_{side}" + + with tm.assert_produces_warning(FutureWarning, match=msg): + getattr(ts2, attr) + + # is_(month|quarter|year)_(start|end) does _not_ issue a warning + # with freq="D" bc the result will be unaffected by the deprecation + with tm.assert_produces_warning(None): + getattr(ts, attr) + + @pytest.mark.filterwarnings("ignore:The 'freq' argument:FutureWarning") + 
@pytest.mark.filterwarnings("ignore:Timestamp.freq is deprecated:FutureWarning") + def test_properties_business(self): + ts = Timestamp("2017-10-01", freq="B") + control = Timestamp("2017-10-01") + assert ts.dayofweek == 6 + assert ts.day_of_week == 6 + assert not ts.is_month_start # not a weekday + assert not ts.freq.is_month_start(ts) + assert ts.freq.is_month_start(ts + Timedelta(days=1)) + assert not ts.is_quarter_start # not a weekday + assert not ts.freq.is_quarter_start(ts) + assert ts.freq.is_quarter_start(ts + Timedelta(days=1)) + # Control case: non-business is month/qtr start + assert control.is_month_start + assert control.is_quarter_start + + ts = Timestamp("2017-09-30", freq="B") + control = Timestamp("2017-09-30") + assert ts.dayofweek == 5 + assert ts.day_of_week == 5 + assert not ts.is_month_end # not a weekday + assert not ts.freq.is_month_end(ts) + assert ts.freq.is_month_end(ts - Timedelta(days=1)) + assert not ts.is_quarter_end # not a weekday + assert not ts.freq.is_quarter_end(ts) + assert ts.freq.is_quarter_end(ts - Timedelta(days=1)) + # Control case: non-business is month/qtr end + assert control.is_month_end + assert control.is_quarter_end + + @pytest.mark.parametrize( + "attr, expected", + [ + ["year", 2014], + ["month", 12], + ["day", 31], + ["hour", 23], + ["minute", 59], + ["second", 0], + ["microsecond", 0], + ["nanosecond", 0], + ["dayofweek", 2], + ["day_of_week", 2], + ["quarter", 4], + ["dayofyear", 365], + ["day_of_year", 365], + ["week", 1], + ["daysinmonth", 31], + ], + ) + @pytest.mark.parametrize("tz", [None, "US/Eastern"]) + def test_fields(self, attr, expected, tz): + # GH 10050 + # GH 13303 + ts = Timestamp("2014-12-31 23:59:00", tz=tz) + result = getattr(ts, attr) + # check that the result is int-like + assert isinstance(result, int) + assert result == expected + + @pytest.mark.parametrize("tz", [None, "US/Eastern"]) + def test_millisecond_raises(self, tz): + ts = Timestamp("2014-12-31 23:59:00", tz=tz) + msg = "'Timestamp' object has no attribute 'millisecond'" + with pytest.raises(AttributeError, match=msg): + ts.millisecond + + @pytest.mark.parametrize( + "start", ["is_month_start", "is_quarter_start", "is_year_start"] + ) + @pytest.mark.parametrize("tz", [None, "US/Eastern"]) + def test_is_start(self, start, tz): + ts = Timestamp("2014-01-01 00:00:00", tz=tz) + assert getattr(ts, start) + + @pytest.mark.parametrize("end", ["is_month_end", "is_year_end", "is_quarter_end"]) + @pytest.mark.parametrize("tz", [None, "US/Eastern"]) + def test_is_end(self, end, tz): + ts = Timestamp("2014-12-31 23:59:59", tz=tz) + assert getattr(ts, end) + + # GH 12806 + @pytest.mark.parametrize( + "data", + [Timestamp("2017-08-28 23:00:00"), Timestamp("2017-08-28 23:00:00", tz="EST")], + ) + # error: Unsupported operand types for + ("List[None]" and "List[str]") + @pytest.mark.parametrize( + "time_locale", [None] + (tm.get_locales() or []) # type: ignore[operator] + ) + def test_names(self, data, time_locale): + # GH 17354 + # Test .day_name(), .month_name() + if time_locale is None: + expected_day = "Monday" + expected_month = "August" + else: + with tm.set_locale(time_locale, locale.LC_TIME): + expected_day = calendar.day_name[0].capitalize() + expected_month = calendar.month_name[8].capitalize() + + result_day = data.day_name(time_locale) + result_month = data.month_name(time_locale) + + # Work around https://github.com/pandas-dev/pandas/issues/22342 + # different normalizations + expected_day = unicodedata.normalize("NFD", expected_day) + expected_month = 
unicodedata.normalize("NFD", expected_month) + + result_day = unicodedata.normalize("NFD", result_day) + result_month = unicodedata.normalize("NFD", result_month) + + assert result_day == expected_day + assert result_month == expected_month + + # Test NaT + nan_ts = Timestamp(NaT) + assert np.isnan(nan_ts.day_name(time_locale)) + assert np.isnan(nan_ts.month_name(time_locale)) + + def test_is_leap_year(self, tz_naive_fixture): + tz = tz_naive_fixture + # GH 13727 + dt = Timestamp("2000-01-01 00:00:00", tz=tz) + assert dt.is_leap_year + assert isinstance(dt.is_leap_year, bool) + + dt = Timestamp("1999-01-01 00:00:00", tz=tz) + assert not dt.is_leap_year + + dt = Timestamp("2004-01-01 00:00:00", tz=tz) + assert dt.is_leap_year + + dt = Timestamp("2100-01-01 00:00:00", tz=tz) + assert not dt.is_leap_year + + def test_woy_boundary(self): + # make sure weeks at year boundaries are correct + d = datetime(2013, 12, 31) + result = Timestamp(d).week + expected = 1 # ISO standard + assert result == expected + + d = datetime(2008, 12, 28) + result = Timestamp(d).week + expected = 52 # ISO standard + assert result == expected + + d = datetime(2009, 12, 31) + result = Timestamp(d).week + expected = 53 # ISO standard + assert result == expected + + d = datetime(2010, 1, 1) + result = Timestamp(d).week + expected = 53 # ISO standard + assert result == expected + + d = datetime(2010, 1, 3) + result = Timestamp(d).week + expected = 53 # ISO standard + assert result == expected + + result = np.array( + [ + Timestamp(datetime(*args)).week + for args in [(2000, 1, 1), (2000, 1, 2), (2005, 1, 1), (2005, 1, 2)] + ] + ) + assert (result == [52, 52, 53, 53]).all() + + def test_resolution(self): + # GH#21336, GH#21365 + dt = Timestamp("2100-01-01 00:00:00") + assert dt.resolution == Timedelta(nanoseconds=1) + + # Check that the attribute is available on the class, mirroring + # the stdlib datetime behavior + assert Timestamp.resolution == Timedelta(nanoseconds=1) + + +class TestTimestamp: + def test_tz(self): + tstr = "2014-02-01 09:00" + ts = Timestamp(tstr) + local = ts.tz_localize("Asia/Tokyo") + assert local.hour == 9 + assert local == Timestamp(tstr, tz="Asia/Tokyo") + conv = local.tz_convert("US/Eastern") + assert conv == Timestamp("2014-01-31 19:00", tz="US/Eastern") + assert conv.hour == 19 + + # preserves nanosecond + ts = Timestamp(tstr) + offsets.Nano(5) + local = ts.tz_localize("Asia/Tokyo") + assert local.hour == 9 + assert local.nanosecond == 5 + conv = local.tz_convert("US/Eastern") + assert conv.nanosecond == 5 + assert conv.hour == 19 + + def test_utc_z_designator(self): + assert get_timezone(Timestamp("2014-11-02 01:00Z").tzinfo) is utc + + def test_asm8(self): + np.random.seed(7_960_929) + ns = [Timestamp.min.value, Timestamp.max.value, 1000] + + for n in ns: + assert ( + Timestamp(n).asm8.view("i8") == np.datetime64(n, "ns").view("i8") == n + ) + + assert Timestamp("nat").asm8.view("i8") == np.datetime64("nat", "ns").view("i8") + + def test_class_ops_pytz(self): + def compare(x, y): + assert int((Timestamp(x).value - Timestamp(y).value) / 1e9) == 0 + + compare(Timestamp.now(), datetime.now()) + compare(Timestamp.now("UTC"), datetime.now(timezone("UTC"))) + compare(Timestamp.utcnow(), datetime.utcnow()) + compare(Timestamp.today(), datetime.today()) + current_time = calendar.timegm(datetime.now().utctimetuple()) + msg = "timezone-aware Timestamp with UTC" + with tm.assert_produces_warning(FutureWarning, match=msg): + # GH#22451 + ts_utc = Timestamp.utcfromtimestamp(current_time) + compare( + 
ts_utc, + datetime.utcfromtimestamp(current_time), + ) + compare( + Timestamp.fromtimestamp(current_time), datetime.fromtimestamp(current_time) + ) + compare( + # Support tz kwarg in Timestamp.fromtimestamp + Timestamp.fromtimestamp(current_time, "UTC"), + datetime.fromtimestamp(current_time, utc), + ) + compare( + # Support tz kwarg in Timestamp.fromtimestamp + Timestamp.fromtimestamp(current_time, tz="UTC"), + datetime.fromtimestamp(current_time, utc), + ) + + date_component = datetime.utcnow() + time_component = (date_component + timedelta(minutes=10)).time() + compare( + Timestamp.combine(date_component, time_component), + datetime.combine(date_component, time_component), + ) + + def test_class_ops_dateutil(self): + def compare(x, y): + assert ( + int( + np.round(Timestamp(x).value / 1e9) + - np.round(Timestamp(y).value / 1e9) + ) + == 0 + ) + + compare(Timestamp.now(), datetime.now()) + compare(Timestamp.now("UTC"), datetime.now(tzutc())) + compare(Timestamp.utcnow(), datetime.utcnow()) + compare(Timestamp.today(), datetime.today()) + current_time = calendar.timegm(datetime.now().utctimetuple()) + + msg = "timezone-aware Timestamp with UTC" + with tm.assert_produces_warning(FutureWarning, match=msg): + # GH#22451 + ts_utc = Timestamp.utcfromtimestamp(current_time) + + compare( + ts_utc, + datetime.utcfromtimestamp(current_time), + ) + compare( + Timestamp.fromtimestamp(current_time), datetime.fromtimestamp(current_time) + ) + + date_component = datetime.utcnow() + time_component = (date_component + timedelta(minutes=10)).time() + compare( + Timestamp.combine(date_component, time_component), + datetime.combine(date_component, time_component), + ) + + def test_basics_nanos(self): + val = np.int64(946_684_800_000_000_000).view("M8[ns]") + stamp = Timestamp(val.view("i8") + 500) + assert stamp.year == 2000 + assert stamp.month == 1 + assert stamp.microsecond == 0 + assert stamp.nanosecond == 500 + + # GH 14415 + val = np.iinfo(np.int64).min + 80_000_000_000_000 + stamp = Timestamp(val) + assert stamp.year == 1677 + assert stamp.month == 9 + assert stamp.day == 21 + assert stamp.microsecond == 145224 + assert stamp.nanosecond == 192 + + @pytest.mark.parametrize( + "value, check_kwargs", + [ + [946688461000000000, {}], + [946688461000000000 / 1000, {"unit": "us"}], + [946688461000000000 / 1_000_000, {"unit": "ms"}], + [946688461000000000 / 1_000_000_000, {"unit": "s"}], + [10957, {"unit": "D", "h": 0}], + [ + (946688461000000000 + 500000) / 1000000000, + {"unit": "s", "us": 499, "ns": 964}, + ], + [ + (946688461000000000 + 500000000) / 1000000000, + {"unit": "s", "us": 500000}, + ], + [(946688461000000000 + 500000) / 1000000, {"unit": "ms", "us": 500}], + [(946688461000000000 + 500000) / 1000, {"unit": "us", "us": 500}], + [(946688461000000000 + 500000000) / 1000000, {"unit": "ms", "us": 500000}], + [946688461000000000 / 1000.0 + 5, {"unit": "us", "us": 5}], + [946688461000000000 / 1000.0 + 5000, {"unit": "us", "us": 5000}], + [946688461000000000 / 1000000.0 + 0.5, {"unit": "ms", "us": 500}], + [946688461000000000 / 1000000.0 + 0.005, {"unit": "ms", "us": 5, "ns": 5}], + [946688461000000000 / 1000000000.0 + 0.5, {"unit": "s", "us": 500000}], + [10957 + 0.5, {"unit": "D", "h": 12}], + ], + ) + def test_unit(self, value, check_kwargs): + def check(value, unit=None, h=1, s=1, us=0, ns=0): + stamp = Timestamp(value, unit=unit) + assert stamp.year == 2000 + assert stamp.month == 1 + assert stamp.day == 1 + assert stamp.hour == h + if unit != "D": + assert stamp.minute == 1 + assert stamp.second 
== s + assert stamp.microsecond == us + else: + assert stamp.minute == 0 + assert stamp.second == 0 + assert stamp.microsecond == 0 + assert stamp.nanosecond == ns + + check(value, **check_kwargs) + + def test_roundtrip(self): + + # test value to string and back conversions + # further test accessors + base = Timestamp("20140101 00:00:00") + + result = Timestamp(base.value + Timedelta("5ms").value) + assert result == Timestamp(f"{base}.005000") + assert result.microsecond == 5000 + + result = Timestamp(base.value + Timedelta("5us").value) + assert result == Timestamp(f"{base}.000005") + assert result.microsecond == 5 + + result = Timestamp(base.value + Timedelta("5ns").value) + assert result == Timestamp(f"{base}.000000005") + assert result.nanosecond == 5 + assert result.microsecond == 0 + + result = Timestamp(base.value + Timedelta("6ms 5us").value) + assert result == Timestamp(f"{base}.006005") + assert result.microsecond == 5 + 6 * 1000 + + result = Timestamp(base.value + Timedelta("200ms 5us").value) + assert result == Timestamp(f"{base}.200005") + assert result.microsecond == 5 + 200 * 1000 + + def test_hash_equivalent(self): + d = {datetime(2011, 1, 1): 5} + stamp = Timestamp(datetime(2011, 1, 1)) + assert d[stamp] == 5 + + @pytest.mark.parametrize( + "timezone, year, month, day, hour", + [["America/Chicago", 2013, 11, 3, 1], ["America/Santiago", 2021, 4, 3, 23]], + ) + def test_hash_timestamp_with_fold(self, timezone, year, month, day, hour): + # see gh-33931 + test_timezone = gettz(timezone) + transition_1 = Timestamp( + year=year, + month=month, + day=day, + hour=hour, + minute=0, + fold=0, + tzinfo=test_timezone, + ) + transition_2 = Timestamp( + year=year, + month=month, + day=day, + hour=hour, + minute=0, + fold=1, + tzinfo=test_timezone, + ) + assert hash(transition_1) == hash(transition_2) + + def test_tz_conversion_freq(self, tz_naive_fixture): + # GH25241 + with tm.assert_produces_warning(FutureWarning, match="freq"): + t1 = Timestamp("2019-01-01 10:00", freq="H") + assert t1.tz_localize(tz=tz_naive_fixture).freq == t1.freq + with tm.assert_produces_warning(FutureWarning, match="freq"): + t2 = Timestamp("2019-01-02 12:00", tz="UTC", freq="T") + assert t2.tz_convert(tz="UTC").freq == t2.freq + + def test_pickle_freq_no_warning(self): + # GH#41949 we don't want a warning on unpickling + with tm.assert_produces_warning(FutureWarning, match="freq"): + ts = Timestamp("2019-01-01 10:00", freq="H") + + out = pickle.dumps(ts) + with tm.assert_produces_warning(None): + res = pickle.loads(out) + + assert res._freq == ts._freq + + +class TestTimestampNsOperations: + def test_nanosecond_string_parsing(self): + ts = Timestamp("2013-05-01 07:15:45.123456789") + # GH 7878 + expected_repr = "2013-05-01 07:15:45.123456789" + expected_value = 1_367_392_545_123_456_789 + assert ts.value == expected_value + assert expected_repr in repr(ts) + + ts = Timestamp("2013-05-01 07:15:45.123456789+09:00", tz="Asia/Tokyo") + assert ts.value == expected_value - 9 * 3600 * 1_000_000_000 + assert expected_repr in repr(ts) + + ts = Timestamp("2013-05-01 07:15:45.123456789", tz="UTC") + assert ts.value == expected_value + assert expected_repr in repr(ts) + + ts = Timestamp("2013-05-01 07:15:45.123456789", tz="US/Eastern") + assert ts.value == expected_value + 4 * 3600 * 1_000_000_000 + assert expected_repr in repr(ts) + + # GH 10041 + ts = Timestamp("20130501T071545.123456789") + assert ts.value == expected_value + assert expected_repr in repr(ts) + + def test_nanosecond_timestamp(self): + # GH 7610 + 
expected = 1_293_840_000_000_000_005 + t = Timestamp("2011-01-01") + offsets.Nano(5) + assert repr(t) == "Timestamp('2011-01-01 00:00:00.000000005')" + assert t.value == expected + assert t.nanosecond == 5 + + t = Timestamp(t) + assert repr(t) == "Timestamp('2011-01-01 00:00:00.000000005')" + assert t.value == expected + assert t.nanosecond == 5 + + t = Timestamp("2011-01-01 00:00:00.000000005") + assert repr(t) == "Timestamp('2011-01-01 00:00:00.000000005')" + assert t.value == expected + assert t.nanosecond == 5 + + expected = 1_293_840_000_000_000_010 + t = t + offsets.Nano(5) + assert repr(t) == "Timestamp('2011-01-01 00:00:00.000000010')" + assert t.value == expected + assert t.nanosecond == 10 + + t = Timestamp(t) + assert repr(t) == "Timestamp('2011-01-01 00:00:00.000000010')" + assert t.value == expected + assert t.nanosecond == 10 + + t = Timestamp("2011-01-01 00:00:00.000000010") + assert repr(t) == "Timestamp('2011-01-01 00:00:00.000000010')" + assert t.value == expected + assert t.nanosecond == 10 + + +class TestTimestampToJulianDate: + def test_compare_1700(self): + r = Timestamp("1700-06-23").to_julian_date() + assert r == 2_342_145.5 + + def test_compare_2000(self): + r = Timestamp("2000-04-12").to_julian_date() + assert r == 2_451_646.5 + + def test_compare_2100(self): + r = Timestamp("2100-08-12").to_julian_date() + assert r == 2_488_292.5 + + def test_compare_hour01(self): + r = Timestamp("2000-08-12T01:00:00").to_julian_date() + assert r == 2_451_768.5416666666666666 + + def test_compare_hour13(self): + r = Timestamp("2000-08-12T13:00:00").to_julian_date() + assert r == 2_451_769.0416666666666666 + + +class TestTimestampConversion: + def test_conversion(self): + # GH#9255 + ts = Timestamp("2000-01-01") + + result = ts.to_pydatetime() + expected = datetime(2000, 1, 1) + assert result == expected + assert type(result) == type(expected) + + result = ts.to_datetime64() + expected = np.datetime64(ts.value, "ns") + assert result == expected + assert type(result) == type(expected) + assert result.dtype == expected.dtype + + def test_to_pydatetime_fold(self): + # GH#45087 + tzstr = "dateutil/usr/share/zoneinfo/America/Chicago" + ts = Timestamp(year=2013, month=11, day=3, hour=1, minute=0, fold=1, tz=tzstr) + dt = ts.to_pydatetime() + assert dt.fold == 1 + + def test_to_pydatetime_nonzero_nano(self): + ts = Timestamp("2011-01-01 9:00:00.123456789") + + # Warn the user of data loss (nanoseconds). 
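+ # datetime.datetime carries only microsecond resolution, so the trailing + # 789 nanoseconds are dropped; pandas warns about the precision loss.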
+ with tm.assert_produces_warning(UserWarning): + expected = datetime(2011, 1, 1, 9, 0, 0, 123456) + result = ts.to_pydatetime() + assert result == expected + + def test_timestamp_to_datetime(self): + stamp = Timestamp("20090415", tz="US/Eastern") + dtval = stamp.to_pydatetime() + assert stamp == dtval + assert stamp.tzinfo == dtval.tzinfo + + def test_timestamp_to_datetime_dateutil(self): + stamp = Timestamp("20090415", tz="dateutil/US/Eastern") + dtval = stamp.to_pydatetime() + assert stamp == dtval + assert stamp.tzinfo == dtval.tzinfo + + def test_timestamp_to_datetime_explicit_pytz(self): + stamp = Timestamp("20090415", tz=pytz.timezone("US/Eastern")) + dtval = stamp.to_pydatetime() + assert stamp == dtval + assert stamp.tzinfo == dtval.tzinfo + + @td.skip_if_windows + def test_timestamp_to_datetime_explicit_dateutil(self): + stamp = Timestamp("20090415", tz=gettz("US/Eastern")) + dtval = stamp.to_pydatetime() + assert stamp == dtval + assert stamp.tzinfo == dtval.tzinfo + + def test_to_datetime_bijective(self): + # Ensure that converting to datetime and back only loses precision + # by going from nanoseconds to microseconds. + exp_warning = None if Timestamp.max.nanosecond == 0 else UserWarning + with tm.assert_produces_warning(exp_warning): + pydt_max = Timestamp.max.to_pydatetime() + + assert Timestamp(pydt_max).value / 1000 == Timestamp.max.value / 1000 + + exp_warning = None if Timestamp.min.nanosecond == 0 else UserWarning + with tm.assert_produces_warning(exp_warning): + pydt_min = Timestamp.min.to_pydatetime() + + # The next assertion can be enabled once GH#39221 is merged + # assert pydt_min < Timestamp.min # this is bc nanos are dropped + tdus = timedelta(microseconds=1) + assert pydt_min + tdus > Timestamp.min + + assert Timestamp(pydt_min + tdus).value / 1000 == Timestamp.min.value / 1000 + + def test_to_period_tz_warning(self): + # GH#21333 make sure a warning is issued when timezone + # info is lost + ts = Timestamp("2009-04-15 16:17:18", tz="US/Eastern") + with tm.assert_produces_warning(UserWarning): + # warning that timezone info will be lost + ts.to_period("D") + + def test_to_numpy_alias(self): + # GH 24653: alias .to_numpy() for scalars + ts = Timestamp(datetime.now()) + assert ts.to_datetime64() == ts.to_numpy() + + # GH#44460 + msg = "dtype and copy arguments are ignored" + with pytest.raises(ValueError, match=msg): + ts.to_numpy("M8[s]") + with pytest.raises(ValueError, match=msg): + ts.to_numpy(copy=True) + + +class SubDatetime(datetime): + pass + + +@pytest.mark.parametrize( + "lh,rh", + [ + (SubDatetime(2000, 1, 1), Timedelta(hours=1)), + (Timedelta(hours=1), SubDatetime(2000, 1, 1)), + ], +) +def test_dt_subclass_add_timedelta(lh, rh): + # GH#25851 + # ensure that subclassed datetime works for + # Timedelta operations + result = lh + rh + expected = SubDatetime(2000, 1, 1, 1) + assert result == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/test_timezones.py b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/test_timezones.py new file mode 100644 index 0000000..a7f7393 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/test_timezones.py @@ -0,0 +1,455 @@ +""" +Tests for Timestamp timezone-related methods +""" +from datetime import ( + date, + datetime, + timedelta, +) + +import dateutil +from dateutil.tz import ( + gettz, + tzoffset, +) +import pytest +import pytz +from pytz.exceptions import ( + AmbiguousTimeError, + NonExistentTimeError, +) + +from pandas._libs.tslibs 
import timezones +from pandas.errors import OutOfBoundsDatetime +import pandas.util._test_decorators as td + +from pandas import ( + NaT, + Timestamp, +) + + +class TestTimestampTZOperations: + # -------------------------------------------------------------- + # Timestamp.tz_localize + + def test_tz_localize_pushes_out_of_bounds(self): + # GH#12677 + # tz_localize that pushes away from the boundary is OK + msg = ( + f"Converting {Timestamp.min.strftime('%Y-%m-%d %H:%M:%S')} " + f"underflows past {Timestamp.min}" + ) + pac = Timestamp.min.tz_localize("US/Pacific") + assert pac.value > Timestamp.min.value + pac.tz_convert("Asia/Tokyo") # tz_convert doesn't change value + with pytest.raises(OutOfBoundsDatetime, match=msg): + Timestamp.min.tz_localize("Asia/Tokyo") + + # tz_localize that pushes away from the boundary is OK + msg = ( + f"Converting {Timestamp.max.strftime('%Y-%m-%d %H:%M:%S')} " + f"overflows past {Timestamp.max}" + ) + tokyo = Timestamp.max.tz_localize("Asia/Tokyo") + assert tokyo.value < Timestamp.max.value + tokyo.tz_convert("US/Pacific") # tz_convert doesn't change value + with pytest.raises(OutOfBoundsDatetime, match=msg): + Timestamp.max.tz_localize("US/Pacific") + + def test_tz_localize_ambiguous_bool(self): + # make sure that we are correctly accepting bool values as ambiguous + # GH#14402 + ts = Timestamp("2015-11-01 01:00:03") + expected0 = Timestamp("2015-11-01 01:00:03-0500", tz="US/Central") + expected1 = Timestamp("2015-11-01 01:00:03-0600", tz="US/Central") + + msg = "Cannot infer dst time from 2015-11-01 01:00:03" + with pytest.raises(pytz.AmbiguousTimeError, match=msg): + ts.tz_localize("US/Central") + + result = ts.tz_localize("US/Central", ambiguous=True) + assert result == expected0 + + result = ts.tz_localize("US/Central", ambiguous=False) + assert result == expected1 + + def test_tz_localize_ambiguous(self): + ts = Timestamp("2014-11-02 01:00") + ts_dst = ts.tz_localize("US/Eastern", ambiguous=True) + ts_no_dst = ts.tz_localize("US/Eastern", ambiguous=False) + + assert (ts_no_dst.value - ts_dst.value) / 1e9 == 3600 + msg = "Cannot infer offset with only one time" + with pytest.raises(ValueError, match=msg): + ts.tz_localize("US/Eastern", ambiguous="infer") + + # GH#8025 + msg = "Cannot localize tz-aware Timestamp, use tz_convert for conversions" + with pytest.raises(TypeError, match=msg): + Timestamp("2011-01-01", tz="US/Eastern").tz_localize("Asia/Tokyo") + + msg = "Cannot convert tz-naive Timestamp, use tz_localize to localize" + with pytest.raises(TypeError, match=msg): + Timestamp("2011-01-01").tz_convert("Asia/Tokyo") + + @pytest.mark.parametrize( + "stamp, tz", + [ + ("2015-03-08 02:00", "US/Eastern"), + ("2015-03-08 02:30", "US/Pacific"), + ("2015-03-29 02:00", "Europe/Paris"), + ("2015-03-29 02:30", "Europe/Belgrade"), + ], + ) + def test_tz_localize_nonexistent(self, stamp, tz): + # GH#13057 + ts = Timestamp(stamp) + with pytest.raises(NonExistentTimeError, match=stamp): + ts.tz_localize(tz) + # GH 22644 + with pytest.raises(NonExistentTimeError, match=stamp): + ts.tz_localize(tz, nonexistent="raise") + assert ts.tz_localize(tz, nonexistent="NaT") is NaT + + def test_tz_localize_ambiguous_raise(self): + # GH#13057 + ts = Timestamp("2015-11-1 01:00") + msg = "Cannot infer dst time from 2015-11-01 01:00:00," + with pytest.raises(AmbiguousTimeError, match=msg): + ts.tz_localize("US/Pacific", ambiguous="raise") + + def test_tz_localize_nonexistent_invalid_arg(self): + # GH 22644 + tz = "Europe/Warsaw" + ts = Timestamp("2015-03-29 02:00:00") + msg = ( 
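+ # (added) Europe/Warsaw sprang forward 02:00 -> 03:00 on 2015-03-29, so
+ # this wall-clock time never existed; only the documented nonexistent=
+ # options are accepted for handling it.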
+ "The nonexistent argument must be one of 'raise', 'NaT', " + "'shift_forward', 'shift_backward' or a timedelta object" + ) + with pytest.raises(ValueError, match=msg): + ts.tz_localize(tz, nonexistent="foo") + + @pytest.mark.parametrize( + "stamp", + [ + "2014-02-01 09:00", + "2014-07-08 09:00", + "2014-11-01 17:00", + "2014-11-05 00:00", + ], + ) + def test_tz_localize_roundtrip(self, stamp, tz_aware_fixture): + tz = tz_aware_fixture + ts = Timestamp(stamp) + localized = ts.tz_localize(tz) + assert localized == Timestamp(stamp, tz=tz) + + msg = "Cannot localize tz-aware Timestamp" + with pytest.raises(TypeError, match=msg): + localized.tz_localize(tz) + + reset = localized.tz_localize(None) + assert reset == ts + assert reset.tzinfo is None + + def test_tz_localize_ambiguous_compat(self): + # validate that pytz and dateutil are compat for dst + # when the transition happens + naive = Timestamp("2013-10-27 01:00:00") + + pytz_zone = "Europe/London" + dateutil_zone = "dateutil/Europe/London" + result_pytz = naive.tz_localize(pytz_zone, ambiguous=0) + result_dateutil = naive.tz_localize(dateutil_zone, ambiguous=0) + assert result_pytz.value == result_dateutil.value + assert result_pytz.value == 1382835600000000000 + + # fixed ambiguous behavior + # see gh-14621, GH#45087 + assert result_pytz.to_pydatetime().tzname() == "GMT" + assert result_dateutil.to_pydatetime().tzname() == "GMT" + assert str(result_pytz) == str(result_dateutil) + + # 1 hour difference + result_pytz = naive.tz_localize(pytz_zone, ambiguous=1) + result_dateutil = naive.tz_localize(dateutil_zone, ambiguous=1) + assert result_pytz.value == result_dateutil.value + assert result_pytz.value == 1382832000000000000 + + # see gh-14621 + assert str(result_pytz) == str(result_dateutil) + assert ( + result_pytz.to_pydatetime().tzname() + == result_dateutil.to_pydatetime().tzname() + ) + + @pytest.mark.parametrize( + "tz", + [ + pytz.timezone("US/Eastern"), + gettz("US/Eastern"), + "US/Eastern", + "dateutil/US/Eastern", + ], + ) + def test_timestamp_tz_localize(self, tz): + stamp = Timestamp("3/11/2012 04:00") + + result = stamp.tz_localize(tz) + expected = Timestamp("3/11/2012 04:00", tz=tz) + assert result.hour == expected.hour + assert result == expected + + @pytest.mark.parametrize( + "start_ts, tz, end_ts, shift", + [ + ["2015-03-29 02:20:00", "Europe/Warsaw", "2015-03-29 03:00:00", "forward"], + [ + "2015-03-29 02:20:00", + "Europe/Warsaw", + "2015-03-29 01:59:59.999999999", + "backward", + ], + [ + "2015-03-29 02:20:00", + "Europe/Warsaw", + "2015-03-29 03:20:00", + timedelta(hours=1), + ], + [ + "2015-03-29 02:20:00", + "Europe/Warsaw", + "2015-03-29 01:20:00", + timedelta(hours=-1), + ], + ["2018-03-11 02:33:00", "US/Pacific", "2018-03-11 03:00:00", "forward"], + [ + "2018-03-11 02:33:00", + "US/Pacific", + "2018-03-11 01:59:59.999999999", + "backward", + ], + [ + "2018-03-11 02:33:00", + "US/Pacific", + "2018-03-11 03:33:00", + timedelta(hours=1), + ], + [ + "2018-03-11 02:33:00", + "US/Pacific", + "2018-03-11 01:33:00", + timedelta(hours=-1), + ], + ], + ) + @pytest.mark.parametrize("tz_type", ["", "dateutil/"]) + def test_timestamp_tz_localize_nonexistent_shift( + self, start_ts, tz, end_ts, shift, tz_type + ): + # GH 8917, 24466 + tz = tz_type + tz + if isinstance(shift, str): + shift = "shift_" + shift + ts = Timestamp(start_ts) + result = ts.tz_localize(tz, nonexistent=shift) + expected = Timestamp(end_ts).tz_localize(tz) + assert result == expected + + @pytest.mark.parametrize("offset", [-1, 1]) + 
@pytest.mark.parametrize("tz_type", ["", "dateutil/"]) + def test_timestamp_tz_localize_nonexistent_shift_invalid(self, offset, tz_type): + # GH 8917, 24466 + tz = tz_type + "Europe/Warsaw" + ts = Timestamp("2015-03-29 02:20:00") + msg = "The provided timedelta will relocalize on a nonexistent time" + with pytest.raises(ValueError, match=msg): + ts.tz_localize(tz, nonexistent=timedelta(seconds=offset)) + + @pytest.mark.parametrize("tz", ["Europe/Warsaw", "dateutil/Europe/Warsaw"]) + def test_timestamp_tz_localize_nonexistent_NaT(self, tz): + # GH 8917 + ts = Timestamp("2015-03-29 02:20:00") + result = ts.tz_localize(tz, nonexistent="NaT") + assert result is NaT + + @pytest.mark.parametrize("tz", ["Europe/Warsaw", "dateutil/Europe/Warsaw"]) + def test_timestamp_tz_localize_nonexistent_raise(self, tz): + # GH 8917 + ts = Timestamp("2015-03-29 02:20:00") + msg = "2015-03-29 02:20:00" + with pytest.raises(pytz.NonExistentTimeError, match=msg): + ts.tz_localize(tz, nonexistent="raise") + msg = ( + "The nonexistent argument must be one of 'raise', 'NaT', " + "'shift_forward', 'shift_backward' or a timedelta object" + ) + with pytest.raises(ValueError, match=msg): + ts.tz_localize(tz, nonexistent="foo") + + # ------------------------------------------------------------------ + # Timestamp.tz_convert + + @pytest.mark.parametrize( + "stamp", + [ + "2014-02-01 09:00", + "2014-07-08 09:00", + "2014-11-01 17:00", + "2014-11-05 00:00", + ], + ) + def test_tz_convert_roundtrip(self, stamp, tz_aware_fixture): + tz = tz_aware_fixture + + ts = Timestamp(stamp, tz="UTC") + converted = ts.tz_convert(tz) + + reset = converted.tz_convert(None) + assert reset == Timestamp(stamp) + assert reset.tzinfo is None + assert reset == converted.tz_convert("UTC").tz_localize(None) + + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_astimezone(self, tzstr): + # astimezone is an alias for tz_convert, so keep it with + # the tz_convert tests + utcdate = Timestamp("3/11/2012 22:00", tz="UTC") + expected = utcdate.tz_convert(tzstr) + result = utcdate.astimezone(tzstr) + assert expected == result + assert isinstance(result, Timestamp) + + @td.skip_if_windows + def test_tz_convert_utc_with_system_utc(self): + + # from system utc to real utc + ts = Timestamp("2001-01-05 11:56", tz=timezones.maybe_get_tz("dateutil/UTC")) + # check that the time hasn't changed. + assert ts == ts.tz_convert(dateutil.tz.tzutc()) + + # from system utc to real utc + ts = Timestamp("2001-01-05 11:56", tz=timezones.maybe_get_tz("dateutil/UTC")) + # check that the time hasn't changed. 
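+ # (Added sketch, not part of the original test.) tz_convert relabels the
+ # wall clock without touching the underlying UTC nanosecond value, and
+ # Timestamp equality compares instants, so conversion is a no-op for ==:
+ assert ts.value == ts.tz_convert("Asia/Tokyo").value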
+ assert ts == ts.tz_convert(dateutil.tz.tzutc()) + + # ------------------------------------------------------------------ + # Timestamp.__init__ with tz str or tzinfo + + def test_timestamp_constructor_tz_utc(self): + utc_stamp = Timestamp("3/11/2012 05:00", tz="utc") + assert utc_stamp.tzinfo is pytz.utc + assert utc_stamp.hour == 5 + + utc_stamp = Timestamp("3/11/2012 05:00").tz_localize("utc") + assert utc_stamp.hour == 5 + + def test_timestamp_to_datetime_tzoffset(self): + tzinfo = tzoffset(None, 7200) + expected = Timestamp("3/11/2012 04:00", tz=tzinfo) + result = Timestamp(expected.to_pydatetime()) + assert expected == result + + def test_timestamp_constructor_near_dst_boundary(self): + # GH#11481 & GH#15777 + # Naive string timestamps were being localized incorrectly + # with tz_convert_from_utc_single instead of tz_localize_to_utc + + for tz in ["Europe/Brussels", "Europe/Prague"]: + result = Timestamp("2015-10-25 01:00", tz=tz) + expected = Timestamp("2015-10-25 01:00").tz_localize(tz) + assert result == expected + + msg = "Cannot infer dst time from 2015-10-25 02:00:00" + with pytest.raises(pytz.AmbiguousTimeError, match=msg): + Timestamp("2015-10-25 02:00", tz=tz) + + result = Timestamp("2017-03-26 01:00", tz="Europe/Paris") + expected = Timestamp("2017-03-26 01:00").tz_localize("Europe/Paris") + assert result == expected + + msg = "2017-03-26 02:00" + with pytest.raises(pytz.NonExistentTimeError, match=msg): + Timestamp("2017-03-26 02:00", tz="Europe/Paris") + + # GH#11708 + naive = Timestamp("2015-11-18 10:00:00") + result = naive.tz_localize("UTC").tz_convert("Asia/Kolkata") + expected = Timestamp("2015-11-18 15:30:00+0530", tz="Asia/Kolkata") + assert result == expected + + # GH#15823 + result = Timestamp("2017-03-26 00:00", tz="Europe/Paris") + expected = Timestamp("2017-03-26 00:00:00+0100", tz="Europe/Paris") + assert result == expected + + result = Timestamp("2017-03-26 01:00", tz="Europe/Paris") + expected = Timestamp("2017-03-26 01:00:00+0100", tz="Europe/Paris") + assert result == expected + + msg = "2017-03-26 02:00" + with pytest.raises(pytz.NonExistentTimeError, match=msg): + Timestamp("2017-03-26 02:00", tz="Europe/Paris") + + result = Timestamp("2017-03-26 02:00:00+0100", tz="Europe/Paris") + naive = Timestamp(result.value) + expected = naive.tz_localize("UTC").tz_convert("Europe/Paris") + assert result == expected + + result = Timestamp("2017-03-26 03:00", tz="Europe/Paris") + expected = Timestamp("2017-03-26 03:00:00+0200", tz="Europe/Paris") + assert result == expected + + @pytest.mark.parametrize( + "tz", + [ + pytz.timezone("US/Eastern"), + gettz("US/Eastern"), + "US/Eastern", + "dateutil/US/Eastern", + ], + ) + def test_timestamp_constructed_by_date_and_tz(self, tz): + # GH#2993, Timestamp cannot be constructed by datetime.date + # and tz correctly + + result = Timestamp(date(2012, 3, 11), tz=tz) + + expected = Timestamp("3/11/2012", tz=tz) + assert result.hour == expected.hour + assert result == expected + + @pytest.mark.parametrize( + "tz", + [ + pytz.timezone("US/Eastern"), + gettz("US/Eastern"), + "US/Eastern", + "dateutil/US/Eastern", + ], + ) + def test_timestamp_add_timedelta_push_over_dst_boundary(self, tz): + # GH#1389 + + # 4 hours before DST transition + stamp = Timestamp("3/10/2012 22:00", tz=tz) + + result = stamp + timedelta(hours=6) + + # spring forward, + "7" hours + expected = Timestamp("3/11/2012 05:00", tz=tz) + + assert result == expected + + def test_timestamp_timetz_equivalent_with_datetime_tz(self, tz_naive_fixture): + # GH21358 + tz 
= timezones.maybe_get_tz(tz_naive_fixture) + + stamp = Timestamp("2018-06-04 10:20:30", tz=tz) + _datetime = datetime(2018, 6, 4, hour=10, minute=20, second=30, tzinfo=tz) + + result = stamp.timetz() + expected = _datetime.timetz() + + assert result == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/test_unary_ops.py b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/test_unary_ops.py new file mode 100644 index 0000000..5f07cab --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -0,0 +1,525 @@ +from datetime import datetime + +from dateutil.tz import gettz +from hypothesis import ( + given, + strategies as st, +) +import numpy as np +import pytest +import pytz +from pytz import utc + +from pandas._libs import lib +from pandas._libs.tslibs import ( + NaT, + Timedelta, + Timestamp, + conversion, + iNaT, + to_offset, +) +from pandas._libs.tslibs.period import INVALID_FREQ_ERR_MSG +import pandas.util._test_decorators as td + +import pandas._testing as tm + + +class TestTimestampUnaryOps: + + # -------------------------------------------------------------- + # Timestamp.round + @pytest.mark.parametrize( + "timestamp, freq, expected", + [ + ("20130101 09:10:11", "D", "20130101"), + ("20130101 19:10:11", "D", "20130102"), + ("20130201 12:00:00", "D", "20130202"), + ("20130104 12:00:00", "D", "20130105"), + ("2000-01-05 05:09:15.13", "D", "2000-01-05 00:00:00"), + ("2000-01-05 05:09:15.13", "H", "2000-01-05 05:00:00"), + ("2000-01-05 05:09:15.13", "S", "2000-01-05 05:09:15"), + ], + ) + def test_round_frequencies(self, timestamp, freq, expected): + dt = Timestamp(timestamp) + result = dt.round(freq) + expected = Timestamp(expected) + assert result == expected + + def test_round_tzaware(self): + dt = Timestamp("20130101 09:10:11", tz="US/Eastern") + result = dt.round("D") + expected = Timestamp("20130101", tz="US/Eastern") + assert result == expected + + dt = Timestamp("20130101 09:10:11", tz="US/Eastern") + result = dt.round("s") + assert result == dt + + def test_round_30min(self): + # round + dt = Timestamp("20130104 12:32:00") + result = dt.round("30Min") + expected = Timestamp("20130104 12:30:00") + assert result == expected + + def test_round_subsecond(self): + # GH#14440 & GH#15578 + result = Timestamp("2016-10-17 12:00:00.0015").round("ms") + expected = Timestamp("2016-10-17 12:00:00.002000") + assert result == expected + + result = Timestamp("2016-10-17 12:00:00.00149").round("ms") + expected = Timestamp("2016-10-17 12:00:00.001000") + assert result == expected + + ts = Timestamp("2016-10-17 12:00:00.0015") + for freq in ["us", "ns"]: + assert ts == ts.round(freq) + + result = Timestamp("2016-10-17 12:00:00.001501031").round("10ns") + expected = Timestamp("2016-10-17 12:00:00.001501030") + assert result == expected + + def test_round_nonstandard_freq(self): + with tm.assert_produces_warning(False): + Timestamp("2016-10-17 12:00:00.001501031").round("1010ns") + + def test_round_invalid_arg(self): + stamp = Timestamp("2000-01-05 05:09:15.13") + with pytest.raises(ValueError, match=INVALID_FREQ_ERR_MSG): + stamp.round("foo") + + @pytest.mark.parametrize( + "test_input, rounder, freq, expected", + [ + ("2117-01-01 00:00:45", "floor", "15s", "2117-01-01 00:00:45"), + ("2117-01-01 00:00:45", "ceil", "15s", "2117-01-01 00:00:45"), + ( + "2117-01-01 00:00:45.000000012", + "floor", + "10ns", + "2117-01-01 00:00:45.000000010", + ), + ( + "1823-01-01 00:00:01.000000012", + "ceil", + "10ns", + 
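+ # (added) ceil to a 10ns grid: ...01.000000012 rounds up to
+ # ...01.000000020, the next multiple of 10ns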
"1823-01-01 00:00:01.000000020", + ), + ("1823-01-01 00:00:01", "floor", "1s", "1823-01-01 00:00:01"), + ("1823-01-01 00:00:01", "ceil", "1s", "1823-01-01 00:00:01"), + ("NaT", "floor", "1s", "NaT"), + ("NaT", "ceil", "1s", "NaT"), + ], + ) + def test_ceil_floor_edge(self, test_input, rounder, freq, expected): + dt = Timestamp(test_input) + func = getattr(dt, rounder) + result = func(freq) + + if dt is NaT: + assert result is NaT + else: + expected = Timestamp(expected) + assert result == expected + + @pytest.mark.parametrize( + "test_input, freq, expected", + [ + ("2018-01-01 00:02:06", "2s", "2018-01-01 00:02:06"), + ("2018-01-01 00:02:00", "2T", "2018-01-01 00:02:00"), + ("2018-01-01 00:04:00", "4T", "2018-01-01 00:04:00"), + ("2018-01-01 00:15:00", "15T", "2018-01-01 00:15:00"), + ("2018-01-01 00:20:00", "20T", "2018-01-01 00:20:00"), + ("2018-01-01 03:00:00", "3H", "2018-01-01 03:00:00"), + ], + ) + @pytest.mark.parametrize("rounder", ["ceil", "floor", "round"]) + def test_round_minute_freq(self, test_input, freq, expected, rounder): + # Ensure timestamps that shouldn't round dont! + # GH#21262 + + dt = Timestamp(test_input) + expected = Timestamp(expected) + func = getattr(dt, rounder) + result = func(freq) + assert result == expected + + def test_ceil(self): + dt = Timestamp("20130101 09:10:11") + result = dt.ceil("D") + expected = Timestamp("20130102") + assert result == expected + + def test_floor(self): + dt = Timestamp("20130101 09:10:11") + result = dt.floor("D") + expected = Timestamp("20130101") + assert result == expected + + @pytest.mark.parametrize("method", ["ceil", "round", "floor"]) + def test_round_dst_border_ambiguous(self, method): + # GH 18946 round near "fall back" DST + ts = Timestamp("2017-10-29 00:00:00", tz="UTC").tz_convert("Europe/Madrid") + # + result = getattr(ts, method)("H", ambiguous=True) + assert result == ts + + result = getattr(ts, method)("H", ambiguous=False) + expected = Timestamp("2017-10-29 01:00:00", tz="UTC").tz_convert( + "Europe/Madrid" + ) + assert result == expected + + result = getattr(ts, method)("H", ambiguous="NaT") + assert result is NaT + + msg = "Cannot infer dst time" + with pytest.raises(pytz.AmbiguousTimeError, match=msg): + getattr(ts, method)("H", ambiguous="raise") + + @pytest.mark.parametrize( + "method, ts_str, freq", + [ + ["ceil", "2018-03-11 01:59:00-0600", "5min"], + ["round", "2018-03-11 01:59:00-0600", "5min"], + ["floor", "2018-03-11 03:01:00-0500", "2H"], + ], + ) + def test_round_dst_border_nonexistent(self, method, ts_str, freq): + # GH 23324 round near "spring forward" DST + ts = Timestamp(ts_str, tz="America/Chicago") + result = getattr(ts, method)(freq, nonexistent="shift_forward") + expected = Timestamp("2018-03-11 03:00:00", tz="America/Chicago") + assert result == expected + + result = getattr(ts, method)(freq, nonexistent="NaT") + assert result is NaT + + msg = "2018-03-11 02:00:00" + with pytest.raises(pytz.NonExistentTimeError, match=msg): + getattr(ts, method)(freq, nonexistent="raise") + + @pytest.mark.parametrize( + "timestamp", + [ + "2018-01-01 0:0:0.124999360", + "2018-01-01 0:0:0.125000367", + "2018-01-01 0:0:0.125500", + "2018-01-01 0:0:0.126500", + "2018-01-01 12:00:00", + "2019-01-01 12:00:00", + ], + ) + @pytest.mark.parametrize( + "freq", + [ + "2ns", + "3ns", + "4ns", + "5ns", + "6ns", + "7ns", + "250ns", + "500ns", + "750ns", + "1us", + "19us", + "250us", + "500us", + "750us", + "1s", + "2s", + "3s", + "1D", + ], + ) + def test_round_int64(self, timestamp, freq): + # check that all rounding 
modes are accurate to int64 precision + # see GH#22591 + dt = Timestamp(timestamp) + unit = to_offset(freq).nanos + + # test floor + result = dt.floor(freq) + assert result.value % unit == 0, f"floor not a {freq} multiple" + assert 0 <= dt.value - result.value < unit, "floor error" + + # test ceil + result = dt.ceil(freq) + assert result.value % unit == 0, f"ceil not a {freq} multiple" + assert 0 <= result.value - dt.value < unit, "ceil error" + + # test round + result = dt.round(freq) + assert result.value % unit == 0, f"round not a {freq} multiple" + assert abs(result.value - dt.value) <= unit // 2, "round error" + if unit % 2 == 0 and abs(result.value - dt.value) == unit // 2: + # round half to even + assert result.value // unit % 2 == 0, "round half to even error" + + def test_round_implementation_bounds(self): + # See also: analogous test for Timedelta + result = Timestamp.min.ceil("s") + expected = Timestamp(1677, 9, 21, 0, 12, 44) + assert result == expected + + result = Timestamp.max.floor("s") + expected = Timestamp.max - Timedelta(854775807) + assert result == expected + + with pytest.raises(OverflowError, match="value too large"): + Timestamp.min.floor("s") + + # the second message here shows up in windows builds + msg = "|".join( + ["Python int too large to convert to C long", "int too big to convert"] + ) + with pytest.raises(OverflowError, match=msg): + Timestamp.max.ceil("s") + + @given(val=st.integers(iNaT + 1, lib.i8max)) + @pytest.mark.parametrize( + "method", [Timestamp.round, Timestamp.floor, Timestamp.ceil] + ) + def test_round_sanity(self, val, method): + val = np.int64(val) + ts = Timestamp(val) + + def checker(res, ts, nanos): + if method is Timestamp.round: + diff = np.abs((res - ts).value) + assert diff <= nanos / 2 + elif method is Timestamp.floor: + assert res <= ts + elif method is Timestamp.ceil: + assert res >= ts + + assert method(ts, "ns") == ts + + res = method(ts, "us") + nanos = 1000 + assert np.abs((res - ts).value) < nanos + assert res.value % nanos == 0 + checker(res, ts, nanos) + + res = method(ts, "ms") + nanos = 1_000_000 + assert np.abs((res - ts).value) < nanos + assert res.value % nanos == 0 + checker(res, ts, nanos) + + res = method(ts, "s") + nanos = 1_000_000_000 + assert np.abs((res - ts).value) < nanos + assert res.value % nanos == 0 + checker(res, ts, nanos) + + res = method(ts, "min") + nanos = 60 * 1_000_000_000 + assert np.abs((res - ts).value) < nanos + assert res.value % nanos == 0 + checker(res, ts, nanos) + + res = method(ts, "h") + nanos = 60 * 60 * 1_000_000_000 + assert np.abs((res - ts).value) < nanos + assert res.value % nanos == 0 + checker(res, ts, nanos) + + res = method(ts, "D") + nanos = 24 * 60 * 60 * 1_000_000_000 + assert np.abs((res - ts).value) < nanos + assert res.value % nanos == 0 + checker(res, ts, nanos) + + # -------------------------------------------------------------- + # Timestamp.replace + + def test_replace_naive(self): + # GH#14621, GH#7825 + ts = Timestamp("2016-01-01 09:00:00") + result = ts.replace(hour=0) + expected = Timestamp("2016-01-01 00:00:00") + assert result == expected + + def test_replace_aware(self, tz_aware_fixture): + tz = tz_aware_fixture + # GH#14621, GH#7825 + # replacing datetime components with and w/o presence of a timezone + ts = Timestamp("2016-01-01 09:00:00", tz=tz) + result = ts.replace(hour=0) + expected = Timestamp("2016-01-01 00:00:00", tz=tz) + assert result == expected + + def test_replace_preserves_nanos(self, tz_aware_fixture): + tz = tz_aware_fixture + # GH#14621, 
GH#7825 + ts = Timestamp("2016-01-01 09:00:00.000000123", tz=tz) + result = ts.replace(hour=0) + expected = Timestamp("2016-01-01 00:00:00.000000123", tz=tz) + assert result == expected + + def test_replace_multiple(self, tz_aware_fixture): + tz = tz_aware_fixture + # GH#14621, GH#7825 + # replacing datetime components with and w/o presence of a timezone + # test all + ts = Timestamp("2016-01-01 09:00:00.000000123", tz=tz) + result = ts.replace( + year=2015, + month=2, + day=2, + hour=0, + minute=5, + second=5, + microsecond=5, + nanosecond=5, + ) + expected = Timestamp("2015-02-02 00:05:05.000005005", tz=tz) + assert result == expected + + def test_replace_invalid_kwarg(self, tz_aware_fixture): + tz = tz_aware_fixture + # GH#14621, GH#7825 + ts = Timestamp("2016-01-01 09:00:00.000000123", tz=tz) + msg = r"replace\(\) got an unexpected keyword argument" + with pytest.raises(TypeError, match=msg): + ts.replace(foo=5) + + def test_replace_integer_args(self, tz_aware_fixture): + tz = tz_aware_fixture + # GH#14621, GH#7825 + ts = Timestamp("2016-01-01 09:00:00.000000123", tz=tz) + msg = "value must be an integer, received for hour" + with pytest.raises(ValueError, match=msg): + ts.replace(hour=0.1) + + def test_replace_tzinfo_equiv_tz_localize_none(self): + # GH#14621, GH#7825 + # assert conversion to naive is the same as replacing tzinfo with None + ts = Timestamp("2013-11-03 01:59:59.999999-0400", tz="US/Eastern") + assert ts.tz_localize(None) == ts.replace(tzinfo=None) + + @td.skip_if_windows + def test_replace_tzinfo(self): + # GH#15683 + dt = datetime(2016, 3, 27, 1) + tzinfo = pytz.timezone("CET").localize(dt, is_dst=False).tzinfo + + result_dt = dt.replace(tzinfo=tzinfo) + result_pd = Timestamp(dt).replace(tzinfo=tzinfo) + + # datetime.timestamp() converts in the local timezone + with tm.set_timezone("UTC"): + assert result_dt.timestamp() == result_pd.timestamp() + + assert result_dt == result_pd + assert result_dt == result_pd.to_pydatetime() + + result_dt = dt.replace(tzinfo=tzinfo).replace(tzinfo=None) + result_pd = Timestamp(dt).replace(tzinfo=tzinfo).replace(tzinfo=None) + + # datetime.timestamp() converts in the local timezone + with tm.set_timezone("UTC"): + assert result_dt.timestamp() == result_pd.timestamp() + + assert result_dt == result_pd + assert result_dt == result_pd.to_pydatetime() + + @pytest.mark.parametrize( + "tz, normalize", + [ + (pytz.timezone("US/Eastern"), lambda x: x.tzinfo.normalize(x)), + (gettz("US/Eastern"), lambda x: x), + ], + ) + def test_replace_across_dst(self, tz, normalize): + # GH#18319 check that 1) timezone is correctly normalized and + # 2) that hour is not incorrectly changed by this normalization + ts_naive = Timestamp("2017-12-03 16:03:30") + ts_aware = conversion.localize_pydatetime(ts_naive, tz) + + # Preliminary sanity-check + assert ts_aware == normalize(ts_aware) + + # Replace across DST boundary + ts2 = ts_aware.replace(month=6) + + # Check that `replace` preserves hour literal + assert (ts2.hour, ts2.minute) == (ts_aware.hour, ts_aware.minute) + + # Check that post-replace object is appropriately normalized + ts2b = normalize(ts2) + assert ts2 == ts2b + + def test_replace_dst_border(self): + # Gh 7825 + t = Timestamp("2013-11-3", tz="America/Chicago") + result = t.replace(hour=3) + expected = Timestamp("2013-11-3 03:00:00", tz="America/Chicago") + assert result == expected + + @pytest.mark.parametrize("fold", [0, 1]) + @pytest.mark.parametrize("tz", ["dateutil/Europe/London", "Europe/London"]) + def test_replace_dst_fold(self, fold, 
tz): + # GH 25017 + d = datetime(2019, 10, 27, 2, 30) + ts = Timestamp(d, tz=tz) + result = ts.replace(hour=1, fold=fold) + expected = Timestamp(datetime(2019, 10, 27, 1, 30)).tz_localize( + tz, ambiguous=not fold + ) + assert result == expected + + # -------------------------------------------------------------- + # Timestamp.normalize + + @pytest.mark.parametrize("arg", ["2013-11-30", "2013-11-30 12:00:00"]) + def test_normalize(self, tz_naive_fixture, arg): + tz = tz_naive_fixture + ts = Timestamp(arg, tz=tz) + result = ts.normalize() + expected = Timestamp("2013-11-30", tz=tz) + assert result == expected + + def test_normalize_pre_epoch_dates(self): + # GH: 36294 + result = Timestamp("1969-01-01 09:00:00").normalize() + expected = Timestamp("1969-01-01 00:00:00") + assert result == expected + + # -------------------------------------------------------------- + + @td.skip_if_windows + def test_timestamp(self, fixed_now_ts): + # GH#17329 + # tz-naive --> treat it as if it were UTC for purposes of timestamp() + ts = fixed_now_ts + uts = ts.replace(tzinfo=utc) + assert ts.timestamp() == uts.timestamp() + + tsc = Timestamp("2014-10-11 11:00:01.12345678", tz="US/Central") + utsc = tsc.tz_convert("UTC") + + # utsc is a different representation of the same time + assert tsc.timestamp() == utsc.timestamp() + + # datetime.timestamp() converts in the local timezone + with tm.set_timezone("UTC"): + # should agree with datetime.timestamp method + dt = ts.to_pydatetime() + assert dt.timestamp() == ts.timestamp() + + +@pytest.mark.parametrize("fold", [0, 1]) +def test_replace_preserves_fold(fold): + # GH 37610. Check that replace preserves Timestamp fold property + tz = gettz("Europe/Moscow") + + ts = Timestamp(year=2009, month=10, day=25, hour=2, minute=30, fold=fold, tzinfo=tz) + ts_replaced = ts.replace(second=1) + + assert ts_replaced.fold == fold diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/series/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..539a9c0 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_api.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_api.cpython-38.pyc new file mode 100644 index 0000000..c4a7f6f Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_api.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_arithmetic.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_arithmetic.cpython-38.pyc new file mode 100644 index 0000000..2ec552e Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_arithmetic.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_constructors.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_constructors.cpython-38.pyc new file mode 100644 index 0000000..22b816c Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_constructors.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_cumulative.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_cumulative.cpython-38.pyc new file mode 100644 index 0000000..642627b Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_cumulative.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_iteration.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_iteration.cpython-38.pyc new file mode 100644 index 0000000..8e8618c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_iteration.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_logical_ops.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_logical_ops.cpython-38.pyc new file mode 100644 index 0000000..1225ec1 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_logical_ops.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_missing.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_missing.cpython-38.pyc new file mode 100644 index 0000000..fb396e1 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_missing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_npfuncs.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_npfuncs.cpython-38.pyc new file mode 100644 index 0000000..3bf566a Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_npfuncs.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_reductions.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_reductions.cpython-38.pyc new file mode 100644 index 0000000..bc1d33d Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_reductions.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_repr.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_repr.cpython-38.pyc new file mode 100644 index 0000000..3d11704 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_repr.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_subclass.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_subclass.cpython-38.pyc new file mode 100644 index 0000000..2c137a2 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_subclass.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_ufunc.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_ufunc.cpython-38.pyc new file mode 100644 index 0000000..fe0137c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_ufunc.cpython-38.pyc differ 
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_unary.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_unary.cpython-38.pyc new file mode 100644 index 0000000..4cd4323 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_unary.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_validate.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_validate.cpython-38.pyc new file mode 100644 index 0000000..7df9642 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/series/__pycache__/test_validate.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/accessors/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/series/accessors/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/accessors/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/series/accessors/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..3842dbb Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/series/accessors/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/accessors/__pycache__/test_cat_accessor.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/series/accessors/__pycache__/test_cat_accessor.cpython-38.pyc new file mode 100644 index 0000000..ba5013a Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/series/accessors/__pycache__/test_cat_accessor.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/accessors/__pycache__/test_dt_accessor.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/series/accessors/__pycache__/test_dt_accessor.cpython-38.pyc new file mode 100644 index 0000000..d344e2f Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/series/accessors/__pycache__/test_dt_accessor.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/accessors/__pycache__/test_sparse_accessor.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/series/accessors/__pycache__/test_sparse_accessor.cpython-38.pyc new file mode 100644 index 0000000..85b9fee Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/series/accessors/__pycache__/test_sparse_accessor.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/accessors/__pycache__/test_str_accessor.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/series/accessors/__pycache__/test_str_accessor.cpython-38.pyc new file mode 100644 index 0000000..a6c04ab Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/series/accessors/__pycache__/test_str_accessor.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/accessors/test_cat_accessor.py b/.local/lib/python3.8/site-packages/pandas/tests/series/accessors/test_cat_accessor.py new file mode 100644 index 0000000..1a03883 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/accessors/test_cat_accessor.py @@ -0,0 +1,284 @@ +import warnings + +import numpy as np +import pytest + +from pandas import ( + Categorical, + DataFrame, + Index, + Series, 
+ Timestamp, + date_range, + period_range, + timedelta_range, +) +import pandas._testing as tm +from pandas.core.arrays.categorical import CategoricalAccessor +from pandas.core.indexes.accessors import Properties + + +class TestCatAccessor: + @pytest.mark.parametrize( + "method", + [ + lambda x: x.cat.set_categories([1, 2, 3]), + lambda x: x.cat.reorder_categories([2, 3, 1], ordered=True), + lambda x: x.cat.rename_categories([1, 2, 3]), + lambda x: x.cat.remove_unused_categories(), + lambda x: x.cat.remove_categories([2]), + lambda x: x.cat.add_categories([4]), + lambda x: x.cat.as_ordered(), + lambda x: x.cat.as_unordered(), + ], + ) + def test_getname_categorical_accessor(self, method): + # GH#17509 + ser = Series([1, 2, 3], name="A").astype("category") + expected = "A" + result = method(ser).name + assert result == expected + + def test_cat_accessor(self): + ser = Series(Categorical(["a", "b", np.nan, "a"])) + tm.assert_index_equal(ser.cat.categories, Index(["a", "b"])) + assert not ser.cat.ordered, False + + exp = Categorical(["a", "b", np.nan, "a"], categories=["b", "a"]) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + # issue #37643 inplace kwarg deprecated + return_value = ser.cat.set_categories(["b", "a"], inplace=True) + + assert return_value is None + tm.assert_categorical_equal(ser.values, exp) + + res = ser.cat.set_categories(["b", "a"]) + tm.assert_categorical_equal(res.values, exp) + + ser[:] = "a" + ser = ser.cat.remove_unused_categories() + tm.assert_index_equal(ser.cat.categories, Index(["a"])) + + def test_cat_accessor_api(self): + # GH#9322 + + assert Series.cat is CategoricalAccessor + ser = Series(list("aabbcde")).astype("category") + assert isinstance(ser.cat, CategoricalAccessor) + + invalid = Series([1]) + with pytest.raises(AttributeError, match="only use .cat accessor"): + invalid.cat + assert not hasattr(invalid, "cat") + + def test_cat_accessor_no_new_attributes(self): + # https://github.com/pandas-dev/pandas/issues/10673 + cat = Series(list("aabbcde")).astype("category") + with pytest.raises(AttributeError, match="You cannot add any new attribute"): + cat.cat.xlabel = "a" + + def test_cat_accessor_updates_on_inplace(self): + ser = Series(list("abc")).astype("category") + return_value = ser.drop(0, inplace=True) + assert return_value is None + + with tm.assert_produces_warning(FutureWarning): + return_value = ser.cat.remove_unused_categories(inplace=True) + + assert return_value is None + assert len(ser.cat.categories) == 2 + + def test_categorical_delegations(self): + + # invalid accessor + msg = r"Can only use \.cat accessor with a 'category' dtype" + with pytest.raises(AttributeError, match=msg): + Series([1, 2, 3]).cat + with pytest.raises(AttributeError, match=msg): + Series([1, 2, 3]).cat() + with pytest.raises(AttributeError, match=msg): + Series(["a", "b", "c"]).cat + with pytest.raises(AttributeError, match=msg): + Series(np.arange(5.0)).cat + with pytest.raises(AttributeError, match=msg): + Series([Timestamp("20130101")]).cat + + # Series should delegate calls to '.categories', '.codes', '.ordered' + # and the methods '.set_categories()' 'drop_unused_categories()' to the + # categorical + ser = Series(Categorical(["a", "b", "c", "a"], ordered=True)) + exp_categories = Index(["a", "b", "c"]) + tm.assert_index_equal(ser.cat.categories, exp_categories) + ser.cat.categories = [1, 2, 3] + exp_categories = Index([1, 2, 3]) + tm.assert_index_equal(ser.cat.categories, exp_categories) + + exp_codes = Series([0, 1, 2, 0], 
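+ # (added) .cat.codes exposes the underlying integer codes; with only
+ # three categories pandas stores them as int8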
dtype="int8") + tm.assert_series_equal(ser.cat.codes, exp_codes) + + assert ser.cat.ordered + ser = ser.cat.as_unordered() + assert not ser.cat.ordered + return_value = ser.cat.as_ordered(inplace=True) + assert return_value is None + assert ser.cat.ordered + + # reorder + ser = Series(Categorical(["a", "b", "c", "a"], ordered=True)) + exp_categories = Index(["c", "b", "a"]) + exp_values = np.array(["a", "b", "c", "a"], dtype=np.object_) + ser = ser.cat.set_categories(["c", "b", "a"]) + tm.assert_index_equal(ser.cat.categories, exp_categories) + tm.assert_numpy_array_equal(ser.values.__array__(), exp_values) + tm.assert_numpy_array_equal(ser.__array__(), exp_values) + + # remove unused categories + ser = Series(Categorical(["a", "b", "b", "a"], categories=["a", "b", "c"])) + exp_categories = Index(["a", "b"]) + exp_values = np.array(["a", "b", "b", "a"], dtype=np.object_) + ser = ser.cat.remove_unused_categories() + tm.assert_index_equal(ser.cat.categories, exp_categories) + tm.assert_numpy_array_equal(ser.values.__array__(), exp_values) + tm.assert_numpy_array_equal(ser.__array__(), exp_values) + + # This method is likely to be confused, so test that it raises an error + # on wrong inputs: + msg = "'Series' object has no attribute 'set_categories'" + with pytest.raises(AttributeError, match=msg): + ser.set_categories([4, 3, 2, 1]) + + # right: ser.cat.set_categories([4,3,2,1]) + + # GH#18862 (let Series.cat.rename_categories take callables) + ser = Series(Categorical(["a", "b", "c", "a"], ordered=True)) + result = ser.cat.rename_categories(lambda x: x.upper()) + expected = Series( + Categorical(["A", "B", "C", "A"], categories=["A", "B", "C"], ordered=True) + ) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "idx", + [ + date_range("1/1/2015", periods=5), + date_range("1/1/2015", periods=5, tz="MET"), + period_range("1/1/2015", freq="D", periods=5), + timedelta_range("1 days", "10 days"), + ], + ) + def test_dt_accessor_api_for_categorical(self, idx): + # https://github.com/pandas-dev/pandas/issues/10661 + + ser = Series(idx) + cat = ser.astype("category") + + # only testing field (like .day) + # and bool (is_month_start) + attr_names = type(ser._values)._datetimelike_ops + + assert isinstance(cat.dt, Properties) + + special_func_defs = [ + ("strftime", ("%Y-%m-%d",), {}), + ("round", ("D",), {}), + ("floor", ("D",), {}), + ("ceil", ("D",), {}), + ("asfreq", ("D",), {}), + ] + if idx.dtype == "M8[ns]": + # exclude dt64tz since that is already localized and would raise + tup = ("tz_localize", ("UTC",), {}) + special_func_defs.append(tup) + elif idx.dtype.kind == "M": + # exclude dt64 since that is not localized so would raise + tup = ("tz_convert", ("EST",), {}) + special_func_defs.append(tup) + + _special_func_names = [f[0] for f in special_func_defs] + + _ignore_names = ["components", "tz_localize", "tz_convert"] + + func_names = [ + fname + for fname in dir(ser.dt) + if not ( + fname.startswith("_") + or fname in attr_names + or fname in _special_func_names + or fname in _ignore_names + ) + ] + + func_defs = [(fname, (), {}) for fname in func_names] + + for f_def in special_func_defs: + if f_def[0] in dir(ser.dt): + func_defs.append(f_def) + + for func, args, kwargs in func_defs: + with warnings.catch_warnings(): + if func == "to_period": + # dropping TZ + warnings.simplefilter("ignore", UserWarning) + res = getattr(cat.dt, func)(*args, **kwargs) + exp = getattr(ser.dt, func)(*args, **kwargs) + + tm.assert_equal(res, exp) + + for attr in attr_names: + if attr 
in ["week", "weekofyear"]: + # GH#33595 Deprecate week and weekofyear + continue + res = getattr(cat.dt, attr) + exp = getattr(ser.dt, attr) + + tm.assert_equal(res, exp) + + def test_dt_accessor_api_for_categorical_invalid(self): + invalid = Series([1, 2, 3]).astype("category") + msg = "Can only use .dt accessor with datetimelike" + + with pytest.raises(AttributeError, match=msg): + invalid.dt + assert not hasattr(invalid, "str") + + def test_reorder_categories_updates_dtype(self): + # GH#43232 + ser = Series(["a", "b", "c"], dtype="category") + orig_dtype = ser.dtype + + # Need to construct this before calling reorder_categories inplace + expected = ser.cat.reorder_categories(["c", "b", "a"]) + + with tm.assert_produces_warning(FutureWarning, match="`inplace` parameter"): + ser.cat.reorder_categories(["c", "b", "a"], inplace=True) + + assert not orig_dtype.categories.equals(ser.dtype.categories) + assert not orig_dtype.categories.equals(expected.dtype.categories) + assert ser.dtype == expected.dtype + assert ser.dtype.categories.equals(expected.dtype.categories) + + tm.assert_series_equal(ser, expected) + + def test_set_categories_setitem(self): + # GH#43334 + + df = DataFrame({"Survived": [1, 0, 1], "Sex": [0, 1, 1]}, dtype="category") + + # change the dtype in-place + df["Survived"].cat.categories = ["No", "Yes"] + df["Sex"].cat.categories = ["female", "male"] + + # values should not be coerced to NaN + assert list(df["Sex"]) == ["female", "male", "male"] + assert list(df["Survived"]) == ["Yes", "No", "Yes"] + + df["Sex"] = Categorical(df["Sex"], categories=["female", "male"], ordered=False) + df["Survived"] = Categorical( + df["Survived"], categories=["No", "Yes"], ordered=False + ) + + # values should not be coerced to NaN + assert list(df["Sex"]) == ["female", "male", "male"] + assert list(df["Survived"]) == ["Yes", "No", "Yes"] diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/accessors/test_dt_accessor.py b/.local/lib/python3.8/site-packages/pandas/tests/series/accessors/test_dt_accessor.py new file mode 100644 index 0000000..41b5e55 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/accessors/test_dt_accessor.py @@ -0,0 +1,796 @@ +import calendar +from datetime import ( + date, + datetime, + time, +) +import locale +import unicodedata + +import numpy as np +import pytest +import pytz + +from pandas._libs.tslibs.timezones import maybe_get_tz + +from pandas.core.dtypes.common import ( + is_integer_dtype, + is_list_like, +) + +import pandas as pd +from pandas import ( + DataFrame, + DatetimeIndex, + Index, + Period, + PeriodIndex, + Series, + TimedeltaIndex, + date_range, + period_range, + timedelta_range, +) +import pandas._testing as tm +from pandas.core.arrays import ( + DatetimeArray, + PeriodArray, + TimedeltaArray, +) +import pandas.core.common as com + +ok_for_period = PeriodArray._datetimelike_ops +ok_for_period_methods = ["strftime", "to_timestamp", "asfreq"] +ok_for_dt = DatetimeArray._datetimelike_ops +ok_for_dt_methods = [ + "to_period", + "to_pydatetime", + "tz_localize", + "tz_convert", + "normalize", + "strftime", + "round", + "floor", + "ceil", + "day_name", + "month_name", + "isocalendar", +] +ok_for_td = TimedeltaArray._datetimelike_ops +ok_for_td_methods = [ + "components", + "to_pytimedelta", + "total_seconds", + "round", + "floor", + "ceil", +] + + +def get_dir(ser): + # check limited display api + results = [r for r in ser.dt.__dir__() if not r.startswith("_")] + return sorted(set(results)) + + +class 
TestSeriesDatetimeValues: + def _compare(self, ser, name): + # GH 7207, 11128 + # test .dt namespace accessor + + def get_expected(ser, prop): + result = getattr(Index(ser._values), prop) + if isinstance(result, np.ndarray): + if is_integer_dtype(result): + result = result.astype("int64") + elif not is_list_like(result) or isinstance(result, DataFrame): + return result + return Series(result, index=ser.index, name=ser.name) + + left = getattr(ser.dt, name) + right = get_expected(ser, name) + if not (is_list_like(left) and is_list_like(right)): + assert left == right + elif isinstance(left, DataFrame): + tm.assert_frame_equal(left, right) + else: + tm.assert_series_equal(left, right) + + @pytest.mark.parametrize("freq", ["D", "s", "ms"]) + def test_dt_namespace_accessor_datetime64(self, freq): + # GH#7207, GH#11128 + # test .dt namespace accessor + + # datetimeindex + dti = date_range("20130101", periods=5, freq=freq) + ser = Series(dti, name="xxx") + + for prop in ok_for_dt: + # we test freq below + # we ignore week and weekofyear because they are deprecated + if prop not in ["freq", "week", "weekofyear"]: + self._compare(ser, prop) + + for prop in ok_for_dt_methods: + getattr(ser.dt, prop) + + result = ser.dt.to_pydatetime() + assert isinstance(result, np.ndarray) + assert result.dtype == object + + result = ser.dt.tz_localize("US/Eastern") + exp_values = DatetimeIndex(ser.values).tz_localize("US/Eastern") + expected = Series(exp_values, index=ser.index, name="xxx") + tm.assert_series_equal(result, expected) + + tz_result = result.dt.tz + assert str(tz_result) == "US/Eastern" + freq_result = ser.dt.freq + assert freq_result == DatetimeIndex(ser.values, freq="infer").freq + + # let's localize, then convert + result = ser.dt.tz_localize("UTC").dt.tz_convert("US/Eastern") + exp_values = ( + DatetimeIndex(ser.values).tz_localize("UTC").tz_convert("US/Eastern") + ) + expected = Series(exp_values, index=ser.index, name="xxx") + tm.assert_series_equal(result, expected) + + def test_dt_namespace_accessor_datetime64tz(self): + # GH#7207, GH#11128 + # test .dt namespace accessor + + # datetimeindex with tz + dti = date_range("20130101", periods=5, tz="US/Eastern") + ser = Series(dti, name="xxx") + for prop in ok_for_dt: + + # we test freq below + # we ignore week and weekofyear because they are deprecated + if prop not in ["freq", "week", "weekofyear"]: + self._compare(ser, prop) + + for prop in ok_for_dt_methods: + getattr(ser.dt, prop) + + result = ser.dt.to_pydatetime() + assert isinstance(result, np.ndarray) + assert result.dtype == object + + result = ser.dt.tz_convert("CET") + expected = Series(ser._values.tz_convert("CET"), index=ser.index, name="xxx") + tm.assert_series_equal(result, expected) + + tz_result = result.dt.tz + assert str(tz_result) == "CET" + freq_result = ser.dt.freq + assert freq_result == DatetimeIndex(ser.values, freq="infer").freq + + def test_dt_namespace_accessor_timedelta(self): + # GH#7207, GH#11128 + # test .dt namespace accessor + + # timedelta index + cases = [ + Series( + timedelta_range("1 day", periods=5), index=list("abcde"), name="xxx" + ), + Series(timedelta_range("1 day 01:23:45", periods=5, freq="s"), name="xxx"), + Series( + timedelta_range("2 days 01:23:45.012345", periods=5, freq="ms"), + name="xxx", + ), + ] + for ser in cases: + for prop in ok_for_td: + # we test freq below + if prop != "freq": + self._compare(ser, prop) + + for prop in ok_for_td_methods: + getattr(ser.dt, prop) + + result = ser.dt.components + assert isinstance(result, DataFrame) + 
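+ # (Added sketch, not part of the original test.) .components expands each
+ # timedelta into one column per unit, which is why a DataFrame, not a
+ # Series, comes back here:
+ assert list(result.columns)[:2] == ["days", "hours"]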
tm.assert_index_equal(result.index, ser.index) + + result = ser.dt.to_pytimedelta() + assert isinstance(result, np.ndarray) + assert result.dtype == object + + result = ser.dt.total_seconds() + assert isinstance(result, Series) + assert result.dtype == "float64" + + freq_result = ser.dt.freq + assert freq_result == TimedeltaIndex(ser.values, freq="infer").freq + + def test_dt_namespace_accessor_period(self): + # GH#7207, GH#11128 + # test .dt namespace accessor + + # periodindex + pi = period_range("20130101", periods=5, freq="D") + ser = Series(pi, name="xxx") + + for prop in ok_for_period: + # we test freq below + if prop != "freq": + self._compare(ser, prop) + + for prop in ok_for_period_methods: + getattr(ser.dt, prop) + + freq_result = ser.dt.freq + assert freq_result == PeriodIndex(ser.values).freq + + def test_dt_namespace_accessor_index_and_values(self): + + # both + index = date_range("20130101", periods=3, freq="D") + dti = date_range("20140204", periods=3, freq="s") + ser = Series(dti, index=index, name="xxx") + exp = Series( + np.array([2014, 2014, 2014], dtype="int64"), index=index, name="xxx" + ) + tm.assert_series_equal(ser.dt.year, exp) + + exp = Series(np.array([2, 2, 2], dtype="int64"), index=index, name="xxx") + tm.assert_series_equal(ser.dt.month, exp) + + exp = Series(np.array([0, 1, 2], dtype="int64"), index=index, name="xxx") + tm.assert_series_equal(ser.dt.second, exp) + + exp = Series([ser[0]] * 3, index=index, name="xxx") + tm.assert_series_equal(ser.dt.normalize(), exp) + + def test_dt_accessor_limited_display_api(self): + # tznaive + ser = Series(date_range("20130101", periods=5, freq="D"), name="xxx") + results = get_dir(ser) + tm.assert_almost_equal(results, sorted(set(ok_for_dt + ok_for_dt_methods))) + + # tzaware + ser = Series(date_range("2015-01-01", "2016-01-01", freq="T"), name="xxx") + ser = ser.dt.tz_localize("UTC").dt.tz_convert("America/Chicago") + results = get_dir(ser) + tm.assert_almost_equal(results, sorted(set(ok_for_dt + ok_for_dt_methods))) + + # Period + ser = Series( + period_range("20130101", periods=5, freq="D", name="xxx").astype(object) + ) + results = get_dir(ser) + tm.assert_almost_equal( + results, sorted(set(ok_for_period + ok_for_period_methods)) + ) + + def test_dt_accessor_ambiguous_freq_conversions(self): + # GH#11295 + # ambiguous time error on the conversions + ser = Series(date_range("2015-01-01", "2016-01-01", freq="T"), name="xxx") + ser = ser.dt.tz_localize("UTC").dt.tz_convert("America/Chicago") + + exp_values = date_range( + "2015-01-01", "2016-01-01", freq="T", tz="UTC" + ).tz_convert("America/Chicago") + # freq not preserved by tz_localize above + exp_values = exp_values._with_freq(None) + expected = Series(exp_values, name="xxx") + tm.assert_series_equal(ser, expected) + + def test_dt_accessor_not_writeable(self): + # no setting allowed + ser = Series(date_range("20130101", periods=5, freq="D"), name="xxx") + with pytest.raises(ValueError, match="modifications"): + ser.dt.hour = 5 + + # trying to set a copy + msg = "modifications to a property of a datetimelike.+not supported" + with pd.option_context("chained_assignment", "raise"): + with pytest.raises(com.SettingWithCopyError, match=msg): + ser.dt.hour[0] = 5 + + @pytest.mark.parametrize( + "method, dates", + [ + ["round", ["2012-01-02", "2012-01-02", "2012-01-01"]], + ["floor", ["2012-01-01", "2012-01-01", "2012-01-01"]], + ["ceil", ["2012-01-02", "2012-01-02", "2012-01-02"]], + ], + ) + def test_dt_round(self, method, dates): + # round + ser = Series( + 
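+ # (added) the .dt methods broadcast the scalar Timestamp round/floor/ceil
+ # elementwise over the Series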
pd.to_datetime( + ["2012-01-01 13:00:00", "2012-01-01 12:01:00", "2012-01-01 08:00:00"] + ), + name="xxx", + ) + result = getattr(ser.dt, method)("D") + expected = Series(pd.to_datetime(dates), name="xxx") + tm.assert_series_equal(result, expected) + + def test_dt_round_tz(self): + ser = Series( + pd.to_datetime( + ["2012-01-01 13:00:00", "2012-01-01 12:01:00", "2012-01-01 08:00:00"] + ), + name="xxx", + ) + result = ser.dt.tz_localize("UTC").dt.tz_convert("US/Eastern").dt.round("D") + + exp_values = pd.to_datetime( + ["2012-01-01", "2012-01-01", "2012-01-01"] + ).tz_localize("US/Eastern") + expected = Series(exp_values, name="xxx") + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("method", ["ceil", "round", "floor"]) + def test_dt_round_tz_ambiguous(self, method): + # GH 18946 round near "fall back" DST + df1 = DataFrame( + [ + pd.to_datetime("2017-10-29 02:00:00+02:00", utc=True), + pd.to_datetime("2017-10-29 02:00:00+01:00", utc=True), + pd.to_datetime("2017-10-29 03:00:00+01:00", utc=True), + ], + columns=["date"], + ) + df1["date"] = df1["date"].dt.tz_convert("Europe/Madrid") + # infer + result = getattr(df1.date.dt, method)("H", ambiguous="infer") + expected = df1["date"] + tm.assert_series_equal(result, expected) + + # bool-array + result = getattr(df1.date.dt, method)("H", ambiguous=[True, False, False]) + tm.assert_series_equal(result, expected) + + # NaT + result = getattr(df1.date.dt, method)("H", ambiguous="NaT") + expected = df1["date"].copy() + expected.iloc[0:2] = pd.NaT + tm.assert_series_equal(result, expected) + + # raise + with tm.external_error_raised(pytz.AmbiguousTimeError): + getattr(df1.date.dt, method)("H", ambiguous="raise") + + @pytest.mark.parametrize( + "method, ts_str, freq", + [ + ["ceil", "2018-03-11 01:59:00-0600", "5min"], + ["round", "2018-03-11 01:59:00-0600", "5min"], + ["floor", "2018-03-11 03:01:00-0500", "2H"], + ], + ) + def test_dt_round_tz_nonexistent(self, method, ts_str, freq): + # GH 23324 round near "spring forward" DST + ser = Series([pd.Timestamp(ts_str, tz="America/Chicago")]) + result = getattr(ser.dt, method)(freq, nonexistent="shift_forward") + expected = Series([pd.Timestamp("2018-03-11 03:00:00", tz="America/Chicago")]) + tm.assert_series_equal(result, expected) + + result = getattr(ser.dt, method)(freq, nonexistent="NaT") + expected = Series([pd.NaT]).dt.tz_localize(result.dt.tz) + tm.assert_series_equal(result, expected) + + with pytest.raises(pytz.NonExistentTimeError, match="2018-03-11 02:00:00"): + getattr(ser.dt, method)(freq, nonexistent="raise") + + def test_dt_namespace_accessor_categorical(self): + # GH 19468 + dti = DatetimeIndex(["20171111", "20181212"]).repeat(2) + ser = Series(pd.Categorical(dti), name="foo") + result = ser.dt.year + expected = Series([2017, 2017, 2018, 2018], name="foo") + tm.assert_series_equal(result, expected) + + def test_dt_tz_localize_categorical(self, tz_aware_fixture): + # GH 27952 + tz = tz_aware_fixture + datetimes = Series( + ["2019-01-01", "2019-01-01", "2019-01-02"], dtype="datetime64[ns]" + ) + categorical = datetimes.astype("category") + result = categorical.dt.tz_localize(tz) + expected = datetimes.dt.tz_localize(tz) + tm.assert_series_equal(result, expected) + + def test_dt_tz_convert_categorical(self, tz_aware_fixture): + # GH 27952 + tz = tz_aware_fixture + datetimes = Series( + ["2019-01-01", "2019-01-01", "2019-01-02"], dtype="datetime64[ns, MET]" + ) + categorical = datetimes.astype("category") + result = categorical.dt.tz_convert(tz) + expected = 
datetimes.dt.tz_convert(tz) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("accessor", ["year", "month", "day"]) + def test_dt_other_accessors_categorical(self, accessor): + # GH 27952 + datetimes = Series( + ["2018-01-01", "2018-01-01", "2019-01-02"], dtype="datetime64[ns]" + ) + categorical = datetimes.astype("category") + result = getattr(categorical.dt, accessor) + expected = getattr(datetimes.dt, accessor) + tm.assert_series_equal(result, expected) + + def test_dt_accessor_no_new_attributes(self): + # https://github.com/pandas-dev/pandas/issues/10673 + ser = Series(date_range("20130101", periods=5, freq="D")) + with pytest.raises(AttributeError, match="You cannot add any new attribute"): + ser.dt.xlabel = "a" + + # error: Unsupported operand types for + ("List[None]" and "List[str]") + @pytest.mark.parametrize( + "time_locale", [None] + (tm.get_locales() or []) # type: ignore[operator] + ) + def test_dt_accessor_datetime_name_accessors(self, time_locale): + # Test Monday -> Sunday and January -> December, in that sequence + if time_locale is None: + # If the time_locale is None, day_name and month_name should + # return the English names + expected_days = [ + "Monday", + "Tuesday", + "Wednesday", + "Thursday", + "Friday", + "Saturday", + "Sunday", + ] + expected_months = [ + "January", + "February", + "March", + "April", + "May", + "June", + "July", + "August", + "September", + "October", + "November", + "December", + ] + else: + with tm.set_locale(time_locale, locale.LC_TIME): + expected_days = calendar.day_name[:] + expected_months = calendar.month_name[1:] + + ser = Series(date_range(freq="D", start=datetime(1998, 1, 1), periods=365)) + english_days = [ + "Monday", + "Tuesday", + "Wednesday", + "Thursday", + "Friday", + "Saturday", + "Sunday", + ] + for day, name, eng_name in zip(range(4, 11), expected_days, english_days): + name = name.capitalize() + assert ser.dt.day_name(locale=time_locale)[day] == name + assert ser.dt.day_name(locale=None)[day] == eng_name + ser = pd.concat([ser, Series([pd.NaT])]) + assert np.isnan(ser.dt.day_name(locale=time_locale).iloc[-1]) + + ser = Series(date_range(freq="M", start="2012", end="2013")) + result = ser.dt.month_name(locale=time_locale) + expected = Series([month.capitalize() for month in expected_months]) + + # work around https://github.com/pandas-dev/pandas/issues/22342 + result = result.str.normalize("NFD") + expected = expected.str.normalize("NFD") + + tm.assert_series_equal(result, expected) + + for s_date, expected in zip(ser, expected_months): + result = s_date.month_name(locale=time_locale) + expected = expected.capitalize() + + result = unicodedata.normalize("NFD", result) + expected = unicodedata.normalize("NFD", expected) + + assert result == expected + + ser = pd.concat([ser, Series([pd.NaT])]) + assert np.isnan(ser.dt.month_name(locale=time_locale).iloc[-1]) + + def test_strftime(self): + # GH 10086 + ser = Series(date_range("20130101", periods=5)) + result = ser.dt.strftime("%Y/%m/%d") + expected = Series( + ["2013/01/01", "2013/01/02", "2013/01/03", "2013/01/04", "2013/01/05"] + ) + tm.assert_series_equal(result, expected) + + ser = Series(date_range("2015-02-03 11:22:33.4567", periods=5)) + result = ser.dt.strftime("%Y/%m/%d %H-%M-%S") + expected = Series( + [ + "2015/02/03 11-22-33", + "2015/02/04 11-22-33", + "2015/02/05 11-22-33", + "2015/02/06 11-22-33", + "2015/02/07 11-22-33", + ] + ) + tm.assert_series_equal(result, expected) + + ser = Series(period_range("20130101", periods=5)) + result 
= ser.dt.strftime("%Y/%m/%d") + expected = Series( + ["2013/01/01", "2013/01/02", "2013/01/03", "2013/01/04", "2013/01/05"] + ) + tm.assert_series_equal(result, expected) + + ser = Series(period_range("2015-02-03 11:22:33.4567", periods=5, freq="s")) + result = ser.dt.strftime("%Y/%m/%d %H-%M-%S") + expected = Series( + [ + "2015/02/03 11-22-33", + "2015/02/03 11-22-34", + "2015/02/03 11-22-35", + "2015/02/03 11-22-36", + "2015/02/03 11-22-37", + ] + ) + tm.assert_series_equal(result, expected) + + def test_strftime_dt64_days(self): + ser = Series(date_range("20130101", periods=5)) + ser.iloc[0] = pd.NaT + result = ser.dt.strftime("%Y/%m/%d") + expected = Series( + [np.nan, "2013/01/02", "2013/01/03", "2013/01/04", "2013/01/05"] + ) + tm.assert_series_equal(result, expected) + + datetime_index = date_range("20150301", periods=5) + result = datetime_index.strftime("%Y/%m/%d") + + expected = Index( + ["2015/03/01", "2015/03/02", "2015/03/03", "2015/03/04", "2015/03/05"], + dtype=np.object_, + ) + # dtype may be S10 or U10 depending on python version + tm.assert_index_equal(result, expected) + + def test_strftime_period_days(self): + period_index = period_range("20150301", periods=5) + result = period_index.strftime("%Y/%m/%d") + expected = Index( + ["2015/03/01", "2015/03/02", "2015/03/03", "2015/03/04", "2015/03/05"], + dtype="=U10", + ) + tm.assert_index_equal(result, expected) + + def test_strftime_dt64_microsecond_resolution(self): + ser = Series([datetime(2013, 1, 1, 2, 32, 59), datetime(2013, 1, 2, 14, 32, 1)]) + result = ser.dt.strftime("%Y-%m-%d %H:%M:%S") + expected = Series(["2013-01-01 02:32:59", "2013-01-02 14:32:01"]) + tm.assert_series_equal(result, expected) + + def test_strftime_period_hours(self): + ser = Series(period_range("20130101", periods=4, freq="H")) + result = ser.dt.strftime("%Y/%m/%d %H:%M:%S") + expected = Series( + [ + "2013/01/01 00:00:00", + "2013/01/01 01:00:00", + "2013/01/01 02:00:00", + "2013/01/01 03:00:00", + ] + ) + tm.assert_series_equal(result, expected) + + def test_strftime_period_milliseconds(self): + ser = Series(period_range("20130101", periods=4, freq="L")) + result = ser.dt.strftime("%Y/%m/%d %H:%M:%S.%l") + expected = Series( + [ + "2013/01/01 00:00:00.000", + "2013/01/01 00:00:00.001", + "2013/01/01 00:00:00.002", + "2013/01/01 00:00:00.003", + ] + ) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "data", + [ + DatetimeIndex(["2019-01-01", pd.NaT]), + PeriodIndex(["2019-01-01", pd.NaT], dtype="period[D]"), + ], + ) + def test_strftime_nat(self, data): + # GH 29578 + ser = Series(data) + result = ser.dt.strftime("%Y-%m-%d") + expected = Series(["2019-01-01", np.nan]) + tm.assert_series_equal(result, expected) + + def test_valid_dt_with_missing_values(self): + + from datetime import ( + date, + time, + ) + + # GH 8689 + ser = Series(date_range("20130101", periods=5, freq="D")) + ser.iloc[2] = pd.NaT + + for attr in ["microsecond", "nanosecond", "second", "minute", "hour", "day"]: + expected = getattr(ser.dt, attr).copy() + expected.iloc[2] = np.nan + result = getattr(ser.dt, attr) + tm.assert_series_equal(result, expected) + + result = ser.dt.date + expected = Series( + [ + date(2013, 1, 1), + date(2013, 1, 2), + np.nan, + date(2013, 1, 4), + date(2013, 1, 5), + ], + dtype="object", + ) + tm.assert_series_equal(result, expected) + + result = ser.dt.time + expected = Series([time(0), time(0), np.nan, time(0), time(0)], dtype="object") + tm.assert_series_equal(result, expected) + + def test_dt_accessor_api(self): + # GH 9322
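+ # the assertions below pin down how the accessor is wired: Series.dt is the + # CombinedDatetimelikeProperties descriptor at the class level, and on a + # concrete datetime64 Series it resolves to a DatetimeProperties instance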
+ from pandas.core.indexes.accessors import ( + CombinedDatetimelikeProperties, + DatetimeProperties, + ) + + assert Series.dt is CombinedDatetimelikeProperties + + ser = Series(date_range("2000-01-01", periods=3)) + assert isinstance(ser.dt, DatetimeProperties) + + @pytest.mark.parametrize( + "ser", [Series(np.arange(5)), Series(list("abcde")), Series(np.random.randn(5))] + ) + def test_dt_accessor_invalid(self, ser): + # GH#9322 check that series with incorrect dtypes don't have attr + with pytest.raises(AttributeError, match="only use .dt accessor"): + ser.dt + assert not hasattr(ser, "dt") + + def test_dt_accessor_updates_on_inplace(self): + ser = Series(date_range("2018-01-01", periods=10)) + ser[2] = None + return_value = ser.fillna(pd.Timestamp("2018-01-01"), inplace=True) + assert return_value is None + result = ser.dt.date + assert result[0] == result[2] + + def test_date_tz(self): + # GH11757 + rng = DatetimeIndex( + ["2014-04-04 23:56", "2014-07-18 21:24", "2015-11-22 22:14"], + tz="US/Eastern", + ) + ser = Series(rng) + expected = Series([date(2014, 4, 4), date(2014, 7, 18), date(2015, 11, 22)]) + tm.assert_series_equal(ser.dt.date, expected) + tm.assert_series_equal(ser.apply(lambda x: x.date()), expected) + + def test_dt_timetz_accessor(self, tz_naive_fixture): + # GH21358 + tz = maybe_get_tz(tz_naive_fixture) + + dtindex = DatetimeIndex( + ["2014-04-04 23:56", "2014-07-18 21:24", "2015-11-22 22:14"], tz=tz + ) + ser = Series(dtindex) + expected = Series( + [time(23, 56, tzinfo=tz), time(21, 24, tzinfo=tz), time(22, 14, tzinfo=tz)] + ) + result = ser.dt.timetz + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "input_series, expected_output", + [ + [["2020-01-01"], [[2020, 1, 3]]], + [[pd.NaT], [[np.NaN, np.NaN, np.NaN]]], + [["2019-12-31", "2019-12-29"], [[2020, 1, 2], [2019, 52, 7]]], + [["2010-01-01", pd.NaT], [[2009, 53, 5], [np.NaN, np.NaN, np.NaN]]], + # see GH#36032 + [["2016-01-08", "2016-01-04"], [[2016, 1, 5], [2016, 1, 1]]], + [["2016-01-07", "2016-01-01"], [[2016, 1, 4], [2015, 53, 5]]], + ], + ) + @pytest.mark.filterwarnings("ignore:Inferring datetime64:FutureWarning") + def test_isocalendar(self, input_series, expected_output): + result = pd.to_datetime(Series(input_series)).dt.isocalendar() + expected_frame = DataFrame( + expected_output, columns=["year", "week", "day"], dtype="UInt32" + ) + tm.assert_frame_equal(result, expected_frame) + + def test_hour_index(self): + dt_series = Series( + date_range(start="2021-01-01", periods=5, freq="h"), + index=[2, 6, 7, 8, 11], + dtype="category", + ) + result = dt_series.dt.hour + expected = Series( + [0, 1, 2, 3, 4], + index=[2, 6, 7, 8, 11], + ) + tm.assert_series_equal(result, expected) + + +class TestSeriesPeriodValuesDtAccessor: + @pytest.mark.parametrize( + "input_vals", + [ + [Period("2016-01", freq="M"), Period("2016-02", freq="M")], + [Period("2016-01-01", freq="D"), Period("2016-01-02", freq="D")], + [ + Period("2016-01-01 00:00:00", freq="H"), + Period("2016-01-01 01:00:00", freq="H"), + ], + [ + Period("2016-01-01 00:00:00", freq="M"), + Period("2016-01-01 00:01:00", freq="M"), + ], + [ + Period("2016-01-01 00:00:00", freq="S"), + Period("2016-01-01 00:00:01", freq="S"), + ], + ], + ) + def test_end_time_timevalues(self, input_vals): + # GH#17157 + # Check that the time part of the Period is adjusted by end_time + # when using the dt accessor on a Series + input_vals = PeriodArray._from_sequence(np.asarray(input_vals)) + + ser = Series(input_vals) + result = ser.dt.end_time + expected 
= ser.apply(lambda x: x.end_time) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("input_vals", [("2001"), ("NaT")]) + def test_to_period(self, input_vals): + # GH#21205 + expected = Series([input_vals], dtype="Period[D]") + result = Series([input_vals], dtype="datetime64[ns]").dt.to_period("D") + tm.assert_series_equal(result, expected) + + +def test_week_and_weekofyear_are_deprecated(): + # GH#33595 Deprecate week and weekofyear + series = pd.to_datetime(Series(["2020-01-01"])) + with tm.assert_produces_warning(FutureWarning): + series.dt.week + with tm.assert_produces_warning(FutureWarning): + series.dt.weekofyear + + +def test_normalize_pre_epoch_dates(): + # GH: 36294 + ser = pd.to_datetime(Series(["1969-01-01 09:00:00", "2016-01-01 09:00:00"])) + result = ser.dt.normalize() + expected = pd.to_datetime(Series(["1969-01-01", "2016-01-01"])) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/accessors/test_sparse_accessor.py b/.local/lib/python3.8/site-packages/pandas/tests/series/accessors/test_sparse_accessor.py new file mode 100644 index 0000000..118095b --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/accessors/test_sparse_accessor.py @@ -0,0 +1,9 @@ +from pandas import Series + + +class TestSparseAccessor: + def test_sparse_accessor_updates_on_inplace(self): + ser = Series([1, 1, 2, 3], dtype="Sparse[int]") + return_value = ser.drop([0, 1], inplace=True) + assert return_value is None + assert ser.sparse.density == 1.0 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/accessors/test_str_accessor.py b/.local/lib/python3.8/site-packages/pandas/tests/series/accessors/test_str_accessor.py new file mode 100644 index 0000000..09d965e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/accessors/test_str_accessor.py @@ -0,0 +1,25 @@ +import pytest + +from pandas import Series +import pandas._testing as tm + + +class TestStrAccessor: + def test_str_attribute(self): + # GH#9068 + methods = ["strip", "rstrip", "lstrip"] + ser = Series([" jack", "jill ", " jesse ", "frank"]) + for method in methods: + expected = Series([getattr(str, method)(x) for x in ser.values]) + tm.assert_series_equal(getattr(Series.str, method)(ser.str), expected) + + # str accessor only valid with string values + ser = Series(range(5)) + with pytest.raises(AttributeError, match="only use .str accessor"): + ser.str.repeat(2) + + def test_str_accessor_updates_on_inplace(self): + ser = Series(list("abc")) + return_value = ser.drop([0], inplace=True) + assert return_value is None + assert len(ser.str.lower()) == 2 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..cf7a4b6 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/__pycache__/test_datetime.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/__pycache__/test_datetime.cpython-38.pyc new file mode 100644 index 0000000..4f4493f 
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/__pycache__/test_datetime.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/__pycache__/test_delitem.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/__pycache__/test_delitem.cpython-38.pyc new file mode 100644 index 0000000..154b8c4 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/__pycache__/test_delitem.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/__pycache__/test_get.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/__pycache__/test_get.cpython-38.pyc new file mode 100644 index 0000000..7ff54fe Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/__pycache__/test_get.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/__pycache__/test_getitem.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/__pycache__/test_getitem.cpython-38.pyc new file mode 100644 index 0000000..68e0d51 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/__pycache__/test_getitem.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/__pycache__/test_indexing.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/__pycache__/test_indexing.cpython-38.pyc new file mode 100644 index 0000000..155e814 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/__pycache__/test_indexing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/__pycache__/test_mask.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/__pycache__/test_mask.cpython-38.pyc new file mode 100644 index 0000000..01709bd Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/__pycache__/test_mask.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/__pycache__/test_set_value.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/__pycache__/test_set_value.cpython-38.pyc new file mode 100644 index 0000000..3eee42f Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/__pycache__/test_set_value.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/__pycache__/test_setitem.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/__pycache__/test_setitem.cpython-38.pyc new file mode 100644 index 0000000..448c9f3 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/__pycache__/test_setitem.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/__pycache__/test_take.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/__pycache__/test_take.cpython-38.pyc new file mode 100644 index 0000000..003abe2 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/__pycache__/test_take.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/__pycache__/test_where.cpython-38.pyc 
b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/__pycache__/test_where.cpython-38.pyc new file mode 100644 index 0000000..4bb9373 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/__pycache__/test_where.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/__pycache__/test_xs.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/__pycache__/test_xs.cpython-38.pyc new file mode 100644 index 0000000..fa78e3e Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/__pycache__/test_xs.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/test_datetime.py b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/test_datetime.py new file mode 100644 index 0000000..b829147 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/test_datetime.py @@ -0,0 +1,484 @@ +""" +Also test support for datetime64[ns] in Series / DataFrame +""" +from datetime import ( + datetime, + timedelta, +) +import re + +from dateutil.tz import ( + gettz, + tzutc, +) +import numpy as np +import pytest +import pytz + +from pandas._libs import index as libindex + +import pandas as pd +from pandas import ( + DataFrame, + Series, + Timestamp, + date_range, + period_range, +) +import pandas._testing as tm + + +def test_fancy_getitem(): + dti = date_range( + freq="WOM-1FRI", start=datetime(2005, 1, 1), end=datetime(2010, 1, 1) + ) + + s = Series(np.arange(len(dti)), index=dti) + + assert s[48] == 48 + assert s["1/2/2009"] == 48 + assert s["2009-1-2"] == 48 + assert s[datetime(2009, 1, 2)] == 48 + assert s[Timestamp(datetime(2009, 1, 2))] == 48 + with pytest.raises(KeyError, match=r"^'2009-1-3'$"): + s["2009-1-3"] + tm.assert_series_equal( + s["3/6/2009":"2009-06-05"], s[datetime(2009, 3, 6) : datetime(2009, 6, 5)] + ) + + +def test_fancy_setitem(): + dti = date_range( + freq="WOM-1FRI", start=datetime(2005, 1, 1), end=datetime(2010, 1, 1) + ) + + s = Series(np.arange(len(dti)), index=dti) + s[48] = -1 + assert s[48] == -1 + s["1/2/2009"] = -2 + assert s[48] == -2 + s["1/2/2009":"2009-06-05"] = -3 + assert (s[48:54] == -3).all() + + +@pytest.mark.parametrize("tz_source", ["pytz", "dateutil"]) +def test_getitem_setitem_datetime_tz(tz_source): + if tz_source == "pytz": + tzget = pytz.timezone + else: + # handle special case for utc in dateutil + tzget = lambda x: tzutc() if x == "UTC" else gettz(x) + + N = 50 + # testing with timezone, GH #2785 + rng = date_range("1/1/1990", periods=N, freq="H", tz=tzget("US/Eastern")) + ts = Series(np.random.randn(N), index=rng) + + # also test Timestamp tz handling, GH #2789 + result = ts.copy() + result["1990-01-01 09:00:00+00:00"] = 0 + result["1990-01-01 09:00:00+00:00"] = ts[4] + tm.assert_series_equal(result, ts) + + result = ts.copy() + result["1990-01-01 03:00:00-06:00"] = 0 + result["1990-01-01 03:00:00-06:00"] = ts[4] + tm.assert_series_equal(result, ts) + + # repeat with datetimes + result = ts.copy() + result[datetime(1990, 1, 1, 9, tzinfo=tzget("UTC"))] = 0 + result[datetime(1990, 1, 1, 9, tzinfo=tzget("UTC"))] = ts[4] + tm.assert_series_equal(result, ts) + + result = ts.copy() + dt = Timestamp(1990, 1, 1, 3).tz_localize(tzget("US/Central")) + dt = dt.to_pydatetime() + result[dt] = 0 + result[dt] = ts[4] + tm.assert_series_equal(result, ts) + + +def test_getitem_setitem_datetimeindex(): + N = 50 + # testing with timezone, GH #2785 + rng 
= date_range("1/1/1990", periods=N, freq="H", tz="US/Eastern") + ts = Series(np.random.randn(N), index=rng) + + result = ts["1990-01-01 04:00:00"] + expected = ts[4] + assert result == expected + + result = ts.copy() + result["1990-01-01 04:00:00"] = 0 + result["1990-01-01 04:00:00"] = ts[4] + tm.assert_series_equal(result, ts) + + result = ts["1990-01-01 04:00:00":"1990-01-01 07:00:00"] + expected = ts[4:8] + tm.assert_series_equal(result, expected) + + result = ts.copy() + result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = 0 + result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = ts[4:8] + tm.assert_series_equal(result, ts) + + lb = "1990-01-01 04:00:00" + rb = "1990-01-01 07:00:00" + # GH#18435 strings get a pass from tzawareness compat + result = ts[(ts.index >= lb) & (ts.index <= rb)] + expected = ts[4:8] + tm.assert_series_equal(result, expected) + + lb = "1990-01-01 04:00:00-0500" + rb = "1990-01-01 07:00:00-0500" + result = ts[(ts.index >= lb) & (ts.index <= rb)] + expected = ts[4:8] + tm.assert_series_equal(result, expected) + + # But we do not give datetimes a pass on tzawareness compat + msg = "Cannot compare tz-naive and tz-aware datetime-like objects" + naive = datetime(1990, 1, 1, 4) + for key in [naive, Timestamp(naive), np.datetime64(naive, "ns")]: + with tm.assert_produces_warning(FutureWarning): + # GH#36148 will require tzawareness compat + result = ts[key] + expected = ts[4] + assert result == expected + + result = ts.copy() + with tm.assert_produces_warning(FutureWarning): + # GH#36148 will require tzawareness compat + result[datetime(1990, 1, 1, 4)] = 0 + with tm.assert_produces_warning(FutureWarning): + # GH#36148 will require tzawareness compat + result[datetime(1990, 1, 1, 4)] = ts[4] + tm.assert_series_equal(result, ts) + + with tm.assert_produces_warning(FutureWarning): + # GH#36148 will require tzawareness compat + result = ts[datetime(1990, 1, 1, 4) : datetime(1990, 1, 1, 7)] + expected = ts[4:8] + tm.assert_series_equal(result, expected) + + result = ts.copy() + with tm.assert_produces_warning(FutureWarning): + # GH#36148 will require tzawareness compat + result[datetime(1990, 1, 1, 4) : datetime(1990, 1, 1, 7)] = 0 + with tm.assert_produces_warning(FutureWarning): + # GH#36148 will require tzawareness compat + result[datetime(1990, 1, 1, 4) : datetime(1990, 1, 1, 7)] = ts[4:8] + tm.assert_series_equal(result, ts) + + lb = datetime(1990, 1, 1, 4) + rb = datetime(1990, 1, 1, 7) + msg = r"Invalid comparison between dtype=datetime64\[ns, US/Eastern\] and datetime" + with pytest.raises(TypeError, match=msg): + # tznaive vs tzaware comparison is invalid + # see GH#18376, GH#18162 + ts[(ts.index >= lb) & (ts.index <= rb)] + + lb = Timestamp(datetime(1990, 1, 1, 4)).tz_localize(rng.tzinfo) + rb = Timestamp(datetime(1990, 1, 1, 7)).tz_localize(rng.tzinfo) + result = ts[(ts.index >= lb) & (ts.index <= rb)] + expected = ts[4:8] + tm.assert_series_equal(result, expected) + + result = ts[ts.index[4]] + expected = ts[4] + assert result == expected + + result = ts[ts.index[4:8]] + expected = ts[4:8] + tm.assert_series_equal(result, expected) + + result = ts.copy() + result[ts.index[4:8]] = 0 + result.iloc[4:8] = ts.iloc[4:8] + tm.assert_series_equal(result, ts) + + # also test partial date slicing + result = ts["1990-01-02"] + expected = ts[24:48] + tm.assert_series_equal(result, expected) + + result = ts.copy() + result["1990-01-02"] = 0 + result["1990-01-02"] = ts[24:48] + tm.assert_series_equal(result, ts) + + +def test_getitem_setitem_periodindex(): + + N = 50 + rng 
= period_range("1/1/1990", periods=N, freq="H") + ts = Series(np.random.randn(N), index=rng) + + result = ts["1990-01-01 04"] + expected = ts[4] + assert result == expected + + result = ts.copy() + result["1990-01-01 04"] = 0 + result["1990-01-01 04"] = ts[4] + tm.assert_series_equal(result, ts) + + result = ts["1990-01-01 04":"1990-01-01 07"] + expected = ts[4:8] + tm.assert_series_equal(result, expected) + + result = ts.copy() + result["1990-01-01 04":"1990-01-01 07"] = 0 + result["1990-01-01 04":"1990-01-01 07"] = ts[4:8] + tm.assert_series_equal(result, ts) + + lb = "1990-01-01 04" + rb = "1990-01-01 07" + result = ts[(ts.index >= lb) & (ts.index <= rb)] + expected = ts[4:8] + tm.assert_series_equal(result, expected) + + # GH 2782 + result = ts[ts.index[4]] + expected = ts[4] + assert result == expected + + result = ts[ts.index[4:8]] + expected = ts[4:8] + tm.assert_series_equal(result, expected) + + result = ts.copy() + result[ts.index[4:8]] = 0 + result.iloc[4:8] = ts.iloc[4:8] + tm.assert_series_equal(result, ts) + + +def test_datetime_indexing(): + + index = date_range("1/1/2000", "1/7/2000") + index = index.repeat(3) + + s = Series(len(index), index=index) + stamp = Timestamp("1/8/2000") + + with pytest.raises(KeyError, match=re.escape(repr(stamp))): + s[stamp] + s[stamp] = 0 + assert s[stamp] == 0 + + # not monotonic + s = Series(len(index), index=index) + s = s[::-1] + + with pytest.raises(KeyError, match=re.escape(repr(stamp))): + s[stamp] + s[stamp] = 0 + assert s[stamp] == 0 + + +""" +test duplicates in time series +""" + + +def test_indexing_with_duplicate_datetimeindex( + rand_series_with_duplicate_datetimeindex, +): + ts = rand_series_with_duplicate_datetimeindex + + uniques = ts.index.unique() + for date in uniques: + result = ts[date] + + mask = ts.index == date + total = (ts.index == date).sum() + expected = ts[mask] + if total > 1: + tm.assert_series_equal(result, expected) + else: + tm.assert_almost_equal(result, expected[0]) + + cp = ts.copy() + cp[date] = 0 + expected = Series(np.where(mask, 0, ts), index=ts.index) + tm.assert_series_equal(cp, expected) + + key = datetime(2000, 1, 6) + with pytest.raises(KeyError, match=re.escape(repr(key))): + ts[key] + + # new index + ts[datetime(2000, 1, 6)] = 0 + assert ts[datetime(2000, 1, 6)] == 0 + + +def test_loc_getitem_over_size_cutoff(monkeypatch): + # #1821 + + monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 1000) + + # create large list of non periodic datetime + dates = [] + sec = timedelta(seconds=1) + half_sec = timedelta(microseconds=500000) + d = datetime(2011, 12, 5, 20, 30) + n = 1100 + for i in range(n): + dates.append(d) + dates.append(d + sec) + dates.append(d + sec + half_sec) + dates.append(d + sec + sec + half_sec) + d += 3 * sec + + # duplicate some values in the list + duplicate_positions = np.random.randint(0, len(dates) - 1, 20) + for p in duplicate_positions: + dates[p + 1] = dates[p] + + df = DataFrame(np.random.randn(len(dates), 4), index=dates, columns=list("ABCD")) + + pos = n * 3 + timestamp = df.index[pos] + assert timestamp in df.index + + # it works! + df.loc[timestamp] + assert len(df.loc[[timestamp]]) > 0 + + +def test_indexing_over_size_cutoff_period_index(monkeypatch): + # GH 27136 + + monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 1000) + + n = 1100 + idx = period_range("1/1/2000", freq="T", periods=n) + assert idx._engine.over_size_threshold + + s = Series(np.random.randn(len(idx)), index=idx) + + pos = n - 1 + timestamp = idx[pos] + assert timestamp in s.index + + # it works! 
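+ # with _SIZE_CUTOFF monkeypatched down to 1000, this 1100-period index takes + # the over-size lookup path (asserted via over_size_threshold above), and + # scalar getitem plus list-based .loc must behave exactly like the small case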
+ s[timestamp] + assert len(s.loc[[timestamp]]) > 0 + + +def test_indexing_unordered(): + # GH 2437 + rng = date_range(start="2011-01-01", end="2011-01-15") + ts = Series(np.random.rand(len(rng)), index=rng) + ts2 = pd.concat([ts[0:4], ts[-4:], ts[4:-4]]) + + for t in ts.index: + + expected = ts[t] + result = ts2[t] + assert expected == result + + # GH 3448 (ranges) + def compare(slobj): + result = ts2[slobj].copy() + result = result.sort_index() + expected = ts[slobj] + expected.index = expected.index._with_freq(None) + tm.assert_series_equal(result, expected) + + compare(slice("2011-01-01", "2011-01-15")) + with tm.assert_produces_warning(FutureWarning): + compare(slice("2010-12-30", "2011-01-15")) + compare(slice("2011-01-01", "2011-01-16")) + + # partial ranges + compare(slice("2011-01-01", "2011-01-6")) + compare(slice("2011-01-06", "2011-01-8")) + compare(slice("2011-01-06", "2011-01-12")) + + # single values + result = ts2["2011"].sort_index() + expected = ts["2011"] + expected.index = expected.index._with_freq(None) + tm.assert_series_equal(result, expected) + + +def test_indexing_unordered2(): + + # diff freq + rng = date_range(datetime(2005, 1, 1), periods=20, freq="M") + ts = Series(np.arange(len(rng)), index=rng) + ts = ts.take(np.random.permutation(20)) + + result = ts["2005"] + for t in result.index: + assert t.year == 2005 + + +def test_indexing(): + idx = date_range("2001-1-1", periods=20, freq="M") + ts = Series(np.random.rand(len(idx)), index=idx) + + # getting + + # GH 3070, make sure semantics work on Series/Frame + expected = ts["2001"] + expected.name = "A" + + df = DataFrame({"A": ts}) + with tm.assert_produces_warning(FutureWarning): + # GH#36179 string indexing on rows for DataFrame deprecated + result = df["2001"]["A"] + tm.assert_series_equal(expected, result) + + # setting + ts["2001"] = 1 + expected = ts["2001"] + expected.name = "A" + + df.loc["2001", "A"] = 1 + + with tm.assert_produces_warning(FutureWarning): + # GH#36179 string indexing on rows for DataFrame deprecated + result = df["2001"]["A"] + tm.assert_series_equal(expected, result) + + +def test_getitem_str_month_with_datetimeindex(): + # GH3546 (not including times on the last day) + idx = date_range(start="2013-05-31 00:00", end="2013-05-31 23:00", freq="H") + ts = Series(range(len(idx)), index=idx) + expected = ts["2013-05"] + tm.assert_series_equal(expected, ts) + + idx = date_range(start="2013-05-31 00:00", end="2013-05-31 23:59", freq="S") + ts = Series(range(len(idx)), index=idx) + expected = ts["2013-05"] + tm.assert_series_equal(expected, ts) + + +def test_getitem_str_year_with_datetimeindex(): + idx = [ + Timestamp("2013-05-31 00:00"), + Timestamp(datetime(2013, 5, 31, 23, 59, 59, 999999)), + ] + ts = Series(range(len(idx)), index=idx) + expected = ts["2013"] + tm.assert_series_equal(expected, ts) + + +def test_getitem_str_second_with_datetimeindex(): + # GH14826, indexing with a seconds resolution string / datetime object + df = DataFrame( + np.random.rand(5, 5), + columns=["open", "high", "low", "close", "volume"], + index=date_range("2012-01-02 18:01:00", periods=5, tz="US/Central", freq="s"), + ) + + # this is a single date, so will raise + with pytest.raises(KeyError, match=r"^'2012-01-02 18:01:02'$"): + df["2012-01-02 18:01:02"] + + msg = r"Timestamp\('2012-01-02 18:01:02-0600', tz='US/Central', freq='S'\)" + with pytest.raises(KeyError, match=msg): + df[df.index[2]] diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/test_delitem.py 
b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/test_delitem.py new file mode 100644 index 0000000..af6b391 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/test_delitem.py @@ -0,0 +1,73 @@ +import pytest + +from pandas import ( + Index, + Series, + date_range, +) +import pandas._testing as tm + + +class TestSeriesDelItem: + def test_delitem(self): + # GH#5542 + # should delete the item inplace + s = Series(range(5)) + del s[0] + + expected = Series(range(1, 5), index=range(1, 5)) + tm.assert_series_equal(s, expected) + + del s[1] + expected = Series(range(2, 5), index=range(2, 5)) + tm.assert_series_equal(s, expected) + + # only 1 left, del, add, del + s = Series(1) + del s[0] + tm.assert_series_equal(s, Series(dtype="int64", index=Index([], dtype="int64"))) + s[0] = 1 + tm.assert_series_equal(s, Series(1)) + del s[0] + tm.assert_series_equal(s, Series(dtype="int64", index=Index([], dtype="int64"))) + + def test_delitem_object_index(self): + # Index(dtype=object) + s = Series(1, index=["a"]) + del s["a"] + tm.assert_series_equal( + s, Series(dtype="int64", index=Index([], dtype="object")) + ) + s["a"] = 1 + tm.assert_series_equal(s, Series(1, index=["a"])) + del s["a"] + tm.assert_series_equal( + s, Series(dtype="int64", index=Index([], dtype="object")) + ) + + def test_delitem_missing_key(self): + # empty + s = Series(dtype=object) + + with pytest.raises(KeyError, match=r"^0$"): + del s[0] + + def test_delitem_extension_dtype(self): + # GH#40386 + # DatetimeTZDtype + dti = date_range("2016-01-01", periods=3, tz="US/Pacific") + ser = Series(dti) + + expected = ser[[0, 2]] + del ser[1] + assert ser.dtype == dti.dtype + tm.assert_series_equal(ser, expected) + + # PeriodDtype + pi = dti.tz_localize(None).to_period("D") + ser = Series(pi) + + expected = ser[:2] + del ser[2] + assert ser.dtype == pi.dtype + tm.assert_series_equal(ser, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/test_get.py b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/test_get.py new file mode 100644 index 0000000..e8034bd --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/test_get.py @@ -0,0 +1,214 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import Series +import pandas._testing as tm +from pandas.core.api import Float64Index + + +def test_get(): + # GH 6383 + s = Series( + np.array( + [ + 43, + 48, + 60, + 48, + 50, + 51, + 50, + 45, + 57, + 48, + 56, + 45, + 51, + 39, + 55, + 43, + 54, + 52, + 51, + 54, + ] + ) + ) + + result = s.get(25, 0) + expected = 0 + assert result == expected + + s = Series( + np.array( + [ + 43, + 48, + 60, + 48, + 50, + 51, + 50, + 45, + 57, + 48, + 56, + 45, + 51, + 39, + 55, + 43, + 54, + 52, + 51, + 54, + ] + ), + index=Float64Index( + [ + 25.0, + 36.0, + 49.0, + 64.0, + 81.0, + 100.0, + 121.0, + 144.0, + 169.0, + 196.0, + 1225.0, + 1296.0, + 1369.0, + 1444.0, + 1521.0, + 1600.0, + 1681.0, + 1764.0, + 1849.0, + 1936.0, + ] + ), + ) + + result = s.get(25, 0) + expected = 43 + assert result == expected + + # GH 7407 + # with a boolean accessor + df = pd.DataFrame({"i": [0] * 3, "b": [False] * 3}) + vc = df.i.value_counts() + result = vc.get(99, default="Missing") + assert result == "Missing" + + vc = df.b.value_counts() + result = vc.get(False, default="Missing") + assert result == 3 + + result = vc.get(True, default="Missing") + assert result == "Missing" + + +def test_get_nan(): + # GH 8569 + s = 
Float64Index(range(10)).to_series() + assert s.get(np.nan) is None + assert s.get(np.nan, default="Missing") == "Missing" + + +def test_get_nan_multiple(): + # GH 8569 + # ensure that fixing "test_get_nan" above hasn't broken get + # with multiple elements + s = Float64Index(range(10)).to_series() + + idx = [2, 30] + assert s.get(idx) is None + + idx = [2, np.nan] + assert s.get(idx) is None + + # GH 17295 - all missing keys + idx = [20, 30] + assert s.get(idx) is None + + idx = [np.nan, np.nan] + assert s.get(idx) is None + + +def test_get_with_default(): + # GH#7725 + d0 = ["a", "b", "c", "d"] + d1 = np.arange(4, dtype="int64") + others = ["e", 10] + + for data, index in ((d0, d1), (d1, d0)): + s = Series(data, index=index) + for i, d in zip(index, data): + assert s.get(i) == d + assert s.get(i, d) == d + assert s.get(i, "z") == d + for other in others: + assert s.get(other, "z") == "z" + assert s.get(other, other) == other + + +@pytest.mark.parametrize( + "arr", + [np.random.randn(10), tm.makeDateIndex(10, name="a").tz_localize(tz="US/Eastern")], +) +def test_get_with_ea(arr): + # GH#21260 + ser = Series(arr, index=[2 * i for i in range(len(arr))]) + assert ser.get(4) == ser.iloc[2] + + result = ser.get([4, 6]) + expected = ser.iloc[[2, 3]] + tm.assert_series_equal(result, expected) + + result = ser.get(slice(2)) + expected = ser.iloc[[0, 1]] + tm.assert_series_equal(result, expected) + + assert ser.get(-1) is None + assert ser.get(ser.index.max() + 1) is None + + ser = Series(arr[:6], index=list("abcdef")) + assert ser.get("c") == ser.iloc[2] + + result = ser.get(slice("b", "d")) + expected = ser.iloc[[1, 2, 3]] + tm.assert_series_equal(result, expected) + + result = ser.get("Z") + assert result is None + + assert ser.get(4) == ser.iloc[4] + assert ser.get(-1) == ser.iloc[-1] + assert ser.get(len(ser)) is None + + # GH#21257 + ser = Series(arr) + ser2 = ser[::2] + assert ser2.get(1) is None + + +def test_getitem_get(string_series, object_series): + for obj in [string_series, object_series]: + idx = obj.index[5] + + assert obj[idx] == obj.get(idx) + assert obj[idx] == obj[5] + + assert string_series.get(-1) == string_series.get(string_series.index[-1]) + assert string_series[5] == string_series.get(string_series.index[5]) + + +def test_get_none(): + # GH#5652 + s1 = Series(dtype=object) + s2 = Series(dtype=object, index=list("abc")) + for s in [s1, s2]: + result = s.get(None) + assert result is None diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/test_getitem.py b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/test_getitem.py new file mode 100644 index 0000000..0da376c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/test_getitem.py @@ -0,0 +1,714 @@ +""" +Series.__getitem__ test classes are organized by the type of key passed. 
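+Scalar keys, slices, list-likes, boolean masks, and callables each get a class +of their own below; deprecated indexer types are exercised at the end.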
+""" +from datetime import ( + date, + datetime, + time, +) + +import numpy as np +import pytest + +from pandas._libs.tslibs import ( + conversion, + timezones, +) + +from pandas.core.dtypes.common import is_scalar + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + DatetimeIndex, + Index, + Series, + Timestamp, + date_range, + period_range, + timedelta_range, +) +import pandas._testing as tm +from pandas.core.indexing import IndexingError + +from pandas.tseries.offsets import BDay + + +class TestSeriesGetitemScalars: + def test_getitem_object_index_float_string(self): + # GH#17286 + ser = Series([1] * 4, index=Index(["a", "b", "c", 1.0])) + assert ser["a"] == 1 + assert ser[1.0] == 1 + + def test_getitem_float_keys_tuple_values(self): + # see GH#13509 + + # unique Index + ser = Series([(1, 1), (2, 2), (3, 3)], index=[0.0, 0.1, 0.2], name="foo") + result = ser[0.0] + assert result == (1, 1) + + # non-unique Index + expected = Series([(1, 1), (2, 2)], index=[0.0, 0.0], name="foo") + ser = Series([(1, 1), (2, 2), (3, 3)], index=[0.0, 0.0, 0.2], name="foo") + + result = ser[0.0] + tm.assert_series_equal(result, expected) + + def test_getitem_unrecognized_scalar(self): + # GH#32684 a scalar key that is not recognized by lib.is_scalar + + # a series that might be produced via `frame.dtypes` + ser = Series([1, 2], index=[np.dtype("O"), np.dtype("i8")]) + + key = ser.index[1] + + result = ser[key] + assert result == 2 + + def test_getitem_negative_out_of_bounds(self): + ser = Series(tm.rands_array(5, 10), index=tm.rands_array(10, 10)) + + msg = "index -11 is out of bounds for axis 0 with size 10" + with pytest.raises(IndexError, match=msg): + ser[-11] + + def test_getitem_out_of_bounds_indexerror(self, datetime_series): + # don't segfault, GH#495 + msg = r"index \d+ is out of bounds for axis 0 with size \d+" + with pytest.raises(IndexError, match=msg): + datetime_series[len(datetime_series)] + + def test_getitem_out_of_bounds_empty_rangeindex_keyerror(self): + # GH#917 + # With a RangeIndex, an int key gives a KeyError + ser = Series([], dtype=object) + with pytest.raises(KeyError, match="-1"): + ser[-1] + + def test_getitem_keyerror_with_int64index(self): + ser = Series(np.random.randn(6), index=[0, 0, 1, 1, 2, 2]) + + with pytest.raises(KeyError, match=r"^5$"): + ser[5] + + with pytest.raises(KeyError, match=r"^'c'$"): + ser["c"] + + # not monotonic + ser = Series(np.random.randn(6), index=[2, 2, 0, 0, 1, 1]) + + with pytest.raises(KeyError, match=r"^5$"): + ser[5] + + with pytest.raises(KeyError, match=r"^'c'$"): + ser["c"] + + def test_getitem_int64(self, datetime_series): + idx = np.int64(5) + assert datetime_series[idx] == datetime_series[5] + + def test_getitem_full_range(self): + # github.com/pandas-dev/pandas/commit/4f433773141d2eb384325714a2776bcc5b2e20f7 + ser = Series(range(5), index=list(range(5))) + result = ser[list(range(5))] + tm.assert_series_equal(result, ser) + + # ------------------------------------------------------------------ + # Series with DatetimeIndex + + @pytest.mark.parametrize("tzstr", ["Europe/Berlin", "dateutil/Europe/Berlin"]) + def test_getitem_pydatetime_tz(self, tzstr): + tz = timezones.maybe_get_tz(tzstr) + + index = date_range( + start="2012-12-24 16:00", end="2012-12-24 18:00", freq="H", tz=tzstr + ) + ts = Series(index=index, data=index.hour) + time_pandas = Timestamp("2012-12-24 17:00", tz=tzstr) + + dt = datetime(2012, 12, 24, 17, 0) + time_datetime = conversion.localize_pydatetime(dt, tz) + assert ts[time_pandas] == 
ts[time_datetime] + + @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"]) + def test_string_index_alias_tz_aware(self, tz): + rng = date_range("1/1/2000", periods=10, tz=tz) + ser = Series(np.random.randn(len(rng)), index=rng) + + result = ser["1/3/2000"] + tm.assert_almost_equal(result, ser[2]) + + def test_getitem_time_object(self): + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = Series(np.random.randn(len(rng)), index=rng) + + mask = (rng.hour == 9) & (rng.minute == 30) + result = ts[time(9, 30)] + expected = ts[mask] + result.index = result.index._with_freq(None) + tm.assert_series_equal(result, expected) + + # ------------------------------------------------------------------ + # Series with CategoricalIndex + + def test_getitem_scalar_categorical_index(self): + cats = Categorical([Timestamp("12-31-1999"), Timestamp("12-31-2000")]) + + ser = Series([1, 2], index=cats) + + expected = ser.iloc[0] + result = ser[cats[0]] + assert result == expected + + def test_getitem_numeric_categorical_listlike_matches_scalar(self): + # GH#15470 + ser = Series(["a", "b", "c"], index=pd.CategoricalIndex([2, 1, 0])) + + # 0 is treated as a label + assert ser[0] == "c" + + # the listlike analogue should also be treated as labels + res = ser[[0]] + expected = ser.iloc[-1:] + tm.assert_series_equal(res, expected) + + res2 = ser[[0, 1, 2]] + tm.assert_series_equal(res2, ser.iloc[::-1]) + + def test_getitem_integer_categorical_not_positional(self): + # GH#14865 + ser = Series(["a", "b", "c"], index=Index([1, 2, 3], dtype="category")) + assert ser.get(3) == "c" + assert ser[3] == "c" + + def test_getitem_str_with_timedeltaindex(self): + rng = timedelta_range("1 day 10:11:12", freq="h", periods=500) + ser = Series(np.arange(len(rng)), index=rng) + + key = "6 days, 23:11:12" + indexer = rng.get_loc(key) + assert indexer == 133 + + result = ser[key] + assert result == ser.iloc[133] + + msg = r"^Timedelta\('50 days 00:00:00'\)$" + with pytest.raises(KeyError, match=msg): + rng.get_loc("50 days") + with pytest.raises(KeyError, match=msg): + ser["50 days"] + + +class TestSeriesGetitemSlices: + def test_getitem_partial_str_slice_with_datetimeindex(self): + # GH#34860 + arr = date_range("1/1/2008", "1/1/2009") + ser = arr.to_series() + result = ser["2008"] + + rng = date_range(start="2008-01-01", end="2008-12-31") + expected = Series(rng, index=rng) + + tm.assert_series_equal(result, expected) + + def test_getitem_slice_strings_with_datetimeindex(self): + idx = DatetimeIndex( + ["1/1/2000", "1/2/2000", "1/2/2000", "1/3/2000", "1/4/2000"] + ) + + ts = Series(np.random.randn(len(idx)), index=idx) + + result = ts["1/2/2000":] + expected = ts[1:] + tm.assert_series_equal(result, expected) + + result = ts["1/2/2000":"1/3/2000"] + expected = ts[1:4] + tm.assert_series_equal(result, expected) + + def test_getitem_partial_str_slice_with_timedeltaindex(self): + rng = timedelta_range("1 day 10:11:12", freq="h", periods=500) + ser = Series(np.arange(len(rng)), index=rng) + + result = ser["5 day":"6 day"] + expected = ser.iloc[86:134] + tm.assert_series_equal(result, expected) + + result = ser["5 day":] + expected = ser.iloc[86:] + tm.assert_series_equal(result, expected) + + result = ser[:"6 day"] + expected = ser.iloc[:134] + tm.assert_series_equal(result, expected) + + def test_getitem_partial_str_slice_high_reso_with_timedeltaindex(self): + # higher reso + rng = timedelta_range("1 day 10:11:12", freq="us", periods=2000) + ser = Series(np.arange(len(rng)), index=rng) + + result = ser["1 day 
10:11:12":] + expected = ser.iloc[0:] + tm.assert_series_equal(result, expected) + + result = ser["1 day 10:11:12.001":] + expected = ser.iloc[1000:] + tm.assert_series_equal(result, expected) + + result = ser["1 days, 10:11:12.001001"] + assert result == ser.iloc[1001] + + def test_getitem_slice_2d(self, datetime_series): + # GH#30588 multi-dimensional indexing deprecated + + with tm.assert_produces_warning( + FutureWarning, match="Support for multi-dimensional indexing" + ): + # GH#30867 Don't want to support this long-term, but + # for now ensure that the warning from Index + # doesn't comes through via Series.__getitem__. + result = datetime_series[:, np.newaxis] + expected = datetime_series.values[:, np.newaxis] + tm.assert_almost_equal(result, expected) + + # FutureWarning from NumPy. + @pytest.mark.filterwarnings("ignore:Using a non-tuple:FutureWarning") + def test_getitem_median_slice_bug(self): + index = date_range("20090415", "20090519", freq="2B") + s = Series(np.random.randn(13), index=index) + + indexer = [slice(6, 7, None)] + with tm.assert_produces_warning(FutureWarning): + # GH#31299 + result = s[indexer] + expected = s[indexer[0]] + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "slc, positions", + [ + [slice(date(2018, 1, 1), None), [0, 1, 2]], + [slice(date(2019, 1, 2), None), [2]], + [slice(date(2020, 1, 1), None), []], + [slice(None, date(2020, 1, 1)), [0, 1, 2]], + [slice(None, date(2019, 1, 1)), [0]], + ], + ) + def test_getitem_slice_date(self, slc, positions): + # https://github.com/pandas-dev/pandas/issues/31501 + ser = Series( + [0, 1, 2], + DatetimeIndex(["2019-01-01", "2019-01-01T06:00:00", "2019-01-02"]), + ) + result = ser[slc] + expected = ser.take(positions) + tm.assert_series_equal(result, expected) + + def test_getitem_slice_float_raises(self, datetime_series): + msg = ( + "cannot do slice indexing on DatetimeIndex with these indexers " + r"\[{key}\] of type float" + ) + with pytest.raises(TypeError, match=msg.format(key=r"4\.0")): + datetime_series[4.0:10.0] + + with pytest.raises(TypeError, match=msg.format(key=r"4\.5")): + datetime_series[4.5:10.0] + + def test_getitem_slice_bug(self): + ser = Series(range(10), index=list(range(10))) + result = ser[-12:] + tm.assert_series_equal(result, ser) + + result = ser[-7:] + tm.assert_series_equal(result, ser[3:]) + + result = ser[:-12] + tm.assert_series_equal(result, ser[:0]) + + def test_getitem_slice_integers(self): + ser = Series(np.random.randn(8), index=[2, 4, 6, 8, 10, 12, 14, 16]) + + result = ser[:4] + expected = Series(ser.values[:4], index=[2, 4, 6, 8]) + tm.assert_series_equal(result, expected) + + +class TestSeriesGetitemListLike: + @pytest.mark.parametrize("box", [list, np.array, Index, Series]) + def test_getitem_no_matches(self, box): + # GH#33462 we expect the same behavior for list/ndarray/Index/Series + ser = Series(["A", "B"]) + + key = Series(["C"], dtype=object) + key = box(key) + + msg = r"None of \[Index\(\['C'\], dtype='object'\)\] are in the \[index\]" + with pytest.raises(KeyError, match=msg): + ser[key] + + def test_getitem_intlist_intindex_periodvalues(self): + ser = Series(period_range("2000-01-01", periods=10, freq="D")) + + result = ser[[2, 4]] + exp = Series( + [pd.Period("2000-01-03", freq="D"), pd.Period("2000-01-05", freq="D")], + index=[2, 4], + dtype="Period[D]", + ) + tm.assert_series_equal(result, exp) + assert result.dtype == "Period[D]" + + @pytest.mark.parametrize("box", [list, np.array, Index]) + def 
test_getitem_intlist_intervalindex_non_int(self, box): + # GH#33404 fall back to positional since ints are unambiguous + dti = date_range("2000-01-03", periods=3)._with_freq(None) + ii = pd.IntervalIndex.from_breaks(dti) + ser = Series(range(len(ii)), index=ii) + + expected = ser.iloc[:1] + key = box([0]) + result = ser[key] + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("box", [list, np.array, Index]) + @pytest.mark.parametrize("dtype", [np.int64, np.float64, np.uint64]) + def test_getitem_intlist_multiindex_numeric_level(self, dtype, box): + # GH#33404 do _not_ fall back to positional since ints are ambiguous + idx = Index(range(4)).astype(dtype) + dti = date_range("2000-01-03", periods=3) + mi = pd.MultiIndex.from_product([idx, dti]) + ser = Series(range(len(mi))[::-1], index=mi) + + key = box([5]) + with pytest.raises(KeyError, match="5"): + ser[key] + + def test_getitem_uint_array_key(self, any_unsigned_int_numpy_dtype): + # GH #37218 + ser = Series([1, 2, 3]) + key = np.array([4], dtype=any_unsigned_int_numpy_dtype) + + with pytest.raises(KeyError, match="4"): + ser[key] + with pytest.raises(KeyError, match="4"): + ser.loc[key] + + +class TestGetitemBooleanMask: + def test_getitem_boolean(self, string_series): + ser = string_series + mask = ser > ser.median() + + # passing list is OK + result = ser[list(mask)] + expected = ser[mask] + tm.assert_series_equal(result, expected) + tm.assert_index_equal(result.index, ser.index[mask]) + + def test_getitem_boolean_empty(self): + ser = Series([], dtype=np.int64) + ser.index.name = "index_name" + ser = ser[ser.isna()] + assert ser.index.name == "index_name" + assert ser.dtype == np.int64 + + # GH#5877 + # indexing with empty series + ser = Series(["A", "B"]) + expected = Series(dtype=object, index=Index([], dtype="int64")) + result = ser[Series([], dtype=object)] + tm.assert_series_equal(result, expected) + + # invalid because of the boolean indexer + # that's empty or not-aligned + msg = ( + r"Unalignable boolean Series provided as indexer \(index of " + r"the boolean Series and of the indexed object do not match" + ) + with pytest.raises(IndexingError, match=msg): + ser[Series([], dtype=bool)] + + with pytest.raises(IndexingError, match=msg): + ser[Series([True], dtype=bool)] + + def test_getitem_boolean_object(self, string_series): + # using column from DataFrame + + ser = string_series + mask = ser > ser.median() + omask = mask.astype(object) + + # getitem + result = ser[omask] + expected = ser[mask] + tm.assert_series_equal(result, expected) + + # setitem + s2 = ser.copy() + cop = ser.copy() + cop[omask] = 5 + s2[mask] = 5 + tm.assert_series_equal(cop, s2) + + # nans raise exception + omask[5:10] = np.nan + msg = "Cannot mask with non-boolean array containing NA / NaN values" + with pytest.raises(ValueError, match=msg): + ser[omask] + with pytest.raises(ValueError, match=msg): + ser[omask] = 5 + + def test_getitem_boolean_dt64_copies(self): + # GH#36210 + dti = date_range("2016-01-01", periods=4, tz="US/Pacific") + key = np.array([True, True, False, False]) + + ser = Series(dti._data) + + res = ser[key] + assert res._values._data.base is None + + # compare with numeric case for reference + ser2 = Series(range(4)) + res2 = ser2[key] + assert res2._values.base is None + + def test_getitem_boolean_corner(self, datetime_series): + ts = datetime_series + mask_shifted = ts.shift(1, freq=BDay()) > ts.median() + + msg = ( + r"Unalignable boolean Series provided as indexer \(index of " + r"the boolean Series and of the 
indexed object do not match" + ) + with pytest.raises(IndexingError, match=msg): + ts[mask_shifted] + + with pytest.raises(IndexingError, match=msg): + ts.loc[mask_shifted] + + def test_getitem_boolean_different_order(self, string_series): + ordered = string_series.sort_values() + + sel = string_series[ordered > 0] + exp = string_series[string_series > 0] + tm.assert_series_equal(sel, exp) + + def test_getitem_boolean_contiguous_preserve_freq(self): + rng = date_range("1/1/2000", "3/1/2000", freq="B") + + mask = np.zeros(len(rng), dtype=bool) + mask[10:20] = True + + masked = rng[mask] + expected = rng[10:20] + assert expected.freq == rng.freq + tm.assert_index_equal(masked, expected) + + mask[22] = True + masked = rng[mask] + assert masked.freq is None + + +class TestGetitemCallable: + def test_getitem_callable(self): + # GH#12533 + ser = Series(4, index=list("ABCD")) + result = ser[lambda x: "A"] + assert result == ser.loc["A"] + + result = ser[lambda x: ["A", "B"]] + expected = ser.loc[["A", "B"]] + tm.assert_series_equal(result, expected) + + result = ser[lambda x: [True, False, True, True]] + expected = ser.iloc[[0, 2, 3]] + tm.assert_series_equal(result, expected) + + +def test_getitem_generator(string_series): + gen = (x > 0 for x in string_series) + result = string_series[gen] + result2 = string_series[iter(string_series > 0)] + expected = string_series[string_series > 0] + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result2, expected) + + +@pytest.mark.parametrize( + "series", + [ + Series([0, 1]), + Series(date_range("2012-01-01", periods=2)), + Series(date_range("2012-01-01", periods=2, tz="CET")), + ], +) +def test_getitem_ndim_deprecated(series): + with tm.assert_produces_warning( + FutureWarning, + match="Support for multi-dimensional indexing", + ): + result = series[:, None] + + expected = np.asarray(series)[:, None] + tm.assert_numpy_array_equal(result, expected) + + +def test_getitem_multilevel_scalar_slice_not_implemented( + multiindex_year_month_day_dataframe_random_data, +): + # not implementing this for now + df = multiindex_year_month_day_dataframe_random_data + ser = df["A"] + + msg = r"\(2000, slice\(3, 4, None\)\)" + with pytest.raises(TypeError, match=msg): + ser[2000, 3:4] + + +def test_getitem_dataframe_raises(): + rng = list(range(10)) + ser = Series(10, index=rng) + df = DataFrame(rng, index=rng) + msg = ( + "Indexing a Series with DataFrame is not supported, " + "use the appropriate DataFrame column" + ) + with pytest.raises(TypeError, match=msg): + ser[df > 5] + + +def test_getitem_assignment_series_alignment(): + # https://github.com/pandas-dev/pandas/issues/37427 + # with getitem, when assigning with a Series, it is not first aligned + ser = Series(range(10)) + idx = np.array([2, 4, 9]) + ser[idx] = Series([10, 11, 12]) + expected = Series([0, 1, 10, 3, 11, 5, 6, 7, 8, 12]) + tm.assert_series_equal(ser, expected) + + +def test_getitem_duplicate_index_mistyped_key_raises_keyerror(): + # GH#29189 float_index.get_loc(None) should raise KeyError, not TypeError + ser = Series([2, 5, 6, 8], index=[2.0, 4.0, 4.0, 5.0]) + with pytest.raises(KeyError, match="None"): + ser[None] + + with pytest.raises(KeyError, match="None"): + ser.index.get_loc(None) + + with pytest.raises(KeyError, match="None"): + ser.index._engine.get_loc(None) + + +def test_getitem_1tuple_slice_without_multiindex(): + ser = Series(range(5)) + key = (slice(3),) + + result = ser[key] + expected = ser[key[0]] + tm.assert_series_equal(result, expected) + + +def 
test_getitem_preserve_name(datetime_series): + result = datetime_series[datetime_series > 0] + assert result.name == datetime_series.name + + result = datetime_series[[0, 2, 4]] + assert result.name == datetime_series.name + + result = datetime_series[5:10] + assert result.name == datetime_series.name + + +def test_getitem_with_integer_labels(): + # integer indexes, be careful + ser = Series(np.random.randn(10), index=list(range(0, 20, 2))) + inds = [0, 2, 5, 7, 8] + arr_inds = np.array([0, 2, 5, 7, 8]) + with pytest.raises(KeyError, match="not in index"): + ser[inds] + + with pytest.raises(KeyError, match="not in index"): + ser[arr_inds] + + +def test_getitem_missing(datetime_series): + # missing + d = datetime_series.index[0] - BDay() + msg = r"Timestamp\('1999-12-31 00:00:00', freq='B'\)" + with pytest.raises(KeyError, match=msg): + datetime_series[d] + + +def test_getitem_fancy(string_series, object_series): + slice1 = string_series[[1, 2, 3]] + slice2 = object_series[[1, 2, 3]] + assert string_series.index[2] == slice1.index[1] + assert object_series.index[2] == slice2.index[1] + assert string_series[2] == slice1[1] + assert object_series[2] == slice2[1] + + +def test_getitem_box_float64(datetime_series): + value = datetime_series[5] + assert isinstance(value, np.float64) + + +def test_getitem_unordered_dup(): + obj = Series(range(5), index=["c", "a", "a", "b", "b"]) + assert is_scalar(obj["c"]) + assert obj["c"] == 0 + + +def test_getitem_dups(): + ser = Series(range(5), index=["A", "A", "B", "C", "C"], dtype=np.int64) + expected = Series([3, 4], index=["C", "C"], dtype=np.int64) + result = ser["C"] + tm.assert_series_equal(result, expected) + + +def test_getitem_categorical_str(): + # GH#31765 + ser = Series(range(5), index=Categorical(["a", "b", "c", "a", "b"])) + result = ser["a"] + expected = ser.iloc[[0, 3]] + tm.assert_series_equal(result, expected) + + # Check the intermediate steps work as expected + with tm.assert_produces_warning(FutureWarning): + result = ser.index.get_value(ser, "a") + tm.assert_series_equal(result, expected) + + +def test_slice_can_reorder_not_uniquely_indexed(): + ser = Series(1, index=["a", "a", "b", "b", "c"]) + ser[::-1] # it works! 
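+
+
+# Illustrative sketch, not from the upstream pandas suite (the test name
+# below is a hypothetical, editor-assumed helper): the getitem tests above
+# all hinge on one rule -- an integer key is treated as a label when the
+# index itself contains integers, and only falls back to positional
+# indexing when the index has no integer labels.
+def test_getitem_int_key_label_vs_positional_sketch():
+    ser_int = Series([10, 20], index=[1, 2])
+    assert ser_int[1] == 10  # integer index: lookup is label-based
+
+    ser_str = Series([10, 20], index=["a", "b"])
+    assert ser_str[1] == 20  # no integer labels: positional fallback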
+ + +@pytest.mark.parametrize("index_vals", ["aabcd", "aadcb"]) +def test_duplicated_index_getitem_positional_indexer(index_vals): + # GH 11747 + s = Series(range(5), index=list(index_vals)) + result = s[3] + assert result == 3 + + +class TestGetitemDeprecatedIndexers: + @pytest.mark.parametrize("key", [{1}, {1: 1}]) + def test_getitem_dict_and_set_deprecated(self, key): + # GH#42825 + ser = Series([1, 2, 3]) + with tm.assert_produces_warning(FutureWarning): + ser[key] + + @pytest.mark.parametrize("key", [{1}, {1: 1}]) + def test_setitem_dict_and_set_deprecated(self, key): + # GH#42825 + ser = Series([1, 2, 3]) + with tm.assert_produces_warning(FutureWarning): + ser[key] = 1 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/test_indexing.py b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/test_indexing.py new file mode 100644 index 0000000..25fc93a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/test_indexing.py @@ -0,0 +1,360 @@ +""" test get/set & misc """ +from datetime import timedelta +import re + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + IndexSlice, + MultiIndex, + Series, + Timedelta, + Timestamp, + concat, + date_range, + period_range, + timedelta_range, +) +import pandas._testing as tm + + +def test_basic_indexing(): + s = Series(np.random.randn(5), index=["a", "b", "a", "a", "b"]) + + msg = "index 5 is out of bounds for axis 0 with size 5" + with pytest.raises(IndexError, match=msg): + s[5] + with pytest.raises(IndexError, match=msg): + s[5] = 0 + + with pytest.raises(KeyError, match=r"^'c'$"): + s["c"] + + s = s.sort_index() + + with pytest.raises(IndexError, match=msg): + s[5] + msg = r"index 5 is out of bounds for axis (0|1) with size 5|^5$" + with pytest.raises(IndexError, match=msg): + s[5] = 0 + + +def test_basic_getitem_with_labels(datetime_series): + indices = datetime_series.index[[5, 10, 15]] + + result = datetime_series[indices] + expected = datetime_series.reindex(indices) + tm.assert_series_equal(result, expected) + + result = datetime_series[indices[0] : indices[2]] + expected = datetime_series.loc[indices[0] : indices[2]] + tm.assert_series_equal(result, expected) + + +def test_basic_getitem_dt64tz_values(): + + # GH12089 + # with tz for values + ser = Series( + date_range("2011-01-01", periods=3, tz="US/Eastern"), index=["a", "b", "c"] + ) + expected = Timestamp("2011-01-01", tz="US/Eastern") + result = ser.loc["a"] + assert result == expected + result = ser.iloc[0] + assert result == expected + result = ser["a"] + assert result == expected + + +def test_getitem_setitem_ellipsis(): + s = Series(np.random.randn(10)) + + np.fix(s) + + result = s[...] + tm.assert_series_equal(result, s) + + s[...] 
= 5 + assert (result == 5).all() + + +@pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") +@pytest.mark.parametrize( + "result_1, duplicate_item, expected_1", + [ + [ + Series({1: 12, 2: [1, 2, 2, 3]}), + Series({1: 313}), + Series({1: 12}, dtype=object), + ], + [ + Series({1: [1, 2, 3], 2: [1, 2, 2, 3]}), + Series({1: [1, 2, 3]}), + Series({1: [1, 2, 3]}), + ], + ], +) +def test_getitem_with_duplicates_indices(result_1, duplicate_item, expected_1): + # GH 17610 + result = result_1.append(duplicate_item) + expected = expected_1.append(duplicate_item) + tm.assert_series_equal(result[1], expected) + assert result[2] == result_1[2] + + +def test_getitem_setitem_integers(): + # caused bug without test + s = Series([1, 2, 3], ["a", "b", "c"]) + + assert s.iloc[0] == s["a"] + s.iloc[0] = 5 + tm.assert_almost_equal(s["a"], 5) + + +def test_series_box_timestamp(): + rng = date_range("20090415", "20090519", freq="B") + ser = Series(rng) + assert isinstance(ser[0], Timestamp) + assert isinstance(ser.at[1], Timestamp) + assert isinstance(ser.iat[2], Timestamp) + assert isinstance(ser.loc[3], Timestamp) + assert isinstance(ser.iloc[4], Timestamp) + + ser = Series(rng, index=rng) + assert isinstance(ser[0], Timestamp) + assert isinstance(ser.at[rng[1]], Timestamp) + assert isinstance(ser.iat[2], Timestamp) + assert isinstance(ser.loc[rng[3]], Timestamp) + assert isinstance(ser.iloc[4], Timestamp) + + +def test_series_box_timedelta(): + rng = timedelta_range("1 day 1 s", periods=5, freq="h") + ser = Series(rng) + assert isinstance(ser[0], Timedelta) + assert isinstance(ser.at[1], Timedelta) + assert isinstance(ser.iat[2], Timedelta) + assert isinstance(ser.loc[3], Timedelta) + assert isinstance(ser.iloc[4], Timedelta) + + +def test_getitem_ambiguous_keyerror(indexer_sl): + ser = Series(range(10), index=list(range(0, 20, 2))) + with pytest.raises(KeyError, match=r"^1$"): + indexer_sl(ser)[1] + + +def test_getitem_dups_with_missing(indexer_sl): + # breaks reindex, so need to use .loc internally + # GH 4246 + ser = Series([1, 2, 3, 4], ["foo", "bar", "foo", "bah"]) + with pytest.raises(KeyError, match=re.escape("['bam'] not in index")): + indexer_sl(ser)[["foo", "bar", "bah", "bam"]] + + +def test_setitem_ambiguous_keyerror(indexer_sl): + s = Series(range(10), index=list(range(0, 20, 2))) + + # equivalent of an append + s2 = s.copy() + indexer_sl(s2)[1] = 5 + expected = concat([s, Series([5], index=[1])]) + tm.assert_series_equal(s2, expected) + + +def test_setitem(datetime_series, string_series): + datetime_series[datetime_series.index[5]] = np.NaN + datetime_series[[1, 2, 17]] = np.NaN + datetime_series[6] = np.NaN + assert np.isnan(datetime_series[6]) + assert np.isnan(datetime_series[2]) + datetime_series[np.isnan(datetime_series)] = 5 + assert not np.isnan(datetime_series[2]) + + +def test_setslice(datetime_series): + sl = datetime_series[5:20] + assert len(sl) == len(sl.index) + assert sl.index.is_unique is True + + +# FutureWarning from NumPy about [slice(None, 5)]. +@pytest.mark.filterwarnings("ignore:Using a non-tuple:FutureWarning") +def test_basic_getitem_setitem_corner(datetime_series): + # invalid tuples, e.g. td.ts[:, None] vs. td.ts[:, 2] + msg = "key of type tuple not found and not a MultiIndex" + with pytest.raises(KeyError, match=msg): + datetime_series[:, 2] + with pytest.raises(KeyError, match=msg): + datetime_series[:, 2] = 2 + + # weird lists. 
[slice(0, 5)] will work but not two slices + with tm.assert_produces_warning(FutureWarning): + # GH#31299 + result = datetime_series[[slice(None, 5)]] + expected = datetime_series[:5] + tm.assert_series_equal(result, expected) + + # OK + msg = r"unhashable type(: 'slice')?" + with pytest.raises(TypeError, match=msg): + datetime_series[[5, slice(None, None)]] + with pytest.raises(TypeError, match=msg): + datetime_series[[5, slice(None, None)]] = 2 + + +def test_slice(string_series, object_series): + numSlice = string_series[10:20] + numSliceEnd = string_series[-10:] + objSlice = object_series[10:20] + + assert string_series.index[9] not in numSlice.index + assert object_series.index[9] not in objSlice.index + + assert len(numSlice) == len(numSlice.index) + assert string_series[numSlice.index[0]] == numSlice[numSlice.index[0]] + + assert numSlice.index[1] == string_series.index[11] + assert tm.equalContents(numSliceEnd, np.array(string_series)[-10:]) + + # Test return view. + sl = string_series[10:20] + sl[:] = 0 + + assert (string_series[10:20] == 0).all() + + +def test_timedelta_assignment(): + # GH 8209 + s = Series([], dtype=object) + s.loc["B"] = timedelta(1) + tm.assert_series_equal(s, Series(Timedelta("1 days"), index=["B"])) + + s = s.reindex(s.index.insert(0, "A")) + tm.assert_series_equal(s, Series([np.nan, Timedelta("1 days")], index=["A", "B"])) + + s.loc["A"] = timedelta(1) + expected = Series(Timedelta("1 days"), index=["A", "B"]) + tm.assert_series_equal(s, expected) + + +def test_underlying_data_conversion(): + # GH 4080 + df = DataFrame({c: [1, 2, 3] for c in ["a", "b", "c"]}) + return_value = df.set_index(["a", "b", "c"], inplace=True) + assert return_value is None + s = Series([1], index=[(2, 2, 2)]) + df["val"] = 0 + df + df["val"].update(s) + + expected = DataFrame( + {"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3], "val": [0, 1, 0]} + ) + return_value = expected.set_index(["a", "b", "c"], inplace=True) + assert return_value is None + tm.assert_frame_equal(df, expected) + + +def test_preserve_refs(datetime_series): + seq = datetime_series[[5, 10, 15]] + seq[1] = np.NaN + assert not np.isnan(datetime_series[10]) + + +def test_multilevel_preserve_name(lexsorted_two_level_string_multiindex, indexer_sl): + index = lexsorted_two_level_string_multiindex + ser = Series(np.random.randn(len(index)), index=index, name="sth") + + result = indexer_sl(ser)["foo"] + assert result.name == ser.name + + +""" +miscellaneous methods +""" + + +@pytest.mark.parametrize( + "index", + [ + date_range("2014-01-01", periods=20, freq="MS"), + period_range("2014-01", periods=20, freq="M"), + timedelta_range("0", periods=20, freq="H"), + ], +) +def test_slice_with_negative_step(index): + keystr1 = str(index[9]) + keystr2 = str(index[13]) + + ser = Series(np.arange(20), index) + SLC = IndexSlice + + for key in [keystr1, index[9]]: + tm.assert_indexing_slices_equivalent(ser, SLC[key::-1], SLC[9::-1]) + tm.assert_indexing_slices_equivalent(ser, SLC[:key:-1], SLC[:8:-1]) + + for key2 in [keystr2, index[13]]: + tm.assert_indexing_slices_equivalent(ser, SLC[key2:key:-1], SLC[13:8:-1]) + tm.assert_indexing_slices_equivalent(ser, SLC[key:key2:-1], SLC[0:0:-1]) + + +def test_tuple_index(): + # GH 35534 - Selecting values when a Series has an Index of tuples + s = Series([1, 2], index=[("a",), ("b",)]) + assert s[("a",)] == 1 + assert s[("b",)] == 2 + s[("b",)] = 3 + assert s[("b",)] == 3 + + +def test_frozenset_index(): + # GH35747 - Selecting values when a Series has an Index of frozenset + idx0, idx1 = 
frozenset("a"), frozenset("b") + s = Series([1, 2], index=[idx0, idx1]) + assert s[idx0] == 1 + assert s[idx1] == 2 + s[idx1] = 3 + assert s[idx1] == 3 + + +def test_loc_setitem_all_false_indexer(): + # GH#45778 + ser = Series([1, 2], index=["a", "b"]) + expected = ser.copy() + rhs = Series([6, 7], index=["a", "b"]) + ser.loc[ser > 100] = rhs + tm.assert_series_equal(ser, expected) + + +class TestDepreactedIndexers: + @pytest.mark.parametrize("key", [{1}, {1: 1}]) + def test_getitem_dict_and_set_deprecated(self, key): + # GH#42825 + ser = Series([1, 2]) + with tm.assert_produces_warning(FutureWarning): + ser.loc[key] + + @pytest.mark.parametrize("key", [{1}, {1: 1}, ({1}, 2), ({1: 1}, 2)]) + def test_getitem_dict_and_set_deprecated_multiindex(self, key): + # GH#42825 + ser = Series([1, 2], index=MultiIndex.from_tuples([(1, 2), (3, 4)])) + with tm.assert_produces_warning(FutureWarning): + ser.loc[key] + + @pytest.mark.parametrize("key", [{1}, {1: 1}]) + def test_setitem_dict_and_set_deprecated(self, key): + # GH#42825 + ser = Series([1, 2]) + with tm.assert_produces_warning(FutureWarning): + ser.loc[key] = 1 + + @pytest.mark.parametrize("key", [{1}, {1: 1}, ({1}, 2), ({1: 1}, 2)]) + def test_setitem_dict_and_set_deprecated_multiindex(self, key): + # GH#42825 + ser = Series([1, 2], index=MultiIndex.from_tuples([(1, 2), (3, 4)])) + with tm.assert_produces_warning(FutureWarning): + ser.loc[key] = 1 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/test_mask.py b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/test_mask.py new file mode 100644 index 0000000..28235a8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/test_mask.py @@ -0,0 +1,69 @@ +import numpy as np +import pytest + +from pandas import Series +import pandas._testing as tm + + +def test_mask(): + # compare with tested results in test_where + s = Series(np.random.randn(5)) + cond = s > 0 + + rs = s.where(~cond, np.nan) + tm.assert_series_equal(rs, s.mask(cond)) + + rs = s.where(~cond) + rs2 = s.mask(cond) + tm.assert_series_equal(rs, rs2) + + rs = s.where(~cond, -s) + rs2 = s.mask(cond, -s) + tm.assert_series_equal(rs, rs2) + + cond = Series([True, False, False, True, False], index=s.index) + s2 = -(s.abs()) + rs = s2.where(~cond[:3]) + rs2 = s2.mask(cond[:3]) + tm.assert_series_equal(rs, rs2) + + rs = s2.where(~cond[:3], -s2) + rs2 = s2.mask(cond[:3], -s2) + tm.assert_series_equal(rs, rs2) + + msg = "Array conditional must be same shape as self" + with pytest.raises(ValueError, match=msg): + s.mask(1) + with pytest.raises(ValueError, match=msg): + s.mask(cond[:3].values, -s) + + +def test_mask_casts(): + # dtype changes + ser = Series([1, 2, 3, 4]) + result = ser.mask(ser > 2, np.nan) + expected = Series([1, 2, np.nan, np.nan]) + tm.assert_series_equal(result, expected) + + +def test_mask_casts2(): + # see gh-21891 + ser = Series([1, 2]) + res = ser.mask([True, False]) + + exp = Series([np.nan, 2]) + tm.assert_series_equal(res, exp) + + +def test_mask_inplace(): + s = Series(np.random.randn(5)) + cond = s > 0 + + rs = s.copy() + rs.mask(cond, inplace=True) + tm.assert_series_equal(rs.dropna(), s[~cond]) + tm.assert_series_equal(rs, s.mask(cond)) + + rs = s.copy() + rs.mask(cond, -s, inplace=True) + tm.assert_series_equal(rs, s.mask(cond, -s)) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/test_set_value.py b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/test_set_value.py new file mode 100644 index 
0000000..cbe1a8b --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/test_set_value.py @@ -0,0 +1,45 @@ +from datetime import datetime + +import numpy as np + +from pandas import ( + DatetimeIndex, + Series, +) +import pandas._testing as tm + + +def test_series_set_value(): + # GH#1561 + + dates = [datetime(2001, 1, 1), datetime(2001, 1, 2)] + index = DatetimeIndex(dates) + + s = Series(dtype=object) + s._set_value(dates[0], 1.0) + s._set_value(dates[1], np.nan) + + expected = Series([1.0, np.nan], index=index) + + tm.assert_series_equal(s, expected) + + +def test_set_value_dt64(datetime_series): + idx = datetime_series.index[10] + res = datetime_series._set_value(idx, 0) + assert res is None + assert datetime_series[idx] == 0 + + +def test_set_value_str_index(string_series): + # equiv + ser = string_series.copy() + res = ser._set_value("foobar", 0) + assert res is None + assert ser.index[-1] == "foobar" + assert ser["foobar"] == 0 + + ser2 = string_series.copy() + ser2.loc["foobar"] = 0 + assert ser2.index[-1] == "foobar" + assert ser2["foobar"] == 0 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/test_setitem.py b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/test_setitem.py new file mode 100644 index 0000000..fb07b28 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/test_setitem.py @@ -0,0 +1,1546 @@ +from datetime import ( + date, + datetime, +) + +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_list_like + +from pandas import ( + Categorical, + DataFrame, + DatetimeIndex, + Index, + Interval, + IntervalIndex, + MultiIndex, + NaT, + Series, + Timedelta, + Timestamp, + concat, + date_range, + period_range, +) +import pandas._testing as tm +from pandas.core.indexing import IndexingError + +from pandas.tseries.offsets import BDay + + +class TestSetitemDT64Values: + def test_setitem_none_nan(self): + series = Series(date_range("1/1/2000", periods=10)) + series[3] = None + assert series[3] is NaT + + series[3:5] = None + assert series[4] is NaT + + series[5] = np.nan + assert series[5] is NaT + + series[5:7] = np.nan + assert series[6] is NaT + + def test_setitem_multiindex_empty_slice(self): + # https://github.com/pandas-dev/pandas/issues/35878 + idx = MultiIndex.from_tuples([("a", 1), ("b", 2)]) + result = Series([1, 2], index=idx) + expected = result.copy() + result.loc[[]] = 0 + tm.assert_series_equal(result, expected) + + def test_setitem_with_string_index(self): + # GH#23451 + ser = Series([1, 2, 3], index=["Date", "b", "other"]) + ser["Date"] = date.today() + assert ser.Date == date.today() + assert ser["Date"] == date.today() + + def test_setitem_tuple_with_datetimetz_values(self): + # GH#20441 + arr = date_range("2017", periods=4, tz="US/Eastern") + index = [(0, 1), (0, 2), (0, 3), (0, 4)] + result = Series(arr, index=index) + expected = result.copy() + result[(0, 1)] = np.nan + expected.iloc[0] = np.nan + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("tz", ["US/Eastern", "UTC", "Asia/Tokyo"]) + def test_setitem_with_tz(self, tz, indexer_sli): + orig = Series(date_range("2016-01-01", freq="H", periods=3, tz=tz)) + assert orig.dtype == f"datetime64[ns, {tz}]" + + exp = Series( + [ + Timestamp("2016-01-01 00:00", tz=tz), + Timestamp("2011-01-01 00:00", tz=tz), + Timestamp("2016-01-01 02:00", tz=tz), + ] + ) + + # scalar + ser = orig.copy() + indexer_sli(ser)[1] = Timestamp("2011-01-01", tz=tz) + 
tm.assert_series_equal(ser, exp) + + # vector + vals = Series( + [Timestamp("2011-01-01", tz=tz), Timestamp("2012-01-01", tz=tz)], + index=[1, 2], + ) + assert vals.dtype == f"datetime64[ns, {tz}]" + + exp = Series( + [ + Timestamp("2016-01-01 00:00", tz=tz), + Timestamp("2011-01-01 00:00", tz=tz), + Timestamp("2012-01-01 00:00", tz=tz), + ] + ) + + ser = orig.copy() + indexer_sli(ser)[[1, 2]] = vals + tm.assert_series_equal(ser, exp) + + def test_setitem_with_tz_dst(self, indexer_sli): + # GH#14146 trouble setting values near DST boundary + tz = "US/Eastern" + orig = Series(date_range("2016-11-06", freq="H", periods=3, tz=tz)) + assert orig.dtype == f"datetime64[ns, {tz}]" + + exp = Series( + [ + Timestamp("2016-11-06 00:00-04:00", tz=tz), + Timestamp("2011-01-01 00:00-05:00", tz=tz), + Timestamp("2016-11-06 01:00-05:00", tz=tz), + ] + ) + + # scalar + ser = orig.copy() + indexer_sli(ser)[1] = Timestamp("2011-01-01", tz=tz) + tm.assert_series_equal(ser, exp) + + # vector + vals = Series( + [Timestamp("2011-01-01", tz=tz), Timestamp("2012-01-01", tz=tz)], + index=[1, 2], + ) + assert vals.dtype == f"datetime64[ns, {tz}]" + + exp = Series( + [ + Timestamp("2016-11-06 00:00", tz=tz), + Timestamp("2011-01-01 00:00", tz=tz), + Timestamp("2012-01-01 00:00", tz=tz), + ] + ) + + ser = orig.copy() + indexer_sli(ser)[[1, 2]] = vals + tm.assert_series_equal(ser, exp) + + def test_object_series_setitem_dt64array_exact_match(self): + # make sure the dt64 isn't cast by numpy to integers + # https://github.com/numpy/numpy/issues/12550 + + ser = Series({"X": np.nan}, dtype=object) + + indexer = [True] + + # "exact_match" -> size of array being set matches size of ser + value = np.array([4], dtype="M8[ns]") + + ser.iloc[indexer] = value + + expected = Series([value[0]], index=["X"], dtype=object) + assert all(isinstance(x, np.datetime64) for x in expected.values) + + tm.assert_series_equal(ser, expected) + + +class TestSetitemScalarIndexer: + def test_setitem_negative_out_of_bounds(self): + ser = Series(tm.rands_array(5, 10), index=tm.rands_array(10, 10)) + + msg = "index -11 is out of bounds for axis 0 with size 10" + with pytest.raises(IndexError, match=msg): + ser[-11] = "foo" + + @pytest.mark.parametrize("indexer", [tm.loc, tm.at]) + @pytest.mark.parametrize("ser_index", [0, 1]) + def test_setitem_series_object_dtype(self, indexer, ser_index): + # GH#38303 + ser = Series([0, 0], dtype="object") + idxr = indexer(ser) + idxr[0] = Series([42], index=[ser_index]) + expected = Series([Series([42], index=[ser_index]), 0], dtype="object") + tm.assert_series_equal(ser, expected) + + @pytest.mark.parametrize("index, exp_value", [(0, 42), (1, np.nan)]) + def test_setitem_series(self, index, exp_value): + # GH#38303 + ser = Series([0, 0]) + ser.loc[0] = Series([42], index=[index]) + expected = Series([exp_value, 0]) + tm.assert_series_equal(ser, expected) + + +class TestSetitemSlices: + def test_setitem_slice_float_raises(self, datetime_series): + msg = ( + "cannot do slice indexing on DatetimeIndex with these indexers " + r"\[{key}\] of type float" + ) + with pytest.raises(TypeError, match=msg.format(key=r"4\.0")): + datetime_series[4.0:10.0] = 0 + + with pytest.raises(TypeError, match=msg.format(key=r"4\.5")): + datetime_series[4.5:10.0] = 0 + + def test_setitem_slice(self): + ser = Series(range(10), index=list(range(10))) + ser[-12:] = 0 + assert (ser == 0).all() + + ser[:-12] = 5 + assert (ser == 0).all() + + def test_setitem_slice_integers(self): + ser = Series(np.random.randn(8), index=[2, 4, 6, 8, 10, 12, 
14, 16]) + + ser[:4] = 0 + assert (ser[:4] == 0).all() + assert not (ser[4:] == 0).any() + + def test_setitem_slicestep(self): + # caught this bug when writing tests + series = Series(tm.makeIntIndex(20).astype(float), index=tm.makeIntIndex(20)) + + series[::2] = 0 + assert (series[::2] == 0).all() + + def test_setitem_multiindex_slice(self, indexer_sli): + # GH 8856 + mi = MultiIndex.from_product(([0, 1], list("abcde"))) + result = Series(np.arange(10, dtype=np.int64), mi) + indexer_sli(result)[::4] = 100 + expected = Series([100, 1, 2, 3, 100, 5, 6, 7, 100, 9], mi) + tm.assert_series_equal(result, expected) + + +class TestSetitemBooleanMask: + def test_setitem_mask_cast(self): + # GH#2746 + # need to upcast + ser = Series([1, 2], index=[1, 2], dtype="int64") + ser[[True, False]] = Series([0], index=[1], dtype="int64") + expected = Series([0, 2], index=[1, 2], dtype="int64") + + tm.assert_series_equal(ser, expected) + + def test_setitem_mask_align_and_promote(self): + # GH#8387: test that changing types does not break alignment + ts = Series(np.random.randn(100), index=np.arange(100, 0, -1)).round(5) + mask = ts > 0 + left = ts.copy() + right = ts[mask].copy().map(str) + left[mask] = right + expected = ts.map(lambda t: str(t) if t > 0 else t) + tm.assert_series_equal(left, expected) + + def test_setitem_mask_promote_strs(self): + ser = Series([0, 1, 2, 0]) + mask = ser > 0 + ser2 = ser[mask].map(str) + ser[mask] = ser2 + + expected = Series([0, "1", "2", 0]) + tm.assert_series_equal(ser, expected) + + def test_setitem_mask_promote(self): + ser = Series([0, "foo", "bar", 0]) + mask = Series([False, True, True, False]) + ser2 = ser[mask] + ser[mask] = ser2 + + expected = Series([0, "foo", "bar", 0]) + tm.assert_series_equal(ser, expected) + + def test_setitem_boolean(self, string_series): + mask = string_series > string_series.median() + + # similar indexed series + result = string_series.copy() + result[mask] = string_series * 2 + expected = string_series * 2 + tm.assert_series_equal(result[mask], expected[mask]) + + # needs alignment + result = string_series.copy() + result[mask] = (string_series * 2)[0:5] + expected = (string_series * 2)[0:5].reindex_like(string_series) + expected[-mask] = string_series[mask] + tm.assert_series_equal(result[mask], expected[mask]) + + def test_setitem_boolean_corner(self, datetime_series): + ts = datetime_series + mask_shifted = ts.shift(1, freq=BDay()) > ts.median() + + msg = ( + r"Unalignable boolean Series provided as indexer \(index of " + r"the boolean Series and of the indexed object do not match" + ) + with pytest.raises(IndexingError, match=msg): + ts[mask_shifted] = 1 + + with pytest.raises(IndexingError, match=msg): + ts.loc[mask_shifted] = 1 + + def test_setitem_boolean_different_order(self, string_series): + ordered = string_series.sort_values() + + copy = string_series.copy() + copy[ordered > 0] = 0 + + expected = string_series.copy() + expected[expected > 0] = 0 + + tm.assert_series_equal(copy, expected) + + @pytest.mark.parametrize("func", [list, np.array, Series]) + def test_setitem_boolean_python_list(self, func): + # GH19406 + ser = Series([None, "b", None]) + mask = func([True, False, True]) + ser[mask] = ["a", "c"] + expected = Series(["a", "b", "c"]) + tm.assert_series_equal(ser, expected) + + def test_setitem_boolean_nullable_int_types(self, any_numeric_ea_dtype): + # GH: 26468 + ser = Series([5, 6, 7, 8], dtype=any_numeric_ea_dtype) + ser[ser > 6] = Series(range(4), dtype=any_numeric_ea_dtype) + expected = Series([5, 6, 2, 3], 
dtype=any_numeric_ea_dtype) + tm.assert_series_equal(ser, expected) + + ser = Series([5, 6, 7, 8], dtype=any_numeric_ea_dtype) + ser.loc[ser > 6] = Series(range(4), dtype=any_numeric_ea_dtype) + tm.assert_series_equal(ser, expected) + + ser = Series([5, 6, 7, 8], dtype=any_numeric_ea_dtype) + loc_ser = Series(range(4), dtype=any_numeric_ea_dtype) + ser.loc[ser > 6] = loc_ser.loc[loc_ser > 1] + tm.assert_series_equal(ser, expected) + + def test_setitem_with_bool_mask_and_values_matching_n_trues_in_length(self): + # GH#30567 + ser = Series([None] * 10) + mask = [False] * 3 + [True] * 5 + [False] * 2 + ser[mask] = range(5) + result = ser + expected = Series([None] * 3 + list(range(5)) + [None] * 2).astype("object") + tm.assert_series_equal(result, expected) + + def test_setitem_nan_with_bool(self): + # GH 13034 + result = Series([True, False, True]) + result[0] = np.nan + expected = Series([np.nan, False, True], dtype=object) + tm.assert_series_equal(result, expected) + + +class TestSetitemViewCopySemantics: + def test_setitem_invalidates_datetime_index_freq(self): + # GH#24096 altering a datetime64tz Series inplace invalidates the + # `freq` attribute on the underlying DatetimeIndex + + dti = date_range("20130101", periods=3, tz="US/Eastern") + ts = dti[1] + ser = Series(dti) + assert ser._values is not dti + assert ser._values._data.base is not dti._data._data.base + assert dti.freq == "D" + ser.iloc[1] = NaT + assert ser._values.freq is None + + # check that the DatetimeIndex was not altered in place + assert ser._values is not dti + assert ser._values._data.base is not dti._data._data.base + assert dti[1] == ts + assert dti.freq == "D" + + def test_dt64tz_setitem_does_not_mutate_dti(self): + # GH#21907, GH#24096 + dti = date_range("2016-01-01", periods=10, tz="US/Pacific") + ts = dti[0] + ser = Series(dti) + assert ser._values is not dti + assert ser._values._data.base is not dti._data._data.base + assert ser._mgr.arrays[0] is not dti + assert ser._mgr.arrays[0]._data.base is not dti._data._data.base + + ser[::3] = NaT + assert ser[0] is NaT + assert dti[0] == ts + + +class TestSetitemCallable: + def test_setitem_callable_key(self): + # GH#12533 + ser = Series([1, 2, 3, 4], index=list("ABCD")) + ser[lambda x: "A"] = -1 + + expected = Series([-1, 2, 3, 4], index=list("ABCD")) + tm.assert_series_equal(ser, expected) + + def test_setitem_callable_other(self): + # GH#13299 + inc = lambda x: x + 1 + + ser = Series([1, 2, -1, 4]) + ser[ser < 0] = inc + + expected = Series([1, 2, inc, 4]) + tm.assert_series_equal(ser, expected) + + +class TestSetitemWithExpansion: + def test_setitem_empty_series(self): + # GH#10193 + key = Timestamp("2012-01-01") + series = Series(dtype=object) + series[key] = 47 + expected = Series(47, [key]) + tm.assert_series_equal(series, expected) + + def test_setitem_empty_series_datetimeindex_preserves_freq(self): + # GH#33573 our index should retain its freq + series = Series([], DatetimeIndex([], freq="D"), dtype=object) + key = Timestamp("2012-01-01") + series[key] = 47 + expected = Series(47, DatetimeIndex([key], freq="D")) + tm.assert_series_equal(series, expected) + assert series.index.freq == expected.index.freq + + def test_setitem_empty_series_timestamp_preserves_dtype(self): + # GH 21881 + timestamp = Timestamp(1412526600000000000) + series = Series([timestamp], index=["timestamp"], dtype=object) + expected = series["timestamp"] + + series = Series([], dtype=object) + series["anything"] = 300.0 + series["timestamp"] = timestamp + result = series["timestamp"] + 
assert result == expected + + @pytest.mark.parametrize( + "td", + [ + Timedelta("9 days"), + Timedelta("9 days").to_timedelta64(), + Timedelta("9 days").to_pytimedelta(), + ], + ) + def test_append_timedelta_does_not_cast(self, td): + # GH#22717 inserting a Timedelta should _not_ cast to int64 + expected = Series(["x", td], index=[0, "td"], dtype=object) + + ser = Series(["x"]) + ser["td"] = td + tm.assert_series_equal(ser, expected) + assert isinstance(ser["td"], Timedelta) + + ser = Series(["x"]) + ser.loc["td"] = Timedelta("9 days") + tm.assert_series_equal(ser, expected) + assert isinstance(ser["td"], Timedelta) + + def test_setitem_with_expansion_type_promotion(self): + # GH#12599 + ser = Series(dtype=object) + ser["a"] = Timestamp("2016-01-01") + ser["b"] = 3.0 + ser["c"] = "foo" + expected = Series([Timestamp("2016-01-01"), 3.0, "foo"], index=["a", "b", "c"]) + tm.assert_series_equal(ser, expected) + + def test_setitem_not_contained(self, string_series): + # set item that's not contained + ser = string_series.copy() + assert "foobar" not in ser.index + ser["foobar"] = 1 + + app = Series([1], index=["foobar"], name="series") + expected = concat([string_series, app]) + tm.assert_series_equal(ser, expected) + + +def test_setitem_scalar_into_readonly_backing_data(): + # GH#14359: test that you cannot mutate a read only buffer + + array = np.zeros(5) + array.flags.writeable = False # make the array immutable + series = Series(array) + + for n in range(len(series)): + msg = "assignment destination is read-only" + with pytest.raises(ValueError, match=msg): + series[n] = 1 + + assert array[n] == 0 + + +def test_setitem_slice_into_readonly_backing_data(): + # GH#14359: test that you cannot mutate a read only buffer + + array = np.zeros(5) + array.flags.writeable = False # make the array immutable + series = Series(array) + + msg = "assignment destination is read-only" + with pytest.raises(ValueError, match=msg): + series[1:3] = 1 + + assert not array.any() + + +def test_setitem_categorical_assigning_ops(): + orig = Series(Categorical(["b", "b"], categories=["a", "b"])) + ser = orig.copy() + ser[:] = "a" + exp = Series(Categorical(["a", "a"], categories=["a", "b"])) + tm.assert_series_equal(ser, exp) + + ser = orig.copy() + ser[1] = "a" + exp = Series(Categorical(["b", "a"], categories=["a", "b"])) + tm.assert_series_equal(ser, exp) + + ser = orig.copy() + ser[ser.index > 0] = "a" + exp = Series(Categorical(["b", "a"], categories=["a", "b"])) + tm.assert_series_equal(ser, exp) + + ser = orig.copy() + ser[[False, True]] = "a" + exp = Series(Categorical(["b", "a"], categories=["a", "b"])) + tm.assert_series_equal(ser, exp) + + ser = orig.copy() + ser.index = ["x", "y"] + ser["y"] = "a" + exp = Series(Categorical(["b", "a"], categories=["a", "b"]), index=["x", "y"]) + tm.assert_series_equal(ser, exp) + + +def test_setitem_nan_into_categorical(): + # ensure that one can set something to np.nan + ser = Series(Categorical([1, 2, 3])) + exp = Series(Categorical([1, np.nan, 3], categories=[1, 2, 3])) + ser[1] = np.nan + tm.assert_series_equal(ser, exp) + + +class TestSetitemCasting: + @pytest.mark.parametrize("unique", [True, False]) + @pytest.mark.parametrize("val", [3, 3.0, "3"], ids=type) + def test_setitem_non_bool_into_bool(self, val, indexer_sli, unique): + # dont cast these 3-like values to bool + ser = Series([True, False]) + if not unique: + ser.index = [1, 1] + + indexer_sli(ser)[1] = val + assert type(ser.iloc[1]) == type(val) + + expected = Series([True, val], dtype=object, 
index=ser.index) + if not unique and indexer_sli is not tm.iloc: + expected = Series([val, val], dtype=object, index=[1, 1]) + tm.assert_series_equal(ser, expected) + + +class SetitemCastingEquivalents: + """ + Check each of several methods that _should_ be equivalent to `obj[key] = val` + + We assume that + - obj.index is the default Index(range(len(obj))) + - the setitem does not expand the obj + """ + + @pytest.fixture + def is_inplace(self): + """ + Indicate that we are not (yet) checking whether or not setting is inplace. + """ + return None + + def check_indexer(self, obj, key, expected, val, indexer, is_inplace): + orig = obj + obj = obj.copy() + arr = obj._values + + indexer(obj)[key] = val + tm.assert_series_equal(obj, expected) + + self._check_inplace(is_inplace, orig, arr, obj) + + def _check_inplace(self, is_inplace, orig, arr, obj): + if is_inplace is None: + # We are not (yet) checking whether setting is inplace or not + pass + elif is_inplace: + if arr.dtype.kind in ["m", "M"]: + # We may not have the same DTA/TDA, but will have the same + # underlying data + assert arr._ndarray is obj._values._ndarray + else: + assert obj._values is arr + else: + # otherwise original array should be unchanged + tm.assert_equal(arr, orig._values) + + def test_int_key(self, obj, key, expected, val, indexer_sli, is_inplace): + if not isinstance(key, int): + return + + self.check_indexer(obj, key, expected, val, indexer_sli, is_inplace) + + if indexer_sli is tm.loc: + self.check_indexer(obj, key, expected, val, tm.at, is_inplace) + elif indexer_sli is tm.iloc: + self.check_indexer(obj, key, expected, val, tm.iat, is_inplace) + + rng = range(key, key + 1) + self.check_indexer(obj, rng, expected, val, indexer_sli, is_inplace) + + if indexer_sli is not tm.loc: + # Note: no .loc because that handles slice edges differently + slc = slice(key, key + 1) + self.check_indexer(obj, slc, expected, val, indexer_sli, is_inplace) + + ilkey = [key] + self.check_indexer(obj, ilkey, expected, val, indexer_sli, is_inplace) + + indkey = np.array(ilkey) + self.check_indexer(obj, indkey, expected, val, indexer_sli, is_inplace) + + genkey = (x for x in [key]) + self.check_indexer(obj, genkey, expected, val, indexer_sli, is_inplace) + + def test_slice_key(self, obj, key, expected, val, indexer_sli, is_inplace): + if not isinstance(key, slice): + return + + if indexer_sli is not tm.loc: + # Note: no .loc because that handles slice edges differently + self.check_indexer(obj, key, expected, val, indexer_sli, is_inplace) + + ilkey = list(range(len(obj)))[key] + self.check_indexer(obj, ilkey, expected, val, indexer_sli, is_inplace) + + indkey = np.array(ilkey) + self.check_indexer(obj, indkey, expected, val, indexer_sli, is_inplace) + + genkey = (x for x in indkey) + self.check_indexer(obj, genkey, expected, val, indexer_sli, is_inplace) + + def test_mask_key(self, obj, key, expected, val, indexer_sli): + # setitem with boolean mask + mask = np.zeros(obj.shape, dtype=bool) + mask[key] = True + + obj = obj.copy() + + if is_list_like(val) and len(val) < mask.sum(): + msg = "boolean index did not match indexed array along dimension" + with pytest.raises(IndexError, match=msg): + indexer_sli(obj)[mask] = val + return + + indexer_sli(obj)[mask] = val + tm.assert_series_equal(obj, expected) + + def test_series_where(self, obj, key, expected, val, is_inplace): + mask = np.zeros(obj.shape, dtype=bool) + mask[key] = True + + if is_list_like(val) and len(val) < len(obj): + # Series.where is not valid here + msg = "operands could 
not be broadcast together with shapes" + with pytest.raises(ValueError, match=msg): + obj.where(~mask, val) + return + + orig = obj + obj = obj.copy() + arr = obj._values + + res = obj.where(~mask, val) + tm.assert_series_equal(res, expected) + + self._check_inplace(is_inplace, orig, arr, obj) + + def test_index_where(self, obj, key, expected, val, request): + if obj.dtype == bool: + # TODO(GH#45061): Should become unreachable + pytest.skip("test not applicable for this dtype") + + mask = np.zeros(obj.shape, dtype=bool) + mask[key] = True + + res = Index(obj).where(~mask, val) + tm.assert_index_equal(res, Index(expected)) + + def test_index_putmask(self, obj, key, expected, val): + if obj.dtype == bool: + # TODO(GH#45061): Should become unreachable + pytest.skip("test not applicable for this dtype") + + mask = np.zeros(obj.shape, dtype=bool) + mask[key] = True + + res = Index(obj).putmask(mask, val) + tm.assert_index_equal(res, Index(expected)) + + +@pytest.mark.parametrize( + "obj,expected,key", + [ + pytest.param( + # these induce dtype changes + Series([2, 3, 4, 5, 6, 7, 8, 9, 10]), + Series([np.nan, 3, np.nan, 5, np.nan, 7, np.nan, 9, np.nan]), + slice(None, None, 2), + id="int_series_slice_key_step", + ), + pytest.param( + Series([True, True, False, False]), + Series([np.nan, True, np.nan, False], dtype=object), + slice(None, None, 2), + id="bool_series_slice_key_step", + ), + pytest.param( + # these induce dtype changes + Series(np.arange(10)), + Series([np.nan, np.nan, np.nan, np.nan, np.nan, 5, 6, 7, 8, 9]), + slice(None, 5), + id="int_series_slice_key", + ), + pytest.param( + # changes dtype GH#4463 + Series([1, 2, 3]), + Series([np.nan, 2, 3]), + 0, + id="int_series_int_key", + ), + pytest.param( + # changes dtype GH#4463 + Series([False]), + Series([np.nan], dtype=object), + # TODO: maybe go to float64 since we are changing the _whole_ Series? + 0, + id="bool_series_int_key_change_all", + ), + pytest.param( + # changes dtype GH#4463 + Series([False, True]), + Series([np.nan, True], dtype=object), + 0, + id="bool_series_int_key", + ), + ], +) +class TestSetitemCastingEquivalents(SetitemCastingEquivalents): + @pytest.fixture(params=[np.nan, np.float64("NaN")]) + def val(self, request): + """ + One python float NaN, one np.float64. Only np.float64 has a `dtype` + attribute. + """ + return request.param + + +class TestSetitemTimedelta64IntoNumeric(SetitemCastingEquivalents): + # timedelta64 should not be treated as integers when setting into + # numeric Series + + @pytest.fixture + def val(self): + td = np.timedelta64(4, "ns") + return td + # TODO: could also try np.full((1,), td) + + @pytest.fixture(params=[complex, int, float]) + def dtype(self, request): + return request.param + + @pytest.fixture + def obj(self, dtype): + arr = np.arange(5).astype(dtype) + ser = Series(arr) + return ser + + @pytest.fixture + def expected(self, dtype): + arr = np.arange(5).astype(dtype) + ser = Series(arr) + ser = ser.astype(object) + ser.values[0] = np.timedelta64(4, "ns") + return ser + + @pytest.fixture + def key(self): + return 0 + + @pytest.fixture + def is_inplace(self): + """ + Indicate we do _not_ expect the setting to be done inplace. 
+ """ + return False + + +class TestSetitemDT64IntoInt(SetitemCastingEquivalents): + # GH#39619 dont cast dt64 to int when doing this setitem + + @pytest.fixture(params=["M8[ns]", "m8[ns]"]) + def dtype(self, request): + return request.param + + @pytest.fixture + def scalar(self, dtype): + val = np.datetime64("2021-01-18 13:25:00", "ns") + if dtype == "m8[ns]": + val = val - val + return val + + @pytest.fixture + def expected(self, scalar): + expected = Series([scalar, scalar, 3], dtype=object) + assert isinstance(expected[0], type(scalar)) + return expected + + @pytest.fixture + def obj(self): + return Series([1, 2, 3]) + + @pytest.fixture + def key(self): + return slice(None, -1) + + @pytest.fixture(params=[None, list, np.array]) + def val(self, scalar, request): + box = request.param + if box is None: + return scalar + return box([scalar, scalar]) + + @pytest.fixture + def is_inplace(self): + return False + + +class TestSetitemNAPeriodDtype(SetitemCastingEquivalents): + # Setting compatible NA values into Series with PeriodDtype + + @pytest.fixture + def expected(self, key): + exp = Series(period_range("2000-01-01", periods=10, freq="D")) + exp._values.view("i8")[key] = NaT.value + assert exp[key] is NaT or all(x is NaT for x in exp[key]) + return exp + + @pytest.fixture + def obj(self): + return Series(period_range("2000-01-01", periods=10, freq="D")) + + @pytest.fixture(params=[3, slice(3, 5)]) + def key(self, request): + return request.param + + @pytest.fixture(params=[None, np.nan]) + def val(self, request): + return request.param + + @pytest.fixture + def is_inplace(self): + return True + + +class TestSetitemNADatetimeLikeDtype(SetitemCastingEquivalents): + # some nat-like values should be cast to datetime64/timedelta64 when + # inserting into a datetime64/timedelta64 series. Others should coerce + # to object and retain their dtypes. 
+ # GH#18586 for td64 and boolean mask case + + @pytest.fixture( + params=["m8[ns]", "M8[ns]", "datetime64[ns, UTC]", "datetime64[ns, US/Central]"] + ) + def dtype(self, request): + return request.param + + @pytest.fixture + def obj(self, dtype): + i8vals = date_range("2016-01-01", periods=3).asi8 + idx = Index(i8vals, dtype=dtype) + assert idx.dtype == dtype + return Series(idx) + + @pytest.fixture( + params=[ + None, + np.nan, + NaT, + np.timedelta64("NaT", "ns"), + np.datetime64("NaT", "ns"), + ] + ) + def val(self, request): + return request.param + + @pytest.fixture + def is_inplace(self, val, obj): + # td64 -> cast to object iff val is datetime64("NaT") + # dt64 -> cast to object iff val is timedelta64("NaT") + # dt64tz -> cast to object with anything _but_ NaT + return val is NaT or val is None or val is np.nan or obj.dtype == val.dtype + + @pytest.fixture + def expected(self, obj, val, is_inplace): + dtype = obj.dtype if is_inplace else object + expected = Series([val] + list(obj[1:]), dtype=dtype) + return expected + + @pytest.fixture + def key(self): + return 0 + + +class TestSetitemMismatchedTZCastsToObject(SetitemCastingEquivalents): + # GH#24024 + @pytest.fixture + def obj(self): + return Series(date_range("2000", periods=2, tz="US/Central")) + + @pytest.fixture + def val(self): + return Timestamp("2000", tz="US/Eastern") + + @pytest.fixture + def key(self): + return 0 + + @pytest.fixture + def expected(self): + expected = Series( + [ + Timestamp("2000-01-01 00:00:00-05:00", tz="US/Eastern"), + Timestamp("2000-01-02 00:00:00-06:00", tz="US/Central"), + ], + dtype=object, + ) + return expected + + @pytest.fixture(autouse=True) + def assert_warns(self, request): + # check that we issue a FutureWarning about timezone-matching + if request.function.__name__ == "test_slice_key": + key = request.getfixturevalue("key") + if not isinstance(key, slice): + # The test is a no-op, so no warning will be issued + yield + return + with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"): + yield + + +@pytest.mark.parametrize( + "obj,expected", + [ + # For numeric series, we should coerce to NaN. + (Series([1, 2, 3]), Series([np.nan, 2, 3])), + (Series([1.0, 2.0, 3.0]), Series([np.nan, 2.0, 3.0])), + # For datetime series, we should coerce to NaT. + ( + Series([datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)]), + Series([NaT, datetime(2000, 1, 2), datetime(2000, 1, 3)]), + ), + # For objects, we should preserve the None value. + (Series(["foo", "bar", "baz"]), Series([None, "bar", "baz"])), + ], +) +class TestSeriesNoneCoercion(SetitemCastingEquivalents): + @pytest.fixture + def key(self): + return 0 + + @pytest.fixture + def val(self): + return None + + @pytest.fixture + def is_inplace(self, obj): + # This is specific to the 4 cases currently implemented for this class. 
+ return obj.dtype.kind != "i" + + +class TestSetitemFloatIntervalWithIntIntervalValues(SetitemCastingEquivalents): + # GH#44201 Cast to shared IntervalDtype rather than object + + def test_setitem_example(self): + # Just a case here to make obvious what this test class is aimed at + idx = IntervalIndex.from_breaks(range(4)) + obj = Series(idx) + val = Interval(0.5, 1.5) + + obj[0] = val + assert obj.dtype == "Interval[float64, right]" + + @pytest.fixture + def obj(self): + idx = IntervalIndex.from_breaks(range(4)) + return Series(idx) + + @pytest.fixture + def val(self): + return Interval(0.5, 1.5) + + @pytest.fixture + def key(self): + return 0 + + @pytest.fixture + def expected(self, obj, val): + data = [val] + list(obj[1:]) + idx = IntervalIndex(data, dtype="Interval[float64]") + return Series(idx) + + +class TestSetitemRangeIntoIntegerSeries(SetitemCastingEquivalents): + # GH#44261 Setting a range with sufficiently-small integers into + # small-itemsize integer dtypes should not need to upcast + + @pytest.fixture + def obj(self, any_int_numpy_dtype): + dtype = np.dtype(any_int_numpy_dtype) + ser = Series(range(5), dtype=dtype) + return ser + + @pytest.fixture + def val(self): + return range(2, 4) + + @pytest.fixture + def key(self): + return slice(0, 2) + + @pytest.fixture + def expected(self, any_int_numpy_dtype): + dtype = np.dtype(any_int_numpy_dtype) + exp = Series([2, 3, 2, 3, 4], dtype=dtype) + return exp + + @pytest.fixture + def inplace(self): + return True + + +@pytest.mark.parametrize( + "val", + [ + np.array([2.0, 3.0]), + np.array([2.5, 3.5]), + np.array([2 ** 65, 2 ** 65 + 1], dtype=np.float64), # all ints, but can't cast + ], +) +class TestSetitemFloatNDarrayIntoIntegerSeries(SetitemCastingEquivalents): + @pytest.fixture + def obj(self): + return Series(range(5), dtype=np.int64) + + @pytest.fixture + def key(self): + return slice(0, 2) + + @pytest.fixture + def inplace(self, val): + # NB: this condition is based on currently-hardcoded "val" cases + return val[0] == 2 + + @pytest.fixture + def expected(self, val, inplace): + if inplace: + dtype = np.int64 + else: + dtype = np.float64 + res_values = np.array(range(5), dtype=dtype) + res_values[:2] = val + return Series(res_values) + + +@pytest.mark.parametrize("val", [512, np.int16(512)]) +class TestSetitemIntoIntegerSeriesNeedsUpcast(SetitemCastingEquivalents): + @pytest.fixture + def obj(self): + return Series([1, 2, 3], dtype=np.int8) + + @pytest.fixture + def key(self): + return 1 + + @pytest.fixture + def inplace(self): + return False + + @pytest.fixture + def expected(self): + return Series([1, 512, 3], dtype=np.int16) + + def test_int_key(self, obj, key, expected, val, indexer_sli, is_inplace, request): + if not isinstance(val, np.int16): + mark = pytest.mark.xfail + request.node.add_marker(mark) + super().test_int_key(obj, key, expected, val, indexer_sli, is_inplace) + + def test_mask_key(self, obj, key, expected, val, indexer_sli, request): + if not isinstance(val, np.int16): + mark = pytest.mark.xfail + request.node.add_marker(mark) + super().test_mask_key(obj, key, expected, val, indexer_sli) + + +@pytest.mark.parametrize("val", [2 ** 33 + 1.0, 2 ** 33 + 1.1, 2 ** 62]) +class TestSmallIntegerSetitemUpcast(SetitemCastingEquivalents): + # https://github.com/pandas-dev/pandas/issues/39584#issuecomment-941212124 + @pytest.fixture + def obj(self): + return Series([1, 2, 3], dtype="i4") + + @pytest.fixture + def key(self): + return 0 + + @pytest.fixture + def inplace(self): + return False + + @pytest.fixture + def 
expected(self, val): + if val == 2 ** 62: + return Series([val, 2, 3], dtype="i8") + elif val == 2 ** 33 + 1.1: + return Series([val, 2, 3], dtype="f8") + else: + return Series([val, 2, 3], dtype="i8") + + def test_series_where(self, obj, key, expected, val, is_inplace, request): + if isinstance(val, float) and val % 1 == 0: + mark = pytest.mark.xfail + request.node.add_marker(mark) + super().test_series_where(obj, key, expected, val, is_inplace) + + def test_int_key(self, obj, key, expected, val, indexer_sli, is_inplace, request): + if val % 1 == 0: + mark = pytest.mark.xfail + request.node.add_marker(mark) + super().test_int_key(obj, key, expected, val, indexer_sli, is_inplace) + + def test_mask_key(self, obj, key, expected, val, indexer_sli, request): + if val % 1 == 0: + mark = pytest.mark.xfail + request.node.add_marker(mark) + super().test_mask_key(obj, key, expected, val, indexer_sli) + + +def test_20643(): + # closed by GH#45121 + orig = Series([0, 1, 2], index=["a", "b", "c"]) + + expected = Series([0, 2.7, 2], index=["a", "b", "c"]) + + ser = orig.copy() + ser.at["b"] = 2.7 + tm.assert_series_equal(ser, expected) + + ser = orig.copy() + ser.loc["b"] = 2.7 + tm.assert_series_equal(ser, expected) + + ser = orig.copy() + ser["b"] = 2.7 + tm.assert_series_equal(ser, expected) + + ser = orig.copy() + ser.iat[1] = 2.7 + tm.assert_series_equal(ser, expected) + + ser = orig.copy() + ser.iloc[1] = 2.7 + tm.assert_series_equal(ser, expected) + + orig_df = orig.to_frame("A") + expected_df = expected.to_frame("A") + + df = orig_df.copy() + df.at["b", "A"] = 2.7 + tm.assert_frame_equal(df, expected_df) + + df = orig_df.copy() + df.loc["b", "A"] = 2.7 + tm.assert_frame_equal(df, expected_df) + + df = orig_df.copy() + df.iloc[1, 0] = 2.7 + tm.assert_frame_equal(df, expected_df) + + df = orig_df.copy() + df.iat[1, 0] = 2.7 + tm.assert_frame_equal(df, expected_df) + + +def test_20643_comment(): + # https://github.com/pandas-dev/pandas/issues/20643#issuecomment-431244590 + # fixed sometime prior to GH#45121 + orig = Series([0, 1, 2], index=["a", "b", "c"]) + expected = Series([np.nan, 1, 2], index=["a", "b", "c"]) + + ser = orig.copy() + ser.iat[0] = None + tm.assert_series_equal(ser, expected) + + ser = orig.copy() + ser.iloc[0] = None + tm.assert_series_equal(ser, expected) + + +def test_15413(): + # fixed by GH#45121 + ser = Series([1, 2, 3]) + + ser[ser == 2] += 0.5 + expected = Series([1, 2.5, 3]) + tm.assert_series_equal(ser, expected) + + ser = Series([1, 2, 3]) + ser[1] += 0.5 + tm.assert_series_equal(ser, expected) + + ser = Series([1, 2, 3]) + ser.loc[1] += 0.5 + tm.assert_series_equal(ser, expected) + + ser = Series([1, 2, 3]) + ser.iloc[1] += 0.5 + tm.assert_series_equal(ser, expected) + + ser = Series([1, 2, 3]) + ser.iat[1] += 0.5 + tm.assert_series_equal(ser, expected) + + ser = Series([1, 2, 3]) + ser.at[1] += 0.5 + tm.assert_series_equal(ser, expected) + + +def test_37477(): + # fixed by GH#45121 + orig = DataFrame({"A": [1, 2, 3], "B": [3, 4, 5]}) + expected = DataFrame({"A": [1, 2, 3], "B": [3, 1.2, 5]}) + + df = orig.copy() + df.at[1, "B"] = 1.2 + tm.assert_frame_equal(df, expected) + + df = orig.copy() + df.loc[1, "B"] = 1.2 + tm.assert_frame_equal(df, expected) + + df = orig.copy() + df.iat[1, 1] = 1.2 + tm.assert_frame_equal(df, expected) + + df = orig.copy() + df.iloc[1, 1] = 1.2 + tm.assert_frame_equal(df, expected) + + +def test_32878_int_itemsize(): + # Fixed by GH#45121 + arr = np.arange(5).astype("i4") + ser = Series(arr) + val = np.int64(np.iinfo(np.int64).max) + 
ser[0] = val + expected = Series([val, 1, 2, 3, 4], dtype=np.int64) + tm.assert_series_equal(ser, expected) + + +def test_26395(indexer_al): + # .at case fixed by GH#45121 (best guess) + df = DataFrame(index=["A", "B", "C"]) + df["D"] = 0 + + indexer_al(df)["C", "D"] = 2 + expected = DataFrame({"D": [0, 0, 2]}, index=["A", "B", "C"], dtype=np.int64) + tm.assert_frame_equal(df, expected) + + indexer_al(df)["C", "D"] = 44.5 + expected = DataFrame({"D": [0, 0, 44.5]}, index=["A", "B", "C"], dtype=np.float64) + tm.assert_frame_equal(df, expected) + + indexer_al(df)["C", "D"] = "hello" + expected = DataFrame({"D": [0, 0, "hello"]}, index=["A", "B", "C"], dtype=object) + tm.assert_frame_equal(df, expected) + + +def test_37692(indexer_al): + # GH#37692 + ser = Series([1, 2, 3], index=["a", "b", "c"]) + indexer_al(ser)["b"] = "test" + expected = Series([1, "test", 3], index=["a", "b", "c"], dtype=object) + tm.assert_series_equal(ser, expected) + + +def test_setitem_bool_int_float_consistency(indexer_sli): + # GH#21513 + # bool-with-int and bool-with-float both upcast to object + # int-with-float and float-with-int are both non-casting so long + # as the setitem can be done losslessly + for dtype in [np.float64, np.int64]: + ser = Series(0, index=range(3), dtype=dtype) + indexer_sli(ser)[0] = True + assert ser.dtype == object + + ser = Series(0, index=range(3), dtype=bool) + ser[0] = dtype(1) + assert ser.dtype == object + + # 1.0 can be held losslessly, so no casting + ser = Series(0, index=range(3), dtype=np.int64) + indexer_sli(ser)[0] = np.float64(1.0) + assert ser.dtype == np.int64 + + # 1 can be held losslessly, so no casting + ser = Series(0, index=range(3), dtype=np.float64) + indexer_sli(ser)[0] = np.int64(1) + + +def test_6942(indexer_al): + # check that the .at __setitem__ after setting "Live" actually sets the data + start = Timestamp("2014-04-01") + t1 = Timestamp("2014-04-23 12:42:38.883082") + t2 = Timestamp("2014-04-24 01:33:30.040039") + + dti = date_range(start, periods=1) + orig = DataFrame(index=dti, columns=["timenow", "Live"]) + + df = orig.copy() + indexer_al(df)[start, "timenow"] = t1 + + df["Live"] = True + + df.at[start, "timenow"] = t2 + assert df.iloc[0, 0] == t2 + + +@pytest.mark.xfail(reason="Doesn't catch when numpy raises.") +def test_45070(): + ser = Series([1, 2, 3], index=["a", "b", "c"]) + + ser[0] = "X" + expected = Series(["X", 2, 3], index=["a", "b", "c"], dtype=object) + tm.assert_series_equal(ser, expected) + + +@pytest.mark.xfail(reason="unwanted upcast") +def test_15231(): + df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"]) + df.loc[2] = Series({"a": 5, "b": 6}) + assert (df.dtypes == np.int64).all() + + df.loc[3] = Series({"a": 7}) + + # df["a"] doesn't have any NaNs, should not have been cast + exp_dtypes = Series([np.int64, np.float64], dtype=object, index=["a", "b"]) + tm.assert_series_equal(df.dtypes, exp_dtypes) + + +@pytest.mark.xfail(reason="Fails to upcast") +def test_32878_complex_itemsize(): + # TODO: when fixed, put adjacent to test_32878_int_itemsize + arr = np.arange(5).astype("c8") + ser = Series(arr) + val = np.finfo(np.float64).max + val = val.astype("c16") + + # GH#32878 used to coerce val to inf+0.000000e+00j + ser[0] = val + assert ser[0] == val + expected = Series([val, 1, 2, 3, 4], dtype="c16") + tm.assert_series_equal(ser, expected) + + +@pytest.mark.xfail(reason="Unnecessarily upcasts to float64") +def test_iloc_setitem_unnecessary_float_upcasting(): + # GH#12255 + df = DataFrame( + { + 0: np.array([1, 3], dtype=np.float32), 
1: np.array([2, 4], dtype=np.float32), + 2: ["a", "b"], + } + ) + orig = df.copy() + + values = df[0].values.reshape(2, 1) + df.iloc[:, 0:1] = values + + tm.assert_frame_equal(df, orig) + + +@pytest.mark.xfail(reason="unwanted casting to dt64") +def test_12499(): + # TODO: OP in GH#12499 used np.datetime64("NaT") instead of pd.NaT, + # which has consequences for the expected df["two"] (though I think at + # the time it might not have because of a separate bug). See if it makes + # a difference which one we use here. + ts = Timestamp("2016-03-01 03:13:22.98986", tz="UTC") + + data = [{"one": 0, "two": ts}] + orig = DataFrame(data) + df = orig.copy() + df.loc[1] = [np.nan, NaT] + + expected = DataFrame( + {"one": [0, np.nan], "two": Series([ts, NaT], dtype="datetime64[ns, UTC]")} + ) + tm.assert_frame_equal(df, expected) + + data = [{"one": 0, "two": ts}] + df = orig.copy() + df.loc[1, :] = [np.nan, NaT] + tm.assert_frame_equal(df, expected) + + +@pytest.mark.xfail(reason="Too many columns cast to float64") +def test_20476(): + mi = MultiIndex.from_product([["A", "B"], ["a", "b", "c"]]) + df = DataFrame(-1, index=range(3), columns=mi) + filler = DataFrame([[1, 2, 3.0]] * 3, index=range(3), columns=["a", "b", "c"]) + df["A"] = filler + + expected = DataFrame( + { + 0: [1, 1, 1], + 1: [2, 2, 2], + 2: [3.0, 3.0, 3.0], + 3: [-1, -1, -1], + 4: [-1, -1, -1], + 5: [-1, -1, -1], + } + ) + expected.columns = mi + exp_dtypes = Series( + [np.dtype(np.int64)] * 2 + [np.dtype(np.float64)] + [np.dtype(np.int64)] * 3, + index=mi, + ) + tm.assert_series_equal(df.dtypes, exp_dtypes) + + +def test_setitem_int_as_positional_fallback_deprecation(): + # GH#42215 deprecated falling back to positional on __setitem__ with an + # int not contained in the index + ser = Series([1, 2, 3, 4], index=[1.1, 2.1, 3.0, 4.1]) + assert not ser.index._should_fallback_to_positional + # assert not ser.index.astype(object)._should_fallback_to_positional + + with tm.assert_produces_warning(None): + # 3.0 is in our index, so future behavior is unchanged + ser[3] = 10 + expected = Series([1, 2, 10, 4], index=ser.index) + tm.assert_series_equal(ser, expected) + + msg = "Treating integers as positional in Series.__setitem__" + with tm.assert_produces_warning(FutureWarning, match=msg): + with pytest.raises(IndexError, match="index 5 is out of bounds"): + ser[5] = 5 + # Once the deprecation is enforced, we will have + # expected = Series([1, 2, 3, 4, 5], index=[1.1, 2.1, 3.0, 4.1, 5.0]) + + ii = IntervalIndex.from_breaks(range(10))[::2] + ser2 = Series(range(len(ii)), index=ii) + expected2 = ser2.copy() + expected2.iloc[-1] = 9 + with tm.assert_produces_warning(FutureWarning, match=msg): + ser2[4] = 9 + tm.assert_series_equal(ser2, expected2) + + mi = MultiIndex.from_product([ser.index, ["A", "B"]]) + ser3 = Series(range(len(mi)), index=mi) + expected3 = ser3.copy() + expected3.iloc[4] = 99 + + with tm.assert_produces_warning(FutureWarning, match=msg): + ser3[4] = 99 + tm.assert_series_equal(ser3, expected3) + + +def test_setitem_with_bool_indexer(): + # GH#42530 + + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + result = df.pop("b") + result[[True, False, False]] = 9 + expected = Series(data=[9, 5, 6], name="b") + tm.assert_series_equal(result, expected) + + df.loc[[True, False, False], "a"] = 10 + expected = DataFrame({"a": [10, 2, 3]}) + tm.assert_frame_equal(df, expected) + + +@pytest.mark.parametrize("size", range(2, 6)) +@pytest.mark.parametrize( + "mask", [[True, False, False, False, False], [True, False], [False]] +) 
+@pytest.mark.parametrize( + "item", [2.0, np.nan, np.finfo(float).max, np.finfo(float).min] +) +# Test numpy arrays, lists and tuples as the input to be +# broadcast +@pytest.mark.parametrize( + "box", [lambda x: np.array([x]), lambda x: [x], lambda x: (x,)] +) +def test_setitem_bool_indexer_dont_broadcast_length1_values(size, mask, item, box): + # GH#44265 + # see also tests.series.indexing.test_where.test_broadcast + + selection = np.resize(mask, size) + + data = np.arange(size, dtype=float) + + ser = Series(data) + + if selection.sum() != 1: + msg = ( + "cannot set using a list-like indexer with a different " + "length than the value" + ) + with pytest.raises(ValueError, match=msg): + # GH#44265 + ser[selection] = box(item) + else: + # In this corner case setting is equivalent to setting with the unboxed + # item + ser[selection] = box(item) + + expected = Series(np.arange(size, dtype=float)) + expected[selection] = item + tm.assert_series_equal(ser, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/test_take.py b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/test_take.py new file mode 100644 index 0000000..dc161b6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/test_take.py @@ -0,0 +1,33 @@ +import pytest + +import pandas as pd +from pandas import Series +import pandas._testing as tm + + +def test_take(): + ser = Series([-1, 5, 6, 2, 4]) + + actual = ser.take([1, 3, 4]) + expected = Series([5, 2, 4], index=[1, 3, 4]) + tm.assert_series_equal(actual, expected) + + actual = ser.take([-1, 3, 4]) + expected = Series([4, 2, 4], index=[4, 3, 4]) + tm.assert_series_equal(actual, expected) + + msg = lambda x: f"index {x} is out of bounds for( axis 0 with)? size 5" + with pytest.raises(IndexError, match=msg(10)): + ser.take([1, 10]) + with pytest.raises(IndexError, match=msg(5)): + ser.take([2, 5]) + + +def test_take_categorical(): + # https://github.com/pandas-dev/pandas/issues/20664 + ser = Series(pd.Categorical(["a", "b", "c"])) + result = ser.take([-2, -2, 0]) + expected = Series( + pd.Categorical(["b", "b", "a"], categories=["a", "b", "c"]), index=[1, 1, 0] + ) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/test_where.py b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/test_where.py new file mode 100644 index 0000000..9ed0488 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/test_where.py @@ -0,0 +1,466 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_integer + +import pandas as pd +from pandas import ( + Series, + Timestamp, + date_range, + isna, +) +import pandas._testing as tm + + +def test_where_unsafe_int(any_signed_int_numpy_dtype): + s = Series(np.arange(10), dtype=any_signed_int_numpy_dtype) + mask = s < 5 + + s[mask] = range(2, 7) + expected = Series( + list(range(2, 7)) + list(range(5, 10)), + dtype=any_signed_int_numpy_dtype, + ) + + tm.assert_series_equal(s, expected) + + +def test_where_unsafe_float(float_numpy_dtype): + s = Series(np.arange(10), dtype=float_numpy_dtype) + mask = s < 5 + + s[mask] = range(2, 7) + data = list(range(2, 7)) + list(range(5, 10)) + expected = Series(data, dtype=float_numpy_dtype) + + tm.assert_series_equal(s, expected) + + +@pytest.mark.parametrize( + "dtype,expected_dtype", + [ + (np.int8, np.float64), + (np.int16, np.float64), + (np.int32, np.float64), + (np.int64, np.float64), + (np.float32, 
np.float32), + (np.float64, np.float64), + ], +) +def test_where_unsafe_upcast(dtype, expected_dtype): + # see gh-9743 + s = Series(np.arange(10), dtype=dtype) + values = [2.5, 3.5, 4.5, 5.5, 6.5] + mask = s < 5 + expected = Series(values + list(range(5, 10)), dtype=expected_dtype) + s[mask] = values + tm.assert_series_equal(s, expected) + + +def test_where_unsafe(): + # see gh-9731 + s = Series(np.arange(10), dtype="int64") + values = [2.5, 3.5, 4.5, 5.5] + + mask = s > 5 + expected = Series(list(range(6)) + values, dtype="float64") + + s[mask] = values + tm.assert_series_equal(s, expected) + + # see gh-3235 + s = Series(np.arange(10), dtype="int64") + mask = s < 5 + s[mask] = range(2, 7) + expected = Series(list(range(2, 7)) + list(range(5, 10)), dtype="int64") + tm.assert_series_equal(s, expected) + assert s.dtype == expected.dtype + + s = Series(np.arange(10), dtype="int64") + mask = s > 5 + s[mask] = [0] * 4 + expected = Series([0, 1, 2, 3, 4, 5] + [0] * 4, dtype="int64") + tm.assert_series_equal(s, expected) + + s = Series(np.arange(10)) + mask = s > 5 + + msg = "cannot set using a list-like indexer with a different length than the value" + with pytest.raises(ValueError, match=msg): + s[mask] = [5, 4, 3, 2, 1] + + with pytest.raises(ValueError, match=msg): + s[mask] = [0] * 5 + + # dtype changes + s = Series([1, 2, 3, 4]) + result = s.where(s > 2, np.nan) + expected = Series([np.nan, np.nan, 3, 4]) + tm.assert_series_equal(result, expected) + + # GH 4667 + # setting with None changes dtype + s = Series(range(10)).astype(float) + s[8] = None + result = s[8] + assert isna(result) + + s = Series(range(10)).astype(float) + s[s > 8] = None + result = s[isna(s)] + expected = Series(np.nan, index=[9]) + tm.assert_series_equal(result, expected) + + +def test_where(): + s = Series(np.random.randn(5)) + cond = s > 0 + + rs = s.where(cond).dropna() + rs2 = s[cond] + tm.assert_series_equal(rs, rs2) + + rs = s.where(cond, -s) + tm.assert_series_equal(rs, s.abs()) + + rs = s.where(cond) + assert s.shape == rs.shape + assert rs is not s + + # test alignment + cond = Series([True, False, False, True, False], index=s.index) + s2 = -(s.abs()) + + expected = s2[cond].reindex(s2.index[:3]).reindex(s2.index) + rs = s2.where(cond[:3]) + tm.assert_series_equal(rs, expected) + + expected = s2.abs() + expected.iloc[0] = s2[0] + rs = s2.where(cond[:3], -s2) + tm.assert_series_equal(rs, expected) + + +def test_where_error(): + s = Series(np.random.randn(5)) + cond = s > 0 + + msg = "Array conditional must be same shape as self" + with pytest.raises(ValueError, match=msg): + s.where(1) + with pytest.raises(ValueError, match=msg): + s.where(cond[:3].values, -s) + + # GH 2745 + s = Series([1, 2]) + s[[True, False]] = [0, 1] + expected = Series([0, 2]) + tm.assert_series_equal(s, expected) + + # failures + msg = "cannot set using a list-like indexer with a different length than the value" + with pytest.raises(ValueError, match=msg): + s[[True, False]] = [0, 2, 3] + + with pytest.raises(ValueError, match=msg): + s[[True, False]] = [] + + +@pytest.mark.parametrize("klass", [list, tuple, np.array, Series]) +def test_where_array_like(klass): + # see gh-15414 + s = Series([1, 2, 3]) + cond = [False, True, True] + expected = Series([np.nan, 2, 3]) + + result = s.where(klass(cond)) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "cond", + [ + [1, 0, 1], + Series([2, 5, 7]), + ["True", "False", "True"], + [Timestamp("2017-01-01"), pd.NaT, Timestamp("2017-01-02")], + ], +) +def 
test_where_invalid_input(cond): + # see gh-15414: only boolean arrays accepted + s = Series([1, 2, 3]) + msg = "Boolean array expected for the condition" + + with pytest.raises(ValueError, match=msg): + s.where(cond) + + msg = "Array conditional must be same shape as self" + with pytest.raises(ValueError, match=msg): + s.where([True]) + + +def test_where_ndframe_align(): + msg = "Array conditional must be same shape as self" + s = Series([1, 2, 3]) + + cond = [True] + with pytest.raises(ValueError, match=msg): + s.where(cond) + + expected = Series([1, np.nan, np.nan]) + + out = s.where(Series(cond)) + tm.assert_series_equal(out, expected) + + cond = np.array([False, True, False, True]) + with pytest.raises(ValueError, match=msg): + s.where(cond) + + expected = Series([np.nan, 2, np.nan]) + + out = s.where(Series(cond)) + tm.assert_series_equal(out, expected) + + +def test_where_setitem_invalid(): + # GH 2702 + # make sure correct exceptions are raised on invalid list assignment + + msg = ( + lambda x: f"cannot set using a {x} indexer with a " + "different length than the value" + ) + # slice + s = Series(list("abc")) + + with pytest.raises(ValueError, match=msg("slice")): + s[0:3] = list(range(27)) + + s[0:3] = list(range(3)) + expected = Series([0, 1, 2]) + tm.assert_series_equal(s.astype(np.int64), expected) + + # slice with step + s = Series(list("abcdef")) + + with pytest.raises(ValueError, match=msg("slice")): + s[0:4:2] = list(range(27)) + + s = Series(list("abcdef")) + s[0:4:2] = list(range(2)) + expected = Series([0, "b", 1, "d", "e", "f"]) + tm.assert_series_equal(s, expected) + + # neg slices + s = Series(list("abcdef")) + + with pytest.raises(ValueError, match=msg("slice")): + s[:-1] = list(range(27)) + + s[-3:-1] = list(range(2)) + expected = Series(["a", "b", "c", 0, 1, "f"]) + tm.assert_series_equal(s, expected) + + # list + s = Series(list("abc")) + + with pytest.raises(ValueError, match=msg("list-like")): + s[[0, 1, 2]] = list(range(27)) + + s = Series(list("abc")) + + with pytest.raises(ValueError, match=msg("list-like")): + s[[0, 1, 2]] = list(range(2)) + + # scalar + s = Series(list("abc")) + s[0] = list(range(10)) + expected = Series([list(range(10)), "b", "c"]) + tm.assert_series_equal(s, expected) + + +@pytest.mark.parametrize("size", range(2, 6)) +@pytest.mark.parametrize( + "mask", [[True, False, False, False, False], [True, False], [False]] +) +@pytest.mark.parametrize( + "item", [2.0, np.nan, np.finfo(float).max, np.finfo(float).min] +) +# Test numpy arrays, lists and tuples as the input to be +# broadcast +@pytest.mark.parametrize( + "box", [lambda x: np.array([x]), lambda x: [x], lambda x: (x,)] +) +def test_broadcast(size, mask, item, box): + # GH#8801, GH#4195 + selection = np.resize(mask, size) + + data = np.arange(size, dtype=float) + + # Construct the expected series by taking the source + # data or item based on the selection + expected = Series( + [item if use_item else data[i] for i, use_item in enumerate(selection)] + ) + + s = Series(data) + + s[selection] = item + tm.assert_series_equal(s, expected) + + s = Series(data) + result = s.where(~selection, box(item)) + tm.assert_series_equal(result, expected) + + s = Series(data) + result = s.mask(selection, box(item)) + tm.assert_series_equal(result, expected) + + +def test_where_inplace(): + s = Series(np.random.randn(5)) + cond = s > 0 + + rs = s.copy() + + rs.where(cond, inplace=True) + tm.assert_series_equal(rs.dropna(), s[cond]) + tm.assert_series_equal(rs, s.where(cond)) + + rs = s.copy() + 
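+    # with an "other" of -s, the inplace where keeps -s values wherever cond is False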
rs.where(cond, -s, inplace=True)
+    tm.assert_series_equal(rs, s.where(cond, -s))
+
+
+def test_where_dups():
+    # GH 4550
+    # where crashes with dups in index
+    s1 = Series(list(range(3)))
+    s2 = Series(list(range(3)))
+    comb = pd.concat([s1, s2])
+    result = comb.where(comb < 2)
+    expected = Series([0, 1, np.nan, 0, 1, np.nan], index=[0, 1, 2, 0, 1, 2])
+    tm.assert_series_equal(result, expected)
+
+    # GH 4548
+    # inplace updating not working with dups
+    comb[comb < 1] = 5
+    expected = Series([5, 1, 2, 5, 1, 2], index=[0, 1, 2, 0, 1, 2])
+    tm.assert_series_equal(comb, expected)
+
+    comb[comb < 2] += 10
+    expected = Series([5, 11, 2, 5, 11, 2], index=[0, 1, 2, 0, 1, 2])
+    tm.assert_series_equal(comb, expected)
+
+
+def test_where_numeric_with_string():
+    # GH 9280
+    s = Series([1, 2, 3])
+    w = s.where(s > 1, "X")
+
+    assert not is_integer(w[0])
+    assert is_integer(w[1])
+    assert is_integer(w[2])
+    assert isinstance(w[0], str)
+    assert w.dtype == "object"
+
+    w = s.where(s > 1, ["X", "Y", "Z"])
+    assert not is_integer(w[0])
+    assert is_integer(w[1])
+    assert is_integer(w[2])
+    assert isinstance(w[0], str)
+    assert w.dtype == "object"
+
+    w = s.where(s > 1, np.array(["X", "Y", "Z"]))
+    assert not is_integer(w[0])
+    assert is_integer(w[1])
+    assert is_integer(w[2])
+    assert isinstance(w[0], str)
+    assert w.dtype == "object"
+
+
+@pytest.mark.parametrize("dtype", ["timedelta64[ns]", "datetime64[ns]"])
+def test_where_datetimelike_coerce(dtype):
+    ser = Series([1, 2], dtype=dtype)
+    expected = Series([10, 10])
+    mask = np.array([False, False])
+
+    rs = ser.where(mask, [10, 10])
+    tm.assert_series_equal(rs, expected)
+
+    rs = ser.where(mask, 10)
+    tm.assert_series_equal(rs, expected)
+
+    rs = ser.where(mask, 10.0)
+    tm.assert_series_equal(rs, expected)
+
+    rs = ser.where(mask, [10.0, 10.0])
+    tm.assert_series_equal(rs, expected)
+
+    rs = ser.where(mask, [10.0, np.nan])
+    expected = Series([10, None], dtype="object")
+    tm.assert_series_equal(rs, expected)
+
+
+def test_where_datetimetz():
+    # GH 15701
+    timestamps = ["2016-12-31 12:00:04+00:00", "2016-12-31 12:00:04.010000+00:00"]
+    ser = Series([Timestamp(t) for t in timestamps], dtype="datetime64[ns, UTC]")
+    rs = ser.where(Series([False, True]))
+    expected = Series([pd.NaT, ser[1]], dtype="datetime64[ns, UTC]")
+    tm.assert_series_equal(rs, expected)
+
+
+def test_where_sparse():
+    # GH#17198 make sure we don't get an AttributeError for sp_index
+    ser = Series(pd.arrays.SparseArray([1, 2]))
+    result = ser.where(ser >= 2, 0)
+    expected = Series(pd.arrays.SparseArray([0, 2]))
+    tm.assert_series_equal(result, expected)
+
+
+def test_where_empty_series_and_empty_cond_having_non_bool_dtypes():
+    # https://github.com/pandas-dev/pandas/issues/34592
+    ser = Series([], dtype=float)
+    result = ser.where([])
+    tm.assert_series_equal(result, ser)
+
+
+def test_where_categorical(frame_or_series):
+    # https://github.com/pandas-dev/pandas/issues/18888
+    exp = frame_or_series(
+        pd.Categorical(["A", "A", "B", "B", np.nan], categories=["A", "B", "C"]),
+        dtype="category",
+    )
+    df = frame_or_series(["A", "A", "B", "B", "C"], dtype="category")
+    res = df.where(df != "C")
+    tm.assert_equal(exp, res)
+
+
+def test_where_datetimelike_categorical(request, tz_naive_fixture):
+    # GH#37682
+    tz = tz_naive_fixture
+
+    dr = date_range("2001-01-01", periods=3, tz=tz)._with_freq(None)
+    lvals = pd.DatetimeIndex([dr[0], dr[1], pd.NaT])
+    rvals = pd.Categorical([dr[0], pd.NaT, dr[2]])
+
+    mask = np.array([True, True, False])
+
+    # DatetimeIndex.where
+    res = lvals.where(mask, rvals)
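+    # mask is False only at position 2, where rvals supplies dr[2], so res == dr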
+    tm.assert_index_equal(res, dr)
+
+    # DatetimeArray.where
+    res = lvals._data._where(mask, rvals)
+    tm.assert_datetime_array_equal(res, dr._data)
+
+    # Series.where
+    res = Series(lvals).where(mask, rvals)
+    tm.assert_series_equal(res, Series(dr))
+
+    # DataFrame.where
+    res = pd.DataFrame(lvals).where(mask[:, None], pd.DataFrame(rvals))
+
+    tm.assert_frame_equal(res, pd.DataFrame(dr))
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/test_xs.py b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/test_xs.py
new file mode 100644
index 0000000..9a27778
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/tests/series/indexing/test_xs.py
@@ -0,0 +1,81 @@
+import numpy as np
+
+from pandas import (
+    MultiIndex,
+    Series,
+    date_range,
+)
+import pandas._testing as tm
+
+
+def test_xs_datetimelike_wrapping():
+    # GH#31630 a case where we shouldn't wrap datetime64 in Timestamp
+    arr = date_range("2016-01-01", periods=3)._data._data
+
+    ser = Series(arr, dtype=object)
+    for i in range(len(ser)):
+        ser.iloc[i] = arr[i]
+    assert ser.dtype == object
+    assert isinstance(ser[0], np.datetime64)
+
+    result = ser.xs(0)
+    assert isinstance(result, np.datetime64)
+
+
+class TestXSWithMultiIndex:
+    def test_xs_level_series(self, multiindex_dataframe_random_data):
+        df = multiindex_dataframe_random_data
+        ser = df["A"]
+        expected = ser[:, "two"]
+        result = df.xs("two", level=1)["A"]
+        tm.assert_series_equal(result, expected)
+
+    def test_series_getitem_multiindex_xs_by_label(self):
+        # GH#5684
+        idx = MultiIndex.from_tuples(
+            [("a", "one"), ("a", "two"), ("b", "one"), ("b", "two")]
+        )
+        ser = Series([1, 2, 3, 4], index=idx)
+        return_value = ser.index.set_names(["L1", "L2"], inplace=True)
+        assert return_value is None
+        expected = Series([1, 3], index=["a", "b"])
+        return_value = expected.index.set_names(["L1"], inplace=True)
+        assert return_value is None
+
+        result = ser.xs("one", level="L2")
+        tm.assert_series_equal(result, expected)
+
+    def test_series_getitem_multiindex_xs(self):
+        # GH#6258
+        dt = list(date_range("20130903", periods=3))
+        idx = MultiIndex.from_product([list("AB"), dt])
+        ser = Series([1, 3, 4, 1, 3, 4], index=idx)
+        expected = Series([1, 1], index=list("AB"))
+
+        result = ser.xs("20130903", level=1)
+        tm.assert_series_equal(result, expected)
+
+    def test_series_xs_droplevel_false(self):
+        # GH: 19056
+        mi = MultiIndex.from_tuples(
+            [("a", "x"), ("a", "y"), ("b", "x")], names=["level1", "level2"]
+        )
+        ser = Series([1, 1, 1], index=mi)
+        result = ser.xs("a", axis=0, drop_level=False)
+        expected = Series(
+            [1, 1],
+            index=MultiIndex.from_tuples(
+                [("a", "x"), ("a", "y")], names=["level1", "level2"]
+            ),
+        )
+        tm.assert_series_equal(result, expected)
+
+    def test_xs_key_as_list(self):
+        # GH#41760
+        mi = MultiIndex.from_tuples([("a", "x")], names=["level1", "level2"])
+        ser = Series([1], index=mi)
+        with tm.assert_produces_warning(FutureWarning):
+            ser.xs(["a", "x"], axis=0, drop_level=False)
+
+        with tm.assert_produces_warning(FutureWarning):
+            ser.xs(["a"], axis=0, drop_level=False)
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/__init__.py
new file mode 100644
index 0000000..bcb0d30
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/__init__.py
@@ -0,0 +1,7 @@
+"""
+Test files dedicated to individual (stand-alone) Series methods
+
+Ideally these files/tests should correspond 1-to-1 with tests.frame.methods
+
+These may also present opportunities for sharing/de-duplicating test code.
+"""
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_align.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_align.py
new file mode 100644
index 0000000..8769ab0
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_align.py
@@ -0,0 +1,186 @@
+import numpy as np
+import pytest
+import pytz
+
+import pandas as pd
+from pandas import (
+    Series,
+    date_range,
+    period_range,
+)
+import pandas._testing as tm
+
+
+@pytest.mark.parametrize(
+    "first_slice,second_slice",
+    [
+        [[2, None], [None, -5]],
+        [[None, 0], [None, -5]],
+        [[None, -5], [None, 0]],
+        [[None, 0], [None, 0]],
+    ],
+)
+@pytest.mark.parametrize("fill", [None, -1])
+def test_align(datetime_series, first_slice, second_slice, join_type, fill):
+    a = datetime_series[slice(*first_slice)]
+    b = datetime_series[slice(*second_slice)]
+
+    aa, ab = a.align(b, join=join_type, fill_value=fill)
+
+    join_index = a.index.join(b.index, how=join_type)
+    if fill is not None:
+        diff_a = aa.index.difference(join_index)
+        diff_b = ab.index.difference(join_index)
+        if len(diff_a) > 0:
+            assert (aa.reindex(diff_a) == fill).all()
+        if len(diff_b) > 0:
+            assert (ab.reindex(diff_b) == fill).all()
+
+    ea = a.reindex(join_index)
+    eb = b.reindex(join_index)
+
+    if fill is not None:
+        ea = ea.fillna(fill)
+        eb = eb.fillna(fill)
+
+    tm.assert_series_equal(aa, ea)
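+    # the right-hand aligned result must likewise match its reindexed/filled expectation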
tm.assert_series_equal(ab, eb) + assert aa.name == "ts" + assert ea.name == "ts" + assert ab.name == "ts" + assert eb.name == "ts" + + +@pytest.mark.parametrize( + "first_slice,second_slice", + [ + [[2, None], [None, -5]], + [[None, 0], [None, -5]], + [[None, -5], [None, 0]], + [[None, 0], [None, 0]], + ], +) +@pytest.mark.parametrize("method", ["pad", "bfill"]) +@pytest.mark.parametrize("limit", [None, 1]) +def test_align_fill_method( + datetime_series, first_slice, second_slice, join_type, method, limit +): + a = datetime_series[slice(*first_slice)] + b = datetime_series[slice(*second_slice)] + + aa, ab = a.align(b, join=join_type, method=method, limit=limit) + + join_index = a.index.join(b.index, how=join_type) + ea = a.reindex(join_index) + eb = b.reindex(join_index) + + ea = ea.fillna(method=method, limit=limit) + eb = eb.fillna(method=method, limit=limit) + + tm.assert_series_equal(aa, ea) + tm.assert_series_equal(ab, eb) + + +def test_align_nocopy(datetime_series): + b = datetime_series[:5].copy() + + # do copy + a = datetime_series.copy() + ra, _ = a.align(b, join="left") + ra[:5] = 5 + assert not (a[:5] == 5).any() + + # do not copy + a = datetime_series.copy() + ra, _ = a.align(b, join="left", copy=False) + ra[:5] = 5 + assert (a[:5] == 5).all() + + # do copy + a = datetime_series.copy() + b = datetime_series[:5].copy() + _, rb = a.align(b, join="right") + rb[:3] = 5 + assert not (b[:3] == 5).any() + + # do not copy + a = datetime_series.copy() + b = datetime_series[:5].copy() + _, rb = a.align(b, join="right", copy=False) + rb[:2] = 5 + assert (b[:2] == 5).all() + + +def test_align_same_index(datetime_series): + a, b = datetime_series.align(datetime_series, copy=False) + assert a.index is datetime_series.index + assert b.index is datetime_series.index + + a, b = datetime_series.align(datetime_series, copy=True) + assert a.index is not datetime_series.index + assert b.index is not datetime_series.index + + +def test_align_multiindex(): + # GH 10665 + + midx = pd.MultiIndex.from_product( + [range(2), range(3), range(2)], names=("a", "b", "c") + ) + idx = pd.Index(range(2), name="b") + s1 = Series(np.arange(12, dtype="int64"), index=midx) + s2 = Series(np.arange(2, dtype="int64"), index=idx) + + # these must be the same results (but flipped) + res1l, res1r = s1.align(s2, join="left") + res2l, res2r = s2.align(s1, join="right") + + expl = s1 + tm.assert_series_equal(expl, res1l) + tm.assert_series_equal(expl, res2r) + expr = Series([0, 0, 1, 1, np.nan, np.nan] * 2, index=midx) + tm.assert_series_equal(expr, res1r) + tm.assert_series_equal(expr, res2l) + + res1l, res1r = s1.align(s2, join="right") + res2l, res2r = s2.align(s1, join="left") + + exp_idx = pd.MultiIndex.from_product( + [range(2), range(2), range(2)], names=("a", "b", "c") + ) + expl = Series([0, 1, 2, 3, 6, 7, 8, 9], index=exp_idx) + tm.assert_series_equal(expl, res1l) + tm.assert_series_equal(expl, res2r) + expr = Series([0, 0, 1, 1] * 2, index=exp_idx) + tm.assert_series_equal(expr, res1r) + tm.assert_series_equal(expr, res2l) + + +@pytest.mark.parametrize("method", ["backfill", "bfill", "pad", "ffill", None]) +def test_align_with_dataframe_method(method): + # GH31788 + ser = Series(range(3), index=range(3)) + df = pd.DataFrame(0.0, index=range(3), columns=range(3)) + + result_ser, result_df = ser.align(df, method=method) + tm.assert_series_equal(result_ser, ser) + tm.assert_frame_equal(result_df, df) + + +def test_align_dt64tzindex_mismatched_tzs(): + idx1 = date_range("2001", periods=5, freq="H", tz="US/Eastern") + 
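+    # same instants viewed under a second tz; align() should coerce both indexes to UTC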
ser = Series(np.random.randn(len(idx1)), index=idx1) + ser_central = ser.tz_convert("US/Central") + # different timezones convert to UTC + + new1, new2 = ser.align(ser_central) + assert new1.index.tz == pytz.UTC + assert new2.index.tz == pytz.UTC + + +def test_align_periodindex(join_type): + rng = period_range("1/1/2000", "1/1/2010", freq="A") + ts = Series(np.random.randn(len(rng)), index=rng) + + # TODO: assert something? + ts.align(ts[::2], join=join_type) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_append.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_append.py new file mode 100644 index 0000000..6f8852a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_append.py @@ -0,0 +1,271 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + DatetimeIndex, + Index, + Series, + Timestamp, + date_range, +) +import pandas._testing as tm + + +class TestSeriesAppend: + def test_append_preserve_name(self, datetime_series): + result = datetime_series[:5]._append(datetime_series[5:]) + assert result.name == datetime_series.name + + def test_append(self, datetime_series, string_series, object_series): + appended_series = string_series._append(object_series) + for idx, value in appended_series.items(): + if idx in string_series.index: + assert value == string_series[idx] + elif idx in object_series.index: + assert value == object_series[idx] + else: + raise AssertionError("orphaned index!") + + msg = "Indexes have overlapping values:" + with pytest.raises(ValueError, match=msg): + datetime_series._append(datetime_series, verify_integrity=True) + + def test_append_many(self, datetime_series): + pieces = [datetime_series[:5], datetime_series[5:10], datetime_series[10:]] + + result = pieces[0]._append(pieces[1:]) + tm.assert_series_equal(result, datetime_series) + + def test_append_duplicates(self): + # GH 13677 + s1 = Series([1, 2, 3]) + s2 = Series([4, 5, 6]) + exp = Series([1, 2, 3, 4, 5, 6], index=[0, 1, 2, 0, 1, 2]) + tm.assert_series_equal(s1._append(s2), exp) + tm.assert_series_equal(pd.concat([s1, s2]), exp) + + # the result must have RangeIndex + exp = Series([1, 2, 3, 4, 5, 6]) + tm.assert_series_equal( + s1._append(s2, ignore_index=True), exp, check_index_type=True + ) + tm.assert_series_equal( + pd.concat([s1, s2], ignore_index=True), exp, check_index_type=True + ) + + msg = "Indexes have overlapping values:" + with pytest.raises(ValueError, match=msg): + s1._append(s2, verify_integrity=True) + with pytest.raises(ValueError, match=msg): + pd.concat([s1, s2], verify_integrity=True) + + def test_append_tuples(self): + # GH 28410 + s = Series([1, 2, 3]) + list_input = [s, s] + tuple_input = (s, s) + + expected = s._append(list_input) + result = s._append(tuple_input) + + tm.assert_series_equal(expected, result) + + def test_append_dataframe_raises(self): + # GH 31413 + df = DataFrame({"A": [1, 2], "B": [3, 4]}) + + msg = "to_append should be a Series or list/tuple of Series, got DataFrame" + with pytest.raises(TypeError, match=msg): + df.A._append(df) + with pytest.raises(TypeError, match=msg): + df.A._append([df]) + + def test_append_raises_future_warning(self): + # GH#35407 + with tm.assert_produces_warning(FutureWarning): + Series([1, 2]).append(Series([3, 4])) + + +class TestSeriesAppendWithDatetimeIndex: + def test_append(self): + rng = date_range("5/8/2012 1:45", periods=10, freq="5T") + ts = Series(np.random.randn(len(rng)), rng) + df = 
DataFrame(np.random.randn(len(rng), 4), index=rng) + + result = ts._append(ts) + result_df = df._append(df) + ex_index = DatetimeIndex(np.tile(rng.values, 2)) + tm.assert_index_equal(result.index, ex_index) + tm.assert_index_equal(result_df.index, ex_index) + + appended = rng.append(rng) + tm.assert_index_equal(appended, ex_index) + + appended = rng.append([rng, rng]) + ex_index = DatetimeIndex(np.tile(rng.values, 3)) + tm.assert_index_equal(appended, ex_index) + + # different index names + rng1 = rng.copy() + rng2 = rng.copy() + rng1.name = "foo" + rng2.name = "bar" + + assert rng1.append(rng1).name == "foo" + assert rng1.append(rng2).name is None + + def test_append_tz(self): + # see gh-2938 + rng = date_range("5/8/2012 1:45", periods=10, freq="5T", tz="US/Eastern") + rng2 = date_range("5/8/2012 2:35", periods=10, freq="5T", tz="US/Eastern") + rng3 = date_range("5/8/2012 1:45", periods=20, freq="5T", tz="US/Eastern") + ts = Series(np.random.randn(len(rng)), rng) + df = DataFrame(np.random.randn(len(rng), 4), index=rng) + ts2 = Series(np.random.randn(len(rng2)), rng2) + df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) + + result = ts._append(ts2) + result_df = df._append(df2) + tm.assert_index_equal(result.index, rng3) + tm.assert_index_equal(result_df.index, rng3) + + appended = rng.append(rng2) + tm.assert_index_equal(appended, rng3) + + def test_append_tz_explicit_pytz(self): + # see gh-2938 + from pytz import timezone as timezone + + rng = date_range( + "5/8/2012 1:45", periods=10, freq="5T", tz=timezone("US/Eastern") + ) + rng2 = date_range( + "5/8/2012 2:35", periods=10, freq="5T", tz=timezone("US/Eastern") + ) + rng3 = date_range( + "5/8/2012 1:45", periods=20, freq="5T", tz=timezone("US/Eastern") + ) + ts = Series(np.random.randn(len(rng)), rng) + df = DataFrame(np.random.randn(len(rng), 4), index=rng) + ts2 = Series(np.random.randn(len(rng2)), rng2) + df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) + + result = ts._append(ts2) + result_df = df._append(df2) + tm.assert_index_equal(result.index, rng3) + tm.assert_index_equal(result_df.index, rng3) + + appended = rng.append(rng2) + tm.assert_index_equal(appended, rng3) + + def test_append_tz_dateutil(self): + # see gh-2938 + rng = date_range( + "5/8/2012 1:45", periods=10, freq="5T", tz="dateutil/US/Eastern" + ) + rng2 = date_range( + "5/8/2012 2:35", periods=10, freq="5T", tz="dateutil/US/Eastern" + ) + rng3 = date_range( + "5/8/2012 1:45", periods=20, freq="5T", tz="dateutil/US/Eastern" + ) + ts = Series(np.random.randn(len(rng)), rng) + df = DataFrame(np.random.randn(len(rng), 4), index=rng) + ts2 = Series(np.random.randn(len(rng2)), rng2) + df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) + + result = ts._append(ts2) + result_df = df._append(df2) + tm.assert_index_equal(result.index, rng3) + tm.assert_index_equal(result_df.index, rng3) + + appended = rng.append(rng2) + tm.assert_index_equal(appended, rng3) + + def test_series_append_aware(self): + rng1 = date_range("1/1/2011 01:00", periods=1, freq="H", tz="US/Eastern") + rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Eastern") + ser1 = Series([1], index=rng1) + ser2 = Series([2], index=rng2) + ts_result = ser1._append(ser2) + + exp_index = DatetimeIndex( + ["2011-01-01 01:00", "2011-01-01 02:00"], tz="US/Eastern", freq="H" + ) + exp = Series([1, 2], index=exp_index) + tm.assert_series_equal(ts_result, exp) + assert ts_result.index.tz == rng1.tz + + rng1 = date_range("1/1/2011 01:00", periods=1, freq="H", tz="UTC") + rng2 = 
date_range("1/1/2011 02:00", periods=1, freq="H", tz="UTC") + ser1 = Series([1], index=rng1) + ser2 = Series([2], index=rng2) + ts_result = ser1._append(ser2) + + exp_index = DatetimeIndex( + ["2011-01-01 01:00", "2011-01-01 02:00"], tz="UTC", freq="H" + ) + exp = Series([1, 2], index=exp_index) + tm.assert_series_equal(ts_result, exp) + utc = rng1.tz + assert utc == ts_result.index.tz + + # GH#7795 + # different tz coerces to object dtype, not UTC + rng1 = date_range("1/1/2011 01:00", periods=1, freq="H", tz="US/Eastern") + rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Central") + ser1 = Series([1], index=rng1) + ser2 = Series([2], index=rng2) + ts_result = ser1._append(ser2) + exp_index = Index( + [ + Timestamp("1/1/2011 01:00", tz="US/Eastern"), + Timestamp("1/1/2011 02:00", tz="US/Central"), + ] + ) + exp = Series([1, 2], index=exp_index) + tm.assert_series_equal(ts_result, exp) + + def test_series_append_aware_naive(self): + rng1 = date_range("1/1/2011 01:00", periods=1, freq="H") + rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Eastern") + ser1 = Series(np.random.randn(len(rng1)), index=rng1) + ser2 = Series(np.random.randn(len(rng2)), index=rng2) + ts_result = ser1._append(ser2) + + expected = ser1.index.astype(object).append(ser2.index.astype(object)) + assert ts_result.index.equals(expected) + + # mixed + rng1 = date_range("1/1/2011 01:00", periods=1, freq="H") + rng2 = range(100) + ser1 = Series(np.random.randn(len(rng1)), index=rng1) + ser2 = Series(np.random.randn(len(rng2)), index=rng2) + ts_result = ser1._append(ser2) + + expected = ser1.index.astype(object).append(ser2.index) + assert ts_result.index.equals(expected) + + def test_series_append_dst(self): + rng1 = date_range("1/1/2016 01:00", periods=3, freq="H", tz="US/Eastern") + rng2 = date_range("8/1/2016 01:00", periods=3, freq="H", tz="US/Eastern") + ser1 = Series([1, 2, 3], index=rng1) + ser2 = Series([10, 11, 12], index=rng2) + ts_result = ser1._append(ser2) + + exp_index = DatetimeIndex( + [ + "2016-01-01 01:00", + "2016-01-01 02:00", + "2016-01-01 03:00", + "2016-08-01 01:00", + "2016-08-01 02:00", + "2016-08-01 03:00", + ], + tz="US/Eastern", + ) + exp = Series([1, 2, 3, 10, 11, 12], index=exp_index) + tm.assert_series_equal(ts_result, exp) + assert ts_result.index.tz == rng1.tz diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_argsort.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_argsort.py new file mode 100644 index 0000000..1fbc9ed --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_argsort.py @@ -0,0 +1,67 @@ +import numpy as np +import pytest + +from pandas import ( + Series, + Timestamp, + isna, +) +import pandas._testing as tm + + +class TestSeriesArgsort: + def _check_accum_op(self, name, ser, check_dtype=True): + func = getattr(np, name) + tm.assert_numpy_array_equal( + func(ser).values, func(np.array(ser)), check_dtype=check_dtype + ) + + # with missing values + ts = ser.copy() + ts[::2] = np.NaN + + result = func(ts)[1::2] + expected = func(np.array(ts.dropna())) + + tm.assert_numpy_array_equal(result.values, expected, check_dtype=False) + + def test_argsort(self, datetime_series): + self._check_accum_op("argsort", datetime_series, check_dtype=False) + argsorted = datetime_series.argsort() + assert issubclass(argsorted.dtype.type, np.integer) + + # GH#2967 (introduced bug in 0.11-dev I think) + s = Series([Timestamp(f"201301{i:02d}") for i in range(1, 6)]) + assert s.dtype == 
"datetime64[ns]" + shifted = s.shift(-1) + assert shifted.dtype == "datetime64[ns]" + assert isna(shifted[4]) + + result = s.argsort() + expected = Series(range(5), dtype=np.intp) + tm.assert_series_equal(result, expected) + + result = shifted.argsort() + expected = Series(list(range(4)) + [-1], dtype=np.intp) + tm.assert_series_equal(result, expected) + + def test_argsort_stable(self): + s = Series(np.random.randint(0, 100, size=10000)) + mindexer = s.argsort(kind="mergesort") + qindexer = s.argsort() + + mexpected = np.argsort(s.values, kind="mergesort") + qexpected = np.argsort(s.values, kind="quicksort") + + tm.assert_series_equal(mindexer.astype(np.intp), Series(mexpected)) + tm.assert_series_equal(qindexer.astype(np.intp), Series(qexpected)) + msg = ( + r"ndarray Expected type , " + r"found instead" + ) + with pytest.raises(AssertionError, match=msg): + tm.assert_numpy_array_equal(qindexer, mindexer) + + def test_argsort_preserve_name(self, datetime_series): + result = datetime_series.argsort() + assert result.name == datetime_series.name diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_asof.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_asof.py new file mode 100644 index 0000000..7ede868 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_asof.py @@ -0,0 +1,207 @@ +import numpy as np +import pytest + +from pandas._libs.tslibs import IncompatibleFrequency + +from pandas import ( + DatetimeIndex, + Series, + Timestamp, + date_range, + isna, + notna, + offsets, +) +import pandas._testing as tm + + +class TestSeriesAsof: + def test_asof_nanosecond_index_access(self): + ts = Timestamp("20130101").value + dti = DatetimeIndex([ts + 50 + i for i in range(100)]) + ser = Series(np.random.randn(100), index=dti) + + first_value = ser.asof(ser.index[0]) + + # this used to not work bc parsing was done by dateutil that didn't + # handle nanoseconds + assert first_value == ser["2013-01-01 00:00:00.000000050+0000"] + + expected_ts = np.datetime64("2013-01-01 00:00:00.000000050", "ns") + assert first_value == ser[Timestamp(expected_ts)] + + def test_basic(self): + + # array or list or dates + N = 50 + rng = date_range("1/1/1990", periods=N, freq="53s") + ts = Series(np.random.randn(N), index=rng) + ts.iloc[15:30] = np.nan + dates = date_range("1/1/1990", periods=N * 3, freq="25s") + + result = ts.asof(dates) + assert notna(result).all() + lb = ts.index[14] + ub = ts.index[30] + + result = ts.asof(list(dates)) + assert notna(result).all() + lb = ts.index[14] + ub = ts.index[30] + + mask = (result.index >= lb) & (result.index < ub) + rs = result[mask] + assert (rs == ts[lb]).all() + + val = result[result.index[result.index >= ub][0]] + assert ts[ub] == val + + def test_scalar(self): + + N = 30 + rng = date_range("1/1/1990", periods=N, freq="53s") + ts = Series(np.arange(N), index=rng) + ts.iloc[5:10] = np.NaN + ts.iloc[15:20] = np.NaN + + val1 = ts.asof(ts.index[7]) + val2 = ts.asof(ts.index[19]) + + assert val1 == ts[4] + assert val2 == ts[14] + + # accepts strings + val1 = ts.asof(str(ts.index[7])) + assert val1 == ts[4] + + # in there + result = ts.asof(ts.index[3]) + assert result == ts[3] + + # no as of value + d = ts.index[0] - offsets.BDay() + assert np.isnan(ts.asof(d)) + + def test_with_nan(self): + # basic asof test + rng = date_range("1/1/2000", "1/2/2000", freq="4h") + s = Series(np.arange(len(rng)), index=rng) + r = s.resample("2h").mean() + + result = r.asof(r.index) + expected = Series( + [0, 0, 
1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6.0], + index=date_range("1/1/2000", "1/2/2000", freq="2h"), + ) + tm.assert_series_equal(result, expected) + + r.iloc[3:5] = np.nan + result = r.asof(r.index) + expected = Series( + [0, 0, 1, 1, 1, 1, 3, 3, 4, 4, 5, 5, 6.0], + index=date_range("1/1/2000", "1/2/2000", freq="2h"), + ) + tm.assert_series_equal(result, expected) + + r.iloc[-3:] = np.nan + result = r.asof(r.index) + expected = Series( + [0, 0, 1, 1, 1, 1, 3, 3, 4, 4, 4, 4, 4.0], + index=date_range("1/1/2000", "1/2/2000", freq="2h"), + ) + tm.assert_series_equal(result, expected) + + def test_periodindex(self): + from pandas import ( + PeriodIndex, + period_range, + ) + + # array or list or dates + N = 50 + rng = period_range("1/1/1990", periods=N, freq="H") + ts = Series(np.random.randn(N), index=rng) + ts.iloc[15:30] = np.nan + dates = date_range("1/1/1990", periods=N * 3, freq="37min") + + result = ts.asof(dates) + assert notna(result).all() + lb = ts.index[14] + ub = ts.index[30] + + result = ts.asof(list(dates)) + assert notna(result).all() + lb = ts.index[14] + ub = ts.index[30] + + pix = PeriodIndex(result.index.values, freq="H") + mask = (pix >= lb) & (pix < ub) + rs = result[mask] + assert (rs == ts[lb]).all() + + ts.iloc[5:10] = np.nan + ts.iloc[15:20] = np.nan + + val1 = ts.asof(ts.index[7]) + val2 = ts.asof(ts.index[19]) + + assert val1 == ts[4] + assert val2 == ts[14] + + # accepts strings + val1 = ts.asof(str(ts.index[7])) + assert val1 == ts[4] + + # in there + assert ts.asof(ts.index[3]) == ts[3] + + # no as of value + d = ts.index[0].to_timestamp() - offsets.BDay() + assert isna(ts.asof(d)) + + # Mismatched freq + msg = "Input has different freq" + with pytest.raises(IncompatibleFrequency, match=msg): + ts.asof(rng.asfreq("D")) + + def test_errors(self): + + s = Series( + [1, 2, 3], + index=[Timestamp("20130101"), Timestamp("20130103"), Timestamp("20130102")], + ) + + # non-monotonic + assert not s.index.is_monotonic + with pytest.raises(ValueError, match="requires a sorted index"): + s.asof(s.index[0]) + + # subset with Series + N = 10 + rng = date_range("1/1/1990", periods=N, freq="53s") + s = Series(np.random.randn(N), index=rng) + with pytest.raises(ValueError, match="not valid for Series"): + s.asof(s.index[0], subset="foo") + + def test_all_nans(self): + # GH 15713 + # series is all nans + + # testing non-default indexes + N = 50 + rng = date_range("1/1/1990", periods=N, freq="53s") + + dates = date_range("1/1/1990", periods=N * 3, freq="25s") + result = Series(np.nan, index=rng).asof(dates) + expected = Series(np.nan, index=dates) + tm.assert_series_equal(result, expected) + + # testing scalar input + date = date_range("1/1/1990", periods=N * 3, freq="25s")[0] + result = Series(np.nan, index=rng).asof(date) + assert isna(result) + + # test name is propagated + result = Series(np.nan, index=[1, 2, 3, 4], name="test").asof([4, 5]) + expected = Series(np.nan, index=[4, 5], name="test") + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_astype.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_astype.py new file mode 100644 index 0000000..8197722 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_astype.py @@ -0,0 +1,586 @@ +from datetime import ( + datetime, + timedelta, +) +from importlib import reload +import string +import sys + +import numpy as np +import pytest + +from pandas._libs.tslibs import iNaT +import pandas.util._test_decorators as td 
+ +from pandas import ( + NA, + Categorical, + CategoricalDtype, + DatetimeTZDtype, + Index, + Interval, + NaT, + Series, + Timedelta, + Timestamp, + cut, + date_range, +) +import pandas._testing as tm + + +class TestAstypeAPI: + def test_arg_for_errors_in_astype(self): + # see GH#14878 + ser = Series([1, 2, 3]) + + msg = ( + r"Expected value of kwarg 'errors' to be one of \['raise', " + r"'ignore'\]\. Supplied value is 'False'" + ) + with pytest.raises(ValueError, match=msg): + ser.astype(np.float64, errors=False) + + ser.astype(np.int8, errors="raise") + + @pytest.mark.parametrize("dtype_class", [dict, Series]) + def test_astype_dict_like(self, dtype_class): + # see GH#7271 + ser = Series(range(0, 10, 2), name="abc") + + dt1 = dtype_class({"abc": str}) + result = ser.astype(dt1) + expected = Series(["0", "2", "4", "6", "8"], name="abc") + tm.assert_series_equal(result, expected) + + dt2 = dtype_class({"abc": "float64"}) + result = ser.astype(dt2) + expected = Series([0.0, 2.0, 4.0, 6.0, 8.0], dtype="float64", name="abc") + tm.assert_series_equal(result, expected) + + dt3 = dtype_class({"abc": str, "def": str}) + msg = ( + "Only the Series name can be used for the key in Series dtype " + r"mappings\." + ) + with pytest.raises(KeyError, match=msg): + ser.astype(dt3) + + dt4 = dtype_class({0: str}) + with pytest.raises(KeyError, match=msg): + ser.astype(dt4) + + # GH#16717 + # if dtypes provided is empty, it should error + if dtype_class is Series: + dt5 = dtype_class({}, dtype=object) + else: + dt5 = dtype_class({}) + + with pytest.raises(KeyError, match=msg): + ser.astype(dt5) + + +class TestAstype: + @pytest.mark.parametrize("dtype", np.typecodes["All"]) + def test_astype_empty_constructor_equality(self, dtype): + # see GH#15524 + + if dtype not in ( + "S", + "V", # poor support (if any) currently + "M", + "m", # Generic timestamps raise a ValueError. Already tested. + ): + init_empty = Series([], dtype=dtype) + with tm.assert_produces_warning(FutureWarning): + as_type_empty = Series([]).astype(dtype) + tm.assert_series_equal(init_empty, as_type_empty) + + @pytest.mark.parametrize("dtype", [str, np.str_]) + @pytest.mark.parametrize( + "series", + [ + Series([string.digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]), + Series([string.digits * 10, tm.rands(63), tm.rands(64), np.nan, 1.0]), + ], + ) + def test_astype_str_map(self, dtype, series): + # see GH#4405 + result = series.astype(dtype) + expected = series.map(str) + tm.assert_series_equal(result, expected) + + def test_astype_float_to_period(self): + result = Series([np.nan]).astype("period[D]") + expected = Series([NaT], dtype="period[D]") + tm.assert_series_equal(result, expected) + + def test_astype_no_pandas_dtype(self): + # https://github.com/pandas-dev/pandas/pull/24866 + ser = Series([1, 2], dtype="int64") + # Don't have PandasDtype in the public API, so we use `.array.dtype`, + # which is a PandasDtype. + result = ser.astype(ser.array.dtype) + tm.assert_series_equal(result, ser) + + @pytest.mark.parametrize("dtype", [np.datetime64, np.timedelta64]) + def test_astype_generic_timestamp_no_frequency(self, dtype, request): + # see GH#15524, GH#15987 + data = [1] + ser = Series(data) + + if np.dtype(dtype).name not in ["timedelta64", "datetime64"]: + mark = pytest.mark.xfail(reason="GH#33890 Is assigned ns unit") + request.node.add_marker(mark) + + msg = ( + fr"The '{dtype.__name__}' dtype has no unit\. " + fr"Please pass in '{dtype.__name__}\[ns\]' instead." 
+ ) + with pytest.raises(ValueError, match=msg): + ser.astype(dtype) + + def test_astype_dt64_to_str(self): + # GH#10442 : testing astype(str) is correct for Series/DatetimeIndex + dti = date_range("2012-01-01", periods=3) + result = Series(dti).astype(str) + expected = Series(["2012-01-01", "2012-01-02", "2012-01-03"], dtype=object) + tm.assert_series_equal(result, expected) + + def test_astype_dt64tz_to_str(self): + # GH#10442 : testing astype(str) is correct for Series/DatetimeIndex + dti_tz = date_range("2012-01-01", periods=3, tz="US/Eastern") + result = Series(dti_tz).astype(str) + expected = Series( + [ + "2012-01-01 00:00:00-05:00", + "2012-01-02 00:00:00-05:00", + "2012-01-03 00:00:00-05:00", + ], + dtype=object, + ) + tm.assert_series_equal(result, expected) + + def test_astype_datetime(self): + ser = Series(iNaT, dtype="M8[ns]", index=range(5)) + + ser = ser.astype("O") + assert ser.dtype == np.object_ + + ser = Series([datetime(2001, 1, 2, 0, 0)]) + + ser = ser.astype("O") + assert ser.dtype == np.object_ + + ser = Series([datetime(2001, 1, 2, 0, 0) for i in range(3)]) + + ser[1] = np.nan + assert ser.dtype == "M8[ns]" + + ser = ser.astype("O") + assert ser.dtype == np.object_ + + def test_astype_datetime64tz(self): + ser = Series(date_range("20130101", periods=3, tz="US/Eastern")) + + # astype + result = ser.astype(object) + expected = Series(ser.astype(object), dtype=object) + tm.assert_series_equal(result, expected) + + result = Series(ser.values).dt.tz_localize("UTC").dt.tz_convert(ser.dt.tz) + tm.assert_series_equal(result, ser) + + # astype - object, preserves on construction + result = Series(ser.astype(object)) + expected = ser.astype(object) + tm.assert_series_equal(result, expected) + + # astype - datetime64[ns, tz] + with tm.assert_produces_warning(FutureWarning): + # dt64->dt64tz astype deprecated + result = Series(ser.values).astype("datetime64[ns, US/Eastern]") + tm.assert_series_equal(result, ser) + + with tm.assert_produces_warning(FutureWarning): + # dt64->dt64tz astype deprecated + result = Series(ser.values).astype(ser.dtype) + tm.assert_series_equal(result, ser) + + result = ser.astype("datetime64[ns, CET]") + expected = Series(date_range("20130101 06:00:00", periods=3, tz="CET")) + tm.assert_series_equal(result, expected) + + def test_astype_str_cast_dt64(self): + # see GH#9757 + ts = Series([Timestamp("2010-01-04 00:00:00")]) + res = ts.astype(str) + + expected = Series(["2010-01-04"]) + tm.assert_series_equal(res, expected) + + ts = Series([Timestamp("2010-01-04 00:00:00", tz="US/Eastern")]) + res = ts.astype(str) + + expected = Series(["2010-01-04 00:00:00-05:00"]) + tm.assert_series_equal(res, expected) + + def test_astype_str_cast_td64(self): + # see GH#9757 + + td = Series([Timedelta(1, unit="d")]) + ser = td.astype(str) + + expected = Series(["1 days"]) + tm.assert_series_equal(ser, expected) + + def test_dt64_series_astype_object(self): + dt64ser = Series(date_range("20130101", periods=3)) + result = dt64ser.astype(object) + assert isinstance(result.iloc[0], datetime) + assert result.dtype == np.object_ + + def test_td64_series_astype_object(self): + tdser = Series(["59 Days", "59 Days", "NaT"], dtype="timedelta64[ns]") + result = tdser.astype(object) + assert isinstance(result.iloc[0], timedelta) + assert result.dtype == np.object_ + + @pytest.mark.parametrize( + "data, dtype", + [ + (["x", "y", "z"], "string[python]"), + pytest.param( + ["x", "y", "z"], + "string[pyarrow]", + marks=td.skip_if_no("pyarrow", min_version="1.0.0"), + ), + (["x", "y", 
"z"], "category"), + (3 * [Timestamp("2020-01-01", tz="UTC")], None), + (3 * [Interval(0, 1)], None), + ], + ) + @pytest.mark.parametrize("errors", ["raise", "ignore"]) + def test_astype_ignores_errors_for_extension_dtypes(self, data, dtype, errors): + # https://github.com/pandas-dev/pandas/issues/35471 + ser = Series(data, dtype=dtype) + if errors == "ignore": + expected = ser + result = ser.astype(float, errors="ignore") + tm.assert_series_equal(result, expected) + else: + msg = "(Cannot cast)|(could not convert)" + with pytest.raises((ValueError, TypeError), match=msg): + ser.astype(float, errors=errors) + + @pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64]) + def test_astype_from_float_to_str(self, dtype): + # https://github.com/pandas-dev/pandas/issues/36451 + ser = Series([0.1], dtype=dtype) + result = ser.astype(str) + expected = Series(["0.1"]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "value, string_value", + [ + (None, "None"), + (np.nan, "nan"), + (NA, ""), + ], + ) + def test_astype_to_str_preserves_na(self, value, string_value): + # https://github.com/pandas-dev/pandas/issues/36904 + ser = Series(["a", "b", value], dtype=object) + result = ser.astype(str) + expected = Series(["a", "b", string_value], dtype=object) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("dtype", ["float32", "float64", "int64", "int32"]) + def test_astype(self, dtype): + ser = Series(np.random.randn(5), name="foo") + as_typed = ser.astype(dtype) + + assert as_typed.dtype == dtype + assert as_typed.name == ser.name + + @pytest.mark.parametrize("value", [np.nan, np.inf]) + @pytest.mark.parametrize("dtype", [np.int32, np.int64]) + def test_astype_cast_nan_inf_int(self, dtype, value): + # gh-14265: check NaN and inf raise error when converting to int + msg = "Cannot convert non-finite values \\(NA or inf\\) to integer" + ser = Series([value]) + + with pytest.raises(ValueError, match=msg): + ser.astype(dtype) + + @pytest.mark.parametrize("dtype", [int, np.int8, np.int64]) + def test_astype_cast_object_int_fail(self, dtype): + arr = Series(["car", "house", "tree", "1"]) + msg = r"invalid literal for int\(\) with base 10: 'car'" + with pytest.raises(ValueError, match=msg): + arr.astype(dtype) + + def test_astype_cast_object_int(self): + arr = Series(["1", "2", "3", "4"], dtype=object) + result = arr.astype(int) + + tm.assert_series_equal(result, Series(np.arange(1, 5))) + + def test_astype_unicode(self): + # see GH#7758: A bit of magic is required to set + # default encoding to utf-8 + digits = string.digits + test_series = [ + Series([digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]), + Series(["データーサイエンス、お前はもう死んでいる"]), + ] + + former_encoding = None + + if sys.getdefaultencoding() == "utf-8": + test_series.append(Series(["野菜食べないとやばい".encode()])) + + for ser in test_series: + res = ser.astype("unicode") + expec = ser.map(str) + tm.assert_series_equal(res, expec) + + # Restore the former encoding + if former_encoding is not None and former_encoding != "utf-8": + reload(sys) + sys.setdefaultencoding(former_encoding) + + def test_astype_bytes(self): + # GH#39474 + result = Series(["foo", "bar", "baz"]).astype(bytes) + assert result.dtypes == np.dtype("S3") + + def test_astype_nan_to_bool(self): + # GH#43018 + ser = Series(np.nan, dtype="object") + result = ser.astype("bool") + expected = Series(True, dtype="bool") + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "dtype", + tm.ALL_INT_EA_DTYPES + 
tm.FLOAT_EA_DTYPES, + ) + def test_astype_ea_to_datetimetzdtype(self, dtype): + # GH37553 + result = Series([4, 0, 9], dtype=dtype).astype(DatetimeTZDtype(tz="US/Pacific")) + expected = Series( + { + 0: Timestamp("1969-12-31 16:00:00.000000004-08:00", tz="US/Pacific"), + 1: Timestamp("1969-12-31 16:00:00.000000000-08:00", tz="US/Pacific"), + 2: Timestamp("1969-12-31 16:00:00.000000009-08:00", tz="US/Pacific"), + } + ) + + if dtype in tm.FLOAT_EA_DTYPES: + expected = Series( + { + 0: Timestamp( + "1970-01-01 00:00:00.000000004-08:00", tz="US/Pacific" + ), + 1: Timestamp( + "1970-01-01 00:00:00.000000000-08:00", tz="US/Pacific" + ), + 2: Timestamp( + "1970-01-01 00:00:00.000000009-08:00", tz="US/Pacific" + ), + } + ) + + tm.assert_series_equal(result, expected) + + def test_astype_retain_Attrs(self, any_numpy_dtype): + # GH#44414 + ser = Series([0, 1, 2, 3]) + ser.attrs["Location"] = "Michigan" + + result = ser.astype(any_numpy_dtype).attrs + expected = ser.attrs + + tm.assert_dict_equal(expected, result) + + +class TestAstypeString: + @pytest.mark.parametrize( + "data, dtype", + [ + ([True, NA], "boolean"), + (["A", NA], "category"), + (["2020-10-10", "2020-10-10"], "datetime64[ns]"), + (["2020-10-10", "2020-10-10", NaT], "datetime64[ns]"), + ( + ["2012-01-01 00:00:00-05:00", NaT], + "datetime64[ns, US/Eastern]", + ), + ([1, None], "UInt16"), + (["1/1/2021", "2/1/2021"], "period[M]"), + (["1/1/2021", "2/1/2021", NaT], "period[M]"), + (["1 Day", "59 Days", NaT], "timedelta64[ns]"), + # currently no way to parse IntervalArray from a list of strings + ], + ) + def test_astype_string_to_extension_dtype_roundtrip( + self, data, dtype, request, nullable_string_dtype + ): + if dtype == "boolean" or ( + dtype in ("period[M]", "datetime64[ns]", "timedelta64[ns]") and NaT in data + ): + mark = pytest.mark.xfail( + reason="TODO StringArray.astype() with missing values #GH40566" + ) + request.node.add_marker(mark) + # GH-40351 + ser = Series(data, dtype=dtype) + + # Note: just passing .astype(dtype) fails for dtype="category" + # with bc ser.dtype.categories will be object dtype whereas + # result.dtype.categories will have string dtype + result = ser.astype(nullable_string_dtype).astype(ser.dtype) + tm.assert_series_equal(result, ser) + + +class TestAstypeCategorical: + def test_astype_categorical_to_other(self): + cat = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)]) + ser = Series(np.random.RandomState(0).randint(0, 10000, 100)).sort_values() + ser = cut(ser, range(0, 10500, 500), right=False, labels=cat) + + expected = ser + tm.assert_series_equal(ser.astype("category"), expected) + tm.assert_series_equal(ser.astype(CategoricalDtype()), expected) + msg = r"Cannot cast object dtype to float64" + with pytest.raises(ValueError, match=msg): + ser.astype("float64") + + cat = Series(Categorical(["a", "b", "b", "a", "a", "c", "c", "c"])) + exp = Series(["a", "b", "b", "a", "a", "c", "c", "c"]) + tm.assert_series_equal(cat.astype("str"), exp) + s2 = Series(Categorical(["1", "2", "3", "4"])) + exp2 = Series([1, 2, 3, 4]).astype("int") + tm.assert_series_equal(s2.astype("int"), exp2) + + # object don't sort correctly, so just compare that we have the same + # values + def cmp(a, b): + tm.assert_almost_equal(np.sort(np.unique(a)), np.sort(np.unique(b))) + + expected = Series(np.array(ser.values), name="value_group") + cmp(ser.astype("object"), expected) + cmp(ser.astype(np.object_), expected) + + # array conversion + tm.assert_almost_equal(np.array(ser), np.array(ser.values)) + + 
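+ # round-trips: casting to the same categorical dtype should be a no-op,
+ # while going through object and back infers sorted, observed-only
+ # categories (hence roundtrip_expected below)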
tm.assert_series_equal(ser.astype("category"), ser) + tm.assert_series_equal(ser.astype(CategoricalDtype()), ser) + + roundtrip_expected = ser.cat.set_categories( + ser.cat.categories.sort_values() + ).cat.remove_unused_categories() + result = ser.astype("object").astype("category") + tm.assert_series_equal(result, roundtrip_expected) + result = ser.astype("object").astype(CategoricalDtype()) + tm.assert_series_equal(result, roundtrip_expected) + + def test_astype_categorical_invalid_conversions(self): + # invalid conversion (these are NOT a dtype) + cat = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)]) + ser = Series(np.random.randint(0, 10000, 100)).sort_values() + ser = cut(ser, range(0, 10500, 500), right=False, labels=cat) + + msg = ( + "dtype '<class 'pandas.core.arrays.categorical.Categorical'>' " + "not understood" + ) + with pytest.raises(TypeError, match=msg): + ser.astype(Categorical) + with pytest.raises(TypeError, match=msg): + ser.astype("object").astype(Categorical) + + def test_astype_categoricaldtype(self): + ser = Series(["a", "b", "a"]) + result = ser.astype(CategoricalDtype(["a", "b"], ordered=True)) + expected = Series(Categorical(["a", "b", "a"], ordered=True)) + tm.assert_series_equal(result, expected) + + result = ser.astype(CategoricalDtype(["a", "b"], ordered=False)) + expected = Series(Categorical(["a", "b", "a"], ordered=False)) + tm.assert_series_equal(result, expected) + + result = ser.astype(CategoricalDtype(["a", "b", "c"], ordered=False)) + expected = Series( + Categorical(["a", "b", "a"], categories=["a", "b", "c"], ordered=False) + ) + tm.assert_series_equal(result, expected) + tm.assert_index_equal(result.cat.categories, Index(["a", "b", "c"])) + + @pytest.mark.parametrize("name", [None, "foo"]) + @pytest.mark.parametrize("dtype_ordered", [True, False]) + @pytest.mark.parametrize("series_ordered", [True, False]) + def test_astype_categorical_to_categorical( + self, name, dtype_ordered, series_ordered + ): + # GH#10696, GH#18593 + s_data = list("abcaacbab") + s_dtype = CategoricalDtype(list("bac"), ordered=series_ordered) + ser = Series(s_data, dtype=s_dtype, name=name) + + # unspecified categories + dtype = CategoricalDtype(ordered=dtype_ordered) + result = ser.astype(dtype) + exp_dtype = CategoricalDtype(s_dtype.categories, dtype_ordered) + expected = Series(s_data, name=name, dtype=exp_dtype) + tm.assert_series_equal(result, expected) + + # different categories + dtype = CategoricalDtype(list("adc"), dtype_ordered) + result = ser.astype(dtype) + expected = Series(s_data, name=name, dtype=dtype) + tm.assert_series_equal(result, expected) + + if dtype_ordered is False: + # not specifying ordered, so only test once + expected = ser + result = ser.astype("category") + tm.assert_series_equal(result, expected) + + def test_astype_bool_missing_to_categorical(self): + # GH-19182 + ser = Series([True, False, np.nan]) + assert ser.dtypes == np.object_ + + result = ser.astype(CategoricalDtype(categories=[True, False])) + expected = Series(Categorical([True, False, np.nan], categories=[True, False])) + tm.assert_series_equal(result, expected) + + def test_astype_categories_raises(self): + # deprecated GH#17636, removed in GH#27141 + ser = Series(["a", "b", "a"]) + with pytest.raises(TypeError, match="got an unexpected"): + ser.astype("category", categories=["a", "b"], ordered=True) + + @pytest.mark.parametrize("items", [["a", "b", "c", "a"], [1, 2, 3, 1]]) + def test_astype_from_categorical(self, items): + ser = Series(items) + exp = Series(Categorical(items)) + res = ser.astype("category") +
tm.assert_series_equal(res, exp) + + def test_astype_from_categorical_with_keywords(self): + # with keywords + lst = ["a", "b", "c", "a"] + ser = Series(lst) + exp = Series(Categorical(lst, ordered=True)) + res = ser.astype(CategoricalDtype(None, ordered=True)) + tm.assert_series_equal(res, exp) + + exp = Series(Categorical(lst, categories=list("abcdef"), ordered=True)) + res = ser.astype(CategoricalDtype(list("abcdef"), ordered=True)) + tm.assert_series_equal(res, exp) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_autocorr.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_autocorr.py new file mode 100644 index 0000000..05e3540 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_autocorr.py @@ -0,0 +1,30 @@ +import numpy as np + + +class TestAutoCorr: + def test_autocorr(self, datetime_series): + # Just run the function + corr1 = datetime_series.autocorr() + + # Now run it with the lag parameter + corr2 = datetime_series.autocorr(lag=1) + + # corr() with lag needs Series of at least length 2 + if len(datetime_series) <= 2: + assert np.isnan(corr1) + assert np.isnan(corr2) + else: + assert corr1 == corr2 + + # Choose a random lag between 1 and length of Series - 2 + # and compare the result with the Series corr() function + n = 1 + np.random.randint(max(1, len(datetime_series) - 2)) + corr1 = datetime_series.corr(datetime_series.shift(n)) + corr2 = datetime_series.autocorr(lag=n) + + # corr() with lag needs Series of at least length 2 + if len(datetime_series) <= 2: + assert np.isnan(corr1) + assert np.isnan(corr2) + else: + assert corr1 == corr2 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_between.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_between.py new file mode 100644 index 0000000..d810176 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_between.py @@ -0,0 +1,84 @@ +import numpy as np +import pytest + +from pandas import ( + Series, + bdate_range, + date_range, + period_range, +) +import pandas._testing as tm + + +class TestBetween: + def test_between(self): + series = Series(date_range("1/1/2000", periods=10)) + left, right = series[[2, 7]] + + result = series.between(left, right) + expected = (series >= left) & (series <= right) + tm.assert_series_equal(result, expected) + + def test_between_datetime_object_dtype(self): + ser = Series(bdate_range("1/1/2000", periods=20).astype(object)) + ser[::2] = np.nan + + result = ser[ser.between(ser[3], ser[17])] + expected = ser[3:18].dropna() + tm.assert_series_equal(result, expected) + + result = ser[ser.between(ser[3], ser[17], inclusive="neither")] + expected = ser[5:16].dropna() + tm.assert_series_equal(result, expected) + + def test_between_period_values(self): + ser = Series(period_range("2000-01-01", periods=10, freq="D")) + left, right = ser[[2, 7]] + result = ser.between(left, right) + expected = (ser >= left) & (ser <= right) + tm.assert_series_equal(result, expected) + + def test_between_inclusive_string(self): # :issue:`40628` + series = Series(date_range("1/1/2000", periods=10)) + left, right = series[[2, 7]] + + result = series.between(left, right, inclusive="both") + expected = (series >= left) & (series <= right) + tm.assert_series_equal(result, expected) + + result = series.between(left, right, inclusive="left") + expected = (series >= left) & (series < right) + tm.assert_series_equal(result, expected) + + result = 
series.between(left, right, inclusive="right") + expected = (series > left) & (series <= right) + tm.assert_series_equal(result, expected) + + result = series.between(left, right, inclusive="neither") + expected = (series > left) & (series < right) + tm.assert_series_equal(result, expected) + + def test_between_error_args(self): # :issue:`40628` + series = Series(date_range("1/1/2000", periods=10)) + left, right = series[[2, 7]] + + value_error_msg = ( + "Inclusive has to be either string of 'both'," + "'left', 'right', or 'neither'." + ) + + with pytest.raises(ValueError, match=value_error_msg): + series = Series(date_range("1/1/2000", periods=10)) + series.between(left, right, inclusive="yes") + + def test_between_inclusive_warning(self): + series = Series(date_range("1/1/2000", periods=10)) + left, right = series[[2, 7]] + with tm.assert_produces_warning(FutureWarning): + result = series.between(left, right, inclusive=False) + expected = (series > left) & (series < right) + tm.assert_series_equal(result, expected) + with tm.assert_produces_warning(FutureWarning): + result = series.between(left, right, inclusive=True) + expected = (series >= left) & (series <= right) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_clip.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_clip.py new file mode 100644 index 0000000..bc6d5ae --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_clip.py @@ -0,0 +1,151 @@ +from datetime import datetime + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Series, + Timestamp, + isna, + notna, +) +import pandas._testing as tm + + +class TestSeriesClip: + def test_clip(self, datetime_series): + val = datetime_series.median() + + assert datetime_series.clip(lower=val).min() == val + assert datetime_series.clip(upper=val).max() == val + + result = datetime_series.clip(-0.5, 0.5) + expected = np.clip(datetime_series, -0.5, 0.5) + tm.assert_series_equal(result, expected) + assert isinstance(expected, Series) + + def test_clip_types_and_nulls(self): + + sers = [ + Series([np.nan, 1.0, 2.0, 3.0]), + Series([None, "a", "b", "c"]), + Series(pd.to_datetime([np.nan, 1, 2, 3], unit="D")), + ] + + for s in sers: + thresh = s[2] + lower = s.clip(lower=thresh) + upper = s.clip(upper=thresh) + assert lower[notna(lower)].min() == thresh + assert upper[notna(upper)].max() == thresh + assert list(isna(s)) == list(isna(lower)) + assert list(isna(s)) == list(isna(upper)) + + def test_series_clipping_with_na_values(self, any_numeric_ea_dtype, nulls_fixture): + # Ensure that clipping method can handle NA values with out failing + # GH#40581 + + if nulls_fixture is pd.NaT: + # constructor will raise, see + # test_constructor_mismatched_null_nullable_dtype + return + + ser = Series([nulls_fixture, 1.0, 3.0], dtype=any_numeric_ea_dtype) + s_clipped_upper = ser.clip(upper=2.0) + s_clipped_lower = ser.clip(lower=2.0) + + expected_upper = Series([nulls_fixture, 1.0, 2.0], dtype=any_numeric_ea_dtype) + expected_lower = Series([nulls_fixture, 2.0, 3.0], dtype=any_numeric_ea_dtype) + + tm.assert_series_equal(s_clipped_upper, expected_upper) + tm.assert_series_equal(s_clipped_lower, expected_lower) + + def test_clip_with_na_args(self): + """Should process np.nan argument as None""" + # GH#17276 + s = Series([1, 2, 3]) + + tm.assert_series_equal(s.clip(np.nan), Series([1, 2, 3])) + tm.assert_series_equal(s.clip(upper=np.nan, 
lower=np.nan), Series([1, 2, 3])) + + # GH#19992 + tm.assert_series_equal(s.clip(lower=[0, 4, np.nan]), Series([1, 4, 3])) + tm.assert_series_equal(s.clip(upper=[1, np.nan, 1]), Series([1, 2, 1])) + + # GH#40420 + s = Series([1, 2, 3]) + result = s.clip(0, [np.nan, np.nan, np.nan]) + tm.assert_series_equal(s, result) + + def test_clip_against_series(self): + # GH#6966 + + s = Series([1.0, 1.0, 4.0]) + + lower = Series([1.0, 2.0, 3.0]) + upper = Series([1.5, 2.5, 3.5]) + + tm.assert_series_equal(s.clip(lower, upper), Series([1.0, 2.0, 3.5])) + tm.assert_series_equal(s.clip(1.5, upper), Series([1.5, 1.5, 3.5])) + + @pytest.mark.parametrize("inplace", [True, False]) + @pytest.mark.parametrize("upper", [[1, 2, 3], np.asarray([1, 2, 3])]) + def test_clip_against_list_like(self, inplace, upper): + # GH#15390 + original = Series([5, 6, 7]) + result = original.clip(upper=upper, inplace=inplace) + expected = Series([1, 2, 3]) + + if inplace: + result = original + tm.assert_series_equal(result, expected, check_exact=True) + + def test_clip_with_datetimes(self): + # GH#11838 + # naive and tz-aware datetimes + + t = Timestamp("2015-12-01 09:30:30") + s = Series([Timestamp("2015-12-01 09:30:00"), Timestamp("2015-12-01 09:31:00")]) + result = s.clip(upper=t) + expected = Series( + [Timestamp("2015-12-01 09:30:00"), Timestamp("2015-12-01 09:30:30")] + ) + tm.assert_series_equal(result, expected) + + t = Timestamp("2015-12-01 09:30:30", tz="US/Eastern") + s = Series( + [ + Timestamp("2015-12-01 09:30:00", tz="US/Eastern"), + Timestamp("2015-12-01 09:31:00", tz="US/Eastern"), + ] + ) + result = s.clip(upper=t) + expected = Series( + [ + Timestamp("2015-12-01 09:30:00", tz="US/Eastern"), + Timestamp("2015-12-01 09:30:30", tz="US/Eastern"), + ] + ) + tm.assert_series_equal(result, expected) + + def test_clip_with_timestamps_and_oob_datetimes(self): + # GH-42794 + ser = Series([datetime(1, 1, 1), datetime(9999, 9, 9)]) + + result = ser.clip(lower=Timestamp.min, upper=Timestamp.max) + expected = Series([Timestamp.min, Timestamp.max], dtype="object") + + tm.assert_series_equal(result, expected) + + def test_clip_pos_args_deprecation(self): + # https://github.com/pandas-dev/pandas/issues/41485 + ser = Series([1, 2, 3]) + msg = ( + r"In a future version of pandas all arguments of Series.clip except " + r"for the arguments 'lower' and 'upper' will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = ser.clip(0, 1, 0) + expected = Series([1, 1, 1]) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_combine.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_combine.py new file mode 100644 index 0000000..75d47e3 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_combine.py @@ -0,0 +1,17 @@ +from pandas import Series +import pandas._testing as tm + + +class TestCombine: + def test_combine_scalar(self): + # GH#21248 + # Note - combine() with another Series is tested elsewhere because + # it is used when testing operators + ser = Series([i * 10 for i in range(5)]) + result = ser.combine(3, lambda x, y: x + y) + expected = Series([i * 10 + 3 for i in range(5)]) + tm.assert_series_equal(result, expected) + + result = ser.combine(22, lambda x, y: min(x, y)) + expected = Series([min(i * 10, 22) for i in range(5)]) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_combine_first.py 
b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_combine_first.py new file mode 100644 index 0000000..b838797 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_combine_first.py @@ -0,0 +1,104 @@ +from datetime import datetime + +import numpy as np + +import pandas as pd +from pandas import ( + Period, + Series, + date_range, + period_range, + to_datetime, +) +import pandas._testing as tm + + +class TestCombineFirst: + def test_combine_first_period_datetime(self): + # GH#3367 + didx = date_range(start="1950-01-31", end="1950-07-31", freq="M") + pidx = period_range(start=Period("1950-1"), end=Period("1950-7"), freq="M") + # check to be consistent with DatetimeIndex + for idx in [didx, pidx]: + a = Series([1, np.nan, np.nan, 4, 5, np.nan, 7], index=idx) + b = Series([9, 9, 9, 9, 9, 9, 9], index=idx) + + result = a.combine_first(b) + expected = Series([1, 9, 9, 4, 5, 9, 7], index=idx, dtype=np.float64) + tm.assert_series_equal(result, expected) + + def test_combine_first_name(self, datetime_series): + result = datetime_series.combine_first(datetime_series[:5]) + assert result.name == datetime_series.name + + def test_combine_first(self): + values = tm.makeIntIndex(20).values.astype(float) + series = Series(values, index=tm.makeIntIndex(20)) + + series_copy = series * 2 + series_copy[::2] = np.NaN + + # nothing used from the input + combined = series.combine_first(series_copy) + + tm.assert_series_equal(combined, series) + + # Holes filled from input + combined = series_copy.combine_first(series) + assert np.isfinite(combined).all() + + tm.assert_series_equal(combined[::2], series[::2]) + tm.assert_series_equal(combined[1::2], series_copy[1::2]) + + # mixed types + index = tm.makeStringIndex(20) + floats = Series(np.random.randn(20), index=index) + strings = Series(tm.makeStringIndex(10), index=index[::2]) + + combined = strings.combine_first(floats) + + tm.assert_series_equal(strings, combined.loc[index[::2]]) + tm.assert_series_equal(floats[1::2].astype(object), combined.loc[index[1::2]]) + + # corner case + ser = Series([1.0, 2, 3], index=[0, 1, 2]) + empty = Series([], index=[], dtype=object) + result = ser.combine_first(empty) + ser.index = ser.index.astype("O") + tm.assert_series_equal(ser, result) + + def test_combine_first_dt64(self): + + s0 = to_datetime(Series(["2010", np.NaN])) + s1 = to_datetime(Series([np.NaN, "2011"])) + rs = s0.combine_first(s1) + xp = to_datetime(Series(["2010", "2011"])) + tm.assert_series_equal(rs, xp) + + s0 = to_datetime(Series(["2010", np.NaN])) + s1 = Series([np.NaN, "2011"]) + rs = s0.combine_first(s1) + + msg = "containing strings is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + xp = Series([datetime(2010, 1, 1), "2011"]) + + tm.assert_series_equal(rs, xp) + + def test_combine_first_dt_tz_values(self, tz_naive_fixture): + ser1 = Series( + pd.DatetimeIndex(["20150101", "20150102", "20150103"], tz=tz_naive_fixture), + name="ser1", + ) + ser2 = Series( + pd.DatetimeIndex(["20160514", "20160515", "20160516"], tz=tz_naive_fixture), + index=[2, 3, 4], + name="ser2", + ) + result = ser1.combine_first(ser2) + exp_vals = pd.DatetimeIndex( + ["20150101", "20150102", "20150103", "20160515", "20160516"], + tz=tz_naive_fixture, + ) + exp = Series(exp_vals, name="ser1") + tm.assert_series_equal(exp, result) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_compare.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_compare.py new 
file mode 100644 index 0000000..8570800 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_compare.py @@ -0,0 +1,116 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +@pytest.mark.parametrize("align_axis", [0, 1, "index", "columns"]) +def test_compare_axis(align_axis): + # GH#30429 + s1 = pd.Series(["a", "b", "c"]) + s2 = pd.Series(["x", "b", "z"]) + + result = s1.compare(s2, align_axis=align_axis) + + if align_axis in (1, "columns"): + indices = pd.Index([0, 2]) + columns = pd.Index(["self", "other"]) + expected = pd.DataFrame( + [["a", "x"], ["c", "z"]], index=indices, columns=columns + ) + tm.assert_frame_equal(result, expected) + else: + indices = pd.MultiIndex.from_product([[0, 2], ["self", "other"]]) + expected = pd.Series(["a", "x", "c", "z"], index=indices) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "keep_shape, keep_equal", + [ + (True, False), + (False, True), + (True, True), + # False, False case is already covered in test_compare_axis + ], +) +def test_compare_various_formats(keep_shape, keep_equal): + s1 = pd.Series(["a", "b", "c"]) + s2 = pd.Series(["x", "b", "z"]) + + result = s1.compare(s2, keep_shape=keep_shape, keep_equal=keep_equal) + + if keep_shape: + indices = pd.Index([0, 1, 2]) + columns = pd.Index(["self", "other"]) + if keep_equal: + expected = pd.DataFrame( + [["a", "x"], ["b", "b"], ["c", "z"]], index=indices, columns=columns + ) + else: + expected = pd.DataFrame( + [["a", "x"], [np.nan, np.nan], ["c", "z"]], + index=indices, + columns=columns, + ) + else: + indices = pd.Index([0, 2]) + columns = pd.Index(["self", "other"]) + expected = pd.DataFrame( + [["a", "x"], ["c", "z"]], index=indices, columns=columns + ) + tm.assert_frame_equal(result, expected) + + +def test_compare_with_equal_nulls(): + # We want to make sure two NaNs are considered the same + # and dropped where applicable + s1 = pd.Series(["a", "b", np.nan]) + s2 = pd.Series(["x", "b", np.nan]) + + result = s1.compare(s2) + expected = pd.DataFrame([["a", "x"]], columns=["self", "other"]) + tm.assert_frame_equal(result, expected) + + +def test_compare_with_non_equal_nulls(): + # We want to make sure the relevant NaNs do not get dropped + s1 = pd.Series(["a", "b", "c"]) + s2 = pd.Series(["x", "b", np.nan]) + + result = s1.compare(s2, align_axis=0) + + indices = pd.MultiIndex.from_product([[0, 2], ["self", "other"]]) + expected = pd.Series(["a", "x", "c", np.nan], index=indices) + tm.assert_series_equal(result, expected) + + +def test_compare_multi_index(): + index = pd.MultiIndex.from_arrays([[0, 0, 1], [0, 1, 2]]) + s1 = pd.Series(["a", "b", "c"], index=index) + s2 = pd.Series(["x", "b", "z"], index=index) + + result = s1.compare(s2, align_axis=0) + + indices = pd.MultiIndex.from_arrays( + [[0, 0, 1, 1], [0, 0, 2, 2], ["self", "other", "self", "other"]] + ) + expected = pd.Series(["a", "x", "c", "z"], index=indices) + tm.assert_series_equal(result, expected) + + +def test_compare_unaligned_objects(): + # test Series with different indices + msg = "Can only compare identically-labeled Series objects" + with pytest.raises(ValueError, match=msg): + ser1 = pd.Series([1, 2, 3], index=["a", "b", "c"]) + ser2 = pd.Series([1, 2, 3], index=["a", "b", "d"]) + ser1.compare(ser2) + + # test Series with different lengths + msg = "Can only compare identically-labeled Series objects" + with pytest.raises(ValueError, match=msg): + ser1 = pd.Series([1, 2, 3]) + ser2 = pd.Series([1, 2, 3, 4]) + 
ser1.compare(ser2) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_convert.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_convert.py new file mode 100644 index 0000000..4832780 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_convert.py @@ -0,0 +1,139 @@ +from datetime import datetime + +import numpy as np +import pytest + +from pandas import ( + Series, + Timestamp, +) +import pandas._testing as tm + + +class TestConvert: + def test_convert(self): + # GH#10265 + dt = datetime(2001, 1, 1, 0, 0) + td = dt - datetime(2000, 1, 1, 0, 0) + + # Test coercion with mixed types + ser = Series(["a", "3.1415", dt, td]) + + results = ser._convert(numeric=True) + expected = Series([np.nan, 3.1415, np.nan, np.nan]) + tm.assert_series_equal(results, expected) + + # Test standard conversion returns original + results = ser._convert(datetime=True) + tm.assert_series_equal(results, ser) + results = ser._convert(numeric=True) + expected = Series([np.nan, 3.1415, np.nan, np.nan]) + tm.assert_series_equal(results, expected) + results = ser._convert(timedelta=True) + tm.assert_series_equal(results, ser) + + def test_convert_numeric_strings_with_other_true_args(self): + # test pass-through and non-conversion when other types selected + ser = Series(["1.0", "2.0", "3.0"]) + results = ser._convert(datetime=True, numeric=True, timedelta=True) + expected = Series([1.0, 2.0, 3.0]) + tm.assert_series_equal(results, expected) + results = ser._convert(True, False, True) + tm.assert_series_equal(results, ser) + + def test_convert_datetime_objects(self): + ser = Series( + [datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 1, 0, 0)], dtype="O" + ) + results = ser._convert(datetime=True, numeric=True, timedelta=True) + expected = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 1, 0, 0)]) + tm.assert_series_equal(results, expected) + results = ser._convert(datetime=False, numeric=True, timedelta=True) + tm.assert_series_equal(results, ser) + + def test_convert_datetime64(self): + # no-op if already dt64 dtype + ser = Series( + [ + datetime(2001, 1, 1, 0, 0), + datetime(2001, 1, 2, 0, 0), + datetime(2001, 1, 3, 0, 0), + ] + ) + + result = ser._convert(datetime=True) + expected = Series( + [Timestamp("20010101"), Timestamp("20010102"), Timestamp("20010103")], + dtype="M8[ns]", + ) + tm.assert_series_equal(result, expected) + + result = ser._convert(datetime=True) + tm.assert_series_equal(result, expected) + + def test_convert_timedeltas(self): + td = datetime(2001, 1, 1, 0, 0) - datetime(2000, 1, 1, 0, 0) + ser = Series([td, td], dtype="O") + results = ser._convert(datetime=True, numeric=True, timedelta=True) + expected = Series([td, td]) + tm.assert_series_equal(results, expected) + results = ser._convert(True, True, False) + tm.assert_series_equal(results, ser) + + def test_convert_numeric_strings(self): + ser = Series([1.0, 2, 3], index=["a", "b", "c"]) + result = ser._convert(numeric=True) + tm.assert_series_equal(result, ser) + + # force numeric conversion + res = ser.copy().astype("O") + res["a"] = "1" + result = res._convert(numeric=True) + tm.assert_series_equal(result, ser) + + res = ser.copy().astype("O") + res["a"] = "1." 
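+ # a numeric string with a trailing decimal point ("1.") should likewise
+ # coerce back to the float 1.0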
+ result = res._convert(numeric=True) + tm.assert_series_equal(result, ser) + + res = ser.copy().astype("O") + res["a"] = "garbled" + result = res._convert(numeric=True) + expected = ser.copy() + expected["a"] = np.nan + tm.assert_series_equal(result, expected) + + def test_convert_mixed_type_noop(self): + # GH 4119, not converting a mixed type (e.g.floats and object) + ser = Series([1, "na", 3, 4]) + result = ser._convert(datetime=True, numeric=True) + expected = Series([1, np.nan, 3, 4]) + tm.assert_series_equal(result, expected) + + ser = Series([1, "", 3, 4]) + result = ser._convert(datetime=True, numeric=True) + tm.assert_series_equal(result, expected) + + def test_convert_preserve_non_object(self): + # preserve if non-object + ser = Series([1], dtype="float32") + result = ser._convert(datetime=True) + tm.assert_series_equal(result, ser) + + def test_convert_no_arg_error(self): + ser = Series(["1.0", "2"]) + msg = r"At least one of datetime, numeric or timedelta must be True\." + with pytest.raises(ValueError, match=msg): + ser._convert() + + def test_convert_preserve_bool(self): + ser = Series([1, True, 3, 5], dtype=object) + res = ser._convert(datetime=True, numeric=True) + expected = Series([1, 1, 3, 5], dtype="i8") + tm.assert_series_equal(res, expected) + + def test_convert_preserve_all_bool(self): + ser = Series([False, True, False, False], dtype=object) + res = ser._convert(datetime=True, numeric=True) + expected = Series([False, True, False, False], dtype=bool) + tm.assert_series_equal(res, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_convert_dtypes.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_convert_dtypes.py new file mode 100644 index 0000000..1e88ddf --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_convert_dtypes.py @@ -0,0 +1,237 @@ +from itertools import product + +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_interval_dtype + +import pandas as pd +import pandas._testing as tm + +# Each test case consists of a tuple with the data and dtype to create the +# test Series, the default dtype for the expected result (which is valid +# for most cases), and the specific cases where the result deviates from +# this default. Those overrides are defined as a dict with (keyword, val) as +# dictionary key. In case of multiple items, the last override takes precedence. 
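+# For example, reading the first entry below (an illustrative note, not an
+# extra case): pd.Series([1, 2, 3], dtype="int32").convert_dtypes() is
+# expected to return the nullable "Int32" dtype, while passing
+# convert_integer=False should leave the original np.dtype("int32") intact.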
+ +test_cases = [ + ( + # data + [1, 2, 3], + # original dtype + np.dtype("int32"), + # default expected dtype + "Int32", + # exceptions on expected dtype + {("convert_integer", False): np.dtype("int32")}, + ), + ( + [1, 2, 3], + np.dtype("int64"), + "Int64", + {("convert_integer", False): np.dtype("int64")}, + ), + ( + ["x", "y", "z"], + np.dtype("O"), + pd.StringDtype(), + {("convert_string", False): np.dtype("O")}, + ), + ( + [True, False, np.nan], + np.dtype("O"), + pd.BooleanDtype(), + {("convert_boolean", False): np.dtype("O")}, + ), + ( + ["h", "i", np.nan], + np.dtype("O"), + pd.StringDtype(), + {("convert_string", False): np.dtype("O")}, + ), + ( # GH32117 + ["h", "i", 1], + np.dtype("O"), + np.dtype("O"), + {}, + ), + ( + [10, np.nan, 20], + np.dtype("float"), + "Int64", + { + ("convert_integer", False, "convert_floating", True): "Float64", + ("convert_integer", False, "convert_floating", False): np.dtype("float"), + }, + ), + ( + [np.nan, 100.5, 200], + np.dtype("float"), + "Float64", + {("convert_floating", False): np.dtype("float")}, + ), + ( + [3, 4, 5], + "Int8", + "Int8", + {}, + ), + ( + [[1, 2], [3, 4], [5]], + None, + np.dtype("O"), + {}, + ), + ( + [4, 5, 6], + np.dtype("uint32"), + "UInt32", + {("convert_integer", False): np.dtype("uint32")}, + ), + ( + [-10, 12, 13], + np.dtype("i1"), + "Int8", + {("convert_integer", False): np.dtype("i1")}, + ), + ( + [1.2, 1.3], + np.dtype("float32"), + "Float32", + {("convert_floating", False): np.dtype("float32")}, + ), + ( + [1, 2.0], + object, + "Int64", + { + ("convert_integer", False): "Float64", + ("convert_integer", False, "convert_floating", False): np.dtype("float"), + ("infer_objects", False): np.dtype("object"), + }, + ), + ( + [1, 2.5], + object, + "Float64", + { + ("convert_floating", False): np.dtype("float"), + ("infer_objects", False): np.dtype("object"), + }, + ), + (["a", "b"], pd.CategoricalDtype(), pd.CategoricalDtype(), {}), + ( + pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]), + pd.DatetimeTZDtype(tz="UTC"), + pd.DatetimeTZDtype(tz="UTC"), + {}, + ), + ( + pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]), + "datetime64[ns]", + np.dtype("datetime64[ns]"), + {}, + ), + ( + pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]), + object, + np.dtype("datetime64[ns]"), + {("infer_objects", False): np.dtype("object")}, + ), + (pd.period_range("1/1/2011", freq="M", periods=3), None, pd.PeriodDtype("M"), {}), + ( + pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)]), + None, + pd.IntervalDtype("int64", "right"), + {}, + ), +] + + +class TestSeriesConvertDtypes: + @pytest.mark.parametrize( + "data, maindtype, expected_default, expected_other", + test_cases, + ) + @pytest.mark.parametrize("params", product(*[(True, False)] * 5)) + def test_convert_dtypes( + self, data, maindtype, params, expected_default, expected_other + ): + warn = None + if ( + hasattr(data, "dtype") + and data.dtype == "M8[ns]" + and isinstance(maindtype, pd.DatetimeTZDtype) + ): + # this astype is deprecated in favor of tz_localize + warn = FutureWarning + + if maindtype is not None: + with tm.assert_produces_warning(warn): + series = pd.Series(data, dtype=maindtype) + else: + series = pd.Series(data) + + result = series.convert_dtypes(*params) + + param_names = [ + "infer_objects", + "convert_string", + "convert_integer", + "convert_boolean", + "convert_floating", + ] + params_dict = dict(zip(param_names, params)) + + expected_dtype = expected_default + for spec, dtype in expected_other.items(): + if 
all(params_dict[key] is val for key, val in zip(spec[::2], spec[1::2])): + expected_dtype = dtype + + warn2 = None + if ( + hasattr(data, "dtype") + and data.dtype == "M8[ns]" + and isinstance(expected_dtype, pd.DatetimeTZDtype) + ): + # this astype is deprecated in favor of tz_localize + warn2 = FutureWarning + + with tm.assert_produces_warning(warn2): + expected = pd.Series(data, dtype=expected_dtype) + tm.assert_series_equal(result, expected) + + # Test that it is a copy + copy = series.copy(deep=True) + if is_interval_dtype(result.dtype) and result.dtype.subtype.kind in ["i", "u"]: + msg = "Cannot set float NaN to integer-backed IntervalArray" + with pytest.raises(ValueError, match=msg): + result[result.notna()] = np.nan + else: + result[result.notna()] = np.nan + + # Make sure original not changed + tm.assert_series_equal(series, copy) + + def test_convert_string_dtype(self, nullable_string_dtype): + # https://github.com/pandas-dev/pandas/issues/31731 -> converting columns + # that are already string dtype + df = pd.DataFrame( + {"A": ["a", "b", pd.NA], "B": ["ä", "ö", "ü"]}, dtype=nullable_string_dtype + ) + result = df.convert_dtypes() + tm.assert_frame_equal(df, result) + + def test_convert_bool_dtype(self): + # GH32287 + df = pd.DataFrame({"A": pd.array([True])}) + tm.assert_frame_equal(df, df.convert_dtypes()) + + def test_convert_byte_string_dtype(self): + # GH-43183 + byte_str = b"binary-string" + + df = pd.DataFrame(data={"A": byte_str}, index=[0]) + result = df.convert_dtypes() + expected = df + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_copy.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_copy.py new file mode 100644 index 0000000..8aa5c14 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_copy.py @@ -0,0 +1,74 @@ +import numpy as np +import pytest + +from pandas import ( + Series, + Timestamp, +) +import pandas._testing as tm + + +class TestCopy: + @pytest.mark.parametrize("deep", [None, False, True]) + def test_copy(self, deep): + + ser = Series(np.arange(10), dtype="float64") + + # default deep is True + if deep is None: + ser2 = ser.copy() + else: + ser2 = ser.copy(deep=deep) + + ser2[::2] = np.NaN + + if deep is None or deep is True: + # Did not modify original Series + assert np.isnan(ser2[0]) + assert not np.isnan(ser[0]) + else: + # we DID modify the original Series + assert np.isnan(ser2[0]) + assert np.isnan(ser[0]) + + @pytest.mark.parametrize("deep", [None, False, True]) + def test_copy_tzaware(self, deep): + # GH#11794 + # copy of tz-aware + expected = Series([Timestamp("2012/01/01", tz="UTC")]) + expected2 = Series([Timestamp("1999/01/01", tz="UTC")]) + + ser = Series([Timestamp("2012/01/01", tz="UTC")]) + + if deep is None: + ser2 = ser.copy() + else: + ser2 = ser.copy(deep=deep) + + ser2[0] = Timestamp("1999/01/01", tz="UTC") + + # default deep is True + if deep is None or deep is True: + # Did not modify original Series + tm.assert_series_equal(ser2, expected2) + tm.assert_series_equal(ser, expected) + else: + # we DID modify the original Series + tm.assert_series_equal(ser2, expected2) + tm.assert_series_equal(ser, expected2) + + def test_copy_name(self, datetime_series): + result = datetime_series.copy() + assert result.name == datetime_series.name + + def test_copy_index_name_checking(self, datetime_series): + # don't want to be able to modify the index stored elsewhere after + # making a copy + + 
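+ # copy() defaults to deep=True, so the copy owns an independent index;
+ # renaming the copy's index below must not touch the original, whose
+ # index name stays None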
datetime_series.index.name = None + assert datetime_series.index.name is None + assert datetime_series is datetime_series + + cp = datetime_series.copy() + cp.index.name = "foo" + assert datetime_series.index.name is None diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_count.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_count.py new file mode 100644 index 0000000..29fb6aa --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_count.py @@ -0,0 +1,98 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Categorical, + MultiIndex, + Series, +) +import pandas._testing as tm + + +class TestSeriesCount: + def test_count_level_series(self): + index = MultiIndex( + levels=[["foo", "bar", "baz"], ["one", "two", "three", "four"]], + codes=[[0, 0, 0, 2, 2], [2, 0, 1, 1, 2]], + ) + + ser = Series(np.random.randn(len(index)), index=index) + + with tm.assert_produces_warning(FutureWarning): + result = ser.count(level=0) + expected = ser.groupby(level=0).count() + tm.assert_series_equal( + result.astype("f8"), expected.reindex(result.index).fillna(0) + ) + + with tm.assert_produces_warning(FutureWarning): + result = ser.count(level=1) + expected = ser.groupby(level=1).count() + tm.assert_series_equal( + result.astype("f8"), expected.reindex(result.index).fillna(0) + ) + + def test_count_multiindex(self, series_with_multilevel_index): + ser = series_with_multilevel_index + + series = ser.copy() + series.index.names = ["a", "b"] + + with tm.assert_produces_warning(FutureWarning): + result = series.count(level="b") + with tm.assert_produces_warning(FutureWarning): + expect = ser.count(level=1).rename_axis("b") + tm.assert_series_equal(result, expect) + + with tm.assert_produces_warning(FutureWarning): + result = series.count(level="a") + with tm.assert_produces_warning(FutureWarning): + expect = ser.count(level=0).rename_axis("a") + tm.assert_series_equal(result, expect) + + msg = "Level x not found" + with pytest.raises(KeyError, match=msg): + with tm.assert_produces_warning(FutureWarning): + series.count("x") + + def test_count_level_without_multiindex(self): + ser = Series(range(3)) + + msg = "Series.count level is only valid with a MultiIndex" + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(FutureWarning): + ser.count(level=1) + + def test_count(self, datetime_series): + assert datetime_series.count() == len(datetime_series) + + datetime_series[::2] = np.NaN + + assert datetime_series.count() == np.isfinite(datetime_series).sum() + + mi = MultiIndex.from_arrays([list("aabbcc"), [1, 2, 2, np.nan, 1, 2]]) + ts = Series(np.arange(len(mi)), index=mi) + + with tm.assert_produces_warning(FutureWarning): + left = ts.count(level=1) + right = Series([2, 3, 1], index=[1, 2, np.nan]) + tm.assert_series_equal(left, right) + + ts.iloc[[0, 3, 5]] = np.nan + with tm.assert_produces_warning(FutureWarning): + tm.assert_series_equal(ts.count(level=1), right - 1) + + # GH#29478 + with pd.option_context("use_inf_as_na", True): + assert Series([pd.Timestamp("1990/1/1")]).count() == 1 + + def test_count_categorical(self): + + ser = Series( + Categorical( + [np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1], ordered=True + ) + ) + result = ser.count() + assert result == 2 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_cov_corr.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_cov_corr.py new file mode 100644 index 
0000000..58a332a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_cov_corr.py @@ -0,0 +1,176 @@ +import math + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + Series, + isna, +) +import pandas._testing as tm + + +class TestSeriesCov: + def test_cov(self, datetime_series): + # full overlap + tm.assert_almost_equal( + datetime_series.cov(datetime_series), datetime_series.std() ** 2 + ) + + # partial overlap + tm.assert_almost_equal( + datetime_series[:15].cov(datetime_series[5:]), + datetime_series[5:15].std() ** 2, + ) + + # No overlap + assert np.isnan(datetime_series[::2].cov(datetime_series[1::2])) + + # all NA + cp = datetime_series[:10].copy() + cp[:] = np.nan + assert isna(cp.cov(cp)) + + # min_periods + assert isna(datetime_series[:15].cov(datetime_series[5:], min_periods=12)) + + ts1 = datetime_series[:15].reindex(datetime_series.index) + ts2 = datetime_series[5:].reindex(datetime_series.index) + assert isna(ts1.cov(ts2, min_periods=12)) + + @pytest.mark.parametrize("test_ddof", [None, 0, 1, 2, 3]) + def test_cov_ddof(self, test_ddof): + # GH#34611 + np_array1 = np.random.rand(10) + np_array2 = np.random.rand(10) + + s1 = Series(np_array1) + s2 = Series(np_array2) + + result = s1.cov(s2, ddof=test_ddof) + expected = np.cov(np_array1, np_array2, ddof=test_ddof)[0][1] + assert math.isclose(expected, result) + + +class TestSeriesCorr: + @td.skip_if_no_scipy + def test_corr(self, datetime_series): + import scipy.stats as stats + + # full overlap + tm.assert_almost_equal(datetime_series.corr(datetime_series), 1) + + # partial overlap + tm.assert_almost_equal(datetime_series[:15].corr(datetime_series[5:]), 1) + + assert isna(datetime_series[:15].corr(datetime_series[5:], min_periods=12)) + + ts1 = datetime_series[:15].reindex(datetime_series.index) + ts2 = datetime_series[5:].reindex(datetime_series.index) + assert isna(ts1.corr(ts2, min_periods=12)) + + # No overlap + assert np.isnan(datetime_series[::2].corr(datetime_series[1::2])) + + # all NA + cp = datetime_series[:10].copy() + cp[:] = np.nan + assert isna(cp.corr(cp)) + + A = tm.makeTimeSeries() + B = tm.makeTimeSeries() + result = A.corr(B) + expected, _ = stats.pearsonr(A, B) + tm.assert_almost_equal(result, expected) + + @td.skip_if_no_scipy + def test_corr_rank(self): + import scipy.stats as stats + + # kendall and spearman + A = tm.makeTimeSeries() + B = tm.makeTimeSeries() + A[-5:] = A[:5] + result = A.corr(B, method="kendall") + expected = stats.kendalltau(A, B)[0] + tm.assert_almost_equal(result, expected) + + result = A.corr(B, method="spearman") + expected = stats.spearmanr(A, B)[0] + tm.assert_almost_equal(result, expected) + + # results from R + A = Series( + [ + -0.89926396, + 0.94209606, + -1.03289164, + -0.95445587, + 0.76910310, + -0.06430576, + -2.09704447, + 0.40660407, + -0.89926396, + 0.94209606, + ] + ) + B = Series( + [ + -1.01270225, + -0.62210117, + -1.56895827, + 0.59592943, + -0.01680292, + 1.17258718, + -1.06009347, + -0.10222060, + -0.89076239, + 0.89372375, + ] + ) + kexp = 0.4319297 + sexp = 0.5853767 + tm.assert_almost_equal(A.corr(B, method="kendall"), kexp) + tm.assert_almost_equal(A.corr(B, method="spearman"), sexp) + + def test_corr_invalid_method(self): + # GH PR #22298 + s1 = Series(np.random.randn(10)) + s2 = Series(np.random.randn(10)) + msg = "method must be either 'pearson', 'spearman', 'kendall', or a callable, " + with pytest.raises(ValueError, match=msg): + s1.corr(s2, 
method="____") + + def test_corr_callable_method(self, datetime_series): + # simple correlation example + # returns 1 if exact equality, 0 otherwise + my_corr = lambda a, b: 1.0 if (a == b).all() else 0.0 + + # simple example + s1 = Series([1, 2, 3, 4, 5]) + s2 = Series([5, 4, 3, 2, 1]) + expected = 0 + tm.assert_almost_equal(s1.corr(s2, method=my_corr), expected) + + # full overlap + tm.assert_almost_equal( + datetime_series.corr(datetime_series, method=my_corr), 1.0 + ) + + # partial overlap + tm.assert_almost_equal( + datetime_series[:15].corr(datetime_series[5:], method=my_corr), 1.0 + ) + + # No overlap + assert np.isnan( + datetime_series[::2].corr(datetime_series[1::2], method=my_corr) + ) + + # dataframe example + df = pd.DataFrame([s1, s2]) + expected = pd.DataFrame([{0: 1.0, 1: 0}, {0: 0, 1: 1.0}]) + tm.assert_almost_equal(df.transpose().corr(method=my_corr), expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_describe.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_describe.py new file mode 100644 index 0000000..e6c6016 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_describe.py @@ -0,0 +1,151 @@ +import numpy as np + +from pandas import ( + Period, + Series, + Timedelta, + Timestamp, + date_range, +) +import pandas._testing as tm + + +class TestSeriesDescribe: + def test_describe_ints(self): + ser = Series([0, 1, 2, 3, 4], name="int_data") + result = ser.describe() + expected = Series( + [5, 2, ser.std(), 0, 1, 2, 3, 4], + name="int_data", + index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"], + ) + tm.assert_series_equal(result, expected) + + def test_describe_bools(self): + ser = Series([True, True, False, False, False], name="bool_data") + result = ser.describe() + expected = Series( + [5, 2, False, 3], name="bool_data", index=["count", "unique", "top", "freq"] + ) + tm.assert_series_equal(result, expected) + + def test_describe_strs(self): + + ser = Series(["a", "a", "b", "c", "d"], name="str_data") + result = ser.describe() + expected = Series( + [5, 4, "a", 2], name="str_data", index=["count", "unique", "top", "freq"] + ) + tm.assert_series_equal(result, expected) + + def test_describe_timedelta64(self): + ser = Series( + [ + Timedelta("1 days"), + Timedelta("2 days"), + Timedelta("3 days"), + Timedelta("4 days"), + Timedelta("5 days"), + ], + name="timedelta_data", + ) + result = ser.describe() + expected = Series( + [5, ser[2], ser.std(), ser[0], ser[1], ser[2], ser[3], ser[4]], + name="timedelta_data", + index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"], + ) + tm.assert_series_equal(result, expected) + + def test_describe_period(self): + ser = Series( + [Period("2020-01", "M"), Period("2020-01", "M"), Period("2019-12", "M")], + name="period_data", + ) + result = ser.describe() + expected = Series( + [3, 2, ser[0], 2], + name="period_data", + index=["count", "unique", "top", "freq"], + ) + tm.assert_series_equal(result, expected) + + def test_describe_empty_object(self): + # https://github.com/pandas-dev/pandas/issues/27183 + s = Series([None, None], dtype=object) + result = s.describe() + expected = Series( + [0, 0, np.nan, np.nan], + dtype=object, + index=["count", "unique", "top", "freq"], + ) + tm.assert_series_equal(result, expected) + + result = s[:0].describe() + tm.assert_series_equal(result, expected) + # ensure NaN, not None + assert np.isnan(result.iloc[2]) + assert np.isnan(result.iloc[3]) + + def test_describe_with_tz(self, 
tz_naive_fixture): + # GH 21332 + tz = tz_naive_fixture + name = str(tz_naive_fixture) + start = Timestamp(2018, 1, 1) + end = Timestamp(2018, 1, 5) + s = Series(date_range(start, end, tz=tz), name=name) + result = s.describe(datetime_is_numeric=True) + expected = Series( + [ + 5, + Timestamp(2018, 1, 3).tz_localize(tz), + start.tz_localize(tz), + s[1], + s[2], + s[3], + end.tz_localize(tz), + ], + name=name, + index=["count", "mean", "min", "25%", "50%", "75%", "max"], + ) + tm.assert_series_equal(result, expected) + + def test_describe_with_tz_warns(self): + name = tz = "CET" + start = Timestamp(2018, 1, 1) + end = Timestamp(2018, 1, 5) + s = Series(date_range(start, end, tz=tz), name=name) + + with tm.assert_produces_warning(FutureWarning): + result = s.describe() + + expected = Series( + [ + 5, + 5, + s.value_counts().index[0], + 1, + start.tz_localize(tz), + end.tz_localize(tz), + ], + name=name, + index=["count", "unique", "top", "freq", "first", "last"], + ) + tm.assert_series_equal(result, expected) + + def test_datetime_is_numeric_includes_datetime(self): + s = Series(date_range("2012", periods=3)) + result = s.describe(datetime_is_numeric=True) + expected = Series( + [ + 3, + Timestamp("2012-01-02"), + Timestamp("2012-01-01"), + Timestamp("2012-01-01T12:00:00"), + Timestamp("2012-01-02"), + Timestamp("2012-01-02T12:00:00"), + Timestamp("2012-01-03"), + ], + index=["count", "mean", "min", "25%", "50%", "75%", "max"], + ) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_diff.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_diff.py new file mode 100644 index 0000000..938a0f9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_diff.py @@ -0,0 +1,84 @@ +import numpy as np +import pytest + +from pandas import ( + Series, + TimedeltaIndex, + date_range, +) +import pandas._testing as tm + + +class TestSeriesDiff: + def test_diff_np(self): + # TODO(__array_function__): could make np.diff return a Series + # matching ser.diff() + + ser = Series(np.arange(5)) + + res = np.diff(ser) + expected = np.array([1, 1, 1, 1]) + tm.assert_numpy_array_equal(res, expected) + + def test_diff_int(self): + # int dtype + a = 10000000000000000 + b = a + 1 + ser = Series([a, b]) + + result = ser.diff() + assert result[1] == 1 + + def test_diff_tz(self): + # Combined datetime diff, normal diff and boolean diff test + ts = tm.makeTimeSeries(name="ts") + ts.diff() + + # neg n + result = ts.diff(-1) + expected = ts - ts.shift(-1) + tm.assert_series_equal(result, expected) + + # 0 + result = ts.diff(0) + expected = ts - ts + tm.assert_series_equal(result, expected) + + def test_diff_dt64(self): + # datetime diff (GH#3100) + ser = Series(date_range("20130102", periods=5)) + result = ser.diff() + expected = ser - ser.shift(1) + tm.assert_series_equal(result, expected) + + # timedelta diff + result = result - result.shift(1) # previous result + expected = expected.diff() # previously expected + tm.assert_series_equal(result, expected) + + def test_diff_dt64tz(self): + # with tz + ser = Series( + date_range("2000-01-01 09:00:00", periods=5, tz="US/Eastern"), name="foo" + ) + result = ser.diff() + expected = Series(TimedeltaIndex(["NaT"] + ["1 days"] * 4), name="foo") + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "input,output,diff", + [([False, True, True, False, False], [np.nan, True, False, True, False], 1)], + ) + def test_diff_bool(self, input, output, 
diff): + # boolean series (test for fixing #17294) + ser = Series(input) + result = ser.diff() + expected = Series(output) + tm.assert_series_equal(result, expected) + + def test_diff_object_dtype(self): + # object series + ser = Series([False, True, 5.0, np.nan, True, False]) + result = ser.diff() + expected = ser - ser.shift(1) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_drop.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_drop.py new file mode 100644 index 0000000..a566f8f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_drop.py @@ -0,0 +1,99 @@ +import pytest + +from pandas import Series +import pandas._testing as tm + + +@pytest.mark.parametrize( + "data, index, drop_labels, axis, expected_data, expected_index", + [ + # Unique Index + ([1, 2], ["one", "two"], ["two"], 0, [1], ["one"]), + ([1, 2], ["one", "two"], ["two"], "rows", [1], ["one"]), + ([1, 1, 2], ["one", "two", "one"], ["two"], 0, [1, 2], ["one", "one"]), + # GH 5248 Non-Unique Index + ([1, 1, 2], ["one", "two", "one"], "two", 0, [1, 2], ["one", "one"]), + ([1, 1, 2], ["one", "two", "one"], ["one"], 0, [1], ["two"]), + ([1, 1, 2], ["one", "two", "one"], "one", 0, [1], ["two"]), + ], +) +def test_drop_unique_and_non_unique_index( + data, index, axis, drop_labels, expected_data, expected_index +): + + s = Series(data=data, index=index) + result = s.drop(drop_labels, axis=axis) + expected = Series(data=expected_data, index=expected_index) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "data, index, drop_labels, axis, error_type, error_desc", + [ + # single string/tuple-like + (range(3), list("abc"), "bc", 0, KeyError, "not found in axis"), + # bad axis + (range(3), list("abc"), ("a",), 0, KeyError, "not found in axis"), + (range(3), list("abc"), "one", "columns", ValueError, "No axis named columns"), + ], +) +def test_drop_exception_raised(data, index, drop_labels, axis, error_type, error_desc): + ser = Series(data, index=index) + with pytest.raises(error_type, match=error_desc): + ser.drop(drop_labels, axis=axis) + + +def test_drop_with_ignore_errors(): + # errors='ignore' + s = Series(range(3), index=list("abc")) + result = s.drop("bc", errors="ignore") + tm.assert_series_equal(result, s) + result = s.drop(["a", "d"], errors="ignore") + expected = s.iloc[1:] + tm.assert_series_equal(result, expected) + + # GH 8522 + s = Series([2, 3], index=[True, False]) + assert s.index.is_object() + result = s.drop(True) + expected = Series([3], index=[False]) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("index", [[1, 2, 3], [1, 1, 3]]) +@pytest.mark.parametrize("drop_labels", [[], [1], [3]]) +def test_drop_empty_list(index, drop_labels): + # GH 21494 + expected_index = [i for i in index if i not in drop_labels] + series = Series(index=index, dtype=object).drop(drop_labels) + expected = Series(index=expected_index, dtype=object) + tm.assert_series_equal(series, expected) + + +@pytest.mark.parametrize( + "data, index, drop_labels", + [ + (None, [1, 2, 3], [1, 4]), + (None, [1, 2, 2], [1, 4]), + ([2, 3], [0, 1], [False, True]), + ], +) +def test_drop_non_empty_list(data, index, drop_labels): + # GH 21494 and GH 16877 + dtype = object if data is None else None + ser = Series(data=data, index=index, dtype=dtype) + with pytest.raises(KeyError, match="not found in axis"): + ser.drop(drop_labels) + + +def test_drop_pos_args_deprecation(): + # 
https://github.com/pandas-dev/pandas/issues/41485 + ser = Series([1, 2, 3]) + msg = ( + r"In a future version of pandas all arguments of Series\.drop " + r"except for the argument 'labels' will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = ser.drop(1, 0) + expected = Series([1, 3], index=[0, 2]) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_drop_duplicates.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_drop_duplicates.py new file mode 100644 index 0000000..c5cffa0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_drop_duplicates.py @@ -0,0 +1,257 @@ +import numpy as np +import pytest + +from pandas import ( + Categorical, + Series, +) +import pandas._testing as tm + + +@pytest.mark.parametrize( + "keep, expected", + [ + ("first", Series([False, False, False, False, True, True, False])), + ("last", Series([False, True, True, False, False, False, False])), + (False, Series([False, True, True, False, True, True, False])), + ], +) +def test_drop_duplicates(any_numpy_dtype, keep, expected): + tc = Series([1, 0, 3, 5, 3, 0, 4], dtype=np.dtype(any_numpy_dtype)) + + if tc.dtype == "bool": + pytest.skip("tested separately in test_drop_duplicates_bool") + + tm.assert_series_equal(tc.duplicated(keep=keep), expected) + tm.assert_series_equal(tc.drop_duplicates(keep=keep), tc[~expected]) + sc = tc.copy() + return_value = sc.drop_duplicates(keep=keep, inplace=True) + assert return_value is None + tm.assert_series_equal(sc, tc[~expected]) + + +@pytest.mark.parametrize( + "keep, expected", + [ + ("first", Series([False, False, True, True])), + ("last", Series([True, True, False, False])), + (False, Series([True, True, True, True])), + ], +) +def test_drop_duplicates_bool(keep, expected): + tc = Series([True, False, True, False]) + + tm.assert_series_equal(tc.duplicated(keep=keep), expected) + tm.assert_series_equal(tc.drop_duplicates(keep=keep), tc[~expected]) + sc = tc.copy() + return_value = sc.drop_duplicates(keep=keep, inplace=True) + tm.assert_series_equal(sc, tc[~expected]) + assert return_value is None + + +@pytest.mark.parametrize("values", [[], list(range(5))]) +def test_drop_duplicates_no_duplicates(any_numpy_dtype, keep, values): + tc = Series(values, dtype=np.dtype(any_numpy_dtype)) + expected = Series([False] * len(tc), dtype="bool") + + if tc.dtype == "bool": + # 0 -> False and 1-> True + # any other value would be duplicated + tc = tc[:2] + expected = expected[:2] + + tm.assert_series_equal(tc.duplicated(keep=keep), expected) + + result_dropped = tc.drop_duplicates(keep=keep) + tm.assert_series_equal(result_dropped, tc) + + # validate shallow copy + assert result_dropped is not tc + + +class TestSeriesDropDuplicates: + @pytest.fixture( + params=["int_", "uint", "float_", "unicode_", "timedelta64[h]", "datetime64[D]"] + ) + def dtype(self, request): + return request.param + + @pytest.fixture + def cat_series_unused_category(self, dtype, ordered): + # Test case 1 + cat_array = np.array([1, 2, 3, 4, 5], dtype=np.dtype(dtype)) + + input1 = np.array([1, 2, 3, 3], dtype=np.dtype(dtype)) + cat = Categorical(input1, categories=cat_array, ordered=ordered) + tc1 = Series(cat) + return tc1 + + def test_drop_duplicates_categorical_non_bool(self, cat_series_unused_category): + tc1 = cat_series_unused_category + + expected = Series([False, False, False, True]) + + result = tc1.duplicated() + 
tm.assert_series_equal(result, expected) + + result = tc1.drop_duplicates() + tm.assert_series_equal(result, tc1[~expected]) + + sc = tc1.copy() + return_value = sc.drop_duplicates(inplace=True) + assert return_value is None + tm.assert_series_equal(sc, tc1[~expected]) + + def test_drop_duplicates_categorical_non_bool_keeplast( + self, cat_series_unused_category + ): + tc1 = cat_series_unused_category + + expected = Series([False, False, True, False]) + + result = tc1.duplicated(keep="last") + tm.assert_series_equal(result, expected) + + result = tc1.drop_duplicates(keep="last") + tm.assert_series_equal(result, tc1[~expected]) + + sc = tc1.copy() + return_value = sc.drop_duplicates(keep="last", inplace=True) + assert return_value is None + tm.assert_series_equal(sc, tc1[~expected]) + + def test_drop_duplicates_categorical_non_bool_keepfalse( + self, cat_series_unused_category + ): + tc1 = cat_series_unused_category + + expected = Series([False, False, True, True]) + + result = tc1.duplicated(keep=False) + tm.assert_series_equal(result, expected) + + result = tc1.drop_duplicates(keep=False) + tm.assert_series_equal(result, tc1[~expected]) + + sc = tc1.copy() + return_value = sc.drop_duplicates(keep=False, inplace=True) + assert return_value is None + tm.assert_series_equal(sc, tc1[~expected]) + + @pytest.fixture + def cat_series(self, dtype, ordered): + # no unused categories, unlike cat_series_unused_category + cat_array = np.array([1, 2, 3, 4, 5], dtype=np.dtype(dtype)) + + input2 = np.array([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype(dtype)) + cat = Categorical(input2, categories=cat_array, ordered=ordered) + tc2 = Series(cat) + return tc2 + + def test_drop_duplicates_categorical_non_bool2(self, cat_series): + tc2 = cat_series + + expected = Series([False, False, False, False, True, True, False]) + + result = tc2.duplicated() + tm.assert_series_equal(result, expected) + + result = tc2.drop_duplicates() + tm.assert_series_equal(result, tc2[~expected]) + + sc = tc2.copy() + return_value = sc.drop_duplicates(inplace=True) + assert return_value is None + tm.assert_series_equal(sc, tc2[~expected]) + + def test_drop_duplicates_categorical_non_bool2_keeplast(self, cat_series): + tc2 = cat_series + + expected = Series([False, True, True, False, False, False, False]) + + result = tc2.duplicated(keep="last") + tm.assert_series_equal(result, expected) + + result = tc2.drop_duplicates(keep="last") + tm.assert_series_equal(result, tc2[~expected]) + + sc = tc2.copy() + return_value = sc.drop_duplicates(keep="last", inplace=True) + assert return_value is None + tm.assert_series_equal(sc, tc2[~expected]) + + def test_drop_duplicates_categorical_non_bool2_keepfalse(self, cat_series): + tc2 = cat_series + + expected = Series([False, True, True, False, True, True, False]) + + result = tc2.duplicated(keep=False) + tm.assert_series_equal(result, expected) + + result = tc2.drop_duplicates(keep=False) + tm.assert_series_equal(result, tc2[~expected]) + + sc = tc2.copy() + return_value = sc.drop_duplicates(keep=False, inplace=True) + assert return_value is None + tm.assert_series_equal(sc, tc2[~expected]) + + def test_drop_duplicates_categorical_bool(self, ordered): + tc = Series( + Categorical( + [True, False, True, False], categories=[True, False], ordered=ordered + ) + ) + + expected = Series([False, False, True, True]) + tm.assert_series_equal(tc.duplicated(), expected) + tm.assert_series_equal(tc.drop_duplicates(), tc[~expected]) + sc = tc.copy() + return_value = sc.drop_duplicates(inplace=True) + assert 
return_value is None + tm.assert_series_equal(sc, tc[~expected]) + + expected = Series([True, True, False, False]) + tm.assert_series_equal(tc.duplicated(keep="last"), expected) + tm.assert_series_equal(tc.drop_duplicates(keep="last"), tc[~expected]) + sc = tc.copy() + return_value = sc.drop_duplicates(keep="last", inplace=True) + assert return_value is None + tm.assert_series_equal(sc, tc[~expected]) + + expected = Series([True, True, True, True]) + tm.assert_series_equal(tc.duplicated(keep=False), expected) + tm.assert_series_equal(tc.drop_duplicates(keep=False), tc[~expected]) + sc = tc.copy() + return_value = sc.drop_duplicates(keep=False, inplace=True) + assert return_value is None + tm.assert_series_equal(sc, tc[~expected]) + + def test_drop_duplicates_categorical_bool_na(self, nulls_fixture): + # GH#44351 + ser = Series( + Categorical( + [True, False, True, False, nulls_fixture], + categories=[True, False], + ordered=True, + ) + ) + result = ser.drop_duplicates() + expected = Series( + Categorical([True, False, np.nan], categories=[True, False], ordered=True), + index=[0, 1, 4], + ) + tm.assert_series_equal(result, expected) + + +def test_drop_duplicates_pos_args_deprecation(): + # GH#41485 + s = Series(["a", "b", "c", "b"]) + msg = ( + "In a future version of pandas all arguments of " + "Series.drop_duplicates will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = s.drop_duplicates("last") + expected = Series(["a", "c", "b"], index=[0, 2, 3]) + tm.assert_series_equal(expected, result) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_dropna.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_dropna.py new file mode 100644 index 0000000..0dab927 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_dropna.py @@ -0,0 +1,115 @@ +import numpy as np +import pytest + +from pandas import ( + DatetimeIndex, + IntervalIndex, + NaT, + Period, + Series, + Timestamp, +) +import pandas._testing as tm + + +class TestDropna: + def test_dropna_empty(self): + ser = Series([], dtype=object) + + assert len(ser.dropna()) == 0 + return_value = ser.dropna(inplace=True) + assert return_value is None + assert len(ser) == 0 + + # invalid axis + msg = "No axis named 1 for object type Series" + with pytest.raises(ValueError, match=msg): + ser.dropna(axis=1) + + def test_dropna_preserve_name(self, datetime_series): + datetime_series[:5] = np.nan + result = datetime_series.dropna() + assert result.name == datetime_series.name + name = datetime_series.name + ts = datetime_series.copy() + return_value = ts.dropna(inplace=True) + assert return_value is None + assert ts.name == name + + def test_dropna_no_nan(self): + for ser in [ + Series([1, 2, 3], name="x"), + Series([False, True, False], name="x"), + ]: + + result = ser.dropna() + tm.assert_series_equal(result, ser) + assert result is not ser + + s2 = ser.copy() + return_value = s2.dropna(inplace=True) + assert return_value is None + tm.assert_series_equal(s2, ser) + + def test_dropna_intervals(self): + ser = Series( + [np.nan, 1, 2, 3], + IntervalIndex.from_arrays([np.nan, 0, 1, 2], [np.nan, 1, 2, 3]), + ) + + result = ser.dropna() + expected = ser.iloc[1:] + tm.assert_series_equal(result, expected) + + def test_dropna_period_dtype(self): + # GH#13737 + ser = Series([Period("2011-01", freq="M"), Period("NaT", freq="M")]) + result = ser.dropna() + expected = Series([Period("2011-01", freq="M")]) + + tm.assert_series_equal(result, 
expected) + + def test_datetime64_tz_dropna(self): + # DatetimeLikeBlock + ser = Series( + [ + Timestamp("2011-01-01 10:00"), + NaT, + Timestamp("2011-01-03 10:00"), + NaT, + ] + ) + result = ser.dropna() + expected = Series( + [Timestamp("2011-01-01 10:00"), Timestamp("2011-01-03 10:00")], index=[0, 2] + ) + tm.assert_series_equal(result, expected) + + # DatetimeTZBlock + idx = DatetimeIndex( + ["2011-01-01 10:00", NaT, "2011-01-03 10:00", NaT], tz="Asia/Tokyo" + ) + ser = Series(idx) + assert ser.dtype == "datetime64[ns, Asia/Tokyo]" + result = ser.dropna() + expected = Series( + [ + Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"), + Timestamp("2011-01-03 10:00", tz="Asia/Tokyo"), + ], + index=[0, 2], + ) + assert result.dtype == "datetime64[ns, Asia/Tokyo]" + tm.assert_series_equal(result, expected) + + def test_dropna_pos_args_deprecation(self): + # https://github.com/pandas-dev/pandas/issues/41485 + ser = Series([1, 2, 3]) + msg = ( + r"In a future version of pandas all arguments of Series\.dropna " + r"will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = ser.dropna(0) + expected = Series([1, 2, 3]) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_dtypes.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_dtypes.py new file mode 100644 index 0000000..abc0e5d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_dtypes.py @@ -0,0 +1,8 @@ +import numpy as np + + +class TestSeriesDtypes: + def test_dtype(self, datetime_series): + + assert datetime_series.dtype == np.dtype("float64") + assert datetime_series.dtypes == np.dtype("float64") diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_duplicated.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_duplicated.py new file mode 100644 index 0000000..1c547ee --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_duplicated.py @@ -0,0 +1,52 @@ +import numpy as np +import pytest + +from pandas import ( + Categorical, + Series, +) +import pandas._testing as tm + + +@pytest.mark.parametrize( + "keep, expected", + [ + ("first", Series([False, False, True, False, True], name="name")), + ("last", Series([True, True, False, False, False], name="name")), + (False, Series([True, True, True, False, True], name="name")), + ], +) +def test_duplicated_keep(keep, expected): + ser = Series(["a", "b", "b", "c", "a"], name="name") + + result = ser.duplicated(keep=keep) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "keep, expected", + [ + ("first", Series([False, False, True, False, True])), + ("last", Series([True, True, False, False, False])), + (False, Series([True, True, True, False, True])), + ], +) +def test_duplicated_nan_none(keep, expected): + ser = Series([np.nan, 3, 3, None, np.nan], dtype=object) + + result = ser.duplicated(keep=keep) + tm.assert_series_equal(result, expected) + + +def test_duplicated_categorical_bool_na(nulls_fixture): + # GH#44351 + ser = Series( + Categorical( + [True, False, True, False, nulls_fixture], + categories=[True, False], + ordered=True, + ) + ) + result = ser.duplicated() + expected = Series([False, False, True, True, False]) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_equals.py 
b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_equals.py new file mode 100644 index 0000000..22e27c2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_equals.py @@ -0,0 +1,142 @@ +from contextlib import nullcontext +import copy + +import numpy as np +import pytest + +from pandas._libs.missing import is_matching_na + +from pandas.core.dtypes.common import is_float + +from pandas import ( + Index, + MultiIndex, + Series, +) +import pandas._testing as tm + + +@pytest.mark.parametrize( + "arr, idx", + [ + ([1, 2, 3, 4], [0, 2, 1, 3]), + ([1, np.nan, 3, np.nan], [0, 2, 1, 3]), + ( + [1, np.nan, 3, np.nan], + MultiIndex.from_tuples([(0, "a"), (1, "b"), (2, "c"), (3, "c")]), + ), + ], +) +def test_equals(arr, idx): + s1 = Series(arr, index=idx) + s2 = s1.copy() + assert s1.equals(s2) + + s1[1] = 9 + assert not s1.equals(s2) + + +@pytest.mark.parametrize( + "val", [1, 1.1, 1 + 1j, True, "abc", [1, 2], (1, 2), {1, 2}, {"a": 1}, None] +) +def test_equals_list_array(val): + # GH20676 Verify equals operator for list of Numpy arrays + arr = np.array([1, 2]) + s1 = Series([arr, arr]) + s2 = s1.copy() + assert s1.equals(s2) + + s1[1] = val + + cm = ( + tm.assert_produces_warning(FutureWarning, check_stacklevel=False) + if isinstance(val, str) + else nullcontext() + ) + with cm: + assert not s1.equals(s2) + + +def test_equals_false_negative(): + # GH8437 Verify false negative behavior of equals function for dtype object + arr = [False, np.nan] + s1 = Series(arr) + s2 = s1.copy() + s3 = Series(index=range(2), dtype=object) + s4 = s3.copy() + s5 = s3.copy() + s6 = s3.copy() + + s3[:-1] = s4[:-1] = s5[0] = s6[0] = False + assert s1.equals(s1) + assert s1.equals(s2) + assert s1.equals(s3) + assert s1.equals(s4) + assert s1.equals(s5) + assert s5.equals(s6) + + +def test_equals_matching_nas(): + # matching but not identical NAs + left = Series([np.datetime64("NaT")], dtype=object) + right = Series([np.datetime64("NaT")], dtype=object) + assert left.equals(right) + assert Index(left).equals(Index(right)) + assert left.array.equals(right.array) + + left = Series([np.timedelta64("NaT")], dtype=object) + right = Series([np.timedelta64("NaT")], dtype=object) + assert left.equals(right) + assert Index(left).equals(Index(right)) + assert left.array.equals(right.array) + + left = Series([np.float64("NaN")], dtype=object) + right = Series([np.float64("NaN")], dtype=object) + assert left.equals(right) + assert Index(left, dtype=left.dtype).equals(Index(right, dtype=right.dtype)) + assert left.array.equals(right.array) + + +def test_equals_mismatched_nas(nulls_fixture, nulls_fixture2): + # GH#39650 + left = nulls_fixture + right = nulls_fixture2 + if hasattr(right, "copy"): + right = right.copy() + else: + right = copy.copy(right) + + ser = Series([left], dtype=object) + ser2 = Series([right], dtype=object) + + if is_matching_na(left, right): + assert ser.equals(ser2) + elif (left is None and is_float(right)) or (right is None and is_float(left)): + assert ser.equals(ser2) + else: + assert not ser.equals(ser2) + + +def test_equals_none_vs_nan(): + # GH#39650 + ser = Series([1, None], dtype=object) + ser2 = Series([1, np.nan], dtype=object) + + assert ser.equals(ser2) + assert Index(ser, dtype=ser.dtype).equals(Index(ser2, dtype=ser2.dtype)) + assert ser.array.equals(ser2.array) + + +def test_equals_None_vs_float(): + # GH#44190 + left = Series([-np.inf, np.nan, -1.0, 0.0, 1.0, 10 / 3, np.inf], dtype=object) + right = Series([None] * len(left)) + + # these 
series were found to be equal due to a bug, check that they are correctly + # found to not equal + assert not left.equals(right) + assert not right.equals(left) + assert not left.to_frame().equals(right.to_frame()) + assert not right.to_frame().equals(left.to_frame()) + assert not Index(left, dtype="object").equals(Index(right, dtype="object")) + assert not Index(right, dtype="object").equals(Index(left, dtype="object")) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_explode.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_explode.py new file mode 100644 index 0000000..c73737d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_explode.py @@ -0,0 +1,144 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +def test_basic(): + s = pd.Series([[0, 1, 2], np.nan, [], (3, 4)], index=list("abcd"), name="foo") + result = s.explode() + expected = pd.Series( + [0, 1, 2, np.nan, np.nan, 3, 4], index=list("aaabcdd"), dtype=object, name="foo" + ) + tm.assert_series_equal(result, expected) + + +def test_mixed_type(): + s = pd.Series( + [[0, 1, 2], np.nan, None, np.array([]), pd.Series(["a", "b"])], name="foo" + ) + result = s.explode() + expected = pd.Series( + [0, 1, 2, np.nan, None, np.nan, "a", "b"], + index=[0, 0, 0, 1, 2, 3, 4, 4], + dtype=object, + name="foo", + ) + tm.assert_series_equal(result, expected) + + +def test_empty(): + s = pd.Series(dtype=object) + result = s.explode() + expected = s.copy() + tm.assert_series_equal(result, expected) + + +def test_nested_lists(): + s = pd.Series([[[1, 2, 3]], [1, 2], 1]) + result = s.explode() + expected = pd.Series([[1, 2, 3], 1, 2, 1], index=[0, 1, 1, 2]) + tm.assert_series_equal(result, expected) + + +def test_multi_index(): + s = pd.Series( + [[0, 1, 2], np.nan, [], (3, 4)], + name="foo", + index=pd.MultiIndex.from_product([list("ab"), range(2)], names=["foo", "bar"]), + ) + result = s.explode() + index = pd.MultiIndex.from_tuples( + [("a", 0), ("a", 0), ("a", 0), ("a", 1), ("b", 0), ("b", 1), ("b", 1)], + names=["foo", "bar"], + ) + expected = pd.Series( + [0, 1, 2, np.nan, np.nan, 3, 4], index=index, dtype=object, name="foo" + ) + tm.assert_series_equal(result, expected) + + +def test_large(): + s = pd.Series([range(256)]).explode() + result = s.explode() + tm.assert_series_equal(result, s) + + +def test_invert_array(): + df = pd.DataFrame({"a": pd.date_range("20190101", periods=3, tz="UTC")}) + + listify = df.apply(lambda x: x.array, axis=1) + result = listify.explode() + tm.assert_series_equal(result, df["a"].rename()) + + +@pytest.mark.parametrize( + "s", [pd.Series([1, 2, 3]), pd.Series(pd.date_range("2019", periods=3, tz="UTC"))] +) +def test_non_object_dtype(s): + result = s.explode() + tm.assert_series_equal(result, s) + + +def test_typical_usecase(): + + df = pd.DataFrame( + [{"var1": "a,b,c", "var2": 1}, {"var1": "d,e,f", "var2": 2}], + columns=["var1", "var2"], + ) + exploded = df.var1.str.split(",").explode() + result = df[["var2"]].join(exploded) + expected = pd.DataFrame( + {"var2": [1, 1, 1, 2, 2, 2], "var1": list("abcdef")}, + columns=["var2", "var1"], + index=[0, 0, 0, 1, 1, 1], + ) + tm.assert_frame_equal(result, expected) + + +def test_nested_EA(): + # a nested EA array + s = pd.Series( + [ + pd.date_range("20170101", periods=3, tz="UTC"), + pd.date_range("20170104", periods=3, tz="UTC"), + ] + ) + result = s.explode() + expected = pd.Series( + pd.date_range("20170101", periods=6, tz="UTC"),
index=[0, 0, 0, 1, 1, 1] + ) + tm.assert_series_equal(result, expected) + + +def test_duplicate_index(): + # GH 28005 + s = pd.Series([[1, 2], [3, 4]], index=[0, 0]) + result = s.explode() + expected = pd.Series([1, 2, 3, 4], index=[0, 0, 0, 0], dtype=object) + tm.assert_series_equal(result, expected) + + +def test_ignore_index(): + # GH 34932 + s = pd.Series([[1, 2], [3, 4]]) + result = s.explode(ignore_index=True) + expected = pd.Series([1, 2, 3, 4], index=[0, 1, 2, 3], dtype=object) + tm.assert_series_equal(result, expected) + + +def test_explode_sets(): + # https://github.com/pandas-dev/pandas/issues/35614 + s = pd.Series([{"a", "b", "c"}], index=[1]) + result = s.explode().sort_values() + expected = pd.Series(["a", "b", "c"], index=[1, 1, 1]) + tm.assert_series_equal(result, expected) + + +def test_explode_scalars_can_ignore_index(): + # https://github.com/pandas-dev/pandas/issues/40487 + s = pd.Series([1, 2, 3], index=["a", "b", "c"]) + result = s.explode(ignore_index=True) + expected = pd.Series([1, 2, 3]) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_fillna.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_fillna.py new file mode 100644 index 0000000..9617565 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_fillna.py @@ -0,0 +1,957 @@ +from datetime import ( + datetime, + timedelta, + timezone, +) + +import numpy as np +import pytest +import pytz + +from pandas import ( + Categorical, + DataFrame, + DatetimeIndex, + NaT, + Period, + Series, + Timedelta, + Timestamp, + date_range, + isna, +) +import pandas._testing as tm + + +class TestSeriesFillNA: + def test_fillna_nat(self): + series = Series([0, 1, 2, NaT.value], dtype="M8[ns]") + + filled = series.fillna(method="pad") + filled2 = series.fillna(value=series.values[2]) + + expected = series.copy() + expected.values[3] = expected.values[2] + + tm.assert_series_equal(filled, expected) + tm.assert_series_equal(filled2, expected) + + df = DataFrame({"A": series}) + filled = df.fillna(method="pad") + filled2 = df.fillna(value=series.values[2]) + expected = DataFrame({"A": expected}) + tm.assert_frame_equal(filled, expected) + tm.assert_frame_equal(filled2, expected) + + series = Series([NaT.value, 0, 1, 2], dtype="M8[ns]") + + filled = series.fillna(method="bfill") + filled2 = series.fillna(value=series[1]) + + expected = series.copy() + expected[0] = expected[1] + + tm.assert_series_equal(filled, expected) + tm.assert_series_equal(filled2, expected) + + df = DataFrame({"A": series}) + filled = df.fillna(method="bfill") + filled2 = df.fillna(value=series[1]) + expected = DataFrame({"A": expected}) + tm.assert_frame_equal(filled, expected) + tm.assert_frame_equal(filled2, expected) + + def test_fillna_value_or_method(self, datetime_series): + msg = "Cannot specify both 'value' and 'method'" + with pytest.raises(ValueError, match=msg): + datetime_series.fillna(value=0, method="ffill") + + def test_fillna(self): + ts = Series([0.0, 1.0, 2.0, 3.0, 4.0], index=tm.makeDateIndex(5)) + + tm.assert_series_equal(ts, ts.fillna(method="ffill")) + + ts[2] = np.NaN + + exp = Series([0.0, 1.0, 1.0, 3.0, 4.0], index=ts.index) + tm.assert_series_equal(ts.fillna(method="ffill"), exp) + + exp = Series([0.0, 1.0, 3.0, 3.0, 4.0], index=ts.index) + tm.assert_series_equal(ts.fillna(method="backfill"), exp) + + exp = Series([0.0, 1.0, 5.0, 3.0, 4.0], index=ts.index) + tm.assert_series_equal(ts.fillna(value=5), exp) + + 
msg = "Must specify a fill 'value' or 'method'" + with pytest.raises(ValueError, match=msg): + ts.fillna() + + def test_fillna_nonscalar(self): + # GH#5703 + s1 = Series([np.nan]) + s2 = Series([1]) + result = s1.fillna(s2) + expected = Series([1.0]) + tm.assert_series_equal(result, expected) + result = s1.fillna({}) + tm.assert_series_equal(result, s1) + result = s1.fillna(Series((), dtype=object)) + tm.assert_series_equal(result, s1) + result = s2.fillna(s1) + tm.assert_series_equal(result, s2) + result = s1.fillna({0: 1}) + tm.assert_series_equal(result, expected) + result = s1.fillna({1: 1}) + tm.assert_series_equal(result, Series([np.nan])) + result = s1.fillna({0: 1, 1: 1}) + tm.assert_series_equal(result, expected) + result = s1.fillna(Series({0: 1, 1: 1})) + tm.assert_series_equal(result, expected) + result = s1.fillna(Series({0: 1, 1: 1}, index=[4, 5])) + tm.assert_series_equal(result, s1) + + def test_fillna_aligns(self): + s1 = Series([0, 1, 2], list("abc")) + s2 = Series([0, np.nan, 2], list("bac")) + result = s2.fillna(s1) + expected = Series([0, 0, 2.0], list("bac")) + tm.assert_series_equal(result, expected) + + def test_fillna_limit(self): + ser = Series(np.nan, index=[0, 1, 2]) + result = ser.fillna(999, limit=1) + expected = Series([999, np.nan, np.nan], index=[0, 1, 2]) + tm.assert_series_equal(result, expected) + + result = ser.fillna(999, limit=2) + expected = Series([999, 999, np.nan], index=[0, 1, 2]) + tm.assert_series_equal(result, expected) + + def test_fillna_dont_cast_strings(self): + # GH#9043 + # make sure a string representation of int/float values can be filled + # correctly without raising errors or being converted + vals = ["0", "1.5", "-0.3"] + for val in vals: + ser = Series([0, 1, np.nan, np.nan, 4], dtype="float64") + result = ser.fillna(val) + expected = Series([0, 1, val, val, 4], dtype="object") + tm.assert_series_equal(result, expected) + + def test_fillna_consistency(self): + # GH#16402 + # fillna with a tz aware to a tz-naive, should result in object + + ser = Series([Timestamp("20130101"), NaT]) + + result = ser.fillna(Timestamp("20130101", tz="US/Eastern")) + expected = Series( + [Timestamp("20130101"), Timestamp("2013-01-01", tz="US/Eastern")], + dtype="object", + ) + tm.assert_series_equal(result, expected) + + # where (we ignore the errors=) + result = ser.where( + [True, False], Timestamp("20130101", tz="US/Eastern"), errors="ignore" + ) + tm.assert_series_equal(result, expected) + + result = ser.where( + [True, False], Timestamp("20130101", tz="US/Eastern"), errors="ignore" + ) + tm.assert_series_equal(result, expected) + + # with a non-datetime + result = ser.fillna("foo") + expected = Series([Timestamp("20130101"), "foo"]) + tm.assert_series_equal(result, expected) + + # assignment + ser2 = ser.copy() + ser2[1] = "foo" + tm.assert_series_equal(ser2, expected) + + def test_fillna_downcast(self): + # GH#15277 + # infer int64 from float64 + ser = Series([1.0, np.nan]) + result = ser.fillna(0, downcast="infer") + expected = Series([1, 0]) + tm.assert_series_equal(result, expected) + + # infer int64 from float64 when fillna value is a dict + ser = Series([1.0, np.nan]) + result = ser.fillna({1: 0}, downcast="infer") + expected = Series([1, 0]) + tm.assert_series_equal(result, expected) + + def test_fillna_downcast_infer_objects_to_numeric(self): + # GH#44241 if we have object-dtype, 'downcast="infer"' should + # _actually_ infer + + arr = np.arange(5).astype(object) + arr[3] = np.nan + + ser = Series(arr) + + res = ser.fillna(3, 
downcast="infer") + expected = Series(np.arange(5), dtype=np.int64) + tm.assert_series_equal(res, expected) + + res = ser.ffill(downcast="infer") + expected = Series([0, 1, 2, 2, 4], dtype=np.int64) + tm.assert_series_equal(res, expected) + + res = ser.bfill(downcast="infer") + expected = Series([0, 1, 2, 4, 4], dtype=np.int64) + tm.assert_series_equal(res, expected) + + # with a non-round float present, we will downcast to float64 + ser[2] = 2.5 + + expected = Series([0, 1, 2.5, 3, 4], dtype=np.float64) + res = ser.fillna(3, downcast="infer") + tm.assert_series_equal(res, expected) + + res = ser.ffill(downcast="infer") + expected = Series([0, 1, 2.5, 2.5, 4], dtype=np.float64) + tm.assert_series_equal(res, expected) + + res = ser.bfill(downcast="infer") + expected = Series([0, 1, 2.5, 4, 4], dtype=np.float64) + tm.assert_series_equal(res, expected) + + def test_timedelta_fillna(self, frame_or_series): + # GH#3371 + ser = Series( + [ + Timestamp("20130101"), + Timestamp("20130101"), + Timestamp("20130102"), + Timestamp("20130103 9:01:01"), + ] + ) + td = ser.diff() + obj = frame_or_series(td) + + # reg fillna + result = obj.fillna(Timedelta(seconds=0)) + expected = Series( + [ + timedelta(0), + timedelta(0), + timedelta(1), + timedelta(days=1, seconds=9 * 3600 + 60 + 1), + ] + ) + expected = frame_or_series(expected) + tm.assert_equal(result, expected) + + # interpreted as seconds, no longer supported + msg = "value should be a 'Timedelta', 'NaT', or array of those. Got 'int'" + with pytest.raises(TypeError, match=msg): + obj.fillna(1) + + result = obj.fillna(Timedelta(seconds=1)) + expected = Series( + [ + timedelta(seconds=1), + timedelta(0), + timedelta(1), + timedelta(days=1, seconds=9 * 3600 + 60 + 1), + ] + ) + expected = frame_or_series(expected) + tm.assert_equal(result, expected) + + result = obj.fillna(timedelta(days=1, seconds=1)) + expected = Series( + [ + timedelta(days=1, seconds=1), + timedelta(0), + timedelta(1), + timedelta(days=1, seconds=9 * 3600 + 60 + 1), + ] + ) + expected = frame_or_series(expected) + tm.assert_equal(result, expected) + + result = obj.fillna(np.timedelta64(10 ** 9)) + expected = Series( + [ + timedelta(seconds=1), + timedelta(0), + timedelta(1), + timedelta(days=1, seconds=9 * 3600 + 60 + 1), + ] + ) + expected = frame_or_series(expected) + tm.assert_equal(result, expected) + + result = obj.fillna(NaT) + expected = Series( + [ + NaT, + timedelta(0), + timedelta(1), + timedelta(days=1, seconds=9 * 3600 + 60 + 1), + ], + dtype="m8[ns]", + ) + expected = frame_or_series(expected) + tm.assert_equal(result, expected) + + # ffill + td[2] = np.nan + obj = frame_or_series(td) + result = obj.ffill() + expected = td.fillna(Timedelta(seconds=0)) + expected[0] = np.nan + expected = frame_or_series(expected) + + tm.assert_equal(result, expected) + + # bfill + td[2] = np.nan + obj = frame_or_series(td) + result = obj.bfill() + expected = td.fillna(Timedelta(seconds=0)) + expected[2] = timedelta(days=1, seconds=9 * 3600 + 60 + 1) + expected = frame_or_series(expected) + tm.assert_equal(result, expected) + + def test_datetime64_fillna(self): + + ser = Series( + [ + Timestamp("20130101"), + Timestamp("20130101"), + Timestamp("20130102"), + Timestamp("20130103 9:01:01"), + ] + ) + ser[2] = np.nan + + # ffill + result = ser.ffill() + expected = Series( + [ + Timestamp("20130101"), + Timestamp("20130101"), + Timestamp("20130101"), + Timestamp("20130103 9:01:01"), + ] + ) + tm.assert_series_equal(result, expected) + + # bfill + result = ser.bfill() + expected = Series( + 
[ + Timestamp("20130101"), + Timestamp("20130101"), + Timestamp("20130103 9:01:01"), + Timestamp("20130103 9:01:01"), + ] + ) + tm.assert_series_equal(result, expected) + + def test_datetime64_fillna_backfill(self): + # GH#6587 + # make sure that we are treating as integer when filling + msg = "containing strings is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + # this also tests inference of a datetime-like with NaT's + ser = Series([NaT, NaT, "2013-08-05 15:30:00.000001"]) + + expected = Series( + [ + "2013-08-05 15:30:00.000001", + "2013-08-05 15:30:00.000001", + "2013-08-05 15:30:00.000001", + ], + dtype="M8[ns]", + ) + result = ser.fillna(method="backfill") + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("tz", ["US/Eastern", "Asia/Tokyo"]) + def test_datetime64_tz_fillna(self, tz): + # DatetimeLikeBlock + ser = Series( + [ + Timestamp("2011-01-01 10:00"), + NaT, + Timestamp("2011-01-03 10:00"), + NaT, + ] + ) + null_loc = Series([False, True, False, True]) + + result = ser.fillna(Timestamp("2011-01-02 10:00")) + expected = Series( + [ + Timestamp("2011-01-01 10:00"), + Timestamp("2011-01-02 10:00"), + Timestamp("2011-01-03 10:00"), + Timestamp("2011-01-02 10:00"), + ] + ) + tm.assert_series_equal(expected, result) + # check s is not changed + tm.assert_series_equal(isna(ser), null_loc) + + result = ser.fillna(Timestamp("2011-01-02 10:00", tz=tz)) + expected = Series( + [ + Timestamp("2011-01-01 10:00"), + Timestamp("2011-01-02 10:00", tz=tz), + Timestamp("2011-01-03 10:00"), + Timestamp("2011-01-02 10:00", tz=tz), + ] + ) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(isna(ser), null_loc) + + result = ser.fillna("AAA") + expected = Series( + [ + Timestamp("2011-01-01 10:00"), + "AAA", + Timestamp("2011-01-03 10:00"), + "AAA", + ], + dtype=object, + ) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(isna(ser), null_loc) + + result = ser.fillna( + { + 1: Timestamp("2011-01-02 10:00", tz=tz), + 3: Timestamp("2011-01-04 10:00"), + } + ) + expected = Series( + [ + Timestamp("2011-01-01 10:00"), + Timestamp("2011-01-02 10:00", tz=tz), + Timestamp("2011-01-03 10:00"), + Timestamp("2011-01-04 10:00"), + ] + ) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(isna(ser), null_loc) + + result = ser.fillna( + {1: Timestamp("2011-01-02 10:00"), 3: Timestamp("2011-01-04 10:00")} + ) + expected = Series( + [ + Timestamp("2011-01-01 10:00"), + Timestamp("2011-01-02 10:00"), + Timestamp("2011-01-03 10:00"), + Timestamp("2011-01-04 10:00"), + ] + ) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(isna(ser), null_loc) + + # DatetimeTZBlock + idx = DatetimeIndex(["2011-01-01 10:00", NaT, "2011-01-03 10:00", NaT], tz=tz) + ser = Series(idx) + assert ser.dtype == f"datetime64[ns, {tz}]" + tm.assert_series_equal(isna(ser), null_loc) + + result = ser.fillna(Timestamp("2011-01-02 10:00")) + expected = Series( + [ + Timestamp("2011-01-01 10:00", tz=tz), + Timestamp("2011-01-02 10:00"), + Timestamp("2011-01-03 10:00", tz=tz), + Timestamp("2011-01-02 10:00"), + ] + ) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(isna(ser), null_loc) + + result = ser.fillna(Timestamp("2011-01-02 10:00", tz=tz)) + idx = DatetimeIndex( + [ + "2011-01-01 10:00", + "2011-01-02 10:00", + "2011-01-03 10:00", + "2011-01-02 10:00", + ], + tz=tz, + ) + expected = Series(idx) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(isna(ser), null_loc) + + result = 
ser.fillna(Timestamp("2011-01-02 10:00", tz=tz).to_pydatetime()) + idx = DatetimeIndex( + [ + "2011-01-01 10:00", + "2011-01-02 10:00", + "2011-01-03 10:00", + "2011-01-02 10:00", + ], + tz=tz, + ) + expected = Series(idx) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(isna(ser), null_loc) + + result = ser.fillna("AAA") + expected = Series( + [ + Timestamp("2011-01-01 10:00", tz=tz), + "AAA", + Timestamp("2011-01-03 10:00", tz=tz), + "AAA", + ], + dtype=object, + ) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(isna(ser), null_loc) + + result = ser.fillna( + { + 1: Timestamp("2011-01-02 10:00", tz=tz), + 3: Timestamp("2011-01-04 10:00"), + } + ) + expected = Series( + [ + Timestamp("2011-01-01 10:00", tz=tz), + Timestamp("2011-01-02 10:00", tz=tz), + Timestamp("2011-01-03 10:00", tz=tz), + Timestamp("2011-01-04 10:00"), + ] + ) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(isna(ser), null_loc) + + result = ser.fillna( + { + 1: Timestamp("2011-01-02 10:00", tz=tz), + 3: Timestamp("2011-01-04 10:00", tz=tz), + } + ) + expected = Series( + [ + Timestamp("2011-01-01 10:00", tz=tz), + Timestamp("2011-01-02 10:00", tz=tz), + Timestamp("2011-01-03 10:00", tz=tz), + Timestamp("2011-01-04 10:00", tz=tz), + ] + ) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(isna(ser), null_loc) + + # filling with a naive/other zone, coerce to object + result = ser.fillna(Timestamp("20130101")) + expected = Series( + [ + Timestamp("2011-01-01 10:00", tz=tz), + Timestamp("2013-01-01"), + Timestamp("2011-01-03 10:00", tz=tz), + Timestamp("2013-01-01"), + ] + ) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(isna(ser), null_loc) + + with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"): + result = ser.fillna(Timestamp("20130101", tz="US/Pacific")) + expected = Series( + [ + Timestamp("2011-01-01 10:00", tz=tz), + Timestamp("2013-01-01", tz="US/Pacific"), + Timestamp("2011-01-03 10:00", tz=tz), + Timestamp("2013-01-01", tz="US/Pacific"), + ] + ) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(isna(ser), null_loc) + + def test_fillna_dt64tz_with_method(self): + # with timezone + # GH#15855 + ser = Series([Timestamp("2012-11-11 00:00:00+01:00"), NaT]) + exp = Series( + [ + Timestamp("2012-11-11 00:00:00+01:00"), + Timestamp("2012-11-11 00:00:00+01:00"), + ] + ) + tm.assert_series_equal(ser.fillna(method="pad"), exp) + + ser = Series([NaT, Timestamp("2012-11-11 00:00:00+01:00")]) + exp = Series( + [ + Timestamp("2012-11-11 00:00:00+01:00"), + Timestamp("2012-11-11 00:00:00+01:00"), + ] + ) + tm.assert_series_equal(ser.fillna(method="bfill"), exp) + + def test_fillna_pytimedelta(self): + # GH#8209 + ser = Series([np.nan, Timedelta("1 days")], index=["A", "B"]) + + result = ser.fillna(timedelta(1)) + expected = Series(Timedelta("1 days"), index=["A", "B"]) + tm.assert_series_equal(result, expected) + + def test_fillna_period(self): + # GH#13737 + ser = Series([Period("2011-01", freq="M"), Period("NaT", freq="M")]) + + res = ser.fillna(Period("2012-01", freq="M")) + exp = Series([Period("2011-01", freq="M"), Period("2012-01", freq="M")]) + tm.assert_series_equal(res, exp) + assert res.dtype == "Period[M]" + + def test_fillna_dt64_timestamp(self, frame_or_series): + ser = Series( + [ + Timestamp("20130101"), + Timestamp("20130101"), + Timestamp("20130102"), + Timestamp("20130103 9:01:01"), + ] + ) + ser[2] = np.nan + obj = frame_or_series(ser) + + # reg fillna + result = 
obj.fillna(Timestamp("20130104")) + expected = Series( + [ + Timestamp("20130101"), + Timestamp("20130101"), + Timestamp("20130104"), + Timestamp("20130103 9:01:01"), + ] + ) + expected = frame_or_series(expected) + tm.assert_equal(result, expected) + + result = obj.fillna(NaT) + expected = obj + tm.assert_equal(result, expected) + + def test_fillna_dt64_non_nao(self): + # GH#27419 + ser = Series([Timestamp("2010-01-01"), NaT, Timestamp("2000-01-01")]) + val = np.datetime64("1975-04-05", "ms") + + result = ser.fillna(val) + expected = Series( + [Timestamp("2010-01-01"), Timestamp("1975-04-05"), Timestamp("2000-01-01")] + ) + tm.assert_series_equal(result, expected) + + def test_fillna_numeric_inplace(self): + x = Series([np.nan, 1.0, np.nan, 3.0, np.nan], ["z", "a", "b", "c", "d"]) + y = x.copy() + + return_value = y.fillna(value=0, inplace=True) + assert return_value is None + + expected = x.fillna(value=0) + tm.assert_series_equal(y, expected) + + # --------------------------------------------------------------- + # CategoricalDtype + + @pytest.mark.parametrize( + "fill_value, expected_output", + [ + ("a", ["a", "a", "b", "a", "a"]), + ({1: "a", 3: "b", 4: "b"}, ["a", "a", "b", "b", "b"]), + ({1: "a"}, ["a", "a", "b", np.nan, np.nan]), + ({1: "a", 3: "b"}, ["a", "a", "b", "b", np.nan]), + (Series("a"), ["a", np.nan, "b", np.nan, np.nan]), + (Series("a", index=[1]), ["a", "a", "b", np.nan, np.nan]), + (Series({1: "a", 3: "b"}), ["a", "a", "b", "b", np.nan]), + (Series(["a", "b"], index=[3, 4]), ["a", np.nan, "b", "a", "b"]), + ], + ) + def test_fillna_categorical(self, fill_value, expected_output): + # GH#17033 + # Test fillna for a Categorical series + data = ["a", np.nan, "b", np.nan, np.nan] + ser = Series(Categorical(data, categories=["a", "b"])) + exp = Series(Categorical(expected_output, categories=["a", "b"])) + result = ser.fillna(fill_value) + tm.assert_series_equal(result, exp) + + @pytest.mark.parametrize( + "fill_value, expected_output", + [ + (Series(["a", "b", "c", "d", "e"]), ["a", "b", "b", "d", "e"]), + (Series(["b", "d", "a", "d", "a"]), ["a", "d", "b", "d", "a"]), + ( + Series( + Categorical( + ["b", "d", "a", "d", "a"], categories=["b", "c", "d", "e", "a"] + ) + ), + ["a", "d", "b", "d", "a"], + ), + ], + ) + def test_fillna_categorical_with_new_categories(self, fill_value, expected_output): + # GH#26215 + data = ["a", np.nan, "b", np.nan, np.nan] + ser = Series(Categorical(data, categories=["a", "b", "c", "d", "e"])) + exp = Series(Categorical(expected_output, categories=["a", "b", "c", "d", "e"])) + result = ser.fillna(fill_value) + tm.assert_series_equal(result, exp) + + def test_fillna_categorical_raises(self): + data = ["a", np.nan, "b", np.nan, np.nan] + ser = Series(Categorical(data, categories=["a", "b"])) + cat = ser._values + + msg = "Cannot setitem on a Categorical with a new category" + with pytest.raises(TypeError, match=msg): + ser.fillna("d") + + msg2 = "Length of 'value' does not match." 
+ with pytest.raises(ValueError, match=msg2): + cat.fillna(Series("d")) + + with pytest.raises(TypeError, match=msg): + ser.fillna({1: "d", 3: "a"}) + + msg = '"value" parameter must be a scalar or dict, but you passed a "list"' + with pytest.raises(TypeError, match=msg): + ser.fillna(["a", "b"]) + + msg = '"value" parameter must be a scalar or dict, but you passed a "tuple"' + with pytest.raises(TypeError, match=msg): + ser.fillna(("a", "b")) + + msg = ( + '"value" parameter must be a scalar, dict ' + 'or Series, but you passed a "DataFrame"' + ) + with pytest.raises(TypeError, match=msg): + ser.fillna(DataFrame({1: ["a"], 3: ["b"]})) + + @pytest.mark.parametrize("dtype", [float, "float32", "float64"]) + @pytest.mark.parametrize("fill_type", tm.ALL_REAL_NUMPY_DTYPES) + def test_fillna_float_casting(self, dtype, fill_type): + # GH-43424 + ser = Series([np.nan, 1.2], dtype=dtype) + fill_values = Series([2, 2], dtype=fill_type) + result = ser.fillna(fill_values) + expected = Series([2.0, 1.2], dtype=dtype) + tm.assert_series_equal(result, expected) + + def test_fillna_f32_upcast_with_dict(self): + # GH-43424 + ser = Series([np.nan, 1.2], dtype=np.float32) + result = ser.fillna({0: 1}) + expected = Series([1.0, 1.2], dtype=np.float32) + tm.assert_series_equal(result, expected) + + # --------------------------------------------------------------- + # Invalid Usages + + def test_fillna_invalid_method(self, datetime_series): + # pytest.raises fails the test if no error is raised at all + with pytest.raises(ValueError, match="ffil"): + datetime_series.fillna(method="ffil") + + def test_fillna_listlike_invalid(self): + ser = Series(np.random.randint(-100, 100, 50)) + msg = '"value" parameter must be a scalar or dict, but you passed a "list"' + with pytest.raises(TypeError, match=msg): + ser.fillna([1, 2]) + + msg = '"value" parameter must be a scalar or dict, but you passed a "tuple"' + with pytest.raises(TypeError, match=msg): + ser.fillna((1, 2)) + + def test_fillna_method_and_limit_invalid(self): + + # related GH#9217, make sure limit is an int and greater than 0 + ser = Series([1, 2, 3, None]) + msg = "|".join( + [ + r"Cannot specify both 'value' and 'method'\.", + "Limit must be greater than 0", + "Limit must be an integer", + ] + ) + for limit in [-1, 0, 1.0, 2.0]: + for method in ["backfill", "bfill", "pad", "ffill", None]: + with pytest.raises(ValueError, match=msg): + ser.fillna(1, limit=limit, method=method) + + def test_fillna_datetime64_with_timezone_tzinfo(self): + # https://github.com/pandas-dev/pandas/issues/38851 + # different tzinfos representing UTC treated as equal + ser = Series(date_range("2020", periods=3, tz="UTC")) + expected = ser.copy() + ser[1] = NaT + result = ser.fillna(datetime(2020, 1, 2, tzinfo=timezone.utc)) + tm.assert_series_equal(result, expected) + + # but we dont (yet) consider distinct tzinfos for non-UTC tz equivalent + ts = Timestamp("2000-01-01", tz="US/Pacific") + ser2 = Series(ser._values.tz_convert("dateutil/US/Pacific")) + assert ser2.dtype.kind == "M" + with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"): + result = ser2.fillna(ts) + expected = Series([ser[0], ts, ser[2]], dtype=object) + # TODO(2.0): once deprecation is enforced + # expected = Series( + # [ser2[0], ts.tz_convert(ser2.dtype.tz), ser2[2]], + # dtype=ser2.dtype, + # ) + tm.assert_series_equal(result, expected) + + def test_fillna_pos_args_deprecation(self): + # https://github.com/pandas-dev/pandas/issues/41485 + srs = Series([1, 2, 3, np.nan], dtype=float) + msg = ( + r"In a future version of pandas all
arguments of Series.fillna " + r"except for the argument 'value' will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = srs.fillna(0, None, None) + expected = Series([1, 2, 3, 0], dtype=float) + tm.assert_series_equal(result, expected) + + +class TestFillnaPad: + def test_fillna_bug(self): + ser = Series([np.nan, 1.0, np.nan, 3.0, np.nan], ["z", "a", "b", "c", "d"]) + filled = ser.fillna(method="ffill") + expected = Series([np.nan, 1.0, 1.0, 3.0, 3.0], ser.index) + tm.assert_series_equal(filled, expected) + + filled = ser.fillna(method="bfill") + expected = Series([1.0, 1.0, 3.0, 3.0, np.nan], ser.index) + tm.assert_series_equal(filled, expected) + + def test_ffill(self): + ts = Series([0.0, 1.0, 2.0, 3.0, 4.0], index=tm.makeDateIndex(5)) + ts[2] = np.NaN + tm.assert_series_equal(ts.ffill(), ts.fillna(method="ffill")) + + def test_ffill_pos_args_deprecation(self): + # https://github.com/pandas-dev/pandas/issues/41485 + ser = Series([1, 2, 3]) + msg = ( + r"In a future version of pandas all arguments of Series.ffill " + r"will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = ser.ffill(0) + expected = Series([1, 2, 3]) + tm.assert_series_equal(result, expected) + + def test_ffill_mixed_dtypes_without_missing_data(self): + # GH#14956 + series = Series([datetime(2015, 1, 1, tzinfo=pytz.utc), 1]) + result = series.ffill() + tm.assert_series_equal(series, result) + + def test_bfill(self): + ts = Series([0.0, 1.0, 2.0, 3.0, 4.0], index=tm.makeDateIndex(5)) + ts[2] = np.NaN + tm.assert_series_equal(ts.bfill(), ts.fillna(method="bfill")) + + def test_bfill_pos_args_deprecation(self): + # https://github.com/pandas-dev/pandas/issues/41485 + ser = Series([1, 2, 3]) + msg = ( + r"In a future version of pandas all arguments of Series.bfill " + r"will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = ser.bfill(0) + expected = Series([1, 2, 3]) + tm.assert_series_equal(result, expected) + + def test_pad_nan(self): + x = Series( + [np.nan, 1.0, np.nan, 3.0, np.nan], ["z", "a", "b", "c", "d"], dtype=float + ) + + return_value = x.fillna(method="pad", inplace=True) + assert return_value is None + + expected = Series( + [np.nan, 1.0, 1.0, 3.0, 3.0], ["z", "a", "b", "c", "d"], dtype=float + ) + tm.assert_series_equal(x[1:], expected[1:]) + assert np.isnan(x[0]), np.isnan(expected[0]) + + def test_series_fillna_limit(self): + index = np.arange(10) + s = Series(np.random.randn(10), index=index) + + result = s[:2].reindex(index) + result = result.fillna(method="pad", limit=5) + + expected = s[:2].reindex(index).fillna(method="pad") + expected[-3:] = np.nan + tm.assert_series_equal(result, expected) + + result = s[-2:].reindex(index) + result = result.fillna(method="bfill", limit=5) + + expected = s[-2:].reindex(index).fillna(method="backfill") + expected[:3] = np.nan + tm.assert_series_equal(result, expected) + + def test_series_pad_backfill_limit(self): + index = np.arange(10) + s = Series(np.random.randn(10), index=index) + + result = s[:2].reindex(index, method="pad", limit=5) + + expected = s[:2].reindex(index).fillna(method="pad") + expected[-3:] = np.nan + tm.assert_series_equal(result, expected) + + result = s[-2:].reindex(index, method="backfill", limit=5) + + expected = s[-2:].reindex(index).fillna(method="backfill") + expected[:3] = np.nan + tm.assert_series_equal(result, expected) + + def test_fillna_int(self): + ser = Series(np.random.randint(-100, 100, 50)) + return_value 
= ser.fillna(method="ffill", inplace=True) + assert return_value is None + tm.assert_series_equal(ser.fillna(method="ffill", inplace=False), ser) + + def test_datetime64tz_fillna_round_issue(self): + # GH#14872 + + data = Series( + [NaT, NaT, datetime(2016, 12, 12, 22, 24, 6, 100001, tzinfo=pytz.utc)] + ) + + filled = data.fillna(method="bfill") + + expected = Series( + [ + datetime(2016, 12, 12, 22, 24, 6, 100001, tzinfo=pytz.utc), + datetime(2016, 12, 12, 22, 24, 6, 100001, tzinfo=pytz.utc), + datetime(2016, 12, 12, 22, 24, 6, 100001, tzinfo=pytz.utc), + ] + ) + + tm.assert_series_equal(filled, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_get_numeric_data.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_get_numeric_data.py new file mode 100644 index 0000000..e386f4b --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_get_numeric_data.py @@ -0,0 +1,29 @@ +from pandas import ( + Index, + Series, + date_range, +) +import pandas._testing as tm + + +class TestGetNumericData: + def test_get_numeric_data_preserve_dtype(self): + + # get the numeric data + obj = Series([1, 2, 3]) + result = obj._get_numeric_data() + tm.assert_series_equal(result, obj) + + obj = Series([1, "2", 3.0]) + result = obj._get_numeric_data() + expected = Series([], dtype=object, index=Index([], dtype=object)) + tm.assert_series_equal(result, expected) + + obj = Series([True, False, True]) + result = obj._get_numeric_data() + tm.assert_series_equal(result, obj) + + obj = Series(date_range("20130101", periods=3)) + result = obj._get_numeric_data() + expected = Series([], dtype="M8[ns]", index=Index([], dtype=object)) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_head_tail.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_head_tail.py new file mode 100644 index 0000000..d9f8d85 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_head_tail.py @@ -0,0 +1,8 @@ +import pandas._testing as tm + + +def test_head_tail(string_series): + tm.assert_series_equal(string_series.head(), string_series[:5]) + tm.assert_series_equal(string_series.head(0), string_series[0:0]) + tm.assert_series_equal(string_series.tail(), string_series[-5:]) + tm.assert_series_equal(string_series.tail(0), string_series[0:0]) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_infer_objects.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_infer_objects.py new file mode 100644 index 0000000..bb83f62 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_infer_objects.py @@ -0,0 +1,23 @@ +import numpy as np + +from pandas import Series +import pandas._testing as tm + + +class TestInferObjects: + def test_infer_objects_series(self): + # GH#11221 + actual = Series(np.array([1, 2, 3], dtype="O")).infer_objects() + expected = Series([1, 2, 3]) + tm.assert_series_equal(actual, expected) + + actual = Series(np.array([1, 2, 3, None], dtype="O")).infer_objects() + expected = Series([1.0, 2.0, 3.0, np.nan]) + tm.assert_series_equal(actual, expected) + + # only soft conversions, unconvertable pass thru unchanged + actual = Series(np.array([1, 2, 3, None, "a"], dtype="O")).infer_objects() + expected = Series([1, 2, 3, None, "a"]) + + assert actual.dtype == "object" + tm.assert_series_equal(actual, expected) diff --git 
a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_interpolate.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_interpolate.py new file mode 100644 index 0000000..08b9f82 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_interpolate.py @@ -0,0 +1,829 @@ +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + Index, + MultiIndex, + Series, + date_range, + isna, +) +import pandas._testing as tm +from pandas.util.version import Version + + +@pytest.fixture( + params=[ + "linear", + "index", + "values", + "nearest", + "slinear", + "zero", + "quadratic", + "cubic", + "barycentric", + "krogh", + "polynomial", + "spline", + "piecewise_polynomial", + "from_derivatives", + "pchip", + "akima", + "cubicspline", + ] +) +def nontemporal_method(request): + """Fixture that returns an (method name, required kwargs) pair. + + This fixture does not include method 'time' as a parameterization; that + method requires a Series with a DatetimeIndex, and is generally tested + separately from these non-temporal methods. + """ + method = request.param + kwargs = {"order": 1} if method in ("spline", "polynomial") else {} + return method, kwargs + + +@pytest.fixture( + params=[ + "linear", + "slinear", + "zero", + "quadratic", + "cubic", + "barycentric", + "krogh", + "polynomial", + "spline", + "piecewise_polynomial", + "from_derivatives", + "pchip", + "akima", + "cubicspline", + ] +) +def interp_methods_ind(request): + """Fixture that returns a (method name, required kwargs) pair to + be tested for various Index types. + + This fixture does not include methods - 'time', 'index', 'nearest', + 'values' as a parameterization + """ + method = request.param + kwargs = {"order": 1} if method in ("spline", "polynomial") else {} + return method, kwargs + + +class TestSeriesInterpolateData: + def test_interpolate(self, datetime_series, string_series): + ts = Series(np.arange(len(datetime_series), dtype=float), datetime_series.index) + + ts_copy = ts.copy() + ts_copy[5:10] = np.NaN + + linear_interp = ts_copy.interpolate(method="linear") + tm.assert_series_equal(linear_interp, ts) + + ord_ts = Series( + [d.toordinal() for d in datetime_series.index], index=datetime_series.index + ).astype(float) + + ord_ts_copy = ord_ts.copy() + ord_ts_copy[5:10] = np.NaN + + time_interp = ord_ts_copy.interpolate(method="time") + tm.assert_series_equal(time_interp, ord_ts) + + def test_interpolate_time_raises_for_non_timeseries(self): + # When method='time' is used on a non-TimeSeries that contains a null + # value, a ValueError should be raised. 
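# A standalone sketch (not part of the test file) of why method="time" insists
# on a DatetimeIndex: it weights the interpolation by the actual time elapsed
# between observations instead of by integer position.
import numpy as np
import pandas as pd

idx = pd.to_datetime(["2021-01-01", "2021-01-02", "2021-01-04"])
ts = pd.Series([1.0, np.nan, 3.0], index=idx)
print(ts.interpolate(method="time").iloc[1])    # ~1.667: one day out of three elapsed
print(ts.interpolate(method="linear").iloc[1])  # 2.0: positional midpoint
# Without datetime-like labels there is no elapsed time to weight by, hence
# the ValueError exercised just below.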
+ non_ts = Series([0, 1, 2, np.NaN]) + msg = "time-weighted interpolation only works on Series.* with a DatetimeIndex" + with pytest.raises(ValueError, match=msg): + non_ts.interpolate(method="time") + + @td.skip_if_no_scipy + def test_interpolate_cubicspline(self): + + ser = Series([10, 11, 12, 13]) + + expected = Series( + [11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00], + index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]), + ) + # interpolate at new_index + new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype( + float + ) + result = ser.reindex(new_index).interpolate(method="cubicspline")[1:3] + tm.assert_series_equal(result, expected) + + @td.skip_if_no_scipy + def test_interpolate_pchip(self): + + ser = Series(np.sort(np.random.uniform(size=100))) + + # interpolate at new_index + new_index = ser.index.union( + Index([49.25, 49.5, 49.75, 50.25, 50.5, 50.75]) + ).astype(float) + interp_s = ser.reindex(new_index).interpolate(method="pchip") + # does not blow up, GH5977 + interp_s[49:51] + + @td.skip_if_no_scipy + def test_interpolate_akima(self): + + ser = Series([10, 11, 12, 13]) + + # interpolate at new_index where `der` is zero + expected = Series( + [11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00], + index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]), + ) + new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype( + float + ) + interp_s = ser.reindex(new_index).interpolate(method="akima") + tm.assert_series_equal(interp_s[1:3], expected) + + # interpolate at new_index where `der` is a non-zero int + expected = Series( + [11.0, 1.0, 1.0, 1.0, 12.0, 1.0, 1.0, 1.0, 13.0], + index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]), + ) + new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype( + float + ) + interp_s = ser.reindex(new_index).interpolate(method="akima", der=1) + tm.assert_series_equal(interp_s[1:3], expected) + + @td.skip_if_no_scipy + def test_interpolate_piecewise_polynomial(self): + ser = Series([10, 11, 12, 13]) + + expected = Series( + [11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00], + index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]), + ) + # interpolate at new_index + new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype( + float + ) + interp_s = ser.reindex(new_index).interpolate(method="piecewise_polynomial") + tm.assert_series_equal(interp_s[1:3], expected) + + @td.skip_if_no_scipy + def test_interpolate_from_derivatives(self): + ser = Series([10, 11, 12, 13]) + + expected = Series( + [11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00], + index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]), + ) + # interpolate at new_index + new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype( + float + ) + interp_s = ser.reindex(new_index).interpolate(method="from_derivatives") + tm.assert_series_equal(interp_s[1:3], expected) + + @pytest.mark.parametrize( + "kwargs", + [ + {}, + pytest.param( + {"method": "polynomial", "order": 1}, marks=td.skip_if_no_scipy + ), + ], + ) + def test_interpolate_corners(self, kwargs): + s = Series([np.nan, np.nan]) + tm.assert_series_equal(s.interpolate(**kwargs), s) + + s = Series([], dtype=object).interpolate() + tm.assert_series_equal(s.interpolate(**kwargs), s) + + def test_interpolate_index_values(self): + s = Series(np.nan, index=np.sort(np.random.rand(30))) + s[::3] = np.random.randn(10) + + vals = 
s.index.values.astype(float) + + result = s.interpolate(method="index") + + expected = s.copy() + bad = isna(expected.values) + good = ~bad + expected = Series( + np.interp(vals[bad], vals[good], s.values[good]), index=s.index[bad] + ) + + tm.assert_series_equal(result[bad], expected) + + # 'values' is synonymous with 'index' for the method kwarg + other_result = s.interpolate(method="values") + + tm.assert_series_equal(other_result, result) + tm.assert_series_equal(other_result[bad], expected) + + def test_interpolate_non_ts(self): + s = Series([1, 3, np.nan, np.nan, np.nan, 11]) + msg = ( + "time-weighted interpolation only works on Series or DataFrames " + "with a DatetimeIndex" + ) + with pytest.raises(ValueError, match=msg): + s.interpolate(method="time") + + @pytest.mark.parametrize( + "kwargs", + [ + {}, + pytest.param( + {"method": "polynomial", "order": 1}, marks=td.skip_if_no_scipy + ), + ], + ) + def test_nan_interpolate(self, kwargs): + s = Series([0, 1, np.nan, 3]) + result = s.interpolate(**kwargs) + expected = Series([0.0, 1.0, 2.0, 3.0]) + tm.assert_series_equal(result, expected) + + def test_nan_irregular_index(self): + s = Series([1, 2, np.nan, 4], index=[1, 3, 5, 9]) + result = s.interpolate() + expected = Series([1.0, 2.0, 3.0, 4.0], index=[1, 3, 5, 9]) + tm.assert_series_equal(result, expected) + + def test_nan_str_index(self): + s = Series([0, 1, 2, np.nan], index=list("abcd")) + result = s.interpolate() + expected = Series([0.0, 1.0, 2.0, 2.0], index=list("abcd")) + tm.assert_series_equal(result, expected) + + @td.skip_if_no_scipy + def test_interp_quad(self): + sq = Series([1, 4, np.nan, 16], index=[1, 2, 3, 4]) + result = sq.interpolate(method="quadratic") + expected = Series([1.0, 4.0, 9.0, 16.0], index=[1, 2, 3, 4]) + tm.assert_series_equal(result, expected) + + @td.skip_if_no_scipy + def test_interp_scipy_basic(self): + s = Series([1, 3, np.nan, 12, np.nan, 25]) + # slinear + expected = Series([1.0, 3.0, 7.5, 12.0, 18.5, 25.0]) + result = s.interpolate(method="slinear") + tm.assert_series_equal(result, expected) + + result = s.interpolate(method="slinear", downcast="infer") + tm.assert_series_equal(result, expected) + # nearest + expected = Series([1, 3, 3, 12, 12, 25]) + result = s.interpolate(method="nearest") + tm.assert_series_equal(result, expected.astype("float")) + + result = s.interpolate(method="nearest", downcast="infer") + tm.assert_series_equal(result, expected) + # zero + expected = Series([1, 3, 3, 12, 12, 25]) + result = s.interpolate(method="zero") + tm.assert_series_equal(result, expected.astype("float")) + + result = s.interpolate(method="zero", downcast="infer") + tm.assert_series_equal(result, expected) + # quadratic + # GH #15662. + expected = Series([1, 3.0, 6.823529, 12.0, 18.058824, 25.0]) + result = s.interpolate(method="quadratic") + tm.assert_series_equal(result, expected) + + result = s.interpolate(method="quadratic", downcast="infer") + tm.assert_series_equal(result, expected) + # cubic + expected = Series([1.0, 3.0, 6.8, 12.0, 18.2, 25.0]) + result = s.interpolate(method="cubic") + tm.assert_series_equal(result, expected) + + def test_interp_limit(self): + s = Series([1, 3, np.nan, np.nan, np.nan, 11]) + + expected = Series([1.0, 3.0, 5.0, 7.0, np.nan, 11.0]) + result = s.interpolate(method="linear", limit=2) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("limit", [-1, 0]) + def test_interpolate_invalid_nonpositive_limit(self, nontemporal_method, limit): + # GH 9217: make sure limit is greater than zero. 
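# A standalone sketch of the `limit` contract being validated here: limit caps
# how many consecutive NaNs in a run get filled, and must be a positive integer.
import numpy as np
import pandas as pd

s = pd.Series([1.0, np.nan, np.nan, np.nan, 5.0])
print(s.interpolate(limit=2).tolist())  # [1.0, 2.0, 3.0, nan, 5.0]
# s.interpolate(limit=0)    -> ValueError: Limit must be greater than 0
# s.interpolate(limit=1.5)  -> ValueError: Limit must be an integer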
+ s = Series([1, 2, np.nan, 4]) + method, kwargs = nontemporal_method + with pytest.raises(ValueError, match="Limit must be greater than 0"): + s.interpolate(limit=limit, method=method, **kwargs) +
+ def test_interpolate_invalid_float_limit(self, nontemporal_method): + # GH 9217: make sure limit is an integer. + s = Series([1, 2, np.nan, 4]) + method, kwargs = nontemporal_method + limit = 2.0 + with pytest.raises(ValueError, match="Limit must be an integer"): + s.interpolate(limit=limit, method=method, **kwargs) +
+ @pytest.mark.parametrize("invalid_method", [None, "nonexistent_method"]) + def test_interp_invalid_method(self, invalid_method): + s = Series([1, 3, np.nan, 12, np.nan, 25]) + + msg = f"method must be one of.* Got '{invalid_method}' instead" + with pytest.raises(ValueError, match=msg): + s.interpolate(method=invalid_method) + + # When an invalid method and invalid limit (such as -1) are + # provided, the error message reflects the invalid method. + with pytest.raises(ValueError, match=msg): + s.interpolate(method=invalid_method, limit=-1) +
+ def test_interp_invalid_method_and_value(self): + # GH#36624 + ser = Series([1, 3, np.nan, 12, np.nan, 25]) + + msg = "Cannot pass both fill_value and method" + with pytest.raises(ValueError, match=msg): + ser.interpolate(fill_value=3, method="pad") +
+ def test_interp_limit_forward(self): + s = Series([1, 3, np.nan, np.nan, np.nan, 11]) + + # Provide 'forward' (the default) explicitly here. + expected = Series([1.0, 3.0, 5.0, 7.0, np.nan, 11.0]) + + result = s.interpolate(method="linear", limit=2, limit_direction="forward") + tm.assert_series_equal(result, expected) + + result = s.interpolate(method="linear", limit=2, limit_direction="FORWARD") + tm.assert_series_equal(result, expected) +
+ def test_interp_unlimited(self): + # these tests are for issue #16282: the default limit=None is unlimited + s = Series([np.nan, 1.0, 3.0, np.nan, np.nan, np.nan, 11.0, np.nan]) + expected = Series([1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 11.0]) + result = s.interpolate(method="linear", limit_direction="both") + tm.assert_series_equal(result, expected) + + expected = Series([np.nan, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 11.0]) + result = s.interpolate(method="linear", limit_direction="forward") + tm.assert_series_equal(result, expected) + + expected = Series([1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, np.nan]) + result = s.interpolate(method="linear", limit_direction="backward") + tm.assert_series_equal(result, expected) +
+ def test_interp_limit_bad_direction(self): + s = Series([1, 3, np.nan, np.nan, np.nan, 11]) + + msg = ( + r"Invalid limit_direction: expecting one of \['forward', " + r"'backward', 'both'\], got 'abc'" + ) + with pytest.raises(ValueError, match=msg): + s.interpolate(method="linear", limit=2, limit_direction="abc") + + # raises an error even if no limit is specified. + with pytest.raises(ValueError, match=msg): + s.interpolate(method="linear", limit_direction="abc") +
+ # limit_area introduced GH #16284 + def test_interp_limit_area(self): + # These tests are for issue #9218 -- fill NaNs in both directions.
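# A standalone sketch of limit_area, the option the tests below exercise:
# "inside" restricts filling to NaNs surrounded by valid values, while
# "outside" restricts it to the leading/trailing edges.
import numpy as np
import pandas as pd

s = pd.Series([np.nan, 3.0, np.nan, 7.0, np.nan])
print(s.interpolate(limit_area="inside").tolist())
# [nan, 3.0, 5.0, 7.0, nan]: only the interior gap is bridged
print(s.interpolate(limit_direction="both", limit_area="outside").tolist())
# [3.0, 3.0, nan, 7.0, 7.0]: only the edges are extended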
+ s = Series([np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan]) + + expected = Series([np.nan, np.nan, 3.0, 4.0, 5.0, 6.0, 7.0, np.nan, np.nan]) + result = s.interpolate(method="linear", limit_area="inside") + tm.assert_series_equal(result, expected) + + expected = Series( + [np.nan, np.nan, 3.0, 4.0, np.nan, np.nan, 7.0, np.nan, np.nan] + ) + result = s.interpolate(method="linear", limit_area="inside", limit=1) + tm.assert_series_equal(result, expected) + + expected = Series([np.nan, np.nan, 3.0, 4.0, np.nan, 6.0, 7.0, np.nan, np.nan]) + result = s.interpolate( + method="linear", limit_area="inside", limit_direction="both", limit=1 + ) + tm.assert_series_equal(result, expected) + + expected = Series([np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, 7.0]) + result = s.interpolate(method="linear", limit_area="outside") + tm.assert_series_equal(result, expected) + + expected = Series( + [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan] + ) + result = s.interpolate(method="linear", limit_area="outside", limit=1) + tm.assert_series_equal(result, expected) + + expected = Series([np.nan, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan]) + result = s.interpolate( + method="linear", limit_area="outside", limit_direction="both", limit=1 + ) + tm.assert_series_equal(result, expected) + + expected = Series([3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan]) + result = s.interpolate( + method="linear", limit_area="outside", limit_direction="backward" + ) + tm.assert_series_equal(result, expected) + + # raises an error even if limit type is wrong. + msg = r"Invalid limit_area: expecting one of \['inside', 'outside'\], got abc" + with pytest.raises(ValueError, match=msg): + s.interpolate(method="linear", limit_area="abc") + + @pytest.mark.parametrize( + "method, limit_direction, expected", + [ + ("pad", "backward", "forward"), + ("ffill", "backward", "forward"), + ("backfill", "forward", "backward"), + ("bfill", "forward", "backward"), + ("pad", "both", "forward"), + ("ffill", "both", "forward"), + ("backfill", "both", "backward"), + ("bfill", "both", "backward"), + ], + ) + def test_interp_limit_direction_raises(self, method, limit_direction, expected): + # https://github.com/pandas-dev/pandas/pull/34746 + s = Series([1, 2, 3]) + + msg = f"`limit_direction` must be '{expected}' for method `{method}`" + with pytest.raises(ValueError, match=msg): + s.interpolate(method=method, limit_direction=limit_direction) + + @pytest.mark.parametrize( + "data, expected_data, kwargs", + ( + ( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], + [np.nan, np.nan, 3.0, 3.0, 3.0, 3.0, 7.0, np.nan, np.nan], + {"method": "pad", "limit_area": "inside"}, + ), + ( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], + [np.nan, np.nan, 3.0, 3.0, np.nan, np.nan, 7.0, np.nan, np.nan], + {"method": "pad", "limit_area": "inside", "limit": 1}, + ), + ( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], + [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, 7.0], + {"method": "pad", "limit_area": "outside"}, + ), + ( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], + [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan], + {"method": "pad", "limit_area": "outside", "limit": 1}, + ), + ( + [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + {"method": "pad", "limit_area": "outside", "limit": 1}, + ), + ( + range(5), + range(5), + {"method": "pad", 
"limit_area": "outside", "limit": 1}, + ), + ), + ) + def test_interp_limit_area_with_pad(self, data, expected_data, kwargs): + # GH26796 + + s = Series(data) + expected = Series(expected_data) + result = s.interpolate(**kwargs) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "data, expected_data, kwargs", + ( + ( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], + [np.nan, np.nan, 3.0, 7.0, 7.0, 7.0, 7.0, np.nan, np.nan], + {"method": "bfill", "limit_area": "inside"}, + ), + ( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], + [np.nan, np.nan, 3.0, np.nan, np.nan, 7.0, 7.0, np.nan, np.nan], + {"method": "bfill", "limit_area": "inside", "limit": 1}, + ), + ( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], + [3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan], + {"method": "bfill", "limit_area": "outside"}, + ), + ( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], + [np.nan, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan], + {"method": "bfill", "limit_area": "outside", "limit": 1}, + ), + ), + ) + def test_interp_limit_area_with_backfill(self, data, expected_data, kwargs): + # GH26796 + + s = Series(data) + expected = Series(expected_data) + result = s.interpolate(**kwargs) + tm.assert_series_equal(result, expected) + + def test_interp_limit_direction(self): + # These tests are for issue #9218 -- fill NaNs in both directions. + s = Series([1, 3, np.nan, np.nan, np.nan, 11]) + + expected = Series([1.0, 3.0, np.nan, 7.0, 9.0, 11.0]) + result = s.interpolate(method="linear", limit=2, limit_direction="backward") + tm.assert_series_equal(result, expected) + + expected = Series([1.0, 3.0, 5.0, np.nan, 9.0, 11.0]) + result = s.interpolate(method="linear", limit=1, limit_direction="both") + tm.assert_series_equal(result, expected) + + # Check that this works on a longer series of nans. + s = Series([1, 3, np.nan, np.nan, np.nan, 7, 9, np.nan, np.nan, 12, np.nan]) + + expected = Series([1.0, 3.0, 4.0, 5.0, 6.0, 7.0, 9.0, 10.0, 11.0, 12.0, 12.0]) + result = s.interpolate(method="linear", limit=2, limit_direction="both") + tm.assert_series_equal(result, expected) + + expected = Series( + [1.0, 3.0, 4.0, np.nan, 6.0, 7.0, 9.0, 10.0, 11.0, 12.0, 12.0] + ) + result = s.interpolate(method="linear", limit=1, limit_direction="both") + tm.assert_series_equal(result, expected) + + def test_interp_limit_to_ends(self): + # These test are for issue #10420 -- flow back to beginning. + s = Series([np.nan, np.nan, 5, 7, 9, np.nan]) + + expected = Series([5.0, 5.0, 5.0, 7.0, 9.0, np.nan]) + result = s.interpolate(method="linear", limit=2, limit_direction="backward") + tm.assert_series_equal(result, expected) + + expected = Series([5.0, 5.0, 5.0, 7.0, 9.0, 9.0]) + result = s.interpolate(method="linear", limit=2, limit_direction="both") + tm.assert_series_equal(result, expected) + + def test_interp_limit_before_ends(self): + # These test are for issue #11115 -- limit ends properly. 
+ s = Series([np.nan, np.nan, 5, 7, np.nan, np.nan]) + + expected = Series([np.nan, np.nan, 5.0, 7.0, 7.0, np.nan]) + result = s.interpolate(method="linear", limit=1, limit_direction="forward") + tm.assert_series_equal(result, expected) + + expected = Series([np.nan, 5.0, 5.0, 7.0, np.nan, np.nan]) + result = s.interpolate(method="linear", limit=1, limit_direction="backward") + tm.assert_series_equal(result, expected) + + expected = Series([np.nan, 5.0, 5.0, 7.0, 7.0, np.nan]) + result = s.interpolate(method="linear", limit=1, limit_direction="both") + tm.assert_series_equal(result, expected) + + @td.skip_if_no_scipy + def test_interp_all_good(self): + s = Series([1, 2, 3]) + result = s.interpolate(method="polynomial", order=1) + tm.assert_series_equal(result, s) + + # non-scipy + result = s.interpolate() + tm.assert_series_equal(result, s) + + @pytest.mark.parametrize( + "check_scipy", [False, pytest.param(True, marks=td.skip_if_no_scipy)] + ) + def test_interp_multiIndex(self, check_scipy): + idx = MultiIndex.from_tuples([(0, "a"), (1, "b"), (2, "c")]) + s = Series([1, 2, np.nan], index=idx) + + expected = s.copy() + expected.loc[2] = 2 + result = s.interpolate() + tm.assert_series_equal(result, expected) + + msg = "Only `method=linear` interpolation is supported on MultiIndexes" + if check_scipy: + with pytest.raises(ValueError, match=msg): + s.interpolate(method="polynomial", order=1) + + @td.skip_if_no_scipy + def test_interp_nonmono_raise(self): + s = Series([1, np.nan, 3], index=[0, 2, 1]) + msg = "krogh interpolation requires that the index be monotonic" + with pytest.raises(ValueError, match=msg): + s.interpolate(method="krogh") + + @td.skip_if_no_scipy + @pytest.mark.parametrize("method", ["nearest", "pad"]) + def test_interp_datetime64(self, method, tz_naive_fixture): + df = Series( + [1, np.nan, 3], index=date_range("1/1/2000", periods=3, tz=tz_naive_fixture) + ) + result = df.interpolate(method=method) + expected = Series( + [1.0, 1.0, 3.0], + index=date_range("1/1/2000", periods=3, tz=tz_naive_fixture), + ) + tm.assert_series_equal(result, expected) + + def test_interp_pad_datetime64tz_values(self): + # GH#27628 missing.interpolate_2d should handle datetimetz values + dti = date_range("2015-04-05", periods=3, tz="US/Central") + ser = Series(dti) + ser[1] = pd.NaT + result = ser.interpolate(method="pad") + + expected = Series(dti) + expected[1] = expected[0] + tm.assert_series_equal(result, expected) + + def test_interp_limit_no_nans(self): + # GH 7173 + s = Series([1.0, 2.0, 3.0]) + result = s.interpolate(limit=1) + expected = s + tm.assert_series_equal(result, expected) + + @td.skip_if_no_scipy + @pytest.mark.parametrize("method", ["polynomial", "spline"]) + def test_no_order(self, method): + # see GH-10633, GH-24014 + s = Series([0, 1, np.nan, 3]) + msg = "You must specify the order of the spline or polynomial" + with pytest.raises(ValueError, match=msg): + s.interpolate(method=method) + + @td.skip_if_no_scipy + @pytest.mark.parametrize("order", [-1, -1.0, 0, 0.0, np.nan]) + def test_interpolate_spline_invalid_order(self, order): + s = Series([0, 1, np.nan, 3]) + msg = "order needs to be specified and greater than 0" + with pytest.raises(ValueError, match=msg): + s.interpolate(method="spline", order=order) + + @td.skip_if_no_scipy + def test_spline(self): + s = Series([1, 2, np.nan, 4, 5, np.nan, 7]) + result = s.interpolate(method="spline", order=1) + expected = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]) + tm.assert_series_equal(result, expected) + + @td.skip_if_no_scipy 
+ def test_spline_extrapolate(self): + s = Series([1, 2, 3, 4, np.nan, 6, np.nan]) + result3 = s.interpolate(method="spline", order=1, ext=3) + expected3 = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 6.0]) + tm.assert_series_equal(result3, expected3) + + result1 = s.interpolate(method="spline", order=1, ext=0) + expected1 = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]) + tm.assert_series_equal(result1, expected1) + + @td.skip_if_no_scipy + def test_spline_smooth(self): + s = Series([1, 2, np.nan, 4, 5.1, np.nan, 7]) + assert ( + s.interpolate(method="spline", order=3, s=0)[5] + != s.interpolate(method="spline", order=3)[5] + ) + + @td.skip_if_no_scipy + def test_spline_interpolation(self): + s = Series(np.arange(10) ** 2) + s[np.random.randint(0, 9, 3)] = np.nan + result1 = s.interpolate(method="spline", order=1) + expected1 = s.interpolate(method="spline", order=1) + tm.assert_series_equal(result1, expected1) + + def test_interp_timedelta64(self): + # GH 6424 + df = Series([1, np.nan, 3], index=pd.to_timedelta([1, 2, 3])) + result = df.interpolate(method="time") + expected = Series([1.0, 2.0, 3.0], index=pd.to_timedelta([1, 2, 3])) + tm.assert_series_equal(result, expected) + + # test for non uniform spacing + df = Series([1, np.nan, 3], index=pd.to_timedelta([1, 2, 4])) + result = df.interpolate(method="time") + expected = Series([1.0, 1.666667, 3.0], index=pd.to_timedelta([1, 2, 4])) + tm.assert_series_equal(result, expected) + + def test_series_interpolate_method_values(self): + # GH#1646 + rng = date_range("1/1/2000", "1/20/2000", freq="D") + ts = Series(np.random.randn(len(rng)), index=rng) + + ts[::2] = np.nan + + result = ts.interpolate(method="values") + exp = ts.interpolate() + tm.assert_series_equal(result, exp) + + def test_series_interpolate_intraday(self): + # #1698 + index = date_range("1/1/2012", periods=4, freq="12D") + ts = Series([0, 12, 24, 36], index) + new_index = index.append(index + pd.DateOffset(days=1)).sort_values() + + exp = ts.reindex(new_index).interpolate(method="time") + + index = date_range("1/1/2012", periods=4, freq="12H") + ts = Series([0, 12, 24, 36], index) + new_index = index.append(index + pd.DateOffset(hours=1)).sort_values() + result = ts.reindex(new_index).interpolate(method="time") + + tm.assert_numpy_array_equal(result.values, exp.values) + + @pytest.mark.parametrize( + "ind", + [ + ["a", "b", "c", "d"], + pd.period_range(start="2019-01-01", periods=4), + pd.interval_range(start=0, end=4), + ], + ) + def test_interp_non_timedelta_index(self, interp_methods_ind, ind): + # gh 21662 + df = pd.DataFrame([0, 1, np.nan, 3], index=ind) + + method, kwargs = interp_methods_ind + if method == "pchip": + pytest.importorskip("scipy") + + if method == "linear": + result = df[0].interpolate(**kwargs) + expected = Series([0.0, 1.0, 2.0, 3.0], name=0, index=ind) + tm.assert_series_equal(result, expected) + else: + expected_error = ( + "Index column must be numeric or datetime type when " + f"using {method} method other than linear. " + "Try setting a numeric or datetime index column before " + "interpolating." + ) + with pytest.raises(ValueError, match=expected_error): + df[0].interpolate(method=method, **kwargs) + + @td.skip_if_no_scipy + def test_interpolate_timedelta_index(self, request, interp_methods_ind): + """ + Tests for non numerical index types - object, period, timedelta + Note that all methods except time, index, nearest and values + are tested here. 
+ """ + # gh 21662 + ind = pd.timedelta_range(start=1, periods=4) + df = pd.DataFrame([0, 1, np.nan, 3], index=ind) + + method, kwargs = interp_methods_ind + import scipy + + if method in {"cubic", "zero"} or ( + method == "barycentric" and Version(scipy.__version__) < Version("1.5.0") + ): + request.node.add_marker( + pytest.mark.xfail( + reason=f"{method} interpolation is not supported for TimedeltaIndex" + ) + ) + result = df[0].interpolate(method=method, **kwargs) + expected = Series([0.0, 1.0, 2.0, 3.0], name=0, index=ind) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "ascending, expected_values", + [(True, [1, 2, 3, 9, 10]), (False, [10, 9, 3, 2, 1])], + ) + def test_interpolate_unsorted_index(self, ascending, expected_values): + # GH 21037 + ts = Series(data=[10, 9, np.nan, 2, 1], index=[10, 9, 3, 2, 1]) + result = ts.sort_index(ascending=ascending).interpolate(method="index") + expected = Series(data=expected_values, index=expected_values, dtype=float) + tm.assert_series_equal(result, expected) + + def test_interpolate_pos_args_deprecation(self): + # https://github.com/pandas-dev/pandas/issues/41485 + ser = Series([1, 2, 3]) + msg = ( + r"In a future version of pandas all arguments of Series.interpolate except " + r"for the argument 'method' will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = ser.interpolate("pad", 0) + expected = Series([1, 2, 3]) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_is_monotonic.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_is_monotonic.py new file mode 100644 index 0000000..f029393 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_is_monotonic.py @@ -0,0 +1,28 @@ +import numpy as np + +from pandas import ( + Series, + date_range, +) + + +class TestIsMonotonic: + def test_is_monotonic_numeric(self): + + ser = Series(np.random.randint(0, 10, size=1000)) + assert not ser.is_monotonic + ser = Series(np.arange(1000)) + assert ser.is_monotonic is True + assert ser.is_monotonic_increasing is True + ser = Series(np.arange(1000, 0, -1)) + assert ser.is_monotonic_decreasing is True + + def test_is_monotonic_dt64(self): + + ser = Series(date_range("20130101", periods=10)) + assert ser.is_monotonic is True + assert ser.is_monotonic_increasing is True + + ser = Series(list(reversed(ser))) + assert ser.is_monotonic is False + assert ser.is_monotonic_decreasing is True diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_is_unique.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_is_unique.py new file mode 100644 index 0000000..c696d36 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_is_unique.py @@ -0,0 +1,41 @@ +import numpy as np +import pytest + +from pandas import Series +from pandas.core.construction import create_series_with_explicit_dtype + + +@pytest.mark.parametrize( + "data, expected", + [ + (np.random.randint(0, 10, size=1000), False), + (np.arange(1000), True), + ([], True), + ([np.nan], True), + (["foo", "bar", np.nan], True), + (["foo", "foo", np.nan], False), + (["foo", "bar", np.nan, np.nan], False), + ], +) +def test_is_unique(data, expected): + # GH#11946 / GH#25180 + ser = create_series_with_explicit_dtype(data, dtype_if_empty=object) + assert ser.is_unique is expected + + +def test_is_unique_class_ne(capsys): + # GH#20661 + class Foo: + def 
__init__(self, val): + self._value = val + + def __ne__(self, other): + raise Exception("NEQ not supported") + + with capsys.disabled(): + li = [Foo(i) for i in range(5)] + ser = Series(li, index=list(range(5))) + + ser.is_unique + captured = capsys.readouterr() + assert len(captured.err) == 0 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_isin.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_isin.py new file mode 100644 index 0000000..f769c08 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_isin.py @@ -0,0 +1,203 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Series, + date_range, +) +import pandas._testing as tm +from pandas.core.arrays import PeriodArray + + +class TestSeriesIsIn: + def test_isin(self): + s = Series(["A", "B", "C", "a", "B", "B", "A", "C"]) + + result = s.isin(["A", "C"]) + expected = Series([True, False, True, False, False, False, True, True]) + tm.assert_series_equal(result, expected) + + # GH#16012 + # This specific issue has to have a series over 1e6 in len, but the + # comparison array (in_list) must be large enough so that numpy doesn't + # do a manual masking trick that will avoid this issue altogether + s = Series(list("abcdefghijk" * 10 ** 5)) + # If numpy doesn't do the manual comparison/mask, these + # unorderable mixed types are what cause the exception in numpy + in_list = [-1, "a", "b", "G", "Y", "Z", "E", "K", "E", "S", "I", "R", "R"] * 6 + + assert s.isin(in_list).sum() == 200000 + + def test_isin_with_string_scalar(self): + # GH#4763 + s = Series(["A", "B", "C", "a", "B", "B", "A", "C"]) + msg = ( + r"only list-like objects are allowed to be passed to isin\(\), " + r"you passed a \[str\]" + ) + with pytest.raises(TypeError, match=msg): + s.isin("a") + + s = Series(["aaa", "b", "c"]) + with pytest.raises(TypeError, match=msg): + s.isin("aaa") + + def test_isin_with_i8(self): + # GH#5021 + + expected = Series([True, True, False, False, False]) + expected2 = Series([False, True, False, False, False]) + + # datetime64[ns] + s = Series(date_range("jan-01-2013", "jan-05-2013")) + + result = s.isin(s[0:2]) + tm.assert_series_equal(result, expected) + + result = s.isin(s[0:2].values) + tm.assert_series_equal(result, expected) + + # fails on dtype conversion in the first place + result = s.isin(np.asarray(s[0:2].values).astype("datetime64[D]")) + tm.assert_series_equal(result, expected) + + result = s.isin([s[1]]) + tm.assert_series_equal(result, expected2) + + result = s.isin([np.datetime64(s[1])]) + tm.assert_series_equal(result, expected2) + + result = s.isin(set(s[0:2])) + tm.assert_series_equal(result, expected) + + # timedelta64[ns] + s = Series(pd.to_timedelta(range(5), unit="d")) + result = s.isin(s[0:2]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("empty", [[], Series(dtype=object), np.array([])]) + def test_isin_empty(self, empty): + # see GH#16991 + s = Series(["a", "b"]) + expected = Series([False, False]) + + result = s.isin(empty) + tm.assert_series_equal(expected, result) + + def test_isin_read_only(self): + # https://github.com/pandas-dev/pandas/issues/37174 + arr = np.array([1, 2, 3]) + arr.setflags(write=False) + s = Series([1, 2, 3]) + result = s.isin(arr) + expected = Series([True, True, True]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("dtype", [object, None]) + def test_isin_dt64_values_vs_ints(self, dtype): + # GH#36621 dont cast integers to datetimes 
for isin + dti = date_range("2013-01-01", "2013-01-05") + ser = Series(dti) + + comps = np.asarray([1356998400000000000], dtype=dtype) + + res = dti.isin(comps) + expected = np.array([False] * len(dti), dtype=bool) + tm.assert_numpy_array_equal(res, expected) + + res = ser.isin(comps) + tm.assert_series_equal(res, Series(expected)) + + res = pd.core.algorithms.isin(ser, comps) + tm.assert_numpy_array_equal(res, expected) + + def test_isin_tzawareness_mismatch(self): + dti = date_range("2013-01-01", "2013-01-05") + ser = Series(dti) + + other = dti.tz_localize("UTC") + + res = dti.isin(other) + expected = np.array([False] * len(dti), dtype=bool) + tm.assert_numpy_array_equal(res, expected) + + res = ser.isin(other) + tm.assert_series_equal(res, Series(expected)) + + res = pd.core.algorithms.isin(ser, other) + tm.assert_numpy_array_equal(res, expected) + + def test_isin_period_freq_mismatch(self): + dti = date_range("2013-01-01", "2013-01-05") + pi = dti.to_period("M") + ser = Series(pi) + + # We construct another PeriodIndex with the same i8 values + # but different dtype + dtype = dti.to_period("Y").dtype + other = PeriodArray._simple_new(pi.asi8, dtype=dtype) + + res = pi.isin(other) + expected = np.array([False] * len(pi), dtype=bool) + tm.assert_numpy_array_equal(res, expected) + + res = ser.isin(other) + tm.assert_series_equal(res, Series(expected)) + + res = pd.core.algorithms.isin(ser, other) + tm.assert_numpy_array_equal(res, expected) + + @pytest.mark.parametrize("values", [[-9.0, 0.0], [-9, 0]]) + def test_isin_float_in_int_series(self, values): + # GH#19356 GH#21804 + ser = Series(values) + result = ser.isin([-9, -0.5]) + expected = Series([True, False]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("dtype", ["boolean", "Int64", "Float64"]) + @pytest.mark.parametrize( + "data,values,expected", + [ + ([0, 1, 0], [1], [False, True, False]), + ([0, 1, 0], [1, pd.NA], [False, True, False]), + ([0, pd.NA, 0], [1, 0], [True, False, True]), + ([0, 1, pd.NA], [1, pd.NA], [False, True, True]), + ([0, 1, pd.NA], [1, np.nan], [False, True, False]), + ([0, pd.NA, pd.NA], [np.nan, pd.NaT, None], [False, False, False]), + ], + ) + def test_isin_masked_types(self, dtype, data, values, expected): + # GH#42405 + ser = Series(data, dtype=dtype) + + result = ser.isin(values) + expected = Series(expected, dtype="boolean") + + tm.assert_series_equal(result, expected) + + +@pytest.mark.slow +def test_isin_large_series_mixed_dtypes_and_nan(): + # https://github.com/pandas-dev/pandas/issues/37094 + # combination of object dtype for the values and > 1_000_000 elements + ser = Series([1, 2, np.nan] * 1_000_000) + result = ser.isin({"foo", "bar"}) + expected = Series([False] * 3 * 1_000_000) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "array,expected", + [ + ( + [0, 1j, 1j, 1, 1 + 1j, 1 + 2j, 1 + 1j], + Series([False, True, True, False, True, True, True], dtype=bool), + ) + ], +) +def test_isin_complex_numbers(array, expected): + # GH 17927 + result = Series(array).isin([1j, 1 + 1j, 1 + 2j]) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_isna.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_isna.py new file mode 100644 index 0000000..7e324aa --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_isna.py @@ -0,0 +1,35 @@ +""" +We also test Series.notna in this file. 
+""" +import numpy as np + +from pandas import ( + Period, + Series, +) +import pandas._testing as tm + + +class TestIsna: + def test_isna_period_dtype(self): + # GH#13737 + ser = Series([Period("2011-01", freq="M"), Period("NaT", freq="M")]) + + expected = Series([False, True]) + + result = ser.isna() + tm.assert_series_equal(result, expected) + + result = ser.notna() + tm.assert_series_equal(result, ~expected) + + def test_isna(self): + ser = Series([0, 5.4, 3, np.nan, -0.001]) + expected = Series([False, False, False, True, False]) + tm.assert_series_equal(ser.isna(), expected) + tm.assert_series_equal(ser.notna(), ~expected) + + ser = Series(["hi", "", np.nan]) + expected = Series([False, False, True]) + tm.assert_series_equal(ser.isna(), expected) + tm.assert_series_equal(ser.notna(), ~expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_item.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_item.py new file mode 100644 index 0000000..2bdeb4d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_item.py @@ -0,0 +1,59 @@ +""" +Series.item method, mainly testing that we get python scalars as opposed to +numpy scalars. +""" +import pytest + +from pandas import ( + Series, + Timedelta, + Timestamp, + date_range, +) + + +class TestItem: + def test_item(self): + # We are testing that we get python scalars as opposed to numpy scalars + ser = Series([1]) + result = ser.item() + assert result == 1 + assert result == ser.iloc[0] + assert isinstance(result, int) # i.e. not np.int64 + + ser = Series([0.5], index=[3]) + result = ser.item() + assert isinstance(result, float) + assert result == 0.5 + + ser = Series([1, 2]) + msg = "can only convert an array of size 1" + with pytest.raises(ValueError, match=msg): + ser.item() + + dti = date_range("2016-01-01", periods=2) + with pytest.raises(ValueError, match=msg): + dti.item() + with pytest.raises(ValueError, match=msg): + Series(dti).item() + + val = dti[:1].item() + assert isinstance(val, Timestamp) + val = Series(dti)[:1].item() + assert isinstance(val, Timestamp) + + tdi = dti - dti + with pytest.raises(ValueError, match=msg): + tdi.item() + with pytest.raises(ValueError, match=msg): + Series(tdi).item() + + val = tdi[:1].item() + assert isinstance(val, Timedelta) + val = Series(tdi)[:1].item() + assert isinstance(val, Timedelta) + + # Case where ser[0] would not work + ser = Series(dti, index=[5, 6]) + val = ser[:1].item() + assert val == dti[0] diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_matmul.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_matmul.py new file mode 100644 index 0000000..b944395 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_matmul.py @@ -0,0 +1,78 @@ +import operator + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Series, +) +import pandas._testing as tm + + +class TestMatmul: + def test_matmul(self): + # matmul test is for GH#10259 + a = Series(np.random.randn(4), index=["p", "q", "r", "s"]) + b = DataFrame( + np.random.randn(3, 4), index=["1", "2", "3"], columns=["p", "q", "r", "s"] + ).T + + # Series @ DataFrame -> Series + result = operator.matmul(a, b) + expected = Series(np.dot(a.values, b.values), index=["1", "2", "3"]) + tm.assert_series_equal(result, expected) + + # DataFrame @ Series -> Series + result = operator.matmul(b.T, a) + expected = Series(np.dot(b.T.values, a.T.values), index=["1", "2", 
"3"]) + tm.assert_series_equal(result, expected) + + # Series @ Series -> scalar + result = operator.matmul(a, a) + expected = np.dot(a.values, a.values) + tm.assert_almost_equal(result, expected) + + # GH#21530 + # vector (1D np.array) @ Series (__rmatmul__) + result = operator.matmul(a.values, a) + expected = np.dot(a.values, a.values) + tm.assert_almost_equal(result, expected) + + # GH#21530 + # vector (1D list) @ Series (__rmatmul__) + result = operator.matmul(a.values.tolist(), a) + expected = np.dot(a.values, a.values) + tm.assert_almost_equal(result, expected) + + # GH#21530 + # matrix (2D np.array) @ Series (__rmatmul__) + result = operator.matmul(b.T.values, a) + expected = np.dot(b.T.values, a.values) + tm.assert_almost_equal(result, expected) + + # GH#21530 + # matrix (2D nested lists) @ Series (__rmatmul__) + result = operator.matmul(b.T.values.tolist(), a) + expected = np.dot(b.T.values, a.values) + tm.assert_almost_equal(result, expected) + + # mixed dtype DataFrame @ Series + a["p"] = int(a.p) + result = operator.matmul(b.T, a) + expected = Series(np.dot(b.T.values, a.T.values), index=["1", "2", "3"]) + tm.assert_series_equal(result, expected) + + # different dtypes DataFrame @ Series + a = a.astype(int) + result = operator.matmul(b.T, a) + expected = Series(np.dot(b.T.values, a.T.values), index=["1", "2", "3"]) + tm.assert_series_equal(result, expected) + + msg = r"Dot product shape mismatch, \(4,\) vs \(3,\)" + # exception raised is of type Exception + with pytest.raises(Exception, match=msg): + a.dot(a.values[:3]) + msg = "matrices are not aligned" + with pytest.raises(ValueError, match=msg): + a.dot(b.T) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_nlargest.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_nlargest.py new file mode 100644 index 0000000..ee96ab0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_nlargest.py @@ -0,0 +1,233 @@ +""" +Note: for naming purposes, most tests are title with as e.g. "test_nlargest_foo" +but are implicitly also testing nsmallest_foo. +""" +from itertools import product + +import numpy as np +import pytest + +import pandas as pd +from pandas import Series +import pandas._testing as tm + +main_dtypes = [ + "datetime", + "datetimetz", + "timedelta", + "int8", + "int16", + "int32", + "int64", + "float32", + "float64", + "uint8", + "uint16", + "uint32", + "uint64", +] + + +@pytest.fixture +def s_main_dtypes(): + """ + A DataFrame with many dtypes + + * datetime + * datetimetz + * timedelta + * [u]int{8,16,32,64} + * float{32,64} + + The columns are the name of the dtype. 
+ """ + df = pd.DataFrame( + { + "datetime": pd.to_datetime(["2003", "2002", "2001", "2002", "2005"]), + "datetimetz": pd.to_datetime( + ["2003", "2002", "2001", "2002", "2005"] + ).tz_localize("US/Eastern"), + "timedelta": pd.to_timedelta(["3d", "2d", "1d", "2d", "5d"]), + } + ) + + for dtype in [ + "int8", + "int16", + "int32", + "int64", + "float32", + "float64", + "uint8", + "uint16", + "uint32", + "uint64", + ]: + df[dtype] = Series([3, 2, 1, 2, 5], dtype=dtype) + + return df + + +@pytest.fixture(params=main_dtypes) +def s_main_dtypes_split(request, s_main_dtypes): + """Each series in s_main_dtypes.""" + return s_main_dtypes[request.param] + + +def assert_check_nselect_boundary(vals, dtype, method): + # helper function for 'test_boundary_{dtype}' tests + ser = Series(vals, dtype=dtype) + result = getattr(ser, method)(3) + expected_idxr = [0, 1, 2] if method == "nsmallest" else [3, 2, 1] + expected = ser.loc[expected_idxr] + tm.assert_series_equal(result, expected) + + +class TestSeriesNLargestNSmallest: + @pytest.mark.parametrize( + "r", + [ + Series([3.0, 2, 1, 2, "5"], dtype="object"), + Series([3.0, 2, 1, 2, 5], dtype="object"), + # not supported on some archs + # Series([3., 2, 1, 2, 5], dtype='complex256'), + Series([3.0, 2, 1, 2, 5], dtype="complex128"), + Series(list("abcde")), + Series(list("abcde"), dtype="category"), + ], + ) + def test_nlargest_error(self, r): + dt = r.dtype + msg = f"Cannot use method 'n(largest|smallest)' with dtype {dt}" + args = 2, len(r), 0, -1 + methods = r.nlargest, r.nsmallest + for method, arg in product(methods, args): + with pytest.raises(TypeError, match=msg): + method(arg) + + def test_nsmallest_nlargest(self, s_main_dtypes_split): + # float, int, datetime64 (use i8), timedelts64 (same), + # object that are numbers, object that are strings + ser = s_main_dtypes_split + + tm.assert_series_equal(ser.nsmallest(2), ser.iloc[[2, 1]]) + tm.assert_series_equal(ser.nsmallest(2, keep="last"), ser.iloc[[2, 3]]) + + empty = ser.iloc[0:0] + tm.assert_series_equal(ser.nsmallest(0), empty) + tm.assert_series_equal(ser.nsmallest(-1), empty) + tm.assert_series_equal(ser.nlargest(0), empty) + tm.assert_series_equal(ser.nlargest(-1), empty) + + tm.assert_series_equal(ser.nsmallest(len(ser)), ser.sort_values()) + tm.assert_series_equal(ser.nsmallest(len(ser) + 1), ser.sort_values()) + tm.assert_series_equal(ser.nlargest(len(ser)), ser.iloc[[4, 0, 1, 3, 2]]) + tm.assert_series_equal(ser.nlargest(len(ser) + 1), ser.iloc[[4, 0, 1, 3, 2]]) + + def test_nlargest_misc(self): + + ser = Series([3.0, np.nan, 1, 2, 5]) + result = ser.nlargest() + expected = ser.iloc[[4, 0, 3, 2, 1]] + tm.assert_series_equal(result, expected) + result = ser.nsmallest() + expected = ser.iloc[[2, 3, 0, 4, 1]] + tm.assert_series_equal(result, expected) + + msg = 'keep must be either "first", "last"' + with pytest.raises(ValueError, match=msg): + ser.nsmallest(keep="invalid") + with pytest.raises(ValueError, match=msg): + ser.nlargest(keep="invalid") + + # GH#15297 + ser = Series([1] * 5, index=[1, 2, 3, 4, 5]) + expected_first = Series([1] * 3, index=[1, 2, 3]) + expected_last = Series([1] * 3, index=[5, 4, 3]) + + result = ser.nsmallest(3) + tm.assert_series_equal(result, expected_first) + + result = ser.nsmallest(3, keep="last") + tm.assert_series_equal(result, expected_last) + + result = ser.nlargest(3) + tm.assert_series_equal(result, expected_first) + + result = ser.nlargest(3, keep="last") + tm.assert_series_equal(result, expected_last) + + @pytest.mark.parametrize("n", range(1, 5)) + 
def test_nlargest_n(self, n): + + # GH 13412 + ser = Series([1, 4, 3, 2], index=[0, 0, 1, 1]) + result = ser.nlargest(n) + expected = ser.sort_values(ascending=False).head(n) + tm.assert_series_equal(result, expected) + + result = ser.nsmallest(n) + expected = ser.sort_values().head(n) + tm.assert_series_equal(result, expected) + + def test_nlargest_boundary_integer(self, nselect_method, any_int_numpy_dtype): + # GH#21426 + dtype_info = np.iinfo(any_int_numpy_dtype) + min_val, max_val = dtype_info.min, dtype_info.max + vals = [min_val, min_val + 1, max_val - 1, max_val] + assert_check_nselect_boundary(vals, any_int_numpy_dtype, nselect_method) + + def test_nlargest_boundary_float(self, nselect_method, float_numpy_dtype): + # GH#21426 + dtype_info = np.finfo(float_numpy_dtype) + min_val, max_val = dtype_info.min, dtype_info.max + min_2nd, max_2nd = np.nextafter([min_val, max_val], 0, dtype=float_numpy_dtype) + vals = [min_val, min_2nd, max_2nd, max_val] + assert_check_nselect_boundary(vals, float_numpy_dtype, nselect_method) + + @pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"]) + def test_nlargest_boundary_datetimelike(self, nselect_method, dtype): + # GH#21426 + # use int64 bounds and +1 to min_val since true minimum is NaT + # (include min_val/NaT at end to maintain same expected_idxr) + dtype_info = np.iinfo("int64") + min_val, max_val = dtype_info.min, dtype_info.max + vals = [min_val + 1, min_val + 2, max_val - 1, max_val, min_val] + assert_check_nselect_boundary(vals, dtype, nselect_method) + + def test_nlargest_duplicate_keep_all_ties(self): + # see GH#16818 + ser = Series([10, 9, 8, 7, 7, 7, 7, 6]) + result = ser.nlargest(4, keep="all") + expected = Series([10, 9, 8, 7, 7, 7, 7]) + tm.assert_series_equal(result, expected) + + result = ser.nsmallest(2, keep="all") + expected = Series([6, 7, 7, 7, 7], index=[7, 3, 4, 5, 6]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "data,expected", [([True, False], [True]), ([True, False, True, True], [True])] + ) + def test_nlargest_boolean(self, data, expected): + # GH#26154 : ensure True > False + ser = Series(data) + result = ser.nlargest(1) + expected = Series(expected) + tm.assert_series_equal(result, expected) + + def test_nlargest_nullable(self, any_numeric_ea_dtype): + # GH#42816 + dtype = any_numeric_ea_dtype + arr = np.random.randn(10).astype(dtype.lower(), copy=False) + + ser = Series(arr.copy(), dtype=dtype) + ser[1] = pd.NA + result = ser.nlargest(5) + + expected = ( + Series(np.delete(arr, 1), index=ser.index.delete(1)) + .nlargest(5) + .astype(dtype) + ) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_nunique.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_nunique.py new file mode 100644 index 0000000..50d3b93 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_nunique.py @@ -0,0 +1,24 @@ +import numpy as np + +from pandas import ( + Categorical, + Series, +) + + +def test_nunique(): + # basics.rst doc example + series = Series(np.random.randn(500)) + series[20:500] = np.nan + series[10:20] = 5000 + result = series.nunique() + assert result == 11 + + +def test_nunique_categorical(): + # GH#18051 + ser = Series(Categorical([])) + assert ser.nunique() == 0 + + ser = Series(Categorical([np.nan])) + assert ser.nunique() == 0 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_pct_change.py 
b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_pct_change.py new file mode 100644 index 0000000..017fef5 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_pct_change.py @@ -0,0 +1,82 @@ +import numpy as np +import pytest + +from pandas import ( + Series, + date_range, +) +import pandas._testing as tm + + +class TestSeriesPctChange: + def test_pct_change(self, datetime_series): + rs = datetime_series.pct_change(fill_method=None) + tm.assert_series_equal(rs, datetime_series / datetime_series.shift(1) - 1) + + rs = datetime_series.pct_change(2) + filled = datetime_series.fillna(method="pad") + tm.assert_series_equal(rs, filled / filled.shift(2) - 1) + + rs = datetime_series.pct_change(fill_method="bfill", limit=1) + filled = datetime_series.fillna(method="bfill", limit=1) + tm.assert_series_equal(rs, filled / filled.shift(1) - 1) + + rs = datetime_series.pct_change(freq="5D") + filled = datetime_series.fillna(method="pad") + tm.assert_series_equal( + rs, (filled / filled.shift(freq="5D") - 1).reindex_like(filled) + ) + + def test_pct_change_with_duplicate_axis(self): + # GH#28664 + common_idx = date_range("2019-11-14", periods=5, freq="D") + result = Series(range(5), common_idx).pct_change(freq="B") + + # the reason that the expected should be like this is documented at PR 28681 + expected = Series([np.NaN, np.inf, np.NaN, np.NaN, 3.0], common_idx) + + tm.assert_series_equal(result, expected) + + def test_pct_change_shift_over_nas(self): + s = Series([1.0, 1.5, np.nan, 2.5, 3.0]) + + chg = s.pct_change() + expected = Series([np.nan, 0.5, 0.0, 2.5 / 1.5 - 1, 0.2]) + tm.assert_series_equal(chg, expected) + + @pytest.mark.parametrize( + "freq, periods, fill_method, limit", + [ + ("5B", 5, None, None), + ("3B", 3, None, None), + ("3B", 3, "bfill", None), + ("7B", 7, "pad", 1), + ("7B", 7, "bfill", 3), + ("14B", 14, None, None), + ], + ) + def test_pct_change_periods_freq( + self, freq, periods, fill_method, limit, datetime_series + ): + # GH#7292 + rs_freq = datetime_series.pct_change( + freq=freq, fill_method=fill_method, limit=limit + ) + rs_periods = datetime_series.pct_change( + periods, fill_method=fill_method, limit=limit + ) + tm.assert_series_equal(rs_freq, rs_periods) + + empty_ts = Series(index=datetime_series.index, dtype=object) + rs_freq = empty_ts.pct_change(freq=freq, fill_method=fill_method, limit=limit) + rs_periods = empty_ts.pct_change(periods, fill_method=fill_method, limit=limit) + tm.assert_series_equal(rs_freq, rs_periods) + + +@pytest.mark.parametrize("fill_method", ["pad", "ffill", None]) +def test_pct_change_with_duplicated_indices(fill_method): + # GH30463 + s = Series([np.nan, 1, 2, 3, 9, 18], index=["a", "b"] * 3) + result = s.pct_change(fill_method=fill_method) + expected = Series([np.nan, np.nan, 1.0, 0.5, 2.0, 1.0], index=["a", "b"] * 3) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_pop.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_pop.py new file mode 100644 index 0000000..7453f98 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_pop.py @@ -0,0 +1,13 @@ +from pandas import Series +import pandas._testing as tm + + +def test_pop(): + # GH#6600 + ser = Series([0, 4, 0], index=["A", "B", "C"], name=4) + + result = ser.pop("B") + assert result == 4 + + expected = Series([0, 0], index=["A", "C"], name=4) + tm.assert_series_equal(ser, expected) diff --git 
a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_quantile.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_quantile.py new file mode 100644 index 0000000..84bfe85 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_quantile.py @@ -0,0 +1,225 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_integer + +import pandas as pd +from pandas import ( + Index, + Series, +) +import pandas._testing as tm +from pandas.core.indexes.datetimes import Timestamp + + +class TestSeriesQuantile: + def test_quantile(self, datetime_series): + + q = datetime_series.quantile(0.1) + assert q == np.percentile(datetime_series.dropna(), 10) + + q = datetime_series.quantile(0.9) + assert q == np.percentile(datetime_series.dropna(), 90) + + # object dtype + q = Series(datetime_series, dtype=object).quantile(0.9) + assert q == np.percentile(datetime_series.dropna(), 90) + + # datetime64[ns] dtype + dts = datetime_series.index.to_series() + q = dts.quantile(0.2) + assert q == Timestamp("2000-01-10 19:12:00") + + # timedelta64[ns] dtype + tds = dts.diff() + q = tds.quantile(0.25) + assert q == pd.to_timedelta("24:00:00") + + # GH7661 + result = Series([np.timedelta64("NaT")]).sum() + assert result == pd.Timedelta(0) + + msg = "percentiles should all be in the interval \\[0, 1\\]" + for invalid in [-1, 2, [0.5, -1], [0.5, 2]]: + with pytest.raises(ValueError, match=msg): + datetime_series.quantile(invalid) + + def test_quantile_multi(self, datetime_series): + + qs = [0.1, 0.9] + result = datetime_series.quantile(qs) + expected = Series( + [ + np.percentile(datetime_series.dropna(), 10), + np.percentile(datetime_series.dropna(), 90), + ], + index=qs, + name=datetime_series.name, + ) + tm.assert_series_equal(result, expected) + + dts = datetime_series.index.to_series() + dts.name = "xxx" + result = dts.quantile((0.2, 0.2)) + expected = Series( + [Timestamp("2000-01-10 19:12:00"), Timestamp("2000-01-10 19:12:00")], + index=[0.2, 0.2], + name="xxx", + ) + tm.assert_series_equal(result, expected) + + result = datetime_series.quantile([]) + expected = Series( + [], name=datetime_series.name, index=Index([], dtype=float), dtype="float64" + ) + tm.assert_series_equal(result, expected) + + def test_quantile_interpolation(self, datetime_series): + # see gh-10174 + + # interpolation = linear (default case) + q = datetime_series.quantile(0.1, interpolation="linear") + assert q == np.percentile(datetime_series.dropna(), 10) + q1 = datetime_series.quantile(0.1) + assert q1 == np.percentile(datetime_series.dropna(), 10) + + # test with and without interpolation keyword + assert q == q1 + + def test_quantile_interpolation_dtype(self): + # GH #10174 + + # interpolation = linear (default case) + q = Series([1, 3, 4]).quantile(0.5, interpolation="lower") + assert q == np.percentile(np.array([1, 3, 4]), 50) + assert is_integer(q) + + q = Series([1, 3, 4]).quantile(0.5, interpolation="higher") + assert q == np.percentile(np.array([1, 3, 4]), 50) + assert is_integer(q) + + def test_quantile_nan(self): + + # GH 13098 + s = Series([1, 2, 3, 4, np.nan]) + result = s.quantile(0.5) + expected = 2.5 + assert result == expected + + # all nan/empty + s1 = Series([], dtype=object) + cases = [s1, Series([np.nan, np.nan])] + + for s in cases: + res = s.quantile(0.5) + assert np.isnan(res) + + res = s.quantile([0.5]) + tm.assert_series_equal(res, Series([np.nan], index=[0.5])) + + res = s.quantile([0.2, 0.3]) + tm.assert_series_equal(res, 
Series([np.nan, np.nan], index=[0.2, 0.3])) + + @pytest.mark.parametrize( + "case", + [ + [ + Timestamp("2011-01-01"), + Timestamp("2011-01-02"), + Timestamp("2011-01-03"), + ], + [ + Timestamp("2011-01-01", tz="US/Eastern"), + Timestamp("2011-01-02", tz="US/Eastern"), + Timestamp("2011-01-03", tz="US/Eastern"), + ], + [pd.Timedelta("1 days"), pd.Timedelta("2 days"), pd.Timedelta("3 days")], + # NaT + [ + Timestamp("2011-01-01"), + Timestamp("2011-01-02"), + Timestamp("2011-01-03"), + pd.NaT, + ], + [ + Timestamp("2011-01-01", tz="US/Eastern"), + Timestamp("2011-01-02", tz="US/Eastern"), + Timestamp("2011-01-03", tz="US/Eastern"), + pd.NaT, + ], + [ + pd.Timedelta("1 days"), + pd.Timedelta("2 days"), + pd.Timedelta("3 days"), + pd.NaT, + ], + ], + ) + def test_quantile_box(self, case): + s = Series(case, name="XXX") + res = s.quantile(0.5) + assert res == case[1] + + res = s.quantile([0.5]) + exp = Series([case[1]], index=[0.5], name="XXX") + tm.assert_series_equal(res, exp) + + def test_datetime_timedelta_quantiles(self): + # covers #9694 + assert pd.isna(Series([], dtype="M8[ns]").quantile(0.5)) + assert pd.isna(Series([], dtype="m8[ns]").quantile(0.5)) + + def test_quantile_nat(self): + res = Series([pd.NaT, pd.NaT]).quantile(0.5) + assert res is pd.NaT + + res = Series([pd.NaT, pd.NaT]).quantile([0.5]) + tm.assert_series_equal(res, Series([pd.NaT], index=[0.5])) + + @pytest.mark.parametrize( + "values, dtype", + [([0, 0, 0, 1, 2, 3], "Sparse[int]"), ([0.0, None, 1.0, 2.0], "Sparse[float]")], + ) + def test_quantile_sparse(self, values, dtype): + ser = Series(values, dtype=dtype) + result = ser.quantile([0.5]) + expected = Series(np.asarray(ser)).quantile([0.5]) + tm.assert_series_equal(result, expected) + + def test_quantile_empty(self): + + # floats + s = Series([], dtype="float64") + + res = s.quantile(0.5) + assert np.isnan(res) + + res = s.quantile([0.5]) + exp = Series([np.nan], index=[0.5]) + tm.assert_series_equal(res, exp) + + # int + s = Series([], dtype="int64") + + res = s.quantile(0.5) + assert np.isnan(res) + + res = s.quantile([0.5]) + exp = Series([np.nan], index=[0.5]) + tm.assert_series_equal(res, exp) + + # datetime + s = Series([], dtype="datetime64[ns]") + + res = s.quantile(0.5) + assert res is pd.NaT + + res = s.quantile([0.5]) + exp = Series([pd.NaT], index=[0.5]) + tm.assert_series_equal(res, exp) + + @pytest.mark.parametrize("dtype", [int, float, "Int64"]) + def test_quantile_dtypes(self, dtype): + result = Series([1, 2, 3], dtype=dtype).quantile(np.arange(0, 1, 0.25)) + expected = Series(np.arange(1, 3, 0.5), index=np.arange(0, 1, 0.25)) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_rank.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_rank.py new file mode 100644 index 0000000..d85b84b --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_rank.py @@ -0,0 +1,484 @@ +from itertools import chain +import operator + +import numpy as np +import pytest + +from pandas._libs.algos import ( + Infinity, + NegInfinity, +) +import pandas.util._test_decorators as td + +from pandas import ( + NaT, + Series, + Timestamp, + date_range, +) +import pandas._testing as tm +from pandas.api.types import CategoricalDtype + + +@pytest.fixture +def ser(): + return Series([1, 3, 4, 2, np.nan, 2, 1, 5, np.nan, 3]) + + +@pytest.fixture( + params=[ + ["average", np.array([1.5, 5.5, 7.0, 3.5, np.nan, 3.5, 1.5, 8.0, np.nan, 5.5])], + ["min", np.array([1, 5, 
7, 3, np.nan, 3, 1, 8, np.nan, 5])], + ["max", np.array([2, 6, 7, 4, np.nan, 4, 2, 8, np.nan, 6])], + ["first", np.array([1, 5, 7, 3, np.nan, 4, 2, 8, np.nan, 6])], + ["dense", np.array([1, 3, 4, 2, np.nan, 2, 1, 5, np.nan, 3])], + ] +) +def results(request): + return request.param + + +class TestSeriesRank: + @td.skip_if_no_scipy + def test_rank(self, datetime_series): + from scipy.stats import rankdata + + datetime_series[::2] = np.nan + datetime_series[:10][::3] = 4.0 + + ranks = datetime_series.rank() + oranks = datetime_series.astype("O").rank() + + tm.assert_series_equal(ranks, oranks) + + mask = np.isnan(datetime_series) + filled = datetime_series.fillna(np.inf) + + # rankdata returns a ndarray + exp = Series(rankdata(filled), index=filled.index, name="ts") + exp[mask] = np.nan + + tm.assert_series_equal(ranks, exp) + + iseries = Series(np.arange(5).repeat(2)) + + iranks = iseries.rank() + exp = iseries.astype(float).rank() + tm.assert_series_equal(iranks, exp) + iseries = Series(np.arange(5)) + 1.0 + exp = iseries / 5.0 + iranks = iseries.rank(pct=True) + + tm.assert_series_equal(iranks, exp) + + iseries = Series(np.repeat(1, 100)) + exp = Series(np.repeat(0.505, 100)) + iranks = iseries.rank(pct=True) + tm.assert_series_equal(iranks, exp) + + iseries[1] = np.nan + exp = Series(np.repeat(50.0 / 99.0, 100)) + exp[1] = np.nan + iranks = iseries.rank(pct=True) + tm.assert_series_equal(iranks, exp) + + iseries = Series(np.arange(5)) + 1.0 + iseries[4] = np.nan + exp = iseries / 4.0 + iranks = iseries.rank(pct=True) + tm.assert_series_equal(iranks, exp) + + iseries = Series(np.repeat(np.nan, 100)) + exp = iseries.copy() + iranks = iseries.rank(pct=True) + tm.assert_series_equal(iranks, exp) + + iseries = Series(np.arange(5)) + 1 + iseries[4] = np.nan + exp = iseries / 4.0 + iranks = iseries.rank(pct=True) + tm.assert_series_equal(iranks, exp) + + rng = date_range("1/1/1990", periods=5) + iseries = Series(np.arange(5), rng) + 1 + iseries.iloc[4] = np.nan + exp = iseries / 4.0 + iranks = iseries.rank(pct=True) + tm.assert_series_equal(iranks, exp) + + iseries = Series([1e-50, 1e-100, 1e-20, 1e-2, 1e-20 + 1e-30, 1e-1]) + exp = Series([2, 1, 3, 5, 4, 6.0]) + iranks = iseries.rank() + tm.assert_series_equal(iranks, exp) + + # GH 5968 + iseries = Series(["3 day", "1 day 10m", "-2 day", NaT], dtype="m8[ns]") + exp = Series([3, 2, 1, np.nan]) + iranks = iseries.rank() + tm.assert_series_equal(iranks, exp) + + values = np.array( + [-50, -1, -1e-20, -1e-25, -1e-50, 0, 1e-40, 1e-20, 1e-10, 2, 40], + dtype="float64", + ) + random_order = np.random.permutation(len(values)) + iseries = Series(values[random_order]) + exp = Series(random_order + 1.0, dtype="float64") + iranks = iseries.rank() + tm.assert_series_equal(iranks, exp) + + def test_rank_categorical(self): + # GH issue #15420 rank incorrectly orders ordered categories + + # Test ascending/descending ranking for ordered categoricals + exp = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0]) + exp_desc = Series([6.0, 5.0, 4.0, 3.0, 2.0, 1.0]) + ordered = Series( + ["first", "second", "third", "fourth", "fifth", "sixth"] + ).astype( + CategoricalDtype( + categories=["first", "second", "third", "fourth", "fifth", "sixth"], + ordered=True, + ) + ) + tm.assert_series_equal(ordered.rank(), exp) + tm.assert_series_equal(ordered.rank(ascending=False), exp_desc) + + # Unordered categoricals should be ranked as objects + unordered = Series( + ["first", "second", "third", "fourth", "fifth", "sixth"] + ).astype( + CategoricalDtype( + categories=["first", "second", 
"third", "fourth", "fifth", "sixth"], + ordered=False, + ) + ) + exp_unordered = Series([2.0, 4.0, 6.0, 3.0, 1.0, 5.0]) + res = unordered.rank() + tm.assert_series_equal(res, exp_unordered) + + unordered1 = Series([1, 2, 3, 4, 5, 6]).astype( + CategoricalDtype([1, 2, 3, 4, 5, 6], False) + ) + exp_unordered1 = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0]) + res1 = unordered1.rank() + tm.assert_series_equal(res1, exp_unordered1) + + # Test na_option for rank data + na_ser = Series( + ["first", "second", "third", "fourth", "fifth", "sixth", np.NaN] + ).astype( + CategoricalDtype( + ["first", "second", "third", "fourth", "fifth", "sixth", "seventh"], + True, + ) + ) + + exp_top = Series([2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 1.0]) + exp_bot = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]) + exp_keep = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, np.NaN]) + + tm.assert_series_equal(na_ser.rank(na_option="top"), exp_top) + tm.assert_series_equal(na_ser.rank(na_option="bottom"), exp_bot) + tm.assert_series_equal(na_ser.rank(na_option="keep"), exp_keep) + + # Test na_option for rank data with ascending False + exp_top = Series([7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0]) + exp_bot = Series([6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 7.0]) + exp_keep = Series([6.0, 5.0, 4.0, 3.0, 2.0, 1.0, np.NaN]) + + tm.assert_series_equal(na_ser.rank(na_option="top", ascending=False), exp_top) + tm.assert_series_equal( + na_ser.rank(na_option="bottom", ascending=False), exp_bot + ) + tm.assert_series_equal(na_ser.rank(na_option="keep", ascending=False), exp_keep) + + # Test invalid values for na_option + msg = "na_option must be one of 'keep', 'top', or 'bottom'" + + with pytest.raises(ValueError, match=msg): + na_ser.rank(na_option="bad", ascending=False) + + # invalid type + with pytest.raises(ValueError, match=msg): + na_ser.rank(na_option=True, ascending=False) + + # Test with pct=True + na_ser = Series(["first", "second", "third", "fourth", np.NaN]).astype( + CategoricalDtype(["first", "second", "third", "fourth"], True) + ) + exp_top = Series([0.4, 0.6, 0.8, 1.0, 0.2]) + exp_bot = Series([0.2, 0.4, 0.6, 0.8, 1.0]) + exp_keep = Series([0.25, 0.5, 0.75, 1.0, np.NaN]) + + tm.assert_series_equal(na_ser.rank(na_option="top", pct=True), exp_top) + tm.assert_series_equal(na_ser.rank(na_option="bottom", pct=True), exp_bot) + tm.assert_series_equal(na_ser.rank(na_option="keep", pct=True), exp_keep) + + def test_rank_signature(self): + s = Series([0, 1]) + s.rank(method="average") + msg = "No axis named average for object type Series" + with pytest.raises(ValueError, match=msg): + s.rank("average") + + @pytest.mark.parametrize("dtype", [None, object]) + def test_rank_tie_methods(self, ser, results, dtype): + method, exp = results + ser = ser if dtype is None else ser.astype(dtype) + result = ser.rank(method=method) + tm.assert_series_equal(result, Series(exp)) + + @td.skip_if_no_scipy + @pytest.mark.parametrize("ascending", [True, False]) + @pytest.mark.parametrize("method", ["average", "min", "max", "first", "dense"]) + @pytest.mark.parametrize("na_option", ["top", "bottom", "keep"]) + @pytest.mark.parametrize( + "dtype, na_value, pos_inf, neg_inf", + [ + ("object", None, Infinity(), NegInfinity()), + ("float64", np.nan, np.inf, -np.inf), + ], + ) + def test_rank_tie_methods_on_infs_nans( + self, method, na_option, ascending, dtype, na_value, pos_inf, neg_inf + ): + chunk = 3 + + in_arr = [neg_inf] * chunk + [na_value] * chunk + [pos_inf] * chunk + iseries = Series(in_arr, dtype=dtype) + exp_ranks = { + "average": ([2, 2, 2], [5, 5, 5], [8, 8, 8]), + "min": 
([1, 1, 1], [4, 4, 4], [7, 7, 7]), + "max": ([3, 3, 3], [6, 6, 6], [9, 9, 9]), + "first": ([1, 2, 3], [4, 5, 6], [7, 8, 9]), + "dense": ([1, 1, 1], [2, 2, 2], [3, 3, 3]), + } + ranks = exp_ranks[method] + if na_option == "top": + order = [ranks[1], ranks[0], ranks[2]] + elif na_option == "bottom": + order = [ranks[0], ranks[2], ranks[1]] + else: + order = [ranks[0], [np.nan] * chunk, ranks[1]] + expected = order if ascending else order[::-1] + expected = list(chain.from_iterable(expected)) + result = iseries.rank(method=method, na_option=na_option, ascending=ascending) + tm.assert_series_equal(result, Series(expected, dtype="float64")) + + def test_rank_desc_mix_nans_infs(self): + # GH 19538 + # check descending ranking when mix nans and infs + iseries = Series([1, np.nan, np.inf, -np.inf, 25]) + result = iseries.rank(ascending=False) + exp = Series([3, np.nan, 1, 4, 2], dtype="float64") + tm.assert_series_equal(result, exp) + + @td.skip_if_no_scipy + @pytest.mark.parametrize("method", ["average", "min", "max", "first", "dense"]) + @pytest.mark.parametrize( + "op, value", + [ + [operator.add, 0], + [operator.add, 1e6], + [operator.mul, 1e-6], + ], + ) + def test_rank_methods_series(self, method, op, value): + from scipy.stats import rankdata + + xs = np.random.randn(9) + xs = np.concatenate([xs[i:] for i in range(0, 9, 2)]) # add duplicates + np.random.shuffle(xs) + + index = [chr(ord("a") + i) for i in range(len(xs))] + vals = op(xs, value) + ts = Series(vals, index=index) + result = ts.rank(method=method) + sprank = rankdata(vals, method if method != "first" else "ordinal") + expected = Series(sprank, index=index).astype("float64") + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("dtype", ["O", "f8", "i8"]) + @pytest.mark.parametrize( + "ser, exp", + [ + ([1], [1]), + ([2], [1]), + ([0], [1]), + ([2, 2], [1, 1]), + ([1, 2, 3], [1, 2, 3]), + ([4, 2, 1], [3, 2, 1]), + ([1, 1, 5, 5, 3], [1, 1, 3, 3, 2]), + ([-5, -4, -3, -2, -1], [1, 2, 3, 4, 5]), + ], + ) + def test_rank_dense_method(self, dtype, ser, exp): + s = Series(ser).astype(dtype) + result = s.rank(method="dense") + expected = Series(exp).astype(result.dtype) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("dtype", ["O", "f8", "i8"]) + def test_rank_descending(self, ser, results, dtype): + method, _ = results + if "i" in dtype: + s = ser.dropna() + else: + s = ser.astype(dtype) + + res = s.rank(ascending=False) + expected = (s.max() - s).rank() + tm.assert_series_equal(res, expected) + + expected = (s.max() - s).rank(method=method) + res2 = s.rank(method=method, ascending=False) + tm.assert_series_equal(res2, expected) + + def test_rank_int(self, ser, results): + method, exp = results + s = ser.dropna().astype("i8") + + result = s.rank(method=method) + expected = Series(exp).dropna() + expected.index = result.index + tm.assert_series_equal(result, expected) + + def test_rank_object_bug(self): + # GH 13445 + + # smoke tests + Series([np.nan] * 32).astype(object).rank(ascending=True) + Series([np.nan] * 32).astype(object).rank(ascending=False) + + def test_rank_modify_inplace(self): + # GH 18521 + # Check rank does not mutate series + s = Series([Timestamp("2017-01-05 10:20:27.569000"), NaT]) + expected = s.copy() + + s.rank() + result = s + tm.assert_series_equal(result, expected) + + +# GH15630, pct should be on 100% basis when method='dense' + + +@pytest.mark.parametrize("dtype", ["O", "f8", "i8"]) +@pytest.mark.parametrize( + "ser, exp", + [ + ([1], [1.0]), + ([1, 2], [1.0 / 2, 2.0 / 
2]), + ([2, 2], [1.0, 1.0]), + ([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]), + ([1, 2, 2], [1.0 / 2, 2.0 / 2, 2.0 / 2]), + ([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]), + ([1, 1, 5, 5, 3], [1.0 / 3, 1.0 / 3, 3.0 / 3, 3.0 / 3, 2.0 / 3]), + ([1, 1, 3, 3, 5, 5], [1.0 / 3, 1.0 / 3, 2.0 / 3, 2.0 / 3, 3.0 / 3, 3.0 / 3]), + ([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]), + ], +) +def test_rank_dense_pct(dtype, ser, exp): + s = Series(ser).astype(dtype) + result = s.rank(method="dense", pct=True) + expected = Series(exp).astype(result.dtype) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("dtype", ["O", "f8", "i8"]) +@pytest.mark.parametrize( + "ser, exp", + [ + ([1], [1.0]), + ([1, 2], [1.0 / 2, 2.0 / 2]), + ([2, 2], [1.0 / 2, 1.0 / 2]), + ([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]), + ([1, 2, 2], [1.0 / 3, 2.0 / 3, 2.0 / 3]), + ([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]), + ([1, 1, 5, 5, 3], [1.0 / 5, 1.0 / 5, 4.0 / 5, 4.0 / 5, 3.0 / 5]), + ([1, 1, 3, 3, 5, 5], [1.0 / 6, 1.0 / 6, 3.0 / 6, 3.0 / 6, 5.0 / 6, 5.0 / 6]), + ([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]), + ], +) +def test_rank_min_pct(dtype, ser, exp): + s = Series(ser).astype(dtype) + result = s.rank(method="min", pct=True) + expected = Series(exp).astype(result.dtype) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("dtype", ["O", "f8", "i8"]) +@pytest.mark.parametrize( + "ser, exp", + [ + ([1], [1.0]), + ([1, 2], [1.0 / 2, 2.0 / 2]), + ([2, 2], [1.0, 1.0]), + ([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]), + ([1, 2, 2], [1.0 / 3, 3.0 / 3, 3.0 / 3]), + ([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]), + ([1, 1, 5, 5, 3], [2.0 / 5, 2.0 / 5, 5.0 / 5, 5.0 / 5, 3.0 / 5]), + ([1, 1, 3, 3, 5, 5], [2.0 / 6, 2.0 / 6, 4.0 / 6, 4.0 / 6, 6.0 / 6, 6.0 / 6]), + ([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]), + ], +) +def test_rank_max_pct(dtype, ser, exp): + s = Series(ser).astype(dtype) + result = s.rank(method="max", pct=True) + expected = Series(exp).astype(result.dtype) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("dtype", ["O", "f8", "i8"]) +@pytest.mark.parametrize( + "ser, exp", + [ + ([1], [1.0]), + ([1, 2], [1.0 / 2, 2.0 / 2]), + ([2, 2], [1.5 / 2, 1.5 / 2]), + ([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]), + ([1, 2, 2], [1.0 / 3, 2.5 / 3, 2.5 / 3]), + ([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]), + ([1, 1, 5, 5, 3], [1.5 / 5, 1.5 / 5, 4.5 / 5, 4.5 / 5, 3.0 / 5]), + ([1, 1, 3, 3, 5, 5], [1.5 / 6, 1.5 / 6, 3.5 / 6, 3.5 / 6, 5.5 / 6, 5.5 / 6]), + ([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]), + ], +) +def test_rank_average_pct(dtype, ser, exp): + s = Series(ser).astype(dtype) + result = s.rank(method="average", pct=True) + expected = Series(exp).astype(result.dtype) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("dtype", ["f8", "i8"]) +@pytest.mark.parametrize( + "ser, exp", + [ + ([1], [1.0]), + ([1, 2], [1.0 / 2, 2.0 / 2]), + ([2, 2], [1.0 / 2, 2.0 / 2.0]), + ([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]), + ([1, 2, 2], [1.0 / 3, 2.0 / 3, 3.0 / 3]), + ([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]), + ([1, 1, 5, 5, 3], [1.0 / 5, 2.0 / 5, 4.0 / 5, 5.0 / 5, 3.0 / 5]), + ([1, 1, 3, 3, 5, 5], [1.0 / 6, 2.0 / 6, 3.0 / 6, 4.0 / 6, 5.0 / 6, 6.0 / 6]), + ([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]), + ], +) +def test_rank_first_pct(dtype, ser, exp): + s = Series(ser).astype(dtype) + result = s.rank(method="first", pct=True) + expected = Series(exp).astype(result.dtype) + 
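# method="first" breaks ties by order of appearance, so each pct rank + # above is i/len(ser) for the i-th occurrence of a tied value +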
tm.assert_series_equal(result, expected) + + +@pytest.mark.single +@pytest.mark.high_memory +def test_pct_max_many_rows(): + # GH 18271 + s = Series(np.arange(2 ** 24 + 1)) + result = s.rank(pct=True).max() + assert result == 1 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_reindex.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_reindex.py new file mode 100644 index 0000000..e0f1491 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_reindex.py @@ -0,0 +1,410 @@ +import numpy as np +import pytest + +from pandas import ( + Categorical, + Index, + MultiIndex, + NaT, + Period, + PeriodIndex, + Series, + Timedelta, + Timestamp, + date_range, + isna, +) +import pandas._testing as tm + + +def test_reindex(datetime_series, string_series): + identity = string_series.reindex(string_series.index) + + # __array_interface__ is not defined for older numpies + # and on some pythons + try: + assert np.may_share_memory(string_series.index, identity.index) + except AttributeError: + pass + + assert identity.index.is_(string_series.index) + assert identity.index.identical(string_series.index) + + subIndex = string_series.index[10:20] + subSeries = string_series.reindex(subIndex) + + for idx, val in subSeries.items(): + assert val == string_series[idx] + + subIndex2 = datetime_series.index[10:20] + subTS = datetime_series.reindex(subIndex2) + + for idx, val in subTS.items(): + assert val == datetime_series[idx] + stuffSeries = datetime_series.reindex(subIndex) + + assert np.isnan(stuffSeries).all() + + # This is extremely important for the Cython code to not screw up + nonContigIndex = datetime_series.index[::2] + subNonContig = datetime_series.reindex(nonContigIndex) + for idx, val in subNonContig.items(): + assert val == datetime_series[idx] + + # return a copy the same index here + result = datetime_series.reindex() + assert not (result is datetime_series) + + +def test_reindex_nan(): + ts = Series([2, 3, 5, 7], index=[1, 4, np.nan, 8]) + + i, j = [np.nan, 1, np.nan, 8, 4, np.nan], [2, 0, 2, 3, 1, 2] + tm.assert_series_equal(ts.reindex(i), ts.iloc[j]) + + ts.index = ts.index.astype("object") + + # reindex coerces index.dtype to float, loc/iloc doesn't + tm.assert_series_equal(ts.reindex(i), ts.iloc[j], check_index_type=False) + + +def test_reindex_series_add_nat(): + rng = date_range("1/1/2000 00:00:00", periods=10, freq="10s") + series = Series(rng) + + result = series.reindex(range(15)) + assert np.issubdtype(result.dtype, np.dtype("M8[ns]")) + + mask = result.isna() + assert mask[-5:].all() + assert not mask[:-5].any() + + +def test_reindex_with_datetimes(): + rng = date_range("1/1/2000", periods=20) + ts = Series(np.random.randn(20), index=rng) + + result = ts.reindex(list(ts.index[5:10])) + expected = ts[5:10] + expected.index = expected.index._with_freq(None) + tm.assert_series_equal(result, expected) + + result = ts[list(ts.index[5:10])] + tm.assert_series_equal(result, expected) + + +def test_reindex_corner(datetime_series): + # (don't forget to fix this) I think it's fixed + empty = Series(dtype=object) + empty.reindex(datetime_series.index, method="pad") # it works + + # corner case: pad empty series + reindexed = empty.reindex(datetime_series.index, method="pad") + + # pass non-Index + reindexed = datetime_series.reindex(list(datetime_series.index)) + datetime_series.index = datetime_series.index._with_freq(None) + tm.assert_series_equal(datetime_series, reindexed) + + # bad fill method + ts = 
datetime_series[::2] + msg = ( + r"Invalid fill method\. Expecting pad \(ffill\), backfill " + r"\(bfill\) or nearest\. Got foo" + ) + with pytest.raises(ValueError, match=msg): + ts.reindex(datetime_series.index, method="foo") + + +def test_reindex_pad(): + s = Series(np.arange(10), dtype="int64") + s2 = s[::2] + + reindexed = s2.reindex(s.index, method="pad") + reindexed2 = s2.reindex(s.index, method="ffill") + tm.assert_series_equal(reindexed, reindexed2) + + expected = Series([0, 0, 2, 2, 4, 4, 6, 6, 8, 8], index=np.arange(10)) + tm.assert_series_equal(reindexed, expected) + + # GH4604 + s = Series([1, 2, 3, 4, 5], index=["a", "b", "c", "d", "e"]) + new_index = ["a", "g", "c", "f"] + expected = Series([1, 1, 3, 3], index=new_index) + + # this changes dtype because the ffill happens after + result = s.reindex(new_index).ffill() + tm.assert_series_equal(result, expected.astype("float64")) + + result = s.reindex(new_index).ffill(downcast="infer") + tm.assert_series_equal(result, expected) + + expected = Series([1, 5, 3, 5], index=new_index) + result = s.reindex(new_index, method="ffill") + tm.assert_series_equal(result, expected) + + # inference of new dtype + s = Series([True, False, False, True], index=list("abcd")) + new_index = "agc" + result = s.reindex(list(new_index)).ffill() + expected = Series([True, True, False], index=list(new_index)) + tm.assert_series_equal(result, expected) + + # GH4618 shifted series downcasting + s = Series(False, index=range(0, 5)) + result = s.shift(1).fillna(method="bfill") + expected = Series(False, index=range(0, 5)) + tm.assert_series_equal(result, expected) + + +def test_reindex_nearest(): + s = Series(np.arange(10, dtype="int64")) + target = [0.1, 0.9, 1.5, 2.0] + result = s.reindex(target, method="nearest") + expected = Series(np.around(target).astype("int64"), target) + tm.assert_series_equal(expected, result) + + result = s.reindex(target, method="nearest", tolerance=0.2) + expected = Series([0, 1, np.nan, 2], target) + tm.assert_series_equal(expected, result) + + result = s.reindex(target, method="nearest", tolerance=[0.3, 0.01, 0.4, 3]) + expected = Series([0, np.nan, np.nan, 2], target) + tm.assert_series_equal(expected, result) + + +def test_reindex_int(datetime_series): + ts = datetime_series[::2] + int_ts = Series(np.zeros(len(ts), dtype=int), index=ts.index) + + # this should work fine + reindexed_int = int_ts.reindex(datetime_series.index) + + # if NaNs introduced + assert reindexed_int.dtype == np.float_ + + # NO NaNs introduced + reindexed_int = int_ts.reindex(int_ts.index[::2]) + assert reindexed_int.dtype == np.int_ + + +def test_reindex_bool(datetime_series): + # A series other than float, int, string, or object + ts = datetime_series[::2] + bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index) + + # this should work fine + reindexed_bool = bool_ts.reindex(datetime_series.index) + + # if NaNs introduced + assert reindexed_bool.dtype == np.object_ + + # NO NaNs introduced + reindexed_bool = bool_ts.reindex(bool_ts.index[::2]) + assert reindexed_bool.dtype == np.bool_ + + +def test_reindex_bool_pad(datetime_series): + # fail + ts = datetime_series[5:] + bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index) + filled_bool = bool_ts.reindex(datetime_series.index, method="pad") + assert isna(filled_bool[:5]).all() + + +def test_reindex_categorical(): + index = date_range("20000101", periods=3) + + # reindexing to an invalid Categorical + s = Series(["a", "b", "c"], dtype="category") + result = s.reindex(index) + 
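# none of the new date labels exist in the original index, so the result + # is all-NaN but keeps the original categories ["a", "b", "c"] +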
expected = Series( + Categorical(values=[np.nan, np.nan, np.nan], categories=["a", "b", "c"]) + ) + expected.index = index + tm.assert_series_equal(result, expected) + + # partial reindexing + expected = Series(Categorical(values=["b", "c"], categories=["a", "b", "c"])) + expected.index = [1, 2] + result = s.reindex([1, 2]) + tm.assert_series_equal(result, expected) + + expected = Series(Categorical(values=["c", np.nan], categories=["a", "b", "c"])) + expected.index = [2, 3] + result = s.reindex([2, 3]) + tm.assert_series_equal(result, expected) + + +def test_reindex_astype_order_consistency(): + # GH#17444 + ser = Series([1, 2, 3], index=[2, 0, 1]) + new_index = [0, 1, 2] + temp_dtype = "category" + new_dtype = str + result = ser.reindex(new_index).astype(temp_dtype).astype(new_dtype) + expected = ser.astype(temp_dtype).reindex(new_index).astype(new_dtype) + tm.assert_series_equal(result, expected) + + +def test_reindex_fill_value(): + # ----------------------------------------------------------- + # floats + floats = Series([1.0, 2.0, 3.0]) + result = floats.reindex([1, 2, 3]) + expected = Series([2.0, 3.0, np.nan], index=[1, 2, 3]) + tm.assert_series_equal(result, expected) + + result = floats.reindex([1, 2, 3], fill_value=0) + expected = Series([2.0, 3.0, 0], index=[1, 2, 3]) + tm.assert_series_equal(result, expected) + + # ----------------------------------------------------------- + # ints + ints = Series([1, 2, 3]) + + result = ints.reindex([1, 2, 3]) + expected = Series([2.0, 3.0, np.nan], index=[1, 2, 3]) + tm.assert_series_equal(result, expected) + + # don't upcast + result = ints.reindex([1, 2, 3], fill_value=0) + expected = Series([2, 3, 0], index=[1, 2, 3]) + assert issubclass(result.dtype.type, np.integer) + tm.assert_series_equal(result, expected) + + # ----------------------------------------------------------- + # objects + objects = Series([1, 2, 3], dtype=object) + + result = objects.reindex([1, 2, 3]) + expected = Series([2, 3, np.nan], index=[1, 2, 3], dtype=object) + tm.assert_series_equal(result, expected) + + result = objects.reindex([1, 2, 3], fill_value="foo") + expected = Series([2, 3, "foo"], index=[1, 2, 3], dtype=object) + tm.assert_series_equal(result, expected) + + # ------------------------------------------------------------ + # bools + bools = Series([True, False, True]) + + result = bools.reindex([1, 2, 3]) + expected = Series([False, True, np.nan], index=[1, 2, 3], dtype=object) + tm.assert_series_equal(result, expected) + + result = bools.reindex([1, 2, 3], fill_value=False) + expected = Series([False, True, False], index=[1, 2, 3]) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"]) +@pytest.mark.parametrize("fill_value", ["string", 0, Timedelta(0)]) +def test_reindex_fill_value_datetimelike_upcast(dtype, fill_value, using_array_manager): + # https://github.com/pandas-dev/pandas/issues/42921 + if using_array_manager: + pytest.skip("Array manager does not promote dtype, hence we fail") + + if dtype == "timedelta64[ns]" and fill_value == Timedelta(0): + # use the scalar that is not compatible with the dtype for this test + fill_value = Timestamp(0) + + ser = Series([NaT], dtype=dtype) + + result = ser.reindex([0, 1], fill_value=fill_value) + expected = Series([None, fill_value], index=[0, 1], dtype=object) + tm.assert_series_equal(result, expected) + + +def test_reindex_datetimeindexes_tz_naive_and_aware(): + # GH 8306 + idx = date_range("20131101", tz="America/Chicago", periods=7) + 
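# reindexing a tz-aware series against a tz-naive target is ambiguous and + # should raise TypeError instead of comparing incompatible dtypes +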
newidx = date_range("20131103", periods=10, freq="H") + s = Series(range(7), index=idx) + msg = ( + r"Cannot compare dtypes datetime64\[ns, America/Chicago\] " + r"and datetime64\[ns\]" + ) + with pytest.raises(TypeError, match=msg): + s.reindex(newidx, method="ffill") + + +def test_reindex_empty_series_tz_dtype(): + # GH 20869 + result = Series(dtype="datetime64[ns, UTC]").reindex([0, 1]) + expected = Series([NaT] * 2, dtype="datetime64[ns, UTC]") + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize( + "p_values, o_values, values, expected_values", + [ + ( + [Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC")], + [Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC"), "All"], + [1.0, 1.0], + [1.0, 1.0, np.nan], + ), + ( + [Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC")], + [Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC")], + [1.0, 1.0], + [1.0, 1.0], + ), + ], +) +def test_reindex_periodindex_with_object(p_values, o_values, values, expected_values): + # GH#28337 + period_index = PeriodIndex(p_values) + object_index = Index(o_values) + + ser = Series(values, index=period_index) + result = ser.reindex(object_index) + expected = Series(expected_values, index=object_index) + tm.assert_series_equal(result, expected) + + +def test_reindex_too_many_args(): + # GH 40980 + ser = Series([1, 2]) + with pytest.raises( + TypeError, match=r"Only one positional argument \('index'\) is allowed" + ): + ser.reindex([2, 3], False) + + +def test_reindex_double_index(): + # GH 40980 + ser = Series([1, 2]) + msg = r"'index' passed as both positional and keyword argument" + with pytest.raises(TypeError, match=msg): + ser.reindex([2, 3], index=[3, 4]) + + +def test_reindex_no_posargs(): + # GH 40980 + ser = Series([1, 2]) + result = ser.reindex(index=[1, 0]) + expected = Series([2, 1], index=[1, 0]) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("values", [[["a"], ["x"]], [[], []]]) +def test_reindex_empty_with_level(values): + # GH41170 + ser = Series( + range(len(values[0])), index=MultiIndex.from_arrays(values), dtype="object" + ) + result = ser.reindex(np.array(["b"]), level=0) + expected = Series( + index=MultiIndex(levels=[["b"], values[1]], codes=[[], []]), dtype="object" + ) + tm.assert_series_equal(result, expected) + + +def test_reindex_missing_category(): + # GH#18185 + ser = Series([1, 2, 3, 1], dtype="category") + msg = r"Cannot setitem on a Categorical with a new category \(-1\)" + with pytest.raises(TypeError, match=msg): + ser.reindex([1, 2, 3, 4, 5], fill_value=-1) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_reindex_like.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_reindex_like.py new file mode 100644 index 0000000..7f24c77 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_reindex_like.py @@ -0,0 +1,41 @@ +from datetime import datetime + +import numpy as np + +from pandas import Series +import pandas._testing as tm + + +def test_reindex_like(datetime_series): + other = datetime_series[::2] + tm.assert_series_equal( + datetime_series.reindex(other.index), datetime_series.reindex_like(other) + ) + + # GH#7179 + day1 = datetime(2013, 3, 5) + day2 = datetime(2013, 5, 5) + day3 = datetime(2014, 3, 5) + + series1 = Series([5, None, None], [day1, day2, day3]) + series2 = Series([None, None], [day1, day3]) + + result = series1.reindex_like(series2, method="pad") + expected = Series([5, np.nan], index=[day1, day3]) + tm.assert_series_equal(result, 
expected) + + +def test_reindex_like_nearest(): + ser = Series(np.arange(10, dtype="int64")) + + target = [0.1, 0.9, 1.5, 2.0] + other = ser.reindex(target, method="nearest") + expected = Series(np.around(target).astype("int64"), target) + + result = ser.reindex_like(other, method="nearest") + tm.assert_series_equal(expected, result) + + result = ser.reindex_like(other, method="nearest", tolerance=1) + tm.assert_series_equal(expected, result) + result = ser.reindex_like(other, method="nearest", tolerance=[1, 2, 3, 4]) + tm.assert_series_equal(expected, result) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_rename.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_rename.py new file mode 100644 index 0000000..90c8f77 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_rename.py @@ -0,0 +1,136 @@ +from datetime import datetime + +import numpy as np +import pytest + +from pandas import ( + Index, + MultiIndex, + Series, +) +import pandas._testing as tm + + +class TestRename: + def test_rename(self, datetime_series): + ts = datetime_series + renamer = lambda x: x.strftime("%Y%m%d") + renamed = ts.rename(renamer) + assert renamed.index[0] == renamer(ts.index[0]) + + # dict + rename_dict = dict(zip(ts.index, renamed.index)) + renamed2 = ts.rename(rename_dict) + tm.assert_series_equal(renamed, renamed2) + + def test_rename_partial_dict(self): + # partial dict + ser = Series(np.arange(4), index=["a", "b", "c", "d"], dtype="int64") + renamed = ser.rename({"b": "foo", "d": "bar"}) + tm.assert_index_equal(renamed.index, Index(["a", "foo", "c", "bar"])) + + def test_rename_retain_index_name(self): + # index with name + renamer = Series( + np.arange(4), index=Index(["a", "b", "c", "d"], name="name"), dtype="int64" + ) + renamed = renamer.rename({}) + assert renamed.index.name == renamer.index.name + + def test_rename_by_series(self): + ser = Series(range(5), name="foo") + renamer = Series({1: 10, 2: 20}) + result = ser.rename(renamer) + expected = Series(range(5), index=[0, 10, 20, 3, 4], name="foo") + tm.assert_series_equal(result, expected) + + def test_rename_set_name(self): + ser = Series(range(4), index=list("abcd")) + for name in ["foo", 123, 123.0, datetime(2001, 11, 11), ("foo",)]: + result = ser.rename(name) + assert result.name == name + tm.assert_numpy_array_equal(result.index.values, ser.index.values) + assert ser.name is None + + def test_rename_set_name_inplace(self): + ser = Series(range(3), index=list("abc")) + for name in ["foo", 123, 123.0, datetime(2001, 11, 11), ("foo",)]: + ser.rename(name, inplace=True) + assert ser.name == name + + exp = np.array(["a", "b", "c"], dtype=np.object_) + tm.assert_numpy_array_equal(ser.index.values, exp) + + def test_rename_axis_supported(self): + # Supporting axis for compatibility, detailed in GH-18589 + ser = Series(range(5)) + ser.rename({}, axis=0) + ser.rename({}, axis="index") + + with pytest.raises(ValueError, match="No axis named 5"): + ser.rename({}, axis=5) + + def test_rename_inplace(self, datetime_series): + renamer = lambda x: x.strftime("%Y%m%d") + expected = renamer(datetime_series.index[0]) + + datetime_series.rename(renamer, inplace=True) + assert datetime_series.index[0] == expected + + def test_rename_with_custom_indexer(self): + # GH 27814 + class MyIndexer: + pass + + ix = MyIndexer() + ser = Series([1, 2, 3]).rename(ix) + assert ser.name is ix + + def test_rename_with_custom_indexer_inplace(self): + # GH 27814 + class MyIndexer: + pass + 
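# an arbitrary user-defined object; rename() should accept any hashable + # (even this) and set it as the Series name in place +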
+ ix = MyIndexer() + ser = Series([1, 2, 3]) + ser.rename(ix, inplace=True) + assert ser.name is ix + + def test_rename_callable(self): + # GH 17407 + ser = Series(range(1, 6), index=Index(range(2, 7), name="IntIndex")) + result = ser.rename(str) + expected = ser.rename(lambda i: str(i)) + tm.assert_series_equal(result, expected) + + assert result.name == expected.name + + def test_rename_none(self): + # GH 40977 + ser = Series([1, 2], name="foo") + result = ser.rename(None) + expected = Series([1, 2]) + tm.assert_series_equal(result, expected) + + def test_rename_series_with_multiindex(self): + # issue #43659 + arrays = [ + ["bar", "baz", "baz", "foo", "qux"], + ["one", "one", "two", "two", "one"], + ] + + index = MultiIndex.from_arrays(arrays, names=["first", "second"]) + ser = Series(np.ones(5), index=index) + result = ser.rename(index={"one": "yes"}, level="second", errors="raise") + + arrays_expected = [ + ["bar", "baz", "baz", "foo", "qux"], + ["yes", "yes", "two", "two", "yes"], + ] + + index_expected = MultiIndex.from_arrays( + arrays_expected, names=["first", "second"] + ) + series_expected = Series(np.ones(5), index=index_expected) + + tm.assert_series_equal(result, series_expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_rename_axis.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_rename_axis.py new file mode 100644 index 0000000..58c095d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_rename_axis.py @@ -0,0 +1,47 @@ +import pytest + +from pandas import ( + Index, + MultiIndex, + Series, +) +import pandas._testing as tm + + +class TestSeriesRenameAxis: + def test_rename_axis_mapper(self): + # GH 19978 + mi = MultiIndex.from_product([["a", "b", "c"], [1, 2]], names=["ll", "nn"]) + ser = Series(list(range(len(mi))), index=mi) + + result = ser.rename_axis(index={"ll": "foo"}) + assert result.index.names == ["foo", "nn"] + + result = ser.rename_axis(index=str.upper, axis=0) + assert result.index.names == ["LL", "NN"] + + result = ser.rename_axis(index=["foo", "goo"]) + assert result.index.names == ["foo", "goo"] + + with pytest.raises(TypeError, match="unexpected"): + ser.rename_axis(columns="wrong") + + def test_rename_axis_inplace(self, datetime_series): + # GH 15704 + expected = datetime_series.rename_axis("foo") + result = datetime_series + no_return = result.rename_axis("foo", inplace=True) + + assert no_return is None + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("kwargs", [{"mapper": None}, {"index": None}, {}]) + def test_rename_axis_none(self, kwargs): + # GH 25034 + index = Index(list("abc"), name="foo") + ser = Series([1, 2, 3], index=index) + + result = ser.rename_axis(**kwargs) + expected_index = index.rename(None) if kwargs else index + expected = Series([1, 2, 3], index=expected_index) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_repeat.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_repeat.py new file mode 100644 index 0000000..e63317f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_repeat.py @@ -0,0 +1,40 @@ +import numpy as np +import pytest + +from pandas import ( + MultiIndex, + Series, +) +import pandas._testing as tm + + +class TestRepeat: + def test_repeat(self): + ser = Series(np.random.randn(3), index=["a", "b", "c"]) + + reps = ser.repeat(5) + exp = Series(ser.values.repeat(5), 
index=ser.index.values.repeat(5)) + tm.assert_series_equal(reps, exp) + + to_rep = [2, 3, 4] + reps = ser.repeat(to_rep) + exp = Series(ser.values.repeat(to_rep), index=ser.index.values.repeat(to_rep)) + tm.assert_series_equal(reps, exp) + + def test_numpy_repeat(self): + ser = Series(np.arange(3), name="x") + expected = Series( + ser.values.repeat(2), name="x", index=ser.index.values.repeat(2) + ) + tm.assert_series_equal(np.repeat(ser, 2), expected) + + msg = "the 'axis' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.repeat(ser, 2, axis=0) + + def test_repeat_with_multiindex(self): + # GH#9361, fixed by GH#7891 + m_idx = MultiIndex.from_tuples([(1, 2), (3, 4), (5, 6), (7, 8)]) + data = ["a", "b", "c", "d"] + m_df = Series(data, index=m_idx) + assert m_df.repeat(3).shape == (3 * len(data),) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_replace.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_replace.py new file mode 100644 index 0000000..6a8dacf --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_replace.py @@ -0,0 +1,653 @@ +import re + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays import IntervalArray + + +class TestSeriesReplace: + def test_replace_explicit_none(self): + # GH#36984 if the user explicitly passes value=None, give it to them + ser = pd.Series([0, 0, ""], dtype=object) + result = ser.replace("", None) + expected = pd.Series([0, 0, None], dtype=object) + tm.assert_series_equal(result, expected) + + df = pd.DataFrame(np.zeros((3, 3))) + df.iloc[2, 2] = "" + result = df.replace("", None) + expected = pd.DataFrame( + { + 0: np.zeros(3), + 1: np.zeros(3), + 2: np.array([0.0, 0.0, None], dtype=object), + } + ) + assert expected.iloc[2, 2] is None + tm.assert_frame_equal(result, expected) + + # GH#19998 same thing with object dtype + ser = pd.Series([10, 20, 30, "a", "a", "b", "a"]) + result = ser.replace("a", None) + expected = pd.Series([10, 20, 30, None, None, "b", None]) + assert expected.iloc[-1] is None + tm.assert_series_equal(result, expected) + + def test_replace_noop_doesnt_downcast(self): + # GH#44498 + ser = pd.Series([None, None, pd.Timestamp("2021-12-16 17:31")], dtype=object) + res = ser.replace({np.nan: None}) # should be a no-op + tm.assert_series_equal(res, ser) + assert res.dtype == object + + # same thing but different calling convention + res = ser.replace(np.nan, None) + tm.assert_series_equal(res, ser) + assert res.dtype == object + + def test_replace(self): + N = 100 + ser = pd.Series(np.random.randn(N)) + ser[0:4] = np.nan + ser[6:10] = 0 + + # replace list with a single value + return_value = ser.replace([np.nan], -1, inplace=True) + assert return_value is None + + exp = ser.fillna(-1) + tm.assert_series_equal(ser, exp) + + rs = ser.replace(0.0, np.nan) + ser[ser == 0.0] = np.nan + tm.assert_series_equal(rs, ser) + + ser = pd.Series(np.fabs(np.random.randn(N)), tm.makeDateIndex(N), dtype=object) + ser[:5] = np.nan + ser[6:10] = "foo" + ser[20:30] = "bar" + + # replace list with a single value + rs = ser.replace([np.nan, "foo", "bar"], -1) + + assert (rs[:5] == -1).all() + assert (rs[6:10] == -1).all() + assert (rs[20:30] == -1).all() + assert (pd.isna(ser[:5])).all() + + # replace with different values + rs = ser.replace({np.nan: -1, "foo": -2, "bar": -3}) + + assert (rs[:5] == -1).all() + assert (rs[6:10] == -2).all() + assert (rs[20:30] == -3).all() + assert 
(pd.isna(ser[:5])).all() + + # replace with different values with 2 lists + rs2 = ser.replace([np.nan, "foo", "bar"], [-1, -2, -3]) + tm.assert_series_equal(rs, rs2) + + # replace inplace + return_value = ser.replace([np.nan, "foo", "bar"], -1, inplace=True) + assert return_value is None + + assert (ser[:5] == -1).all() + assert (ser[6:10] == -1).all() + assert (ser[20:30] == -1).all() + + def test_replace_nan_with_inf(self): + ser = pd.Series([np.nan, 0, np.inf]) + tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0)) + + ser = pd.Series([np.nan, 0, "foo", "bar", np.inf, None, pd.NaT]) + tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0)) + filled = ser.copy() + filled[4] = 0 + tm.assert_series_equal(ser.replace(np.inf, 0), filled) + + def test_replace_listlike_value_listlike_target(self, datetime_series): + ser = pd.Series(datetime_series.index) + tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0)) + + # malformed + msg = r"Replacement lists must match in length\. Expecting 3 got 2" + with pytest.raises(ValueError, match=msg): + ser.replace([1, 2, 3], [np.nan, 0]) + + # ser is dt64 so can't hold 1 or 2, so this replace is a no-op + result = ser.replace([1, 2], [np.nan, 0]) + tm.assert_series_equal(result, ser) + + ser = pd.Series([0, 1, 2, 3, 4]) + result = ser.replace([0, 1, 2, 3, 4], [4, 3, 2, 1, 0]) + tm.assert_series_equal(result, pd.Series([4, 3, 2, 1, 0])) + + def test_replace_gh5319(self): + # API change from 0.12? + # GH 5319 + ser = pd.Series([0, np.nan, 2, 3, 4]) + expected = ser.ffill() + result = ser.replace([np.nan]) + tm.assert_series_equal(result, expected) + + ser = pd.Series([0, np.nan, 2, 3, 4]) + expected = ser.ffill() + result = ser.replace(np.nan) + tm.assert_series_equal(result, expected) + + def test_replace_datetime64(self): + # GH 5797 + ser = pd.Series(pd.date_range("20130101", periods=5)) + expected = ser.copy() + expected.loc[2] = pd.Timestamp("20120101") + result = ser.replace({pd.Timestamp("20130103"): pd.Timestamp("20120101")}) + tm.assert_series_equal(result, expected) + result = ser.replace(pd.Timestamp("20130103"), pd.Timestamp("20120101")) + tm.assert_series_equal(result, expected) + + def test_replace_nat_with_tz(self): + # GH 11792: Test with replacing NaT in a list with tz data + ts = pd.Timestamp("2015/01/01", tz="UTC") + s = pd.Series([pd.NaT, pd.Timestamp("2015/01/01", tz="UTC")]) + result = s.replace([np.nan, pd.NaT], pd.Timestamp.min) + expected = pd.Series([pd.Timestamp.min, ts], dtype=object) + tm.assert_series_equal(expected, result) + + def test_replace_timedelta_td64(self): + tdi = pd.timedelta_range(0, periods=5) + ser = pd.Series(tdi) + + # Using a single dict argument means we go through replace_list + result = ser.replace({ser[1]: ser[3]}) + + expected = pd.Series([ser[0], ser[3], ser[2], ser[3], ser[4]]) + tm.assert_series_equal(result, expected) + + def test_replace_with_single_list(self): + ser = pd.Series([0, 1, 2, 3, 4]) + result = ser.replace([1, 2, 3]) + tm.assert_series_equal(result, pd.Series([0, 0, 0, 0, 4])) + + s = ser.copy() + return_value = s.replace([1, 2, 3], inplace=True) + assert return_value is None + tm.assert_series_equal(s, pd.Series([0, 0, 0, 0, 4])) + + # make sure things don't get corrupted when fillna call fails + s = ser.copy() + msg = ( + r"Invalid fill method\. Expecting pad \(ffill\) or backfill " + r"\(bfill\)\. 
Got crash_cymbal" + ) + with pytest.raises(ValueError, match=msg): + return_value = s.replace([1, 2, 3], inplace=True, method="crash_cymbal") + assert return_value is None + tm.assert_series_equal(s, ser) + + def test_replace_mixed_types(self): + ser = pd.Series(np.arange(5), dtype="int64") + + def check_replace(to_rep, val, expected): + sc = ser.copy() + result = ser.replace(to_rep, val) + return_value = sc.replace(to_rep, val, inplace=True) + assert return_value is None + tm.assert_series_equal(expected, result) + tm.assert_series_equal(expected, sc) + + # 3.0 can still be held in our int64 series, so we do not upcast GH#44940 + tr, v = [3], [3.0] + check_replace(tr, v, ser) + # Note this matches what we get with the scalars 3 and 3.0 + check_replace(tr[0], v[0], ser) + + # MUST upcast to float + e = pd.Series([0, 1, 2, 3.5, 4]) + tr, v = [3], [3.5] + check_replace(tr, v, e) + + # casts to object + e = pd.Series([0, 1, 2, 3.5, "a"]) + tr, v = [3, 4], [3.5, "a"] + check_replace(tr, v, e) + + # again casts to object + e = pd.Series([0, 1, 2, 3.5, pd.Timestamp("20130101")]) + tr, v = [3, 4], [3.5, pd.Timestamp("20130101")] + check_replace(tr, v, e) + + # casts to object + e = pd.Series([0, 1, 2, 3.5, True], dtype="object") + tr, v = [3, 4], [3.5, True] + check_replace(tr, v, e) + + # test an object with dates + floats + integers + strings + dr = pd.Series(pd.date_range("1/1/2001", "1/10/2001", freq="D")) + result = dr.astype(object).replace([dr[0], dr[1], dr[2]], [1.0, 2, "a"]) + expected = pd.Series([1.0, 2, "a"] + dr[3:].tolist(), dtype=object) + tm.assert_series_equal(result, expected) + + def test_replace_bool_with_string_no_op(self): + s = pd.Series([True, False, True]) + result = s.replace("fun", "in-the-sun") + tm.assert_series_equal(s, result) + + def test_replace_bool_with_string(self): + # nonexistent elements + s = pd.Series([True, False, True]) + result = s.replace(True, "2u") + expected = pd.Series(["2u", False, "2u"]) + tm.assert_series_equal(expected, result) + + def test_replace_bool_with_bool(self): + s = pd.Series([True, False, True]) + result = s.replace(True, False) + expected = pd.Series([False] * len(s)) + tm.assert_series_equal(expected, result) + + def test_replace_with_dict_with_bool_keys(self): + s = pd.Series([True, False, True]) + result = s.replace({"asdf": "asdb", True: "yes"}) + expected = pd.Series(["yes", False, "yes"]) + tm.assert_series_equal(result, expected) + + def test_replace_Int_with_na(self, any_int_ea_dtype): + # GH 38267 + result = pd.Series([0, None], dtype=any_int_ea_dtype).replace(0, pd.NA) + expected = pd.Series([pd.NA, pd.NA], dtype=any_int_ea_dtype) + tm.assert_series_equal(result, expected) + result = pd.Series([0, 1], dtype=any_int_ea_dtype).replace(0, pd.NA) + result.replace(1, pd.NA, inplace=True) + tm.assert_series_equal(result, expected) + + def test_replace2(self): + N = 100 + ser = pd.Series(np.fabs(np.random.randn(N)), tm.makeDateIndex(N), dtype=object) + ser[:5] = np.nan + ser[6:10] = "foo" + ser[20:30] = "bar" + + # replace list with a single value + rs = ser.replace([np.nan, "foo", "bar"], -1) + + assert (rs[:5] == -1).all() + assert (rs[6:10] == -1).all() + assert (rs[20:30] == -1).all() + assert (pd.isna(ser[:5])).all() + + # replace with different values + rs = ser.replace({np.nan: -1, "foo": -2, "bar": -3}) + + assert (rs[:5] == -1).all() + assert (rs[6:10] == -2).all() + assert (rs[20:30] == -3).all() + assert (pd.isna(ser[:5])).all() + + # replace with different values with 2 lists + rs2 = ser.replace([np.nan, "foo", 
"bar"], [-1, -2, -3]) + tm.assert_series_equal(rs, rs2) + + # replace inplace + return_value = ser.replace([np.nan, "foo", "bar"], -1, inplace=True) + assert return_value is None + assert (ser[:5] == -1).all() + assert (ser[6:10] == -1).all() + assert (ser[20:30] == -1).all() + + def test_replace_with_dictlike_and_string_dtype(self, nullable_string_dtype): + # GH 32621, GH#44940 + ser = pd.Series(["one", "two", np.nan], dtype=nullable_string_dtype) + expected = pd.Series(["1", "2", np.nan], dtype=nullable_string_dtype) + result = ser.replace({"one": "1", "two": "2"}) + tm.assert_series_equal(expected, result) + + def test_replace_with_empty_dictlike(self): + # GH 15289 + s = pd.Series(list("abcd")) + tm.assert_series_equal(s, s.replace({})) + + with tm.assert_produces_warning(FutureWarning): + empty_series = pd.Series([]) + tm.assert_series_equal(s, s.replace(empty_series)) + + def test_replace_string_with_number(self): + # GH 15743 + s = pd.Series([1, 2, 3]) + result = s.replace("2", np.nan) + expected = pd.Series([1, 2, 3]) + tm.assert_series_equal(expected, result) + + def test_replace_replacer_equals_replacement(self): + # GH 20656 + # make sure all replacers are matching against original values + s = pd.Series(["a", "b"]) + expected = pd.Series(["b", "a"]) + result = s.replace({"a": "b", "b": "a"}) + tm.assert_series_equal(expected, result) + + def test_replace_unicode_with_number(self): + # GH 15743 + s = pd.Series([1, 2, 3]) + result = s.replace("2", np.nan) + expected = pd.Series([1, 2, 3]) + tm.assert_series_equal(expected, result) + + def test_replace_mixed_types_with_string(self): + # Testing mixed + s = pd.Series([1, 2, 3, "4", 4, 5]) + result = s.replace([2, "4"], np.nan) + expected = pd.Series([1, np.nan, 3, np.nan, 4, 5]) + tm.assert_series_equal(expected, result) + + @pytest.mark.parametrize( + "categorical, numeric", + [ + (pd.Categorical(["A"], categories=["A", "B"]), [1]), + (pd.Categorical(["A", "B"], categories=["A", "B"]), [1, 2]), + ], + ) + def test_replace_categorical(self, categorical, numeric): + # GH 24971, GH#23305 + ser = pd.Series(categorical) + result = ser.replace({"A": 1, "B": 2}) + expected = pd.Series(numeric).astype("category") + if 2 not in expected.cat.categories: + # i.e. 
categories should be [1, 2] even if there are no "B"s present + # GH#44940 + expected = expected.cat.add_categories(2) + tm.assert_series_equal(expected, result) + + def test_replace_categorical_single(self): + # GH 26988 + dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific") + s = pd.Series(dti) + c = s.astype("category") + + expected = c.copy() + expected = expected.cat.add_categories("foo") + expected[2] = "foo" + expected = expected.cat.remove_unused_categories() + assert c[2] != "foo" + + result = c.replace(c[2], "foo") + tm.assert_series_equal(expected, result) + assert c[2] != "foo" # ensure non-inplace call does not alter original + + return_value = c.replace(c[2], "foo", inplace=True) + assert return_value is None + tm.assert_series_equal(expected, c) + + first_value = c[0] + return_value = c.replace(c[1], c[0], inplace=True) + assert return_value is None + assert c[0] == c[1] == first_value # test replacing with existing value + + def test_replace_with_no_overflowerror(self): + # GH 25616 + # casts to object without Exception from OverflowError + s = pd.Series([0, 1, 2, 3, 4]) + result = s.replace([3], ["100000000000000000000"]) + expected = pd.Series([0, 1, 2, "100000000000000000000", 4]) + tm.assert_series_equal(result, expected) + + s = pd.Series([0, "100000000000000000000", "100000000000000000001"]) + result = s.replace(["100000000000000000000"], [1]) + expected = pd.Series([0, 1, "100000000000000000001"]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "ser, to_replace, exp", + [ + ([1, 2, 3], {1: 2, 2: 3, 3: 4}, [2, 3, 4]), + (["1", "2", "3"], {"1": "2", "2": "3", "3": "4"}, ["2", "3", "4"]), + ], + ) + def test_replace_commutative(self, ser, to_replace, exp): + # GH 16051 + # DataFrame.replace() overwrites when values are non-numeric + + series = pd.Series(ser) + + expected = pd.Series(exp) + result = series.replace(to_replace) + + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "ser, exp", [([1, 2, 3], [1, True, 3]), (["x", 2, 3], ["x", True, 3])] + ) + def test_replace_no_cast(self, ser, exp): + # GH 9113 + # BUG: replace int64 dtype with bool coerces to int64 + + series = pd.Series(ser) + result = series.replace(2, True) + expected = pd.Series(exp) + + tm.assert_series_equal(result, expected) + + def test_replace_invalid_to_replace(self): + # GH 18634 + # API: replace() should raise an exception if invalid argument is given + series = pd.Series(["a", "b", "c "]) + msg = ( + r"Expecting 'to_replace' to be either a scalar, array-like, " + r"dict or None, got invalid type.*" + ) + with pytest.raises(TypeError, match=msg): + series.replace(lambda x: x.strip()) + + @pytest.mark.parametrize("frame", [False, True]) + def test_replace_nonbool_regex(self, frame): + obj = pd.Series(["a", "b", "c "]) + if frame: + obj = obj.to_frame() + + msg = "'to_replace' must be 'None' if 'regex' is not a bool" + with pytest.raises(ValueError, match=msg): + obj.replace(to_replace=["a"], regex="foo") + + @pytest.mark.parametrize("frame", [False, True]) + def test_replace_empty_copy(self, frame): + obj = pd.Series([], dtype=np.float64) + if frame: + obj = obj.to_frame() + + res = obj.replace(4, 5, inplace=True) + assert res is None + + res = obj.replace(4, 5, inplace=False) + tm.assert_equal(res, obj) + assert res is not obj + + def test_replace_only_one_dictlike_arg(self, fixed_now_ts): + # GH#33340 + + ser = pd.Series([1, 2, "A", fixed_now_ts, True]) + to_replace = {0: 1, 2: "A"} + value = "foo" + msg = "Series.replace cannot use 
dict-like to_replace and non-None value" + with pytest.raises(ValueError, match=msg): + ser.replace(to_replace, value) + + to_replace = 1 + value = {0: "foo", 2: "bar"} + msg = "Series.replace cannot use dict-value and non-None to_replace" + with pytest.raises(ValueError, match=msg): + ser.replace(to_replace, value) + + def test_replace_extension_other(self, frame_or_series): + # https://github.com/pandas-dev/pandas/issues/34530 + obj = frame_or_series(pd.array([1, 2, 3], dtype="Int64")) + result = obj.replace("", "") # no exception + # should not have changed dtype + tm.assert_equal(obj, result) + + def _check_replace_with_method(self, ser: pd.Series): + df = ser.to_frame() + + res = ser.replace(ser[1], method="pad") + expected = pd.Series([ser[0], ser[0]] + list(ser[2:]), dtype=ser.dtype) + tm.assert_series_equal(res, expected) + + res_df = df.replace(ser[1], method="pad") + tm.assert_frame_equal(res_df, expected.to_frame()) + + ser2 = ser.copy() + res2 = ser2.replace(ser[1], method="pad", inplace=True) + assert res2 is None + tm.assert_series_equal(ser2, expected) + + res_df2 = df.replace(ser[1], method="pad", inplace=True) + assert res_df2 is None + tm.assert_frame_equal(df, expected.to_frame()) + + def test_replace_ea_dtype_with_method(self, any_numeric_ea_dtype): + arr = pd.array([1, 2, pd.NA, 4], dtype=any_numeric_ea_dtype) + ser = pd.Series(arr) + + self._check_replace_with_method(ser) + + @pytest.mark.parametrize("as_categorical", [True, False]) + def test_replace_interval_with_method(self, as_categorical): + # in particular interval that can't hold NA + + idx = pd.IntervalIndex.from_breaks(range(4)) + ser = pd.Series(idx) + if as_categorical: + ser = ser.astype("category") + + self._check_replace_with_method(ser) + + @pytest.mark.parametrize("as_period", [True, False]) + @pytest.mark.parametrize("as_categorical", [True, False]) + def test_replace_datetimelike_with_method(self, as_period, as_categorical): + idx = pd.date_range("2016-01-01", periods=5, tz="US/Pacific") + if as_period: + idx = idx.tz_localize(None).to_period("D") + + ser = pd.Series(idx) + ser.iloc[-2] = pd.NaT + if as_categorical: + ser = ser.astype("category") + + self._check_replace_with_method(ser) + + def test_replace_with_compiled_regex(self): + # https://github.com/pandas-dev/pandas/issues/35680 + s = pd.Series(["a", "b", "c"]) + regex = re.compile("^a$") + result = s.replace({regex: "z"}, regex=True) + expected = pd.Series(["z", "b", "c"]) + tm.assert_series_equal(result, expected) + + def test_pandas_replace_na(self): + # GH#43344 + ser = pd.Series(["AA", "BB", "CC", "DD", "EE", "", pd.NA], dtype="string") + regex_mapping = { + "AA": "CC", + "BB": "CC", + "EE": "CC", + "CC": "CC-REPL", + } + result = ser.replace(regex_mapping, regex=True) + exp = pd.Series(["CC", "CC", "CC-REPL", "DD", "CC", "", pd.NA], dtype="string") + tm.assert_series_equal(result, exp) + + @pytest.mark.parametrize( + "dtype, input_data, to_replace, expected_data", + [ + ("bool", [True, False], {True: False}, [False, False]), + ("int64", [1, 2], {1: 10, 2: 20}, [10, 20]), + ("Int64", [1, 2], {1: 10, 2: 20}, [10, 20]), + ("float64", [1.1, 2.2], {1.1: 10.1, 2.2: 20.5}, [10.1, 20.5]), + ("Float64", [1.1, 2.2], {1.1: 10.1, 2.2: 20.5}, [10.1, 20.5]), + ("string", ["one", "two"], {"one": "1", "two": "2"}, ["1", "2"]), + ( + pd.IntervalDtype("int64"), + IntervalArray([pd.Interval(1, 2), pd.Interval(2, 3)]), + {pd.Interval(1, 2): pd.Interval(10, 20)}, + IntervalArray([pd.Interval(10, 20), pd.Interval(2, 3)]), + ), + ( + 
pd.IntervalDtype("float64"), + IntervalArray([pd.Interval(1.0, 2.7), pd.Interval(2.8, 3.1)]), + {pd.Interval(1.0, 2.7): pd.Interval(10.6, 20.8)}, + IntervalArray([pd.Interval(10.6, 20.8), pd.Interval(2.8, 3.1)]), + ), + ( + pd.PeriodDtype("M"), + [pd.Period("2020-05", freq="M")], + {pd.Period("2020-05", freq="M"): pd.Period("2020-06", freq="M")}, + [pd.Period("2020-06", freq="M")], + ), + ], + ) + def test_replace_dtype(self, dtype, input_data, to_replace, expected_data): + # GH#33484 + ser = pd.Series(input_data, dtype=dtype) + result = ser.replace(to_replace) + expected = pd.Series(expected_data, dtype=dtype) + tm.assert_series_equal(result, expected) + + def test_replace_string_dtype(self): + # GH#40732, GH#44940 + ser = pd.Series(["one", "two", np.nan], dtype="string") + res = ser.replace({"one": "1", "two": "2"}) + expected = pd.Series(["1", "2", np.nan], dtype="string") + tm.assert_series_equal(res, expected) + + # GH#31644 + ser2 = pd.Series(["A", np.nan], dtype="string") + res2 = ser2.replace("A", "B") + expected2 = pd.Series(["B", np.nan], dtype="string") + tm.assert_series_equal(res2, expected2) + + ser3 = pd.Series(["A", "B"], dtype="string") + res3 = ser3.replace("A", pd.NA) + expected3 = pd.Series([pd.NA, "B"], dtype="string") + tm.assert_series_equal(res3, expected3) + + def test_replace_string_dtype_list_to_replace(self): + # GH#41215, GH#44940 + ser = pd.Series(["abc", "def"], dtype="string") + res = ser.replace(["abc", "any other string"], "xyz") + expected = pd.Series(["xyz", "def"], dtype="string") + tm.assert_series_equal(res, expected) + + def test_replace_string_dtype_regex(self): + # GH#31644 + ser = pd.Series(["A", "B"], dtype="string") + res = ser.replace(r".", "C", regex=True) + expected = pd.Series(["C", "C"], dtype="string") + tm.assert_series_equal(res, expected) + + def test_replace_nullable_numeric(self): + # GH#40732, GH#44940 + + floats = pd.Series([1.0, 2.0, 3.999, 4.4], dtype=pd.Float64Dtype()) + assert floats.replace({1.0: 9}).dtype == floats.dtype + assert floats.replace(1.0, 9).dtype == floats.dtype + assert floats.replace({1.0: 9.0}).dtype == floats.dtype + assert floats.replace(1.0, 9.0).dtype == floats.dtype + + res = floats.replace(to_replace=[1.0, 2.0], value=[9.0, 10.0]) + assert res.dtype == floats.dtype + + ints = pd.Series([1, 2, 3, 4], dtype=pd.Int64Dtype()) + assert ints.replace({1: 9}).dtype == ints.dtype + assert ints.replace(1, 9).dtype == ints.dtype + assert ints.replace({1: 9.0}).dtype == ints.dtype + assert ints.replace(1, 9.0).dtype == ints.dtype + # FIXME: ints.replace({1: 9.5}) raises bc of incorrect _can_hold_element + + @pytest.mark.parametrize("regex", [False, True]) + def test_replace_regex_dtype_series(self, regex): + # GH-48644 + series = pd.Series(["0"]) + expected = pd.Series([1]) + result = series.replace(to_replace="0", value=1, regex=regex) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_reset_index.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_reset_index.py new file mode 100644 index 0000000..f384915 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_reset_index.py @@ -0,0 +1,188 @@ +from datetime import datetime + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + RangeIndex, + Series, + date_range, +) +import pandas._testing as tm + + +class TestResetIndex: + def test_reset_index_dti_round_trip(self): + dti = 
date_range(start="1/1/2001", end="6/1/2001", freq="D")._with_freq(None) + d1 = DataFrame({"v": np.random.rand(len(dti))}, index=dti) + d2 = d1.reset_index() + assert d2.dtypes[0] == np.dtype("M8[ns]") + d3 = d2.set_index("index") + tm.assert_frame_equal(d1, d3, check_names=False) + + # GH#2329 + stamp = datetime(2012, 11, 22) + df = DataFrame([[stamp, 12.1]], columns=["Date", "Value"]) + df = df.set_index("Date") + + assert df.index[0] == stamp + assert df.reset_index()["Date"][0] == stamp + + def test_reset_index(self): + df = tm.makeDataFrame()[:5] + ser = df.stack() + ser.index.names = ["hash", "category"] + + ser.name = "value" + df = ser.reset_index() + assert "value" in df + + df = ser.reset_index(name="value2") + assert "value2" in df + + # check inplace + s = ser.reset_index(drop=True) + s2 = ser + return_value = s2.reset_index(drop=True, inplace=True) + assert return_value is None + tm.assert_series_equal(s, s2) + + # level + index = MultiIndex( + levels=[["bar"], ["one", "two", "three"], [0, 1]], + codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], + ) + s = Series(np.random.randn(6), index=index) + rs = s.reset_index(level=1) + assert len(rs.columns) == 2 + + rs = s.reset_index(level=[0, 2], drop=True) + tm.assert_index_equal(rs.index, Index(index.get_level_values(1))) + assert isinstance(rs, Series) + + def test_reset_index_name(self): + s = Series([1, 2, 3], index=Index(range(3), name="x")) + assert s.reset_index().index.name is None + assert s.reset_index(drop=True).index.name is None + + def test_reset_index_level(self): + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["A", "B", "C"]) + + for levels in ["A", "B"], [0, 1]: + # With MultiIndex + s = df.set_index(["A", "B"])["C"] + + result = s.reset_index(level=levels[0]) + tm.assert_frame_equal(result, df.set_index("B")) + + result = s.reset_index(level=levels[:1]) + tm.assert_frame_equal(result, df.set_index("B")) + + result = s.reset_index(level=levels) + tm.assert_frame_equal(result, df) + + result = df.set_index(["A", "B"]).reset_index(level=levels, drop=True) + tm.assert_frame_equal(result, df[["C"]]) + + with pytest.raises(KeyError, match="Level E "): + s.reset_index(level=["A", "E"]) + + # With single-level Index + s = df.set_index("A")["B"] + + result = s.reset_index(level=levels[0]) + tm.assert_frame_equal(result, df[["A", "B"]]) + + result = s.reset_index(level=levels[:1]) + tm.assert_frame_equal(result, df[["A", "B"]]) + + result = s.reset_index(level=levels[0], drop=True) + tm.assert_series_equal(result, df["B"]) + + with pytest.raises(IndexError, match="Too many levels"): + s.reset_index(level=[0, 1, 2]) + + # Check that .reset_index([],drop=True) doesn't fail + result = Series(range(4)).reset_index([], drop=True) + expected = Series(range(4)) + tm.assert_series_equal(result, expected) + + def test_reset_index_range(self): + # GH 12071 + s = Series(range(2), name="A", dtype="int64") + series_result = s.reset_index() + assert isinstance(series_result.index, RangeIndex) + series_expected = DataFrame( + [[0, 0], [1, 1]], columns=["index", "A"], index=RangeIndex(stop=2) + ) + tm.assert_frame_equal(series_result, series_expected) + + def test_reset_index_drop_errors(self): + # GH 20925 + + # KeyError raised for series index when passed level name is missing + s = Series(range(4)) + with pytest.raises(KeyError, match="does not match index name"): + s.reset_index("wrong", drop=True) + with pytest.raises(KeyError, match="does not match index name"): + s.reset_index("wrong") + + # KeyError raised for 
series when level to be dropped is missing + s = Series(range(4), index=MultiIndex.from_product([[1, 2]] * 2)) + with pytest.raises(KeyError, match="not found"): + s.reset_index("wrong", drop=True) + + def test_reset_index_with_drop(self, series_with_multilevel_index): + ser = series_with_multilevel_index + + deleveled = ser.reset_index() + assert isinstance(deleveled, DataFrame) + assert len(deleveled.columns) == len(ser.index.levels) + 1 + assert deleveled.index.name == ser.index.name + + deleveled = ser.reset_index(drop=True) + assert isinstance(deleveled, Series) + assert deleveled.index.name == ser.index.name + + def test_drop_pos_args_deprecation(self): + # https://github.com/pandas-dev/pandas/issues/41485 + ser = Series([1, 2, 3], index=Index([1, 2, 3], name="a")) + msg = ( + r"In a future version of pandas all arguments of Series\.reset_index " + r"except for the argument 'level' will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = ser.reset_index("a", False) + expected = DataFrame({"a": [1, 2, 3], 0: [1, 2, 3]}) + tm.assert_frame_equal(result, expected) + + def test_reset_index_inplace_and_drop_ignore_name(self): + # GH#44575 + ser = Series(range(2), name="old") + ser.reset_index(name="new", drop=True, inplace=True) + expected = Series(range(2), name="old") + tm.assert_series_equal(ser, expected) + + +@pytest.mark.parametrize( + "array, dtype", + [ + (["a", "b"], object), + ( + pd.period_range("12-1-2000", periods=2, freq="Q-DEC"), + pd.PeriodDtype(freq="Q-DEC"), + ), + ], +) +def test_reset_index_dtypes_on_empty_series_with_multiindex(array, dtype): + # GH 19602 - Preserve dtype on empty Series with MultiIndex + idx = MultiIndex.from_product([[0, 1], [0.5, 1.0], array]) + result = Series(dtype=object, index=idx)[:0].reset_index().dtypes + expected = Series( + {"level_0": np.int64, "level_1": np.float64, "level_2": dtype, 0: object} + ) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_round.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_round.py new file mode 100644 index 0000000..6c40e36 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_round.py @@ -0,0 +1,64 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import Series +import pandas._testing as tm + + +class TestSeriesRound: + def test_round(self, datetime_series): + datetime_series.index.name = "index_name" + result = datetime_series.round(2) + expected = Series( + np.round(datetime_series.values, 2), index=datetime_series.index, name="ts" + ) + tm.assert_series_equal(result, expected) + assert result.name == datetime_series.name + + def test_round_numpy(self, any_float_dtype): + # See GH#12600 + ser = Series([1.53, 1.36, 0.06], dtype=any_float_dtype) + out = np.round(ser, decimals=0) + expected = Series([2.0, 1.0, 0.0], dtype=any_float_dtype) + tm.assert_series_equal(out, expected) + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.round(ser, decimals=0, out=ser) + + def test_round_numpy_with_nan(self, any_float_dtype): + # See GH#14197 + ser = Series([1.53, np.nan, 0.06], dtype=any_float_dtype) + with tm.assert_produces_warning(None): + result = ser.round() + expected = Series([2.0, np.nan, 0.0], dtype=any_float_dtype) + tm.assert_series_equal(result, expected) + + def test_round_builtin(self, any_float_dtype): + ser = Series( + [1.123, 2.123, 3.123], + index=range(3), + 
dtype=any_float_dtype, + ) + result = round(ser) + expected_rounded0 = Series( + [1.0, 2.0, 3.0], index=range(3), dtype=any_float_dtype + ) + tm.assert_series_equal(result, expected_rounded0) + + decimals = 2 + expected_rounded = Series( + [1.12, 2.12, 3.12], index=range(3), dtype=any_float_dtype + ) + result = round(ser, decimals) + tm.assert_series_equal(result, expected_rounded) + + @pytest.mark.parametrize("method", ["round", "floor", "ceil"]) + @pytest.mark.parametrize("freq", ["s", "5s", "min", "5min", "h", "5h"]) + def test_round_nat(self, method, freq): + # GH14940 + ser = Series([pd.NaT]) + expected = Series(pd.NaT) + round_method = getattr(ser.dt, method) + tm.assert_series_equal(round_method(freq), expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_searchsorted.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_searchsorted.py new file mode 100644 index 0000000..5a7eb3f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_searchsorted.py @@ -0,0 +1,67 @@ +import numpy as np + +from pandas import ( + Series, + Timestamp, + date_range, +) +import pandas._testing as tm +from pandas.api.types import is_scalar + + +class TestSeriesSearchSorted: + def test_searchsorted(self): + ser = Series([1, 2, 3]) + + result = ser.searchsorted(1, side="left") + assert is_scalar(result) + assert result == 0 + + result = ser.searchsorted(1, side="right") + assert is_scalar(result) + assert result == 1 + + def test_searchsorted_numeric_dtypes_scalar(self): + ser = Series([1, 2, 90, 1000, 3e9]) + res = ser.searchsorted(30) + assert is_scalar(res) + assert res == 2 + + res = ser.searchsorted([30]) + exp = np.array([2], dtype=np.intp) + tm.assert_numpy_array_equal(res, exp) + + def test_searchsorted_numeric_dtypes_vector(self): + ser = Series([1, 2, 90, 1000, 3e9]) + res = ser.searchsorted([91, 2e6]) + exp = np.array([3, 4], dtype=np.intp) + tm.assert_numpy_array_equal(res, exp) + + def test_searchsorted_datetime64_scalar(self): + ser = Series(date_range("20120101", periods=10, freq="2D")) + val = Timestamp("20120102") + res = ser.searchsorted(val) + assert is_scalar(res) + assert res == 1 + + def test_searchsorted_datetime64_scalar_mixed_timezones(self): + # GH 30086 + ser = Series(date_range("20120101", periods=10, freq="2D", tz="UTC")) + val = Timestamp("20120102", tz="America/New_York") + res = ser.searchsorted(val) + assert is_scalar(res) + assert res == 1 + + def test_searchsorted_datetime64_list(self): + ser = Series(date_range("20120101", periods=10, freq="2D")) + vals = [Timestamp("20120102"), Timestamp("20120104")] + res = ser.searchsorted(vals) + exp = np.array([1, 2], dtype=np.intp) + tm.assert_numpy_array_equal(res, exp) + + def test_searchsorted_sorter(self): + # GH8490 + ser = Series([3, 1, 2]) + res = ser.searchsorted([0, 3], sorter=np.argsort(ser)) + exp = np.array([0, 2], dtype=np.intp) + tm.assert_numpy_array_equal(res, exp) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_set_name.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_set_name.py new file mode 100644 index 0000000..cbc8ebd --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_set_name.py @@ -0,0 +1,21 @@ +from datetime import datetime + +from pandas import Series + + +class TestSetName: + def test_set_name(self): + ser = Series([1, 2, 3]) + ser2 = ser._set_name("foo") + assert ser2.name == "foo" + assert ser.name is None + assert ser is 
not ser2 + + def test_set_name_attribute(self): + ser = Series([1, 2, 3]) + ser2 = Series([1, 2, 3], name="bar") + for name in [7, 7.0, "name", datetime(2001, 1, 1), (1,), "\u05D0"]: + ser.name = name + assert ser.name == name + ser2.name = name + assert ser2.name == name diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_sort_index.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_sort_index.py new file mode 100644 index 0000000..d7bd92c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_sort_index.py @@ -0,0 +1,334 @@ +import random + +import numpy as np +import pytest + +from pandas import ( + DatetimeIndex, + IntervalIndex, + MultiIndex, + Series, +) +import pandas._testing as tm + + +@pytest.fixture(params=["quicksort", "mergesort", "heapsort", "stable"]) +def sort_kind(request): + return request.param + + +class TestSeriesSortIndex: + def test_sort_index_name(self, datetime_series): + result = datetime_series.sort_index(ascending=False) + assert result.name == datetime_series.name + + def test_sort_index(self, datetime_series): + datetime_series.index = datetime_series.index._with_freq(None) + + rindex = list(datetime_series.index) + random.shuffle(rindex) + + random_order = datetime_series.reindex(rindex) + sorted_series = random_order.sort_index() + tm.assert_series_equal(sorted_series, datetime_series) + + # descending + sorted_series = random_order.sort_index(ascending=False) + tm.assert_series_equal( + sorted_series, datetime_series.reindex(datetime_series.index[::-1]) + ) + + # compat on level + sorted_series = random_order.sort_index(level=0) + tm.assert_series_equal(sorted_series, datetime_series) + + # compat on axis + sorted_series = random_order.sort_index(axis=0) + tm.assert_series_equal(sorted_series, datetime_series) + + msg = "No axis named 1 for object type Series" + with pytest.raises(ValueError, match=msg): + random_order.sort_values(axis=1) + + sorted_series = random_order.sort_index(level=0, axis=0) + tm.assert_series_equal(sorted_series, datetime_series) + + with pytest.raises(ValueError, match=msg): + random_order.sort_index(level=0, axis=1) + + def test_sort_index_inplace(self, datetime_series): + datetime_series.index = datetime_series.index._with_freq(None) + + # For GH#11402 + rindex = list(datetime_series.index) + random.shuffle(rindex) + + # descending + random_order = datetime_series.reindex(rindex) + result = random_order.sort_index(ascending=False, inplace=True) + + assert result is None + expected = datetime_series.reindex(datetime_series.index[::-1]) + expected.index = expected.index._with_freq(None) + tm.assert_series_equal(random_order, expected) + + # ascending + random_order = datetime_series.reindex(rindex) + result = random_order.sort_index(ascending=True, inplace=True) + + assert result is None + expected = datetime_series.copy() + expected.index = expected.index._with_freq(None) + tm.assert_series_equal(random_order, expected) + + def test_sort_index_level(self): + mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC")) + s = Series([1, 2], mi) + backwards = s.iloc[[1, 0]] + + res = s.sort_index(level="A") + tm.assert_series_equal(backwards, res) + + res = s.sort_index(level=["A", "B"]) + tm.assert_series_equal(backwards, res) + + res = s.sort_index(level="A", sort_remaining=False) + tm.assert_series_equal(s, res) + + res = s.sort_index(level=["A", "B"], sort_remaining=False) + tm.assert_series_equal(s, res) + + 
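# --- Illustrative aside (editor's sketch, not part of the vendored pandas test file) ---
# A minimal, hedged sketch of what the surrounding sort_index tests exercise:
# sorting by a named MultiIndex level, with sort_remaining deciding whether the
# other levels are sorted too. The index values here are made up for illustration.
import pandas as pd

demo_mi = pd.MultiIndex.from_tuples([(2, "b"), (1, "a"), (2, "a")], names=["A", "B"])
demo_ser = pd.Series([10, 20, 30], index=demo_mi)

demo_ser.sort_index(level="A")                        # sorts level A, then the remaining level B
demo_ser.sort_index(level="A", sort_remaining=False)  # sorts level A only; B order left as-is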
@pytest.mark.parametrize("level", ["A", 0]) # GH#21052 + def test_sort_index_multiindex(self, level): + + mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC")) + s = Series([1, 2], mi) + backwards = s.iloc[[1, 0]] + + # implicit sort_remaining=True + res = s.sort_index(level=level) + tm.assert_series_equal(backwards, res) + + # GH#13496 + # sort has no effect without remaining lvls + res = s.sort_index(level=level, sort_remaining=False) + tm.assert_series_equal(s, res) + + def test_sort_index_kind(self, sort_kind): + # GH#14444 & GH#13589: Add support for sort algo choosing + series = Series(index=[3, 2, 1, 4, 3], dtype=object) + expected_series = Series(index=[1, 2, 3, 3, 4], dtype=object) + + index_sorted_series = series.sort_index(kind=sort_kind) + tm.assert_series_equal(expected_series, index_sorted_series) + + def test_sort_index_na_position(self): + series = Series(index=[3, 2, 1, 4, 3, np.nan], dtype=object) + expected_series_first = Series(index=[np.nan, 1, 2, 3, 3, 4], dtype=object) + + index_sorted_series = series.sort_index(na_position="first") + tm.assert_series_equal(expected_series_first, index_sorted_series) + + expected_series_last = Series(index=[1, 2, 3, 3, 4, np.nan], dtype=object) + + index_sorted_series = series.sort_index(na_position="last") + tm.assert_series_equal(expected_series_last, index_sorted_series) + + def test_sort_index_intervals(self): + s = Series( + [np.nan, 1, 2, 3], IntervalIndex.from_arrays([0, 1, 2, 3], [1, 2, 3, 4]) + ) + + result = s.sort_index() + expected = s + tm.assert_series_equal(result, expected) + + result = s.sort_index(ascending=False) + expected = Series( + [3, 2, 1, np.nan], IntervalIndex.from_arrays([3, 2, 1, 0], [4, 3, 2, 1]) + ) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("inplace", [True, False]) + @pytest.mark.parametrize( + "original_list, sorted_list, ascending, ignore_index, output_index", + [ + ([2, 3, 6, 1], [2, 3, 6, 1], True, True, [0, 1, 2, 3]), + ([2, 3, 6, 1], [2, 3, 6, 1], True, False, [0, 1, 2, 3]), + ([2, 3, 6, 1], [1, 6, 3, 2], False, True, [0, 1, 2, 3]), + ([2, 3, 6, 1], [1, 6, 3, 2], False, False, [3, 2, 1, 0]), + ], + ) + def test_sort_index_ignore_index( + self, inplace, original_list, sorted_list, ascending, ignore_index, output_index + ): + # GH 30114 + ser = Series(original_list) + expected = Series(sorted_list, index=output_index) + kwargs = { + "ascending": ascending, + "ignore_index": ignore_index, + "inplace": inplace, + } + + if inplace: + result_ser = ser.copy() + result_ser.sort_index(**kwargs) + else: + result_ser = ser.sort_index(**kwargs) + + tm.assert_series_equal(result_ser, expected) + tm.assert_series_equal(ser, Series(original_list)) + + def test_sort_index_ascending_list(self): + # GH#16934 + + # Set up a Series with a three level MultiIndex + arrays = [ + ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + [4, 3, 2, 1, 4, 3, 2, 1], + ] + tuples = zip(*arrays) + mi = MultiIndex.from_tuples(tuples, names=["first", "second", "third"]) + ser = Series(range(8), index=mi) + + # Sort with boolean ascending + result = ser.sort_index(level=["third", "first"], ascending=False) + expected = ser.iloc[[4, 0, 5, 1, 6, 2, 7, 3]] + tm.assert_series_equal(result, expected) + + # Sort with list of boolean ascending + result = ser.sort_index(level=["third", "first"], ascending=[False, True]) + expected = ser.iloc[[0, 4, 1, 5, 2, 6, 3, 7]] + tm.assert_series_equal(result, expected) + + 
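# --- Illustrative aside (editor's sketch, not part of the vendored pandas test file) ---
# Hedged sketch of per-level sort direction, the behaviour test_sort_index_ascending_list
# above checks (GH#16934): when `level` is a list, `ascending` may be a matching list of
# bools, one per level. Index values are hypothetical.
import pandas as pd

demo_mi = pd.MultiIndex.from_product([["x", "y"], [1, 2]], names=["first", "second"])
demo_ser = pd.Series(range(4), index=demo_mi)

# "first" descending, "second" ascending
demo_ser.sort_index(level=["first", "second"], ascending=[False, True])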
@pytest.mark.parametrize( + "ascending", + [ + None, + (True, None), + (False, "True"), + ], + ) + def test_sort_index_ascending_bad_value_raises(self, ascending): + ser = Series(range(10), index=[0, 3, 2, 1, 4, 5, 7, 6, 8, 9]) + match = 'For argument "ascending" expected type bool' + with pytest.raises(ValueError, match=match): + ser.sort_index(ascending=ascending) + + +class TestSeriesSortIndexKey: + def test_sort_index_multiindex_key(self): + mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC")) + s = Series([1, 2], mi) + backwards = s.iloc[[1, 0]] + + result = s.sort_index(level="C", key=lambda x: -x) + tm.assert_series_equal(s, result) + + result = s.sort_index(level="C", key=lambda x: x) # nothing happens + tm.assert_series_equal(backwards, result) + + def test_sort_index_multiindex_key_multi_level(self): + mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC")) + s = Series([1, 2], mi) + backwards = s.iloc[[1, 0]] + + result = s.sort_index(level=["A", "C"], key=lambda x: -x) + tm.assert_series_equal(s, result) + + result = s.sort_index(level=["A", "C"], key=lambda x: x) # nothing happens + tm.assert_series_equal(backwards, result) + + def test_sort_index_key(self): + series = Series(np.arange(6, dtype="int64"), index=list("aaBBca")) + + result = series.sort_index() + expected = series.iloc[[2, 3, 0, 1, 5, 4]] + tm.assert_series_equal(result, expected) + + result = series.sort_index(key=lambda x: x.str.lower()) + expected = series.iloc[[0, 1, 5, 2, 3, 4]] + tm.assert_series_equal(result, expected) + + result = series.sort_index(key=lambda x: x.str.lower(), ascending=False) + expected = series.iloc[[4, 2, 3, 0, 1, 5]] + tm.assert_series_equal(result, expected) + + def test_sort_index_key_int(self): + series = Series(np.arange(6, dtype="int64"), index=np.arange(6, dtype="int64")) + + result = series.sort_index() + tm.assert_series_equal(result, series) + + result = series.sort_index(key=lambda x: -x) + expected = series.sort_index(ascending=False) + tm.assert_series_equal(result, expected) + + result = series.sort_index(key=lambda x: 2 * x) + tm.assert_series_equal(result, series) + + def test_sort_index_kind_key(self, sort_kind, sort_by_key): + # GH #14444 & #13589: Add support for sort algo choosing + series = Series(index=[3, 2, 1, 4, 3], dtype=object) + expected_series = Series(index=[1, 2, 3, 3, 4], dtype=object) + + index_sorted_series = series.sort_index(kind=sort_kind, key=sort_by_key) + tm.assert_series_equal(expected_series, index_sorted_series) + + def test_sort_index_kind_neg_key(self, sort_kind): + # GH #14444 & #13589: Add support for sort algo choosing + series = Series(index=[3, 2, 1, 4, 3], dtype=object) + expected_series = Series(index=[4, 3, 3, 2, 1], dtype=object) + + index_sorted_series = series.sort_index(kind=sort_kind, key=lambda x: -x) + tm.assert_series_equal(expected_series, index_sorted_series) + + def test_sort_index_na_position_key(self, sort_by_key): + series = Series(index=[3, 2, 1, 4, 3, np.nan], dtype=object) + expected_series_first = Series(index=[np.nan, 1, 2, 3, 3, 4], dtype=object) + + index_sorted_series = series.sort_index(na_position="first", key=sort_by_key) + tm.assert_series_equal(expected_series_first, index_sorted_series) + + expected_series_last = Series(index=[1, 2, 3, 3, 4, np.nan], dtype=object) + + index_sorted_series = series.sort_index(na_position="last", key=sort_by_key) + tm.assert_series_equal(expected_series_last, index_sorted_series) + + def test_changes_length_raises(self): + s = Series([1, 2, 
3]) + with pytest.raises(ValueError, match="change the shape"): + s.sort_index(key=lambda x: x[:1]) + + def test_sort_values_key_type(self): + s = Series([1, 2, 3], DatetimeIndex(["2008-10-24", "2008-11-23", "2007-12-22"])) + + result = s.sort_index(key=lambda x: x.month) + expected = s.iloc[[0, 1, 2]] + tm.assert_series_equal(result, expected) + + result = s.sort_index(key=lambda x: x.day) + expected = s.iloc[[2, 1, 0]] + tm.assert_series_equal(result, expected) + + result = s.sort_index(key=lambda x: x.year) + expected = s.iloc[[2, 0, 1]] + tm.assert_series_equal(result, expected) + + result = s.sort_index(key=lambda x: x.month_name()) + expected = s.iloc[[2, 1, 0]] + tm.assert_series_equal(result, expected) + + def test_sort_index_pos_args_deprecation(self): + # https://github.com/pandas-dev/pandas/issues/41485 + ser = Series([1, 2, 3]) + msg = ( + r"In a future version of pandas all arguments of Series.sort_index " + r"will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = ser.sort_index(0) + expected = Series([1, 2, 3]) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_sort_values.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_sort_values.py new file mode 100644 index 0000000..adc578d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_sort_values.py @@ -0,0 +1,256 @@ +import numpy as np +import pytest + +from pandas import ( + Categorical, + DataFrame, + Series, +) +import pandas._testing as tm + + +class TestSeriesSortValues: + def test_sort_values(self, datetime_series): + + # check indexes are reordered corresponding with the values + ser = Series([3, 2, 4, 1], ["A", "B", "C", "D"]) + expected = Series([1, 2, 3, 4], ["D", "B", "A", "C"]) + result = ser.sort_values() + tm.assert_series_equal(expected, result) + + ts = datetime_series.copy() + ts[:5] = np.NaN + vals = ts.values + + result = ts.sort_values() + assert np.isnan(result[-5:]).all() + tm.assert_numpy_array_equal(result[:-5].values, np.sort(vals[5:])) + + # na_position + result = ts.sort_values(na_position="first") + assert np.isnan(result[:5]).all() + tm.assert_numpy_array_equal(result[5:].values, np.sort(vals[5:])) + + # something object-type + ser = Series(["A", "B"], [1, 2]) + # no failure + ser.sort_values() + + # ascending=False + ordered = ts.sort_values(ascending=False) + expected = np.sort(ts.dropna().values)[::-1] + tm.assert_almost_equal(expected, ordered.dropna().values) + ordered = ts.sort_values(ascending=False, na_position="first") + tm.assert_almost_equal(expected, ordered.dropna().values) + + # ascending=[False] should behave the same as ascending=False + ordered = ts.sort_values(ascending=[False]) + expected = ts.sort_values(ascending=False) + tm.assert_series_equal(expected, ordered) + ordered = ts.sort_values(ascending=[False], na_position="first") + expected = ts.sort_values(ascending=False, na_position="first") + tm.assert_series_equal(expected, ordered) + + msg = 'For argument "ascending" expected type bool, received type NoneType.' 
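# --- Illustrative aside (editor's sketch, not part of the vendored pandas test file) ---
# The raises-blocks just below assert the validation rules sketched here: for a
# Series, `ascending` must be a single bool (or a length-1 list-like); None,
# strings, and wrong-length lists all raise ValueError. Kept as comments so the
# test's own `ts`/`msg` bindings are untouched; `demo` is a hypothetical name.
# demo = Series([3, 1, 2])
# demo.sort_values(ascending=True)       # ok
# demo.sort_values(ascending=[False])    # ok, length-1 list
# demo.sort_values(ascending=None)       # ValueError: expected type bool
# demo.sort_values(ascending=[1, 2, 3])  # ValueError: length must be 1 for Series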
+ with pytest.raises(ValueError, match=msg): + ts.sort_values(ascending=None) + msg = r"Length of ascending \(0\) must be 1 for Series" + with pytest.raises(ValueError, match=msg): + ts.sort_values(ascending=[]) + msg = r"Length of ascending \(3\) must be 1 for Series" + with pytest.raises(ValueError, match=msg): + ts.sort_values(ascending=[1, 2, 3]) + msg = r"Length of ascending \(2\) must be 1 for Series" + with pytest.raises(ValueError, match=msg): + ts.sort_values(ascending=[False, False]) + msg = 'For argument "ascending" expected type bool, received type str.' + with pytest.raises(ValueError, match=msg): + ts.sort_values(ascending="foobar") + + # inplace=True + ts = datetime_series.copy() + return_value = ts.sort_values(ascending=False, inplace=True) + assert return_value is None + tm.assert_series_equal(ts, datetime_series.sort_values(ascending=False)) + tm.assert_index_equal( + ts.index, datetime_series.sort_values(ascending=False).index + ) + + # GH#5856/5853 + # Series.sort_values operating on a view + df = DataFrame(np.random.randn(10, 4)) + s = df.iloc[:, 0] + + msg = ( + "This Series is a view of some other array, to sort in-place " + "you must create a copy" + ) + with pytest.raises(ValueError, match=msg): + s.sort_values(inplace=True) + + def test_sort_values_categorical(self): + + c = Categorical(["a", "b", "b", "a"], ordered=False) + cat = Series(c.copy()) + + # sort in the categories order + expected = Series( + Categorical(["a", "a", "b", "b"], ordered=False), index=[0, 3, 1, 2] + ) + result = cat.sort_values() + tm.assert_series_equal(result, expected) + + cat = Series(Categorical(["a", "c", "b", "d"], ordered=True)) + res = cat.sort_values() + exp = np.array(["a", "b", "c", "d"], dtype=np.object_) + tm.assert_numpy_array_equal(res.__array__(), exp) + + cat = Series( + Categorical( + ["a", "c", "b", "d"], categories=["a", "b", "c", "d"], ordered=True + ) + ) + res = cat.sort_values() + exp = np.array(["a", "b", "c", "d"], dtype=np.object_) + tm.assert_numpy_array_equal(res.__array__(), exp) + + res = cat.sort_values(ascending=False) + exp = np.array(["d", "c", "b", "a"], dtype=np.object_) + tm.assert_numpy_array_equal(res.__array__(), exp) + + raw_cat1 = Categorical( + ["a", "b", "c", "d"], categories=["a", "b", "c", "d"], ordered=False + ) + raw_cat2 = Categorical( + ["a", "b", "c", "d"], categories=["d", "c", "b", "a"], ordered=True + ) + s = ["a", "b", "c", "d"] + df = DataFrame( + {"unsort": raw_cat1, "sort": raw_cat2, "string": s, "values": [1, 2, 3, 4]} + ) + + # Cats must be sorted in a dataframe + res = df.sort_values(by=["string"], ascending=False) + exp = np.array(["d", "c", "b", "a"], dtype=np.object_) + tm.assert_numpy_array_equal(res["sort"].values.__array__(), exp) + assert res["sort"].dtype == "category" + + res = df.sort_values(by=["sort"], ascending=False) + exp = df.sort_values(by=["string"], ascending=True) + tm.assert_series_equal(res["values"], exp["values"]) + assert res["sort"].dtype == "category" + assert res["unsort"].dtype == "category" + + # unordered cat, but we allow this + df.sort_values(by=["unsort"], ascending=False) + + # multi-columns sort + # GH#7848 + df = DataFrame( + {"id": [6, 5, 4, 3, 2, 1], "raw_grade": ["a", "b", "b", "a", "a", "e"]} + ) + df["grade"] = Categorical(df["raw_grade"], ordered=True) + df["grade"] = df["grade"].cat.set_categories(["b", "e", "a"]) + + # sorts 'grade' according to the order of the categories + result = df.sort_values(by=["grade"]) + expected = df.iloc[[1, 2, 5, 0, 3, 4]] + tm.assert_frame_equal(result, 
expected) + + # multi + result = df.sort_values(by=["grade", "id"]) + expected = df.iloc[[2, 1, 5, 4, 3, 0]] + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("inplace", [True, False]) + @pytest.mark.parametrize( + "original_list, sorted_list, ignore_index, output_index", + [ + ([2, 3, 6, 1], [6, 3, 2, 1], True, [0, 1, 2, 3]), + ([2, 3, 6, 1], [6, 3, 2, 1], False, [2, 1, 0, 3]), + ], + ) + def test_sort_values_ignore_index( + self, inplace, original_list, sorted_list, ignore_index, output_index + ): + # GH 30114 + ser = Series(original_list) + expected = Series(sorted_list, index=output_index) + kwargs = {"ignore_index": ignore_index, "inplace": inplace} + + if inplace: + result_ser = ser.copy() + result_ser.sort_values(ascending=False, **kwargs) + else: + result_ser = ser.sort_values(ascending=False, **kwargs) + + tm.assert_series_equal(result_ser, expected) + tm.assert_series_equal(ser, Series(original_list)) + + def test_sort_values_pos_args_deprecation(self): + # https://github.com/pandas-dev/pandas/issues/41485 + ser = Series([1, 2, 3]) + msg = ( + r"In a future version of pandas all arguments of Series\.sort_values " + r"will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = ser.sort_values(0) + expected = Series([1, 2, 3]) + tm.assert_series_equal(result, expected) + + def test_mergesort_descending_stability(self): + # GH 28697 + s = Series([1, 2, 1, 3], ["first", "b", "second", "c"]) + result = s.sort_values(ascending=False, kind="mergesort") + expected = Series([3, 2, 1, 1], ["c", "b", "first", "second"]) + tm.assert_series_equal(result, expected) + + def test_sort_values_validate_ascending_for_value_error(self): + # GH41634 + ser = Series([23, 7, 21]) + + msg = 'For argument "ascending" expected type bool, received type str.' 
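# --- Illustrative aside (editor's sketch, not part of the vendored pandas test file) ---
# Common pitfall the next assertion guards against (GH41634): passing the *string*
# "False" rather than the bool False. Kept as comments so the test's own `ser`/`msg`
# bindings are untouched; values are hypothetical.
# Series([23, 7, 21]).sort_values(ascending="False")  # ValueError: expected type bool
# Series([23, 7, 21]).sort_values(ascending=False)    # descending sort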
+ with pytest.raises(ValueError, match=msg): + ser.sort_values(ascending="False") + + @pytest.mark.parametrize("ascending", [False, 0, 1, True]) + def test_sort_values_validate_ascending_functional(self, ascending): + # GH41634 + ser = Series([23, 7, 21]) + expected = np.sort(ser.values) + + sorted_ser = ser.sort_values(ascending=ascending) + if not ascending: + expected = expected[::-1] + + result = sorted_ser.values + tm.assert_numpy_array_equal(result, expected) + + +class TestSeriesSortingKey: + def test_sort_values_key(self): + series = Series(np.array(["Hello", "goodbye"])) + + result = series.sort_values(axis=0) + expected = series + tm.assert_series_equal(result, expected) + + result = series.sort_values(axis=0, key=lambda x: x.str.lower()) + expected = series[::-1] + tm.assert_series_equal(result, expected) + + def test_sort_values_key_nan(self): + series = Series(np.array([0, 5, np.nan, 3, 2, np.nan])) + + result = series.sort_values(axis=0) + expected = series.iloc[[0, 4, 3, 1, 2, 5]] + tm.assert_series_equal(result, expected) + + result = series.sort_values(axis=0, key=lambda x: x + 5) + expected = series.iloc[[0, 4, 3, 1, 2, 5]] + tm.assert_series_equal(result, expected) + + result = series.sort_values(axis=0, key=lambda x: -x, ascending=False) + expected = series.iloc[[0, 4, 3, 1, 2, 5]] + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_to_csv.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_to_csv.py new file mode 100644 index 0000000..28519fc --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_to_csv.py @@ -0,0 +1,181 @@ +from datetime import datetime +from io import StringIO + +import numpy as np +import pytest + +import pandas as pd +from pandas import Series +import pandas._testing as tm + +from pandas.io.common import get_handle + + +class TestSeriesToCSV: + def read_csv(self, path, **kwargs): + params = {"index_col": 0, "header": None, "parse_dates": True} + params.update(**kwargs) + + header = params.get("header") + out = pd.read_csv(path, **params).squeeze("columns") + + if header is None: + out.name = out.index.name = None + + return out + + def test_from_csv(self, datetime_series, string_series): + # freq doesn't round-trip + datetime_series.index = datetime_series.index._with_freq(None) + + with tm.ensure_clean() as path: + datetime_series.to_csv(path, header=False) + ts = self.read_csv(path) + tm.assert_series_equal(datetime_series, ts, check_names=False) + + assert ts.name is None + assert ts.index.name is None + + # see gh-10483 + datetime_series.to_csv(path, header=True) + ts_h = self.read_csv(path, header=0) + assert ts_h.name == "ts" + + string_series.to_csv(path, header=False) + series = self.read_csv(path) + tm.assert_series_equal(string_series, series, check_names=False) + + assert series.name is None + assert series.index.name is None + + string_series.to_csv(path, header=True) + series_h = self.read_csv(path, header=0) + assert series_h.name == "series" + + with open(path, "w") as outfile: + outfile.write("1998-01-01|1.0\n1999-01-01|2.0") + + series = self.read_csv(path, sep="|") + check_series = Series( + {datetime(1998, 1, 1): 1.0, datetime(1999, 1, 1): 2.0} + ) + tm.assert_series_equal(check_series, series) + + series = self.read_csv(path, sep="|", parse_dates=False) + check_series = Series({"1998-01-01": 1.0, "1999-01-01": 2.0}) + tm.assert_series_equal(check_series, series) + + def test_to_csv(self, datetime_series): 
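# --- Illustrative aside (editor's sketch, not part of the vendored pandas test file) ---
# Round-trip sketch of the pattern this file tests: Series has no from_csv, so you
# write with to_csv and read back with read_csv, squeezing the single column back to
# a Series (mirroring the read_csv helper defined at the top of this class). The
# path and values are hypothetical; kept as comments to leave the test body intact.
# demo = Series([1.0, 2.0], index=["a", "b"])
# demo.to_csv("demo.csv", header=False)
# back = pd.read_csv("demo.csv", index_col=0, header=None).squeeze("columns")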
+ + with tm.ensure_clean() as path: + datetime_series.to_csv(path, header=False) + + with open(path, newline=None) as f: + lines = f.readlines() + assert lines[1] != "\n" + + datetime_series.to_csv(path, index=False, header=False) + arr = np.loadtxt(path) + tm.assert_almost_equal(arr, datetime_series.values) + + def test_to_csv_unicode_index(self): + buf = StringIO() + s = Series(["\u05d0", "d2"], index=["\u05d0", "\u05d1"]) + + s.to_csv(buf, encoding="UTF-8", header=False) + buf.seek(0) + + s2 = self.read_csv(buf, index_col=0, encoding="UTF-8") + tm.assert_series_equal(s, s2) + + def test_to_csv_float_format(self): + + with tm.ensure_clean() as filename: + ser = Series([0.123456, 0.234567, 0.567567]) + ser.to_csv(filename, float_format="%.2f", header=False) + + rs = self.read_csv(filename) + xp = Series([0.12, 0.23, 0.57]) + tm.assert_series_equal(rs, xp) + + def test_to_csv_list_entries(self): + s = Series(["jack and jill", "jesse and frank"]) + + split = s.str.split(r"\s+and\s+") + + buf = StringIO() + split.to_csv(buf, header=False) + + def test_to_csv_path_is_none(self): + # GH 8215 + # Series.to_csv() was returning None, inconsistent with + # DataFrame.to_csv() which returned string + s = Series([1, 2, 3]) + csv_str = s.to_csv(path_or_buf=None, header=False) + assert isinstance(csv_str, str) + + @pytest.mark.parametrize( + "s,encoding", + [ + ( + Series([0.123456, 0.234567, 0.567567], index=["A", "B", "C"], name="X"), + None, + ), + # GH 21241, 21118 + (Series(["abc", "def", "ghi"], name="X"), "ascii"), + (Series(["123", "你好", "世界"], name="中文"), "gb2312"), + (Series(["123", "Γειά σου", "Κόσμε"], name="Ελληνικά"), "cp737"), + ], + ) + def test_to_csv_compression(self, s, encoding, compression): + + with tm.ensure_clean() as filename: + + s.to_csv(filename, compression=compression, encoding=encoding, header=True) + # test the round trip - to_csv -> read_csv + result = pd.read_csv( + filename, + compression=compression, + encoding=encoding, + index_col=0, + ).squeeze("columns") + tm.assert_series_equal(s, result) + + # test the round trip using file handle - to_csv -> read_csv + with get_handle( + filename, "w", compression=compression, encoding=encoding + ) as handles: + s.to_csv(handles.handle, encoding=encoding, header=True) + + result = pd.read_csv( + filename, + compression=compression, + encoding=encoding, + index_col=0, + ).squeeze("columns") + tm.assert_series_equal(s, result) + + # explicitly ensure file was compressed + with tm.decompress_file(filename, compression) as fh: + text = fh.read().decode(encoding or "utf8") + assert s.name in text + + with tm.decompress_file(filename, compression) as fh: + tm.assert_series_equal( + s, + pd.read_csv(fh, index_col=0, encoding=encoding).squeeze("columns"), + ) + + def test_to_csv_interval_index(self): + # GH 28210 + s = Series(["foo", "bar", "baz"], index=pd.interval_range(0, 3)) + + with tm.ensure_clean("__tmp_to_csv_interval_index__.csv") as path: + s.to_csv(path, header=False) + result = self.read_csv(path, index_col=0) + + # can't roundtrip intervalindex via read_csv so check string repr (GH 23595) + expected = s.copy() + expected.index = expected.index.astype(str) + + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_to_dict.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_to_dict.py new file mode 100644 index 0000000..4c3d959 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_to_dict.py @@ -0,0 
+1,38 @@ +import collections + +import numpy as np +import pytest + +from pandas import Series +import pandas._testing as tm + + +class TestSeriesToDict: + @pytest.mark.parametrize( + "mapping", (dict, collections.defaultdict(list), collections.OrderedDict) + ) + def test_to_dict(self, mapping, datetime_series): + # GH#16122 + result = Series(datetime_series.to_dict(mapping), name="ts") + expected = datetime_series.copy() + expected.index = expected.index._with_freq(None) + tm.assert_series_equal(result, expected) + + from_method = Series(datetime_series.to_dict(collections.Counter)) + from_constructor = Series(collections.Counter(datetime_series.items())) + tm.assert_series_equal(from_method, from_constructor) + + @pytest.mark.parametrize( + "input", + ( + {"a": np.int64(64), "b": 10}, + {"a": np.int64(64), "b": 10, "c": "ABC"}, + {"a": np.uint64(64), "b": 10, "c": "ABC"}, + ), + ) + def test_to_dict_return_types(self, input): + # GH25969 + + d = Series(input).to_dict() + assert isinstance(d["a"], int) + assert isinstance(d["b"], int) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_to_frame.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_to_frame.py new file mode 100644 index 0000000..5f303d0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_to_frame.py @@ -0,0 +1,61 @@ +from pandas import ( + DataFrame, + Index, + Series, +) +import pandas._testing as tm + + +class TestToFrame: + def test_to_frame_respects_name_none(self): + # GH#44212 if we explicitly pass name=None, then that should be respected, + # not changed to 0 + # GH-45448 this is first deprecated to only change in the future + ser = Series(range(3)) + with tm.assert_produces_warning(FutureWarning): + result = ser.to_frame(None) + + # exp_index = Index([None], dtype=object) + exp_index = Index([0]) + tm.assert_index_equal(result.columns, exp_index) + + with tm.assert_produces_warning(FutureWarning): + result = ser.rename("foo").to_frame(None) + exp_index = Index(["foo"], dtype=object) + tm.assert_index_equal(result.columns, exp_index) + + def test_to_frame(self, datetime_series): + datetime_series.name = None + rs = datetime_series.to_frame() + xp = DataFrame(datetime_series.values, index=datetime_series.index) + tm.assert_frame_equal(rs, xp) + + datetime_series.name = "testname" + rs = datetime_series.to_frame() + xp = DataFrame( + {"testname": datetime_series.values}, index=datetime_series.index + ) + tm.assert_frame_equal(rs, xp) + + rs = datetime_series.to_frame(name="testdifferent") + xp = DataFrame( + {"testdifferent": datetime_series.values}, index=datetime_series.index + ) + tm.assert_frame_equal(rs, xp) + + def test_to_frame_expanddim(self): + # GH#9762 + + class SubclassedSeries(Series): + @property + def _constructor_expanddim(self): + return SubclassedFrame + + class SubclassedFrame(DataFrame): + pass + + ser = SubclassedSeries([1, 2, 3], name="X") + result = ser.to_frame() + assert isinstance(result, SubclassedFrame) + expected = SubclassedFrame({"X": [1, 2, 3]}) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_truncate.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_truncate.py new file mode 100644 index 0000000..a3a27a7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_truncate.py @@ -0,0 +1,65 @@ +from datetime import datetime + +import pandas as pd +from pandas import ( + Series, + 
date_range, +) +import pandas._testing as tm + + +class TestTruncate: + def test_truncate_datetimeindex_tz(self): + # GH 9243 + idx = date_range("4/1/2005", "4/30/2005", freq="D", tz="US/Pacific") + s = Series(range(len(idx)), index=idx) + with tm.assert_produces_warning(FutureWarning): + # GH#36148 in the future will require tzawareness compat + s.truncate(datetime(2005, 4, 2), datetime(2005, 4, 4)) + + lb = idx[1] + ub = idx[3] + result = s.truncate(lb.to_pydatetime(), ub.to_pydatetime()) + expected = Series([1, 2, 3], index=idx[1:4]) + tm.assert_series_equal(result, expected) + + def test_truncate_periodindex(self): + # GH 17717 + idx1 = pd.PeriodIndex( + [pd.Period("2017-09-02"), pd.Period("2017-09-02"), pd.Period("2017-09-03")] + ) + series1 = Series([1, 2, 3], index=idx1) + result1 = series1.truncate(after="2017-09-02") + + expected_idx1 = pd.PeriodIndex( + [pd.Period("2017-09-02"), pd.Period("2017-09-02")] + ) + tm.assert_series_equal(result1, Series([1, 2], index=expected_idx1)) + + idx2 = pd.PeriodIndex( + [pd.Period("2017-09-03"), pd.Period("2017-09-02"), pd.Period("2017-09-03")] + ) + series2 = Series([1, 2, 3], index=idx2) + result2 = series2.sort_index().truncate(after="2017-09-02") + + expected_idx2 = pd.PeriodIndex([pd.Period("2017-09-02")]) + tm.assert_series_equal(result2, Series([2], index=expected_idx2)) + + def test_truncate_one_element_series(self): + # GH 35544 + series = Series([0.1], index=pd.DatetimeIndex(["2020-08-04"])) + before = pd.Timestamp("2020-08-02") + after = pd.Timestamp("2020-08-04") + + result = series.truncate(before=before, after=after) + + # the input Series and the expected Series are the same + tm.assert_series_equal(result, series) + + def test_truncate_index_only_one_unique_value(self): + # GH 42365 + obj = Series(0, index=date_range("2021-06-30", "2021-06-30")).repeat(5) + + truncated = obj.truncate("2021-06-28", "2021-07-01") + + tm.assert_series_equal(truncated, obj) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_tz_localize.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_tz_localize.py new file mode 100644 index 0000000..b8a1ea5 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_tz_localize.py @@ -0,0 +1,113 @@ +import pytest +import pytz + +from pandas._libs.tslibs import timezones + +from pandas import ( + DatetimeIndex, + NaT, + Series, + Timestamp, + date_range, +) +import pandas._testing as tm + + +class TestTZLocalize: + def test_series_tz_localize_ambiguous_bool(self): + # make sure that we are correctly accepting bool values as ambiguous + + # GH#14402 + ts = Timestamp("2015-11-01 01:00:03") + expected0 = Timestamp("2015-11-01 01:00:03-0500", tz="US/Central") + expected1 = Timestamp("2015-11-01 01:00:03-0600", tz="US/Central") + + ser = Series([ts]) + expected0 = Series([expected0]) + expected1 = Series([expected1]) + + with tm.external_error_raised(pytz.AmbiguousTimeError): + ser.dt.tz_localize("US/Central") + + result = ser.dt.tz_localize("US/Central", ambiguous=True) + tm.assert_series_equal(result, expected0) + + result = ser.dt.tz_localize("US/Central", ambiguous=[True]) + tm.assert_series_equal(result, expected0) + + result = ser.dt.tz_localize("US/Central", ambiguous=False) + tm.assert_series_equal(result, expected1) + + result = ser.dt.tz_localize("US/Central", ambiguous=[False]) + tm.assert_series_equal(result, expected1) + + def test_series_tz_localize_matching_index(self): + # Matching the index of the result with that of the 
original series + # GH 43080 + dt_series = Series( + date_range(start="2021-01-01T02:00:00", periods=5, freq="1D"), + index=[2, 6, 7, 8, 11], + dtype="category", + ) + result = dt_series.dt.tz_localize("Europe/Berlin") + expected = Series( + date_range( + start="2021-01-01T02:00:00", periods=5, freq="1D", tz="Europe/Berlin" + ), + index=[2, 6, 7, 8, 11], + ) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("tz", ["Europe/Warsaw", "dateutil/Europe/Warsaw"]) + @pytest.mark.parametrize( + "method, exp", + [ + ["shift_forward", "2015-03-29 03:00:00"], + ["NaT", NaT], + ["raise", None], + ["foo", "invalid"], + ], + ) + def test_tz_localize_nonexistent(self, tz, method, exp): + # GH 8917 + n = 60 + dti = date_range(start="2015-03-29 02:00:00", periods=n, freq="min") + ser = Series(1, index=dti) + df = ser.to_frame() + + if method == "raise": + + with tm.external_error_raised(pytz.NonExistentTimeError): + dti.tz_localize(tz, nonexistent=method) + with tm.external_error_raised(pytz.NonExistentTimeError): + ser.tz_localize(tz, nonexistent=method) + with tm.external_error_raised(pytz.NonExistentTimeError): + df.tz_localize(tz, nonexistent=method) + + elif exp == "invalid": + with pytest.raises(ValueError, match="argument must be one of"): + dti.tz_localize(tz, nonexistent=method) + with pytest.raises(ValueError, match="argument must be one of"): + ser.tz_localize(tz, nonexistent=method) + with pytest.raises(ValueError, match="argument must be one of"): + df.tz_localize(tz, nonexistent=method) + + else: + result = ser.tz_localize(tz, nonexistent=method) + expected = Series(1, index=DatetimeIndex([exp] * n, tz=tz)) + tm.assert_series_equal(result, expected) + + result = df.tz_localize(tz, nonexistent=method) + expected = expected.to_frame() + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_series_tz_localize_empty(self, tzstr): + # GH#2248 + ser = Series(dtype=object) + + ser2 = ser.tz_localize("utc") + assert ser2.index.tz == pytz.utc + + ser2 = ser.tz_localize(tzstr) + timezones.tz_compare(ser2.index.tz, timezones.maybe_get_tz(tzstr)) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_unique.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_unique.py new file mode 100644 index 0000000..856fe6e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_unique.py @@ -0,0 +1,52 @@ +import numpy as np + +from pandas import ( + Categorical, + Series, +) +import pandas._testing as tm + + +class TestUnique: + def test_unique_data_ownership(self): + # it works! 
GH#1807 + Series(Series(["a", "c", "b"]).unique()).sort_values() + + def test_unique(self): + # GH#714 also, dtype=float + ser = Series([1.2345] * 100) + ser[::2] = np.nan + result = ser.unique() + assert len(result) == 2 + + # explicit f4 dtype + ser = Series([1.2345] * 100, dtype="f4") + ser[::2] = np.nan + result = ser.unique() + assert len(result) == 2 + + def test_unique_nan_object_dtype(self): + # NAs in object arrays GH#714 + ser = Series(["foo"] * 100, dtype="O") + ser[::2] = np.nan + result = ser.unique() + assert len(result) == 2 + + def test_unique_none(self): + # decision about None + ser = Series([1, 2, 3, None, None, None], dtype=object) + result = ser.unique() + expected = np.array([1, 2, 3, None], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + def test_unique_categorical(self): + # GH#18051 + cat = Categorical([]) + ser = Series(cat) + result = ser.unique() + tm.assert_categorical_equal(result, cat) + + cat = Categorical([np.nan]) + ser = Series(cat) + result = ser.unique() + tm.assert_categorical_equal(result, cat) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_unstack.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_unstack.py new file mode 100644 index 0000000..23b0682 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_unstack.py @@ -0,0 +1,149 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + MultiIndex, + Series, +) +import pandas._testing as tm + + +def test_unstack_preserves_object(): + mi = MultiIndex.from_product([["bar", "foo"], ["one", "two"]]) + + ser = Series(np.arange(4.0), index=mi, dtype=object) + + res1 = ser.unstack() + assert (res1.dtypes == object).all() + + res2 = ser.unstack(level=0) + assert (res2.dtypes == object).all() + + +def test_unstack(): + index = MultiIndex( + levels=[["bar", "foo"], ["one", "three", "two"]], + codes=[[1, 1, 0, 0], [0, 1, 0, 2]], + ) + + s = Series(np.arange(4.0), index=index) + unstacked = s.unstack() + + expected = DataFrame( + [[2.0, np.nan, 3.0], [0.0, 1.0, np.nan]], + index=["bar", "foo"], + columns=["one", "three", "two"], + ) + + tm.assert_frame_equal(unstacked, expected) + + unstacked = s.unstack(level=0) + tm.assert_frame_equal(unstacked, expected.T) + + index = MultiIndex( + levels=[["bar"], ["one", "two", "three"], [0, 1]], + codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], + ) + s = Series(np.random.randn(6), index=index) + exp_index = MultiIndex( + levels=[["one", "two", "three"], [0, 1]], + codes=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], + ) + expected = DataFrame({"bar": s.values}, index=exp_index).sort_index(level=0) + unstacked = s.unstack(0).sort_index() + tm.assert_frame_equal(unstacked, expected) + + # GH5873 + idx = MultiIndex.from_arrays([[101, 102], [3.5, np.nan]]) + ts = Series([1, 2], index=idx) + left = ts.unstack() + right = DataFrame( + [[np.nan, 1], [2, np.nan]], index=[101, 102], columns=[np.nan, 3.5] + ) + tm.assert_frame_equal(left, right) + + idx = MultiIndex.from_arrays( + [ + ["cat", "cat", "cat", "dog", "dog"], + ["a", "a", "b", "a", "b"], + [1, 2, 1, 1, np.nan], + ] + ) + ts = Series([1.0, 1.1, 1.2, 1.3, 1.4], index=idx) + right = DataFrame( + [[1.0, 1.3], [1.1, np.nan], [np.nan, 1.4], [1.2, np.nan]], + columns=["cat", "dog"], + ) + tpls = [("a", 1), ("a", 2), ("b", np.nan), ("b", 1)] + right.index = MultiIndex.from_tuples(tpls) + tm.assert_frame_equal(ts.unstack(level=0), right) + + +def 
test_unstack_tuplename_in_multiindex(): + # GH 19966 + idx = MultiIndex.from_product( + [["a", "b", "c"], [1, 2, 3]], names=[("A", "a"), ("B", "b")] + ) + ser = Series(1, index=idx) + result = ser.unstack(("A", "a")) + + expected = DataFrame( + [[1, 1, 1], [1, 1, 1], [1, 1, 1]], + columns=MultiIndex.from_tuples([("a",), ("b",), ("c",)], names=[("A", "a")]), + index=pd.Index([1, 2, 3], name=("B", "b")), + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "unstack_idx, expected_values, expected_index, expected_columns", + [ + ( + ("A", "a"), + [[1, 1], [1, 1], [1, 1], [1, 1]], + MultiIndex.from_tuples([(1, 3), (1, 4), (2, 3), (2, 4)], names=["B", "C"]), + MultiIndex.from_tuples([("a",), ("b",)], names=[("A", "a")]), + ), + ( + (("A", "a"), "B"), + [[1, 1, 1, 1], [1, 1, 1, 1]], + pd.Index([3, 4], name="C"), + MultiIndex.from_tuples( + [("a", 1), ("a", 2), ("b", 1), ("b", 2)], names=[("A", "a"), "B"] + ), + ), + ], +) +def test_unstack_mixed_type_name_in_multiindex( + unstack_idx, expected_values, expected_index, expected_columns +): + # GH 19966 + idx = MultiIndex.from_product( + [["a", "b"], [1, 2], [3, 4]], names=[("A", "a"), "B", "C"] + ) + ser = Series(1, index=idx) + result = ser.unstack(unstack_idx) + + expected = DataFrame( + expected_values, columns=expected_columns, index=expected_index + ) + tm.assert_frame_equal(result, expected) + + +def test_unstack_multi_index_categorical_values(): + + mi = tm.makeTimeDataFrame().stack().index.rename(["major", "minor"]) + ser = Series(["foo"] * len(mi), index=mi, name="category", dtype="category") + + result = ser.unstack() + + dti = ser.index.levels[0] + c = pd.Categorical(["foo"] * len(dti)) + expected = DataFrame( + {"A": c.copy(), "B": c.copy(), "C": c.copy(), "D": c.copy()}, + columns=pd.Index(list("ABCD"), name="minor"), + index=dti.rename("major"), + ) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_update.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_update.py new file mode 100644 index 0000000..d9d6641 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_update.py @@ -0,0 +1,129 @@ +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas import ( + CategoricalDtype, + DataFrame, + NaT, + Series, + Timestamp, +) +import pandas._testing as tm + + +class TestUpdate: + def test_update(self): + s = Series([1.5, np.nan, 3.0, 4.0, np.nan]) + s2 = Series([np.nan, 3.5, np.nan, 5.0]) + s.update(s2) + + expected = Series([1.5, 3.5, 3.0, 5.0, np.nan]) + tm.assert_series_equal(s, expected) + + # GH 3217 + df = DataFrame([{"a": 1}, {"a": 3, "b": 2}]) + df["c"] = np.nan + + df["c"].update(Series(["foo"], index=[0])) + expected = DataFrame( + [[1, np.nan, "foo"], [3, 2.0, np.nan]], columns=["a", "b", "c"] + ) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize( + "other, dtype, expected", + [ + # other is int + ([61, 63], "int32", Series([10, 61, 12], dtype="int32")), + ([61, 63], "int64", Series([10, 61, 12])), + ([61, 63], float, Series([10.0, 61.0, 12.0])), + ([61, 63], object, Series([10, 61, 12], dtype=object)), + # other is float, but can be cast to int + ([61.0, 63.0], "int32", Series([10, 61, 12], dtype="int32")), + ([61.0, 63.0], "int64", Series([10, 61, 12])), + ([61.0, 63.0], float, Series([10.0, 61.0, 12.0])), + ([61.0, 63.0], object, Series([10, 61.0, 12], dtype=object)), + # other is float, cannot be cast to int + ([61.1, 
63.1], "int32", Series([10.0, 61.1, 12.0])), + ([61.1, 63.1], "int64", Series([10.0, 61.1, 12.0])), + ([61.1, 63.1], float, Series([10.0, 61.1, 12.0])), + ([61.1, 63.1], object, Series([10, 61.1, 12], dtype=object)), + # other is object, cannot be cast + ([(61,), (63,)], "int32", Series([10, (61,), 12])), + ([(61,), (63,)], "int64", Series([10, (61,), 12])), + ([(61,), (63,)], float, Series([10.0, (61,), 12.0])), + ([(61,), (63,)], object, Series([10, (61,), 12])), + ], + ) + def test_update_dtypes(self, other, dtype, expected): + + ser = Series([10, 11, 12], dtype=dtype) + other = Series(other, index=[1, 3]) + ser.update(other) + + tm.assert_series_equal(ser, expected) + + @pytest.mark.parametrize( + "series, other, expected", + [ + # update by key + ( + Series({"a": 1, "b": 2, "c": 3, "d": 4}), + {"b": 5, "c": np.nan}, + Series({"a": 1, "b": 5, "c": 3, "d": 4}), + ), + # update by position + (Series([1, 2, 3, 4]), [np.nan, 5, 1], Series([1, 5, 1, 4])), + ], + ) + def test_update_from_non_series(self, series, other, expected): + # GH 33215 + series.update(other) + tm.assert_series_equal(series, expected) + + @pytest.mark.parametrize( + "data, other, expected, dtype", + [ + (["a", None], [None, "b"], ["a", "b"], "string[python]"), + pytest.param( + ["a", None], + [None, "b"], + ["a", "b"], + "string[pyarrow]", + marks=td.skip_if_no("pyarrow", min_version="1.0.0"), + ), + ([1, None], [None, 2], [1, 2], "Int64"), + ([True, None], [None, False], [True, False], "boolean"), + ( + ["a", None], + [None, "b"], + ["a", "b"], + CategoricalDtype(categories=["a", "b"]), + ), + ( + [Timestamp(year=2020, month=1, day=1, tz="Europe/London"), NaT], + [NaT, Timestamp(year=2020, month=1, day=1, tz="Europe/London")], + [Timestamp(year=2020, month=1, day=1, tz="Europe/London")] * 2, + "datetime64[ns, Europe/London]", + ), + ], + ) + def test_update_extension_array_series(self, data, other, expected, dtype): + result = Series(data, dtype=dtype) + other = Series(other, dtype=dtype) + expected = Series(expected, dtype=dtype) + + result.update(other) + tm.assert_series_equal(result, expected) + + def test_update_with_categorical_type(self): + # GH 25744 + dtype = CategoricalDtype(["a", "b", "c", "d"]) + s1 = Series(["a", "b", "c"], index=[1, 2, 3], dtype=dtype) + s2 = Series(["b", "a"], index=[1, 2], dtype=dtype) + s1.update(s2) + result = s1 + expected = Series(["b", "a", "c"], index=[1, 2, 3], dtype=dtype) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_value_counts.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_value_counts.py new file mode 100644 index 0000000..c914dba --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_value_counts.py @@ -0,0 +1,228 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Categorical, + CategoricalIndex, + Series, +) +import pandas._testing as tm + + +class TestSeriesValueCounts: + def test_value_counts_datetime(self): + # most dtypes are tested in tests/base + values = [ + pd.Timestamp("2011-01-01 09:00"), + pd.Timestamp("2011-01-01 10:00"), + pd.Timestamp("2011-01-01 11:00"), + pd.Timestamp("2011-01-01 09:00"), + pd.Timestamp("2011-01-01 09:00"), + pd.Timestamp("2011-01-01 11:00"), + ] + + exp_idx = pd.DatetimeIndex( + ["2011-01-01 09:00", "2011-01-01 11:00", "2011-01-01 10:00"] + ) + exp = Series([3, 2, 1], index=exp_idx, name="xxx") + + ser = Series(values, name="xxx") + 
tm.assert_series_equal(ser.value_counts(), exp) + # check DatetimeIndex outputs the same result + idx = pd.DatetimeIndex(values, name="xxx") + tm.assert_series_equal(idx.value_counts(), exp) + + # normalize + exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="xxx") + tm.assert_series_equal(ser.value_counts(normalize=True), exp) + tm.assert_series_equal(idx.value_counts(normalize=True), exp) + + def test_value_counts_datetime_tz(self): + values = [ + pd.Timestamp("2011-01-01 09:00", tz="US/Eastern"), + pd.Timestamp("2011-01-01 10:00", tz="US/Eastern"), + pd.Timestamp("2011-01-01 11:00", tz="US/Eastern"), + pd.Timestamp("2011-01-01 09:00", tz="US/Eastern"), + pd.Timestamp("2011-01-01 09:00", tz="US/Eastern"), + pd.Timestamp("2011-01-01 11:00", tz="US/Eastern"), + ] + + exp_idx = pd.DatetimeIndex( + ["2011-01-01 09:00", "2011-01-01 11:00", "2011-01-01 10:00"], + tz="US/Eastern", + ) + exp = Series([3, 2, 1], index=exp_idx, name="xxx") + + ser = Series(values, name="xxx") + tm.assert_series_equal(ser.value_counts(), exp) + idx = pd.DatetimeIndex(values, name="xxx") + tm.assert_series_equal(idx.value_counts(), exp) + + exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="xxx") + tm.assert_series_equal(ser.value_counts(normalize=True), exp) + tm.assert_series_equal(idx.value_counts(normalize=True), exp) + + def test_value_counts_period(self): + values = [ + pd.Period("2011-01", freq="M"), + pd.Period("2011-02", freq="M"), + pd.Period("2011-03", freq="M"), + pd.Period("2011-01", freq="M"), + pd.Period("2011-01", freq="M"), + pd.Period("2011-03", freq="M"), + ] + + exp_idx = pd.PeriodIndex(["2011-01", "2011-03", "2011-02"], freq="M") + exp = Series([3, 2, 1], index=exp_idx, name="xxx") + + ser = Series(values, name="xxx") + tm.assert_series_equal(ser.value_counts(), exp) + # check PeriodIndex outputs the same result + idx = pd.PeriodIndex(values, name="xxx") + tm.assert_series_equal(idx.value_counts(), exp) + + # normalize + exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="xxx") + tm.assert_series_equal(ser.value_counts(normalize=True), exp) + tm.assert_series_equal(idx.value_counts(normalize=True), exp) + + def test_value_counts_categorical_ordered(self): + # most dtypes are tested in tests/base + values = Categorical([1, 2, 3, 1, 1, 3], ordered=True) + + exp_idx = CategoricalIndex([1, 3, 2], categories=[1, 2, 3], ordered=True) + exp = Series([3, 2, 1], index=exp_idx, name="xxx") + + ser = Series(values, name="xxx") + tm.assert_series_equal(ser.value_counts(), exp) + # check CategoricalIndex outputs the same result + idx = CategoricalIndex(values, name="xxx") + tm.assert_series_equal(idx.value_counts(), exp) + + # normalize + exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="xxx") + tm.assert_series_equal(ser.value_counts(normalize=True), exp) + tm.assert_series_equal(idx.value_counts(normalize=True), exp) + + def test_value_counts_categorical_not_ordered(self): + values = Categorical([1, 2, 3, 1, 1, 3], ordered=False) + + exp_idx = CategoricalIndex([1, 3, 2], categories=[1, 2, 3], ordered=False) + exp = Series([3, 2, 1], index=exp_idx, name="xxx") + + ser = Series(values, name="xxx") + tm.assert_series_equal(ser.value_counts(), exp) + # check CategoricalIndex outputs the same result + idx = CategoricalIndex(values, name="xxx") + tm.assert_series_equal(idx.value_counts(), exp) + + # normalize + exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="xxx") + tm.assert_series_equal(ser.value_counts(normalize=True), exp) + 
tm.assert_series_equal(idx.value_counts(normalize=True), exp) + + def test_value_counts_categorical(self): + # GH#12835 + cats = Categorical(list("abcccb"), categories=list("cabd")) + ser = Series(cats, name="xxx") + res = ser.value_counts(sort=False) + + exp_index = CategoricalIndex(list("cabd"), categories=cats.categories) + exp = Series([3, 1, 2, 0], name="xxx", index=exp_index) + tm.assert_series_equal(res, exp) + + res = ser.value_counts(sort=True) + + exp_index = CategoricalIndex(list("cbad"), categories=cats.categories) + exp = Series([3, 2, 1, 0], name="xxx", index=exp_index) + tm.assert_series_equal(res, exp) + + # check object dtype handles the Series.name as the same + # (tested in tests/base) + ser = Series(["a", "b", "c", "c", "c", "b"], name="xxx") + res = ser.value_counts() + exp = Series([3, 2, 1], name="xxx", index=["c", "b", "a"]) + tm.assert_series_equal(res, exp) + + def test_value_counts_categorical_with_nan(self): + # see GH#9443 + + # sanity check + ser = Series(["a", "b", "a"], dtype="category") + exp = Series([2, 1], index=CategoricalIndex(["a", "b"])) + + res = ser.value_counts(dropna=True) + tm.assert_series_equal(res, exp) + + res = ser.value_counts(dropna=True) + tm.assert_series_equal(res, exp) + + # same Series via two different constructions --> same behaviour + series = [ + Series(["a", "b", None, "a", None, None], dtype="category"), + Series( + Categorical(["a", "b", None, "a", None, None], categories=["a", "b"]) + ), + ] + + for ser in series: + # None is a NaN value, so we exclude its count here + exp = Series([2, 1], index=CategoricalIndex(["a", "b"])) + res = ser.value_counts(dropna=True) + tm.assert_series_equal(res, exp) + + # we don't exclude the count of None and sort by counts + exp = Series([3, 2, 1], index=CategoricalIndex([np.nan, "a", "b"])) + res = ser.value_counts(dropna=False) + tm.assert_series_equal(res, exp) + + # When we aren't sorting by counts, and np.nan isn't a + # category, it should be last. 
+ exp = Series([2, 1, 3], index=CategoricalIndex(["a", "b", np.nan])) + res = ser.value_counts(dropna=False, sort=False) + tm.assert_series_equal(res, exp) + + @pytest.mark.parametrize( + "ser, dropna, exp", + [ + ( + Series([False, True, True, pd.NA]), + False, + Series([2, 1, 1], index=[True, False, pd.NA]), + ), + ( + Series([False, True, True, pd.NA]), + True, + Series([2, 1], index=[True, False]), + ), + ( + Series(range(3), index=[True, False, np.nan]).index, + False, + Series([1, 1, 1], index=[True, False, np.nan]), + ), + ], + ) + def test_value_counts_bool_with_nan(self, ser, dropna, exp): + # GH32146 + out = ser.value_counts(dropna=dropna) + tm.assert_series_equal(out, exp) + + @pytest.mark.parametrize( + "input_array,expected", + [ + ( + [1 + 1j, 1 + 1j, 1, 3j, 3j, 3j], + Series([3, 2, 1], index=pd.Index([3j, 1 + 1j, 1], dtype=np.complex128)), + ), + ( + np.array([1 + 1j, 1 + 1j, 1, 3j, 3j, 3j], dtype=np.complex64), + Series([3, 2, 1], index=pd.Index([3j, 1 + 1j, 1], dtype=np.complex64)), + ), + ], + ) + def test_value_counts_complex_numbers(self, input_array, expected): + # GH 17927 + # Complex Index dtype is cast to object + result = Series(input_array).value_counts() + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_values.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_values.py new file mode 100644 index 0000000..479c703 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_values.py @@ -0,0 +1,29 @@ +import numpy as np +import pytest + +from pandas import ( + IntervalIndex, + Series, + period_range, +) +import pandas._testing as tm + + +class TestValues: + @pytest.mark.parametrize( + "data", + [ + period_range("2000", periods=4), + IntervalIndex.from_breaks([1, 2, 3, 4]), + ], + ) + def test_values_object_extension_dtypes(self, data): + # https://github.com/pandas-dev/pandas/issues/23995 + result = Series(data).values + expected = np.array(data.astype(object)) + tm.assert_numpy_array_equal(result, expected) + + def test_values(self, datetime_series): + tm.assert_almost_equal( + datetime_series.values, datetime_series, check_dtype=False + ) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_view.py b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_view.py new file mode 100644 index 0000000..22902c8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/methods/test_view.py @@ -0,0 +1,58 @@ +import numpy as np +import pytest + +from pandas import ( + Index, + Series, + array, + date_range, +) +import pandas._testing as tm + + +class TestView: + def test_view_i8_to_datetimelike(self): + dti = date_range("2000", periods=4, tz="US/Central") + ser = Series(dti.asi8) + + result = ser.view(dti.dtype) + tm.assert_datetime_array_equal(result._values, dti._data._with_freq(None)) + + pi = dti.tz_localize(None).to_period("D") + ser = Series(pi.asi8) + result = ser.view(pi.dtype) + tm.assert_period_array_equal(result._values, pi._data) + + def test_view_tz(self): + # GH#24024 + ser = Series(date_range("2000", periods=4, tz="US/Central")) + result = ser.view("i8") + expected = Series( + [ + 946706400000000000, + 946792800000000000, + 946879200000000000, + 946965600000000000, + ] + ) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "first", ["m8[ns]", "M8[ns]", "M8[ns, US/Central]", "period[D]"] + ) + @pytest.mark.parametrize( + "second", ["m8[ns]", "M8[ns]", "M8[ns, US/Central]", "period[D]"] 
+ ) + @pytest.mark.parametrize("box", [Series, Index, array]) + def test_view_between_datetimelike(self, first, second, box): + + dti = date_range("2016-01-01", periods=3) + + orig = box(dti) + obj = orig.view(first) + assert obj.dtype == first + tm.assert_numpy_array_equal(np.asarray(obj.view("i8")), dti.asi8) + + res = obj.view(second) + assert res.dtype == second + tm.assert_numpy_array_equal(np.asarray(obj.view("i8")), dti.asi8) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/test_api.py b/.local/lib/python3.8/site-packages/pandas/tests/series/test_api.py new file mode 100644 index 0000000..146663d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/test_api.py @@ -0,0 +1,205 @@ +import inspect +import pydoc + +import numpy as np +import pytest + +from pandas.util._test_decorators import skip_if_no + +import pandas as pd +from pandas import ( + DataFrame, + Index, + Series, + date_range, +) +import pandas._testing as tm + + +class TestSeriesMisc: + def test_tab_completion(self): + # GH 9910 + s = Series(list("abcd")) + # Series of str values should have .str but not .dt/.cat in __dir__ + assert "str" in dir(s) + assert "dt" not in dir(s) + assert "cat" not in dir(s) + + def test_tab_completion_dt(self): + # similarly for .dt + s = Series(date_range("1/1/2015", periods=5)) + assert "dt" in dir(s) + assert "str" not in dir(s) + assert "cat" not in dir(s) + + def test_tab_completion_cat(self): + # Similarly for .cat, but with the twist that str and dt should be + # there if the categories are of that type first cat and str. + s = Series(list("abbcd"), dtype="category") + assert "cat" in dir(s) + assert "str" in dir(s) # as it is a string categorical + assert "dt" not in dir(s) + + def test_tab_completion_cat_str(self): + # similar to cat and str + s = Series(date_range("1/1/2015", periods=5)).astype("category") + assert "cat" in dir(s) + assert "str" not in dir(s) + assert "dt" in dir(s) # as it is a datetime categorical + + def test_tab_completion_with_categorical(self): + # test the tab completion display + ok_for_cat = [ + "categories", + "codes", + "ordered", + "set_categories", + "add_categories", + "remove_categories", + "rename_categories", + "reorder_categories", + "remove_unused_categories", + "as_ordered", + "as_unordered", + ] + + s = Series(list("aabbcde")).astype("category") + results = sorted({r for r in s.cat.__dir__() if not r.startswith("_")}) + tm.assert_almost_equal(results, sorted(set(ok_for_cat))) + + @pytest.mark.parametrize( + "index", + [ + tm.makeUnicodeIndex(10), + tm.makeStringIndex(10), + tm.makeCategoricalIndex(10), + Index(["foo", "bar", "baz"] * 2), + tm.makeDateIndex(10), + tm.makePeriodIndex(10), + tm.makeTimedeltaIndex(10), + tm.makeIntIndex(10), + tm.makeUIntIndex(10), + tm.makeIntIndex(10), + tm.makeFloatIndex(10), + Index([True, False]), + Index([f"a{i}" for i in range(101)]), + pd.MultiIndex.from_tuples(zip("ABCD", "EFGH")), + pd.MultiIndex.from_tuples(zip([0, 1, 2, 3], "EFGH")), + ], + ) + def test_index_tab_completion(self, index): + # dir contains string-like values of the Index. 
+ s = Series(index=index, dtype=object) + dir_s = dir(s) + for i, x in enumerate(s.index.unique(level=0)): + if i < 100: + assert not isinstance(x, str) or not x.isidentifier() or x in dir_s + else: + assert x not in dir_s + + @pytest.mark.parametrize("ser", [Series(dtype=object), Series([1])]) + def test_not_hashable(self, ser): + msg = "unhashable type: 'Series'" + with pytest.raises(TypeError, match=msg): + hash(ser) + + def test_contains(self, datetime_series): + tm.assert_contains_all(datetime_series.index, datetime_series) + + def test_axis_alias(self): + s = Series([1, 2, np.nan]) + tm.assert_series_equal(s.dropna(axis="rows"), s.dropna(axis="index")) + assert s.dropna().sum("rows") == 3 + assert s._get_axis_number("rows") == 0 + assert s._get_axis_name("rows") == "index" + + def test_class_axis(self): + # https://github.com/pandas-dev/pandas/issues/18147 + # no exception and no empty docstring + assert pydoc.getdoc(Series.index) + + def test_ndarray_compat(self): + + # test numpy compat with Series as sub-class of NDFrame + tsdf = DataFrame( + np.random.randn(1000, 3), + columns=["A", "B", "C"], + index=date_range("1/1/2000", periods=1000), + ) + + def f(x): + return x[x.idxmax()] + + result = tsdf.apply(f) + expected = tsdf.max() + tm.assert_series_equal(result, expected) + + def test_ndarray_compat_like_func(self): + # using an ndarray like function + s = Series(np.random.randn(10)) + result = Series(np.ones_like(s)) + expected = Series(1, index=range(10), dtype="float64") + tm.assert_series_equal(result, expected) + + def test_ndarray_compat_ravel(self): + # ravel + s = Series(np.random.randn(10)) + tm.assert_almost_equal(s.ravel(order="F"), s.values.ravel(order="F")) + + def test_empty_method(self): + s_empty = Series(dtype=object) + assert s_empty.empty + + @pytest.mark.parametrize("dtype", ["int64", object]) + def test_empty_method_full_series(self, dtype): + full_series = Series(index=[1], dtype=dtype) + assert not full_series.empty + + @pytest.mark.parametrize("dtype", [None, "Int64"]) + def test_integer_series_size(self, dtype): + # GH 25580 + s = Series(range(9), dtype=dtype) + assert s.size == 9 + + def test_attrs(self): + s = Series([0, 1], name="abc") + assert s.attrs == {} + s.attrs["version"] = 1 + result = s + 1 + assert result.attrs == {"version": 1} + + @skip_if_no("jinja2") + def test_inspect_getmembers(self): + # GH38782 + ser = Series(dtype=object) + with tm.assert_produces_warning(None): + inspect.getmembers(ser) + + def test_unknown_attribute(self): + # GH#9680 + tdi = pd.timedelta_range(start=0, periods=10, freq="1s") + ser = Series(np.random.normal(size=10), index=tdi) + assert "foo" not in ser.__dict__.keys() + msg = "'Series' object has no attribute 'foo'" + with pytest.raises(AttributeError, match=msg): + ser.foo + + @pytest.mark.parametrize("op", ["year", "day", "second", "weekday"]) + def test_datetime_series_no_datelike_attrs(self, op, datetime_series): + # GH#7206 + msg = f"'Series' object has no attribute '{op}'" + with pytest.raises(AttributeError, match=msg): + getattr(datetime_series, op) + + def test_series_datetimelike_attribute_access(self): + # attribute access should still work! 
+ ser = Series({"year": 2000, "month": 1, "day": 10}) + assert ser.year == 2000 + assert ser.month == 1 + assert ser.day == 10 + + def test_series_datetimelike_attribute_access_invalid(self): + ser = Series({"year": 2000, "month": 1, "day": 10}) + msg = "'Series' object has no attribute 'weekday'" + with pytest.raises(AttributeError, match=msg): + ser.weekday diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/test_arithmetic.py b/.local/lib/python3.8/site-packages/pandas/tests/series/test_arithmetic.py new file mode 100644 index 0000000..5fbb427 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/test_arithmetic.py @@ -0,0 +1,894 @@ +from datetime import timedelta +import operator + +import numpy as np +import pytest +import pytz + +from pandas._libs.tslibs import IncompatibleFrequency + +from pandas.core.dtypes.common import ( + is_datetime64_dtype, + is_datetime64tz_dtype, +) + +import pandas as pd +from pandas import ( + Categorical, + Index, + Series, + Timedelta, + bdate_range, + date_range, + isna, +) +import pandas._testing as tm +from pandas.core import ( + nanops, + ops, +) +from pandas.core.computation import expressions as expr + + +@pytest.fixture( + autouse=True, scope="module", params=[0, 1000000], ids=["numexpr", "python"] +) +def switch_numexpr_min_elements(request): + _MIN_ELEMENTS = expr._MIN_ELEMENTS + expr._MIN_ELEMENTS = request.param + yield request.param + expr._MIN_ELEMENTS = _MIN_ELEMENTS + + +def _permute(obj): + return obj.take(np.random.permutation(len(obj))) + + +class TestSeriesFlexArithmetic: + @pytest.mark.parametrize( + "ts", + [ + (lambda x: x, lambda x: x * 2, False), + (lambda x: x, lambda x: x[::2], False), + (lambda x: x, lambda x: 5, True), + (lambda x: tm.makeFloatSeries(), lambda x: tm.makeFloatSeries(), True), + ], + ) + @pytest.mark.parametrize( + "opname", ["add", "sub", "mul", "floordiv", "truediv", "pow"] + ) + def test_flex_method_equivalence(self, opname, ts): + # check that Series.{opname} behaves like Series.__{opname}__, + tser = tm.makeTimeSeries().rename("ts") + + series = ts[0](tser) + other = ts[1](tser) + check_reverse = ts[2] + + op = getattr(Series, opname) + alt = getattr(operator, opname) + + result = op(series, other) + expected = alt(series, other) + tm.assert_almost_equal(result, expected) + if check_reverse: + rop = getattr(Series, "r" + opname) + result = rop(series, other) + expected = alt(other, series) + tm.assert_almost_equal(result, expected) + + def test_flex_method_subclass_metadata_preservation(self, all_arithmetic_operators): + # GH 13208 + class MySeries(Series): + _metadata = ["x"] + + @property + def _constructor(self): + return MySeries + + opname = all_arithmetic_operators + op = getattr(Series, opname) + m = MySeries([1, 2, 3], name="test") + m.x = 42 + result = op(m, 1) + assert result.x == 42 + + def test_flex_add_scalar_fill_value(self): + # GH12723 + ser = Series([0, 1, np.nan, 3, 4, 5]) + + exp = ser.fillna(0).add(2) + res = ser.add(2, fill_value=0) + tm.assert_series_equal(res, exp) + + pairings = [(Series.div, operator.truediv, 1), (Series.rdiv, ops.rtruediv, 1)] + for op in ["add", "sub", "mul", "pow", "truediv", "floordiv"]: + fv = 0 + lop = getattr(Series, op) + lequiv = getattr(operator, op) + rop = getattr(Series, "r" + op) + # bind op at definition time... 
+ requiv = lambda x, y, op=op: getattr(operator, op)(y, x) + pairings.append((lop, lequiv, fv)) + pairings.append((rop, requiv, fv)) + + @pytest.mark.parametrize("op, equiv_op, fv", pairings) + def test_operators_combine(self, op, equiv_op, fv): + def _check_fill(meth, op, a, b, fill_value=0): + exp_index = a.index.union(b.index) + a = a.reindex(exp_index) + b = b.reindex(exp_index) + + amask = isna(a) + bmask = isna(b) + + exp_values = [] + for i in range(len(exp_index)): + with np.errstate(all="ignore"): + if amask[i]: + if bmask[i]: + exp_values.append(np.nan) + continue + exp_values.append(op(fill_value, b[i])) + elif bmask[i]: + if amask[i]: + exp_values.append(np.nan) + continue + exp_values.append(op(a[i], fill_value)) + else: + exp_values.append(op(a[i], b[i])) + + result = meth(a, b, fill_value=fill_value) + expected = Series(exp_values, exp_index) + tm.assert_series_equal(result, expected) + + a = Series([np.nan, 1.0, 2.0, 3.0, np.nan], index=np.arange(5)) + b = Series([np.nan, 1, np.nan, 3, np.nan, 4.0], index=np.arange(6)) + + result = op(a, b) + exp = equiv_op(a, b) + tm.assert_series_equal(result, exp) + _check_fill(op, equiv_op, a, b, fill_value=fv) + # should accept axis=0 or axis='rows' + op(a, b, axis=0) + + +class TestSeriesArithmetic: + # Some of these may end up in tests/arithmetic, but are not yet sorted + + def test_add_series_with_period_index(self): + rng = pd.period_range("1/1/2000", "1/1/2010", freq="A") + ts = Series(np.random.randn(len(rng)), index=rng) + + result = ts + ts[::2] + expected = ts + ts + expected.iloc[1::2] = np.nan + tm.assert_series_equal(result, expected) + + result = ts + _permute(ts[::2]) + tm.assert_series_equal(result, expected) + + msg = "Input has different freq=D from Period\\(freq=A-DEC\\)" + with pytest.raises(IncompatibleFrequency, match=msg): + ts + ts.asfreq("D", how="end") + + @pytest.mark.parametrize( + "target_add,input_value,expected_value", + [ + ("!", ["hello", "world"], ["hello!", "world!"]), + ("m", ["hello", "world"], ["hellom", "worldm"]), + ], + ) + def test_string_addition(self, target_add, input_value, expected_value): + # GH28658 - ensure adding 'm' does not raise an error + a = Series(input_value) + + result = a + target_add + expected = Series(expected_value) + tm.assert_series_equal(result, expected) + + def test_divmod(self): + # GH#25557 + a = Series([1, 1, 1, np.nan], index=["a", "b", "c", "d"]) + b = Series([2, np.nan, 1, np.nan], index=["a", "b", "d", "e"]) + + result = a.divmod(b) + expected = divmod(a, b) + tm.assert_series_equal(result[0], expected[0]) + tm.assert_series_equal(result[1], expected[1]) + + result = a.rdivmod(b) + expected = divmod(b, a) + tm.assert_series_equal(result[0], expected[0]) + tm.assert_series_equal(result[1], expected[1]) + + @pytest.mark.parametrize("index", [None, range(9)]) + def test_series_integer_mod(self, index): + # GH#24396 + s1 = Series(range(1, 10)) + s2 = Series("foo", index=index) + + msg = "not all arguments converted during string formatting" + + with pytest.raises(TypeError, match=msg): + s2 % s1 + + def test_add_with_duplicate_index(self): + # GH14227 + s1 = Series([1, 2], index=[1, 1]) + s2 = Series([10, 10], index=[1, 2]) + result = s1 + s2 + expected = Series([11, 12, np.nan], index=[1, 1, 2]) + tm.assert_series_equal(result, expected) + + def test_add_na_handling(self): + from datetime import date + from decimal import Decimal + + ser = Series( + [Decimal("1.3"), Decimal("2.3")], index=[date(2012, 1, 1), date(2012, 1, 2)] + ) + + result = ser + ser.shift(1) + 
result2 = ser.shift(1) + ser + assert isna(result[0]) + assert isna(result2[0]) + + def test_add_corner_cases(self, datetime_series): + empty = Series([], index=Index([]), dtype=np.float64) + + result = datetime_series + empty + assert np.isnan(result).all() + + result = empty + empty.copy() + assert len(result) == 0 + + def test_add_float_plus_int(self, datetime_series): + # float + int + int_ts = datetime_series.astype(int)[:-5] + added = datetime_series + int_ts + expected = Series( + datetime_series.values[:-5] + int_ts.values, + index=datetime_series.index[:-5], + name="ts", + ) + tm.assert_series_equal(added[:-5], expected) + + def test_mul_empty_int_corner_case(self): + s1 = Series([], [], dtype=np.int32) + s2 = Series({"x": 0.0}) + tm.assert_series_equal(s1 * s2, Series([np.nan], index=["x"])) + + def test_sub_datetimelike_align(self): + # GH#7500 + # datetimelike ops need to align + dt = Series(date_range("2012-1-1", periods=3, freq="D")) + dt.iloc[2] = np.nan + dt2 = dt[::-1] + + expected = Series([timedelta(0), timedelta(0), pd.NaT]) + # name is reset + result = dt2 - dt + tm.assert_series_equal(result, expected) + + expected = Series(expected, name=0) + result = (dt2.to_frame() - dt.to_frame())[0] + tm.assert_series_equal(result, expected) + + def test_alignment_doesnt_change_tz(self): + # GH#33671 + dti = date_range("2016-01-01", periods=10, tz="CET") + dti_utc = dti.tz_convert("UTC") + ser = Series(10, index=dti) + ser_utc = Series(10, index=dti_utc) + + # we don't care about the result, just that original indexes are unchanged + ser * ser_utc + + assert ser.index is dti + assert ser_utc.index is dti_utc + + def test_arithmetic_with_duplicate_index(self): + + # GH#8363 + # integer ops with a non-unique index + index = [2, 2, 3, 3, 4] + ser = Series(np.arange(1, 6, dtype="int64"), index=index) + other = Series(np.arange(5, dtype="int64"), index=index) + result = ser - other + expected = Series(1, index=[2, 2, 3, 3, 4]) + tm.assert_series_equal(result, expected) + + # GH#8363 + # datetime ops with a non-unique index + ser = Series(date_range("20130101 09:00:00", periods=5), index=index) + other = Series(date_range("20130101", periods=5), index=index) + result = ser - other + expected = Series(Timedelta("9 hours"), index=[2, 2, 3, 3, 4]) + tm.assert_series_equal(result, expected) + + +# ------------------------------------------------------------------ +# Comparisons + + +class TestSeriesFlexComparison: + @pytest.mark.parametrize("axis", [0, None, "index"]) + def test_comparison_flex_basic(self, axis, comparison_op): + left = Series(np.random.randn(10)) + right = Series(np.random.randn(10)) + result = getattr(left, comparison_op.__name__)(right, axis=axis) + expected = comparison_op(left, right) + tm.assert_series_equal(result, expected) + + def test_comparison_bad_axis(self, comparison_op): + left = Series(np.random.randn(10)) + right = Series(np.random.randn(10)) + + msg = "No axis named 1 for object type" + with pytest.raises(ValueError, match=msg): + getattr(left, comparison_op.__name__)(right, axis=1) + + @pytest.mark.parametrize( + "values, op", + [ + ([False, False, True, False], "eq"), + ([True, True, False, True], "ne"), + ([False, False, True, False], "le"), + ([False, False, False, False], "lt"), + ([False, True, True, False], "ge"), + ([False, True, False, False], "gt"), + ], + ) + def test_comparison_flex_alignment(self, values, op): + left = Series([1, 3, 2], index=list("abc")) + right = Series([2, 2, 2], index=list("bcd")) + result = getattr(left, op)(right) + 
expected = Series(values, index=list("abcd")) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "values, op, fill_value", + [ + ([False, False, True, True], "eq", 2), + ([True, True, False, False], "ne", 2), + ([False, False, True, True], "le", 0), + ([False, False, False, True], "lt", 0), + ([True, True, True, False], "ge", 0), + ([True, True, False, False], "gt", 0), + ], + ) + def test_comparison_flex_alignment_fill(self, values, op, fill_value): + left = Series([1, 3, 2], index=list("abc")) + right = Series([2, 2, 2], index=list("bcd")) + result = getattr(left, op)(right, fill_value=fill_value) + expected = Series(values, index=list("abcd")) + tm.assert_series_equal(result, expected) + + +class TestSeriesComparison: + def test_comparison_different_length(self): + a = Series(["a", "b", "c"]) + b = Series(["b", "a"]) + msg = "only compare identically-labeled Series" + with pytest.raises(ValueError, match=msg): + a < b + + a = Series([1, 2]) + b = Series([2, 3, 4]) + with pytest.raises(ValueError, match=msg): + a == b + + @pytest.mark.parametrize("opname", ["eq", "ne", "gt", "lt", "ge", "le"]) + def test_ser_flex_cmp_return_dtypes(self, opname): + # GH#15115 + ser = Series([1, 3, 2], index=range(3)) + const = 2 + result = getattr(ser, opname)(const).dtypes + expected = np.dtype("bool") + assert result == expected + + @pytest.mark.parametrize("opname", ["eq", "ne", "gt", "lt", "ge", "le"]) + def test_ser_flex_cmp_return_dtypes_empty(self, opname): + # GH#15115 empty Series case + ser = Series([1, 3, 2], index=range(3)) + empty = ser.iloc[:0] + const = 2 + result = getattr(empty, opname)(const).dtypes + expected = np.dtype("bool") + assert result == expected + + @pytest.mark.parametrize( + "names", [(None, None, None), ("foo", "bar", None), ("baz", "baz", "baz")] + ) + def test_ser_cmp_result_names(self, names, comparison_op): + # datetime64 dtype + op = comparison_op + dti = date_range("1949-06-07 03:00:00", freq="H", periods=5, name=names[0]) + ser = Series(dti).rename(names[1]) + result = op(ser, dti) + assert result.name == names[2] + + # datetime64tz dtype + dti = dti.tz_localize("US/Central") + dti = pd.DatetimeIndex(dti, freq="infer") # freq not preserved by tz_localize + ser = Series(dti).rename(names[1]) + result = op(ser, dti) + assert result.name == names[2] + + # timedelta64 dtype + tdi = dti - dti.shift(1) + ser = Series(tdi).rename(names[1]) + result = op(ser, tdi) + assert result.name == names[2] + + # interval dtype + if op in [operator.eq, operator.ne]: + # interval dtype comparisons not yet implemented + ii = pd.interval_range(start=0, periods=5, name=names[0]) + ser = Series(ii).rename(names[1]) + result = op(ser, ii) + assert result.name == names[2] + + # categorical + if op in [operator.eq, operator.ne]: + # categorical dtype comparisons raise for inequalities + cidx = tdi.astype("category") + ser = Series(cidx).rename(names[1]) + result = op(ser, cidx) + assert result.name == names[2] + + def test_comparisons(self): + left = np.random.randn(10) + right = np.random.randn(10) + left[:3] = np.nan + + result = nanops.nangt(left, right) + with np.errstate(invalid="ignore"): + expected = (left > right).astype("O") + expected[:3] = np.nan + + tm.assert_almost_equal(result, expected) + + s = Series(["a", "b", "c"]) + s2 = Series([False, True, False]) + + # it works! 
+ exp = Series([False, False, False]) + tm.assert_series_equal(s == s2, exp) + tm.assert_series_equal(s2 == s, exp) + + # ----------------------------------------------------------------- + # Categorical Dtype Comparisons + + def test_categorical_comparisons(self): + # GH#8938 + # allow equality comparisons + a = Series(list("abc"), dtype="category") + b = Series(list("abc"), dtype="object") + c = Series(["a", "b", "cc"], dtype="object") + d = Series(list("acb"), dtype="object") + e = Categorical(list("abc")) + f = Categorical(list("acb")) + + # vs scalar + assert not (a == "a").all() + assert ((a != "a") == ~(a == "a")).all() + + assert not ("a" == a).all() + assert (a == "a")[0] + assert ("a" == a)[0] + assert not ("a" != a)[0] + + # vs list-like + assert (a == a).all() + assert not (a != a).all() + + assert (a == list(a)).all() + assert (a == b).all() + assert (b == a).all() + assert ((~(a == b)) == (a != b)).all() + assert ((~(b == a)) == (b != a)).all() + + assert not (a == c).all() + assert not (c == a).all() + assert not (a == d).all() + assert not (d == a).all() + + # vs a cat-like + assert (a == e).all() + assert (e == a).all() + assert not (a == f).all() + assert not (f == a).all() + + assert (~(a == e) == (a != e)).all() + assert (~(e == a) == (e != a)).all() + assert (~(a == f) == (a != f)).all() + assert (~(f == a) == (f != a)).all() + + # non-equality is not comparable + msg = "can only compare equality or not" + with pytest.raises(TypeError, match=msg): + a < b + with pytest.raises(TypeError, match=msg): + b < a + with pytest.raises(TypeError, match=msg): + a > b + with pytest.raises(TypeError, match=msg): + b > a + + def test_unequal_categorical_comparison_raises_type_error(self): + # unequal comparison should raise for unordered cats + cat = Series(Categorical(list("abc"))) + msg = "can only compare equality or not" + with pytest.raises(TypeError, match=msg): + cat > "b" + + cat = Series(Categorical(list("abc"), ordered=False)) + with pytest.raises(TypeError, match=msg): + cat > "b" + + # https://github.com/pandas-dev/pandas/issues/9836#issuecomment-92123057 + # and following comparisons with scalars not in categories should raise + # for unequal comps, but not for equal/not equal + cat = Series(Categorical(list("abc"), ordered=True)) + + msg = "Invalid comparison between dtype=category and str" + with pytest.raises(TypeError, match=msg): + cat < "d" + with pytest.raises(TypeError, match=msg): + cat > "d" + with pytest.raises(TypeError, match=msg): + "d" < cat + with pytest.raises(TypeError, match=msg): + "d" > cat + + tm.assert_series_equal(cat == "d", Series([False, False, False])) + tm.assert_series_equal(cat != "d", Series([True, True, True])) + + # ----------------------------------------------------------------- + + def test_comparison_tuples(self): + # GH#11339 + # comparisons vs tuple + s = Series([(1, 1), (1, 2)]) + + result = s == (1, 2) + expected = Series([False, True]) + tm.assert_series_equal(result, expected) + + result = s != (1, 2) + expected = Series([True, False]) + tm.assert_series_equal(result, expected) + + result = s == (0, 0) + expected = Series([False, False]) + tm.assert_series_equal(result, expected) + + result = s != (0, 0) + expected = Series([True, True]) + tm.assert_series_equal(result, expected) + + s = Series([(1, 1), (1, 1)]) + + result = s == (1, 1) + expected = Series([True, True]) + tm.assert_series_equal(result, expected) + + result = s != (1, 1) + expected = Series([False, False]) + tm.assert_series_equal(result, expected) + + def 
test_comparison_frozenset(self): + ser = Series([frozenset([1]), frozenset([1, 2])]) + + result = ser == frozenset([1]) + expected = Series([True, False]) + tm.assert_series_equal(result, expected) + + def test_comparison_operators_with_nas(self, comparison_op): + ser = Series(bdate_range("1/1/2000", periods=10), dtype=object) + ser[::2] = np.nan + + # test that comparisons work + val = ser[5] + + result = comparison_op(ser, val) + expected = comparison_op(ser.dropna(), val).reindex(ser.index) + + if comparison_op is operator.ne: + expected = expected.fillna(True).astype(bool) + else: + expected = expected.fillna(False).astype(bool) + + tm.assert_series_equal(result, expected) + + def test_ne(self): + ts = Series([3, 4, 5, 6, 7], [3, 4, 5, 6, 7], dtype=float) + expected = [True, True, False, True, True] + assert tm.equalContents(ts.index != 5, expected) + assert tm.equalContents(~(ts.index == 5), expected) + + @pytest.mark.parametrize( + "left, right", + [ + ( + Series([1, 2, 3], index=list("ABC"), name="x"), + Series([2, 2, 2], index=list("ABD"), name="x"), + ), + ( + Series([1, 2, 3], index=list("ABC"), name="x"), + Series([2, 2, 2, 2], index=list("ABCD"), name="x"), + ), + ], + ) + def test_comp_ops_df_compat(self, left, right, frame_or_series): + # GH 1134 + msg = f"Can only compare identically-labeled {frame_or_series.__name__} objects" + if frame_or_series is not Series: + left = left.to_frame() + right = right.to_frame() + + with pytest.raises(ValueError, match=msg): + left == right + with pytest.raises(ValueError, match=msg): + right == left + + with pytest.raises(ValueError, match=msg): + left != right + with pytest.raises(ValueError, match=msg): + right != left + + with pytest.raises(ValueError, match=msg): + left < right + with pytest.raises(ValueError, match=msg): + right < left + + def test_compare_series_interval_keyword(self): + # GH#25338 + ser = Series(["IntervalA", "IntervalB", "IntervalC"]) + result = ser == "IntervalA" + expected = Series([True, False, False]) + tm.assert_series_equal(result, expected) + + +# ------------------------------------------------------------------ +# Unsorted +# These arithmetic tests were previously in other files, eventually +# should be parametrized and put into tests.arithmetic + + +class TestTimeSeriesArithmetic: + def test_series_add_tz_mismatch_converts_to_utc(self): + rng = date_range("1/1/2011", periods=100, freq="H", tz="utc") + + perm = np.random.permutation(100)[:90] + ser1 = Series( + np.random.randn(90), index=rng.take(perm).tz_convert("US/Eastern") + ) + + perm = np.random.permutation(100)[:90] + ser2 = Series( + np.random.randn(90), index=rng.take(perm).tz_convert("Europe/Berlin") + ) + + result = ser1 + ser2 + + uts1 = ser1.tz_convert("utc") + uts2 = ser2.tz_convert("utc") + expected = uts1 + uts2 + + assert result.index.tz == pytz.UTC + tm.assert_series_equal(result, expected) + + def test_series_add_aware_naive_raises(self): + rng = date_range("1/1/2011", periods=10, freq="H") + ser = Series(np.random.randn(len(rng)), index=rng) + + ser_utc = ser.tz_localize("utc") + + msg = "Cannot join tz-naive with tz-aware DatetimeIndex" + with pytest.raises(Exception, match=msg): + ser + ser_utc + + with pytest.raises(Exception, match=msg): + ser_utc + ser + + def test_datetime_understood(self): + # Ensures it doesn't fail to create the right series + # reported in issue#16726 + series = Series(date_range("2012-01-01", periods=3)) + offset = pd.offsets.DateOffset(days=6) + result = series - offset + expected = 
Series(pd.to_datetime(["2011-12-26", "2011-12-27", "2011-12-28"])) + tm.assert_series_equal(result, expected) + + def test_align_date_objects_with_datetimeindex(self): + rng = date_range("1/1/2000", periods=20) + ts = Series(np.random.randn(20), index=rng) + + ts_slice = ts[5:] + ts2 = ts_slice.copy() + ts2.index = [x.date() for x in ts2.index] + + result = ts + ts2 + result2 = ts2 + ts + expected = ts + ts[5:] + expected.index = expected.index._with_freq(None) + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result2, expected) + + +class TestNamePreservation: + @pytest.mark.parametrize("box", [list, tuple, np.array, Index, Series, pd.array]) + @pytest.mark.parametrize("flex", [True, False]) + def test_series_ops_name_retention( + self, request, flex, box, names, all_binary_operators + ): + # GH#33930 consistent name retention + op = all_binary_operators + + left = Series(range(10), name=names[0]) + right = Series(range(10), name=names[1]) + + name = op.__name__.strip("_") + is_logical = name in ["and", "rand", "xor", "rxor", "or", "ror"] + is_rlogical = is_logical and name.startswith("r") + + right = box(right) + if flex: + if is_logical: + # Series doesn't have these as flex methods + return + result = getattr(left, name)(right) + else: + # GH#37374 logical ops behaving as set ops deprecated + warn = FutureWarning if is_rlogical and box is Index else None + msg = "operating as a set operation is deprecated" + with tm.assert_produces_warning(warn, match=msg): + # stacklevel is correct for Index op, not reversed op + result = op(left, right) + + if box is Index and is_rlogical: + # Index treats these as set operators, so does not defer + assert isinstance(result, Index) + return + + assert isinstance(result, Series) + if box in [Index, Series]: + assert result.name is names[2] or result.name == names[2] + else: + assert result.name is names[0] or result.name == names[0] + + def test_binop_maybe_preserve_name(self, datetime_series): + # names match, preserve + result = datetime_series * datetime_series + assert result.name == datetime_series.name + result = datetime_series.mul(datetime_series) + assert result.name == datetime_series.name + + result = datetime_series * datetime_series[:-2] + assert result.name == datetime_series.name + + # names don't match, don't preserve + cp = datetime_series.copy() + cp.name = "something else" + result = datetime_series + cp + assert result.name is None + result = datetime_series.add(cp) + assert result.name is None + + ops = ["add", "sub", "mul", "div", "truediv", "floordiv", "mod", "pow"] + ops = ops + ["r" + op for op in ops] + for op in ops: + # names match, preserve + ser = datetime_series.copy() + result = getattr(ser, op)(ser) + assert result.name == datetime_series.name + + # names don't match, don't preserve + cp = datetime_series.copy() + cp.name = "changed" + result = getattr(ser, op)(cp) + assert result.name is None + + def test_scalarop_preserve_name(self, datetime_series): + result = datetime_series * 2 + assert result.name == datetime_series.name + + +class TestInplaceOperations: + @pytest.mark.parametrize( + "dtype1, dtype2, dtype_expected, dtype_mul", + ( + ("Int64", "Int64", "Int64", "Int64"), + ("float", "float", "float", "float"), + ("Int64", "float", "Float64", "Float64"), + ("Int64", "Float64", "Float64", "Float64"), + ), + ) + def test_series_inplace_ops(self, dtype1, dtype2, dtype_expected, dtype_mul): + # GH 37910 + + ser1 = Series([1], dtype=dtype1) + ser2 = Series([2], dtype=dtype2) + ser1 += ser2 + expected 
= Series([3], dtype=dtype_expected) + tm.assert_series_equal(ser1, expected) + + ser1 -= ser2 + expected = Series([1], dtype=dtype_expected) + tm.assert_series_equal(ser1, expected) + + ser1 *= ser2 + expected = Series([2], dtype=dtype_mul) + tm.assert_series_equal(ser1, expected) + + +def test_none_comparison(series_with_simple_index): + series = series_with_simple_index + + if len(series) < 1: + pytest.skip("Test doesn't make sense on empty data") + + # bug brought up by #1079 + # changed from TypeError in 0.17.0 + series.iloc[0] = np.nan + + # noinspection PyComparisonWithNone + result = series == None # noqa:E711 + assert not result.iat[0] + assert not result.iat[1] + + # noinspection PyComparisonWithNone + result = series != None # noqa:E711 + assert result.iat[0] + assert result.iat[1] + + result = None == series # noqa:E711 + assert not result.iat[0] + assert not result.iat[1] + + result = None != series # noqa:E711 + assert result.iat[0] + assert result.iat[1] + + if is_datetime64_dtype(series.dtype) or is_datetime64tz_dtype(series.dtype): + # Following DatetimeIndex (and Timestamp) convention, + # inequality comparisons with Series[datetime64] raise + msg = "Invalid comparison" + with pytest.raises(TypeError, match=msg): + None > series + with pytest.raises(TypeError, match=msg): + series > None + else: + result = None > series + assert not result.iat[0] + assert not result.iat[1] + + result = series < None + assert not result.iat[0] + assert not result.iat[1] + + +def test_series_varied_multiindex_alignment(): + # GH 20414 + s1 = Series( + range(8), + index=pd.MultiIndex.from_product( + [list("ab"), list("xy"), [1, 2]], names=["ab", "xy", "num"] + ), + ) + s2 = Series( + [1000 * i for i in range(1, 5)], + index=pd.MultiIndex.from_product([list("xy"), [1, 2]], names=["xy", "num"]), + ) + result = s1.loc[pd.IndexSlice[["a"], :, :]] + s2 + expected = Series( + [1000, 2001, 3002, 4003], + index=pd.MultiIndex.from_tuples( + [("a", "x", 1), ("a", "x", 2), ("a", "y", 1), ("a", "y", 2)], + names=["ab", "xy", "num"], + ), + ) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/test_constructors.py b/.local/lib/python3.8/site-packages/pandas/tests/series/test_constructors.py new file mode 100644 index 0000000..cab5fd4 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/test_constructors.py @@ -0,0 +1,1906 @@ +from collections import OrderedDict +from datetime import ( + datetime, + timedelta, +) + +from dateutil.tz import tzoffset +import numpy as np +import numpy.ma as ma +import pytest + +from pandas._libs import ( + iNaT, + lib, +) +from pandas.compat.numpy import ( + np_version_under1p19, + np_version_under1p20, +) +import pandas.util._test_decorators as td + +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_datetime64tz_dtype, +) +from pandas.core.dtypes.dtypes import CategoricalDtype + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + DatetimeIndex, + Index, + Interval, + IntervalIndex, + MultiIndex, + NaT, + Period, + RangeIndex, + Series, + Timestamp, + date_range, + isna, + period_range, + timedelta_range, +) +import pandas._testing as tm +from pandas.core.api import Int64Index +from pandas.core.arrays import ( + IntervalArray, + period_array, +) +from pandas.core.internals.blocks import NumericBlock + + +class TestSeriesConstructors: + @pytest.mark.parametrize( + "constructor,check_index_type", + [ + # NOTE: some overlap with test_constructor_empty but that 
test does not + # test for None or an empty generator. + # test_constructor_pass_none tests None but only with the index also + # passed. + (lambda: Series(), True), + (lambda: Series(None), True), + (lambda: Series({}), True), + (lambda: Series(()), False), # creates a RangeIndex + (lambda: Series([]), False), # creates a RangeIndex + (lambda: Series(_ for _ in []), False), # creates a RangeIndex + (lambda: Series(data=None), True), + (lambda: Series(data={}), True), + (lambda: Series(data=()), False), # creates a RangeIndex + (lambda: Series(data=[]), False), # creates a RangeIndex + (lambda: Series(data=(_ for _ in [])), False), # creates a RangeIndex + ], + ) + def test_empty_constructor(self, constructor, check_index_type): + # TODO: share with frame test of the same name + with tm.assert_produces_warning(FutureWarning): + expected = Series() + result = constructor() + + assert len(result.index) == 0 + tm.assert_series_equal(result, expected, check_index_type=check_index_type) + + def test_invalid_dtype(self): + # GH15520 + msg = "not understood" + invalid_list = [Timestamp, "Timestamp", list] + for dtype in invalid_list: + with pytest.raises(TypeError, match=msg): + Series([], name="time", dtype=dtype) + + def test_invalid_compound_dtype(self): + # GH#13296 + c_dtype = np.dtype([("a", "i8"), ("b", "f4")]) + cdt_arr = np.array([(1, 0.4), (256, -13)], dtype=c_dtype) + + with pytest.raises(ValueError, match="Use DataFrame instead"): + Series(cdt_arr, index=["A", "B"]) + + def test_scalar_conversion(self): + + # Pass in scalar is disabled + scalar = Series(0.5) + assert not isinstance(scalar, float) + + # Coercion + assert float(Series([1.0])) == 1.0 + assert int(Series([1.0])) == 1 + + def test_scalar_extension_dtype(self, ea_scalar_and_dtype): + # GH 28401 + + ea_scalar, ea_dtype = ea_scalar_and_dtype + + ser = Series(ea_scalar, index=range(3)) + expected = Series([ea_scalar] * 3, dtype=ea_dtype) + + assert ser.dtype == ea_dtype + tm.assert_series_equal(ser, expected) + + def test_constructor(self, datetime_series): + with tm.assert_produces_warning(FutureWarning): + empty_series = Series() + assert datetime_series.index._is_all_dates + + # Pass in Series + derived = Series(datetime_series) + assert derived.index._is_all_dates + + assert tm.equalContents(derived.index, datetime_series.index) + # Ensure new index is not created + assert id(datetime_series.index) == id(derived.index) + + # Mixed type Series + mixed = Series(["hello", np.NaN], index=[0, 1]) + assert mixed.dtype == np.object_ + assert mixed[1] is np.NaN + + assert not empty_series.index._is_all_dates + with tm.assert_produces_warning(FutureWarning): + assert not Series().index._is_all_dates + + # exception raised is of type ValueError GH35744 + with pytest.raises(ValueError, match="Data must be 1-dimensional"): + Series(np.random.randn(3, 3), index=np.arange(3)) + + mixed.name = "Series" + rs = Series(mixed).name + xp = "Series" + assert rs == xp + + # raise on MultiIndex GH4187 + m = MultiIndex.from_arrays([[1, 2], [3, 4]]) + msg = "initializing a Series from a MultiIndex is not supported" + with pytest.raises(NotImplementedError, match=msg): + Series(m) + + def test_constructor_index_ndim_gt_1_raises(self): + # GH#18579 + df = DataFrame([[1, 2], [3, 4], [5, 6]], index=[3, 6, 9]) + with pytest.raises(ValueError, match="Index data must be 1-dimensional"): + Series([1, 3, 2], index=df) + + @pytest.mark.parametrize("input_class", [list, dict, OrderedDict]) + def test_constructor_empty(self, input_class): + with 
tm.assert_produces_warning(FutureWarning): + empty = Series() + empty2 = Series(input_class()) + + # these are Index() and RangeIndex() which don't compare type equal + # but are just .equals + tm.assert_series_equal(empty, empty2, check_index_type=False) + + # With explicit dtype: + empty = Series(dtype="float64") + empty2 = Series(input_class(), dtype="float64") + tm.assert_series_equal(empty, empty2, check_index_type=False) + + # GH 18515 : with dtype=category: + empty = Series(dtype="category") + empty2 = Series(input_class(), dtype="category") + tm.assert_series_equal(empty, empty2, check_index_type=False) + + if input_class is not list: + # With index: + with tm.assert_produces_warning(FutureWarning): + empty = Series(index=range(10)) + empty2 = Series(input_class(), index=range(10)) + tm.assert_series_equal(empty, empty2) + + # With index and dtype float64: + empty = Series(np.nan, index=range(10)) + empty2 = Series(input_class(), index=range(10), dtype="float64") + tm.assert_series_equal(empty, empty2) + + # GH 19853 : with empty string, index and dtype str + empty = Series("", dtype=str, index=range(3)) + empty2 = Series("", index=range(3)) + tm.assert_series_equal(empty, empty2) + + @pytest.mark.parametrize("input_arg", [np.nan, float("nan")]) + def test_constructor_nan(self, input_arg): + empty = Series(dtype="float64", index=range(10)) + empty2 = Series(input_arg, index=range(10)) + + tm.assert_series_equal(empty, empty2, check_index_type=False) + + @pytest.mark.parametrize( + "dtype", + ["f8", "i8", "M8[ns]", "m8[ns]", "category", "object", "datetime64[ns, UTC]"], + ) + @pytest.mark.parametrize("index", [None, Index([])]) + def test_constructor_dtype_only(self, dtype, index): + # GH-20865 + result = Series(dtype=dtype, index=index) + assert result.dtype == dtype + assert len(result) == 0 + + def test_constructor_no_data_index_order(self): + with tm.assert_produces_warning(FutureWarning): + result = Series(index=["b", "a", "c"]) + assert result.index.tolist() == ["b", "a", "c"] + + def test_constructor_no_data_string_type(self): + # GH 22477 + result = Series(index=[1], dtype=str) + assert np.isnan(result.iloc[0]) + + @pytest.mark.parametrize("item", ["entry", "ѐ", 13]) + def test_constructor_string_element_string_type(self, item): + # GH 22477 + result = Series(item, index=[1], dtype=str) + assert result.iloc[0] == str(item) + + def test_constructor_dtype_str_na_values(self, string_dtype): + # https://github.com/pandas-dev/pandas/issues/21083 + ser = Series(["x", None], dtype=string_dtype) + result = ser.isna() + expected = Series([False, True]) + tm.assert_series_equal(result, expected) + assert ser.iloc[1] is None + + ser = Series(["x", np.nan], dtype=string_dtype) + assert np.isnan(ser.iloc[1]) + + def test_constructor_series(self): + index1 = ["d", "b", "a", "c"] + index2 = sorted(index1) + s1 = Series([4, 7, -5, 3], index=index1) + s2 = Series(s1, index=index2) + + tm.assert_series_equal(s2, s1.sort_index()) + + def test_constructor_iterable(self): + # GH 21987 + class Iter: + def __iter__(self): + yield from range(10) + + expected = Series(list(range(10)), dtype="int64") + result = Series(Iter(), dtype="int64") + tm.assert_series_equal(result, expected) + + def test_constructor_sequence(self): + # GH 21987 + expected = Series(list(range(10)), dtype="int64") + result = Series(range(10), dtype="int64") + tm.assert_series_equal(result, expected) + + def test_constructor_single_str(self): + # GH 21987 + expected = Series(["abc"]) + result = Series("abc") + 
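+        # a lone string is treated as a scalar, not as an iterable of
+        # characters, so Series("abc") is a single-element object Series
+        # rather than Series(["a", "b", "c"])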
tm.assert_series_equal(result, expected) + + def test_constructor_list_like(self): + + # make sure that we are coercing different + # list-likes to standard dtypes and not + # platform specific + expected = Series([1, 2, 3], dtype="int64") + for obj in [[1, 2, 3], (1, 2, 3), np.array([1, 2, 3], dtype="int64")]: + result = Series(obj, index=[0, 1, 2]) + tm.assert_series_equal(result, expected) + + def test_constructor_boolean_index(self): + # GH#18579 + s1 = Series([1, 2, 3], index=[4, 5, 6]) + + index = s1 == 2 + result = Series([1, 3, 2], index=index) + expected = Series([1, 3, 2], index=[False, True, False]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("dtype", ["bool", "int32", "int64", "float64"]) + def test_constructor_index_dtype(self, dtype): + # GH 17088 + + s = Series(Index([0, 2, 4]), dtype=dtype) + assert s.dtype == dtype + + @pytest.mark.parametrize( + "input_vals", + [ + ([1, 2]), + (["1", "2"]), + (list(date_range("1/1/2011", periods=2, freq="H"))), + (list(date_range("1/1/2011", periods=2, freq="H", tz="US/Eastern"))), + ([Interval(left=0, right=5)]), + ], + ) + def test_constructor_list_str(self, input_vals, string_dtype): + # GH 16605 + # Ensure that data elements from a list are converted to strings + # when dtype is str, 'str', or 'U' + result = Series(input_vals, dtype=string_dtype) + expected = Series(input_vals).astype(string_dtype) + tm.assert_series_equal(result, expected) + + def test_constructor_list_str_na(self, string_dtype): + result = Series([1.0, 2.0, np.nan], dtype=string_dtype) + expected = Series(["1.0", "2.0", np.nan], dtype=object) + tm.assert_series_equal(result, expected) + assert np.isnan(result[2]) + + def test_constructor_generator(self): + gen = (i for i in range(10)) + + result = Series(gen) + exp = Series(range(10)) + tm.assert_series_equal(result, exp) + + # same but with non-default index + gen = (i for i in range(10)) + result = Series(gen, index=range(10, 20)) + exp.index = range(10, 20) + tm.assert_series_equal(result, exp) + + def test_constructor_map(self): + # GH8909 + m = map(lambda x: x, range(10)) + + result = Series(m) + exp = Series(range(10)) + tm.assert_series_equal(result, exp) + + # same but with non-default index + m = map(lambda x: x, range(10)) + result = Series(m, index=range(10, 20)) + exp.index = range(10, 20) + tm.assert_series_equal(result, exp) + + def test_constructor_categorical(self): + cat = Categorical([0, 1, 2, 0, 1, 2], ["a", "b", "c"], fastpath=True) + res = Series(cat) + tm.assert_categorical_equal(res.values, cat) + + # can cast to a new dtype + result = Series(Categorical([1, 2, 3]), dtype="int64") + expected = Series([1, 2, 3], dtype="int64") + tm.assert_series_equal(result, expected) + + def test_construct_from_categorical_with_dtype(self): + # GH12574 + cat = Series(Categorical([1, 2, 3]), dtype="category") + assert is_categorical_dtype(cat) + assert is_categorical_dtype(cat.dtype) + + def test_construct_intlist_values_category_dtype(self): + ser = Series([1, 2, 3], dtype="category") + assert is_categorical_dtype(ser) + assert is_categorical_dtype(ser.dtype) + + def test_constructor_categorical_with_coercion(self): + factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"]) + # test basic creation / coercion of categoricals + s = Series(factor, name="A") + assert s.dtype == "category" + assert len(s) == len(factor) + str(s.values) + str(s) + + # in a frame + df = DataFrame({"A": factor}) + result = df["A"] + tm.assert_series_equal(result, s) + result = df.iloc[:, 0] + 
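+        # positional selection via iloc should round-trip the categorical
+        # dtype exactly like the label-based lookup above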
tm.assert_series_equal(result, s) + assert len(df) == len(factor) + str(df.values) + str(df) + + df = DataFrame({"A": s}) + result = df["A"] + tm.assert_series_equal(result, s) + assert len(df) == len(factor) + str(df.values) + str(df) + + # multiples + df = DataFrame({"A": s, "B": s, "C": 1}) + result1 = df["A"] + result2 = df["B"] + tm.assert_series_equal(result1, s) + tm.assert_series_equal(result2, s, check_names=False) + assert result2.name == "B" + assert len(df) == len(factor) + str(df.values) + str(df) + + def test_constructor_categorical_with_coercion2(self): + # GH8623 + x = DataFrame( + [[1, "John P. Doe"], [2, "Jane Dove"], [1, "John P. Doe"]], + columns=["person_id", "person_name"], + ) + x["person_name"] = Categorical(x.person_name) # doing this breaks transform + + expected = x.iloc[0].person_name + result = x.person_name.iloc[0] + assert result == expected + + result = x.person_name[0] + assert result == expected + + result = x.person_name.loc[0] + assert result == expected + + def test_constructor_series_to_categorical(self): + # see GH#16524: test conversion of Series to Categorical + series = Series(["a", "b", "c"]) + + result = Series(series, dtype="category") + expected = Series(["a", "b", "c"], dtype="category") + + tm.assert_series_equal(result, expected) + + def test_constructor_categorical_dtype(self): + result = Series( + ["a", "b"], dtype=CategoricalDtype(["a", "b", "c"], ordered=True) + ) + assert is_categorical_dtype(result.dtype) is True + tm.assert_index_equal(result.cat.categories, Index(["a", "b", "c"])) + assert result.cat.ordered + + result = Series(["a", "b"], dtype=CategoricalDtype(["b", "a"])) + assert is_categorical_dtype(result.dtype) + tm.assert_index_equal(result.cat.categories, Index(["b", "a"])) + assert result.cat.ordered is False + + # GH 19565 - Check broadcasting of scalar with Categorical dtype + result = Series( + "a", index=[0, 1], dtype=CategoricalDtype(["a", "b"], ordered=True) + ) + expected = Series( + ["a", "a"], index=[0, 1], dtype=CategoricalDtype(["a", "b"], ordered=True) + ) + tm.assert_series_equal(result, expected) + + def test_constructor_categorical_string(self): + # GH 26336: the string 'category' maintains existing CategoricalDtype + cdt = CategoricalDtype(categories=list("dabc"), ordered=True) + expected = Series(list("abcabc"), dtype=cdt) + + # Series(Categorical, dtype='category') keeps existing dtype + cat = Categorical(list("abcabc"), dtype=cdt) + result = Series(cat, dtype="category") + tm.assert_series_equal(result, expected) + + # Series(Series[Categorical], dtype='category') keeps existing dtype + result = Series(result, dtype="category") + tm.assert_series_equal(result, expected) + + def test_categorical_sideeffects_free(self): + # Passing a categorical to a Series and then changing values in either + # the series or the categorical should not change the values in the + # other one, IF you specify copy! 
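+        # A rough sketch of the contract exercised below (values are
+        # illustrative):
+        #
+        #   cat = Categorical(["a", "b"])
+        #   Series(cat, copy=True)   # backed by a copy of cat's values;
+        #                            # mutating one side leaves the other alone
+        #   Series(cat)              # shares cat's values; mutations are
+        #                            # visible from both objects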
+ cat = Categorical(["a", "b", "c", "a"]) + s = Series(cat, copy=True) + assert s.cat is not cat + s.cat.categories = [1, 2, 3] + exp_s = np.array([1, 2, 3, 1], dtype=np.int64) + exp_cat = np.array(["a", "b", "c", "a"], dtype=np.object_) + tm.assert_numpy_array_equal(s.__array__(), exp_s) + tm.assert_numpy_array_equal(cat.__array__(), exp_cat) + + # setting + s[0] = 2 + exp_s2 = np.array([2, 2, 3, 1], dtype=np.int64) + tm.assert_numpy_array_equal(s.__array__(), exp_s2) + tm.assert_numpy_array_equal(cat.__array__(), exp_cat) + + # however, copy is False by default + # so this WILL change values + cat = Categorical(["a", "b", "c", "a"]) + s = Series(cat) + assert s.values is cat + s.cat.categories = [1, 2, 3] + exp_s = np.array([1, 2, 3, 1], dtype=np.int64) + tm.assert_numpy_array_equal(s.__array__(), exp_s) + tm.assert_numpy_array_equal(cat.__array__(), exp_s) + + s[0] = 2 + exp_s2 = np.array([2, 2, 3, 1], dtype=np.int64) + tm.assert_numpy_array_equal(s.__array__(), exp_s2) + tm.assert_numpy_array_equal(cat.__array__(), exp_s2) + + def test_unordered_compare_equal(self): + left = Series(["a", "b", "c"], dtype=CategoricalDtype(["a", "b"])) + right = Series(Categorical(["a", "b", np.nan], categories=["a", "b"])) + tm.assert_series_equal(left, right) + + def test_constructor_maskedarray(self): + data = ma.masked_all((3,), dtype=float) + result = Series(data) + expected = Series([np.nan, np.nan, np.nan]) + tm.assert_series_equal(result, expected) + + data[0] = 0.0 + data[2] = 2.0 + index = ["a", "b", "c"] + result = Series(data, index=index) + expected = Series([0.0, np.nan, 2.0], index=index) + tm.assert_series_equal(result, expected) + + data[1] = 1.0 + result = Series(data, index=index) + expected = Series([0.0, 1.0, 2.0], index=index) + tm.assert_series_equal(result, expected) + + data = ma.masked_all((3,), dtype=int) + result = Series(data) + expected = Series([np.nan, np.nan, np.nan], dtype=float) + tm.assert_series_equal(result, expected) + + data[0] = 0 + data[2] = 2 + index = ["a", "b", "c"] + result = Series(data, index=index) + expected = Series([0, np.nan, 2], index=index, dtype=float) + tm.assert_series_equal(result, expected) + + data[1] = 1 + result = Series(data, index=index) + expected = Series([0, 1, 2], index=index, dtype=int) + tm.assert_series_equal(result, expected) + + data = ma.masked_all((3,), dtype=bool) + result = Series(data) + expected = Series([np.nan, np.nan, np.nan], dtype=object) + tm.assert_series_equal(result, expected) + + data[0] = True + data[2] = False + index = ["a", "b", "c"] + result = Series(data, index=index) + expected = Series([True, np.nan, False], index=index, dtype=object) + tm.assert_series_equal(result, expected) + + data[1] = True + result = Series(data, index=index) + expected = Series([True, True, False], index=index, dtype=bool) + tm.assert_series_equal(result, expected) + + data = ma.masked_all((3,), dtype="M8[ns]") + result = Series(data) + expected = Series([iNaT, iNaT, iNaT], dtype="M8[ns]") + tm.assert_series_equal(result, expected) + + data[0] = datetime(2001, 1, 1) + data[2] = datetime(2001, 1, 3) + index = ["a", "b", "c"] + result = Series(data, index=index) + expected = Series( + [datetime(2001, 1, 1), iNaT, datetime(2001, 1, 3)], + index=index, + dtype="M8[ns]", + ) + tm.assert_series_equal(result, expected) + + data[1] = datetime(2001, 1, 2) + result = Series(data, index=index) + expected = Series( + [datetime(2001, 1, 1), datetime(2001, 1, 2), datetime(2001, 1, 3)], + index=index, + dtype="M8[ns]", + ) + 
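+        # with every slot now unmasked, no iNaT remains and the result
+        # should match a fully populated datetime64[ns] Series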
+        tm.assert_series_equal(result, expected)
+
+    def test_constructor_maskedarray_hardened(self):
+        # Check numpy masked arrays with hard masks -- from GH24574
+        data = ma.masked_all((3,), dtype=float).harden_mask()
+        result = Series(data)
+        expected = Series([np.nan, np.nan, np.nan])
+        tm.assert_series_equal(result, expected)
+
+    def test_series_ctor_plus_datetimeindex(self):
+        rng = date_range("20090415", "20090519", freq="B")
+        data = {k: 1 for k in rng}
+
+        result = Series(data, index=rng)
+        assert result.index is rng
+
+    def test_constructor_default_index(self):
+        s = Series([0, 1, 2])
+        tm.assert_index_equal(s.index, Index(range(3)), exact=True)
+
+    @pytest.mark.parametrize(
+        "input",
+        [
+            [1, 2, 3],
+            (1, 2, 3),
+            list(range(3)),
+            Categorical(["a", "b", "a"]),
+            (i for i in range(3)),
+            map(lambda x: x, range(3)),
+        ],
+    )
+    def test_constructor_index_mismatch(self, input):
+        # GH 19342
+        # test that construction of a Series with an index of different length
+        # raises an error
+        msg = r"Length of values \(3\) does not match length of index \(4\)"
+        with pytest.raises(ValueError, match=msg):
+            Series(input, index=np.arange(4))
+
+    def test_constructor_numpy_scalar(self):
+        # GH 19342
+        # construction with a numpy scalar
+        # should not raise
+        result = Series(np.array(100), index=np.arange(4), dtype="int64")
+        expected = Series(100, index=np.arange(4), dtype="int64")
+        tm.assert_series_equal(result, expected)
+
+    def test_constructor_broadcast_list(self):
+        # GH 19342
+        # construction with single-element container and index
+        # should raise
+        msg = r"Length of values \(1\) does not match length of index \(3\)"
+        with pytest.raises(ValueError, match=msg):
+            Series(["foo"], index=["a", "b", "c"])
+
+    def test_constructor_corner(self):
+        df = tm.makeTimeDataFrame()
+        objs = [df, df]
+        s = Series(objs, index=[0, 1])
+        assert isinstance(s, Series)
+
+    def test_constructor_sanitize(self):
+        s = Series(np.array([1.0, 1.0, 8.0]), dtype="i8")
+        assert s.dtype == np.dtype("i8")
+
+        msg = "float-dtype values containing NaN and an integer dtype"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            ser = Series(np.array([1.0, 1.0, np.nan]), copy=True, dtype="i8")
+        assert ser.dtype == np.dtype("f8")
+
+    def test_constructor_copy(self):
+        # GH15125
+        # test dtype parameter has no side effects on copy=True
+        for data in [[1.0], np.array([1.0])]:
+            x = Series(data)
+            y = Series(x, copy=True, dtype=float)
+
+            # copy=True maintains original data in Series
+            tm.assert_series_equal(x, y)
+
+            # changes to the original do not affect the copy
+            x[0] = 2.0
+            assert not x.equals(y)
+            assert x[0] == 2.0
+            assert y[0] == 1.0
+
+    @td.skip_array_manager_invalid_test  # TODO(ArrayManager) rewrite test
+    @pytest.mark.parametrize(
+        "index",
+        [
+            date_range("20170101", periods=3, tz="US/Eastern"),
+            date_range("20170101", periods=3),
+            timedelta_range("1 day", periods=3),
+            period_range("2012Q1", periods=3, freq="Q"),
+            Index(list("abc")),
+            Int64Index([1, 2, 3]),
+            RangeIndex(0, 3),
+        ],
+        ids=lambda x: type(x).__name__,
+    )
+    def test_constructor_limit_copies(self, index):
+        # GH 17449
+        # limit copies of input
+        s = Series(index)
+
+        # we make 1 copy; this is just a smoke test here
+        assert s._mgr.blocks[0].values is not index
+
+    def test_constructor_pass_none(self):
+        with tm.assert_produces_warning(FutureWarning):
+            s = Series(None, index=range(5))
+        assert s.dtype == np.float64
+
+        s = Series(None, index=range(5), dtype=object)
+        assert s.dtype == np.object_
+
+        # GH 7431
+        # inference on the index
+        with tm.assert_produces_warning(FutureWarning):
+            s = Series(index=np.array([None]))
+            expected = Series(index=Index([None]))
+        tm.assert_series_equal(s, expected)
+
+    def test_constructor_pass_nan_nat(self):
+        # GH 13467
+        exp = Series([np.nan, np.nan], dtype=np.float64)
+        assert exp.dtype == np.float64
+        tm.assert_series_equal(Series([np.nan, np.nan]), exp)
+        tm.assert_series_equal(Series(np.array([np.nan, np.nan])), exp)
+
+        exp = Series([NaT, NaT])
+        assert exp.dtype == "datetime64[ns]"
+        tm.assert_series_equal(Series([NaT, NaT]), exp)
+        tm.assert_series_equal(Series(np.array([NaT, NaT])), exp)
+
+        tm.assert_series_equal(Series([NaT, np.nan]), exp)
+        tm.assert_series_equal(Series(np.array([NaT, np.nan])), exp)
+
+        tm.assert_series_equal(Series([np.nan, NaT]), exp)
+        tm.assert_series_equal(Series(np.array([np.nan, NaT])), exp)
+
+    def test_constructor_cast(self):
+        msg = "could not convert string to float"
+        with pytest.raises(ValueError, match=msg):
+            Series(["a", "b", "c"], dtype=float)
+
+    def test_constructor_signed_int_overflow_deprecation(self):
+        # GH#41734 disallow silent overflow
+        msg = "Values are too large to be losslessly cast"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            ser = Series([1, 200, 923442], dtype="int8")
+
+        expected = Series([1, -56, 50], dtype="int8")
+        tm.assert_series_equal(ser, expected)
+
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            ser = Series([1, 200, 923442], dtype="uint8")
+
+        expected = Series([1, 200, 50], dtype="uint8")
+        tm.assert_series_equal(ser, expected)
+
+    def test_constructor_unsigned_dtype_overflow(self, any_unsigned_int_numpy_dtype):
+        # see gh-15832
+        msg = "Trying to coerce negative values to unsigned integers"
+        with pytest.raises(OverflowError, match=msg):
+            Series([-1], dtype=any_unsigned_int_numpy_dtype)
+
+    def test_constructor_coerce_float_fail(self, any_int_numpy_dtype):
+        # see gh-15832
+        msg = "Trying to coerce float values to integers"
+        with pytest.raises(ValueError, match=msg):
+            Series([1, 2, 3.5], dtype=any_int_numpy_dtype)
+
+    def test_constructor_coerce_float_valid(self, float_numpy_dtype):
+        s = Series([1, 2, 3.5], dtype=float_numpy_dtype)
+        expected = Series([1, 2, 3.5]).astype(float_numpy_dtype)
+        tm.assert_series_equal(s, expected)
+
+    def test_constructor_invalid_coerce_ints_with_float_nan(self, any_int_numpy_dtype):
+        # GH 22585
+
+        msg = "cannot convert float NaN to integer"
+        with pytest.raises(ValueError, match=msg):
+            Series([1, 2, np.nan], dtype=any_int_numpy_dtype)
+
+    def test_constructor_dtype_no_cast(self):
+        # see gh-1572
+        s = Series([1, 2, 3])
+        s2 = Series(s, dtype=np.int64)
+
+        s2[1] = 5
+        assert s[1] == 5
+
+    def test_constructor_datelike_coercion(self):
+
+        # GH 9477
+        # do not incorrectly infer datetimelike-looking values when object
+        # dtype is specified
+        s = Series([Timestamp("20130101"), "NOV"], dtype=object)
+        assert s.iloc[0] == Timestamp("20130101")
+        assert s.iloc[1] == "NOV"
+        assert s.dtype == object
+
+    def test_constructor_datelike_coercion2(self):
+        # the dtype was being reset on the slicing and re-inferred to datetime
+        # even though the blocks are mixed
+        belly = "216 3T19".split()
+        wing1 = "2T15 4H19".split()
+        wing2 = "416 4T20".split()
+        mat = pd.to_datetime("2016-01-22 2019-09-07".split())
+        df = DataFrame({"wing1": wing1, "wing2": wing2, "mat": mat}, index=belly)
+
+        result = df.loc["3T19"]
+        assert result.dtype == object
+        result = df.loc["216"]
+        assert result.dtype == object
+
+    def test_constructor_mixed_int_and_timestamp(self, frame_or_series):
+        # specifically Timestamp with nanos, not datetimes
+        objs = [Timestamp(9), 10, NaT.value]
+        result = frame_or_series(objs, dtype="M8[ns]")
+
+        expected = frame_or_series([Timestamp(9), Timestamp(10), NaT])
+        tm.assert_equal(result, expected)
+
+    def test_constructor_datetimes_with_nulls(self):
+        # gh-15869
+        for arr in [
+            np.array([None, None, None, None, datetime.now(), None]),
+            np.array([None, None, datetime.now(), None]),
+        ]:
+            result = Series(arr)
+            assert result.dtype == "M8[ns]"
+
+    def test_constructor_dtype_datetime64(self):
+
+        s = Series(iNaT, dtype="M8[ns]", index=range(5))
+        assert isna(s).all()
+
+        # in theory this should be all nulls, but since
+        # we are not specifying a dtype, it is ambiguous
+        s = Series(iNaT, index=range(5))
+        assert not isna(s).all()
+
+        s = Series(np.nan, dtype="M8[ns]", index=range(5))
+        assert isna(s).all()
+
+        s = Series([datetime(2001, 1, 2, 0, 0), iNaT], dtype="M8[ns]")
+        assert isna(s[1])
+        assert s.dtype == "M8[ns]"
+
+        s = Series([datetime(2001, 1, 2, 0, 0), np.nan], dtype="M8[ns]")
+        assert isna(s[1])
+        assert s.dtype == "M8[ns]"
+
+    def test_constructor_dtype_datetime64_10(self):
+        # GH3416
+        dates = [
+            np.datetime64(datetime(2013, 1, 1)),
+            np.datetime64(datetime(2013, 1, 2)),
+            np.datetime64(datetime(2013, 1, 3)),
+        ]
+
+        s = Series(dates)
+        assert s.dtype == "M8[ns]"
+
+        s.iloc[0] = np.nan
+        assert s.dtype == "M8[ns]"
+
+        # GH3414 related
+        expected = Series(
+            [datetime(2013, 1, 1), datetime(2013, 1, 2), datetime(2013, 1, 3)],
+            dtype="datetime64[ns]",
+        )
+
+        result = Series(Series(dates).view(np.int64) / 1000000, dtype="M8[ms]")
+        tm.assert_series_equal(result, expected)
+
+        result = Series(dates, dtype="datetime64[ns]")
+        tm.assert_series_equal(result, expected)
+
+        expected = Series(
+            [NaT, datetime(2013, 1, 2), datetime(2013, 1, 3)], dtype="datetime64[ns]"
+        )
+        result = Series([np.nan] + dates[1:], dtype="datetime64[ns]")
+        tm.assert_series_equal(result, expected)
+
+        dts = Series(dates, dtype="datetime64[ns]")
+
+        # valid astype
+        dts.astype("int64")
+
+        # invalid casting
+        msg = r"cannot astype a datetimelike from \[datetime64\[ns\]\] to \[int32\]"
+        with pytest.raises(TypeError, match=msg):
+            dts.astype("int32")
+
+        # ints are ok
+        # we test with np.int64 to get similar results on
+        # windows / 32-bit platforms
+        result = Series(dts, dtype=np.int64)
+        expected = Series(dts.astype(np.int64))
+        tm.assert_series_equal(result, expected)
+
+    def test_constructor_dtype_datetime64_9(self):
+        # invalid dates can be held as object
+        result = Series([datetime(2, 1, 1)])
+        assert result[0] == datetime(2, 1, 1, 0, 0)
+
+        result = Series([datetime(3000, 1, 1)])
+        assert result[0] == datetime(3000, 1, 1, 0, 0)
+
+    def test_constructor_dtype_datetime64_8(self):
+        # don't mix types
+        result = Series([Timestamp("20130101"), 1], index=["a", "b"])
+        assert result["a"] == Timestamp("20130101")
+        assert result["b"] == 1
+
+    def test_constructor_dtype_datetime64_7(self):
+        # GH6529
+        # coerce datetime64 non-ns properly
+        dates = date_range("01-Jan-2015", "01-Dec-2015", freq="M")
+        values2 = dates.view(np.ndarray).astype("datetime64[ns]")
+        expected = Series(values2, index=dates)
+
+        for dtype in ["s", "D", "ms", "us", "ns"]:
+            values1 = dates.view(np.ndarray).astype(f"M8[{dtype}]")
+            result = Series(values1, dates)
+            tm.assert_series_equal(result, expected)
+
+        # GH 13876
+        # coerce non-ns to object properly
+        expected = Series(values2, index=dates, dtype=object)
+        for dtype in ["s", "D", "ms", "us", "ns"]:
+            values1 =
dates.view(np.ndarray).astype(f"M8[{dtype}]") + result = Series(values1, index=dates, dtype=object) + tm.assert_series_equal(result, expected) + + # leave datetime.date alone + dates2 = np.array([d.date() for d in dates.to_pydatetime()], dtype=object) + series1 = Series(dates2, dates) + tm.assert_numpy_array_equal(series1.values, dates2) + assert series1.dtype == object + + def test_constructor_dtype_datetime64_6(self): + # these will correctly infer a datetime + msg = "containing strings is deprecated" + + with tm.assert_produces_warning(FutureWarning, match=msg): + ser = Series([None, NaT, "2013-08-05 15:30:00.000001"]) + assert ser.dtype == "datetime64[ns]" + + with tm.assert_produces_warning(FutureWarning, match=msg): + ser = Series([np.nan, NaT, "2013-08-05 15:30:00.000001"]) + assert ser.dtype == "datetime64[ns]" + + with tm.assert_produces_warning(FutureWarning, match=msg): + ser = Series([NaT, None, "2013-08-05 15:30:00.000001"]) + assert ser.dtype == "datetime64[ns]" + + with tm.assert_produces_warning(FutureWarning, match=msg): + ser = Series([NaT, np.nan, "2013-08-05 15:30:00.000001"]) + assert ser.dtype == "datetime64[ns]" + + def test_constructor_dtype_datetime64_5(self): + # tz-aware (UTC and other tz's) + # GH 8411 + dr = date_range("20130101", periods=3) + assert Series(dr).iloc[0].tz is None + dr = date_range("20130101", periods=3, tz="UTC") + assert str(Series(dr).iloc[0].tz) == "UTC" + dr = date_range("20130101", periods=3, tz="US/Eastern") + assert str(Series(dr).iloc[0].tz) == "US/Eastern" + + def test_constructor_dtype_datetime64_4(self): + # non-convertible + s = Series([1479596223000, -1479590, NaT]) + assert s.dtype == "object" + assert s[2] is NaT + assert "NaT" in str(s) + + def test_constructor_dtype_datetime64_3(self): + # if we passed a NaT it remains + s = Series([datetime(2010, 1, 1), datetime(2, 1, 1), NaT]) + assert s.dtype == "object" + assert s[2] is NaT + assert "NaT" in str(s) + + def test_constructor_dtype_datetime64_2(self): + # if we passed a nan it remains + s = Series([datetime(2010, 1, 1), datetime(2, 1, 1), np.nan]) + assert s.dtype == "object" + assert s[2] is np.nan + assert "NaN" in str(s) + + def test_constructor_with_datetime_tz(self): + + # 8260 + # support datetime64 with tz + + dr = date_range("20130101", periods=3, tz="US/Eastern") + s = Series(dr) + assert s.dtype.name == "datetime64[ns, US/Eastern]" + assert s.dtype == "datetime64[ns, US/Eastern]" + assert is_datetime64tz_dtype(s.dtype) + assert "datetime64[ns, US/Eastern]" in str(s) + + # export + result = s.values + assert isinstance(result, np.ndarray) + assert result.dtype == "datetime64[ns]" + + exp = DatetimeIndex(result) + exp = exp.tz_localize("UTC").tz_convert(tz=s.dt.tz) + tm.assert_index_equal(dr, exp) + + # indexing + result = s.iloc[0] + assert result == Timestamp("2013-01-01 00:00:00-0500", tz="US/Eastern") + result = s[0] + assert result == Timestamp("2013-01-01 00:00:00-0500", tz="US/Eastern") + + result = s[Series([True, True, False], index=s.index)] + tm.assert_series_equal(result, s[0:2]) + + result = s.iloc[0:1] + tm.assert_series_equal(result, Series(dr[0:1])) + + # concat + result = pd.concat([s.iloc[0:1], s.iloc[1:]]) + tm.assert_series_equal(result, s) + + # short str + assert "datetime64[ns, US/Eastern]" in str(s) + + # formatting with NaT + result = s.shift() + assert "datetime64[ns, US/Eastern]" in str(result) + assert "NaT" in str(result) + + # long str + t = Series(date_range("20130101", periods=1000, tz="US/Eastern")) + assert "datetime64[ns, US/Eastern]" 
in str(t) + + result = DatetimeIndex(s, freq="infer") + tm.assert_index_equal(result, dr) + + def test_constructor_with_datetime_tz4(self): + # inference + s = Series( + [ + Timestamp("2013-01-01 13:00:00-0800", tz="US/Pacific"), + Timestamp("2013-01-02 14:00:00-0800", tz="US/Pacific"), + ] + ) + assert s.dtype == "datetime64[ns, US/Pacific]" + assert lib.infer_dtype(s, skipna=True) == "datetime64" + + def test_constructor_with_datetime_tz3(self): + s = Series( + [ + Timestamp("2013-01-01 13:00:00-0800", tz="US/Pacific"), + Timestamp("2013-01-02 14:00:00-0800", tz="US/Eastern"), + ] + ) + assert s.dtype == "object" + assert lib.infer_dtype(s, skipna=True) == "datetime" + + def test_constructor_with_datetime_tz2(self): + # with all NaT + s = Series(NaT, index=[0, 1], dtype="datetime64[ns, US/Eastern]") + expected = Series(DatetimeIndex(["NaT", "NaT"], tz="US/Eastern")) + tm.assert_series_equal(s, expected) + + def test_constructor_no_partial_datetime_casting(self): + # GH#40111 + vals = [ + "nan", + Timestamp("1990-01-01"), + "2015-03-14T16:15:14.123-08:00", + "2019-03-04T21:56:32.620-07:00", + None, + ] + ser = Series(vals) + assert all(ser[i] is vals[i] for i in range(len(vals))) + + @pytest.mark.parametrize("arr_dtype", [np.int64, np.float64]) + @pytest.mark.parametrize("dtype", ["M8", "m8"]) + @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "h", "m", "D"]) + def test_construction_to_datetimelike_unit(self, arr_dtype, dtype, unit): + # tests all units + # gh-19223 + dtype = f"{dtype}[{unit}]" + arr = np.array([1, 2, 3], dtype=arr_dtype) + s = Series(arr) + result = s.astype(dtype) + expected = Series(arr.astype(dtype)) + + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("arg", ["2013-01-01 00:00:00", NaT, np.nan, None]) + def test_constructor_with_naive_string_and_datetimetz_dtype(self, arg): + # GH 17415: With naive string + result = Series([arg], dtype="datetime64[ns, CET]") + expected = Series(Timestamp(arg)).dt.tz_localize("CET") + tm.assert_series_equal(result, expected) + + def test_constructor_datetime64_bigendian(self): + # GH#30976 + ms = np.datetime64(1, "ms") + arr = np.array([np.datetime64(1, "ms")], dtype=">M8[ms]") + + result = Series(arr) + expected = Series([Timestamp(ms)]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("interval_constructor", [IntervalIndex, IntervalArray]) + def test_construction_interval(self, interval_constructor): + # construction from interval & array of intervals + intervals = interval_constructor.from_breaks(np.arange(3), closed="right") + result = Series(intervals) + assert result.dtype == "interval[int64, right]" + tm.assert_index_equal(Index(result.values), Index(intervals)) + + @pytest.mark.parametrize( + "data_constructor", [list, np.array], ids=["list", "ndarray[object]"] + ) + def test_constructor_infer_interval(self, data_constructor): + # GH 23563: consistent closed results in interval dtype + data = [Interval(0, 1), Interval(0, 2), None] + result = Series(data_constructor(data)) + expected = Series(IntervalArray(data)) + assert result.dtype == "interval[float64, right]" + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "data_constructor", [list, np.array], ids=["list", "ndarray[object]"] + ) + def test_constructor_interval_mixed_closed(self, data_constructor): + # GH 23563: mixed closed results in object dtype (not interval dtype) + data = [Interval(0, 1, closed="both"), Interval(0, 2, closed="neither")] + result = Series(data_constructor(data)) + assert 
result.dtype == object + assert result.tolist() == data + + def test_construction_consistency(self): + + # make sure that we are not re-localizing upon construction + # GH 14928 + ser = Series(date_range("20130101", periods=3, tz="US/Eastern")) + + result = Series(ser, dtype=ser.dtype) + tm.assert_series_equal(result, ser) + + result = Series(ser.dt.tz_convert("UTC"), dtype=ser.dtype) + tm.assert_series_equal(result, ser) + + msg = "will interpret the data as wall-times" + with tm.assert_produces_warning(FutureWarning, match=msg): + # deprecate behavior inconsistent with DatetimeIndex GH#33401 + result = Series(ser.values, dtype=ser.dtype) + tm.assert_series_equal(result, ser) + + with tm.assert_produces_warning(None): + # one suggested alternative to the deprecated usage + middle = Series(ser.values).dt.tz_localize("UTC") + result = middle.dt.tz_convert(ser.dtype.tz) + tm.assert_series_equal(result, ser) + + with tm.assert_produces_warning(None): + # the other suggested alternative to the deprecated usage + result = Series(ser.values.view("int64"), dtype=ser.dtype) + tm.assert_series_equal(result, ser) + + @pytest.mark.parametrize( + "data_constructor", [list, np.array], ids=["list", "ndarray[object]"] + ) + def test_constructor_infer_period(self, data_constructor): + data = [Period("2000", "D"), Period("2001", "D"), None] + result = Series(data_constructor(data)) + expected = Series(period_array(data)) + tm.assert_series_equal(result, expected) + assert result.dtype == "Period[D]" + + @pytest.mark.xfail(reason="PeriodDtype Series not supported yet") + def test_construct_from_ints_including_iNaT_scalar_period_dtype(self): + series = Series([0, 1000, 2000, pd._libs.iNaT], dtype="period[D]") + + val = series[3] + assert isna(val) + + series[2] = val + assert isna(series[2]) + + def test_constructor_period_incompatible_frequency(self): + data = [Period("2000", "D"), Period("2001", "A")] + result = Series(data) + assert result.dtype == object + assert result.tolist() == data + + def test_constructor_periodindex(self): + # GH7932 + # converting a PeriodIndex when put in a Series + + pi = period_range("20130101", periods=5, freq="D") + s = Series(pi) + assert s.dtype == "Period[D]" + expected = Series(pi.astype(object)) + tm.assert_series_equal(s, expected) + + def test_constructor_dict(self): + d = {"a": 0.0, "b": 1.0, "c": 2.0} + + result = Series(d) + expected = Series(d, index=sorted(d.keys())) + tm.assert_series_equal(result, expected) + + result = Series(d, index=["b", "c", "d", "a"]) + expected = Series([1, 2, np.nan, 0], index=["b", "c", "d", "a"]) + tm.assert_series_equal(result, expected) + + pidx = tm.makePeriodIndex(100) + d = {pidx[0]: 0, pidx[1]: 1} + result = Series(d, index=pidx) + expected = Series(np.nan, pidx, dtype=np.float64) + expected.iloc[0] = 0 + expected.iloc[1] = 1 + tm.assert_series_equal(result, expected) + + def test_constructor_dict_list_value_explicit_dtype(self): + # GH 18625 + d = {"a": [[2], [3], [4]]} + result = Series(d, index=["a"], dtype="object") + expected = Series(d, index=["a"]) + tm.assert_series_equal(result, expected) + + def test_constructor_dict_order(self): + # GH19018 + # initialization ordering: by insertion order if python>= 3.6, else + # order by value + d = {"b": 1, "a": 0, "c": 2} + result = Series(d) + expected = Series([1, 0, 2], index=list("bac")) + tm.assert_series_equal(result, expected) + + def test_constructor_dict_extension(self, ea_scalar_and_dtype): + ea_scalar, ea_dtype = ea_scalar_and_dtype + d = {"a": ea_scalar} + result = 
Series(d, index=["a"]) + expected = Series(ea_scalar, index=["a"], dtype=ea_dtype) + + assert result.dtype == ea_dtype + + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("value", [2, np.nan, None, float("nan")]) + def test_constructor_dict_nan_key(self, value): + # GH 18480 + d = {1: "a", value: "b", float("nan"): "c", 4: "d"} + result = Series(d).sort_values() + expected = Series(["a", "b", "c", "d"], index=[1, value, np.nan, 4]) + tm.assert_series_equal(result, expected) + + # MultiIndex: + d = {(1, 1): "a", (2, np.nan): "b", (3, value): "c"} + result = Series(d).sort_values() + expected = Series( + ["a", "b", "c"], index=Index([(1, 1), (2, np.nan), (3, value)]) + ) + tm.assert_series_equal(result, expected) + + def test_constructor_dict_datetime64_index(self): + # GH 9456 + + dates_as_str = ["1984-02-19", "1988-11-06", "1989-12-03", "1990-03-15"] + values = [42544017.198965244, 1234565, 40512335.181958228, -1] + + def create_data(constructor): + return dict(zip((constructor(x) for x in dates_as_str), values)) + + data_datetime64 = create_data(np.datetime64) + data_datetime = create_data(lambda x: datetime.strptime(x, "%Y-%m-%d")) + data_Timestamp = create_data(Timestamp) + + expected = Series(values, (Timestamp(x) for x in dates_as_str)) + + result_datetime64 = Series(data_datetime64) + result_datetime = Series(data_datetime) + result_Timestamp = Series(data_Timestamp) + + tm.assert_series_equal(result_datetime64, expected) + tm.assert_series_equal(result_datetime, expected) + tm.assert_series_equal(result_Timestamp, expected) + + def test_constructor_dict_tuple_indexer(self): + # GH 12948 + data = {(1, 1, None): -1.0} + result = Series(data) + expected = Series( + -1.0, index=MultiIndex(levels=[[1], [1], [np.nan]], codes=[[0], [0], [-1]]) + ) + tm.assert_series_equal(result, expected) + + def test_constructor_mapping(self, non_dict_mapping_subclass): + # GH 29788 + ndm = non_dict_mapping_subclass({3: "three"}) + result = Series(ndm) + expected = Series(["three"], index=[3]) + + tm.assert_series_equal(result, expected) + + def test_constructor_list_of_tuples(self): + data = [(1, 1), (2, 2), (2, 3)] + s = Series(data) + assert list(s) == data + + def test_constructor_tuple_of_tuples(self): + data = ((1, 1), (2, 2), (2, 3)) + s = Series(data) + assert tuple(s) == data + + def test_constructor_dict_of_tuples(self): + data = {(1, 2): 3, (None, 5): 6} + result = Series(data).sort_values() + expected = Series([3, 6], index=MultiIndex.from_tuples([(1, 2), (None, 5)])) + tm.assert_series_equal(result, expected) + + # https://github.com/pandas-dev/pandas/issues/22698 + @pytest.mark.filterwarnings("ignore:elementwise comparison:FutureWarning") + def test_fromDict(self): + data = {"a": 0, "b": 1, "c": 2, "d": 3} + + series = Series(data) + tm.assert_is_sorted(series.index) + + data = {"a": 0, "b": "1", "c": "2", "d": datetime.now()} + series = Series(data) + assert series.dtype == np.object_ + + data = {"a": 0, "b": "1", "c": "2", "d": "3"} + series = Series(data) + assert series.dtype == np.object_ + + data = {"a": "0", "b": "1"} + series = Series(data, dtype=float) + assert series.dtype == np.float64 + + def test_fromValue(self, datetime_series): + + nans = Series(np.NaN, index=datetime_series.index, dtype=np.float64) + assert nans.dtype == np.float_ + assert len(nans) == len(datetime_series) + + strings = Series("foo", index=datetime_series.index) + assert strings.dtype == np.object_ + assert len(strings) == len(datetime_series) + + d = datetime.now() + dates = Series(d, 
index=datetime_series.index) + assert dates.dtype == "M8[ns]" + assert len(dates) == len(datetime_series) + + # GH12336 + # Test construction of categorical series from value + categorical = Series(0, index=datetime_series.index, dtype="category") + expected = Series(0, index=datetime_series.index).astype("category") + assert categorical.dtype == "category" + assert len(categorical) == len(datetime_series) + tm.assert_series_equal(categorical, expected) + + def test_constructor_dtype_timedelta64(self): + + # basic + td = Series([timedelta(days=i) for i in range(3)]) + assert td.dtype == "timedelta64[ns]" + + td = Series([timedelta(days=1)]) + assert td.dtype == "timedelta64[ns]" + + td = Series([timedelta(days=1), timedelta(days=2), np.timedelta64(1, "s")]) + + assert td.dtype == "timedelta64[ns]" + + # mixed with NaT + td = Series([timedelta(days=1), NaT], dtype="m8[ns]") + assert td.dtype == "timedelta64[ns]" + + td = Series([timedelta(days=1), np.nan], dtype="m8[ns]") + assert td.dtype == "timedelta64[ns]" + + td = Series([np.timedelta64(300000000), NaT], dtype="m8[ns]") + assert td.dtype == "timedelta64[ns]" + + # improved inference + # GH5689 + td = Series([np.timedelta64(300000000), NaT]) + assert td.dtype == "timedelta64[ns]" + + # because iNaT is int, not coerced to timedelta + td = Series([np.timedelta64(300000000), iNaT]) + assert td.dtype == "object" + + td = Series([np.timedelta64(300000000), np.nan]) + assert td.dtype == "timedelta64[ns]" + + td = Series([NaT, np.timedelta64(300000000)]) + assert td.dtype == "timedelta64[ns]" + + td = Series([np.timedelta64(1, "s")]) + assert td.dtype == "timedelta64[ns]" + + # valid astype + td.astype("int64") + + # invalid casting + msg = r"cannot astype a datetimelike from \[timedelta64\[ns\]\] to \[int32\]" + with pytest.raises(TypeError, match=msg): + td.astype("int32") + + # this is an invalid casting + msg = "|".join( + [ + "Could not convert object to NumPy timedelta", + "Could not convert 'foo' to NumPy timedelta", + ] + ) + with pytest.raises(ValueError, match=msg): + Series([timedelta(days=1), "foo"], dtype="m8[ns]") + + # leave as object here + td = Series([timedelta(days=i) for i in range(3)] + ["foo"]) + assert td.dtype == "object" + + # these will correctly infer a timedelta + msg = "containing strings is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + ser = Series([None, NaT, "1 Day"]) + assert ser.dtype == "timedelta64[ns]" + + with tm.assert_produces_warning(FutureWarning, match=msg): + ser = Series([np.nan, NaT, "1 Day"]) + assert ser.dtype == "timedelta64[ns]" + + with tm.assert_produces_warning(FutureWarning, match=msg): + ser = Series([NaT, None, "1 Day"]) + assert ser.dtype == "timedelta64[ns]" + + with tm.assert_produces_warning(FutureWarning, match=msg): + ser = Series([NaT, np.nan, "1 Day"]) + assert ser.dtype == "timedelta64[ns]" + + # GH 16406 + def test_constructor_mixed_tz(self): + s = Series([Timestamp("20130101"), Timestamp("20130101", tz="US/Eastern")]) + expected = Series( + [Timestamp("20130101"), Timestamp("20130101", tz="US/Eastern")], + dtype="object", + ) + tm.assert_series_equal(s, expected) + + def test_NaT_scalar(self): + series = Series([0, 1000, 2000, iNaT], dtype="M8[ns]") + + val = series[3] + assert isna(val) + + series[2] = val + assert isna(series[2]) + + def test_NaT_cast(self): + # GH10747 + result = Series([np.nan]).astype("M8[ns]") + expected = Series([NaT]) + tm.assert_series_equal(result, expected) + + def test_constructor_name_hashable(self): + for n in [777, 
777.0, "name", datetime(2001, 11, 11), (1,), "\u05D0"]: + for data in [[1, 2, 3], np.ones(3), {"a": 0, "b": 1}]: + s = Series(data, name=n) + assert s.name == n + + def test_constructor_name_unhashable(self): + msg = r"Series\.name must be a hashable type" + for n in [["name_list"], np.ones(2), {1: 2}]: + for data in [["name_list"], np.ones(2), {1: 2}]: + with pytest.raises(TypeError, match=msg): + Series(data, name=n) + + def test_auto_conversion(self): + series = Series(list(date_range("1/1/2000", periods=10))) + assert series.dtype == "M8[ns]" + + def test_convert_non_ns(self): + # convert from a numpy array of non-ns timedelta64 + arr = np.array([1, 2, 3], dtype="timedelta64[s]") + s = Series(arr) + expected = Series(timedelta_range("00:00:01", periods=3, freq="s")) + tm.assert_series_equal(s, expected) + + # convert from a numpy array of non-ns datetime64 + arr = np.array( + ["2013-01-01", "2013-01-02", "2013-01-03"], dtype="datetime64[D]" + ) + ser = Series(arr) + expected = Series(date_range("20130101", periods=3, freq="D")) + tm.assert_series_equal(ser, expected) + + arr = np.array( + ["2013-01-01 00:00:01", "2013-01-01 00:00:02", "2013-01-01 00:00:03"], + dtype="datetime64[s]", + ) + ser = Series(arr) + expected = Series(date_range("20130101 00:00:01", periods=3, freq="s")) + tm.assert_series_equal(ser, expected) + + @pytest.mark.parametrize( + "index", + [ + date_range("1/1/2000", periods=10), + timedelta_range("1 day", periods=10), + period_range("2000-Q1", periods=10, freq="Q"), + ], + ids=lambda x: type(x).__name__, + ) + def test_constructor_cant_cast_datetimelike(self, index): + + # floats are not ok + # strip Index to convert PeriodIndex -> Period + # We don't care whether the error message says + # PeriodIndex or PeriodArray + msg = f"Cannot cast {type(index).__name__.rstrip('Index')}.*? to " + + with pytest.raises(TypeError, match=msg): + Series(index, dtype=float) + + # ints are ok + # we test with np.int64 to get similar results on + # windows / 32-bit platforms + result = Series(index, dtype=np.int64) + expected = Series(index.astype(np.int64)) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "index", + [ + date_range("1/1/2000", periods=10), + timedelta_range("1 day", periods=10), + period_range("2000-Q1", periods=10, freq="Q"), + ], + ids=lambda x: type(x).__name__, + ) + def test_constructor_cast_object(self, index): + s = Series(index, dtype=object) + exp = Series(index).astype(object) + tm.assert_series_equal(s, exp) + + s = Series(Index(index, dtype=object), dtype=object) + exp = Series(index).astype(object) + tm.assert_series_equal(s, exp) + + s = Series(index.astype(object), dtype=object) + exp = Series(index).astype(object) + tm.assert_series_equal(s, exp) + + @pytest.mark.parametrize("dtype", [np.datetime64, np.timedelta64]) + def test_constructor_generic_timestamp_no_frequency(self, dtype, request): + # see gh-15524, gh-15987 + msg = "dtype has no unit. 
Please pass in" + + if np.dtype(dtype).name not in ["timedelta64", "datetime64"]: + mark = pytest.mark.xfail(reason="GH#33890 Is assigned ns unit") + request.node.add_marker(mark) + + with pytest.raises(ValueError, match=msg): + Series([], dtype=dtype) + + @pytest.mark.parametrize( + "dtype,msg", + [ + ("m8[ps]", "cannot convert timedeltalike"), + ("M8[ps]", "cannot convert datetimelike"), + ], + ) + def test_constructor_generic_timestamp_bad_frequency(self, dtype, msg): + # see gh-15524, gh-15987 + + with pytest.raises(TypeError, match=msg): + Series([], dtype=dtype) + + @pytest.mark.parametrize("dtype", [None, "uint8", "category"]) + def test_constructor_range_dtype(self, dtype): + # GH 16804 + expected = Series([0, 1, 2, 3, 4], dtype=dtype or "int64") + result = Series(range(5), dtype=dtype) + tm.assert_series_equal(result, expected) + + def test_constructor_range_overflows(self): + # GH#30173 range objects that overflow int64 + rng = range(2 ** 63, 2 ** 63 + 4) + ser = Series(rng) + expected = Series(list(rng)) + tm.assert_series_equal(ser, expected) + assert list(ser) == list(rng) + assert ser.dtype == np.uint64 + + rng2 = range(2 ** 63 + 4, 2 ** 63, -1) + ser2 = Series(rng2) + expected2 = Series(list(rng2)) + tm.assert_series_equal(ser2, expected2) + assert list(ser2) == list(rng2) + assert ser2.dtype == np.uint64 + + rng3 = range(-(2 ** 63), -(2 ** 63) - 4, -1) + ser3 = Series(rng3) + expected3 = Series(list(rng3)) + tm.assert_series_equal(ser3, expected3) + assert list(ser3) == list(rng3) + assert ser3.dtype == object + + rng4 = range(2 ** 73, 2 ** 73 + 4) + ser4 = Series(rng4) + expected4 = Series(list(rng4)) + tm.assert_series_equal(ser4, expected4) + assert list(ser4) == list(rng4) + assert ser4.dtype == object + + def test_constructor_tz_mixed_data(self): + # GH 13051 + dt_list = [ + Timestamp("2016-05-01 02:03:37"), + Timestamp("2016-04-30 19:03:37-0700", tz="US/Pacific"), + ] + result = Series(dt_list) + expected = Series(dt_list, dtype=object) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("pydt", [True, False]) + def test_constructor_data_aware_dtype_naive(self, tz_aware_fixture, pydt): + # GH#25843, GH#41555, GH#33401 + tz = tz_aware_fixture + ts = Timestamp("2019", tz=tz) + if pydt: + ts = ts.to_pydatetime() + ts_naive = Timestamp("2019") + + with tm.assert_produces_warning(FutureWarning): + result = Series([ts], dtype="datetime64[ns]") + expected = Series([ts_naive]) + tm.assert_series_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning): + result = Series(np.array([ts], dtype=object), dtype="datetime64[ns]") + tm.assert_series_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning): + result = Series({0: ts}, dtype="datetime64[ns]") + tm.assert_series_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning): + result = Series(ts, index=[0], dtype="datetime64[ns]") + tm.assert_series_equal(result, expected) + + def test_constructor_datetime64(self): + rng = date_range("1/1/2000 00:00:00", "1/1/2000 1:59:50", freq="10s") + dates = np.asarray(rng) + + series = Series(dates) + assert np.issubdtype(series.dtype, np.dtype("M8[ns]")) + + def test_constructor_datetimelike_scalar_to_string_dtype( + self, nullable_string_dtype + ): + # https://github.com/pandas-dev/pandas/pull/33846 + result = Series("M", index=[1, 2, 3], dtype=nullable_string_dtype) + expected = Series(["M", "M", "M"], index=[1, 2, 3], dtype=nullable_string_dtype) + tm.assert_series_equal(result, expected) + + 
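+    # A minimal sketch of the sparse path exercised next (values are
+    # illustrative, not from the test): constructing with
+    # dtype=pd.SparseDtype("datetime64[ns]") should be equivalent to
+    # wrapping pd.arrays.SparseArray(values, dtype=...) in a Series.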
+    @pytest.mark.parametrize(
+        "values",
+        [
+            [np.datetime64("2012-01-01"), np.datetime64("2013-01-01")],
+            ["2012-01-01", "2013-01-01"],
+        ],
+    )
+    def test_constructor_sparse_datetime64(self, values):
+        # https://github.com/pandas-dev/pandas/issues/35762
+        dtype = pd.SparseDtype("datetime64[ns]")
+        result = Series(values, dtype=dtype)
+        arr = pd.arrays.SparseArray(values, dtype=dtype)
+        expected = Series(arr)
+        tm.assert_series_equal(result, expected)
+
+    def test_construction_from_ordered_collection(self):
+        # https://github.com/pandas-dev/pandas/issues/36044
+        result = Series({"a": 1, "b": 2}.keys())
+        expected = Series(["a", "b"])
+        tm.assert_series_equal(result, expected)
+
+        result = Series({"a": 1, "b": 2}.values())
+        expected = Series([1, 2])
+        tm.assert_series_equal(result, expected)
+
+    def test_construction_from_large_int_scalar_no_overflow(self):
+        # https://github.com/pandas-dev/pandas/issues/36291
+        n = 1_000_000_000_000_000_000_000
+        result = Series(n, index=[0])
+        expected = Series(n)
+        tm.assert_series_equal(result, expected)
+
+    def test_constructor_list_of_periods_infers_period_dtype(self):
+        series = Series(list(period_range("2000-01-01", periods=10, freq="D")))
+        assert series.dtype == "Period[D]"
+
+        series = Series(
+            [Period("2011-01-01", freq="D"), Period("2011-02-01", freq="D")]
+        )
+        assert series.dtype == "Period[D]"
+
+    def test_constructor_subclass_dict(self, dict_subclass):
+        data = dict_subclass((x, 10.0 * x) for x in range(10))
+        series = Series(data)
+        expected = Series(dict(data.items()))
+        tm.assert_series_equal(series, expected)
+
+    def test_constructor_ordereddict(self):
+        # GH3283
+        data = OrderedDict((f"col{i}", np.random.random()) for i in range(12))
+
+        series = Series(data)
+        expected = Series(list(data.values()), list(data.keys()))
+        tm.assert_series_equal(series, expected)
+
+        # Test with subclass
+        class A(OrderedDict):
+            pass
+
+        series = Series(A(data))
+        tm.assert_series_equal(series, expected)
+
+    def test_constructor_dict_multiindex(self):
+        d = {("a", "a"): 0.0, ("b", "a"): 1.0, ("b", "c"): 2.0}
+        _d = sorted(d.items())
+        result = Series(d)
+        expected = Series(
+            [x[1] for x in _d], index=MultiIndex.from_tuples([x[0] for x in _d])
+        )
+        tm.assert_series_equal(result, expected)
+
+        d["z"] = 111.0
+        _d.insert(0, ("z", d["z"]))
+        result = Series(d)
+        expected = Series(
+            [x[1] for x in _d], index=Index([x[0] for x in _d], tupleize_cols=False)
+        )
+        result = result.reindex(index=expected.index)
+        tm.assert_series_equal(result, expected)
+
+    def test_constructor_dict_multiindex_reindex_flat(self):
+        # construction involves reindexing with a MultiIndex corner case
+        data = {("i", "i"): 0, ("i", "j"): 1, ("j", "i"): 2, "j": np.nan}
+        expected = Series(data)
+
+        result = Series(expected[:-1].to_dict(), index=expected.index)
+        tm.assert_series_equal(result, expected)
+
+    def test_constructor_dict_timedelta_index(self):
+        # GH #12169 : Resample category data with timedelta index
+        # construct Series from dict as data and TimedeltaIndex as index
+        # will result in NaN in the resulting Series data
+        expected = Series(
+            data=["A", "B", "C"], index=pd.to_timedelta([0, 10, 20], unit="s")
+        )
+
+        result = Series(
+            data={
+                pd.to_timedelta(0, unit="s"): "A",
+                pd.to_timedelta(10, unit="s"): "B",
+                pd.to_timedelta(20, unit="s"): "C",
+            },
+            index=pd.to_timedelta([0, 10, 20], unit="s"),
+        )
+        tm.assert_series_equal(result, expected)
+
+    def test_constructor_infer_index_tz(self):
+        values = [188.5, 328.25]
+        tzinfo = tzoffset(None, 7200)
+        index = [
+            datetime(2012,
5, 11, 11, tzinfo=tzinfo), + datetime(2012, 5, 11, 12, tzinfo=tzinfo), + ] + series = Series(data=values, index=index) + + assert series.index.tz == tzinfo + + # it works! GH#2443 + repr(series.index[0]) + + def test_constructor_with_pandas_dtype(self): + # going through 2D->1D path + vals = [(1,), (2,), (3,)] + ser = Series(vals) + dtype = ser.array.dtype # PandasDtype + ser2 = Series(vals, dtype=dtype) + tm.assert_series_equal(ser, ser2) + + def test_constructor_int_dtype_missing_values(self): + # GH#43017 + result = Series(index=[0], dtype="int64") + expected = Series(np.nan, index=[0], dtype="float64") + tm.assert_series_equal(result, expected) + + def test_constructor_bool_dtype_missing_values(self): + # GH#43018 + result = Series(index=[0], dtype="bool") + expected = Series(True, index=[0], dtype="bool") + tm.assert_series_equal(result, expected) + + @pytest.mark.filterwarnings( + "ignore:elementwise comparison failed:DeprecationWarning" + ) + @pytest.mark.xfail( + np_version_under1p20, reason="np.array([td64nat, float, float]) raises" + ) + @pytest.mark.parametrize("func", [Series, DataFrame, Index, pd.array]) + def test_constructor_mismatched_null_nullable_dtype( + self, func, any_numeric_ea_dtype + ): + # GH#44514 + msg = "|".join( + [ + "cannot safely cast non-equivalent object", + r"int\(\) argument must be a string, a bytes-like object " + "or a (real )?number", + r"Cannot cast array data from dtype\('O'\) to dtype\('float64'\) " + "according to the rule 'safe'", + "object cannot be converted to a FloatingDtype", + "'values' contains non-numeric NA", + ] + ) + + for null in tm.NP_NAT_OBJECTS + [NaT]: + with pytest.raises(TypeError, match=msg): + func([null, 1.0, 3.0], dtype=any_numeric_ea_dtype) + + +class TestSeriesConstructorIndexCoercion: + def test_series_constructor_datetimelike_index_coercion(self): + idx = tm.makeDateIndex(10000) + ser = Series(np.random.randn(len(idx)), idx.astype(object)) + with tm.assert_produces_warning(FutureWarning): + assert ser.index.is_all_dates + assert isinstance(ser.index, DatetimeIndex) + + def test_series_constructor_infer_multiindex(self): + index_lists = [["a", "a", "b", "b"], ["x", "y", "x", "y"]] + + multi = Series(1.0, index=[np.array(x) for x in index_lists]) + assert isinstance(multi.index, MultiIndex) + + multi = Series(1.0, index=index_lists) + assert isinstance(multi.index, MultiIndex) + + multi = Series(range(4), index=index_lists) + assert isinstance(multi.index, MultiIndex) + + +class TestSeriesConstructorInternals: + def test_constructor_no_pandas_array(self, using_array_manager): + ser = Series([1, 2, 3]) + result = Series(ser.array) + tm.assert_series_equal(ser, result) + if not using_array_manager: + assert isinstance(result._mgr.blocks[0], NumericBlock) + + @td.skip_array_manager_invalid_test + def test_from_array(self): + result = Series(pd.array(["1H", "2H"], dtype="timedelta64[ns]")) + assert result._mgr.blocks[0].is_extension is False + + result = Series(pd.array(["2015"], dtype="datetime64[ns]")) + assert result._mgr.blocks[0].is_extension is False + + @td.skip_array_manager_invalid_test + def test_from_list_dtype(self): + result = Series(["1H", "2H"], dtype="timedelta64[ns]") + assert result._mgr.blocks[0].is_extension is False + + result = Series(["2015"], dtype="datetime64[ns]") + assert result._mgr.blocks[0].is_extension is False + + +def test_constructor(rand_series_with_duplicate_datetimeindex): + dups = rand_series_with_duplicate_datetimeindex + assert isinstance(dups, Series) + assert isinstance(dups.index, 
DatetimeIndex) + + +@pytest.mark.parametrize( + "input_dict,expected", + [ + ({0: 0}, np.array([[0]], dtype=np.int64)), + ({"a": "a"}, np.array([["a"]], dtype=object)), + ({1: 1}, np.array([[1]], dtype=np.int64)), + ], +) +@pytest.mark.skipif(np_version_under1p19, reason="fails on numpy below 1.19") +def test_numpy_array(input_dict, expected): + result = np.array([Series(input_dict)]) + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.skipif( + not np_version_under1p19, reason="check failure on numpy below 1.19" +) +def test_numpy_array_np_v1p19(): + with pytest.raises(KeyError, match="0"): + np.array([Series({1: 1})]) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/test_cumulative.py b/.local/lib/python3.8/site-packages/pandas/tests/series/test_cumulative.py new file mode 100644 index 0000000..f970d88 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/test_cumulative.py @@ -0,0 +1,131 @@ +""" +Tests for Series cumulative operations. + +See also +-------- +tests.frame.test_cumulative +""" + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + +methods = { + "cumsum": np.cumsum, + "cumprod": np.cumprod, + "cummin": np.minimum.accumulate, + "cummax": np.maximum.accumulate, +} + + +class TestSeriesCumulativeOps: + @pytest.mark.parametrize("func", [np.cumsum, np.cumprod]) + def test_datetime_series(self, datetime_series, func): + tm.assert_numpy_array_equal( + func(datetime_series).values, + func(np.array(datetime_series)), + check_dtype=True, + ) + + # with missing values + ts = datetime_series.copy() + ts[::2] = np.NaN + + result = func(ts)[1::2] + expected = func(np.array(ts.dropna())) + + tm.assert_numpy_array_equal(result.values, expected, check_dtype=False) + + @pytest.mark.parametrize("method", ["cummin", "cummax"]) + def test_cummin_cummax(self, datetime_series, method): + ufunc = methods[method] + + result = getattr(datetime_series, method)().values + expected = ufunc(np.array(datetime_series)) + + tm.assert_numpy_array_equal(result, expected) + ts = datetime_series.copy() + ts[::2] = np.NaN + result = getattr(ts, method)()[1::2] + expected = ufunc(ts.dropna()) + + result.index = result.index._with_freq(None) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "ts", + [ + pd.Timedelta(0), + pd.Timestamp("1999-12-31"), + pd.Timestamp("1999-12-31").tz_localize("US/Pacific"), + ], + ) + @pytest.mark.parametrize( + "method, skipna, exp_tdi", + [ + ["cummax", True, ["NaT", "2 days", "NaT", "2 days", "NaT", "3 days"]], + ["cummin", True, ["NaT", "2 days", "NaT", "1 days", "NaT", "1 days"]], + [ + "cummax", + False, + ["NaT", "2 days", "2 days", "2 days", "2 days", "3 days"], + ], + [ + "cummin", + False, + ["NaT", "2 days", "2 days", "1 days", "1 days", "1 days"], + ], + ], + ) + def test_cummin_cummax_datetimelike(self, ts, method, skipna, exp_tdi): + # with ts==pd.Timedelta(0), we are testing td64; with naive Timestamp + # we are testing datetime64[ns]; with Timestamp[US/Pacific] + # we are testing dt64tz + tdi = pd.to_timedelta(["NaT", "2 days", "NaT", "1 days", "NaT", "3 days"]) + ser = pd.Series(tdi + ts) + + exp_tdi = pd.to_timedelta(exp_tdi) + expected = pd.Series(exp_tdi + ts) + result = getattr(ser, method)(skipna=skipna) + tm.assert_series_equal(expected, result) + + @pytest.mark.parametrize( + "arg", + [ + [False, False, False, True, True, False, False], + [False, False, False, False, False, False, False], + ], + ) + @pytest.mark.parametrize( + "func", [lambda x: 
x, lambda x: ~x], ids=["identity", "inverse"] + ) + @pytest.mark.parametrize("method", methods.keys()) + def test_cummethods_bool(self, arg, func, method): + # GH#6270 + # checking Series method vs the ufunc applied to the values + + ser = func(pd.Series(arg)) + ufunc = methods[method] + + exp_vals = ufunc(ser.values) + expected = pd.Series(exp_vals) + + result = getattr(ser, method)() + + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "method, expected", + [ + ["cumsum", pd.Series([0, 1, np.nan, 1], dtype=object)], + ["cumprod", pd.Series([False, 0, np.nan, 0])], + ["cummin", pd.Series([False, False, np.nan, False])], + ["cummax", pd.Series([False, True, np.nan, True])], + ], + ) + def test_cummethods_bool_in_object_dtype(self, method, expected): + ser = pd.Series([False, True, np.nan, False]) + result = getattr(ser, method)() + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/test_iteration.py b/.local/lib/python3.8/site-packages/pandas/tests/series/test_iteration.py new file mode 100644 index 0000000..dc9fe9e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/test_iteration.py @@ -0,0 +1,33 @@ +class TestIteration: + def test_keys(self, datetime_series): + assert datetime_series.keys() is datetime_series.index + + def test_iter_datetimes(self, datetime_series): + for i, val in enumerate(datetime_series): + assert val == datetime_series[i] + + def test_iter_strings(self, string_series): + for i, val in enumerate(string_series): + assert val == string_series[i] + + def test_iteritems_datetimes(self, datetime_series): + for idx, val in datetime_series.iteritems(): + assert val == datetime_series[idx] + + def test_iteritems_strings(self, string_series): + for idx, val in string_series.iteritems(): + assert val == string_series[idx] + + # assert is lazy (generators don't define reverse, lists do) + assert not hasattr(string_series.iteritems(), "reverse") + + def test_items_datetimes(self, datetime_series): + for idx, val in datetime_series.items(): + assert val == datetime_series[idx] + + def test_items_strings(self, string_series): + for idx, val in string_series.items(): + assert val == string_series[idx] + + # assert is lazy (generators don't define reverse, lists do) + assert not hasattr(string_series.items(), "reverse") diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/test_logical_ops.py b/.local/lib/python3.8/site-packages/pandas/tests/series/test_logical_ops.py new file mode 100644 index 0000000..9648b01 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/test_logical_ops.py @@ -0,0 +1,487 @@ +from datetime import datetime +import operator + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + Series, + bdate_range, +) +import pandas._testing as tm +from pandas.core import ops + + +class TestSeriesLogicalOps: + @pytest.mark.parametrize("bool_op", [operator.and_, operator.or_, operator.xor]) + def test_bool_operators_with_nas(self, bool_op): + # boolean &, |, ^ should work with object arrays and propagate NAs + ser = Series(bdate_range("1/1/2000", periods=10), dtype=object) + ser[::2] = np.nan + + mask = ser.isna() + filled = ser.fillna(ser[0]) + + result = bool_op(ser < ser[9], ser > ser[3]) + + expected = bool_op(filled < filled[9], filled > filled[3]) + expected[mask] = False + tm.assert_series_equal(result, expected) + + def test_logical_operators_bool_dtype_with_empty(self): + # GH#9016: support 
bitwise op for integer types + index = list("bca") + + s_tft = Series([True, False, True], index=index) + s_fff = Series([False, False, False], index=index) + s_empty = Series([], dtype=object) + + res = s_tft & s_empty + expected = s_fff + tm.assert_series_equal(res, expected) + + res = s_tft | s_empty + expected = s_tft + tm.assert_series_equal(res, expected) + + def test_logical_operators_int_dtype_with_int_dtype(self): + # GH#9016: support bitwise op for integer types + + s_0123 = Series(range(4), dtype="int64") + s_3333 = Series([3] * 4) + s_4444 = Series([4] * 4) + + res = s_0123 & s_3333 + expected = Series(range(4), dtype="int64") + tm.assert_series_equal(res, expected) + + res = s_0123 | s_4444 + expected = Series(range(4, 8), dtype="int64") + tm.assert_series_equal(res, expected) + + s_1111 = Series([1] * 4, dtype="int8") + res = s_0123 & s_1111 + expected = Series([0, 1, 0, 1], dtype="int64") + tm.assert_series_equal(res, expected) + + res = s_0123.astype(np.int16) | s_1111.astype(np.int32) + expected = Series([1, 1, 3, 3], dtype="int32") + tm.assert_series_equal(res, expected) + + def test_logical_operators_int_dtype_with_int_scalar(self): + # GH#9016: support bitwise op for integer types + s_0123 = Series(range(4), dtype="int64") + + res = s_0123 & 0 + expected = Series([0] * 4) + tm.assert_series_equal(res, expected) + + res = s_0123 & 1 + expected = Series([0, 1, 0, 1]) + tm.assert_series_equal(res, expected) + + def test_logical_operators_int_dtype_with_float(self): + # GH#9016: support bitwise op for integer types + s_0123 = Series(range(4), dtype="int64") + + msg = "Cannot perform.+with a dtyped.+array and scalar of type" + with pytest.raises(TypeError, match=msg): + s_0123 & np.NaN + with pytest.raises(TypeError, match=msg): + s_0123 & 3.14 + msg = "unsupported operand type.+for &:" + with pytest.raises(TypeError, match=msg): + s_0123 & [0.1, 4, 3.14, 2] + with pytest.raises(TypeError, match=msg): + s_0123 & np.array([0.1, 4, 3.14, 2]) + with pytest.raises(TypeError, match=msg): + s_0123 & Series([0.1, 4, -3.14, 2]) + + def test_logical_operators_int_dtype_with_str(self): + s_1111 = Series([1] * 4, dtype="int8") + msg = "Cannot perform 'and_' with a dtyped.+array and scalar of type" + with pytest.raises(TypeError, match=msg): + s_1111 & "a" + with pytest.raises(TypeError, match="unsupported operand.+for &"): + s_1111 & ["a", "b", "c", "d"] + + def test_logical_operators_int_dtype_with_bool(self): + # GH#9016: support bitwise op for integer types + s_0123 = Series(range(4), dtype="int64") + + expected = Series([False] * 4) + + result = s_0123 & False + tm.assert_series_equal(result, expected) + + result = s_0123 & [False] + tm.assert_series_equal(result, expected) + + result = s_0123 & (False,) + tm.assert_series_equal(result, expected) + + result = s_0123 ^ False + expected = Series([False, True, True, True]) + tm.assert_series_equal(result, expected) + + def test_logical_operators_int_dtype_with_object(self): + # GH#9016: support bitwise op for integer types + s_0123 = Series(range(4), dtype="int64") + + result = s_0123 & Series([False, np.NaN, False, False]) + expected = Series([False] * 4) + tm.assert_series_equal(result, expected) + + s_abNd = Series(["a", "b", np.NaN, "d"]) + with pytest.raises(TypeError, match="unsupported.* 'int' and 'str'"): + s_0123 & s_abNd + + def test_logical_operators_bool_dtype_with_int(self): + index = list("bca") + + s_tft = Series([True, False, True], index=index) + s_fff = Series([False, False, False], index=index) + + res = s_tft & 0 + 
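# 0 and 1 broadcast like False and True against a bool Series, so "& 0" goes all-False and "& 1" is a no-op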
expected = s_fff + tm.assert_series_equal(res, expected) + + res = s_tft & 1 + expected = s_tft + tm.assert_series_equal(res, expected) + + def test_logical_ops_bool_dtype_with_ndarray(self): + # make sure we operate on ndarray the same as Series + left = Series([True, True, True, False, True]) + right = [True, False, None, True, np.nan] + + expected = Series([True, False, False, False, False]) + result = left & right + tm.assert_series_equal(result, expected) + result = left & np.array(right) + tm.assert_series_equal(result, expected) + result = left & Index(right) + tm.assert_series_equal(result, expected) + result = left & Series(right) + tm.assert_series_equal(result, expected) + + expected = Series([True, True, True, True, True]) + result = left | right + tm.assert_series_equal(result, expected) + result = left | np.array(right) + tm.assert_series_equal(result, expected) + result = left | Index(right) + tm.assert_series_equal(result, expected) + result = left | Series(right) + tm.assert_series_equal(result, expected) + + expected = Series([False, True, True, True, True]) + result = left ^ right + tm.assert_series_equal(result, expected) + result = left ^ np.array(right) + tm.assert_series_equal(result, expected) + result = left ^ Index(right) + tm.assert_series_equal(result, expected) + result = left ^ Series(right) + tm.assert_series_equal(result, expected) + + def test_logical_operators_int_dtype_with_bool_dtype_and_reindex(self): + # GH#9016: support bitwise op for integer types + + # with non-matching indexes, logical operators will cast to object + # before operating + index = list("bca") + + s_tft = Series([True, False, True], index=index) + s_tft = Series([True, False, True], index=index) + s_tff = Series([True, False, False], index=index) + + s_0123 = Series(range(4), dtype="int64") + + # s_0123 will be all false now because of reindexing like s_tft + expected = Series([False] * 7, index=[0, 1, 2, 3, "a", "b", "c"]) + result = s_tft & s_0123 + tm.assert_series_equal(result, expected) + + expected = Series([False] * 7, index=[0, 1, 2, 3, "a", "b", "c"]) + result = s_0123 & s_tft + tm.assert_series_equal(result, expected) + + s_a0b1c0 = Series([1], list("b")) + + res = s_tft & s_a0b1c0 + expected = s_tff.reindex(list("abc")) + tm.assert_series_equal(res, expected) + + res = s_tft | s_a0b1c0 + expected = s_tft.reindex(list("abc")) + tm.assert_series_equal(res, expected) + + def test_scalar_na_logical_ops_corners(self): + s = Series([2, 3, 4, 5, 6, 7, 8, 9, 10]) + + msg = "Cannot perform.+with a dtyped.+array and scalar of type" + with pytest.raises(TypeError, match=msg): + s & datetime(2005, 1, 1) + + s = Series([2, 3, 4, 5, 6, 7, 8, 9, datetime(2005, 1, 1)]) + s[::2] = np.nan + + expected = Series(True, index=s.index) + expected[::2] = False + result = s & list(s) + tm.assert_series_equal(result, expected) + + def test_scalar_na_logical_ops_corners_aligns(self): + s = Series([2, 3, 4, 5, 6, 7, 8, 9, datetime(2005, 1, 1)]) + s[::2] = np.nan + d = DataFrame({"A": s}) + + expected = DataFrame(False, index=range(9), columns=["A"] + list(range(9))) + + result = s & d + tm.assert_frame_equal(result, expected) + + result = d & s + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("op", [operator.and_, operator.or_, operator.xor]) + def test_logical_ops_with_index(self, op): + # GH#22092, GH#19792 + ser = Series([True, True, False, False]) + idx1 = Index([True, False, True, False]) + idx2 = Index([1, 0, 1, 0]) + + expected = Series([op(ser[n], idx1[n]) for n in 
range(len(ser))]) + + result = op(ser, idx1) + tm.assert_series_equal(result, expected) + + expected = Series([op(ser[n], idx2[n]) for n in range(len(ser))], dtype=bool) + + result = op(ser, idx2) + tm.assert_series_equal(result, expected) + + @pytest.mark.filterwarnings("ignore:passing object-dtype arraylike:FutureWarning") + def test_reversed_xor_with_index_returns_index(self): + # GH#22092, GH#19792 + ser = Series([True, True, False, False]) + idx1 = Index([True, False, True, False]) + idx2 = Index([1, 0, 1, 0]) + + msg = "operating as a set operation" + + expected = Index.symmetric_difference(idx1, ser) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = idx1 ^ ser + tm.assert_index_equal(result, expected) + + expected = Index.symmetric_difference(idx2, ser) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = idx2 ^ ser + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "op", + [ + pytest.param( + ops.rand_, + marks=pytest.mark.xfail( + reason="GH#22092 Index __and__ returns Index intersection", + raises=AssertionError, + ), + ), + pytest.param( + ops.ror_, + marks=pytest.mark.xfail( + reason="GH#22092 Index __or__ returns Index union", + raises=AssertionError, + ), + ), + ], + ) + def test_reversed_logical_op_with_index_returns_series(self, op): + # GH#22092, GH#19792 + ser = Series([True, True, False, False]) + idx1 = Index([True, False, True, False]) + idx2 = Index([1, 0, 1, 0]) + + msg = "operating as a set operation" + + expected = Series(op(idx1.values, ser.values)) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = op(ser, idx1) + tm.assert_series_equal(result, expected) + + expected = Series(op(idx2.values, ser.values)) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = op(ser, idx2) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "op, expected", + [ + (ops.rand_, Index([False, True])), + (ops.ror_, Index([False, True])), + (ops.rxor, Index([])), + ], + ) + def test_reverse_ops_with_index(self, op, expected): + # https://github.com/pandas-dev/pandas/pull/23628 + # multi-set Index ops are buggy, so let's avoid duplicates... 
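+ # (rand_/ror_/rxor hit Index.__and__/__or__/__xor__, which at this version still
+ # act as set operations; that is why the expected values above are Index objects)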
+ ser = Series([True, False]) + idx = Index([False, True]) + + msg = "operating as a set operation" + with tm.assert_produces_warning(FutureWarning, match=msg): + # behaving as set ops is deprecated, will become logical ops + result = op(ser, idx) + tm.assert_index_equal(result, expected) + + def test_logical_ops_label_based(self): + # GH#4947 + # logical ops should be label based + + a = Series([True, False, True], list("bca")) + b = Series([False, True, False], list("abc")) + + expected = Series([False, True, False], list("abc")) + result = a & b + tm.assert_series_equal(result, expected) + + expected = Series([True, True, False], list("abc")) + result = a | b + tm.assert_series_equal(result, expected) + + expected = Series([True, False, False], list("abc")) + result = a ^ b + tm.assert_series_equal(result, expected) + + # rhs is bigger + a = Series([True, False, True], list("bca")) + b = Series([False, True, False, True], list("abcd")) + + expected = Series([False, True, False, False], list("abcd")) + result = a & b + tm.assert_series_equal(result, expected) + + expected = Series([True, True, False, False], list("abcd")) + result = a | b + tm.assert_series_equal(result, expected) + + # filling + + # vs empty + empty = Series([], dtype=object) + + result = a & empty.copy() + expected = Series([False, False, False], list("bca")) + tm.assert_series_equal(result, expected) + + result = a | empty.copy() + expected = Series([True, False, True], list("bca")) + tm.assert_series_equal(result, expected) + + # vs non-matching + result = a & Series([1], ["z"]) + expected = Series([False, False, False, False], list("abcz")) + tm.assert_series_equal(result, expected) + + result = a | Series([1], ["z"]) + expected = Series([True, True, False, False], list("abcz")) + tm.assert_series_equal(result, expected) + + # identity + # we would like s[s|e] == s to hold for any e, whether empty or not + for e in [ + empty.copy(), + Series([1], ["z"]), + Series(np.nan, b.index), + Series(np.nan, a.index), + ]: + result = a[a | e] + tm.assert_series_equal(result, a[a]) + + for e in [Series(["z"])]: + result = a[a | e] + tm.assert_series_equal(result, a[a]) + + # vs scalars + index = list("bca") + t = Series([True, False, True]) + + for v in [True, 1, 2]: + result = Series([True, False, True], index=index) | v + expected = Series([True, True, True], index=index) + tm.assert_series_equal(result, expected) + + msg = "Cannot perform.+with a dtyped.+array and scalar of type" + for v in [np.nan, "foo"]: + with pytest.raises(TypeError, match=msg): + t | v + + for v in [False, 0]: + result = Series([True, False, True], index=index) | v + expected = Series([True, False, True], index=index) + tm.assert_series_equal(result, expected) + + for v in [True, 1]: + result = Series([True, False, True], index=index) & v + expected = Series([True, False, True], index=index) + tm.assert_series_equal(result, expected) + + for v in [False, 0]: + result = Series([True, False, True], index=index) & v + expected = Series([False, False, False], index=index) + tm.assert_series_equal(result, expected) + msg = "Cannot perform.+with a dtyped.+array and scalar of type" + for v in [np.nan]: + with pytest.raises(TypeError, match=msg): + t & v + + def test_logical_ops_df_compat(self): + # GH#1134 + s1 = Series([True, False, True], index=list("ABC"), name="x") + s2 = Series([True, True, False], index=list("ABD"), name="x") + + exp = Series([True, False, False, False], index=list("ABCD"), name="x") + tm.assert_series_equal(s1 & s2, exp) + 
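# alignment puts NaN at labels "C" and "D"; for &, NaN fills to False on either side, so both orders agree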
tm.assert_series_equal(s2 & s1, exp) + + # True | np.nan => True + exp_or1 = Series([True, True, True, False], index=list("ABCD"), name="x") + tm.assert_series_equal(s1 | s2, exp_or1) + # np.nan | True => np.nan, filled with False + exp_or = Series([True, True, False, False], index=list("ABCD"), name="x") + tm.assert_series_equal(s2 | s1, exp_or) + + # DataFrame doesn't fill nan with False + tm.assert_frame_equal(s1.to_frame() & s2.to_frame(), exp.to_frame()) + tm.assert_frame_equal(s2.to_frame() & s1.to_frame(), exp.to_frame()) + + exp = DataFrame({"x": [True, True, np.nan, np.nan]}, index=list("ABCD")) + tm.assert_frame_equal(s1.to_frame() | s2.to_frame(), exp_or1.to_frame()) + tm.assert_frame_equal(s2.to_frame() | s1.to_frame(), exp_or.to_frame()) + + # different length + s3 = Series([True, False, True], index=list("ABC"), name="x") + s4 = Series([True, True, True, True], index=list("ABCD"), name="x") + + exp = Series([True, False, True, False], index=list("ABCD"), name="x") + tm.assert_series_equal(s3 & s4, exp) + tm.assert_series_equal(s4 & s3, exp) + + # np.nan | True => np.nan, filled with False + exp_or1 = Series([True, True, True, False], index=list("ABCD"), name="x") + tm.assert_series_equal(s3 | s4, exp_or1) + # True | np.nan => True + exp_or = Series([True, True, True, True], index=list("ABCD"), name="x") + tm.assert_series_equal(s4 | s3, exp_or) + + tm.assert_frame_equal(s3.to_frame() & s4.to_frame(), exp.to_frame()) + tm.assert_frame_equal(s4.to_frame() & s3.to_frame(), exp.to_frame()) + + tm.assert_frame_equal(s3.to_frame() | s4.to_frame(), exp_or1.to_frame()) + tm.assert_frame_equal(s4.to_frame() | s3.to_frame(), exp_or.to_frame()) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/test_missing.py b/.local/lib/python3.8/site-packages/pandas/tests/series/test_missing.py new file mode 100644 index 0000000..d956b2c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/test_missing.py @@ -0,0 +1,118 @@ +from datetime import timedelta + +import numpy as np +import pytest + +from pandas._libs import iNaT + +import pandas as pd +from pandas import ( + Categorical, + Index, + NaT, + Series, + isna, +) +import pandas._testing as tm + + +class TestSeriesMissingData: + def test_categorical_nan_handling(self): + + # NaNs are represented as -1 in labels + s = Series(Categorical(["a", "b", np.nan, "a"])) + tm.assert_index_equal(s.cat.categories, Index(["a", "b"])) + tm.assert_numpy_array_equal( + s.values.codes, np.array([0, 1, -1, 0], dtype=np.int8) + ) + + def test_isna_for_inf(self): + s = Series(["a", np.inf, np.nan, pd.NA, 1.0]) + with pd.option_context("mode.use_inf_as_na", True): + r = s.isna() + dr = s.dropna() + e = Series([False, True, True, True, False]) + de = Series(["a", 1.0], index=[0, 4]) + tm.assert_series_equal(r, e) + tm.assert_series_equal(dr, de) + + @pytest.mark.parametrize( + "method, expected", + [ + ["isna", Series([False, True, True, False])], + ["dropna", Series(["a", 1.0], index=[0, 3])], + ], + ) + def test_isnull_for_inf_deprecated(self, method, expected): + # gh-17115 + s = Series(["a", np.inf, np.nan, 1.0]) + with pd.option_context("mode.use_inf_as_null", True): + result = getattr(s, method)() + tm.assert_series_equal(result, expected) + + def test_timedelta64_nan(self): + + td = Series([timedelta(days=i) for i in range(10)]) + + # nan ops on timedeltas + td1 = td.copy() + td1[0] = np.nan + assert isna(td1[0]) + assert td1[0].value == iNaT + td1[0] = td[0] + assert not isna(td1[0]) + + # GH#16674 iNaT is treated as an 
integer when given by the user + td1[1] = iNaT + assert not isna(td1[1]) + assert td1.dtype == np.object_ + assert td1[1] == iNaT + td1[1] = td[1] + assert not isna(td1[1]) + + td1[2] = NaT + assert isna(td1[2]) + assert td1[2].value == iNaT + td1[2] = td[2] + assert not isna(td1[2]) + + # boolean setting + # GH#2899 boolean setting + td3 = np.timedelta64(timedelta(days=3)) + td7 = np.timedelta64(timedelta(days=7)) + td[(td > td3) & (td < td7)] = np.nan + assert isna(td).sum() == 3 + + @pytest.mark.xfail( + reason="Chained inequality raises when trying to define 'selector'" + ) + def test_logical_range_select(self, datetime_series): + # NumPy limitation =( + # https://github.com/pandas-dev/pandas/commit/9030dc021f07c76809848925cb34828f6c8484f3 + np.random.seed(12345) + selector = -0.5 <= datetime_series <= 0.5 + expected = (datetime_series >= -0.5) & (datetime_series <= 0.5) + tm.assert_series_equal(selector, expected) + + def test_valid(self, datetime_series): + ts = datetime_series.copy() + ts.index = ts.index._with_freq(None) + ts[::2] = np.NaN + + result = ts.dropna() + assert len(result) == ts.count() + tm.assert_series_equal(result, ts[1::2]) + tm.assert_series_equal(result, ts[pd.notna(ts)]) + + +def test_hasnans_uncached_for_series(): + # GH#19700 + idx = Index([0, 1]) + assert idx.hasnans is False + assert "hasnans" in idx._cache + ser = idx.to_series() + assert ser.hasnans is False + assert not hasattr(ser, "_cache") + ser.iloc[-1] = np.nan + assert ser.hasnans is True + assert Series.hasnans.__doc__ == Index.hasnans.__doc__ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/test_npfuncs.py b/.local/lib/python3.8/site-packages/pandas/tests/series/test_npfuncs.py new file mode 100644 index 0000000..a0b672f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/test_npfuncs.py @@ -0,0 +1,21 @@ +""" +Tests for np.foo applied to Series, not necessarily ufuncs. +""" + +import numpy as np + +from pandas import Series + + +class TestPtp: + def test_ptp(self): + # GH#21614 + N = 1000 + arr = np.random.randn(N) + ser = Series(arr) + assert np.ptp(ser) == np.ptp(arr) + + +def test_numpy_unique(datetime_series): + # it works! 
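+ # smoke test only: np.unique coerces the Series to an ndarray and should not raise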
+ np.unique(datetime_series) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/test_reductions.py b/.local/lib/python3.8/site-packages/pandas/tests/series/test_reductions.py new file mode 100644 index 0000000..8fb51af --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/test_reductions.py @@ -0,0 +1,135 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + MultiIndex, + Series, +) +import pandas._testing as tm + + +@pytest.mark.parametrize("as_period", [True, False]) +def test_mode_extension_dtype(as_period): + # GH#41927 preserve dt64tz dtype + ser = Series([pd.Timestamp(1979, 4, n) for n in range(1, 5)]) + + if as_period: + ser = ser.dt.to_period("D") + else: + ser = ser.dt.tz_localize("US/Central") + + res = ser.mode() + assert res.dtype == ser.dtype + tm.assert_series_equal(res, ser) + + +def test_reductions_td64_with_nat(): + # GH#8617 + ser = Series([0, pd.NaT], dtype="m8[ns]") + exp = ser[0] + assert ser.median() == exp + assert ser.min() == exp + assert ser.max() == exp + + +@pytest.mark.parametrize("skipna", [True, False]) +def test_td64_sum_empty(skipna): + # GH#37151 + ser = Series([], dtype="timedelta64[ns]") + + result = ser.sum(skipna=skipna) + assert isinstance(result, pd.Timedelta) + assert result == pd.Timedelta(0) + + +def test_td64_summation_overflow(): + # GH#9442 + ser = Series(pd.date_range("20130101", periods=100000, freq="H")) + ser[0] += pd.Timedelta("1s 1ms") + + # mean + result = (ser - ser.min()).mean() + expected = pd.Timedelta((pd.TimedeltaIndex(ser - ser.min()).asi8 / len(ser)).sum()) + + # the computation is converted to float so + # might be some loss of precision + assert np.allclose(result.value / 1000, expected.value / 1000) + + # sum + msg = "overflow in timedelta operation" + with pytest.raises(ValueError, match=msg): + (ser - ser.min()).sum() + + s1 = ser[0:10000] + with pytest.raises(ValueError, match=msg): + (s1 - s1.min()).sum() + s2 = ser[0:1000] + (s2 - s2.min()).sum() + + +def test_prod_numpy16_bug(): + ser = Series([1.0, 1.0, 1.0], index=range(3)) + result = ser.prod() + + assert not isinstance(result, Series) + + +def test_sum_with_level(): + obj = Series([10.0], index=MultiIndex.from_tuples([(2, 3)])) + + with tm.assert_produces_warning(FutureWarning): + result = obj.sum(level=0) + expected = Series([10.0], index=[2]) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("func", [np.any, np.all]) +@pytest.mark.parametrize("kwargs", [{"keepdims": True}, {"out": object()}]) +def test_validate_any_all_out_keepdims_raises(kwargs, func): + ser = Series([1, 2]) + param = list(kwargs)[0] + name = func.__name__ + + msg = ( + f"the '{param}' parameter is not " + "supported in the pandas " + fr"implementation of {name}\(\)" + ) + with pytest.raises(ValueError, match=msg): + func(ser, **kwargs) + + +def test_validate_sum_initial(): + ser = Series([1, 2]) + msg = ( + r"the 'initial' parameter is not " + r"supported in the pandas " + r"implementation of sum\(\)" + ) + with pytest.raises(ValueError, match=msg): + np.sum(ser, initial=10) + + +def test_validate_median_initial(): + ser = Series([1, 2]) + msg = ( + r"the 'overwrite_input' parameter is not " + r"supported in the pandas " + r"implementation of median\(\)" + ) + with pytest.raises(ValueError, match=msg): + # It seems like np.median doesn't dispatch, so we use the + # method instead of the ufunc. 
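+ # (either way, the numpy-compat validator should reject the numpy-only kwarg)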
+ ser.median(overwrite_input=True) + + +def test_validate_stat_keepdims(): + ser = Series([1, 2]) + msg = ( + r"the 'keepdims' parameter is not " + r"supported in the pandas " + r"implementation of sum\(\)" + ) + with pytest.raises(ValueError, match=msg): + np.sum(ser, keepdims=True) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/test_repr.py b/.local/lib/python3.8/site-packages/pandas/tests/series/test_repr.py new file mode 100644 index 0000000..a12bc1d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/test_repr.py @@ -0,0 +1,536 @@ +from datetime import ( + datetime, + timedelta, +) + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + Index, + Series, + date_range, + option_context, + period_range, + timedelta_range, +) +import pandas._testing as tm + + +class TestSeriesRepr: + def test_multilevel_name_print(self, lexsorted_two_level_string_multiindex): + index = lexsorted_two_level_string_multiindex + ser = Series(range(len(index)), index=index, name="sth") + expected = [ + "first second", + "foo one 0", + " two 1", + " three 2", + "bar one 3", + " two 4", + "baz two 5", + " three 6", + "qux one 7", + " two 8", + " three 9", + "Name: sth, dtype: int64", + ] + expected = "\n".join(expected) + assert repr(ser) == expected + + def test_small_name_printing(self): + # Test small Series. + s = Series([0, 1, 2]) + + s.name = "test" + assert "Name: test" in repr(s) + + s.name = None + assert "Name:" not in repr(s) + + def test_big_name_printing(self): + # Test big Series (diff code path). + s = Series(range(1000)) + + s.name = "test" + assert "Name: test" in repr(s) + + s.name = None + assert "Name:" not in repr(s) + + def test_empty_name_printing(self): + s = Series(index=date_range("20010101", "20020101"), name="test", dtype=object) + assert "Name: test" in repr(s) + + @pytest.mark.parametrize("args", [(), (0, -1)]) + def test_float_range(self, args): + str(Series(np.random.randn(1000), index=np.arange(1000, *args))) + + def test_empty_object(self): + # empty + str(Series(dtype=object)) + + def test_string(self, string_series): + str(string_series) + str(string_series.astype(int)) + + # with NaNs + string_series[5:7] = np.NaN + str(string_series) + + def test_object(self, object_series): + str(object_series) + + def test_datetime(self, datetime_series): + str(datetime_series) + # with Nones + ots = datetime_series.astype("O") + ots[::2] = None + repr(ots) + + @pytest.mark.parametrize( + "name", + [ + "", + 1, + 1.2, + "foo", + "\u03B1\u03B2\u03B3", + "loooooooooooooooooooooooooooooooooooooooooooooooooooong", + ("foo", "bar", "baz"), + (1, 2), + ("foo", 1, 2.3), + ("\u03B1", "\u03B2", "\u03B3"), + ("\u03B1", "bar"), + ], + ) + def test_various_names(self, name, string_series): + # various names + string_series.name = name + repr(string_series) + + def test_tuple_name(self): + biggie = Series( + np.random.randn(1000), index=np.arange(1000), name=("foo", "bar", "baz") + ) + repr(biggie) + + @pytest.mark.parametrize("arg", [100, 1001]) + def test_tidy_repr_name_0(self, arg): + # tidy repr + ser = Series(np.random.randn(arg), name=0) + rep_str = repr(ser) + assert "Name: 0" in rep_str + + def test_newline(self): + ser = Series(["a\n\r\tb"], name="a\n\r\td", index=["a\n\r\tf"]) + assert "\t" not in repr(ser) + assert "\r" not in repr(ser) + assert "a\n" not in repr(ser) + + @pytest.mark.parametrize( + "name, expected", + [ + ["foo", "Series([], Name: foo, dtype: int64)"], + [None, "Series([], 
dtype: int64)"], + ], + ) + def test_empty_int64(self, name, expected): + # with empty series (#4651) + s = Series([], dtype=np.int64, name=name) + assert repr(s) == expected + + def test_tidy_repr(self): + a = Series(["\u05d0"] * 1000) + a.name = "title1" + repr(a) # should not raise exception + + def test_repr_bool_fails(self, capsys): + s = Series([DataFrame(np.random.randn(2, 2)) for i in range(5)]) + + # It works (with no Cython exception barf)! + repr(s) + + captured = capsys.readouterr() + assert captured.err == "" + + def test_repr_name_iterable_indexable(self): + s = Series([1, 2, 3], name=np.int64(3)) + + # it works! + repr(s) + + s.name = ("\u05d0",) * 2 + repr(s) + + def test_repr_should_return_str(self): + # https://docs.python.org/3/reference/datamodel.html#object.__repr__ + # ...The return value must be a string object. + + # (str on py2.x, str (unicode) on py3) + + data = [8, 5, 3, 5] + index1 = ["\u03c3", "\u03c4", "\u03c5", "\u03c6"] + df = Series(data, index=index1) + assert type(df.__repr__() == str) # both py2 / 3 + + def test_repr_max_rows(self): + # GH 6863 + with option_context("display.max_rows", None): + str(Series(range(1001))) # should not raise exception + + def test_unicode_string_with_unicode(self): + df = Series(["\u05d0"], name="\u05d1") + str(df) + + def test_str_to_bytes_raises(self): + # GH 26447 + df = Series(["abc"], name="abc") + msg = "^'str' object cannot be interpreted as an integer$" + with pytest.raises(TypeError, match=msg): + bytes(df) + + def test_timeseries_repr_object_dtype(self): + index = Index( + [datetime(2000, 1, 1) + timedelta(i) for i in range(1000)], dtype=object + ) + ts = Series(np.random.randn(len(index)), index) + repr(ts) + + ts = tm.makeTimeSeries(1000) + assert repr(ts).splitlines()[-1].startswith("Freq:") + + ts2 = ts.iloc[np.random.randint(0, len(ts) - 1, 400)] + repr(ts2).splitlines()[-1] + + @pytest.mark.filterwarnings("ignore::FutureWarning") + def test_latex_repr(self): + result = r"""\begin{tabular}{ll} +\toprule +{} & 0 \\ +\midrule +0 & $\alpha$ \\ +1 & b \\ +2 & c \\ +\bottomrule +\end{tabular} +""" + with option_context("display.latex.escape", False, "display.latex.repr", True): + s = Series([r"$\alpha$", "b", "c"]) + assert result == s._repr_latex_() + + assert s._repr_latex_() is None + + def test_index_repr_in_frame_with_nan(self): + # see gh-25061 + i = Index([1, np.nan]) + s = Series([1, 2], index=i) + exp = """1.0 1\nNaN 2\ndtype: int64""" + + assert repr(s) == exp + + def test_format_pre_1900_dates(self): + rng = date_range("1/1/1850", "1/1/1950", freq="A-DEC") + rng.format() + ts = Series(1, index=rng) + repr(ts) + + def test_series_repr_nat(self): + series = Series([0, 1000, 2000, pd.NaT.value], dtype="M8[ns]") + + result = repr(series) + expected = ( + "0 1970-01-01 00:00:00.000000\n" + "1 1970-01-01 00:00:00.000001\n" + "2 1970-01-01 00:00:00.000002\n" + "3 NaT\n" + "dtype: datetime64[ns]" + ) + assert result == expected + + def test_float_repr(self): + # GH#35603 + # check float format when cast to object + ser = Series([1.0]).astype(object) + expected = "0 1.0\ndtype: object" + assert repr(ser) == expected + + def test_different_null_objects(self): + # GH#45263 + ser = Series([1, 2, 3, 4], [True, None, np.nan, pd.NaT]) + result = repr(ser) + expected = "True 1\nNone 2\nNaN 3\nNaT 4\ndtype: int64" + assert result == expected + + +class TestCategoricalRepr: + def test_categorical_repr_unicode(self): + # see gh-21002 + + class County: + name = "San Sebastián" + state = "PR" + + def __repr__(self) -> str: + 
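+ # non-ASCII repr on purpose: this is what gh-21002 exercises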
return self.name + ", " + self.state + + cat = Categorical([County() for _ in range(61)]) + idx = Index(cat) + ser = idx.to_series() + + repr(ser) + str(ser) + + def test_categorical_repr(self): + a = Series(Categorical([1, 2, 3, 4])) + exp = ( + "0 1\n1 2\n2 3\n3 4\n" + + "dtype: category\nCategories (4, int64): [1, 2, 3, 4]" + ) + + assert exp == a.__str__() + + a = Series(Categorical(["a", "b"] * 25)) + exp = ( + "0 a\n1 b\n" + + " ..\n" + + "48 a\n49 b\n" + + "Length: 50, dtype: category\nCategories (2, object): ['a', 'b']" + ) + with option_context("display.max_rows", 5): + assert exp == repr(a) + + levs = list("abcdefghijklmnopqrstuvwxyz") + a = Series(Categorical(["a", "b"], categories=levs, ordered=True)) + exp = ( + "0 a\n1 b\n" + "dtype: category\n" + "Categories (26, object): ['a' < 'b' < 'c' < 'd' ... 'w' < 'x' < 'y' < 'z']" + ) + assert exp == a.__str__() + + def test_categorical_series_repr(self): + s = Series(Categorical([1, 2, 3])) + exp = """0 1 +1 2 +2 3 +dtype: category +Categories (3, int64): [1, 2, 3]""" + + assert repr(s) == exp + + s = Series(Categorical(np.arange(10))) + exp = """0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +dtype: category +Categories (10, int64): [0, 1, 2, 3, ..., 6, 7, 8, 9]""" + + assert repr(s) == exp + + def test_categorical_series_repr_ordered(self): + s = Series(Categorical([1, 2, 3], ordered=True)) + exp = """0 1 +1 2 +2 3 +dtype: category +Categories (3, int64): [1 < 2 < 3]""" + + assert repr(s) == exp + + s = Series(Categorical(np.arange(10), ordered=True)) + exp = """0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +dtype: category +Categories (10, int64): [0 < 1 < 2 < 3 ... 6 < 7 < 8 < 9]""" + + assert repr(s) == exp + + def test_categorical_series_repr_datetime(self): + idx = date_range("2011-01-01 09:00", freq="H", periods=5) + s = Series(Categorical(idx)) + exp = """0 2011-01-01 09:00:00 +1 2011-01-01 10:00:00 +2 2011-01-01 11:00:00 +3 2011-01-01 12:00:00 +4 2011-01-01 13:00:00 +dtype: category +Categories (5, datetime64[ns]): [2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, + 2011-01-01 12:00:00, 2011-01-01 13:00:00]""" # noqa:E501 + + assert repr(s) == exp + + idx = date_range("2011-01-01 09:00", freq="H", periods=5, tz="US/Eastern") + s = Series(Categorical(idx)) + exp = """0 2011-01-01 09:00:00-05:00 +1 2011-01-01 10:00:00-05:00 +2 2011-01-01 11:00:00-05:00 +3 2011-01-01 12:00:00-05:00 +4 2011-01-01 13:00:00-05:00 +dtype: category +Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, + 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, + 2011-01-01 13:00:00-05:00]""" # noqa:E501 + + assert repr(s) == exp + + def test_categorical_series_repr_datetime_ordered(self): + idx = date_range("2011-01-01 09:00", freq="H", periods=5) + s = Series(Categorical(idx, ordered=True)) + exp = """0 2011-01-01 09:00:00 +1 2011-01-01 10:00:00 +2 2011-01-01 11:00:00 +3 2011-01-01 12:00:00 +4 2011-01-01 13:00:00 +dtype: category +Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 < + 2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" # noqa:E501 + + assert repr(s) == exp + + idx = date_range("2011-01-01 09:00", freq="H", periods=5, tz="US/Eastern") + s = Series(Categorical(idx, ordered=True)) + exp = """0 2011-01-01 09:00:00-05:00 +1 2011-01-01 10:00:00-05:00 +2 2011-01-01 11:00:00-05:00 +3 2011-01-01 12:00:00-05:00 +4 2011-01-01 13:00:00-05:00 +dtype: category +Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 
10:00:00-05:00 < + 2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 < + 2011-01-01 13:00:00-05:00]""" # noqa:E501 + + assert repr(s) == exp + + def test_categorical_series_repr_period(self): + idx = period_range("2011-01-01 09:00", freq="H", periods=5) + s = Series(Categorical(idx)) + exp = """0 2011-01-01 09:00 +1 2011-01-01 10:00 +2 2011-01-01 11:00 +3 2011-01-01 12:00 +4 2011-01-01 13:00 +dtype: category +Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, + 2011-01-01 13:00]""" # noqa:E501 + + assert repr(s) == exp + + idx = period_range("2011-01", freq="M", periods=5) + s = Series(Categorical(idx)) + exp = """0 2011-01 +1 2011-02 +2 2011-03 +3 2011-04 +4 2011-05 +dtype: category +Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" + + assert repr(s) == exp + + def test_categorical_series_repr_period_ordered(self): + idx = period_range("2011-01-01 09:00", freq="H", periods=5) + s = Series(Categorical(idx, ordered=True)) + exp = """0 2011-01-01 09:00 +1 2011-01-01 10:00 +2 2011-01-01 11:00 +3 2011-01-01 12:00 +4 2011-01-01 13:00 +dtype: category +Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < + 2011-01-01 13:00]""" # noqa:E501 + + assert repr(s) == exp + + idx = period_range("2011-01", freq="M", periods=5) + s = Series(Categorical(idx, ordered=True)) + exp = """0 2011-01 +1 2011-02 +2 2011-03 +3 2011-04 +4 2011-05 +dtype: category +Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" + + assert repr(s) == exp + + def test_categorical_series_repr_timedelta(self): + idx = timedelta_range("1 days", periods=5) + s = Series(Categorical(idx)) + exp = """0 1 days +1 2 days +2 3 days +3 4 days +4 5 days +dtype: category +Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]""" + + assert repr(s) == exp + + idx = timedelta_range("1 hours", periods=10) + s = Series(Categorical(idx)) + exp = """0 0 days 01:00:00 +1 1 days 01:00:00 +2 2 days 01:00:00 +3 3 days 01:00:00 +4 4 days 01:00:00 +5 5 days 01:00:00 +6 6 days 01:00:00 +7 7 days 01:00:00 +8 8 days 01:00:00 +9 9 days 01:00:00 +dtype: category +Categories (10, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, + 3 days 01:00:00, ..., 6 days 01:00:00, 7 days 01:00:00, + 8 days 01:00:00, 9 days 01:00:00]""" # noqa:E501 + + assert repr(s) == exp + + def test_categorical_series_repr_timedelta_ordered(self): + idx = timedelta_range("1 days", periods=5) + s = Series(Categorical(idx, ordered=True)) + exp = """0 1 days +1 2 days +2 3 days +3 4 days +4 5 days +dtype: category +Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" + + assert repr(s) == exp + + idx = timedelta_range("1 hours", periods=10) + s = Series(Categorical(idx, ordered=True)) + exp = """0 0 days 01:00:00 +1 1 days 01:00:00 +2 2 days 01:00:00 +3 3 days 01:00:00 +4 4 days 01:00:00 +5 5 days 01:00:00 +6 6 days 01:00:00 +7 7 days 01:00:00 +8 8 days 01:00:00 +9 9 days 01:00:00 +dtype: category +Categories (10, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 < + 3 days 01:00:00 ... 
6 days 01:00:00 < 7 days 01:00:00 < + 8 days 01:00:00 < 9 days 01:00:00]""" # noqa:E501 + + assert repr(s) == exp diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/test_subclass.py b/.local/lib/python3.8/site-packages/pandas/tests/series/test_subclass.py new file mode 100644 index 0000000..fd6f4e0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/test_subclass.py @@ -0,0 +1,61 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +class TestSeriesSubclassing: + @pytest.mark.parametrize( + "idx_method, indexer, exp_data, exp_idx", + [ + ["loc", ["a", "b"], [1, 2], "ab"], + ["iloc", [2, 3], [3, 4], "cd"], + ], + ) + def test_indexing_sliced(self, idx_method, indexer, exp_data, exp_idx): + s = tm.SubclassedSeries([1, 2, 3, 4], index=list("abcd")) + res = getattr(s, idx_method)[indexer] + exp = tm.SubclassedSeries(exp_data, index=list(exp_idx)) + tm.assert_series_equal(res, exp) + + def test_to_frame(self): + s = tm.SubclassedSeries([1, 2, 3, 4], index=list("abcd"), name="xxx") + res = s.to_frame() + exp = tm.SubclassedDataFrame({"xxx": [1, 2, 3, 4]}, index=list("abcd")) + tm.assert_frame_equal(res, exp) + + def test_subclass_unstack(self): + # GH 15564 + s = tm.SubclassedSeries([1, 2, 3, 4], index=[list("aabb"), list("xyxy")]) + + res = s.unstack() + exp = tm.SubclassedDataFrame({"x": [1, 3], "y": [2, 4]}, index=["a", "b"]) + + tm.assert_frame_equal(res, exp) + + def test_subclass_empty_repr(self): + with tm.assert_produces_warning(FutureWarning): + sub_series = tm.SubclassedSeries() + assert "SubclassedSeries" in repr(sub_series) + + def test_asof(self): + N = 3 + rng = pd.date_range("1/1/1990", periods=N, freq="53s") + s = tm.SubclassedSeries({"A": [np.nan, np.nan, np.nan]}, index=rng) + + result = s.asof(rng[-2:]) + assert isinstance(result, tm.SubclassedSeries) + + def test_explode(self): + s = tm.SubclassedSeries([[1, 2, 3], "foo", [], [3, 4]]) + result = s.explode() + assert isinstance(result, tm.SubclassedSeries) + + def test_equals(self): + # https://github.com/pandas-dev/pandas/pull/34402 + # allow subclass in both directions + s1 = pd.Series([1, 2, 3]) + s2 = tm.SubclassedSeries([1, 2, 3]) + assert s1.equals(s2) + assert s2.equals(s1) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/test_ufunc.py b/.local/lib/python3.8/site-packages/pandas/tests/series/test_ufunc.py new file mode 100644 index 0000000..ed07a31 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/test_ufunc.py @@ -0,0 +1,445 @@ +from collections import deque +import string + +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_dtype_equal + +import pandas as pd +import pandas._testing as tm +from pandas.arrays import SparseArray + +UNARY_UFUNCS = [np.positive, np.floor, np.exp] +BINARY_UFUNCS = [np.add, np.logaddexp] # dunder op +SPARSE = [True, False] +SPARSE_IDS = ["sparse", "dense"] +SHUFFLE = [True, False] + + +@pytest.fixture +def arrays_for_binary_ufunc(): + """ + A pair of random, length-100 integer-dtype arrays, that are mostly 0. 
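+ Zeros are injected so the sparse variants have a natural fill_value of 0.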
+ """ + a1 = np.random.randint(0, 10, 100, dtype="int64") + a2 = np.random.randint(0, 10, 100, dtype="int64") + a1[::3] = 0 + a2[::4] = 0 + return a1, a2 + + +@pytest.mark.parametrize("ufunc", UNARY_UFUNCS) +@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) +def test_unary_ufunc(ufunc, sparse): + # Test that ufunc(pd.Series) == pd.Series(ufunc) + arr = np.random.randint(0, 10, 10, dtype="int64") + arr[::2] = 0 + if sparse: + arr = SparseArray(arr, dtype=pd.SparseDtype("int64", 0)) + + index = list(string.ascii_letters[:10]) + name = "name" + series = pd.Series(arr, index=index, name=name) + + result = ufunc(series) + expected = pd.Series(ufunc(arr), index=index, name=name) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("ufunc", BINARY_UFUNCS) +@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) +@pytest.mark.parametrize("flip", [True, False], ids=["flipped", "straight"]) +def test_binary_ufunc_with_array(flip, sparse, ufunc, arrays_for_binary_ufunc): + # Test that ufunc(pd.Series(a), array) == pd.Series(ufunc(a, b)) + a1, a2 = arrays_for_binary_ufunc + if sparse: + a1 = SparseArray(a1, dtype=pd.SparseDtype("int64", 0)) + a2 = SparseArray(a2, dtype=pd.SparseDtype("int64", 0)) + + name = "name" # op(pd.Series, array) preserves the name. + series = pd.Series(a1, name=name) + other = a2 + + array_args = (a1, a2) + series_args = (series, other) # ufunc(series, array) + + if flip: + array_args = reversed(array_args) + series_args = reversed(series_args) # ufunc(array, series) + + expected = pd.Series(ufunc(*array_args), name=name) + result = ufunc(*series_args) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("ufunc", BINARY_UFUNCS) +@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) +@pytest.mark.parametrize("flip", [True, False], ids=["flipped", "straight"]) +def test_binary_ufunc_with_index(flip, sparse, ufunc, arrays_for_binary_ufunc): + # Test that + # * func(pd.Series(a), pd.Series(b)) == pd.Series(ufunc(a, b)) + # * ufunc(Index, pd.Series) dispatches to pd.Series (returns a pd.Series) + a1, a2 = arrays_for_binary_ufunc + if sparse: + a1 = SparseArray(a1, dtype=pd.SparseDtype("int64", 0)) + a2 = SparseArray(a2, dtype=pd.SparseDtype("int64", 0)) + + name = "name" # op(pd.Series, array) preserves the name. + series = pd.Series(a1, name=name) + + warn = None if not sparse else FutureWarning + with tm.assert_produces_warning(warn): + other = pd.Index(a2, name=name).astype("int64") + + array_args = (a1, a2) + series_args = (series, other) # ufunc(series, array) + + if flip: + array_args = reversed(array_args) + series_args = reversed(series_args) # ufunc(array, series) + + expected = pd.Series(ufunc(*array_args), name=name) + result = ufunc(*series_args) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("ufunc", BINARY_UFUNCS) +@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) +@pytest.mark.parametrize("shuffle", [True, False], ids=["unaligned", "aligned"]) +@pytest.mark.parametrize("flip", [True, False], ids=["flipped", "straight"]) +def test_binary_ufunc_with_series( + flip, shuffle, sparse, ufunc, arrays_for_binary_ufunc +): + # Test that + # * func(pd.Series(a), pd.Series(b)) == pd.Series(ufunc(a, b)) + # with alignment between the indices + a1, a2 = arrays_for_binary_ufunc + if sparse: + a1 = SparseArray(a1, dtype=pd.SparseDtype("int64", 0)) + a2 = SparseArray(a2, dtype=pd.SparseDtype("int64", 0)) + + name = "name" # op(pd.Series, array) preserves the name. 
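+ # (the bare ndarray operand has no name to conflict with; contrast
+ # test_binary_ufunc_drops_series_name below, where two differing names collapse to None)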
+ series = pd.Series(a1, name=name) + other = pd.Series(a2, name=name) + + idx = np.random.permutation(len(a1)) + + if shuffle: + other = other.take(idx) + if flip: + index = other.align(series)[0].index + else: + index = series.align(other)[0].index + else: + index = series.index + + array_args = (a1, a2) + series_args = (series, other) # ufunc(series, array) + + if flip: + array_args = tuple(reversed(array_args)) + series_args = tuple(reversed(series_args)) # ufunc(array, series) + + expected = pd.Series(ufunc(*array_args), index=index, name=name) + result = ufunc(*series_args) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("ufunc", BINARY_UFUNCS) +@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) +@pytest.mark.parametrize("flip", [True, False]) +def test_binary_ufunc_scalar(ufunc, sparse, flip, arrays_for_binary_ufunc): + # Test that + # * ufunc(pd.Series, scalar) == pd.Series(ufunc(array, scalar)) + # * ufunc(pd.Series, scalar) == ufunc(scalar, pd.Series) + arr, _ = arrays_for_binary_ufunc + if sparse: + arr = SparseArray(arr) + other = 2 + series = pd.Series(arr, name="name") + + series_args = (series, other) + array_args = (arr, other) + + if flip: + series_args = tuple(reversed(series_args)) + array_args = tuple(reversed(array_args)) + + expected = pd.Series(ufunc(*array_args), name="name") + result = ufunc(*series_args) + + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("ufunc", [np.divmod]) # TODO: np.modf, np.frexp +@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) +@pytest.mark.parametrize("shuffle", SHUFFLE) +@pytest.mark.filterwarnings("ignore:divide by zero:RuntimeWarning") +def test_multiple_output_binary_ufuncs( + ufunc, sparse, shuffle, arrays_for_binary_ufunc, request +): + # Test that + # the same conditions from binary_ufunc_scalar apply to + # ufuncs with multiple outputs. + + a1, a2 = arrays_for_binary_ufunc + # work around https://github.com/pandas-dev/pandas/issues/26987 + a1[a1 == 0] = 1 + a2[a2 == 0] = 1 + + if sparse: + a1 = SparseArray(a1, dtype=pd.SparseDtype("int64", 0)) + a2 = SparseArray(a2, dtype=pd.SparseDtype("int64", 0)) + + s1 = pd.Series(a1) + s2 = pd.Series(a2) + + if shuffle: + # ensure we align before applying the ufunc + s2 = s2.sample(frac=1) + + expected = ufunc(a1, a2) + assert isinstance(expected, tuple) + + result = ufunc(s1, s2) + assert isinstance(result, tuple) + tm.assert_series_equal(result[0], pd.Series(expected[0])) + tm.assert_series_equal(result[1], pd.Series(expected[1])) + + +@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) +def test_multiple_output_ufunc(sparse, arrays_for_binary_ufunc): + # Test that the same conditions from unary input apply to multi-output + # ufuncs + arr, _ = arrays_for_binary_ufunc + + if sparse: + arr = SparseArray(arr) + + series = pd.Series(arr, name="name") + result = np.modf(series) + expected = np.modf(arr) + + assert isinstance(result, tuple) + assert isinstance(expected, tuple) + + tm.assert_series_equal(result[0], pd.Series(expected[0], name="name")) + tm.assert_series_equal(result[1], pd.Series(expected[1], name="name")) + + +@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) +@pytest.mark.parametrize("ufunc", BINARY_UFUNCS) +def test_binary_ufunc_drops_series_name(ufunc, sparse, arrays_for_binary_ufunc): + # Drop the names when they differ. 
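+ # e.g. np.add(Series(a1, name="a"), Series(a2, name="b")) comes back unnamed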
+ a1, a2 = arrays_for_binary_ufunc + s1 = pd.Series(a1, name="a") + s2 = pd.Series(a2, name="b") + + result = ufunc(s1, s2) + assert result.name is None + + +def test_object_series_ok(): + class Dummy: + def __init__(self, value): + self.value = value + + def __add__(self, other): + return self.value + other.value + + arr = np.array([Dummy(0), Dummy(1)]) + ser = pd.Series(arr) + tm.assert_series_equal(np.add(ser, ser), pd.Series(np.add(ser, arr))) + tm.assert_series_equal(np.add(ser, Dummy(1)), pd.Series(np.add(ser, Dummy(1)))) + + +@pytest.fixture( + params=[ + pd.array([1, 3, 2], dtype=np.int64), + pd.array([1, 3, 2], dtype="Int64"), + pd.array([1, 3, 2], dtype="Float32"), + pd.array([1, 10, 2], dtype="Sparse[int]"), + pd.to_datetime(["2000", "2010", "2001"]), + pd.to_datetime(["2000", "2010", "2001"]).tz_localize("CET"), + pd.to_datetime(["2000", "2010", "2001"]).to_period(freq="D"), + pd.to_timedelta(["1 Day", "3 Days", "2 Days"]), + pd.IntervalIndex([pd.Interval(0, 1), pd.Interval(2, 3), pd.Interval(1, 2)]), + ], + ids=lambda x: str(x.dtype), +) +def values_for_np_reduce(request): + # min/max tests assume that these are monotonic increasing + return request.param + + +class TestNumpyReductions: + # TODO: cases with NAs, axis kwarg for DataFrame + + def test_multiply(self, values_for_np_reduce, box_with_array, request): + box = box_with_array + values = values_for_np_reduce + + warn = None + if is_dtype_equal(values.dtype, "Sparse[int]") and box is pd.Index: + warn = FutureWarning + msg = "passing a SparseArray to pd.Index" + with tm.assert_produces_warning(warn, match=msg): + obj = box(values) + + if isinstance(values, pd.core.arrays.SparseArray) and box is not pd.Index: + mark = pytest.mark.xfail(reason="SparseArray has no 'mul'") + request.node.add_marker(mark) + + if values.dtype.kind in "iuf": + result = np.multiply.reduce(obj) + if box is pd.DataFrame: + expected = obj.prod(numeric_only=False) + tm.assert_series_equal(result, expected) + elif box is pd.Index: + # Int64Index, Index has no 'prod' + expected = obj._values.prod() + assert result == expected + else: + + expected = obj.prod() + assert result == expected + else: + msg = "|".join( + [ + "does not support reduction", + "unsupported operand type", + "ufunc 'multiply' cannot use operands", + ] + ) + with pytest.raises(TypeError, match=msg): + np.multiply.reduce(obj) + + def test_add(self, values_for_np_reduce, box_with_array): + box = box_with_array + values = values_for_np_reduce + + warn = None + if is_dtype_equal(values.dtype, "Sparse[int]") and box is pd.Index: + warn = FutureWarning + msg = "passing a SparseArray to pd.Index" + with tm.assert_produces_warning(warn, match=msg): + obj = box(values) + + if values.dtype.kind in "miuf": + result = np.add.reduce(obj) + if box is pd.DataFrame: + expected = obj.sum(numeric_only=False) + tm.assert_series_equal(result, expected) + elif box is pd.Index: + # Int64Index, Index has no 'sum' + expected = obj._values.sum() + assert result == expected + else: + expected = obj.sum() + assert result == expected + else: + msg = "|".join( + [ + "does not support reduction", + "unsupported operand type", + "ufunc 'add' cannot use operands", + ] + ) + with pytest.raises(TypeError, match=msg): + np.add.reduce(obj) + + def test_max(self, values_for_np_reduce, box_with_array): + box = box_with_array + values = values_for_np_reduce + + same_type = True + if box is pd.Index and values.dtype.kind in ["i", "f"]: + # ATM Index casts to object, so we get python ints/floats + same_type = False + + 
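# when the container does preserve the dtype, the reduction should hand back the scalar box (e.g. Timestamp rather than np.datetime64)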
warn = None + if is_dtype_equal(values.dtype, "Sparse[int]") and box is pd.Index: + warn = FutureWarning + msg = "passing a SparseArray to pd.Index" + with tm.assert_produces_warning(warn, match=msg): + obj = box(values) + + result = np.maximum.reduce(obj) + if box is pd.DataFrame: + # TODO: cases with axis kwarg + expected = obj.max(numeric_only=False) + tm.assert_series_equal(result, expected) + else: + expected = values[1] + assert result == expected + if same_type: + # check we have e.g. Timestamp instead of dt64 + assert type(result) == type(expected) + + def test_min(self, values_for_np_reduce, box_with_array): + box = box_with_array + values = values_for_np_reduce + + same_type = True + if box is pd.Index and values.dtype.kind in ["i", "f"]: + # ATM Index casts to object, so we get python ints/floats + same_type = False + + warn = None + if is_dtype_equal(values.dtype, "Sparse[int]") and box is pd.Index: + warn = FutureWarning + msg = "passing a SparseArray to pd.Index" + with tm.assert_produces_warning(warn, match=msg): + obj = box(values) + + result = np.minimum.reduce(obj) + if box is pd.DataFrame: + expected = obj.min(numeric_only=False) + tm.assert_series_equal(result, expected) + else: + expected = values[0] + assert result == expected + if same_type: + # check we have e.g. Timestamp instead of dt64 + assert type(result) == type(expected) + + +@pytest.mark.parametrize("type_", [list, deque, tuple]) +def test_binary_ufunc_other_types(type_): + a = pd.Series([1, 2, 3], name="name") + b = type_([3, 4, 5]) + + result = np.add(a, b) + expected = pd.Series(np.add(a.to_numpy(), b), name="name") + tm.assert_series_equal(result, expected) + + +def test_object_dtype_ok(): + class Thing: + def __init__(self, value): + self.value = value + + def __add__(self, other): + other = getattr(other, "value", other) + return type(self)(self.value + other) + + def __eq__(self, other) -> bool: + return type(other) is Thing and self.value == other.value + + def __repr__(self) -> str: + return f"Thing({self.value})" + + s = pd.Series([Thing(1), Thing(2)]) + result = np.add(s, Thing(1)) + expected = pd.Series([Thing(2), Thing(3)]) + tm.assert_series_equal(result, expected) + + +def test_outer(): + # https://github.com/pandas-dev/pandas/issues/27186 + s = pd.Series([1, 2, 3]) + o = np.array([1, 2, 3]) + + with pytest.raises(NotImplementedError, match=tm.EMPTY_STRING_PATTERN): + np.subtract.outer(s, o) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/test_unary.py b/.local/lib/python3.8/site-packages/pandas/tests/series/test_unary.py new file mode 100644 index 0000000..ad0e344 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/test_unary.py @@ -0,0 +1,52 @@ +import pytest + +from pandas import Series +import pandas._testing as tm + + +class TestSeriesUnaryOps: + # __neg__, __pos__, __invert__ + + def test_neg(self): + ser = tm.makeStringSeries() + ser.name = "series" + tm.assert_series_equal(-ser, -1 * ser) + + def test_invert(self): + ser = tm.makeStringSeries() + ser.name = "series" + tm.assert_series_equal(-(ser < 0), ~(ser < 0)) + + @pytest.mark.parametrize( + "source, neg_target, abs_target", + [ + ([1, 2, 3], [-1, -2, -3], [1, 2, 3]), + ([1, 2, None], [-1, -2, None], [1, 2, None]), + ], + ) + def test_all_numeric_unary_operators( + self, any_numeric_ea_dtype, source, neg_target, abs_target + ): + # GH38794 + dtype = any_numeric_ea_dtype + ser = Series(source, dtype=dtype) + neg_result, pos_result, abs_result = -ser, +ser, abs(ser) + if 
dtype.startswith("U"): + neg_target = -Series(source, dtype=dtype) + else: + neg_target = Series(neg_target, dtype=dtype) + + abs_target = Series(abs_target, dtype=dtype) + + tm.assert_series_equal(neg_result, neg_target) + tm.assert_series_equal(pos_result, ser) + tm.assert_series_equal(abs_result, abs_target) + + @pytest.mark.parametrize("op", ["__neg__", "__abs__"]) + def test_unary_float_op_mask(self, float_ea_dtype, op): + dtype = float_ea_dtype + ser = Series([1.1, 2.2, 3.3], dtype=dtype) + result = getattr(ser, op)() + target = result.copy(deep=True) + ser[0] = None + tm.assert_series_equal(result, target) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/series/test_validate.py b/.local/lib/python3.8/site-packages/pandas/tests/series/test_validate.py new file mode 100644 index 0000000..3c867f7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/series/test_validate.py @@ -0,0 +1,26 @@ +import pytest + + +@pytest.mark.parametrize( + "func", + [ + "reset_index", + "_set_name", + "sort_values", + "sort_index", + "rename", + "dropna", + "drop_duplicates", + ], +) +@pytest.mark.parametrize("inplace", [1, "True", [1, 2, 3], 5.0]) +def test_validate_bool_args(string_series, func, inplace): + """Tests for error handling related to data types of method arguments.""" + msg = 'For argument "inplace" expected type bool' + kwargs = {"inplace": inplace} + + if func == "_set_name": + kwargs["name"] = "hello" + + with pytest.raises(ValueError, match=msg): + getattr(string_series, func)(**kwargs) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/strings/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/strings/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/strings/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/strings/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..dc19671 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/strings/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/strings/__pycache__/conftest.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/strings/__pycache__/conftest.cpython-38.pyc new file mode 100644 index 0000000..b1bc551 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/strings/__pycache__/conftest.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/strings/__pycache__/test_api.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/strings/__pycache__/test_api.cpython-38.pyc new file mode 100644 index 0000000..9bae44f Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/strings/__pycache__/test_api.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/strings/__pycache__/test_case_justify.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/strings/__pycache__/test_case_justify.cpython-38.pyc new file mode 100644 index 0000000..993eff3 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/strings/__pycache__/test_case_justify.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/strings/__pycache__/test_cat.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/strings/__pycache__/test_cat.cpython-38.pyc new file mode 100644 index 0000000..f02b4d6 Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/pandas/tests/strings/__pycache__/test_cat.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/strings/__pycache__/test_extract.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/strings/__pycache__/test_extract.cpython-38.pyc new file mode 100644 index 0000000..6e4e01a Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/strings/__pycache__/test_extract.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/strings/__pycache__/test_find_replace.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/strings/__pycache__/test_find_replace.cpython-38.pyc new file mode 100644 index 0000000..5aa0549 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/strings/__pycache__/test_find_replace.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/strings/__pycache__/test_get_dummies.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/strings/__pycache__/test_get_dummies.cpython-38.pyc new file mode 100644 index 0000000..352e198 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/strings/__pycache__/test_get_dummies.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/strings/__pycache__/test_split_partition.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/strings/__pycache__/test_split_partition.cpython-38.pyc new file mode 100644 index 0000000..f5d0551 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/strings/__pycache__/test_split_partition.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/strings/__pycache__/test_string_array.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/strings/__pycache__/test_string_array.cpython-38.pyc new file mode 100644 index 0000000..a3fc815 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/strings/__pycache__/test_string_array.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/strings/__pycache__/test_strings.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/strings/__pycache__/test_strings.cpython-38.pyc new file mode 100644 index 0000000..3d93845 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/strings/__pycache__/test_strings.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/strings/conftest.py b/.local/lib/python3.8/site-packages/pandas/tests/strings/conftest.py new file mode 100644 index 0000000..15cc5af --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/strings/conftest.py @@ -0,0 +1,177 @@ +import numpy as np +import pytest + +from pandas import Series +from pandas.core import strings as strings + +_any_string_method = [ + ("cat", (), {"sep": ","}), + ("cat", (Series(list("zyx")),), {"sep": ",", "join": "left"}), + ("center", (10,), {}), + ("contains", ("a",), {}), + ("count", ("a",), {}), + ("decode", ("UTF-8",), {}), + ("encode", ("UTF-8",), {}), + ("endswith", ("a",), {}), + ("endswith", ("a",), {"na": True}), + ("endswith", ("a",), {"na": False}), + ("extract", ("([a-z]*)",), {"expand": False}), + ("extract", ("([a-z]*)",), {"expand": True}), + ("extractall", ("([a-z]*)",), {}), + ("find", ("a",), {}), + ("findall", ("a",), {}), + ("get", (0,), {}), + # because "index" (and "rindex") fail intentionally + # if the string is not found, search only for empty 
string + ("index", ("",), {}), + ("join", (",",), {}), + ("ljust", (10,), {}), + ("match", ("a",), {}), + ("fullmatch", ("a",), {}), + ("normalize", ("NFC",), {}), + ("pad", (10,), {}), + ("partition", (" ",), {"expand": False}), + ("partition", (" ",), {"expand": True}), + ("repeat", (3,), {}), + ("replace", ("a", "z"), {}), + ("rfind", ("a",), {}), + ("rindex", ("",), {}), + ("rjust", (10,), {}), + ("rpartition", (" ",), {"expand": False}), + ("rpartition", (" ",), {"expand": True}), + ("slice", (0, 1), {}), + ("slice_replace", (0, 1, "z"), {}), + ("split", (" ",), {"expand": False}), + ("split", (" ",), {"expand": True}), + ("startswith", ("a",), {}), + ("startswith", ("a",), {"na": True}), + ("startswith", ("a",), {"na": False}), + ("removeprefix", ("a",), {}), + ("removesuffix", ("a",), {}), + # translating unicode points of "a" to "d" + ("translate", ({97: 100},), {}), + ("wrap", (2,), {}), + ("zfill", (10,), {}), +] + list( + zip( + [ + # methods without positional arguments: zip with empty tuple and empty dict + "capitalize", + "cat", + "get_dummies", + "isalnum", + "isalpha", + "isdecimal", + "isdigit", + "islower", + "isnumeric", + "isspace", + "istitle", + "isupper", + "len", + "lower", + "lstrip", + "partition", + "rpartition", + "rsplit", + "rstrip", + "slice", + "slice_replace", + "split", + "strip", + "swapcase", + "title", + "upper", + "casefold", + ], + [()] * 100, + [{}] * 100, + ) +) +ids, _, _ = zip(*_any_string_method) # use method name as fixture-id +missing_methods = { + f for f in dir(strings.StringMethods) if not f.startswith("_") +} - set(ids) + +# test that the above list captures all methods of StringMethods +assert not missing_methods + + +@pytest.fixture(params=_any_string_method, ids=ids) +def any_string_method(request): + """ + Fixture for all public methods of `StringMethods` + + This fixture returns a tuple of the method name and sample arguments + necessary to call the method. + + Returns + ------- + method_name : str + The name of the method in `StringMethods` + args : tuple + Sample values for the positional arguments + kwargs : dict + Sample values for the keyword arguments + + Examples + -------- + >>> def test_something(any_string_method): + ... s = Series(['a', 'b', np.nan, 'd']) + ... + ... method_name, args, kwargs = any_string_method + ... method = getattr(s.str, method_name) + ... # will not raise + ... method(*args, **kwargs) + """ + return request.param + + +# subset of the full set from pandas/conftest.py +_any_allowed_skipna_inferred_dtype = [ + ("string", ["a", np.nan, "c"]), + ("bytes", [b"a", np.nan, b"c"]), + ("empty", [np.nan, np.nan, np.nan]), + ("empty", []), + ("mixed-integer", ["a", np.nan, 2]), +] +ids, _ = zip(*_any_allowed_skipna_inferred_dtype) # use inferred type as id + + +@pytest.fixture(params=_any_allowed_skipna_inferred_dtype, ids=ids) +def any_allowed_skipna_inferred_dtype(request): + """ + Fixture for all (inferred) dtypes allowed in StringMethods.__init__ + + The covered (inferred) types are: + * 'string' + * 'empty' + * 'bytes' + * 'mixed' + * 'mixed-integer' + + Returns + ------- + inferred_dtype : str + The string for the inferred dtype from _libs.lib.infer_dtype + values : np.ndarray + An array of object dtype that will be inferred to have + `inferred_dtype` + + Examples + -------- + >>> import pandas._libs.lib as lib + >>> + >>> def test_something(any_allowed_skipna_inferred_dtype): + ... inferred_dtype, values = any_allowed_skipna_inferred_dtype + ... # will pass + ... 
assert lib.infer_dtype(values, skipna=True) == inferred_dtype + ... + ... # constructor for .str-accessor will also pass + ... Series(values).str + """ + inferred_dtype, values = request.param + values = np.array(values, dtype=object) # object dtype to avoid casting + + # correctness of inference tested in tests/dtypes/test_inference.py + return inferred_dtype, values diff --git a/.local/lib/python3.8/site-packages/pandas/tests/strings/test_api.py b/.local/lib/python3.8/site-packages/pandas/tests/strings/test_api.py new file mode 100644 index 0000000..974ecc1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/strings/test_api.py @@ -0,0 +1,154 @@ +import pytest + +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + _testing as tm, + get_option, +) +from pandas.core import strings as strings + + +def test_api(any_string_dtype): + + # GH 6106, GH 9322 + assert Series.str is strings.StringMethods + assert isinstance(Series([""], dtype=any_string_dtype).str, strings.StringMethods) + + +def test_api_mi_raises(): + # GH 23679 + mi = MultiIndex.from_arrays([["a", "b", "c"]]) + msg = "Can only use .str accessor with Index, not MultiIndex" + with pytest.raises(AttributeError, match=msg): + mi.str + assert not hasattr(mi, "str") + + +@pytest.mark.parametrize("dtype", [object, "category"]) +def test_api_per_dtype(index_or_series, dtype, any_skipna_inferred_dtype): + # one instance of parametrized fixture + box = index_or_series + inferred_dtype, values = any_skipna_inferred_dtype + + t = box(values, dtype=dtype) # explicit dtype to avoid casting + + types_passing_constructor = [ + "string", + "unicode", + "empty", + "bytes", + "mixed", + "mixed-integer", + ] + if inferred_dtype in types_passing_constructor: + # GH 6106 + assert isinstance(t.str, strings.StringMethods) + else: + # GH 9184, GH 23011, GH 23163 + msg = "Can only use .str accessor with string values.*" + with pytest.raises(AttributeError, match=msg): + t.str + assert not hasattr(t, "str") + + +@pytest.mark.parametrize("dtype", [object, "category"]) +def test_api_per_method( + index_or_series, + dtype, + any_allowed_skipna_inferred_dtype, + any_string_method, + request, +): + # this test does not check correctness of the different methods, + # just that the methods work on the specified (inferred) dtypes, + # and raise on all others + box = index_or_series + + # one instance of each parametrized fixture + inferred_dtype, values = any_allowed_skipna_inferred_dtype + method_name, args, kwargs = any_string_method + + reason = None + if box is Index and values.size == 0: + if method_name in ["partition", "rpartition"] and kwargs.get("expand", True): + raises = TypeError + reason = "Method cannot deal with empty Index" + elif method_name == "split" and kwargs.get("expand", None): + raises = TypeError + reason = "Split fails on empty Series when expand=True" + elif method_name == "get_dummies": + raises = ValueError + reason = "Need to fortify get_dummies corner cases" + + elif ( + box is Index + and inferred_dtype == "empty" + and dtype == object + and method_name == "get_dummies" + ): + raises = ValueError + reason = "Need to fortify get_dummies corner cases" + + if reason is not None: + mark = pytest.mark.xfail(raises=raises, reason=reason) + request.node.add_marker(mark) + + t = box(values, dtype=dtype) # explicit dtype to avoid casting + method = getattr(t.str, method_name) + + bytes_allowed = method_name in ["decode", "get", "len", "slice"] + # as of v0.23.4, all methods except 'cat' are very lenient with 
the + # allowed data types, just returning NaN for entries that error. + # This could be changed with an 'errors'-kwarg to the `str`-accessor, + # see discussion in GH 13877 + mixed_allowed = method_name not in ["cat"] + + allowed_types = ( + ["string", "unicode", "empty"] + + ["bytes"] * bytes_allowed + + ["mixed", "mixed-integer"] * mixed_allowed + ) + + if inferred_dtype in allowed_types: + # xref GH 23555, GH 23556 + method(*args, **kwargs) # works! + else: + # GH 23011, GH 23163 + msg = ( + f"Cannot use .str.{method_name} with values of " + f"inferred dtype {repr(inferred_dtype)}." + ) + with pytest.raises(TypeError, match=msg): + method(*args, **kwargs) + + +def test_api_for_categorical(any_string_method, any_string_dtype, request): + # https://github.com/pandas-dev/pandas/issues/10661 + + if any_string_dtype == "string[pyarrow]" or ( + any_string_dtype == "string" and get_option("string_storage") == "pyarrow" + ): + # unsupported operand type(s) for +: 'ArrowStringArray' and 'str' + mark = pytest.mark.xfail(raises=TypeError, reason="Not Implemented") + request.node.add_marker(mark) + + s = Series(list("aabb"), dtype=any_string_dtype) + s = s + " " + s + c = s.astype("category") + assert isinstance(c.str, strings.StringMethods) + + method_name, args, kwargs = any_string_method + + result = getattr(c.str, method_name)(*args, **kwargs) + expected = getattr(s.astype("object").str, method_name)(*args, **kwargs) + + if isinstance(result, DataFrame): + tm.assert_frame_equal(result, expected) + elif isinstance(result, Series): + tm.assert_series_equal(result, expected) + else: + # str.cat(others=None) returns string, for example + assert result == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/strings/test_case_justify.py b/.local/lib/python3.8/site-packages/pandas/tests/strings/test_case_justify.py new file mode 100644 index 0000000..e88dddb --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/strings/test_case_justify.py @@ -0,0 +1,409 @@ +from datetime import datetime +import operator + +import numpy as np +import pytest + +from pandas import ( + Series, + _testing as tm, +) + + +def test_title(any_string_dtype): + s = Series(["FOO", "BAR", np.nan, "Blah", "blurg"], dtype=any_string_dtype) + result = s.str.title() + expected = Series(["Foo", "Bar", np.nan, "Blah", "Blurg"], dtype=any_string_dtype) + tm.assert_series_equal(result, expected) + + +def test_title_mixed_object(): + s = Series(["FOO", np.nan, "bar", True, datetime.today(), "blah", None, 1, 2.0]) + result = s.str.title() + expected = Series( + ["Foo", np.nan, "Bar", np.nan, np.nan, "Blah", np.nan, np.nan, np.nan] + ) + tm.assert_almost_equal(result, expected) + + +def test_lower_upper(any_string_dtype): + s = Series(["om", np.nan, "nom", "nom"], dtype=any_string_dtype) + + result = s.str.upper() + expected = Series(["OM", np.nan, "NOM", "NOM"], dtype=any_string_dtype) + tm.assert_series_equal(result, expected) + + result = result.str.lower() + tm.assert_series_equal(result, s) + + +def test_lower_upper_mixed_object(): + s = Series(["a", np.nan, "b", True, datetime.today(), "foo", None, 1, 2.0]) + + result = s.str.upper() + expected = Series(["A", np.nan, "B", np.nan, np.nan, "FOO", np.nan, np.nan, np.nan]) + tm.assert_series_equal(result, expected) + + result = s.str.lower() + expected = Series(["a", np.nan, "b", np.nan, np.nan, "foo", np.nan, np.nan, np.nan]) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "data, expected", + [ + ( + ["FOO", "BAR", np.nan, 
"Blah", "blurg"], + ["Foo", "Bar", np.nan, "Blah", "Blurg"], + ), + (["a", "b", "c"], ["A", "B", "C"]), + (["a b", "a bc. de"], ["A b", "A bc. de"]), + ], +) +def test_capitalize(data, expected, any_string_dtype): + s = Series(data, dtype=any_string_dtype) + result = s.str.capitalize() + expected = Series(expected, dtype=any_string_dtype) + tm.assert_series_equal(result, expected) + + +def test_capitalize_mixed_object(): + s = Series(["FOO", np.nan, "bar", True, datetime.today(), "blah", None, 1, 2.0]) + result = s.str.capitalize() + expected = Series( + ["Foo", np.nan, "Bar", np.nan, np.nan, "Blah", np.nan, np.nan, np.nan] + ) + tm.assert_series_equal(result, expected) + + +def test_swapcase(any_string_dtype): + s = Series(["FOO", "BAR", np.nan, "Blah", "blurg"], dtype=any_string_dtype) + result = s.str.swapcase() + expected = Series(["foo", "bar", np.nan, "bLAH", "BLURG"], dtype=any_string_dtype) + tm.assert_series_equal(result, expected) + + +def test_swapcase_mixed_object(): + s = Series(["FOO", np.nan, "bar", True, datetime.today(), "Blah", None, 1, 2.0]) + result = s.str.swapcase() + expected = Series( + ["foo", np.nan, "BAR", np.nan, np.nan, "bLAH", np.nan, np.nan, np.nan] + ) + tm.assert_series_equal(result, expected) + + +def test_casefold(): + # GH25405 + expected = Series(["ss", np.nan, "case", "ssd"]) + s = Series(["ß", np.nan, "case", "ßd"]) + result = s.str.casefold() + + tm.assert_series_equal(result, expected) + + +def test_casemethods(any_string_dtype): + values = ["aaa", "bbb", "CCC", "Dddd", "eEEE"] + s = Series(values, dtype=any_string_dtype) + assert s.str.lower().tolist() == [v.lower() for v in values] + assert s.str.upper().tolist() == [v.upper() for v in values] + assert s.str.title().tolist() == [v.title() for v in values] + assert s.str.capitalize().tolist() == [v.capitalize() for v in values] + assert s.str.swapcase().tolist() == [v.swapcase() for v in values] + + +def test_pad(any_string_dtype): + s = Series(["a", "b", np.nan, "c", np.nan, "eeeeee"], dtype=any_string_dtype) + + result = s.str.pad(5, side="left") + expected = Series( + [" a", " b", np.nan, " c", np.nan, "eeeeee"], dtype=any_string_dtype + ) + tm.assert_series_equal(result, expected) + + result = s.str.pad(5, side="right") + expected = Series( + ["a ", "b ", np.nan, "c ", np.nan, "eeeeee"], dtype=any_string_dtype + ) + tm.assert_series_equal(result, expected) + + result = s.str.pad(5, side="both") + expected = Series( + [" a ", " b ", np.nan, " c ", np.nan, "eeeeee"], dtype=any_string_dtype + ) + tm.assert_series_equal(result, expected) + + +def test_pad_mixed_object(): + s = Series(["a", np.nan, "b", True, datetime.today(), "ee", None, 1, 2.0]) + + result = s.str.pad(5, side="left") + expected = Series( + [" a", np.nan, " b", np.nan, np.nan, " ee", np.nan, np.nan, np.nan] + ) + tm.assert_series_equal(result, expected) + + result = s.str.pad(5, side="right") + expected = Series( + ["a ", np.nan, "b ", np.nan, np.nan, "ee ", np.nan, np.nan, np.nan] + ) + tm.assert_series_equal(result, expected) + + result = s.str.pad(5, side="both") + expected = Series( + [" a ", np.nan, " b ", np.nan, np.nan, " ee ", np.nan, np.nan, np.nan] + ) + tm.assert_series_equal(result, expected) + + +def test_pad_fillchar(any_string_dtype): + s = Series(["a", "b", np.nan, "c", np.nan, "eeeeee"], dtype=any_string_dtype) + + result = s.str.pad(5, side="left", fillchar="X") + expected = Series( + ["XXXXa", "XXXXb", np.nan, "XXXXc", np.nan, "eeeeee"], dtype=any_string_dtype + ) + tm.assert_series_equal(result, expected) + + 
result = s.str.pad(5, side="right", fillchar="X") + expected = Series( + ["aXXXX", "bXXXX", np.nan, "cXXXX", np.nan, "eeeeee"], dtype=any_string_dtype + ) + tm.assert_series_equal(result, expected) + + result = s.str.pad(5, side="both", fillchar="X") + expected = Series( + ["XXaXX", "XXbXX", np.nan, "XXcXX", np.nan, "eeeeee"], dtype=any_string_dtype + ) + tm.assert_series_equal(result, expected) + + +def test_pad_fillchar_bad_arg_raises(any_string_dtype): + s = Series(["a", "b", np.nan, "c", np.nan, "eeeeee"], dtype=any_string_dtype) + + msg = "fillchar must be a character, not str" + with pytest.raises(TypeError, match=msg): + s.str.pad(5, fillchar="XY") + + msg = "fillchar must be a character, not int" + with pytest.raises(TypeError, match=msg): + s.str.pad(5, fillchar=5) + + +@pytest.mark.parametrize("method_name", ["center", "ljust", "rjust", "zfill", "pad"]) +def test_pad_width_bad_arg_raises(method_name, any_string_dtype): + # see gh-13598 + s = Series(["1", "22", "a", "bb"], dtype=any_string_dtype) + op = operator.methodcaller(method_name, "f") + + msg = "width must be of integer type, not str" + with pytest.raises(TypeError, match=msg): + op(s.str) + + +def test_center_ljust_rjust(any_string_dtype): + s = Series(["a", "b", np.nan, "c", np.nan, "eeeeee"], dtype=any_string_dtype) + + result = s.str.center(5) + expected = Series( + [" a ", " b ", np.nan, " c ", np.nan, "eeeeee"], dtype=any_string_dtype + ) + tm.assert_series_equal(result, expected) + + result = s.str.ljust(5) + expected = Series( + ["a ", "b ", np.nan, "c ", np.nan, "eeeeee"], dtype=any_string_dtype + ) + tm.assert_series_equal(result, expected) + + result = s.str.rjust(5) + expected = Series( + [" a", " b", np.nan, " c", np.nan, "eeeeee"], dtype=any_string_dtype + ) + tm.assert_series_equal(result, expected) + + +def test_center_ljust_rjust_mixed_object(): + s = Series(["a", np.nan, "b", True, datetime.today(), "c", "eee", None, 1, 2.0]) + + result = s.str.center(5) + expected = Series( + [ + " a ", + np.nan, + " b ", + np.nan, + np.nan, + " c ", + " eee ", + np.nan, + np.nan, + np.nan, + ] + ) + tm.assert_series_equal(result, expected) + + result = s.str.ljust(5) + expected = Series( + [ + "a ", + np.nan, + "b ", + np.nan, + np.nan, + "c ", + "eee ", + np.nan, + np.nan, + np.nan, + ] + ) + tm.assert_series_equal(result, expected) + + result = s.str.rjust(5) + expected = Series( + [ + " a", + np.nan, + " b", + np.nan, + np.nan, + " c", + " eee", + np.nan, + np.nan, + np.nan, + ] + ) + tm.assert_series_equal(result, expected) + + +def test_center_ljust_rjust_fillchar(any_string_dtype): + s = Series(["a", "bb", "cccc", "ddddd", "eeeeee"], dtype=any_string_dtype) + + result = s.str.center(5, fillchar="X") + expected = Series( + ["XXaXX", "XXbbX", "Xcccc", "ddddd", "eeeeee"], dtype=any_string_dtype + ) + tm.assert_series_equal(result, expected) + expected = np.array([v.center(5, "X") for v in np.array(s)], dtype=np.object_) + tm.assert_numpy_array_equal(np.array(result, dtype=np.object_), expected) + + result = s.str.ljust(5, fillchar="X") + expected = Series( + ["aXXXX", "bbXXX", "ccccX", "ddddd", "eeeeee"], dtype=any_string_dtype + ) + tm.assert_series_equal(result, expected) + expected = np.array([v.ljust(5, "X") for v in np.array(s)], dtype=np.object_) + tm.assert_numpy_array_equal(np.array(result, dtype=np.object_), expected) + + result = s.str.rjust(5, fillchar="X") + expected = Series( + ["XXXXa", "XXXbb", "Xcccc", "ddddd", "eeeeee"], dtype=any_string_dtype + ) + tm.assert_series_equal(result, expected) + expected 
= np.array([v.rjust(5, "X") for v in np.array(s)], dtype=np.object_) + tm.assert_numpy_array_equal(np.array(result, dtype=np.object_), expected) + + +def test_center_ljust_rjust_fillchar_bad_arg_raises(any_string_dtype): + s = Series(["a", "bb", "cccc", "ddddd", "eeeeee"], dtype=any_string_dtype) + + # If fillchar is not a character, normal str raises TypeError + # 'aaa'.ljust(5, 'XY') + # TypeError: must be char, not str + template = "fillchar must be a character, not {dtype}" + + with pytest.raises(TypeError, match=template.format(dtype="str")): + s.str.center(5, fillchar="XY") + + with pytest.raises(TypeError, match=template.format(dtype="str")): + s.str.ljust(5, fillchar="XY") + + with pytest.raises(TypeError, match=template.format(dtype="str")): + s.str.rjust(5, fillchar="XY") + + with pytest.raises(TypeError, match=template.format(dtype="int")): + s.str.center(5, fillchar=1) + + with pytest.raises(TypeError, match=template.format(dtype="int")): + s.str.ljust(5, fillchar=1) + + with pytest.raises(TypeError, match=template.format(dtype="int")): + s.str.rjust(5, fillchar=1) + + +def test_zfill(any_string_dtype): + s = Series(["1", "22", "aaa", "333", "45678"], dtype=any_string_dtype) + + result = s.str.zfill(5) + expected = Series( + ["00001", "00022", "00aaa", "00333", "45678"], dtype=any_string_dtype + ) + tm.assert_series_equal(result, expected) + expected = np.array([v.zfill(5) for v in np.array(s)], dtype=np.object_) + tm.assert_numpy_array_equal(np.array(result, dtype=np.object_), expected) + + result = s.str.zfill(3) + expected = Series(["001", "022", "aaa", "333", "45678"], dtype=any_string_dtype) + tm.assert_series_equal(result, expected) + expected = np.array([v.zfill(3) for v in np.array(s)], dtype=np.object_) + tm.assert_numpy_array_equal(np.array(result, dtype=np.object_), expected) + + s = Series(["1", np.nan, "aaa", np.nan, "45678"], dtype=any_string_dtype) + result = s.str.zfill(5) + expected = Series( + ["00001", np.nan, "00aaa", np.nan, "45678"], dtype=any_string_dtype + ) + tm.assert_series_equal(result, expected) + + +def test_wrap(any_string_dtype): + # test values are: two words less than width, two words equal to width, + # two words greater than width, one word less than width, one word + # equal to width, one word greater than width, multiple tokens with + # trailing whitespace equal to width + s = Series( + [ + "hello world", + "hello world!", + "hello world!!", + "abcdefabcde", + "abcdefabcdef", + "abcdefabcdefa", + "ab ab ab ab ", + "ab ab ab ab a", + "\t", + ], + dtype=any_string_dtype, + ) + + # expected values + expected = Series( + [ + "hello world", + "hello world!", + "hello\nworld!!", + "abcdefabcde", + "abcdefabcdef", + "abcdefabcdef\na", + "ab ab ab ab", + "ab ab ab ab\na", + "", + ], + dtype=any_string_dtype, + ) + + result = s.str.wrap(12, break_long_words=True) + tm.assert_series_equal(result, expected) + + +def test_wrap_unicode(any_string_dtype): + # test with pre and post whitespace (non-unicode), NaN, and non-ascii Unicode + s = Series( + [" pre ", np.nan, "\xac\u20ac\U00008000 abadcafe"], dtype=any_string_dtype + ) + expected = Series( + [" pre", np.nan, "\xac\u20ac\U00008000 ab\nadcafe"], dtype=any_string_dtype + ) + result = s.str.wrap(6) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/strings/test_cat.py b/.local/lib/python3.8/site-packages/pandas/tests/strings/test_cat.py new file mode 100644 index 0000000..8abbc59 --- /dev/null +++ 
b/.local/lib/python3.8/site-packages/pandas/tests/strings/test_cat.py @@ -0,0 +1,378 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + _testing as tm, + concat, +) +from pandas.tests.strings.test_strings import assert_series_or_index_equal + + +@pytest.mark.parametrize("other", [None, Series, Index]) +def test_str_cat_name(index_or_series, other): + # GH 21053 + box = index_or_series + values = ["a", "b"] + if other: + other = other(values) + else: + other = values + result = box(values, name="name").str.cat(other, sep=",") + assert result.name == "name" + + +def test_str_cat(index_or_series): + box = index_or_series + # test_cat above tests "str_cat" from ndarray; + # here testing "str.cat" from Series/Index to ndarray/list + s = box(["a", "a", "b", "b", "c", np.nan]) + + # single array + result = s.str.cat() + expected = "aabbc" + assert result == expected + + result = s.str.cat(na_rep="-") + expected = "aabbc-" + assert result == expected + + result = s.str.cat(sep="_", na_rep="NA") + expected = "a_a_b_b_c_NA" + assert result == expected + + t = np.array(["a", np.nan, "b", "d", "foo", np.nan], dtype=object) + expected = box(["aa", "a-", "bb", "bd", "cfoo", "--"]) + + # Series/Index with array + result = s.str.cat(t, na_rep="-") + assert_series_or_index_equal(result, expected) + + # Series/Index with list + result = s.str.cat(list(t), na_rep="-") + assert_series_or_index_equal(result, expected) + + # errors for incorrect lengths + rgx = r"If `others` contains arrays or lists \(or other list-likes.*" + z = Series(["1", "2", "3"]) + + with pytest.raises(ValueError, match=rgx): + s.str.cat(z.values) + + with pytest.raises(ValueError, match=rgx): + s.str.cat(list(z)) + + +def test_str_cat_raises_intuitive_error(index_or_series): + # GH 11334 + box = index_or_series + s = box(["a", "b", "c", "d"]) + message = "Did you mean to supply a `sep` keyword?" 
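+    # Added comment: a bare string passed to str.cat is treated as `others`,
+    # not as a separator, which is why the error message points the user at `sep`.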
+ with pytest.raises(ValueError, match=message): + s.str.cat("|") + with pytest.raises(ValueError, match=message): + s.str.cat(" ") + + +@pytest.mark.parametrize("sep", ["", None]) +@pytest.mark.parametrize("dtype_target", ["object", "category"]) +@pytest.mark.parametrize("dtype_caller", ["object", "category"]) +def test_str_cat_categorical(index_or_series, dtype_caller, dtype_target, sep): + box = index_or_series + + s = Index(["a", "a", "b", "a"], dtype=dtype_caller) + s = s if box == Index else Series(s, index=s) + t = Index(["b", "a", "b", "c"], dtype=dtype_target) + + expected = Index(["ab", "aa", "bb", "ac"]) + expected = expected if box == Index else Series(expected, index=s) + + # Series/Index with unaligned Index -> t.values + result = s.str.cat(t.values, sep=sep) + assert_series_or_index_equal(result, expected) + + # Series/Index with Series having matching Index + t = Series(t.values, index=s) + result = s.str.cat(t, sep=sep) + assert_series_or_index_equal(result, expected) + + # Series/Index with Series.values + result = s.str.cat(t.values, sep=sep) + assert_series_or_index_equal(result, expected) + + # Series/Index with Series having different Index + t = Series(t.values, index=t.values) + expected = Index(["aa", "aa", "aa", "bb", "bb"]) + expected = expected if box == Index else Series(expected, index=expected.str[:1]) + + result = s.str.cat(t, sep=sep) + assert_series_or_index_equal(result, expected) + + +@pytest.mark.parametrize( + "data", + [[1, 2, 3], [0.1, 0.2, 0.3], [1, 2, "b"]], + ids=["integers", "floats", "mixed"], +) +# without dtype=object, np.array would cast [1, 2, 'b'] to ['1', '2', 'b'] +@pytest.mark.parametrize( + "box", + [Series, Index, list, lambda x: np.array(x, dtype=object)], + ids=["Series", "Index", "list", "np.array"], +) +def test_str_cat_wrong_dtype_raises(box, data): + # GH 22722 + s = Series(["a", "b", "c"]) + t = box(data) + + msg = "Concatenation requires list-likes containing only strings.*" + with pytest.raises(TypeError, match=msg): + # need to use outer and na_rep, as otherwise Index would not raise + s.str.cat(t, join="outer", na_rep="-") + + +def test_str_cat_mixed_inputs(index_or_series): + box = index_or_series + s = Index(["a", "b", "c", "d"]) + s = s if box == Index else Series(s, index=s) + + t = Series(["A", "B", "C", "D"], index=s.values) + d = concat([t, Series(s, index=s)], axis=1) + + expected = Index(["aAa", "bBb", "cCc", "dDd"]) + expected = expected if box == Index else Series(expected.values, index=s.values) + + # Series/Index with DataFrame + result = s.str.cat(d) + assert_series_or_index_equal(result, expected) + + # Series/Index with two-dimensional ndarray + result = s.str.cat(d.values) + assert_series_or_index_equal(result, expected) + + # Series/Index with list of Series + result = s.str.cat([t, s]) + assert_series_or_index_equal(result, expected) + + # Series/Index with mixed list of Series/array + result = s.str.cat([t, s.values]) + assert_series_or_index_equal(result, expected) + + # Series/Index with list of Series; different indexes + t.index = ["b", "c", "d", "a"] + expected = box(["aDa", "bAb", "cBc", "dCd"]) + expected = expected if box == Index else Series(expected.values, index=s.values) + result = s.str.cat([t, s]) + assert_series_or_index_equal(result, expected) + + # Series/Index with mixed list; different index + result = s.str.cat([t, s.values]) + assert_series_or_index_equal(result, expected) + + # Series/Index with DataFrame; different indexes + d.index = ["b", "c", "d", "a"] + expected = box(["aDd", 
"bAa", "cBb", "dCc"]) + expected = expected if box == Index else Series(expected.values, index=s.values) + result = s.str.cat(d) + assert_series_or_index_equal(result, expected) + + # errors for incorrect lengths + rgx = r"If `others` contains arrays or lists \(or other list-likes.*" + z = Series(["1", "2", "3"]) + e = concat([z, z], axis=1) + + # two-dimensional ndarray + with pytest.raises(ValueError, match=rgx): + s.str.cat(e.values) + + # list of list-likes + with pytest.raises(ValueError, match=rgx): + s.str.cat([z.values, s.values]) + + # mixed list of Series/list-like + with pytest.raises(ValueError, match=rgx): + s.str.cat([z.values, s]) + + # errors for incorrect arguments in list-like + rgx = "others must be Series, Index, DataFrame,.*" + # make sure None/NaN do not crash checks in _get_series_list + u = Series(["a", np.nan, "c", None]) + + # mix of string and Series + with pytest.raises(TypeError, match=rgx): + s.str.cat([u, "u"]) + + # DataFrame in list + with pytest.raises(TypeError, match=rgx): + s.str.cat([u, d]) + + # 2-dim ndarray in list + with pytest.raises(TypeError, match=rgx): + s.str.cat([u, d.values]) + + # nested lists + with pytest.raises(TypeError, match=rgx): + s.str.cat([u, [u, d]]) + + # forbidden input type: set + # GH 23009 + with pytest.raises(TypeError, match=rgx): + s.str.cat(set(u)) + + # forbidden input type: set in list + # GH 23009 + with pytest.raises(TypeError, match=rgx): + s.str.cat([u, set(u)]) + + # other forbidden input type, e.g. int + with pytest.raises(TypeError, match=rgx): + s.str.cat(1) + + # nested list-likes + with pytest.raises(TypeError, match=rgx): + s.str.cat(iter([t.values, list(s)])) + + +@pytest.mark.parametrize("join", ["left", "outer", "inner", "right"]) +def test_str_cat_align_indexed(index_or_series, join): + # https://github.com/pandas-dev/pandas/issues/18657 + box = index_or_series + + s = Series(["a", "b", "c", "d"], index=["a", "b", "c", "d"]) + t = Series(["D", "A", "E", "B"], index=["d", "a", "e", "b"]) + sa, ta = s.align(t, join=join) + # result after manual alignment of inputs + expected = sa.str.cat(ta, na_rep="-") + + if box == Index: + s = Index(s) + sa = Index(sa) + expected = Index(expected) + + result = s.str.cat(t, join=join, na_rep="-") + assert_series_or_index_equal(result, expected) + + +@pytest.mark.parametrize("join", ["left", "outer", "inner", "right"]) +def test_str_cat_align_mixed_inputs(join): + s = Series(["a", "b", "c", "d"]) + t = Series(["d", "a", "e", "b"], index=[3, 0, 4, 1]) + d = concat([t, t], axis=1) + + expected_outer = Series(["aaa", "bbb", "c--", "ddd", "-ee"]) + expected = expected_outer.loc[s.index.join(t.index, how=join)] + + # list of Series + result = s.str.cat([t, t], join=join, na_rep="-") + tm.assert_series_equal(result, expected) + + # DataFrame + result = s.str.cat(d, join=join, na_rep="-") + tm.assert_series_equal(result, expected) + + # mixed list of indexed/unindexed + u = np.array(["A", "B", "C", "D"]) + expected_outer = Series(["aaA", "bbB", "c-C", "ddD", "-e-"]) + # joint index of rhs [t, u]; u will be forced have index of s + rhs_idx = ( + t.index.intersection(s.index) + if join == "inner" + else t.index.union(s.index) + if join == "outer" + else t.index.append(s.index.difference(t.index)) + ) + + expected = expected_outer.loc[s.index.join(rhs_idx, how=join)] + result = s.str.cat([t, u], join=join, na_rep="-") + tm.assert_series_equal(result, expected) + + with pytest.raises(TypeError, match="others must be Series,.*"): + # nested lists are forbidden + s.str.cat([t, 
list(u)], join=join) + + # errors for incorrect lengths + rgx = r"If `others` contains arrays or lists \(or other list-likes.*" + z = Series(["1", "2", "3"]).values + + # unindexed object of wrong length + with pytest.raises(ValueError, match=rgx): + s.str.cat(z, join=join) + + # unindexed object of wrong length in list + with pytest.raises(ValueError, match=rgx): + s.str.cat([t, z], join=join) + + +def test_str_cat_all_na(index_or_series, index_or_series2): + # GH 24044 + box = index_or_series + other = index_or_series2 + + # check that all NaNs in caller / target work + s = Index(["a", "b", "c", "d"]) + s = s if box == Index else Series(s, index=s) + t = other([np.nan] * 4, dtype=object) + # add index of s for alignment + t = t if other == Index else Series(t, index=s) + + # all-NA target + if box == Series: + expected = Series([np.nan] * 4, index=s.index, dtype=object) + else: # box == Index + expected = Index([np.nan] * 4, dtype=object) + result = s.str.cat(t, join="left") + assert_series_or_index_equal(result, expected) + + # all-NA caller (only for Series) + if other == Series: + expected = Series([np.nan] * 4, dtype=object, index=t.index) + result = t.str.cat(s, join="left") + tm.assert_series_equal(result, expected) + + +def test_str_cat_special_cases(): + s = Series(["a", "b", "c", "d"]) + t = Series(["d", "a", "e", "b"], index=[3, 0, 4, 1]) + + # iterator of elements with different types + expected = Series(["aaa", "bbb", "c-c", "ddd", "-e-"]) + result = s.str.cat(iter([t, s.values]), join="outer", na_rep="-") + tm.assert_series_equal(result, expected) + + # right-align with different indexes in others + expected = Series(["aa-", "d-d"], index=[0, 3]) + result = s.str.cat([t.loc[[0]], t.loc[[3]]], join="right", na_rep="-") + tm.assert_series_equal(result, expected) + + +def test_cat_on_filtered_index(): + df = DataFrame( + index=MultiIndex.from_product( + [[2011, 2012], [1, 2, 3]], names=["year", "month"] + ) + ) + + df = df.reset_index() + df = df[df.month > 1] + + str_year = df.year.astype("str") + str_month = df.month.astype("str") + str_both = str_year.str.cat(str_month, sep=" ") + + assert str_both.loc[1] == "2011 2" + + str_multiple = str_year.str.cat([str_month, str_month], sep=" ") + + assert str_multiple.loc[1] == "2011 2 2" + + +@pytest.mark.parametrize("klass", [tuple, list, np.array, Series, Index]) +def test_cat_different_classes(klass): + # https://github.com/pandas-dev/pandas/issues/33425 + s = Series(["a", "b", "c"]) + result = s.str.cat(klass(["x", "y", "z"])) + expected = Series(["ax", "by", "cz"]) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/strings/test_extract.py b/.local/lib/python3.8/site-packages/pandas/tests/strings/test_extract.py new file mode 100644 index 0000000..0f4ffcc --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/strings/test_extract.py @@ -0,0 +1,708 @@ +from datetime import datetime +import re + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + _testing as tm, +) + + +def test_extract_expand_kwarg_wrong_type_raises(any_string_dtype): + # TODO: should this raise TypeError + values = Series(["fooBAD__barBAD", np.nan, "foo"], dtype=any_string_dtype) + with pytest.raises(ValueError, match="expand must be True or False"): + values.str.extract(".*(BAD[_]+).*(BAD)", expand=None) + + +def test_extract_expand_kwarg(any_string_dtype): + s = Series(["fooBAD__barBAD", np.nan, "foo"], dtype=any_string_dtype) + expected = 
DataFrame(["BAD__", np.nan, np.nan], dtype=any_string_dtype) + + result = s.str.extract(".*(BAD[_]+).*") + tm.assert_frame_equal(result, expected) + + result = s.str.extract(".*(BAD[_]+).*", expand=True) + tm.assert_frame_equal(result, expected) + + expected = DataFrame( + [["BAD__", "BAD"], [np.nan, np.nan], [np.nan, np.nan]], dtype=any_string_dtype + ) + result = s.str.extract(".*(BAD[_]+).*(BAD)", expand=False) + tm.assert_frame_equal(result, expected) + + +def test_extract_expand_False_mixed_object(): + ser = Series( + ["aBAD_BAD", np.nan, "BAD_b_BAD", True, datetime.today(), "foo", None, 1, 2.0] + ) + + # two groups + result = ser.str.extract(".*(BAD[_]+).*(BAD)", expand=False) + er = [np.nan, np.nan] # empty row + expected = DataFrame([["BAD_", "BAD"], er, ["BAD_", "BAD"], er, er, er, er, er, er]) + tm.assert_frame_equal(result, expected) + + # single group + result = ser.str.extract(".*(BAD[_]+).*BAD", expand=False) + expected = Series( + ["BAD_", np.nan, "BAD_", np.nan, np.nan, np.nan, np.nan, np.nan, np.nan] + ) + tm.assert_series_equal(result, expected) + + +def test_extract_expand_index_raises(): + # GH9980 + # Index only works with one regex group since + # multi-group would expand to a frame + idx = Index(["A1", "A2", "A3", "A4", "B5"]) + msg = "only one regex group is supported with Index" + with pytest.raises(ValueError, match=msg): + idx.str.extract("([AB])([123])", expand=False) + + +def test_extract_expand_no_capture_groups_raises(index_or_series, any_string_dtype): + s_or_idx = index_or_series(["A1", "B2", "C3"], dtype=any_string_dtype) + msg = "pattern contains no capture groups" + + # no groups + with pytest.raises(ValueError, match=msg): + s_or_idx.str.extract("[ABC][123]", expand=False) + + # only non-capturing groups + with pytest.raises(ValueError, match=msg): + s_or_idx.str.extract("(?:[AB]).*", expand=False) + + +def test_extract_expand_single_capture_group(index_or_series, any_string_dtype): + # single group renames series/index properly + s_or_idx = index_or_series(["A1", "A2"], dtype=any_string_dtype) + result = s_or_idx.str.extract(r"(?PA)\d", expand=False) + + expected = index_or_series(["A", "A"], name="uno", dtype=any_string_dtype) + if index_or_series == Series: + tm.assert_series_equal(result, expected) + else: + tm.assert_index_equal(result, expected) + + +def test_extract_expand_capture_groups(any_string_dtype): + s = Series(["A1", "B2", "C3"], dtype=any_string_dtype) + # one group, no matches + result = s.str.extract("(_)", expand=False) + expected = Series([np.nan, np.nan, np.nan], dtype=any_string_dtype) + tm.assert_series_equal(result, expected) + + # two groups, no matches + result = s.str.extract("(_)(_)", expand=False) + expected = DataFrame( + [[np.nan, np.nan], [np.nan, np.nan], [np.nan, np.nan]], dtype=any_string_dtype + ) + tm.assert_frame_equal(result, expected) + + # one group, some matches + result = s.str.extract("([AB])[123]", expand=False) + expected = Series(["A", "B", np.nan], dtype=any_string_dtype) + tm.assert_series_equal(result, expected) + + # two groups, some matches + result = s.str.extract("([AB])([123])", expand=False) + expected = DataFrame( + [["A", "1"], ["B", "2"], [np.nan, np.nan]], dtype=any_string_dtype + ) + tm.assert_frame_equal(result, expected) + + # one named group + result = s.str.extract("(?P[AB])", expand=False) + expected = Series(["A", "B", np.nan], name="letter", dtype=any_string_dtype) + tm.assert_series_equal(result, expected) + + # two named groups + result = s.str.extract("(?P[AB])(?P[123])", 
+    expected = DataFrame(
+        [["A", "1"], ["B", "2"], [np.nan, np.nan]],
+        columns=["letter", "number"],
+        dtype=any_string_dtype,
+    )
+    tm.assert_frame_equal(result, expected)
+
+    # mix named and unnamed groups
+    result = s.str.extract("([AB])(?P<number>[123])", expand=False)
+    expected = DataFrame(
+        [["A", "1"], ["B", "2"], [np.nan, np.nan]],
+        columns=[0, "number"],
+        dtype=any_string_dtype,
+    )
+    tm.assert_frame_equal(result, expected)
+
+    # one normal group, one non-capturing group
+    result = s.str.extract("([AB])(?:[123])", expand=False)
+    expected = Series(["A", "B", np.nan], dtype=any_string_dtype)
+    tm.assert_series_equal(result, expected)
+
+    # two normal groups, one non-capturing group
+    s = Series(["A11", "B22", "C33"], dtype=any_string_dtype)
+    result = s.str.extract("([AB])([123])(?:[123])", expand=False)
+    expected = DataFrame(
+        [["A", "1"], ["B", "2"], [np.nan, np.nan]], dtype=any_string_dtype
+    )
+    tm.assert_frame_equal(result, expected)
+
+    # one optional group followed by one normal group
+    s = Series(["A1", "B2", "3"], dtype=any_string_dtype)
+    result = s.str.extract("(?P<letter>[AB])?(?P<number>[123])", expand=False)
+    expected = DataFrame(
+        [["A", "1"], ["B", "2"], [np.nan, "3"]],
+        columns=["letter", "number"],
+        dtype=any_string_dtype,
+    )
+    tm.assert_frame_equal(result, expected)
+
+    # one normal group followed by one optional group
+    s = Series(["A1", "B2", "C"], dtype=any_string_dtype)
+    result = s.str.extract("(?P<letter>[ABC])(?P<number>[123])?", expand=False)
+    expected = DataFrame(
+        [["A", "1"], ["B", "2"], ["C", np.nan]],
+        columns=["letter", "number"],
+        dtype=any_string_dtype,
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+def test_extract_expand_capture_groups_index(index, any_string_dtype):
+    # https://github.com/pandas-dev/pandas/issues/6348
+    # not passing index to the extractor
+    data = ["A1", "B2", "C"]
+
+    if len(index) < len(data):
+        pytest.skip("Index too short")
+
+    index = index[: len(data)]
+    s = Series(data, index=index, dtype=any_string_dtype)
+
+    result = s.str.extract(r"(\d)", expand=False)
+    expected = Series(["1", "2", np.nan], index=index, dtype=any_string_dtype)
+    tm.assert_series_equal(result, expected)
+
+    result = s.str.extract(r"(?P<letter>\D)(?P<number>\d)?", expand=False)
+    expected = DataFrame(
+        [["A", "1"], ["B", "2"], ["C", np.nan]],
+        columns=["letter", "number"],
+        index=index,
+        dtype=any_string_dtype,
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+def test_extract_single_series_name_is_preserved(any_string_dtype):
+    s = Series(["a3", "b3", "c2"], name="bob", dtype=any_string_dtype)
+    result = s.str.extract(r"(?P<sue>[a-z])", expand=False)
+    expected = Series(["a", "b", "c"], name="sue", dtype=any_string_dtype)
+    tm.assert_series_equal(result, expected)
+
+
+def test_extract_expand_True(any_string_dtype):
+    # Contains tests like those in test_match and some others.
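+    # Illustrative aside (added comment, not from the upstream test): with
+    # expand=True the result is always a DataFrame, one column per group, e.g.
+    #   >>> Series(["a1"]).str.extract(r"(\w)(\d)", expand=True)
+    #      0  1
+    #   0  a  1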
+ s = Series(["fooBAD__barBAD", np.nan, "foo"], dtype=any_string_dtype) + + result = s.str.extract(".*(BAD[_]+).*(BAD)", expand=True) + expected = DataFrame( + [["BAD__", "BAD"], [np.nan, np.nan], [np.nan, np.nan]], dtype=any_string_dtype + ) + tm.assert_frame_equal(result, expected) + + +def test_extract_expand_True_mixed_object(): + er = [np.nan, np.nan] # empty row + mixed = Series( + [ + "aBAD_BAD", + np.nan, + "BAD_b_BAD", + True, + datetime.today(), + "foo", + None, + 1, + 2.0, + ] + ) + + result = mixed.str.extract(".*(BAD[_]+).*(BAD)", expand=True) + expected = DataFrame([["BAD_", "BAD"], er, ["BAD_", "BAD"], er, er, er, er, er, er]) + tm.assert_frame_equal(result, expected) + + +def test_extract_expand_True_single_capture_group_raises( + index_or_series, any_string_dtype +): + # these should work for both Series and Index + # no groups + s_or_idx = index_or_series(["A1", "B2", "C3"], dtype=any_string_dtype) + msg = "pattern contains no capture groups" + with pytest.raises(ValueError, match=msg): + s_or_idx.str.extract("[ABC][123]", expand=True) + + # only non-capturing groups + with pytest.raises(ValueError, match=msg): + s_or_idx.str.extract("(?:[AB]).*", expand=True) + + +def test_extract_expand_True_single_capture_group(index_or_series, any_string_dtype): + # single group renames series/index properly + s_or_idx = index_or_series(["A1", "A2"], dtype=any_string_dtype) + result = s_or_idx.str.extract(r"(?PA)\d", expand=True) + expected = DataFrame({"uno": ["A", "A"]}, dtype=any_string_dtype) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("name", [None, "series_name"]) +def test_extract_series(name, any_string_dtype): + # extract should give the same result whether or not the series has a name. + s = Series(["A1", "B2", "C3"], name=name, dtype=any_string_dtype) + + # one group, no matches + result = s.str.extract("(_)", expand=True) + expected = DataFrame([np.nan, np.nan, np.nan], dtype=any_string_dtype) + tm.assert_frame_equal(result, expected) + + # two groups, no matches + result = s.str.extract("(_)(_)", expand=True) + expected = DataFrame( + [[np.nan, np.nan], [np.nan, np.nan], [np.nan, np.nan]], dtype=any_string_dtype + ) + tm.assert_frame_equal(result, expected) + + # one group, some matches + result = s.str.extract("([AB])[123]", expand=True) + expected = DataFrame(["A", "B", np.nan], dtype=any_string_dtype) + tm.assert_frame_equal(result, expected) + + # two groups, some matches + result = s.str.extract("([AB])([123])", expand=True) + expected = DataFrame( + [["A", "1"], ["B", "2"], [np.nan, np.nan]], dtype=any_string_dtype + ) + tm.assert_frame_equal(result, expected) + + # one named group + result = s.str.extract("(?P[AB])", expand=True) + expected = DataFrame({"letter": ["A", "B", np.nan]}, dtype=any_string_dtype) + tm.assert_frame_equal(result, expected) + + # two named groups + result = s.str.extract("(?P[AB])(?P[123])", expand=True) + expected = DataFrame( + [["A", "1"], ["B", "2"], [np.nan, np.nan]], + columns=["letter", "number"], + dtype=any_string_dtype, + ) + tm.assert_frame_equal(result, expected) + + # mix named and unnamed groups + result = s.str.extract("([AB])(?P[123])", expand=True) + expected = DataFrame( + [["A", "1"], ["B", "2"], [np.nan, np.nan]], + columns=[0, "number"], + dtype=any_string_dtype, + ) + tm.assert_frame_equal(result, expected) + + # one normal group, one non-capturing group + result = s.str.extract("([AB])(?:[123])", expand=True) + expected = DataFrame(["A", "B", np.nan], dtype=any_string_dtype) + 
+    tm.assert_frame_equal(result, expected)
+
+
+def test_extract_optional_groups(any_string_dtype):
+
+    # two normal groups, one non-capturing group
+    s = Series(["A11", "B22", "C33"], dtype=any_string_dtype)
+    result = s.str.extract("([AB])([123])(?:[123])", expand=True)
+    expected = DataFrame(
+        [["A", "1"], ["B", "2"], [np.nan, np.nan]], dtype=any_string_dtype
+    )
+    tm.assert_frame_equal(result, expected)
+
+    # one optional group followed by one normal group
+    s = Series(["A1", "B2", "3"], dtype=any_string_dtype)
+    result = s.str.extract("(?P<letter>[AB])?(?P<number>[123])", expand=True)
+    expected = DataFrame(
+        [["A", "1"], ["B", "2"], [np.nan, "3"]],
+        columns=["letter", "number"],
+        dtype=any_string_dtype,
+    )
+    tm.assert_frame_equal(result, expected)
+
+    # one normal group followed by one optional group
+    s = Series(["A1", "B2", "C"], dtype=any_string_dtype)
+    result = s.str.extract("(?P<letter>[ABC])(?P<number>[123])?", expand=True)
+    expected = DataFrame(
+        [["A", "1"], ["B", "2"], ["C", np.nan]],
+        columns=["letter", "number"],
+        dtype=any_string_dtype,
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+def test_extract_dataframe_capture_groups_index(index, any_string_dtype):
+    # GH6348
+    # not passing index to the extractor
+
+    data = ["A1", "B2", "C"]
+
+    if len(index) < len(data):
+        pytest.skip("Index too short")
+
+    index = index[: len(data)]
+    s = Series(data, index=index, dtype=any_string_dtype)
+
+    result = s.str.extract(r"(\d)", expand=True)
+    expected = DataFrame(["1", "2", np.nan], index=index, dtype=any_string_dtype)
+    tm.assert_frame_equal(result, expected)
+
+    result = s.str.extract(r"(?P<letter>\D)(?P<number>\d)?", expand=True)
+    expected = DataFrame(
+        [["A", "1"], ["B", "2"], ["C", np.nan]],
+        columns=["letter", "number"],
+        index=index,
+        dtype=any_string_dtype,
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+def test_extract_single_group_returns_frame(any_string_dtype):
+    # GH11386 extract should always return DataFrame, even when
+    # there is only one group. Prior to v0.18.0, extract returned
+    # Series when there was only one group in the regex.
+    s = Series(["a3", "b3", "c2"], name="series_name", dtype=any_string_dtype)
+    result = s.str.extract(r"(?P<letter>[a-z])", expand=True)
+    expected = DataFrame({"letter": ["a", "b", "c"]}, dtype=any_string_dtype)
+    tm.assert_frame_equal(result, expected)
+
+
+def test_extractall(any_string_dtype):
+    data = [
+        "dave@google.com",
+        "tdhock5@gmail.com",
+        "maudelaperriere@gmail.com",
+        "rob@gmail.com some text steve@gmail.com",
+        "a@b.com some text c@d.com and e@f.com",
+        np.nan,
+        "",
+    ]
+    expected_tuples = [
+        ("dave", "google", "com"),
+        ("tdhock5", "gmail", "com"),
+        ("maudelaperriere", "gmail", "com"),
+        ("rob", "gmail", "com"),
+        ("steve", "gmail", "com"),
+        ("a", "b", "com"),
+        ("c", "d", "com"),
+        ("e", "f", "com"),
+    ]
+    pat = r"""
+    (?P<user>[a-z0-9]+)
+    @
+    (?P<domain>[a-z]+)
+    \.
+    (?P<tld>[a-z]{2,4})
+    """
+    expected_columns = ["user", "domain", "tld"]
+    s = Series(data, dtype=any_string_dtype)
+    # extractall should return a DataFrame with one row for each match, indexed by the
+    # subject from which the match came.
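+    # Hedged illustration (added comment, not from the upstream test; output
+    # formatting abbreviated): the result index gains a "match" level counting
+    # matches per subject:
+    #   >>> Series(["a1a2"]).str.extractall(r"([a-z])(\d)")
+    #            0  1
+    #     match
+    #   0 0      a  1
+    #     1      a  2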
+    expected_index = MultiIndex.from_tuples(
+        [(0, 0), (1, 0), (2, 0), (3, 0), (3, 1), (4, 0), (4, 1), (4, 2)],
+        names=(None, "match"),
+    )
+    expected = DataFrame(
+        expected_tuples, expected_index, expected_columns, dtype=any_string_dtype
+    )
+    result = s.str.extractall(pat, flags=re.VERBOSE)
+    tm.assert_frame_equal(result, expected)
+
+    # The index of the input Series should be used to construct the index of the output
+    # DataFrame:
+    mi = MultiIndex.from_tuples(
+        [
+            ("single", "Dave"),
+            ("single", "Toby"),
+            ("single", "Maude"),
+            ("multiple", "robAndSteve"),
+            ("multiple", "abcdef"),
+            ("none", "missing"),
+            ("none", "empty"),
+        ]
+    )
+    s = Series(data, index=mi, dtype=any_string_dtype)
+    expected_index = MultiIndex.from_tuples(
+        [
+            ("single", "Dave", 0),
+            ("single", "Toby", 0),
+            ("single", "Maude", 0),
+            ("multiple", "robAndSteve", 0),
+            ("multiple", "robAndSteve", 1),
+            ("multiple", "abcdef", 0),
+            ("multiple", "abcdef", 1),
+            ("multiple", "abcdef", 2),
+        ],
+        names=(None, None, "match"),
+    )
+    expected = DataFrame(
+        expected_tuples, expected_index, expected_columns, dtype=any_string_dtype
+    )
+    result = s.str.extractall(pat, flags=re.VERBOSE)
+    tm.assert_frame_equal(result, expected)
+
+    # MultiIndexed subject with names.
+    s = Series(data, index=mi, dtype=any_string_dtype)
+    s.index.names = ("matches", "description")
+    expected_index.names = ("matches", "description", "match")
+    expected = DataFrame(
+        expected_tuples, expected_index, expected_columns, dtype=any_string_dtype
+    )
+    result = s.str.extractall(pat, flags=re.VERBOSE)
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "pat,expected_names",
+    [
+        # optional groups.
+        ("(?P<letter>[AB])?(?P<number>[123])", ["letter", "number"]),
+        # only one of two groups has a name.
+        ("([AB])?(?P<number>[123])", [0, "number"]),
+    ],
+)
+def test_extractall_column_names(pat, expected_names, any_string_dtype):
+    s = Series(["", "A1", "32"], dtype=any_string_dtype)
+
+    result = s.str.extractall(pat)
+    expected = DataFrame(
+        [("A", "1"), (np.nan, "3"), (np.nan, "2")],
+        index=MultiIndex.from_tuples([(1, 0), (2, 0), (2, 1)], names=(None, "match")),
+        columns=expected_names,
+        dtype=any_string_dtype,
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+def test_extractall_single_group(any_string_dtype):
+    s = Series(["a3", "b3", "d4c2"], name="series_name", dtype=any_string_dtype)
+    expected_index = MultiIndex.from_tuples(
+        [(0, 0), (1, 0), (2, 0), (2, 1)], names=(None, "match")
+    )
+
+    # extractall(one named group) returns DataFrame with one named column.
+    result = s.str.extractall(r"(?P<letter>[a-z])")
+    expected = DataFrame(
+        {"letter": ["a", "b", "d", "c"]}, index=expected_index, dtype=any_string_dtype
+    )
+    tm.assert_frame_equal(result, expected)
+
+    # extractall(one un-named group) returns DataFrame with one un-named column.
+    result = s.str.extractall(r"([a-z])")
+    expected = DataFrame(
+        ["a", "b", "d", "c"], index=expected_index, dtype=any_string_dtype
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+def test_extractall_single_group_with_quantifier(any_string_dtype):
+    # GH#13382
+    # extractall(one un-named group with quantifier) returns DataFrame with one un-named
+    # column.
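+    # Note (added comment): with the quantifier the group captures a whole run,
+    # so r"([a-z]+)" applied to "d4cd2" yields two matches, "d" and "cd", as
+    # asserted below.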
+ s = Series(["ab3", "abc3", "d4cd2"], name="series_name", dtype=any_string_dtype) + result = s.str.extractall(r"([a-z]+)") + expected = DataFrame( + ["ab", "abc", "d", "cd"], + index=MultiIndex.from_tuples( + [(0, 0), (1, 0), (2, 0), (2, 1)], names=(None, "match") + ), + dtype=any_string_dtype, + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "data, names", + [ + ([], (None,)), + ([], ("i1",)), + ([], (None, "i2")), + ([], ("i1", "i2")), + (["a3", "b3", "d4c2"], (None,)), + (["a3", "b3", "d4c2"], ("i1", "i2")), + (["a3", "b3", "d4c2"], (None, "i2")), + (["a3", "b3", "d4c2"], ("i1", "i2")), + ], +) +def test_extractall_no_matches(data, names, any_string_dtype): + # GH19075 extractall with no matches should return a valid MultiIndex + n = len(data) + if len(names) == 1: + index = Index(range(n), name=names[0]) + else: + tuples = (tuple([i] * (n - 1)) for i in range(n)) + index = MultiIndex.from_tuples(tuples, names=names) + s = Series(data, name="series_name", index=index, dtype=any_string_dtype) + expected_index = MultiIndex.from_tuples([], names=(names + ("match",))) + + # one un-named group. + result = s.str.extractall("(z)") + expected = DataFrame(columns=[0], index=expected_index, dtype=any_string_dtype) + tm.assert_frame_equal(result, expected) + + # two un-named groups. + result = s.str.extractall("(z)(z)") + expected = DataFrame(columns=[0, 1], index=expected_index, dtype=any_string_dtype) + tm.assert_frame_equal(result, expected) + + # one named group. + result = s.str.extractall("(?Pz)") + expected = DataFrame( + columns=["first"], index=expected_index, dtype=any_string_dtype + ) + tm.assert_frame_equal(result, expected) + + # two named groups. + result = s.str.extractall("(?Pz)(?Pz)") + expected = DataFrame( + columns=["first", "second"], index=expected_index, dtype=any_string_dtype + ) + tm.assert_frame_equal(result, expected) + + # one named, one un-named. + result = s.str.extractall("(z)(?Pz)") + expected = DataFrame( + columns=[0, "second"], index=expected_index, dtype=any_string_dtype + ) + tm.assert_frame_equal(result, expected) + + +def test_extractall_stringindex(any_string_dtype): + s = Series(["a1a2", "b1", "c1"], name="xxx", dtype=any_string_dtype) + result = s.str.extractall(r"[ab](?P\d)") + expected = DataFrame( + {"digit": ["1", "2", "1"]}, + index=MultiIndex.from_tuples([(0, 0), (0, 1), (1, 0)], names=[None, "match"]), + dtype=any_string_dtype, + ) + tm.assert_frame_equal(result, expected) + + # index should return the same result as the default index without name thus + # index.name doesn't affect to the result + if any_string_dtype == "object": + for idx in [ + Index(["a1a2", "b1", "c1"]), + Index(["a1a2", "b1", "c1"], name="xxx"), + ]: + + result = idx.str.extractall(r"[ab](?P\d)") + tm.assert_frame_equal(result, expected) + + s = Series( + ["a1a2", "b1", "c1"], + name="s_name", + index=Index(["XX", "yy", "zz"], name="idx_name"), + dtype=any_string_dtype, + ) + result = s.str.extractall(r"[ab](?P\d)") + expected = DataFrame( + {"digit": ["1", "2", "1"]}, + index=MultiIndex.from_tuples( + [("XX", 0), ("XX", 1), ("yy", 0)], names=["idx_name", "match"] + ), + dtype=any_string_dtype, + ) + tm.assert_frame_equal(result, expected) + + +def test_extractall_no_capture_groups_raises(any_string_dtype): + # Does not make sense to use extractall with a regex that has no capture groups. 
+    # (it returns DataFrame with one column for each capture group)
+    s = Series(["a3", "b3", "d4c2"], name="series_name", dtype=any_string_dtype)
+    with pytest.raises(ValueError, match="no capture groups"):
+        s.str.extractall(r"[a-z]")
+
+
+def test_extract_index_one_two_groups():
+    s = Series(["a3", "b3", "d4c2"], index=["A3", "B3", "D4"], name="series_name")
+    r = s.index.str.extract(r"([A-Z])", expand=True)
+    e = DataFrame(["A", "B", "D"])
+    tm.assert_frame_equal(r, e)
+
+    # Prior to v0.18.0, index.str.extract(regex with one group)
+    # returned Index. With more than one group, extract raised an
+    # error (GH9980). Now extract always returns DataFrame.
+    r = s.index.str.extract(r"(?P<letter>[A-Z])(?P<digit>[0-9])", expand=True)
+    e_list = [("A", "3"), ("B", "3"), ("D", "4")]
+    e = DataFrame(e_list, columns=["letter", "digit"])
+    tm.assert_frame_equal(r, e)
+
+
+def test_extractall_same_as_extract(any_string_dtype):
+    s = Series(["a3", "b3", "c2"], name="series_name", dtype=any_string_dtype)
+
+    pattern_two_noname = r"([a-z])([0-9])"
+    extract_two_noname = s.str.extract(pattern_two_noname, expand=True)
+    has_multi_index = s.str.extractall(pattern_two_noname)
+    no_multi_index = has_multi_index.xs(0, level="match")
+    tm.assert_frame_equal(extract_two_noname, no_multi_index)
+
+    pattern_two_named = r"(?P<letter>[a-z])(?P<digit>[0-9])"
+    extract_two_named = s.str.extract(pattern_two_named, expand=True)
+    has_multi_index = s.str.extractall(pattern_two_named)
+    no_multi_index = has_multi_index.xs(0, level="match")
+    tm.assert_frame_equal(extract_two_named, no_multi_index)
+
+    pattern_one_named = r"(?P<letter>[a-z])"
+    extract_one_named = s.str.extract(pattern_one_named, expand=True)
+    has_multi_index = s.str.extractall(pattern_one_named)
+    no_multi_index = has_multi_index.xs(0, level="match")
+    tm.assert_frame_equal(extract_one_named, no_multi_index)
+
+    pattern_one_noname = r"([a-z])"
+    extract_one_noname = s.str.extract(pattern_one_noname, expand=True)
+    has_multi_index = s.str.extractall(pattern_one_noname)
+    no_multi_index = has_multi_index.xs(0, level="match")
+    tm.assert_frame_equal(extract_one_noname, no_multi_index)
+
+
+def test_extractall_same_as_extract_subject_index(any_string_dtype):
+    # same as above tests, but s has a MultiIndex.
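+    # extractall() appends a "match" level to whatever index the subject already
+    # has, so taking xs(0, level="match") on its result should reproduce the frame
+    # that extract() builds on the original MultiIndex.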
+    mi = MultiIndex.from_tuples(
+        [("A", "first"), ("B", "second"), ("C", "third")],
+        names=("capital", "ordinal"),
+    )
+    s = Series(["a3", "b3", "c2"], index=mi, name="series_name", dtype=any_string_dtype)
+
+    pattern_two_noname = r"([a-z])([0-9])"
+    extract_two_noname = s.str.extract(pattern_two_noname, expand=True)
+    has_match_index = s.str.extractall(pattern_two_noname)
+    no_match_index = has_match_index.xs(0, level="match")
+    tm.assert_frame_equal(extract_two_noname, no_match_index)
+
+    pattern_two_named = r"(?P<letter>[a-z])(?P<digit>[0-9])"
+    extract_two_named = s.str.extract(pattern_two_named, expand=True)
+    has_match_index = s.str.extractall(pattern_two_named)
+    no_match_index = has_match_index.xs(0, level="match")
+    tm.assert_frame_equal(extract_two_named, no_match_index)
+
+    pattern_one_named = r"(?P<letter>[a-z])"
+    extract_one_named = s.str.extract(pattern_one_named, expand=True)
+    has_match_index = s.str.extractall(pattern_one_named)
+    no_match_index = has_match_index.xs(0, level="match")
+    tm.assert_frame_equal(extract_one_named, no_match_index)
+
+    pattern_one_noname = r"([a-z])"
+    extract_one_noname = s.str.extract(pattern_one_noname, expand=True)
+    has_match_index = s.str.extractall(pattern_one_noname)
+    no_match_index = has_match_index.xs(0, level="match")
+    tm.assert_frame_equal(extract_one_noname, no_match_index)
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/strings/test_find_replace.py b/.local/lib/python3.8/site-packages/pandas/tests/strings/test_find_replace.py
new file mode 100644
index 0000000..067bcf5
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/tests/strings/test_find_replace.py
@@ -0,0 +1,925 @@
+from datetime import datetime
+import re
+
+import numpy as np
+import pytest
+
+import pandas as pd
+from pandas import (
+    Series,
+    _testing as tm,
+)
+
+# --------------------------------------------------------------------------------------
+# str.contains
+# --------------------------------------------------------------------------------------
+
+
+def test_contains(any_string_dtype):
+    values = np.array(
+        ["foo", np.nan, "fooommm__foo", "mmm_", "foommm[_]+bar"], dtype=np.object_
+    )
+    values = Series(values, dtype=any_string_dtype)
+    pat = "mmm[_]+"
+
+    result = values.str.contains(pat)
+    expected_dtype = "object" if any_string_dtype == "object" else "boolean"
+    expected = Series(
+        np.array([False, np.nan, True, True, False], dtype=np.object_),
+        dtype=expected_dtype,
+    )
+    tm.assert_series_equal(result, expected)
+
+    result = values.str.contains(pat, regex=False)
+    expected = Series(
+        np.array([False, np.nan, False, False, True], dtype=np.object_),
+        dtype=expected_dtype,
+    )
+    tm.assert_series_equal(result, expected)
+
+    values = Series(
+        np.array(["foo", "xyz", "fooommm__foo", "mmm_"], dtype=object),
+        dtype=any_string_dtype,
+    )
+    result = values.str.contains(pat)
+    expected_dtype = np.bool_ if any_string_dtype == "object" else "boolean"
+    expected = Series(np.array([False, False, True, True]), dtype=expected_dtype)
+    tm.assert_series_equal(result, expected)
+
+    # case insensitive using regex
+    values = Series(
+        np.array(["Foo", "xYz", "fOOomMm__fOo", "MMM_"], dtype=object),
+        dtype=any_string_dtype,
+    )
+    result = values.str.contains("FOO|mmm", case=False)
+    expected = Series(np.array([True, False, True, True]), dtype=expected_dtype)
+    tm.assert_series_equal(result, expected)
+
+    # case insensitive without regex
+    result = values.str.contains("foo", regex=False, case=False)
+    expected = Series(np.array([True, False, True, False]),
dtype=expected_dtype) + tm.assert_series_equal(result, expected) + + # unicode + values = Series( + np.array(["foo", np.nan, "fooommm__foo", "mmm_"], dtype=np.object_), + dtype=any_string_dtype, + ) + pat = "mmm[_]+" + + result = values.str.contains(pat) + expected_dtype = "object" if any_string_dtype == "object" else "boolean" + expected = Series( + np.array([False, np.nan, True, True], dtype=np.object_), dtype=expected_dtype + ) + tm.assert_series_equal(result, expected) + + result = values.str.contains(pat, na=False) + expected_dtype = np.bool_ if any_string_dtype == "object" else "boolean" + expected = Series(np.array([False, False, True, True]), dtype=expected_dtype) + tm.assert_series_equal(result, expected) + + values = Series( + np.array(["foo", "xyz", "fooommm__foo", "mmm_"], dtype=np.object_), + dtype=any_string_dtype, + ) + result = values.str.contains(pat) + expected = Series(np.array([False, False, True, True]), dtype=expected_dtype) + tm.assert_series_equal(result, expected) + + +def test_contains_object_mixed(): + mixed = Series( + np.array( + ["a", np.nan, "b", True, datetime.today(), "foo", None, 1, 2.0], + dtype=object, + ) + ) + result = mixed.str.contains("o") + expected = Series( + np.array( + [False, np.nan, False, np.nan, np.nan, True, np.nan, np.nan, np.nan], + dtype=np.object_, + ) + ) + tm.assert_series_equal(result, expected) + + +def test_contains_na_kwarg_for_object_category(): + # gh 22158 + + # na for category + values = Series(["a", "b", "c", "a", np.nan], dtype="category") + result = values.str.contains("a", na=True) + expected = Series([True, False, False, True, True]) + tm.assert_series_equal(result, expected) + + result = values.str.contains("a", na=False) + expected = Series([True, False, False, True, False]) + tm.assert_series_equal(result, expected) + + # na for objects + values = Series(["a", "b", "c", "a", np.nan]) + result = values.str.contains("a", na=True) + expected = Series([True, False, False, True, True]) + tm.assert_series_equal(result, expected) + + result = values.str.contains("a", na=False) + expected = Series([True, False, False, True, False]) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "na, expected", + [ + (None, pd.NA), + (True, True), + (False, False), + (0, False), + (3, True), + (np.nan, pd.NA), + ], +) +@pytest.mark.parametrize("regex", [True, False]) +def test_contains_na_kwarg_for_nullable_string_dtype( + nullable_string_dtype, na, expected, regex +): + # https://github.com/pandas-dev/pandas/pull/41025#issuecomment-824062416 + + values = Series(["a", "b", "c", "a", np.nan], dtype=nullable_string_dtype) + result = values.str.contains("a", na=na, regex=regex) + expected = Series([True, False, False, True, expected], dtype="boolean") + tm.assert_series_equal(result, expected) + + +def test_contains_moar(any_string_dtype): + # PR #1179 + s = Series( + ["A", "B", "C", "Aaba", "Baca", "", np.nan, "CABA", "dog", "cat"], + dtype=any_string_dtype, + ) + + result = s.str.contains("a") + expected_dtype = "object" if any_string_dtype == "object" else "boolean" + expected = Series( + [False, False, False, True, True, False, np.nan, False, False, True], + dtype=expected_dtype, + ) + tm.assert_series_equal(result, expected) + + result = s.str.contains("a", case=False) + expected = Series( + [True, False, False, True, True, False, np.nan, True, False, True], + dtype=expected_dtype, + ) + tm.assert_series_equal(result, expected) + + result = s.str.contains("Aa") + expected = Series( + [False, False, False, True, 
False, False, np.nan, False, False, False], + dtype=expected_dtype, + ) + tm.assert_series_equal(result, expected) + + result = s.str.contains("ba") + expected = Series( + [False, False, False, True, False, False, np.nan, False, False, False], + dtype=expected_dtype, + ) + tm.assert_series_equal(result, expected) + + result = s.str.contains("ba", case=False) + expected = Series( + [False, False, False, True, True, False, np.nan, True, False, False], + dtype=expected_dtype, + ) + tm.assert_series_equal(result, expected) + + +def test_contains_nan(any_string_dtype): + # PR #14171 + s = Series([np.nan, np.nan, np.nan], dtype=any_string_dtype) + + result = s.str.contains("foo", na=False) + expected_dtype = np.bool_ if any_string_dtype == "object" else "boolean" + expected = Series([False, False, False], dtype=expected_dtype) + tm.assert_series_equal(result, expected) + + result = s.str.contains("foo", na=True) + expected = Series([True, True, True], dtype=expected_dtype) + tm.assert_series_equal(result, expected) + + result = s.str.contains("foo", na="foo") + if any_string_dtype == "object": + expected = Series(["foo", "foo", "foo"], dtype=np.object_) + else: + expected = Series([True, True, True], dtype="boolean") + tm.assert_series_equal(result, expected) + + result = s.str.contains("foo") + expected_dtype = "object" if any_string_dtype == "object" else "boolean" + expected = Series([np.nan, np.nan, np.nan], dtype=expected_dtype) + tm.assert_series_equal(result, expected) + + +# -------------------------------------------------------------------------------------- +# str.startswith +# -------------------------------------------------------------------------------------- + + +@pytest.mark.parametrize("dtype", [None, "category"]) +@pytest.mark.parametrize("null_value", [None, np.nan, pd.NA]) +@pytest.mark.parametrize("na", [True, False]) +def test_startswith(dtype, null_value, na): + # add category dtype parametrizations for GH-36241 + values = Series( + ["om", null_value, "foo_nom", "nom", "bar_foo", null_value, "foo"], + dtype=dtype, + ) + + result = values.str.startswith("foo") + exp = Series([False, np.nan, True, False, False, np.nan, True]) + tm.assert_series_equal(result, exp) + + result = values.str.startswith("foo", na=na) + exp = Series([False, na, True, False, False, na, True]) + tm.assert_series_equal(result, exp) + + # mixed + mixed = np.array( + ["a", np.nan, "b", True, datetime.today(), "foo", None, 1, 2.0], + dtype=np.object_, + ) + rs = Series(mixed).str.startswith("f") + xp = Series([False, np.nan, False, np.nan, np.nan, True, np.nan, np.nan, np.nan]) + tm.assert_series_equal(rs, xp) + + +@pytest.mark.parametrize("na", [None, True, False]) +def test_startswith_nullable_string_dtype(nullable_string_dtype, na): + values = Series( + ["om", None, "foo_nom", "nom", "bar_foo", None, "foo", "regex", "rege."], + dtype=nullable_string_dtype, + ) + result = values.str.startswith("foo", na=na) + exp = Series( + [False, na, True, False, False, na, True, False, False], dtype="boolean" + ) + tm.assert_series_equal(result, exp) + + result = values.str.startswith("rege.", na=na) + exp = Series( + [False, na, False, False, False, na, False, False, True], dtype="boolean" + ) + tm.assert_series_equal(result, exp) + + +# -------------------------------------------------------------------------------------- +# str.endswith +# -------------------------------------------------------------------------------------- + + +@pytest.mark.parametrize("dtype", [None, "category"]) 
+@pytest.mark.parametrize("null_value", [None, np.nan, pd.NA]) +@pytest.mark.parametrize("na", [True, False]) +def test_endswith(dtype, null_value, na): + # add category dtype parametrizations for GH-36241 + values = Series( + ["om", null_value, "foo_nom", "nom", "bar_foo", null_value, "foo"], + dtype=dtype, + ) + + result = values.str.endswith("foo") + exp = Series([False, np.nan, False, False, True, np.nan, True]) + tm.assert_series_equal(result, exp) + + result = values.str.endswith("foo", na=na) + exp = Series([False, na, False, False, True, na, True]) + tm.assert_series_equal(result, exp) + + # mixed + mixed = np.array( + ["a", np.nan, "b", True, datetime.today(), "foo", None, 1, 2.0], + dtype=object, + ) + rs = Series(mixed).str.endswith("f") + xp = Series([False, np.nan, False, np.nan, np.nan, False, np.nan, np.nan, np.nan]) + tm.assert_series_equal(rs, xp) + + +@pytest.mark.parametrize("na", [None, True, False]) +def test_endswith_nullable_string_dtype(nullable_string_dtype, na): + values = Series( + ["om", None, "foo_nom", "nom", "bar_foo", None, "foo", "regex", "rege."], + dtype=nullable_string_dtype, + ) + result = values.str.endswith("foo", na=na) + exp = Series( + [False, na, False, False, True, na, True, False, False], dtype="boolean" + ) + tm.assert_series_equal(result, exp) + + result = values.str.endswith("rege.", na=na) + exp = Series( + [False, na, False, False, False, na, False, False, True], dtype="boolean" + ) + tm.assert_series_equal(result, exp) + + +# -------------------------------------------------------------------------------------- +# str.replace +# -------------------------------------------------------------------------------------- + + +def test_replace(any_string_dtype): + ser = Series(["fooBAD__barBAD", np.nan], dtype=any_string_dtype) + + result = ser.str.replace("BAD[_]*", "", regex=True) + expected = Series(["foobar", np.nan], dtype=any_string_dtype) + tm.assert_series_equal(result, expected) + + +def test_replace_max_replacements(any_string_dtype): + ser = Series(["fooBAD__barBAD", np.nan], dtype=any_string_dtype) + + expected = Series(["foobarBAD", np.nan], dtype=any_string_dtype) + result = ser.str.replace("BAD[_]*", "", n=1, regex=True) + tm.assert_series_equal(result, expected) + + expected = Series(["foo__barBAD", np.nan], dtype=any_string_dtype) + result = ser.str.replace("BAD", "", n=1, regex=False) + tm.assert_series_equal(result, expected) + + +def test_replace_mixed_object(): + ser = Series( + ["aBAD", np.nan, "bBAD", True, datetime.today(), "fooBAD", None, 1, 2.0] + ) + result = Series(ser).str.replace("BAD[_]*", "", regex=True) + expected = Series(["a", np.nan, "b", np.nan, np.nan, "foo", np.nan, np.nan, np.nan]) + tm.assert_series_equal(result, expected) + + +def test_replace_unicode(any_string_dtype): + ser = Series([b"abcd,\xc3\xa0".decode("utf-8")], dtype=any_string_dtype) + expected = Series([b"abcd, \xc3\xa0".decode("utf-8")], dtype=any_string_dtype) + result = ser.str.replace(r"(?<=\w),(?=\w)", ", ", flags=re.UNICODE, regex=True) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("repl", [None, 3, {"a": "b"}]) +@pytest.mark.parametrize("data", [["a", "b", None], ["a", "b", "c", "ad"]]) +def test_replace_wrong_repl_type_raises(any_string_dtype, index_or_series, repl, data): + # https://github.com/pandas-dev/pandas/issues/13438 + msg = "repl must be a string or callable" + obj = index_or_series(data, dtype=any_string_dtype) + with pytest.raises(TypeError, match=msg): + obj.str.replace("a", repl) + + +def 
test_replace_callable(any_string_dtype):
+    # GH 15055
+    ser = Series(["fooBAD__barBAD", np.nan], dtype=any_string_dtype)
+
+    # test with callable
+    repl = lambda m: m.group(0).swapcase()
+    result = ser.str.replace("[a-z][A-Z]{2}", repl, n=2, regex=True)
+    expected = Series(["foObaD__baRbaD", np.nan], dtype=any_string_dtype)
+    tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "repl", [lambda: None, lambda m, x: None, lambda m, x, y=None: None]
+)
+def test_replace_callable_raises(any_string_dtype, repl):
+    # GH 15055
+    values = Series(["fooBAD__barBAD", np.nan], dtype=any_string_dtype)
+
+    # test with wrong number of arguments, raising an error
+    msg = (
+        r"((takes)|(missing)) (?(2)from \d+ to )?\d+ "
+        r"(?(3)required )positional arguments?"
+    )
+    with pytest.raises(TypeError, match=msg):
+        values.str.replace("a", repl)
+
+
+def test_replace_callable_named_groups(any_string_dtype):
+    # test regex named groups
+    ser = Series(["Foo Bar Baz", np.nan], dtype=any_string_dtype)
+    pat = r"(?P<first>\w+) (?P<middle>\w+) (?P<last>\w+)"
+    repl = lambda m: m.group("middle").swapcase()
+    result = ser.str.replace(pat, repl, regex=True)
+    expected = Series(["bAR", np.nan], dtype=any_string_dtype)
+    tm.assert_series_equal(result, expected)
+
+
+def test_replace_compiled_regex(any_string_dtype):
+    # GH 15446
+    ser = Series(["fooBAD__barBAD", np.nan], dtype=any_string_dtype)
+
+    # test with compiled regex
+    pat = re.compile(r"BAD_*")
+    result = ser.str.replace(pat, "", regex=True)
+    expected = Series(["foobar", np.nan], dtype=any_string_dtype)
+    tm.assert_series_equal(result, expected)
+
+    result = ser.str.replace(pat, "", n=1, regex=True)
+    expected = Series(["foobarBAD", np.nan], dtype=any_string_dtype)
+    tm.assert_series_equal(result, expected)
+
+
+def test_replace_compiled_regex_mixed_object():
+    pat = re.compile(r"BAD_*")
+    ser = Series(
+        ["aBAD", np.nan, "bBAD", True, datetime.today(), "fooBAD", None, 1, 2.0]
+    )
+    result = Series(ser).str.replace(pat, "", regex=True)
+    expected = Series(["a", np.nan, "b", np.nan, np.nan, "foo", np.nan, np.nan, np.nan])
+    tm.assert_series_equal(result, expected)
+
+
+def test_replace_compiled_regex_unicode(any_string_dtype):
+    ser = Series([b"abcd,\xc3\xa0".decode("utf-8")], dtype=any_string_dtype)
+    expected = Series([b"abcd, \xc3\xa0".decode("utf-8")], dtype=any_string_dtype)
+    pat = re.compile(r"(?<=\w),(?=\w)", flags=re.UNICODE)
+    result = ser.str.replace(pat, ", ")
+    tm.assert_series_equal(result, expected)
+
+
+def test_replace_compiled_regex_raises(any_string_dtype):
+    # case and flags provided to str.replace will have no effect
+    # and will produce warnings
+    ser = Series(["fooBAD__barBAD__bad", np.nan], dtype=any_string_dtype)
+    pat = re.compile(r"BAD_*")
+
+    msg = "case and flags cannot be set when pat is a compiled regex"
+
+    with pytest.raises(ValueError, match=msg):
+        ser.str.replace(pat, "", flags=re.IGNORECASE)
+
+    with pytest.raises(ValueError, match=msg):
+        ser.str.replace(pat, "", case=False)
+
+    with pytest.raises(ValueError, match=msg):
+        ser.str.replace(pat, "", case=True)
+
+
+def test_replace_compiled_regex_callable(any_string_dtype):
+    # test with callable
+    ser = Series(["fooBAD__barBAD", np.nan], dtype=any_string_dtype)
+    repl = lambda m: m.group(0).swapcase()
+    pat = re.compile("[a-z][A-Z]{2}")
+    result = ser.str.replace(pat, repl, n=2)
+    expected = Series(["foObaD__baRbaD", np.nan], dtype=any_string_dtype)
+    tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "regex,expected", [(True, ["bao", "bao", np.nan]),
(False, ["bao", "foo", np.nan])] +) +def test_replace_literal(regex, expected, any_string_dtype): + # GH16808 literal replace (regex=False vs regex=True) + ser = Series(["f.o", "foo", np.nan], dtype=any_string_dtype) + expected = Series(expected, dtype=any_string_dtype) + result = ser.str.replace("f.", "ba", regex=regex) + tm.assert_series_equal(result, expected) + + +def test_replace_literal_callable_raises(any_string_dtype): + ser = Series([], dtype=any_string_dtype) + repl = lambda m: m.group(0).swapcase() + + msg = "Cannot use a callable replacement when regex=False" + with pytest.raises(ValueError, match=msg): + ser.str.replace("abc", repl, regex=False) + + +def test_replace_literal_compiled_raises(any_string_dtype): + ser = Series([], dtype=any_string_dtype) + pat = re.compile("[a-z][A-Z]{2}") + + msg = "Cannot use a compiled regex as replacement pattern with regex=False" + with pytest.raises(ValueError, match=msg): + ser.str.replace(pat, "", regex=False) + + +def test_replace_moar(any_string_dtype): + # PR #1179 + ser = Series( + ["A", "B", "C", "Aaba", "Baca", "", np.nan, "CABA", "dog", "cat"], + dtype=any_string_dtype, + ) + + result = ser.str.replace("A", "YYY") + expected = Series( + ["YYY", "B", "C", "YYYaba", "Baca", "", np.nan, "CYYYBYYY", "dog", "cat"], + dtype=any_string_dtype, + ) + tm.assert_series_equal(result, expected) + + result = ser.str.replace("A", "YYY", case=False) + expected = Series( + [ + "YYY", + "B", + "C", + "YYYYYYbYYY", + "BYYYcYYY", + "", + np.nan, + "CYYYBYYY", + "dog", + "cYYYt", + ], + dtype=any_string_dtype, + ) + tm.assert_series_equal(result, expected) + + result = ser.str.replace("^.a|dog", "XX-XX ", case=False, regex=True) + expected = Series( + [ + "A", + "B", + "C", + "XX-XX ba", + "XX-XX ca", + "", + np.nan, + "XX-XX BA", + "XX-XX ", + "XX-XX t", + ], + dtype=any_string_dtype, + ) + tm.assert_series_equal(result, expected) + + +def test_replace_not_case_sensitive_not_regex(any_string_dtype): + # https://github.com/pandas-dev/pandas/issues/41602 + ser = Series(["A.", "a.", "Ab", "ab", np.nan], dtype=any_string_dtype) + + result = ser.str.replace("a", "c", case=False, regex=False) + expected = Series(["c.", "c.", "cb", "cb", np.nan], dtype=any_string_dtype) + tm.assert_series_equal(result, expected) + + result = ser.str.replace("a.", "c.", case=False, regex=False) + expected = Series(["c.", "c.", "Ab", "ab", np.nan], dtype=any_string_dtype) + tm.assert_series_equal(result, expected) + + +def test_replace_regex_default_warning(any_string_dtype): + # https://github.com/pandas-dev/pandas/pull/24809 + s = Series(["a", "b", "ac", np.nan, ""], dtype=any_string_dtype) + msg = ( + "The default value of regex will change from True to False in a " + "future version\\.$" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = s.str.replace("^.$", "a") + expected = Series(["a", "a", "ac", np.nan, ""], dtype=any_string_dtype) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("regex", [True, False, None]) +def test_replace_regex_single_character(regex, any_string_dtype): + # https://github.com/pandas-dev/pandas/pull/24809 + + # The current behavior is to treat single character patterns as literal strings, + # even when ``regex`` is set to ``True``. + + s = Series(["a.b", ".", "b", np.nan, ""], dtype=any_string_dtype) + + if regex is None: + msg = re.escape( + "The default value of regex will change from True to False in a future " + "version. 
In addition, single character regular expressions will *not* " + "be treated as literal strings when regex=True." + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = s.str.replace(".", "a", regex=regex) + else: + result = s.str.replace(".", "a", regex=regex) + + expected = Series(["aab", "a", "b", np.nan, ""], dtype=any_string_dtype) + tm.assert_series_equal(result, expected) + + +# -------------------------------------------------------------------------------------- +# str.match +# -------------------------------------------------------------------------------------- + + +def test_match(any_string_dtype): + # New match behavior introduced in 0.13 + expected_dtype = "object" if any_string_dtype == "object" else "boolean" + + values = Series(["fooBAD__barBAD", np.nan, "foo"], dtype=any_string_dtype) + result = values.str.match(".*(BAD[_]+).*(BAD)") + expected = Series([True, np.nan, False], dtype=expected_dtype) + tm.assert_series_equal(result, expected) + + values = Series( + ["fooBAD__barBAD", "BAD_BADleroybrown", np.nan, "foo"], dtype=any_string_dtype + ) + result = values.str.match(".*BAD[_]+.*BAD") + expected = Series([True, True, np.nan, False], dtype=expected_dtype) + tm.assert_series_equal(result, expected) + + result = values.str.match("BAD[_]+.*BAD") + expected = Series([False, True, np.nan, False], dtype=expected_dtype) + tm.assert_series_equal(result, expected) + + values = Series( + ["fooBAD__barBAD", "^BAD_BADleroybrown", np.nan, "foo"], dtype=any_string_dtype + ) + result = values.str.match("^BAD[_]+.*BAD") + expected = Series([False, False, np.nan, False], dtype=expected_dtype) + tm.assert_series_equal(result, expected) + + result = values.str.match("\\^BAD[_]+.*BAD") + expected = Series([False, True, np.nan, False], dtype=expected_dtype) + tm.assert_series_equal(result, expected) + + +def test_match_mixed_object(): + mixed = Series( + [ + "aBAD_BAD", + np.nan, + "BAD_b_BAD", + True, + datetime.today(), + "foo", + None, + 1, + 2.0, + ] + ) + result = Series(mixed).str.match(".*(BAD[_]+).*(BAD)") + expected = Series( + [True, np.nan, True, np.nan, np.nan, False, np.nan, np.nan, np.nan] + ) + assert isinstance(result, Series) + tm.assert_series_equal(result, expected) + + +def test_match_na_kwarg(any_string_dtype): + # GH #6609 + s = Series(["a", "b", np.nan], dtype=any_string_dtype) + + result = s.str.match("a", na=False) + expected_dtype = np.bool_ if any_string_dtype == "object" else "boolean" + expected = Series([True, False, False], dtype=expected_dtype) + tm.assert_series_equal(result, expected) + + result = s.str.match("a") + expected_dtype = "object" if any_string_dtype == "object" else "boolean" + expected = Series([True, False, np.nan], dtype=expected_dtype) + tm.assert_series_equal(result, expected) + + +def test_match_case_kwarg(any_string_dtype): + values = Series(["ab", "AB", "abc", "ABC"], dtype=any_string_dtype) + result = values.str.match("ab", case=False) + expected_dtype = np.bool_ if any_string_dtype == "object" else "boolean" + expected = Series([True, True, True, True], dtype=expected_dtype) + tm.assert_series_equal(result, expected) + + +# -------------------------------------------------------------------------------------- +# str.fullmatch +# -------------------------------------------------------------------------------------- + + +def test_fullmatch(any_string_dtype): + # GH 32806 + ser = Series( + ["fooBAD__barBAD", "BAD_BADleroybrown", np.nan, "foo"], dtype=any_string_dtype + ) + result = ser.str.fullmatch(".*BAD[_]+.*BAD") 
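+    # fullmatch() must consume the entire string, unlike match(): the trailing
+    # "leroybrown" is left over after the final literal BAD, so
+    # re.fullmatch(".*BAD[_]+.*BAD", "BAD_BADleroybrown") is None and the
+    # second element is expected to be False below.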
+ expected_dtype = "object" if any_string_dtype == "object" else "boolean" + expected = Series([True, False, np.nan, False], dtype=expected_dtype) + tm.assert_series_equal(result, expected) + + +def test_fullmatch_na_kwarg(any_string_dtype): + ser = Series( + ["fooBAD__barBAD", "BAD_BADleroybrown", np.nan, "foo"], dtype=any_string_dtype + ) + result = ser.str.fullmatch(".*BAD[_]+.*BAD", na=False) + expected_dtype = np.bool_ if any_string_dtype == "object" else "boolean" + expected = Series([True, False, False, False], dtype=expected_dtype) + tm.assert_series_equal(result, expected) + + +def test_fullmatch_case_kwarg(any_string_dtype): + ser = Series(["ab", "AB", "abc", "ABC"], dtype=any_string_dtype) + expected_dtype = np.bool_ if any_string_dtype == "object" else "boolean" + + expected = Series([True, False, False, False], dtype=expected_dtype) + + result = ser.str.fullmatch("ab", case=True) + tm.assert_series_equal(result, expected) + + expected = Series([True, True, False, False], dtype=expected_dtype) + + result = ser.str.fullmatch("ab", case=False) + tm.assert_series_equal(result, expected) + + result = ser.str.fullmatch("ab", flags=re.IGNORECASE) + tm.assert_series_equal(result, expected) + + +# -------------------------------------------------------------------------------------- +# str.findall +# -------------------------------------------------------------------------------------- + + +def test_findall(any_string_dtype): + ser = Series(["fooBAD__barBAD", np.nan, "foo", "BAD"], dtype=any_string_dtype) + result = ser.str.findall("BAD[_]*") + expected = Series([["BAD__", "BAD"], np.nan, [], ["BAD"]]) + tm.assert_series_equal(result, expected) + + +def test_findall_mixed_object(): + ser = Series( + [ + "fooBAD__barBAD", + np.nan, + "foo", + True, + datetime.today(), + "BAD", + None, + 1, + 2.0, + ] + ) + + result = ser.str.findall("BAD[_]*") + expected = Series( + [ + ["BAD__", "BAD"], + np.nan, + [], + np.nan, + np.nan, + ["BAD"], + np.nan, + np.nan, + np.nan, + ] + ) + + tm.assert_series_equal(result, expected) + + +# -------------------------------------------------------------------------------------- +# str.find +# -------------------------------------------------------------------------------------- + + +def test_find(any_string_dtype): + ser = Series( + ["ABCDEFG", "BCDEFEF", "DEFGHIJEF", "EFGHEF", "XXXX"], dtype=any_string_dtype + ) + expected_dtype = np.int64 if any_string_dtype == "object" else "Int64" + + result = ser.str.find("EF") + expected = Series([4, 3, 1, 0, -1], dtype=expected_dtype) + tm.assert_series_equal(result, expected) + expected = np.array([v.find("EF") for v in np.array(ser)], dtype=np.int64) + tm.assert_numpy_array_equal(np.array(result, dtype=np.int64), expected) + + result = ser.str.rfind("EF") + expected = Series([4, 5, 7, 4, -1], dtype=expected_dtype) + tm.assert_series_equal(result, expected) + expected = np.array([v.rfind("EF") for v in np.array(ser)], dtype=np.int64) + tm.assert_numpy_array_equal(np.array(result, dtype=np.int64), expected) + + result = ser.str.find("EF", 3) + expected = Series([4, 3, 7, 4, -1], dtype=expected_dtype) + tm.assert_series_equal(result, expected) + expected = np.array([v.find("EF", 3) for v in np.array(ser)], dtype=np.int64) + tm.assert_numpy_array_equal(np.array(result, dtype=np.int64), expected) + + result = ser.str.rfind("EF", 3) + expected = Series([4, 5, 7, 4, -1], dtype=expected_dtype) + tm.assert_series_equal(result, expected) + expected = np.array([v.rfind("EF", 3) for v in np.array(ser)], dtype=np.int64) + 
tm.assert_numpy_array_equal(np.array(result, dtype=np.int64), expected) + + result = ser.str.find("EF", 3, 6) + expected = Series([4, 3, -1, 4, -1], dtype=expected_dtype) + tm.assert_series_equal(result, expected) + expected = np.array([v.find("EF", 3, 6) for v in np.array(ser)], dtype=np.int64) + tm.assert_numpy_array_equal(np.array(result, dtype=np.int64), expected) + + result = ser.str.rfind("EF", 3, 6) + expected = Series([4, 3, -1, 4, -1], dtype=expected_dtype) + tm.assert_series_equal(result, expected) + expected = np.array([v.rfind("EF", 3, 6) for v in np.array(ser)], dtype=np.int64) + tm.assert_numpy_array_equal(np.array(result, dtype=np.int64), expected) + + +def test_find_bad_arg_raises(any_string_dtype): + ser = Series([], dtype=any_string_dtype) + with pytest.raises(TypeError, match="expected a string object, not int"): + ser.str.find(0) + + with pytest.raises(TypeError, match="expected a string object, not int"): + ser.str.rfind(0) + + +def test_find_nan(any_string_dtype): + ser = Series( + ["ABCDEFG", np.nan, "DEFGHIJEF", np.nan, "XXXX"], dtype=any_string_dtype + ) + expected_dtype = np.float64 if any_string_dtype == "object" else "Int64" + + result = ser.str.find("EF") + expected = Series([4, np.nan, 1, np.nan, -1], dtype=expected_dtype) + tm.assert_series_equal(result, expected) + + result = ser.str.rfind("EF") + expected = Series([4, np.nan, 7, np.nan, -1], dtype=expected_dtype) + tm.assert_series_equal(result, expected) + + result = ser.str.find("EF", 3) + expected = Series([4, np.nan, 7, np.nan, -1], dtype=expected_dtype) + tm.assert_series_equal(result, expected) + + result = ser.str.rfind("EF", 3) + expected = Series([4, np.nan, 7, np.nan, -1], dtype=expected_dtype) + tm.assert_series_equal(result, expected) + + result = ser.str.find("EF", 3, 6) + expected = Series([4, np.nan, -1, np.nan, -1], dtype=expected_dtype) + tm.assert_series_equal(result, expected) + + result = ser.str.rfind("EF", 3, 6) + expected = Series([4, np.nan, -1, np.nan, -1], dtype=expected_dtype) + tm.assert_series_equal(result, expected) + + +# -------------------------------------------------------------------------------------- +# str.translate +# -------------------------------------------------------------------------------------- + + +def test_translate(index_or_series, any_string_dtype): + obj = index_or_series( + ["abcdefg", "abcc", "cdddfg", "cdefggg"], dtype=any_string_dtype + ) + table = str.maketrans("abc", "cde") + result = obj.str.translate(table) + expected = index_or_series( + ["cdedefg", "cdee", "edddfg", "edefggg"], dtype=any_string_dtype + ) + tm.assert_equal(result, expected) + + +def test_translate_mixed_object(): + # Series with non-string values + s = Series(["a", "b", "c", 1.2]) + table = str.maketrans("abc", "cde") + expected = Series(["c", "d", "e", np.nan]) + result = s.str.translate(table) + tm.assert_series_equal(result, expected) + + +# -------------------------------------------------------------------------------------- + + +def test_flags_kwarg(any_string_dtype): + data = { + "Dave": "dave@google.com", + "Steve": "steve@gmail.com", + "Rob": "rob@gmail.com", + "Wes": np.nan, + } + data = Series(data, dtype=any_string_dtype) + + pat = r"([A-Z0-9._%+-]+)@([A-Z0-9.-]+)\.([A-Z]{2,4})" + + result = data.str.extract(pat, flags=re.IGNORECASE, expand=True) + assert result.iloc[0].tolist() == ["dave", "google", "com"] + + result = data.str.match(pat, flags=re.IGNORECASE) + assert result[0] + + result = data.str.fullmatch(pat, flags=re.IGNORECASE) + assert result[0] + + result = 
data.str.findall(pat, flags=re.IGNORECASE) + assert result[0][0] == ("dave", "google", "com") + + result = data.str.count(pat, flags=re.IGNORECASE) + assert result[0] == 1 + + msg = "has match groups" + with tm.assert_produces_warning(UserWarning, match=msg): + result = data.str.contains(pat, flags=re.IGNORECASE) + assert result[0] diff --git a/.local/lib/python3.8/site-packages/pandas/tests/strings/test_get_dummies.py b/.local/lib/python3.8/site-packages/pandas/tests/strings/test_get_dummies.py new file mode 100644 index 0000000..31386e4 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/strings/test_get_dummies.py @@ -0,0 +1,53 @@ +import numpy as np + +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + _testing as tm, +) + + +def test_get_dummies(any_string_dtype): + s = Series(["a|b", "a|c", np.nan], dtype=any_string_dtype) + result = s.str.get_dummies("|") + expected = DataFrame([[1, 1, 0], [1, 0, 1], [0, 0, 0]], columns=list("abc")) + tm.assert_frame_equal(result, expected) + + s = Series(["a;b", "a", 7], dtype=any_string_dtype) + result = s.str.get_dummies(";") + expected = DataFrame([[0, 1, 1], [0, 1, 0], [1, 0, 0]], columns=list("7ab")) + tm.assert_frame_equal(result, expected) + + +def test_get_dummies_index(): + # GH9980, GH8028 + idx = Index(["a|b", "a|c", "b|c"]) + result = idx.str.get_dummies("|") + + expected = MultiIndex.from_tuples( + [(1, 1, 0), (1, 0, 1), (0, 1, 1)], names=("a", "b", "c") + ) + tm.assert_index_equal(result, expected) + + +def test_get_dummies_with_name_dummy(any_string_dtype): + # GH 12180 + # Dummies named 'name' should work as expected + s = Series(["a", "b,name", "b"], dtype=any_string_dtype) + result = s.str.get_dummies(",") + expected = DataFrame([[1, 0, 0], [0, 1, 1], [0, 1, 0]], columns=["a", "b", "name"]) + tm.assert_frame_equal(result, expected) + + +def test_get_dummies_with_name_dummy_index(): + # GH 12180 + # Dummies named 'name' should work as expected + idx = Index(["a|b", "name|c", "b|name"]) + result = idx.str.get_dummies("|") + + expected = MultiIndex.from_tuples( + [(1, 1, 0, 0), (0, 0, 1, 1), (0, 1, 0, 1)], names=("a", "b", "c", "name") + ) + tm.assert_index_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/strings/test_split_partition.py b/.local/lib/python3.8/site-packages/pandas/tests/strings/test_split_partition.py new file mode 100644 index 0000000..74458c1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/strings/test_split_partition.py @@ -0,0 +1,719 @@ +from datetime import datetime +import re + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + _testing as tm, +) + + +@pytest.mark.parametrize("method", ["split", "rsplit"]) +def test_split(any_string_dtype, method): + values = Series(["a_b_c", "c_d_e", np.nan, "f_g_h"], dtype=any_string_dtype) + + result = getattr(values.str, method)("_") + exp = Series([["a", "b", "c"], ["c", "d", "e"], np.nan, ["f", "g", "h"]]) + tm.assert_series_equal(result, exp) + + +@pytest.mark.parametrize("method", ["split", "rsplit"]) +def test_split_more_than_one_char(any_string_dtype, method): + # more than one char + values = Series(["a__b__c", "c__d__e", np.nan, "f__g__h"], dtype=any_string_dtype) + result = getattr(values.str, method)("__") + exp = Series([["a", "b", "c"], ["c", "d", "e"], np.nan, ["f", "g", "h"]]) + tm.assert_series_equal(result, exp) + + result = getattr(values.str, method)("__", expand=False) + 
tm.assert_series_equal(result, exp) + + +def test_split_more_regex_split(any_string_dtype): + # regex split + values = Series(["a,b_c", "c_d,e", np.nan, "f,g,h"], dtype=any_string_dtype) + result = values.str.split("[,_]") + exp = Series([["a", "b", "c"], ["c", "d", "e"], np.nan, ["f", "g", "h"]]) + tm.assert_series_equal(result, exp) + + +def test_split_regex(any_string_dtype): + # GH 43563 + # explicit regex = True split + values = Series("xxxjpgzzz.jpg", dtype=any_string_dtype) + result = values.str.split(r"\.jpg", regex=True) + exp = Series([["xxxjpgzzz", ""]]) + tm.assert_series_equal(result, exp) + + +def test_split_regex_explicit(any_string_dtype): + # explicit regex = True split with compiled regex + regex_pat = re.compile(r".jpg") + values = Series("xxxjpgzzz.jpg", dtype=any_string_dtype) + result = values.str.split(regex_pat) + exp = Series([["xx", "zzz", ""]]) + tm.assert_series_equal(result, exp) + + # explicit regex = False split + result = values.str.split(r"\.jpg", regex=False) + exp = Series([["xxxjpgzzz.jpg"]]) + tm.assert_series_equal(result, exp) + + # non explicit regex split, pattern length == 1 + result = values.str.split(r".") + exp = Series([["xxxjpgzzz", "jpg"]]) + tm.assert_series_equal(result, exp) + + # non explicit regex split, pattern length != 1 + result = values.str.split(r".jpg") + exp = Series([["xx", "zzz", ""]]) + tm.assert_series_equal(result, exp) + + # regex=False with pattern compiled regex raises error + with pytest.raises( + ValueError, + match="Cannot use a compiled regex as replacement pattern with regex=False", + ): + values.str.split(regex_pat, regex=False) + + +@pytest.mark.parametrize("expand", [None, False]) +@pytest.mark.parametrize("method", ["split", "rsplit"]) +def test_split_object_mixed(expand, method): + mixed = Series(["a_b_c", np.nan, "d_e_f", True, datetime.today(), None, 1, 2.0]) + result = getattr(mixed.str, method)("_", expand=expand) + exp = Series( + [ + ["a", "b", "c"], + np.nan, + ["d", "e", "f"], + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + ] + ) + assert isinstance(result, Series) + tm.assert_almost_equal(result, exp) + + +@pytest.mark.parametrize("method", ["split", "rsplit"]) +@pytest.mark.parametrize("n", [None, 0]) +def test_split_n(any_string_dtype, method, n): + s = Series(["a b", pd.NA, "b c"], dtype=any_string_dtype) + expected = Series([["a", "b"], pd.NA, ["b", "c"]]) + + result = getattr(s.str, method)(" ", n=n) + tm.assert_series_equal(result, expected) + + +def test_rsplit(any_string_dtype): + # regex split is not supported by rsplit + values = Series(["a,b_c", "c_d,e", np.nan, "f,g,h"], dtype=any_string_dtype) + result = values.str.rsplit("[,_]") + exp = Series([["a,b_c"], ["c_d,e"], np.nan, ["f,g,h"]]) + tm.assert_series_equal(result, exp) + + +def test_rsplit_max_number(any_string_dtype): + # setting max number of splits, make sure it's from reverse + values = Series(["a_b_c", "c_d_e", np.nan, "f_g_h"], dtype=any_string_dtype) + result = values.str.rsplit("_", n=1) + exp = Series([["a_b", "c"], ["c_d", "e"], np.nan, ["f_g", "h"]]) + tm.assert_series_equal(result, exp) + + +def test_split_blank_string(any_string_dtype): + # expand blank split GH 20067 + values = Series([""], name="test", dtype=any_string_dtype) + result = values.str.split(expand=True) + exp = DataFrame([[]], dtype=any_string_dtype) # NOTE: this is NOT an empty df + tm.assert_frame_equal(result, exp) + + +def test_split_blank_string_with_non_empty(any_string_dtype): + values = Series(["a b c", "a b", "", " "], name="test", 
dtype=any_string_dtype) + result = values.str.split(expand=True) + exp = DataFrame( + [ + ["a", "b", "c"], + ["a", "b", np.nan], + [np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan], + ], + dtype=any_string_dtype, + ) + tm.assert_frame_equal(result, exp) + + +@pytest.mark.parametrize("method", ["split", "rsplit"]) +def test_split_noargs(any_string_dtype, method): + # #1859 + s = Series(["Wes McKinney", "Travis Oliphant"], dtype=any_string_dtype) + result = getattr(s.str, method)() + expected = ["Travis", "Oliphant"] + assert result[1] == expected + + +@pytest.mark.parametrize( + "data, pat", + [ + (["bd asdf jfg", "kjasdflqw asdfnfk"], None), + (["bd asdf jfg", "kjasdflqw asdfnfk"], "asdf"), + (["bd_asdf_jfg", "kjasdflqw_asdfnfk"], "_"), + ], +) +@pytest.mark.parametrize("n", [-1, 0]) +def test_split_maxsplit(data, pat, any_string_dtype, n): + # re.split 0, str.split -1 + s = Series(data, dtype=any_string_dtype) + + result = s.str.split(pat=pat, n=n) + xp = s.str.split(pat=pat) + tm.assert_series_equal(result, xp) + + +@pytest.mark.parametrize( + "data, pat, expected", + [ + ( + ["split once", "split once too!"], + None, + Series({0: ["split", "once"], 1: ["split", "once too!"]}), + ), + ( + ["split_once", "split_once_too!"], + "_", + Series({0: ["split", "once"], 1: ["split", "once_too!"]}), + ), + ], +) +def test_split_no_pat_with_nonzero_n(data, pat, expected, any_string_dtype): + s = Series(data, dtype=any_string_dtype) + result = s.str.split(pat=pat, n=1) + tm.assert_series_equal(expected, result, check_index_type=False) + + +def test_split_to_dataframe_no_splits(any_string_dtype): + s = Series(["nosplit", "alsonosplit"], dtype=any_string_dtype) + result = s.str.split("_", expand=True) + exp = DataFrame({0: Series(["nosplit", "alsonosplit"], dtype=any_string_dtype)}) + tm.assert_frame_equal(result, exp) + + +def test_split_to_dataframe(any_string_dtype): + s = Series(["some_equal_splits", "with_no_nans"], dtype=any_string_dtype) + result = s.str.split("_", expand=True) + exp = DataFrame( + {0: ["some", "with"], 1: ["equal", "no"], 2: ["splits", "nans"]}, + dtype=any_string_dtype, + ) + tm.assert_frame_equal(result, exp) + + +def test_split_to_dataframe_unequal_splits(any_string_dtype): + s = Series( + ["some_unequal_splits", "one_of_these_things_is_not"], dtype=any_string_dtype + ) + result = s.str.split("_", expand=True) + exp = DataFrame( + { + 0: ["some", "one"], + 1: ["unequal", "of"], + 2: ["splits", "these"], + 3: [np.nan, "things"], + 4: [np.nan, "is"], + 5: [np.nan, "not"], + }, + dtype=any_string_dtype, + ) + tm.assert_frame_equal(result, exp) + + +def test_split_to_dataframe_with_index(any_string_dtype): + s = Series( + ["some_splits", "with_index"], index=["preserve", "me"], dtype=any_string_dtype + ) + result = s.str.split("_", expand=True) + exp = DataFrame( + {0: ["some", "with"], 1: ["splits", "index"]}, + index=["preserve", "me"], + dtype=any_string_dtype, + ) + tm.assert_frame_equal(result, exp) + + with pytest.raises(ValueError, match="expand must be"): + s.str.split("_", expand="not_a_boolean") + + +def test_split_to_multiindex_expand_no_splits(): + # https://github.com/pandas-dev/pandas/issues/23677 + + idx = Index(["nosplit", "alsonosplit", np.nan]) + result = idx.str.split("_", expand=True) + exp = idx + tm.assert_index_equal(result, exp) + assert result.nlevels == 1 + + +def test_split_to_multiindex_expand(): + idx = Index(["some_equal_splits", "with_no_nans", np.nan, None]) + result = idx.str.split("_", expand=True) + exp = MultiIndex.from_tuples( + [ + 
("some", "equal", "splits"), + ("with", "no", "nans"), + [np.nan, np.nan, np.nan], + [None, None, None], + ] + ) + tm.assert_index_equal(result, exp) + assert result.nlevels == 3 + + +def test_split_to_multiindex_expand_unequal_splits(): + idx = Index(["some_unequal_splits", "one_of_these_things_is_not", np.nan, None]) + result = idx.str.split("_", expand=True) + exp = MultiIndex.from_tuples( + [ + ("some", "unequal", "splits", np.nan, np.nan, np.nan), + ("one", "of", "these", "things", "is", "not"), + (np.nan, np.nan, np.nan, np.nan, np.nan, np.nan), + (None, None, None, None, None, None), + ] + ) + tm.assert_index_equal(result, exp) + assert result.nlevels == 6 + + with pytest.raises(ValueError, match="expand must be"): + idx.str.split("_", expand="not_a_boolean") + + +def test_rsplit_to_dataframe_expand_no_splits(any_string_dtype): + s = Series(["nosplit", "alsonosplit"], dtype=any_string_dtype) + result = s.str.rsplit("_", expand=True) + exp = DataFrame({0: Series(["nosplit", "alsonosplit"])}, dtype=any_string_dtype) + tm.assert_frame_equal(result, exp) + + +def test_rsplit_to_dataframe_expand(any_string_dtype): + s = Series(["some_equal_splits", "with_no_nans"], dtype=any_string_dtype) + result = s.str.rsplit("_", expand=True) + exp = DataFrame( + {0: ["some", "with"], 1: ["equal", "no"], 2: ["splits", "nans"]}, + dtype=any_string_dtype, + ) + tm.assert_frame_equal(result, exp) + + result = s.str.rsplit("_", expand=True, n=2) + exp = DataFrame( + {0: ["some", "with"], 1: ["equal", "no"], 2: ["splits", "nans"]}, + dtype=any_string_dtype, + ) + tm.assert_frame_equal(result, exp) + + result = s.str.rsplit("_", expand=True, n=1) + exp = DataFrame( + {0: ["some_equal", "with_no"], 1: ["splits", "nans"]}, dtype=any_string_dtype + ) + tm.assert_frame_equal(result, exp) + + +def test_rsplit_to_dataframe_expand_with_index(any_string_dtype): + s = Series( + ["some_splits", "with_index"], index=["preserve", "me"], dtype=any_string_dtype + ) + result = s.str.rsplit("_", expand=True) + exp = DataFrame( + {0: ["some", "with"], 1: ["splits", "index"]}, + index=["preserve", "me"], + dtype=any_string_dtype, + ) + tm.assert_frame_equal(result, exp) + + +def test_rsplit_to_multiindex_expand_no_split(): + idx = Index(["nosplit", "alsonosplit"]) + result = idx.str.rsplit("_", expand=True) + exp = idx + tm.assert_index_equal(result, exp) + assert result.nlevels == 1 + + +def test_rsplit_to_multiindex_expand(): + idx = Index(["some_equal_splits", "with_no_nans"]) + result = idx.str.rsplit("_", expand=True) + exp = MultiIndex.from_tuples([("some", "equal", "splits"), ("with", "no", "nans")]) + tm.assert_index_equal(result, exp) + assert result.nlevels == 3 + + +def test_rsplit_to_multiindex_expand_n(): + idx = Index(["some_equal_splits", "with_no_nans"]) + result = idx.str.rsplit("_", expand=True, n=1) + exp = MultiIndex.from_tuples([("some_equal", "splits"), ("with_no", "nans")]) + tm.assert_index_equal(result, exp) + assert result.nlevels == 2 + + +def test_split_nan_expand(any_string_dtype): + # gh-18450 + s = Series(["foo,bar,baz", np.nan], dtype=any_string_dtype) + result = s.str.split(",", expand=True) + exp = DataFrame( + [["foo", "bar", "baz"], [np.nan, np.nan, np.nan]], dtype=any_string_dtype + ) + tm.assert_frame_equal(result, exp) + + # check that these are actually np.nan/pd.NA and not None + # TODO see GH 18463 + # tm.assert_frame_equal does not differentiate + if any_string_dtype == "object": + assert all(np.isnan(x) for x in result.iloc[1]) + else: + assert all(x is pd.NA for x in 
result.iloc[1]) + + +def test_split_with_name_series(any_string_dtype): + # GH 12617 + + # should preserve name + s = Series(["a,b", "c,d"], name="xxx", dtype=any_string_dtype) + res = s.str.split(",") + exp = Series([["a", "b"], ["c", "d"]], name="xxx") + tm.assert_series_equal(res, exp) + + res = s.str.split(",", expand=True) + exp = DataFrame([["a", "b"], ["c", "d"]], dtype=any_string_dtype) + tm.assert_frame_equal(res, exp) + + +def test_split_with_name_index(): + # GH 12617 + idx = Index(["a,b", "c,d"], name="xxx") + res = idx.str.split(",") + exp = Index([["a", "b"], ["c", "d"]], name="xxx") + assert res.nlevels == 1 + tm.assert_index_equal(res, exp) + + res = idx.str.split(",", expand=True) + exp = MultiIndex.from_tuples([("a", "b"), ("c", "d")]) + assert res.nlevels == 2 + tm.assert_index_equal(res, exp) + + +@pytest.mark.parametrize( + "method, exp", + [ + [ + "partition", + [ + ("a", "__", "b__c"), + ("c", "__", "d__e"), + np.nan, + ("f", "__", "g__h"), + None, + ], + ], + [ + "rpartition", + [ + ("a__b", "__", "c"), + ("c__d", "__", "e"), + np.nan, + ("f__g", "__", "h"), + None, + ], + ], + ], +) +def test_partition_series_more_than_one_char(method, exp, any_string_dtype): + # https://github.com/pandas-dev/pandas/issues/23558 + # more than one char + s = Series(["a__b__c", "c__d__e", np.nan, "f__g__h", None], dtype=any_string_dtype) + result = getattr(s.str, method)("__", expand=False) + expected = Series(exp) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "method, exp", + [ + [ + "partition", + [("a", " ", "b c"), ("c", " ", "d e"), np.nan, ("f", " ", "g h"), None], + ], + [ + "rpartition", + [("a b", " ", "c"), ("c d", " ", "e"), np.nan, ("f g", " ", "h"), None], + ], + ], +) +def test_partition_series_none(any_string_dtype, method, exp): + # https://github.com/pandas-dev/pandas/issues/23558 + # None + s = Series(["a b c", "c d e", np.nan, "f g h", None], dtype=any_string_dtype) + result = getattr(s.str, method)(expand=False) + expected = Series(exp) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "method, exp", + [ + [ + "partition", + [("abc", "", ""), ("cde", "", ""), np.nan, ("fgh", "", ""), None], + ], + [ + "rpartition", + [("", "", "abc"), ("", "", "cde"), np.nan, ("", "", "fgh"), None], + ], + ], +) +def test_partition_series_not_split(any_string_dtype, method, exp): + # https://github.com/pandas-dev/pandas/issues/23558 + # Not split + s = Series(["abc", "cde", np.nan, "fgh", None], dtype=any_string_dtype) + result = getattr(s.str, method)("_", expand=False) + expected = Series(exp) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "method, exp", + [ + [ + "partition", + [("a", "_", "b_c"), ("c", "_", "d_e"), np.nan, ("f", "_", "g_h")], + ], + [ + "rpartition", + [("a_b", "_", "c"), ("c_d", "_", "e"), np.nan, ("f_g", "_", "h")], + ], + ], +) +def test_partition_series_unicode(any_string_dtype, method, exp): + # https://github.com/pandas-dev/pandas/issues/23558 + # unicode + s = Series(["a_b_c", "c_d_e", np.nan, "f_g_h"], dtype=any_string_dtype) + + result = getattr(s.str, method)("_", expand=False) + expected = Series(exp) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("method", ["partition", "rpartition"]) +def test_partition_series_stdlib(any_string_dtype, method): + # https://github.com/pandas-dev/pandas/issues/23558 + # compare to standard lib + s = Series(["A_B_C", "B_C_D", "E_F_G", "EFGHEF"], dtype=any_string_dtype) + result = getattr(s.str, method)("_", 
expand=False).tolist() + assert result == [getattr(v, method)("_") for v in s] + + +@pytest.mark.parametrize( + "method, expand, exp, exp_levels", + [ + [ + "partition", + False, + np.array( + [("a", "_", "b_c"), ("c", "_", "d_e"), ("f", "_", "g_h"), np.nan, None], + dtype=object, + ), + 1, + ], + [ + "rpartition", + False, + np.array( + [("a_b", "_", "c"), ("c_d", "_", "e"), ("f_g", "_", "h"), np.nan, None], + dtype=object, + ), + 1, + ], + ], +) +def test_partition_index(method, expand, exp, exp_levels): + # https://github.com/pandas-dev/pandas/issues/23558 + + values = Index(["a_b_c", "c_d_e", "f_g_h", np.nan, None]) + + result = getattr(values.str, method)("_", expand=expand) + exp = Index(exp) + tm.assert_index_equal(result, exp) + assert result.nlevels == exp_levels + + +@pytest.mark.parametrize( + "method, exp", + [ + [ + "partition", + { + 0: ["a", "c", np.nan, "f", None], + 1: ["_", "_", np.nan, "_", None], + 2: ["b_c", "d_e", np.nan, "g_h", None], + }, + ], + [ + "rpartition", + { + 0: ["a_b", "c_d", np.nan, "f_g", None], + 1: ["_", "_", np.nan, "_", None], + 2: ["c", "e", np.nan, "h", None], + }, + ], + ], +) +def test_partition_to_dataframe(any_string_dtype, method, exp): + # https://github.com/pandas-dev/pandas/issues/23558 + + s = Series(["a_b_c", "c_d_e", np.nan, "f_g_h", None], dtype=any_string_dtype) + result = getattr(s.str, method)("_") + expected = DataFrame( + exp, + dtype=any_string_dtype, + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "method, exp", + [ + [ + "partition", + { + 0: ["a", "c", np.nan, "f", None], + 1: ["_", "_", np.nan, "_", None], + 2: ["b_c", "d_e", np.nan, "g_h", None], + }, + ], + [ + "rpartition", + { + 0: ["a_b", "c_d", np.nan, "f_g", None], + 1: ["_", "_", np.nan, "_", None], + 2: ["c", "e", np.nan, "h", None], + }, + ], + ], +) +def test_partition_to_dataframe_from_series(any_string_dtype, method, exp): + # https://github.com/pandas-dev/pandas/issues/23558 + s = Series(["a_b_c", "c_d_e", np.nan, "f_g_h", None], dtype=any_string_dtype) + result = getattr(s.str, method)("_", expand=True) + expected = DataFrame( + exp, + dtype=any_string_dtype, + ) + tm.assert_frame_equal(result, expected) + + +def test_partition_with_name(any_string_dtype): + # GH 12617 + + s = Series(["a,b", "c,d"], name="xxx", dtype=any_string_dtype) + result = s.str.partition(",") + expected = DataFrame( + {0: ["a", "c"], 1: [",", ","], 2: ["b", "d"]}, dtype=any_string_dtype + ) + tm.assert_frame_equal(result, expected) + + +def test_partition_with_name_expand(any_string_dtype): + # GH 12617 + # should preserve name + s = Series(["a,b", "c,d"], name="xxx", dtype=any_string_dtype) + result = s.str.partition(",", expand=False) + expected = Series([("a", ",", "b"), ("c", ",", "d")], name="xxx") + tm.assert_series_equal(result, expected) + + +def test_partition_index_with_name(): + idx = Index(["a,b", "c,d"], name="xxx") + result = idx.str.partition(",") + expected = MultiIndex.from_tuples([("a", ",", "b"), ("c", ",", "d")]) + assert result.nlevels == 3 + tm.assert_index_equal(result, expected) + + +def test_partition_index_with_name_expand_false(): + idx = Index(["a,b", "c,d"], name="xxx") + # should preserve name + result = idx.str.partition(",", expand=False) + expected = Index(np.array([("a", ",", "b"), ("c", ",", "d")]), name="xxx") + assert result.nlevels == 1 + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize("method", ["partition", "rpartition"]) +def test_partition_sep_kwarg(any_string_dtype, method): + # GH 22676; depr 
kwarg "pat" in favor of "sep" + s = Series(["a_b_c", "c_d_e", np.nan, "f_g_h"], dtype=any_string_dtype) + + expected = getattr(s.str, method)(sep="_") + result = getattr(s.str, method)("_") + tm.assert_frame_equal(result, expected) + + +def test_get(): + ser = Series(["a_b_c", "c_d_e", np.nan, "f_g_h"]) + result = ser.str.split("_").str.get(1) + expected = Series(["b", "d", np.nan, "g"]) + tm.assert_series_equal(result, expected) + + +def test_get_mixed_object(): + ser = Series(["a_b_c", np.nan, "c_d_e", True, datetime.today(), None, 1, 2.0]) + result = ser.str.split("_").str.get(1) + expected = Series(["b", np.nan, "d", np.nan, np.nan, np.nan, np.nan, np.nan]) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("idx", [2, -3]) +def test_get_bounds(idx): + ser = Series(["1_2_3_4_5", "6_7_8_9_10", "11_12"]) + result = ser.str.split("_").str.get(idx) + expected = Series(["3", "8", np.nan]) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "idx, exp", [[2, [3, 3, np.nan, "b"]], [-1, [3, 3, np.nan, np.nan]]] +) +def test_get_complex(idx, exp): + # GH 20671, getting value not in dict raising `KeyError` + ser = Series([(1, 2, 3), [1, 2, 3], {1, 2, 3}, {1: "a", 2: "b", 3: "c"}]) + + result = ser.str.get(idx) + expected = Series(exp) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("to_type", [tuple, list, np.array]) +def test_get_complex_nested(to_type): + ser = Series([to_type([to_type([1, 2])])]) + + result = ser.str.get(0) + expected = Series([to_type([1, 2])]) + tm.assert_series_equal(result, expected) + + result = ser.str.get(1) + expected = Series([np.nan]) + tm.assert_series_equal(result, expected) + + +def test_get_strings(any_string_dtype): + ser = Series(["a", "ab", np.nan, "abc"], dtype=any_string_dtype) + result = ser.str.get(2) + expected = Series([np.nan, np.nan, np.nan, "c"], dtype=any_string_dtype) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/strings/test_string_array.py b/.local/lib/python3.8/site-packages/pandas/tests/strings/test_string_array.py new file mode 100644 index 0000000..90c26a7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/strings/test_string_array.py @@ -0,0 +1,101 @@ +import numpy as np +import pytest + +from pandas._libs import lib + +from pandas import ( + DataFrame, + Series, + _testing as tm, +) + + +def test_string_array(nullable_string_dtype, any_string_method): + method_name, args, kwargs = any_string_method + + data = ["a", "bb", np.nan, "ccc"] + a = Series(data, dtype=object) + b = Series(data, dtype=nullable_string_dtype) + + if method_name == "decode": + with pytest.raises(TypeError, match="a bytes-like object is required"): + getattr(b.str, method_name)(*args, **kwargs) + return + + expected = getattr(a.str, method_name)(*args, **kwargs) + result = getattr(b.str, method_name)(*args, **kwargs) + + if isinstance(expected, Series): + if expected.dtype == "object" and lib.is_string_array( + expected.dropna().values, + ): + assert result.dtype == nullable_string_dtype + result = result.astype(object) + + elif expected.dtype == "object" and lib.is_bool_array( + expected.values, skipna=True + ): + assert result.dtype == "boolean" + result = result.astype(object) + + elif expected.dtype == "bool": + assert result.dtype == "boolean" + result = result.astype("bool") + + elif expected.dtype == "float" and expected.isna().any(): + assert result.dtype == "Int64" + result = result.astype("float") + + elif 
isinstance(expected, DataFrame): + columns = expected.select_dtypes(include="object").columns + assert all(result[columns].dtypes == nullable_string_dtype) + result[columns] = result[columns].astype(object) + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize( + "method,expected", + [ + ("count", [2, None]), + ("find", [0, None]), + ("index", [0, None]), + ("rindex", [2, None]), + ], +) +def test_string_array_numeric_integer_array(nullable_string_dtype, method, expected): + s = Series(["aba", None], dtype=nullable_string_dtype) + result = getattr(s.str, method)("a") + expected = Series(expected, dtype="Int64") + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "method,expected", + [ + ("isdigit", [False, None, True]), + ("isalpha", [True, None, False]), + ("isalnum", [True, None, True]), + ("isnumeric", [False, None, True]), + ], +) +def test_string_array_boolean_array(nullable_string_dtype, method, expected): + s = Series(["a", None, "1"], dtype=nullable_string_dtype) + result = getattr(s.str, method)() + expected = Series(expected, dtype="boolean") + tm.assert_series_equal(result, expected) + + +def test_string_array_extract(nullable_string_dtype): + # https://github.com/pandas-dev/pandas/issues/30969 + # Only expand=False & multiple groups was failing + + a = Series(["a1", "b2", "cc"], dtype=nullable_string_dtype) + b = Series(["a1", "b2", "cc"], dtype="object") + pat = r"(\w)(\d)" + + result = a.str.extract(pat, expand=False) + expected = b.str.extract(pat, expand=False) + assert all(result.dtypes == nullable_string_dtype) + + result = result.astype(object) + tm.assert_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/strings/test_strings.py b/.local/lib/python3.8/site-packages/pandas/tests/strings/test_strings.py new file mode 100644 index 0000000..b72dd11 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/strings/test_strings.py @@ -0,0 +1,728 @@ +from datetime import ( + datetime, + timedelta, +) + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + isna, +) +import pandas._testing as tm + + +def assert_series_or_index_equal(left, right): + if isinstance(left, Series): + tm.assert_series_equal(left, right) + else: # Index + tm.assert_index_equal(left, right) + + +def test_iter(): + # GH3638 + strs = "google", "wikimedia", "wikipedia", "wikitravel" + ser = Series(strs) + + with tm.assert_produces_warning(FutureWarning): + for s in ser.str: + # iter must yield a Series + assert isinstance(s, Series) + + # indices of each yielded Series should be equal to the index of + # the original Series + tm.assert_index_equal(s.index, ser.index) + + for el in s: + # each element of the series is either a basestring/str or nan + assert isinstance(el, str) or isna(el) + + # desired behavior is to iterate until everything would be nan on the + # next iter, so make sure the last element of the iterator was 'l' in + # this case, since 'wikitravel' is the longest string + assert s.dropna().values.item() == "l" + + +def test_iter_empty(any_string_dtype): + ser = Series([], dtype=any_string_dtype) + + i, s = 100, 1 + + with tm.assert_produces_warning(FutureWarning): + for i, s in enumerate(ser.str): + pass + + # nothing to iterate over, so nothing gets defined; the values should + # remain unchanged + assert i == 100 + assert s == 1 + + +def test_iter_single_element(any_string_dtype): + ser = Series(["a"], dtype=any_string_dtype) + + with
tm.assert_produces_warning(FutureWarning): + for i, s in enumerate(ser.str): + pass + + assert not i + tm.assert_series_equal(ser, s) + + +def test_iter_object_try_string(): + ser = Series( + [ + slice(None, np.random.randint(10), np.random.randint(10, 20)) + for _ in range(4) + ] + ) + + i, s = 100, "h" + + with tm.assert_produces_warning(FutureWarning): + for i, s in enumerate(ser.str): + pass + + assert i == 100 + assert s == "h" + + +# test integer/float dtypes (inferred by constructor) and mixed + + +def test_count(any_string_dtype): + ser = Series(["foo", "foofoo", np.nan, "foooofooofommmfoo"], dtype=any_string_dtype) + result = ser.str.count("f[o]+") + expected_dtype = np.float64 if any_string_dtype == "object" else "Int64" + expected = Series([1, 2, np.nan, 4], dtype=expected_dtype) + tm.assert_series_equal(result, expected) + + +def test_count_mixed_object(): + ser = Series( + ["a", np.nan, "b", True, datetime.today(), "foo", None, 1, 2.0], + dtype=object, + ) + result = ser.str.count("a") + expected = Series([1, np.nan, 0, np.nan, np.nan, 0, np.nan, np.nan, np.nan]) + tm.assert_series_equal(result, expected) + + +def test_repeat(any_string_dtype): + ser = Series(["a", "b", np.nan, "c", np.nan, "d"], dtype=any_string_dtype) + + result = ser.str.repeat(3) + expected = Series( + ["aaa", "bbb", np.nan, "ccc", np.nan, "ddd"], dtype=any_string_dtype + ) + tm.assert_series_equal(result, expected) + + result = ser.str.repeat([1, 2, 3, 4, 5, 6]) + expected = Series( + ["a", "bb", np.nan, "cccc", np.nan, "dddddd"], dtype=any_string_dtype + ) + tm.assert_series_equal(result, expected) + + +def test_repeat_mixed_object(): + ser = Series(["a", np.nan, "b", True, datetime.today(), "foo", None, 1, 2.0]) + result = ser.str.repeat(3) + expected = Series( + ["aaa", np.nan, "bbb", np.nan, np.nan, "foofoofoo", np.nan, np.nan, np.nan] + ) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("arg, repeat", [[None, 4], ["b", None]]) +def test_repeat_with_null(any_string_dtype, arg, repeat): + # GH: 31632 + ser = Series(["a", arg], dtype=any_string_dtype) + result = ser.str.repeat([3, repeat]) + expected = Series(["aaa", np.nan], dtype=any_string_dtype) + tm.assert_series_equal(result, expected) + + +def test_empty_str_methods(any_string_dtype): + empty_str = empty = Series(dtype=any_string_dtype) + if any_string_dtype == "object": + empty_int = Series(dtype="int64") + empty_bool = Series(dtype=bool) + else: + empty_int = Series(dtype="Int64") + empty_bool = Series(dtype="boolean") + empty_object = Series(dtype=object) + empty_bytes = Series(dtype=object) + empty_df = DataFrame() + + # GH7241 + # (extract) on empty series + + tm.assert_series_equal(empty_str, empty.str.cat(empty)) + assert "" == empty.str.cat() + tm.assert_series_equal(empty_str, empty.str.title()) + tm.assert_series_equal(empty_int, empty.str.count("a")) + tm.assert_series_equal(empty_bool, empty.str.contains("a")) + tm.assert_series_equal(empty_bool, empty.str.startswith("a")) + tm.assert_series_equal(empty_bool, empty.str.endswith("a")) + tm.assert_series_equal(empty_str, empty.str.lower()) + tm.assert_series_equal(empty_str, empty.str.upper()) + tm.assert_series_equal(empty_str, empty.str.replace("a", "b")) + tm.assert_series_equal(empty_str, empty.str.repeat(3)) + tm.assert_series_equal(empty_bool, empty.str.match("^a")) + tm.assert_frame_equal( + DataFrame(columns=[0], dtype=any_string_dtype), + empty.str.extract("()", expand=True), + ) + tm.assert_frame_equal( + DataFrame(columns=[0, 1], 
dtype=any_string_dtype), + empty.str.extract("()()", expand=True), + ) + tm.assert_series_equal(empty_str, empty.str.extract("()", expand=False)) + tm.assert_frame_equal( + DataFrame(columns=[0, 1], dtype=any_string_dtype), + empty.str.extract("()()", expand=False), + ) + tm.assert_frame_equal(empty_df, empty.str.get_dummies()) + tm.assert_series_equal(empty_str, empty_str.str.join("")) + tm.assert_series_equal(empty_int, empty.str.len()) + tm.assert_series_equal(empty_object, empty_str.str.findall("a")) + tm.assert_series_equal(empty_int, empty.str.find("a")) + tm.assert_series_equal(empty_int, empty.str.rfind("a")) + tm.assert_series_equal(empty_str, empty.str.pad(42)) + tm.assert_series_equal(empty_str, empty.str.center(42)) + tm.assert_series_equal(empty_object, empty.str.split("a")) + tm.assert_series_equal(empty_object, empty.str.rsplit("a")) + tm.assert_series_equal(empty_object, empty.str.partition("a", expand=False)) + tm.assert_frame_equal(empty_df, empty.str.partition("a")) + tm.assert_series_equal(empty_object, empty.str.rpartition("a", expand=False)) + tm.assert_frame_equal(empty_df, empty.str.rpartition("a")) + tm.assert_series_equal(empty_str, empty.str.slice(stop=1)) + tm.assert_series_equal(empty_str, empty.str.slice(step=1)) + tm.assert_series_equal(empty_str, empty.str.strip()) + tm.assert_series_equal(empty_str, empty.str.lstrip()) + tm.assert_series_equal(empty_str, empty.str.rstrip()) + tm.assert_series_equal(empty_str, empty.str.wrap(42)) + tm.assert_series_equal(empty_str, empty.str.get(0)) + tm.assert_series_equal(empty_object, empty_bytes.str.decode("ascii")) + tm.assert_series_equal(empty_bytes, empty.str.encode("ascii")) + # ismethods should always return boolean (GH 29624) + tm.assert_series_equal(empty_bool, empty.str.isalnum()) + tm.assert_series_equal(empty_bool, empty.str.isalpha()) + tm.assert_series_equal(empty_bool, empty.str.isdigit()) + tm.assert_series_equal(empty_bool, empty.str.isspace()) + tm.assert_series_equal(empty_bool, empty.str.islower()) + tm.assert_series_equal(empty_bool, empty.str.isupper()) + tm.assert_series_equal(empty_bool, empty.str.istitle()) + tm.assert_series_equal(empty_bool, empty.str.isnumeric()) + tm.assert_series_equal(empty_bool, empty.str.isdecimal()) + tm.assert_series_equal(empty_str, empty.str.capitalize()) + tm.assert_series_equal(empty_str, empty.str.swapcase()) + tm.assert_series_equal(empty_str, empty.str.normalize("NFC")) + + table = str.maketrans("a", "b") + tm.assert_series_equal(empty_str, empty.str.translate(table)) + + +@pytest.mark.parametrize( + "method, expected", + [ + ("isalnum", [True, True, True, True, True, False, True, True, False, False]), + ("isalpha", [True, True, True, False, False, False, True, False, False, False]), + ( + "isdigit", + [False, False, False, True, False, False, False, True, False, False], + ), + ( + "isnumeric", + [False, False, False, True, False, False, False, True, False, False], + ), + ( + "isspace", + [False, False, False, False, False, False, False, False, False, True], + ), + ( + "islower", + [False, True, False, False, False, False, False, False, False, False], + ), + ( + "isupper", + [True, False, False, False, True, False, True, False, False, False], + ), + ( + "istitle", + [True, False, True, False, True, False, False, False, False, False], + ), + ], +) +def test_ismethods(method, expected, any_string_dtype): + ser = Series( + ["A", "b", "Xy", "4", "3A", "", "TT", "55", "-", " "], dtype=any_string_dtype + ) + expected_dtype = "bool" if any_string_dtype == "object" else 
"boolean" + expected = Series(expected, dtype=expected_dtype) + result = getattr(ser.str, method)() + tm.assert_series_equal(result, expected) + + # compare with standard library + expected = [getattr(item, method)() for item in ser] + assert list(result) == expected + + +@pytest.mark.parametrize( + "method, expected", + [ + ("isnumeric", [False, True, True, False, True, True, False]), + ("isdecimal", [False, True, False, False, False, True, False]), + ], +) +def test_isnumeric_unicode(method, expected, any_string_dtype): + # 0x00bc: ¼ VULGAR FRACTION ONE QUARTER + # 0x2605: ★ not number + # 0x1378: ፸ ETHIOPIC NUMBER SEVENTY + # 0xFF13: 3 Em 3 + ser = Series(["A", "3", "¼", "★", "፸", "3", "four"], dtype=any_string_dtype) + expected_dtype = "bool" if any_string_dtype == "object" else "boolean" + expected = Series(expected, dtype=expected_dtype) + result = getattr(ser.str, method)() + tm.assert_series_equal(result, expected) + + # compare with standard library + expected = [getattr(item, method)() for item in ser] + assert list(result) == expected + + +@pytest.mark.parametrize( + "method, expected", + [ + ("isnumeric", [False, np.nan, True, False, np.nan, True, False]), + ("isdecimal", [False, np.nan, False, False, np.nan, True, False]), + ], +) +def test_isnumeric_unicode_missing(method, expected, any_string_dtype): + values = ["A", np.nan, "¼", "★", np.nan, "3", "four"] + ser = Series(values, dtype=any_string_dtype) + expected_dtype = "object" if any_string_dtype == "object" else "boolean" + expected = Series(expected, dtype=expected_dtype) + result = getattr(ser.str, method)() + tm.assert_series_equal(result, expected) + + +def test_spilt_join_roundtrip(any_string_dtype): + ser = Series(["a_b_c", "c_d_e", np.nan, "f_g_h"], dtype=any_string_dtype) + result = ser.str.split("_").str.join("_") + expected = ser.astype(object) + tm.assert_series_equal(result, expected) + + +def test_spilt_join_roundtrip_mixed_object(): + ser = Series( + ["a_b", np.nan, "asdf_cas_asdf", True, datetime.today(), "foo", None, 1, 2.0] + ) + result = ser.str.split("_").str.join("_") + expected = Series( + ["a_b", np.nan, "asdf_cas_asdf", np.nan, np.nan, "foo", np.nan, np.nan, np.nan] + ) + tm.assert_series_equal(result, expected) + + +def test_len(any_string_dtype): + ser = Series( + ["foo", "fooo", "fooooo", np.nan, "fooooooo", "foo\n", "あ"], + dtype=any_string_dtype, + ) + result = ser.str.len() + expected_dtype = "float64" if any_string_dtype == "object" else "Int64" + expected = Series([3, 4, 6, np.nan, 8, 4, 1], dtype=expected_dtype) + tm.assert_series_equal(result, expected) + + +def test_len_mixed(): + ser = Series( + ["a_b", np.nan, "asdf_cas_asdf", True, datetime.today(), "foo", None, 1, 2.0] + ) + result = ser.str.len() + expected = Series([3, np.nan, 13, np.nan, np.nan, 3, np.nan, np.nan, np.nan]) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "method,sub,start,end,expected", + [ + ("index", "EF", None, None, [4, 3, 1, 0]), + ("rindex", "EF", None, None, [4, 5, 7, 4]), + ("index", "EF", 3, None, [4, 3, 7, 4]), + ("rindex", "EF", 3, None, [4, 5, 7, 4]), + ("index", "E", 4, 8, [4, 5, 7, 4]), + ("rindex", "E", 0, 5, [4, 3, 1, 4]), + ], +) +def test_index( + method, sub, start, end, index_or_series, any_string_dtype, expected, request +): + + obj = index_or_series( + ["ABCDEFG", "BCDEFEF", "DEFGHIJEF", "EFGHEF"], dtype=any_string_dtype + ) + expected_dtype = np.int64 if any_string_dtype == "object" else "Int64" + expected = index_or_series(expected, dtype=expected_dtype) + + result 
= getattr(obj.str, method)(sub, start, end) + + if index_or_series is Series: + tm.assert_series_equal(result, expected) + else: + tm.assert_index_equal(result, expected) + + # compare with standard library + expected = [getattr(item, method)(sub, start, end) for item in obj] + assert list(result) == expected + + +def test_index_not_found_raises(index_or_series, any_string_dtype): + obj = index_or_series( + ["ABCDEFG", "BCDEFEF", "DEFGHIJEF", "EFGHEF"], dtype=any_string_dtype + ) + with pytest.raises(ValueError, match="substring not found"): + obj.str.index("DE") + + +@pytest.mark.parametrize("method", ["index", "rindex"]) +def test_index_wrong_type_raises(index_or_series, any_string_dtype, method): + obj = index_or_series([], dtype=any_string_dtype) + msg = "expected a string object, not int" + + with pytest.raises(TypeError, match=msg): + getattr(obj.str, method)(0) + + +@pytest.mark.parametrize( + "method, exp", + [ + ["index", [1, 1, 0]], + ["rindex", [3, 1, 2]], + ], +) +def test_index_missing(any_string_dtype, method, exp): + ser = Series(["abcb", "ab", "bcbe", np.nan], dtype=any_string_dtype) + expected_dtype = np.float64 if any_string_dtype == "object" else "Int64" + + result = getattr(ser.str, method)("b") + expected = Series(exp + [np.nan], dtype=expected_dtype) + tm.assert_series_equal(result, expected) + + +def test_pipe_failures(any_string_dtype): + # #2119 + ser = Series(["A|B|C"], dtype=any_string_dtype) + + result = ser.str.split("|") + expected = Series([["A", "B", "C"]], dtype=object) + tm.assert_series_equal(result, expected) + + result = ser.str.replace("|", " ", regex=False) + expected = Series(["A B C"], dtype=any_string_dtype) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "start, stop, step, expected", + [ + (2, 5, None, ["foo", "bar", np.nan, "baz"]), + (0, 3, -1, ["", "", np.nan, ""]), + (None, None, -1, ["owtoofaa", "owtrabaa", np.nan, "xuqzabaa"]), + (3, 10, 2, ["oto", "ato", np.nan, "aqx"]), + (3, 0, -1, ["ofa", "aba", np.nan, "aba"]), + ], +) +def test_slice(start, stop, step, expected, any_string_dtype): + ser = Series(["aafootwo", "aabartwo", np.nan, "aabazqux"], dtype=any_string_dtype) + result = ser.str.slice(start, stop, step) + expected = Series(expected, dtype=any_string_dtype) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "start, stop, step, expected", + [ + (2, 5, None, ["foo", np.nan, "bar", np.nan, np.nan, np.nan, np.nan, np.nan]), + (4, 1, -1, ["oof", np.nan, "rab", np.nan, np.nan, np.nan, np.nan, np.nan]), + ], +) +def test_slice_mixed_object(start, stop, step, expected): + ser = Series(["aafootwo", np.nan, "aabartwo", True, datetime.today(), None, 1, 2.0]) + result = ser.str.slice(start, stop, step) + expected = Series(expected) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "start,stop,repl,expected", + [ + (2, 3, None, ["shrt", "a it longer", "evnlongerthanthat", "", np.nan]), + (2, 3, "z", ["shzrt", "a zit longer", "evznlongerthanthat", "z", np.nan]), + (2, 2, "z", ["shzort", "a zbit longer", "evzenlongerthanthat", "z", np.nan]), + (2, 1, "z", ["shzort", "a zbit longer", "evzenlongerthanthat", "z", np.nan]), + (-1, None, "z", ["shorz", "a bit longez", "evenlongerthanthaz", "z", np.nan]), + (None, -2, "z", ["zrt", "zer", "zat", "z", np.nan]), + (6, 8, "z", ["shortz", "a bit znger", "evenlozerthanthat", "z", np.nan]), + (-10, 3, "z", ["zrt", "a zit longer", "evenlongzerthanthat", "z", np.nan]), + ], +) +def test_slice_replace(start, stop, repl, expected, 
any_string_dtype): + ser = Series( + ["short", "a bit longer", "evenlongerthanthat", "", np.nan], + dtype=any_string_dtype, + ) + expected = Series(expected, dtype=any_string_dtype) + result = ser.str.slice_replace(start, stop, repl) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "method, exp", + [ + ["strip", ["aa", "bb", np.nan, "cc"]], + ["lstrip", ["aa ", "bb \n", np.nan, "cc "]], + ["rstrip", [" aa", " bb", np.nan, "cc"]], + ], +) +def test_strip_lstrip_rstrip(any_string_dtype, method, exp): + ser = Series([" aa ", " bb \n", np.nan, "cc "], dtype=any_string_dtype) + + result = getattr(ser.str, method)() + expected = Series(exp, dtype=any_string_dtype) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "method, exp", + [ + ["strip", ["aa", np.nan, "bb"]], + ["lstrip", ["aa ", np.nan, "bb \t\n"]], + ["rstrip", [" aa", np.nan, " bb"]], + ], +) +def test_strip_lstrip_rstrip_mixed_object(method, exp): + ser = Series([" aa ", np.nan, " bb \t\n", True, datetime.today(), None, 1, 2.0]) + + result = getattr(ser.str, method)() + expected = Series(exp + [np.nan, np.nan, np.nan, np.nan, np.nan]) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "method, exp", + [ + ["strip", ["ABC", " BNSD", "LDFJH "]], + ["lstrip", ["ABCxx", " BNSD", "LDFJH xx"]], + ["rstrip", ["xxABC", "xx BNSD", "LDFJH "]], + ], +) +def test_strip_lstrip_rstrip_args(any_string_dtype, method, exp): + ser = Series(["xxABCxx", "xx BNSD", "LDFJH xx"], dtype=any_string_dtype) + + result = getattr(ser.str, method)("x") + expected = Series(exp, dtype=any_string_dtype) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "prefix, expected", [("a", ["b", " b c", "bc"]), ("ab", ["", "a b c", "bc"])] +) +def test_removeprefix(any_string_dtype, prefix, expected): + ser = Series(["ab", "a b c", "bc"], dtype=any_string_dtype) + result = ser.str.removeprefix(prefix) + ser_expected = Series(expected, dtype=any_string_dtype) + tm.assert_series_equal(result, ser_expected) + + +@pytest.mark.parametrize( + "suffix, expected", [("c", ["ab", "a b ", "b"]), ("bc", ["ab", "a b c", ""])] +) +def test_removesuffix(any_string_dtype, suffix, expected): + ser = Series(["ab", "a b c", "bc"], dtype=any_string_dtype) + result = ser.str.removesuffix(suffix) + ser_expected = Series(expected, dtype=any_string_dtype) + tm.assert_series_equal(result, ser_expected) + + +def test_string_slice_get_syntax(any_string_dtype): + ser = Series( + ["YYY", "B", "C", "YYYYYYbYYY", "BYYYcYYY", np.nan, "CYYYBYYY", "dog", "cYYYt"], + dtype=any_string_dtype, + ) + + result = ser.str[0] + expected = ser.str.get(0) + tm.assert_series_equal(result, expected) + + result = ser.str[:3] + expected = ser.str.slice(stop=3) + tm.assert_series_equal(result, expected) + + result = ser.str[2::-1] + expected = ser.str.slice(start=2, step=-1) + tm.assert_series_equal(result, expected) + + +def test_string_slice_out_of_bounds_nested(): + ser = Series([(1, 2), (1,), (3, 4, 5)]) + result = ser.str[1] + expected = Series([2, np.nan, 4]) + tm.assert_series_equal(result, expected) + + +def test_string_slice_out_of_bounds(any_string_dtype): + ser = Series(["foo", "b", "ba"], dtype=any_string_dtype) + result = ser.str[1] + expected = Series(["o", np.nan, "a"], dtype=any_string_dtype) + tm.assert_series_equal(result, expected) + + +def test_encode_decode(any_string_dtype): + ser = Series(["a", "b", "a\xe4"], dtype=any_string_dtype).str.encode("utf-8") + result = ser.str.decode("utf-8") + expected = 
ser.map(lambda x: x.decode("utf-8")) + tm.assert_series_equal(result, expected) + + +def test_encode_errors_kwarg(any_string_dtype): + ser = Series(["a", "b", "a\x9d"], dtype=any_string_dtype) + + msg = ( + r"'charmap' codec can't encode character '\\x9d' in position 1: " + "character maps to <undefined>" + ) + with pytest.raises(UnicodeEncodeError, match=msg): + ser.str.encode("cp1252") + + result = ser.str.encode("cp1252", "ignore") + expected = ser.map(lambda x: x.encode("cp1252", "ignore")) + tm.assert_series_equal(result, expected) + + +def test_decode_errors_kwarg(): + ser = Series([b"a", b"b", b"a\x9d"]) + + msg = ( + "'charmap' codec can't decode byte 0x9d in position 1: " + "character maps to <undefined>" + ) + with pytest.raises(UnicodeDecodeError, match=msg): + ser.str.decode("cp1252") + + result = ser.str.decode("cp1252", "ignore") + expected = ser.map(lambda x: x.decode("cp1252", "ignore")) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "form, expected", + [ + ("NFKC", ["ABC", "ABC", "123", np.nan, "アイエ"]), + ("NFC", ["ABC", "ＡＢＣ", "１２３", np.nan, "ｱｲｴ"]), + ], +) +def test_normalize(form, expected, any_string_dtype): + ser = Series( + ["ABC", "ＡＢＣ", "１２３", np.nan, "ｱｲｴ"], + index=["a", "b", "c", "d", "e"], + dtype=any_string_dtype, + ) + expected = Series(expected, index=["a", "b", "c", "d", "e"], dtype=any_string_dtype) + result = ser.str.normalize(form) + tm.assert_series_equal(result, expected) + + +def test_normalize_bad_arg_raises(any_string_dtype): + ser = Series( + ["ABC", "ＡＢＣ", "１２３", np.nan, "ｱｲｴ"], + index=["a", "b", "c", "d", "e"], + dtype=any_string_dtype, + ) + with pytest.raises(ValueError, match="invalid normalization form"): + ser.str.normalize("xxx") + + +def test_normalize_index(): + idx = Index(["ＡＢＣ", "１２３", "ｱｲｴ"]) + expected = Index(["ABC", "123", "アイエ"]) + result = idx.str.normalize("NFKC") + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize( + "values,inferred_type", + [ + (["a", "b"], "string"), + (["a", "b", 1], "mixed-integer"), + (["a", "b", 1.3], "mixed"), + (["a", "b", 1.3, 1], "mixed-integer"), + (["aa", datetime(2011, 1, 1)], "mixed"), + ], +) +def test_index_str_accessor_visibility(values, inferred_type, index_or_series): + from pandas.core.strings import StringMethods + + obj = index_or_series(values) + if index_or_series is Index: + assert obj.inferred_type == inferred_type + + assert isinstance(obj.str, StringMethods) + + +@pytest.mark.parametrize( + "values,inferred_type", + [ + ([1, np.nan], "floating"), + ([datetime(2011, 1, 1)], "datetime64"), + ([timedelta(1)], "timedelta64"), + ], +) +def test_index_str_accessor_non_string_values_raises( + values, inferred_type, index_or_series +): + obj = index_or_series(values) + if index_or_series is Index: + assert obj.inferred_type == inferred_type + + msg = "Can only use .str accessor with string values" + with pytest.raises(AttributeError, match=msg): + obj.str + + +def test_index_str_accessor_multiindex_raises(): + # MultiIndex has mixed dtype, but the .str accessor is not allowed on it + idx = MultiIndex.from_tuples([("a", "b"), ("a", "b")]) + assert idx.inferred_type == "mixed" + + msg = "Can only use .str accessor with Index, not MultiIndex" + with pytest.raises(AttributeError, match=msg): + idx.str + + +def test_str_accessor_no_new_attributes(any_string_dtype): + # https://github.com/pandas-dev/pandas/issues/10673 + ser = Series(list("aabbcde"), dtype=any_string_dtype) + with pytest.raises(AttributeError, match="You cannot add any new attribute"): + ser.str.xlabel = "a" + + 
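As a quick aside on the normalize tests above: a standalone sketch (plain Python stdlib, not part of the vendored test file) of why the NFKC and NFC rows expect different results -- NFKC folds compatibility characters such as full-width digits and half-width kana, while NFC only recomposes canonical sequences.

import unicodedata

fullwidth = "ＡＢＣ１２３"  # full-width compatibility characters (U+FF21.., U+FF11..)
print(unicodedata.normalize("NFC", fullwidth))   # unchanged: 'ＡＢＣ１２３'
print(unicodedata.normalize("NFKC", fullwidth))  # folded to ASCII: 'ABC123'
print(unicodedata.normalize("NFKC", "ｱｲｴ"))      # half-width kana -> 'アイエ'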
+def test_cat_on_bytes_raises(): + lhs = Series(np.array(list("abc"), "S1").astype(object)) + rhs = Series(np.array(list("def"), "S1").astype(object)) + msg = "Cannot use .str.cat with values of inferred dtype 'bytes'" + with pytest.raises(TypeError, match=msg): + lhs.str.cat(rhs) + + +def test_str_accessor_in_apply_func(): + # https://github.com/pandas-dev/pandas/issues/38979 + df = DataFrame(zip("abc", "def")) + expected = Series(["A/D", "B/E", "C/F"]) + result = df.apply(lambda f: "/".join(f.str.upper()), axis=1) + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/test_aggregation.py b/.local/lib/python3.8/site-packages/pandas/tests/test_aggregation.py new file mode 100644 index 0000000..7695c95 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/test_aggregation.py @@ -0,0 +1,93 @@ +import numpy as np +import pytest + +from pandas.core.apply import ( + _make_unique_kwarg_list, + maybe_mangle_lambdas, +) + + +def test_maybe_mangle_lambdas_passthrough(): + assert maybe_mangle_lambdas("mean") == "mean" + assert maybe_mangle_lambdas(lambda x: x).__name__ == "<lambda>" + # don't mangle a single lambda. + assert maybe_mangle_lambdas([lambda x: x])[0].__name__ == "<lambda>" + + +def test_maybe_mangle_lambdas_listlike(): + aggfuncs = [lambda x: 1, lambda x: 2] + result = maybe_mangle_lambdas(aggfuncs) + assert result[0].__name__ == "<lambda_0>" + assert result[1].__name__ == "<lambda_1>" + assert aggfuncs[0](None) == result[0](None) + assert aggfuncs[1](None) == result[1](None) + + +def test_maybe_mangle_lambdas(): + func = {"A": [lambda x: 0, lambda x: 1]} + result = maybe_mangle_lambdas(func) + assert result["A"][0].__name__ == "<lambda_0>" + assert result["A"][1].__name__ == "<lambda_1>" + + +def test_maybe_mangle_lambdas_args(): + func = {"A": [lambda x, a, b=1: (0, a, b), lambda x: 1]} + result = maybe_mangle_lambdas(func) + assert result["A"][0].__name__ == "<lambda_0>" + assert result["A"][1].__name__ == "<lambda_1>" + + assert func["A"][0](0, 1) == (0, 1, 1) + assert func["A"][0](0, 1, 2) == (0, 1, 2) + assert func["A"][0](0, 2, b=3) == (0, 2, 3) + + +def test_maybe_mangle_lambdas_named(): + func = {"C": np.mean, "D": {"foo": np.mean, "bar": np.mean}} + result = maybe_mangle_lambdas(func) + assert result == func + + +@pytest.mark.parametrize( + "order, expected_reorder", + [ + ( + [ + ("height", "<lambda>"), + ("height", "max"), + ("weight", "max"), + ("height", "<lambda>"), + ("weight", "<lambda>"), + ], + [ + ("height", "<lambda>_0"), + ("height", "max"), + ("weight", "max"), + ("height", "<lambda>_1"), + ("weight", "<lambda>"), + ], + ), + ( + [ + ("col2", "min"), + ("col1", "<lambda>"), + ("col1", "<lambda>"), + ("col1", "<lambda>"), + ], + [ + ("col2", "min"), + ("col1", "<lambda>_0"), + ("col1", "<lambda>_1"), + ("col1", "<lambda>_2"), + ], + ), + ( + [("col", "<lambda>"), ("col", "<lambda>"), ("col", "<lambda>")], + [("col", "<lambda>_0"), ("col", "<lambda>_1"), ("col", "<lambda>_2")], + ), + ], +) +def test_make_unique(order, expected_reorder): + # GH 27519, test if make_unique function reorders correctly + result = _make_unique_kwarg_list(order) + + assert result == expected_reorder diff --git a/.local/lib/python3.8/site-packages/pandas/tests/test_algos.py b/.local/lib/python3.8/site-packages/pandas/tests/test_algos.py new file mode 100644 index 0000000..94a2090 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/test_algos.py @@ -0,0 +1,2459 @@ +from datetime import datetime +from itertools import permutations +import struct + +import numpy as np +import pytest + +from pandas._libs import ( + algos as libalgos, + hashtable as ht, +) +import pandas.util._test_decorators as td + +from pandas.core.dtypes.common
import ( + is_bool_dtype, + is_complex_dtype, + is_float_dtype, + is_integer_dtype, + is_object_dtype, +) +from pandas.core.dtypes.dtypes import CategoricalDtype as CDT + +import pandas as pd +from pandas import ( + Categorical, + CategoricalIndex, + DataFrame, + DatetimeIndex, + Index, + IntervalIndex, + MultiIndex, + NaT, + Period, + PeriodIndex, + Series, + Timedelta, + Timestamp, + date_range, + timedelta_range, + to_datetime, + to_timedelta, +) +import pandas._testing as tm +import pandas.core.algorithms as algos +from pandas.core.arrays import DatetimeArray +import pandas.core.common as com + + +class TestFactorize: + @pytest.mark.parametrize("sort", [True, False]) + def test_factorize(self, index_or_series_obj, sort): + obj = index_or_series_obj + result_codes, result_uniques = obj.factorize(sort=sort) + + constructor = Index + if isinstance(obj, MultiIndex): + constructor = MultiIndex.from_tuples + expected_uniques = constructor(obj.unique()) + + if sort: + expected_uniques = expected_uniques.sort_values() + + # construct an integer ndarray so that + # `expected_uniques.take(expected_codes)` is equal to `obj` + expected_uniques_list = list(expected_uniques) + expected_codes = [expected_uniques_list.index(val) for val in obj] + expected_codes = np.asarray(expected_codes, dtype=np.intp) + + tm.assert_numpy_array_equal(result_codes, expected_codes) + tm.assert_index_equal(result_uniques, expected_uniques, exact=True) + + def test_series_factorize_na_sentinel_none(self): + # GH#35667 + values = np.array([1, 2, 1, np.nan]) + ser = Series(values) + codes, uniques = ser.factorize(na_sentinel=None) + + expected_codes = np.array([0, 1, 0, 2], dtype=np.intp) + expected_uniques = Index([1.0, 2.0, np.nan]) + + tm.assert_numpy_array_equal(codes, expected_codes) + tm.assert_index_equal(uniques, expected_uniques) + + def test_basic(self): + + codes, uniques = algos.factorize(["a", "b", "b", "a", "a", "c", "c", "c"]) + tm.assert_numpy_array_equal(uniques, np.array(["a", "b", "c"], dtype=object)) + + codes, uniques = algos.factorize( + ["a", "b", "b", "a", "a", "c", "c", "c"], sort=True + ) + exp = np.array([0, 1, 1, 0, 0, 2, 2, 2], dtype=np.intp) + tm.assert_numpy_array_equal(codes, exp) + exp = np.array(["a", "b", "c"], dtype=object) + tm.assert_numpy_array_equal(uniques, exp) + + arr = np.arange(5, dtype=np.intp)[::-1] + + codes, uniques = algos.factorize(arr) + exp = np.array([0, 1, 2, 3, 4], dtype=np.intp) + tm.assert_numpy_array_equal(codes, exp) + exp = np.array([4, 3, 2, 1, 0], dtype=arr.dtype) + tm.assert_numpy_array_equal(uniques, exp) + + codes, uniques = algos.factorize(arr, sort=True) + exp = np.array([4, 3, 2, 1, 0], dtype=np.intp) + tm.assert_numpy_array_equal(codes, exp) + exp = np.array([0, 1, 2, 3, 4], dtype=arr.dtype) + tm.assert_numpy_array_equal(uniques, exp) + + arr = np.arange(5.0)[::-1] + + codes, uniques = algos.factorize(arr) + exp = np.array([0, 1, 2, 3, 4], dtype=np.intp) + tm.assert_numpy_array_equal(codes, exp) + exp = np.array([4.0, 3.0, 2.0, 1.0, 0.0], dtype=arr.dtype) + tm.assert_numpy_array_equal(uniques, exp) + + codes, uniques = algos.factorize(arr, sort=True) + exp = np.array([4, 3, 2, 1, 0], dtype=np.intp) + tm.assert_numpy_array_equal(codes, exp) + exp = np.array([0.0, 1.0, 2.0, 3.0, 4.0], dtype=arr.dtype) + tm.assert_numpy_array_equal(uniques, exp) + + def test_mixed(self): + + # doc example reshaping.rst + x = Series(["A", "A", np.nan, "B", 3.14, np.inf]) + codes, uniques = algos.factorize(x) + + exp = np.array([0, 0, -1, 1, 2, 3], dtype=np.intp) + 
tm.assert_numpy_array_equal(codes, exp) + exp = Index(["A", "B", 3.14, np.inf]) + tm.assert_index_equal(uniques, exp) + + codes, uniques = algos.factorize(x, sort=True) + exp = np.array([2, 2, -1, 3, 0, 1], dtype=np.intp) + tm.assert_numpy_array_equal(codes, exp) + exp = Index([3.14, np.inf, "A", "B"]) + tm.assert_index_equal(uniques, exp) + + def test_datelike(self): + + # M8 + v1 = Timestamp("20130101 09:00:00.00004") + v2 = Timestamp("20130101") + x = Series([v1, v1, v1, v2, v2, v1]) + codes, uniques = algos.factorize(x) + + exp = np.array([0, 0, 0, 1, 1, 0], dtype=np.intp) + tm.assert_numpy_array_equal(codes, exp) + exp = DatetimeIndex([v1, v2]) + tm.assert_index_equal(uniques, exp) + + codes, uniques = algos.factorize(x, sort=True) + exp = np.array([1, 1, 1, 0, 0, 1], dtype=np.intp) + tm.assert_numpy_array_equal(codes, exp) + exp = DatetimeIndex([v2, v1]) + tm.assert_index_equal(uniques, exp) + + # period + v1 = Period("201302", freq="M") + v2 = Period("201303", freq="M") + x = Series([v1, v1, v1, v2, v2, v1]) + + # periods are not 'sorted' as they are converted back into an index + codes, uniques = algos.factorize(x) + exp = np.array([0, 0, 0, 1, 1, 0], dtype=np.intp) + tm.assert_numpy_array_equal(codes, exp) + tm.assert_index_equal(uniques, PeriodIndex([v1, v2])) + + codes, uniques = algos.factorize(x, sort=True) + exp = np.array([0, 0, 0, 1, 1, 0], dtype=np.intp) + tm.assert_numpy_array_equal(codes, exp) + tm.assert_index_equal(uniques, PeriodIndex([v1, v2])) + + # GH 5986 + v1 = to_timedelta("1 day 1 min") + v2 = to_timedelta("1 day") + x = Series([v1, v2, v1, v1, v2, v2, v1]) + codes, uniques = algos.factorize(x) + exp = np.array([0, 1, 0, 0, 1, 1, 0], dtype=np.intp) + tm.assert_numpy_array_equal(codes, exp) + tm.assert_index_equal(uniques, to_timedelta([v1, v2])) + + codes, uniques = algos.factorize(x, sort=True) + exp = np.array([1, 0, 1, 1, 0, 0, 1], dtype=np.intp) + tm.assert_numpy_array_equal(codes, exp) + tm.assert_index_equal(uniques, to_timedelta([v2, v1])) + + def test_factorize_nan(self): + # nan should map to na_sentinel, not reverse_indexer[na_sentinel] + # rizer.factorize should not raise an exception if na_sentinel indexes + # outside of reverse_indexer + key = np.array([1, 2, 1, np.nan], dtype="O") + rizer = ht.ObjectFactorizer(len(key)) + for na_sentinel in (-1, 20): + ids = rizer.factorize(key, sort=True, na_sentinel=na_sentinel) + expected = np.array([0, 1, 0, na_sentinel], dtype="int32") + assert len(set(key)) == len(set(expected)) + tm.assert_numpy_array_equal(pd.isna(key), expected == na_sentinel) + + # nan still maps to na_sentinel when sort=False + key = np.array([0, np.nan, 1], dtype="O") + na_sentinel = -1 + + # TODO(wesm): unused? 
+ ids = rizer.factorize(key, sort=False, na_sentinel=na_sentinel) # noqa + + expected = np.array([2, -1, 0], dtype="int32") + assert len(set(key)) == len(set(expected)) + tm.assert_numpy_array_equal(pd.isna(key), expected == na_sentinel) + + @pytest.mark.parametrize( + "data, expected_codes, expected_uniques", + [ + ( + [(1, 1), (1, 2), (0, 0), (1, 2), "nonsense"], + [0, 1, 2, 1, 3], + [(1, 1), (1, 2), (0, 0), "nonsense"], + ), + ( + [(1, 1), (1, 2), (0, 0), (1, 2), (1, 2, 3)], + [0, 1, 2, 1, 3], + [(1, 1), (1, 2), (0, 0), (1, 2, 3)], + ), + ([(1, 1), (1, 2), (0, 0), (1, 2)], [0, 1, 2, 1], [(1, 1), (1, 2), (0, 0)]), + ], + ) + def test_factorize_tuple_list(self, data, expected_codes, expected_uniques): + # GH9454 + codes, uniques = pd.factorize(data) + + tm.assert_numpy_array_equal(codes, np.array(expected_codes, dtype=np.intp)) + + expected_uniques_array = com.asarray_tuplesafe(expected_uniques, dtype=object) + tm.assert_numpy_array_equal(uniques, expected_uniques_array) + + def test_complex_sorting(self): + # gh 12666 - check no segfault + x17 = np.array([complex(i) for i in range(17)], dtype=object) + + msg = "'[<>]' not supported between instances of .*" + with pytest.raises(TypeError, match=msg): + algos.factorize(x17[::-1], sort=True) + + def test_numeric_dtype_factorize(self, any_real_numpy_dtype): + # GH41132 + dtype = any_real_numpy_dtype + data = np.array([1, 2, 2, 1], dtype=dtype) + expected_codes = np.array([0, 1, 1, 0], dtype=np.intp) + expected_uniques = np.array([1, 2], dtype=dtype) + + codes, uniques = algos.factorize(data) + tm.assert_numpy_array_equal(codes, expected_codes) + tm.assert_numpy_array_equal(uniques, expected_uniques) + + def test_float64_factorize(self, writable): + data = np.array([1.0, 1e8, 1.0, 1e-8, 1e8, 1.0], dtype=np.float64) + data.setflags(write=writable) + expected_codes = np.array([0, 1, 0, 2, 1, 0], dtype=np.intp) + expected_uniques = np.array([1.0, 1e8, 1e-8], dtype=np.float64) + + codes, uniques = algos.factorize(data) + tm.assert_numpy_array_equal(codes, expected_codes) + tm.assert_numpy_array_equal(uniques, expected_uniques) + + def test_uint64_factorize(self, writable): + data = np.array([2 ** 64 - 1, 1, 2 ** 64 - 1], dtype=np.uint64) + data.setflags(write=writable) + expected_codes = np.array([0, 1, 0], dtype=np.intp) + expected_uniques = np.array([2 ** 64 - 1, 1], dtype=np.uint64) + + codes, uniques = algos.factorize(data) + tm.assert_numpy_array_equal(codes, expected_codes) + tm.assert_numpy_array_equal(uniques, expected_uniques) + + def test_int64_factorize(self, writable): + data = np.array([2 ** 63 - 1, -(2 ** 63), 2 ** 63 - 1], dtype=np.int64) + data.setflags(write=writable) + expected_codes = np.array([0, 1, 0], dtype=np.intp) + expected_uniques = np.array([2 ** 63 - 1, -(2 ** 63)], dtype=np.int64) + + codes, uniques = algos.factorize(data) + tm.assert_numpy_array_equal(codes, expected_codes) + tm.assert_numpy_array_equal(uniques, expected_uniques) + + def test_string_factorize(self, writable): + data = np.array(["a", "c", "a", "b", "c"], dtype=object) + data.setflags(write=writable) + expected_codes = np.array([0, 1, 0, 2, 1], dtype=np.intp) + expected_uniques = np.array(["a", "c", "b"], dtype=object) + + codes, uniques = algos.factorize(data) + tm.assert_numpy_array_equal(codes, expected_codes) + tm.assert_numpy_array_equal(uniques, expected_uniques) + + def test_object_factorize(self, writable): + data = np.array(["a", "c", None, np.nan, "a", "b", NaT, "c"], dtype=object) + data.setflags(write=writable) + expected_codes = 
np.array([0, 1, -1, -1, 0, 2, -1, 1], dtype=np.intp) + expected_uniques = np.array(["a", "c", "b"], dtype=object) + + codes, uniques = algos.factorize(data) + tm.assert_numpy_array_equal(codes, expected_codes) + tm.assert_numpy_array_equal(uniques, expected_uniques) + + def test_datetime64_factorize(self, writable): + # GH35650 Verify whether read-only datetime64 array can be factorized + data = np.array([np.datetime64("2020-01-01T00:00:00.000")]) + data.setflags(write=writable) + expected_codes = np.array([0], dtype=np.intp) + expected_uniques = np.array( + ["2020-01-01T00:00:00.000000000"], dtype="datetime64[ns]" + ) + + codes, uniques = pd.factorize(data) + tm.assert_numpy_array_equal(codes, expected_codes) + tm.assert_numpy_array_equal(uniques, expected_uniques) + + @pytest.mark.parametrize("sort", [True, False]) + def test_factorize_rangeindex(self, sort): + # increasing -> sort doesn't matter + ri = pd.RangeIndex.from_range(range(10)) + expected = np.arange(10, dtype=np.intp), ri + + result = algos.factorize(ri, sort=sort) + tm.assert_numpy_array_equal(result[0], expected[0]) + tm.assert_index_equal(result[1], expected[1], exact=True) + + result = ri.factorize(sort=sort) + tm.assert_numpy_array_equal(result[0], expected[0]) + tm.assert_index_equal(result[1], expected[1], exact=True) + + @pytest.mark.parametrize("sort", [True, False]) + def test_factorize_rangeindex_decreasing(self, sort): + # decreasing -> sort matters + ri = pd.RangeIndex.from_range(range(10)) + expected = np.arange(10, dtype=np.intp), ri + + ri2 = ri[::-1] + expected = expected[0], ri2 + if sort: + expected = expected[0][::-1], expected[1][::-1] + + result = algos.factorize(ri2, sort=sort) + tm.assert_numpy_array_equal(result[0], expected[0]) + tm.assert_index_equal(result[1], expected[1], exact=True) + + result = ri2.factorize(sort=sort) + tm.assert_numpy_array_equal(result[0], expected[0]) + tm.assert_index_equal(result[1], expected[1], exact=True) + + def test_deprecate_order(self): + # gh 19727 - check warning is raised for deprecated keyword, order. + # Test not valid once order keyword is removed. + data = np.array([2 ** 63, 1, 2 ** 63], dtype=np.uint64) + with pytest.raises(TypeError, match="got an unexpected keyword"): + algos.factorize(data, order=True) + with tm.assert_produces_warning(False): + algos.factorize(data) + + @pytest.mark.parametrize( + "data", + [ + np.array([0, 1, 0], dtype="u8"), + np.array([-(2 ** 63), 1, -(2 ** 63)], dtype="i8"), + np.array(["__nan__", "foo", "__nan__"], dtype="object"), + ], + ) + def test_parametrized_factorize_na_value_default(self, data): + # arrays that include the NA default for that type, but isn't used. 
+ codes, uniques = algos.factorize(data) + expected_uniques = data[[0, 1]] + expected_codes = np.array([0, 1, 0], dtype=np.intp) + tm.assert_numpy_array_equal(codes, expected_codes) + tm.assert_numpy_array_equal(uniques, expected_uniques) + + @pytest.mark.parametrize( + "data, na_value", + [ + (np.array([0, 1, 0, 2], dtype="u8"), 0), + (np.array([1, 0, 1, 2], dtype="u8"), 1), + (np.array([-(2 ** 63), 1, -(2 ** 63), 0], dtype="i8"), -(2 ** 63)), + (np.array([1, -(2 ** 63), 1, 0], dtype="i8"), 1), + (np.array(["a", "", "a", "b"], dtype=object), "a"), + (np.array([(), ("a", 1), (), ("a", 2)], dtype=object), ()), + (np.array([("a", 1), (), ("a", 1), ("a", 2)], dtype=object), ("a", 1)), + ], + ) + def test_parametrized_factorize_na_value(self, data, na_value): + codes, uniques = algos.factorize_array(data, na_value=na_value) + expected_uniques = data[[1, 3]] + expected_codes = np.array([-1, 0, -1, 1], dtype=np.intp) + tm.assert_numpy_array_equal(codes, expected_codes) + tm.assert_numpy_array_equal(uniques, expected_uniques) + + @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("na_sentinel", [-1, -10, 100]) + @pytest.mark.parametrize( + "data, uniques", + [ + ( + np.array(["b", "a", None, "b"], dtype=object), + np.array(["b", "a"], dtype=object), + ), + ( + pd.array([2, 1, np.nan, 2], dtype="Int64"), + pd.array([2, 1], dtype="Int64"), + ), + ], + ids=["numpy_array", "extension_array"], + ) + def test_factorize_na_sentinel(self, sort, na_sentinel, data, uniques): + codes, uniques = algos.factorize(data, sort=sort, na_sentinel=na_sentinel) + if sort: + expected_codes = np.array([1, 0, na_sentinel, 1], dtype=np.intp) + expected_uniques = algos.safe_sort(uniques) + else: + expected_codes = np.array([0, 1, na_sentinel, 0], dtype=np.intp) + expected_uniques = uniques + tm.assert_numpy_array_equal(codes, expected_codes) + if isinstance(data, np.ndarray): + tm.assert_numpy_array_equal(uniques, expected_uniques) + else: + tm.assert_extension_array_equal(uniques, expected_uniques) + + @pytest.mark.parametrize( + "data, expected_codes, expected_uniques", + [ + ( + ["a", None, "b", "a"], + np.array([0, 2, 1, 0], dtype=np.dtype("intp")), + np.array(["a", "b", np.nan], dtype=object), + ), + ( + ["a", np.nan, "b", "a"], + np.array([0, 2, 1, 0], dtype=np.dtype("intp")), + np.array(["a", "b", np.nan], dtype=object), + ), + ], + ) + def test_object_factorize_na_sentinel_none( + self, data, expected_codes, expected_uniques + ): + codes, uniques = algos.factorize(data, na_sentinel=None) + + tm.assert_numpy_array_equal(uniques, expected_uniques) + tm.assert_numpy_array_equal(codes, expected_codes) + + @pytest.mark.parametrize( + "data, expected_codes, expected_uniques", + [ + ( + [1, None, 1, 2], + np.array([0, 2, 0, 1], dtype=np.dtype("intp")), + np.array([1, 2, np.nan], dtype="O"), + ), + ( + [1, np.nan, 1, 2], + np.array([0, 2, 0, 1], dtype=np.dtype("intp")), + np.array([1, 2, np.nan], dtype=np.float64), + ), + ], + ) + def test_int_factorize_na_sentinel_none( + self, data, expected_codes, expected_uniques + ): + codes, uniques = algos.factorize(data, na_sentinel=None) + + tm.assert_numpy_array_equal(uniques, expected_uniques) + tm.assert_numpy_array_equal(codes, expected_codes) + + +class TestUnique: + def test_ints(self): + arr = np.random.randint(0, 100, size=50) + + result = algos.unique(arr) + assert isinstance(result, np.ndarray) + + def test_objects(self): + arr = np.random.randint(0, 100, size=50).astype("O") + + result = algos.unique(arr) + assert isinstance(result, np.ndarray) 
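A standalone sketch (assuming only the public pd.factorize API, outside the vendored test suite) of the round-trip contract the factorize tests above rely on: codes index into uniques, and missing values get the na_sentinel (-1 by default) rather than a slot in uniques.

import numpy as np
import pandas as pd

# codes -> array([ 0,  1,  0, -1,  2]); uniques -> array(['b', 'a', 'c'], dtype=object)
codes, uniques = pd.factorize(["b", "a", "b", np.nan, "c"])

# taking uniques at the non-missing codes rebuilds the original data
mask = codes != -1
assert list(uniques.take(codes[mask])) == ["b", "a", "b", "c"]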
+ + def test_object_refcount_bug(self): + lst = ["A", "B", "C", "D", "E"] + for i in range(1000): + len(algos.unique(lst)) + + def test_on_index_object(self): + + mindex = MultiIndex.from_arrays( + [np.arange(5).repeat(5), np.tile(np.arange(5), 5)] + ) + expected = mindex.values + expected.sort() + + mindex = mindex.repeat(2) + + result = pd.unique(mindex) + result.sort() + + tm.assert_almost_equal(result, expected) + + def test_dtype_preservation(self, any_numpy_dtype): + # GH 15442 + if any_numpy_dtype in (tm.BYTES_DTYPES + tm.STRING_DTYPES): + data = [1, 2, 2] + uniques = [1, 2] + elif is_integer_dtype(any_numpy_dtype): + data = [1, 2, 2] + uniques = [1, 2] + elif is_float_dtype(any_numpy_dtype): + data = [1, 2, 2] + uniques = [1.0, 2.0] + elif is_complex_dtype(any_numpy_dtype): + data = [complex(1, 0), complex(2, 0), complex(2, 0)] + uniques = [complex(1, 0), complex(2, 0)] + elif is_bool_dtype(any_numpy_dtype): + data = [True, True, False] + uniques = [True, False] + elif is_object_dtype(any_numpy_dtype): + data = ["A", "B", "B"] + uniques = ["A", "B"] + else: + # datetime64[ns]/M8[ns]/timedelta64[ns]/m8[ns] tested elsewhere + data = [1, 2, 2] + uniques = [1, 2] + + result = Series(data, dtype=any_numpy_dtype).unique() + expected = np.array(uniques, dtype=any_numpy_dtype) + + if any_numpy_dtype in tm.STRING_DTYPES: + expected = expected.astype(object) + + tm.assert_numpy_array_equal(result, expected) + + def test_datetime64_dtype_array_returned(self): + # GH 9431 + expected = np.array( + [ + "2015-01-03T00:00:00.000000000", + "2015-01-01T00:00:00.000000000", + ], + dtype="M8[ns]", + ) + + dt_index = to_datetime( + [ + "2015-01-03T00:00:00.000000000", + "2015-01-01T00:00:00.000000000", + "2015-01-01T00:00:00.000000000", + ] + ) + result = algos.unique(dt_index) + tm.assert_numpy_array_equal(result, expected) + assert result.dtype == expected.dtype + + s = Series(dt_index) + result = algos.unique(s) + tm.assert_numpy_array_equal(result, expected) + assert result.dtype == expected.dtype + + arr = s.values + result = algos.unique(arr) + tm.assert_numpy_array_equal(result, expected) + assert result.dtype == expected.dtype + + def test_datetime_non_ns(self): + a = np.array(["2000", "2000", "2001"], dtype="datetime64[s]") + result = pd.unique(a) + expected = np.array(["2000", "2001"], dtype="datetime64[ns]") + tm.assert_numpy_array_equal(result, expected) + + def test_timedelta_non_ns(self): + a = np.array(["2000", "2000", "2001"], dtype="timedelta64[s]") + result = pd.unique(a) + expected = np.array([2000000000000, 2001000000000], dtype="timedelta64[ns]") + tm.assert_numpy_array_equal(result, expected) + + def test_timedelta64_dtype_array_returned(self): + # GH 9431 + expected = np.array([31200, 45678, 10000], dtype="m8[ns]") + + td_index = to_timedelta([31200, 45678, 31200, 10000, 45678]) + result = algos.unique(td_index) + tm.assert_numpy_array_equal(result, expected) + assert result.dtype == expected.dtype + + s = Series(td_index) + result = algos.unique(s) + tm.assert_numpy_array_equal(result, expected) + assert result.dtype == expected.dtype + + arr = s.values + result = algos.unique(arr) + tm.assert_numpy_array_equal(result, expected) + assert result.dtype == expected.dtype + + def test_uint64_overflow(self): + s = Series([1, 2, 2 ** 63, 2 ** 63], dtype=np.uint64) + exp = np.array([1, 2, 2 ** 63], dtype=np.uint64) + tm.assert_numpy_array_equal(algos.unique(s), exp) + + def test_nan_in_object_array(self): + duplicated_items = ["a", np.nan, "c", "c"] + result = 
pd.unique(duplicated_items) + expected = np.array(["a", np.nan, "c"], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + def test_categorical(self): + + # we are expecting to return in the order + # of appearance + expected = Categorical(list("bac")) + + # we are expecting to return in the order + # of the categories + expected_o = Categorical(list("bac"), categories=list("abc"), ordered=True) + + # GH 15939 + c = Categorical(list("baabc")) + result = c.unique() + tm.assert_categorical_equal(result, expected) + + result = algos.unique(c) + tm.assert_categorical_equal(result, expected) + + c = Categorical(list("baabc"), ordered=True) + result = c.unique() + tm.assert_categorical_equal(result, expected_o) + + result = algos.unique(c) + tm.assert_categorical_equal(result, expected_o) + + # Series of categorical dtype + s = Series(Categorical(list("baabc")), name="foo") + result = s.unique() + tm.assert_categorical_equal(result, expected) + + result = pd.unique(s) + tm.assert_categorical_equal(result, expected) + + # CI -> return CI + ci = CategoricalIndex(Categorical(list("baabc"), categories=list("abc"))) + expected = CategoricalIndex(expected) + result = ci.unique() + tm.assert_index_equal(result, expected) + + result = pd.unique(ci) + tm.assert_index_equal(result, expected) + + def test_datetime64tz_aware(self): + # GH 15939 + + result = Series( + Index( + [ + Timestamp("20160101", tz="US/Eastern"), + Timestamp("20160101", tz="US/Eastern"), + ] + ) + ).unique() + expected = DatetimeArray._from_sequence( + np.array([Timestamp("2016-01-01 00:00:00-0500", tz="US/Eastern")]) + ) + tm.assert_extension_array_equal(result, expected) + + result = Index( + [ + Timestamp("20160101", tz="US/Eastern"), + Timestamp("20160101", tz="US/Eastern"), + ] + ).unique() + expected = DatetimeIndex( + ["2016-01-01 00:00:00"], dtype="datetime64[ns, US/Eastern]", freq=None + ) + tm.assert_index_equal(result, expected) + + result = pd.unique( + Series( + Index( + [ + Timestamp("20160101", tz="US/Eastern"), + Timestamp("20160101", tz="US/Eastern"), + ] + ) + ) + ) + expected = DatetimeArray._from_sequence( + np.array([Timestamp("2016-01-01", tz="US/Eastern")]) + ) + tm.assert_extension_array_equal(result, expected) + + result = pd.unique( + Index( + [ + Timestamp("20160101", tz="US/Eastern"), + Timestamp("20160101", tz="US/Eastern"), + ] + ) + ) + expected = DatetimeIndex( + ["2016-01-01 00:00:00"], dtype="datetime64[ns, US/Eastern]", freq=None + ) + tm.assert_index_equal(result, expected) + + def test_order_of_appearance(self): + # 9346 + # light testing of guarantee of order of appearance + # these also are the doc-examples + result = pd.unique(Series([2, 1, 3, 3])) + tm.assert_numpy_array_equal(result, np.array([2, 1, 3], dtype="int64")) + + result = pd.unique(Series([2] + [1] * 5)) + tm.assert_numpy_array_equal(result, np.array([2, 1], dtype="int64")) + + result = pd.unique(Series([Timestamp("20160101"), Timestamp("20160101")])) + expected = np.array(["2016-01-01T00:00:00.000000000"], dtype="datetime64[ns]") + tm.assert_numpy_array_equal(result, expected) + + result = pd.unique( + Index( + [ + Timestamp("20160101", tz="US/Eastern"), + Timestamp("20160101", tz="US/Eastern"), + ] + ) + ) + expected = DatetimeIndex( + ["2016-01-01 00:00:00"], dtype="datetime64[ns, US/Eastern]", freq=None + ) + tm.assert_index_equal(result, expected) + + result = pd.unique(list("aabc")) + expected = np.array(["a", "b", "c"], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + result = 
pd.unique(Series(Categorical(list("aabc")))) + expected = Categorical(list("abc")) + tm.assert_categorical_equal(result, expected) + + @pytest.mark.parametrize( + "arg ,expected", + [ + (("1", "1", "2"), np.array(["1", "2"], dtype=object)), + (("foo",), np.array(["foo"], dtype=object)), + ], + ) + def test_tuple_with_strings(self, arg, expected): + # see GH 17108 + result = pd.unique(arg) + tm.assert_numpy_array_equal(result, expected) + + def test_obj_none_preservation(self): + # GH 20866 + arr = np.array(["foo", None], dtype=object) + result = pd.unique(arr) + expected = np.array(["foo", None], dtype=object) + + tm.assert_numpy_array_equal(result, expected, strict_nan=True) + + def test_signed_zero(self): + # GH 21866 + a = np.array([-0.0, 0.0]) + result = pd.unique(a) + expected = np.array([-0.0]) # 0.0 and -0.0 are equivalent + tm.assert_numpy_array_equal(result, expected) + + def test_different_nans(self): + # GH 21866 + # create different nans from bit-patterns: + NAN1 = struct.unpack("d", struct.pack("=Q", 0x7FF8000000000000))[0] + NAN2 = struct.unpack("d", struct.pack("=Q", 0x7FF8000000000001))[0] + assert NAN1 != NAN1 + assert NAN2 != NAN2 + a = np.array([NAN1, NAN2]) # NAN1 and NAN2 are equivalent + result = pd.unique(a) + expected = np.array([np.nan]) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("el_type", [np.float64, object]) + def test_first_nan_kept(self, el_type): + # GH 22295 + # create different nans from bit-patterns: + bits_for_nan1 = 0xFFF8000000000001 + bits_for_nan2 = 0x7FF8000000000001 + NAN1 = struct.unpack("d", struct.pack("=Q", bits_for_nan1))[0] + NAN2 = struct.unpack("d", struct.pack("=Q", bits_for_nan2))[0] + assert NAN1 != NAN1 + assert NAN2 != NAN2 + a = np.array([NAN1, NAN2], dtype=el_type) + result = pd.unique(a) + assert result.size == 1 + # use bit patterns to identify which nan was kept: + result_nan_bits = struct.unpack("=Q", struct.pack("d", result[0]))[0] + assert result_nan_bits == bits_for_nan1 + + def test_do_not_mangle_na_values(self, unique_nulls_fixture, unique_nulls_fixture2): + # GH 22295 + if unique_nulls_fixture is unique_nulls_fixture2: + return # skip it, values not unique + a = np.array([unique_nulls_fixture, unique_nulls_fixture2], dtype=object) + result = pd.unique(a) + assert result.size == 2 + assert a[0] is unique_nulls_fixture + assert a[1] is unique_nulls_fixture2 + + +class TestIsin: + def test_invalid(self): + + msg = ( + r"only list-like objects are allowed to be passed to isin\(\), " + r"you passed a \[int\]" + ) + with pytest.raises(TypeError, match=msg): + algos.isin(1, 1) + with pytest.raises(TypeError, match=msg): + algos.isin(1, [1]) + with pytest.raises(TypeError, match=msg): + algos.isin([1], 1) + + def test_basic(self): + + result = algos.isin([1, 2], [1]) + expected = np.array([True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = algos.isin(np.array([1, 2]), [1]) + expected = np.array([True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = algos.isin(Series([1, 2]), [1]) + expected = np.array([True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = algos.isin(Series([1, 2]), Series([1])) + expected = np.array([True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = algos.isin(Series([1, 2]), {1}) + expected = np.array([True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = algos.isin(["a", "b"], ["a"]) + expected = np.array([True, False]) + tm.assert_numpy_array_equal(result, expected) + 
+ result = algos.isin(Series(["a", "b"]), Series(["a"])) + expected = np.array([True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = algos.isin(Series(["a", "b"]), {"a"}) + expected = np.array([True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = algos.isin(["a", "b"], [1]) + expected = np.array([False, False]) + tm.assert_numpy_array_equal(result, expected) + + def test_i8(self): + + arr = date_range("20130101", periods=3).values + result = algos.isin(arr, [arr[0]]) + expected = np.array([True, False, False]) + tm.assert_numpy_array_equal(result, expected) + + result = algos.isin(arr, arr[0:2]) + expected = np.array([True, True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = algos.isin(arr, set(arr[0:2])) + expected = np.array([True, True, False]) + tm.assert_numpy_array_equal(result, expected) + + arr = timedelta_range("1 day", periods=3).values + result = algos.isin(arr, [arr[0]]) + expected = np.array([True, False, False]) + tm.assert_numpy_array_equal(result, expected) + + result = algos.isin(arr, arr[0:2]) + expected = np.array([True, True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = algos.isin(arr, set(arr[0:2])) + expected = np.array([True, True, False]) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("dtype1", ["m8[ns]", "M8[ns]", "M8[ns, UTC]", "period[D]"]) + @pytest.mark.parametrize("dtype", ["i8", "f8", "u8"]) + def test_isin_datetimelike_values_numeric_comps(self, dtype, dtype1): + # Anything but object and we get all-False shortcut + + dta = date_range("2013-01-01", periods=3)._values + if dtype1 == "period[D]": + # TODO: fix Series.view to get this on its own + arr = dta.to_period("D") + elif dtype1 == "M8[ns, UTC]": + # TODO: fix Series.view to get this on its own + arr = dta.tz_localize("UTC") + else: + arr = Series(dta.view("i8")).view(dtype1)._values + + comps = arr.view("i8").astype(dtype) + + result = algos.isin(comps, arr) + expected = np.zeros(comps.shape, dtype=bool) + tm.assert_numpy_array_equal(result, expected) + + def test_large(self): + s = date_range("20000101", periods=2000000, freq="s").values + result = algos.isin(s, s[0:2]) + expected = np.zeros(len(s), dtype=bool) + expected[0] = True + expected[1] = True + tm.assert_numpy_array_equal(result, expected) + + def test_categorical_from_codes(self): + # GH 16639 + vals = np.array([0, 1, 2, 0]) + cats = ["a", "b", "c"] + Sd = Series(Categorical([1]).from_codes(vals, cats)) + St = Series(Categorical([1]).from_codes(np.array([0, 1]), cats)) + expected = np.array([True, True, False, True]) + result = algos.isin(Sd, St) + tm.assert_numpy_array_equal(expected, result) + + def test_categorical_isin(self): + vals = np.array([0, 1, 2, 0]) + cats = ["a", "b", "c"] + cat = Categorical([1]).from_codes(vals, cats) + other = Categorical([1]).from_codes(np.array([0, 1]), cats) + + expected = np.array([True, True, False, True]) + result = algos.isin(cat, other) + tm.assert_numpy_array_equal(expected, result) + + def test_same_nan_is_in(self): + # GH 22160 + # nan is special, because "a is b" does not imply "a == b" + # at least, isin() should follow Python's "np.nan in [nan] == True" + # casting to -> np.float64 -> another float-object somewhere on + # the way could jeopardize this behavior + comps = [np.nan] # could be cast to float64 + values = [np.nan] + expected = np.array([True]) + result = algos.isin(comps, values) + tm.assert_numpy_array_equal(expected, result) + + def
+ def test_same_nan_is_in_large(self): + # https://github.com/pandas-dev/pandas/issues/22205 + s = np.tile(1.0, 1_000_001) + s[0] = np.nan + result = algos.isin(s, [np.nan, 1]) + expected = np.ones(len(s), dtype=bool) + tm.assert_numpy_array_equal(result, expected) + + def test_same_nan_is_in_large_series(self): + # https://github.com/pandas-dev/pandas/issues/22205 + s = np.tile(1.0, 1_000_001) + series = Series(s) + s[0] = np.nan + result = series.isin([np.nan, 1]) + expected = Series(np.ones(len(s), dtype=bool)) + tm.assert_series_equal(result, expected) + + def test_same_object_is_in(self): + # GH 22160 + # nans may get special treatment, but a user could define a custom + # class with similar behavior; in that case we should at least fall + # back to the usual Python behavior: "a in [a]" is True + class LikeNan: + def __eq__(self, other) -> bool: + return False + + def __hash__(self): + return 0 + + a, b = LikeNan(), LikeNan() + # same object -> True + tm.assert_numpy_array_equal(algos.isin([a], [a]), np.array([True])) + # different objects -> False + tm.assert_numpy_array_equal(algos.isin([a], [b]), np.array([False])) + + def test_different_nans(self): + # GH 22160 + # all nans are handled as equivalent + + comps = [float("nan")] + values = [float("nan")] + assert comps[0] is not values[0] # different nan-objects + + # as list of python objects: + result = algos.isin(comps, values) + tm.assert_numpy_array_equal(np.array([True]), result) + + # as object-array: + result = algos.isin( + np.asarray(comps, dtype=object), np.asarray(values, dtype=object) + ) + tm.assert_numpy_array_equal(np.array([True]), result) + + # as float64-array: + result = algos.isin( + np.asarray(comps, dtype=np.float64), np.asarray(values, dtype=np.float64) + ) + tm.assert_numpy_array_equal(np.array([True]), result) + + def test_no_cast(self): + # GH 22160 + # ensure 42 is not cast to a string + comps = ["ss", 42] + values = ["42"] + expected = np.array([False, False]) + result = algos.isin(comps, values) + tm.assert_numpy_array_equal(expected, result) + + @pytest.mark.parametrize("empty", [[], Series(dtype=object), np.array([])]) + def test_empty(self, empty): + # see gh-16991 + vals = Index(["a", "b"]) + expected = np.array([False, False]) + + result = algos.isin(vals, empty) + tm.assert_numpy_array_equal(expected, result) + + def test_different_nan_objects(self): + # GH 22119 + comps = np.array(["nan", np.nan * 1j, float("nan")], dtype=object) + vals = np.array([float("nan")], dtype=object) + expected = np.array([False, False, True]) + result = algos.isin(comps, vals) + tm.assert_numpy_array_equal(expected, result) + + def test_different_nans_as_float64(self): + # GH 21866 + # create different nans from bit-patterns; + # these nans will land in different buckets in the hash-table + # if no special care is taken + NAN1 = struct.unpack("d", struct.pack("=Q", 0x7FF8000000000000))[0] + NAN2 = struct.unpack("d", struct.pack("=Q", 0x7FF8000000000001))[0] + assert NAN1 != NAN1 + assert NAN2 != NAN2 + + # check that NAN1 and NAN2 are equivalent: + arr = np.array([NAN1, NAN2], dtype=np.float64) + lookup1 = np.array([NAN1], dtype=np.float64) + result = algos.isin(arr, lookup1) + expected = np.array([True, True]) + tm.assert_numpy_array_equal(result, expected) + + lookup2 = np.array([NAN2], dtype=np.float64) + result = algos.isin(arr, lookup2) + expected = np.array([True, True]) + tm.assert_numpy_array_equal(result, expected)
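+
+ # Editor's sketch (hypothetical test, not in the original suite): two
+ # nans built from different bit patterns differ bitwise and never
+ # compare equal, yet the lookups above treat them as a single value:
+ def test_nan_bit_pattern_sketch(self):
+     nan_a = struct.unpack("d", struct.pack("=Q", 0x7FF8000000000000))[0]
+     nan_b = struct.unpack("d", struct.pack("=Q", 0x7FF8000000000001))[0]
+     assert struct.pack("d", nan_a) != struct.pack("d", nan_b)  # bits differ
+     assert nan_a != nan_b  # nan never equals nan, itself included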
string at isin() ("1") + -> should not match values because int 1 is not equal str 1""" + df = DataFrame({"values": [1, 2]}) + result = df.isin(["1"]) + expected_false = DataFrame({"values": [False, False]}) + tm.assert_frame_equal(result, expected_false) + + def test_isin_nan_df_string_search(self): + """Comparing df with nan value (np.nan,2) with a string at isin() ("NaN") + -> should not match values because np.nan is not equal str NaN""" + df = DataFrame({"values": [np.nan, 2]}) + result = df.isin(["NaN"]) + expected_false = DataFrame({"values": [False, False]}) + tm.assert_frame_equal(result, expected_false) + + def test_isin_float_df_string_search(self): + """Comparing df with floats (1.4245,2.32441) with a string at isin() ("1.4245") + -> should not match values because float 1.4245 is not equal str 1.4245""" + df = DataFrame({"values": [1.4245, 2.32441]}) + result = df.isin(["1.4245"]) + expected_false = DataFrame({"values": [False, False]}) + tm.assert_frame_equal(result, expected_false) + + +class TestValueCounts: + def test_value_counts(self): + np.random.seed(1234) + from pandas.core.reshape.tile import cut + + arr = np.random.randn(4) + factor = cut(arr, 4) + + # assert isinstance(factor, n) + result = algos.value_counts(factor) + breaks = [-1.194, -0.535, 0.121, 0.777, 1.433] + index = IntervalIndex.from_breaks(breaks).astype(CDT(ordered=True)) + expected = Series([1, 1, 1, 1], index=index) + tm.assert_series_equal(result.sort_index(), expected.sort_index()) + + def test_value_counts_bins(self): + s = [1, 2, 3, 4] + result = algos.value_counts(s, bins=1) + expected = Series([4], index=IntervalIndex.from_tuples([(0.996, 4.0)])) + tm.assert_series_equal(result, expected) + + result = algos.value_counts(s, bins=2, sort=False) + expected = Series( + [2, 2], index=IntervalIndex.from_tuples([(0.996, 2.5), (2.5, 4.0)]) + ) + tm.assert_series_equal(result, expected) + + def test_value_counts_dtypes(self): + result = algos.value_counts([1, 1.0]) + assert len(result) == 1 + + result = algos.value_counts([1, 1.0], bins=1) + assert len(result) == 1 + + result = algos.value_counts(Series([1, 1.0, "1"])) # object + assert len(result) == 2 + + msg = "bins argument only works with numeric data" + with pytest.raises(TypeError, match=msg): + algos.value_counts(["1", 1], bins=1) + + def test_value_counts_nat(self): + td = Series([np.timedelta64(10000), NaT], dtype="timedelta64[ns]") + dt = to_datetime(["NaT", "2014-01-01"]) + + for s in [td, dt]: + vc = algos.value_counts(s) + vc_with_na = algos.value_counts(s, dropna=False) + assert len(vc) == 1 + assert len(vc_with_na) == 2 + + exp_dt = Series({Timestamp("2014-01-01 00:00:00"): 1}) + tm.assert_series_equal(algos.value_counts(dt), exp_dt) + # TODO same for (timedelta) + + def test_value_counts_datetime_outofbounds(self): + # GH 13663 + s = Series( + [ + datetime(3000, 1, 1), + datetime(5000, 1, 1), + datetime(5000, 1, 1), + datetime(6000, 1, 1), + datetime(3000, 1, 1), + datetime(3000, 1, 1), + ] + ) + res = s.value_counts() + + exp_index = Index( + [datetime(3000, 1, 1), datetime(5000, 1, 1), datetime(6000, 1, 1)], + dtype=object, + ) + exp = Series([3, 2, 1], index=exp_index) + tm.assert_series_equal(res, exp) + + # GH 12424 + res = to_datetime(Series(["2362-01-01", np.nan]), errors="ignore") + exp = Series(["2362-01-01", np.nan], dtype=object) + tm.assert_series_equal(res, exp) + + def test_categorical(self): + s = Series(Categorical(list("aaabbc"))) + result = s.value_counts() + expected = Series([3, 2, 1], index=CategoricalIndex(["a", 
"b", "c"])) + + tm.assert_series_equal(result, expected, check_index_type=True) + + # preserve order? + s = s.cat.as_ordered() + result = s.value_counts() + expected.index = expected.index.as_ordered() + tm.assert_series_equal(result, expected, check_index_type=True) + + def test_categorical_nans(self): + s = Series(Categorical(list("aaaaabbbcc"))) # 4,3,2,1 (nan) + s.iloc[1] = np.nan + result = s.value_counts() + expected = Series( + [4, 3, 2], + index=CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c"]), + ) + tm.assert_series_equal(result, expected, check_index_type=True) + result = s.value_counts(dropna=False) + expected = Series([4, 3, 2, 1], index=CategoricalIndex(["a", "b", "c", np.nan])) + tm.assert_series_equal(result, expected, check_index_type=True) + + # out of order + s = Series( + Categorical(list("aaaaabbbcc"), ordered=True, categories=["b", "a", "c"]) + ) + s.iloc[1] = np.nan + result = s.value_counts() + expected = Series( + [4, 3, 2], + index=CategoricalIndex( + ["a", "b", "c"], categories=["b", "a", "c"], ordered=True + ), + ) + tm.assert_series_equal(result, expected, check_index_type=True) + + result = s.value_counts(dropna=False) + expected = Series( + [4, 3, 2, 1], + index=CategoricalIndex( + ["a", "b", "c", np.nan], categories=["b", "a", "c"], ordered=True + ), + ) + tm.assert_series_equal(result, expected, check_index_type=True) + + def test_categorical_zeroes(self): + # keep the `d` category with 0 + s = Series(Categorical(list("bbbaac"), categories=list("abcd"), ordered=True)) + result = s.value_counts() + expected = Series( + [3, 2, 1, 0], + index=Categorical( + ["b", "a", "c", "d"], categories=list("abcd"), ordered=True + ), + ) + tm.assert_series_equal(result, expected, check_index_type=True) + + def test_dropna(self): + # https://github.com/pandas-dev/pandas/issues/9443#issuecomment-73719328 + + tm.assert_series_equal( + Series([True, True, False]).value_counts(dropna=True), + Series([2, 1], index=[True, False]), + ) + tm.assert_series_equal( + Series([True, True, False]).value_counts(dropna=False), + Series([2, 1], index=[True, False]), + ) + + tm.assert_series_equal( + Series([True] * 3 + [False] * 2 + [None] * 5).value_counts(dropna=True), + Series([3, 2], index=[True, False]), + ) + tm.assert_series_equal( + Series([True] * 5 + [False] * 3 + [None] * 2).value_counts(dropna=False), + Series([5, 3, 2], index=[True, False, np.nan]), + ) + tm.assert_series_equal( + Series([10.3, 5.0, 5.0]).value_counts(dropna=True), + Series([2, 1], index=[5.0, 10.3]), + ) + tm.assert_series_equal( + Series([10.3, 5.0, 5.0]).value_counts(dropna=False), + Series([2, 1], index=[5.0, 10.3]), + ) + + tm.assert_series_equal( + Series([10.3, 5.0, 5.0, None]).value_counts(dropna=True), + Series([2, 1], index=[5.0, 10.3]), + ) + + result = Series([10.3, 10.3, 5.0, 5.0, 5.0, None]).value_counts(dropna=False) + expected = Series([3, 2, 1], index=[5.0, 10.3, np.nan]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("dtype", (np.float64, object, "M8[ns]")) + def test_value_counts_normalized(self, dtype): + # GH12558 + s = Series([1] * 2 + [2] * 3 + [np.nan] * 5) + s_typed = s.astype(dtype) + result = s_typed.value_counts(normalize=True, dropna=False) + expected = Series( + [0.5, 0.3, 0.2], index=Series([np.nan, 2.0, 1.0], dtype=dtype) + ) + tm.assert_series_equal(result, expected) + + result = s_typed.value_counts(normalize=True, dropna=True) + expected = Series([0.6, 0.4], index=Series([2.0, 1.0], dtype=dtype)) + tm.assert_series_equal(result, expected) + 
+ def test_value_counts_uint64(self): + arr = np.array([2 ** 63], dtype=np.uint64) + expected = Series([1], index=[2 ** 63]) + result = algos.value_counts(arr) + + tm.assert_series_equal(result, expected) + + arr = np.array([-1, 2 ** 63], dtype=object) + expected = Series([1, 1], index=[-1, 2 ** 63]) + result = algos.value_counts(arr) + + tm.assert_series_equal(result, expected) + + +class TestDuplicated: + def test_duplicated_with_nas(self): + keys = np.array([0, 1, np.nan, 0, 2, np.nan], dtype=object) + + result = algos.duplicated(keys) + expected = np.array([False, False, False, True, False, True]) + tm.assert_numpy_array_equal(result, expected) + + result = algos.duplicated(keys, keep="first") + expected = np.array([False, False, False, True, False, True]) + tm.assert_numpy_array_equal(result, expected) + + result = algos.duplicated(keys, keep="last") + expected = np.array([True, False, True, False, False, False]) + tm.assert_numpy_array_equal(result, expected) + + result = algos.duplicated(keys, keep=False) + expected = np.array([True, False, True, True, False, True]) + tm.assert_numpy_array_equal(result, expected) + + keys = np.empty(8, dtype=object) + for i, t in enumerate( + zip([0, 0, np.nan, np.nan] * 2, [0, np.nan, 0, np.nan] * 2) + ): + keys[i] = t + + result = algos.duplicated(keys) + falses = [False] * 4 + trues = [True] * 4 + expected = np.array(falses + trues) + tm.assert_numpy_array_equal(result, expected) + + result = algos.duplicated(keys, keep="last") + expected = np.array(trues + falses) + tm.assert_numpy_array_equal(result, expected) + + result = algos.duplicated(keys, keep=False) + expected = np.array(trues + trues) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "case", + [ + np.array([1, 2, 1, 5, 3, 2, 4, 1, 5, 6]), + np.array([1.1, 2.2, 1.1, np.nan, 3.3, 2.2, 4.4, 1.1, np.nan, 6.6]), + np.array( + [ + 1 + 1j, + 2 + 2j, + 1 + 1j, + 5 + 5j, + 3 + 3j, + 2 + 2j, + 4 + 4j, + 1 + 1j, + 5 + 5j, + 6 + 6j, + ] + ), + np.array(["a", "b", "a", "e", "c", "b", "d", "a", "e", "f"], dtype=object), + np.array( + [1, 2 ** 63, 1, 3 ** 5, 10, 2 ** 63, 39, 1, 3 ** 5, 7], dtype=np.uint64 + ), + ], + ) + def test_numeric_object_likes(self, case): + exp_first = np.array( + [False, False, True, False, False, True, False, True, True, False] + ) + exp_last = np.array( + [True, True, True, True, False, False, False, False, False, False] + ) + exp_false = exp_first | exp_last + + res_first = algos.duplicated(case, keep="first") + tm.assert_numpy_array_equal(res_first, exp_first) + + res_last = algos.duplicated(case, keep="last") + tm.assert_numpy_array_equal(res_last, exp_last) + + res_false = algos.duplicated(case, keep=False) + tm.assert_numpy_array_equal(res_false, exp_false) + + # index + for idx in [Index(case), Index(case, dtype="category")]: + res_first = idx.duplicated(keep="first") + tm.assert_numpy_array_equal(res_first, exp_first) + + res_last = idx.duplicated(keep="last") + tm.assert_numpy_array_equal(res_last, exp_last) + + res_false = idx.duplicated(keep=False) + tm.assert_numpy_array_equal(res_false, exp_false) + + # series + for s in [Series(case), Series(case, dtype="category")]: + res_first = s.duplicated(keep="first") + tm.assert_series_equal(res_first, Series(exp_first)) + + res_last = s.duplicated(keep="last") + tm.assert_series_equal(res_last, Series(exp_last)) + + res_false = s.duplicated(keep=False) + tm.assert_series_equal(res_false, Series(exp_false)) + + def test_datetime_likes(self): + + dt = [ + "2011-01-01", + "2011-01-02", + 
"2011-01-01", + "NaT", + "2011-01-03", + "2011-01-02", + "2011-01-04", + "2011-01-01", + "NaT", + "2011-01-06", + ] + td = [ + "1 days", + "2 days", + "1 days", + "NaT", + "3 days", + "2 days", + "4 days", + "1 days", + "NaT", + "6 days", + ] + + cases = [ + np.array([Timestamp(d) for d in dt]), + np.array([Timestamp(d, tz="US/Eastern") for d in dt]), + np.array([Period(d, freq="D") for d in dt]), + np.array([np.datetime64(d) for d in dt]), + np.array([Timedelta(d) for d in td]), + ] + + exp_first = np.array( + [False, False, True, False, False, True, False, True, True, False] + ) + exp_last = np.array( + [True, True, True, True, False, False, False, False, False, False] + ) + exp_false = exp_first | exp_last + + for case in cases: + res_first = algos.duplicated(case, keep="first") + tm.assert_numpy_array_equal(res_first, exp_first) + + res_last = algos.duplicated(case, keep="last") + tm.assert_numpy_array_equal(res_last, exp_last) + + res_false = algos.duplicated(case, keep=False) + tm.assert_numpy_array_equal(res_false, exp_false) + + # index + for idx in [ + Index(case), + Index(case, dtype="category"), + Index(case, dtype=object), + ]: + res_first = idx.duplicated(keep="first") + tm.assert_numpy_array_equal(res_first, exp_first) + + res_last = idx.duplicated(keep="last") + tm.assert_numpy_array_equal(res_last, exp_last) + + res_false = idx.duplicated(keep=False) + tm.assert_numpy_array_equal(res_false, exp_false) + + # series + for s in [ + Series(case), + Series(case, dtype="category"), + Series(case, dtype=object), + ]: + res_first = s.duplicated(keep="first") + tm.assert_series_equal(res_first, Series(exp_first)) + + res_last = s.duplicated(keep="last") + tm.assert_series_equal(res_last, Series(exp_last)) + + res_false = s.duplicated(keep=False) + tm.assert_series_equal(res_false, Series(exp_false)) + + @pytest.mark.parametrize("case", [Index([1, 2, 3]), pd.RangeIndex(0, 3)]) + def test_unique_index(self, case): + assert case.is_unique is True + tm.assert_numpy_array_equal(case.duplicated(), np.array([False, False, False])) + + @pytest.mark.parametrize( + "arr, uniques", + [ + ( + [(0, 0), (0, 1), (1, 0), (1, 1), (0, 0), (0, 1), (1, 0), (1, 1)], + [(0, 0), (0, 1), (1, 0), (1, 1)], + ), + ( + [("b", "c"), ("a", "b"), ("a", "b"), ("b", "c")], + [("b", "c"), ("a", "b")], + ), + ([("a", 1), ("b", 2), ("a", 3), ("a", 1)], [("a", 1), ("b", 2), ("a", 3)]), + ], + ) + def test_unique_tuples(self, arr, uniques): + # https://github.com/pandas-dev/pandas/issues/16519 + expected = np.empty(len(uniques), dtype=object) + expected[:] = uniques + + result = pd.unique(arr) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "array,expected", + [ + ( + [1 + 1j, 0, 1, 1j, 1 + 2j, 1 + 2j], + # Should return a complex dtype in the future + np.array([(1 + 1j), 0j, (1 + 0j), 1j, (1 + 2j)], dtype=object), + ) + ], + ) + def test_unique_complex_numbers(self, array, expected): + # GH 17927 + result = pd.unique(array) + tm.assert_numpy_array_equal(result, expected) + + +class TestHashTable: + def test_string_hashtable_set_item_signature(self): + # GH#30419 fix typing in StringHashTable.set_item to prevent segfault + tbl = ht.StringHashTable() + + tbl.set_item("key", 1) + assert tbl.get_item("key") == 1 + + with pytest.raises(TypeError, match="'key' has incorrect type"): + # key arg typed as string, not object + tbl.set_item(4, 6) + with pytest.raises(TypeError, match="'val' has incorrect type"): + tbl.get_item(4) + + def test_lookup_nan(self, writable): + xs = np.array([2.718, 
3.14, np.nan, -7, 5, 2, 3]) + # GH 21688 ensure we can deal with readonly memory views + xs.setflags(write=writable) + m = ht.Float64HashTable() + m.map_locations(xs) + tm.assert_numpy_array_equal(m.lookup(xs), np.arange(len(xs), dtype=np.intp)) + + def test_add_signed_zeros(self): + # GH 21866 inconsistent hash-function for float64 + # default hash-function would lead to different hash-buckets + # for 0.0 and -0.0 if there are more than 2^30 hash-buckets + # but this would mean 16GB + N = 4 # 12 * 10**8 would trigger the error, if you have enough memory + m = ht.Float64HashTable(N) + m.set_item(0.0, 0) + m.set_item(-0.0, 0) + assert len(m) == 1 # 0.0 and -0.0 are equivalent + + def test_add_different_nans(self): + # GH 21866 inconsistent hash-function for float64 + # create different nans from bit-patterns: + NAN1 = struct.unpack("d", struct.pack("=Q", 0x7FF8000000000000))[0] + NAN2 = struct.unpack("d", struct.pack("=Q", 0x7FF8000000000001))[0] + assert NAN1 != NAN1 + assert NAN2 != NAN2 + # default hash function would lead to different hash-buckets + # for NAN1 and NAN2 even if there are only 4 buckets: + m = ht.Float64HashTable() + m.set_item(NAN1, 0) + m.set_item(NAN2, 0) + assert len(m) == 1 # NAN1 and NAN2 are equivalent + + def test_lookup_overflow(self, writable): + xs = np.array([1, 2, 2 ** 63], dtype=np.uint64) + # GH 21688 ensure we can deal with readonly memory views + xs.setflags(write=writable) + m = ht.UInt64HashTable() + m.map_locations(xs) + tm.assert_numpy_array_equal(m.lookup(xs), np.arange(len(xs), dtype=np.intp)) + + def test_get_unique(self): + s = Series([1, 2, 2 ** 63, 2 ** 63], dtype=np.uint64) + exp = np.array([1, 2, 2 ** 63], dtype=np.uint64) + tm.assert_numpy_array_equal(s.unique(), exp) + + @pytest.mark.parametrize("nvals", [0, 10]) # resizing to 0 is special case + @pytest.mark.parametrize( + "htable, uniques, dtype, safely_resizes", + [ + (ht.PyObjectHashTable, ht.ObjectVector, "object", False), + (ht.StringHashTable, ht.ObjectVector, "object", True), + (ht.Float64HashTable, ht.Float64Vector, "float64", False), + (ht.Int64HashTable, ht.Int64Vector, "int64", False), + (ht.Int32HashTable, ht.Int32Vector, "int32", False), + (ht.UInt64HashTable, ht.UInt64Vector, "uint64", False), + ], + ) + def test_vector_resize( + self, writable, htable, uniques, dtype, safely_resizes, nvals + ): + # Test for memory errors after internal vector + # reallocations (GH 7157) + vals = np.array(np.random.randn(1000), dtype=dtype) + + # GH 21688 ensures we can deal with read-only memory views + vals.setflags(write=writable) + + # initialise instances; cannot initialise in parametrization, + # as otherwise external views would be held on the array (which is + # one of the things this test is checking) + htable = htable() + uniques = uniques() + + # get_labels may append to uniques + htable.get_labels(vals[:nvals], uniques, 0, -1) + # to_array() sets an external_view_exists flag on uniques. 
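+ # (Editor's note: the flag matters because a resize reallocates the
+ # vector's buffer; any ndarray still viewing the old buffer would be
+ # left dangling, hence the ValueError exercised below instead of a
+ # silent use-after-free.)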
+ tmp = uniques.to_array() + oldshape = tmp.shape + + # subsequent get_labels() calls can no longer append to it + # (except for StringHashTables + ObjectVector) + if safely_resizes: + htable.get_labels(vals, uniques, 0, -1) + else: + with pytest.raises(ValueError, match="external reference.*"): + htable.get_labels(vals, uniques, 0, -1) + + uniques.to_array() # should not raise here + assert tmp.shape == oldshape + + @pytest.mark.parametrize( + "htable, tm_dtype", + [ + (ht.PyObjectHashTable, "String"), + (ht.StringHashTable, "String"), + (ht.Float64HashTable, "Float"), + (ht.Int64HashTable, "Int"), + (ht.UInt64HashTable, "UInt"), + ], + ) + def test_hashtable_unique(self, htable, tm_dtype, writable): + # output of maker has guaranteed unique elements + maker = getattr(tm, "make" + tm_dtype + "Index") + s = Series(maker(1000)) + if htable == ht.Float64HashTable: + # add NaN for float column + s.loc[500] = np.nan + elif htable == ht.PyObjectHashTable: + # use different NaN types for object column + s.loc[500:502] = [np.nan, None, NaT] + + # create duplicated selection + s_duplicated = s.sample(frac=3, replace=True).reset_index(drop=True) + s_duplicated.values.setflags(write=writable) + + # drop_duplicates has own cython code (hash_table_func_helper.pxi) + # and is tested separately; keeps first occurrence like ht.unique() + expected_unique = s_duplicated.drop_duplicates(keep="first").values + result_unique = htable().unique(s_duplicated.values) + tm.assert_numpy_array_equal(result_unique, expected_unique) + + # test return_inverse=True + # reconstruction can only succeed if the inverse is correct + result_unique, result_inverse = htable().unique( + s_duplicated.values, return_inverse=True + ) + tm.assert_numpy_array_equal(result_unique, expected_unique) + reconstr = result_unique[result_inverse] + tm.assert_numpy_array_equal(reconstr, s_duplicated.values) + + @pytest.mark.parametrize( + "htable, tm_dtype", + [ + (ht.PyObjectHashTable, "String"), + (ht.StringHashTable, "String"), + (ht.Float64HashTable, "Float"), + (ht.Int64HashTable, "Int"), + (ht.UInt64HashTable, "UInt"), + ], + ) + def test_hashtable_factorize(self, htable, tm_dtype, writable): + # output of maker has guaranteed unique elements + maker = getattr(tm, "make" + tm_dtype + "Index") + s = Series(maker(1000)) + if htable == ht.Float64HashTable: + # add NaN for float column + s.loc[500] = np.nan + elif htable == ht.PyObjectHashTable: + # use different NaN types for object column + s.loc[500:502] = [np.nan, None, NaT] + + # create duplicated selection + s_duplicated = s.sample(frac=3, replace=True).reset_index(drop=True) + s_duplicated.values.setflags(write=writable) + na_mask = s_duplicated.isna().values + + result_unique, result_inverse = htable().factorize(s_duplicated.values) + + # drop_duplicates has own cython code (hash_table_func_helper.pxi) + # and is tested separately; keeps first occurrence like ht.factorize() + # since factorize removes all NaNs, we do the same here + expected_unique = s_duplicated.dropna().drop_duplicates().values + tm.assert_numpy_array_equal(result_unique, expected_unique) + + # reconstruction can only succeed if the inverse is correct. 
Since + # factorize removes the NaNs, those have to be excluded here as well + result_reconstruct = result_unique[result_inverse[~na_mask]] + expected_reconstruct = s_duplicated.dropna().values + tm.assert_numpy_array_equal(result_reconstruct, expected_reconstruct) + + @pytest.mark.parametrize( + "hashtable", + [ + ht.PyObjectHashTable, + ht.StringHashTable, + ht.Float64HashTable, + ht.Int64HashTable, + ht.Int32HashTable, + ht.UInt64HashTable, + ], + ) + def test_hashtable_large_sizehint(self, hashtable): + # GH#22729 smoketest for not raising when passing a large size_hint + size_hint = np.iinfo(np.uint32).max + 1 + hashtable(size_hint=size_hint) + + +def test_unique_label_indices(): + + a = np.random.randint(1, 1 << 10, 1 << 15).astype(np.intp) + + left = ht.unique_label_indices(a) + right = np.unique(a, return_index=True)[1] + + tm.assert_numpy_array_equal(left, right, check_dtype=False) + + a[np.random.choice(len(a), 10)] = -1 + left = ht.unique_label_indices(a) + right = np.unique(a, return_index=True)[1][1:] + tm.assert_numpy_array_equal(left, right, check_dtype=False) + + +class TestRank: + @td.skip_if_no_scipy + @pytest.mark.parametrize( + "arr", + [ + [np.nan, np.nan, 5.0, 5.0, 5.0, np.nan, 1, 2, 3, np.nan], + [4.0, np.nan, 5.0, 5.0, 5.0, np.nan, 1, 2, 4.0, np.nan], + ], + ) + def test_scipy_compat(self, arr): + from scipy.stats import rankdata + + arr = np.array(arr) + + mask = ~np.isfinite(arr) + arr = arr.copy() + result = libalgos.rank_1d(arr) + arr[mask] = np.inf + exp = rankdata(arr) + exp[mask] = np.nan + tm.assert_almost_equal(result, exp) + + @pytest.mark.parametrize("dtype", np.typecodes["AllInteger"]) + def test_basic(self, writable, dtype): + exp = np.array([1, 2], dtype=np.float64) + + data = np.array([1, 100], dtype=dtype) + data.setflags(write=writable) + ser = Series(data) + result = algos.rank(ser) + tm.assert_numpy_array_equal(result, exp) + + @pytest.mark.parametrize("dtype", [np.float64, np.uint64]) + def test_uint64_overflow(self, dtype): + exp = np.array([1, 2], dtype=np.float64) + + s = Series([1, 2 ** 63], dtype=dtype) + tm.assert_numpy_array_equal(algos.rank(s), exp) + + def test_too_many_ndims(self): + arr = np.array([[[1, 2, 3], [4, 5, 6], [7, 8, 9]]]) + msg = "Array with ndim > 2 are not supported" + + with pytest.raises(TypeError, match=msg): + algos.rank(arr) + + @pytest.mark.single + @pytest.mark.high_memory + def test_pct_max_many_rows(self): + # GH 18271 + values = np.arange(2 ** 24 + 1) + result = algos.rank(values, pct=True).max() + assert result == 1 + + values = np.arange(2 ** 25 + 2).reshape(2 ** 24 + 1, 2) + result = algos.rank(values, pct=True).max() + assert result == 1 + + +def test_pad_backfill_object_segfault(): + + old = np.array([], dtype="O") + new = np.array([datetime(2010, 12, 31)], dtype="O") + + result = libalgos.pad["object"](old, new) + expected = np.array([-1], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + result = libalgos.pad["object"](new, old) + expected = np.array([], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + result = libalgos.backfill["object"](old, new) + expected = np.array([-1], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + result = libalgos.backfill["object"](new, old) + expected = np.array([], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + +class TestTseriesUtil: + def test_backfill(self): + old = Index([1, 5, 10]) + new = Index(list(range(12))) + + filler = libalgos.backfill["int64_t"](old.values, new.values) + + expect_filler = 
np.array([0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, -1], dtype=np.intp) + tm.assert_numpy_array_equal(filler, expect_filler) + + # corner case + old = Index([1, 4]) + new = Index(list(range(5, 10))) + filler = libalgos.backfill["int64_t"](old.values, new.values) + + expect_filler = np.array([-1, -1, -1, -1, -1], dtype=np.intp) + tm.assert_numpy_array_equal(filler, expect_filler) + + def test_pad(self): + old = Index([1, 5, 10]) + new = Index(list(range(12))) + + filler = libalgos.pad["int64_t"](old.values, new.values) + + expect_filler = np.array([-1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2], dtype=np.intp) + tm.assert_numpy_array_equal(filler, expect_filler) + + # corner case + old = Index([5, 10]) + new = Index(np.arange(5)) + filler = libalgos.pad["int64_t"](old.values, new.values) + expect_filler = np.array([-1, -1, -1, -1, -1], dtype=np.intp) + tm.assert_numpy_array_equal(filler, expect_filler) + + +def test_is_lexsorted(): + failure = [ + np.array( + [ + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ], + dtype="int64", + ), + np.array( + [ + 30, + 29, + 28, + 27, + 26, + 25, + 24, + 23, + 22, + 21, + 20, + 19, + 18, + 17, + 16, + 15, + 14, + 13, + 12, + 11, + 10, + 9, + 8, + 7, + 6, + 5, + 4, + 3, + 2, + 1, + 0, + 30, + 29, + 28, + 27, + 26, + 25, + 24, + 23, + 22, + 21, + 20, + 19, + 18, + 17, + 16, + 15, + 14, + 13, + 12, + 11, + 10, + 9, + 8, + 7, + 6, + 5, + 4, + 3, + 2, + 1, + 0, + 30, + 29, + 28, + 27, + 26, + 25, + 24, + 23, + 22, + 21, + 20, + 19, + 18, + 17, + 16, + 15, + 14, + 13, + 12, + 11, + 10, + 9, + 8, + 7, + 6, + 5, + 4, + 3, + 2, + 1, + 0, + 30, + 29, + 28, + 27, + 26, + 25, + 24, + 23, + 22, + 21, + 20, + 19, + 18, + 17, + 16, + 15, + 14, + 13, + 12, + 11, + 10, + 9, + 8, + 7, + 6, + 5, + 4, + 3, + 2, + 1, + 0, + ], + dtype="int64", + ), + ] + + assert not libalgos.is_lexsorted(failure) + + +def test_groupsort_indexer(): + a = np.random.randint(0, 1000, 100).astype(np.intp) + b = np.random.randint(0, 1000, 100).astype(np.intp) + + result = libalgos.groupsort_indexer(a, 1000)[0] + + # need to use a stable sort + # np.argsort returns int, groupsort_indexer + # always returns intp + expected = np.argsort(a, kind="mergesort") + expected = expected.astype(np.intp) + + tm.assert_numpy_array_equal(result, expected) + + # compare with lexsort + # np.lexsort returns int, groupsort_indexer + # always returns intp + key = a * 1000 + b + result = libalgos.groupsort_indexer(key, 1000000)[0] + expected = np.lexsort((b, a)) + expected = expected.astype(np.intp) + + tm.assert_numpy_array_equal(result, expected) + + +def test_infinity_sort(): + # GH 13445 + # numpy's argsort can be unhappy if something is less than + # itself. Instead, let's give our infinities a self-consistent + # ordering, but outside the float extended real line. 
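+ #
+ # Editor's aside (a minimal sketch, not original code): the sentinels
+ # bracket even the genuine float infinities, so a sort can always
+ # place them deterministically:
+ demo = sorted([float("inf"), libalgos.Infinity(), float("-inf"), libalgos.NegInfinity()])
+ assert isinstance(demo[0], libalgos.NegInfinity)
+ assert isinstance(demo[-1], libalgos.Infinity)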
+ + Inf = libalgos.Infinity() + NegInf = libalgos.NegInfinity() + + ref_nums = [NegInf, float("-inf"), -1e100, 0, 1e100, float("inf"), Inf] + + assert all(Inf >= x for x in ref_nums) + assert all(Inf > x or x is Inf for x in ref_nums) + assert Inf >= Inf and Inf == Inf + assert not Inf < Inf and not Inf > Inf + assert libalgos.Infinity() == libalgos.Infinity() + assert not libalgos.Infinity() != libalgos.Infinity() + + assert all(NegInf <= x for x in ref_nums) + assert all(NegInf < x or x is NegInf for x in ref_nums) + assert NegInf <= NegInf and NegInf == NegInf + assert not NegInf < NegInf and not NegInf > NegInf + assert libalgos.NegInfinity() == libalgos.NegInfinity() + assert not libalgos.NegInfinity() != libalgos.NegInfinity() + + for perm in permutations(ref_nums): + assert sorted(perm) == ref_nums + + # smoke tests + np.array([libalgos.Infinity()] * 32).argsort() + np.array([libalgos.NegInfinity()] * 32).argsort() + + +def test_infinity_against_nan(): + Inf = libalgos.Infinity() + NegInf = libalgos.NegInfinity() + + assert not Inf > np.nan + assert not Inf >= np.nan + assert not Inf < np.nan + assert not Inf <= np.nan + assert not Inf == np.nan + assert Inf != np.nan + + assert not NegInf > np.nan + assert not NegInf >= np.nan + assert not NegInf < np.nan + assert not NegInf <= np.nan + assert not NegInf == np.nan + assert NegInf != np.nan + + +def test_ensure_platform_int(): + arr = np.arange(100, dtype=np.intp) + + result = libalgos.ensure_platform_int(arr) + assert result is arr + + +def test_int64_add_overflow(): + # see gh-14068 + msg = "Overflow in int64 addition" + m = np.iinfo(np.int64).max + n = np.iinfo(np.int64).min + + with pytest.raises(OverflowError, match=msg): + algos.checked_add_with_arr(np.array([m, m]), m) + with pytest.raises(OverflowError, match=msg): + algos.checked_add_with_arr(np.array([m, m]), np.array([m, m])) + with pytest.raises(OverflowError, match=msg): + algos.checked_add_with_arr(np.array([n, n]), n) + with pytest.raises(OverflowError, match=msg): + algos.checked_add_with_arr(np.array([n, n]), np.array([n, n])) + with pytest.raises(OverflowError, match=msg): + algos.checked_add_with_arr(np.array([m, n]), np.array([n, n])) + with pytest.raises(OverflowError, match=msg): + algos.checked_add_with_arr( + np.array([m, m]), np.array([m, m]), arr_mask=np.array([False, True]) + ) + with pytest.raises(OverflowError, match=msg): + algos.checked_add_with_arr( + np.array([m, m]), np.array([m, m]), b_mask=np.array([False, True]) + ) + with pytest.raises(OverflowError, match=msg): + algos.checked_add_with_arr( + np.array([m, m]), + np.array([m, m]), + arr_mask=np.array([False, True]), + b_mask=np.array([False, True]), + ) + with pytest.raises(OverflowError, match=msg): + with tm.assert_produces_warning(RuntimeWarning): + algos.checked_add_with_arr(np.array([m, m]), np.array([np.nan, m])) + + # Check that the nan boolean arrays override whether or not + # the addition overflows. We don't check the result but just + # the fact that an OverflowError is not raised. 
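+ # (Editor's note: m is 2**63 - 1, so m + m == 2**64 - 2 falls outside
+ # the signed 64-bit range [-2**63, 2**63 - 1]; a True entry in either
+ # mask tells checked_add_with_arr to skip that position entirely.)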
+ algos.checked_add_with_arr( + np.array([m, m]), np.array([m, m]), arr_mask=np.array([True, True]) + ) + algos.checked_add_with_arr( + np.array([m, m]), np.array([m, m]), b_mask=np.array([True, True]) + ) + algos.checked_add_with_arr( + np.array([m, m]), + np.array([m, m]), + arr_mask=np.array([True, False]), + b_mask=np.array([False, True]), + ) + + +class TestMode: + def test_no_mode(self): + exp = Series([], dtype=np.float64, index=Index([], dtype=int)) + tm.assert_numpy_array_equal(algos.mode([]), exp.values) + + @pytest.mark.parametrize("dt", np.typecodes["AllInteger"] + np.typecodes["Float"]) + def test_mode_single(self, dt): + # GH 15714 + exp_single = [1] + data_single = [1] + + exp_multi = [1] + data_multi = [1, 1] + + ser = Series(data_single, dtype=dt) + exp = Series(exp_single, dtype=dt) + tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values) + tm.assert_series_equal(ser.mode(), exp) + + ser = Series(data_multi, dtype=dt) + exp = Series(exp_multi, dtype=dt) + tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values) + tm.assert_series_equal(ser.mode(), exp) + + def test_mode_obj_int(self): + exp = Series([1], dtype=int) + tm.assert_numpy_array_equal(algos.mode([1]), exp.values) + + exp = Series(["a", "b", "c"], dtype=object) + tm.assert_numpy_array_equal(algos.mode(["a", "b", "c"]), exp.values) + + @pytest.mark.parametrize("dt", np.typecodes["AllInteger"] + np.typecodes["Float"]) + def test_number_mode(self, dt): + exp_single = [1] + data_single = [1] * 5 + [2] * 3 + + exp_multi = [1, 3] + data_multi = [1] * 5 + [2] * 3 + [3] * 5 + + ser = Series(data_single, dtype=dt) + exp = Series(exp_single, dtype=dt) + tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values) + tm.assert_series_equal(ser.mode(), exp) + + ser = Series(data_multi, dtype=dt) + exp = Series(exp_multi, dtype=dt) + tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values) + tm.assert_series_equal(ser.mode(), exp) + + def test_strobj_mode(self): + exp = ["b"] + data = ["a"] * 2 + ["b"] * 3 + + ser = Series(data, dtype="c") + exp = Series(exp, dtype="c") + tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values) + tm.assert_series_equal(ser.mode(), exp) + + @pytest.mark.parametrize("dt", [str, object]) + def test_strobj_multi_char(self, dt): + exp = ["bar"] + data = ["foo"] * 2 + ["bar"] * 3 + + ser = Series(data, dtype=dt) + exp = Series(exp, dtype=dt) + tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values) + tm.assert_series_equal(ser.mode(), exp) + + def test_datelike_mode(self): + exp = Series(["1900-05-03", "2011-01-03", "2013-01-02"], dtype="M8[ns]") + ser = Series(["2011-01-03", "2013-01-02", "1900-05-03"], dtype="M8[ns]") + tm.assert_extension_array_equal(algos.mode(ser.values), exp._values) + tm.assert_series_equal(ser.mode(), exp) + + exp = Series(["2011-01-03", "2013-01-02"], dtype="M8[ns]") + ser = Series( + ["2011-01-03", "2013-01-02", "1900-05-03", "2011-01-03", "2013-01-02"], + dtype="M8[ns]", + ) + tm.assert_extension_array_equal(algos.mode(ser.values), exp._values) + tm.assert_series_equal(ser.mode(), exp) + + def test_timedelta_mode(self): + exp = Series(["-1 days", "0 days", "1 days"], dtype="timedelta64[ns]") + ser = Series(["1 days", "-1 days", "0 days"], dtype="timedelta64[ns]") + tm.assert_extension_array_equal(algos.mode(ser.values), exp._values) + tm.assert_series_equal(ser.mode(), exp) + + exp = Series(["2 min", "1 day"], dtype="timedelta64[ns]") + ser = Series( + ["1 day", "1 day", "-1 day", "-1 day 2 min", "2 min", "2 min"], + 
dtype="timedelta64[ns]", + ) + tm.assert_extension_array_equal(algos.mode(ser.values), exp._values) + tm.assert_series_equal(ser.mode(), exp) + + def test_mixed_dtype(self): + exp = Series(["foo"]) + ser = Series([1, "foo", "foo"]) + tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values) + tm.assert_series_equal(ser.mode(), exp) + + def test_uint64_overflow(self): + exp = Series([2 ** 63], dtype=np.uint64) + ser = Series([1, 2 ** 63, 2 ** 63], dtype=np.uint64) + tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values) + tm.assert_series_equal(ser.mode(), exp) + + exp = Series([1, 2 ** 63], dtype=np.uint64) + ser = Series([1, 2 ** 63], dtype=np.uint64) + tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values) + tm.assert_series_equal(ser.mode(), exp) + + def test_categorical(self): + c = Categorical([1, 2]) + exp = c + msg = "Categorical.mode is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + res = c.mode() + tm.assert_categorical_equal(res, exp) + + c = Categorical([1, "a", "a"]) + exp = Categorical(["a"], categories=[1, "a"]) + with tm.assert_produces_warning(FutureWarning, match=msg): + res = c.mode() + tm.assert_categorical_equal(res, exp) + + c = Categorical([1, 1, 2, 3, 3]) + exp = Categorical([1, 3], categories=[1, 2, 3]) + with tm.assert_produces_warning(FutureWarning, match=msg): + res = c.mode() + tm.assert_categorical_equal(res, exp) + + def test_index(self): + idx = Index([1, 2, 3]) + exp = Series([1, 2, 3], dtype=np.int64) + tm.assert_numpy_array_equal(algos.mode(idx), exp.values) + + idx = Index([1, "a", "a"]) + exp = Series(["a"], dtype=object) + tm.assert_numpy_array_equal(algos.mode(idx), exp.values) + + idx = Index([1, 1, 2, 3, 3]) + exp = Series([1, 3], dtype=np.int64) + tm.assert_numpy_array_equal(algos.mode(idx), exp.values) + + idx = Index( + ["1 day", "1 day", "-1 day", "-1 day 2 min", "2 min", "2 min"], + dtype="timedelta64[ns]", + ) + with pytest.raises(AttributeError, match="TimedeltaIndex"): + # algos.mode expects Arraylike, does *not* unwrap TimedeltaIndex + algos.mode(idx) + + +class TestDiff: + @pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"]) + def test_diff_datetimelike_nat(self, dtype): + # NaT - NaT is NaT, not 0 + arr = np.arange(12).astype(np.int64).view(dtype).reshape(3, 4) + arr[:, 2] = arr.dtype.type("NaT", "ns") + result = algos.diff(arr, 1, axis=0) + + expected = np.ones(arr.shape, dtype="timedelta64[ns]") * 4 + expected[:, 2] = np.timedelta64("NaT", "ns") + expected[0, :] = np.timedelta64("NaT", "ns") + + tm.assert_numpy_array_equal(result, expected) + + result = algos.diff(arr.T, 1, axis=1) + tm.assert_numpy_array_equal(result, expected.T) + + def test_diff_ea_axis(self): + dta = date_range("2016-01-01", periods=3, tz="US/Pacific")._data + + msg = "cannot diff DatetimeArray on axis=1" + with pytest.raises(ValueError, match=msg): + algos.diff(dta, 1, axis=1) + + @pytest.mark.parametrize("dtype", ["int8", "int16"]) + def test_diff_low_precision_int(self, dtype): + arr = np.array([0, 1, 1, 0, 0], dtype=dtype) + result = algos.diff(arr, 1) + expected = np.array([np.nan, 1, 0, -1, 0], dtype="float32") + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize("op", [np.array, pd.array]) +def test_union_with_duplicates(op): + # GH#36289 + lvals = op([3, 1, 3, 4]) + rvals = op([2, 3, 1, 1]) + expected = op([3, 3, 1, 1, 4, 2]) + if isinstance(expected, np.ndarray): + result = algos.union_with_duplicates(lvals, rvals) + tm.assert_numpy_array_equal(result, expected) + else: + result 
= algos.union_with_duplicates(lvals, rvals) + tm.assert_extension_array_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/test_common.py b/.local/lib/python3.8/site-packages/pandas/tests/test_common.py new file mode 100644 index 0000000..0850ba6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/test_common.py @@ -0,0 +1,215 @@ +import collections +from functools import partial +import string + +import numpy as np +import pytest + +import pandas as pd +from pandas import Series +import pandas._testing as tm +from pandas.core import ops +import pandas.core.common as com +from pandas.util.version import Version + + +def test_get_callable_name(): + getname = com.get_callable_name + + def fn(x): + return x + + lambda_ = lambda x: x + part1 = partial(fn) + part2 = partial(part1) + + class somecall: + def __call__(self): + # This shouldn't actually get called below; somecall.__init__ + # should. + raise NotImplementedError + + assert getname(fn) == "fn" + assert getname(lambda_) + assert getname(part1) == "fn" + assert getname(part2) == "fn" + assert getname(somecall()) == "somecall" + assert getname(1) is None + + +def test_any_none(): + assert com.any_none(1, 2, 3, None) + assert not com.any_none(1, 2, 3, 4) + + +def test_all_not_none(): + assert com.all_not_none(1, 2, 3, 4) + assert not com.all_not_none(1, 2, 3, None) + assert not com.all_not_none(None, None, None, None) + + +def test_random_state(): + import numpy.random as npr + + # Check with seed + state = com.random_state(5) + assert state.uniform() == npr.RandomState(5).uniform() + + # Check with random state object + state2 = npr.RandomState(10) + assert com.random_state(state2).uniform() == npr.RandomState(10).uniform() + + # check with no arg random state + assert com.random_state() is np.random + + # check array-like + # GH32503 + state_arr_like = npr.randint(0, 2 ** 31, size=624, dtype="uint32") + assert ( + com.random_state(state_arr_like).uniform() + == npr.RandomState(state_arr_like).uniform() + ) + + # Check BitGenerators + # GH32503 + assert ( + com.random_state(npr.MT19937(3)).uniform() + == npr.RandomState(npr.MT19937(3)).uniform() + ) + assert ( + com.random_state(npr.PCG64(11)).uniform() + == npr.RandomState(npr.PCG64(11)).uniform() + ) + + # Error for floats or strings + msg = ( + "random_state must be an integer, array-like, a BitGenerator, Generator, " + "a numpy RandomState, or None" + ) + with pytest.raises(ValueError, match=msg): + com.random_state("test") + + with pytest.raises(ValueError, match=msg): + com.random_state(5.5) + + +@pytest.mark.parametrize( + "left, right, expected", + [ + (Series([1], name="x"), Series([2], name="x"), "x"), + (Series([1], name="x"), Series([2], name="y"), None), + (Series([1]), Series([2], name="x"), None), + (Series([1], name="x"), Series([2]), None), + (Series([1], name="x"), [2], "x"), + ([1], Series([2], name="y"), "y"), + # matching NAs + (Series([1], name=np.nan), pd.Index([], name=np.nan), np.nan), + (Series([1], name=np.nan), pd.Index([], name=pd.NaT), None), + (Series([1], name=pd.NA), pd.Index([], name=pd.NA), pd.NA), + # tuple name GH#39757 + ( + Series([1], name=np.int64(1)), + pd.Index([], name=(np.int64(1), np.int64(2))), + None, + ), + ( + Series([1], name=(np.int64(1), np.int64(2))), + pd.Index([], name=(np.int64(1), np.int64(2))), + (np.int64(1), np.int64(2)), + ), + pytest.param( + Series([1], name=(np.float64("nan"), np.int64(2))), + pd.Index([], name=(np.float64("nan"), np.int64(2))), + (np.float64("nan"), 
np.int64(2)), + marks=pytest.mark.xfail( + reason="Not checking for matching NAs inside tuples." + ), + ), + ], +) +def test_maybe_match_name(left, right, expected): + res = ops.common._maybe_match_name(left, right) + assert res is expected or res == expected + + +def test_standardize_mapping(): + # No uninitialized defaultdicts + msg = r"to_dict\(\) only accepts initialized defaultdicts" + with pytest.raises(TypeError, match=msg): + com.standardize_mapping(collections.defaultdict) + + # No non-mapping subtypes, instance + msg = "unsupported type: " + with pytest.raises(TypeError, match=msg): + com.standardize_mapping([]) + + # No non-mapping subtypes, class + with pytest.raises(TypeError, match=msg): + com.standardize_mapping(list) + + fill = {"bad": "data"} + assert com.standardize_mapping(fill) == dict + + # Convert instance to type + assert com.standardize_mapping({}) == dict + + dd = collections.defaultdict(list) + assert isinstance(com.standardize_mapping(dd), partial) + + +def test_git_version(): + # GH 21295 + git_version = pd.__git_version__ + assert len(git_version) == 40 + assert all(c in string.hexdigits for c in git_version) + + +def test_version_tag(): + version = Version(pd.__version__) + try: + version > Version("0.0.1") + except TypeError: + raise ValueError( + "No git tags exist, please sync tags between upstream and your repo" + ) + + +@pytest.mark.parametrize( + "obj", [(obj,) for obj in pd.__dict__.values() if callable(obj)] +) +def test_serializable(obj): + # GH 35611 + unpickled = tm.round_trip_pickle(obj) + assert type(obj) == type(unpickled) + + +class TestIsBoolIndexer: + def test_non_bool_array_with_na(self): + # in particular, this should not raise + arr = np.array(["A", "B", np.nan], dtype=object) + assert not com.is_bool_indexer(arr) + + def test_list_subclass(self): + # GH#42433 + + class MyList(list): + pass + + val = MyList(["a"]) + + assert not com.is_bool_indexer(val) + + val = MyList([True]) + assert com.is_bool_indexer(val) + + def test_frozenlist(self): + # GH#42461 + data = {"col1": [1, 2], "col2": [3, 4]} + df = pd.DataFrame(data=data) + + frozen = df.index.names[1:] + assert not com.is_bool_indexer(frozen) + + result = df[frozen] + expected = df[[]] + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/test_downstream.py b/.local/lib/python3.8/site-packages/pandas/tests/test_downstream.py new file mode 100644 index 0000000..49ec5c1 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/test_downstream.py @@ -0,0 +1,270 @@ +""" +Testing that we work in the downstream packages +""" +import importlib +import subprocess +import sys + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import DataFrame +import pandas._testing as tm + + +def import_module(name): + # we *only* want to skip if the module is truly not available + # and NOT just an actual import error because of pandas changes + + try: + return importlib.import_module(name) + except ModuleNotFoundError: + pytest.skip(f"skipping as {name} not available") + + +@pytest.fixture +def df(): + return DataFrame({"A": [1, 2, 3]}) + + +@pytest.mark.filterwarnings("ignore:.*64Index is deprecated:FutureWarning") +def test_dask(df): + + # dask sets "compute.use_numexpr" to False, so catch the current value + # and ensure to reset it afterwards to avoid impacting other tests + olduse = pd.get_option("compute.use_numexpr") + + try: + toolz = import_module("toolz") # noqa:F841 + dask 
= import_module("dask") # noqa:F841 + + import dask.dataframe as dd + + ddf = dd.from_pandas(df, npartitions=3) + assert ddf.A is not None + assert ddf.compute() is not None + finally: + pd.set_option("compute.use_numexpr", olduse) + + +@pytest.mark.filterwarnings("ignore:.*64Index is deprecated:FutureWarning") +def test_dask_ufunc(): + # At the time of dask 2022.01.0, dask is still directly using __array_wrap__ + # for some ufuncs (https://github.com/dask/dask/issues/8580). + + # dask sets "compute.use_numexpr" to False, so catch the current value + # and ensure to reset it afterwards to avoid impacting other tests + olduse = pd.get_option("compute.use_numexpr") + + try: + dask = import_module("dask") # noqa:F841 + import dask.array as da + import dask.dataframe as dd + + s = pd.Series([1.5, 2.3, 3.7, 4.0]) + ds = dd.from_pandas(s, npartitions=2) + + result = da.fix(ds).compute() + expected = np.fix(s) + tm.assert_series_equal(result, expected) + finally: + pd.set_option("compute.use_numexpr", olduse) + + +def test_xarray(df): + + xarray = import_module("xarray") # noqa:F841 + + assert df.to_xarray() is not None + + +@td.skip_if_no("cftime") +@td.skip_if_no("xarray", "0.10.4") +def test_xarray_cftimeindex_nearest(): + # https://github.com/pydata/xarray/issues/3751 + import cftime + import xarray + + times = xarray.cftime_range("0001", periods=2) + key = cftime.DatetimeGregorian(2000, 1, 1) + with tm.assert_produces_warning( + FutureWarning, match="deprecated", check_stacklevel=False + ): + result = times.get_loc(key, method="nearest") + expected = 1 + assert result == expected + + +def test_oo_optimizable(): + # GH 21071 + subprocess.check_call([sys.executable, "-OO", "-c", "import pandas"]) + + +def test_oo_optimized_datetime_index_unpickle(): + # GH 42866 + subprocess.check_call( + [ + sys.executable, + "-OO", + "-c", + ( + "import pandas as pd, pickle; " + "pickle.loads(pickle.dumps(pd.date_range('2021-01-01', periods=1)))" + ), + ] + ) + + +@pytest.mark.network +@tm.network +# Cython import warning +@pytest.mark.filterwarnings("ignore:pandas.util.testing is deprecated") +@pytest.mark.filterwarnings("ignore:can't:ImportWarning") +@pytest.mark.filterwarnings("ignore:.*64Index is deprecated:FutureWarning") +@pytest.mark.filterwarnings( + # patsy needs to update their imports + "ignore:Using or importing the ABCs from 'collections:DeprecationWarning" +) +@pytest.mark.filterwarnings( + # numpy 1.22 + "ignore:`np.MachAr` is deprecated.*:DeprecationWarning" +) +def test_statsmodels(): + + statsmodels = import_module("statsmodels") # noqa:F841 + import statsmodels.api as sm + import statsmodels.formula.api as smf + + df = sm.datasets.get_rdataset("Guerry", "HistData").data + smf.ols("Lottery ~ Literacy + np.log(Pop1831)", data=df).fit() + + +# Cython import warning +@pytest.mark.filterwarnings("ignore:can't:ImportWarning") +def test_scikit_learn(df): + + sklearn = import_module("sklearn") # noqa:F841 + from sklearn import ( + datasets, + svm, + ) + + digits = datasets.load_digits() + clf = svm.SVC(gamma=0.001, C=100.0) + clf.fit(digits.data[:-1], digits.target[:-1]) + clf.predict(digits.data[-1:]) + + +# Cython import warning and traitlets +@pytest.mark.network +@tm.network +@pytest.mark.filterwarnings("ignore") +def test_seaborn(): + + seaborn = import_module("seaborn") + tips = seaborn.load_dataset("tips") + seaborn.stripplot(x="day", y="total_bill", data=tips) + + +def test_pandas_gbq(df): + + pandas_gbq = import_module("pandas_gbq") # noqa:F841 + + +@pytest.mark.network +@tm.network 
+@pytest.mark.xfail( + raises=ValueError, + reason="The Quandl API key must be provided either through the api_key " + "variable or through the environmental variable QUANDL_API_KEY", +) +def test_pandas_datareader(): + + pandas_datareader = import_module("pandas_datareader") + pandas_datareader.DataReader("F", "quandl", "2017-01-01", "2017-02-01") + + +# importing from pandas, Cython import warning +@pytest.mark.filterwarnings("ignore:can't resolve:ImportWarning") +def test_geopandas(): + + geopandas = import_module("geopandas") + fp = geopandas.datasets.get_path("naturalearth_lowres") + assert geopandas.read_file(fp) is not None + + +# Cython import warning +@pytest.mark.filterwarnings("ignore:can't resolve:ImportWarning") +@pytest.mark.filterwarnings("ignore:RangeIndex.* is deprecated:DeprecationWarning") +def test_pyarrow(df): + + pyarrow = import_module("pyarrow") + table = pyarrow.Table.from_pandas(df) + result = table.to_pandas() + tm.assert_frame_equal(result, df) + + +def test_torch_frame_construction(using_array_manager): + # GH#44616 + torch = import_module("torch") + val_tensor = torch.randn(700, 64) + + df = DataFrame(val_tensor) + + if not using_array_manager: + assert np.shares_memory(df, val_tensor) + + ser = pd.Series(val_tensor[0]) + assert np.shares_memory(ser, val_tensor) + + +def test_yaml_dump(df): + # GH#42748 + yaml = import_module("yaml") + + dumped = yaml.dump(df) + + loaded = yaml.load(dumped, Loader=yaml.Loader) + tm.assert_frame_equal(df, loaded) + + loaded2 = yaml.load(dumped, Loader=yaml.UnsafeLoader) + tm.assert_frame_equal(df, loaded2) + + +def test_missing_required_dependency(): + # GH 23868 + # To ensure proper isolation, we pass these flags + # -S : disable site-packages + # -s : disable user site-packages + # -E : disable PYTHON* env vars, especially PYTHONPATH + # https://github.com/MacPython/pandas-wheels/pull/50 + + pyexe = sys.executable.replace("\\", "/") + + # We skip this test if pandas is installed as a site package. We first + # import the package normally and check the path to the module before + # executing the test which imports pandas with site packages disabled. + call = [pyexe, "-c", "import pandas;print(pandas.__file__)"] + output = subprocess.check_output(call).decode() + if "site-packages" in output: + pytest.skip("pandas installed as site package") + + # This test will fail if pandas is installed as a site package. The flags + # prevent pandas being imported and the test will report Failed: DID NOT + # RAISE + call = [pyexe, "-sSE", "-c", "import pandas"] + + msg = ( + rf"Command '\['{pyexe}', '-sSE', '-c', 'import pandas'\]' " + "returned non-zero exit status 1." 
+ ) + + with pytest.raises(subprocess.CalledProcessError, match=msg) as exc: + subprocess.check_output(call, stderr=subprocess.STDOUT) + + output = exc.value.stdout.decode() + for name in ["numpy", "pytz", "dateutil"]: + assert name in output diff --git a/.local/lib/python3.8/site-packages/pandas/tests/test_errors.py b/.local/lib/python3.8/site-packages/pandas/tests/test_errors.py new file mode 100644 index 0000000..6207b88 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/test_errors.py @@ -0,0 +1,70 @@ +import pytest + +from pandas.errors import AbstractMethodError + +import pandas as pd + + +@pytest.mark.parametrize( + "exc", + [ + "UnsupportedFunctionCall", + "UnsortedIndexError", + "OutOfBoundsDatetime", + "ParserError", + "PerformanceWarning", + "DtypeWarning", + "EmptyDataError", + "ParserWarning", + "MergeError", + "OptionError", + "NumbaUtilError", + ], +) +def test_exception_importable(exc): + from pandas import errors + + err = getattr(errors, exc) + assert err is not None + + # check that we can raise on them + + msg = "^$" + + with pytest.raises(err, match=msg): + raise err() + + +def test_catch_oob(): + from pandas import errors + + msg = "Out of bounds nanosecond timestamp: 1500-01-01 00:00:00" + with pytest.raises(errors.OutOfBoundsDatetime, match=msg): + pd.Timestamp("15000101") + + +class Foo: + @classmethod + def classmethod(cls): + raise AbstractMethodError(cls, methodtype="classmethod") + + @property + def property(self): + raise AbstractMethodError(self, methodtype="property") + + def method(self): + raise AbstractMethodError(self) + + +def test_AbstractMethodError_classmethod(): + xpr = "This classmethod must be defined in the concrete class Foo" + with pytest.raises(AbstractMethodError, match=xpr): + Foo.classmethod() + + xpr = "This property must be defined in the concrete class Foo" + with pytest.raises(AbstractMethodError, match=xpr): + Foo().property + + xpr = "This method must be defined in the concrete class Foo" + with pytest.raises(AbstractMethodError, match=xpr): + Foo().method() diff --git a/.local/lib/python3.8/site-packages/pandas/tests/test_expressions.py b/.local/lib/python3.8/site-packages/pandas/tests/test_expressions.py new file mode 100644 index 0000000..d8afb4a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/test_expressions.py @@ -0,0 +1,411 @@ +import operator +import re +import warnings + +import numpy as np +import pytest + +from pandas import set_option +import pandas._testing as tm +from pandas.core.api import ( + DataFrame, + Index, + Series, +) +from pandas.core.computation import expressions as expr + +_frame = DataFrame(np.random.randn(10001, 4), columns=list("ABCD"), dtype="float64") +_frame2 = DataFrame(np.random.randn(100, 4), columns=list("ABCD"), dtype="float64") +_mixed = DataFrame( + { + "A": _frame["A"].copy(), + "B": _frame["B"].astype("float32"), + "C": _frame["C"].astype("int64"), + "D": _frame["D"].astype("int32"), + } +) +_mixed2 = DataFrame( + { + "A": _frame2["A"].copy(), + "B": _frame2["B"].astype("float32"), + "C": _frame2["C"].astype("int64"), + "D": _frame2["D"].astype("int32"), + } +) +_integer = DataFrame( + np.random.randint(1, 100, size=(10001, 4)), columns=list("ABCD"), dtype="int64" +) +_integer2 = DataFrame( + np.random.randint(1, 100, size=(101, 4)), columns=list("ABCD"), dtype="int64" +) +_array = _frame["A"].values.copy() +_array2 = _frame2["A"].values.copy() + +_array_mixed = _mixed["D"].values.copy() +_array_mixed2 = _mixed2["D"].values.copy() + + 
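+# Editor's sketch (hypothetical helper, not part of the original module):
+# numexpr is consulted only above the expr._MIN_ELEMENTS size gate, which
+# is why the fixtures above come in a large (10001-row) and a small
+# (100-row) flavour; roughly:
+def _big_enough_for_numexpr_sketch(arr):
+    # mirrors only the size part of expressions._can_use_numexpr
+    return expr.USE_NUMEXPR and arr.size > expr._MIN_ELEMENTS
+
+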
+@pytest.mark.skipif(not expr.USE_NUMEXPR, reason="not using numexpr")
+class TestExpressions:
+    def setup_method(self, method):
+        self._MIN_ELEMENTS = expr._MIN_ELEMENTS
+
+    def teardown_method(self, method):
+        expr._MIN_ELEMENTS = self._MIN_ELEMENTS
+
+    @staticmethod
+    def call_op(df, other, flex: bool, opname: str):
+        if flex:
+            op = lambda x, y: getattr(x, opname)(y)
+            op.__name__ = opname
+        else:
+            op = getattr(operator, opname)
+
+        set_option("compute.use_numexpr", False)
+        expected = op(df, other)
+        set_option("compute.use_numexpr", True)
+
+        expr.get_test_result()
+
+        result = op(df, other)
+        return result, expected
+
+    @pytest.mark.parametrize(
+        "df",
+        [
+            _integer,
+            _integer2,
+            # randint to get a case with zeros
+            _integer * np.random.randint(0, 2, size=np.shape(_integer)),
+            _frame,
+            _frame2,
+            _mixed,
+            _mixed2,
+        ],
+    )
+    @pytest.mark.parametrize("flex", [True, False])
+    @pytest.mark.parametrize(
+        "arith", ["add", "sub", "mul", "mod", "truediv", "floordiv"]
+    )
+    def test_run_arithmetic(self, df, flex, arith):
+        expr._MIN_ELEMENTS = 0
+        result, expected = self.call_op(df, df, flex, arith)
+
+        if arith == "truediv":
+            assert all(x.kind == "f" for x in expected.dtypes.values)
+        tm.assert_equal(expected, result)
+
+        for i in range(len(df.columns)):
+            result, expected = self.call_op(df.iloc[:, i], df.iloc[:, i], flex, arith)
+            if arith == "truediv":
+                assert expected.dtype.kind == "f"
+            tm.assert_equal(expected, result)
+
+    @pytest.mark.parametrize(
+        "df",
+        [
+            _integer,
+            _integer2,
+            # randint to get a case with zeros
+            _integer * np.random.randint(0, 2, size=np.shape(_integer)),
+            _frame,
+            _frame2,
+            _mixed,
+            _mixed2,
+        ],
+    )
+    @pytest.mark.parametrize("flex", [True, False])
+    def test_run_binary(self, df, flex, comparison_op):
+        """
+        Tests solely that the result is the same whether or not numexpr is
+        enabled; whether the function does the correct thing must be tested
+        elsewhere.
+        """
+        arith = comparison_op.__name__
+        set_option("compute.use_numexpr", False)
+        other = df.copy() + 1
+        set_option("compute.use_numexpr", True)
+
+        expr._MIN_ELEMENTS = 0
+        expr.set_test_mode(True)
+
+        result, expected = self.call_op(df, other, flex, arith)
+
+        used_numexpr = expr.get_test_result()
+        assert used_numexpr, "Did not use numexpr as expected."
+        tm.assert_equal(expected, result)
+
+        # FIXME: don't leave commented-out
+        # Series comparisons use vec_compare instead of numexpr, so these
+        # checks are disabled:
+ # for i in range(len(df.columns)): + # binary_comp = other.iloc[:, i] + 1 + # self.run_binary(df.iloc[:, i], binary_comp, flex) + + def test_invalid(self): + array = np.random.randn(1_000_001) + array2 = np.random.randn(100) + + # no op + result = expr._can_use_numexpr(operator.add, None, array, array, "evaluate") + assert not result + + # min elements + result = expr._can_use_numexpr(operator.add, "+", array2, array2, "evaluate") + assert not result + + # ok, we only check on first part of expression + result = expr._can_use_numexpr(operator.add, "+", array, array2, "evaluate") + assert result + + @pytest.mark.parametrize( + "opname,op_str", + [("add", "+"), ("sub", "-"), ("mul", "*"), ("truediv", "/"), ("pow", "**")], + ) + @pytest.mark.parametrize( + "left,right", [(_array, _array2), (_array_mixed, _array_mixed2)] + ) + def test_binary_ops(self, opname, op_str, left, right): + def testit(): + + if opname == "pow": + # TODO: get this working + return + + op = getattr(operator, opname) + + with warnings.catch_warnings(): + # array has 0s + msg = "invalid value encountered in true_divide" + warnings.filterwarnings("ignore", msg, RuntimeWarning) + result = expr.evaluate(op, left, left, use_numexpr=True) + expected = expr.evaluate(op, left, left, use_numexpr=False) + tm.assert_numpy_array_equal(result, expected) + + result = expr._can_use_numexpr(op, op_str, right, right, "evaluate") + assert not result + + set_option("compute.use_numexpr", False) + testit() + set_option("compute.use_numexpr", True) + expr.set_numexpr_threads(1) + testit() + expr.set_numexpr_threads() + testit() + + @pytest.mark.parametrize( + "opname,op_str", + [ + ("gt", ">"), + ("lt", "<"), + ("ge", ">="), + ("le", "<="), + ("eq", "=="), + ("ne", "!="), + ], + ) + @pytest.mark.parametrize( + "left,right", [(_array, _array2), (_array_mixed, _array_mixed2)] + ) + def test_comparison_ops(self, opname, op_str, left, right): + def testit(): + f12 = left + 1 + f22 = right + 1 + + op = getattr(operator, opname) + + result = expr.evaluate(op, left, f12, use_numexpr=True) + expected = expr.evaluate(op, left, f12, use_numexpr=False) + tm.assert_numpy_array_equal(result, expected) + + result = expr._can_use_numexpr(op, op_str, right, f22, "evaluate") + assert not result + + set_option("compute.use_numexpr", False) + testit() + set_option("compute.use_numexpr", True) + expr.set_numexpr_threads(1) + testit() + expr.set_numexpr_threads() + testit() + + @pytest.mark.parametrize("cond", [True, False]) + @pytest.mark.parametrize("df", [_frame, _frame2, _mixed, _mixed2]) + def test_where(self, cond, df): + def testit(): + c = np.empty(df.shape, dtype=np.bool_) + c.fill(cond) + result = expr.where(c, df.values, df.values + 1) + expected = np.where(c, df.values, df.values + 1) + tm.assert_numpy_array_equal(result, expected) + + set_option("compute.use_numexpr", False) + testit() + set_option("compute.use_numexpr", True) + expr.set_numexpr_threads(1) + testit() + expr.set_numexpr_threads() + testit() + + @pytest.mark.parametrize( + "op_str,opname", [("/", "truediv"), ("//", "floordiv"), ("**", "pow")] + ) + def test_bool_ops_raise_on_arithmetic(self, op_str, opname): + df = DataFrame({"a": np.random.rand(10) > 0.5, "b": np.random.rand(10) > 0.5}) + + msg = f"operator '{opname}' not implemented for bool dtypes" + f = getattr(operator, opname) + err_msg = re.escape(msg) + + with pytest.raises(NotImplementedError, match=err_msg): + f(df, df) + + with pytest.raises(NotImplementedError, match=err_msg): + f(df.a, df.b) + + with 
pytest.raises(NotImplementedError, match=err_msg): + f(df.a, True) + + with pytest.raises(NotImplementedError, match=err_msg): + f(False, df.a) + + with pytest.raises(NotImplementedError, match=err_msg): + f(False, df) + + with pytest.raises(NotImplementedError, match=err_msg): + f(df, True) + + @pytest.mark.parametrize( + "op_str,opname", [("+", "add"), ("*", "mul"), ("-", "sub")] + ) + def test_bool_ops_warn_on_arithmetic(self, op_str, opname): + n = 10 + df = DataFrame({"a": np.random.rand(n) > 0.5, "b": np.random.rand(n) > 0.5}) + + subs = {"+": "|", "*": "&", "-": "^"} + sub_funcs = {"|": "or_", "&": "and_", "^": "xor"} + + f = getattr(operator, opname) + fe = getattr(operator, sub_funcs[subs[op_str]]) + + if op_str == "-": + # raises TypeError + return + + with tm.use_numexpr(True, min_elements=5): + with tm.assert_produces_warning(): + r = f(df, df) + e = fe(df, df) + tm.assert_frame_equal(r, e) + + with tm.assert_produces_warning(): + r = f(df.a, df.b) + e = fe(df.a, df.b) + tm.assert_series_equal(r, e) + + with tm.assert_produces_warning(): + r = f(df.a, True) + e = fe(df.a, True) + tm.assert_series_equal(r, e) + + with tm.assert_produces_warning(): + r = f(False, df.a) + e = fe(False, df.a) + tm.assert_series_equal(r, e) + + with tm.assert_produces_warning(): + r = f(False, df) + e = fe(False, df) + tm.assert_frame_equal(r, e) + + with tm.assert_produces_warning(): + r = f(df, True) + e = fe(df, True) + tm.assert_frame_equal(r, e) + + @pytest.mark.parametrize( + "test_input,expected", + [ + ( + DataFrame( + [[0, 1, 2, "aa"], [0, 1, 2, "aa"]], columns=["a", "b", "c", "dtype"] + ), + DataFrame([[False, False], [False, False]], columns=["a", "dtype"]), + ), + ( + DataFrame( + [[0, 3, 2, "aa"], [0, 4, 2, "aa"], [0, 1, 1, "bb"]], + columns=["a", "b", "c", "dtype"], + ), + DataFrame( + [[False, False], [False, False], [False, False]], + columns=["a", "dtype"], + ), + ), + ], + ) + def test_bool_ops_column_name_dtype(self, test_input, expected): + # GH 22383 - .ne fails if columns containing column name 'dtype' + result = test_input.loc[:, ["a", "dtype"]].ne(test_input.loc[:, ["a", "dtype"]]) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "arith", ("add", "sub", "mul", "mod", "truediv", "floordiv") + ) + @pytest.mark.parametrize("axis", (0, 1)) + def test_frame_series_axis(self, axis, arith): + # GH#26736 Dataframe.floordiv(Series, axis=1) fails + + df = _frame + if axis == 1: + other = df.iloc[0, :] + else: + other = df.iloc[:, 0] + + expr._MIN_ELEMENTS = 0 + + op_func = getattr(df, arith) + + set_option("compute.use_numexpr", False) + expected = op_func(other, axis=axis) + set_option("compute.use_numexpr", True) + + result = op_func(other, axis=axis) + tm.assert_frame_equal(expected, result) + + @pytest.mark.parametrize( + "op", + [ + "__mod__", + "__rmod__", + "__floordiv__", + "__rfloordiv__", + ], + ) + @pytest.mark.parametrize("box", [DataFrame, Series, Index]) + @pytest.mark.parametrize("scalar", [-5, 5]) + def test_python_semantics_with_numexpr_installed(self, op, box, scalar): + # https://github.com/pandas-dev/pandas/issues/36047 + expr._MIN_ELEMENTS = 0 + data = np.arange(-50, 50) + obj = box(data) + method = getattr(obj, op) + result = method(scalar) + + # compare result with numpy + set_option("compute.use_numexpr", False) + expected = method(scalar) + set_option("compute.use_numexpr", True) + tm.assert_equal(result, expected) + + # compare result element-wise with Python + for i, elem in enumerate(data): + if box == DataFrame: + scalar_result = 
result.iloc[i, 0]
+            else:
+                scalar_result = result[i]
+            try:
+                expected = getattr(int(elem), op)(scalar)
+            except ZeroDivisionError:
+                pass
+            else:
+                assert scalar_result == expected
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/test_flags.py b/.local/lib/python3.8/site-packages/pandas/tests/test_flags.py
new file mode 100644
index 0000000..9294b3f
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/tests/test_flags.py
@@ -0,0 +1,48 @@
+import pytest
+
+import pandas as pd
+
+
+class TestFlags:
+    def test_equality(self):
+        a = pd.DataFrame().set_flags(allows_duplicate_labels=True).flags
+        b = pd.DataFrame().set_flags(allows_duplicate_labels=False).flags
+
+        assert a == a
+        assert b == b
+        assert a != b
+        assert a != 2
+
+    def test_set(self):
+        df = pd.DataFrame().set_flags(allows_duplicate_labels=True)
+        a = df.flags
+        a.allows_duplicate_labels = False
+        assert a.allows_duplicate_labels is False
+        a["allows_duplicate_labels"] = True
+        assert a.allows_duplicate_labels is True
+
+    def test_repr(self):
+        a = repr(pd.DataFrame({"A"}).set_flags(allows_duplicate_labels=True).flags)
+        assert a == "<Flags(allows_duplicate_labels=True)>"
+        a = repr(pd.DataFrame({"A"}).set_flags(allows_duplicate_labels=False).flags)
+        assert a == "<Flags(allows_duplicate_labels=False)>"
+
+    def test_obj_ref(self):
+        df = pd.DataFrame()
+        flags = df.flags
+        del df
+        with pytest.raises(ValueError, match="object has been deleted"):
+            flags.allows_duplicate_labels = True
+
+    def test_getitem(self):
+        df = pd.DataFrame()
+        flags = df.flags
+        assert flags["allows_duplicate_labels"] is True
+        flags["allows_duplicate_labels"] = False
+        assert flags["allows_duplicate_labels"] is False
+
+        with pytest.raises(KeyError, match="a"):
+            flags["a"]
+
+        with pytest.raises(ValueError, match="a"):
+            flags["a"] = 10
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/test_multilevel.py b/.local/lib/python3.8/site-packages/pandas/tests/test_multilevel.py
new file mode 100644
index 0000000..1bff5b4
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pandas/tests/test_multilevel.py
@@ -0,0 +1,422 @@
+import numpy as np
+import pytest
+
+import pandas as pd
+from pandas import (
+    DataFrame,
+    MultiIndex,
+    Series,
+)
+import pandas._testing as tm
+
+AGG_FUNCTIONS = [
+    "sum",
+    "prod",
+    "min",
+    "max",
+    "median",
+    "mean",
+    "skew",
+    "mad",
+    "std",
+    "var",
+    "sem",
+]
+
+
+class TestMultiLevel:
+    def test_reindex_level(self, multiindex_year_month_day_dataframe_random_data):
+        # axis=0
+        ymd = multiindex_year_month_day_dataframe_random_data
+
+        with tm.assert_produces_warning(FutureWarning):
+            month_sums = ymd.sum(level="month")
+        result = month_sums.reindex(ymd.index, level=1)
+        expected = ymd.groupby(level="month").transform(np.sum)
+
+        tm.assert_frame_equal(result, expected)
+
+        # Series
+        result = month_sums["A"].reindex(ymd.index, level=1)
+        expected = ymd["A"].groupby(level="month").transform(np.sum)
+        tm.assert_series_equal(result, expected, check_names=False)
+
+        # axis=1
+        with tm.assert_produces_warning(FutureWarning):
+            month_sums = ymd.T.sum(axis=1, level="month")
+        result = month_sums.reindex(columns=ymd.index, level=1)
+        expected = ymd.groupby(level="month").transform(np.sum).T
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize("opname", ["sub", "add", "mul", "div"])
+    def test_binops_level(
+        self, opname, multiindex_year_month_day_dataframe_random_data
+    ):
+        ymd = multiindex_year_month_day_dataframe_random_data
+
+        op = getattr(DataFrame, opname)
+        with tm.assert_produces_warning(FutureWarning):
+            month_sums = ymd.sum(level="month")
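+        # The FutureWarning asserted above comes from the deprecated level=
+        # keyword on reductions such as sum(); groupby(level=...).sum() is
+        # the replacement spelling.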
result = op(ymd, month_sums, level="month") + + broadcasted = ymd.groupby(level="month").transform(np.sum) + expected = op(ymd, broadcasted) + tm.assert_frame_equal(result, expected) + + # Series + op = getattr(Series, opname) + result = op(ymd["A"], month_sums["A"], level="month") + broadcasted = ymd["A"].groupby(level="month").transform(np.sum) + expected = op(ymd["A"], broadcasted) + expected.name = "A" + tm.assert_series_equal(result, expected) + + def test_reindex(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + + expected = frame.iloc[[0, 3]] + reindexed = frame.loc[[("foo", "one"), ("bar", "one")]] + tm.assert_frame_equal(reindexed, expected) + + def test_reindex_preserve_levels( + self, multiindex_year_month_day_dataframe_random_data + ): + ymd = multiindex_year_month_day_dataframe_random_data + + new_index = ymd.index[::10] + chunk = ymd.reindex(new_index) + assert chunk.index is new_index + + chunk = ymd.loc[new_index] + assert chunk.index.equals(new_index) + + ymdT = ymd.T + chunk = ymdT.reindex(columns=new_index) + assert chunk.columns is new_index + + chunk = ymdT.loc[:, new_index] + assert chunk.columns.equals(new_index) + + def test_groupby_transform(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + + s = frame["A"] + grouper = s.index.get_level_values(0) + + grouped = s.groupby(grouper) + + applied = grouped.apply(lambda x: x * 2) + expected = grouped.transform(lambda x: x * 2) + result = applied.reindex(expected.index) + tm.assert_series_equal(result, expected, check_names=False) + + def test_groupby_corner(self): + midx = MultiIndex( + levels=[["foo"], ["bar"], ["baz"]], + codes=[[0], [0], [0]], + names=["one", "two", "three"], + ) + df = DataFrame([np.random.rand(4)], columns=["a", "b", "c", "d"], index=midx) + # should work + df.groupby(level="three") + + def test_groupby_level_no_obs(self): + # #1697 + midx = MultiIndex.from_tuples( + [ + ("f1", "s1"), + ("f1", "s2"), + ("f2", "s1"), + ("f2", "s2"), + ("f3", "s1"), + ("f3", "s2"), + ] + ) + df = DataFrame([[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]], columns=midx) + df1 = df.loc(axis=1)[df.columns.map(lambda u: u[0] in ["f2", "f3"])] + + grouped = df1.groupby(axis=1, level=0) + result = grouped.sum() + assert (result.columns == ["f2", "f3"]).all() + + def test_setitem_with_expansion_multiindex_columns( + self, multiindex_year_month_day_dataframe_random_data + ): + ymd = multiindex_year_month_day_dataframe_random_data + + df = ymd[:5].T + df[2000, 1, 10] = df[2000, 1, 7] + assert isinstance(df.columns, MultiIndex) + assert (df[2000, 1, 10] == df[2000, 1, 7]).all() + + def test_alignment(self): + x = Series( + data=[1, 2, 3], index=MultiIndex.from_tuples([("A", 1), ("A", 2), ("B", 3)]) + ) + + y = Series( + data=[4, 5, 6], index=MultiIndex.from_tuples([("Z", 1), ("Z", 2), ("B", 3)]) + ) + + res = x - y + exp_index = x.index.union(y.index) + exp = x.reindex(exp_index) - y.reindex(exp_index) + tm.assert_series_equal(res, exp) + + # hit non-monotonic code path + res = x[::-1] - y[::-1] + exp_index = x.index.union(y.index) + exp = x.reindex(exp_index) - y.reindex(exp_index) + tm.assert_series_equal(res, exp) + + @pytest.mark.parametrize("op", AGG_FUNCTIONS) + @pytest.mark.parametrize("level", [0, 1]) + @pytest.mark.parametrize("skipna", [True, False]) + @pytest.mark.parametrize("sort", [True, False]) + def test_series_group_min_max( + self, op, level, skipna, sort, series_with_multilevel_index + ): + # GH 17537 + ser = series_with_multilevel_index + + 
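+        # Each op in AGG_FUNCTIONS is computed two ways below -- an explicit
+        # groupby(level=...) aggregation and the deprecated level= reduction
+        # -- and the two results must agree.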
grouped = ser.groupby(level=level, sort=sort) + # skipna=True + leftside = grouped.agg(lambda x: getattr(x, op)(skipna=skipna)) + with tm.assert_produces_warning(FutureWarning): + rightside = getattr(ser, op)(level=level, skipna=skipna) + if sort: + rightside = rightside.sort_index(level=level) + tm.assert_series_equal(leftside, rightside) + + @pytest.mark.parametrize("op", AGG_FUNCTIONS) + @pytest.mark.parametrize("level", [0, 1]) + @pytest.mark.parametrize("axis", [0, 1]) + @pytest.mark.parametrize("skipna", [True, False]) + @pytest.mark.parametrize("sort", [True, False]) + def test_frame_group_ops( + self, op, level, axis, skipna, sort, multiindex_dataframe_random_data + ): + # GH 17537 + frame = multiindex_dataframe_random_data + + frame.iloc[1, [1, 2]] = np.nan + frame.iloc[7, [0, 1]] = np.nan + + level_name = frame.index.names[level] + + if axis == 0: + frame = frame + else: + frame = frame.T + + grouped = frame.groupby(level=level, axis=axis, sort=sort) + + pieces = [] + + def aggf(x): + pieces.append(x) + return getattr(x, op)(skipna=skipna, axis=axis) + + leftside = grouped.agg(aggf) + with tm.assert_produces_warning(FutureWarning): + rightside = getattr(frame, op)(level=level, axis=axis, skipna=skipna) + if sort: + rightside = rightside.sort_index(level=level, axis=axis) + frame = frame.sort_index(level=level, axis=axis) + + # for good measure, groupby detail + level_index = frame._get_axis(axis).levels[level].rename(level_name) + + tm.assert_index_equal(leftside._get_axis(axis), level_index) + tm.assert_index_equal(rightside._get_axis(axis), level_index) + + tm.assert_frame_equal(leftside, rightside) + + @pytest.mark.parametrize("meth", ["var", "std"]) + def test_std_var_pass_ddof(self, meth): + index = MultiIndex.from_arrays( + [np.arange(5).repeat(10), np.tile(np.arange(10), 5)] + ) + df = DataFrame(np.random.randn(len(index), 5), index=index) + + ddof = 4 + alt = lambda x: getattr(x, meth)(ddof=ddof) + + with tm.assert_produces_warning(FutureWarning): + result = getattr(df[0], meth)(level=0, ddof=ddof) + expected = df[0].groupby(level=0).agg(alt) + tm.assert_series_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning): + result = getattr(df, meth)(level=0, ddof=ddof) + expected = df.groupby(level=0).agg(alt) + tm.assert_frame_equal(result, expected) + + def test_agg_multiple_levels( + self, multiindex_year_month_day_dataframe_random_data, frame_or_series + ): + ymd = multiindex_year_month_day_dataframe_random_data + ymd = tm.get_obj(ymd, frame_or_series) + + with tm.assert_produces_warning(FutureWarning): + result = ymd.sum(level=["year", "month"]) + expected = ymd.groupby(level=["year", "month"]).sum() + tm.assert_equal(result, expected) + + def test_groupby_multilevel(self, multiindex_year_month_day_dataframe_random_data): + ymd = multiindex_year_month_day_dataframe_random_data + + result = ymd.groupby(level=[0, 1]).mean() + + k1 = ymd.index.get_level_values(0) + k2 = ymd.index.get_level_values(1) + + expected = ymd.groupby([k1, k2]).mean() + + # TODO groupby with level_values drops names + tm.assert_frame_equal(result, expected, check_names=False) + assert result.index.names == ymd.index.names[:2] + + result2 = ymd.groupby(level=ymd.index.names[:2]).mean() + tm.assert_frame_equal(result, result2) + + def test_multilevel_consolidate(self): + index = MultiIndex.from_tuples( + [("foo", "one"), ("foo", "two"), ("bar", "one"), ("bar", "two")] + ) + df = DataFrame(np.random.randn(4, 4), index=index, columns=index) + df["Totals", ""] = df.sum(1) + df = 
df._consolidate() + + def test_level_with_tuples(self): + index = MultiIndex( + levels=[[("foo", "bar", 0), ("foo", "baz", 0), ("foo", "qux", 0)], [0, 1]], + codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], + ) + + series = Series(np.random.randn(6), index=index) + frame = DataFrame(np.random.randn(6, 4), index=index) + + result = series[("foo", "bar", 0)] + result2 = series.loc[("foo", "bar", 0)] + expected = series[:2] + expected.index = expected.index.droplevel(0) + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result2, expected) + + with pytest.raises(KeyError, match=r"^\(\('foo', 'bar', 0\), 2\)$"): + series[("foo", "bar", 0), 2] + + result = frame.loc[("foo", "bar", 0)] + result2 = frame.xs(("foo", "bar", 0)) + expected = frame[:2] + expected.index = expected.index.droplevel(0) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected) + + index = MultiIndex( + levels=[[("foo", "bar"), ("foo", "baz"), ("foo", "qux")], [0, 1]], + codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], + ) + + series = Series(np.random.randn(6), index=index) + frame = DataFrame(np.random.randn(6, 4), index=index) + + result = series[("foo", "bar")] + result2 = series.loc[("foo", "bar")] + expected = series[:2] + expected.index = expected.index.droplevel(0) + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result2, expected) + + result = frame.loc[("foo", "bar")] + result2 = frame.xs(("foo", "bar")) + expected = frame[:2] + expected.index = expected.index.droplevel(0) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected) + + def test_reindex_level_partial_selection(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + + result = frame.reindex(["foo", "qux"], level=0) + expected = frame.iloc[[0, 1, 2, 7, 8, 9]] + tm.assert_frame_equal(result, expected) + + result = frame.T.reindex(["foo", "qux"], axis=1, level=0) + tm.assert_frame_equal(result, expected.T) + + result = frame.loc[["foo", "qux"]] + tm.assert_frame_equal(result, expected) + + result = frame["A"].loc[["foo", "qux"]] + tm.assert_series_equal(result, expected["A"]) + + result = frame.T.loc[:, ["foo", "qux"]] + tm.assert_frame_equal(result, expected.T) + + @pytest.mark.parametrize("d", [4, "d"]) + def test_empty_frame_groupby_dtypes_consistency(self, d): + # GH 20888 + group_keys = ["a", "b", "c"] + df = DataFrame({"a": [1], "b": [2], "c": [3], "d": [d]}) + + g = df[df.a == 2].groupby(group_keys) + result = g.first().index + expected = MultiIndex( + levels=[[1], [2], [3]], codes=[[], [], []], names=["a", "b", "c"] + ) + + tm.assert_index_equal(result, expected) + + def test_duplicate_groupby_issues(self): + idx_tp = [ + ("600809", "20061231"), + ("600809", "20070331"), + ("600809", "20070630"), + ("600809", "20070331"), + ] + dt = ["demo", "demo", "demo", "demo"] + + idx = MultiIndex.from_tuples(idx_tp, names=["STK_ID", "RPT_Date"]) + s = Series(dt, index=idx) + + result = s.groupby(s.index).first() + assert len(result) == 3 + + def test_subsets_multiindex_dtype(self): + # GH 20757 + data = [["x", 1]] + columns = [("a", "b", np.nan), ("a", "c", 0.0)] + df = DataFrame(data, columns=MultiIndex.from_tuples(columns)) + expected = df.dtypes.a.b + result = df.a.b.dtypes + tm.assert_series_equal(result, expected) + + +class TestSorted: + """everything you wanted to test about sorting""" + + def test_sort_non_lexsorted(self): + # degenerate case where we sort but don't + # have a satisfying result :< + # GH 15797 + idx = MultiIndex( + 
[["A", "B", "C"], ["c", "b", "a"]], [[0, 1, 2, 0, 1, 2], [0, 2, 1, 1, 0, 2]] + ) + + df = DataFrame({"col": range(len(idx))}, index=idx, dtype="int64") + assert df.index.is_monotonic is False + + sorted = df.sort_index() + assert sorted.index.is_monotonic is True + + expected = DataFrame( + {"col": [1, 4, 5, 2]}, + index=MultiIndex.from_tuples( + [("B", "a"), ("B", "c"), ("C", "a"), ("C", "b")] + ), + dtype="int64", + ) + result = sorted.loc[pd.IndexSlice["B":"C", "a":"c"], :] + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/test_nanops.py b/.local/lib/python3.8/site-packages/pandas/tests/test_nanops.py new file mode 100644 index 0000000..ee451d0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/test_nanops.py @@ -0,0 +1,1095 @@ +from functools import partial +import operator +import warnings + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas.core.dtypes.common import is_integer_dtype + +import pandas as pd +from pandas import ( + Series, + isna, +) +import pandas._testing as tm +from pandas.core.arrays import DatetimeArray +import pandas.core.nanops as nanops + +use_bn = nanops._USE_BOTTLENECK +has_c16 = hasattr(np, "complex128") + + +@pytest.fixture(params=[True, False]) +def skipna(request): + """ + Fixture to pass skipna to nanops functions. + """ + return request.param + + +class TestnanopsDataFrame: + def setup_method(self, method): + np.random.seed(11235) + nanops._USE_BOTTLENECK = False + + arr_shape = (11, 7) + + self.arr_float = np.random.randn(*arr_shape) + self.arr_float1 = np.random.randn(*arr_shape) + self.arr_complex = self.arr_float + self.arr_float1 * 1j + self.arr_int = np.random.randint(-10, 10, arr_shape) + self.arr_bool = np.random.randint(0, 2, arr_shape) == 0 + self.arr_str = np.abs(self.arr_float).astype("S") + self.arr_utf = np.abs(self.arr_float).astype("U") + self.arr_date = np.random.randint(0, 20000, arr_shape).astype("M8[ns]") + self.arr_tdelta = np.random.randint(0, 20000, arr_shape).astype("m8[ns]") + + self.arr_nan = np.tile(np.nan, arr_shape) + self.arr_float_nan = np.vstack([self.arr_float, self.arr_nan]) + self.arr_float1_nan = np.vstack([self.arr_float1, self.arr_nan]) + self.arr_nan_float1 = np.vstack([self.arr_nan, self.arr_float1]) + self.arr_nan_nan = np.vstack([self.arr_nan, self.arr_nan]) + + self.arr_inf = self.arr_float * np.inf + self.arr_float_inf = np.vstack([self.arr_float, self.arr_inf]) + + self.arr_nan_inf = np.vstack([self.arr_nan, self.arr_inf]) + self.arr_float_nan_inf = np.vstack([self.arr_float, self.arr_nan, self.arr_inf]) + self.arr_nan_nan_inf = np.vstack([self.arr_nan, self.arr_nan, self.arr_inf]) + self.arr_obj = np.vstack( + [ + self.arr_float.astype("O"), + self.arr_int.astype("O"), + self.arr_bool.astype("O"), + self.arr_complex.astype("O"), + self.arr_str.astype("O"), + self.arr_utf.astype("O"), + self.arr_date.astype("O"), + self.arr_tdelta.astype("O"), + ] + ) + + with np.errstate(invalid="ignore"): + self.arr_nan_nanj = self.arr_nan + self.arr_nan * 1j + self.arr_complex_nan = np.vstack([self.arr_complex, self.arr_nan_nanj]) + + self.arr_nan_infj = self.arr_inf * 1j + self.arr_complex_nan_infj = np.vstack([self.arr_complex, self.arr_nan_infj]) + + self.arr_float_2d = self.arr_float + self.arr_float1_2d = self.arr_float1 + + self.arr_nan_2d = self.arr_nan + self.arr_float_nan_2d = self.arr_float_nan + self.arr_float1_nan_2d = self.arr_float1_nan + self.arr_nan_float1_2d = self.arr_nan_float1 + + 
self.arr_float_1d = self.arr_float[:, 0] + self.arr_float1_1d = self.arr_float1[:, 0] + + self.arr_nan_1d = self.arr_nan[:, 0] + self.arr_float_nan_1d = self.arr_float_nan[:, 0] + self.arr_float1_nan_1d = self.arr_float1_nan[:, 0] + self.arr_nan_float1_1d = self.arr_nan_float1[:, 0] + + def teardown_method(self, method): + nanops._USE_BOTTLENECK = use_bn + + def check_results(self, targ, res, axis, check_dtype=True): + res = getattr(res, "asm8", res) + + if ( + axis != 0 + and hasattr(targ, "shape") + and targ.ndim + and targ.shape != res.shape + ): + res = np.split(res, [targ.shape[0]], axis=0)[0] + + try: + tm.assert_almost_equal(targ, res, check_dtype=check_dtype) + except AssertionError: + + # handle timedelta dtypes + if hasattr(targ, "dtype") and targ.dtype == "m8[ns]": + raise + + # There are sometimes rounding errors with + # complex and object dtypes. + # If it isn't one of those, re-raise the error. + if not hasattr(res, "dtype") or res.dtype.kind not in ["c", "O"]: + raise + # convert object dtypes to something that can be split into + # real and imaginary parts + if res.dtype.kind == "O": + if targ.dtype.kind != "O": + res = res.astype(targ.dtype) + else: + cast_dtype = "c16" if has_c16 else "f8" + res = res.astype(cast_dtype) + targ = targ.astype(cast_dtype) + # there should never be a case where numpy returns an object + # but nanops doesn't, so make that an exception + elif targ.dtype.kind == "O": + raise + tm.assert_almost_equal(np.real(targ), np.real(res), check_dtype=check_dtype) + tm.assert_almost_equal(np.imag(targ), np.imag(res), check_dtype=check_dtype) + + def check_fun_data( + self, + testfunc, + targfunc, + testarval, + targarval, + skipna, + check_dtype=True, + empty_targfunc=None, + **kwargs, + ): + for axis in list(range(targarval.ndim)) + [None]: + targartempval = targarval if skipna else testarval + if skipna and empty_targfunc and isna(targartempval).all(): + targ = empty_targfunc(targartempval, axis=axis, **kwargs) + else: + targ = targfunc(targartempval, axis=axis, **kwargs) + + if targartempval.dtype == object and ( + targfunc is np.any or targfunc is np.all + ): + # GH#12863 the numpy functions will retain e.g. 
floatiness + if isinstance(targ, np.ndarray): + targ = targ.astype(bool) + else: + targ = bool(targ) + + res = testfunc(testarval, axis=axis, skipna=skipna, **kwargs) + self.check_results(targ, res, axis, check_dtype=check_dtype) + if skipna: + res = testfunc(testarval, axis=axis, **kwargs) + self.check_results(targ, res, axis, check_dtype=check_dtype) + if axis is None: + res = testfunc(testarval, skipna=skipna, **kwargs) + self.check_results(targ, res, axis, check_dtype=check_dtype) + if skipna and axis is None: + res = testfunc(testarval, **kwargs) + self.check_results(targ, res, axis, check_dtype=check_dtype) + + if testarval.ndim <= 1: + return + + # Recurse on lower-dimension + testarval2 = np.take(testarval, 0, axis=-1) + targarval2 = np.take(targarval, 0, axis=-1) + self.check_fun_data( + testfunc, + targfunc, + testarval2, + targarval2, + skipna=skipna, + check_dtype=check_dtype, + empty_targfunc=empty_targfunc, + **kwargs, + ) + + def check_fun( + self, testfunc, targfunc, testar, skipna, empty_targfunc=None, **kwargs + ): + + targar = testar + if testar.endswith("_nan") and hasattr(self, testar[:-4]): + targar = testar[:-4] + + testarval = getattr(self, testar) + targarval = getattr(self, targar) + self.check_fun_data( + testfunc, + targfunc, + testarval, + targarval, + skipna=skipna, + empty_targfunc=empty_targfunc, + **kwargs, + ) + + def check_funs( + self, + testfunc, + targfunc, + skipna, + allow_complex=True, + allow_all_nan=True, + allow_date=True, + allow_tdelta=True, + allow_obj=True, + **kwargs, + ): + self.check_fun(testfunc, targfunc, "arr_float", skipna, **kwargs) + self.check_fun(testfunc, targfunc, "arr_float_nan", skipna, **kwargs) + self.check_fun(testfunc, targfunc, "arr_int", skipna, **kwargs) + self.check_fun(testfunc, targfunc, "arr_bool", skipna, **kwargs) + objs = [ + self.arr_float.astype("O"), + self.arr_int.astype("O"), + self.arr_bool.astype("O"), + ] + + if allow_all_nan: + self.check_fun(testfunc, targfunc, "arr_nan", skipna, **kwargs) + + if allow_complex: + self.check_fun(testfunc, targfunc, "arr_complex", skipna, **kwargs) + self.check_fun(testfunc, targfunc, "arr_complex_nan", skipna, **kwargs) + if allow_all_nan: + self.check_fun(testfunc, targfunc, "arr_nan_nanj", skipna, **kwargs) + objs += [self.arr_complex.astype("O")] + + if allow_date: + targfunc(self.arr_date) + self.check_fun(testfunc, targfunc, "arr_date", skipna, **kwargs) + objs += [self.arr_date.astype("O")] + + if allow_tdelta: + try: + targfunc(self.arr_tdelta) + except TypeError: + pass + else: + self.check_fun(testfunc, targfunc, "arr_tdelta", skipna, **kwargs) + objs += [self.arr_tdelta.astype("O")] + + if allow_obj: + self.arr_obj = np.vstack(objs) + # some nanops handle object dtypes better than their numpy + # counterparts, so the numpy functions need to be given something + # else + if allow_obj == "convert": + targfunc = partial( + self._badobj_wrap, func=targfunc, allow_complex=allow_complex + ) + self.check_fun(testfunc, targfunc, "arr_obj", skipna, **kwargs) + + def _badobj_wrap(self, value, func, allow_complex=True, **kwargs): + if value.dtype.kind == "O": + if allow_complex: + value = value.astype("c16") + else: + value = value.astype("f8") + return func(value, **kwargs) + + @pytest.mark.parametrize( + "nan_op,np_op", [(nanops.nanany, np.any), (nanops.nanall, np.all)] + ) + def test_nan_funcs(self, nan_op, np_op, skipna): + self.check_funs(nan_op, np_op, skipna, allow_all_nan=False, allow_date=False) + + def test_nansum(self, skipna): + self.check_funs( + 
nanops.nansum, + np.sum, + skipna, + allow_date=False, + check_dtype=False, + empty_targfunc=np.nansum, + ) + + def test_nanmean(self, skipna): + self.check_funs( + nanops.nanmean, np.mean, skipna, allow_obj=False, allow_date=False + ) + + def test_nanmean_overflow(self): + # GH 10155 + # In the previous implementation mean can overflow for int dtypes, it + # is now consistent with numpy + + for a in [2 ** 55, -(2 ** 55), 20150515061816532]: + s = Series(a, index=range(500), dtype=np.int64) + result = s.mean() + np_result = s.values.mean() + assert result == a + assert result == np_result + assert result.dtype == np.float64 + + @pytest.mark.parametrize( + "dtype", + [ + np.int16, + np.int32, + np.int64, + np.float32, + np.float64, + getattr(np, "float128", None), + ], + ) + def test_returned_dtype(self, dtype): + if dtype is None: + # no float128 available + return + + s = Series(range(10), dtype=dtype) + group_a = ["mean", "std", "var", "skew", "kurt"] + group_b = ["min", "max"] + for method in group_a + group_b: + result = getattr(s, method)() + if is_integer_dtype(dtype) and method in group_a: + assert result.dtype == np.float64 + else: + assert result.dtype == dtype + + def test_nanmedian(self, skipna): + with warnings.catch_warnings(record=True): + warnings.simplefilter("ignore", RuntimeWarning) + self.check_funs( + nanops.nanmedian, + np.median, + skipna, + allow_complex=False, + allow_date=False, + allow_obj="convert", + ) + + @pytest.mark.parametrize("ddof", range(3)) + def test_nanvar(self, ddof, skipna): + self.check_funs( + nanops.nanvar, + np.var, + skipna, + allow_complex=False, + allow_date=False, + allow_obj="convert", + ddof=ddof, + ) + + @pytest.mark.parametrize("ddof", range(3)) + def test_nanstd(self, ddof, skipna): + self.check_funs( + nanops.nanstd, + np.std, + skipna, + allow_complex=False, + allow_date=False, + allow_obj="convert", + ddof=ddof, + ) + + @td.skip_if_no_scipy + @pytest.mark.parametrize("ddof", range(3)) + def test_nansem(self, ddof, skipna): + from scipy.stats import sem + + with np.errstate(invalid="ignore"): + self.check_funs( + nanops.nansem, + sem, + skipna, + allow_complex=False, + allow_date=False, + allow_tdelta=False, + allow_obj="convert", + ddof=ddof, + ) + + @pytest.mark.parametrize( + "nan_op,np_op", [(nanops.nanmin, np.min), (nanops.nanmax, np.max)] + ) + def test_nanops_with_warnings(self, nan_op, np_op, skipna): + with warnings.catch_warnings(record=True): + warnings.simplefilter("ignore", RuntimeWarning) + self.check_funs(nan_op, np_op, skipna, allow_obj=False) + + def _argminmax_wrap(self, value, axis=None, func=None): + res = func(value, axis) + nans = np.min(value, axis) + nullnan = isna(nans) + if res.ndim: + res[nullnan] = -1 + elif ( + hasattr(nullnan, "all") + and nullnan.all() + or not hasattr(nullnan, "all") + and nullnan + ): + res = -1 + return res + + def test_nanargmax(self, skipna): + with warnings.catch_warnings(record=True): + warnings.simplefilter("ignore", RuntimeWarning) + func = partial(self._argminmax_wrap, func=np.argmax) + self.check_funs(nanops.nanargmax, func, skipna, allow_obj=False) + + def test_nanargmin(self, skipna): + with warnings.catch_warnings(record=True): + warnings.simplefilter("ignore", RuntimeWarning) + func = partial(self._argminmax_wrap, func=np.argmin) + self.check_funs(nanops.nanargmin, func, skipna, allow_obj=False) + + def _skew_kurt_wrap(self, values, axis=None, func=None): + if not isinstance(values.dtype.type, np.floating): + values = values.astype("f8") + result = func(values, axis=axis, 
bias=False) + # fix for handling cases where all elements in an axis are the same + if isinstance(result, np.ndarray): + result[np.max(values, axis=axis) == np.min(values, axis=axis)] = 0 + return result + elif np.max(values) == np.min(values): + return 0.0 + return result + + @td.skip_if_no_scipy + def test_nanskew(self, skipna): + from scipy.stats import skew + + func = partial(self._skew_kurt_wrap, func=skew) + with np.errstate(invalid="ignore"): + self.check_funs( + nanops.nanskew, + func, + skipna, + allow_complex=False, + allow_date=False, + allow_tdelta=False, + ) + + @td.skip_if_no_scipy + def test_nankurt(self, skipna): + from scipy.stats import kurtosis + + func1 = partial(kurtosis, fisher=True) + func = partial(self._skew_kurt_wrap, func=func1) + with np.errstate(invalid="ignore"): + self.check_funs( + nanops.nankurt, + func, + skipna, + allow_complex=False, + allow_date=False, + allow_tdelta=False, + ) + + def test_nanprod(self, skipna): + self.check_funs( + nanops.nanprod, + np.prod, + skipna, + allow_date=False, + allow_tdelta=False, + empty_targfunc=np.nanprod, + ) + + def check_nancorr_nancov_2d(self, checkfun, targ0, targ1, **kwargs): + res00 = checkfun(self.arr_float_2d, self.arr_float1_2d, **kwargs) + res01 = checkfun( + self.arr_float_2d, + self.arr_float1_2d, + min_periods=len(self.arr_float_2d) - 1, + **kwargs, + ) + tm.assert_almost_equal(targ0, res00) + tm.assert_almost_equal(targ0, res01) + + res10 = checkfun(self.arr_float_nan_2d, self.arr_float1_nan_2d, **kwargs) + res11 = checkfun( + self.arr_float_nan_2d, + self.arr_float1_nan_2d, + min_periods=len(self.arr_float_2d) - 1, + **kwargs, + ) + tm.assert_almost_equal(targ1, res10) + tm.assert_almost_equal(targ1, res11) + + targ2 = np.nan + res20 = checkfun(self.arr_nan_2d, self.arr_float1_2d, **kwargs) + res21 = checkfun(self.arr_float_2d, self.arr_nan_2d, **kwargs) + res22 = checkfun(self.arr_nan_2d, self.arr_nan_2d, **kwargs) + res23 = checkfun(self.arr_float_nan_2d, self.arr_nan_float1_2d, **kwargs) + res24 = checkfun( + self.arr_float_nan_2d, + self.arr_nan_float1_2d, + min_periods=len(self.arr_float_2d) - 1, + **kwargs, + ) + res25 = checkfun( + self.arr_float_2d, + self.arr_float1_2d, + min_periods=len(self.arr_float_2d) + 1, + **kwargs, + ) + tm.assert_almost_equal(targ2, res20) + tm.assert_almost_equal(targ2, res21) + tm.assert_almost_equal(targ2, res22) + tm.assert_almost_equal(targ2, res23) + tm.assert_almost_equal(targ2, res24) + tm.assert_almost_equal(targ2, res25) + + def check_nancorr_nancov_1d(self, checkfun, targ0, targ1, **kwargs): + res00 = checkfun(self.arr_float_1d, self.arr_float1_1d, **kwargs) + res01 = checkfun( + self.arr_float_1d, + self.arr_float1_1d, + min_periods=len(self.arr_float_1d) - 1, + **kwargs, + ) + tm.assert_almost_equal(targ0, res00) + tm.assert_almost_equal(targ0, res01) + + res10 = checkfun(self.arr_float_nan_1d, self.arr_float1_nan_1d, **kwargs) + res11 = checkfun( + self.arr_float_nan_1d, + self.arr_float1_nan_1d, + min_periods=len(self.arr_float_1d) - 1, + **kwargs, + ) + tm.assert_almost_equal(targ1, res10) + tm.assert_almost_equal(targ1, res11) + + targ2 = np.nan + res20 = checkfun(self.arr_nan_1d, self.arr_float1_1d, **kwargs) + res21 = checkfun(self.arr_float_1d, self.arr_nan_1d, **kwargs) + res22 = checkfun(self.arr_nan_1d, self.arr_nan_1d, **kwargs) + res23 = checkfun(self.arr_float_nan_1d, self.arr_nan_float1_1d, **kwargs) + res24 = checkfun( + self.arr_float_nan_1d, + self.arr_nan_float1_1d, + min_periods=len(self.arr_float_1d) - 1, + **kwargs, + ) + res25 = 
checkfun( + self.arr_float_1d, + self.arr_float1_1d, + min_periods=len(self.arr_float_1d) + 1, + **kwargs, + ) + tm.assert_almost_equal(targ2, res20) + tm.assert_almost_equal(targ2, res21) + tm.assert_almost_equal(targ2, res22) + tm.assert_almost_equal(targ2, res23) + tm.assert_almost_equal(targ2, res24) + tm.assert_almost_equal(targ2, res25) + + def test_nancorr(self): + targ0 = np.corrcoef(self.arr_float_2d, self.arr_float1_2d)[0, 1] + targ1 = np.corrcoef(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0, 1] + self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1) + targ0 = np.corrcoef(self.arr_float_1d, self.arr_float1_1d)[0, 1] + targ1 = np.corrcoef(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0, 1] + self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method="pearson") + + def test_nancorr_pearson(self): + targ0 = np.corrcoef(self.arr_float_2d, self.arr_float1_2d)[0, 1] + targ1 = np.corrcoef(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0, 1] + self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1, method="pearson") + targ0 = np.corrcoef(self.arr_float_1d, self.arr_float1_1d)[0, 1] + targ1 = np.corrcoef(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0, 1] + self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method="pearson") + + @td.skip_if_no_scipy + def test_nancorr_kendall(self): + from scipy.stats import kendalltau + + targ0 = kendalltau(self.arr_float_2d, self.arr_float1_2d)[0] + targ1 = kendalltau(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0] + self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1, method="kendall") + targ0 = kendalltau(self.arr_float_1d, self.arr_float1_1d)[0] + targ1 = kendalltau(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0] + self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method="kendall") + + @td.skip_if_no_scipy + def test_nancorr_spearman(self): + from scipy.stats import spearmanr + + targ0 = spearmanr(self.arr_float_2d, self.arr_float1_2d)[0] + targ1 = spearmanr(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0] + self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1, method="spearman") + targ0 = spearmanr(self.arr_float_1d, self.arr_float1_1d)[0] + targ1 = spearmanr(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0] + self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method="spearman") + + @td.skip_if_no_scipy + def test_invalid_method(self): + targ0 = np.corrcoef(self.arr_float_2d, self.arr_float1_2d)[0, 1] + targ1 = np.corrcoef(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0, 1] + msg = "Unknown method 'foo', expected one of 'kendall', 'spearman'" + with pytest.raises(ValueError, match=msg): + self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method="foo") + + def test_nancov(self): + targ0 = np.cov(self.arr_float_2d, self.arr_float1_2d)[0, 1] + targ1 = np.cov(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0, 1] + self.check_nancorr_nancov_2d(nanops.nancov, targ0, targ1) + targ0 = np.cov(self.arr_float_1d, self.arr_float1_1d)[0, 1] + targ1 = np.cov(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0, 1] + self.check_nancorr_nancov_1d(nanops.nancov, targ0, targ1) + + def check_nancomp(self, checkfun, targ0): + arr_float = self.arr_float + arr_float1 = self.arr_float1 + arr_nan = self.arr_nan + arr_nan_nan = self.arr_nan_nan + arr_float_nan = self.arr_float_nan + arr_float1_nan = self.arr_float1_nan + arr_nan_float1 = self.arr_nan_float1 + + while targ0.ndim: + res0 = checkfun(arr_float, arr_float1) + tm.assert_almost_equal(targ0, res0) + + if targ0.ndim > 1: + targ1 
= np.vstack([targ0, arr_nan]) + else: + targ1 = np.hstack([targ0, arr_nan]) + res1 = checkfun(arr_float_nan, arr_float1_nan) + tm.assert_numpy_array_equal(targ1, res1, check_dtype=False) + + targ2 = arr_nan_nan + res2 = checkfun(arr_float_nan, arr_nan_float1) + tm.assert_numpy_array_equal(targ2, res2, check_dtype=False) + + # Lower dimension for next step in the loop + arr_float = np.take(arr_float, 0, axis=-1) + arr_float1 = np.take(arr_float1, 0, axis=-1) + arr_nan = np.take(arr_nan, 0, axis=-1) + arr_nan_nan = np.take(arr_nan_nan, 0, axis=-1) + arr_float_nan = np.take(arr_float_nan, 0, axis=-1) + arr_float1_nan = np.take(arr_float1_nan, 0, axis=-1) + arr_nan_float1 = np.take(arr_nan_float1, 0, axis=-1) + targ0 = np.take(targ0, 0, axis=-1) + + @pytest.mark.parametrize( + "op,nanop", + [ + (operator.eq, nanops.naneq), + (operator.ne, nanops.nanne), + (operator.gt, nanops.nangt), + (operator.ge, nanops.nange), + (operator.lt, nanops.nanlt), + (operator.le, nanops.nanle), + ], + ) + def test_nan_comparison(self, op, nanop): + targ0 = op(self.arr_float, self.arr_float1) + self.check_nancomp(nanop, targ0) + + def check_bool(self, func, value, correct): + while getattr(value, "ndim", True): + res0 = func(value) + if correct: + assert res0 + else: + assert not res0 + + if not hasattr(value, "ndim"): + break + + # Reduce dimension for next step in the loop + value = np.take(value, 0, axis=-1) + + def test__has_infs(self): + pairs = [ + ("arr_complex", False), + ("arr_int", False), + ("arr_bool", False), + ("arr_str", False), + ("arr_utf", False), + ("arr_complex", False), + ("arr_complex_nan", False), + ("arr_nan_nanj", False), + ("arr_nan_infj", True), + ("arr_complex_nan_infj", True), + ] + pairs_float = [ + ("arr_float", False), + ("arr_nan", False), + ("arr_float_nan", False), + ("arr_nan_nan", False), + ("arr_float_inf", True), + ("arr_inf", True), + ("arr_nan_inf", True), + ("arr_float_nan_inf", True), + ("arr_nan_nan_inf", True), + ] + + for arr, correct in pairs: + val = getattr(self, arr) + self.check_bool(nanops._has_infs, val, correct) + + for arr, correct in pairs_float: + val = getattr(self, arr) + self.check_bool(nanops._has_infs, val, correct) + self.check_bool(nanops._has_infs, val.astype("f4"), correct) + self.check_bool(nanops._has_infs, val.astype("f2"), correct) + + def test__bn_ok_dtype(self): + assert nanops._bn_ok_dtype(self.arr_float.dtype, "test") + assert nanops._bn_ok_dtype(self.arr_complex.dtype, "test") + assert nanops._bn_ok_dtype(self.arr_int.dtype, "test") + assert nanops._bn_ok_dtype(self.arr_bool.dtype, "test") + assert nanops._bn_ok_dtype(self.arr_str.dtype, "test") + assert nanops._bn_ok_dtype(self.arr_utf.dtype, "test") + assert not nanops._bn_ok_dtype(self.arr_date.dtype, "test") + assert not nanops._bn_ok_dtype(self.arr_tdelta.dtype, "test") + assert not nanops._bn_ok_dtype(self.arr_obj.dtype, "test") + + +class TestEnsureNumeric: + def test_numeric_values(self): + # Test integer + assert nanops._ensure_numeric(1) == 1 + + # Test float + assert nanops._ensure_numeric(1.1) == 1.1 + + # Test complex + assert nanops._ensure_numeric(1 + 2j) == 1 + 2j + + def test_ndarray(self): + # Test numeric ndarray + values = np.array([1, 2, 3]) + assert np.allclose(nanops._ensure_numeric(values), values) + + # Test object ndarray + o_values = values.astype(object) + assert np.allclose(nanops._ensure_numeric(o_values), values) + + # Test convertible string ndarray + s_values = np.array(["1", "2", "3"], dtype=object) + assert np.allclose(nanops._ensure_numeric(s_values), 
values) + + # Test non-convertible string ndarray + s_values = np.array(["foo", "bar", "baz"], dtype=object) + msg = r"Could not convert .* to numeric" + with pytest.raises(TypeError, match=msg): + nanops._ensure_numeric(s_values) + + def test_convertable_values(self): + assert np.allclose(nanops._ensure_numeric("1"), 1.0) + assert np.allclose(nanops._ensure_numeric("1.1"), 1.1) + assert np.allclose(nanops._ensure_numeric("1+1j"), 1 + 1j) + + def test_non_convertable_values(self): + msg = "Could not convert foo to numeric" + with pytest.raises(TypeError, match=msg): + nanops._ensure_numeric("foo") + + # with the wrong type, python raises TypeError for us + msg = "argument must be a string or a number" + with pytest.raises(TypeError, match=msg): + nanops._ensure_numeric({}) + with pytest.raises(TypeError, match=msg): + nanops._ensure_numeric([]) + + +class TestNanvarFixedValues: + + # xref GH10242 + + def setup_method(self, method): + # Samples from a normal distribution. + self.variance = variance = 3.0 + self.samples = self.prng.normal(scale=variance ** 0.5, size=100000) + + def test_nanvar_all_finite(self): + samples = self.samples + actual_variance = nanops.nanvar(samples) + tm.assert_almost_equal(actual_variance, self.variance, rtol=1e-2) + + def test_nanvar_nans(self): + samples = np.nan * np.ones(2 * self.samples.shape[0]) + samples[::2] = self.samples + + actual_variance = nanops.nanvar(samples, skipna=True) + tm.assert_almost_equal(actual_variance, self.variance, rtol=1e-2) + + actual_variance = nanops.nanvar(samples, skipna=False) + tm.assert_almost_equal(actual_variance, np.nan, rtol=1e-2) + + def test_nanstd_nans(self): + samples = np.nan * np.ones(2 * self.samples.shape[0]) + samples[::2] = self.samples + + actual_std = nanops.nanstd(samples, skipna=True) + tm.assert_almost_equal(actual_std, self.variance ** 0.5, rtol=1e-2) + + actual_std = nanops.nanvar(samples, skipna=False) + tm.assert_almost_equal(actual_std, np.nan, rtol=1e-2) + + def test_nanvar_axis(self): + # Generate some sample data. + samples_norm = self.samples + samples_unif = self.prng.uniform(size=samples_norm.shape[0]) + samples = np.vstack([samples_norm, samples_unif]) + + actual_variance = nanops.nanvar(samples, axis=1) + tm.assert_almost_equal( + actual_variance, np.array([self.variance, 1.0 / 12]), rtol=1e-2 + ) + + def test_nanvar_ddof(self): + n = 5 + samples = self.prng.uniform(size=(10000, n + 1)) + samples[:, -1] = np.nan # Force use of our own algorithm. + + variance_0 = nanops.nanvar(samples, axis=1, skipna=True, ddof=0).mean() + variance_1 = nanops.nanvar(samples, axis=1, skipna=True, ddof=1).mean() + variance_2 = nanops.nanvar(samples, axis=1, skipna=True, ddof=2).mean() + + # The unbiased estimate. + var = 1.0 / 12 + tm.assert_almost_equal(variance_1, var, rtol=1e-2) + + # The underestimated variance. + tm.assert_almost_equal(variance_0, (n - 1.0) / n * var, rtol=1e-2) + + # The overestimated variance. + tm.assert_almost_equal(variance_2, (n - 1.0) / (n - 2.0) * var, rtol=1e-2) + + @pytest.mark.parametrize("axis", range(2)) + @pytest.mark.parametrize("ddof", range(3)) + def test_ground_truth(self, axis, ddof): + # Test against values that were precomputed with Numpy. 
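+        # Layout note: the hard-coded array below is indexed variance[axis, ddof].
+        # With n = 3 valid observations per row/column, the ddof variants scale
+        # by n / (n - ddof): the ddof=1 entries are 1.5x the ddof=0 entries and
+        # the ddof=2 entries are 3x.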
+ samples = np.empty((4, 4)) + samples[:3, :3] = np.array( + [ + [0.97303362, 0.21869576, 0.55560287], + [0.72980153, 0.03109364, 0.99155171], + [0.09317602, 0.60078248, 0.15871292], + ] + ) + samples[3] = samples[:, 3] = np.nan + + # Actual variances along axis=0, 1 for ddof=0, 1, 2 + variance = np.array( + [ + [ + [0.13762259, 0.05619224, 0.11568816], + [0.20643388, 0.08428837, 0.17353224], + [0.41286776, 0.16857673, 0.34706449], + ], + [ + [0.09519783, 0.16435395, 0.05082054], + [0.14279674, 0.24653093, 0.07623082], + [0.28559348, 0.49306186, 0.15246163], + ], + ] + ) + + # Test nanvar. + var = nanops.nanvar(samples, skipna=True, axis=axis, ddof=ddof) + tm.assert_almost_equal(var[:3], variance[axis, ddof]) + assert np.isnan(var[3]) + + # Test nanstd. + std = nanops.nanstd(samples, skipna=True, axis=axis, ddof=ddof) + tm.assert_almost_equal(std[:3], variance[axis, ddof] ** 0.5) + assert np.isnan(std[3]) + + @pytest.mark.parametrize("ddof", range(3)) + def test_nanstd_roundoff(self, ddof): + # Regression test for GH 10242 (test data taken from GH 10489). Ensure + # that variance is stable. + data = Series(766897346 * np.ones(10)) + result = data.std(ddof=ddof) + assert result == 0.0 + + @property + def prng(self): + return np.random.RandomState(1234) + + +class TestNanskewFixedValues: + + # xref GH 11974 + + def setup_method(self, method): + # Test data + skewness value (computed with scipy.stats.skew) + self.samples = np.sin(np.linspace(0, 1, 200)) + self.actual_skew = -0.1875895205961754 + + def test_constant_series(self): + # xref GH 11974 + for val in [3075.2, 3075.3, 3075.5]: + data = val * np.ones(300) + skew = nanops.nanskew(data) + assert skew == 0.0 + + def test_all_finite(self): + alpha, beta = 0.3, 0.1 + left_tailed = self.prng.beta(alpha, beta, size=100) + assert nanops.nanskew(left_tailed) < 0 + + alpha, beta = 0.1, 0.3 + right_tailed = self.prng.beta(alpha, beta, size=100) + assert nanops.nanskew(right_tailed) > 0 + + def test_ground_truth(self): + skew = nanops.nanskew(self.samples) + tm.assert_almost_equal(skew, self.actual_skew) + + def test_axis(self): + samples = np.vstack([self.samples, np.nan * np.ones(len(self.samples))]) + skew = nanops.nanskew(samples, axis=1) + tm.assert_almost_equal(skew, np.array([self.actual_skew, np.nan])) + + def test_nans(self): + samples = np.hstack([self.samples, np.nan]) + skew = nanops.nanskew(samples, skipna=False) + assert np.isnan(skew) + + def test_nans_skipna(self): + samples = np.hstack([self.samples, np.nan]) + skew = nanops.nanskew(samples, skipna=True) + tm.assert_almost_equal(skew, self.actual_skew) + + @property + def prng(self): + return np.random.RandomState(1234) + + +class TestNankurtFixedValues: + + # xref GH 11974 + + def setup_method(self, method): + # Test data + kurtosis value (computed with scipy.stats.kurtosis) + self.samples = np.sin(np.linspace(0, 1, 200)) + self.actual_kurt = -1.2058303433799713 + + @pytest.mark.parametrize("val", [3075.2, 3075.3, 3075.5]) + def test_constant_series(self, val): + # xref GH 11974 + data = val * np.ones(300) + kurt = nanops.nankurt(data) + assert kurt == 0.0 + + def test_all_finite(self): + alpha, beta = 0.3, 0.1 + left_tailed = self.prng.beta(alpha, beta, size=100) + assert nanops.nankurt(left_tailed) < 0 + + alpha, beta = 0.1, 0.3 + right_tailed = self.prng.beta(alpha, beta, size=100) + assert nanops.nankurt(right_tailed) > 0 + + def test_ground_truth(self): + kurt = nanops.nankurt(self.samples) + tm.assert_almost_equal(kurt, self.actual_kurt) + + def test_axis(self): + samples = 
np.vstack([self.samples, np.nan * np.ones(len(self.samples))]) + kurt = nanops.nankurt(samples, axis=1) + tm.assert_almost_equal(kurt, np.array([self.actual_kurt, np.nan])) + + def test_nans(self): + samples = np.hstack([self.samples, np.nan]) + kurt = nanops.nankurt(samples, skipna=False) + assert np.isnan(kurt) + + def test_nans_skipna(self): + samples = np.hstack([self.samples, np.nan]) + kurt = nanops.nankurt(samples, skipna=True) + tm.assert_almost_equal(kurt, self.actual_kurt) + + @property + def prng(self): + return np.random.RandomState(1234) + + +class TestDatetime64NaNOps: + # Enabling mean changes the behavior of DataFrame.mean + # See https://github.com/pandas-dev/pandas/issues/24752 + def test_nanmean(self): + dti = pd.date_range("2016-01-01", periods=3) + expected = dti[1] + + for obj in [dti, DatetimeArray(dti), Series(dti)]: + result = nanops.nanmean(obj) + assert result == expected + + dti2 = dti.insert(1, pd.NaT) + + for obj in [dti2, DatetimeArray(dti2), Series(dti2)]: + result = nanops.nanmean(obj) + assert result == expected + + @pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"]) + def test_nanmean_skipna_false(self, dtype): + arr = np.arange(12).astype(np.int64).view(dtype).reshape(4, 3) + + arr[-1, -1] = "NaT" + + result = nanops.nanmean(arr, skipna=False) + assert result is pd.NaT + + result = nanops.nanmean(arr, axis=0, skipna=False) + expected = np.array([4, 5, "NaT"], dtype=arr.dtype) + tm.assert_numpy_array_equal(result, expected) + + result = nanops.nanmean(arr, axis=1, skipna=False) + expected = np.array([arr[0, 1], arr[1, 1], arr[2, 1], arr[-1, -1]]) + tm.assert_numpy_array_equal(result, expected) + + +def test_use_bottleneck(): + + if nanops._BOTTLENECK_INSTALLED: + + pd.set_option("use_bottleneck", True) + assert pd.get_option("use_bottleneck") + + pd.set_option("use_bottleneck", False) + assert not pd.get_option("use_bottleneck") + + pd.set_option("use_bottleneck", use_bn) + + +@pytest.mark.parametrize( + "numpy_op, expected", + [ + (np.sum, 10), + (np.nansum, 10), + (np.mean, 2.5), + (np.nanmean, 2.5), + (np.median, 2.5), + (np.nanmedian, 2.5), + (np.min, 1), + (np.max, 4), + (np.nanmin, 1), + (np.nanmax, 4), + ], +) +def test_numpy_ops(numpy_op, expected): + # GH8383 + result = numpy_op(Series([1, 2, 3, 4])) + assert result == expected + + +@pytest.mark.parametrize( + "operation", + [ + nanops.nanany, + nanops.nanall, + nanops.nansum, + nanops.nanmean, + nanops.nanmedian, + nanops.nanstd, + nanops.nanvar, + nanops.nansem, + nanops.nanargmax, + nanops.nanargmin, + nanops.nanmax, + nanops.nanmin, + nanops.nanskew, + nanops.nankurt, + nanops.nanprod, + ], +) +def test_nanops_independent_of_mask_param(operation): + # GH22764 + s = Series([1, 2, np.nan, 3, np.nan, 4]) + mask = s.isna() + median_expected = operation(s) + median_result = operation(s, mask=mask) + assert median_expected == median_result diff --git a/.local/lib/python3.8/site-packages/pandas/tests/test_optional_dependency.py b/.local/lib/python3.8/site-packages/pandas/tests/test_optional_dependency.py new file mode 100644 index 0000000..c1d1948 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/test_optional_dependency.py @@ -0,0 +1,86 @@ +import sys +import types + +import pytest + +from pandas.compat._optional import ( + VERSIONS, + import_optional_dependency, +) + +import pandas._testing as tm + + +def test_import_optional(): + match = "Missing .*notapackage.* pip .* conda .* notapackage" + with pytest.raises(ImportError, match=match) as exc_info: + 
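+        # import_optional_dependency has three errors= modes, all exercised
+        # in this module: "raise" (the default, asserted here), "warn" (emit
+        # a UserWarning and return None), and "ignore" (return None).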
import_optional_dependency("notapackage") + # The original exception should be there as context: + assert isinstance(exc_info.value.__context__, ImportError) + + result = import_optional_dependency("notapackage", errors="ignore") + assert result is None + + +def test_xlrd_version_fallback(): + pytest.importorskip("xlrd") + import_optional_dependency("xlrd") + + +def test_bad_version(monkeypatch): + name = "fakemodule" + module = types.ModuleType(name) + module.__version__ = "0.9.0" + sys.modules[name] = module + monkeypatch.setitem(VERSIONS, name, "1.0.0") + + match = "Pandas requires .*1.0.0.* of .fakemodule.*'0.9.0'" + with pytest.raises(ImportError, match=match): + import_optional_dependency("fakemodule") + + # Test min_version parameter + result = import_optional_dependency("fakemodule", min_version="0.8") + assert result is module + + with tm.assert_produces_warning(UserWarning): + result = import_optional_dependency("fakemodule", errors="warn") + assert result is None + + module.__version__ = "1.0.0" # exact match is OK + result = import_optional_dependency("fakemodule") + assert result is module + + +def test_submodule(monkeypatch): + # Create a fake module with a submodule + name = "fakemodule" + module = types.ModuleType(name) + module.__version__ = "0.9.0" + sys.modules[name] = module + sub_name = "submodule" + submodule = types.ModuleType(sub_name) + setattr(module, sub_name, submodule) + sys.modules[f"{name}.{sub_name}"] = submodule + monkeypatch.setitem(VERSIONS, name, "1.0.0") + + match = "Pandas requires .*1.0.0.* of .fakemodule.*'0.9.0'" + with pytest.raises(ImportError, match=match): + import_optional_dependency("fakemodule.submodule") + + with tm.assert_produces_warning(UserWarning): + result = import_optional_dependency("fakemodule.submodule", errors="warn") + assert result is None + + module.__version__ = "1.0.0" # exact match is OK + result = import_optional_dependency("fakemodule.submodule") + assert result is submodule + + +def test_no_version_raises(monkeypatch): + name = "fakemodule" + module = types.ModuleType(name) + sys.modules[name] = module + monkeypatch.setitem(VERSIONS, name, "1.0.0") + + with pytest.raises(ImportError, match="Can't determine .* fakemodule"): + import_optional_dependency(name) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/test_register_accessor.py b/.local/lib/python3.8/site-packages/pandas/tests/test_register_accessor.py new file mode 100644 index 0000000..6e22424 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/test_register_accessor.py @@ -0,0 +1,109 @@ +import contextlib + +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.core import accessor + + +def test_dirname_mixin(): + # GH37173 + + class X(accessor.DirNamesMixin): + x = 1 + y: int + + def __init__(self): + self.z = 3 + + result = [attr_name for attr_name in dir(X()) if not attr_name.startswith("_")] + + assert result == ["x", "z"] + + +@contextlib.contextmanager +def ensure_removed(obj, attr): + """Ensure that an attribute added to 'obj' during the test is + removed when we're done + """ + try: + yield + finally: + try: + delattr(obj, attr) + except AttributeError: + pass + obj._accessors.discard(attr) + + +class MyAccessor: + def __init__(self, obj): + self.obj = obj + self.item = "item" + + @property + def prop(self): + return self.item + + def method(self): + return self.item + + +@pytest.mark.parametrize( + "obj, registrar", + [ + (pd.Series, pd.api.extensions.register_series_accessor), + (pd.DataFrame, 
pd.api.extensions.register_dataframe_accessor), + (pd.Index, pd.api.extensions.register_index_accessor), + ], +) +def test_register(obj, registrar): + with ensure_removed(obj, "mine"): + before = set(dir(obj)) + registrar("mine")(MyAccessor) + o = obj([]) if obj is not pd.Series else obj([], dtype=object) + assert o.mine.prop == "item" + after = set(dir(obj)) + assert (before ^ after) == {"mine"} + assert "mine" in obj._accessors + + +def test_accessor_works(): + with ensure_removed(pd.Series, "mine"): + pd.api.extensions.register_series_accessor("mine")(MyAccessor) + + s = pd.Series([1, 2]) + assert s.mine.obj is s + + assert s.mine.prop == "item" + assert s.mine.method() == "item" + + +def test_overwrite_warns(): + # Need to restore mean + mean = pd.Series.mean + try: + with tm.assert_produces_warning(UserWarning) as w: + pd.api.extensions.register_series_accessor("mean")(MyAccessor) + s = pd.Series([1, 2]) + assert s.mean.prop == "item" + msg = str(w[0].message) + assert "mean" in msg + assert "MyAccessor" in msg + assert "Series" in msg + finally: + pd.Series.mean = mean + + +def test_raises_attribute_error(): + + with ensure_removed(pd.Series, "bad"): + + @pd.api.extensions.register_series_accessor("bad") + class Bad: + def __init__(self, data): + raise AttributeError("whoops") + + with pytest.raises(AttributeError, match="whoops"): + pd.Series([], dtype=object).bad diff --git a/.local/lib/python3.8/site-packages/pandas/tests/test_sorting.py b/.local/lib/python3.8/site-packages/pandas/tests/test_sorting.py new file mode 100644 index 0000000..37820fe --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/test_sorting.py @@ -0,0 +1,512 @@ +from collections import defaultdict +from datetime import datetime +from itertools import product + +import numpy as np +import pytest + +from pandas.compat import ( + is_ci_environment, + is_platform_windows, +) + +from pandas import ( + DataFrame, + MultiIndex, + Series, + array, + concat, + merge, +) +import pandas._testing as tm +from pandas.core.algorithms import safe_sort +import pandas.core.common as com +from pandas.core.sorting import ( + decons_group_index, + get_group_index, + is_int64_overflow_possible, + lexsort_indexer, + nargsort, +) + + +@pytest.fixture +def left_right(): + low, high, n = -1 << 10, 1 << 10, 1 << 20 + left = DataFrame(np.random.randint(low, high, (n, 7)), columns=list("ABCDEFG")) + left["left"] = left.sum(axis=1) + + # one-2-one match + i = np.random.permutation(len(left)) + right = left.iloc[i].copy() + right.columns = right.columns[:-1].tolist() + ["right"] + right.index = np.arange(len(right)) + right["right"] *= -1 + return left, right + + +class TestSorting: + @pytest.mark.slow + def test_int64_overflow(self): + B = np.concatenate((np.arange(1000), np.arange(1000), np.arange(500))) + A = np.arange(2500) + df = DataFrame( + { + "A": A, + "B": B, + "C": A, + "D": B, + "E": A, + "F": B, + "G": A, + "H": B, + "values": np.random.randn(2500), + } + ) + + lg = df.groupby(["A", "B", "C", "D", "E", "F", "G", "H"]) + rg = df.groupby(["H", "G", "F", "E", "D", "C", "B", "A"]) + + left = lg.sum()["values"] + right = rg.sum()["values"] + + exp_index, _ = left.index.sortlevel() + tm.assert_index_equal(left.index, exp_index) + + exp_index, _ = right.index.sortlevel(0) + tm.assert_index_equal(right.index, exp_index) + + tups = list(map(tuple, df[["A", "B", "C", "D", "E", "F", "G", "H"]].values)) + tups = com.asarray_tuplesafe(tups) + + expected = df.groupby(tups).sum()["values"] + + for k, v in expected.items(): + 
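            # `right` was grouped with the key columns reversed (H..A), so
            # each tuple key must be flipped before looking it up there.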
assert left[k] == right[k[::-1]] + assert left[k] == v + assert len(left) == len(right) + + def test_int64_overflow_groupby_large_range(self): + # GH9096 + values = range(55109) + data = DataFrame.from_dict({"a": values, "b": values, "c": values, "d": values}) + grouped = data.groupby(["a", "b", "c", "d"]) + assert len(grouped) == len(values) + + @pytest.mark.parametrize("agg", ["mean", "median"]) + def test_int64_overflow_groupby_large_df_shuffled(self, agg): + arr = np.random.randint(-1 << 12, 1 << 12, (1 << 15, 5)) + i = np.random.choice(len(arr), len(arr) * 4) + arr = np.vstack((arr, arr[i])) # add some duplicate rows + + i = np.random.permutation(len(arr)) + arr = arr[i] # shuffle rows + + df = DataFrame(arr, columns=list("abcde")) + df["jim"], df["joe"] = np.random.randn(2, len(df)) * 10 + gr = df.groupby(list("abcde")) + + # verify this is testing what it is supposed to test! + assert is_int64_overflow_possible(gr.grouper.shape) + + # manually compute groupings + jim, joe = defaultdict(list), defaultdict(list) + for key, a, b in zip(map(tuple, arr), df["jim"], df["joe"]): + jim[key].append(a) + joe[key].append(b) + + assert len(gr) == len(jim) + mi = MultiIndex.from_tuples(jim.keys(), names=list("abcde")) + + f = lambda a: np.fromiter(map(getattr(np, agg), a), dtype="f8") + arr = np.vstack((f(jim.values()), f(joe.values()))).T + res = DataFrame(arr, columns=["jim", "joe"], index=mi).sort_index() + + tm.assert_frame_equal(getattr(gr, agg)(), res) + + @pytest.mark.parametrize( + "order, na_position, exp", + [ + [ + True, + "last", + list(range(5, 105)) + list(range(5)) + list(range(105, 110)), + ], + [ + True, + "first", + list(range(5)) + list(range(105, 110)) + list(range(5, 105)), + ], + [ + False, + "last", + list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)), + ], + [ + False, + "first", + list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)), + ], + ], + ) + def test_lexsort_indexer(self, order, na_position, exp): + keys = [[np.nan] * 5 + list(range(100)) + [np.nan] * 5] + result = lexsort_indexer(keys, orders=order, na_position=na_position) + tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) + + @pytest.mark.parametrize( + "ascending, na_position, exp, box", + [ + [ + True, + "last", + list(range(5, 105)) + list(range(5)) + list(range(105, 110)), + list, + ], + [ + True, + "first", + list(range(5)) + list(range(105, 110)) + list(range(5, 105)), + list, + ], + [ + False, + "last", + list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)), + list, + ], + [ + False, + "first", + list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)), + list, + ], + [ + True, + "last", + list(range(5, 105)) + list(range(5)) + list(range(105, 110)), + lambda x: np.array(x, dtype="O"), + ], + [ + True, + "first", + list(range(5)) + list(range(105, 110)) + list(range(5, 105)), + lambda x: np.array(x, dtype="O"), + ], + [ + False, + "last", + list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)), + lambda x: np.array(x, dtype="O"), + ], + [ + False, + "first", + list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)), + lambda x: np.array(x, dtype="O"), + ], + ], + ) + def test_nargsort(self, ascending, na_position, exp, box): + # list places NaNs last, np.array(..., dtype="O") may not place NaNs first + items = box([np.nan] * 5 + list(range(100)) + [np.nan] * 5) + + # mergesort is the most difficult to get right because we want it to be + # stable. 
+ + # According to numpy/core/tests/test_multiarray, """The number of + # sorted items must be greater than ~50 to check the actual algorithm + # because quick and merge sort fall over to insertion sort for small + # arrays.""" + + result = nargsort( + items, kind="mergesort", ascending=ascending, na_position=na_position + ) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + +class TestMerge: + def test_int64_overflow_outer_merge(self): + # #2690, combinatorial explosion + df1 = DataFrame(np.random.randn(1000, 7), columns=list("ABCDEF") + ["G1"]) + df2 = DataFrame(np.random.randn(1000, 7), columns=list("ABCDEF") + ["G2"]) + result = merge(df1, df2, how="outer") + assert len(result) == 2000 + + @pytest.mark.slow + def test_int64_overflow_check_sum_col(self, left_right): + left, right = left_right + + out = merge(left, right, how="outer") + assert len(out) == len(left) + tm.assert_series_equal(out["left"], -out["right"], check_names=False) + result = out.iloc[:, :-2].sum(axis=1) + tm.assert_series_equal(out["left"], result, check_names=False) + assert result.name is None + + @pytest.mark.slow + @pytest.mark.parametrize("how", ["left", "right", "outer", "inner"]) + def test_int64_overflow_how_merge(self, left_right, how): + left, right = left_right + + out = merge(left, right, how="outer") + out.sort_values(out.columns.tolist(), inplace=True) + out.index = np.arange(len(out)) + tm.assert_frame_equal(out, merge(left, right, how=how, sort=True)) + + @pytest.mark.slow + def test_int64_overflow_sort_false_order(self, left_right): + left, right = left_right + + # check that left merge w/ sort=False maintains left frame order + out = merge(left, right, how="left", sort=False) + tm.assert_frame_equal(left, out[left.columns.tolist()]) + + out = merge(right, left, how="left", sort=False) + tm.assert_frame_equal(right, out[right.columns.tolist()]) + + @pytest.mark.slow + @pytest.mark.parametrize("how", ["left", "right", "outer", "inner"]) + @pytest.mark.parametrize("sort", [True, False]) + def test_int64_overflow_one_to_many_none_match(self, how, sort): + # one-2-many/none match + low, high, n = -1 << 10, 1 << 10, 1 << 11 + left = DataFrame( + np.random.randint(low, high, (n, 7)).astype("int64"), + columns=list("ABCDEFG"), + ) + + # confirm that this is checking what it is supposed to check + shape = left.apply(Series.nunique).values + assert is_int64_overflow_possible(shape) + + # add duplicates to left frame + left = concat([left, left], ignore_index=True) + + right = DataFrame( + np.random.randint(low, high, (n // 2, 7)).astype("int64"), + columns=list("ABCDEFG"), + ) + + # add duplicates & overlap with left to the right frame + i = np.random.choice(len(left), n) + right = concat([right, right, left.iloc[i]], ignore_index=True) + + left["left"] = np.random.randn(len(left)) + right["right"] = np.random.randn(len(right)) + + # shuffle left & right frames + i = np.random.permutation(len(left)) + left = left.iloc[i].copy() + left.index = np.arange(len(left)) + + i = np.random.permutation(len(right)) + right = right.iloc[i].copy() + right.index = np.arange(len(right)) + + # manually compute outer merge + ldict, rdict = defaultdict(list), defaultdict(list) + + for idx, row in left.set_index(list("ABCDEFG")).iterrows(): + ldict[idx].append(row["left"]) + + for idx, row in right.set_index(list("ABCDEFG")).iterrows(): + rdict[idx].append(row["right"]) + + vals = [] + for k, lval in ldict.items(): + rval = rdict.get(k, [np.nan]) + for lv, rv in product(lval, rval): + vals.append( + k 
+ + ( + lv, + rv, + ) + ) + + for k, rval in rdict.items(): + if k not in ldict: + for rv in rval: + vals.append( + k + + ( + np.nan, + rv, + ) + ) + + def align(df): + df = df.sort_values(df.columns.tolist()) + df.index = np.arange(len(df)) + return df + + out = DataFrame(vals, columns=list("ABCDEFG") + ["left", "right"]) + out = align(out) + + jmask = { + "left": out["left"].notna(), + "right": out["right"].notna(), + "inner": out["left"].notna() & out["right"].notna(), + "outer": np.ones(len(out), dtype="bool"), + } + + mask = jmask[how] + frame = align(out[mask].copy()) + assert mask.all() ^ mask.any() or how == "outer" + + res = merge(left, right, how=how, sort=sort) + if sort: + kcols = list("ABCDEFG") + tm.assert_frame_equal( + res[kcols].copy(), res[kcols].sort_values(kcols, kind="mergesort") + ) + + # as in GH9092 dtypes break with outer/right join + # 2021-12-18: dtype does not break anymore + tm.assert_frame_equal(frame, align(res)) + + +@pytest.mark.parametrize( + "codes_list, shape", + [ + [ + [ + np.tile([0, 1, 2, 3, 0, 1, 2, 3], 100).astype(np.int64), + np.tile([0, 2, 4, 3, 0, 1, 2, 3], 100).astype(np.int64), + np.tile([5, 1, 0, 2, 3, 0, 5, 4], 100).astype(np.int64), + ], + (4, 5, 6), + ], + [ + [ + np.tile(np.arange(10000, dtype=np.int64), 5), + np.tile(np.arange(10000, dtype=np.int64), 5), + ], + (10000, 10000), + ], + ], +) +def test_decons(codes_list, shape): + group_index = get_group_index(codes_list, shape, sort=True, xnull=True) + codes_list2 = decons_group_index(group_index, shape) + + for a, b in zip(codes_list, codes_list2): + tm.assert_numpy_array_equal(a, b) + + +class TestSafeSort: + @pytest.mark.parametrize( + "arg, exp", + [ + [[3, 1, 2, 0, 4], [0, 1, 2, 3, 4]], + [list("baaacb"), np.array(list("aaabbc"), dtype=object)], + [[], []], + ], + ) + def test_basic_sort(self, arg, exp): + result = safe_sort(arg) + expected = np.array(exp) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("verify", [True, False]) + @pytest.mark.parametrize( + "codes, exp_codes, na_sentinel", + [ + [[0, 1, 1, 2, 3, 0, -1, 4], [3, 1, 1, 2, 0, 3, -1, 4], -1], + [[0, 1, 1, 2, 3, 0, 99, 4], [3, 1, 1, 2, 0, 3, 99, 4], 99], + [[], [], -1], + ], + ) + def test_codes(self, verify, codes, exp_codes, na_sentinel): + values = [3, 1, 2, 0, 4] + expected = np.array([0, 1, 2, 3, 4]) + + result, result_codes = safe_sort( + values, codes, na_sentinel=na_sentinel, verify=verify + ) + expected_codes = np.array(exp_codes, dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + tm.assert_numpy_array_equal(result_codes, expected_codes) + + @pytest.mark.skipif( + is_platform_windows() and is_ci_environment(), + reason="In CI environment can crash thread with: " + "Windows fatal exception: access violation", + ) + @pytest.mark.parametrize("na_sentinel", [-1, 99]) + def test_codes_out_of_bound(self, na_sentinel): + values = [3, 1, 2, 0, 4] + expected = np.array([0, 1, 2, 3, 4]) + + # out of bound indices + codes = [0, 101, 102, 2, 3, 0, 99, 4] + result, result_codes = safe_sort(values, codes, na_sentinel=na_sentinel) + expected_codes = np.array( + [3, na_sentinel, na_sentinel, 2, 0, 3, na_sentinel, 4], dtype=np.intp + ) + tm.assert_numpy_array_equal(result, expected) + tm.assert_numpy_array_equal(result_codes, expected_codes) + + @pytest.mark.parametrize("box", [lambda x: np.array(x, dtype=object), list]) + def test_mixed_integer(self, box): + values = box(["b", 1, 0, "a", 0, "b"]) + result = safe_sort(values) + expected = np.array([0, 0, 1, "a", "b", "b"], dtype=object) + 
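        # safe_sort handles a mixed object array by placing the sorted
        # numeric values ahead of the sorted strings, hence 0, 0, 1 before
        # "a", "b", "b" in the expected output.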
tm.assert_numpy_array_equal(result, expected) + + def test_mixed_integer_with_codes(self): + values = np.array(["b", 1, 0, "a"], dtype=object) + codes = [0, 1, 2, 3, 0, -1, 1] + result, result_codes = safe_sort(values, codes) + expected = np.array([0, 1, "a", "b"], dtype=object) + expected_codes = np.array([3, 1, 0, 2, 3, -1, 1], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + tm.assert_numpy_array_equal(result_codes, expected_codes) + + def test_unsortable(self): + # GH 13714 + arr = np.array([1, 2, datetime.now(), 0, 3], dtype=object) + msg = "'[<>]' not supported between instances of .*" + with pytest.raises(TypeError, match=msg): + safe_sort(arr) + + @pytest.mark.parametrize( + "arg, codes, err, msg", + [ + [1, None, TypeError, "Only list-like objects are allowed"], + [[0, 1, 2], 1, TypeError, "Only list-like objects or None"], + [[0, 1, 2, 1], [0, 1], ValueError, "values should be unique"], + ], + ) + def test_exceptions(self, arg, codes, err, msg): + with pytest.raises(err, match=msg): + safe_sort(values=arg, codes=codes) + + @pytest.mark.parametrize( + "arg, exp", [[[1, 3, 2], [1, 2, 3]], [[1, 3, np.nan, 2], [1, 2, 3, np.nan]]] + ) + def test_extension_array(self, arg, exp): + a = array(arg, dtype="Int64") + result = safe_sort(a) + expected = array(exp, dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + @pytest.mark.parametrize("verify", [True, False]) + @pytest.mark.parametrize("na_sentinel", [-1, 99]) + def test_extension_array_codes(self, verify, na_sentinel): + a = array([1, 3, 2], dtype="Int64") + result, codes = safe_sort( + a, [0, 1, na_sentinel, 2], na_sentinel=na_sentinel, verify=verify + ) + expected_values = array([1, 2, 3], dtype="Int64") + expected_codes = np.array([0, 2, na_sentinel, 1], dtype=np.intp) + tm.assert_extension_array_equal(result, expected_values) + tm.assert_numpy_array_equal(codes, expected_codes) + + +def test_mixed_str_nan(): + values = np.array(["b", np.nan, "a", "b"], dtype=object) + result = safe_sort(values) + expected = np.array([np.nan, "a", "b", "b"], dtype=object) + tm.assert_numpy_array_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/test_take.py b/.local/lib/python3.8/site-packages/pandas/tests/test_take.py new file mode 100644 index 0000000..f0737f7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/test_take.py @@ -0,0 +1,334 @@ +from datetime import datetime +import re + +import numpy as np +import pytest + +from pandas._libs import iNaT + +import pandas._testing as tm +import pandas.core.algorithms as algos + + +@pytest.fixture(params=[True, False]) +def writeable(request): + return request.param + + +# Check that take_nd works both with writeable arrays +# (in which case fast typed memory-views implementation) +# and read-only arrays alike. 
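# A minimal sketch (an editorial addition, not part of the original file) of
# the property described above, using the public pd.api.extensions.take; the
# names and data are illustrative. allow_fill=True treats -1 as "missing".
def _sketch_take_readonly():
    from pandas.api.extensions import take

    base = np.arange(5, dtype=np.float64)
    frozen = base.copy()
    frozen.setflags(write=False)  # simulate a read-only input buffer

    # Per the comment above, the writeable fast path and the read-only
    # fallback must agree, including the NaN written at the -1 position.
    writeable_out = take(base, [1, -1, 3], allow_fill=True)
    readonly_out = take(frozen, [1, -1, 3], allow_fill=True)
    tm.assert_numpy_array_equal(writeable_out, readonly_out)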
+@pytest.fixture( + params=[ + (np.float64, True), + (np.float32, True), + (np.uint64, False), + (np.uint32, False), + (np.uint16, False), + (np.uint8, False), + (np.int64, False), + (np.int32, False), + (np.int16, False), + (np.int8, False), + (np.object_, True), + (np.bool_, False), + ] +) +def dtype_can_hold_na(request): + return request.param + + +@pytest.fixture( + params=[ + (np.int8, np.int16(127), np.int8), + (np.int8, np.int16(128), np.int16), + (np.int32, 1, np.int32), + (np.int32, 2.0, np.float64), + (np.int32, 3.0 + 4.0j, np.complex128), + (np.int32, True, np.object_), + (np.int32, "", np.object_), + (np.float64, 1, np.float64), + (np.float64, 2.0, np.float64), + (np.float64, 3.0 + 4.0j, np.complex128), + (np.float64, True, np.object_), + (np.float64, "", np.object_), + (np.complex128, 1, np.complex128), + (np.complex128, 2.0, np.complex128), + (np.complex128, 3.0 + 4.0j, np.complex128), + (np.complex128, True, np.object_), + (np.complex128, "", np.object_), + (np.bool_, 1, np.object_), + (np.bool_, 2.0, np.object_), + (np.bool_, 3.0 + 4.0j, np.object_), + (np.bool_, True, np.bool_), + (np.bool_, "", np.object_), + ] +) +def dtype_fill_out_dtype(request): + return request.param + + +class TestTake: + # Standard incompatible fill error. + fill_error = re.compile("Incompatible type for fill_value") + + def test_1d_fill_nonna(self, dtype_fill_out_dtype): + dtype, fill_value, out_dtype = dtype_fill_out_dtype + data = np.random.randint(0, 2, 4).astype(dtype) + indexer = [2, 1, 0, -1] + + result = algos.take_nd(data, indexer, fill_value=fill_value) + assert (result[[0, 1, 2]] == data[[2, 1, 0]]).all() + assert result[3] == fill_value + assert result.dtype == out_dtype + + indexer = [2, 1, 0, 1] + + result = algos.take_nd(data, indexer, fill_value=fill_value) + assert (result[[0, 1, 2, 3]] == data[indexer]).all() + assert result.dtype == dtype + + def test_2d_fill_nonna(self, dtype_fill_out_dtype): + dtype, fill_value, out_dtype = dtype_fill_out_dtype + data = np.random.randint(0, 2, (5, 3)).astype(dtype) + indexer = [2, 1, 0, -1] + + result = algos.take_nd(data, indexer, axis=0, fill_value=fill_value) + assert (result[[0, 1, 2], :] == data[[2, 1, 0], :]).all() + assert (result[3, :] == fill_value).all() + assert result.dtype == out_dtype + + result = algos.take_nd(data, indexer, axis=1, fill_value=fill_value) + assert (result[:, [0, 1, 2]] == data[:, [2, 1, 0]]).all() + assert (result[:, 3] == fill_value).all() + assert result.dtype == out_dtype + + indexer = [2, 1, 0, 1] + result = algos.take_nd(data, indexer, axis=0, fill_value=fill_value) + assert (result[[0, 1, 2, 3], :] == data[indexer, :]).all() + assert result.dtype == dtype + + result = algos.take_nd(data, indexer, axis=1, fill_value=fill_value) + assert (result[:, [0, 1, 2, 3]] == data[:, indexer]).all() + assert result.dtype == dtype + + def test_3d_fill_nonna(self, dtype_fill_out_dtype): + dtype, fill_value, out_dtype = dtype_fill_out_dtype + + data = np.random.randint(0, 2, (5, 4, 3)).astype(dtype) + indexer = [2, 1, 0, -1] + + result = algos.take_nd(data, indexer, axis=0, fill_value=fill_value) + assert (result[[0, 1, 2], :, :] == data[[2, 1, 0], :, :]).all() + assert (result[3, :, :] == fill_value).all() + assert result.dtype == out_dtype + + result = algos.take_nd(data, indexer, axis=1, fill_value=fill_value) + assert (result[:, [0, 1, 2], :] == data[:, [2, 1, 0], :]).all() + assert (result[:, 3, :] == fill_value).all() + assert result.dtype == out_dtype + + result = algos.take_nd(data, indexer, axis=2, 
fill_value=fill_value) + assert (result[:, :, [0, 1, 2]] == data[:, :, [2, 1, 0]]).all() + assert (result[:, :, 3] == fill_value).all() + assert result.dtype == out_dtype + + indexer = [2, 1, 0, 1] + result = algos.take_nd(data, indexer, axis=0, fill_value=fill_value) + assert (result[[0, 1, 2, 3], :, :] == data[indexer, :, :]).all() + assert result.dtype == dtype + + result = algos.take_nd(data, indexer, axis=1, fill_value=fill_value) + assert (result[:, [0, 1, 2, 3], :] == data[:, indexer, :]).all() + assert result.dtype == dtype + + result = algos.take_nd(data, indexer, axis=2, fill_value=fill_value) + assert (result[:, :, [0, 1, 2, 3]] == data[:, :, indexer]).all() + assert result.dtype == dtype + + def test_1d_other_dtypes(self): + arr = np.random.randn(10).astype(np.float32) + + indexer = [1, 2, 3, -1] + result = algos.take_nd(arr, indexer) + expected = arr.take(indexer) + expected[-1] = np.nan + tm.assert_almost_equal(result, expected) + + def test_2d_other_dtypes(self): + arr = np.random.randn(10, 5).astype(np.float32) + + indexer = [1, 2, 3, -1] + + # axis=0 + result = algos.take_nd(arr, indexer, axis=0) + expected = arr.take(indexer, axis=0) + expected[-1] = np.nan + tm.assert_almost_equal(result, expected) + + # axis=1 + result = algos.take_nd(arr, indexer, axis=1) + expected = arr.take(indexer, axis=1) + expected[:, -1] = np.nan + tm.assert_almost_equal(result, expected) + + def test_1d_bool(self): + arr = np.array([0, 1, 0], dtype=bool) + + result = algos.take_nd(arr, [0, 2, 2, 1]) + expected = arr.take([0, 2, 2, 1]) + tm.assert_numpy_array_equal(result, expected) + + result = algos.take_nd(arr, [0, 2, -1]) + assert result.dtype == np.object_ + + def test_2d_bool(self): + arr = np.array([[0, 1, 0], [1, 0, 1], [0, 1, 1]], dtype=bool) + + result = algos.take_nd(arr, [0, 2, 2, 1]) + expected = arr.take([0, 2, 2, 1], axis=0) + tm.assert_numpy_array_equal(result, expected) + + result = algos.take_nd(arr, [0, 2, 2, 1], axis=1) + expected = arr.take([0, 2, 2, 1], axis=1) + tm.assert_numpy_array_equal(result, expected) + + result = algos.take_nd(arr, [0, 2, -1]) + assert result.dtype == np.object_ + + def test_2d_float32(self): + arr = np.random.randn(4, 3).astype(np.float32) + indexer = [0, 2, -1, 1, -1] + + # axis=0 + result = algos.take_nd(arr, indexer, axis=0) + + expected = arr.take(indexer, axis=0) + expected[[2, 4], :] = np.nan + tm.assert_almost_equal(result, expected) + + # axis=1 + result = algos.take_nd(arr, indexer, axis=1) + expected = arr.take(indexer, axis=1) + expected[:, [2, 4]] = np.nan + tm.assert_almost_equal(result, expected) + + def test_2d_datetime64(self): + # 2005/01/01 - 2006/01/01 + arr = np.random.randint(11_045_376, 11_360_736, (5, 3)) * 100_000_000_000 + arr = arr.view(dtype="datetime64[ns]") + indexer = [0, 2, -1, 1, -1] + + # axis=0 + result = algos.take_nd(arr, indexer, axis=0) + expected = arr.take(indexer, axis=0) + expected.view(np.int64)[[2, 4], :] = iNaT + tm.assert_almost_equal(result, expected) + + result = algos.take_nd(arr, indexer, axis=0, fill_value=datetime(2007, 1, 1)) + expected = arr.take(indexer, axis=0) + expected[[2, 4], :] = datetime(2007, 1, 1) + tm.assert_almost_equal(result, expected) + + # axis=1 + result = algos.take_nd(arr, indexer, axis=1) + expected = arr.take(indexer, axis=1) + expected.view(np.int64)[:, [2, 4]] = iNaT + tm.assert_almost_equal(result, expected) + + result = algos.take_nd(arr, indexer, axis=1, fill_value=datetime(2007, 1, 1)) + expected = arr.take(indexer, axis=1) + expected[:, [2, 4]] = datetime(2007, 1, 1) 
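        # Unlike the NaT branches above, which write the iNaT sentinel
        # through an int64 view, a real datetime fill_value can be assigned
        # directly and numpy coerces it to datetime64[ns].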
+ tm.assert_almost_equal(result, expected) + + def test_take_axis_0(self): + arr = np.arange(12).reshape(4, 3) + result = algos.take(arr, [0, -1]) + expected = np.array([[0, 1, 2], [9, 10, 11]]) + tm.assert_numpy_array_equal(result, expected) + + # allow_fill=True + result = algos.take(arr, [0, -1], allow_fill=True, fill_value=0) + expected = np.array([[0, 1, 2], [0, 0, 0]]) + tm.assert_numpy_array_equal(result, expected) + + def test_take_axis_1(self): + arr = np.arange(12).reshape(4, 3) + result = algos.take(arr, [0, -1], axis=1) + expected = np.array([[0, 2], [3, 5], [6, 8], [9, 11]]) + tm.assert_numpy_array_equal(result, expected) + + # allow_fill=True + result = algos.take(arr, [0, -1], axis=1, allow_fill=True, fill_value=0) + expected = np.array([[0, 0], [3, 0], [6, 0], [9, 0]]) + tm.assert_numpy_array_equal(result, expected) + + # GH#26976 make sure we validate along the correct axis + with pytest.raises(IndexError, match="indices are out-of-bounds"): + algos.take(arr, [0, 3], axis=1, allow_fill=True, fill_value=0) + + def test_take_non_hashable_fill_value(self): + arr = np.array([1, 2, 3]) + indexer = np.array([1, -1]) + with pytest.raises(ValueError, match="fill_value must be a scalar"): + algos.take(arr, indexer, allow_fill=True, fill_value=[1]) + + # with object dtype it is allowed + arr = np.array([1, 2, 3], dtype=object) + result = algos.take(arr, indexer, allow_fill=True, fill_value=[1]) + expected = np.array([2, [1]], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + +class TestExtensionTake: + # The take method found in pd.api.extensions + + def test_bounds_check_large(self): + arr = np.array([1, 2]) + + msg = "indices are out-of-bounds" + with pytest.raises(IndexError, match=msg): + algos.take(arr, [2, 3], allow_fill=True) + + msg = "index 2 is out of bounds for( axis 0 with)? 
size 2" + with pytest.raises(IndexError, match=msg): + algos.take(arr, [2, 3], allow_fill=False) + + def test_bounds_check_small(self): + arr = np.array([1, 2, 3], dtype=np.int64) + indexer = [0, -1, -2] + + msg = r"'indices' contains values less than allowed \(-2 < -1\)" + with pytest.raises(ValueError, match=msg): + algos.take(arr, indexer, allow_fill=True) + + result = algos.take(arr, indexer) + expected = np.array([1, 3, 2], dtype=np.int64) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("allow_fill", [True, False]) + def test_take_empty(self, allow_fill): + arr = np.array([], dtype=np.int64) + # empty take is ok + result = algos.take(arr, [], allow_fill=allow_fill) + tm.assert_numpy_array_equal(arr, result) + + msg = "|".join( + [ + "cannot do a non-empty take from an empty axes.", + "indices are out-of-bounds", + ] + ) + with pytest.raises(IndexError, match=msg): + algos.take(arr, [0], allow_fill=allow_fill) + + def test_take_na_empty(self): + result = algos.take(np.array([]), [-1, -1], allow_fill=True, fill_value=0.0) + expected = np.array([0.0, 0.0]) + tm.assert_numpy_array_equal(result, expected) + + def test_take_coerces_list(self): + arr = [1, 2, 3] + result = algos.take(arr, [0, 0]) + expected = np.array([1, 1]) + tm.assert_numpy_array_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tools/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/tools/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tools/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/tools/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..fdbf3f6 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/tools/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tools/__pycache__/test_to_datetime.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/tools/__pycache__/test_to_datetime.cpython-38.pyc new file mode 100644 index 0000000..ce69303 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/tools/__pycache__/test_to_datetime.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tools/__pycache__/test_to_numeric.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/tools/__pycache__/test_to_numeric.cpython-38.pyc new file mode 100644 index 0000000..270d52f Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/tools/__pycache__/test_to_numeric.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tools/__pycache__/test_to_time.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/tools/__pycache__/test_to_time.cpython-38.pyc new file mode 100644 index 0000000..24c2fbe Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/tools/__pycache__/test_to_time.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tools/__pycache__/test_to_timedelta.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/tools/__pycache__/test_to_timedelta.cpython-38.pyc new file mode 100644 index 0000000..de398ed Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/tools/__pycache__/test_to_timedelta.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tools/test_to_datetime.py 
b/.local/lib/python3.8/site-packages/pandas/tests/tools/test_to_datetime.py new file mode 100644 index 0000000..0a9d422 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/tools/test_to_datetime.py @@ -0,0 +1,2719 @@ +""" test to_datetime """ + +import calendar +from collections import deque +from datetime import ( + datetime, + timedelta, +) +from decimal import Decimal +import locale + +from dateutil.parser import parse +from dateutil.tz.tz import tzoffset +import numpy as np +import pytest +import pytz + +from pandas._libs import tslib +from pandas._libs.tslibs import ( + iNaT, + parsing, +) +from pandas.errors import ( + OutOfBoundsDatetime, + OutOfBoundsTimedelta, +) +import pandas.util._test_decorators as td + +from pandas.core.dtypes.common import is_datetime64_ns_dtype + +import pandas as pd +from pandas import ( + DataFrame, + DatetimeIndex, + Index, + NaT, + Series, + Timestamp, + date_range, + isna, + to_datetime, +) +import pandas._testing as tm +from pandas.core.arrays import DatetimeArray +from pandas.core.tools import datetimes as tools +from pandas.core.tools.datetimes import start_caching_at +from pandas.util.version import Version + + +@pytest.fixture(params=[True, False]) +def cache(request): + """ + cache keyword to pass to to_datetime. + """ + return request.param + + +class TestTimeConversionFormats: + @pytest.mark.parametrize("readonly", [True, False]) + def test_to_datetime_readonly(self, readonly): + # GH#34857 + arr = np.array([], dtype=object) + if readonly: + arr.setflags(write=False) + result = to_datetime(arr) + expected = to_datetime([]) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("box", [Series, Index]) + @pytest.mark.parametrize( + "format, expected", + [ + [ + "%d/%m/%Y", + [Timestamp("20000101"), Timestamp("20000201"), Timestamp("20000301")], + ], + [ + "%m/%d/%Y", + [Timestamp("20000101"), Timestamp("20000102"), Timestamp("20000103")], + ], + ], + ) + def test_to_datetime_format(self, cache, box, format, expected): + values = box(["1/1/2000", "1/2/2000", "1/3/2000"]) + result = to_datetime(values, format=format, cache=cache) + expected = box(expected) + if isinstance(expected, Series): + tm.assert_series_equal(result, expected) + else: + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "arg, expected, format", + [ + ["1/1/2000", "20000101", "%d/%m/%Y"], + ["1/1/2000", "20000101", "%m/%d/%Y"], + ["1/2/2000", "20000201", "%d/%m/%Y"], + ["1/2/2000", "20000102", "%m/%d/%Y"], + ["1/3/2000", "20000301", "%d/%m/%Y"], + ["1/3/2000", "20000103", "%m/%d/%Y"], + ], + ) + def test_to_datetime_format_scalar(self, cache, arg, expected, format): + result = to_datetime(arg, format=format, cache=cache) + expected = Timestamp(expected) + assert result == expected + + def test_to_datetime_format_YYYYMMDD(self, cache): + ser = Series([19801222, 19801222] + [19810105] * 5) + expected = Series([Timestamp(x) for x in ser.apply(str)]) + + result = to_datetime(ser, format="%Y%m%d", cache=cache) + tm.assert_series_equal(result, expected) + + result = to_datetime(ser.apply(str), format="%Y%m%d", cache=cache) + tm.assert_series_equal(result, expected) + + def test_to_datetime_format_YYYYMMDD_with_nat(self, cache): + ser = Series([19801222, 19801222] + [19810105] * 5) + # with NaT + expected = Series( + [Timestamp("19801222"), Timestamp("19801222")] + [Timestamp("19810105")] * 5 + ) + expected[2] = np.nan + ser[2] = np.nan + + result = to_datetime(ser, format="%Y%m%d", cache=cache) + tm.assert_series_equal(result, 
expected) + + # string with NaT + ser2 = ser.apply(str) + ser2[2] = "nat" + result = to_datetime(ser2, format="%Y%m%d", cache=cache) + tm.assert_series_equal(result, expected) + + def test_to_datetime_format_YYYYMMDD_ignore(self, cache): + # coercion + # GH 7930 + ser = Series([20121231, 20141231, 99991231]) + result = to_datetime(ser, format="%Y%m%d", errors="ignore", cache=cache) + expected = Series( + [datetime(2012, 12, 31), datetime(2014, 12, 31), datetime(9999, 12, 31)], + dtype=object, + ) + tm.assert_series_equal(result, expected) + + def test_to_datetime_format_YYYYMMDD_coercion(self, cache): + # coercion + # GH 7930 + ser = Series([20121231, 20141231, 99991231]) + result = to_datetime(ser, format="%Y%m%d", errors="coerce", cache=cache) + expected = Series(["20121231", "20141231", "NaT"], dtype="M8[ns]") + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "input_s", + [ + # Null values with Strings + ["19801222", "20010112", None], + ["19801222", "20010112", np.nan], + ["19801222", "20010112", NaT], + ["19801222", "20010112", "NaT"], + # Null values with Integers + [19801222, 20010112, None], + [19801222, 20010112, np.nan], + [19801222, 20010112, NaT], + [19801222, 20010112, "NaT"], + ], + ) + def test_to_datetime_format_YYYYMMDD_with_none(self, input_s): + # GH 30011 + # format='%Y%m%d' + # with None + expected = Series([Timestamp("19801222"), Timestamp("20010112"), NaT]) + result = Series(to_datetime(input_s, format="%Y%m%d")) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "input_s, expected", + [ + # NaN before strings with invalid date values + [ + Series(["19801222", np.nan, "20010012", "10019999"]), + Series([Timestamp("19801222"), np.nan, np.nan, np.nan]), + ], + # NaN after strings with invalid date values + [ + Series(["19801222", "20010012", "10019999", np.nan]), + Series([Timestamp("19801222"), np.nan, np.nan, np.nan]), + ], + # NaN before integers with invalid date values + [ + Series([20190813, np.nan, 20010012, 20019999]), + Series([Timestamp("20190813"), np.nan, np.nan, np.nan]), + ], + # NaN after integers with invalid date values + [ + Series([20190813, 20010012, np.nan, 20019999]), + Series([Timestamp("20190813"), np.nan, np.nan, np.nan]), + ], + ], + ) + def test_to_datetime_format_YYYYMMDD_overflow(self, input_s, expected): + # GH 25512 + # format='%Y%m%d', errors='coerce' + result = to_datetime(input_s, format="%Y%m%d", errors="coerce") + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "data, format, expected", + [ + ([pd.NA], "%Y%m%d%H%M%S", DatetimeIndex(["NaT"])), + ([pd.NA], None, DatetimeIndex(["NaT"])), + ( + [pd.NA, "20210202202020"], + "%Y%m%d%H%M%S", + DatetimeIndex(["NaT", "2021-02-02 20:20:20"]), + ), + (["201010", pd.NA], "%y%m%d", DatetimeIndex(["2020-10-10", "NaT"])), + (["201010", pd.NA], "%d%m%y", DatetimeIndex(["2010-10-20", "NaT"])), + (["201010", pd.NA], None, DatetimeIndex(["2010-10-20", "NaT"])), + ([None, np.nan, pd.NA], None, DatetimeIndex(["NaT", "NaT", "NaT"])), + ([None, np.nan, pd.NA], "%Y%m%d", DatetimeIndex(["NaT", "NaT", "NaT"])), + ], + ) + def test_to_datetime_with_NA(self, data, format, expected): + # GH#42957 + result = to_datetime(data, format=format) + tm.assert_index_equal(result, expected) + + def test_to_datetime_format_integer(self, cache): + # GH 10178 + ser = Series([2000, 2001, 2002]) + expected = Series([Timestamp(x) for x in ser.apply(str)]) + + result = to_datetime(ser, format="%Y", cache=cache) + tm.assert_series_equal(result, expected) + 
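        # The %Y path accepts raw integers because to_datetime effectively
        # stringifies each element before matching the format; the %Y%m case
        # below exercises the same path with six-digit year-month values.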
+ ser = Series([200001, 200105, 200206]) + expected = Series([Timestamp(x[:4] + "-" + x[4:]) for x in ser.apply(str)]) + + result = to_datetime(ser, format="%Y%m", cache=cache) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "int_date, expected", + [ + # valid date, length == 8 + [20121030, datetime(2012, 10, 30)], + # short valid date, length == 6 + [199934, datetime(1999, 3, 4)], + # long integer date partially parsed to datetime(2012,1,1), length > 8 + [2012010101, 2012010101], + # invalid date partially parsed to datetime(2012,9,9), length == 8 + [20129930, 20129930], + # short integer date partially parsed to datetime(2012,9,9), length < 8 + [2012993, 2012993], + # short invalid date, length == 4 + [2121, 2121], + ], + ) + def test_int_to_datetime_format_YYYYMMDD_typeerror(self, int_date, expected): + # GH 26583 + result = to_datetime(int_date, format="%Y%m%d", errors="ignore") + assert result == expected + + def test_to_datetime_format_microsecond(self, cache): + month_abbr = calendar.month_abbr[4] + val = f"01-{month_abbr}-2011 00:00:01.978" + + format = "%d-%b-%Y %H:%M:%S.%f" + result = to_datetime(val, format=format, cache=cache) + exp = datetime.strptime(val, format) + assert result == exp + + @pytest.mark.parametrize( + "value, format, dt", + [ + ["01/10/2010 15:20", "%m/%d/%Y %H:%M", Timestamp("2010-01-10 15:20")], + ["01/10/2010 05:43", "%m/%d/%Y %I:%M", Timestamp("2010-01-10 05:43")], + [ + "01/10/2010 13:56:01", + "%m/%d/%Y %H:%M:%S", + Timestamp("2010-01-10 13:56:01"), + ], + pytest.param( + "01/10/2010 08:14 PM", + "%m/%d/%Y %I:%M %p", + Timestamp("2010-01-10 20:14"), + marks=pytest.mark.xfail( + locale.getlocale()[0] in ("zh_CN", "it_IT"), + reason="fail on a CI build with LC_ALL=zh_CN.utf8/it_IT.utf8", + ), + ), + pytest.param( + "01/10/2010 07:40 AM", + "%m/%d/%Y %I:%M %p", + Timestamp("2010-01-10 07:40"), + marks=pytest.mark.xfail( + locale.getlocale()[0] in ("zh_CN", "it_IT"), + reason="fail on a CI build with LC_ALL=zh_CN.utf8/it_IT.utf8", + ), + ), + pytest.param( + "01/10/2010 09:12:56 AM", + "%m/%d/%Y %I:%M:%S %p", + Timestamp("2010-01-10 09:12:56"), + marks=pytest.mark.xfail( + locale.getlocale()[0] in ("zh_CN", "it_IT"), + reason="fail on a CI build with LC_ALL=zh_CN.utf8/it_IT.utf8", + ), + ), + ], + ) + def test_to_datetime_format_time(self, cache, value, format, dt): + assert to_datetime(value, format=format, cache=cache) == dt + + @td.skip_if_has_locale + def test_to_datetime_with_non_exact(self, cache): + # GH 10834 + # 8904 + # exact kw + ser = Series( + ["19MAY11", "foobar19MAY11", "19MAY11:00:00:00", "19MAY11 00:00:00Z"] + ) + result = to_datetime(ser, format="%d%b%y", exact=False, cache=cache) + expected = to_datetime( + ser.str.extract(r"(\d+\w+\d+)", expand=False), format="%d%b%y", cache=cache + ) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "arg", + [ + "2012-01-01 09:00:00.000000001", + "2012-01-01 09:00:00.000001", + "2012-01-01 09:00:00.001", + "2012-01-01 09:00:00.001000", + "2012-01-01 09:00:00.001000000", + ], + ) + def test_parse_nanoseconds_with_formula(self, cache, arg): + + # GH8989 + # truncating the nanoseconds when a format was provided + expected = to_datetime(arg, cache=cache) + result = to_datetime(arg, format="%Y-%m-%d %H:%M:%S.%f", cache=cache) + assert result == expected + + @pytest.mark.parametrize( + "value,fmt,expected", + [ + ["2009324", "%Y%W%w", Timestamp("2009-08-13")], + ["2013020", "%Y%U%w", Timestamp("2013-01-13")], + ], + ) + def test_to_datetime_format_weeks(self, 
value, fmt, expected, cache): + assert to_datetime(value, format=fmt, cache=cache) == expected + + @pytest.mark.parametrize( + "fmt,dates,expected_dates", + [ + [ + "%Y-%m-%d %H:%M:%S %Z", + ["2010-01-01 12:00:00 UTC"] * 2, + [Timestamp("2010-01-01 12:00:00", tz="UTC")] * 2, + ], + [ + "%Y-%m-%d %H:%M:%S %Z", + [ + "2010-01-01 12:00:00 UTC", + "2010-01-01 12:00:00 GMT", + "2010-01-01 12:00:00 US/Pacific", + ], + [ + Timestamp("2010-01-01 12:00:00", tz="UTC"), + Timestamp("2010-01-01 12:00:00", tz="GMT"), + Timestamp("2010-01-01 12:00:00", tz="US/Pacific"), + ], + ], + [ + "%Y-%m-%d %H:%M:%S%z", + ["2010-01-01 12:00:00+0100"] * 2, + [Timestamp("2010-01-01 12:00:00", tzinfo=pytz.FixedOffset(60))] * 2, + ], + [ + "%Y-%m-%d %H:%M:%S %z", + ["2010-01-01 12:00:00 +0100"] * 2, + [Timestamp("2010-01-01 12:00:00", tzinfo=pytz.FixedOffset(60))] * 2, + ], + [ + "%Y-%m-%d %H:%M:%S %z", + ["2010-01-01 12:00:00 +0100", "2010-01-01 12:00:00 -0100"], + [ + Timestamp("2010-01-01 12:00:00", tzinfo=pytz.FixedOffset(60)), + Timestamp("2010-01-01 12:00:00", tzinfo=pytz.FixedOffset(-60)), + ], + ], + [ + "%Y-%m-%d %H:%M:%S %z", + ["2010-01-01 12:00:00 Z", "2010-01-01 12:00:00 Z"], + [ + Timestamp( + "2010-01-01 12:00:00", tzinfo=pytz.FixedOffset(0) + ), # pytz coerces to UTC + Timestamp("2010-01-01 12:00:00", tzinfo=pytz.FixedOffset(0)), + ], + ], + ], + ) + def test_to_datetime_parse_tzname_or_tzoffset(self, fmt, dates, expected_dates): + # GH 13486 + result = to_datetime(dates, format=fmt) + expected = Index(expected_dates) + tm.assert_equal(result, expected) + + def test_to_datetime_parse_tzname_or_tzoffset_different_tz_to_utc(self): + # GH 32792 + dates = [ + "2010-01-01 12:00:00 +0100", + "2010-01-01 12:00:00 -0100", + "2010-01-01 12:00:00 +0300", + "2010-01-01 12:00:00 +0400", + ] + expected_dates = [ + "2010-01-01 11:00:00+00:00", + "2010-01-01 13:00:00+00:00", + "2010-01-01 09:00:00+00:00", + "2010-01-01 08:00:00+00:00", + ] + fmt = "%Y-%m-%d %H:%M:%S %z" + + result = to_datetime(dates, format=fmt, utc=True) + expected = DatetimeIndex(expected_dates) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "offset", ["+0", "-1foo", "UTCbar", ":10", "+01:000:01", ""] + ) + def test_to_datetime_parse_timezone_malformed(self, offset): + fmt = "%Y-%m-%d %H:%M:%S %z" + date = "2010-01-01 12:00:00 " + offset + + msg = "does not match format|unconverted data remains" + with pytest.raises(ValueError, match=msg): + to_datetime([date], format=fmt) + + def test_to_datetime_parse_timezone_keeps_name(self): + # GH 21697 + fmt = "%Y-%m-%d %H:%M:%S %z" + arg = Index(["2010-01-01 12:00:00 Z"], name="foo") + result = to_datetime(arg, format=fmt) + expected = DatetimeIndex(["2010-01-01 12:00:00"], tz="UTC", name="foo") + tm.assert_index_equal(result, expected) + + +class TestToDatetime: + @pytest.mark.parametrize( + "s, _format, dt", + [ + ["2015-1-1", "%G-%V-%u", datetime(2014, 12, 29, 0, 0)], + ["2015-1-4", "%G-%V-%u", datetime(2015, 1, 1, 0, 0)], + ["2015-1-7", "%G-%V-%u", datetime(2015, 1, 4, 0, 0)], + ], + ) + def test_to_datetime_iso_week_year_format(self, s, _format, dt): + # See GH#16607 + assert to_datetime(s, format=_format) == dt + + @pytest.mark.parametrize( + "msg, s, _format", + [ + [ + "ISO week directive '%V' must be used with the ISO year directive " + "'%G' and a weekday directive '%A', '%a', '%w', or '%u'.", + "1999 50", + "%Y %V", + ], + [ + "ISO year directive '%G' must be used with the ISO week directive " + "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.", + "1999 51", + 
"%G %V", + ], + [ + "ISO year directive '%G' must be used with the ISO week directive " + "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.", + "1999 Monday", + "%G %A", + ], + [ + "ISO year directive '%G' must be used with the ISO week directive " + "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.", + "1999 Mon", + "%G %a", + ], + [ + "ISO year directive '%G' must be used with the ISO week directive " + "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.", + "1999 6", + "%G %w", + ], + [ + "ISO year directive '%G' must be used with the ISO week directive " + "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.", + "1999 6", + "%G %u", + ], + [ + "ISO year directive '%G' must be used with the ISO week directive " + "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.", + "2051", + "%G", + ], + [ + "Day of the year directive '%j' is not compatible with ISO year " + "directive '%G'. Use '%Y' instead.", + "1999 51 6 256", + "%G %V %u %j", + ], + [ + "ISO week directive '%V' is incompatible with the year directive " + "'%Y'. Use the ISO year '%G' instead.", + "1999 51 Sunday", + "%Y %V %A", + ], + [ + "ISO week directive '%V' is incompatible with the year directive " + "'%Y'. Use the ISO year '%G' instead.", + "1999 51 Sun", + "%Y %V %a", + ], + [ + "ISO week directive '%V' is incompatible with the year directive " + "'%Y'. Use the ISO year '%G' instead.", + "1999 51 1", + "%Y %V %w", + ], + [ + "ISO week directive '%V' is incompatible with the year directive " + "'%Y'. Use the ISO year '%G' instead.", + "1999 51 1", + "%Y %V %u", + ], + [ + "ISO week directive '%V' must be used with the ISO year directive " + "'%G' and a weekday directive '%A', '%a', '%w', or '%u'.", + "20", + "%V", + ], + ], + ) + def test_error_iso_week_year(self, msg, s, _format): + # See GH#16607 + # This test checks for errors thrown when giving the wrong format + # However, as discussed on PR#25541, overriding the locale + # causes a different error to be thrown due to the format being + # locale specific, but the test data is in english. + # Therefore, the tests only run when locale is not overwritten, + # as a sort of solution to this problem. 
+ if locale.getlocale() != ("zh_CN", "UTF-8") and locale.getlocale() != ( + "it_IT", + "UTF-8", + ): + with pytest.raises(ValueError, match=msg): + to_datetime(s, format=_format) + + @pytest.mark.parametrize("tz", [None, "US/Central"]) + def test_to_datetime_dtarr(self, tz): + # DatetimeArray + dti = date_range("1965-04-03", periods=19, freq="2W", tz=tz) + arr = DatetimeArray(dti) + + result = to_datetime(arr) + assert result is arr + + def test_to_datetime_pydatetime(self): + actual = to_datetime(datetime(2008, 1, 15)) + assert actual == datetime(2008, 1, 15) + + def test_to_datetime_YYYYMMDD(self): + actual = to_datetime("20080115") + assert actual == datetime(2008, 1, 15) + + def test_to_datetime_unparseable_ignore(self): + # unparsable + ser = "Month 1, 1999" + assert to_datetime(ser, errors="ignore") == ser + + @td.skip_if_windows # `tm.set_timezone` does not work in Windows + def test_to_datetime_now(self): + # See GH#18666 + with tm.set_timezone("US/Eastern"): + msg = "The parsing of 'now' in pd.to_datetime" + with tm.assert_produces_warning( + FutureWarning, match=msg, check_stacklevel=False + ): + # checking stacklevel is tricky because we go through cython code + # GH#18705 + npnow = np.datetime64("now").astype("datetime64[ns]") + pdnow = to_datetime("now") + pdnow2 = to_datetime(["now"])[0] + + # These should all be equal with infinite perf; this gives + # a generous margin of 10 seconds + assert abs(pdnow.value - npnow.astype(np.int64)) < 1e10 + assert abs(pdnow2.value - npnow.astype(np.int64)) < 1e10 + + assert pdnow.tzinfo is None + assert pdnow2.tzinfo is None + + @td.skip_if_windows # `tm.set_timezone` does not work in Windows + @pytest.mark.parametrize("tz", ["Pacific/Auckland", "US/Samoa"]) + def test_to_datetime_today(self, tz): + # See GH#18666 + # Test with one timezone far ahead of UTC and another far behind, so + # one of these will _almost_ always be in a different day from UTC. + # Unfortunately, between 12 and 1 AM Samoa time both of these timezones + # _and_ UTC fall on the same day, so this test will not detect the + # regression introduced in #18666. 
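        # Because "today" is a wall-clock read taken at slightly different
        # moments by numpy and pandas, the assertions below allow 1e10 ns
        # (ten seconds) of slack rather than requiring exact equality.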
+ with tm.set_timezone(tz): + nptoday = np.datetime64("today").astype("datetime64[ns]").astype(np.int64) + pdtoday = to_datetime("today") + pdtoday2 = to_datetime(["today"])[0] + + tstoday = Timestamp("today") + tstoday2 = Timestamp.today() + + # These should all be equal with infinite perf; this gives + # a generous margin of 10 seconds + assert abs(pdtoday.normalize().value - nptoday) < 1e10 + assert abs(pdtoday2.normalize().value - nptoday) < 1e10 + assert abs(pdtoday.value - tstoday.value) < 1e10 + assert abs(pdtoday.value - tstoday2.value) < 1e10 + + assert pdtoday.tzinfo is None + assert pdtoday2.tzinfo is None + + @pytest.mark.parametrize("arg", ["now", "today"]) + def test_to_datetime_today_now_unicode_bytes(self, arg): + warn = FutureWarning if arg == "now" else None + msg = "The parsing of 'now' in pd.to_datetime" + with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False): + # checking stacklevel is tricky because we go through cython code + # GH#18705 + to_datetime([arg]) + + @pytest.mark.parametrize( + "dt", [np.datetime64("2000-01-01"), np.datetime64("2000-01-02")] + ) + def test_to_datetime_dt64s(self, cache, dt): + assert to_datetime(dt, cache=cache) == Timestamp(dt) + + @pytest.mark.parametrize( + "dt", [np.datetime64("1000-01-01"), np.datetime64("5000-01-02")] + ) + def test_to_datetime_dt64s_out_of_bounds(self, cache, dt): + msg = f"Out of bounds nanosecond timestamp: {dt}" + with pytest.raises(OutOfBoundsDatetime, match=msg): + to_datetime(dt, errors="raise") + with pytest.raises(OutOfBoundsDatetime, match=msg): + Timestamp(dt) + assert to_datetime(dt, errors="coerce", cache=cache) is NaT + + @pytest.mark.parametrize("unit", ["s", "D"]) + def test_to_datetime_array_of_dt64s(self, cache, unit): + # https://github.com/pandas-dev/pandas/issues/31491 + # Need at least 50 to ensure cache is used. 
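        # to_datetime only attempts its conversion cache for inputs of at
        # least `start_caching_at` (50) elements, so the 2 * 30 = 60 element
        # list below is the smallest convenient input that exercises the
        # cached path.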
+ dts = [ + np.datetime64("2000-01-01", unit), + np.datetime64("2000-01-02", unit), + ] * 30 + # Assuming all datetimes are in bounds, to_datetime() returns + # an array that is equal to Timestamp() parsing + tm.assert_index_equal( + to_datetime(dts, cache=cache), + DatetimeIndex([Timestamp(x).asm8 for x in dts]), + ) + + # A list of datetimes where the last one is out of bounds + dts_with_oob = dts + [np.datetime64("9999-01-01")] + + msg = "Out of bounds nanosecond timestamp: 9999-01-01 00:00:00" + with pytest.raises(OutOfBoundsDatetime, match=msg): + to_datetime(dts_with_oob, errors="raise") + + tm.assert_index_equal( + to_datetime(dts_with_oob, errors="coerce", cache=cache), + DatetimeIndex( + [Timestamp(dts_with_oob[0]).asm8, Timestamp(dts_with_oob[1]).asm8] * 30 + + [NaT], + ), + ) + + # With errors='ignore', out of bounds datetime64s + # are converted to their .item(), which depending on the version of + # numpy is either a python datetime.datetime or datetime.date + tm.assert_index_equal( + to_datetime(dts_with_oob, errors="ignore", cache=cache), + Index([dt.item() for dt in dts_with_oob]), + ) + + def test_to_datetime_tz(self, cache): + + # xref 8260 + # uniform returns a DatetimeIndex + arr = [ + Timestamp("2013-01-01 13:00:00-0800", tz="US/Pacific"), + Timestamp("2013-01-02 14:00:00-0800", tz="US/Pacific"), + ] + result = to_datetime(arr, cache=cache) + expected = DatetimeIndex( + ["2013-01-01 13:00:00", "2013-01-02 14:00:00"], tz="US/Pacific" + ) + tm.assert_index_equal(result, expected) + + def test_to_datetime_tz_mixed_raises(self, cache): + # mixed tzs will raise + arr = [ + Timestamp("2013-01-01 13:00:00", tz="US/Pacific"), + Timestamp("2013-01-02 14:00:00", tz="US/Eastern"), + ] + msg = ( + "Tz-aware datetime.datetime cannot be " + "converted to datetime64 unless utc=True" + ) + with pytest.raises(ValueError, match=msg): + to_datetime(arr, cache=cache) + + def test_to_datetime_different_offsets(self, cache): + # inspired by asv timeseries.ToDatetimeNONISO8601 benchmark + # see GH-26097 for more + ts_string_1 = "March 1, 2018 12:00:00+0400" + ts_string_2 = "March 1, 2018 12:00:00+0500" + arr = [ts_string_1] * 5 + [ts_string_2] * 5 + expected = Index([parse(x) for x in arr]) + result = to_datetime(arr, cache=cache) + tm.assert_index_equal(result, expected) + + def test_to_datetime_tz_pytz(self, cache): + # see gh-8260 + us_eastern = pytz.timezone("US/Eastern") + arr = np.array( + [ + us_eastern.localize( + datetime(year=2000, month=1, day=1, hour=3, minute=0) + ), + us_eastern.localize( + datetime(year=2000, month=6, day=1, hour=3, minute=0) + ), + ], + dtype=object, + ) + result = to_datetime(arr, utc=True, cache=cache) + expected = DatetimeIndex( + ["2000-01-01 08:00:00+00:00", "2000-06-01 07:00:00+00:00"], + dtype="datetime64[ns, UTC]", + freq=None, + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "init_constructor, end_constructor", + [ + (Index, DatetimeIndex), + (list, DatetimeIndex), + (np.array, DatetimeIndex), + (Series, Series), + ], + ) + def test_to_datetime_utc_true(self, cache, init_constructor, end_constructor): + # See gh-11934 & gh-6415 + data = ["20100102 121314", "20100102 121315"] + expected_data = [ + Timestamp("2010-01-02 12:13:14", tz="utc"), + Timestamp("2010-01-02 12:13:15", tz="utc"), + ] + + result = to_datetime( + init_constructor(data), format="%Y%m%d %H%M%S", utc=True, cache=cache + ) + expected = end_constructor(expected_data) + tm.assert_equal(result, expected) + + @pytest.mark.parametrize( + "scalar, expected", + [ 
+ ["20100102 121314", Timestamp("2010-01-02 12:13:14", tz="utc")], + ["20100102 121315", Timestamp("2010-01-02 12:13:15", tz="utc")], + ], + ) + def test_to_datetime_utc_true_scalar(self, cache, scalar, expected): + # Test scalar case as well + result = to_datetime(scalar, format="%Y%m%d %H%M%S", utc=True, cache=cache) + assert result == expected + + def test_to_datetime_utc_true_with_series_single_value(self, cache): + # GH 15760 UTC=True with Series + ts = 1.5e18 + result = to_datetime(Series([ts]), utc=True, cache=cache) + expected = Series([Timestamp(ts, tz="utc")]) + tm.assert_series_equal(result, expected) + + def test_to_datetime_utc_true_with_series_tzaware_string(self, cache): + ts = "2013-01-01 00:00:00-01:00" + expected_ts = "2013-01-01 01:00:00" + data = Series([ts] * 3) + result = to_datetime(data, utc=True, cache=cache) + expected = Series([Timestamp(expected_ts, tz="utc")] * 3) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "date, dtype", + [ + ("2013-01-01 01:00:00", "datetime64[ns]"), + ("2013-01-01 01:00:00", "datetime64[ns, UTC]"), + ], + ) + def test_to_datetime_utc_true_with_series_datetime_ns(self, cache, date, dtype): + expected = Series([Timestamp("2013-01-01 01:00:00", tz="UTC")]) + result = to_datetime(Series([date], dtype=dtype), utc=True, cache=cache) + tm.assert_series_equal(result, expected) + + @td.skip_if_no("psycopg2") + def test_to_datetime_tz_psycopg2(self, request, cache): + + # xref 8260 + import psycopg2 + + # https://www.psycopg.org/docs/news.html#what-s-new-in-psycopg-2-9 + request.node.add_marker( + pytest.mark.xfail( + Version(psycopg2.__version__.split()[0]) > Version("2.8.7"), + raises=AttributeError, + reason="psycopg2.tz is deprecated (and appears dropped) in 2.9", + ) + ) + + # misc cases + tz1 = psycopg2.tz.FixedOffsetTimezone(offset=-300, name=None) + tz2 = psycopg2.tz.FixedOffsetTimezone(offset=-240, name=None) + arr = np.array( + [ + datetime(2000, 1, 1, 3, 0, tzinfo=tz1), + datetime(2000, 6, 1, 3, 0, tzinfo=tz2), + ], + dtype=object, + ) + + result = to_datetime(arr, errors="coerce", utc=True, cache=cache) + expected = DatetimeIndex( + ["2000-01-01 08:00:00+00:00", "2000-06-01 07:00:00+00:00"], + dtype="datetime64[ns, UTC]", + freq=None, + ) + tm.assert_index_equal(result, expected) + + # dtype coercion + i = DatetimeIndex( + ["2000-01-01 08:00:00"], + tz=psycopg2.tz.FixedOffsetTimezone(offset=-300, name=None), + ) + assert is_datetime64_ns_dtype(i) + + # tz coercion + result = to_datetime(i, errors="coerce", cache=cache) + tm.assert_index_equal(result, i) + + result = to_datetime(i, errors="coerce", utc=True, cache=cache) + expected = DatetimeIndex(["2000-01-01 13:00:00"], dtype="datetime64[ns, UTC]") + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("arg", [True, False]) + def test_datetime_bool(self, cache, arg): + # GH13176 + msg = r"dtype bool cannot be converted to datetime64\[ns\]" + with pytest.raises(TypeError, match=msg): + to_datetime(arg) + assert to_datetime(arg, errors="coerce", cache=cache) is NaT + assert to_datetime(arg, errors="ignore", cache=cache) is arg + + def test_datetime_bool_arrays_mixed(self, cache): + msg = f"{type(cache)} is not convertible to datetime" + with pytest.raises(TypeError, match=msg): + to_datetime([False, datetime.today()], cache=cache) + with pytest.raises(TypeError, match=msg): + to_datetime(["20130101", True], cache=cache) + tm.assert_index_equal( + to_datetime([0, False, NaT, 0.0], errors="coerce", cache=cache), + DatetimeIndex( + 
[to_datetime(0, cache=cache), NaT, NaT, to_datetime(0, cache=cache)] + ), + ) + + @pytest.mark.parametrize("arg", [bool, to_datetime]) + def test_datetime_invalid_datatype(self, arg): + # GH13176 + msg = "is not convertible to datetime" + with pytest.raises(TypeError, match=msg): + to_datetime(arg) + + @pytest.mark.parametrize("value", ["a", "00:01:99"]) + @pytest.mark.parametrize("infer", [True, False]) + @pytest.mark.parametrize("format", [None, "H%:M%:S%"]) + def test_datetime_invalid_scalar(self, value, format, infer): + # GH24763 + res = to_datetime( + value, errors="ignore", format=format, infer_datetime_format=infer + ) + assert res == value + + res = to_datetime( + value, errors="coerce", format=format, infer_datetime_format=infer + ) + assert res is NaT + + msg = ( + "is a bad directive in format|" + "second must be in 0..59|" + "Given date string not likely a datetime" + ) + with pytest.raises(ValueError, match=msg): + to_datetime( + value, errors="raise", format=format, infer_datetime_format=infer + ) + + @pytest.mark.parametrize("value", ["3000/12/11 00:00:00"]) + @pytest.mark.parametrize("infer", [True, False]) + @pytest.mark.parametrize("format", [None, "H%:M%:S%"]) + def test_datetime_outofbounds_scalar(self, value, format, infer): + # GH24763 + res = to_datetime( + value, errors="ignore", format=format, infer_datetime_format=infer + ) + assert res == value + + res = to_datetime( + value, errors="coerce", format=format, infer_datetime_format=infer + ) + assert res is NaT + + if format is not None: + msg = "is a bad directive in format|Out of bounds nanosecond timestamp" + with pytest.raises(ValueError, match=msg): + to_datetime( + value, errors="raise", format=format, infer_datetime_format=infer + ) + else: + msg = "Out of bounds nanosecond timestamp" + with pytest.raises(OutOfBoundsDatetime, match=msg): + to_datetime( + value, errors="raise", format=format, infer_datetime_format=infer + ) + + @pytest.mark.parametrize("values", [["a"], ["00:01:99"], ["a", "b", "99:00:00"]]) + @pytest.mark.parametrize("infer", [True, False]) + @pytest.mark.parametrize("format", [None, "H%:M%:S%"]) + def test_datetime_invalid_index(self, values, format, infer): + # GH24763 + res = to_datetime( + values, errors="ignore", format=format, infer_datetime_format=infer + ) + tm.assert_index_equal(res, Index(values)) + + res = to_datetime( + values, errors="coerce", format=format, infer_datetime_format=infer + ) + tm.assert_index_equal(res, DatetimeIndex([NaT] * len(values))) + + msg = ( + "is a bad directive in format|" + "Given date string not likely a datetime|" + "second must be in 0..59" + ) + with pytest.raises(ValueError, match=msg): + to_datetime( + values, errors="raise", format=format, infer_datetime_format=infer + ) + + @pytest.mark.parametrize("utc", [True, None]) + @pytest.mark.parametrize("format", ["%Y%m%d %H:%M:%S", None]) + @pytest.mark.parametrize("constructor", [list, tuple, np.array, Index, deque]) + def test_to_datetime_cache(self, utc, format, constructor): + date = "20130101 00:00:00" + test_dates = [date] * 10 ** 5 + data = constructor(test_dates) + + result = to_datetime(data, utc=utc, format=format, cache=True) + expected = to_datetime(data, utc=utc, format=format, cache=False) + + tm.assert_index_equal(result, expected) + + def test_to_datetime_from_deque(self): + # GH 29403 + result = to_datetime(deque([Timestamp("2010-06-02 09:30:00")] * 51)) + expected = to_datetime([Timestamp("2010-06-02 09:30:00")] * 51) + tm.assert_index_equal(result, expected) + + 
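+    # A minimal illustrative sketch of what the caching tests assert (editor's
+    # assumption: cache only toggles an internal fast path and never changes
+    # the parsed values, which is exactly what the equality checks verify):
+    #
+    #   data = Series(["20130101 00:00:00"] * 10 ** 5)
+    #   assert to_datetime(data, cache=True).equals(to_datetime(data, cache=False))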
@pytest.mark.parametrize("utc", [True, None]) + @pytest.mark.parametrize("format", ["%Y%m%d %H:%M:%S", None]) + def test_to_datetime_cache_series(self, utc, format): + date = "20130101 00:00:00" + test_dates = [date] * 10 ** 5 + data = Series(test_dates) + result = to_datetime(data, utc=utc, format=format, cache=True) + expected = to_datetime(data, utc=utc, format=format, cache=False) + tm.assert_series_equal(result, expected) + + def test_to_datetime_cache_scalar(self): + date = "20130101 00:00:00" + result = to_datetime(date, cache=True) + expected = Timestamp("20130101 00:00:00") + assert result == expected + + @pytest.mark.parametrize( + "datetimelikes,expected_values", + ( + ( + (None, np.nan) + (NaT,) * start_caching_at, + (NaT,) * (start_caching_at + 2), + ), + ( + (None, Timestamp("2012-07-26")) + (NaT,) * start_caching_at, + (NaT, Timestamp("2012-07-26")) + (NaT,) * start_caching_at, + ), + ( + (None,) + + (NaT,) * start_caching_at + + ("2012 July 26", Timestamp("2012-07-26")), + (NaT,) * (start_caching_at + 1) + + (Timestamp("2012-07-26"), Timestamp("2012-07-26")), + ), + ), + ) + def test_convert_object_to_datetime_with_cache( + self, datetimelikes, expected_values + ): + # GH#39882 + ser = Series( + datetimelikes, + dtype="object", + ) + result_series = to_datetime(ser, errors="coerce") + expected_series = Series( + expected_values, + dtype="datetime64[ns]", + ) + tm.assert_series_equal(result_series, expected_series) + + @pytest.mark.parametrize( + "date, format", + [ + ("2017-20", "%Y-%W"), + ("20 Sunday", "%W %A"), + ("20 Sun", "%W %a"), + ("2017-21", "%Y-%U"), + ("20 Sunday", "%U %A"), + ("20 Sun", "%U %a"), + ], + ) + def test_week_without_day_and_calendar_year(self, date, format): + # GH16774 + + msg = "Cannot use '%W' or '%U' without day and year" + with pytest.raises(ValueError, match=msg): + to_datetime(date, format=format) + + def test_to_datetime_coerce(self): + # GH 26122 + ts_strings = [ + "March 1, 2018 12:00:00+0400", + "March 1, 2018 12:00:00+0500", + "20100240", + ] + result = to_datetime(ts_strings, errors="coerce") + expected = Index( + [ + datetime(2018, 3, 1, 12, 0, tzinfo=tzoffset(None, 14400)), + datetime(2018, 3, 1, 12, 0, tzinfo=tzoffset(None, 18000)), + NaT, + ] + ) + tm.assert_index_equal(result, expected) + + def test_to_datetime_coerce_malformed(self): + # GH 28299 + ts_strings = ["200622-12-31", "111111-24-11"] + result = to_datetime(ts_strings, errors="coerce") + expected = Index([NaT, NaT]) + tm.assert_index_equal(result, expected) + + def test_iso_8601_strings_with_same_offset(self): + # GH 17697, 11736 + ts_str = "2015-11-18 15:30:00+05:30" + result = to_datetime(ts_str) + expected = Timestamp(ts_str) + assert result == expected + + expected = DatetimeIndex([Timestamp(ts_str)] * 2) + result = to_datetime([ts_str] * 2) + tm.assert_index_equal(result, expected) + + result = DatetimeIndex([ts_str] * 2) + tm.assert_index_equal(result, expected) + + def test_iso_8601_strings_with_different_offsets(self): + # GH 17697, 11736 + ts_strings = ["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30", NaT] + result = to_datetime(ts_strings) + expected = np.array( + [ + datetime(2015, 11, 18, 15, 30, tzinfo=tzoffset(None, 19800)), + datetime(2015, 11, 18, 16, 30, tzinfo=tzoffset(None, 23400)), + NaT, + ], + dtype=object, + ) + # GH 21864 + expected = Index(expected) + tm.assert_index_equal(result, expected) + + def test_iso_8601_strings_with_different_offsets_utc(self): + ts_strings = ["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30", NaT] + 
result = to_datetime(ts_strings, utc=True) + expected = DatetimeIndex( + [Timestamp(2015, 11, 18, 10), Timestamp(2015, 11, 18, 10), NaT], tz="UTC" + ) + tm.assert_index_equal(result, expected) + + def test_iso8601_strings_mixed_offsets_with_naive(self): + # GH 24992 + result = to_datetime( + [ + "2018-11-28T00:00:00", + "2018-11-28T00:00:00+12:00", + "2018-11-28T00:00:00", + "2018-11-28T00:00:00+06:00", + "2018-11-28T00:00:00", + ], + utc=True, + ) + expected = to_datetime( + [ + "2018-11-28T00:00:00", + "2018-11-27T12:00:00", + "2018-11-28T00:00:00", + "2018-11-27T18:00:00", + "2018-11-28T00:00:00", + ], + utc=True, + ) + tm.assert_index_equal(result, expected) + + def test_iso8601_strings_mixed_offsets_with_naive_reversed(self): + items = ["2018-11-28T00:00:00+12:00", "2018-11-28T00:00:00"] + result = to_datetime(items, utc=True) + expected = to_datetime(list(reversed(items)), utc=True)[::-1] + tm.assert_index_equal(result, expected) + + def test_mixed_offsets_with_native_datetime_raises(self): + # GH 25978 + + vals = [ + "nan", + Timestamp("1990-01-01"), + "2015-03-14T16:15:14.123-08:00", + "2019-03-04T21:56:32.620-07:00", + None, + ] + ser = Series(vals) + assert all(ser[i] is vals[i] for i in range(len(vals))) # GH#40111 + + mixed = to_datetime(ser) + expected = Series( + [ + "NaT", + Timestamp("1990-01-01"), + Timestamp("2015-03-14T16:15:14.123-08:00").to_pydatetime(), + Timestamp("2019-03-04T21:56:32.620-07:00").to_pydatetime(), + None, + ], + dtype=object, + ) + tm.assert_series_equal(mixed, expected) + + with pytest.raises(ValueError, match="Tz-aware datetime.datetime"): + to_datetime(mixed) + + def test_non_iso_strings_with_tz_offset(self): + result = to_datetime(["March 1, 2018 12:00:00+0400"] * 2) + expected = DatetimeIndex( + [datetime(2018, 3, 1, 12, tzinfo=pytz.FixedOffset(240))] * 2 + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "ts, expected", + [ + (Timestamp("2018-01-01"), Timestamp("2018-01-01", tz="UTC")), + ( + Timestamp("2018-01-01", tz="US/Pacific"), + Timestamp("2018-01-01 08:00", tz="UTC"), + ), + ], + ) + def test_timestamp_utc_true(self, ts, expected): + # GH 24415 + result = to_datetime(ts, utc=True) + assert result == expected + + @pytest.mark.parametrize("dt_str", ["00010101", "13000101", "30000101", "99990101"]) + def test_to_datetime_with_format_out_of_bounds(self, dt_str): + # GH 9107 + msg = "Out of bounds nanosecond timestamp" + with pytest.raises(OutOfBoundsDatetime, match=msg): + to_datetime(dt_str, format="%Y%m%d") + + def test_to_datetime_utc(self): + arr = np.array([parse("2012-06-13T01:39:00Z")], dtype=object) + + result = to_datetime(arr, utc=True) + assert result.tz is pytz.utc + + def test_to_datetime_fixed_offset(self): + from pandas.tests.indexes.datetimes.test_timezones import fixed_off + + dates = [ + datetime(2000, 1, 1, tzinfo=fixed_off), + datetime(2000, 1, 2, tzinfo=fixed_off), + datetime(2000, 1, 3, tzinfo=fixed_off), + ] + result = to_datetime(dates) + assert result.tz == fixed_off + + +class TestToDatetimeUnit: + def test_unit(self, cache): + # GH 11758 + # test proper behavior with errors + msg = "cannot specify both format and unit" + with pytest.raises(ValueError, match=msg): + to_datetime([1], unit="D", format="%Y%m%d", cache=cache) + + def test_unit_array_mixed_nans(self, cache): + values = [11111111, 1, 1.0, iNaT, NaT, np.nan, "NaT", ""] + result = to_datetime(values, unit="D", errors="ignore", cache=cache) + expected = Index( + [ + 11111111, + Timestamp("1970-01-02"), + Timestamp("1970-01-02"), + 
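+                # with errors="ignore" the out-of-range 11111111 above survives
+                # as-is, while every null-like input (iNaT, NaT, np.nan, "NaT",
+                # "") becomes NaT: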
NaT, + NaT, + NaT, + NaT, + NaT, + ], + dtype=object, + ) + tm.assert_index_equal(result, expected) + + result = to_datetime(values, unit="D", errors="coerce", cache=cache) + expected = DatetimeIndex( + ["NaT", "1970-01-02", "1970-01-02", "NaT", "NaT", "NaT", "NaT", "NaT"] + ) + tm.assert_index_equal(result, expected) + + msg = "cannot convert input 11111111 with the unit 'D'" + with pytest.raises(OutOfBoundsDatetime, match=msg): + to_datetime(values, unit="D", errors="raise", cache=cache) + + def test_unit_array_mixed_nans_large_int(self, cache): + values = [1420043460000, iNaT, NaT, np.nan, "NaT"] + + result = to_datetime(values, errors="ignore", unit="s", cache=cache) + expected = Index([1420043460000, NaT, NaT, NaT, NaT], dtype=object) + tm.assert_index_equal(result, expected) + + result = to_datetime(values, errors="coerce", unit="s", cache=cache) + expected = DatetimeIndex(["NaT", "NaT", "NaT", "NaT", "NaT"]) + tm.assert_index_equal(result, expected) + + msg = "cannot convert input 1420043460000 with the unit 's'" + with pytest.raises(OutOfBoundsDatetime, match=msg): + to_datetime(values, errors="raise", unit="s", cache=cache) + + def test_to_datetime_invalid_str_not_out_of_bounds_valuerror(self, cache): + # if we have a string, then we raise a ValueError + # and NOT an OutOfBoundsDatetime + msg = "non convertible value foo with the unit 's'" + with pytest.raises(ValueError, match=msg): + to_datetime("foo", errors="raise", unit="s", cache=cache) + + @pytest.mark.parametrize("error", ["raise", "coerce", "ignore"]) + def test_unit_consistency(self, cache, error): + # consistency of conversions + expected = Timestamp("1970-05-09 14:25:11") + result = to_datetime(11111111, unit="s", errors=error, cache=cache) + assert result == expected + assert isinstance(result, Timestamp) + + @pytest.mark.parametrize("errors", ["ignore", "raise", "coerce"]) + @pytest.mark.parametrize("dtype", ["float64", "int64"]) + def test_unit_with_numeric(self, cache, errors, dtype): + # GH 13180 + # coercions from floats/ints are ok + expected = DatetimeIndex(["2015-06-19 05:33:20", "2015-05-27 22:33:20"]) + arr = np.array([1.434692e18, 1.432766e18]).astype(dtype) + result = to_datetime(arr, errors=errors, cache=cache) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "exp, arr", + [ + [ + ["NaT", "2015-06-19 05:33:20", "2015-05-27 22:33:20"], + ["foo", 1.434692e18, 1.432766e18], + ], + [ + ["2015-06-19 05:33:20", "2015-05-27 22:33:20", "NaT", "NaT"], + [1.434692e18, 1.432766e18, "foo", "NaT"], + ], + ], + ) + def test_unit_with_numeric_coerce(self, cache, exp, arr): + # but we want to make sure that we are coercing + # if we have ints/strings + expected = DatetimeIndex(exp) + result = to_datetime(arr, errors="coerce", cache=cache) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "exp, arr", + [ + [ + ["2013-01-01", "NaT", "NaT"], + [Timestamp("20130101"), 1.434692e18, 1.432766e18], + ], + [ + ["NaT", "NaT", "2013-01-01"], + [1.434692e18, 1.432766e18, Timestamp("20130101")], + ], + ], + ) + def test_unit_mixed(self, cache, exp, arr): + + # mixed integers/datetimes + expected = DatetimeIndex(exp) + result = to_datetime(arr, errors="coerce", cache=cache) + tm.assert_index_equal(result, expected) + + msg = "mixed datetimes and integers in passed array" + with pytest.raises(ValueError, match=msg): + to_datetime(arr, errors="raise", cache=cache) + + def test_unit_rounding(self, cache): + # GH 14156 & GH 20445: argument will incur floating point errors + # but no 
premature rounding + result = to_datetime(1434743731.8770001, unit="s", cache=cache) + expected = Timestamp("2015-06-19 19:55:31.877000192") + assert result == expected + + def test_unit_ignore_keeps_name(self, cache): + # GH 21697 + expected = Index([15e9] * 2, name="name") + result = to_datetime(expected, errors="ignore", unit="s", cache=cache) + tm.assert_index_equal(result, expected) + + def test_to_datetime_errors_ignore_utc_true(self): + # GH#23758 + result = to_datetime([1], unit="s", utc=True, errors="ignore") + expected = DatetimeIndex(["1970-01-01 00:00:01"], tz="UTC") + tm.assert_index_equal(result, expected) + + # TODO: this is moved from tests.series.test_timeseries, may be redundant + @pytest.mark.parametrize("dtype", [int, float]) + def test_to_datetime_unit(self, dtype): + epoch = 1370745748 + ser = Series([epoch + t for t in range(20)]).astype(dtype) + result = to_datetime(ser, unit="s") + expected = Series( + [Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20)] + ) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("null", [iNaT, np.nan]) + def test_to_datetime_unit_with_nulls(self, null): + epoch = 1370745748 + ser = Series([epoch + t for t in range(20)] + [null]) + result = to_datetime(ser, unit="s") + expected = Series( + [Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20)] + + [NaT] + ) + tm.assert_series_equal(result, expected) + + def test_to_datetime_unit_fractional_seconds(self): + + # GH13834 + epoch = 1370745748 + ser = Series([epoch + t for t in np.arange(0, 2, 0.25)] + [iNaT]).astype(float) + result = to_datetime(ser, unit="s") + expected = Series( + [ + Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) + for t in np.arange(0, 2, 0.25) + ] + + [NaT] + ) + # GH20455 argument will incur floating point errors but no premature rounding + result = result.round("ms") + tm.assert_series_equal(result, expected) + + def test_to_datetime_unit_na_values(self): + result = to_datetime([1, 2, "NaT", NaT, np.nan], unit="D") + expected = DatetimeIndex( + [Timestamp("1970-01-02"), Timestamp("1970-01-03")] + ["NaT"] * 3 + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("bad_val", ["foo", 111111111]) + def test_to_datetime_unit_invalid(self, bad_val): + msg = f"{bad_val} with the unit 'D'" + with pytest.raises(ValueError, match=msg): + to_datetime([1, 2, bad_val], unit="D") + + @pytest.mark.parametrize("bad_val", ["foo", 111111111]) + def test_to_timestamp_unit_coerce(self, bad_val): + # coerce we can process + expected = DatetimeIndex( + [Timestamp("1970-01-02"), Timestamp("1970-01-03")] + ["NaT"] * 1 + ) + result = to_datetime([1, 2, bad_val], unit="D", errors="coerce") + tm.assert_index_equal(result, expected) + + +class TestToDatetimeDataFrame: + @pytest.fixture + def df(self): + return DataFrame( + { + "year": [2015, 2016], + "month": [2, 3], + "day": [4, 5], + "hour": [6, 7], + "minute": [58, 59], + "second": [10, 11], + "ms": [1, 1], + "us": [2, 2], + "ns": [3, 3], + } + ) + + def test_dataframe(self, df, cache): + + result = to_datetime( + {"year": df["year"], "month": df["month"], "day": df["day"]}, cache=cache + ) + expected = Series( + [Timestamp("20150204 00:00:00"), Timestamp("20160305 00:0:00")] + ) + tm.assert_series_equal(result, expected) + + # dict-like + result = to_datetime(df[["year", "month", "day"]].to_dict(), cache=cache) + tm.assert_series_equal(result, expected) + + def test_dataframe_dict_with_constructable(self, df, cache): + # dict but with 
constructable + df2 = df[["year", "month", "day"]].to_dict() + df2["month"] = 2 + result = to_datetime(df2, cache=cache) + expected2 = Series( + [Timestamp("20150204 00:00:00"), Timestamp("20160205 00:0:00")] + ) + tm.assert_series_equal(result, expected2) + + @pytest.mark.parametrize( + "unit", + [ + { + "year": "years", + "month": "months", + "day": "days", + "hour": "hours", + "minute": "minutes", + "second": "seconds", + }, + { + "year": "year", + "month": "month", + "day": "day", + "hour": "hour", + "minute": "minute", + "second": "second", + }, + ], + ) + def test_dataframe_field_aliases_column_subset(self, df, cache, unit): + # unit mappings + result = to_datetime(df[list(unit.keys())].rename(columns=unit), cache=cache) + expected = Series( + [Timestamp("20150204 06:58:10"), Timestamp("20160305 07:59:11")] + ) + tm.assert_series_equal(result, expected) + + def test_dataframe_field_aliases(self, df, cache): + d = { + "year": "year", + "month": "month", + "day": "day", + "hour": "hour", + "minute": "minute", + "second": "second", + "ms": "ms", + "us": "us", + "ns": "ns", + } + + result = to_datetime(df.rename(columns=d), cache=cache) + expected = Series( + [ + Timestamp("20150204 06:58:10.001002003"), + Timestamp("20160305 07:59:11.001002003"), + ] + ) + tm.assert_series_equal(result, expected) + + def test_dataframe_str_dtype(self, df, cache): + # coerce back to int + result = to_datetime(df.astype(str), cache=cache) + expected = Series( + [ + Timestamp("20150204 06:58:10.001002003"), + Timestamp("20160305 07:59:11.001002003"), + ] + ) + tm.assert_series_equal(result, expected) + + def test_dataframe_coerce(self, cache): + # passing coerce + df2 = DataFrame({"year": [2015, 2016], "month": [2, 20], "day": [4, 5]}) + + msg = ( + "cannot assemble the datetimes: time data .+ does not " + r"match format '%Y%m%d' \(match\)" + ) + with pytest.raises(ValueError, match=msg): + to_datetime(df2, cache=cache) + + result = to_datetime(df2, errors="coerce", cache=cache) + expected = Series([Timestamp("20150204 00:00:00"), NaT]) + tm.assert_series_equal(result, expected) + + def test_dataframe_extra_keys_raisesm(self, df, cache): + # extra columns + msg = r"extra keys have been passed to the datetime assemblage: \[foo\]" + with pytest.raises(ValueError, match=msg): + df2 = df.copy() + df2["foo"] = 1 + to_datetime(df2, cache=cache) + + @pytest.mark.parametrize( + "cols", + [ + ["year"], + ["year", "month"], + ["year", "month", "second"], + ["month", "day"], + ["year", "day", "second"], + ], + ) + def test_dataframe_missing_keys_raises(self, df, cache, cols): + # not enough + msg = ( + r"to assemble mappings requires at least that \[year, month, " + r"day\] be specified: \[.+\] is missing" + ) + with pytest.raises(ValueError, match=msg): + to_datetime(df[cols], cache=cache) + + def test_dataframe_duplicate_columns_raises(self, cache): + # duplicates + msg = "cannot assemble with duplicate keys" + df2 = DataFrame({"year": [2015, 2016], "month": [2, 20], "day": [4, 5]}) + df2.columns = ["year", "year", "day"] + with pytest.raises(ValueError, match=msg): + to_datetime(df2, cache=cache) + + df2 = DataFrame( + {"year": [2015, 2016], "month": [2, 20], "day": [4, 5], "hour": [4, 5]} + ) + df2.columns = ["year", "month", "day", "day"] + with pytest.raises(ValueError, match=msg): + to_datetime(df2, cache=cache) + + def test_dataframe_int16(self, cache): + # GH#13451 + df = DataFrame({"year": [2015, 2016], "month": [2, 3], "day": [4, 5]}) + + # int16 + result = to_datetime(df.astype("int16"), cache=cache) + 
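+        # presumably the int16 columns are upcast before assembly; either way
+        # the result must match the default int64 construction exactly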
expected = Series( + [Timestamp("20150204 00:00:00"), Timestamp("20160305 00:00:00")] + ) + tm.assert_series_equal(result, expected) + + def test_dataframe_mixed(self, cache): + # mixed dtypes + df = DataFrame({"year": [2015, 2016], "month": [2, 3], "day": [4, 5]}) + df["month"] = df["month"].astype("int8") + df["day"] = df["day"].astype("int8") + result = to_datetime(df, cache=cache) + expected = Series( + [Timestamp("20150204 00:00:00"), Timestamp("20160305 00:00:00")] + ) + tm.assert_series_equal(result, expected) + + def test_dataframe_float(self, cache): + # float + df = DataFrame({"year": [2000, 2001], "month": [1.5, 1], "day": [1, 1]}) + msg = "cannot assemble the datetimes: unconverted data remains: 1" + with pytest.raises(ValueError, match=msg): + to_datetime(df, cache=cache) + + def test_dataframe_utc_true(self): + # GH#23760 + df = DataFrame({"year": [2015, 2016], "month": [2, 3], "day": [4, 5]}) + result = to_datetime(df, utc=True) + expected = Series( + np.array(["2015-02-04", "2016-03-05"], dtype="datetime64[ns]") + ).dt.tz_localize("UTC") + tm.assert_series_equal(result, expected) + + +class TestToDatetimeMisc: + def test_to_datetime_barely_out_of_bounds(self): + # GH#19529 + # GH#19382 close enough to bounds that dropping nanos would result + # in an in-bounds datetime + arr = np.array(["2262-04-11 23:47:16.854775808"], dtype=object) + + msg = "Out of bounds nanosecond timestamp" + with pytest.raises(OutOfBoundsDatetime, match=msg): + to_datetime(arr) + + @pytest.mark.parametrize( + "arg, exp_str", + [ + ["2012-01-01 00:00:00", "2012-01-01 00:00:00"], + ["20121001", "2012-10-01"], # bad iso 8601 + ], + ) + def test_to_datetime_iso8601(self, cache, arg, exp_str): + result = to_datetime([arg], cache=cache) + exp = Timestamp(exp_str) + assert result[0] == exp + + def test_to_datetime_default(self, cache): + rs = to_datetime("2001", cache=cache) + xp = datetime(2001, 1, 1) + assert rs == xp + + @pytest.mark.xfail(reason="fails to enforce dayfirst=True, which would raise") + def test_to_datetime_respects_dayfirst(self, cache): + # dayfirst is essentially broken + + # The msg here is not important since it isn't actually raised yet. + msg = "Invalid date specified" + with pytest.raises(ValueError, match=msg): + # if dayfirst is respected, then this would parse as month=13, which + # would raise + with tm.assert_produces_warning(UserWarning, match="Provide format"): + to_datetime("01-13-2012", dayfirst=True, cache=cache) + + def test_to_datetime_on_datetime64_series(self, cache): + # #2699 + ser = Series(date_range("1/1/2000", periods=10)) + + result = to_datetime(ser, cache=cache) + assert result[0] == ser[0] + + def test_to_datetime_with_space_in_series(self, cache): + # GH 6428 + ser = Series(["10/18/2006", "10/18/2008", " "]) + msg = r"(\(')?String does not contain a date(:', ' '\))?" 
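+        # the optional groups let the pattern match both the bare message and
+        # the tuple-style "('String does not contain a date:', ' ')" rendering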
+ with pytest.raises(ValueError, match=msg): + to_datetime(ser, errors="raise", cache=cache) + result_coerce = to_datetime(ser, errors="coerce", cache=cache) + expected_coerce = Series([datetime(2006, 10, 18), datetime(2008, 10, 18), NaT]) + tm.assert_series_equal(result_coerce, expected_coerce) + result_ignore = to_datetime(ser, errors="ignore", cache=cache) + tm.assert_series_equal(result_ignore, ser) + + @td.skip_if_has_locale + def test_to_datetime_with_apply(self, cache): + # this is only locale tested with US/None locales + # GH 5195 + # with a format and coerce a single item to_datetime fails + td = Series(["May 04", "Jun 02", "Dec 11"], index=[1, 2, 3]) + expected = to_datetime(td, format="%b %y", cache=cache) + result = td.apply(to_datetime, format="%b %y", cache=cache) + tm.assert_series_equal(result, expected) + + @td.skip_if_has_locale + def test_to_datetime_with_apply_with_empty_str(self, cache): + # this is only locale tested with US/None locales + # GH 5195 + # with a format and coerce a single item to_datetime fails + td = Series(["May 04", "Jun 02", ""], index=[1, 2, 3]) + msg = r"time data '' does not match format '%b %y' \(match\)" + with pytest.raises(ValueError, match=msg): + to_datetime(td, format="%b %y", errors="raise", cache=cache) + with pytest.raises(ValueError, match=msg): + td.apply(to_datetime, format="%b %y", errors="raise", cache=cache) + expected = to_datetime(td, format="%b %y", errors="coerce", cache=cache) + + result = td.apply( + lambda x: to_datetime(x, format="%b %y", errors="coerce", cache=cache) + ) + tm.assert_series_equal(result, expected) + + def test_to_datetime_empty_stt(self, cache): + # empty string + result = to_datetime("", cache=cache) + assert result is NaT + + def test_to_datetime_empty_str_list(self, cache): + result = to_datetime(["", ""], cache=cache) + assert isna(result).all() + + def test_to_datetime_zero(self, cache): + # ints + result = Timestamp(0) + expected = to_datetime(0, cache=cache) + assert result == expected + + def test_to_datetime_strings(self, cache): + # GH 3888 (strings) + expected = to_datetime(["2012"], cache=cache)[0] + result = to_datetime("2012", cache=cache) + assert result == expected + + def test_to_datetime_strings_variation(self, cache): + array = ["2012", "20120101", "20120101 12:01:01"] + expected = list(to_datetime(array, cache=cache)) + result = [Timestamp(date_str) for date_str in array] + tm.assert_almost_equal(result, expected) + + @pytest.mark.parametrize("result", [Timestamp("2012"), to_datetime("2012")]) + def test_to_datetime_strings_vs_constructor(self, result): + expected = Timestamp(2012, 1, 1) + assert result == expected + + def test_to_datetime_unprocessable_input(self, cache): + # GH 4928 + # GH 21864 + result = to_datetime([1, "1"], errors="ignore", cache=cache) + + expected = Index(np.array([1, "1"], dtype="O")) + tm.assert_equal(result, expected) + msg = "invalid string coercion to datetime" + with pytest.raises(TypeError, match=msg): + to_datetime([1, "1"], errors="raise", cache=cache) + + def test_to_datetime_unhashable_input(self, cache): + series = Series([["a"]] * 100) + result = to_datetime(series, errors="ignore", cache=cache) + tm.assert_series_equal(series, result) + + def test_to_datetime_other_datetime64_units(self): + # 5/25/2012 + scalar = np.int64(1337904000000000).view("M8[us]") + as_obj = scalar.astype("O") + + index = DatetimeIndex([scalar]) + assert index[0] == scalar.astype("O") + + value = Timestamp(scalar) + assert value == as_obj + + def 
test_to_datetime_list_of_integers(self): + rng = date_range("1/1/2000", periods=20) + rng = DatetimeIndex(rng.values) + + ints = list(rng.asi8) + + result = DatetimeIndex(ints) + + tm.assert_index_equal(rng, result) + + def test_to_datetime_overflow(self): + # gh-17637 + # we are overflowing Timedelta range here + + msg = "|".join( + [ + "Python int too large to convert to C long", + "long too big to convert", + "int too big to convert", + ] + ) + with pytest.raises(OutOfBoundsTimedelta, match=msg): + date_range(start="1/1/1700", freq="B", periods=100000) + + def test_string_na_nat_conversion(self, cache): + # GH #999, #858 + + strings = np.array( + ["1/1/2000", "1/2/2000", np.nan, "1/4/2000, 12:34:56"], dtype=object + ) + + expected = np.empty(4, dtype="M8[ns]") + for i, val in enumerate(strings): + if isna(val): + expected[i] = iNaT + else: + expected[i] = parse(val) + + result = tslib.array_to_datetime(strings)[0] + tm.assert_almost_equal(result, expected) + + result2 = to_datetime(strings, cache=cache) + assert isinstance(result2, DatetimeIndex) + tm.assert_numpy_array_equal(result, result2.values) + + def test_string_na_nat_conversion_malformed(self, cache): + malformed = np.array(["1/100/2000", np.nan], dtype=object) + + # GH 10636, default is now 'raise' + msg = r"Unknown string format:|day is out of range for month" + with pytest.raises(ValueError, match=msg): + to_datetime(malformed, errors="raise", cache=cache) + + result = to_datetime(malformed, errors="ignore", cache=cache) + # GH 21864 + expected = Index(malformed) + tm.assert_index_equal(result, expected) + + with pytest.raises(ValueError, match=msg): + to_datetime(malformed, errors="raise", cache=cache) + + def test_string_na_nat_conversion_with_name(self, cache): + idx = ["a", "b", "c", "d", "e"] + series = Series( + ["1/1/2000", np.nan, "1/3/2000", np.nan, "1/5/2000"], index=idx, name="foo" + ) + dseries = Series( + [ + to_datetime("1/1/2000", cache=cache), + np.nan, + to_datetime("1/3/2000", cache=cache), + np.nan, + to_datetime("1/5/2000", cache=cache), + ], + index=idx, + name="foo", + ) + + result = to_datetime(series, cache=cache) + dresult = to_datetime(dseries, cache=cache) + + expected = Series(np.empty(5, dtype="M8[ns]"), index=idx) + for i in range(5): + x = series[i] + if isna(x): + expected[i] = NaT + else: + expected[i] = to_datetime(x, cache=cache) + + tm.assert_series_equal(result, expected, check_names=False) + assert result.name == "foo" + + tm.assert_series_equal(dresult, expected, check_names=False) + assert dresult.name == "foo" + + @pytest.mark.parametrize( + "dtype", + [ + "datetime64[h]", + "datetime64[m]", + "datetime64[s]", + "datetime64[ms]", + "datetime64[us]", + "datetime64[ns]", + ], + ) + def test_dti_constructor_numpy_timeunits(self, cache, dtype): + # GH 9114 + base = to_datetime(["2000-01-01T00:00", "2000-01-02T00:00", "NaT"], cache=cache) + + values = base.values.astype(dtype) + + tm.assert_index_equal(DatetimeIndex(values), base) + tm.assert_index_equal(to_datetime(values, cache=cache), base) + + def test_dayfirst(self, cache): + # GH 5917 + arr = ["10/02/2014", "11/02/2014", "12/02/2014"] + expected = DatetimeIndex( + [datetime(2014, 2, 10), datetime(2014, 2, 11), datetime(2014, 2, 12)] + ) + idx1 = DatetimeIndex(arr, dayfirst=True) + idx2 = DatetimeIndex(np.array(arr), dayfirst=True) + idx3 = to_datetime(arr, dayfirst=True, cache=cache) + idx4 = to_datetime(np.array(arr), dayfirst=True, cache=cache) + idx5 = DatetimeIndex(Index(arr), dayfirst=True) + idx6 = DatetimeIndex(Series(arr), 
dayfirst=True) + tm.assert_index_equal(expected, idx1) + tm.assert_index_equal(expected, idx2) + tm.assert_index_equal(expected, idx3) + tm.assert_index_equal(expected, idx4) + tm.assert_index_equal(expected, idx5) + tm.assert_index_equal(expected, idx6) + + def test_dayfirst_warnings_valid_input(self): + # GH 12585 + warning_msg_day_first = ( + "Parsing '31/12/2014' in DD/MM/YYYY format. Provide " + "format or specify infer_datetime_format=True for consistent parsing." + ) + + # CASE 1: valid input + arr = ["31/12/2014", "10/03/2011"] + expected_consistent = DatetimeIndex( + ["2014-12-31", "2011-03-10"], dtype="datetime64[ns]", freq=None + ) + expected_inconsistent = DatetimeIndex( + ["2014-12-31", "2011-10-03"], dtype="datetime64[ns]", freq=None + ) + + # A. dayfirst arg correct, no warning + res1 = to_datetime(arr, dayfirst=True) + tm.assert_index_equal(expected_consistent, res1) + + # B. dayfirst arg incorrect, warning + incorrect output + with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first): + res2 = to_datetime(arr, dayfirst=False) + tm.assert_index_equal(expected_inconsistent, res2) + + # C. dayfirst default arg, same as B + with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first): + res3 = to_datetime(arr, dayfirst=False) + tm.assert_index_equal(expected_inconsistent, res3) + + # D. infer_datetime_format=True overrides dayfirst default + # no warning + correct result + res4 = to_datetime(arr, infer_datetime_format=True) + tm.assert_index_equal(expected_consistent, res4) + + def test_dayfirst_warnings_invalid_input(self): + # CASE 2: invalid input + # cannot consistently process with single format + # warnings *always* raised + warning_msg_day_first = ( + "Parsing '31/12/2014' in DD/MM/YYYY format. Provide " + "format or specify infer_datetime_format=True for consistent parsing." + ) + warning_msg_month_first = ( + "Parsing '03/30/2011' in MM/DD/YYYY format. Provide " + "format or specify infer_datetime_format=True for consistent parsing." + ) + + arr = ["31/12/2014", "03/30/2011"] + # first in DD/MM/YYYY, second in MM/DD/YYYY + expected = DatetimeIndex( + ["2014-12-31", "2011-03-30"], dtype="datetime64[ns]", freq=None + ) + + # A. use dayfirst=True + with tm.assert_produces_warning(UserWarning, match=warning_msg_month_first): + res5 = to_datetime(arr, dayfirst=True) + tm.assert_index_equal(expected, res5) + + # B. use dayfirst=False + with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first): + res6 = to_datetime(arr, dayfirst=False) + tm.assert_index_equal(expected, res6) + + # C. use dayfirst default arg, same as B + with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first): + res7 = to_datetime(arr, dayfirst=False) + tm.assert_index_equal(expected, res7) + + # D. 
use infer_datetime_format=True + with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first): + res8 = to_datetime(arr, infer_datetime_format=True) + tm.assert_index_equal(expected, res8) + + @pytest.mark.parametrize("klass", [DatetimeIndex, DatetimeArray]) + def test_to_datetime_dta_tz(self, klass): + # GH#27733 + dti = date_range("2015-04-05", periods=3).rename("foo") + expected = dti.tz_localize("UTC") + + obj = klass(dti) + expected = klass(expected) + + result = to_datetime(obj, utc=True) + tm.assert_equal(result, expected) + + +class TestGuessDatetimeFormat: + @td.skip_if_not_us_locale + @pytest.mark.parametrize( + "test_array", + [ + [ + "2011-12-30 00:00:00.000000", + "2011-12-30 00:00:00.000000", + "2011-12-30 00:00:00.000000", + ], + [np.nan, np.nan, "2011-12-30 00:00:00.000000"], + ["2011-12-30 00:00:00.000000", "random_string"], + ], + ) + def test_guess_datetime_format_for_array(self, test_array): + expected_format = "%Y-%m-%d %H:%M:%S.%f" + assert tools._guess_datetime_format_for_array(test_array) == expected_format + + @td.skip_if_not_us_locale + def test_guess_datetime_format_for_array_all_nans(self): + format_for_string_of_nans = tools._guess_datetime_format_for_array( + np.array([np.nan, np.nan, np.nan], dtype="O") + ) + assert format_for_string_of_nans is None + + +class TestToDatetimeInferFormat: + @pytest.mark.parametrize( + "test_format", ["%m-%d-%Y", "%m/%d/%Y %H:%M:%S.%f", "%Y-%m-%dT%H:%M:%S.%f"] + ) + def test_to_datetime_infer_datetime_format_consistent_format( + self, cache, test_format + ): + ser = Series(date_range("20000101", periods=50, freq="H")) + + s_as_dt_strings = ser.apply(lambda x: x.strftime(test_format)) + + with_format = to_datetime(s_as_dt_strings, format=test_format, cache=cache) + no_infer = to_datetime( + s_as_dt_strings, infer_datetime_format=False, cache=cache + ) + yes_infer = to_datetime( + s_as_dt_strings, infer_datetime_format=True, cache=cache + ) + + # Whether the format is explicitly passed, it is inferred, or + # it is not inferred, the results should all be the same + tm.assert_series_equal(with_format, no_infer) + tm.assert_series_equal(no_infer, yes_infer) + + @pytest.mark.parametrize( + "data", + [ + ["01/01/2011 00:00:00", "01-02-2011 00:00:00", "2011-01-03T00:00:00"], + ["Jan/01/2011", "Feb/01/2011", "Mar/01/2011"], + ], + ) + def test_to_datetime_infer_datetime_format_inconsistent_format(self, cache, data): + ser = Series(np.array(data)) + + # When the format is inconsistent, infer_datetime_format should just + # fallback to the default parsing + tm.assert_series_equal( + to_datetime(ser, infer_datetime_format=False, cache=cache), + to_datetime(ser, infer_datetime_format=True, cache=cache), + ) + + def test_to_datetime_infer_datetime_format_series_with_nans(self, cache): + ser = Series( + np.array( + ["01/01/2011 00:00:00", np.nan, "01/03/2011 00:00:00", np.nan], + dtype=object, + ) + ) + tm.assert_series_equal( + to_datetime(ser, infer_datetime_format=False, cache=cache), + to_datetime(ser, infer_datetime_format=True, cache=cache), + ) + + def test_to_datetime_infer_datetime_format_series_start_with_nans(self, cache): + ser = Series( + np.array( + [ + np.nan, + np.nan, + "01/01/2011 00:00:00", + "01/02/2011 00:00:00", + "01/03/2011 00:00:00", + ], + dtype=object, + ) + ) + + tm.assert_series_equal( + to_datetime(ser, infer_datetime_format=False, cache=cache), + to_datetime(ser, infer_datetime_format=True, cache=cache), + ) + + @pytest.mark.parametrize( + "tz_name, offset", [("UTC", 0), ("UTC-3", 180), ("UTC+3", 
-180)] + ) + def test_infer_datetime_format_tz_name(self, tz_name, offset): + # GH 33133 + ser = Series([f"2019-02-02 08:07:13 {tz_name}"]) + result = to_datetime(ser, infer_datetime_format=True) + expected = Series( + [Timestamp("2019-02-02 08:07:13").tz_localize(pytz.FixedOffset(offset))] + ) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "ts,zero_tz", + [ + ("2019-02-02 08:07:13", "Z"), + ("2019-02-02 08:07:13", ""), + ("2019-02-02 08:07:13.012345", "Z"), + ("2019-02-02 08:07:13.012345", ""), + ], + ) + def test_infer_datetime_format_zero_tz(self, ts, zero_tz): + # GH 41047 + ser = Series([ts + zero_tz]) + result = to_datetime(ser, infer_datetime_format=True) + tz = pytz.utc if zero_tz == "Z" else None + expected = Series([Timestamp(ts, tz=tz)]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("format", [None, "%Y-%m-%d"]) + def test_to_datetime_iso8601_noleading_0s(self, cache, format): + # GH 11871 + ser = Series(["2014-1-1", "2014-2-2", "2015-3-3"]) + expected = Series( + [ + Timestamp("2014-01-01"), + Timestamp("2014-02-02"), + Timestamp("2015-03-03"), + ] + ) + tm.assert_series_equal(to_datetime(ser, format=format, cache=cache), expected) + + +class TestDaysInMonth: + # tests for issue #10154 + + @pytest.mark.parametrize( + "arg, format", + [ + ["2015-02-29", None], + ["2015-02-29", "%Y-%m-%d"], + ["2015-02-32", "%Y-%m-%d"], + ["2015-04-31", "%Y-%m-%d"], + ], + ) + def test_day_not_in_month_coerce(self, cache, arg, format): + assert isna(to_datetime(arg, errors="coerce", format=format, cache=cache)) + + def test_day_not_in_month_raise(self, cache): + msg = "day is out of range for month" + with pytest.raises(ValueError, match=msg): + to_datetime("2015-02-29", errors="raise", cache=cache) + + @pytest.mark.parametrize("arg", ["2015-02-29", "2015-02-32", "2015-04-31"]) + def test_day_not_in_month_raise_value(self, cache, arg): + msg = f"time data {arg} doesn't match format specified" + with pytest.raises(ValueError, match=msg): + to_datetime(arg, errors="raise", format="%Y-%m-%d", cache=cache) + + @pytest.mark.parametrize( + "expected, format", + [ + ["2015-02-29", None], + ["2015-02-29", "%Y-%m-%d"], + ["2015-02-29", "%Y-%m-%d"], + ["2015-04-31", "%Y-%m-%d"], + ], + ) + def test_day_not_in_month_ignore(self, cache, expected, format): + result = to_datetime(expected, errors="ignore", format=format, cache=cache) + assert result == expected + + +class TestDatetimeParsingWrappers: + @pytest.mark.parametrize( + "date_str,expected", + list( + { + "2011-01-01": datetime(2011, 1, 1), + "2Q2005": datetime(2005, 4, 1), + "2Q05": datetime(2005, 4, 1), + "2005Q1": datetime(2005, 1, 1), + "05Q1": datetime(2005, 1, 1), + "2011Q3": datetime(2011, 7, 1), + "11Q3": datetime(2011, 7, 1), + "3Q2011": datetime(2011, 7, 1), + "3Q11": datetime(2011, 7, 1), + # quarterly without space + "2000Q4": datetime(2000, 10, 1), + "00Q4": datetime(2000, 10, 1), + "4Q2000": datetime(2000, 10, 1), + "4Q00": datetime(2000, 10, 1), + "2000q4": datetime(2000, 10, 1), + "2000-Q4": datetime(2000, 10, 1), + "00-Q4": datetime(2000, 10, 1), + "4Q-2000": datetime(2000, 10, 1), + "4Q-00": datetime(2000, 10, 1), + "00q4": datetime(2000, 10, 1), + "2005": datetime(2005, 1, 1), + "2005-11": datetime(2005, 11, 1), + "2005 11": datetime(2005, 11, 1), + "11-2005": datetime(2005, 11, 1), + "11 2005": datetime(2005, 11, 1), + "200511": datetime(2020, 5, 11), + "20051109": datetime(2005, 11, 9), + "20051109 10:15": datetime(2005, 11, 9, 10, 15), + "20051109 08H": datetime(2005, 11, 9, 
8, 0), + "2005-11-09 10:15": datetime(2005, 11, 9, 10, 15), + "2005-11-09 08H": datetime(2005, 11, 9, 8, 0), + "2005/11/09 10:15": datetime(2005, 11, 9, 10, 15), + "2005/11/09 08H": datetime(2005, 11, 9, 8, 0), + "Thu Sep 25 10:36:28 2003": datetime(2003, 9, 25, 10, 36, 28), + "Thu Sep 25 2003": datetime(2003, 9, 25), + "Sep 25 2003": datetime(2003, 9, 25), + "January 1 2014": datetime(2014, 1, 1), + # GHE10537 + "2014-06": datetime(2014, 6, 1), + "06-2014": datetime(2014, 6, 1), + "2014-6": datetime(2014, 6, 1), + "6-2014": datetime(2014, 6, 1), + "20010101 12": datetime(2001, 1, 1, 12), + "20010101 1234": datetime(2001, 1, 1, 12, 34), + "20010101 123456": datetime(2001, 1, 1, 12, 34, 56), + }.items() + ), + ) + def test_parsers(self, date_str, expected, cache): + + # dateutil >= 2.5.0 defaults to yearfirst=True + # https://github.com/dateutil/dateutil/issues/217 + yearfirst = True + + result1, _ = parsing.parse_time_string(date_str, yearfirst=yearfirst) + result2 = to_datetime(date_str, yearfirst=yearfirst) + result3 = to_datetime([date_str], yearfirst=yearfirst) + # result5 is used below + result4 = to_datetime( + np.array([date_str], dtype=object), yearfirst=yearfirst, cache=cache + ) + result6 = DatetimeIndex([date_str], yearfirst=yearfirst) + # result7 is used below + result8 = DatetimeIndex(Index([date_str]), yearfirst=yearfirst) + result9 = DatetimeIndex(Series([date_str]), yearfirst=yearfirst) + + for res in [result1, result2]: + assert res == expected + for res in [result3, result4, result6, result8, result9]: + exp = DatetimeIndex([Timestamp(expected)]) + tm.assert_index_equal(res, exp) + + # these really need to have yearfirst, but we don't support + if not yearfirst: + result5 = Timestamp(date_str) + assert result5 == expected + result7 = date_range(date_str, freq="S", periods=1, yearfirst=yearfirst) + assert result7 == expected + + def test_na_values_with_cache( + self, cache, unique_nulls_fixture, unique_nulls_fixture2 + ): + # GH22305 + expected = Index([NaT, NaT], dtype="datetime64[ns]") + result = to_datetime([unique_nulls_fixture, unique_nulls_fixture2], cache=cache) + tm.assert_index_equal(result, expected) + + def test_parsers_nat(self): + # Test that each of several string-accepting methods return pd.NaT + result1, _ = parsing.parse_time_string("NaT") + result2 = to_datetime("NaT") + result3 = Timestamp("NaT") + result4 = DatetimeIndex(["NaT"])[0] + assert result1 is NaT + assert result2 is NaT + assert result3 is NaT + assert result4 is NaT + + @pytest.mark.parametrize( + "date_str, dayfirst, yearfirst, expected", + [ + ("10-11-12", False, False, datetime(2012, 10, 11)), + ("10-11-12", True, False, datetime(2012, 11, 10)), + ("10-11-12", False, True, datetime(2010, 11, 12)), + ("10-11-12", True, True, datetime(2010, 12, 11)), + ("20/12/21", False, False, datetime(2021, 12, 20)), + ("20/12/21", True, False, datetime(2021, 12, 20)), + ("20/12/21", False, True, datetime(2020, 12, 21)), + ("20/12/21", True, True, datetime(2020, 12, 21)), + ], + ) + def test_parsers_dayfirst_yearfirst( + self, cache, date_str, dayfirst, yearfirst, expected + ): + # OK + # 2.5.1 10-11-12 [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00 + # 2.5.2 10-11-12 [dayfirst=0, yearfirst=1] -> 2012-10-11 00:00:00 + # 2.5.3 10-11-12 [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00 + + # OK + # 2.5.1 10-11-12 [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00 + # 2.5.2 10-11-12 [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00 + # 2.5.3 10-11-12 [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00 + + # bug 
fix in 2.5.2 + # 2.5.1 10-11-12 [dayfirst=1, yearfirst=1] -> 2010-11-12 00:00:00 + # 2.5.2 10-11-12 [dayfirst=1, yearfirst=1] -> 2010-12-11 00:00:00 + # 2.5.3 10-11-12 [dayfirst=1, yearfirst=1] -> 2010-12-11 00:00:00 + + # OK + # 2.5.1 10-11-12 [dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00 + # 2.5.2 10-11-12 [dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00 + # 2.5.3 10-11-12 [dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00 + + # OK + # 2.5.1 20/12/21 [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00 + # 2.5.2 20/12/21 [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00 + # 2.5.3 20/12/21 [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00 + + # OK + # 2.5.1 20/12/21 [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00 + # 2.5.2 20/12/21 [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00 + # 2.5.3 20/12/21 [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00 + + # revert of bug in 2.5.2 + # 2.5.1 20/12/21 [dayfirst=1, yearfirst=1] -> 2020-12-21 00:00:00 + # 2.5.2 20/12/21 [dayfirst=1, yearfirst=1] -> month must be in 1..12 + # 2.5.3 20/12/21 [dayfirst=1, yearfirst=1] -> 2020-12-21 00:00:00 + + # OK + # 2.5.1 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00 + # 2.5.2 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00 + # 2.5.3 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00 + + # str : dayfirst, yearfirst, expected + + # compare with dateutil result + dateutil_result = parse(date_str, dayfirst=dayfirst, yearfirst=yearfirst) + assert dateutil_result == expected + + result1, _ = parsing.parse_time_string( + date_str, dayfirst=dayfirst, yearfirst=yearfirst + ) + + # we don't support dayfirst/yearfirst here: + if not dayfirst and not yearfirst: + result2 = Timestamp(date_str) + assert result2 == expected + + result3 = to_datetime( + date_str, dayfirst=dayfirst, yearfirst=yearfirst, cache=cache + ) + + result4 = DatetimeIndex([date_str], dayfirst=dayfirst, yearfirst=yearfirst)[0] + + assert result1 == expected + assert result3 == expected + assert result4 == expected + + @pytest.mark.parametrize( + "date_str, exp_def", + [["10:15", datetime(1, 1, 1, 10, 15)], ["9:05", datetime(1, 1, 1, 9, 5)]], + ) + def test_parsers_timestring(self, date_str, exp_def): + # must be the same as dateutil result + exp_now = parse(date_str) + + result1, _ = parsing.parse_time_string(date_str) + result2 = to_datetime(date_str) + result3 = to_datetime([date_str]) + result4 = Timestamp(date_str) + result5 = DatetimeIndex([date_str])[0] + # parse time string return time string based on default date + # others are not, and can't be changed because it is used in + # time series plot + assert result1 == exp_def + assert result2 == exp_now + assert result3 == exp_now + assert result4 == exp_now + assert result5 == exp_now + + @pytest.mark.parametrize( + "dt_string, tz, dt_string_repr", + [ + ( + "2013-01-01 05:45+0545", + pytz.FixedOffset(345), + "Timestamp('2013-01-01 05:45:00+0545', tz='pytz.FixedOffset(345)')", + ), + ( + "2013-01-01 05:30+0530", + pytz.FixedOffset(330), + "Timestamp('2013-01-01 05:30:00+0530', tz='pytz.FixedOffset(330)')", + ), + ], + ) + def test_parsers_timezone_minute_offsets_roundtrip( + self, cache, dt_string, tz, dt_string_repr + ): + # GH11708 + base = to_datetime("2013-01-01 00:00:00", cache=cache) + base = base.tz_localize("UTC").tz_convert(tz) + dt_time = to_datetime(dt_string, cache=cache) + assert base == dt_time + assert dt_string_repr == repr(dt_time) + + +@pytest.fixture(params=["D", "s", "ms", "us", "ns"]) +def units(request): + """Day and some time units. 
+ + * D + * s + * ms + * us + * ns + """ + return request.param + + +@pytest.fixture +def epoch_1960(): + """Timestamp at 1960-01-01.""" + return Timestamp("1960-01-01") + + +@pytest.fixture +def units_from_epochs(): + return list(range(5)) + + +@pytest.fixture(params=["timestamp", "pydatetime", "datetime64", "str_1960"]) +def epochs(epoch_1960, request): + """Timestamp at 1960-01-01 in various forms. + + * Timestamp + * datetime.datetime + * numpy.datetime64 + * str + """ + assert request.param in {"timestamp", "pydatetime", "datetime64", "str_1960"} + if request.param == "timestamp": + return epoch_1960 + elif request.param == "pydatetime": + return epoch_1960.to_pydatetime() + elif request.param == "datetime64": + return epoch_1960.to_datetime64() + else: + return str(epoch_1960) + + +@pytest.fixture +def julian_dates(): + return date_range("2014-1-1", periods=10).to_julian_date().values + + +class TestOrigin: + def test_julian(self, julian_dates): + # gh-11276, gh-11745 + # for origin as julian + + result = Series(to_datetime(julian_dates, unit="D", origin="julian")) + expected = Series( + to_datetime(julian_dates - Timestamp(0).to_julian_date(), unit="D") + ) + tm.assert_series_equal(result, expected) + + def test_unix(self): + result = Series(to_datetime([0, 1, 2], unit="D", origin="unix")) + expected = Series( + [Timestamp("1970-01-01"), Timestamp("1970-01-02"), Timestamp("1970-01-03")] + ) + tm.assert_series_equal(result, expected) + + def test_julian_round_trip(self): + result = to_datetime(2456658, origin="julian", unit="D") + assert result.to_julian_date() == 2456658 + + # out-of-bounds + msg = "1 is Out of Bounds for origin='julian'" + with pytest.raises(ValueError, match=msg): + to_datetime(1, origin="julian", unit="D") + + def test_invalid_unit(self, units, julian_dates): + + # checking for invalid combination of origin='julian' and unit != D + if units != "D": + msg = "unit must be 'D' for origin='julian'" + with pytest.raises(ValueError, match=msg): + to_datetime(julian_dates, unit=units, origin="julian") + + @pytest.mark.parametrize("unit", ["ns", "D"]) + def test_invalid_origin(self, unit): + + # need to have a numeric specified + msg = "it must be numeric with a unit specified" + with pytest.raises(ValueError, match=msg): + to_datetime("2005-01-01", origin="1960-01-01", unit=unit) + + def test_epoch(self, units, epochs, epoch_1960, units_from_epochs): + + expected = Series( + [pd.Timedelta(x, unit=units) + epoch_1960 for x in units_from_epochs] + ) + + result = Series(to_datetime(units_from_epochs, unit=units, origin=epochs)) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "origin, exc", + [ + ("random_string", ValueError), + ("epoch", ValueError), + ("13-24-1990", ValueError), + (datetime(1, 1, 1), OutOfBoundsDatetime), + ], + ) + def test_invalid_origins(self, origin, exc, units, units_from_epochs): + + msg = f"origin {origin} (is Out of Bounds|cannot be converted to a Timestamp)" + with pytest.raises(exc, match=msg): + to_datetime(units_from_epochs, unit=units, origin=origin) + + def test_invalid_origins_tzinfo(self): + # GH16842 + with pytest.raises(ValueError, match="must be tz-naive"): + to_datetime(1, unit="D", origin=datetime(2000, 1, 1, tzinfo=pytz.utc)) + + @pytest.mark.parametrize("format", [None, "%Y-%m-%d %H:%M:%S"]) + def test_to_datetime_out_of_bounds_with_format_arg(self, format): + # see gh-23830 + msg = "Out of bounds nanosecond timestamp" + with pytest.raises(OutOfBoundsDatetime, match=msg): + to_datetime("2417-10-27 
00:00:00", format=format) + + @pytest.mark.parametrize( + "arg, origin, expected_str", + [ + [200 * 365, "unix", "2169-11-13 00:00:00"], + [200 * 365, "1870-01-01", "2069-11-13 00:00:00"], + [300 * 365, "1870-01-01", "2169-10-20 00:00:00"], + ], + ) + def test_processing_order(self, arg, origin, expected_str): + # make sure we handle out-of-bounds *before* + # constructing the dates + + result = to_datetime(arg, unit="D", origin=origin) + expected = Timestamp(expected_str) + assert result == expected + + result = to_datetime(200 * 365, unit="D", origin="1870-01-01") + expected = Timestamp("2069-11-13 00:00:00") + assert result == expected + + result = to_datetime(300 * 365, unit="D", origin="1870-01-01") + expected = Timestamp("2169-10-20 00:00:00") + assert result == expected + + @pytest.mark.parametrize( + "offset,utc,exp", + [ + ["Z", True, "2019-01-01T00:00:00.000Z"], + ["Z", None, "2019-01-01T00:00:00.000Z"], + ["-01:00", True, "2019-01-01T01:00:00.000Z"], + ["-01:00", None, "2019-01-01T00:00:00.000-01:00"], + ], + ) + def test_arg_tz_ns_unit(self, offset, utc, exp): + # GH 25546 + arg = "2019-01-01T00:00:00.000" + offset + result = to_datetime([arg], unit="ns", utc=utc) + expected = to_datetime([exp]) + tm.assert_index_equal(result, expected) + + +class TestShouldCache: + @pytest.mark.parametrize( + "listlike,do_caching", + [ + ([1, 2, 3, 4, 5, 6, 7, 8, 9, 0], False), + ([1, 1, 1, 1, 4, 5, 6, 7, 8, 9], True), + ], + ) + def test_should_cache(self, listlike, do_caching): + assert ( + tools.should_cache(listlike, check_count=len(listlike), unique_share=0.7) + == do_caching + ) + + @pytest.mark.parametrize( + "unique_share,check_count, err_message", + [ + (0.5, 11, r"check_count must be in next bounds: \[0; len\(arg\)\]"), + (10, 2, r"unique_share must be in next bounds: \(0; 1\)"), + ], + ) + def test_should_cache_errors(self, unique_share, check_count, err_message): + arg = [5] * 10 + + with pytest.raises(AssertionError, match=err_message): + tools.should_cache(arg, unique_share, check_count) + + @pytest.mark.parametrize( + "listlike", + [ + (deque([Timestamp("2010-06-02 09:30:00")] * 51)), + ([Timestamp("2010-06-02 09:30:00")] * 51), + (tuple([Timestamp("2010-06-02 09:30:00")] * 51)), + ], + ) + def test_no_slicing_errors_in_should_cache(self, listlike): + # GH#29403 + assert tools.should_cache(listlike) is True + + +def test_nullable_integer_to_datetime(): + # Test for #30050 + ser = Series([1, 2, None, 2 ** 61, None]) + ser = ser.astype("Int64") + ser_copy = ser.copy() + + res = to_datetime(ser, unit="ns") + + expected = Series( + [ + np.datetime64("1970-01-01 00:00:00.000000001"), + np.datetime64("1970-01-01 00:00:00.000000002"), + np.datetime64("NaT"), + np.datetime64("2043-01-25 23:56:49.213693952"), + np.datetime64("NaT"), + ] + ) + tm.assert_series_equal(res, expected) + # Check that ser isn't mutated + tm.assert_series_equal(ser, ser_copy) + + +@pytest.mark.parametrize("klass", [np.array, list]) +def test_na_to_datetime(nulls_fixture, klass): + + if isinstance(nulls_fixture, Decimal): + with pytest.raises(TypeError, match="not convertible to datetime"): + to_datetime(klass([nulls_fixture])) + + else: + result = to_datetime(klass([nulls_fixture])) + + assert result[0] is NaT + + +def test_empty_string_datetime_coerce_format(): + # GH13044 + td = Series(["03/24/2016", "03/25/2016", ""]) + format = "%m/%d/%Y" + + # coerce empty string to pd.NaT + result = to_datetime(td, format=format, errors="coerce") + expected = Series(["2016-03-24", "2016-03-25", NaT], 
dtype="datetime64[ns]") + tm.assert_series_equal(expected, result) + + # raise an exception in case a format is given + with pytest.raises(ValueError, match="does not match format"): + to_datetime(td, format=format, errors="raise") + + # don't raise an exception in case no format is given + result = to_datetime(td, errors="raise") + tm.assert_series_equal(result, expected) + + +def test_empty_string_datetime_coerce__unit(): + # GH13044 + # coerce empty string to pd.NaT + result = to_datetime([1, ""], unit="s", errors="coerce") + expected = DatetimeIndex(["1970-01-01 00:00:01", "NaT"], dtype="datetime64[ns]") + tm.assert_index_equal(expected, result) + + # verify that no exception is raised even when errors='raise' is set + result = to_datetime([1, ""], unit="s", errors="raise") + tm.assert_index_equal(expected, result) + + +@td.skip_if_no("xarray") +def test_xarray_coerce_unit(): + # GH44053 + import xarray as xr + + arr = xr.DataArray([1, 2, 3]) + result = to_datetime(arr, unit="ns") + expected = DatetimeIndex( + [ + "1970-01-01 00:00:00.000000001", + "1970-01-01 00:00:00.000000002", + "1970-01-01 00:00:00.000000003", + ], + dtype="datetime64[ns]", + freq=None, + ) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize("cache", [True, False]) +def test_to_datetime_monotonic_increasing_index(cache): + # GH28238 + cstart = start_caching_at + times = date_range(Timestamp("1980"), periods=cstart, freq="YS") + times = times.to_frame(index=False, name="DT").sample(n=cstart, random_state=1) + times.index = times.index.to_series().astype(float) / 1000 + result = to_datetime(times.iloc[:, 0], cache=cache) + expected = times.iloc[:, 0] + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tools/test_to_numeric.py b/.local/lib/python3.8/site-packages/pandas/tests/tools/test_to_numeric.py new file mode 100644 index 0000000..643a561 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/tools/test_to_numeric.py @@ -0,0 +1,791 @@ +import decimal + +import numpy as np +from numpy import iinfo +import pytest + +from pandas.compat import is_platform_arm + +import pandas as pd +from pandas import ( + DataFrame, + Index, + Series, + to_numeric, +) +import pandas._testing as tm + + +@pytest.fixture(params=[None, "ignore", "raise", "coerce"]) +def errors(request): + return request.param + + +@pytest.fixture(params=[True, False]) +def signed(request): + return request.param + + +@pytest.fixture(params=[lambda x: x, str], ids=["identity", "str"]) +def transform(request): + return request.param + + +@pytest.fixture(params=[47393996303418497800, 100000000000000000000]) +def large_val(request): + return request.param + + +@pytest.fixture(params=[True, False]) +def multiple_elts(request): + return request.param + + +@pytest.fixture( + params=[ + (lambda x: Index(x, name="idx"), tm.assert_index_equal), + (lambda x: Series(x, name="ser"), tm.assert_series_equal), + (lambda x: np.array(Index(x).values), tm.assert_numpy_array_equal), + ] +) +def transform_assert_equal(request): + return request.param + + +@pytest.mark.parametrize( + "input_kwargs,result_kwargs", + [ + ({}, {"dtype": np.int64}), + ({"errors": "coerce", "downcast": "integer"}, {"dtype": np.int8}), + ], +) +def test_empty(input_kwargs, result_kwargs): + # see gh-16302 + ser = Series([], dtype=object) + result = to_numeric(ser, **input_kwargs) + + expected = Series([], **result_kwargs) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("last_val", ["7", 
7]) +def test_series(last_val): + ser = Series(["1", "-3.14", last_val]) + result = to_numeric(ser) + + expected = Series([1, -3.14, 7]) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "data", + [ + [1, 3, 4, 5], + [1.0, 3.0, 4.0, 5.0], + # Bool is regarded as numeric. + [True, False, True, True], + ], +) +def test_series_numeric(data): + ser = Series(data, index=list("ABCD"), name="EFG") + + result = to_numeric(ser) + tm.assert_series_equal(result, ser) + + +@pytest.mark.parametrize( + "data,msg", + [ + ([1, -3.14, "apple"], 'Unable to parse string "apple" at position 2'), + ( + ["orange", 1, -3.14, "apple"], + 'Unable to parse string "orange" at position 0', + ), + ], +) +def test_error(data, msg): + ser = Series(data) + + with pytest.raises(ValueError, match=msg): + to_numeric(ser, errors="raise") + + +@pytest.mark.parametrize( + "errors,exp_data", [("ignore", [1, -3.14, "apple"]), ("coerce", [1, -3.14, np.nan])] +) +def test_ignore_error(errors, exp_data): + ser = Series([1, -3.14, "apple"]) + result = to_numeric(ser, errors=errors) + + expected = Series(exp_data) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "errors,exp", + [ + ("raise", 'Unable to parse string "apple" at position 2'), + ("ignore", [True, False, "apple"]), + # Coerces to float. + ("coerce", [1.0, 0.0, np.nan]), + ], +) +def test_bool_handling(errors, exp): + ser = Series([True, False, "apple"]) + + if isinstance(exp, str): + with pytest.raises(ValueError, match=exp): + to_numeric(ser, errors=errors) + else: + result = to_numeric(ser, errors=errors) + expected = Series(exp) + + tm.assert_series_equal(result, expected) + + +def test_list(): + ser = ["1", "-3.14", "7"] + res = to_numeric(ser) + + expected = np.array([1, -3.14, 7]) + tm.assert_numpy_array_equal(res, expected) + + +@pytest.mark.parametrize( + "data,arr_kwargs", + [ + ([1, 3, 4, 5], {"dtype": np.int64}), + ([1.0, 3.0, 4.0, 5.0], {}), + # Boolean is regarded as numeric. + ([True, False, True, True], {}), + ], +) +def test_list_numeric(data, arr_kwargs): + result = to_numeric(data) + expected = np.array(data, **arr_kwargs) + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize("kwargs", [{"dtype": "O"}, {}]) +def test_numeric(kwargs): + data = [1, -3.14, 7] + + ser = Series(data, **kwargs) + result = to_numeric(ser) + + expected = Series(data) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "columns", + [ + # One column. + "a", + # Multiple columns. 
+ ["a", "b"], + ], +) +def test_numeric_df_columns(columns): + # see gh-14827 + df = DataFrame( + { + "a": [1.2, decimal.Decimal(3.14), decimal.Decimal("infinity"), "0.1"], + "b": [1.0, 2.0, 3.0, 4.0], + } + ) + + expected = DataFrame({"a": [1.2, 3.14, np.inf, 0.1], "b": [1.0, 2.0, 3.0, 4.0]}) + + df_copy = df.copy() + df_copy[columns] = df_copy[columns].apply(to_numeric) + + tm.assert_frame_equal(df_copy, expected) + + +@pytest.mark.parametrize( + "data,exp_data", + [ + ( + [[decimal.Decimal(3.14), 1.0], decimal.Decimal(1.6), 0.1], + [[3.14, 1.0], 1.6, 0.1], + ), + ([np.array([decimal.Decimal(3.14), 1.0]), 0.1], [[3.14, 1.0], 0.1]), + ], +) +def test_numeric_embedded_arr_likes(data, exp_data): + # Test to_numeric with embedded lists and arrays + df = DataFrame({"a": data}) + df["a"] = df["a"].apply(to_numeric) + + expected = DataFrame({"a": exp_data}) + tm.assert_frame_equal(df, expected) + + +def test_all_nan(): + ser = Series(["a", "b", "c"]) + result = to_numeric(ser, errors="coerce") + + expected = Series([np.nan, np.nan, np.nan]) + tm.assert_series_equal(result, expected) + + +def test_type_check(errors): + # see gh-11776 + df = DataFrame({"a": [1, -3.14, 7], "b": ["4", "5", "6"]}) + kwargs = {"errors": errors} if errors is not None else {} + with pytest.raises(TypeError, match="1-d array"): + to_numeric(df, **kwargs) + + +@pytest.mark.parametrize("val", [1, 1.1, 20001]) +def test_scalar(val, signed, transform): + val = -val if signed else val + assert to_numeric(transform(val)) == float(val) + + +def test_really_large_scalar(large_val, signed, transform, errors): + # see gh-24910 + kwargs = {"errors": errors} if errors is not None else {} + val = -large_val if signed else large_val + + val = transform(val) + val_is_string = isinstance(val, str) + + if val_is_string and errors in (None, "raise"): + msg = "Integer out of range. at position 0" + with pytest.raises(ValueError, match=msg): + to_numeric(val, **kwargs) + else: + expected = float(val) if (errors == "coerce" and val_is_string) else val + tm.assert_almost_equal(to_numeric(val, **kwargs), expected) + + +def test_really_large_in_arr(large_val, signed, transform, multiple_elts, errors): + # see gh-24910 + kwargs = {"errors": errors} if errors is not None else {} + val = -large_val if signed else large_val + val = transform(val) + + extra_elt = "string" + arr = [val] + multiple_elts * [extra_elt] + + val_is_string = isinstance(val, str) + coercing = errors == "coerce" + + if errors in (None, "raise") and (val_is_string or multiple_elts): + if val_is_string: + msg = "Integer out of range. at position 0" + else: + msg = 'Unable to parse string "string" at position 1' + + with pytest.raises(ValueError, match=msg): + to_numeric(arr, **kwargs) + else: + result = to_numeric(arr, **kwargs) + + exp_val = float(val) if (coercing and val_is_string) else val + expected = [exp_val] + + if multiple_elts: + if coercing: + expected.append(np.nan) + exp_dtype = float + else: + expected.append(extra_elt) + exp_dtype = object + else: + exp_dtype = float if isinstance(exp_val, (int, float)) else object + + tm.assert_almost_equal(result, np.array(expected, dtype=exp_dtype)) + + +def test_really_large_in_arr_consistent(large_val, signed, multiple_elts, errors): + # see gh-24910 + # + # Even if we discover that we have to hold float, does not mean + # we should be lenient on subsequent elements that fail to be integer. 
+ kwargs = {"errors": errors} if errors is not None else {} + arr = [str(-large_val if signed else large_val)] + + if multiple_elts: + arr.insert(0, large_val) + + if errors in (None, "raise"): + index = int(multiple_elts) + msg = f"Integer out of range. at position {index}" + + with pytest.raises(ValueError, match=msg): + to_numeric(arr, **kwargs) + else: + result = to_numeric(arr, **kwargs) + + if errors == "coerce": + expected = [float(i) for i in arr] + exp_dtype = float + else: + expected = arr + exp_dtype = object + + tm.assert_almost_equal(result, np.array(expected, dtype=exp_dtype)) + + +@pytest.mark.parametrize( + "errors,checker", + [ + ("raise", 'Unable to parse string "fail" at position 0'), + ("ignore", lambda x: x == "fail"), + ("coerce", lambda x: np.isnan(x)), + ], +) +def test_scalar_fail(errors, checker): + scalar = "fail" + + if isinstance(checker, str): + with pytest.raises(ValueError, match=checker): + to_numeric(scalar, errors=errors) + else: + assert checker(to_numeric(scalar, errors=errors)) + + +@pytest.mark.parametrize("data", [[1, 2, 3], [1.0, np.nan, 3, np.nan]]) +def test_numeric_dtypes(data, transform_assert_equal): + transform, assert_equal = transform_assert_equal + data = transform(data) + + result = to_numeric(data) + assert_equal(result, data) + + +@pytest.mark.parametrize( + "data,exp", + [ + (["1", "2", "3"], np.array([1, 2, 3], dtype="int64")), + (["1.5", "2.7", "3.4"], np.array([1.5, 2.7, 3.4])), + ], +) +def test_str(data, exp, transform_assert_equal): + transform, assert_equal = transform_assert_equal + result = to_numeric(transform(data)) + + expected = transform(exp) + assert_equal(result, expected) + + +def test_datetime_like(tz_naive_fixture, transform_assert_equal): + transform, assert_equal = transform_assert_equal + idx = pd.date_range("20130101", periods=3, tz=tz_naive_fixture) + + result = to_numeric(transform(idx)) + expected = transform(idx.asi8) + assert_equal(result, expected) + + +def test_timedelta(transform_assert_equal): + transform, assert_equal = transform_assert_equal + idx = pd.timedelta_range("1 days", periods=3, freq="D") + + result = to_numeric(transform(idx)) + expected = transform(idx.asi8) + assert_equal(result, expected) + + +def test_period(transform_assert_equal): + transform, assert_equal = transform_assert_equal + + idx = pd.period_range("2011-01", periods=3, freq="M", name="") + inp = transform(idx) + + if isinstance(inp, Index): + result = to_numeric(inp) + expected = transform(idx.asi8) + assert_equal(result, expected) + else: + # TODO: PeriodDtype, so support it in to_numeric. 
+ pytest.skip("Missing PeriodDtype support in to_numeric") + + +@pytest.mark.parametrize( + "errors,expected", + [ + ("raise", "Invalid object type at position 0"), + ("ignore", Series([[10.0, 2], 1.0, "apple"])), + ("coerce", Series([np.nan, 1.0, np.nan])), + ], +) +def test_non_hashable(errors, expected): + # see gh-13324 + ser = Series([[10.0, 2], 1.0, "apple"]) + + if isinstance(expected, str): + with pytest.raises(TypeError, match=expected): + to_numeric(ser, errors=errors) + else: + result = to_numeric(ser, errors=errors) + tm.assert_series_equal(result, expected) + + +def test_downcast_invalid_cast(): + # see gh-13352 + data = ["1", 2, 3] + invalid_downcast = "unsigned-integer" + msg = "invalid downcasting method provided" + + with pytest.raises(ValueError, match=msg): + to_numeric(data, downcast=invalid_downcast) + + +def test_errors_invalid_value(): + # see gh-26466 + data = ["1", 2, 3] + invalid_error_value = "invalid" + msg = "invalid error value specified" + + with pytest.raises(ValueError, match=msg): + to_numeric(data, errors=invalid_error_value) + + +@pytest.mark.parametrize( + "data", + [ + ["1", 2, 3], + [1, 2, 3], + np.array(["1970-01-02", "1970-01-03", "1970-01-04"], dtype="datetime64[D]"), + ], +) +@pytest.mark.parametrize( + "kwargs,exp_dtype", + [ + # Basic function tests. + ({}, np.int64), + ({"downcast": None}, np.int64), + # Support below np.float32 is rare and far between. + ({"downcast": "float"}, np.dtype(np.float32).char), + # Basic dtype support. + ({"downcast": "unsigned"}, np.dtype(np.typecodes["UnsignedInteger"][0])), + ], +) +def test_downcast_basic(data, kwargs, exp_dtype): + # see gh-13352 + result = to_numeric(data, **kwargs) + expected = np.array([1, 2, 3], dtype=exp_dtype) + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize("signed_downcast", ["integer", "signed"]) +@pytest.mark.parametrize( + "data", + [ + ["1", 2, 3], + [1, 2, 3], + np.array(["1970-01-02", "1970-01-03", "1970-01-04"], dtype="datetime64[D]"), + ], +) +def test_signed_downcast(data, signed_downcast): + # see gh-13352 + smallest_int_dtype = np.dtype(np.typecodes["Integer"][0]) + expected = np.array([1, 2, 3], dtype=smallest_int_dtype) + + res = to_numeric(data, downcast=signed_downcast) + tm.assert_numpy_array_equal(res, expected) + + +def test_ignore_downcast_invalid_data(): + # If we can't successfully cast the given + # data to a numeric dtype, do not bother + # with the downcast parameter. + data = ["foo", 2, 3] + expected = np.array(data, dtype=object) + + res = to_numeric(data, errors="ignore", downcast="unsigned") + tm.assert_numpy_array_equal(res, expected) + + +def test_ignore_downcast_neg_to_unsigned(): + # Cannot cast to an unsigned integer + # because we have a negative number. + data = ["-1", 2, 3] + expected = np.array([-1, 2, 3], dtype=np.int64) + + res = to_numeric(data, downcast="unsigned") + tm.assert_numpy_array_equal(res, expected) + + +@pytest.mark.parametrize("downcast", ["integer", "signed", "unsigned"]) +@pytest.mark.parametrize( + "data,expected", + [ + (["1.1", 2, 3], np.array([1.1, 2, 3], dtype=np.float64)), + ( + [10000.0, 20000, 3000, 40000.36, 50000, 50000.00], + np.array( + [10000.0, 20000, 3000, 40000.36, 50000, 50000.00], dtype=np.float64 + ), + ), + ], +) +def test_ignore_downcast_cannot_convert_float(data, expected, downcast): + # Cannot cast to an integer (signed or unsigned) + # because we have a float number. 
+ res = to_numeric(data, downcast=downcast) + tm.assert_numpy_array_equal(res, expected) + + +@pytest.mark.parametrize( + "downcast,expected_dtype", + [("integer", np.int16), ("signed", np.int16), ("unsigned", np.uint16)], +) +def test_downcast_not8bit(downcast, expected_dtype): + # the smallest integer dtype need not be np.(u)int8 + data = ["256", 257, 258] + + expected = np.array([256, 257, 258], dtype=expected_dtype) + res = to_numeric(data, downcast=downcast) + tm.assert_numpy_array_equal(res, expected) + + +@pytest.mark.parametrize( + "dtype,downcast,min_max", + [ + ("int8", "integer", [iinfo(np.int8).min, iinfo(np.int8).max]), + ("int16", "integer", [iinfo(np.int16).min, iinfo(np.int16).max]), + ("int32", "integer", [iinfo(np.int32).min, iinfo(np.int32).max]), + ("int64", "integer", [iinfo(np.int64).min, iinfo(np.int64).max]), + ("uint8", "unsigned", [iinfo(np.uint8).min, iinfo(np.uint8).max]), + ("uint16", "unsigned", [iinfo(np.uint16).min, iinfo(np.uint16).max]), + ("uint32", "unsigned", [iinfo(np.uint32).min, iinfo(np.uint32).max]), + ("uint64", "unsigned", [iinfo(np.uint64).min, iinfo(np.uint64).max]), + ("int16", "integer", [iinfo(np.int8).min, iinfo(np.int8).max + 1]), + ("int32", "integer", [iinfo(np.int16).min, iinfo(np.int16).max + 1]), + ("int64", "integer", [iinfo(np.int32).min, iinfo(np.int32).max + 1]), + ("int16", "integer", [iinfo(np.int8).min - 1, iinfo(np.int16).max]), + ("int32", "integer", [iinfo(np.int16).min - 1, iinfo(np.int32).max]), + ("int64", "integer", [iinfo(np.int32).min - 1, iinfo(np.int64).max]), + ("uint16", "unsigned", [iinfo(np.uint8).min, iinfo(np.uint8).max + 1]), + ("uint32", "unsigned", [iinfo(np.uint16).min, iinfo(np.uint16).max + 1]), + ("uint64", "unsigned", [iinfo(np.uint32).min, iinfo(np.uint32).max + 1]), + ], +) +def test_downcast_limits(dtype, downcast, min_max): + # see gh-14404: test the limits of each downcast. + series = to_numeric(Series(min_max), downcast=downcast) + assert series.dtype == dtype + + +@pytest.mark.parametrize( + "ser,expected", + [ + ( + Series([0, 9223372036854775808]), + Series([0, 9223372036854775808], dtype=np.uint64), + ) + ], +) +def test_downcast_uint64(ser, expected): + # see gh-14422: + # BUG: to_numeric doesn't work uint64 numbers + + result = to_numeric(ser, downcast="unsigned") + + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "data,exp_data", + [ + ( + [200, 300, "", "NaN", 30000000000000000000], + [200, 300, np.nan, np.nan, 30000000000000000000], + ), + ( + ["12345678901234567890", "1234567890", "ITEM"], + [12345678901234567890, 1234567890, np.nan], + ), + ], +) +def test_coerce_uint64_conflict(data, exp_data): + # see gh-17007 and gh-17125 + # + # Still returns float despite the uint64-nan conflict, + # which would normally force the casting to object. + result = to_numeric(Series(data), errors="coerce") + expected = Series(exp_data, dtype=float) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "errors,exp", + [ + ("ignore", Series(["12345678901234567890", "1234567890", "ITEM"])), + ("raise", "Unable to parse string"), + ], +) +def test_non_coerce_uint64_conflict(errors, exp): + # see gh-17007 and gh-17125 + # + # For completeness. 
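+ # errors="ignore" hands back the original object values unchanged, while + # errors="raise" trips over the non-numeric "ITEM".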
+ ser = Series(["12345678901234567890", "1234567890", "ITEM"]) + + if isinstance(exp, str): + with pytest.raises(ValueError, match=exp): + to_numeric(ser, errors=errors) + else: + result = to_numeric(ser, errors=errors) + tm.assert_series_equal(result, ser) + + +@pytest.mark.parametrize("dc1", ["integer", "float", "unsigned"]) +@pytest.mark.parametrize("dc2", ["integer", "float", "unsigned"]) +def test_downcast_empty(dc1, dc2): + # GH32493 + + tm.assert_numpy_array_equal( + to_numeric([], downcast=dc1), + to_numeric([], downcast=dc2), + check_dtype=False, + ) + + +def test_failure_to_convert_uint64_string_to_NaN(): + # GH 32394 + result = to_numeric("uint64", errors="coerce") + assert np.isnan(result) + + ser = Series([32, 64, np.nan]) + result = to_numeric(Series(["32", "64", "uint64"]), errors="coerce") + tm.assert_series_equal(result, ser) + + +@pytest.mark.parametrize( + "strrep", + [ + "243.164", + "245.968", + "249.585", + "259.745", + "265.742", + "272.567", + "279.196", + "280.366", + "275.034", + "271.351", + "272.889", + "270.627", + "280.828", + "290.383", + "308.153", + "319.945", + "336.0", + "344.09", + "351.385", + "356.178", + "359.82", + "361.03", + "367.701", + "380.812", + "387.98", + "391.749", + "391.171", + "385.97", + "385.345", + "386.121", + "390.996", + "399.734", + "413.073", + "421.532", + "430.221", + "437.092", + "439.746", + "446.01", + "451.191", + "460.463", + "469.779", + "472.025", + "479.49", + "474.864", + "467.54", + "471.978", + ], +) +def test_precision_float_conversion(strrep): + # GH 31364 + result = to_numeric(strrep) + + assert result == float(strrep) + + +@pytest.mark.parametrize( + "values, expected", + [ + (["1", "2", None], Series([1, 2, np.nan])), + (["1", "2", "3"], Series([1, 2, 3])), + (["1", "2", 3], Series([1, 2, 3])), + (["1", "2", 3.5], Series([1, 2, 3.5])), + (["1", None, 3.5], Series([1, np.nan, 3.5])), + (["1", "2", "3.5"], Series([1, 2, 3.5])), + ], +) +def test_to_numeric_from_nullable_string(values, nullable_string_dtype, expected): + # https://github.com/pandas-dev/pandas/issues/37262 + s = Series(values, dtype=nullable_string_dtype) + result = to_numeric(s) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "data, input_dtype, downcast, expected_dtype", + ( + ([1, 1], "Int64", "integer", "Int8"), + ([1.0, pd.NA], "Float64", "integer", "Int8"), + ([1.0, 1.1], "Float64", "integer", "Float64"), + ([1, pd.NA], "Int64", "integer", "Int8"), + ([450, 300], "Int64", "integer", "Int16"), + ([1, 1], "Float64", "integer", "Int8"), + ([np.iinfo(np.int64).max - 1, 1], "Int64", "integer", "Int64"), + ([1, 1], "Int64", "signed", "Int8"), + ([1.0, 1.0], "Float32", "signed", "Int8"), + ([1.0, 1.1], "Float64", "signed", "Float64"), + ([1, pd.NA], "Int64", "signed", "Int8"), + ([450, -300], "Int64", "signed", "Int16"), + pytest.param( + [np.iinfo(np.uint64).max - 1, 1], + "UInt64", + "signed", + "UInt64", + marks=pytest.mark.xfail(not is_platform_arm(), reason="GH38798"), + ), + ([1, 1], "Int64", "unsigned", "UInt8"), + ([1.0, 1.0], "Float32", "unsigned", "UInt8"), + ([1.0, 1.1], "Float64", "unsigned", "Float64"), + ([1, pd.NA], "Int64", "unsigned", "UInt8"), + ([450, -300], "Int64", "unsigned", "Int64"), + ([-1, -1], "Int32", "unsigned", "Int32"), + ([1, 1], "Float64", "float", "Float32"), + ([1, 1.1], "Float64", "float", "Float32"), + ), +) +def test_downcast_nullable_numeric(data, input_dtype, downcast, expected_dtype): + arr = pd.array(data, dtype=input_dtype) + result = to_numeric(arr, downcast=downcast) + expected 
= pd.array(data, dtype=expected_dtype) + tm.assert_extension_array_equal(result, expected) + + +def test_downcast_nullable_mask_is_copied(): + # GH38974 + + arr = pd.array([1, 2, pd.NA], dtype="Int64") + + result = to_numeric(arr, downcast="integer") + expected = pd.array([1, 2, pd.NA], dtype="Int8") + tm.assert_extension_array_equal(result, expected) + + arr[1] = pd.NA # should not modify result + tm.assert_extension_array_equal(result, expected) + + +def test_to_numeric_scientific_notation(): + # GH 15898 + result = to_numeric("1.7e+308") + expected = np.float64(1.7e308) + assert result == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tools/test_to_time.py b/.local/lib/python3.8/site-packages/pandas/tests/tools/test_to_time.py new file mode 100644 index 0000000..7983944 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/tools/test_to_time.py @@ -0,0 +1,74 @@ +from datetime import time +import locale + +import numpy as np +import pytest + +from pandas import Series +import pandas._testing as tm +from pandas.core.tools.datetimes import to_time as to_time_alias +from pandas.core.tools.times import to_time + +fails_on_non_english = pytest.mark.xfail( + locale.getlocale()[0] in ("zh_CN", "it_IT"), + reason="fail on a CI build with LC_ALL=zh_CN.utf8/it_IT.utf8", +) + + +class TestToTime: + @pytest.mark.parametrize( + "time_string", + [ + "14:15", + "1415", + pytest.param("2:15pm", marks=fails_on_non_english), + pytest.param("0215pm", marks=fails_on_non_english), + "14:15:00", + "141500", + pytest.param("2:15:00pm", marks=fails_on_non_english), + pytest.param("021500pm", marks=fails_on_non_english), + time(14, 15), + ], + ) + def test_parsers_time(self, time_string): + # GH#11818 + assert to_time(time_string) == time(14, 15) + + def test_odd_format(self): + new_string = "14.15" + msg = r"Cannot convert arg \['14\.15'\] to a time" + with pytest.raises(ValueError, match=msg): + to_time(new_string) + assert to_time(new_string, format="%H.%M") == time(14, 15) + + def test_arraylike(self): + arg = ["14:15", "20:20"] + expected_arr = [time(14, 15), time(20, 20)] + assert to_time(arg) == expected_arr + assert to_time(arg, format="%H:%M") == expected_arr + assert to_time(arg, infer_time_format=True) == expected_arr + assert to_time(arg, format="%I:%M%p", errors="coerce") == [None, None] + + res = to_time(arg, format="%I:%M%p", errors="ignore") + tm.assert_numpy_array_equal(res, np.array(arg, dtype=np.object_)) + + msg = "Cannot convert.+to a time with given format" + with pytest.raises(ValueError, match=msg): + to_time(arg, format="%I:%M%p", errors="raise") + + tm.assert_series_equal( + to_time(Series(arg, name="test")), Series(expected_arr, name="test") + ) + + res = to_time(np.array(arg)) + assert isinstance(res, list) + assert res == expected_arr + + +def test_to_time_alias(): + expected = time(14, 15) + + with tm.assert_produces_warning(FutureWarning): + result = to_time_alias(expected) + + assert result == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tools/test_to_timedelta.py b/.local/lib/python3.8/site-packages/pandas/tests/tools/test_to_timedelta.py new file mode 100644 index 0000000..ec6fccd --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/tools/test_to_timedelta.py @@ -0,0 +1,276 @@ +from datetime import ( + time, + timedelta, +) + +import numpy as np +import pytest + +from pandas.errors import OutOfBoundsTimedelta + +import pandas as pd +from pandas import ( + Series, + TimedeltaIndex, + isna, + 
to_timedelta, +) +import pandas._testing as tm +from pandas.core.arrays import TimedeltaArray + + +class TestTimedeltas: + @pytest.mark.parametrize("readonly", [True, False]) + def test_to_timedelta_readonly(self, readonly): + # GH#34857 + arr = np.array([], dtype=object) + if readonly: + arr.setflags(write=False) + result = to_timedelta(arr) + expected = to_timedelta([]) + tm.assert_index_equal(result, expected) + + def test_to_timedelta_null(self): + result = to_timedelta(["", ""]) + assert isna(result).all() + + def test_to_timedelta_same_np_timedelta64(self): + # pass thru + result = to_timedelta(np.array([np.timedelta64(1, "s")])) + expected = pd.Index(np.array([np.timedelta64(1, "s")])) + tm.assert_index_equal(result, expected) + + def test_to_timedelta_series(self): + # Series + expected = Series([timedelta(days=1), timedelta(days=1, seconds=1)]) + result = to_timedelta(Series(["1d", "1days 00:00:01"])) + tm.assert_series_equal(result, expected) + + def test_to_timedelta_units(self): + # with units + result = TimedeltaIndex( + [np.timedelta64(0, "ns"), np.timedelta64(10, "s").astype("m8[ns]")] + ) + expected = to_timedelta([0, 10], unit="s") + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "dtype, unit", + [ + ["int64", "s"], + ["int64", "m"], + ["int64", "h"], + ["timedelta64[s]", "s"], + ["timedelta64[D]", "D"], + ], + ) + def test_to_timedelta_units_dtypes(self, dtype, unit): + # arrays of various dtypes + arr = np.array([1] * 5, dtype=dtype) + result = to_timedelta(arr, unit=unit) + expected = TimedeltaIndex([np.timedelta64(1, unit)] * 5) + tm.assert_index_equal(result, expected) + + def test_to_timedelta_oob_non_nano(self): + arr = np.array([pd.NaT.value + 1], dtype="timedelta64[s]") + + msg = r"Out of bounds for nanosecond timedelta64\[s\] -9223372036854775807" + with pytest.raises(OutOfBoundsTimedelta, match=msg): + to_timedelta(arr) + + with pytest.raises(OutOfBoundsTimedelta, match=msg): + TimedeltaIndex(arr) + + with pytest.raises(OutOfBoundsTimedelta, match=msg): + TimedeltaArray._from_sequence(arr) + + @pytest.mark.parametrize( + "arg", [np.arange(10).reshape(2, 5), pd.DataFrame(np.arange(10).reshape(2, 5))] + ) + @pytest.mark.parametrize("errors", ["ignore", "raise", "coerce"]) + def test_to_timedelta_dataframe(self, arg, errors): + # GH 11776 + with pytest.raises(TypeError, match="1-d array"): + to_timedelta(arg, errors=errors) + + def test_to_timedelta_invalid_errors(self): + + # bad value for errors parameter + msg = "errors must be one of" + with pytest.raises(ValueError, match=msg): + to_timedelta(["foo"], errors="never") + + @pytest.mark.parametrize("arg", [[1, 2], 1]) + def test_to_timedelta_invalid_unit(self, arg): + # these will error + msg = "invalid unit abbreviation: foo" + with pytest.raises(ValueError, match=msg): + to_timedelta(arg, unit="foo") + + def test_to_timedelta_time(self): + # time not supported ATM + msg = ( + "Value must be Timedelta, string, integer, float, timedelta or convertible" + ) + with pytest.raises(ValueError, match=msg): + to_timedelta(time(second=1)) + assert to_timedelta(time(second=1), errors="coerce") is pd.NaT + + def test_to_timedelta_bad_value(self): + msg = "Could not convert 'foo' to NumPy timedelta" + with pytest.raises(ValueError, match=msg): + to_timedelta(["foo", "bar"]) + + def test_to_timedelta_bad_value_coerce(self): + tm.assert_index_equal( + TimedeltaIndex([pd.NaT, pd.NaT]), + to_timedelta(["foo", "bar"], errors="coerce"), + ) + + tm.assert_index_equal( + TimedeltaIndex(["1 day", pd.NaT, "1 
min"]), + to_timedelta(["1 day", "bar", "1 min"], errors="coerce"), + ) + + def test_to_timedelta_invalid_errors_ignore(self): + # gh-13613: these should not error because errors='ignore' + invalid_data = "apple" + assert invalid_data == to_timedelta(invalid_data, errors="ignore") + + invalid_data = ["apple", "1 days"] + tm.assert_numpy_array_equal( + np.array(invalid_data, dtype=object), + to_timedelta(invalid_data, errors="ignore"), + ) + + invalid_data = pd.Index(["apple", "1 days"]) + tm.assert_index_equal(invalid_data, to_timedelta(invalid_data, errors="ignore")) + + invalid_data = Series(["apple", "1 days"]) + tm.assert_series_equal( + invalid_data, to_timedelta(invalid_data, errors="ignore") + ) + + @pytest.mark.parametrize( + "val, warning", + [ + ("1M", FutureWarning), + ("1 M", FutureWarning), + ("1Y", FutureWarning), + ("1 Y", FutureWarning), + ("1y", FutureWarning), + ("1 y", FutureWarning), + ("1m", None), + ("1 m", None), + ("1 day", None), + ("2day", None), + ], + ) + def test_unambiguous_timedelta_values(self, val, warning): + # GH36666 Deprecate use of strings denoting units with 'M', 'Y', 'm' or 'y' + # in pd.to_timedelta + msg = "Units 'M', 'Y' and 'y' do not represent unambiguous timedelta" + with tm.assert_produces_warning(warning, match=msg, check_stacklevel=False): + to_timedelta(val) + + def test_to_timedelta_via_apply(self): + # GH 5458 + expected = Series([np.timedelta64(1, "s")]) + result = Series(["00:00:01"]).apply(to_timedelta) + tm.assert_series_equal(result, expected) + + result = Series([to_timedelta("00:00:01")]) + tm.assert_series_equal(result, expected) + + def test_to_timedelta_inference_without_warning(self): + # GH#41731 inference produces a warning in the Series constructor, + # but _not_ in to_timedelta + vals = ["00:00:01", pd.NaT] + with tm.assert_produces_warning(None): + result = to_timedelta(vals) + + expected = TimedeltaIndex([pd.Timedelta(seconds=1), pd.NaT]) + tm.assert_index_equal(result, expected) + + def test_to_timedelta_on_missing_values(self): + # GH5438 + timedelta_NaT = np.timedelta64("NaT") + + actual = to_timedelta(Series(["00:00:01", np.nan])) + expected = Series( + [np.timedelta64(1000000000, "ns"), timedelta_NaT], dtype=" 1 else code + assert frequencies.infer_freq(index) == exp_freq + + +@pytest.mark.parametrize( + "constructor", + [ + lambda now, delta: DatetimeIndex( + [now + delta * 7] + [now + delta * j for j in range(3)] + ), + lambda now, delta: DatetimeIndex( + [now + delta * j for j in range(3)] + [now + delta * 7] + ), + ], +) +def test_infer_freq_custom(base_delta_code_pair, constructor): + b = Timestamp(datetime.now()) + base_delta, _ = base_delta_code_pair + + index = constructor(b, base_delta) + assert frequencies.infer_freq(index) is None + + +@pytest.mark.parametrize( + "freq,expected", [("Q", "Q-DEC"), ("Q-NOV", "Q-NOV"), ("Q-OCT", "Q-OCT")] +) +def test_infer_freq_index(freq, expected): + rng = period_range("1959Q2", "2009Q3", freq=freq) + rng = Index(rng.to_timestamp("D", how="e").astype(object)) + + assert rng.inferred_freq == expected + + +@pytest.mark.parametrize( + "expected,dates", + list( + { + "AS-JAN": ["2009-01-01", "2010-01-01", "2011-01-01", "2012-01-01"], + "Q-OCT": ["2009-01-31", "2009-04-30", "2009-07-31", "2009-10-31"], + "M": ["2010-11-30", "2010-12-31", "2011-01-31", "2011-02-28"], + "W-SAT": ["2010-12-25", "2011-01-01", "2011-01-08", "2011-01-15"], + "D": ["2011-01-01", "2011-01-02", "2011-01-03", "2011-01-04"], + "H": [ + "2011-12-31 22:00", + "2011-12-31 23:00", + "2012-01-01 00:00", + 
"2012-01-01 01:00", + ], + }.items() + ), +) +def test_infer_freq_tz(tz_naive_fixture, expected, dates): + # see gh-7310 + tz = tz_naive_fixture + idx = DatetimeIndex(dates, tz=tz) + assert idx.inferred_freq == expected + + +@pytest.mark.parametrize( + "date_pair", + [ + ["2013-11-02", "2013-11-5"], # Fall DST + ["2014-03-08", "2014-03-11"], # Spring DST + ["2014-01-01", "2014-01-03"], # Regular Time + ], +) +@pytest.mark.parametrize( + "freq", ["H", "3H", "10T", "3601S", "3600001L", "3600000001U", "3600000000001N"] +) +def test_infer_freq_tz_transition(tz_naive_fixture, date_pair, freq): + # see gh-8772 + tz = tz_naive_fixture + idx = date_range(date_pair[0], date_pair[1], freq=freq, tz=tz) + assert idx.inferred_freq == freq + + +def test_infer_freq_tz_transition_custom(): + index = date_range("2013-11-03", periods=5, freq="3H").tz_localize( + "America/Chicago" + ) + assert index.inferred_freq is None + + +@pytest.mark.parametrize( + "data,expected", + [ + # Hourly freq in a day must result in "H" + ( + [ + "2014-07-01 09:00", + "2014-07-01 10:00", + "2014-07-01 11:00", + "2014-07-01 12:00", + "2014-07-01 13:00", + "2014-07-01 14:00", + ], + "H", + ), + ( + [ + "2014-07-01 09:00", + "2014-07-01 10:00", + "2014-07-01 11:00", + "2014-07-01 12:00", + "2014-07-01 13:00", + "2014-07-01 14:00", + "2014-07-01 15:00", + "2014-07-01 16:00", + "2014-07-02 09:00", + "2014-07-02 10:00", + "2014-07-02 11:00", + ], + "BH", + ), + ( + [ + "2014-07-04 09:00", + "2014-07-04 10:00", + "2014-07-04 11:00", + "2014-07-04 12:00", + "2014-07-04 13:00", + "2014-07-04 14:00", + "2014-07-04 15:00", + "2014-07-04 16:00", + "2014-07-07 09:00", + "2014-07-07 10:00", + "2014-07-07 11:00", + ], + "BH", + ), + ( + [ + "2014-07-04 09:00", + "2014-07-04 10:00", + "2014-07-04 11:00", + "2014-07-04 12:00", + "2014-07-04 13:00", + "2014-07-04 14:00", + "2014-07-04 15:00", + "2014-07-04 16:00", + "2014-07-07 09:00", + "2014-07-07 10:00", + "2014-07-07 11:00", + "2014-07-07 12:00", + "2014-07-07 13:00", + "2014-07-07 14:00", + "2014-07-07 15:00", + "2014-07-07 16:00", + "2014-07-08 09:00", + "2014-07-08 10:00", + "2014-07-08 11:00", + "2014-07-08 12:00", + "2014-07-08 13:00", + "2014-07-08 14:00", + "2014-07-08 15:00", + "2014-07-08 16:00", + ], + "BH", + ), + ], +) +def test_infer_freq_business_hour(data, expected): + # see gh-7905 + idx = DatetimeIndex(data) + assert idx.inferred_freq == expected + + +def test_not_monotonic(): + rng = DatetimeIndex(["1/31/2000", "1/31/2001", "1/31/2002"]) + rng = rng[::-1] + + assert rng.inferred_freq == "-1A-JAN" + + +def test_non_datetime_index2(): + rng = DatetimeIndex(["1/31/2000", "1/31/2001", "1/31/2002"]) + vals = rng.to_pydatetime() + + result = frequencies.infer_freq(vals) + assert result == rng.inferred_freq + + +@pytest.mark.parametrize( + "idx", [tm.makeIntIndex(10), tm.makeFloatIndex(10), tm.makePeriodIndex(10)] +) +def test_invalid_index_types(idx): + msg = "|".join( + [ + "cannot infer freq from a non-convertible", + "Check the `freq` attribute instead of using infer_freq", + ] + ) + + with pytest.raises(TypeError, match=msg): + frequencies.infer_freq(idx) + + +@pytest.mark.skipif(is_platform_windows(), reason="see gh-10822: Windows issue") +@pytest.mark.parametrize("idx", [tm.makeStringIndex(10), tm.makeUnicodeIndex(10)]) +def test_invalid_index_types_unicode(idx): + # see gh-10822 + # + # Odd error message on conversions to datetime for unicode. 
+ msg = "Unknown string format" + + with pytest.raises(ValueError, match=msg): + frequencies.infer_freq(idx) + + +def test_string_datetime_like_compat(): + # see gh-6463 + data = ["2004-01", "2004-02", "2004-03", "2004-04"] + + expected = frequencies.infer_freq(data) + result = frequencies.infer_freq(Index(data)) + + assert result == expected + + +def test_series(): + # see gh-6407 + s = Series(date_range("20130101", "20130110")) + inferred = frequencies.infer_freq(s) + assert inferred == "D" + + +@pytest.mark.parametrize("end", [10, 10.0]) +def test_series_invalid_type(end): + # see gh-6407 + msg = "cannot infer freq from a non-convertible dtype on a Series" + s = Series(np.arange(end)) + + with pytest.raises(TypeError, match=msg): + frequencies.infer_freq(s) + + +def test_series_inconvertible_string(): + # see gh-6407 + msg = "Unknown string format" + + with pytest.raises(ValueError, match=msg): + frequencies.infer_freq(Series(["foo", "bar"])) + + +@pytest.mark.parametrize("freq", [None, "L"]) +def test_series_period_index(freq): + # see gh-6407 + # + # Cannot infer on PeriodIndex + msg = "cannot infer freq from a non-convertible dtype on a Series" + s = Series(period_range("2013", periods=10, freq=freq)) + + with pytest.raises(TypeError, match=msg): + frequencies.infer_freq(s) + + +@pytest.mark.parametrize("freq", ["M", "L", "S"]) +def test_series_datetime_index(freq): + s = Series(date_range("20130101", periods=10, freq=freq)) + inferred = frequencies.infer_freq(s) + assert inferred == freq + + +@pytest.mark.parametrize( + "offset_func", + [ + frequencies._get_offset, + lambda freq: date_range("2011-01-01", periods=5, freq=freq), + ], +) +@pytest.mark.parametrize( + "freq", + [ + "WEEKDAY", + "EOM", + "W@MON", + "W@TUE", + "W@WED", + "W@THU", + "W@FRI", + "W@SAT", + "W@SUN", + "Q@JAN", + "Q@FEB", + "Q@MAR", + "A@JAN", + "A@FEB", + "A@MAR", + "A@APR", + "A@MAY", + "A@JUN", + "A@JUL", + "A@AUG", + "A@SEP", + "A@OCT", + "A@NOV", + "A@DEC", + "Y@JAN", + "WOM@1MON", + "WOM@2MON", + "WOM@3MON", + "WOM@4MON", + "WOM@1TUE", + "WOM@2TUE", + "WOM@3TUE", + "WOM@4TUE", + "WOM@1WED", + "WOM@2WED", + "WOM@3WED", + "WOM@4WED", + "WOM@1THU", + "WOM@2THU", + "WOM@3THU", + "WOM@4THU", + "WOM@1FRI", + "WOM@2FRI", + "WOM@3FRI", + "WOM@4FRI", + ], +) +def test_legacy_offset_warnings(offset_func, freq): + with pytest.raises(ValueError, match=INVALID_FREQ_ERR_MSG): + offset_func(freq) + + +def test_ms_vs_capital_ms(): + left = frequencies._get_offset("ms") + right = frequencies._get_offset("MS") + + assert left == offsets.Milli() + assert right == offsets.MonthBegin() diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tseries/holiday/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/tseries/holiday/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tseries/holiday/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/tseries/holiday/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..6b569f9 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/tseries/holiday/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tseries/holiday/__pycache__/test_calendar.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/tseries/holiday/__pycache__/test_calendar.cpython-38.pyc new file mode 100644 index 0000000..7ccb380 Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/pandas/tests/tseries/holiday/__pycache__/test_calendar.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tseries/holiday/__pycache__/test_federal.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/tseries/holiday/__pycache__/test_federal.cpython-38.pyc new file mode 100644 index 0000000..d13157e Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/tseries/holiday/__pycache__/test_federal.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tseries/holiday/__pycache__/test_holiday.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/tseries/holiday/__pycache__/test_holiday.cpython-38.pyc new file mode 100644 index 0000000..eaaf046 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/tseries/holiday/__pycache__/test_holiday.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tseries/holiday/__pycache__/test_observance.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/tseries/holiday/__pycache__/test_observance.cpython-38.pyc new file mode 100644 index 0000000..e697796 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/tseries/holiday/__pycache__/test_observance.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tseries/holiday/test_calendar.py b/.local/lib/python3.8/site-packages/pandas/tests/tseries/holiday/test_calendar.py new file mode 100644 index 0000000..a1e3c19 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/tseries/holiday/test_calendar.py @@ -0,0 +1,116 @@ +from datetime import datetime + +import pytest + +from pandas import ( + DatetimeIndex, + offsets, + to_datetime, +) +import pandas._testing as tm + +from pandas.tseries.holiday import ( + AbstractHolidayCalendar, + Holiday, + Timestamp, + USFederalHolidayCalendar, + USLaborDay, + USThanksgivingDay, + get_calendar, +) + + +@pytest.mark.parametrize( + "transform", [lambda x: x, lambda x: x.strftime("%Y-%m-%d"), lambda x: Timestamp(x)] +) +def test_calendar(transform): + start_date = datetime(2012, 1, 1) + end_date = datetime(2012, 12, 31) + + calendar = USFederalHolidayCalendar() + holidays = calendar.holidays(transform(start_date), transform(end_date)) + + expected = [ + datetime(2012, 1, 2), + datetime(2012, 1, 16), + datetime(2012, 2, 20), + datetime(2012, 5, 28), + datetime(2012, 7, 4), + datetime(2012, 9, 3), + datetime(2012, 10, 8), + datetime(2012, 11, 12), + datetime(2012, 11, 22), + datetime(2012, 12, 25), + ] + + assert list(holidays.to_pydatetime()) == expected + + +def test_calendar_caching(): + # see gh-9552. + + class TestCalendar(AbstractHolidayCalendar): + def __init__(self, name=None, rules=None): + super().__init__(name=name, rules=rules) + + jan1 = TestCalendar(rules=[Holiday("jan1", year=2015, month=1, day=1)]) + jan2 = TestCalendar(rules=[Holiday("jan2", year=2015, month=1, day=2)]) + + # Getting holidays for Jan 1 should not alter results for Jan 2. 
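+ # Both instances share the TestCalendar class, so a naively class-level + # holiday cache would let jan1's result leak into jan2 (gh-9552).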
+ tm.assert_index_equal(jan1.holidays(), DatetimeIndex(["01-Jan-2015"])) + tm.assert_index_equal(jan2.holidays(), DatetimeIndex(["02-Jan-2015"])) + + +def test_calendar_observance_dates(): + # see gh-11477 + us_fed_cal = get_calendar("USFederalHolidayCalendar") + holidays0 = us_fed_cal.holidays( + datetime(2015, 7, 3), datetime(2015, 7, 3) + ) # <-- same start and end dates + holidays1 = us_fed_cal.holidays( + datetime(2015, 7, 3), datetime(2015, 7, 6) + ) # <-- different start and end dates + holidays2 = us_fed_cal.holidays( + datetime(2015, 7, 3), datetime(2015, 7, 3) + ) # <-- same start and end dates + + # These should all produce the same result. + # + # In addition, calling with different start and end + # dates should not alter the output if we call the + # function again with the same start and end date. + tm.assert_index_equal(holidays0, holidays1) + tm.assert_index_equal(holidays0, holidays2) + + +def test_rule_from_name(): + us_fed_cal = get_calendar("USFederalHolidayCalendar") + assert us_fed_cal.rule_from_name("Thanksgiving Day") == USThanksgivingDay + + +def test_calendar_2031(): + # See gh-27790 + # + # Labor Day 2031 is on September 1. Saturday before is August 30. + # Next working day after August 30 ought to be Tuesday, September 2. + + class testCalendar(AbstractHolidayCalendar): + rules = [USLaborDay] + + cal = testCalendar() + workDay = offsets.CustomBusinessDay(calendar=cal) + Sat_before_Labor_Day_2031 = to_datetime("2031-08-30") + next_working_day = Sat_before_Labor_Day_2031 + 0 * workDay + assert next_working_day == to_datetime("2031-09-02") + + +def test_no_holidays_calendar(): + # Test for issue #31415 + + class NoHolidaysCalendar(AbstractHolidayCalendar): + pass + + cal = NoHolidaysCalendar() + holidays = cal.holidays(Timestamp("01-Jan-2020"), Timestamp("01-Jan-2021")) + empty_index = DatetimeIndex([]) # Type is DatetimeIndex since return_name=False + tm.assert_index_equal(holidays, empty_index) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tseries/holiday/test_federal.py b/.local/lib/python3.8/site-packages/pandas/tests/tseries/holiday/test_federal.py new file mode 100644 index 0000000..64c60d4 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/tseries/holiday/test_federal.py @@ -0,0 +1,38 @@ +from datetime import datetime + +from pandas.tseries.holiday import ( + AbstractHolidayCalendar, + USMartinLutherKingJr, + USMemorialDay, +) + + +def test_no_mlk_before_1986(): + # see gh-10278 + class MLKCalendar(AbstractHolidayCalendar): + rules = [USMartinLutherKingJr] + + holidays = MLKCalendar().holidays(start="1984", end="1988").to_pydatetime().tolist() + + # Testing to make sure holiday is not incorrectly observed before 1986. + assert holidays == [datetime(1986, 1, 20, 0, 0), datetime(1987, 1, 19, 0, 0)] + + +def test_memorial_day(): + class MemorialDay(AbstractHolidayCalendar): + rules = [USMemorialDay] + + holidays = MemorialDay().holidays(start="1971", end="1980").to_pydatetime().tolist() + + # Fixes 5/31 error and checked manually against Wikipedia. 
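+ # USMemorialDay is the last Monday of May, so the observed date drifts + # between May 25 and May 31 across years.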
+ assert holidays == [ + datetime(1971, 5, 31, 0, 0), + datetime(1972, 5, 29, 0, 0), + datetime(1973, 5, 28, 0, 0), + datetime(1974, 5, 27, 0, 0), + datetime(1975, 5, 26, 0, 0), + datetime(1976, 5, 31, 0, 0), + datetime(1977, 5, 30, 0, 0), + datetime(1978, 5, 29, 0, 0), + datetime(1979, 5, 28, 0, 0), + ] diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tseries/holiday/test_holiday.py b/.local/lib/python3.8/site-packages/pandas/tests/tseries/holiday/test_holiday.py new file mode 100644 index 0000000..cefb2f8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/tseries/holiday/test_holiday.py @@ -0,0 +1,266 @@ +from datetime import datetime + +import pytest +from pytz import utc + +import pandas._testing as tm + +from pandas.tseries.holiday import ( + MO, + SA, + AbstractHolidayCalendar, + DateOffset, + EasterMonday, + GoodFriday, + Holiday, + HolidayCalendarFactory, + Timestamp, + USColumbusDay, + USLaborDay, + USMartinLutherKingJr, + USMemorialDay, + USPresidentsDay, + USThanksgivingDay, + get_calendar, + next_monday, +) + + +@pytest.mark.parametrize( + "holiday,start_date,end_date,expected", + [ + ( + USMemorialDay, + datetime(2011, 1, 1), + datetime(2020, 12, 31), + [ + datetime(2011, 5, 30), + datetime(2012, 5, 28), + datetime(2013, 5, 27), + datetime(2014, 5, 26), + datetime(2015, 5, 25), + datetime(2016, 5, 30), + datetime(2017, 5, 29), + datetime(2018, 5, 28), + datetime(2019, 5, 27), + datetime(2020, 5, 25), + ], + ), + ( + Holiday("July 4th Eve", month=7, day=3), + "2001-01-01", + "2003-03-03", + [Timestamp("2001-07-03 00:00:00"), Timestamp("2002-07-03 00:00:00")], + ), + ( + Holiday("July 4th Eve", month=7, day=3, days_of_week=(0, 1, 2, 3)), + "2001-01-01", + "2008-03-03", + [ + Timestamp("2001-07-03 00:00:00"), + Timestamp("2002-07-03 00:00:00"), + Timestamp("2003-07-03 00:00:00"), + Timestamp("2006-07-03 00:00:00"), + Timestamp("2007-07-03 00:00:00"), + ], + ), + ( + EasterMonday, + datetime(2011, 1, 1), + datetime(2020, 12, 31), + [ + Timestamp("2011-04-25 00:00:00"), + Timestamp("2012-04-09 00:00:00"), + Timestamp("2013-04-01 00:00:00"), + Timestamp("2014-04-21 00:00:00"), + Timestamp("2015-04-06 00:00:00"), + Timestamp("2016-03-28 00:00:00"), + Timestamp("2017-04-17 00:00:00"), + Timestamp("2018-04-02 00:00:00"), + Timestamp("2019-04-22 00:00:00"), + Timestamp("2020-04-13 00:00:00"), + ], + ), + ( + GoodFriday, + datetime(2011, 1, 1), + datetime(2020, 12, 31), + [ + Timestamp("2011-04-22 00:00:00"), + Timestamp("2012-04-06 00:00:00"), + Timestamp("2013-03-29 00:00:00"), + Timestamp("2014-04-18 00:00:00"), + Timestamp("2015-04-03 00:00:00"), + Timestamp("2016-03-25 00:00:00"), + Timestamp("2017-04-14 00:00:00"), + Timestamp("2018-03-30 00:00:00"), + Timestamp("2019-04-19 00:00:00"), + Timestamp("2020-04-10 00:00:00"), + ], + ), + ( + USThanksgivingDay, + datetime(2011, 1, 1), + datetime(2020, 12, 31), + [ + datetime(2011, 11, 24), + datetime(2012, 11, 22), + datetime(2013, 11, 28), + datetime(2014, 11, 27), + datetime(2015, 11, 26), + datetime(2016, 11, 24), + datetime(2017, 11, 23), + datetime(2018, 11, 22), + datetime(2019, 11, 28), + datetime(2020, 11, 26), + ], + ), + ], +) +def test_holiday_dates(holiday, start_date, end_date, expected): + assert list(holiday.dates(start_date, end_date)) == expected + + # Verify that timezone info is preserved. 
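+ # Passing tz-aware UTC endpoints should return the same holidays, tz-aware.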
+ assert list( + holiday.dates( + utc.localize(Timestamp(start_date)), utc.localize(Timestamp(end_date)) + ) + ) == [utc.localize(dt) for dt in expected] + + +@pytest.mark.parametrize( + "holiday,start,expected", + [ + (USMemorialDay, datetime(2015, 7, 1), []), + (USMemorialDay, "2015-05-25", [Timestamp("2015-05-25")]), + (USLaborDay, datetime(2015, 7, 1), []), + (USLaborDay, "2015-09-07", [Timestamp("2015-09-07")]), + (USColumbusDay, datetime(2015, 7, 1), []), + (USColumbusDay, "2015-10-12", [Timestamp("2015-10-12")]), + (USThanksgivingDay, datetime(2015, 7, 1), []), + (USThanksgivingDay, "2015-11-26", [Timestamp("2015-11-26")]), + (USMartinLutherKingJr, datetime(2015, 7, 1), []), + (USMartinLutherKingJr, "2015-01-19", [Timestamp("2015-01-19")]), + (USPresidentsDay, datetime(2015, 7, 1), []), + (USPresidentsDay, "2015-02-16", [Timestamp("2015-02-16")]), + (GoodFriday, datetime(2015, 7, 1), []), + (GoodFriday, "2015-04-03", [Timestamp("2015-04-03")]), + (EasterMonday, "2015-04-06", [Timestamp("2015-04-06")]), + (EasterMonday, datetime(2015, 7, 1), []), + (EasterMonday, "2015-04-05", []), + ("New Year's Day", "2015-01-01", [Timestamp("2015-01-01")]), + ("New Year's Day", "2010-12-31", [Timestamp("2010-12-31")]), + ("New Year's Day", datetime(2015, 7, 1), []), + ("New Year's Day", "2011-01-01", []), + ("Independence Day", "2015-07-03", [Timestamp("2015-07-03")]), + ("Independence Day", datetime(2015, 7, 1), []), + ("Independence Day", "2015-07-04", []), + ("Veterans Day", "2012-11-12", [Timestamp("2012-11-12")]), + ("Veterans Day", datetime(2015, 7, 1), []), + ("Veterans Day", "2012-11-11", []), + ("Christmas Day", "2011-12-26", [Timestamp("2011-12-26")]), + ("Christmas Day", datetime(2015, 7, 1), []), + ("Christmas Day", "2011-12-25", []), + ("Juneteenth National Independence Day", "2020-06-19", []), + ( + "Juneteenth National Independence Day", + "2021-06-18", + [Timestamp("2021-06-18")], + ), + ("Juneteenth National Independence Day", "2022-06-19", []), + ( + "Juneteenth National Independence Day", + "2022-06-20", + [Timestamp("2022-06-20")], + ), + ], +) +def test_holidays_within_dates(holiday, start, expected): + # see gh-11477 + # + # Fix holiday behavior where holiday.dates returned dates outside + # start/end date, or observed rules could not be applied because the + # holiday was not in the original date range (e.g., 7/4/2015 -> 7/3/2015). + if isinstance(holiday, str): + calendar = get_calendar("USFederalHolidayCalendar") + holiday = calendar.rule_from_name(holiday) + + assert list(holiday.dates(start, start)) == expected + + # Verify that timezone info is preserved. 
+ assert list( + holiday.dates(utc.localize(Timestamp(start)), utc.localize(Timestamp(start))) + ) == [utc.localize(dt) for dt in expected] + + +@pytest.mark.parametrize( + "transform", [lambda x: x.strftime("%Y-%m-%d"), lambda x: Timestamp(x)] +) +def test_argument_types(transform): + start_date = datetime(2011, 1, 1) + end_date = datetime(2020, 12, 31) + + holidays = USThanksgivingDay.dates(start_date, end_date) + holidays2 = USThanksgivingDay.dates(transform(start_date), transform(end_date)) + tm.assert_index_equal(holidays, holidays2) + + +@pytest.mark.parametrize( + "name,kwargs", + [ + ("One-Time", {"year": 2012, "month": 5, "day": 28}), + ( + "Range", + { + "month": 5, + "day": 28, + "start_date": datetime(2012, 1, 1), + "end_date": datetime(2012, 12, 31), + "offset": DateOffset(weekday=MO(1)), + }, + ), + ], +) +def test_special_holidays(name, kwargs): + base_date = [datetime(2012, 5, 28)] + holiday = Holiday(name, **kwargs) + + start_date = datetime(2011, 1, 1) + end_date = datetime(2020, 12, 31) + + assert base_date == holiday.dates(start_date, end_date) + + +def test_get_calendar(): + class TestCalendar(AbstractHolidayCalendar): + rules = [] + + calendar = get_calendar("TestCalendar") + assert TestCalendar == type(calendar) + + +def test_factory(): + class_1 = HolidayCalendarFactory( + "MemorialDay", AbstractHolidayCalendar, USMemorialDay + ) + class_2 = HolidayCalendarFactory( + "Thanksgiving", AbstractHolidayCalendar, USThanksgivingDay + ) + class_3 = HolidayCalendarFactory("Combined", class_1, class_2) + + assert len(class_1.rules) == 1 + assert len(class_2.rules) == 1 + assert len(class_3.rules) == 2 + + +def test_both_offset_observance_raises(): + # see gh-10217 + msg = "Cannot use both offset and observance" + with pytest.raises(NotImplementedError, match=msg): + Holiday( + "Cyber Monday", + month=11, + day=1, + offset=[DateOffset(weekday=SA(4))], + observance=next_monday, + ) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tseries/holiday/test_observance.py b/.local/lib/python3.8/site-packages/pandas/tests/tseries/holiday/test_observance.py new file mode 100644 index 0000000..83038ad --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/tseries/holiday/test_observance.py @@ -0,0 +1,105 @@ +from datetime import datetime + +import pytest + +from pandas.tseries.holiday import ( + after_nearest_workday, + before_nearest_workday, + nearest_workday, + next_monday, + next_monday_or_tuesday, + next_workday, + previous_friday, + previous_workday, + sunday_to_monday, + weekend_to_monday, +) + +_WEDNESDAY = datetime(2014, 4, 9) +_THURSDAY = datetime(2014, 4, 10) +_FRIDAY = datetime(2014, 4, 11) +_SATURDAY = datetime(2014, 4, 12) +_SUNDAY = datetime(2014, 4, 13) +_MONDAY = datetime(2014, 4, 14) +_TUESDAY = datetime(2014, 4, 15) +_NEXT_WEDNESDAY = datetime(2014, 4, 16) + + +@pytest.mark.parametrize("day", [_SATURDAY, _SUNDAY]) +def test_next_monday(day): + assert next_monday(day) == _MONDAY + + +@pytest.mark.parametrize( + "day,expected", [(_SATURDAY, _MONDAY), (_SUNDAY, _TUESDAY), (_MONDAY, _TUESDAY)] +) +def test_next_monday_or_tuesday(day, expected): + assert next_monday_or_tuesday(day) == expected + + +@pytest.mark.parametrize("day", [_SATURDAY, _SUNDAY]) +def test_previous_friday(day): + assert previous_friday(day) == _FRIDAY + + +def test_sunday_to_monday(): + assert sunday_to_monday(_SUNDAY) == _MONDAY + + +@pytest.mark.parametrize( + "day,expected", [(_SATURDAY, _FRIDAY), (_SUNDAY, _MONDAY), (_MONDAY, _MONDAY)] +) +def test_nearest_workday(day, 
expected): + assert nearest_workday(day) == expected + + +@pytest.mark.parametrize( + "day,expected", [(_SATURDAY, _MONDAY), (_SUNDAY, _MONDAY), (_MONDAY, _MONDAY)] +) +def test_weekend_to_monday(day, expected): + assert weekend_to_monday(day) == expected + + +@pytest.mark.parametrize( + "day,expected", + [ + (_WEDNESDAY, _THURSDAY), + (_THURSDAY, _FRIDAY), + (_SATURDAY, _MONDAY), + (_SUNDAY, _MONDAY), + (_MONDAY, _TUESDAY), + (_TUESDAY, _NEXT_WEDNESDAY), # WED is same week as TUE + ], +) +def test_next_workday(day, expected): + assert next_workday(day) == expected + + +@pytest.mark.parametrize( + "day,expected", [(_SATURDAY, _FRIDAY), (_SUNDAY, _FRIDAY), (_TUESDAY, _MONDAY)] +) +def test_previous_workday(day, expected): + assert previous_workday(day) == expected + + +@pytest.mark.parametrize( + "day,expected", + [ + (_THURSDAY, _WEDNESDAY), + (_FRIDAY, _THURSDAY), + (_SATURDAY, _THURSDAY), + (_SUNDAY, _FRIDAY), + (_MONDAY, _FRIDAY), # last week Friday + (_TUESDAY, _MONDAY), + (_NEXT_WEDNESDAY, _TUESDAY), # WED is same week as TUE + ], +) +def test_before_nearest_workday(day, expected): + assert before_nearest_workday(day) == expected + + +@pytest.mark.parametrize( + "day,expected", [(_SATURDAY, _MONDAY), (_SUNDAY, _TUESDAY), (_FRIDAY, _MONDAY)] +) +def test_after_nearest_workday(day, expected): + assert after_nearest_workday(day) == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..4317bb3 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/__pycache__/common.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/__pycache__/common.cpython-38.pyc new file mode 100644 index 0000000..5cbd91c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/__pycache__/common.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/__pycache__/conftest.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/__pycache__/conftest.cpython-38.pyc new file mode 100644 index 0000000..8e84e3a Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/__pycache__/conftest.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/__pycache__/test_business_day.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/__pycache__/test_business_day.cpython-38.pyc new file mode 100644 index 0000000..ba2c2df Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/__pycache__/test_business_day.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/__pycache__/test_business_hour.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/__pycache__/test_business_hour.cpython-38.pyc new file mode 100644 index 0000000..58f7a50 Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/__pycache__/test_year.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/common.py b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/common.py new file mode 100644 index 0000000..d8e98bb --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/common.py @@ -0,0 +1,206 @@ +""" +Assertion helpers and base class for offsets tests +""" +from __future__ import annotations + +from datetime import datetime + +from dateutil.tz.tz import tzlocal +import pytest + +from pandas._libs.tslibs import ( + OutOfBoundsDatetime, + Timestamp, +) +from pandas._libs.tslibs.offsets import ( + FY5253, + BusinessHour, + CustomBusinessHour, + DateOffset, + FY5253Quarter, + LastWeekOfMonth, + Week, + WeekOfMonth, +) +from pandas.compat import IS64 + + +def assert_offset_equal(offset, base, expected): + actual = offset + base + actual_swapped = base + offset + actual_apply = offset._apply(base) + try: + assert actual == expected + assert actual_swapped == expected + assert actual_apply == expected + except AssertionError as err: + raise AssertionError( + f"\nExpected: {expected}\nActual: {actual}\nFor Offset: {offset})" + f"\nAt Date: {base}" + ) from err + + +def assert_is_on_offset(offset, date, expected): + actual = offset.is_on_offset(date) + assert actual == expected, ( + f"\nExpected: {expected}\nActual: {actual}\nFor Offset: {offset})" + f"\nAt Date: {date}" + ) + + +class WeekDay: + MON = 0 + TUE = 1 + WED = 2 + THU = 3 + FRI = 4 + SAT = 5 + SUN = 6 + + +class Base: + _offset: type[DateOffset] | None = None + d = Timestamp(datetime(2008, 1, 2)) + + timezones = [ + None, + "UTC", + "Asia/Tokyo", + "US/Eastern", + "dateutil/Asia/Tokyo", + "dateutil/US/Pacific", + ] + + def _get_offset(self, klass, value=1, normalize=False): + # create instance from offset class + if klass is FY5253: + klass = klass( + n=value, + startingMonth=1, + weekday=1, + variation="last", + normalize=normalize, + ) + elif klass is FY5253Quarter: + klass = klass( + n=value, + startingMonth=1, + weekday=1, + qtr_with_extra_week=1, + variation="last", + normalize=normalize, + ) + elif klass is LastWeekOfMonth: + klass = klass(n=value, weekday=5, normalize=normalize) + elif klass is WeekOfMonth: + klass = klass(n=value, week=1, weekday=5, normalize=normalize) + elif klass is Week: + klass = klass(n=value, weekday=5, normalize=normalize) + elif klass is DateOffset: + klass = klass(days=value, normalize=normalize) + else: + klass = klass(value, normalize=normalize) + return klass + + def test_apply_out_of_range(self, request, tz_naive_fixture): + tz = tz_naive_fixture + if self._offset is None: + return + + # try to create an out-of-bounds result timestamp; if we can't create + # the offset skip + try: + if self._offset in (BusinessHour, CustomBusinessHour): + # Using 10000 in BusinessHour fails in tz check because of DST + # difference + offset = self._get_offset(self._offset, value=100000) + else: + offset = self._get_offset(self._offset, value=10000) + + result = Timestamp("20080101") + offset + assert isinstance(result, datetime) + assert result.tzinfo is None + + # Check tz is preserved + t = Timestamp("20080101", tz=tz) + result = t + offset + assert isinstance(result, datetime) + + if isinstance(tz, tzlocal) and not IS64: + # If we hit OutOfBoundsDatetime on non-64 bit machines + # we'll drop out of the try clause before the next test + request.node.add_marker( + 
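# tzlocal on non-64-bit platforms overflows for datetimes past 2038,
+                # so the tzinfo round-trip below is expected to fail there.
+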
pytest.mark.xfail(reason="OverflowError inside tzlocal past 2038") + ) + assert t.tzinfo == result.tzinfo + + except OutOfBoundsDatetime: + pass + except (ValueError, KeyError): + # we are creating an invalid offset + # so ignore + pass + + def test_offsets_compare_equal(self): + # root cause of GH#456: __ne__ was not implemented + if self._offset is None: + return + offset1 = self._offset() + offset2 = self._offset() + assert not offset1 != offset2 + assert offset1 == offset2 + + def test_rsub(self): + if self._offset is None or not hasattr(self, "offset2"): + # i.e. skip for TestCommon and YQM subclasses that do not have + # offset2 attr + return + assert self.d - self.offset2 == (-self.offset2)._apply(self.d) + + def test_radd(self): + if self._offset is None or not hasattr(self, "offset2"): + # i.e. skip for TestCommon and YQM subclasses that do not have + # offset2 attr + return + assert self.d + self.offset2 == self.offset2 + self.d + + def test_sub(self): + if self._offset is None or not hasattr(self, "offset2"): + # i.e. skip for TestCommon and YQM subclasses that do not have + # offset2 attr + return + off = self.offset2 + msg = "Cannot subtract datetime from offset" + with pytest.raises(TypeError, match=msg): + off - self.d + + assert 2 * off - off == off + assert self.d - self.offset2 == self.d + self._offset(-2) + assert self.d - self.offset2 == self.d - (2 * off - off) + + def testMult1(self): + if self._offset is None or not hasattr(self, "offset1"): + # i.e. skip for TestCommon and YQM subclasses that do not have + # offset1 attr + return + assert self.d + 10 * self.offset1 == self.d + self._offset(10) + assert self.d + 5 * self.offset1 == self.d + self._offset(5) + + def testMult2(self): + if self._offset is None: + return + assert self.d + (-5 * self._offset(-10)) == self.d + self._offset(50) + assert self.d + (-3 * self._offset(-2)) == self.d + self._offset(6) + + def test_compare_str(self): + # GH#23524 + # comparing to strings that cannot be cast to DateOffsets should + # not raise for __eq__ or __ne__ + if self._offset is None: + return + off = self._get_offset(self._offset) + + assert not off == "infer" + assert off != "foo" + # Note: inequalities are only implemented for Tick subclasses; + # tests for this are in test_ticks diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/conftest.py b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/conftest.py new file mode 100644 index 0000000..df68c98 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/conftest.py @@ -0,0 +1,27 @@ +import pytest + +from pandas._libs.tslibs.offsets import MonthOffset + +import pandas.tseries.offsets as offsets + + +@pytest.fixture(params=[getattr(offsets, o) for o in offsets.__all__]) +def offset_types(request): + """ + Fixture for all the datetime offsets available for a time series. + """ + return request.param + + +@pytest.fixture( + params=[ + getattr(offsets, o) + for o in offsets.__all__ + if issubclass(getattr(offsets, o), MonthOffset) and o != "MonthOffset" + ] +) +def month_classes(request): + """ + Fixture for month based datetime offsets available for a time series. 
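+    For example, MonthBegin, MonthEnd, BMonthBegin and BMonthEnd qualify,
+    while the MonthOffset base class itself is excluded by the filter above.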
+ """ + return request.param diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_business_day.py b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_business_day.py new file mode 100644 index 0000000..482d697 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_business_day.py @@ -0,0 +1,235 @@ +""" +Tests for offsets.BDay +""" +from datetime import ( + date, + datetime, + timedelta, +) + +import numpy as np +import pytest + +from pandas._libs.tslibs.offsets import ( + ApplyTypeError, + BDay, + BMonthEnd, +) + +from pandas import ( + DatetimeIndex, + Timedelta, + _testing as tm, +) +from pandas.tests.tseries.offsets.common import ( + Base, + assert_is_on_offset, + assert_offset_equal, +) +from pandas.tests.tseries.offsets.test_offsets import _ApplyCases + +from pandas.tseries import offsets as offsets + + +class TestBusinessDay(Base): + _offset = BDay + + def setup_method(self, method): + self.d = datetime(2008, 1, 1) + self.nd = np.datetime64("2008-01-01 00:00:00") + + self.offset = self._offset() + self.offset1 = self.offset + self.offset2 = self._offset(2) + + def test_different_normalize_equals(self): + # GH#21404 changed __eq__ to return False when `normalize` does not match + offset = self._offset() + offset2 = self._offset(normalize=True) + assert offset != offset2 + + def test_repr(self): + assert repr(self.offset) == "" + assert repr(self.offset2) == "<2 * BusinessDays>" + + expected = "" + assert repr(self.offset + timedelta(1)) == expected + + def test_with_offset(self): + offset = self.offset + timedelta(hours=2) + + assert (self.d + offset) == datetime(2008, 1, 2, 2) + + @pytest.mark.parametrize( + "td", + [ + Timedelta(hours=2), + Timedelta(hours=2).to_pytimedelta(), + Timedelta(hours=2).to_timedelta64(), + ], + ids=lambda x: type(x), + ) + def test_with_offset_index(self, td): + + dti = DatetimeIndex([self.d]) + expected = DatetimeIndex([datetime(2008, 1, 2, 2)]) + + result = dti + (td + self.offset) + tm.assert_index_equal(result, expected) + + result = dti + (self.offset + td) + tm.assert_index_equal(result, expected) + + def test_eq(self): + assert self.offset2 == self.offset2 + + def test_mul(self): + pass + + def test_hash(self): + assert hash(self.offset2) == hash(self.offset2) + + def test_call(self): + with tm.assert_produces_warning(FutureWarning): + # GH#34171 DateOffset.__call__ is deprecated + assert self.offset2(self.d) == datetime(2008, 1, 3) + assert self.offset2(self.nd) == datetime(2008, 1, 3) + + def testRollback1(self): + assert self._offset(10).rollback(self.d) == self.d + + def testRollback2(self): + assert self._offset(10).rollback(datetime(2008, 1, 5)) == datetime(2008, 1, 4) + + def testRollforward1(self): + assert self._offset(10).rollforward(self.d) == self.d + + def testRollforward2(self): + assert self._offset(10).rollforward(datetime(2008, 1, 5)) == datetime( + 2008, 1, 7 + ) + + def test_roll_date_object(self): + offset = self._offset() + + dt = date(2012, 9, 15) + + result = offset.rollback(dt) + assert result == datetime(2012, 9, 14) + + result = offset.rollforward(dt) + assert result == datetime(2012, 9, 17) + + offset = offsets.Day() + result = offset.rollback(dt) + assert result == datetime(2012, 9, 15) + + result = offset.rollforward(dt) + assert result == datetime(2012, 9, 15) + + def test_is_on_offset(self): + tests = [ + (self._offset(), datetime(2008, 1, 1), True), + (self._offset(), datetime(2008, 1, 5), False), + ] + + for offset, d, expected in 
tests: + assert_is_on_offset(offset, d, expected) + + apply_cases: _ApplyCases = [ + ( + 1, + { + datetime(2008, 1, 1): datetime(2008, 1, 2), + datetime(2008, 1, 4): datetime(2008, 1, 7), + datetime(2008, 1, 5): datetime(2008, 1, 7), + datetime(2008, 1, 6): datetime(2008, 1, 7), + datetime(2008, 1, 7): datetime(2008, 1, 8), + }, + ), + ( + 2, + { + datetime(2008, 1, 1): datetime(2008, 1, 3), + datetime(2008, 1, 4): datetime(2008, 1, 8), + datetime(2008, 1, 5): datetime(2008, 1, 8), + datetime(2008, 1, 6): datetime(2008, 1, 8), + datetime(2008, 1, 7): datetime(2008, 1, 9), + }, + ), + ( + -1, + { + datetime(2008, 1, 1): datetime(2007, 12, 31), + datetime(2008, 1, 4): datetime(2008, 1, 3), + datetime(2008, 1, 5): datetime(2008, 1, 4), + datetime(2008, 1, 6): datetime(2008, 1, 4), + datetime(2008, 1, 7): datetime(2008, 1, 4), + datetime(2008, 1, 8): datetime(2008, 1, 7), + }, + ), + ( + -2, + { + datetime(2008, 1, 1): datetime(2007, 12, 28), + datetime(2008, 1, 4): datetime(2008, 1, 2), + datetime(2008, 1, 5): datetime(2008, 1, 3), + datetime(2008, 1, 6): datetime(2008, 1, 3), + datetime(2008, 1, 7): datetime(2008, 1, 3), + datetime(2008, 1, 8): datetime(2008, 1, 4), + datetime(2008, 1, 9): datetime(2008, 1, 7), + }, + ), + ( + 0, + { + datetime(2008, 1, 1): datetime(2008, 1, 1), + datetime(2008, 1, 4): datetime(2008, 1, 4), + datetime(2008, 1, 5): datetime(2008, 1, 7), + datetime(2008, 1, 6): datetime(2008, 1, 7), + datetime(2008, 1, 7): datetime(2008, 1, 7), + }, + ), + ] + + @pytest.mark.parametrize("case", apply_cases) + def test_apply(self, case): + n, cases = case + offset = self._offset(n) + for base, expected in cases.items(): + assert_offset_equal(offset, base, expected) + + def test_apply_large_n(self): + dt = datetime(2012, 10, 23) + + result = dt + self._offset(10) + assert result == datetime(2012, 11, 6) + + result = dt + self._offset(100) - self._offset(100) + assert result == dt + + off = self._offset() * 6 + rs = datetime(2012, 1, 1) - off + xp = datetime(2011, 12, 23) + assert rs == xp + + st = datetime(2011, 12, 18) + rs = st + off + xp = datetime(2011, 12, 26) + assert rs == xp + + off = self._offset() * 10 + rs = datetime(2014, 1, 5) + off # see #5890 + xp = datetime(2014, 1, 17) + assert rs == xp + + def test_apply_corner(self): + if self._offset is BDay: + msg = "Only know how to combine business day with datetime or timedelta" + else: + msg = ( + "Only know how to combine trading day " + "with datetime, datetime64 or timedelta" + ) + with pytest.raises(ApplyTypeError, match=msg): + self._offset()._apply(BMonthEnd()) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_business_hour.py b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_business_hour.py new file mode 100644 index 0000000..401bfe6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_business_hour.py @@ -0,0 +1,1370 @@ +""" +Tests for offsets.BusinessHour +""" +from datetime import ( + datetime, + time as dt_time, +) + +import pytest + +from pandas._libs.tslibs import ( + Timedelta, + Timestamp, +) +from pandas._libs.tslibs.offsets import ( + BDay, + BusinessHour, + Nano, +) + +from pandas import ( + DatetimeIndex, + _testing as tm, + date_range, +) +from pandas.tests.tseries.offsets.common import ( + Base, + assert_offset_equal, +) + + +class TestBusinessHour(Base): + _offset = BusinessHour + + def setup_method(self, method): + self.d = datetime(2014, 7, 1, 10, 00) + + self.offset1 = BusinessHour() + self.offset2 = 
BusinessHour(n=3) + + self.offset3 = BusinessHour(n=-1) + self.offset4 = BusinessHour(n=-4) + + from datetime import time as dt_time + + self.offset5 = BusinessHour(start=dt_time(11, 0), end=dt_time(14, 30)) + self.offset6 = BusinessHour(start="20:00", end="05:00") + self.offset7 = BusinessHour(n=-2, start=dt_time(21, 30), end=dt_time(6, 30)) + self.offset8 = BusinessHour(start=["09:00", "13:00"], end=["12:00", "17:00"]) + self.offset9 = BusinessHour( + n=3, start=["09:00", "22:00"], end=["13:00", "03:00"] + ) + self.offset10 = BusinessHour( + n=-1, start=["23:00", "13:00"], end=["02:00", "17:00"] + ) + + @pytest.mark.parametrize( + "start,end,match", + [ + ( + dt_time(11, 0, 5), + "17:00", + "time data must be specified only with hour and minute", + ), + ("AAA", "17:00", "time data must match '%H:%M' format"), + ("14:00:05", "17:00", "time data must match '%H:%M' format"), + ([], "17:00", "Must include at least 1 start time"), + ("09:00", [], "Must include at least 1 end time"), + ( + ["09:00", "11:00"], + "17:00", + "number of starting time and ending time must be the same", + ), + ( + ["09:00", "11:00"], + ["10:00"], + "number of starting time and ending time must be the same", + ), + ( + ["09:00", "11:00"], + ["12:00", "20:00"], + r"invalid starting and ending time\(s\): opening hours should not " + "touch or overlap with one another", + ), + ( + ["12:00", "20:00"], + ["09:00", "11:00"], + r"invalid starting and ending time\(s\): opening hours should not " + "touch or overlap with one another", + ), + ], + ) + def test_constructor_errors(self, start, end, match): + with pytest.raises(ValueError, match=match): + BusinessHour(start=start, end=end) + + def test_different_normalize_equals(self): + # GH#21404 changed __eq__ to return False when `normalize` does not match + offset = self._offset() + offset2 = self._offset(normalize=True) + assert offset != offset2 + + def test_repr(self): + assert repr(self.offset1) == "" + assert repr(self.offset2) == "<3 * BusinessHours: BH=09:00-17:00>" + assert repr(self.offset3) == "<-1 * BusinessHour: BH=09:00-17:00>" + assert repr(self.offset4) == "<-4 * BusinessHours: BH=09:00-17:00>" + + assert repr(self.offset5) == "" + assert repr(self.offset6) == "" + assert repr(self.offset7) == "<-2 * BusinessHours: BH=21:30-06:30>" + assert repr(self.offset8) == "" + assert repr(self.offset9) == "<3 * BusinessHours: BH=09:00-13:00,22:00-03:00>" + assert repr(self.offset10) == "<-1 * BusinessHour: BH=13:00-17:00,23:00-02:00>" + + def test_with_offset(self): + expected = Timestamp("2014-07-01 13:00") + + assert self.d + BusinessHour() * 3 == expected + assert self.d + BusinessHour(n=3) == expected + + @pytest.mark.parametrize( + "offset_name", + ["offset1", "offset2", "offset3", "offset4", "offset8", "offset9", "offset10"], + ) + def test_eq_attribute(self, offset_name): + offset = getattr(self, offset_name) + assert offset == offset + + @pytest.mark.parametrize( + "offset1,offset2", + [ + (BusinessHour(start="09:00"), BusinessHour()), + ( + BusinessHour(start=["23:00", "13:00"], end=["12:00", "17:00"]), + BusinessHour(start=["13:00", "23:00"], end=["17:00", "12:00"]), + ), + ], + ) + def test_eq(self, offset1, offset2): + assert offset1 == offset2 + + @pytest.mark.parametrize( + "offset1,offset2", + [ + (BusinessHour(), BusinessHour(-1)), + (BusinessHour(start="09:00"), BusinessHour(start="09:01")), + ( + BusinessHour(start="09:00", end="17:00"), + BusinessHour(start="17:00", end="09:01"), + ), + ( + BusinessHour(start=["13:00", "23:00"], end=["18:00", 
"07:00"]), + BusinessHour(start=["13:00", "23:00"], end=["17:00", "12:00"]), + ), + ], + ) + def test_neq(self, offset1, offset2): + assert offset1 != offset2 + + @pytest.mark.parametrize( + "offset_name", + ["offset1", "offset2", "offset3", "offset4", "offset8", "offset9", "offset10"], + ) + def test_hash(self, offset_name): + offset = getattr(self, offset_name) + assert offset == offset + + def test_call(self): + with tm.assert_produces_warning(FutureWarning): + # GH#34171 DateOffset.__call__ is deprecated + assert self.offset1(self.d) == datetime(2014, 7, 1, 11) + assert self.offset2(self.d) == datetime(2014, 7, 1, 13) + assert self.offset3(self.d) == datetime(2014, 6, 30, 17) + assert self.offset4(self.d) == datetime(2014, 6, 30, 14) + assert self.offset8(self.d) == datetime(2014, 7, 1, 11) + assert self.offset9(self.d) == datetime(2014, 7, 1, 22) + assert self.offset10(self.d) == datetime(2014, 7, 1, 1) + + def test_sub(self): + # we have to override test_sub here because self.offset2 is not + # defined as self._offset(2) + off = self.offset2 + msg = "Cannot subtract datetime from offset" + with pytest.raises(TypeError, match=msg): + off - self.d + assert 2 * off - off == off + + assert self.d - self.offset2 == self.d + self._offset(-3) + + def testRollback1(self): + assert self.offset1.rollback(self.d) == self.d + assert self.offset2.rollback(self.d) == self.d + assert self.offset3.rollback(self.d) == self.d + assert self.offset4.rollback(self.d) == self.d + assert self.offset5.rollback(self.d) == datetime(2014, 6, 30, 14, 30) + assert self.offset6.rollback(self.d) == datetime(2014, 7, 1, 5, 0) + assert self.offset7.rollback(self.d) == datetime(2014, 7, 1, 6, 30) + assert self.offset8.rollback(self.d) == self.d + assert self.offset9.rollback(self.d) == self.d + assert self.offset10.rollback(self.d) == datetime(2014, 7, 1, 2) + + d = datetime(2014, 7, 1, 0) + assert self.offset1.rollback(d) == datetime(2014, 6, 30, 17) + assert self.offset2.rollback(d) == datetime(2014, 6, 30, 17) + assert self.offset3.rollback(d) == datetime(2014, 6, 30, 17) + assert self.offset4.rollback(d) == datetime(2014, 6, 30, 17) + assert self.offset5.rollback(d) == datetime(2014, 6, 30, 14, 30) + assert self.offset6.rollback(d) == d + assert self.offset7.rollback(d) == d + assert self.offset8.rollback(d) == datetime(2014, 6, 30, 17) + assert self.offset9.rollback(d) == d + assert self.offset10.rollback(d) == d + + assert self._offset(5).rollback(self.d) == self.d + + def testRollback2(self): + assert self._offset(-3).rollback(datetime(2014, 7, 5, 15, 0)) == datetime( + 2014, 7, 4, 17, 0 + ) + + def testRollforward1(self): + assert self.offset1.rollforward(self.d) == self.d + assert self.offset2.rollforward(self.d) == self.d + assert self.offset3.rollforward(self.d) == self.d + assert self.offset4.rollforward(self.d) == self.d + assert self.offset5.rollforward(self.d) == datetime(2014, 7, 1, 11, 0) + assert self.offset6.rollforward(self.d) == datetime(2014, 7, 1, 20, 0) + assert self.offset7.rollforward(self.d) == datetime(2014, 7, 1, 21, 30) + assert self.offset8.rollforward(self.d) == self.d + assert self.offset9.rollforward(self.d) == self.d + assert self.offset10.rollforward(self.d) == datetime(2014, 7, 1, 13) + + d = datetime(2014, 7, 1, 0) + assert self.offset1.rollforward(d) == datetime(2014, 7, 1, 9) + assert self.offset2.rollforward(d) == datetime(2014, 7, 1, 9) + assert self.offset3.rollforward(d) == datetime(2014, 7, 1, 9) + assert self.offset4.rollforward(d) == datetime(2014, 7, 1, 9) + assert 
self.offset5.rollforward(d) == datetime(2014, 7, 1, 11) + assert self.offset6.rollforward(d) == d + assert self.offset7.rollforward(d) == d + assert self.offset8.rollforward(d) == datetime(2014, 7, 1, 9) + assert self.offset9.rollforward(d) == d + assert self.offset10.rollforward(d) == d + + assert self._offset(5).rollforward(self.d) == self.d + + def testRollforward2(self): + assert self._offset(-3).rollforward(datetime(2014, 7, 5, 16, 0)) == datetime( + 2014, 7, 7, 9 + ) + + def test_roll_date_object(self): + offset = BusinessHour() + + dt = datetime(2014, 7, 6, 15, 0) + + result = offset.rollback(dt) + assert result == datetime(2014, 7, 4, 17) + + result = offset.rollforward(dt) + assert result == datetime(2014, 7, 7, 9) + + normalize_cases = [] + normalize_cases.append( + ( + BusinessHour(normalize=True), + { + datetime(2014, 7, 1, 8): datetime(2014, 7, 1), + datetime(2014, 7, 1, 17): datetime(2014, 7, 2), + datetime(2014, 7, 1, 16): datetime(2014, 7, 2), + datetime(2014, 7, 1, 23): datetime(2014, 7, 2), + datetime(2014, 7, 1, 0): datetime(2014, 7, 1), + datetime(2014, 7, 4, 15): datetime(2014, 7, 4), + datetime(2014, 7, 4, 15, 59): datetime(2014, 7, 4), + datetime(2014, 7, 4, 16, 30): datetime(2014, 7, 7), + datetime(2014, 7, 5, 23): datetime(2014, 7, 7), + datetime(2014, 7, 6, 10): datetime(2014, 7, 7), + }, + ) + ) + + normalize_cases.append( + ( + BusinessHour(-1, normalize=True), + { + datetime(2014, 7, 1, 8): datetime(2014, 6, 30), + datetime(2014, 7, 1, 17): datetime(2014, 7, 1), + datetime(2014, 7, 1, 16): datetime(2014, 7, 1), + datetime(2014, 7, 1, 10): datetime(2014, 6, 30), + datetime(2014, 7, 1, 0): datetime(2014, 6, 30), + datetime(2014, 7, 7, 10): datetime(2014, 7, 4), + datetime(2014, 7, 7, 10, 1): datetime(2014, 7, 7), + datetime(2014, 7, 5, 23): datetime(2014, 7, 4), + datetime(2014, 7, 6, 10): datetime(2014, 7, 4), + }, + ) + ) + + normalize_cases.append( + ( + BusinessHour(1, normalize=True, start="17:00", end="04:00"), + { + datetime(2014, 7, 1, 8): datetime(2014, 7, 1), + datetime(2014, 7, 1, 17): datetime(2014, 7, 1), + datetime(2014, 7, 1, 23): datetime(2014, 7, 2), + datetime(2014, 7, 2, 2): datetime(2014, 7, 2), + datetime(2014, 7, 2, 3): datetime(2014, 7, 2), + datetime(2014, 7, 4, 23): datetime(2014, 7, 5), + datetime(2014, 7, 5, 2): datetime(2014, 7, 5), + datetime(2014, 7, 7, 2): datetime(2014, 7, 7), + datetime(2014, 7, 7, 17): datetime(2014, 7, 7), + }, + ) + ) + + @pytest.mark.parametrize("case", normalize_cases) + def test_normalize(self, case): + offset, cases = case + for dt, expected in cases.items(): + assert offset._apply(dt) == expected + + on_offset_cases = [] + on_offset_cases.append( + ( + BusinessHour(), + { + datetime(2014, 7, 1, 9): True, + datetime(2014, 7, 1, 8, 59): False, + datetime(2014, 7, 1, 8): False, + datetime(2014, 7, 1, 17): True, + datetime(2014, 7, 1, 17, 1): False, + datetime(2014, 7, 1, 18): False, + datetime(2014, 7, 5, 9): False, + datetime(2014, 7, 6, 12): False, + }, + ) + ) + + on_offset_cases.append( + ( + BusinessHour(start="10:00", end="15:00"), + { + datetime(2014, 7, 1, 9): False, + datetime(2014, 7, 1, 10): True, + datetime(2014, 7, 1, 15): True, + datetime(2014, 7, 1, 15, 1): False, + datetime(2014, 7, 5, 12): False, + datetime(2014, 7, 6, 12): False, + }, + ) + ) + + on_offset_cases.append( + ( + BusinessHour(start="19:00", end="05:00"), + { + datetime(2014, 7, 1, 9, 0): False, + datetime(2014, 7, 1, 10, 0): False, + datetime(2014, 7, 1, 15): False, + datetime(2014, 7, 1, 15, 1): False, + datetime(2014, 7, 5, 
12, 0): False, + datetime(2014, 7, 6, 12, 0): False, + datetime(2014, 7, 1, 19, 0): True, + datetime(2014, 7, 2, 0, 0): True, + datetime(2014, 7, 4, 23): True, + datetime(2014, 7, 5, 1): True, + datetime(2014, 7, 5, 5, 0): True, + datetime(2014, 7, 6, 23, 0): False, + datetime(2014, 7, 7, 3, 0): False, + }, + ) + ) + + on_offset_cases.append( + ( + BusinessHour(start=["09:00", "13:00"], end=["12:00", "17:00"]), + { + datetime(2014, 7, 1, 9): True, + datetime(2014, 7, 1, 8, 59): False, + datetime(2014, 7, 1, 8): False, + datetime(2014, 7, 1, 17): True, + datetime(2014, 7, 1, 17, 1): False, + datetime(2014, 7, 1, 18): False, + datetime(2014, 7, 5, 9): False, + datetime(2014, 7, 6, 12): False, + datetime(2014, 7, 1, 12, 30): False, + }, + ) + ) + + on_offset_cases.append( + ( + BusinessHour(start=["19:00", "23:00"], end=["21:00", "05:00"]), + { + datetime(2014, 7, 1, 9, 0): False, + datetime(2014, 7, 1, 10, 0): False, + datetime(2014, 7, 1, 15): False, + datetime(2014, 7, 1, 15, 1): False, + datetime(2014, 7, 5, 12, 0): False, + datetime(2014, 7, 6, 12, 0): False, + datetime(2014, 7, 1, 19, 0): True, + datetime(2014, 7, 2, 0, 0): True, + datetime(2014, 7, 4, 23): True, + datetime(2014, 7, 5, 1): True, + datetime(2014, 7, 5, 5, 0): True, + datetime(2014, 7, 6, 23, 0): False, + datetime(2014, 7, 7, 3, 0): False, + datetime(2014, 7, 4, 22): False, + }, + ) + ) + + @pytest.mark.parametrize("case", on_offset_cases) + def test_is_on_offset(self, case): + offset, cases = case + for dt, expected in cases.items(): + assert offset.is_on_offset(dt) == expected + + apply_cases = [ + ( + BusinessHour(), + { + datetime(2014, 7, 1, 11): datetime(2014, 7, 1, 12), + datetime(2014, 7, 1, 13): datetime(2014, 7, 1, 14), + datetime(2014, 7, 1, 15): datetime(2014, 7, 1, 16), + datetime(2014, 7, 1, 19): datetime(2014, 7, 2, 10), + datetime(2014, 7, 1, 16): datetime(2014, 7, 2, 9), + datetime(2014, 7, 1, 16, 30, 15): datetime(2014, 7, 2, 9, 30, 15), + datetime(2014, 7, 1, 17): datetime(2014, 7, 2, 10), + datetime(2014, 7, 2, 11): datetime(2014, 7, 2, 12), + # out of business hours + datetime(2014, 7, 2, 8): datetime(2014, 7, 2, 10), + datetime(2014, 7, 2, 19): datetime(2014, 7, 3, 10), + datetime(2014, 7, 2, 23): datetime(2014, 7, 3, 10), + datetime(2014, 7, 3, 0): datetime(2014, 7, 3, 10), + # saturday + datetime(2014, 7, 5, 15): datetime(2014, 7, 7, 10), + datetime(2014, 7, 4, 17): datetime(2014, 7, 7, 10), + datetime(2014, 7, 4, 16, 30): datetime(2014, 7, 7, 9, 30), + datetime(2014, 7, 4, 16, 30, 30): datetime(2014, 7, 7, 9, 30, 30), + }, + ), + ( + BusinessHour(4), + { + datetime(2014, 7, 1, 11): datetime(2014, 7, 1, 15), + datetime(2014, 7, 1, 13): datetime(2014, 7, 2, 9), + datetime(2014, 7, 1, 15): datetime(2014, 7, 2, 11), + datetime(2014, 7, 1, 16): datetime(2014, 7, 2, 12), + datetime(2014, 7, 1, 17): datetime(2014, 7, 2, 13), + datetime(2014, 7, 2, 11): datetime(2014, 7, 2, 15), + datetime(2014, 7, 2, 8): datetime(2014, 7, 2, 13), + datetime(2014, 7, 2, 19): datetime(2014, 7, 3, 13), + datetime(2014, 7, 2, 23): datetime(2014, 7, 3, 13), + datetime(2014, 7, 3, 0): datetime(2014, 7, 3, 13), + datetime(2014, 7, 5, 15): datetime(2014, 7, 7, 13), + datetime(2014, 7, 4, 17): datetime(2014, 7, 7, 13), + datetime(2014, 7, 4, 16, 30): datetime(2014, 7, 7, 12, 30), + datetime(2014, 7, 4, 16, 30, 30): datetime(2014, 7, 7, 12, 30, 30), + }, + ), + ( + BusinessHour(-1), + { + datetime(2014, 7, 1, 11): datetime(2014, 7, 1, 10), + datetime(2014, 7, 1, 13): datetime(2014, 7, 1, 12), + datetime(2014, 7, 1, 15): 
datetime(2014, 7, 1, 14), + datetime(2014, 7, 1, 16): datetime(2014, 7, 1, 15), + datetime(2014, 7, 1, 10): datetime(2014, 6, 30, 17), + datetime(2014, 7, 1, 16, 30, 15): datetime(2014, 7, 1, 15, 30, 15), + datetime(2014, 7, 1, 9, 30, 15): datetime(2014, 6, 30, 16, 30, 15), + datetime(2014, 7, 1, 17): datetime(2014, 7, 1, 16), + datetime(2014, 7, 1, 5): datetime(2014, 6, 30, 16), + datetime(2014, 7, 2, 11): datetime(2014, 7, 2, 10), + # out of business hours + datetime(2014, 7, 2, 8): datetime(2014, 7, 1, 16), + datetime(2014, 7, 2, 19): datetime(2014, 7, 2, 16), + datetime(2014, 7, 2, 23): datetime(2014, 7, 2, 16), + datetime(2014, 7, 3, 0): datetime(2014, 7, 2, 16), + # saturday + datetime(2014, 7, 5, 15): datetime(2014, 7, 4, 16), + datetime(2014, 7, 7, 9): datetime(2014, 7, 4, 16), + datetime(2014, 7, 7, 9, 30): datetime(2014, 7, 4, 16, 30), + datetime(2014, 7, 7, 9, 30, 30): datetime(2014, 7, 4, 16, 30, 30), + }, + ), + ( + BusinessHour(-4), + { + datetime(2014, 7, 1, 11): datetime(2014, 6, 30, 15), + datetime(2014, 7, 1, 13): datetime(2014, 6, 30, 17), + datetime(2014, 7, 1, 15): datetime(2014, 7, 1, 11), + datetime(2014, 7, 1, 16): datetime(2014, 7, 1, 12), + datetime(2014, 7, 1, 17): datetime(2014, 7, 1, 13), + datetime(2014, 7, 2, 11): datetime(2014, 7, 1, 15), + datetime(2014, 7, 2, 8): datetime(2014, 7, 1, 13), + datetime(2014, 7, 2, 19): datetime(2014, 7, 2, 13), + datetime(2014, 7, 2, 23): datetime(2014, 7, 2, 13), + datetime(2014, 7, 3, 0): datetime(2014, 7, 2, 13), + datetime(2014, 7, 5, 15): datetime(2014, 7, 4, 13), + datetime(2014, 7, 4, 18): datetime(2014, 7, 4, 13), + datetime(2014, 7, 7, 9, 30): datetime(2014, 7, 4, 13, 30), + datetime(2014, 7, 7, 9, 30, 30): datetime(2014, 7, 4, 13, 30, 30), + }, + ), + ( + BusinessHour(start="13:00", end="16:00"), + { + datetime(2014, 7, 1, 11): datetime(2014, 7, 1, 14), + datetime(2014, 7, 1, 13): datetime(2014, 7, 1, 14), + datetime(2014, 7, 1, 15): datetime(2014, 7, 2, 13), + datetime(2014, 7, 1, 19): datetime(2014, 7, 2, 14), + datetime(2014, 7, 1, 16): datetime(2014, 7, 2, 14), + datetime(2014, 7, 1, 15, 30, 15): datetime(2014, 7, 2, 13, 30, 15), + datetime(2014, 7, 5, 15): datetime(2014, 7, 7, 14), + datetime(2014, 7, 4, 17): datetime(2014, 7, 7, 14), + }, + ), + ( + BusinessHour(n=2, start="13:00", end="16:00"), + { + datetime(2014, 7, 1, 17): datetime(2014, 7, 2, 15), + datetime(2014, 7, 2, 14): datetime(2014, 7, 3, 13), + datetime(2014, 7, 2, 8): datetime(2014, 7, 2, 15), + datetime(2014, 7, 2, 19): datetime(2014, 7, 3, 15), + datetime(2014, 7, 2, 14, 30): datetime(2014, 7, 3, 13, 30), + datetime(2014, 7, 3, 0): datetime(2014, 7, 3, 15), + datetime(2014, 7, 5, 15): datetime(2014, 7, 7, 15), + datetime(2014, 7, 4, 17): datetime(2014, 7, 7, 15), + datetime(2014, 7, 4, 14, 30): datetime(2014, 7, 7, 13, 30), + datetime(2014, 7, 4, 14, 30, 30): datetime(2014, 7, 7, 13, 30, 30), + }, + ), + ( + BusinessHour(n=-1, start="13:00", end="16:00"), + { + datetime(2014, 7, 2, 11): datetime(2014, 7, 1, 15), + datetime(2014, 7, 2, 13): datetime(2014, 7, 1, 15), + datetime(2014, 7, 2, 14): datetime(2014, 7, 1, 16), + datetime(2014, 7, 2, 15): datetime(2014, 7, 2, 14), + datetime(2014, 7, 2, 19): datetime(2014, 7, 2, 15), + datetime(2014, 7, 2, 16): datetime(2014, 7, 2, 15), + datetime(2014, 7, 2, 13, 30, 15): datetime(2014, 7, 1, 15, 30, 15), + datetime(2014, 7, 5, 15): datetime(2014, 7, 4, 15), + datetime(2014, 7, 7, 11): datetime(2014, 7, 4, 15), + }, + ), + ( + BusinessHour(n=-3, start="10:00", end="16:00"), + { + datetime(2014, 7, 1, 
17): datetime(2014, 7, 1, 13), + datetime(2014, 7, 2, 14): datetime(2014, 7, 2, 11), + datetime(2014, 7, 2, 8): datetime(2014, 7, 1, 13), + datetime(2014, 7, 2, 13): datetime(2014, 7, 1, 16), + datetime(2014, 7, 2, 19): datetime(2014, 7, 2, 13), + datetime(2014, 7, 2, 11, 30): datetime(2014, 7, 1, 14, 30), + datetime(2014, 7, 3, 0): datetime(2014, 7, 2, 13), + datetime(2014, 7, 4, 10): datetime(2014, 7, 3, 13), + datetime(2014, 7, 5, 15): datetime(2014, 7, 4, 13), + datetime(2014, 7, 4, 16): datetime(2014, 7, 4, 13), + datetime(2014, 7, 4, 12, 30): datetime(2014, 7, 3, 15, 30), + datetime(2014, 7, 4, 12, 30, 30): datetime(2014, 7, 3, 15, 30, 30), + }, + ), + ( + BusinessHour(start="19:00", end="05:00"), + { + datetime(2014, 7, 1, 17): datetime(2014, 7, 1, 20), + datetime(2014, 7, 2, 14): datetime(2014, 7, 2, 20), + datetime(2014, 7, 2, 8): datetime(2014, 7, 2, 20), + datetime(2014, 7, 2, 13): datetime(2014, 7, 2, 20), + datetime(2014, 7, 2, 19): datetime(2014, 7, 2, 20), + datetime(2014, 7, 2, 4, 30): datetime(2014, 7, 2, 19, 30), + datetime(2014, 7, 3, 0): datetime(2014, 7, 3, 1), + datetime(2014, 7, 4, 10): datetime(2014, 7, 4, 20), + datetime(2014, 7, 4, 23): datetime(2014, 7, 5, 0), + datetime(2014, 7, 5, 0): datetime(2014, 7, 5, 1), + datetime(2014, 7, 5, 4): datetime(2014, 7, 7, 19), + datetime(2014, 7, 5, 4, 30): datetime(2014, 7, 7, 19, 30), + datetime(2014, 7, 5, 4, 30, 30): datetime(2014, 7, 7, 19, 30, 30), + }, + ), + ( + BusinessHour(n=-1, start="19:00", end="05:00"), + { + datetime(2014, 7, 1, 17): datetime(2014, 7, 1, 4), + datetime(2014, 7, 2, 14): datetime(2014, 7, 2, 4), + datetime(2014, 7, 2, 8): datetime(2014, 7, 2, 4), + datetime(2014, 7, 2, 13): datetime(2014, 7, 2, 4), + datetime(2014, 7, 2, 20): datetime(2014, 7, 2, 5), + datetime(2014, 7, 2, 19): datetime(2014, 7, 2, 4), + datetime(2014, 7, 2, 19, 30): datetime(2014, 7, 2, 4, 30), + datetime(2014, 7, 3, 0): datetime(2014, 7, 2, 23), + datetime(2014, 7, 3, 6): datetime(2014, 7, 3, 4), + datetime(2014, 7, 4, 23): datetime(2014, 7, 4, 22), + datetime(2014, 7, 5, 0): datetime(2014, 7, 4, 23), + datetime(2014, 7, 5, 4): datetime(2014, 7, 5, 3), + datetime(2014, 7, 7, 19, 30): datetime(2014, 7, 5, 4, 30), + datetime(2014, 7, 7, 19, 30, 30): datetime(2014, 7, 5, 4, 30, 30), + }, + ), + ( + BusinessHour(n=4, start="00:00", end="23:00"), + { + datetime(2014, 7, 3, 22): datetime(2014, 7, 4, 3), + datetime(2014, 7, 4, 22): datetime(2014, 7, 7, 3), + datetime(2014, 7, 3, 22, 30): datetime(2014, 7, 4, 3, 30), + datetime(2014, 7, 3, 22, 20): datetime(2014, 7, 4, 3, 20), + datetime(2014, 7, 4, 22, 30, 30): datetime(2014, 7, 7, 3, 30, 30), + datetime(2014, 7, 4, 22, 30, 20): datetime(2014, 7, 7, 3, 30, 20), + }, + ), + ( + BusinessHour(n=-4, start="00:00", end="23:00"), + { + datetime(2014, 7, 4, 3): datetime(2014, 7, 3, 22), + datetime(2014, 7, 7, 3): datetime(2014, 7, 4, 22), + datetime(2014, 7, 4, 3, 30): datetime(2014, 7, 3, 22, 30), + datetime(2014, 7, 4, 3, 20): datetime(2014, 7, 3, 22, 20), + datetime(2014, 7, 7, 3, 30, 30): datetime(2014, 7, 4, 22, 30, 30), + datetime(2014, 7, 7, 3, 30, 20): datetime(2014, 7, 4, 22, 30, 20), + }, + ), + ( + BusinessHour(start=["09:00", "14:00"], end=["12:00", "18:00"]), + { + datetime(2014, 7, 1, 11): datetime(2014, 7, 1, 14), + datetime(2014, 7, 1, 15): datetime(2014, 7, 1, 16), + datetime(2014, 7, 1, 19): datetime(2014, 7, 2, 10), + datetime(2014, 7, 1, 16): datetime(2014, 7, 1, 17), + datetime(2014, 7, 1, 16, 30, 15): datetime(2014, 7, 1, 17, 30, 15), + datetime(2014, 7, 1, 17): 
datetime(2014, 7, 2, 9), + datetime(2014, 7, 2, 11): datetime(2014, 7, 2, 14), + # out of business hours + datetime(2014, 7, 1, 13): datetime(2014, 7, 1, 15), + datetime(2014, 7, 2, 8): datetime(2014, 7, 2, 10), + datetime(2014, 7, 2, 19): datetime(2014, 7, 3, 10), + datetime(2014, 7, 2, 23): datetime(2014, 7, 3, 10), + datetime(2014, 7, 3, 0): datetime(2014, 7, 3, 10), + # saturday + datetime(2014, 7, 5, 15): datetime(2014, 7, 7, 10), + datetime(2014, 7, 4, 17): datetime(2014, 7, 7, 9), + datetime(2014, 7, 4, 17, 30): datetime(2014, 7, 7, 9, 30), + datetime(2014, 7, 4, 17, 30, 30): datetime(2014, 7, 7, 9, 30, 30), + }, + ), + ( + BusinessHour(n=4, start=["09:00", "14:00"], end=["12:00", "18:00"]), + { + datetime(2014, 7, 1, 11): datetime(2014, 7, 1, 17), + datetime(2014, 7, 1, 13): datetime(2014, 7, 2, 9), + datetime(2014, 7, 1, 15): datetime(2014, 7, 2, 10), + datetime(2014, 7, 1, 16): datetime(2014, 7, 2, 11), + datetime(2014, 7, 1, 17): datetime(2014, 7, 2, 14), + datetime(2014, 7, 2, 11): datetime(2014, 7, 2, 17), + datetime(2014, 7, 2, 8): datetime(2014, 7, 2, 15), + datetime(2014, 7, 2, 19): datetime(2014, 7, 3, 15), + datetime(2014, 7, 2, 23): datetime(2014, 7, 3, 15), + datetime(2014, 7, 3, 0): datetime(2014, 7, 3, 15), + datetime(2014, 7, 5, 15): datetime(2014, 7, 7, 15), + datetime(2014, 7, 4, 17): datetime(2014, 7, 7, 14), + datetime(2014, 7, 4, 16, 30): datetime(2014, 7, 7, 11, 30), + datetime(2014, 7, 4, 16, 30, 30): datetime(2014, 7, 7, 11, 30, 30), + }, + ), + ( + BusinessHour(n=-4, start=["09:00", "14:00"], end=["12:00", "18:00"]), + { + datetime(2014, 7, 1, 11): datetime(2014, 6, 30, 16), + datetime(2014, 7, 1, 13): datetime(2014, 6, 30, 17), + datetime(2014, 7, 1, 15): datetime(2014, 6, 30, 18), + datetime(2014, 7, 1, 16): datetime(2014, 7, 1, 10), + datetime(2014, 7, 1, 17): datetime(2014, 7, 1, 11), + datetime(2014, 7, 2, 11): datetime(2014, 7, 1, 16), + datetime(2014, 7, 2, 8): datetime(2014, 7, 1, 12), + datetime(2014, 7, 2, 19): datetime(2014, 7, 2, 12), + datetime(2014, 7, 2, 23): datetime(2014, 7, 2, 12), + datetime(2014, 7, 3, 0): datetime(2014, 7, 2, 12), + datetime(2014, 7, 5, 15): datetime(2014, 7, 4, 12), + datetime(2014, 7, 4, 18): datetime(2014, 7, 4, 12), + datetime(2014, 7, 7, 9, 30): datetime(2014, 7, 4, 14, 30), + datetime(2014, 7, 7, 9, 30, 30): datetime(2014, 7, 4, 14, 30, 30), + }, + ), + ( + BusinessHour(n=-1, start=["19:00", "03:00"], end=["01:00", "05:00"]), + { + datetime(2014, 7, 1, 17): datetime(2014, 7, 1, 4), + datetime(2014, 7, 2, 14): datetime(2014, 7, 2, 4), + datetime(2014, 7, 2, 8): datetime(2014, 7, 2, 4), + datetime(2014, 7, 2, 13): datetime(2014, 7, 2, 4), + datetime(2014, 7, 2, 20): datetime(2014, 7, 2, 5), + datetime(2014, 7, 2, 19): datetime(2014, 7, 2, 4), + datetime(2014, 7, 2, 4): datetime(2014, 7, 2, 1), + datetime(2014, 7, 2, 19, 30): datetime(2014, 7, 2, 4, 30), + datetime(2014, 7, 3, 0): datetime(2014, 7, 2, 23), + datetime(2014, 7, 3, 6): datetime(2014, 7, 3, 4), + datetime(2014, 7, 4, 23): datetime(2014, 7, 4, 22), + datetime(2014, 7, 5, 0): datetime(2014, 7, 4, 23), + datetime(2014, 7, 5, 4): datetime(2014, 7, 5, 0), + datetime(2014, 7, 7, 3, 30): datetime(2014, 7, 5, 0, 30), + datetime(2014, 7, 7, 19, 30): datetime(2014, 7, 7, 4, 30), + datetime(2014, 7, 7, 19, 30, 30): datetime(2014, 7, 7, 4, 30, 30), + }, + ), + ] + + # long business hours (see gh-26381) + + # multiple business hours + + @pytest.mark.parametrize("case", apply_cases) + def test_apply(self, case): + offset, cases = case + for base, expected in 
cases.items(): + assert_offset_equal(offset, base, expected) + + apply_large_n_cases = [ + ( + # A week later + BusinessHour(40), + { + datetime(2014, 7, 1, 11): datetime(2014, 7, 8, 11), + datetime(2014, 7, 1, 13): datetime(2014, 7, 8, 13), + datetime(2014, 7, 1, 15): datetime(2014, 7, 8, 15), + datetime(2014, 7, 1, 16): datetime(2014, 7, 8, 16), + datetime(2014, 7, 1, 17): datetime(2014, 7, 9, 9), + datetime(2014, 7, 2, 11): datetime(2014, 7, 9, 11), + datetime(2014, 7, 2, 8): datetime(2014, 7, 9, 9), + datetime(2014, 7, 2, 19): datetime(2014, 7, 10, 9), + datetime(2014, 7, 2, 23): datetime(2014, 7, 10, 9), + datetime(2014, 7, 3, 0): datetime(2014, 7, 10, 9), + datetime(2014, 7, 5, 15): datetime(2014, 7, 14, 9), + datetime(2014, 7, 4, 18): datetime(2014, 7, 14, 9), + datetime(2014, 7, 7, 9, 30): datetime(2014, 7, 14, 9, 30), + datetime(2014, 7, 7, 9, 30, 30): datetime(2014, 7, 14, 9, 30, 30), + }, + ), + ( + # 3 days and 1 hour before + BusinessHour(-25), + { + datetime(2014, 7, 1, 11): datetime(2014, 6, 26, 10), + datetime(2014, 7, 1, 13): datetime(2014, 6, 26, 12), + datetime(2014, 7, 1, 9): datetime(2014, 6, 25, 16), + datetime(2014, 7, 1, 10): datetime(2014, 6, 25, 17), + datetime(2014, 7, 3, 11): datetime(2014, 6, 30, 10), + datetime(2014, 7, 3, 8): datetime(2014, 6, 27, 16), + datetime(2014, 7, 3, 19): datetime(2014, 6, 30, 16), + datetime(2014, 7, 3, 23): datetime(2014, 6, 30, 16), + datetime(2014, 7, 4, 9): datetime(2014, 6, 30, 16), + datetime(2014, 7, 5, 15): datetime(2014, 7, 1, 16), + datetime(2014, 7, 6, 18): datetime(2014, 7, 1, 16), + datetime(2014, 7, 7, 9, 30): datetime(2014, 7, 1, 16, 30), + datetime(2014, 7, 7, 10, 30, 30): datetime(2014, 7, 2, 9, 30, 30), + }, + ), + ( + # 5 days and 3 hours later + BusinessHour(28, start="21:00", end="02:00"), + { + datetime(2014, 7, 1, 11): datetime(2014, 7, 9, 0), + datetime(2014, 7, 1, 22): datetime(2014, 7, 9, 1), + datetime(2014, 7, 1, 23): datetime(2014, 7, 9, 21), + datetime(2014, 7, 2, 2): datetime(2014, 7, 10, 0), + datetime(2014, 7, 3, 21): datetime(2014, 7, 11, 0), + datetime(2014, 7, 4, 1): datetime(2014, 7, 11, 23), + datetime(2014, 7, 4, 2): datetime(2014, 7, 12, 0), + datetime(2014, 7, 4, 3): datetime(2014, 7, 12, 0), + datetime(2014, 7, 5, 1): datetime(2014, 7, 14, 23), + datetime(2014, 7, 5, 15): datetime(2014, 7, 15, 0), + datetime(2014, 7, 6, 18): datetime(2014, 7, 15, 0), + datetime(2014, 7, 7, 1): datetime(2014, 7, 15, 0), + datetime(2014, 7, 7, 23, 30): datetime(2014, 7, 15, 21, 30), + }, + ), + ( + # large n for multiple opening hours (3 days and 1 hour before) + BusinessHour(n=-25, start=["09:00", "14:00"], end=["12:00", "19:00"]), + { + datetime(2014, 7, 1, 11): datetime(2014, 6, 26, 10), + datetime(2014, 7, 1, 13): datetime(2014, 6, 26, 11), + datetime(2014, 7, 1, 9): datetime(2014, 6, 25, 18), + datetime(2014, 7, 1, 10): datetime(2014, 6, 25, 19), + datetime(2014, 7, 3, 11): datetime(2014, 6, 30, 10), + datetime(2014, 7, 3, 8): datetime(2014, 6, 27, 18), + datetime(2014, 7, 3, 19): datetime(2014, 6, 30, 18), + datetime(2014, 7, 3, 23): datetime(2014, 6, 30, 18), + datetime(2014, 7, 4, 9): datetime(2014, 6, 30, 18), + datetime(2014, 7, 5, 15): datetime(2014, 7, 1, 18), + datetime(2014, 7, 6, 18): datetime(2014, 7, 1, 18), + datetime(2014, 7, 7, 9, 30): datetime(2014, 7, 1, 18, 30), + datetime(2014, 7, 7, 10, 30, 30): datetime(2014, 7, 2, 9, 30, 30), + }, + ), + ( + # 5 days and 3 hours later + BusinessHour(28, start=["21:00", "03:00"], end=["01:00", "04:00"]), + { + datetime(2014, 7, 1, 11): datetime(2014, 
7, 9, 0), + datetime(2014, 7, 1, 22): datetime(2014, 7, 9, 3), + datetime(2014, 7, 1, 23): datetime(2014, 7, 9, 21), + datetime(2014, 7, 2, 2): datetime(2014, 7, 9, 23), + datetime(2014, 7, 3, 21): datetime(2014, 7, 11, 0), + datetime(2014, 7, 4, 1): datetime(2014, 7, 11, 23), + datetime(2014, 7, 4, 2): datetime(2014, 7, 11, 23), + datetime(2014, 7, 4, 3): datetime(2014, 7, 11, 23), + datetime(2014, 7, 4, 21): datetime(2014, 7, 12, 0), + datetime(2014, 7, 5, 0): datetime(2014, 7, 14, 22), + datetime(2014, 7, 5, 1): datetime(2014, 7, 14, 23), + datetime(2014, 7, 5, 15): datetime(2014, 7, 14, 23), + datetime(2014, 7, 6, 18): datetime(2014, 7, 14, 23), + datetime(2014, 7, 7, 1): datetime(2014, 7, 14, 23), + datetime(2014, 7, 7, 23, 30): datetime(2014, 7, 15, 21, 30), + }, + ), + ] + + @pytest.mark.parametrize("case", apply_large_n_cases) + def test_apply_large_n(self, case): + offset, cases = case + for base, expected in cases.items(): + assert_offset_equal(offset, base, expected) + + def test_apply_nanoseconds(self): + tests = [ + ( + BusinessHour(), + { + Timestamp("2014-07-04 15:00") + + Nano(5): Timestamp("2014-07-04 16:00") + + Nano(5), + Timestamp("2014-07-04 16:00") + + Nano(5): Timestamp("2014-07-07 09:00") + + Nano(5), + Timestamp("2014-07-04 16:00") + - Nano(5): Timestamp("2014-07-04 17:00") + - Nano(5), + }, + ), + ( + BusinessHour(-1), + { + Timestamp("2014-07-04 15:00") + + Nano(5): Timestamp("2014-07-04 14:00") + + Nano(5), + Timestamp("2014-07-04 10:00") + + Nano(5): Timestamp("2014-07-04 09:00") + + Nano(5), + Timestamp("2014-07-04 10:00") + - Nano(5): Timestamp("2014-07-03 17:00") + - Nano(5), + }, + ), + ] + + for offset, cases in tests: + for base, expected in cases.items(): + assert_offset_equal(offset, base, expected) + + def test_datetimeindex(self): + idx1 = date_range(start="2014-07-04 15:00", end="2014-07-08 10:00", freq="BH") + idx2 = date_range(start="2014-07-04 15:00", periods=12, freq="BH") + idx3 = date_range(end="2014-07-08 10:00", periods=12, freq="BH") + expected = DatetimeIndex( + [ + "2014-07-04 15:00", + "2014-07-04 16:00", + "2014-07-07 09:00", + "2014-07-07 10:00", + "2014-07-07 11:00", + "2014-07-07 12:00", + "2014-07-07 13:00", + "2014-07-07 14:00", + "2014-07-07 15:00", + "2014-07-07 16:00", + "2014-07-08 09:00", + "2014-07-08 10:00", + ], + freq="BH", + ) + for idx in [idx1, idx2, idx3]: + tm.assert_index_equal(idx, expected) + + idx1 = date_range(start="2014-07-04 15:45", end="2014-07-08 10:45", freq="BH") + idx2 = date_range(start="2014-07-04 15:45", periods=12, freq="BH") + idx3 = date_range(end="2014-07-08 10:45", periods=12, freq="BH") + + expected = idx1 + for idx in [idx1, idx2, idx3]: + tm.assert_index_equal(idx, expected) + + def test_bday_ignores_timedeltas(self): + idx = date_range("2010/02/01", "2010/02/10", freq="12H") + t1 = idx + BDay(offset=Timedelta(3, unit="H")) + + expected = DatetimeIndex( + [ + "2010-02-02 03:00:00", + "2010-02-02 15:00:00", + "2010-02-03 03:00:00", + "2010-02-03 15:00:00", + "2010-02-04 03:00:00", + "2010-02-04 15:00:00", + "2010-02-05 03:00:00", + "2010-02-05 15:00:00", + "2010-02-08 03:00:00", + "2010-02-08 15:00:00", + "2010-02-08 03:00:00", + "2010-02-08 15:00:00", + "2010-02-08 03:00:00", + "2010-02-08 15:00:00", + "2010-02-09 03:00:00", + "2010-02-09 15:00:00", + "2010-02-10 03:00:00", + "2010-02-10 15:00:00", + "2010-02-11 03:00:00", + ], + freq=None, + ) + tm.assert_index_equal(t1, expected) + + +class TestOpeningTimes: + # opening time should be affected by sign of n, not by n's value and end + 
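# e.g. from Tuesday 2014-07-01 11:00 under the default 09:00-17:00 window,
+    # every positive-n offset below expects (next, previous) opening times of
+    # (Wed 09:00, Tue 09:00), while the negative-n group swaps the two.
+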
opening_time_cases = [ + ( + [ + BusinessHour(), + BusinessHour(n=2), + BusinessHour(n=4), + BusinessHour(end="10:00"), + BusinessHour(n=2, end="4:00"), + BusinessHour(n=4, end="15:00"), + ], + { + datetime(2014, 7, 1, 11): ( + datetime(2014, 7, 2, 9), + datetime(2014, 7, 1, 9), + ), + datetime(2014, 7, 1, 18): ( + datetime(2014, 7, 2, 9), + datetime(2014, 7, 1, 9), + ), + datetime(2014, 7, 1, 23): ( + datetime(2014, 7, 2, 9), + datetime(2014, 7, 1, 9), + ), + datetime(2014, 7, 2, 8): ( + datetime(2014, 7, 2, 9), + datetime(2014, 7, 1, 9), + ), + # if timestamp is on opening time, next opening time is + # as it is + datetime(2014, 7, 2, 9): ( + datetime(2014, 7, 2, 9), + datetime(2014, 7, 2, 9), + ), + datetime(2014, 7, 2, 10): ( + datetime(2014, 7, 3, 9), + datetime(2014, 7, 2, 9), + ), + # 2014-07-05 is saturday + datetime(2014, 7, 5, 10): ( + datetime(2014, 7, 7, 9), + datetime(2014, 7, 4, 9), + ), + datetime(2014, 7, 4, 10): ( + datetime(2014, 7, 7, 9), + datetime(2014, 7, 4, 9), + ), + datetime(2014, 7, 4, 23): ( + datetime(2014, 7, 7, 9), + datetime(2014, 7, 4, 9), + ), + datetime(2014, 7, 6, 10): ( + datetime(2014, 7, 7, 9), + datetime(2014, 7, 4, 9), + ), + datetime(2014, 7, 7, 5): ( + datetime(2014, 7, 7, 9), + datetime(2014, 7, 4, 9), + ), + datetime(2014, 7, 7, 9, 1): ( + datetime(2014, 7, 8, 9), + datetime(2014, 7, 7, 9), + ), + }, + ), + ( + [ + BusinessHour(start="11:15"), + BusinessHour(n=2, start="11:15"), + BusinessHour(n=3, start="11:15"), + BusinessHour(start="11:15", end="10:00"), + BusinessHour(n=2, start="11:15", end="4:00"), + BusinessHour(n=3, start="11:15", end="15:00"), + ], + { + datetime(2014, 7, 1, 11): ( + datetime(2014, 7, 1, 11, 15), + datetime(2014, 6, 30, 11, 15), + ), + datetime(2014, 7, 1, 18): ( + datetime(2014, 7, 2, 11, 15), + datetime(2014, 7, 1, 11, 15), + ), + datetime(2014, 7, 1, 23): ( + datetime(2014, 7, 2, 11, 15), + datetime(2014, 7, 1, 11, 15), + ), + datetime(2014, 7, 2, 8): ( + datetime(2014, 7, 2, 11, 15), + datetime(2014, 7, 1, 11, 15), + ), + datetime(2014, 7, 2, 9): ( + datetime(2014, 7, 2, 11, 15), + datetime(2014, 7, 1, 11, 15), + ), + datetime(2014, 7, 2, 10): ( + datetime(2014, 7, 2, 11, 15), + datetime(2014, 7, 1, 11, 15), + ), + datetime(2014, 7, 2, 11, 15): ( + datetime(2014, 7, 2, 11, 15), + datetime(2014, 7, 2, 11, 15), + ), + datetime(2014, 7, 2, 11, 15, 1): ( + datetime(2014, 7, 3, 11, 15), + datetime(2014, 7, 2, 11, 15), + ), + datetime(2014, 7, 5, 10): ( + datetime(2014, 7, 7, 11, 15), + datetime(2014, 7, 4, 11, 15), + ), + datetime(2014, 7, 4, 10): ( + datetime(2014, 7, 4, 11, 15), + datetime(2014, 7, 3, 11, 15), + ), + datetime(2014, 7, 4, 23): ( + datetime(2014, 7, 7, 11, 15), + datetime(2014, 7, 4, 11, 15), + ), + datetime(2014, 7, 6, 10): ( + datetime(2014, 7, 7, 11, 15), + datetime(2014, 7, 4, 11, 15), + ), + datetime(2014, 7, 7, 5): ( + datetime(2014, 7, 7, 11, 15), + datetime(2014, 7, 4, 11, 15), + ), + datetime(2014, 7, 7, 9, 1): ( + datetime(2014, 7, 7, 11, 15), + datetime(2014, 7, 4, 11, 15), + ), + }, + ), + ( + [ + BusinessHour(-1), + BusinessHour(n=-2), + BusinessHour(n=-4), + BusinessHour(n=-1, end="10:00"), + BusinessHour(n=-2, end="4:00"), + BusinessHour(n=-4, end="15:00"), + ], + { + datetime(2014, 7, 1, 11): ( + datetime(2014, 7, 1, 9), + datetime(2014, 7, 2, 9), + ), + datetime(2014, 7, 1, 18): ( + datetime(2014, 7, 1, 9), + datetime(2014, 7, 2, 9), + ), + datetime(2014, 7, 1, 23): ( + datetime(2014, 7, 1, 9), + datetime(2014, 7, 2, 9), + ), + datetime(2014, 7, 2, 8): ( + datetime(2014, 7, 1, 9), + 
datetime(2014, 7, 2, 9), + ), + datetime(2014, 7, 2, 9): ( + datetime(2014, 7, 2, 9), + datetime(2014, 7, 2, 9), + ), + datetime(2014, 7, 2, 10): ( + datetime(2014, 7, 2, 9), + datetime(2014, 7, 3, 9), + ), + datetime(2014, 7, 5, 10): ( + datetime(2014, 7, 4, 9), + datetime(2014, 7, 7, 9), + ), + datetime(2014, 7, 4, 10): ( + datetime(2014, 7, 4, 9), + datetime(2014, 7, 7, 9), + ), + datetime(2014, 7, 4, 23): ( + datetime(2014, 7, 4, 9), + datetime(2014, 7, 7, 9), + ), + datetime(2014, 7, 6, 10): ( + datetime(2014, 7, 4, 9), + datetime(2014, 7, 7, 9), + ), + datetime(2014, 7, 7, 5): ( + datetime(2014, 7, 4, 9), + datetime(2014, 7, 7, 9), + ), + datetime(2014, 7, 7, 9): ( + datetime(2014, 7, 7, 9), + datetime(2014, 7, 7, 9), + ), + datetime(2014, 7, 7, 9, 1): ( + datetime(2014, 7, 7, 9), + datetime(2014, 7, 8, 9), + ), + }, + ), + ( + [ + BusinessHour(start="17:00", end="05:00"), + BusinessHour(n=3, start="17:00", end="03:00"), + ], + { + datetime(2014, 7, 1, 11): ( + datetime(2014, 7, 1, 17), + datetime(2014, 6, 30, 17), + ), + datetime(2014, 7, 1, 18): ( + datetime(2014, 7, 2, 17), + datetime(2014, 7, 1, 17), + ), + datetime(2014, 7, 1, 23): ( + datetime(2014, 7, 2, 17), + datetime(2014, 7, 1, 17), + ), + datetime(2014, 7, 2, 8): ( + datetime(2014, 7, 2, 17), + datetime(2014, 7, 1, 17), + ), + datetime(2014, 7, 2, 9): ( + datetime(2014, 7, 2, 17), + datetime(2014, 7, 1, 17), + ), + datetime(2014, 7, 4, 17): ( + datetime(2014, 7, 4, 17), + datetime(2014, 7, 4, 17), + ), + datetime(2014, 7, 5, 10): ( + datetime(2014, 7, 7, 17), + datetime(2014, 7, 4, 17), + ), + datetime(2014, 7, 4, 10): ( + datetime(2014, 7, 4, 17), + datetime(2014, 7, 3, 17), + ), + datetime(2014, 7, 4, 23): ( + datetime(2014, 7, 7, 17), + datetime(2014, 7, 4, 17), + ), + datetime(2014, 7, 6, 10): ( + datetime(2014, 7, 7, 17), + datetime(2014, 7, 4, 17), + ), + datetime(2014, 7, 7, 5): ( + datetime(2014, 7, 7, 17), + datetime(2014, 7, 4, 17), + ), + datetime(2014, 7, 7, 17, 1): ( + datetime(2014, 7, 8, 17), + datetime(2014, 7, 7, 17), + ), + }, + ), + ( + [ + BusinessHour(-1, start="17:00", end="05:00"), + BusinessHour(n=-2, start="17:00", end="03:00"), + ], + { + datetime(2014, 7, 1, 11): ( + datetime(2014, 6, 30, 17), + datetime(2014, 7, 1, 17), + ), + datetime(2014, 7, 1, 18): ( + datetime(2014, 7, 1, 17), + datetime(2014, 7, 2, 17), + ), + datetime(2014, 7, 1, 23): ( + datetime(2014, 7, 1, 17), + datetime(2014, 7, 2, 17), + ), + datetime(2014, 7, 2, 8): ( + datetime(2014, 7, 1, 17), + datetime(2014, 7, 2, 17), + ), + datetime(2014, 7, 2, 9): ( + datetime(2014, 7, 1, 17), + datetime(2014, 7, 2, 17), + ), + datetime(2014, 7, 2, 16, 59): ( + datetime(2014, 7, 1, 17), + datetime(2014, 7, 2, 17), + ), + datetime(2014, 7, 5, 10): ( + datetime(2014, 7, 4, 17), + datetime(2014, 7, 7, 17), + ), + datetime(2014, 7, 4, 10): ( + datetime(2014, 7, 3, 17), + datetime(2014, 7, 4, 17), + ), + datetime(2014, 7, 4, 23): ( + datetime(2014, 7, 4, 17), + datetime(2014, 7, 7, 17), + ), + datetime(2014, 7, 6, 10): ( + datetime(2014, 7, 4, 17), + datetime(2014, 7, 7, 17), + ), + datetime(2014, 7, 7, 5): ( + datetime(2014, 7, 4, 17), + datetime(2014, 7, 7, 17), + ), + datetime(2014, 7, 7, 18): ( + datetime(2014, 7, 7, 17), + datetime(2014, 7, 8, 17), + ), + }, + ), + ( + [ + BusinessHour(start=["11:15", "15:00"], end=["13:00", "20:00"]), + BusinessHour(n=3, start=["11:15", "15:00"], end=["12:00", "20:00"]), + BusinessHour(start=["11:15", "15:00"], end=["13:00", "17:00"]), + BusinessHour(n=2, start=["11:15", "15:00"], end=["12:00", "03:00"]), 
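+                # all five variants in this group share the 11:15/15:00
+                # opening times, so every (next, previous) pair in the cases
+                # below must hold for each of them, even though their n and
+                # end values differ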
+ BusinessHour(n=3, start=["11:15", "15:00"], end=["13:00", "16:00"]), + ], + { + datetime(2014, 7, 1, 11): ( + datetime(2014, 7, 1, 11, 15), + datetime(2014, 6, 30, 15), + ), + datetime(2014, 7, 1, 18): ( + datetime(2014, 7, 2, 11, 15), + datetime(2014, 7, 1, 15), + ), + datetime(2014, 7, 1, 23): ( + datetime(2014, 7, 2, 11, 15), + datetime(2014, 7, 1, 15), + ), + datetime(2014, 7, 2, 8): ( + datetime(2014, 7, 2, 11, 15), + datetime(2014, 7, 1, 15), + ), + datetime(2014, 7, 2, 9): ( + datetime(2014, 7, 2, 11, 15), + datetime(2014, 7, 1, 15), + ), + datetime(2014, 7, 2, 10): ( + datetime(2014, 7, 2, 11, 15), + datetime(2014, 7, 1, 15), + ), + datetime(2014, 7, 2, 11, 15): ( + datetime(2014, 7, 2, 11, 15), + datetime(2014, 7, 2, 11, 15), + ), + datetime(2014, 7, 2, 11, 15, 1): ( + datetime(2014, 7, 2, 15), + datetime(2014, 7, 2, 11, 15), + ), + datetime(2014, 7, 5, 10): ( + datetime(2014, 7, 7, 11, 15), + datetime(2014, 7, 4, 15), + ), + datetime(2014, 7, 4, 10): ( + datetime(2014, 7, 4, 11, 15), + datetime(2014, 7, 3, 15), + ), + datetime(2014, 7, 4, 23): ( + datetime(2014, 7, 7, 11, 15), + datetime(2014, 7, 4, 15), + ), + datetime(2014, 7, 6, 10): ( + datetime(2014, 7, 7, 11, 15), + datetime(2014, 7, 4, 15), + ), + datetime(2014, 7, 7, 5): ( + datetime(2014, 7, 7, 11, 15), + datetime(2014, 7, 4, 15), + ), + datetime(2014, 7, 7, 9, 1): ( + datetime(2014, 7, 7, 11, 15), + datetime(2014, 7, 4, 15), + ), + datetime(2014, 7, 7, 12): ( + datetime(2014, 7, 7, 15), + datetime(2014, 7, 7, 11, 15), + ), + }, + ), + ( + [ + BusinessHour(n=-1, start=["17:00", "08:00"], end=["05:00", "10:00"]), + BusinessHour(n=-2, start=["08:00", "17:00"], end=["10:00", "03:00"]), + ], + { + datetime(2014, 7, 1, 11): ( + datetime(2014, 7, 1, 8), + datetime(2014, 7, 1, 17), + ), + datetime(2014, 7, 1, 18): ( + datetime(2014, 7, 1, 17), + datetime(2014, 7, 2, 8), + ), + datetime(2014, 7, 1, 23): ( + datetime(2014, 7, 1, 17), + datetime(2014, 7, 2, 8), + ), + datetime(2014, 7, 2, 8): ( + datetime(2014, 7, 2, 8), + datetime(2014, 7, 2, 8), + ), + datetime(2014, 7, 2, 9): ( + datetime(2014, 7, 2, 8), + datetime(2014, 7, 2, 17), + ), + datetime(2014, 7, 2, 16, 59): ( + datetime(2014, 7, 2, 8), + datetime(2014, 7, 2, 17), + ), + datetime(2014, 7, 5, 10): ( + datetime(2014, 7, 4, 17), + datetime(2014, 7, 7, 8), + ), + datetime(2014, 7, 4, 10): ( + datetime(2014, 7, 4, 8), + datetime(2014, 7, 4, 17), + ), + datetime(2014, 7, 4, 23): ( + datetime(2014, 7, 4, 17), + datetime(2014, 7, 7, 8), + ), + datetime(2014, 7, 6, 10): ( + datetime(2014, 7, 4, 17), + datetime(2014, 7, 7, 8), + ), + datetime(2014, 7, 7, 5): ( + datetime(2014, 7, 4, 17), + datetime(2014, 7, 7, 8), + ), + datetime(2014, 7, 7, 18): ( + datetime(2014, 7, 7, 17), + datetime(2014, 7, 8, 8), + ), + }, + ), + ] + + @pytest.mark.parametrize("case", opening_time_cases) + def test_opening_time(self, case): + _offsets, cases = case + for offset in _offsets: + for dt, (exp_next, exp_prev) in cases.items(): + assert offset._next_opening_time(dt) == exp_next + assert offset._prev_opening_time(dt) == exp_prev diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_business_month.py b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_business_month.py new file mode 100644 index 0000000..bb2049f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_business_month.py @@ -0,0 +1,220 @@ +""" +Tests for the following offsets: +- BMonthBegin +- BMonthEnd +""" +from datetime import datetime + +import 
pytest + +import pandas as pd +from pandas.tests.tseries.offsets.common import ( + Base, + assert_is_on_offset, + assert_offset_equal, +) + +from pandas.tseries.offsets import ( + BMonthBegin, + BMonthEnd, +) + + +@pytest.mark.parametrize("n", [-2, 1]) +@pytest.mark.parametrize( + "cls", + [ + BMonthBegin, + BMonthEnd, + ], +) +def test_apply_index(cls, n): + offset = cls(n=n) + rng = pd.date_range(start="1/1/2000", periods=100000, freq="T") + ser = pd.Series(rng) + + res = rng + offset + assert res.freq is None # not retained + assert res[0] == rng[0] + offset + assert res[-1] == rng[-1] + offset + res2 = ser + offset + # apply_index is only for indexes, not series, so no res2_v2 + assert res2.iloc[0] == ser.iloc[0] + offset + assert res2.iloc[-1] == ser.iloc[-1] + offset + + +class TestBMonthBegin(Base): + _offset = BMonthBegin + + def test_offsets_compare_equal(self): + # root cause of #456 + offset1 = BMonthBegin() + offset2 = BMonthBegin() + assert not offset1 != offset2 + + offset_cases = [] + offset_cases.append( + ( + BMonthBegin(), + { + datetime(2008, 1, 1): datetime(2008, 2, 1), + datetime(2008, 1, 31): datetime(2008, 2, 1), + datetime(2006, 12, 29): datetime(2007, 1, 1), + datetime(2006, 12, 31): datetime(2007, 1, 1), + datetime(2006, 9, 1): datetime(2006, 10, 2), + datetime(2007, 1, 1): datetime(2007, 2, 1), + datetime(2006, 12, 1): datetime(2007, 1, 1), + }, + ) + ) + + offset_cases.append( + ( + BMonthBegin(0), + { + datetime(2008, 1, 1): datetime(2008, 1, 1), + datetime(2006, 10, 2): datetime(2006, 10, 2), + datetime(2008, 1, 31): datetime(2008, 2, 1), + datetime(2006, 12, 29): datetime(2007, 1, 1), + datetime(2006, 12, 31): datetime(2007, 1, 1), + datetime(2006, 9, 15): datetime(2006, 10, 2), + }, + ) + ) + + offset_cases.append( + ( + BMonthBegin(2), + { + datetime(2008, 1, 1): datetime(2008, 3, 3), + datetime(2008, 1, 15): datetime(2008, 3, 3), + datetime(2006, 12, 29): datetime(2007, 2, 1), + datetime(2006, 12, 31): datetime(2007, 2, 1), + datetime(2007, 1, 1): datetime(2007, 3, 1), + datetime(2006, 11, 1): datetime(2007, 1, 1), + }, + ) + ) + + offset_cases.append( + ( + BMonthBegin(-1), + { + datetime(2007, 1, 1): datetime(2006, 12, 1), + datetime(2008, 6, 30): datetime(2008, 6, 2), + datetime(2008, 6, 1): datetime(2008, 5, 1), + datetime(2008, 3, 10): datetime(2008, 3, 3), + datetime(2008, 12, 31): datetime(2008, 12, 1), + datetime(2006, 12, 29): datetime(2006, 12, 1), + datetime(2006, 12, 30): datetime(2006, 12, 1), + datetime(2007, 1, 1): datetime(2006, 12, 1), + }, + ) + ) + + @pytest.mark.parametrize("case", offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in cases.items(): + assert_offset_equal(offset, base, expected) + + on_offset_cases = [ + (BMonthBegin(), datetime(2007, 12, 31), False), + (BMonthBegin(), datetime(2008, 1, 1), True), + (BMonthBegin(), datetime(2001, 4, 2), True), + (BMonthBegin(), datetime(2008, 3, 3), True), + ] + + @pytest.mark.parametrize("case", on_offset_cases) + def test_is_on_offset(self, case): + offset, dt, expected = case + assert_is_on_offset(offset, dt, expected) + + +class TestBMonthEnd(Base): + _offset = BMonthEnd + + def test_normalize(self): + dt = datetime(2007, 1, 1, 3) + + result = dt + BMonthEnd(normalize=True) + expected = dt.replace(hour=0) + BMonthEnd() + assert result == expected + + def test_offsets_compare_equal(self): + # root cause of #456 + offset1 = BMonthEnd() + offset2 = BMonthEnd() + assert not offset1 != offset2 + + offset_cases = [] + offset_cases.append( + ( + 
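+ # BMonthEnd advances to the last business day of the month; an + # illustrative check against the first mapping below: + # >>> datetime(2008, 1, 1) + BMonthEnd() == datetime(2008, 1, 31) + # True + # >>> # already on offset, so it rolls to the next month end: + # >>> datetime(2008, 1, 31) + BMonthEnd() == datetime(2008, 2, 29) + # True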
BMonthEnd(), + { + datetime(2008, 1, 1): datetime(2008, 1, 31), + datetime(2008, 1, 31): datetime(2008, 2, 29), + datetime(2006, 12, 29): datetime(2007, 1, 31), + datetime(2006, 12, 31): datetime(2007, 1, 31), + datetime(2007, 1, 1): datetime(2007, 1, 31), + datetime(2006, 12, 1): datetime(2006, 12, 29), + }, + ) + ) + + offset_cases.append( + ( + BMonthEnd(0), + { + datetime(2008, 1, 1): datetime(2008, 1, 31), + datetime(2008, 1, 31): datetime(2008, 1, 31), + datetime(2006, 12, 29): datetime(2006, 12, 29), + datetime(2006, 12, 31): datetime(2007, 1, 31), + datetime(2007, 1, 1): datetime(2007, 1, 31), + }, + ) + ) + + offset_cases.append( + ( + BMonthEnd(2), + { + datetime(2008, 1, 1): datetime(2008, 2, 29), + datetime(2008, 1, 31): datetime(2008, 3, 31), + datetime(2006, 12, 29): datetime(2007, 2, 28), + datetime(2006, 12, 31): datetime(2007, 2, 28), + datetime(2007, 1, 1): datetime(2007, 2, 28), + datetime(2006, 11, 1): datetime(2006, 12, 29), + }, + ) + ) + + offset_cases.append( + ( + BMonthEnd(-1), + { + datetime(2007, 1, 1): datetime(2006, 12, 29), + datetime(2008, 6, 30): datetime(2008, 5, 30), + datetime(2008, 12, 31): datetime(2008, 11, 28), + datetime(2006, 12, 29): datetime(2006, 11, 30), + datetime(2006, 12, 30): datetime(2006, 12, 29), + datetime(2007, 1, 1): datetime(2006, 12, 29), + }, + ) + ) + + @pytest.mark.parametrize("case", offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in cases.items(): + assert_offset_equal(offset, base, expected) + + on_offset_cases = [ + (BMonthEnd(), datetime(2007, 12, 31), True), + (BMonthEnd(), datetime(2008, 1, 1), False), + ] + + @pytest.mark.parametrize("case", on_offset_cases) + def test_is_on_offset(self, case): + offset, dt, expected = case + assert_is_on_offset(offset, dt, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_business_quarter.py b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_business_quarter.py new file mode 100644 index 0000000..b928b47 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_business_quarter.py @@ -0,0 +1,311 @@ +""" +Tests for the following offsets: +- BQuarterBegin +- BQuarterEnd +""" +from datetime import datetime + +import pytest + +from pandas.tests.tseries.offsets.common import ( + Base, + assert_is_on_offset, + assert_offset_equal, +) + +from pandas.tseries.offsets import ( + BQuarterBegin, + BQuarterEnd, +) + + +def test_quarterly_dont_normalize(): + date = datetime(2012, 3, 31, 5, 30) + + offsets = (BQuarterEnd, BQuarterBegin) + + for klass in offsets: + result = date + klass() + assert result.time() == date.time() + + +@pytest.mark.parametrize("offset", [BQuarterBegin(), BQuarterEnd()]) +def test_on_offset(offset): + dates = [ + datetime(2016, m, d) + for m in [10, 11, 12] + for d in [1, 2, 3, 28, 29, 30, 31] + if not (m == 11 and d == 31) + ] + for date in dates: + res = offset.is_on_offset(date) + slow_version = date == (date + offset) - offset + assert res == slow_version + + +class TestBQuarterBegin(Base): + _offset = BQuarterBegin + + def test_repr(self): + expected = "<BusinessQuarterBegin: startingMonth=3>" + assert repr(BQuarterBegin()) == expected + expected = "<BusinessQuarterBegin: startingMonth=3>" + assert repr(BQuarterBegin(startingMonth=3)) == expected + expected = "<BusinessQuarterBegin: startingMonth=1>" + assert repr(BQuarterBegin(startingMonth=1)) == expected + + def test_is_anchored(self): + assert BQuarterBegin(startingMonth=1).is_anchored() + assert BQuarterBegin().is_anchored() + assert not BQuarterBegin(2, startingMonth=1).is_anchored() + + def 
test_offset_corner_case(self): + # corner + offset = BQuarterBegin(n=-1, startingMonth=1) + assert datetime(2007, 4, 3) + offset == datetime(2007, 4, 2) + + offset_cases = [] + offset_cases.append( + ( + BQuarterBegin(startingMonth=1), + { + datetime(2008, 1, 1): datetime(2008, 4, 1), + datetime(2008, 1, 31): datetime(2008, 4, 1), + datetime(2008, 2, 15): datetime(2008, 4, 1), + datetime(2008, 2, 29): datetime(2008, 4, 1), + datetime(2008, 3, 15): datetime(2008, 4, 1), + datetime(2008, 3, 31): datetime(2008, 4, 1), + datetime(2008, 4, 15): datetime(2008, 7, 1), + datetime(2007, 3, 15): datetime(2007, 4, 2), + datetime(2007, 2, 28): datetime(2007, 4, 2), + datetime(2007, 1, 1): datetime(2007, 4, 2), + datetime(2007, 4, 15): datetime(2007, 7, 2), + datetime(2007, 7, 1): datetime(2007, 7, 2), + datetime(2007, 4, 1): datetime(2007, 4, 2), + datetime(2007, 4, 2): datetime(2007, 7, 2), + datetime(2008, 4, 30): datetime(2008, 7, 1), + }, + ) + ) + + offset_cases.append( + ( + BQuarterBegin(startingMonth=2), + { + datetime(2008, 1, 1): datetime(2008, 2, 1), + datetime(2008, 1, 31): datetime(2008, 2, 1), + datetime(2008, 1, 15): datetime(2008, 2, 1), + datetime(2008, 2, 29): datetime(2008, 5, 1), + datetime(2008, 3, 15): datetime(2008, 5, 1), + datetime(2008, 3, 31): datetime(2008, 5, 1), + datetime(2008, 4, 15): datetime(2008, 5, 1), + datetime(2008, 8, 15): datetime(2008, 11, 3), + datetime(2008, 9, 15): datetime(2008, 11, 3), + datetime(2008, 11, 1): datetime(2008, 11, 3), + datetime(2008, 4, 30): datetime(2008, 5, 1), + }, + ) + ) + + offset_cases.append( + ( + BQuarterBegin(startingMonth=1, n=0), + { + datetime(2008, 1, 1): datetime(2008, 1, 1), + datetime(2007, 12, 31): datetime(2008, 1, 1), + datetime(2008, 2, 15): datetime(2008, 4, 1), + datetime(2008, 2, 29): datetime(2008, 4, 1), + datetime(2008, 1, 15): datetime(2008, 4, 1), + datetime(2008, 2, 27): datetime(2008, 4, 1), + datetime(2008, 3, 15): datetime(2008, 4, 1), + datetime(2007, 4, 1): datetime(2007, 4, 2), + datetime(2007, 4, 2): datetime(2007, 4, 2), + datetime(2007, 7, 1): datetime(2007, 7, 2), + datetime(2007, 4, 15): datetime(2007, 7, 2), + datetime(2007, 7, 2): datetime(2007, 7, 2), + }, + ) + ) + + offset_cases.append( + ( + BQuarterBegin(startingMonth=1, n=-1), + { + datetime(2008, 1, 1): datetime(2007, 10, 1), + datetime(2008, 1, 31): datetime(2008, 1, 1), + datetime(2008, 2, 15): datetime(2008, 1, 1), + datetime(2008, 2, 29): datetime(2008, 1, 1), + datetime(2008, 3, 15): datetime(2008, 1, 1), + datetime(2008, 3, 31): datetime(2008, 1, 1), + datetime(2008, 4, 15): datetime(2008, 4, 1), + datetime(2007, 7, 3): datetime(2007, 7, 2), + datetime(2007, 4, 3): datetime(2007, 4, 2), + datetime(2007, 7, 2): datetime(2007, 4, 2), + datetime(2008, 4, 1): datetime(2008, 1, 1), + }, + ) + ) + + offset_cases.append( + ( + BQuarterBegin(startingMonth=1, n=2), + { + datetime(2008, 1, 1): datetime(2008, 7, 1), + datetime(2008, 1, 15): datetime(2008, 7, 1), + datetime(2008, 2, 29): datetime(2008, 7, 1), + datetime(2008, 3, 15): datetime(2008, 7, 1), + datetime(2007, 3, 31): datetime(2007, 7, 2), + datetime(2007, 4, 15): datetime(2007, 10, 1), + datetime(2008, 4, 30): datetime(2008, 10, 1), + }, + ) + ) + + @pytest.mark.parametrize("case", offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in cases.items(): + assert_offset_equal(offset, base, expected) + + +class TestBQuarterEnd(Base): + _offset = BQuarterEnd + + def test_repr(self): + expected = "<BusinessQuarterEnd: startingMonth=3>" + assert repr(BQuarterEnd()) == expected + expected = 
"" + assert repr(BQuarterEnd(startingMonth=3)) == expected + expected = "" + assert repr(BQuarterEnd(startingMonth=1)) == expected + + def test_is_anchored(self): + assert BQuarterEnd(startingMonth=1).is_anchored() + assert BQuarterEnd().is_anchored() + assert not BQuarterEnd(2, startingMonth=1).is_anchored() + + def test_offset_corner_case(self): + # corner + offset = BQuarterEnd(n=-1, startingMonth=1) + assert datetime(2010, 1, 31) + offset == datetime(2010, 1, 29) + + offset_cases = [] + offset_cases.append( + ( + BQuarterEnd(startingMonth=1), + { + datetime(2008, 1, 1): datetime(2008, 1, 31), + datetime(2008, 1, 31): datetime(2008, 4, 30), + datetime(2008, 2, 15): datetime(2008, 4, 30), + datetime(2008, 2, 29): datetime(2008, 4, 30), + datetime(2008, 3, 15): datetime(2008, 4, 30), + datetime(2008, 3, 31): datetime(2008, 4, 30), + datetime(2008, 4, 15): datetime(2008, 4, 30), + datetime(2008, 4, 30): datetime(2008, 7, 31), + }, + ) + ) + + offset_cases.append( + ( + BQuarterEnd(startingMonth=2), + { + datetime(2008, 1, 1): datetime(2008, 2, 29), + datetime(2008, 1, 31): datetime(2008, 2, 29), + datetime(2008, 2, 15): datetime(2008, 2, 29), + datetime(2008, 2, 29): datetime(2008, 5, 30), + datetime(2008, 3, 15): datetime(2008, 5, 30), + datetime(2008, 3, 31): datetime(2008, 5, 30), + datetime(2008, 4, 15): datetime(2008, 5, 30), + datetime(2008, 4, 30): datetime(2008, 5, 30), + }, + ) + ) + + offset_cases.append( + ( + BQuarterEnd(startingMonth=1, n=0), + { + datetime(2008, 1, 1): datetime(2008, 1, 31), + datetime(2008, 1, 31): datetime(2008, 1, 31), + datetime(2008, 2, 15): datetime(2008, 4, 30), + datetime(2008, 2, 29): datetime(2008, 4, 30), + datetime(2008, 3, 15): datetime(2008, 4, 30), + datetime(2008, 3, 31): datetime(2008, 4, 30), + datetime(2008, 4, 15): datetime(2008, 4, 30), + datetime(2008, 4, 30): datetime(2008, 4, 30), + }, + ) + ) + + offset_cases.append( + ( + BQuarterEnd(startingMonth=1, n=-1), + { + datetime(2008, 1, 1): datetime(2007, 10, 31), + datetime(2008, 1, 31): datetime(2007, 10, 31), + datetime(2008, 2, 15): datetime(2008, 1, 31), + datetime(2008, 2, 29): datetime(2008, 1, 31), + datetime(2008, 3, 15): datetime(2008, 1, 31), + datetime(2008, 3, 31): datetime(2008, 1, 31), + datetime(2008, 4, 15): datetime(2008, 1, 31), + datetime(2008, 4, 30): datetime(2008, 1, 31), + }, + ) + ) + + offset_cases.append( + ( + BQuarterEnd(startingMonth=1, n=2), + { + datetime(2008, 1, 31): datetime(2008, 7, 31), + datetime(2008, 2, 15): datetime(2008, 7, 31), + datetime(2008, 2, 29): datetime(2008, 7, 31), + datetime(2008, 3, 15): datetime(2008, 7, 31), + datetime(2008, 3, 31): datetime(2008, 7, 31), + datetime(2008, 4, 15): datetime(2008, 7, 31), + datetime(2008, 4, 30): datetime(2008, 10, 31), + }, + ) + ) + + @pytest.mark.parametrize("case", offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in cases.items(): + assert_offset_equal(offset, base, expected) + + on_offset_cases = [ + (BQuarterEnd(1, startingMonth=1), datetime(2008, 1, 31), True), + (BQuarterEnd(1, startingMonth=1), datetime(2007, 12, 31), False), + (BQuarterEnd(1, startingMonth=1), datetime(2008, 2, 29), False), + (BQuarterEnd(1, startingMonth=1), datetime(2007, 3, 30), False), + (BQuarterEnd(1, startingMonth=1), datetime(2007, 3, 31), False), + (BQuarterEnd(1, startingMonth=1), datetime(2008, 4, 30), True), + (BQuarterEnd(1, startingMonth=1), datetime(2008, 5, 30), False), + (BQuarterEnd(1, startingMonth=1), datetime(2007, 6, 29), False), + (BQuarterEnd(1, startingMonth=1), 
datetime(2007, 6, 30), False), + (BQuarterEnd(1, startingMonth=2), datetime(2008, 1, 31), False), + (BQuarterEnd(1, startingMonth=2), datetime(2007, 12, 31), False), + (BQuarterEnd(1, startingMonth=2), datetime(2008, 2, 29), True), + (BQuarterEnd(1, startingMonth=2), datetime(2007, 3, 30), False), + (BQuarterEnd(1, startingMonth=2), datetime(2007, 3, 31), False), + (BQuarterEnd(1, startingMonth=2), datetime(2008, 4, 30), False), + (BQuarterEnd(1, startingMonth=2), datetime(2008, 5, 30), True), + (BQuarterEnd(1, startingMonth=2), datetime(2007, 6, 29), False), + (BQuarterEnd(1, startingMonth=2), datetime(2007, 6, 30), False), + (BQuarterEnd(1, startingMonth=3), datetime(2008, 1, 31), False), + (BQuarterEnd(1, startingMonth=3), datetime(2007, 12, 31), True), + (BQuarterEnd(1, startingMonth=3), datetime(2008, 2, 29), False), + (BQuarterEnd(1, startingMonth=3), datetime(2007, 3, 30), True), + (BQuarterEnd(1, startingMonth=3), datetime(2007, 3, 31), False), + (BQuarterEnd(1, startingMonth=3), datetime(2008, 4, 30), False), + (BQuarterEnd(1, startingMonth=3), datetime(2008, 5, 30), False), + (BQuarterEnd(1, startingMonth=3), datetime(2007, 6, 29), True), + (BQuarterEnd(1, startingMonth=3), datetime(2007, 6, 30), False), + ] + + @pytest.mark.parametrize("case", on_offset_cases) + def test_is_on_offset(self, case): + offset, dt, expected = case + assert_is_on_offset(offset, dt, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_business_year.py b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_business_year.py new file mode 100644 index 0000000..d531a58 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_business_year.py @@ -0,0 +1,220 @@ +""" +Tests for the following offsets: +- BYearBegin +- BYearEnd +""" +from datetime import datetime + +import pytest + +from pandas.tests.tseries.offsets.common import ( + Base, + assert_is_on_offset, + assert_offset_equal, +) + +from pandas.tseries.offsets import ( + BYearBegin, + BYearEnd, +) + + +class TestBYearBegin(Base): + _offset = BYearBegin + + def test_misspecified(self): + msg = "Month must go from 1 to 12" + with pytest.raises(ValueError, match=msg): + BYearBegin(month=13) + with pytest.raises(ValueError, match=msg): + BYearEnd(month=13) + + offset_cases = [] + offset_cases.append( + ( + BYearBegin(), + { + datetime(2008, 1, 1): datetime(2009, 1, 1), + datetime(2008, 6, 30): datetime(2009, 1, 1), + datetime(2008, 12, 31): datetime(2009, 1, 1), + datetime(2011, 1, 1): datetime(2011, 1, 3), + datetime(2011, 1, 3): datetime(2012, 1, 2), + datetime(2005, 12, 30): datetime(2006, 1, 2), + datetime(2005, 12, 31): datetime(2006, 1, 2), + }, + ) + ) + + offset_cases.append( + ( + BYearBegin(0), + { + datetime(2008, 1, 1): datetime(2008, 1, 1), + datetime(2008, 6, 30): datetime(2009, 1, 1), + datetime(2008, 12, 31): datetime(2009, 1, 1), + datetime(2005, 12, 30): datetime(2006, 1, 2), + datetime(2005, 12, 31): datetime(2006, 1, 2), + }, + ) + ) + + offset_cases.append( + ( + BYearBegin(-1), + { + datetime(2007, 1, 1): datetime(2006, 1, 2), + datetime(2009, 1, 4): datetime(2009, 1, 1), + datetime(2009, 1, 1): datetime(2008, 1, 1), + datetime(2008, 6, 30): datetime(2008, 1, 1), + datetime(2008, 12, 31): datetime(2008, 1, 1), + datetime(2006, 12, 29): datetime(2006, 1, 2), + datetime(2006, 12, 30): datetime(2006, 1, 2), + datetime(2006, 1, 1): datetime(2005, 1, 3), + }, + ) + ) + + offset_cases.append( + ( + BYearBegin(-2), + { + datetime(2007, 1, 1): datetime(2005, 
1, 3), + datetime(2007, 6, 30): datetime(2006, 1, 2), + datetime(2008, 12, 31): datetime(2007, 1, 1), + }, + ) + ) + + @pytest.mark.parametrize("case", offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in cases.items(): + assert_offset_equal(offset, base, expected) + + +class TestBYearEnd(Base): + _offset = BYearEnd + + offset_cases = [] + offset_cases.append( + ( + BYearEnd(), + { + datetime(2008, 1, 1): datetime(2008, 12, 31), + datetime(2008, 6, 30): datetime(2008, 12, 31), + datetime(2008, 12, 31): datetime(2009, 12, 31), + datetime(2005, 12, 30): datetime(2006, 12, 29), + datetime(2005, 12, 31): datetime(2006, 12, 29), + }, + ) + ) + + offset_cases.append( + ( + BYearEnd(0), + { + datetime(2008, 1, 1): datetime(2008, 12, 31), + datetime(2008, 6, 30): datetime(2008, 12, 31), + datetime(2008, 12, 31): datetime(2008, 12, 31), + datetime(2005, 12, 31): datetime(2006, 12, 29), + }, + ) + ) + + offset_cases.append( + ( + BYearEnd(-1), + { + datetime(2007, 1, 1): datetime(2006, 12, 29), + datetime(2008, 6, 30): datetime(2007, 12, 31), + datetime(2008, 12, 31): datetime(2007, 12, 31), + datetime(2006, 12, 29): datetime(2005, 12, 30), + datetime(2006, 12, 30): datetime(2006, 12, 29), + datetime(2007, 1, 1): datetime(2006, 12, 29), + }, + ) + ) + + offset_cases.append( + ( + BYearEnd(-2), + { + datetime(2007, 1, 1): datetime(2005, 12, 30), + datetime(2008, 6, 30): datetime(2006, 12, 29), + datetime(2008, 12, 31): datetime(2006, 12, 29), + }, + ) + ) + + @pytest.mark.parametrize("case", offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in cases.items(): + assert_offset_equal(offset, base, expected) + + on_offset_cases = [ + (BYearEnd(), datetime(2007, 12, 31), True), + (BYearEnd(), datetime(2008, 1, 1), False), + (BYearEnd(), datetime(2006, 12, 31), False), + (BYearEnd(), datetime(2006, 12, 29), True), + ] + + @pytest.mark.parametrize("case", on_offset_cases) + def test_is_on_offset(self, case): + offset, dt, expected = case + assert_is_on_offset(offset, dt, expected) + + +class TestBYearEndLagged(Base): + _offset = BYearEnd + + def test_bad_month_fail(self): + msg = "Month must go from 1 to 12" + with pytest.raises(ValueError, match=msg): + BYearEnd(month=13) + with pytest.raises(ValueError, match=msg): + BYearEnd(month=0) + + offset_cases = [] + offset_cases.append( + ( + BYearEnd(month=6), + { + datetime(2008, 1, 1): datetime(2008, 6, 30), + datetime(2007, 6, 30): datetime(2008, 6, 30), + }, + ) + ) + + offset_cases.append( + ( + BYearEnd(n=-1, month=6), + { + datetime(2008, 1, 1): datetime(2007, 6, 29), + datetime(2007, 6, 30): datetime(2007, 6, 29), + }, + ) + ) + + @pytest.mark.parametrize("case", offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in cases.items(): + assert_offset_equal(offset, base, expected) + + def test_roll(self): + offset = BYearEnd(month=6) + date = datetime(2009, 11, 30) + + assert offset.rollforward(date) == datetime(2010, 6, 30) + assert offset.rollback(date) == datetime(2009, 6, 30) + + on_offset_cases = [ + (BYearEnd(month=2), datetime(2007, 2, 28), True), + (BYearEnd(month=6), datetime(2007, 6, 30), False), + ] + + @pytest.mark.parametrize("case", on_offset_cases) + def test_is_on_offset(self, case): + offset, dt, expected = case + assert_is_on_offset(offset, dt, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_custom_business_day.py 
b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_custom_business_day.py new file mode 100644 index 0000000..3bbbaa8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_custom_business_day.py @@ -0,0 +1,91 @@ +""" +Tests for offsets.CustomBusinessDay / CDay +""" +from datetime import ( + datetime, + timedelta, +) + +import numpy as np +import pytest + +from pandas._libs.tslibs.offsets import CDay + +from pandas import ( + _testing as tm, + read_pickle, +) +from pandas.tests.tseries.offsets.common import assert_offset_equal +from pandas.tests.tseries.offsets.test_business_day import TestBusinessDay + +from pandas.tseries.holiday import USFederalHolidayCalendar + + +class TestCustomBusinessDay(TestBusinessDay): + _offset = CDay + + def test_repr(self): + assert repr(self.offset) == "<CustomBusinessDay>" + assert repr(self.offset2) == "<2 * CustomBusinessDays>" + + expected = "<CustomBusinessDay: offset=datetime.timedelta(days=1)>" + assert repr(self.offset + timedelta(1)) == expected + + def test_holidays(self): + # Define a TradingDay offset + holidays = ["2012-05-01", datetime(2013, 5, 1), np.datetime64("2014-05-01")] + tday = CDay(holidays=holidays) + for year in range(2012, 2015): + dt = datetime(year, 4, 30) + xp = datetime(year, 5, 2) + rs = dt + tday + assert rs == xp + + def test_weekmask(self): + weekmask_saudi = "Sat Sun Mon Tue Wed" # Thu-Fri Weekend + weekmask_uae = "1111001" # Fri-Sat Weekend + weekmask_egypt = [1, 1, 1, 1, 0, 0, 1] # Fri-Sat Weekend + bday_saudi = CDay(weekmask=weekmask_saudi) + bday_uae = CDay(weekmask=weekmask_uae) + bday_egypt = CDay(weekmask=weekmask_egypt) + dt = datetime(2013, 5, 1) + xp_saudi = datetime(2013, 5, 4) + xp_uae = datetime(2013, 5, 2) + xp_egypt = datetime(2013, 5, 2) + assert xp_saudi == dt + bday_saudi + assert xp_uae == dt + bday_uae + assert xp_egypt == dt + bday_egypt + xp2 = datetime(2013, 5, 5) + assert xp2 == dt + 2 * bday_saudi + assert xp2 == dt + 2 * bday_uae + assert xp2 == dt + 2 * bday_egypt + + def test_weekmask_and_holidays(self): + weekmask_egypt = "Sun Mon Tue Wed Thu" # Fri-Sat Weekend + holidays = ["2012-05-01", datetime(2013, 5, 1), np.datetime64("2014-05-01")] + bday_egypt = CDay(holidays=holidays, weekmask=weekmask_egypt) + dt = datetime(2013, 4, 30) + xp_egypt = datetime(2013, 5, 5) + assert xp_egypt == dt + 2 * bday_egypt + + @pytest.mark.filterwarnings("ignore:Non:pandas.errors.PerformanceWarning") + def test_calendar(self): + calendar = USFederalHolidayCalendar() + dt = datetime(2014, 1, 17) + assert_offset_equal(CDay(calendar=calendar), dt, datetime(2014, 1, 21)) + + def test_roundtrip_pickle(self): + def _check_roundtrip(obj): + unpickled = tm.round_trip_pickle(obj) + assert unpickled == obj + + _check_roundtrip(self.offset) + _check_roundtrip(self.offset2) + _check_roundtrip(self.offset * 2) + + def test_pickle_compat_0_14_1(self, datapath): + hdays = [datetime(2013, 1, 1) for ele in range(4)] + pth = datapath("tseries", "offsets", "data", "cday-0.14.1.pickle") + cday0_14_1 = read_pickle(pth) + cday = CDay(holidays=hdays) + assert cday == cday0_14_1 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_custom_business_hour.py b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_custom_business_hour.py new file mode 100644 index 0000000..dbc0ff4 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_custom_business_hour.py @@ -0,0 +1,328 @@ +""" +Tests for offsets.CustomBusinessHour +""" +from datetime import datetime + +import numpy as np +import 
pytest + +from pandas._libs.tslibs import Timestamp +from pandas._libs.tslibs.offsets import ( + BusinessHour, + CustomBusinessHour, + Nano, +) + +import pandas._testing as tm +from pandas.tests.tseries.offsets.common import ( + Base, + assert_offset_equal, +) + +from pandas.tseries.holiday import USFederalHolidayCalendar + + +class TestCustomBusinessHour(Base): + _offset = CustomBusinessHour + holidays = ["2014-06-27", datetime(2014, 6, 30), np.datetime64("2014-07-02")] + + def setup_method(self, method): + # 2014 Calendar to check custom holidays + # Sun Mon Tue Wed Thu Fri Sat + # 6/22 23 24 25 26 27 28 + # 29 30 7/1 2 3 4 5 + # 6 7 8 9 10 11 12 + self.d = datetime(2014, 7, 1, 10, 00) + self.offset1 = CustomBusinessHour(weekmask="Tue Wed Thu Fri") + + self.offset2 = CustomBusinessHour(holidays=self.holidays) + + def test_constructor_errors(self): + from datetime import time as dt_time + + msg = "time data must be specified only with hour and minute" + with pytest.raises(ValueError, match=msg): + CustomBusinessHour(start=dt_time(11, 0, 5)) + msg = "time data must match '%H:%M' format" + with pytest.raises(ValueError, match=msg): + CustomBusinessHour(start="AAA") + msg = "time data must match '%H:%M' format" + with pytest.raises(ValueError, match=msg): + CustomBusinessHour(start="14:00:05") + + def test_different_normalize_equals(self): + # GH#21404 changed __eq__ to return False when `normalize` does not match + offset = self._offset() + offset2 = self._offset(normalize=True) + assert offset != offset2 + + def test_repr(self): + assert repr(self.offset1) == "<CustomBusinessHour: CBH=09:00-17:00>" + assert repr(self.offset2) == "<CustomBusinessHour: CBH=09:00-17:00>" + + def test_with_offset(self): + expected = Timestamp("2014-07-01 13:00") + + assert self.d + CustomBusinessHour() * 3 == expected + assert self.d + CustomBusinessHour(n=3) == expected + + def test_eq(self): + for offset in [self.offset1, self.offset2]: + assert offset == offset + + assert CustomBusinessHour() != CustomBusinessHour(-1) + assert CustomBusinessHour(start="09:00") == CustomBusinessHour() + assert CustomBusinessHour(start="09:00") != CustomBusinessHour(start="09:01") + assert CustomBusinessHour(start="09:00", end="17:00") != CustomBusinessHour( + start="17:00", end="09:01" + ) + + assert CustomBusinessHour(weekmask="Tue Wed Thu Fri") != CustomBusinessHour( + weekmask="Mon Tue Wed Thu Fri" + ) + assert CustomBusinessHour(holidays=["2014-06-27"]) != CustomBusinessHour( + holidays=["2014-06-28"] + ) + + def test_sub(self): + # override the Base.test_sub implementation because self.offset2 is + # defined differently in this class than the test expects + pass + + def test_hash(self): + assert hash(self.offset1) == hash(self.offset1) + assert hash(self.offset2) == hash(self.offset2) + + def test_call(self): + with tm.assert_produces_warning(FutureWarning): + # GH#34171 DateOffset.__call__ is deprecated + assert self.offset1(self.d) == datetime(2014, 7, 1, 11) + assert self.offset2(self.d) == datetime(2014, 7, 1, 11) + + def testRollback1(self): + assert self.offset1.rollback(self.d) == self.d + assert self.offset2.rollback(self.d) == self.d + + d = datetime(2014, 7, 1, 0) + + # 2014/07/01 is Tuesday, 06/30 is Monday(holiday) + assert self.offset1.rollback(d) == datetime(2014, 6, 27, 17) + + # 2014/6/30 and 2014/6/27 are holidays + assert self.offset2.rollback(d) == datetime(2014, 6, 26, 17) + + def testRollback2(self): + assert self._offset(-3).rollback(datetime(2014, 7, 5, 15, 0)) == datetime( + 2014, 7, 4, 17, 0 + ) + + def testRollforward1(self): + assert self.offset1.rollforward(self.d) 
== self.d + assert self.offset2.rollforward(self.d) == self.d + + d = datetime(2014, 7, 1, 0) + assert self.offset1.rollforward(d) == datetime(2014, 7, 1, 9) + assert self.offset2.rollforward(d) == datetime(2014, 7, 1, 9) + + def testRollforward2(self): + assert self._offset(-3).rollforward(datetime(2014, 7, 5, 16, 0)) == datetime( + 2014, 7, 7, 9 + ) + + def test_roll_date_object(self): + offset = BusinessHour() + + dt = datetime(2014, 7, 6, 15, 0) + + result = offset.rollback(dt) + assert result == datetime(2014, 7, 4, 17) + + result = offset.rollforward(dt) + assert result == datetime(2014, 7, 7, 9) + + normalize_cases = [ + ( + CustomBusinessHour(normalize=True, holidays=holidays), + { + datetime(2014, 7, 1, 8): datetime(2014, 7, 1), + datetime(2014, 7, 1, 17): datetime(2014, 7, 3), + datetime(2014, 7, 1, 16): datetime(2014, 7, 3), + datetime(2014, 7, 1, 23): datetime(2014, 7, 3), + datetime(2014, 7, 1, 0): datetime(2014, 7, 1), + datetime(2014, 7, 4, 15): datetime(2014, 7, 4), + datetime(2014, 7, 4, 15, 59): datetime(2014, 7, 4), + datetime(2014, 7, 4, 16, 30): datetime(2014, 7, 7), + datetime(2014, 7, 5, 23): datetime(2014, 7, 7), + datetime(2014, 7, 6, 10): datetime(2014, 7, 7), + }, + ), + ( + CustomBusinessHour(-1, normalize=True, holidays=holidays), + { + datetime(2014, 7, 1, 8): datetime(2014, 6, 26), + datetime(2014, 7, 1, 17): datetime(2014, 7, 1), + datetime(2014, 7, 1, 16): datetime(2014, 7, 1), + datetime(2014, 7, 1, 10): datetime(2014, 6, 26), + datetime(2014, 7, 1, 0): datetime(2014, 6, 26), + datetime(2014, 7, 7, 10): datetime(2014, 7, 4), + datetime(2014, 7, 7, 10, 1): datetime(2014, 7, 7), + datetime(2014, 7, 5, 23): datetime(2014, 7, 4), + datetime(2014, 7, 6, 10): datetime(2014, 7, 4), + }, + ), + ( + CustomBusinessHour( + 1, normalize=True, start="17:00", end="04:00", holidays=holidays + ), + { + datetime(2014, 7, 1, 8): datetime(2014, 7, 1), + datetime(2014, 7, 1, 17): datetime(2014, 7, 1), + datetime(2014, 7, 1, 23): datetime(2014, 7, 2), + datetime(2014, 7, 2, 2): datetime(2014, 7, 2), + datetime(2014, 7, 2, 3): datetime(2014, 7, 3), + datetime(2014, 7, 4, 23): datetime(2014, 7, 5), + datetime(2014, 7, 5, 2): datetime(2014, 7, 5), + datetime(2014, 7, 7, 2): datetime(2014, 7, 7), + datetime(2014, 7, 7, 17): datetime(2014, 7, 7), + }, + ), + ] + + @pytest.mark.parametrize("norm_cases", normalize_cases) + def test_normalize(self, norm_cases): + offset, cases = norm_cases + for dt, expected in cases.items(): + assert offset._apply(dt) == expected + + def test_is_on_offset(self): + tests = [ + ( + CustomBusinessHour(start="10:00", end="15:00", holidays=self.holidays), + { + datetime(2014, 7, 1, 9): False, + datetime(2014, 7, 1, 10): True, + datetime(2014, 7, 1, 15): True, + datetime(2014, 7, 1, 15, 1): False, + datetime(2014, 7, 5, 12): False, + datetime(2014, 7, 6, 12): False, + }, + ) + ] + + for offset, cases in tests: + for dt, expected in cases.items(): + assert offset.is_on_offset(dt) == expected + + apply_cases = [ + ( + CustomBusinessHour(holidays=holidays), + { + datetime(2014, 7, 1, 11): datetime(2014, 7, 1, 12), + datetime(2014, 7, 1, 13): datetime(2014, 7, 1, 14), + datetime(2014, 7, 1, 15): datetime(2014, 7, 1, 16), + datetime(2014, 7, 1, 19): datetime(2014, 7, 3, 10), + datetime(2014, 7, 1, 16): datetime(2014, 7, 3, 9), + datetime(2014, 7, 1, 16, 30, 15): datetime(2014, 7, 3, 9, 30, 15), + datetime(2014, 7, 1, 17): datetime(2014, 7, 3, 10), + datetime(2014, 7, 2, 11): datetime(2014, 7, 3, 10), + # out of business hours + datetime(2014, 7, 2, 8): 
datetime(2014, 7, 3, 10), + datetime(2014, 7, 2, 19): datetime(2014, 7, 3, 10), + datetime(2014, 7, 2, 23): datetime(2014, 7, 3, 10), + datetime(2014, 7, 3, 0): datetime(2014, 7, 3, 10), + # saturday + datetime(2014, 7, 5, 15): datetime(2014, 7, 7, 10), + datetime(2014, 7, 4, 17): datetime(2014, 7, 7, 10), + datetime(2014, 7, 4, 16, 30): datetime(2014, 7, 7, 9, 30), + datetime(2014, 7, 4, 16, 30, 30): datetime(2014, 7, 7, 9, 30, 30), + }, + ), + ( + CustomBusinessHour(4, holidays=holidays), + { + datetime(2014, 7, 1, 11): datetime(2014, 7, 1, 15), + datetime(2014, 7, 1, 13): datetime(2014, 7, 3, 9), + datetime(2014, 7, 1, 15): datetime(2014, 7, 3, 11), + datetime(2014, 7, 1, 16): datetime(2014, 7, 3, 12), + datetime(2014, 7, 1, 17): datetime(2014, 7, 3, 13), + datetime(2014, 7, 2, 11): datetime(2014, 7, 3, 13), + datetime(2014, 7, 2, 8): datetime(2014, 7, 3, 13), + datetime(2014, 7, 2, 19): datetime(2014, 7, 3, 13), + datetime(2014, 7, 2, 23): datetime(2014, 7, 3, 13), + datetime(2014, 7, 3, 0): datetime(2014, 7, 3, 13), + datetime(2014, 7, 5, 15): datetime(2014, 7, 7, 13), + datetime(2014, 7, 4, 17): datetime(2014, 7, 7, 13), + datetime(2014, 7, 4, 16, 30): datetime(2014, 7, 7, 12, 30), + datetime(2014, 7, 4, 16, 30, 30): datetime(2014, 7, 7, 12, 30, 30), + }, + ), + ] + + @pytest.mark.parametrize("apply_case", apply_cases) + def test_apply(self, apply_case): + offset, cases = apply_case + for base, expected in cases.items(): + assert_offset_equal(offset, base, expected) + + nano_cases = [ + ( + CustomBusinessHour(holidays=holidays), + { + Timestamp("2014-07-01 15:00") + + Nano(5): Timestamp("2014-07-01 16:00") + + Nano(5), + Timestamp("2014-07-01 16:00") + + Nano(5): Timestamp("2014-07-03 09:00") + + Nano(5), + Timestamp("2014-07-01 16:00") + - Nano(5): Timestamp("2014-07-01 17:00") + - Nano(5), + }, + ), + ( + CustomBusinessHour(-1, holidays=holidays), + { + Timestamp("2014-07-01 15:00") + + Nano(5): Timestamp("2014-07-01 14:00") + + Nano(5), + Timestamp("2014-07-01 10:00") + + Nano(5): Timestamp("2014-07-01 09:00") + + Nano(5), + Timestamp("2014-07-01 10:00") + - Nano(5): Timestamp("2014-06-26 17:00") + - Nano(5), + }, + ), + ] + + @pytest.mark.parametrize("nano_case", nano_cases) + def test_apply_nanoseconds(self, nano_case): + offset, cases = nano_case + for base, expected in cases.items(): + assert_offset_equal(offset, base, expected) + + def test_us_federal_holiday_with_datetime(self): + # GH 16867 + bhour_us = CustomBusinessHour(calendar=USFederalHolidayCalendar()) + t0 = datetime(2014, 1, 17, 15) + result = t0 + bhour_us * 8 + expected = Timestamp("2014-01-21 15:00:00") + assert result == expected + + +@pytest.mark.parametrize( + "weekmask, expected_time, mult", + [ + ["Mon Tue Wed Thu Fri Sat", "2018-11-10 09:00:00", 10], + ["Tue Wed Thu Fri Sat", "2018-11-13 08:00:00", 18], + ], +) +def test_custom_businesshour_weekmask_and_holidays(weekmask, expected_time, mult): + # GH 23542 + holidays = ["2018-11-09"] + bh = CustomBusinessHour( + start="08:00", end="17:00", weekmask=weekmask, holidays=holidays + ) + result = Timestamp("2018-11-08 08:00") + mult * bh + expected = Timestamp(expected_time) + assert result == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_custom_business_month.py b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_custom_business_month.py new file mode 100644 index 0000000..fb0f331 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_custom_business_month.py @@ 
-0,0 +1,444 @@ +""" +Tests for the following offsets: +- CustomBusinessMonthBase +- CustomBusinessMonthBegin +- CustomBusinessMonthEnd +""" +from datetime import ( + date, + datetime, + timedelta, +) + +import numpy as np +import pytest + +from pandas._libs.tslibs.offsets import ( + CBMonthBegin, + CBMonthEnd, + CDay, +) + +from pandas import ( + _testing as tm, + date_range, +) +from pandas.tests.tseries.offsets.common import ( + Base, + assert_is_on_offset, + assert_offset_equal, +) +from pandas.tests.tseries.offsets.test_offsets import _ApplyCases + +from pandas.tseries import offsets as offsets +from pandas.tseries.holiday import USFederalHolidayCalendar + + +class CustomBusinessMonthBase: + def setup_method(self, method): + self.d = datetime(2008, 1, 1) + self.offset = self._offset() + self.offset1 = self.offset + self.offset2 = self._offset(2) + + def test_eq(self): + assert self.offset2 == self.offset2 + + def test_mul(self): + pass + + def test_hash(self): + assert hash(self.offset2) == hash(self.offset2) + + def test_roundtrip_pickle(self): + def _check_roundtrip(obj): + unpickled = tm.round_trip_pickle(obj) + assert unpickled == obj + + _check_roundtrip(self._offset()) + _check_roundtrip(self._offset(2)) + _check_roundtrip(self._offset() * 2) + + def test_copy(self): + # GH 17452 + off = self._offset(weekmask="Mon Wed Fri") + assert off == off.copy() + + +class TestCustomBusinessMonthBegin(CustomBusinessMonthBase, Base): + _offset = CBMonthBegin + + def test_different_normalize_equals(self): + # GH#21404 changed __eq__ to return False when `normalize` does not match + offset = self._offset() + offset2 = self._offset(normalize=True) + assert offset != offset2 + + def test_repr(self): + assert repr(self.offset) == "<CustomBusinessMonthBegin>" + assert repr(self.offset2) == "<2 * CustomBusinessMonthBegins>" + + def test_call(self): + with tm.assert_produces_warning(FutureWarning): + # GH#34171 DateOffset.__call__ is deprecated + assert self.offset2(self.d) == datetime(2008, 3, 3) + + def testRollback1(self): + assert CDay(10).rollback(datetime(2007, 12, 31)) == datetime(2007, 12, 31) + + def testRollback2(self): + assert CBMonthBegin(10).rollback(self.d) == datetime(2008, 1, 1) + + def testRollforward1(self): + assert CBMonthBegin(10).rollforward(self.d) == datetime(2008, 1, 1) + + def test_roll_date_object(self): + offset = CBMonthBegin() + + dt = date(2012, 9, 15) + + result = offset.rollback(dt) + assert result == datetime(2012, 9, 3) + + result = offset.rollforward(dt) + assert result == datetime(2012, 10, 1) + + offset = offsets.Day() + result = offset.rollback(dt) + assert result == datetime(2012, 9, 15) + + result = offset.rollforward(dt) + assert result == datetime(2012, 9, 15) + + on_offset_cases = [ + (CBMonthBegin(), datetime(2008, 1, 1), True), + (CBMonthBegin(), datetime(2008, 1, 31), False), + ] + + @pytest.mark.parametrize("case", on_offset_cases) + def test_is_on_offset(self, case): + offset, dt, expected = case + assert_is_on_offset(offset, dt, expected) + + apply_cases: _ApplyCases = [ + ( + CBMonthBegin(), + { + datetime(2008, 1, 1): datetime(2008, 2, 1), + datetime(2008, 2, 7): datetime(2008, 3, 3), + }, + ), + ( + 2 * CBMonthBegin(), + { + datetime(2008, 1, 1): datetime(2008, 3, 3), + datetime(2008, 2, 7): datetime(2008, 4, 1), + }, + ), + ( + -CBMonthBegin(), + { + datetime(2008, 1, 1): datetime(2007, 12, 3), + datetime(2008, 2, 8): datetime(2008, 2, 1), + }, + ), + ( + -2 * CBMonthBegin(), + { + datetime(2008, 1, 1): datetime(2007, 11, 1), + datetime(2008, 2, 9): datetime(2008, 1, 1), 
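+ # (multiplying by a negative int both scales and reverses an offset: + # -2 * CBMonthBegin() steps back two business month starts, e.g. + # datetime(2008, 1, 1) + (-2 * CBMonthBegin()) == datetime(2007, 11, 1), + # the intermediate step being 2007-12-03 since Dec 1 2007 is a Saturday)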
+ }, + ), + ( + CBMonthBegin(0), + { + datetime(2008, 1, 1): datetime(2008, 1, 1), + datetime(2008, 1, 7): datetime(2008, 2, 1), + }, + ), + ] + + @pytest.mark.parametrize("case", apply_cases) + def test_apply(self, case): + offset, cases = case + for base, expected in cases.items(): + assert_offset_equal(offset, base, expected) + + def test_apply_large_n(self): + dt = datetime(2012, 10, 23) + + result = dt + CBMonthBegin(10) + assert result == datetime(2013, 8, 1) + + result = dt + CDay(100) - CDay(100) + assert result == dt + + off = CBMonthBegin() * 6 + rs = datetime(2012, 1, 1) - off + xp = datetime(2011, 7, 1) + assert rs == xp + + st = datetime(2011, 12, 18) + rs = st + off + + xp = datetime(2012, 6, 1) + assert rs == xp + + def test_holidays(self): + # Define a TradingDay offset + holidays = ["2012-02-01", datetime(2012, 2, 2), np.datetime64("2012-03-01")] + bm_offset = CBMonthBegin(holidays=holidays) + dt = datetime(2012, 1, 1) + + assert dt + bm_offset == datetime(2012, 1, 2) + assert dt + 2 * bm_offset == datetime(2012, 2, 3) + + @pytest.mark.filterwarnings("ignore:Non:pandas.errors.PerformanceWarning") + def test_datetimeindex(self): + hcal = USFederalHolidayCalendar() + cbmb = CBMonthBegin(calendar=hcal) + assert date_range(start="20120101", end="20130101", freq=cbmb).tolist()[ + 0 + ] == datetime(2012, 1, 3) + + @pytest.mark.parametrize( + "case", + [ + ( + CBMonthBegin(n=1, offset=timedelta(days=5)), + { + datetime(2021, 3, 1): datetime(2021, 4, 1) + timedelta(days=5), + datetime(2021, 4, 17): datetime(2021, 5, 3) + timedelta(days=5), + }, + ), + ( + CBMonthBegin(n=2, offset=timedelta(days=40)), + { + datetime(2021, 3, 10): datetime(2021, 5, 3) + timedelta(days=40), + datetime(2021, 4, 30): datetime(2021, 6, 1) + timedelta(days=40), + }, + ), + ( + CBMonthBegin(n=1, offset=timedelta(days=-5)), + { + datetime(2021, 3, 1): datetime(2021, 4, 1) - timedelta(days=5), + datetime(2021, 4, 11): datetime(2021, 5, 3) - timedelta(days=5), + }, + ), + ( + -2 * CBMonthBegin(n=1, offset=timedelta(days=10)), + { + datetime(2021, 3, 1): datetime(2021, 1, 1) + timedelta(days=10), + datetime(2021, 4, 3): datetime(2021, 3, 1) + timedelta(days=10), + }, + ), + ( + CBMonthBegin(n=0, offset=timedelta(days=1)), + { + datetime(2021, 3, 2): datetime(2021, 4, 1) + timedelta(days=1), + datetime(2021, 4, 1): datetime(2021, 4, 1) + timedelta(days=1), + }, + ), + ( + CBMonthBegin( + n=1, holidays=["2021-04-01", "2021-04-02"], offset=timedelta(days=1) + ), + { + datetime(2021, 3, 2): datetime(2021, 4, 5) + timedelta(days=1), + }, + ), + ], + ) + def test_apply_with_extra_offset(self, case): + offset, cases = case + for base, expected in cases.items(): + assert_offset_equal(offset, base, expected) + + +class TestCustomBusinessMonthEnd(CustomBusinessMonthBase, Base): + _offset = CBMonthEnd + + def test_different_normalize_equals(self): + # GH#21404 changed __eq__ to return False when `normalize` does not match + offset = self._offset() + offset2 = self._offset(normalize=True) + assert offset != offset2 + + def test_repr(self): + assert repr(self.offset) == "<CustomBusinessMonthEnd>" + assert repr(self.offset2) == "<2 * CustomBusinessMonthEnds>" + + def test_call(self): + with tm.assert_produces_warning(FutureWarning): + # GH#34171 DateOffset.__call__ is deprecated + assert self.offset2(self.d) == datetime(2008, 2, 29) + + def testRollback1(self): + assert CDay(10).rollback(datetime(2007, 12, 31)) == datetime(2007, 12, 31) + + def testRollback2(self): + assert CBMonthEnd(10).rollback(self.d) == datetime(2007, 12, 31) + + def 
testRollforward1(self): + assert CBMonthEnd(10).rollforward(self.d) == datetime(2008, 1, 31) + + def test_roll_date_object(self): + offset = CBMonthEnd() + + dt = date(2012, 9, 15) + + result = offset.rollback(dt) + assert result == datetime(2012, 8, 31) + + result = offset.rollforward(dt) + assert result == datetime(2012, 9, 28) + + offset = offsets.Day() + result = offset.rollback(dt) + assert result == datetime(2012, 9, 15) + + result = offset.rollforward(dt) + assert result == datetime(2012, 9, 15) + + on_offset_cases = [ + (CBMonthEnd(), datetime(2008, 1, 31), True), + (CBMonthEnd(), datetime(2008, 1, 1), False), + ] + + @pytest.mark.parametrize("case", on_offset_cases) + def test_is_on_offset(self, case): + offset, d, expected = case + assert_is_on_offset(offset, d, expected) + + apply_cases: _ApplyCases = [ + ( + CBMonthEnd(), + { + datetime(2008, 1, 1): datetime(2008, 1, 31), + datetime(2008, 2, 7): datetime(2008, 2, 29), + }, + ), + ( + 2 * CBMonthEnd(), + { + datetime(2008, 1, 1): datetime(2008, 2, 29), + datetime(2008, 2, 7): datetime(2008, 3, 31), + }, + ), + ( + -CBMonthEnd(), + { + datetime(2008, 1, 1): datetime(2007, 12, 31), + datetime(2008, 2, 8): datetime(2008, 1, 31), + }, + ), + ( + -2 * CBMonthEnd(), + { + datetime(2008, 1, 1): datetime(2007, 11, 30), + datetime(2008, 2, 9): datetime(2007, 12, 31), + }, + ), + ( + CBMonthEnd(0), + { + datetime(2008, 1, 1): datetime(2008, 1, 31), + datetime(2008, 2, 7): datetime(2008, 2, 29), + }, + ), + ] + + @pytest.mark.parametrize("case", apply_cases) + def test_apply(self, case): + offset, cases = case + for base, expected in cases.items(): + assert_offset_equal(offset, base, expected) + + def test_apply_large_n(self): + dt = datetime(2012, 10, 23) + + result = dt + CBMonthEnd(10) + assert result == datetime(2013, 7, 31) + + result = dt + CDay(100) - CDay(100) + assert result == dt + + off = CBMonthEnd() * 6 + rs = datetime(2012, 1, 1) - off + xp = datetime(2011, 7, 29) + assert rs == xp + + st = datetime(2011, 12, 18) + rs = st + off + xp = datetime(2012, 5, 31) + assert rs == xp + + def test_holidays(self): + # Define a TradingDay offset + holidays = ["2012-01-31", datetime(2012, 2, 28), np.datetime64("2012-02-29")] + bm_offset = CBMonthEnd(holidays=holidays) + dt = datetime(2012, 1, 1) + assert dt + bm_offset == datetime(2012, 1, 30) + assert dt + 2 * bm_offset == datetime(2012, 2, 27) + + @pytest.mark.filterwarnings("ignore:Non:pandas.errors.PerformanceWarning") + def test_datetimeindex(self): + from pandas.tseries.holiday import USFederalHolidayCalendar + + hcal = USFederalHolidayCalendar() + freq = CBMonthEnd(calendar=hcal) + + assert date_range(start="20120101", end="20130101", freq=freq).tolist()[ + 0 + ] == datetime(2012, 1, 31) + + @pytest.mark.parametrize( + "case", + [ + ( + CBMonthEnd(n=1, offset=timedelta(days=5)), + { + datetime(2021, 3, 1): datetime(2021, 3, 31) + timedelta(days=5), + datetime(2021, 4, 17): datetime(2021, 4, 30) + timedelta(days=5), + }, + ), + ( + CBMonthEnd(n=2, offset=timedelta(days=40)), + { + datetime(2021, 3, 10): datetime(2021, 4, 30) + timedelta(days=40), + datetime(2021, 4, 30): datetime(2021, 6, 30) + timedelta(days=40), + }, + ), + ( + CBMonthEnd(n=1, offset=timedelta(days=-5)), + { + datetime(2021, 3, 1): datetime(2021, 3, 31) - timedelta(days=5), + datetime(2021, 4, 11): datetime(2021, 4, 30) - timedelta(days=5), + }, + ), + ( + -2 * CBMonthEnd(n=1, offset=timedelta(days=10)), + { + datetime(2021, 3, 1): datetime(2021, 1, 29) + timedelta(days=10), + datetime(2021, 4, 3): datetime(2021, 
2, 26) + timedelta(days=10), + }, + ), + ( + CBMonthEnd(n=0, offset=timedelta(days=1)), + { + datetime(2021, 3, 2): datetime(2021, 3, 31) + timedelta(days=1), + datetime(2021, 4, 1): datetime(2021, 4, 30) + timedelta(days=1), + }, + ), + ( + CBMonthEnd(n=1, holidays=["2021-03-31"], offset=timedelta(days=1)), + { + datetime(2021, 3, 2): datetime(2021, 3, 30) + timedelta(days=1), + }, + ), + ], + ) + def test_apply_with_extra_offset(self, case): + offset, cases = case + for base, expected in cases.items(): + assert_offset_equal(offset, base, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_dst.py b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_dst.py new file mode 100644 index 0000000..50c5a91 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_dst.py @@ -0,0 +1,227 @@ +""" +Tests for DateOffset additions over Daylight Savings Time +""" +from datetime import timedelta + +import pytest +import pytz + +from pandas._libs.tslibs import Timestamp +from pandas._libs.tslibs.offsets import ( + BMonthBegin, + BMonthEnd, + BQuarterBegin, + BQuarterEnd, + BYearBegin, + BYearEnd, + CBMonthBegin, + CBMonthEnd, + CustomBusinessDay, + DateOffset, + Day, + MonthBegin, + MonthEnd, + QuarterBegin, + QuarterEnd, + SemiMonthBegin, + SemiMonthEnd, + Week, + YearBegin, + YearEnd, +) + +from pandas.tests.tseries.offsets.test_offsets import get_utc_offset_hours + + +class TestDST: + + # one microsecond before the DST transition + ts_pre_fallback = "2013-11-03 01:59:59.999999" + ts_pre_springfwd = "2013-03-10 01:59:59.999999" + + # test both basic names and dateutil timezones + timezone_utc_offsets = { + "US/Eastern": {"utc_offset_daylight": -4, "utc_offset_standard": -5}, + "dateutil/US/Pacific": {"utc_offset_daylight": -7, "utc_offset_standard": -8}, + } + valid_date_offsets_singular = [ + "weekday", + "day", + "hour", + "minute", + "second", + "microsecond", + ] + valid_date_offsets_plural = [ + "weeks", + "days", + "hours", + "minutes", + "seconds", + "milliseconds", + "microseconds", + ] + + def _test_all_offsets(self, n, **kwds): + valid_offsets = ( + self.valid_date_offsets_plural + if n > 1 + else self.valid_date_offsets_singular + ) + + for name in valid_offsets: + self._test_offset(offset_name=name, offset_n=n, **kwds) + + def _test_offset(self, offset_name, offset_n, tstart, expected_utc_offset): + offset = DateOffset(**{offset_name: offset_n}) + + t = tstart + offset + if expected_utc_offset is not None: + assert get_utc_offset_hours(t) == expected_utc_offset + + if offset_name == "weeks": + # dates should match + assert t.date() == timedelta(days=7 * offset.kwds["weeks"]) + tstart.date() + # expect the same day of week, hour of day, minute, second, ... + assert ( + t.dayofweek == tstart.dayofweek + and t.hour == tstart.hour + and t.minute == tstart.minute + and t.second == tstart.second + ) + elif offset_name == "days": + # dates should match + assert timedelta(offset.kwds["days"]) + tstart.date() == t.date() + # expect the same hour of day, minute, second, ... 
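+ # (that is, adding n calendar days via DateOffset keeps the local + # wall-clock time across a DST transition and only the UTC offset + # changes; pandas' Day() is a Tick and instead behaves like an + # absolute 24-hour timedelta, see the Day entry in offset_classes + # below)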
+ assert ( + t.hour == tstart.hour + and t.minute == tstart.minute + and t.second == tstart.second + ) + elif offset_name in self.valid_date_offsets_singular: + # expect the singular offset value to match between tstart and t + datepart_offset = getattr( + t, offset_name if offset_name != "weekday" else "dayofweek" + ) + assert datepart_offset == offset.kwds[offset_name] + else: + # the offset should be the same as if it was done in UTC + assert t == (tstart.tz_convert("UTC") + offset).tz_convert("US/Pacific") + + def _make_timestamp(self, string, hrs_offset, tz): + if hrs_offset >= 0: + offset_string = f"{hrs_offset:02d}00" + else: + offset_string = f"-{(hrs_offset * -1):02}00" + return Timestamp(string + offset_string).tz_convert(tz) + + def test_springforward_plural(self): + # test moving from standard to daylight savings + for tz, utc_offsets in self.timezone_utc_offsets.items(): + hrs_pre = utc_offsets["utc_offset_standard"] + hrs_post = utc_offsets["utc_offset_daylight"] + self._test_all_offsets( + n=3, + tstart=self._make_timestamp(self.ts_pre_springfwd, hrs_pre, tz), + expected_utc_offset=hrs_post, + ) + + def test_fallback_singular(self): + # in the case of singular offsets, we don't necessarily know which utc + # offset the new Timestamp will wind up in (the tz for 1 month may be + # different from 1 second) so we don't specify an expected_utc_offset + for tz, utc_offsets in self.timezone_utc_offsets.items(): + hrs_pre = utc_offsets["utc_offset_standard"] + self._test_all_offsets( + n=1, + tstart=self._make_timestamp(self.ts_pre_fallback, hrs_pre, tz), + expected_utc_offset=None, + ) + + def test_springforward_singular(self): + for tz, utc_offsets in self.timezone_utc_offsets.items(): + hrs_pre = utc_offsets["utc_offset_standard"] + self._test_all_offsets( + n=1, + tstart=self._make_timestamp(self.ts_pre_springfwd, hrs_pre, tz), + expected_utc_offset=None, + ) + + offset_classes = { + MonthBegin: ["11/2/2012", "12/1/2012"], + MonthEnd: ["11/2/2012", "11/30/2012"], + BMonthBegin: ["11/2/2012", "12/3/2012"], + BMonthEnd: ["11/2/2012", "11/30/2012"], + CBMonthBegin: ["11/2/2012", "12/3/2012"], + CBMonthEnd: ["11/2/2012", "11/30/2012"], + SemiMonthBegin: ["11/2/2012", "11/15/2012"], + SemiMonthEnd: ["11/2/2012", "11/15/2012"], + Week: ["11/2/2012", "11/9/2012"], + YearBegin: ["11/2/2012", "1/1/2013"], + YearEnd: ["11/2/2012", "12/31/2012"], + BYearBegin: ["11/2/2012", "1/1/2013"], + BYearEnd: ["11/2/2012", "12/31/2012"], + QuarterBegin: ["11/2/2012", "12/1/2012"], + QuarterEnd: ["11/2/2012", "12/31/2012"], + BQuarterBegin: ["11/2/2012", "12/3/2012"], + BQuarterEnd: ["11/2/2012", "12/31/2012"], + Day: ["11/4/2012", "11/4/2012 23:00"], + }.items() + + @pytest.mark.parametrize("tup", offset_classes) + def test_all_offset_classes(self, tup): + offset, test_values = tup + + first = Timestamp(test_values[0], tz="US/Eastern") + offset() + second = Timestamp(test_values[1], tz="US/Eastern") + assert first == second + + +@pytest.mark.parametrize( + "original_dt, target_dt, offset, tz", + [ + pytest.param( + Timestamp("1900-01-01"), + Timestamp("1905-07-01"), + MonthBegin(66), + "Africa/Kinshasa", + marks=pytest.mark.xfail( + # error: Module has no attribute "__version__" + float(pytz.__version__) <= 2020.1, # type: ignore[attr-defined] + reason="GH#41906", + ), + ), + ( + Timestamp("2021-10-01 01:15"), + Timestamp("2021-10-31 01:15"), + MonthEnd(1), + "Europe/London", + ), + ( + Timestamp("2010-12-05 02:59"), + Timestamp("2010-10-31 02:59"), + SemiMonthEnd(-3), + "Europe/Paris", + ), + ( + 
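+ # 2021-11-07 01:20 falls in the hour that repeats when US/Eastern + # leaves DST (clocks go back at 2:00), so the shifted result below + # cannot be localized unambiguously: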
Timestamp("2021-10-31 01:20"), + Timestamp("2021-11-07 01:20"), + CustomBusinessDay(2, weekmask="Sun Mon"), + "US/Eastern", + ), + ( + Timestamp("2020-04-03 01:30"), + Timestamp("2020-11-01 01:30"), + YearBegin(1, month=11), + "America/Chicago", + ), + ], +) +def test_nontick_offset_with_ambiguous_time_error(original_dt, target_dt, offset, tz): + # .apply for non-Tick offsets throws AmbiguousTimeError when the target dt + # is dst-ambiguous + localized_dt = original_dt.tz_localize(tz) + + msg = f"Cannot infer dst time from {target_dt}, try using the 'ambiguous' argument" + with pytest.raises(pytz.AmbiguousTimeError, match=msg): + localized_dt + offset diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_easter.py b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_easter.py new file mode 100644 index 0000000..90ee7c7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_easter.py @@ -0,0 +1,36 @@ +""" +Tests for the following offsets: +- Easter +""" +from __future__ import annotations + +from datetime import datetime + +import pytest + +from pandas.tests.tseries.offsets.common import ( + Base, + assert_offset_equal, +) + +from pandas.tseries.offsets import Easter + + +class TestEaster(Base): + @pytest.mark.parametrize( + "offset,date,expected", + [ + (Easter(), datetime(2010, 1, 1), datetime(2010, 4, 4)), + (Easter(), datetime(2010, 4, 5), datetime(2011, 4, 24)), + (Easter(2), datetime(2010, 1, 1), datetime(2011, 4, 24)), + (Easter(), datetime(2010, 4, 4), datetime(2011, 4, 24)), + (Easter(2), datetime(2010, 4, 4), datetime(2012, 4, 8)), + (-Easter(), datetime(2011, 1, 1), datetime(2010, 4, 4)), + (-Easter(), datetime(2010, 4, 5), datetime(2010, 4, 4)), + (-Easter(2), datetime(2011, 1, 1), datetime(2009, 4, 12)), + (-Easter(), datetime(2010, 4, 4), datetime(2009, 4, 12)), + (-Easter(2), datetime(2010, 4, 4), datetime(2008, 3, 23)), + ], + ) + def test_offset(self, offset, date, expected): + assert_offset_equal(offset, date, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_fiscal.py b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_fiscal.py new file mode 100644 index 0000000..8df9310 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_fiscal.py @@ -0,0 +1,698 @@ +""" +Tests for Fiscal Year and Fiscal Quarter offset classes +""" +from datetime import datetime + +from dateutil.relativedelta import relativedelta +import pytest + +from pandas._libs.tslibs.period import INVALID_FREQ_ERR_MSG + +from pandas import Timestamp +import pandas._testing as tm +from pandas.tests.tseries.offsets.common import ( + Base, + WeekDay, + assert_is_on_offset, + assert_offset_equal, +) + +from pandas.tseries.frequencies import get_offset +from pandas.tseries.offsets import ( + FY5253, + FY5253Quarter, +) + + +def makeFY5253LastOfMonthQuarter(*args, **kwds): + return FY5253Quarter(*args, variation="last", **kwds) + + +def makeFY5253NearestEndMonthQuarter(*args, **kwds): + return FY5253Quarter(*args, variation="nearest", **kwds) + + +def makeFY5253NearestEndMonth(*args, **kwds): + return FY5253(*args, variation="nearest", **kwds) + + +def makeFY5253LastOfMonth(*args, **kwds): + return FY5253(*args, variation="last", **kwds) + + +def test_get_offset_name(): + assert ( + makeFY5253LastOfMonthQuarter( + weekday=1, startingMonth=3, qtr_with_extra_week=4 + ).freqstr + == "REQ-L-MAR-TUE-4" + ) + assert ( + makeFY5253NearestEndMonthQuarter( 
+ weekday=1, startingMonth=3, qtr_with_extra_week=3 + ).freqstr + == "REQ-N-MAR-TUE-3" + ) + + +def test_get_offset(): + with pytest.raises(ValueError, match=INVALID_FREQ_ERR_MSG): + with tm.assert_produces_warning(FutureWarning): + get_offset("gibberish") + with pytest.raises(ValueError, match=INVALID_FREQ_ERR_MSG): + with tm.assert_produces_warning(FutureWarning): + get_offset("QS-JAN-B") + + pairs = [ + ("RE-N-DEC-MON", makeFY5253NearestEndMonth(weekday=0, startingMonth=12)), + ("RE-L-DEC-TUE", makeFY5253LastOfMonth(weekday=1, startingMonth=12)), + ( + "REQ-L-MAR-TUE-4", + makeFY5253LastOfMonthQuarter( + weekday=1, startingMonth=3, qtr_with_extra_week=4 + ), + ), + ( + "REQ-L-DEC-MON-3", + makeFY5253LastOfMonthQuarter( + weekday=0, startingMonth=12, qtr_with_extra_week=3 + ), + ), + ( + "REQ-N-DEC-MON-3", + makeFY5253NearestEndMonthQuarter( + weekday=0, startingMonth=12, qtr_with_extra_week=3 + ), + ), + ] + + for name, expected in pairs: + with tm.assert_produces_warning(FutureWarning): + offset = get_offset(name) + assert offset == expected, ( + f"Expected {repr(name)} to yield {repr(expected)} " + f"(actual: {repr(offset)})" + ) + + +class TestFY5253LastOfMonth(Base): + offset_lom_sat_aug = makeFY5253LastOfMonth(1, startingMonth=8, weekday=WeekDay.SAT) + offset_lom_sat_sep = makeFY5253LastOfMonth(1, startingMonth=9, weekday=WeekDay.SAT) + + on_offset_cases = [ + # From Wikipedia (see: + # https://en.wikipedia.org/wiki/4%E2%80%934%E2%80%935_calendar#Last_Saturday_of_the_month_at_fiscal_year_end) + (offset_lom_sat_aug, datetime(2006, 8, 26), True), + (offset_lom_sat_aug, datetime(2007, 8, 25), True), + (offset_lom_sat_aug, datetime(2008, 8, 30), True), + (offset_lom_sat_aug, datetime(2009, 8, 29), True), + (offset_lom_sat_aug, datetime(2010, 8, 28), True), + (offset_lom_sat_aug, datetime(2011, 8, 27), True), + (offset_lom_sat_aug, datetime(2012, 8, 25), True), + (offset_lom_sat_aug, datetime(2013, 8, 31), True), + (offset_lom_sat_aug, datetime(2014, 8, 30), True), + (offset_lom_sat_aug, datetime(2015, 8, 29), True), + (offset_lom_sat_aug, datetime(2016, 8, 27), True), + (offset_lom_sat_aug, datetime(2017, 8, 26), True), + (offset_lom_sat_aug, datetime(2018, 8, 25), True), + (offset_lom_sat_aug, datetime(2019, 8, 31), True), + (offset_lom_sat_aug, datetime(2006, 8, 27), False), + (offset_lom_sat_aug, datetime(2007, 8, 28), False), + (offset_lom_sat_aug, datetime(2008, 8, 31), False), + (offset_lom_sat_aug, datetime(2009, 8, 30), False), + (offset_lom_sat_aug, datetime(2010, 8, 29), False), + (offset_lom_sat_aug, datetime(2011, 8, 28), False), + (offset_lom_sat_aug, datetime(2006, 8, 25), False), + (offset_lom_sat_aug, datetime(2007, 8, 24), False), + (offset_lom_sat_aug, datetime(2008, 8, 29), False), + (offset_lom_sat_aug, datetime(2009, 8, 28), False), + (offset_lom_sat_aug, datetime(2010, 8, 27), False), + (offset_lom_sat_aug, datetime(2011, 8, 26), False), + (offset_lom_sat_aug, datetime(2019, 8, 30), False), + # From GMCR (see for example: + # http://yahoo.brand.edgar-online.com/Default.aspx? 
+ # companyid=3184&formtypeID=7) + (offset_lom_sat_sep, datetime(2010, 9, 25), True), + (offset_lom_sat_sep, datetime(2011, 9, 24), True), + (offset_lom_sat_sep, datetime(2012, 9, 29), True), + ] + + @pytest.mark.parametrize("case", on_offset_cases) + def test_is_on_offset(self, case): + offset, dt, expected = case + assert_is_on_offset(offset, dt, expected) + + def test_apply(self): + offset_lom_aug_sat = makeFY5253LastOfMonth(startingMonth=8, weekday=WeekDay.SAT) + offset_lom_aug_sat_1 = makeFY5253LastOfMonth( + n=1, startingMonth=8, weekday=WeekDay.SAT + ) + + date_seq_lom_aug_sat = [ + datetime(2006, 8, 26), + datetime(2007, 8, 25), + datetime(2008, 8, 30), + datetime(2009, 8, 29), + datetime(2010, 8, 28), + datetime(2011, 8, 27), + datetime(2012, 8, 25), + datetime(2013, 8, 31), + datetime(2014, 8, 30), + datetime(2015, 8, 29), + datetime(2016, 8, 27), + ] + + tests = [ + (offset_lom_aug_sat, date_seq_lom_aug_sat), + (offset_lom_aug_sat_1, date_seq_lom_aug_sat), + (offset_lom_aug_sat, [datetime(2006, 8, 25)] + date_seq_lom_aug_sat), + (offset_lom_aug_sat_1, [datetime(2006, 8, 27)] + date_seq_lom_aug_sat[1:]), + ( + makeFY5253LastOfMonth(n=-1, startingMonth=8, weekday=WeekDay.SAT), + list(reversed(date_seq_lom_aug_sat)), + ), + ] + for test in tests: + offset, data = test + current = data[0] + for datum in data[1:]: + current = current + offset + assert current == datum + + +class TestFY5253NearestEndMonth(Base): + def test_get_year_end(self): + assert makeFY5253NearestEndMonth( + startingMonth=8, weekday=WeekDay.SAT + ).get_year_end(datetime(2013, 1, 1)) == datetime(2013, 8, 31) + assert makeFY5253NearestEndMonth( + startingMonth=8, weekday=WeekDay.SUN + ).get_year_end(datetime(2013, 1, 1)) == datetime(2013, 9, 1) + assert makeFY5253NearestEndMonth( + startingMonth=8, weekday=WeekDay.FRI + ).get_year_end(datetime(2013, 1, 1)) == datetime(2013, 8, 30) + + offset_n = FY5253(weekday=WeekDay.TUE, startingMonth=12, variation="nearest") + assert offset_n.get_year_end(datetime(2012, 1, 1)) == datetime(2013, 1, 1) + assert offset_n.get_year_end(datetime(2012, 1, 10)) == datetime(2013, 1, 1) + + assert offset_n.get_year_end(datetime(2013, 1, 1)) == datetime(2013, 12, 31) + assert offset_n.get_year_end(datetime(2013, 1, 2)) == datetime(2013, 12, 31) + assert offset_n.get_year_end(datetime(2013, 1, 3)) == datetime(2013, 12, 31) + assert offset_n.get_year_end(datetime(2013, 1, 10)) == datetime(2013, 12, 31) + + JNJ = FY5253(n=1, startingMonth=12, weekday=6, variation="nearest") + assert JNJ.get_year_end(datetime(2006, 1, 1)) == datetime(2006, 12, 31) + + offset_lom_aug_sat = makeFY5253NearestEndMonth( + 1, startingMonth=8, weekday=WeekDay.SAT + ) + offset_lom_aug_thu = makeFY5253NearestEndMonth( + 1, startingMonth=8, weekday=WeekDay.THU + ) + offset_n = FY5253(weekday=WeekDay.TUE, startingMonth=12, variation="nearest") + + on_offset_cases = [ + # From Wikipedia (see: + # https://en.wikipedia.org/wiki/4%E2%80%934%E2%80%935_calendar + # #Saturday_nearest_the_end_of_month) + # 2006-09-02 2006 September 2 + # 2007-09-01 2007 September 1 + # 2008-08-30 2008 August 30 (leap year) + # 2009-08-29 2009 August 29 + # 2010-08-28 2010 August 28 + # 2011-09-03 2011 September 3 + # 2012-09-01 2012 September 1 (leap year) + # 2013-08-31 2013 August 31 + # 2014-08-30 2014 August 30 + # 2015-08-29 2015 August 29 + # 2016-09-03 2016 September 3 (leap year) + # 2017-09-02 2017 September 2 + # 2018-09-01 2018 September 1 + # 2019-08-31 2019 August 31 + (offset_lom_aug_sat, datetime(2006, 9, 2), True), + 
(offset_lom_aug_sat, datetime(2007, 9, 1), True), + (offset_lom_aug_sat, datetime(2008, 8, 30), True), + (offset_lom_aug_sat, datetime(2009, 8, 29), True), + (offset_lom_aug_sat, datetime(2010, 8, 28), True), + (offset_lom_aug_sat, datetime(2011, 9, 3), True), + (offset_lom_aug_sat, datetime(2016, 9, 3), True), + (offset_lom_aug_sat, datetime(2017, 9, 2), True), + (offset_lom_aug_sat, datetime(2018, 9, 1), True), + (offset_lom_aug_sat, datetime(2019, 8, 31), True), + (offset_lom_aug_sat, datetime(2006, 8, 27), False), + (offset_lom_aug_sat, datetime(2007, 8, 28), False), + (offset_lom_aug_sat, datetime(2008, 8, 31), False), + (offset_lom_aug_sat, datetime(2009, 8, 30), False), + (offset_lom_aug_sat, datetime(2010, 8, 29), False), + (offset_lom_aug_sat, datetime(2011, 8, 28), False), + (offset_lom_aug_sat, datetime(2006, 8, 25), False), + (offset_lom_aug_sat, datetime(2007, 8, 24), False), + (offset_lom_aug_sat, datetime(2008, 8, 29), False), + (offset_lom_aug_sat, datetime(2009, 8, 28), False), + (offset_lom_aug_sat, datetime(2010, 8, 27), False), + (offset_lom_aug_sat, datetime(2011, 8, 26), False), + (offset_lom_aug_sat, datetime(2019, 8, 30), False), + # From Micron, see: + # http://google.brand.edgar-online.com/?sym=MU&formtypeID=7 + (offset_lom_aug_thu, datetime(2012, 8, 30), True), + (offset_lom_aug_thu, datetime(2011, 9, 1), True), + (offset_n, datetime(2012, 12, 31), False), + (offset_n, datetime(2013, 1, 1), True), + (offset_n, datetime(2013, 1, 2), False), + ] + + @pytest.mark.parametrize("case", on_offset_cases) + def test_is_on_offset(self, case): + offset, dt, expected = case + assert_is_on_offset(offset, dt, expected) + + def test_apply(self): + date_seq_nem_8_sat = [ + datetime(2006, 9, 2), + datetime(2007, 9, 1), + datetime(2008, 8, 30), + datetime(2009, 8, 29), + datetime(2010, 8, 28), + datetime(2011, 9, 3), + ] + + JNJ = [ + datetime(2005, 1, 2), + datetime(2006, 1, 1), + datetime(2006, 12, 31), + datetime(2007, 12, 30), + datetime(2008, 12, 28), + datetime(2010, 1, 3), + datetime(2011, 1, 2), + datetime(2012, 1, 1), + datetime(2012, 12, 30), + ] + + DEC_SAT = FY5253(n=-1, startingMonth=12, weekday=5, variation="nearest") + + tests = [ + ( + makeFY5253NearestEndMonth(startingMonth=8, weekday=WeekDay.SAT), + date_seq_nem_8_sat, + ), + ( + makeFY5253NearestEndMonth(n=1, startingMonth=8, weekday=WeekDay.SAT), + date_seq_nem_8_sat, + ), + ( + makeFY5253NearestEndMonth(startingMonth=8, weekday=WeekDay.SAT), + [datetime(2006, 9, 1)] + date_seq_nem_8_sat, + ), + ( + makeFY5253NearestEndMonth(n=1, startingMonth=8, weekday=WeekDay.SAT), + [datetime(2006, 9, 3)] + date_seq_nem_8_sat[1:], + ), + ( + makeFY5253NearestEndMonth(n=-1, startingMonth=8, weekday=WeekDay.SAT), + list(reversed(date_seq_nem_8_sat)), + ), + ( + makeFY5253NearestEndMonth(n=1, startingMonth=12, weekday=WeekDay.SUN), + JNJ, + ), + ( + makeFY5253NearestEndMonth(n=-1, startingMonth=12, weekday=WeekDay.SUN), + list(reversed(JNJ)), + ), + ( + makeFY5253NearestEndMonth(n=1, startingMonth=12, weekday=WeekDay.SUN), + [datetime(2005, 1, 2), datetime(2006, 1, 1)], + ), + ( + makeFY5253NearestEndMonth(n=1, startingMonth=12, weekday=WeekDay.SUN), + [datetime(2006, 1, 2), datetime(2006, 12, 31)], + ), + (DEC_SAT, [datetime(2013, 1, 15), datetime(2012, 12, 29)]), + ] + for test in tests: + offset, data = test + current = data[0] + for datum in data[1:]: + current = current + offset + assert current == datum + + +class TestFY5253LastOfMonthQuarter(Base): + def test_is_anchored(self): + assert makeFY5253LastOfMonthQuarter( + 
startingMonth=1, weekday=WeekDay.SAT, qtr_with_extra_week=4 + ).is_anchored() + assert makeFY5253LastOfMonthQuarter( + weekday=WeekDay.SAT, startingMonth=3, qtr_with_extra_week=4 + ).is_anchored() + assert not makeFY5253LastOfMonthQuarter( + 2, startingMonth=1, weekday=WeekDay.SAT, qtr_with_extra_week=4 + ).is_anchored() + + def test_equality(self): + assert makeFY5253LastOfMonthQuarter( + startingMonth=1, weekday=WeekDay.SAT, qtr_with_extra_week=4 + ) == makeFY5253LastOfMonthQuarter( + startingMonth=1, weekday=WeekDay.SAT, qtr_with_extra_week=4 + ) + assert makeFY5253LastOfMonthQuarter( + startingMonth=1, weekday=WeekDay.SAT, qtr_with_extra_week=4 + ) != makeFY5253LastOfMonthQuarter( + startingMonth=1, weekday=WeekDay.SUN, qtr_with_extra_week=4 + ) + assert makeFY5253LastOfMonthQuarter( + startingMonth=1, weekday=WeekDay.SAT, qtr_with_extra_week=4 + ) != makeFY5253LastOfMonthQuarter( + startingMonth=2, weekday=WeekDay.SAT, qtr_with_extra_week=4 + ) + + def test_offset(self): + offset = makeFY5253LastOfMonthQuarter( + 1, startingMonth=9, weekday=WeekDay.SAT, qtr_with_extra_week=4 + ) + offset2 = makeFY5253LastOfMonthQuarter( + 2, startingMonth=9, weekday=WeekDay.SAT, qtr_with_extra_week=4 + ) + offset4 = makeFY5253LastOfMonthQuarter( + 4, startingMonth=9, weekday=WeekDay.SAT, qtr_with_extra_week=4 + ) + + offset_neg1 = makeFY5253LastOfMonthQuarter( + -1, startingMonth=9, weekday=WeekDay.SAT, qtr_with_extra_week=4 + ) + offset_neg2 = makeFY5253LastOfMonthQuarter( + -2, startingMonth=9, weekday=WeekDay.SAT, qtr_with_extra_week=4 + ) + + GMCR = [ + datetime(2010, 3, 27), + datetime(2010, 6, 26), + datetime(2010, 9, 25), + datetime(2010, 12, 25), + datetime(2011, 3, 26), + datetime(2011, 6, 25), + datetime(2011, 9, 24), + datetime(2011, 12, 24), + datetime(2012, 3, 24), + datetime(2012, 6, 23), + datetime(2012, 9, 29), + datetime(2012, 12, 29), + datetime(2013, 3, 30), + datetime(2013, 6, 29), + ] + + assert_offset_equal(offset, base=GMCR[0], expected=GMCR[1]) + assert_offset_equal( + offset, base=GMCR[0] + relativedelta(days=-1), expected=GMCR[0] + ) + assert_offset_equal(offset, base=GMCR[1], expected=GMCR[2]) + + assert_offset_equal(offset2, base=GMCR[0], expected=GMCR[2]) + assert_offset_equal(offset4, base=GMCR[0], expected=GMCR[4]) + + assert_offset_equal(offset_neg1, base=GMCR[-1], expected=GMCR[-2]) + assert_offset_equal( + offset_neg1, base=GMCR[-1] + relativedelta(days=+1), expected=GMCR[-1] + ) + assert_offset_equal(offset_neg2, base=GMCR[-1], expected=GMCR[-3]) + + date = GMCR[0] + relativedelta(days=-1) + for expected in GMCR: + assert_offset_equal(offset, date, expected) + date = date + offset + + date = GMCR[-1] + relativedelta(days=+1) + for expected in reversed(GMCR): + assert_offset_equal(offset_neg1, date, expected) + date = date + offset_neg1 + + lomq_aug_sat_4 = makeFY5253LastOfMonthQuarter( + 1, startingMonth=8, weekday=WeekDay.SAT, qtr_with_extra_week=4 + ) + lomq_sep_sat_4 = makeFY5253LastOfMonthQuarter( + 1, startingMonth=9, weekday=WeekDay.SAT, qtr_with_extra_week=4 + ) + + on_offset_cases = [ + # From Wikipedia + (lomq_aug_sat_4, datetime(2006, 8, 26), True), + (lomq_aug_sat_4, datetime(2007, 8, 25), True), + (lomq_aug_sat_4, datetime(2008, 8, 30), True), + (lomq_aug_sat_4, datetime(2009, 8, 29), True), + (lomq_aug_sat_4, datetime(2010, 8, 28), True), + (lomq_aug_sat_4, datetime(2011, 8, 27), True), + (lomq_aug_sat_4, datetime(2019, 8, 31), True), + (lomq_aug_sat_4, datetime(2006, 8, 27), False), + (lomq_aug_sat_4, datetime(2007, 8, 28), False), + (lomq_aug_sat_4, 
datetime(2008, 8, 31), False), + (lomq_aug_sat_4, datetime(2009, 8, 30), False), + (lomq_aug_sat_4, datetime(2010, 8, 29), False), + (lomq_aug_sat_4, datetime(2011, 8, 28), False), + (lomq_aug_sat_4, datetime(2006, 8, 25), False), + (lomq_aug_sat_4, datetime(2007, 8, 24), False), + (lomq_aug_sat_4, datetime(2008, 8, 29), False), + (lomq_aug_sat_4, datetime(2009, 8, 28), False), + (lomq_aug_sat_4, datetime(2010, 8, 27), False), + (lomq_aug_sat_4, datetime(2011, 8, 26), False), + (lomq_aug_sat_4, datetime(2019, 8, 30), False), + # From GMCR + (lomq_sep_sat_4, datetime(2010, 9, 25), True), + (lomq_sep_sat_4, datetime(2011, 9, 24), True), + (lomq_sep_sat_4, datetime(2012, 9, 29), True), + (lomq_sep_sat_4, datetime(2013, 6, 29), True), + (lomq_sep_sat_4, datetime(2012, 6, 23), True), + (lomq_sep_sat_4, datetime(2012, 6, 30), False), + (lomq_sep_sat_4, datetime(2013, 3, 30), True), + (lomq_sep_sat_4, datetime(2012, 3, 24), True), + (lomq_sep_sat_4, datetime(2012, 12, 29), True), + (lomq_sep_sat_4, datetime(2011, 12, 24), True), + # INTC (extra week in Q1) + # See: http://www.intc.com/releasedetail.cfm?ReleaseID=542844 + ( + makeFY5253LastOfMonthQuarter( + 1, startingMonth=12, weekday=WeekDay.SAT, qtr_with_extra_week=1 + ), + datetime(2011, 4, 2), + True, + ), + # see: http://google.brand.edgar-online.com/?sym=INTC&formtypeID=7 + ( + makeFY5253LastOfMonthQuarter( + 1, startingMonth=12, weekday=WeekDay.SAT, qtr_with_extra_week=1 + ), + datetime(2012, 12, 29), + True, + ), + ( + makeFY5253LastOfMonthQuarter( + 1, startingMonth=12, weekday=WeekDay.SAT, qtr_with_extra_week=1 + ), + datetime(2011, 12, 31), + True, + ), + ( + makeFY5253LastOfMonthQuarter( + 1, startingMonth=12, weekday=WeekDay.SAT, qtr_with_extra_week=1 + ), + datetime(2010, 12, 25), + True, + ), + ] + + @pytest.mark.parametrize("case", on_offset_cases) + def test_is_on_offset(self, case): + offset, dt, expected = case + assert_is_on_offset(offset, dt, expected) + + def test_year_has_extra_week(self): + # End of long Q1 + assert makeFY5253LastOfMonthQuarter( + 1, startingMonth=12, weekday=WeekDay.SAT, qtr_with_extra_week=1 + ).year_has_extra_week(datetime(2011, 4, 2)) + + # Start of long Q1 + assert makeFY5253LastOfMonthQuarter( + 1, startingMonth=12, weekday=WeekDay.SAT, qtr_with_extra_week=1 + ).year_has_extra_week(datetime(2010, 12, 26)) + + # End of year before year with long Q1 + assert not makeFY5253LastOfMonthQuarter( + 1, startingMonth=12, weekday=WeekDay.SAT, qtr_with_extra_week=1 + ).year_has_extra_week(datetime(2010, 12, 25)) + + for year in [ + x for x in range(1994, 2011 + 1) if x not in [2011, 2005, 2000, 1994] + ]: + assert not makeFY5253LastOfMonthQuarter( + 1, startingMonth=12, weekday=WeekDay.SAT, qtr_with_extra_week=1 + ).year_has_extra_week(datetime(year, 4, 2)) + + # Other long years + assert makeFY5253LastOfMonthQuarter( + 1, startingMonth=12, weekday=WeekDay.SAT, qtr_with_extra_week=1 + ).year_has_extra_week(datetime(2005, 4, 2)) + + assert makeFY5253LastOfMonthQuarter( + 1, startingMonth=12, weekday=WeekDay.SAT, qtr_with_extra_week=1 + ).year_has_extra_week(datetime(2000, 4, 2)) + + assert makeFY5253LastOfMonthQuarter( + 1, startingMonth=12, weekday=WeekDay.SAT, qtr_with_extra_week=1 + ).year_has_extra_week(datetime(1994, 4, 2)) + + def test_get_weeks(self): + sat_dec_1 = makeFY5253LastOfMonthQuarter( + 1, startingMonth=12, weekday=WeekDay.SAT, qtr_with_extra_week=1 + ) + sat_dec_4 = makeFY5253LastOfMonthQuarter( + 1, startingMonth=12, weekday=WeekDay.SAT, qtr_with_extra_week=4 + ) + + assert 
sat_dec_1.get_weeks(datetime(2011, 4, 2)) == [14, 13, 13, 13] + assert sat_dec_4.get_weeks(datetime(2011, 4, 2)) == [13, 13, 13, 14] + assert sat_dec_1.get_weeks(datetime(2010, 12, 25)) == [13, 13, 13, 13] + + +class TestFY5253NearestEndMonthQuarter(Base): + + offset_nem_sat_aug_4 = makeFY5253NearestEndMonthQuarter( + 1, startingMonth=8, weekday=WeekDay.SAT, qtr_with_extra_week=4 + ) + offset_nem_thu_aug_4 = makeFY5253NearestEndMonthQuarter( + 1, startingMonth=8, weekday=WeekDay.THU, qtr_with_extra_week=4 + ) + offset_n = FY5253(weekday=WeekDay.TUE, startingMonth=12, variation="nearest") + + on_offset_cases = [ + # From Wikipedia + (offset_nem_sat_aug_4, datetime(2006, 9, 2), True), + (offset_nem_sat_aug_4, datetime(2007, 9, 1), True), + (offset_nem_sat_aug_4, datetime(2008, 8, 30), True), + (offset_nem_sat_aug_4, datetime(2009, 8, 29), True), + (offset_nem_sat_aug_4, datetime(2010, 8, 28), True), + (offset_nem_sat_aug_4, datetime(2011, 9, 3), True), + (offset_nem_sat_aug_4, datetime(2016, 9, 3), True), + (offset_nem_sat_aug_4, datetime(2017, 9, 2), True), + (offset_nem_sat_aug_4, datetime(2018, 9, 1), True), + (offset_nem_sat_aug_4, datetime(2019, 8, 31), True), + (offset_nem_sat_aug_4, datetime(2006, 8, 27), False), + (offset_nem_sat_aug_4, datetime(2007, 8, 28), False), + (offset_nem_sat_aug_4, datetime(2008, 8, 31), False), + (offset_nem_sat_aug_4, datetime(2009, 8, 30), False), + (offset_nem_sat_aug_4, datetime(2010, 8, 29), False), + (offset_nem_sat_aug_4, datetime(2011, 8, 28), False), + (offset_nem_sat_aug_4, datetime(2006, 8, 25), False), + (offset_nem_sat_aug_4, datetime(2007, 8, 24), False), + (offset_nem_sat_aug_4, datetime(2008, 8, 29), False), + (offset_nem_sat_aug_4, datetime(2009, 8, 28), False), + (offset_nem_sat_aug_4, datetime(2010, 8, 27), False), + (offset_nem_sat_aug_4, datetime(2011, 8, 26), False), + (offset_nem_sat_aug_4, datetime(2019, 8, 30), False), + # From Micron, see: + # http://google.brand.edgar-online.com/?sym=MU&formtypeID=7 + (offset_nem_thu_aug_4, datetime(2012, 8, 30), True), + (offset_nem_thu_aug_4, datetime(2011, 9, 1), True), + # See: http://google.brand.edgar-online.com/?sym=MU&formtypeID=13 + (offset_nem_thu_aug_4, datetime(2013, 5, 30), True), + (offset_nem_thu_aug_4, datetime(2013, 2, 28), True), + (offset_nem_thu_aug_4, datetime(2012, 11, 29), True), + (offset_nem_thu_aug_4, datetime(2012, 5, 31), True), + (offset_nem_thu_aug_4, datetime(2007, 3, 1), True), + (offset_nem_thu_aug_4, datetime(1994, 3, 3), True), + (offset_n, datetime(2012, 12, 31), False), + (offset_n, datetime(2013, 1, 1), True), + (offset_n, datetime(2013, 1, 2), False), + ] + + @pytest.mark.parametrize("case", on_offset_cases) + def test_is_on_offset(self, case): + offset, dt, expected = case + assert_is_on_offset(offset, dt, expected) + + def test_offset(self): + offset = makeFY5253NearestEndMonthQuarter( + 1, startingMonth=8, weekday=WeekDay.THU, qtr_with_extra_week=4 + ) + + MU = [ + datetime(2012, 5, 31), + datetime(2012, 8, 30), + datetime(2012, 11, 29), + datetime(2013, 2, 28), + datetime(2013, 5, 30), + ] + + date = MU[0] + relativedelta(days=-1) + for expected in MU: + assert_offset_equal(offset, date, expected) + date = date + offset + + assert_offset_equal(offset, datetime(2012, 5, 31), datetime(2012, 8, 30)) + assert_offset_equal(offset, datetime(2012, 5, 30), datetime(2012, 5, 31)) + + offset2 = FY5253Quarter( + weekday=5, startingMonth=12, variation="last", qtr_with_extra_week=4 + ) + + assert_offset_equal(offset2, datetime(2013, 1, 15), datetime(2013, 3, 30)) + + 
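+
+# Illustrative sketch (not from upstream pandas; added as a worked example):
+# the FY5253 ("52-53 week" / 4-4-5) offsets exercised above anchor the fiscal
+# year-end to a fixed weekday. With variation="nearest" the year ends on the
+# chosen weekday nearest the end of startingMonth, which is why an "August"
+# year-end can fall in early September. Mirrors the on_offset_cases above.
+def test_fy5253_nearest_sketch():
+    offset = FY5253(startingMonth=8, weekday=WeekDay.SAT, variation="nearest")
+    # 2011-09-03 is the Saturday nearest 2011-08-31, so it is on-offset
+    assert offset.is_on_offset(datetime(2011, 9, 3))
+    assert not offset.is_on_offset(datetime(2011, 8, 28))
+    # and it is the next year-end after the on-offset date 2010-08-28
+    assert datetime(2010, 8, 28) + offset == Timestamp(2011, 9, 3)
+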
+def test_bunched_yearends(): + # GH#14774 cases with two fiscal year-ends in the same calendar-year + fy = FY5253(n=1, weekday=5, startingMonth=12, variation="nearest") + dt = Timestamp("2004-01-01") + assert fy.rollback(dt) == Timestamp("2002-12-28") + assert (-fy)._apply(dt) == Timestamp("2002-12-28") + assert dt - fy == Timestamp("2002-12-28") + + assert fy.rollforward(dt) == Timestamp("2004-01-03") + assert fy._apply(dt) == Timestamp("2004-01-03") + assert fy + dt == Timestamp("2004-01-03") + assert dt + fy == Timestamp("2004-01-03") + + # Same thing, but starting from a Timestamp in the previous year. + dt = Timestamp("2003-12-31") + assert fy.rollback(dt) == Timestamp("2002-12-28") + assert (-fy)._apply(dt) == Timestamp("2002-12-28") + assert dt - fy == Timestamp("2002-12-28") + + +def test_fy5253_last_onoffset(): + # GH#18877 dates on the year-end but not normalized to midnight + offset = FY5253(n=-5, startingMonth=5, variation="last", weekday=0) + ts = Timestamp("1984-05-28 06:29:43.955911354+0200", tz="Europe/San_Marino") + fast = offset.is_on_offset(ts) + slow = (ts + offset) - offset == ts + assert fast == slow + + +def test_fy5253_nearest_onoffset(): + # GH#18877 dates on the year-end but not normalized to midnight + offset = FY5253(n=3, startingMonth=7, variation="nearest", weekday=2) + ts = Timestamp("2032-07-28 00:12:59.035729419+0000", tz="Africa/Dakar") + fast = offset.is_on_offset(ts) + slow = (ts + offset) - offset == ts + assert fast == slow + + +def test_fy5253qtr_onoffset_nearest(): + # GH#19036 + ts = Timestamp("1985-09-02 23:57:46.232550356-0300", tz="Atlantic/Bermuda") + offset = FY5253Quarter( + n=3, qtr_with_extra_week=1, startingMonth=2, variation="nearest", weekday=0 + ) + fast = offset.is_on_offset(ts) + slow = (ts + offset) - offset == ts + assert fast == slow + + +def test_fy5253qtr_onoffset_last(): + # GH#19036 + offset = FY5253Quarter( + n=-2, qtr_with_extra_week=1, startingMonth=7, variation="last", weekday=2 + ) + ts = Timestamp("2011-01-26 19:03:40.331096129+0200", tz="Africa/Windhoek") + slow = (ts + offset) - offset == ts + fast = offset.is_on_offset(ts) + assert fast == slow diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_index.py b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_index.py new file mode 100644 index 0000000..ad3478b --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_index.py @@ -0,0 +1,57 @@ +""" +Tests for offset behavior with indices. 
+""" +import pytest + +from pandas import ( + Series, + date_range, +) + +from pandas.tseries.offsets import ( + BMonthBegin, + BMonthEnd, + BQuarterBegin, + BQuarterEnd, + BYearBegin, + BYearEnd, + MonthBegin, + MonthEnd, + QuarterBegin, + QuarterEnd, + YearBegin, + YearEnd, +) + + +@pytest.mark.parametrize("n", [-2, 1]) +@pytest.mark.parametrize( + "cls", + [ + MonthBegin, + MonthEnd, + BMonthBegin, + BMonthEnd, + QuarterBegin, + QuarterEnd, + BQuarterBegin, + BQuarterEnd, + YearBegin, + YearEnd, + BYearBegin, + BYearEnd, + ], +) +def test_apply_index(cls, n): + offset = cls(n=n) + rng = date_range(start="1/1/2000", periods=100000, freq="T") + ser = Series(rng) + + res = rng + offset + assert res.freq is None # not retained + assert res[0] == rng[0] + offset + assert res[-1] == rng[-1] + offset + res2 = ser + offset + # apply_index is only for indexes, not series, so no res2_v2 + assert res2.iloc[0] == ser.iloc[0] + offset + assert res2.iloc[-1] == ser.iloc[-1] + offset diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_month.py b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_month.py new file mode 100644 index 0000000..00b9d7e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_month.py @@ -0,0 +1,692 @@ +""" +Tests for the following offsets: +- SemiMonthBegin +- SemiMonthEnd +- MonthBegin +- MonthEnd +""" +from datetime import datetime + +import pytest + +from pandas._libs.tslibs import Timestamp +from pandas._libs.tslibs.offsets import ( + MonthBegin, + MonthEnd, + SemiMonthBegin, + SemiMonthEnd, +) + +from pandas import ( + DatetimeIndex, + Series, + _testing as tm, + date_range, +) +from pandas.tests.tseries.offsets.common import ( + Base, + assert_is_on_offset, + assert_offset_equal, +) + + +class TestSemiMonthEnd(Base): + _offset = SemiMonthEnd + offset1 = _offset() + offset2 = _offset(2) + + def test_offset_whole_year(self): + dates = ( + datetime(2007, 12, 31), + datetime(2008, 1, 15), + datetime(2008, 1, 31), + datetime(2008, 2, 15), + datetime(2008, 2, 29), + datetime(2008, 3, 15), + datetime(2008, 3, 31), + datetime(2008, 4, 15), + datetime(2008, 4, 30), + datetime(2008, 5, 15), + datetime(2008, 5, 31), + datetime(2008, 6, 15), + datetime(2008, 6, 30), + datetime(2008, 7, 15), + datetime(2008, 7, 31), + datetime(2008, 8, 15), + datetime(2008, 8, 31), + datetime(2008, 9, 15), + datetime(2008, 9, 30), + datetime(2008, 10, 15), + datetime(2008, 10, 31), + datetime(2008, 11, 15), + datetime(2008, 11, 30), + datetime(2008, 12, 15), + datetime(2008, 12, 31), + ) + + for base, exp_date in zip(dates[:-1], dates[1:]): + assert_offset_equal(SemiMonthEnd(), base, exp_date) + + # ensure .apply_index works as expected + shift = DatetimeIndex(dates[:-1]) + with tm.assert_produces_warning(None): + # GH#22535 check that we don't get a FutureWarning from adding + # an integer array to PeriodIndex + result = SemiMonthEnd() + shift + + exp = DatetimeIndex(dates[1:]) + tm.assert_index_equal(result, exp) + + # ensure generating a range with DatetimeIndex gives same result + result = date_range(start=dates[0], end=dates[-1], freq="SM") + exp = DatetimeIndex(dates, freq="SM") + tm.assert_index_equal(result, exp) + + offset_cases = [] + offset_cases.append( + ( + SemiMonthEnd(), + { + datetime(2008, 1, 1): datetime(2008, 1, 15), + datetime(2008, 1, 15): datetime(2008, 1, 31), + datetime(2008, 1, 31): datetime(2008, 2, 15), + datetime(2006, 12, 14): datetime(2006, 12, 15), + datetime(2006, 12, 29): 
datetime(2006, 12, 31), + datetime(2006, 12, 31): datetime(2007, 1, 15), + datetime(2007, 1, 1): datetime(2007, 1, 15), + datetime(2006, 12, 1): datetime(2006, 12, 15), + datetime(2006, 12, 15): datetime(2006, 12, 31), + }, + ) + ) + + offset_cases.append( + ( + SemiMonthEnd(day_of_month=20), + { + datetime(2008, 1, 1): datetime(2008, 1, 20), + datetime(2008, 1, 15): datetime(2008, 1, 20), + datetime(2008, 1, 21): datetime(2008, 1, 31), + datetime(2008, 1, 31): datetime(2008, 2, 20), + datetime(2006, 12, 14): datetime(2006, 12, 20), + datetime(2006, 12, 29): datetime(2006, 12, 31), + datetime(2006, 12, 31): datetime(2007, 1, 20), + datetime(2007, 1, 1): datetime(2007, 1, 20), + datetime(2006, 12, 1): datetime(2006, 12, 20), + datetime(2006, 12, 15): datetime(2006, 12, 20), + }, + ) + ) + + offset_cases.append( + ( + SemiMonthEnd(0), + { + datetime(2008, 1, 1): datetime(2008, 1, 15), + datetime(2008, 1, 16): datetime(2008, 1, 31), + datetime(2008, 1, 15): datetime(2008, 1, 15), + datetime(2008, 1, 31): datetime(2008, 1, 31), + datetime(2006, 12, 29): datetime(2006, 12, 31), + datetime(2006, 12, 31): datetime(2006, 12, 31), + datetime(2007, 1, 1): datetime(2007, 1, 15), + }, + ) + ) + + offset_cases.append( + ( + SemiMonthEnd(0, day_of_month=16), + { + datetime(2008, 1, 1): datetime(2008, 1, 16), + datetime(2008, 1, 16): datetime(2008, 1, 16), + datetime(2008, 1, 15): datetime(2008, 1, 16), + datetime(2008, 1, 31): datetime(2008, 1, 31), + datetime(2006, 12, 29): datetime(2006, 12, 31), + datetime(2006, 12, 31): datetime(2006, 12, 31), + datetime(2007, 1, 1): datetime(2007, 1, 16), + }, + ) + ) + + offset_cases.append( + ( + SemiMonthEnd(2), + { + datetime(2008, 1, 1): datetime(2008, 1, 31), + datetime(2008, 1, 31): datetime(2008, 2, 29), + datetime(2006, 12, 29): datetime(2007, 1, 15), + datetime(2006, 12, 31): datetime(2007, 1, 31), + datetime(2007, 1, 1): datetime(2007, 1, 31), + datetime(2007, 1, 16): datetime(2007, 2, 15), + datetime(2006, 11, 1): datetime(2006, 11, 30), + }, + ) + ) + + offset_cases.append( + ( + SemiMonthEnd(-1), + { + datetime(2007, 1, 1): datetime(2006, 12, 31), + datetime(2008, 6, 30): datetime(2008, 6, 15), + datetime(2008, 12, 31): datetime(2008, 12, 15), + datetime(2006, 12, 29): datetime(2006, 12, 15), + datetime(2006, 12, 30): datetime(2006, 12, 15), + datetime(2007, 1, 1): datetime(2006, 12, 31), + }, + ) + ) + + offset_cases.append( + ( + SemiMonthEnd(-1, day_of_month=4), + { + datetime(2007, 1, 1): datetime(2006, 12, 31), + datetime(2007, 1, 4): datetime(2006, 12, 31), + datetime(2008, 6, 30): datetime(2008, 6, 4), + datetime(2008, 12, 31): datetime(2008, 12, 4), + datetime(2006, 12, 5): datetime(2006, 12, 4), + datetime(2006, 12, 30): datetime(2006, 12, 4), + datetime(2007, 1, 1): datetime(2006, 12, 31), + }, + ) + ) + + offset_cases.append( + ( + SemiMonthEnd(-2), + { + datetime(2007, 1, 1): datetime(2006, 12, 15), + datetime(2008, 6, 30): datetime(2008, 5, 31), + datetime(2008, 3, 15): datetime(2008, 2, 15), + datetime(2008, 12, 31): datetime(2008, 11, 30), + datetime(2006, 12, 29): datetime(2006, 11, 30), + datetime(2006, 12, 14): datetime(2006, 11, 15), + datetime(2007, 1, 1): datetime(2006, 12, 15), + }, + ) + ) + + @pytest.mark.parametrize("case", offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in cases.items(): + assert_offset_equal(offset, base, expected) + + @pytest.mark.parametrize("case", offset_cases) + def test_apply_index(self, case): + # https://github.com/pandas-dev/pandas/issues/34580 + offset, 
cases = case + shift = DatetimeIndex(cases.keys()) + exp = DatetimeIndex(cases.values()) + + with tm.assert_produces_warning(None): + # GH#22535 check that we don't get a FutureWarning from adding + # an integer array to PeriodIndex + result = offset + shift + tm.assert_index_equal(result, exp) + + with tm.assert_produces_warning(FutureWarning): + result = offset.apply_index(shift) + tm.assert_index_equal(result, exp) + + on_offset_cases = [ + (datetime(2007, 12, 31), True), + (datetime(2007, 12, 15), True), + (datetime(2007, 12, 14), False), + (datetime(2007, 12, 1), False), + (datetime(2008, 2, 29), True), + ] + + @pytest.mark.parametrize("case", on_offset_cases) + def test_is_on_offset(self, case): + dt, expected = case + assert_is_on_offset(SemiMonthEnd(), dt, expected) + + @pytest.mark.parametrize("klass", [Series, DatetimeIndex]) + def test_vectorized_offset_addition(self, klass): + shift = klass( + [ + Timestamp("2000-01-15 00:15:00", tz="US/Central"), + Timestamp("2000-02-15", tz="US/Central"), + ], + name="a", + ) + + with tm.assert_produces_warning(None): + # GH#22535 check that we don't get a FutureWarning from adding + # an integer array to PeriodIndex + result = shift + SemiMonthEnd() + result2 = SemiMonthEnd() + shift + + exp = klass( + [ + Timestamp("2000-01-31 00:15:00", tz="US/Central"), + Timestamp("2000-02-29", tz="US/Central"), + ], + name="a", + ) + tm.assert_equal(result, exp) + tm.assert_equal(result2, exp) + + shift = klass( + [ + Timestamp("2000-01-01 00:15:00", tz="US/Central"), + Timestamp("2000-02-01", tz="US/Central"), + ], + name="a", + ) + + with tm.assert_produces_warning(None): + # GH#22535 check that we don't get a FutureWarning from adding + # an integer array to PeriodIndex + result = shift + SemiMonthEnd() + result2 = SemiMonthEnd() + shift + + exp = klass( + [ + Timestamp("2000-01-15 00:15:00", tz="US/Central"), + Timestamp("2000-02-15", tz="US/Central"), + ], + name="a", + ) + tm.assert_equal(result, exp) + tm.assert_equal(result2, exp) + + +class TestSemiMonthBegin(Base): + _offset = SemiMonthBegin + offset1 = _offset() + offset2 = _offset(2) + + def test_offset_whole_year(self): + dates = ( + datetime(2007, 12, 15), + datetime(2008, 1, 1), + datetime(2008, 1, 15), + datetime(2008, 2, 1), + datetime(2008, 2, 15), + datetime(2008, 3, 1), + datetime(2008, 3, 15), + datetime(2008, 4, 1), + datetime(2008, 4, 15), + datetime(2008, 5, 1), + datetime(2008, 5, 15), + datetime(2008, 6, 1), + datetime(2008, 6, 15), + datetime(2008, 7, 1), + datetime(2008, 7, 15), + datetime(2008, 8, 1), + datetime(2008, 8, 15), + datetime(2008, 9, 1), + datetime(2008, 9, 15), + datetime(2008, 10, 1), + datetime(2008, 10, 15), + datetime(2008, 11, 1), + datetime(2008, 11, 15), + datetime(2008, 12, 1), + datetime(2008, 12, 15), + ) + + for base, exp_date in zip(dates[:-1], dates[1:]): + assert_offset_equal(SemiMonthBegin(), base, exp_date) + + # ensure .apply_index works as expected + shift = DatetimeIndex(dates[:-1]) + with tm.assert_produces_warning(None): + # GH#22535 check that we don't get a FutureWarning from adding + # an integer array to PeriodIndex + result = SemiMonthBegin() + shift + + exp = DatetimeIndex(dates[1:]) + tm.assert_index_equal(result, exp) + + # ensure generating a range with DatetimeIndex gives same result + result = date_range(start=dates[0], end=dates[-1], freq="SMS") + exp = DatetimeIndex(dates, freq="SMS") + tm.assert_index_equal(result, exp) + + offset_cases = [ + ( + SemiMonthBegin(), + { + datetime(2008, 1, 1): datetime(2008, 1, 15), + 
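+                # The cases in this mapping follow the two semi-month anchors
+                # (the 1st and day_of_month, default 15): a date before an
+                # anchor maps to that anchor, and a date already on an anchor
+                # maps to the next one.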
datetime(2008, 1, 15): datetime(2008, 2, 1), + datetime(2008, 1, 31): datetime(2008, 2, 1), + datetime(2006, 12, 14): datetime(2006, 12, 15), + datetime(2006, 12, 29): datetime(2007, 1, 1), + datetime(2006, 12, 31): datetime(2007, 1, 1), + datetime(2007, 1, 1): datetime(2007, 1, 15), + datetime(2006, 12, 1): datetime(2006, 12, 15), + datetime(2006, 12, 15): datetime(2007, 1, 1), + }, + ), + ( + SemiMonthBegin(day_of_month=20), + { + datetime(2008, 1, 1): datetime(2008, 1, 20), + datetime(2008, 1, 15): datetime(2008, 1, 20), + datetime(2008, 1, 21): datetime(2008, 2, 1), + datetime(2008, 1, 31): datetime(2008, 2, 1), + datetime(2006, 12, 14): datetime(2006, 12, 20), + datetime(2006, 12, 29): datetime(2007, 1, 1), + datetime(2006, 12, 31): datetime(2007, 1, 1), + datetime(2007, 1, 1): datetime(2007, 1, 20), + datetime(2006, 12, 1): datetime(2006, 12, 20), + datetime(2006, 12, 15): datetime(2006, 12, 20), + }, + ), + ( + SemiMonthBegin(0), + { + datetime(2008, 1, 1): datetime(2008, 1, 1), + datetime(2008, 1, 16): datetime(2008, 2, 1), + datetime(2008, 1, 15): datetime(2008, 1, 15), + datetime(2008, 1, 31): datetime(2008, 2, 1), + datetime(2006, 12, 29): datetime(2007, 1, 1), + datetime(2006, 12, 2): datetime(2006, 12, 15), + datetime(2007, 1, 1): datetime(2007, 1, 1), + }, + ), + ( + SemiMonthBegin(0, day_of_month=16), + { + datetime(2008, 1, 1): datetime(2008, 1, 1), + datetime(2008, 1, 16): datetime(2008, 1, 16), + datetime(2008, 1, 15): datetime(2008, 1, 16), + datetime(2008, 1, 31): datetime(2008, 2, 1), + datetime(2006, 12, 29): datetime(2007, 1, 1), + datetime(2006, 12, 31): datetime(2007, 1, 1), + datetime(2007, 1, 5): datetime(2007, 1, 16), + datetime(2007, 1, 1): datetime(2007, 1, 1), + }, + ), + ( + SemiMonthBegin(2), + { + datetime(2008, 1, 1): datetime(2008, 2, 1), + datetime(2008, 1, 31): datetime(2008, 2, 15), + datetime(2006, 12, 1): datetime(2007, 1, 1), + datetime(2006, 12, 29): datetime(2007, 1, 15), + datetime(2006, 12, 15): datetime(2007, 1, 15), + datetime(2007, 1, 1): datetime(2007, 2, 1), + datetime(2007, 1, 16): datetime(2007, 2, 15), + datetime(2006, 11, 1): datetime(2006, 12, 1), + }, + ), + ( + SemiMonthBegin(-1), + { + datetime(2007, 1, 1): datetime(2006, 12, 15), + datetime(2008, 6, 30): datetime(2008, 6, 15), + datetime(2008, 6, 14): datetime(2008, 6, 1), + datetime(2008, 12, 31): datetime(2008, 12, 15), + datetime(2006, 12, 29): datetime(2006, 12, 15), + datetime(2006, 12, 15): datetime(2006, 12, 1), + datetime(2007, 1, 1): datetime(2006, 12, 15), + }, + ), + ( + SemiMonthBegin(-1, day_of_month=4), + { + datetime(2007, 1, 1): datetime(2006, 12, 4), + datetime(2007, 1, 4): datetime(2007, 1, 1), + datetime(2008, 6, 30): datetime(2008, 6, 4), + datetime(2008, 12, 31): datetime(2008, 12, 4), + datetime(2006, 12, 5): datetime(2006, 12, 4), + datetime(2006, 12, 30): datetime(2006, 12, 4), + datetime(2006, 12, 2): datetime(2006, 12, 1), + datetime(2007, 1, 1): datetime(2006, 12, 4), + }, + ), + ( + SemiMonthBegin(-2), + { + datetime(2007, 1, 1): datetime(2006, 12, 1), + datetime(2008, 6, 30): datetime(2008, 6, 1), + datetime(2008, 6, 14): datetime(2008, 5, 15), + datetime(2008, 12, 31): datetime(2008, 12, 1), + datetime(2006, 12, 29): datetime(2006, 12, 1), + datetime(2006, 12, 15): datetime(2006, 11, 15), + datetime(2007, 1, 1): datetime(2006, 12, 1), + }, + ), + ] + + @pytest.mark.parametrize("case", offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in cases.items(): + assert_offset_equal(offset, base, expected) + + 
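+    # Illustrative sketch (not from upstream pandas): a minimal restatement of
+    # the SemiMonthBegin semantics checked by the tables above -- dates anchor
+    # to the 1st and to day_of_month (default 15), and a date already on an
+    # anchor rolls forward to the next anchor when n=1.
+    def test_semi_month_begin_sketch(self):
+        assert datetime(2008, 1, 1) + SemiMonthBegin() == Timestamp(2008, 1, 15)
+        assert datetime(2008, 1, 15) + SemiMonthBegin() == Timestamp(2008, 2, 1)
+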
@pytest.mark.parametrize("case", offset_cases) + def test_apply_index(self, case): + offset, cases = case + shift = DatetimeIndex(cases.keys()) + + with tm.assert_produces_warning(None): + # GH#22535 check that we don't get a FutureWarning from adding + # an integer array to PeriodIndex + result = offset + shift + + exp = DatetimeIndex(cases.values()) + tm.assert_index_equal(result, exp) + + on_offset_cases = [ + (datetime(2007, 12, 1), True), + (datetime(2007, 12, 15), True), + (datetime(2007, 12, 14), False), + (datetime(2007, 12, 31), False), + (datetime(2008, 2, 15), True), + ] + + @pytest.mark.parametrize("case", on_offset_cases) + def test_is_on_offset(self, case): + dt, expected = case + assert_is_on_offset(SemiMonthBegin(), dt, expected) + + @pytest.mark.parametrize("klass", [Series, DatetimeIndex]) + def test_vectorized_offset_addition(self, klass): + shift = klass( + [ + Timestamp("2000-01-15 00:15:00", tz="US/Central"), + Timestamp("2000-02-15", tz="US/Central"), + ], + name="a", + ) + with tm.assert_produces_warning(None): + # GH#22535 check that we don't get a FutureWarning from adding + # an integer array to PeriodIndex + result = shift + SemiMonthBegin() + result2 = SemiMonthBegin() + shift + + exp = klass( + [ + Timestamp("2000-02-01 00:15:00", tz="US/Central"), + Timestamp("2000-03-01", tz="US/Central"), + ], + name="a", + ) + tm.assert_equal(result, exp) + tm.assert_equal(result2, exp) + + shift = klass( + [ + Timestamp("2000-01-01 00:15:00", tz="US/Central"), + Timestamp("2000-02-01", tz="US/Central"), + ], + name="a", + ) + with tm.assert_produces_warning(None): + # GH#22535 check that we don't get a FutureWarning from adding + # an integer array to PeriodIndex + result = shift + SemiMonthBegin() + result2 = SemiMonthBegin() + shift + + exp = klass( + [ + Timestamp("2000-01-15 00:15:00", tz="US/Central"), + Timestamp("2000-02-15", tz="US/Central"), + ], + name="a", + ) + tm.assert_equal(result, exp) + tm.assert_equal(result2, exp) + + +class TestMonthBegin(Base): + _offset = MonthBegin + + offset_cases = [] + # NOTE: I'm not entirely happy with the logic here for Begin -ss + # see thread 'offset conventions' on the ML + offset_cases.append( + ( + MonthBegin(), + { + datetime(2008, 1, 31): datetime(2008, 2, 1), + datetime(2008, 2, 1): datetime(2008, 3, 1), + datetime(2006, 12, 31): datetime(2007, 1, 1), + datetime(2006, 12, 1): datetime(2007, 1, 1), + datetime(2007, 1, 31): datetime(2007, 2, 1), + }, + ) + ) + + offset_cases.append( + ( + MonthBegin(0), + { + datetime(2008, 1, 31): datetime(2008, 2, 1), + datetime(2008, 1, 1): datetime(2008, 1, 1), + datetime(2006, 12, 3): datetime(2007, 1, 1), + datetime(2007, 1, 31): datetime(2007, 2, 1), + }, + ) + ) + + offset_cases.append( + ( + MonthBegin(2), + { + datetime(2008, 2, 29): datetime(2008, 4, 1), + datetime(2008, 1, 31): datetime(2008, 3, 1), + datetime(2006, 12, 31): datetime(2007, 2, 1), + datetime(2007, 12, 28): datetime(2008, 2, 1), + datetime(2007, 1, 1): datetime(2007, 3, 1), + datetime(2006, 11, 1): datetime(2007, 1, 1), + }, + ) + ) + + offset_cases.append( + ( + MonthBegin(-1), + { + datetime(2007, 1, 1): datetime(2006, 12, 1), + datetime(2008, 5, 31): datetime(2008, 5, 1), + datetime(2008, 12, 31): datetime(2008, 12, 1), + datetime(2006, 12, 29): datetime(2006, 12, 1), + datetime(2006, 1, 2): datetime(2006, 1, 1), + }, + ) + ) + + @pytest.mark.parametrize("case", offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in cases.items(): + assert_offset_equal(offset, base, 
expected) + + +class TestMonthEnd(Base): + _offset = MonthEnd + + def test_day_of_month(self): + dt = datetime(2007, 1, 1) + offset = MonthEnd() + + result = dt + offset + assert result == Timestamp(2007, 1, 31) + + result = result + offset + assert result == Timestamp(2007, 2, 28) + + def test_normalize(self): + dt = datetime(2007, 1, 1, 3) + + result = dt + MonthEnd(normalize=True) + expected = dt.replace(hour=0) + MonthEnd() + assert result == expected + + offset_cases = [] + offset_cases.append( + ( + MonthEnd(), + { + datetime(2008, 1, 1): datetime(2008, 1, 31), + datetime(2008, 1, 31): datetime(2008, 2, 29), + datetime(2006, 12, 29): datetime(2006, 12, 31), + datetime(2006, 12, 31): datetime(2007, 1, 31), + datetime(2007, 1, 1): datetime(2007, 1, 31), + datetime(2006, 12, 1): datetime(2006, 12, 31), + }, + ) + ) + + offset_cases.append( + ( + MonthEnd(0), + { + datetime(2008, 1, 1): datetime(2008, 1, 31), + datetime(2008, 1, 31): datetime(2008, 1, 31), + datetime(2006, 12, 29): datetime(2006, 12, 31), + datetime(2006, 12, 31): datetime(2006, 12, 31), + datetime(2007, 1, 1): datetime(2007, 1, 31), + }, + ) + ) + + offset_cases.append( + ( + MonthEnd(2), + { + datetime(2008, 1, 1): datetime(2008, 2, 29), + datetime(2008, 1, 31): datetime(2008, 3, 31), + datetime(2006, 12, 29): datetime(2007, 1, 31), + datetime(2006, 12, 31): datetime(2007, 2, 28), + datetime(2007, 1, 1): datetime(2007, 2, 28), + datetime(2006, 11, 1): datetime(2006, 12, 31), + }, + ) + ) + + offset_cases.append( + ( + MonthEnd(-1), + { + datetime(2007, 1, 1): datetime(2006, 12, 31), + datetime(2008, 6, 30): datetime(2008, 5, 31), + datetime(2008, 12, 31): datetime(2008, 11, 30), + datetime(2006, 12, 29): datetime(2006, 11, 30), + datetime(2006, 12, 30): datetime(2006, 11, 30), + datetime(2007, 1, 1): datetime(2006, 12, 31), + }, + ) + ) + + @pytest.mark.parametrize("case", offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in cases.items(): + assert_offset_equal(offset, base, expected) + + on_offset_cases = [ + (MonthEnd(), datetime(2007, 12, 31), True), + (MonthEnd(), datetime(2008, 1, 1), False), + ] + + @pytest.mark.parametrize("case", on_offset_cases) + def test_is_on_offset(self, case): + offset, dt, expected = case + assert_is_on_offset(offset, dt, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_offsets.py b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_offsets.py new file mode 100644 index 0000000..0737029 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_offsets.py @@ -0,0 +1,866 @@ +""" +Tests of pandas.tseries.offsets +""" +from __future__ import annotations + +from datetime import ( + datetime, + timedelta, +) +from typing import ( + Dict, + List, + Tuple, +) + +import numpy as np +import pytest + +from pandas._libs.tslibs import ( + NaT, + Timestamp, + conversion, + timezones, +) +import pandas._libs.tslibs.offsets as liboffsets +from pandas._libs.tslibs.offsets import ( + _get_offset, + _offset_map, +) +from pandas._libs.tslibs.period import INVALID_FREQ_ERR_MSG +from pandas.errors import PerformanceWarning + +from pandas import DatetimeIndex +import pandas._testing as tm +from pandas.tests.tseries.offsets.common import ( + Base, + WeekDay, +) + +import pandas.tseries.offsets as offsets +from pandas.tseries.offsets import ( + FY5253, + BaseOffset, + BDay, + BMonthEnd, + BusinessHour, + CustomBusinessDay, + CustomBusinessHour, + CustomBusinessMonthBegin, + 
+    CustomBusinessMonthEnd,
+    DateOffset,
+    Easter,
+    FY5253Quarter,
+    LastWeekOfMonth,
+    MonthBegin,
+    Nano,
+    Tick,
+    Week,
+    WeekOfMonth,
+)
+
+_ApplyCases = List[Tuple[BaseOffset, Dict[datetime, datetime]]]
+
+
+class TestCommon(Base):
+    # expected values are created by Base._get_offset and applied to
+    # 2011/01/01 09:00 (Saturday); used for .apply and .rollforward
+    expecteds = {
+        "Day": Timestamp("2011-01-02 09:00:00"),
+        "DateOffset": Timestamp("2011-01-02 09:00:00"),
+        "BusinessDay": Timestamp("2011-01-03 09:00:00"),
+        "CustomBusinessDay": Timestamp("2011-01-03 09:00:00"),
+        "CustomBusinessMonthEnd": Timestamp("2011-01-31 09:00:00"),
+        "CustomBusinessMonthBegin": Timestamp("2011-01-03 09:00:00"),
+        "MonthBegin": Timestamp("2011-02-01 09:00:00"),
+        "BusinessMonthBegin": Timestamp("2011-01-03 09:00:00"),
+        "MonthEnd": Timestamp("2011-01-31 09:00:00"),
+        "SemiMonthEnd": Timestamp("2011-01-15 09:00:00"),
+        "SemiMonthBegin": Timestamp("2011-01-15 09:00:00"),
+        "BusinessMonthEnd": Timestamp("2011-01-31 09:00:00"),
+        "YearBegin": Timestamp("2012-01-01 09:00:00"),
+        "BYearBegin": Timestamp("2011-01-03 09:00:00"),
+        "YearEnd": Timestamp("2011-12-31 09:00:00"),
+        "BYearEnd": Timestamp("2011-12-30 09:00:00"),
+        "QuarterBegin": Timestamp("2011-03-01 09:00:00"),
+        "BQuarterBegin": Timestamp("2011-03-01 09:00:00"),
+        "QuarterEnd": Timestamp("2011-03-31 09:00:00"),
+        "BQuarterEnd": Timestamp("2011-03-31 09:00:00"),
+        "BusinessHour": Timestamp("2011-01-03 10:00:00"),
+        "CustomBusinessHour": Timestamp("2011-01-03 10:00:00"),
+        "WeekOfMonth": Timestamp("2011-01-08 09:00:00"),
+        "LastWeekOfMonth": Timestamp("2011-01-29 09:00:00"),
+        "FY5253Quarter": Timestamp("2011-01-25 09:00:00"),
+        "FY5253": Timestamp("2011-01-25 09:00:00"),
+        "Week": Timestamp("2011-01-08 09:00:00"),
+        "Easter": Timestamp("2011-04-24 09:00:00"),
+        "Hour": Timestamp("2011-01-01 10:00:00"),
+        "Minute": Timestamp("2011-01-01 09:01:00"),
+        "Second": Timestamp("2011-01-01 09:00:01"),
+        "Milli": Timestamp("2011-01-01 09:00:00.001000"),
+        "Micro": Timestamp("2011-01-01 09:00:00.000001"),
+        "Nano": Timestamp("2011-01-01T09:00:00.000000001"),
+    }
+
+    def test_immutable(self, offset_types):
+        # GH#21341 check that __setattr__ raises
+        offset = self._get_offset(offset_types)
+        msg = "objects is not writable|DateOffset objects are immutable"
+        with pytest.raises(AttributeError, match=msg):
+            offset.normalize = True
+        with pytest.raises(AttributeError, match=msg):
+            offset.n = 91
+
+    def test_return_type(self, offset_types):
+        offset = self._get_offset(offset_types)
+
+        # make sure that we are returning a Timestamp
+        result = Timestamp("20080101") + offset
+        assert isinstance(result, Timestamp)
+
+        # make sure that we are returning NaT
+        assert NaT + offset is NaT
+        assert offset + NaT is NaT
+
+        assert NaT - offset is NaT
+        assert (-offset)._apply(NaT) is NaT
+
+    def test_offset_n(self, offset_types):
+        offset = self._get_offset(offset_types)
+        assert offset.n == 1
+
+        neg_offset = offset * -1
+        assert neg_offset.n == -1
+
+        mul_offset = offset * 3
+        assert mul_offset.n == 3
+
+    def test_offset_timedelta64_arg(self, offset_types):
+        # check that offset._validate_n raises TypeError on a timedelta64
+        # object
+        off = self._get_offset(offset_types)
+
+        td64 = np.timedelta64(4567, "s")
+        with pytest.raises(TypeError, match="argument must be an integer"):
+            type(off)(n=td64, **off.kwds)
+
+    def test_offset_mul_ndarray(self, offset_types):
+        off = self._get_offset(offset_types)
+
+        expected = np.array([[off, off * 2], [off * 3, off * 4]])
+
+        result = np.array([[1, 2], [3, 4]]) * off
+        tm.assert_numpy_array_equal(result, expected)
+
+        result = off * np.array([[1, 2], [3, 4]])
+        tm.assert_numpy_array_equal(result, expected)
+
+    def test_offset_freqstr(self, offset_types):
+        offset = self._get_offset(offset_types)
+
+        freqstr = offset.freqstr
+        if freqstr not in ("<BusinessDay>", "<DateOffset: days=1>", "LWOM-SAT"):
+            code = _get_offset(freqstr)
+            assert offset.rule_code == code
+
+    def _check_offsetfunc_works(self, offset, funcname, dt, expected, normalize=False):
+
+        if normalize and issubclass(offset, Tick):
+            # normalize=True disallowed for Tick subclasses GH#21427
+            return
+
+        offset_s = self._get_offset(offset, normalize=normalize)
+        func = getattr(offset_s, funcname)
+
+        result = func(dt)
+        assert isinstance(result, Timestamp)
+        assert result == expected
+
+        result = func(Timestamp(dt))
+        assert isinstance(result, Timestamp)
+        assert result == expected
+
+        # see gh-14101
+        exp_warning = None
+        ts = Timestamp(dt) + Nano(5)
+
+        if (
+            type(offset_s).__name__ == "DateOffset"
+            and (funcname in ["apply", "_apply"] or normalize)
+            and ts.nanosecond > 0
+        ):
+            exp_warning = UserWarning
+
+        # test nanosecond is preserved
+        with tm.assert_produces_warning(exp_warning):
+            result = func(ts)
+
+        if exp_warning is None and funcname == "_apply":
+            # GH#44522
+            # Check in this particular case to avoid headaches with
+            # testing for multiple warnings produced by the same call.
+            with tm.assert_produces_warning(FutureWarning, match="apply is deprecated"):
+                res2 = offset_s.apply(ts)
+
+            assert type(res2) is type(result)
+            assert res2 == result
+
+        assert isinstance(result, Timestamp)
+        if normalize is False:
+            assert result == expected + Nano(5)
+        else:
+            assert result == expected
+
+        if isinstance(dt, np.datetime64):
+            # test tz when input is datetime or Timestamp
+            return
+
+        for tz in self.timezones:
+            expected_localize = expected.tz_localize(tz)
+            tz_obj = timezones.maybe_get_tz(tz)
+            dt_tz = conversion.localize_pydatetime(dt, tz_obj)
+
+            result = func(dt_tz)
+            assert isinstance(result, Timestamp)
+            assert result == expected_localize
+
+            result = func(Timestamp(dt, tz=tz))
+            assert isinstance(result, Timestamp)
+            assert result == expected_localize
+
+            # see gh-14101
+            exp_warning = None
+            ts = Timestamp(dt, tz=tz) + Nano(5)
+
+            if (
+                type(offset_s).__name__ == "DateOffset"
+                and (funcname in ["apply", "_apply"] or normalize)
+                and ts.nanosecond > 0
+            ):
+                exp_warning = UserWarning
+
+            # test nanosecond is preserved
+            with tm.assert_produces_warning(exp_warning):
+                result = func(ts)
+            assert isinstance(result, Timestamp)
+            if normalize is False:
+                assert result == expected_localize + Nano(5)
+            else:
+                assert result == expected_localize
+
+    def test_apply(self, offset_types):
+        sdt = datetime(2011, 1, 1, 9, 0)
+        ndt = np.datetime64("2011-01-01 09:00")
+
+        expected = self.expecteds[offset_types.__name__]
+        expected_norm = Timestamp(expected.date())
+
+        for dt in [sdt, ndt]:
+            self._check_offsetfunc_works(offset_types, "_apply", dt, expected)
+
+            self._check_offsetfunc_works(
+                offset_types, "_apply", dt, expected_norm, normalize=True
+            )
+
+    def test_rollforward(self, offset_types):
+        expecteds = self.expecteds.copy()
+
+        # result will not be changed if the target is on the offset
+        no_changes = [
+            "Day",
+            "MonthBegin",
+            "SemiMonthBegin",
+            "YearBegin",
+            "Week",
+            "Hour",
+            "Minute",
+            "Second",
+            "Milli",
+            "Micro",
+            "Nano",
+            "DateOffset",
+        ]
+        for n in no_changes:
+            expecteds[n] = Timestamp("2011/01/01 09:00")
+
+        expecteds["BusinessHour"] = Timestamp("2011-01-03 
09:00:00") + expecteds["CustomBusinessHour"] = Timestamp("2011-01-03 09:00:00") + + # but be changed when normalize=True + norm_expected = expecteds.copy() + for k in norm_expected: + norm_expected[k] = Timestamp(norm_expected[k].date()) + + normalized = { + "Day": Timestamp("2011-01-02 00:00:00"), + "DateOffset": Timestamp("2011-01-02 00:00:00"), + "MonthBegin": Timestamp("2011-02-01 00:00:00"), + "SemiMonthBegin": Timestamp("2011-01-15 00:00:00"), + "YearBegin": Timestamp("2012-01-01 00:00:00"), + "Week": Timestamp("2011-01-08 00:00:00"), + "Hour": Timestamp("2011-01-01 00:00:00"), + "Minute": Timestamp("2011-01-01 00:00:00"), + "Second": Timestamp("2011-01-01 00:00:00"), + "Milli": Timestamp("2011-01-01 00:00:00"), + "Micro": Timestamp("2011-01-01 00:00:00"), + } + norm_expected.update(normalized) + + sdt = datetime(2011, 1, 1, 9, 0) + ndt = np.datetime64("2011-01-01 09:00") + + for dt in [sdt, ndt]: + expected = expecteds[offset_types.__name__] + self._check_offsetfunc_works(offset_types, "rollforward", dt, expected) + expected = norm_expected[offset_types.__name__] + self._check_offsetfunc_works( + offset_types, "rollforward", dt, expected, normalize=True + ) + + def test_rollback(self, offset_types): + expecteds = { + "BusinessDay": Timestamp("2010-12-31 09:00:00"), + "CustomBusinessDay": Timestamp("2010-12-31 09:00:00"), + "CustomBusinessMonthEnd": Timestamp("2010-12-31 09:00:00"), + "CustomBusinessMonthBegin": Timestamp("2010-12-01 09:00:00"), + "BusinessMonthBegin": Timestamp("2010-12-01 09:00:00"), + "MonthEnd": Timestamp("2010-12-31 09:00:00"), + "SemiMonthEnd": Timestamp("2010-12-31 09:00:00"), + "BusinessMonthEnd": Timestamp("2010-12-31 09:00:00"), + "BYearBegin": Timestamp("2010-01-01 09:00:00"), + "YearEnd": Timestamp("2010-12-31 09:00:00"), + "BYearEnd": Timestamp("2010-12-31 09:00:00"), + "QuarterBegin": Timestamp("2010-12-01 09:00:00"), + "BQuarterBegin": Timestamp("2010-12-01 09:00:00"), + "QuarterEnd": Timestamp("2010-12-31 09:00:00"), + "BQuarterEnd": Timestamp("2010-12-31 09:00:00"), + "BusinessHour": Timestamp("2010-12-31 17:00:00"), + "CustomBusinessHour": Timestamp("2010-12-31 17:00:00"), + "WeekOfMonth": Timestamp("2010-12-11 09:00:00"), + "LastWeekOfMonth": Timestamp("2010-12-25 09:00:00"), + "FY5253Quarter": Timestamp("2010-10-26 09:00:00"), + "FY5253": Timestamp("2010-01-26 09:00:00"), + "Easter": Timestamp("2010-04-04 09:00:00"), + } + + # result will not be changed if the target is on the offset + for n in [ + "Day", + "MonthBegin", + "SemiMonthBegin", + "YearBegin", + "Week", + "Hour", + "Minute", + "Second", + "Milli", + "Micro", + "Nano", + "DateOffset", + ]: + expecteds[n] = Timestamp("2011/01/01 09:00") + + # but be changed when normalize=True + norm_expected = expecteds.copy() + for k in norm_expected: + norm_expected[k] = Timestamp(norm_expected[k].date()) + + normalized = { + "Day": Timestamp("2010-12-31 00:00:00"), + "DateOffset": Timestamp("2010-12-31 00:00:00"), + "MonthBegin": Timestamp("2010-12-01 00:00:00"), + "SemiMonthBegin": Timestamp("2010-12-15 00:00:00"), + "YearBegin": Timestamp("2010-01-01 00:00:00"), + "Week": Timestamp("2010-12-25 00:00:00"), + "Hour": Timestamp("2011-01-01 00:00:00"), + "Minute": Timestamp("2011-01-01 00:00:00"), + "Second": Timestamp("2011-01-01 00:00:00"), + "Milli": Timestamp("2011-01-01 00:00:00"), + "Micro": Timestamp("2011-01-01 00:00:00"), + } + norm_expected.update(normalized) + + sdt = datetime(2011, 1, 1, 9, 0) + ndt = np.datetime64("2011-01-01 09:00") + + for dt in [sdt, ndt]: + expected = 
expecteds[offset_types.__name__] + self._check_offsetfunc_works(offset_types, "rollback", dt, expected) + + expected = norm_expected[offset_types.__name__] + self._check_offsetfunc_works( + offset_types, "rollback", dt, expected, normalize=True + ) + + def test_is_on_offset(self, offset_types): + dt = self.expecteds[offset_types.__name__] + offset_s = self._get_offset(offset_types) + assert offset_s.is_on_offset(dt) + + # when normalize=True, is_on_offset checks time is 00:00:00 + if issubclass(offset_types, Tick): + # normalize=True disallowed for Tick subclasses GH#21427 + return + offset_n = self._get_offset(offset_types, normalize=True) + assert not offset_n.is_on_offset(dt) + + if offset_types in (BusinessHour, CustomBusinessHour): + # In default BusinessHour (9:00-17:00), normalized time + # cannot be in business hour range + return + date = datetime(dt.year, dt.month, dt.day) + assert offset_n.is_on_offset(date) + + def test_add(self, offset_types, tz_naive_fixture): + tz = tz_naive_fixture + dt = datetime(2011, 1, 1, 9, 0) + + offset_s = self._get_offset(offset_types) + expected = self.expecteds[offset_types.__name__] + + result_dt = dt + offset_s + result_ts = Timestamp(dt) + offset_s + for result in [result_dt, result_ts]: + assert isinstance(result, Timestamp) + assert result == expected + + expected_localize = expected.tz_localize(tz) + result = Timestamp(dt, tz=tz) + offset_s + assert isinstance(result, Timestamp) + assert result == expected_localize + + # normalize=True, disallowed for Tick subclasses GH#21427 + if issubclass(offset_types, Tick): + return + offset_s = self._get_offset(offset_types, normalize=True) + expected = Timestamp(expected.date()) + + result_dt = dt + offset_s + result_ts = Timestamp(dt) + offset_s + for result in [result_dt, result_ts]: + assert isinstance(result, Timestamp) + assert result == expected + + expected_localize = expected.tz_localize(tz) + result = Timestamp(dt, tz=tz) + offset_s + assert isinstance(result, Timestamp) + assert result == expected_localize + + def test_add_empty_datetimeindex(self, offset_types, tz_naive_fixture): + # GH#12724, GH#30336 + offset_s = self._get_offset(offset_types) + + dti = DatetimeIndex([], tz=tz_naive_fixture) + + warn = None + if isinstance( + offset_s, + ( + Easter, + WeekOfMonth, + LastWeekOfMonth, + CustomBusinessDay, + BusinessHour, + CustomBusinessHour, + CustomBusinessMonthBegin, + CustomBusinessMonthEnd, + FY5253, + FY5253Quarter, + ), + ): + # We don't have an optimized apply_index + warn = PerformanceWarning + + with tm.assert_produces_warning(warn): + result = dti + offset_s + tm.assert_index_equal(result, dti) + with tm.assert_produces_warning(warn): + result = offset_s + dti + tm.assert_index_equal(result, dti) + + dta = dti._data + with tm.assert_produces_warning(warn): + result = dta + offset_s + tm.assert_equal(result, dta) + with tm.assert_produces_warning(warn): + result = offset_s + dta + tm.assert_equal(result, dta) + + def test_pickle_roundtrip(self, offset_types): + off = self._get_offset(offset_types) + res = tm.round_trip_pickle(off) + assert off == res + if type(off) is not DateOffset: + for attr in off._attributes: + if attr == "calendar": + # np.busdaycalendar __eq__ will return False; + # we check holidays and weekmask attrs so are OK + continue + # Make sure nothings got lost from _params (which __eq__) is based on + assert getattr(off, attr) == getattr(res, attr) + + def test_pickle_dateoffset_odd_inputs(self): + # GH#34511 + off = DateOffset(months=12) + res = 
tm.round_trip_pickle(off) + assert off == res + + base_dt = datetime(2020, 1, 1) + assert base_dt + off == base_dt + res + + def test_onOffset_deprecated(self, offset_types, fixed_now_ts): + # GH#30340 use idiomatic naming + off = self._get_offset(offset_types) + + ts = fixed_now_ts + with tm.assert_produces_warning(FutureWarning): + result = off.onOffset(ts) + + expected = off.is_on_offset(ts) + assert result == expected + + def test_isAnchored_deprecated(self, offset_types): + # GH#30340 use idiomatic naming + off = self._get_offset(offset_types) + + with tm.assert_produces_warning(FutureWarning): + result = off.isAnchored() + + expected = off.is_anchored() + assert result == expected + + def test_offsets_hashable(self, offset_types): + # GH: 37267 + off = self._get_offset(offset_types) + assert hash(off) is not None + + +class TestDateOffset(Base): + def setup_method(self, method): + self.d = Timestamp(datetime(2008, 1, 2)) + _offset_map.clear() + + def test_repr(self): + repr(DateOffset()) + repr(DateOffset(2)) + repr(2 * DateOffset()) + repr(2 * DateOffset(months=2)) + + def test_mul(self): + assert DateOffset(2) == 2 * DateOffset(1) + assert DateOffset(2) == DateOffset(1) * 2 + + def test_constructor(self): + + assert (self.d + DateOffset(months=2)) == datetime(2008, 3, 2) + assert (self.d - DateOffset(months=2)) == datetime(2007, 11, 2) + + assert (self.d + DateOffset(2)) == datetime(2008, 1, 4) + + assert not DateOffset(2).is_anchored() + assert DateOffset(1).is_anchored() + + d = datetime(2008, 1, 31) + assert (d + DateOffset(months=1)) == datetime(2008, 2, 29) + + def test_copy(self): + assert DateOffset(months=2).copy() == DateOffset(months=2) + + def test_eq(self): + offset1 = DateOffset(days=1) + offset2 = DateOffset(days=365) + + assert offset1 != offset2 + + +class TestOffsetNames: + def test_get_offset_name(self): + assert BDay().freqstr == "B" + assert BDay(2).freqstr == "2B" + assert BMonthEnd().freqstr == "BM" + assert Week(weekday=0).freqstr == "W-MON" + assert Week(weekday=1).freqstr == "W-TUE" + assert Week(weekday=2).freqstr == "W-WED" + assert Week(weekday=3).freqstr == "W-THU" + assert Week(weekday=4).freqstr == "W-FRI" + + assert LastWeekOfMonth(weekday=WeekDay.SUN).freqstr == "LWOM-SUN" + + +def test_get_offset(): + with pytest.raises(ValueError, match=INVALID_FREQ_ERR_MSG): + _get_offset("gibberish") + with pytest.raises(ValueError, match=INVALID_FREQ_ERR_MSG): + _get_offset("QS-JAN-B") + + pairs = [ + ("B", BDay()), + ("b", BDay()), + ("bm", BMonthEnd()), + ("Bm", BMonthEnd()), + ("W-MON", Week(weekday=0)), + ("W-TUE", Week(weekday=1)), + ("W-WED", Week(weekday=2)), + ("W-THU", Week(weekday=3)), + ("W-FRI", Week(weekday=4)), + ] + + for name, expected in pairs: + offset = _get_offset(name) + assert offset == expected, ( + f"Expected {repr(name)} to yield {repr(expected)} " + f"(actual: {repr(offset)})" + ) + + +def test_get_offset_legacy(): + pairs = [("w@Sat", Week(weekday=5))] + for name, expected in pairs: + with pytest.raises(ValueError, match=INVALID_FREQ_ERR_MSG): + _get_offset(name) + + +class TestOffsetAliases: + def setup_method(self, method): + _offset_map.clear() + + def test_alias_equality(self): + for k, v in _offset_map.items(): + if v is None: + continue + assert k == v.copy() + + def test_rule_code(self): + lst = ["M", "MS", "BM", "BMS", "D", "B", "H", "T", "S", "L", "U"] + for k in lst: + assert k == _get_offset(k).rule_code + # should be cached - this is kind of an internals test... 
+ assert k in _offset_map + assert k == (_get_offset(k) * 3).rule_code + + suffix_lst = ["MON", "TUE", "WED", "THU", "FRI", "SAT", "SUN"] + base = "W" + for v in suffix_lst: + alias = "-".join([base, v]) + assert alias == _get_offset(alias).rule_code + assert alias == (_get_offset(alias) * 5).rule_code + + suffix_lst = [ + "JAN", + "FEB", + "MAR", + "APR", + "MAY", + "JUN", + "JUL", + "AUG", + "SEP", + "OCT", + "NOV", + "DEC", + ] + base_lst = ["A", "AS", "BA", "BAS", "Q", "QS", "BQ", "BQS"] + for base in base_lst: + for v in suffix_lst: + alias = "-".join([base, v]) + assert alias == _get_offset(alias).rule_code + assert alias == (_get_offset(alias) * 5).rule_code + + +def test_freq_offsets(): + off = BDay(1, offset=timedelta(0, 1800)) + assert off.freqstr == "B+30Min" + + off = BDay(1, offset=timedelta(0, -1800)) + assert off.freqstr == "B-30Min" + + +class TestReprNames: + def test_str_for_named_is_name(self): + # look at all the amazing combinations! + month_prefixes = ["A", "AS", "BA", "BAS", "Q", "BQ", "BQS", "QS"] + names = [ + prefix + "-" + month + for prefix in month_prefixes + for month in [ + "JAN", + "FEB", + "MAR", + "APR", + "MAY", + "JUN", + "JUL", + "AUG", + "SEP", + "OCT", + "NOV", + "DEC", + ] + ] + days = ["MON", "TUE", "WED", "THU", "FRI", "SAT", "SUN"] + names += ["W-" + day for day in days] + names += ["WOM-" + week + day for week in ("1", "2", "3", "4") for day in days] + _offset_map.clear() + for name in names: + offset = _get_offset(name) + assert offset.freqstr == name + + +def get_utc_offset_hours(ts): + # take a Timestamp and compute total hours of utc offset + o = ts.utcoffset() + return (o.days * 24 * 3600 + o.seconds) / 3600.0 + + +# --------------------------------------------------------------------- + + +def test_valid_default_arguments(offset_types): + # GH#19142 check that calling the constructors without passing + # any keyword arguments produces valid offsets + cls = offset_types + cls() + + +@pytest.mark.parametrize("kwd", sorted(liboffsets._relativedelta_kwds)) +def test_valid_month_attributes(kwd, month_classes): + # GH#18226 + cls = month_classes + # check that we cannot create e.g. MonthEnd(weeks=3) + msg = rf"__init__\(\) got an unexpected keyword argument '{kwd}'" + with pytest.raises(TypeError, match=msg): + cls(**{kwd: 3}) + + +def test_month_offset_name(month_classes): + # GH#33757 off.name with n != 1 should not raise AttributeError + obj = month_classes(1) + obj2 = month_classes(2) + assert obj2.name == obj.name + + +@pytest.mark.parametrize("kwd", sorted(liboffsets._relativedelta_kwds)) +def test_valid_relativedelta_kwargs(kwd): + # Check that all the arguments specified in liboffsets._relativedelta_kwds + # are in fact valid relativedelta keyword args + DateOffset(**{kwd: 1})
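+ + +# A hedged sketch of what the checks above guarantee in practice: any +# relativedelta field is a legal DateOffset keyword. The helper name below is +# illustrative only and relies on this module's existing imports: +def _example_relativedelta_kwargs(): + # two months forward, then four days forward + assert Timestamp("2000-01-27") + DateOffset(months=2, days=4) == Timestamp( + "2000-03-31" + )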
+ + +@pytest.mark.parametrize("kwd", sorted(liboffsets._relativedelta_kwds)) +def test_valid_tick_attributes(kwd, tick_classes): + # GH#18226 + cls = tick_classes + # check that we cannot create e.g. Hour(weeks=3) + msg = rf"__init__\(\) got an unexpected keyword argument '{kwd}'" + with pytest.raises(TypeError, match=msg): + cls(**{kwd: 3}) + + +def test_validate_n_error(): + with pytest.raises(TypeError, match="argument must be an integer"): + DateOffset(n="Doh!") + + with pytest.raises(TypeError, match="argument must be an integer"): + MonthBegin(n=timedelta(1)) + + with pytest.raises(TypeError, match="argument must be an integer"): + BDay(n=np.array([1, 2], dtype=np.int64)) + + +def test_require_integers(offset_types): + cls = offset_types + with pytest.raises(ValueError, match="argument must be an integer"): + cls(n=1.5) + + +def test_tick_normalize_raises(tick_classes): + # check that trying to create a Tick object with normalize=True raises + # GH#21427 + cls = tick_classes + msg = "Tick offset with `normalize=True` are not allowed." + with pytest.raises(ValueError, match=msg): + cls(n=3, normalize=True) + + +@pytest.mark.parametrize( + "offset_kwargs, expected_arg", + [ + ({"nanoseconds": 1}, "1970-01-01 00:00:00.000000001"), + ({"nanoseconds": 5}, "1970-01-01 00:00:00.000000005"), + ({"nanoseconds": -1}, "1969-12-31 23:59:59.999999999"), + ({"microseconds": 1}, "1970-01-01 00:00:00.000001"), + ({"microseconds": -1}, "1969-12-31 23:59:59.999999"), + ({"seconds": 1}, "1970-01-01 00:00:01"), + ({"seconds": -1}, "1969-12-31 23:59:59"), + ({"minutes": 1}, "1970-01-01 00:01:00"), + ({"minutes": -1}, "1969-12-31 23:59:00"), + ({"hours": 1}, "1970-01-01 01:00:00"), + ({"hours": -1}, "1969-12-31 23:00:00"), + ({"days": 1}, "1970-01-02 00:00:00"), + ({"days": -1}, "1969-12-31 00:00:00"), + ({"weeks": 1}, "1970-01-08 00:00:00"), + ({"weeks": -1}, "1969-12-25 00:00:00"), + ({"months": 1}, "1970-02-01 00:00:00"), + ({"months": -1}, "1969-12-01 00:00:00"), + ({"years": 1}, "1971-01-01 00:00:00"), + ({"years": -1}, "1969-01-01 00:00:00"), + ], +) +def test_dateoffset_add_sub(offset_kwargs, expected_arg): + offset = DateOffset(**offset_kwargs) + ts = Timestamp(0) + result = ts + offset + expected = Timestamp(expected_arg) + assert result == expected + result -= offset + assert result == ts + result = offset + ts + assert result == expected + + +def test_dateoffset_add_sub_timestamp_with_nano(): + offset = DateOffset(minutes=2, nanoseconds=9) + ts = Timestamp(4) + result = ts + offset + expected = Timestamp("1970-01-01 00:02:00.000000013") + assert result == expected + result -= offset + assert result == ts + result = offset + ts + assert result == expected + + +@pytest.mark.parametrize( + "attribute", + [ + "hours", + "days", + "weeks", + "months", + "years", + ], +) +def test_dateoffset_immutable(attribute): + offset = DateOffset(**{attribute: 0}) + msg = "DateOffset objects are immutable" + with pytest.raises(AttributeError, match=msg): + setattr(offset, attribute, 5) + + +def test_dateoffset_misc(): + oset = offsets.DateOffset(months=2, days=4) + # it works + oset.freqstr + + assert not offsets.DateOffset(months=2) == 2 + + +@pytest.mark.parametrize("n", [-1, 1, 3]) +def test_construct_int_arg_no_kwargs_assumed_days(n): + # GH 45890, 45643 + offset = DateOffset(n) + assert offset._offset == timedelta(1) + result = Timestamp(2022, 1, 2) + offset + expected = Timestamp(2022, 1, 2 + n) + assert result == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_offsets_properties.py b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_offsets_properties.py new file mode 100644 index 0000000..1b4fa92 --- /dev/null +++
b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_offsets_properties.py @@ -0,0 +1,60 @@ +""" +Behavior-based tests for offsets and date_range. + +This file is adapted from https://github.com/pandas-dev/pandas/pull/18761 - +which was more ambitious but less idiomatic in its use of Hypothesis. + +You may wish to consult the previous version for inspiration on further +tests, or when trying to pin down the bugs exposed by the tests below. +""" +from hypothesis import ( + assume, + given, +) +import pytest +import pytz + +import pandas as pd +from pandas._testing._hypothesis import ( + DATETIME_JAN_1_1900_OPTIONAL_TZ, + YQM_OFFSET, +) + +# ---------------------------------------------------------------- +# Offset-specific behaviour tests + + +@pytest.mark.arm_slow +@given(DATETIME_JAN_1_1900_OPTIONAL_TZ, YQM_OFFSET) +def test_on_offset_implementations(dt, offset): + assume(not offset.normalize) + # check that the class-specific implementations of is_on_offset match + # the general case definition: + # (dt + offset) - offset == dt + try: + compare = (dt + offset) - offset + except (pytz.NonExistentTimeError, pytz.AmbiguousTimeError): + # When dt + offset does not exist or is DST-ambiguous, assume(False) to + # indicate to hypothesis that this is not a valid test case + # DST-ambiguous example (GH41906): + # dt = datetime.datetime(1900, 1, 1, tzinfo=pytz.timezone('Africa/Kinshasa')) + # offset = MonthBegin(66) + assume(False) + + assert offset.is_on_offset(dt) == (compare == dt) + + +@given(YQM_OFFSET) +def test_shift_across_dst(offset): + # GH#18319 check that 1) timezone is correctly normalized and + # 2) that hour is not incorrectly changed by this normalization + assume(not offset.normalize) + + # Note that dti includes a transition across DST boundary + dti = pd.date_range( + start="2017-10-30 12:00:00", end="2017-11-06", freq="D", tz="US/Eastern" + ) + assert (dti.hour == 12).all() # we haven't screwed up yet + + res = dti + offset + assert (res.hour == 12).all() diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_quarter.py b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_quarter.py new file mode 100644 index 0000000..e076fb9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_quarter.py @@ -0,0 +1,298 @@ +""" +Tests for the following offsets: +- QuarterBegin +- QuarterEnd +""" +from datetime import datetime + +import pytest + +from pandas.tests.tseries.offsets.common import ( + Base, + assert_is_on_offset, + assert_offset_equal, +) + +from pandas.tseries.offsets import ( + QuarterBegin, + QuarterEnd, +) + + +def test_quarterly_dont_normalize(): + date = datetime(2012, 3, 31, 5, 30) + + offsets = (QuarterBegin, QuarterEnd) + + for klass in offsets: + result = date + klass() + assert result.time() == date.time() + + +@pytest.mark.parametrize("offset", [QuarterBegin(), QuarterEnd()]) +def test_on_offset(offset): + dates = [ + datetime(2016, m, d) + for m in [10, 11, 12] + for d in [1, 2, 3, 28, 29, 30, 31] + if not (m == 11 and d == 31) + ] + for date in dates: + res = offset.is_on_offset(date) + slow_version = date == (date + offset) - offset + assert res == slow_version
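+ + +# The identity exercised above is the general definition of "on offset": a +# date is on offset exactly when adding and then subtracting the offset is a +# round trip. A minimal sketch (hypothetical helper, reusing the imports above): +def _example_on_offset_identity(): + d = datetime(2016, 12, 31) + offset = QuarterEnd() # default startingMonth=3: quarters end Mar/Jun/Sep/Dec + assert offset.is_on_offset(d) + assert (d + offset) - offset == d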
+ + +class TestQuarterBegin(Base): + def test_repr(self): + expected = "<QuarterBegin: startingMonth=3>" + assert repr(QuarterBegin()) == expected + expected = "<QuarterBegin: startingMonth=3>" + assert repr(QuarterBegin(startingMonth=3)) == expected + expected = "<QuarterBegin: startingMonth=1>" + assert repr(QuarterBegin(startingMonth=1)) == expected + + def test_is_anchored(self): + assert QuarterBegin(startingMonth=1).is_anchored() + assert QuarterBegin().is_anchored() + assert not QuarterBegin(2, startingMonth=1).is_anchored() + + def test_offset_corner_case(self): + # corner + offset = QuarterBegin(n=-1, startingMonth=1) + assert datetime(2010, 2, 1) + offset == datetime(2010, 1, 1) + + offset_cases = [] + offset_cases.append( + ( + QuarterBegin(startingMonth=1), + { + datetime(2007, 12, 1): datetime(2008, 1, 1), + datetime(2008, 1, 1): datetime(2008, 4, 1), + datetime(2008, 2, 15): datetime(2008, 4, 1), + datetime(2008, 2, 29): datetime(2008, 4, 1), + datetime(2008, 3, 15): datetime(2008, 4, 1), + datetime(2008, 3, 31): datetime(2008, 4, 1), + datetime(2008, 4, 15): datetime(2008, 7, 1), + datetime(2008, 4, 1): datetime(2008, 7, 1), + }, + ) + ) + + offset_cases.append( + ( + QuarterBegin(startingMonth=2), + { + datetime(2008, 1, 1): datetime(2008, 2, 1), + datetime(2008, 1, 31): datetime(2008, 2, 1), + datetime(2008, 1, 15): datetime(2008, 2, 1), + datetime(2008, 2, 29): datetime(2008, 5, 1), + datetime(2008, 3, 15): datetime(2008, 5, 1), + datetime(2008, 3, 31): datetime(2008, 5, 1), + datetime(2008, 4, 15): datetime(2008, 5, 1), + datetime(2008, 4, 30): datetime(2008, 5, 1), + }, + ) + ) + + offset_cases.append( + ( + QuarterBegin(startingMonth=1, n=0), + { + datetime(2008, 1, 1): datetime(2008, 1, 1), + datetime(2008, 12, 1): datetime(2009, 1, 1), + datetime(2008, 2, 15): datetime(2008, 4, 1), + datetime(2008, 2, 29): datetime(2008, 4, 1), + datetime(2008, 3, 15): datetime(2008, 4, 1), + datetime(2008, 3, 31): datetime(2008, 4, 1), + datetime(2008, 4, 15): datetime(2008, 7, 1), + datetime(2008, 4, 30): datetime(2008, 7, 1), + }, + ) + ) + + offset_cases.append( + ( + QuarterBegin(startingMonth=1, n=-1), + { + datetime(2008, 1, 1): datetime(2007, 10, 1), + datetime(2008, 1, 31): datetime(2008, 1, 1), + datetime(2008, 2, 15): datetime(2008, 1, 1), + datetime(2008, 2, 29): datetime(2008, 1, 1), + datetime(2008, 3, 15): datetime(2008, 1, 1), + datetime(2008, 3, 31): datetime(2008, 1, 1), + datetime(2008, 4, 15): datetime(2008, 4, 1), + datetime(2008, 4, 30): datetime(2008, 4, 1), + datetime(2008, 7, 1): datetime(2008, 4, 1), + }, + ) + ) + + offset_cases.append( + ( + QuarterBegin(startingMonth=1, n=2), + { + datetime(2008, 1, 1): datetime(2008, 7, 1), + datetime(2008, 2, 15): datetime(2008, 7, 1), + datetime(2008, 2, 29): datetime(2008, 7, 1), + datetime(2008, 3, 15): datetime(2008, 7, 1), + datetime(2008, 3, 31): datetime(2008, 7, 1), + datetime(2008, 4, 15): datetime(2008, 10, 1), + datetime(2008, 4, 1): datetime(2008, 10, 1), + }, + ) + ) + + @pytest.mark.parametrize("case", offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in cases.items(): + assert_offset_equal(offset, base, expected) + + +class TestQuarterEnd(Base): + _offset = QuarterEnd + + def test_repr(self): + expected = "<QuarterEnd: startingMonth=3>" + assert repr(QuarterEnd()) == expected + expected = "<QuarterEnd: startingMonth=3>" + assert repr(QuarterEnd(startingMonth=3)) == expected + expected = "<QuarterEnd: startingMonth=1>" + assert repr(QuarterEnd(startingMonth=1)) == expected + + def test_is_anchored(self): + assert QuarterEnd(startingMonth=1).is_anchored() + assert QuarterEnd().is_anchored() + assert not QuarterEnd(2, startingMonth=1).is_anchored() + + def test_offset_corner_case(self): + # corner + offset = QuarterEnd(n=-1, startingMonth=1) + assert datetime(2010, 2, 1) + offset == datetime(2010, 1, 31) + + offset_cases = [] + offset_cases.append( + ( + QuarterEnd(startingMonth=1), + { +
datetime(2008, 1, 1): datetime(2008, 1, 31), + datetime(2008, 1, 31): datetime(2008, 4, 30), + datetime(2008, 2, 15): datetime(2008, 4, 30), + datetime(2008, 2, 29): datetime(2008, 4, 30), + datetime(2008, 3, 15): datetime(2008, 4, 30), + datetime(2008, 3, 31): datetime(2008, 4, 30), + datetime(2008, 4, 15): datetime(2008, 4, 30), + datetime(2008, 4, 30): datetime(2008, 7, 31), + }, + ) + ) + + offset_cases.append( + ( + QuarterEnd(startingMonth=2), + { + datetime(2008, 1, 1): datetime(2008, 2, 29), + datetime(2008, 1, 31): datetime(2008, 2, 29), + datetime(2008, 2, 15): datetime(2008, 2, 29), + datetime(2008, 2, 29): datetime(2008, 5, 31), + datetime(2008, 3, 15): datetime(2008, 5, 31), + datetime(2008, 3, 31): datetime(2008, 5, 31), + datetime(2008, 4, 15): datetime(2008, 5, 31), + datetime(2008, 4, 30): datetime(2008, 5, 31), + }, + ) + ) + + offset_cases.append( + ( + QuarterEnd(startingMonth=1, n=0), + { + datetime(2008, 1, 1): datetime(2008, 1, 31), + datetime(2008, 1, 31): datetime(2008, 1, 31), + datetime(2008, 2, 15): datetime(2008, 4, 30), + datetime(2008, 2, 29): datetime(2008, 4, 30), + datetime(2008, 3, 15): datetime(2008, 4, 30), + datetime(2008, 3, 31): datetime(2008, 4, 30), + datetime(2008, 4, 15): datetime(2008, 4, 30), + datetime(2008, 4, 30): datetime(2008, 4, 30), + }, + ) + ) + + offset_cases.append( + ( + QuarterEnd(startingMonth=1, n=-1), + { + datetime(2008, 1, 1): datetime(2007, 10, 31), + datetime(2008, 1, 31): datetime(2007, 10, 31), + datetime(2008, 2, 15): datetime(2008, 1, 31), + datetime(2008, 2, 29): datetime(2008, 1, 31), + datetime(2008, 3, 15): datetime(2008, 1, 31), + datetime(2008, 3, 31): datetime(2008, 1, 31), + datetime(2008, 4, 15): datetime(2008, 1, 31), + datetime(2008, 4, 30): datetime(2008, 1, 31), + datetime(2008, 7, 1): datetime(2008, 4, 30), + }, + ) + ) + + offset_cases.append( + ( + QuarterEnd(startingMonth=1, n=2), + { + datetime(2008, 1, 31): datetime(2008, 7, 31), + datetime(2008, 2, 15): datetime(2008, 7, 31), + datetime(2008, 2, 29): datetime(2008, 7, 31), + datetime(2008, 3, 15): datetime(2008, 7, 31), + datetime(2008, 3, 31): datetime(2008, 7, 31), + datetime(2008, 4, 15): datetime(2008, 7, 31), + datetime(2008, 4, 30): datetime(2008, 10, 31), + }, + ) + ) + + @pytest.mark.parametrize("case", offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in cases.items(): + assert_offset_equal(offset, base, expected) + + on_offset_cases = [ + (QuarterEnd(1, startingMonth=1), datetime(2008, 1, 31), True), + (QuarterEnd(1, startingMonth=1), datetime(2007, 12, 31), False), + (QuarterEnd(1, startingMonth=1), datetime(2008, 2, 29), False), + (QuarterEnd(1, startingMonth=1), datetime(2007, 3, 30), False), + (QuarterEnd(1, startingMonth=1), datetime(2007, 3, 31), False), + (QuarterEnd(1, startingMonth=1), datetime(2008, 4, 30), True), + (QuarterEnd(1, startingMonth=1), datetime(2008, 5, 30), False), + (QuarterEnd(1, startingMonth=1), datetime(2008, 5, 31), False), + (QuarterEnd(1, startingMonth=1), datetime(2007, 6, 29), False), + (QuarterEnd(1, startingMonth=1), datetime(2007, 6, 30), False), + (QuarterEnd(1, startingMonth=2), datetime(2008, 1, 31), False), + (QuarterEnd(1, startingMonth=2), datetime(2007, 12, 31), False), + (QuarterEnd(1, startingMonth=2), datetime(2008, 2, 29), True), + (QuarterEnd(1, startingMonth=2), datetime(2007, 3, 30), False), + (QuarterEnd(1, startingMonth=2), datetime(2007, 3, 31), False), + (QuarterEnd(1, startingMonth=2), datetime(2008, 4, 30), False), + (QuarterEnd(1, 
startingMonth=2), datetime(2008, 5, 30), False), + (QuarterEnd(1, startingMonth=2), datetime(2008, 5, 31), True), + (QuarterEnd(1, startingMonth=2), datetime(2007, 6, 29), False), + (QuarterEnd(1, startingMonth=2), datetime(2007, 6, 30), False), + (QuarterEnd(1, startingMonth=3), datetime(2008, 1, 31), False), + (QuarterEnd(1, startingMonth=3), datetime(2007, 12, 31), True), + (QuarterEnd(1, startingMonth=3), datetime(2008, 2, 29), False), + (QuarterEnd(1, startingMonth=3), datetime(2007, 3, 30), False), + (QuarterEnd(1, startingMonth=3), datetime(2007, 3, 31), True), + (QuarterEnd(1, startingMonth=3), datetime(2008, 4, 30), False), + (QuarterEnd(1, startingMonth=3), datetime(2008, 5, 30), False), + (QuarterEnd(1, startingMonth=3), datetime(2008, 5, 31), False), + (QuarterEnd(1, startingMonth=3), datetime(2007, 6, 29), False), + (QuarterEnd(1, startingMonth=3), datetime(2007, 6, 30), True), + ] + + @pytest.mark.parametrize("case", on_offset_cases) + def test_is_on_offset(self, case): + offset, dt, expected = case + assert_is_on_offset(offset, dt, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_ticks.py b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_ticks.py new file mode 100644 index 0000000..7a174b8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_ticks.py @@ -0,0 +1,393 @@ +""" +Tests for offsets.Tick and subclasses +""" +from datetime import ( + datetime, + timedelta, +) + +from hypothesis import ( + assume, + example, + given, +) +import numpy as np +import pytest + +from pandas._libs.tslibs.offsets import delta_to_tick + +from pandas import ( + Timedelta, + Timestamp, +) +import pandas._testing as tm +from pandas._testing._hypothesis import INT_NEG_999_TO_POS_999 +from pandas.tests.tseries.offsets.common import assert_offset_equal + +from pandas.tseries import offsets +from pandas.tseries.offsets import ( + Hour, + Micro, + Milli, + Minute, + Nano, + Second, +) + +# --------------------------------------------------------------------- +# Test Helpers + +tick_classes = [Hour, Minute, Second, Milli, Micro, Nano] + + +# --------------------------------------------------------------------- + + +def test_apply_ticks(): + result = offsets.Hour(3)._apply(offsets.Hour(4)) + exp = offsets.Hour(7) + assert result == exp + + +def test_delta_to_tick(): + delta = timedelta(3) + + tick = delta_to_tick(delta) + assert tick == offsets.Day(3) + + td = Timedelta(nanoseconds=5) + tick = delta_to_tick(td) + assert tick == Nano(5) + + +@pytest.mark.parametrize("cls", tick_classes) +@example(n=2, m=3) +@example(n=800, m=300) +@example(n=1000, m=5) +@given(n=INT_NEG_999_TO_POS_999, m=INT_NEG_999_TO_POS_999) +def test_tick_add_sub(cls, n, m): + # For all Tick subclasses and all integers n, m, we should have + # tick(n) + tick(m) == tick(n+m) + # tick(n) - tick(m) == tick(n-m) + left = cls(n) + right = cls(m) + expected = cls(n + m) + + assert left + right == expected + assert left._apply(right) == expected + + expected = cls(n - m) + assert left - right == expected + + +@pytest.mark.arm_slow +@pytest.mark.parametrize("cls", tick_classes) +@example(n=2, m=3) +@given(n=INT_NEG_999_TO_POS_999, m=INT_NEG_999_TO_POS_999) +def test_tick_equality(cls, n, m): + assume(m != n) + # tick == tock iff tick.n == tock.n + left = cls(n) + right = cls(m) + assert left != right + assert not (left == right) + + right = cls(n) + assert left == right + assert not (left != right) + + if n != 0: + assert cls(n) != cls(-n) + 
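+ +# For reference: the identities above also hold across granularities, since +# Tick arithmetic normalizes to the finest unit involved. A small sketch +# (hypothetical helper name; Hour/Minute/Micro/Nano are imported above): +def _example_cross_unit_tick_arithmetic(): + assert Hour(1) + Minute(30) == Minute(90) + assert Nano(5) + Micro(1) == Nano(1005)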
+ +# --------------------------------------------------------------------- + + +def test_Hour(): + assert_offset_equal(Hour(), datetime(2010, 1, 1), datetime(2010, 1, 1, 1)) + assert_offset_equal(Hour(-1), datetime(2010, 1, 1, 1), datetime(2010, 1, 1)) + assert_offset_equal(2 * Hour(), datetime(2010, 1, 1), datetime(2010, 1, 1, 2)) + assert_offset_equal(-1 * Hour(), datetime(2010, 1, 1, 1), datetime(2010, 1, 1)) + + assert Hour(3) + Hour(2) == Hour(5) + assert Hour(3) - Hour(2) == Hour() + + assert Hour(4) != Hour(1) + + +def test_Minute(): + assert_offset_equal(Minute(), datetime(2010, 1, 1), datetime(2010, 1, 1, 0, 1)) + assert_offset_equal(Minute(-1), datetime(2010, 1, 1, 0, 1), datetime(2010, 1, 1)) + assert_offset_equal(2 * Minute(), datetime(2010, 1, 1), datetime(2010, 1, 1, 0, 2)) + assert_offset_equal(-1 * Minute(), datetime(2010, 1, 1, 0, 1), datetime(2010, 1, 1)) + + assert Minute(3) + Minute(2) == Minute(5) + assert Minute(3) - Minute(2) == Minute() + assert Minute(5) != Minute() + + +def test_Second(): + assert_offset_equal(Second(), datetime(2010, 1, 1), datetime(2010, 1, 1, 0, 0, 1)) + assert_offset_equal(Second(-1), datetime(2010, 1, 1, 0, 0, 1), datetime(2010, 1, 1)) + assert_offset_equal( + 2 * Second(), datetime(2010, 1, 1), datetime(2010, 1, 1, 0, 0, 2) + ) + assert_offset_equal( + -1 * Second(), datetime(2010, 1, 1, 0, 0, 1), datetime(2010, 1, 1) + ) + + assert Second(3) + Second(2) == Second(5) + assert Second(3) - Second(2) == Second() + + +def test_Millisecond(): + assert_offset_equal( + Milli(), datetime(2010, 1, 1), datetime(2010, 1, 1, 0, 0, 0, 1000) + ) + assert_offset_equal( + Milli(-1), datetime(2010, 1, 1, 0, 0, 0, 1000), datetime(2010, 1, 1) + ) + assert_offset_equal( + Milli(2), datetime(2010, 1, 1), datetime(2010, 1, 1, 0, 0, 0, 2000) + ) + assert_offset_equal( + 2 * Milli(), datetime(2010, 1, 1), datetime(2010, 1, 1, 0, 0, 0, 2000) + ) + assert_offset_equal( + -1 * Milli(), datetime(2010, 1, 1, 0, 0, 0, 1000), datetime(2010, 1, 1) + ) + + assert Milli(3) + Milli(2) == Milli(5) + assert Milli(3) - Milli(2) == Milli() + + +def test_MillisecondTimestampArithmetic(): + assert_offset_equal( + Milli(), Timestamp("2010-01-01"), Timestamp("2010-01-01 00:00:00.001") + ) + assert_offset_equal( + Milli(-1), Timestamp("2010-01-01 00:00:00.001"), Timestamp("2010-01-01") + ) + + +def test_Microsecond(): + assert_offset_equal(Micro(), datetime(2010, 1, 1), datetime(2010, 1, 1, 0, 0, 0, 1)) + assert_offset_equal( + Micro(-1), datetime(2010, 1, 1, 0, 0, 0, 1), datetime(2010, 1, 1) + ) + + assert_offset_equal( + 2 * Micro(), datetime(2010, 1, 1), datetime(2010, 1, 1, 0, 0, 0, 2) + ) + assert_offset_equal( + -1 * Micro(), datetime(2010, 1, 1, 0, 0, 0, 1), datetime(2010, 1, 1) + ) + + assert Micro(3) + Micro(2) == Micro(5) + assert Micro(3) - Micro(2) == Micro() + + +def test_NanosecondGeneric(): + timestamp = Timestamp(datetime(2010, 1, 1)) + assert timestamp.nanosecond == 0 + + result = timestamp + Nano(10) + assert result.nanosecond == 10 + + reverse_result = Nano(10) + timestamp + assert reverse_result.nanosecond == 10 + + +def test_Nanosecond(): + timestamp = Timestamp(datetime(2010, 1, 1)) + assert_offset_equal(Nano(), timestamp, timestamp + np.timedelta64(1, "ns")) + assert_offset_equal(Nano(-1), timestamp + np.timedelta64(1, "ns"), timestamp) + assert_offset_equal(2 * Nano(), timestamp, timestamp + np.timedelta64(2, "ns")) + assert_offset_equal(-1 * Nano(), timestamp + np.timedelta64(1, "ns"), timestamp) + + assert Nano(3) + Nano(2) == Nano(5) + assert Nano(3) - 
Nano(2) == Nano() + + # GH9284 + assert Nano(1) + Nano(10) == Nano(11) + assert Nano(5) + Micro(1) == Nano(1005) + assert Micro(5) + Nano(1) == Nano(5001) + + +@pytest.mark.parametrize( + "kls, expected", + [ + (Hour, Timedelta(hours=5)), + (Minute, Timedelta(hours=2, minutes=3)), + (Second, Timedelta(hours=2, seconds=3)), + (Milli, Timedelta(hours=2, milliseconds=3)), + (Micro, Timedelta(hours=2, microseconds=3)), + (Nano, Timedelta(hours=2, nanoseconds=3)), + ], +) +def test_tick_addition(kls, expected): + offset = kls(3) + td = Timedelta(hours=2) + + for other in [td, td.to_pytimedelta(), td.to_timedelta64()]: + result = offset + other + assert isinstance(result, Timedelta) + assert result == expected + + result = other + offset + assert isinstance(result, Timedelta) + assert result == expected + + +@pytest.mark.parametrize("cls", tick_classes) +def test_tick_division(cls): + off = cls(10) + + assert off / cls(5) == 2 + assert off / 2 == cls(5) + assert off / 2.0 == cls(5) + + assert off / off.delta == 1 + assert off / off.delta.to_timedelta64() == 1 + + assert off / Nano(1) == off.delta / Nano(1).delta + + if cls is not Nano: + # A case where we end up with a smaller class + result = off / 1000 + assert isinstance(result, offsets.Tick) + assert not isinstance(result, cls) + assert result.delta == off.delta / 1000 + + if cls._nanos_inc < Timedelta(seconds=1).value: + # Case where we end up with a bigger class + result = off / 0.001 + assert isinstance(result, offsets.Tick) + assert not isinstance(result, cls) + assert result.delta == off.delta / 0.001 + + +def test_tick_mul_float(): + off = Micro(2) + + # Case where we retain type + result = off * 1.5 + expected = Micro(3) + assert result == expected + assert isinstance(result, Micro) + + # Case where we bump up to the next type + result = off * 1.25 + expected = Nano(2500) + assert result == expected + assert isinstance(result, Nano) + + +@pytest.mark.parametrize("cls", tick_classes) +def test_tick_rdiv(cls): + off = cls(10) + delta = off.delta + td64 = delta.to_timedelta64() + instance__type = ".".join([cls.__module__, cls.__name__]) + msg = ( + "unsupported operand type\\(s\\) for \\/: 'int'|'float' and " + f"'{instance__type}'" + ) + + with pytest.raises(TypeError, match=msg): + 2 / off + with pytest.raises(TypeError, match=msg): + 2.0 / off + + assert (td64 * 2.5) / off == 2.5 + + if cls is not Nano: + # skip pytimedelta for Nano since it gets dropped + assert (delta.to_pytimedelta() * 2) / off == 2 + + result = np.array([2 * td64, td64]) / off + expected = np.array([2.0, 1.0]) + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize("cls1", tick_classes) +@pytest.mark.parametrize("cls2", tick_classes) +def test_tick_zero(cls1, cls2): + assert cls1(0) == cls2(0) + assert cls1(0) + cls2(0) == cls1(0) + + if cls1 is not Nano: + assert cls1(2) + cls2(0) == cls1(2) + + if cls1 is Nano: + assert cls1(2) + Nano(0) == cls1(2) + + +@pytest.mark.parametrize("cls", tick_classes) +def test_tick_equalities(cls): + assert cls() == cls(1) + + +@pytest.mark.parametrize("cls", tick_classes) +def test_tick_offset(cls): + assert not cls().is_anchored() + + +@pytest.mark.parametrize("cls", tick_classes) +def test_compare_ticks(cls): + three = cls(3) + four = cls(4) + + assert three < cls(4) + assert cls(3) < four + assert four > cls(3) + assert cls(4) > three + assert cls(3) == cls(3) + assert cls(3) != cls(4) + + +@pytest.mark.parametrize("cls", tick_classes) +def test_compare_ticks_to_strs(cls): + # GH#23524 + off = cls(19) + + 
# These tests should work with any strings, but we are particularly + # interested in "infer" as that comparison is convenient to make in + # Datetime/Timedelta Array/Index constructors + assert not off == "infer" + assert not "foo" == off + + instance_type = ".".join([cls.__module__, cls.__name__]) + msg = ( + "'<'|'<='|'>'|'>=' not supported between instances of " + f"'str' and '{instance_type}'|'{instance_type}' and 'str'" + ) + + for left, right in [("infer", off), (off, "infer")]: + with pytest.raises(TypeError, match=msg): + left < right + with pytest.raises(TypeError, match=msg): + left <= right + with pytest.raises(TypeError, match=msg): + left > right + with pytest.raises(TypeError, match=msg): + left >= right + + +@pytest.mark.parametrize("cls", tick_classes) +def test_compare_ticks_to_timedeltalike(cls): + off = cls(19) + + td = off.delta + + others = [td, td.to_timedelta64()] + if cls is not Nano: + others.append(td.to_pytimedelta()) + + for other in others: + assert off == other + assert not off != other + assert not off < other + assert not off > other + assert off <= other + assert off >= other
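+ + +# In short: a Tick compares equal to (and orders against) any timedelta-like +# of the same duration, while ordering against strings raises TypeError. A +# hedged sketch (hypothetical helper name; imports are at the top of this file): +def _example_tick_comparison(): + assert Minute(19) == timedelta(minutes=19) + assert Minute(19) <= Timedelta(minutes=19)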
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_week.py b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_week.py new file mode 100644 index 0000000..be574fd --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_week.py @@ -0,0 +1,363 @@ +""" +Tests for the following offsets: +- Week +- WeekOfMonth +- LastWeekOfMonth +""" +from datetime import ( + datetime, + timedelta, +) + +import pytest + +from pandas._libs.tslibs import Timestamp +from pandas._libs.tslibs.offsets import ( + Day, + LastWeekOfMonth, + Week, + WeekOfMonth, +) + +from pandas.tests.tseries.offsets.common import ( + Base, + WeekDay, + assert_is_on_offset, + assert_offset_equal, +) + + +class TestWeek(Base): + _offset = Week + d = Timestamp(datetime(2008, 1, 2)) + offset1 = _offset() + offset2 = _offset(2) + + def test_repr(self): + assert repr(Week(weekday=0)) == "<Week: weekday=0>" + assert repr(Week(n=-1, weekday=0)) == "<-1 * Week: weekday=0>" + assert repr(Week(n=-2, weekday=0)) == "<-2 * Weeks: weekday=0>" + + def test_corner(self): + with pytest.raises(ValueError, match="Day must be"): + Week(weekday=7) + + with pytest.raises(ValueError, match="Day must be"): + Week(weekday=-1) + + def test_is_anchored(self): + assert Week(weekday=0).is_anchored() + assert not Week().is_anchored() + assert not Week(2, weekday=2).is_anchored() + assert not Week(2).is_anchored() + + offset_cases = [] + # not business week + offset_cases.append( + ( + Week(), + { + datetime(2008, 1, 1): datetime(2008, 1, 8), + datetime(2008, 1, 4): datetime(2008, 1, 11), + datetime(2008, 1, 5): datetime(2008, 1, 12), + datetime(2008, 1, 6): datetime(2008, 1, 13), + datetime(2008, 1, 7): datetime(2008, 1, 14), + }, + ) + ) + + # Mon + offset_cases.append( + ( + Week(weekday=0), + { + datetime(2007, 12, 31): datetime(2008, 1, 7), + datetime(2008, 1, 4): datetime(2008, 1, 7), + datetime(2008, 1, 5): datetime(2008, 1, 7), + datetime(2008, 1, 6): datetime(2008, 1, 7), + datetime(2008, 1, 7): datetime(2008, 1, 14), + }, + ) + ) + + # n=0 -> roll forward. Mon + offset_cases.append( + ( + Week(0, weekday=0), + { + datetime(2007, 12, 31): datetime(2007, 12, 31), + datetime(2008, 1, 4): datetime(2008, 1, 7), + datetime(2008, 1, 5): datetime(2008, 1, 7), + datetime(2008, 1, 6): datetime(2008, 1, 7), + datetime(2008, 1, 7): datetime(2008, 1, 7), + }, + ) + ) + + # n=-2 -> roll back two weeks. Tue + offset_cases.append( + ( + Week(-2, weekday=1), + { + datetime(2010, 4, 6): datetime(2010, 3, 23), + datetime(2010, 4, 8): datetime(2010, 3, 30), + datetime(2010, 4, 5): datetime(2010, 3, 23), + }, + ) + ) + + @pytest.mark.parametrize("case", offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in cases.items(): + assert_offset_equal(offset, base, expected) + + @pytest.mark.parametrize("weekday", range(7)) + def test_is_on_offset(self, weekday): + offset = Week(weekday=weekday) + + for day in range(1, 8): + date = datetime(2008, 1, day) + # Jan 7, 2008 is a Monday (weekday code 0), so day % 7 matches + # the weekday code for the first week of January 2008 + expected = day % 7 == weekday + assert_is_on_offset(offset, date, expected) + + @pytest.mark.parametrize( + "n,date", + [ + (2, "1862-01-13 09:03:34.873477378+0210"), + (-2, "1856-10-24 16:18:36.556360110-0717"), + ], + ) + def test_is_on_offset_weekday_none(self, n, date): + # GH 18510 Week with weekday = None, normalize = False + # should always be on offset + offset = Week(n=n, weekday=None) + ts = Timestamp(date, tz="Africa/Lusaka") + fast = offset.is_on_offset(ts) + slow = (ts + offset) - offset == ts + assert fast == slow + + def test_week_add_invalid(self): + # Week with weekday should raise TypeError and _not_ AttributeError + # when adding invalid offset + offset = Week(weekday=1) + other = Day() + with pytest.raises(TypeError, match="Cannot add"): + offset + other + + +class TestWeekOfMonth(Base): + _offset = WeekOfMonth + offset1 = _offset() + offset2 = _offset(2) + + def test_constructor(self): + with pytest.raises(ValueError, match="^Week"): + WeekOfMonth(n=1, week=4, weekday=0) + + with pytest.raises(ValueError, match="^Week"): + WeekOfMonth(n=1, week=-1, weekday=0) + + with pytest.raises(ValueError, match="^Day"): + WeekOfMonth(n=1, week=0, weekday=-1) + + with pytest.raises(ValueError, match="^Day"): + WeekOfMonth(n=1, week=0, weekday=-7) + + def test_repr(self): + assert ( + repr(WeekOfMonth(weekday=1, week=2)) == "<WeekOfMonth: week=2, weekday=1>" + ) + + def test_offset(self): + date1 = datetime(2011, 1, 4) # 1st Tuesday of Month + date2 = datetime(2011, 1, 11) # 2nd Tuesday of Month + date3 = datetime(2011, 1, 18) # 3rd Tuesday of Month + date4 = datetime(2011, 1, 25) # 4th Tuesday of Month + + # see for loop for structure + test_cases = [ + (-2, 2, 1, date1, datetime(2010, 11, 16)), + (-2, 2, 1, date2, datetime(2010, 11, 16)), + (-2, 2, 1, date3, datetime(2010, 11, 16)), + (-2, 2, 1, date4, datetime(2010, 12, 21)), + (-1, 2, 1, date1, datetime(2010, 12, 21)), + (-1, 2, 1, date2, datetime(2010, 12, 21)), + (-1, 2, 1, date3, datetime(2010, 12, 21)), + (-1, 2, 1, date4, datetime(2011, 1, 18)), + (0, 0, 1, date1, datetime(2011, 1, 4)), + (0, 0, 1, date2, datetime(2011, 2, 1)), + (0, 0, 1, date3, datetime(2011, 2, 1)), + (0, 0, 1, date4, datetime(2011, 2, 1)), + (0, 1, 1, date1, datetime(2011, 1, 11)), + (0, 1, 1, date2, datetime(2011, 1, 11)), + (0, 1, 1, date3, datetime(2011, 2, 8)), + (0, 1, 1, date4, datetime(2011, 2, 8)), + (0, 0, 1, date1, datetime(2011, 1, 4)), + (0, 1, 1, date2, datetime(2011, 1, 11)), + (0, 2, 1, date3, datetime(2011, 1, 18)), + (0, 3, 1, date4, datetime(2011, 1, 25)), + (1, 0, 0, date1, datetime(2011, 2, 7)), + (1, 0, 0, date2, datetime(2011, 2, 7)), + (1, 0, 0, date3, datetime(2011, 2, 7)), + (1, 0, 0, date4, datetime(2011, 2, 7)), + (1, 0, 1, date1, datetime(2011, 2, 1)), + (1, 0, 1, date2, datetime(2011, 2, 1)), + (1, 0, 1, date3, datetime(2011, 2, 1)), + (1, 0, 1, date4, datetime(2011, 2, 1)), + (1, 0, 2, date1, datetime(2011, 1, 5)), + (1, 0, 2, date2,
datetime(2011, 2, 2)), + (1, 0, 2, date3, datetime(2011, 2, 2)), + (1, 0, 2, date4, datetime(2011, 2, 2)), + (1, 2, 1, date1, datetime(2011, 1, 18)), + (1, 2, 1, date2, datetime(2011, 1, 18)), + (1, 2, 1, date3, datetime(2011, 2, 15)), + (1, 2, 1, date4, datetime(2011, 2, 15)), + (2, 2, 1, date1, datetime(2011, 2, 15)), + (2, 2, 1, date2, datetime(2011, 2, 15)), + (2, 2, 1, date3, datetime(2011, 3, 15)), + (2, 2, 1, date4, datetime(2011, 3, 15)), + ] + + for n, week, weekday, dt, expected in test_cases: + offset = WeekOfMonth(n, week=week, weekday=weekday) + assert_offset_equal(offset, dt, expected) + + # try subtracting + result = datetime(2011, 2, 1) - WeekOfMonth(week=1, weekday=2) + assert result == datetime(2011, 1, 12) + + result = datetime(2011, 2, 3) - WeekOfMonth(week=0, weekday=2) + assert result == datetime(2011, 2, 2) + + on_offset_cases = [ + (0, 0, datetime(2011, 2, 7), True), + (0, 0, datetime(2011, 2, 6), False), + (0, 0, datetime(2011, 2, 14), False), + (1, 0, datetime(2011, 2, 14), True), + (0, 1, datetime(2011, 2, 1), True), + (0, 1, datetime(2011, 2, 8), False), + ] + + @pytest.mark.parametrize("case", on_offset_cases) + def test_is_on_offset(self, case): + week, weekday, dt, expected = case + offset = WeekOfMonth(week=week, weekday=weekday) + assert offset.is_on_offset(dt) == expected + + @pytest.mark.parametrize( + "n,week,date,tz", + [ + (2, 2, "1916-05-15 01:14:49.583410462+0422", "Asia/Qyzylorda"), + (-3, 1, "1980-12-08 03:38:52.878321185+0500", "Asia/Oral"), + ], + ) + def test_is_on_offset_nanoseconds(self, n, week, date, tz): + # GH 18864 + # Make sure that nanoseconds don't trip up is_on_offset (and with it apply) + offset = WeekOfMonth(n=n, week=week, weekday=0) + ts = Timestamp(date, tz=tz) + fast = offset.is_on_offset(ts) + slow = (ts + offset) - offset == ts + assert fast == slow + + +class TestLastWeekOfMonth(Base): + _offset = LastWeekOfMonth + offset1 = _offset() + offset2 = _offset(2) + + def test_constructor(self): + with pytest.raises(ValueError, match="^N cannot be 0"): + LastWeekOfMonth(n=0, weekday=1) + + with pytest.raises(ValueError, match="^Day"): + LastWeekOfMonth(n=1, weekday=-1) + + with pytest.raises(ValueError, match="^Day"): + LastWeekOfMonth(n=1, weekday=7) + + def test_offset(self): + # Saturday + last_sat = datetime(2013, 8, 31) + next_sat = datetime(2013, 9, 28) + offset_sat = LastWeekOfMonth(n=1, weekday=5) + + one_day_before = last_sat + timedelta(days=-1) + assert one_day_before + offset_sat == last_sat + + one_day_after = last_sat + timedelta(days=+1) + assert one_day_after + offset_sat == next_sat + + # Test On that day + assert last_sat + offset_sat == next_sat + + # Thursday + + offset_thur = LastWeekOfMonth(n=1, weekday=3) + last_thurs = datetime(2013, 1, 31) + next_thurs = datetime(2013, 2, 28) + + one_day_before = last_thurs + timedelta(days=-1) + assert one_day_before + offset_thur == last_thurs + + one_day_after = last_thurs + timedelta(days=+1) + assert one_day_after + offset_thur == next_thurs + + # Test on that day + assert last_thurs + offset_thur == next_thurs + + three_before = last_thurs + timedelta(days=-3) + assert three_before + offset_thur == last_thurs + + two_after = last_thurs + timedelta(days=+2) + assert two_after + offset_thur == next_thurs + + offset_sunday = LastWeekOfMonth(n=1, weekday=WeekDay.SUN) + assert datetime(2013, 7, 31) + offset_sunday == datetime(2013, 8, 25) + + on_offset_cases = [ + (WeekDay.SUN, datetime(2013, 1, 27), True), + (WeekDay.SAT, datetime(2013, 3, 30), True), + (WeekDay.MON, 
datetime(2013, 2, 18), False), # Not the last Mon + (WeekDay.SUN, datetime(2013, 2, 25), False), # Not a SUN + (WeekDay.MON, datetime(2013, 2, 25), True), + (WeekDay.SAT, datetime(2013, 11, 30), True), + (WeekDay.SAT, datetime(2006, 8, 26), True), + (WeekDay.SAT, datetime(2007, 8, 25), True), + (WeekDay.SAT, datetime(2008, 8, 30), True), + (WeekDay.SAT, datetime(2009, 8, 29), True), + (WeekDay.SAT, datetime(2010, 8, 28), True), + (WeekDay.SAT, datetime(2011, 8, 27), True), + (WeekDay.SAT, datetime(2019, 8, 31), True), + ] + + @pytest.mark.parametrize("case", on_offset_cases) + def test_is_on_offset(self, case): + weekday, dt, expected = case + offset = LastWeekOfMonth(weekday=weekday) + assert offset.is_on_offset(dt) == expected + + @pytest.mark.parametrize( + "n,weekday,date,tz", + [ + (4, 6, "1917-05-27 20:55:27.084284178+0200", "Europe/Warsaw"), + (-4, 5, "2005-08-27 05:01:42.799392561-0500", "America/Rainy_River"), + ], + ) + def test_last_week_of_month_on_offset(self, n, weekday, date, tz): + # GH 19036, GH 18977 _adjust_dst was incorrect for LastWeekOfMonth + offset = LastWeekOfMonth(n=n, weekday=weekday) + ts = Timestamp(date, tz=tz) + slow = (ts + offset) - offset == ts + fast = offset.is_on_offset(ts) + assert fast == slow + + def test_repr(self): + assert ( + repr(LastWeekOfMonth(n=2, weekday=1)) == "<2 * LastWeekOfMonths: weekday=1>" + ) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_year.py b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_year.py new file mode 100644 index 0000000..85994ad --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/tseries/offsets/test_year.py @@ -0,0 +1,322 @@ +""" +Tests for the following offsets: +- YearBegin +- YearEnd +""" +from datetime import datetime + +import pytest + +from pandas.tests.tseries.offsets.common import ( + Base, + assert_is_on_offset, + assert_offset_equal, +) + +from pandas.tseries.offsets import ( + YearBegin, + YearEnd, +) + + +class TestYearBegin(Base): + _offset = YearBegin + + def test_misspecified(self): + with pytest.raises(ValueError, match="Month must go from 1 to 12"): + YearBegin(month=13) + + offset_cases = [] + offset_cases.append( + ( + YearBegin(), + { + datetime(2008, 1, 1): datetime(2009, 1, 1), + datetime(2008, 6, 30): datetime(2009, 1, 1), + datetime(2008, 12, 31): datetime(2009, 1, 1), + datetime(2005, 12, 30): datetime(2006, 1, 1), + datetime(2005, 12, 31): datetime(2006, 1, 1), + }, + ) + ) + + offset_cases.append( + ( + YearBegin(0), + { + datetime(2008, 1, 1): datetime(2008, 1, 1), + datetime(2008, 6, 30): datetime(2009, 1, 1), + datetime(2008, 12, 31): datetime(2009, 1, 1), + datetime(2005, 12, 30): datetime(2006, 1, 1), + datetime(2005, 12, 31): datetime(2006, 1, 1), + }, + ) + ) + + offset_cases.append( + ( + YearBegin(3), + { + datetime(2008, 1, 1): datetime(2011, 1, 1), + datetime(2008, 6, 30): datetime(2011, 1, 1), + datetime(2008, 12, 31): datetime(2011, 1, 1), + datetime(2005, 12, 30): datetime(2008, 1, 1), + datetime(2005, 12, 31): datetime(2008, 1, 1), + }, + ) + ) + + offset_cases.append( + ( + YearBegin(-1), + { + datetime(2007, 1, 1): datetime(2006, 1, 1), + datetime(2007, 1, 15): datetime(2007, 1, 1), + datetime(2008, 6, 30): datetime(2008, 1, 1), + datetime(2008, 12, 31): datetime(2008, 1, 1), + datetime(2006, 12, 29): datetime(2006, 1, 1), + datetime(2006, 12, 30): datetime(2006, 1, 1), + }, + ) + ) + + offset_cases.append( + ( + YearBegin(-2), + { + datetime(2007, 1, 1): datetime(2005, 1, 1), + datetime(2008, 6, 30): datetime(2007, 1, 1), + datetime(2008, 12, 31): datetime(2007, 1, 1), + }, + ) + ) + + offset_cases.append( + ( + YearBegin(month=4), + { + datetime(2007, 4, 1): datetime(2008, 4, 1), + datetime(2007, 4, 15): datetime(2008, 4, 1), + datetime(2007, 3, 1): datetime(2007, 4, 1), + datetime(2007, 12, 15): datetime(2008, 4, 1), + datetime(2012, 1, 31): datetime(2012, 4, 1), + }, + ) + ) + + offset_cases.append( + ( + YearBegin(0, month=4), + { + datetime(2007, 4, 1): datetime(2007, 4, 1), + datetime(2007, 3, 1): datetime(2007, 4, 1), + datetime(2007, 12, 15): datetime(2008, 4, 1), + datetime(2012, 1, 31): datetime(2012, 4, 1), + }, + ) + ) + + offset_cases.append( + ( + YearBegin(4, month=4), + { + datetime(2007, 4, 1): datetime(2011, 4, 1), + datetime(2007, 4, 15): datetime(2011, 4, 1), + datetime(2007, 3, 1): datetime(2010, 4, 1), + datetime(2007, 12, 15): datetime(2011, 4, 1), + datetime(2012, 1, 31): datetime(2015, 4, 1), + }, + ) + ) + + offset_cases.append( + ( + YearBegin(-1, month=4), + { + datetime(2007, 4, 1): datetime(2006, 4, 1), + datetime(2007, 3, 1): datetime(2006, 4, 1), + datetime(2007, 12, 15): datetime(2007, 4, 1), + datetime(2012, 1, 31): datetime(2011, 4, 1), + }, + ) + ) + + offset_cases.append( + ( + YearBegin(-3, month=4), + { + datetime(2007, 4, 1): datetime(2004, 4, 1), + datetime(2007, 3, 1): datetime(2004, 4, 1), + datetime(2007, 12, 15): datetime(2005, 4, 1), + datetime(2012, 1, 31): datetime(2009, 4, 1), + }, + ) + ) + + @pytest.mark.parametrize("case", offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in cases.items(): + assert_offset_equal(offset, base, expected) + + on_offset_cases = [ + (YearBegin(), datetime(2007, 1, 3), False), + (YearBegin(), datetime(2008, 1, 1), True), + (YearBegin(), datetime(2006, 12, 31), False), + (YearBegin(), datetime(2006, 1, 2), False), + ] + + @pytest.mark.parametrize("case", on_offset_cases) + def test_is_on_offset(self, case): + offset, dt, expected = case + assert_is_on_offset(offset, dt, expected)
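+ + +# A hedged sketch of the anchoring rule the cases above encode: n=1 rolls to +# the next anchor even from an anchor date, while n=0 rolls forward only if +# the date is not already on one (hypothetical helper, using this module's imports): +def _example_yearbegin_anchoring(): + assert datetime(2008, 6, 30) + YearBegin() == datetime(2009, 1, 1) + assert datetime(2008, 1, 1) + YearBegin(0) == datetime(2008, 1, 1) + assert datetime(2008, 1, 1) + YearBegin() == datetime(2009, 1, 1)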
+ + +class TestYearEnd(Base): + _offset = YearEnd + + def test_misspecified(self): + with pytest.raises(ValueError, match="Month must go from 1 to 12"): + YearEnd(month=13) + + offset_cases = [] + offset_cases.append( + ( + YearEnd(), + { + datetime(2008, 1, 1): datetime(2008, 12, 31), + datetime(2008, 6, 30): datetime(2008, 12, 31), + datetime(2008, 12, 31): datetime(2009, 12, 31), + datetime(2005, 12, 30): datetime(2005, 12, 31), + datetime(2005, 12, 31): datetime(2006, 12, 31), + }, + ) + ) + + offset_cases.append( + ( + YearEnd(0), + { + datetime(2008, 1, 1): datetime(2008, 12, 31), + datetime(2008, 6, 30): datetime(2008, 12, 31), + datetime(2008, 12, 31): datetime(2008, 12, 31), + datetime(2005, 12, 30): datetime(2005, 12, 31), + }, + ) + ) + + offset_cases.append( + ( + YearEnd(-1), + { + datetime(2007, 1, 1): datetime(2006, 12, 31), + datetime(2008, 6, 30): datetime(2007, 12, 31), + datetime(2008, 12, 31): datetime(2007, 12, 31), + datetime(2006, 12, 29): datetime(2005, 12, 31), + datetime(2006, 12, 30): datetime(2005, 12, 31), + }, + ) + ) + + offset_cases.append( + ( + YearEnd(-2), + { + datetime(2007, 1, 1): datetime(2005, 12, 31), + datetime(2008, 6, 30): datetime(2006, 12, 31), + datetime(2008, 12, 31): datetime(2006, 12, 31), + }, + ) + ) + + @pytest.mark.parametrize("case", offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in cases.items():
assert_offset_equal(offset, base, expected) + + on_offset_cases = [ + (YearEnd(), datetime(2007, 12, 31), True), + (YearEnd(), datetime(2008, 1, 1), False), + (YearEnd(), datetime(2006, 12, 31), True), + (YearEnd(), datetime(2006, 12, 29), False), + ] + + @pytest.mark.parametrize("case", on_offset_cases) + def test_is_on_offset(self, case): + offset, dt, expected = case + assert_is_on_offset(offset, dt, expected) + + +class TestYearEndDiffMonth(Base): + offset_cases = [] + offset_cases.append( + ( + YearEnd(month=3), + { + datetime(2008, 1, 1): datetime(2008, 3, 31), + datetime(2008, 2, 15): datetime(2008, 3, 31), + datetime(2008, 3, 31): datetime(2009, 3, 31), + datetime(2008, 3, 30): datetime(2008, 3, 31), + datetime(2005, 3, 31): datetime(2006, 3, 31), + datetime(2006, 7, 30): datetime(2007, 3, 31), + }, + ) + ) + + offset_cases.append( + ( + YearEnd(0, month=3), + { + datetime(2008, 1, 1): datetime(2008, 3, 31), + datetime(2008, 2, 28): datetime(2008, 3, 31), + datetime(2008, 3, 31): datetime(2008, 3, 31), + datetime(2005, 3, 30): datetime(2005, 3, 31), + }, + ) + ) + + offset_cases.append( + ( + YearEnd(-1, month=3), + { + datetime(2007, 1, 1): datetime(2006, 3, 31), + datetime(2008, 2, 28): datetime(2007, 3, 31), + datetime(2008, 3, 31): datetime(2007, 3, 31), + datetime(2006, 3, 29): datetime(2005, 3, 31), + datetime(2006, 3, 30): datetime(2005, 3, 31), + datetime(2007, 3, 1): datetime(2006, 3, 31), + }, + ) + ) + + offset_cases.append( + ( + YearEnd(-2, month=3), + { + datetime(2007, 1, 1): datetime(2005, 3, 31), + datetime(2008, 6, 30): datetime(2007, 3, 31), + datetime(2008, 3, 31): datetime(2006, 3, 31), + }, + ) + ) + + @pytest.mark.parametrize("case", offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in cases.items(): + assert_offset_equal(offset, base, expected) + + on_offset_cases = [ + (YearEnd(month=3), datetime(2007, 3, 31), True), + (YearEnd(month=3), datetime(2008, 1, 1), False), + (YearEnd(month=3), datetime(2006, 3, 31), True), + (YearEnd(month=3), datetime(2006, 3, 29), False), + ] + + @pytest.mark.parametrize("case", on_offset_cases) + def test_is_on_offset(self, case): + offset, dt, expected = case + assert_is_on_offset(offset, dt, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..54e9c31 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/test_api.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/test_api.cpython-38.pyc new file mode 100644 index 0000000..3863e2f Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/test_api.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/test_array_to_datetime.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/test_array_to_datetime.cpython-38.pyc new file mode 100644 index 0000000..80a9153 Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/test_array_to_datetime.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/test_ccalendar.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/test_ccalendar.cpython-38.pyc new file mode 100644 index 0000000..e447a8c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/test_ccalendar.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/test_conversion.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/test_conversion.cpython-38.pyc new file mode 100644 index 0000000..44b89c3 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/test_conversion.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/test_fields.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/test_fields.cpython-38.pyc new file mode 100644 index 0000000..ec11c7b Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/test_fields.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/test_libfrequencies.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/test_libfrequencies.cpython-38.pyc new file mode 100644 index 0000000..456cad9 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/test_libfrequencies.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/test_liboffsets.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/test_liboffsets.cpython-38.pyc new file mode 100644 index 0000000..4e3333e Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/test_liboffsets.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/test_parse_iso8601.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/test_parse_iso8601.cpython-38.pyc new file mode 100644 index 0000000..596fab8 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/test_parse_iso8601.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/test_parsing.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/test_parsing.cpython-38.pyc new file mode 100644 index 0000000..d63b501 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/test_parsing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/test_period_asfreq.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/test_period_asfreq.cpython-38.pyc new file mode 100644 index 0000000..768ca7c Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/test_period_asfreq.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/test_timedeltas.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/test_timedeltas.cpython-38.pyc new file mode 100644 index 0000000..4e3f4ed Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/test_timedeltas.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/test_timezones.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/test_timezones.cpython-38.pyc new file mode 100644 index 0000000..0658c68 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/test_timezones.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/test_to_offset.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/test_to_offset.cpython-38.pyc new file mode 100644 index 0000000..ac150f0 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/__pycache__/test_to_offset.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tslibs/test_api.py b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/test_api.py new file mode 100644 index 0000000..d7abb19 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/test_api.py @@ -0,0 +1,56 @@ +"""Tests that the tslibs API is locked down""" + +from pandas._libs import tslibs + + +def test_namespace(): + + submodules = [ + "base", + "ccalendar", + "conversion", + "dtypes", + "fields", + "nattype", + "np_datetime", + "offsets", + "parsing", + "period", + "strptime", + "vectorized", + "timedeltas", + "timestamps", + "timezones", + "tzconversion", + ] + + api = [ + "BaseOffset", + "NaT", + "NaTType", + "iNaT", + "nat_strings", + "OutOfBoundsDatetime", + "OutOfBoundsTimedelta", + "Period", + "IncompatibleFrequency", + "Resolution", + "Tick", + "Timedelta", + "dt64arr_to_periodarr", + "Timestamp", + "is_date_array_normalized", + "ints_to_pydatetime", + "normalize_i8_timestamps", + "get_resolution", + "delta_to_nanoseconds", + "ints_to_pytimedelta", + "localize_pydatetime", + "tz_convert_from_utc_single", + "to_offset", + "tz_compare", + ] + + expected = set(submodules + api) + names = [x for x in dir(tslibs) if not x.startswith("__")] + assert set(names) == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tslibs/test_array_to_datetime.py b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/test_array_to_datetime.py new file mode 100644 index 0000000..a0fafc2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/test_array_to_datetime.py @@ -0,0 +1,200 @@ +from datetime import ( + date, + datetime, +) + +from dateutil.tz.tz import tzoffset +import numpy as np +import pytest +import pytz + +from pandas._libs import ( + iNaT, + tslib, +) + +from pandas import Timestamp +import pandas._testing as tm + + +@pytest.mark.parametrize( + "data,expected", + [ + ( + ["01-01-2013", "01-02-2013"], + [ + "2013-01-01T00:00:00.000000000", + "2013-01-02T00:00:00.000000000", + ], + ), + ( + ["Mon Sep 16 2013", "Tue Sep 17 2013"], + [ + "2013-09-16T00:00:00.000000000", + "2013-09-17T00:00:00.000000000", + ], + ), + ], +) +def test_parsing_valid_dates(data, expected): + arr = np.array(data, dtype=object) + result, _ = tslib.array_to_datetime(arr) + + expected = np.array(expected, dtype="M8[ns]") + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize( + "dt_string, expected_tz", + [ + ["01-01-2013 08:00:00+08:00", 480], + ["2013-01-01T08:00:00.000000000+0800", 480], + ["2012-12-31T16:00:00.000000000-0800", -480], + ["12-31-2012 23:00:00-01:00", -60], + ], +) +def 
test_parsing_timezone_offsets(dt_string, expected_tz): + # All of these datetime strings with offsets are equivalent + # to the same datetime after the timezone offset is added. + arr = np.array(["01-01-2013 00:00:00"], dtype=object) + expected, _ = tslib.array_to_datetime(arr) + + arr = np.array([dt_string], dtype=object) + result, result_tz = tslib.array_to_datetime(arr) + + tm.assert_numpy_array_equal(result, expected) + assert result_tz is pytz.FixedOffset(expected_tz) + + +def test_parsing_non_iso_timezone_offset(): + dt_string = "01-01-2013T00:00:00.000000000+0000" + arr = np.array([dt_string], dtype=object) + + result, result_tz = tslib.array_to_datetime(arr) + expected = np.array([np.datetime64("2013-01-01 00:00:00.000000000")]) + + tm.assert_numpy_array_equal(result, expected) + assert result_tz is pytz.FixedOffset(0) + + +def test_parsing_different_timezone_offsets(): + # see gh-17697 + data = ["2015-11-18 15:30:00+05:30", "2015-11-18 15:30:00+06:30"] + data = np.array(data, dtype=object) + + result, result_tz = tslib.array_to_datetime(data) + expected = np.array( + [ + datetime(2015, 11, 18, 15, 30, tzinfo=tzoffset(None, 19800)), + datetime(2015, 11, 18, 15, 30, tzinfo=tzoffset(None, 23400)), + ], + dtype=object, + ) + + tm.assert_numpy_array_equal(result, expected) + assert result_tz is None + + +@pytest.mark.parametrize( + "data", [["-352.737091", "183.575577"], ["1", "2", "3", "4", "5"]] +) +def test_number_looking_strings_not_into_datetime(data): + # see gh-4601 + # + # These strings don't look like datetimes, so + # they shouldn't be attempted to be converted. + arr = np.array(data, dtype=object) + result, _ = tslib.array_to_datetime(arr, errors="ignore") + + tm.assert_numpy_array_equal(result, arr) + + +@pytest.mark.parametrize( + "invalid_date", + [ + date(1000, 1, 1), + datetime(1000, 1, 1), + "1000-01-01", + "Jan 1, 1000", + np.datetime64("1000-01-01"), + ], +) +@pytest.mark.parametrize("errors", ["coerce", "raise"]) +def test_coerce_outside_ns_bounds(invalid_date, errors): + arr = np.array([invalid_date], dtype="object") + kwargs = {"values": arr, "errors": errors} + + if errors == "raise": + msg = "Out of bounds nanosecond timestamp" + + with pytest.raises(ValueError, match=msg): + tslib.array_to_datetime(**kwargs) + else: # coerce. + result, _ = tslib.array_to_datetime(**kwargs) + expected = np.array([iNaT], dtype="M8[ns]") + + tm.assert_numpy_array_equal(result, expected) + + +def test_coerce_outside_ns_bounds_one_valid(): + arr = np.array(["1/1/1000", "1/1/2000"], dtype=object) + result, _ = tslib.array_to_datetime(arr, errors="coerce") + + expected = [iNaT, "2000-01-01T00:00:00.000000000"] + expected = np.array(expected, dtype="M8[ns]") + + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize("errors", ["ignore", "coerce"]) +def test_coerce_of_invalid_datetimes(errors): + arr = np.array(["01-01-2013", "not_a_date", "1"], dtype=object) + kwargs = {"values": arr, "errors": errors} + + if errors == "ignore": + # Without coercing, the presence of any invalid + # dates prevents any values from being converted. + result, _ = tslib.array_to_datetime(**kwargs) + tm.assert_numpy_array_equal(result, arr) + else: # coerce. 
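# For reference, a minimal public-API sketch of the same coercion behaviour
# (a hedged aside: it assumes only the documented pandas.to_datetime
# signature, not the private tslib entry point this test targets):
#
#     import pandas as pd
#     pd.to_datetime(["01-01-2013", "not_a_date"], errors="coerce")
#     # -> DatetimeIndex(['2013-01-01', 'NaT'], dtype='datetime64[ns]', freq=None)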
+ # With coercing, the invalid dates become iNaT + result, _ = tslib.array_to_datetime(arr, errors="coerce") + expected = ["2013-01-01T00:00:00.000000000", iNaT, iNaT] + + tm.assert_numpy_array_equal(result, np.array(expected, dtype="M8[ns]")) + + +def test_to_datetime_barely_out_of_bounds(): + # see gh-19382, gh-19529 + # + # Close enough to bounds that dropping nanos + # would result in an in-bounds datetime. + arr = np.array(["2262-04-11 23:47:16.854775808"], dtype=object) + msg = "Out of bounds nanosecond timestamp: 2262-04-11 23:47:16" + + with pytest.raises(tslib.OutOfBoundsDatetime, match=msg): + tslib.array_to_datetime(arr) + + +class SubDatetime(datetime): + pass + + +@pytest.mark.parametrize( + "data,expected", + [ + ([SubDatetime(2000, 1, 1)], ["2000-01-01T00:00:00.000000000"]), + ([datetime(2000, 1, 1)], ["2000-01-01T00:00:00.000000000"]), + ([Timestamp(2000, 1, 1)], ["2000-01-01T00:00:00.000000000"]), + ], +) +def test_datetime_subclass(data, expected): + # GH 25851 + # ensure that subclassed datetime works with + # array_to_datetime + + arr = np.array(data, dtype=object) + result, _ = tslib.array_to_datetime(arr) + + expected = np.array(expected, dtype="M8[ns]") + tm.assert_numpy_array_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tslibs/test_ccalendar.py b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/test_ccalendar.py new file mode 100644 index 0000000..6a0d0a8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/test_ccalendar.py @@ -0,0 +1,63 @@ +from datetime import ( + date, + datetime, +) + +from hypothesis import given +import numpy as np +import pytest + +from pandas._libs.tslibs import ccalendar + +from pandas._testing._hypothesis import DATETIME_IN_PD_TIMESTAMP_RANGE_NO_TZ + + +@pytest.mark.parametrize( + "date_tuple,expected", + [ + ((2001, 3, 1), 60), + ((2004, 3, 1), 61), + ((1907, 12, 31), 365), # End-of-year, non-leap year. + ((2004, 12, 31), 366), # End-of-year, leap year.
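# (Aside, kept as a comment so the parametrize list above stays intact:
# the stdlib equivalent of ccalendar.get_day_of_year for the leap-year
# case is
#     from datetime import date
#     date(2004, 12, 31).timetuple().tm_yday  # -> 366
# )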
+ ], +) +def test_get_day_of_year_numeric(date_tuple, expected): + assert ccalendar.get_day_of_year(*date_tuple) == expected + + +def test_get_day_of_year_dt(): + dt = datetime.fromordinal(1 + np.random.randint(365 * 4000)) + result = ccalendar.get_day_of_year(dt.year, dt.month, dt.day) + + expected = (dt - dt.replace(month=1, day=1)).days + 1 + assert result == expected + + +@pytest.mark.parametrize( + "input_date_tuple, expected_iso_tuple", + [ + [(2020, 1, 1), (2020, 1, 3)], + [(2019, 12, 31), (2020, 1, 2)], + [(2019, 12, 30), (2020, 1, 1)], + [(2009, 12, 31), (2009, 53, 4)], + [(2010, 1, 1), (2009, 53, 5)], + [(2010, 1, 3), (2009, 53, 7)], + [(2010, 1, 4), (2010, 1, 1)], + [(2006, 1, 1), (2005, 52, 7)], + [(2005, 12, 31), (2005, 52, 6)], + [(2008, 12, 28), (2008, 52, 7)], + [(2008, 12, 29), (2009, 1, 1)], + ], +) +def test_dt_correct_iso_8601_year_week_and_day(input_date_tuple, expected_iso_tuple): + result = ccalendar.get_iso_calendar(*input_date_tuple) + expected_from_date_isocalendar = date(*input_date_tuple).isocalendar() + assert result == expected_from_date_isocalendar + assert result == expected_iso_tuple + + +@given(DATETIME_IN_PD_TIMESTAMP_RANGE_NO_TZ) +def test_isocalendar(dt): + expected = dt.isocalendar() + result = ccalendar.get_iso_calendar(dt.year, dt.month, dt.day) + assert result == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tslibs/test_conversion.py b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/test_conversion.py new file mode 100644 index 0000000..41eb7ae --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/test_conversion.py @@ -0,0 +1,141 @@ +from datetime import datetime + +import numpy as np +import pytest +from pytz import UTC + +from pandas._libs.tslibs import ( + OutOfBoundsTimedelta, + conversion, + iNaT, + timezones, + tzconversion, +) + +from pandas import ( + Timestamp, + date_range, +) +import pandas._testing as tm + + +def _compare_utc_to_local(tz_didx): + def f(x): + return tzconversion.tz_convert_from_utc_single(x, tz_didx.tz) + + result = tzconversion.tz_convert_from_utc(tz_didx.asi8, tz_didx.tz) + expected = np.vectorize(f)(tz_didx.asi8) + + tm.assert_numpy_array_equal(result, expected) + + +def _compare_local_to_utc(tz_didx, naive_didx): + # Check that tz_localize behaves the same vectorized and pointwise. 
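# A hedged public-API sketch of the same round trip (Timestamp.tz_localize
# and Timestamp.tz_convert are documented; the private tzconversion helpers
# below are what the test actually exercises):
#
#     import pandas as pd
#     ts = pd.Timestamp("2014-03-01 12:00").tz_localize("US/Eastern")
#     ts.tz_convert("UTC")  # same instant, re-expressed in UTC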
+ err1 = err2 = None + try: + result = tzconversion.tz_localize_to_utc(naive_didx.asi8, tz_didx.tz) + err1 = None + except Exception as err: + err1 = err + + try: + expected = naive_didx.map(lambda x: x.tz_localize(tz_didx.tz)).asi8 + except Exception as err: + err2 = err + + if err1 is not None: + assert type(err1) == type(err2) + else: + assert err2 is None + tm.assert_numpy_array_equal(result, expected) + + +def test_tz_convert_single_matches_tz_convert_hourly(tz_aware_fixture): + tz = tz_aware_fixture + tz_didx = date_range("2014-03-01", "2015-01-10", freq="H", tz=tz) + naive_didx = date_range("2014-03-01", "2015-01-10", freq="H") + + _compare_utc_to_local(tz_didx) + _compare_local_to_utc(tz_didx, naive_didx) + + +@pytest.mark.parametrize("freq", ["D", "A"]) +def test_tz_convert_single_matches_tz_convert(tz_aware_fixture, freq): + tz = tz_aware_fixture + tz_didx = date_range("2000-01-01", "2020-01-01", freq=freq, tz=tz) + naive_didx = date_range("2000-01-01", "2020-01-01", freq=freq) + + _compare_utc_to_local(tz_didx) + _compare_local_to_utc(tz_didx, naive_didx) + + +@pytest.mark.parametrize( + "arr", + [ + pytest.param(np.array([], dtype=np.int64), id="empty"), + pytest.param(np.array([iNaT], dtype=np.int64), id="all_nat"), + ], +) +def test_tz_convert_corner(arr): + result = tzconversion.tz_convert_from_utc(arr, timezones.maybe_get_tz("Asia/Tokyo")) + tm.assert_numpy_array_equal(result, arr) + + +def test_tz_convert_readonly(): + # GH#35530 + arr = np.array([0], dtype=np.int64) + arr.setflags(write=False) + result = tzconversion.tz_convert_from_utc(arr, UTC) + tm.assert_numpy_array_equal(result, arr) + + +@pytest.mark.parametrize("copy", [True, False]) +@pytest.mark.parametrize("dtype", ["M8[ns]", "M8[s]"]) +def test_length_zero_copy(dtype, copy): + arr = np.array([], dtype=dtype) + result = conversion.ensure_datetime64ns(arr, copy=copy) + assert result.base is (None if copy else arr) + + +def test_ensure_datetime64ns_bigendian(): + # GH#29684 + arr = np.array([np.datetime64(1, "ms")], dtype=">M8[ms]") + result = conversion.ensure_datetime64ns(arr) + + expected = np.array([np.datetime64(1, "ms")], dtype="M8[ns]") + tm.assert_numpy_array_equal(result, expected) + + +def test_ensure_timedelta64ns_overflows(): + arr = np.arange(10).astype("m8[Y]") * 100 + msg = r"Out of bounds for nanosecond timedelta64\[Y\] 900" + with pytest.raises(OutOfBoundsTimedelta, match=msg): + conversion.ensure_timedelta64ns(arr) + + +class SubDatetime(datetime): + pass + + +@pytest.mark.parametrize( + "dt, expected", + [ + pytest.param( + Timestamp("2000-01-01"), Timestamp("2000-01-01", tz=UTC), id="timestamp" + ), + pytest.param( + datetime(2000, 1, 1), datetime(2000, 1, 1, tzinfo=UTC), id="datetime" + ), + pytest.param( + SubDatetime(2000, 1, 1), + SubDatetime(2000, 1, 1, tzinfo=UTC), + id="subclassed_datetime", + ), + ], +) +def test_localize_pydatetime_dt_types(dt, expected): + # GH 25851 + # ensure that subclassed datetime works with + # localize_pydatetime + result = conversion.localize_pydatetime(dt, UTC) + assert result == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tslibs/test_fields.py b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/test_fields.py new file mode 100644 index 0000000..9d0b3ff --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/test_fields.py @@ -0,0 +1,40 @@ +import numpy as np +import pytest + +from pandas._libs.tslibs import fields + +import pandas._testing as tm + + +@pytest.fixture +def dtindex(): + dtindex = np.arange(5, 
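# (comment inside the call so the expression stays intact: the fixture
# builds five int64 nanosecond stamps spaced 32 days apart, i.e.
# 1970-01-01, 1970-02-02, 1970-03-06, 1970-04-07 and 1970-05-09, then
# marks the buffer read-only so the fields functions are exercised on
# immutable input)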
dtype=np.int64) * 10 ** 9 * 3600 * 24 * 32 + dtindex.flags.writeable = False + return dtindex + + +def test_get_date_name_field_readonly(dtindex): + # https://github.com/vaexio/vaex/issues/357 + # fields functions shouldn't raise when we pass read-only data + result = fields.get_date_name_field(dtindex, "month_name") + expected = np.array(["January", "February", "March", "April", "May"], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + +def test_get_date_field_readonly(dtindex): + result = fields.get_date_field(dtindex, "Y") + expected = np.array([1970, 1970, 1970, 1970, 1970], dtype=np.int32) + tm.assert_numpy_array_equal(result, expected) + + +def test_get_start_end_field_readonly(dtindex): + result = fields.get_start_end_field(dtindex, "is_month_start", None) + expected = np.array([True, False, False, False, False], dtype=np.bool_) + tm.assert_numpy_array_equal(result, expected) + + +def test_get_timedelta_field_readonly(dtindex): + # treat dtindex as timedeltas for this next one + result = fields.get_timedelta_field(dtindex, "days") + expected = np.arange(5, dtype=np.int32) * 32 + tm.assert_numpy_array_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tslibs/test_libfrequencies.py b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/test_libfrequencies.py new file mode 100644 index 0000000..83f28f6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/test_libfrequencies.py @@ -0,0 +1,29 @@ +import pytest + +from pandas._libs.tslibs.parsing import get_rule_month + +from pandas.tseries import offsets + + +@pytest.mark.parametrize( + "obj,expected", + [ + ("W", "DEC"), + (offsets.Week().freqstr, "DEC"), + ("D", "DEC"), + (offsets.Day().freqstr, "DEC"), + ("Q", "DEC"), + (offsets.QuarterEnd(startingMonth=12).freqstr, "DEC"), + ("Q-JAN", "JAN"), + (offsets.QuarterEnd(startingMonth=1).freqstr, "JAN"), + ("A-DEC", "DEC"), + ("Y-DEC", "DEC"), + (offsets.YearEnd().freqstr, "DEC"), + ("A-MAY", "MAY"), + ("Y-MAY", "MAY"), + (offsets.YearEnd(month=5).freqstr, "MAY"), + ], +) +def test_get_rule_month(obj, expected): + result = get_rule_month(obj) + assert result == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tslibs/test_liboffsets.py b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/test_liboffsets.py new file mode 100644 index 0000000..c189a43 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/test_liboffsets.py @@ -0,0 +1,173 @@ +""" +Tests for helper functions in the cython tslibs.offsets +""" +from datetime import datetime + +import pytest + +from pandas._libs.tslibs.ccalendar import ( + get_firstbday, + get_lastbday, +) +import pandas._libs.tslibs.offsets as liboffsets +from pandas._libs.tslibs.offsets import roll_qtrday + +from pandas import Timestamp + + +@pytest.fixture(params=["start", "end", "business_start", "business_end"]) +def day_opt(request): + return request.param + + +@pytest.mark.parametrize( + "dt,exp_week_day,exp_last_day", + [ + (datetime(2017, 11, 30), 3, 30), # Business day. + (datetime(1993, 10, 31), 6, 29), # Non-business day. + ], +) +def test_get_last_bday(dt, exp_week_day, exp_last_day): + assert dt.weekday() == exp_week_day + assert get_lastbday(dt.year, dt.month) == exp_last_day + + +@pytest.mark.parametrize( + "dt,exp_week_day,exp_first_day", + [ + (datetime(2017, 4, 1), 5, 3), # Non-weekday. + (datetime(1993, 10, 1), 4, 1), # Business day. 
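# (Aside, as a comment so the parametrize list stays intact: a user-facing
# counterpart is the "BMS" business-month-start frequency, e.g.
#     import pandas as pd
#     pd.date_range("2017-04-01", periods=1, freq="BMS")  # -> ['2017-04-03']
# since 2017-04-01 falls on a Saturday.)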
+ ], +) +def test_get_first_bday(dt, exp_week_day, exp_first_day): + assert dt.weekday() == exp_week_day + assert get_firstbday(dt.year, dt.month) == exp_first_day + + +@pytest.mark.parametrize( + "months,day_opt,expected", + [ + (0, 15, datetime(2017, 11, 15)), + (0, None, datetime(2017, 11, 30)), + (1, "start", datetime(2017, 12, 1)), + (-145, "end", datetime(2005, 10, 31)), + (0, "business_end", datetime(2017, 11, 30)), + (0, "business_start", datetime(2017, 11, 1)), + ], +) +def test_shift_month_dt(months, day_opt, expected): + dt = datetime(2017, 11, 30) + assert liboffsets.shift_month(dt, months, day_opt=day_opt) == expected + + +@pytest.mark.parametrize( + "months,day_opt,expected", + [ + (1, "start", Timestamp("1929-06-01")), + (-3, "end", Timestamp("1929-02-28")), + (25, None, Timestamp("1931-06-5")), + (-1, 31, Timestamp("1929-04-30")), + ], +) +def test_shift_month_ts(months, day_opt, expected): + ts = Timestamp("1929-05-05") + assert liboffsets.shift_month(ts, months, day_opt=day_opt) == expected + + +def test_shift_month_error(): + dt = datetime(2017, 11, 15) + day_opt = "this should raise" + + with pytest.raises(ValueError, match=day_opt): + liboffsets.shift_month(dt, 3, day_opt=day_opt) + + +@pytest.mark.parametrize( + "other,expected", + [ + # Before March 1. + (datetime(2017, 2, 10), {2: 1, -7: -7, 0: 0}), + # After March 1. + (Timestamp("2014-03-15", tz="US/Eastern"), {2: 2, -7: -6, 0: 1}), + ], +) +@pytest.mark.parametrize("n", [2, -7, 0]) +def test_roll_qtrday_year(other, expected, n): + month = 3 + day_opt = "start" # `other` will be compared to March 1. + + assert roll_qtrday(other, n, month, day_opt, modby=12) == expected[n] + + +@pytest.mark.parametrize( + "other,expected", + [ + # Before June 30. + (datetime(1999, 6, 29), {5: 4, -7: -7, 0: 0}), + # After June 30. + (Timestamp(2072, 8, 24, 6, 17, 18), {5: 5, -7: -6, 0: 1}), + ], +) +@pytest.mark.parametrize("n", [5, -7, 0]) +def test_roll_qtrday_year2(other, expected, n): + month = 6 + day_opt = "end" # `other` will be compared to June 30. + + assert roll_qtrday(other, n, month, day_opt, modby=12) == expected[n] + + +def test_get_day_of_month_error(): + # get_day_of_month is not directly exposed. + # We test it via roll_qtrday. + dt = datetime(2017, 11, 15) + day_opt = "foo" + + with pytest.raises(ValueError, match=day_opt): + # To hit the raising case we need month == dt.month and n > 0. + roll_qtrday(dt, n=3, month=11, day_opt=day_opt, modby=12) + + +@pytest.mark.parametrize( + "month", + [3, 5], # (other.month % 3) < (month % 3) # (other.month % 3) > (month % 3) +) +@pytest.mark.parametrize("n", [4, -3]) +def test_roll_qtr_day_not_mod_unequal(day_opt, month, n): + expected = {3: {-3: -2, 4: 4}, 5: {-3: -3, 4: 3}} + + other = Timestamp(2072, 10, 1, 6, 17, 18) # Saturday. + assert roll_qtrday(other, n, month, day_opt, modby=3) == expected[month][n] + + +@pytest.mark.parametrize( + "other,month,exp_dict", + [ + # Monday. + (datetime(1999, 5, 31), 2, {-1: {"start": 0, "business_start": 0}}), + # Saturday. + ( + Timestamp(2072, 10, 1, 6, 17, 18), + 4, + {2: {"end": 1, "business_end": 1, "business_start": 1}}, + ), + # First business day. + ( + Timestamp(2072, 10, 3, 6, 17, 18), + 4, + {2: {"end": 1, "business_end": 1}, -1: {"start": 0}}, + ), + ], +) +@pytest.mark.parametrize("n", [2, -1]) +def test_roll_qtr_day_mod_equal(other, month, exp_dict, n, day_opt): + # All cases have (other.month % 3) == (month % 3). 
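# Reading the fixtures above: roll_qtrday returns n nudged by +/-1 depending
# on whether `other` has already passed the day-of-month anchor implied by
# day_opt; the nested dicts enumerate the nudged counts, and the plain n is
# the fallback when no adjustment applies.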
+ expected = exp_dict.get(n, {}).get(day_opt, n) + assert roll_qtrday(other, n, month, day_opt, modby=3) == expected + + +@pytest.mark.parametrize( + "n,expected", [(42, {29: 42, 1: 42, 31: 41}), (-4, {29: -4, 1: -3, 31: -4})] +) +@pytest.mark.parametrize("compare", [29, 1, 31]) +def test_roll_convention(n, expected, compare): + assert liboffsets.roll_convention(29, n, compare) == expected[compare] diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tslibs/test_parse_iso8601.py b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/test_parse_iso8601.py new file mode 100644 index 0000000..1c01e82 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/test_parse_iso8601.py @@ -0,0 +1,72 @@ +from datetime import datetime + +import pytest + +from pandas._libs import tslib + + +@pytest.mark.parametrize( + "date_str, exp", + [ + ("2011-01-02", datetime(2011, 1, 2)), + ("2011-1-2", datetime(2011, 1, 2)), + ("2011-01", datetime(2011, 1, 1)), + ("2011-1", datetime(2011, 1, 1)), + ("2011 01 02", datetime(2011, 1, 2)), + ("2011.01.02", datetime(2011, 1, 2)), + ("2011/01/02", datetime(2011, 1, 2)), + ("2011\\01\\02", datetime(2011, 1, 2)), + ("2013-01-01 05:30:00", datetime(2013, 1, 1, 5, 30)), + ("2013-1-1 5:30:00", datetime(2013, 1, 1, 5, 30)), + ], +) +def test_parsers_iso8601(date_str, exp): + # see gh-12060 + # + # Test only the ISO parser - flexibility to + # different separators and leading zeros. + actual = tslib._test_parse_iso8601(date_str) + assert actual == exp + + +@pytest.mark.parametrize( + "date_str", + [ + "2011-01/02", + "2011=11=11", + "201401", + "201111", + "200101", + # Mixed separated and unseparated. + "2005-0101", + "200501-01", + "20010101 12:3456", + "20010101 1234:56", + # HHMMSS must have two digits in + # each component if unseparated.
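# (e.g. "20010101 1" has a one-digit hour, "20010101 123" a one-digit
# minute and "20010101 12345" a one-digit second, so all three below
# must raise)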
+ "20010101 1", + "20010101 123", + "20010101 12345", + "20010101 12345Z", + ], +) +def test_parsers_iso8601_invalid(date_str): + msg = f'Error parsing datetime string "{date_str}"' + + with pytest.raises(ValueError, match=msg): + tslib._test_parse_iso8601(date_str) + + +def test_parsers_iso8601_invalid_offset_invalid(): + date_str = "2001-01-01 12-34-56" + msg = f'Timezone hours offset out of range in datetime string "{date_str}"' + + with pytest.raises(ValueError, match=msg): + tslib._test_parse_iso8601(date_str) + + +def test_parsers_iso8601_leading_space(): + # GH#25895 make sure isoparser doesn't overflow with long input + date_str, expected = ("2013-1-1 5:30:00", datetime(2013, 1, 1, 5, 30)) + actual = tslib._test_parse_iso8601(" " * 200 + date_str) + assert actual == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tslibs/test_parsing.py b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/test_parsing.py new file mode 100644 index 0000000..3d2daec --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/test_parsing.py @@ -0,0 +1,280 @@ +""" +Tests for Timestamp parsing, aimed at pandas/_libs/tslibs/parsing.pyx +""" +from datetime import datetime +import re + +from dateutil.parser import parse +import numpy as np +import pytest + +from pandas._libs.tslibs import parsing +from pandas._libs.tslibs.parsing import parse_time_string +import pandas.util._test_decorators as td + +import pandas._testing as tm + + +def test_parse_time_string(): + (parsed, reso) = parse_time_string("4Q1984") + (parsed_lower, reso_lower) = parse_time_string("4q1984") + + assert reso == reso_lower + assert parsed == parsed_lower + + +def test_parse_time_string_invalid_type(): + # Raise on invalid input, don't just return it + msg = "Argument 'arg' has incorrect type (expected str, got tuple)" + with pytest.raises(TypeError, match=re.escape(msg)): + parse_time_string((4, 5)) + + +@pytest.mark.parametrize( + "dashed,normal", [("1988-Q2", "1988Q2"), ("2Q-1988", "2Q1988")] +) +def test_parse_time_quarter_with_dash(dashed, normal): + # see gh-9688 + (parsed_dash, reso_dash) = parse_time_string(dashed) + (parsed, reso) = parse_time_string(normal) + + assert parsed_dash == parsed + assert reso_dash == reso + + +@pytest.mark.parametrize("dashed", ["-2Q1992", "2-Q1992", "4-4Q1992"]) +def test_parse_time_quarter_with_dash_error(dashed): + msg = f"Unknown datetime string format, unable to parse: {dashed}" + + with pytest.raises(parsing.DateParseError, match=msg): + parse_time_string(dashed) + + +@pytest.mark.parametrize( + "date_string,expected", + [ + ("123.1234", False), + ("-50000", False), + ("999", False), + ("m", False), + ("T", False), + ("Mon Sep 16, 2013", True), + ("2012-01-01", True), + ("01/01/2012", True), + ("01012012", True), + ("0101", True), + ("1-1", True), + ], +) +def test_does_not_convert_mixed_integer(date_string, expected): + assert parsing._does_string_look_like_datetime(date_string) is expected + + +@pytest.mark.parametrize( + "date_str,kwargs,msg", + [ + ( + "2013Q5", + {}, + ( + "Incorrect quarterly string is given, " + "quarter must be between 1 and 4: 2013Q5" + ), + ), + # see gh-5418 + ( + "2013Q1", + {"freq": "INVLD-L-DEC-SAT"}, + ( + "Unable to retrieve month information " + "from given freq: INVLD-L-DEC-SAT" + ), + ), + ], +) +def test_parsers_quarterly_with_freq_error(date_str, kwargs, msg): + with pytest.raises(parsing.DateParseError, match=msg): + parsing.parse_time_string(date_str, **kwargs) + + +@pytest.mark.parametrize( + 
"date_str,freq,expected", + [ + ("2013Q2", None, datetime(2013, 4, 1)), + ("2013Q2", "A-APR", datetime(2012, 8, 1)), + ("2013-Q2", "A-DEC", datetime(2013, 4, 1)), + ], +) +def test_parsers_quarterly_with_freq(date_str, freq, expected): + result, _ = parsing.parse_time_string(date_str, freq=freq) + assert result == expected + + +@pytest.mark.parametrize( + "date_str", ["2Q 2005", "2Q-200A", "2Q-200", "22Q2005", "2Q200.", "6Q-20"] +) +def test_parsers_quarter_invalid(date_str): + if date_str == "6Q-20": + msg = ( + "Incorrect quarterly string is given, quarter " + f"must be between 1 and 4: {date_str}" + ) + else: + msg = f"Unknown datetime string format, unable to parse: {date_str}" + + with pytest.raises(ValueError, match=msg): + parsing.parse_time_string(date_str) + + +@pytest.mark.parametrize( + "date_str,expected", + [("201101", datetime(2011, 1, 1, 0, 0)), ("200005", datetime(2000, 5, 1, 0, 0))], +) +def test_parsers_month_freq(date_str, expected): + result, _ = parsing.parse_time_string(date_str, freq="M") + assert result == expected + + +@td.skip_if_not_us_locale +@pytest.mark.parametrize( + "string,fmt", + [ + ("20111230", "%Y%m%d"), + ("2011-12-30", "%Y-%m-%d"), + ("30-12-2011", "%d-%m-%Y"), + ("2011-12-30 00:00:00", "%Y-%m-%d %H:%M:%S"), + ("2011-12-30T00:00:00", "%Y-%m-%dT%H:%M:%S"), + ("2011-12-30T00:00:00UTC", "%Y-%m-%dT%H:%M:%S%Z"), + ("2011-12-30T00:00:00Z", "%Y-%m-%dT%H:%M:%S%z"), + ("2011-12-30T00:00:00+9", "%Y-%m-%dT%H:%M:%S%z"), + ("2011-12-30T00:00:00+09", "%Y-%m-%dT%H:%M:%S%z"), + ("2011-12-30T00:00:00+090", None), + ("2011-12-30T00:00:00+0900", "%Y-%m-%dT%H:%M:%S%z"), + ("2011-12-30T00:00:00-0900", "%Y-%m-%dT%H:%M:%S%z"), + ("2011-12-30T00:00:00+09:00", "%Y-%m-%dT%H:%M:%S%z"), + ("2011-12-30T00:00:00+09:000", "%Y-%m-%dT%H:%M:%S%z"), + ("2011-12-30T00:00:00+9:0", "%Y-%m-%dT%H:%M:%S%z"), + ("2011-12-30T00:00:00+09:", None), + ("2011-12-30T00:00:00.000000UTC", "%Y-%m-%dT%H:%M:%S.%f%Z"), + ("2011-12-30T00:00:00.000000Z", "%Y-%m-%dT%H:%M:%S.%f%z"), + ("2011-12-30T00:00:00.000000+9", "%Y-%m-%dT%H:%M:%S.%f%z"), + ("2011-12-30T00:00:00.000000+09", "%Y-%m-%dT%H:%M:%S.%f%z"), + ("2011-12-30T00:00:00.000000+090", None), + ("2011-12-30T00:00:00.000000+0900", "%Y-%m-%dT%H:%M:%S.%f%z"), + ("2011-12-30T00:00:00.000000-0900", "%Y-%m-%dT%H:%M:%S.%f%z"), + ("2011-12-30T00:00:00.000000+09:00", "%Y-%m-%dT%H:%M:%S.%f%z"), + ("2011-12-30T00:00:00.000000+09:000", "%Y-%m-%dT%H:%M:%S.%f%z"), + ("2011-12-30T00:00:00.000000+9:0", "%Y-%m-%dT%H:%M:%S.%f%z"), + ("2011-12-30T00:00:00.000000+09:", None), + ("2011-12-30 00:00:00.000000", "%Y-%m-%d %H:%M:%S.%f"), + ("Tue 24 Aug 2021 01:30:48 AM", "%a %d %b %Y %H:%M:%S %p"), + ("Tuesday 24 Aug 2021 01:30:48 AM", "%A %d %b %Y %H:%M:%S %p"), + ], +) +def test_guess_datetime_format_with_parseable_formats(string, fmt): + result = parsing.guess_datetime_format(string) + assert result == fmt + + +@pytest.mark.parametrize("dayfirst,expected", [(True, "%d/%m/%Y"), (False, "%m/%d/%Y")]) +def test_guess_datetime_format_with_dayfirst(dayfirst, expected): + ambiguous_string = "01/01/2011" + result = parsing.guess_datetime_format(ambiguous_string, dayfirst=dayfirst) + assert result == expected + + +@td.skip_if_has_locale +@pytest.mark.parametrize( + "string,fmt", + [ + ("30/Dec/2011", "%d/%b/%Y"), + ("30/December/2011", "%d/%B/%Y"), + ("30/Dec/2011 00:00:00", "%d/%b/%Y %H:%M:%S"), + ], +) +def test_guess_datetime_format_with_locale_specific_formats(string, fmt): + result = parsing.guess_datetime_format(string) + assert result == fmt + + +@pytest.mark.parametrize( + 
"invalid_dt", + [ + "2013", + "01/2013", + "12:00:00", + "1/1/1/1", + "this_is_not_a_datetime", + "51a", + 9, + datetime(2011, 1, 1), + ], +) +def test_guess_datetime_format_invalid_inputs(invalid_dt): + # A datetime string must include a year, month and a day for it to be + # guessable, in addition to being a string that looks like a datetime. + assert parsing.guess_datetime_format(invalid_dt) is None + + +@pytest.mark.parametrize( + "string,fmt", + [ + ("2011-1-1", "%Y-%m-%d"), + ("1/1/2011", "%m/%d/%Y"), + ("30-1-2011", "%d-%m-%Y"), + ("2011-1-1 0:0:0", "%Y-%m-%d %H:%M:%S"), + ("2011-1-3T00:00:0", "%Y-%m-%dT%H:%M:%S"), + ("2011-1-1 00:00:00", "%Y-%m-%d %H:%M:%S"), + ], +) +def test_guess_datetime_format_no_padding(string, fmt): + # see gh-11142 + result = parsing.guess_datetime_format(string) + assert result == fmt + + +def test_try_parse_dates(): + arr = np.array(["5/1/2000", "6/1/2000", "7/1/2000"], dtype=object) + result = parsing.try_parse_dates(arr, dayfirst=True) + + expected = np.array([parse(d, dayfirst=True) for d in arr]) + tm.assert_numpy_array_equal(result, expected) + + +def test_parse_time_string_check_instance_type_raise_exception(): + # issue 20684 + msg = "Argument 'arg' has incorrect type (expected str, got tuple)" + with pytest.raises(TypeError, match=re.escape(msg)): + parse_time_string((1, 2, 3)) + + result = parse_time_string("2019") + expected = (datetime(2019, 1, 1), "year") + assert result == expected + + +@pytest.mark.parametrize( + "fmt,expected", + [ + ("%Y %m %d %H:%M:%S", True), + ("%Y/%m/%d %H:%M:%S", True), + (r"%Y\%m\%d %H:%M:%S", True), + ("%Y-%m-%d %H:%M:%S", True), + ("%Y.%m.%d %H:%M:%S", True), + ("%Y%m%d %H:%M:%S", True), + ("%Y-%m-%dT%H:%M:%S", True), + ("%Y-%m-%dT%H:%M:%S%z", True), + ("%Y-%m-%dT%H:%M:%S%Z", True), + ("%Y-%m-%dT%H:%M:%S.%f", True), + ("%Y-%m-%dT%H:%M:%S.%f%z", True), + ("%Y-%m-%dT%H:%M:%S.%f%Z", True), + ("%Y%m%d", False), + ("%Y%m", False), + ("%Y", False), + ("%Y-%m-%d", True), + ("%Y-%m", True), + ], +) +def test_is_iso_format(fmt, expected): + # see gh-41047 + result = parsing.format_is_iso(fmt) + assert result == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tslibs/test_period_asfreq.py b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/test_period_asfreq.py new file mode 100644 index 0000000..2592fdb --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/test_period_asfreq.py @@ -0,0 +1,90 @@ +import pytest + +from pandas._libs.tslibs import to_offset +from pandas._libs.tslibs.period import ( + period_asfreq, + period_ordinal, +) + + +def get_freq_code(freqstr: str) -> int: + off = to_offset(freqstr) + code = off._period_dtype_code + return code + + +@pytest.mark.parametrize( + "freq1,freq2,expected", + [ + ("D", "H", 24), + ("D", "T", 1440), + ("D", "S", 86400), + ("D", "L", 86400000), + ("D", "U", 86400000000), + ("D", "N", 86400000000000), + ("H", "T", 60), + ("H", "S", 3600), + ("H", "L", 3600000), + ("H", "U", 3600000000), + ("H", "N", 3600000000000), + ("T", "S", 60), + ("T", "L", 60000), + ("T", "U", 60000000), + ("T", "N", 60000000000), + ("S", "L", 1000), + ("S", "U", 1000000), + ("S", "N", 1000000000), + ("L", "U", 1000), + ("L", "N", 1000000), + ("U", "N", 1000), + ], +) +def test_intra_day_conversion_factors(freq1, freq2, expected): + assert ( + period_asfreq(1, get_freq_code(freq1), get_freq_code(freq2), False) == expected + ) + + +@pytest.mark.parametrize( + "freq,expected", [("A", 0), ("M", 0), ("W", 1), ("D", 0), ("B", 0)] +) +def 
test_period_ordinal_start_values(freq, expected): + # information for Jan. 1, 1970. + assert period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, get_freq_code(freq)) == expected + + +@pytest.mark.parametrize( + "dt,expected", + [ + ((1970, 1, 4, 0, 0, 0, 0, 0), 1), + ((1970, 1, 5, 0, 0, 0, 0, 0), 2), + ((2013, 10, 6, 0, 0, 0, 0, 0), 2284), + ((2013, 10, 7, 0, 0, 0, 0, 0), 2285), + ], +) +def test_period_ordinal_week(dt, expected): + args = dt + (get_freq_code("W"),) + assert period_ordinal(*args) == expected + + +@pytest.mark.parametrize( + "day,expected", + [ + # Thursday (Oct. 3, 2013). + (3, 11415), + # Friday (Oct. 4, 2013). + (4, 11416), + # Saturday (Oct. 5, 2013). + (5, 11417), + # Sunday (Oct. 6, 2013). + (6, 11417), + # Monday (Oct. 7, 2013). + (7, 11417), + # Tuesday (Oct. 8, 2013). + (8, 11418), + ], +) +def test_period_ordinal_business_day(day, expected): + # 5000 is PeriodDtypeCode for BusinessDay + args = (2013, 10, day, 0, 0, 0, 0, 0, 5000) + assert period_ordinal(*args) == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tslibs/test_timedeltas.py b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/test_timedeltas.py new file mode 100644 index 0000000..a25f148 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/test_timedeltas.py @@ -0,0 +1,65 @@ +import re + +import numpy as np +import pytest + +from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds + +from pandas import ( + Timedelta, + offsets, +) + + +@pytest.mark.parametrize( + "obj,expected", + [ + (np.timedelta64(14, "D"), 14 * 24 * 3600 * 1e9), + (Timedelta(minutes=-7), -7 * 60 * 1e9), + (Timedelta(minutes=-7).to_pytimedelta(), -7 * 60 * 1e9), + (Timedelta(seconds=1234e-9), 1234), # GH43764, GH40946 + ( + Timedelta(seconds=1e-9, milliseconds=1e-5, microseconds=1e-1), + 111, + ), # GH43764 + ( + Timedelta(days=1, seconds=1e-9, milliseconds=1e-5, microseconds=1e-1), + 24 * 3600e9 + 111, + ), # GH43764 + (offsets.Nano(125), 125), + (1, 1), + (np.int64(2), 2), + (np.int32(3), 3), + ], +) +def test_delta_to_nanoseconds(obj, expected): + result = delta_to_nanoseconds(obj) + assert result == expected + + +def test_delta_to_nanoseconds_error(): + obj = np.array([123456789], dtype="m8[ns]") + + with pytest.raises(TypeError, match=""): + delta_to_nanoseconds(obj) + + +def test_huge_nanoseconds_overflow(): + # GH 32402 + assert delta_to_nanoseconds(Timedelta(1e10)) == 1e10 + assert delta_to_nanoseconds(Timedelta(nanoseconds=1e10)) == 1e10 + + +@pytest.mark.parametrize( + "kwargs", [{"Seconds": 1}, {"seconds": 1, "Nanoseconds": 1}, {"Foo": 2}] +) +def test_kwarg_assertion(kwargs): + err_message = ( + "cannot construct a Timedelta from the passed arguments, " + "allowed keywords are " + "[weeks, days, hours, minutes, seconds, " + "milliseconds, microseconds, nanoseconds]" + ) + + with pytest.raises(ValueError, match=re.escape(err_message)): + Timedelta(**kwargs) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tslibs/test_timezones.py b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/test_timezones.py new file mode 100644 index 0000000..7ab0ad0 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/test_timezones.py @@ -0,0 +1,168 @@ +from datetime import ( + datetime, + timedelta, + timezone, +) + +import dateutil.tz +import pytest +import pytz + +from pandas._libs.tslibs import ( + conversion, + timezones, +) + +from pandas import Timestamp + + +def test_is_utc(utc_fixture): + tz = timezones.maybe_get_tz(utc_fixture) + assert 
timezones.is_utc(tz) + + +@pytest.mark.parametrize("tz_name", list(pytz.common_timezones)) +def test_cache_keys_are_distinct_for_pytz_vs_dateutil(tz_name): + if tz_name == "UTC": + pytest.skip("UTC: special case in dateutil") + + tz_p = timezones.maybe_get_tz(tz_name) + tz_d = timezones.maybe_get_tz("dateutil/" + tz_name) + + if tz_d is None: + pytest.skip(tz_name + ": dateutil does not know about this one") + + assert timezones._p_tz_cache_key(tz_p) != timezones._p_tz_cache_key(tz_d) + + +def test_tzlocal_repr(): + # see gh-13583 + ts = Timestamp("2011-01-01", tz=dateutil.tz.tzlocal()) + assert ts.tz == dateutil.tz.tzlocal() + assert "tz='tzlocal()')" in repr(ts) + + +def test_tzlocal_maybe_get_tz(): + # see gh-13583 + tz = timezones.maybe_get_tz("tzlocal()") + assert tz == dateutil.tz.tzlocal() + + +def test_tzlocal_offset(): + # see gh-13583 + # + # Get offset using normal datetime for test. + ts = Timestamp("2011-01-01", tz=dateutil.tz.tzlocal()) + + offset = dateutil.tz.tzlocal().utcoffset(datetime(2011, 1, 1)) + offset = offset.total_seconds() * 1000000000 + + assert ts.value + offset == Timestamp("2011-01-01").value + + +def test_tzlocal_is_not_utc(): + # even if the machine running the test is localized to UTC + tz = dateutil.tz.tzlocal() + assert not timezones.is_utc(tz) + + assert not timezones.tz_compare(tz, dateutil.tz.tzutc()) + + +def test_tz_compare_utc(utc_fixture, utc_fixture2): + tz = timezones.maybe_get_tz(utc_fixture) + tz2 = timezones.maybe_get_tz(utc_fixture2) + assert timezones.tz_compare(tz, tz2) + + +@pytest.fixture( + params=[ + (pytz.timezone("US/Eastern"), lambda tz, x: tz.localize(x)), + (dateutil.tz.gettz("US/Eastern"), lambda tz, x: x.replace(tzinfo=tz)), + ] +) +def infer_setup(request): + eastern, localize = request.param + + start_naive = datetime(2001, 1, 1) + end_naive = datetime(2009, 1, 1) + + start = localize(eastern, start_naive) + end = localize(eastern, end_naive) + + return eastern, localize, start, end, start_naive, end_naive + + +def test_infer_tz_compat(infer_setup): + eastern, _, start, end, start_naive, end_naive = infer_setup + + assert ( + timezones.infer_tzinfo(start, end) + is conversion.localize_pydatetime(start_naive, eastern).tzinfo + ) + assert ( + timezones.infer_tzinfo(start, None) + is conversion.localize_pydatetime(start_naive, eastern).tzinfo + ) + assert ( + timezones.infer_tzinfo(None, end) + is conversion.localize_pydatetime(end_naive, eastern).tzinfo + ) + + +def test_infer_tz_utc_localize(infer_setup): + _, _, start, end, start_naive, end_naive = infer_setup + utc = pytz.utc + + start = utc.localize(start_naive) + end = utc.localize(end_naive) + + assert timezones.infer_tzinfo(start, end) is utc + + +@pytest.mark.parametrize("ordered", [True, False]) +def test_infer_tz_mismatch(infer_setup, ordered): + eastern, _, _, _, start_naive, end_naive = infer_setup + msg = "Inputs must both have the same timezone" + + utc = pytz.utc + start = utc.localize(start_naive) + end = conversion.localize_pydatetime(end_naive, eastern) + + args = (start, end) if ordered else (end, start) + + with pytest.raises(AssertionError, match=msg): + timezones.infer_tzinfo(*args) + + +def test_maybe_get_tz_invalid_types(): + with pytest.raises(TypeError, match=""): + timezones.maybe_get_tz(44.0) + + with pytest.raises(TypeError, match=""): + timezones.maybe_get_tz(pytz) + + msg = "" + with pytest.raises(TypeError, match=msg): + timezones.maybe_get_tz(Timestamp("2021-01-01", tz="UTC")) + + +def test_maybe_get_tz_offset_only(): + # see gh-36004 + + # 
timezone.utc + tz = timezones.maybe_get_tz(timezone.utc) + assert tz == timezone(timedelta(hours=0, minutes=0)) + + # without UTC+- prefix + tz = timezones.maybe_get_tz("+01:15") + assert tz == timezone(timedelta(hours=1, minutes=15)) + + tz = timezones.maybe_get_tz("-01:15") + assert tz == timezone(-timedelta(hours=1, minutes=15)) + + # with UTC+- prefix + tz = timezones.maybe_get_tz("UTC+02:45") + assert tz == timezone(timedelta(hours=2, minutes=45)) + + tz = timezones.maybe_get_tz("UTC-02:45") + assert tz == timezone(-timedelta(hours=2, minutes=45)) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/tslibs/test_to_offset.py b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/test_to_offset.py new file mode 100644 index 0000000..27ddbb8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/tslibs/test_to_offset.py @@ -0,0 +1,174 @@ +import re + +import pytest + +from pandas._libs.tslibs import ( + Timedelta, + offsets, + to_offset, +) + + +@pytest.mark.parametrize( + "freq_input,expected", + [ + (to_offset("10us"), offsets.Micro(10)), + (offsets.Hour(), offsets.Hour()), + ("2h30min", offsets.Minute(150)), + ("2h 30min", offsets.Minute(150)), + ("2h30min15s", offsets.Second(150 * 60 + 15)), + ("2h 60min", offsets.Hour(3)), + ("2h 20.5min", offsets.Second(8430)), + ("1.5min", offsets.Second(90)), + ("0.5S", offsets.Milli(500)), + ("15l500u", offsets.Micro(15500)), + ("10s75L", offsets.Milli(10075)), + ("1s0.25ms", offsets.Micro(1000250)), + ("1s0.25L", offsets.Micro(1000250)), + ("2800N", offsets.Nano(2800)), + ("2SM", offsets.SemiMonthEnd(2)), + ("2SM-16", offsets.SemiMonthEnd(2, day_of_month=16)), + ("2SMS-14", offsets.SemiMonthBegin(2, day_of_month=14)), + ("2SMS-15", offsets.SemiMonthBegin(2)), + ], +) +def test_to_offset(freq_input, expected): + result = to_offset(freq_input) + assert result == expected + + +@pytest.mark.parametrize( + "freqstr,expected", [("-1S", -1), ("-2SM", -2), ("-1SMS", -1), ("-5min10s", -310)] +) +def test_to_offset_negative(freqstr, expected): + result = to_offset(freqstr) + assert result.n == expected + + +@pytest.mark.parametrize( + "freqstr", + [ + "2h20m", + "U1", + "-U", + "3U1", + "-2-3U", + "-2D:3H", + "1.5.0S", + "2SMS-15-15", + "2SMS-15D", + "100foo", + # Invalid leading +/- signs. + "+-1d", + "-+1h", + "+1", + "-7", + "+d", + "-m", + # Invalid shortcut anchors. + "SM-0", + "SM-28", + "SM-29", + "SM-FOO", + "BSM", + "SM--1", + "SMS-1", + "SMS-28", + "SMS-30", + "SMS-BAR", + "SMS-BYR", + "BSMS", + "SMS--2", + ], +) +def test_to_offset_invalid(freqstr): + # see gh-13930 + + # We escape string because some of our + # inputs contain regex special characters. 
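# A hedged sketch of the happy path (to_offset is importable from
# pandas.tseries.frequencies, its documented location; the reprs shown
# are indicative):
#
#     from pandas.tseries.frequencies import to_offset
#     to_offset("2h30min")   # -> <150 * Minutes>
#     to_offset("-5min10s")  # -> <-310 * Seconds>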
+ msg = re.escape(f"Invalid frequency: {freqstr}") + with pytest.raises(ValueError, match=msg): + to_offset(freqstr) + + +def test_to_offset_no_evaluate(): + msg = str(("", "")) + with pytest.raises(TypeError, match=msg): + to_offset(("", "")) + + +def test_to_offset_tuple_unsupported(): + with pytest.raises(TypeError, match="pass as a string instead"): + to_offset((5, "T")) + + +@pytest.mark.parametrize( + "freqstr,expected", + [ + ("2D 3H", offsets.Hour(51)), + ("2 D3 H", offsets.Hour(51)), + ("2 D 3 H", offsets.Hour(51)), + (" 2 D 3 H ", offsets.Hour(51)), + (" H ", offsets.Hour()), + (" 3 H ", offsets.Hour(3)), + ], +) +def test_to_offset_whitespace(freqstr, expected): + result = to_offset(freqstr) + assert result == expected + + +@pytest.mark.parametrize( + "freqstr,expected", [("00H 00T 01S", 1), ("-00H 03T 14S", -194)] +) +def test_to_offset_leading_zero(freqstr, expected): + result = to_offset(freqstr) + assert result.n == expected + + +@pytest.mark.parametrize("freqstr,expected", [("+1d", 1), ("+2h30min", 150)]) +def test_to_offset_leading_plus(freqstr, expected): + result = to_offset(freqstr) + assert result.n == expected + + +@pytest.mark.parametrize( + "kwargs,expected", + [ + ({"days": 1, "seconds": 1}, offsets.Second(86401)), + ({"days": -1, "seconds": 1}, offsets.Second(-86399)), + ({"hours": 1, "minutes": 10}, offsets.Minute(70)), + ({"hours": 1, "minutes": -10}, offsets.Minute(50)), + ({"weeks": 1}, offsets.Day(7)), + ({"hours": 1}, offsets.Hour(1)), + ({"hours": 1}, to_offset("60min")), + ({"microseconds": 1}, offsets.Micro(1)), + ({"microseconds": 0}, offsets.Nano(0)), + ], +) +def test_to_offset_pd_timedelta(kwargs, expected): + # see gh-9064 + td = Timedelta(**kwargs) + result = to_offset(td) + assert result == expected + + +@pytest.mark.parametrize( + "shortcut,expected", + [ + ("W", offsets.Week(weekday=6)), + ("W-SUN", offsets.Week(weekday=6)), + ("Q", offsets.QuarterEnd(startingMonth=12)), + ("Q-DEC", offsets.QuarterEnd(startingMonth=12)), + ("Q-MAY", offsets.QuarterEnd(startingMonth=5)), + ("SM", offsets.SemiMonthEnd(day_of_month=15)), + ("SM-15", offsets.SemiMonthEnd(day_of_month=15)), + ("SM-1", offsets.SemiMonthEnd(day_of_month=1)), + ("SM-27", offsets.SemiMonthEnd(day_of_month=27)), + ("SMS-2", offsets.SemiMonthBegin(day_of_month=2)), + ("SMS-27", offsets.SemiMonthBegin(day_of_month=27)), + ], +) +def test_anchored_shortcuts(shortcut, expected): + result = to_offset(shortcut) + assert result == expected diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/util/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..e383b63 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/conftest.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/conftest.cpython-38.pyc new file mode 100644 index 0000000..5b219ff Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/conftest.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_assert_almost_equal.cpython-38.pyc 
b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_assert_almost_equal.cpython-38.pyc new file mode 100644 index 0000000..f33dff0 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_assert_almost_equal.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_assert_attr_equal.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_assert_attr_equal.cpython-38.pyc new file mode 100644 index 0000000..0080916 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_assert_attr_equal.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_assert_categorical_equal.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_assert_categorical_equal.cpython-38.pyc new file mode 100644 index 0000000..d6bfcc3 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_assert_categorical_equal.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_assert_extension_array_equal.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_assert_extension_array_equal.cpython-38.pyc new file mode 100644 index 0000000..766ed5a Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_assert_extension_array_equal.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_assert_frame_equal.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_assert_frame_equal.cpython-38.pyc new file mode 100644 index 0000000..3bc9115 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_assert_frame_equal.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_assert_index_equal.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_assert_index_equal.cpython-38.pyc new file mode 100644 index 0000000..3c9a9a9 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_assert_index_equal.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_assert_interval_array_equal.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_assert_interval_array_equal.cpython-38.pyc new file mode 100644 index 0000000..c5cca13 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_assert_interval_array_equal.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_assert_numpy_array_equal.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_assert_numpy_array_equal.cpython-38.pyc new file mode 100644 index 0000000..499db6d Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_assert_numpy_array_equal.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_assert_produces_warning.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_assert_produces_warning.cpython-38.pyc new file mode 100644 index 0000000..d893989 Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_assert_produces_warning.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_assert_series_equal.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_assert_series_equal.cpython-38.pyc new file mode 100644 index 0000000..4dd61df Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_assert_series_equal.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_deprecate.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_deprecate.cpython-38.pyc new file mode 100644 index 0000000..60430b4 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_deprecate.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_deprecate_kwarg.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_deprecate_kwarg.cpython-38.pyc new file mode 100644 index 0000000..d6c03eb Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_deprecate_kwarg.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_deprecate_nonkeyword_arguments.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_deprecate_nonkeyword_arguments.cpython-38.pyc new file mode 100644 index 0000000..9004c7a Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_deprecate_nonkeyword_arguments.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_doc.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_doc.cpython-38.pyc new file mode 100644 index 0000000..59f0fd6 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_doc.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_hashing.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_hashing.cpython-38.pyc new file mode 100644 index 0000000..1522895 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_hashing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_numba.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_numba.cpython-38.pyc new file mode 100644 index 0000000..60004ad Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_numba.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_safe_import.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_safe_import.cpython-38.pyc new file mode 100644 index 0000000..e5bd77a Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_safe_import.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_shares_memory.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_shares_memory.cpython-38.pyc new file mode 100644 index 0000000..52106dc Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_shares_memory.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_show_versions.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_show_versions.cpython-38.pyc new file mode 100644 index 0000000..ba4dd16 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_show_versions.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_util.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_util.cpython-38.pyc new file mode 100644 index 0000000..33defde Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_util.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_validate_args.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_validate_args.cpython-38.pyc new file mode 100644 index 0000000..c7526c4 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_validate_args.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_validate_args_and_kwargs.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_validate_args_and_kwargs.cpython-38.pyc new file mode 100644 index 0000000..cf9854e Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_validate_args_and_kwargs.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_validate_inclusive.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_validate_inclusive.cpython-38.pyc new file mode 100644 index 0000000..48eed15 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_validate_inclusive.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_validate_kwargs.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_validate_kwargs.cpython-38.pyc new file mode 100644 index 0000000..9be455d Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/util/__pycache__/test_validate_kwargs.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/conftest.py b/.local/lib/python3.8/site-packages/pandas/tests/util/conftest.py new file mode 100644 index 0000000..b68bcc9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/util/conftest.py @@ -0,0 +1,26 @@ +import pytest + + +@pytest.fixture(params=[True, False]) +def check_dtype(request): + return request.param + + +@pytest.fixture(params=[True, False]) +def check_exact(request): + return request.param + + +@pytest.fixture(params=[True, False]) +def check_index_type(request): + return request.param + + +@pytest.fixture(params=[0.5e-3, 0.5e-5]) +def rtol(request): + return request.param + + +@pytest.fixture(params=[True, False]) +def check_categorical(request): + return request.param diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/test_assert_almost_equal.py b/.local/lib/python3.8/site-packages/pandas/tests/util/test_assert_almost_equal.py new file mode 100644 index 0000000..ab53707 --- /dev/null +++ 
b/.local/lib/python3.8/site-packages/pandas/tests/util/test_assert_almost_equal.py @@ -0,0 +1,460 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + Series, + Timestamp, +) +import pandas._testing as tm + + +def _assert_almost_equal_both(a, b, **kwargs): + """ + Check that two objects are approximately equal. + + This check is performed commutatively. + + Parameters + ---------- + a : object + The first object to compare. + b : object + The second object to compare. + **kwargs + The arguments passed to `tm.assert_almost_equal`. + """ + tm.assert_almost_equal(a, b, **kwargs) + tm.assert_almost_equal(b, a, **kwargs) + + +def _assert_not_almost_equal(a, b, **kwargs): + """ + Check that two objects are not approximately equal. + + Parameters + ---------- + a : object + The first object to compare. + b : object + The second object to compare. + **kwargs + The arguments passed to `tm.assert_almost_equal`. + """ + try: + tm.assert_almost_equal(a, b, **kwargs) + msg = f"{a} and {b} were approximately equal when they shouldn't have been" + pytest.fail(msg=msg) + except AssertionError: + pass + + +def _assert_not_almost_equal_both(a, b, **kwargs): + """ + Check that two objects are not approximately equal. + + This check is performed commutatively. + + Parameters + ---------- + a : object + The first object to compare. + b : object + The second object to compare. + **kwargs + The arguments passed to `tm.assert_almost_equal`. + """ + _assert_not_almost_equal(a, b, **kwargs) + _assert_not_almost_equal(b, a, **kwargs) + + +@pytest.mark.parametrize( + "a,b,check_less_precise", + [(1.1, 1.1, False), (1.1, 1.100001, True), (1.1, 1.1001, 2)], +) +def test_assert_almost_equal_deprecated(a, b, check_less_precise): + # GH#30562 + with tm.assert_produces_warning(FutureWarning): + _assert_almost_equal_both(a, b, check_less_precise=check_less_precise) + + +@pytest.mark.parametrize( + "a,b", + [ + (1.1, 1.1), + (1.1, 1.100001), + (np.int16(1), 1.000001), + (np.float64(1.1), 1.1), + (np.uint32(5), 5), + ], +) +def test_assert_almost_equal_numbers(a, b): + _assert_almost_equal_both(a, b) + + +@pytest.mark.parametrize( + "a,b", + [ + (1.1, 1), + (1.1, True), + (1, 2), + (1.0001, np.int16(1)), + # The following two examples are not "almost equal" due to tol. 
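# (default tolerances are rtol=1e-5 and atol=1e-8, cf. the message asserted
# in test_assert_almost_equal_value_mismatch further down; |0.1 - 0.1001| =
# 1e-4 clearly exceeds atol + rtol * 0.1001)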
+ (0.1, 0.1001), + (0.0011, 0.0012), + ], +) +def test_assert_not_almost_equal_numbers(a, b): + _assert_not_almost_equal_both(a, b) + + +@pytest.mark.parametrize( + "a,b", + [ + (1.1, 1.1), + (1.1, 1.100001), + (1.1, 1.1001), + (0.000001, 0.000005), + (1000.0, 1000.0005), + # Testing this example, as per #13357 + (0.000011, 0.000012), + ], +) +def test_assert_almost_equal_numbers_atol(a, b): + # Equivalent to the deprecated check_less_precise=True + _assert_almost_equal_both(a, b, rtol=0.5e-3, atol=0.5e-3) + + +@pytest.mark.parametrize("a,b", [(1.1, 1.11), (0.1, 0.101), (0.000011, 0.001012)]) +def test_assert_not_almost_equal_numbers_atol(a, b): + _assert_not_almost_equal_both(a, b, atol=1e-3) + + +@pytest.mark.parametrize( + "a,b", + [ + (1.1, 1.1), + (1.1, 1.100001), + (1.1, 1.1001), + (1000.0, 1000.0005), + (1.1, 1.11), + (0.1, 0.101), + ], +) +def test_assert_almost_equal_numbers_rtol(a, b): + _assert_almost_equal_both(a, b, rtol=0.05) + + +@pytest.mark.parametrize("a,b", [(0.000011, 0.000012), (0.000001, 0.000005)]) +def test_assert_not_almost_equal_numbers_rtol(a, b): + _assert_not_almost_equal_both(a, b, rtol=0.05) + + +@pytest.mark.parametrize( + "a,b,rtol", + [ + (1.00001, 1.00005, 0.001), + (-0.908356 + 0.2j, -0.908358 + 0.2j, 1e-3), + (0.1 + 1.009j, 0.1 + 1.006j, 0.1), + (0.1001 + 2.0j, 0.1 + 2.001j, 0.01), + ], +) +def test_assert_almost_equal_complex_numbers(a, b, rtol): + _assert_almost_equal_both(a, b, rtol=rtol) + _assert_almost_equal_both(np.complex64(a), np.complex64(b), rtol=rtol) + _assert_almost_equal_both(np.complex128(a), np.complex128(b), rtol=rtol) + + +@pytest.mark.parametrize( + "a,b,rtol", + [ + (0.58310768, 0.58330768, 1e-7), + (-0.908 + 0.2j, -0.978 + 0.2j, 0.001), + (0.1 + 1j, 0.1 + 2j, 0.01), + (-0.132 + 1.001j, -0.132 + 1.005j, 1e-5), + (0.58310768j, 0.58330768j, 1e-9), + ], +) +def test_assert_not_almost_equal_complex_numbers(a, b, rtol): + _assert_not_almost_equal_both(a, b, rtol=rtol) + _assert_not_almost_equal_both(np.complex64(a), np.complex64(b), rtol=rtol) + _assert_not_almost_equal_both(np.complex128(a), np.complex128(b), rtol=rtol) + + +@pytest.mark.parametrize("a,b", [(0, 0), (0, 0.0), (0, np.float64(0)), (0.00000001, 0)]) +def test_assert_almost_equal_numbers_with_zeros(a, b): + _assert_almost_equal_both(a, b) + + +@pytest.mark.parametrize("a,b", [(0.001, 0), (1, 0)]) +def test_assert_not_almost_equal_numbers_with_zeros(a, b): + _assert_not_almost_equal_both(a, b) + + +@pytest.mark.parametrize("a,b", [(1, "abc"), (1, [1]), (1, object())]) +def test_assert_not_almost_equal_numbers_with_mixed(a, b): + _assert_not_almost_equal_both(a, b) + + +@pytest.mark.parametrize( + "left_dtype", ["M8[ns]", "m8[ns]", "float64", "int64", "object"] +) +@pytest.mark.parametrize( + "right_dtype", ["M8[ns]", "m8[ns]", "float64", "int64", "object"] +) +def test_assert_almost_equal_edge_case_ndarrays(left_dtype, right_dtype): + # Empty compare. 
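# Empty arrays carry no values to disagree on, so with check_dtype=False
# every dtype pairing above must compare equal in both directions.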
+ _assert_almost_equal_both( + np.array([], dtype=left_dtype), + np.array([], dtype=right_dtype), + check_dtype=False, + ) + + +def test_assert_almost_equal_dicts(): + _assert_almost_equal_both({"a": 1, "b": 2}, {"a": 1, "b": 2}) + + +@pytest.mark.parametrize( + "a,b", + [ + ({"a": 1, "b": 2}, {"a": 1, "b": 3}), + ({"a": 1, "b": 2}, {"a": 1, "b": 2, "c": 3}), + ({"a": 1}, 1), + ({"a": 1}, "abc"), + ({"a": 1}, [1]), + ], +) +def test_assert_not_almost_equal_dicts(a, b): + _assert_not_almost_equal_both(a, b) + + +@pytest.mark.parametrize("val", [1, 2]) +def test_assert_almost_equal_dict_like_object(val): + dict_val = 1 + real_dict = {"a": val} + + class DictLikeObj: + def keys(self): + return ("a",) + + def __getitem__(self, item): + if item == "a": + return dict_val + + func = ( + _assert_almost_equal_both if val == dict_val else _assert_not_almost_equal_both + ) + func(real_dict, DictLikeObj(), check_dtype=False) + + +def test_assert_almost_equal_strings(): + _assert_almost_equal_both("abc", "abc") + + +@pytest.mark.parametrize( + "a,b", [("abc", "abcd"), ("abc", "abd"), ("abc", 1), ("abc", [1])] +) +def test_assert_not_almost_equal_strings(a, b): + _assert_not_almost_equal_both(a, b) + + +@pytest.mark.parametrize( + "a,b", [([1, 2, 3], [1, 2, 3]), (np.array([1, 2, 3]), np.array([1, 2, 3]))] +) +def test_assert_almost_equal_iterables(a, b): + _assert_almost_equal_both(a, b) + + +@pytest.mark.parametrize( + "a,b", + [ + # Class is different. + (np.array([1, 2, 3]), [1, 2, 3]), + # Dtype is different. + (np.array([1, 2, 3]), np.array([1.0, 2.0, 3.0])), + # Can't compare generators. + (iter([1, 2, 3]), [1, 2, 3]), + ([1, 2, 3], [1, 2, 4]), + ([1, 2, 3], [1, 2, 3, 4]), + ([1, 2, 3], 1), + ], +) +def test_assert_not_almost_equal_iterables(a, b): + _assert_not_almost_equal(a, b) + + +def test_assert_almost_equal_null(): + _assert_almost_equal_both(None, None) + + +@pytest.mark.parametrize("a,b", [(None, np.NaN), (None, 0), (np.NaN, 0)]) +def test_assert_not_almost_equal_null(a, b): + _assert_not_almost_equal(a, b) + + +@pytest.mark.parametrize( + "a,b", + [ + (np.inf, np.inf), + (np.inf, float("inf")), + (np.array([np.inf, np.nan, -np.inf]), np.array([np.inf, np.nan, -np.inf])), + ( + np.array([np.inf, None, -np.inf], dtype=np.object_), + np.array([np.inf, np.nan, -np.inf], dtype=np.object_), + ), + ], +) +def test_assert_almost_equal_inf(a, b): + _assert_almost_equal_both(a, b) + + +def test_assert_not_almost_equal_inf(): + _assert_not_almost_equal_both(np.inf, 0) + + +@pytest.mark.parametrize( + "a,b", + [ + (Index([1.0, 1.1]), Index([1.0, 1.100001])), + (Series([1.0, 1.1]), Series([1.0, 1.100001])), + (np.array([1.1, 2.000001]), np.array([1.1, 2.0])), + (DataFrame({"a": [1.0, 1.1]}), DataFrame({"a": [1.0, 1.100001]})), + ], +) +def test_assert_almost_equal_pandas(a, b): + _assert_almost_equal_both(a, b) + + +def test_assert_almost_equal_object(): + a = [Timestamp("2011-01-01"), Timestamp("2011-01-01")] + b = [Timestamp("2011-01-01"), Timestamp("2011-01-01")] + _assert_almost_equal_both(a, b) + + +def test_assert_almost_equal_value_mismatch(): + msg = "expected 2\\.00000 but got 1\\.00000, with rtol=1e-05, atol=1e-08" + + with pytest.raises(AssertionError, match=msg): + tm.assert_almost_equal(1, 2) + + +@pytest.mark.parametrize( + "a,b,klass1,klass2", + [(np.array([1]), 1, "ndarray", "int"), (1, np.array([1]), "int", "ndarray")], +) +def test_assert_almost_equal_class_mismatch(a, b, klass1, klass2): + + msg = f"""numpy array are different + +numpy array classes are different +\\[left\\]: 
{klass1} +\\[right\\]: {klass2}""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_almost_equal(a, b) + + +def test_assert_almost_equal_value_mismatch1(): + msg = """numpy array are different + +numpy array values are different \\(66\\.66667 %\\) +\\[left\\]: \\[nan, 2\\.0, 3\\.0\\] +\\[right\\]: \\[1\\.0, nan, 3\\.0\\]""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_almost_equal(np.array([np.nan, 2, 3]), np.array([1, np.nan, 3])) + + +def test_assert_almost_equal_value_mismatch2(): + msg = """numpy array are different + +numpy array values are different \\(50\\.0 %\\) +\\[left\\]: \\[1, 2\\] +\\[right\\]: \\[1, 3\\]""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_almost_equal(np.array([1, 2]), np.array([1, 3])) + + +def test_assert_almost_equal_value_mismatch3(): + msg = """numpy array are different + +numpy array values are different \\(16\\.66667 %\\) +\\[left\\]: \\[\\[1, 2\\], \\[3, 4\\], \\[5, 6\\]\\] +\\[right\\]: \\[\\[1, 3\\], \\[3, 4\\], \\[5, 6\\]\\]""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_almost_equal( + np.array([[1, 2], [3, 4], [5, 6]]), np.array([[1, 3], [3, 4], [5, 6]]) + ) + + +def test_assert_almost_equal_value_mismatch4(): + msg = """numpy array are different + +numpy array values are different \\(25\\.0 %\\) +\\[left\\]: \\[\\[1, 2\\], \\[3, 4\\]\\] +\\[right\\]: \\[\\[1, 3\\], \\[3, 4\\]\\]""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_almost_equal(np.array([[1, 2], [3, 4]]), np.array([[1, 3], [3, 4]])) + + +def test_assert_almost_equal_shape_mismatch_override(): + msg = """Index are different + +Index shapes are different +\\[left\\]: \\(2L*,\\) +\\[right\\]: \\(3L*,\\)""" + with pytest.raises(AssertionError, match=msg): + tm.assert_almost_equal(np.array([1, 2]), np.array([3, 4, 5]), obj="Index") + + +def test_assert_almost_equal_unicode(): + # see gh-20503 + msg = """numpy array are different + +numpy array values are different \\(33\\.33333 %\\) +\\[left\\]: \\[á, à, ä\\] +\\[right\\]: \\[á, à, å\\]""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_almost_equal(np.array(["á", "à", "ä"]), np.array(["á", "à", "å"])) + + +def test_assert_almost_equal_timestamp(): + a = np.array([Timestamp("2011-01-01"), Timestamp("2011-01-01")]) + b = np.array([Timestamp("2011-01-01"), Timestamp("2011-01-02")]) + + msg = """numpy array are different + +numpy array values are different \\(50\\.0 %\\) +\\[left\\]: \\[2011-01-01 00:00:00, 2011-01-01 00:00:00\\] +\\[right\\]: \\[2011-01-01 00:00:00, 2011-01-02 00:00:00\\]""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_almost_equal(a, b) + + +def test_assert_almost_equal_iterable_length_mismatch(): + msg = """Iterable are different + +Iterable length are different +\\[left\\]: 2 +\\[right\\]: 3""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_almost_equal([1, 2], [3, 4, 5]) + + +def test_assert_almost_equal_iterable_values_mismatch(): + msg = """Iterable are different + +Iterable values are different \\(50\\.0 %\\) +\\[left\\]: \\[1, 2\\] +\\[right\\]: \\[1, 3\\]""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_almost_equal([1, 2], [1, 3]) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/test_assert_attr_equal.py b/.local/lib/python3.8/site-packages/pandas/tests/util/test_assert_attr_equal.py new file mode 100644 index 0000000..115ef58 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/util/test_assert_attr_equal.py @@ -0,0 +1,33 @@ +from types 
import SimpleNamespace + +import pytest + +from pandas.core.dtypes.common import is_float + +import pandas._testing as tm + + +def test_assert_attr_equal(nulls_fixture): + obj = SimpleNamespace() + obj.na_value = nulls_fixture + assert tm.assert_attr_equal("na_value", obj, obj) + + +def test_assert_attr_equal_different_nulls(nulls_fixture, nulls_fixture2): + obj = SimpleNamespace() + obj.na_value = nulls_fixture + + obj2 = SimpleNamespace() + obj2.na_value = nulls_fixture2 + + if nulls_fixture is nulls_fixture2: + assert tm.assert_attr_equal("na_value", obj, obj2) + elif is_float(nulls_fixture) and is_float(nulls_fixture2): + # we consider float("nan") and np.float64("nan") to be equivalent + assert tm.assert_attr_equal("na_value", obj, obj2) + elif type(nulls_fixture) is type(nulls_fixture2): + # e.g. Decimal("NaN") + assert tm.assert_attr_equal("na_value", obj, obj2) + else: + with pytest.raises(AssertionError, match='"na_value" are different'): + tm.assert_attr_equal("na_value", obj, obj2) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/test_assert_categorical_equal.py b/.local/lib/python3.8/site-packages/pandas/tests/util/test_assert_categorical_equal.py new file mode 100644 index 0000000..29a0805 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/util/test_assert_categorical_equal.py @@ -0,0 +1,90 @@ +import pytest + +from pandas import Categorical +import pandas._testing as tm + + +@pytest.mark.parametrize( + "c", + [Categorical([1, 2, 3, 4]), Categorical([1, 2, 3, 4], categories=[1, 2, 3, 4, 5])], +) +def test_categorical_equal(c): + tm.assert_categorical_equal(c, c) + + +@pytest.mark.parametrize("check_category_order", [True, False]) +def test_categorical_equal_order_mismatch(check_category_order): + c1 = Categorical([1, 2, 3, 4], categories=[1, 2, 3, 4]) + c2 = Categorical([1, 2, 3, 4], categories=[4, 3, 2, 1]) + kwargs = {"check_category_order": check_category_order} + + if check_category_order: + msg = """Categorical\\.categories are different + +Categorical\\.categories values are different \\(100\\.0 %\\) +\\[left\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\) +\\[right\\]: Int64Index\\(\\[4, 3, 2, 1\\], dtype='int64'\\)""" + with pytest.raises(AssertionError, match=msg): + tm.assert_categorical_equal(c1, c2, **kwargs) + else: + tm.assert_categorical_equal(c1, c2, **kwargs) + + +def test_categorical_equal_categories_mismatch(): + msg = """Categorical\\.categories are different + +Categorical\\.categories values are different \\(25\\.0 %\\) +\\[left\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\) +\\[right\\]: Int64Index\\(\\[1, 2, 3, 5\\], dtype='int64'\\)""" + + c1 = Categorical([1, 2, 3, 4]) + c2 = Categorical([1, 2, 3, 5]) + + with pytest.raises(AssertionError, match=msg): + tm.assert_categorical_equal(c1, c2) + + +def test_categorical_equal_codes_mismatch(): + categories = [1, 2, 3, 4] + msg = """Categorical\\.codes are different + +Categorical\\.codes values are different \\(50\\.0 %\\) +\\[left\\]: \\[0, 1, 3, 2\\] +\\[right\\]: \\[0, 1, 2, 3\\]""" + + c1 = Categorical([1, 2, 4, 3], categories=categories) + c2 = Categorical([1, 2, 3, 4], categories=categories) + + with pytest.raises(AssertionError, match=msg): + tm.assert_categorical_equal(c1, c2) + + +def test_categorical_equal_ordered_mismatch(): + data = [1, 2, 3, 4] + msg = """Categorical are different + +Attribute "ordered" are different +\\[left\\]: False +\\[right\\]: True""" + + c1 = Categorical(data, ordered=False) + c2 = Categorical(data, ordered=True) + + with 
pytest.raises(AssertionError, match=msg): + tm.assert_categorical_equal(c1, c2) + + +@pytest.mark.parametrize("obj", ["index", "foo", "pandas"]) +def test_categorical_equal_object_override(obj): + data = [1, 2, 3, 4] + msg = f"""{obj} are different + +Attribute "ordered" are different +\\[left\\]: False +\\[right\\]: True""" + + c1 = Categorical(data, ordered=False) + c2 = Categorical(data, ordered=True) + + with pytest.raises(AssertionError, match=msg): + tm.assert_categorical_equal(c1, c2, obj=obj) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/test_assert_extension_array_equal.py b/.local/lib/python3.8/site-packages/pandas/tests/util/test_assert_extension_array_equal.py new file mode 100644 index 0000000..545f0dc --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/util/test_assert_extension_array_equal.py @@ -0,0 +1,113 @@ +import numpy as np +import pytest + +from pandas import array +import pandas._testing as tm +from pandas.core.arrays.sparse import SparseArray + + +@pytest.mark.parametrize( + "kwargs", + [ + {}, # Default is check_exact=False + {"check_exact": False}, + {"check_exact": True}, + ], +) +def test_assert_extension_array_equal_not_exact(kwargs): + # see gh-23709 + arr1 = SparseArray([-0.17387645482451206, 0.3414148016424936]) + arr2 = SparseArray([-0.17387645482451206, 0.3414148016424937]) + + if kwargs.get("check_exact", False): + msg = """\ +ExtensionArray are different + +ExtensionArray values are different \\(50\\.0 %\\) +\\[left\\]: \\[-0\\.17387645482.*, 0\\.341414801642.*\\] +\\[right\\]: \\[-0\\.17387645482.*, 0\\.341414801642.*\\]""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_extension_array_equal(arr1, arr2, **kwargs) + else: + tm.assert_extension_array_equal(arr1, arr2, **kwargs) + + +@pytest.mark.parametrize("decimals", range(10)) +def test_assert_extension_array_equal_less_precise(decimals): + rtol = 0.5 * 10 ** -decimals + arr1 = SparseArray([0.5, 0.123456]) + arr2 = SparseArray([0.5, 0.123457]) + + if decimals >= 5: + msg = """\ +ExtensionArray are different + +ExtensionArray values are different \\(50\\.0 %\\) +\\[left\\]: \\[0\\.5, 0\\.123456\\] +\\[right\\]: \\[0\\.5, 0\\.123457\\]""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_extension_array_equal(arr1, arr2, rtol=rtol) + else: + tm.assert_extension_array_equal(arr1, arr2, rtol=rtol) + + +def test_assert_extension_array_equal_dtype_mismatch(check_dtype): + end = 5 + kwargs = {"check_dtype": check_dtype} + + arr1 = SparseArray(np.arange(end, dtype="int64")) + arr2 = SparseArray(np.arange(end, dtype="int32")) + + if check_dtype: + msg = """\ +ExtensionArray are different + +Attribute "dtype" are different +\\[left\\]: Sparse\\[int64, 0\\] +\\[right\\]: Sparse\\[int32, 0\\]""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_extension_array_equal(arr1, arr2, **kwargs) + else: + tm.assert_extension_array_equal(arr1, arr2, **kwargs) + + +def test_assert_extension_array_equal_missing_values(): + arr1 = SparseArray([np.nan, 1, 2, np.nan]) + arr2 = SparseArray([np.nan, 1, 2, 3]) + + msg = """\ +ExtensionArray NA mask are different + +ExtensionArray NA mask values are different \\(25\\.0 %\\) +\\[left\\]: \\[True, False, False, True\\] +\\[right\\]: \\[True, False, False, False\\]""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_extension_array_equal(arr1, arr2) + + +@pytest.mark.parametrize("side", ["left", "right"]) +def test_assert_extension_array_equal_non_extension_array(side): + numpy_array = 
np.arange(5) + extension_array = SparseArray(numpy_array) + + msg = f"{side} is not an ExtensionArray" + args = ( + (numpy_array, extension_array) + if side == "left" + else (extension_array, numpy_array) + ) + + with pytest.raises(AssertionError, match=msg): + tm.assert_extension_array_equal(*args) + + +@pytest.mark.parametrize("right_dtype", ["Int32", "int64"]) +def test_assert_extension_array_equal_ignore_dtype_mismatch(right_dtype): + # https://github.com/pandas-dev/pandas/issues/35715 + left = array([1, 2, 3], dtype="Int64") + right = array([1, 2, 3], dtype=right_dtype) + tm.assert_extension_array_equal(left, right, check_dtype=False) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/test_assert_frame_equal.py b/.local/lib/python3.8/site-packages/pandas/tests/util/test_assert_frame_equal.py new file mode 100644 index 0000000..6ff1a1c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/util/test_assert_frame_equal.py @@ -0,0 +1,344 @@ +import pytest + +import pandas as pd +from pandas import DataFrame +import pandas._testing as tm + + +@pytest.fixture(params=[True, False]) +def by_blocks_fixture(request): + return request.param + + +@pytest.fixture(params=["DataFrame", "Series"]) +def obj_fixture(request): + return request.param + + +def _assert_frame_equal_both(a, b, **kwargs): + """ + Check that two DataFrame equal. + + This check is performed commutatively. + + Parameters + ---------- + a : DataFrame + The first DataFrame to compare. + b : DataFrame + The second DataFrame to compare. + kwargs : dict + The arguments passed to `tm.assert_frame_equal`. + """ + tm.assert_frame_equal(a, b, **kwargs) + tm.assert_frame_equal(b, a, **kwargs) + + +def _assert_not_frame_equal(a, b, **kwargs): + """ + Check that two DataFrame are not equal. + + Parameters + ---------- + a : DataFrame + The first DataFrame to compare. + b : DataFrame + The second DataFrame to compare. + kwargs : dict + The arguments passed to `tm.assert_frame_equal`. + """ + msg = "The two DataFrames were equal when they shouldn't have been" + with pytest.raises(AssertionError, match=msg): + tm.assert_frame_equal(a, b, **kwargs) + + +def _assert_not_frame_equal_both(a, b, **kwargs): + """ + Check that two DataFrame are not equal. + + This check is performed commutatively. + + Parameters + ---------- + a : DataFrame + The first DataFrame to compare. + b : DataFrame + The second DataFrame to compare. + kwargs : dict + The arguments passed to `tm.assert_frame_equal`. + """ + _assert_not_frame_equal(a, b, **kwargs) + _assert_not_frame_equal(b, a, **kwargs) + + +@pytest.mark.parametrize("check_like", [True, False]) +def test_frame_equal_row_order_mismatch(check_like, obj_fixture): + df1 = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=["a", "b", "c"]) + df2 = DataFrame({"A": [3, 2, 1], "B": [6, 5, 4]}, index=["c", "b", "a"]) + + if not check_like: # Do not ignore row-column orderings. 
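+ # With check_like=False the rows must appear in the same order, so the
+ # reversed index is reported as a difference; with check_like=True the
+ # frames are aligned on their labels before comparison.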
+ msg = f"{obj_fixture}.index are different" + with pytest.raises(AssertionError, match=msg): + tm.assert_frame_equal(df1, df2, check_like=check_like, obj=obj_fixture) + else: + _assert_frame_equal_both(df1, df2, check_like=check_like, obj=obj_fixture) + + +@pytest.mark.parametrize( + "df1,df2", + [ + (DataFrame({"A": [1, 2, 3]}), DataFrame({"A": [1, 2, 3, 4]})), + (DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), DataFrame({"A": [1, 2, 3]})), + ], +) +def test_frame_equal_shape_mismatch(df1, df2, obj_fixture): + msg = f"{obj_fixture} are different" + + with pytest.raises(AssertionError, match=msg): + tm.assert_frame_equal(df1, df2, obj=obj_fixture) + + +@pytest.mark.parametrize( + "df1,df2,msg", + [ + # Index + ( + DataFrame.from_records({"a": [1, 2], "c": ["l1", "l2"]}, index=["a"]), + DataFrame.from_records({"a": [1.0, 2.0], "c": ["l1", "l2"]}, index=["a"]), + "DataFrame\\.index are different", + ), + # MultiIndex + ( + DataFrame.from_records( + {"a": [1, 2], "b": [2.1, 1.5], "c": ["l1", "l2"]}, index=["a", "b"] + ), + DataFrame.from_records( + {"a": [1.0, 2.0], "b": [2.1, 1.5], "c": ["l1", "l2"]}, index=["a", "b"] + ), + "MultiIndex level \\[0\\] are different", + ), + ], +) +def test_frame_equal_index_dtype_mismatch(df1, df2, msg, check_index_type): + kwargs = {"check_index_type": check_index_type} + + if check_index_type: + with pytest.raises(AssertionError, match=msg): + tm.assert_frame_equal(df1, df2, **kwargs) + else: + tm.assert_frame_equal(df1, df2, **kwargs) + + +def test_empty_dtypes(check_dtype): + columns = ["col1", "col2"] + df1 = DataFrame(columns=columns) + df2 = DataFrame(columns=columns) + + kwargs = {"check_dtype": check_dtype} + df1["col1"] = df1["col1"].astype("int64") + + if check_dtype: + msg = r"Attributes of DataFrame\..* are different" + with pytest.raises(AssertionError, match=msg): + tm.assert_frame_equal(df1, df2, **kwargs) + else: + tm.assert_frame_equal(df1, df2, **kwargs) + + +@pytest.mark.parametrize("check_like", [True, False]) +def test_frame_equal_index_mismatch(check_like, obj_fixture): + msg = f"""{obj_fixture}\\.index are different + +{obj_fixture}\\.index values are different \\(33\\.33333 %\\) +\\[left\\]: Index\\(\\['a', 'b', 'c'\\], dtype='object'\\) +\\[right\\]: Index\\(\\['a', 'b', 'd'\\], dtype='object'\\)""" + + df1 = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=["a", "b", "c"]) + df2 = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=["a", "b", "d"]) + + with pytest.raises(AssertionError, match=msg): + tm.assert_frame_equal(df1, df2, check_like=check_like, obj=obj_fixture) + + +@pytest.mark.parametrize("check_like", [True, False]) +def test_frame_equal_columns_mismatch(check_like, obj_fixture): + msg = f"""{obj_fixture}\\.columns are different + +{obj_fixture}\\.columns values are different \\(50\\.0 %\\) +\\[left\\]: Index\\(\\['A', 'B'\\], dtype='object'\\) +\\[right\\]: Index\\(\\['A', 'b'\\], dtype='object'\\)""" + + df1 = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=["a", "b", "c"]) + df2 = DataFrame({"A": [1, 2, 3], "b": [4, 5, 6]}, index=["a", "b", "c"]) + + with pytest.raises(AssertionError, match=msg): + tm.assert_frame_equal(df1, df2, check_like=check_like, obj=obj_fixture) + + +def test_frame_equal_block_mismatch(by_blocks_fixture, obj_fixture): + obj = obj_fixture + msg = f"""{obj}\\.iloc\\[:, 1\\] \\(column name="B"\\) are different + +{obj}\\.iloc\\[:, 1\\] \\(column name="B"\\) values are different \\(33\\.33333 %\\) +\\[index\\]: \\[0, 1, 2\\] +\\[left\\]: \\[4, 5, 6\\] +\\[right\\]: \\[4, 5, 7\\]""" + + df1 = 
DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + df2 = DataFrame({"A": [1, 2, 3], "B": [4, 5, 7]}) + + with pytest.raises(AssertionError, match=msg): + tm.assert_frame_equal(df1, df2, by_blocks=by_blocks_fixture, obj=obj_fixture) + + +@pytest.mark.parametrize( + "df1,df2,msg", + [ + ( + DataFrame({"A": ["á", "à", "ä"], "E": ["é", "è", "ë"]}), + DataFrame({"A": ["á", "à", "ä"], "E": ["é", "è", "e̊"]}), + """{obj}\\.iloc\\[:, 1\\] \\(column name="E"\\) are different + +{obj}\\.iloc\\[:, 1\\] \\(column name="E"\\) values are different \\(33\\.33333 %\\) +\\[index\\]: \\[0, 1, 2\\] +\\[left\\]: \\[é, è, ë\\] +\\[right\\]: \\[é, è, e̊\\]""", + ), + ( + DataFrame({"A": ["á", "à", "ä"], "E": ["é", "è", "ë"]}), + DataFrame({"A": ["a", "a", "a"], "E": ["e", "e", "e"]}), + """{obj}\\.iloc\\[:, 0\\] \\(column name="A"\\) are different + +{obj}\\.iloc\\[:, 0\\] \\(column name="A"\\) values are different \\(100\\.0 %\\) +\\[index\\]: \\[0, 1, 2\\] +\\[left\\]: \\[á, à, ä\\] +\\[right\\]: \\[a, a, a\\]""", + ), + ], +) +def test_frame_equal_unicode(df1, df2, msg, by_blocks_fixture, obj_fixture): + # see gh-20503 + # + # Test ensures that `tm.assert_frame_equals` raises the right exception + # when comparing DataFrames containing differing unicode objects. + msg = msg.format(obj=obj_fixture) + with pytest.raises(AssertionError, match=msg): + tm.assert_frame_equal(df1, df2, by_blocks=by_blocks_fixture, obj=obj_fixture) + + +def test_assert_frame_equal_extension_dtype_mismatch(): + # https://github.com/pandas-dev/pandas/issues/32747 + left = DataFrame({"a": [1, 2, 3]}, dtype="Int64") + right = left.astype(int) + + msg = ( + "Attributes of DataFrame\\.iloc\\[:, 0\\] " + '\\(column name="a"\\) are different\n\n' + 'Attribute "dtype" are different\n' + "\\[left\\]: Int64\n" + "\\[right\\]: int[32|64]" + ) + + tm.assert_frame_equal(left, right, check_dtype=False) + + with pytest.raises(AssertionError, match=msg): + tm.assert_frame_equal(left, right, check_dtype=True) + + +def test_assert_frame_equal_interval_dtype_mismatch(): + # https://github.com/pandas-dev/pandas/issues/32747 + left = DataFrame({"a": [pd.Interval(0, 1)]}, dtype="interval") + right = left.astype(object) + + msg = ( + "Attributes of DataFrame\\.iloc\\[:, 0\\] " + '\\(column name="a"\\) are different\n\n' + 'Attribute "dtype" are different\n' + "\\[left\\]: interval\\[int64, right\\]\n" + "\\[right\\]: object" + ) + + tm.assert_frame_equal(left, right, check_dtype=False) + + with pytest.raises(AssertionError, match=msg): + tm.assert_frame_equal(left, right, check_dtype=True) + + +@pytest.mark.parametrize("right_dtype", ["Int32", "int64"]) +def test_assert_frame_equal_ignore_extension_dtype_mismatch(right_dtype): + # https://github.com/pandas-dev/pandas/issues/35715 + left = DataFrame({"a": [1, 2, 3]}, dtype="Int64") + right = DataFrame({"a": [1, 2, 3]}, dtype=right_dtype) + tm.assert_frame_equal(left, right, check_dtype=False) + + +@pytest.mark.parametrize( + "dtype", + [ + ("timedelta64[ns]"), + ("datetime64[ns, UTC]"), + ("Period[D]"), + ], +) +def test_assert_frame_equal_datetime_like_dtype_mismatch(dtype): + df1 = DataFrame({"a": []}, dtype=dtype) + df2 = DataFrame({"a": []}) + tm.assert_frame_equal(df1, df2, check_dtype=False) + + +def test_allows_duplicate_labels(): + left = DataFrame() + right = DataFrame().set_flags(allows_duplicate_labels=False) + tm.assert_frame_equal(left, left) + tm.assert_frame_equal(right, right) + tm.assert_frame_equal(left, right, check_flags=False) + tm.assert_frame_equal(right, left, check_flags=False) + + with 
pytest.raises(AssertionError, match="\\]""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_numpy_array_equal(a, b) + + +def test_numpy_array_equal_identical_na(nulls_fixture): + a = np.array([nulls_fixture], dtype=object) + + tm.assert_numpy_array_equal(a, a) + + # matching but not the identical object + if hasattr(nulls_fixture, "copy"): + other = nulls_fixture.copy() + else: + other = copy.copy(nulls_fixture) + b = np.array([other], dtype=object) + tm.assert_numpy_array_equal(a, b) + + +def test_numpy_array_equal_different_na(): + a = np.array([np.nan], dtype=object) + b = np.array([pd.NA], dtype=object) + + msg = """numpy array are different + +numpy array values are different \\(100.0 %\\) +\\[left\\]: \\[nan\\] +\\[right\\]: \\[\\]""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_numpy_array_equal(a, b) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/test_assert_produces_warning.py b/.local/lib/python3.8/site-packages/pandas/tests/util/test_assert_produces_warning.py new file mode 100644 index 0000000..e3eb083 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/util/test_assert_produces_warning.py @@ -0,0 +1,204 @@ +"""" +Test module for testing ``pandas._testing.assert_produces_warning``. +""" +import warnings + +import pytest + +from pandas.errors import ( + DtypeWarning, + PerformanceWarning, +) + +import pandas._testing as tm + + +@pytest.fixture( + params=[ + RuntimeWarning, + ResourceWarning, + UserWarning, + FutureWarning, + DeprecationWarning, + PerformanceWarning, + DtypeWarning, + ], +) +def category(request): + """ + Return unique warning. + + Useful for testing behavior of tm.assert_produces_warning with various categories. + """ + return request.param + + +@pytest.fixture( + params=[ + (RuntimeWarning, UserWarning), + (UserWarning, FutureWarning), + (FutureWarning, RuntimeWarning), + (DeprecationWarning, PerformanceWarning), + (PerformanceWarning, FutureWarning), + (DtypeWarning, DeprecationWarning), + (ResourceWarning, DeprecationWarning), + (FutureWarning, DeprecationWarning), + ], + ids=lambda x: type(x).__name__, +) +def pair_different_warnings(request): + """ + Return pair or different warnings. + + Useful for testing how several different warnings are handled + in tm.assert_produces_warning. + """ + return request.param + + +def f(): + warnings.warn("f1", FutureWarning) + warnings.warn("f2", RuntimeWarning) + + +@pytest.mark.filterwarnings("ignore:f1:FutureWarning") +def test_assert_produces_warning_honors_filter(): + # Raise by default. 
+ msg = r"Caused unexpected warning\(s\)" + with pytest.raises(AssertionError, match=msg): + with tm.assert_produces_warning(RuntimeWarning): + f() + + with tm.assert_produces_warning(RuntimeWarning, raise_on_extra_warnings=False): + f() + + +@pytest.mark.parametrize( + "message, match", + [ + ("", None), + ("", ""), + ("Warning message", r".*"), + ("Warning message", "War"), + ("Warning message", r"[Ww]arning"), + ("Warning message", "age"), + ("Warning message", r"age$"), + ("Message 12-234 with numbers", r"\d{2}-\d{3}"), + ("Message 12-234 with numbers", r"^Mes.*\d{2}-\d{3}"), + ("Message 12-234 with numbers", r"\d{2}-\d{3}\s\S+"), + ("Message, which we do not match", None), + ], +) +def test_catch_warning_category_and_match(category, message, match): + with tm.assert_produces_warning(category, match=match): + warnings.warn(message, category) + + +def test_fail_to_match_runtime_warning(): + category = RuntimeWarning + match = "Did not see this warning" + unmatched = ( + r"Did not see warning 'RuntimeWarning' matching 'Did not see this warning'. " + r"The emitted warning messages are " + r"\[RuntimeWarning\('This is not a match.'\), " + r"RuntimeWarning\('Another unmatched warning.'\)\]" + ) + with pytest.raises(AssertionError, match=unmatched): + with tm.assert_produces_warning(category, match=match): + warnings.warn("This is not a match.", category) + warnings.warn("Another unmatched warning.", category) + + +def test_fail_to_match_future_warning(): + category = FutureWarning + match = "Warning" + unmatched = ( + r"Did not see warning 'FutureWarning' matching 'Warning'. " + r"The emitted warning messages are " + r"\[FutureWarning\('This is not a match.'\), " + r"FutureWarning\('Another unmatched warning.'\)\]" + ) + with pytest.raises(AssertionError, match=unmatched): + with tm.assert_produces_warning(category, match=match): + warnings.warn("This is not a match.", category) + warnings.warn("Another unmatched warning.", category) + + +def test_fail_to_match_resource_warning(): + category = ResourceWarning + match = r"\d+" + unmatched = ( + r"Did not see warning 'ResourceWarning' matching '\\d\+'. 
" + r"The emitted warning messages are " + r"\[ResourceWarning\('This is not a match.'\), " + r"ResourceWarning\('Another unmatched warning.'\)\]" + ) + with pytest.raises(AssertionError, match=unmatched): + with tm.assert_produces_warning(category, match=match): + warnings.warn("This is not a match.", category) + warnings.warn("Another unmatched warning.", category) + + +def test_fail_to_catch_actual_warning(pair_different_warnings): + expected_category, actual_category = pair_different_warnings + match = "Did not see expected warning of class" + with pytest.raises(AssertionError, match=match): + with tm.assert_produces_warning(expected_category): + warnings.warn("warning message", actual_category) + + +def test_ignore_extra_warning(pair_different_warnings): + expected_category, extra_category = pair_different_warnings + with tm.assert_produces_warning(expected_category, raise_on_extra_warnings=False): + warnings.warn("Expected warning", expected_category) + warnings.warn("Unexpected warning OK", extra_category) + + +def test_raise_on_extra_warning(pair_different_warnings): + expected_category, extra_category = pair_different_warnings + match = r"Caused unexpected warning\(s\)" + with pytest.raises(AssertionError, match=match): + with tm.assert_produces_warning(expected_category): + warnings.warn("Expected warning", expected_category) + warnings.warn("Unexpected warning NOT OK", extra_category) + + +def test_same_category_different_messages_first_match(): + category = UserWarning + with tm.assert_produces_warning(category, match=r"^Match this"): + warnings.warn("Match this", category) + warnings.warn("Do not match that", category) + warnings.warn("Do not match that either", category) + + +def test_same_category_different_messages_last_match(): + category = DeprecationWarning + with tm.assert_produces_warning(category, match=r"^Match this"): + warnings.warn("Do not match that", category) + warnings.warn("Do not match that either", category) + warnings.warn("Match this", category) + + +def test_right_category_wrong_match_raises(pair_different_warnings): + target_category, other_category = pair_different_warnings + with pytest.raises(AssertionError, match="Did not see warning.*matching"): + with tm.assert_produces_warning(target_category, match=r"^Match this"): + warnings.warn("Do not match it", target_category) + warnings.warn("Match this", other_category) + + +@pytest.mark.parametrize("false_or_none", [False, None]) +class TestFalseOrNoneExpectedWarning: + def test_raise_on_warning(self, false_or_none): + msg = r"Caused unexpected warning\(s\)" + with pytest.raises(AssertionError, match=msg): + with tm.assert_produces_warning(false_or_none): + f() + + def test_no_raise_without_warning(self, false_or_none): + with tm.assert_produces_warning(false_or_none): + pass + + def test_no_raise_with_false_raise_on_extra(self, false_or_none): + with tm.assert_produces_warning(false_or_none, raise_on_extra_warnings=False): + f() diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/test_assert_series_equal.py b/.local/lib/python3.8/site-packages/pandas/tests/util/test_assert_series_equal.py new file mode 100644 index 0000000..150e7e8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/util/test_assert_series_equal.py @@ -0,0 +1,353 @@ +import pytest + +from pandas.core.dtypes.common import is_extension_array_dtype + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + Series, +) +import pandas._testing as tm + + +def _assert_series_equal_both(a, b, 
**kwargs): + """ + Check that two Series equal. + + This check is performed commutatively. + + Parameters + ---------- + a : Series + The first Series to compare. + b : Series + The second Series to compare. + kwargs : dict + The arguments passed to `tm.assert_series_equal`. + """ + tm.assert_series_equal(a, b, **kwargs) + tm.assert_series_equal(b, a, **kwargs) + + +def _assert_not_series_equal(a, b, **kwargs): + """ + Check that two Series are not equal. + + Parameters + ---------- + a : Series + The first Series to compare. + b : Series + The second Series to compare. + kwargs : dict + The arguments passed to `tm.assert_series_equal`. + """ + try: + tm.assert_series_equal(a, b, **kwargs) + msg = "The two Series were equal when they shouldn't have been" + + pytest.fail(msg=msg) + except AssertionError: + pass + + +def _assert_not_series_equal_both(a, b, **kwargs): + """ + Check that two Series are not equal. + + This check is performed commutatively. + + Parameters + ---------- + a : Series + The first Series to compare. + b : Series + The second Series to compare. + kwargs : dict + The arguments passed to `tm.assert_series_equal`. + """ + _assert_not_series_equal(a, b, **kwargs) + _assert_not_series_equal(b, a, **kwargs) + + +@pytest.mark.parametrize("data", [range(3), list("abc"), list("áàä")]) +def test_series_equal(data): + _assert_series_equal_both(Series(data), Series(data)) + + +@pytest.mark.parametrize( + "data1,data2", + [ + (range(3), range(1, 4)), + (list("abc"), list("xyz")), + (list("áàä"), list("éèë")), + (list("áàä"), list(b"aaa")), + (range(3), range(4)), + ], +) +def test_series_not_equal_value_mismatch(data1, data2): + _assert_not_series_equal_both(Series(data1), Series(data2)) + + +@pytest.mark.parametrize( + "kwargs", + [ + {"dtype": "float64"}, # dtype mismatch + {"index": [1, 2, 4]}, # index mismatch + {"name": "foo"}, # name mismatch + ], +) +def test_series_not_equal_metadata_mismatch(kwargs): + data = range(3) + s1 = Series(data) + + s2 = Series(data, **kwargs) + _assert_not_series_equal_both(s1, s2) + + +@pytest.mark.parametrize("data1,data2", [(0.12345, 0.12346), (0.1235, 0.1236)]) +@pytest.mark.parametrize("dtype", ["float32", "float64", "Float32"]) +@pytest.mark.parametrize("decimals", [0, 1, 2, 3, 5, 10]) +def test_less_precise(data1, data2, dtype, decimals): + rtol = 10 ** -decimals + s1 = Series([data1], dtype=dtype) + s2 = Series([data2], dtype=dtype) + + if (decimals == 5 or decimals == 10) or ( + decimals >= 3 and abs(data1 - data2) >= 0.0005 + ): + if is_extension_array_dtype(dtype): + msg = "ExtensionArray are different" + else: + msg = "Series values are different" + with pytest.raises(AssertionError, match=msg): + tm.assert_series_equal(s1, s2, rtol=rtol) + else: + _assert_series_equal_both(s1, s2, rtol=rtol) + + +@pytest.mark.parametrize( + "s1,s2,msg", + [ + # Index + ( + Series(["l1", "l2"], index=[1, 2]), + Series(["l1", "l2"], index=[1.0, 2.0]), + "Series\\.index are different", + ), + # MultiIndex + ( + DataFrame.from_records( + {"a": [1, 2], "b": [2.1, 1.5], "c": ["l1", "l2"]}, index=["a", "b"] + ).c, + DataFrame.from_records( + {"a": [1.0, 2.0], "b": [2.1, 1.5], "c": ["l1", "l2"]}, index=["a", "b"] + ).c, + "MultiIndex level \\[0\\] are different", + ), + ], +) +def test_series_equal_index_dtype(s1, s2, msg, check_index_type): + kwargs = {"check_index_type": check_index_type} + + if check_index_type: + with pytest.raises(AssertionError, match=msg): + tm.assert_series_equal(s1, s2, **kwargs) + else: + tm.assert_series_equal(s1, s2, **kwargs) + 
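+# A minimal sketch of the tolerance semantics this module keeps exercising,
+# assuming the modern pandas defaults of rtol=1e-5 and atol=1e-8:
+#
+# tm.assert_series_equal(Series([1.0]), Series([1.000001])) # within rtol, passes
+# with pytest.raises(AssertionError):
+# tm.assert_series_equal(Series([1.0]), Series([1.001])) # outside rtol, fails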
+ +def test_series_equal_length_mismatch(rtol): + msg = """Series are different + +Series length are different +\\[left\\]: 3, RangeIndex\\(start=0, stop=3, step=1\\) +\\[right\\]: 4, RangeIndex\\(start=0, stop=4, step=1\\)""" + + s1 = Series([1, 2, 3]) + s2 = Series([1, 2, 3, 4]) + + with pytest.raises(AssertionError, match=msg): + tm.assert_series_equal(s1, s2, rtol=rtol) + + +def test_series_equal_numeric_values_mismatch(rtol): + msg = """Series are different + +Series values are different \\(33\\.33333 %\\) +\\[index\\]: \\[0, 1, 2\\] +\\[left\\]: \\[1, 2, 3\\] +\\[right\\]: \\[1, 2, 4\\]""" + + s1 = Series([1, 2, 3]) + s2 = Series([1, 2, 4]) + + with pytest.raises(AssertionError, match=msg): + tm.assert_series_equal(s1, s2, rtol=rtol) + + +def test_series_equal_categorical_values_mismatch(rtol): + msg = """Series are different + +Series values are different \\(66\\.66667 %\\) +\\[index\\]: \\[0, 1, 2\\] +\\[left\\]: \\['a', 'b', 'c'\\] +Categories \\(3, object\\): \\['a', 'b', 'c'\\] +\\[right\\]: \\['a', 'c', 'b'\\] +Categories \\(3, object\\): \\['a', 'b', 'c'\\]""" + + s1 = Series(Categorical(["a", "b", "c"])) + s2 = Series(Categorical(["a", "c", "b"])) + + with pytest.raises(AssertionError, match=msg): + tm.assert_series_equal(s1, s2, rtol=rtol) + + +def test_series_equal_datetime_values_mismatch(rtol): + msg = """numpy array are different + +numpy array values are different \\(100.0 %\\) +\\[index\\]: \\[0, 1, 2\\] +\\[left\\]: \\[1514764800000000000, 1514851200000000000, 1514937600000000000\\] +\\[right\\]: \\[1549065600000000000, 1549152000000000000, 1549238400000000000\\]""" + + s1 = Series(pd.date_range("2018-01-01", periods=3, freq="D")) + s2 = Series(pd.date_range("2019-02-02", periods=3, freq="D")) + + with pytest.raises(AssertionError, match=msg): + tm.assert_series_equal(s1, s2, rtol=rtol) + + +def test_series_equal_categorical_mismatch(check_categorical): + msg = """Attributes of Series are different + +Attribute "dtype" are different +\\[left\\]: CategoricalDtype\\(categories=\\['a', 'b'\\], ordered=False\\) +\\[right\\]: CategoricalDtype\\(categories=\\['a', 'b', 'c'\\], \ +ordered=False\\)""" + + s1 = Series(Categorical(["a", "b"])) + s2 = Series(Categorical(["a", "b"], categories=list("abc"))) + + if check_categorical: + with pytest.raises(AssertionError, match=msg): + tm.assert_series_equal(s1, s2, check_categorical=check_categorical) + else: + _assert_series_equal_both(s1, s2, check_categorical=check_categorical) + + +def test_assert_series_equal_extension_dtype_mismatch(): + # https://github.com/pandas-dev/pandas/issues/32747 + left = Series(pd.array([1, 2, 3], dtype="Int64")) + right = left.astype(int) + + msg = """Attributes of Series are different + +Attribute "dtype" are different +\\[left\\]: Int64 +\\[right\\]: int[32|64]""" + + tm.assert_series_equal(left, right, check_dtype=False) + + with pytest.raises(AssertionError, match=msg): + tm.assert_series_equal(left, right, check_dtype=True) + + +def test_assert_series_equal_interval_dtype_mismatch(): + # https://github.com/pandas-dev/pandas/issues/32747 + left = Series([pd.Interval(0, 1)], dtype="interval") + right = left.astype(object) + + msg = """Attributes of Series are different + +Attribute "dtype" are different +\\[left\\]: interval\\[int64, right\\] +\\[right\\]: object""" + + tm.assert_series_equal(left, right, check_dtype=False) + + with pytest.raises(AssertionError, match=msg): + tm.assert_series_equal(left, right, check_dtype=True) + + +def test_series_equal_series_type(): + class MySeries(Series): 
+ pass + + s1 = Series([1, 2]) + s2 = Series([1, 2]) + s3 = MySeries([1, 2]) + + tm.assert_series_equal(s1, s2, check_series_type=False) + tm.assert_series_equal(s1, s2, check_series_type=True) + + tm.assert_series_equal(s1, s3, check_series_type=False) + tm.assert_series_equal(s3, s1, check_series_type=False) + + with pytest.raises(AssertionError, match="Series classes are different"): + tm.assert_series_equal(s1, s3, check_series_type=True) + + with pytest.raises(AssertionError, match="Series classes are different"): + tm.assert_series_equal(s3, s1, check_series_type=True) + + +def test_series_equal_exact_for_nonnumeric(): + # https://github.com/pandas-dev/pandas/issues/35446 + s1 = Series(["a", "b"]) + s2 = Series(["a", "b"]) + s3 = Series(["b", "a"]) + + tm.assert_series_equal(s1, s2, check_exact=True) + tm.assert_series_equal(s2, s1, check_exact=True) + + msg = """Series are different + +Series values are different \\(100\\.0 %\\) +\\[index\\]: \\[0, 1\\] +\\[left\\]: \\[a, b\\] +\\[right\\]: \\[b, a\\]""" + with pytest.raises(AssertionError, match=msg): + tm.assert_series_equal(s1, s3, check_exact=True) + + msg = """Series are different + +Series values are different \\(100\\.0 %\\) +\\[index\\]: \\[0, 1\\] +\\[left\\]: \\[b, a\\] +\\[right\\]: \\[a, b\\]""" + with pytest.raises(AssertionError, match=msg): + tm.assert_series_equal(s3, s1, check_exact=True) + + +@pytest.mark.parametrize("right_dtype", ["Int32", "int64"]) +def test_assert_series_equal_ignore_extension_dtype_mismatch(right_dtype): + # https://github.com/pandas-dev/pandas/issues/35715 + left = Series([1, 2, 3], dtype="Int64") + right = Series([1, 2, 3], dtype=right_dtype) + tm.assert_series_equal(left, right, check_dtype=False) + + +def test_allows_duplicate_labels(): + left = Series([1]) + right = Series([1]).set_flags(allows_duplicate_labels=False) + tm.assert_series_equal(left, left) + tm.assert_series_equal(right, right) + tm.assert_series_equal(left, right, check_flags=False) + tm.assert_series_equal(right, left, check_flags=False) + + with pytest.raises(AssertionError, match=">> cumavg([1, 2, 3]) + 2 + """ + ), + method="cumavg", + operation="average", +) +def cumavg(whatever): + pass + + +@doc(cumsum, method="cummax", operation="maximum") +def cummax(whatever): + pass + + +@doc(cummax, method="cummin", operation="minimum") +def cummin(whatever): + pass + + +def test_docstring_formatting(): + docstr = dedent( + """ + This is the cumsum method. + + It computes the cumulative sum. + """ + ) + assert cumsum.__doc__ == docstr + + +def test_docstring_appending(): + docstr = dedent( + """ + This is the cumavg method. + + It computes the cumulative average. + + Examples + -------- + + >>> cumavg([1, 2, 3]) + 2 + """ + ) + assert cumavg.__doc__ == docstr + + +def test_doc_template_from_func(): + docstr = dedent( + """ + This is the cummax method. + + It computes the cumulative maximum. + """ + ) + assert cummax.__doc__ == docstr + + +def test_inherit_doc_template(): + docstr = dedent( + """ + This is the cummin method. + + It computes the cumulative minimum. 
+ """ + ) + assert cummin.__doc__ == docstr diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/test_hashing.py b/.local/lib/python3.8/site-packages/pandas/tests/util/test_hashing.py new file mode 100644 index 0000000..6eee756 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/util/test_hashing.py @@ -0,0 +1,392 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, +) +import pandas._testing as tm +from pandas.core.util.hashing import hash_tuples +from pandas.util import ( + hash_array, + hash_pandas_object, +) + + +@pytest.fixture( + params=[ + Series([1, 2, 3] * 3, dtype="int32"), + Series([None, 2.5, 3.5] * 3, dtype="float32"), + Series(["a", "b", "c"] * 3, dtype="category"), + Series(["d", "e", "f"] * 3), + Series([True, False, True] * 3), + Series(pd.date_range("20130101", periods=9)), + Series(pd.date_range("20130101", periods=9, tz="US/Eastern")), + Series(pd.timedelta_range("2000", periods=9)), + ] +) +def series(request): + return request.param + + +@pytest.fixture(params=[True, False]) +def index(request): + return request.param + + +def test_consistency(): + # Check that our hash doesn't change because of a mistake + # in the actual code; this is the ground truth. + result = hash_pandas_object(Index(["foo", "bar", "baz"])) + expected = Series( + np.array( + [3600424527151052760, 1374399572096150070, 477881037637427054], + dtype="uint64", + ), + index=["foo", "bar", "baz"], + ) + tm.assert_series_equal(result, expected) + + +def test_hash_array(series): + arr = series.values + tm.assert_numpy_array_equal(hash_array(arr), hash_array(arr)) + + +@pytest.mark.parametrize("dtype", ["U", object]) +def test_hash_array_mixed(dtype): + result1 = hash_array(np.array(["3", "4", "All"])) + result2 = hash_array(np.array([3, 4, "All"], dtype=dtype)) + + tm.assert_numpy_array_equal(result1, result2) + + +@pytest.mark.parametrize("val", [5, "foo", pd.Timestamp("20130101")]) +def test_hash_array_errors(val): + msg = "must pass a ndarray-like" + with pytest.raises(TypeError, match=msg): + hash_array(val) + + +def test_hash_tuples(): + tuples = [(1, "one"), (1, "two"), (2, "one")] + result = hash_tuples(tuples) + + expected = hash_pandas_object(MultiIndex.from_tuples(tuples)).values + tm.assert_numpy_array_equal(result, expected) + + # We only need to support MultiIndex and list-of-tuples + msg = "|".join(["object is not iterable", "zip argument #1 must support iteration"]) + with pytest.raises(TypeError, match=msg): + hash_tuples(tuples[0]) + + +@pytest.mark.parametrize("val", [5, "foo", pd.Timestamp("20130101")]) +def test_hash_tuples_err(val): + msg = "must be convertible to a list-of-tuples" + with pytest.raises(TypeError, match=msg): + hash_tuples(val) + + +def test_multiindex_unique(): + mi = MultiIndex.from_tuples([(118, 472), (236, 118), (51, 204), (102, 51)]) + assert mi.is_unique is True + + result = hash_pandas_object(mi) + assert result.is_unique is True + + +def test_multiindex_objects(): + mi = MultiIndex( + levels=[["b", "d", "a"], [1, 2, 3]], + codes=[[0, 1, 0, 2], [2, 0, 0, 1]], + names=["col1", "col2"], + ) + recons = mi._sort_levels_monotonic() + + # These are equal. 
+ assert mi.equals(recons) + assert Index(mi.values).equals(Index(recons.values)) + + +@pytest.mark.parametrize( + "obj", + [ + Series([1, 2, 3]), + Series([1.0, 1.5, 3.2]), + Series([1.0, 1.5, np.nan]), + Series([1.0, 1.5, 3.2], index=[1.5, 1.1, 3.3]), + Series(["a", "b", "c"]), + Series(["a", np.nan, "c"]), + Series(["a", None, "c"]), + Series([True, False, True]), + Series(dtype=object), + DataFrame({"x": ["a", "b", "c"], "y": [1, 2, 3]}), + DataFrame(), + tm.makeMissingDataframe(), + tm.makeMixedDataFrame(), + tm.makeTimeDataFrame(), + tm.makeTimeSeries(), + Series(tm.makePeriodIndex()), + Series(pd.date_range("20130101", periods=3, tz="US/Eastern")), + ], +) +def test_hash_pandas_object(obj, index): + a = hash_pandas_object(obj, index=index) + b = hash_pandas_object(obj, index=index) + tm.assert_series_equal(a, b) + + +@pytest.mark.parametrize( + "obj", + [ + Series([1, 2, 3]), + Series([1.0, 1.5, 3.2]), + Series([1.0, 1.5, np.nan]), + Series([1.0, 1.5, 3.2], index=[1.5, 1.1, 3.3]), + Series(["a", "b", "c"]), + Series(["a", np.nan, "c"]), + Series(["a", None, "c"]), + Series([True, False, True]), + DataFrame({"x": ["a", "b", "c"], "y": [1, 2, 3]}), + tm.makeMissingDataframe(), + tm.makeMixedDataFrame(), + tm.makeTimeDataFrame(), + tm.makeTimeSeries(), + Series(tm.makePeriodIndex()), + Series(pd.date_range("20130101", periods=3, tz="US/Eastern")), + ], +) +def test_hash_pandas_object_diff_index_non_empty(obj): + a = hash_pandas_object(obj, index=True) + b = hash_pandas_object(obj, index=False) + assert not (a == b).all() + + +@pytest.mark.parametrize( + "obj", + [ + Index([1, 2, 3]), + Index([True, False, True]), + tm.makeTimedeltaIndex(), + tm.makePeriodIndex(), + MultiIndex.from_product( + [range(5), ["foo", "bar", "baz"], pd.date_range("20130101", periods=2)] + ), + MultiIndex.from_product([pd.CategoricalIndex(list("aabc")), range(3)]), + ], +) +def test_hash_pandas_index(obj, index): + a = hash_pandas_object(obj, index=index) + b = hash_pandas_object(obj, index=index) + tm.assert_series_equal(a, b) + + +def test_hash_pandas_series(series, index): + a = hash_pandas_object(series, index=index) + b = hash_pandas_object(series, index=index) + tm.assert_series_equal(a, b) + + +def test_hash_pandas_series_diff_index(series): + a = hash_pandas_object(series, index=True) + b = hash_pandas_object(series, index=False) + assert not (a == b).all() + + +@pytest.mark.parametrize( + "obj", [Series([], dtype="float64"), Series([], dtype="object"), Index([])] +) +def test_hash_pandas_empty_object(obj, index): + # These are by-definition the same with + # or without the index as the data is empty. + a = hash_pandas_object(obj, index=index) + b = hash_pandas_object(obj, index=index) + tm.assert_series_equal(a, b) + + +@pytest.mark.parametrize( + "s1", + [ + Series(["a", "b", "c", "d"]), + Series([1000, 2000, 3000, 4000]), + Series(pd.date_range(0, periods=4)), + ], +) +@pytest.mark.parametrize("categorize", [True, False]) +def test_categorical_consistency(s1, categorize): + # see gh-15143 + # + # Check that categoricals hash consistent with their values, + # not codes. This should work for categoricals of any dtype. + s2 = s1.astype("category").cat.set_categories(s1) + s3 = s2.cat.set_categories(list(reversed(s1))) + + # These should all hash identically. 
+ h1 = hash_pandas_object(s1, categorize=categorize) + h2 = hash_pandas_object(s2, categorize=categorize) + h3 = hash_pandas_object(s3, categorize=categorize) + + tm.assert_series_equal(h1, h2) + tm.assert_series_equal(h1, h3) + + +def test_categorical_with_nan_consistency(): + c = pd.Categorical.from_codes( + [-1, 0, 1, 2, 3, 4], categories=pd.date_range("2012-01-01", periods=5, name="B") + ) + expected = hash_array(c, categorize=False) + + c = pd.Categorical.from_codes([-1, 0], categories=[pd.Timestamp("2012-01-01")]) + result = hash_array(c, categorize=False) + + assert result[0] in expected + assert result[1] in expected + + +def test_pandas_errors(): + msg = "Unexpected type for hashing" + with pytest.raises(TypeError, match=msg): + hash_pandas_object(pd.Timestamp("20130101")) + + +def test_hash_keys(): + # Using different hash keys, should have + # different hashes for the same data. + # + # This only matters for object dtypes. + obj = Series(list("abc")) + + a = hash_pandas_object(obj, hash_key="9876543210123456") + b = hash_pandas_object(obj, hash_key="9876543210123465") + + assert (a != b).all() + + +def test_df_hash_keys(): + # DataFrame version of the test_hash_keys. + # https://github.com/pandas-dev/pandas/issues/41404 + obj = DataFrame({"x": np.arange(3), "y": list("abc")}) + + a = hash_pandas_object(obj, hash_key="9876543210123456") + b = hash_pandas_object(obj, hash_key="9876543210123465") + + assert (a != b).all() + + +def test_df_encoding(): + # Check that DataFrame recognizes optional encoding. + # https://github.com/pandas-dev/pandas/issues/41404 + # https://github.com/pandas-dev/pandas/pull/42049 + obj = DataFrame({"x": np.arange(3), "y": list("a+c")}) + + a = hash_pandas_object(obj, encoding="utf8") + b = hash_pandas_object(obj, encoding="utf7") + + # Note that the "+" is encoded as "+-" in utf-7. + assert a[0] == b[0] + assert a[1] != b[1] + assert a[2] == b[2] + + +def test_invalid_key(): + # This only matters for object dtypes. + msg = "key should be a 16-byte string encoded" + + with pytest.raises(ValueError, match=msg): + hash_pandas_object(Series(list("abc")), hash_key="foo") + + +def test_already_encoded(index): + # If already encoded, then ok. + obj = Series(list("abc")).str.encode("utf8") + a = hash_pandas_object(obj, index=index) + b = hash_pandas_object(obj, index=index) + tm.assert_series_equal(a, b) + + +def test_alternate_encoding(index): + obj = Series(list("abc")) + a = hash_pandas_object(obj, index=index) + b = hash_pandas_object(obj, index=index) + tm.assert_series_equal(a, b) + + +@pytest.mark.parametrize("l_exp", range(8)) +@pytest.mark.parametrize("l_add", [0, 1]) +def test_same_len_hash_collisions(l_exp, l_add): + length = 2 ** (l_exp + 8) + l_add + s = tm.rands_array(length, 2) + + result = hash_array(s, "utf8") + assert not result[0] == result[1] + + +def test_hash_collisions(): + # Hash collisions are bad. 
+ # + # https://github.com/pandas-dev/pandas/issues/14711#issuecomment-264885726 + hashes = [ + "Ingrid-9Z9fKIZmkO7i7Cn51Li34pJm44fgX6DYGBNj3VPlOH50m7HnBlPxfIwFMrcNJNMP6PSgLmwWnInciMWrCSAlLEvt7JkJl4IxiMrVbXSa8ZQoVaq5xoQPjltuJEfwdNlO6jo8qRRHvD8sBEBMQASrRa6TsdaPTPCBo3nwIBpE7YzzmyH0vMBhjQZLx1aCT7faSEx7PgFxQhHdKFWROcysamgy9iVj8DO2Fmwg1NNl93rIAqC3mdqfrCxrzfvIY8aJdzin2cHVzy3QUJxZgHvtUtOLxoqnUHsYbNTeq0xcLXpTZEZCxD4PGubIuCNf32c33M7HFsnjWSEjE2yVdWKhmSVodyF8hFYVmhYnMCztQnJrt3O8ZvVRXd5IKwlLexiSp4h888w7SzAIcKgc3g5XQJf6MlSMftDXm9lIsE1mJNiJEv6uY6pgvC3fUPhatlR5JPpVAHNSbSEE73MBzJrhCAbOLXQumyOXigZuPoME7QgJcBalliQol7YZ9", # noqa: E501 + "Tim-b9MddTxOWW2AT1Py6vtVbZwGAmYCjbp89p8mxsiFoVX4FyDOF3wFiAkyQTUgwg9sVqVYOZo09Dh1AzhFHbgij52ylF0SEwgzjzHH8TGY8Lypart4p4onnDoDvVMBa0kdthVGKl6K0BDVGzyOXPXKpmnMF1H6rJzqHJ0HywfwS4XYpVwlAkoeNsiicHkJUFdUAhG229INzvIAiJuAHeJDUoyO4DCBqtoZ5TDend6TK7Y914yHlfH3g1WZu5LksKv68VQHJriWFYusW5e6ZZ6dKaMjTwEGuRgdT66iU5nqWTHRH8WSzpXoCFwGcTOwyuqPSe0fTe21DVtJn1FKj9F9nEnR9xOvJUO7E0piCIF4Ad9yAIDY4DBimpsTfKXCu1vdHpKYerzbndfuFe5AhfMduLYZJi5iAw8qKSwR5h86ttXV0Mc0QmXz8dsRvDgxjXSmupPxBggdlqUlC828hXiTPD7am0yETBV0F3bEtvPiNJfremszcV8NcqAoARMe", # noqa: E501 + ] + + # These should be different. + result1 = hash_array(np.asarray(hashes[0:1], dtype=object), "utf8") + expected1 = np.array([14963968704024874985], dtype=np.uint64) + tm.assert_numpy_array_equal(result1, expected1) + + result2 = hash_array(np.asarray(hashes[1:2], dtype=object), "utf8") + expected2 = np.array([16428432627716348016], dtype=np.uint64) + tm.assert_numpy_array_equal(result2, expected2) + + result = hash_array(np.asarray(hashes, dtype=object), "utf8") + tm.assert_numpy_array_equal(result, np.concatenate([expected1, expected2], axis=0)) + + +@pytest.mark.parametrize( + "data, result_data", + [ + [[tuple("1"), tuple("2")], [10345501319357378243, 8331063931016360761]], + [[(1,), (2,)], [9408946347443669104, 3278256261030523334]], + ], +) +def test_hash_with_tuple(data, result_data): + # GH#28969 array containing a tuple raises on call to arr.astype(str) + # apparently a numpy bug github.com/numpy/numpy/issues/9441 + + df = DataFrame({"data": data}) + result = hash_pandas_object(df) + expected = Series(result_data, dtype=np.uint64) + tm.assert_series_equal(result, expected) + + +def test_hashable_tuple_args(): + # require that the elements of such tuples are themselves hashable + + df3 = DataFrame( + { + "data": [ + ( + 1, + [], + ), + ( + 2, + {}, + ), + ] + } + ) + with pytest.raises(TypeError, match="unhashable type: 'list'"): + hash_pandas_object(df3) + + +def test_hash_object_none_key(): + # https://github.com/pandas-dev/pandas/issues/30887 + result = pd.util.hash_pandas_object(Series(["a", "b"]), hash_key=None) + expected = Series([4578374827886788867, 17338122309987883691], dtype="uint64") + tm.assert_series_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/test_numba.py b/.local/lib/python3.8/site-packages/pandas/tests/util/test_numba.py new file mode 100644 index 0000000..27b68ff --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/util/test_numba.py @@ -0,0 +1,12 @@ +import pytest + +import pandas.util._test_decorators as td + +from pandas import option_context + + +@td.skip_if_installed("numba") +def test_numba_not_installed_option_context(): + with pytest.raises(ImportError, match="Missing optional"): + with option_context("compute.use_numba", True): + pass diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/test_safe_import.py 
b/.local/lib/python3.8/site-packages/pandas/tests/util/test_safe_import.py new file mode 100644 index 0000000..bd07bea --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/util/test_safe_import.py @@ -0,0 +1,39 @@ +import sys +import types + +import pytest + +import pandas.util._test_decorators as td + + +@pytest.mark.parametrize("name", ["foo", "hello123"]) +def test_safe_import_non_existent(name): + assert not td.safe_import(name) + + +def test_safe_import_exists(): + assert td.safe_import("pandas") + + +@pytest.mark.parametrize("min_version,valid", [("0.0.0", True), ("99.99.99", False)]) +def test_safe_import_versions(min_version, valid): + result = td.safe_import("pandas", min_version=min_version) + result = result if valid else not result + assert result + + +@pytest.mark.parametrize( + "min_version,valid", [(None, False), ("1.0", True), ("2.0", False)] +) +def test_safe_import_dummy(monkeypatch, min_version, valid): + mod_name = "hello123" + + mod = types.ModuleType(mod_name) + mod.__version__ = "1.5" + + if min_version is not None: + monkeypatch.setitem(sys.modules, mod_name, mod) + + result = td.safe_import(mod_name, min_version=min_version) + result = result if valid else not result + assert result diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/test_shares_memory.py b/.local/lib/python3.8/site-packages/pandas/tests/util/test_shares_memory.py new file mode 100644 index 0000000..ed8227a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/util/test_shares_memory.py @@ -0,0 +1,13 @@ +import pandas as pd +import pandas._testing as tm + + +def test_shares_memory_interval(): + obj = pd.interval_range(1, 5) + + assert tm.shares_memory(obj, obj) + assert tm.shares_memory(obj, obj._data) + assert tm.shares_memory(obj, obj[::-1]) + assert tm.shares_memory(obj, obj[:2]) + + assert not tm.shares_memory(obj, obj._data.copy()) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/test_show_versions.py b/.local/lib/python3.8/site-packages/pandas/tests/util/test_show_versions.py new file mode 100644 index 0000000..7a13630 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/util/test_show_versions.py @@ -0,0 +1,101 @@ +import json +import os +import re + +import pytest + +from pandas.util._print_versions import ( + _get_dependency_info, + _get_sys_info, +) + +import pandas as pd + + +@pytest.mark.filterwarnings( + # openpyxl + "ignore:defusedxml.lxml is no longer supported:DeprecationWarning" +) +@pytest.mark.filterwarnings( + # html5lib + "ignore:Using or importing the ABCs from:DeprecationWarning" +) +@pytest.mark.filterwarnings( + # fastparquet + "ignore:pandas.core.index is deprecated:FutureWarning" +) +@pytest.mark.filterwarnings( + # pandas_datareader + "ignore:pandas.util.testing is deprecated:FutureWarning" +) +@pytest.mark.filterwarnings( + # https://github.com/pandas-dev/pandas/issues/35252 + "ignore:Distutils:UserWarning" +) +def test_show_versions(tmpdir): + # GH39701 + as_json = os.path.join(tmpdir, "test_output.json") + + pd.show_versions(as_json=as_json) + + with open(as_json) as fd: + # check if file output is valid JSON, will raise an exception if not + result = json.load(fd) + + # Basic check that each version element is found in output + expected = { + "system": _get_sys_info(), + "dependencies": _get_dependency_info(), + } + + assert result == expected + + +def test_show_versions_console_json(capsys): + # GH39701 + pd.show_versions(as_json=True) + stdout = capsys.readouterr().out + + # check valid json 
is printed to the console if as_json is True + result = json.loads(stdout) + + # Basic check that each version element is found in output + expected = { + "system": _get_sys_info(), + "dependencies": _get_dependency_info(), + } + + assert result == expected + + +def test_show_versions_console(capsys): + # gh-32041 + pd.show_versions(as_json=False) + result = capsys.readouterr().out + + # check header + assert "INSTALLED VERSIONS" in result + + # check full commit hash + assert re.search(r"commit\s*:\s[0-9a-f]{40}\n", result) + + # check required dependency + # 2020-12-09 npdev has "dirty" in the tag + assert re.search(r"numpy\s*:\s([0-9\.\+a-g\_]|dev)+(dirty)?\n", result) + + # check optional dependency + assert re.search(r"pyarrow\s*:\s([0-9\.]+|None)\n", result) + + +def test_json_output_match(capsys, tmpdir): + # GH39701 + pd.show_versions(as_json=True) + result_console = capsys.readouterr().out + + out_path = os.path.join(tmpdir, "test_json.json") + pd.show_versions(as_json=out_path) + with open(out_path) as out_fd: + result_file = out_fd.read() + + assert result_console == result_file diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/test_util.py b/.local/lib/python3.8/site-packages/pandas/tests/util/test_util.py new file mode 100644 index 0000000..d73a789 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/util/test_util.py @@ -0,0 +1,83 @@ +import os + +import pytest + +import pandas.compat as compat + +import pandas._testing as tm + + +def test_rands(): + r = tm.rands(10) + assert len(r) == 10 + + +def test_rands_array_1d(): + arr = tm.rands_array(5, size=10) + assert arr.shape == (10,) + assert len(arr[0]) == 5 + + +def test_rands_array_2d(): + arr = tm.rands_array(7, size=(10, 10)) + assert arr.shape == (10, 10) + assert len(arr[1, 1]) == 7 + + +def test_numpy_err_state_is_default(): + expected = {"over": "warn", "divide": "warn", "invalid": "warn", "under": "ignore"} + import numpy as np + + # The error state should be unchanged after that import.
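The assert that follows pins numpy's global floating-point error state: importing pandas must leave it at numpy's documented defaults (warn on overflow, divide and invalid, ignore underflow). The same invariant can be checked standalone with only public numpy API; a minimal sketch:

import numpy as np  # in the real test, pandas has already been imported above

# numpy's documented defaults; np.geterr() reports the current policy per category
assert np.geterr() == {
    "divide": "warn",
    "over": "warn",
    "under": "ignore",
    "invalid": "warn",
}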
+ assert np.geterr() == expected + + +def test_convert_rows_list_to_csv_str(): + rows_list = ["aaa", "bbb", "ccc"] + ret = tm.convert_rows_list_to_csv_str(rows_list) + + if compat.is_platform_windows(): + expected = "aaa\r\nbbb\r\nccc\r\n" + else: + expected = "aaa\nbbb\nccc\n" + + assert ret == expected + + +def test_create_temp_directory(): + with tm.ensure_clean_dir() as path: + assert os.path.exists(path) + assert os.path.isdir(path) + assert not os.path.exists(path) + + +@pytest.mark.parametrize("strict_data_files", [True, False]) +def test_datapath_missing(datapath): + with pytest.raises(ValueError, match="Could not find file"): + datapath("not_a_file") + + +def test_datapath(datapath): + args = ("io", "data", "csv", "iris.csv") + + result = datapath(*args) + expected = os.path.join(os.path.dirname(os.path.dirname(__file__)), *args) + + assert result == expected + + +def test_rng_context(): + import numpy as np + + expected0 = 1.764052345967664 + expected1 = 1.6243453636632417 + + with tm.RNGContext(0): + with tm.RNGContext(1): + assert np.random.randn() == expected1 + assert np.random.randn() == expected0 + + +def test_external_error_raised(): + with tm.external_error_raised(TypeError): + raise TypeError("Should not check this error message, so it will pass") diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/test_validate_args.py b/.local/lib/python3.8/site-packages/pandas/tests/util/test_validate_args.py new file mode 100644 index 0000000..db53248 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/util/test_validate_args.py @@ -0,0 +1,67 @@ +import pytest + +from pandas.util._validators import validate_args + +_fname = "func" + + +def test_bad_min_fname_arg_count(): + msg = "'max_fname_arg_count' must be non-negative" + + with pytest.raises(ValueError, match=msg): + validate_args(_fname, (None,), -1, "foo") + + +def test_bad_arg_length_max_value_single(): + args = (None, None) + compat_args = ("foo",) + + min_fname_arg_count = 0 + max_length = len(compat_args) + min_fname_arg_count + actual_length = len(args) + min_fname_arg_count + msg = ( + fr"{_fname}\(\) takes at most {max_length} " + fr"argument \({actual_length} given\)" + ) + + with pytest.raises(TypeError, match=msg): + validate_args(_fname, args, min_fname_arg_count, compat_args) + + +def test_bad_arg_length_max_value_multiple(): + args = (None, None) + compat_args = {"foo": None} + + min_fname_arg_count = 2 + max_length = len(compat_args) + min_fname_arg_count + actual_length = len(args) + min_fname_arg_count + msg = ( + fr"{_fname}\(\) takes at most {max_length} " + fr"arguments \({actual_length} given\)" + ) + + with pytest.raises(TypeError, match=msg): + validate_args(_fname, args, min_fname_arg_count, compat_args) + + +@pytest.mark.parametrize("i", range(1, 3)) +def test_not_all_defaults(i): + bad_arg = "foo" + msg = ( + f"the '{bad_arg}' parameter is not supported " + fr"in the pandas implementation of {_fname}\(\)" + ) + + compat_args = {"foo": 2, "bar": -1, "baz": 3} + arg_vals = (1, -1, 3) + + with pytest.raises(ValueError, match=msg): + validate_args(_fname, arg_vals[:i], 2, compat_args) + + +def test_validation(): + # No exceptions should be raised. 
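For context on the calls below: validate_args is the pandas-internal helper behind numpy-compat signatures, and it accepts positional values only when they equal the documented defaults in compat_args. Its signature here, validate_args(fname, args, max_fname_arg_count, compat_args), is inferred from these tests, not from public documentation. A sketch of the failure mode under that assumption:

from pandas.util._validators import validate_args

compat_args = {"axis": 1, "out": None}

validate_args("func", (1, None), 2, compat_args)  # values match defaults: passes

try:
    # axis=0 differs from its default of 1, so the parameter counts as "used"
    validate_args("func", (0, None), 2, compat_args)
except ValueError as err:
    print(err)  # the 'axis' parameter is not supported in the pandas implementation of func()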
+ validate_args(_fname, (None,), 2, {"out": None}) + + compat_args = {"axis": 1, "out": None} + validate_args(_fname, (1, None), 2, compat_args) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/test_validate_args_and_kwargs.py b/.local/lib/python3.8/site-packages/pandas/tests/util/test_validate_args_and_kwargs.py new file mode 100644 index 0000000..941ba86 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/util/test_validate_args_and_kwargs.py @@ -0,0 +1,81 @@ +import pytest + +from pandas.util._validators import validate_args_and_kwargs + +_fname = "func" + + +def test_invalid_total_length_max_length_one(): + compat_args = ("foo",) + kwargs = {"foo": "FOO"} + args = ("FoO", "BaZ") + + min_fname_arg_count = 0 + max_length = len(compat_args) + min_fname_arg_count + actual_length = len(kwargs) + len(args) + min_fname_arg_count + + msg = ( + fr"{_fname}\(\) takes at most {max_length} " + fr"argument \({actual_length} given\)" + ) + + with pytest.raises(TypeError, match=msg): + validate_args_and_kwargs(_fname, args, kwargs, min_fname_arg_count, compat_args) + + +def test_invalid_total_length_max_length_multiple(): + compat_args = ("foo", "bar", "baz") + kwargs = {"foo": "FOO", "bar": "BAR"} + args = ("FoO", "BaZ") + + min_fname_arg_count = 2 + max_length = len(compat_args) + min_fname_arg_count + actual_length = len(kwargs) + len(args) + min_fname_arg_count + + msg = ( + fr"{_fname}\(\) takes at most {max_length} " + fr"arguments \({actual_length} given\)" + ) + + with pytest.raises(TypeError, match=msg): + validate_args_and_kwargs(_fname, args, kwargs, min_fname_arg_count, compat_args) + + +@pytest.mark.parametrize("args,kwargs", [((), {"foo": -5, "bar": 2}), ((-5, 2), {})]) +def test_missing_args_or_kwargs(args, kwargs): + bad_arg = "bar" + min_fname_arg_count = 2 + + compat_args = {"foo": -5, bad_arg: 1} + + msg = ( + fr"the '{bad_arg}' parameter is not supported " + fr"in the pandas implementation of {_fname}\(\)" + ) + + with pytest.raises(ValueError, match=msg): + validate_args_and_kwargs(_fname, args, kwargs, min_fname_arg_count, compat_args) + + +def test_duplicate_argument(): + min_fname_arg_count = 2 + + compat_args = {"foo": None, "bar": None, "baz": None} + kwargs = {"foo": None, "bar": None} + args = (None,) # duplicate value for "foo" + + msg = fr"{_fname}\(\) got multiple values for keyword argument 'foo'" + + with pytest.raises(TypeError, match=msg): + validate_args_and_kwargs(_fname, args, kwargs, min_fname_arg_count, compat_args) + + +def test_validation(): + # No exceptions should be raised. 
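The calls below are the all-defaults happy path. The distinctive extra check that validate_args_and_kwargs adds over its two building blocks is duplicate detection: a value supplied positionally and again by keyword must raise, mirroring Python's own calling rules. A sketch reusing the exact arguments of test_duplicate_argument above:

from pandas.util._validators import validate_args_and_kwargs

compat_args = {"foo": None, "bar": None, "baz": None}

try:
    # (None,) already fills "foo" positionally; the kwargs repeat it
    validate_args_and_kwargs(
        "func", (None,), {"foo": None, "bar": None}, 2, compat_args
    )
except TypeError as err:
    print(err)  # func() got multiple values for keyword argument 'foo'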
+ compat_args = {"foo": 1, "bar": None, "baz": -2} + kwargs = {"baz": -2} + + args = (1, None) + min_fname_arg_count = 2 + + validate_args_and_kwargs(_fname, args, kwargs, min_fname_arg_count, compat_args) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/test_validate_inclusive.py b/.local/lib/python3.8/site-packages/pandas/tests/util/test_validate_inclusive.py new file mode 100644 index 0000000..c1254c6 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/util/test_validate_inclusive.py @@ -0,0 +1,40 @@ +import numpy as np +import pytest + +from pandas.util._validators import validate_inclusive + +import pandas as pd + + +@pytest.mark.parametrize( + "invalid_inclusive", + ( + "ccc", + 2, + object(), + None, + np.nan, + pd.NA, + pd.DataFrame(), + ), +) +def test_invalid_inclusive(invalid_inclusive): + with pytest.raises( + ValueError, + match="Inclusive has to be either 'both', 'neither', 'left' or 'right'", + ): + validate_inclusive(invalid_inclusive) + + +@pytest.mark.parametrize( + "valid_inclusive, expected_tuple", + ( + ("left", (True, False)), + ("right", (False, True)), + ("both", (True, True)), + ("neither", (False, False)), + ), +) +def test_valid_inclusive(valid_inclusive, expected_tuple): + resultant_tuple = validate_inclusive(valid_inclusive) + assert expected_tuple == resultant_tuple diff --git a/.local/lib/python3.8/site-packages/pandas/tests/util/test_validate_kwargs.py b/.local/lib/python3.8/site-packages/pandas/tests/util/test_validate_kwargs.py new file mode 100644 index 0000000..0e271ef --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/util/test_validate_kwargs.py @@ -0,0 +1,66 @@ +import pytest + +from pandas.util._validators import ( + validate_bool_kwarg, + validate_kwargs, +) + +_fname = "func" + + +def test_bad_kwarg(): + good_arg = "f" + bad_arg = good_arg + "o" + + compat_args = {good_arg: "foo", bad_arg + "o": "bar"} + kwargs = {good_arg: "foo", bad_arg: "bar"} + + msg = fr"{_fname}\(\) got an unexpected keyword argument '{bad_arg}'" + + with pytest.raises(TypeError, match=msg): + validate_kwargs(_fname, kwargs, compat_args) + + +@pytest.mark.parametrize("i", range(1, 3)) +def test_not_all_none(i): + bad_arg = "foo" + msg = ( + fr"the '{bad_arg}' parameter is not supported " + fr"in the pandas implementation of {_fname}\(\)" + ) + + compat_args = {"foo": 1, "bar": "s", "baz": None} + + kwarg_keys = ("foo", "bar", "baz") + kwarg_vals = (2, "s", None) + + kwargs = dict(zip(kwarg_keys[:i], kwarg_vals[:i])) + + with pytest.raises(ValueError, match=msg): + validate_kwargs(_fname, kwargs, compat_args) + + +def test_validation(): + # No exceptions should be raised. 
+ compat_args = {"f": None, "b": 1, "ba": "s"} + + kwargs = {"f": None, "b": 1} + validate_kwargs(_fname, kwargs, compat_args) + + +@pytest.mark.parametrize("name", ["inplace", "copy"]) +@pytest.mark.parametrize("value", [1, "True", [1, 2, 3], 5.0]) +def test_validate_bool_kwarg_fail(name, value): + msg = ( + f'For argument "{name}" expected type bool, ' + f"received type {type(value).__name__}" + ) + + with pytest.raises(ValueError, match=msg): + validate_bool_kwarg(value, name) + + +@pytest.mark.parametrize("name", ["inplace", "copy"]) +@pytest.mark.parametrize("value", [True, False, None]) +def test_validate_bool_kwarg(name, value): + assert validate_bool_kwarg(value, name) == value diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/window/__init__.py new file mode 100644 index 0000000..757bdfe --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/window/__init__.py @@ -0,0 +1,8 @@ +import pytest + +pytestmark = [ + # 2021-02-01 needed until numba updates their usage + pytest.mark.filterwarnings( + r"ignore:`np\.int` is a deprecated alias:DeprecationWarning" + ), +] diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..7f59f24 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/conftest.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/conftest.cpython-38.pyc new file mode 100644 index 0000000..35c2f75 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/conftest.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_api.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_api.cpython-38.pyc new file mode 100644 index 0000000..9cc916f Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_api.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_apply.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_apply.cpython-38.pyc new file mode 100644 index 0000000..6cd2c52 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_apply.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_base_indexer.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_base_indexer.cpython-38.pyc new file mode 100644 index 0000000..4d25f0a Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_base_indexer.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_cython_aggregations.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_cython_aggregations.cpython-38.pyc new file mode 100644 index 0000000..61aacd0 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_cython_aggregations.cpython-38.pyc differ diff --git 
a/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_dtypes.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_dtypes.cpython-38.pyc new file mode 100644 index 0000000..7628290 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_dtypes.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_ewm.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_ewm.cpython-38.pyc new file mode 100644 index 0000000..7a04047 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_ewm.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_expanding.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_expanding.cpython-38.pyc new file mode 100644 index 0000000..adf6b90 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_expanding.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_groupby.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_groupby.cpython-38.pyc new file mode 100644 index 0000000..16b2f1a Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_groupby.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_numba.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_numba.cpython-38.pyc new file mode 100644 index 0000000..90d5281 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_numba.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_online.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_online.cpython-38.pyc new file mode 100644 index 0000000..7e006b7 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_online.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_pairwise.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_pairwise.cpython-38.pyc new file mode 100644 index 0000000..48ada6b Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_pairwise.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_rolling.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_rolling.cpython-38.pyc new file mode 100644 index 0000000..e1e423b Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_rolling.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_rolling_functions.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_rolling_functions.cpython-38.pyc new file mode 100644 index 0000000..8a73f82 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_rolling_functions.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_rolling_quantile.cpython-38.pyc 
b/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_rolling_quantile.cpython-38.pyc new file mode 100644 index 0000000..cdf6f8a Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_rolling_quantile.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_rolling_skew_kurt.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_rolling_skew_kurt.cpython-38.pyc new file mode 100644 index 0000000..1457adc Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_rolling_skew_kurt.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_timeseries_window.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_timeseries_window.cpython-38.pyc new file mode 100644 index 0000000..35140f0 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_timeseries_window.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_win_type.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_win_type.cpython-38.pyc new file mode 100644 index 0000000..5f33c54 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/window/__pycache__/test_win_type.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/conftest.py b/.local/lib/python3.8/site-packages/pandas/tests/window/conftest.py new file mode 100644 index 0000000..09233e3 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/window/conftest.py @@ -0,0 +1,261 @@ +from datetime import ( + datetime, + timedelta, +) + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas import ( + DataFrame, + Series, + bdate_range, + to_datetime, +) + + +@pytest.fixture(params=[True, False]) +def raw(request): + """raw keyword argument for rolling.apply""" + return request.param + + +@pytest.fixture( + params=[ + "triang", + "blackman", + "hamming", + "bartlett", + "bohman", + "blackmanharris", + "nuttall", + "barthann", + ] +) +def win_types(request): + return request.param + + +@pytest.fixture(params=["kaiser", "gaussian", "general_gaussian", "exponential"]) +def win_types_special(request): + return request.param + + +@pytest.fixture( + params=[ + "sum", + "mean", + "median", + "max", + "min", + "var", + "std", + "kurt", + "skew", + "count", + "sem", + ] +) +def arithmetic_win_operators(request): + return request.param + + +@pytest.fixture( + params=[ + ["sum", {}], + ["mean", {}], + ["median", {}], + ["max", {}], + ["min", {}], + ["var", {}], + ["var", {"ddof": 0}], + ["std", {}], + ["std", {"ddof": 0}], + ] +) +def arithmetic_numba_supported_operators(request): + return request.param + + +@pytest.fixture(params=["right", "left", "both", "neither"]) +def closed(request): + return request.param + + +@pytest.fixture(params=[True, False]) +def center(request): + return request.param + + +@pytest.fixture(params=[None, 1]) +def min_periods(request): + return request.param + + +@pytest.fixture(params=["single", "table"]) +def method(request): + """method keyword in rolling/expanding/ewm constructor""" + return request.param + + +@pytest.fixture(params=[True, False]) +def parallel(request): + """parallel keyword argument for numba.jit""" + return request.param + + +# Can 
parameterize nogil & nopython over True | False, but limiting per +# https://github.com/pandas-dev/pandas/pull/41971#issuecomment-860607472 + + +@pytest.fixture(params=[False]) +def nogil(request): + """nogil keyword argument for numba.jit""" + return request.param + + +@pytest.fixture(params=[True]) +def nopython(request): + """nopython keyword argument for numba.jit""" + return request.param + + +@pytest.fixture(params=[True, False]) +def adjust(request): + """adjust keyword argument for ewm""" + return request.param + + +@pytest.fixture(params=[True, False]) +def ignore_na(request): + """ignore_na keyword argument for ewm""" + return request.param + + +@pytest.fixture(params=[pytest.param("numba", marks=td.skip_if_no("numba")), "cython"]) +def engine(request): + """engine keyword argument for rolling.apply""" + return request.param + + +@pytest.fixture( + params=[ + pytest.param(("numba", True), marks=td.skip_if_no("numba")), + ("cython", True), + ("cython", False), + ] +) +def engine_and_raw(request): + """engine and raw keyword arguments for rolling.apply""" + return request.param + + +@pytest.fixture +def times_frame(): + """Frame for testing times argument in EWM groupby.""" + return DataFrame( + { + "A": ["a", "b", "c", "a", "b", "c", "a", "b", "c", "a"], + "B": [0, 0, 0, 1, 1, 1, 2, 2, 2, 3], + "C": to_datetime( + [ + "2020-01-01", + "2020-01-01", + "2020-01-01", + "2020-01-02", + "2020-01-10", + "2020-01-22", + "2020-01-03", + "2020-01-23", + "2020-01-23", + "2020-01-04", + ] + ), + } + ) + + +@pytest.fixture(params=["1 day", timedelta(days=1)]) +def halflife_with_times(request): + """Halflife argument for EWM when times is specified.""" + return request.param + + +@pytest.fixture( + params=[ + "object", + "category", + "int8", + "int16", + "int32", + "int64", + "uint8", + "uint16", + "uint32", + "uint64", + "float16", + "float32", + "float64", + "m8[ns]", + "M8[ns]", + "datetime64[ns, UTC]", + ] +) +def dtypes(request): + """Dtypes for window tests""" + return request.param + + +@pytest.fixture( + params=[ + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1, 0]), + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1, 1]), + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=["C", "C"]), + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1.0, 0]), + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[0.0, 1]), + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=["C", 1]), + DataFrame([[2.0, 4.0], [1.0, 2.0], [5.0, 2.0], [8.0, 1.0]], columns=[1, 0.0]), + DataFrame([[2, 4.0], [1, 2.0], [5, 2.0], [8, 1.0]], columns=[0, 1.0]), + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1.0]], columns=[1.0, "X"]), + ] +) +def pairwise_frames(request): + """Pairwise frames test_pairwise""" + return request.param + + +@pytest.fixture +def pairwise_target_frame(): + """Pairwise target frame for test_pairwise""" + return DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[0, 1]) + + +@pytest.fixture +def pairwise_other_frame(): + """Pairwise other frame for test_pairwise""" + return DataFrame( + [[None, 1, 1], [None, 1, 2], [None, 3, 2], [None, 8, 1]], + columns=["Y", "Z", "X"], + ) + + +@pytest.fixture +def series(): + """Make mocked series as fixture.""" + arr = np.random.randn(100) + locs = np.arange(20, 40) + arr[locs] = np.NaN + series = Series(arr, index=bdate_range(datetime(2009, 1, 1), periods=100)) + return series + + +@pytest.fixture +def frame(): + """Make mocked frame as fixture.""" + return DataFrame( + np.random.randn(100, 10), + index=bdate_range(datetime(2009, 1, 1), 
periods=100), + columns=np.arange(10), + ) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/moments/__init__.py b/.local/lib/python3.8/site-packages/pandas/tests/window/moments/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/moments/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/window/moments/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..3d2f2c1 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/window/moments/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/moments/__pycache__/conftest.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/window/moments/__pycache__/conftest.cpython-38.pyc new file mode 100644 index 0000000..f3bdece Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/window/moments/__pycache__/conftest.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/moments/__pycache__/test_moments_consistency_ewm.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/window/moments/__pycache__/test_moments_consistency_ewm.cpython-38.pyc new file mode 100644 index 0000000..327cff6 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/window/moments/__pycache__/test_moments_consistency_ewm.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/moments/__pycache__/test_moments_consistency_expanding.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/window/moments/__pycache__/test_moments_consistency_expanding.cpython-38.pyc new file mode 100644 index 0000000..8fb04c1 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/window/moments/__pycache__/test_moments_consistency_expanding.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/moments/__pycache__/test_moments_consistency_rolling.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tests/window/moments/__pycache__/test_moments_consistency_rolling.cpython-38.pyc new file mode 100644 index 0000000..44bcb98 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tests/window/moments/__pycache__/test_moments_consistency_rolling.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/moments/conftest.py b/.local/lib/python3.8/site-packages/pandas/tests/window/moments/conftest.py new file mode 100644 index 0000000..8f7c20f --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/window/moments/conftest.py @@ -0,0 +1,79 @@ +import itertools + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Series, + notna, +) + + +def create_series(): + return [ + Series(dtype=np.float64, name="a"), + Series([np.nan] * 5), + Series([1.0] * 5), + Series(range(5, 0, -1)), + Series(range(5)), + Series([np.nan, 1.0, np.nan, 1.0, 1.0]), + Series([np.nan, 1.0, np.nan, 2.0, 3.0]), + Series([np.nan, 1.0, np.nan, 3.0, 2.0]), + ] + + +def create_dataframes(): + return [ + DataFrame(columns=["a", "a"]), + DataFrame(np.arange(15).reshape((5, 3)), columns=["a", "a", 99]), + ] + [DataFrame(s) for s in create_series()] + + +def is_constant(x): + values = x.values.ravel("K") + return len(set(values[notna(values)])) == 1 + + +@pytest.fixture( + params=( + obj + for obj in itertools.chain(create_series(), create_dataframes()) + 
if is_constant(obj) + ), + scope="module", +) +def consistent_data(request): + return request.param + + +@pytest.fixture(params=create_series()) +def series_data(request): + return request.param + + +@pytest.fixture(params=itertools.chain(create_series(), create_dataframes())) +def all_data(request): + """ + Test: + - Empty Series / DataFrame + - All NaN + - All consistent value + - Monotonically decreasing + - Monotonically increasing + - Monotonically consistent with NaNs + - Monotonically increasing with NaNs + - Monotonically decreasing with NaNs + """ + return request.param + + +@pytest.fixture(params=[(1, 0), (5, 1)]) +def rolling_consistency_cases(request): + """window, min_periods""" + return request.param + + +@pytest.fixture(params=[0, 2]) +def min_periods(request): + return request.param diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/moments/test_moments_consistency_ewm.py b/.local/lib/python3.8/site-packages/pandas/tests/window/moments/test_moments_consistency_ewm.py new file mode 100644 index 0000000..f9f09bf --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/window/moments/test_moments_consistency_ewm.py @@ -0,0 +1,248 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Series, + concat, +) +import pandas._testing as tm + + +def create_mock_weights(obj, com, adjust, ignore_na): + if isinstance(obj, DataFrame): + if not len(obj.columns): + return DataFrame(index=obj.index, columns=obj.columns) + w = concat( + [ + create_mock_series_weights( + obj.iloc[:, i], com=com, adjust=adjust, ignore_na=ignore_na + ) + for i in range(len(obj.columns)) + ], + axis=1, + ) + w.index = obj.index + w.columns = obj.columns + return w + else: + return create_mock_series_weights(obj, com, adjust, ignore_na) + + +def create_mock_series_weights(s, com, adjust, ignore_na): + w = Series(np.nan, index=s.index, name=s.name) + alpha = 1.0 / (1.0 + com) + if adjust: + count = 0 + for i in range(len(s)): + if s.iat[i] == s.iat[i]: + w.iat[i] = pow(1.0 / (1.0 - alpha), count) + count += 1 + elif not ignore_na: + count += 1 + else: + sum_wts = 0.0 + prev_i = -1 + count = 0 + for i in range(len(s)): + if s.iat[i] == s.iat[i]: + if prev_i == -1: + w.iat[i] = 1.0 + else: + w.iat[i] = alpha * sum_wts / pow(1.0 - alpha, count - prev_i) + sum_wts += w.iat[i] + prev_i = count + count += 1 + elif not ignore_na: + count += 1 + return w + + +def test_ewm_consistency_mean(all_data, adjust, ignore_na, min_periods): + com = 3.0 + + result = all_data.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).mean() + weights = create_mock_weights(all_data, com=com, adjust=adjust, ignore_na=ignore_na) + expected = ( + all_data.multiply(weights) + .cumsum() + .divide(weights.cumsum()) + .fillna(method="ffill") + ) + expected[ + all_data.expanding().count() < (max(min_periods, 1) if min_periods else 1) + ] = np.nan + tm.assert_equal(result, expected.astype("float64")) + + +def test_ewm_consistency_consistent(consistent_data, adjust, ignore_na, min_periods): + com = 3.0 + + count_x = consistent_data.expanding().count() + mean_x = consistent_data.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).mean() + # check that correlation of a series with itself is either 1 or NaN + corr_x_x = consistent_data.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).corr(consistent_data) + exp = ( + consistent_data.max() + if isinstance(consistent_data, Series) + else consistent_data.max().max() + ) + 
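The mock-weight construction above (create_mock_series_weights) is the heart of these consistency tests: with alpha = 1/(1+com) and adjust=True, observation i carries weight (1/(1-alpha))**i, so ewm(...).mean() must equal a weighted cumulative average. A small self-contained check of that identity on a NaN-free series, with illustrative values only:

import numpy as np
import pandas as pd

s = pd.Series([1.0, 2.0, 3.0, 5.0])
com = 3.0
alpha = 1.0 / (1.0 + com)  # com=3 -> alpha=0.25

# adjust=True weights: the newest observation weighs most
w = pd.Series([(1.0 / (1.0 - alpha)) ** i for i in range(len(s))])
manual = (s * w).cumsum() / w.cumsum()

assert np.allclose(manual, s.ewm(com=com, adjust=True).mean())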
+ # check mean of constant series + expected = consistent_data * np.nan + expected[count_x >= max(min_periods, 1)] = exp + tm.assert_equal(mean_x, expected) + + # check correlation of constant series with itself is NaN + expected[:] = np.nan + tm.assert_equal(corr_x_x, expected) + + +def test_ewm_consistency_var_debiasing_factors( + all_data, adjust, ignore_na, min_periods +): + com = 3.0 + + # check variance debiasing factors + var_unbiased_x = all_data.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).var(bias=False) + var_biased_x = all_data.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).var(bias=True) + + weights = create_mock_weights(all_data, com=com, adjust=adjust, ignore_na=ignore_na) + cum_sum = weights.cumsum().fillna(method="ffill") + cum_sum_sq = (weights * weights).cumsum().fillna(method="ffill") + numerator = cum_sum * cum_sum + denominator = numerator - cum_sum_sq + denominator[denominator <= 0.0] = np.nan + var_debiasing_factors_x = numerator / denominator + + tm.assert_equal(var_unbiased_x, var_biased_x * var_debiasing_factors_x) + + +@pytest.mark.parametrize("bias", [True, False]) +def test_moments_consistency_var(all_data, adjust, ignore_na, min_periods, bias): + com = 3.0 + + mean_x = all_data.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).mean() + var_x = all_data.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).var(bias=bias) + assert not (var_x < 0).any().any() + + if bias: + # check that biased var(x) == mean(x^2) - mean(x)^2 + mean_x2 = ( + (all_data * all_data) + .ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na) + .mean() + ) + tm.assert_equal(var_x, mean_x2 - (mean_x * mean_x)) + + +@pytest.mark.parametrize("bias", [True, False]) +def test_moments_consistency_var_constant( + consistent_data, adjust, ignore_na, min_periods, bias +): + com = 3.0 + count_x = consistent_data.expanding(min_periods=min_periods).count() + var_x = consistent_data.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).var(bias=bias) + + # check that variance of constant series is identically 0 + assert not (var_x > 0).any().any() + expected = consistent_data * np.nan + expected[count_x >= max(min_periods, 1)] = 0.0 + if not bias: + expected[count_x < 2] = np.nan + tm.assert_equal(var_x, expected) + + +@pytest.mark.parametrize("bias", [True, False]) +def test_ewm_consistency_std(all_data, adjust, ignore_na, min_periods, bias): + com = 3.0 + var_x = all_data.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).var(bias=bias) + assert not (var_x < 0).any().any() + + std_x = all_data.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).std(bias=bias) + assert not (std_x < 0).any().any() + + # check that var(x) == std(x)^2 + tm.assert_equal(var_x, std_x * std_x) + + cov_x_x = all_data.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).cov(all_data, bias=bias) + assert not (cov_x_x < 0).any().any() + + # check that var(x) == cov(x, x) + tm.assert_equal(var_x, cov_x_x) + + +@pytest.mark.parametrize("bias", [True, False]) +def test_ewm_consistency_series_cov_corr( + series_data, adjust, ignore_na, min_periods, bias +): + com = 3.0 + + var_x_plus_y = ( + (series_data + series_data) + .ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na) + .var(bias=bias) + ) + var_x = series_data.ewm( + com=com, min_periods=min_periods, 
adjust=adjust, ignore_na=ignore_na + ).var(bias=bias) + var_y = series_data.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).var(bias=bias) + cov_x_y = series_data.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).cov(series_data, bias=bias) + # check that cov(x, y) == (var(x+y) - var(x) - + # var(y)) / 2 + tm.assert_equal(cov_x_y, 0.5 * (var_x_plus_y - var_x - var_y)) + + # check that corr(x, y) == cov(x, y) / (std(x) * + # std(y)) + corr_x_y = series_data.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).corr(series_data, bias=bias) + std_x = series_data.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).std(bias=bias) + std_y = series_data.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).std(bias=bias) + tm.assert_equal(corr_x_y, cov_x_y / (std_x * std_y)) + + if bias: + # check that biased cov(x, y) == mean(x*y) - + # mean(x)*mean(y) + mean_x = series_data.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).mean() + mean_y = series_data.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).mean() + mean_x_times_y = ( + (series_data * series_data) + .ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na) + .mean() + ) + tm.assert_equal(cov_x_y, mean_x_times_y - (mean_x * mean_y)) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/moments/test_moments_consistency_expanding.py b/.local/lib/python3.8/site-packages/pandas/tests/window/moments/test_moments_consistency_expanding.py new file mode 100644 index 0000000..dafc60a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/window/moments/test_moments_consistency_expanding.py @@ -0,0 +1,144 @@ +import numpy as np +import pytest + +from pandas import Series +import pandas._testing as tm + + +def no_nans(x): + return x.notna().all().all() + + +def all_na(x): + return x.isnull().all().all() + + +@pytest.mark.parametrize("f", [lambda v: Series(v).sum(), np.nansum, np.sum]) +def test_expanding_apply_consistency_sum_nans(request, all_data, min_periods, f): + if f is np.sum: + if not no_nans(all_data) and not ( + all_na(all_data) and not all_data.empty and min_periods > 0 + ): + request.node.add_marker( + pytest.mark.xfail(reason="np.sum has different behavior with NaNs") + ) + expanding_f_result = all_data.expanding(min_periods=min_periods).sum() + expanding_apply_f_result = all_data.expanding(min_periods=min_periods).apply( + func=f, raw=True + ) + tm.assert_equal(expanding_f_result, expanding_apply_f_result) + + +@pytest.mark.parametrize("ddof", [0, 1]) +def test_moments_consistency_var(all_data, min_periods, ddof): + var_x = all_data.expanding(min_periods=min_periods).var(ddof=ddof) + assert not (var_x < 0).any().any() + + if ddof == 0: + # check that biased var(x) == mean(x^2) - mean(x)^2 + mean_x2 = (all_data * all_data).expanding(min_periods=min_periods).mean() + mean_x = all_data.expanding(min_periods=min_periods).mean() + tm.assert_equal(var_x, mean_x2 - (mean_x * mean_x)) + + +@pytest.mark.parametrize("ddof", [0, 1]) +def test_moments_consistency_var_constant(consistent_data, min_periods, ddof): + count_x = consistent_data.expanding(min_periods=min_periods).count() + var_x = consistent_data.expanding(min_periods=min_periods).var(ddof=ddof) + + # check that variance of constant series is identically 0 + assert not (var_x > 0).any().any() + expected = consistent_data * np.nan + 
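Both the EWM suite above and the expanding suite below lean on the polarization identity cov(x, y) == (var(x+y) - var(x) - var(y)) / 2 and on corr(x, y) == cov(x, y) / (std(x) * std(y)). With y = x the identity collapses to cov(x, x) == var(x), since var(x+x) == 4 * var(x); that makes a cheap standalone sanity check:

import numpy as np
import pandas as pd

x = pd.Series([1.0, 2.0, 4.0, 7.0, 11.0])

var_2x = (x + x).expanding().var()  # var(x + x) == 4 * var(x)
cov_xx = x.expanding().cov(x)       # covariance of a series with itself

# polarization identity with y = x: 0.5 * (4v - v - v) == v == cov(x, x)
lhs = 0.5 * (var_2x - x.expanding().var() - x.expanding().var())
assert np.allclose(cov_xx.dropna(), lhs.dropna())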
expected[count_x >= max(min_periods, 1)] = 0.0 + if ddof == 1: + expected[count_x < 2] = np.nan + tm.assert_equal(var_x, expected) + + +@pytest.mark.parametrize("ddof", [0, 1]) +def test_expanding_consistency_var_std_cov(all_data, min_periods, ddof): + var_x = all_data.expanding(min_periods=min_periods).var(ddof=ddof) + assert not (var_x < 0).any().any() + + std_x = all_data.expanding(min_periods=min_periods).std(ddof=ddof) + assert not (std_x < 0).any().any() + + # check that var(x) == std(x)^2 + tm.assert_equal(var_x, std_x * std_x) + + cov_x_x = all_data.expanding(min_periods=min_periods).cov(all_data, ddof=ddof) + assert not (cov_x_x < 0).any().any() + + # check that var(x) == cov(x, x) + tm.assert_equal(var_x, cov_x_x) + + +@pytest.mark.parametrize("ddof", [0, 1]) +def test_expanding_consistency_series_cov_corr(series_data, min_periods, ddof): + var_x_plus_y = ( + (series_data + series_data).expanding(min_periods=min_periods).var(ddof=ddof) + ) + var_x = series_data.expanding(min_periods=min_periods).var(ddof=ddof) + var_y = series_data.expanding(min_periods=min_periods).var(ddof=ddof) + cov_x_y = series_data.expanding(min_periods=min_periods).cov(series_data, ddof=ddof) + # check that cov(x, y) == (var(x+y) - var(x) - + # var(y)) / 2 + tm.assert_equal(cov_x_y, 0.5 * (var_x_plus_y - var_x - var_y)) + + # check that corr(x, y) == cov(x, y) / (std(x) * + # std(y)) + corr_x_y = series_data.expanding(min_periods=min_periods).corr(series_data) + std_x = series_data.expanding(min_periods=min_periods).std(ddof=ddof) + std_y = series_data.expanding(min_periods=min_periods).std(ddof=ddof) + tm.assert_equal(corr_x_y, cov_x_y / (std_x * std_y)) + + if ddof == 0: + # check that biased cov(x, y) == mean(x*y) - + # mean(x)*mean(y) + mean_x = series_data.expanding(min_periods=min_periods).mean() + mean_y = series_data.expanding(min_periods=min_periods).mean() + mean_x_times_y = ( + (series_data * series_data).expanding(min_periods=min_periods).mean() + ) + tm.assert_equal(cov_x_y, mean_x_times_y - (mean_x * mean_y)) + + +def test_expanding_consistency_mean(all_data, min_periods): + result = all_data.expanding(min_periods=min_periods).mean() + expected = ( + all_data.expanding(min_periods=min_periods).sum() + / all_data.expanding(min_periods=min_periods).count() + ) + tm.assert_equal(result, expected.astype("float64")) + + +def test_expanding_consistency_constant(consistent_data, min_periods): + count_x = consistent_data.expanding().count() + mean_x = consistent_data.expanding(min_periods=min_periods).mean() + # check that correlation of a series with itself is either 1 or NaN + corr_x_x = consistent_data.expanding(min_periods=min_periods).corr(consistent_data) + + exp = ( + consistent_data.max() + if isinstance(consistent_data, Series) + else consistent_data.max().max() + ) + + # check mean of constant series + expected = consistent_data * np.nan + expected[count_x >= max(min_periods, 1)] = exp + tm.assert_equal(mean_x, expected) + + # check correlation of constant series with itself is NaN + expected[:] = np.nan + tm.assert_equal(corr_x_x, expected) + + +def test_expanding_consistency_var_debiasing_factors(all_data, min_periods): + # check variance debiasing factors + var_unbiased_x = all_data.expanding(min_periods=min_periods).var() + var_biased_x = all_data.expanding(min_periods=min_periods).var(ddof=0) + var_debiasing_factors_x = all_data.expanding().count() / ( + all_data.expanding().count() - 1.0 + ).replace(0.0, np.nan) + tm.assert_equal(var_unbiased_x, var_biased_x * var_debiasing_factors_x) 
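The last test above verifies the classic Bessel correction in streaming form: with n observations seen so far, var_unbiased == var_biased * n / (n - 1), undefined at n <= 1. A compact numeric confirmation on a plain Series:

import numpy as np
import pandas as pd

s = pd.Series([2.0, 4.0, 4.0, 4.0, 5.0, 7.0])
n = s.expanding().count()

biased = s.expanding().var(ddof=0)
unbiased = s.expanding().var(ddof=1)
factor = n / (n - 1.0).replace(0.0, np.nan)  # the n==1 factor is undefined

assert np.allclose(unbiased.dropna(), (biased * factor).dropna())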
diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/moments/test_moments_consistency_rolling.py b/.local/lib/python3.8/site-packages/pandas/tests/window/moments/test_moments_consistency_rolling.py new file mode 100644 index 0000000..daca19b --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/window/moments/test_moments_consistency_rolling.py @@ -0,0 +1,238 @@ +import numpy as np +import pytest + +from pandas import Series +import pandas._testing as tm + + +def no_nans(x): + return x.notna().all().all() + + +def all_na(x): + return x.isnull().all().all() + + +@pytest.mark.parametrize("f", [lambda v: Series(v).sum(), np.nansum, np.sum]) +def test_rolling_apply_consistency_sum( + request, all_data, rolling_consistency_cases, center, f +): + window, min_periods = rolling_consistency_cases + + if f is np.sum: + if not no_nans(all_data) and not ( + all_na(all_data) and not all_data.empty and min_periods > 0 + ): + request.node.add_marker( + pytest.mark.xfail(reason="np.sum has different behavior with NaNs") + ) + rolling_f_result = all_data.rolling( + window=window, min_periods=min_periods, center=center + ).sum() + rolling_apply_f_result = all_data.rolling( + window=window, min_periods=min_periods, center=center + ).apply(func=f, raw=True) + tm.assert_equal(rolling_f_result, rolling_apply_f_result) + + +@pytest.mark.parametrize("ddof", [0, 1]) +def test_moments_consistency_var(all_data, rolling_consistency_cases, center, ddof): + window, min_periods = rolling_consistency_cases + + var_x = all_data.rolling(window=window, min_periods=min_periods, center=center).var( + ddof=ddof + ) + assert not (var_x < 0).any().any() + + if ddof == 0: + # check that biased var(x) == mean(x^2) - mean(x)^2 + mean_x = all_data.rolling( + window=window, min_periods=min_periods, center=center + ).mean() + mean_x2 = ( + (all_data * all_data) + .rolling(window=window, min_periods=min_periods, center=center) + .mean() + ) + tm.assert_equal(var_x, mean_x2 - (mean_x * mean_x)) + + +@pytest.mark.parametrize("ddof", [0, 1]) +def test_moments_consistency_var_constant( + consistent_data, rolling_consistency_cases, center, ddof +): + window, min_periods = rolling_consistency_cases + + count_x = consistent_data.rolling( + window=window, min_periods=min_periods, center=center + ).count() + var_x = consistent_data.rolling( + window=window, min_periods=min_periods, center=center + ).var(ddof=ddof) + + # check that variance of constant series is identically 0 + assert not (var_x > 0).any().any() + expected = consistent_data * np.nan + expected[count_x >= max(min_periods, 1)] = 0.0 + if ddof == 1: + expected[count_x < 2] = np.nan + tm.assert_equal(var_x, expected) + + +@pytest.mark.parametrize("ddof", [0, 1]) +def test_rolling_consistency_var_std_cov( + all_data, rolling_consistency_cases, center, ddof +): + window, min_periods = rolling_consistency_cases + + var_x = all_data.rolling(window=window, min_periods=min_periods, center=center).var( + ddof=ddof + ) + assert not (var_x < 0).any().any() + + std_x = all_data.rolling(window=window, min_periods=min_periods, center=center).std( + ddof=ddof + ) + assert not (std_x < 0).any().any() + + # check that var(x) == std(x)^2 + tm.assert_equal(var_x, std_x * std_x) + + cov_x_x = all_data.rolling( + window=window, min_periods=min_periods, center=center + ).cov(all_data, ddof=ddof) + assert not (cov_x_x < 0).any().any() + + # check that var(x) == cov(x, x) + tm.assert_equal(var_x, cov_x_x) + + +@pytest.mark.parametrize("ddof", [0, 1]) +def 
test_rolling_consistency_series_cov_corr( + series_data, rolling_consistency_cases, center, ddof +): + window, min_periods = rolling_consistency_cases + + var_x_plus_y = ( + (series_data + series_data) + .rolling(window=window, min_periods=min_periods, center=center) + .var(ddof=ddof) + ) + var_x = series_data.rolling( + window=window, min_periods=min_periods, center=center + ).var(ddof=ddof) + var_y = series_data.rolling( + window=window, min_periods=min_periods, center=center + ).var(ddof=ddof) + cov_x_y = series_data.rolling( + window=window, min_periods=min_periods, center=center + ).cov(series_data, ddof=ddof) + # check that cov(x, y) == (var(x+y) - var(x) - + # var(y)) / 2 + tm.assert_equal(cov_x_y, 0.5 * (var_x_plus_y - var_x - var_y)) + + # check that corr(x, y) == cov(x, y) / (std(x) * + # std(y)) + corr_x_y = series_data.rolling( + window=window, min_periods=min_periods, center=center + ).corr(series_data) + std_x = series_data.rolling( + window=window, min_periods=min_periods, center=center + ).std(ddof=ddof) + std_y = series_data.rolling( + window=window, min_periods=min_periods, center=center + ).std(ddof=ddof) + tm.assert_equal(corr_x_y, cov_x_y / (std_x * std_y)) + + if ddof == 0: + # check that biased cov(x, y) == mean(x*y) - + # mean(x)*mean(y) + mean_x = series_data.rolling( + window=window, min_periods=min_periods, center=center + ).mean() + mean_y = series_data.rolling( + window=window, min_periods=min_periods, center=center + ).mean() + mean_x_times_y = ( + (series_data * series_data) + .rolling(window=window, min_periods=min_periods, center=center) + .mean() + ) + tm.assert_equal(cov_x_y, mean_x_times_y - (mean_x * mean_y)) + + +def test_rolling_consistency_mean(all_data, rolling_consistency_cases, center): + window, min_periods = rolling_consistency_cases + + result = all_data.rolling( + window=window, min_periods=min_periods, center=center + ).mean() + expected = ( + all_data.rolling(window=window, min_periods=min_periods, center=center) + .sum() + .divide( + all_data.rolling( + window=window, min_periods=min_periods, center=center + ).count() + ) + ) + tm.assert_equal(result, expected.astype("float64")) + + +def test_rolling_consistency_constant( + consistent_data, rolling_consistency_cases, center +): + window, min_periods = rolling_consistency_cases + + count_x = consistent_data.rolling( + window=window, min_periods=min_periods, center=center + ).count() + mean_x = consistent_data.rolling( + window=window, min_periods=min_periods, center=center + ).mean() + # check that correlation of a series with itself is either 1 or NaN + corr_x_x = consistent_data.rolling( + window=window, min_periods=min_periods, center=center + ).corr(consistent_data) + + exp = ( + consistent_data.max() + if isinstance(consistent_data, Series) + else consistent_data.max().max() + ) + + # check mean of constant series + expected = consistent_data * np.nan + expected[count_x >= max(min_periods, 1)] = exp + tm.assert_equal(mean_x, expected) + + # check correlation of constant series with itself is NaN + expected[:] = np.nan + tm.assert_equal(corr_x_x, expected) + + +def test_rolling_consistency_var_debiasing_factors( + all_data, rolling_consistency_cases, center +): + window, min_periods = rolling_consistency_cases + + # check variance debiasing factors + var_unbiased_x = all_data.rolling( + window=window, min_periods=min_periods, center=center + ).var() + var_biased_x = all_data.rolling( + window=window, min_periods=min_periods, center=center + ).var(ddof=0) + var_debiasing_factors_x = ( + 
all_data.rolling(window=window, min_periods=min_periods, center=center) + .count() + .divide( + ( + all_data.rolling( + window=window, min_periods=min_periods, center=center + ).count() + - 1.0 + ).replace(0.0, np.nan) + ) + ) + tm.assert_equal(var_unbiased_x, var_biased_x * var_debiasing_factors_x) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/test_api.py b/.local/lib/python3.8/site-packages/pandas/tests/window/test_api.py new file mode 100644 index 0000000..f84a579 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/window/test_api.py @@ -0,0 +1,390 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + MultiIndex, + Period, + Series, + Timestamp, + concat, + date_range, + timedelta_range, +) +import pandas._testing as tm +from pandas.core.base import SpecificationError + + +def test_getitem(): + frame = DataFrame(np.random.randn(5, 5)) + r = frame.rolling(window=5) + tm.assert_index_equal(r._selected_obj.columns, frame.columns) + + r = frame.rolling(window=5)[1] + assert r._selected_obj.name == frame.columns[1] + + # technically this is allowed + r = frame.rolling(window=5)[1, 3] + tm.assert_index_equal(r._selected_obj.columns, frame.columns[[1, 3]]) + + r = frame.rolling(window=5)[[1, 3]] + tm.assert_index_equal(r._selected_obj.columns, frame.columns[[1, 3]]) + + +def test_select_bad_cols(): + df = DataFrame([[1, 2]], columns=["A", "B"]) + g = df.rolling(window=5) + with pytest.raises(KeyError, match="Columns not found: 'C'"): + g[["C"]] + with pytest.raises(KeyError, match="^[^A]+$"): + # A should not be referenced as a bad column... + # will have to rethink regex if you change message! + g[["A", "C"]] + + +def test_attribute_access(): + + df = DataFrame([[1, 2]], columns=["A", "B"]) + r = df.rolling(window=5) + tm.assert_series_equal(r.A.sum(), r["A"].sum()) + msg = "'Rolling' object has no attribute 'F'" + with pytest.raises(AttributeError, match=msg): + r.F + + +def tests_skip_nuisance(): + + df = DataFrame({"A": range(5), "B": range(5, 10), "C": "foo"}) + r = df.rolling(window=3) + result = r[["A", "B"]].sum() + expected = DataFrame( + {"A": [np.nan, np.nan, 3, 6, 9], "B": [np.nan, np.nan, 18, 21, 24]}, + columns=list("AB"), + ) + tm.assert_frame_equal(result, expected) + + +def test_skip_sum_object_raises(): + df = DataFrame({"A": range(5), "B": range(5, 10), "C": "foo"}) + r = df.rolling(window=3) + msg = r"nuisance columns.*Dropped columns were Index\(\['C'\], dtype='object'\)" + with tm.assert_produces_warning(FutureWarning, match=msg): + # GH#42738 + result = r.sum() + expected = DataFrame( + {"A": [np.nan, np.nan, 3, 6, 9], "B": [np.nan, np.nan, 18, 21, 24]}, + columns=list("AB"), + ) + tm.assert_frame_equal(result, expected) + + +def test_agg(): + df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) + + r = df.rolling(window=3) + a_mean = r["A"].mean() + a_std = r["A"].std() + a_sum = r["A"].sum() + b_mean = r["B"].mean() + b_std = r["B"].std() + + result = r.aggregate([np.mean, np.std]) + expected = concat([a_mean, a_std, b_mean, b_std], axis=1) + expected.columns = MultiIndex.from_product([["A", "B"], ["mean", "std"]]) + tm.assert_frame_equal(result, expected) + + result = r.aggregate({"A": np.mean, "B": np.std}) + + expected = concat([a_mean, b_std], axis=1) + tm.assert_frame_equal(result, expected, check_like=True) + + result = r.aggregate({"A": ["mean", "std"]}) + expected = concat([a_mean, a_std], axis=1) + expected.columns = MultiIndex.from_tuples([("A", "mean"), ("A", "std")]) + 
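What the aggregate calls above pin down, in one picture: passing a dict of lists to a rolling aggregation fans the result out into a MultiIndex of (column, statistic) pairs. A minimal run, mirroring the frame used in these tests:

import pandas as pd

df = pd.DataFrame({"A": range(5), "B": range(0, 10, 2)})
out = df.rolling(window=3).agg({"A": ["mean", "std"], "B": ["mean", "std"]})

print(out.columns.tolist())
# [('A', 'mean'), ('A', 'std'), ('B', 'mean'), ('B', 'std')]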
tm.assert_frame_equal(result, expected) + + result = r["A"].aggregate(["mean", "sum"]) + expected = concat([a_mean, a_sum], axis=1) + expected.columns = ["mean", "sum"] + tm.assert_frame_equal(result, expected) + + msg = "nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + # using a dict with renaming + r.aggregate({"A": {"mean": "mean", "sum": "sum"}}) + + with pytest.raises(SpecificationError, match=msg): + r.aggregate( + {"A": {"mean": "mean", "sum": "sum"}, "B": {"mean2": "mean", "sum2": "sum"}} + ) + + result = r.aggregate({"A": ["mean", "std"], "B": ["mean", "std"]}) + expected = concat([a_mean, a_std, b_mean, b_std], axis=1) + + exp_cols = [("A", "mean"), ("A", "std"), ("B", "mean"), ("B", "std")] + expected.columns = MultiIndex.from_tuples(exp_cols) + tm.assert_frame_equal(result, expected, check_like=True) + + +def test_agg_apply(raw): + + # passed lambda + df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) + + r = df.rolling(window=3) + a_sum = r["A"].sum() + + result = r.agg({"A": np.sum, "B": lambda x: np.std(x, ddof=1)}) + rcustom = r["B"].apply(lambda x: np.std(x, ddof=1), raw=raw) + expected = concat([a_sum, rcustom], axis=1) + tm.assert_frame_equal(result, expected, check_like=True) + + +def test_agg_consistency(): + + df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) + r = df.rolling(window=3) + + result = r.agg([np.sum, np.mean]).columns + expected = MultiIndex.from_product([list("AB"), ["sum", "mean"]]) + tm.assert_index_equal(result, expected) + + result = r["A"].agg([np.sum, np.mean]).columns + expected = Index(["sum", "mean"]) + tm.assert_index_equal(result, expected) + + result = r.agg({"A": [np.sum, np.mean]}).columns + expected = MultiIndex.from_tuples([("A", "sum"), ("A", "mean")]) + tm.assert_index_equal(result, expected) + + +def test_agg_nested_dicts(): + + # API change for disallowing these types of nested dicts + df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) + r = df.rolling(window=3) + + msg = "nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + r.aggregate({"r1": {"A": ["mean", "sum"]}, "r2": {"B": ["mean", "sum"]}}) + + expected = concat( + [r["A"].mean(), r["A"].std(), r["B"].mean(), r["B"].std()], axis=1 + ) + expected.columns = MultiIndex.from_tuples( + [("ra", "mean"), ("ra", "std"), ("rb", "mean"), ("rb", "std")] + ) + with pytest.raises(SpecificationError, match=msg): + r[["A", "B"]].agg({"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}}) + + with pytest.raises(SpecificationError, match=msg): + r.agg({"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}}) + + +def test_count_nonnumeric_types(): + # GH12541 + cols = [ + "int", + "float", + "string", + "datetime", + "timedelta", + "periods", + "fl_inf", + "fl_nan", + "str_nan", + "dt_nat", + "periods_nat", + ] + dt_nat_col = [Timestamp("20170101"), Timestamp("20170203"), Timestamp(None)] + + df = DataFrame( + { + "int": [1, 2, 3], + "float": [4.0, 5.0, 6.0], + "string": list("abc"), + "datetime": date_range("20170101", periods=3), + "timedelta": timedelta_range("1 s", periods=3, freq="s"), + "periods": [ + Period("2012-01"), + Period("2012-02"), + Period("2012-03"), + ], + "fl_inf": [1.0, 2.0, np.Inf], + "fl_nan": [1.0, 2.0, np.NaN], + "str_nan": ["aa", "bb", np.NaN], + "dt_nat": dt_nat_col, + "periods_nat": [ + Period("2012-01"), + Period("2012-02"), + Period(None), + ], + }, + columns=cols, + ) + + expected = DataFrame( + { + "int": [1.0, 2.0, 2.0], + "float": [1.0, 2.0, 2.0], + "string": [1.0, 2.0, 
2.0], + "datetime": [1.0, 2.0, 2.0], + "timedelta": [1.0, 2.0, 2.0], + "periods": [1.0, 2.0, 2.0], + "fl_inf": [1.0, 2.0, 2.0], + "fl_nan": [1.0, 2.0, 1.0], + "str_nan": [1.0, 2.0, 1.0], + "dt_nat": [1.0, 2.0, 1.0], + "periods_nat": [1.0, 2.0, 1.0], + }, + columns=cols, + ) + + result = df.rolling(window=2, min_periods=0).count() + tm.assert_frame_equal(result, expected) + + result = df.rolling(1, min_periods=0).count() + expected = df.notna().astype(float) + tm.assert_frame_equal(result, expected) + + +def test_preserve_metadata(): + # GH 10565 + s = Series(np.arange(100), name="foo") + + s2 = s.rolling(30).sum() + s3 = s.rolling(20).sum() + assert s2.name == "foo" + assert s3.name == "foo" + + +@pytest.mark.parametrize( + "func,window_size,expected_vals", + [ + ( + "rolling", + 2, + [ + [np.nan, np.nan, np.nan, np.nan], + [15.0, 20.0, 25.0, 20.0], + [25.0, 30.0, 35.0, 30.0], + [np.nan, np.nan, np.nan, np.nan], + [20.0, 30.0, 35.0, 30.0], + [35.0, 40.0, 60.0, 40.0], + [60.0, 80.0, 85.0, 80], + ], + ), + ( + "expanding", + None, + [ + [10.0, 10.0, 20.0, 20.0], + [15.0, 20.0, 25.0, 20.0], + [20.0, 30.0, 30.0, 20.0], + [10.0, 10.0, 30.0, 30.0], + [20.0, 30.0, 35.0, 30.0], + [26.666667, 40.0, 50.0, 30.0], + [40.0, 80.0, 60.0, 30.0], + ], + ), + ], +) +def test_multiple_agg_funcs(func, window_size, expected_vals): + # GH 15072 + df = DataFrame( + [ + ["A", 10, 20], + ["A", 20, 30], + ["A", 30, 40], + ["B", 10, 30], + ["B", 30, 40], + ["B", 40, 80], + ["B", 80, 90], + ], + columns=["stock", "low", "high"], + ) + + f = getattr(df.groupby("stock"), func) + if window_size: + window = f(window_size) + else: + window = f() + + index = MultiIndex.from_tuples( + [("A", 0), ("A", 1), ("A", 2), ("B", 3), ("B", 4), ("B", 5), ("B", 6)], + names=["stock", None], + ) + columns = MultiIndex.from_tuples( + [("low", "mean"), ("low", "max"), ("high", "mean"), ("high", "min")] + ) + expected = DataFrame(expected_vals, index=index, columns=columns) + + result = window.agg({"low": ["mean", "max"], "high": ["mean", "min"]}) + + tm.assert_frame_equal(result, expected) + + +def test_is_datetimelike_deprecated(): + s = Series(range(1)).rolling(1) + with tm.assert_produces_warning(FutureWarning): + assert not s.is_datetimelike + + +def test_validate_deprecated(): + s = Series(range(1)).rolling(1) + with tm.assert_produces_warning(FutureWarning): + assert s.validate() is None + + +@pytest.mark.filterwarnings("ignore:min_periods:FutureWarning") +def test_dont_modify_attributes_after_methods( + arithmetic_win_operators, closed, center, min_periods +): + # GH 39554 + roll_obj = Series(range(1)).rolling( + 1, center=center, closed=closed, min_periods=min_periods + ) + expected = {attr: getattr(roll_obj, attr) for attr in roll_obj._attributes} + getattr(roll_obj, arithmetic_win_operators)() + result = {attr: getattr(roll_obj, attr) for attr in roll_obj._attributes} + assert result == expected + + +def test_centered_axis_validation(): + + # ok + Series(np.ones(10)).rolling(window=3, center=True, axis=0).mean() + + # bad axis + msg = "No axis named 1 for object type Series" + with pytest.raises(ValueError, match=msg): + Series(np.ones(10)).rolling(window=3, center=True, axis=1).mean() + + # ok ok + DataFrame(np.ones((10, 10))).rolling(window=3, center=True, axis=0).mean() + DataFrame(np.ones((10, 10))).rolling(window=3, center=True, axis=1).mean() + + # bad axis + msg = "No axis named 2 for object type DataFrame" + with pytest.raises(ValueError, match=msg): + (DataFrame(np.ones((10, 10))).rolling(window=3, center=True, 
axis=2).mean()) + + +def test_rolling_min_min_periods(): + a = Series([1, 2, 3, 4, 5]) + result = a.rolling(window=100, min_periods=1).min() + expected = Series(np.ones(len(a))) + tm.assert_series_equal(result, expected) + msg = "min_periods 5 must be <= window 3" + with pytest.raises(ValueError, match=msg): + Series([1, 2, 3]).rolling(window=3, min_periods=5).min() + + +def test_rolling_max_min_periods(): + a = Series([1, 2, 3, 4, 5], dtype=np.float64) + b = a.rolling(window=100, min_periods=1).max() + tm.assert_almost_equal(a, b) + msg = "min_periods 5 must be <= window 3" + with pytest.raises(ValueError, match=msg): + Series([1, 2, 3]).rolling(window=3, min_periods=5).max() diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/test_apply.py b/.local/lib/python3.8/site-packages/pandas/tests/window/test_apply.py new file mode 100644 index 0000000..3139d4c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/window/test_apply.py @@ -0,0 +1,317 @@ +import warnings + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + Timestamp, + concat, + date_range, + isna, + notna, +) +import pandas._testing as tm + +import pandas.tseries.offsets as offsets + + +def f(x): + # suppress warnings about empty slices, as we are deliberately testing + # with a 0-length Series + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + message=".*(empty slice|0 for slice).*", + category=RuntimeWarning, + ) + return x[np.isfinite(x)].mean() + + +@pytest.mark.parametrize("bad_raw", [None, 1, 0]) +def test_rolling_apply_invalid_raw(bad_raw): + with pytest.raises(ValueError, match="raw parameter must be `True` or `False`"): + Series(range(3)).rolling(1).apply(len, raw=bad_raw) + + +def test_rolling_apply_out_of_bounds(engine_and_raw): + # gh-1850 + engine, raw = engine_and_raw + + vals = Series([1, 2, 3, 4]) + + result = vals.rolling(10).apply(np.sum, engine=engine, raw=raw) + assert result.isna().all() + + result = vals.rolling(10, min_periods=1).apply(np.sum, engine=engine, raw=raw) + expected = Series([1, 3, 6, 10], dtype=float) + tm.assert_almost_equal(result, expected) + + +@pytest.mark.parametrize("window", [2, "2s"]) +def test_rolling_apply_with_pandas_objects(window): + # 5071 + df = DataFrame( + {"A": np.random.randn(5), "B": np.random.randint(0, 10, size=5)}, + index=date_range("20130101", periods=5, freq="s"), + ) + + # we have an equal spaced timeseries index + # so simulate removing the first period + def f(x): + if x.index[0] == df.index[0]: + return np.nan + return x.iloc[-1] + + result = df.rolling(window).apply(f, raw=False) + expected = df.iloc[2:].reindex_like(df) + tm.assert_frame_equal(result, expected) + + with tm.external_error_raised(AttributeError): + df.rolling(window).apply(f, raw=True) + + +def test_rolling_apply(engine_and_raw): + engine, raw = engine_and_raw + + expected = Series([], dtype="float64") + result = expected.rolling(10).apply(lambda x: x.mean(), engine=engine, raw=raw) + tm.assert_series_equal(result, expected) + + # gh-8080 + s = Series([None, None, None]) + result = s.rolling(2, min_periods=0).apply(lambda x: len(x), engine=engine, raw=raw) + expected = Series([1.0, 2.0, 2.0]) + tm.assert_series_equal(result, expected) + + result = s.rolling(2, min_periods=0).apply(len, engine=engine, raw=raw) + tm.assert_series_equal(result, expected) + + +def test_all_apply(engine_and_raw): + engine, raw = engine_and_raw + + df = ( + DataFrame( + {"A": date_range("20130101", periods=5, 
freq="s"), "B": range(5)} + ).set_index("A") + * 2 + ) + er = df.rolling(window=1) + r = df.rolling(window="1s") + + result = r.apply(lambda x: 1, engine=engine, raw=raw) + expected = er.apply(lambda x: 1, engine=engine, raw=raw) + tm.assert_frame_equal(result, expected) + + +def test_ragged_apply(engine_and_raw): + engine, raw = engine_and_raw + + df = DataFrame({"B": range(5)}) + df.index = [ + Timestamp("20130101 09:00:00"), + Timestamp("20130101 09:00:02"), + Timestamp("20130101 09:00:03"), + Timestamp("20130101 09:00:05"), + Timestamp("20130101 09:00:06"), + ] + + f = lambda x: 1 + result = df.rolling(window="1s", min_periods=1).apply(f, engine=engine, raw=raw) + expected = df.copy() + expected["B"] = 1.0 + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="2s", min_periods=1).apply(f, engine=engine, raw=raw) + expected = df.copy() + expected["B"] = 1.0 + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="5s", min_periods=1).apply(f, engine=engine, raw=raw) + expected = df.copy() + expected["B"] = 1.0 + tm.assert_frame_equal(result, expected) + + +def test_invalid_engine(): + with pytest.raises(ValueError, match="engine must be either 'numba' or 'cython'"): + Series(range(1)).rolling(1).apply(lambda x: x, engine="foo") + + +def test_invalid_engine_kwargs_cython(): + with pytest.raises(ValueError, match="cython engine does not accept engine_kwargs"): + Series(range(1)).rolling(1).apply( + lambda x: x, engine="cython", engine_kwargs={"nopython": False} + ) + + +def test_invalid_raw_numba(): + with pytest.raises( + ValueError, match="raw must be `True` when using the numba engine" + ): + Series(range(1)).rolling(1).apply(lambda x: x, raw=False, engine="numba") + + +@pytest.mark.parametrize("args_kwargs", [[None, {"par": 10}], [(10,), None]]) +def test_rolling_apply_args_kwargs(args_kwargs): + # GH 33433 + def foo(x, par): + return np.sum(x + par) + + df = DataFrame({"gr": [1, 1], "a": [1, 2]}) + + idx = Index(["gr", "a"]) + expected = DataFrame([[11.0, 11.0], [11.0, 12.0]], columns=idx) + + result = df.rolling(1).apply(foo, args=args_kwargs[0], kwargs=args_kwargs[1]) + tm.assert_frame_equal(result, expected) + + midx = MultiIndex.from_tuples([(1, 0), (1, 1)], names=["gr", None]) + expected = Series([11.0, 12.0], index=midx, name="a") + + gb_rolling = df.groupby("gr")["a"].rolling(1) + + result = gb_rolling.apply(foo, args=args_kwargs[0], kwargs=args_kwargs[1]) + tm.assert_series_equal(result, expected) + + +def test_nans(raw): + obj = Series(np.random.randn(50)) + obj[:10] = np.NaN + obj[-10:] = np.NaN + + result = obj.rolling(50, min_periods=30).apply(f, raw=raw) + tm.assert_almost_equal(result.iloc[-1], np.mean(obj[10:-10])) + + # min_periods is working correctly + result = obj.rolling(20, min_periods=15).apply(f, raw=raw) + assert isna(result.iloc[23]) + assert not isna(result.iloc[24]) + + assert not isna(result.iloc[-6]) + assert isna(result.iloc[-5]) + + obj2 = Series(np.random.randn(20)) + result = obj2.rolling(10, min_periods=5).apply(f, raw=raw) + assert isna(result.iloc[3]) + assert notna(result.iloc[4]) + + result0 = obj.rolling(20, min_periods=0).apply(f, raw=raw) + result1 = obj.rolling(20, min_periods=1).apply(f, raw=raw) + tm.assert_almost_equal(result0, result1) + + +def test_center(raw): + obj = Series(np.random.randn(50)) + obj[:10] = np.NaN + obj[-10:] = np.NaN + + result = obj.rolling(20, min_periods=15, center=True).apply(f, raw=raw) + expected = ( + concat([obj, Series([np.NaN] * 9)]) + .rolling(20, min_periods=15) + 
.apply(f, raw=raw)[9:] + .reset_index(drop=True) + ) + tm.assert_series_equal(result, expected) + + +def test_series(raw, series): + result = series.rolling(50).apply(f, raw=raw) + assert isinstance(result, Series) + tm.assert_almost_equal(result.iloc[-1], np.mean(series[-50:])) + + +def test_frame(raw, frame): + result = frame.rolling(50).apply(f, raw=raw) + assert isinstance(result, DataFrame) + tm.assert_series_equal( + result.iloc[-1, :], + frame.iloc[-50:, :].apply(np.mean, axis=0, raw=raw), + check_names=False, + ) + + +def test_time_rule_series(raw, series): + win = 25 + minp = 10 + ser = series[::2].resample("B").mean() + series_result = ser.rolling(window=win, min_periods=minp).apply(f, raw=raw) + last_date = series_result.index[-1] + prev_date = last_date - 24 * offsets.BDay() + + trunc_series = series[::2].truncate(prev_date, last_date) + tm.assert_almost_equal(series_result[-1], np.mean(trunc_series)) + + +def test_time_rule_frame(raw, frame): + win = 25 + minp = 10 + frm = frame[::2].resample("B").mean() + frame_result = frm.rolling(window=win, min_periods=minp).apply(f, raw=raw) + last_date = frame_result.index[-1] + prev_date = last_date - 24 * offsets.BDay() + + trunc_frame = frame[::2].truncate(prev_date, last_date) + tm.assert_series_equal( + frame_result.xs(last_date), + trunc_frame.apply(np.mean, raw=raw), + check_names=False, + ) + + +@pytest.mark.parametrize("minp", [0, 99, 100]) +def test_min_periods(raw, series, minp): + result = series.rolling(len(series) + 1, min_periods=minp).apply(f, raw=raw) + expected = series.rolling(len(series), min_periods=minp).apply(f, raw=raw) + nan_mask = isna(result) + tm.assert_series_equal(nan_mask, isna(expected)) + + nan_mask = ~nan_mask + tm.assert_almost_equal(result[nan_mask], expected[nan_mask]) + + +def test_center_reindex_series(raw, series): + # shifter index + s = [f"x{x:d}" for x in range(12)] + minp = 10 + + series_xp = ( + series.reindex(list(series.index) + s) + .rolling(window=25, min_periods=minp) + .apply(f, raw=raw) + .shift(-12) + .reindex(series.index) + ) + series_rs = series.rolling(window=25, min_periods=minp, center=True).apply( + f, raw=raw + ) + tm.assert_series_equal(series_xp, series_rs) + + +def test_center_reindex_frame(raw, frame): + # shifter index + s = [f"x{x:d}" for x in range(12)] + minp = 10 + + frame_xp = ( + frame.reindex(list(frame.index) + s) + .rolling(window=25, min_periods=minp) + .apply(f, raw=raw) + .shift(-12) + .reindex(frame.index) + ) + frame_rs = frame.rolling(window=25, min_periods=minp, center=True).apply(f, raw=raw) + tm.assert_frame_equal(frame_xp, frame_rs) + + +def test_axis1(raw): + # GH 45912 + df = DataFrame([1, 2]) + result = df.rolling(window=1, axis=1).apply(np.sum, raw=raw) + expected = DataFrame([1.0, 2.0]) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/test_base_indexer.py b/.local/lib/python3.8/site-packages/pandas/tests/window/test_base_indexer.py new file mode 100644 index 0000000..5593aa8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/window/test_base_indexer.py @@ -0,0 +1,497 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + MultiIndex, + Series, + concat, + date_range, +) +import pandas._testing as tm +from pandas.api.indexers import ( + BaseIndexer, + FixedForwardWindowIndexer, +) +from pandas.core.indexers.objects import ( + ExpandingIndexer, + FixedWindowIndexer, + VariableOffsetWindowIndexer, +) + +from pandas.tseries.offsets import BusinessDay + + 
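+# (editor addition -- an illustrative sketch, not part of the upstream test
+# suite) Everything below exercises the BaseIndexer contract used throughout
+# this file: get_window_bounds(num_values, min_periods, center, closed) must
+# return two int64 arrays (start, end) so that window i covers
+# values[start[i]:end[i]]. A minimal forward-looking indexer under that
+# assumption; _ForwardSketchIndexer is a hypothetical name, not pandas API.
+class _ForwardSketchIndexer(BaseIndexer):
+    def get_window_bounds(self, num_values, min_periods, center, closed):
+        # window i looks forward from row i, clipped at the end of the data
+        start = np.arange(num_values, dtype=np.int64)
+        end = np.minimum(start + self.window_size, num_values)
+        return start, end
+
+
+# e.g. Series(range(5)).rolling(_ForwardSketchIndexer(window_size=2)).sum()
+# yields [1.0, 3.0, 5.0, 7.0, 4.0], mirroring FixedForwardWindowIndexer below.
+
+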
+def test_bad_get_window_bounds_signature(): + class BadIndexer(BaseIndexer): + def get_window_bounds(self): + return None + + indexer = BadIndexer() + with pytest.raises(ValueError, match="BadIndexer does not implement"): + Series(range(5)).rolling(indexer) + + +def test_expanding_indexer(): + s = Series(range(10)) + indexer = ExpandingIndexer() + result = s.rolling(indexer).mean() + expected = s.expanding().mean() + tm.assert_series_equal(result, expected) + + +def test_indexer_constructor_arg(): + # Example found in computation.rst + use_expanding = [True, False, True, False, True] + df = DataFrame({"values": range(5)}) + + class CustomIndexer(BaseIndexer): + def get_window_bounds(self, num_values, min_periods, center, closed): + start = np.empty(num_values, dtype=np.int64) + end = np.empty(num_values, dtype=np.int64) + for i in range(num_values): + if self.use_expanding[i]: + start[i] = 0 + end[i] = i + 1 + else: + start[i] = i + end[i] = i + self.window_size + return start, end + + indexer = CustomIndexer(window_size=1, use_expanding=use_expanding) + result = df.rolling(indexer).sum() + expected = DataFrame({"values": [0.0, 1.0, 3.0, 3.0, 10.0]}) + tm.assert_frame_equal(result, expected) + + +def test_indexer_accepts_rolling_args(): + df = DataFrame({"values": range(5)}) + + class CustomIndexer(BaseIndexer): + def get_window_bounds(self, num_values, min_periods, center, closed): + start = np.empty(num_values, dtype=np.int64) + end = np.empty(num_values, dtype=np.int64) + for i in range(num_values): + if center and min_periods == 1 and closed == "both" and i == 2: + start[i] = 0 + end[i] = num_values + else: + start[i] = i + end[i] = i + self.window_size + return start, end + + indexer = CustomIndexer(window_size=1) + result = df.rolling(indexer, center=True, min_periods=1, closed="both").sum() + expected = DataFrame({"values": [0.0, 1.0, 10.0, 3.0, 4.0]}) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("constructor", [Series, DataFrame]) +@pytest.mark.parametrize( + "func,np_func,expected,np_kwargs", + [ + ("count", len, [3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 2.0, np.nan], {}), + ("min", np.min, [0.0, 1.0, 2.0, 3.0, 4.0, 6.0, 6.0, 7.0, 8.0, np.nan], {}), + ( + "max", + np.max, + [2.0, 3.0, 4.0, 100.0, 100.0, 100.0, 8.0, 9.0, 9.0, np.nan], + {}, + ), + ( + "std", + np.std, + [ + 1.0, + 1.0, + 1.0, + 55.71654452, + 54.85739087, + 53.9845657, + 1.0, + 1.0, + 0.70710678, + np.nan, + ], + {"ddof": 1}, + ), + ( + "var", + np.var, + [ + 1.0, + 1.0, + 1.0, + 3104.333333, + 3009.333333, + 2914.333333, + 1.0, + 1.0, + 0.500000, + np.nan, + ], + {"ddof": 1}, + ), + ( + "median", + np.median, + [1.0, 2.0, 3.0, 4.0, 6.0, 7.0, 7.0, 8.0, 8.5, np.nan], + {}, + ), + ], +) +@pytest.mark.filterwarnings("ignore:min_periods:FutureWarning") +def test_rolling_forward_window(constructor, func, np_func, expected, np_kwargs): + # GH 32865 + values = np.arange(10.0) + values[5] = 100.0 + + indexer = FixedForwardWindowIndexer(window_size=3) + + match = "Forward-looking windows can't have center=True" + with pytest.raises(ValueError, match=match): + rolling = constructor(values).rolling(window=indexer, center=True) + getattr(rolling, func)() + + match = "Forward-looking windows don't support setting the closed argument" + with pytest.raises(ValueError, match=match): + rolling = constructor(values).rolling(window=indexer, closed="right") + getattr(rolling, func)() + + rolling = constructor(values).rolling(window=indexer, min_periods=2) + result = getattr(rolling, func)() + + # Check that 
the function output matches the explicitly provided array + expected = constructor(expected) + tm.assert_equal(result, expected) + + # Check that the rolling function output matches applying an alternative + # function to the rolling window object + expected2 = constructor(rolling.apply(lambda x: np_func(x, **np_kwargs))) + tm.assert_equal(result, expected2) + + # Check that the function output matches applying an alternative function + # if min_periods isn't specified + # GH 39604: After count-min_periods deprecation, apply(lambda x: len(x)) + # is equivalent to count after setting min_periods=0 + min_periods = 0 if func == "count" else None + rolling3 = constructor(values).rolling(window=indexer, min_periods=min_periods) + result3 = getattr(rolling3, func)() + expected3 = constructor(rolling3.apply(lambda x: np_func(x, **np_kwargs))) + tm.assert_equal(result3, expected3) + + +@pytest.mark.parametrize("constructor", [Series, DataFrame]) +def test_rolling_forward_skewness(constructor): + values = np.arange(10.0) + values[5] = 100.0 + + indexer = FixedForwardWindowIndexer(window_size=5) + rolling = constructor(values).rolling(window=indexer, min_periods=3) + result = rolling.skew() + + expected = constructor( + [ + 0.0, + 2.232396, + 2.229508, + 2.228340, + 2.229091, + 2.231989, + 0.0, + 0.0, + np.nan, + np.nan, + ] + ) + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize( + "func,expected", + [ + ("cov", [2.0, 2.0, 2.0, 97.0, 2.0, -93.0, 2.0, 2.0, np.nan, np.nan]), + ( + "corr", + [ + 1.0, + 1.0, + 1.0, + 0.8704775290207161, + 0.018229084250926637, + -0.861357304646493, + 1.0, + 1.0, + np.nan, + np.nan, + ], + ), + ], +) +def test_rolling_forward_cov_corr(func, expected): + values1 = np.arange(10).reshape(-1, 1) + values2 = values1 * 2 + values1[5, 0] = 100 + values = np.concatenate([values1, values2], axis=1) + + indexer = FixedForwardWindowIndexer(window_size=3) + rolling = DataFrame(values).rolling(window=indexer, min_periods=3) + # We are interested in checking only pairwise covariance / correlation + result = getattr(rolling, func)().loc[(slice(None), 1), 0] + result = result.reset_index(drop=True) + expected = Series(expected) + expected.name = result.name + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize( + "closed,expected_data", + [ + ["right", [0.0, 1.0, 2.0, 3.0, 7.0, 12.0, 6.0, 7.0, 8.0, 9.0]], + ["left", [0.0, 0.0, 1.0, 2.0, 5.0, 9.0, 5.0, 6.0, 7.0, 8.0]], + ], +) +def test_non_fixed_variable_window_indexer(closed, expected_data): + index = date_range("2020", periods=10) + df = DataFrame(range(10), index=index) + offset = BusinessDay(1) + indexer = VariableOffsetWindowIndexer(index=index, offset=offset) + result = df.rolling(indexer, closed=closed).sum() + expected = DataFrame(expected_data, index=index) + tm.assert_frame_equal(result, expected) + + +def test_fixed_forward_indexer_count(): + # GH: 35579 + df = DataFrame({"b": [None, None, None, 7]}) + indexer = FixedForwardWindowIndexer(window_size=2) + result = df.rolling(window=indexer, min_periods=0).count() + expected = DataFrame({"b": [0.0, 0.0, 1.0, 1.0]}) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + ("end_value", "values"), [(1, [0.0, 1, 1, 3, 2]), (-1, [0.0, 1, 0, 3, 1])] +) +@pytest.mark.parametrize(("func", "args"), [("median", []), ("quantile", [0.5])]) +def test_indexer_quantile_sum(end_value, values, func, args): + # GH 37153 + class CustomIndexer(BaseIndexer): + def get_window_bounds(self, num_values, min_periods, center, closed): + start = 
np.empty(num_values, dtype=np.int64) + end = np.empty(num_values, dtype=np.int64) + for i in range(num_values): + if self.use_expanding[i]: + start[i] = 0 + end[i] = max(i + end_value, 1) + else: + start[i] = i + end[i] = i + self.window_size + return start, end + + use_expanding = [True, False, True, False, True] + df = DataFrame({"values": range(5)}) + + indexer = CustomIndexer(window_size=1, use_expanding=use_expanding) + result = getattr(df.rolling(indexer), func)(*args) + expected = DataFrame({"values": values}) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "indexer_class", [FixedWindowIndexer, FixedForwardWindowIndexer, ExpandingIndexer] +) +@pytest.mark.parametrize("window_size", [1, 2, 12]) +@pytest.mark.parametrize( + "df_data", + [ + {"a": [1, 1], "b": [0, 1]}, + {"a": [1, 2], "b": [0, 1]}, + {"a": [1] * 16, "b": [np.nan, 1, 2, np.nan] + list(range(4, 16))}, + ], +) +def test_indexers_are_reusable_after_groupby_rolling( + indexer_class, window_size, df_data +): + # GH 43267 + df = DataFrame(df_data) + num_trials = 3 + indexer = indexer_class(window_size=window_size) + original_window_size = indexer.window_size + for i in range(num_trials): + df.groupby("a")["b"].rolling(window=indexer, min_periods=1).mean() + assert indexer.window_size == original_window_size + + +@pytest.mark.parametrize( + "window_size, num_values, expected_start, expected_end", + [ + (1, 1, [0], [1]), + (1, 2, [0, 1], [1, 2]), + (2, 1, [0], [1]), + (2, 2, [0, 1], [2, 2]), + (5, 12, range(12), list(range(5, 12)) + [12] * 5), + (12, 5, range(5), [5] * 5), + (0, 0, np.array([]), np.array([])), + (1, 0, np.array([]), np.array([])), + (0, 1, [0], [0]), + ], +) +def test_fixed_forward_indexer_bounds( + window_size, num_values, expected_start, expected_end +): + # GH 43267 + indexer = FixedForwardWindowIndexer(window_size=window_size) + start, end = indexer.get_window_bounds(num_values=num_values) + + tm.assert_numpy_array_equal(start, np.array(expected_start), check_dtype=False) + tm.assert_numpy_array_equal(end, np.array(expected_end), check_dtype=False) + assert len(start) == len(end) + + +@pytest.mark.parametrize( + "df, window_size, expected", + [ + ( + DataFrame({"b": [0, 1, 2], "a": [1, 2, 2]}), + 2, + Series( + [0, 1.5, 2.0], + index=MultiIndex.from_arrays([[1, 2, 2], range(3)], names=["a", None]), + name="b", + dtype=np.float64, + ), + ), + ( + DataFrame( + { + "b": [np.nan, 1, 2, np.nan] + list(range(4, 18)), + "a": [1] * 7 + [2] * 11, + "c": range(18), + } + ), + 12, + Series( + [ + 3.6, + 3.6, + 4.25, + 5.0, + 5.0, + 5.5, + 6.0, + 12.0, + 12.5, + 13.0, + 13.5, + 14.0, + 14.5, + 15.0, + 15.5, + 16.0, + 16.5, + 17.0, + ], + index=MultiIndex.from_arrays( + [[1] * 7 + [2] * 11, range(18)], names=["a", None] + ), + name="b", + dtype=np.float64, + ), + ), + ], +) +def test_rolling_groupby_with_fixed_forward_specific(df, window_size, expected): + # GH 43267 + indexer = FixedForwardWindowIndexer(window_size=window_size) + result = df.groupby("a")["b"].rolling(window=indexer, min_periods=1).mean() + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "group_keys", + [ + (1,), + (1, 2), + (2, 1), + (1, 1, 2), + (1, 2, 1), + (1, 1, 2, 2), + (1, 2, 3, 2, 3), + (1, 1, 2) * 4, + (1, 2, 3) * 5, + ], +) +@pytest.mark.parametrize("window_size", [1, 2, 3, 4, 5, 8, 20]) +def test_rolling_groupby_with_fixed_forward_many(group_keys, window_size): + # GH 43267 + df = DataFrame( + { + "a": np.array(list(group_keys)), + "b": np.arange(len(group_keys), dtype=np.float64) + 17, + 
"c": np.arange(len(group_keys), dtype=np.int64), + } + ) + + indexer = FixedForwardWindowIndexer(window_size=window_size) + result = df.groupby("a")["b"].rolling(window=indexer, min_periods=1).sum() + result.index.names = ["a", "c"] + + groups = df.groupby("a")[["a", "b", "c"]] + manual = concat( + [ + g.assign( + b=[ + g["b"].iloc[i : i + window_size].sum(min_count=1) + for i in range(len(g)) + ] + ) + for _, g in groups + ] + ) + manual = manual.set_index(["a", "c"])["b"] + + tm.assert_series_equal(result, manual) + + +def test_unequal_start_end_bounds(): + class CustomIndexer(BaseIndexer): + def get_window_bounds(self, num_values, min_periods, center, closed): + return np.array([1]), np.array([1, 2]) + + indexer = CustomIndexer() + roll = Series(1).rolling(indexer) + match = "start" + with pytest.raises(ValueError, match=match): + roll.mean() + + with pytest.raises(ValueError, match=match): + next(iter(roll)) + + with pytest.raises(ValueError, match=match): + roll.corr(pairwise=True) + + with pytest.raises(ValueError, match=match): + roll.cov(pairwise=True) + + +def test_unequal_bounds_to_object(): + # GH 44470 + class CustomIndexer(BaseIndexer): + def get_window_bounds(self, num_values, min_periods, center, closed): + return np.array([1]), np.array([2]) + + indexer = CustomIndexer() + roll = Series([1, 1]).rolling(indexer) + match = "start and end" + with pytest.raises(ValueError, match=match): + roll.mean() + + with pytest.raises(ValueError, match=match): + next(iter(roll)) + + with pytest.raises(ValueError, match=match): + roll.corr(pairwise=True) + + with pytest.raises(ValueError, match=match): + roll.cov(pairwise=True) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/test_cython_aggregations.py b/.local/lib/python3.8/site-packages/pandas/tests/window/test_cython_aggregations.py new file mode 100644 index 0000000..c60cb6e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/window/test_cython_aggregations.py @@ -0,0 +1,111 @@ +from functools import partial +import sys + +import numpy as np +import pytest + +import pandas._libs.window.aggregations as window_aggregations + +from pandas import Series +import pandas._testing as tm + + +def _get_rolling_aggregations(): + # list pairs of name and function + # each function has this signature: + # (const float64_t[:] values, ndarray[int64_t] start, + # ndarray[int64_t] end, int64_t minp) -> np.ndarray + named_roll_aggs = ( + [ + ("roll_sum", window_aggregations.roll_sum), + ("roll_mean", window_aggregations.roll_mean), + ] + + [ + (f"roll_var({ddof})", partial(window_aggregations.roll_var, ddof=ddof)) + for ddof in [0, 1] + ] + + [ + ("roll_skew", window_aggregations.roll_skew), + ("roll_kurt", window_aggregations.roll_kurt), + ("roll_median_c", window_aggregations.roll_median_c), + ("roll_max", window_aggregations.roll_max), + ("roll_min", window_aggregations.roll_min), + ] + + [ + ( + f"roll_quantile({quantile},{interpolation})", + partial( + window_aggregations.roll_quantile, + quantile=quantile, + interpolation=interpolation, + ), + ) + for quantile in [0.0001, 0.5, 0.9999] + for interpolation in window_aggregations.interpolation_types + ] + + [ + ( + f"roll_rank({percentile},{method},{ascending})", + partial( + window_aggregations.roll_rank, + percentile=percentile, + method=method, + ascending=ascending, + ), + ) + for percentile in [True, False] + for method in window_aggregations.rolling_rank_tiebreakers.keys() + for ascending in [True, False] + ] + ) + # unzip to a list of 2 tuples, names and 
functions + unzipped = list(zip(*named_roll_aggs)) + return {"ids": unzipped[0], "params": unzipped[1]} + + +_rolling_aggregations = _get_rolling_aggregations() + + +@pytest.fixture( + params=_rolling_aggregations["params"], ids=_rolling_aggregations["ids"] +) +def rolling_aggregation(request): + """Make a rolling aggregation function as fixture.""" + return request.param + + +def test_rolling_aggregation_boundary_consistency(rolling_aggregation): + # GH-45647 + minp, step, width, size, selection = 0, 1, 3, 11, [2, 7] + values = np.arange(1, 1 + size, dtype=np.float64) + end = np.arange(width, size, step, dtype=np.int64) + start = end - width + selarr = np.array(selection, dtype=np.int32) + result = Series(rolling_aggregation(values, start[selarr], end[selarr], minp)) + expected = Series(rolling_aggregation(values, start, end, minp)[selarr]) + tm.assert_equal(expected, result) + + +def test_rolling_aggregation_with_unused_elements(rolling_aggregation): + # GH-45647 + minp, width = 0, 5 # width at least 4 for kurt + size = 2 * width + 5 + values = np.arange(1, size + 1, dtype=np.float64) + values[width : width + 2] = sys.float_info.min + values[width + 2] = np.nan + values[width + 3 : width + 5] = sys.float_info.max + start = np.array([0, size - width], dtype=np.int64) + end = np.array([width, size], dtype=np.int64) + loc = np.array( + [j for i in range(len(start)) for j in range(start[i], end[i])], + dtype=np.int32, + ) + result = Series(rolling_aggregation(values, start, end, minp)) + compact_values = np.array(values[loc], dtype=np.float64) + compact_start = np.arange(0, len(start) * width, width, dtype=np.int64) + compact_end = compact_start + width + expected = Series( + rolling_aggregation(compact_values, compact_start, compact_end, minp) + ) + assert np.isfinite(expected.values).all(), "Not all expected values are finite" + tm.assert_equal(expected, result) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/test_dtypes.py b/.local/lib/python3.8/site-packages/pandas/tests/window/test_dtypes.py new file mode 100644 index 0000000..02b5f8a --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/window/test_dtypes.py @@ -0,0 +1,146 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.common import pandas_dtype + +from pandas import ( + NA, + DataFrame, + Series, +) +import pandas._testing as tm +from pandas.core.base import DataError + +# gh-12373 : rolling functions error on float32 data +# make sure rolling functions works for different dtypes +# +# further note that we are only checking rolling for fully dtype +# compliance (though both expanding and ewm inherit) + + +def get_dtype(dtype, coerce_int=None): + if coerce_int is False and "int" in dtype: + return None + return pandas_dtype(dtype) + + +@pytest.mark.parametrize( + "method, data, expected_data, coerce_int, min_periods", + [ + ("count", np.arange(5), [1, 2, 2, 2, 2], True, 0), + ("count", np.arange(10, 0, -2), [1, 2, 2, 2, 2], True, 0), + ("count", [0, 1, 2, np.nan, 4], [1, 2, 2, 1, 1], False, 0), + ("max", np.arange(5), [np.nan, 1, 2, 3, 4], True, None), + ("max", np.arange(10, 0, -2), [np.nan, 10, 8, 6, 4], True, None), + ("max", [0, 1, 2, np.nan, 4], [np.nan, 1, 2, np.nan, np.nan], False, None), + ("min", np.arange(5), [np.nan, 0, 1, 2, 3], True, None), + ("min", np.arange(10, 0, -2), [np.nan, 8, 6, 4, 2], True, None), + ("min", [0, 1, 2, np.nan, 4], [np.nan, 0, 1, np.nan, np.nan], False, None), + ("sum", np.arange(5), [np.nan, 1, 3, 5, 7], True, None), + ("sum", np.arange(10, 0, -2), 
[np.nan, 18, 14, 10, 6], True, None), + ("sum", [0, 1, 2, np.nan, 4], [np.nan, 1, 3, np.nan, np.nan], False, None), + ("mean", np.arange(5), [np.nan, 0.5, 1.5, 2.5, 3.5], True, None), + ("mean", np.arange(10, 0, -2), [np.nan, 9, 7, 5, 3], True, None), + ("mean", [0, 1, 2, np.nan, 4], [np.nan, 0.5, 1.5, np.nan, np.nan], False, None), + ("std", np.arange(5), [np.nan] + [np.sqrt(0.5)] * 4, True, None), + ("std", np.arange(10, 0, -2), [np.nan] + [np.sqrt(2)] * 4, True, None), + ( + "std", + [0, 1, 2, np.nan, 4], + [np.nan] + [np.sqrt(0.5)] * 2 + [np.nan] * 2, + False, + None, + ), + ("var", np.arange(5), [np.nan, 0.5, 0.5, 0.5, 0.5], True, None), + ("var", np.arange(10, 0, -2), [np.nan, 2, 2, 2, 2], True, None), + ("var", [0, 1, 2, np.nan, 4], [np.nan, 0.5, 0.5, np.nan, np.nan], False, None), + ("median", np.arange(5), [np.nan, 0.5, 1.5, 2.5, 3.5], True, None), + ("median", np.arange(10, 0, -2), [np.nan, 9, 7, 5, 3], True, None), + ( + "median", + [0, 1, 2, np.nan, 4], + [np.nan, 0.5, 1.5, np.nan, np.nan], + False, + None, + ), + ], +) +def test_series_dtypes(method, data, expected_data, coerce_int, dtypes, min_periods): + ser = Series(data, dtype=get_dtype(dtypes, coerce_int=coerce_int)) + rolled = ser.rolling(2, min_periods=min_periods) + + if dtypes in ("m8[ns]", "M8[ns]", "datetime64[ns, UTC]") and method != "count": + msg = "No numeric types to aggregate" + with pytest.raises(DataError, match=msg): + getattr(rolled, method)() + else: + result = getattr(rolled, method)() + expected = Series(expected_data, dtype="float64") + tm.assert_almost_equal(result, expected) + + +def test_series_nullable_int(any_signed_int_ea_dtype): + # GH 43016 + ser = Series([0, 1, NA], dtype=any_signed_int_ea_dtype) + result = ser.rolling(2).mean() + expected = Series([np.nan, 0.5, np.nan]) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "method, expected_data, min_periods", + [ + ("count", {0: Series([1, 2, 2, 2, 2]), 1: Series([1, 2, 2, 2, 2])}, 0), + ( + "max", + {0: Series([np.nan, 2, 4, 6, 8]), 1: Series([np.nan, 3, 5, 7, 9])}, + None, + ), + ( + "min", + {0: Series([np.nan, 0, 2, 4, 6]), 1: Series([np.nan, 1, 3, 5, 7])}, + None, + ), + ( + "sum", + {0: Series([np.nan, 2, 6, 10, 14]), 1: Series([np.nan, 4, 8, 12, 16])}, + None, + ), + ( + "mean", + {0: Series([np.nan, 1, 3, 5, 7]), 1: Series([np.nan, 2, 4, 6, 8])}, + None, + ), + ( + "std", + { + 0: Series([np.nan] + [np.sqrt(2)] * 4), + 1: Series([np.nan] + [np.sqrt(2)] * 4), + }, + None, + ), + ( + "var", + {0: Series([np.nan, 2, 2, 2, 2]), 1: Series([np.nan, 2, 2, 2, 2])}, + None, + ), + ( + "median", + {0: Series([np.nan, 1, 3, 5, 7]), 1: Series([np.nan, 2, 4, 6, 8])}, + None, + ), + ], +) +def test_dataframe_dtypes(method, expected_data, dtypes, min_periods): + + df = DataFrame(np.arange(10).reshape((5, 2)), dtype=get_dtype(dtypes)) + rolled = df.rolling(2, min_periods=min_periods) + + if dtypes in ("m8[ns]", "M8[ns]", "datetime64[ns, UTC]") and method != "count": + msg = "No numeric types to aggregate" + with pytest.raises(DataError, match=msg): + getattr(rolled, method)() + else: + result = getattr(rolled, method)() + expected = DataFrame(expected_data, dtype="float64") + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/test_ewm.py b/.local/lib/python3.8/site-packages/pandas/tests/window/test_ewm.py new file mode 100644 index 0000000..23c3a0e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/window/test_ewm.py @@ -0,0 +1,668 @@ +import numpy as np 
+import pytest + +from pandas.errors import UnsupportedFunctionCall + +from pandas import ( + DataFrame, + DatetimeIndex, + Series, + date_range, +) +import pandas._testing as tm +from pandas.core.window import ExponentialMovingWindow + + +def test_doc_string(): + + df = DataFrame({"B": [0, 1, 2, np.nan, 4]}) + df + df.ewm(com=0.5).mean() + + +def test_constructor(frame_or_series): + + c = frame_or_series(range(5)).ewm + + # valid + c(com=0.5) + c(span=1.5) + c(alpha=0.5) + c(halflife=0.75) + c(com=0.5, span=None) + c(alpha=0.5, com=None) + c(halflife=0.75, alpha=None) + + # not valid: mutually exclusive + msg = "comass, span, halflife, and alpha are mutually exclusive" + with pytest.raises(ValueError, match=msg): + c(com=0.5, alpha=0.5) + with pytest.raises(ValueError, match=msg): + c(span=1.5, halflife=0.75) + with pytest.raises(ValueError, match=msg): + c(alpha=0.5, span=1.5) + + # not valid: com < 0 + msg = "comass must satisfy: comass >= 0" + with pytest.raises(ValueError, match=msg): + c(com=-0.5) + + # not valid: span < 1 + msg = "span must satisfy: span >= 1" + with pytest.raises(ValueError, match=msg): + c(span=0.5) + + # not valid: halflife <= 0 + msg = "halflife must satisfy: halflife > 0" + with pytest.raises(ValueError, match=msg): + c(halflife=0) + + # not valid: alpha <= 0 or alpha > 1 + msg = "alpha must satisfy: 0 < alpha <= 1" + for alpha in (-0.5, 1.5): + with pytest.raises(ValueError, match=msg): + c(alpha=alpha) + + +@pytest.mark.parametrize("method", ["std", "mean", "var"]) +def test_numpy_compat(method): + # see gh-12811 + e = ExponentialMovingWindow(Series([2, 4, 6]), alpha=0.5) + + msg = "numpy operations are not valid with window objects" + + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(e, method)(1, 2, 3) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(e, method)(dtype=np.float64) + + +def test_ewma_times_not_datetime_type(): + msg = r"times must be datetime64\[ns\] dtype." + with pytest.raises(ValueError, match=msg): + Series(range(5)).ewm(times=np.arange(5)) + + +def test_ewma_times_not_same_length(): + msg = "times must be the same length as the object." + with pytest.raises(ValueError, match=msg): + Series(range(5)).ewm(times=np.arange(4).astype("datetime64[ns]")) + + +def test_ewma_halflife_not_correct_type(): + msg = "halflife must be a string or datetime.timedelta object" + with pytest.raises(ValueError, match=msg): + Series(range(5)).ewm(halflife=1, times=np.arange(5).astype("datetime64[ns]")) + + +def test_ewma_halflife_without_times(halflife_with_times): + msg = "halflife can only be a timedelta convertible argument if times is not None." 
+ with pytest.raises(ValueError, match=msg): + Series(range(5)).ewm(halflife=halflife_with_times) + + +@pytest.mark.parametrize( + "times", + [ + np.arange(10).astype("datetime64[D]").astype("datetime64[ns]"), + date_range("2000", freq="D", periods=10), + date_range("2000", freq="D", periods=10).tz_localize("UTC"), + ], +) +@pytest.mark.parametrize("min_periods", [0, 2]) +def test_ewma_with_times_equal_spacing(halflife_with_times, times, min_periods): + halflife = halflife_with_times + data = np.arange(10.0) + data[::2] = np.nan + df = DataFrame({"A": data, "time_col": date_range("2000", freq="D", periods=10)}) + with tm.assert_produces_warning(FutureWarning, match="nuisance columns"): + # GH#42738 + result = df.ewm(halflife=halflife, min_periods=min_periods, times=times).mean() + expected = df.ewm(halflife=1.0, min_periods=min_periods).mean() + tm.assert_frame_equal(result, expected) + + +def test_ewma_with_times_variable_spacing(tz_aware_fixture): + tz = tz_aware_fixture + halflife = "23 days" + times = DatetimeIndex( + ["2020-01-01", "2020-01-10T00:04:05", "2020-02-23T05:00:23"] + ).tz_localize(tz) + data = np.arange(3) + df = DataFrame(data) + result = df.ewm(halflife=halflife, times=times).mean() + expected = DataFrame([0.0, 0.5674161888241773, 1.545239952073459]) + tm.assert_frame_equal(result, expected) + + +def test_ewm_with_nat_raises(halflife_with_times): + # GH#38535 + ser = Series(range(1)) + times = DatetimeIndex(["NaT"]) + with pytest.raises(ValueError, match="Cannot convert NaT values to integer"): + ser.ewm(com=0.1, halflife=halflife_with_times, times=times) + + +def test_ewm_with_times_getitem(halflife_with_times): + # GH 40164 + halflife = halflife_with_times + data = np.arange(10.0) + data[::2] = np.nan + times = date_range("2000", freq="D", periods=10) + df = DataFrame({"A": data, "B": data}) + result = df.ewm(halflife=halflife, times=times)["A"].mean() + expected = df.ewm(halflife=1.0)["A"].mean() + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("arg", ["com", "halflife", "span", "alpha"]) +def test_ewm_getitem_attributes_retained(arg, adjust, ignore_na): + # GH 40164 + kwargs = {arg: 1, "adjust": adjust, "ignore_na": ignore_na} + ewm = DataFrame({"A": range(1), "B": range(1)}).ewm(**kwargs) + expected = {attr: getattr(ewm, attr) for attr in ewm._attributes} + ewm_slice = ewm["A"] + result = {attr: getattr(ewm, attr) for attr in ewm_slice._attributes} + assert result == expected + + +def test_ewm_vol_deprecated(): + ser = Series(range(1)) + with tm.assert_produces_warning(FutureWarning): + result = ser.ewm(com=0.1).vol() + expected = ser.ewm(com=0.1).std() + tm.assert_series_equal(result, expected) + + +def test_ewma_times_adjust_false_raises(): + # GH 40098 + with pytest.raises( + NotImplementedError, match="times is not supported with adjust=False." 
+ ): + Series(range(1)).ewm( + 0.1, adjust=False, times=date_range("2000", freq="D", periods=1) + ) + + +@pytest.mark.parametrize( + "func, expected", + [ + [ + "mean", + DataFrame( + { + 0: range(5), + 1: range(4, 9), + 2: [7.428571, 9, 10.571429, 12.142857, 13.714286], + }, + dtype=float, + ), + ], + [ + "std", + DataFrame( + { + 0: [np.nan] * 5, + 1: [4.242641] * 5, + 2: [4.6291, 5.196152, 5.781745, 6.380775, 6.989788], + } + ), + ], + [ + "var", + DataFrame( + { + 0: [np.nan] * 5, + 1: [18.0] * 5, + 2: [21.428571, 27, 33.428571, 40.714286, 48.857143], + } + ), + ], + ], +) +def test_float_dtype_ewma(func, expected, float_numpy_dtype): + # GH#42452 + + df = DataFrame( + {0: range(5), 1: range(6, 11), 2: range(10, 20, 2)}, dtype=float_numpy_dtype + ) + e = df.ewm(alpha=0.5, axis=1) + result = getattr(e, func)() + + tm.assert_frame_equal(result, expected) + + +def test_times_string_col_deprecated(): + # GH 43265 + data = np.arange(10.0) + data[::2] = np.nan + df = DataFrame({"A": data, "time_col": date_range("2000", freq="D", periods=10)}) + with tm.assert_produces_warning(FutureWarning, match="Specifying times"): + result = df.ewm(halflife="1 day", min_periods=0, times="time_col").mean() + expected = df.ewm(halflife=1.0, min_periods=0).mean() + tm.assert_frame_equal(result, expected) + + +def test_ewm_sum_adjust_false_notimplemented(): + data = Series(range(1)).ewm(com=1, adjust=False) + with pytest.raises(NotImplementedError, match="sum is not"): + data.sum() + + +@pytest.mark.parametrize( + "expected_data, ignore", + [[[10.0, 5.0, 2.5, 11.25], False], [[10.0, 5.0, 5.0, 12.5], True]], +) +def test_ewm_sum(expected_data, ignore): + # xref from Numbagg tests + # https://github.com/numbagg/numbagg/blob/v0.2.1/numbagg/test/test_moving.py#L50 + data = Series([10, 0, np.nan, 10]) + result = data.ewm(alpha=0.5, ignore_na=ignore).sum() + expected = Series(expected_data) + tm.assert_series_equal(result, expected) + + +def test_ewma_adjust(): + vals = Series(np.zeros(1000)) + vals[5] = 1 + result = vals.ewm(span=100, adjust=False).mean().sum() + assert np.abs(result - 1) < 1e-2 + + +def test_ewma_cases(adjust, ignore_na): + # try adjust/ignore_na args matrix + + s = Series([1.0, 2.0, 4.0, 8.0]) + + if adjust: + expected = Series([1.0, 1.6, 2.736842, 4.923077]) + else: + expected = Series([1.0, 1.333333, 2.222222, 4.148148]) + + result = s.ewm(com=2.0, adjust=adjust, ignore_na=ignore_na).mean() + tm.assert_series_equal(result, expected) + + +def test_ewma_nan_handling(): + s = Series([1.0] + [np.nan] * 5 + [1.0]) + result = s.ewm(com=5).mean() + tm.assert_series_equal(result, Series([1.0] * len(s))) + + s = Series([np.nan] * 2 + [1.0] + [np.nan] * 2 + [1.0]) + result = s.ewm(com=5).mean() + tm.assert_series_equal(result, Series([np.nan] * 2 + [1.0] * 4)) + + +@pytest.mark.parametrize( + "s, adjust, ignore_na, w", + [ + ( + Series([np.nan, 1.0, 101.0]), + True, + False, + [np.nan, (1.0 - (1.0 / (1.0 + 2.0))), 1.0], + ), + ( + Series([np.nan, 1.0, 101.0]), + True, + True, + [np.nan, (1.0 - (1.0 / (1.0 + 2.0))), 1.0], + ), + ( + Series([np.nan, 1.0, 101.0]), + False, + False, + [np.nan, (1.0 - (1.0 / (1.0 + 2.0))), (1.0 / (1.0 + 2.0))], + ), + ( + Series([np.nan, 1.0, 101.0]), + False, + True, + [np.nan, (1.0 - (1.0 / (1.0 + 2.0))), (1.0 / (1.0 + 2.0))], + ), + ( + Series([1.0, np.nan, 101.0]), + True, + False, + [(1.0 - (1.0 / (1.0 + 2.0))) ** 2, np.nan, 1.0], + ), + ( + Series([1.0, np.nan, 101.0]), + True, + True, + [(1.0 - (1.0 / (1.0 + 2.0))), np.nan, 1.0], + ), + ( + Series([1.0, np.nan, 
101.0]), + False, + False, + [(1.0 - (1.0 / (1.0 + 2.0))) ** 2, np.nan, (1.0 / (1.0 + 2.0))], + ), + ( + Series([1.0, np.nan, 101.0]), + False, + True, + [(1.0 - (1.0 / (1.0 + 2.0))), np.nan, (1.0 / (1.0 + 2.0))], + ), + ( + Series([np.nan, 1.0, np.nan, np.nan, 101.0, np.nan]), + True, + False, + [np.nan, (1.0 - (1.0 / (1.0 + 2.0))) ** 3, np.nan, np.nan, 1.0, np.nan], + ), + ( + Series([np.nan, 1.0, np.nan, np.nan, 101.0, np.nan]), + True, + True, + [np.nan, (1.0 - (1.0 / (1.0 + 2.0))), np.nan, np.nan, 1.0, np.nan], + ), + ( + Series([np.nan, 1.0, np.nan, np.nan, 101.0, np.nan]), + False, + False, + [ + np.nan, + (1.0 - (1.0 / (1.0 + 2.0))) ** 3, + np.nan, + np.nan, + (1.0 / (1.0 + 2.0)), + np.nan, + ], + ), + ( + Series([np.nan, 1.0, np.nan, np.nan, 101.0, np.nan]), + False, + True, + [ + np.nan, + (1.0 - (1.0 / (1.0 + 2.0))), + np.nan, + np.nan, + (1.0 / (1.0 + 2.0)), + np.nan, + ], + ), + ( + Series([1.0, np.nan, 101.0, 50.0]), + True, + False, + [ + (1.0 - (1.0 / (1.0 + 2.0))) ** 3, + np.nan, + (1.0 - (1.0 / (1.0 + 2.0))), + 1.0, + ], + ), + ( + Series([1.0, np.nan, 101.0, 50.0]), + True, + True, + [ + (1.0 - (1.0 / (1.0 + 2.0))) ** 2, + np.nan, + (1.0 - (1.0 / (1.0 + 2.0))), + 1.0, + ], + ), + ( + Series([1.0, np.nan, 101.0, 50.0]), + False, + False, + [ + (1.0 - (1.0 / (1.0 + 2.0))) ** 3, + np.nan, + (1.0 - (1.0 / (1.0 + 2.0))) * (1.0 / (1.0 + 2.0)), + (1.0 / (1.0 + 2.0)) + * ((1.0 - (1.0 / (1.0 + 2.0))) ** 2 + (1.0 / (1.0 + 2.0))), + ], + ), + ( + Series([1.0, np.nan, 101.0, 50.0]), + False, + True, + [ + (1.0 - (1.0 / (1.0 + 2.0))) ** 2, + np.nan, + (1.0 - (1.0 / (1.0 + 2.0))) * (1.0 / (1.0 + 2.0)), + (1.0 / (1.0 + 2.0)), + ], + ), + ], +) +def test_ewma_nan_handling_cases(s, adjust, ignore_na, w): + # GH 7603 + expected = (s.multiply(w).cumsum() / Series(w).cumsum()).fillna(method="ffill") + result = s.ewm(com=2.0, adjust=adjust, ignore_na=ignore_na).mean() + + tm.assert_series_equal(result, expected) + if ignore_na is False: + # check that ignore_na defaults to False + result = s.ewm(com=2.0, adjust=adjust).mean() + tm.assert_series_equal(result, expected) + + +def test_ewm_alpha(): + # GH 10789 + arr = np.random.randn(100) + locs = np.arange(20, 40) + arr[locs] = np.NaN + + s = Series(arr) + a = s.ewm(alpha=0.61722699889169674).mean() + b = s.ewm(com=0.62014947789973052).mean() + c = s.ewm(span=2.240298955799461).mean() + d = s.ewm(halflife=0.721792864318).mean() + tm.assert_series_equal(a, b) + tm.assert_series_equal(a, c) + tm.assert_series_equal(a, d) + + +def test_ewm_domain_checks(): + # GH 12492 + arr = np.random.randn(100) + locs = np.arange(20, 40) + arr[locs] = np.NaN + + s = Series(arr) + msg = "comass must satisfy: comass >= 0" + with pytest.raises(ValueError, match=msg): + s.ewm(com=-0.1) + s.ewm(com=0.0) + s.ewm(com=0.1) + + msg = "span must satisfy: span >= 1" + with pytest.raises(ValueError, match=msg): + s.ewm(span=-0.1) + with pytest.raises(ValueError, match=msg): + s.ewm(span=0.0) + with pytest.raises(ValueError, match=msg): + s.ewm(span=0.9) + s.ewm(span=1.0) + s.ewm(span=1.1) + + msg = "halflife must satisfy: halflife > 0" + with pytest.raises(ValueError, match=msg): + s.ewm(halflife=-0.1) + with pytest.raises(ValueError, match=msg): + s.ewm(halflife=0.0) + s.ewm(halflife=0.1) + + msg = "alpha must satisfy: 0 < alpha <= 1" + with pytest.raises(ValueError, match=msg): + s.ewm(alpha=-0.1) + with pytest.raises(ValueError, match=msg): + s.ewm(alpha=0.0) + s.ewm(alpha=0.1) + s.ewm(alpha=1.0) + with pytest.raises(ValueError, match=msg): + s.ewm(alpha=1.1) + + 
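+# (editor addition -- an illustrative sketch, not pandas API) The constructor
+# and domain checks above all reduce to a single smoothing factor alpha; these
+# are the standard conversions pandas documents for com, span, and halflife.
+# _alpha_from is a hypothetical helper used only to make the equivalence
+# tests below (e.g. com=9.5 vs span=20) easy to verify by hand.
+def _alpha_from(com=None, span=None, halflife=None):
+    if com is not None:
+        return 1.0 / (1.0 + com)  # com >= 0   ->  0 < alpha <= 1
+    if span is not None:
+        return 2.0 / (span + 1.0)  # span >= 1  ->  0 < alpha <= 1
+    return 1.0 - np.exp(-np.log(2.0) / halflife)  # halflife > 0
+
+
+# e.g. _alpha_from(com=9.5) == _alpha_from(span=20) == 2 / 21, which is why
+# test_ewma_span_com_args below expects identical means for those two inputs.
+
+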
+@pytest.mark.parametrize("method", ["mean", "std", "var"]) +def test_ew_empty_series(method): + vals = Series([], dtype=np.float64) + + ewm = vals.ewm(3) + result = getattr(ewm, method)() + tm.assert_almost_equal(result, vals) + + +@pytest.mark.parametrize("min_periods", [0, 1]) +@pytest.mark.parametrize("name", ["mean", "var", "std"]) +def test_ew_min_periods(min_periods, name): + # excluding NaNs correctly + arr = np.random.randn(50) + arr[:10] = np.NaN + arr[-10:] = np.NaN + s = Series(arr) + + # check min_periods + # GH 7898 + result = getattr(s.ewm(com=50, min_periods=2), name)() + assert result[:11].isna().all() + assert not result[11:].isna().any() + + result = getattr(s.ewm(com=50, min_periods=min_periods), name)() + if name == "mean": + assert result[:10].isna().all() + assert not result[10:].isna().any() + else: + # ewm.std, ewm.var (with bias=False) require at least + # two values + assert result[:11].isna().all() + assert not result[11:].isna().any() + + # check series of length 0 + result = getattr(Series(dtype=object).ewm(com=50, min_periods=min_periods), name)() + tm.assert_series_equal(result, Series(dtype="float64")) + + # check series of length 1 + result = getattr(Series([1.0]).ewm(50, min_periods=min_periods), name)() + if name == "mean": + tm.assert_series_equal(result, Series([1.0])) + else: + # ewm.std, ewm.var with bias=False require at least + # two values + tm.assert_series_equal(result, Series([np.NaN])) + + # pass in ints + result2 = getattr(Series(np.arange(50)).ewm(span=10), name)() + assert result2.dtype == np.float_ + + +@pytest.mark.parametrize("name", ["cov", "corr"]) +def test_ewm_corr_cov(name): + A = Series(np.random.randn(50), index=np.arange(50)) + B = A[2:] + np.random.randn(48) + + A[:10] = np.NaN + B[-10:] = np.NaN + + result = getattr(A.ewm(com=20, min_periods=5), name)(B) + assert np.isnan(result.values[:14]).all() + assert not np.isnan(result.values[14:]).any() + + +@pytest.mark.parametrize("min_periods", [0, 1, 2]) +@pytest.mark.parametrize("name", ["cov", "corr"]) +def test_ewm_corr_cov_min_periods(name, min_periods): + # GH 7898 + A = Series(np.random.randn(50), index=np.arange(50)) + B = A[2:] + np.random.randn(48) + + A[:10] = np.NaN + B[-10:] = np.NaN + + result = getattr(A.ewm(com=20, min_periods=min_periods), name)(B) + # binary functions (ewmcov, ewmcorr) with bias=False require at + # least two values + assert np.isnan(result.values[:11]).all() + assert not np.isnan(result.values[11:]).any() + + # check series of length 0 + empty = Series([], dtype=np.float64) + result = getattr(empty.ewm(com=50, min_periods=min_periods), name)(empty) + tm.assert_series_equal(result, empty) + + # check series of length 1 + result = getattr(Series([1.0]).ewm(com=50, min_periods=min_periods), name)( + Series([1.0]) + ) + tm.assert_series_equal(result, Series([np.NaN])) + + +@pytest.mark.parametrize("name", ["cov", "corr"]) +def test_different_input_array_raise_exception(name): + A = Series(np.random.randn(50), index=np.arange(50)) + A[:10] = np.NaN + + msg = "other must be a DataFrame or Series" + # exception raised is Exception + with pytest.raises(ValueError, match=msg): + getattr(A.ewm(com=20, min_periods=5), name)(np.random.randn(50)) + + +@pytest.mark.parametrize("name", ["var", "std", "mean"]) +def test_ewma_series(series, name): + series_result = getattr(series.ewm(com=10), name)() + assert isinstance(series_result, Series) + + +@pytest.mark.parametrize("name", ["var", "std", "mean"]) +def test_ewma_frame(frame, name): + frame_result = 
getattr(frame.ewm(com=10), name)() + assert isinstance(frame_result, DataFrame) + + +def test_ewma_span_com_args(series): + A = series.ewm(com=9.5).mean() + B = series.ewm(span=20).mean() + tm.assert_almost_equal(A, B) + msg = "comass, span, halflife, and alpha are mutually exclusive" + with pytest.raises(ValueError, match=msg): + series.ewm(com=9.5, span=20) + + msg = "Must pass one of comass, span, halflife, or alpha" + with pytest.raises(ValueError, match=msg): + series.ewm().mean() + + +def test_ewma_halflife_arg(series): + A = series.ewm(com=13.932726172912965).mean() + B = series.ewm(halflife=10.0).mean() + tm.assert_almost_equal(A, B) + msg = "comass, span, halflife, and alpha are mutually exclusive" + with pytest.raises(ValueError, match=msg): + series.ewm(span=20, halflife=50) + with pytest.raises(ValueError, match=msg): + series.ewm(com=9.5, halflife=50) + with pytest.raises(ValueError, match=msg): + series.ewm(com=9.5, span=20, halflife=50) + msg = "Must pass one of comass, span, halflife, or alpha" + with pytest.raises(ValueError, match=msg): + series.ewm() + + +def test_ewm_alpha_arg(series): + # GH 10789 + s = series + msg = "Must pass one of comass, span, halflife, or alpha" + with pytest.raises(ValueError, match=msg): + s.ewm() + + msg = "comass, span, halflife, and alpha are mutually exclusive" + with pytest.raises(ValueError, match=msg): + s.ewm(com=10.0, alpha=0.5) + with pytest.raises(ValueError, match=msg): + s.ewm(span=10.0, alpha=0.5) + with pytest.raises(ValueError, match=msg): + s.ewm(halflife=10.0, alpha=0.5) + + +@pytest.mark.parametrize("func", ["cov", "corr"]) +def test_ewm_pairwise_cov_corr(func, frame): + result = getattr(frame.ewm(span=10, min_periods=5), func)() + result = result.loc[(slice(None), 1), 5] + result.index = result.index.droplevel(1) + expected = getattr(frame[1].ewm(span=10, min_periods=5), func)(frame[5]) + tm.assert_series_equal(result, expected, check_names=False) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/test_expanding.py b/.local/lib/python3.8/site-packages/pandas/tests/window/test_expanding.py new file mode 100644 index 0000000..7ba81e8 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/window/test_expanding.py @@ -0,0 +1,653 @@ +import numpy as np +import pytest + +from pandas.errors import UnsupportedFunctionCall + +from pandas import ( + DataFrame, + DatetimeIndex, + Index, + MultiIndex, + Series, + isna, + notna, +) +import pandas._testing as tm +from pandas.core.window import Expanding + + +def test_doc_string(): + + df = DataFrame({"B": [0, 1, 2, np.nan, 4]}) + df + df.expanding(2).sum() + + +@pytest.mark.filterwarnings( + "ignore:The `center` argument on `expanding` will be removed in the future" +) +def test_constructor(frame_or_series): + # GH 12669 + + c = frame_or_series(range(5)).expanding + + # valid + c(min_periods=1) + c(min_periods=1, center=True) + c(min_periods=1, center=False) + + +@pytest.mark.parametrize("w", [2.0, "foo", np.array([2])]) +@pytest.mark.filterwarnings( + "ignore:The `center` argument on `expanding` will be removed in the future" +) +def test_constructor_invalid(frame_or_series, w): + # not valid + + c = frame_or_series(range(5)).expanding + msg = "min_periods must be an integer" + with pytest.raises(ValueError, match=msg): + c(min_periods=w) + + msg = "center must be a boolean" + with pytest.raises(ValueError, match=msg): + c(min_periods=1, center=w) + + +@pytest.mark.parametrize("method", ["std", "mean", "sum", "max", "min", "var"]) +def 
test_numpy_compat(method): + # see gh-12811 + e = Expanding(Series([2, 4, 6])) + + msg = "numpy operations are not valid with window objects" + + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(e, method)(1, 2, 3) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(e, method)(dtype=np.float64) + + +@pytest.mark.parametrize( + "expander", + [ + 1, + pytest.param( + "ls", + marks=pytest.mark.xfail( + reason="GH#16425 expanding with offset not supported" + ), + ), + ], +) +def test_empty_df_expanding(expander): + # GH 15819 Verifies that datetime and integer expanding windows can be + # applied to empty DataFrames + + expected = DataFrame() + result = DataFrame().expanding(expander).sum() + tm.assert_frame_equal(result, expected) + + # Verifies that datetime and integer expanding windows can be applied + # to empty DataFrames with datetime index + expected = DataFrame(index=DatetimeIndex([])) + result = DataFrame(index=DatetimeIndex([])).expanding(expander).sum() + tm.assert_frame_equal(result, expected) + + +def test_missing_minp_zero(): + # https://github.com/pandas-dev/pandas/pull/18921 + # minp=0 + x = Series([np.nan]) + result = x.expanding(min_periods=0).sum() + expected = Series([0.0]) + tm.assert_series_equal(result, expected) + + # minp=1 + result = x.expanding(min_periods=1).sum() + expected = Series([np.nan]) + tm.assert_series_equal(result, expected) + + +def test_expanding_axis(axis_frame): + # see gh-23372. + df = DataFrame(np.ones((10, 20))) + axis = df._get_axis_number(axis_frame) + + if axis == 0: + expected = DataFrame( + {i: [np.nan] * 2 + [float(j) for j in range(3, 11)] for i in range(20)} + ) + else: + # axis == 1 + expected = DataFrame([[np.nan] * 2 + [float(i) for i in range(3, 21)]] * 10) + + result = df.expanding(3, axis=axis_frame).sum() + tm.assert_frame_equal(result, expected) + + +def test_expanding_count_with_min_periods(frame_or_series): + # GH 26996 + result = frame_or_series(range(5)).expanding(min_periods=3).count() + expected = frame_or_series([np.nan, np.nan, 3.0, 4.0, 5.0]) + tm.assert_equal(result, expected) + + +def test_expanding_count_default_min_periods_with_null_values(frame_or_series): + # GH 26996 + values = [1, 2, 3, np.nan, 4, 5, 6] + expected_counts = [1.0, 2.0, 3.0, 3.0, 4.0, 5.0, 6.0] + + result = frame_or_series(values).expanding().count() + expected = frame_or_series(expected_counts) + tm.assert_equal(result, expected) + + +def test_expanding_count_with_min_periods_exceeding_series_length(frame_or_series): + # GH 25857 + result = frame_or_series(range(5)).expanding(min_periods=6).count() + expected = frame_or_series([np.nan, np.nan, np.nan, np.nan, np.nan]) + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize( + "df,expected,min_periods", + [ + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), + ], + 3, + ), + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), + ], + 2, + ), + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), + ], + 1, + ), + (DataFrame({"A": [1], "B": [4]}), [], 2), + (DataFrame(), [({}, [])], 1), + ( + DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}), + [ + ({"A": [1.0], "B": [np.nan]}, [0]), + ({"A": [1, 
np.nan], "B": [np.nan, 5]}, [0, 1]), + ({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]), + ], + 3, + ), + ( + DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}), + [ + ({"A": [1.0], "B": [np.nan]}, [0]), + ({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]), + ({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]), + ], + 2, + ), + ( + DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}), + [ + ({"A": [1.0], "B": [np.nan]}, [0]), + ({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]), + ({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]), + ], + 1, + ), + ], +) +def test_iter_expanding_dataframe(df, expected, min_periods): + # GH 11704 + expected = [DataFrame(values, index=index) for (values, index) in expected] + + for (expected, actual) in zip(expected, df.expanding(min_periods)): + tm.assert_frame_equal(actual, expected) + + +@pytest.mark.parametrize( + "ser,expected,min_periods", + [ + (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 3), + (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 2), + (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 1), + (Series([1, 2]), [([1], [0]), ([1, 2], [0, 1])], 2), + (Series([np.nan, 2]), [([np.nan], [0]), ([np.nan, 2], [0, 1])], 2), + (Series([], dtype="int64"), [], 2), + ], +) +def test_iter_expanding_series(ser, expected, min_periods): + # GH 11704 + expected = [Series(values, index=index) for (values, index) in expected] + + for (expected, actual) in zip(expected, ser.expanding(min_periods)): + tm.assert_series_equal(actual, expected) + + +def test_center_deprecate_warning(): + # GH 20647 + df = DataFrame() + with tm.assert_produces_warning(FutureWarning): + df.expanding(center=True) + + with tm.assert_produces_warning(FutureWarning): + df.expanding(center=False) + + with tm.assert_produces_warning(None): + df.expanding() + + +def test_expanding_sem(frame_or_series): + # GH: 26476 + obj = frame_or_series([0, 1, 2]) + result = obj.expanding().sem() + if isinstance(result, DataFrame): + result = Series(result[0].values) + expected = Series([np.nan] + [0.707107] * 2) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("method", ["skew", "kurt"]) +def test_expanding_skew_kurt_numerical_stability(method): + # GH: 6929 + s = Series(np.random.rand(10)) + expected = getattr(s.expanding(3), method)() + s = s + 5000 + result = getattr(s.expanding(3), method)() + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("window", [1, 3, 10, 20]) +@pytest.mark.parametrize("method", ["min", "max", "average"]) +@pytest.mark.parametrize("pct", [True, False]) +@pytest.mark.parametrize("ascending", [True, False]) +@pytest.mark.parametrize("test_data", ["default", "duplicates", "nans"]) +def test_rank(window, method, pct, ascending, test_data): + length = 20 + if test_data == "default": + ser = Series(data=np.random.rand(length)) + elif test_data == "duplicates": + ser = Series(data=np.random.choice(3, length)) + elif test_data == "nans": + ser = Series( + data=np.random.choice([1.0, 0.25, 0.75, np.nan, np.inf, -np.inf], length) + ) + + expected = ser.expanding(window).apply( + lambda x: x.rank(method=method, pct=pct, ascending=ascending).iloc[-1] + ) + result = ser.expanding(window).rank(method=method, pct=pct, ascending=ascending) + + tm.assert_series_equal(result, expected) + + +def test_expanding_corr(series): + A = series.dropna() + B = (A + np.random.randn(len(A)))[:-5] + + result = A.expanding().corr(B) + + rolling_result = 
A.rolling(window=len(A), min_periods=1).corr(B) + + tm.assert_almost_equal(rolling_result, result) + + +def test_expanding_count(series): + result = series.expanding(min_periods=0).count() + tm.assert_almost_equal( + result, series.rolling(window=len(series), min_periods=0).count() + ) + + +def test_expanding_quantile(series): + result = series.expanding().quantile(0.5) + + rolling_result = series.rolling(window=len(series), min_periods=1).quantile(0.5) + + tm.assert_almost_equal(result, rolling_result) + + +def test_expanding_cov(series): + A = series + B = (A + np.random.randn(len(A)))[:-5] + + result = A.expanding().cov(B) + + rolling_result = A.rolling(window=len(A), min_periods=1).cov(B) + + tm.assert_almost_equal(rolling_result, result) + + +def test_expanding_cov_pairwise(frame): + result = frame.expanding().cov() + + rolling_result = frame.rolling(window=len(frame), min_periods=1).cov() + + tm.assert_frame_equal(result, rolling_result) + + +def test_expanding_corr_pairwise(frame): + result = frame.expanding().corr() + + rolling_result = frame.rolling(window=len(frame), min_periods=1).corr() + tm.assert_frame_equal(result, rolling_result) + + +@pytest.mark.parametrize( + "func,static_comp", + [ + ("sum", np.sum), + ("mean", lambda x: np.mean(x, axis=0)), + ("max", lambda x: np.max(x, axis=0)), + ("min", lambda x: np.min(x, axis=0)), + ], + ids=["sum", "mean", "max", "min"], +) +def test_expanding_func(func, static_comp, frame_or_series): + data = frame_or_series(np.array(list(range(10)) + [np.nan] * 10)) + result = getattr(data.expanding(min_periods=1, axis=0), func)() + assert isinstance(result, frame_or_series) + + expected = static_comp(data[:11]) + if frame_or_series is Series: + tm.assert_almost_equal(result[10], expected) + else: + tm.assert_series_equal(result.iloc[10], expected, check_names=False) + + +@pytest.mark.parametrize( + "func,static_comp", + [("sum", np.sum), ("mean", np.mean), ("max", np.max), ("min", np.min)], + ids=["sum", "mean", "max", "min"], +) +def test_expanding_min_periods(func, static_comp): + ser = Series(np.random.randn(50)) + + result = getattr(ser.expanding(min_periods=30, axis=0), func)() + assert result[:29].isna().all() + tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50])) + + # min_periods is working correctly + result = getattr(ser.expanding(min_periods=15, axis=0), func)() + assert isna(result.iloc[13]) + assert notna(result.iloc[14]) + + ser2 = Series(np.random.randn(20)) + result = getattr(ser2.expanding(min_periods=5, axis=0), func)() + assert isna(result[3]) + assert notna(result[4]) + + # min_periods=0 + result0 = getattr(ser.expanding(min_periods=0, axis=0), func)() + result1 = getattr(ser.expanding(min_periods=1, axis=0), func)() + tm.assert_almost_equal(result0, result1) + + result = getattr(ser.expanding(min_periods=1, axis=0), func)() + tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50])) + + +def test_expanding_apply(engine_and_raw, frame_or_series): + engine, raw = engine_and_raw + data = frame_or_series(np.array(list(range(10)) + [np.nan] * 10)) + result = data.expanding(min_periods=1).apply( + lambda x: x.mean(), raw=raw, engine=engine + ) + assert isinstance(result, frame_or_series) + + if frame_or_series is Series: + tm.assert_almost_equal(result[9], np.mean(data[:11], axis=0)) + else: + tm.assert_series_equal( + result.iloc[9], np.mean(data[:11], axis=0), check_names=False + ) + + +def test_expanding_min_periods_apply(engine_and_raw): + engine, raw = engine_and_raw + ser = Series(np.random.randn(50)) + + 
result = ser.expanding(min_periods=30).apply( + lambda x: x.mean(), raw=raw, engine=engine + ) + assert result[:29].isna().all() + tm.assert_almost_equal(result.iloc[-1], np.mean(ser[:50])) + + # min_periods is working correctly + result = ser.expanding(min_periods=15).apply( + lambda x: x.mean(), raw=raw, engine=engine + ) + assert isna(result.iloc[13]) + assert notna(result.iloc[14]) + + ser2 = Series(np.random.randn(20)) + result = ser2.expanding(min_periods=5).apply( + lambda x: x.mean(), raw=raw, engine=engine + ) + assert isna(result[3]) + assert notna(result[4]) + + # min_periods=0 + result0 = ser.expanding(min_periods=0).apply( + lambda x: x.mean(), raw=raw, engine=engine + ) + result1 = ser.expanding(min_periods=1).apply( + lambda x: x.mean(), raw=raw, engine=engine + ) + tm.assert_almost_equal(result0, result1) + + result = ser.expanding(min_periods=1).apply( + lambda x: x.mean(), raw=raw, engine=engine + ) + tm.assert_almost_equal(result.iloc[-1], np.mean(ser[:50])) + + +@pytest.mark.parametrize( + "f", + [ + lambda x: (x.expanding(min_periods=5).cov(x, pairwise=True)), + lambda x: (x.expanding(min_periods=5).corr(x, pairwise=True)), + ], +) +def test_moment_functions_zero_length_pairwise(f): + + df1 = DataFrame() + df2 = DataFrame(columns=Index(["a"], name="foo"), index=Index([], name="bar")) + df2["a"] = df2["a"].astype("float64") + + df1_expected = DataFrame( + index=MultiIndex.from_product([df1.index, df1.columns]), columns=Index([]) + ) + df2_expected = DataFrame( + index=MultiIndex.from_product([df2.index, df2.columns], names=["bar", "foo"]), + columns=Index(["a"], name="foo"), + dtype="float64", + ) + + df1_result = f(df1) + tm.assert_frame_equal(df1_result, df1_expected) + + df2_result = f(df2) + tm.assert_frame_equal(df2_result, df2_expected) + + +@pytest.mark.parametrize( + "f", + [ + lambda x: x.expanding().count(), + lambda x: x.expanding(min_periods=5).cov(x, pairwise=False), + lambda x: x.expanding(min_periods=5).corr(x, pairwise=False), + lambda x: x.expanding(min_periods=5).max(), + lambda x: x.expanding(min_periods=5).min(), + lambda x: x.expanding(min_periods=5).sum(), + lambda x: x.expanding(min_periods=5).mean(), + lambda x: x.expanding(min_periods=5).std(), + lambda x: x.expanding(min_periods=5).var(), + lambda x: x.expanding(min_periods=5).skew(), + lambda x: x.expanding(min_periods=5).kurt(), + lambda x: x.expanding(min_periods=5).quantile(0.5), + lambda x: x.expanding(min_periods=5).median(), + lambda x: x.expanding(min_periods=5).apply(sum, raw=False), + lambda x: x.expanding(min_periods=5).apply(sum, raw=True), + ], +) +def test_moment_functions_zero_length(f): + # GH 8056 + s = Series(dtype=np.float64) + s_expected = s + df1 = DataFrame() + df1_expected = df1 + df2 = DataFrame(columns=["a"]) + df2["a"] = df2["a"].astype("float64") + df2_expected = df2 + + s_result = f(s) + tm.assert_series_equal(s_result, s_expected) + + df1_result = f(df1) + tm.assert_frame_equal(df1_result, df1_expected) + + df2_result = f(df2) + tm.assert_frame_equal(df2_result, df2_expected) + + +def test_expanding_apply_empty_series(engine_and_raw): + engine, raw = engine_and_raw + ser = Series([], dtype=np.float64) + tm.assert_series_equal( + ser, ser.expanding().apply(lambda x: x.mean(), raw=raw, engine=engine) + ) + + +def test_expanding_apply_min_periods_0(engine_and_raw): + # GH 8080 + engine, raw = engine_and_raw + s = Series([None, None, None]) + result = s.expanding(min_periods=0).apply(lambda x: len(x), raw=raw, engine=engine) + expected = Series([1.0, 2.0, 3.0]) + 
tm.assert_series_equal(result, expected) + + +def test_expanding_cov_diff_index(): + # GH 7512 + s1 = Series([1, 2, 3], index=[0, 1, 2]) + s2 = Series([1, 3], index=[0, 2]) + result = s1.expanding().cov(s2) + expected = Series([None, None, 2.0]) + tm.assert_series_equal(result, expected) + + s2a = Series([1, None, 3], index=[0, 1, 2]) + result = s1.expanding().cov(s2a) + tm.assert_series_equal(result, expected) + + s1 = Series([7, 8, 10], index=[0, 1, 3]) + s2 = Series([7, 9, 10], index=[0, 2, 3]) + result = s1.expanding().cov(s2) + expected = Series([None, None, None, 4.5]) + tm.assert_series_equal(result, expected) + + +def test_expanding_corr_diff_index(): + # GH 7512 + s1 = Series([1, 2, 3], index=[0, 1, 2]) + s2 = Series([1, 3], index=[0, 2]) + result = s1.expanding().corr(s2) + expected = Series([None, None, 1.0]) + tm.assert_series_equal(result, expected) + + s2a = Series([1, None, 3], index=[0, 1, 2]) + result = s1.expanding().corr(s2a) + tm.assert_series_equal(result, expected) + + s1 = Series([7, 8, 10], index=[0, 1, 3]) + s2 = Series([7, 9, 10], index=[0, 2, 3]) + result = s1.expanding().corr(s2) + expected = Series([None, None, None, 1.0]) + tm.assert_series_equal(result, expected) + + +def test_expanding_cov_pairwise_diff_length(): + # GH 7512 + df1 = DataFrame([[1, 5], [3, 2], [3, 9]], columns=Index(["A", "B"], name="foo")) + df1a = DataFrame( + [[1, 5], [3, 9]], index=[0, 2], columns=Index(["A", "B"], name="foo") + ) + df2 = DataFrame( + [[5, 6], [None, None], [2, 1]], columns=Index(["X", "Y"], name="foo") + ) + df2a = DataFrame( + [[5, 6], [2, 1]], index=[0, 2], columns=Index(["X", "Y"], name="foo") + ) + # TODO: xref gh-15826 + # .loc is not preserving the names + result1 = df1.expanding().cov(df2, pairwise=True).loc[2] + result2 = df1.expanding().cov(df2a, pairwise=True).loc[2] + result3 = df1a.expanding().cov(df2, pairwise=True).loc[2] + result4 = df1a.expanding().cov(df2a, pairwise=True).loc[2] + expected = DataFrame( + [[-3.0, -6.0], [-5.0, -10.0]], + columns=Index(["A", "B"], name="foo"), + index=Index(["X", "Y"], name="foo"), + ) + tm.assert_frame_equal(result1, expected) + tm.assert_frame_equal(result2, expected) + tm.assert_frame_equal(result3, expected) + tm.assert_frame_equal(result4, expected) + + +def test_expanding_corr_pairwise_diff_length(): + # GH 7512 + df1 = DataFrame( + [[1, 2], [3, 2], [3, 4]], columns=["A", "B"], index=Index(range(3), name="bar") + ) + df1a = DataFrame( + [[1, 2], [3, 4]], index=Index([0, 2], name="bar"), columns=["A", "B"] + ) + df2 = DataFrame( + [[5, 6], [None, None], [2, 1]], + columns=["X", "Y"], + index=Index(range(3), name="bar"), + ) + df2a = DataFrame( + [[5, 6], [2, 1]], index=Index([0, 2], name="bar"), columns=["X", "Y"] + ) + result1 = df1.expanding().corr(df2, pairwise=True).loc[2] + result2 = df1.expanding().corr(df2a, pairwise=True).loc[2] + result3 = df1a.expanding().corr(df2, pairwise=True).loc[2] + result4 = df1a.expanding().corr(df2a, pairwise=True).loc[2] + expected = DataFrame( + [[-1.0, -1.0], [-1.0, -1.0]], columns=["A", "B"], index=Index(["X", "Y"]) + ) + tm.assert_frame_equal(result1, expected) + tm.assert_frame_equal(result2, expected) + tm.assert_frame_equal(result3, expected) + tm.assert_frame_equal(result4, expected) + + +def test_expanding_apply_args_kwargs(engine_and_raw): + def mean_w_arg(x, const): + return np.mean(x) + const + + engine, raw = engine_and_raw + + df = DataFrame(np.random.rand(20, 3)) + + expected = df.expanding().apply(np.mean, engine=engine, raw=raw) + 20.0 + + result = 
df.expanding().apply(mean_w_arg, engine=engine, raw=raw, args=(20,)) + tm.assert_frame_equal(result, expected) + + result = df.expanding().apply(mean_w_arg, raw=raw, kwargs={"const": 20}) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/test_groupby.py b/.local/lib/python3.8/site-packages/pandas/tests/window/test_groupby.py new file mode 100644 index 0000000..6ec19e4 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/window/test_groupby.py @@ -0,0 +1,1155 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + Timestamp, + date_range, + to_datetime, +) +import pandas._testing as tm +from pandas.api.indexers import BaseIndexer +from pandas.core.groupby.groupby import get_groupby + + +class TestRolling: + def setup_method(self): + self.frame = DataFrame({"A": [1] * 20 + [2] * 12 + [3] * 8, "B": np.arange(40)}) + + def test_mutated(self): + + msg = r"groupby\(\) got an unexpected keyword argument 'foo'" + with pytest.raises(TypeError, match=msg): + self.frame.groupby("A", foo=1) + + g = self.frame.groupby("A") + assert not g.mutated + g = get_groupby(self.frame, by="A", mutated=True) + assert g.mutated + + def test_getitem(self): + g = self.frame.groupby("A") + g_mutated = get_groupby(self.frame, by="A", mutated=True) + + expected = g_mutated.B.apply(lambda x: x.rolling(2).mean()) + + result = g.rolling(2).mean().B + tm.assert_series_equal(result, expected) + + result = g.rolling(2).B.mean() + tm.assert_series_equal(result, expected) + + result = g.B.rolling(2).mean() + tm.assert_series_equal(result, expected) + + result = self.frame.B.groupby(self.frame.A).rolling(2).mean() + tm.assert_series_equal(result, expected) + + def test_getitem_multiple(self): + + # GH 13174 + g = self.frame.groupby("A") + r = g.rolling(2, min_periods=0) + g_mutated = get_groupby(self.frame, by="A", mutated=True) + expected = g_mutated.B.apply(lambda x: x.rolling(2, min_periods=0).count()) + + result = r.B.count() + tm.assert_series_equal(result, expected) + + result = r.B.count() + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "f", + [ + "sum", + "mean", + "min", + "max", + pytest.param( + "count", + marks=pytest.mark.filterwarnings("ignore:min_periods:FutureWarning"), + ), + "kurt", + "skew", + ], + ) + def test_rolling(self, f): + g = self.frame.groupby("A") + r = g.rolling(window=4) + + result = getattr(r, f)() + expected = g.apply(lambda x: getattr(x.rolling(4), f)()) + # groupby.apply doesn't drop the grouped-by column + expected = expected.drop("A", axis=1) + # GH 39732 + expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)]) + expected.index = expected_index + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("f", ["std", "var"]) + def test_rolling_ddof(self, f): + g = self.frame.groupby("A") + r = g.rolling(window=4) + + result = getattr(r, f)(ddof=1) + expected = g.apply(lambda x: getattr(x.rolling(4), f)(ddof=1)) + # groupby.apply doesn't drop the grouped-by column + expected = expected.drop("A", axis=1) + # GH 39732 + expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)]) + expected.index = expected_index + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "interpolation", ["linear", "lower", "higher", "midpoint", "nearest"] + ) + def test_rolling_quantile(self, interpolation): + g = self.frame.groupby("A") + r = g.rolling(window=4) + + result = r.quantile(0.4, 
interpolation=interpolation) + expected = g.apply( + lambda x: x.rolling(4).quantile(0.4, interpolation=interpolation) + ) + # groupby.apply doesn't drop the grouped-by column + expected = expected.drop("A", axis=1) + # GH 39732 + expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)]) + expected.index = expected_index + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("f, expected_val", [["corr", 1], ["cov", 0.5]]) + def test_rolling_corr_cov_other_same_size_as_groups(self, f, expected_val): + # GH 42915 + df = DataFrame( + {"value": range(10), "idx1": [1] * 5 + [2] * 5, "idx2": [1, 2, 3, 4, 5] * 2} + ).set_index(["idx1", "idx2"]) + other = DataFrame({"value": range(5), "idx2": [1, 2, 3, 4, 5]}).set_index( + "idx2" + ) + result = getattr(df.groupby(level=0).rolling(2), f)(other) + expected_data = ([np.nan] + [expected_val] * 4) * 2 + expected = DataFrame( + expected_data, + columns=["value"], + index=MultiIndex.from_arrays( + [ + [1] * 5 + [2] * 5, + [1] * 5 + [2] * 5, + list(range(1, 6)) * 2, + ], + names=["idx1", "idx1", "idx2"], + ), + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("f", ["corr", "cov"]) + def test_rolling_corr_cov_other_diff_size_as_groups(self, f): + g = self.frame.groupby("A") + r = g.rolling(window=4) + + result = getattr(r, f)(self.frame) + + def func(x): + return getattr(x.rolling(4), f)(self.frame) + + expected = g.apply(func) + # GH 39591: The grouped column should be all np.nan + # (groupby.apply inserts 0s for cov) + expected["A"] = np.nan + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("f", ["corr", "cov"]) + def test_rolling_corr_cov_pairwise(self, f): + g = self.frame.groupby("A") + r = g.rolling(window=4) + + result = getattr(r.B, f)(pairwise=True) + + def func(x): + return getattr(x.B.rolling(4), f)(pairwise=True) + + expected = g.apply(func) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "func, expected_values", + [("cov", [[1.0, 1.0], [1.0, 4.0]]), ("corr", [[1.0, 0.5], [0.5, 1.0]])], + ) + def test_rolling_corr_cov_unordered(self, func, expected_values): + # GH 43386 + df = DataFrame( + { + "a": ["g1", "g2", "g1", "g1"], + "b": [0, 0, 1, 2], + "c": [2, 0, 6, 4], + } + ) + rol = df.groupby("a").rolling(3) + result = getattr(rol, func)() + expected = DataFrame( + { + "b": 4 * [np.nan] + expected_values[0] + 2 * [np.nan], + "c": 4 * [np.nan] + expected_values[1] + 2 * [np.nan], + }, + index=MultiIndex.from_tuples( + [ + ("g1", 0, "b"), + ("g1", 0, "c"), + ("g1", 2, "b"), + ("g1", 2, "c"), + ("g1", 3, "b"), + ("g1", 3, "c"), + ("g2", 1, "b"), + ("g2", 1, "c"), + ], + names=["a", None, None], + ), + ) + tm.assert_frame_equal(result, expected) + + def test_rolling_apply(self, raw): + g = self.frame.groupby("A") + r = g.rolling(window=4) + + # reduction + result = r.apply(lambda x: x.sum(), raw=raw) + expected = g.apply(lambda x: x.rolling(4).apply(lambda y: y.sum(), raw=raw)) + # groupby.apply doesn't drop the grouped-by column + expected = expected.drop("A", axis=1) + # GH 39732 + expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)]) + expected.index = expected_index + tm.assert_frame_equal(result, expected) + + def test_rolling_apply_mutability(self): + # GH 14013 + df = DataFrame({"A": ["foo"] * 3 + ["bar"] * 3, "B": [1] * 6}) + g = df.groupby("A") + + mi = MultiIndex.from_tuples( + [("bar", 3), ("bar", 4), ("bar", 5), ("foo", 0), ("foo", 1), ("foo", 2)] + ) + + mi.names = ["A", None] + # Grouped column should not be a part of the 
output + expected = DataFrame([np.nan, 2.0, 2.0] * 2, columns=["B"], index=mi) + + result = g.rolling(window=2).sum() + tm.assert_frame_equal(result, expected) + + # Call an arbitrary function on the groupby + g.sum() + + # Make sure nothing has been mutated + result = g.rolling(window=2).sum() + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("expected_value,raw_value", [[1.0, True], [0.0, False]]) + def test_groupby_rolling(self, expected_value, raw_value): + # GH 31754 + + def foo(x): + return int(isinstance(x, np.ndarray)) + + df = DataFrame({"id": [1, 1, 1], "value": [1, 2, 3]}) + result = df.groupby("id").value.rolling(1).apply(foo, raw=raw_value) + expected = Series( + [expected_value] * 3, + index=MultiIndex.from_tuples(((1, 0), (1, 1), (1, 2)), names=["id", None]), + name="value", + ) + tm.assert_series_equal(result, expected) + + def test_groupby_rolling_center_center(self): + # GH 35552 + series = Series(range(1, 6)) + result = series.groupby(series).rolling(center=True, window=3).mean() + expected = Series( + [np.nan] * 5, + index=MultiIndex.from_tuples(((1, 0), (2, 1), (3, 2), (4, 3), (5, 4))), + ) + tm.assert_series_equal(result, expected) + + series = Series(range(1, 5)) + result = series.groupby(series).rolling(center=True, window=3).mean() + expected = Series( + [np.nan] * 4, + index=MultiIndex.from_tuples(((1, 0), (2, 1), (3, 2), (4, 3))), + ) + tm.assert_series_equal(result, expected) + + df = DataFrame({"a": ["a"] * 5 + ["b"] * 6, "b": range(11)}) + result = df.groupby("a").rolling(center=True, window=3).mean() + expected = DataFrame( + [np.nan, 1, 2, 3, np.nan, np.nan, 6, 7, 8, 9, np.nan], + index=MultiIndex.from_tuples( + ( + ("a", 0), + ("a", 1), + ("a", 2), + ("a", 3), + ("a", 4), + ("b", 5), + ("b", 6), + ("b", 7), + ("b", 8), + ("b", 9), + ("b", 10), + ), + names=["a", None], + ), + columns=["b"], + ) + tm.assert_frame_equal(result, expected) + + df = DataFrame({"a": ["a"] * 5 + ["b"] * 5, "b": range(10)}) + result = df.groupby("a").rolling(center=True, window=3).mean() + expected = DataFrame( + [np.nan, 1, 2, 3, np.nan, np.nan, 6, 7, 8, np.nan], + index=MultiIndex.from_tuples( + ( + ("a", 0), + ("a", 1), + ("a", 2), + ("a", 3), + ("a", 4), + ("b", 5), + ("b", 6), + ("b", 7), + ("b", 8), + ("b", 9), + ), + names=["a", None], + ), + columns=["b"], + ) + tm.assert_frame_equal(result, expected) + + def test_groupby_rolling_center_on(self): + # GH 37141 + df = DataFrame( + data={ + "Date": date_range("2020-01-01", "2020-01-10"), + "gb": ["group_1"] * 6 + ["group_2"] * 4, + "value": range(10), + } + ) + result = ( + df.groupby("gb") + .rolling(6, on="Date", center=True, min_periods=1) + .value.mean() + ) + expected = Series( + [1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 7.0, 7.5, 7.5, 7.5], + name="value", + index=MultiIndex.from_tuples( + ( + ("group_1", Timestamp("2020-01-01")), + ("group_1", Timestamp("2020-01-02")), + ("group_1", Timestamp("2020-01-03")), + ("group_1", Timestamp("2020-01-04")), + ("group_1", Timestamp("2020-01-05")), + ("group_1", Timestamp("2020-01-06")), + ("group_2", Timestamp("2020-01-07")), + ("group_2", Timestamp("2020-01-08")), + ("group_2", Timestamp("2020-01-09")), + ("group_2", Timestamp("2020-01-10")), + ), + names=["gb", "Date"], + ), + ) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("min_periods", [5, 4, 3]) + def test_groupby_rolling_center_min_periods(self, min_periods): + # GH 36040 + df = DataFrame({"group": ["A"] * 10 + ["B"] * 10, "data": range(20)}) + + window_size = 5 + result = ( + 
df.groupby("group") + .rolling(window_size, center=True, min_periods=min_periods) + .mean() + ) + result = result.reset_index()[["group", "data"]] + + grp_A_mean = [1.0, 1.5, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 7.5, 8.0] + grp_B_mean = [x + 10.0 for x in grp_A_mean] + + num_nans = max(0, min_periods - 3) # For window_size of 5 + nans = [np.nan] * num_nans + grp_A_expected = nans + grp_A_mean[num_nans : 10 - num_nans] + nans + grp_B_expected = nans + grp_B_mean[num_nans : 10 - num_nans] + nans + + expected = DataFrame( + {"group": ["A"] * 10 + ["B"] * 10, "data": grp_A_expected + grp_B_expected} + ) + + tm.assert_frame_equal(result, expected) + + def test_groupby_subselect_rolling(self): + # GH 35486 + df = DataFrame( + {"a": [1, 2, 3, 2], "b": [4.0, 2.0, 3.0, 1.0], "c": [10, 20, 30, 20]} + ) + result = df.groupby("a")[["b"]].rolling(2).max() + expected = DataFrame( + [np.nan, np.nan, 2.0, np.nan], + columns=["b"], + index=MultiIndex.from_tuples( + ((1, 0), (2, 1), (2, 3), (3, 2)), names=["a", None] + ), + ) + tm.assert_frame_equal(result, expected) + + result = df.groupby("a")["b"].rolling(2).max() + expected = Series( + [np.nan, np.nan, 2.0, np.nan], + index=MultiIndex.from_tuples( + ((1, 0), (2, 1), (2, 3), (3, 2)), names=["a", None] + ), + name="b", + ) + tm.assert_series_equal(result, expected) + + def test_groupby_rolling_custom_indexer(self): + # GH 35557 + class SimpleIndexer(BaseIndexer): + def get_window_bounds( + self, num_values=0, min_periods=None, center=None, closed=None + ): + min_periods = self.window_size if min_periods is None else 0 + end = np.arange(num_values, dtype=np.int64) + 1 + start = end.copy() - self.window_size + start[start < 0] = min_periods + return start, end + + df = DataFrame( + {"a": [1.0, 2.0, 3.0, 4.0, 5.0] * 3}, index=[0] * 5 + [1] * 5 + [2] * 5 + ) + result = ( + df.groupby(df.index) + .rolling(SimpleIndexer(window_size=3), min_periods=1) + .sum() + ) + expected = df.groupby(df.index).rolling(window=3, min_periods=1).sum() + tm.assert_frame_equal(result, expected) + + def test_groupby_rolling_subset_with_closed(self): + # GH 35549 + df = DataFrame( + { + "column1": range(6), + "column2": range(6), + "group": 3 * ["A", "B"], + "date": [Timestamp("2019-01-01")] * 6, + } + ) + result = ( + df.groupby("group").rolling("1D", on="date", closed="left")["column1"].sum() + ) + expected = Series( + [np.nan, 0.0, 2.0, np.nan, 1.0, 4.0], + index=MultiIndex.from_tuples( + [("A", Timestamp("2019-01-01"))] * 3 + + [("B", Timestamp("2019-01-01"))] * 3, + names=["group", "date"], + ), + name="column1", + ) + tm.assert_series_equal(result, expected) + + def test_groupby_subset_rolling_subset_with_closed(self): + # GH 35549 + df = DataFrame( + { + "column1": range(6), + "column2": range(6), + "group": 3 * ["A", "B"], + "date": [Timestamp("2019-01-01")] * 6, + } + ) + + result = ( + df.groupby("group")[["column1", "date"]] + .rolling("1D", on="date", closed="left")["column1"] + .sum() + ) + expected = Series( + [np.nan, 0.0, 2.0, np.nan, 1.0, 4.0], + index=MultiIndex.from_tuples( + [("A", Timestamp("2019-01-01"))] * 3 + + [("B", Timestamp("2019-01-01"))] * 3, + names=["group", "date"], + ), + name="column1", + ) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("func", ["max", "min"]) + def test_groupby_rolling_index_changed(self, func): + # GH: #36018 nlevels of MultiIndex changed + ds = Series( + [1, 2, 2], + index=MultiIndex.from_tuples( + [("a", "x"), ("a", "y"), ("c", "z")], names=["1", "2"] + ), + name="a", + ) + + result = 
getattr(ds.groupby(ds).rolling(2), func)() + expected = Series( + [np.nan, np.nan, 2.0], + index=MultiIndex.from_tuples( + [(1, "a", "x"), (2, "a", "y"), (2, "c", "z")], names=["a", "1", "2"] + ), + name="a", + ) + tm.assert_series_equal(result, expected) + + def test_groupby_rolling_empty_frame(self): + # GH 36197 + expected = DataFrame({"s1": []}) + result = expected.groupby("s1").rolling(window=1).sum() + # GH 32262 + expected = expected.drop(columns="s1") + # GH-38057 from_tuples gives empty object dtype, we now get float/int levels + # expected.index = MultiIndex.from_tuples([], names=["s1", None]) + expected.index = MultiIndex.from_product( + [Index([], dtype="float64"), Index([], dtype="int64")], names=["s1", None] + ) + tm.assert_frame_equal(result, expected) + + expected = DataFrame({"s1": [], "s2": []}) + result = expected.groupby(["s1", "s2"]).rolling(window=1).sum() + # GH 32262 + expected = expected.drop(columns=["s1", "s2"]) + expected.index = MultiIndex.from_product( + [ + Index([], dtype="float64"), + Index([], dtype="float64"), + Index([], dtype="int64"), + ], + names=["s1", "s2", None], + ) + tm.assert_frame_equal(result, expected) + + def test_groupby_rolling_string_index(self): + # GH: 36727 + df = DataFrame( + [ + ["A", "group_1", Timestamp(2019, 1, 1, 9)], + ["B", "group_1", Timestamp(2019, 1, 2, 9)], + ["Z", "group_2", Timestamp(2019, 1, 3, 9)], + ["H", "group_1", Timestamp(2019, 1, 6, 9)], + ["E", "group_2", Timestamp(2019, 1, 20, 9)], + ], + columns=["index", "group", "eventTime"], + ).set_index("index") + + groups = df.groupby("group") + df["count_to_date"] = groups.cumcount() + rolling_groups = groups.rolling("10d", on="eventTime") + result = rolling_groups.apply(lambda df: df.shape[0]) + expected = DataFrame( + [ + ["A", "group_1", Timestamp(2019, 1, 1, 9), 1.0], + ["B", "group_1", Timestamp(2019, 1, 2, 9), 2.0], + ["H", "group_1", Timestamp(2019, 1, 6, 9), 3.0], + ["Z", "group_2", Timestamp(2019, 1, 3, 9), 1.0], + ["E", "group_2", Timestamp(2019, 1, 20, 9), 1.0], + ], + columns=["index", "group", "eventTime", "count_to_date"], + ).set_index(["group", "index"]) + tm.assert_frame_equal(result, expected) + + def test_groupby_rolling_no_sort(self): + # GH 36889 + result = ( + DataFrame({"foo": [2, 1], "bar": [2, 1]}) + .groupby("foo", sort=False) + .rolling(1) + .min() + ) + expected = DataFrame( + np.array([[2.0, 2.0], [1.0, 1.0]]), + columns=["foo", "bar"], + index=MultiIndex.from_tuples([(2, 0), (1, 1)], names=["foo", None]), + ) + # GH 32262 + expected = expected.drop(columns="foo") + tm.assert_frame_equal(result, expected) + + def test_groupby_rolling_count_closed_on(self): + # GH 35869 + df = DataFrame( + { + "column1": range(6), + "column2": range(6), + "group": 3 * ["A", "B"], + "date": date_range(end="20190101", periods=6), + } + ) + result = ( + df.groupby("group") + .rolling("3d", on="date", closed="left")["column1"] + .count() + ) + expected = Series( + [np.nan, 1.0, 1.0, np.nan, 1.0, 1.0], + name="column1", + index=MultiIndex.from_tuples( + [ + ("A", Timestamp("2018-12-27")), + ("A", Timestamp("2018-12-29")), + ("A", Timestamp("2018-12-31")), + ("B", Timestamp("2018-12-28")), + ("B", Timestamp("2018-12-30")), + ("B", Timestamp("2019-01-01")), + ], + names=["group", "date"], + ), + ) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + ("func", "kwargs"), + [("rolling", {"window": 2, "min_periods": 1}), ("expanding", {})], + ) + def test_groupby_rolling_sem(self, func, kwargs): + # GH: 26476 + df = DataFrame( + [["a", 1], ["a", 2], 
["b", 1], ["b", 2], ["b", 3]], columns=["a", "b"] + ) + result = getattr(df.groupby("a"), func)(**kwargs).sem() + expected = DataFrame( + {"a": [np.nan] * 5, "b": [np.nan, 0.70711, np.nan, 0.70711, 0.70711]}, + index=MultiIndex.from_tuples( + [("a", 0), ("a", 1), ("b", 2), ("b", 3), ("b", 4)], names=["a", None] + ), + ) + # GH 32262 + expected = expected.drop(columns="a") + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + ("rollings", "key"), [({"on": "a"}, "a"), ({"on": None}, "index")] + ) + def test_groupby_rolling_nans_in_index(self, rollings, key): + # GH: 34617 + df = DataFrame( + { + "a": to_datetime(["2020-06-01 12:00", "2020-06-01 14:00", np.nan]), + "b": [1, 2, 3], + "c": [1, 1, 1], + } + ) + if key == "index": + df = df.set_index("a") + with pytest.raises(ValueError, match=f"{key} must be monotonic"): + df.groupby("c").rolling("60min", **rollings) + + @pytest.mark.parametrize("group_keys", [True, False]) + def test_groupby_rolling_group_keys(self, group_keys): + # GH 37641 + # GH 38523: GH 37641 actually was not a bug. + # group_keys only applies to groupby.apply directly + arrays = [["val1", "val1", "val2"], ["val1", "val1", "val2"]] + index = MultiIndex.from_arrays(arrays, names=("idx1", "idx2")) + + s = Series([1, 2, 3], index=index) + result = s.groupby(["idx1", "idx2"], group_keys=group_keys).rolling(1).mean() + expected = Series( + [1.0, 2.0, 3.0], + index=MultiIndex.from_tuples( + [ + ("val1", "val1", "val1", "val1"), + ("val1", "val1", "val1", "val1"), + ("val2", "val2", "val2", "val2"), + ], + names=["idx1", "idx2", "idx1", "idx2"], + ), + ) + tm.assert_series_equal(result, expected) + + def test_groupby_rolling_index_level_and_column_label(self): + # The groupby keys should not appear as a resulting column + arrays = [["val1", "val1", "val2"], ["val1", "val1", "val2"]] + index = MultiIndex.from_arrays(arrays, names=("idx1", "idx2")) + + df = DataFrame({"A": [1, 1, 2], "B": range(3)}, index=index) + result = df.groupby(["idx1", "A"]).rolling(1).mean() + expected = DataFrame( + {"B": [0.0, 1.0, 2.0]}, + index=MultiIndex.from_tuples( + [ + ("val1", 1, "val1", "val1"), + ("val1", 1, "val1", "val1"), + ("val2", 2, "val2", "val2"), + ], + names=["idx1", "A", "idx1", "idx2"], + ), + ) + tm.assert_frame_equal(result, expected) + + def test_groupby_rolling_resulting_multiindex(self): + # a few different cases checking the created MultiIndex of the result + # https://github.com/pandas-dev/pandas/pull/38057 + + # grouping by 1 columns -> 2-level MI as result + df = DataFrame({"a": np.arange(8.0), "b": [1, 2] * 4}) + result = df.groupby("b").rolling(3).mean() + expected_index = MultiIndex.from_tuples( + [(1, 0), (1, 2), (1, 4), (1, 6), (2, 1), (2, 3), (2, 5), (2, 7)], + names=["b", None], + ) + tm.assert_index_equal(result.index, expected_index) + + def test_groupby_rolling_resulting_multiindex2(self): + # grouping by 2 columns -> 3-level MI as result + df = DataFrame({"a": np.arange(12.0), "b": [1, 2] * 6, "c": [1, 2, 3, 4] * 3}) + result = df.groupby(["b", "c"]).rolling(2).sum() + expected_index = MultiIndex.from_tuples( + [ + (1, 1, 0), + (1, 1, 4), + (1, 1, 8), + (1, 3, 2), + (1, 3, 6), + (1, 3, 10), + (2, 2, 1), + (2, 2, 5), + (2, 2, 9), + (2, 4, 3), + (2, 4, 7), + (2, 4, 11), + ], + names=["b", "c", None], + ) + tm.assert_index_equal(result.index, expected_index) + + def test_groupby_rolling_resulting_multiindex3(self): + # grouping with 1 level on dataframe with 2-level MI -> 3-level MI as result + df = DataFrame({"a": np.arange(8.0), "b": [1, 2] * 4, 
"c": [1, 2, 3, 4] * 2}) + df = df.set_index("c", append=True) + result = df.groupby("b").rolling(3).mean() + expected_index = MultiIndex.from_tuples( + [ + (1, 0, 1), + (1, 2, 3), + (1, 4, 1), + (1, 6, 3), + (2, 1, 2), + (2, 3, 4), + (2, 5, 2), + (2, 7, 4), + ], + names=["b", None, "c"], + ) + tm.assert_index_equal(result.index, expected_index, exact="equiv") + + def test_groupby_rolling_object_doesnt_affect_groupby_apply(self): + # GH 39732 + g = self.frame.groupby("A") + expected = g.apply(lambda x: x.rolling(4).sum()).index + _ = g.rolling(window=4) + result = g.apply(lambda x: x.rolling(4).sum()).index + tm.assert_index_equal(result, expected) + assert not g.mutated + assert not g.grouper.mutated + + @pytest.mark.parametrize( + ("window", "min_periods", "closed", "expected"), + [ + (2, 0, "left", [None, 0.0, 1.0, 1.0, None, 0.0, 1.0, 1.0]), + (2, 2, "left", [None, None, 1.0, 1.0, None, None, 1.0, 1.0]), + (4, 4, "left", [None, None, None, None, None, None, None, None]), + (4, 4, "right", [None, None, None, 5.0, None, None, None, 5.0]), + ], + ) + def test_groupby_rolling_var(self, window, min_periods, closed, expected): + df = DataFrame([1, 2, 3, 4, 5, 6, 7, 8]) + result = ( + df.groupby([1, 2, 1, 2, 1, 2, 1, 2]) + .rolling(window=window, min_periods=min_periods, closed=closed) + .var(0) + ) + expected_result = DataFrame( + np.array(expected, dtype="float64"), + index=MultiIndex( + levels=[[1, 2], [0, 1, 2, 3, 4, 5, 6, 7]], + codes=[[0, 0, 0, 0, 1, 1, 1, 1], [0, 2, 4, 6, 1, 3, 5, 7]], + ), + ) + tm.assert_frame_equal(result, expected_result) + + @pytest.mark.parametrize( + "columns", [MultiIndex.from_tuples([("A", ""), ("B", "C")]), ["A", "B"]] + ) + def test_by_column_not_in_values(self, columns): + # GH 32262 + df = DataFrame([[1, 0]] * 20 + [[2, 0]] * 12 + [[3, 0]] * 8, columns=columns) + g = df.groupby("A") + original_obj = g.obj.copy(deep=True) + r = g.rolling(4) + result = r.sum() + assert "A" not in result.columns + tm.assert_frame_equal(g.obj, original_obj) + + def test_groupby_level(self): + # GH 38523, 38787 + arrays = [ + ["Falcon", "Falcon", "Parrot", "Parrot"], + ["Captive", "Wild", "Captive", "Wild"], + ] + index = MultiIndex.from_arrays(arrays, names=("Animal", "Type")) + df = DataFrame({"Max Speed": [390.0, 350.0, 30.0, 20.0]}, index=index) + result = df.groupby(level=0)["Max Speed"].rolling(2).sum() + expected = Series( + [np.nan, 740.0, np.nan, 50.0], + index=MultiIndex.from_tuples( + [ + ("Falcon", "Falcon", "Captive"), + ("Falcon", "Falcon", "Wild"), + ("Parrot", "Parrot", "Captive"), + ("Parrot", "Parrot", "Wild"), + ], + names=["Animal", "Animal", "Type"], + ), + name="Max Speed", + ) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "by, expected_data", + [ + [["id"], {"num": [100.0, 150.0, 150.0, 200.0]}], + [ + ["id", "index"], + { + "date": [ + Timestamp("2018-01-01"), + Timestamp("2018-01-02"), + Timestamp("2018-01-01"), + Timestamp("2018-01-02"), + ], + "num": [100.0, 200.0, 150.0, 250.0], + }, + ], + ], + ) + def test_as_index_false(self, by, expected_data): + # GH 39433 + data = [ + ["A", "2018-01-01", 100.0], + ["A", "2018-01-02", 200.0], + ["B", "2018-01-01", 150.0], + ["B", "2018-01-02", 250.0], + ] + df = DataFrame(data, columns=["id", "date", "num"]) + df["date"] = to_datetime(df["date"]) + df = df.set_index(["date"]) + + gp_by = [getattr(df, attr) for attr in by] + result = ( + df.groupby(gp_by, as_index=False).rolling(window=2, min_periods=1).mean() + ) + + expected = {"id": ["A", "A", "B", "B"]} + 
expected.update(expected_data) + expected = DataFrame( + expected, + index=df.index, + ) + tm.assert_frame_equal(result, expected) + + def test_nan_and_zero_endpoints(self): + # https://github.com/twosigma/pandas/issues/53 + size = 1000 + idx = np.repeat(0, size) + idx[-1] = 1 + + val = 5e25 + arr = np.repeat(val, size) + arr[0] = np.nan + arr[-1] = 0 + + df = DataFrame( + { + "index": idx, + "adl2": arr, + } + ).set_index("index") + result = df.groupby("index")["adl2"].rolling(window=10, min_periods=1).mean() + expected = Series( + arr, + name="adl2", + index=MultiIndex.from_arrays( + [[0] * 999 + [1], [0] * 999 + [1]], names=["index", "index"] + ), + ) + tm.assert_series_equal(result, expected) + + +class TestExpanding: + def setup_method(self): + self.frame = DataFrame({"A": [1] * 20 + [2] * 12 + [3] * 8, "B": np.arange(40)}) + + @pytest.mark.parametrize( + "f", ["sum", "mean", "min", "max", "count", "kurt", "skew"] + ) + def test_expanding(self, f): + g = self.frame.groupby("A") + r = g.expanding() + + result = getattr(r, f)() + expected = g.apply(lambda x: getattr(x.expanding(), f)()) + # groupby.apply doesn't drop the grouped-by column + expected = expected.drop("A", axis=1) + # GH 39732 + expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)]) + expected.index = expected_index + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("f", ["std", "var"]) + def test_expanding_ddof(self, f): + g = self.frame.groupby("A") + r = g.expanding() + + result = getattr(r, f)(ddof=0) + expected = g.apply(lambda x: getattr(x.expanding(), f)(ddof=0)) + # groupby.apply doesn't drop the grouped-by column + expected = expected.drop("A", axis=1) + # GH 39732 + expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)]) + expected.index = expected_index + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "interpolation", ["linear", "lower", "higher", "midpoint", "nearest"] + ) + def test_expanding_quantile(self, interpolation): + g = self.frame.groupby("A") + r = g.expanding() + + result = r.quantile(0.4, interpolation=interpolation) + expected = g.apply( + lambda x: x.expanding().quantile(0.4, interpolation=interpolation) + ) + # groupby.apply doesn't drop the grouped-by column + expected = expected.drop("A", axis=1) + # GH 39732 + expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)]) + expected.index = expected_index + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("f", ["corr", "cov"]) + def test_expanding_corr_cov(self, f): + g = self.frame.groupby("A") + r = g.expanding() + + result = getattr(r, f)(self.frame) + + def func(x): + return getattr(x.expanding(), f)(self.frame) + + expected = g.apply(func) + # GH 39591: groupby.apply returns 1 instead of nan for windows + # with all nan values + null_idx = list(range(20, 61)) + list(range(72, 113)) + expected.iloc[null_idx, 1] = np.nan + # GH 39591: The grouped column should be all np.nan + # (groupby.apply inserts 0s for cov) + expected["A"] = np.nan + tm.assert_frame_equal(result, expected) + + result = getattr(r.B, f)(pairwise=True) + + def func(x): + return getattr(x.B.expanding(), f)(pairwise=True) + + expected = g.apply(func) + tm.assert_series_equal(result, expected) + + def test_expanding_apply(self, raw): + g = self.frame.groupby("A") + r = g.expanding() + + # reduction + result = r.apply(lambda x: x.sum(), raw=raw) + expected = g.apply(lambda x: x.expanding().apply(lambda y: y.sum(), raw=raw)) + # groupby.apply doesn't drop the grouped-by column 
+ expected = expected.drop("A", axis=1) + # GH 39732 + expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)]) + expected.index = expected_index + tm.assert_frame_equal(result, expected) + + +class TestEWM: + @pytest.mark.parametrize( + "method, expected_data", + [ + ["mean", [0.0, 0.6666666666666666, 1.4285714285714286, 2.2666666666666666]], + ["std", [np.nan, 0.707107, 0.963624, 1.177164]], + ["var", [np.nan, 0.5, 0.9285714285714286, 1.3857142857142857]], + ], + ) + def test_methods(self, method, expected_data): + # GH 16037 + df = DataFrame({"A": ["a"] * 4, "B": range(4)}) + result = getattr(df.groupby("A").ewm(com=1.0), method)() + expected = DataFrame( + {"B": expected_data}, + index=MultiIndex.from_tuples( + [ + ("a", 0), + ("a", 1), + ("a", 2), + ("a", 3), + ], + names=["A", None], + ), + ) + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning, match="nuisance"): + # GH#42738 + expected = df.groupby("A").apply( + lambda x: getattr(x.ewm(com=1.0), method)() + ) + + # There may be a bug in the above statement; not returning the correct index + tm.assert_frame_equal(result.reset_index(drop=True), expected) + + @pytest.mark.parametrize( + "method, expected_data", + [["corr", [np.nan, 1.0, 1.0, 1]], ["cov", [np.nan, 0.5, 0.928571, 1.385714]]], + ) + def test_pairwise_methods(self, method, expected_data): + # GH 16037 + df = DataFrame({"A": ["a"] * 4, "B": range(4)}) + result = getattr(df.groupby("A").ewm(com=1.0), method)() + expected = DataFrame( + {"B": expected_data}, + index=MultiIndex.from_tuples( + [ + ("a", 0, "B"), + ("a", 1, "B"), + ("a", 2, "B"), + ("a", 3, "B"), + ], + names=["A", None, None], + ), + ) + tm.assert_frame_equal(result, expected) + + expected = df.groupby("A").apply(lambda x: getattr(x.ewm(com=1.0), method)()) + tm.assert_frame_equal(result, expected) + + def test_times(self, times_frame): + # GH 40951 + halflife = "23 days" + with tm.assert_produces_warning(FutureWarning, match="nuisance"): + # GH#42738 + result = times_frame.groupby("A").ewm(halflife=halflife, times="C").mean() + expected = DataFrame( + { + "B": [ + 0.0, + 0.507534, + 1.020088, + 1.537661, + 0.0, + 0.567395, + 1.221209, + 0.0, + 0.653141, + 1.195003, + ] + }, + index=MultiIndex.from_tuples( + [ + ("a", 0), + ("a", 3), + ("a", 6), + ("a", 9), + ("b", 1), + ("b", 4), + ("b", 7), + ("c", 2), + ("c", 5), + ("c", 8), + ], + names=["A", None], + ), + ) + tm.assert_frame_equal(result, expected) + + def test_times_vs_apply(self, times_frame): + # GH 40951 + halflife = "23 days" + with tm.assert_produces_warning(FutureWarning, match="nuisance"): + # GH#42738 + result = times_frame.groupby("A").ewm(halflife=halflife, times="C").mean() + expected = ( + times_frame.groupby("A") + .apply(lambda x: x.ewm(halflife=halflife, times="C").mean()) + .iloc[[0, 3, 6, 9, 1, 4, 7, 2, 5, 8]] + .reset_index(drop=True) + ) + tm.assert_frame_equal(result.reset_index(drop=True), expected) + + def test_times_array(self, times_frame): + # GH 40951 + halflife = "23 days" + gb = times_frame.groupby("A") + with tm.assert_produces_warning(FutureWarning, match="nuisance"): + # GH#42738 + result = gb.ewm(halflife=halflife, times="C").mean() + expected = gb.ewm(halflife=halflife, times=times_frame["C"].values).mean() + tm.assert_frame_equal(result, expected) + + def test_dont_mutate_obj_after_slicing(self): + # GH 43355 + df = DataFrame( + { + "id": ["a", "a", "b", "b", "b"], + "timestamp": date_range("2021-9-1", periods=5, freq="H"), + "y": range(5), + } + ) + grp = 
df.groupby("id").rolling("1H", on="timestamp") + result = grp.count() + expected_df = DataFrame( + { + "timestamp": date_range("2021-9-1", periods=5, freq="H"), + "y": [1.0] * 5, + }, + index=MultiIndex.from_arrays( + [["a", "a", "b", "b", "b"], list(range(5))], names=["id", None] + ), + ) + tm.assert_frame_equal(result, expected_df) + + result = grp["y"].count() + expected_series = Series( + [1.0] * 5, + index=MultiIndex.from_arrays( + [ + ["a", "a", "b", "b", "b"], + date_range("2021-9-1", periods=5, freq="H"), + ], + names=["id", "timestamp"], + ), + name="y", + ) + tm.assert_series_equal(result, expected_series) + # This is the key test + result = grp.count() + tm.assert_frame_equal(result, expected_df) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/test_numba.py b/.local/lib/python3.8/site-packages/pandas/tests/window/test_numba.py new file mode 100644 index 0000000..be6eade --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/window/test_numba.py @@ -0,0 +1,411 @@ +import numpy as np +import pytest + +from pandas.compat import ( + is_ci_environment, + is_platform_mac, + is_platform_windows, +) +from pandas.errors import NumbaUtilError +import pandas.util._test_decorators as td + +from pandas import ( + DataFrame, + Series, + option_context, + to_datetime, +) +import pandas._testing as tm +from pandas.core.util.numba_ import NUMBA_FUNC_CACHE + +# TODO(GH#44584): Mark these as pytest.mark.single +pytestmark = pytest.mark.skipif( + is_ci_environment() and (is_platform_windows() or is_platform_mac()), + reason="On Azure CI, Windows can fail with " + "'Windows fatal exception: stack overflow' " + "and MacOS can timeout", +) + + +@td.skip_if_no("numba") +@pytest.mark.filterwarnings("ignore:\n") +# Filter warnings when parallel=True and the function can't be parallelized by Numba +class TestEngine: + @pytest.mark.parametrize("jit", [True, False]) + def test_numba_vs_cython_apply(self, jit, nogil, parallel, nopython, center): + def f(x, *args): + arg_sum = 0 + for arg in args: + arg_sum += arg + return np.mean(x) + arg_sum + + if jit: + import numba + + f = numba.jit(f) + + engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} + args = (2,) + + s = Series(range(10)) + result = s.rolling(2, center=center).apply( + f, args=args, engine="numba", engine_kwargs=engine_kwargs, raw=True + ) + expected = s.rolling(2, center=center).apply( + f, engine="cython", args=args, raw=True + ) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "data", [DataFrame(np.eye(5)), Series(range(5), name="foo")] + ) + def test_numba_vs_cython_rolling_methods( + self, data, nogil, parallel, nopython, arithmetic_numba_supported_operators + ): + + method, kwargs = arithmetic_numba_supported_operators + + engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} + + roll = data.rolling(2) + result = getattr(roll, method)( + engine="numba", engine_kwargs=engine_kwargs, **kwargs + ) + expected = getattr(roll, method)(engine="cython", **kwargs) + + # Check the cache + if method not in ("mean", "sum", "var", "std", "max", "min"): + assert ( + getattr(np, f"nan{method}"), + "Rolling_apply_single", + ) in NUMBA_FUNC_CACHE + + tm.assert_equal(result, expected) + + @pytest.mark.parametrize( + "data", [DataFrame(np.eye(5)), Series(range(5), name="foo")] + ) + def test_numba_vs_cython_expanding_methods( + self, data, nogil, parallel, nopython, arithmetic_numba_supported_operators + ): + + method, kwargs = 
arithmetic_numba_supported_operators + + engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} + + data = DataFrame(np.eye(5)) + expand = data.expanding() + result = getattr(expand, method)( + engine="numba", engine_kwargs=engine_kwargs, **kwargs + ) + expected = getattr(expand, method)(engine="cython", **kwargs) + + # Check the cache + if method not in ("mean", "sum", "var", "std", "max", "min"): + assert ( + getattr(np, f"nan{method}"), + "Expanding_apply_single", + ) in NUMBA_FUNC_CACHE + + tm.assert_equal(result, expected) + + @pytest.mark.parametrize("jit", [True, False]) + def test_cache_apply(self, jit, nogil, parallel, nopython): + # Test that the functions are cached correctly if we switch functions + def func_1(x): + return np.mean(x) + 4 + + def func_2(x): + return np.std(x) * 5 + + if jit: + import numba + + func_1 = numba.jit(func_1) + func_2 = numba.jit(func_2) + + engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} + + roll = Series(range(10)).rolling(2) + result = roll.apply( + func_1, engine="numba", engine_kwargs=engine_kwargs, raw=True + ) + expected = roll.apply(func_1, engine="cython", raw=True) + tm.assert_series_equal(result, expected) + + # func_1 should be in the cache now + assert (func_1, "Rolling_apply_single") in NUMBA_FUNC_CACHE + + result = roll.apply( + func_2, engine="numba", engine_kwargs=engine_kwargs, raw=True + ) + expected = roll.apply(func_2, engine="cython", raw=True) + tm.assert_series_equal(result, expected) + # This run should use the cached func_1 + result = roll.apply( + func_1, engine="numba", engine_kwargs=engine_kwargs, raw=True + ) + expected = roll.apply(func_1, engine="cython", raw=True) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "window,window_kwargs", + [ + ["rolling", {"window": 3, "min_periods": 0}], + ["expanding", {}], + ], + ) + def test_dont_cache_args( + self, window, window_kwargs, nogil, parallel, nopython, method + ): + # GH 42287 + + def add(values, x): + return np.sum(values) + x + + engine_kwargs = {"nopython": nopython, "nogil": nogil, "parallel": parallel} + df = DataFrame({"value": [0, 0, 0]}) + result = getattr(df, window)(method=method, **window_kwargs).apply( + add, raw=True, engine="numba", engine_kwargs=engine_kwargs, args=(1,) + ) + expected = DataFrame({"value": [1.0, 1.0, 1.0]}) + tm.assert_frame_equal(result, expected) + + result = getattr(df, window)(method=method, **window_kwargs).apply( + add, raw=True, engine="numba", engine_kwargs=engine_kwargs, args=(2,) + ) + expected = DataFrame({"value": [2.0, 2.0, 2.0]}) + tm.assert_frame_equal(result, expected) + + +@td.skip_if_no("numba") +class TestEWM: + @pytest.mark.parametrize( + "grouper", [lambda x: x, lambda x: x.groupby("A")], ids=["None", "groupby"] + ) + @pytest.mark.parametrize("method", ["mean", "sum"]) + def test_invalid_engine(self, grouper, method): + df = DataFrame({"A": ["a", "b", "a", "b"], "B": range(4)}) + with pytest.raises(ValueError, match="engine must be either"): + getattr(grouper(df).ewm(com=1.0), method)(engine="foo") + + @pytest.mark.parametrize( + "grouper", [lambda x: x, lambda x: x.groupby("A")], ids=["None", "groupby"] + ) + @pytest.mark.parametrize("method", ["mean", "sum"]) + def test_invalid_engine_kwargs(self, grouper, method): + df = DataFrame({"A": ["a", "b", "a", "b"], "B": range(4)}) + with pytest.raises(ValueError, match="cython engine does not"): + getattr(grouper(df).ewm(com=1.0), method)( + engine="cython", engine_kwargs={"nopython": True} + ) + 
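
For context, a minimal sketch of the call pattern the surrounding tests exercise, assuming pandas >= 1.3 with numba installed; the reducer f and the sample Series are illustrative only, not taken from the test suite. The same rolling apply is computed once with the cython engine and once with the numba engine, and the two results should agree:

    import numpy as np
    import pandas as pd

    def f(x):
        # plain NumPy reducer; numba can compile this in nopython mode
        return np.mean(x) + 2

    s = pd.Series(np.arange(10.0))

    cython_result = s.rolling(3).apply(f, raw=True, engine="cython")
    numba_result = s.rolling(3).apply(
        f,
        raw=True,  # the numba engine requires raw ndarray windows
        engine="numba",
        engine_kwargs={"nopython": True, "nogil": False, "parallel": False},
    )
    pd.testing.assert_series_equal(cython_result, numba_result)

The engine_kwargs keys map directly onto numba.jit options, which is why the tests sweep nogil/parallel/nopython as fixtures.
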
+ @pytest.mark.parametrize("grouper", ["None", "groupby"]) + @pytest.mark.parametrize("method", ["mean", "sum"]) + def test_cython_vs_numba( + self, grouper, method, nogil, parallel, nopython, ignore_na, adjust + ): + if grouper == "None": + grouper = lambda x: x + warn = FutureWarning + else: + grouper = lambda x: x.groupby("A") + warn = None + if method == "sum": + adjust = True + df = DataFrame({"A": ["a", "b", "a", "b"], "B": range(4)}) + ewm = grouper(df).ewm(com=1.0, adjust=adjust, ignore_na=ignore_na) + + engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} + with tm.assert_produces_warning(warn, match="nuisance"): + # GH#42738 + result = getattr(ewm, method)(engine="numba", engine_kwargs=engine_kwargs) + expected = getattr(ewm, method)(engine="cython") + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("grouper", ["None", "groupby"]) + def test_cython_vs_numba_times(self, grouper, nogil, parallel, nopython, ignore_na): + # GH 40951 + + if grouper == "None": + grouper = lambda x: x + warn = FutureWarning + else: + grouper = lambda x: x.groupby("A") + warn = None + + halflife = "23 days" + times = to_datetime( + [ + "2020-01-01", + "2020-01-01", + "2020-01-02", + "2020-01-10", + "2020-02-23", + "2020-01-03", + ] + ) + df = DataFrame({"A": ["a", "b", "a", "b", "b", "a"], "B": [0, 0, 1, 1, 2, 2]}) + ewm = grouper(df).ewm( + halflife=halflife, adjust=True, ignore_na=ignore_na, times=times + ) + + engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} + + with tm.assert_produces_warning(warn, match="nuisance"): + # GH#42738 + result = ewm.mean(engine="numba", engine_kwargs=engine_kwargs) + expected = ewm.mean(engine="cython") + + tm.assert_frame_equal(result, expected) + + +@td.skip_if_no("numba") +def test_use_global_config(): + def f(x): + return np.mean(x) + 2 + + s = Series(range(10)) + with option_context("compute.use_numba", True): + result = s.rolling(2).apply(f, engine=None, raw=True) + expected = s.rolling(2).apply(f, engine="numba", raw=True) + tm.assert_series_equal(expected, result) + + +@td.skip_if_no("numba") +def test_invalid_kwargs_nopython(): + with pytest.raises(NumbaUtilError, match="numba does not support kwargs with"): + Series(range(1)).rolling(1).apply( + lambda x: x, kwargs={"a": 1}, engine="numba", raw=True + ) + + +@td.skip_if_no("numba") +@pytest.mark.slow +@pytest.mark.filterwarnings("ignore:\n") +# Filter warnings when parallel=True and the function can't be parallelized by Numba +class TestTableMethod: + def test_table_series_valueerror(self): + def f(x): + return np.sum(x, axis=0) + 1 + + with pytest.raises( + ValueError, match="method='table' not applicable for Series objects." 
+ ): + Series(range(1)).rolling(1, method="table").apply( + f, engine="numba", raw=True + ) + + def test_table_method_rolling_methods( + self, axis, nogil, parallel, nopython, arithmetic_numba_supported_operators + ): + method, kwargs = arithmetic_numba_supported_operators + + engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} + + df = DataFrame(np.eye(3)) + roll_table = df.rolling(2, method="table", axis=axis, min_periods=0) + if method in ("var", "std"): + with pytest.raises(NotImplementedError, match=f"{method} not supported"): + getattr(roll_table, method)( + engine_kwargs=engine_kwargs, engine="numba", **kwargs + ) + else: + roll_single = df.rolling(2, method="single", axis=axis, min_periods=0) + result = getattr(roll_table, method)( + engine_kwargs=engine_kwargs, engine="numba", **kwargs + ) + expected = getattr(roll_single, method)( + engine_kwargs=engine_kwargs, engine="numba", **kwargs + ) + tm.assert_frame_equal(result, expected) + + def test_table_method_rolling_apply(self, axis, nogil, parallel, nopython): + engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} + + def f(x): + return np.sum(x, axis=0) + 1 + + df = DataFrame(np.eye(3)) + result = df.rolling(2, method="table", axis=axis, min_periods=0).apply( + f, raw=True, engine_kwargs=engine_kwargs, engine="numba" + ) + expected = df.rolling(2, method="single", axis=axis, min_periods=0).apply( + f, raw=True, engine_kwargs=engine_kwargs, engine="numba" + ) + tm.assert_frame_equal(result, expected) + + def test_table_method_rolling_weighted_mean(self): + def weighted_mean(x): + arr = np.ones((1, x.shape[1])) + arr[:, :2] = (x[:, :2] * x[:, 2]).sum(axis=0) / x[:, 2].sum() + return arr + + df = DataFrame([[1, 2, 0.6], [2, 3, 0.4], [3, 4, 0.2], [4, 5, 0.7]]) + result = df.rolling(2, method="table", min_periods=0).apply( + weighted_mean, raw=True, engine="numba" + ) + expected = DataFrame( + [ + [1.0, 2.0, 1.0], + [1.8, 2.0, 1.0], + [3.333333, 2.333333, 1.0], + [1.555556, 7, 1.0], + ] + ) + tm.assert_frame_equal(result, expected) + + def test_table_method_expanding_apply(self, axis, nogil, parallel, nopython): + engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} + + def f(x): + return np.sum(x, axis=0) + 1 + + df = DataFrame(np.eye(3)) + result = df.expanding(method="table", axis=axis).apply( + f, raw=True, engine_kwargs=engine_kwargs, engine="numba" + ) + expected = df.expanding(method="single", axis=axis).apply( + f, raw=True, engine_kwargs=engine_kwargs, engine="numba" + ) + tm.assert_frame_equal(result, expected) + + def test_table_method_expanding_methods( + self, axis, nogil, parallel, nopython, arithmetic_numba_supported_operators + ): + method, kwargs = arithmetic_numba_supported_operators + + engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} + + df = DataFrame(np.eye(3)) + expand_table = df.expanding(method="table", axis=axis) + if method in ("var", "std"): + with pytest.raises(NotImplementedError, match=f"{method} not supported"): + getattr(expand_table, method)( + engine_kwargs=engine_kwargs, engine="numba", **kwargs + ) + else: + expand_single = df.expanding(method="single", axis=axis) + result = getattr(expand_table, method)( + engine_kwargs=engine_kwargs, engine="numba", **kwargs + ) + expected = getattr(expand_single, method)( + engine_kwargs=engine_kwargs, engine="numba", **kwargs + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("data", [np.eye(3), np.ones((2, 3)), np.ones((3, 2))]) + 
@pytest.mark.parametrize("method", ["mean", "sum"]) + def test_table_method_ewm(self, data, method, axis, nogil, parallel, nopython): + engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} + + df = DataFrame(data) + + result = getattr(df.ewm(com=1, method="table", axis=axis), method)( + engine_kwargs=engine_kwargs, engine="numba" + ) + expected = getattr(df.ewm(com=1, method="single", axis=axis), method)( + engine_kwargs=engine_kwargs, engine="numba" + ) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/test_online.py b/.local/lib/python3.8/site-packages/pandas/tests/window/test_online.py new file mode 100644 index 0000000..c157b0e --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/window/test_online.py @@ -0,0 +1,104 @@ +import numpy as np +import pytest + +from pandas.compat import ( + is_ci_environment, + is_platform_mac, + is_platform_windows, +) +import pandas.util._test_decorators as td + +from pandas import ( + DataFrame, + Series, +) +import pandas._testing as tm + +# TODO(GH#44584): Mark these as pytest.mark.single +pytestmark = pytest.mark.skipif( + is_ci_environment() and (is_platform_windows() or is_platform_mac()), + reason="On Azure CI, Windows can fail with " + "'Windows fatal exception: stack overflow' " + "and MacOS can timeout", +) + + +@td.skip_if_no("numba") +@pytest.mark.filterwarnings("ignore:\n") +class TestEWM: + def test_invalid_update(self): + df = DataFrame({"a": range(5), "b": range(5)}) + online_ewm = df.head(2).ewm(0.5).online() + with pytest.raises( + ValueError, + match="Must call mean with update=None first before passing update", + ): + online_ewm.mean(update=df.head(1)) + + @pytest.mark.slow + @pytest.mark.parametrize( + "obj", [DataFrame({"a": range(5), "b": range(5)}), Series(range(5), name="foo")] + ) + def test_online_vs_non_online_mean( + self, obj, nogil, parallel, nopython, adjust, ignore_na + ): + expected = obj.ewm(0.5, adjust=adjust, ignore_na=ignore_na).mean() + engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} + + online_ewm = ( + obj.head(2) + .ewm(0.5, adjust=adjust, ignore_na=ignore_na) + .online(engine_kwargs=engine_kwargs) + ) + # Test resetting once + for _ in range(2): + result = online_ewm.mean() + tm.assert_equal(result, expected.head(2)) + + result = online_ewm.mean(update=obj.tail(3)) + tm.assert_equal(result, expected.tail(3)) + + online_ewm.reset() + + @pytest.mark.xfail(raises=NotImplementedError) + @pytest.mark.parametrize( + "obj", [DataFrame({"a": range(5), "b": range(5)}), Series(range(5), name="foo")] + ) + def test_update_times_mean( + self, obj, nogil, parallel, nopython, adjust, ignore_na, halflife_with_times + ): + times = Series( + np.array( + ["2020-01-01", "2020-01-05", "2020-01-07", "2020-01-17", "2020-01-21"], + dtype="datetime64", + ) + ) + expected = obj.ewm( + 0.5, + adjust=adjust, + ignore_na=ignore_na, + times=times, + halflife=halflife_with_times, + ).mean() + + engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} + online_ewm = ( + obj.head(2) + .ewm( + 0.5, + adjust=adjust, + ignore_na=ignore_na, + times=times.head(2), + halflife=halflife_with_times, + ) + .online(engine_kwargs=engine_kwargs) + ) + # Test resetting once + for _ in range(2): + result = online_ewm.mean() + tm.assert_equal(result, expected.head(2)) + + result = online_ewm.mean(update=obj.tail(3), update_times=times.tail(3)) + tm.assert_equal(result, expected.tail(3)) + + online_ewm.reset() diff 
--git a/.local/lib/python3.8/site-packages/pandas/tests/window/test_pairwise.py b/.local/lib/python3.8/site-packages/pandas/tests/window/test_pairwise.py new file mode 100644 index 0000000..77ff6ae --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/window/test_pairwise.py @@ -0,0 +1,406 @@ +import warnings + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + date_range, +) +import pandas._testing as tm +from pandas.core.algorithms import safe_sort + + +def test_rolling_cov(series): + A = series + B = A + np.random.randn(len(A)) + + result = A.rolling(window=50, min_periods=25).cov(B) + tm.assert_almost_equal(result[-1], np.cov(A[-50:], B[-50:])[0, 1]) + + +def test_rolling_corr(series): + A = series + B = A + np.random.randn(len(A)) + + result = A.rolling(window=50, min_periods=25).corr(B) + tm.assert_almost_equal(result[-1], np.corrcoef(A[-50:], B[-50:])[0, 1]) + + # test for correct bias correction + a = tm.makeTimeSeries() + b = tm.makeTimeSeries() + a[:5] = np.nan + b[:10] = np.nan + + result = a.rolling(window=len(a), min_periods=1).corr(b) + tm.assert_almost_equal(result[-1], a.corr(b)) + + +@pytest.mark.parametrize("func", ["cov", "corr"]) +def test_rolling_pairwise_cov_corr(func, frame): + result = getattr(frame.rolling(window=10, min_periods=5), func)() + result = result.loc[(slice(None), 1), 5] + result.index = result.index.droplevel(1) + expected = getattr(frame[1].rolling(window=10, min_periods=5), func)(frame[5]) + tm.assert_series_equal(result, expected, check_names=False) + + +@pytest.mark.parametrize("method", ["corr", "cov"]) +def test_flex_binary_frame(method, frame): + series = frame[1] + + res = getattr(series.rolling(window=10), method)(frame) + res2 = getattr(frame.rolling(window=10), method)(series) + exp = frame.apply(lambda x: getattr(series.rolling(window=10), method)(x)) + + tm.assert_frame_equal(res, exp) + tm.assert_frame_equal(res2, exp) + + frame2 = frame.copy() + frame2.values[:] = np.random.randn(*frame2.shape) + + res3 = getattr(frame.rolling(window=10), method)(frame2) + exp = DataFrame( + {k: getattr(frame[k].rolling(window=10), method)(frame2[k]) for k in frame} + ) + tm.assert_frame_equal(res3, exp) + + +@pytest.mark.parametrize("window", range(7)) +def test_rolling_corr_with_zero_variance(window): + # GH 18430 + s = Series(np.zeros(20)) + other = Series(np.arange(20)) + + assert s.rolling(window=window).corr(other=other).isna().all() + + +def test_corr_sanity(): + # GH 3155 + df = DataFrame( + np.array( + [ + [0.87024726, 0.18505595], + [0.64355431, 0.3091617], + [0.92372966, 0.50552513], + [0.00203756, 0.04520709], + [0.84780328, 0.33394331], + [0.78369152, 0.63919667], + ] + ) + ) + + res = df[0].rolling(5, center=True).corr(df[1]) + assert all(np.abs(np.nan_to_num(x)) <= 1 for x in res) + + df = DataFrame(np.random.rand(30, 2)) + res = df[0].rolling(5, center=True).corr(df[1]) + assert all(np.abs(np.nan_to_num(x)) <= 1 for x in res) + + +def test_rolling_cov_diff_length(): + # GH 7512 + s1 = Series([1, 2, 3], index=[0, 1, 2]) + s2 = Series([1, 3], index=[0, 2]) + result = s1.rolling(window=3, min_periods=2).cov(s2) + expected = Series([None, None, 2.0]) + tm.assert_series_equal(result, expected) + + s2a = Series([1, None, 3], index=[0, 1, 2]) + result = s1.rolling(window=3, min_periods=2).cov(s2a) + tm.assert_series_equal(result, expected) + + +def test_rolling_corr_diff_length(): + # GH 7512 + s1 = Series([1, 2, 3], index=[0, 1, 2]) + s2 = Series([1, 3], index=[0, 2]) + 
result = s1.rolling(window=3, min_periods=2).corr(s2) + expected = Series([None, None, 1.0]) + tm.assert_series_equal(result, expected) + + s2a = Series([1, None, 3], index=[0, 1, 2]) + result = s1.rolling(window=3, min_periods=2).corr(s2a) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "f", + [ + lambda x: (x.rolling(window=10, min_periods=5).cov(x, pairwise=True)), + lambda x: (x.rolling(window=10, min_periods=5).corr(x, pairwise=True)), + ], +) +def test_rolling_functions_window_non_shrinkage_binary(f): + + # corr/cov return a MI DataFrame + df = DataFrame( + [[1, 5], [3, 2], [3, 9], [-1, 0]], + columns=Index(["A", "B"], name="foo"), + index=Index(range(4), name="bar"), + ) + df_expected = DataFrame( + columns=Index(["A", "B"], name="foo"), + index=MultiIndex.from_product([df.index, df.columns], names=["bar", "foo"]), + dtype="float64", + ) + df_result = f(df) + tm.assert_frame_equal(df_result, df_expected) + + +@pytest.mark.parametrize( + "f", + [ + lambda x: (x.rolling(window=10, min_periods=5).cov(x, pairwise=True)), + lambda x: (x.rolling(window=10, min_periods=5).corr(x, pairwise=True)), + ], +) +def test_moment_functions_zero_length_pairwise(f): + + df1 = DataFrame() + df2 = DataFrame(columns=Index(["a"], name="foo"), index=Index([], name="bar")) + df2["a"] = df2["a"].astype("float64") + + df1_expected = DataFrame( + index=MultiIndex.from_product([df1.index, df1.columns]), columns=Index([]) + ) + df2_expected = DataFrame( + index=MultiIndex.from_product([df2.index, df2.columns], names=["bar", "foo"]), + columns=Index(["a"], name="foo"), + dtype="float64", + ) + + df1_result = f(df1) + tm.assert_frame_equal(df1_result, df1_expected) + + df2_result = f(df2) + tm.assert_frame_equal(df2_result, df2_expected) + + +class TestPairwise: + + # GH 7738 + @pytest.mark.parametrize("f", [lambda x: x.cov(), lambda x: x.corr()]) + def test_no_flex(self, pairwise_frames, pairwise_target_frame, f): + + # DataFrame methods (which do not call flex_binary_moment()) + + result = f(pairwise_frames) + tm.assert_index_equal(result.index, pairwise_frames.columns) + tm.assert_index_equal(result.columns, pairwise_frames.columns) + expected = f(pairwise_target_frame) + # since we have sorted the results + # we can only compare non-nans + result = result.dropna().values + expected = expected.dropna().values + + tm.assert_numpy_array_equal(result, expected, check_dtype=False) + + @pytest.mark.parametrize( + "f", + [ + lambda x: x.expanding().cov(pairwise=True), + lambda x: x.expanding().corr(pairwise=True), + lambda x: x.rolling(window=3).cov(pairwise=True), + lambda x: x.rolling(window=3).corr(pairwise=True), + lambda x: x.ewm(com=3).cov(pairwise=True), + lambda x: x.ewm(com=3).corr(pairwise=True), + ], + ) + def test_pairwise_with_self(self, pairwise_frames, pairwise_target_frame, f): + + # DataFrame with itself, pairwise=True + # note that we may construct the 1st level of the MI + # in a non-monotonic way, so compare accordingly + result = f(pairwise_frames) + tm.assert_index_equal( + result.index.levels[0], pairwise_frames.index, check_names=False + ) + tm.assert_numpy_array_equal( + safe_sort(result.index.levels[1]), + safe_sort(pairwise_frames.columns.unique()), + ) + tm.assert_index_equal(result.columns, pairwise_frames.columns) + expected = f(pairwise_target_frame) + # since we have sorted the results + # we can only compare non-nans + result = result.dropna().values + expected = expected.dropna().values + + tm.assert_numpy_array_equal(result, expected, check_dtype=False) 
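+ + # Note: with pairwise=True, cov/corr return a "long" DataFrame whose + # MultiIndex pairs each original row with every column (one + # n_cols x n_cols covariance/correlation block per row); with + # pairwise=False the result keeps the input's own index and columns, + # which is what the corresponding test below checks.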
+ + @pytest.mark.parametrize( + "f", + [ + lambda x: x.expanding().cov(pairwise=False), + lambda x: x.expanding().corr(pairwise=False), + lambda x: x.rolling(window=3).cov(pairwise=False), + lambda x: x.rolling(window=3).corr(pairwise=False), + lambda x: x.ewm(com=3).cov(pairwise=False), + lambda x: x.ewm(com=3).corr(pairwise=False), + ], + ) + def test_no_pairwise_with_self(self, pairwise_frames, pairwise_target_frame, f): + + # DataFrame with itself, pairwise=False + result = f(pairwise_frames) + tm.assert_index_equal(result.index, pairwise_frames.index) + tm.assert_index_equal(result.columns, pairwise_frames.columns) + expected = f(pairwise_target_frame) + # since we have sorted the results + # we can only compare non-nans + result = result.dropna().values + expected = expected.dropna().values + + tm.assert_numpy_array_equal(result, expected, check_dtype=False) + + @pytest.mark.parametrize( + "f", + [ + lambda x, y: x.expanding().cov(y, pairwise=True), + lambda x, y: x.expanding().corr(y, pairwise=True), + lambda x, y: x.rolling(window=3).cov(y, pairwise=True), + lambda x, y: x.rolling(window=3).corr(y, pairwise=True), + lambda x, y: x.ewm(com=3).cov(y, pairwise=True), + lambda x, y: x.ewm(com=3).corr(y, pairwise=True), + ], + ) + def test_pairwise_with_other( + self, pairwise_frames, pairwise_target_frame, pairwise_other_frame, f + ): + + # DataFrame with another DataFrame, pairwise=True + result = f(pairwise_frames, pairwise_other_frame) + tm.assert_index_equal( + result.index.levels[0], pairwise_frames.index, check_names=False + ) + tm.assert_numpy_array_equal( + safe_sort(result.index.levels[1]), + safe_sort(pairwise_other_frame.columns.unique()), + ) + expected = f(pairwise_target_frame, pairwise_other_frame) + # since we have sorted the results + # we can only compare non-nans + result = result.dropna().values + expected = expected.dropna().values + + tm.assert_numpy_array_equal(result, expected, check_dtype=False) + + @pytest.mark.parametrize( + "f", + [ + lambda x, y: x.expanding().cov(y, pairwise=False), + lambda x, y: x.expanding().corr(y, pairwise=False), + lambda x, y: x.rolling(window=3).cov(y, pairwise=False), + lambda x, y: x.rolling(window=3).corr(y, pairwise=False), + lambda x, y: x.ewm(com=3).cov(y, pairwise=False), + lambda x, y: x.ewm(com=3).corr(y, pairwise=False), + ], + ) + def test_no_pairwise_with_other(self, pairwise_frames, pairwise_other_frame, f): + + # DataFrame with another DataFrame, pairwise=False + result = ( + f(pairwise_frames, pairwise_other_frame) + if pairwise_frames.columns.is_unique + else None + ) + if result is not None: + with warnings.catch_warnings(record=True): + warnings.simplefilter("ignore", RuntimeWarning) + # we can have int and str columns + expected_index = pairwise_frames.index.union(pairwise_other_frame.index) + expected_columns = pairwise_frames.columns.union( + pairwise_other_frame.columns + ) + tm.assert_index_equal(result.index, expected_index) + tm.assert_index_equal(result.columns, expected_columns) + else: + with pytest.raises(ValueError, match="'arg1' columns are not unique"): + f(pairwise_frames, pairwise_other_frame) + with pytest.raises(ValueError, match="'arg2' columns are not unique"): + f(pairwise_other_frame, pairwise_frames) + + @pytest.mark.parametrize( + "f", + [ + lambda x, y: x.expanding().cov(y), + lambda x, y: x.expanding().corr(y), + lambda x, y: x.rolling(window=3).cov(y), + lambda x, y: x.rolling(window=3).corr(y), + lambda x, y: x.ewm(com=3).cov(y), + lambda x, y: x.ewm(com=3).corr(y), + ], + ) + def 
test_pairwise_with_series(self, pairwise_frames, pairwise_target_frame, f): + + # DataFrame with a Series + result = f(pairwise_frames, Series([1, 1, 3, 8])) + tm.assert_index_equal(result.index, pairwise_frames.index) + tm.assert_index_equal(result.columns, pairwise_frames.columns) + expected = f(pairwise_target_frame, Series([1, 1, 3, 8])) + # since we have sorted the results + # we can only compare non-nans + result = result.dropna().values + expected = expected.dropna().values + tm.assert_numpy_array_equal(result, expected, check_dtype=False) + + result = f(Series([1, 1, 3, 8]), pairwise_frames) + tm.assert_index_equal(result.index, pairwise_frames.index) + tm.assert_index_equal(result.columns, pairwise_frames.columns) + expected = f(Series([1, 1, 3, 8]), pairwise_target_frame) + # since we have sorted the results + # we can only compare non-nans + result = result.dropna().values + expected = expected.dropna().values + tm.assert_numpy_array_equal(result, expected, check_dtype=False) + + def test_corr_freq_memory_error(self): + # GH 31789 + s = Series(range(5), index=date_range("2020", periods=5)) + result = s.rolling("12H").corr(s) + expected = Series([np.nan] * 5, index=date_range("2020", periods=5)) + tm.assert_series_equal(result, expected) + + def test_cov_multiindex(self): + # GH 34440 + + columns = MultiIndex.from_product([list("ab"), list("xy"), list("AB")]) + index = range(3) + df = DataFrame(np.arange(24).reshape(3, 8), index=index, columns=columns) + + result = df.ewm(alpha=0.1).cov() + + index = MultiIndex.from_product([range(3), list("ab"), list("xy"), list("AB")]) + columns = MultiIndex.from_product([list("ab"), list("xy"), list("AB")]) + expected = DataFrame( + np.vstack( + ( + np.full((8, 8), np.NaN), + np.full((8, 8), 32.000000), + np.full((8, 8), 63.881919), + ) + ), + index=index, + columns=columns, + ) + + tm.assert_frame_equal(result, expected) + + def test_multiindex_columns_pairwise_func(self): + # GH 21157 + columns = MultiIndex.from_arrays([["M", "N"], ["P", "Q"]], names=["a", "b"]) + df = DataFrame(np.ones((5, 2)), columns=columns) + result = df.rolling(3).corr() + expected = DataFrame( + np.nan, + index=MultiIndex.from_arrays( + [np.repeat(np.arange(5), 2), ["M", "N"] * 5, ["P", "Q"] * 5], + names=[None, "a", "b"], + ), + columns=columns, + ) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/test_rolling.py b/.local/lib/python3.8/site-packages/pandas/tests/window/test_rolling.py new file mode 100644 index 0000000..814bd6b --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/window/test_rolling.py @@ -0,0 +1,1736 @@ +from datetime import ( + datetime, + timedelta, +) + +import numpy as np +import pytest + +from pandas.compat import ( + is_platform_arm, + is_platform_mac, +) +from pandas.errors import UnsupportedFunctionCall + +from pandas import ( + DataFrame, + DatetimeIndex, + MultiIndex, + Series, + Timedelta, + Timestamp, + date_range, + period_range, + to_datetime, + to_timedelta, +) +import pandas._testing as tm +from pandas.api.indexers import BaseIndexer +from pandas.core.window import Rolling + + +def test_doc_string(): + + df = DataFrame({"B": [0, 1, 2, np.nan, 4]}) + df + df.rolling(2).sum() + df.rolling(2, min_periods=1).sum() + + +def test_constructor(frame_or_series): + # GH 12669 + + c = frame_or_series(range(5)).rolling + + # valid + c(0) + c(window=2) + c(window=2, min_periods=1) + c(window=2, min_periods=1, center=True) + c(window=2, min_periods=1, center=False) + + # 
GH 13383 + + msg = "window must be an integer 0 or greater" + + with pytest.raises(ValueError, match=msg): + c(-1) + + +@pytest.mark.parametrize("w", [2.0, "foo", np.array([2])]) +def test_invalid_constructor(frame_or_series, w): + # not valid + + c = frame_or_series(range(5)).rolling + + msg = "|".join( + [ + "window must be an integer", + "passed window foo is not compatible with a datetimelike index", + ] + ) + with pytest.raises(ValueError, match=msg): + c(window=w) + + msg = "min_periods must be an integer" + with pytest.raises(ValueError, match=msg): + c(window=2, min_periods=w) + + msg = "center must be a boolean" + with pytest.raises(ValueError, match=msg): + c(window=2, min_periods=1, center=w) + + +@pytest.mark.parametrize("window", [timedelta(days=3), Timedelta(days=3)]) +def test_constructor_with_timedelta_window(window): + # GH 15440 + n = 10 + df = DataFrame( + {"value": np.arange(n)}, + index=date_range("2015-12-24", periods=n, freq="D"), + ) + expected_data = np.append([0.0, 1.0], np.arange(3.0, 27.0, 3)) + + result = df.rolling(window=window).sum() + expected = DataFrame( + {"value": expected_data}, + index=date_range("2015-12-24", periods=n, freq="D"), + ) + tm.assert_frame_equal(result, expected) + expected = df.rolling("3D").sum() + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("window", [timedelta(days=3), Timedelta(days=3), "3D"]) +def test_constructor_timedelta_window_and_minperiods(window, raw): + # GH 15305 + n = 10 + df = DataFrame( + {"value": np.arange(n)}, + index=date_range("2017-08-08", periods=n, freq="D"), + ) + expected = DataFrame( + {"value": np.append([np.NaN, 1.0], np.arange(3.0, 27.0, 3))}, + index=date_range("2017-08-08", periods=n, freq="D"), + ) + result_roll_sum = df.rolling(window=window, min_periods=2).sum() + result_roll_generic = df.rolling(window=window, min_periods=2).apply(sum, raw=raw) + tm.assert_frame_equal(result_roll_sum, expected) + tm.assert_frame_equal(result_roll_generic, expected) + + +@pytest.mark.parametrize("method", ["std", "mean", "sum", "max", "min", "var"]) +def test_numpy_compat(method): + # see gh-12811 + r = Rolling(Series([2, 4, 6]), window=2) + + msg = "numpy operations are not valid with window objects" + + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(r, method)(1, 2, 3) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(r, method)(dtype=np.float64) + + +def test_closed_fixed(closed, arithmetic_win_operators): + # GH 34315 + func_name = arithmetic_win_operators + df_fixed = DataFrame({"A": [0, 1, 2, 3, 4]}) + df_time = DataFrame({"A": [0, 1, 2, 3, 4]}, index=date_range("2020", periods=5)) + + result = getattr( + df_fixed.rolling(2, closed=closed, min_periods=1), + func_name, + )() + expected = getattr( + df_time.rolling("2D", closed=closed, min_periods=1), + func_name, + )().reset_index(drop=True) + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "closed, window_selections", + [ + ( + "both", + [ + [True, True, False, False, False], + [True, True, True, False, False], + [False, True, True, True, False], + [False, False, True, True, True], + [False, False, False, True, True], + ], + ), + ( + "left", + [ + [True, False, False, False, False], + [True, True, False, False, False], + [False, True, True, False, False], + [False, False, True, True, False], + [False, False, False, True, True], + ], + ), + ( + "right", + [ + [True, True, False, False, False], + [False, True, True, False, False], + [False, False, True, True, False], + 
[False, False, False, True, True], + [False, False, False, False, True], + ], + ), + ( + "neither", + [ + [True, False, False, False, False], + [False, True, False, False, False], + [False, False, True, False, False], + [False, False, False, True, False], + [False, False, False, False, True], + ], + ), + ], +) +def test_datetimelike_centered_selections( + closed, window_selections, arithmetic_win_operators +): + # GH 34315 + func_name = arithmetic_win_operators + df_time = DataFrame( + {"A": [0.0, 1.0, 2.0, 3.0, 4.0]}, index=date_range("2020", periods=5) + ) + + expected = DataFrame( + {"A": [getattr(df_time["A"].iloc[s], func_name)() for s in window_selections]}, + index=date_range("2020", periods=5), + ) + + if func_name == "sem": + kwargs = {"ddof": 0} + else: + kwargs = {} + + result = getattr( + df_time.rolling("2D", closed=closed, min_periods=1, center=True), + func_name, + )(**kwargs) + + tm.assert_frame_equal(result, expected, check_dtype=False) + + +@pytest.mark.parametrize( + "window,closed,expected", + [ + ("3s", "right", [3.0, 3.0, 3.0]), + ("3s", "both", [3.0, 3.0, 3.0]), + ("3s", "left", [3.0, 3.0, 3.0]), + ("3s", "neither", [3.0, 3.0, 3.0]), + ("2s", "right", [3.0, 2.0, 2.0]), + ("2s", "both", [3.0, 3.0, 3.0]), + ("2s", "left", [1.0, 3.0, 3.0]), + ("2s", "neither", [1.0, 2.0, 2.0]), + ], +) +def test_datetimelike_centered_offset_covers_all( + window, closed, expected, frame_or_series +): + # GH 42753 + + index = [ + Timestamp("20130101 09:00:01"), + Timestamp("20130101 09:00:02"), + Timestamp("20130101 09:00:02"), + ] + df = frame_or_series([1, 1, 1], index=index) + + result = df.rolling(window, closed=closed, center=True).sum() + expected = frame_or_series(expected, index=index) + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize( + "window,closed,expected", + [ + ("2D", "right", [4, 4, 4, 4, 4, 4, 2, 2]), + ("2D", "left", [2, 2, 4, 4, 4, 4, 4, 4]), + ("2D", "both", [4, 4, 6, 6, 6, 6, 4, 4]), + ("2D", "neither", [2, 2, 2, 2, 2, 2, 2, 2]), + ], +) +def test_datetimelike_nonunique_index_centering( + window, closed, expected, frame_or_series +): + index = DatetimeIndex( + [ + "2020-01-01", + "2020-01-01", + "2020-01-02", + "2020-01-02", + "2020-01-03", + "2020-01-03", + "2020-01-04", + "2020-01-04", + ] + ) + + df = frame_or_series([1] * 8, index=index, dtype=float) + expected = frame_or_series(expected, index=index, dtype=float) + + result = df.rolling(window, center=True, closed=closed).sum() + + tm.assert_equal(result, expected) + + +def test_even_number_window_alignment(): + # see discussion in GH 38780 + s = Series(range(3), index=date_range(start="2020-01-01", freq="D", periods=3)) + + # behavior of index- and datetime-based windows differs here! 
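+ # An integer window counts a fixed number of rows, while an offset + # window such as "2D" selects rows by timestamp; with an even window + # size the two center over different spans, so the integer variant + # (left commented out below) would not reproduce the expected values.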
+ # s.rolling(window=2, min_periods=1, center=True).mean() + + result = s.rolling(window="2D", min_periods=1, center=True).mean() + + expected = Series([0.5, 1.5, 2], index=s.index) + + tm.assert_series_equal(result, expected) + + +def test_closed_fixed_binary_col(center): + # GH 34315 + data = [0, 1, 1, 0, 0, 1, 0, 1] + df = DataFrame( + {"binary_col": data}, + index=date_range(start="2020-01-01", freq="min", periods=len(data)), + ) + + if center: + expected_data = [2 / 3, 0.5, 0.4, 0.5, 0.428571, 0.5, 0.571429, 0.5] + else: + expected_data = [np.nan, 0, 0.5, 2 / 3, 0.5, 0.4, 0.5, 0.428571] + + expected = DataFrame( + expected_data, + columns=["binary_col"], + index=date_range(start="2020-01-01", freq="min", periods=len(expected_data)), + ) + + rolling = df.rolling(window=len(df), closed="left", min_periods=1, center=center) + result = rolling.mean() + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("closed", ["neither", "left"]) +def test_closed_empty(closed, arithmetic_win_operators): + # GH 26005 + func_name = arithmetic_win_operators + ser = Series(data=np.arange(5), index=date_range("2000", periods=5, freq="2D")) + roll = ser.rolling("1D", closed=closed) + + result = getattr(roll, func_name)() + expected = Series([np.nan] * 5, index=ser.index) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("func", ["min", "max"]) +def test_closed_one_entry(func): + # GH24718 + ser = Series(data=[2], index=date_range("2000", periods=1)) + result = getattr(ser.rolling("10D", closed="left"), func)() + tm.assert_series_equal(result, Series([np.nan], index=ser.index)) + + +@pytest.mark.parametrize("func", ["min", "max"]) +def test_closed_one_entry_groupby(func): + # GH24718 + ser = DataFrame( + data={"A": [1, 1, 2], "B": [3, 2, 1]}, + index=date_range("2000", periods=3), + ) + result = getattr( + ser.groupby("A", sort=False)["B"].rolling("10D", closed="left"), func + )() + exp_idx = MultiIndex.from_arrays(arrays=[[1, 1, 2], ser.index], names=("A", None)) + expected = Series(data=[np.nan, 3, np.nan], index=exp_idx, name="B") + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("input_dtype", ["int", "float"]) +@pytest.mark.parametrize( + "func,closed,expected", + [ + ("min", "right", [0.0, 0, 0, 1, 2, 3, 4, 5, 6, 7]), + ("min", "both", [0.0, 0, 0, 0, 1, 2, 3, 4, 5, 6]), + ("min", "neither", [np.nan, 0, 0, 1, 2, 3, 4, 5, 6, 7]), + ("min", "left", [np.nan, 0, 0, 0, 1, 2, 3, 4, 5, 6]), + ("max", "right", [0.0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), + ("max", "both", [0.0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), + ("max", "neither", [np.nan, 0, 1, 2, 3, 4, 5, 6, 7, 8]), + ("max", "left", [np.nan, 0, 1, 2, 3, 4, 5, 6, 7, 8]), + ], +) +def test_closed_min_max_datetime(input_dtype, func, closed, expected): + # see gh-21704 + ser = Series( + data=np.arange(10).astype(input_dtype), + index=date_range("2000", periods=10), + ) + + result = getattr(ser.rolling("3D", closed=closed), func)() + expected = Series(expected, index=ser.index) + tm.assert_series_equal(result, expected) + + +def test_closed_uneven(): + # see gh-21704 + ser = Series(data=np.arange(10), index=date_range("2000", periods=10)) + + # uneven + ser = ser.drop(index=ser.index[[1, 5]]) + result = ser.rolling("3D", closed="left").min() + expected = Series([np.nan, 0, 0, 2, 3, 4, 6, 6], index=ser.index) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "func,closed,expected", + [ + ("min", "right", [np.nan, 0, 0, 1, 2, 3, 4, 5, np.nan, np.nan]), + ("min", "both", [np.nan, 0, 0, 0, 1, 
2, 3, 4, 5, np.nan]), + ("min", "neither", [np.nan, np.nan, 0, 1, 2, 3, 4, 5, np.nan, np.nan]), + ("min", "left", [np.nan, np.nan, 0, 0, 1, 2, 3, 4, 5, np.nan]), + ("max", "right", [np.nan, 1, 2, 3, 4, 5, 6, 6, np.nan, np.nan]), + ("max", "both", [np.nan, 1, 2, 3, 4, 5, 6, 6, 6, np.nan]), + ("max", "neither", [np.nan, np.nan, 1, 2, 3, 4, 5, 6, np.nan, np.nan]), + ("max", "left", [np.nan, np.nan, 1, 2, 3, 4, 5, 6, 6, np.nan]), + ], +) +def test_closed_min_max_minp(func, closed, expected): + # see gh-21704 + ser = Series(data=np.arange(10), index=date_range("2000", periods=10)) + ser[ser.index[-3:]] = np.nan + result = getattr(ser.rolling("3D", min_periods=2, closed=closed), func)() + expected = Series(expected, index=ser.index) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "closed,expected", + [ + ("right", [0, 0.5, 1, 2, 3, 4, 5, 6, 7, 8]), + ("both", [0, 0.5, 1, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5]), + ("neither", [np.nan, 0, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5]), + ("left", [np.nan, 0, 0.5, 1, 2, 3, 4, 5, 6, 7]), + ], +) +def test_closed_median_quantile(closed, expected): + # GH 26005 + ser = Series(data=np.arange(10), index=date_range("2000", periods=10)) + roll = ser.rolling("3D", closed=closed) + expected = Series(expected, index=ser.index) + + result = roll.median() + tm.assert_series_equal(result, expected) + + result = roll.quantile(0.5) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("roller", ["1s", 1]) +def tests_empty_df_rolling(roller): + # GH 15819 Verifies that datetime and integer rolling windows can be + # applied to empty DataFrames + expected = DataFrame() + result = DataFrame().rolling(roller).sum() + tm.assert_frame_equal(result, expected) + + # Verifies that datetime and integer rolling windows can be applied to + # empty DataFrames with datetime index + expected = DataFrame(index=DatetimeIndex([])) + result = DataFrame(index=DatetimeIndex([])).rolling(roller).sum() + tm.assert_frame_equal(result, expected) + + +def test_empty_window_median_quantile(): + # GH 26005 + expected = Series([np.nan, np.nan, np.nan]) + roll = Series(np.arange(3)).rolling(0) + + result = roll.median() + tm.assert_series_equal(result, expected) + + result = roll.quantile(0.1) + tm.assert_series_equal(result, expected) + + +def test_missing_minp_zero(): + # https://github.com/pandas-dev/pandas/pull/18921 + # minp=0 + x = Series([np.nan]) + result = x.rolling(1, min_periods=0).sum() + expected = Series([0.0]) + tm.assert_series_equal(result, expected) + + # minp=1 + result = x.rolling(1, min_periods=1).sum() + expected = Series([np.nan]) + tm.assert_series_equal(result, expected) + + +def test_missing_minp_zero_variable(): + # https://github.com/pandas-dev/pandas/pull/18921 + x = Series( + [np.nan] * 4, + index=DatetimeIndex(["2017-01-01", "2017-01-04", "2017-01-06", "2017-01-07"]), + ) + result = x.rolling(Timedelta("2d"), min_periods=0).sum() + expected = Series(0.0, index=x.index) + tm.assert_series_equal(result, expected) + + +def test_multi_index_names(): + + # GH 16789, 16825 + cols = MultiIndex.from_product([["A", "B"], ["C", "D", "E"]], names=["1", "2"]) + df = DataFrame(np.ones((10, 6)), columns=cols) + result = df.rolling(3).cov() + + tm.assert_index_equal(result.columns, df.columns) + assert result.index.names == [None, "1", "2"] + + +def test_rolling_axis_sum(axis_frame): + # see gh-23372. 
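+ # With axis=1 the window moves across the columns of each row rather + # than down each column: for window=3, the first two columns hold fewer + # than three observations and are NaN, and every later entry is the sum + # of three consecutive ones, i.e. 3.0.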
+ df = DataFrame(np.ones((10, 20))) + axis = df._get_axis_number(axis_frame) + + if axis == 0: + expected = DataFrame({i: [np.nan] * 2 + [3.0] * 8 for i in range(20)}) + else: + # axis == 1 + expected = DataFrame([[np.nan] * 2 + [3.0] * 18] * 10) + + result = df.rolling(3, axis=axis_frame).sum() + tm.assert_frame_equal(result, expected) + + +def test_rolling_axis_count(axis_frame): + # see gh-26055 + df = DataFrame({"x": range(3), "y": range(3)}) + + axis = df._get_axis_number(axis_frame) + + if axis in [0, "index"]: + expected = DataFrame({"x": [1.0, 2.0, 2.0], "y": [1.0, 2.0, 2.0]}) + else: + expected = DataFrame({"x": [1.0, 1.0, 1.0], "y": [2.0, 2.0, 2.0]}) + + result = df.rolling(2, axis=axis_frame, min_periods=0).count() + tm.assert_frame_equal(result, expected) + + +def test_readonly_array(): + # GH-27766 + arr = np.array([1, 3, np.nan, 3, 5]) + arr.setflags(write=False) + result = Series(arr).rolling(2).mean() + expected = Series([np.nan, 2, np.nan, np.nan, 4]) + tm.assert_series_equal(result, expected) + + +def test_rolling_datetime(axis_frame, tz_naive_fixture): + # GH-28192 + tz = tz_naive_fixture + df = DataFrame( + {i: [1] * 2 for i in date_range("2019-8-01", "2019-08-03", freq="D", tz=tz)} + ) + if axis_frame in [0, "index"]: + result = df.T.rolling("2D", axis=axis_frame).sum().T + else: + result = df.rolling("2D", axis=axis_frame).sum() + expected = DataFrame( + { + **{ + i: [1.0] * 2 + for i in date_range("2019-8-01", periods=1, freq="D", tz=tz) + }, + **{ + i: [2.0] * 2 + for i in date_range("2019-8-02", "2019-8-03", freq="D", tz=tz) + }, + } + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "center, expected_data", + [ + ( + True, + ( + [88.0] * 7 + + [97.0] * 9 + + [98.0] + + [99.0] * 21 + + [95.0] * 16 + + [93.0] * 5 + + [89.0] * 5 + + [96.0] * 21 + + [94.0] * 14 + + [90.0] * 13 + + [88.0] * 2 + + [90.0] * 9 + + [96.0] * 21 + + [95.0] * 6 + + [91.0] + + [87.0] * 6 + + [92.0] * 21 + + [83.0] * 2 + + [86.0] * 10 + + [87.0] * 5 + + [98.0] * 21 + + [97.0] * 14 + + [93.0] * 7 + + [87.0] * 4 + + [86.0] * 4 + + [95.0] * 21 + + [85.0] * 14 + + [83.0] * 2 + + [76.0] * 5 + + [81.0] * 2 + + [98.0] * 21 + + [95.0] * 14 + + [91.0] * 7 + + [86.0] + + [93.0] * 3 + + [95.0] * 29 + + [77.0] * 2 + ), + ), + ( + False, + ( + [np.nan] * 2 + + [88.0] * 16 + + [97.0] * 9 + + [98.0] + + [99.0] * 21 + + [95.0] * 16 + + [93.0] * 5 + + [89.0] * 5 + + [96.0] * 21 + + [94.0] * 14 + + [90.0] * 13 + + [88.0] * 2 + + [90.0] * 9 + + [96.0] * 21 + + [95.0] * 6 + + [91.0] + + [87.0] * 6 + + [92.0] * 21 + + [83.0] * 2 + + [86.0] * 10 + + [87.0] * 5 + + [98.0] * 21 + + [97.0] * 14 + + [93.0] * 7 + + [87.0] * 4 + + [86.0] * 4 + + [95.0] * 21 + + [85.0] * 14 + + [83.0] * 2 + + [76.0] * 5 + + [81.0] * 2 + + [98.0] * 21 + + [95.0] * 14 + + [91.0] * 7 + + [86.0] + + [93.0] * 3 + + [95.0] * 20 + ), + ), + ], +) +def test_rolling_window_as_string(center, expected_data): + # see gh-22590 + date_today = datetime.now() + days = date_range(date_today, date_today + timedelta(365), freq="D") + + npr = np.random.RandomState(seed=421) + + data = npr.randint(1, high=100, size=len(days)) + df = DataFrame({"DateCol": days, "metric": data}) + + df.set_index("DateCol", inplace=True) + result = df.rolling(window="21D", min_periods=2, closed="left", center=center)[ + "metric" + ].agg("max") + + index = days.rename("DateCol") + index = index._with_freq(None) + expected = Series(expected_data, index=index, name="metric") + tm.assert_series_equal(result, expected) + + +def test_min_periods1(): + # 
GH#6795 + df = DataFrame([0, 1, 2, 1, 0], columns=["a"]) + result = df["a"].rolling(3, center=True, min_periods=1).max() + expected = Series([1.0, 2.0, 2.0, 2.0, 1.0], name="a") + tm.assert_series_equal(result, expected) + + +def test_rolling_count_with_min_periods(frame_or_series): + # GH 26996 + result = frame_or_series(range(5)).rolling(3, min_periods=3).count() + expected = frame_or_series([np.nan, np.nan, 3.0, 3.0, 3.0]) + tm.assert_equal(result, expected) + + +def test_rolling_count_default_min_periods_with_null_values(frame_or_series): + # GH 26996 + values = [1, 2, 3, np.nan, 4, 5, 6] + expected_counts = [1.0, 2.0, 3.0, 2.0, 2.0, 2.0, 3.0] + + # GH 31302 + with tm.assert_produces_warning(FutureWarning): + result = frame_or_series(values).rolling(3).count() + expected = frame_or_series(expected_counts) + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize( + "df,expected,window,min_periods", + [ + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), + ], + 3, + None, + ), + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [2, 3], "B": [5, 6]}, [1, 2]), + ], + 2, + 1, + ), + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [2, 3], "B": [5, 6]}, [1, 2]), + ], + 2, + 2, + ), + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [2], "B": [5]}, [1]), + ({"A": [3], "B": [6]}, [2]), + ], + 1, + 1, + ), + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [2], "B": [5]}, [1]), + ({"A": [3], "B": [6]}, [2]), + ], + 1, + 0, + ), + (DataFrame({"A": [1], "B": [4]}), [], 2, None), + (DataFrame({"A": [1], "B": [4]}), [], 2, 1), + (DataFrame(), [({}, [])], 2, None), + ( + DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}), + [ + ({"A": [1.0], "B": [np.nan]}, [0]), + ({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]), + ({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]), + ], + 3, + 2, + ), + ], +) +def test_iter_rolling_dataframe(df, expected, window, min_periods): + # GH 11704 + expected = [DataFrame(values, index=index) for (values, index) in expected] + + for (expected, actual) in zip( + expected, df.rolling(window, min_periods=min_periods) + ): + tm.assert_frame_equal(actual, expected) + + +@pytest.mark.parametrize( + "expected,window", + [ + ( + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [2, 3], "B": [5, 6]}, [1, 2]), + ], + "2D", + ), + ( + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), + ], + "3D", + ), + ( + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [2], "B": [5]}, [1]), + ({"A": [3], "B": [6]}, [2]), + ], + "1D", + ), + ], +) +def test_iter_rolling_on_dataframe(expected, window): + # GH 11704, 40373 + df = DataFrame( + { + "A": [1, 2, 3, 4, 5], + "B": [4, 5, 6, 7, 8], + "C": date_range(start="2016-01-01", periods=5, freq="D"), + } + ) + + expected = [ + DataFrame(values, index=df.loc[index, "C"]) for (values, index) in expected + ] + for (expected, actual) in zip(expected, df.rolling(window, on="C")): + tm.assert_frame_equal(actual, expected) + + +def test_iter_rolling_on_dataframe_unordered(): + # GH 43386 + df = DataFrame({"a": ["x", "y", "x"], "b": [0, 1, 2]}) + results = 
list(df.groupby("a").rolling(2)) + expecteds = [df.iloc[idx, [1]] for idx in [[0], [0, 2], [1]]] + for result, expected in zip(results, expecteds): + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "ser,expected,window, min_periods", + [ + ( + Series([1, 2, 3]), + [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], + 3, + None, + ), + ( + Series([1, 2, 3]), + [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], + 3, + 1, + ), + ( + Series([1, 2, 3]), + [([1], [0]), ([1, 2], [0, 1]), ([2, 3], [1, 2])], + 2, + 1, + ), + ( + Series([1, 2, 3]), + [([1], [0]), ([1, 2], [0, 1]), ([2, 3], [1, 2])], + 2, + 2, + ), + (Series([1, 2, 3]), [([1], [0]), ([2], [1]), ([3], [2])], 1, 0), + (Series([1, 2, 3]), [([1], [0]), ([2], [1]), ([3], [2])], 1, 1), + (Series([1, 2]), [([1], [0]), ([1, 2], [0, 1])], 2, 0), + (Series([], dtype="int64"), [], 2, 1), + ], +) +def test_iter_rolling_series(ser, expected, window, min_periods): + # GH 11704 + expected = [Series(values, index=index) for (values, index) in expected] + + for (expected, actual) in zip( + expected, ser.rolling(window, min_periods=min_periods) + ): + tm.assert_series_equal(actual, expected) + + +@pytest.mark.parametrize( + "expected,expected_index,window", + [ + ( + [[0], [1], [2], [3], [4]], + [ + date_range("2020-01-01", periods=1, freq="D"), + date_range("2020-01-02", periods=1, freq="D"), + date_range("2020-01-03", periods=1, freq="D"), + date_range("2020-01-04", periods=1, freq="D"), + date_range("2020-01-05", periods=1, freq="D"), + ], + "1D", + ), + ( + [[0], [0, 1], [1, 2], [2, 3], [3, 4]], + [ + date_range("2020-01-01", periods=1, freq="D"), + date_range("2020-01-01", periods=2, freq="D"), + date_range("2020-01-02", periods=2, freq="D"), + date_range("2020-01-03", periods=2, freq="D"), + date_range("2020-01-04", periods=2, freq="D"), + ], + "2D", + ), + ( + [[0], [0, 1], [0, 1, 2], [1, 2, 3], [2, 3, 4]], + [ + date_range("2020-01-01", periods=1, freq="D"), + date_range("2020-01-01", periods=2, freq="D"), + date_range("2020-01-01", periods=3, freq="D"), + date_range("2020-01-02", periods=3, freq="D"), + date_range("2020-01-03", periods=3, freq="D"), + ], + "3D", + ), + ], +) +def test_iter_rolling_datetime(expected, expected_index, window): + # GH 11704 + ser = Series(range(5), index=date_range(start="2020-01-01", periods=5, freq="D")) + + expected = [ + Series(values, index=idx) for (values, idx) in zip(expected, expected_index) + ] + + for (expected, actual) in zip(expected, ser.rolling(window)): + tm.assert_series_equal(actual, expected) + + +@pytest.mark.parametrize( + "grouping,_index", + [ + ( + {"level": 0}, + MultiIndex.from_tuples( + [(0, 0), (0, 0), (1, 1), (1, 1), (1, 1)], names=[None, None] + ), + ), + ( + {"by": "X"}, + MultiIndex.from_tuples( + [(0, 0), (1, 0), (2, 1), (3, 1), (4, 1)], names=["X", None] + ), + ), + ], +) +def test_rolling_positional_argument(grouping, _index, raw): + # GH 34605 + + def scaled_sum(*args): + if len(args) < 2: + raise ValueError("The function needs two arguments") + array, scale = args + return array.sum() / scale + + df = DataFrame(data={"X": range(5)}, index=[0, 0, 1, 1, 1]) + + expected = DataFrame(data={"X": [0.0, 0.5, 1.0, 1.5, 2.0]}, index=_index) + # GH 40341 + if "by" in grouping: + expected = expected.drop(columns="X", errors="ignore") + result = df.groupby(**grouping).rolling(1).apply(scaled_sum, raw=raw, args=(2,)) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("add", [0.0, 2.0]) +def 
test_rolling_numerical_accuracy_kahan_mean(add): + # GH: 36031 implementing kahan summation + df = DataFrame( + {"A": [3002399751580331.0 + add, -0.0, -0.0]}, + index=[ + Timestamp("19700101 09:00:00"), + Timestamp("19700101 09:00:03"), + Timestamp("19700101 09:00:06"), + ], + ) + result = ( + df.resample("1s").ffill().rolling("3s", closed="left", min_periods=3).mean() + ) + dates = date_range("19700101 09:00:00", periods=7, freq="S") + expected = DataFrame( + { + "A": [ + np.nan, + np.nan, + np.nan, + 3002399751580330.5, + 2001599834386887.25, + 1000799917193443.625, + 0.0, + ] + }, + index=dates, + ) + tm.assert_frame_equal(result, expected) + + +def test_rolling_numerical_accuracy_kahan_sum(): + # GH: 13254 + df = DataFrame([2.186, -1.647, 0.0, 0.0, 0.0, 0.0], columns=["x"]) + result = df["x"].rolling(3).sum() + expected = Series([np.nan, np.nan, 0.539, -1.647, 0.0, 0.0], name="x") + tm.assert_series_equal(result, expected) + + +def test_rolling_numerical_accuracy_jump(): + # GH: 32761 + index = date_range(start="2020-01-01", end="2020-01-02", freq="60s").append( + DatetimeIndex(["2020-01-03"]) + ) + data = np.random.rand(len(index)) + + df = DataFrame({"data": data}, index=index) + result = df.rolling("60s").mean() + tm.assert_frame_equal(result, df[["data"]]) + + +def test_rolling_numerical_accuracy_small_values(): + # GH: 10319 + s = Series( + data=[0.00012456, 0.0003, -0.0, -0.0], + index=date_range("1999-02-03", "1999-02-06"), + ) + result = s.rolling(1).mean() + tm.assert_series_equal(result, s) + + +def test_rolling_numerical_too_large_numbers(): + # GH: 11645 + dates = date_range("2015-01-01", periods=10, freq="D") + ds = Series(data=range(10), index=dates, dtype=np.float64) + ds[2] = -9e33 + result = ds.rolling(5).mean() + expected = Series( + [ + np.nan, + np.nan, + np.nan, + np.nan, + -1.8e33, + -1.8e33, + -1.8e33, + 5.0, + 6.0, + 7.0, + ], + index=dates, + ) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + ("func", "value"), + [("sum", 2.0), ("max", 1.0), ("min", 1.0), ("mean", 1.0), ("median", 1.0)], +) +def test_rolling_mixed_dtypes_axis_1(func, value): + # GH: 20649 + df = DataFrame(1, index=[1, 2], columns=["a", "b", "c"]) + df["c"] = 1.0 + result = getattr(df.rolling(window=2, min_periods=1, axis=1), func)() + expected = DataFrame( + {"a": [1.0, 1.0], "b": [value, value], "c": [value, value]}, + index=[1, 2], + ) + tm.assert_frame_equal(result, expected) + + +def test_rolling_axis_one_with_nan(): + # GH: 35596 + df = DataFrame( + [ + [0, 1, 2, 4, np.nan, np.nan, np.nan], + [0, 1, 2, np.nan, np.nan, np.nan, np.nan], + [0, 2, 2, np.nan, 2, np.nan, 1], + ] + ) + result = df.rolling(window=7, min_periods=1, axis="columns").sum() + expected = DataFrame( + [ + [0.0, 1.0, 3.0, 7.0, 7.0, 7.0, 7.0], + [0.0, 1.0, 3.0, 3.0, 3.0, 3.0, 3.0], + [0.0, 2.0, 4.0, 4.0, 6.0, 6.0, 7.0], + ] + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "value", + ["test", to_datetime("2019-12-31"), to_timedelta("1 days 06:05:01.00003")], +) +def test_rolling_axis_1_non_numeric_dtypes(value): + # GH: 20649 + df = DataFrame({"a": [1, 2]}) + df["b"] = value + result = df.rolling(window=2, min_periods=1, axis=1).sum() + expected = DataFrame({"a": [1.0, 2.0]}) + tm.assert_frame_equal(result, expected) + + +def test_rolling_on_df_transposed(): + # GH: 32724 + df = DataFrame({"A": [1, None], "B": [4, 5], "C": [7, 8]}) + expected = DataFrame({"A": [1.0, np.nan], "B": [5.0, 5.0], "C": [11.0, 13.0]}) + result = df.rolling(min_periods=1, window=2, 
axis=1).sum() + tm.assert_frame_equal(result, expected) + + result = df.T.rolling(min_periods=1, window=2).sum().T + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + ("index", "window"), + [ + ( + period_range(start="2020-01-01 08:00", end="2020-01-01 08:08", freq="T"), + "2T", + ), + ( + period_range(start="2020-01-01 08:00", end="2020-01-01 12:00", freq="30T"), + "1h", + ), + ], +) +@pytest.mark.parametrize( + ("func", "values"), + [ + ("min", [np.nan, 0, 0, 1, 2, 3, 4, 5, 6]), + ("max", [np.nan, 0, 1, 2, 3, 4, 5, 6, 7]), + ("sum", [np.nan, 0, 1, 3, 5, 7, 9, 11, 13]), + ], +) +def test_rolling_period_index(index, window, func, values): + # GH: 34225 + ds = Series([0, 1, 2, 3, 4, 5, 6, 7, 8], index=index) + result = getattr(ds.rolling(window, closed="left"), func)() + expected = Series(values, index=index) + tm.assert_series_equal(result, expected) + + +def test_rolling_sem(frame_or_series): + # GH: 26476 + obj = frame_or_series([0, 1, 2]) + result = obj.rolling(2, min_periods=1).sem() + if isinstance(result, DataFrame): + result = Series(result[0].values) + expected = Series([np.nan] + [0.7071067811865476] * 2) + tm.assert_series_equal(result, expected) + + +@pytest.mark.xfail(is_platform_arm() and not is_platform_mac(), reason="GH 38921") +@pytest.mark.parametrize( + ("func", "third_value", "values"), + [ + ("var", 1, [5e33, 0, 0.5, 0.5, 2, 0]), + ("std", 1, [7.071068e16, 0, 0.7071068, 0.7071068, 1.414214, 0]), + ("var", 2, [5e33, 0.5, 0, 0.5, 2, 0]), + ("std", 2, [7.071068e16, 0.7071068, 0, 0.7071068, 1.414214, 0]), + ], +) +def test_rolling_var_numerical_issues(func, third_value, values): + # GH: 37051 + ds = Series([99999999999999999, 1, third_value, 2, 3, 1, 1]) + result = getattr(ds.rolling(2), func)() + expected = Series([np.nan] + values) + tm.assert_series_equal(result, expected) + + +def test_timeoffset_as_window_parameter_for_corr(): + # GH: 28266 + exp = DataFrame( + { + "B": [ + np.nan, + np.nan, + 0.9999999999999998, + -1.0, + 1.0, + -0.3273268353539892, + 0.9999999999999998, + 1.0, + 0.9999999999999998, + 1.0, + ], + "A": [ + np.nan, + np.nan, + -1.0, + 1.0000000000000002, + -0.3273268353539892, + 0.9999999999999966, + 1.0, + 1.0000000000000002, + 1.0, + 1.0000000000000002, + ], + }, + index=MultiIndex.from_tuples( + [ + (Timestamp("20130101 09:00:00"), "B"), + (Timestamp("20130101 09:00:00"), "A"), + (Timestamp("20130102 09:00:02"), "B"), + (Timestamp("20130102 09:00:02"), "A"), + (Timestamp("20130103 09:00:03"), "B"), + (Timestamp("20130103 09:00:03"), "A"), + (Timestamp("20130105 09:00:05"), "B"), + (Timestamp("20130105 09:00:05"), "A"), + (Timestamp("20130106 09:00:06"), "B"), + (Timestamp("20130106 09:00:06"), "A"), + ] + ), + ) + + df = DataFrame( + {"B": [0, 1, 2, 4, 3], "A": [7, 4, 6, 9, 3]}, + index=[ + Timestamp("20130101 09:00:00"), + Timestamp("20130102 09:00:02"), + Timestamp("20130103 09:00:03"), + Timestamp("20130105 09:00:05"), + Timestamp("20130106 09:00:06"), + ], + ) + + res = df.rolling(window="3d").corr() + + tm.assert_frame_equal(exp, res) + + +@pytest.mark.parametrize("method", ["var", "sum", "mean", "skew", "kurt", "min", "max"]) +def test_rolling_decreasing_indices(method): + """ + Make sure that decreasing indices give the same results as increasing indices. 
+ + GH 36933 + """ + df = DataFrame({"values": np.arange(-15, 10) ** 2}) + df_reverse = DataFrame({"values": df["values"][::-1]}, index=df.index[::-1]) + + increasing = getattr(df.rolling(window=5), method)() + decreasing = getattr(df_reverse.rolling(window=5), method)() + + assert np.abs(decreasing.values[::-1][:-4] - increasing.values[4:]).max() < 1e-12 + + +@pytest.mark.parametrize( + "window,closed,expected", + [ + ("2s", "right", [1.0, 3.0, 5.0, 3.0]), + ("2s", "left", [0.0, 1.0, 3.0, 5.0]), + ("2s", "both", [1.0, 3.0, 6.0, 5.0]), + ("2s", "neither", [0.0, 1.0, 2.0, 3.0]), + ("3s", "right", [1.0, 3.0, 6.0, 5.0]), + ("3s", "left", [1.0, 3.0, 6.0, 5.0]), + ("3s", "both", [1.0, 3.0, 6.0, 5.0]), + ("3s", "neither", [1.0, 3.0, 6.0, 5.0]), + ], +) +def test_rolling_decreasing_indices_centered(window, closed, expected, frame_or_series): + """ + Ensure that a symmetrical inverted index returns the same result as a non-inverted one. + """ + # GH 43927 + + index = date_range("2020", periods=4, freq="1s") + df_inc = frame_or_series(range(4), index=index) + df_dec = frame_or_series(range(4), index=index[::-1]) + + expected_inc = frame_or_series(expected, index=index) + expected_dec = frame_or_series(expected, index=index[::-1]) + + result_inc = df_inc.rolling(window, closed=closed, center=True).sum() + result_dec = df_dec.rolling(window, closed=closed, center=True).sum() + + tm.assert_equal(result_inc, expected_inc) + tm.assert_equal(result_dec, expected_dec) + + +@pytest.mark.parametrize( + "window,expected", + [ + ("1ns", [1.0, 1.0, 1.0, 1.0]), + ("3ns", [2.0, 3.0, 3.0, 2.0]), + ], +) +def test_rolling_center_nanosecond_resolution( + window, closed, expected, frame_or_series +): + index = date_range("2020", periods=4, freq="1ns") + df = frame_or_series([1, 1, 1, 1], index=index, dtype=float) + expected = frame_or_series(expected, index=index, dtype=float) + result = df.rolling(window, closed=closed, center=True).sum() + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize( + "method,expected", + [ + ( + "var", + [ + float("nan"), + 43.0, + float("nan"), + 136.333333, + 43.5, + 94.966667, + 182.0, + 318.0, + ], + ), + ( + "mean", + [float("nan"), 7.5, float("nan"), 21.5, 6.0, 9.166667, 13.0, 17.5], + ), + ( + "sum", + [float("nan"), 30.0, float("nan"), 86.0, 30.0, 55.0, 91.0, 140.0], + ), + ( + "skew", + [ + float("nan"), + 0.709296, + float("nan"), + 0.407073, + 0.984656, + 0.919184, + 0.874674, + 0.842418, + ], + ), + ( + "kurt", + [ + float("nan"), + -0.5916711736073559, + float("nan"), + -1.0028993131317954, + -0.06103844629409494, + -0.254143227116194, + -0.37362637362637585, + -0.45439658241367054, + ], + ), + ], +) +def test_rolling_non_monotonic(method, expected): + """ + Make sure the (rare) branch of non-monotonic indices is covered by a test. + + Output from 1.1.3 is assumed to be the expected output. The output of sum/mean has + been manually verified. + + GH 36933. 
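+ + The CustomIndexer below supplies explicit per-row (start, end) window + bounds: rows flagged in use_expanding get an expanding window anchored + at position 0, and the remaining rows get a fixed-size forward-looking + window.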
+ """ + # Based on an example found in computation.rst + use_expanding = [True, False, True, False, True, True, True, True] + df = DataFrame({"values": np.arange(len(use_expanding)) ** 2}) + + class CustomIndexer(BaseIndexer): + def get_window_bounds(self, num_values, min_periods, center, closed): + start = np.empty(num_values, dtype=np.int64) + end = np.empty(num_values, dtype=np.int64) + for i in range(num_values): + if self.use_expanding[i]: + start[i] = 0 + end[i] = i + 1 + else: + start[i] = i + end[i] = i + self.window_size + return start, end + + indexer = CustomIndexer(window_size=4, use_expanding=use_expanding) + + result = getattr(df.rolling(indexer), method)() + expected = DataFrame({"values": expected}) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + ("index", "window"), + [ + ([0, 1, 2, 3, 4], 2), + (date_range("2001-01-01", freq="D", periods=5), "2D"), + ], +) +def test_rolling_corr_timedelta_index(index, window): + # GH: 31286 + x = Series([1, 2, 3, 4, 5], index=index) + y = x.copy() + x[0:2] = 0.0 + result = x.rolling(window).corr(y) + expected = Series([np.nan, np.nan, 1, 1, 1], index=index) + tm.assert_almost_equal(result, expected) + + +def test_groupby_rolling_nan_included(): + # GH 35542 + data = {"group": ["g1", np.nan, "g1", "g2", np.nan], "B": [0, 1, 2, 3, 4]} + df = DataFrame(data) + result = df.groupby("group", dropna=False).rolling(1, min_periods=1).mean() + expected = DataFrame( + {"B": [0.0, 2.0, 3.0, 1.0, 4.0]}, + # GH-38057 from_tuples puts the NaNs in the codes, result expects them + # to be in the levels, at the moment + # index=MultiIndex.from_tuples( + # [("g1", 0), ("g1", 2), ("g2", 3), (np.nan, 1), (np.nan, 4)], + # names=["group", None], + # ), + index=MultiIndex( + [["g1", "g2", np.nan], [0, 1, 2, 3, 4]], + [[0, 0, 1, 2, 2], [0, 2, 3, 1, 4]], + names=["group", None], + ), + ) + tm.assert_frame_equal(result, expected) + + +def test_groupby_rolling_non_monotonic(): + # GH 43909 + + shuffled = [3, 0, 1, 2] + sec = 1_000 + df = DataFrame( + [{"t": Timestamp(2 * x * sec), "x": x + 1, "c": 42} for x in shuffled] + ) + with pytest.raises(ValueError, match=r".* must be monotonic"): + df.groupby("c").rolling(on="t", window="3s") + + +@pytest.mark.parametrize("method", ["skew", "kurt"]) +def test_rolling_skew_kurt_numerical_stability(method): + # GH#6929 + ser = Series(np.random.rand(10)) + ser_copy = ser.copy() + expected = getattr(ser.rolling(3), method)() + tm.assert_series_equal(ser, ser_copy) + ser = ser + 50000 + result = getattr(ser.rolling(3), method)() + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + ("method", "values"), + [ + ("skew", [2.0, 0.854563, 0.0, 1.999984]), + ("kurt", [4.0, -1.289256, -1.2, 3.999946]), + ], +) +def test_rolling_skew_kurt_large_value_range(method, values): + # GH: 37557 + s = Series([3000000, 1, 1, 2, 3, 4, 999]) + result = getattr(s.rolling(4), method)() + expected = Series([np.nan] * 3 + values) + tm.assert_series_equal(result, expected) + + +def test_invalid_method(): + with pytest.raises(ValueError, match="method must be 'table' or 'single"): + Series(range(1)).rolling(1, method="foo") + + +@pytest.mark.parametrize("window", [1, "1d"]) +def test_rolling_descending_date_order_with_offset(window, frame_or_series): + # GH#40002 + idx = date_range(start="2020-01-01", end="2020-01-03", freq="1d") + obj = frame_or_series(range(1, 4), index=idx) + result = obj.rolling("1d", closed="left").sum() + expected = frame_or_series([np.nan, 1, 2], index=idx) + tm.assert_equal(result, 
expected) + + result = obj.iloc[::-1].rolling("1d", closed="left").sum() + idx = date_range(start="2020-01-03", end="2020-01-01", freq="-1d") + expected = frame_or_series([np.nan, 3, 2], index=idx) + tm.assert_equal(result, expected) + + +def test_rolling_var_floating_artifact_precision(): + # GH 37051 + s = Series([7, 5, 5, 5]) + result = s.rolling(3).var() + expected = Series([np.nan, np.nan, 4 / 3, 0]) + tm.assert_series_equal(result, expected, atol=1.0e-15, rtol=1.0e-15) + + +def test_rolling_std_small_values(): + # GH 37051 + s = Series( + [ + 0.00000054, + 0.00000053, + 0.00000054, + ] + ) + result = s.rolling(2).std() + expected = Series([np.nan, 7.071068e-9, 7.071068e-9]) + tm.assert_series_equal(result, expected, atol=1.0e-15, rtol=1.0e-15) + + +@pytest.mark.parametrize( + "start, exp_values", + [ + (1, [0.03, 0.0155, 0.0155, 0.011, 0.01025]), + (2, [0.001, 0.001, 0.0015, 0.00366666]), + ], +) +def test_rolling_mean_all_nan_window_floating_artifacts(start, exp_values): + # GH#41053 + df = DataFrame( + [ + 0.03, + 0.03, + 0.001, + np.NaN, + 0.002, + 0.008, + np.NaN, + np.NaN, + np.NaN, + np.NaN, + np.NaN, + np.NaN, + 0.005, + 0.2, + ] + ) + + values = exp_values + [ + 0.00366666, + 0.005, + 0.005, + 0.008, + np.NaN, + np.NaN, + 0.005, + 0.102500, + ] + expected = DataFrame( + values, + index=list(range(start, len(values) + start)), + ) + result = df.iloc[start:].rolling(5, min_periods=0).mean() + tm.assert_frame_equal(result, expected) + + +def test_rolling_sum_all_nan_window_floating_artifacts(): + # GH#41053 + df = DataFrame([0.002, 0.008, 0.005, np.NaN, np.NaN, np.NaN]) + result = df.rolling(3, min_periods=0).sum() + expected = DataFrame([0.002, 0.010, 0.015, 0.013, 0.005, 0.0]) + tm.assert_frame_equal(result, expected) + + +def test_rolling_zero_window(): + # GH 22719 + s = Series(range(1)) + result = s.rolling(0).min() + expected = Series([np.nan]) + tm.assert_series_equal(result, expected) + + +def test_rolling_float_dtype(float_numpy_dtype): + # GH#42452 + df = DataFrame({"A": range(5), "B": range(10, 15)}, dtype=float_numpy_dtype) + expected = DataFrame( + {"A": [np.nan] * 5, "B": range(10, 20, 2)}, + dtype=float_numpy_dtype, + ) + result = df.rolling(2, axis=1).sum() + tm.assert_frame_equal(result, expected, check_dtype=False) + + +def test_rolling_numeric_dtypes(): + # GH#41779 + df = DataFrame(np.arange(40).reshape(4, 10), columns=list("abcdefghij")).astype( + { + "a": "float16", + "b": "float32", + "c": "float64", + "d": "int8", + "e": "int16", + "f": "int32", + "g": "uint8", + "h": "uint16", + "i": "uint32", + "j": "uint64", + } + ) + result = df.rolling(window=2, min_periods=1, axis=1).min() + expected = DataFrame( + { + "a": range(0, 40, 10), + "b": range(0, 40, 10), + "c": range(1, 40, 10), + "d": range(2, 40, 10), + "e": range(3, 40, 10), + "f": range(4, 40, 10), + "g": range(5, 40, 10), + "h": range(6, 40, 10), + "i": range(7, 40, 10), + "j": range(8, 40, 10), + }, + dtype="float64", + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("window", [1, 3, 10, 20]) +@pytest.mark.parametrize("method", ["min", "max", "average"]) +@pytest.mark.parametrize("pct", [True, False]) +@pytest.mark.parametrize("ascending", [True, False]) +@pytest.mark.parametrize("test_data", ["default", "duplicates", "nans"]) +def test_rank(window, method, pct, ascending, test_data): + length = 20 + if test_data == "default": + ser = Series(data=np.random.rand(length)) + elif test_data == "duplicates": + ser = Series(data=np.random.choice(3, length)) + elif test_data == 
"nans": + ser = Series( + data=np.random.choice([1.0, 0.25, 0.75, np.nan, np.inf, -np.inf], length) + ) + + expected = ser.rolling(window).apply( + lambda x: x.rank(method=method, pct=pct, ascending=ascending).iloc[-1] + ) + result = ser.rolling(window).rank(method=method, pct=pct, ascending=ascending) + + tm.assert_series_equal(result, expected) + + +def test_rolling_quantile_np_percentile(): + # #9413: Tests that rolling window's quantile default behavior + # is analogous to Numpy's percentile + row = 10 + col = 5 + idx = date_range("20100101", periods=row, freq="B") + df = DataFrame(np.random.rand(row * col).reshape((row, -1)), index=idx) + + df_quantile = df.quantile([0.25, 0.5, 0.75], axis=0) + np_percentile = np.percentile(df, [25, 50, 75], axis=0) + + tm.assert_almost_equal(df_quantile.values, np.array(np_percentile)) + + +@pytest.mark.parametrize("quantile", [0.0, 0.1, 0.45, 0.5, 1]) +@pytest.mark.parametrize( + "interpolation", ["linear", "lower", "higher", "nearest", "midpoint"] +) +@pytest.mark.parametrize( + "data", + [ + [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0], + [8.0, 1.0, 3.0, 4.0, 5.0, 2.0, 6.0, 7.0], + [0.0, np.nan, 0.2, np.nan, 0.4], + [np.nan, np.nan, np.nan, np.nan], + [np.nan, 0.1, np.nan, 0.3, 0.4, 0.5], + [0.5], + [np.nan, 0.7, 0.6], + ], +) +def test_rolling_quantile_interpolation_options(quantile, interpolation, data): + # Tests that rolling window's quantile behavior is analogous to + # Series' quantile for each interpolation option + s = Series(data) + + q1 = s.quantile(quantile, interpolation) + q2 = s.expanding(min_periods=1).quantile(quantile, interpolation).iloc[-1] + + if np.isnan(q1): + assert np.isnan(q2) + else: + assert q1 == q2 + + +def test_invalid_quantile_value(): + data = np.arange(5) + s = Series(data) + + msg = "Interpolation 'invalid' is not supported" + with pytest.raises(ValueError, match=msg): + s.rolling(len(data), min_periods=1).quantile(0.5, interpolation="invalid") + + +def test_rolling_quantile_param(): + ser = Series([0.0, 0.1, 0.5, 0.9, 1.0]) + msg = "quantile value -0.1 not in \\[0, 1\\]" + with pytest.raises(ValueError, match=msg): + ser.rolling(3).quantile(-0.1) + + msg = "quantile value 10.0 not in \\[0, 1\\]" + with pytest.raises(ValueError, match=msg): + ser.rolling(3).quantile(10.0) + + msg = "must be real number, not str" + with pytest.raises(TypeError, match=msg): + ser.rolling(3).quantile("foo") + + +def test_rolling_std_1obs(): + vals = Series([1.0, 2.0, 3.0, 4.0, 5.0]) + + result = vals.rolling(1, min_periods=1).std() + expected = Series([np.nan] * 5) + tm.assert_series_equal(result, expected) + + result = vals.rolling(1, min_periods=1).std(ddof=0) + expected = Series([0.0] * 5) + tm.assert_series_equal(result, expected) + + result = Series([np.nan, np.nan, 3, 4, 5]).rolling(3, min_periods=2).std() + assert np.isnan(result[2]) + + +def test_rolling_std_neg_sqrt(): + # unit test from Bottleneck + + # Test move_nanstd for neg sqrt. 
+ + a = Series( + [ + 0.0011448196318903589, + 0.00028718669878572767, + 0.00028718669878572767, + 0.00028718669878572767, + 0.00028718669878572767, + ] + ) + b = a.rolling(window=3).std() + assert np.isfinite(b[2:]).all() + + b = a.ewm(span=3).std() + assert np.isfinite(b[2:]).all() diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/test_rolling_functions.py b/.local/lib/python3.8/site-packages/pandas/tests/window/test_rolling_functions.py new file mode 100644 index 0000000..c788b3d --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/window/test_rolling_functions.py @@ -0,0 +1,527 @@ +from datetime import datetime + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas import ( + DataFrame, + DatetimeIndex, + Series, + concat, + isna, + notna, +) +import pandas._testing as tm + +import pandas.tseries.offsets as offsets + + +@pytest.mark.parametrize( + "compare_func, roll_func, kwargs", + [ + [np.mean, "mean", {}], + [np.nansum, "sum", {}], + pytest.param( + lambda x: np.isfinite(x).astype(float).sum(), + "count", + {}, + marks=pytest.mark.filterwarnings("ignore:min_periods:FutureWarning"), + ), + [np.median, "median", {}], + [np.min, "min", {}], + [np.max, "max", {}], + [lambda x: np.std(x, ddof=1), "std", {}], + [lambda x: np.std(x, ddof=0), "std", {"ddof": 0}], + [lambda x: np.var(x, ddof=1), "var", {}], + [lambda x: np.var(x, ddof=0), "var", {"ddof": 0}], + ], +) +def test_series(series, compare_func, roll_func, kwargs): + result = getattr(series.rolling(50), roll_func)(**kwargs) + assert isinstance(result, Series) + tm.assert_almost_equal(result.iloc[-1], compare_func(series[-50:])) + + +@pytest.mark.parametrize( + "compare_func, roll_func, kwargs", + [ + [np.mean, "mean", {}], + [np.nansum, "sum", {}], + pytest.param( + lambda x: np.isfinite(x).astype(float).sum(), + "count", + {}, + marks=pytest.mark.filterwarnings("ignore:min_periods:FutureWarning"), + ), + [np.median, "median", {}], + [np.min, "min", {}], + [np.max, "max", {}], + [lambda x: np.std(x, ddof=1), "std", {}], + [lambda x: np.std(x, ddof=0), "std", {"ddof": 0}], + [lambda x: np.var(x, ddof=1), "var", {}], + [lambda x: np.var(x, ddof=0), "var", {"ddof": 0}], + ], +) +def test_frame(raw, frame, compare_func, roll_func, kwargs): + result = getattr(frame.rolling(50), roll_func)(**kwargs) + assert isinstance(result, DataFrame) + tm.assert_series_equal( + result.iloc[-1, :], + frame.iloc[-50:, :].apply(compare_func, axis=0, raw=raw), + check_names=False, + ) + + +@pytest.mark.parametrize( + "compare_func, roll_func, kwargs, minp", + [ + [np.mean, "mean", {}, 10], + [np.nansum, "sum", {}, 10], + [lambda x: np.isfinite(x).astype(float).sum(), "count", {}, 0], + [np.median, "median", {}, 10], + [np.min, "min", {}, 10], + [np.max, "max", {}, 10], + [lambda x: np.std(x, ddof=1), "std", {}, 10], + [lambda x: np.std(x, ddof=0), "std", {"ddof": 0}, 10], + [lambda x: np.var(x, ddof=1), "var", {}, 10], + [lambda x: np.var(x, ddof=0), "var", {"ddof": 0}, 10], + ], +) +def test_time_rule_series(series, compare_func, roll_func, kwargs, minp): + win = 25 + ser = series[::2].resample("B").mean() + series_result = getattr(ser.rolling(window=win, min_periods=minp), roll_func)( + **kwargs + ) + last_date = series_result.index[-1] + prev_date = last_date - 24 * offsets.BDay() + + trunc_series = series[::2].truncate(prev_date, last_date) + tm.assert_almost_equal(series_result[-1], compare_func(trunc_series)) + + +@pytest.mark.parametrize( + "compare_func, roll_func, 
kwargs, minp", + [ + [np.mean, "mean", {}, 10], + [np.nansum, "sum", {}, 10], + [lambda x: np.isfinite(x).astype(float).sum(), "count", {}, 0], + [np.median, "median", {}, 10], + [np.min, "min", {}, 10], + [np.max, "max", {}, 10], + [lambda x: np.std(x, ddof=1), "std", {}, 10], + [lambda x: np.std(x, ddof=0), "std", {"ddof": 0}, 10], + [lambda x: np.var(x, ddof=1), "var", {}, 10], + [lambda x: np.var(x, ddof=0), "var", {"ddof": 0}, 10], + ], +) +def test_time_rule_frame(raw, frame, compare_func, roll_func, kwargs, minp): + win = 25 + frm = frame[::2].resample("B").mean() + frame_result = getattr(frm.rolling(window=win, min_periods=minp), roll_func)( + **kwargs + ) + last_date = frame_result.index[-1] + prev_date = last_date - 24 * offsets.BDay() + + trunc_frame = frame[::2].truncate(prev_date, last_date) + tm.assert_series_equal( + frame_result.xs(last_date), + trunc_frame.apply(compare_func, raw=raw), + check_names=False, + ) + + +@pytest.mark.parametrize( + "compare_func, roll_func, kwargs", + [ + [np.mean, "mean", {}], + [np.nansum, "sum", {}], + [np.median, "median", {}], + [np.min, "min", {}], + [np.max, "max", {}], + [lambda x: np.std(x, ddof=1), "std", {}], + [lambda x: np.std(x, ddof=0), "std", {"ddof": 0}], + [lambda x: np.var(x, ddof=1), "var", {}], + [lambda x: np.var(x, ddof=0), "var", {"ddof": 0}], + ], +) +def test_nans(compare_func, roll_func, kwargs): + obj = Series(np.random.randn(50)) + obj[:10] = np.NaN + obj[-10:] = np.NaN + + result = getattr(obj.rolling(50, min_periods=30), roll_func)(**kwargs) + tm.assert_almost_equal(result.iloc[-1], compare_func(obj[10:-10])) + + # min_periods is working correctly + result = getattr(obj.rolling(20, min_periods=15), roll_func)(**kwargs) + assert isna(result.iloc[23]) + assert not isna(result.iloc[24]) + + assert not isna(result.iloc[-6]) + assert isna(result.iloc[-5]) + + obj2 = Series(np.random.randn(20)) + result = getattr(obj2.rolling(10, min_periods=5), roll_func)(**kwargs) + assert isna(result.iloc[3]) + assert notna(result.iloc[4]) + + if roll_func != "sum": + result0 = getattr(obj.rolling(20, min_periods=0), roll_func)(**kwargs) + result1 = getattr(obj.rolling(20, min_periods=1), roll_func)(**kwargs) + tm.assert_almost_equal(result0, result1) + + +def test_nans_count(): + obj = Series(np.random.randn(50)) + obj[:10] = np.NaN + obj[-10:] = np.NaN + result = obj.rolling(50, min_periods=30).count() + tm.assert_almost_equal( + result.iloc[-1], np.isfinite(obj[10:-10]).astype(float).sum() + ) + + +@pytest.mark.parametrize( + "roll_func, kwargs", + [ + ["mean", {}], + ["sum", {}], + ["median", {}], + ["min", {}], + ["max", {}], + ["std", {}], + ["std", {"ddof": 0}], + ["var", {}], + ["var", {"ddof": 0}], + ], +) +@pytest.mark.parametrize("minp", [0, 99, 100]) +def test_min_periods(series, minp, roll_func, kwargs): + result = getattr(series.rolling(len(series) + 1, min_periods=minp), roll_func)( + **kwargs + ) + expected = getattr(series.rolling(len(series), min_periods=minp), roll_func)( + **kwargs + ) + nan_mask = isna(result) + tm.assert_series_equal(nan_mask, isna(expected)) + + nan_mask = ~nan_mask + tm.assert_almost_equal(result[nan_mask], expected[nan_mask]) + + +def test_min_periods_count(series): + result = series.rolling(len(series) + 1, min_periods=0).count() + expected = series.rolling(len(series), min_periods=0).count() + nan_mask = isna(result) + tm.assert_series_equal(nan_mask, isna(expected)) + + nan_mask = ~nan_mask + tm.assert_almost_equal(result[nan_mask], expected[nan_mask]) + + +@pytest.mark.parametrize( + 
"roll_func, kwargs, minp", + [ + ["mean", {}, 15], + ["sum", {}, 15], + ["count", {}, 0], + ["median", {}, 15], + ["min", {}, 15], + ["max", {}, 15], + ["std", {}, 15], + ["std", {"ddof": 0}, 15], + ["var", {}, 15], + ["var", {"ddof": 0}, 15], + ], +) +def test_center(roll_func, kwargs, minp): + obj = Series(np.random.randn(50)) + obj[:10] = np.NaN + obj[-10:] = np.NaN + + result = getattr(obj.rolling(20, min_periods=minp, center=True), roll_func)( + **kwargs + ) + expected = getattr( + concat([obj, Series([np.NaN] * 9)]).rolling(20, min_periods=minp), roll_func + )(**kwargs)[9:].reset_index(drop=True) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "roll_func, kwargs, minp, fill_value", + [ + ["mean", {}, 10, None], + ["sum", {}, 10, None], + ["count", {}, 0, 0], + ["median", {}, 10, None], + ["min", {}, 10, None], + ["max", {}, 10, None], + ["std", {}, 10, None], + ["std", {"ddof": 0}, 10, None], + ["var", {}, 10, None], + ["var", {"ddof": 0}, 10, None], + ], +) +def test_center_reindex_series(series, roll_func, kwargs, minp, fill_value): + # shifter index + s = [f"x{x:d}" for x in range(12)] + + series_xp = ( + getattr( + series.reindex(list(series.index) + s).rolling(window=25, min_periods=minp), + roll_func, + )(**kwargs) + .shift(-12) + .reindex(series.index) + ) + series_rs = getattr( + series.rolling(window=25, min_periods=minp, center=True), roll_func + )(**kwargs) + if fill_value is not None: + series_xp = series_xp.fillna(fill_value) + tm.assert_series_equal(series_xp, series_rs) + + +@pytest.mark.parametrize( + "roll_func, kwargs, minp, fill_value", + [ + ["mean", {}, 10, None], + ["sum", {}, 10, None], + ["count", {}, 0, 0], + ["median", {}, 10, None], + ["min", {}, 10, None], + ["max", {}, 10, None], + ["std", {}, 10, None], + ["std", {"ddof": 0}, 10, None], + ["var", {}, 10, None], + ["var", {"ddof": 0}, 10, None], + ], +) +def test_center_reindex_frame(frame, roll_func, kwargs, minp, fill_value): + # shifter index + s = [f"x{x:d}" for x in range(12)] + + frame_xp = ( + getattr( + frame.reindex(list(frame.index) + s).rolling(window=25, min_periods=minp), + roll_func, + )(**kwargs) + .shift(-12) + .reindex(frame.index) + ) + frame_rs = getattr( + frame.rolling(window=25, min_periods=minp, center=True), roll_func + )(**kwargs) + if fill_value is not None: + frame_xp = frame_xp.fillna(fill_value) + tm.assert_frame_equal(frame_xp, frame_rs) + + +@pytest.mark.parametrize( + "f", + [ + lambda x: x.rolling(window=10, min_periods=5).cov(x, pairwise=False), + lambda x: x.rolling(window=10, min_periods=5).corr(x, pairwise=False), + lambda x: x.rolling(window=10, min_periods=5).max(), + lambda x: x.rolling(window=10, min_periods=5).min(), + lambda x: x.rolling(window=10, min_periods=5).sum(), + lambda x: x.rolling(window=10, min_periods=5).mean(), + lambda x: x.rolling(window=10, min_periods=5).std(), + lambda x: x.rolling(window=10, min_periods=5).var(), + lambda x: x.rolling(window=10, min_periods=5).skew(), + lambda x: x.rolling(window=10, min_periods=5).kurt(), + lambda x: x.rolling(window=10, min_periods=5).quantile(quantile=0.5), + lambda x: x.rolling(window=10, min_periods=5).median(), + lambda x: x.rolling(window=10, min_periods=5).apply(sum, raw=False), + lambda x: x.rolling(window=10, min_periods=5).apply(sum, raw=True), + pytest.param( + lambda x: x.rolling(win_type="boxcar", window=10, min_periods=5).mean(), + marks=td.skip_if_no_scipy, + ), + ], +) +def test_rolling_functions_window_non_shrinkage(f): + # GH 7764 + s = Series(range(4)) + 
s_expected = Series(np.nan, index=s.index) + df = DataFrame([[1, 5], [3, 2], [3, 9], [-1, 0]], columns=["A", "B"]) + df_expected = DataFrame(np.nan, index=df.index, columns=df.columns) + + s_result = f(s) + tm.assert_series_equal(s_result, s_expected) + + df_result = f(df) + tm.assert_frame_equal(df_result, df_expected) + + +def test_rolling_max_gh6297(): + """Replicate result expected in GH #6297""" + indices = [datetime(1975, 1, i) for i in range(1, 6)] + # So that we can have 2 datapoints on one of the days + indices.append(datetime(1975, 1, 3, 6, 0)) + series = Series(range(1, 7), index=indices) + # Use floats instead of ints as values + series = series.map(lambda x: float(x)) + # Sort chronologically + series = series.sort_index() + + expected = Series( + [1.0, 2.0, 6.0, 4.0, 5.0], + index=DatetimeIndex([datetime(1975, 1, i, 0) for i in range(1, 6)], freq="D"), + ) + x = series.resample("D").max().rolling(window=1).max() + tm.assert_series_equal(expected, x) + + +def test_rolling_max_resample(): + + indices = [datetime(1975, 1, i) for i in range(1, 6)] + # So that we can have 3 datapoints on last day (4, 10, and 20) + indices.append(datetime(1975, 1, 5, 1)) + indices.append(datetime(1975, 1, 5, 2)) + series = Series(list(range(0, 5)) + [10, 20], index=indices) + # Use floats instead of ints as values + series = series.map(lambda x: float(x)) + # Sort chronologically + series = series.sort_index() + + # Default how should be max + expected = Series( + [0.0, 1.0, 2.0, 3.0, 20.0], + index=DatetimeIndex([datetime(1975, 1, i, 0) for i in range(1, 6)], freq="D"), + ) + x = series.resample("D").max().rolling(window=1).max() + tm.assert_series_equal(expected, x) + + # Now specify median (10.0) + expected = Series( + [0.0, 1.0, 2.0, 3.0, 10.0], + index=DatetimeIndex([datetime(1975, 1, i, 0) for i in range(1, 6)], freq="D"), + ) + x = series.resample("D").median().rolling(window=1).max() + tm.assert_series_equal(expected, x) + + # Now specify mean (4+10+20)/3 + v = (4.0 + 10.0 + 20.0) / 3.0 + expected = Series( + [0.0, 1.0, 2.0, 3.0, v], + index=DatetimeIndex([datetime(1975, 1, i, 0) for i in range(1, 6)], freq="D"), + ) + x = series.resample("D").mean().rolling(window=1).max() + tm.assert_series_equal(expected, x) + + +def test_rolling_min_resample(): + + indices = [datetime(1975, 1, i) for i in range(1, 6)] + # So that we can have 3 datapoints on last day (4, 10, and 20) + indices.append(datetime(1975, 1, 5, 1)) + indices.append(datetime(1975, 1, 5, 2)) + series = Series(list(range(0, 5)) + [10, 20], index=indices) + # Use floats instead of ints as values + series = series.map(lambda x: float(x)) + # Sort chronologically + series = series.sort_index() + + # Default how should be min + expected = Series( + [0.0, 1.0, 2.0, 3.0, 4.0], + index=DatetimeIndex([datetime(1975, 1, i, 0) for i in range(1, 6)], freq="D"), + ) + r = series.resample("D").min().rolling(window=1) + tm.assert_series_equal(expected, r.min()) + + +def test_rolling_median_resample(): + + indices = [datetime(1975, 1, i) for i in range(1, 6)] + # So that we can have 3 datapoints on last day (4, 10, and 20) + indices.append(datetime(1975, 1, 5, 1)) + indices.append(datetime(1975, 1, 5, 2)) + series = Series(list(range(0, 5)) + [10, 20], index=indices) + # Use floats instead of ints as values + series = series.map(lambda x: float(x)) + # Sort chronologically + series = series.sort_index() + + # Default how should be median + expected = Series( + [0.0, 1.0, 2.0, 3.0, 10], + index=DatetimeIndex([datetime(1975, 1, i, 0) for i in 
range(1, 6)], freq="D"), + ) + x = series.resample("D").median().rolling(window=1).median() + tm.assert_series_equal(expected, x) + + +def test_rolling_median_memory_error(): + # GH11722 + n = 20000 + Series(np.random.randn(n)).rolling(window=2, center=False).median() + Series(np.random.randn(n)).rolling(window=2, center=False).median() + + +@pytest.mark.parametrize( + "data_type", + [np.dtype(f"f{width}") for width in [4, 8]] + + [np.dtype(f"{sign}{width}") for width in [1, 2, 4, 8] for sign in "ui"], +) +def test_rolling_min_max_numeric_types(data_type): + # GH12373 + + # Just testing that these don't throw exceptions and that + # the return type is float64. Other tests will cover quantitative + # correctness + result = DataFrame(np.arange(20, dtype=data_type)).rolling(window=5).max() + assert result.dtypes[0] == np.dtype("f8") + result = DataFrame(np.arange(20, dtype=data_type)).rolling(window=5).min() + assert result.dtypes[0] == np.dtype("f8") + + +@pytest.mark.parametrize( + "f", + [ + lambda x: x.rolling(window=10, min_periods=0).count(), + lambda x: x.rolling(window=10, min_periods=5).cov(x, pairwise=False), + lambda x: x.rolling(window=10, min_periods=5).corr(x, pairwise=False), + lambda x: x.rolling(window=10, min_periods=5).max(), + lambda x: x.rolling(window=10, min_periods=5).min(), + lambda x: x.rolling(window=10, min_periods=5).sum(), + lambda x: x.rolling(window=10, min_periods=5).mean(), + lambda x: x.rolling(window=10, min_periods=5).std(), + lambda x: x.rolling(window=10, min_periods=5).var(), + lambda x: x.rolling(window=10, min_periods=5).skew(), + lambda x: x.rolling(window=10, min_periods=5).kurt(), + lambda x: x.rolling(window=10, min_periods=5).quantile(0.5), + lambda x: x.rolling(window=10, min_periods=5).median(), + lambda x: x.rolling(window=10, min_periods=5).apply(sum, raw=False), + lambda x: x.rolling(window=10, min_periods=5).apply(sum, raw=True), + pytest.param( + lambda x: x.rolling(win_type="boxcar", window=10, min_periods=5).mean(), + marks=td.skip_if_no_scipy, + ), + ], +) +def test_moment_functions_zero_length(f): + # GH 8056 + s = Series(dtype=np.float64) + s_expected = s + df1 = DataFrame() + df1_expected = df1 + df2 = DataFrame(columns=["a"]) + df2["a"] = df2["a"].astype("float64") + df2_expected = df2 + + s_result = f(s) + tm.assert_series_equal(s_result, s_expected) + + df1_result = f(df1) + tm.assert_frame_equal(df1_result, df1_expected) + + df2_result = f(df2) + tm.assert_frame_equal(df2_result, df2_expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/test_rolling_quantile.py b/.local/lib/python3.8/site-packages/pandas/tests/window/test_rolling_quantile.py new file mode 100644 index 0000000..56681c2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/window/test_rolling_quantile.py @@ -0,0 +1,172 @@ +from functools import partial + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Series, + concat, + isna, + notna, +) +import pandas._testing as tm + +import pandas.tseries.offsets as offsets + + +def scoreatpercentile(a, per): + values = np.sort(a, axis=0) + + idx = int(per / 1.0 * (values.shape[0] - 1)) + + if idx == values.shape[0] - 1: + retval = values[-1] + + else: + qlow = idx / (values.shape[0] - 1) + qhig = (idx + 1) / (values.shape[0] - 1) + vlow = values[idx] + vhig = values[idx + 1] + retval = vlow + (vhig - vlow) * (per - qlow) / (qhig - qlow) + + return retval + + +@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0]) +def test_series(series, q): + compare_func = 
partial(scoreatpercentile, per=q) + result = series.rolling(50).quantile(q) + assert isinstance(result, Series) + tm.assert_almost_equal(result.iloc[-1], compare_func(series[-50:])) + + +@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0]) +def test_frame(raw, frame, q): + compare_func = partial(scoreatpercentile, per=q) + result = frame.rolling(50).quantile(q) + assert isinstance(result, DataFrame) + tm.assert_series_equal( + result.iloc[-1, :], + frame.iloc[-50:, :].apply(compare_func, axis=0, raw=raw), + check_names=False, + ) + + +@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0]) +def test_time_rule_series(series, q): + compare_func = partial(scoreatpercentile, per=q) + win = 25 + ser = series[::2].resample("B").mean() + series_result = ser.rolling(window=win, min_periods=10).quantile(q) + last_date = series_result.index[-1] + prev_date = last_date - 24 * offsets.BDay() + + trunc_series = series[::2].truncate(prev_date, last_date) + tm.assert_almost_equal(series_result[-1], compare_func(trunc_series)) + + +@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0]) +def test_time_rule_frame(raw, frame, q): + compare_func = partial(scoreatpercentile, per=q) + win = 25 + frm = frame[::2].resample("B").mean() + frame_result = frm.rolling(window=win, min_periods=10).quantile(q) + last_date = frame_result.index[-1] + prev_date = last_date - 24 * offsets.BDay() + + trunc_frame = frame[::2].truncate(prev_date, last_date) + tm.assert_series_equal( + frame_result.xs(last_date), + trunc_frame.apply(compare_func, raw=raw), + check_names=False, + ) + + +@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0]) +def test_nans(q): + compare_func = partial(scoreatpercentile, per=q) + obj = Series(np.random.randn(50)) + obj[:10] = np.NaN + obj[-10:] = np.NaN + + result = obj.rolling(50, min_periods=30).quantile(q) + tm.assert_almost_equal(result.iloc[-1], compare_func(obj[10:-10])) + + # min_periods is working correctly + result = obj.rolling(20, min_periods=15).quantile(q) + assert isna(result.iloc[23]) + assert not isna(result.iloc[24]) + + assert not isna(result.iloc[-6]) + assert isna(result.iloc[-5]) + + obj2 = Series(np.random.randn(20)) + result = obj2.rolling(10, min_periods=5).quantile(q) + assert isna(result.iloc[3]) + assert notna(result.iloc[4]) + + result0 = obj.rolling(20, min_periods=0).quantile(q) + result1 = obj.rolling(20, min_periods=1).quantile(q) + tm.assert_almost_equal(result0, result1) + + +@pytest.mark.parametrize("minp", [0, 99, 100]) +@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0]) +def test_min_periods(series, minp, q): + result = series.rolling(len(series) + 1, min_periods=minp).quantile(q) + expected = series.rolling(len(series), min_periods=minp).quantile(q) + nan_mask = isna(result) + tm.assert_series_equal(nan_mask, isna(expected)) + + nan_mask = ~nan_mask + tm.assert_almost_equal(result[nan_mask], expected[nan_mask]) + + +@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0]) +def test_center(q): + obj = Series(np.random.randn(50)) + obj[:10] = np.NaN + obj[-10:] = np.NaN + + result = obj.rolling(20, center=True).quantile(q) + expected = ( + concat([obj, Series([np.NaN] * 9)]) + .rolling(20) + .quantile(q)[9:] + .reset_index(drop=True) + ) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0]) +def test_center_reindex_series(series, q): + # shifter index + s = [f"x{x:d}" for x in range(12)] + + series_xp = ( + series.reindex(list(series.index) + s) + .rolling(window=25) + .quantile(q) + .shift(-12) + 
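+        # explanatory note: padding the index with 12 dummy labels and then
+        # shifting back by 12 makes the trailing 25-period window at i+12
+        # cover the same observations as a centered window at i, which is
+        # what center=True computes below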
.reindex(series.index) + ) + + series_rs = series.rolling(window=25, center=True).quantile(q) + tm.assert_series_equal(series_xp, series_rs) + + +@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0]) +def test_center_reindex_frame(frame, q): + # shifter index + s = [f"x{x:d}" for x in range(12)] + + frame_xp = ( + frame.reindex(list(frame.index) + s) + .rolling(window=25) + .quantile(q) + .shift(-12) + .reindex(frame.index) + ) + frame_rs = frame.rolling(window=25, center=True).quantile(q) + tm.assert_frame_equal(frame_xp, frame_rs) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/test_rolling_skew_kurt.py b/.local/lib/python3.8/site-packages/pandas/tests/window/test_rolling_skew_kurt.py new file mode 100644 index 0000000..2c275ed --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/window/test_rolling_skew_kurt.py @@ -0,0 +1,224 @@ +from functools import partial + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas import ( + DataFrame, + Series, + concat, + isna, + notna, +) +import pandas._testing as tm + +import pandas.tseries.offsets as offsets + + +@td.skip_if_no_scipy +@pytest.mark.parametrize("sp_func, roll_func", [["kurtosis", "kurt"], ["skew", "skew"]]) +def test_series(series, sp_func, roll_func): + import scipy.stats + + compare_func = partial(getattr(scipy.stats, sp_func), bias=False) + result = getattr(series.rolling(50), roll_func)() + assert isinstance(result, Series) + tm.assert_almost_equal(result.iloc[-1], compare_func(series[-50:])) + + +@td.skip_if_no_scipy +@pytest.mark.parametrize("sp_func, roll_func", [["kurtosis", "kurt"], ["skew", "skew"]]) +def test_frame(raw, frame, sp_func, roll_func): + import scipy.stats + + compare_func = partial(getattr(scipy.stats, sp_func), bias=False) + result = getattr(frame.rolling(50), roll_func)() + assert isinstance(result, DataFrame) + tm.assert_series_equal( + result.iloc[-1, :], + frame.iloc[-50:, :].apply(compare_func, axis=0, raw=raw), + check_names=False, + ) + + +@td.skip_if_no_scipy +@pytest.mark.parametrize("sp_func, roll_func", [["kurtosis", "kurt"], ["skew", "skew"]]) +def test_time_rule_series(series, sp_func, roll_func): + import scipy.stats + + compare_func = partial(getattr(scipy.stats, sp_func), bias=False) + win = 25 + ser = series[::2].resample("B").mean() + series_result = getattr(ser.rolling(window=win, min_periods=10), roll_func)() + last_date = series_result.index[-1] + prev_date = last_date - 24 * offsets.BDay() + + trunc_series = series[::2].truncate(prev_date, last_date) + tm.assert_almost_equal(series_result[-1], compare_func(trunc_series)) + + +@td.skip_if_no_scipy +@pytest.mark.parametrize("sp_func, roll_func", [["kurtosis", "kurt"], ["skew", "skew"]]) +def test_time_rule_frame(raw, frame, sp_func, roll_func): + import scipy.stats + + compare_func = partial(getattr(scipy.stats, sp_func), bias=False) + win = 25 + frm = frame[::2].resample("B").mean() + frame_result = getattr(frm.rolling(window=win, min_periods=10), roll_func)() + last_date = frame_result.index[-1] + prev_date = last_date - 24 * offsets.BDay() + + trunc_frame = frame[::2].truncate(prev_date, last_date) + tm.assert_series_equal( + frame_result.xs(last_date), + trunc_frame.apply(compare_func, raw=raw), + check_names=False, + ) + + +@td.skip_if_no_scipy +@pytest.mark.parametrize("sp_func, roll_func", [["kurtosis", "kurt"], ["skew", "skew"]]) +def test_nans(sp_func, roll_func): + import scipy.stats + + compare_func = partial(getattr(scipy.stats, sp_func), 
bias=False) + obj = Series(np.random.randn(50)) + obj[:10] = np.NaN + obj[-10:] = np.NaN + + result = getattr(obj.rolling(50, min_periods=30), roll_func)() + tm.assert_almost_equal(result.iloc[-1], compare_func(obj[10:-10])) + + # min_periods is working correctly + result = getattr(obj.rolling(20, min_periods=15), roll_func)() + assert isna(result.iloc[23]) + assert not isna(result.iloc[24]) + + assert not isna(result.iloc[-6]) + assert isna(result.iloc[-5]) + + obj2 = Series(np.random.randn(20)) + result = getattr(obj2.rolling(10, min_periods=5), roll_func)() + assert isna(result.iloc[3]) + assert notna(result.iloc[4]) + + result0 = getattr(obj.rolling(20, min_periods=0), roll_func)() + result1 = getattr(obj.rolling(20, min_periods=1), roll_func)() + tm.assert_almost_equal(result0, result1) + + +@pytest.mark.parametrize("minp", [0, 99, 100]) +@pytest.mark.parametrize("roll_func", ["kurt", "skew"]) +def test_min_periods(series, minp, roll_func): + result = getattr(series.rolling(len(series) + 1, min_periods=minp), roll_func)() + expected = getattr(series.rolling(len(series), min_periods=minp), roll_func)() + nan_mask = isna(result) + tm.assert_series_equal(nan_mask, isna(expected)) + + nan_mask = ~nan_mask + tm.assert_almost_equal(result[nan_mask], expected[nan_mask]) + + +@pytest.mark.parametrize("roll_func", ["kurt", "skew"]) +def test_center(roll_func): + obj = Series(np.random.randn(50)) + obj[:10] = np.NaN + obj[-10:] = np.NaN + + result = getattr(obj.rolling(20, center=True), roll_func)() + expected = getattr(concat([obj, Series([np.NaN] * 9)]).rolling(20), roll_func)()[ + 9: + ].reset_index(drop=True) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("roll_func", ["kurt", "skew"]) +def test_center_reindex_series(series, roll_func): + # shifter index + s = [f"x{x:d}" for x in range(12)] + + series_xp = ( + getattr( + series.reindex(list(series.index) + s).rolling(window=25), + roll_func, + )() + .shift(-12) + .reindex(series.index) + ) + series_rs = getattr(series.rolling(window=25, center=True), roll_func)() + tm.assert_series_equal(series_xp, series_rs) + + +@pytest.mark.slow +@pytest.mark.parametrize("roll_func", ["kurt", "skew"]) +def test_center_reindex_frame(frame, roll_func): + # shifter index + s = [f"x{x:d}" for x in range(12)] + + frame_xp = ( + getattr( + frame.reindex(list(frame.index) + s).rolling(window=25), + roll_func, + )() + .shift(-12) + .reindex(frame.index) + ) + frame_rs = getattr(frame.rolling(window=25, center=True), roll_func)() + tm.assert_frame_equal(frame_xp, frame_rs) + + +def test_rolling_skew_edge_cases(): + + all_nan = Series([np.NaN] * 5) + + # yields all NaN (0 variance) + d = Series([1] * 5) + x = d.rolling(window=5).skew() + tm.assert_series_equal(all_nan, x) + + # yields all NaN (window too small) + d = Series(np.random.randn(5)) + x = d.rolling(window=2).skew() + tm.assert_series_equal(all_nan, x) + + # yields [NaN, NaN, NaN, 0.177994, 1.548824] + d = Series([-1.50837035, -0.1297039, 0.19501095, 1.73508164, 0.41941401]) + expected = Series([np.NaN, np.NaN, np.NaN, 0.177994, 1.548824]) + x = d.rolling(window=4).skew() + tm.assert_series_equal(expected, x) + + +def test_rolling_kurt_edge_cases(): + + all_nan = Series([np.NaN] * 5) + + # yields all NaN (0 variance) + d = Series([1] * 5) + x = d.rolling(window=5).kurt() + tm.assert_series_equal(all_nan, x) + + # yields all NaN (window too small) + d = Series(np.random.randn(5)) + x = d.rolling(window=3).kurt() + tm.assert_series_equal(all_nan, x) + + # yields [NaN, NaN, NaN, 
1.224307, 2.671499] + d = Series([-1.50837035, -0.1297039, 0.19501095, 1.73508164, 0.41941401]) + expected = Series([np.NaN, np.NaN, np.NaN, 1.224307, 2.671499]) + x = d.rolling(window=4).kurt() + tm.assert_series_equal(expected, x) + + +def test_rolling_skew_eq_value_fperr(): + # #18804 all rolling skew for all equal values should return Nan + a = Series([1.1] * 15).rolling(window=10).skew() + assert np.isnan(a).all() + + +def test_rolling_kurt_eq_value_fperr(): + # #18804 all rolling kurt for all equal values should return Nan + a = Series([1.1] * 15).rolling(window=10).kurt() + assert np.isnan(a).all() diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/test_timeseries_window.py b/.local/lib/python3.8/site-packages/pandas/tests/window/test_timeseries_window.py new file mode 100644 index 0000000..f2cf7bd --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/window/test_timeseries_window.py @@ -0,0 +1,759 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + Timestamp, + date_range, + to_datetime, +) +import pandas._testing as tm + +import pandas.tseries.offsets as offsets + + +class TestRollingTS: + + # rolling time-series friendly + # xref GH13327 + + def setup_method(self, method): + + self.regular = DataFrame( + {"A": date_range("20130101", periods=5, freq="s"), "B": range(5)} + ).set_index("A") + + self.ragged = DataFrame({"B": range(5)}) + self.ragged.index = [ + Timestamp("20130101 09:00:00"), + Timestamp("20130101 09:00:02"), + Timestamp("20130101 09:00:03"), + Timestamp("20130101 09:00:05"), + Timestamp("20130101 09:00:06"), + ] + + def test_doc_string(self): + + df = DataFrame( + {"B": [0, 1, 2, np.nan, 4]}, + index=[ + Timestamp("20130101 09:00:00"), + Timestamp("20130101 09:00:02"), + Timestamp("20130101 09:00:03"), + Timestamp("20130101 09:00:05"), + Timestamp("20130101 09:00:06"), + ], + ) + df + df.rolling("2s").sum() + + def test_invalid_window_non_int(self): + + # not a valid freq + msg = "passed window foobar is not compatible with a datetimelike index" + with pytest.raises(ValueError, match=msg): + self.regular.rolling(window="foobar") + # not a datetimelike index + msg = "window must be an integer" + with pytest.raises(ValueError, match=msg): + self.regular.reset_index().rolling(window="foobar") + + @pytest.mark.parametrize("freq", ["2MS", offsets.MonthBegin(2)]) + def test_invalid_window_nonfixed(self, freq): + + # non-fixed freqs + msg = "\\<2 \\* MonthBegins\\> is a non-fixed frequency" + with pytest.raises(ValueError, match=msg): + self.regular.rolling(window=freq) + + @pytest.mark.parametrize("freq", ["1D", offsets.Day(2), "2ms"]) + def test_valid_window(self, freq): + self.regular.rolling(window=freq) + + @pytest.mark.parametrize("minp", [1.0, "foo", np.array([1, 2, 3])]) + def test_invalid_minp(self, minp): + # non-integer min_periods + msg = ( + r"local variable 'minp' referenced before assignment|" + "min_periods must be an integer" + ) + with pytest.raises(ValueError, match=msg): + self.regular.rolling(window="1D", min_periods=minp) + + def test_on(self): + + df = self.regular + + # not a valid column + msg = ( + r"invalid on specified as foobar, must be a column " + "\\(of DataFrame\\), an Index or None" + ) + with pytest.raises(ValueError, match=msg): + df.rolling(window="2s", on="foobar") + + # column is valid + df = df.copy() + df["C"] = date_range("20130101", periods=len(df)) + df.rolling(window="2d", on="C").sum() + + # invalid columns + msg = "window must be an 
integer" + with pytest.raises(ValueError, match=msg): + df.rolling(window="2d", on="B") + + # ok even though on non-selected + df.rolling(window="2d", on="C").B.sum() + + def test_monotonic_on(self): + + # on/index must be monotonic + df = DataFrame( + {"A": date_range("20130101", periods=5, freq="s"), "B": range(5)} + ) + + assert df.A.is_monotonic + df.rolling("2s", on="A").sum() + + df = df.set_index("A") + assert df.index.is_monotonic + df.rolling("2s").sum() + + def test_non_monotonic_on(self): + # GH 19248 + df = DataFrame( + {"A": date_range("20130101", periods=5, freq="s"), "B": range(5)} + ) + df = df.set_index("A") + non_monotonic_index = df.index.to_list() + non_monotonic_index[0] = non_monotonic_index[3] + df.index = non_monotonic_index + + assert not df.index.is_monotonic + + msg = "index must be monotonic" + with pytest.raises(ValueError, match=msg): + df.rolling("2s").sum() + + df = df.reset_index() + + msg = ( + r"invalid on specified as A, must be a column " + "\\(of DataFrame\\), an Index or None" + ) + with pytest.raises(ValueError, match=msg): + df.rolling("2s", on="A").sum() + + def test_frame_on(self): + + df = DataFrame( + {"B": range(5), "C": date_range("20130101 09:00:00", periods=5, freq="3s")} + ) + + df["A"] = [ + Timestamp("20130101 09:00:00"), + Timestamp("20130101 09:00:02"), + Timestamp("20130101 09:00:03"), + Timestamp("20130101 09:00:05"), + Timestamp("20130101 09:00:06"), + ] + + # we are doing simulating using 'on' + expected = df.set_index("A").rolling("2s").B.sum().reset_index(drop=True) + + result = df.rolling("2s", on="A").B.sum() + tm.assert_series_equal(result, expected) + + # test as a frame + # we should be ignoring the 'on' as an aggregation column + # note that the expected is setting, computing, and resetting + # so the columns need to be switched compared + # to the actual result where they are ordered as in the + # original + expected = ( + df.set_index("A").rolling("2s")[["B"]].sum().reset_index()[["B", "A"]] + ) + + result = df.rolling("2s", on="A")[["B"]].sum() + tm.assert_frame_equal(result, expected) + + def test_frame_on2(self): + + # using multiple aggregation columns + df = DataFrame( + { + "A": [0, 1, 2, 3, 4], + "B": [0, 1, 2, np.nan, 4], + "C": Index( + [ + Timestamp("20130101 09:00:00"), + Timestamp("20130101 09:00:02"), + Timestamp("20130101 09:00:03"), + Timestamp("20130101 09:00:05"), + Timestamp("20130101 09:00:06"), + ] + ), + }, + columns=["A", "C", "B"], + ) + + expected1 = DataFrame( + {"A": [0.0, 1, 3, 3, 7], "B": [0, 1, 3, np.nan, 4], "C": df["C"]}, + columns=["A", "C", "B"], + ) + + result = df.rolling("2s", on="C").sum() + expected = expected1 + tm.assert_frame_equal(result, expected) + + expected = Series([0, 1, 3, np.nan, 4], name="B") + result = df.rolling("2s", on="C").B.sum() + tm.assert_series_equal(result, expected) + + expected = expected1[["A", "B", "C"]] + result = df.rolling("2s", on="C")[["A", "B", "C"]].sum() + tm.assert_frame_equal(result, expected) + + def test_basic_regular(self): + + df = self.regular.copy() + + df.index = date_range("20130101", periods=5, freq="D") + expected = df.rolling(window=1, min_periods=1).sum() + result = df.rolling(window="1D").sum() + tm.assert_frame_equal(result, expected) + + df.index = date_range("20130101", periods=5, freq="2D") + expected = df.rolling(window=1, min_periods=1).sum() + result = df.rolling(window="2D", min_periods=1).sum() + tm.assert_frame_equal(result, expected) + + expected = df.rolling(window=1, min_periods=1).sum() + result = df.rolling(window="2D", 
min_periods=1).sum() + tm.assert_frame_equal(result, expected) + + expected = df.rolling(window=1).sum() + result = df.rolling(window="2D").sum() + tm.assert_frame_equal(result, expected) + + def test_min_periods(self): + + # compare for min_periods + df = self.regular + + # these slightly different + expected = df.rolling(2, min_periods=1).sum() + result = df.rolling("2s").sum() + tm.assert_frame_equal(result, expected) + + expected = df.rolling(2, min_periods=1).sum() + result = df.rolling("2s", min_periods=1).sum() + tm.assert_frame_equal(result, expected) + + def test_closed(self): + + # xref GH13965 + + df = DataFrame( + {"A": [1] * 5}, + index=[ + Timestamp("20130101 09:00:01"), + Timestamp("20130101 09:00:02"), + Timestamp("20130101 09:00:03"), + Timestamp("20130101 09:00:04"), + Timestamp("20130101 09:00:06"), + ], + ) + + # closed must be 'right', 'left', 'both', 'neither' + msg = "closed must be 'right', 'left', 'both' or 'neither'" + with pytest.raises(ValueError, match=msg): + self.regular.rolling(window="2s", closed="blabla") + + expected = df.copy() + expected["A"] = [1.0, 2, 2, 2, 1] + result = df.rolling("2s", closed="right").sum() + tm.assert_frame_equal(result, expected) + + # default should be 'right' + result = df.rolling("2s").sum() + tm.assert_frame_equal(result, expected) + + expected = df.copy() + expected["A"] = [1.0, 2, 3, 3, 2] + result = df.rolling("2s", closed="both").sum() + tm.assert_frame_equal(result, expected) + + expected = df.copy() + expected["A"] = [np.nan, 1.0, 2, 2, 1] + result = df.rolling("2s", closed="left").sum() + tm.assert_frame_equal(result, expected) + + expected = df.copy() + expected["A"] = [np.nan, 1.0, 1, 1, np.nan] + result = df.rolling("2s", closed="neither").sum() + tm.assert_frame_equal(result, expected) + + def test_ragged_sum(self): + + df = self.ragged + result = df.rolling(window="1s", min_periods=1).sum() + expected = df.copy() + expected["B"] = [0.0, 1, 2, 3, 4] + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="2s", min_periods=1).sum() + expected = df.copy() + expected["B"] = [0.0, 1, 3, 3, 7] + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="2s", min_periods=2).sum() + expected = df.copy() + expected["B"] = [np.nan, np.nan, 3, np.nan, 7] + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="3s", min_periods=1).sum() + expected = df.copy() + expected["B"] = [0.0, 1, 3, 5, 7] + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="3s").sum() + expected = df.copy() + expected["B"] = [0.0, 1, 3, 5, 7] + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="4s", min_periods=1).sum() + expected = df.copy() + expected["B"] = [0.0, 1, 3, 6, 9] + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="4s", min_periods=3).sum() + expected = df.copy() + expected["B"] = [np.nan, np.nan, 3, 6, 9] + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="5s", min_periods=1).sum() + expected = df.copy() + expected["B"] = [0.0, 1, 3, 6, 10] + tm.assert_frame_equal(result, expected) + + def test_ragged_mean(self): + + df = self.ragged + result = df.rolling(window="1s", min_periods=1).mean() + expected = df.copy() + expected["B"] = [0.0, 1, 2, 3, 4] + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="2s", min_periods=1).mean() + expected = df.copy() + expected["B"] = [0.0, 1, 1.5, 3.0, 3.5] + tm.assert_frame_equal(result, expected) + + def test_ragged_median(self): + + df = self.ragged + 
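+        # note: self.ragged is stamped at 0s, 2s, 3s, 5s and 6s, so a "2s"
+        # right-closed window holds either one or two observations; the 1.5
+        # and 3.5 in the "2s" case below are medians of two-element windows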
result = df.rolling(window="1s", min_periods=1).median() + expected = df.copy() + expected["B"] = [0.0, 1, 2, 3, 4] + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="2s", min_periods=1).median() + expected = df.copy() + expected["B"] = [0.0, 1, 1.5, 3.0, 3.5] + tm.assert_frame_equal(result, expected) + + def test_ragged_quantile(self): + + df = self.ragged + result = df.rolling(window="1s", min_periods=1).quantile(0.5) + expected = df.copy() + expected["B"] = [0.0, 1, 2, 3, 4] + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="2s", min_periods=1).quantile(0.5) + expected = df.copy() + expected["B"] = [0.0, 1, 1.5, 3.0, 3.5] + tm.assert_frame_equal(result, expected) + + def test_ragged_std(self): + + df = self.ragged + result = df.rolling(window="1s", min_periods=1).std(ddof=0) + expected = df.copy() + expected["B"] = [0.0] * 5 + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="1s", min_periods=1).std(ddof=1) + expected = df.copy() + expected["B"] = [np.nan] * 5 + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="3s", min_periods=1).std(ddof=0) + expected = df.copy() + expected["B"] = [0.0] + [0.5] * 4 + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="5s", min_periods=1).std(ddof=1) + expected = df.copy() + expected["B"] = [np.nan, 0.707107, 1.0, 1.0, 1.290994] + tm.assert_frame_equal(result, expected) + + def test_ragged_var(self): + + df = self.ragged + result = df.rolling(window="1s", min_periods=1).var(ddof=0) + expected = df.copy() + expected["B"] = [0.0] * 5 + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="1s", min_periods=1).var(ddof=1) + expected = df.copy() + expected["B"] = [np.nan] * 5 + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="3s", min_periods=1).var(ddof=0) + expected = df.copy() + expected["B"] = [0.0] + [0.25] * 4 + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="5s", min_periods=1).var(ddof=1) + expected = df.copy() + expected["B"] = [np.nan, 0.5, 1.0, 1.0, 1 + 2 / 3.0] + tm.assert_frame_equal(result, expected) + + def test_ragged_skew(self): + + df = self.ragged + result = df.rolling(window="3s", min_periods=1).skew() + expected = df.copy() + expected["B"] = [np.nan] * 5 + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="5s", min_periods=1).skew() + expected = df.copy() + expected["B"] = [np.nan] * 2 + [0.0, 0.0, 0.0] + tm.assert_frame_equal(result, expected) + + def test_ragged_kurt(self): + + df = self.ragged + result = df.rolling(window="3s", min_periods=1).kurt() + expected = df.copy() + expected["B"] = [np.nan] * 5 + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="5s", min_periods=1).kurt() + expected = df.copy() + expected["B"] = [np.nan] * 4 + [-1.2] + tm.assert_frame_equal(result, expected) + + def test_ragged_count(self): + + df = self.ragged + result = df.rolling(window="1s", min_periods=1).count() + expected = df.copy() + expected["B"] = [1.0, 1, 1, 1, 1] + tm.assert_frame_equal(result, expected) + + df = self.ragged + result = df.rolling(window="1s").count() + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="2s", min_periods=1).count() + expected = df.copy() + expected["B"] = [1.0, 1, 2, 1, 2] + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="2s", min_periods=2).count() + expected = df.copy() + expected["B"] = [np.nan, np.nan, 2, np.nan, 2] + tm.assert_frame_equal(result, 
expected) + + def test_regular_min(self): + + df = DataFrame( + {"A": date_range("20130101", periods=5, freq="s"), "B": [0.0, 1, 2, 3, 4]} + ).set_index("A") + result = df.rolling("1s").min() + expected = df.copy() + expected["B"] = [0.0, 1, 2, 3, 4] + tm.assert_frame_equal(result, expected) + + df = DataFrame( + {"A": date_range("20130101", periods=5, freq="s"), "B": [5, 4, 3, 4, 5]} + ).set_index("A") + + tm.assert_frame_equal(result, expected) + result = df.rolling("2s").min() + expected = df.copy() + expected["B"] = [5.0, 4, 3, 3, 4] + tm.assert_frame_equal(result, expected) + + result = df.rolling("5s").min() + expected = df.copy() + expected["B"] = [5.0, 4, 3, 3, 3] + tm.assert_frame_equal(result, expected) + + def test_ragged_min(self): + + df = self.ragged + + result = df.rolling(window="1s", min_periods=1).min() + expected = df.copy() + expected["B"] = [0.0, 1, 2, 3, 4] + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="2s", min_periods=1).min() + expected = df.copy() + expected["B"] = [0.0, 1, 1, 3, 3] + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="5s", min_periods=1).min() + expected = df.copy() + expected["B"] = [0.0, 0, 0, 1, 1] + tm.assert_frame_equal(result, expected) + + def test_perf_min(self): + + N = 10000 + + dfp = DataFrame( + {"B": np.random.randn(N)}, index=date_range("20130101", periods=N, freq="s") + ) + expected = dfp.rolling(2, min_periods=1).min() + result = dfp.rolling("2s").min() + assert ((result - expected) < 0.01).all().bool() + + expected = dfp.rolling(200, min_periods=1).min() + result = dfp.rolling("200s").min() + assert ((result - expected) < 0.01).all().bool() + + def test_ragged_max(self): + + df = self.ragged + + result = df.rolling(window="1s", min_periods=1).max() + expected = df.copy() + expected["B"] = [0.0, 1, 2, 3, 4] + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="2s", min_periods=1).max() + expected = df.copy() + expected["B"] = [0.0, 1, 2, 3, 4] + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="5s", min_periods=1).max() + expected = df.copy() + expected["B"] = [0.0, 1, 2, 3, 4] + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "freq, op, result_data", + [ + ("ms", "min", [0.0] * 10), + ("ms", "mean", [0.0] * 9 + [2.0 / 9]), + ("ms", "max", [0.0] * 9 + [2.0]), + ("s", "min", [0.0] * 10), + ("s", "mean", [0.0] * 9 + [2.0 / 9]), + ("s", "max", [0.0] * 9 + [2.0]), + ("min", "min", [0.0] * 10), + ("min", "mean", [0.0] * 9 + [2.0 / 9]), + ("min", "max", [0.0] * 9 + [2.0]), + ("h", "min", [0.0] * 10), + ("h", "mean", [0.0] * 9 + [2.0 / 9]), + ("h", "max", [0.0] * 9 + [2.0]), + ("D", "min", [0.0] * 10), + ("D", "mean", [0.0] * 9 + [2.0 / 9]), + ("D", "max", [0.0] * 9 + [2.0]), + ], + ) + def test_freqs_ops(self, freq, op, result_data): + # GH 21096 + index = date_range(start="2018-1-1 01:00:00", freq=f"1{freq}", periods=10) + s = Series(data=0, index=index) + s.iloc[1] = np.nan + s.iloc[-1] = 2 + result = getattr(s.rolling(window=f"10{freq}"), op)() + expected = Series(data=result_data, index=index) + + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "f", + [ + "sum", + "mean", + pytest.param( + "count", + marks=pytest.mark.filterwarnings("ignore:min_periods:FutureWarning"), + ), + "median", + "std", + "var", + "kurt", + "skew", + "min", + "max", + ], + ) + def test_all(self, f): + + # simple comparison of integer vs time-based windowing + df = self.regular * 2 + er = df.rolling(window=1) + r = 
df.rolling(window="1s") + + result = getattr(r, f)() + expected = getattr(er, f)() + tm.assert_frame_equal(result, expected) + + result = r.quantile(0.5) + expected = er.quantile(0.5) + tm.assert_frame_equal(result, expected) + + def test_all2(self, arithmetic_win_operators): + f = arithmetic_win_operators + # more sophisticated comparison of integer vs. + # time-based windowing + df = DataFrame( + {"B": np.arange(50)}, index=date_range("20130101", periods=50, freq="H") + ) + # in-range data + dft = df.between_time("09:00", "16:00") + + r = dft.rolling(window="5H") + + result = getattr(r, f)() + + # we need to roll the days separately + # to compare with a time-based roll + # finally groupby-apply will return a multi-index + # so we need to drop the day + def agg_by_day(x): + x = x.between_time("09:00", "16:00") + return getattr(x.rolling(5, min_periods=1), f)() + + expected = ( + df.groupby(df.index.day).apply(agg_by_day).reset_index(level=0, drop=True) + ) + + tm.assert_frame_equal(result, expected) + + def test_groupby_monotonic(self): + + # GH 15130 + # we don't need to validate monotonicity when grouping + + # GH 43909 we should raise an error here to match + # behaviour of non-groupby rolling. + + data = [ + ["David", "1/1/2015", 100], + ["David", "1/5/2015", 500], + ["David", "5/30/2015", 50], + ["David", "7/25/2015", 50], + ["Ryan", "1/4/2014", 100], + ["Ryan", "1/19/2015", 500], + ["Ryan", "3/31/2016", 50], + ["Joe", "7/1/2015", 100], + ["Joe", "9/9/2015", 500], + ["Joe", "10/15/2015", 50], + ] + + df = DataFrame(data=data, columns=["name", "date", "amount"]) + df["date"] = to_datetime(df["date"]) + df = df.sort_values("date") + + expected = ( + df.set_index("date") + .groupby("name") + .apply(lambda x: x.rolling("180D")["amount"].sum()) + ) + result = df.groupby("name").rolling("180D", on="date")["amount"].sum() + tm.assert_series_equal(result, expected) + + def test_non_monotonic_raises(self): + # GH 13966 (similar to #15130, closed by #15175) + + # superseded by 43909 + + dates = date_range(start="2016-01-01 09:30:00", periods=20, freq="s") + df = DataFrame( + { + "A": [1] * 20 + [2] * 12 + [3] * 8, + "B": np.concatenate((dates, dates)), + "C": np.arange(40), + } + ) + + expected = ( + df.set_index("B").groupby("A").apply(lambda x: x.rolling("4s")["C"].mean()) + ) + with pytest.raises(ValueError, match=r".* must be monotonic"): + df.groupby("A").rolling( + "4s", on="B" + ).C.mean() # should raise for non-monotonic t series + + df2 = df.sort_values("B") + result = df2.groupby("A").rolling("4s", on="B").C.mean() + tm.assert_series_equal(result, expected) + + def test_rolling_cov_offset(self): + # GH16058 + + idx = date_range("2017-01-01", periods=24, freq="1h") + ss = Series(np.arange(len(idx)), index=idx) + + result = ss.rolling("2h").cov() + expected = Series([np.nan] + [0.5] * (len(idx) - 1), index=idx) + tm.assert_series_equal(result, expected) + + expected2 = ss.rolling(2, min_periods=1).cov() + tm.assert_series_equal(result, expected2) + + result = ss.rolling("3h").cov() + expected = Series([np.nan, 0.5] + [1.0] * (len(idx) - 2), index=idx) + tm.assert_series_equal(result, expected) + + expected2 = ss.rolling(3, min_periods=1).cov() + tm.assert_series_equal(result, expected2) + + def test_rolling_on_decreasing_index(self): + # GH-19248, GH-32385 + index = [ + Timestamp("20190101 09:00:30"), + Timestamp("20190101 09:00:27"), + Timestamp("20190101 09:00:20"), + Timestamp("20190101 09:00:18"), + Timestamp("20190101 09:00:10"), + ] + + df = DataFrame({"column": [3, 4, 4, 5, 
6]}, index=index) + result = df.rolling("5s").min() + expected = DataFrame({"column": [3.0, 3.0, 4.0, 4.0, 6.0]}, index=index) + tm.assert_frame_equal(result, expected) + + def test_rolling_on_empty(self): + # GH-32385 + df = DataFrame({"column": []}, index=[]) + result = df.rolling("5s").min() + expected = DataFrame({"column": []}, index=[]) + tm.assert_frame_equal(result, expected) + + def test_rolling_on_multi_index_level(self): + # GH-15584 + df = DataFrame( + {"column": range(6)}, + index=MultiIndex.from_product( + [date_range("20190101", periods=3), range(2)], names=["date", "seq"] + ), + ) + result = df.rolling("10d", on=df.index.get_level_values("date")).sum() + expected = DataFrame( + {"column": [0.0, 1.0, 3.0, 6.0, 10.0, 15.0]}, index=df.index + ) + tm.assert_frame_equal(result, expected) diff --git a/.local/lib/python3.8/site-packages/pandas/tests/window/test_win_type.py b/.local/lib/python3.8/site-packages/pandas/tests/window/test_win_type.py new file mode 100644 index 0000000..6e0edc9 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tests/window/test_win_type.py @@ -0,0 +1,655 @@ +import numpy as np +import pytest + +from pandas.errors import UnsupportedFunctionCall +import pandas.util._test_decorators as td + +from pandas import ( + DataFrame, + Series, + Timedelta, + concat, + date_range, +) +import pandas._testing as tm +from pandas.api.indexers import BaseIndexer + + +@td.skip_if_no_scipy +def test_constructor(frame_or_series): + # GH 12669 + c = frame_or_series(range(5)).rolling + + # valid + c(win_type="boxcar", window=2, min_periods=1) + c(win_type="boxcar", window=2, min_periods=1, center=True) + c(win_type="boxcar", window=2, min_periods=1, center=False) + + +@pytest.mark.parametrize("w", [2.0, "foo", np.array([2])]) +@td.skip_if_no_scipy +def test_invalid_constructor(frame_or_series, w): + # not valid + + c = frame_or_series(range(5)).rolling + with pytest.raises(ValueError, match="min_periods must be an integer"): + c(win_type="boxcar", window=2, min_periods=w) + with pytest.raises(ValueError, match="center must be a boolean"): + c(win_type="boxcar", window=2, min_periods=1, center=w) + + +@pytest.mark.parametrize("wt", ["foobar", 1]) +@td.skip_if_no_scipy +def test_invalid_constructor_wintype(frame_or_series, wt): + c = frame_or_series(range(5)).rolling + with pytest.raises(ValueError, match="Invalid win_type"): + c(win_type=wt, window=2) + + +@td.skip_if_no_scipy +def test_constructor_with_win_type(frame_or_series, win_types): + # GH 12669 + c = frame_or_series(range(5)).rolling + c(win_type=win_types, window=2) + + +@pytest.mark.parametrize("method", ["sum", "mean"]) +def test_numpy_compat(method): + # see gh-12811 + w = Series([2, 4, 6]).rolling(window=2) + + msg = "numpy operations are not valid with window objects" + + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(w, method)(1, 2, 3) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(w, method)(dtype=np.float64) + + +@td.skip_if_no_scipy +@pytest.mark.parametrize("arg", ["median", "kurt", "skew"]) +def test_agg_function_support(arg): + df = DataFrame({"A": np.arange(5)}) + roll = df.rolling(2, win_type="triang") + + msg = f"'{arg}' is not a valid function for 'Window' object" + with pytest.raises(AttributeError, match=msg): + roll.agg(arg) + + with pytest.raises(AttributeError, match=msg): + roll.agg([arg]) + + with pytest.raises(AttributeError, match=msg): + roll.agg({"A": arg}) + + +@td.skip_if_no_scipy +def test_invalid_scipy_arg(): + # This error is raised 
by scipy + msg = r"boxcar\(\) got an unexpected" + with pytest.raises(TypeError, match=msg): + Series(range(3)).rolling(1, win_type="boxcar").mean(foo="bar") + + +@td.skip_if_no_scipy +def test_constructor_with_win_type_invalid(frame_or_series): + # GH 13383 + c = frame_or_series(range(5)).rolling + + msg = "window must be an integer 0 or greater" + + with pytest.raises(ValueError, match=msg): + c(-1, win_type="boxcar") + + +@td.skip_if_no_scipy +@pytest.mark.filterwarnings("ignore:can't resolve:ImportWarning") +def test_window_with_args(): + # make sure that we are aggregating window functions correctly with arg + r = Series(np.random.randn(100)).rolling( + window=10, min_periods=1, win_type="gaussian" + ) + expected = concat([r.mean(std=10), r.mean(std=0.01)], axis=1) + expected.columns = ["", ""] + result = r.aggregate([lambda x: x.mean(std=10), lambda x: x.mean(std=0.01)]) + tm.assert_frame_equal(result, expected) + + def a(x): + return x.mean(std=10) + + def b(x): + return x.mean(std=0.01) + + expected = concat([r.mean(std=10), r.mean(std=0.01)], axis=1) + expected.columns = ["a", "b"] + result = r.aggregate([a, b]) + tm.assert_frame_equal(result, expected) + + +@td.skip_if_no_scipy +def test_win_type_with_method_invalid(): + with pytest.raises( + NotImplementedError, match="'single' is the only supported method type." + ): + Series(range(1)).rolling(1, win_type="triang", method="table") + + +@td.skip_if_no_scipy +@pytest.mark.parametrize("arg", [2000000000, "2s", Timedelta("2s")]) +def test_consistent_win_type_freq(arg): + # GH 15969 + s = Series(range(1)) + with pytest.raises(ValueError, match="Invalid win_type freq"): + s.rolling(arg, win_type="freq") + + +def test_win_type_freq_return_deprecation(): + freq_roll = Series(range(2), index=date_range("2020", periods=2)).rolling("2s") + with tm.assert_produces_warning(FutureWarning): + assert freq_roll.win_type == "freq" + + +@td.skip_if_no_scipy +def test_win_type_not_implemented(): + class CustomIndexer(BaseIndexer): + def get_window_bounds(self, num_values, min_periods, center, closed): + return np.array([0, 1]), np.array([1, 2]) + + df = DataFrame({"values": range(2)}) + indexer = CustomIndexer() + with pytest.raises(NotImplementedError, match="BaseIndexer subclasses not"): + df.rolling(indexer, win_type="boxcar") + + +@td.skip_if_no_scipy +def test_cmov_mean(): + # GH 8238 + vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48]) + result = Series(vals).rolling(5, center=True).mean() + expected_values = [ + np.nan, + np.nan, + 9.962, + 11.27, + 11.564, + 12.516, + 12.818, + 12.952, + np.nan, + np.nan, + ] + expected = Series(expected_values) + tm.assert_series_equal(expected, result) + + +@td.skip_if_no_scipy +def test_cmov_window(): + # GH 8238 + vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48]) + result = Series(vals).rolling(5, win_type="boxcar", center=True).mean() + expected_values = [ + np.nan, + np.nan, + 9.962, + 11.27, + 11.564, + 12.516, + 12.818, + 12.952, + np.nan, + np.nan, + ] + expected = Series(expected_values) + tm.assert_series_equal(expected, result) + + +@td.skip_if_no_scipy +def test_cmov_window_corner(): + # GH 8238 + # all nan + vals = Series([np.nan] * 10) + result = vals.rolling(5, center=True, win_type="boxcar").mean() + assert np.isnan(result).all() + + # empty + vals = Series([], dtype=object) + result = vals.rolling(5, center=True, win_type="boxcar").mean() + assert len(result) == 0 + + # shorter than window + vals = 
Series(np.random.randn(5)) + result = vals.rolling(10, win_type="boxcar").mean() + assert np.isnan(result).all() + assert len(result) == 5 + + +@td.skip_if_no_scipy +@pytest.mark.parametrize( + "f,xp", + [ + ( + "mean", + [ + [np.nan, np.nan], + [np.nan, np.nan], + [9.252, 9.392], + [8.644, 9.906], + [8.87, 10.208], + [6.81, 8.588], + [7.792, 8.644], + [9.05, 7.824], + [np.nan, np.nan], + [np.nan, np.nan], + ], + ), + ( + "std", + [ + [np.nan, np.nan], + [np.nan, np.nan], + [3.789706, 4.068313], + [3.429232, 3.237411], + [3.589269, 3.220810], + [3.405195, 2.380655], + [3.281839, 2.369869], + [3.676846, 1.801799], + [np.nan, np.nan], + [np.nan, np.nan], + ], + ), + ( + "var", + [ + [np.nan, np.nan], + [np.nan, np.nan], + [14.36187, 16.55117], + [11.75963, 10.48083], + [12.88285, 10.37362], + [11.59535, 5.66752], + [10.77047, 5.61628], + [13.51920, 3.24648], + [np.nan, np.nan], + [np.nan, np.nan], + ], + ), + ( + "sum", + [ + [np.nan, np.nan], + [np.nan, np.nan], + [46.26, 46.96], + [43.22, 49.53], + [44.35, 51.04], + [34.05, 42.94], + [38.96, 43.22], + [45.25, 39.12], + [np.nan, np.nan], + [np.nan, np.nan], + ], + ), + ], +) +def test_cmov_window_frame(f, xp): + # Gh 8238 + df = DataFrame( + np.array( + [ + [12.18, 3.64], + [10.18, 9.16], + [13.24, 14.61], + [4.51, 8.11], + [6.15, 11.44], + [9.14, 6.21], + [11.31, 10.67], + [2.94, 6.51], + [9.42, 8.39], + [12.44, 7.34], + ] + ) + ) + xp = DataFrame(np.array(xp)) + + roll = df.rolling(5, win_type="boxcar", center=True) + rs = getattr(roll, f)() + + tm.assert_frame_equal(xp, rs) + + +@td.skip_if_no_scipy +def test_cmov_window_na_min_periods(): + # min_periods + vals = Series(np.random.randn(10)) + vals[4] = np.nan + vals[8] = np.nan + + xp = vals.rolling(5, min_periods=4, center=True).mean() + rs = vals.rolling(5, win_type="boxcar", min_periods=4, center=True).mean() + tm.assert_series_equal(xp, rs) + + +@td.skip_if_no_scipy +def test_cmov_window_regular(win_types): + # GH 8238 + vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48]) + xps = { + "hamming": [ + np.nan, + np.nan, + 8.71384, + 9.56348, + 12.38009, + 14.03687, + 13.8567, + 11.81473, + np.nan, + np.nan, + ], + "triang": [ + np.nan, + np.nan, + 9.28667, + 10.34667, + 12.00556, + 13.33889, + 13.38, + 12.33667, + np.nan, + np.nan, + ], + "barthann": [ + np.nan, + np.nan, + 8.4425, + 9.1925, + 12.5575, + 14.3675, + 14.0825, + 11.5675, + np.nan, + np.nan, + ], + "bohman": [ + np.nan, + np.nan, + 7.61599, + 9.1764, + 12.83559, + 14.17267, + 14.65923, + 11.10401, + np.nan, + np.nan, + ], + "blackmanharris": [ + np.nan, + np.nan, + 6.97691, + 9.16438, + 13.05052, + 14.02156, + 15.10512, + 10.74574, + np.nan, + np.nan, + ], + "nuttall": [ + np.nan, + np.nan, + 7.04618, + 9.16786, + 13.02671, + 14.03559, + 15.05657, + 10.78514, + np.nan, + np.nan, + ], + "blackman": [ + np.nan, + np.nan, + 7.73345, + 9.17869, + 12.79607, + 14.20036, + 14.57726, + 11.16988, + np.nan, + np.nan, + ], + "bartlett": [ + np.nan, + np.nan, + 8.4425, + 9.1925, + 12.5575, + 14.3675, + 14.0825, + 11.5675, + np.nan, + np.nan, + ], + } + + xp = Series(xps[win_types]) + rs = Series(vals).rolling(5, win_type=win_types, center=True).mean() + tm.assert_series_equal(xp, rs) + + +@td.skip_if_no_scipy +def test_cmov_window_regular_linear_range(win_types): + # GH 8238 + vals = np.array(range(10), dtype=float) + xp = vals.copy() + xp[:2] = np.nan + xp[-2:] = np.nan + xp = Series(xp) + + rs = Series(vals).rolling(5, win_type=win_types, center=True).mean() + tm.assert_series_equal(xp, rs) + + 
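+@td.skip_if_no_scipy
+def test_cmov_window_triang_linear_range_illustration():
+    # Illustrative companion sketch (added commentary, not from the upstream
+    # suite): any symmetric, normalized window applied to equally spaced
+    # data reproduces the data away from the edges, so a centered
+    # triangular-weighted mean of range(10) is range(10) with NaN padding.
+    vals = np.array(range(10), dtype=float)
+    rs = Series(vals).rolling(5, win_type="triang", center=True).mean()
+    xp = Series(vals)
+    xp[:2] = np.nan
+    xp[-2:] = np.nan
+    tm.assert_series_equal(xp, rs)
+
+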
+@td.skip_if_no_scipy +def test_cmov_window_regular_missing_data(win_types): + # GH 8238 + vals = np.array( + [6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, np.nan, 10.63, 14.48] + ) + xps = { + "bartlett": [ + np.nan, + np.nan, + 9.70333, + 10.5225, + 8.4425, + 9.1925, + 12.5575, + 14.3675, + 15.61667, + 13.655, + ], + "blackman": [ + np.nan, + np.nan, + 9.04582, + 11.41536, + 7.73345, + 9.17869, + 12.79607, + 14.20036, + 15.8706, + 13.655, + ], + "barthann": [ + np.nan, + np.nan, + 9.70333, + 10.5225, + 8.4425, + 9.1925, + 12.5575, + 14.3675, + 15.61667, + 13.655, + ], + "bohman": [ + np.nan, + np.nan, + 8.9444, + 11.56327, + 7.61599, + 9.1764, + 12.83559, + 14.17267, + 15.90976, + 13.655, + ], + "hamming": [ + np.nan, + np.nan, + 9.59321, + 10.29694, + 8.71384, + 9.56348, + 12.38009, + 14.20565, + 15.24694, + 13.69758, + ], + "nuttall": [ + np.nan, + np.nan, + 8.47693, + 12.2821, + 7.04618, + 9.16786, + 13.02671, + 14.03673, + 16.08759, + 13.65553, + ], + "triang": [ + np.nan, + np.nan, + 9.33167, + 9.76125, + 9.28667, + 10.34667, + 12.00556, + 13.82125, + 14.49429, + 13.765, + ], + "blackmanharris": [ + np.nan, + np.nan, + 8.42526, + 12.36824, + 6.97691, + 9.16438, + 13.05052, + 14.02175, + 16.1098, + 13.65509, + ], + } + + xp = Series(xps[win_types]) + rs = Series(vals).rolling(5, win_type=win_types, min_periods=3).mean() + tm.assert_series_equal(xp, rs) + + +@td.skip_if_no_scipy +def test_cmov_window_special(win_types_special): + # GH 8238 + kwds = { + "kaiser": {"beta": 1.0}, + "gaussian": {"std": 1.0}, + "general_gaussian": {"p": 2.0, "sig": 2.0}, + "exponential": {"tau": 10}, + } + + vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48]) + + xps = { + "gaussian": [ + np.nan, + np.nan, + 8.97297, + 9.76077, + 12.24763, + 13.89053, + 13.65671, + 12.01002, + np.nan, + np.nan, + ], + "general_gaussian": [ + np.nan, + np.nan, + 9.85011, + 10.71589, + 11.73161, + 13.08516, + 12.95111, + 12.74577, + np.nan, + np.nan, + ], + "kaiser": [ + np.nan, + np.nan, + 9.86851, + 11.02969, + 11.65161, + 12.75129, + 12.90702, + 12.83757, + np.nan, + np.nan, + ], + "exponential": [ + np.nan, + np.nan, + 9.83364, + 11.10472, + 11.64551, + 12.66138, + 12.92379, + 12.83770, + np.nan, + np.nan, + ], + } + + xp = Series(xps[win_types_special]) + rs = ( + Series(vals) + .rolling(5, win_type=win_types_special, center=True) + .mean(**kwds[win_types_special]) + ) + tm.assert_series_equal(xp, rs) + + +@td.skip_if_no_scipy +def test_cmov_window_special_linear_range(win_types_special): + # GH 8238 + kwds = { + "kaiser": {"beta": 1.0}, + "gaussian": {"std": 1.0}, + "general_gaussian": {"p": 2.0, "sig": 2.0}, + "slepian": {"width": 0.5}, + "exponential": {"tau": 10}, + } + + vals = np.array(range(10), dtype=float) + xp = vals.copy() + xp[:2] = np.nan + xp[-2:] = np.nan + xp = Series(xp) + + rs = ( + Series(vals) + .rolling(5, win_type=win_types_special, center=True) + .mean(**kwds[win_types_special]) + ) + tm.assert_series_equal(xp, rs) diff --git a/.local/lib/python3.8/site-packages/pandas/tseries/__init__.py b/.local/lib/python3.8/site-packages/pandas/tseries/__init__.py new file mode 100644 index 0000000..dd4ce02 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tseries/__init__.py @@ -0,0 +1,11 @@ +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + # import modules that have public classes/functions: + from pandas.tseries import ( + frequencies, + offsets, + ) + + # and mark only those modules as public + __all__ = ["frequencies", "offsets"] diff --git 
a/.local/lib/python3.8/site-packages/pandas/tseries/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tseries/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..a8bb270 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tseries/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tseries/__pycache__/api.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tseries/__pycache__/api.cpython-38.pyc new file mode 100644 index 0000000..0723056 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tseries/__pycache__/api.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tseries/__pycache__/frequencies.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tseries/__pycache__/frequencies.cpython-38.pyc new file mode 100644 index 0000000..8521680 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tseries/__pycache__/frequencies.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tseries/__pycache__/holiday.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tseries/__pycache__/holiday.cpython-38.pyc new file mode 100644 index 0000000..f3feb0d Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tseries/__pycache__/holiday.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tseries/__pycache__/offsets.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/tseries/__pycache__/offsets.cpython-38.pyc new file mode 100644 index 0000000..218d887 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/tseries/__pycache__/offsets.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/tseries/api.py b/.local/lib/python3.8/site-packages/pandas/tseries/api.py new file mode 100644 index 0000000..59666fa --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tseries/api.py @@ -0,0 +1,8 @@ +""" +Timeseries API +""" + +# flake8: noqa:F401 + +from pandas.tseries.frequencies import infer_freq +import pandas.tseries.offsets as offsets diff --git a/.local/lib/python3.8/site-packages/pandas/tseries/frequencies.py b/.local/lib/python3.8/site-packages/pandas/tseries/frequencies.py new file mode 100644 index 0000000..415af96 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tseries/frequencies.py @@ -0,0 +1,613 @@ +from __future__ import annotations + +import warnings + +import numpy as np + +from pandas._libs.algos import unique_deltas +from pandas._libs.tslibs import ( + Timestamp, + tzconversion, +) +from pandas._libs.tslibs.ccalendar import ( + DAYS, + MONTH_ALIASES, + MONTH_NUMBERS, + MONTHS, + int_to_weekday, +) +from pandas._libs.tslibs.fields import ( + build_field_sarray, + month_position_check, +) +from pandas._libs.tslibs.offsets import ( # noqa:F401 + DateOffset, + Day, + _get_offset, + to_offset, +) +from pandas._libs.tslibs.parsing import get_rule_month +from pandas._typing import npt +from pandas.util._decorators import cache_readonly +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + is_datetime64_dtype, + is_period_dtype, + is_timedelta64_dtype, +) +from pandas.core.dtypes.generic import ABCSeries + +from pandas.core.algorithms import unique + +_ONE_MICRO = 1000 +_ONE_MILLI = _ONE_MICRO * 1000 +_ONE_SECOND = _ONE_MILLI * 1000 +_ONE_MINUTE = 60 * _ONE_SECOND +_ONE_HOUR = 60 * _ONE_MINUTE +_ONE_DAY = 24 * _ONE_HOUR + +# 
--------------------------------------------------------------------- +# Offset names ("time rules") and related functions + +_offset_to_period_map = { + "WEEKDAY": "D", + "EOM": "M", + "BM": "M", + "BQS": "Q", + "QS": "Q", + "BQ": "Q", + "BA": "A", + "AS": "A", + "BAS": "A", + "MS": "M", + "D": "D", + "C": "C", + "B": "B", + "T": "T", + "S": "S", + "L": "L", + "U": "U", + "N": "N", + "H": "H", + "Q": "Q", + "A": "A", + "W": "W", + "M": "M", + "Y": "A", + "BY": "A", + "YS": "A", + "BYS": "A", +} + +_need_suffix = ["QS", "BQ", "BQS", "YS", "AS", "BY", "BA", "BYS", "BAS"] + +for _prefix in _need_suffix: + for _m in MONTHS: + key = f"{_prefix}-{_m}" + _offset_to_period_map[key] = _offset_to_period_map[_prefix] + +for _prefix in ["A", "Q"]: + for _m in MONTHS: + _alias = f"{_prefix}-{_m}" + _offset_to_period_map[_alias] = _alias + +for _d in DAYS: + _offset_to_period_map[f"W-{_d}"] = f"W-{_d}" + + +def get_period_alias(offset_str: str) -> str | None: + """ + Alias to closest period strings BQ->Q etc. + """ + return _offset_to_period_map.get(offset_str, None) + + +def get_offset(name: str) -> DateOffset: + """ + Return DateOffset object associated with rule name. + + .. deprecated:: 1.0.0 + + Examples + -------- + get_offset('EOM') --> BMonthEnd(1) + """ + warnings.warn( + "get_offset is deprecated and will be removed in a future version, " + "use to_offset instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return _get_offset(name) + + +# --------------------------------------------------------------------- +# Period codes + + +def infer_freq(index, warn: bool = True) -> str | None: + """ + Infer the most likely frequency given the input index. If the frequency is + uncertain, a warning will be printed. + + Parameters + ---------- + index : DatetimeIndex or TimedeltaIndex + If passed a Series will use the values of the series (NOT THE INDEX). + warn : bool, default True + + Returns + ------- + str or None + None if no discernible frequency. + + Raises + ------ + TypeError + If the index is not datetime-like. + ValueError + If there are fewer than three values. + + Examples + -------- + >>> idx = pd.date_range(start='2020/12/01', end='2020/12/30', periods=30) + >>> pd.infer_freq(idx) + 'D' + """ + from pandas.core.api import ( + DatetimeIndex, + Float64Index, + Index, + Int64Index, + ) + + if isinstance(index, ABCSeries): + values = index._values + if not ( + is_datetime64_dtype(values) + or is_timedelta64_dtype(values) + or values.dtype == object + ): + raise TypeError( + "cannot infer freq from a non-convertible dtype " + f"on a Series of {index.dtype}" + ) + index = values + + inferer: _FrequencyInferer + + if not hasattr(index, "dtype"): + pass + elif is_period_dtype(index.dtype): + raise TypeError( + "PeriodIndex given. Check the `freq` attribute " + "instead of using infer_freq." 
+ ) + elif is_timedelta64_dtype(index.dtype): + # Allow TimedeltaIndex and TimedeltaArray + inferer = _TimedeltaFrequencyInferer(index, warn=warn) + return inferer.get_freq() + + if isinstance(index, Index) and not isinstance(index, DatetimeIndex): + if isinstance(index, (Int64Index, Float64Index)): + raise TypeError( + f"cannot infer freq from a non-convertible index type {type(index)}" + ) + index = index._values + + if not isinstance(index, DatetimeIndex): + index = DatetimeIndex(index) + + inferer = _FrequencyInferer(index, warn=warn) + return inferer.get_freq() + + +class _FrequencyInferer: + """ + Not sure if I can avoid the state machine here + """ + + def __init__(self, index, warn: bool = True): + self.index = index + self.i8values = index.asi8 + + # This moves the values, which are implicitly in UTC, to the + # the timezone so they are in local time + if hasattr(index, "tz"): + if index.tz is not None: + self.i8values = tzconversion.tz_convert_from_utc( + self.i8values, index.tz + ) + + self.warn = warn + + if len(index) < 3: + raise ValueError("Need at least 3 dates to infer frequency") + + self.is_monotonic = ( + self.index._is_monotonic_increasing or self.index._is_monotonic_decreasing + ) + + @cache_readonly + def deltas(self) -> npt.NDArray[np.int64]: + return unique_deltas(self.i8values) + + @cache_readonly + def deltas_asi8(self) -> npt.NDArray[np.int64]: + # NB: we cannot use self.i8values here because we may have converted + # the tz in __init__ + return unique_deltas(self.index.asi8) + + @cache_readonly + def is_unique(self) -> bool: + return len(self.deltas) == 1 + + @cache_readonly + def is_unique_asi8(self) -> bool: + return len(self.deltas_asi8) == 1 + + def get_freq(self) -> str | None: + """ + Find the appropriate frequency string to describe the inferred + frequency of self.i8values + + Returns + ------- + str or None + """ + if not self.is_monotonic or not self.index._is_unique: + return None + + delta = self.deltas[0] + if delta and _is_multiple(delta, _ONE_DAY): + return self._infer_daily_rule() + + # Business hourly, maybe. 17: one day / 65: one weekend + if self.hour_deltas in ([1, 17], [1, 65], [1, 17, 65]): + return "BH" + + # Possibly intraday frequency. Here we use the + # original .asi8 values as the modified values + # will not work around DST transitions. 
See #8772 + if not self.is_unique_asi8: + return None + + delta = self.deltas_asi8[0] + if _is_multiple(delta, _ONE_HOUR): + # Hours + return _maybe_add_count("H", delta / _ONE_HOUR) + elif _is_multiple(delta, _ONE_MINUTE): + # Minutes + return _maybe_add_count("T", delta / _ONE_MINUTE) + elif _is_multiple(delta, _ONE_SECOND): + # Seconds + return _maybe_add_count("S", delta / _ONE_SECOND) + elif _is_multiple(delta, _ONE_MILLI): + # Milliseconds + return _maybe_add_count("L", delta / _ONE_MILLI) + elif _is_multiple(delta, _ONE_MICRO): + # Microseconds + return _maybe_add_count("U", delta / _ONE_MICRO) + else: + # Nanoseconds + return _maybe_add_count("N", delta) + + @cache_readonly + def day_deltas(self): + return [x / _ONE_DAY for x in self.deltas] + + @cache_readonly + def hour_deltas(self): + return [x / _ONE_HOUR for x in self.deltas] + + @cache_readonly + def fields(self) -> np.ndarray: # structured array of fields + return build_field_sarray(self.i8values) + + @cache_readonly + def rep_stamp(self): + return Timestamp(self.i8values[0]) + + def month_position_check(self): + return month_position_check(self.fields, self.index.dayofweek) + + @cache_readonly + def mdiffs(self) -> npt.NDArray[np.int64]: + nmonths = self.fields["Y"] * 12 + self.fields["M"] + return unique_deltas(nmonths.astype("i8")) + + @cache_readonly + def ydiffs(self) -> npt.NDArray[np.int64]: + return unique_deltas(self.fields["Y"].astype("i8")) + + def _infer_daily_rule(self) -> str | None: + annual_rule = self._get_annual_rule() + if annual_rule: + nyears = self.ydiffs[0] + month = MONTH_ALIASES[self.rep_stamp.month] + alias = f"{annual_rule}-{month}" + return _maybe_add_count(alias, nyears) + + quarterly_rule = self._get_quarterly_rule() + if quarterly_rule: + nquarters = self.mdiffs[0] / 3 + mod_dict = {0: 12, 2: 11, 1: 10} + month = MONTH_ALIASES[mod_dict[self.rep_stamp.month % 3]] + alias = f"{quarterly_rule}-{month}" + return _maybe_add_count(alias, nquarters) + + monthly_rule = self._get_monthly_rule() + if monthly_rule: + return _maybe_add_count(monthly_rule, self.mdiffs[0]) + + if self.is_unique: + return self._get_daily_rule() + + if self._is_business_daily(): + return "B" + + wom_rule = self._get_wom_rule() + if wom_rule: + return wom_rule + + return None + + def _get_daily_rule(self) -> str | None: + days = self.deltas[0] / _ONE_DAY + if days % 7 == 0: + # Weekly + wd = int_to_weekday[self.rep_stamp.weekday()] + alias = f"W-{wd}" + return _maybe_add_count(alias, days / 7) + else: + return _maybe_add_count("D", days) + + def _get_annual_rule(self) -> str | None: + if len(self.ydiffs) > 1: + return None + + if len(unique(self.fields["M"])) > 1: + return None + + pos_check = self.month_position_check() + return {"cs": "AS", "bs": "BAS", "ce": "A", "be": "BA"}.get(pos_check) + + def _get_quarterly_rule(self) -> str | None: + if len(self.mdiffs) > 1: + return None + + if not self.mdiffs[0] % 3 == 0: + return None + + pos_check = self.month_position_check() + return {"cs": "QS", "bs": "BQS", "ce": "Q", "be": "BQ"}.get(pos_check) + + def _get_monthly_rule(self) -> str | None: + if len(self.mdiffs) > 1: + return None + pos_check = self.month_position_check() + return {"cs": "MS", "bs": "BMS", "ce": "M", "be": "BM"}.get(pos_check) + + def _is_business_daily(self) -> bool: + # quick check: cannot be business daily + if self.day_deltas != [1, 3]: + return False + + # probably business daily, but need to confirm + first_weekday = self.index[0].weekday() + shifts = np.diff(self.index.asi8) + shifts = 
np.floor_divide(shifts, _ONE_DAY) + weekdays = np.mod(first_weekday + np.cumsum(shifts), 7) + + return bool( + np.all( + ((weekdays == 0) & (shifts == 3)) + | ((weekdays > 0) & (weekdays <= 4) & (shifts == 1)) + ) + ) + + def _get_wom_rule(self) -> str | None: + # FIXME: dont leave commented-out + # wdiffs = unique(np.diff(self.index.week)) + # We also need -47, -49, -48 to catch index spanning year boundary + # if not lib.ismember(wdiffs, set([4, 5, -47, -49, -48])).all(): + # return None + + weekdays = unique(self.index.weekday) + if len(weekdays) > 1: + return None + + week_of_months = unique((self.index.day - 1) // 7) + # Only attempt to infer up to WOM-4. See #9425 + week_of_months = week_of_months[week_of_months < 4] + if len(week_of_months) == 0 or len(week_of_months) > 1: + return None + + # get which week + week = week_of_months[0] + 1 + wd = int_to_weekday[weekdays[0]] + + return f"WOM-{week}{wd}" + + +class _TimedeltaFrequencyInferer(_FrequencyInferer): + def _infer_daily_rule(self): + if self.is_unique: + return self._get_daily_rule() + + +def _is_multiple(us, mult: int) -> bool: + return us % mult == 0 + + +def _maybe_add_count(base: str, count: float) -> str: + if count != 1: + assert count == int(count) + count = int(count) + return f"{count}{base}" + else: + return base + + +# ---------------------------------------------------------------------- +# Frequency comparison + + +def is_subperiod(source, target) -> bool: + """ + Returns True if downsampling is possible between source and target + frequencies + + Parameters + ---------- + source : str or DateOffset + Frequency converting from + target : str or DateOffset + Frequency converting to + + Returns + ------- + bool + """ + + if target is None or source is None: + return False + source = _maybe_coerce_freq(source) + target = _maybe_coerce_freq(target) + + if _is_annual(target): + if _is_quarterly(source): + return _quarter_months_conform( + get_rule_month(source), get_rule_month(target) + ) + return source in {"D", "C", "B", "M", "H", "T", "S", "L", "U", "N"} + elif _is_quarterly(target): + return source in {"D", "C", "B", "M", "H", "T", "S", "L", "U", "N"} + elif _is_monthly(target): + return source in {"D", "C", "B", "H", "T", "S", "L", "U", "N"} + elif _is_weekly(target): + return source in {target, "D", "C", "B", "H", "T", "S", "L", "U", "N"} + elif target == "B": + return source in {"B", "H", "T", "S", "L", "U", "N"} + elif target == "C": + return source in {"C", "H", "T", "S", "L", "U", "N"} + elif target == "D": + return source in {"D", "H", "T", "S", "L", "U", "N"} + elif target == "H": + return source in {"H", "T", "S", "L", "U", "N"} + elif target == "T": + return source in {"T", "S", "L", "U", "N"} + elif target == "S": + return source in {"S", "L", "U", "N"} + elif target == "L": + return source in {"L", "U", "N"} + elif target == "U": + return source in {"U", "N"} + elif target == "N": + return source in {"N"} + else: + return False + + +def is_superperiod(source, target) -> bool: + """ + Returns True if upsampling is possible between source and target + frequencies + + Parameters + ---------- + source : str or DateOffset + Frequency converting from + target : str or DateOffset + Frequency converting to + + Returns + ------- + bool + """ + if target is None or source is None: + return False + source = _maybe_coerce_freq(source) + target = _maybe_coerce_freq(target) + + if _is_annual(source): + if _is_annual(target): + return get_rule_month(source) == get_rule_month(target) + + if _is_quarterly(target): + 
smonth = get_rule_month(source) + tmonth = get_rule_month(target) + return _quarter_months_conform(smonth, tmonth) + return target in {"D", "C", "B", "M", "H", "T", "S", "L", "U", "N"} + elif _is_quarterly(source): + return target in {"D", "C", "B", "M", "H", "T", "S", "L", "U", "N"} + elif _is_monthly(source): + return target in {"D", "C", "B", "H", "T", "S", "L", "U", "N"} + elif _is_weekly(source): + return target in {source, "D", "C", "B", "H", "T", "S", "L", "U", "N"} + elif source == "B": + return target in {"D", "C", "B", "H", "T", "S", "L", "U", "N"} + elif source == "C": + return target in {"D", "C", "B", "H", "T", "S", "L", "U", "N"} + elif source == "D": + return target in {"D", "C", "B", "H", "T", "S", "L", "U", "N"} + elif source == "H": + return target in {"H", "T", "S", "L", "U", "N"} + elif source == "T": + return target in {"T", "S", "L", "U", "N"} + elif source == "S": + return target in {"S", "L", "U", "N"} + elif source == "L": + return target in {"L", "U", "N"} + elif source == "U": + return target in {"U", "N"} + elif source == "N": + return target in {"N"} + else: + return False + + +def _maybe_coerce_freq(code) -> str: + """we might need to coerce a code to a rule_code + and uppercase it + + Parameters + ---------- + source : str or DateOffset + Frequency converting from + + Returns + ------- + str + """ + assert code is not None + if isinstance(code, DateOffset): + code = code.rule_code + return code.upper() + + +def _quarter_months_conform(source: str, target: str) -> bool: + snum = MONTH_NUMBERS[source] + tnum = MONTH_NUMBERS[target] + return snum % 3 == tnum % 3 + + +def _is_annual(rule: str) -> bool: + rule = rule.upper() + return rule == "A" or rule.startswith("A-") + + +def _is_quarterly(rule: str) -> bool: + rule = rule.upper() + return rule == "Q" or rule.startswith("Q-") or rule.startswith("BQ") + + +def _is_monthly(rule: str) -> bool: + rule = rule.upper() + return rule == "M" or rule == "BM" + + +def _is_weekly(rule: str) -> bool: + rule = rule.upper() + return rule == "W" or rule.startswith("W-") diff --git a/.local/lib/python3.8/site-packages/pandas/tseries/holiday.py b/.local/lib/python3.8/site-packages/pandas/tseries/holiday.py new file mode 100644 index 0000000..10b09cb --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tseries/holiday.py @@ -0,0 +1,582 @@ +from __future__ import annotations + +from datetime import ( + datetime, + timedelta, +) +import warnings + +from dateutil.relativedelta import ( # noqa:F401 + FR, + MO, + SA, + SU, + TH, + TU, + WE, +) +import numpy as np + +from pandas.errors import PerformanceWarning + +from pandas import ( + DateOffset, + DatetimeIndex, + Series, + Timestamp, + concat, + date_range, +) + +from pandas.tseries.offsets import ( + Day, + Easter, +) + + +def next_monday(dt: datetime) -> datetime: + """ + If holiday falls on Saturday, use following Monday instead; + if holiday falls on Sunday, use Monday instead + """ + if dt.weekday() == 5: + return dt + timedelta(2) + elif dt.weekday() == 6: + return dt + timedelta(1) + return dt + + +def next_monday_or_tuesday(dt: datetime) -> datetime: + """ + For second holiday of two adjacent ones! 
+ If holiday falls on Saturday, use following Monday instead; + if holiday falls on Sunday or Monday, use following Tuesday instead + (because Monday is already taken by adjacent holiday on the day before) + """ + dow = dt.weekday() + if dow == 5 or dow == 6: + return dt + timedelta(2) + elif dow == 0: + return dt + timedelta(1) + return dt + + +def previous_friday(dt: datetime) -> datetime: + """ + If holiday falls on Saturday or Sunday, use previous Friday instead. + """ + if dt.weekday() == 5: + return dt - timedelta(1) + elif dt.weekday() == 6: + return dt - timedelta(2) + return dt + + +def sunday_to_monday(dt: datetime) -> datetime: + """ + If holiday falls on Sunday, use day thereafter (Monday) instead. + """ + if dt.weekday() == 6: + return dt + timedelta(1) + return dt + + +def weekend_to_monday(dt: datetime) -> datetime: + """ + If holiday falls on Sunday or Saturday, + use day thereafter (Monday) instead. + Needed for holidays such as Christmas observation in Europe + """ + if dt.weekday() == 6: + return dt + timedelta(1) + elif dt.weekday() == 5: + return dt + timedelta(2) + return dt + + +def nearest_workday(dt: datetime) -> datetime: + """ + If holiday falls on Saturday, use day before (Friday) instead; + if holiday falls on Sunday, use day thereafter (Monday) instead. + """ + if dt.weekday() == 5: + return dt - timedelta(1) + elif dt.weekday() == 6: + return dt + timedelta(1) + return dt + + +def next_workday(dt: datetime) -> datetime: + """ + returns next weekday used for observances + """ + dt += timedelta(days=1) + while dt.weekday() > 4: + # Mon-Fri are 0-4 + dt += timedelta(days=1) + return dt + + +def previous_workday(dt: datetime) -> datetime: + """ + returns previous weekday used for observances + """ + dt -= timedelta(days=1) + while dt.weekday() > 4: + # Mon-Fri are 0-4 + dt -= timedelta(days=1) + return dt + + +def before_nearest_workday(dt: datetime) -> datetime: + """ + returns previous workday after nearest workday + """ + return previous_workday(nearest_workday(dt)) + + +def after_nearest_workday(dt: datetime) -> datetime: + """ + returns next workday after nearest workday + needed for Boxing day or multiple holidays in a series + """ + return next_workday(nearest_workday(dt)) + + +class Holiday: + """ + Class that defines a holiday with start/end dates and rules + for observance. + """ + + def __init__( + self, + name, + year=None, + month=None, + day=None, + offset=None, + observance=None, + start_date=None, + end_date=None, + days_of_week=None, + ): + """ + Parameters + ---------- + name : str + Name of the holiday , defaults to class name + offset : array of pandas.tseries.offsets or + class from pandas.tseries.offsets + computes offset from date + observance: function + computes when holiday is given a pandas Timestamp + days_of_week: + provide a tuple of days e.g (0,1,2,3,) for Monday Through Thursday + Monday=0,..,Sunday=6 + + Examples + -------- + >>> from pandas.tseries.holiday import Holiday, nearest_workday + >>> from dateutil.relativedelta import MO + + >>> USMemorialDay = Holiday( + ... "Memorial Day", month=5, day=31, offset=pd.DateOffset(weekday=MO(-1)) + ... ) + >>> USMemorialDay + Holiday: Memorial Day (month=5, day=31, offset=<DateOffset: weekday=MO(-1)>) + + >>> USLaborDay = Holiday( + ... "Labor Day", month=9, day=1, offset=pd.DateOffset(weekday=MO(1)) + ... ) + >>> USLaborDay + Holiday: Labor Day (month=9, day=1, offset=<DateOffset: weekday=MO(+1)>) + + >>> July3rd = Holiday("July 3rd", month=7, day=3) + >>> July3rd + Holiday: July 3rd (month=7, day=3, ) + + >>> NewYears = Holiday( + ... "New Years Day", month=1, day=1, + ... observance=nearest_workday + ... ) + >>> NewYears # doctest: +SKIP + Holiday: New Years Day ( + month=1, day=1, observance=<function nearest_workday at 0x66545e9bc440> + ) + + >>> July3rd = Holiday("July 3rd", month=7, day=3, days_of_week=(0, 1, 2, 3)) + >>> July3rd + Holiday: July 3rd (month=7, day=3, ) + """ + if offset is not None and observance is not None: + raise NotImplementedError("Cannot use both offset and observance.") + + self.name = name + self.year = year + self.month = month + self.day = day + self.offset = offset + self.start_date = ( + Timestamp(start_date) if start_date is not None else start_date + ) + self.end_date = Timestamp(end_date) if end_date is not None else end_date + self.observance = observance + assert days_of_week is None or type(days_of_week) == tuple + self.days_of_week = days_of_week + + def __repr__(self) -> str: + info = "" + if self.year is not None: + info += f"year={self.year}, " + info += f"month={self.month}, day={self.day}, " + + if self.offset is not None: + info += f"offset={self.offset}" + + if self.observance is not None: + info += f"observance={self.observance}" + + repr = f"Holiday: {self.name} ({info})" + return repr + + def dates(self, start_date, end_date, return_name=False): + """ + Calculate holidays observed between start date and end date + + Parameters + ---------- + start_date : starting date, datetime-like, optional + end_date : ending date, datetime-like, optional + return_name : bool, optional, default=False + If True, return a series that has dates and holiday names. + False will only return dates. + """ + start_date = Timestamp(start_date) + end_date = Timestamp(end_date) + + filter_start_date = start_date + filter_end_date = end_date + + if self.year is not None: + dt = Timestamp(datetime(self.year, self.month, self.day)) + if return_name: + return Series(self.name, index=[dt]) + else: + return [dt] + + dates = self._reference_dates(start_date, end_date) + holiday_dates = self._apply_rule(dates) + if self.days_of_week is not None: + holiday_dates = holiday_dates[ + np.in1d(holiday_dates.dayofweek, self.days_of_week) + ] + + if self.start_date is not None: + filter_start_date = max( + self.start_date.tz_localize(filter_start_date.tz), filter_start_date + ) + if self.end_date is not None: + filter_end_date = min( + self.end_date.tz_localize(filter_end_date.tz), filter_end_date + ) + holiday_dates = holiday_dates[ + (holiday_dates >= filter_start_date) & (holiday_dates <= filter_end_date) + ] + if return_name: + return Series(self.name, index=holiday_dates) + return holiday_dates + + def _reference_dates(self, start_date, end_date): + """ + Get reference dates for the holiday. + + Return reference dates for the holiday also returning the year + prior to the start_date and year following the end_date. This ensures + that any offsets to be applied will yield the holidays within + the passed in dates.
+ """ + if self.start_date is not None: + start_date = self.start_date.tz_localize(start_date.tz) + + if self.end_date is not None: + end_date = self.end_date.tz_localize(start_date.tz) + + year_offset = DateOffset(years=1) + reference_start_date = Timestamp( + datetime(start_date.year - 1, self.month, self.day) + ) + + reference_end_date = Timestamp( + datetime(end_date.year + 1, self.month, self.day) + ) + # Don't process unnecessary holidays + dates = date_range( + start=reference_start_date, + end=reference_end_date, + freq=year_offset, + tz=start_date.tz, + ) + + return dates + + def _apply_rule(self, dates): + """ + Apply the given offset/observance to a DatetimeIndex of dates. + + Parameters + ---------- + dates : DatetimeIndex + Dates to apply the given offset/observance rule + + Returns + ------- + Dates with rules applied + """ + if self.observance is not None: + return dates.map(lambda d: self.observance(d)) + + if self.offset is not None: + if not isinstance(self.offset, list): + offsets = [self.offset] + else: + offsets = self.offset + for offset in offsets: + + # if we are adding a non-vectorized value + # ignore the PerformanceWarnings: + with warnings.catch_warnings(): + warnings.simplefilter("ignore", PerformanceWarning) + dates += offset + return dates + + +holiday_calendars = {} + + +def register(cls): + try: + name = cls.name + except AttributeError: + name = cls.__name__ + holiday_calendars[name] = cls + + +def get_calendar(name): + """ + Return an instance of a calendar based on its name. + + Parameters + ---------- + name : str + Calendar name to return an instance of + """ + return holiday_calendars[name]() + + +class HolidayCalendarMetaClass(type): + def __new__(cls, clsname, bases, attrs): + calendar_class = super().__new__(cls, clsname, bases, attrs) + register(calendar_class) + return calendar_class + + +class AbstractHolidayCalendar(metaclass=HolidayCalendarMetaClass): + """ + Abstract interface to create holidays following certain rules. + """ + + rules: list[Holiday] = [] + start_date = Timestamp(datetime(1970, 1, 1)) + end_date = Timestamp(datetime(2200, 12, 31)) + _cache = None + + def __init__(self, name=None, rules=None): + """ + Initializes holiday object with a given set a rules. Normally + classes just have the rules defined within them. + + Parameters + ---------- + name : str + Name of the holiday calendar, defaults to class name + rules : array of Holiday objects + A set of rules used to create the holidays. + """ + super().__init__() + if name is None: + name = type(self).__name__ + self.name = name + + if rules is not None: + self.rules = rules + + def rule_from_name(self, name): + for rule in self.rules: + if rule.name == name: + return rule + + return None + + def holidays(self, start=None, end=None, return_name=False): + """ + Returns a curve with holidays between start_date and end_date + + Parameters + ---------- + start : starting date, datetime-like, optional + end : ending date, datetime-like, optional + return_name : bool, optional + If True, return a series that has dates and holiday names. + False will only return a DatetimeIndex of dates. 
+ + Returns + ------- + DatetimeIndex of holidays + """ + if self.rules is None: + raise Exception( + f"Holiday Calendar {self.name} does not have any rules specified" + ) + + if start is None: + start = AbstractHolidayCalendar.start_date + + if end is None: + end = AbstractHolidayCalendar.end_date + + start = Timestamp(start) + end = Timestamp(end) + + # If we don't have a cache or the dates are outside the prior cache, we + # get them again + if self._cache is None or start < self._cache[0] or end > self._cache[1]: + pre_holidays = [ + rule.dates(start, end, return_name=True) for rule in self.rules + ] + if pre_holidays: + holidays = concat(pre_holidays) + else: + holidays = Series(index=DatetimeIndex([]), dtype=object) + + self._cache = (start, end, holidays.sort_index()) + + holidays = self._cache[2] + holidays = holidays[start:end] + + if return_name: + return holidays + else: + return holidays.index + + @staticmethod + def merge_class(base, other): + """ + Merge holiday calendars together. The base calendar + will take precedence to other. The merge will be done + based on each holiday's name. + + Parameters + ---------- + base : AbstractHolidayCalendar + instance/subclass or array of Holiday objects + other : AbstractHolidayCalendar + instance/subclass or array of Holiday objects + """ + try: + other = other.rules + except AttributeError: + pass + + if not isinstance(other, list): + other = [other] + other_holidays = {holiday.name: holiday for holiday in other} + + try: + base = base.rules + except AttributeError: + pass + + if not isinstance(base, list): + base = [base] + base_holidays = {holiday.name: holiday for holiday in base} + + other_holidays.update(base_holidays) + return list(other_holidays.values()) + + def merge(self, other, inplace=False): + """ + Merge holiday calendars together. The caller's class + rules take precedence. The merge will be done + based on each holiday's name. 
+ + Parameters + ---------- + other : holiday calendar + inplace : bool (default=False) + If True set rule_table to holidays, else return array of Holidays + """ + holidays = self.merge_class(self, other) + if inplace: + self.rules = holidays + else: + return holidays + + +USMemorialDay = Holiday( + "Memorial Day", month=5, day=31, offset=DateOffset(weekday=MO(-1)) +) +USLaborDay = Holiday("Labor Day", month=9, day=1, offset=DateOffset(weekday=MO(1))) +USColumbusDay = Holiday( + "Columbus Day", month=10, day=1, offset=DateOffset(weekday=MO(2)) +) +USThanksgivingDay = Holiday( + "Thanksgiving Day", month=11, day=1, offset=DateOffset(weekday=TH(4)) +) +USMartinLutherKingJr = Holiday( + "Birthday of Martin Luther King, Jr.", + start_date=datetime(1986, 1, 1), + month=1, + day=1, + offset=DateOffset(weekday=MO(3)), +) +USPresidentsDay = Holiday( + "Washington’s Birthday", month=2, day=1, offset=DateOffset(weekday=MO(3)) +) +GoodFriday = Holiday("Good Friday", month=1, day=1, offset=[Easter(), Day(-2)]) + +EasterMonday = Holiday("Easter Monday", month=1, day=1, offset=[Easter(), Day(1)]) + + +class USFederalHolidayCalendar(AbstractHolidayCalendar): + """ + US Federal Government Holiday Calendar based on rules specified by: + https://www.opm.gov/policy-data-oversight/ + snow-dismissal-procedures/federal-holidays/ + """ + + rules = [ + Holiday("New Year's Day", month=1, day=1, observance=nearest_workday), + USMartinLutherKingJr, + USPresidentsDay, + USMemorialDay, + Holiday( + "Juneteenth National Independence Day", + month=6, + day=19, + start_date="2021-06-18", + observance=nearest_workday, + ), + Holiday("Independence Day", month=7, day=4, observance=nearest_workday), + USLaborDay, + USColumbusDay, + Holiday("Veterans Day", month=11, day=11, observance=nearest_workday), + USThanksgivingDay, + Holiday("Christmas Day", month=12, day=25, observance=nearest_workday), + ] + + +def HolidayCalendarFactory(name, base, other, base_class=AbstractHolidayCalendar): + rules = AbstractHolidayCalendar.merge_class(base, other) + calendar_class = type(name, (base_class,), {"rules": rules, "name": name}) + return calendar_class diff --git a/.local/lib/python3.8/site-packages/pandas/tseries/offsets.py b/.local/lib/python3.8/site-packages/pandas/tseries/offsets.py new file mode 100644 index 0000000..cee99d2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/tseries/offsets.py @@ -0,0 +1,83 @@ +from pandas._libs.tslibs.offsets import ( # noqa:F401 + FY5253, + BaseOffset, + BDay, + BMonthBegin, + BMonthEnd, + BQuarterBegin, + BQuarterEnd, + BusinessDay, + BusinessHour, + BusinessMonthBegin, + BusinessMonthEnd, + BYearBegin, + BYearEnd, + CBMonthBegin, + CBMonthEnd, + CDay, + CustomBusinessDay, + CustomBusinessHour, + CustomBusinessMonthBegin, + CustomBusinessMonthEnd, + DateOffset, + Day, + Easter, + FY5253Quarter, + Hour, + LastWeekOfMonth, + Micro, + Milli, + Minute, + MonthBegin, + MonthEnd, + Nano, + QuarterBegin, + QuarterEnd, + Second, + SemiMonthBegin, + SemiMonthEnd, + Tick, + Week, + WeekOfMonth, + YearBegin, + YearEnd, +) + +__all__ = [ + "Day", + "BusinessDay", + "BDay", + "CustomBusinessDay", + "CDay", + "CBMonthEnd", + "CBMonthBegin", + "MonthBegin", + "BMonthBegin", + "MonthEnd", + "BMonthEnd", + "SemiMonthEnd", + "SemiMonthBegin", + "BusinessHour", + "CustomBusinessHour", + "YearBegin", + "BYearBegin", + "YearEnd", + "BYearEnd", + "QuarterBegin", + "BQuarterBegin", + "QuarterEnd", + "BQuarterEnd", + "LastWeekOfMonth", + "FY5253Quarter", + "FY5253", + "Week", + "WeekOfMonth", + "Easter", + 
"Hour", + "Minute", + "Second", + "Milli", + "Micro", + "Nano", + "DateOffset", +] diff --git a/.local/lib/python3.8/site-packages/pandas/util/__init__.py b/.local/lib/python3.8/site-packages/pandas/util/__init__.py new file mode 100644 index 0000000..7adfca7 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/util/__init__.py @@ -0,0 +1,19 @@ +from pandas.util._decorators import ( # noqa:F401 + Appender, + Substitution, + cache_readonly, +) + +from pandas.core.util.hashing import ( # noqa:F401 + hash_array, + hash_pandas_object, +) + + +def __getattr__(name): + if name == "testing": + import pandas.util.testing + + return pandas.util.testing + else: + raise AttributeError(f"module 'pandas.util' has no attribute '{name}'") diff --git a/.local/lib/python3.8/site-packages/pandas/util/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/util/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..3843f4a Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/util/__pycache__/__init__.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/util/__pycache__/_decorators.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/util/__pycache__/_decorators.cpython-38.pyc new file mode 100644 index 0000000..03b7453 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/util/__pycache__/_decorators.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/util/__pycache__/_doctools.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/util/__pycache__/_doctools.cpython-38.pyc new file mode 100644 index 0000000..95d0794 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/util/__pycache__/_doctools.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/util/__pycache__/_exceptions.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/util/__pycache__/_exceptions.cpython-38.pyc new file mode 100644 index 0000000..2f69d5f Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/util/__pycache__/_exceptions.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/util/__pycache__/_print_versions.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/util/__pycache__/_print_versions.cpython-38.pyc new file mode 100644 index 0000000..0b19434 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/util/__pycache__/_print_versions.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/util/__pycache__/_test_decorators.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/util/__pycache__/_test_decorators.cpython-38.pyc new file mode 100644 index 0000000..a2c8f13 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/util/__pycache__/_test_decorators.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/util/__pycache__/_tester.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/util/__pycache__/_tester.cpython-38.pyc new file mode 100644 index 0000000..4896594 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/util/__pycache__/_tester.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/util/__pycache__/_validators.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/util/__pycache__/_validators.cpython-38.pyc new file mode 100644 index 0000000..0e009dd Binary files /dev/null and 
b/.local/lib/python3.8/site-packages/pandas/util/__pycache__/_validators.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/util/__pycache__/testing.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/util/__pycache__/testing.cpython-38.pyc new file mode 100644 index 0000000..2efed84 Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/util/__pycache__/testing.cpython-38.pyc differ diff --git a/.local/lib/python3.8/site-packages/pandas/util/_decorators.py b/.local/lib/python3.8/site-packages/pandas/util/_decorators.py new file mode 100644 index 0000000..39a729b --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/util/_decorators.py @@ -0,0 +1,497 @@ +from __future__ import annotations + +from functools import wraps +import inspect +from textwrap import dedent +from typing import ( + Any, + Callable, + Mapping, + cast, +) +import warnings + +from pandas._libs.properties import cache_readonly # noqa:F401 +from pandas._typing import F + + +def deprecate( + name: str, + alternative: Callable[..., Any], + version: str, + alt_name: str | None = None, + klass: type[Warning] | None = None, + stacklevel: int = 2, + msg: str | None = None, +) -> Callable[[F], F]: + """ + Return a new function that emits a deprecation warning on use. + + To use this method for a deprecated function, another function + `alternative` with the same signature must exist. The deprecated + function will emit a deprecation warning, and in the docstring + it will contain the deprecation directive with the provided version + so it can be detected for future removal. + + Parameters + ---------- + name : str + Name of function to deprecate. + alternative : func + Function to use instead. + version : str + Version of pandas in which the method has been deprecated. + alt_name : str, optional + Name to use in preference of alternative.__name__. + klass : Warning, default FutureWarning + stacklevel : int, default 2 + msg : str + The message to display in the warning. + Default is '{name} is deprecated. Use {alt_name} instead.' + """ + alt_name = alt_name or alternative.__name__ + klass = klass or FutureWarning + warning_msg = msg or f"{name} is deprecated, use {alt_name} instead." + + @wraps(alternative) + def wrapper(*args, **kwargs) -> Callable[..., Any]: + warnings.warn(warning_msg, klass, stacklevel=stacklevel) + return alternative(*args, **kwargs) + + # adding deprecated directive to the docstring + msg = msg or f"Use `{alt_name}` instead." + doc_error_msg = ( + "deprecate needs a correctly formatted docstring in " + "the target function (should have a one liner short " + "summary, and opening quotes should be in their own " + f"line). Found:\n{alternative.__doc__}" + ) + + # when python is running in optimized mode (i.e. `-OO`), docstrings are + # removed, so we check that a docstring with correct formatting is used + # but we allow empty docstrings + if alternative.__doc__: + if alternative.__doc__.count("\n") < 3: + raise AssertionError(doc_error_msg) + empty1, summary, empty2, doc = alternative.__doc__.split("\n", 3) + if empty1 or empty2 and not summary: + raise AssertionError(doc_error_msg) + wrapper.__doc__ = dedent( + f""" + {summary.strip()} + + .. 
deprecated:: {version} + {msg} + + {dedent(doc)}""" + ) + # error: Incompatible return value type (got "Callable[[VarArg(Any), KwArg(Any)], + # Callable[...,Any]]", expected "Callable[[F], F]") + return wrapper # type: ignore[return-value] + + +def deprecate_kwarg( + old_arg_name: str, + new_arg_name: str | None, + mapping: Mapping[Any, Any] | Callable[[Any], Any] | None = None, + stacklevel: int = 2, +) -> Callable[[F], F]: + """ + Decorator to deprecate a keyword argument of a function. + + Parameters + ---------- + old_arg_name : str + Name of argument in function to deprecate + new_arg_name : str or None + Name of preferred argument in function. Use None to raise warning that + ``old_arg_name`` keyword is deprecated. + mapping : dict or callable + If mapping is present, use it to translate old arguments to + new arguments. A callable must do its own value checking; + values not found in a dict will be forwarded unchanged. + + Examples + -------- + The following deprecates 'cols', using 'columns' instead + + >>> @deprecate_kwarg(old_arg_name='cols', new_arg_name='columns') + ... def f(columns=''): + ... print(columns) + ... + >>> f(columns='should work ok') + should work ok + + >>> f(cols='should raise warning') # doctest: +SKIP + FutureWarning: cols is deprecated, use columns instead + warnings.warn(msg, FutureWarning) + should raise warning + + >>> f(cols='should error', columns="can\'t pass do both") # doctest: +SKIP + TypeError: Can only specify 'cols' or 'columns', not both + + >>> @deprecate_kwarg('old', 'new', {'yes': True, 'no': False}) + ... def f(new=False): + ... print('yes!' if new else 'no!') + ... + >>> f(old='yes') # doctest: +SKIP + FutureWarning: old='yes' is deprecated, use new=True instead + warnings.warn(msg, FutureWarning) + yes! + + To raise a warning that a keyword will be removed entirely in the future + + >>> @deprecate_kwarg(old_arg_name='cols', new_arg_name=None) + ... def f(cols='', another_param=''): + ... print(cols) + ... + >>> f(cols='should raise warning') # doctest: +SKIP + FutureWarning: the 'cols' keyword is deprecated and will be removed in a + future version please takes steps to stop use of 'cols' + should raise warning + >>> f(another_param='should not raise warning') # doctest: +SKIP + should not raise warning + + >>> f(cols='should raise warning', another_param='') # doctest: +SKIP + FutureWarning: the 'cols' keyword is deprecated and will be removed in a + future version please takes steps to stop use of 'cols' + should raise warning + """ + if mapping is not None and not hasattr(mapping, "get") and not callable(mapping): + raise TypeError( + "mapping from old to new argument values must be dict or callable!" + ) + + def _deprecate_kwarg(func: F) -> F: + @wraps(func) + def wrapper(*args, **kwargs) -> Callable[..., Any]: + old_arg_value = kwargs.pop(old_arg_name, None) + + if old_arg_value is not None: + if new_arg_name is None: + msg = ( + f"the {repr(old_arg_name)} keyword is deprecated and " + "will be removed in a future version. Please take " + f"steps to stop the use of {repr(old_arg_name)}" + ) + warnings.warn(msg, FutureWarning, stacklevel=stacklevel) + kwargs[old_arg_name] = old_arg_value + return func(*args, **kwargs) + + elif mapping is not None: + if callable(mapping): + new_arg_value = mapping(old_arg_value) + else: + new_arg_value = mapping.get(old_arg_value, old_arg_value) + msg = ( + f"the {old_arg_name}={repr(old_arg_value)} keyword is " + "deprecated, use " + f"{new_arg_name}={repr(new_arg_value)} instead." 
+ ) + else: + new_arg_value = old_arg_value + msg = ( + f"the {repr(old_arg_name)}' keyword is deprecated, " + f"use {repr(new_arg_name)} instead." + ) + + warnings.warn(msg, FutureWarning, stacklevel=stacklevel) + if kwargs.get(new_arg_name) is not None: + msg = ( + f"Can only specify {repr(old_arg_name)} " + f"or {repr(new_arg_name)}, not both." + ) + raise TypeError(msg) + else: + kwargs[new_arg_name] = new_arg_value + return func(*args, **kwargs) + + return cast(F, wrapper) + + return _deprecate_kwarg + + +def _format_argument_list(allow_args: list[str]): + """ + Convert the allow_args argument (either string or integer) of + `deprecate_nonkeyword_arguments` function to a string describing + it to be inserted into warning message. + + Parameters + ---------- + allowed_args : list, tuple or int + The `allowed_args` argument for `deprecate_nonkeyword_arguments`, + but None value is not allowed. + + Returns + ------- + s : str + The substring describing the argument list in best way to be + inserted to the warning message. + + Examples + -------- + `format_argument_list([])` -> '' + `format_argument_list(['a'])` -> "except for the arguments 'a'" + `format_argument_list(['a', 'b'])` -> "except for the arguments 'a' and 'b'" + `format_argument_list(['a', 'b', 'c'])` -> + "except for the arguments 'a', 'b' and 'c'" + """ + if "self" in allow_args: + allow_args.remove("self") + if not allow_args: + return "" + elif len(allow_args) == 1: + return f" except for the argument '{allow_args[0]}'" + else: + last = allow_args[-1] + args = ", ".join(["'" + x + "'" for x in allow_args[:-1]]) + return f" except for the arguments {args} and '{last}'" + + +def future_version_msg(version: str | None) -> str: + """Specify which version of pandas the deprecation will take place in.""" + if version is None: + return "In a future version of pandas" + else: + return f"Starting with pandas version {version}" + + +def deprecate_nonkeyword_arguments( + version: str | None, + allowed_args: list[str] | None = None, + stacklevel: int = 2, +) -> Callable[[F], F]: + """ + Decorator to deprecate a use of non-keyword arguments of a function. + + Parameters + ---------- + version : str, optional + The version in which positional arguments will become + keyword-only. If None, then the warning message won't + specify any particular version. + + allowed_args : list, optional + In case of list, it must be the list of names of some + first arguments of the decorated functions that are + OK to be given as positional arguments. In case of None value, + defaults to list of all arguments not having the + default value. + + stacklevel : int, default=2 + The stack level for warnings.warn + """ + + def decorate(func): + if allowed_args is not None: + allow_args = allowed_args + else: + spec = inspect.getfullargspec(func) + + # We must have some defaults if we are deprecating default-less + assert spec.defaults is not None # for mypy + allow_args = spec.args[: -len(spec.defaults)] + + num_allow_args = len(allow_args) + msg = ( + f"{future_version_msg(version)} all arguments of " + f"{func.__qualname__}{{arguments}} will be keyword-only." 
+ ) + + @wraps(func) + def wrapper(*args, **kwargs): + arguments = _format_argument_list(allow_args) + if len(args) > num_allow_args: + warnings.warn( + msg.format(arguments=arguments), + FutureWarning, + stacklevel=stacklevel, + ) + return func(*args, **kwargs) + + return wrapper + + return decorate + + +def rewrite_axis_style_signature( + name: str, extra_params: list[tuple[str, Any]] +) -> Callable[..., Any]: + def decorate(func: F) -> F: + @wraps(func) + def wrapper(*args, **kwargs) -> Callable[..., Any]: + return func(*args, **kwargs) + + kind = inspect.Parameter.POSITIONAL_OR_KEYWORD + params = [ + inspect.Parameter("self", kind), + inspect.Parameter(name, kind, default=None), + inspect.Parameter("index", kind, default=None), + inspect.Parameter("columns", kind, default=None), + inspect.Parameter("axis", kind, default=None), + ] + + for pname, default in extra_params: + params.append(inspect.Parameter(pname, kind, default=default)) + + sig = inspect.Signature(params) + + # https://github.com/python/typing/issues/598 + # error: "F" has no attribute "__signature__" + func.__signature__ = sig # type: ignore[attr-defined] + return cast(F, wrapper) + + return decorate + + +def doc(*docstrings: str | Callable, **params) -> Callable[[F], F]: + """ + A decorator take docstring templates, concatenate them and perform string + substitution on it. + + This decorator will add a variable "_docstring_components" to the wrapped + callable to keep track the original docstring template for potential usage. + If it should be consider as a template, it will be saved as a string. + Otherwise, it will be saved as callable, and later user __doc__ and dedent + to get docstring. + + Parameters + ---------- + *docstrings : str or callable + The string / docstring / docstring template to be appended in order + after default docstring under callable. + **params + The string which would be used to format docstring template. + """ + + def decorator(decorated: F) -> F: + # collecting docstring and docstring templates + docstring_components: list[str | Callable] = [] + if decorated.__doc__: + docstring_components.append(dedent(decorated.__doc__)) + + for docstring in docstrings: + if hasattr(docstring, "_docstring_components"): + # error: Item "str" of "Union[str, Callable[..., Any]]" has no attribute + # "_docstring_components" + # error: Item "function" of "Union[str, Callable[..., Any]]" has no + # attribute "_docstring_components" + docstring_components.extend( + docstring._docstring_components # type: ignore[union-attr] + ) + elif isinstance(docstring, str) or docstring.__doc__: + docstring_components.append(docstring) + + # formatting templates and concatenating docstring + decorated.__doc__ = "".join( + [ + component.format(**params) + if isinstance(component, str) + else dedent(component.__doc__ or "") + for component in docstring_components + ] + ) + + # error: "F" has no attribute "_docstring_components" + decorated._docstring_components = ( # type: ignore[attr-defined] + docstring_components + ) + return decorated + + return decorator + + +# Substitution and Appender are derived from matplotlib.docstring (1.1.0) +# module https://matplotlib.org/users/license.html + + +class Substitution: + """ + A decorator to take a function's docstring and perform string + substitution on it. 
+ + This decorator should be robust even if func.__doc__ is None + (for example, if -OO was passed to the interpreter) + + Usage: construct a docstring.Substitution with a sequence or + dictionary suitable for performing substitution; then + decorate a suitable function with the constructed object. e.g. + + sub_author_name = Substitution(author='Jason') + + @sub_author_name + def some_function(x): + "%(author)s wrote this function" + + # note that some_function.__doc__ is now "Jason wrote this function" + + One can also use positional arguments. + + sub_first_last_names = Substitution('Edgar Allen', 'Poe') + + @sub_first_last_names + def some_function(x): + "%s %s wrote the Raven" + """ + + def __init__(self, *args, **kwargs): + if args and kwargs: + raise AssertionError("Only positional or keyword args are allowed") + + self.params = args or kwargs + + def __call__(self, func: F) -> F: + func.__doc__ = func.__doc__ and func.__doc__ % self.params + return func + + def update(self, *args, **kwargs) -> None: + """ + Update self.params with supplied args. + """ + if isinstance(self.params, dict): + self.params.update(*args, **kwargs) + + +class Appender: + """ + A function decorator that will append an addendum to the docstring + of the target function. + + This decorator should be robust even if func.__doc__ is None + (for example, if -OO was passed to the interpreter). + + Usage: construct a docstring.Appender with a string to be joined to + the original docstring. An optional 'join' parameter may be supplied + which will be used to join the docstring and addendum. e.g. + + add_copyright = Appender("Copyright (c) 2009", join='\n') + + @add_copyright + def my_dog(has='fleas'): + "This docstring will have a copyright below" + pass + """ + + addendum: str | None + + def __init__(self, addendum: str | None, join: str = "", indents: int = 0): + if indents > 0: + self.addendum = indent(addendum, indents=indents) + else: + self.addendum = addendum + self.join = join + + def __call__(self, func: F) -> F: + func.__doc__ = func.__doc__ if func.__doc__ else "" + self.addendum = self.addendum if self.addendum else "" + docitems = [func.__doc__, self.addendum] + func.__doc__ = dedent(self.join.join(docitems)) + return func + + +def indent(text: str | None, indents: int = 1) -> str: + if not text or not isinstance(text, str): + return "" + jointext = "".join(["\n"] + [" "] * indents) + return jointext.join(text.split("\n")) diff --git a/.local/lib/python3.8/site-packages/pandas/util/_doctools.py b/.local/lib/python3.8/site-packages/pandas/util/_doctools.py new file mode 100644 index 0000000..0d90d9b --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/util/_doctools.py @@ -0,0 +1,193 @@ +from __future__ import annotations + +import numpy as np + +import pandas as pd + + +class TablePlotter: + """ + Layout some DataFrames in vertical/horizontal layout for explanation. + Used in merging.rst + """ + + def __init__( + self, + cell_width: float = 0.37, + cell_height: float = 0.25, + font_size: float = 7.5, + ): + self.cell_width = cell_width + self.cell_height = cell_height + self.font_size = font_size + + def _shape(self, df: pd.DataFrame) -> tuple[int, int]: + """ + Calculate table shape considering index levels. + """ + row, col = df.shape + return row + df.columns.nlevels, col + df.index.nlevels + + def _get_cells(self, left, right, vertical) -> tuple[int, int]: + """ + Calculate appropriate figure size based on left and right data. 
+ """ + if vertical: + # calculate required number of cells + vcells = max(sum(self._shape(df)[0] for df in left), self._shape(right)[0]) + hcells = max(self._shape(df)[1] for df in left) + self._shape(right)[1] + else: + vcells = max([self._shape(df)[0] for df in left] + [self._shape(right)[0]]) + hcells = sum([self._shape(df)[1] for df in left] + [self._shape(right)[1]]) + return hcells, vcells + + def plot(self, left, right, labels=None, vertical: bool = True): + """ + Plot left / right DataFrames in specified layout. + + Parameters + ---------- + left : list of DataFrames before operation is applied + right : DataFrame of operation result + labels : list of str to be drawn as titles of left DataFrames + vertical : bool, default True + If True, use vertical layout. If False, use horizontal layout. + """ + import matplotlib.gridspec as gridspec + import matplotlib.pyplot as plt + + if not isinstance(left, list): + left = [left] + left = [self._conv(df) for df in left] + right = self._conv(right) + + hcells, vcells = self._get_cells(left, right, vertical) + + if vertical: + figsize = self.cell_width * hcells, self.cell_height * vcells + else: + # include margin for titles + figsize = self.cell_width * hcells, self.cell_height * vcells + fig = plt.figure(figsize=figsize) + + if vertical: + gs = gridspec.GridSpec(len(left), hcells) + # left + max_left_cols = max(self._shape(df)[1] for df in left) + max_left_rows = max(self._shape(df)[0] for df in left) + for i, (l, label) in enumerate(zip(left, labels)): + ax = fig.add_subplot(gs[i, 0:max_left_cols]) + self._make_table(ax, l, title=label, height=1.0 / max_left_rows) + # right + ax = plt.subplot(gs[:, max_left_cols:]) + self._make_table(ax, right, title="Result", height=1.05 / vcells) + fig.subplots_adjust(top=0.9, bottom=0.05, left=0.05, right=0.95) + else: + max_rows = max(self._shape(df)[0] for df in left + [right]) + height = 1.0 / np.max(max_rows) + gs = gridspec.GridSpec(1, hcells) + # left + i = 0 + for df, label in zip(left, labels): + sp = self._shape(df) + ax = fig.add_subplot(gs[0, i : i + sp[1]]) + self._make_table(ax, df, title=label, height=height) + i += sp[1] + # right + ax = plt.subplot(gs[0, i:]) + self._make_table(ax, right, title="Result", height=height) + fig.subplots_adjust(top=0.85, bottom=0.05, left=0.05, right=0.95) + + return fig + + def _conv(self, data): + """ + Convert each input to appropriate for table outplot. 
+ """ + if isinstance(data, pd.Series): + if data.name is None: + data = data.to_frame(name="") + else: + data = data.to_frame() + data = data.fillna("NaN") + return data + + def _insert_index(self, data): + # insert is destructive + data = data.copy() + idx_nlevels = data.index.nlevels + if idx_nlevels == 1: + data.insert(0, "Index", data.index) + else: + for i in range(idx_nlevels): + data.insert(i, f"Index{i}", data.index._get_level_values(i)) + + col_nlevels = data.columns.nlevels + if col_nlevels > 1: + col = data.columns._get_level_values(0) + values = [ + data.columns._get_level_values(i)._values for i in range(1, col_nlevels) + ] + col_df = pd.DataFrame(values) + data.columns = col_df.columns + data = pd.concat([col_df, data]) + data.columns = col + return data + + def _make_table(self, ax, df, title: str, height: float | None = None): + if df is None: + ax.set_visible(False) + return + + import pandas.plotting as plotting + + idx_nlevels = df.index.nlevels + col_nlevels = df.columns.nlevels + # must be convert here to get index levels for colorization + df = self._insert_index(df) + tb = plotting.table(ax, df, loc=9) + tb.set_fontsize(self.font_size) + + if height is None: + height = 1.0 / (len(df) + 1) + + props = tb.properties() + for (r, c), cell in props["celld"].items(): + if c == -1: + cell.set_visible(False) + elif r < col_nlevels and c < idx_nlevels: + cell.set_visible(False) + elif r < col_nlevels or c < idx_nlevels: + cell.set_facecolor("#AAAAAA") + cell.set_height(height) + + ax.set_title(title, size=self.font_size) + ax.axis("off") + + +if __name__ == "__main__": + import matplotlib.pyplot as plt + + p = TablePlotter() + + df1 = pd.DataFrame({"A": [10, 11, 12], "B": [20, 21, 22], "C": [30, 31, 32]}) + df2 = pd.DataFrame({"A": [10, 12], "C": [30, 32]}) + + p.plot([df1, df2], pd.concat([df1, df2]), labels=["df1", "df2"], vertical=True) + plt.show() + + df3 = pd.DataFrame({"X": [10, 12], "Z": [30, 32]}) + + p.plot( + [df1, df3], pd.concat([df1, df3], axis=1), labels=["df1", "df2"], vertical=False + ) + plt.show() + + idx = pd.MultiIndex.from_tuples( + [(1, "A"), (1, "B"), (1, "C"), (2, "A"), (2, "B"), (2, "C")] + ) + col = pd.MultiIndex.from_tuples([(1, "A"), (1, "B")]) + df3 = pd.DataFrame({"v1": [1, 2, 3, 4, 5, 6], "v2": [5, 6, 7, 8, 9, 10]}, index=idx) + df3.columns = col + p.plot(df3, df3, labels=["df3"]) + plt.show() diff --git a/.local/lib/python3.8/site-packages/pandas/util/_exceptions.py b/.local/lib/python3.8/site-packages/pandas/util/_exceptions.py new file mode 100644 index 0000000..806e2ab --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/util/_exceptions.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +import contextlib +import inspect +import os + + +@contextlib.contextmanager +def rewrite_exception(old_name: str, new_name: str): + """ + Rewrite the message of an exception. + """ + try: + yield + except Exception as err: + if not err.args: + raise + msg = str(err.args[0]) + msg = msg.replace(old_name, new_name) + args: tuple[str, ...] = (msg,) + if len(err.args) > 1: + args = args + err.args[1:] + err.args = args + raise + + +def find_stack_level() -> int: + """ + Find the first place in the stack that is not inside pandas + (tests notwithstanding). 
+ """ + stack = inspect.stack() + + import pandas as pd + + pkg_dir = os.path.dirname(pd.__file__) + test_dir = os.path.join(pkg_dir, "tests") + + for n in range(len(stack)): + fname = stack[n].filename + if fname.startswith(pkg_dir) and not fname.startswith(test_dir): + continue + else: + break + return n diff --git a/.local/lib/python3.8/site-packages/pandas/util/_print_versions.py b/.local/lib/python3.8/site-packages/pandas/util/_print_versions.py new file mode 100644 index 0000000..289900c --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/util/_print_versions.py @@ -0,0 +1,160 @@ +from __future__ import annotations + +import codecs +import json +import locale +import os +import platform +import struct +import sys + +from pandas._typing import JSONSerializable +from pandas.compat._optional import ( + VERSIONS, + get_version, + import_optional_dependency, +) + + +def _get_commit_hash() -> str | None: + """ + Use vendored versioneer code to get git hash, which handles + git worktree correctly. + """ + from pandas._version import get_versions + + versions = get_versions() + return versions["full-revisionid"] + + +def _get_sys_info() -> dict[str, JSONSerializable]: + """ + Returns system information as a JSON serializable dictionary. + """ + uname_result = platform.uname() + language_code, encoding = locale.getlocale() + return { + "commit": _get_commit_hash(), + "python": ".".join([str(i) for i in sys.version_info]), + "python-bits": struct.calcsize("P") * 8, + "OS": uname_result.system, + "OS-release": uname_result.release, + "Version": uname_result.version, + "machine": uname_result.machine, + "processor": uname_result.processor, + "byteorder": sys.byteorder, + "LC_ALL": os.environ.get("LC_ALL"), + "LANG": os.environ.get("LANG"), + "LOCALE": {"language-code": language_code, "encoding": encoding}, + } + + +def _get_dependency_info() -> dict[str, JSONSerializable]: + """ + Returns dependency information as a JSON serializable dictionary. + """ + deps = [ + "pandas", + # required + "numpy", + "pytz", + "dateutil", + # install / build, + "pip", + "setuptools", + "Cython", + # test + "pytest", + "hypothesis", + # docs + "sphinx", + # Other, need a min version + "blosc", + "feather", + "xlsxwriter", + "lxml.etree", + "html5lib", + "pymysql", + "psycopg2", + "jinja2", + # Other, not imported. + "IPython", + "pandas_datareader", + ] + deps.extend(list(VERSIONS)) + + result: dict[str, JSONSerializable] = {} + for modname in deps: + mod = import_optional_dependency(modname, errors="ignore") + result[modname] = get_version(mod) if mod else None + return result + + +def show_versions(as_json: str | bool = False) -> None: + """ + Provide useful information, important for bug reports. + + It comprises info about hosting operation system, pandas version, + and versions of other installed relative packages. + + Parameters + ---------- + as_json : str or bool, default False + * If False, outputs info in a human readable form to the console. + * If str, it will be considered as a path to a file. + Info will be written to that file in JSON format. + * If True, outputs info in JSON format to the console. 
+ """ + sys_info = _get_sys_info() + deps = _get_dependency_info() + + if as_json: + j = {"system": sys_info, "dependencies": deps} + + if as_json is True: + sys.stdout.writelines(json.dumps(j, indent=2)) + else: + assert isinstance(as_json, str) # needed for mypy + with codecs.open(as_json, "wb", encoding="utf8") as f: + json.dump(j, f, indent=2) + + else: + assert isinstance(sys_info["LOCALE"], dict) # needed for mypy + language_code = sys_info["LOCALE"]["language-code"] + encoding = sys_info["LOCALE"]["encoding"] + sys_info["LOCALE"] = f"{language_code}.{encoding}" + + maxlen = max(len(x) for x in deps) + print("\nINSTALLED VERSIONS") + print("------------------") + for k, v in sys_info.items(): + print(f"{k:<{maxlen}}: {v}") + print("") + for k, v in deps.items(): + print(f"{k:<{maxlen}}: {v}") + + +def main() -> int: + from optparse import OptionParser + + parser = OptionParser() + parser.add_option( + "-j", + "--json", + metavar="FILE", + nargs=1, + help="Save output as JSON into file, pass in '-' to output to stdout", + ) + + (options, args) = parser.parse_args() + + if options.json == "-": + options.json = True + + show_versions(as_json=options.json) + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/.local/lib/python3.8/site-packages/pandas/util/_test_decorators.py b/.local/lib/python3.8/site-packages/pandas/util/_test_decorators.py new file mode 100644 index 0000000..ab96a03 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/util/_test_decorators.py @@ -0,0 +1,308 @@ +""" +This module provides decorator functions which can be applied to test objects +in order to skip those objects when certain conditions occur. A sample use case +is to detect if the platform is missing ``matplotlib``. If so, any test objects +which require ``matplotlib`` and decorated with ``@td.skip_if_no_mpl`` will be +skipped by ``pytest`` during the execution of the test suite. + +To illustrate, after importing this module: + +import pandas.util._test_decorators as td + +The decorators can be applied to classes: + +@td.skip_if_some_reason +class Foo: + ... + +Or individual functions: + +@td.skip_if_some_reason +def test_foo(): + ... + +For more information, refer to the ``pytest`` documentation on ``skipif``. +""" +from __future__ import annotations + +from contextlib import contextmanager +import locale +from typing import Callable +import warnings + +import numpy as np +import pytest + +from pandas._config import get_option + +from pandas.compat import ( + IS64, + is_platform_windows, +) +from pandas.compat._optional import import_optional_dependency + +from pandas.core.computation.expressions import ( + NUMEXPR_INSTALLED, + USE_NUMEXPR, +) +from pandas.util.version import Version + + +def safe_import(mod_name: str, min_version: str | None = None): + """ + Parameters + ---------- + mod_name : str + Name of the module to be imported + min_version : str, default None + Minimum required version of the specified mod_name + + Returns + ------- + object + The imported module if successful, or False + """ + with warnings.catch_warnings(): + # Suppress warnings that we can't do anything about, + # e.g. 
from aiohttp + warnings.filterwarnings( + "ignore", + category=DeprecationWarning, + module="aiohttp", + message=".*decorator is deprecated since Python 3.8.*", + ) + + # fastparquet import accesses pd.Int64Index + warnings.filterwarnings( + "ignore", + category=FutureWarning, + module="fastparquet", + message=".*Int64Index.*", + ) + + try: + mod = __import__(mod_name) + except ImportError: + return False + + if not min_version: + return mod + else: + import sys + + try: + version = getattr(sys.modules[mod_name], "__version__") + except AttributeError: + # xlrd uses a capitalized attribute name + version = getattr(sys.modules[mod_name], "__VERSION__") + if version and Version(version) >= Version(min_version): + return mod + + return False + + +def _skip_if_no_mpl(): + mod = safe_import("matplotlib") + if mod: + mod.use("Agg") + else: + return True + + +def _skip_if_has_locale(): + lang, _ = locale.getlocale() + if lang is not None: + return True + + +def _skip_if_not_us_locale(): + lang, _ = locale.getlocale() + if lang != "en_US": + return True + + +def _skip_if_no_scipy() -> bool: + return not ( + safe_import("scipy.stats") + and safe_import("scipy.sparse") + and safe_import("scipy.interpolate") + and safe_import("scipy.signal") + ) + + +# TODO(pytest#7469): return type, _pytest.mark.structures.MarkDecorator is not public +# https://github.com/pytest-dev/pytest/issues/7469 +def skip_if_installed(package: str): + """ + Skip a test if a package is installed. + + Parameters + ---------- + package : str + The name of the package. + """ + return pytest.mark.skipif( + safe_import(package), reason=f"Skipping because {package} is installed." + ) + + +# TODO(pytest#7469): return type, _pytest.mark.structures.MarkDecorator is not public +# https://github.com/pytest-dev/pytest/issues/7469 +def skip_if_no(package: str, min_version: str | None = None): + """ + Generic function to help skip tests when required packages are not + present on the testing system. + + This function returns a pytest mark with a skip condition that will be + evaluated during test collection. An attempt will be made to import the + specified ``package`` and optionally ensure it meets the ``min_version`` + + The mark can be used as either a decorator for a test function or to be + applied to parameters in pytest.mark.parametrize calls or parametrized + fixtures. + + If the import and version check are unsuccessful, then the test function + (or test case when used in conjunction with parametrization) will be + skipped. + + Parameters + ---------- + package: str + The name of the required package. + min_version: str or None, default None + Optional minimum version of the package. + + Returns + ------- + _pytest.mark.structures.MarkDecorator + a pytest.mark.skipif to use as either a test decorator or a + parametrization mark. 
+ """ + msg = f"Could not import '{package}'" + if min_version: + msg += f" satisfying a min_version of {min_version}" + return pytest.mark.skipif( + not safe_import(package, min_version=min_version), reason=msg + ) + + +skip_if_no_mpl = pytest.mark.skipif( + _skip_if_no_mpl(), reason="Missing matplotlib dependency" +) +skip_if_mpl = pytest.mark.skipif(not _skip_if_no_mpl(), reason="matplotlib is present") +skip_if_32bit = pytest.mark.skipif(not IS64, reason="skipping for 32 bit") +skip_if_windows = pytest.mark.skipif(is_platform_windows(), reason="Running on Windows") +skip_if_has_locale = pytest.mark.skipif( + _skip_if_has_locale(), reason=f"Specific locale is set {locale.getlocale()[0]}" +) +skip_if_not_us_locale = pytest.mark.skipif( + _skip_if_not_us_locale(), reason=f"Specific locale is set {locale.getlocale()[0]}" +) +skip_if_no_scipy = pytest.mark.skipif( + _skip_if_no_scipy(), reason="Missing SciPy requirement" +) +skip_if_no_ne = pytest.mark.skipif( + not USE_NUMEXPR, + reason=f"numexpr enabled->{USE_NUMEXPR}, installed->{NUMEXPR_INSTALLED}", +) + + +# TODO(pytest#7469): return type, _pytest.mark.structures.MarkDecorator is not public +# https://github.com/pytest-dev/pytest/issues/7469 +def skip_if_np_lt(ver_str: str, *args, reason: str | None = None): + if reason is None: + reason = f"NumPy {ver_str} or greater required" + return pytest.mark.skipif( + Version(np.__version__) < Version(ver_str), + *args, + reason=reason, + ) + + +def parametrize_fixture_doc(*args): + """ + Intended for use as a decorator for parametrized fixture, + this function will wrap the decorated function with a pytest + ``parametrize_fixture_doc`` mark. That mark will format + initial fixture docstring by replacing placeholders {0}, {1} etc + with parameters passed as arguments. + + Parameters + ---------- + args: iterable + Positional arguments for docstring. + + Returns + ------- + function + The decorated function wrapped within a pytest + ``parametrize_fixture_doc`` mark + """ + + def documented_fixture(fixture): + fixture.__doc__ = fixture.__doc__.format(*args) + return fixture + + return documented_fixture + + +def check_file_leaks(func) -> Callable: + """ + Decorate a test function to check that we are not leaking file descriptors. + """ + with file_leak_context(): + return func + + +@contextmanager +def file_leak_context(): + """ + ContextManager analogue to check_file_leaks. 
+ """ + psutil = safe_import("psutil") + if not psutil or is_platform_windows(): + # Checking for file leaks can hang on Windows CI + yield + else: + proc = psutil.Process() + flist = proc.open_files() + conns = proc.connections() + + try: + yield + finally: + flist2 = proc.open_files() + # on some builds open_files includes file position, which we _dont_ + # expect to remain unchanged, so we need to compare excluding that + flist_ex = [(x.path, x.fd) for x in flist] + flist2_ex = [(x.path, x.fd) for x in flist2] + assert flist2_ex == flist_ex, (flist2, flist) + + conns2 = proc.connections() + assert conns2 == conns, (conns2, conns) + + +def async_mark(): + try: + import_optional_dependency("pytest_asyncio") + async_mark = pytest.mark.asyncio + except ImportError: + async_mark = pytest.mark.skip(reason="Missing dependency pytest-asyncio") + + return async_mark + + +def mark_array_manager_not_yet_implemented(request): + mark = pytest.mark.xfail(reason="Not yet implemented for ArrayManager") + request.node.add_marker(mark) + + +skip_array_manager_not_yet_implemented = pytest.mark.xfail( + get_option("mode.data_manager") == "array", + reason="Not yet implemented for ArrayManager", +) + +skip_array_manager_invalid_test = pytest.mark.skipif( + get_option("mode.data_manager") == "array", + reason="Test that relies on BlockManager internals or specific behaviour", +) diff --git a/.local/lib/python3.8/site-packages/pandas/util/_tester.py b/.local/lib/python3.8/site-packages/pandas/util/_tester.py new file mode 100644 index 0000000..6725a84 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/util/_tester.py @@ -0,0 +1,33 @@ +""" +Entrypoint for testing from the top-level namespace. +""" +import os +import sys + +PKG = os.path.dirname(os.path.dirname(__file__)) + + +def test(extra_args=None): + """ + Run the pandas test suite using pytest. + """ + try: + import pytest + except ImportError as err: + raise ImportError("Need pytest>=5.0.1 to run tests") from err + try: + import hypothesis # noqa:F401 + except ImportError as err: + raise ImportError("Need hypothesis>=3.58 to run tests") from err + cmd = ["--skip-slow", "--skip-network", "--skip-db"] + if extra_args: + if not isinstance(extra_args, list): + extra_args = [extra_args] + cmd = extra_args + cmd += [PKG] + joined = " ".join(cmd) + print(f"running: pytest {joined}") + sys.exit(pytest.main(cmd)) + + +__all__ = ["test"] diff --git a/.local/lib/python3.8/site-packages/pandas/util/_validators.py b/.local/lib/python3.8/site-packages/pandas/util/_validators.py new file mode 100644 index 0000000..8e3de94 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/util/_validators.py @@ -0,0 +1,520 @@ +""" +Module that contains many useful utilities +for validating data or function arguments +""" +from __future__ import annotations + +from typing import ( + Iterable, + Sequence, +) +import warnings + +import numpy as np + +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + is_bool, + is_integer, +) + + +def _check_arg_length(fname, args, max_fname_arg_count, compat_args): + """ + Checks whether 'args' has length of at most 'compat_args'. Raises + a TypeError if that is not the case, similar to in Python when a + function is called with too many arguments. 
+ """ + if max_fname_arg_count < 0: + raise ValueError("'max_fname_arg_count' must be non-negative") + + if len(args) > len(compat_args): + max_arg_count = len(compat_args) + max_fname_arg_count + actual_arg_count = len(args) + max_fname_arg_count + argument = "argument" if max_arg_count == 1 else "arguments" + + raise TypeError( + f"{fname}() takes at most {max_arg_count} {argument} " + f"({actual_arg_count} given)" + ) + + +def _check_for_default_values(fname, arg_val_dict, compat_args): + """ + Check that the keys in `arg_val_dict` are mapped to their + default values as specified in `compat_args`. + + Note that this function is to be called only when it has been + checked that arg_val_dict.keys() is a subset of compat_args + """ + for key in arg_val_dict: + # try checking equality directly with '=' operator, + # as comparison may have been overridden for the left + # hand object + try: + v1 = arg_val_dict[key] + v2 = compat_args[key] + + # check for None-ness otherwise we could end up + # comparing a numpy array vs None + if (v1 is not None and v2 is None) or (v1 is None and v2 is not None): + match = False + else: + match = v1 == v2 + + if not is_bool(match): + raise ValueError("'match' is not a boolean") + + # could not compare them directly, so try comparison + # using the 'is' operator + except ValueError: + match = arg_val_dict[key] is compat_args[key] + + if not match: + raise ValueError( + f"the '{key}' parameter is not supported in " + f"the pandas implementation of {fname}()" + ) + + +def validate_args(fname, args, max_fname_arg_count, compat_args): + """ + Checks whether the length of the `*args` argument passed into a function + has at most `len(compat_args)` arguments and whether or not all of these + elements in `args` are set to their default values. + + Parameters + ---------- + fname : str + The name of the function being passed the `*args` parameter + args : tuple + The `*args` parameter passed into a function + max_fname_arg_count : int + The maximum number of arguments that the function `fname` + can accept, excluding those in `args`. Used for displaying + appropriate error messages. Must be non-negative. + compat_args : dict + A dictionary of keys and their associated default values. + In order to accommodate buggy behaviour in some versions of `numpy`, + where a signature displayed keyword arguments but then passed those + arguments **positionally** internally when calling downstream + implementations, a dict ensures that the original + order of the keyword arguments is enforced. + + Raises + ------ + TypeError + If `args` contains more values than there are `compat_args` + ValueError + If `args` contains values that do not correspond to those + of the default values specified in `compat_args` + """ + _check_arg_length(fname, args, max_fname_arg_count, compat_args) + + # We do this so that we can provide a more informative + # error message about the parameters that we are not + # supporting in the pandas implementation of 'fname' + kwargs = dict(zip(compat_args, args)) + _check_for_default_values(fname, kwargs, compat_args) + + +def _check_for_invalid_keys(fname, kwargs, compat_args): + """ + Checks whether 'kwargs' contains any keys that are not + in 'compat_args' and raises a TypeError if there is one. 
+ """ + # set(dict) --> set of the dictionary's keys + diff = set(kwargs) - set(compat_args) + + if diff: + bad_arg = list(diff)[0] + raise TypeError(f"{fname}() got an unexpected keyword argument '{bad_arg}'") + + +def validate_kwargs(fname, kwargs, compat_args): + """ + Checks whether parameters passed to the **kwargs argument in a + function `fname` are valid parameters as specified in `*compat_args` + and whether or not they are set to their default values. + + Parameters + ---------- + fname : str + The name of the function being passed the `**kwargs` parameter + kwargs : dict + The `**kwargs` parameter passed into `fname` + compat_args: dict + A dictionary of keys that `kwargs` is allowed to have and their + associated default values + + Raises + ------ + TypeError if `kwargs` contains keys not in `compat_args` + ValueError if `kwargs` contains keys in `compat_args` that do not + map to the default values specified in `compat_args` + """ + kwds = kwargs.copy() + _check_for_invalid_keys(fname, kwargs, compat_args) + _check_for_default_values(fname, kwds, compat_args) + + +def validate_args_and_kwargs(fname, args, kwargs, max_fname_arg_count, compat_args): + """ + Checks whether parameters passed to the *args and **kwargs argument in a + function `fname` are valid parameters as specified in `*compat_args` + and whether or not they are set to their default values. + + Parameters + ---------- + fname: str + The name of the function being passed the `**kwargs` parameter + args: tuple + The `*args` parameter passed into a function + kwargs: dict + The `**kwargs` parameter passed into `fname` + max_fname_arg_count: int + The minimum number of arguments that the function `fname` + requires, excluding those in `args`. Used for displaying + appropriate error messages. Must be non-negative. + compat_args: dict + A dictionary of keys that `kwargs` is allowed to + have and their associated default values. + + Raises + ------ + TypeError if `args` contains more values than there are + `compat_args` OR `kwargs` contains keys not in `compat_args` + ValueError if `args` contains values not at the default value (`None`) + `kwargs` contains keys in `compat_args` that do not map to the default + value as specified in `compat_args` + + See Also + -------- + validate_args : Purely args validation. + validate_kwargs : Purely kwargs validation. + + """ + # Check that the total number of arguments passed in (i.e. + # args and kwargs) does not exceed the length of compat_args + _check_arg_length( + fname, args + tuple(kwargs.values()), max_fname_arg_count, compat_args + ) + + # Check there is no overlap with the positional and keyword + # arguments, similar to what is done in actual Python functions + args_dict = dict(zip(compat_args, args)) + + for key in args_dict: + if key in kwargs: + raise TypeError( + f"{fname}() got multiple values for keyword argument '{key}'" + ) + + kwargs.update(args_dict) + validate_kwargs(fname, kwargs, compat_args) + + +def validate_bool_kwarg(value, arg_name, none_allowed=True, int_allowed=False): + """ + Ensure that argument passed in arg_name can be interpreted as boolean. + + Parameters + ---------- + value : bool + Value to be validated. + arg_name : str + Name of the argument. To be reflected in the error message. + none_allowed : bool, default True + Whether to consider None to be a valid boolean. + int_allowed : bool, default False + Whether to consider integer value to be a valid boolean. + + Returns + ------- + value + The same value as input. 
+ + Raises + ------ + ValueError + If the value is not a valid boolean. + """ + good_value = is_bool(value) + if none_allowed: + good_value = good_value or value is None + + if int_allowed: + good_value = good_value or isinstance(value, int) + + if not good_value: + raise ValueError( + f'For argument "{arg_name}" expected type bool, received ' + f"type {type(value).__name__}." + ) + return value + + +def validate_axis_style_args(data, args, kwargs, arg_name, method_name): + """ + Argument handler for mixed index, columns / axis functions + + In an attempt to handle both `.method(index, columns)`, and + `.method(arg, axis=.)`, we have to do some bad things to argument + parsing. This translates all arguments to `{index=., columns=.}` style. + + Parameters + ---------- + data : DataFrame + args : tuple + All positional arguments from the user + kwargs : dict + All keyword arguments from the user + arg_name, method_name : str + Used for better error messages + + Returns + ------- + kwargs : dict + A dictionary of keyword arguments. Doesn't modify ``kwargs`` + inplace, so update them with the return value here. + + Examples + -------- + >>> df = pd.DataFrame(range(2)) + >>> validate_axis_style_args(df, (str.upper,), {'columns': id}, + ... 'mapper', 'rename') + {'columns': , 'index': } + + This emits a warning + >>> validate_axis_style_args(df, (str.upper, id), {}, + ... 'mapper', 'rename') + {'index': , 'columns': } + """ + # TODO: Change to keyword-only args and remove all this + + out = {} + # Goal: fill 'out' with index/columns-style arguments + # like out = {'index': foo, 'columns': bar} + + # Start by validating for consistency + if "axis" in kwargs and any(x in kwargs for x in data._AXIS_TO_AXIS_NUMBER): + msg = "Cannot specify both 'axis' and any of 'index' or 'columns'." + raise TypeError(msg) + + # First fill with explicit values provided by the user... + if arg_name in kwargs: + if args: + msg = f"{method_name} got multiple values for argument '{arg_name}'" + raise TypeError(msg) + + axis = data._get_axis_name(kwargs.get("axis", 0)) + out[axis] = kwargs[arg_name] + + # More user-provided arguments, now from kwargs + for k, v in kwargs.items(): + try: + ax = data._get_axis_name(k) + except ValueError: + pass + else: + out[ax] = v + + # All user-provided kwargs have been handled now. + # Now we supplement with positional arguments, emitting warnings + # when there's ambiguity and raising when there's conflicts + + if len(args) == 0: + pass # It's up to the function to decide if this is valid + elif len(args) == 1: + axis = data._get_axis_name(kwargs.get("axis", 0)) + out[axis] = args[0] + elif len(args) == 2: + if "axis" in kwargs: + # Unambiguously wrong + msg = "Cannot specify both 'axis' and any of 'index' or 'columns'" + raise TypeError(msg) + + msg = ( + f"Interpreting call\n\t'.{method_name}(a, b)' as " + f"\n\t'.{method_name}(index=a, columns=b)'.\nUse named " + "arguments to remove any ambiguity. In the future, using " + "positional arguments for 'index' or 'columns' will raise " + "a 'TypeError'." + ) + warnings.warn(msg, FutureWarning, stacklevel=find_stack_level()) + out[data._get_axis_name(0)] = args[0] + out[data._get_axis_name(1)] = args[1] + else: + msg = f"Cannot specify all of '{arg_name}', 'index', 'columns'." + raise TypeError(msg) + return out + + +def validate_fillna_kwargs(value, method, validate_scalar_dict_value=True): + """ + Validate the keyword arguments to 'fillna'. + + This checks that exactly one of 'value' and 'method' is specified. 
+ If 'method' is specified, this validates that it's a valid method. + + Parameters + ---------- + value, method : object + The 'value' and 'method' keyword arguments for 'fillna'. + validate_scalar_dict_value : bool, default True + Whether to validate that 'value' is a scalar or dict. Specifically, + validate that it is not a list or tuple. + + Returns + ------- + value, method : object + """ + from pandas.core.missing import clean_fill_method + + if value is None and method is None: + raise ValueError("Must specify a fill 'value' or 'method'.") + elif value is None and method is not None: + method = clean_fill_method(method) + + elif value is not None and method is None: + if validate_scalar_dict_value and isinstance(value, (list, tuple)): + raise TypeError( + '"value" parameter must be a scalar or dict, but ' + f'you passed a "{type(value).__name__}"' + ) + + elif value is not None and method is not None: + raise ValueError("Cannot specify both 'value' and 'method'.") + + return value, method + + +def validate_percentile(q: float | Iterable[float]) -> np.ndarray: + """ + Validate percentiles (used by describe and quantile). + + This function checks if the given float or iterable of floats is a valid percentile + otherwise raises a ValueError. + + Parameters + ---------- + q: float or iterable of floats + A single percentile or an iterable of percentiles. + + Returns + ------- + ndarray + An ndarray of the percentiles if valid. + + Raises + ------ + ValueError if percentiles are not in given interval([0, 1]). + """ + q_arr = np.asarray(q) + # Don't change this to an f-string. The string formatting + # is too expensive for cases where we don't need it. + msg = "percentiles should all be in the interval [0, 1]. Try {} instead." + if q_arr.ndim == 0: + if not 0 <= q_arr <= 1: + raise ValueError(msg.format(q_arr / 100.0)) + else: + if not all(0 <= qs <= 1 for qs in q_arr): + raise ValueError(msg.format(q_arr / 100.0)) + return q_arr + + +def validate_ascending( + ascending: bool | int | Sequence[bool | int] = True, +): + """Validate ``ascending`` kwargs for ``sort_index`` method.""" + kwargs = {"none_allowed": False, "int_allowed": True} + if not isinstance(ascending, (list, tuple)): + return validate_bool_kwarg(ascending, "ascending", **kwargs) + + return [validate_bool_kwarg(item, "ascending", **kwargs) for item in ascending] + + +def validate_endpoints(closed: str | None) -> tuple[bool, bool]: + """ + Check that the `closed` argument is among [None, "left", "right"] + + Parameters + ---------- + closed : {None, "left", "right"} + + Returns + ------- + left_closed : bool + right_closed : bool + + Raises + ------ + ValueError : if argument is not among valid values + """ + left_closed = False + right_closed = False + + if closed is None: + left_closed = True + right_closed = True + elif closed == "left": + left_closed = True + elif closed == "right": + right_closed = True + else: + raise ValueError("Closed has to be either 'left', 'right' or None") + + return left_closed, right_closed + + +def validate_inclusive(inclusive: str | None) -> tuple[bool, bool]: + """ + Check that the `inclusive` argument is among {"both", "neither", "left", "right"}. 
+ + Parameters + ---------- + inclusive : {"both", "neither", "left", "right"} + + Returns + ------- + left_right_inclusive : tuple[bool, bool] + + Raises + ------ + ValueError : if argument is not among valid values + """ + left_right_inclusive: tuple[bool, bool] | None = None + + if isinstance(inclusive, str): + left_right_inclusive = { + "both": (True, True), + "left": (True, False), + "right": (False, True), + "neither": (False, False), + }.get(inclusive) + + if left_right_inclusive is None: + raise ValueError( + "Inclusive has to be either 'both', 'neither', 'left' or 'right'" + ) + + return left_right_inclusive + + +def validate_insert_loc(loc: int, length: int) -> int: + """ + Check that we have an integer between -length and length, inclusive. + + Standardize negative loc to within [0, length]. + + The exceptions we raise on failure match np.insert. + """ + if not is_integer(loc): + raise TypeError(f"loc must be an integer between -{length} and {length}") + + if loc < 0: + loc += length + if not 0 <= loc <= length: + raise IndexError(f"loc must be an integer between -{length} and {length}") + return loc diff --git a/.local/lib/python3.8/site-packages/pandas/util/testing.py b/.local/lib/python3.8/site-packages/pandas/util/testing.py new file mode 100644 index 0000000..db9bfc2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/util/testing.py @@ -0,0 +1,14 @@ +import warnings + +from pandas.util._exceptions import find_stack_level + +from pandas._testing import * # noqa:F401,F403,PDF014 + +warnings.warn( + ( + "pandas.util.testing is deprecated. Use the functions in the " + "public API at pandas.testing instead." + ), + FutureWarning, + stacklevel=find_stack_level(), +) diff --git a/.local/lib/python3.8/site-packages/pandas/util/version/__init__.py b/.local/lib/python3.8/site-packages/pandas/util/version/__init__.py new file mode 100644 index 0000000..a6eccf2 --- /dev/null +++ b/.local/lib/python3.8/site-packages/pandas/util/version/__init__.py @@ -0,0 +1,579 @@ +# Vendored from https://github.com/pypa/packaging/blob/main/packaging/_structures.py +# and https://github.com/pypa/packaging/blob/main/packaging/_structures.py +# changeset ae891fd74d6dd4c6063bb04f2faeadaac6fc6313 +# 04/30/2021 + +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. 
+from __future__ import annotations + +import collections +import itertools +import re +from typing import ( + Callable, + Iterator, + SupportsInt, + Tuple, + Union, +) +import warnings + +__all__ = ["parse", "Version", "LegacyVersion", "InvalidVersion", "VERSION_PATTERN"] + + +class InfinityType: + def __repr__(self) -> str: + return "Infinity" + + def __hash__(self) -> int: + return hash(repr(self)) + + def __lt__(self, other: object) -> bool: + return False + + def __le__(self, other: object) -> bool: + return False + + def __eq__(self, other: object) -> bool: + return isinstance(other, type(self)) + + def __ne__(self, other: object) -> bool: + return not isinstance(other, type(self)) + + def __gt__(self, other: object) -> bool: + return True + + def __ge__(self, other: object) -> bool: + return True + + def __neg__(self: object) -> NegativeInfinityType: + return NegativeInfinity + + +Infinity = InfinityType() + + +class NegativeInfinityType: + def __repr__(self) -> str: + return "-Infinity" + + def __hash__(self) -> int: + return hash(repr(self)) + + def __lt__(self, other: object) -> bool: + return True + + def __le__(self, other: object) -> bool: + return True + + def __eq__(self, other: object) -> bool: + return isinstance(other, type(self)) + + def __ne__(self, other: object) -> bool: + return not isinstance(other, type(self)) + + def __gt__(self, other: object) -> bool: + return False + + def __ge__(self, other: object) -> bool: + return False + + def __neg__(self: object) -> InfinityType: + return Infinity + + +NegativeInfinity = NegativeInfinityType() + + +InfiniteTypes = Union[InfinityType, NegativeInfinityType] +PrePostDevType = Union[InfiniteTypes, Tuple[str, int]] +SubLocalType = Union[InfiniteTypes, int, str] +LocalType = Union[ + NegativeInfinityType, + Tuple[ + Union[ + SubLocalType, + Tuple[SubLocalType, str], + Tuple[NegativeInfinityType, SubLocalType], + ], + ..., + ], +] +CmpKey = Tuple[ + int, Tuple[int, ...], PrePostDevType, PrePostDevType, PrePostDevType, LocalType +] +LegacyCmpKey = Tuple[int, Tuple[str, ...]] +VersionComparisonMethod = Callable[ + [Union[CmpKey, LegacyCmpKey], Union[CmpKey, LegacyCmpKey]], bool +] + +_Version = collections.namedtuple( + "_Version", ["epoch", "release", "dev", "pre", "post", "local"] +) + + +def parse(version: str) -> LegacyVersion | Version: + """ + Parse the given version string and return either a :class:`Version` object + or a :class:`LegacyVersion` object depending on if the given version is + a valid PEP 440 version or a legacy version. + """ + try: + return Version(version) + except InvalidVersion: + return LegacyVersion(version) + + +class InvalidVersion(ValueError): + """ + An invalid version was found, users should refer to PEP 440. + """ + + +class _BaseVersion: + _key: CmpKey | LegacyCmpKey + + def __hash__(self) -> int: + return hash(self._key) + + # Please keep the duplicated `isinstance` check + # in the six comparisons hereunder + # unless you find a way to avoid adding overhead function calls. 
+ def __lt__(self, other: _BaseVersion) -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key < other._key + + def __le__(self, other: _BaseVersion) -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key <= other._key + + def __eq__(self, other: object) -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key == other._key + + def __ge__(self, other: _BaseVersion) -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key >= other._key + + def __gt__(self, other: _BaseVersion) -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key > other._key + + def __ne__(self, other: object) -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key != other._key + + +class LegacyVersion(_BaseVersion): + def __init__(self, version: str) -> None: + self._version = str(version) + self._key = _legacy_cmpkey(self._version) + + warnings.warn( + "Creating a LegacyVersion has been deprecated and will be " + "removed in the next major release.", + DeprecationWarning, + ) + + def __str__(self) -> str: + return self._version + + def __repr__(self) -> str: + return f"" + + @property + def public(self) -> str: + return self._version + + @property + def base_version(self) -> str: + return self._version + + @property + def epoch(self) -> int: + return -1 + + @property + def release(self) -> None: + return None + + @property + def pre(self) -> None: + return None + + @property + def post(self) -> None: + return None + + @property + def dev(self) -> None: + return None + + @property + def local(self) -> None: + return None + + @property + def is_prerelease(self) -> bool: + return False + + @property + def is_postrelease(self) -> bool: + return False + + @property + def is_devrelease(self) -> bool: + return False + + +_legacy_version_component_re = re.compile(r"(\d+ | [a-z]+ | \.| -)", re.VERBOSE) + +_legacy_version_replacement_map = { + "pre": "c", + "preview": "c", + "-": "final-", + "rc": "c", + "dev": "@", +} + + +def _parse_version_parts(s: str) -> Iterator[str]: + for part in _legacy_version_component_re.split(s): + part = _legacy_version_replacement_map.get(part, part) + + if not part or part == ".": + continue + + if part[:1] in "0123456789": + # pad for numeric comparison + yield part.zfill(8) + else: + yield "*" + part + + # ensure that alpha/beta/candidate are before final + yield "*final" + + +def _legacy_cmpkey(version: str) -> LegacyCmpKey: + + # We hardcode an epoch of -1 here. A PEP 440 version can only have a epoch + # greater than or equal to 0. This will effectively put the LegacyVersion, + # which uses the defacto standard originally implemented by setuptools, + # as before all PEP 440 versions. + epoch = -1 + + # This scheme is taken from pkg_resources.parse_version setuptools prior to + # it's adoption of the packaging library. + parts: list[str] = [] + for part in _parse_version_parts(version.lower()): + if part.startswith("*"): + # remove "-" before a prerelease tag + if part < "*final": + while parts and parts[-1] == "*final-": + parts.pop() + + # remove trailing zeros from each series of numeric parts + while parts and parts[-1] == "00000000": + parts.pop() + + parts.append(part) + + return epoch, tuple(parts) + + +# Deliberately not anchored to the start and end of the string, to make it +# easier for 3rd party code to reuse +VERSION_PATTERN = r""" + v? 
+    (?:
+        (?:(?P<epoch>[0-9]+)!)?                           # epoch
+        (?P<release>[0-9]+(?:\.[0-9]+)*)                  # release segment
+        (?P<pre>                                          # pre-release
+            [-_\.]?
+            (?P<pre_l>(a|b|c|rc|alpha|beta|pre|preview))
+            [-_\.]?
+            (?P<pre_n>[0-9]+)?
+        )?
+        (?P<post>                                         # post release
+            (?:-(?P<post_n1>[0-9]+))
+            |
+            (?:
+                [-_\.]?
+                (?P<post_l>post|rev|r)
+                [-_\.]?
+                (?P<post_n2>[0-9]+)?
+            )
+        )?
+        (?P<dev>                                          # dev release
+            [-_\.]?
+            (?P<dev_l>dev)
+            [-_\.]?
+            (?P<dev_n>[0-9]+)?
+        )?
+    )
+    (?:\+(?P<local>[a-z0-9]+(?:[-_\.][a-z0-9]+)*))?       # local version
+"""
+
+
+class Version(_BaseVersion):
+
+    _regex = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE)
+
+    def __init__(self, version: str) -> None:
+
+        # Validate the version and parse it into pieces
+        match = self._regex.search(version)
+        if not match:
+            raise InvalidVersion(f"Invalid version: '{version}'")
+
+        # Store the parsed out pieces of the version
+        self._version = _Version(
+            epoch=int(match.group("epoch")) if match.group("epoch") else 0,
+            release=tuple(int(i) for i in match.group("release").split(".")),
+            pre=_parse_letter_version(match.group("pre_l"), match.group("pre_n")),
+            post=_parse_letter_version(
+                match.group("post_l"), match.group("post_n1") or match.group("post_n2")
+            ),
+            dev=_parse_letter_version(match.group("dev_l"), match.group("dev_n")),
+            local=_parse_local_version(match.group("local")),
+        )
+
+        # Generate a key which will be used for sorting
+        self._key = _cmpkey(
+            self._version.epoch,
+            self._version.release,
+            self._version.pre,
+            self._version.post,
+            self._version.dev,
+            self._version.local,
+        )
+
+    def __repr__(self) -> str:
+        return f""
+
+    def __str__(self) -> str:
+        parts = []
+
+        # Epoch
+        if self.epoch != 0:
+            parts.append(f"{self.epoch}!")
+
+        # Release segment
+        parts.append(".".join([str(x) for x in self.release]))
+
+        # Pre-release
+        if self.pre is not None:
+            parts.append("".join([str(x) for x in self.pre]))
+
+        # Post-release
+        if self.post is not None:
+            parts.append(f".post{self.post}")
+
+        # Development release
+        if self.dev is not None:
+            parts.append(f".dev{self.dev}")
+
+        # Local version segment
+        if self.local is not None:
+            parts.append(f"+{self.local}")
+
+        return "".join(parts)
+
+    @property
+    def epoch(self) -> int:
+        _epoch: int = self._version.epoch
+        return _epoch
+
+    @property
+    def release(self) -> tuple[int, ...]:
+        _release: tuple[int, ...] = self._version.release
+        return _release
+
+    @property
+    def pre(self) -> tuple[str, int] | None:
+        _pre: tuple[str, int] | None = self._version.pre
+        return _pre
+
+    @property
+    def post(self) -> int | None:
+        return self._version.post[1] if self._version.post else None
+
+    @property
+    def dev(self) -> int | None:
+        return self._version.dev[1] if self._version.dev else None
+
+    @property
+    def local(self) -> str | None:
+        if self._version.local:
+            return ".".join([str(x) for x in self._version.local])
+        else:
+            return None
+
+    @property
+    def public(self) -> str:
+        return str(self).split("+", 1)[0]
+
+    @property
+    def base_version(self) -> str:
+        parts = []
+
+        # Epoch
+        if self.epoch != 0:
+            parts.append(f"{self.epoch}!")
+
+        # Release segment
+        parts.append(".".join([str(x) for x in self.release]))
+
+        return "".join(parts)
+
+    @property
+    def is_prerelease(self) -> bool:
+        return self.dev is not None or self.pre is not None
+
+    @property
+    def is_postrelease(self) -> bool:
+        return self.post is not None
+
+    @property
+    def is_devrelease(self) -> bool:
+        return self.dev is not None
+
+    @property
+    def major(self) -> int:
+        return self.release[0] if len(self.release) >= 1 else 0
+
+    @property
+    def minor(self) -> int:
+        return self.release[1] if len(self.release) >= 2 else 0
+
+    @property
+    def micro(self) -> int:
+        return self.release[2] if len(self.release) >= 3 else 0
+
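To illustrate how the pieces parsed in `__init__` surface through the properties above, here is a short usage sketch (`pandas.util.version` is the vendored module path in this tree):

```python
from pandas.util.version import Version

v = Version("1.4.0rc1")
assert (v.major, v.minor, v.micro) == (1, 4, 0)
assert v.is_prerelease and v.pre == ("rc", 1)
assert v.base_version == "1.4.0"
assert v.public == "1.4.0rc1"

# PEP 440 ordering via _BaseVersion's rich comparisons; this is what
# safe_import() and skip_if_np_lt() in _test_decorators.py rely on.
assert Version("1.4.0rc1") < Version("1.4.0") < Version("1.4.0.post1")
```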
+
+def _parse_letter_version(
+    letter: str, number: str | bytes | SupportsInt
+) -> tuple[str, int] | None:
+
+    if letter:
+        # We consider there to be an implicit 0 in a pre-release if there is
+        # not a numeral associated with it.
+        if number is None:
+            number = 0
+
+        # We normalize any letters to their lower case form
+        letter = letter.lower()
+
+        # We consider some words to be alternate spellings of other words and
+        # in those cases we want to normalize the spellings to our preferred
+        # spelling.
+        if letter == "alpha":
+            letter = "a"
+        elif letter == "beta":
+            letter = "b"
+        elif letter in ["c", "pre", "preview"]:
+            letter = "rc"
+        elif letter in ["rev", "r"]:
+            letter = "post"
+
+        return letter, int(number)
+    if not letter and number:
+        # We assume if we are given a number, but we are not given a letter
+        # then this is using the implicit post release syntax (e.g. 1.0-1)
+        letter = "post"
+
+        return letter, int(number)
+
+    return None
+
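A few concrete inputs make the normalization rules above explicit. This calls the private helper directly, purely for illustration:

```python
from pandas.util.version import _parse_letter_version

assert _parse_letter_version("alpha", None) == ("a", 0)    # implicit 0
assert _parse_letter_version("preview", "2") == ("rc", 2)  # alternate spelling
assert _parse_letter_version("r", "1") == ("post", 1)      # rev/r -> post
assert _parse_letter_version("", "1") == ("post", 1)       # implicit post: 1.0-1
assert _parse_letter_version("", None) is None
```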
+
+_local_version_separators = re.compile(r"[\._-]")
+
+
+def _parse_local_version(local: str) -> LocalType | None:
+    """
+    Takes a string like abc.1.twelve and turns it into ("abc", 1, "twelve").
+    """
+    if local is not None:
+        return tuple(
+            part.lower() if not part.isdigit() else int(part)
+            for part in _local_version_separators.split(local)
+        )
+    return None
+
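For example (matching the docstring above), the separators `.`, `_` and `-` are treated interchangeably, and numeric parts become ints so they compare numerically rather than lexicographically:

```python
from pandas.util.version import _parse_local_version

assert _parse_local_version("abc.1.twelve") == ("abc", 1, "twelve")
assert _parse_local_version("Ubuntu-1_2") == ("ubuntu", 1, 2)
```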
+
+def _cmpkey(
+    epoch: int,
+    release: tuple[int, ...],
+    pre: tuple[str, int] | None,
+    post: tuple[str, int] | None,
+    dev: tuple[str, int] | None,
+    local: tuple[SubLocalType] | None,
+) -> CmpKey:
+
+    # When we compare a release version, we want to compare it with all of the
+    # trailing zeros removed. So we'll reverse the list, drop all the now
+    # leading zeros until we come to something non-zero, then re-reverse the
+    # rest back into the correct order and make it a tuple to use as our
+    # sorting key.
+    _release = tuple(
+        reversed(list(itertools.dropwhile(lambda x: x == 0, reversed(release))))
+    )
+
+    # We need to "trick" the sorting algorithm to put 1.0.dev0 before 1.0a0.
+    # We'll do this by abusing the pre segment, but we _only_ want to do this
+    # if there is not a pre or a post segment. If we have one of those then
+    # the normal sorting rules will handle this case correctly.
+    if pre is None and post is None and dev is not None:
+        _pre: PrePostDevType = NegativeInfinity
+    # Versions without a pre-release (except as noted above) should sort after
+    # those with one.
+    elif pre is None:
+        _pre = Infinity
+    else:
+        _pre = pre
+
+    # Versions without a post segment should sort before those with one.
+    if post is None:
+        _post: PrePostDevType = NegativeInfinity
+
+    else:
+        _post = post
+
+    # Versions without a development segment should sort after those with one.
+    if dev is None:
+        _dev: PrePostDevType = Infinity
+
+    else:
+        _dev = dev
+
+    if local is None:
+        # Versions without a local segment should sort before those with one.
+        _local: LocalType = NegativeInfinity
+    else:
+        # Versions with a local segment need that segment parsed to implement
+        # the sorting rules in PEP440.
+        # - Alpha numeric segments sort before numeric segments
+        # - Alpha numeric segments sort lexicographically
+        # - Numeric segments sort numerically
+        # - Shorter versions sort before longer versions when the prefixes
+        #   match exactly
+        _local = tuple(
+            (i, "") if isinstance(i, int) else (NegativeInfinity, i) for i in local
+        )
+
+    return epoch, _release, _pre, _post, _dev, _local
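Taken together, the `Infinity`/`NegativeInfinity` substitutions above produce the PEP 440 total order. A small sketch of the net effect, using the `Version` class defined earlier:

```python
from pandas.util.version import Version

# dev releases sort first, then pre-releases, then the final release,
# then local variants of it, then post releases.
ordered = ["1.0.dev0", "1.0a0", "1.0rc1", "1.0", "1.0+local", "1.0.post1"]
assert [str(v) for v in sorted(Version(s) for s in ordered)] == ordered
```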
diff --git a/.local/lib/python3.8/site-packages/pandas/util/version/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pandas/util/version/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000..d0cb839
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pandas/util/version/__pycache__/__init__.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/python_dateutil-2.8.2.dist-info/INSTALLER b/.local/lib/python3.8/site-packages/python_dateutil-2.8.2.dist-info/INSTALLER
new file mode 100644
index 0000000..a1b589e
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/python_dateutil-2.8.2.dist-info/INSTALLER
@@ -0,0 +1 @@
+pip
diff --git a/.local/lib/python3.8/site-packages/python_dateutil-2.8.2.dist-info/LICENSE b/.local/lib/python3.8/site-packages/python_dateutil-2.8.2.dist-info/LICENSE
new file mode 100644
index 0000000..1e65815
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/python_dateutil-2.8.2.dist-info/LICENSE
@@ -0,0 +1,54 @@
+Copyright 2017- Paul Ganssle <paul@ganssle.io>
+Copyright 2017- dateutil contributors (see AUTHORS file)
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+The above license applies to all contributions after 2017-12-01, as well as
+all contributions that have been re-licensed (see AUTHORS file for the list of
+contributors who have re-licensed their code).
+--------------------------------------------------------------------------------
+dateutil - Extensions to the standard Python datetime module.
+
+Copyright (c) 2003-2011 - Gustavo Niemeyer <gustavo@niemeyer.net>
+Copyright (c) 2012-2014 - Tomi Pieviläinen <tomi.pievilainen@iki.fi>
+Copyright (c) 2014-2016 - Yaron de Leeuw <me@jarondl.net>
+Copyright (c) 2015-     - Paul Ganssle <paul@ganssle.io>
+Copyright (c) 2015-     - dateutil contributors (see AUTHORS file)
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+      this list of conditions and the following disclaimer in the documentation
+      and/or other materials provided with the distribution.
+    * Neither the name of the copyright holder nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+The above BSD License Applies to all code, even that also covered by Apache 2.0.
\ No newline at end of file
diff --git a/.local/lib/python3.8/site-packages/python_dateutil-2.8.2.dist-info/METADATA b/.local/lib/python3.8/site-packages/python_dateutil-2.8.2.dist-info/METADATA
new file mode 100644
index 0000000..1e46c96
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/python_dateutil-2.8.2.dist-info/METADATA
@@ -0,0 +1,204 @@
+Metadata-Version: 2.1
+Name: python-dateutil
+Version: 2.8.2
+Summary: Extensions to the standard Python datetime module
+Home-page: https://github.com/dateutil/dateutil
+Author: Gustavo Niemeyer
+Author-email: gustavo@niemeyer.net
+Maintainer: Paul Ganssle
+Maintainer-email: dateutil@python.org
+License: Dual License
+Project-URL: Documentation, https://dateutil.readthedocs.io/en/stable/
+Project-URL: Source, https://github.com/dateutil/dateutil
+Platform: UNKNOWN
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: BSD License
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: 2
+Classifier: Programming Language :: Python :: 2.7
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.3
+Classifier: Programming Language :: Python :: 3.4
+Classifier: Programming Language :: Python :: 3.5
+Classifier: Programming Language :: Python :: 3.6
+Classifier: Programming Language :: Python :: 3.7
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Topic :: Software Development :: Libraries
+Requires-Python: !=3.0.*,!=3.1.*,!=3.2.*,>=2.7
+Description-Content-Type: text/x-rst
+License-File: LICENSE
+Requires-Dist: six (>=1.5)
+
+dateutil - powerful extensions to datetime
+==========================================
+
+|pypi| |support| |licence|
+
+|gitter| |readthedocs|
+
+|travis| |appveyor| |pipelines| |coverage|
+
+.. |pypi| image:: https://img.shields.io/pypi/v/python-dateutil.svg?style=flat-square
+    :target: https://pypi.org/project/python-dateutil/
+    :alt: pypi version
+
+.. |support| image:: https://img.shields.io/pypi/pyversions/python-dateutil.svg?style=flat-square
+    :target: https://pypi.org/project/python-dateutil/
+    :alt: supported Python version
+
+.. |travis| image:: https://img.shields.io/travis/dateutil/dateutil/master.svg?style=flat-square&label=Travis%20Build
+    :target: https://travis-ci.org/dateutil/dateutil
+    :alt: travis build status
+
+.. |appveyor| image:: https://img.shields.io/appveyor/ci/dateutil/dateutil/master.svg?style=flat-square&logo=appveyor
+    :target: https://ci.appveyor.com/project/dateutil/dateutil
+    :alt: appveyor build status
+
+.. |pipelines| image:: https://dev.azure.com/pythondateutilazure/dateutil/_apis/build/status/dateutil.dateutil?branchName=master
+    :target: https://dev.azure.com/pythondateutilazure/dateutil/_build/latest?definitionId=1&branchName=master
+    :alt: azure pipelines build status
+
+.. |coverage| image:: https://codecov.io/gh/dateutil/dateutil/branch/master/graphs/badge.svg?branch=master
+    :target: https://codecov.io/gh/dateutil/dateutil?branch=master
+    :alt: Code coverage
+
+.. |gitter| image:: https://badges.gitter.im/dateutil/dateutil.svg
+   :alt: Join the chat at https://gitter.im/dateutil/dateutil
+   :target: https://gitter.im/dateutil/dateutil
+
+.. |licence| image:: https://img.shields.io/pypi/l/python-dateutil.svg?style=flat-square
+    :target: https://pypi.org/project/python-dateutil/
+    :alt: licence
+
+.. |readthedocs| image:: https://img.shields.io/readthedocs/dateutil/latest.svg?style=flat-square&label=Read%20the%20Docs
+   :alt: Read the documentation at https://dateutil.readthedocs.io/en/latest/
+   :target: https://dateutil.readthedocs.io/en/latest/
+
+The `dateutil` module provides powerful extensions to
+the standard `datetime` module, available in Python.
+
+Installation
+============
+`dateutil` can be installed from PyPI using `pip` (note that the package name is
+different from the importable name)::
+
+    pip install python-dateutil
+
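+Since the install name and the import name differ, a quick sanity check
+after installing (a minimal sketch; the version shown assumes the 2.8.2
+release packaged here):
+
+.. code-block:: python3
+
+    >>> import dateutil
+    >>> dateutil.__version__
+    '2.8.2'
+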
+Download
+========
+dateutil is available on PyPI
+https://pypi.org/project/python-dateutil/
+
+The documentation is hosted at:
+https://dateutil.readthedocs.io/en/stable/
+
+Code
+====
+The code and issue tracker are hosted on GitHub:
+https://github.com/dateutil/dateutil/
+
+Features
+========
+
+* Computing of relative deltas (next month, next year,
+  next Monday, last week of month, etc);
+* Computing of relative deltas between two given
+  date and/or datetime objects;
+* Computing of dates based on very flexible recurrence rules,
+  using a superset of the `iCalendar `_
+  specification. Parsing of RFC strings is supported as well.
+* Generic parsing of dates in almost any string format;
+* Timezone (tzinfo) implementations for tzfile(5) format
+  files (/etc/localtime, /usr/share/zoneinfo, etc), TZ
+  environment string (in all known formats), iCalendar
+  format files, given ranges (with help from relative deltas),
+  local machine timezone, fixed offset timezone, UTC timezone,
+  and Windows registry-based time zones.
+* Internal up-to-date world timezone information based on
+  Olson's database.
+* Computing of Easter Sunday dates for any given year,
+  using Western, Orthodox or Julian algorithms;
+* A comprehensive test suite.
+
+Quick example
+=============
+Here's a snapshot, just to give an idea about the power of the
+package. For more examples, look at the documentation.
+
+Suppose you want to know how much time is left, in
+years/months/days/etc, before the next easter happening on a
+year with a Friday 13th in August, and you want to get today's
+date out of the "date" unix system command. Here is the code:
+
+.. code-block:: python3
+
+    >>> from dateutil.relativedelta import *
+    >>> from dateutil.easter import *
+    >>> from dateutil.rrule import *
+    >>> from dateutil.parser import *
+    >>> from datetime import *
+    >>> now = parse("Sat Oct 11 17:13:46 UTC 2003")
+    >>> today = now.date()
+    >>> year = rrule(YEARLY,dtstart=now,bymonth=8,bymonthday=13,byweekday=FR)[0].year
+    >>> rdelta = relativedelta(easter(year), today)
+    >>> print("Today is: %s" % today)
+    Today is: 2003-10-11
+    >>> print("Year with next Aug 13th on a Friday is: %s" % year)
+    Year with next Aug 13th on a Friday is: 2004
+    >>> print("How far is the Easter of that year: %s" % rdelta)
+    How far is the Easter of that year: relativedelta(months=+6)
+    >>> print("And the Easter of that year is: %s" % (today+rdelta))
+    And the Easter of that year is: 2004-04-11
+
+Being exactly 6 months ahead was **really** a coincidence :)
+
+Contributing
+============
+
+We welcome many types of contributions - bug reports, pull requests (code, infrastructure or documentation fixes). For more information about how to contribute to the project, see the ``CONTRIBUTING.md`` file in the repository.
+
+
+Author
+======
+The dateutil module was written by Gustavo Niemeyer 
+in 2003.
+
+It is maintained by:
+
+* Gustavo Niemeyer  2003-2011
+* Tomi Pieviläinen  2012-2014
+* Yaron de Leeuw  2014-2016
+* Paul Ganssle  2015-
+
+Starting with version 2.4.1 and running until 2.8.2, all source and binary
+distributions will be signed by a PGP key that has, at the very least, been
+signed by the key which made the previous release. A table of release signing
+keys can be found below:
+
+===========  ============================
+Releases     Signing key fingerprint
+===========  ============================
+2.4.1-2.8.2  `6B49 ACBA DCF6 BD1C A206 67AB CD54 FCE3 D964 BEFB`_ 
+===========  ============================
+
+New releases *may* have signed tags, but binary and source distributions
+uploaded to PyPI will no longer have GPG signatures attached.
+
+Contact
+=======
+Our mailing list is available at `dateutil@python.org `_. As it is hosted by the PSF, it is subject to the `PSF code of
+conduct `_.
+
+License
+=======
+
+All contributions after December 1, 2017 are released under a dual license - either `Apache 2.0 License `_ or the `BSD 3-Clause License `_. Contributions before December 1, 2017 - except those explicitly relicensed - are released only under the BSD 3-Clause License.
+
+
+.. _6B49 ACBA DCF6 BD1C A206 67AB CD54 FCE3 D964 BEFB:
+   https://pgp.mit.edu/pks/lookup?op=vindex&search=0xCD54FCE3D964BEFB
+
+
diff --git a/.local/lib/python3.8/site-packages/python_dateutil-2.8.2.dist-info/RECORD b/.local/lib/python3.8/site-packages/python_dateutil-2.8.2.dist-info/RECORD
new file mode 100644
index 0000000..c757d01
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/python_dateutil-2.8.2.dist-info/RECORD
@@ -0,0 +1,44 @@
+dateutil/__init__.py,sha256=lXElASqwYGwqlrSWSeX19JwF5Be9tNecDa9ebk-0gmk,222
+dateutil/__pycache__/__init__.cpython-38.pyc,,
+dateutil/__pycache__/_common.cpython-38.pyc,,
+dateutil/__pycache__/_version.cpython-38.pyc,,
+dateutil/__pycache__/easter.cpython-38.pyc,,
+dateutil/__pycache__/relativedelta.cpython-38.pyc,,
+dateutil/__pycache__/rrule.cpython-38.pyc,,
+dateutil/__pycache__/tzwin.cpython-38.pyc,,
+dateutil/__pycache__/utils.cpython-38.pyc,,
+dateutil/_common.py,sha256=77w0yytkrxlYbSn--lDVPUMabUXRR9I3lBv_vQRUqUY,932
+dateutil/_version.py,sha256=awyHv2PYvDR84dxjrHyzmm8nieFwMjcuuShPh-QNkM4,142
+dateutil/easter.py,sha256=dyBi-lKvimH1u_k6p7Z0JJK72QhqVtVBsqByvpEPKvc,2678
+dateutil/parser/__init__.py,sha256=wWk6GFuxTpjoggCGtgkceJoti4pVjl4_fHQXpNOaSYg,1766
+dateutil/parser/__pycache__/__init__.cpython-38.pyc,,
+dateutil/parser/__pycache__/_parser.cpython-38.pyc,,
+dateutil/parser/__pycache__/isoparser.cpython-38.pyc,,
+dateutil/parser/_parser.py,sha256=7klDdyicksQB_Xgl-3UAmBwzCYor1AIZqklIcT6dH_8,58796
+dateutil/parser/isoparser.py,sha256=EtLY7w22HWx-XJpTWxJD3XNs6LBHRCps77tCdLnYad8,13247
+dateutil/relativedelta.py,sha256=GjVxqpAVWnG67rdbf7pkoIlJvQqmju9NSfGCcqblc7U,24904
+dateutil/rrule.py,sha256=b6GVV4MpZDbBhJ5qitQKRyx8-_OKyeAbk57or2A8AYU,66556
+dateutil/tz/__init__.py,sha256=F-Mz13v6jYseklQf9Te9J6nzcLDmq47gORa61K35_FA,444
+dateutil/tz/__pycache__/__init__.cpython-38.pyc,,
+dateutil/tz/__pycache__/_common.cpython-38.pyc,,
+dateutil/tz/__pycache__/_factories.cpython-38.pyc,,
+dateutil/tz/__pycache__/tz.cpython-38.pyc,,
+dateutil/tz/__pycache__/win.cpython-38.pyc,,
+dateutil/tz/_common.py,sha256=cgzDTANsOXvEc86cYF77EsliuSab8Puwpsl5-bX3_S4,12977
+dateutil/tz/_factories.py,sha256=unb6XQNXrPMveksTCU-Ag8jmVZs4SojoPUcAHpWnrvU,2569
+dateutil/tz/tz.py,sha256=JotVjDcF16hzoouQ0kZW-5mCYu7Xj67NI-VQgnWapKE,62857
+dateutil/tz/win.py,sha256=xJszWgSwE1xPx_HJj4ZkepyukC_hNy016WMcXhbRaB8,12935
+dateutil/tzwin.py,sha256=7Ar4vdQCnnM0mKR3MUjbIKsZrBVfHgdwsJZc_mGYRew,59
+dateutil/utils.py,sha256=dKCchEw8eObi0loGTx91unBxm_7UGlU3v_FjFMdqwYM,1965
+dateutil/zoneinfo/__init__.py,sha256=KYg0pthCMjcp5MXSEiBJn3nMjZeNZav7rlJw5-tz1S4,5889
+dateutil/zoneinfo/__pycache__/__init__.cpython-38.pyc,,
+dateutil/zoneinfo/__pycache__/rebuild.cpython-38.pyc,,
+dateutil/zoneinfo/dateutil-zoneinfo.tar.gz,sha256=AkcdBx3XkEZwMSpS_TmOEfrEFHLvgxPNDVIwGVxTVaI,174394
+dateutil/zoneinfo/rebuild.py,sha256=MiqYzCIHvNbMH-LdRYLv-4T0EIA7hDKt5GLR0IRTLdI,2392
+python_dateutil-2.8.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
+python_dateutil-2.8.2.dist-info/LICENSE,sha256=ugD1Gg2SgjtaHN4n2LW50jIeZ-2NqbwWPv-W1eF-V34,2889
+python_dateutil-2.8.2.dist-info/METADATA,sha256=RDHtGo7BnYRjmYxot_wlu_W3N2CyvPtvchbtyIlKKPA,8218
+python_dateutil-2.8.2.dist-info/RECORD,,
+python_dateutil-2.8.2.dist-info/WHEEL,sha256=Z-nyYpwrcSqxfdux5Mbn_DQ525iP7J2DG3JgGvOYyTQ,110
+python_dateutil-2.8.2.dist-info/top_level.txt,sha256=4tjdWkhRZvF7LA_BYe_L9gB2w_p2a-z5y6ArjaRkot8,9
+python_dateutil-2.8.2.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
diff --git a/.local/lib/python3.8/site-packages/python_dateutil-2.8.2.dist-info/WHEEL b/.local/lib/python3.8/site-packages/python_dateutil-2.8.2.dist-info/WHEEL
new file mode 100644
index 0000000..01b8fc7
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/python_dateutil-2.8.2.dist-info/WHEEL
@@ -0,0 +1,6 @@
+Wheel-Version: 1.0
+Generator: bdist_wheel (0.36.2)
+Root-Is-Purelib: true
+Tag: py2-none-any
+Tag: py3-none-any
+
diff --git a/.local/lib/python3.8/site-packages/python_dateutil-2.8.2.dist-info/top_level.txt b/.local/lib/python3.8/site-packages/python_dateutil-2.8.2.dist-info/top_level.txt
new file mode 100644
index 0000000..6650148
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/python_dateutil-2.8.2.dist-info/top_level.txt
@@ -0,0 +1 @@
+dateutil
diff --git a/.local/lib/python3.8/site-packages/python_dateutil-2.8.2.dist-info/zip-safe b/.local/lib/python3.8/site-packages/python_dateutil-2.8.2.dist-info/zip-safe
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/python_dateutil-2.8.2.dist-info/zip-safe
@@ -0,0 +1 @@
+
diff --git a/.local/lib/python3.8/site-packages/python_slugify-6.1.1.dist-info/INSTALLER b/.local/lib/python3.8/site-packages/python_slugify-6.1.1.dist-info/INSTALLER
new file mode 100644
index 0000000..a1b589e
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/python_slugify-6.1.1.dist-info/INSTALLER
@@ -0,0 +1 @@
+pip
diff --git a/.local/lib/python3.8/site-packages/python_slugify-6.1.1.dist-info/LICENSE b/.local/lib/python3.8/site-packages/python_slugify-6.1.1.dist-info/LICENSE
new file mode 100644
index 0000000..82af695
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/python_slugify-6.1.1.dist-info/LICENSE
@@ -0,0 +1,21 @@
+The MIT License
+
+Copyright (c) Val Neekman @ Neekware Inc. http://neekware.com
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/.local/lib/python3.8/site-packages/python_slugify-6.1.1.dist-info/METADATA b/.local/lib/python3.8/site-packages/python_slugify-6.1.1.dist-info/METADATA
new file mode 100644
index 0000000..928b7f6
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/python_slugify-6.1.1.dist-info/METADATA
@@ -0,0 +1,240 @@
+Metadata-Version: 2.1
+Name: python-slugify
+Version: 6.1.1
+Summary: A Python slugify application that also handles Unicode
+Home-page: https://github.com/un33k/python-slugify
+Author: Val Neekman
+Author-email: info@neekware.com
+License: MIT
+Platform: UNKNOWN
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Intended Audience :: Developers
+Classifier: Natural Language :: English
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.6
+Classifier: Programming Language :: Python :: 3.7
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Requires-Python: >=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*
+Description-Content-Type: text/markdown
+Requires-Dist: text-unidecode (>=1.3)
+Provides-Extra: unidecode
+Requires-Dist: Unidecode (>=1.1.1) ; extra == 'unidecode'
+
+# Python Slugify
+
+**A Python slugify application that handles unicode**.
+
+[![status-image]][status-link]
+[![version-image]][version-link]
+[![coverage-image]][coverage-link]
+
+# Overview
+
+**Best attempt** to create slugs from unicode strings while keeping it **DRY**.
+
+# Notice
+
+This module, by default, installs and uses [text-unidecode](https://github.com/kmike/text-unidecode) _(GPL & Perl Artistic)_ for its decoding needs.
+
+However, there is an alternative decoding package called [Unidecode](https://github.com/avian2/unidecode) _(GPL)_. It can be installed as `python-slugify[unidecode]` for those who prefer it.
+
+### Python Versions & `Official` Support
+
+- Python `2.7` <-> python-slugify `< 5.0.0`
+- Python `3.6+` <-> python-slugify `>= 5.0.0`
+
+# How to install
+
+    easy_install python-slugify |OR| easy_install python-slugify[unidecode]
+    -- OR --
+    pip install python-slugify |OR| pip install python-slugify[unidecode]
+
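+A quick smoke test after installing (a minimal sketch; the expected
+slug follows from the usage examples further down):
+
+```python
+from slugify import slugify
+
+# the PyPI name is python-slugify, but the import name is plain slugify
+print(slugify("This is a test ---"))  # prints: this-is-a-test
+```
+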
+# Options
+
+```python
+def slugify(
+    text,
+    entities=True,
+    decimal=True,
+    hexadecimal=True,
+    max_length=0,
+    word_boundary=False,
+    separator='-',
+    save_order=False,
+    stopwords=(),
+    regex_pattern=None,
+    lowercase=True,
+    replacements=(),
+    allow_unicode=False
+  ):
+  """
+  Make a slug from the given text.
+  :param text (str): initial text
+  :param entities (bool): converts html entities to unicode (foo &amp; bar -> foo-bar)
+  :param decimal (bool): converts html decimal to unicode (&#381; -> Ž -> z)
+  :param hexadecimal (bool): converts html hexadecimal to unicode (&#x17D; -> Ž -> z)
+  :param max_length (int): output string length
+  :param word_boundary (bool): truncates to end of full words (length may be shorter than max_length)
+  :param save_order (bool): if True and max_length > 0, returns whole words in the initial order
+  :param separator (str): separator between words
+  :param stopwords (iterable): words to discount
+  :param regex_pattern (str): regex pattern for disallowed characters
+  :param lowercase (bool): set to False to preserve case (output is lowercased by default)
+  :param replacements (iterable): list of replacement rules e.g. [['|', 'or'], ['%', 'percent']]
+  :param allow_unicode (bool): allow unicode characters
+  :return (str): slugified text
+  """
+```
+
+# How to use
+
+```python
+from slugify import slugify
+
+txt = "This is a test ---"
+r = slugify(txt)
+self.assertEqual(r, "this-is-a-test")
+
+txt = '影師嗎'
+r = slugify(txt)
+self.assertEqual(r, "ying-shi-ma")
+
+txt = '影師嗎'
+r = slugify(txt, allow_unicode=True)
+self.assertEqual(r, "影師嗎")
+
+txt = 'C\'est déjà l\'été.'
+r = slugify(txt)
+self.assertEqual(r, "c-est-deja-l-ete")
+
+txt = 'Nín hǎo. Wǒ shì zhōng guó rén'
+r = slugify(txt)
+self.assertEqual(r, "nin-hao-wo-shi-zhong-guo-ren")
+
+txt = 'Компьютер'
+r = slugify(txt)
+self.assertEqual(r, "kompiuter")
+
+txt = 'jaja---lol-méméméoo--a'
+r = slugify(txt, max_length=9)
+self.assertEqual(r, "jaja-lol")
+
+txt = 'jaja---lol-méméméoo--a'
+r = slugify(txt, max_length=15, word_boundary=True)
+self.assertEqual(r, "jaja-lol-a")
+
+txt = 'jaja---lol-méméméoo--a'
+r = slugify(txt, max_length=20, word_boundary=True, separator=".")
+self.assertEqual(r, "jaja.lol.mememeoo.a")
+
+txt = 'one two three four five'
+r = slugify(txt, max_length=13, word_boundary=True, save_order=True)
+self.assertEqual(r, "one-two-three")
+
+txt = 'the quick brown fox jumps over the lazy dog'
+r = slugify(txt, stopwords=['the'])
+self.assertEqual(r, 'quick-brown-fox-jumps-over-lazy-dog')
+
+txt = 'the quick brown fox jumps over the lazy dog in a hurry'
+r = slugify(txt, stopwords=['the', 'in', 'a', 'hurry'])
+self.assertEqual(r, 'quick-brown-fox-jumps-over-lazy-dog')
+
+txt = 'thIs Has a stopword Stopword'
+r = slugify(txt, stopwords=['Stopword'], lowercase=False)
+self.assertEqual(r, 'thIs-Has-a-stopword')
+
+txt = "___This is a test___"
+regex_pattern = r'[^-a-z0-9_]+'
+r = slugify(txt, regex_pattern=regex_pattern)
+self.assertEqual(r, "___this-is-a-test___")
+
+txt = "___This is a test___"
+regex_pattern = r'[^-a-z0-9_]+'
+r = slugify(txt, separator='_', regex_pattern=regex_pattern)
+self.assertNotEqual(r, "_this_is_a_test_")
+
+txt = '10 | 20 %'
+r = slugify(txt, replacements=[['|', 'or'], ['%', 'percent']])
+self.assertEqual(r, "10-or-20-percent")
+
+txt = 'ÜBER Über German Umlaut'
+r = slugify(txt, replacements=[['Ü', 'UE'], ['ü', 'ue']])
+self.assertEqual(r, "ueber-ueber-german-umlaut")
+
+txt = 'i love 🦄'
+r = slugify(txt, allow_unicode=True)
+self.assertEqual(r, "i-love")
+
+txt = 'i love 🦄'
+r = slugify(txt, allow_unicode=True, regex_pattern=r'[^🦄]+')
+self.assertEqual(r, "🦄")
+
+```
+
+For more examples, have a look at the [test.py](test.py) file.
+
+# Command Line Options
+
+The package also installs a command-line tool called `slugify`.
+
+It allows convenient command line access to all the features the `slugify` function supports. Call it with `-h` for help.
+
+The command can take its input directly on the command line or from STDIN (when the `--stdin` flag is passed):
+
+```
+$ echo "Taking input from STDIN" | slugify --stdin
+taking-input-from-stdin
+```
+
+```
+$ slugify taking input from the command line
+taking-input-from-the-command-line
+```
+
+Please note that when a multi-valued option such as `--stopwords` or `--replacements` is passed, you need to use `--` as a separator before the input:
+
+```
+$ slugify --stopwords the in a hurry -- the quick brown fox jumps over the lazy dog in a hurry
+quick-brown-fox-jumps-over-lazy-dog
+```
+
+# Running the tests
+
+To run the tests against the current environment:
+
+    python test.py
+
+# Contribution
+
+Please read the [wiki](https://github.com/un33k/python-slugify/wiki/Python-Slugify-Wiki) page prior to raising any PRs.
+
+# License
+
+Released under an [MIT](LICENSE) license.
+
+# Version
+
+Versions follow the `X.Y.Z` (semver) scheme:
+
+    `MAJOR` version -- when you make incompatible API changes,
+    `MINOR` version -- when you add functionality in a backwards-compatible manner, and
+    `PATCH` version -- when you make backwards-compatible bug fixes.
+
+[status-image]: https://github.com/un33k/python-slugify/actions/workflows/ci.yml/badge.svg
+[status-link]: https://github.com/un33k/python-slugify/actions/workflows/ci.yml
+[version-image]: https://img.shields.io/pypi/v/python-slugify.svg
+[version-link]: https://pypi.python.org/pypi/python-slugify
+[coverage-image]: https://coveralls.io/repos/un33k/python-slugify/badge.svg
+[coverage-link]: https://coveralls.io/r/un33k/python-slugify
+[download-image]: https://img.shields.io/pypi/dm/python-slugify.svg
+[download-link]: https://pypi.python.org/pypi/python-slugify
+
+# Sponsors
+
+[Neekware Inc.](http://neekware.com)
+
+
diff --git a/.local/lib/python3.8/site-packages/python_slugify-6.1.1.dist-info/RECORD b/.local/lib/python3.8/site-packages/python_slugify-6.1.1.dist-info/RECORD
new file mode 100644
index 0000000..6911638
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/python_slugify-6.1.1.dist-info/RECORD
@@ -0,0 +1,16 @@
+python_slugify-6.1.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
+python_slugify-6.1.1.dist-info/LICENSE,sha256=MLpNxpqfTc4TLdcDk3x6k7Vz4lJGBNLV-SxQZlFMDU8,1103
+python_slugify-6.1.1.dist-info/METADATA,sha256=Bo49izKtvVIC-SkOVthrxTjvBPHTmUs7RQB4Iy1-0SI,7647
+python_slugify-6.1.1.dist-info/RECORD,,
+python_slugify-6.1.1.dist-info/WHEEL,sha256=Z-nyYpwrcSqxfdux5Mbn_DQ525iP7J2DG3JgGvOYyTQ,110
+python_slugify-6.1.1.dist-info/top_level.txt,sha256=D7zuR7zxISqlCxArlOOOuLsWObz1_3jgosq5XhlSpew,8
+slugify/__init__.py,sha256=d5jT03a0fysR2YPP2bRyA_B7t-4aA0oUvWC14ETzW8Y,46
+slugify/__main__.py,sha256=3EVQris1UpnWMgvjeVLDvzRXGBqkNkdpzFPmez5syuU,3866
+slugify/__pycache__/__init__.cpython-38.pyc,,
+slugify/__pycache__/__main__.cpython-38.pyc,,
+slugify/__pycache__/__version__.cpython-38.pyc,,
+slugify/__pycache__/slugify.cpython-38.pyc,,
+slugify/__pycache__/special.cpython-38.pyc,,
+slugify/__version__.py,sha256=2Rp7wcYexYmBM50y4S4OYEf_m7PpjH5j01lAXvWCrSQ,325
+slugify/slugify.py,sha256=8OTtq3vluIeB_rrwAbz-RKSGUhrzbZYOdmUv-TloRzM,5795
+slugify/special.py,sha256=uV3YMYay1HTaP3nvyzaiV4FqGazjj8HmDHM1fsPQ3oo,1167
diff --git a/.local/lib/python3.8/site-packages/python_slugify-6.1.1.dist-info/WHEEL b/.local/lib/python3.8/site-packages/python_slugify-6.1.1.dist-info/WHEEL
new file mode 100644
index 0000000..01b8fc7
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/python_slugify-6.1.1.dist-info/WHEEL
@@ -0,0 +1,6 @@
+Wheel-Version: 1.0
+Generator: bdist_wheel (0.36.2)
+Root-Is-Purelib: true
+Tag: py2-none-any
+Tag: py3-none-any
+
diff --git a/.local/lib/python3.8/site-packages/python_slugify-6.1.1.dist-info/top_level.txt b/.local/lib/python3.8/site-packages/python_slugify-6.1.1.dist-info/top_level.txt
new file mode 100644
index 0000000..f4843f7
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/python_slugify-6.1.1.dist-info/top_level.txt
@@ -0,0 +1 @@
+slugify
diff --git a/.local/lib/python3.8/site-packages/pytz-2022.1.dist-info/INSTALLER b/.local/lib/python3.8/site-packages/pytz-2022.1.dist-info/INSTALLER
new file mode 100644
index 0000000..a1b589e
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pytz-2022.1.dist-info/INSTALLER
@@ -0,0 +1 @@
+pip
diff --git a/.local/lib/python3.8/site-packages/pytz-2022.1.dist-info/LICENSE.txt b/.local/lib/python3.8/site-packages/pytz-2022.1.dist-info/LICENSE.txt
new file mode 100644
index 0000000..5f1c112
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pytz-2022.1.dist-info/LICENSE.txt
@@ -0,0 +1,19 @@
+Copyright (c) 2003-2019 Stuart Bishop 
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/.local/lib/python3.8/site-packages/pytz-2022.1.dist-info/METADATA b/.local/lib/python3.8/site-packages/pytz-2022.1.dist-info/METADATA
new file mode 100644
index 0000000..625dd74
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pytz-2022.1.dist-info/METADATA
@@ -0,0 +1,635 @@
+Metadata-Version: 2.1
+Name: pytz
+Version: 2022.1
+Summary: World timezone definitions, modern and historical
+Home-page: http://pythonhosted.org/pytz
+Author: Stuart Bishop
+Author-email: stuart@stuartbishop.net
+Maintainer: Stuart Bishop
+Maintainer-email: stuart@stuartbishop.net
+License: MIT
+Download-URL: https://pypi.org/project/pytz/
+Keywords: timezone,tzinfo,datetime,olson,time
+Platform: Independent
+Classifier: Development Status :: 6 - Mature
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Natural Language :: English
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: 2
+Classifier: Programming Language :: Python :: 2.4
+Classifier: Programming Language :: Python :: 2.5
+Classifier: Programming Language :: Python :: 2.6
+Classifier: Programming Language :: Python :: 2.7
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.1
+Classifier: Programming Language :: Python :: 3.2
+Classifier: Programming Language :: Python :: 3.3
+Classifier: Programming Language :: Python :: 3.4
+Classifier: Programming Language :: Python :: 3.5
+Classifier: Programming Language :: Python :: 3.6
+Classifier: Programming Language :: Python :: 3.7
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+
+pytz - World Timezone Definitions for Python
+============================================
+
+:Author: Stuart Bishop 
+
+Introduction
+~~~~~~~~~~~~
+
+pytz brings the Olson tz database into Python. This library allows
+accurate and cross-platform timezone calculations using Python 2.4
+or higher. It also solves the issue of ambiguous times at the end
+of daylight saving time, which you can read more about in the Python
+Library Reference (``datetime.tzinfo``).
+
+Almost all of the Olson timezones are supported.
+
+.. note::
+
+    This library differs from the documented Python API for
+    tzinfo implementations; if you want to create local wallclock
+    times you need to use the ``localize()`` method described in this
+    document. In addition, if you perform date arithmetic on local
+    times that cross DST boundaries, the result may be in an incorrect
+    timezone (i.e. subtract 1 minute from 2002-10-27 1:00 EST and you get
+    2002-10-27 0:59 EST instead of the correct 2002-10-27 1:59 EDT). A
+    ``normalize()`` method is provided to correct this. Unfortunately these
+    issues cannot be resolved without modifying the Python datetime
+    implementation (see PEP-431).
+
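+A compressed illustration of both points in the note, using the same
+US/Eastern arithmetic worked through at length below (a sketch; the
+doctest output assumes the zone data bundled with this release):
+
+>>> from datetime import datetime, timedelta
+>>> import pytz
+>>> eastern = pytz.timezone('US/Eastern')
+>>> loc_dt = eastern.localize(datetime(2002, 10, 27, 1, 0, 0), is_dst=False)
+>>> # naive arithmetic keeps the stale EST offset; normalize() repairs it
+>>> eastern.normalize(loc_dt - timedelta(minutes=1)).strftime('%H:%M %Z')
+'01:59 EDT'
+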
+
+Installation
+~~~~~~~~~~~~
+
+This package can either be installed using ``pip`` or from a tarball using the
+standard Python distutils.
+
+If you are installing using ``pip``, you don't need to download anything as the
+latest version will be downloaded for you from PyPI::
+
+    pip install pytz
+
+If you are installing from a tarball, run the following command as an
+administrative user::
+
+    python setup.py install
+
+
+pytz for Enterprise
+~~~~~~~~~~~~~~~~~~~
+
+Available as part of the Tidelift Subscription.
+
+The maintainers of pytz and thousands of other packages are working with Tidelift to deliver commercial support and maintenance for the open source dependencies you use to build your applications. Save time, reduce risk, and improve code health, while paying the maintainers of the exact dependencies you use. `Learn more `_.
+
+
+Example & Usage
+~~~~~~~~~~~~~~~
+
+Localized times and date arithmetic
+-----------------------------------
+
+>>> from datetime import datetime, timedelta
+>>> from pytz import timezone
+>>> import pytz
+>>> utc = pytz.utc
+>>> utc.zone
+'UTC'
+>>> eastern = timezone('US/Eastern')
+>>> eastern.zone
+'US/Eastern'
+>>> amsterdam = timezone('Europe/Amsterdam')
+>>> fmt = '%Y-%m-%d %H:%M:%S %Z%z'
+
+This library only supports two ways of building a localized time. The
+first is to use the ``localize()`` method provided by the pytz library.
+This is used to localize a naive datetime (datetime with no timezone
+information):
+
+>>> loc_dt = eastern.localize(datetime(2002, 10, 27, 6, 0, 0))
+>>> print(loc_dt.strftime(fmt))
+2002-10-27 06:00:00 EST-0500
+
+The second way of building a localized time is by converting an existing
+localized time using the standard ``astimezone()`` method:
+
+>>> ams_dt = loc_dt.astimezone(amsterdam)
+>>> ams_dt.strftime(fmt)
+'2002-10-27 12:00:00 CET+0100'
+
+Unfortunately using the tzinfo argument of the standard datetime
+constructors *does not work* with pytz for many timezones.
+
+>>> datetime(2002, 10, 27, 12, 0, 0, tzinfo=amsterdam).strftime(fmt)  # /!\ Does not work this way!
+'2002-10-27 12:00:00 LMT+0020'
+
+It is safe for timezones without daylight saving transitions though, such
+as UTC:
+
+>>> datetime(2002, 10, 27, 12, 0, 0, tzinfo=pytz.utc).strftime(fmt)  # /!\ Not recommended except for UTC
+'2002-10-27 12:00:00 UTC+0000'
+
+The preferred way of dealing with times is to always work in UTC,
+converting to localtime only when generating output to be read
+by humans.
+
+>>> utc_dt = datetime(2002, 10, 27, 6, 0, 0, tzinfo=utc)
+>>> loc_dt = utc_dt.astimezone(eastern)
+>>> loc_dt.strftime(fmt)
+'2002-10-27 01:00:00 EST-0500'
+
+This library also allows you to do date arithmetic using local
+times, although it is more complicated than working in UTC as you
+need to use the ``normalize()`` method to handle daylight saving time
+and other timezone transitions. In this example, ``loc_dt`` is set
+to the instant when daylight saving time ends in the US/Eastern
+timezone.
+
+>>> before = loc_dt - timedelta(minutes=10)
+>>> before.strftime(fmt)
+'2002-10-27 00:50:00 EST-0500'
+>>> eastern.normalize(before).strftime(fmt)
+'2002-10-27 01:50:00 EDT-0400'
+>>> after = eastern.normalize(before + timedelta(minutes=20))
+>>> after.strftime(fmt)
+'2002-10-27 01:10:00 EST-0500'
+
+Creating local times is also tricky, which is why working with
+local times is not recommended. Unfortunately, you cannot just pass
+a ``tzinfo`` argument when constructing a datetime (see the next
+section for more details):
+
+>>> dt = datetime(2002, 10, 27, 1, 30, 0)
+>>> dt1 = eastern.localize(dt, is_dst=True)
+>>> dt1.strftime(fmt)
+'2002-10-27 01:30:00 EDT-0400'
+>>> dt2 = eastern.localize(dt, is_dst=False)
+>>> dt2.strftime(fmt)
+'2002-10-27 01:30:00 EST-0500'
+
+Converting between timezones is more easily done using the
+standard ``astimezone()`` method.
+
+>>> utc_dt = utc.localize(datetime.utcfromtimestamp(1143408899))
+>>> utc_dt.strftime(fmt)
+'2006-03-26 21:34:59 UTC+0000'
+>>> au_tz = timezone('Australia/Sydney')
+>>> au_dt = utc_dt.astimezone(au_tz)
+>>> au_dt.strftime(fmt)
+'2006-03-27 08:34:59 AEDT+1100'
+>>> utc_dt2 = au_dt.astimezone(utc)
+>>> utc_dt2.strftime(fmt)
+'2006-03-26 21:34:59 UTC+0000'
+>>> utc_dt == utc_dt2
+True
+
+You can take shortcuts when dealing with the UTC side of timezone
+conversions. ``normalize()`` and ``localize()`` are not really
+necessary when there are no daylight saving time transitions to
+deal with.
+
+>>> utc_dt = datetime.utcfromtimestamp(1143408899).replace(tzinfo=utc)
+>>> utc_dt.strftime(fmt)
+'2006-03-26 21:34:59 UTC+0000'
+>>> au_tz = timezone('Australia/Sydney')
+>>> au_dt = au_tz.normalize(utc_dt.astimezone(au_tz))
+>>> au_dt.strftime(fmt)
+'2006-03-27 08:34:59 AEDT+1100'
+>>> utc_dt2 = au_dt.astimezone(utc)
+>>> utc_dt2.strftime(fmt)
+'2006-03-26 21:34:59 UTC+0000'
+
+
+``tzinfo`` API
+--------------
+
+The ``tzinfo`` instances returned by the ``timezone()`` function have
+been extended to cope with ambiguous times by adding an ``is_dst``
+parameter to the ``utcoffset()``, ``dst()`` and ``tzname()`` methods.
+
+>>> tz = timezone('America/St_Johns')
+
+>>> normal = datetime(2009, 9, 1)
+>>> ambiguous = datetime(2009, 10, 31, 23, 30)
+
+The ``is_dst`` parameter is ignored for most timestamps. It is only used
+during DST transition ambiguous periods to resolve that ambiguity.
+
+>>> print(tz.utcoffset(normal, is_dst=True))
+-1 day, 21:30:00
+>>> print(tz.dst(normal, is_dst=True))
+1:00:00
+>>> tz.tzname(normal, is_dst=True)
+'NDT'
+
+>>> print(tz.utcoffset(ambiguous, is_dst=True))
+-1 day, 21:30:00
+>>> print(tz.dst(ambiguous, is_dst=True))
+1:00:00
+>>> tz.tzname(ambiguous, is_dst=True)
+'NDT'
+
+>>> print(tz.utcoffset(normal, is_dst=False))
+-1 day, 21:30:00
+>>> tz.dst(normal, is_dst=False).seconds
+3600
+>>> tz.tzname(normal, is_dst=False)
+'NDT'
+
+>>> print(tz.utcoffset(ambiguous, is_dst=False))
+-1 day, 20:30:00
+>>> tz.dst(ambiguous, is_dst=False)
+datetime.timedelta(0)
+>>> tz.tzname(ambiguous, is_dst=False)
+'NST'
+
+If ``is_dst`` is not specified, ambiguous timestamps will raise
+a ``pytz.exceptions.AmbiguousTimeError`` exception.
+
+>>> print(tz.utcoffset(normal))
+-1 day, 21:30:00
+>>> print(tz.dst(normal))
+1:00:00
+>>> tz.tzname(normal)
+'NDT'
+
+>>> import pytz.exceptions
+>>> try:
+...     tz.utcoffset(ambiguous)
+... except pytz.exceptions.AmbiguousTimeError:
+...     print('pytz.exceptions.AmbiguousTimeError: %s' % ambiguous)
+pytz.exceptions.AmbiguousTimeError: 2009-10-31 23:30:00
+>>> try:
+...     tz.dst(ambiguous)
+... except pytz.exceptions.AmbiguousTimeError:
+...     print('pytz.exceptions.AmbiguousTimeError: %s' % ambiguous)
+pytz.exceptions.AmbiguousTimeError: 2009-10-31 23:30:00
+>>> try:
+...     tz.tzname(ambiguous)
+... except pytz.exceptions.AmbiguousTimeError:
+...     print('pytz.exceptions.AmbiguousTimeError: %s' % ambiguous)
+pytz.exceptions.AmbiguousTimeError: 2009-10-31 23:30:00
+
+
+Problems with Localtime
+~~~~~~~~~~~~~~~~~~~~~~~
+
+The major problem we have to deal with is that certain datetimes
+may occur twice in a year. For example, in the US/Eastern timezone
+on the last Sunday morning in October, the following sequence
+happens:
+
+    - 01:00 EDT occurs
+    - 1 hour later, instead of 2:00am the clock is turned back 1 hour
+      and 01:00 happens again (this time 01:00 EST)
+
+In fact, every instant between 01:00 and 02:00 occurs twice. This means
+that if you try to create a time in the 'US/Eastern' timezone using
+the standard datetime syntax, there is no way to specify whether you
+meant before or after the end-of-daylight-saving-time transition. Using the
+pytz custom syntax, the best you can do is make an educated guess:
+
+>>> loc_dt = eastern.localize(datetime(2002, 10, 27, 1, 30, 00))
+>>> loc_dt.strftime(fmt)
+'2002-10-27 01:30:00 EST-0500'
+
+As you can see, the system has chosen one for you and there is a 50%
+chance of it being out by one hour. For some applications, this does
+not matter. However, if you are trying to schedule meetings with people
+in different timezones or analyze log files, it is not acceptable.
+
+The best and simplest solution is to stick with using UTC.  The pytz
+package encourages using UTC for internal timezone representation by
+including a special UTC implementation based on the standard Python
+reference implementation in the Python documentation.
+
+The UTC timezone unpickles to be the same instance, and pickles to a
+smaller size than other pytz tzinfo instances.  The UTC implementation
+can be obtained as pytz.utc, pytz.UTC, or pytz.timezone('UTC').
+
+>>> import pickle, pytz
+>>> dt = datetime(2005, 3, 1, 14, 13, 21, tzinfo=utc)
+>>> naive = dt.replace(tzinfo=None)
+>>> p = pickle.dumps(dt, 1)
+>>> naive_p = pickle.dumps(naive, 1)
+>>> len(p) - len(naive_p)
+17
+>>> new = pickle.loads(p)
+>>> new == dt
+True
+>>> new is dt
+False
+>>> new.tzinfo is dt.tzinfo
+True
+>>> pytz.utc is pytz.UTC is pytz.timezone('UTC')
+True
+
+Note that some other timezones are commonly thought of as the same (GMT,
+Greenwich, Universal, etc.). The definition of UTC is distinct from these
+other timezones, and they are not equivalent. For this reason, they will
+not compare the same in Python.
+
+>>> utc == pytz.timezone('GMT')
+False
+
+See the section `What is UTC`_, below.
+
+If you insist on working with local times, this library provides a
+facility for constructing them unambiguously:
+
+>>> loc_dt = datetime(2002, 10, 27, 1, 30, 00)
+>>> est_dt = eastern.localize(loc_dt, is_dst=True)
+>>> edt_dt = eastern.localize(loc_dt, is_dst=False)
+>>> print(est_dt.strftime(fmt) + ' / ' + edt_dt.strftime(fmt))
+2002-10-27 01:30:00 EDT-0400 / 2002-10-27 01:30:00 EST-0500
+
+If you pass None as the is_dst flag to localize(), pytz will refuse to
+guess and raise exceptions if you try to build ambiguous or non-existent
+times.
+
+For example, 1:30am on 27th Oct 2002 happened twice in the US/Eastern
+timezone when the clocks were put back at the end of Daylight Saving
+Time:
+
+>>> dt = datetime(2002, 10, 27, 1, 30, 00)
+>>> try:
+...     eastern.localize(dt, is_dst=None)
+... except pytz.exceptions.AmbiguousTimeError:
+...     print('pytz.exceptions.AmbiguousTimeError: %s' % dt)
+pytz.exceptions.AmbiguousTimeError: 2002-10-27 01:30:00
+
+Similarly, 2:30am on 7th April 2002 never happened at all in the
+US/Eastern timezone, as the clocks were put forward at 2:00am, skipping
+the entire hour:
+
+>>> dt = datetime(2002, 4, 7, 2, 30, 00)
+>>> try:
+...     eastern.localize(dt, is_dst=None)
+... except pytz.exceptions.NonExistentTimeError:
+...     print('pytz.exceptions.NonExistentTimeError: %s' % dt)
+pytz.exceptions.NonExistentTimeError: 2002-04-07 02:30:00
+
+Both of these exceptions share a common base class to make error handling
+easier:
+
+>>> isinstance(pytz.AmbiguousTimeError(), pytz.InvalidTimeError)
+True
+>>> isinstance(pytz.NonExistentTimeError(), pytz.InvalidTimeError)
+True
+
+
+A special case is where countries change their timezone definitions
+with no daylight saving time switch. For example, in 1915 Warsaw
+switched from Warsaw time to Central European time with no daylight
+saving transition. So at the stroke of midnight on August 5th 1915 the
+clocks were wound back 24 minutes, creating an ambiguous time period
+that cannot be specified without referring to the timezone abbreviation
+or the actual UTC offset. In this case midnight happened twice, neither
+time during a daylight saving time period. pytz handles this transition
+by treating the ambiguous period before the switch as daylight saving
+time, and the ambiguous period after as standard time.
+
+
+>>> warsaw = pytz.timezone('Europe/Warsaw')
+>>> amb_dt1 = warsaw.localize(datetime(1915, 8, 4, 23, 59, 59), is_dst=True)
+>>> amb_dt1.strftime(fmt)
+'1915-08-04 23:59:59 WMT+0124'
+>>> amb_dt2 = warsaw.localize(datetime(1915, 8, 4, 23, 59, 59), is_dst=False)
+>>> amb_dt2.strftime(fmt)
+'1915-08-04 23:59:59 CET+0100'
+>>> switch_dt = warsaw.localize(datetime(1915, 8, 5, 00, 00, 00), is_dst=False)
+>>> switch_dt.strftime(fmt)
+'1915-08-05 00:00:00 CET+0100'
+>>> str(switch_dt - amb_dt1)
+'0:24:01'
+>>> str(switch_dt - amb_dt2)
+'0:00:01'
+
+The best way of creating a time during an ambiguous time period is
+by converting from another timezone such as UTC:
+
+>>> utc_dt = datetime(1915, 8, 4, 22, 36, tzinfo=pytz.utc)
+>>> utc_dt.astimezone(warsaw).strftime(fmt)
+'1915-08-04 23:36:00 CET+0100'
+
+The standard Python way of handling all these ambiguities is not to
+handle them, as demonstrated in this example using the US/Eastern
+timezone definition from the Python documentation (Note that this
+implementation only works for dates between 1987 and 2006 - it is
+included for tests only!):
+
+>>> from pytz.reference import Eastern # pytz.reference only for tests
+>>> dt = datetime(2002, 10, 27, 0, 30, tzinfo=Eastern)
+>>> str(dt)
+'2002-10-27 00:30:00-04:00'
+>>> str(dt + timedelta(hours=1))
+'2002-10-27 01:30:00-05:00'
+>>> str(dt + timedelta(hours=2))
+'2002-10-27 02:30:00-05:00'
+>>> str(dt + timedelta(hours=3))
+'2002-10-27 03:30:00-05:00'
+
+Notice the first two results? At first glance you might think they are
+correct, but taking the UTC offset into account you find that they are
+actually two hours apart instead of the 1 hour we asked for.
+
+>>> from pytz.reference import UTC # pytz.reference only for tests
+>>> str(dt.astimezone(UTC))
+'2002-10-27 04:30:00+00:00'
+>>> str((dt + timedelta(hours=1)).astimezone(UTC))
+'2002-10-27 06:30:00+00:00'
+
+
+Country Information
+~~~~~~~~~~~~~~~~~~~
+
+A mechanism is provided to access the timezones commonly in use
+for a particular country, looked up using the ISO 3166 country code.
+It returns a list of strings that can be used to retrieve the relevant
+tzinfo instance using ``pytz.timezone()``:
+
+>>> print(' '.join(pytz.country_timezones['nz']))
+Pacific/Auckland Pacific/Chatham
+
+The Olson database comes with an ISO 3166 country code to English country
+name mapping that pytz exposes as a dictionary:
+
+>>> print(pytz.country_names['nz'])
+New Zealand
+
+
+What is UTC
+~~~~~~~~~~~
+
+'UTC' is `Coordinated Universal Time`_. It is a successor to, but distinct
+from, Greenwich Mean Time (GMT) and the various definitions of Universal
+Time. UTC is now the worldwide standard for regulating clocks and time
+measurement.
+
+All other timezones are defined relative to UTC, and include offsets like
+UTC+0800 - hours to add or subtract from UTC to derive the local time. No
+daylight saving time occurs in UTC, making it a useful timezone to perform
+date arithmetic without worrying about the confusion and ambiguities caused
+by daylight saving time transitions, your country changing its timezone, or
+mobile computers that roam through multiple timezones.
+
+..  _Coordinated Universal Time: https://en.wikipedia.org/wiki/Coordinated_Universal_Time
+
+
+Helpers
+~~~~~~~
+
+There are two lists of timezones provided.
+
+``all_timezones`` is the exhaustive list of the timezone names that can
+be used.
+
+>>> from pytz import all_timezones
+>>> len(all_timezones) >= 500
+True
+>>> 'Etc/Greenwich' in all_timezones
+True
+
+``common_timezones`` is a list of useful, current timezones. It doesn't
+contain deprecated zones or historical zones, except for a few I've
+deemed in common usage, such as US/Eastern (open a bug report if you
+think other timezones are deserving of being included here). It is also
+a sequence of strings.
+
+>>> from pytz import common_timezones
+>>> len(common_timezones) < len(all_timezones)
+True
+>>> 'Etc/Greenwich' in common_timezones
+False
+>>> 'Australia/Melbourne' in common_timezones
+True
+>>> 'US/Eastern' in common_timezones
+True
+>>> 'Canada/Eastern' in common_timezones
+True
+>>> 'Australia/Yancowinna' in all_timezones
+True
+>>> 'Australia/Yancowinna' in common_timezones
+False
+
+Both ``common_timezones`` and ``all_timezones`` are alphabetically
+sorted:
+
+>>> common_timezones_dupe = common_timezones[:]
+>>> common_timezones_dupe.sort()
+>>> common_timezones == common_timezones_dupe
+True
+>>> all_timezones_dupe = all_timezones[:]
+>>> all_timezones_dupe.sort()
+>>> all_timezones == all_timezones_dupe
+True
+
+``all_timezones`` and ``common_timezones`` are also available as sets.
+
+>>> from pytz import all_timezones_set, common_timezones_set
+>>> 'US/Eastern' in all_timezones_set
+True
+>>> 'US/Eastern' in common_timezones_set
+True
+>>> 'Australia/Victoria' in common_timezones_set
+False
+
+You can also retrieve lists of timezones used by particular countries
+using the ``country_timezones()`` function. It requires an ISO 3166
+two-letter country code.
+
+>>> from pytz import country_timezones
+>>> print(' '.join(country_timezones('ch')))
+Europe/Zurich
+>>> print(' '.join(country_timezones('CH')))
+Europe/Zurich
+
+
+Internationalization - i18n/l10n
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Pytz is an interface to the IANA database, which uses ASCII names. The `Unicode Consortium's Unicode Locales (CLDR) `_
+project provides translations. Thomas Khyn's
+`l18n `_ package can be used to access
+these translations from Python.
+
+
+License
+~~~~~~~
+
+MIT license.
+
+This code is also available as part of Zope 3 under the Zope Public
+License, Version 2.1 (ZPL).
+
+I'm happy to relicense this code if necessary for inclusion in other
+open source projects.
+
+
+Latest Versions
+~~~~~~~~~~~~~~~
+
+This package will be updated after releases of the Olson timezone
+database.  The latest version can be downloaded from the `Python Package
+Index `_.  The code that is used
+to generate this distribution is hosted on launchpad.net and available
+using git::
+
+    git clone https://git.launchpad.net/pytz
+
+A mirror on github is also available at https://github.com/stub42/pytz
+
+Announcements of new releases are made on
+`Launchpad `_, and the
+`Atom feed `_
+hosted there.
+
+
+Bugs, Feature Requests & Patches
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Bugs can be reported using `Launchpad Bugs `_.
+
+
+Security Issues
+~~~~~~~~~~~~~~~
+
+Reports about security issues can be made via `Tidelift `_.
+
+
+Issues & Limitations
+~~~~~~~~~~~~~~~~~~~~
+
+- Offsets from UTC are rounded to the nearest whole minute, so timezones
+  such as Europe/Amsterdam pre 1937 will be up to 30 seconds out. This
+  is a limitation of the Python datetime library.
+
+- If you think a timezone definition is incorrect, I probably can't fix
+  it. pytz is a direct translation of the Olson timezone database, and
+  changes to the timezone definitions need to be made to this source.
+  If you find errors they should be reported to the time zone mailing
+  list, linked from http://www.iana.org/time-zones.
+
+
+Further Reading
+~~~~~~~~~~~~~~~
+
+More info than you want to know about timezones:
+http://www.twinsun.com/tz/tz-link.htm
+
+
+Contact
+~~~~~~~
+
+Stuart Bishop 
+
+
+
+
diff --git a/.local/lib/python3.8/site-packages/pytz-2022.1.dist-info/RECORD b/.local/lib/python3.8/site-packages/pytz-2022.1.dist-info/RECORD
new file mode 100644
index 0000000..0329329
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pytz-2022.1.dist-info/RECORD
@@ -0,0 +1,619 @@
+pytz-2022.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
+pytz-2022.1.dist-info/LICENSE.txt,sha256=vosaN-vibFkqkPbA6zMQOn84POL010mMCvmlJpkKB7g,1088
+pytz-2022.1.dist-info/METADATA,sha256=6CH3GCROrYmB2ceZyH8e2UQIKzUnwYkYqMsXLDiz6BE,21448
+pytz-2022.1.dist-info/RECORD,,
+pytz-2022.1.dist-info/WHEEL,sha256=kGT74LWyRUZrL4VgLh6_g12IeVl_9u9ZVhadrgXZUEY,110
+pytz-2022.1.dist-info/top_level.txt,sha256=6xRYlt934v1yHb1JIrXgHyGxn3cqACvd-yE8ski_kcc,5
+pytz-2022.1.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
+pytz/__init__.py,sha256=m58Mf4zJPVDAqRw2-ClaCPvcIRtSerSsTPbwru_mG8Q,35163
+pytz/__pycache__/__init__.cpython-38.pyc,,
+pytz/__pycache__/exceptions.cpython-38.pyc,,
+pytz/__pycache__/lazy.cpython-38.pyc,,
+pytz/__pycache__/reference.cpython-38.pyc,,
+pytz/__pycache__/tzfile.cpython-38.pyc,,
+pytz/__pycache__/tzinfo.cpython-38.pyc,,
+pytz/exceptions.py,sha256=434ZcuLlpLQY9mWoGq7zJMV1TyiYvVgpKBU1qZkbDjM,1571
+pytz/lazy.py,sha256=toeR5uDWKBj6ezsUZ4elNP6CEMtK7CO2jS9A30nsFbo,5404
+pytz/reference.py,sha256=zUtCki7JFEmrzrjNsfMD7YL0lWDxynKc1Ubo4iXSs74,3778
+pytz/tzfile.py,sha256=K2y7pZs4vydpZVftrfAA_-hgw17y1Szc7z_QCse6udU,4723
+pytz/tzinfo.py,sha256=-5UjW-yqHbtO5NtSaWope7EbSdf2oTES26Kdlxjqdk0,19272
+pytz/zoneinfo/Africa/Abidjan,sha256=0u-sTl8j2IyV1ywdtCgHFw9S9D3ZiiBa9akqkbny2Zc,148
+pytz/zoneinfo/Africa/Accra,sha256=0u-sTl8j2IyV1ywdtCgHFw9S9D3ZiiBa9akqkbny2Zc,148
+pytz/zoneinfo/Africa/Addis_Ababa,sha256=yJsuJTqJJqbOz37_NOS_zbf-JNr_IthHGMMN7sDqSWg,265
+pytz/zoneinfo/Africa/Algiers,sha256=vaFpjNVCwObnbfu82rOQzdJvN6nVgmpXpQ1aqzfzsqY,735
+pytz/zoneinfo/Africa/Asmara,sha256=yJsuJTqJJqbOz37_NOS_zbf-JNr_IthHGMMN7sDqSWg,265
+pytz/zoneinfo/Africa/Asmera,sha256=yJsuJTqJJqbOz37_NOS_zbf-JNr_IthHGMMN7sDqSWg,265
+pytz/zoneinfo/Africa/Bamako,sha256=0u-sTl8j2IyV1ywdtCgHFw9S9D3ZiiBa9akqkbny2Zc,148
+pytz/zoneinfo/Africa/Bangui,sha256=z_6wKCzL1_ug5JP_hneh5abdUZeIUELkN_ladz-ESEY,235
+pytz/zoneinfo/Africa/Banjul,sha256=0u-sTl8j2IyV1ywdtCgHFw9S9D3ZiiBa9akqkbny2Zc,148
+pytz/zoneinfo/Africa/Bissau,sha256=IjuxDP6EZiDHFvl_bHS6NN7sdRxLKXllooBC829poak,194
+pytz/zoneinfo/Africa/Blantyre,sha256=k_GelVHViGiuWCB1LSyTpIYSTDZEY9yclInQRY-LxoI,149
+pytz/zoneinfo/Africa/Brazzaville,sha256=z_6wKCzL1_ug5JP_hneh5abdUZeIUELkN_ladz-ESEY,235
+pytz/zoneinfo/Africa/Bujumbura,sha256=k_GelVHViGiuWCB1LSyTpIYSTDZEY9yclInQRY-LxoI,149
+pytz/zoneinfo/Africa/Cairo,sha256=L6zLQLnQtLkEELOGfm6USaHY33qAEPgGV822-iU1vxc,1955
+pytz/zoneinfo/Africa/Casablanca,sha256=qzlDyFvkLZWy8Bydogdx_cxZCkWzRwEEsuVWstJI_-s,2429
+pytz/zoneinfo/Africa/Ceuta,sha256=jp7xqONgZ3NPnElHzJEVusHKM9rxDK1nxJm4-i7Ln8o,2036
+pytz/zoneinfo/Africa/Conakry,sha256=0u-sTl8j2IyV1ywdtCgHFw9S9D3ZiiBa9akqkbny2Zc,148
+pytz/zoneinfo/Africa/Dakar,sha256=0u-sTl8j2IyV1ywdtCgHFw9S9D3ZiiBa9akqkbny2Zc,148
+pytz/zoneinfo/Africa/Dar_es_Salaam,sha256=yJsuJTqJJqbOz37_NOS_zbf-JNr_IthHGMMN7sDqSWg,265
+pytz/zoneinfo/Africa/Djibouti,sha256=yJsuJTqJJqbOz37_NOS_zbf-JNr_IthHGMMN7sDqSWg,265
+pytz/zoneinfo/Africa/Douala,sha256=z_6wKCzL1_ug5JP_hneh5abdUZeIUELkN_ladz-ESEY,235
+pytz/zoneinfo/Africa/El_Aaiun,sha256=Ja0t5t3QHHrvY0EGgxadypAabj4GjMLuQTTbOAur5M0,2295
+pytz/zoneinfo/Africa/Freetown,sha256=0u-sTl8j2IyV1ywdtCgHFw9S9D3ZiiBa9akqkbny2Zc,148
+pytz/zoneinfo/Africa/Gaborone,sha256=k_GelVHViGiuWCB1LSyTpIYSTDZEY9yclInQRY-LxoI,149
+pytz/zoneinfo/Africa/Harare,sha256=k_GelVHViGiuWCB1LSyTpIYSTDZEY9yclInQRY-LxoI,149
+pytz/zoneinfo/Africa/Johannesburg,sha256=bBvMdSZo53WFowiuhUO9C8zY6BOGViboCb-U8_49l34,246
+pytz/zoneinfo/Africa/Juba,sha256=UVnIqEPJwHLTMC-r5qZQHNv9opoYVsKdq-ta_5XUw_Q,679
+pytz/zoneinfo/Africa/Kampala,sha256=yJsuJTqJJqbOz37_NOS_zbf-JNr_IthHGMMN7sDqSWg,265
+pytz/zoneinfo/Africa/Khartoum,sha256=MYWDoJ3AcCItZdApoeOgtWWDDxquwTon5v5TOGP70-o,679
+pytz/zoneinfo/Africa/Kigali,sha256=k_GelVHViGiuWCB1LSyTpIYSTDZEY9yclInQRY-LxoI,149
+pytz/zoneinfo/Africa/Kinshasa,sha256=z_6wKCzL1_ug5JP_hneh5abdUZeIUELkN_ladz-ESEY,235
+pytz/zoneinfo/Africa/Lagos,sha256=z_6wKCzL1_ug5JP_hneh5abdUZeIUELkN_ladz-ESEY,235
+pytz/zoneinfo/Africa/Libreville,sha256=z_6wKCzL1_ug5JP_hneh5abdUZeIUELkN_ladz-ESEY,235
+pytz/zoneinfo/Africa/Lome,sha256=0u-sTl8j2IyV1ywdtCgHFw9S9D3ZiiBa9akqkbny2Zc,148
+pytz/zoneinfo/Africa/Luanda,sha256=z_6wKCzL1_ug5JP_hneh5abdUZeIUELkN_ladz-ESEY,235
+pytz/zoneinfo/Africa/Lubumbashi,sha256=k_GelVHViGiuWCB1LSyTpIYSTDZEY9yclInQRY-LxoI,149
+pytz/zoneinfo/Africa/Lusaka,sha256=k_GelVHViGiuWCB1LSyTpIYSTDZEY9yclInQRY-LxoI,149
+pytz/zoneinfo/Africa/Malabo,sha256=z_6wKCzL1_ug5JP_hneh5abdUZeIUELkN_ladz-ESEY,235
+pytz/zoneinfo/Africa/Maputo,sha256=k_GelVHViGiuWCB1LSyTpIYSTDZEY9yclInQRY-LxoI,149
+pytz/zoneinfo/Africa/Maseru,sha256=bBvMdSZo53WFowiuhUO9C8zY6BOGViboCb-U8_49l34,246
+pytz/zoneinfo/Africa/Mbabane,sha256=bBvMdSZo53WFowiuhUO9C8zY6BOGViboCb-U8_49l34,246
+pytz/zoneinfo/Africa/Mogadishu,sha256=yJsuJTqJJqbOz37_NOS_zbf-JNr_IthHGMMN7sDqSWg,265
+pytz/zoneinfo/Africa/Monrovia,sha256=-VsJW5cU4KdvfgYaQVv4lcuzmaKIVFMd42nO6RXOBdU,208
+pytz/zoneinfo/Africa/Nairobi,sha256=yJsuJTqJJqbOz37_NOS_zbf-JNr_IthHGMMN7sDqSWg,265
+pytz/zoneinfo/Africa/Ndjamena,sha256=8T3A0Zm9Gj0Bvm6rd88t3GAXKiKdGUfHlIqYlkYI0KM,199
+pytz/zoneinfo/Africa/Niamey,sha256=z_6wKCzL1_ug5JP_hneh5abdUZeIUELkN_ladz-ESEY,235
+pytz/zoneinfo/Africa/Nouakchott,sha256=0u-sTl8j2IyV1ywdtCgHFw9S9D3ZiiBa9akqkbny2Zc,148
+pytz/zoneinfo/Africa/Ouagadougou,sha256=0u-sTl8j2IyV1ywdtCgHFw9S9D3ZiiBa9akqkbny2Zc,148
+pytz/zoneinfo/Africa/Porto-Novo,sha256=z_6wKCzL1_ug5JP_hneh5abdUZeIUELkN_ladz-ESEY,235
+pytz/zoneinfo/Africa/Sao_Tome,sha256=MdjxpQ268uzJ7Zx1ZroFUtRUwqsJ6F_yY3AYV9FXw1I,254
+pytz/zoneinfo/Africa/Timbuktu,sha256=0u-sTl8j2IyV1ywdtCgHFw9S9D3ZiiBa9akqkbny2Zc,148
+pytz/zoneinfo/Africa/Tripoli,sha256=W1dptGD70T7ppGoo0fczFQeDiIp0nultLNPV66MwB2c,625
+pytz/zoneinfo/Africa/Tunis,sha256=OFVMEM4eYT2Ez0beuhEUCTSIpcFldWxsV2uEoTZIUNI,689
+pytz/zoneinfo/Africa/Windhoek,sha256=xuhvudrMH4alnVmouSTQI8YL8F_HbgsF2EQ7AZKzuHs,955
+pytz/zoneinfo/America/Adak,sha256=IB1DhwJQAKbhPJ9jHLf8zW5Dad7HIkBS-dhv64E1OlM,2356
+pytz/zoneinfo/America/Anchorage,sha256=oZA1NSPS2BWdymYpnCHFO8BlYVS-ll5KLg2Ez9CbETs,2371
+pytz/zoneinfo/America/Anguilla,sha256=hJHlV_-AGoMGUWuMpZRv9fLmghrzFHfrR9fRkcxaZJc,246
+pytz/zoneinfo/America/Antigua,sha256=hJHlV_-AGoMGUWuMpZRv9fLmghrzFHfrR9fRkcxaZJc,246
+pytz/zoneinfo/America/Araguaina,sha256=kppiiytmSQeesflyNGYM3r8NVUl1C-ggu08s9_Tt-co,884
+pytz/zoneinfo/America/Argentina/Buenos_Aires,sha256=ntn_GFHadbrFJ4ZuhU6h2uzbFwmDyS9mXV5S28pkGF8,1076
+pytz/zoneinfo/America/Argentina/Catamarca,sha256=diH1f96kbbY-7gJYQnSCNHs3n9dwHJqUhSdGNx1L7I0,1076
+pytz/zoneinfo/America/Argentina/ComodRivadavia,sha256=diH1f96kbbY-7gJYQnSCNHs3n9dwHJqUhSdGNx1L7I0,1076
+pytz/zoneinfo/America/Argentina/Cordoba,sha256=1XqIP8Qo2bPR7909hrAI-qAttybmwEW4ms7FjZA5Yfw,1076
+pytz/zoneinfo/America/Argentina/Jujuy,sha256=5HR0TlZFifwJ5nLTmg7yWXgCTx9mRhahfs4_Wq70wOY,1048
+pytz/zoneinfo/America/Argentina/La_Rioja,sha256=Zf_E3akFE1YUt9MZ4xxbRnOrp2bH1D-Bjsc0SLFfRyU,1090
+pytz/zoneinfo/America/Argentina/Mendoza,sha256=5DJiYYeQpcLBR_IoIJtk43IswJeGYawx5GykszuJ-Nw,1076
+pytz/zoneinfo/America/Argentina/Rio_Gallegos,sha256=T97WADwva6JbxICviNQUt_7iw9c-nloI4QJCscENSck,1076
+pytz/zoneinfo/America/Argentina/Salta,sha256=ATw0uR6szWKPs6jzdn6revS7UxCXD26ORK6jlmsjL18,1048
+pytz/zoneinfo/America/Argentina/San_Juan,sha256=qlW693a0Tnofy-RdcVBuWY3DvTTGxWwcYdKU3Y98pX8,1090
+pytz/zoneinfo/America/Argentina/San_Luis,sha256=WYdcro5-Fe-N6LkQsKwx_1tVozmnBp58DO1-BJs2suo,1102
+pytz/zoneinfo/America/Argentina/Tucuman,sha256=wsjg1a5AM1dP2gjr112k3vt54trcOOM_StF74xzvBJc,1104
+pytz/zoneinfo/America/Argentina/Ushuaia,sha256=9548Vvq_kpw_NX5s65vYuIbqvwGV-PBxqwmcrflLI0U,1076
+pytz/zoneinfo/America/Aruba,sha256=hJHlV_-AGoMGUWuMpZRv9fLmghrzFHfrR9fRkcxaZJc,246
+pytz/zoneinfo/America/Asuncion,sha256=FTLtFk6MjJoh5VIDgJ2Sf4B_iNeCDxrV0MWwQL-sOVM,2044
+pytz/zoneinfo/America/Atikokan,sha256=kayA_pdpMcSQ0FjIzotdcf-m1JYfbKE-qcFT8LC8zqA,182
+pytz/zoneinfo/America/Atka,sha256=IB1DhwJQAKbhPJ9jHLf8zW5Dad7HIkBS-dhv64E1OlM,2356
+pytz/zoneinfo/America/Bahia,sha256=cmLkSAAzINlzYGXBqADEU3uPgA9S5nt-p1AV3Zy86VY,1024
+pytz/zoneinfo/America/Bahia_Banderas,sha256=BNjbcHSlPsJ4UpJx-gs1hpIyx2ScBieh1nyDuGb0PcE,1546
+pytz/zoneinfo/America/Barbados,sha256=ima-Qrrhazu4Qfvu2Z0-e6E-GTiYknuJBu6c2yVG9LE,436
+pytz/zoneinfo/America/Belem,sha256=_258hQZLCEXBX8xRLyQSw-AE-jiDmjVwJX32mN5UUEk,576
+pytz/zoneinfo/America/Belize,sha256=pkfLY2KfPchbeJa1pWcXmWAwp4ZlRvxWLVezXnrbkws,1614
+pytz/zoneinfo/America/Blanc-Sablon,sha256=hJHlV_-AGoMGUWuMpZRv9fLmghrzFHfrR9fRkcxaZJc,246
+pytz/zoneinfo/America/Boa_Vista,sha256=V4VVOkrFUV1qUfVp9E974IOJFmA5QxQrctatTBEb-hs,632
+pytz/zoneinfo/America/Bogota,sha256=ZaQKTZi35AMdlROs0vjEDA_phR8ztJOnjA8aLJZ5tHw,246
+pytz/zoneinfo/America/Boise,sha256=Yv4AXa2nSH_oVo3FZqZCR7V7z7c6WnQgKIUyNUpzGXA,2394
+pytz/zoneinfo/America/Buenos_Aires,sha256=ntn_GFHadbrFJ4ZuhU6h2uzbFwmDyS9mXV5S28pkGF8,1076
+pytz/zoneinfo/America/Cambridge_Bay,sha256=Nanl8yH4SshljhEjDe-PZCYEXbUuuZGmkbAAt2dB-bk,2084
+pytz/zoneinfo/America/Campo_Grande,sha256=5BBENR3_8gJp4F_Uj2RRknvRc4JJWNRPnZU9E7tb8QI,1444
+pytz/zoneinfo/America/Cancun,sha256=YR2U5T6mDGd5xm8EVA_TM1NwSRMYPNYWvV7wuthnX0I,782
+pytz/zoneinfo/America/Caracas,sha256=2NpwXPEtQkI82WCZuQWHXf66VCADcawMpfhKTsuA0x4,264
+pytz/zoneinfo/America/Catamarca,sha256=diH1f96kbbY-7gJYQnSCNHs3n9dwHJqUhSdGNx1L7I0,1076
+pytz/zoneinfo/America/Cayenne,sha256=atVbW5ChJiKQ_q-3kFs-DLTTZa9ptkiHkmJlq4AXoY4,198
+pytz/zoneinfo/America/Cayman,sha256=kayA_pdpMcSQ0FjIzotdcf-m1JYfbKE-qcFT8LC8zqA,182
+pytz/zoneinfo/America/Chicago,sha256=4aZFw-svkMyXmSpNufqzK-xveos-oVJDpEyI8Yu9HQE,3576
+pytz/zoneinfo/America/Chihuahua,sha256=cewXJyEw4KCoz33yl8o2tUJZmugBWH4R0Aovdmuqf-o,1484
+pytz/zoneinfo/America/Coral_Harbour,sha256=kayA_pdpMcSQ0FjIzotdcf-m1JYfbKE-qcFT8LC8zqA,182
+pytz/zoneinfo/America/Cordoba,sha256=1XqIP8Qo2bPR7909hrAI-qAttybmwEW4ms7FjZA5Yfw,1076
+pytz/zoneinfo/America/Costa_Rica,sha256=74rYa6lrgIkyls9PkHo8SCYl9oOqiuG5S7MWdnJelP4,316
+pytz/zoneinfo/America/Creston,sha256=nEOwYOnGxENw9zW8m50PGxbtVfTrX3QYAo4x4LgOLfI,328
+pytz/zoneinfo/America/Cuiaba,sha256=M0FsR8T9s4jFSuzD8Qi6pqtb6Rf2NTzyVHKGZrn56n4,1416
+pytz/zoneinfo/America/Curacao,sha256=hJHlV_-AGoMGUWuMpZRv9fLmghrzFHfrR9fRkcxaZJc,246
+pytz/zoneinfo/America/Danmarkshavn,sha256=YRZAfUCoVtaL1L-MYMYMH1wyOaVQnfUo_gFnvMXSuzw,698
+pytz/zoneinfo/America/Dawson,sha256=rAHhyuMuyjf_eyA2SBG76MRBf_fj_xi5FAuiWVQgJhw,1614
+pytz/zoneinfo/America/Dawson_Creek,sha256=aJXCyP4j3ggE4wGCN-LrS9hpD_5zWHzQTeSAKTWEPUM,1050
+pytz/zoneinfo/America/Denver,sha256=6_yPo1_mvnt9DgpPzr0QdHsjdsfUG6ALnagQLML1DSM,2444
+pytz/zoneinfo/America/Detroit,sha256=hecz8yqY2Cj5B61G3gLZdAVZvRgK9l0P90c_gN-uD5g,2230
+pytz/zoneinfo/America/Dominica,sha256=hJHlV_-AGoMGUWuMpZRv9fLmghrzFHfrR9fRkcxaZJc,246
+pytz/zoneinfo/America/Edmonton,sha256=-TkIfc3QlvaCf0p8COZ43Y1HRBAl-nARUi-JdXeK1vE,2332
+pytz/zoneinfo/America/Eirunepe,sha256=pS90HZzRwH4Tf8ugmKHfiphX7zCPqZkh_0CNb-fEMAM,656
+pytz/zoneinfo/America/El_Salvador,sha256=gvGN8Lkj-sGm2_rs8OUjAMf1oMtKp2Xes6UfWT0WqgU,224
+pytz/zoneinfo/America/Ensenada,sha256=OHHtvy3J70z6wvKBHgPqMEnGs6SXp8fkf0WX9ZiOODk,2342
+pytz/zoneinfo/America/Fort_Nelson,sha256=erfODr3DrSpz65kAdO7Ts2dGbZxvddEP6gx4BX3y2J0,2240
+pytz/zoneinfo/America/Fort_Wayne,sha256=GrNub1_3Um5Qh67wOx58_TEAz4fwAeAlk2AlMTVA_sI,1666
+pytz/zoneinfo/America/Fortaleza,sha256=mITuMrRLRTWyoiF04Oy_UZ8gxZofTpXDblM8t7ch7Sg,716
+pytz/zoneinfo/America/Glace_Bay,sha256=G8DGLGCapH_aYCF_OhaL5Qonf7FOAgAPwelO5htCWBc,2192
+pytz/zoneinfo/America/Godthab,sha256=FtlXWP_hBNuwBHkI2b1yne_tSUJpwLtWLyTHZoFZkmM,1878
+pytz/zoneinfo/America/Goose_Bay,sha256=JgaLueghSvX2g725FOfIgpgvsqxZGykWOhAZWGpQZRY,3210
+pytz/zoneinfo/America/Grand_Turk,sha256=4YOFEPK60Bel2_fCsY6vSZxUcMJKjiKtyOf_Q0khEwU,1834
+pytz/zoneinfo/America/Grenada,sha256=hJHlV_-AGoMGUWuMpZRv9fLmghrzFHfrR9fRkcxaZJc,246
+pytz/zoneinfo/America/Guadeloupe,sha256=hJHlV_-AGoMGUWuMpZRv9fLmghrzFHfrR9fRkcxaZJc,246
+pytz/zoneinfo/America/Guatemala,sha256=dugUgCd6QY52yHkHuUP4jRWzo5x439IQigaYCvEF46Q,280
+pytz/zoneinfo/America/Guayaquil,sha256=PbcF4bvGAm-aFwdtGPotJy3kb4NwoyWwxgwL98BeUWA,246
+pytz/zoneinfo/America/Guyana,sha256=icHu0YLCJhwk9D47f4VCBHgnex6yGrY4JFtjkfMIeDs,262
+pytz/zoneinfo/America/Halifax,sha256=TZpmc5PwWoLfTfQoQ_b3U17BE2iVKSeNkR0Ho8mbTn8,3424
+pytz/zoneinfo/America/Havana,sha256=HUQeAuKBsEkI5SLZjqynXICOUVOajkKzKH5r-Ov5Odc,2416
+pytz/zoneinfo/America/Hermosillo,sha256=9Ij30JYmMscC1XHi4o9v-uSXoUuE8V9zhGz2iV5hVFI,416
+pytz/zoneinfo/America/Indiana/Indianapolis,sha256=GrNub1_3Um5Qh67wOx58_TEAz4fwAeAlk2AlMTVA_sI,1666
+pytz/zoneinfo/America/Indiana/Knox,sha256=BiALShjiOLg1o8mMRWJ1jyTlJkgvwzte7B9WSOvTUNg,2428
+pytz/zoneinfo/America/Indiana/Marengo,sha256=CPYY3XgJFNEzONxei7x04wOGI_b86RAn4jBPewi1HZw,1722
+pytz/zoneinfo/America/Indiana/Petersburg,sha256=axot1SloP27ZWjezmo7kldu9qA2frEtPVqWngcXtft0,1904
+pytz/zoneinfo/America/Indiana/Tell_City,sha256=GrWNjb1i4sbIYlJ8fU0viJ2Q5JmrlvLgcLQILnk3El8,1684
+pytz/zoneinfo/America/Indiana/Vevay,sha256=GGosHbQUoIDOKPZxdal42X40veEITMmrnlKOnLUhb-c,1414
+pytz/zoneinfo/America/Indiana/Vincennes,sha256=gh7LAbHbMD92eo9C_c5IiwQ1fJvxhdJN402Q_4YJdLg,1694
+pytz/zoneinfo/America/Indiana/Winamac,sha256=yS-_aKSC4crd0WdNutkHRHxUjmBCU56QVQcqy7kYpbQ,1778
+pytz/zoneinfo/America/Indianapolis,sha256=GrNub1_3Um5Qh67wOx58_TEAz4fwAeAlk2AlMTVA_sI,1666
+pytz/zoneinfo/America/Inuvik,sha256=MU_oDiidQaijt1KV0B5h9LqHoCrJ8ieldD9tsiJiX5o,1894
+pytz/zoneinfo/America/Iqaluit,sha256=6PitEMSFWcSb-Io8fvm4oQ_7v39G_qANc6reTjXoZJ0,2032
+pytz/zoneinfo/America/Jamaica,sha256=wlagieUPRf5-beie-h7QsONbNzjGsm8vMs8uf28pw28,482
+pytz/zoneinfo/America/Jujuy,sha256=5HR0TlZFifwJ5nLTmg7yWXgCTx9mRhahfs4_Wq70wOY,1048
+pytz/zoneinfo/America/Juneau,sha256=k7hxb0aGRnfnE-DBi3LkcjAzRPyAf0_Hw0vVFfjGeb0,2353
+pytz/zoneinfo/America/Kentucky/Louisville,sha256=-yqgeeHZdq6oP3_WzVvYOmqV9HQv8y7ZWmc9bzHvJAY,2772
+pytz/zoneinfo/America/Kentucky/Monticello,sha256=NJMKjG7jjlRzZhndMPw51bYW0D3jviW2Qbl70YcU0Gg,2352
+pytz/zoneinfo/America/Knox_IN,sha256=BiALShjiOLg1o8mMRWJ1jyTlJkgvwzte7B9WSOvTUNg,2428
+pytz/zoneinfo/America/Kralendijk,sha256=hJHlV_-AGoMGUWuMpZRv9fLmghrzFHfrR9fRkcxaZJc,246
+pytz/zoneinfo/America/La_Paz,sha256=PAGF2VU_QOw2xT1Cqdp2P8Aj9hXMVWlCByV7cvfIQ_k,232
+pytz/zoneinfo/America/Lima,sha256=JHDCg95uw6BEu4a4Gfyikm1s8rm8AsYPG8dJxQQNZFs,406
+pytz/zoneinfo/America/Los_Angeles,sha256=VOy1PikdjiVdJ7lukVGzwl8uDxV_KYqznkTm5BLEiDM,2836
+pytz/zoneinfo/America/Louisville,sha256=-yqgeeHZdq6oP3_WzVvYOmqV9HQv8y7ZWmc9bzHvJAY,2772
+pytz/zoneinfo/America/Lower_Princes,sha256=hJHlV_-AGoMGUWuMpZRv9fLmghrzFHfrR9fRkcxaZJc,246
+pytz/zoneinfo/America/Maceio,sha256=pzjNghmeHhvF4aI3cDq2G_5t71BSNGIbRAF5NmJyDmw,744
+pytz/zoneinfo/America/Managua,sha256=xBzF01AHn2E2fD8Qdy-DHFe36UqoeNpKPfChduBKWdk,430
+pytz/zoneinfo/America/Manaus,sha256=lp6RlkcXJQ7mSsKqnEgC8svJVrFDJk_16xxvfpNSpK4,604
+pytz/zoneinfo/America/Marigot,sha256=hJHlV_-AGoMGUWuMpZRv9fLmghrzFHfrR9fRkcxaZJc,246
+pytz/zoneinfo/America/Martinique,sha256=fMs80kOU2YFvC0f9y2eje97JeAtTYBamXrnlTunNLzQ,232
+pytz/zoneinfo/America/Matamoros,sha256=RlEMOT_zvCLQ8s7TNvRE2PnC4H9JrxO7MGxmfu5xPPI,1390
+pytz/zoneinfo/America/Mazatlan,sha256=aIyre-8trAXSHtqxbuu6gDDkWCUjI_SdAKPIjz74M2E,1526
+pytz/zoneinfo/America/Mendoza,sha256=5DJiYYeQpcLBR_IoIJtk43IswJeGYawx5GykszuJ-Nw,1076
+pytz/zoneinfo/America/Menominee,sha256=Arv9WLbfhNcpRsUjHDU757BEdwlp08Gt30AixG3gZ04,2274
+pytz/zoneinfo/America/Merida,sha256=BJQ5mzAT-akb_EA7WqGdNheCorDqLBnDS_4X3YJz0rc,1422
+pytz/zoneinfo/America/Metlakatla,sha256=twmieGTVY2V-U8nFxqvx7asYv8GVjeWdLtrOI7UApVI,1423
+pytz/zoneinfo/America/Mexico_City,sha256=DSpTe5TT0KBsxGx79Rs7ah-zJpiGOJKwPjztovRN0b4,1584
+pytz/zoneinfo/America/Miquelon,sha256=LNbkN87EnZUa41Xizko5VIN55EyQvf5Kk5b5AfNQG8Q,1666
+pytz/zoneinfo/America/Moncton,sha256=Wmv-bk9aKKcWWzOpc1UFu67HOfwaIk2Wmh3LgqGctys,3154
+pytz/zoneinfo/America/Monterrey,sha256=HA4yn9jQHk9i0PqiB7fSoFdzXtB1DT1cheGRPXrQNdQ,1390
+pytz/zoneinfo/America/Montevideo,sha256=4jcgTegK5X8F0yNYzk-3oySZ4U9XQ09UbTJ_mlu8N70,1510
+pytz/zoneinfo/America/Montreal,sha256=ggOSzbHkmfgu9wTQzP0MUKsrKMbgveuAeThh1eFl1a0,3494
+pytz/zoneinfo/America/Montserrat,sha256=hJHlV_-AGoMGUWuMpZRv9fLmghrzFHfrR9fRkcxaZJc,246
+pytz/zoneinfo/America/Nassau,sha256=ggOSzbHkmfgu9wTQzP0MUKsrKMbgveuAeThh1eFl1a0,3494
+pytz/zoneinfo/America/New_York,sha256=7AoiEGjr3wV4P7C4Qs35COZqwr2mjNDq7ocpsSPFOM8,3536
+pytz/zoneinfo/America/Nipigon,sha256=EGPXcOin8mfzFTkYJm4ICpY7fyE24I2pXg4ejafSMyU,2122
+pytz/zoneinfo/America/Nome,sha256=2izM3-P-PqJ9za6MdhzFfMvPFNq7Gim69tAvEwPeY2s,2367
+pytz/zoneinfo/America/Noronha,sha256=3R4lLV8jg5SljhC5OVVCk51Y77Efjo6zCe-oppg_FFo,716
+pytz/zoneinfo/America/North_Dakota/Beulah,sha256=PHlzEk3wsNXYsfMZZSio7ZfdnyxPFpOhK3dS-1AJKGg,2380
+pytz/zoneinfo/America/North_Dakota/Center,sha256=PaM52_JOVMEpVdw5qiOlhkp3qA0xp0d6Z9neOatmLKo,2380
+pytz/zoneinfo/America/North_Dakota/New_Salem,sha256=o0xmH1FUh3lVFLtP5Lb9c0PfSyaPTsRvQSQYwnn_yls,2380
+pytz/zoneinfo/America/Nuuk,sha256=FtlXWP_hBNuwBHkI2b1yne_tSUJpwLtWLyTHZoFZkmM,1878
+pytz/zoneinfo/America/Ojinaga,sha256=cO3V-x_1Q-mpbJgKNd6-WTfxDEHBV1aqS4wzVl5A0Q4,1484
+pytz/zoneinfo/America/Panama,sha256=kayA_pdpMcSQ0FjIzotdcf-m1JYfbKE-qcFT8LC8zqA,182
+pytz/zoneinfo/America/Pangnirtung,sha256=P9Kw_I-NxcUYJIr1j40jTn9q7F8TPAE_FqXsfLYF86A,2094
+pytz/zoneinfo/America/Paramaribo,sha256=Hm5tDwUmnoTrTUPEO4WArfSF74ZjywVEocy4kL51FzA,262
+pytz/zoneinfo/America/Phoenix,sha256=nEOwYOnGxENw9zW8m50PGxbtVfTrX3QYAo4x4LgOLfI,328
+pytz/zoneinfo/America/Port-au-Prince,sha256=09ZAJd4IOiMpfdpUuF1U44R_hRt6BvpAkFXOnYO9yOM,1434
+pytz/zoneinfo/America/Port_of_Spain,sha256=hJHlV_-AGoMGUWuMpZRv9fLmghrzFHfrR9fRkcxaZJc,246
+pytz/zoneinfo/America/Porto_Acre,sha256=17onkm8P_VgMkErjK9rr0qwNni7qp9tgcUZ93g3ltOs,628
+pytz/zoneinfo/America/Porto_Velho,sha256=ZRfzgGEu26hnl3JPtiZLOSFGj_WBSbOKdiLC1xIyc5c,576
+pytz/zoneinfo/America/Puerto_Rico,sha256=hJHlV_-AGoMGUWuMpZRv9fLmghrzFHfrR9fRkcxaZJc,246
+pytz/zoneinfo/America/Punta_Arenas,sha256=arlTaQbMOjCBiVMimhpHt9qOgZppSLU01rwVhw93bOY,1902
+pytz/zoneinfo/America/Rainy_River,sha256=r6kx6lD2IzCdygkj-DKyL2tPSn7k0Zil7PSHCBFKOa0,2122
+pytz/zoneinfo/America/Rankin_Inlet,sha256=KpQX97-EuF4MNyxQrtOKP616CK_vjniM-lo14WGVz0c,1892
+pytz/zoneinfo/America/Recife,sha256=ijFN2ZzZe5oBYdl8Ag3SwmGjj2JeVYYX2Vo767g2s6I,716
+pytz/zoneinfo/America/Regina,sha256=yjqT08pHbICYe83H8JmtaDBvCFqRv7Tfze3Y8xuXukw,980
+pytz/zoneinfo/America/Resolute,sha256=VP_u5XsepfSwx7Ou9zjGw2p5Qi10AIA54sP1J2DkppM,1892
+pytz/zoneinfo/America/Rio_Branco,sha256=17onkm8P_VgMkErjK9rr0qwNni7qp9tgcUZ93g3ltOs,628
+pytz/zoneinfo/America/Rosario,sha256=1XqIP8Qo2bPR7909hrAI-qAttybmwEW4ms7FjZA5Yfw,1076
+pytz/zoneinfo/America/Santa_Isabel,sha256=OHHtvy3J70z6wvKBHgPqMEnGs6SXp8fkf0WX9ZiOODk,2342
+pytz/zoneinfo/America/Santarem,sha256=Gl_lI3pPZ57UIYXWcmaTpFqWDA5re6bHh1nWs_Z0-Nc,602
+pytz/zoneinfo/America/Santiago,sha256=yxF9edZctGSER6k5w2e7R16COtd5AABVe0QW79Vgo6Y,2529
+pytz/zoneinfo/America/Santo_Domingo,sha256=DKtaEj8fQ92ybITTWU4Bm160S9pzJmUVbjaWRnenxU4,458
+pytz/zoneinfo/America/Sao_Paulo,sha256=cO3VGekMGdSf1y4f_UgkpDMRes26-l1oGUoDglIiUQg,1444
+pytz/zoneinfo/America/Scoresbysund,sha256=dfHb86egoiNykb3bR3OHXpGFPm_Apck8BLiVTCqVAVc,1916
+pytz/zoneinfo/America/Shiprock,sha256=6_yPo1_mvnt9DgpPzr0QdHsjdsfUG6ALnagQLML1DSM,2444
+pytz/zoneinfo/America/Sitka,sha256=aiS7Fk37hZpzZ9VkeJQeF-BqTLRC1QOTCgMAJwT8UxA,2329
+pytz/zoneinfo/America/St_Barthelemy,sha256=hJHlV_-AGoMGUWuMpZRv9fLmghrzFHfrR9fRkcxaZJc,246
+pytz/zoneinfo/America/St_Johns,sha256=r1-17uKv27eZ3JsVkw_DLZQbo6wvjuuVu7C2pDsmOgI,3655
+pytz/zoneinfo/America/St_Kitts,sha256=hJHlV_-AGoMGUWuMpZRv9fLmghrzFHfrR9fRkcxaZJc,246
+pytz/zoneinfo/America/St_Lucia,sha256=hJHlV_-AGoMGUWuMpZRv9fLmghrzFHfrR9fRkcxaZJc,246
+pytz/zoneinfo/America/St_Thomas,sha256=hJHlV_-AGoMGUWuMpZRv9fLmghrzFHfrR9fRkcxaZJc,246
+pytz/zoneinfo/America/St_Vincent,sha256=hJHlV_-AGoMGUWuMpZRv9fLmghrzFHfrR9fRkcxaZJc,246
+pytz/zoneinfo/America/Swift_Current,sha256=RRKOF7vZC8VvYxD8PP4J1_hUPayKBP7Lu80avRkfPDY,560
+pytz/zoneinfo/America/Tegucigalpa,sha256=EzOz7ntTlreMq69JZ2CcAb8Ps98V9bUMN480tpPIyw4,252
+pytz/zoneinfo/America/Thule,sha256=8xuPRaZU8RgO5ECqFYHYmnHioc81sBOailkVu8Y02i8,1502
+pytz/zoneinfo/America/Thunder_Bay,sha256=cJ9lcf2mDZttEx_ttYYoZAJfuGhSsDgNV2PI-ggWdPE,2202
+pytz/zoneinfo/America/Tijuana,sha256=OHHtvy3J70z6wvKBHgPqMEnGs6SXp8fkf0WX9ZiOODk,2342
+pytz/zoneinfo/America/Toronto,sha256=ggOSzbHkmfgu9wTQzP0MUKsrKMbgveuAeThh1eFl1a0,3494
+pytz/zoneinfo/America/Tortola,sha256=hJHlV_-AGoMGUWuMpZRv9fLmghrzFHfrR9fRkcxaZJc,246
+pytz/zoneinfo/America/Vancouver,sha256=sknKH0jSPWam-DHfM35qXs8Nam7d5TFlkUI9Sgxryyg,2892
+pytz/zoneinfo/America/Virgin,sha256=hJHlV_-AGoMGUWuMpZRv9fLmghrzFHfrR9fRkcxaZJc,246
+pytz/zoneinfo/America/Whitehorse,sha256=Kfv607qGHJxXGBP1nPJyNg2_duWrmxhZGFQr82ukgq8,1614
+pytz/zoneinfo/America/Winnipeg,sha256=7P-_YQrneFcon7QKSTOnkiGjEppFDn3Z48MJ1qq8VBw,2868
+pytz/zoneinfo/America/Yakutat,sha256=tFwnKbvwhyyn4LNTAn5ye_JWDdxjCerNDt7oOwUwO2M,2305
+pytz/zoneinfo/America/Yellowknife,sha256=pfFvC8NEy373KbO6r6ec-Gw_O0D2h64mXU1X1AsUDgE,1966
+pytz/zoneinfo/Antarctica/Casey,sha256=a_ShNA5q27F-GNPiFPttIhhdHc1MP485jX6pwRjZ_t0,384
+pytz/zoneinfo/Antarctica/Davis,sha256=6PokyOaaISRTN13sisuGgdt5vG5A2YqNooJpfLTb5SQ,297
+pytz/zoneinfo/Antarctica/DumontDUrville,sha256=ei_XjmiRDLh-RU94uvz9CCIIRFH1r0X7WL-sB-6DF60,186
+pytz/zoneinfo/Antarctica/Macquarie,sha256=ie7RlaU8RHTorVVj-MX8StKMqx_oXf4UH2PUqpzcwe0,2260
+pytz/zoneinfo/Antarctica/Mawson,sha256=9TW1g_z0tk5EfeB7K69VJo8agO7-K9ZxWbiqNKnUZNE,199
+pytz/zoneinfo/Antarctica/McMurdo,sha256=gADjoyPo_QISQU6UJrAgcHp3HDaMoOFRdH-d23uBSyc,2437
+pytz/zoneinfo/Antarctica/Palmer,sha256=DW_DXByXg5MnMZ-w1bNdu8b0lKOYD_EgrPRd5EcyEm4,1418
+pytz/zoneinfo/Antarctica/Rothera,sha256=QQI1m1IN4_2e6Bb0z-rOYaOwxp4XjMJDOKM9SFDUPKg,164
+pytz/zoneinfo/Antarctica/South_Pole,sha256=gADjoyPo_QISQU6UJrAgcHp3HDaMoOFRdH-d23uBSyc,2437
+pytz/zoneinfo/Antarctica/Syowa,sha256=rq9KPj8l0FBnnKn93WkMeA1IngNtTzk5_oV4sEZhc4w,165
+pytz/zoneinfo/Antarctica/Troll,sha256=3zrh-P_jMCss9GGwHJJHkypZZydq4mkgo_TDqctn3c4,1162
+pytz/zoneinfo/Antarctica/Vostok,sha256=6tx86WD3MVGJBCbOJUCoA6YlGwCn2BT4B85Zss0vz4Y,165
+pytz/zoneinfo/Arctic/Longyearbyen,sha256=UdCERhj1JYpx3ojmilaRoyVoR4qMA1-PEv6hGwnpsJA,2228
+pytz/zoneinfo/Asia/Aden,sha256=rq9KPj8l0FBnnKn93WkMeA1IngNtTzk5_oV4sEZhc4w,165
+pytz/zoneinfo/Asia/Almaty,sha256=rBIl_pqZNmKZabjEa4mcsLahl9PbAdZJpQMQLVmcfBU,997
+pytz/zoneinfo/Asia/Amman,sha256=S4wAXF0MX5MpeQyIHI08w_fG_735xdah3aRS9VRPFjE,1853
+pytz/zoneinfo/Asia/Anadyr,sha256=hDDTly45ejoVVP9Al07TmKpTACNGJaIPlcXLRbsG_4g,1188
+pytz/zoneinfo/Asia/Aqtau,sha256=A5exZN256JagFJTcasgdCrQ8giOqZ2EFMRVYBWTaqZA,983
+pytz/zoneinfo/Asia/Aqtobe,sha256=LQ7P5LEEe7jbWbjqvzmM79c0o6AdZeCExQS-fOWp8yw,1011
+pytz/zoneinfo/Asia/Ashgabat,sha256=L4DYV2mZWycsYeHIypXzO6ZNY3tD8wjgxfPR2ZPW26c,619
+pytz/zoneinfo/Asia/Ashkhabad,sha256=L4DYV2mZWycsYeHIypXzO6ZNY3tD8wjgxfPR2ZPW26c,619
+pytz/zoneinfo/Asia/Atyrau,sha256=3uEo89ORyDJqQ_TtaQdIf9UPaB8WqIRQVi0geeY9gVE,991
+pytz/zoneinfo/Asia/Baghdad,sha256=lQMSUnOuijbcoTaCqMNnYhnvKtS2IVP_kXFAzePVNDU,983
+pytz/zoneinfo/Asia/Bahrain,sha256=V0rFJdLHIrToJ5Wl28VzVowwCVZoY8ZZSeNp-7kOvjY,199
+pytz/zoneinfo/Asia/Baku,sha256=vhHnliaOdRyNudl0sFJFdLynEg0Hc0I-IiZNfbDeCbM,1227
+pytz/zoneinfo/Asia/Bangkok,sha256=eYq0vh89N1j069URoQvtBu0ndEal6FPrtbF8WCKKpDw,199
+pytz/zoneinfo/Asia/Barnaul,sha256=2c1Cq8XYlBgybRQMP8w0NCf7kaLDrPZtGn4M5iJZbJo,1221
+pytz/zoneinfo/Asia/Beirut,sha256=_Z_2ZAg_iL9vU51JDB8CB04uXBDrf1kLIis-JnXaS2o,2154
+pytz/zoneinfo/Asia/Bishkek,sha256=do_4ki1JvSKupUrvlz9jRkHspDhdvk1D2IkByFskjJM,983
+pytz/zoneinfo/Asia/Brunei,sha256=BMMjwEmZ9rMoNpWfg8IrlLhRbMKbdW48padRF-FGolc,203
+pytz/zoneinfo/Asia/Calcutta,sha256=6Qw0EDbLcgMgDik8s7UTJn4QSjmllPNeGVJU5rwKF88,285
+pytz/zoneinfo/Asia/Chita,sha256=4ICOcAVAEWnP-cdf_YJu1_kCYnYPG2_vYfSbuNI-VwI,1221
+pytz/zoneinfo/Asia/Choibalsan,sha256=sJQAAjiT9VyG73dYhpYkq4tcmfITcPpiAa8YXsDlKag,949
+pytz/zoneinfo/Asia/Chongqing,sha256=ZP_C5DqUQ1oEPAQNHTr36S0DGtx453N68YYbqk7u8-Y,561
+pytz/zoneinfo/Asia/Chungking,sha256=ZP_C5DqUQ1oEPAQNHTr36S0DGtx453N68YYbqk7u8-Y,561
+pytz/zoneinfo/Asia/Colombo,sha256=HGea9jswIIgz7k20LTzbKtQyUun67IP5HvsZrmAJZJY,372
+pytz/zoneinfo/Asia/Dacca,sha256=3K5llGhcpCdZMMcJuomICVv7lZlDRpU4PUb5DtFx8l4,337
+pytz/zoneinfo/Asia/Damascus,sha256=6mcB6bxH1KsLqzb_LmJUT3tUDnq9_ScLFKoMFkcZy3A,2294
+pytz/zoneinfo/Asia/Dhaka,sha256=3K5llGhcpCdZMMcJuomICVv7lZlDRpU4PUb5DtFx8l4,337
+pytz/zoneinfo/Asia/Dili,sha256=ptjbacc9JK0pv2JpD-gHMglrwYNj9LMMIua0U0ZTMUc,227
+pytz/zoneinfo/Asia/Dubai,sha256=-ga0m3ua9Y6kSWREz2_VdtcVAkq83WrW3vxjBI7WNGs,165
+pytz/zoneinfo/Asia/Dushanbe,sha256=FUk9Tt_GimfRulcWamEvuOvA7FQ52YfZqQ2w88qMx6M,591
+pytz/zoneinfo/Asia/Famagusta,sha256=CFrcygd8ude5x6OEtfM_Dw0KYHoxpPPzq46KoHVxjjc,2028
+pytz/zoneinfo/Asia/Gaza,sha256=t7JqgJ17VcFJwwpcC1ifwhFcnuUdgDL3ZjP6qlX_mRM,2422
+pytz/zoneinfo/Asia/Harbin,sha256=ZP_C5DqUQ1oEPAQNHTr36S0DGtx453N68YYbqk7u8-Y,561
+pytz/zoneinfo/Asia/Hebron,sha256=LcQWMBqMNBztWG-Ba_l556OtZ_HZbxtU9yH-oiDhrbI,2450
+pytz/zoneinfo/Asia/Ho_Chi_Minh,sha256=L5TXNg6-odIIn-JAyLTR8fKFiUFBNFwy0HzwZchbnm4,351
+pytz/zoneinfo/Asia/Hong_Kong,sha256=UcnFEc9S8hMWl9giVXni4TAhLPWX0H12XvwSt4AJHew,1203
+pytz/zoneinfo/Asia/Hovd,sha256=JUnOos7PNTi2VRKxD6XnaVR3NpuhsX_Pi18rIzVe1xw,891
+pytz/zoneinfo/Asia/Irkutsk,sha256=iUJZCVBjpfB4rNKJOr6g0zUZtccYYk_Gk0wTklx8Yj0,1243
+pytz/zoneinfo/Asia/Istanbul,sha256=2S0A_f7VxvyErJMMCPqK33AChA29IVkMr1o-SpMtMxk,1947
+pytz/zoneinfo/Asia/Jakarta,sha256=_WRgz6Zb6wxIXtMwpKjG4w4PJtDRzkhdrw-3a4NCBFA,355
+pytz/zoneinfo/Asia/Jayapura,sha256=ihzUd-L8HUVqG-Na10MyPE-YYwjVFj-xerqjTN4EJZs,221
+pytz/zoneinfo/Asia/Jerusalem,sha256=JUuWQmW5Tha0pJjw61Q5aN7CX0z4D7ops9OOSnda6Dc,2388
+pytz/zoneinfo/Asia/Kabul,sha256=ial7SvweHTQXDl79MnXm6QHtiw2i7Zt1e5urLXU8Sq8,208
+pytz/zoneinfo/Asia/Kamchatka,sha256=pBA0RbynKTKsMCmf2hJMZ_hgVUPemms-VceMMJ7QC64,1166
+pytz/zoneinfo/Asia/Karachi,sha256=iB-mWMTXUyfBwAkZdz8_UmEw0xsgxIub-KNI7akzhkk,379
+pytz/zoneinfo/Asia/Kashgar,sha256=AEXDJ5PxQOhePZZw1QZl98moDNa-bW3I3WVNQZHBPYA,165
+pytz/zoneinfo/Asia/Kathmandu,sha256=TUeW7rDSifOTSsNxvo9igIYZfGITEZUf-0EjglyRDWs,212
+pytz/zoneinfo/Asia/Katmandu,sha256=TUeW7rDSifOTSsNxvo9igIYZfGITEZUf-0EjglyRDWs,212
+pytz/zoneinfo/Asia/Khandyga,sha256=XYzE2tsE5Say9pg0cHDQkEE9aTuy2piFSLAGx_d-dmM,1271
+pytz/zoneinfo/Asia/Kolkata,sha256=6Qw0EDbLcgMgDik8s7UTJn4QSjmllPNeGVJU5rwKF88,285
+pytz/zoneinfo/Asia/Krasnoyarsk,sha256=nzRw4PI2AiK_Ge854b8U7TSDw0LGQy3ca5YuOOU2XwI,1207
+pytz/zoneinfo/Asia/Kuala_Lumpur,sha256=RfiIYo6sMEkSA8m5iUmyOyJzKZrgRs8ehGuDZwoq88k,383
+pytz/zoneinfo/Asia/Kuching,sha256=KsAtQ0aocINozixwW7CkorY-1PTLlsj7UUnQGQMEYTQ,483
+pytz/zoneinfo/Asia/Kuwait,sha256=rq9KPj8l0FBnnKn93WkMeA1IngNtTzk5_oV4sEZhc4w,165
+pytz/zoneinfo/Asia/Macao,sha256=MvAkRyRsrA2r052ItlyF5bh2FheRjI0jPwg0uIiH2Yk,1227
+pytz/zoneinfo/Asia/Macau,sha256=MvAkRyRsrA2r052ItlyF5bh2FheRjI0jPwg0uIiH2Yk,1227
+pytz/zoneinfo/Asia/Magadan,sha256=cqwjKQt8TlznM1w2DezAZuz1EjeOfLxPeSY19i9zkfQ,1222
+pytz/zoneinfo/Asia/Makassar,sha256=OhJtCqSTEU-u5n0opBVO5Bu-wQzcYPy9S_6aAhJXgOw,254
+pytz/zoneinfo/Asia/Manila,sha256=ujfq0kl1EhxcYSOrG-FS750aNaYUt1TT4bFuK4EcL_c,328
+pytz/zoneinfo/Asia/Muscat,sha256=-ga0m3ua9Y6kSWREz2_VdtcVAkq83WrW3vxjBI7WNGs,165
+pytz/zoneinfo/Asia/Nicosia,sha256=0Unm0IFT7HyGeQ7F3vTa_-klfysCgrulqFO6BD1plZU,2002
+pytz/zoneinfo/Asia/Novokuznetsk,sha256=vQGcqKdmYmWDdl73QPZTcyadnope1RPJ4oBgZelQu90,1165
+pytz/zoneinfo/Asia/Novosibirsk,sha256=ApL3s20HX2eIAno03HCa2RXdlLotVb9JvnZl7W1sM00,1221
+pytz/zoneinfo/Asia/Omsk,sha256=wxbEesfe7dJOkNPffqTwT6wuTSSTM6E9f0uFMAyzMCM,1207
+pytz/zoneinfo/Asia/Oral,sha256=iMjqD4LvDgyxN15v7CqyEdBDyBFaOlChwX1wHz2JiVQ,1005
+pytz/zoneinfo/Asia/Phnom_Penh,sha256=eYq0vh89N1j069URoQvtBu0ndEal6FPrtbF8WCKKpDw,199
+pytz/zoneinfo/Asia/Pontianak,sha256=inOXwuKtjKv1z_eliPZSIqjSt6whtuxhPeG1YpjU_BQ,353
+pytz/zoneinfo/Asia/Pyongyang,sha256=_-g3GnDAtfDX4XAktXH9jFouLUDmOovnjoOfvRpUDsE,237
+pytz/zoneinfo/Asia/Qatar,sha256=V0rFJdLHIrToJ5Wl28VzVowwCVZoY8ZZSeNp-7kOvjY,199
+pytz/zoneinfo/Asia/Qostanay,sha256=UGYEvmZfAAS9D6EMGd0n6-r_Az_zgTDSWLPeHzFLfu0,1011
+pytz/zoneinfo/Asia/Qyzylorda,sha256=aiSRxwoUbQ-TBHf2wcyaOhQb86j3jQpXwcQaSPnAtwU,1025
+pytz/zoneinfo/Asia/Rangoon,sha256=ZHuX-XVHr8dGJjrPQ5cW7b8jQUv3ihyd-VzN545mlMA,268
+pytz/zoneinfo/Asia/Riyadh,sha256=rq9KPj8l0FBnnKn93WkMeA1IngNtTzk5_oV4sEZhc4w,165
+pytz/zoneinfo/Asia/Saigon,sha256=L5TXNg6-odIIn-JAyLTR8fKFiUFBNFwy0HzwZchbnm4,351
+pytz/zoneinfo/Asia/Sakhalin,sha256=95AdPwOgSe0g9wdx67kKLDbjvY3FtpeVBoAWbJVco0w,1202
+pytz/zoneinfo/Asia/Samarkand,sha256=BBe6Gg_KlSQuS5hAyvvhZWmClcLJaFjnCNGC391HHQM,577
+pytz/zoneinfo/Asia/Seoul,sha256=LI9LsV3XcJC0l-KoQf8zI-y7rk-du57erS-N2Ptdi7Q,617
+pytz/zoneinfo/Asia/Shanghai,sha256=ZP_C5DqUQ1oEPAQNHTr36S0DGtx453N68YYbqk7u8-Y,561
+pytz/zoneinfo/Asia/Singapore,sha256=hIgr_LHMTWh3GgeG-MmLHBp-9anUxQcfMlKFtX8WvmU,383
+pytz/zoneinfo/Asia/Srednekolymsk,sha256=0DllW8q5VgXEMV5c_nLJElZsNpauvNhNACQpcgdqEl0,1208
+pytz/zoneinfo/Asia/Taipei,sha256=DMmQwOpPql25ue3Nf8vAKKT4em06D1Z9rHbLIitxixk,761
+pytz/zoneinfo/Asia/Tashkent,sha256=LS-yTxh0v1vmJoQ9I6fY-IERk7ukPmovVx2Ut_-b-Ig,591
+pytz/zoneinfo/Asia/Tbilisi,sha256=w6UNxgyn4BVVTF5WkAtxo_u7nnIY26makKQ5nRgifds,1035
+pytz/zoneinfo/Asia/Tehran,sha256=ATT50Q0hK6uSba5_WnOE3Px0OWxIwxaqK5Oi10P2A-M,2582
+pytz/zoneinfo/Asia/Tel_Aviv,sha256=JUuWQmW5Tha0pJjw61Q5aN7CX0z4D7ops9OOSnda6Dc,2388
+pytz/zoneinfo/Asia/Thimbu,sha256=uia8or5dtDkxVUZrcLwkjbTz9C7ZhLq0T4jlE4YvuvQ,203
+pytz/zoneinfo/Asia/Thimphu,sha256=uia8or5dtDkxVUZrcLwkjbTz9C7ZhLq0T4jlE4YvuvQ,203
+pytz/zoneinfo/Asia/Tokyo,sha256=oCueZgRNxcNcX3ZGdif9y6Su4cyVhga4XHdwlcrYLOs,309
+pytz/zoneinfo/Asia/Tomsk,sha256=77YgdJLxETRKjQjnaHHf54xBAqNywTDwQQmZ5v6Aq28,1221
+pytz/zoneinfo/Asia/Ujung_Pandang,sha256=OhJtCqSTEU-u5n0opBVO5Bu-wQzcYPy9S_6aAhJXgOw,254
+pytz/zoneinfo/Asia/Ulaanbaatar,sha256=uyQSzIBl0f2TXHrmUm3VPs1C9ro013hYmAlx6yUjh3Y,891
+pytz/zoneinfo/Asia/Ulan_Bator,sha256=uyQSzIBl0f2TXHrmUm3VPs1C9ro013hYmAlx6yUjh3Y,891
+pytz/zoneinfo/Asia/Urumqi,sha256=AEXDJ5PxQOhePZZw1QZl98moDNa-bW3I3WVNQZHBPYA,165
+pytz/zoneinfo/Asia/Ust-Nera,sha256=JAZhRAPdbOL9AL-WHOL8aZjxdZxLmGDNBGMCw9TKtR8,1252
+pytz/zoneinfo/Asia/Vientiane,sha256=eYq0vh89N1j069URoQvtBu0ndEal6FPrtbF8WCKKpDw,199
+pytz/zoneinfo/Asia/Vladivostok,sha256=Wokhgtj2nwUj992h7SyfB_fRNHAKfPNzhsf_oZpim8c,1208
+pytz/zoneinfo/Asia/Yakutsk,sha256=RVCIl52EvMrp2RG2hg2cjDSr9QhsscaAT-NV81xw7zc,1207
+pytz/zoneinfo/Asia/Yangon,sha256=ZHuX-XVHr8dGJjrPQ5cW7b8jQUv3ihyd-VzN545mlMA,268
+pytz/zoneinfo/Asia/Yekaterinburg,sha256=NzVc2DiPeyw0FdMHwSPQJF9k3tvWdtrETZiN58pyxLk,1243
+pytz/zoneinfo/Asia/Yerevan,sha256=k0WHtWQW_cBCjcEv8nP01cVPeTVDlf18lQ0_u6cin1o,1151
+pytz/zoneinfo/Atlantic/Azores,sha256=Q5Jqbe5h6WDi16jwK_B74gWWg58o0TgrdgB_cwbhzz0,3512
+pytz/zoneinfo/Atlantic/Bermuda,sha256=LNGKfMsnYvwImjTyzXrLhMOHHDu7qI67RbYNKvvI15I,2396
+pytz/zoneinfo/Atlantic/Canary,sha256=ymK9ufqphvNjDK3hzikN4GfkcR3QeCBiPKyVc6FjlbA,1897
+pytz/zoneinfo/Atlantic/Cape_Verde,sha256=ESQvE3deMI-lx9mG0yJLEsFX5KRl-7c6gD5O2h0Zm9Q,270
+pytz/zoneinfo/Atlantic/Faeroe,sha256=NibdZPZtapnYR_myIZnMdTaSKGsOBGgujj0_T2NvAzs,1815
+pytz/zoneinfo/Atlantic/Faroe,sha256=NibdZPZtapnYR_myIZnMdTaSKGsOBGgujj0_T2NvAzs,1815
+pytz/zoneinfo/Atlantic/Jan_Mayen,sha256=UdCERhj1JYpx3ojmilaRoyVoR4qMA1-PEv6hGwnpsJA,2228
+pytz/zoneinfo/Atlantic/Madeira,sha256=21Zcy0xRqDN3oY8jmjjO-LI7aC3G9mcS9ytaYg0g7ik,3503
+pytz/zoneinfo/Atlantic/Reykjavik,sha256=mSkaRBGZLeUrm88EeHcaWnEd35Wn-Ag2G10HtI3G2fg,1162
+pytz/zoneinfo/Atlantic/South_Georgia,sha256=QZ72fRKp6Kgvy7DfyHGht1MVnzGgSPujLQd4XMjNrrc,164
+pytz/zoneinfo/Atlantic/St_Helena,sha256=0u-sTl8j2IyV1ywdtCgHFw9S9D3ZiiBa9akqkbny2Zc,148
+pytz/zoneinfo/Atlantic/Stanley,sha256=exKMLw-P952wS1FTxVjnUU1mkD2OvKUDwtDt8IGgf8w,1214
+pytz/zoneinfo/Australia/ACT,sha256=QsOFdYWxbbL4_9R7oZ-qYPRzNA3o1P6TIOp76GFgWQY,2190
+pytz/zoneinfo/Australia/Adelaide,sha256=ld2EbxU75oVgmPe703z-I6aqLg0Kmv62ZcCGzkT5R20,2208
+pytz/zoneinfo/Australia/Brisbane,sha256=eW6Qzze2t0-speJmmvt1JMzbkSadIKdE84XHc7JUtGc,419
+pytz/zoneinfo/Australia/Broken_Hill,sha256=3k_3ljTvS5GSfo7Xh6w71UgR3aAwYPBsnCJ-mlEYCqQ,2229
+pytz/zoneinfo/Australia/Canberra,sha256=QsOFdYWxbbL4_9R7oZ-qYPRzNA3o1P6TIOp76GFgWQY,2190
+pytz/zoneinfo/Australia/Currie,sha256=GLQSzgIfsWxOvmKOrhpfofWqINQf6h36NYy3mcq6gcg,2358
+pytz/zoneinfo/Australia/Darwin,sha256=fn0IZhIW98FAnzLig-_GBtW5LA54jajdeeUzg4tCGvo,325
+pytz/zoneinfo/Australia/Eucla,sha256=LxEuFWyMse_cALVtRWCkf6sIIEk13jQ4JXW8k2agSd8,470
+pytz/zoneinfo/Australia/Hobart,sha256=GLQSzgIfsWxOvmKOrhpfofWqINQf6h36NYy3mcq6gcg,2358
+pytz/zoneinfo/Australia/LHI,sha256=Luf0Lx_iJHuh3kZd4LxRjf36tLF5-wW2UFMVNKNT7gg,1860
+pytz/zoneinfo/Australia/Lindeman,sha256=xM6Udx22oLNoLR1Y7GQhHOYov8nw3xQNqgc_NVQ2JK4,475
+pytz/zoneinfo/Australia/Lord_Howe,sha256=Luf0Lx_iJHuh3kZd4LxRjf36tLF5-wW2UFMVNKNT7gg,1860
+pytz/zoneinfo/Australia/Melbourne,sha256=lvx_MQcunMc6u2smIrl8X427bLsXvjkgpCSdjYCTNBM,2190
+pytz/zoneinfo/Australia/NSW,sha256=QsOFdYWxbbL4_9R7oZ-qYPRzNA3o1P6TIOp76GFgWQY,2190
+pytz/zoneinfo/Australia/North,sha256=fn0IZhIW98FAnzLig-_GBtW5LA54jajdeeUzg4tCGvo,325
+pytz/zoneinfo/Australia/Perth,sha256=Al1DOUh4U_ofMUQSeVlzSyD3x7SUjP9dchSaBUGmeWg,446
+pytz/zoneinfo/Australia/Queensland,sha256=eW6Qzze2t0-speJmmvt1JMzbkSadIKdE84XHc7JUtGc,419
+pytz/zoneinfo/Australia/South,sha256=ld2EbxU75oVgmPe703z-I6aqLg0Kmv62ZcCGzkT5R20,2208
+pytz/zoneinfo/Australia/Sydney,sha256=QsOFdYWxbbL4_9R7oZ-qYPRzNA3o1P6TIOp76GFgWQY,2190
+pytz/zoneinfo/Australia/Tasmania,sha256=GLQSzgIfsWxOvmKOrhpfofWqINQf6h36NYy3mcq6gcg,2358
+pytz/zoneinfo/Australia/Victoria,sha256=lvx_MQcunMc6u2smIrl8X427bLsXvjkgpCSdjYCTNBM,2190
+pytz/zoneinfo/Australia/West,sha256=Al1DOUh4U_ofMUQSeVlzSyD3x7SUjP9dchSaBUGmeWg,446
+pytz/zoneinfo/Australia/Yancowinna,sha256=3k_3ljTvS5GSfo7Xh6w71UgR3aAwYPBsnCJ-mlEYCqQ,2229
+pytz/zoneinfo/Brazil/Acre,sha256=17onkm8P_VgMkErjK9rr0qwNni7qp9tgcUZ93g3ltOs,628
+pytz/zoneinfo/Brazil/DeNoronha,sha256=3R4lLV8jg5SljhC5OVVCk51Y77Efjo6zCe-oppg_FFo,716
+pytz/zoneinfo/Brazil/East,sha256=cO3VGekMGdSf1y4f_UgkpDMRes26-l1oGUoDglIiUQg,1444
+pytz/zoneinfo/Brazil/West,sha256=lp6RlkcXJQ7mSsKqnEgC8svJVrFDJk_16xxvfpNSpK4,604
+pytz/zoneinfo/CET,sha256=o4omkrM_IsITxooUo8krM921XfBdvRs9JhwGXGd-Ypg,2094
+pytz/zoneinfo/CST6CDT,sha256=WGbtZ1FwjRX6Jeo_TCXKsfeDs4V9uhXGJfcnLJhk3s0,2310
+pytz/zoneinfo/Canada/Atlantic,sha256=TZpmc5PwWoLfTfQoQ_b3U17BE2iVKSeNkR0Ho8mbTn8,3424
+pytz/zoneinfo/Canada/Central,sha256=7P-_YQrneFcon7QKSTOnkiGjEppFDn3Z48MJ1qq8VBw,2868
+pytz/zoneinfo/Canada/Eastern,sha256=ggOSzbHkmfgu9wTQzP0MUKsrKMbgveuAeThh1eFl1a0,3494
+pytz/zoneinfo/Canada/Mountain,sha256=-TkIfc3QlvaCf0p8COZ43Y1HRBAl-nARUi-JdXeK1vE,2332
+pytz/zoneinfo/Canada/Newfoundland,sha256=r1-17uKv27eZ3JsVkw_DLZQbo6wvjuuVu7C2pDsmOgI,3655
+pytz/zoneinfo/Canada/Pacific,sha256=sknKH0jSPWam-DHfM35qXs8Nam7d5TFlkUI9Sgxryyg,2892
+pytz/zoneinfo/Canada/Saskatchewan,sha256=yjqT08pHbICYe83H8JmtaDBvCFqRv7Tfze3Y8xuXukw,980
+pytz/zoneinfo/Canada/Yukon,sha256=Kfv607qGHJxXGBP1nPJyNg2_duWrmxhZGFQr82ukgq8,1614
+pytz/zoneinfo/Chile/Continental,sha256=yxF9edZctGSER6k5w2e7R16COtd5AABVe0QW79Vgo6Y,2529
+pytz/zoneinfo/Chile/EasterIsland,sha256=paHp1QRXIa02kgd0-4V6vWXdqcwheow-hJQD9VqacfQ,2233
+pytz/zoneinfo/Cuba,sha256=HUQeAuKBsEkI5SLZjqynXICOUVOajkKzKH5r-Ov5Odc,2416
+pytz/zoneinfo/EET,sha256=gGVsW5-qnI7ty8vqVK1ADWhunrvAT8kUC79GUf-_7G8,1908
+pytz/zoneinfo/EST,sha256=uKE_VPKfxGyYEsyqV_DdE2MW55vs_qUioOdIn5Goobc,114
+pytz/zoneinfo/EST5EDT,sha256=fwzEMT1jgnY2dDjd0EqDl26_7LC-oF48Bd4ng5311H0,2310
+pytz/zoneinfo/Egypt,sha256=L6zLQLnQtLkEELOGfm6USaHY33qAEPgGV822-iU1vxc,1955
+pytz/zoneinfo/Eire,sha256=-JSA3vsi44F1DE8supVjSppH2Vpp12WjJI0_COtAmqU,3492
+pytz/zoneinfo/Etc/GMT,sha256=bZ83iIPAefhsA4elVHqSxEmGnYBuB94QCEqwTwJJAY0,114
+pytz/zoneinfo/Etc/GMT+0,sha256=bZ83iIPAefhsA4elVHqSxEmGnYBuB94QCEqwTwJJAY0,114
+pytz/zoneinfo/Etc/GMT+1,sha256=1Qzl2X9rQ_RXEf11yH09wQZCr_ph6UdFP7E0yu9s-IQ,116
+pytz/zoneinfo/Etc/GMT+10,sha256=JEQyQyQlkC0o6ZTdeVjZhCIOh6cK5TF7H00Pkls-sUI,117
+pytz/zoneinfo/Etc/GMT+11,sha256=tWvcvYMFCaE60nJVvDrrov7stJvs1KQYOyrhl3dzcUs,117
+pytz/zoneinfo/Etc/GMT+12,sha256=b70HEhErq8IJmq8x7cOZy4eR__3fq5uHHpjvPBEHqMA,117
+pytz/zoneinfo/Etc/GMT+2,sha256=T6Ep5zhslBKbYaECFUB6gUKh3iTZPyMoW1kjhonxrUo,116
+pytz/zoneinfo/Etc/GMT+3,sha256=QGoYrE04bUJ-OzL37dt2MZT5FxWNLpJDPVXgJbstYZA,116
+pytz/zoneinfo/Etc/GMT+4,sha256=RWrkNki-wV7X-coe0VvufBe6LrWVpkPJgia5QQYEnBo,116
+pytz/zoneinfo/Etc/GMT+5,sha256=oRmeC41dgYXT-zzyZIRKXN9IvdL2Da5nTuwmG2_prIA,116
+pytz/zoneinfo/Etc/GMT+6,sha256=d6dAnwiejyFI2n7AzFlFW0aFAT6zYNEjBIEG0uu0sbQ,116
+pytz/zoneinfo/Etc/GMT+7,sha256=TqjYbzd0YHpx1wisFg08J19wTpg6ztJLLongZY_lozs,116
+pytz/zoneinfo/Etc/GMT+8,sha256=th_8bIMmYgRPCesBrbmBhRr0jQO7whd70LiY9HfwJyk,116
+pytz/zoneinfo/Etc/GMT+9,sha256=Qq5E6iUS7JMJIymT7YoqlI8MtqtVy0mr9t6zWFtWc9Y,116
+pytz/zoneinfo/Etc/GMT-0,sha256=bZ83iIPAefhsA4elVHqSxEmGnYBuB94QCEqwTwJJAY0,114
+pytz/zoneinfo/Etc/GMT-1,sha256=73F1eU8uAQGP3mcoB2q99CjfManGFHk3fefljp9pYC4,117
+pytz/zoneinfo/Etc/GMT-10,sha256=fKWWNwLBOp1OkKjtc1w9LIXJR1mTTD-JdvYflRy1IrU,118
+pytz/zoneinfo/Etc/GMT-11,sha256=D2S79n6psa9t9_2vj5wIrFpHH2OJLcCKP6vtwzFZINY,118
+pytz/zoneinfo/Etc/GMT-12,sha256=me4V6lmWI8gSr8H7N41WAD0Eww1anh_EF34Qr9UoSnI,118
+pytz/zoneinfo/Etc/GMT-13,sha256=xbmbG1BQA6Dlpa_iUwEGyJxW4a3t6lmawdPKAE8vbR8,118
+pytz/zoneinfo/Etc/GMT-14,sha256=PpXoREBh02qFpvxVMj2pV9IAzSQvBE7XPvnN9qSZ-Kc,118
+pytz/zoneinfo/Etc/GMT-2,sha256=ve6hWLdeuiLhqagaWLqMD6HNybS1chRwjudfTZ2bYBE,117
+pytz/zoneinfo/Etc/GMT-3,sha256=N77jILanuLDVkLsdujXZSu-dsHiwN5MIpwh7fMUifso,117
+pytz/zoneinfo/Etc/GMT-4,sha256=LSko5fVHqPl5zfwjGqkbMa_OFnvtpT6o_4xYxNz9n5o,117
+pytz/zoneinfo/Etc/GMT-5,sha256=uLaSR5Mb18HRTsAA5SveY9PAJ97dO8QzIWqNXe3wZb4,117
+pytz/zoneinfo/Etc/GMT-6,sha256=JSN-RUAphJ50fpIv7cYC6unrtrz9S1Wma-piDHlGe7c,117
+pytz/zoneinfo/Etc/GMT-7,sha256=vVAOF8xU9T9ESnw68c0SFXpcvkoopaiwTR0zbefHHSU,117
+pytz/zoneinfo/Etc/GMT-8,sha256=S7xFQbFMpiDZy4v5L4D9fCrjRIzzoLC5p8Se23xi7us,117
+pytz/zoneinfo/Etc/GMT-9,sha256=I5vHNmUK-Yyg_S1skFN44VGVzBgktjFgVQiDIKO4aMI,117
+pytz/zoneinfo/Etc/GMT0,sha256=bZ83iIPAefhsA4elVHqSxEmGnYBuB94QCEqwTwJJAY0,114
+pytz/zoneinfo/Etc/Greenwich,sha256=bZ83iIPAefhsA4elVHqSxEmGnYBuB94QCEqwTwJJAY0,114
+pytz/zoneinfo/Etc/UCT,sha256=i4WEZ5GrLIpUY8g6W-PAQ-JXDXRIQ01BOYlp7Ufj5vI,114
+pytz/zoneinfo/Etc/UTC,sha256=i4WEZ5GrLIpUY8g6W-PAQ-JXDXRIQ01BOYlp7Ufj5vI,114
+pytz/zoneinfo/Etc/Universal,sha256=i4WEZ5GrLIpUY8g6W-PAQ-JXDXRIQ01BOYlp7Ufj5vI,114
+pytz/zoneinfo/Etc/Zulu,sha256=i4WEZ5GrLIpUY8g6W-PAQ-JXDXRIQ01BOYlp7Ufj5vI,114
+pytz/zoneinfo/Europe/Amsterdam,sha256=pw8HngVt3bU5QrRzu70qOmf69TIyklkglvVUte9ntKo,2910
+pytz/zoneinfo/Europe/Andorra,sha256=gTB5jCQmvIw3JJi1_vAcOYuhtzPBR6RXUx9gVV6p6ug,1742
+pytz/zoneinfo/Europe/Astrakhan,sha256=ywtzL92KVfoybOmAhE9eHqmMcvJZm5b0js5GDdWIJEQ,1165
+pytz/zoneinfo/Europe/Athens,sha256=XDY-FBUddRyQHN8GxQLZ4awjuOlWlzlUdjv7OdXFNzA,2262
+pytz/zoneinfo/Europe/Belfast,sha256=xp08wV44TZMmAdBqppttDChQAb8tRN03GcEht99RYtY,3648
+pytz/zoneinfo/Europe/Belgrade,sha256=OpWtsGFWBE_S-mYoQcAmjCta9HwbGQANnSmVY9OHCTo,1920
+pytz/zoneinfo/Europe/Berlin,sha256=XuR19xoPwaMvrrhJ-MOcbnqmbW1B7HQrl7OnQ2s7BwE,2298
+pytz/zoneinfo/Europe/Bratislava,sha256=G9fdhUXmzx651BnyZ6V7AOYIV9EV5aMJMm44eJaLLZw,2301
+pytz/zoneinfo/Europe/Brussels,sha256=gS9Vrrbozend9HhuFetCVrIegs9fXSjaG60X2UVwysA,2933
+pytz/zoneinfo/Europe/Bucharest,sha256=nfg6-bU2D6DMEWb9EMIBR5kxnNsbDSx0UKfHH_ZzqFc,2184
+pytz/zoneinfo/Europe/Budapest,sha256=lNwqxWciBvw9ei81VQwIKHbC_ZDJjpgHU6HFg4wCUkY,2368
+pytz/zoneinfo/Europe/Busingen,sha256=K5QY7Ujj2VUchKR4bhhb0hgdAJhmwED71ykXDQOGKe8,1909
+pytz/zoneinfo/Europe/Chisinau,sha256=p1J_rqFE13pL8cpBRrEFe-teCI8f0fKK4uTUy_4diF4,2390
+pytz/zoneinfo/Europe/Copenhagen,sha256=q7iAbkd7y9QvbAi6XGZEUOTwNDCRYWRu9VQCxUrZ01U,2137
+pytz/zoneinfo/Europe/Dublin,sha256=-JSA3vsi44F1DE8supVjSppH2Vpp12WjJI0_COtAmqU,3492
+pytz/zoneinfo/Europe/Gibraltar,sha256=egOcazf2u1njGZ0tDj-f1NzZT_K5rpUKSqtShxO7U6c,3052
+pytz/zoneinfo/Europe/Guernsey,sha256=xp08wV44TZMmAdBqppttDChQAb8tRN03GcEht99RYtY,3648
+pytz/zoneinfo/Europe/Helsinki,sha256=GEkB7LsVhmegt7YuuWheCDvDGC7b7Nw9bTdDGS9qkJc,1900
+pytz/zoneinfo/Europe/Isle_of_Man,sha256=xp08wV44TZMmAdBqppttDChQAb8tRN03GcEht99RYtY,3648
+pytz/zoneinfo/Europe/Istanbul,sha256=2S0A_f7VxvyErJMMCPqK33AChA29IVkMr1o-SpMtMxk,1947
+pytz/zoneinfo/Europe/Jersey,sha256=xp08wV44TZMmAdBqppttDChQAb8tRN03GcEht99RYtY,3648
+pytz/zoneinfo/Europe/Kaliningrad,sha256=s7GXSe1YvMcs7AiUhHNTA6I4nAOQn_Kmz_ZqJYO-LMM,1493
+pytz/zoneinfo/Europe/Kiev,sha256=-wrpG9jPuIKFP1NgBVvnxsMRf9L_h5z3J6Q3jj1AwNM,2120
+pytz/zoneinfo/Europe/Kirov,sha256=Sr4HEUwk3tPTXioeCLhvlgKbCAFU7Gy2UB3f--uWLDc,1153
+pytz/zoneinfo/Europe/Lisbon,sha256=mpUpxGexMhbOBImDLSQs5-GAk7pm7tg4qYW044Kkle0,3497
+pytz/zoneinfo/Europe/Ljubljana,sha256=OpWtsGFWBE_S-mYoQcAmjCta9HwbGQANnSmVY9OHCTo,1920
+pytz/zoneinfo/Europe/London,sha256=xp08wV44TZMmAdBqppttDChQAb8tRN03GcEht99RYtY,3648
+pytz/zoneinfo/Europe/Luxembourg,sha256=974Dvf_X1QISKG1zIiTJJIfGavobO21HUVS-HfysOcY,2946
+pytz/zoneinfo/Europe/Madrid,sha256=MTTMnrbDDtexRikd72-FbQEpCZjc63_UtBIiDomD95c,2614
+pytz/zoneinfo/Europe/Malta,sha256=xRwBfrV8hOihGtqcek5_B6l5hjc206g3yfbEWXIaUis,2620
+pytz/zoneinfo/Europe/Mariehamn,sha256=GEkB7LsVhmegt7YuuWheCDvDGC7b7Nw9bTdDGS9qkJc,1900
+pytz/zoneinfo/Europe/Minsk,sha256=mn86zdrNWpJYDfE51Iy9n1-Zi2piTyb9EPaS2A-uGJQ,1321
+pytz/zoneinfo/Europe/Monaco,sha256=50uVZXYXXqfnr-K4tsSNl26CZbRju65C-STp818wX84,2944
+pytz/zoneinfo/Europe/Moscow,sha256=KmkofRcj6T8Ph28PJChm8JVp13uRvef6TZ0GuPzUiDw,1535
+pytz/zoneinfo/Europe/Nicosia,sha256=0Unm0IFT7HyGeQ7F3vTa_-klfysCgrulqFO6BD1plZU,2002
+pytz/zoneinfo/Europe/Oslo,sha256=UdCERhj1JYpx3ojmilaRoyVoR4qMA1-PEv6hGwnpsJA,2228
+pytz/zoneinfo/Europe/Paris,sha256=q3ehSIot1GZ6TyMHIjbg0oRf4ghAXuwbSDSYVim6evg,2962
+pytz/zoneinfo/Europe/Podgorica,sha256=OpWtsGFWBE_S-mYoQcAmjCta9HwbGQANnSmVY9OHCTo,1920
+pytz/zoneinfo/Europe/Prague,sha256=G9fdhUXmzx651BnyZ6V7AOYIV9EV5aMJMm44eJaLLZw,2301
+pytz/zoneinfo/Europe/Riga,sha256=hJ2_0m1taW9IuA-hMyP5n-WX7YOrR0heKszJhgljRWk,2198
+pytz/zoneinfo/Europe/Rome,sha256=-X5F_d3Dz0kBRWiUTXUN-fgeCHbUEHLaaHIwEPZEdUQ,2641
+pytz/zoneinfo/Europe/Samara,sha256=z2innqSZ8_lkEy8cIyF9JM_FfnO2sWZaqeFqOh8pD7M,1215
+pytz/zoneinfo/Europe/San_Marino,sha256=-X5F_d3Dz0kBRWiUTXUN-fgeCHbUEHLaaHIwEPZEdUQ,2641
+pytz/zoneinfo/Europe/Sarajevo,sha256=OpWtsGFWBE_S-mYoQcAmjCta9HwbGQANnSmVY9OHCTo,1920
+pytz/zoneinfo/Europe/Saratov,sha256=BMej49HlQG24CWCh5VOENrB3jPuJPScPszRtb7MrJ3I,1183
+pytz/zoneinfo/Europe/Simferopol,sha256=W1y6rp4xKAT0-RR7KyoHEUVxjhtEk8zuGvsmXhzXKz4,1469
+pytz/zoneinfo/Europe/Skopje,sha256=OpWtsGFWBE_S-mYoQcAmjCta9HwbGQANnSmVY9OHCTo,1920
+pytz/zoneinfo/Europe/Sofia,sha256=hCQKXfMNrnA5xHNw_uzTjKzVw4-Bvsq5oGO4yUCv5tY,2077
+pytz/zoneinfo/Europe/Stockholm,sha256=Xgp4GSh8-pzdeJeP8TQ20jWDDUj17R69h6RYTbLYd2g,1909
+pytz/zoneinfo/Europe/Tallinn,sha256=4a6JC0aIpMzqIV7O35zoG0LLJwkQq5AoXZ2ivkic6-w,2148
+pytz/zoneinfo/Europe/Tirane,sha256=ztlZyCS9WCXeVW8nBun3Tyi5HUY0EtFbiBbEc1gucuw,2084
+pytz/zoneinfo/Europe/Tiraspol,sha256=p1J_rqFE13pL8cpBRrEFe-teCI8f0fKK4uTUy_4diF4,2390
+pytz/zoneinfo/Europe/Ulyanovsk,sha256=nFsgcVTmTiiFzHtyJDRnO-3H4GRAfAeceb6b2jFHLUQ,1267
+pytz/zoneinfo/Europe/Uzhgorod,sha256=msjVPUQsr1jTQ7KLWASRFpPIORKyzM_HpAWjAdk0tGo,2066
+pytz/zoneinfo/Europe/Vaduz,sha256=K5QY7Ujj2VUchKR4bhhb0hgdAJhmwED71ykXDQOGKe8,1909
+pytz/zoneinfo/Europe/Vatican,sha256=-X5F_d3Dz0kBRWiUTXUN-fgeCHbUEHLaaHIwEPZEdUQ,2641
+pytz/zoneinfo/Europe/Vienna,sha256=ZmI3kADE6bnrJEccqh73XXBY36L1G4DkpiTQImtNrUk,2200
+pytz/zoneinfo/Europe/Vilnius,sha256=UFzRX3orCTB8d9IzlxJPy5eUA2oBPuCu1UJl-2D7C3U,2162
+pytz/zoneinfo/Europe/Volgograd,sha256=XZNEUXwnmGdOTld_9Lug2CFfXbFCJFZC45nOMb59FRk,1165
+pytz/zoneinfo/Europe/Warsaw,sha256=TiLDPbeVF0ckgLVEkaSeDaKZ8wctdJDOl_HE_Wd5rKs,2654
+pytz/zoneinfo/Europe/Zagreb,sha256=OpWtsGFWBE_S-mYoQcAmjCta9HwbGQANnSmVY9OHCTo,1920
+pytz/zoneinfo/Europe/Zaporozhye,sha256=bbzGnKLNgoB7wKJxLQjLIx_wHzISSdMrnbEZluO2GfA,2138
+pytz/zoneinfo/Europe/Zurich,sha256=K5QY7Ujj2VUchKR4bhhb0hgdAJhmwED71ykXDQOGKe8,1909
+pytz/zoneinfo/Factory,sha256=aFFlKx93HXoJoF4SSuTlD8cZtJA-ne5oKzAa6eX2V4k,116
+pytz/zoneinfo/GB,sha256=xp08wV44TZMmAdBqppttDChQAb8tRN03GcEht99RYtY,3648
+pytz/zoneinfo/GB-Eire,sha256=xp08wV44TZMmAdBqppttDChQAb8tRN03GcEht99RYtY,3648
+pytz/zoneinfo/GMT,sha256=bZ83iIPAefhsA4elVHqSxEmGnYBuB94QCEqwTwJJAY0,114
+pytz/zoneinfo/GMT+0,sha256=bZ83iIPAefhsA4elVHqSxEmGnYBuB94QCEqwTwJJAY0,114
+pytz/zoneinfo/GMT-0,sha256=bZ83iIPAefhsA4elVHqSxEmGnYBuB94QCEqwTwJJAY0,114
+pytz/zoneinfo/GMT0,sha256=bZ83iIPAefhsA4elVHqSxEmGnYBuB94QCEqwTwJJAY0,114
+pytz/zoneinfo/Greenwich,sha256=bZ83iIPAefhsA4elVHqSxEmGnYBuB94QCEqwTwJJAY0,114
+pytz/zoneinfo/HST,sha256=1YkCncvgL9Z5CmUo4Vk8VbQmgA7ZAQ0PtE37j1yOli8,115
+pytz/zoneinfo/Hongkong,sha256=UcnFEc9S8hMWl9giVXni4TAhLPWX0H12XvwSt4AJHew,1203
+pytz/zoneinfo/Iceland,sha256=mSkaRBGZLeUrm88EeHcaWnEd35Wn-Ag2G10HtI3G2fg,1162
+pytz/zoneinfo/Indian/Antananarivo,sha256=yJsuJTqJJqbOz37_NOS_zbf-JNr_IthHGMMN7sDqSWg,265
+pytz/zoneinfo/Indian/Chagos,sha256=23B26pwwK0gxW7TP76GltyY-RU_o6RGGSrF93pF7S1E,199
+pytz/zoneinfo/Indian/Christmas,sha256=J4I0WDX_LYAJxsx2vU0EdxFJQKRE-rRL1UvNQv09pCs,165
+pytz/zoneinfo/Indian/Cocos,sha256=PX-k8JpghajjvhljtBjWozaiu9NhUSpVeoACy2cAxN8,174
+pytz/zoneinfo/Indian/Comoro,sha256=yJsuJTqJJqbOz37_NOS_zbf-JNr_IthHGMMN7sDqSWg,265
+pytz/zoneinfo/Indian/Kerguelen,sha256=oIvd6bmQFMLUefoBn4c1fQTOAawGcrPcmge2jU7BsYo,165
+pytz/zoneinfo/Indian/Mahe,sha256=ZNXjaoL_o6572xXgsgSmbd5D_SkaCaayolpSN1je82w,165
+pytz/zoneinfo/Indian/Maldives,sha256=dUQBbrmoB3odWsMt3K1YUnB447A6nkW3aR1aHzdLF7M,199
+pytz/zoneinfo/Indian/Mauritius,sha256=k6vWUVcfU3gS1K12e_aMw6BeSdMvdLyCJRCAL7CD0go,241
+pytz/zoneinfo/Indian/Mayotte,sha256=yJsuJTqJJqbOz37_NOS_zbf-JNr_IthHGMMN7sDqSWg,265
+pytz/zoneinfo/Indian/Reunion,sha256=lHnSVh7CYCuDBEM4dYsWDk006BSAznkCPxjiTtL_WiI,165
+pytz/zoneinfo/Iran,sha256=ATT50Q0hK6uSba5_WnOE3Px0OWxIwxaqK5Oi10P2A-M,2582
+pytz/zoneinfo/Israel,sha256=JUuWQmW5Tha0pJjw61Q5aN7CX0z4D7ops9OOSnda6Dc,2388
+pytz/zoneinfo/Jamaica,sha256=wlagieUPRf5-beie-h7QsONbNzjGsm8vMs8uf28pw28,482
+pytz/zoneinfo/Japan,sha256=oCueZgRNxcNcX3ZGdif9y6Su4cyVhga4XHdwlcrYLOs,309
+pytz/zoneinfo/Kwajalein,sha256=L4nH3qxv5EBKVRxYt67b9IfZfBzg5KJk19iu7x3oBMk,316
+pytz/zoneinfo/Libya,sha256=W1dptGD70T7ppGoo0fczFQeDiIp0nultLNPV66MwB2c,625
+pytz/zoneinfo/MET,sha256=i3CKSuP4N_PAj7o-Cbk8zPEdFs0CWWBCAfg2JXDx5V8,2094
+pytz/zoneinfo/MST,sha256=6IQwvtT12Bz1pTiqFuoVxNY-4ViS7ZrYHo5nPWwzKPw,114
+pytz/zoneinfo/MST7MDT,sha256=910Ek32FKoSyZWY_H19VHaVvqb-JsvnWTOOHvhrKsE0,2310
+pytz/zoneinfo/Mexico/BajaNorte,sha256=OHHtvy3J70z6wvKBHgPqMEnGs6SXp8fkf0WX9ZiOODk,2342
+pytz/zoneinfo/Mexico/BajaSur,sha256=aIyre-8trAXSHtqxbuu6gDDkWCUjI_SdAKPIjz74M2E,1526
+pytz/zoneinfo/Mexico/General,sha256=DSpTe5TT0KBsxGx79Rs7ah-zJpiGOJKwPjztovRN0b4,1584
+pytz/zoneinfo/NZ,sha256=gADjoyPo_QISQU6UJrAgcHp3HDaMoOFRdH-d23uBSyc,2437
+pytz/zoneinfo/NZ-CHAT,sha256=lkVqaSF1WWpv_B2K-k2uJp2setRVK6XbjsQ38gDGVEg,2068
+pytz/zoneinfo/Navajo,sha256=6_yPo1_mvnt9DgpPzr0QdHsjdsfUG6ALnagQLML1DSM,2444
+pytz/zoneinfo/PRC,sha256=ZP_C5DqUQ1oEPAQNHTr36S0DGtx453N68YYbqk7u8-Y,561
+pytz/zoneinfo/PST8PDT,sha256=Q7TCLkE69a6g7mPoPAkqhg-0dStyiAC0jVlM72KG_R8,2310
+pytz/zoneinfo/Pacific/Apia,sha256=cm6S6D0VdHsdqLJkupUJH6pLynao5QlwpMmRI9m5ZH4,612
+pytz/zoneinfo/Pacific/Auckland,sha256=gADjoyPo_QISQU6UJrAgcHp3HDaMoOFRdH-d23uBSyc,2437
+pytz/zoneinfo/Pacific/Bougainville,sha256=ZKDa_S_2gSlmOWizV1DqxH3wbE58rfK1vKZHZqrrtjI,268
+pytz/zoneinfo/Pacific/Chatham,sha256=lkVqaSF1WWpv_B2K-k2uJp2setRVK6XbjsQ38gDGVEg,2068
+pytz/zoneinfo/Pacific/Chuuk,sha256=6IYDKViuRDC_RVx1AJOxazVET6cZtdv_LFE6xbtGItI,269
+pytz/zoneinfo/Pacific/Easter,sha256=paHp1QRXIa02kgd0-4V6vWXdqcwheow-hJQD9VqacfQ,2233
+pytz/zoneinfo/Pacific/Efate,sha256=pG4NMVeM3hBJTZnZmqeLqz3Q5oCggTW4HO-R9Fe926A,538
+pytz/zoneinfo/Pacific/Enderbury,sha256=UvE7fVt5vGS7loKX10ibhNilliiNqwvQAXV9NRhYhgM,234
+pytz/zoneinfo/Pacific/Fakaofo,sha256=gow-SgE5r5c8J_Ag5nvJ5SUPDg6yH8pth_a-QLDcPv8,200
+pytz/zoneinfo/Pacific/Fiji,sha256=6gQaENzYya4ThAYG-mNkBdUo3pweeqPi3Q-9i7JTjaE,1049
+pytz/zoneinfo/Pacific/Funafuti,sha256=P-XYwlWQpWvS3Q_TYFe37BrgxKJy5tg7PHEQNCDGv5U,166
+pytz/zoneinfo/Pacific/Galapagos,sha256=MdtlC-ffp8reICzDxsQ8tWMsTkq5ZcN-j3OyyhjokV8,238
+pytz/zoneinfo/Pacific/Gambier,sha256=z6eYF8sszLjkfpqmWnbBBAUB-ibaR5nodKaAYbvXOe0,164
+pytz/zoneinfo/Pacific/Guadalcanal,sha256=6GX-XpxcCyA64qUMdxJMFMq4sPk0ZjhexqGbryzfgjE,166
+pytz/zoneinfo/Pacific/Guam,sha256=Ex9znmf6rNfGze6gNpZJCMr1TT4rkl2SnrhecrdJufI,494
+pytz/zoneinfo/Pacific/Honolulu,sha256=fwPRv1Jk56sCOi75uZfd_Iy2k2aSQHx3B2K5xUlSPzM,329
+pytz/zoneinfo/Pacific/Johnston,sha256=fwPRv1Jk56sCOi75uZfd_Iy2k2aSQHx3B2K5xUlSPzM,329
+pytz/zoneinfo/Pacific/Kanton,sha256=UvE7fVt5vGS7loKX10ibhNilliiNqwvQAXV9NRhYhgM,234
+pytz/zoneinfo/Pacific/Kiritimati,sha256=VHR3iuwiv3tx65WtitVHCoQEg3VJd812VZ5djuSyUxc,238
+pytz/zoneinfo/Pacific/Kosrae,sha256=Vm5AKI6NvuYSz58s8922WNIiWoqPcix2JOJOix1mlSU,351
+pytz/zoneinfo/Pacific/Kwajalein,sha256=L4nH3qxv5EBKVRxYt67b9IfZfBzg5KJk19iu7x3oBMk,316
+pytz/zoneinfo/Pacific/Majuro,sha256=Dwqh7gXoz7Duwu1n7XF8yEjhM4ULEs42LSQyy7F-qzQ,310
+pytz/zoneinfo/Pacific/Marquesas,sha256=uzsjVolutGRXp_FRnvXoU0ApDEb4ZaYoz_r60D7jufg,173
+pytz/zoneinfo/Pacific/Midway,sha256=fCYrYphYY6rUfxOw712y5cyRe104AC3pouqD3bCINFg,175
+pytz/zoneinfo/Pacific/Nauru,sha256=oGxocYsqssZ_EeQHf3cUP5cg0qtqzx1BzoEjVWjE_7g,252
+pytz/zoneinfo/Pacific/Niue,sha256=Kc0BRgsu7g2QTR9e37DuqRo1sUCWDFMowAQ4wO6YNQ0,203
+pytz/zoneinfo/Pacific/Norfolk,sha256=CdEXM9SKYC9Wn7aMxD2sV5i8zE88NQo25Z_L874JthI,880
+pytz/zoneinfo/Pacific/Noumea,sha256=FSanpAOCE7WHQeiop4QErKV9ZC3Tzu2GxkH8-tIXsHY,304
+pytz/zoneinfo/Pacific/Pago_Pago,sha256=fCYrYphYY6rUfxOw712y5cyRe104AC3pouqD3bCINFg,175
+pytz/zoneinfo/Pacific/Palau,sha256=CRW__McXPlOaxo2S9kHMHaBdjv7u59ZWEwYuJConzmQ,180
+pytz/zoneinfo/Pacific/Pitcairn,sha256=O65Ed1FOCF_0rEjpYPAquDwtAF3hxyJNiujgpgZV0kc,202
+pytz/zoneinfo/Pacific/Pohnpei,sha256=YqXrKwjhUnxWyV6PFg1L6_zu84MfPW82dypf0S7pHtQ,303
+pytz/zoneinfo/Pacific/Ponape,sha256=YqXrKwjhUnxWyV6PFg1L6_zu84MfPW82dypf0S7pHtQ,303
+pytz/zoneinfo/Pacific/Port_Moresby,sha256=ei_XjmiRDLh-RU94uvz9CCIIRFH1r0X7WL-sB-6DF60,186
+pytz/zoneinfo/Pacific/Rarotonga,sha256=3ur0jiBQqU20VyKMI3bSfA-HBaQ-HhjElTqsHWk1kic,603
+pytz/zoneinfo/Pacific/Saipan,sha256=Ex9znmf6rNfGze6gNpZJCMr1TT4rkl2SnrhecrdJufI,494
+pytz/zoneinfo/Pacific/Samoa,sha256=fCYrYphYY6rUfxOw712y5cyRe104AC3pouqD3bCINFg,175
+pytz/zoneinfo/Pacific/Tahiti,sha256=9iozXRFYDhBOLijmDk2mRS4Mb-LXWW1u7n790jBNKxM,165
+pytz/zoneinfo/Pacific/Tarawa,sha256=vT6UxW7KeGptdh80Fj9ASATGmLx8Wai630lML4mwg80,166
+pytz/zoneinfo/Pacific/Tongatapu,sha256=b0TbbaYBUDEkPIpcS-EnIKCZ5KSg2HNOGIZJ9Pa8TEI,372
+pytz/zoneinfo/Pacific/Truk,sha256=6IYDKViuRDC_RVx1AJOxazVET6cZtdv_LFE6xbtGItI,269
+pytz/zoneinfo/Pacific/Wake,sha256=dTJxldgcad-kGrODwo4cAHGRSsS-K3fjeZ62WEUhmFk,166
+pytz/zoneinfo/Pacific/Wallis,sha256=CAlw1H5gkER5lkvtmHY-ppoGL3hNmYxfMaXQpI0fTOE,166
+pytz/zoneinfo/Pacific/Yap,sha256=6IYDKViuRDC_RVx1AJOxazVET6cZtdv_LFE6xbtGItI,269
+pytz/zoneinfo/Poland,sha256=TiLDPbeVF0ckgLVEkaSeDaKZ8wctdJDOl_HE_Wd5rKs,2654
+pytz/zoneinfo/Portugal,sha256=mpUpxGexMhbOBImDLSQs5-GAk7pm7tg4qYW044Kkle0,3497
+pytz/zoneinfo/ROC,sha256=DMmQwOpPql25ue3Nf8vAKKT4em06D1Z9rHbLIitxixk,761
+pytz/zoneinfo/ROK,sha256=LI9LsV3XcJC0l-KoQf8zI-y7rk-du57erS-N2Ptdi7Q,617
+pytz/zoneinfo/Singapore,sha256=hIgr_LHMTWh3GgeG-MmLHBp-9anUxQcfMlKFtX8WvmU,383
+pytz/zoneinfo/Turkey,sha256=2S0A_f7VxvyErJMMCPqK33AChA29IVkMr1o-SpMtMxk,1947
+pytz/zoneinfo/UCT,sha256=i4WEZ5GrLIpUY8g6W-PAQ-JXDXRIQ01BOYlp7Ufj5vI,114
+pytz/zoneinfo/US/Alaska,sha256=oZA1NSPS2BWdymYpnCHFO8BlYVS-ll5KLg2Ez9CbETs,2371
+pytz/zoneinfo/US/Aleutian,sha256=IB1DhwJQAKbhPJ9jHLf8zW5Dad7HIkBS-dhv64E1OlM,2356
+pytz/zoneinfo/US/Arizona,sha256=nEOwYOnGxENw9zW8m50PGxbtVfTrX3QYAo4x4LgOLfI,328
+pytz/zoneinfo/US/Central,sha256=4aZFw-svkMyXmSpNufqzK-xveos-oVJDpEyI8Yu9HQE,3576
+pytz/zoneinfo/US/East-Indiana,sha256=GrNub1_3Um5Qh67wOx58_TEAz4fwAeAlk2AlMTVA_sI,1666
+pytz/zoneinfo/US/Eastern,sha256=7AoiEGjr3wV4P7C4Qs35COZqwr2mjNDq7ocpsSPFOM8,3536
+pytz/zoneinfo/US/Hawaii,sha256=fwPRv1Jk56sCOi75uZfd_Iy2k2aSQHx3B2K5xUlSPzM,329
+pytz/zoneinfo/US/Indiana-Starke,sha256=BiALShjiOLg1o8mMRWJ1jyTlJkgvwzte7B9WSOvTUNg,2428
+pytz/zoneinfo/US/Michigan,sha256=hecz8yqY2Cj5B61G3gLZdAVZvRgK9l0P90c_gN-uD5g,2230
+pytz/zoneinfo/US/Mountain,sha256=6_yPo1_mvnt9DgpPzr0QdHsjdsfUG6ALnagQLML1DSM,2444
+pytz/zoneinfo/US/Pacific,sha256=VOy1PikdjiVdJ7lukVGzwl8uDxV_KYqznkTm5BLEiDM,2836
+pytz/zoneinfo/US/Samoa,sha256=fCYrYphYY6rUfxOw712y5cyRe104AC3pouqD3bCINFg,175
+pytz/zoneinfo/UTC,sha256=i4WEZ5GrLIpUY8g6W-PAQ-JXDXRIQ01BOYlp7Ufj5vI,114
+pytz/zoneinfo/Universal,sha256=i4WEZ5GrLIpUY8g6W-PAQ-JXDXRIQ01BOYlp7Ufj5vI,114
+pytz/zoneinfo/W-SU,sha256=KmkofRcj6T8Ph28PJChm8JVp13uRvef6TZ0GuPzUiDw,1535
+pytz/zoneinfo/WET,sha256=Sc0l03EfVs_aIi17I4KyZJFkwiAHat5BgpjuuFDhgQ0,1905
+pytz/zoneinfo/Zulu,sha256=i4WEZ5GrLIpUY8g6W-PAQ-JXDXRIQ01BOYlp7Ufj5vI,114
+pytz/zoneinfo/iso3166.tab,sha256=BMh_yY7MXp8DMEy71jarFX3IJSNpwuEyIjIo2HKUXD4,4463
+pytz/zoneinfo/leapseconds,sha256=XqJcaqBog5UPa6fd84zib9yPUBoo6g-DqsGUfxqB-sE,3392
+pytz/zoneinfo/tzdata.zi,sha256=Nl0BSbgc-gXNYuMGwhVECQ-44rgo0iL8LMjm3ZIGAzo,112793
+pytz/zoneinfo/zone.tab,sha256=S1ouKxk9vkeA_AEHFmw2kJPVuJTUzPK0YrSZLGgURK4,19419
+pytz/zoneinfo/zone1970.tab,sha256=UlmR6Qfjsqd0DkTzQExCWGTeOfbtP0ruBBfI4_MKKQI,17593
diff --git a/.local/lib/python3.8/site-packages/pytz-2022.1.dist-info/WHEEL b/.local/lib/python3.8/site-packages/pytz-2022.1.dist-info/WHEEL
new file mode 100644
index 0000000..ef99c6c
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pytz-2022.1.dist-info/WHEEL
@@ -0,0 +1,6 @@
+Wheel-Version: 1.0
+Generator: bdist_wheel (0.34.2)
+Root-Is-Purelib: true
+Tag: py2-none-any
+Tag: py3-none-any
+
diff --git a/.local/lib/python3.8/site-packages/pytz-2022.1.dist-info/top_level.txt b/.local/lib/python3.8/site-packages/pytz-2022.1.dist-info/top_level.txt
new file mode 100644
index 0000000..af44f19
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pytz-2022.1.dist-info/top_level.txt
@@ -0,0 +1 @@
+pytz
diff --git a/.local/lib/python3.8/site-packages/pytz-2022.1.dist-info/zip-safe b/.local/lib/python3.8/site-packages/pytz-2022.1.dist-info/zip-safe
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pytz-2022.1.dist-info/zip-safe
@@ -0,0 +1 @@
+
diff --git a/.local/lib/python3.8/site-packages/pytz/__init__.py b/.local/lib/python3.8/site-packages/pytz/__init__.py
new file mode 100644
index 0000000..900e8ca
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pytz/__init__.py
@@ -0,0 +1,1559 @@
+'''
+datetime.tzinfo timezone definitions generated from the
+Olson timezone database:
+
+    ftp://elsie.nci.nih.gov/pub/tz*.tar.gz
+
+See the datetime section of the Python Library Reference for information
+on how to use these modules.
+'''
+
+import sys
+import datetime
+import os.path
+
+from pytz.exceptions import AmbiguousTimeError
+from pytz.exceptions import InvalidTimeError
+from pytz.exceptions import NonExistentTimeError
+from pytz.exceptions import UnknownTimeZoneError
+from pytz.lazy import LazyDict, LazyList, LazySet  # noqa
+from pytz.tzinfo import unpickler, BaseTzInfo
+from pytz.tzfile import build_tzinfo
+
+
+# The IANA (nee Olson) database is updated several times a year.
+OLSON_VERSION = '2022a'
+VERSION = '2022.1'  # pip compatible version number.
+__version__ = VERSION
+
+OLSEN_VERSION = OLSON_VERSION  # Old releases had this misspelling
+
+__all__ = [
+    'timezone', 'utc', 'country_timezones', 'country_names',
+    'AmbiguousTimeError', 'InvalidTimeError',
+    'NonExistentTimeError', 'UnknownTimeZoneError',
+    'all_timezones', 'all_timezones_set',
+    'common_timezones', 'common_timezones_set',
+    'BaseTzInfo', 'FixedOffset',
+]
+
+
+if sys.version_info[0] > 2:  # Python 3.x
+
+    # Python 3.x doesn't have unicode(), making writing code
+    # for Python 2.3 and Python 3.x a pain.
+    unicode = str
+
+    def ascii(s):
+        r"""
+        >>> ascii('Hello')
+        'Hello'
+        >>> ascii('\N{TRADE MARK SIGN}') #doctest: +IGNORE_EXCEPTION_DETAIL
+        Traceback (most recent call last):
+            ...
+        UnicodeEncodeError: ...
+        """
+        if type(s) == bytes:
+            s = s.decode('ASCII')
+        else:
+            s.encode('ASCII')  # Raise an exception if not ASCII
+        return s  # But the string - not a byte string.
+
+else:  # Python 2.x
+
+    def ascii(s):
+        r"""
+        >>> ascii('Hello')
+        'Hello'
+        >>> ascii(u'Hello')
+        'Hello'
+        >>> ascii(u'\N{TRADE MARK SIGN}') #doctest: +IGNORE_EXCEPTION_DETAIL
+        Traceback (most recent call last):
+            ...
+        UnicodeEncodeError: ...
+        """
+        return s.encode('ASCII')
+
+
+def open_resource(name):
+    """Open a resource from the zoneinfo subdir for reading.
+
+    Uses the pkg_resources module if it is available and no standard
+    file is found at the calculated location.
+
+    It is possible to specify a different location for the zoneinfo
+    subdir by using the PYTZ_TZDATADIR environment variable.
+    """
+    name_parts = name.lstrip('/').split('/')
+    for part in name_parts:
+        if part == os.path.pardir or os.path.sep in part:
+            raise ValueError('Bad path segment: %r' % part)
+    zoneinfo_dir = os.environ.get('PYTZ_TZDATADIR', None)
+    if zoneinfo_dir is not None:
+        filename = os.path.join(zoneinfo_dir, *name_parts)
+    else:
+        filename = os.path.join(os.path.dirname(__file__),
+                                'zoneinfo', *name_parts)
+        if not os.path.exists(filename):
+            # http://bugs.launchpad.net/bugs/383171 - we avoid using this
+            # unless absolutely necessary to help when a broken version of
+            # pkg_resources is installed.
+            try:
+                from pkg_resources import resource_stream
+            except ImportError:
+                resource_stream = None
+
+            if resource_stream is not None:
+                return resource_stream(__name__, 'zoneinfo/' + name)
+    return open(filename, 'rb')
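+
+# Editor's illustration, not upstream pytz code: with the override set, e.g.
+# PYTZ_TZDATADIR=/opt/tzdata (a hypothetical path), open_resource('UTC')
+# opens /opt/tzdata/UTC directly; without the override, the file is read from
+# the package's own zoneinfo/ directory, and pkg_resources is used only as a
+# fallback when that file is missing.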
+
+
+def resource_exists(name):
+    """Return true if the given resource exists"""
+    try:
+        if os.environ.get('PYTZ_SKIPEXISTSCHECK', ''):
+            # In "standard" distributions, we can assume that
+            # all the listed timezones are present. As an
+            # import-speed optimization, you can set the
+            # PYTZ_SKIPEXISTSCHECK flag to skip checking
+            # for the presence of the resource file on disk.
+            return True
+        open_resource(name).close()
+        return True
+    except IOError:
+        return False
+
+
+_tzinfo_cache = {}
+
+
+def timezone(zone):
+    r''' Return a datetime.tzinfo implementation for the given timezone
+
+    >>> from datetime import datetime, timedelta
+    >>> utc = timezone('UTC')
+    >>> eastern = timezone('US/Eastern')
+    >>> eastern.zone
+    'US/Eastern'
+    >>> timezone(unicode('US/Eastern')) is eastern
+    True
+    >>> utc_dt = datetime(2002, 10, 27, 6, 0, 0, tzinfo=utc)
+    >>> loc_dt = utc_dt.astimezone(eastern)
+    >>> fmt = '%Y-%m-%d %H:%M:%S %Z (%z)'
+    >>> loc_dt.strftime(fmt)
+    '2002-10-27 01:00:00 EST (-0500)'
+    >>> (loc_dt - timedelta(minutes=10)).strftime(fmt)
+    '2002-10-27 00:50:00 EST (-0500)'
+    >>> eastern.normalize(loc_dt - timedelta(minutes=10)).strftime(fmt)
+    '2002-10-27 01:50:00 EDT (-0400)'
+    >>> (loc_dt + timedelta(minutes=10)).strftime(fmt)
+    '2002-10-27 01:10:00 EST (-0500)'
+
+    Raises UnknownTimeZoneError if passed an unknown zone.
+
+    >>> try:
+    ...     timezone('Asia/Shangri-La')
+    ... except UnknownTimeZoneError:
+    ...     print('Unknown')
+    Unknown
+
+    >>> try:
+    ...     timezone(unicode('\N{TRADE MARK SIGN}'))
+    ... except UnknownTimeZoneError:
+    ...     print('Unknown')
+    Unknown
+
+    '''
+    if zone is None:
+        raise UnknownTimeZoneError(None)
+
+    if zone.upper() == 'UTC':
+        return utc
+
+    try:
+        zone = ascii(zone)
+    except UnicodeEncodeError:
+        # All valid timezones are ASCII
+        raise UnknownTimeZoneError(zone)
+
+    zone = _case_insensitive_zone_lookup(_unmunge_zone(zone))
+    if zone not in _tzinfo_cache:
+        if zone in all_timezones_set:  # noqa
+            fp = open_resource(zone)
+            try:
+                _tzinfo_cache[zone] = build_tzinfo(zone, fp)
+            finally:
+                fp.close()
+        else:
+            raise UnknownTimeZoneError(zone)
+
+    return _tzinfo_cache[zone]
+
+
+def _unmunge_zone(zone):
+    """Undo the time zone name munging done by older versions of pytz."""
+    return zone.replace('_plus_', '+').replace('_minus_', '-')
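+
+# Editor's illustration, not upstream pytz code: for example,
+# _unmunge_zone('Etc/GMT_plus_5') returns 'Etc/GMT+5' and
+# _unmunge_zone('Etc/GMT_minus_2') returns 'Etc/GMT-2'.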
+
+
+_all_timezones_lower_to_standard = None
+
+
+def _case_insensitive_zone_lookup(zone):
+    """case-insensitively matching timezone, else return zone unchanged"""
+    global _all_timezones_lower_to_standard
+    if _all_timezones_lower_to_standard is None:
+        _all_timezones_lower_to_standard = dict((tz.lower(), tz) for tz in all_timezones)  # noqa
+    return _all_timezones_lower_to_standard.get(zone.lower()) or zone  # noqa
+
+
+ZERO = datetime.timedelta(0)
+HOUR = datetime.timedelta(hours=1)
+
+
+class UTC(BaseTzInfo):
+    """UTC
+
+    Optimized UTC implementation. It unpickles using the single module global
+    instance defined beneath this class declaration.
+    """
+    zone = "UTC"
+
+    _utcoffset = ZERO
+    _dst = ZERO
+    _tzname = zone
+
+    def fromutc(self, dt):
+        if dt.tzinfo is None:
+            return self.localize(dt)
+        return super(utc.__class__, self).fromutc(dt)
+
+    def utcoffset(self, dt):
+        return ZERO
+
+    def tzname(self, dt):
+        return "UTC"
+
+    def dst(self, dt):
+        return ZERO
+
+    def __reduce__(self):
+        return _UTC, ()
+
+    def localize(self, dt, is_dst=False):
+        '''Convert naive time to local time'''
+        if dt.tzinfo is not None:
+            raise ValueError('Not naive datetime (tzinfo is already set)')
+        return dt.replace(tzinfo=self)
+
+    def normalize(self, dt, is_dst=False):
+        '''Correct the timezone information on the given datetime'''
+        if dt.tzinfo is self:
+            return dt
+        if dt.tzinfo is None:
+            raise ValueError('Naive time - no tzinfo set')
+        return dt.astimezone(self)
+
+    def __repr__(self):
+        return ""
+
+    def __str__(self):
+        return "UTC"
+
+
+UTC = utc = UTC()  # UTC is a singleton
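+
+# Editor's illustration, not upstream pytz code: localize() attaches this
+# zone to a naive datetime, e.g.
+#     str(utc.localize(datetime.datetime(2022, 1, 1)))
+# gives '2022-01-01 00:00:00+00:00', while normalize() re-expresses an
+# already-aware datetime in this zone via astimezone().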
+
+
+def _UTC():
+    """Factory function for utc unpickling.
+
+    Makes sure that unpickling a utc instance always returns the same
+    module global.
+
+    These examples belong in the UTC class above, but it is obscured; or in
+    the README.rst, but we are not depending on Python 2.4 so integrating
+    the README.rst examples with the unit tests is not trivial.
+
+    >>> import datetime, pickle
+    >>> dt = datetime.datetime(2005, 3, 1, 14, 13, 21, tzinfo=utc)
+    >>> naive = dt.replace(tzinfo=None)
+    >>> p = pickle.dumps(dt, 1)
+    >>> naive_p = pickle.dumps(naive, 1)
+    >>> len(p) - len(naive_p)
+    17
+    >>> new = pickle.loads(p)
+    >>> new == dt
+    True
+    >>> new is dt
+    False
+    >>> new.tzinfo is dt.tzinfo
+    True
+    >>> utc is UTC is timezone('UTC')
+    True
+    >>> utc is timezone('GMT')
+    False
+    """
+    return utc
+
+
+_UTC.__safe_for_unpickling__ = True
+
+
+def _p(*args):
+    """Factory function for unpickling pytz tzinfo instances.
+
+    Just a wrapper around tzinfo.unpickler to save a few bytes in each pickle
+    by shortening the path.
+    """
+    return unpickler(*args)
+
+
+_p.__safe_for_unpickling__ = True
+
+
+class _CountryTimezoneDict(LazyDict):
+    """Map ISO 3166 country code to a list of timezone names commonly used
+    in that country.
+
+    iso3166_code is the two letter code used to identify the country.
+
+    >>> def print_list(list_of_strings):
+    ...     'We use a helper so doctests work under Python 2.3 -> 3.x'
+    ...     for s in list_of_strings:
+    ...         print(s)
+
+    >>> print_list(country_timezones['nz'])
+    Pacific/Auckland
+    Pacific/Chatham
+    >>> print_list(country_timezones['ch'])
+    Europe/Zurich
+    >>> print_list(country_timezones['CH'])
+    Europe/Zurich
+    >>> print_list(country_timezones[unicode('ch')])
+    Europe/Zurich
+    >>> print_list(country_timezones['XXX'])
+    Traceback (most recent call last):
+    ...
+    KeyError: 'XXX'
+
+    Previously, this information was exposed as a function rather than a
+    dictionary. This is still supported::
+
+    >>> print_list(country_timezones('nz'))
+    Pacific/Auckland
+    Pacific/Chatham
+    """
+    def __call__(self, iso3166_code):
+        """Backwards compatibility."""
+        return self[iso3166_code]
+
+    def _fill(self):
+        data = {}
+        zone_tab = open_resource('zone.tab')
+        try:
+            for line in zone_tab:
+                line = line.decode('UTF-8')
+                if line.startswith('#'):
+                    continue
+                code, coordinates, zone = line.split(None, 4)[:3]
+                if zone not in all_timezones_set:  # noqa
+                    continue
+                try:
+                    data[code].append(zone)
+                except KeyError:
+                    data[code] = [zone]
+            self.data = data
+        finally:
+            zone_tab.close()
+
+
+country_timezones = _CountryTimezoneDict()
+
+
+class _CountryNameDict(LazyDict):
+    '''Dictionary providing ISO3166 code -> English name.
+
+    >>> print(country_names['au'])
+    Australia
+    '''
+    def _fill(self):
+        data = {}
+        zone_tab = open_resource('iso3166.tab')
+        try:
+            for line in zone_tab.readlines():
+                line = line.decode('UTF-8')
+                if line.startswith('#'):
+                    continue
+                code, name = line.split(None, 1)
+                data[code] = name.strip()
+            self.data = data
+        finally:
+            zone_tab.close()
+
+
+country_names = _CountryNameDict()
+
+
+# Time-zone info based solely on fixed offsets
+
+class _FixedOffset(datetime.tzinfo):
+
+    zone = None  # to match the standard pytz API
+
+    def __init__(self, minutes):
+        if abs(minutes) >= 1440:
+            raise ValueError("absolute offset is too large", minutes)
+        self._minutes = minutes
+        self._offset = datetime.timedelta(minutes=minutes)
+
+    def utcoffset(self, dt):
+        return self._offset
+
+    def __reduce__(self):
+        return FixedOffset, (self._minutes, )
+
+    def dst(self, dt):
+        return ZERO
+
+    def tzname(self, dt):
+        return None
+
+    def __repr__(self):
+        return 'pytz.FixedOffset(%d)' % self._minutes
+
+    def localize(self, dt, is_dst=False):
+        '''Convert naive time to local time'''
+        if dt.tzinfo is not None:
+            raise ValueError('Not naive datetime (tzinfo is already set)')
+        return dt.replace(tzinfo=self)
+
+    def normalize(self, dt, is_dst=False):
+        '''Correct the timezone information on the given datetime'''
+        if dt.tzinfo is self:
+            return dt
+        if dt.tzinfo is None:
+            raise ValueError('Naive time - no tzinfo set')
+        return dt.astimezone(self)
+
+
+def FixedOffset(offset, _tzinfos={}):
+    """return a fixed-offset timezone based off a number of minutes.
+
+        >>> one = FixedOffset(-330)
+        >>> one
+        pytz.FixedOffset(-330)
+        >>> str(one.utcoffset(datetime.datetime.now()))
+        '-1 day, 18:30:00'
+        >>> str(one.dst(datetime.datetime.now()))
+        '0:00:00'
+
+        >>> two = FixedOffset(1380)
+        >>> two
+        pytz.FixedOffset(1380)
+        >>> str(two.utcoffset(datetime.datetime.now()))
+        '23:00:00'
+        >>> str(two.dst(datetime.datetime.now()))
+        '0:00:00'
+
+    The corresponding datetime.timedelta must be strictly between -1 day
+    and +1 day (both exclusive).
+
+        >>> FixedOffset(1440)
+        Traceback (most recent call last):
+        ...
+        ValueError: ('absolute offset is too large', 1440)
+
+        >>> FixedOffset(-1440)
+        Traceback (most recent call last):
+        ...
+        ValueError: ('absolute offset is too large', -1440)
+
+    An offset of 0 is special-cased to return UTC.
+
+        >>> FixedOffset(0) is UTC
+        True
+
+    Only one FixedOffset instance should ever exist per offset, even
+    across multiple creation calls.
+
+        >>> FixedOffset(-330) is one
+        True
+        >>> FixedOffset(1380) is two
+        True
+
+    It should also be true for pickling.
+
+        >>> import pickle
+        >>> pickle.loads(pickle.dumps(one)) is one
+        True
+        >>> pickle.loads(pickle.dumps(two)) is two
+        True
+    """
+    if offset == 0:
+        return UTC
+
+    info = _tzinfos.get(offset)
+    if info is None:
+        # We haven't seen this one before; we need to save it.
+
+        # Use setdefault to avoid a race condition and make sure we have
+        # only one instance per offset.
+        info = _tzinfos.setdefault(offset, _FixedOffset(offset))
+
+    return info
+
+
+FixedOffset.__safe_for_unpickling__ = True
+
+
+def _test():
+    import doctest
+    sys.path.insert(0, os.pardir)
+    import pytz
+    return doctest.testmod(pytz)
+
+
+if __name__ == '__main__':
+    _test()
+all_timezones = \
+['Africa/Abidjan',
+ 'Africa/Accra',
+ 'Africa/Addis_Ababa',
+ 'Africa/Algiers',
+ 'Africa/Asmara',
+ 'Africa/Asmera',
+ 'Africa/Bamako',
+ 'Africa/Bangui',
+ 'Africa/Banjul',
+ 'Africa/Bissau',
+ 'Africa/Blantyre',
+ 'Africa/Brazzaville',
+ 'Africa/Bujumbura',
+ 'Africa/Cairo',
+ 'Africa/Casablanca',
+ 'Africa/Ceuta',
+ 'Africa/Conakry',
+ 'Africa/Dakar',
+ 'Africa/Dar_es_Salaam',
+ 'Africa/Djibouti',
+ 'Africa/Douala',
+ 'Africa/El_Aaiun',
+ 'Africa/Freetown',
+ 'Africa/Gaborone',
+ 'Africa/Harare',
+ 'Africa/Johannesburg',
+ 'Africa/Juba',
+ 'Africa/Kampala',
+ 'Africa/Khartoum',
+ 'Africa/Kigali',
+ 'Africa/Kinshasa',
+ 'Africa/Lagos',
+ 'Africa/Libreville',
+ 'Africa/Lome',
+ 'Africa/Luanda',
+ 'Africa/Lubumbashi',
+ 'Africa/Lusaka',
+ 'Africa/Malabo',
+ 'Africa/Maputo',
+ 'Africa/Maseru',
+ 'Africa/Mbabane',
+ 'Africa/Mogadishu',
+ 'Africa/Monrovia',
+ 'Africa/Nairobi',
+ 'Africa/Ndjamena',
+ 'Africa/Niamey',
+ 'Africa/Nouakchott',
+ 'Africa/Ouagadougou',
+ 'Africa/Porto-Novo',
+ 'Africa/Sao_Tome',
+ 'Africa/Timbuktu',
+ 'Africa/Tripoli',
+ 'Africa/Tunis',
+ 'Africa/Windhoek',
+ 'America/Adak',
+ 'America/Anchorage',
+ 'America/Anguilla',
+ 'America/Antigua',
+ 'America/Araguaina',
+ 'America/Argentina/Buenos_Aires',
+ 'America/Argentina/Catamarca',
+ 'America/Argentina/ComodRivadavia',
+ 'America/Argentina/Cordoba',
+ 'America/Argentina/Jujuy',
+ 'America/Argentina/La_Rioja',
+ 'America/Argentina/Mendoza',
+ 'America/Argentina/Rio_Gallegos',
+ 'America/Argentina/Salta',
+ 'America/Argentina/San_Juan',
+ 'America/Argentina/San_Luis',
+ 'America/Argentina/Tucuman',
+ 'America/Argentina/Ushuaia',
+ 'America/Aruba',
+ 'America/Asuncion',
+ 'America/Atikokan',
+ 'America/Atka',
+ 'America/Bahia',
+ 'America/Bahia_Banderas',
+ 'America/Barbados',
+ 'America/Belem',
+ 'America/Belize',
+ 'America/Blanc-Sablon',
+ 'America/Boa_Vista',
+ 'America/Bogota',
+ 'America/Boise',
+ 'America/Buenos_Aires',
+ 'America/Cambridge_Bay',
+ 'America/Campo_Grande',
+ 'America/Cancun',
+ 'America/Caracas',
+ 'America/Catamarca',
+ 'America/Cayenne',
+ 'America/Cayman',
+ 'America/Chicago',
+ 'America/Chihuahua',
+ 'America/Coral_Harbour',
+ 'America/Cordoba',
+ 'America/Costa_Rica',
+ 'America/Creston',
+ 'America/Cuiaba',
+ 'America/Curacao',
+ 'America/Danmarkshavn',
+ 'America/Dawson',
+ 'America/Dawson_Creek',
+ 'America/Denver',
+ 'America/Detroit',
+ 'America/Dominica',
+ 'America/Edmonton',
+ 'America/Eirunepe',
+ 'America/El_Salvador',
+ 'America/Ensenada',
+ 'America/Fort_Nelson',
+ 'America/Fort_Wayne',
+ 'America/Fortaleza',
+ 'America/Glace_Bay',
+ 'America/Godthab',
+ 'America/Goose_Bay',
+ 'America/Grand_Turk',
+ 'America/Grenada',
+ 'America/Guadeloupe',
+ 'America/Guatemala',
+ 'America/Guayaquil',
+ 'America/Guyana',
+ 'America/Halifax',
+ 'America/Havana',
+ 'America/Hermosillo',
+ 'America/Indiana/Indianapolis',
+ 'America/Indiana/Knox',
+ 'America/Indiana/Marengo',
+ 'America/Indiana/Petersburg',
+ 'America/Indiana/Tell_City',
+ 'America/Indiana/Vevay',
+ 'America/Indiana/Vincennes',
+ 'America/Indiana/Winamac',
+ 'America/Indianapolis',
+ 'America/Inuvik',
+ 'America/Iqaluit',
+ 'America/Jamaica',
+ 'America/Jujuy',
+ 'America/Juneau',
+ 'America/Kentucky/Louisville',
+ 'America/Kentucky/Monticello',
+ 'America/Knox_IN',
+ 'America/Kralendijk',
+ 'America/La_Paz',
+ 'America/Lima',
+ 'America/Los_Angeles',
+ 'America/Louisville',
+ 'America/Lower_Princes',
+ 'America/Maceio',
+ 'America/Managua',
+ 'America/Manaus',
+ 'America/Marigot',
+ 'America/Martinique',
+ 'America/Matamoros',
+ 'America/Mazatlan',
+ 'America/Mendoza',
+ 'America/Menominee',
+ 'America/Merida',
+ 'America/Metlakatla',
+ 'America/Mexico_City',
+ 'America/Miquelon',
+ 'America/Moncton',
+ 'America/Monterrey',
+ 'America/Montevideo',
+ 'America/Montreal',
+ 'America/Montserrat',
+ 'America/Nassau',
+ 'America/New_York',
+ 'America/Nipigon',
+ 'America/Nome',
+ 'America/Noronha',
+ 'America/North_Dakota/Beulah',
+ 'America/North_Dakota/Center',
+ 'America/North_Dakota/New_Salem',
+ 'America/Nuuk',
+ 'America/Ojinaga',
+ 'America/Panama',
+ 'America/Pangnirtung',
+ 'America/Paramaribo',
+ 'America/Phoenix',
+ 'America/Port-au-Prince',
+ 'America/Port_of_Spain',
+ 'America/Porto_Acre',
+ 'America/Porto_Velho',
+ 'America/Puerto_Rico',
+ 'America/Punta_Arenas',
+ 'America/Rainy_River',
+ 'America/Rankin_Inlet',
+ 'America/Recife',
+ 'America/Regina',
+ 'America/Resolute',
+ 'America/Rio_Branco',
+ 'America/Rosario',
+ 'America/Santa_Isabel',
+ 'America/Santarem',
+ 'America/Santiago',
+ 'America/Santo_Domingo',
+ 'America/Sao_Paulo',
+ 'America/Scoresbysund',
+ 'America/Shiprock',
+ 'America/Sitka',
+ 'America/St_Barthelemy',
+ 'America/St_Johns',
+ 'America/St_Kitts',
+ 'America/St_Lucia',
+ 'America/St_Thomas',
+ 'America/St_Vincent',
+ 'America/Swift_Current',
+ 'America/Tegucigalpa',
+ 'America/Thule',
+ 'America/Thunder_Bay',
+ 'America/Tijuana',
+ 'America/Toronto',
+ 'America/Tortola',
+ 'America/Vancouver',
+ 'America/Virgin',
+ 'America/Whitehorse',
+ 'America/Winnipeg',
+ 'America/Yakutat',
+ 'America/Yellowknife',
+ 'Antarctica/Casey',
+ 'Antarctica/Davis',
+ 'Antarctica/DumontDUrville',
+ 'Antarctica/Macquarie',
+ 'Antarctica/Mawson',
+ 'Antarctica/McMurdo',
+ 'Antarctica/Palmer',
+ 'Antarctica/Rothera',
+ 'Antarctica/South_Pole',
+ 'Antarctica/Syowa',
+ 'Antarctica/Troll',
+ 'Antarctica/Vostok',
+ 'Arctic/Longyearbyen',
+ 'Asia/Aden',
+ 'Asia/Almaty',
+ 'Asia/Amman',
+ 'Asia/Anadyr',
+ 'Asia/Aqtau',
+ 'Asia/Aqtobe',
+ 'Asia/Ashgabat',
+ 'Asia/Ashkhabad',
+ 'Asia/Atyrau',
+ 'Asia/Baghdad',
+ 'Asia/Bahrain',
+ 'Asia/Baku',
+ 'Asia/Bangkok',
+ 'Asia/Barnaul',
+ 'Asia/Beirut',
+ 'Asia/Bishkek',
+ 'Asia/Brunei',
+ 'Asia/Calcutta',
+ 'Asia/Chita',
+ 'Asia/Choibalsan',
+ 'Asia/Chongqing',
+ 'Asia/Chungking',
+ 'Asia/Colombo',
+ 'Asia/Dacca',
+ 'Asia/Damascus',
+ 'Asia/Dhaka',
+ 'Asia/Dili',
+ 'Asia/Dubai',
+ 'Asia/Dushanbe',
+ 'Asia/Famagusta',
+ 'Asia/Gaza',
+ 'Asia/Harbin',
+ 'Asia/Hebron',
+ 'Asia/Ho_Chi_Minh',
+ 'Asia/Hong_Kong',
+ 'Asia/Hovd',
+ 'Asia/Irkutsk',
+ 'Asia/Istanbul',
+ 'Asia/Jakarta',
+ 'Asia/Jayapura',
+ 'Asia/Jerusalem',
+ 'Asia/Kabul',
+ 'Asia/Kamchatka',
+ 'Asia/Karachi',
+ 'Asia/Kashgar',
+ 'Asia/Kathmandu',
+ 'Asia/Katmandu',
+ 'Asia/Khandyga',
+ 'Asia/Kolkata',
+ 'Asia/Krasnoyarsk',
+ 'Asia/Kuala_Lumpur',
+ 'Asia/Kuching',
+ 'Asia/Kuwait',
+ 'Asia/Macao',
+ 'Asia/Macau',
+ 'Asia/Magadan',
+ 'Asia/Makassar',
+ 'Asia/Manila',
+ 'Asia/Muscat',
+ 'Asia/Nicosia',
+ 'Asia/Novokuznetsk',
+ 'Asia/Novosibirsk',
+ 'Asia/Omsk',
+ 'Asia/Oral',
+ 'Asia/Phnom_Penh',
+ 'Asia/Pontianak',
+ 'Asia/Pyongyang',
+ 'Asia/Qatar',
+ 'Asia/Qostanay',
+ 'Asia/Qyzylorda',
+ 'Asia/Rangoon',
+ 'Asia/Riyadh',
+ 'Asia/Saigon',
+ 'Asia/Sakhalin',
+ 'Asia/Samarkand',
+ 'Asia/Seoul',
+ 'Asia/Shanghai',
+ 'Asia/Singapore',
+ 'Asia/Srednekolymsk',
+ 'Asia/Taipei',
+ 'Asia/Tashkent',
+ 'Asia/Tbilisi',
+ 'Asia/Tehran',
+ 'Asia/Tel_Aviv',
+ 'Asia/Thimbu',
+ 'Asia/Thimphu',
+ 'Asia/Tokyo',
+ 'Asia/Tomsk',
+ 'Asia/Ujung_Pandang',
+ 'Asia/Ulaanbaatar',
+ 'Asia/Ulan_Bator',
+ 'Asia/Urumqi',
+ 'Asia/Ust-Nera',
+ 'Asia/Vientiane',
+ 'Asia/Vladivostok',
+ 'Asia/Yakutsk',
+ 'Asia/Yangon',
+ 'Asia/Yekaterinburg',
+ 'Asia/Yerevan',
+ 'Atlantic/Azores',
+ 'Atlantic/Bermuda',
+ 'Atlantic/Canary',
+ 'Atlantic/Cape_Verde',
+ 'Atlantic/Faeroe',
+ 'Atlantic/Faroe',
+ 'Atlantic/Jan_Mayen',
+ 'Atlantic/Madeira',
+ 'Atlantic/Reykjavik',
+ 'Atlantic/South_Georgia',
+ 'Atlantic/St_Helena',
+ 'Atlantic/Stanley',
+ 'Australia/ACT',
+ 'Australia/Adelaide',
+ 'Australia/Brisbane',
+ 'Australia/Broken_Hill',
+ 'Australia/Canberra',
+ 'Australia/Currie',
+ 'Australia/Darwin',
+ 'Australia/Eucla',
+ 'Australia/Hobart',
+ 'Australia/LHI',
+ 'Australia/Lindeman',
+ 'Australia/Lord_Howe',
+ 'Australia/Melbourne',
+ 'Australia/NSW',
+ 'Australia/North',
+ 'Australia/Perth',
+ 'Australia/Queensland',
+ 'Australia/South',
+ 'Australia/Sydney',
+ 'Australia/Tasmania',
+ 'Australia/Victoria',
+ 'Australia/West',
+ 'Australia/Yancowinna',
+ 'Brazil/Acre',
+ 'Brazil/DeNoronha',
+ 'Brazil/East',
+ 'Brazil/West',
+ 'CET',
+ 'CST6CDT',
+ 'Canada/Atlantic',
+ 'Canada/Central',
+ 'Canada/Eastern',
+ 'Canada/Mountain',
+ 'Canada/Newfoundland',
+ 'Canada/Pacific',
+ 'Canada/Saskatchewan',
+ 'Canada/Yukon',
+ 'Chile/Continental',
+ 'Chile/EasterIsland',
+ 'Cuba',
+ 'EET',
+ 'EST',
+ 'EST5EDT',
+ 'Egypt',
+ 'Eire',
+ 'Etc/GMT',
+ 'Etc/GMT+0',
+ 'Etc/GMT+1',
+ 'Etc/GMT+10',
+ 'Etc/GMT+11',
+ 'Etc/GMT+12',
+ 'Etc/GMT+2',
+ 'Etc/GMT+3',
+ 'Etc/GMT+4',
+ 'Etc/GMT+5',
+ 'Etc/GMT+6',
+ 'Etc/GMT+7',
+ 'Etc/GMT+8',
+ 'Etc/GMT+9',
+ 'Etc/GMT-0',
+ 'Etc/GMT-1',
+ 'Etc/GMT-10',
+ 'Etc/GMT-11',
+ 'Etc/GMT-12',
+ 'Etc/GMT-13',
+ 'Etc/GMT-14',
+ 'Etc/GMT-2',
+ 'Etc/GMT-3',
+ 'Etc/GMT-4',
+ 'Etc/GMT-5',
+ 'Etc/GMT-6',
+ 'Etc/GMT-7',
+ 'Etc/GMT-8',
+ 'Etc/GMT-9',
+ 'Etc/GMT0',
+ 'Etc/Greenwich',
+ 'Etc/UCT',
+ 'Etc/UTC',
+ 'Etc/Universal',
+ 'Etc/Zulu',
+ 'Europe/Amsterdam',
+ 'Europe/Andorra',
+ 'Europe/Astrakhan',
+ 'Europe/Athens',
+ 'Europe/Belfast',
+ 'Europe/Belgrade',
+ 'Europe/Berlin',
+ 'Europe/Bratislava',
+ 'Europe/Brussels',
+ 'Europe/Bucharest',
+ 'Europe/Budapest',
+ 'Europe/Busingen',
+ 'Europe/Chisinau',
+ 'Europe/Copenhagen',
+ 'Europe/Dublin',
+ 'Europe/Gibraltar',
+ 'Europe/Guernsey',
+ 'Europe/Helsinki',
+ 'Europe/Isle_of_Man',
+ 'Europe/Istanbul',
+ 'Europe/Jersey',
+ 'Europe/Kaliningrad',
+ 'Europe/Kiev',
+ 'Europe/Kirov',
+ 'Europe/Lisbon',
+ 'Europe/Ljubljana',
+ 'Europe/London',
+ 'Europe/Luxembourg',
+ 'Europe/Madrid',
+ 'Europe/Malta',
+ 'Europe/Mariehamn',
+ 'Europe/Minsk',
+ 'Europe/Monaco',
+ 'Europe/Moscow',
+ 'Europe/Nicosia',
+ 'Europe/Oslo',
+ 'Europe/Paris',
+ 'Europe/Podgorica',
+ 'Europe/Prague',
+ 'Europe/Riga',
+ 'Europe/Rome',
+ 'Europe/Samara',
+ 'Europe/San_Marino',
+ 'Europe/Sarajevo',
+ 'Europe/Saratov',
+ 'Europe/Simferopol',
+ 'Europe/Skopje',
+ 'Europe/Sofia',
+ 'Europe/Stockholm',
+ 'Europe/Tallinn',
+ 'Europe/Tirane',
+ 'Europe/Tiraspol',
+ 'Europe/Ulyanovsk',
+ 'Europe/Uzhgorod',
+ 'Europe/Vaduz',
+ 'Europe/Vatican',
+ 'Europe/Vienna',
+ 'Europe/Vilnius',
+ 'Europe/Volgograd',
+ 'Europe/Warsaw',
+ 'Europe/Zagreb',
+ 'Europe/Zaporozhye',
+ 'Europe/Zurich',
+ 'GB',
+ 'GB-Eire',
+ 'GMT',
+ 'GMT+0',
+ 'GMT-0',
+ 'GMT0',
+ 'Greenwich',
+ 'HST',
+ 'Hongkong',
+ 'Iceland',
+ 'Indian/Antananarivo',
+ 'Indian/Chagos',
+ 'Indian/Christmas',
+ 'Indian/Cocos',
+ 'Indian/Comoro',
+ 'Indian/Kerguelen',
+ 'Indian/Mahe',
+ 'Indian/Maldives',
+ 'Indian/Mauritius',
+ 'Indian/Mayotte',
+ 'Indian/Reunion',
+ 'Iran',
+ 'Israel',
+ 'Jamaica',
+ 'Japan',
+ 'Kwajalein',
+ 'Libya',
+ 'MET',
+ 'MST',
+ 'MST7MDT',
+ 'Mexico/BajaNorte',
+ 'Mexico/BajaSur',
+ 'Mexico/General',
+ 'NZ',
+ 'NZ-CHAT',
+ 'Navajo',
+ 'PRC',
+ 'PST8PDT',
+ 'Pacific/Apia',
+ 'Pacific/Auckland',
+ 'Pacific/Bougainville',
+ 'Pacific/Chatham',
+ 'Pacific/Chuuk',
+ 'Pacific/Easter',
+ 'Pacific/Efate',
+ 'Pacific/Enderbury',
+ 'Pacific/Fakaofo',
+ 'Pacific/Fiji',
+ 'Pacific/Funafuti',
+ 'Pacific/Galapagos',
+ 'Pacific/Gambier',
+ 'Pacific/Guadalcanal',
+ 'Pacific/Guam',
+ 'Pacific/Honolulu',
+ 'Pacific/Johnston',
+ 'Pacific/Kanton',
+ 'Pacific/Kiritimati',
+ 'Pacific/Kosrae',
+ 'Pacific/Kwajalein',
+ 'Pacific/Majuro',
+ 'Pacific/Marquesas',
+ 'Pacific/Midway',
+ 'Pacific/Nauru',
+ 'Pacific/Niue',
+ 'Pacific/Norfolk',
+ 'Pacific/Noumea',
+ 'Pacific/Pago_Pago',
+ 'Pacific/Palau',
+ 'Pacific/Pitcairn',
+ 'Pacific/Pohnpei',
+ 'Pacific/Ponape',
+ 'Pacific/Port_Moresby',
+ 'Pacific/Rarotonga',
+ 'Pacific/Saipan',
+ 'Pacific/Samoa',
+ 'Pacific/Tahiti',
+ 'Pacific/Tarawa',
+ 'Pacific/Tongatapu',
+ 'Pacific/Truk',
+ 'Pacific/Wake',
+ 'Pacific/Wallis',
+ 'Pacific/Yap',
+ 'Poland',
+ 'Portugal',
+ 'ROC',
+ 'ROK',
+ 'Singapore',
+ 'Turkey',
+ 'UCT',
+ 'US/Alaska',
+ 'US/Aleutian',
+ 'US/Arizona',
+ 'US/Central',
+ 'US/East-Indiana',
+ 'US/Eastern',
+ 'US/Hawaii',
+ 'US/Indiana-Starke',
+ 'US/Michigan',
+ 'US/Mountain',
+ 'US/Pacific',
+ 'US/Samoa',
+ 'UTC',
+ 'Universal',
+ 'W-SU',
+ 'WET',
+ 'Zulu']
+all_timezones = LazyList(
+        tz for tz in all_timezones if resource_exists(tz))
+
+all_timezones_set = LazySet(all_timezones)
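+# The set variant gives cheap membership tests; _CountryTimezoneDict._fill
+# above uses it to filter zone.tab entries.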
+common_timezones = \
+['Africa/Abidjan',
+ 'Africa/Accra',
+ 'Africa/Addis_Ababa',
+ 'Africa/Algiers',
+ 'Africa/Asmara',
+ 'Africa/Bamako',
+ 'Africa/Bangui',
+ 'Africa/Banjul',
+ 'Africa/Bissau',
+ 'Africa/Blantyre',
+ 'Africa/Brazzaville',
+ 'Africa/Bujumbura',
+ 'Africa/Cairo',
+ 'Africa/Casablanca',
+ 'Africa/Ceuta',
+ 'Africa/Conakry',
+ 'Africa/Dakar',
+ 'Africa/Dar_es_Salaam',
+ 'Africa/Djibouti',
+ 'Africa/Douala',
+ 'Africa/El_Aaiun',
+ 'Africa/Freetown',
+ 'Africa/Gaborone',
+ 'Africa/Harare',
+ 'Africa/Johannesburg',
+ 'Africa/Juba',
+ 'Africa/Kampala',
+ 'Africa/Khartoum',
+ 'Africa/Kigali',
+ 'Africa/Kinshasa',
+ 'Africa/Lagos',
+ 'Africa/Libreville',
+ 'Africa/Lome',
+ 'Africa/Luanda',
+ 'Africa/Lubumbashi',
+ 'Africa/Lusaka',
+ 'Africa/Malabo',
+ 'Africa/Maputo',
+ 'Africa/Maseru',
+ 'Africa/Mbabane',
+ 'Africa/Mogadishu',
+ 'Africa/Monrovia',
+ 'Africa/Nairobi',
+ 'Africa/Ndjamena',
+ 'Africa/Niamey',
+ 'Africa/Nouakchott',
+ 'Africa/Ouagadougou',
+ 'Africa/Porto-Novo',
+ 'Africa/Sao_Tome',
+ 'Africa/Tripoli',
+ 'Africa/Tunis',
+ 'Africa/Windhoek',
+ 'America/Adak',
+ 'America/Anchorage',
+ 'America/Anguilla',
+ 'America/Antigua',
+ 'America/Araguaina',
+ 'America/Argentina/Buenos_Aires',
+ 'America/Argentina/Catamarca',
+ 'America/Argentina/Cordoba',
+ 'America/Argentina/Jujuy',
+ 'America/Argentina/La_Rioja',
+ 'America/Argentina/Mendoza',
+ 'America/Argentina/Rio_Gallegos',
+ 'America/Argentina/Salta',
+ 'America/Argentina/San_Juan',
+ 'America/Argentina/San_Luis',
+ 'America/Argentina/Tucuman',
+ 'America/Argentina/Ushuaia',
+ 'America/Aruba',
+ 'America/Asuncion',
+ 'America/Atikokan',
+ 'America/Bahia',
+ 'America/Bahia_Banderas',
+ 'America/Barbados',
+ 'America/Belem',
+ 'America/Belize',
+ 'America/Blanc-Sablon',
+ 'America/Boa_Vista',
+ 'America/Bogota',
+ 'America/Boise',
+ 'America/Cambridge_Bay',
+ 'America/Campo_Grande',
+ 'America/Cancun',
+ 'America/Caracas',
+ 'America/Cayenne',
+ 'America/Cayman',
+ 'America/Chicago',
+ 'America/Chihuahua',
+ 'America/Costa_Rica',
+ 'America/Creston',
+ 'America/Cuiaba',
+ 'America/Curacao',
+ 'America/Danmarkshavn',
+ 'America/Dawson',
+ 'America/Dawson_Creek',
+ 'America/Denver',
+ 'America/Detroit',
+ 'America/Dominica',
+ 'America/Edmonton',
+ 'America/Eirunepe',
+ 'America/El_Salvador',
+ 'America/Fort_Nelson',
+ 'America/Fortaleza',
+ 'America/Glace_Bay',
+ 'America/Goose_Bay',
+ 'America/Grand_Turk',
+ 'America/Grenada',
+ 'America/Guadeloupe',
+ 'America/Guatemala',
+ 'America/Guayaquil',
+ 'America/Guyana',
+ 'America/Halifax',
+ 'America/Havana',
+ 'America/Hermosillo',
+ 'America/Indiana/Indianapolis',
+ 'America/Indiana/Knox',
+ 'America/Indiana/Marengo',
+ 'America/Indiana/Petersburg',
+ 'America/Indiana/Tell_City',
+ 'America/Indiana/Vevay',
+ 'America/Indiana/Vincennes',
+ 'America/Indiana/Winamac',
+ 'America/Inuvik',
+ 'America/Iqaluit',
+ 'America/Jamaica',
+ 'America/Juneau',
+ 'America/Kentucky/Louisville',
+ 'America/Kentucky/Monticello',
+ 'America/Kralendijk',
+ 'America/La_Paz',
+ 'America/Lima',
+ 'America/Los_Angeles',
+ 'America/Lower_Princes',
+ 'America/Maceio',
+ 'America/Managua',
+ 'America/Manaus',
+ 'America/Marigot',
+ 'America/Martinique',
+ 'America/Matamoros',
+ 'America/Mazatlan',
+ 'America/Menominee',
+ 'America/Merida',
+ 'America/Metlakatla',
+ 'America/Mexico_City',
+ 'America/Miquelon',
+ 'America/Moncton',
+ 'America/Monterrey',
+ 'America/Montevideo',
+ 'America/Montserrat',
+ 'America/Nassau',
+ 'America/New_York',
+ 'America/Nipigon',
+ 'America/Nome',
+ 'America/Noronha',
+ 'America/North_Dakota/Beulah',
+ 'America/North_Dakota/Center',
+ 'America/North_Dakota/New_Salem',
+ 'America/Nuuk',
+ 'America/Ojinaga',
+ 'America/Panama',
+ 'America/Pangnirtung',
+ 'America/Paramaribo',
+ 'America/Phoenix',
+ 'America/Port-au-Prince',
+ 'America/Port_of_Spain',
+ 'America/Porto_Velho',
+ 'America/Puerto_Rico',
+ 'America/Punta_Arenas',
+ 'America/Rainy_River',
+ 'America/Rankin_Inlet',
+ 'America/Recife',
+ 'America/Regina',
+ 'America/Resolute',
+ 'America/Rio_Branco',
+ 'America/Santarem',
+ 'America/Santiago',
+ 'America/Santo_Domingo',
+ 'America/Sao_Paulo',
+ 'America/Scoresbysund',
+ 'America/Sitka',
+ 'America/St_Barthelemy',
+ 'America/St_Johns',
+ 'America/St_Kitts',
+ 'America/St_Lucia',
+ 'America/St_Thomas',
+ 'America/St_Vincent',
+ 'America/Swift_Current',
+ 'America/Tegucigalpa',
+ 'America/Thule',
+ 'America/Thunder_Bay',
+ 'America/Tijuana',
+ 'America/Toronto',
+ 'America/Tortola',
+ 'America/Vancouver',
+ 'America/Whitehorse',
+ 'America/Winnipeg',
+ 'America/Yakutat',
+ 'America/Yellowknife',
+ 'Antarctica/Casey',
+ 'Antarctica/Davis',
+ 'Antarctica/DumontDUrville',
+ 'Antarctica/Macquarie',
+ 'Antarctica/Mawson',
+ 'Antarctica/McMurdo',
+ 'Antarctica/Palmer',
+ 'Antarctica/Rothera',
+ 'Antarctica/Syowa',
+ 'Antarctica/Troll',
+ 'Antarctica/Vostok',
+ 'Arctic/Longyearbyen',
+ 'Asia/Aden',
+ 'Asia/Almaty',
+ 'Asia/Amman',
+ 'Asia/Anadyr',
+ 'Asia/Aqtau',
+ 'Asia/Aqtobe',
+ 'Asia/Ashgabat',
+ 'Asia/Atyrau',
+ 'Asia/Baghdad',
+ 'Asia/Bahrain',
+ 'Asia/Baku',
+ 'Asia/Bangkok',
+ 'Asia/Barnaul',
+ 'Asia/Beirut',
+ 'Asia/Bishkek',
+ 'Asia/Brunei',
+ 'Asia/Chita',
+ 'Asia/Choibalsan',
+ 'Asia/Colombo',
+ 'Asia/Damascus',
+ 'Asia/Dhaka',
+ 'Asia/Dili',
+ 'Asia/Dubai',
+ 'Asia/Dushanbe',
+ 'Asia/Famagusta',
+ 'Asia/Gaza',
+ 'Asia/Hebron',
+ 'Asia/Ho_Chi_Minh',
+ 'Asia/Hong_Kong',
+ 'Asia/Hovd',
+ 'Asia/Irkutsk',
+ 'Asia/Jakarta',
+ 'Asia/Jayapura',
+ 'Asia/Jerusalem',
+ 'Asia/Kabul',
+ 'Asia/Kamchatka',
+ 'Asia/Karachi',
+ 'Asia/Kathmandu',
+ 'Asia/Khandyga',
+ 'Asia/Kolkata',
+ 'Asia/Krasnoyarsk',
+ 'Asia/Kuala_Lumpur',
+ 'Asia/Kuching',
+ 'Asia/Kuwait',
+ 'Asia/Macau',
+ 'Asia/Magadan',
+ 'Asia/Makassar',
+ 'Asia/Manila',
+ 'Asia/Muscat',
+ 'Asia/Nicosia',
+ 'Asia/Novokuznetsk',
+ 'Asia/Novosibirsk',
+ 'Asia/Omsk',
+ 'Asia/Oral',
+ 'Asia/Phnom_Penh',
+ 'Asia/Pontianak',
+ 'Asia/Pyongyang',
+ 'Asia/Qatar',
+ 'Asia/Qostanay',
+ 'Asia/Qyzylorda',
+ 'Asia/Riyadh',
+ 'Asia/Sakhalin',
+ 'Asia/Samarkand',
+ 'Asia/Seoul',
+ 'Asia/Shanghai',
+ 'Asia/Singapore',
+ 'Asia/Srednekolymsk',
+ 'Asia/Taipei',
+ 'Asia/Tashkent',
+ 'Asia/Tbilisi',
+ 'Asia/Tehran',
+ 'Asia/Thimphu',
+ 'Asia/Tokyo',
+ 'Asia/Tomsk',
+ 'Asia/Ulaanbaatar',
+ 'Asia/Urumqi',
+ 'Asia/Ust-Nera',
+ 'Asia/Vientiane',
+ 'Asia/Vladivostok',
+ 'Asia/Yakutsk',
+ 'Asia/Yangon',
+ 'Asia/Yekaterinburg',
+ 'Asia/Yerevan',
+ 'Atlantic/Azores',
+ 'Atlantic/Bermuda',
+ 'Atlantic/Canary',
+ 'Atlantic/Cape_Verde',
+ 'Atlantic/Faroe',
+ 'Atlantic/Madeira',
+ 'Atlantic/Reykjavik',
+ 'Atlantic/South_Georgia',
+ 'Atlantic/St_Helena',
+ 'Atlantic/Stanley',
+ 'Australia/Adelaide',
+ 'Australia/Brisbane',
+ 'Australia/Broken_Hill',
+ 'Australia/Darwin',
+ 'Australia/Eucla',
+ 'Australia/Hobart',
+ 'Australia/Lindeman',
+ 'Australia/Lord_Howe',
+ 'Australia/Melbourne',
+ 'Australia/Perth',
+ 'Australia/Sydney',
+ 'Canada/Atlantic',
+ 'Canada/Central',
+ 'Canada/Eastern',
+ 'Canada/Mountain',
+ 'Canada/Newfoundland',
+ 'Canada/Pacific',
+ 'Europe/Amsterdam',
+ 'Europe/Andorra',
+ 'Europe/Astrakhan',
+ 'Europe/Athens',
+ 'Europe/Belgrade',
+ 'Europe/Berlin',
+ 'Europe/Bratislava',
+ 'Europe/Brussels',
+ 'Europe/Bucharest',
+ 'Europe/Budapest',
+ 'Europe/Busingen',
+ 'Europe/Chisinau',
+ 'Europe/Copenhagen',
+ 'Europe/Dublin',
+ 'Europe/Gibraltar',
+ 'Europe/Guernsey',
+ 'Europe/Helsinki',
+ 'Europe/Isle_of_Man',
+ 'Europe/Istanbul',
+ 'Europe/Jersey',
+ 'Europe/Kaliningrad',
+ 'Europe/Kiev',
+ 'Europe/Kirov',
+ 'Europe/Lisbon',
+ 'Europe/Ljubljana',
+ 'Europe/London',
+ 'Europe/Luxembourg',
+ 'Europe/Madrid',
+ 'Europe/Malta',
+ 'Europe/Mariehamn',
+ 'Europe/Minsk',
+ 'Europe/Monaco',
+ 'Europe/Moscow',
+ 'Europe/Oslo',
+ 'Europe/Paris',
+ 'Europe/Podgorica',
+ 'Europe/Prague',
+ 'Europe/Riga',
+ 'Europe/Rome',
+ 'Europe/Samara',
+ 'Europe/San_Marino',
+ 'Europe/Sarajevo',
+ 'Europe/Saratov',
+ 'Europe/Simferopol',
+ 'Europe/Skopje',
+ 'Europe/Sofia',
+ 'Europe/Stockholm',
+ 'Europe/Tallinn',
+ 'Europe/Tirane',
+ 'Europe/Ulyanovsk',
+ 'Europe/Uzhgorod',
+ 'Europe/Vaduz',
+ 'Europe/Vatican',
+ 'Europe/Vienna',
+ 'Europe/Vilnius',
+ 'Europe/Volgograd',
+ 'Europe/Warsaw',
+ 'Europe/Zagreb',
+ 'Europe/Zaporozhye',
+ 'Europe/Zurich',
+ 'GMT',
+ 'Indian/Antananarivo',
+ 'Indian/Chagos',
+ 'Indian/Christmas',
+ 'Indian/Cocos',
+ 'Indian/Comoro',
+ 'Indian/Kerguelen',
+ 'Indian/Mahe',
+ 'Indian/Maldives',
+ 'Indian/Mauritius',
+ 'Indian/Mayotte',
+ 'Indian/Reunion',
+ 'Pacific/Apia',
+ 'Pacific/Auckland',
+ 'Pacific/Bougainville',
+ 'Pacific/Chatham',
+ 'Pacific/Chuuk',
+ 'Pacific/Easter',
+ 'Pacific/Efate',
+ 'Pacific/Fakaofo',
+ 'Pacific/Fiji',
+ 'Pacific/Funafuti',
+ 'Pacific/Galapagos',
+ 'Pacific/Gambier',
+ 'Pacific/Guadalcanal',
+ 'Pacific/Guam',
+ 'Pacific/Honolulu',
+ 'Pacific/Kanton',
+ 'Pacific/Kiritimati',
+ 'Pacific/Kosrae',
+ 'Pacific/Kwajalein',
+ 'Pacific/Majuro',
+ 'Pacific/Marquesas',
+ 'Pacific/Midway',
+ 'Pacific/Nauru',
+ 'Pacific/Niue',
+ 'Pacific/Norfolk',
+ 'Pacific/Noumea',
+ 'Pacific/Pago_Pago',
+ 'Pacific/Palau',
+ 'Pacific/Pitcairn',
+ 'Pacific/Pohnpei',
+ 'Pacific/Port_Moresby',
+ 'Pacific/Rarotonga',
+ 'Pacific/Saipan',
+ 'Pacific/Tahiti',
+ 'Pacific/Tarawa',
+ 'Pacific/Tongatapu',
+ 'Pacific/Wake',
+ 'Pacific/Wallis',
+ 'US/Alaska',
+ 'US/Arizona',
+ 'US/Central',
+ 'US/Eastern',
+ 'US/Hawaii',
+ 'US/Mountain',
+ 'US/Pacific',
+ 'UTC']
+common_timezones = LazyList(
+            tz for tz in common_timezones if tz in all_timezones)
+
+common_timezones_set = LazySet(common_timezones)
diff --git a/.local/lib/python3.8/site-packages/pytz/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/pytz/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000..da4f5c0
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/__pycache__/__init__.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/pytz/__pycache__/exceptions.cpython-38.pyc b/.local/lib/python3.8/site-packages/pytz/__pycache__/exceptions.cpython-38.pyc
new file mode 100644
index 0000000..5997d59
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/__pycache__/exceptions.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/pytz/__pycache__/lazy.cpython-38.pyc b/.local/lib/python3.8/site-packages/pytz/__pycache__/lazy.cpython-38.pyc
new file mode 100644
index 0000000..5b4a43f
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/__pycache__/lazy.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/pytz/__pycache__/reference.cpython-38.pyc b/.local/lib/python3.8/site-packages/pytz/__pycache__/reference.cpython-38.pyc
new file mode 100644
index 0000000..2c3b3b1
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/__pycache__/reference.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/pytz/__pycache__/tzfile.cpython-38.pyc b/.local/lib/python3.8/site-packages/pytz/__pycache__/tzfile.cpython-38.pyc
new file mode 100644
index 0000000..d638e12
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/__pycache__/tzfile.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/pytz/__pycache__/tzinfo.cpython-38.pyc b/.local/lib/python3.8/site-packages/pytz/__pycache__/tzinfo.cpython-38.pyc
new file mode 100644
index 0000000..6cec388
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/__pycache__/tzinfo.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/pytz/exceptions.py b/.local/lib/python3.8/site-packages/pytz/exceptions.py
new file mode 100644
index 0000000..4b20bde
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pytz/exceptions.py
@@ -0,0 +1,59 @@
+'''
+Custom exceptions raised by pytz.
+'''
+
+__all__ = [
+    'UnknownTimeZoneError', 'InvalidTimeError', 'AmbiguousTimeError',
+    'NonExistentTimeError',
+]
+
+
+class Error(Exception):
+    '''Base class for all exceptions raised by the pytz library'''
+
+
+class UnknownTimeZoneError(KeyError, Error):
+    '''Exception raised when pytz is passed an unknown timezone.
+
+    >>> isinstance(UnknownTimeZoneError(), LookupError)
+    True
+
+    This class is actually a subclass of KeyError to provide backwards
+    compatibility with code relying on the undocumented behavior of earlier
+    pytz releases.
+
+    >>> isinstance(UnknownTimeZoneError(), KeyError)
+    True
+
+    And also a subclass of pytz.exceptions.Error, as are other pytz
+    exceptions.
+
+    >>> isinstance(UnknownTimeZoneError(), Error)
+    True
+
+    '''
+    pass
+
+
+class InvalidTimeError(Error):
+    '''Base class for invalid time exceptions.'''
+
+
+class AmbiguousTimeError(InvalidTimeError):
+    '''Exception raised when attempting to create an ambiguous wallclock time.
+
+    At the end of a DST transition period, a particular wallclock time will
+    occur twice (once before the clocks are set back, once after). Both
+    possibilities may be correct, unless further information is supplied.
+
+    See DstTzInfo.normalize() for more info
+    '''
+
+
+class NonExistentTimeError(InvalidTimeError):
+    '''Exception raised when attempting to create a wallclock time that
+    cannot exist.
+
+    At the start of a DST transition period, the wallclock time jumps forward.
+    The instants jumped over never occur.
+    '''
diff --git a/.local/lib/python3.8/site-packages/pytz/lazy.py b/.local/lib/python3.8/site-packages/pytz/lazy.py
new file mode 100644
index 0000000..39344fc
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pytz/lazy.py
@@ -0,0 +1,172 @@
+from threading import RLock
+try:
+    from collections.abc import Mapping as DictMixin
+except ImportError:  # Python < 3.3
+    try:
+        from UserDict import DictMixin  # Python 2
+    except ImportError:  # Python 3.0-3.3
+        from collections import Mapping as DictMixin
+
+
+# With lazy loading, we might end up with multiple threads triggering
+# it at the same time. We need a lock.
+_fill_lock = RLock()
+
+
+class LazyDict(DictMixin):
+    """Dictionary populated on first use."""
+    data = None
+
+    def __getitem__(self, key):
+        if self.data is None:
+            _fill_lock.acquire()
+            try:
+                if self.data is None:
+                    self._fill()
+            finally:
+                _fill_lock.release()
+        return self.data[key.upper()]
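+        # NB: lookups upper-case the key, which is why the doctests in
+        # _CountryTimezoneDict accept 'ch', 'CH' and unicode('ch') alike.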
+
+    def __contains__(self, key):
+        if self.data is None:
+            _fill_lock.acquire()
+            try:
+                if self.data is None:
+                    self._fill()
+            finally:
+                _fill_lock.release()
+        return key in self.data
+
+    def __iter__(self):
+        if self.data is None:
+            _fill_lock.acquire()
+            try:
+                if self.data is None:
+                    self._fill()
+            finally:
+                _fill_lock.release()
+        return iter(self.data)
+
+    def __len__(self):
+        if self.data is None:
+            _fill_lock.acquire()
+            try:
+                if self.data is None:
+                    self._fill()
+            finally:
+                _fill_lock.release()
+        return len(self.data)
+
+    def keys(self):
+        if self.data is None:
+            _fill_lock.acquire()
+            try:
+                if self.data is None:
+                    self._fill()
+            finally:
+                _fill_lock.release()
+        return self.data.keys()
+
+
+class LazyList(list):
+    """List populated on first use."""
+
+    _props = [
+        '__str__', '__repr__', '__unicode__',
+        '__hash__', '__sizeof__', '__cmp__',
+        '__lt__', '__le__', '__eq__', '__ne__', '__gt__', '__ge__',
+        'append', 'count', 'index', 'extend', 'insert', 'pop', 'remove',
+        'reverse', 'sort', '__add__', '__radd__', '__iadd__', '__mul__',
+        '__rmul__', '__imul__', '__contains__', '__len__', '__nonzero__',
+        '__getitem__', '__setitem__', '__delitem__', '__iter__',
+        '__reversed__', '__getslice__', '__setslice__', '__delslice__']
+
+    def __new__(cls, fill_iter=None):
+
+        if fill_iter is None:
+            return list()
+
+        # We need a new class as we will be dynamically messing with its
+        # methods.
+        class LazyList(list):
+            pass
+
+        fill_iter = [fill_iter]
+
+        def lazy(name):
+            def _lazy(self, *args, **kw):
+                _fill_lock.acquire()
+                try:
+                    if len(fill_iter) > 0:
+                        list.extend(self, fill_iter.pop())
+                        for method_name in cls._props:
+                            delattr(LazyList, method_name)
+                finally:
+                    _fill_lock.release()
+                return getattr(list, name)(self, *args, **kw)
+            return _lazy
+
+        for name in cls._props:
+            setattr(LazyList, name, lazy(name))
+
+        new_list = LazyList()
+        return new_list
+
+# Not all versions of Python declare the same magic methods.
+# Filter out properties that don't exist in this version of Python
+# from the list.
+LazyList._props = [prop for prop in LazyList._props if hasattr(list, prop)]
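+# A usage sketch (illustrative, not part of pytz): the iterator handed to
+# LazyList is only consumed on first access.
+#
+#     squares = LazyList(n * n for n in range(5))  # generator not consumed
+#     squares[2]    # first access fills the list and removes the shims -> 4
+#     len(squares)  # 5; later calls go straight to plain list methods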
+
+
+class LazySet(set):
+    """Set populated on first use."""
+
+    _props = (
+        '__str__', '__repr__', '__unicode__',
+        '__hash__', '__sizeof__', '__cmp__',
+        '__lt__', '__le__', '__eq__', '__ne__', '__gt__', '__ge__',
+        '__contains__', '__len__', '__nonzero__',
+        '__getitem__', '__setitem__', '__delitem__', '__iter__',
+        '__sub__', '__and__', '__xor__', '__or__',
+        '__rsub__', '__rand__', '__rxor__', '__ror__',
+        '__isub__', '__iand__', '__ixor__', '__ior__',
+        'add', 'clear', 'copy', 'difference', 'difference_update',
+        'discard', 'intersection', 'intersection_update', 'isdisjoint',
+        'issubset', 'issuperset', 'pop', 'remove',
+        'symmetric_difference', 'symmetric_difference_update',
+        'union', 'update')
+
+    def __new__(cls, fill_iter=None):
+
+        if fill_iter is None:
+            return set()
+
+        class LazySet(set):
+            pass
+
+        fill_iter = [fill_iter]
+
+        def lazy(name):
+            def _lazy(self, *args, **kw):
+                _fill_lock.acquire()
+                try:
+                    if len(fill_iter) > 0:
+                        for i in fill_iter.pop():
+                            set.add(self, i)
+                        for method_name in cls._props:
+                            delattr(LazySet, method_name)
+                finally:
+                    _fill_lock.release()
+                return getattr(set, name)(self, *args, **kw)
+            return _lazy
+
+        for name in cls._props:
+            setattr(LazySet, name, lazy(name))
+
+        new_set = LazySet()
+        return new_set
+
+# Not all versions of Python declare the same magic methods.
+# Filter out properties that don't exist in this version of Python
+# from the list.
+LazySet._props = [prop for prop in LazySet._props if hasattr(set, prop)]
diff --git a/.local/lib/python3.8/site-packages/pytz/reference.py b/.local/lib/python3.8/site-packages/pytz/reference.py
new file mode 100644
index 0000000..f765ca0
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pytz/reference.py
@@ -0,0 +1,140 @@
+'''
+Reference tzinfo implementations from the Python docs.
+Used for testing against, as they are only correct for the years
+1987 to 2006. Do not use these for real code.
+'''
+
+from datetime import tzinfo, timedelta, datetime
+from pytz import HOUR, ZERO, UTC
+
+__all__ = [
+    'FixedOffset',
+    'LocalTimezone',
+    'USTimeZone',
+    'Eastern',
+    'Central',
+    'Mountain',
+    'Pacific',
+    'UTC'
+]
+
+
+# A class building tzinfo objects for fixed-offset time zones.
+# Note that FixedOffset(0, "UTC") is a different way to build a
+# UTC tzinfo object.
+class FixedOffset(tzinfo):
+    """Fixed offset in minutes east from UTC."""
+
+    def __init__(self, offset, name):
+        self.__offset = timedelta(minutes=offset)
+        self.__name = name
+
+    def utcoffset(self, dt):
+        return self.__offset
+
+    def tzname(self, dt):
+        return self.__name
+
+    def dst(self, dt):
+        return ZERO
+
+
+import time as _time
+
+STDOFFSET = timedelta(seconds=-_time.timezone)
+if _time.daylight:
+    DSTOFFSET = timedelta(seconds=-_time.altzone)
+else:
+    DSTOFFSET = STDOFFSET
+
+DSTDIFF = DSTOFFSET - STDOFFSET
+
+
+# A class capturing the platform's idea of local time.
+class LocalTimezone(tzinfo):
+
+    def utcoffset(self, dt):
+        if self._isdst(dt):
+            return DSTOFFSET
+        else:
+            return STDOFFSET
+
+    def dst(self, dt):
+        if self._isdst(dt):
+            return DSTDIFF
+        else:
+            return ZERO
+
+    def tzname(self, dt):
+        return _time.tzname[self._isdst(dt)]
+
+    def _isdst(self, dt):
+        tt = (dt.year, dt.month, dt.day,
+              dt.hour, dt.minute, dt.second,
+              dt.weekday(), 0, -1)
+        stamp = _time.mktime(tt)
+        tt = _time.localtime(stamp)
+        return tt.tm_isdst > 0
+
+Local = LocalTimezone()
+
+
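+# Helper for the US DST rules below: step dt forward to the next Sunday,
+# leaving it unchanged if it already falls on one (weekday() == 6).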
+def first_sunday_on_or_after(dt):
+    days_to_go = 6 - dt.weekday()
+    if days_to_go:
+        dt += timedelta(days_to_go)
+    return dt
+
+
+# In the US, DST starts at 2am (standard time) on the first Sunday in April.
+DSTSTART = datetime(1, 4, 1, 2)
+# and ends at 2am (DST time; 1am standard time) on the last Sunday of Oct.
+# which is the first Sunday on or after Oct 25.
+DSTEND = datetime(1, 10, 25, 1)
+
+
+# A complete implementation of current DST rules for major US time zones.
+class USTimeZone(tzinfo):
+
+    def __init__(self, hours, reprname, stdname, dstname):
+        self.stdoffset = timedelta(hours=hours)
+        self.reprname = reprname
+        self.stdname = stdname
+        self.dstname = dstname
+
+    def __repr__(self):
+        return self.reprname
+
+    def tzname(self, dt):
+        if self.dst(dt):
+            return self.dstname
+        else:
+            return self.stdname
+
+    def utcoffset(self, dt):
+        return self.stdoffset + self.dst(dt)
+
+    def dst(self, dt):
+        if dt is None or dt.tzinfo is None:
+            # An exception may be sensible here, in one or both cases.
+            # It depends on how you want to treat them.  The default
+            # fromutc() implementation (called by the default astimezone()
+            # implementation) passes a datetime with dt.tzinfo is self.
+            return ZERO
+        assert dt.tzinfo is self
+
+        # Find first Sunday in April & the last in October.
+        start = first_sunday_on_or_after(DSTSTART.replace(year=dt.year))
+        end = first_sunday_on_or_after(DSTEND.replace(year=dt.year))
+
+        # Can't compare naive to aware objects, so strip the timezone from
+        # dt first.
+        if start <= dt.replace(tzinfo=None) < end:
+            return HOUR
+        else:
+            return ZERO
+
+Eastern = USTimeZone(-5, "Eastern", "EST", "EDT")
+Central = USTimeZone(-6, "Central", "CST", "CDT")
+Mountain = USTimeZone(-7, "Mountain", "MST", "MDT")
+Pacific = USTimeZone(-8, "Pacific", "PST", "PDT")
diff --git a/.local/lib/python3.8/site-packages/pytz/tzfile.py b/.local/lib/python3.8/site-packages/pytz/tzfile.py
new file mode 100644
index 0000000..99e7448
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pytz/tzfile.py
@@ -0,0 +1,133 @@
+'''
+$Id: tzfile.py,v 1.8 2004/06/03 00:15:24 zenzen Exp $
+'''
+
+from datetime import datetime
+from struct import unpack, calcsize
+
+from pytz.tzinfo import StaticTzInfo, DstTzInfo, memorized_ttinfo
+from pytz.tzinfo import memorized_datetime, memorized_timedelta
+
+
+def _byte_string(s):
+    """Cast a string or byte string to an ASCII byte string."""
+    return s.encode('ASCII')
+
+_NULL = _byte_string('\0')
+
+
+def _std_string(s):
+    """Cast a string or byte string to an ASCII string."""
+    return str(s.decode('ASCII'))
+
+
+def build_tzinfo(zone, fp):
+    head_fmt = '>4s c 15x 6l'
+    head_size = calcsize(head_fmt)
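+    # tzfile(5) header layout: 4-byte 'TZif' magic, a version byte, 15
+    # reserved bytes, then six 32-bit big-endian counts (UTC/local and
+    # std/wall indicators, leap seconds, transitions, local time types,
+    # and the total length of the abbreviation strings).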
+    (magic, format, ttisgmtcnt, ttisstdcnt, leapcnt, timecnt,
+        typecnt, charcnt) = unpack(head_fmt, fp.read(head_size))
+
+    # Make sure it is a tzfile(5) file
+    assert magic == _byte_string('TZif'), 'Got magic %s' % repr(magic)
+
+    # Read out the transition times, localtime indices and ttinfo structures.
+    data_fmt = '>%(timecnt)dl %(timecnt)dB %(ttinfo)s %(charcnt)ds' % dict(
+        timecnt=timecnt, ttinfo='lBB' * typecnt, charcnt=charcnt)
+    data_size = calcsize(data_fmt)
+    data = unpack(data_fmt, fp.read(data_size))
+
+    # make sure we unpacked the right number of values
+    assert len(data) == 2 * timecnt + 3 * typecnt + 1
+    transitions = [memorized_datetime(trans)
+                   for trans in data[:timecnt]]
+    lindexes = list(data[timecnt:2 * timecnt])
+    ttinfo_raw = data[2 * timecnt:-1]
+    tznames_raw = data[-1]
+    del data
+
+    # Process ttinfo into separate structs
+    ttinfo = []
+    tznames = {}
+    i = 0
+    while i < len(ttinfo_raw):
+        # have we looked up this timezone name yet?
+        tzname_offset = ttinfo_raw[i + 2]
+        if tzname_offset not in tznames:
+            nul = tznames_raw.find(_NULL, tzname_offset)
+            if nul < 0:
+                nul = len(tznames_raw)
+            tznames[tzname_offset] = _std_string(
+                tznames_raw[tzname_offset:nul])
+        ttinfo.append((ttinfo_raw[i],
+                       bool(ttinfo_raw[i + 1]),
+                       tznames[tzname_offset]))
+        i += 3
+
+    # Now build the timezone object
+    if len(ttinfo) == 1 or len(transitions) == 0:
+        cls = type(zone, (StaticTzInfo,), dict(
+            zone=zone,
+            _utcoffset=memorized_timedelta(ttinfo[0][0]),
+            _tzname=ttinfo[0][2]))
+    else:
+        # Early dates use the first standard time ttinfo
+        i = 0
+        while ttinfo[i][1]:
+            i += 1
+        if ttinfo[i] == ttinfo[lindexes[0]]:
+            transitions[0] = datetime.min
+        else:
+            transitions.insert(0, datetime.min)
+            lindexes.insert(0, i)
+
+        # calculate transition info
+        transition_info = []
+        for i in range(len(transitions)):
+            inf = ttinfo[lindexes[i]]
+            utcoffset = inf[0]
+            if not inf[1]:
+                dst = 0
+            else:
+                for j in range(i - 1, -1, -1):
+                    prev_inf = ttinfo[lindexes[j]]
+                    if not prev_inf[1]:
+                        break
+                dst = inf[0] - prev_inf[0]  # dst offset
+
+                # Bad dst? Look further. DST > 24 hours happens when
+                # a timezone has moved across the international dateline.
+                if dst <= 0 or dst > 3600 * 3:
+                    for j in range(i + 1, len(transitions)):
+                        stdinf = ttinfo[lindexes[j]]
+                        if not stdinf[1]:
+                            dst = inf[0] - stdinf[0]
+                            if dst > 0:
+                                break  # Found a useful std time.
+
+            tzname = inf[2]
+
+            # Round utcoffset and dst to the nearest minute or the
+            # datetime library will complain. Conversions to these timezones
+            # might be up to plus or minus 30 seconds out, but it is
+            # the best we can do.
+            utcoffset = int((utcoffset + 30) // 60) * 60
+            dst = int((dst + 30) // 60) * 60
+            transition_info.append(memorized_ttinfo(utcoffset, dst, tzname))
+
+        cls = type(zone, (DstTzInfo,), dict(
+            zone=zone,
+            _utc_transition_times=transitions,
+            _transition_info=transition_info))
+
+    return cls()
+
+if __name__ == '__main__':
+    import os.path
+    from pprint import pprint
+    base = os.path.join(os.path.dirname(__file__), 'zoneinfo')
+    tz = build_tzinfo('Australia/Melbourne',
+                      open(os.path.join(base, 'Australia', 'Melbourne'), 'rb'))
+    tz = build_tzinfo('US/Eastern',
+                      open(os.path.join(base, 'US', 'Eastern'), 'rb'))
+    pprint(tz._utc_transition_times)
diff --git a/.local/lib/python3.8/site-packages/pytz/tzinfo.py b/.local/lib/python3.8/site-packages/pytz/tzinfo.py
new file mode 100644
index 0000000..725978d
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pytz/tzinfo.py
@@ -0,0 +1,577 @@
+'''Base classes and helpers for building zone specific tzinfo classes'''
+
+from datetime import datetime, timedelta, tzinfo
+from bisect import bisect_right
+try:
+    set
+except NameError:
+    from sets import Set as set
+
+import pytz
+from pytz.exceptions import AmbiguousTimeError, NonExistentTimeError
+
+__all__ = []
+
+_timedelta_cache = {}
+
+
+def memorized_timedelta(seconds):
+    '''Create only one instance of each distinct timedelta'''
+    try:
+        return _timedelta_cache[seconds]
+    except KeyError:
+        delta = timedelta(seconds=seconds)
+        _timedelta_cache[seconds] = delta
+        return delta
+
+_epoch = datetime.utcfromtimestamp(0)
+_datetime_cache = {0: _epoch}
+
+
+def memorized_datetime(seconds):
+    '''Create only one instance of each distinct datetime'''
+    try:
+        return _datetime_cache[seconds]
+    except KeyError:
+        # NB. We can't just do datetime.utcfromtimestamp(seconds) as this
+        # fails with negative values under Windows (Bug #90096)
+        dt = _epoch + timedelta(seconds=seconds)
+        _datetime_cache[seconds] = dt
+        return dt
+
+_ttinfo_cache = {}
+
+
+def memorized_ttinfo(*args):
+    '''Create only one instance of each distinct tuple'''
+    try:
+        return _ttinfo_cache[args]
+    except KeyError:
+        ttinfo = (
+            memorized_timedelta(args[0]),
+            memorized_timedelta(args[1]),
+            args[2]
+        )
+        _ttinfo_cache[args] = ttinfo
+        return ttinfo
+
+_notime = memorized_timedelta(0)
+
+
+def _to_seconds(td):
+    '''Convert a timedelta to seconds'''
+    return td.seconds + td.days * 24 * 60 * 60
+
+
+class BaseTzInfo(tzinfo):
+    # Overridden in subclass
+    _utcoffset = None
+    _tzname = None
+    zone = None
+
+    def __str__(self):
+        return self.zone
+
+
+class StaticTzInfo(BaseTzInfo):
+    '''A timezone that has a constant offset from UTC
+
+    These timezones are rare, as most locations have changed their
+    offset at some point in their history
+    '''
+    def fromutc(self, dt):
+        '''See datetime.tzinfo.fromutc'''
+        if dt.tzinfo is not None and dt.tzinfo is not self:
+            raise ValueError('fromutc: dt.tzinfo is not self')
+        return (dt + self._utcoffset).replace(tzinfo=self)
+
+    def utcoffset(self, dt, is_dst=None):
+        '''See datetime.tzinfo.utcoffset
+
+        is_dst is ignored for StaticTzInfo, and exists only to
+        retain compatibility with DstTzInfo.
+        '''
+        return self._utcoffset
+
+    def dst(self, dt, is_dst=None):
+        '''See datetime.tzinfo.dst
+
+        is_dst is ignored for StaticTzInfo, and exists only to
+        retain compatibility with DstTzInfo.
+        '''
+        return _notime
+
+    def tzname(self, dt, is_dst=None):
+        '''See datetime.tzinfo.tzname
+
+        is_dst is ignored for StaticTzInfo, and exists only to
+        retain compatibility with DstTzInfo.
+        '''
+        return self._tzname
+
+    def localize(self, dt, is_dst=False):
+        '''Convert naive time to local time'''
+        if dt.tzinfo is not None:
+            raise ValueError('Not naive datetime (tzinfo is already set)')
+        return dt.replace(tzinfo=self)
+
+    def normalize(self, dt, is_dst=False):
+        '''Correct the timezone information on the given datetime.
+
+        This is normally a no-op, as StaticTzInfo timezones never have
+        ambiguous cases to correct:
+
+        >>> from pytz import timezone
+        >>> gmt = timezone('GMT')
+        >>> isinstance(gmt, StaticTzInfo)
+        True
+        >>> dt = datetime(2011, 5, 8, 1, 2, 3, tzinfo=gmt)
+        >>> gmt.normalize(dt) is dt
+        True
+
+        The supported method of converting between timezones is to use
+        datetime.astimezone(). Currently normalize() also works:
+
+        >>> la = timezone('America/Los_Angeles')
+        >>> dt = la.localize(datetime(2011, 5, 7, 1, 2, 3))
+        >>> fmt = '%Y-%m-%d %H:%M:%S %Z (%z)'
+        >>> gmt.normalize(dt).strftime(fmt)
+        '2011-05-07 08:02:03 GMT (+0000)'
+        '''
+        if dt.tzinfo is self:
+            return dt
+        if dt.tzinfo is None:
+            raise ValueError('Naive time - no tzinfo set')
+        return dt.astimezone(self)
+
+    def __repr__(self):
+        return '<StaticTzInfo %r>' % (self.zone,)
+
+    def __reduce__(self):
+        # Special pickle so the zone remains a singleton and to cope
+        # with database changes.
+        return pytz._p, (self.zone,)
+
+
+class DstTzInfo(BaseTzInfo):
+    '''A timezone that has a variable offset from UTC
+
+    The offset might change if daylight saving time comes into effect,
+    or at a point in history when the region decides to change their
+    timezone definition.
+    '''
+    # Overridden in subclass
+
+    # Sorted list of DST transition times, UTC
+    _utc_transition_times = None
+
+    # [(utcoffset, dstoffset, tzname)] corresponding to
+    # _utc_transition_times entries
+    _transition_info = None
+
+    zone = None
+
+    # Set in __init__
+
+    _tzinfos = None
+    _dst = None  # DST offset
+
+    def __init__(self, _inf=None, _tzinfos=None):
+        if _inf:
+            self._tzinfos = _tzinfos
+            self._utcoffset, self._dst, self._tzname = _inf
+        else:
+            _tzinfos = {}
+            self._tzinfos = _tzinfos
+            self._utcoffset, self._dst, self._tzname = (
+                self._transition_info[0])
+            _tzinfos[self._transition_info[0]] = self
+            for inf in self._transition_info[1:]:
+                if inf not in _tzinfos:
+                    _tzinfos[inf] = self.__class__(inf, _tzinfos)
+
+    def fromutc(self, dt):
+        '''See datetime.tzinfo.fromutc'''
+        if (dt.tzinfo is not None and
+                getattr(dt.tzinfo, '_tzinfos', None) is not self._tzinfos):
+            raise ValueError('fromutc: dt.tzinfo is not self')
+        dt = dt.replace(tzinfo=None)
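+        # bisect_right locates the first transition strictly after dt;
+        # stepping back one gives the rule in force at dt (clamped to 0
+        # for dates before the first recorded transition).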
+        idx = max(0, bisect_right(self._utc_transition_times, dt) - 1)
+        inf = self._transition_info[idx]
+        return (dt + inf[0]).replace(tzinfo=self._tzinfos[inf])
+
+    def normalize(self, dt):
+        '''Correct the timezone information on the given datetime
+
+        If date arithmetic crosses DST boundaries, the tzinfo
+        is not magically adjusted. This method normalizes the
+        tzinfo to the correct one.
+
+        To test, first we need to do some setup
+
+        >>> from pytz import timezone
+        >>> utc = timezone('UTC')
+        >>> eastern = timezone('US/Eastern')
+        >>> fmt = '%Y-%m-%d %H:%M:%S %Z (%z)'
+
+        We next create a datetime right on an end-of-DST transition point,
+        the instant when the wallclocks are wound back one hour.
+
+        >>> utc_dt = datetime(2002, 10, 27, 6, 0, 0, tzinfo=utc)
+        >>> loc_dt = utc_dt.astimezone(eastern)
+        >>> loc_dt.strftime(fmt)
+        '2002-10-27 01:00:00 EST (-0500)'
+
+        Now, if we subtract a few minutes from it, note that the timezone
+        information has not changed.
+
+        >>> before = loc_dt - timedelta(minutes=10)
+        >>> before.strftime(fmt)
+        '2002-10-27 00:50:00 EST (-0500)'
+
+        But we can fix that by calling the normalize method
+
+        >>> before = eastern.normalize(before)
+        >>> before.strftime(fmt)
+        '2002-10-27 01:50:00 EDT (-0400)'
+
+        The supported method of converting between timezones is to use
+        datetime.astimezone(). Currently, normalize() also works:
+
+        >>> th = timezone('Asia/Bangkok')
+        >>> am = timezone('Europe/Amsterdam')
+        >>> dt = th.localize(datetime(2011, 5, 7, 1, 2, 3))
+        >>> fmt = '%Y-%m-%d %H:%M:%S %Z (%z)'
+        >>> am.normalize(dt).strftime(fmt)
+        '2011-05-06 20:02:03 CEST (+0200)'
+        '''
+        if dt.tzinfo is None:
+            raise ValueError('Naive time - no tzinfo set')
+
+        # Convert dt in localtime to UTC
+        offset = dt.tzinfo._utcoffset
+        dt = dt.replace(tzinfo=None)
+        dt = dt - offset
+        # convert it back, and return it
+        return self.fromutc(dt)
+
+    def localize(self, dt, is_dst=False):
+        '''Convert naive time to local time.
+
+        This method should be used to construct localtimes, rather
+        than passing a tzinfo argument to a datetime constructor.
+
+        is_dst is used to determine the correct timezone in the ambiguous
+        period at the end of daylight saving time.
+
+        >>> from pytz import timezone
+        >>> fmt = '%Y-%m-%d %H:%M:%S %Z (%z)'
+        >>> amdam = timezone('Europe/Amsterdam')
+        >>> dt  = datetime(2004, 10, 31, 2, 0, 0)
+        >>> loc_dt1 = amdam.localize(dt, is_dst=True)
+        >>> loc_dt2 = amdam.localize(dt, is_dst=False)
+        >>> loc_dt1.strftime(fmt)
+        '2004-10-31 02:00:00 CEST (+0200)'
+        >>> loc_dt2.strftime(fmt)
+        '2004-10-31 02:00:00 CET (+0100)'
+        >>> str(loc_dt2 - loc_dt1)
+        '1:00:00'
+
+        Use is_dst=None to raise an AmbiguousTimeError for ambiguous
+        times at the end of daylight saving time
+
+        >>> try:
+        ...     loc_dt1 = amdam.localize(dt, is_dst=None)
+        ... except AmbiguousTimeError:
+        ...     print('Ambiguous')
+        Ambiguous
+
+        is_dst defaults to False
+
+        >>> amdam.localize(dt) == amdam.localize(dt, False)
+        True
+
+        is_dst is also used to determine the correct timezone in the
+        wallclock times jumped over at the start of daylight saving time.
+
+        >>> pacific = timezone('US/Pacific')
+        >>> dt = datetime(2008, 3, 9, 2, 0, 0)
+        >>> ploc_dt1 = pacific.localize(dt, is_dst=True)
+        >>> ploc_dt2 = pacific.localize(dt, is_dst=False)
+        >>> ploc_dt1.strftime(fmt)
+        '2008-03-09 02:00:00 PDT (-0700)'
+        >>> ploc_dt2.strftime(fmt)
+        '2008-03-09 02:00:00 PST (-0800)'
+        >>> str(ploc_dt2 - ploc_dt1)
+        '1:00:00'
+
+        Use is_dst=None to raise a NonExistentTimeError for these skipped
+        times.
+
+        >>> try:
+        ...     loc_dt1 = pacific.localize(dt, is_dst=None)
+        ... except NonExistentTimeError:
+        ...     print('Non-existent')
+        Non-existent
+        '''
+        if dt.tzinfo is not None:
+            raise ValueError('Not naive datetime (tzinfo is already set)')
+
+        # Find the two best possibilities.
+        possible_loc_dt = set()
+        for delta in [timedelta(days=-1), timedelta(days=1)]:
+            loc_dt = dt + delta
+            idx = max(0, bisect_right(
+                self._utc_transition_times, loc_dt) - 1)
+            inf = self._transition_info[idx]
+            tzinfo = self._tzinfos[inf]
+            loc_dt = tzinfo.normalize(dt.replace(tzinfo=tzinfo))
+            if loc_dt.replace(tzinfo=None) == dt:
+                possible_loc_dt.add(loc_dt)
+
+        if len(possible_loc_dt) == 1:
+            return possible_loc_dt.pop()
+
+        # If there are no possibly correct timezones, we are attempting
+        # to convert a time that never happened - the time period jumped
+        # during the start-of-DST transition period.
+        if len(possible_loc_dt) == 0:
+            # If we refuse to guess, raise an exception.
+            if is_dst is None:
+                raise NonExistentTimeError(dt)
+
+            # If we are forcing the pre-DST side of the DST transition, we
+            # obtain the correct timezone by winding the clock forward a few
+            # hours.
+            elif is_dst:
+                return self.localize(
+                    dt + timedelta(hours=6), is_dst=True) - timedelta(hours=6)
+
+            # If we are forcing the post-DST side of the DST transition, we
+            # obtain the correct timezone by winding the clock back.
+            else:
+                return self.localize(
+                    dt - timedelta(hours=6),
+                    is_dst=False) + timedelta(hours=6)
+
+        # If we get this far, we have multiple possible timezones - this
+        # is an ambiguous case occurring during the end-of-DST transition.
+
+        # If told to be strict, raise an exception since we have an
+        # ambiguous case
+        if is_dst is None:
+            raise AmbiguousTimeError(dt)
+
+        # Filter out the possibilities that don't match the requested
+        # is_dst.
+        filtered_possible_loc_dt = [
+            p for p in possible_loc_dt if bool(p.tzinfo._dst) == is_dst
+        ]
+
+        # Hopefully we only have one possibility left. Return it.
+        if len(filtered_possible_loc_dt) == 1:
+            return filtered_possible_loc_dt[0]
+
+        if len(filtered_possible_loc_dt) == 0:
+            filtered_possible_loc_dt = list(possible_loc_dt)
+
+        # If we get this far, we are in a weird timezone transition
+        # where the clocks have been wound back but is_dst is the same
+        # in both (e.g. Europe/Warsaw 1915 when they switched to CET).
+        # At this point, we just have to guess unless we allow more
+        # hints to be passed in (such as the UTC offset or abbreviation),
+        # but that is just getting silly.
+        #
+        # Choose the earliest (by UTC) applicable timezone if is_dst=True
+        # Choose the latest (by UTC) applicable timezone if is_dst=False
+        # i.e., behave like end-of-DST transition
+        dates = {}  # utc -> local
+        for local_dt in filtered_possible_loc_dt:
+            utc_time = (
+                local_dt.replace(tzinfo=None) - local_dt.tzinfo._utcoffset)
+            assert utc_time not in dates
+            dates[utc_time] = local_dt
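+        # Note: [min, max][not is_dst] selects min(dates) - the earliest
+        # UTC instant - when is_dst is truthy, and max(dates) otherwise.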
+        return dates[[min, max][not is_dst](dates)]
+
+    def utcoffset(self, dt, is_dst=None):
+        '''See datetime.tzinfo.utcoffset
+
+        The is_dst parameter may be used to remove ambiguity during DST
+        transitions.
+
+        >>> from pytz import timezone
+        >>> tz = timezone('America/St_Johns')
+        >>> ambiguous = datetime(2009, 10, 31, 23, 30)
+
+        >>> str(tz.utcoffset(ambiguous, is_dst=False))
+        '-1 day, 20:30:00'
+
+        >>> str(tz.utcoffset(ambiguous, is_dst=True))
+        '-1 day, 21:30:00'
+
+        >>> try:
+        ...     tz.utcoffset(ambiguous)
+        ... except AmbiguousTimeError:
+        ...     print('Ambiguous')
+        Ambiguous
+
+        '''
+        if dt is None:
+            return None
+        elif dt.tzinfo is not self:
+            dt = self.localize(dt, is_dst)
+            return dt.tzinfo._utcoffset
+        else:
+            return self._utcoffset
+
+    def dst(self, dt, is_dst=None):
+        '''See datetime.tzinfo.dst
+
+        The is_dst parameter may be used to remove ambiguity during DST
+        transitions.
+
+        >>> from pytz import timezone
+        >>> tz = timezone('America/St_Johns')
+
+        >>> normal = datetime(2009, 9, 1)
+
+        >>> str(tz.dst(normal))
+        '1:00:00'
+        >>> str(tz.dst(normal, is_dst=False))
+        '1:00:00'
+        >>> str(tz.dst(normal, is_dst=True))
+        '1:00:00'
+
+        >>> ambiguous = datetime(2009, 10, 31, 23, 30)
+
+        >>> str(tz.dst(ambiguous, is_dst=False))
+        '0:00:00'
+        >>> str(tz.dst(ambiguous, is_dst=True))
+        '1:00:00'
+        >>> try:
+        ...     tz.dst(ambiguous)
+        ... except AmbiguousTimeError:
+        ...     print('Ambiguous')
+        Ambiguous
+
+        '''
+        if dt is None:
+            return None
+        elif dt.tzinfo is not self:
+            dt = self.localize(dt, is_dst)
+            return dt.tzinfo._dst
+        else:
+            return self._dst
+
+    def tzname(self, dt, is_dst=None):
+        '''See datetime.tzinfo.tzname
+
+        The is_dst parameter may be used to remove ambiguity during DST
+        transitions.
+
+        >>> from pytz import timezone
+        >>> tz = timezone('America/St_Johns')
+
+        >>> normal = datetime(2009, 9, 1)
+
+        >>> tz.tzname(normal)
+        'NDT'
+        >>> tz.tzname(normal, is_dst=False)
+        'NDT'
+        >>> tz.tzname(normal, is_dst=True)
+        'NDT'
+
+        >>> ambiguous = datetime(2009, 10, 31, 23, 30)
+
+        >>> tz.tzname(ambiguous, is_dst=False)
+        'NST'
+        >>> tz.tzname(ambiguous, is_dst=True)
+        'NDT'
+        >>> try:
+        ...     tz.tzname(ambiguous)
+        ... except AmbiguousTimeError:
+        ...     print('Ambiguous')
+        Ambiguous
+        '''
+        if dt is None:
+            return self.zone
+        elif dt.tzinfo is not self:
+            dt = self.localize(dt, is_dst)
+            return dt.tzinfo._tzname
+        else:
+            return self._tzname
+
+    def __repr__(self):
+        if self._dst:
+            dst = 'DST'
+        else:
+            dst = 'STD'
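+        # Typical output (illustrative): the US/Eastern DST instance
+        # renders as <DstTzInfo 'US/Eastern' EDT-1 day, 20:00:00 DST>.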
+        if self._utcoffset > _notime:
+            return '<DstTzInfo %r %s+%s %s>' % (
+                self.zone, self._tzname, self._utcoffset, dst
+            )
+        else:
+            return '<DstTzInfo %r %s%s %s>' % (
+                self.zone, self._tzname, self._utcoffset, dst
+            )
+
+    def __reduce__(self):
+        # Special pickle so the zone remains a singleton and to cope with
+        # database changes.
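+        # pytz._p is a tiny module-level wrapper around the unpickler()
+        # factory below, named briefly to save a few bytes per pickle.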
+        return pytz._p, (
+            self.zone,
+            _to_seconds(self._utcoffset),
+            _to_seconds(self._dst),
+            self._tzname
+        )
+
+
+def unpickler(zone, utcoffset=None, dstoffset=None, tzname=None):
+    """Factory function for unpickling pytz tzinfo instances.
+
+    This is shared for both StaticTzInfo and DstTzInfo instances, because
+    database changes could cause a zone's implementation to switch between
+    these two base classes and we can't break pickles on a pytz version
+    upgrade.
+    """
+    # Raises a KeyError if the zone no longer exists, which should never
+    # happen and would be a bug.
+    tz = pytz.timezone(zone)
+
+    # A StaticTzInfo - just return it
+    if utcoffset is None:
+        return tz
+
+    # This pickle was created from a DstTzInfo. We need to determine which
+    # of this zone's tzinfo instances to use in order to correctly restore
+    # the state of any datetime instances using it.
+    utcoffset = memorized_timedelta(utcoffset)
+    dstoffset = memorized_timedelta(dstoffset)
+    try:
+        return tz._tzinfos[(utcoffset, dstoffset, tzname)]
+    except KeyError:
+        # The particular state requested in this timezone no longer exists.
+        # This indicates a corrupt pickle, or the timezone database has been
+        # corrected violently enough that this particular
+        # (utcoffset, dstoffset) pair no longer exists in the zone, or the
+        # abbreviation has been changed.
+        pass
+
+    # See if we can find an entry differing only by tzname. Abbreviations
+    # get changed from the initial guess by the database maintainers to
+    # match reality when this information is discovered.
+    for localized_tz in tz._tzinfos.values():
+        if (localized_tz._utcoffset == utcoffset and
+                localized_tz._dst == dstoffset):
+            return localized_tz
+
+    # This (utcoffset, dstoffset) information has been removed from the
+    # zone. Add it back. This might occur when the database maintainers have
+    # corrected incorrect information. datetime instances using this
+    # incorrect information will continue to do so, exactly as they were
+    # before being pickled. This is purely an overly paranoid safety net - I
+    # doubt this will ever be needed in real life.
+    inf = (utcoffset, dstoffset, tzname)
+    tz._tzinfos[inf] = tz.__class__(inf, tz._tzinfos)
+    return tz._tzinfos[inf]
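+
+
+# A minimal sketch (not part of the pytz sources) of the pickle round-trip
+# that __reduce__ and unpickler() implement together; it assumes only the
+# standard library pickle module:
+#
+#     >>> import pickle
+#     >>> import pytz
+#     >>> from datetime import datetime
+#     >>> dt = pytz.timezone('US/Eastern').localize(datetime(2009, 9, 1))
+#     >>> restored = pickle.loads(pickle.dumps(dt))
+#     >>> restored == dt
+#     True
+#     >>> restored.tzinfo is dt.tzinfo  # tzinfo instances are singletons
+#     True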
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Abidjan b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Abidjan
new file mode 100644
index 0000000..28b32ab
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Abidjan differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Accra b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Accra
new file mode 100644
index 0000000..28b32ab
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Accra differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Addis_Ababa b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Addis_Ababa
new file mode 100644
index 0000000..9dcfc19
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Addis_Ababa differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Algiers b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Algiers
new file mode 100644
index 0000000..6cfd8a1
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Algiers differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Asmara b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Asmara
new file mode 100644
index 0000000..9dcfc19
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Asmara differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Asmera b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Asmera
new file mode 100644
index 0000000..9dcfc19
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Asmera differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Bamako b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Bamako
new file mode 100644
index 0000000..28b32ab
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Bamako differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Bangui b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Bangui
new file mode 100644
index 0000000..afb6a4a
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Bangui differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Banjul b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Banjul
new file mode 100644
index 0000000..28b32ab
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Banjul differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Bissau b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Bissau
new file mode 100644
index 0000000..82ea5aa
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Bissau differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Blantyre b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Blantyre
new file mode 100644
index 0000000..52753c0
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Blantyre differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Brazzaville b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Brazzaville
new file mode 100644
index 0000000..afb6a4a
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Brazzaville differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Bujumbura b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Bujumbura
new file mode 100644
index 0000000..52753c0
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Bujumbura differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Cairo b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Cairo
new file mode 100644
index 0000000..d3f8196
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Cairo differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Casablanca b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Casablanca
new file mode 100644
index 0000000..17e0d1b
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Casablanca differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Ceuta b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Ceuta
new file mode 100644
index 0000000..850c8f0
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Ceuta differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Conakry b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Conakry
new file mode 100644
index 0000000..28b32ab
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Conakry differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Dakar b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Dakar
new file mode 100644
index 0000000..28b32ab
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Dakar differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Dar_es_Salaam b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Dar_es_Salaam
new file mode 100644
index 0000000..9dcfc19
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Dar_es_Salaam differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Djibouti b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Djibouti
new file mode 100644
index 0000000..9dcfc19
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Djibouti differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Douala b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Douala
new file mode 100644
index 0000000..afb6a4a
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Douala differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/El_Aaiun b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/El_Aaiun
new file mode 100644
index 0000000..64f1b76
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/El_Aaiun differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Freetown b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Freetown
new file mode 100644
index 0000000..28b32ab
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Freetown differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Gaborone b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Gaborone
new file mode 100644
index 0000000..52753c0
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Gaborone differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Harare b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Harare
new file mode 100644
index 0000000..52753c0
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Harare differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Johannesburg b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Johannesburg
new file mode 100644
index 0000000..b1c425d
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Johannesburg differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Juba b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Juba
new file mode 100644
index 0000000..0648294
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Juba differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Kampala b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Kampala
new file mode 100644
index 0000000..9dcfc19
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Kampala differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Khartoum b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Khartoum
new file mode 100644
index 0000000..8ee8cb9
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Khartoum differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Kigali b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Kigali
new file mode 100644
index 0000000..52753c0
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Kigali differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Kinshasa b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Kinshasa
new file mode 100644
index 0000000..afb6a4a
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Kinshasa differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Lagos b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Lagos
new file mode 100644
index 0000000..afb6a4a
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Lagos differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Libreville b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Libreville
new file mode 100644
index 0000000..afb6a4a
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Libreville differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Lome b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Lome
new file mode 100644
index 0000000..28b32ab
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Lome differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Luanda b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Luanda
new file mode 100644
index 0000000..afb6a4a
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Luanda differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Lubumbashi b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Lubumbashi
new file mode 100644
index 0000000..52753c0
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Lubumbashi differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Lusaka b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Lusaka
new file mode 100644
index 0000000..52753c0
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Lusaka differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Malabo b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Malabo
new file mode 100644
index 0000000..afb6a4a
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Malabo differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Maputo b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Maputo
new file mode 100644
index 0000000..52753c0
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Maputo differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Maseru b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Maseru
new file mode 100644
index 0000000..b1c425d
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Maseru differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Mbabane b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Mbabane
new file mode 100644
index 0000000..b1c425d
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Mbabane differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Mogadishu b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Mogadishu
new file mode 100644
index 0000000..9dcfc19
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Mogadishu differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Monrovia b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Monrovia
new file mode 100644
index 0000000..6d68850
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Monrovia differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Nairobi b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Nairobi
new file mode 100644
index 0000000..9dcfc19
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Nairobi differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Ndjamena b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Ndjamena
new file mode 100644
index 0000000..a968845
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Ndjamena differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Niamey b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Niamey
new file mode 100644
index 0000000..afb6a4a
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Niamey differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Nouakchott b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Nouakchott
new file mode 100644
index 0000000..28b32ab
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Nouakchott differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Ouagadougou b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Ouagadougou
new file mode 100644
index 0000000..28b32ab
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Ouagadougou differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Porto-Novo b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Porto-Novo
new file mode 100644
index 0000000..afb6a4a
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Porto-Novo differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Sao_Tome b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Sao_Tome
new file mode 100644
index 0000000..59f3759
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Sao_Tome differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Timbuktu b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Timbuktu
new file mode 100644
index 0000000..28b32ab
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Timbuktu differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Tripoli b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Tripoli
new file mode 100644
index 0000000..07b393b
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Tripoli differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Tunis b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Tunis
new file mode 100644
index 0000000..427fa56
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Tunis differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Windhoek b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Windhoek
new file mode 100644
index 0000000..abecd13
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Africa/Windhoek differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Adak b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Adak
new file mode 100644
index 0000000..4323649
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Adak differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Anchorage b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Anchorage
new file mode 100644
index 0000000..9bbb2fd
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Anchorage differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Anguilla b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Anguilla
new file mode 100644
index 0000000..a662a57
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Anguilla differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Antigua b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Antigua
new file mode 100644
index 0000000..a662a57
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Antigua differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Araguaina b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Araguaina
new file mode 100644
index 0000000..49381b4
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Araguaina differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Argentina/Buenos_Aires b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Argentina/Buenos_Aires
new file mode 100644
index 0000000..260f86a
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Argentina/Buenos_Aires differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Argentina/Catamarca b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Argentina/Catamarca
new file mode 100644
index 0000000..0ae222a
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Argentina/Catamarca differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Argentina/ComodRivadavia b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Argentina/ComodRivadavia
new file mode 100644
index 0000000..0ae222a
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Argentina/ComodRivadavia differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Argentina/Cordoba b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Argentina/Cordoba
new file mode 100644
index 0000000..da4c23a
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Argentina/Cordoba differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Argentina/Jujuy b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Argentina/Jujuy
new file mode 100644
index 0000000..604b856
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Argentina/Jujuy differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Argentina/La_Rioja b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Argentina/La_Rioja
new file mode 100644
index 0000000..2218e36
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Argentina/La_Rioja differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Argentina/Mendoza b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Argentina/Mendoza
new file mode 100644
index 0000000..f9e677f
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Argentina/Mendoza differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Argentina/Rio_Gallegos b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Argentina/Rio_Gallegos
new file mode 100644
index 0000000..c36587e
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Argentina/Rio_Gallegos differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Argentina/Salta b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Argentina/Salta
new file mode 100644
index 0000000..0e797f2
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Argentina/Salta differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Argentina/San_Juan b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Argentina/San_Juan
new file mode 100644
index 0000000..2698495
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Argentina/San_Juan differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Argentina/San_Luis b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Argentina/San_Luis
new file mode 100644
index 0000000..fe50f62
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Argentina/San_Luis differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Argentina/Tucuman b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Argentina/Tucuman
new file mode 100644
index 0000000..c954000
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Argentina/Tucuman differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Argentina/Ushuaia b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Argentina/Ushuaia
new file mode 100644
index 0000000..3643628
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Argentina/Ushuaia differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Aruba b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Aruba
new file mode 100644
index 0000000..a662a57
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Aruba differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Asuncion b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Asuncion
new file mode 100644
index 0000000..2f3bbda
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Asuncion differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Atikokan b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Atikokan
new file mode 100644
index 0000000..9964b9a
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Atikokan differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Atka b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Atka
new file mode 100644
index 0000000..4323649
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Atka differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Bahia b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Bahia
new file mode 100644
index 0000000..15808d3
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Bahia differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Bahia_Banderas b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Bahia_Banderas
new file mode 100644
index 0000000..896af3f
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Bahia_Banderas differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Barbados b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Barbados
new file mode 100644
index 0000000..00cd045
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Barbados differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Belem b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Belem
new file mode 100644
index 0000000..60b5924
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Belem differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Belize b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Belize
new file mode 100644
index 0000000..e6f5dfa
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Belize differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Blanc-Sablon b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Blanc-Sablon
new file mode 100644
index 0000000..a662a57
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Blanc-Sablon differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Boa_Vista b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Boa_Vista
new file mode 100644
index 0000000..978c331
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Boa_Vista differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Bogota b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Bogota
new file mode 100644
index 0000000..b2647d7
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Bogota differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Boise b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Boise
new file mode 100644
index 0000000..f8d54e2
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Boise differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Buenos_Aires b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Buenos_Aires
new file mode 100644
index 0000000..260f86a
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Buenos_Aires differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Cambridge_Bay b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Cambridge_Bay
new file mode 100644
index 0000000..f8db4b6
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Cambridge_Bay differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Campo_Grande b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Campo_Grande
new file mode 100644
index 0000000..8120624
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Campo_Grande differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Cancun b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Cancun
new file mode 100644
index 0000000..f907f0a
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Cancun differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Caracas b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Caracas
new file mode 100644
index 0000000..eedf725
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Caracas differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Catamarca b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Catamarca
new file mode 100644
index 0000000..0ae222a
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Catamarca differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Cayenne b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Cayenne
new file mode 100644
index 0000000..e5bc06f
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Cayenne differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Cayman b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Cayman
new file mode 100644
index 0000000..9964b9a
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Cayman differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Chicago b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Chicago
new file mode 100644
index 0000000..a5b1617
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Chicago differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Chihuahua b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Chihuahua
new file mode 100644
index 0000000..8ed5f93
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Chihuahua differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Coral_Harbour b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Coral_Harbour
new file mode 100644
index 0000000..9964b9a
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Coral_Harbour differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Cordoba b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Cordoba
new file mode 100644
index 0000000..da4c23a
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Cordoba differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Costa_Rica b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Costa_Rica
new file mode 100644
index 0000000..37cb85e
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Costa_Rica differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Creston b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Creston
new file mode 100644
index 0000000..ac6bb0c
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Creston differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Cuiaba b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Cuiaba
new file mode 100644
index 0000000..9bea3d4
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Cuiaba differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Curacao b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Curacao
new file mode 100644
index 0000000..a662a57
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Curacao differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Danmarkshavn b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Danmarkshavn
new file mode 100644
index 0000000..9549adc
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Danmarkshavn differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Dawson b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Dawson
new file mode 100644
index 0000000..343b632
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Dawson differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Dawson_Creek b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Dawson_Creek
new file mode 100644
index 0000000..db9e339
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Dawson_Creek differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Denver b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Denver
new file mode 100644
index 0000000..5fbe26b
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Denver differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Detroit b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Detroit
new file mode 100644
index 0000000..e104faa
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Detroit differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Dominica b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Dominica
new file mode 100644
index 0000000..a662a57
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Dominica differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Edmonton b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Edmonton
new file mode 100644
index 0000000..cd78a6f
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Edmonton differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Eirunepe b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Eirunepe
new file mode 100644
index 0000000..39d6dae
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Eirunepe differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/El_Salvador b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/El_Salvador
new file mode 100644
index 0000000..e2f2230
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/El_Salvador differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Ensenada b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Ensenada
new file mode 100644
index 0000000..ada6bf7
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Ensenada differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Fort_Nelson b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Fort_Nelson
new file mode 100644
index 0000000..5a0b7f1
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Fort_Nelson differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Fort_Wayne b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Fort_Wayne
new file mode 100644
index 0000000..09511cc
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Fort_Wayne differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Fortaleza b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Fortaleza
new file mode 100644
index 0000000..be57dc2
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Fortaleza differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Glace_Bay b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Glace_Bay
new file mode 100644
index 0000000..48412a4
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Glace_Bay differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Godthab b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Godthab
new file mode 100644
index 0000000..0160308
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Godthab differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Goose_Bay b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Goose_Bay
new file mode 100644
index 0000000..a3f2990
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Goose_Bay differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Grand_Turk b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Grand_Turk
new file mode 100644
index 0000000..06da1a6
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Grand_Turk differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Grenada b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Grenada
new file mode 100644
index 0000000..a662a57
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Grenada differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Guadeloupe b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Guadeloupe
new file mode 100644
index 0000000..a662a57
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Guadeloupe differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Guatemala b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Guatemala
new file mode 100644
index 0000000..407138c
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Guatemala differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Guayaquil b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Guayaquil
new file mode 100644
index 0000000..0559a7a
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Guayaquil differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Guyana b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Guyana
new file mode 100644
index 0000000..7af58e5
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Guyana differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Halifax b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Halifax
new file mode 100644
index 0000000..756099a
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Halifax differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Havana b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Havana
new file mode 100644
index 0000000..b69ac45
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Havana differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Hermosillo b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Hermosillo
new file mode 100644
index 0000000..791a9fa
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Hermosillo differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Indiana/Indianapolis b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Indiana/Indianapolis
new file mode 100644
index 0000000..09511cc
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Indiana/Indianapolis differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Indiana/Knox b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Indiana/Knox
new file mode 100644
index 0000000..fcd408d
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Indiana/Knox differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Indiana/Marengo b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Indiana/Marengo
new file mode 100644
index 0000000..1abf75e
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Indiana/Marengo differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Indiana/Petersburg b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Indiana/Petersburg
new file mode 100644
index 0000000..0133548
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Indiana/Petersburg differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Indiana/Tell_City b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Indiana/Tell_City
new file mode 100644
index 0000000..7bbb653
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Indiana/Tell_City differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Indiana/Vevay b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Indiana/Vevay
new file mode 100644
index 0000000..d236b7c
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Indiana/Vevay differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Indiana/Vincennes b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Indiana/Vincennes
new file mode 100644
index 0000000..c818929
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Indiana/Vincennes differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Indiana/Winamac b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Indiana/Winamac
new file mode 100644
index 0000000..630935c
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Indiana/Winamac differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Indianapolis b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Indianapolis
new file mode 100644
index 0000000..09511cc
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Indianapolis differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Inuvik b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Inuvik
new file mode 100644
index 0000000..87bb355
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Inuvik differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Iqaluit b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Iqaluit
new file mode 100644
index 0000000..c8138bd
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Iqaluit differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Jamaica b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Jamaica
new file mode 100644
index 0000000..2a9b7fd
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Jamaica differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Jujuy b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Jujuy
new file mode 100644
index 0000000..604b856
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Jujuy differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Juneau b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Juneau
new file mode 100644
index 0000000..451f349
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Juneau differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Kentucky/Louisville b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Kentucky/Louisville
new file mode 100644
index 0000000..177836e
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Kentucky/Louisville differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Kentucky/Monticello b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Kentucky/Monticello
new file mode 100644
index 0000000..438e3ea
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Kentucky/Monticello differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Knox_IN b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Knox_IN
new file mode 100644
index 0000000..fcd408d
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Knox_IN differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Kralendijk b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Kralendijk
new file mode 100644
index 0000000..a662a57
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Kralendijk differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/La_Paz b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/La_Paz
new file mode 100644
index 0000000..a101372
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/La_Paz differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Lima b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Lima
new file mode 100644
index 0000000..3c6529b
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Lima differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Los_Angeles b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Los_Angeles
new file mode 100644
index 0000000..9dad4f4
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Los_Angeles differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Louisville b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Louisville
new file mode 100644
index 0000000..177836e
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Louisville differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Lower_Princes b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Lower_Princes
new file mode 100644
index 0000000..a662a57
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Lower_Princes differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Maceio b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Maceio
new file mode 100644
index 0000000..bc8b951
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Maceio differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Managua b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Managua
new file mode 100644
index 0000000..e0242bf
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Managua differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Manaus b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Manaus
new file mode 100644
index 0000000..63d58f8
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Manaus differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Marigot b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Marigot
new file mode 100644
index 0000000..a662a57
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Marigot differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Martinique b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Martinique
new file mode 100644
index 0000000..8df43dc
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Martinique differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Matamoros b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Matamoros
new file mode 100644
index 0000000..047968d
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Matamoros differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Mazatlan b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Mazatlan
new file mode 100644
index 0000000..e4a7857
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Mazatlan differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Mendoza b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Mendoza
new file mode 100644
index 0000000..f9e677f
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Mendoza differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Menominee b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Menominee
new file mode 100644
index 0000000..3146138
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Menominee differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Merida b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Merida
new file mode 100644
index 0000000..ea852da
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Merida differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Metlakatla b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Metlakatla
new file mode 100644
index 0000000..1e94be3
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Metlakatla differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Mexico_City b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Mexico_City
new file mode 100644
index 0000000..e7fb6f2
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Mexico_City differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Miquelon b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Miquelon
new file mode 100644
index 0000000..b924b71
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Miquelon differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Moncton b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Moncton
new file mode 100644
index 0000000..9df8d0f
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Moncton differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Monterrey b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Monterrey
new file mode 100644
index 0000000..a8928c8
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Monterrey differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Montevideo b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Montevideo
new file mode 100644
index 0000000..2f357bc
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Montevideo differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Montreal b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Montreal
new file mode 100644
index 0000000..6752c5b
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Montreal differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Montserrat b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Montserrat
new file mode 100644
index 0000000..a662a57
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Montserrat differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Nassau b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Nassau
new file mode 100644
index 0000000..6752c5b
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Nassau differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/New_York b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/New_York
new file mode 100644
index 0000000..2f75480
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/New_York differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Nipigon b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Nipigon
new file mode 100644
index 0000000..f6a856e
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Nipigon differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Nome b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Nome
new file mode 100644
index 0000000..10998df
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Nome differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Noronha b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Noronha
new file mode 100644
index 0000000..f140726
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Noronha differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/North_Dakota/Beulah b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/North_Dakota/Beulah
new file mode 100644
index 0000000..246345d
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/North_Dakota/Beulah differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/North_Dakota/Center b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/North_Dakota/Center
new file mode 100644
index 0000000..1fa0703
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/North_Dakota/Center differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/North_Dakota/New_Salem b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/North_Dakota/New_Salem
new file mode 100644
index 0000000..123f2ae
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/North_Dakota/New_Salem differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Nuuk b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Nuuk
new file mode 100644
index 0000000..0160308
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Nuuk differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Ojinaga b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Ojinaga
new file mode 100644
index 0000000..fc4a03e
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Ojinaga differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Panama b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Panama
new file mode 100644
index 0000000..9964b9a
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Panama differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Pangnirtung b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Pangnirtung
new file mode 100644
index 0000000..3e4e0db
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Pangnirtung differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Paramaribo b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Paramaribo
new file mode 100644
index 0000000..bc8a6ed
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Paramaribo differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Phoenix b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Phoenix
new file mode 100644
index 0000000..ac6bb0c
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Phoenix differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Port-au-Prince b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Port-au-Prince
new file mode 100644
index 0000000..287f143
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Port-au-Prince differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Port_of_Spain b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Port_of_Spain
new file mode 100644
index 0000000..a662a57
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Port_of_Spain differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Porto_Acre b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Porto_Acre
new file mode 100644
index 0000000..a374cb4
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Porto_Acre differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Porto_Velho b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Porto_Velho
new file mode 100644
index 0000000..2e873a5
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Porto_Velho differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Puerto_Rico b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Puerto_Rico
new file mode 100644
index 0000000..a662a57
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Puerto_Rico differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Punta_Arenas b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Punta_Arenas
new file mode 100644
index 0000000..13bd1d9
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Punta_Arenas differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Rainy_River b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Rainy_River
new file mode 100644
index 0000000..ea66099
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Rainy_River differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Rankin_Inlet b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Rankin_Inlet
new file mode 100644
index 0000000..3a70587
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Rankin_Inlet differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Recife b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Recife
new file mode 100644
index 0000000..d7abb16
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Recife differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Regina b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Regina
new file mode 100644
index 0000000..20c9c84
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Regina differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Resolute b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Resolute
new file mode 100644
index 0000000..0a73b75
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Resolute differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Rio_Branco b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Rio_Branco
new file mode 100644
index 0000000..a374cb4
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Rio_Branco differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Rosario b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Rosario
new file mode 100644
index 0000000..da4c23a
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Rosario differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Santa_Isabel b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Santa_Isabel
new file mode 100644
index 0000000..ada6bf7
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Santa_Isabel differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Santarem b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Santarem
new file mode 100644
index 0000000..c28f360
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Santarem differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Santiago b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Santiago
new file mode 100644
index 0000000..aa29060
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Santiago differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Santo_Domingo b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Santo_Domingo
new file mode 100644
index 0000000..4fe36fd
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Santo_Domingo differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Sao_Paulo b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Sao_Paulo
new file mode 100644
index 0000000..13ff083
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Sao_Paulo differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Scoresbysund b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Scoresbysund
new file mode 100644
index 0000000..e20e9e1
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Scoresbysund differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Shiprock b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Shiprock
new file mode 100644
index 0000000..5fbe26b
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Shiprock differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Sitka b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Sitka
new file mode 100644
index 0000000..31f7061
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Sitka differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/St_Barthelemy b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/St_Barthelemy
new file mode 100644
index 0000000..a662a57
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/St_Barthelemy differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/St_Johns b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/St_Johns
new file mode 100644
index 0000000..65a5b0c
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/St_Johns differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/St_Kitts b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/St_Kitts
new file mode 100644
index 0000000..a662a57
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/St_Kitts differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/St_Lucia b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/St_Lucia
new file mode 100644
index 0000000..a662a57
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/St_Lucia differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/St_Thomas b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/St_Thomas
new file mode 100644
index 0000000..a662a57
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/St_Thomas differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/St_Vincent b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/St_Vincent
new file mode 100644
index 0000000..a662a57
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/St_Vincent differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Swift_Current b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Swift_Current
new file mode 100644
index 0000000..8e9ef25
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Swift_Current differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Tegucigalpa b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Tegucigalpa
new file mode 100644
index 0000000..2adacb2
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Tegucigalpa differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Thule b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Thule
new file mode 100644
index 0000000..6f802f1
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Thule differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Thunder_Bay b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Thunder_Bay
new file mode 100644
index 0000000..e504c9a
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Thunder_Bay differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Tijuana b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Tijuana
new file mode 100644
index 0000000..ada6bf7
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Tijuana differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Toronto b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Toronto
new file mode 100644
index 0000000..6752c5b
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Toronto differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Tortola b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Tortola
new file mode 100644
index 0000000..a662a57
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Tortola differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Vancouver b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Vancouver
new file mode 100644
index 0000000..bb60cbc
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Vancouver differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Virgin b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Virgin
new file mode 100644
index 0000000..a662a57
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Virgin differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Whitehorse b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Whitehorse
new file mode 100644
index 0000000..9ee229c
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Whitehorse differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Winnipeg b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Winnipeg
new file mode 100644
index 0000000..ac40299
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Winnipeg differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Yakutat b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Yakutat
new file mode 100644
index 0000000..da209f9
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Yakutat differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Yellowknife b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Yellowknife
new file mode 100644
index 0000000..e6afa39
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/America/Yellowknife differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Antarctica/Casey b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Antarctica/Casey
new file mode 100644
index 0000000..cbcbe4e
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Antarctica/Casey differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Antarctica/Davis b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Antarctica/Davis
new file mode 100644
index 0000000..916f2c2
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Antarctica/Davis differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Antarctica/DumontDUrville b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Antarctica/DumontDUrville
new file mode 100644
index 0000000..920ad27
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Antarctica/DumontDUrville differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Antarctica/Macquarie b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Antarctica/Macquarie
new file mode 100644
index 0000000..9e7cc68
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Antarctica/Macquarie differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Antarctica/Mawson b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Antarctica/Mawson
new file mode 100644
index 0000000..b32e7fd
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Antarctica/Mawson differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Antarctica/McMurdo b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Antarctica/McMurdo
new file mode 100644
index 0000000..6575fdc
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Antarctica/McMurdo differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Antarctica/Palmer b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Antarctica/Palmer
new file mode 100644
index 0000000..3dd85f8
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Antarctica/Palmer differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Antarctica/Rothera b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Antarctica/Rothera
new file mode 100644
index 0000000..8b2430a
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Antarctica/Rothera differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Antarctica/South_Pole b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Antarctica/South_Pole
new file mode 100644
index 0000000..6575fdc
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Antarctica/South_Pole differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Antarctica/Syowa b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Antarctica/Syowa
new file mode 100644
index 0000000..2aea25f
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Antarctica/Syowa differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Antarctica/Troll b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Antarctica/Troll
new file mode 100644
index 0000000..5e565da
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Antarctica/Troll differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Antarctica/Vostok b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Antarctica/Vostok
new file mode 100644
index 0000000..7283053
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Antarctica/Vostok differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Arctic/Longyearbyen b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Arctic/Longyearbyen
new file mode 100644
index 0000000..15a34c3
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Arctic/Longyearbyen differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Aden b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Aden
new file mode 100644
index 0000000..2aea25f
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Aden differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Almaty b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Almaty
new file mode 100644
index 0000000..a4b0077
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Almaty differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Amman b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Amman
new file mode 100644
index 0000000..5dcf7e0
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Amman differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Anadyr b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Anadyr
new file mode 100644
index 0000000..6ed8b7c
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Anadyr differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Aqtau b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Aqtau
new file mode 100644
index 0000000..e2d0f91
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Aqtau differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Aqtobe b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Aqtobe
new file mode 100644
index 0000000..06f0a13
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Aqtobe differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Ashgabat b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Ashgabat
new file mode 100644
index 0000000..73891af
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Ashgabat differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Ashkhabad b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Ashkhabad
new file mode 100644
index 0000000..73891af
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Ashkhabad differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Atyrau b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Atyrau
new file mode 100644
index 0000000..8b5153e
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Atyrau differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Baghdad b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Baghdad
new file mode 100644
index 0000000..f7162ed
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Baghdad differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Bahrain b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Bahrain
new file mode 100644
index 0000000..63188b2
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Bahrain differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Baku b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Baku
new file mode 100644
index 0000000..a0de74b
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Baku differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Bangkok b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Bangkok
new file mode 100644
index 0000000..c292ac5
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Bangkok differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Barnaul b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Barnaul
new file mode 100644
index 0000000..759592a
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Barnaul differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Beirut b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Beirut
new file mode 100644
index 0000000..fb266ed
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Beirut differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Bishkek b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Bishkek
new file mode 100644
index 0000000..f6e20dd
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Bishkek differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Brunei b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Brunei
new file mode 100644
index 0000000..3dab0ab
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Brunei differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Calcutta b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Calcutta
new file mode 100644
index 0000000..0014046
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Calcutta differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Chita b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Chita
new file mode 100644
index 0000000..c4149c0
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Chita differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Choibalsan b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Choibalsan
new file mode 100644
index 0000000..e48daa8
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Choibalsan differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Chongqing b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Chongqing
new file mode 100644
index 0000000..91f6f8b
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Chongqing differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Chungking b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Chungking
new file mode 100644
index 0000000..91f6f8b
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Chungking differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Colombo b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Colombo
new file mode 100644
index 0000000..62c64d8
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Colombo differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Dacca b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Dacca
new file mode 100644
index 0000000..b11c928
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Dacca differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Damascus b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Damascus
new file mode 100644
index 0000000..d9104a7
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Damascus differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Dhaka b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Dhaka
new file mode 100644
index 0000000..b11c928
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Dhaka differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Dili b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Dili
new file mode 100644
index 0000000..30943bb
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Dili differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Dubai b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Dubai
new file mode 100644
index 0000000..fc0a589
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Dubai differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Dushanbe b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Dushanbe
new file mode 100644
index 0000000..82d85b8
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Dushanbe differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Famagusta b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Famagusta
new file mode 100644
index 0000000..653b146
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Famagusta differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Gaza b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Gaza
new file mode 100644
index 0000000..266981a
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Gaza differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Harbin b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Harbin
new file mode 100644
index 0000000..91f6f8b
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Harbin differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Hebron b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Hebron
new file mode 100644
index 0000000..0078bf0
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Hebron differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Ho_Chi_Minh b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Ho_Chi_Minh
new file mode 100644
index 0000000..e2934e3
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Ho_Chi_Minh differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Hong_Kong b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Hong_Kong
new file mode 100644
index 0000000..23d0375
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Hong_Kong differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Hovd b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Hovd
new file mode 100644
index 0000000..4cb800a
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Hovd differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Irkutsk b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Irkutsk
new file mode 100644
index 0000000..4dcbbb7
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Irkutsk differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Istanbul b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Istanbul
new file mode 100644
index 0000000..508446b
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Istanbul differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Jakarta b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Jakarta
new file mode 100644
index 0000000..5baa3a8
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Jakarta differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Jayapura b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Jayapura
new file mode 100644
index 0000000..3002c82
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Jayapura differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Jerusalem b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Jerusalem
new file mode 100644
index 0000000..1ebd066
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Jerusalem differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Kabul b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Kabul
new file mode 100644
index 0000000..d19b9bd
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Kabul differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Kamchatka b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Kamchatka
new file mode 100644
index 0000000..3e80b4e
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Kamchatka differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Karachi b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Karachi
new file mode 100644
index 0000000..ba65c0e
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Karachi differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Kashgar b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Kashgar
new file mode 100644
index 0000000..faa14d9
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Kashgar differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Kathmandu b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Kathmandu
new file mode 100644
index 0000000..a5d5107
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Kathmandu differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Katmandu b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Katmandu
new file mode 100644
index 0000000..a5d5107
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Katmandu differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Khandyga b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Khandyga
new file mode 100644
index 0000000..72bea64
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Khandyga differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Kolkata b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Kolkata
new file mode 100644
index 0000000..0014046
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Kolkata differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Krasnoyarsk b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Krasnoyarsk
new file mode 100644
index 0000000..30c6f16
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Krasnoyarsk differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Kuala_Lumpur b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Kuala_Lumpur
new file mode 100644
index 0000000..612b01e
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Kuala_Lumpur differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Kuching b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Kuching
new file mode 100644
index 0000000..c86750c
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Kuching differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Kuwait b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Kuwait
new file mode 100644
index 0000000..2aea25f
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Kuwait differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Macao b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Macao
new file mode 100644
index 0000000..cac6506
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Macao differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Macau b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Macau
new file mode 100644
index 0000000..cac6506
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Macau differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Magadan b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Magadan
new file mode 100644
index 0000000..b4fcac1
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Magadan differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Makassar b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Makassar
new file mode 100644
index 0000000..556ba86
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Makassar differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Manila b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Manila
new file mode 100644
index 0000000..f4f4b04
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Manila differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Muscat b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Muscat
new file mode 100644
index 0000000..fc0a589
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Muscat differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Nicosia b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Nicosia
new file mode 100644
index 0000000..f7f10ab
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Nicosia differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Novokuznetsk b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Novokuznetsk
new file mode 100644
index 0000000..d983276
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Novokuznetsk differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Novosibirsk b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Novosibirsk
new file mode 100644
index 0000000..e0ee5fc
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Novosibirsk differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Omsk b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Omsk
new file mode 100644
index 0000000..b29b769
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Omsk differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Oral b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Oral
new file mode 100644
index 0000000..ad1f9ca
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Oral differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Phnom_Penh b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Phnom_Penh
new file mode 100644
index 0000000..c292ac5
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Phnom_Penh differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Pontianak b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Pontianak
new file mode 100644
index 0000000..12ce24c
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Pontianak differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Pyongyang b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Pyongyang
new file mode 100644
index 0000000..7ad7e0b
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Pyongyang differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Qatar b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Qatar
new file mode 100644
index 0000000..63188b2
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Qatar differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Qostanay b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Qostanay
new file mode 100644
index 0000000..73b9d96
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Qostanay differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Qyzylorda b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Qyzylorda
new file mode 100644
index 0000000..c2fe4c1
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Qyzylorda differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Rangoon b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Rangoon
new file mode 100644
index 0000000..dd77395
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Rangoon differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Riyadh b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Riyadh
new file mode 100644
index 0000000..2aea25f
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Riyadh differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Saigon b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Saigon
new file mode 100644
index 0000000..e2934e3
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Saigon differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Sakhalin b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Sakhalin
new file mode 100644
index 0000000..485459c
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Sakhalin differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Samarkand b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Samarkand
new file mode 100644
index 0000000..030d47c
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Samarkand differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Seoul b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Seoul
new file mode 100644
index 0000000..96199e7
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Seoul differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Shanghai b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Shanghai
new file mode 100644
index 0000000..91f6f8b
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Shanghai differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Singapore b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Singapore
new file mode 100644
index 0000000..2364b21
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Singapore differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Srednekolymsk b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Srednekolymsk
new file mode 100644
index 0000000..261a983
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Srednekolymsk differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Taipei b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Taipei
new file mode 100644
index 0000000..24c4344
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Taipei differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Tashkent b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Tashkent
new file mode 100644
index 0000000..32a9d7d
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Tashkent differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Tbilisi b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Tbilisi
new file mode 100644
index 0000000..b608d79
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Tbilisi differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Tehran b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Tehran
new file mode 100644
index 0000000..8cec5ad
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Tehran differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Tel_Aviv b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Tel_Aviv
new file mode 100644
index 0000000..1ebd066
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Tel_Aviv differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Thimbu b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Thimbu
new file mode 100644
index 0000000..fe409c7
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Thimbu differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Thimphu b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Thimphu
new file mode 100644
index 0000000..fe409c7
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Thimphu differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Tokyo b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Tokyo
new file mode 100644
index 0000000..26f4d34
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Tokyo differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Tomsk b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Tomsk
new file mode 100644
index 0000000..670e2ad
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Tomsk differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Ujung_Pandang b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Ujung_Pandang
new file mode 100644
index 0000000..556ba86
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Ujung_Pandang differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Ulaanbaatar b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Ulaanbaatar
new file mode 100644
index 0000000..2e20cc3
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Ulaanbaatar differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Ulan_Bator b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Ulan_Bator
new file mode 100644
index 0000000..2e20cc3
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Ulan_Bator differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Urumqi b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Urumqi
new file mode 100644
index 0000000..faa14d9
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Urumqi differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Ust-Nera b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Ust-Nera
new file mode 100644
index 0000000..9e4a78f
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Ust-Nera differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Vientiane b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Vientiane
new file mode 100644
index 0000000..c292ac5
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Vientiane differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Vladivostok b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Vladivostok
new file mode 100644
index 0000000..8ab253c
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Vladivostok differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Yakutsk b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Yakutsk
new file mode 100644
index 0000000..c815e99
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Yakutsk differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Yangon b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Yangon
new file mode 100644
index 0000000..dd77395
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Yangon differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Yekaterinburg b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Yekaterinburg
new file mode 100644
index 0000000..6958d7e
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Yekaterinburg differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Yerevan b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Yerevan
new file mode 100644
index 0000000..250bfe0
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Asia/Yerevan differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Atlantic/Azores b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Atlantic/Azores
new file mode 100644
index 0000000..00a564f
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Atlantic/Azores differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Atlantic/Bermuda b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Atlantic/Bermuda
new file mode 100644
index 0000000..527524e
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Atlantic/Bermuda differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Atlantic/Canary b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Atlantic/Canary
new file mode 100644
index 0000000..f319215
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Atlantic/Canary differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Atlantic/Cape_Verde b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Atlantic/Cape_Verde
new file mode 100644
index 0000000..e2a49d2
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Atlantic/Cape_Verde differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Atlantic/Faeroe b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Atlantic/Faeroe
new file mode 100644
index 0000000..4dab7ef
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Atlantic/Faeroe differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Atlantic/Faroe b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Atlantic/Faroe
new file mode 100644
index 0000000..4dab7ef
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Atlantic/Faroe differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Atlantic/Jan_Mayen b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Atlantic/Jan_Mayen
new file mode 100644
index 0000000..15a34c3
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Atlantic/Jan_Mayen differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Atlantic/Madeira b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Atlantic/Madeira
new file mode 100644
index 0000000..7ddcd88
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Atlantic/Madeira differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Atlantic/Reykjavik b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Atlantic/Reykjavik
new file mode 100644
index 0000000..10e0fc8
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Atlantic/Reykjavik differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Atlantic/South_Georgia b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Atlantic/South_Georgia
new file mode 100644
index 0000000..4466608
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Atlantic/South_Georgia differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Atlantic/St_Helena b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Atlantic/St_Helena
new file mode 100644
index 0000000..28b32ab
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Atlantic/St_Helena differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Atlantic/Stanley b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Atlantic/Stanley
new file mode 100644
index 0000000..88077f1
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Atlantic/Stanley differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/ACT b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/ACT
new file mode 100644
index 0000000..0aea4c3
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/ACT differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Adelaide b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Adelaide
new file mode 100644
index 0000000..f5dedca
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Adelaide differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Brisbane b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Brisbane
new file mode 100644
index 0000000..7ff9949
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Brisbane differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Broken_Hill b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Broken_Hill
new file mode 100644
index 0000000..698c76e
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Broken_Hill differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Canberra b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Canberra
new file mode 100644
index 0000000..0aea4c3
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Canberra differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Currie b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Currie
new file mode 100644
index 0000000..3adb8e1
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Currie differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Darwin b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Darwin
new file mode 100644
index 0000000..74a3087
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Darwin differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Eucla b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Eucla
new file mode 100644
index 0000000..3bf1171
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Eucla differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Hobart b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Hobart
new file mode 100644
index 0000000..3adb8e1
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Hobart differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/LHI b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/LHI
new file mode 100644
index 0000000..9e04a80
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/LHI differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Lindeman b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Lindeman
new file mode 100644
index 0000000..4ee1825
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Lindeman differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Lord_Howe b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Lord_Howe
new file mode 100644
index 0000000..9e04a80
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Lord_Howe differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Melbourne b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Melbourne
new file mode 100644
index 0000000..ee903f4
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Melbourne differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/NSW b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/NSW
new file mode 100644
index 0000000..0aea4c3
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/NSW differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/North b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/North
new file mode 100644
index 0000000..74a3087
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/North differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Perth b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Perth
new file mode 100644
index 0000000..f8ddbdf
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Perth differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Queensland b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Queensland
new file mode 100644
index 0000000..7ff9949
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Queensland differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/South b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/South
new file mode 100644
index 0000000..f5dedca
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/South differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Sydney b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Sydney
new file mode 100644
index 0000000..0aea4c3
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Sydney differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Tasmania b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Tasmania
new file mode 100644
index 0000000..3adb8e1
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Tasmania differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Victoria b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Victoria
new file mode 100644
index 0000000..ee903f4
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Victoria differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/West b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/West
new file mode 100644
index 0000000..f8ddbdf
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/West differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Yancowinna b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Yancowinna
new file mode 100644
index 0000000..698c76e
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Australia/Yancowinna differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Brazil/Acre b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Brazil/Acre
new file mode 100644
index 0000000..a374cb4
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Brazil/Acre differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Brazil/DeNoronha b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Brazil/DeNoronha
new file mode 100644
index 0000000..f140726
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Brazil/DeNoronha differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Brazil/East b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Brazil/East
new file mode 100644
index 0000000..13ff083
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Brazil/East differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Brazil/West b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Brazil/West
new file mode 100644
index 0000000..63d58f8
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Brazil/West differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/CET b/.local/lib/python3.8/site-packages/pytz/zoneinfo/CET
new file mode 100644
index 0000000..122e934
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/CET differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/CST6CDT b/.local/lib/python3.8/site-packages/pytz/zoneinfo/CST6CDT
new file mode 100644
index 0000000..ca67929
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/CST6CDT differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Canada/Atlantic b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Canada/Atlantic
new file mode 100644
index 0000000..756099a
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Canada/Atlantic differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Canada/Central b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Canada/Central
new file mode 100644
index 0000000..ac40299
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Canada/Central differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Canada/Eastern b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Canada/Eastern
new file mode 100644
index 0000000..6752c5b
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Canada/Eastern differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Canada/Mountain b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Canada/Mountain
new file mode 100644
index 0000000..cd78a6f
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Canada/Mountain differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Canada/Newfoundland b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Canada/Newfoundland
new file mode 100644
index 0000000..65a5b0c
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Canada/Newfoundland differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Canada/Pacific b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Canada/Pacific
new file mode 100644
index 0000000..bb60cbc
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Canada/Pacific differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Canada/Saskatchewan b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Canada/Saskatchewan
new file mode 100644
index 0000000..20c9c84
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Canada/Saskatchewan differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Canada/Yukon b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Canada/Yukon
new file mode 100644
index 0000000..9ee229c
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Canada/Yukon differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Chile/Continental b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Chile/Continental
new file mode 100644
index 0000000..aa29060
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Chile/Continental differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Chile/EasterIsland b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Chile/EasterIsland
new file mode 100644
index 0000000..cae3744
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Chile/EasterIsland differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Cuba b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Cuba
new file mode 100644
index 0000000..b69ac45
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Cuba differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/EET b/.local/lib/python3.8/site-packages/pytz/zoneinfo/EET
new file mode 100644
index 0000000..cbdb71d
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/EET differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/EST b/.local/lib/python3.8/site-packages/pytz/zoneinfo/EST
new file mode 100644
index 0000000..21ebc00
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/EST differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/EST5EDT b/.local/lib/python3.8/site-packages/pytz/zoneinfo/EST5EDT
new file mode 100644
index 0000000..9bce500
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/EST5EDT differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Egypt b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Egypt
new file mode 100644
index 0000000..d3f8196
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Egypt differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Eire b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Eire
new file mode 100644
index 0000000..1d99490
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Eire differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT
new file mode 100644
index 0000000..c634746
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT+0 b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT+0
new file mode 100644
index 0000000..c634746
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT+0 differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT+1 b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT+1
new file mode 100644
index 0000000..4dab6f9
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT+1 differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT+10 b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT+10
new file mode 100644
index 0000000..c749290
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT+10 differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT+11 b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT+11
new file mode 100644
index 0000000..d969982
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT+11 differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT+12 b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT+12
new file mode 100644
index 0000000..cdeec90
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT+12 differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT+2 b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT+2
new file mode 100644
index 0000000..fbd2a94
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT+2 differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT+3 b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT+3
new file mode 100644
index 0000000..ee246ef
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT+3 differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT+4 b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT+4
new file mode 100644
index 0000000..5a25ff2
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT+4 differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT+5 b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT+5
new file mode 100644
index 0000000..c0b745f
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT+5 differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT+6 b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT+6
new file mode 100644
index 0000000..06e777d
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT+6 differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT+7 b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT+7
new file mode 100644
index 0000000..4e0b53a
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT+7 differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT+8 b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT+8
new file mode 100644
index 0000000..714b0c5
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT+8 differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT+9 b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT+9
new file mode 100644
index 0000000..78b9daa
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT+9 differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-0 b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-0
new file mode 100644
index 0000000..c634746
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-0 differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-1 b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-1
new file mode 100644
index 0000000..a838beb
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-1 differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-10 b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-10
new file mode 100644
index 0000000..68ff77d
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-10 differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-11 b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-11
new file mode 100644
index 0000000..66af5a4
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-11 differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-12 b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-12
new file mode 100644
index 0000000..17ba505
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-12 differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-13 b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-13
new file mode 100644
index 0000000..5f3706c
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-13 differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-14 b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-14
new file mode 100644
index 0000000..7e9f9c4
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-14 differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-2 b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-2
new file mode 100644
index 0000000..fcef6d9
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-2 differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-3 b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-3
new file mode 100644
index 0000000..27973bc
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-3 differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-4 b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-4
new file mode 100644
index 0000000..1efd841
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-4 differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-5 b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-5
new file mode 100644
index 0000000..1f76184
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-5 differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-6 b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-6
new file mode 100644
index 0000000..952681e
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-6 differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-7 b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-7
new file mode 100644
index 0000000..cefc912
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-7 differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-8 b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-8
new file mode 100644
index 0000000..afb093d
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-8 differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-9 b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-9
new file mode 100644
index 0000000..9265fb7
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT-9 differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT0 b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT0
new file mode 100644
index 0000000..c634746
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/GMT0 differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/Greenwich b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/Greenwich
new file mode 100644
index 0000000..c634746
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/Greenwich differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/UCT b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/UCT
new file mode 100644
index 0000000..91558be
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/UCT differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/UTC b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/UTC
new file mode 100644
index 0000000..91558be
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/UTC differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/Universal b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/Universal
new file mode 100644
index 0000000..91558be
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/Universal differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/Zulu b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/Zulu
new file mode 100644
index 0000000..91558be
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Etc/Zulu differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Amsterdam b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Amsterdam
new file mode 100644
index 0000000..c3ff07b
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Amsterdam differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Andorra b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Andorra
new file mode 100644
index 0000000..5962550
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Andorra differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Astrakhan b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Astrakhan
new file mode 100644
index 0000000..73a4d01
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Astrakhan differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Athens b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Athens
new file mode 100644
index 0000000..9f3a067
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Athens differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Belfast b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Belfast
new file mode 100644
index 0000000..ac02a81
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Belfast differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Belgrade b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Belgrade
new file mode 100644
index 0000000..27de456
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Belgrade differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Berlin b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Berlin
new file mode 100644
index 0000000..7f6d958
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Berlin differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Bratislava b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Bratislava
new file mode 100644
index 0000000..ce8f433
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Bratislava differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Brussels b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Brussels
new file mode 100644
index 0000000..40d7124
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Brussels differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Bucharest b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Bucharest
new file mode 100644
index 0000000..4303b90
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Bucharest differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Budapest b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Budapest
new file mode 100644
index 0000000..b76c873
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Budapest differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Busingen b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Busingen
new file mode 100644
index 0000000..ad6cf59
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Busingen differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Chisinau b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Chisinau
new file mode 100644
index 0000000..5ee23fe
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Chisinau differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Copenhagen b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Copenhagen
new file mode 100644
index 0000000..776be6e
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Copenhagen differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Dublin b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Dublin
new file mode 100644
index 0000000..1d99490
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Dublin differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Gibraltar b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Gibraltar
new file mode 100644
index 0000000..117aadb
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Gibraltar differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Guernsey b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Guernsey
new file mode 100644
index 0000000..ac02a81
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Guernsey differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Helsinki b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Helsinki
new file mode 100644
index 0000000..b4f8f9c
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Helsinki differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Isle_of_Man b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Isle_of_Man
new file mode 100644
index 0000000..ac02a81
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Isle_of_Man differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Istanbul b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Istanbul
new file mode 100644
index 0000000..508446b
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Istanbul differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Jersey b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Jersey
new file mode 100644
index 0000000..ac02a81
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Jersey differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Kaliningrad b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Kaliningrad
new file mode 100644
index 0000000..cc99bea
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Kaliningrad differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Kiev b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Kiev
new file mode 100644
index 0000000..52efea8
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Kiev differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Kirov b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Kirov
new file mode 100644
index 0000000..a3b5320
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Kirov differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Lisbon b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Lisbon
new file mode 100644
index 0000000..55f0193
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Lisbon differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Ljubljana b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Ljubljana
new file mode 100644
index 0000000..27de456
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Ljubljana differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/London b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/London
new file mode 100644
index 0000000..ac02a81
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/London differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Luxembourg b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Luxembourg
new file mode 100644
index 0000000..c4ca733
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Luxembourg differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Madrid b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Madrid
new file mode 100644
index 0000000..16f6420
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Madrid differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Malta b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Malta
new file mode 100644
index 0000000..bf2452d
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Malta differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Mariehamn b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Mariehamn
new file mode 100644
index 0000000..b4f8f9c
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Mariehamn differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Minsk b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Minsk
new file mode 100644
index 0000000..453306c
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Minsk differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Monaco b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Monaco
new file mode 100644
index 0000000..adbe45d
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Monaco differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Moscow b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Moscow
new file mode 100644
index 0000000..ddb3f4e
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Moscow differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Nicosia b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Nicosia
new file mode 100644
index 0000000..f7f10ab
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Nicosia differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Oslo b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Oslo
new file mode 100644
index 0000000..15a34c3
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Oslo differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Paris b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Paris
new file mode 100644
index 0000000..7d366c6
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Paris differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Podgorica b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Podgorica
new file mode 100644
index 0000000..27de456
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Podgorica differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Prague b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Prague
new file mode 100644
index 0000000..ce8f433
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Prague differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Riga b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Riga
new file mode 100644
index 0000000..8db477d
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Riga differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Rome b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Rome
new file mode 100644
index 0000000..ac4c163
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Rome differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Samara b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Samara
new file mode 100644
index 0000000..97d5dd9
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Samara differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/San_Marino b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/San_Marino
new file mode 100644
index 0000000..ac4c163
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/San_Marino differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Sarajevo b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Sarajevo
new file mode 100644
index 0000000..27de456
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Sarajevo differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Saratov b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Saratov
new file mode 100644
index 0000000..8fd5f6d
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Saratov differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Simferopol b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Simferopol
new file mode 100644
index 0000000..29107a0
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Simferopol differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Skopje b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Skopje
new file mode 100644
index 0000000..27de456
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Skopje differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Sofia b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Sofia
new file mode 100644
index 0000000..0e4d879
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Sofia differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Stockholm b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Stockholm
new file mode 100644
index 0000000..f3e0c7f
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Stockholm differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Tallinn b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Tallinn
new file mode 100644
index 0000000..b5acca3
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Tallinn differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Tirane b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Tirane
new file mode 100644
index 0000000..0b86017
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Tirane differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Tiraspol b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Tiraspol
new file mode 100644
index 0000000..5ee23fe
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Tiraspol differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Ulyanovsk b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Ulyanovsk
new file mode 100644
index 0000000..7b61bdc
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Ulyanovsk differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Uzhgorod b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Uzhgorod
new file mode 100644
index 0000000..0eb2150
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Uzhgorod differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Vaduz b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Vaduz
new file mode 100644
index 0000000..ad6cf59
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Vaduz differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Vatican b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Vatican
new file mode 100644
index 0000000..ac4c163
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Vatican differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Vienna b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Vienna
new file mode 100644
index 0000000..3582bb1
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Vienna differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Vilnius b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Vilnius
new file mode 100644
index 0000000..7abd63f
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Vilnius differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Volgograd b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Volgograd
new file mode 100644
index 0000000..11739ac
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Volgograd differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Warsaw b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Warsaw
new file mode 100644
index 0000000..e33cf67
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Warsaw differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Zagreb b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Zagreb
new file mode 100644
index 0000000..27de456
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Zagreb differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Zaporozhye b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Zaporozhye
new file mode 100644
index 0000000..f0406c1
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Zaporozhye differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Zurich b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Zurich
new file mode 100644
index 0000000..ad6cf59
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Europe/Zurich differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Factory b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Factory
new file mode 100644
index 0000000..60aa2a0
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Factory differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/GB b/.local/lib/python3.8/site-packages/pytz/zoneinfo/GB
new file mode 100644
index 0000000..ac02a81
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/GB differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/GB-Eire b/.local/lib/python3.8/site-packages/pytz/zoneinfo/GB-Eire
new file mode 100644
index 0000000..ac02a81
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/GB-Eire differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/GMT b/.local/lib/python3.8/site-packages/pytz/zoneinfo/GMT
new file mode 100644
index 0000000..c634746
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/GMT differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/GMT+0 b/.local/lib/python3.8/site-packages/pytz/zoneinfo/GMT+0
new file mode 100644
index 0000000..c634746
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/GMT+0 differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/GMT-0 b/.local/lib/python3.8/site-packages/pytz/zoneinfo/GMT-0
new file mode 100644
index 0000000..c634746
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/GMT-0 differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/GMT0 b/.local/lib/python3.8/site-packages/pytz/zoneinfo/GMT0
new file mode 100644
index 0000000..c634746
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/GMT0 differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Greenwich b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Greenwich
new file mode 100644
index 0000000..c634746
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Greenwich differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/HST b/.local/lib/python3.8/site-packages/pytz/zoneinfo/HST
new file mode 100644
index 0000000..cccd45e
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/HST differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Hongkong b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Hongkong
new file mode 100644
index 0000000..23d0375
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Hongkong differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Iceland b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Iceland
new file mode 100644
index 0000000..10e0fc8
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Iceland differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Indian/Antananarivo b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Indian/Antananarivo
new file mode 100644
index 0000000..9dcfc19
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Indian/Antananarivo differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Indian/Chagos b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Indian/Chagos
new file mode 100644
index 0000000..93d6dda
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Indian/Chagos differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Indian/Christmas b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Indian/Christmas
new file mode 100644
index 0000000..d18c381
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Indian/Christmas differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Indian/Cocos b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Indian/Cocos
new file mode 100644
index 0000000..f8116e7
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Indian/Cocos differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Indian/Comoro b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Indian/Comoro
new file mode 100644
index 0000000..9dcfc19
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Indian/Comoro differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Indian/Kerguelen b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Indian/Kerguelen
new file mode 100644
index 0000000..cde4cf7
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Indian/Kerguelen differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Indian/Mahe b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Indian/Mahe
new file mode 100644
index 0000000..208f938
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Indian/Mahe differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Indian/Maldives b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Indian/Maldives
new file mode 100644
index 0000000..7c839cf
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Indian/Maldives differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Indian/Mauritius b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Indian/Mauritius
new file mode 100644
index 0000000..17f2616
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Indian/Mauritius differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Indian/Mayotte b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Indian/Mayotte
new file mode 100644
index 0000000..9dcfc19
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Indian/Mayotte differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Indian/Reunion b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Indian/Reunion
new file mode 100644
index 0000000..dfe0831
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Indian/Reunion differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Iran b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Iran
new file mode 100644
index 0000000..8cec5ad
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Iran differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Israel b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Israel
new file mode 100644
index 0000000..1ebd066
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Israel differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Jamaica b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Jamaica
new file mode 100644
index 0000000..2a9b7fd
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Jamaica differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Japan b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Japan
new file mode 100644
index 0000000..26f4d34
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Japan differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Kwajalein b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Kwajalein
new file mode 100644
index 0000000..1a7975f
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Kwajalein differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Libya b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Libya
new file mode 100644
index 0000000..07b393b
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Libya differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/MET b/.local/lib/python3.8/site-packages/pytz/zoneinfo/MET
new file mode 100644
index 0000000..4a826bb
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/MET differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/MST b/.local/lib/python3.8/site-packages/pytz/zoneinfo/MST
new file mode 100644
index 0000000..c93a58e
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/MST differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/MST7MDT b/.local/lib/python3.8/site-packages/pytz/zoneinfo/MST7MDT
new file mode 100644
index 0000000..4506a6e
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/MST7MDT differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Mexico/BajaNorte b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Mexico/BajaNorte
new file mode 100644
index 0000000..ada6bf7
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Mexico/BajaNorte differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Mexico/BajaSur b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Mexico/BajaSur
new file mode 100644
index 0000000..e4a7857
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Mexico/BajaSur differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Mexico/General b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Mexico/General
new file mode 100644
index 0000000..e7fb6f2
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Mexico/General differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/NZ b/.local/lib/python3.8/site-packages/pytz/zoneinfo/NZ
new file mode 100644
index 0000000..6575fdc
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/NZ differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/NZ-CHAT b/.local/lib/python3.8/site-packages/pytz/zoneinfo/NZ-CHAT
new file mode 100644
index 0000000..c004109
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/NZ-CHAT differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Navajo b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Navajo
new file mode 100644
index 0000000..5fbe26b
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Navajo differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/PRC b/.local/lib/python3.8/site-packages/pytz/zoneinfo/PRC
new file mode 100644
index 0000000..91f6f8b
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/PRC differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/PST8PDT b/.local/lib/python3.8/site-packages/pytz/zoneinfo/PST8PDT
new file mode 100644
index 0000000..99d246b
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/PST8PDT differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Apia b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Apia
new file mode 100644
index 0000000..999c367
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Apia differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Auckland b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Auckland
new file mode 100644
index 0000000..6575fdc
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Auckland differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Bougainville b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Bougainville
new file mode 100644
index 0000000..2892d26
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Bougainville differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Chatham b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Chatham
new file mode 100644
index 0000000..c004109
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Chatham differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Chuuk b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Chuuk
new file mode 100644
index 0000000..07c84b7
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Chuuk differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Easter b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Easter
new file mode 100644
index 0000000..cae3744
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Easter differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Efate b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Efate
new file mode 100644
index 0000000..d8d4093
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Efate differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Enderbury b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Enderbury
new file mode 100644
index 0000000..39b786e
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Enderbury differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Fakaofo b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Fakaofo
new file mode 100644
index 0000000..e40307f
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Fakaofo differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Fiji b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Fiji
new file mode 100644
index 0000000..af07ac8
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Fiji differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Funafuti b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Funafuti
new file mode 100644
index 0000000..ea72863
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Funafuti differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Galapagos b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Galapagos
new file mode 100644
index 0000000..31f0921
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Galapagos differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Gambier b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Gambier
new file mode 100644
index 0000000..e1fc3da
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Gambier differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Guadalcanal b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Guadalcanal
new file mode 100644
index 0000000..7e9d10a
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Guadalcanal differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Guam b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Guam
new file mode 100644
index 0000000..66490d2
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Guam differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Honolulu b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Honolulu
new file mode 100644
index 0000000..c7cd060
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Honolulu differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Johnston b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Johnston
new file mode 100644
index 0000000..c7cd060
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Johnston differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Kanton b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Kanton
new file mode 100644
index 0000000..39b786e
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Kanton differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Kiritimati b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Kiritimati
new file mode 100644
index 0000000..7cae0cb
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Kiritimati differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Kosrae b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Kosrae
new file mode 100644
index 0000000..a584aae
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Kosrae differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Kwajalein b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Kwajalein
new file mode 100644
index 0000000..1a7975f
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Kwajalein differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Majuro b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Majuro
new file mode 100644
index 0000000..9ef8374
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Majuro differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Marquesas b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Marquesas
new file mode 100644
index 0000000..74d6792
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Marquesas differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Midway b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Midway
new file mode 100644
index 0000000..cb56709
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Midway differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Nauru b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Nauru
new file mode 100644
index 0000000..acec042
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Nauru differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Niue b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Niue
new file mode 100644
index 0000000..89117b3
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Niue differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Norfolk b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Norfolk
new file mode 100644
index 0000000..53c1aad
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Norfolk differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Noumea b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Noumea
new file mode 100644
index 0000000..931a1a3
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Noumea differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Pago_Pago b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Pago_Pago
new file mode 100644
index 0000000..cb56709
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Pago_Pago differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Palau b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Palau
new file mode 100644
index 0000000..146b351
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Palau differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Pitcairn b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Pitcairn
new file mode 100644
index 0000000..ef91b06
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Pitcairn differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Pohnpei b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Pohnpei
new file mode 100644
index 0000000..c298ddd
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Pohnpei differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Ponape b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Ponape
new file mode 100644
index 0000000..c298ddd
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Ponape differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Port_Moresby b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Port_Moresby
new file mode 100644
index 0000000..920ad27
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Port_Moresby differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Rarotonga b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Rarotonga
new file mode 100644
index 0000000..eea37ab
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Rarotonga differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Saipan b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Saipan
new file mode 100644
index 0000000..66490d2
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Saipan differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Samoa b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Samoa
new file mode 100644
index 0000000..cb56709
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Samoa differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Tahiti b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Tahiti
new file mode 100644
index 0000000..442b8eb
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Tahiti differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Tarawa b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Tarawa
new file mode 100644
index 0000000..3db6c75
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Tarawa differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Tongatapu b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Tongatapu
new file mode 100644
index 0000000..c2e5999
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Tongatapu differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Truk b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Truk
new file mode 100644
index 0000000..07c84b7
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Truk differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Wake b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Wake
new file mode 100644
index 0000000..c9e3106
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Wake differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Wallis b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Wallis
new file mode 100644
index 0000000..b35344b
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Wallis differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Yap b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Yap
new file mode 100644
index 0000000..07c84b7
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Pacific/Yap differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Poland b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Poland
new file mode 100644
index 0000000..e33cf67
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Poland differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Portugal b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Portugal
new file mode 100644
index 0000000..55f0193
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Portugal differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/ROC b/.local/lib/python3.8/site-packages/pytz/zoneinfo/ROC
new file mode 100644
index 0000000..24c4344
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/ROC differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/ROK b/.local/lib/python3.8/site-packages/pytz/zoneinfo/ROK
new file mode 100644
index 0000000..96199e7
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/ROK differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Singapore b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Singapore
new file mode 100644
index 0000000..2364b21
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Singapore differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Turkey b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Turkey
new file mode 100644
index 0000000..508446b
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Turkey differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/UCT b/.local/lib/python3.8/site-packages/pytz/zoneinfo/UCT
new file mode 100644
index 0000000..91558be
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/UCT differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/US/Alaska b/.local/lib/python3.8/site-packages/pytz/zoneinfo/US/Alaska
new file mode 100644
index 0000000..9bbb2fd
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/US/Alaska differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/US/Aleutian b/.local/lib/python3.8/site-packages/pytz/zoneinfo/US/Aleutian
new file mode 100644
index 0000000..4323649
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/US/Aleutian differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/US/Arizona b/.local/lib/python3.8/site-packages/pytz/zoneinfo/US/Arizona
new file mode 100644
index 0000000..ac6bb0c
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/US/Arizona differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/US/Central b/.local/lib/python3.8/site-packages/pytz/zoneinfo/US/Central
new file mode 100644
index 0000000..a5b1617
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/US/Central differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/US/East-Indiana b/.local/lib/python3.8/site-packages/pytz/zoneinfo/US/East-Indiana
new file mode 100644
index 0000000..09511cc
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/US/East-Indiana differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/US/Eastern b/.local/lib/python3.8/site-packages/pytz/zoneinfo/US/Eastern
new file mode 100644
index 0000000..2f75480
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/US/Eastern differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/US/Hawaii b/.local/lib/python3.8/site-packages/pytz/zoneinfo/US/Hawaii
new file mode 100644
index 0000000..c7cd060
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/US/Hawaii differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/US/Indiana-Starke b/.local/lib/python3.8/site-packages/pytz/zoneinfo/US/Indiana-Starke
new file mode 100644
index 0000000..fcd408d
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/US/Indiana-Starke differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/US/Michigan b/.local/lib/python3.8/site-packages/pytz/zoneinfo/US/Michigan
new file mode 100644
index 0000000..e104faa
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/US/Michigan differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/US/Mountain b/.local/lib/python3.8/site-packages/pytz/zoneinfo/US/Mountain
new file mode 100644
index 0000000..5fbe26b
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/US/Mountain differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/US/Pacific b/.local/lib/python3.8/site-packages/pytz/zoneinfo/US/Pacific
new file mode 100644
index 0000000..9dad4f4
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/US/Pacific differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/US/Samoa b/.local/lib/python3.8/site-packages/pytz/zoneinfo/US/Samoa
new file mode 100644
index 0000000..cb56709
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/US/Samoa differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/UTC b/.local/lib/python3.8/site-packages/pytz/zoneinfo/UTC
new file mode 100644
index 0000000..91558be
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/UTC differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Universal b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Universal
new file mode 100644
index 0000000..91558be
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Universal differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/W-SU b/.local/lib/python3.8/site-packages/pytz/zoneinfo/W-SU
new file mode 100644
index 0000000..ddb3f4e
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/W-SU differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/WET b/.local/lib/python3.8/site-packages/pytz/zoneinfo/WET
new file mode 100644
index 0000000..c27390b
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/WET differ
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/Zulu b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Zulu
new file mode 100644
index 0000000..91558be
Binary files /dev/null and b/.local/lib/python3.8/site-packages/pytz/zoneinfo/Zulu differ
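The binary entries above are compiled TZif zone files that pytz loads by zone name. A minimal sketch of how they are consumed, assuming the pytz package whose data this diff adds (the 'Poland' and 'US/Eastern' names map directly to binaries listed above):

from datetime import datetime

import pytz

# 'US/Eastern' and 'Poland' are legacy zone names; pytz resolves each one
# to the matching TZif binary under pytz/zoneinfo/ added in this diff.
eastern = pytz.timezone('US/Eastern')
warsaw = pytz.timezone('Poland')

# localize() attaches the zone to a naive datetime with the correct
# DST offset for that instant; astimezone() converts between zones.
dt = eastern.localize(datetime(2021, 3, 14, 12, 0))
print(dt.isoformat())                     # 2021-03-14T12:00:00-04:00
print(dt.astimezone(warsaw).isoformat())  # 2021-03-14T17:00:00+01:00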
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/iso3166.tab b/.local/lib/python3.8/site-packages/pytz/zoneinfo/iso3166.tab
new file mode 100644
index 0000000..a4ff61a
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pytz/zoneinfo/iso3166.tab
@@ -0,0 +1,274 @@
+# ISO 3166 alpha-2 country codes
+#
+# This file is in the public domain, so clarified as of
+# 2009-05-17 by Arthur David Olson.
+#
+# From Paul Eggert (2015-05-02):
+# This file contains a table of two-letter country codes.  Columns are
+# separated by a single tab.  Lines beginning with '#' are comments.
+# All text uses UTF-8 encoding.  The columns of the table are as follows:
+#
+# 1.  ISO 3166-1 alpha-2 country code, current as of
+#     ISO 3166-1 N976 (2018-11-06).  See: Updates on ISO 3166-1
+#     https://isotc.iso.org/livelink/livelink/Open/16944257
+# 2.  The usual English name for the coded region,
+#     chosen so that alphabetic sorting of subsets produces helpful lists.
+#     This is not the same as the English name in the ISO 3166 tables.
+#
+# The table is sorted by country code.
+#
+# This table is intended as an aid for users, to help them select time
+# zone data appropriate for their practical needs.  It is not intended
+# to take or endorse any position on legal or territorial claims.
+#
+#country-
+#code	name of country, territory, area, or subdivision
+AD	Andorra
+AE	United Arab Emirates
+AF	Afghanistan
+AG	Antigua & Barbuda
+AI	Anguilla
+AL	Albania
+AM	Armenia
+AO	Angola
+AQ	Antarctica
+AR	Argentina
+AS	Samoa (American)
+AT	Austria
+AU	Australia
+AW	Aruba
+AX	Åland Islands
+AZ	Azerbaijan
+BA	Bosnia & Herzegovina
+BB	Barbados
+BD	Bangladesh
+BE	Belgium
+BF	Burkina Faso
+BG	Bulgaria
+BH	Bahrain
+BI	Burundi
+BJ	Benin
+BL	St Barthelemy
+BM	Bermuda
+BN	Brunei
+BO	Bolivia
+BQ	Caribbean NL
+BR	Brazil
+BS	Bahamas
+BT	Bhutan
+BV	Bouvet Island
+BW	Botswana
+BY	Belarus
+BZ	Belize
+CA	Canada
+CC	Cocos (Keeling) Islands
+CD	Congo (Dem. Rep.)
+CF	Central African Rep.
+CG	Congo (Rep.)
+CH	Switzerland
+CI	Côte d'Ivoire
+CK	Cook Islands
+CL	Chile
+CM	Cameroon
+CN	China
+CO	Colombia
+CR	Costa Rica
+CU	Cuba
+CV	Cape Verde
+CW	Curaçao
+CX	Christmas Island
+CY	Cyprus
+CZ	Czech Republic
+DE	Germany
+DJ	Djibouti
+DK	Denmark
+DM	Dominica
+DO	Dominican Republic
+DZ	Algeria
+EC	Ecuador
+EE	Estonia
+EG	Egypt
+EH	Western Sahara
+ER	Eritrea
+ES	Spain
+ET	Ethiopia
+FI	Finland
+FJ	Fiji
+FK	Falkland Islands
+FM	Micronesia
+FO	Faroe Islands
+FR	France
+GA	Gabon
+GB	Britain (UK)
+GD	Grenada
+GE	Georgia
+GF	French Guiana
+GG	Guernsey
+GH	Ghana
+GI	Gibraltar
+GL	Greenland
+GM	Gambia
+GN	Guinea
+GP	Guadeloupe
+GQ	Equatorial Guinea
+GR	Greece
+GS	South Georgia & the South Sandwich Islands
+GT	Guatemala
+GU	Guam
+GW	Guinea-Bissau
+GY	Guyana
+HK	Hong Kong
+HM	Heard Island & McDonald Islands
+HN	Honduras
+HR	Croatia
+HT	Haiti
+HU	Hungary
+ID	Indonesia
+IE	Ireland
+IL	Israel
+IM	Isle of Man
+IN	India
+IO	British Indian Ocean Territory
+IQ	Iraq
+IR	Iran
+IS	Iceland
+IT	Italy
+JE	Jersey
+JM	Jamaica
+JO	Jordan
+JP	Japan
+KE	Kenya
+KG	Kyrgyzstan
+KH	Cambodia
+KI	Kiribati
+KM	Comoros
+KN	St Kitts & Nevis
+KP	Korea (North)
+KR	Korea (South)
+KW	Kuwait
+KY	Cayman Islands
+KZ	Kazakhstan
+LA	Laos
+LB	Lebanon
+LC	St Lucia
+LI	Liechtenstein
+LK	Sri Lanka
+LR	Liberia
+LS	Lesotho
+LT	Lithuania
+LU	Luxembourg
+LV	Latvia
+LY	Libya
+MA	Morocco
+MC	Monaco
+MD	Moldova
+ME	Montenegro
+MF	St Martin (French)
+MG	Madagascar
+MH	Marshall Islands
+MK	North Macedonia
+ML	Mali
+MM	Myanmar (Burma)
+MN	Mongolia
+MO	Macau
+MP	Northern Mariana Islands
+MQ	Martinique
+MR	Mauritania
+MS	Montserrat
+MT	Malta
+MU	Mauritius
+MV	Maldives
+MW	Malawi
+MX	Mexico
+MY	Malaysia
+MZ	Mozambique
+NA	Namibia
+NC	New Caledonia
+NE	Niger
+NF	Norfolk Island
+NG	Nigeria
+NI	Nicaragua
+NL	Netherlands
+NO	Norway
+NP	Nepal
+NR	Nauru
+NU	Niue
+NZ	New Zealand
+OM	Oman
+PA	Panama
+PE	Peru
+PF	French Polynesia
+PG	Papua New Guinea
+PH	Philippines
+PK	Pakistan
+PL	Poland
+PM	St Pierre & Miquelon
+PN	Pitcairn
+PR	Puerto Rico
+PS	Palestine
+PT	Portugal
+PW	Palau
+PY	Paraguay
+QA	Qatar
+RE	Réunion
+RO	Romania
+RS	Serbia
+RU	Russia
+RW	Rwanda
+SA	Saudi Arabia
+SB	Solomon Islands
+SC	Seychelles
+SD	Sudan
+SE	Sweden
+SG	Singapore
+SH	St Helena
+SI	Slovenia
+SJ	Svalbard & Jan Mayen
+SK	Slovakia
+SL	Sierra Leone
+SM	San Marino
+SN	Senegal
+SO	Somalia
+SR	Suriname
+SS	South Sudan
+ST	Sao Tome & Principe
+SV	El Salvador
+SX	St Maarten (Dutch)
+SY	Syria
+SZ	Eswatini (Swaziland)
+TC	Turks & Caicos Is
+TD	Chad
+TF	French Southern & Antarctic Lands
+TG	Togo
+TH	Thailand
+TJ	Tajikistan
+TK	Tokelau
+TL	East Timor
+TM	Turkmenistan
+TN	Tunisia
+TO	Tonga
+TR	Turkey
+TT	Trinidad & Tobago
+TV	Tuvalu
+TW	Taiwan
+TZ	Tanzania
+UA	Ukraine
+UG	Uganda
+UM	US minor outlying islands
+US	United States
+UY	Uruguay
+UZ	Uzbekistan
+VA	Vatican City
+VC	St Vincent
+VE	Venezuela
+VG	Virgin Islands (UK)
+VI	Virgin Islands (US)
+VN	Vietnam
+VU	Vanuatu
+WF	Wallis & Futuna
+WS	Samoa (western)
+YE	Yemen
+YT	Mayotte
+ZA	South Africa
+ZM	Zambia
+ZW	Zimbabwe
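Per its header, iso3166.tab is plain UTF-8 text with tab-separated columns and '#' comment lines, so it reads back with a few lines of code. A minimal sketch (the helper name is illustrative; the path is the one added in this diff):

# Load iso3166.tab into a {code: name} dict, following the format its
# header documents: tab-separated columns, lines starting with '#' skipped.
def load_iso3166(path):
    codes = {}
    with open(path, encoding='utf-8') as f:   # file is UTF-8 per its header
        for line in f:
            line = line.rstrip('\n')
            if not line or line.startswith('#'):
                continue
            code, name = line.split('\t', 1)  # column 1: code, column 2: name
            codes[code] = name
    return codes

countries = load_iso3166('.local/lib/python3.8/site-packages/pytz/zoneinfo/iso3166.tab')
print(countries['PL'])  # Poland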
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/leapseconds b/.local/lib/python3.8/site-packages/pytz/zoneinfo/leapseconds
new file mode 100644
index 0000000..ffa5eb8
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pytz/zoneinfo/leapseconds
@@ -0,0 +1,82 @@
+# Allowance for leap seconds added to each time zone file.
+
+# This file is in the public domain.
+
+# This file is generated automatically from the data in the public-domain
+# NIST format leap-seconds.list file, which can be copied from
+# <ftp://ftp.nist.gov/pub/time/leap-seconds.list>
+# or <ftp://ftp.boulder.nist.gov/pub/time/leap-seconds.list>.
+# The NIST file is used instead of its IERS upstream counterpart
+# <https://hpiers.obspm.fr/iers/bul/bulc/ntp/leap-seconds.list>
+# because under US law the NIST file is public domain
+# whereas the IERS file's copyright and license status is unclear.
+# For more about leap-seconds.list, please see
+# The NTP Timescale and Leap Seconds
+# <https://www.eecis.udel.edu/~mills/leap.html>.
+
+# The rules for leap seconds are specified in Annex 1 (Time scales) of:
+# Standard-frequency and time-signal emissions.
+# International Telecommunication Union - Radiocommunication Sector
+# (ITU-R) Recommendation TF.460-6 (02/2002)
+# <https://www.itu.int/rec/R-REC-TF.460-6-200202-I/>.
+# The International Earth Rotation and Reference Systems Service (IERS)
+# periodically uses leap seconds to keep UTC to within 0.9 s of UT1
+# (a proxy for Earth's angle in space as measured by astronomers)
+# and publishes leap second data in a copyrighted file
+# <https://hpiers.obspm.fr/iers/bul/bulc/Leap_Second.dat>.
+# See: Levine J. Coordinated Universal Time and the leap second.
+# URSI Radio Sci Bull. 2016;89(4):30-6. doi:10.23919/URSIRSB.2016.7909995
+# <https://ieeexplore.ieee.org/document/7909995>.
+
+# There were no leap seconds before 1972, as no official mechanism
+# accounted for the discrepancy between atomic time (TAI) and the earth's
+# rotation.  The first ("1 Jan 1972") data line in leap-seconds.list
+# does not denote a leap second; it denotes the start of the current definition
+# of UTC.
+
+# All leap-seconds are Stationary (S) at the given UTC time.
+# The correction (+ or -) is made at the given time, so in the unlikely
+# event of a negative leap second, a line would look like this:
+# Leap	YEAR	MON	DAY	23:59:59	-	S
+# Typical lines look like this:
+# Leap	YEAR	MON	DAY	23:59:60	+	S
+Leap	1972	Jun	30	23:59:60	+	S
+Leap	1972	Dec	31	23:59:60	+	S
+Leap	1973	Dec	31	23:59:60	+	S
+Leap	1974	Dec	31	23:59:60	+	S
+Leap	1975	Dec	31	23:59:60	+	S
+Leap	1976	Dec	31	23:59:60	+	S
+Leap	1977	Dec	31	23:59:60	+	S
+Leap	1978	Dec	31	23:59:60	+	S
+Leap	1979	Dec	31	23:59:60	+	S
+Leap	1981	Jun	30	23:59:60	+	S
+Leap	1982	Jun	30	23:59:60	+	S
+Leap	1983	Jun	30	23:59:60	+	S
+Leap	1985	Jun	30	23:59:60	+	S
+Leap	1987	Dec	31	23:59:60	+	S
+Leap	1989	Dec	31	23:59:60	+	S
+Leap	1990	Dec	31	23:59:60	+	S
+Leap	1992	Jun	30	23:59:60	+	S
+Leap	1993	Jun	30	23:59:60	+	S
+Leap	1994	Jun	30	23:59:60	+	S
+Leap	1995	Dec	31	23:59:60	+	S
+Leap	1997	Jun	30	23:59:60	+	S
+Leap	1998	Dec	31	23:59:60	+	S
+Leap	2005	Dec	31	23:59:60	+	S
+Leap	2008	Dec	31	23:59:60	+	S
+Leap	2012	Jun	30	23:59:60	+	S
+Leap	2015	Jun	30	23:59:60	+	S
+Leap	2016	Dec	31	23:59:60	+	S
+
+# UTC timestamp when this leap second list expires.
+# Any additional leap seconds will come after this.
+# This Expires line is commented out for now,
+# so that pre-2020a zic implementations do not reject this file.
+#Expires 2022	Dec	28	00:00:00
+
+# POSIX timestamps for the data in this file:
+#updated 1467936000 (2016-07-08 00:00:00 UTC)
+#expires 1672185600 (2022-12-28 00:00:00 UTC)
+
+#	Updated through IERS Bulletin C63
+#	File expires on:  28 December 2022
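The header above fixes the shape of each data line (Leap, year, month, day, 23:59:60, a '+' or '-' correction, S), which makes the cumulative TAI-UTC offset easy to derive. A small sketch, assuming the file sits at the path added in this diff and using the well-known 10 s TAI-UTC offset in effect when the current definition of UTC began in 1972 (the function name is illustrative):

# Sum the corrections from the 'Leap' lines; every other line is a comment.
TAI_MINUS_UTC_1972 = 10  # TAI-UTC in seconds at the start of current UTC

def tai_minus_utc(path):
    offset = TAI_MINUS_UTC_1972
    with open(path) as f:
        for line in f:
            if line.startswith('Leap'):
                fields = line.split()          # Leap YEAR MON DAY hh:mm:ss +/- S
                offset += 1 if fields[5] == '+' else -1
    return offset

# 27 positive leap seconds are listed above, so this prints 37.
print(tai_minus_utc('.local/lib/python3.8/site-packages/pytz/zoneinfo/leapseconds'))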
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/tzdata.zi b/.local/lib/python3.8/site-packages/pytz/zoneinfo/tzdata.zi
new file mode 100644
index 0000000..e21fc92
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pytz/zoneinfo/tzdata.zi
@@ -0,0 +1,4437 @@
+# version unknown-dirty
+# This zic input file is in the public domain.
+R d 1916 o - Jun 14 23s 1 S
+R d 1916 1919 - O Su>=1 23s 0 -
+R d 1917 o - Mar 24 23s 1 S
+R d 1918 o - Mar 9 23s 1 S
+R d 1919 o - Mar 1 23s 1 S
+R d 1920 o - F 14 23s 1 S
+R d 1920 o - O 23 23s 0 -
+R d 1921 o - Mar 14 23s 1 S
+R d 1921 o - Jun 21 23s 0 -
+R d 1939 o - S 11 23s 1 S
+R d 1939 o - N 19 1 0 -
+R d 1944 1945 - Ap M>=1 2 1 S
+R d 1944 o - O 8 2 0 -
+R d 1945 o - S 16 1 0 -
+R d 1971 o - Ap 25 23s 1 S
+R d 1971 o - S 26 23s 0 -
+R d 1977 o - May 6 0 1 S
+R d 1977 o - O 21 0 0 -
+R d 1978 o - Mar 24 1 1 S
+R d 1978 o - S 22 3 0 -
+R d 1980 o - Ap 25 0 1 S
+R d 1980 o - O 31 2 0 -
+Z Africa/Algiers 0:12:12 - LMT 1891 Mar 16
+0:9:21 - PMT 1911 Mar 11
+0 d WE%sT 1940 F 25 2
+1 d CE%sT 1946 O 7
+0 - WET 1956 Ja 29
+1 - CET 1963 Ap 14
+0 d WE%sT 1977 O 21
+1 d CE%sT 1979 O 26
+0 d WE%sT 1981 May
+1 - CET
+Z Atlantic/Cape_Verde -1:34:4 - LMT 1912 Ja 1 2u
+-2 - -02 1942 S
+-2 1 -01 1945 O 15
+-2 - -02 1975 N 25 2
+-1 - -01
+Z Africa/Ndjamena 1:0:12 - LMT 1912
+1 - WAT 1979 O 14
+1 1 WAST 1980 Mar 8
+1 - WAT
+Z Africa/Abidjan -0:16:8 - LMT 1912
+0 - GMT
+L Africa/Abidjan Africa/Accra
+L Africa/Abidjan Africa/Bamako
+L Africa/Abidjan Africa/Banjul
+L Africa/Abidjan Africa/Conakry
+L Africa/Abidjan Africa/Dakar
+L Africa/Abidjan Africa/Freetown
+L Africa/Abidjan Africa/Lome
+L Africa/Abidjan Africa/Nouakchott
+L Africa/Abidjan Africa/Ouagadougou
+L Africa/Abidjan Atlantic/St_Helena
+R K 1940 o - Jul 15 0 1 S
+R K 1940 o - O 1 0 0 -
+R K 1941 o - Ap 15 0 1 S
+R K 1941 o - S 16 0 0 -
+R K 1942 1944 - Ap 1 0 1 S
+R K 1942 o - O 27 0 0 -
+R K 1943 1945 - N 1 0 0 -
+R K 1945 o - Ap 16 0 1 S
+R K 1957 o - May 10 0 1 S
+R K 1957 1958 - O 1 0 0 -
+R K 1958 o - May 1 0 1 S
+R K 1959 1981 - May 1 1 1 S
+R K 1959 1965 - S 30 3 0 -
+R K 1966 1994 - O 1 3 0 -
+R K 1982 o - Jul 25 1 1 S
+R K 1983 o - Jul 12 1 1 S
+R K 1984 1988 - May 1 1 1 S
+R K 1989 o - May 6 1 1 S
+R K 1990 1994 - May 1 1 1 S
+R K 1995 2010 - Ap lastF 0s 1 S
+R K 1995 2005 - S lastTh 24 0 -
+R K 2006 o - S 21 24 0 -
+R K 2007 o - S Th>=1 24 0 -
+R K 2008 o - Au lastTh 24 0 -
+R K 2009 o - Au 20 24 0 -
+R K 2010 o - Au 10 24 0 -
+R K 2010 o - S 9 24 1 S
+R K 2010 o - S lastTh 24 0 -
+R K 2014 o - May 15 24 1 S
+R K 2014 o - Jun 26 24 0 -
+R K 2014 o - Jul 31 24 1 S
+R K 2014 o - S lastTh 24 0 -
+Z Africa/Cairo 2:5:9 - LMT 1900 O
+2 K EE%sT
+Z Africa/Bissau -1:2:20 - LMT 1912 Ja 1 1u
+-1 - -01 1975
+0 - GMT
+Z Africa/Nairobi 2:27:16 - LMT 1908 May
+2:30 - +0230 1928 Jun 30 24
+3 - EAT 1930 Ja 4 24
+2:30 - +0230 1936 D 31 24
+2:45 - +0245 1942 Jul 31 24
+3 - EAT
+L Africa/Nairobi Africa/Addis_Ababa
+L Africa/Nairobi Africa/Asmara
+L Africa/Nairobi Africa/Dar_es_Salaam
+L Africa/Nairobi Africa/Djibouti
+L Africa/Nairobi Africa/Kampala
+L Africa/Nairobi Africa/Mogadishu
+L Africa/Nairobi Indian/Antananarivo
+L Africa/Nairobi Indian/Comoro
+L Africa/Nairobi Indian/Mayotte
+Z Africa/Monrovia -0:43:8 - LMT 1882
+-0:43:8 - MMT 1919 Mar
+-0:44:30 - MMT 1972 Ja 7
+0 - GMT
+R L 1951 o - O 14 2 1 S
+R L 1952 o - Ja 1 0 0 -
+R L 1953 o - O 9 2 1 S
+R L 1954 o - Ja 1 0 0 -
+R L 1955 o - S 30 0 1 S
+R L 1956 o - Ja 1 0 0 -
+R L 1982 1984 - Ap 1 0 1 S
+R L 1982 1985 - O 1 0 0 -
+R L 1985 o - Ap 6 0 1 S
+R L 1986 o - Ap 4 0 1 S
+R L 1986 o - O 3 0 0 -
+R L 1987 1989 - Ap 1 0 1 S
+R L 1987 1989 - O 1 0 0 -
+R L 1997 o - Ap 4 0 1 S
+R L 1997 o - O 4 0 0 -
+R L 2013 o - Mar lastF 1 1 S
+R L 2013 o - O lastF 2 0 -
+Z Africa/Tripoli 0:52:44 - LMT 1920
+1 L CE%sT 1959
+2 - EET 1982
+1 L CE%sT 1990 May 4
+2 - EET 1996 S 30
+1 L CE%sT 1997 O 4
+2 - EET 2012 N 10 2
+1 L CE%sT 2013 O 25 2
+2 - EET
+R MU 1982 o - O 10 0 1 -
+R MU 1983 o - Mar 21 0 0 -
+R MU 2008 o - O lastSu 2 1 -
+R MU 2009 o - Mar lastSu 2 0 -
+Z Indian/Mauritius 3:50 - LMT 1907
+4 MU +04/+05
+R M 1939 o - S 12 0 1 -
+R M 1939 o - N 19 0 0 -
+R M 1940 o - F 25 0 1 -
+R M 1945 o - N 18 0 0 -
+R M 1950 o - Jun 11 0 1 -
+R M 1950 o - O 29 0 0 -
+R M 1967 o - Jun 3 12 1 -
+R M 1967 o - O 1 0 0 -
+R M 1974 o - Jun 24 0 1 -
+R M 1974 o - S 1 0 0 -
+R M 1976 1977 - May 1 0 1 -
+R M 1976 o - Au 1 0 0 -
+R M 1977 o - S 28 0 0 -
+R M 1978 o - Jun 1 0 1 -
+R M 1978 o - Au 4 0 0 -
+R M 2008 o - Jun 1 0 1 -
+R M 2008 o - S 1 0 0 -
+R M 2009 o - Jun 1 0 1 -
+R M 2009 o - Au 21 0 0 -
+R M 2010 o - May 2 0 1 -
+R M 2010 o - Au 8 0 0 -
+R M 2011 o - Ap 3 0 1 -
+R M 2011 o - Jul 31 0 0 -
+R M 2012 2013 - Ap lastSu 2 1 -
+R M 2012 o - Jul 20 3 0 -
+R M 2012 o - Au 20 2 1 -
+R M 2012 o - S 30 3 0 -
+R M 2013 o - Jul 7 3 0 -
+R M 2013 o - Au 10 2 1 -
+R M 2013 2018 - O lastSu 3 0 -
+R M 2014 2018 - Mar lastSu 2 1 -
+R M 2014 o - Jun 28 3 0 -
+R M 2014 o - Au 2 2 1 -
+R M 2015 o - Jun 14 3 0 -
+R M 2015 o - Jul 19 2 1 -
+R M 2016 o - Jun 5 3 0 -
+R M 2016 o - Jul 10 2 1 -
+R M 2017 o - May 21 3 0 -
+R M 2017 o - Jul 2 2 1 -
+R M 2018 o - May 13 3 0 -
+R M 2018 o - Jun 17 2 1 -
+R M 2019 o - May 5 3 -1 -
+R M 2019 o - Jun 9 2 0 -
+R M 2020 o - Ap 19 3 -1 -
+R M 2020 o - May 31 2 0 -
+R M 2021 o - Ap 11 3 -1 -
+R M 2021 o - May 16 2 0 -
+R M 2022 o - Mar 27 3 -1 -
+R M 2022 o - May 8 2 0 -
+R M 2023 o - Mar 19 3 -1 -
+R M 2023 o - Ap 30 2 0 -
+R M 2024 o - Mar 10 3 -1 -
+R M 2024 o - Ap 14 2 0 -
+R M 2025 o - F 23 3 -1 -
+R M 2025 o - Ap 6 2 0 -
+R M 2026 o - F 15 3 -1 -
+R M 2026 o - Mar 22 2 0 -
+R M 2027 o - F 7 3 -1 -
+R M 2027 o - Mar 14 2 0 -
+R M 2028 o - Ja 23 3 -1 -
+R M 2028 o - Mar 5 2 0 -
+R M 2029 o - Ja 14 3 -1 -
+R M 2029 o - F 18 2 0 -
+R M 2029 o - D 30 3 -1 -
+R M 2030 o - F 10 2 0 -
+R M 2030 o - D 22 3 -1 -
+R M 2031 o - F 2 2 0 -
+R M 2031 o - D 14 3 -1 -
+R M 2032 o - Ja 18 2 0 -
+R M 2032 o - N 28 3 -1 -
+R M 2033 o - Ja 9 2 0 -
+R M 2033 o - N 20 3 -1 -
+R M 2033 o - D 25 2 0 -
+R M 2034 o - N 5 3 -1 -
+R M 2034 o - D 17 2 0 -
+R M 2035 o - O 28 3 -1 -
+R M 2035 o - D 9 2 0 -
+R M 2036 o - O 19 3 -1 -
+R M 2036 o - N 23 2 0 -
+R M 2037 o - O 4 3 -1 -
+R M 2037 o - N 15 2 0 -
+R M 2038 o - S 26 3 -1 -
+R M 2038 o - N 7 2 0 -
+R M 2039 o - S 18 3 -1 -
+R M 2039 o - O 23 2 0 -
+R M 2040 o - S 2 3 -1 -
+R M 2040 o - O 14 2 0 -
+R M 2041 o - Au 25 3 -1 -
+R M 2041 o - S 29 2 0 -
+R M 2042 o - Au 10 3 -1 -
+R M 2042 o - S 21 2 0 -
+R M 2043 o - Au 2 3 -1 -
+R M 2043 o - S 13 2 0 -
+R M 2044 o - Jul 24 3 -1 -
+R M 2044 o - Au 28 2 0 -
+R M 2045 o - Jul 9 3 -1 -
+R M 2045 o - Au 20 2 0 -
+R M 2046 o - Jul 1 3 -1 -
+R M 2046 o - Au 12 2 0 -
+R M 2047 o - Jun 23 3 -1 -
+R M 2047 o - Jul 28 2 0 -
+R M 2048 o - Jun 7 3 -1 -
+R M 2048 o - Jul 19 2 0 -
+R M 2049 o - May 30 3 -1 -
+R M 2049 o - Jul 4 2 0 -
+R M 2050 o - May 15 3 -1 -
+R M 2050 o - Jun 26 2 0 -
+R M 2051 o - May 7 3 -1 -
+R M 2051 o - Jun 18 2 0 -
+R M 2052 o - Ap 28 3 -1 -
+R M 2052 o - Jun 2 2 0 -
+R M 2053 o - Ap 13 3 -1 -
+R M 2053 o - May 25 2 0 -
+R M 2054 o - Ap 5 3 -1 -
+R M 2054 o - May 17 2 0 -
+R M 2055 o - Mar 28 3 -1 -
+R M 2055 o - May 2 2 0 -
+R M 2056 o - Mar 12 3 -1 -
+R M 2056 o - Ap 23 2 0 -
+R M 2057 o - Mar 4 3 -1 -
+R M 2057 o - Ap 8 2 0 -
+R M 2058 o - F 17 3 -1 -
+R M 2058 o - Mar 31 2 0 -
+R M 2059 o - F 9 3 -1 -
+R M 2059 o - Mar 23 2 0 -
+R M 2060 o - F 1 3 -1 -
+R M 2060 o - Mar 7 2 0 -
+R M 2061 o - Ja 16 3 -1 -
+R M 2061 o - F 27 2 0 -
+R M 2062 o - Ja 8 3 -1 -
+R M 2062 o - F 19 2 0 -
+R M 2062 o - D 31 3 -1 -
+R M 2063 o - F 4 2 0 -
+R M 2063 o - D 16 3 -1 -
+R M 2064 o - Ja 27 2 0 -
+R M 2064 o - D 7 3 -1 -
+R M 2065 o - Ja 11 2 0 -
+R M 2065 o - N 22 3 -1 -
+R M 2066 o - Ja 3 2 0 -
+R M 2066 o - N 14 3 -1 -
+R M 2066 o - D 26 2 0 -
+R M 2067 o - N 6 3 -1 -
+R M 2067 o - D 11 2 0 -
+R M 2068 o - O 21 3 -1 -
+R M 2068 o - D 2 2 0 -
+R M 2069 o - O 13 3 -1 -
+R M 2069 o - N 24 2 0 -
+R M 2070 o - O 5 3 -1 -
+R M 2070 o - N 9 2 0 -
+R M 2071 o - S 20 3 -1 -
+R M 2071 o - N 1 2 0 -
+R M 2072 o - S 11 3 -1 -
+R M 2072 o - O 16 2 0 -
+R M 2073 o - Au 27 3 -1 -
+R M 2073 o - O 8 2 0 -
+R M 2074 o - Au 19 3 -1 -
+R M 2074 o - S 30 2 0 -
+R M 2075 o - Au 11 3 -1 -
+R M 2075 o - S 15 2 0 -
+R M 2076 o - Jul 26 3 -1 -
+R M 2076 o - S 6 2 0 -
+R M 2077 o - Jul 18 3 -1 -
+R M 2077 o - Au 29 2 0 -
+R M 2078 o - Jul 10 3 -1 -
+R M 2078 o - Au 14 2 0 -
+R M 2079 o - Jun 25 3 -1 -
+R M 2079 o - Au 6 2 0 -
+R M 2080 o - Jun 16 3 -1 -
+R M 2080 o - Jul 21 2 0 -
+R M 2081 o - Jun 1 3 -1 -
+R M 2081 o - Jul 13 2 0 -
+R M 2082 o - May 24 3 -1 -
+R M 2082 o - Jul 5 2 0 -
+R M 2083 o - May 16 3 -1 -
+R M 2083 o - Jun 20 2 0 -
+R M 2084 o - Ap 30 3 -1 -
+R M 2084 o - Jun 11 2 0 -
+R M 2085 o - Ap 22 3 -1 -
+R M 2085 o - Jun 3 2 0 -
+R M 2086 o - Ap 14 3 -1 -
+R M 2086 o - May 19 2 0 -
+R M 2087 o - Mar 30 3 -1 -
+R M 2087 o - May 11 2 0 -
+Z Africa/Casablanca -0:30:20 - LMT 1913 O 26
+0 M +00/+01 1984 Mar 16
+1 - +01 1986
+0 M +00/+01 2018 O 28 3
+1 M +01/+00
+Z Africa/El_Aaiun -0:52:48 - LMT 1934
+-1 - -01 1976 Ap 14
+0 M +00/+01 2018 O 28 3
+1 M +01/+00
+Z Africa/Maputo 2:10:20 - LMT 1903 Mar
+2 - CAT
+L Africa/Maputo Africa/Blantyre
+L Africa/Maputo Africa/Bujumbura
+L Africa/Maputo Africa/Gaborone
+L Africa/Maputo Africa/Harare
+L Africa/Maputo Africa/Kigali
+L Africa/Maputo Africa/Lubumbashi
+L Africa/Maputo Africa/Lusaka
+R NA 1994 o - Mar 21 0 -1 WAT
+R NA 1994 2017 - S Su>=1 2 0 CAT
+R NA 1995 2017 - Ap Su>=1 2 -1 WAT
+Z Africa/Windhoek 1:8:24 - LMT 1892 F 8
+1:30 - +0130 1903 Mar
+2 - SAST 1942 S 20 2
+2 1 SAST 1943 Mar 21 2
+2 - SAST 1990 Mar 21
+2 NA %s
+Z Africa/Lagos 0:13:35 - LMT 1905 Jul
+0 - GMT 1908 Jul
+0:13:35 - LMT 1914
+0:30 - +0030 1919 S
+1 - WAT
+L Africa/Lagos Africa/Bangui
+L Africa/Lagos Africa/Brazzaville
+L Africa/Lagos Africa/Douala
+L Africa/Lagos Africa/Kinshasa
+L Africa/Lagos Africa/Libreville
+L Africa/Lagos Africa/Luanda
+L Africa/Lagos Africa/Malabo
+L Africa/Lagos Africa/Niamey
+L Africa/Lagos Africa/Porto-Novo
+Z Indian/Reunion 3:41:52 - LMT 1911 Jun
+4 - +04
+Z Africa/Sao_Tome 0:26:56 - LMT 1884
+-0:36:45 - LMT 1912 Ja 1 0u
+0 - GMT 2018 Ja 1 1
+1 - WAT 2019 Ja 1 2
+0 - GMT
+Z Indian/Mahe 3:41:48 - LMT 1907
+4 - +04
+R SA 1942 1943 - S Su>=15 2 1 -
+R SA 1943 1944 - Mar Su>=15 2 0 -
+Z Africa/Johannesburg 1:52 - LMT 1892 F 8
+1:30 - SAST 1903 Mar
+2 SA SAST
+L Africa/Johannesburg Africa/Maseru
+L Africa/Johannesburg Africa/Mbabane
+R SD 1970 o - May 1 0 1 S
+R SD 1970 1985 - O 15 0 0 -
+R SD 1971 o - Ap 30 0 1 S
+R SD 1972 1985 - Ap lastSu 0 1 S
+Z Africa/Khartoum 2:10:8 - LMT 1931
+2 SD CA%sT 2000 Ja 15 12
+3 - EAT 2017 N
+2 - CAT
+Z Africa/Juba 2:6:28 - LMT 1931
+2 SD CA%sT 2000 Ja 15 12
+3 - EAT 2021 F
+2 - CAT
+R n 1939 o - Ap 15 23s 1 S
+R n 1939 o - N 18 23s 0 -
+R n 1940 o - F 25 23s 1 S
+R n 1941 o - O 6 0 0 -
+R n 1942 o - Mar 9 0 1 S
+R n 1942 o - N 2 3 0 -
+R n 1943 o - Mar 29 2 1 S
+R n 1943 o - Ap 17 2 0 -
+R n 1943 o - Ap 25 2 1 S
+R n 1943 o - O 4 2 0 -
+R n 1944 1945 - Ap M>=1 2 1 S
+R n 1944 o - O 8 0 0 -
+R n 1945 o - S 16 0 0 -
+R n 1977 o - Ap 30 0s 1 S
+R n 1977 o - S 24 0s 0 -
+R n 1978 o - May 1 0s 1 S
+R n 1978 o - O 1 0s 0 -
+R n 1988 o - Jun 1 0s 1 S
+R n 1988 1990 - S lastSu 0s 0 -
+R n 1989 o - Mar 26 0s 1 S
+R n 1990 o - May 1 0s 1 S
+R n 2005 o - May 1 0s 1 S
+R n 2005 o - S 30 1s 0 -
+R n 2006 2008 - Mar lastSu 2s 1 S
+R n 2006 2008 - O lastSu 2s 0 -
+Z Africa/Tunis 0:40:44 - LMT 1881 May 12
+0:9:21 - PMT 1911 Mar 11
+1 n CE%sT
+Z Antarctica/Casey 0 - -00 1969
+8 - +08 2009 O 18 2
+11 - +11 2010 Mar 5 2
+8 - +08 2011 O 28 2
+11 - +11 2012 F 21 17u
+8 - +08 2016 O 22
+11 - +11 2018 Mar 11 4
+8 - +08 2018 O 7 4
+11 - +11 2019 Mar 17 3
+8 - +08 2019 O 4 3
+11 - +11 2020 Mar 8 3
+8 - +08 2020 O 4 0:1
+11 - +11
+Z Antarctica/Davis 0 - -00 1957 Ja 13
+7 - +07 1964 N
+0 - -00 1969 F
+7 - +07 2009 O 18 2
+5 - +05 2010 Mar 10 20u
+7 - +07 2011 O 28 2
+5 - +05 2012 F 21 20u
+7 - +07
+Z Antarctica/Mawson 0 - -00 1954 F 13
+6 - +06 2009 O 18 2
+5 - +05
+Z Indian/Kerguelen 0 - -00 1950
+5 - +05
+R Tr 2005 ma - Mar lastSu 1u 2 +02
+R Tr 2004 ma - O lastSu 1u 0 +00
+Z Antarctica/Troll 0 - -00 2005 F 12
+0 Tr %s
+Z Antarctica/Vostok 0 - -00 1957 D 16
+6 - +06
+Z Antarctica/Rothera 0 - -00 1976 D
+-3 - -03
+Z Asia/Kabul 4:36:48 - LMT 1890
+4 - +04 1945
+4:30 - +0430
+R AM 2011 o - Mar lastSu 2s 1 -
+R AM 2011 o - O lastSu 2s 0 -
+Z Asia/Yerevan 2:58 - LMT 1924 May 2
+3 - +03 1957 Mar
+4 R +04/+05 1991 Mar 31 2s
+3 R +03/+04 1995 S 24 2s
+4 - +04 1997
+4 R +04/+05 2011
+4 AM +04/+05
+R AZ 1997 2015 - Mar lastSu 4 1 -
+R AZ 1997 2015 - O lastSu 5 0 -
+Z Asia/Baku 3:19:24 - LMT 1924 May 2
+3 - +03 1957 Mar
+4 R +04/+05 1991 Mar 31 2s
+3 R +03/+04 1992 S lastSu 2s
+4 - +04 1996
+4 E +04/+05 1997
+4 AZ +04/+05
+R BD 2009 o - Jun 19 23 1 -
+R BD 2009 o - D 31 24 0 -
+Z Asia/Dhaka 6:1:40 - LMT 1890
+5:53:20 - HMT 1941 O
+6:30 - +0630 1942 May 15
+5:30 - +0530 1942 S
+6:30 - +0630 1951 S 30
+6 - +06 2009
+6 BD +06/+07
+Z Asia/Thimphu 5:58:36 - LMT 1947 Au 15
+5:30 - +0530 1987 O
+6 - +06
+Z Indian/Chagos 4:49:40 - LMT 1907
+5 - +05 1996
+6 - +06
+Z Asia/Brunei 7:39:40 - LMT 1926 Mar
+7:30 - +0730 1933
+8 - +08
+Z Asia/Yangon 6:24:47 - LMT 1880
+6:24:47 - RMT 1920
+6:30 - +0630 1942 May
+9 - +09 1945 May 3
+6:30 - +0630
+R Sh 1919 o - Ap 12 24 1 D
+R Sh 1919 o - S 30 24 0 S
+R Sh 1940 o - Jun 1 0 1 D
+R Sh 1940 o - O 12 24 0 S
+R Sh 1941 o - Mar 15 0 1 D
+R Sh 1941 o - N 1 24 0 S
+R Sh 1942 o - Ja 31 0 1 D
+R Sh 1945 o - S 1 24 0 S
+R Sh 1946 o - May 15 0 1 D
+R Sh 1946 o - S 30 24 0 S
+R Sh 1947 o - Ap 15 0 1 D
+R Sh 1947 o - O 31 24 0 S
+R Sh 1948 1949 - May 1 0 1 D
+R Sh 1948 1949 - S 30 24 0 S
+R CN 1986 o - May 4 2 1 D
+R CN 1986 1991 - S Su>=11 2 0 S
+R CN 1987 1991 - Ap Su>=11 2 1 D
+Z Asia/Shanghai 8:5:43 - LMT 1901
+8 Sh C%sT 1949 May 28
+8 CN C%sT
+Z Asia/Urumqi 5:50:20 - LMT 1928
+6 - +06
+R HK 1946 o - Ap 21 0 1 S
+R HK 1946 o - D 1 3:30s 0 -
+R HK 1947 o - Ap 13 3:30s 1 S
+R HK 1947 o - N 30 3:30s 0 -
+R HK 1948 o - May 2 3:30s 1 S
+R HK 1948 1952 - O Su>=28 3:30s 0 -
+R HK 1949 1953 - Ap Su>=1 3:30 1 S
+R HK 1953 1964 - O Su>=31 3:30 0 -
+R HK 1954 1964 - Mar Su>=18 3:30 1 S
+R HK 1965 1976 - Ap Su>=16 3:30 1 S
+R HK 1965 1976 - O Su>=16 3:30 0 -
+R HK 1973 o - D 30 3:30 1 S
+R HK 1979 o - May 13 3:30 1 S
+R HK 1979 o - O 21 3:30 0 -
+Z Asia/Hong_Kong 7:36:42 - LMT 1904 O 30 0:36:42
+8 - HKT 1941 Jun 15 3
+8 1 HKST 1941 O 1 4
+8 0:30 HKWT 1941 D 25
+9 - JST 1945 N 18 2
+8 HK HK%sT
+R f 1946 o - May 15 0 1 D
+R f 1946 o - O 1 0 0 S
+R f 1947 o - Ap 15 0 1 D
+R f 1947 o - N 1 0 0 S
+R f 1948 1951 - May 1 0 1 D
+R f 1948 1951 - O 1 0 0 S
+R f 1952 o - Mar 1 0 1 D
+R f 1952 1954 - N 1 0 0 S
+R f 1953 1959 - Ap 1 0 1 D
+R f 1955 1961 - O 1 0 0 S
+R f 1960 1961 - Jun 1 0 1 D
+R f 1974 1975 - Ap 1 0 1 D
+R f 1974 1975 - O 1 0 0 S
+R f 1979 o - Jul 1 0 1 D
+R f 1979 o - O 1 0 0 S
+Z Asia/Taipei 8:6 - LMT 1896
+8 - CST 1937 O
+9 - JST 1945 S 21 1
+8 f C%sT
+R _ 1942 1943 - Ap 30 23 1 -
+R _ 1942 o - N 17 23 0 -
+R _ 1943 o - S 30 23 0 S
+R _ 1946 o - Ap 30 23s 1 D
+R _ 1946 o - S 30 23s 0 S
+R _ 1947 o - Ap 19 23s 1 D
+R _ 1947 o - N 30 23s 0 S
+R _ 1948 o - May 2 23s 1 D
+R _ 1948 o - O 31 23s 0 S
+R _ 1949 1950 - Ap Sa>=1 23s 1 D
+R _ 1949 1950 - O lastSa 23s 0 S
+R _ 1951 o - Mar 31 23s 1 D
+R _ 1951 o - O 28 23s 0 S
+R _ 1952 1953 - Ap Sa>=1 23s 1 D
+R _ 1952 o - N 1 23s 0 S
+R _ 1953 1954 - O lastSa 23s 0 S
+R _ 1954 1956 - Mar Sa>=17 23s 1 D
+R _ 1955 o - N 5 23s 0 S
+R _ 1956 1964 - N Su>=1 3:30 0 S
+R _ 1957 1964 - Mar Su>=18 3:30 1 D
+R _ 1965 1973 - Ap Su>=16 3:30 1 D
+R _ 1965 1966 - O Su>=16 2:30 0 S
+R _ 1967 1976 - O Su>=16 3:30 0 S
+R _ 1973 o - D 30 3:30 1 D
+R _ 1975 1976 - Ap Su>=16 3:30 1 D
+R _ 1979 o - May 13 3:30 1 D
+R _ 1979 o - O Su>=16 3:30 0 S
+Z Asia/Macau 7:34:10 - LMT 1904 O 30
+8 - CST 1941 D 21 23
+9 _ +09/+10 1945 S 30 24
+8 _ C%sT
+R CY 1975 o - Ap 13 0 1 S
+R CY 1975 o - O 12 0 0 -
+R CY 1976 o - May 15 0 1 S
+R CY 1976 o - O 11 0 0 -
+R CY 1977 1980 - Ap Su>=1 0 1 S
+R CY 1977 o - S 25 0 0 -
+R CY 1978 o - O 2 0 0 -
+R CY 1979 1997 - S lastSu 0 0 -
+R CY 1981 1998 - Mar lastSu 0 1 S
+Z Asia/Nicosia 2:13:28 - LMT 1921 N 14
+2 CY EE%sT 1998 S
+2 E EE%sT
+Z Asia/Famagusta 2:15:48 - LMT 1921 N 14
+2 CY EE%sT 1998 S
+2 E EE%sT 2016 S 8
+3 - +03 2017 O 29 1u
+2 E EE%sT
+L Asia/Nicosia Europe/Nicosia
+Z Asia/Tbilisi 2:59:11 - LMT 1880
+2:59:11 - TBMT 1924 May 2
+3 - +03 1957 Mar
+4 R +04/+05 1991 Mar 31 2s
+3 R +03/+04 1992
+3 e +03/+04 1994 S lastSu
+4 e +04/+05 1996 O lastSu
+4 1 +05 1997 Mar lastSu
+4 e +04/+05 2004 Jun 27
+3 R +03/+04 2005 Mar lastSu 2
+4 - +04
+Z Asia/Dili 8:22:20 - LMT 1912
+8 - +08 1942 F 21 23
+9 - +09 1976 May 3
+8 - +08 2000 S 17
+9 - +09
+Z Asia/Kolkata 5:53:28 - LMT 1854 Jun 28
+5:53:20 - HMT 1870
+5:21:10 - MMT 1906
+5:30 - IST 1941 O
+5:30 1 +0630 1942 May 15
+5:30 - IST 1942 S
+5:30 1 +0630 1945 O 15
+5:30 - IST
+Z Asia/Jakarta 7:7:12 - LMT 1867 Au 10
+7:7:12 - BMT 1923 D 31 23:47:12
+7:20 - +0720 1932 N
+7:30 - +0730 1942 Mar 23
+9 - +09 1945 S 23
+7:30 - +0730 1948 May
+8 - +08 1950 May
+7:30 - +0730 1964
+7 - WIB
+Z Asia/Pontianak 7:17:20 - LMT 1908 May
+7:17:20 - PMT 1932 N
+7:30 - +0730 1942 Ja 29
+9 - +09 1945 S 23
+7:30 - +0730 1948 May
+8 - +08 1950 May
+7:30 - +0730 1964
+8 - WITA 1988
+7 - WIB
+Z Asia/Makassar 7:57:36 - LMT 1920
+7:57:36 - MMT 1932 N
+8 - +08 1942 F 9
+9 - +09 1945 S 23
+8 - WITA
+Z Asia/Jayapura 9:22:48 - LMT 1932 N
+9 - +09 1944 S
+9:30 - +0930 1964
+9 - WIT
+R i 1978 1980 - Mar 20 24 1 -
+R i 1978 o - O 20 24 0 -
+R i 1979 o - S 18 24 0 -
+R i 1980 o - S 22 24 0 -
+R i 1991 o - May 2 24 1 -
+R i 1992 1995 - Mar 21 24 1 -
+R i 1991 1995 - S 21 24 0 -
+R i 1996 o - Mar 20 24 1 -
+R i 1996 o - S 20 24 0 -
+R i 1997 1999 - Mar 21 24 1 -
+R i 1997 1999 - S 21 24 0 -
+R i 2000 o - Mar 20 24 1 -
+R i 2000 o - S 20 24 0 -
+R i 2001 2003 - Mar 21 24 1 -
+R i 2001 2003 - S 21 24 0 -
+R i 2004 o - Mar 20 24 1 -
+R i 2004 o - S 20 24 0 -
+R i 2005 o - Mar 21 24 1 -
+R i 2005 o - S 21 24 0 -
+R i 2008 o - Mar 20 24 1 -
+R i 2008 o - S 20 24 0 -
+R i 2009 2011 - Mar 21 24 1 -
+R i 2009 2011 - S 21 24 0 -
+R i 2012 o - Mar 20 24 1 -
+R i 2012 o - S 20 24 0 -
+R i 2013 2015 - Mar 21 24 1 -
+R i 2013 2015 - S 21 24 0 -
+R i 2016 o - Mar 20 24 1 -
+R i 2016 o - S 20 24 0 -
+R i 2017 2019 - Mar 21 24 1 -
+R i 2017 2019 - S 21 24 0 -
+R i 2020 o - Mar 20 24 1 -
+R i 2020 o - S 20 24 0 -
+R i 2021 2023 - Mar 21 24 1 -
+R i 2021 2023 - S 21 24 0 -
+R i 2024 o - Mar 20 24 1 -
+R i 2024 o - S 20 24 0 -
+R i 2025 2027 - Mar 21 24 1 -
+R i 2025 2027 - S 21 24 0 -
+R i 2028 2029 - Mar 20 24 1 -
+R i 2028 2029 - S 20 24 0 -
+R i 2030 2031 - Mar 21 24 1 -
+R i 2030 2031 - S 21 24 0 -
+R i 2032 2033 - Mar 20 24 1 -
+R i 2032 2033 - S 20 24 0 -
+R i 2034 2035 - Mar 21 24 1 -
+R i 2034 2035 - S 21 24 0 -
+R i 2036 2037 - Mar 20 24 1 -
+R i 2036 2037 - S 20 24 0 -
+R i 2038 2039 - Mar 21 24 1 -
+R i 2038 2039 - S 21 24 0 -
+R i 2040 2041 - Mar 20 24 1 -
+R i 2040 2041 - S 20 24 0 -
+R i 2042 2043 - Mar 21 24 1 -
+R i 2042 2043 - S 21 24 0 -
+R i 2044 2045 - Mar 20 24 1 -
+R i 2044 2045 - S 20 24 0 -
+R i 2046 2047 - Mar 21 24 1 -
+R i 2046 2047 - S 21 24 0 -
+R i 2048 2049 - Mar 20 24 1 -
+R i 2048 2049 - S 20 24 0 -
+R i 2050 2051 - Mar 21 24 1 -
+R i 2050 2051 - S 21 24 0 -
+R i 2052 2053 - Mar 20 24 1 -
+R i 2052 2053 - S 20 24 0 -
+R i 2054 2055 - Mar 21 24 1 -
+R i 2054 2055 - S 21 24 0 -
+R i 2056 2057 - Mar 20 24 1 -
+R i 2056 2057 - S 20 24 0 -
+R i 2058 2059 - Mar 21 24 1 -
+R i 2058 2059 - S 21 24 0 -
+R i 2060 2062 - Mar 20 24 1 -
+R i 2060 2062 - S 20 24 0 -
+R i 2063 o - Mar 21 24 1 -
+R i 2063 o - S 21 24 0 -
+R i 2064 2066 - Mar 20 24 1 -
+R i 2064 2066 - S 20 24 0 -
+R i 2067 o - Mar 21 24 1 -
+R i 2067 o - S 21 24 0 -
+R i 2068 2070 - Mar 20 24 1 -
+R i 2068 2070 - S 20 24 0 -
+R i 2071 o - Mar 21 24 1 -
+R i 2071 o - S 21 24 0 -
+R i 2072 2074 - Mar 20 24 1 -
+R i 2072 2074 - S 20 24 0 -
+R i 2075 o - Mar 21 24 1 -
+R i 2075 o - S 21 24 0 -
+R i 2076 2078 - Mar 20 24 1 -
+R i 2076 2078 - S 20 24 0 -
+R i 2079 o - Mar 21 24 1 -
+R i 2079 o - S 21 24 0 -
+R i 2080 2082 - Mar 20 24 1 -
+R i 2080 2082 - S 20 24 0 -
+R i 2083 o - Mar 21 24 1 -
+R i 2083 o - S 21 24 0 -
+R i 2084 2086 - Mar 20 24 1 -
+R i 2084 2086 - S 20 24 0 -
+R i 2087 o - Mar 21 24 1 -
+R i 2087 o - S 21 24 0 -
+R i 2088 ma - Mar 20 24 1 -
+R i 2088 ma - S 20 24 0 -
+Z Asia/Tehran 3:25:44 - LMT 1916
+3:25:44 - TMT 1946
+3:30 - +0330 1977 N
+4 i +04/+05 1979
+3:30 i +0330/+0430
+R IQ 1982 o - May 1 0 1 -
+R IQ 1982 1984 - O 1 0 0 -
+R IQ 1983 o - Mar 31 0 1 -
+R IQ 1984 1985 - Ap 1 0 1 -
+R IQ 1985 1990 - S lastSu 1s 0 -
+R IQ 1986 1990 - Mar lastSu 1s 1 -
+R IQ 1991 2007 - Ap 1 3s 1 -
+R IQ 1991 2007 - O 1 3s 0 -
+Z Asia/Baghdad 2:57:40 - LMT 1890
+2:57:36 - BMT 1918
+3 - +03 1982 May
+3 IQ +03/+04
+R Z 1940 o - May 31 24u 1 D
+R Z 1940 o - S 30 24u 0 S
+R Z 1940 o - N 16 24u 1 D
+R Z 1942 1946 - O 31 24u 0 S
+R Z 1943 1944 - Mar 31 24u 1 D
+R Z 1945 1946 - Ap 15 24u 1 D
+R Z 1948 o - May 22 24u 2 DD
+R Z 1948 o - Au 31 24u 1 D
+R Z 1948 1949 - O 31 24u 0 S
+R Z 1949 o - Ap 30 24u 1 D
+R Z 1950 o - Ap 15 24u 1 D
+R Z 1950 o - S 14 24u 0 S
+R Z 1951 o - Mar 31 24u 1 D
+R Z 1951 o - N 10 24u 0 S
+R Z 1952 o - Ap 19 24u 1 D
+R Z 1952 o - O 18 24u 0 S
+R Z 1953 o - Ap 11 24u 1 D
+R Z 1953 o - S 12 24u 0 S
+R Z 1954 o - Jun 12 24u 1 D
+R Z 1954 o - S 11 24u 0 S
+R Z 1955 o - Jun 11 24u 1 D
+R Z 1955 o - S 10 24u 0 S
+R Z 1956 o - Jun 2 24u 1 D
+R Z 1956 o - S 29 24u 0 S
+R Z 1957 o - Ap 27 24u 1 D
+R Z 1957 o - S 21 24u 0 S
+R Z 1974 o - Jul 6 24 1 D
+R Z 1974 o - O 12 24 0 S
+R Z 1975 o - Ap 19 24 1 D
+R Z 1975 o - Au 30 24 0 S
+R Z 1980 o - Au 2 24s 1 D
+R Z 1980 o - S 13 24s 0 S
+R Z 1984 o - May 5 24s 1 D
+R Z 1984 o - Au 25 24s 0 S
+R Z 1985 o - Ap 13 24 1 D
+R Z 1985 o - Au 31 24 0 S
+R Z 1986 o - May 17 24 1 D
+R Z 1986 o - S 6 24 0 S
+R Z 1987 o - Ap 14 24 1 D
+R Z 1987 o - S 12 24 0 S
+R Z 1988 o - Ap 9 24 1 D
+R Z 1988 o - S 3 24 0 S
+R Z 1989 o - Ap 29 24 1 D
+R Z 1989 o - S 2 24 0 S
+R Z 1990 o - Mar 24 24 1 D
+R Z 1990 o - Au 25 24 0 S
+R Z 1991 o - Mar 23 24 1 D
+R Z 1991 o - Au 31 24 0 S
+R Z 1992 o - Mar 28 24 1 D
+R Z 1992 o - S 5 24 0 S
+R Z 1993 o - Ap 2 0 1 D
+R Z 1993 o - S 5 0 0 S
+R Z 1994 o - Ap 1 0 1 D
+R Z 1994 o - Au 28 0 0 S
+R Z 1995 o - Mar 31 0 1 D
+R Z 1995 o - S 3 0 0 S
+R Z 1996 o - Mar 14 24 1 D
+R Z 1996 o - S 15 24 0 S
+R Z 1997 o - Mar 20 24 1 D
+R Z 1997 o - S 13 24 0 S
+R Z 1998 o - Mar 20 0 1 D
+R Z 1998 o - S 6 0 0 S
+R Z 1999 o - Ap 2 2 1 D
+R Z 1999 o - S 3 2 0 S
+R Z 2000 o - Ap 14 2 1 D
+R Z 2000 o - O 6 1 0 S
+R Z 2001 o - Ap 9 1 1 D
+R Z 2001 o - S 24 1 0 S
+R Z 2002 o - Mar 29 1 1 D
+R Z 2002 o - O 7 1 0 S
+R Z 2003 o - Mar 28 1 1 D
+R Z 2003 o - O 3 1 0 S
+R Z 2004 o - Ap 7 1 1 D
+R Z 2004 o - S 22 1 0 S
+R Z 2005 2012 - Ap F<=1 2 1 D
+R Z 2005 o - O 9 2 0 S
+R Z 2006 o - O 1 2 0 S
+R Z 2007 o - S 16 2 0 S
+R Z 2008 o - O 5 2 0 S
+R Z 2009 o - S 27 2 0 S
+R Z 2010 o - S 12 2 0 S
+R Z 2011 o - O 2 2 0 S
+R Z 2012 o - S 23 2 0 S
+R Z 2013 ma - Mar F>=23 2 1 D
+R Z 2013 ma - O lastSu 2 0 S
+Z Asia/Jerusalem 2:20:54 - LMT 1880
+2:20:40 - JMT 1918
+2 Z I%sT
+R JP 1948 o - May Sa>=1 24 1 D
+R JP 1948 1951 - S Sa>=8 25 0 S
+R JP 1949 o - Ap Sa>=1 24 1 D
+R JP 1950 1951 - May Sa>=1 24 1 D
+Z Asia/Tokyo 9:18:59 - LMT 1887 D 31 15u
+9 JP J%sT
+R J 1973 o - Jun 6 0 1 S
+R J 1973 1975 - O 1 0 0 -
+R J 1974 1977 - May 1 0 1 S
+R J 1976 o - N 1 0 0 -
+R J 1977 o - O 1 0 0 -
+R J 1978 o - Ap 30 0 1 S
+R J 1978 o - S 30 0 0 -
+R J 1985 o - Ap 1 0 1 S
+R J 1985 o - O 1 0 0 -
+R J 1986 1988 - Ap F>=1 0 1 S
+R J 1986 1990 - O F>=1 0 0 -
+R J 1989 o - May 8 0 1 S
+R J 1990 o - Ap 27 0 1 S
+R J 1991 o - Ap 17 0 1 S
+R J 1991 o - S 27 0 0 -
+R J 1992 o - Ap 10 0 1 S
+R J 1992 1993 - O F>=1 0 0 -
+R J 1993 1998 - Ap F>=1 0 1 S
+R J 1994 o - S F>=15 0 0 -
+R J 1995 1998 - S F>=15 0s 0 -
+R J 1999 o - Jul 1 0s 1 S
+R J 1999 2002 - S lastF 0s 0 -
+R J 2000 2001 - Mar lastTh 0s 1 S
+R J 2002 2012 - Mar lastTh 24 1 S
+R J 2003 o - O 24 0s 0 -
+R J 2004 o - O 15 0s 0 -
+R J 2005 o - S lastF 0s 0 -
+R J 2006 2011 - O lastF 0s 0 -
+R J 2013 o - D 20 0 0 -
+R J 2014 2021 - Mar lastTh 24 1 S
+R J 2014 ma - O lastF 0s 0 -
+R J 2022 ma - F lastTh 24 1 S
+Z Asia/Amman 2:23:44 - LMT 1931
+2 J EE%sT
+Z Asia/Almaty 5:7:48 - LMT 1924 May 2
+5 - +05 1930 Jun 21
+6 R +06/+07 1991 Mar 31 2s
+5 R +05/+06 1992 Ja 19 2s
+6 R +06/+07 2004 O 31 2s
+6 - +06
+Z Asia/Qyzylorda 4:21:52 - LMT 1924 May 2
+4 - +04 1930 Jun 21
+5 - +05 1981 Ap
+5 1 +06 1981 O
+6 - +06 1982 Ap
+5 R +05/+06 1991 Mar 31 2s
+4 R +04/+05 1991 S 29 2s
+5 R +05/+06 1992 Ja 19 2s
+6 R +06/+07 1992 Mar 29 2s
+5 R +05/+06 2004 O 31 2s
+6 - +06 2018 D 21
+5 - +05
+Z Asia/Qostanay 4:14:28 - LMT 1924 May 2
+4 - +04 1930 Jun 21
+5 - +05 1981 Ap
+5 1 +06 1981 O
+6 - +06 1982 Ap
+5 R +05/+06 1991 Mar 31 2s
+4 R +04/+05 1992 Ja 19 2s
+5 R +05/+06 2004 O 31 2s
+6 - +06
+Z Asia/Aqtobe 3:48:40 - LMT 1924 May 2
+4 - +04 1930 Jun 21
+5 - +05 1981 Ap
+5 1 +06 1981 O
+6 - +06 1982 Ap
+5 R +05/+06 1991 Mar 31 2s
+4 R +04/+05 1992 Ja 19 2s
+5 R +05/+06 2004 O 31 2s
+5 - +05
+Z Asia/Aqtau 3:21:4 - LMT 1924 May 2
+4 - +04 1930 Jun 21
+5 - +05 1981 O
+6 - +06 1982 Ap
+5 R +05/+06 1991 Mar 31 2s
+4 R +04/+05 1992 Ja 19 2s
+5 R +05/+06 1994 S 25 2s
+4 R +04/+05 2004 O 31 2s
+5 - +05
+Z Asia/Atyrau 3:27:44 - LMT 1924 May 2
+3 - +03 1930 Jun 21
+5 - +05 1981 O
+6 - +06 1982 Ap
+5 R +05/+06 1991 Mar 31 2s
+4 R +04/+05 1992 Ja 19 2s
+5 R +05/+06 1999 Mar 28 2s
+4 R +04/+05 2004 O 31 2s
+5 - +05
+Z Asia/Oral 3:25:24 - LMT 1924 May 2
+3 - +03 1930 Jun 21
+5 - +05 1981 Ap
+5 1 +06 1981 O
+6 - +06 1982 Ap
+5 R +05/+06 1989 Mar 26 2s
+4 R +04/+05 1992 Ja 19 2s
+5 R +05/+06 1992 Mar 29 2s
+4 R +04/+05 2004 O 31 2s
+5 - +05
+R KG 1992 1996 - Ap Su>=7 0s 1 -
+R KG 1992 1996 - S lastSu 0 0 -
+R KG 1997 2005 - Mar lastSu 2:30 1 -
+R KG 1997 2004 - O lastSu 2:30 0 -
+Z Asia/Bishkek 4:58:24 - LMT 1924 May 2
+5 - +05 1930 Jun 21
+6 R +06/+07 1991 Mar 31 2s
+5 R +05/+06 1991 Au 31 2
+5 KG +05/+06 2005 Au 12
+6 - +06
+R KR 1948 o - Jun 1 0 1 D
+R KR 1948 o - S 12 24 0 S
+R KR 1949 o - Ap 3 0 1 D
+R KR 1949 1951 - S Sa>=7 24 0 S
+R KR 1950 o - Ap 1 0 1 D
+R KR 1951 o - May 6 0 1 D
+R KR 1955 o - May 5 0 1 D
+R KR 1955 o - S 8 24 0 S
+R KR 1956 o - May 20 0 1 D
+R KR 1956 o - S 29 24 0 S
+R KR 1957 1960 - May Su>=1 0 1 D
+R KR 1957 1960 - S Sa>=17 24 0 S
+R KR 1987 1988 - May Su>=8 2 1 D
+R KR 1987 1988 - O Su>=8 3 0 S
+Z Asia/Seoul 8:27:52 - LMT 1908 Ap
+8:30 - KST 1912
+9 - JST 1945 S 8
+9 KR K%sT 1954 Mar 21
+8:30 KR K%sT 1961 Au 10
+9 KR K%sT
+Z Asia/Pyongyang 8:23 - LMT 1908 Ap
+8:30 - KST 1912
+9 - JST 1945 Au 24
+9 - KST 2015 Au 15
+8:30 - KST 2018 May 4 23:30
+9 - KST
+R l 1920 o - Mar 28 0 1 S
+R l 1920 o - O 25 0 0 -
+R l 1921 o - Ap 3 0 1 S
+R l 1921 o - O 3 0 0 -
+R l 1922 o - Mar 26 0 1 S
+R l 1922 o - O 8 0 0 -
+R l 1923 o - Ap 22 0 1 S
+R l 1923 o - S 16 0 0 -
+R l 1957 1961 - May 1 0 1 S
+R l 1957 1961 - O 1 0 0 -
+R l 1972 o - Jun 22 0 1 S
+R l 1972 1977 - O 1 0 0 -
+R l 1973 1977 - May 1 0 1 S
+R l 1978 o - Ap 30 0 1 S
+R l 1978 o - S 30 0 0 -
+R l 1984 1987 - May 1 0 1 S
+R l 1984 1991 - O 16 0 0 -
+R l 1988 o - Jun 1 0 1 S
+R l 1989 o - May 10 0 1 S
+R l 1990 1992 - May 1 0 1 S
+R l 1992 o - O 4 0 0 -
+R l 1993 ma - Mar lastSu 0 1 S
+R l 1993 1998 - S lastSu 0 0 -
+R l 1999 ma - O lastSu 0 0 -
+Z Asia/Beirut 2:22 - LMT 1880
+2 l EE%sT
+R NB 1935 1941 - S 14 0 0:20 -
+R NB 1935 1941 - D 14 0 0 -
+Z Asia/Kuala_Lumpur 6:46:46 - LMT 1901
+6:55:25 - SMT 1905 Jun
+7 - +07 1933
+7 0:20 +0720 1936
+7:20 - +0720 1941 S
+7:30 - +0730 1942 F 16
+9 - +09 1945 S 12
+7:30 - +0730 1982
+8 - +08
+Z Asia/Kuching 7:21:20 - LMT 1926 Mar
+7:30 - +0730 1933
+8 NB +08/+0820 1942 F 16
+9 - +09 1945 S 12
+8 - +08
+Z Indian/Maldives 4:54 - LMT 1880
+4:54 - MMT 1960
+5 - +05
+R X 1983 1984 - Ap 1 0 1 -
+R X 1983 o - O 1 0 0 -
+R X 1985 1998 - Mar lastSu 0 1 -
+R X 1984 1998 - S lastSu 0 0 -
+R X 2001 o - Ap lastSa 2 1 -
+R X 2001 2006 - S lastSa 2 0 -
+R X 2002 2006 - Mar lastSa 2 1 -
+R X 2015 2016 - Mar lastSa 2 1 -
+R X 2015 2016 - S lastSa 0 0 -
+Z Asia/Hovd 6:6:36 - LMT 1905 Au
+6 - +06 1978
+7 X +07/+08
+Z Asia/Ulaanbaatar 7:7:32 - LMT 1905 Au
+7 - +07 1978
+8 X +08/+09
+Z Asia/Choibalsan 7:38 - LMT 1905 Au
+7 - +07 1978
+8 - +08 1983 Ap
+9 X +09/+10 2008 Mar 31
+8 X +08/+09
+Z Asia/Kathmandu 5:41:16 - LMT 1920
+5:30 - +0530 1986
+5:45 - +0545
+R PK 2002 o - Ap Su>=2 0 1 S
+R PK 2002 o - O Su>=2 0 0 -
+R PK 2008 o - Jun 1 0 1 S
+R PK 2008 2009 - N 1 0 0 -
+R PK 2009 o - Ap 15 0 1 S
+Z Asia/Karachi 4:28:12 - LMT 1907
+5:30 - +0530 1942 S
+5:30 1 +0630 1945 O 15
+5:30 - +0530 1951 S 30
+5 - +05 1971 Mar 26
+5 PK PK%sT
+R P 1999 2005 - Ap F>=15 0 1 S
+R P 1999 2003 - O F>=15 0 0 -
+R P 2004 o - O 1 1 0 -
+R P 2005 o - O 4 2 0 -
+R P 2006 2007 - Ap 1 0 1 S
+R P 2006 o - S 22 0 0 -
+R P 2007 o - S 13 2 0 -
+R P 2008 2009 - Mar lastF 0 1 S
+R P 2008 o - S 1 0 0 -
+R P 2009 o - S 4 1 0 -
+R P 2010 o - Mar 26 0 1 S
+R P 2010 o - Au 11 0 0 -
+R P 2011 o - Ap 1 0:1 1 S
+R P 2011 o - Au 1 0 0 -
+R P 2011 o - Au 30 0 1 S
+R P 2011 o - S 30 0 0 -
+R P 2012 2014 - Mar lastTh 24 1 S
+R P 2012 o - S 21 1 0 -
+R P 2013 o - S 27 0 0 -
+R P 2014 o - O 24 0 0 -
+R P 2015 o - Mar 28 0 1 S
+R P 2015 o - O 23 1 0 -
+R P 2016 2018 - Mar Sa>=24 1 1 S
+R P 2016 2018 - O Sa>=24 1 0 -
+R P 2019 o - Mar 29 0 1 S
+R P 2019 o - O Sa>=24 0 0 -
+R P 2020 2021 - Mar Sa>=24 0 1 S
+R P 2020 o - O 24 1 0 -
+R P 2021 ma - O F>=23 1 0 -
+R P 2022 ma - Mar Su>=25 0 1 S
+Z Asia/Gaza 2:17:52 - LMT 1900 O
+2 Z EET/EEST 1948 May 15
+2 K EE%sT 1967 Jun 5
+2 Z I%sT 1996
+2 J EE%sT 1999
+2 P EE%sT 2008 Au 29
+2 - EET 2008 S
+2 P EE%sT 2010
+2 - EET 2010 Mar 27 0:1
+2 P EE%sT 2011 Au
+2 - EET 2012
+2 P EE%sT
+Z Asia/Hebron 2:20:23 - LMT 1900 O
+2 Z EET/EEST 1948 May 15
+2 K EE%sT 1967 Jun 5
+2 Z I%sT 1996
+2 J EE%sT 1999
+2 P EE%sT
+R PH 1936 o - N 1 0 1 D
+R PH 1937 o - F 1 0 0 S
+R PH 1954 o - Ap 12 0 1 D
+R PH 1954 o - Jul 1 0 0 S
+R PH 1978 o - Mar 22 0 1 D
+R PH 1978 o - S 21 0 0 S
+Z Asia/Manila -15:56 - LMT 1844 D 31
+8:4 - LMT 1899 May 11
+8 PH P%sT 1942 May
+9 - JST 1944 N
+8 PH P%sT
+Z Asia/Qatar 3:26:8 - LMT 1920
+4 - +04 1972 Jun
+3 - +03
+L Asia/Qatar Asia/Bahrain
+Z Asia/Riyadh 3:6:52 - LMT 1947 Mar 14
+3 - +03
+L Asia/Riyadh Antarctica/Syowa
+L Asia/Riyadh Asia/Aden
+L Asia/Riyadh Asia/Kuwait
+Z Asia/Singapore 6:55:25 - LMT 1901
+6:55:25 - SMT 1905 Jun
+7 - +07 1933
+7 0:20 +0720 1936
+7:20 - +0720 1941 S
+7:30 - +0730 1942 F 16
+9 - +09 1945 S 12
+7:30 - +0730 1982
+8 - +08
+Z Asia/Colombo 5:19:24 - LMT 1880
+5:19:32 - MMT 1906
+5:30 - +0530 1942 Ja 5
+5:30 0:30 +06 1942 S
+5:30 1 +0630 1945 O 16 2
+5:30 - +0530 1996 May 25
+6:30 - +0630 1996 O 26 0:30
+6 - +06 2006 Ap 15 0:30
+5:30 - +0530
+R S 1920 1923 - Ap Su>=15 2 1 S
+R S 1920 1923 - O Su>=1 2 0 -
+R S 1962 o - Ap 29 2 1 S
+R S 1962 o - O 1 2 0 -
+R S 1963 1965 - May 1 2 1 S
+R S 1963 o - S 30 2 0 -
+R S 1964 o - O 1 2 0 -
+R S 1965 o - S 30 2 0 -
+R S 1966 o - Ap 24 2 1 S
+R S 1966 1976 - O 1 2 0 -
+R S 1967 1978 - May 1 2 1 S
+R S 1977 1978 - S 1 2 0 -
+R S 1983 1984 - Ap 9 2 1 S
+R S 1983 1984 - O 1 2 0 -
+R S 1986 o - F 16 2 1 S
+R S 1986 o - O 9 2 0 -
+R S 1987 o - Mar 1 2 1 S
+R S 1987 1988 - O 31 2 0 -
+R S 1988 o - Mar 15 2 1 S
+R S 1989 o - Mar 31 2 1 S
+R S 1989 o - O 1 2 0 -
+R S 1990 o - Ap 1 2 1 S
+R S 1990 o - S 30 2 0 -
+R S 1991 o - Ap 1 0 1 S
+R S 1991 1992 - O 1 0 0 -
+R S 1992 o - Ap 8 0 1 S
+R S 1993 o - Mar 26 0 1 S
+R S 1993 o - S 25 0 0 -
+R S 1994 1996 - Ap 1 0 1 S
+R S 1994 2005 - O 1 0 0 -
+R S 1997 1998 - Mar lastM 0 1 S
+R S 1999 2006 - Ap 1 0 1 S
+R S 2006 o - S 22 0 0 -
+R S 2007 o - Mar lastF 0 1 S
+R S 2007 o - N F>=1 0 0 -
+R S 2008 o - Ap F>=1 0 1 S
+R S 2008 o - N 1 0 0 -
+R S 2009 o - Mar lastF 0 1 S
+R S 2010 2011 - Ap F>=1 0 1 S
+R S 2012 ma - Mar lastF 0 1 S
+R S 2009 ma - O lastF 0 0 -
+Z Asia/Damascus 2:25:12 - LMT 1920
+2 S EE%sT
+Z Asia/Dushanbe 4:35:12 - LMT 1924 May 2
+5 - +05 1930 Jun 21
+6 R +06/+07 1991 Mar 31 2s
+5 1 +05/+06 1991 S 9 2s
+5 - +05
+Z Asia/Bangkok 6:42:4 - LMT 1880
+6:42:4 - BMT 1920 Ap
+7 - +07
+L Asia/Bangkok Asia/Phnom_Penh
+L Asia/Bangkok Asia/Vientiane
+Z Asia/Ashgabat 3:53:32 - LMT 1924 May 2
+4 - +04 1930 Jun 21
+5 R +05/+06 1991 Mar 31 2
+4 R +04/+05 1992 Ja 19 2
+5 - +05
+Z Asia/Dubai 3:41:12 - LMT 1920
+4 - +04
+L Asia/Dubai Asia/Muscat
+Z Asia/Samarkand 4:27:53 - LMT 1924 May 2
+4 - +04 1930 Jun 21
+5 - +05 1981 Ap
+5 1 +06 1981 O
+6 - +06 1982 Ap
+5 R +05/+06 1992
+5 - +05
+Z Asia/Tashkent 4:37:11 - LMT 1924 May 2
+5 - +05 1930 Jun 21
+6 R +06/+07 1991 Mar 31 2
+5 R +05/+06 1992
+5 - +05
+Z Asia/Ho_Chi_Minh 7:6:40 - LMT 1906 Jul
+7:6:30 - PLMT 1911 May
+7 - +07 1942 D 31 23
+8 - +08 1945 Mar 14 23
+9 - +09 1945 S 2
+7 - +07 1947 Ap
+8 - +08 1955 Jul
+7 - +07 1959 D 31 23
+8 - +08 1975 Jun 13
+7 - +07
+R AU 1917 o - Ja 1 2s 1 D
+R AU 1917 o - Mar lastSu 2s 0 S
+R AU 1942 o - Ja 1 2s 1 D
+R AU 1942 o - Mar lastSu 2s 0 S
+R AU 1942 o - S 27 2s 1 D
+R AU 1943 1944 - Mar lastSu 2s 0 S
+R AU 1943 o - O 3 2s 1 D
+Z Australia/Darwin 8:43:20 - LMT 1895 F
+9 - ACST 1899 May
+9:30 AU AC%sT
+R AW 1974 o - O lastSu 2s 1 D
+R AW 1975 o - Mar Su>=1 2s 0 S
+R AW 1983 o - O lastSu 2s 1 D
+R AW 1984 o - Mar Su>=1 2s 0 S
+R AW 1991 o - N 17 2s 1 D
+R AW 1992 o - Mar Su>=1 2s 0 S
+R AW 2006 o - D 3 2s 1 D
+R AW 2007 2009 - Mar lastSu 2s 0 S
+R AW 2007 2008 - O lastSu 2s 1 D
+Z Australia/Perth 7:43:24 - LMT 1895 D
+8 AU AW%sT 1943 Jul
+8 AW AW%sT
+Z Australia/Eucla 8:35:28 - LMT 1895 D
+8:45 AU +0845/+0945 1943 Jul
+8:45 AW +0845/+0945
+R AQ 1971 o - O lastSu 2s 1 D
+R AQ 1972 o - F lastSu 2s 0 S
+R AQ 1989 1991 - O lastSu 2s 1 D
+R AQ 1990 1992 - Mar Su>=1 2s 0 S
+R Ho 1992 1993 - O lastSu 2s 1 D
+R Ho 1993 1994 - Mar Su>=1 2s 0 S
+Z Australia/Brisbane 10:12:8 - LMT 1895
+10 AU AE%sT 1971
+10 AQ AE%sT
+Z Australia/Lindeman 9:55:56 - LMT 1895
+10 AU AE%sT 1971
+10 AQ AE%sT 1992 Jul
+10 Ho AE%sT
+R AS 1971 1985 - O lastSu 2s 1 D
+R AS 1986 o - O 19 2s 1 D
+R AS 1987 2007 - O lastSu 2s 1 D
+R AS 1972 o - F 27 2s 0 S
+R AS 1973 1985 - Mar Su>=1 2s 0 S
+R AS 1986 1990 - Mar Su>=15 2s 0 S
+R AS 1991 o - Mar 3 2s 0 S
+R AS 1992 o - Mar 22 2s 0 S
+R AS 1993 o - Mar 7 2s 0 S
+R AS 1994 o - Mar 20 2s 0 S
+R AS 1995 2005 - Mar lastSu 2s 0 S
+R AS 2006 o - Ap 2 2s 0 S
+R AS 2007 o - Mar lastSu 2s 0 S
+R AS 2008 ma - Ap Su>=1 2s 0 S
+R AS 2008 ma - O Su>=1 2s 1 D
+Z Australia/Adelaide 9:14:20 - LMT 1895 F
+9 - ACST 1899 May
+9:30 AU AC%sT 1971
+9:30 AS AC%sT
+R AT 1916 o - O Su>=1 2s 1 D
+R AT 1917 o - Mar lastSu 2s 0 S
+R AT 1917 1918 - O Su>=22 2s 1 D
+R AT 1918 1919 - Mar Su>=1 2s 0 S
+R AT 1967 o - O Su>=1 2s 1 D
+R AT 1968 o - Mar Su>=29 2s 0 S
+R AT 1968 1985 - O lastSu 2s 1 D
+R AT 1969 1971 - Mar Su>=8 2s 0 S
+R AT 1972 o - F lastSu 2s 0 S
+R AT 1973 1981 - Mar Su>=1 2s 0 S
+R AT 1982 1983 - Mar lastSu 2s 0 S
+R AT 1984 1986 - Mar Su>=1 2s 0 S
+R AT 1986 o - O Su>=15 2s 1 D
+R AT 1987 1990 - Mar Su>=15 2s 0 S
+R AT 1987 o - O Su>=22 2s 1 D
+R AT 1988 1990 - O lastSu 2s 1 D
+R AT 1991 1999 - O Su>=1 2s 1 D
+R AT 1991 2005 - Mar lastSu 2s 0 S
+R AT 2000 o - Au lastSu 2s 1 D
+R AT 2001 ma - O Su>=1 2s 1 D
+R AT 2006 o - Ap Su>=1 2s 0 S
+R AT 2007 o - Mar lastSu 2s 0 S
+R AT 2008 ma - Ap Su>=1 2s 0 S
+Z Australia/Hobart 9:49:16 - LMT 1895 S
+10 AT AE%sT 1919 O 24
+10 AU AE%sT 1967
+10 AT AE%sT
+R AV 1971 1985 - O lastSu 2s 1 D
+R AV 1972 o - F lastSu 2s 0 S
+R AV 1973 1985 - Mar Su>=1 2s 0 S
+R AV 1986 1990 - Mar Su>=15 2s 0 S
+R AV 1986 1987 - O Su>=15 2s 1 D
+R AV 1988 1999 - O lastSu 2s 1 D
+R AV 1991 1994 - Mar Su>=1 2s 0 S
+R AV 1995 2005 - Mar lastSu 2s 0 S
+R AV 2000 o - Au lastSu 2s 1 D
+R AV 2001 2007 - O lastSu 2s 1 D
+R AV 2006 o - Ap Su>=1 2s 0 S
+R AV 2007 o - Mar lastSu 2s 0 S
+R AV 2008 ma - Ap Su>=1 2s 0 S
+R AV 2008 ma - O Su>=1 2s 1 D
+Z Australia/Melbourne 9:39:52 - LMT 1895 F
+10 AU AE%sT 1971
+10 AV AE%sT
+R AN 1971 1985 - O lastSu 2s 1 D
+R AN 1972 o - F 27 2s 0 S
+R AN 1973 1981 - Mar Su>=1 2s 0 S
+R AN 1982 o - Ap Su>=1 2s 0 S
+R AN 1983 1985 - Mar Su>=1 2s 0 S
+R AN 1986 1989 - Mar Su>=15 2s 0 S
+R AN 1986 o - O 19 2s 1 D
+R AN 1987 1999 - O lastSu 2s 1 D
+R AN 1990 1995 - Mar Su>=1 2s 0 S
+R AN 1996 2005 - Mar lastSu 2s 0 S
+R AN 2000 o - Au lastSu 2s 1 D
+R AN 2001 2007 - O lastSu 2s 1 D
+R AN 2006 o - Ap Su>=1 2s 0 S
+R AN 2007 o - Mar lastSu 2s 0 S
+R AN 2008 ma - Ap Su>=1 2s 0 S
+R AN 2008 ma - O Su>=1 2s 1 D
+Z Australia/Sydney 10:4:52 - LMT 1895 F
+10 AU AE%sT 1971
+10 AN AE%sT
+Z Australia/Broken_Hill 9:25:48 - LMT 1895 F
+10 - AEST 1896 Au 23
+9 - ACST 1899 May
+9:30 AU AC%sT 1971
+9:30 AN AC%sT 2000
+9:30 AS AC%sT
+R LH 1981 1984 - O lastSu 2 1 -
+R LH 1982 1985 - Mar Su>=1 2 0 -
+R LH 1985 o - O lastSu 2 0:30 -
+R LH 1986 1989 - Mar Su>=15 2 0 -
+R LH 1986 o - O 19 2 0:30 -
+R LH 1987 1999 - O lastSu 2 0:30 -
+R LH 1990 1995 - Mar Su>=1 2 0 -
+R LH 1996 2005 - Mar lastSu 2 0 -
+R LH 2000 o - Au lastSu 2 0:30 -
+R LH 2001 2007 - O lastSu 2 0:30 -
+R LH 2006 o - Ap Su>=1 2 0 -
+R LH 2007 o - Mar lastSu 2 0 -
+R LH 2008 ma - Ap Su>=1 2 0 -
+R LH 2008 ma - O Su>=1 2 0:30 -
+Z Australia/Lord_Howe 10:36:20 - LMT 1895 F
+10 - AEST 1981 Mar
+10:30 LH +1030/+1130 1985 Jul
+10:30 LH +1030/+11
+Z Antarctica/Macquarie 0 - -00 1899 N
+10 - AEST 1916 O 1 2
+10 1 AEDT 1917 F
+10 AU AE%sT 1919 Ap 1 0s
+0 - -00 1948 Mar 25
+10 AU AE%sT 1967
+10 AT AE%sT 2010
+10 1 AEDT 2011
+10 AT AE%sT
+Z Indian/Christmas 7:2:52 - LMT 1895 F
+7 - +07
+Z Indian/Cocos 6:27:40 - LMT 1900
+6:30 - +0630
+R FJ 1998 1999 - N Su>=1 2 1 -
+R FJ 1999 2000 - F lastSu 3 0 -
+R FJ 2009 o - N 29 2 1 -
+R FJ 2010 o - Mar lastSu 3 0 -
+R FJ 2010 2013 - O Su>=21 2 1 -
+R FJ 2011 o - Mar Su>=1 3 0 -
+R FJ 2012 2013 - Ja Su>=18 3 0 -
+R FJ 2014 o - Ja Su>=18 2 0 -
+R FJ 2014 2018 - N Su>=1 2 1 -
+R FJ 2015 2021 - Ja Su>=12 3 0 -
+R FJ 2019 o - N Su>=8 2 1 -
+R FJ 2020 o - D 20 2 1 -
+R FJ 2022 ma - N Su>=8 2 1 -
+R FJ 2023 ma - Ja Su>=12 3 0 -
+Z Pacific/Fiji 11:55:44 - LMT 1915 O 26
+12 FJ +12/+13
+Z Pacific/Gambier -8:59:48 - LMT 1912 O
+-9 - -09
+Z Pacific/Marquesas -9:18 - LMT 1912 O
+-9:30 - -0930
+Z Pacific/Tahiti -9:58:16 - LMT 1912 O
+-10 - -10
+R Gu 1959 o - Jun 27 2 1 D
+R Gu 1961 o - Ja 29 2 0 S
+R Gu 1967 o - S 1 2 1 D
+R Gu 1969 o - Ja 26 0:1 0 S
+R Gu 1969 o - Jun 22 2 1 D
+R Gu 1969 o - Au 31 2 0 S
+R Gu 1970 1971 - Ap lastSu 2 1 D
+R Gu 1970 1971 - S Su>=1 2 0 S
+R Gu 1973 o - D 16 2 1 D
+R Gu 1974 o - F 24 2 0 S
+R Gu 1976 o - May 26 2 1 D
+R Gu 1976 o - Au 22 2:1 0 S
+R Gu 1977 o - Ap 24 2 1 D
+R Gu 1977 o - Au 28 2 0 S
+Z Pacific/Guam -14:21 - LMT 1844 D 31
+9:39 - LMT 1901
+10 - GST 1941 D 10
+9 - +09 1944 Jul 31
+10 Gu G%sT 2000 D 23
+10 - ChST
+L Pacific/Guam Pacific/Saipan
+Z Pacific/Tarawa 11:32:4 - LMT 1901
+12 - +12
+Z Pacific/Kanton 0 - -00 1937 Au 31
+-12 - -12 1979 O
+-11 - -11 1994 D 31
+13 - +13
+Z Pacific/Kiritimati -10:29:20 - LMT 1901
+-10:40 - -1040 1979 O
+-10 - -10 1994 D 31
+14 - +14
+Z Pacific/Majuro 11:24:48 - LMT 1901
+11 - +11 1914 O
+9 - +09 1919 F
+11 - +11 1937
+10 - +10 1941 Ap
+9 - +09 1944 Ja 30
+11 - +11 1969 O
+12 - +12
+Z Pacific/Kwajalein 11:9:20 - LMT 1901
+11 - +11 1937
+10 - +10 1941 Ap
+9 - +09 1944 F 6
+11 - +11 1969 O
+-12 - -12 1993 Au 20 24
+12 - +12
+Z Pacific/Chuuk -13:52:52 - LMT 1844 D 31
+10:7:8 - LMT 1901
+10 - +10 1914 O
+9 - +09 1919 F
+10 - +10 1941 Ap
+9 - +09 1945 Au
+10 - +10
+Z Pacific/Pohnpei -13:27:8 - LMT 1844 D 31
+10:32:52 - LMT 1901
+11 - +11 1914 O
+9 - +09 1919 F
+11 - +11 1937
+10 - +10 1941 Ap
+9 - +09 1945 Au
+11 - +11
+Z Pacific/Kosrae -13:8:4 - LMT 1844 D 31
+10:51:56 - LMT 1901
+11 - +11 1914 O
+9 - +09 1919 F
+11 - +11 1937
+10 - +10 1941 Ap
+9 - +09 1945 Au
+11 - +11 1969 O
+12 - +12 1999
+11 - +11
+Z Pacific/Nauru 11:7:40 - LMT 1921 Ja 15
+11:30 - +1130 1942 Au 29
+9 - +09 1945 S 8
+11:30 - +1130 1979 F 10 2
+12 - +12
+R NC 1977 1978 - D Su>=1 0 1 -
+R NC 1978 1979 - F 27 0 0 -
+R NC 1996 o - D 1 2s 1 -
+R NC 1997 o - Mar 2 2s 0 -
+Z Pacific/Noumea 11:5:48 - LMT 1912 Ja 13
+11 NC +11/+12
+R NZ 1927 o - N 6 2 1 S
+R NZ 1928 o - Mar 4 2 0 M
+R NZ 1928 1933 - O Su>=8 2 0:30 S
+R NZ 1929 1933 - Mar Su>=15 2 0 M
+R NZ 1934 1940 - Ap lastSu 2 0 M
+R NZ 1934 1940 - S lastSu 2 0:30 S
+R NZ 1946 o - Ja 1 0 0 S
+R NZ 1974 o - N Su>=1 2s 1 D
+R k 1974 o - N Su>=1 2:45s 1 -
+R NZ 1975 o - F lastSu 2s 0 S
+R k 1975 o - F lastSu 2:45s 0 -
+R NZ 1975 1988 - O lastSu 2s 1 D
+R k 1975 1988 - O lastSu 2:45s 1 -
+R NZ 1976 1989 - Mar Su>=1 2s 0 S
+R k 1976 1989 - Mar Su>=1 2:45s 0 -
+R NZ 1989 o - O Su>=8 2s 1 D
+R k 1989 o - O Su>=8 2:45s 1 -
+R NZ 1990 2006 - O Su>=1 2s 1 D
+R k 1990 2006 - O Su>=1 2:45s 1 -
+R NZ 1990 2007 - Mar Su>=15 2s 0 S
+R k 1990 2007 - Mar Su>=15 2:45s 0 -
+R NZ 2007 ma - S lastSu 2s 1 D
+R k 2007 ma - S lastSu 2:45s 1 -
+R NZ 2008 ma - Ap Su>=1 2s 0 S
+R k 2008 ma - Ap Su>=1 2:45s 0 -
+Z Pacific/Auckland 11:39:4 - LMT 1868 N 2
+11:30 NZ NZ%sT 1946
+12 NZ NZ%sT
+Z Pacific/Chatham 12:13:48 - LMT 1868 N 2
+12:15 - +1215 1946
+12:45 k +1245/+1345
+L Pacific/Auckland Antarctica/McMurdo
+R CK 1978 o - N 12 0 0:30 -
+R CK 1979 1991 - Mar Su>=1 0 0 -
+R CK 1979 1990 - O lastSu 0 0:30 -
+Z Pacific/Rarotonga 13:20:56 - LMT 1899 D 26
+-10:39:4 - LMT 1952 O 16
+-10:30 - -1030 1978 N 12
+-10 CK -10/-0930
+Z Pacific/Niue -11:19:40 - LMT 1952 O 16
+-11:20 - -1120 1964 Jul
+-11 - -11
+Z Pacific/Norfolk 11:11:52 - LMT 1901
+11:12 - +1112 1951
+11:30 - +1130 1974 O 27 2s
+11:30 1 +1230 1975 Mar 2 2s
+11:30 - +1130 2015 O 4 2s
+11 - +11 2019 Jul
+11 AN +11/+12
+Z Pacific/Palau -15:2:4 - LMT 1844 D 31
+8:57:56 - LMT 1901
+9 - +09
+Z Pacific/Port_Moresby 9:48:40 - LMT 1880
+9:48:32 - PMMT 1895
+10 - +10
+L Pacific/Port_Moresby Antarctica/DumontDUrville
+Z Pacific/Bougainville 10:22:16 - LMT 1880
+9:48:32 - PMMT 1895
+10 - +10 1942 Jul
+9 - +09 1945 Au 21
+10 - +10 2014 D 28 2
+11 - +11
+Z Pacific/Pitcairn -8:40:20 - LMT 1901
+-8:30 - -0830 1998 Ap 27
+-8 - -08
+Z Pacific/Pago_Pago 12:37:12 - LMT 1892 Jul 5
+-11:22:48 - LMT 1911
+-11 - SST
+L Pacific/Pago_Pago Pacific/Midway
+R WS 2010 o - S lastSu 0 1 -
+R WS 2011 o - Ap Sa>=1 4 0 -
+R WS 2011 o - S lastSa 3 1 -
+R WS 2012 2021 - Ap Su>=1 4 0 -
+R WS 2012 2020 - S lastSu 3 1 -
+Z Pacific/Apia 12:33:4 - LMT 1892 Jul 5
+-11:26:56 - LMT 1911
+-11:30 - -1130 1950
+-11 WS -11/-10 2011 D 29 24
+13 WS +13/+14
+Z Pacific/Guadalcanal 10:39:48 - LMT 1912 O
+11 - +11
+Z Pacific/Fakaofo -11:24:56 - LMT 1901
+-11 - -11 2011 D 30
+13 - +13
+R TO 1999 o - O 7 2s 1 -
+R TO 2000 o - Mar 19 2s 0 -
+R TO 2000 2001 - N Su>=1 2 1 -
+R TO 2001 2002 - Ja lastSu 2 0 -
+R TO 2016 o - N Su>=1 2 1 -
+R TO 2017 o - Ja Su>=15 3 0 -
+Z Pacific/Tongatapu 12:19:12 - LMT 1945 S 10
+12:20 - +1220 1961
+13 - +13 1999
+13 TO +13/+14
+Z Pacific/Funafuti 11:56:52 - LMT 1901
+12 - +12
+Z Pacific/Wake 11:6:28 - LMT 1901
+12 - +12
+R VU 1973 o - D 22 12u 1 -
+R VU 1974 o - Mar 30 12u 0 -
+R VU 1983 1991 - S Sa>=22 24 1 -
+R VU 1984 1991 - Mar Sa>=22 24 0 -
+R VU 1992 1993 - Ja Sa>=22 24 0 -
+R VU 1992 o - O Sa>=22 24 1 -
+Z Pacific/Efate 11:13:16 - LMT 1912 Ja 13
+11 VU +11/+12
+Z Pacific/Wallis 12:15:20 - LMT 1901
+12 - +12
+R G 1916 o - May 21 2s 1 BST
+R G 1916 o - O 1 2s 0 GMT
+R G 1917 o - Ap 8 2s 1 BST
+R G 1917 o - S 17 2s 0 GMT
+R G 1918 o - Mar 24 2s 1 BST
+R G 1918 o - S 30 2s 0 GMT
+R G 1919 o - Mar 30 2s 1 BST
+R G 1919 o - S 29 2s 0 GMT
+R G 1920 o - Mar 28 2s 1 BST
+R G 1920 o - O 25 2s 0 GMT
+R G 1921 o - Ap 3 2s 1 BST
+R G 1921 o - O 3 2s 0 GMT
+R G 1922 o - Mar 26 2s 1 BST
+R G 1922 o - O 8 2s 0 GMT
+R G 1923 o - Ap Su>=16 2s 1 BST
+R G 1923 1924 - S Su>=16 2s 0 GMT
+R G 1924 o - Ap Su>=9 2s 1 BST
+R G 1925 1926 - Ap Su>=16 2s 1 BST
+R G 1925 1938 - O Su>=2 2s 0 GMT
+R G 1927 o - Ap Su>=9 2s 1 BST
+R G 1928 1929 - Ap Su>=16 2s 1 BST
+R G 1930 o - Ap Su>=9 2s 1 BST
+R G 1931 1932 - Ap Su>=16 2s 1 BST
+R G 1933 o - Ap Su>=9 2s 1 BST
+R G 1934 o - Ap Su>=16 2s 1 BST
+R G 1935 o - Ap Su>=9 2s 1 BST
+R G 1936 1937 - Ap Su>=16 2s 1 BST
+R G 1938 o - Ap Su>=9 2s 1 BST
+R G 1939 o - Ap Su>=16 2s 1 BST
+R G 1939 o - N Su>=16 2s 0 GMT
+R G 1940 o - F Su>=23 2s 1 BST
+R G 1941 o - May Su>=2 1s 2 BDST
+R G 1941 1943 - Au Su>=9 1s 1 BST
+R G 1942 1944 - Ap Su>=2 1s 2 BDST
+R G 1944 o - S Su>=16 1s 1 BST
+R G 1945 o - Ap M>=2 1s 2 BDST
+R G 1945 o - Jul Su>=9 1s 1 BST
+R G 1945 1946 - O Su>=2 2s 0 GMT
+R G 1946 o - Ap Su>=9 2s 1 BST
+R G 1947 o - Mar 16 2s 1 BST
+R G 1947 o - Ap 13 1s 2 BDST
+R G 1947 o - Au 10 1s 1 BST
+R G 1947 o - N 2 2s 0 GMT
+R G 1948 o - Mar 14 2s 1 BST
+R G 1948 o - O 31 2s 0 GMT
+R G 1949 o - Ap 3 2s 1 BST
+R G 1949 o - O 30 2s 0 GMT
+R G 1950 1952 - Ap Su>=14 2s 1 BST
+R G 1950 1952 - O Su>=21 2s 0 GMT
+R G 1953 o - Ap Su>=16 2s 1 BST
+R G 1953 1960 - O Su>=2 2s 0 GMT
+R G 1954 o - Ap Su>=9 2s 1 BST
+R G 1955 1956 - Ap Su>=16 2s 1 BST
+R G 1957 o - Ap Su>=9 2s 1 BST
+R G 1958 1959 - Ap Su>=16 2s 1 BST
+R G 1960 o - Ap Su>=9 2s 1 BST
+R G 1961 1963 - Mar lastSu 2s 1 BST
+R G 1961 1968 - O Su>=23 2s 0 GMT
+R G 1964 1967 - Mar Su>=19 2s 1 BST
+R G 1968 o - F 18 2s 1 BST
+R G 1972 1980 - Mar Su>=16 2s 1 BST
+R G 1972 1980 - O Su>=23 2s 0 GMT
+R G 1981 1995 - Mar lastSu 1u 1 BST
+R G 1981 1989 - O Su>=23 1u 0 GMT
+R G 1990 1995 - O Su>=22 1u 0 GMT
+Z Europe/London -0:1:15 - LMT 1847 D 1 0s
+0 G %s 1968 O 27
+1 - BST 1971 O 31 2u
+0 G %s 1996
+0 E GMT/BST
+L Europe/London Europe/Jersey
+L Europe/London Europe/Guernsey
+L Europe/London Europe/Isle_of_Man
+R IE 1971 o - O 31 2u -1 -
+R IE 1972 1980 - Mar Su>=16 2u 0 -
+R IE 1972 1980 - O Su>=23 2u -1 -
+R IE 1981 ma - Mar lastSu 1u 0 -
+R IE 1981 1989 - O Su>=23 1u -1 -
+R IE 1990 1995 - O Su>=22 1u -1 -
+R IE 1996 ma - O lastSu 1u -1 -
+Z Europe/Dublin -0:25 - LMT 1880 Au 2
+-0:25:21 - DMT 1916 May 21 2s
+-0:25:21 1 IST 1916 O 1 2s
+0 G %s 1921 D 6
+0 G GMT/IST 1940 F 25 2s
+0 1 IST 1946 O 6 2s
+0 - GMT 1947 Mar 16 2s
+0 1 IST 1947 N 2 2s
+0 - GMT 1948 Ap 18 2s
+0 G GMT/IST 1968 O 27
+1 IE IST/GMT
+R E 1977 1980 - Ap Su>=1 1u 1 S
+R E 1977 o - S lastSu 1u 0 -
+R E 1978 o - O 1 1u 0 -
+R E 1979 1995 - S lastSu 1u 0 -
+R E 1981 ma - Mar lastSu 1u 1 S
+R E 1996 ma - O lastSu 1u 0 -
+R W- 1977 1980 - Ap Su>=1 1s 1 S
+R W- 1977 o - S lastSu 1s 0 -
+R W- 1978 o - O 1 1s 0 -
+R W- 1979 1995 - S lastSu 1s 0 -
+R W- 1981 ma - Mar lastSu 1s 1 S
+R W- 1996 ma - O lastSu 1s 0 -
+R c 1916 o - Ap 30 23 1 S
+R c 1916 o - O 1 1 0 -
+R c 1917 1918 - Ap M>=15 2s 1 S
+R c 1917 1918 - S M>=15 2s 0 -
+R c 1940 o - Ap 1 2s 1 S
+R c 1942 o - N 2 2s 0 -
+R c 1943 o - Mar 29 2s 1 S
+R c 1943 o - O 4 2s 0 -
+R c 1944 1945 - Ap M>=1 2s 1 S
+R c 1944 o - O 2 2s 0 -
+R c 1945 o - S 16 2s 0 -
+R c 1977 1980 - Ap Su>=1 2s 1 S
+R c 1977 o - S lastSu 2s 0 -
+R c 1978 o - O 1 2s 0 -
+R c 1979 1995 - S lastSu 2s 0 -
+R c 1981 ma - Mar lastSu 2s 1 S
+R c 1996 ma - O lastSu 2s 0 -
+R e 1977 1980 - Ap Su>=1 0 1 S
+R e 1977 o - S lastSu 0 0 -
+R e 1978 o - O 1 0 0 -
+R e 1979 1995 - S lastSu 0 0 -
+R e 1981 ma - Mar lastSu 0 1 S
+R e 1996 ma - O lastSu 0 0 -
+R R 1917 o - Jul 1 23 1 MST
+R R 1917 o - D 28 0 0 MMT
+R R 1918 o - May 31 22 2 MDST
+R R 1918 o - S 16 1 1 MST
+R R 1919 o - May 31 23 2 MDST
+R R 1919 o - Jul 1 0u 1 MSD
+R R 1919 o - Au 16 0 0 MSK
+R R 1921 o - F 14 23 1 MSD
+R R 1921 o - Mar 20 23 2 +05
+R R 1921 o - S 1 0 1 MSD
+R R 1921 o - O 1 0 0 -
+R R 1981 1984 - Ap 1 0 1 S
+R R 1981 1983 - O 1 0 0 -
+R R 1984 1995 - S lastSu 2s 0 -
+R R 1985 2010 - Mar lastSu 2s 1 S
+R R 1996 2010 - O lastSu 2s 0 -
+Z WET 0 E WE%sT
+Z CET 1 c CE%sT
+Z MET 1 c ME%sT
+Z EET 2 E EE%sT
+R q 1940 o - Jun 16 0 1 S
+R q 1942 o - N 2 3 0 -
+R q 1943 o - Mar 29 2 1 S
+R q 1943 o - Ap 10 3 0 -
+R q 1974 o - May 4 0 1 S
+R q 1974 o - O 2 0 0 -
+R q 1975 o - May 1 0 1 S
+R q 1975 o - O 2 0 0 -
+R q 1976 o - May 2 0 1 S
+R q 1976 o - O 3 0 0 -
+R q 1977 o - May 8 0 1 S
+R q 1977 o - O 2 0 0 -
+R q 1978 o - May 6 0 1 S
+R q 1978 o - O 1 0 0 -
+R q 1979 o - May 5 0 1 S
+R q 1979 o - S 30 0 0 -
+R q 1980 o - May 3 0 1 S
+R q 1980 o - O 4 0 0 -
+R q 1981 o - Ap 26 0 1 S
+R q 1981 o - S 27 0 0 -
+R q 1982 o - May 2 0 1 S
+R q 1982 o - O 3 0 0 -
+R q 1983 o - Ap 18 0 1 S
+R q 1983 o - O 1 0 0 -
+R q 1984 o - Ap 1 0 1 S
+Z Europe/Tirane 1:19:20 - LMT 1914
+1 - CET 1940 Jun 16
+1 q CE%sT 1984 Jul
+1 E CE%sT
+Z Europe/Andorra 0:6:4 - LMT 1901
+0 - WET 1946 S 30
+1 - CET 1985 Mar 31 2
+1 E CE%sT
+R a 1920 o - Ap 5 2s 1 S
+R a 1920 o - S 13 2s 0 -
+R a 1946 o - Ap 14 2s 1 S
+R a 1946 o - O 7 2s 0 -
+R a 1947 1948 - O Su>=1 2s 0 -
+R a 1947 o - Ap 6 2s 1 S
+R a 1948 o - Ap 18 2s 1 S
+R a 1980 o - Ap 6 0 1 S
+R a 1980 o - S 28 0 0 -
+Z Europe/Vienna 1:5:21 - LMT 1893 Ap
+1 c CE%sT 1920
+1 a CE%sT 1940 Ap 1 2s
+1 c CE%sT 1945 Ap 2 2s
+1 1 CEST 1945 Ap 12 2s
+1 - CET 1946
+1 a CE%sT 1981
+1 E CE%sT
+Z Europe/Minsk 1:50:16 - LMT 1880
+1:50 - MMT 1924 May 2
+2 - EET 1930 Jun 21
+3 - MSK 1941 Jun 28
+1 c CE%sT 1944 Jul 3
+3 R MSK/MSD 1990
+3 - MSK 1991 Mar 31 2s
+2 R EE%sT 2011 Mar 27 2s
+3 - +03
+R b 1918 o - Mar 9 0s 1 S
+R b 1918 1919 - O Sa>=1 23s 0 -
+R b 1919 o - Mar 1 23s 1 S
+R b 1920 o - F 14 23s 1 S
+R b 1920 o - O 23 23s 0 -
+R b 1921 o - Mar 14 23s 1 S
+R b 1921 o - O 25 23s 0 -
+R b 1922 o - Mar 25 23s 1 S
+R b 1922 1927 - O Sa>=1 23s 0 -
+R b 1923 o - Ap 21 23s 1 S
+R b 1924 o - Mar 29 23s 1 S
+R b 1925 o - Ap 4 23s 1 S
+R b 1926 o - Ap 17 23s 1 S
+R b 1927 o - Ap 9 23s 1 S
+R b 1928 o - Ap 14 23s 1 S
+R b 1928 1938 - O Su>=2 2s 0 -
+R b 1929 o - Ap 21 2s 1 S
+R b 1930 o - Ap 13 2s 1 S
+R b 1931 o - Ap 19 2s 1 S
+R b 1932 o - Ap 3 2s 1 S
+R b 1933 o - Mar 26 2s 1 S
+R b 1934 o - Ap 8 2s 1 S
+R b 1935 o - Mar 31 2s 1 S
+R b 1936 o - Ap 19 2s 1 S
+R b 1937 o - Ap 4 2s 1 S
+R b 1938 o - Mar 27 2s 1 S
+R b 1939 o - Ap 16 2s 1 S
+R b 1939 o - N 19 2s 0 -
+R b 1940 o - F 25 2s 1 S
+R b 1944 o - S 17 2s 0 -
+R b 1945 o - Ap 2 2s 1 S
+R b 1945 o - S 16 2s 0 -
+R b 1946 o - May 19 2s 1 S
+R b 1946 o - O 7 2s 0 -
+Z Europe/Brussels 0:17:30 - LMT 1880
+0:17:30 - BMT 1892 May 1 0:17:30
+0 - WET 1914 N 8
+1 - CET 1916 May
+1 c CE%sT 1918 N 11 11u
+0 b WE%sT 1940 May 20 2s
+1 c CE%sT 1944 S 3
+1 b CE%sT 1977
+1 E CE%sT
+R BG 1979 o - Mar 31 23 1 S
+R BG 1979 o - O 1 1 0 -
+R BG 1980 1982 - Ap Sa>=1 23 1 S
+R BG 1980 o - S 29 1 0 -
+R BG 1981 o - S 27 2 0 -
+Z Europe/Sofia 1:33:16 - LMT 1880
+1:56:56 - IMT 1894 N 30
+2 - EET 1942 N 2 3
+1 c CE%sT 1945
+1 - CET 1945 Ap 2 3
+2 - EET 1979 Mar 31 23
+2 BG EE%sT 1982 S 26 3
+2 c EE%sT 1991
+2 e EE%sT 1997
+2 E EE%sT
+R CZ 1945 o - Ap M>=1 2s 1 S
+R CZ 1945 o - O 1 2s 0 -
+R CZ 1946 o - May 6 2s 1 S
+R CZ 1946 1949 - O Su>=1 2s 0 -
+R CZ 1947 1948 - Ap Su>=15 2s 1 S
+R CZ 1949 o - Ap 9 2s 1 S
+Z Europe/Prague 0:57:44 - LMT 1850
+0:57:44 - PMT 1891 O
+1 c CE%sT 1945 May 9
+1 CZ CE%sT 1946 D 1 3
+1 -1 GMT 1947 F 23 2
+1 CZ CE%sT 1979
+1 E CE%sT
+R D 1916 o - May 14 23 1 S
+R D 1916 o - S 30 23 0 -
+R D 1940 o - May 15 0 1 S
+R D 1945 o - Ap 2 2s 1 S
+R D 1945 o - Au 15 2s 0 -
+R D 1946 o - May 1 2s 1 S
+R D 1946 o - S 1 2s 0 -
+R D 1947 o - May 4 2s 1 S
+R D 1947 o - Au 10 2s 0 -
+R D 1948 o - May 9 2s 1 S
+R D 1948 o - Au 8 2s 0 -
+Z Europe/Copenhagen 0:50:20 - LMT 1890
+0:50:20 - CMT 1894
+1 D CE%sT 1942 N 2 2s
+1 c CE%sT 1945 Ap 2 2
+1 D CE%sT 1980
+1 E CE%sT
+Z Atlantic/Faroe -0:27:4 - LMT 1908 Ja 11
+0 - WET 1981
+0 E WE%sT
+R Th 1991 1992 - Mar lastSu 2 1 D
+R Th 1991 1992 - S lastSu 2 0 S
+R Th 1993 2006 - Ap Su>=1 2 1 D
+R Th 1993 2006 - O lastSu 2 0 S
+R Th 2007 ma - Mar Su>=8 2 1 D
+R Th 2007 ma - N Su>=1 2 0 S
+Z America/Danmarkshavn -1:14:40 - LMT 1916 Jul 28
+-3 - -03 1980 Ap 6 2
+-3 E -03/-02 1996
+0 - GMT
+Z America/Scoresbysund -1:27:52 - LMT 1916 Jul 28
+-2 - -02 1980 Ap 6 2
+-2 c -02/-01 1981 Mar 29
+-1 E -01/+00
+Z America/Nuuk -3:26:56 - LMT 1916 Jul 28
+-3 - -03 1980 Ap 6 2
+-3 E -03/-02
+Z America/Thule -4:35:8 - LMT 1916 Jul 28
+-4 Th A%sT
+Z Europe/Tallinn 1:39 - LMT 1880
+1:39 - TMT 1918 F
+1 c CE%sT 1919 Jul
+1:39 - TMT 1921 May
+2 - EET 1940 Au 6
+3 - MSK 1941 S 15
+1 c CE%sT 1944 S 22
+3 R MSK/MSD 1989 Mar 26 2s
+2 1 EEST 1989 S 24 2s
+2 c EE%sT 1998 S 22
+2 E EE%sT 1999 O 31 4
+2 - EET 2002 F 21
+2 E EE%sT
+R FI 1942 o - Ap 2 24 1 S
+R FI 1942 o - O 4 1 0 -
+R FI 1981 1982 - Mar lastSu 2 1 S
+R FI 1981 1982 - S lastSu 3 0 -
+Z Europe/Helsinki 1:39:49 - LMT 1878 May 31
+1:39:49 - HMT 1921 May
+2 FI EE%sT 1983
+2 E EE%sT
+L Europe/Helsinki Europe/Mariehamn
+R F 1916 o - Jun 14 23s 1 S
+R F 1916 1919 - O Su>=1 23s 0 -
+R F 1917 o - Mar 24 23s 1 S
+R F 1918 o - Mar 9 23s 1 S
+R F 1919 o - Mar 1 23s 1 S
+R F 1920 o - F 14 23s 1 S
+R F 1920 o - O 23 23s 0 -
+R F 1921 o - Mar 14 23s 1 S
+R F 1921 o - O 25 23s 0 -
+R F 1922 o - Mar 25 23s 1 S
+R F 1922 1938 - O Sa>=1 23s 0 -
+R F 1923 o - May 26 23s 1 S
+R F 1924 o - Mar 29 23s 1 S
+R F 1925 o - Ap 4 23s 1 S
+R F 1926 o - Ap 17 23s 1 S
+R F 1927 o - Ap 9 23s 1 S
+R F 1928 o - Ap 14 23s 1 S
+R F 1929 o - Ap 20 23s 1 S
+R F 1930 o - Ap 12 23s 1 S
+R F 1931 o - Ap 18 23s 1 S
+R F 1932 o - Ap 2 23s 1 S
+R F 1933 o - Mar 25 23s 1 S
+R F 1934 o - Ap 7 23s 1 S
+R F 1935 o - Mar 30 23s 1 S
+R F 1936 o - Ap 18 23s 1 S
+R F 1937 o - Ap 3 23s 1 S
+R F 1938 o - Mar 26 23s 1 S
+R F 1939 o - Ap 15 23s 1 S
+R F 1939 o - N 18 23s 0 -
+R F 1940 o - F 25 2 1 S
+R F 1941 o - May 5 0 2 M
+R F 1941 o - O 6 0 1 S
+R F 1942 o - Mar 9 0 2 M
+R F 1942 o - N 2 3 1 S
+R F 1943 o - Mar 29 2 2 M
+R F 1943 o - O 4 3 1 S
+R F 1944 o - Ap 3 2 2 M
+R F 1944 o - O 8 1 1 S
+R F 1945 o - Ap 2 2 2 M
+R F 1945 o - S 16 3 0 -
+R F 1976 o - Mar 28 1 1 S
+R F 1976 o - S 26 1 0 -
+Z Europe/Paris 0:9:21 - LMT 1891 Mar 16
+0:9:21 - PMT 1911 Mar 11
+0 F WE%sT 1940 Jun 14 23
+1 c CE%sT 1944 Au 25
+0 F WE%sT 1945 S 16 3
+1 F CE%sT 1977
+1 E CE%sT
+R DE 1946 o - Ap 14 2s 1 S
+R DE 1946 o - O 7 2s 0 -
+R DE 1947 1949 - O Su>=1 2s 0 -
+R DE 1947 o - Ap 6 3s 1 S
+R DE 1947 o - May 11 2s 2 M
+R DE 1947 o - Jun 29 3 1 S
+R DE 1948 o - Ap 18 2s 1 S
+R DE 1949 o - Ap 10 2s 1 S
+R So 1945 o - May 24 2 2 M
+R So 1945 o - S 24 3 1 S
+R So 1945 o - N 18 2s 0 -
+Z Europe/Berlin 0:53:28 - LMT 1893 Ap
+1 c CE%sT 1945 May 24 2
+1 So CE%sT 1946
+1 DE CE%sT 1980
+1 E CE%sT
+L Europe/Zurich Europe/Busingen
+Z Europe/Gibraltar -0:21:24 - LMT 1880 Au 2 0s
+0 G %s 1957 Ap 14 2
+1 - CET 1982
+1 E CE%sT
+R g 1932 o - Jul 7 0 1 S
+R g 1932 o - S 1 0 0 -
+R g 1941 o - Ap 7 0 1 S
+R g 1942 o - N 2 3 0 -
+R g 1943 o - Mar 30 0 1 S
+R g 1943 o - O 4 0 0 -
+R g 1952 o - Jul 1 0 1 S
+R g 1952 o - N 2 0 0 -
+R g 1975 o - Ap 12 0s 1 S
+R g 1975 o - N 26 0s 0 -
+R g 1976 o - Ap 11 2s 1 S
+R g 1976 o - O 10 2s 0 -
+R g 1977 1978 - Ap Su>=1 2s 1 S
+R g 1977 o - S 26 2s 0 -
+R g 1978 o - S 24 4 0 -
+R g 1979 o - Ap 1 9 1 S
+R g 1979 o - S 29 2 0 -
+R g 1980 o - Ap 1 0 1 S
+R g 1980 o - S 28 0 0 -
+Z Europe/Athens 1:34:52 - LMT 1895 S 14
+1:34:52 - AMT 1916 Jul 28 0:1
+2 g EE%sT 1941 Ap 30
+1 g CE%sT 1944 Ap 4
+2 g EE%sT 1981
+2 E EE%sT
+R h 1918 1919 - Ap 15 2 1 S
+R h 1918 1920 - S M>=15 3 0 -
+R h 1920 o - Ap 5 2 1 S
+R h 1945 o - May 1 23 1 S
+R h 1945 o - N 1 1 0 -
+R h 1946 o - Mar 31 2s 1 S
+R h 1946 o - O 7 2 0 -
+R h 1947 1949 - Ap Su>=4 2s 1 S
+R h 1947 1949 - O Su>=1 2s 0 -
+R h 1954 o - May 23 0 1 S
+R h 1954 o - O 3 0 0 -
+R h 1955 o - May 22 2 1 S
+R h 1955 o - O 2 3 0 -
+R h 1956 1957 - Jun Su>=1 2 1 S
+R h 1956 1957 - S lastSu 3 0 -
+R h 1980 o - Ap 6 0 1 S
+R h 1980 o - S 28 1 0 -
+R h 1981 1983 - Mar lastSu 0 1 S
+R h 1981 1983 - S lastSu 1 0 -
+Z Europe/Budapest 1:16:20 - LMT 1890 N
+1 c CE%sT 1918
+1 h CE%sT 1941 Ap 7 23
+1 c CE%sT 1945
+1 h CE%sT 1984
+1 E CE%sT
+R w 1917 1919 - F 19 23 1 -
+R w 1917 o - O 21 1 0 -
+R w 1918 1919 - N 16 1 0 -
+R w 1921 o - Mar 19 23 1 -
+R w 1921 o - Jun 23 1 0 -
+R w 1939 o - Ap 29 23 1 -
+R w 1939 o - O 29 2 0 -
+R w 1940 o - F 25 2 1 -
+R w 1940 1941 - N Su>=2 1s 0 -
+R w 1941 1942 - Mar Su>=2 1s 1 -
+R w 1943 1946 - Mar Su>=1 1s 1 -
+R w 1942 1948 - O Su>=22 1s 0 -
+R w 1947 1967 - Ap Su>=1 1s 1 -
+R w 1949 o - O 30 1s 0 -
+R w 1950 1966 - O Su>=22 1s 0 -
+R w 1967 o - O 29 1s 0 -
+Z Atlantic/Reykjavik -1:28 - LMT 1908
+-1 w -01/+00 1968 Ap 7 1s
+0 - GMT
+R I 1916 o - Jun 3 24 1 S
+R I 1916 1917 - S 30 24 0 -
+R I 1917 o - Mar 31 24 1 S
+R I 1918 o - Mar 9 24 1 S
+R I 1918 o - O 6 24 0 -
+R I 1919 o - Mar 1 24 1 S
+R I 1919 o - O 4 24 0 -
+R I 1920 o - Mar 20 24 1 S
+R I 1920 o - S 18 24 0 -
+R I 1940 o - Jun 14 24 1 S
+R I 1942 o - N 2 2s 0 -
+R I 1943 o - Mar 29 2s 1 S
+R I 1943 o - O 4 2s 0 -
+R I 1944 o - Ap 2 2s 1 S
+R I 1944 o - S 17 2s 0 -
+R I 1945 o - Ap 2 2 1 S
+R I 1945 o - S 15 1 0 -
+R I 1946 o - Mar 17 2s 1 S
+R I 1946 o - O 6 2s 0 -
+R I 1947 o - Mar 16 0s 1 S
+R I 1947 o - O 5 0s 0 -
+R I 1948 o - F 29 2s 1 S
+R I 1948 o - O 3 2s 0 -
+R I 1966 1968 - May Su>=22 0s 1 S
+R I 1966 o - S 24 24 0 -
+R I 1967 1969 - S Su>=22 0s 0 -
+R I 1969 o - Jun 1 0s 1 S
+R I 1970 o - May 31 0s 1 S
+R I 1970 o - S lastSu 0s 0 -
+R I 1971 1972 - May Su>=22 0s 1 S
+R I 1971 o - S lastSu 0s 0 -
+R I 1972 o - O 1 0s 0 -
+R I 1973 o - Jun 3 0s 1 S
+R I 1973 1974 - S lastSu 0s 0 -
+R I 1974 o - May 26 0s 1 S
+R I 1975 o - Jun 1 0s 1 S
+R I 1975 1977 - S lastSu 0s 0 -
+R I 1976 o - May 30 0s 1 S
+R I 1977 1979 - May Su>=22 0s 1 S
+R I 1978 o - O 1 0s 0 -
+R I 1979 o - S 30 0s 0 -
+Z Europe/Rome 0:49:56 - LMT 1866 D 12
+0:49:56 - RMT 1893 O 31 23:49:56
+1 I CE%sT 1943 S 10
+1 c CE%sT 1944 Jun 4
+1 I CE%sT 1980
+1 E CE%sT
+L Europe/Rome Europe/Vatican
+L Europe/Rome Europe/San_Marino
+R LV 1989 1996 - Mar lastSu 2s 1 S
+R LV 1989 1996 - S lastSu 2s 0 -
+Z Europe/Riga 1:36:34 - LMT 1880
+1:36:34 - RMT 1918 Ap 15 2
+1:36:34 1 LST 1918 S 16 3
+1:36:34 - RMT 1919 Ap 1 2
+1:36:34 1 LST 1919 May 22 3
+1:36:34 - RMT 1926 May 11
+2 - EET 1940 Au 5
+3 - MSK 1941 Jul
+1 c CE%sT 1944 O 13
+3 R MSK/MSD 1989 Mar lastSu 2s
+2 1 EEST 1989 S lastSu 2s
+2 LV EE%sT 1997 Ja 21
+2 E EE%sT 2000 F 29
+2 - EET 2001 Ja 2
+2 E EE%sT
+L Europe/Zurich Europe/Vaduz
+Z Europe/Vilnius 1:41:16 - LMT 1880
+1:24 - WMT 1917
+1:35:36 - KMT 1919 O 10
+1 - CET 1920 Jul 12
+2 - EET 1920 O 9
+1 - CET 1940 Au 3
+3 - MSK 1941 Jun 24
+1 c CE%sT 1944 Au
+3 R MSK/MSD 1989 Mar 26 2s
+2 R EE%sT 1991 S 29 2s
+2 c EE%sT 1998
+2 - EET 1998 Mar 29 1u
+1 E CE%sT 1999 O 31 1u
+2 - EET 2003
+2 E EE%sT
+R LX 1916 o - May 14 23 1 S
+R LX 1916 o - O 1 1 0 -
+R LX 1917 o - Ap 28 23 1 S
+R LX 1917 o - S 17 1 0 -
+R LX 1918 o - Ap M>=15 2s 1 S
+R LX 1918 o - S M>=15 2s 0 -
+R LX 1919 o - Mar 1 23 1 S
+R LX 1919 o - O 5 3 0 -
+R LX 1920 o - F 14 23 1 S
+R LX 1920 o - O 24 2 0 -
+R LX 1921 o - Mar 14 23 1 S
+R LX 1921 o - O 26 2 0 -
+R LX 1922 o - Mar 25 23 1 S
+R LX 1922 o - O Su>=2 1 0 -
+R LX 1923 o - Ap 21 23 1 S
+R LX 1923 o - O Su>=2 2 0 -
+R LX 1924 o - Mar 29 23 1 S
+R LX 1924 1928 - O Su>=2 1 0 -
+R LX 1925 o - Ap 5 23 1 S
+R LX 1926 o - Ap 17 23 1 S
+R LX 1927 o - Ap 9 23 1 S
+R LX 1928 o - Ap 14 23 1 S
+R LX 1929 o - Ap 20 23 1 S
+Z Europe/Luxembourg 0:24:36 - LMT 1904 Jun
+1 LX CE%sT 1918 N 25
+0 LX WE%sT 1929 O 6 2s
+0 b WE%sT 1940 May 14 3
+1 c WE%sT 1944 S 18 3
+1 b CE%sT 1977
+1 E CE%sT
+R MT 1973 o - Mar 31 0s 1 S
+R MT 1973 o - S 29 0s 0 -
+R MT 1974 o - Ap 21 0s 1 S
+R MT 1974 o - S 16 0s 0 -
+R MT 1975 1979 - Ap Su>=15 2 1 S
+R MT 1975 1980 - S Su>=15 2 0 -
+R MT 1980 o - Mar 31 2 1 S
+Z Europe/Malta 0:58:4 - LMT 1893 N 2 0s
+1 I CE%sT 1973 Mar 31
+1 MT CE%sT 1981
+1 E CE%sT
+R MD 1997 ma - Mar lastSu 2 1 S
+R MD 1997 ma - O lastSu 3 0 -
+Z Europe/Chisinau 1:55:20 - LMT 1880
+1:55 - CMT 1918 F 15
+1:44:24 - BMT 1931 Jul 24
+2 z EE%sT 1940 Au 15
+2 1 EEST 1941 Jul 17
+1 c CE%sT 1944 Au 24
+3 R MSK/MSD 1990 May 6 2
+2 R EE%sT 1992
+2 e EE%sT 1997
+2 MD EE%sT
+Z Europe/Monaco 0:29:32 - LMT 1892 Jun
+0:9:21 - PMT 1911 Mar 29
+0 F WE%sT 1945 S 16 3
+1 F CE%sT 1977
+1 E CE%sT
+R N 1916 o - May 1 0 1 NST
+R N 1916 o - O 1 0 0 AMT
+R N 1917 o - Ap 16 2s 1 NST
+R N 1917 o - S 17 2s 0 AMT
+R N 1918 1921 - Ap M>=1 2s 1 NST
+R N 1918 1921 - S lastM 2s 0 AMT
+R N 1922 o - Mar lastSu 2s 1 NST
+R N 1922 1936 - O Su>=2 2s 0 AMT
+R N 1923 o - Jun F>=1 2s 1 NST
+R N 1924 o - Mar lastSu 2s 1 NST
+R N 1925 o - Jun F>=1 2s 1 NST
+R N 1926 1931 - May 15 2s 1 NST
+R N 1932 o - May 22 2s 1 NST
+R N 1933 1936 - May 15 2s 1 NST
+R N 1937 o - May 22 2s 1 NST
+R N 1937 o - Jul 1 0 1 S
+R N 1937 1939 - O Su>=2 2s 0 -
+R N 1938 1939 - May 15 2s 1 S
+R N 1945 o - Ap 2 2s 1 S
+R N 1945 o - S 16 2s 0 -
+Z Europe/Amsterdam 0:19:32 - LMT 1835
+0:19:32 N %s 1937 Jul
+0:20 N +0020/+0120 1940 May 16
+1 c CE%sT 1945 Ap 2 2
+1 N CE%sT 1977
+1 E CE%sT
+R NO 1916 o - May 22 1 1 S
+R NO 1916 o - S 30 0 0 -
+R NO 1945 o - Ap 2 2s 1 S
+R NO 1945 o - O 1 2s 0 -
+R NO 1959 1964 - Mar Su>=15 2s 1 S
+R NO 1959 1965 - S Su>=15 2s 0 -
+R NO 1965 o - Ap 25 2s 1 S
+Z Europe/Oslo 0:43 - LMT 1895
+1 NO CE%sT 1940 Au 10 23
+1 c CE%sT 1945 Ap 2 2
+1 NO CE%sT 1980
+1 E CE%sT
+L Europe/Oslo Arctic/Longyearbyen
+R O 1918 1919 - S 16 2s 0 -
+R O 1919 o - Ap 15 2s 1 S
+R O 1944 o - Ap 3 2s 1 S
+R O 1944 o - O 4 2 0 -
+R O 1945 o - Ap 29 0 1 S
+R O 1945 o - N 1 0 0 -
+R O 1946 o - Ap 14 0s 1 S
+R O 1946 o - O 7 2s 0 -
+R O 1947 o - May 4 2s 1 S
+R O 1947 1949 - O Su>=1 2s 0 -
+R O 1948 o - Ap 18 2s 1 S
+R O 1949 o - Ap 10 2s 1 S
+R O 1957 o - Jun 2 1s 1 S
+R O 1957 1958 - S lastSu 1s 0 -
+R O 1958 o - Mar 30 1s 1 S
+R O 1959 o - May 31 1s 1 S
+R O 1959 1961 - O Su>=1 1s 0 -
+R O 1960 o - Ap 3 1s 1 S
+R O 1961 1964 - May lastSu 1s 1 S
+R O 1962 1964 - S lastSu 1s 0 -
+Z Europe/Warsaw 1:24 - LMT 1880
+1:24 - WMT 1915 Au 5
+1 c CE%sT 1918 S 16 3
+2 O EE%sT 1922 Jun
+1 O CE%sT 1940 Jun 23 2
+1 c CE%sT 1944 O
+1 O CE%sT 1977
+1 W- CE%sT 1988
+1 E CE%sT
+R p 1916 o - Jun 17 23 1 S
+R p 1916 o - N 1 1 0 -
+R p 1917 o - F 28 23s 1 S
+R p 1917 1921 - O 14 23s 0 -
+R p 1918 o - Mar 1 23s 1 S
+R p 1919 o - F 28 23s 1 S
+R p 1920 o - F 29 23s 1 S
+R p 1921 o - F 28 23s 1 S
+R p 1924 o - Ap 16 23s 1 S
+R p 1924 o - O 14 23s 0 -
+R p 1926 o - Ap 17 23s 1 S
+R p 1926 1929 - O Sa>=1 23s 0 -
+R p 1927 o - Ap 9 23s 1 S
+R p 1928 o - Ap 14 23s 1 S
+R p 1929 o - Ap 20 23s 1 S
+R p 1931 o - Ap 18 23s 1 S
+R p 1931 1932 - O Sa>=1 23s 0 -
+R p 1932 o - Ap 2 23s 1 S
+R p 1934 o - Ap 7 23s 1 S
+R p 1934 1938 - O Sa>=1 23s 0 -
+R p 1935 o - Mar 30 23s 1 S
+R p 1936 o - Ap 18 23s 1 S
+R p 1937 o - Ap 3 23s 1 S
+R p 1938 o - Mar 26 23s 1 S
+R p 1939 o - Ap 15 23s 1 S
+R p 1939 o - N 18 23s 0 -
+R p 1940 o - F 24 23s 1 S
+R p 1940 1941 - O 5 23s 0 -
+R p 1941 o - Ap 5 23s 1 S
+R p 1942 1945 - Mar Sa>=8 23s 1 S
+R p 1942 o - Ap 25 22s 2 M
+R p 1942 o - Au 15 22s 1 S
+R p 1942 1945 - O Sa>=24 23s 0 -
+R p 1943 o - Ap 17 22s 2 M
+R p 1943 1945 - Au Sa>=25 22s 1 S
+R p 1944 1945 - Ap Sa>=21 22s 2 M
+R p 1946 o - Ap Sa>=1 23s 1 S
+R p 1946 o - O Sa>=1 23s 0 -
+R p 1947 1965 - Ap Su>=1 2s 1 S
+R p 1947 1965 - O Su>=1 2s 0 -
+R p 1977 o - Mar 27 0s 1 S
+R p 1977 o - S 25 0s 0 -
+R p 1978 1979 - Ap Su>=1 0s 1 S
+R p 1978 o - O 1 0s 0 -
+R p 1979 1982 - S lastSu 1s 0 -
+R p 1980 o - Mar lastSu 0s 1 S
+R p 1981 1982 - Mar lastSu 1s 1 S
+R p 1983 o - Mar lastSu 2s 1 S
+Z Europe/Lisbon -0:36:45 - LMT 1884
+-0:36:45 - LMT 1912 Ja 1 0u
+0 p WE%sT 1966 Ap 3 2
+1 - CET 1976 S 26 1
+0 p WE%sT 1983 S 25 1s
+0 W- WE%sT 1992 S 27 1s
+1 E CE%sT 1996 Mar 31 1u
+0 E WE%sT
+Z Atlantic/Azores -1:42:40 - LMT 1884
+-1:54:32 - HMT 1912 Ja 1 2u
+-2 p -02/-01 1942 Ap 25 22s
+-2 p +00 1942 Au 15 22s
+-2 p -02/-01 1943 Ap 17 22s
+-2 p +00 1943 Au 28 22s
+-2 p -02/-01 1944 Ap 22 22s
+-2 p +00 1944 Au 26 22s
+-2 p -02/-01 1945 Ap 21 22s
+-2 p +00 1945 Au 25 22s
+-2 p -02/-01 1966 Ap 3 2
+-1 p -01/+00 1983 S 25 1s
+-1 W- -01/+00 1992 S 27 1s
+0 E WE%sT 1993 Mar 28 1u
+-1 E -01/+00
+Z Atlantic/Madeira -1:7:36 - LMT 1884
+-1:7:36 - FMT 1912 Ja 1 1u
+-1 p -01/+00 1942 Ap 25 22s
+-1 p +01 1942 Au 15 22s
+-1 p -01/+00 1943 Ap 17 22s
+-1 p +01 1943 Au 28 22s
+-1 p -01/+00 1944 Ap 22 22s
+-1 p +01 1944 Au 26 22s
+-1 p -01/+00 1945 Ap 21 22s
+-1 p +01 1945 Au 25 22s
+-1 p -01/+00 1966 Ap 3 2
+0 p WE%sT 1983 S 25 1s
+0 E WE%sT
+R z 1932 o - May 21 0s 1 S
+R z 1932 1939 - O Su>=1 0s 0 -
+R z 1933 1939 - Ap Su>=2 0s 1 S
+R z 1979 o - May 27 0 1 S
+R z 1979 o - S lastSu 0 0 -
+R z 1980 o - Ap 5 23 1 S
+R z 1980 o - S lastSu 1 0 -
+R z 1991 1993 - Mar lastSu 0s 1 S
+R z 1991 1993 - S lastSu 0s 0 -
+Z Europe/Bucharest 1:44:24 - LMT 1891 O
+1:44:24 - BMT 1931 Jul 24
+2 z EE%sT 1981 Mar 29 2s
+2 c EE%sT 1991
+2 z EE%sT 1994
+2 e EE%sT 1997
+2 E EE%sT
+Z Europe/Kaliningrad 1:22 - LMT 1893 Ap
+1 c CE%sT 1945 Ap 10
+2 O EE%sT 1946 Ap 7
+3 R MSK/MSD 1989 Mar 26 2s
+2 R EE%sT 2011 Mar 27 2s
+3 - +03 2014 O 26 2s
+2 - EET
+Z Europe/Moscow 2:30:17 - LMT 1880
+2:30:17 - MMT 1916 Jul 3
+2:31:19 R %s 1919 Jul 1 0u
+3 R %s 1921 O
+3 R MSK/MSD 1922 O
+2 - EET 1930 Jun 21
+3 R MSK/MSD 1991 Mar 31 2s
+2 R EE%sT 1992 Ja 19 2s
+3 R MSK/MSD 2011 Mar 27 2s
+4 - MSK 2014 O 26 2s
+3 - MSK
+Z Europe/Simferopol 2:16:24 - LMT 1880
+2:16 - SMT 1924 May 2
+2 - EET 1930 Jun 21
+3 - MSK 1941 N
+1 c CE%sT 1944 Ap 13
+3 R MSK/MSD 1990
+3 - MSK 1990 Jul 1 2
+2 - EET 1992 Mar 20
+2 c EE%sT 1994 May
+3 e MSK/MSD 1996 Mar 31 0s
+3 1 MSD 1996 O 27 3s
+3 R MSK/MSD 1997
+3 - MSK 1997 Mar lastSu 1u
+2 E EE%sT 2014 Mar 30 2
+4 - MSK 2014 O 26 2s
+3 - MSK
+Z Europe/Astrakhan 3:12:12 - LMT 1924 May
+3 - +03 1930 Jun 21
+4 R +04/+05 1989 Mar 26 2s
+3 R +03/+04 1991 Mar 31 2s
+4 - +04 1992 Mar 29 2s
+3 R +03/+04 2011 Mar 27 2s
+4 - +04 2014 O 26 2s
+3 - +03 2016 Mar 27 2s
+4 - +04
+Z Europe/Volgograd 2:57:40 - LMT 1920 Ja 3
+3 - +03 1930 Jun 21
+4 - +04 1961 N 11
+4 R +04/+05 1988 Mar 27 2s
+3 R +03/+04 1991 Mar 31 2s
+4 - +04 1992 Mar 29 2s
+3 R +03/+04 2011 Mar 27 2s
+4 - +04 2014 O 26 2s
+3 - +03 2018 O 28 2s
+4 - +04 2020 D 27 2s
+3 - +03
+Z Europe/Saratov 3:4:18 - LMT 1919 Jul 1 0u
+3 - +03 1930 Jun 21
+4 R +04/+05 1988 Mar 27 2s
+3 R +03/+04 1991 Mar 31 2s
+4 - +04 1992 Mar 29 2s
+3 R +03/+04 2011 Mar 27 2s
+4 - +04 2014 O 26 2s
+3 - +03 2016 D 4 2s
+4 - +04
+Z Europe/Kirov 3:18:48 - LMT 1919 Jul 1 0u
+3 - +03 1930 Jun 21
+4 R +04/+05 1989 Mar 26 2s
+3 R +03/+04 1991 Mar 31 2s
+4 - +04 1992 Mar 29 2s
+3 R +03/+04 2011 Mar 27 2s
+4 - +04 2014 O 26 2s
+3 - +03
+Z Europe/Samara 3:20:20 - LMT 1919 Jul 1 0u
+3 - +03 1930 Jun 21
+4 - +04 1935 Ja 27
+4 R +04/+05 1989 Mar 26 2s
+3 R +03/+04 1991 Mar 31 2s
+2 R +02/+03 1991 S 29 2s
+3 - +03 1991 O 20 3
+4 R +04/+05 2010 Mar 28 2s
+3 R +03/+04 2011 Mar 27 2s
+4 - +04
+Z Europe/Ulyanovsk 3:13:36 - LMT 1919 Jul 1 0u
+3 - +03 1930 Jun 21
+4 R +04/+05 1989 Mar 26 2s
+3 R +03/+04 1991 Mar 31 2s
+2 R +02/+03 1992 Ja 19 2s
+3 R +03/+04 2011 Mar 27 2s
+4 - +04 2014 O 26 2s
+3 - +03 2016 Mar 27 2s
+4 - +04
+Z Asia/Yekaterinburg 4:2:33 - LMT 1916 Jul 3
+3:45:5 - PMT 1919 Jul 15 4
+4 - +04 1930 Jun 21
+5 R +05/+06 1991 Mar 31 2s
+4 R +04/+05 1992 Ja 19 2s
+5 R +05/+06 2011 Mar 27 2s
+6 - +06 2014 O 26 2s
+5 - +05
+Z Asia/Omsk 4:53:30 - LMT 1919 N 14
+5 - +05 1930 Jun 21
+6 R +06/+07 1991 Mar 31 2s
+5 R +05/+06 1992 Ja 19 2s
+6 R +06/+07 2011 Mar 27 2s
+7 - +07 2014 O 26 2s
+6 - +06
+Z Asia/Barnaul 5:35 - LMT 1919 D 10
+6 - +06 1930 Jun 21
+7 R +07/+08 1991 Mar 31 2s
+6 R +06/+07 1992 Ja 19 2s
+7 R +07/+08 1995 May 28
+6 R +06/+07 2011 Mar 27 2s
+7 - +07 2014 O 26 2s
+6 - +06 2016 Mar 27 2s
+7 - +07
+Z Asia/Novosibirsk 5:31:40 - LMT 1919 D 14 6
+6 - +06 1930 Jun 21
+7 R +07/+08 1991 Mar 31 2s
+6 R +06/+07 1992 Ja 19 2s
+7 R +07/+08 1993 May 23
+6 R +06/+07 2011 Mar 27 2s
+7 - +07 2014 O 26 2s
+6 - +06 2016 Jul 24 2s
+7 - +07
+Z Asia/Tomsk 5:39:51 - LMT 1919 D 22
+6 - +06 1930 Jun 21
+7 R +07/+08 1991 Mar 31 2s
+6 R +06/+07 1992 Ja 19 2s
+7 R +07/+08 2002 May 1 3
+6 R +06/+07 2011 Mar 27 2s
+7 - +07 2014 O 26 2s
+6 - +06 2016 May 29 2s
+7 - +07
+Z Asia/Novokuznetsk 5:48:48 - LMT 1924 May
+6 - +06 1930 Jun 21
+7 R +07/+08 1991 Mar 31 2s
+6 R +06/+07 1992 Ja 19 2s
+7 R +07/+08 2010 Mar 28 2s
+6 R +06/+07 2011 Mar 27 2s
+7 - +07
+Z Asia/Krasnoyarsk 6:11:26 - LMT 1920 Ja 6
+6 - +06 1930 Jun 21
+7 R +07/+08 1991 Mar 31 2s
+6 R +06/+07 1992 Ja 19 2s
+7 R +07/+08 2011 Mar 27 2s
+8 - +08 2014 O 26 2s
+7 - +07
+Z Asia/Irkutsk 6:57:5 - LMT 1880
+6:57:5 - IMT 1920 Ja 25
+7 - +07 1930 Jun 21
+8 R +08/+09 1991 Mar 31 2s
+7 R +07/+08 1992 Ja 19 2s
+8 R +08/+09 2011 Mar 27 2s
+9 - +09 2014 O 26 2s
+8 - +08
+Z Asia/Chita 7:33:52 - LMT 1919 D 15
+8 - +08 1930 Jun 21
+9 R +09/+10 1991 Mar 31 2s
+8 R +08/+09 1992 Ja 19 2s
+9 R +09/+10 2011 Mar 27 2s
+10 - +10 2014 O 26 2s
+8 - +08 2016 Mar 27 2
+9 - +09
+Z Asia/Yakutsk 8:38:58 - LMT 1919 D 15
+8 - +08 1930 Jun 21
+9 R +09/+10 1991 Mar 31 2s
+8 R +08/+09 1992 Ja 19 2s
+9 R +09/+10 2011 Mar 27 2s
+10 - +10 2014 O 26 2s
+9 - +09
+Z Asia/Vladivostok 8:47:31 - LMT 1922 N 15
+9 - +09 1930 Jun 21
+10 R +10/+11 1991 Mar 31 2s
+9 R +09/+10 1992 Ja 19 2s
+10 R +10/+11 2011 Mar 27 2s
+11 - +11 2014 O 26 2s
+10 - +10
+Z Asia/Khandyga 9:2:13 - LMT 1919 D 15
+8 - +08 1930 Jun 21
+9 R +09/+10 1991 Mar 31 2s
+8 R +08/+09 1992 Ja 19 2s
+9 R +09/+10 2004
+10 R +10/+11 2011 Mar 27 2s
+11 - +11 2011 S 13 0s
+10 - +10 2014 O 26 2s
+9 - +09
+Z Asia/Sakhalin 9:30:48 - LMT 1905 Au 23
+9 - +09 1945 Au 25
+11 R +11/+12 1991 Mar 31 2s
+10 R +10/+11 1992 Ja 19 2s
+11 R +11/+12 1997 Mar lastSu 2s
+10 R +10/+11 2011 Mar 27 2s
+11 - +11 2014 O 26 2s
+10 - +10 2016 Mar 27 2s
+11 - +11
+Z Asia/Magadan 10:3:12 - LMT 1924 May 2
+10 - +10 1930 Jun 21
+11 R +11/+12 1991 Mar 31 2s
+10 R +10/+11 1992 Ja 19 2s
+11 R +11/+12 2011 Mar 27 2s
+12 - +12 2014 O 26 2s
+10 - +10 2016 Ap 24 2s
+11 - +11
+Z Asia/Srednekolymsk 10:14:52 - LMT 1924 May 2
+10 - +10 1930 Jun 21
+11 R +11/+12 1991 Mar 31 2s
+10 R +10/+11 1992 Ja 19 2s
+11 R +11/+12 2011 Mar 27 2s
+12 - +12 2014 O 26 2s
+11 - +11
+Z Asia/Ust-Nera 9:32:54 - LMT 1919 D 15
+8 - +08 1930 Jun 21
+9 R +09/+10 1981 Ap
+11 R +11/+12 1991 Mar 31 2s
+10 R +10/+11 1992 Ja 19 2s
+11 R +11/+12 2011 Mar 27 2s
+12 - +12 2011 S 13 0s
+11 - +11 2014 O 26 2s
+10 - +10
+Z Asia/Kamchatka 10:34:36 - LMT 1922 N 10
+11 - +11 1930 Jun 21
+12 R +12/+13 1991 Mar 31 2s
+11 R +11/+12 1992 Ja 19 2s
+12 R +12/+13 2010 Mar 28 2s
+11 R +11/+12 2011 Mar 27 2s
+12 - +12
+Z Asia/Anadyr 11:49:56 - LMT 1924 May 2
+12 - +12 1930 Jun 21
+13 R +13/+14 1982 Ap 1 0s
+12 R +12/+13 1991 Mar 31 2s
+11 R +11/+12 1992 Ja 19 2s
+12 R +12/+13 2010 Mar 28 2s
+11 R +11/+12 2011 Mar 27 2s
+12 - +12
+Z Europe/Belgrade 1:22 - LMT 1884
+1 - CET 1941 Ap 18 23
+1 c CE%sT 1945
+1 - CET 1945 May 8 2s
+1 1 CEST 1945 S 16 2s
+1 - CET 1982 N 27
+1 E CE%sT
+L Europe/Belgrade Europe/Ljubljana
+L Europe/Belgrade Europe/Podgorica
+L Europe/Belgrade Europe/Sarajevo
+L Europe/Belgrade Europe/Skopje
+L Europe/Belgrade Europe/Zagreb
+L Europe/Prague Europe/Bratislava
+R s 1918 o - Ap 15 23 1 S
+R s 1918 1919 - O 6 24s 0 -
+R s 1919 o - Ap 6 23 1 S
+R s 1924 o - Ap 16 23 1 S
+R s 1924 o - O 4 24s 0 -
+R s 1926 o - Ap 17 23 1 S
+R s 1926 1929 - O Sa>=1 24s 0 -
+R s 1927 o - Ap 9 23 1 S
+R s 1928 o - Ap 15 0 1 S
+R s 1929 o - Ap 20 23 1 S
+R s 1937 o - Jun 16 23 1 S
+R s 1937 o - O 2 24s 0 -
+R s 1938 o - Ap 2 23 1 S
+R s 1938 o - Ap 30 23 2 M
+R s 1938 o - O 2 24 1 S
+R s 1939 o - O 7 24s 0 -
+R s 1942 o - May 2 23 1 S
+R s 1942 o - S 1 1 0 -
+R s 1943 1946 - Ap Sa>=13 23 1 S
+R s 1943 1944 - O Su>=1 1 0 -
+R s 1945 1946 - S lastSu 1 0 -
+R s 1949 o - Ap 30 23 1 S
+R s 1949 o - O 2 1 0 -
+R s 1974 1975 - Ap Sa>=12 23 1 S
+R s 1974 1975 - O Su>=1 1 0 -
+R s 1976 o - Mar 27 23 1 S
+R s 1976 1977 - S lastSu 1 0 -
+R s 1977 o - Ap 2 23 1 S
+R s 1978 o - Ap 2 2s 1 S
+R s 1978 o - O 1 2s 0 -
+R Sp 1967 o - Jun 3 12 1 S
+R Sp 1967 o - O 1 0 0 -
+R Sp 1974 o - Jun 24 0 1 S
+R Sp 1974 o - S 1 0 0 -
+R Sp 1976 1977 - May 1 0 1 S
+R Sp 1976 o - Au 1 0 0 -
+R Sp 1977 o - S 28 0 0 -
+R Sp 1978 o - Jun 1 0 1 S
+R Sp 1978 o - Au 4 0 0 -
+Z Europe/Madrid -0:14:44 - LMT 1900 D 31 23:45:16
+0 s WE%sT 1940 Mar 16 23
+1 s CE%sT 1979
+1 E CE%sT
+Z Africa/Ceuta -0:21:16 - LMT 1900 D 31 23:38:44
+0 - WET 1918 May 6 23
+0 1 WEST 1918 O 7 23
+0 - WET 1924
+0 s WE%sT 1929
+0 - WET 1967
+0 Sp WE%sT 1984 Mar 16
+1 - CET 1986
+1 E CE%sT
+Z Atlantic/Canary -1:1:36 - LMT 1922 Mar
+-1 - -01 1946 S 30 1
+0 - WET 1980 Ap 6 0s
+0 1 WEST 1980 S 28 1u
+0 E WE%sT
+Z Europe/Stockholm 1:12:12 - LMT 1879
+1:0:14 - SET 1900
+1 - CET 1916 May 14 23
+1 1 CEST 1916 O 1 1
+1 - CET 1980
+1 E CE%sT
+R CH 1941 1942 - May M>=1 1 1 S
+R CH 1941 1942 - O M>=1 2 0 -
+Z Europe/Zurich 0:34:8 - LMT 1853 Jul 16
+0:29:46 - BMT 1894 Jun
+1 CH CE%sT 1981
+1 E CE%sT
+R T 1916 o - May 1 0 1 S
+R T 1916 o - O 1 0 0 -
+R T 1920 o - Mar 28 0 1 S
+R T 1920 o - O 25 0 0 -
+R T 1921 o - Ap 3 0 1 S
+R T 1921 o - O 3 0 0 -
+R T 1922 o - Mar 26 0 1 S
+R T 1922 o - O 8 0 0 -
+R T 1924 o - May 13 0 1 S
+R T 1924 1925 - O 1 0 0 -
+R T 1925 o - May 1 0 1 S
+R T 1940 o - Jul 1 0 1 S
+R T 1940 o - O 6 0 0 -
+R T 1940 o - D 1 0 1 S
+R T 1941 o - S 21 0 0 -
+R T 1942 o - Ap 1 0 1 S
+R T 1945 o - O 8 0 0 -
+R T 1946 o - Jun 1 0 1 S
+R T 1946 o - O 1 0 0 -
+R T 1947 1948 - Ap Su>=16 0 1 S
+R T 1947 1951 - O Su>=2 0 0 -
+R T 1949 o - Ap 10 0 1 S
+R T 1950 o - Ap 16 0 1 S
+R T 1951 o - Ap 22 0 1 S
+R T 1962 o - Jul 15 0 1 S
+R T 1963 o - O 30 0 0 -
+R T 1964 o - May 15 0 1 S
+R T 1964 o - O 1 0 0 -
+R T 1973 o - Jun 3 1 1 S
+R T 1973 1976 - O Su>=31 2 0 -
+R T 1974 o - Mar 31 2 1 S
+R T 1975 o - Mar 22 2 1 S
+R T 1976 o - Mar 21 2 1 S
+R T 1977 1978 - Ap Su>=1 2 1 S
+R T 1977 1978 - O Su>=15 2 0 -
+R T 1978 o - Jun 29 0 0 -
+R T 1983 o - Jul 31 2 1 S
+R T 1983 o - O 2 2 0 -
+R T 1985 o - Ap 20 1s 1 S
+R T 1985 o - S 28 1s 0 -
+R T 1986 1993 - Mar lastSu 1s 1 S
+R T 1986 1995 - S lastSu 1s 0 -
+R T 1994 o - Mar 20 1s 1 S
+R T 1995 2006 - Mar lastSu 1s 1 S
+R T 1996 2006 - O lastSu 1s 0 -
+Z Europe/Istanbul 1:55:52 - LMT 1880
+1:56:56 - IMT 1910 O
+2 T EE%sT 1978 Jun 29
+3 T +03/+04 1984 N 1 2
+2 T EE%sT 2007
+2 E EE%sT 2011 Mar 27 1u
+2 - EET 2011 Mar 28 1u
+2 E EE%sT 2014 Mar 30 1u
+2 - EET 2014 Mar 31 1u
+2 E EE%sT 2015 O 25 1u
+2 1 EEST 2015 N 8 1u
+2 E EE%sT 2016 S 7
+3 - +03
+L Europe/Istanbul Asia/Istanbul
+Z Europe/Kiev 2:2:4 - LMT 1880
+2:2:4 - KMT 1924 May 2
+2 - EET 1930 Jun 21
+3 - MSK 1941 S 20
+1 c CE%sT 1943 N 6
+3 R MSK/MSD 1990 Jul 1 2
+2 1 EEST 1991 S 29 3
+2 c EE%sT 1996 May 13
+2 E EE%sT
+Z Europe/Uzhgorod 1:29:12 - LMT 1890 O
+1 - CET 1940
+1 c CE%sT 1944 O
+1 1 CEST 1944 O 26
+1 - CET 1945 Jun 29
+3 R MSK/MSD 1990
+3 - MSK 1990 Jul 1 2
+1 - CET 1991 Mar 31 3
+2 - EET 1992 Mar 20
+2 c EE%sT 1996 May 13
+2 E EE%sT
+Z Europe/Zaporozhye 2:20:40 - LMT 1880
+2:20 - +0220 1924 May 2
+2 - EET 1930 Jun 21
+3 - MSK 1941 Au 25
+1 c CE%sT 1943 O 25
+3 R MSK/MSD 1991 Mar 31 2
+2 e EE%sT 1992 Mar 20
+2 c EE%sT 1996 May 13
+2 E EE%sT
+R u 1918 1919 - Mar lastSu 2 1 D
+R u 1918 1919 - O lastSu 2 0 S
+R u 1942 o - F 9 2 1 W
+R u 1945 o - Au 14 23u 1 P
+R u 1945 o - S 30 2 0 S
+R u 1967 2006 - O lastSu 2 0 S
+R u 1967 1973 - Ap lastSu 2 1 D
+R u 1974 o - Ja 6 2 1 D
+R u 1975 o - F lastSu 2 1 D
+R u 1976 1986 - Ap lastSu 2 1 D
+R u 1987 2006 - Ap Su>=1 2 1 D
+R u 2007 ma - Mar Su>=8 2 1 D
+R u 2007 ma - N Su>=1 2 0 S
+Z EST -5 - EST
+Z MST -7 - MST
+Z HST -10 - HST
+Z EST5EDT -5 u E%sT
+Z CST6CDT -6 u C%sT
+Z MST7MDT -7 u M%sT
+Z PST8PDT -8 u P%sT
+R NY 1920 o - Mar lastSu 2 1 D
+R NY 1920 o - O lastSu 2 0 S
+R NY 1921 1966 - Ap lastSu 2 1 D
+R NY 1921 1954 - S lastSu 2 0 S
+R NY 1955 1966 - O lastSu 2 0 S
+Z America/New_York -4:56:2 - LMT 1883 N 18 12:3:58
+-5 u E%sT 1920
+-5 NY E%sT 1942
+-5 u E%sT 1946
+-5 NY E%sT 1967
+-5 u E%sT
+R Ch 1920 o - Jun 13 2 1 D
+R Ch 1920 1921 - O lastSu 2 0 S
+R Ch 1921 o - Mar lastSu 2 1 D
+R Ch 1922 1966 - Ap lastSu 2 1 D
+R Ch 1922 1954 - S lastSu 2 0 S
+R Ch 1955 1966 - O lastSu 2 0 S
+Z America/Chicago -5:50:36 - LMT 1883 N 18 12:9:24
+-6 u C%sT 1920
+-6 Ch C%sT 1936 Mar 1 2
+-5 - EST 1936 N 15 2
+-6 Ch C%sT 1942
+-6 u C%sT 1946
+-6 Ch C%sT 1967
+-6 u C%sT
+Z America/North_Dakota/Center -6:45:12 - LMT 1883 N 18 12:14:48
+-7 u M%sT 1992 O 25 2
+-6 u C%sT
+Z America/North_Dakota/New_Salem -6:45:39 - LMT 1883 N 18 12:14:21
+-7 u M%sT 2003 O 26 2
+-6 u C%sT
+Z America/North_Dakota/Beulah -6:47:7 - LMT 1883 N 18 12:12:53
+-7 u M%sT 2010 N 7 2
+-6 u C%sT
+R De 1920 1921 - Mar lastSu 2 1 D
+R De 1920 o - O lastSu 2 0 S
+R De 1921 o - May 22 2 0 S
+R De 1965 1966 - Ap lastSu 2 1 D
+R De 1965 1966 - O lastSu 2 0 S
+Z America/Denver -6:59:56 - LMT 1883 N 18 12:0:4
+-7 u M%sT 1920
+-7 De M%sT 1942
+-7 u M%sT 1946
+-7 De M%sT 1967
+-7 u M%sT
+R CA 1948 o - Mar 14 2:1 1 D
+R CA 1949 o - Ja 1 2 0 S
+R CA 1950 1966 - Ap lastSu 1 1 D
+R CA 1950 1961 - S lastSu 2 0 S
+R CA 1962 1966 - O lastSu 2 0 S
+Z America/Los_Angeles -7:52:58 - LMT 1883 N 18 12:7:2
+-8 u P%sT 1946
+-8 CA P%sT 1967
+-8 u P%sT
+Z America/Juneau 15:2:19 - LMT 1867 O 19 15:33:32
+-8:57:41 - LMT 1900 Au 20 12
+-8 - PST 1942
+-8 u P%sT 1946
+-8 - PST 1969
+-8 u P%sT 1980 Ap 27 2
+-9 u Y%sT 1980 O 26 2
+-8 u P%sT 1983 O 30 2
+-9 u Y%sT 1983 N 30
+-9 u AK%sT
+Z America/Sitka 14:58:47 - LMT 1867 O 19 15:30
+-9:1:13 - LMT 1900 Au 20 12
+-8 - PST 1942
+-8 u P%sT 1946
+-8 - PST 1969
+-8 u P%sT 1983 O 30 2
+-9 u Y%sT 1983 N 30
+-9 u AK%sT
+Z America/Metlakatla 15:13:42 - LMT 1867 O 19 15:44:55
+-8:46:18 - LMT 1900 Au 20 12
+-8 - PST 1942
+-8 u P%sT 1946
+-8 - PST 1969
+-8 u P%sT 1983 O 30 2
+-8 - PST 2015 N 1 2
+-9 u AK%sT 2018 N 4 2
+-8 - PST 2019 Ja 20 2
+-9 u AK%sT
+Z America/Yakutat 14:41:5 - LMT 1867 O 19 15:12:18
+-9:18:55 - LMT 1900 Au 20 12
+-9 - YST 1942
+-9 u Y%sT 1946
+-9 - YST 1969
+-9 u Y%sT 1983 N 30
+-9 u AK%sT
+Z America/Anchorage 14:0:24 - LMT 1867 O 19 14:31:37
+-9:59:36 - LMT 1900 Au 20 12
+-10 - AST 1942
+-10 u A%sT 1967 Ap
+-10 - AHST 1969
+-10 u AH%sT 1983 O 30 2
+-9 u Y%sT 1983 N 30
+-9 u AK%sT
+Z America/Nome 12:58:22 - LMT 1867 O 19 13:29:35
+-11:1:38 - LMT 1900 Au 20 12
+-11 - NST 1942
+-11 u N%sT 1946
+-11 - NST 1967 Ap
+-11 - BST 1969
+-11 u B%sT 1983 O 30 2
+-9 u Y%sT 1983 N 30
+-9 u AK%sT
+Z America/Adak 12:13:22 - LMT 1867 O 19 12:44:35
+-11:46:38 - LMT 1900 Au 20 12
+-11 - NST 1942
+-11 u N%sT 1946
+-11 - NST 1967 Ap
+-11 - BST 1969
+-11 u B%sT 1983 O 30 2
+-10 u AH%sT 1983 N 30
+-10 u H%sT
+Z Pacific/Honolulu -10:31:26 - LMT 1896 Ja 13 12
+-10:30 - HST 1933 Ap 30 2
+-10:30 1 HDT 1933 May 21 12
+-10:30 u H%sT 1947 Jun 8 2
+-10 - HST
+Z America/Phoenix -7:28:18 - LMT 1883 N 18 11:31:42
+-7 u M%sT 1944 Ja 1 0:1
+-7 - MST 1944 Ap 1 0:1
+-7 u M%sT 1944 O 1 0:1
+-7 - MST 1967
+-7 u M%sT 1968 Mar 21
+-7 - MST
+L America/Phoenix America/Creston
+Z America/Boise -7:44:49 - LMT 1883 N 18 12:15:11
+-8 u P%sT 1923 May 13 2
+-7 u M%sT 1974
+-7 - MST 1974 F 3 2
+-7 u M%sT
+R In 1941 o - Jun 22 2 1 D
+R In 1941 1954 - S lastSu 2 0 S
+R In 1946 1954 - Ap lastSu 2 1 D
+Z America/Indiana/Indianapolis -5:44:38 - LMT 1883 N 18 12:15:22
+-6 u C%sT 1920
+-6 In C%sT 1942
+-6 u C%sT 1946
+-6 In C%sT 1955 Ap 24 2
+-5 - EST 1957 S 29 2
+-6 - CST 1958 Ap 27 2
+-5 - EST 1969
+-5 u E%sT 1971
+-5 - EST 2006
+-5 u E%sT
+R Ma 1951 o - Ap lastSu 2 1 D
+R Ma 1951 o - S lastSu 2 0 S
+R Ma 1954 1960 - Ap lastSu 2 1 D
+R Ma 1954 1960 - S lastSu 2 0 S
+Z America/Indiana/Marengo -5:45:23 - LMT 1883 N 18 12:14:37
+-6 u C%sT 1951
+-6 Ma C%sT 1961 Ap 30 2
+-5 - EST 1969
+-5 u E%sT 1974 Ja 6 2
+-6 1 CDT 1974 O 27 2
+-5 u E%sT 1976
+-5 - EST 2006
+-5 u E%sT
+R V 1946 o - Ap lastSu 2 1 D
+R V 1946 o - S lastSu 2 0 S
+R V 1953 1954 - Ap lastSu 2 1 D
+R V 1953 1959 - S lastSu 2 0 S
+R V 1955 o - May 1 0 1 D
+R V 1956 1963 - Ap lastSu 2 1 D
+R V 1960 o - O lastSu 2 0 S
+R V 1961 o - S lastSu 2 0 S
+R V 1962 1963 - O lastSu 2 0 S
+Z America/Indiana/Vincennes -5:50:7 - LMT 1883 N 18 12:9:53
+-6 u C%sT 1946
+-6 V C%sT 1964 Ap 26 2
+-5 - EST 1969
+-5 u E%sT 1971
+-5 - EST 2006 Ap 2 2
+-6 u C%sT 2007 N 4 2
+-5 u E%sT
+R Pe 1955 o - May 1 0 1 D
+R Pe 1955 1960 - S lastSu 2 0 S
+R Pe 1956 1963 - Ap lastSu 2 1 D
+R Pe 1961 1963 - O lastSu 2 0 S
+Z America/Indiana/Tell_City -5:47:3 - LMT 1883 N 18 12:12:57
+-6 u C%sT 1946
+-6 Pe C%sT 1964 Ap 26 2
+-5 - EST 1967 O 29 2
+-6 u C%sT 1969 Ap 27 2
+-5 u E%sT 1971
+-5 - EST 2006 Ap 2 2
+-6 u C%sT
+R Pi 1955 o - May 1 0 1 D
+R Pi 1955 1960 - S lastSu 2 0 S
+R Pi 1956 1964 - Ap lastSu 2 1 D
+R Pi 1961 1964 - O lastSu 2 0 S
+Z America/Indiana/Petersburg -5:49:7 - LMT 1883 N 18 12:10:53
+-6 u C%sT 1955
+-6 Pi C%sT 1965 Ap 25 2
+-5 - EST 1966 O 30 2
+-6 u C%sT 1977 O 30 2
+-5 - EST 2006 Ap 2 2
+-6 u C%sT 2007 N 4 2
+-5 u E%sT
+R St 1947 1961 - Ap lastSu 2 1 D
+R St 1947 1954 - S lastSu 2 0 S
+R St 1955 1956 - O lastSu 2 0 S
+R St 1957 1958 - S lastSu 2 0 S
+R St 1959 1961 - O lastSu 2 0 S
+Z America/Indiana/Knox -5:46:30 - LMT 1883 N 18 12:13:30
+-6 u C%sT 1947
+-6 St C%sT 1962 Ap 29 2
+-5 - EST 1963 O 27 2
+-6 u C%sT 1991 O 27 2
+-5 - EST 2006 Ap 2 2
+-6 u C%sT
+R Pu 1946 1960 - Ap lastSu 2 1 D
+R Pu 1946 1954 - S lastSu 2 0 S
+R Pu 1955 1956 - O lastSu 2 0 S
+R Pu 1957 1960 - S lastSu 2 0 S
+Z America/Indiana/Winamac -5:46:25 - LMT 1883 N 18 12:13:35
+-6 u C%sT 1946
+-6 Pu C%sT 1961 Ap 30 2
+-5 - EST 1969
+-5 u E%sT 1971
+-5 - EST 2006 Ap 2 2
+-6 u C%sT 2007 Mar 11 2
+-5 u E%sT
+Z America/Indiana/Vevay -5:40:16 - LMT 1883 N 18 12:19:44
+-6 u C%sT 1954 Ap 25 2
+-5 - EST 1969
+-5 u E%sT 1973
+-5 - EST 2006
+-5 u E%sT
+R v 1921 o - May 1 2 1 D
+R v 1921 o - S 1 2 0 S
+R v 1941 o - Ap lastSu 2 1 D
+R v 1941 o - S lastSu 2 0 S
+R v 1946 o - Ap lastSu 0:1 1 D
+R v 1946 o - Jun 2 2 0 S
+R v 1950 1961 - Ap lastSu 2 1 D
+R v 1950 1955 - S lastSu 2 0 S
+R v 1956 1961 - O lastSu 2 0 S
+Z America/Kentucky/Louisville -5:43:2 - LMT 1883 N 18 12:16:58
+-6 u C%sT 1921
+-6 v C%sT 1942
+-6 u C%sT 1946
+-6 v C%sT 1961 Jul 23 2
+-5 - EST 1968
+-5 u E%sT 1974 Ja 6 2
+-6 1 CDT 1974 O 27 2
+-5 u E%sT
+Z America/Kentucky/Monticello -5:39:24 - LMT 1883 N 18 12:20:36
+-6 u C%sT 1946
+-6 - CST 1968
+-6 u C%sT 2000 O 29 2
+-5 u E%sT
+R Dt 1948 o - Ap lastSu 2 1 D
+R Dt 1948 o - S lastSu 2 0 S
+Z America/Detroit -5:32:11 - LMT 1905
+-6 - CST 1915 May 15 2
+-5 - EST 1942
+-5 u E%sT 1946
+-5 Dt E%sT 1967 Jun 14 0:1
+-5 u E%sT 1969
+-5 - EST 1973
+-5 u E%sT 1975
+-5 - EST 1975 Ap 27 2
+-5 u E%sT
+R Me 1946 o - Ap lastSu 2 1 D
+R Me 1946 o - S lastSu 2 0 S
+R Me 1966 o - Ap lastSu 2 1 D
+R Me 1966 o - O lastSu 2 0 S
+Z America/Menominee -5:50:27 - LMT 1885 S 18 12
+-6 u C%sT 1946
+-6 Me C%sT 1969 Ap 27 2
+-5 - EST 1973 Ap 29 2
+-6 u C%sT
+R C 1918 o - Ap 14 2 1 D
+R C 1918 o - O 27 2 0 S
+R C 1942 o - F 9 2 1 W
+R C 1945 o - Au 14 23u 1 P
+R C 1945 o - S 30 2 0 S
+R C 1974 1986 - Ap lastSu 2 1 D
+R C 1974 2006 - O lastSu 2 0 S
+R C 1987 2006 - Ap Su>=1 2 1 D
+R C 2007 ma - Mar Su>=8 2 1 D
+R C 2007 ma - N Su>=1 2 0 S
+R j 1917 o - Ap 8 2 1 D
+R j 1917 o - S 17 2 0 S
+R j 1919 o - May 5 23 1 D
+R j 1919 o - Au 12 23 0 S
+R j 1920 1935 - May Su>=1 23 1 D
+R j 1920 1935 - O lastSu 23 0 S
+R j 1936 1941 - May M>=9 0 1 D
+R j 1936 1941 - O M>=2 0 0 S
+R j 1946 1950 - May Su>=8 2 1 D
+R j 1946 1950 - O Su>=2 2 0 S
+R j 1951 1986 - Ap lastSu 2 1 D
+R j 1951 1959 - S lastSu 2 0 S
+R j 1960 1986 - O lastSu 2 0 S
+R j 1987 o - Ap Su>=1 0:1 1 D
+R j 1987 2006 - O lastSu 0:1 0 S
+R j 1988 o - Ap Su>=1 0:1 2 DD
+R j 1989 2006 - Ap Su>=1 0:1 1 D
+R j 2007 2011 - Mar Su>=8 0:1 1 D
+R j 2007 2010 - N Su>=1 0:1 0 S
+Z America/St_Johns -3:30:52 - LMT 1884
+-3:30:52 j N%sT 1918
+-3:30:52 C N%sT 1919
+-3:30:52 j N%sT 1935 Mar 30
+-3:30 j N%sT 1942 May 11
+-3:30 C N%sT 1946
+-3:30 j N%sT 2011 N
+-3:30 C N%sT
+Z America/Goose_Bay -4:1:40 - LMT 1884
+-3:30:52 - NST 1918
+-3:30:52 C N%sT 1919
+-3:30:52 - NST 1935 Mar 30
+-3:30 - NST 1936
+-3:30 j N%sT 1942 May 11
+-3:30 C N%sT 1946
+-3:30 j N%sT 1966 Mar 15 2
+-4 j A%sT 2011 N
+-4 C A%sT
+R H 1916 o - Ap 1 0 1 D
+R H 1916 o - O 1 0 0 S
+R H 1920 o - May 9 0 1 D
+R H 1920 o - Au 29 0 0 S
+R H 1921 o - May 6 0 1 D
+R H 1921 1922 - S 5 0 0 S
+R H 1922 o - Ap 30 0 1 D
+R H 1923 1925 - May Su>=1 0 1 D
+R H 1923 o - S 4 0 0 S
+R H 1924 o - S 15 0 0 S
+R H 1925 o - S 28 0 0 S
+R H 1926 o - May 16 0 1 D
+R H 1926 o - S 13 0 0 S
+R H 1927 o - May 1 0 1 D
+R H 1927 o - S 26 0 0 S
+R H 1928 1931 - May Su>=8 0 1 D
+R H 1928 o - S 9 0 0 S
+R H 1929 o - S 3 0 0 S
+R H 1930 o - S 15 0 0 S
+R H 1931 1932 - S M>=24 0 0 S
+R H 1932 o - May 1 0 1 D
+R H 1933 o - Ap 30 0 1 D
+R H 1933 o - O 2 0 0 S
+R H 1934 o - May 20 0 1 D
+R H 1934 o - S 16 0 0 S
+R H 1935 o - Jun 2 0 1 D
+R H 1935 o - S 30 0 0 S
+R H 1936 o - Jun 1 0 1 D
+R H 1936 o - S 14 0 0 S
+R H 1937 1938 - May Su>=1 0 1 D
+R H 1937 1941 - S M>=24 0 0 S
+R H 1939 o - May 28 0 1 D
+R H 1940 1941 - May Su>=1 0 1 D
+R H 1946 1949 - Ap lastSu 2 1 D
+R H 1946 1949 - S lastSu 2 0 S
+R H 1951 1954 - Ap lastSu 2 1 D
+R H 1951 1954 - S lastSu 2 0 S
+R H 1956 1959 - Ap lastSu 2 1 D
+R H 1956 1959 - S lastSu 2 0 S
+R H 1962 1973 - Ap lastSu 2 1 D
+R H 1962 1973 - O lastSu 2 0 S
+Z America/Halifax -4:14:24 - LMT 1902 Jun 15
+-4 H A%sT 1918
+-4 C A%sT 1919
+-4 H A%sT 1942 F 9 2s
+-4 C A%sT 1946
+-4 H A%sT 1974
+-4 C A%sT
+Z America/Glace_Bay -3:59:48 - LMT 1902 Jun 15
+-4 C A%sT 1953
+-4 H A%sT 1954
+-4 - AST 1972
+-4 H A%sT 1974
+-4 C A%sT
+R o 1933 1935 - Jun Su>=8 1 1 D
+R o 1933 1935 - S Su>=8 1 0 S
+R o 1936 1938 - Jun Su>=1 1 1 D
+R o 1936 1938 - S Su>=1 1 0 S
+R o 1939 o - May 27 1 1 D
+R o 1939 1941 - S Sa>=21 1 0 S
+R o 1940 o - May 19 1 1 D
+R o 1941 o - May 4 1 1 D
+R o 1946 1972 - Ap lastSu 2 1 D
+R o 1946 1956 - S lastSu 2 0 S
+R o 1957 1972 - O lastSu 2 0 S
+R o 1993 2006 - Ap Su>=1 0:1 1 D
+R o 1993 2006 - O lastSu 0:1 0 S
+Z America/Moncton -4:19:8 - LMT 1883 D 9
+-5 - EST 1902 Jun 15
+-4 C A%sT 1933
+-4 o A%sT 1942
+-4 C A%sT 1946
+-4 o A%sT 1973
+-4 C A%sT 1993
+-4 o A%sT 2007
+-4 C A%sT
+R t 1919 o - Mar 30 23:30 1 D
+R t 1919 o - O 26 0 0 S
+R t 1920 o - May 2 2 1 D
+R t 1920 o - S 26 0 0 S
+R t 1921 o - May 15 2 1 D
+R t 1921 o - S 15 2 0 S
+R t 1922 1923 - May Su>=8 2 1 D
+R t 1922 1926 - S Su>=15 2 0 S
+R t 1924 1927 - May Su>=1 2 1 D
+R t 1927 1937 - S Su>=25 2 0 S
+R t 1928 1937 - Ap Su>=25 2 1 D
+R t 1938 1940 - Ap lastSu 2 1 D
+R t 1938 1939 - S lastSu 2 0 S
+R t 1945 1946 - S lastSu 2 0 S
+R t 1946 o - Ap lastSu 2 1 D
+R t 1947 1949 - Ap lastSu 0 1 D
+R t 1947 1948 - S lastSu 0 0 S
+R t 1949 o - N lastSu 0 0 S
+R t 1950 1973 - Ap lastSu 2 1 D
+R t 1950 o - N lastSu 2 0 S
+R t 1951 1956 - S lastSu 2 0 S
+R t 1957 1973 - O lastSu 2 0 S
+Z America/Toronto -5:17:32 - LMT 1895
+-5 C E%sT 1919
+-5 t E%sT 1942 F 9 2s
+-5 C E%sT 1946
+-5 t E%sT 1974
+-5 C E%sT
+L America/Toronto America/Nassau
+Z America/Thunder_Bay -5:57 - LMT 1895
+-6 - CST 1910
+-5 - EST 1942
+-5 C E%sT 1970
+-5 t E%sT 1973
+-5 - EST 1974
+-5 C E%sT
+Z America/Nipigon -5:53:4 - LMT 1895
+-5 C E%sT 1940 S 29
+-5 1 EDT 1942 F 9 2s
+-5 C E%sT
+Z America/Rainy_River -6:18:16 - LMT 1895
+-6 C C%sT 1940 S 29
+-6 1 CDT 1942 F 9 2s
+-6 C C%sT
+R W 1916 o - Ap 23 0 1 D
+R W 1916 o - S 17 0 0 S
+R W 1918 o - Ap 14 2 1 D
+R W 1918 o - O 27 2 0 S
+R W 1937 o - May 16 2 1 D
+R W 1937 o - S 26 2 0 S
+R W 1942 o - F 9 2 1 W
+R W 1945 o - Au 14 23u 1 P
+R W 1945 o - S lastSu 2 0 S
+R W 1946 o - May 12 2 1 D
+R W 1946 o - O 13 2 0 S
+R W 1947 1949 - Ap lastSu 2 1 D
+R W 1947 1949 - S lastSu 2 0 S
+R W 1950 o - May 1 2 1 D
+R W 1950 o - S 30 2 0 S
+R W 1951 1960 - Ap lastSu 2 1 D
+R W 1951 1958 - S lastSu 2 0 S
+R W 1959 o - O lastSu 2 0 S
+R W 1960 o - S lastSu 2 0 S
+R W 1963 o - Ap lastSu 2 1 D
+R W 1963 o - S 22 2 0 S
+R W 1966 1986 - Ap lastSu 2s 1 D
+R W 1966 2005 - O lastSu 2s 0 S
+R W 1987 2005 - Ap Su>=1 2s 1 D
+Z America/Winnipeg -6:28:36 - LMT 1887 Jul 16
+-6 W C%sT 2006
+-6 C C%sT
+R r 1918 o - Ap 14 2 1 D
+R r 1918 o - O 27 2 0 S
+R r 1930 1934 - May Su>=1 0 1 D
+R r 1930 1934 - O Su>=1 0 0 S
+R r 1937 1941 - Ap Su>=8 0 1 D
+R r 1937 o - O Su>=8 0 0 S
+R r 1938 o - O Su>=1 0 0 S
+R r 1939 1941 - O Su>=8 0 0 S
+R r 1942 o - F 9 2 1 W
+R r 1945 o - Au 14 23u 1 P
+R r 1945 o - S lastSu 2 0 S
+R r 1946 o - Ap Su>=8 2 1 D
+R r 1946 o - O Su>=8 2 0 S
+R r 1947 1957 - Ap lastSu 2 1 D
+R r 1947 1957 - S lastSu 2 0 S
+R r 1959 o - Ap lastSu 2 1 D
+R r 1959 o - O lastSu 2 0 S
+R Sw 1957 o - Ap lastSu 2 1 D
+R Sw 1957 o - O lastSu 2 0 S
+R Sw 1959 1961 - Ap lastSu 2 1 D
+R Sw 1959 o - O lastSu 2 0 S
+R Sw 1960 1961 - S lastSu 2 0 S
+Z America/Regina -6:58:36 - LMT 1905 S
+-7 r M%sT 1960 Ap lastSu 2
+-6 - CST
+Z America/Swift_Current -7:11:20 - LMT 1905 S
+-7 C M%sT 1946 Ap lastSu 2
+-7 r M%sT 1950
+-7 Sw M%sT 1972 Ap lastSu 2
+-6 - CST
+R Ed 1918 1919 - Ap Su>=8 2 1 D
+R Ed 1918 o - O 27 2 0 S
+R Ed 1919 o - May 27 2 0 S
+R Ed 1920 1923 - Ap lastSu 2 1 D
+R Ed 1920 o - O lastSu 2 0 S
+R Ed 1921 1923 - S lastSu 2 0 S
+R Ed 1942 o - F 9 2 1 W
+R Ed 1945 o - Au 14 23u 1 P
+R Ed 1945 o - S lastSu 2 0 S
+R Ed 1947 o - Ap lastSu 2 1 D
+R Ed 1947 o - S lastSu 2 0 S
+R Ed 1972 1986 - Ap lastSu 2 1 D
+R Ed 1972 2006 - O lastSu 2 0 S
+Z America/Edmonton -7:33:52 - LMT 1906 S
+-7 Ed M%sT 1987
+-7 C M%sT
+R Va 1918 o - Ap 14 2 1 D
+R Va 1918 o - O 27 2 0 S
+R Va 1942 o - F 9 2 1 W
+R Va 1945 o - Au 14 23u 1 P
+R Va 1945 o - S 30 2 0 S
+R Va 1946 1986 - Ap lastSu 2 1 D
+R Va 1946 o - S 29 2 0 S
+R Va 1947 1961 - S lastSu 2 0 S
+R Va 1962 2006 - O lastSu 2 0 S
+Z America/Vancouver -8:12:28 - LMT 1884
+-8 Va P%sT 1987
+-8 C P%sT
+Z America/Dawson_Creek -8:0:56 - LMT 1884
+-8 C P%sT 1947
+-8 Va P%sT 1972 Au 30 2
+-7 - MST
+Z America/Fort_Nelson -8:10:47 - LMT 1884
+-8 Va P%sT 1946
+-8 - PST 1947
+-8 Va P%sT 1987
+-8 C P%sT 2015 Mar 8 2
+-7 - MST
+R Y 1918 o - Ap 14 2 1 D
+R Y 1918 o - O 27 2 0 S
+R Y 1919 o - May 25 2 1 D
+R Y 1919 o - N 1 0 0 S
+R Y 1942 o - F 9 2 1 W
+R Y 1945 o - Au 14 23u 1 P
+R Y 1945 o - S 30 2 0 S
+R Y 1965 o - Ap lastSu 0 2 DD
+R Y 1965 o - O lastSu 2 0 S
+R Y 1980 1986 - Ap lastSu 2 1 D
+R Y 1980 2006 - O lastSu 2 0 S
+R Y 1987 2006 - Ap Su>=1 2 1 D
+Z America/Pangnirtung 0 - -00 1921
+-4 Y A%sT 1995 Ap Su>=1 2
+-5 C E%sT 1999 O 31 2
+-6 C C%sT 2000 O 29 2
+-5 C E%sT
+Z America/Iqaluit 0 - -00 1942 Au
+-5 Y E%sT 1999 O 31 2
+-6 C C%sT 2000 O 29 2
+-5 C E%sT
+Z America/Resolute 0 - -00 1947 Au 31
+-6 Y C%sT 2000 O 29 2
+-5 - EST 2001 Ap 1 3
+-6 C C%sT 2006 O 29 2
+-5 - EST 2007 Mar 11 3
+-6 C C%sT
+Z America/Rankin_Inlet 0 - -00 1957
+-6 Y C%sT 2000 O 29 2
+-5 - EST 2001 Ap 1 3
+-6 C C%sT
+Z America/Cambridge_Bay 0 - -00 1920
+-7 Y M%sT 1999 O 31 2
+-6 C C%sT 2000 O 29 2
+-5 - EST 2000 N 5
+-6 - CST 2001 Ap 1 3
+-7 C M%sT
+Z America/Yellowknife 0 - -00 1935
+-7 Y M%sT 1980
+-7 C M%sT
+Z America/Inuvik 0 - -00 1953
+-8 Y P%sT 1979 Ap lastSu 2
+-7 Y M%sT 1980
+-7 C M%sT
+Z America/Whitehorse -9:0:12 - LMT 1900 Au 20
+-9 Y Y%sT 1967 May 28
+-8 Y P%sT 1980
+-8 C P%sT 2020 N
+-7 - MST
+Z America/Dawson -9:17:40 - LMT 1900 Au 20
+-9 Y Y%sT 1973 O 28
+-8 Y P%sT 1980
+-8 C P%sT 2020 N
+-7 - MST
+R m 1939 o - F 5 0 1 D
+R m 1939 o - Jun 25 0 0 S
+R m 1940 o - D 9 0 1 D
+R m 1941 o - Ap 1 0 0 S
+R m 1943 o - D 16 0 1 W
+R m 1944 o - May 1 0 0 S
+R m 1950 o - F 12 0 1 D
+R m 1950 o - Jul 30 0 0 S
+R m 1996 2000 - Ap Su>=1 2 1 D
+R m 1996 2000 - O lastSu 2 0 S
+R m 2001 o - May Su>=1 2 1 D
+R m 2001 o - S lastSu 2 0 S
+R m 2002 ma - Ap Su>=1 2 1 D
+R m 2002 ma - O lastSu 2 0 S
+Z America/Cancun -5:47:4 - LMT 1922 Ja 1 0:12:56
+-6 - CST 1981 D 23
+-5 m E%sT 1998 Au 2 2
+-6 m C%sT 2015 F 1 2
+-5 - EST
+Z America/Merida -5:58:28 - LMT 1922 Ja 1 0:1:32
+-6 - CST 1981 D 23
+-5 - EST 1982 D 2
+-6 m C%sT
+Z America/Matamoros -6:40 - LMT 1921 D 31 23:20
+-6 - CST 1988
+-6 u C%sT 1989
+-6 m C%sT 2010
+-6 u C%sT
+Z America/Monterrey -6:41:16 - LMT 1921 D 31 23:18:44
+-6 - CST 1988
+-6 u C%sT 1989
+-6 m C%sT
+Z America/Mexico_City -6:36:36 - LMT 1922 Ja 1 0:23:24
+-7 - MST 1927 Jun 10 23
+-6 - CST 1930 N 15
+-7 - MST 1931 May 1 23
+-6 - CST 1931 O
+-7 - MST 1932 Ap
+-6 m C%sT 2001 S 30 2
+-6 - CST 2002 F 20
+-6 m C%sT
+Z America/Ojinaga -6:57:40 - LMT 1922 Ja 1 0:2:20
+-7 - MST 1927 Jun 10 23
+-6 - CST 1930 N 15
+-7 - MST 1931 May 1 23
+-6 - CST 1931 O
+-7 - MST 1932 Ap
+-6 - CST 1996
+-6 m C%sT 1998
+-6 - CST 1998 Ap Su>=1 3
+-7 m M%sT 2010
+-7 u M%sT
+Z America/Chihuahua -7:4:20 - LMT 1921 D 31 23:55:40
+-7 - MST 1927 Jun 10 23
+-6 - CST 1930 N 15
+-7 - MST 1931 May 1 23
+-6 - CST 1931 O
+-7 - MST 1932 Ap
+-6 - CST 1996
+-6 m C%sT 1998
+-6 - CST 1998 Ap Su>=1 3
+-7 m M%sT
+Z America/Hermosillo -7:23:52 - LMT 1921 D 31 23:36:8
+-7 - MST 1927 Jun 10 23
+-6 - CST 1930 N 15
+-7 - MST 1931 May 1 23
+-6 - CST 1931 O
+-7 - MST 1932 Ap
+-6 - CST 1942 Ap 24
+-7 - MST 1949 Ja 14
+-8 - PST 1970
+-7 m M%sT 1999
+-7 - MST
+Z America/Mazatlan -7:5:40 - LMT 1921 D 31 23:54:20
+-7 - MST 1927 Jun 10 23
+-6 - CST 1930 N 15
+-7 - MST 1931 May 1 23
+-6 - CST 1931 O
+-7 - MST 1932 Ap
+-6 - CST 1942 Ap 24
+-7 - MST 1949 Ja 14
+-8 - PST 1970
+-7 m M%sT
+Z America/Bahia_Banderas -7:1 - LMT 1921 D 31 23:59
+-7 - MST 1927 Jun 10 23
+-6 - CST 1930 N 15
+-7 - MST 1931 May 1 23
+-6 - CST 1931 O
+-7 - MST 1932 Ap
+-6 - CST 1942 Ap 24
+-7 - MST 1949 Ja 14
+-8 - PST 1970
+-7 m M%sT 2010 Ap 4 2
+-6 m C%sT
+Z America/Tijuana -7:48:4 - LMT 1922 Ja 1 0:11:56
+-7 - MST 1924
+-8 - PST 1927 Jun 10 23
+-7 - MST 1930 N 15
+-8 - PST 1931 Ap
+-8 1 PDT 1931 S 30
+-8 - PST 1942 Ap 24
+-8 1 PWT 1945 Au 14 23u
+-8 1 PPT 1945 N 12
+-8 - PST 1948 Ap 5
+-8 1 PDT 1949 Ja 14
+-8 - PST 1954
+-8 CA P%sT 1961
+-8 - PST 1976
+-8 u P%sT 1996
+-8 m P%sT 2001
+-8 u P%sT 2002 F 20
+-8 m P%sT 2010
+-8 u P%sT
+R BB 1942 o - Ap 19 5u 1 D
+R BB 1942 o - Au 31 6u 0 S
+R BB 1943 o - May 2 5u 1 D
+R BB 1943 o - S 5 6u 0 S
+R BB 1944 o - Ap 10 5u 0:30 -
+R BB 1944 o - S 10 6u 0 S
+R BB 1977 o - Jun 12 2 1 D
+R BB 1977 1978 - O Su>=1 2 0 S
+R BB 1978 1980 - Ap Su>=15 2 1 D
+R BB 1979 o - S 30 2 0 S
+R BB 1980 o - S 25 2 0 S
+Z America/Barbados -3:58:29 - LMT 1911 Au 28
+-4 BB A%sT 1944
+-4 BB AST/-0330 1945
+-4 BB A%sT
+R BZ 1918 1941 - O Sa>=1 24 0:30 -0530
+R BZ 1919 1942 - F Sa>=8 24 0 CST
+R BZ 1942 o - Jun 27 24 1 CWT
+R BZ 1945 o - Au 14 23u 1 CPT
+R BZ 1945 o - D 15 24 0 CST
+R BZ 1947 1967 - O Sa>=1 24 0:30 -0530
+R BZ 1948 1968 - F Sa>=8 24 0 CST
+R BZ 1973 o - D 5 0 1 CDT
+R BZ 1974 o - F 9 0 0 CST
+R BZ 1982 o - D 18 0 1 CDT
+R BZ 1983 o - F 12 0 0 CST
+Z America/Belize -5:52:48 - LMT 1912 Ap
+-6 BZ %s
+R Be 1917 o - Ap 5 24 1 -
+R Be 1917 o - S 30 24 0 -
+R Be 1918 o - Ap 13 24 1 -
+R Be 1918 o - S 15 24 0 S
+R Be 1942 o - Ja 11 2 1 D
+R Be 1942 o - O 18 2 0 S
+R Be 1943 o - Mar 21 2 1 D
+R Be 1943 o - O 31 2 0 S
+R Be 1944 1945 - Mar Su>=8 2 1 D
+R Be 1944 1945 - N Su>=1 2 0 S
+R Be 1947 o - May Su>=15 2 1 D
+R Be 1947 o - S Su>=8 2 0 S
+R Be 1948 1952 - May Su>=22 2 1 D
+R Be 1948 1952 - S Su>=1 2 0 S
+R Be 1956 o - May Su>=22 2 1 D
+R Be 1956 o - O lastSu 2 0 S
+Z Atlantic/Bermuda -4:19:18 - LMT 1890
+-4:19:18 Be BMT/BST 1930 Ja 1 2
+-4 Be A%sT 1974 Ap 28 2
+-4 C A%sT 1976
+-4 u A%sT
+R CR 1979 1980 - F lastSu 0 1 D
+R CR 1979 1980 - Jun Su>=1 0 0 S
+R CR 1991 1992 - Ja Sa>=15 0 1 D
+R CR 1991 o - Jul 1 0 0 S
+R CR 1992 o - Mar 15 0 0 S
+Z America/Costa_Rica -5:36:13 - LMT 1890
+-5:36:13 - SJMT 1921 Ja 15
+-6 CR C%sT
+R Q 1928 o - Jun 10 0 1 D
+R Q 1928 o - O 10 0 0 S
+R Q 1940 1942 - Jun Su>=1 0 1 D
+R Q 1940 1942 - S Su>=1 0 0 S
+R Q 1945 1946 - Jun Su>=1 0 1 D
+R Q 1945 1946 - S Su>=1 0 0 S
+R Q 1965 o - Jun 1 0 1 D
+R Q 1965 o - S 30 0 0 S
+R Q 1966 o - May 29 0 1 D
+R Q 1966 o - O 2 0 0 S
+R Q 1967 o - Ap 8 0 1 D
+R Q 1967 1968 - S Su>=8 0 0 S
+R Q 1968 o - Ap 14 0 1 D
+R Q 1969 1977 - Ap lastSu 0 1 D
+R Q 1969 1971 - O lastSu 0 0 S
+R Q 1972 1974 - O 8 0 0 S
+R Q 1975 1977 - O lastSu 0 0 S
+R Q 1978 o - May 7 0 1 D
+R Q 1978 1990 - O Su>=8 0 0 S
+R Q 1979 1980 - Mar Su>=15 0 1 D
+R Q 1981 1985 - May Su>=5 0 1 D
+R Q 1986 1989 - Mar Su>=14 0 1 D
+R Q 1990 1997 - Ap Su>=1 0 1 D
+R Q 1991 1995 - O Su>=8 0s 0 S
+R Q 1996 o - O 6 0s 0 S
+R Q 1997 o - O 12 0s 0 S
+R Q 1998 1999 - Mar lastSu 0s 1 D
+R Q 1998 2003 - O lastSu 0s 0 S
+R Q 2000 2003 - Ap Su>=1 0s 1 D
+R Q 2004 o - Mar lastSu 0s 1 D
+R Q 2006 2010 - O lastSu 0s 0 S
+R Q 2007 o - Mar Su>=8 0s 1 D
+R Q 2008 o - Mar Su>=15 0s 1 D
+R Q 2009 2010 - Mar Su>=8 0s 1 D
+R Q 2011 o - Mar Su>=15 0s 1 D
+R Q 2011 o - N 13 0s 0 S
+R Q 2012 o - Ap 1 0s 1 D
+R Q 2012 ma - N Su>=1 0s 0 S
+R Q 2013 ma - Mar Su>=8 0s 1 D
+Z America/Havana -5:29:28 - LMT 1890
+-5:29:36 - HMT 1925 Jul 19 12
+-5 Q C%sT
+R DO 1966 o - O 30 0 1 EDT
+R DO 1967 o - F 28 0 0 EST
+R DO 1969 1973 - O lastSu 0 0:30 -0430
+R DO 1970 o - F 21 0 0 EST
+R DO 1971 o - Ja 20 0 0 EST
+R DO 1972 1974 - Ja 21 0 0 EST
+Z America/Santo_Domingo -4:39:36 - LMT 1890
+-4:40 - SDMT 1933 Ap 1 12
+-5 DO %s 1974 O 27
+-4 - AST 2000 O 29 2
+-5 u E%sT 2000 D 3 1
+-4 - AST
+R SV 1987 1988 - May Su>=1 0 1 D
+R SV 1987 1988 - S lastSu 0 0 S
+Z America/El_Salvador -5:56:48 - LMT 1921
+-6 SV C%sT
+R GT 1973 o - N 25 0 1 D
+R GT 1974 o - F 24 0 0 S
+R GT 1983 o - May 21 0 1 D
+R GT 1983 o - S 22 0 0 S
+R GT 1991 o - Mar 23 0 1 D
+R GT 1991 o - S 7 0 0 S
+R GT 2006 o - Ap 30 0 1 D
+R GT 2006 o - O 1 0 0 S
+Z America/Guatemala -6:2:4 - LMT 1918 O 5
+-6 GT C%sT
+R HT 1983 o - May 8 0 1 D
+R HT 1984 1987 - Ap lastSu 0 1 D
+R HT 1983 1987 - O lastSu 0 0 S
+R HT 1988 1997 - Ap Su>=1 1s 1 D
+R HT 1988 1997 - O lastSu 1s 0 S
+R HT 2005 2006 - Ap Su>=1 0 1 D
+R HT 2005 2006 - O lastSu 0 0 S
+R HT 2012 2015 - Mar Su>=8 2 1 D
+R HT 2012 2015 - N Su>=1 2 0 S
+R HT 2017 ma - Mar Su>=8 2 1 D
+R HT 2017 ma - N Su>=1 2 0 S
+Z America/Port-au-Prince -4:49:20 - LMT 1890
+-4:49 - PPMT 1917 Ja 24 12
+-5 HT E%sT
+R HN 1987 1988 - May Su>=1 0 1 D
+R HN 1987 1988 - S lastSu 0 0 S
+R HN 2006 o - May Su>=1 0 1 D
+R HN 2006 o - Au M>=1 0 0 S
+Z America/Tegucigalpa -5:48:52 - LMT 1921 Ap
+-6 HN C%sT
+Z America/Jamaica -5:7:10 - LMT 1890
+-5:7:10 - KMT 1912 F
+-5 - EST 1974
+-5 u E%sT 1984
+-5 - EST
+Z America/Martinique -4:4:20 - LMT 1890
+-4:4:20 - FFMT 1911 May
+-4 - AST 1980 Ap 6
+-4 1 ADT 1980 S 28
+-4 - AST
+R NI 1979 1980 - Mar Su>=16 0 1 D
+R NI 1979 1980 - Jun M>=23 0 0 S
+R NI 2005 o - Ap 10 0 1 D
+R NI 2005 o - O Su>=1 0 0 S
+R NI 2006 o - Ap 30 2 1 D
+R NI 2006 o - O Su>=1 1 0 S
+Z America/Managua -5:45:8 - LMT 1890
+-5:45:12 - MMT 1934 Jun 23
+-6 - CST 1973 May
+-5 - EST 1975 F 16
+-6 NI C%sT 1992 Ja 1 4
+-5 - EST 1992 S 24
+-6 - CST 1993
+-5 - EST 1997
+-6 NI C%sT
+Z America/Panama -5:18:8 - LMT 1890
+-5:19:36 - CMT 1908 Ap 22
+-5 - EST
+L America/Panama America/Atikokan
+L America/Panama America/Cayman
+Z America/Puerto_Rico -4:24:25 - LMT 1899 Mar 28 12
+-4 - AST 1942 May 3
+-4 u A%sT 1946
+-4 - AST
+L America/Puerto_Rico America/Anguilla
+L America/Puerto_Rico America/Antigua
+L America/Puerto_Rico America/Aruba
+L America/Puerto_Rico America/Curacao
+L America/Puerto_Rico America/Blanc-Sablon
+L America/Puerto_Rico America/Dominica
+L America/Puerto_Rico America/Grenada
+L America/Puerto_Rico America/Guadeloupe
+L America/Puerto_Rico America/Kralendijk
+L America/Puerto_Rico America/Lower_Princes
+L America/Puerto_Rico America/Marigot
+L America/Puerto_Rico America/Montserrat
+L America/Puerto_Rico America/Port_of_Spain
+L America/Puerto_Rico America/St_Barthelemy
+L America/Puerto_Rico America/St_Kitts
+L America/Puerto_Rico America/St_Lucia
+L America/Puerto_Rico America/St_Thomas
+L America/Puerto_Rico America/St_Vincent
+L America/Puerto_Rico America/Tortola
+Z America/Miquelon -3:44:40 - LMT 1911 May 15
+-4 - AST 1980 May
+-3 - -03 1987
+-3 C -03/-02
+Z America/Grand_Turk -4:44:32 - LMT 1890
+-5:7:10 - KMT 1912 F
+-5 - EST 1979
+-5 u E%sT 2015 Mar 8 2
+-4 - AST 2018 Mar 11 3
+-5 u E%sT
+R A 1930 o - D 1 0 1 -
+R A 1931 o - Ap 1 0 0 -
+R A 1931 o - O 15 0 1 -
+R A 1932 1940 - Mar 1 0 0 -
+R A 1932 1939 - N 1 0 1 -
+R A 1940 o - Jul 1 0 1 -
+R A 1941 o - Jun 15 0 0 -
+R A 1941 o - O 15 0 1 -
+R A 1943 o - Au 1 0 0 -
+R A 1943 o - O 15 0 1 -
+R A 1946 o - Mar 1 0 0 -
+R A 1946 o - O 1 0 1 -
+R A 1963 o - O 1 0 0 -
+R A 1963 o - D 15 0 1 -
+R A 1964 1966 - Mar 1 0 0 -
+R A 1964 1966 - O 15 0 1 -
+R A 1967 o - Ap 2 0 0 -
+R A 1967 1968 - O Su>=1 0 1 -
+R A 1968 1969 - Ap Su>=1 0 0 -
+R A 1974 o - Ja 23 0 1 -
+R A 1974 o - May 1 0 0 -
+R A 1988 o - D 1 0 1 -
+R A 1989 1993 - Mar Su>=1 0 0 -
+R A 1989 1992 - O Su>=15 0 1 -
+R A 1999 o - O Su>=1 0 1 -
+R A 2000 o - Mar 3 0 0 -
+R A 2007 o - D 30 0 1 -
+R A 2008 2009 - Mar Su>=15 0 0 -
+R A 2008 o - O Su>=15 0 1 -
+Z America/Argentina/Buenos_Aires -3:53:48 - LMT 1894 O 31
+-4:16:48 - CMT 1920 May
+-4 - -04 1930 D
+-4 A -04/-03 1969 O 5
+-3 A -03/-02 1999 O 3
+-4 A -04/-03 2000 Mar 3
+-3 A -03/-02
+Z America/Argentina/Cordoba -4:16:48 - LMT 1894 O 31
+-4:16:48 - CMT 1920 May
+-4 - -04 1930 D
+-4 A -04/-03 1969 O 5
+-3 A -03/-02 1991 Mar 3
+-4 - -04 1991 O 20
+-3 A -03/-02 1999 O 3
+-4 A -04/-03 2000 Mar 3
+-3 A -03/-02
+Z America/Argentina/Salta -4:21:40 - LMT 1894 O 31
+-4:16:48 - CMT 1920 May
+-4 - -04 1930 D
+-4 A -04/-03 1969 O 5
+-3 A -03/-02 1991 Mar 3
+-4 - -04 1991 O 20
+-3 A -03/-02 1999 O 3
+-4 A -04/-03 2000 Mar 3
+-3 A -03/-02 2008 O 18
+-3 - -03
+Z America/Argentina/Tucuman -4:20:52 - LMT 1894 O 31
+-4:16:48 - CMT 1920 May
+-4 - -04 1930 D
+-4 A -04/-03 1969 O 5
+-3 A -03/-02 1991 Mar 3
+-4 - -04 1991 O 20
+-3 A -03/-02 1999 O 3
+-4 A -04/-03 2000 Mar 3
+-3 - -03 2004 Jun
+-4 - -04 2004 Jun 13
+-3 A -03/-02
+Z America/Argentina/La_Rioja -4:27:24 - LMT 1894 O 31
+-4:16:48 - CMT 1920 May
+-4 - -04 1930 D
+-4 A -04/-03 1969 O 5
+-3 A -03/-02 1991 Mar
+-4 - -04 1991 May 7
+-3 A -03/-02 1999 O 3
+-4 A -04/-03 2000 Mar 3
+-3 - -03 2004 Jun
+-4 - -04 2004 Jun 20
+-3 A -03/-02 2008 O 18
+-3 - -03
+Z America/Argentina/San_Juan -4:34:4 - LMT 1894 O 31
+-4:16:48 - CMT 1920 May
+-4 - -04 1930 D
+-4 A -04/-03 1969 O 5
+-3 A -03/-02 1991 Mar
+-4 - -04 1991 May 7
+-3 A -03/-02 1999 O 3
+-4 A -04/-03 2000 Mar 3
+-3 - -03 2004 May 31
+-4 - -04 2004 Jul 25
+-3 A -03/-02 2008 O 18
+-3 - -03
+Z America/Argentina/Jujuy -4:21:12 - LMT 1894 O 31
+-4:16:48 - CMT 1920 May
+-4 - -04 1930 D
+-4 A -04/-03 1969 O 5
+-3 A -03/-02 1990 Mar 4
+-4 - -04 1990 O 28
+-4 1 -03 1991 Mar 17
+-4 - -04 1991 O 6
+-3 1 -02 1992
+-3 A -03/-02 1999 O 3
+-4 A -04/-03 2000 Mar 3
+-3 A -03/-02 2008 O 18
+-3 - -03
+Z America/Argentina/Catamarca -4:23:8 - LMT 1894 O 31
+-4:16:48 - CMT 1920 May
+-4 - -04 1930 D
+-4 A -04/-03 1969 O 5
+-3 A -03/-02 1991 Mar 3
+-4 - -04 1991 O 20
+-3 A -03/-02 1999 O 3
+-4 A -04/-03 2000 Mar 3
+-3 - -03 2004 Jun
+-4 - -04 2004 Jun 20
+-3 A -03/-02 2008 O 18
+-3 - -03
+Z America/Argentina/Mendoza -4:35:16 - LMT 1894 O 31
+-4:16:48 - CMT 1920 May
+-4 - -04 1930 D
+-4 A -04/-03 1969 O 5
+-3 A -03/-02 1990 Mar 4
+-4 - -04 1990 O 15
+-4 1 -03 1991 Mar
+-4 - -04 1991 O 15
+-4 1 -03 1992 Mar
+-4 - -04 1992 O 18
+-3 A -03/-02 1999 O 3
+-4 A -04/-03 2000 Mar 3
+-3 - -03 2004 May 23
+-4 - -04 2004 S 26
+-3 A -03/-02 2008 O 18
+-3 - -03
+R Sa 2008 2009 - Mar Su>=8 0 0 -
+R Sa 2007 2008 - O Su>=8 0 1 -
+Z America/Argentina/San_Luis -4:25:24 - LMT 1894 O 31
+-4:16:48 - CMT 1920 May
+-4 - -04 1930 D
+-4 A -04/-03 1969 O 5
+-3 A -03/-02 1990
+-3 1 -02 1990 Mar 14
+-4 - -04 1990 O 15
+-4 1 -03 1991 Mar
+-4 - -04 1991 Jun
+-3 - -03 1999 O 3
+-4 1 -03 2000 Mar 3
+-3 - -03 2004 May 31
+-4 - -04 2004 Jul 25
+-3 A -03/-02 2008 Ja 21
+-4 Sa -04/-03 2009 O 11
+-3 - -03
+Z America/Argentina/Rio_Gallegos -4:36:52 - LMT 1894 O 31
+-4:16:48 - CMT 1920 May
+-4 - -04 1930 D
+-4 A -04/-03 1969 O 5
+-3 A -03/-02 1999 O 3
+-4 A -04/-03 2000 Mar 3
+-3 - -03 2004 Jun
+-4 - -04 2004 Jun 20
+-3 A -03/-02 2008 O 18
+-3 - -03
+Z America/Argentina/Ushuaia -4:33:12 - LMT 1894 O 31
+-4:16:48 - CMT 1920 May
+-4 - -04 1930 D
+-4 A -04/-03 1969 O 5
+-3 A -03/-02 1999 O 3
+-4 A -04/-03 2000 Mar 3
+-3 - -03 2004 May 30
+-4 - -04 2004 Jun 20
+-3 A -03/-02 2008 O 18
+-3 - -03
+Z America/La_Paz -4:32:36 - LMT 1890
+-4:32:36 - CMT 1931 O 15
+-4:32:36 1 BST 1932 Mar 21
+-4 - -04
+R B 1931 o - O 3 11 1 -
+R B 1932 1933 - Ap 1 0 0 -
+R B 1932 o - O 3 0 1 -
+R B 1949 1952 - D 1 0 1 -
+R B 1950 o - Ap 16 1 0 -
+R B 1951 1952 - Ap 1 0 0 -
+R B 1953 o - Mar 1 0 0 -
+R B 1963 o - D 9 0 1 -
+R B 1964 o - Mar 1 0 0 -
+R B 1965 o - Ja 31 0 1 -
+R B 1965 o - Mar 31 0 0 -
+R B 1965 o - D 1 0 1 -
+R B 1966 1968 - Mar 1 0 0 -
+R B 1966 1967 - N 1 0 1 -
+R B 1985 o - N 2 0 1 -
+R B 1986 o - Mar 15 0 0 -
+R B 1986 o - O 25 0 1 -
+R B 1987 o - F 14 0 0 -
+R B 1987 o - O 25 0 1 -
+R B 1988 o - F 7 0 0 -
+R B 1988 o - O 16 0 1 -
+R B 1989 o - Ja 29 0 0 -
+R B 1989 o - O 15 0 1 -
+R B 1990 o - F 11 0 0 -
+R B 1990 o - O 21 0 1 -
+R B 1991 o - F 17 0 0 -
+R B 1991 o - O 20 0 1 -
+R B 1992 o - F 9 0 0 -
+R B 1992 o - O 25 0 1 -
+R B 1993 o - Ja 31 0 0 -
+R B 1993 1995 - O Su>=11 0 1 -
+R B 1994 1995 - F Su>=15 0 0 -
+R B 1996 o - F 11 0 0 -
+R B 1996 o - O 6 0 1 -
+R B 1997 o - F 16 0 0 -
+R B 1997 o - O 6 0 1 -
+R B 1998 o - Mar 1 0 0 -
+R B 1998 o - O 11 0 1 -
+R B 1999 o - F 21 0 0 -
+R B 1999 o - O 3 0 1 -
+R B 2000 o - F 27 0 0 -
+R B 2000 2001 - O Su>=8 0 1 -
+R B 2001 2006 - F Su>=15 0 0 -
+R B 2002 o - N 3 0 1 -
+R B 2003 o - O 19 0 1 -
+R B 2004 o - N 2 0 1 -
+R B 2005 o - O 16 0 1 -
+R B 2006 o - N 5 0 1 -
+R B 2007 o - F 25 0 0 -
+R B 2007 o - O Su>=8 0 1 -
+R B 2008 2017 - O Su>=15 0 1 -
+R B 2008 2011 - F Su>=15 0 0 -
+R B 2012 o - F Su>=22 0 0 -
+R B 2013 2014 - F Su>=15 0 0 -
+R B 2015 o - F Su>=22 0 0 -
+R B 2016 2019 - F Su>=15 0 0 -
+R B 2018 o - N Su>=1 0 1 -
+Z America/Noronha -2:9:40 - LMT 1914
+-2 B -02/-01 1990 S 17
+-2 - -02 1999 S 30
+-2 B -02/-01 2000 O 15
+-2 - -02 2001 S 13
+-2 B -02/-01 2002 O
+-2 - -02
+Z America/Belem -3:13:56 - LMT 1914
+-3 B -03/-02 1988 S 12
+-3 - -03
+Z America/Santarem -3:38:48 - LMT 1914
+-4 B -04/-03 1988 S 12
+-4 - -04 2008 Jun 24
+-3 - -03
+Z America/Fortaleza -2:34 - LMT 1914
+-3 B -03/-02 1990 S 17
+-3 - -03 1999 S 30
+-3 B -03/-02 2000 O 22
+-3 - -03 2001 S 13
+-3 B -03/-02 2002 O
+-3 - -03
+Z America/Recife -2:19:36 - LMT 1914
+-3 B -03/-02 1990 S 17
+-3 - -03 1999 S 30
+-3 B -03/-02 2000 O 15
+-3 - -03 2001 S 13
+-3 B -03/-02 2002 O
+-3 - -03
+Z America/Araguaina -3:12:48 - LMT 1914
+-3 B -03/-02 1990 S 17
+-3 - -03 1995 S 14
+-3 B -03/-02 2003 S 24
+-3 - -03 2012 O 21
+-3 B -03/-02 2013 S
+-3 - -03
+Z America/Maceio -2:22:52 - LMT 1914
+-3 B -03/-02 1990 S 17
+-3 - -03 1995 O 13
+-3 B -03/-02 1996 S 4
+-3 - -03 1999 S 30
+-3 B -03/-02 2000 O 22
+-3 - -03 2001 S 13
+-3 B -03/-02 2002 O
+-3 - -03
+Z America/Bahia -2:34:4 - LMT 1914
+-3 B -03/-02 2003 S 24
+-3 - -03 2011 O 16
+-3 B -03/-02 2012 O 21
+-3 - -03
+Z America/Sao_Paulo -3:6:28 - LMT 1914
+-3 B -03/-02 1963 O 23
+-3 1 -02 1964
+-3 B -03/-02
+Z America/Campo_Grande -3:38:28 - LMT 1914
+-4 B -04/-03
+Z America/Cuiaba -3:44:20 - LMT 1914
+-4 B -04/-03 2003 S 24
+-4 - -04 2004 O
+-4 B -04/-03
+Z America/Porto_Velho -4:15:36 - LMT 1914
+-4 B -04/-03 1988 S 12
+-4 - -04
+Z America/Boa_Vista -4:2:40 - LMT 1914
+-4 B -04/-03 1988 S 12
+-4 - -04 1999 S 30
+-4 B -04/-03 2000 O 15
+-4 - -04
+Z America/Manaus -4:0:4 - LMT 1914
+-4 B -04/-03 1988 S 12
+-4 - -04 1993 S 28
+-4 B -04/-03 1994 S 22
+-4 - -04
+Z America/Eirunepe -4:39:28 - LMT 1914
+-5 B -05/-04 1988 S 12
+-5 - -05 1993 S 28
+-5 B -05/-04 1994 S 22
+-5 - -05 2008 Jun 24
+-4 - -04 2013 N 10
+-5 - -05
+Z America/Rio_Branco -4:31:12 - LMT 1914
+-5 B -05/-04 1988 S 12
+-5 - -05 2008 Jun 24
+-4 - -04 2013 N 10
+-5 - -05
+R x 1927 1931 - S 1 0 1 -
+R x 1928 1932 - Ap 1 0 0 -
+R x 1968 o - N 3 4u 1 -
+R x 1969 o - Mar 30 3u 0 -
+R x 1969 o - N 23 4u 1 -
+R x 1970 o - Mar 29 3u 0 -
+R x 1971 o - Mar 14 3u 0 -
+R x 1970 1972 - O Su>=9 4u 1 -
+R x 1972 1986 - Mar Su>=9 3u 0 -
+R x 1973 o - S 30 4u 1 -
+R x 1974 1987 - O Su>=9 4u 1 -
+R x 1987 o - Ap 12 3u 0 -
+R x 1988 1990 - Mar Su>=9 3u 0 -
+R x 1988 1989 - O Su>=9 4u 1 -
+R x 1990 o - S 16 4u 1 -
+R x 1991 1996 - Mar Su>=9 3u 0 -
+R x 1991 1997 - O Su>=9 4u 1 -
+R x 1997 o - Mar 30 3u 0 -
+R x 1998 o - Mar Su>=9 3u 0 -
+R x 1998 o - S 27 4u 1 -
+R x 1999 o - Ap 4 3u 0 -
+R x 1999 2010 - O Su>=9 4u 1 -
+R x 2000 2007 - Mar Su>=9 3u 0 -
+R x 2008 o - Mar 30 3u 0 -
+R x 2009 o - Mar Su>=9 3u 0 -
+R x 2010 o - Ap Su>=1 3u 0 -
+R x 2011 o - May Su>=2 3u 0 -
+R x 2011 o - Au Su>=16 4u 1 -
+R x 2012 2014 - Ap Su>=23 3u 0 -
+R x 2012 2014 - S Su>=2 4u 1 -
+R x 2016 2018 - May Su>=9 3u 0 -
+R x 2016 2018 - Au Su>=9 4u 1 -
+R x 2019 ma - Ap Su>=2 3u 0 -
+R x 2019 ma - S Su>=2 4u 1 -
+Z America/Santiago -4:42:45 - LMT 1890
+-4:42:45 - SMT 1910 Ja 10
+-5 - -05 1916 Jul
+-4:42:45 - SMT 1918 S 10
+-4 - -04 1919 Jul
+-4:42:45 - SMT 1927 S
+-5 x -05/-04 1932 S
+-4 - -04 1942 Jun
+-5 - -05 1942 Au
+-4 - -04 1946 Jul 15
+-4 1 -03 1946 S
+-4 - -04 1947 Ap
+-5 - -05 1947 May 21 23
+-4 x -04/-03
+Z America/Punta_Arenas -4:43:40 - LMT 1890
+-4:42:45 - SMT 1910 Ja 10
+-5 - -05 1916 Jul
+-4:42:45 - SMT 1918 S 10
+-4 - -04 1919 Jul
+-4:42:45 - SMT 1927 S
+-5 x -05/-04 1932 S
+-4 - -04 1942 Jun
+-5 - -05 1942 Au
+-4 - -04 1947 Ap
+-5 - -05 1947 May 21 23
+-4 x -04/-03 2016 D 4
+-3 - -03
+Z Pacific/Easter -7:17:28 - LMT 1890
+-7:17:28 - EMT 1932 S
+-7 x -07/-06 1982 Mar 14 3u
+-6 x -06/-05
+Z Antarctica/Palmer 0 - -00 1965
+-4 A -04/-03 1969 O 5
+-3 A -03/-02 1982 May
+-4 x -04/-03 2016 D 4
+-3 - -03
+R CO 1992 o - May 3 0 1 -
+R CO 1993 o - Ap 4 0 0 -
+Z America/Bogota -4:56:16 - LMT 1884 Mar 13
+-4:56:16 - BMT 1914 N 23
+-5 CO -05/-04
+R EC 1992 o - N 28 0 1 -
+R EC 1993 o - F 5 0 0 -
+Z America/Guayaquil -5:19:20 - LMT 1890
+-5:14 - QMT 1931
+-5 EC -05/-04
+Z Pacific/Galapagos -5:58:24 - LMT 1931
+-5 - -05 1986
+-6 EC -06/-05
+R FK 1937 1938 - S lastSu 0 1 -
+R FK 1938 1942 - Mar Su>=19 0 0 -
+R FK 1939 o - O 1 0 1 -
+R FK 1940 1942 - S lastSu 0 1 -
+R FK 1943 o - Ja 1 0 0 -
+R FK 1983 o - S lastSu 0 1 -
+R FK 1984 1985 - Ap lastSu 0 0 -
+R FK 1984 o - S 16 0 1 -
+R FK 1985 2000 - S Su>=9 0 1 -
+R FK 1986 2000 - Ap Su>=16 0 0 -
+R FK 2001 2010 - Ap Su>=15 2 0 -
+R FK 2001 2010 - S Su>=1 2 1 -
+Z Atlantic/Stanley -3:51:24 - LMT 1890
+-3:51:24 - SMT 1912 Mar 12
+-4 FK -04/-03 1983 May
+-3 FK -03/-02 1985 S 15
+-4 FK -04/-03 2010 S 5 2
+-3 - -03
+Z America/Cayenne -3:29:20 - LMT 1911 Jul
+-4 - -04 1967 O
+-3 - -03
+Z America/Guyana -3:52:39 - LMT 1911 Au
+-4 - -04 1915 Mar
+-3:45 - -0345 1975 Au
+-3 - -03 1992 Mar 29 1
+-4 - -04
+R y 1975 1988 - O 1 0 1 -
+R y 1975 1978 - Mar 1 0 0 -
+R y 1979 1991 - Ap 1 0 0 -
+R y 1989 o - O 22 0 1 -
+R y 1990 o - O 1 0 1 -
+R y 1991 o - O 6 0 1 -
+R y 1992 o - Mar 1 0 0 -
+R y 1992 o - O 5 0 1 -
+R y 1993 o - Mar 31 0 0 -
+R y 1993 1995 - O 1 0 1 -
+R y 1994 1995 - F lastSu 0 0 -
+R y 1996 o - Mar 1 0 0 -
+R y 1996 2001 - O Su>=1 0 1 -
+R y 1997 o - F lastSu 0 0 -
+R y 1998 2001 - Mar Su>=1 0 0 -
+R y 2002 2004 - Ap Su>=1 0 0 -
+R y 2002 2003 - S Su>=1 0 1 -
+R y 2004 2009 - O Su>=15 0 1 -
+R y 2005 2009 - Mar Su>=8 0 0 -
+R y 2010 ma - O Su>=1 0 1 -
+R y 2010 2012 - Ap Su>=8 0 0 -
+R y 2013 ma - Mar Su>=22 0 0 -
+Z America/Asuncion -3:50:40 - LMT 1890
+-3:50:40 - AMT 1931 O 10
+-4 - -04 1972 O
+-3 - -03 1974 Ap
+-4 y -04/-03
+R PE 1938 o - Ja 1 0 1 -
+R PE 1938 o - Ap 1 0 0 -
+R PE 1938 1939 - S lastSu 0 1 -
+R PE 1939 1940 - Mar Su>=24 0 0 -
+R PE 1986 1987 - Ja 1 0 1 -
+R PE 1986 1987 - Ap 1 0 0 -
+R PE 1990 o - Ja 1 0 1 -
+R PE 1990 o - Ap 1 0 0 -
+R PE 1994 o - Ja 1 0 1 -
+R PE 1994 o - Ap 1 0 0 -
+Z America/Lima -5:8:12 - LMT 1890
+-5:8:36 - LMT 1908 Jul 28
+-5 PE -05/-04
+Z Atlantic/South_Georgia -2:26:8 - LMT 1890
+-2 - -02
+Z America/Paramaribo -3:40:40 - LMT 1911
+-3:40:52 - PMT 1935
+-3:40:36 - PMT 1945 O
+-3:30 - -0330 1984 O
+-3 - -03
+R U 1923 1925 - O 1 0 0:30 -
+R U 1924 1926 - Ap 1 0 0 -
+R U 1933 1938 - O lastSu 0 0:30 -
+R U 1934 1941 - Mar lastSa 24 0 -
+R U 1939 o - O 1 0 0:30 -
+R U 1940 o - O 27 0 0:30 -
+R U 1941 o - Au 1 0 0:30 -
+R U 1942 o - D 14 0 0:30 -
+R U 1943 o - Mar 14 0 0 -
+R U 1959 o - May 24 0 0:30 -
+R U 1959 o - N 15 0 0 -
+R U 1960 o - Ja 17 0 1 -
+R U 1960 o - Mar 6 0 0 -
+R U 1965 o - Ap 4 0 1 -
+R U 1965 o - S 26 0 0 -
+R U 1968 o - May 27 0 0:30 -
+R U 1968 o - D 1 0 0 -
+R U 1970 o - Ap 25 0 1 -
+R U 1970 o - Jun 14 0 0 -
+R U 1972 o - Ap 23 0 1 -
+R U 1972 o - Jul 16 0 0 -
+R U 1974 o - Ja 13 0 1:30 -
+R U 1974 o - Mar 10 0 0:30 -
+R U 1974 o - S 1 0 0 -
+R U 1974 o - D 22 0 1 -
+R U 1975 o - Mar 30 0 0 -
+R U 1976 o - D 19 0 1 -
+R U 1977 o - Mar 6 0 0 -
+R U 1977 o - D 4 0 1 -
+R U 1978 1979 - Mar Su>=1 0 0 -
+R U 1978 o - D 17 0 1 -
+R U 1979 o - Ap 29 0 1 -
+R U 1980 o - Mar 16 0 0 -
+R U 1987 o - D 14 0 1 -
+R U 1988 o - F 28 0 0 -
+R U 1988 o - D 11 0 1 -
+R U 1989 o - Mar 5 0 0 -
+R U 1989 o - O 29 0 1 -
+R U 1990 o - F 25 0 0 -
+R U 1990 1991 - O Su>=21 0 1 -
+R U 1991 1992 - Mar Su>=1 0 0 -
+R U 1992 o - O 18 0 1 -
+R U 1993 o - F 28 0 0 -
+R U 2004 o - S 19 0 1 -
+R U 2005 o - Mar 27 2 0 -
+R U 2005 o - O 9 2 1 -
+R U 2006 2015 - Mar Su>=8 2 0 -
+R U 2006 2014 - O Su>=1 2 1 -
+Z America/Montevideo -3:44:51 - LMT 1908 Jun 10
+-3:44:51 - MMT 1920 May
+-4 - -04 1923 O
+-3:30 U -0330/-03 1942 D 14
+-3 U -03/-0230 1960
+-3 U -03/-02 1968
+-3 U -03/-0230 1970
+-3 U -03/-02 1974
+-3 U -03/-0130 1974 Mar 10
+-3 U -03/-0230 1974 D 22
+-3 U -03/-02
+Z America/Caracas -4:27:44 - LMT 1890
+-4:27:40 - CMT 1912 F 12
+-4:30 - -0430 1965
+-4 - -04 2007 D 9 3
+-4:30 - -0430 2016 May 1 2:30
+-4 - -04
+Z Etc/GMT 0 - GMT
+Z Etc/UTC 0 - UTC
+L Etc/GMT GMT
+L Etc/UTC Etc/Universal
+L Etc/UTC Etc/Zulu
+L Etc/GMT Etc/Greenwich
+L Etc/GMT Etc/GMT-0
+L Etc/GMT Etc/GMT+0
+L Etc/GMT Etc/GMT0
+Z Etc/GMT-14 14 - +14
+Z Etc/GMT-13 13 - +13
+Z Etc/GMT-12 12 - +12
+Z Etc/GMT-11 11 - +11
+Z Etc/GMT-10 10 - +10
+Z Etc/GMT-9 9 - +09
+Z Etc/GMT-8 8 - +08
+Z Etc/GMT-7 7 - +07
+Z Etc/GMT-6 6 - +06
+Z Etc/GMT-5 5 - +05
+Z Etc/GMT-4 4 - +04
+Z Etc/GMT-3 3 - +03
+Z Etc/GMT-2 2 - +02
+Z Etc/GMT-1 1 - +01
+Z Etc/GMT+1 -1 - -01
+Z Etc/GMT+2 -2 - -02
+Z Etc/GMT+3 -3 - -03
+Z Etc/GMT+4 -4 - -04
+Z Etc/GMT+5 -5 - -05
+Z Etc/GMT+6 -6 - -06
+Z Etc/GMT+7 -7 - -07
+Z Etc/GMT+8 -8 - -08
+Z Etc/GMT+9 -9 - -09
+Z Etc/GMT+10 -10 - -10
+Z Etc/GMT+11 -11 - -11
+Z Etc/GMT+12 -12 - -12
+Z Factory 0 - -00
+L Africa/Nairobi Africa/Asmera
+L Africa/Abidjan Africa/Timbuktu
+L America/Argentina/Catamarca America/Argentina/ComodRivadavia
+L America/Adak America/Atka
+L America/Argentina/Buenos_Aires America/Buenos_Aires
+L America/Argentina/Catamarca America/Catamarca
+L America/Panama America/Coral_Harbour
+L America/Argentina/Cordoba America/Cordoba
+L America/Tijuana America/Ensenada
+L America/Indiana/Indianapolis America/Fort_Wayne
+L America/Nuuk America/Godthab
+L America/Indiana/Indianapolis America/Indianapolis
+L America/Argentina/Jujuy America/Jujuy
+L America/Indiana/Knox America/Knox_IN
+L America/Kentucky/Louisville America/Louisville
+L America/Argentina/Mendoza America/Mendoza
+L America/Toronto America/Montreal
+L America/Rio_Branco America/Porto_Acre
+L America/Argentina/Cordoba America/Rosario
+L America/Tijuana America/Santa_Isabel
+L America/Denver America/Shiprock
+L America/Puerto_Rico America/Virgin
+L Pacific/Auckland Antarctica/South_Pole
+L Asia/Ashgabat Asia/Ashkhabad
+L Asia/Kolkata Asia/Calcutta
+L Asia/Shanghai Asia/Chongqing
+L Asia/Shanghai Asia/Chungking
+L Asia/Dhaka Asia/Dacca
+L Asia/Shanghai Asia/Harbin
+L Asia/Urumqi Asia/Kashgar
+L Asia/Kathmandu Asia/Katmandu
+L Asia/Macau Asia/Macao
+L Asia/Yangon Asia/Rangoon
+L Asia/Ho_Chi_Minh Asia/Saigon
+L Asia/Jerusalem Asia/Tel_Aviv
+L Asia/Thimphu Asia/Thimbu
+L Asia/Makassar Asia/Ujung_Pandang
+L Asia/Ulaanbaatar Asia/Ulan_Bator
+L Atlantic/Faroe Atlantic/Faeroe
+L Europe/Oslo Atlantic/Jan_Mayen
+L Australia/Sydney Australia/ACT
+L Australia/Sydney Australia/Canberra
+L Australia/Hobart Australia/Currie
+L Australia/Lord_Howe Australia/LHI
+L Australia/Sydney Australia/NSW
+L Australia/Darwin Australia/North
+L Australia/Brisbane Australia/Queensland
+L Australia/Adelaide Australia/South
+L Australia/Hobart Australia/Tasmania
+L Australia/Melbourne Australia/Victoria
+L Australia/Perth Australia/West
+L Australia/Broken_Hill Australia/Yancowinna
+L America/Rio_Branco Brazil/Acre
+L America/Noronha Brazil/DeNoronha
+L America/Sao_Paulo Brazil/East
+L America/Manaus Brazil/West
+L America/Halifax Canada/Atlantic
+L America/Winnipeg Canada/Central
+L America/Toronto Canada/Eastern
+L America/Edmonton Canada/Mountain
+L America/St_Johns Canada/Newfoundland
+L America/Vancouver Canada/Pacific
+L America/Regina Canada/Saskatchewan
+L America/Whitehorse Canada/Yukon
+L America/Santiago Chile/Continental
+L Pacific/Easter Chile/EasterIsland
+L America/Havana Cuba
+L Africa/Cairo Egypt
+L Europe/Dublin Eire
+L Etc/UTC Etc/UCT
+L Europe/London Europe/Belfast
+L Europe/Chisinau Europe/Tiraspol
+L Europe/London GB
+L Europe/London GB-Eire
+L Etc/GMT GMT+0
+L Etc/GMT GMT-0
+L Etc/GMT GMT0
+L Etc/GMT Greenwich
+L Asia/Hong_Kong Hongkong
+L Atlantic/Reykjavik Iceland
+L Asia/Tehran Iran
+L Asia/Jerusalem Israel
+L America/Jamaica Jamaica
+L Asia/Tokyo Japan
+L Pacific/Kwajalein Kwajalein
+L Africa/Tripoli Libya
+L America/Tijuana Mexico/BajaNorte
+L America/Mazatlan Mexico/BajaSur
+L America/Mexico_City Mexico/General
+L Pacific/Auckland NZ
+L Pacific/Chatham NZ-CHAT
+L America/Denver Navajo
+L Asia/Shanghai PRC
+L Pacific/Kanton Pacific/Enderbury
+L Pacific/Honolulu Pacific/Johnston
+L Pacific/Pohnpei Pacific/Ponape
+L Pacific/Pago_Pago Pacific/Samoa
+L Pacific/Chuuk Pacific/Truk
+L Pacific/Chuuk Pacific/Yap
+L Europe/Warsaw Poland
+L Europe/Lisbon Portugal
+L Asia/Taipei ROC
+L Asia/Seoul ROK
+L Asia/Singapore Singapore
+L Europe/Istanbul Turkey
+L Etc/UTC UCT
+L America/Anchorage US/Alaska
+L America/Adak US/Aleutian
+L America/Phoenix US/Arizona
+L America/Chicago US/Central
+L America/Indiana/Indianapolis US/East-Indiana
+L America/New_York US/Eastern
+L Pacific/Honolulu US/Hawaii
+L America/Indiana/Knox US/Indiana-Starke
+L America/Detroit US/Michigan
+L America/Denver US/Mountain
+L America/Los_Angeles US/Pacific
+L Pacific/Pago_Pago US/Samoa
+L Etc/UTC UTC
+L Etc/UTC Universal
+L Europe/Moscow W-SU
+L Etc/UTC Zulu
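The R/Z/L records above use the compact tzdata source format (one-letter commands as in tzdata.zi: R = Rule, Z = Zone header followed by unprefixed continuation lines, L = Link from a target zone to an alias name). A minimal sketch, assuming that format and a hypothetical local path to the file, of tallying the record kinds:

from collections import Counter

def count_tz_records(path="tzdata.zi"):  # hypothetical path to the compact file
    counts = Counter()
    with open(path, encoding="ascii") as f:
        for raw in f:
            line = raw.split("#", 1)[0].strip()  # drop comments and blank lines
            if not line:
                continue
            tag = line.split()[0]
            # R/Z/L open new records; any other first field continues the previous Zone
            counts[tag if tag in ("R", "Z", "L") else "zone-continuation"] += 1
    return counts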
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/zone.tab b/.local/lib/python3.8/site-packages/pytz/zoneinfo/zone.tab
new file mode 100644
index 0000000..086458f
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pytz/zoneinfo/zone.tab
@@ -0,0 +1,454 @@
+# tzdb timezone descriptions (deprecated version)
+#
+# This file is in the public domain, so clarified as of
+# 2009-05-17 by Arthur David Olson.
+#
+# From Paul Eggert (2021-09-20):
+# This file is intended as a backward-compatibility aid for older programs.
+# New programs should use zone1970.tab.  This file is like zone1970.tab (see
+# zone1970.tab's comments), but with the following additional restrictions:
+#
+# 1.  This file contains only ASCII characters.
+# 2.  The first data column contains exactly one country code.
+#
+# Because of (2), each row stands for an area that is the intersection
+# of a region identified by a country code and of a timezone where civil
+# clocks have agreed since 1970; this is a narrower definition than
+# that of zone1970.tab.
+#
+# Unlike zone1970.tab, a row's third column can be a Link from
+# 'backward' instead of a Zone.
+#
+# This table is intended as an aid for users, to help them select timezones
+# appropriate for their practical needs.  It is not intended to take or
+# endorse any position on legal or territorial claims.
+#
+#country-
+#code	coordinates	TZ			comments
+AD	+4230+00131	Europe/Andorra
+AE	+2518+05518	Asia/Dubai
+AF	+3431+06912	Asia/Kabul
+AG	+1703-06148	America/Antigua
+AI	+1812-06304	America/Anguilla
+AL	+4120+01950	Europe/Tirane
+AM	+4011+04430	Asia/Yerevan
+AO	-0848+01314	Africa/Luanda
+AQ	-7750+16636	Antarctica/McMurdo	New Zealand time - McMurdo, South Pole
+AQ	-6617+11031	Antarctica/Casey	Casey
+AQ	-6835+07758	Antarctica/Davis	Davis
+AQ	-6640+14001	Antarctica/DumontDUrville	Dumont-d'Urville
+AQ	-6736+06253	Antarctica/Mawson	Mawson
+AQ	-6448-06406	Antarctica/Palmer	Palmer
+AQ	-6734-06808	Antarctica/Rothera	Rothera
+AQ	-690022+0393524	Antarctica/Syowa	Syowa
+AQ	-720041+0023206	Antarctica/Troll	Troll
+AQ	-7824+10654	Antarctica/Vostok	Vostok
+AR	-3436-05827	America/Argentina/Buenos_Aires	Buenos Aires (BA, CF)
+AR	-3124-06411	America/Argentina/Cordoba	Argentina (most areas: CB, CC, CN, ER, FM, MN, SE, SF)
+AR	-2447-06525	America/Argentina/Salta	Salta (SA, LP, NQ, RN)
+AR	-2411-06518	America/Argentina/Jujuy	Jujuy (JY)
+AR	-2649-06513	America/Argentina/Tucuman	Tucuman (TM)
+AR	-2828-06547	America/Argentina/Catamarca	Catamarca (CT); Chubut (CH)
+AR	-2926-06651	America/Argentina/La_Rioja	La Rioja (LR)
+AR	-3132-06831	America/Argentina/San_Juan	San Juan (SJ)
+AR	-3253-06849	America/Argentina/Mendoza	Mendoza (MZ)
+AR	-3319-06621	America/Argentina/San_Luis	San Luis (SL)
+AR	-5138-06913	America/Argentina/Rio_Gallegos	Santa Cruz (SC)
+AR	-5448-06818	America/Argentina/Ushuaia	Tierra del Fuego (TF)
+AS	-1416-17042	Pacific/Pago_Pago
+AT	+4813+01620	Europe/Vienna
+AU	-3133+15905	Australia/Lord_Howe	Lord Howe Island
+AU	-5430+15857	Antarctica/Macquarie	Macquarie Island
+AU	-4253+14719	Australia/Hobart	Tasmania
+AU	-3749+14458	Australia/Melbourne	Victoria
+AU	-3352+15113	Australia/Sydney	New South Wales (most areas)
+AU	-3157+14127	Australia/Broken_Hill	New South Wales (Yancowinna)
+AU	-2728+15302	Australia/Brisbane	Queensland (most areas)
+AU	-2016+14900	Australia/Lindeman	Queensland (Whitsunday Islands)
+AU	-3455+13835	Australia/Adelaide	South Australia
+AU	-1228+13050	Australia/Darwin	Northern Territory
+AU	-3157+11551	Australia/Perth	Western Australia (most areas)
+AU	-3143+12852	Australia/Eucla	Western Australia (Eucla)
+AW	+1230-06958	America/Aruba
+AX	+6006+01957	Europe/Mariehamn
+AZ	+4023+04951	Asia/Baku
+BA	+4352+01825	Europe/Sarajevo
+BB	+1306-05937	America/Barbados
+BD	+2343+09025	Asia/Dhaka
+BE	+5050+00420	Europe/Brussels
+BF	+1222-00131	Africa/Ouagadougou
+BG	+4241+02319	Europe/Sofia
+BH	+2623+05035	Asia/Bahrain
+BI	-0323+02922	Africa/Bujumbura
+BJ	+0629+00237	Africa/Porto-Novo
+BL	+1753-06251	America/St_Barthelemy
+BM	+3217-06446	Atlantic/Bermuda
+BN	+0456+11455	Asia/Brunei
+BO	-1630-06809	America/La_Paz
+BQ	+120903-0681636	America/Kralendijk
+BR	-0351-03225	America/Noronha	Atlantic islands
+BR	-0127-04829	America/Belem	Para (east); Amapa
+BR	-0343-03830	America/Fortaleza	Brazil (northeast: MA, PI, CE, RN, PB)
+BR	-0803-03454	America/Recife	Pernambuco
+BR	-0712-04812	America/Araguaina	Tocantins
+BR	-0940-03543	America/Maceio	Alagoas, Sergipe
+BR	-1259-03831	America/Bahia	Bahia
+BR	-2332-04637	America/Sao_Paulo	Brazil (southeast: GO, DF, MG, ES, RJ, SP, PR, SC, RS)
+BR	-2027-05437	America/Campo_Grande	Mato Grosso do Sul
+BR	-1535-05605	America/Cuiaba	Mato Grosso
+BR	-0226-05452	America/Santarem	Para (west)
+BR	-0846-06354	America/Porto_Velho	Rondonia
+BR	+0249-06040	America/Boa_Vista	Roraima
+BR	-0308-06001	America/Manaus	Amazonas (east)
+BR	-0640-06952	America/Eirunepe	Amazonas (west)
+BR	-0958-06748	America/Rio_Branco	Acre
+BS	+2505-07721	America/Nassau
+BT	+2728+08939	Asia/Thimphu
+BW	-2439+02555	Africa/Gaborone
+BY	+5354+02734	Europe/Minsk
+BZ	+1730-08812	America/Belize
+CA	+4734-05243	America/St_Johns	Newfoundland; Labrador (southeast)
+CA	+4439-06336	America/Halifax	Atlantic - NS (most areas); PE
+CA	+4612-05957	America/Glace_Bay	Atlantic - NS (Cape Breton)
+CA	+4606-06447	America/Moncton	Atlantic - New Brunswick
+CA	+5320-06025	America/Goose_Bay	Atlantic - Labrador (most areas)
+CA	+5125-05707	America/Blanc-Sablon	AST - QC (Lower North Shore)
+CA	+4339-07923	America/Toronto	Eastern - ON, QC (most areas)
+CA	+4901-08816	America/Nipigon	Eastern - ON, QC (no DST 1967-73)
+CA	+4823-08915	America/Thunder_Bay	Eastern - ON (Thunder Bay)
+CA	+6344-06828	America/Iqaluit	Eastern - NU (most east areas)
+CA	+6608-06544	America/Pangnirtung	Eastern - NU (Pangnirtung)
+CA	+484531-0913718	America/Atikokan	EST - ON (Atikokan); NU (Coral H)
+CA	+4953-09709	America/Winnipeg	Central - ON (west); Manitoba
+CA	+4843-09434	America/Rainy_River	Central - ON (Rainy R, Ft Frances)
+CA	+744144-0944945	America/Resolute	Central - NU (Resolute)
+CA	+624900-0920459	America/Rankin_Inlet	Central - NU (central)
+CA	+5024-10439	America/Regina	CST - SK (most areas)
+CA	+5017-10750	America/Swift_Current	CST - SK (midwest)
+CA	+5333-11328	America/Edmonton	Mountain - AB; BC (E); SK (W)
+CA	+690650-1050310	America/Cambridge_Bay	Mountain - NU (west)
+CA	+6227-11421	America/Yellowknife	Mountain - NT (central)
+CA	+682059-1334300	America/Inuvik	Mountain - NT (west)
+CA	+4906-11631	America/Creston	MST - BC (Creston)
+CA	+5946-12014	America/Dawson_Creek	MST - BC (Dawson Cr, Ft St John)
+CA	+5848-12242	America/Fort_Nelson	MST - BC (Ft Nelson)
+CA	+6043-13503	America/Whitehorse	MST - Yukon (east)
+CA	+6404-13925	America/Dawson	MST - Yukon (west)
+CA	+4916-12307	America/Vancouver	Pacific - BC (most areas)
+CC	-1210+09655	Indian/Cocos
+CD	-0418+01518	Africa/Kinshasa	Dem. Rep. of Congo (west)
+CD	-1140+02728	Africa/Lubumbashi	Dem. Rep. of Congo (east)
+CF	+0422+01835	Africa/Bangui
+CG	-0416+01517	Africa/Brazzaville
+CH	+4723+00832	Europe/Zurich
+CI	+0519-00402	Africa/Abidjan
+CK	-2114-15946	Pacific/Rarotonga
+CL	-3327-07040	America/Santiago	Chile (most areas)
+CL	-5309-07055	America/Punta_Arenas	Region of Magallanes
+CL	-2709-10926	Pacific/Easter	Easter Island
+CM	+0403+00942	Africa/Douala
+CN	+3114+12128	Asia/Shanghai	Beijing Time
+CN	+4348+08735	Asia/Urumqi	Xinjiang Time
+CO	+0436-07405	America/Bogota
+CR	+0956-08405	America/Costa_Rica
+CU	+2308-08222	America/Havana
+CV	+1455-02331	Atlantic/Cape_Verde
+CW	+1211-06900	America/Curacao
+CX	-1025+10543	Indian/Christmas
+CY	+3510+03322	Asia/Nicosia	Cyprus (most areas)
+CY	+3507+03357	Asia/Famagusta	Northern Cyprus
+CZ	+5005+01426	Europe/Prague
+DE	+5230+01322	Europe/Berlin	Germany (most areas)
+DE	+4742+00841	Europe/Busingen	Busingen
+DJ	+1136+04309	Africa/Djibouti
+DK	+5540+01235	Europe/Copenhagen
+DM	+1518-06124	America/Dominica
+DO	+1828-06954	America/Santo_Domingo
+DZ	+3647+00303	Africa/Algiers
+EC	-0210-07950	America/Guayaquil	Ecuador (mainland)
+EC	-0054-08936	Pacific/Galapagos	Galapagos Islands
+EE	+5925+02445	Europe/Tallinn
+EG	+3003+03115	Africa/Cairo
+EH	+2709-01312	Africa/El_Aaiun
+ER	+1520+03853	Africa/Asmara
+ES	+4024-00341	Europe/Madrid	Spain (mainland)
+ES	+3553-00519	Africa/Ceuta	Ceuta, Melilla
+ES	+2806-01524	Atlantic/Canary	Canary Islands
+ET	+0902+03842	Africa/Addis_Ababa
+FI	+6010+02458	Europe/Helsinki
+FJ	-1808+17825	Pacific/Fiji
+FK	-5142-05751	Atlantic/Stanley
+FM	+0725+15147	Pacific/Chuuk	Chuuk/Truk, Yap
+FM	+0658+15813	Pacific/Pohnpei	Pohnpei/Ponape
+FM	+0519+16259	Pacific/Kosrae	Kosrae
+FO	+6201-00646	Atlantic/Faroe
+FR	+4852+00220	Europe/Paris
+GA	+0023+00927	Africa/Libreville
+GB	+513030-0000731	Europe/London
+GD	+1203-06145	America/Grenada
+GE	+4143+04449	Asia/Tbilisi
+GF	+0456-05220	America/Cayenne
+GG	+492717-0023210	Europe/Guernsey
+GH	+0533-00013	Africa/Accra
+GI	+3608-00521	Europe/Gibraltar
+GL	+6411-05144	America/Nuuk	Greenland (most areas)
+GL	+7646-01840	America/Danmarkshavn	National Park (east coast)
+GL	+7029-02158	America/Scoresbysund	Scoresbysund/Ittoqqortoormiit
+GL	+7634-06847	America/Thule	Thule/Pituffik
+GM	+1328-01639	Africa/Banjul
+GN	+0931-01343	Africa/Conakry
+GP	+1614-06132	America/Guadeloupe
+GQ	+0345+00847	Africa/Malabo
+GR	+3758+02343	Europe/Athens
+GS	-5416-03632	Atlantic/South_Georgia
+GT	+1438-09031	America/Guatemala
+GU	+1328+14445	Pacific/Guam
+GW	+1151-01535	Africa/Bissau
+GY	+0648-05810	America/Guyana
+HK	+2217+11409	Asia/Hong_Kong
+HN	+1406-08713	America/Tegucigalpa
+HR	+4548+01558	Europe/Zagreb
+HT	+1832-07220	America/Port-au-Prince
+HU	+4730+01905	Europe/Budapest
+ID	-0610+10648	Asia/Jakarta	Java, Sumatra
+ID	-0002+10920	Asia/Pontianak	Borneo (west, central)
+ID	-0507+11924	Asia/Makassar	Borneo (east, south); Sulawesi/Celebes, Bali, Nusa Tenggara; Timor (west)
+ID	-0232+14042	Asia/Jayapura	New Guinea (West Papua / Irian Jaya); Malukus/Moluccas
+IE	+5320-00615	Europe/Dublin
+IL	+314650+0351326	Asia/Jerusalem
+IM	+5409-00428	Europe/Isle_of_Man
+IN	+2232+08822	Asia/Kolkata
+IO	-0720+07225	Indian/Chagos
+IQ	+3321+04425	Asia/Baghdad
+IR	+3540+05126	Asia/Tehran
+IS	+6409-02151	Atlantic/Reykjavik
+IT	+4154+01229	Europe/Rome
+JE	+491101-0020624	Europe/Jersey
+JM	+175805-0764736	America/Jamaica
+JO	+3157+03556	Asia/Amman
+JP	+353916+1394441	Asia/Tokyo
+KE	-0117+03649	Africa/Nairobi
+KG	+4254+07436	Asia/Bishkek
+KH	+1133+10455	Asia/Phnom_Penh
+KI	+0125+17300	Pacific/Tarawa	Gilbert Islands
+KI	-0247-17143	Pacific/Kanton	Phoenix Islands
+KI	+0152-15720	Pacific/Kiritimati	Line Islands
+KM	-1141+04316	Indian/Comoro
+KN	+1718-06243	America/St_Kitts
+KP	+3901+12545	Asia/Pyongyang
+KR	+3733+12658	Asia/Seoul
+KW	+2920+04759	Asia/Kuwait
+KY	+1918-08123	America/Cayman
+KZ	+4315+07657	Asia/Almaty	Kazakhstan (most areas)
+KZ	+4448+06528	Asia/Qyzylorda	Qyzylorda/Kyzylorda/Kzyl-Orda
+KZ	+5312+06337	Asia/Qostanay	Qostanay/Kostanay/Kustanay
+KZ	+5017+05710	Asia/Aqtobe	Aqtobe/Aktobe
+KZ	+4431+05016	Asia/Aqtau	Mangghystau/Mankistau
+KZ	+4707+05156	Asia/Atyrau	Atyrau/Atirau/Gur'yev
+KZ	+5113+05121	Asia/Oral	West Kazakhstan
+LA	+1758+10236	Asia/Vientiane
+LB	+3353+03530	Asia/Beirut
+LC	+1401-06100	America/St_Lucia
+LI	+4709+00931	Europe/Vaduz
+LK	+0656+07951	Asia/Colombo
+LR	+0618-01047	Africa/Monrovia
+LS	-2928+02730	Africa/Maseru
+LT	+5441+02519	Europe/Vilnius
+LU	+4936+00609	Europe/Luxembourg
+LV	+5657+02406	Europe/Riga
+LY	+3254+01311	Africa/Tripoli
+MA	+3339-00735	Africa/Casablanca
+MC	+4342+00723	Europe/Monaco
+MD	+4700+02850	Europe/Chisinau
+ME	+4226+01916	Europe/Podgorica
+MF	+1804-06305	America/Marigot
+MG	-1855+04731	Indian/Antananarivo
+MH	+0709+17112	Pacific/Majuro	Marshall Islands (most areas)
+MH	+0905+16720	Pacific/Kwajalein	Kwajalein
+MK	+4159+02126	Europe/Skopje
+ML	+1239-00800	Africa/Bamako
+MM	+1647+09610	Asia/Yangon
+MN	+4755+10653	Asia/Ulaanbaatar	Mongolia (most areas)
+MN	+4801+09139	Asia/Hovd	Bayan-Olgiy, Govi-Altai, Hovd, Uvs, Zavkhan
+MN	+4804+11430	Asia/Choibalsan	Dornod, Sukhbaatar
+MO	+221150+1133230	Asia/Macau
+MP	+1512+14545	Pacific/Saipan
+MQ	+1436-06105	America/Martinique
+MR	+1806-01557	Africa/Nouakchott
+MS	+1643-06213	America/Montserrat
+MT	+3554+01431	Europe/Malta
+MU	-2010+05730	Indian/Mauritius
+MV	+0410+07330	Indian/Maldives
+MW	-1547+03500	Africa/Blantyre
+MX	+1924-09909	America/Mexico_City	Central Time
+MX	+2105-08646	America/Cancun	Eastern Standard Time - Quintana Roo
+MX	+2058-08937	America/Merida	Central Time - Campeche, Yucatan
+MX	+2540-10019	America/Monterrey	Central Time - Durango; Coahuila, Nuevo Leon, Tamaulipas (most areas)
+MX	+2550-09730	America/Matamoros	Central Time US - Coahuila, Nuevo Leon, Tamaulipas (US border)
+MX	+2313-10625	America/Mazatlan	Mountain Time - Baja California Sur, Nayarit, Sinaloa
+MX	+2838-10605	America/Chihuahua	Mountain Time - Chihuahua (most areas)
+MX	+2934-10425	America/Ojinaga	Mountain Time US - Chihuahua (US border)
+MX	+2904-11058	America/Hermosillo	Mountain Standard Time - Sonora
+MX	+3232-11701	America/Tijuana	Pacific Time US - Baja California
+MX	+2048-10515	America/Bahia_Banderas	Central Time - Bahia de Banderas
+MY	+0310+10142	Asia/Kuala_Lumpur	Malaysia (peninsula)
+MY	+0133+11020	Asia/Kuching	Sabah, Sarawak
+MZ	-2558+03235	Africa/Maputo
+NA	-2234+01706	Africa/Windhoek
+NC	-2216+16627	Pacific/Noumea
+NE	+1331+00207	Africa/Niamey
+NF	-2903+16758	Pacific/Norfolk
+NG	+0627+00324	Africa/Lagos
+NI	+1209-08617	America/Managua
+NL	+5222+00454	Europe/Amsterdam
+NO	+5955+01045	Europe/Oslo
+NP	+2743+08519	Asia/Kathmandu
+NR	-0031+16655	Pacific/Nauru
+NU	-1901-16955	Pacific/Niue
+NZ	-3652+17446	Pacific/Auckland	New Zealand (most areas)
+NZ	-4357-17633	Pacific/Chatham	Chatham Islands
+OM	+2336+05835	Asia/Muscat
+PA	+0858-07932	America/Panama
+PE	-1203-07703	America/Lima
+PF	-1732-14934	Pacific/Tahiti	Society Islands
+PF	-0900-13930	Pacific/Marquesas	Marquesas Islands
+PF	-2308-13457	Pacific/Gambier	Gambier Islands
+PG	-0930+14710	Pacific/Port_Moresby	Papua New Guinea (most areas)
+PG	-0613+15534	Pacific/Bougainville	Bougainville
+PH	+1435+12100	Asia/Manila
+PK	+2452+06703	Asia/Karachi
+PL	+5215+02100	Europe/Warsaw
+PM	+4703-05620	America/Miquelon
+PN	-2504-13005	Pacific/Pitcairn
+PR	+182806-0660622	America/Puerto_Rico
+PS	+3130+03428	Asia/Gaza	Gaza Strip
+PS	+313200+0350542	Asia/Hebron	West Bank
+PT	+3843-00908	Europe/Lisbon	Portugal (mainland)
+PT	+3238-01654	Atlantic/Madeira	Madeira Islands
+PT	+3744-02540	Atlantic/Azores	Azores
+PW	+0720+13429	Pacific/Palau
+PY	-2516-05740	America/Asuncion
+QA	+2517+05132	Asia/Qatar
+RE	-2052+05528	Indian/Reunion
+RO	+4426+02606	Europe/Bucharest
+RS	+4450+02030	Europe/Belgrade
+RU	+5443+02030	Europe/Kaliningrad	MSK-01 - Kaliningrad
+RU	+554521+0373704	Europe/Moscow	MSK+00 - Moscow area
+# The obsolescent zone.tab format cannot represent Europe/Simferopol well.
+# Put it in RU section and list as UA.  See "territorial claims" above.
+# Programs should use zone1970.tab instead; see above.
+UA	+4457+03406	Europe/Simferopol	Crimea
+RU	+5836+04939	Europe/Kirov	MSK+00 - Kirov
+RU	+4844+04425	Europe/Volgograd	MSK+00 - Volgograd
+RU	+4621+04803	Europe/Astrakhan	MSK+01 - Astrakhan
+RU	+5134+04602	Europe/Saratov	MSK+01 - Saratov
+RU	+5420+04824	Europe/Ulyanovsk	MSK+01 - Ulyanovsk
+RU	+5312+05009	Europe/Samara	MSK+01 - Samara, Udmurtia
+RU	+5651+06036	Asia/Yekaterinburg	MSK+02 - Urals
+RU	+5500+07324	Asia/Omsk	MSK+03 - Omsk
+RU	+5502+08255	Asia/Novosibirsk	MSK+04 - Novosibirsk
+RU	+5322+08345	Asia/Barnaul	MSK+04 - Altai
+RU	+5630+08458	Asia/Tomsk	MSK+04 - Tomsk
+RU	+5345+08707	Asia/Novokuznetsk	MSK+04 - Kemerovo
+RU	+5601+09250	Asia/Krasnoyarsk	MSK+04 - Krasnoyarsk area
+RU	+5216+10420	Asia/Irkutsk	MSK+05 - Irkutsk, Buryatia
+RU	+5203+11328	Asia/Chita	MSK+06 - Zabaykalsky
+RU	+6200+12940	Asia/Yakutsk	MSK+06 - Lena River
+RU	+623923+1353314	Asia/Khandyga	MSK+06 - Tomponsky, Ust-Maysky
+RU	+4310+13156	Asia/Vladivostok	MSK+07 - Amur River
+RU	+643337+1431336	Asia/Ust-Nera	MSK+07 - Oymyakonsky
+RU	+5934+15048	Asia/Magadan	MSK+08 - Magadan
+RU	+4658+14242	Asia/Sakhalin	MSK+08 - Sakhalin Island
+RU	+6728+15343	Asia/Srednekolymsk	MSK+08 - Sakha (E); North Kuril Is
+RU	+5301+15839	Asia/Kamchatka	MSK+09 - Kamchatka
+RU	+6445+17729	Asia/Anadyr	MSK+09 - Bering Sea
+RW	-0157+03004	Africa/Kigali
+SA	+2438+04643	Asia/Riyadh
+SB	-0932+16012	Pacific/Guadalcanal
+SC	-0440+05528	Indian/Mahe
+SD	+1536+03232	Africa/Khartoum
+SE	+5920+01803	Europe/Stockholm
+SG	+0117+10351	Asia/Singapore
+SH	-1555-00542	Atlantic/St_Helena
+SI	+4603+01431	Europe/Ljubljana
+SJ	+7800+01600	Arctic/Longyearbyen
+SK	+4809+01707	Europe/Bratislava
+SL	+0830-01315	Africa/Freetown
+SM	+4355+01228	Europe/San_Marino
+SN	+1440-01726	Africa/Dakar
+SO	+0204+04522	Africa/Mogadishu
+SR	+0550-05510	America/Paramaribo
+SS	+0451+03137	Africa/Juba
+ST	+0020+00644	Africa/Sao_Tome
+SV	+1342-08912	America/El_Salvador
+SX	+180305-0630250	America/Lower_Princes
+SY	+3330+03618	Asia/Damascus
+SZ	-2618+03106	Africa/Mbabane
+TC	+2128-07108	America/Grand_Turk
+TD	+1207+01503	Africa/Ndjamena
+TF	-492110+0701303	Indian/Kerguelen
+TG	+0608+00113	Africa/Lome
+TH	+1345+10031	Asia/Bangkok
+TJ	+3835+06848	Asia/Dushanbe
+TK	-0922-17114	Pacific/Fakaofo
+TL	-0833+12535	Asia/Dili
+TM	+3757+05823	Asia/Ashgabat
+TN	+3648+01011	Africa/Tunis
+TO	-210800-1751200	Pacific/Tongatapu
+TR	+4101+02858	Europe/Istanbul
+TT	+1039-06131	America/Port_of_Spain
+TV	-0831+17913	Pacific/Funafuti
+TW	+2503+12130	Asia/Taipei
+TZ	-0648+03917	Africa/Dar_es_Salaam
+UA	+5026+03031	Europe/Kiev	Ukraine (most areas)
+UA	+4837+02218	Europe/Uzhgorod	Transcarpathia
+UA	+4750+03510	Europe/Zaporozhye	Zaporozhye and east Lugansk
+UG	+0019+03225	Africa/Kampala
+UM	+2813-17722	Pacific/Midway	Midway Islands
+UM	+1917+16637	Pacific/Wake	Wake Island
+US	+404251-0740023	America/New_York	Eastern (most areas)
+US	+421953-0830245	America/Detroit	Eastern - MI (most areas)
+US	+381515-0854534	America/Kentucky/Louisville	Eastern - KY (Louisville area)
+US	+364947-0845057	America/Kentucky/Monticello	Eastern - KY (Wayne)
+US	+394606-0860929	America/Indiana/Indianapolis	Eastern - IN (most areas)
+US	+384038-0873143	America/Indiana/Vincennes	Eastern - IN (Da, Du, K, Mn)
+US	+410305-0863611	America/Indiana/Winamac	Eastern - IN (Pulaski)
+US	+382232-0862041	America/Indiana/Marengo	Eastern - IN (Crawford)
+US	+382931-0871643	America/Indiana/Petersburg	Eastern - IN (Pike)
+US	+384452-0850402	America/Indiana/Vevay	Eastern - IN (Switzerland)
+US	+415100-0873900	America/Chicago	Central (most areas)
+US	+375711-0864541	America/Indiana/Tell_City	Central - IN (Perry)
+US	+411745-0863730	America/Indiana/Knox	Central - IN (Starke)
+US	+450628-0873651	America/Menominee	Central - MI (Wisconsin border)
+US	+470659-1011757	America/North_Dakota/Center	Central - ND (Oliver)
+US	+465042-1012439	America/North_Dakota/New_Salem	Central - ND (Morton rural)
+US	+471551-1014640	America/North_Dakota/Beulah	Central - ND (Mercer)
+US	+394421-1045903	America/Denver	Mountain (most areas)
+US	+433649-1161209	America/Boise	Mountain - ID (south); OR (east)
+US	+332654-1120424	America/Phoenix	MST - Arizona (except Navajo)
+US	+340308-1181434	America/Los_Angeles	Pacific
+US	+611305-1495401	America/Anchorage	Alaska (most areas)
+US	+581807-1342511	America/Juneau	Alaska - Juneau area
+US	+571035-1351807	America/Sitka	Alaska - Sitka area
+US	+550737-1313435	America/Metlakatla	Alaska - Annette Island
+US	+593249-1394338	America/Yakutat	Alaska - Yakutat
+US	+643004-1652423	America/Nome	Alaska (west)
+US	+515248-1763929	America/Adak	Aleutian Islands
+US	+211825-1575130	Pacific/Honolulu	Hawaii
+UY	-345433-0561245	America/Montevideo
+UZ	+3940+06648	Asia/Samarkand	Uzbekistan (west)
+UZ	+4120+06918	Asia/Tashkent	Uzbekistan (east)
+VA	+415408+0122711	Europe/Vatican
+VC	+1309-06114	America/St_Vincent
+VE	+1030-06656	America/Caracas
+VG	+1827-06437	America/Tortola
+VI	+1821-06456	America/St_Thomas
+VN	+1045+10640	Asia/Ho_Chi_Minh
+VU	-1740+16825	Pacific/Efate
+WF	-1318-17610	Pacific/Wallis
+WS	-1350-17144	Pacific/Apia
+YE	+1245+04512	Asia/Aden
+YT	-1247+04514	Indian/Mayotte
+ZA	-2615+02800	Africa/Johannesburg
+ZM	-1525+02817	Africa/Lusaka
+ZW	-1750+03103	Africa/Harare
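The header comments above spell out zone.tab's layout: tab-separated columns holding exactly one ISO 3166 country code, ISO 6709 coordinates, a TZ name, and an optional comment, in ASCII only. A minimal parsing sketch under those assumptions (the path is the pytz copy added in this commit):

def read_zone_tab(path=".local/lib/python3.8/site-packages/pytz/zoneinfo/zone.tab"):
    rows = []
    with open(path, encoding="ascii") as f:  # the header guarantees ASCII-only content
        for line in f:
            if line.startswith("#") or not line.strip():
                continue  # skip comment and blank lines
            cols = line.rstrip("\n").split("\t")
            country, coords, tz = cols[0], cols[1], cols[2]
            comment = cols[3] if len(cols) > 3 else ""  # present only for multi-zone countries
            rows.append((country, coords, tz, comment))
    return rows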
diff --git a/.local/lib/python3.8/site-packages/pytz/zoneinfo/zone1970.tab b/.local/lib/python3.8/site-packages/pytz/zoneinfo/zone1970.tab
new file mode 100644
index 0000000..c614be8
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/pytz/zoneinfo/zone1970.tab
@@ -0,0 +1,374 @@
+# tzdb timezone descriptions
+#
+# This file is in the public domain.
+#
+# From Paul Eggert (2018-06-27):
+# This file contains a table where each row stands for a timezone where
+# civil timestamps have agreed since 1970.  Columns are separated by
+# a single tab.  Lines beginning with '#' are comments.  All text uses
+# UTF-8 encoding.  The columns of the table are as follows:
+#
+# 1.  The countries that overlap the timezone, as a comma-separated list
+#     of ISO 3166 2-character country codes.  See the file 'iso3166.tab'.
+# 2.  Latitude and longitude of the timezone's principal location
+#     in ISO 6709 sign-degrees-minutes-seconds format,
+#     either ±DDMM±DDDMM or ±DDMMSS±DDDMMSS,
+#     first latitude (+ is north), then longitude (+ is east).
+# 3.  Timezone name used in value of TZ environment variable.
+#     Please see the theory.html file for how these names are chosen.
+#     If multiple timezones overlap a country, each has a row in the
+#     table, with each column 1 containing the country code.
+# 4.  Comments; present if and only if a country has multiple timezones.
+#
+# If a timezone covers multiple countries, the most-populous city is used,
+# and that country is listed first in column 1; any other countries
+# are listed alphabetically by country code.  The table is sorted
+# first by country code, then (if possible) by an order within the
+# country that (1) makes some geographical sense, and (2) puts the
+# most populous timezones first, where that does not contradict (1).
+#
+# This table is intended as an aid for users, to help them select timezones
+# appropriate for their practical needs.  It is not intended to take or
+# endorse any position on legal or territorial claims.
+#
+#country-
+#codes	coordinates	TZ	comments
+AD	+4230+00131	Europe/Andorra
+AE,OM	+2518+05518	Asia/Dubai
+AF	+3431+06912	Asia/Kabul
+AL	+4120+01950	Europe/Tirane
+AM	+4011+04430	Asia/Yerevan
+AQ	-6617+11031	Antarctica/Casey	Casey
+AQ	-6835+07758	Antarctica/Davis	Davis
+AQ	-6736+06253	Antarctica/Mawson	Mawson
+AQ	-6448-06406	Antarctica/Palmer	Palmer
+AQ	-6734-06808	Antarctica/Rothera	Rothera
+AQ	-720041+0023206	Antarctica/Troll	Troll
+AQ	-7824+10654	Antarctica/Vostok	Vostok
+AR	-3436-05827	America/Argentina/Buenos_Aires	Buenos Aires (BA, CF)
+AR	-3124-06411	America/Argentina/Cordoba	Argentina (most areas: CB, CC, CN, ER, FM, MN, SE, SF)
+AR	-2447-06525	America/Argentina/Salta	Salta (SA, LP, NQ, RN)
+AR	-2411-06518	America/Argentina/Jujuy	Jujuy (JY)
+AR	-2649-06513	America/Argentina/Tucuman	Tucumán (TM)
+AR	-2828-06547	America/Argentina/Catamarca	Catamarca (CT); Chubut (CH)
+AR	-2926-06651	America/Argentina/La_Rioja	La Rioja (LR)
+AR	-3132-06831	America/Argentina/San_Juan	San Juan (SJ)
+AR	-3253-06849	America/Argentina/Mendoza	Mendoza (MZ)
+AR	-3319-06621	America/Argentina/San_Luis	San Luis (SL)
+AR	-5138-06913	America/Argentina/Rio_Gallegos	Santa Cruz (SC)
+AR	-5448-06818	America/Argentina/Ushuaia	Tierra del Fuego (TF)
+AS,UM	-1416-17042	Pacific/Pago_Pago	Samoa, Midway
+AT	+4813+01620	Europe/Vienna
+AU	-3133+15905	Australia/Lord_Howe	Lord Howe Island
+AU	-5430+15857	Antarctica/Macquarie	Macquarie Island
+AU	-4253+14719	Australia/Hobart	Tasmania
+AU	-3749+14458	Australia/Melbourne	Victoria
+AU	-3352+15113	Australia/Sydney	New South Wales (most areas)
+AU	-3157+14127	Australia/Broken_Hill	New South Wales (Yancowinna)
+AU	-2728+15302	Australia/Brisbane	Queensland (most areas)
+AU	-2016+14900	Australia/Lindeman	Queensland (Whitsunday Islands)
+AU	-3455+13835	Australia/Adelaide	South Australia
+AU	-1228+13050	Australia/Darwin	Northern Territory
+AU	-3157+11551	Australia/Perth	Western Australia (most areas)
+AU	-3143+12852	Australia/Eucla	Western Australia (Eucla)
+AZ	+4023+04951	Asia/Baku
+BB	+1306-05937	America/Barbados
+BD	+2343+09025	Asia/Dhaka
+BE	+5050+00420	Europe/Brussels
+BG	+4241+02319	Europe/Sofia
+BM	+3217-06446	Atlantic/Bermuda
+BN	+0456+11455	Asia/Brunei
+BO	-1630-06809	America/La_Paz
+BR	-0351-03225	America/Noronha	Atlantic islands
+BR	-0127-04829	America/Belem	Pará (east); Amapá
+BR	-0343-03830	America/Fortaleza	Brazil (northeast: MA, PI, CE, RN, PB)
+BR	-0803-03454	America/Recife	Pernambuco
+BR	-0712-04812	America/Araguaina	Tocantins
+BR	-0940-03543	America/Maceio	Alagoas, Sergipe
+BR	-1259-03831	America/Bahia	Bahia
+BR	-2332-04637	America/Sao_Paulo	Brazil (southeast: GO, DF, MG, ES, RJ, SP, PR, SC, RS)
+BR	-2027-05437	America/Campo_Grande	Mato Grosso do Sul
+BR	-1535-05605	America/Cuiaba	Mato Grosso
+BR	-0226-05452	America/Santarem	Pará (west)
+BR	-0846-06354	America/Porto_Velho	Rondônia
+BR	+0249-06040	America/Boa_Vista	Roraima
+BR	-0308-06001	America/Manaus	Amazonas (east)
+BR	-0640-06952	America/Eirunepe	Amazonas (west)
+BR	-0958-06748	America/Rio_Branco	Acre
+BT	+2728+08939	Asia/Thimphu
+BY	+5354+02734	Europe/Minsk
+BZ	+1730-08812	America/Belize
+CA	+4734-05243	America/St_Johns	Newfoundland; Labrador (southeast)
+CA	+4439-06336	America/Halifax	Atlantic - NS (most areas); PE
+CA	+4612-05957	America/Glace_Bay	Atlantic - NS (Cape Breton)
+CA	+4606-06447	America/Moncton	Atlantic - New Brunswick
+CA	+5320-06025	America/Goose_Bay	Atlantic - Labrador (most areas)
+CA,BS	+4339-07923	America/Toronto	Eastern - ON, QC (most areas), Bahamas
+CA	+4901-08816	America/Nipigon	Eastern - ON, QC (no DST 1967-73)
+CA	+4823-08915	America/Thunder_Bay	Eastern - ON (Thunder Bay)
+CA	+6344-06828	America/Iqaluit	Eastern - NU (most east areas)
+CA	+6608-06544	America/Pangnirtung	Eastern - NU (Pangnirtung)
+CA	+4953-09709	America/Winnipeg	Central - ON (west); Manitoba
+CA	+4843-09434	America/Rainy_River	Central - ON (Rainy R, Ft Frances)
+CA	+744144-0944945	America/Resolute	Central - NU (Resolute)
+CA	+624900-0920459	America/Rankin_Inlet	Central - NU (central)
+CA	+5024-10439	America/Regina	CST - SK (most areas)
+CA	+5017-10750	America/Swift_Current	CST - SK (midwest)
+CA	+5333-11328	America/Edmonton	Mountain - AB; BC (E); SK (W)
+CA	+690650-1050310	America/Cambridge_Bay	Mountain - NU (west)
+CA	+6227-11421	America/Yellowknife	Mountain - NT (central)
+CA	+682059-1334300	America/Inuvik	Mountain - NT (west)
+CA	+5946-12014	America/Dawson_Creek	MST - BC (Dawson Cr, Ft St John)
+CA	+5848-12242	America/Fort_Nelson	MST - BC (Ft Nelson)
+CA	+6043-13503	America/Whitehorse	MST - Yukon (east)
+CA	+6404-13925	America/Dawson	MST - Yukon (west)
+CA	+4916-12307	America/Vancouver	Pacific - BC (most areas)
+CC	-1210+09655	Indian/Cocos
+CH,DE,LI	+4723+00832	Europe/Zurich	Swiss time
+CI,BF,GH,GM,GN,ML,MR,SH,SL,SN,TG	+0519-00402	Africa/Abidjan
+CK	-2114-15946	Pacific/Rarotonga
+CL	-3327-07040	America/Santiago	Chile (most areas)
+CL	-5309-07055	America/Punta_Arenas	Region of Magallanes
+CL	-2709-10926	Pacific/Easter	Easter Island
+CN	+3114+12128	Asia/Shanghai	Beijing Time
+CN	+4348+08735	Asia/Urumqi	Xinjiang Time
+CO	+0436-07405	America/Bogota
+CR	+0956-08405	America/Costa_Rica
+CU	+2308-08222	America/Havana
+CV	+1455-02331	Atlantic/Cape_Verde
+CX	-1025+10543	Indian/Christmas
+CY	+3510+03322	Asia/Nicosia	Cyprus (most areas)
+CY	+3507+03357	Asia/Famagusta	Northern Cyprus
+CZ,SK	+5005+01426	Europe/Prague
+DE	+5230+01322	Europe/Berlin	Germany (most areas)
+DK	+5540+01235	Europe/Copenhagen
+DO	+1828-06954	America/Santo_Domingo
+DZ	+3647+00303	Africa/Algiers
+EC	-0210-07950	America/Guayaquil	Ecuador (mainland)
+EC	-0054-08936	Pacific/Galapagos	Galápagos Islands
+EE	+5925+02445	Europe/Tallinn
+EG	+3003+03115	Africa/Cairo
+EH	+2709-01312	Africa/El_Aaiun
+ES	+4024-00341	Europe/Madrid	Spain (mainland)
+ES	+3553-00519	Africa/Ceuta	Ceuta, Melilla
+ES	+2806-01524	Atlantic/Canary	Canary Islands
+FI,AX	+6010+02458	Europe/Helsinki
+FJ	-1808+17825	Pacific/Fiji
+FK	-5142-05751	Atlantic/Stanley
+FM	+0725+15147	Pacific/Chuuk	Chuuk/Truk, Yap
+FM	+0658+15813	Pacific/Pohnpei	Pohnpei/Ponape
+FM	+0519+16259	Pacific/Kosrae	Kosrae
+FO	+6201-00646	Atlantic/Faroe
+FR	+4852+00220	Europe/Paris
+GB,GG,IM,JE	+513030-0000731	Europe/London
+GE	+4143+04449	Asia/Tbilisi
+GF	+0456-05220	America/Cayenne
+GI	+3608-00521	Europe/Gibraltar
+GL	+6411-05144	America/Nuuk	Greenland (most areas)
+GL	+7646-01840	America/Danmarkshavn	National Park (east coast)
+GL	+7029-02158	America/Scoresbysund	Scoresbysund/Ittoqqortoormiit
+GL	+7634-06847	America/Thule	Thule/Pituffik
+GR	+3758+02343	Europe/Athens
+GS	-5416-03632	Atlantic/South_Georgia
+GT	+1438-09031	America/Guatemala
+GU,MP	+1328+14445	Pacific/Guam
+GW	+1151-01535	Africa/Bissau
+GY	+0648-05810	America/Guyana
+HK	+2217+11409	Asia/Hong_Kong
+HN	+1406-08713	America/Tegucigalpa
+HT	+1832-07220	America/Port-au-Prince
+HU	+4730+01905	Europe/Budapest
+ID	-0610+10648	Asia/Jakarta	Java, Sumatra
+ID	-0002+10920	Asia/Pontianak	Borneo (west, central)
+ID	-0507+11924	Asia/Makassar	Borneo (east, south); Sulawesi/Celebes, Bali, Nusa Tenggara; Timor (west)
+ID	-0232+14042	Asia/Jayapura	New Guinea (West Papua / Irian Jaya); Malukus/Moluccas
+IE	+5320-00615	Europe/Dublin
+IL	+314650+0351326	Asia/Jerusalem
+IN	+2232+08822	Asia/Kolkata
+IO	-0720+07225	Indian/Chagos
+IQ	+3321+04425	Asia/Baghdad
+IR	+3540+05126	Asia/Tehran
+IS	+6409-02151	Atlantic/Reykjavik
+IT,SM,VA	+4154+01229	Europe/Rome
+JM	+175805-0764736	America/Jamaica
+JO	+3157+03556	Asia/Amman
+JP	+353916+1394441	Asia/Tokyo
+KE,DJ,ER,ET,KM,MG,SO,TZ,UG,YT	-0117+03649	Africa/Nairobi
+KG	+4254+07436	Asia/Bishkek
+KI	+0125+17300	Pacific/Tarawa	Gilbert Islands
+KI	-0247-17143	Pacific/Kanton	Phoenix Islands
+KI	+0152-15720	Pacific/Kiritimati	Line Islands
+KP	+3901+12545	Asia/Pyongyang
+KR	+3733+12658	Asia/Seoul
+KZ	+4315+07657	Asia/Almaty	Kazakhstan (most areas)
+KZ	+4448+06528	Asia/Qyzylorda	Qyzylorda/Kyzylorda/Kzyl-Orda
+KZ	+5312+06337	Asia/Qostanay	Qostanay/Kostanay/Kustanay
+KZ	+5017+05710	Asia/Aqtobe	Aqtöbe/Aktobe
+KZ	+4431+05016	Asia/Aqtau	Mangghystaū/Mankistau
+KZ	+4707+05156	Asia/Atyrau	Atyraū/Atirau/Gur'yev
+KZ	+5113+05121	Asia/Oral	West Kazakhstan
+LB	+3353+03530	Asia/Beirut
+LK	+0656+07951	Asia/Colombo
+LR	+0618-01047	Africa/Monrovia
+LT	+5441+02519	Europe/Vilnius
+LU	+4936+00609	Europe/Luxembourg
+LV	+5657+02406	Europe/Riga
+LY	+3254+01311	Africa/Tripoli
+MA	+3339-00735	Africa/Casablanca
+MC	+4342+00723	Europe/Monaco
+MD	+4700+02850	Europe/Chisinau
+MH	+0709+17112	Pacific/Majuro	Marshall Islands (most areas)
+MH	+0905+16720	Pacific/Kwajalein	Kwajalein
+MM	+1647+09610	Asia/Yangon
+MN	+4755+10653	Asia/Ulaanbaatar	Mongolia (most areas)
+MN	+4801+09139	Asia/Hovd	Bayan-Ölgii, Govi-Altai, Hovd, Uvs, Zavkhan
+MN	+4804+11430	Asia/Choibalsan	Dornod, Sükhbaatar
+MO	+221150+1133230	Asia/Macau
+MQ	+1436-06105	America/Martinique
+MT	+3554+01431	Europe/Malta
+MU	-2010+05730	Indian/Mauritius
+MV	+0410+07330	Indian/Maldives
+MX	+1924-09909	America/Mexico_City	Central Time
+MX	+2105-08646	America/Cancun	Eastern Standard Time - Quintana Roo
+MX	+2058-08937	America/Merida	Central Time - Campeche, Yucatán
+MX	+2540-10019	America/Monterrey	Central Time - Durango; Coahuila, Nuevo León, Tamaulipas (most areas)
+MX	+2550-09730	America/Matamoros	Central Time US - Coahuila, Nuevo León, Tamaulipas (US border)
+MX	+2313-10625	America/Mazatlan	Mountain Time - Baja California Sur, Nayarit, Sinaloa
+MX	+2838-10605	America/Chihuahua	Mountain Time - Chihuahua (most areas)
+MX	+2934-10425	America/Ojinaga	Mountain Time US - Chihuahua (US border)
+MX	+2904-11058	America/Hermosillo	Mountain Standard Time - Sonora
+MX	+3232-11701	America/Tijuana	Pacific Time US - Baja California
+MX	+2048-10515	America/Bahia_Banderas	Central Time - Bahía de Banderas
+MY	+0310+10142	Asia/Kuala_Lumpur	Malaysia (peninsula)
+MY	+0133+11020	Asia/Kuching	Sabah, Sarawak
+MZ,BI,BW,CD,MW,RW,ZM,ZW	-2558+03235	Africa/Maputo	Central Africa Time
+NA	-2234+01706	Africa/Windhoek
+NC	-2216+16627	Pacific/Noumea
+NF	-2903+16758	Pacific/Norfolk
+NG,AO,BJ,CD,CF,CG,CM,GA,GQ,NE	+0627+00324	Africa/Lagos	West Africa Time
+NI	+1209-08617	America/Managua
+NL	+5222+00454	Europe/Amsterdam
+NO,SJ	+5955+01045	Europe/Oslo
+NP	+2743+08519	Asia/Kathmandu
+NR	-0031+16655	Pacific/Nauru
+NU	-1901-16955	Pacific/Niue
+NZ,AQ	-3652+17446	Pacific/Auckland	New Zealand time
+NZ	-4357-17633	Pacific/Chatham	Chatham Islands
+PA,CA,KY	+0858-07932	America/Panama	EST - Panama, Cayman, ON (Atikokan), NU (Coral H)
+PE	-1203-07703	America/Lima
+PF	-1732-14934	Pacific/Tahiti	Society Islands
+PF	-0900-13930	Pacific/Marquesas	Marquesas Islands
+PF	-2308-13457	Pacific/Gambier	Gambier Islands
+PG,AQ	-0930+14710	Pacific/Port_Moresby	Papua New Guinea (most areas), Dumont d'Urville
+PG	-0613+15534	Pacific/Bougainville	Bougainville
+PH	+1435+12100	Asia/Manila
+PK	+2452+06703	Asia/Karachi
+PL	+5215+02100	Europe/Warsaw
+PM	+4703-05620	America/Miquelon
+PN	-2504-13005	Pacific/Pitcairn
+PR,AG,CA,AI,AW,BL,BQ,CW,DM,GD,GP,KN,LC,MF,MS,SX,TT,VC,VG,VI	+182806-0660622	America/Puerto_Rico	AST
+PS	+3130+03428	Asia/Gaza	Gaza Strip
+PS	+313200+0350542	Asia/Hebron	West Bank
+PT	+3843-00908	Europe/Lisbon	Portugal (mainland)
+PT	+3238-01654	Atlantic/Madeira	Madeira Islands
+PT	+3744-02540	Atlantic/Azores	Azores
+PW	+0720+13429	Pacific/Palau
+PY	-2516-05740	America/Asuncion
+QA,BH	+2517+05132	Asia/Qatar
+RE,TF	-2052+05528	Indian/Reunion	Réunion, Crozet, Scattered Islands
+RO	+4426+02606	Europe/Bucharest
+RS,BA,HR,ME,MK,SI	+4450+02030	Europe/Belgrade
+RU	+5443+02030	Europe/Kaliningrad	MSK-01 - Kaliningrad
+RU	+554521+0373704	Europe/Moscow	MSK+00 - Moscow area
+# Mention RU and UA alphabetically.  See "territorial claims" above.
+RU,UA	+4457+03406	Europe/Simferopol	Crimea
+RU	+5836+04939	Europe/Kirov	MSK+00 - Kirov
+RU	+4844+04425	Europe/Volgograd	MSK+00 - Volgograd
+RU	+4621+04803	Europe/Astrakhan	MSK+01 - Astrakhan
+RU	+5134+04602	Europe/Saratov	MSK+01 - Saratov
+RU	+5420+04824	Europe/Ulyanovsk	MSK+01 - Ulyanovsk
+RU	+5312+05009	Europe/Samara	MSK+01 - Samara, Udmurtia
+RU	+5651+06036	Asia/Yekaterinburg	MSK+02 - Urals
+RU	+5500+07324	Asia/Omsk	MSK+03 - Omsk
+RU	+5502+08255	Asia/Novosibirsk	MSK+04 - Novosibirsk
+RU	+5322+08345	Asia/Barnaul	MSK+04 - Altai
+RU	+5630+08458	Asia/Tomsk	MSK+04 - Tomsk
+RU	+5345+08707	Asia/Novokuznetsk	MSK+04 - Kemerovo
+RU	+5601+09250	Asia/Krasnoyarsk	MSK+04 - Krasnoyarsk area
+RU	+5216+10420	Asia/Irkutsk	MSK+05 - Irkutsk, Buryatia
+RU	+5203+11328	Asia/Chita	MSK+06 - Zabaykalsky
+RU	+6200+12940	Asia/Yakutsk	MSK+06 - Lena River
+RU	+623923+1353314	Asia/Khandyga	MSK+06 - Tomponsky, Ust-Maysky
+RU	+4310+13156	Asia/Vladivostok	MSK+07 - Amur River
+RU	+643337+1431336	Asia/Ust-Nera	MSK+07 - Oymyakonsky
+RU	+5934+15048	Asia/Magadan	MSK+08 - Magadan
+RU	+4658+14242	Asia/Sakhalin	MSK+08 - Sakhalin Island
+RU	+6728+15343	Asia/Srednekolymsk	MSK+08 - Sakha (E); North Kuril Is
+RU	+5301+15839	Asia/Kamchatka	MSK+09 - Kamchatka
+RU	+6445+17729	Asia/Anadyr	MSK+09 - Bering Sea
+SA,AQ,KW,YE	+2438+04643	Asia/Riyadh	Arabia, Syowa
+SB	-0932+16012	Pacific/Guadalcanal
+SC	-0440+05528	Indian/Mahe
+SD	+1536+03232	Africa/Khartoum
+SE	+5920+01803	Europe/Stockholm
+SG,MY	+0117+10351	Asia/Singapore	Singapore, peninsular Malaysia
+SR	+0550-05510	America/Paramaribo
+SS	+0451+03137	Africa/Juba
+ST	+0020+00644	Africa/Sao_Tome
+SV	+1342-08912	America/El_Salvador
+SY	+3330+03618	Asia/Damascus
+TC	+2128-07108	America/Grand_Turk
+TD	+1207+01503	Africa/Ndjamena
+TF	-492110+0701303	Indian/Kerguelen	Kerguelen, St Paul Island, Amsterdam Island
+TH,KH,LA,VN	+1345+10031	Asia/Bangkok	Indochina (most areas)
+TJ	+3835+06848	Asia/Dushanbe
+TK	-0922-17114	Pacific/Fakaofo
+TL	-0833+12535	Asia/Dili
+TM	+3757+05823	Asia/Ashgabat
+TN	+3648+01011	Africa/Tunis
+TO	-210800-1751200	Pacific/Tongatapu
+TR	+4101+02858	Europe/Istanbul
+TV	-0831+17913	Pacific/Funafuti
+TW	+2503+12130	Asia/Taipei
+UA	+5026+03031	Europe/Kiev	Ukraine (most areas)
+UA	+4837+02218	Europe/Uzhgorod	Transcarpathia
+UA	+4750+03510	Europe/Zaporozhye	Zaporozhye and east Lugansk
+UM	+1917+16637	Pacific/Wake	Wake Island
+US	+404251-0740023	America/New_York	Eastern (most areas)
+US	+421953-0830245	America/Detroit	Eastern - MI (most areas)
+US	+381515-0854534	America/Kentucky/Louisville	Eastern - KY (Louisville area)
+US	+364947-0845057	America/Kentucky/Monticello	Eastern - KY (Wayne)
+US	+394606-0860929	America/Indiana/Indianapolis	Eastern - IN (most areas)
+US	+384038-0873143	America/Indiana/Vincennes	Eastern - IN (Da, Du, K, Mn)
+US	+410305-0863611	America/Indiana/Winamac	Eastern - IN (Pulaski)
+US	+382232-0862041	America/Indiana/Marengo	Eastern - IN (Crawford)
+US	+382931-0871643	America/Indiana/Petersburg	Eastern - IN (Pike)
+US	+384452-0850402	America/Indiana/Vevay	Eastern - IN (Switzerland)
+US	+415100-0873900	America/Chicago	Central (most areas)
+US	+375711-0864541	America/Indiana/Tell_City	Central - IN (Perry)
+US	+411745-0863730	America/Indiana/Knox	Central - IN (Starke)
+US	+450628-0873651	America/Menominee	Central - MI (Wisconsin border)
+US	+470659-1011757	America/North_Dakota/Center	Central - ND (Oliver)
+US	+465042-1012439	America/North_Dakota/New_Salem	Central - ND (Morton rural)
+US	+471551-1014640	America/North_Dakota/Beulah	Central - ND (Mercer)
+US	+394421-1045903	America/Denver	Mountain (most areas)
+US	+433649-1161209	America/Boise	Mountain - ID (south); OR (east)
+US,CA	+332654-1120424	America/Phoenix	MST - Arizona (except Navajo), Creston BC
+US	+340308-1181434	America/Los_Angeles	Pacific
+US	+611305-1495401	America/Anchorage	Alaska (most areas)
+US	+581807-1342511	America/Juneau	Alaska - Juneau area
+US	+571035-1351807	America/Sitka	Alaska - Sitka area
+US	+550737-1313435	America/Metlakatla	Alaska - Annette Island
+US	+593249-1394338	America/Yakutat	Alaska - Yakutat
+US	+643004-1652423	America/Nome	Alaska (west)
+US	+515248-1763929	America/Adak	Aleutian Islands
+US,UM	+211825-1575130	Pacific/Honolulu	Hawaii
+UY	-345433-0561245	America/Montevideo
+UZ	+3940+06648	Asia/Samarkand	Uzbekistan (west)
+UZ	+4120+06918	Asia/Tashkent	Uzbekistan (east)
+VE	+1030-06656	America/Caracas
+VN	+1045+10640	Asia/Ho_Chi_Minh	Vietnam (south)
+VU	-1740+16825	Pacific/Efate
+WF	-1318-17610	Pacific/Wallis
+WS	-1350-17144	Pacific/Apia
+ZA,LS,SZ	-2615+02800	Africa/Johannesburg
diff --git a/.local/lib/python3.8/site-packages/slugify/__init__.py b/.local/lib/python3.8/site-packages/slugify/__init__.py
new file mode 100644
index 0000000..ac21492
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/slugify/__init__.py
@@ -0,0 +1,2 @@
+from .special import *
+from .slugify import *
diff --git a/.local/lib/python3.8/site-packages/slugify/__main__.py b/.local/lib/python3.8/site-packages/slugify/__main__.py
new file mode 100644
index 0000000..7dd6b01
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/slugify/__main__.py
@@ -0,0 +1,96 @@
+from __future__ import print_function, absolute_import
+import argparse
+import sys
+
+from .slugify import slugify, DEFAULT_SEPARATOR
+
+
+def parse_args(argv):
+    parser = argparse.ArgumentParser(description="Slug string")
+
+    input_group = parser.add_argument_group(description="Input")
+    input_group.add_argument("input_string", nargs='*',
+                             help='Text to slugify')
+    input_group.add_argument("--stdin", action='store_true',
+                             help="Take the text from STDIN")
+
+    parser.add_argument("--no-entities", action='store_false', dest='entities', default=True,
+                        help="Do not convert HTML entities to unicode")
+    parser.add_argument("--no-decimal", action='store_false', dest='decimal', default=True,
+                        help="Do not convert HTML decimal to unicode")
+    parser.add_argument("--no-hexadecimal", action='store_false', dest='hexadecimal', default=True,
+                        help="Do not convert HTML hexadecimal to unicode")
+    parser.add_argument("--max-length", type=int, default=0,
+                        help="Output string length, 0 for no limit")
+    parser.add_argument("--word-boundary", action='store_true', default=False,
+                        help="Truncate to complete word even if length ends up shorter than --max_length")
+    parser.add_argument("--save-order", action='store_true', default=False,
+                        help="When set and --max_length > 0 return whole words in the initial order")
+    parser.add_argument("--separator", type=str, default=DEFAULT_SEPARATOR,
+                        help="Separator between words. By default " + DEFAULT_SEPARATOR)
+    parser.add_argument("--stopwords", nargs='+',
+                        help="Words to discount")
+    parser.add_argument("--regex-pattern",
+                        help="Python regex pattern for disallowed characters")
+    parser.add_argument("--no-lowercase", action='store_false', dest='lowercase', default=True,
+                        help="Activate case sensitivity")
+    parser.add_argument("--replacements", nargs='+',
+                        help="""Additional replacement rules e.g. "|->or", "%%->percent".""")
+    parser.add_argument("--allow-unicode", action='store_true', default=False,
+                        help="Allow unicode characters")
+
+    args = parser.parse_args(argv[1:])
+
+    if args.input_string and args.stdin:
+        parser.error("Input strings and --stdin cannot work together")
+
+    if args.replacements:
+        def split_check(repl):
+            SEP = '->'
+            if SEP not in repl:
+                parser.error("Replacements must be of the form: ORIGINAL{SEP}REPLACED".format(SEP=SEP))
+            return repl.split(SEP, 1)
+        args.replacements = [split_check(repl) for repl in args.replacements]
+
+    if args.input_string:
+        args.input_string = " ".join(args.input_string)
+    elif args.stdin:
+        args.input_string = sys.stdin.read()
+
+    if not args.input_string:
+        args.input_string = ''
+
+    return args
+
+
+def slugify_params(args):
+    return dict(
+        text=args.input_string,
+        entities=args.entities,
+        decimal=args.decimal,
+        hexadecimal=args.hexadecimal,
+        max_length=args.max_length,
+        word_boundary=args.word_boundary,
+        save_order=args.save_order,
+        separator=args.separator,
+        stopwords=args.stopwords,
+        lowercase=args.lowercase,
+        replacements=args.replacements,
+        allow_unicode=args.allow_unicode
+    )
+
+
+def main(argv=None):  # pragma: no cover
+    """ Run this program """
+    if argv is None:
+        argv = sys.argv
+    args = parse_args(argv)
+    params = slugify_params(args)
+    try:
+        print(slugify(**params))
+    except KeyboardInterrupt:
+        sys.exit(-1)
+
+
+if __name__ == '__main__':  # pragma: no cover
+    main()
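+
+# Usage sketch (illustrative, assuming the definitions above): the CLI entry
+# point can also be driven programmatically; parse_args() skips argv[0], so a
+# dummy program name is passed first.
+#
+#     main(["slugify", "Hello, World!"])                     # prints: hello-world
+#     main(["slugify", "--separator", "_", "Hello, World!"]) # prints: hello_world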
diff --git a/.local/lib/python3.8/site-packages/slugify/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/slugify/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000..f732347
Binary files /dev/null and b/.local/lib/python3.8/site-packages/slugify/__pycache__/__init__.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/slugify/__pycache__/__main__.cpython-38.pyc b/.local/lib/python3.8/site-packages/slugify/__pycache__/__main__.cpython-38.pyc
new file mode 100644
index 0000000..e618502
Binary files /dev/null and b/.local/lib/python3.8/site-packages/slugify/__pycache__/__main__.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/slugify/__pycache__/__version__.cpython-38.pyc b/.local/lib/python3.8/site-packages/slugify/__pycache__/__version__.cpython-38.pyc
new file mode 100644
index 0000000..2667019
Binary files /dev/null and b/.local/lib/python3.8/site-packages/slugify/__pycache__/__version__.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/slugify/__pycache__/slugify.cpython-38.pyc b/.local/lib/python3.8/site-packages/slugify/__pycache__/slugify.cpython-38.pyc
new file mode 100644
index 0000000..681893f
Binary files /dev/null and b/.local/lib/python3.8/site-packages/slugify/__pycache__/slugify.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/slugify/__pycache__/special.cpython-38.pyc b/.local/lib/python3.8/site-packages/slugify/__pycache__/special.cpython-38.pyc
new file mode 100644
index 0000000..d3ac4c6
Binary files /dev/null and b/.local/lib/python3.8/site-packages/slugify/__pycache__/special.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/slugify/__version__.py b/.local/lib/python3.8/site-packages/slugify/__version__.py
new file mode 100644
index 0000000..f971770
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/slugify/__version__.py
@@ -0,0 +1,8 @@
+__title__ = 'python-slugify'
+__author__ = 'Val Neekman'
+__author_email__ = 'info@neekware.com'
+__description__ = 'A Python slugify application that also handles Unicode'
+__url__ = 'https://github.com/un33k/python-slugify'
+__license__ = 'MIT'
+__copyright__ = 'Copyright 2022 Val Neekman @ Neekware Inc.'
+__version__ = '6.1.1'
diff --git a/.local/lib/python3.8/site-packages/slugify/slugify.py b/.local/lib/python3.8/site-packages/slugify/slugify.py
new file mode 100644
index 0000000..b8c02ad
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/slugify/slugify.py
@@ -0,0 +1,177 @@
+import re
+import sys
+import unicodedata
+from html.entities import name2codepoint
+
+try:
+    import text_unidecode as unidecode
+except ImportError:
+    import unidecode
+
+__all__ = ['slugify', 'smart_truncate']
+
+
+CHAR_ENTITY_PATTERN = re.compile(r'&(%s);' % '|'.join(name2codepoint))
+DECIMAL_PATTERN = re.compile(r'&#(\d+);')
+HEX_PATTERN = re.compile(r'&#x([\da-fA-F]+);')
+QUOTE_PATTERN = re.compile(r'[\']+')
+DISALLOWED_CHARS_PATTERN = re.compile(r'[^-a-zA-Z0-9]+')
+DISALLOWED_UNICODE_CHARS_PATTERN = re.compile(r'[\W_]+')
+DUPLICATE_DASH_PATTERN = re.compile(r'-{2,}')
+NUMBERS_PATTERN = re.compile(r'(?<=\d),(?=\d)')
+DEFAULT_SEPARATOR = '-'
+
+
+def smart_truncate(string, max_length=0, word_boundary=False, separator=' ', save_order=False):
+    """
+    Truncate a string.
+    :param string (str): string for modification
+    :param max_length (int): output string length
+    :param word_boundary (bool): truncate at the last complete word within max_length
+    :param save_order (bool): if True then word order of output string is like input string
+    :param separator (str): separator between words
+    :return:
+    """
+
+    string = string.strip(separator)
+
+    if not max_length:
+        return string
+
+    if len(string) < max_length:
+        return string
+
+    if not word_boundary:
+        return string[:max_length].strip(separator)
+
+    if separator not in string:
+        return string[:max_length]
+
+    truncated = ''
+    for word in string.split(separator):
+        if word:
+            next_len = len(truncated) + len(word)
+            if next_len < max_length:
+                truncated += '{}{}'.format(word, separator)
+            elif next_len == max_length:
+                truncated += '{}'.format(word)
+                break
+            else:
+                if save_order:
+                    break
+    if not truncated:  # pragma: no cover
+        truncated = string[:max_length]
+    return truncated.strip(separator)
+
+
+def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, word_boundary=False,
+            separator=DEFAULT_SEPARATOR, save_order=False, stopwords=(), regex_pattern=None, lowercase=True,
+            replacements=(), allow_unicode=False):
+    """
+    Make a slug from the given text.
+    :param text (str): initial text
+    :param entities (bool): converts html entities to unicode
+    :param decimal (bool): converts html decimal to unicode
+    :param hexadecimal (bool): converts html hexadecimal to unicode
+    :param max_length (int): output string length
+    :param word_boundary (bool): truncates to complete word even if length ends up shorter than max_length
+    :param save_order (bool): if parameter is True and max_length > 0 return whole words in the initial order
+    :param separator (str): separator between words
+    :param stopwords (iterable): words to discount
+    :param regex_pattern (str): regex pattern for disallowed characters
+    :param lowercase (bool): lowercase the output (default); set to False to preserve case
+    :param replacements (iterable): list of replacement rules e.g. [['|', 'or'], ['%', 'percent']]
+    :param allow_unicode (bool): allow unicode characters
+    :return (str):
+    """
+
+    # user-specific replacements
+    if replacements:
+        for old, new in replacements:
+            text = text.replace(old, new)
+
+    # ensure text is unicode
+    if not isinstance(text, str):
+        text = str(text, 'utf-8', 'ignore')
+
+    # replace quotes with dashes - pre-process
+    text = QUOTE_PATTERN.sub(DEFAULT_SEPARATOR, text)
+
+    # decode unicode
+    if not allow_unicode:
+        text = unidecode.unidecode(text)
+
+    # ensure text is still in unicode
+    if not isinstance(text, str):
+        text = str(text, 'utf-8', 'ignore')
+
+    # character entity reference
+    if entities:
+        text = CHAR_ENTITY_PATTERN.sub(lambda m: chr(name2codepoint[m.group(1)]), text)
+
+    # decimal character reference
+    if decimal:
+        try:
+            text = DECIMAL_PATTERN.sub(lambda m: chr(int(m.group(1))), text)
+        except Exception:
+            pass
+
+    # hexadecimal character reference
+    if hexadecimal:
+        try:
+            text = HEX_PATTERN.sub(lambda m: chr(int(m.group(1), 16)), text)
+        except Exception:
+            pass
+
+    # translate
+    if allow_unicode:
+        text = unicodedata.normalize('NFKC', text)
+    else:
+        text = unicodedata.normalize('NFKD', text)
+
+    if sys.version_info < (3,):
+        text = text.encode('ascii', 'ignore')
+
+    # make the text lowercase (optional)
+    if lowercase:
+        text = text.lower()
+
+    # remove generated quotes -- post-process
+    text = QUOTE_PATTERN.sub('', text)
+
+    # cleanup numbers
+    text = NUMBERS_PATTERN.sub('', text)
+
+    # replace all other unwanted characters
+    if allow_unicode:
+        pattern = regex_pattern or DISALLOWED_UNICODE_CHARS_PATTERN
+    else:
+        pattern = regex_pattern or DISALLOWED_CHARS_PATTERN
+
+    text = re.sub(pattern, DEFAULT_SEPARATOR, text)
+
+    # remove redundant
+    text = DUPLICATE_DASH_PATTERN.sub(DEFAULT_SEPARATOR, text).strip(DEFAULT_SEPARATOR)
+
+    # remove stopwords
+    if stopwords:
+        if lowercase:
+            stopwords_lower = [s.lower() for s in stopwords]
+            words = [w for w in text.split(DEFAULT_SEPARATOR) if w not in stopwords_lower]
+        else:
+            words = [w for w in text.split(DEFAULT_SEPARATOR) if w not in stopwords]
+        text = DEFAULT_SEPARATOR.join(words)
+
+    # finalize user-specific replacements
+    if replacements:
+        for old, new in replacements:
+            text = text.replace(old, new)
+
+    # smart truncate if requested
+    if max_length > 0:
+        text = smart_truncate(text, max_length, word_boundary, DEFAULT_SEPARATOR, save_order)
+
+    if separator != DEFAULT_SEPARATOR:
+        text = text.replace(DEFAULT_SEPARATOR, separator)
+
+    return text
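+
+# Usage sketch (illustrative; expected values assume the defaults above):
+#
+#     >>> slugify("Hello, World!")
+#     'hello-world'
+#     >>> slugify("Hello, World!", max_length=8, word_boundary=True)
+#     'hello'
+#     >>> smart_truncate('the-quick-brown-fox', max_length=8,
+#     ...                word_boundary=True, separator='-')
+#     'the-fox'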
diff --git a/.local/lib/python3.8/site-packages/slugify/special.py b/.local/lib/python3.8/site-packages/slugify/special.py
new file mode 100644
index 0000000..54eb85c
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/slugify/special.py
@@ -0,0 +1,47 @@
+# -*- coding: utf-8 -*-
+
+
+def add_uppercase_char(char_list):
+    """ Given a replacement char list, this adds uppercase chars to the list """
+
+    for item in char_list:
+        char, xlate = item
+        upper_dict = char.upper(), xlate.capitalize()
+        if upper_dict not in char_list and char != upper_dict[0]:
+            char_list.insert(0, upper_dict)
+    return char_list
+
+
+# Language specific pre translations
+# Source awesome-slugify
+
+_CYRILLIC = [      # package defaults:
+    (u'ё', u'e'),    # io / yo
+    (u'я', u'ya'),   # ia
+    (u'х', u'h'),    # kh
+    (u'у', u'y'),    # u
+    (u'щ', u'sch'),  # sch
+    (u'ю', u'u'),    # iu / yu
+]
+CYRILLIC = add_uppercase_char(_CYRILLIC)
+
+_GERMAN = [        # package defaults:
+    (u'ä', u'ae'),   # a
+    (u'ö', u'oe'),   # o
+    (u'ü', u'ue'),   # u
+]
+GERMAN = add_uppercase_char(_GERMAN)
+
+_GREEK = [         # package defaults:
+    (u'χ', u'ch'),   # kh
+    (u'Ξ', u'X'),    # Ks
+    (u'ϒ', u'Y'),    # U
+    (u'υ', u'y'),    # u
+    (u'ύ', u'y'),
+    (u'ϋ', u'y'),
+    (u'ΰ', u'y'),
+]
+GREEK = add_uppercase_char(_GREEK)
+
+# Pre translations
+PRE_TRANSLATIONS = CYRILLIC + GERMAN + GREEK
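+
+# Usage sketch (illustrative): these lists plug into slugify's
+# ``replacements`` parameter to force the language-specific spellings
+# instead of the unidecode defaults noted above, e.g.
+#
+#     >>> from slugify import slugify, GERMAN
+#     >>> slugify('Öl für alle', replacements=GERMAN)
+#     'oel-fuer-alle'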
diff --git a/.local/lib/python3.8/site-packages/text_unidecode-1.3.dist-info/DESCRIPTION.rst b/.local/lib/python3.8/site-packages/text_unidecode-1.3.dist-info/DESCRIPTION.rst
new file mode 100644
index 0000000..a17ced9
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/text_unidecode-1.3.dist-info/DESCRIPTION.rst
@@ -0,0 +1,46 @@
+Text-Unidecode
+==============
+
+.. image:: https://travis-ci.org/kmike/text-unidecode.svg?branch=master
+    :target: https://travis-ci.org/kmike/text-unidecode
+    :alt: Build Status
+
+text-unidecode is the most basic port of the
+``Text::Unidecode`` Perl library.
+
+There are other Python ports of Text::Unidecode (unidecode_
+and isounidecode_). unidecode_ is GPL; isounidecode_ uses too much memory,
+and it didn't support Python 3 when this package was created.
+
+You can redistribute it and/or modify this port under the terms of either:
+
+* `Artistic License`_, or
+* GPL or GPLv2+
+
+If you're OK with GPL-only, use unidecode_ (it has better memory usage and
+better transliteration quality).
+
+``text-unidecode`` supports Python 2.7 and 3.4+.
+
+.. _unidecode: https://pypi.python.org/pypi/Unidecode/
+.. _isounidecode: https://pypi.python.org/pypi/isounidecode/
+.. _Artistic License: https://opensource.org/licenses/Artistic-Perl-1.0
+
+Installation
+------------
+
+::
+
+    pip install text-unidecode
+
+Usage
+-----
+
+::
+
+    >>> from text_unidecode import unidecode
+    >>> unidecode(u'какой-то текст')
+    'kakoi-to tekst'
+
+
diff --git a/.local/lib/python3.8/site-packages/text_unidecode-1.3.dist-info/INSTALLER b/.local/lib/python3.8/site-packages/text_unidecode-1.3.dist-info/INSTALLER
new file mode 100644
index 0000000..a1b589e
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/text_unidecode-1.3.dist-info/INSTALLER
@@ -0,0 +1 @@
+pip
diff --git a/.local/lib/python3.8/site-packages/text_unidecode-1.3.dist-info/LICENSE.txt b/.local/lib/python3.8/site-packages/text_unidecode-1.3.dist-info/LICENSE.txt
new file mode 100644
index 0000000..5ed2d0f
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/text_unidecode-1.3.dist-info/LICENSE.txt
@@ -0,0 +1,134 @@
+text-unidecode is a free software; you can redistribute
+it and/or modify it under the terms of either:
+
+* GPL or GPLv2+ (see https://www.gnu.org/licenses/license-list.html#GNUGPL), or
+* Artistic License - see below:
+
+
+                         The "Artistic License"
+
+                                Preamble
+
+The intent of this document is to state the conditions under which a
+Package may be copied, such that the Copyright Holder maintains some
+semblance of artistic control over the development of the package,
+while giving the users of the package the right to use and distribute
+the Package in a more-or-less customary fashion, plus the right to make
+reasonable modifications.
+
+Definitions:
+
+        "Package" refers to the collection of files distributed by the
+        Copyright Holder, and derivatives of that collection of files
+        created through textual modification.
+
+        "Standard Version" refers to such a Package if it has not been
+        modified, or has been modified in accordance with the wishes
+        of the Copyright Holder as specified below.
+
+        "Copyright Holder" is whoever is named in the copyright or
+        copyrights for the package.
+
+        "You" is you, if you're thinking about copying or distributing
+        this Package.
+
+        "Reasonable copying fee" is whatever you can justify on the
+        basis of media cost, duplication charges, time of people involved,
+        and so on.  (You will not be required to justify it to the
+        Copyright Holder, but only to the computing community at large
+        as a market that must bear the fee.)
+
+        "Freely Available" means that no fee is charged for the item
+        itself, though there may be fees involved in handling the item.
+        It also means that recipients of the item may redistribute it
+        under the same conditions they received it.
+
+1. You may make and give away verbatim copies of the source form of the
+Standard Version of this Package without restriction, provided that you
+duplicate all of the original copyright notices and associated disclaimers.
+
+2. You may apply bug fixes, portability fixes and other modifications
+derived from the Public Domain or from the Copyright Holder.  A Package
+modified in such a way shall still be considered the Standard Version.
+
+3. You may otherwise modify your copy of this Package in any way, provided
+that you insert a prominent notice in each changed file stating how and
+when you changed that file, and provided that you do at least ONE of the
+following:
+
+    a) place your modifications in the Public Domain or otherwise make them
+    Freely Available, such as by posting said modifications to Usenet or
+    an equivalent medium, or placing the modifications on a major archive
+    site such as uunet.uu.net, or by allowing the Copyright Holder to include
+    your modifications in the Standard Version of the Package.
+
+    b) use the modified Package only within your corporation or organization.
+
+    c) rename any non-standard executables so the names do not conflict
+    with standard executables, which must also be provided, and provide
+    a separate manual page for each non-standard executable that clearly
+    documents how it differs from the Standard Version.
+
+    d) make other distribution arrangements with the Copyright Holder.
+
+4. You may distribute the programs of this Package in object code or
+executable form, provided that you do at least ONE of the following:
+
+    a) distribute a Standard Version of the executables and library files,
+    together with instructions (in the manual page or equivalent) on where
+    to get the Standard Version.
+
+    b) accompany the distribution with the machine-readable source of
+    the Package with your modifications.
+
+    c) give non-standard executables non-standard names, and clearly
+    document the differences in manual pages (or equivalent), together
+    with instructions on where to get the Standard Version.
+
+    d) make other distribution arrangements with the Copyright Holder.
+
+5. You may charge a reasonable copying fee for any distribution of this
+Package.  You may charge any fee you choose for support of this
+Package.  You may not charge a fee for this Package itself.  However,
+you may distribute this Package in aggregate with other (possibly
+commercial) programs as part of a larger (possibly commercial) software
+distribution provided that you do not advertise this Package as a
+product of your own.  You may embed this Package's interpreter within
+an executable of yours (by linking); this shall be construed as a mere
+form of aggregation, provided that the complete Standard Version of the
+interpreter is so embedded.
+
+6. The scripts and library files supplied as input to or produced as
+output from the programs of this Package do not automatically fall
+under the copyright of this Package, but belong to whoever generated
+them, and may be sold commercially, and may be aggregated with this
+Package.  If such scripts or library files are aggregated with this
+Package via the so-called "undump" or "unexec" methods of producing a
+binary executable image, then distribution of such an image shall
+neither be construed as a distribution of this Package nor shall it
+fall under the restrictions of Paragraphs 3 and 4, provided that you do
+not represent such an executable image as a Standard Version of this
+Package.
+
+7. C subroutines (or comparably compiled subroutines in other
+languages) supplied by you and linked into this Package in order to
+emulate subroutines and variables of the language defined by this
+Package shall not be considered part of this Package, but are the
+equivalent of input as in Paragraph 6, provided these subroutines do
+not change the language in any way that would cause it to fail the
+regression tests for the language.
+
+8. Aggregation of this Package with a commercial distribution is always
+permitted provided that the use of this Package is embedded; that is,
+when no overt attempt is made to make this Package's interfaces visible
+to the end user of the commercial distribution.  Such use shall not be
+construed as a distribution of this Package.
+
+9. The name of the Copyright Holder may not be used to endorse or promote
+products derived from this software without specific prior written permission.
+
+10. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
+IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+
+                                The End
diff --git a/.local/lib/python3.8/site-packages/text_unidecode-1.3.dist-info/METADATA b/.local/lib/python3.8/site-packages/text_unidecode-1.3.dist-info/METADATA
new file mode 100644
index 0000000..23bd3d4
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/text_unidecode-1.3.dist-info/METADATA
@@ -0,0 +1,73 @@
+Metadata-Version: 2.0
+Name: text-unidecode
+Version: 1.3
+Summary: The most basic Text::Unidecode port
+Home-page: https://github.com/kmike/text-unidecode/
+Author: Mikhail Korobov
+Author-email: kmike84@gmail.com
+License: Artistic License
+Platform: UNKNOWN
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: Artistic License
+Classifier: License :: OSI Approved :: GNU General Public License (GPL)
+Classifier: License :: OSI Approved :: GNU General Public License v2 or later (GPLv2+)
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: 2
+Classifier: Programming Language :: Python :: 2.7
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.4
+Classifier: Programming Language :: Python :: 3.5
+Classifier: Programming Language :: Python :: 3.6
+Classifier: Programming Language :: Python :: 3.7
+Classifier: Programming Language :: Python :: Implementation :: CPython
+Classifier: Programming Language :: Python :: Implementation :: PyPy
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Classifier: Topic :: Text Processing :: Linguistic
+
+Text-Unidecode
+==============
+
+.. image:: https://travis-ci.org/kmike/text-unidecode.svg?branch=master
+    :target: https://travis-ci.org/kmike/text-unidecode
+    :alt: Build Status
+
+text-unidecode is the most basic port of the
+``Text::Unidecode`` Perl library.
+
+There are other Python ports of Text::Unidecode (unidecode_
+and isounidecode_). unidecode_ is GPL; isounidecode_ uses too much memory,
+and it didn't support Python 3 when this package was created.
+
+You can redistribute it and/or modify this port under the terms of either:
+
+* `Artistic License`_, or
+* GPL or GPLv2+
+
+If you're OK with GPL-only, use unidecode_ (it has better memory usage and
+better transliteration quality).
+
+``text-unidecode`` supports Python 2.7 and 3.4+.
+
+.. _unidecode: https://pypi.python.org/pypi/Unidecode/
+.. _isounidecode: https://pypi.python.org/pypi/isounidecode/
+.. _Artistic License: https://opensource.org/licenses/Artistic-Perl-1.0
+
+Installation
+------------
+
+::
+
+    pip install text-unidecode
+
+Usage
+-----
+
+::
+
+    >>> from text_unidecode import unidecode
+    >>> unidecode(u'какой-то текст')
+    'kakoi-to tekst'
+
+
diff --git a/.local/lib/python3.8/site-packages/text_unidecode-1.3.dist-info/RECORD b/.local/lib/python3.8/site-packages/text_unidecode-1.3.dist-info/RECORD
new file mode 100644
index 0000000..61fddcd
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/text_unidecode-1.3.dist-info/RECORD
@@ -0,0 +1,11 @@
+text_unidecode-1.3.dist-info/DESCRIPTION.rst,sha256=6Fgx54K_UeXRByELmqkfLcHlbXhsvNNJdkPFT0VF0J0,1199
+text_unidecode-1.3.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
+text_unidecode-1.3.dist-info/LICENSE.txt,sha256=OTjnU1w-TvfmNj3ptpP-O6_jxKXl9JJc1IN1CW_nr9U,6535
+text_unidecode-1.3.dist-info/METADATA,sha256=B9j-1l4-yN9P5e8mpBrXCqAQsRUnA4Izyy0hG7Jyrn4,2422
+text_unidecode-1.3.dist-info/RECORD,,
+text_unidecode-1.3.dist-info/WHEEL,sha256=o2k-Qa-RMNIJmUdIc7KU6VWR_ErNRbWNlxDIpl7lm34,110
+text_unidecode-1.3.dist-info/metadata.json,sha256=vYPs2_8Q45eS8mrUw0qzf1NeShHDuX_lpyh8S3yqg9U,1299
+text_unidecode-1.3.dist-info/top_level.txt,sha256=SQH9SRjWlLrD-XgHyQOPtDQg_DaBt3Gt6hiMSNwHbuE,15
+text_unidecode/__init__.py,sha256=_hESqlvGR_cTy0oryPuoyrVntCOIID7bHDA-y22I1ig,484
+text_unidecode/__pycache__/__init__.cpython-38.pyc,,
+text_unidecode/data.bin,sha256=eSRmbaTOCtJNS4FCszDm2OiUZc2IOTRyTNnkt9gTDwk,311077
diff --git a/.local/lib/python3.8/site-packages/text_unidecode-1.3.dist-info/WHEEL b/.local/lib/python3.8/site-packages/text_unidecode-1.3.dist-info/WHEEL
new file mode 100644
index 0000000..8b6dd1b
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/text_unidecode-1.3.dist-info/WHEEL
@@ -0,0 +1,6 @@
+Wheel-Version: 1.0
+Generator: bdist_wheel (0.29.0)
+Root-Is-Purelib: true
+Tag: py2-none-any
+Tag: py3-none-any
+
diff --git a/.local/lib/python3.8/site-packages/text_unidecode-1.3.dist-info/metadata.json b/.local/lib/python3.8/site-packages/text_unidecode-1.3.dist-info/metadata.json
new file mode 100644
index 0000000..3d8b506
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/text_unidecode-1.3.dist-info/metadata.json
@@ -0,0 +1 @@
+{"classifiers": ["Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "License :: OSI Approved :: Artistic License", "License :: OSI Approved :: GNU General Public License (GPL)", "License :: OSI Approved :: GNU General Public License v2 or later (GPLv2+)", "Programming Language :: Python", "Programming Language :: Python :: 2", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.4", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", "Topic :: Software Development :: Libraries :: Python Modules", "Topic :: Text Processing :: Linguistic"], "extensions": {"python.details": {"contacts": [{"email": "kmike84@gmail.com", "name": "Mikhail Korobov", "role": "author"}], "document_names": {"description": "DESCRIPTION.rst", "license": "LICENSE.txt"}, "project_urls": {"Home": "https://github.com/kmike/text-unidecode/"}}}, "generator": "bdist_wheel (0.29.0)", "license": "Artistic License", "metadata_version": "2.0", "name": "text-unidecode", "summary": "The most basic Text::Unidecode port", "version": "1.3"}
\ No newline at end of file
diff --git a/.local/lib/python3.8/site-packages/text_unidecode-1.3.dist-info/top_level.txt b/.local/lib/python3.8/site-packages/text_unidecode-1.3.dist-info/top_level.txt
new file mode 100644
index 0000000..2f7a53e
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/text_unidecode-1.3.dist-info/top_level.txt
@@ -0,0 +1 @@
+text_unidecode
diff --git a/.local/lib/python3.8/site-packages/text_unidecode/__init__.py b/.local/lib/python3.8/site-packages/text_unidecode/__init__.py
new file mode 100644
index 0000000..80282c7
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/text_unidecode/__init__.py
@@ -0,0 +1,21 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import, unicode_literals
+import os
+import pkgutil
+
+# data.bin is a '\x00'-separated table of ASCII replacement strings,
+# indexed by Unicode codepoint minus one (U+0000 is special-cased below).
+_replaces = pkgutil.get_data(__name__, 'data.bin').decode('utf8').split('\x00')
+
+def unidecode(txt):
+    chars = []
+    for ch in txt:
+        codepoint = ord(ch)
+
+        if not codepoint:
+            # preserve NUL characters verbatim
+            chars.append('\x00')
+            continue
+
+        try:
+            chars.append(_replaces[codepoint-1])
+        except IndexError:
+            # codepoints beyond the end of the table are dropped
+            pass
+    return "".join(chars)
diff --git a/.local/lib/python3.8/site-packages/text_unidecode/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/text_unidecode/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000..c03f0ec
Binary files /dev/null and b/.local/lib/python3.8/site-packages/text_unidecode/__pycache__/__init__.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/text_unidecode/data.bin b/.local/lib/python3.8/site-packages/text_unidecode/data.bin
new file mode 100644
index 0000000..523d489
Binary files /dev/null and b/.local/lib/python3.8/site-packages/text_unidecode/data.bin differ
diff --git a/.local/lib/python3.8/site-packages/tqdm-4.63.1.dist-info/INSTALLER b/.local/lib/python3.8/site-packages/tqdm-4.63.1.dist-info/INSTALLER
new file mode 100644
index 0000000..a1b589e
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/tqdm-4.63.1.dist-info/INSTALLER
@@ -0,0 +1 @@
+pip
diff --git a/.local/lib/python3.8/site-packages/tqdm-4.63.1.dist-info/LICENCE b/.local/lib/python3.8/site-packages/tqdm-4.63.1.dist-info/LICENCE
new file mode 100644
index 0000000..5b3cab7
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/tqdm-4.63.1.dist-info/LICENCE
@@ -0,0 +1,49 @@
+`tqdm` is a product of collaborative work.
+Unless otherwise stated, all authors (see commit logs) retain copyright
+for their respective work, and release the work under the MIT licence
+(text below).
+
+Exceptions or notable authors are listed below
+in reverse chronological order:
+
+* files: *
+  MPLv2.0 2015-2021 (c) Casper da Costa-Luis
+  [casperdcl](https://github.com/casperdcl).
+* files: tqdm/_tqdm.py
+  MIT 2016 (c) [PR #96] on behalf of Google Inc.
+* files: tqdm/_tqdm.py setup.py README.rst MANIFEST.in .gitignore
+  MIT 2013 (c) Noam Yorav-Raphael, original author.
+
+[PR #96]: https://github.com/tqdm/tqdm/pull/96
+
+
+Mozilla Public Licence (MPL) v. 2.0 - Exhibit A
+-----------------------------------------------
+
+This Source Code Form is subject to the terms of the
+Mozilla Public License, v. 2.0.
+If a copy of the MPL was not distributed with this project,
+You can obtain one at https://mozilla.org/MPL/2.0/.
+
+
+MIT License (MIT)
+-----------------
+
+Copyright (c) 2013 noamraph
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software is furnished to do so,
+subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/.local/lib/python3.8/site-packages/tqdm-4.63.1.dist-info/METADATA b/.local/lib/python3.8/site-packages/tqdm-4.63.1.dist-info/METADATA
new file mode 100644
index 0000000..5b52eba
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/tqdm-4.63.1.dist-info/METADATA
@@ -0,0 +1,1587 @@
+Metadata-Version: 2.1
+Name: tqdm
+Version: 4.63.1
+Summary: Fast, Extensible Progress Meter
+Home-page: https://tqdm.github.io
+Maintainer: tqdm developers
+Maintainer-email: python.tqdm@gmail.com
+License: MPLv2.0, MIT Licences
+Project-URL: Changelog, https://tqdm.github.io/releases
+Project-URL: Source, https://github.com/tqdm/tqdm
+Project-URL: Wiki, https://github.com/tqdm/tqdm/wiki
+Keywords: progressbar,progressmeter,progress,bar,meter,rate,eta,console,terminal,time
+Platform: any
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Environment :: Console
+Classifier: Environment :: MacOS X
+Classifier: Environment :: Other Environment
+Classifier: Environment :: Win32 (MS Windows)
+Classifier: Environment :: X11 Applications
+Classifier: Framework :: IPython
+Classifier: Framework :: Jupyter
+Classifier: Intended Audience :: Developers
+Classifier: Intended Audience :: Education
+Classifier: Intended Audience :: End Users/Desktop
+Classifier: Intended Audience :: Other Audience
+Classifier: Intended Audience :: System Administrators
+Classifier: License :: OSI Approved :: MIT License
+Classifier: License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)
+Classifier: Operating System :: MacOS
+Classifier: Operating System :: MacOS :: MacOS X
+Classifier: Operating System :: Microsoft
+Classifier: Operating System :: Microsoft :: MS-DOS
+Classifier: Operating System :: Microsoft :: Windows
+Classifier: Operating System :: POSIX
+Classifier: Operating System :: POSIX :: BSD
+Classifier: Operating System :: POSIX :: BSD :: FreeBSD
+Classifier: Operating System :: POSIX :: Linux
+Classifier: Operating System :: POSIX :: SunOS/Solaris
+Classifier: Operating System :: Unix
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: 2
+Classifier: Programming Language :: Python :: 2.7
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.5
+Classifier: Programming Language :: Python :: 3.6
+Classifier: Programming Language :: Python :: 3.7
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: Implementation
+Classifier: Programming Language :: Python :: Implementation :: IronPython
+Classifier: Programming Language :: Python :: Implementation :: PyPy
+Classifier: Programming Language :: Unix Shell
+Classifier: Topic :: Desktop Environment
+Classifier: Topic :: Education :: Computer Aided Instruction (CAI)
+Classifier: Topic :: Education :: Testing
+Classifier: Topic :: Office/Business
+Classifier: Topic :: Other/Nonlisted Topic
+Classifier: Topic :: Software Development :: Build Tools
+Classifier: Topic :: Software Development :: Libraries
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Classifier: Topic :: Software Development :: Pre-processors
+Classifier: Topic :: Software Development :: User Interfaces
+Classifier: Topic :: System :: Installation/Setup
+Classifier: Topic :: System :: Logging
+Classifier: Topic :: System :: Monitoring
+Classifier: Topic :: System :: Shells
+Classifier: Topic :: Terminals
+Classifier: Topic :: Utilities
+Provides: tqdm
+Requires-Python: !=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7
+Description-Content-Type: text/x-rst
+License-File: LICENCE
+Requires-Dist: colorama ; platform_system == "Windows"
+Requires-Dist: importlib-resources ; python_version < "3.7"
+Provides-Extra: dev
+Requires-Dist: py-make (>=0.1.0) ; extra == 'dev'
+Requires-Dist: twine ; extra == 'dev'
+Requires-Dist: wheel ; extra == 'dev'
+Provides-Extra: notebook
+Requires-Dist: ipywidgets (>=6) ; extra == 'notebook'
+Provides-Extra: telegram
+Requires-Dist: requests ; extra == 'telegram'
+
+|Logo|
+
+tqdm
+====
+
+|Py-Versions| |Versions| |Conda-Forge-Status| |Docker| |Snapcraft|
+
+|Build-Status| |Coverage-Status| |Branch-Coverage-Status| |Codacy-Grade| |Libraries-Rank| |PyPI-Downloads|
+
+|LICENCE| |OpenHub-Status| |binder-demo| |awesome-python|
+
+``tqdm`` derives from the Arabic word *taqaddum* (تقدّم) which can mean "progress,"
+and is an abbreviation for "I love you so much" in Spanish (*te quiero demasiado*).
+
+Instantly make your loops show a smart progress meter - just wrap any
+iterable with ``tqdm(iterable)``, and you're done!
+
+.. code:: python
+
+    from tqdm import tqdm
+    for i in tqdm(range(10000)):
+        ...
+
+``76%|████████████████████████        | 7568/10000 [00:33<00:10, 229.00it/s]``
+
+``trange(N)`` can also be used as a convenient shortcut for
+``tqdm(range(N))``.
+
+|Screenshot|
+    |Video| |Slides| |Merch|
+
+It can also be executed as a module with pipes:
+
+.. code:: sh
+
+    $ seq 9999999 | tqdm --bytes | wc -l
+    75.2MB [00:00, 217MB/s]
+    9999999
+
+    $ tar -zcf - docs/ | tqdm --bytes --total `du -sb docs/ | cut -f1` \
+        > backup.tgz
+     32%|██████████▍                      | 8.89G/27.9G [00:42<01:31, 223MB/s]
+
+Overhead is low -- about 60ns per iteration (80ns with ``tqdm.gui``), and is
+unit tested against performance regression.
+By comparison, the well-established
+``ProgressBar`` package has
+an 800ns/iter overhead.
+
+In addition to its low overhead, ``tqdm`` uses smart algorithms to predict
+the remaining time and to skip unnecessary iteration displays, which allows
+for a negligible overhead in most cases.
+
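+For illustration, both knobs are exposed directly (a sketch; the full
+parameter list appears below):
+
+.. code:: python
+
+    from tqdm import tqdm
+
+    # refresh the display at most every 0.5 s, checking every 1000 iterations
+    for _ in tqdm(range(10**7), mininterval=0.5, miniters=1000):
+        pass
+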
+``tqdm`` works on any platform
+(Linux, Windows, Mac, FreeBSD, NetBSD, Solaris/SunOS),
+in any console or in a GUI, and is also friendly with IPython/Jupyter notebooks.
+
+``tqdm`` does not require any dependencies (not even ``curses``!), just
+Python and an environment supporting ``carriage return \r`` and
+``line feed \n`` control characters.
+
+------------------------------------------
+
+.. contents:: Table of contents
+   :backlinks: top
+   :local:
+
+
+Installation
+------------
+
+Latest PyPI stable release
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+|Versions| |PyPI-Downloads| |Libraries-Dependents|
+
+.. code:: sh
+
+    pip install tqdm
+
+Latest development release on GitHub
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+|GitHub-Status| |GitHub-Stars| |GitHub-Commits| |GitHub-Forks| |GitHub-Updated|
+
+Pull and install pre-release ``devel`` branch:
+
+.. code:: sh
+
+    pip install "git+https://github.com/tqdm/tqdm.git@devel#egg=tqdm"
+
+Latest Conda release
+~~~~~~~~~~~~~~~~~~~~
+
+|Conda-Forge-Status|
+
+.. code:: sh
+
+    conda install -c conda-forge tqdm
+
+Latest Snapcraft release
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+|Snapcraft|
+
+There are 3 channels to choose from:
+
+.. code:: sh
+
+    snap install tqdm  # implies --stable, i.e. latest tagged release
+    snap install tqdm  --candidate  # master branch
+    snap install tqdm  --edge  # devel branch
+
+Note that ``snap`` binaries are purely for CLI use (not ``import``-able), and
+automatically set up ``bash`` tab-completion.
+
+Latest Docker release
+~~~~~~~~~~~~~~~~~~~~~
+
+|Docker|
+
+.. code:: sh
+
+    docker pull tqdm/tqdm
+    docker run -i --rm tqdm/tqdm --help
+
+Other
+~~~~~
+
+There are other (unofficial) places where ``tqdm`` may be downloaded, particularly for CLI use:
+
+|Repology|
+
+.. |Repology| image:: https://repology.org/badge/tiny-repos/python:tqdm.svg
+   :target: https://repology.org/project/python:tqdm/versions
+
+Changelog
+---------
+
+The list of all changes is available either on GitHub's Releases:
+|GitHub-Status|, on the
+`wiki <https://github.com/tqdm/tqdm/wiki>`__, or on the
+`website <https://tqdm.github.io/releases>`__.
+
+
+Usage
+-----
+
+``tqdm`` is very versatile and can be used in a number of ways.
+The three main ones are given below.
+
+Iterable-based
+~~~~~~~~~~~~~~
+
+Wrap ``tqdm()`` around any iterable:
+
+.. code:: python
+
+    from tqdm import tqdm
+    from time import sleep
+
+    text = ""
+    for char in tqdm(["a", "b", "c", "d"]):
+        sleep(0.25)
+        text = text + char
+
+``trange(i)`` is a special optimised instance of ``tqdm(range(i))``:
+
+.. code:: python
+
+    from tqdm import trange
+
+    for i in trange(100):
+        sleep(0.01)
+
+Instantiation outside of the loop allows for manual control over ``tqdm()``:
+
+.. code:: python
+
+    pbar = tqdm(["a", "b", "c", "d"])
+    for char in pbar:
+        sleep(0.25)
+        pbar.set_description("Processing %s" % char)
+
+Manual
+~~~~~~
+
+Manual control of ``tqdm()`` updates using a ``with`` statement:
+
+.. code:: python
+
+    with tqdm(total=100) as pbar:
+        for i in range(10):
+            sleep(0.1)
+            pbar.update(10)
+
+If the optional variable ``total`` (or an iterable with ``len()``) is
+provided, predictive stats are displayed.
+
+``with`` is also optional (you can just assign ``tqdm()`` to a variable,
+but in this case don't forget to ``del`` or ``close()`` at the end):
+
+.. code:: python
+
+    pbar = tqdm(total=100)
+    for i in range(10):
+        sleep(0.1)
+        pbar.update(10)
+    pbar.close()
+
+Module
+~~~~~~
+
+Perhaps the most wonderful use of ``tqdm`` is in a script or on the command
+line. Simply inserting ``tqdm`` (or ``python -m tqdm``) between pipes will pass
+through all ``stdin`` to ``stdout`` while printing progress to ``stderr``.
+
+The example below demonstrates counting the number of lines in all Python files
+in the current directory, with timing information included.
+
+.. code:: sh
+
+    $ time find . -name '*.py' -type f -exec cat \{} \; | wc -l
+    857365
+
+    real    0m3.458s
+    user    0m0.274s
+    sys     0m3.325s
+
+    $ time find . -name '*.py' -type f -exec cat \{} \; | tqdm | wc -l
+    857366it [00:03, 246471.31it/s]
+    857365
+
+    real    0m3.585s
+    user    0m0.862s
+    sys     0m3.358s
+
+Note that the usual arguments for ``tqdm`` can also be specified.
+
+.. code:: sh
+
+    $ find . -name '*.py' -type f -exec cat \{} \; |
+        tqdm --unit loc --unit_scale --total 857366 >> /dev/null
+    100%|█████████████████████████████████| 857K/857K [00:04<00:00, 246Kloc/s]
+
+Backing up a large directory?
+
+.. code:: sh
+
+    $ tar -zcf - docs/ | tqdm --bytes --total `du -sb docs/ | cut -f1` \
+      > backup.tgz
+     44%|██████████████▊                   | 153M/352M [00:14<00:18, 11.0MB/s]
+
+This can be beautified further:
+
+.. code:: sh
+
+    $ BYTES="$(du -sb docs/ | cut -f1)"
+    $ tar -cf - docs/ \
+      | tqdm --bytes --total "$BYTES" --desc Processing | gzip \
+      | tqdm --bytes --total "$BYTES" --desc Compressed --position 1 \
+      > ~/backup.tgz
+    Processing: 100%|██████████████████████| 352M/352M [00:14<00:00, 30.2MB/s]
+    Compressed:  42%|█████████▎            | 148M/352M [00:14<00:19, 10.9MB/s]
+
+Or done on a file level using 7-zip:
+
+.. code:: sh
+
+    $ 7z a -bd -r backup.7z docs/ | grep Compressing \
+      | tqdm --total $(find docs/ -type f | wc -l) --unit files \
+      | grep -v Compressing
+    100%|██████████████████████████▉| 15327/15327 [01:00<00:00, 712.96files/s]
+
+Pre-existing CLI programs already outputting basic progress information will
+benefit from ``tqdm``'s ``--update`` and ``--update_to`` flags:
+
+.. code:: sh
+
+    $ seq 3 0.1 5 | tqdm --total 5 --update_to --null
+    100%|████████████████████████████████████| 5.0/5 [00:00<00:00, 9673.21it/s]
+    $ seq 10 | tqdm --update --null  # 1 + 2 + ... + 10 = 55 iterations
+    55it [00:00, 90006.52it/s]
+
+FAQ and Known Issues
+--------------------
+
+|GitHub-Issues|
+
+The most common issues relate to excessive output on multiple lines, instead
+of a neat one-line progress bar.
+
+- Consoles in general: require support for carriage return (``CR``, ``\r``).
+- Nested progress bars:
+
+  * Consoles in general: require support for moving cursors up to the
+    previous line. For example,
+    IDLE, ConEmu and PyCharm
+    lack full support.
+  * Windows: additionally may require the Python module ``colorama``
+    to ensure nested bars stay within their respective lines.
+
+- Unicode:
+
+  * Environments which report that they support unicode will have solid smooth
+    progressbars. The fallback is an ``ascii``-only bar.
+  * Windows consoles often only partially support unicode and thus
+    often require explicit ``ascii=True``. This is due to
+    either normal-width unicode characters being incorrectly displayed as
+    "wide", or some unicode characters not rendering.
+
+- Wrapping generators:
+
+  * Generator wrapper functions tend to hide the length of iterables.
+    ``tqdm`` does not.
+  * Replace ``tqdm(enumerate(...))`` with ``enumerate(tqdm(...))`` or
+    ``tqdm(enumerate(x), total=len(x), ...)`` (see the sketch after this
+    list). The same applies to ``numpy.ndenumerate``.
+  * Replace ``tqdm(zip(a, b))`` with ``zip(tqdm(a), b)`` or even
+    ``zip(tqdm(a), tqdm(b))``.
+  * The same applies to ``itertools``.
+  * Some useful convenience functions can be found under ``tqdm.contrib``.
+
+- Hanging pipes in Python 2:
+  when using ``tqdm`` on the CLI, you may need to use Python 3.5+ for correct
+  buffering.
+- No intermediate output in docker-compose:
+  use ``docker-compose run`` instead of ``docker-compose up`` and ``tty: true``.
+
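+The generator advice above, as a sketch:
+
+.. code:: python
+
+    from tqdm import tqdm
+
+    data = list(range(1000))
+    for i, x in enumerate(tqdm(data)):   # not tqdm(enumerate(data))
+        pass
+    for a, b in zip(tqdm(data), data):   # not tqdm(zip(data, data))
+        pass
+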
+If you come across any other difficulties, browse and file |GitHub-Issues|.
+
+Documentation
+-------------
+
+|Py-Versions| |README-Hits| (Since 19 May 2016)
+
+.. code:: python
+
+    class tqdm():
+      """
+      Decorate an iterable object, returning an iterator which acts exactly
+      like the original iterable, but prints a dynamically updating
+      progressbar every time a value is requested.
+      """
+
+      def __init__(self, iterable=None, desc=None, total=None, leave=True,
+                   file=None, ncols=None, mininterval=0.1,
+                   maxinterval=10.0, miniters=None, ascii=None, disable=False,
+                   unit='it', unit_scale=False, dynamic_ncols=False,
+                   smoothing=0.3, bar_format=None, initial=0, position=None,
+                   postfix=None, unit_divisor=1000):
+
+Parameters
+~~~~~~~~~~
+
+* iterable  : iterable, optional  
+    Iterable to decorate with a progressbar.
+    Leave blank to manually manage the updates.
+* desc  : str, optional  
+    Prefix for the progressbar.
+* total  : int or float, optional  
+    The number of expected iterations. If unspecified,
+    len(iterable) is used if possible. If float("inf") or as a last
+    resort, only basic progress statistics are displayed
+    (no ETA, no progressbar).
+    If ``gui`` is True and this parameter needs subsequent updating,
+    specify an initial arbitrary large positive number,
+    e.g. 9e9.
+* leave  : bool, optional  
+    If [default: True], keeps all traces of the progressbar
+    upon termination of iteration.
+    If ``None``, will leave only if ``position`` is ``0``.
+* file  : ``io.TextIOWrapper`` or ``io.StringIO``, optional  
+    Specifies where to output the progress messages
+    (default: sys.stderr). Uses ``file.write(str)`` and ``file.flush()``
+    methods.  For encoding, see ``write_bytes``.
+* ncols  : int, optional  
+    The width of the entire output message. If specified,
+    dynamically resizes the progressbar to stay within this bound.
+    If unspecified, attempts to use environment width. The
+    fallback is a meter width of 10 and no limit for the counter and
+    statistics. If 0, will not print any meter (only stats).
+* mininterval  : float, optional  
+    Minimum progress display update interval [default: 0.1] seconds.
+* maxinterval  : float, optional  
+    Maximum progress display update interval [default: 10] seconds.
+    Automatically adjusts ``miniters`` to correspond to ``mininterval``
+    after long display update lag. Only works if ``dynamic_miniters``
+    or monitor thread is enabled.
+* miniters  : int or float, optional  
+    Minimum progress display update interval, in iterations.
+    If 0 and ``dynamic_miniters``, will automatically adjust to equal
+    ``mininterval`` (more CPU efficient, good for tight loops).
+    If > 0, will skip display of specified number of iterations.
+    Tweak this and ``mininterval`` to get very efficient loops.
+    If your progress is erratic with both fast and slow iterations
+    (network, skipping items, etc) you should set miniters=1.
+* ascii  : bool or str, optional  
+    If unspecified or False, use unicode (smooth blocks) to fill
+    the meter. The fallback is to use ASCII characters " 123456789#".
+* disable  : bool, optional  
+    Whether to disable the entire progressbar wrapper
+    [default: False]. If set to None, disable on non-TTY.
+* unit  : str, optional  
+    String that will be used to define the unit of each iteration
+    [default: it].
+* unit_scale  : bool or int or float, optional  
+    If 1 or True, the number of iterations will be reduced/scaled
+    automatically and a metric prefix following the
+    International System of Units standard will be added
+    (kilo, mega, etc.) [default: False]. If any other non-zero
+    number, will scale ``total`` and ``n``.
+* dynamic_ncols  : bool, optional  
+    If set, constantly alters ``ncols`` and ``nrows`` to the
+    environment (allowing for window resizes) [default: False].
+* smoothing  : float, optional  
+    Exponential moving average smoothing factor for speed estimates
+    (ignored in GUI mode). Ranges from 0 (average speed) to 1
+    (current/instantaneous speed) [default: 0.3].
+* bar_format  : str, optional  
+    Specify a custom bar string formatting. May impact performance.
+    [default: '{l_bar}{bar}{r_bar}'], where
+    l_bar='{desc}: {percentage:3.0f}%|' and
+    r_bar='| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, '
+    '{rate_fmt}{postfix}]'
+    Possible vars: l_bar, bar, r_bar, n, n_fmt, total, total_fmt,
+    percentage, elapsed, elapsed_s, ncols, nrows, desc, unit,
+    rate, rate_fmt, rate_noinv, rate_noinv_fmt,
+    rate_inv, rate_inv_fmt, postfix, unit_divisor,
+    remaining, remaining_s, eta.
+    Note that a trailing ": " is automatically removed after {desc}
+    if the latter is empty.
+* initial  : int or float, optional  
+    The initial counter value. Useful when restarting a progress
+    bar [default: 0]. If using float, consider specifying ``{n:.3f}``
+    or similar in ``bar_format``, or specifying ``unit_scale``.
+* position  : int, optional  
+    Specify the line offset to print this bar (starting from 0)
+    Automatic if unspecified.
+    Useful to manage multiple bars at once (eg, from threads).
+* postfix  : dict or ``*``, optional  
+    Specify additional stats to display at the end of the bar.
+    Calls ``set_postfix(**postfix)`` if possible (dict).
+* unit_divisor  : float, optional  
+    [default: 1000], ignored unless ``unit_scale`` is True.
+* write_bytes  : bool, optional  
+    If (default: None) and ``file`` is unspecified,
+    bytes will be written in Python 2. If ``True`` will also write
+    bytes. In all other cases will default to unicode.
+* lock_args  : tuple, optional  
+    Passed to ``refresh`` for intermediate output
+    (initialisation, iterating, and updating).
+* nrows  : int, optional  
+    The screen height. If specified, hides nested bars outside this
+    bound. If unspecified, attempts to use environment height.
+    The fallback is 20.
+* colour  : str, optional  
+    Bar colour (e.g. 'green', '#00ff00').
+* delay  : float, optional  
+    Don't display until [default: 0] seconds have elapsed.
+
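+A sketch combining several of the parameters above (values illustrative):
+
+.. code:: python
+
+    from tqdm import trange
+
+    for _ in trange(100, desc="step", unit="file", colour="green",
+                    bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}]"):
+        pass
+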
+Extra CLI Options
+~~~~~~~~~~~~~~~~~
+
+* delim  : chr, optional  
+    Delimiting character [default: '\n']. Use '\0' for null.
+    N.B.: on Windows systems, Python converts '\n' to '\r\n'.
+* buf_size  : int, optional  
+    String buffer size in bytes [default: 256]
+    used when ``delim`` is specified.
+* bytes  : bool, optional  
+    If true, will count bytes, ignore ``delim``, and default
+    ``unit_scale`` to True, ``unit_divisor`` to 1024, and ``unit`` to 'B'.
+* tee  : bool, optional  
+    If true, passes ``stdin`` to both ``stderr`` and ``stdout``.
+* update  : bool, optional  
+    If true, will treat input as newly elapsed iterations,
+    i.e. numbers to pass to ``update()``. Note that this is slow
+    (~2e5 it/s) since every input must be decoded as a number.
+* update_to  : bool, optional  
+    If true, will treat input as total elapsed iterations,
+    i.e. numbers to assign to ``self.n``. Note that this is slow
+    (~2e5 it/s) since every input must be decoded as a number.
+* null  : bool, optional  
+    If true, will discard input (no stdout).
+* manpath  : str, optional  
+    Directory in which to install tqdm man pages.
+* comppath  : str, optional  
+    Directory in which to place tqdm completion.
+* log  : str, optional  
+    CRITICAL|FATAL|ERROR|WARN(ING)|[default: 'INFO']|DEBUG|NOTSET.
+
+Returns
+~~~~~~~
+
+* out  : decorated iterator.  
+
+.. code:: python
+
+    class tqdm():
+      def update(self, n=1):
+          """
+          Manually update the progress bar, useful for streams
+          such as reading files.
+          E.g.:
+          >>> t = tqdm(total=filesize) # Initialise
+          >>> for current_buffer in stream:
+          ...    ...
+          ...    t.update(len(current_buffer))
+          >>> t.close()
+          The last line is highly recommended, but possibly not necessary if
+          ``t.update()`` will be called in such a way that ``filesize`` will be
+          exactly reached and printed.
+
+          Parameters
+          ----------
+          n  : int or float, optional
+              Increment to add to the internal counter of iterations
+              [default: 1]. If using float, consider specifying ``{n:.3f}``
+              or similar in ``bar_format``, or specifying ``unit_scale``.
+
+          Returns
+          -------
+          out  : bool or None
+              True if a ``display()`` was triggered.
+          """
+
+      def close(self):
+          """Cleanup and (if leave=False) close the progressbar."""
+
+      def clear(self, nomove=False):
+          """Clear current bar display."""
+
+      def refresh(self, nolock=False, lock_args=None):
+          """
+          Force refresh the display of this bar.
+
+          Parameters
+          ----------
+          nolock  : bool, optional
+              If ``True``, does not lock.
+              If [default: ``False``]: calls ``acquire()`` on internal lock.
+          lock_args  : tuple, optional
+              Passed to internal lock's ``acquire()``.
+              If specified, will only ``display()`` if ``acquire()`` returns ``True``.
+          """
+
+      def unpause(self):
+          """Restart tqdm timer from last print time."""
+
+      def reset(self, total=None):
+          """
+          Resets to 0 iterations for repeated use.
+
+          Consider combining with ``leave=True``.
+
+          Parameters
+          ----------
+          total  : int or float, optional. Total to use for the new bar.
+          """
+
+      def set_description(self, desc=None, refresh=True):
+          """
+          Set/modify description of the progress bar.
+
+          Parameters
+          ----------
+          desc  : str, optional
+          refresh  : bool, optional
+              Forces refresh [default: True].
+          """
+
+      def set_postfix(self, ordered_dict=None, refresh=True, **tqdm_kwargs):
+          """
+          Set/modify postfix (additional stats)
+          with automatic formatting based on datatype.
+
+          Parameters
+          ----------
+          ordered_dict  : dict or OrderedDict, optional
+          refresh  : bool, optional
+              Forces refresh [default: True].
+          kwargs  : dict, optional
+          """
+
+      @classmethod
+      def write(cls, s, file=sys.stdout, end="\n"):
+          """Print a message via tqdm (without overlap with bars)."""
+
+      @property
+      def format_dict(self):
+          """Public API for read-only member access."""
+
+      def display(self, msg=None, pos=None):
+          """
+          Use ``self.sp`` to display ``msg`` in the specified ``pos``.
+
+          Consider overloading this function when inheriting to use e.g.:
+          ``self.some_frontend(**self.format_dict)`` instead of ``self.sp``.
+
+          Parameters
+          ----------
+          msg  : str, optional. What to display (default: ``repr(self)``).
+          pos  : int, optional. Position to ``moveto``
+            (default: ``abs(self.pos)``).
+          """
+
+      @classmethod
+      @contextmanager
+      def wrapattr(cls, stream, method, total=None, bytes=True, **tqdm_kwargs):
+          """
+          stream  : file-like object.
+          method  : str, "read" or "write". The result of ``read()`` and
+              the first argument of ``write()`` should have a ``len()``.
+
+          >>> with tqdm.wrapattr(file_obj, "read", total=file_obj.size) as fobj:
+          ...     while True:
+          ...         chunk = fobj.read(chunk_size)
+          ...         if not chunk:
+          ...             break
+          """
+
+      @classmethod
+      def pandas(cls, *targs, **tqdm_kwargs):
+          """Registers the current `tqdm` class with `pandas`."""
+
+    def trange(*args, **tqdm_kwargs):
+        """
+        A shortcut for `tqdm(xrange(*args), **tqdm_kwargs)`.
+        On Python3+, `range` is used instead of `xrange`.
+        """
+
+Convenience Functions
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code:: python
+
+    def tqdm.contrib.tenumerate(iterable, start=0, total=None,
+                                tqdm_class=tqdm.auto.tqdm, **tqdm_kwargs):
+        """Equivalent of `numpy.ndenumerate` or builtin `enumerate`."""
+
+    def tqdm.contrib.tzip(iter1, *iter2plus, **tqdm_kwargs):
+        """Equivalent of builtin `zip`."""
+
+    def tqdm.contrib.tmap(function, *sequences, **tqdm_kwargs):
+        """Equivalent of builtin `map`."""
+
+Submodules
+~~~~~~~~~~
+
+.. code:: python
+
+    class tqdm.notebook.tqdm(tqdm.tqdm):
+        """IPython/Jupyter Notebook widget."""
+
+    class tqdm.auto.tqdm(tqdm.tqdm):
+        """Automatically chooses beween `tqdm.notebook` and `tqdm.tqdm`."""
+
+    class tqdm.asyncio.tqdm(tqdm.tqdm):
+      """Asynchronous version."""
+      @classmethod
+      def as_completed(cls, fs, *, loop=None, timeout=None, total=None,
+                       **tqdm_kwargs):
+          """Wrapper for `asyncio.as_completed`."""
+
+    class tqdm.gui.tqdm(tqdm.tqdm):
+        """Matplotlib GUI version."""
+
+    class tqdm.tk.tqdm(tqdm.tqdm):
+        """Tkinter GUI version."""
+
+    class tqdm.rich.tqdm(tqdm.tqdm):
+        """`rich.progress` version."""
+
+    class tqdm.keras.TqdmCallback(keras.callbacks.Callback):
+        """Keras callback for epoch and batch progress."""
+
+    class tqdm.dask.TqdmCallback(dask.callbacks.Callback):
+        """Dask callback for task progress."""
+
+
+``contrib``
++++++++++++
+
+The ``tqdm.contrib`` package also contains experimental modules:
+
+- ``tqdm.contrib.itertools``: Thin wrappers around ``itertools``
+- ``tqdm.contrib.concurrent``: Thin wrappers around ``concurrent.futures``
+  (see the sketch after this list)
+- ``tqdm.contrib.discord``: Posts to `Discord <https://discord.com/>`__ bots
+- ``tqdm.contrib.telegram``: Posts to `Telegram <https://telegram.org/>`__ bots
+- ``tqdm.contrib.bells``: Automagically enables all optional features
+
+  * ``auto``, ``pandas``, ``discord``, ``telegram``
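+
+For instance, ``tqdm.contrib.concurrent`` provides ``thread_map``, a
+``concurrent.futures``-backed equivalent of ``map`` (a minimal sketch;
+``max_workers`` sizes the pool, other keyword arguments go to ``tqdm``):
+
+.. code:: python
+
+    from tqdm.contrib.concurrent import thread_map
+
+    # maps the function over the iterable using a thread pool, with a bar
+    results = thread_map(lambda x: x * 2, range(1000), max_workers=4)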
+
+Examples and Advanced Usage
+---------------------------
+
+- See the `examples <https://github.com/tqdm/tqdm/tree/master/examples>`__
+  folder;
+- import the module and run ``help()``;
+- consult the `wiki <https://github.com/tqdm/tqdm/wiki>`__;
+
+  * this has an
+    `excellent article <https://github.com/tqdm/tqdm/wiki/How-to-make-a-great-Progress-Bar>`__
+    on how to make a **great** progressbar;
+
+- check out the `slides from PyData London <https://tqdm.github.io/PyData2019/slides.html>`__, or
+- run the |binder-demo|.
+
+Description and additional stats
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Custom information can be displayed and updated dynamically on ``tqdm`` bars
+with the ``desc`` and ``postfix`` arguments:
+
+.. code:: python
+
+    from tqdm import tqdm, trange
+    from random import random, randint
+    from time import sleep
+
+    with trange(10) as t:
+        for i in t:
+            # Description will be displayed on the left
+            t.set_description('GEN %i' % i)
+            # Postfix will be displayed on the right,
+            # formatted automatically based on argument's datatype
+            t.set_postfix(loss=random(), gen=randint(1,999), str='h',
+                          lst=[1, 2])
+            sleep(0.1)
+
+    with tqdm(total=10, bar_format="{postfix[0]} {postfix[1][value]:>8.2g}",
+              postfix=["Batch", dict(value=0)]) as t:
+        for i in range(10):
+            sleep(0.1)
+            t.postfix[1]["value"] = i / 2
+            t.update()
+
+Points to remember when using ``{postfix[...]}`` in the ``bar_format`` string:
+
+- ``postfix`` also needs to be passed as an initial argument in a compatible
+  format, and
+- ``postfix`` will be auto-converted to a string if it is a ``dict``-like
+  object. To prevent this behaviour, insert an extra item into the dictionary
+  where the key is not a string (see the sketch below).
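+
+For example, inserting a non-string key keeps the ``dict`` intact so its
+entries remain addressable from ``bar_format`` (a minimal sketch):
+
+.. code:: python
+
+    from tqdm import tqdm
+
+    # the non-string key `1` prevents auto-conversion to a string,
+    # so `{postfix[stage]}` can look up the dict inside `bar_format`
+    with tqdm(total=10, bar_format="{l_bar}{bar}| {postfix[stage]}",
+              postfix={"stage": "warmup", 1: None}) as t:
+        for _ in range(10):
+            t.update()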
+
+Additional ``bar_format`` parameters may also be defined by overriding
+``format_dict``, and the bar itself may be modified using ``ascii``:
+
+.. code:: python
+
+    from tqdm import tqdm
+    class TqdmExtraFormat(tqdm):
+        """Provides a `total_time` format parameter"""
+        @property
+        def format_dict(self):
+            d = super(TqdmExtraFormat, self).format_dict
+            total_time = d["elapsed"] * (d["total"] or 0) / max(d["n"], 1)
+            d.update(total_time=self.format_interval(total_time) + " in total")
+            return d
+
+    for i in TqdmExtraFormat(
+          range(9), ascii=" .oO0",
+          bar_format="{total_time}: {percentage:.0f}%|{bar}{r_bar}"):
+        if i == 4:
+            break
+
+.. code::
+
+    00:00 in total: 44%|0000.     | 4/9 [00:00<00:00, 962.93it/s]
+
+Note that ``{bar}`` also supports a format specifier ``[width][type]``.
+
+- ``width``
+
+  * unspecified (default): automatic to fill ``ncols``
+  * ``int >= 0``: fixed width overriding ``ncols`` logic
+  * ``int < 0``: subtract from the automatic default
+
+- ``type``
+
+  * ``a``: ascii (``ascii=True`` override)
+  * ``u``: unicode (``ascii=False`` override)
+  * ``b``: blank (``ascii="  "`` override)
+
+This means a fixed bar with right-justified text may be created by using:
+``bar_format="{l_bar}{bar:10}|{bar:-10b}right-justified"``
+
+Nested progress bars
+~~~~~~~~~~~~~~~~~~~~
+
+``tqdm`` supports nested progress bars. Here's an example:
+
+.. code:: python
+
+    from tqdm.auto import trange
+    from time import sleep
+
+    for i in trange(4, desc='1st loop'):
+        for j in trange(5, desc='2nd loop'):
+            for k in trange(50, desc='3rd loop', leave=False):
+                sleep(0.01)
+
+For manual control over positioning (e.g. for multi-processing use),
+you may specify ``position=n`` where ``n=0`` for the outermost bar,
+``n=1`` for the next, and so on.
+However, it's best to check if ``tqdm`` can work without manual ``position``
+first.
+
+.. code:: python
+
+    from time import sleep
+    from tqdm import trange, tqdm
+    from multiprocessing import Pool, RLock, freeze_support
+
+    L = list(range(9))
+
+    def progresser(n):
+        interval = 0.001 / (n + 2)
+        total = 5000
+        text = "#{}, est. {:<04.2}s".format(n, interval * total)
+        for _ in trange(total, desc=text, position=n):
+            sleep(interval)
+
+    if __name__ == '__main__':
+        freeze_support()  # for Windows support
+        tqdm.set_lock(RLock())  # for managing output contention
+        p = Pool(initializer=tqdm.set_lock, initargs=(tqdm.get_lock(),))
+        p.map(progresser, L)
+
+Note that in Python 3, ``tqdm.write`` is thread-safe:
+
+.. code:: python
+
+    from time import sleep
+    from tqdm import tqdm, trange
+    from concurrent.futures import ThreadPoolExecutor
+
+    L = list(range(9))
+
+    def progresser(n):
+        interval = 0.001 / (n + 2)
+        total = 5000
+        text = "#{}, est. {:<04.2}s".format(n, interval * total)
+        for _ in trange(total, desc=text):
+            sleep(interval)
+        if n == 6:
+            tqdm.write("n == 6 completed.")
+            tqdm.write("`tqdm.write()` is thread-safe in py3!")
+
+    if __name__ == '__main__':
+        with ThreadPoolExecutor() as p:
+            p.map(progresser, L)
+
+Hooks and callbacks
+~~~~~~~~~~~~~~~~~~~
+
+``tqdm`` can easily support callbacks/hooks and manual updates.
+Here's an example with ``urllib``:
+
+**``urllib.urlretrieve`` documentation**
+
+    | [...]
+    | If present, the hook function will be called once
+    | on establishment of the network connection and once after each block read
+    | thereafter. The hook will be passed three arguments; a count of blocks
+    | transferred so far, a block size in bytes, and the total size of the file.
+    | [...]
+
+.. code:: python
+
+    import urllib, os
+    from tqdm import tqdm
+    urllib = getattr(urllib, 'request', urllib)
+
+    class TqdmUpTo(tqdm):
+        """Provides `update_to(n)` which uses `tqdm.update(delta_n)`."""
+        def update_to(self, b=1, bsize=1, tsize=None):
+            """
+            b  : int, optional
+                Number of blocks transferred so far [default: 1].
+            bsize  : int, optional
+                Size of each block (in tqdm units) [default: 1].
+            tsize  : int, optional
+                Total size (in tqdm units). If [default: None] remains unchanged.
+            """
+            if tsize is not None:
+                self.total = tsize
+            return self.update(b * bsize - self.n)  # also sets self.n = b * bsize
+
+    eg_link = "https://caspersci.uk.to/matryoshka.zip"
+    with TqdmUpTo(unit='B', unit_scale=True, unit_divisor=1024, miniters=1,
+                  desc=eg_link.split('/')[-1]) as t:  # all optional kwargs
+        urllib.urlretrieve(eg_link, filename=os.devnull,
+                           reporthook=t.update_to, data=None)
+        t.total = t.n
+
+Inspired by `twine#242 <https://github.com/pypa/twine/pull/242>`__.
+Functional alternative in
+`examples/tqdm_wget.py <https://github.com/tqdm/tqdm/blob/master/examples/tqdm_wget.py>`__.
+
+It is recommended to use ``miniters=1`` whenever there are potentially
+large differences in iteration speed (e.g. downloading a file over
+a patchy connection).
+
+**Wrapping read/write methods**
+
+To measure throughput through a file-like object's ``read`` or ``write``
+methods, use ``CallbackIOWrapper``:
+
+.. code:: python
+
+    from tqdm.auto import tqdm
+    from tqdm.utils import CallbackIOWrapper
+
+    with tqdm(total=file_obj.size,
+              unit='B', unit_scale=True, unit_divisor=1024) as t:
+        fobj = CallbackIOWrapper(t.update, file_obj, "read")
+        while True:
+            chunk = fobj.read(chunk_size)
+            if not chunk:
+                break
+        t.reset()
+        # ... continue to use `t` for something else
+
+Alternatively, use the even simpler ``wrapattr`` convenience function,
+which would condense both the ``urllib`` and ``CallbackIOWrapper`` examples
+down to:
+
+.. code:: python
+
+    import urllib, os
+    from tqdm import tqdm
+
+    eg_link = "https://caspersci.uk.to/matryoshka.zip"
+    response = getattr(urllib, 'request', urllib).urlopen(eg_link)
+    with tqdm.wrapattr(open(os.devnull, "wb"), "write",
+                       miniters=1, desc=eg_link.split('/')[-1],
+                       total=getattr(response, 'length', None)) as fout:
+        for chunk in response:
+            fout.write(chunk)
+
+The ``requests`` equivalent is nearly identical:
+
+.. code:: python
+
+    import requests, os
+    from tqdm import tqdm
+
+    eg_link = "https://caspersci.uk.to/matryoshka.zip"
+    response = requests.get(eg_link, stream=True)
+    with tqdm.wrapattr(open(os.devnull, "wb"), "write",
+                       miniters=1, desc=eg_link.split('/')[-1],
+                       total=int(response.headers.get('content-length', 0))) as fout:
+        for chunk in response.iter_content(chunk_size=4096):
+            fout.write(chunk)
+
+**Custom callback**
+
+``tqdm`` is known for intelligently skipping unnecessary displays. To make a
+custom callback take advantage of this, simply use the return value of
+``update()``. This is set to ``True`` if a ``display()`` was triggered.
+
+.. code:: python
+
+    from tqdm.auto import tqdm as std_tqdm
+
+    def external_callback(*args, **kwargs):
+        ...
+
+    class TqdmExt(std_tqdm):
+        def update(self, n=1):
+            displayed = super(TqdmExt, self).update(n)
+            if displayed:
+                external_callback(**self.format_dict)
+            return displayed
+
+``asyncio``
+~~~~~~~~~~~
+
+Note that ``break`` isn't currently caught by asynchronous iterators.
+This means that ``tqdm`` cannot clean up after itself in this case:
+
+.. code:: python
+
+    from tqdm.asyncio import tqdm
+
+    async for i in tqdm(range(9)):
+        if i == 2:
+            break
+
+Instead, either call ``pbar.close()`` manually or use the context manager syntax:
+
+.. code:: python
+
+    from tqdm.asyncio import tqdm
+
+    with tqdm(range(9)) as pbar:
+        async for i in pbar:
+            if i == 2:
+                break
+
+Pandas Integration
+~~~~~~~~~~~~~~~~~~
+
+Due to popular demand we've added support for ``pandas`` -- here's an example
+for ``DataFrame.progress_apply`` and ``DataFrameGroupBy.progress_apply``:
+
+.. code:: python
+
+    import pandas as pd
+    import numpy as np
+    from tqdm import tqdm
+
+    df = pd.DataFrame(np.random.randint(0, 100, (100000, 6)))
+
+    # Register `pandas.progress_apply` and `pandas.Series.map_apply` with `tqdm`
+    # (can use `tqdm.gui.tqdm`, `tqdm.notebook.tqdm`, optional kwargs, etc.)
+    tqdm.pandas(desc="my bar!")
+
+    # Now you can use `progress_apply` instead of `apply`
+    # and `progress_map` instead of `map`
+    df.progress_apply(lambda x: x**2)
+    # can also groupby:
+    # df.groupby(0).progress_apply(lambda x: x**2)
+
+In case you're interested in how this works (and how to modify it for your
+own callbacks), see the
+`examples <https://github.com/tqdm/tqdm/tree/master/examples>`__
+folder or import the module and run ``help()``.
+
+Keras Integration
+~~~~~~~~~~~~~~~~~
+
+A ``keras`` callback is also available:
+
+.. code:: python
+
+    from tqdm.keras import TqdmCallback
+
+    ...
+
+    model.fit(..., verbose=0, callbacks=[TqdmCallback()])
+
+Dask Integration
+~~~~~~~~~~~~~~~~
+
+A ``dask`` callback is also available:
+
+.. code:: python
+
+    from tqdm.dask import TqdmCallback
+
+    with TqdmCallback(desc="compute"):
+        ...
+        arr.compute()
+
+    # or use callback globally
+    cb = TqdmCallback(desc="global")
+    cb.register()
+    arr.compute()
+
+IPython/Jupyter Integration
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+IPython/Jupyter is supported via the ``tqdm.notebook`` submodule:
+
+.. code:: python
+
+    from tqdm.notebook import trange, tqdm
+    from time import sleep
+
+    for i in trange(3, desc='1st loop'):
+        for j in tqdm(range(100), desc='2nd loop'):
+            sleep(0.01)
+
+In addition to ``tqdm`` features, the submodule provides a native Jupyter
+widget (compatible with IPython v1-v4 and Jupyter), fully working nested bars
+and colour hints (blue: normal, green: completed, red: error/interrupt,
+light blue: no ETA); as demonstrated below.
+
+|Screenshot-Jupyter1|
+|Screenshot-Jupyter2|
+|Screenshot-Jupyter3|
+
+The ``notebook`` version supports percentage or pixels for overall width
+(e.g.: ``ncols='100%'`` or ``ncols='480px'``).
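+
+For example, in a notebook cell (a minimal sketch):
+
+.. code:: python
+
+    from tqdm.notebook import tqdm
+
+    for _ in tqdm(range(100), ncols='100%'):  # bar spans the full cell width
+        pass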
+
+It is also possible to let ``tqdm`` automatically choose between
+console or notebook versions by using the ``autonotebook`` submodule:
+
+.. code:: python
+
+    from tqdm.autonotebook import tqdm
+    tqdm.pandas()
+
+Note that this will issue a ``TqdmExperimentalWarning`` if run in a notebook
+since it is not meant to be possible to distinguish between ``jupyter notebook``
+and ``jupyter console``. Use ``auto`` instead of ``autonotebook`` to suppress
+this warning.
+
+Note that notebooks will display the bar in the cell where it was created.
+This may be a different cell from the one where it is used.
+If this is not desired, either
+
+- delay the creation of the bar to the cell where it must be displayed, or
+- create the bar with ``display=False``, and in a later cell call
+  ``display(bar.container)``:
+
+.. code:: python
+
+    from tqdm.notebook import tqdm
+    pbar = tqdm(..., display=False)
+
+.. code:: python
+
+    # different cell
+    display(pbar.container)
+
+The ``keras`` callback has a ``display()`` method which can be used likewise:
+
+.. code:: python
+
+    from tqdm.keras import TqdmCallback
+    cbk = TqdmCallback(display=False)
+
+.. code:: python
+
+    # different cell
+    cbk.display()
+    model.fit(..., verbose=0, callbacks=[cbk])
+
+Another possibility is to have a single bar (near the top of the notebook)
+which is constantly re-used (using ``reset()`` rather than ``close()``).
+For this reason, the notebook version (unlike the CLI version) does not
+automatically call ``close()`` upon ``Exception``.
+
+.. code:: python
+
+    from tqdm.notebook import tqdm
+    pbar = tqdm()
+
+.. code:: python
+
+    # different cell
+    iterable = range(100)
+    pbar.reset(total=len(iterable))  # initialise with new `total`
+    for i in iterable:
+        pbar.update()
+    pbar.refresh()  # force print final status but don't `close()`
+
+Custom Integration
+~~~~~~~~~~~~~~~~~~
+
+To change the default arguments (such as making ``dynamic_ncols=True``),
+simply use built-in Python magic:
+
+.. code:: python
+
+    from functools import partial
+    from tqdm import tqdm as std_tqdm
+    tqdm = partial(std_tqdm, dynamic_ncols=True)
+
+For further customisation,
+``tqdm`` may be inherited from to create custom callbacks (as with the
+``TqdmUpTo`` example `above <#hooks-and-callbacks>`__) or for custom frontends
+(e.g. GUIs such as notebook or plotting packages). In the latter case:
+
+1. ``def __init__()`` should call ``super().__init__(..., gui=True)`` to
+   disable terminal ``status_printer`` creation.
+2. Redefine: ``close()``, ``clear()``, ``display()``.
+
+Consider overloading ``display()`` to use e.g.
+``self.frontend(**self.format_dict)`` instead of ``self.sp(repr(self))``.
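+
+A hedged skeleton of such a frontend, loosely mirroring ``tqdm/gui.py``
+(``some_frontend`` is a hypothetical placeholder for a real GUI update):
+
+.. code:: python
+
+    from tqdm import tqdm as std_tqdm
+
+    def some_frontend(n=0, total=None, **kwargs):
+        """Hypothetical stand-in for a real GUI update call."""
+        print("frontend update: {}/{}".format(n, total))
+
+    class TqdmCustomFrontend(std_tqdm):
+        def __init__(self, *args, **kwargs):
+            kwargs = dict(kwargs, gui=True)  # skip terminal status_printer
+            super(TqdmCustomFrontend, self).__init__(*args, **kwargs)
+
+        def display(self, msg=None, pos=None):
+            # forward current stats to the frontend instead of `self.sp`
+            some_frontend(**self.format_dict)
+
+        def clear(self, *args, **kwargs):
+            pass  # nothing to clear outside a terminal
+
+        def close(self):
+            if self.disable:
+                return
+            self.disable = True  # prevent further updates
+            with self.get_lock():
+                self._instances.remove(self)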
+
+Some submodule examples of inheritance:
+
+- `tqdm/notebook.py <https://github.com/tqdm/tqdm/blob/master/tqdm/notebook.py>`__
+- `tqdm/gui.py <https://github.com/tqdm/tqdm/blob/master/tqdm/gui.py>`__
+- `tqdm/tk.py <https://github.com/tqdm/tqdm/blob/master/tqdm/tk.py>`__
+- `tqdm/contrib/telegram.py <https://github.com/tqdm/tqdm/blob/master/tqdm/contrib/telegram.py>`__
+- `tqdm/contrib/discord.py <https://github.com/tqdm/tqdm/blob/master/tqdm/contrib/discord.py>`__
+
+Dynamic Monitor/Meter
+~~~~~~~~~~~~~~~~~~~~~
+
+You can use a ``tqdm`` as a meter which is not monotonically increasing.
+This could be because ``n`` decreases (e.g. a CPU usage monitor) or ``total``
+changes.
+
+One example would be recursively searching for files. The ``total`` is the
+number of objects found so far, while ``n`` is the number of those objects which
+are files (rather than folders):
+
+.. code:: python
+
+    from tqdm import tqdm
+    import os.path
+
+    def find_files_recursively(path, show_progress=True):
+        files = []
+        # total=1 assumes `path` is a file
+        t = tqdm(total=1, unit="file", disable=not show_progress)
+        if not os.path.exists(path):
+            raise IOError("Cannot find: " + path)
+
+        def append_found_file(f):
+            files.append(f)
+            t.update()
+
+        def list_found_dir(path):
+            """returns os.listdir(path) assuming os.path.isdir(path)"""
+            listing = os.listdir(path)
+            # subtract 1 since a "file" we found was actually this directory
+            t.total += len(listing) - 1
+            # fancy way to give info without forcing a refresh
+            t.set_postfix(dir=path[-10:], refresh=False)
+            t.update(0)  # may trigger a refresh
+            return listing
+
+        def recursively_search(path):
+            if os.path.isdir(path):
+                for f in list_found_dir(path):
+                    recursively_search(os.path.join(path, f))
+            else:
+                append_found_file(path)
+
+        recursively_search(path)
+        t.set_postfix(dir=path)
+        t.close()
+        return files
+
+Using ``update(0)`` is a handy way to let ``tqdm`` decide when to trigger a
+display refresh to avoid console spamming.
+
+Writing messages
+~~~~~~~~~~~~~~~~
+
+This is a work in progress (see
+`#737 <https://github.com/tqdm/tqdm/issues/737>`__).
+
+Since ``tqdm`` uses a simple printing mechanism to display progress bars,
+you should not write any message in the terminal using ``print()`` while
+a progressbar is open.
+
+To write messages in the terminal without any collision with ``tqdm`` bar
+display, a ``.write()`` method is provided:
+
+.. code:: python
+
+    from tqdm.auto import tqdm, trange
+    from time import sleep
+
+    bar = trange(10)
+    for i in bar:
+        # Print using tqdm class method .write()
+        sleep(0.1)
+        if not (i % 3):
+            tqdm.write("Done task %i" % i)
+        # Can also use bar.write()
+
+By default, this will print to standard output ``sys.stdout``, but you can
+specify any file-like object using the ``file`` argument. For example, this
+can be used to redirect the messages to a log file or class.
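+
+For instance (a minimal sketch):
+
+.. code:: python
+
+    import sys
+    from tqdm import tqdm
+
+    tqdm.write("warning: falling back to defaults", file=sys.stderr)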
+
+Redirecting writing
+~~~~~~~~~~~~~~~~~~~
+
+If using a library that can print messages to the console, editing the library
+by replacing ``print()`` with ``tqdm.write()`` may not be desirable.
+In that case, redirecting ``sys.stdout`` to ``tqdm.write()`` is an option.
+
+To redirect ``sys.stdout``, create a file-like class that will write
+any input string to ``tqdm.write()``, and supply the arguments
+``file=sys.stdout, dynamic_ncols=True``.
+
+A reusable canonical example is given below:
+
+.. code:: python
+
+    from time import sleep
+    import contextlib
+    import sys
+    from tqdm import tqdm
+    from tqdm.contrib import DummyTqdmFile
+
+
+    @contextlib.contextmanager
+    def std_out_err_redirect_tqdm():
+        orig_out_err = sys.stdout, sys.stderr
+        try:
+            sys.stdout, sys.stderr = map(DummyTqdmFile, orig_out_err)
+            yield orig_out_err[0]
+        # Relay exceptions
+        except Exception as exc:
+            raise exc
+        # Always restore sys.stdout/err if necessary
+        finally:
+            sys.stdout, sys.stderr = orig_out_err
+
+    def some_fun(i):
+        print("Fee, fi, fo,".split()[i])
+
+    # Redirect stdout to tqdm.write() (don't forget the `as orig_stdout`)
+    with std_out_err_redirect_tqdm() as orig_stdout:
+        # tqdm needs the original stdout
+        # and dynamic_ncols=True to autodetect console width
+        for i in tqdm(range(3), file=orig_stdout, dynamic_ncols=True):
+            sleep(.5)
+            some_fun(i)
+
+    # After the `with`, printing is restored
+    print("Done!")
+
+Redirecting ``logging``
+~~~~~~~~~~~~~~~~~~~~~~~
+
+Similar to ``sys.stdout``/``sys.stderr`` as detailed above, console ``logging``
+may also be redirected to ``tqdm.write()``.
+
+Warning: if also redirecting ``sys.stdout``/``sys.stderr``, make sure to
+redirect ``logging`` first if needed.
+
+Helper methods are available in ``tqdm.contrib.logging``. For example:
+
+.. code:: python
+
+    import logging
+    from tqdm import trange
+    from tqdm.contrib.logging import logging_redirect_tqdm
+
+    LOG = logging.getLogger(__name__)
+
+    if __name__ == '__main__':
+        logging.basicConfig(level=logging.INFO)
+        with logging_redirect_tqdm():
+            for i in trange(9):
+                if i == 4:
+                    LOG.info("console logging redirected to `tqdm.write()`")
+        # logging restored
+
+Monitoring thread, intervals and miniters
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+``tqdm`` implements a few tricks to increase efficiency and reduce overhead.
+
+- Avoid unnecessary frequent bar refreshing: ``mininterval`` defines how long
+  to wait between each refresh. ``tqdm`` always gets updated in the background,
+  but it will display only every ``mininterval``.
+- Reduce number of calls to check system clock/time.
+- ``mininterval`` is more intuitive to configure than ``miniters``.
+  A clever adjustment system ``dynamic_miniters`` will automatically adjust
+  ``miniters`` to the number of iterations that fit into time ``mininterval``.
+  Essentially, ``tqdm`` will check if it's time to print without actually
+  checking time. This behaviour can still be bypassed by manually setting
+  ``miniters``.
+
+However, consider a case with a combination of fast and slow iterations.
+After a few fast iterations, ``dynamic_miniters`` will set ``miniters`` to a
+large number. When iteration rate subsequently slows, ``miniters`` will
+remain large and thus reduce display update frequency. To address this:
+
+- ``maxinterval`` defines the maximum time between display refreshes.
+  A concurrent monitoring thread checks for overdue updates and forces one
+  where necessary.
+
+The monitoring thread should not have a noticeable overhead, and guarantees
+updates at least every 10 seconds by default.
+This value can be directly changed by setting the ``monitor_interval`` of
+any ``tqdm`` instance (i.e. ``t = tqdm.tqdm(...); t.monitor_interval = 2``).
+The monitor thread may be disabled application-wide by setting
+``tqdm.tqdm.monitor_interval = 0`` before instantiation of any ``tqdm`` bar.
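+
+A minimal sketch of tuning both intervals (the values are arbitrary):
+
+.. code:: python
+
+    from tqdm import tqdm
+
+    # refresh at most every 0.5s; if no refresh happens for 4s, the
+    # monitoring thread forces one (and resets an inflated `miniters`)
+    for _ in tqdm(range(10 ** 6), mininterval=0.5, maxinterval=4):
+        pass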
+
+
+Merch
+-----
+
+You can buy `tqdm branded merch <https://tqdm.github.io/merch>`__ now!
+
+Contributions
+-------------
+
+|GitHub-Commits| |GitHub-Issues| |GitHub-PRs| |OpenHub-Status| |GitHub-Contributions| |CII Best Practices|
+
+All source code is hosted on `GitHub <https://github.com/tqdm/tqdm>`__.
+Contributions are welcome.
+
+See the
+`CONTRIBUTING <https://github.com/tqdm/tqdm/blob/master/CONTRIBUTING.md>`__
+file for more information.
+
+Developers who have made significant contributions, ranked by *SLoC*
+(surviving lines of code,
+`git fame <https://github.com/casperdcl/git-fame>`__ ``-wMC --excl '\.(png|gif|jpg)$'``),
+are:
+
+==================== ======================================================== ==== ================================
+Name                 ID                                                       SLoC Notes
+==================== ======================================================== ==== ================================
+Casper da Costa-Luis `casperdcl <https://github.com/casperdcl>`__            ~78% primary maintainer |Gift-Casper|
+Stephen Larroque     `lrq3000 <https://github.com/lrq3000>`__                ~10% team member
+Martin Zugnoni       `martinzugnoni <https://github.com/martinzugnoni>`__     ~4%
+Daniel Ecer          `de-code <https://github.com/de-code>`__                 ~2%
+Richard Sheridan     `richardsheridan <https://github.com/richardsheridan>`__ ~1%
+Guangshuo Chen       `chengs <https://github.com/chengs>`__                   ~1%
+Kyle Altendorf       `altendky <https://github.com/altendky>`__               <1%
+Matthew Stevens      `mjstevens777 <https://github.com/mjstevens777>`__       <1%
+Hadrien Mary         `hadim <https://github.com/hadim>`__                     <1%  team member
+Noam Yorav-Raphael   `noamraph <https://github.com/noamraph>`__               <1%  original author
+Mikhail Korobov      `kmike <https://github.com/kmike>`__                     <1%  team member
+==================== ======================================================== ==== ================================
+
+Ports to Other Languages
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+A list is available on
+`this wiki page <https://github.com/tqdm/tqdm/wiki/tqdm-ports>`__.
+
+
+LICENCE
+-------
+
+Open Source (OSI approved): |LICENCE|
+
+Citation information: |DOI|
+
+|README-Hits| (Since 19 May 2016)
+
+.. |Logo| image:: https://img.tqdm.ml/logo.gif
+.. |Screenshot| image:: https://img.tqdm.ml/tqdm.gif
+.. |Video| image:: https://img.tqdm.ml/video.jpg
+   :target: https://tqdm.github.io/video
+.. |Slides| image:: https://img.tqdm.ml/slides.jpg
+   :target: https://tqdm.github.io/PyData2019/slides.html
+.. |Merch| image:: https://img.tqdm.ml/merch.jpg
+   :target: https://tqdm.github.io/merch
+.. |Build-Status| image:: https://img.shields.io/github/workflow/status/tqdm/tqdm/Test/master?logo=GitHub
+   :target: https://github.com/tqdm/tqdm/actions?query=workflow%3ATest
+.. |Coverage-Status| image:: https://img.shields.io/coveralls/github/tqdm/tqdm/master?logo=coveralls
+   :target: https://coveralls.io/github/tqdm/tqdm
+.. |Branch-Coverage-Status| image:: https://codecov.io/gh/tqdm/tqdm/branch/master/graph/badge.svg
+   :target: https://codecov.io/gh/tqdm/tqdm
+.. |Codacy-Grade| image:: https://app.codacy.com/project/badge/Grade/3f965571598f44549c7818f29cdcf177
+   :target: https://www.codacy.com/gh/tqdm/tqdm/dashboard
+.. |CII Best Practices| image:: https://bestpractices.coreinfrastructure.org/projects/3264/badge
+   :target: https://bestpractices.coreinfrastructure.org/projects/3264
+.. |GitHub-Status| image:: https://img.shields.io/github/tag/tqdm/tqdm.svg?maxAge=86400&logo=github&logoColor=white
+   :target: https://github.com/tqdm/tqdm/releases
+.. |GitHub-Forks| image:: https://img.shields.io/github/forks/tqdm/tqdm.svg?logo=github&logoColor=white
+   :target: https://github.com/tqdm/tqdm/network
+.. |GitHub-Stars| image:: https://img.shields.io/github/stars/tqdm/tqdm.svg?logo=github&logoColor=white
+   :target: https://github.com/tqdm/tqdm/stargazers
+.. |GitHub-Commits| image:: https://img.shields.io/github/commit-activity/y/tqdm/tqdm.svg?logo=git&logoColor=white
+   :target: https://github.com/tqdm/tqdm/graphs/commit-activity
+.. |GitHub-Issues| image:: https://img.shields.io/github/issues-closed/tqdm/tqdm.svg?logo=github&logoColor=white
+   :target: https://github.com/tqdm/tqdm/issues?q=
+.. |GitHub-PRs| image:: https://img.shields.io/github/issues-pr-closed/tqdm/tqdm.svg?logo=github&logoColor=white
+   :target: https://github.com/tqdm/tqdm/pulls
+.. |GitHub-Contributions| image:: https://img.shields.io/github/contributors/tqdm/tqdm.svg?logo=github&logoColor=white
+   :target: https://github.com/tqdm/tqdm/graphs/contributors
+.. |GitHub-Updated| image:: https://img.shields.io/github/last-commit/tqdm/tqdm/master.svg?logo=github&logoColor=white&label=pushed
+   :target: https://github.com/tqdm/tqdm/pulse
+.. |Gift-Casper| image:: https://img.shields.io/badge/dynamic/json.svg?color=ff69b4&label=gifts%20received&prefix=%C2%A3&query=%24..sum&url=https%3A%2F%2Fcaspersci.uk.to%2Fgifts.json
+   :target: https://cdcl.ml/sponsor
+.. |Versions| image:: https://img.shields.io/pypi/v/tqdm.svg
+   :target: https://tqdm.github.io/releases
+.. |PyPI-Downloads| image:: https://img.shields.io/pypi/dm/tqdm.svg?label=pypi%20downloads&logo=PyPI&logoColor=white
+   :target: https://pepy.tech/project/tqdm
+.. |Py-Versions| image:: https://img.shields.io/pypi/pyversions/tqdm.svg?logo=python&logoColor=white
+   :target: https://pypi.org/project/tqdm
+.. |Conda-Forge-Status| image:: https://img.shields.io/conda/v/conda-forge/tqdm.svg?label=conda-forge&logo=conda-forge
+   :target: https://anaconda.org/conda-forge/tqdm
+.. |Snapcraft| image:: https://img.shields.io/badge/snap-install-82BEA0.svg?logo=snapcraft
+   :target: https://snapcraft.io/tqdm
+.. |Docker| image:: https://img.shields.io/badge/docker-pull-blue.svg?logo=docker&logoColor=white
+   :target: https://hub.docker.com/r/tqdm/tqdm
+.. |Libraries-Rank| image:: https://img.shields.io/librariesio/sourcerank/pypi/tqdm.svg?logo=koding&logoColor=white
+   :target: https://libraries.io/pypi/tqdm
+.. |Libraries-Dependents| image:: https://img.shields.io/librariesio/dependent-repos/pypi/tqdm.svg?logo=koding&logoColor=white
+    :target: https://github.com/tqdm/tqdm/network/dependents
+.. |OpenHub-Status| image:: https://www.openhub.net/p/tqdm/widgets/project_thin_badge?format=gif
+   :target: https://www.openhub.net/p/tqdm?ref=Thin+badge
+.. |awesome-python| image:: https://awesome.re/mentioned-badge.svg
+   :target: https://github.com/vinta/awesome-python
+.. |LICENCE| image:: https://img.shields.io/pypi/l/tqdm.svg
+   :target: https://raw.githubusercontent.com/tqdm/tqdm/master/LICENCE
+.. |DOI| image:: https://img.shields.io/badge/DOI-10.5281/zenodo.595120-blue.svg
+   :target: https://doi.org/10.5281/zenodo.595120
+.. |binder-demo| image:: https://mybinder.org/badge_logo.svg
+   :target: https://mybinder.org/v2/gh/tqdm/tqdm/master?filepath=DEMO.ipynb
+.. |Screenshot-Jupyter1| image:: https://img.tqdm.ml/jupyter-1.gif
+.. |Screenshot-Jupyter2| image:: https://img.tqdm.ml/jupyter-2.gif
+.. |Screenshot-Jupyter3| image:: https://img.tqdm.ml/jupyter-3.gif
+.. |README-Hits| image:: https://caspersci.uk.to/cgi-bin/hits.cgi?q=tqdm&style=social&r=https://github.com/tqdm/tqdm&l=https://img.tqdm.ml/favicon.png&f=https://img.tqdm.ml/logo.gif
+   :target: https://caspersci.uk.to/cgi-bin/hits.cgi?q=tqdm&a=plot&r=https://github.com/tqdm/tqdm&l=https://img.tqdm.ml/favicon.png&f=https://img.tqdm.ml/logo.gif&style=social
+
+
diff --git a/.local/lib/python3.8/site-packages/tqdm-4.63.1.dist-info/RECORD b/.local/lib/python3.8/site-packages/tqdm-4.63.1.dist-info/RECORD
new file mode 100644
index 0000000..34ac6ea
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/tqdm-4.63.1.dist-info/RECORD
@@ -0,0 +1,72 @@
+../../../bin/tqdm,sha256=ejd0w3qZ13EuKvThVwl6Rl0PH6IwmWdnyQFKBOoY28U,207
+tqdm-4.63.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
+tqdm-4.63.1.dist-info/LICENCE,sha256=oPwXhajyogCjEk1wPUlVBgG3dBzP_IYXE8LdqgelN90,2006
+tqdm-4.63.1.dist-info/METADATA,sha256=dZjklNO6JuadYj018BoVXQHQYx9FpC3vRWXJW3hTLJE,57083
+tqdm-4.63.1.dist-info/RECORD,,
+tqdm-4.63.1.dist-info/WHEEL,sha256=z9j0xAa_JmUKMpmz72K0ZGALSM_n-wQVmGbleXx2VHg,110
+tqdm-4.63.1.dist-info/entry_points.txt,sha256=wDAae6wt2WNgXb_Zk5TNu9YmqoafUG2pV7K1CXf3k_U,40
+tqdm-4.63.1.dist-info/top_level.txt,sha256=NLiUJNfmc9At15s7JURiwvqMEjUi9G5PMGRrmMYzNSM,5
+tqdm/__init__.py,sha256=LiezHIqATK3FOij_365eMcu11UPmijmDFKogl72fHp4,1639
+tqdm/__main__.py,sha256=bYt9eEaoRQWdejEHFD8REx9jxVEdZptECFsV7F49Ink,30
+tqdm/__pycache__/__init__.cpython-38.pyc,,
+tqdm/__pycache__/__main__.cpython-38.pyc,,
+tqdm/__pycache__/_dist_ver.cpython-38.pyc,,
+tqdm/__pycache__/_main.cpython-38.pyc,,
+tqdm/__pycache__/_monitor.cpython-38.pyc,,
+tqdm/__pycache__/_tqdm.cpython-38.pyc,,
+tqdm/__pycache__/_tqdm_gui.cpython-38.pyc,,
+tqdm/__pycache__/_tqdm_notebook.cpython-38.pyc,,
+tqdm/__pycache__/_tqdm_pandas.cpython-38.pyc,,
+tqdm/__pycache__/_utils.cpython-38.pyc,,
+tqdm/__pycache__/asyncio.cpython-38.pyc,,
+tqdm/__pycache__/auto.cpython-38.pyc,,
+tqdm/__pycache__/autonotebook.cpython-38.pyc,,
+tqdm/__pycache__/cli.cpython-38.pyc,,
+tqdm/__pycache__/dask.cpython-38.pyc,,
+tqdm/__pycache__/gui.cpython-38.pyc,,
+tqdm/__pycache__/keras.cpython-38.pyc,,
+tqdm/__pycache__/notebook.cpython-38.pyc,,
+tqdm/__pycache__/rich.cpython-38.pyc,,
+tqdm/__pycache__/std.cpython-38.pyc,,
+tqdm/__pycache__/tk.cpython-38.pyc,,
+tqdm/__pycache__/utils.cpython-38.pyc,,
+tqdm/__pycache__/version.cpython-38.pyc,,
+tqdm/_dist_ver.py,sha256=cujt01NudbgLibTtGx3ylBSYSlvN35b0nNDoib9PbxI,23
+tqdm/_main.py,sha256=9ySvgmi_2Sw4CAo5UDW0Q2dxfTryboEWGHohfCJz0sA,283
+tqdm/_monitor.py,sha256=Uku-DPWgzJ7dO5CK08xKJK-E_F6qQ-JB3ksuXczSYR0,3699
+tqdm/_tqdm.py,sha256=LfLCuJ6bpsVo9xilmtBXyEm1vGnUCFrliW85j3J-nD4,283
+tqdm/_tqdm_gui.py,sha256=03Hc8KayxJveieI5-0-2NGiDpLvw9jZekofJUV7CCwk,287
+tqdm/_tqdm_notebook.py,sha256=BuHiLuxu6uEfZFaPJW3RPpPaxaVctEQA3kdSJSDL1hw,307
+tqdm/_tqdm_pandas.py,sha256=c9jptUgigN6axRDhRd4Rif98Tmxeopc1nFNFhIpbFUE,888
+tqdm/_utils.py,sha256=YIwj0ZJQonXgYa2HaA3U_paP4xOXJqj0ZWMPeZSf6Pw,596
+tqdm/asyncio.py,sha256=7CWT2150uMvyXSMDkl9PvG9G_HrfOVY32rWbeP2bw1Y,2789
+tqdm/auto.py,sha256=P__dIfklVGqcRdzV4q68SOBVhLHe9QWnrCk3IJIA-fM,1106
+tqdm/autonotebook.py,sha256=tMy67BhpRXggmMCeZaCS8feg03R1uXw9ZmjxlUGGp0A,1057
+tqdm/cli.py,sha256=h4DvZDBZ2ZlANtlRi2mXblr8yjEe9iUGlOqZc5XPljc,10871
+tqdm/completion.sh,sha256=j79KbSmpIj_E11jfTfBXrGnUTzKXVpQ1vGVQvsyDRl4,946
+tqdm/contrib/__init__.py,sha256=gpiBeuWB1OaaoGFwiS-G_Nodv8fLPZ_xVxbENL0EYL4,2604
+tqdm/contrib/__pycache__/__init__.cpython-38.pyc,,
+tqdm/contrib/__pycache__/bells.cpython-38.pyc,,
+tqdm/contrib/__pycache__/concurrent.cpython-38.pyc,,
+tqdm/contrib/__pycache__/discord.cpython-38.pyc,,
+tqdm/contrib/__pycache__/itertools.cpython-38.pyc,,
+tqdm/contrib/__pycache__/logging.cpython-38.pyc,,
+tqdm/contrib/__pycache__/telegram.cpython-38.pyc,,
+tqdm/contrib/__pycache__/utils_worker.cpython-38.pyc,,
+tqdm/contrib/bells.py,sha256=_CURLQOkZHjn429e6HGB5U3ESTPBjxr15flG1346eF8,735
+tqdm/contrib/concurrent.py,sha256=YmHJG_jUYUsg2NR1eAhsrl6X-_BfzlANtW32IhNmRTA,4644
+tqdm/contrib/discord.py,sha256=H5Wq6xbvH0H8c_53jpSWZtyElK_RprVSPqkXZjDRVW0,3989
+tqdm/contrib/itertools.py,sha256=PW3WkdYKP-aVCrlcOfH53i4IA5R7G8rjEJdp6CF5IO8,814
+tqdm/contrib/logging.py,sha256=F4pEE2mRecNKoZNm7jIWr2nMeelvogvZ8aopo_irK44,3844
+tqdm/contrib/telegram.py,sha256=5S6IIZMjDg7rcdxJaGGcvTRC37DnHf0r36ahje_JyyQ,5228
+tqdm/contrib/utils_worker.py,sha256=3Mj9TvDa3qRGoZvrmU5cQTnmQLPd8oP7AURuJjVVFXo,1247
+tqdm/dask.py,sha256=BqPQ2O_Bd59hnXlC7B5rS7y9C2wI4cPkIHDdeCWGtzc,1377
+tqdm/gui.py,sha256=kQP-ezwAUSvJ44f50Up2fEG4Hq-p4snrEyKwSNwcgkI,5943
+tqdm/keras.py,sha256=auQQJvAZMHgr0Y3kY6pKQVD-6EAJMPg91ZOJ2WSTN-A,4409
+tqdm/notebook.py,sha256=CiV0VLSr5GSNvmJYuC9yO-2AHYC6JSKvqnZttKvG-a4,11260
+tqdm/rich.py,sha256=Hs8iu1pXZHUGj2DJVMiEUO897zmiodf3KwZQlO0Z_G4,4964
+tqdm/std.py,sha256=3RsIpr7oef5G3rbEZSptwSKziT8vXo8GCad5Df6mKpQ,58340
+tqdm/tk.py,sha256=a3lbj1GsP7jyDpQQgm5ohsFm9Y9-adeklYIhPH69P88,6948
+tqdm/tqdm.1,sha256=1YMLZFiY0wGAUYgjmrfr9vQTlyMql6LT31oUWvOyQdU,7997
+tqdm/utils.py,sha256=KvE0DM28X__NHYKgGl5jUrk6CM5BV60G4Nf55ITPeJI,9803
+tqdm/version.py,sha256=-1yWjfu3P0eghVsysHH07fbzdiADNRdzRtYPqOaqR2A,333
diff --git a/.local/lib/python3.8/site-packages/tqdm-4.63.1.dist-info/WHEEL b/.local/lib/python3.8/site-packages/tqdm-4.63.1.dist-info/WHEEL
new file mode 100644
index 0000000..0b18a28
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/tqdm-4.63.1.dist-info/WHEEL
@@ -0,0 +1,6 @@
+Wheel-Version: 1.0
+Generator: bdist_wheel (0.37.1)
+Root-Is-Purelib: true
+Tag: py2-none-any
+Tag: py3-none-any
+
diff --git a/.local/lib/python3.8/site-packages/tqdm-4.63.1.dist-info/entry_points.txt b/.local/lib/python3.8/site-packages/tqdm-4.63.1.dist-info/entry_points.txt
new file mode 100644
index 0000000..9b3087d
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/tqdm-4.63.1.dist-info/entry_points.txt
@@ -0,0 +1,3 @@
+[console_scripts]
+tqdm = tqdm.cli:main
+
diff --git a/.local/lib/python3.8/site-packages/tqdm-4.63.1.dist-info/top_level.txt b/.local/lib/python3.8/site-packages/tqdm-4.63.1.dist-info/top_level.txt
new file mode 100644
index 0000000..78620c4
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/tqdm-4.63.1.dist-info/top_level.txt
@@ -0,0 +1 @@
+tqdm
diff --git a/.local/lib/python3.8/site-packages/tqdm/__init__.py b/.local/lib/python3.8/site-packages/tqdm/__init__.py
new file mode 100644
index 0000000..a021d16
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/tqdm/__init__.py
@@ -0,0 +1,41 @@
+from ._monitor import TMonitor, TqdmSynchronisationWarning
+from ._tqdm_pandas import tqdm_pandas
+from .cli import main  # TODO: remove in v5.0.0
+from .gui import tqdm as tqdm_gui  # TODO: remove in v5.0.0
+from .gui import trange as tgrange  # TODO: remove in v5.0.0
+from .std import (
+    TqdmDeprecationWarning, TqdmExperimentalWarning, TqdmKeyError, TqdmMonitorWarning,
+    TqdmTypeError, TqdmWarning, tqdm, trange)
+from .version import __version__
+
+__all__ = ['tqdm', 'tqdm_gui', 'trange', 'tgrange', 'tqdm_pandas',
+           'tqdm_notebook', 'tnrange', 'main', 'TMonitor',
+           'TqdmTypeError', 'TqdmKeyError',
+           'TqdmWarning', 'TqdmDeprecationWarning',
+           'TqdmExperimentalWarning',
+           'TqdmMonitorWarning', 'TqdmSynchronisationWarning',
+           '__version__']
+
+
+def tqdm_notebook(*args, **kwargs):  # pragma: no cover
+    """See tqdm.notebook.tqdm for full documentation"""
+    from warnings import warn
+
+    from .notebook import tqdm as _tqdm_notebook
+    warn("This function will be removed in tqdm==5.0.0\n"
+         "Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`",
+         TqdmDeprecationWarning, stacklevel=2)
+    return _tqdm_notebook(*args, **kwargs)
+
+
+def tnrange(*args, **kwargs):  # pragma: no cover
+    """
+    A shortcut for `tqdm.notebook.tqdm(xrange(*args), **kwargs)`.
+    On Python3+, `range` is used instead of `xrange`.
+    """
+    from warnings import warn
+
+    from .notebook import trange as _tnrange
+    warn("Please use `tqdm.notebook.trange` instead of `tqdm.tnrange`",
+         TqdmDeprecationWarning, stacklevel=2)
+    return _tnrange(*args, **kwargs)
diff --git a/.local/lib/python3.8/site-packages/tqdm/__main__.py b/.local/lib/python3.8/site-packages/tqdm/__main__.py
new file mode 100644
index 0000000..4e28416
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/tqdm/__main__.py
@@ -0,0 +1,3 @@
+from .cli import main
+
+main()
diff --git a/.local/lib/python3.8/site-packages/tqdm/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/tqdm/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000..65f6fc9
Binary files /dev/null and b/.local/lib/python3.8/site-packages/tqdm/__pycache__/__init__.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/tqdm/__pycache__/__main__.cpython-38.pyc b/.local/lib/python3.8/site-packages/tqdm/__pycache__/__main__.cpython-38.pyc
new file mode 100644
index 0000000..57b3fd7
Binary files /dev/null and b/.local/lib/python3.8/site-packages/tqdm/__pycache__/__main__.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/tqdm/__pycache__/_dist_ver.cpython-38.pyc b/.local/lib/python3.8/site-packages/tqdm/__pycache__/_dist_ver.cpython-38.pyc
new file mode 100644
index 0000000..41d1573
Binary files /dev/null and b/.local/lib/python3.8/site-packages/tqdm/__pycache__/_dist_ver.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/tqdm/__pycache__/_main.cpython-38.pyc b/.local/lib/python3.8/site-packages/tqdm/__pycache__/_main.cpython-38.pyc
new file mode 100644
index 0000000..9a1269f
Binary files /dev/null and b/.local/lib/python3.8/site-packages/tqdm/__pycache__/_main.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/tqdm/__pycache__/_monitor.cpython-38.pyc b/.local/lib/python3.8/site-packages/tqdm/__pycache__/_monitor.cpython-38.pyc
new file mode 100644
index 0000000..951d590
Binary files /dev/null and b/.local/lib/python3.8/site-packages/tqdm/__pycache__/_monitor.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/tqdm/__pycache__/_tqdm.cpython-38.pyc b/.local/lib/python3.8/site-packages/tqdm/__pycache__/_tqdm.cpython-38.pyc
new file mode 100644
index 0000000..be25fd3
Binary files /dev/null and b/.local/lib/python3.8/site-packages/tqdm/__pycache__/_tqdm.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/tqdm/__pycache__/_tqdm_gui.cpython-38.pyc b/.local/lib/python3.8/site-packages/tqdm/__pycache__/_tqdm_gui.cpython-38.pyc
new file mode 100644
index 0000000..077dd8a
Binary files /dev/null and b/.local/lib/python3.8/site-packages/tqdm/__pycache__/_tqdm_gui.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/tqdm/__pycache__/_tqdm_notebook.cpython-38.pyc b/.local/lib/python3.8/site-packages/tqdm/__pycache__/_tqdm_notebook.cpython-38.pyc
new file mode 100644
index 0000000..0d162e8
Binary files /dev/null and b/.local/lib/python3.8/site-packages/tqdm/__pycache__/_tqdm_notebook.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/tqdm/__pycache__/_tqdm_pandas.cpython-38.pyc b/.local/lib/python3.8/site-packages/tqdm/__pycache__/_tqdm_pandas.cpython-38.pyc
new file mode 100644
index 0000000..28909ea
Binary files /dev/null and b/.local/lib/python3.8/site-packages/tqdm/__pycache__/_tqdm_pandas.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/tqdm/__pycache__/_utils.cpython-38.pyc b/.local/lib/python3.8/site-packages/tqdm/__pycache__/_utils.cpython-38.pyc
new file mode 100644
index 0000000..2d06866
Binary files /dev/null and b/.local/lib/python3.8/site-packages/tqdm/__pycache__/_utils.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/tqdm/__pycache__/asyncio.cpython-38.pyc b/.local/lib/python3.8/site-packages/tqdm/__pycache__/asyncio.cpython-38.pyc
new file mode 100644
index 0000000..44a67b2
Binary files /dev/null and b/.local/lib/python3.8/site-packages/tqdm/__pycache__/asyncio.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/tqdm/__pycache__/auto.cpython-38.pyc b/.local/lib/python3.8/site-packages/tqdm/__pycache__/auto.cpython-38.pyc
new file mode 100644
index 0000000..199c026
Binary files /dev/null and b/.local/lib/python3.8/site-packages/tqdm/__pycache__/auto.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/tqdm/__pycache__/autonotebook.cpython-38.pyc b/.local/lib/python3.8/site-packages/tqdm/__pycache__/autonotebook.cpython-38.pyc
new file mode 100644
index 0000000..19cf200
Binary files /dev/null and b/.local/lib/python3.8/site-packages/tqdm/__pycache__/autonotebook.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/tqdm/__pycache__/cli.cpython-38.pyc b/.local/lib/python3.8/site-packages/tqdm/__pycache__/cli.cpython-38.pyc
new file mode 100644
index 0000000..ee19070
Binary files /dev/null and b/.local/lib/python3.8/site-packages/tqdm/__pycache__/cli.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/tqdm/__pycache__/dask.cpython-38.pyc b/.local/lib/python3.8/site-packages/tqdm/__pycache__/dask.cpython-38.pyc
new file mode 100644
index 0000000..4b7d13a
Binary files /dev/null and b/.local/lib/python3.8/site-packages/tqdm/__pycache__/dask.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/tqdm/__pycache__/gui.cpython-38.pyc b/.local/lib/python3.8/site-packages/tqdm/__pycache__/gui.cpython-38.pyc
new file mode 100644
index 0000000..2d9e5a9
Binary files /dev/null and b/.local/lib/python3.8/site-packages/tqdm/__pycache__/gui.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/tqdm/__pycache__/keras.cpython-38.pyc b/.local/lib/python3.8/site-packages/tqdm/__pycache__/keras.cpython-38.pyc
new file mode 100644
index 0000000..4b5614b
Binary files /dev/null and b/.local/lib/python3.8/site-packages/tqdm/__pycache__/keras.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/tqdm/__pycache__/notebook.cpython-38.pyc b/.local/lib/python3.8/site-packages/tqdm/__pycache__/notebook.cpython-38.pyc
new file mode 100644
index 0000000..85e727b
Binary files /dev/null and b/.local/lib/python3.8/site-packages/tqdm/__pycache__/notebook.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/tqdm/__pycache__/rich.cpython-38.pyc b/.local/lib/python3.8/site-packages/tqdm/__pycache__/rich.cpython-38.pyc
new file mode 100644
index 0000000..7909541
Binary files /dev/null and b/.local/lib/python3.8/site-packages/tqdm/__pycache__/rich.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/tqdm/__pycache__/std.cpython-38.pyc b/.local/lib/python3.8/site-packages/tqdm/__pycache__/std.cpython-38.pyc
new file mode 100644
index 0000000..d5741b4
Binary files /dev/null and b/.local/lib/python3.8/site-packages/tqdm/__pycache__/std.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/tqdm/__pycache__/tk.cpython-38.pyc b/.local/lib/python3.8/site-packages/tqdm/__pycache__/tk.cpython-38.pyc
new file mode 100644
index 0000000..907c0fb
Binary files /dev/null and b/.local/lib/python3.8/site-packages/tqdm/__pycache__/tk.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/tqdm/__pycache__/utils.cpython-38.pyc b/.local/lib/python3.8/site-packages/tqdm/__pycache__/utils.cpython-38.pyc
new file mode 100644
index 0000000..0c0069e
Binary files /dev/null and b/.local/lib/python3.8/site-packages/tqdm/__pycache__/utils.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/tqdm/__pycache__/version.cpython-38.pyc b/.local/lib/python3.8/site-packages/tqdm/__pycache__/version.cpython-38.pyc
new file mode 100644
index 0000000..637cef5
Binary files /dev/null and b/.local/lib/python3.8/site-packages/tqdm/__pycache__/version.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/tqdm/_dist_ver.py b/.local/lib/python3.8/site-packages/tqdm/_dist_ver.py
new file mode 100644
index 0000000..fcd9996
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/tqdm/_dist_ver.py
@@ -0,0 +1 @@
+__version__ = '4.63.1'
diff --git a/.local/lib/python3.8/site-packages/tqdm/_main.py b/.local/lib/python3.8/site-packages/tqdm/_main.py
new file mode 100644
index 0000000..04fdeef
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/tqdm/_main.py
@@ -0,0 +1,9 @@
+from warnings import warn
+
+from .cli import *  # NOQA
+from .cli import __all__  # NOQA
+from .std import TqdmDeprecationWarning
+
+warn("This function will be removed in tqdm==5.0.0\n"
+     "Please use `tqdm.cli.*` instead of `tqdm._main.*`",
+     TqdmDeprecationWarning, stacklevel=2)
diff --git a/.local/lib/python3.8/site-packages/tqdm/_monitor.py b/.local/lib/python3.8/site-packages/tqdm/_monitor.py
new file mode 100644
index 0000000..f71aa56
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/tqdm/_monitor.py
@@ -0,0 +1,95 @@
+import atexit
+from threading import Event, Thread, current_thread
+from time import time
+from warnings import warn
+
+__all__ = ["TMonitor", "TqdmSynchronisationWarning"]
+
+
+class TqdmSynchronisationWarning(RuntimeWarning):
+    """tqdm multi-thread/-process errors which may cause incorrect nesting
+    but otherwise no adverse effects"""
+    pass
+
+
+class TMonitor(Thread):
+    """
+    Monitoring thread for tqdm bars.
+    Monitors if tqdm bars are taking too much time to display
+    and readjusts miniters automatically if necessary.
+
+    Parameters
+    ----------
+    tqdm_cls  : class
+        tqdm class to use (can be core tqdm or a submodule).
+    sleep_interval  : float
+        Time to sleep between monitoring checks.
+    """
+    _test = {}  # internal vars for unit testing
+
+    def __init__(self, tqdm_cls, sleep_interval):
+        Thread.__init__(self)
+        self.daemon = True  # kill thread when main killed (KeyboardInterrupt)
+        self.woken = 0  # last time woken up, to sync with monitor
+        self.tqdm_cls = tqdm_cls
+        self.sleep_interval = sleep_interval
+        self._time = self._test.get("time", time)
+        self.was_killed = self._test.get("Event", Event)()
+        atexit.register(self.exit)
+        self.start()
+
+    def exit(self):
+        self.was_killed.set()
+        if self is not current_thread():
+            self.join()
+        return self.report()
+
+    def get_instances(self):
+        # returns a copy of started `tqdm_cls` instances
+        return [i for i in self.tqdm_cls._instances.copy()
+                # Avoid race by checking that the instance started
+                if hasattr(i, 'start_t')]
+
+    def run(self):
+        cur_t = self._time()
+        while True:
+            # After processing and before sleeping, notify that we woke
+            # Need to be done just before sleeping
+            self.woken = cur_t
+            # Sleep some time...
+            self.was_killed.wait(self.sleep_interval)
+            # Quit if killed
+            if self.was_killed.is_set():
+                return
+            # Then monitor!
+            # Acquire lock (to access _instances)
+            with self.tqdm_cls.get_lock():
+                cur_t = self._time()
+                # Check tqdm instances are waiting too long to print
+                instances = self.get_instances()
+                for instance in instances:
+                    # Check event in loop to reduce blocking time on exit
+                    if self.was_killed.is_set():
+                        return
+                    # Only if mininterval > 1 (else iterations are just slow)
+                    # and last refresh exceeded maxinterval
+                    if (
+                        instance.miniters > 1
+                        and (cur_t - instance.last_print_t) >= instance.maxinterval
+                    ):
+                        # force bypassing miniters on next iteration
+                        # (dynamic_miniters adjusts mininterval automatically)
+                        instance.miniters = 1
+                        # Refresh now! (works only for manual tqdm)
+                        instance.refresh(nolock=True)
+                    # Remove accidental long-lived strong reference
+                    del instance
+                if instances != self.get_instances():  # pragma: nocover
+                    warn("Set changed size during iteration" +
+                         " (see https://github.com/tqdm/tqdm/issues/481)",
+                         TqdmSynchronisationWarning, stacklevel=2)
+                # Remove accidental long-lived strong references
+                del instances
+
+    def report(self):
+        return not self.was_killed.is_set()
diff --git a/.local/lib/python3.8/site-packages/tqdm/_tqdm.py b/.local/lib/python3.8/site-packages/tqdm/_tqdm.py
new file mode 100644
index 0000000..7fc4962
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/tqdm/_tqdm.py
@@ -0,0 +1,9 @@
+from warnings import warn
+
+from .std import *  # NOQA
+from .std import __all__  # NOQA
+from .std import TqdmDeprecationWarning
+
+warn("This function will be removed in tqdm==5.0.0\n"
+     "Please use `tqdm.std.*` instead of `tqdm._tqdm.*`",
+     TqdmDeprecationWarning, stacklevel=2)
diff --git a/.local/lib/python3.8/site-packages/tqdm/_tqdm_gui.py b/.local/lib/python3.8/site-packages/tqdm/_tqdm_gui.py
new file mode 100644
index 0000000..f32aa89
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/tqdm/_tqdm_gui.py
@@ -0,0 +1,9 @@
+from warnings import warn
+
+from .gui import *  # NOQA
+from .gui import __all__  # NOQA
+from .std import TqdmDeprecationWarning
+
+warn("This function will be removed in tqdm==5.0.0\n"
+     "Please use `tqdm.gui.*` instead of `tqdm._tqdm_gui.*`",
+     TqdmDeprecationWarning, stacklevel=2)
diff --git a/.local/lib/python3.8/site-packages/tqdm/_tqdm_notebook.py b/.local/lib/python3.8/site-packages/tqdm/_tqdm_notebook.py
new file mode 100644
index 0000000..f225fbf
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/tqdm/_tqdm_notebook.py
@@ -0,0 +1,9 @@
+from warnings import warn
+
+from .notebook import *  # NOQA
+from .notebook import __all__  # NOQA
+from .std import TqdmDeprecationWarning
+
+warn("This function will be removed in tqdm==5.0.0\n"
+     "Please use `tqdm.notebook.*` instead of `tqdm._tqdm_notebook.*`",
+     TqdmDeprecationWarning, stacklevel=2)
diff --git a/.local/lib/python3.8/site-packages/tqdm/_tqdm_pandas.py b/.local/lib/python3.8/site-packages/tqdm/_tqdm_pandas.py
new file mode 100644
index 0000000..c4fe6ef
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/tqdm/_tqdm_pandas.py
@@ -0,0 +1,24 @@
+import sys
+
+__author__ = "github.com/casperdcl"
+__all__ = ['tqdm_pandas']
+
+
+def tqdm_pandas(tclass, **tqdm_kwargs):
+    """
+    Registers the given `tqdm` instance with
+    `pandas.core.groupby.DataFrameGroupBy.progress_apply`.
+    """
+    from tqdm import TqdmDeprecationWarning
+
+    if isinstance(tclass, type) or (getattr(tclass, '__name__', '').startswith(
+            'tqdm_')):  # delayed adapter case
+        TqdmDeprecationWarning(
+            "Please use `tqdm.pandas(...)` instead of `tqdm_pandas(tqdm, ...)`.",
+            fp_write=getattr(tqdm_kwargs.get('file', None), 'write', sys.stderr.write))
+        tclass.pandas(**tqdm_kwargs)
+    else:
+        TqdmDeprecationWarning(
+            "Please use `tqdm.pandas(...)` instead of `tqdm_pandas(tqdm(...))`.",
+            fp_write=getattr(tclass.fp, 'write', sys.stderr.write))
+        type(tclass).pandas(deprecated_t=tclass)
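+
+
+# Minimal usage sketch of the replacement API named in the warnings above
+# (assumes `pandas` is installed; `desc` is just an optional bar label):
+if __name__ == "__main__":
+    import pandas as pd
+    from tqdm import tqdm
+
+    tqdm.pandas(desc="apply")
+    df = pd.DataFrame({"x": range(10000)})
+    df["y"] = df["x"].progress_apply(lambda v: v * 2)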
diff --git a/.local/lib/python3.8/site-packages/tqdm/_utils.py b/.local/lib/python3.8/site-packages/tqdm/_utils.py
new file mode 100644
index 0000000..2cf1090
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/tqdm/_utils.py
@@ -0,0 +1,12 @@
+from warnings import warn
+
+from .std import TqdmDeprecationWarning
+from .utils import (  # NOQA, pylint: disable=unused-import
+    CUR_OS, IS_NIX, IS_WIN, RE_ANSI, Comparable, FormatReplace, SimpleTextIOWrapper, _basestring,
+    _environ_cols_wrapper, _is_ascii, _is_utf, _range, _screen_shape_linux, _screen_shape_tput,
+    _screen_shape_windows, _screen_shape_wrapper, _supports_unicode, _term_move_up, _unich,
+    _unicode, colorama)
+
+warn("This function will be removed in tqdm==5.0.0\n"
+     "Please use `tqdm.utils.*` instead of `tqdm._utils.*`",
+     TqdmDeprecationWarning, stacklevel=2)
diff --git a/.local/lib/python3.8/site-packages/tqdm/asyncio.py b/.local/lib/python3.8/site-packages/tqdm/asyncio.py
new file mode 100644
index 0000000..97c5f88
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/tqdm/asyncio.py
@@ -0,0 +1,93 @@
+"""
+Asynchronous progressbar decorator for iterators.
+Includes a default `range` iterator printing to `stderr`.
+
+Usage:
+>>> from tqdm.asyncio import trange, tqdm
+>>> async for i in trange(10):
+...     ...
+"""
+import asyncio
+from sys import version_info
+
+from .std import tqdm as std_tqdm
+
+__author__ = {"github.com/": ["casperdcl"]}
+__all__ = ['tqdm_asyncio', 'tarange', 'tqdm', 'trange']
+
+
+class tqdm_asyncio(std_tqdm):
+    """
+    Asynchronous-friendly version of tqdm (Python 3.6+).
+    """
+    def __init__(self, iterable=None, *args, **kwargs):
+        super(tqdm_asyncio, self).__init__(iterable, *args, **kwargs)
+        self.iterable_awaitable = False
+        if iterable is not None:
+            if hasattr(iterable, "__anext__"):
+                self.iterable_next = iterable.__anext__
+                self.iterable_awaitable = True
+            elif hasattr(iterable, "__next__"):
+                self.iterable_next = iterable.__next__
+            else:
+                self.iterable_iterator = iter(iterable)
+                self.iterable_next = self.iterable_iterator.__next__
+
+    def __aiter__(self):
+        return self
+
+    async def __anext__(self):
+        try:
+            if self.iterable_awaitable:
+                res = await self.iterable_next()
+            else:
+                res = self.iterable_next()
+            self.update()
+            return res
+        except StopIteration:
+            self.close()
+            raise StopAsyncIteration
+        except BaseException:
+            self.close()
+            raise
+
+    def send(self, *args, **kwargs):
+        return self.iterable.send(*args, **kwargs)
+
+    @classmethod
+    def as_completed(cls, fs, *, loop=None, timeout=None, total=None, **tqdm_kwargs):
+        """
+        Wrapper for `asyncio.as_completed`.
+        """
+        if total is None:
+            total = len(fs)
+        kwargs = {}
+        if version_info[:2] < (3, 10):
+            kwargs['loop'] = loop
+        yield from cls(asyncio.as_completed(fs, timeout=timeout, **kwargs),
+                       total=total, **tqdm_kwargs)
+
+    @classmethod
+    async def gather(cls, *fs, loop=None, timeout=None, total=None, **tqdm_kwargs):
+        """
+        Wrapper for `asyncio.gather`.
+        """
+        async def wrap_awaitable(i, f):
+            return i, await f
+
+        ifs = [wrap_awaitable(i, f) for i, f in enumerate(fs)]
+        res = [await f for f in cls.as_completed(ifs, loop=loop, timeout=timeout,
+                                                 total=total, **tqdm_kwargs)]
+        return [i for _, i in sorted(res)]
+
+
+def tarange(*args, **kwargs):
+    """
+    A shortcut for `tqdm.asyncio.tqdm(range(*args), **kwargs)`.
+    """
+    return tqdm_asyncio(range(*args), **kwargs)
+
+
+# Aliases
+tqdm = tqdm_asyncio
+trange = tarange
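+
+
+# Minimal sketch of `gather` above: results keep submission order while the
+# bar advances on completion (assumes Python 3.7+ for `asyncio.run`):
+if __name__ == "__main__":
+    async def _square(x):
+        await asyncio.sleep(0.01)
+        return x * x
+
+    async def _demo():
+        return await tqdm_asyncio.gather(*(_square(i) for i in range(100)))
+
+    print(asyncio.run(_demo()))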
diff --git a/.local/lib/python3.8/site-packages/tqdm/auto.py b/.local/lib/python3.8/site-packages/tqdm/auto.py
new file mode 100644
index 0000000..cffca20
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/tqdm/auto.py
@@ -0,0 +1,44 @@
+"""
+Enables multiple commonly used features.
+
+Method resolution order:
+
+- `tqdm.autonotebook` without import warnings
+- `tqdm.asyncio` on Python3.6+
+- `tqdm.std` base class
+
+Usage:
+>>> from tqdm.auto import trange, tqdm
+>>> for i in trange(10):
+...     ...
+"""
+import sys
+import warnings
+
+from .std import TqdmExperimentalWarning
+
+with warnings.catch_warnings():
+    warnings.simplefilter("ignore", category=TqdmExperimentalWarning)
+    from .autonotebook import tqdm as notebook_tqdm
+    from .autonotebook import trange as notebook_trange
+
+if sys.version_info[:2] < (3, 6):
+    tqdm = notebook_tqdm
+    trange = notebook_trange
+else:  # Python3.6+
+    from .asyncio import tqdm as asyncio_tqdm
+    from .std import tqdm as std_tqdm
+
+    if notebook_tqdm != std_tqdm:
+        class tqdm(notebook_tqdm, asyncio_tqdm):  # pylint: disable=inconsistent-mro
+            pass
+    else:
+        tqdm = asyncio_tqdm
+
+    def trange(*args, **kwargs):
+        """
+        A shortcut for `tqdm.auto.tqdm(range(*args), **kwargs)`.
+        """
+        return tqdm(range(*args), **kwargs)
+
+__all__ = ["tqdm", "trange"]
diff --git a/.local/lib/python3.8/site-packages/tqdm/autonotebook.py b/.local/lib/python3.8/site-packages/tqdm/autonotebook.py
new file mode 100644
index 0000000..1f22581
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/tqdm/autonotebook.py
@@ -0,0 +1,32 @@
+"""
+Automatically choose between `tqdm.notebook` and `tqdm.std`.
+
+Usage:
+>>> from tqdm.autonotebook import trange, tqdm
+>>> for i in trange(10):
+...     ...
+"""
+import os
+import sys
+from warnings import warn
+
+try:
+    get_ipython = sys.modules['IPython'].get_ipython
+    if 'IPKernelApp' not in get_ipython().config:  # pragma: no cover
+        raise ImportError("console")
+    if 'VSCODE_PID' in os.environ:  # pragma: no cover
+        raise ImportError("vscode")
+    from .notebook import WARN_NOIPYW, IProgress
+    if IProgress is None:
+        from .std import TqdmWarning
+        warn(WARN_NOIPYW, TqdmWarning, stacklevel=2)
+        raise ImportError('ipywidgets')
+except Exception:
+    from .std import tqdm, trange
+else:  # pragma: no cover
+    from .notebook import tqdm, trange
+    from .std import TqdmExperimentalWarning
+    warn("Using `tqdm.autonotebook.tqdm` in notebook mode."
+         " Use `tqdm.tqdm` instead to force console mode"
+         " (e.g. in jupyter console)", TqdmExperimentalWarning, stacklevel=2)
+__all__ = ["tqdm", "trange"]
diff --git a/.local/lib/python3.8/site-packages/tqdm/cli.py b/.local/lib/python3.8/site-packages/tqdm/cli.py
new file mode 100644
index 0000000..3ed25fb
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/tqdm/cli.py
@@ -0,0 +1,315 @@
+"""
+Module version for monitoring CLI pipes (`... | python -m tqdm | ...`).
+"""
+import logging
+import re
+import sys
+from ast import literal_eval as numeric
+
+from .std import TqdmKeyError, TqdmTypeError, tqdm
+from .version import __version__
+
+__all__ = ["main"]
+log = logging.getLogger(__name__)
+
+
+def cast(val, typ):
+    log.debug((val, typ))
+    if " or " in typ:
+        for t in typ.split(" or "):
+            try:
+                return cast(val, t)
+            except TqdmTypeError:
+                pass
+        raise TqdmTypeError(val + ' : ' + typ)
+
+    # sys.stderr.write('\ndebug | `val:type`: `' + val + ':' + typ + '`.\n')
+    if typ == 'bool':
+        if (val == 'True') or (val == ''):
+            return True
+        elif val == 'False':
+            return False
+        else:
+            raise TqdmTypeError(val + ' : ' + typ)
+    try:
+        return eval(typ + '("' + val + '")')
+    except Exception:
+        if typ == 'chr':
+            return chr(ord(eval('"' + val + '"'))).encode()
+        else:
+            raise TqdmTypeError(val + ' : ' + typ)
+
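+# e.g. cast('1.5', 'int or float') == 1.5 and cast('', 'bool') is True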
+
+def posix_pipe(fin, fout, delim=b'\\n', buf_size=256,
+               callback=lambda float: None, callback_len=True):
+    """
+    Params
+    ------
+    fin  : binary file with `read(buf_size : int)` method
+    fout  : binary file with `write` (and optionally `flush`) methods.
+    callback  : function(float), e.g.: `tqdm.update`
+    callback_len  : If (default: True) do `callback(len(buffer))`.
+      Otherwise, do `callback(data) for data in buffer.split(delim)`.
+    """
+    fp_write = fout.write
+
+    if not delim:
+        while True:
+            tmp = fin.read(buf_size)
+
+            # flush at EOF
+            if not tmp:
+                getattr(fout, 'flush', lambda: None)()
+                return
+
+            fp_write(tmp)
+            callback(len(tmp))
+        # return
+
+    buf = b''
+    len_delim = len(delim)
+    # n = 0
+    while True:
+        tmp = fin.read(buf_size)
+
+        # flush at EOF
+        if not tmp:
+            if buf:
+                fp_write(buf)
+                if callback_len:
+                    # n += 1 + buf.count(delim)
+                    callback(1 + buf.count(delim))
+                else:
+                    for i in buf.split(delim):
+                        callback(i)
+            getattr(fout, 'flush', lambda: None)()
+            return  # n
+
+        while True:
+            i = tmp.find(delim)
+            if i < 0:
+                buf += tmp
+                break
+            fp_write(buf + tmp[:i + len(delim)])
+            # n += 1
+            callback(1 if callback_len else (buf + tmp[:i]))
+            buf = b''
+            tmp = tmp[i + len_delim:]
+
+
+# ((opt, type), ... )
+RE_OPTS = re.compile(r'\n {8}(\S+)\s{2,}:\s*([^,]+)')
+# better split method assuming no positional args
+RE_SHLEX = re.compile(r'\s*(?<!\S)--?([^\s=]+)(\s+|=|$)')
+UNSUPPORTED_OPTS = ('iterable', 'gui', 'out', 'file')
+
+CLI_EXTRA_DOC = r"""
+        Extra CLI Options
+        -----------------
+        name  : type, optional
+            TODO: find out why this is needed.
+        delim  : chr, optional
+            Delimiting character [default: '\n']. Use '\0' for null.
+            N.B.: on Windows systems, Python converts '\n' to '\r\n'.
+        buf_size  : int, optional
+            String buffer size in bytes [default: 256]
+            used when `delim` is specified.
+        bytes  : bool, optional
+            If true, will count bytes, ignore `delim`, and default
+            `unit_scale` to True, `unit_divisor` to 1024, and `unit` to 'B'.
+        tee  : bool, optional
+            If true, passes `stdin` to both `stderr` and `stdout`.
+        update  : bool, optional
+            If true, will treat input as newly elapsed iterations,
+            i.e. numbers to pass to `update()`. Note that this is slow
+            (~2e5 it/s) since every input must be decoded as a number.
+        update_to  : bool, optional
+            If true, will treat input as total elapsed iterations,
+            i.e. numbers to assign to `self.n`. Note that this is slow
+            (~2e5 it/s) since every input must be decoded as a number.
+        null  : bool, optional
+            If true, will discard input (no stdout).
+        manpath  : str, optional
+            Directory in which to install tqdm man pages.
+        comppath  : str, optional
+            Directory in which to place tqdm completion.
+        log  : str, optional
+            CRITICAL|FATAL|ERROR|WARN(ING)|[default: 'INFO']|DEBUG|NOTSET.
+"""
+
+
+def main(fp=sys.stderr, argv=None):
+    """
+    Parameters (internal use only)
+    ---------
+    fp  : file-like object for tqdm
+    argv  : list (default: sys.argv[1:])
+    """
+    if argv is None:
+        argv = sys.argv[1:]
+    try:
+        log_idx = argv.index('--log')
+    except ValueError:
+        for i in argv:
+            if i.startswith('--log='):
+                logLevel = i[len('--log='):]
+                break
+        else:
+            logLevel = 'INFO'
+    else:
+        # argv.pop(log_idx)
+        # logLevel = argv.pop(log_idx)
+        logLevel = argv[log_idx + 1]
+    logging.basicConfig(level=getattr(logging, logLevel),
+                        format="%(levelname)s:%(module)s:%(lineno)d:%(message)s")
+
+    d = tqdm.__doc__ + CLI_EXTRA_DOC
+
+    opt_types = dict(RE_OPTS.findall(d))
+
+    for o in UNSUPPORTED_OPTS:
+        opt_types.pop(o)
+
+    log.debug(sorted(opt_types.items()))
+
+    # d = RE_OPTS.sub(r'  --\1=<\1>  : \2', d)
+    split = RE_OPTS.split(d)
+    opt_types_desc = zip(split[1::3], split[2::3], split[3::3])
+    d = ''.join(('\n  --{0}  : {2}{3}' if otd[1] == 'bool' else
+                 '\n  --{0}=<{1}>  : {2}{3}').format(
+                     otd[0].replace('_', '-'), otd[0], *otd[1:])
+                for otd in opt_types_desc if otd[0] not in UNSUPPORTED_OPTS)
+
+    help_short = "Usage:\n  tqdm [--help | options]\n"
+    d = help_short + """
+Options:
+  -h, --help     Print this help and exit.
+  -v, --version  Print version and exit.
+""" + d.strip('\n') + '\n'
+
+    # opts = docopt(d, version=__version__)
+    if any(v in argv for v in ('-v', '--version')):
+        sys.stdout.write(__version__ + '\n')
+        sys.exit(0)
+    elif any(v in argv for v in ('-h', '--help')):
+        sys.stdout.write(d + '\n')
+        sys.exit(0)
+    elif argv and argv[0][:2] != '--':
+        sys.stderr.write(
+            "Error:Unknown argument:{0}\n{1}".format(argv[0], help_short))
+
+    argv = RE_SHLEX.split(' '.join(["tqdm"] + argv))
+    opts = dict(zip(argv[1::3], argv[3::3]))
+
+    log.debug(opts)
+    opts.pop('log', True)
+
+    tqdm_args = {'file': fp}
+    try:
+        for (o, v) in opts.items():
+            o = o.replace('-', '_')
+            try:
+                tqdm_args[o] = cast(v, opt_types[o])
+            except KeyError as e:
+                raise TqdmKeyError(str(e))
+        log.debug('args:' + str(tqdm_args))
+
+        delim_per_char = tqdm_args.pop('bytes', False)
+        update = tqdm_args.pop('update', False)
+        update_to = tqdm_args.pop('update_to', False)
+        if sum((delim_per_char, update, update_to)) > 1:
+            raise TqdmKeyError("Can only have one of --bytes --update --update_to")
+    except Exception:
+        fp.write("\nError:\n" + help_short)
+        stdin, stdout_write = sys.stdin, sys.stdout.write
+        for i in stdin:
+            stdout_write(i)
+        raise
+    else:
+        buf_size = tqdm_args.pop('buf_size', 256)
+        delim = tqdm_args.pop('delim', b'\\n')
+        tee = tqdm_args.pop('tee', False)
+        manpath = tqdm_args.pop('manpath', None)
+        comppath = tqdm_args.pop('comppath', None)
+        if tqdm_args.pop('null', False):
+            class stdout(object):
+                @staticmethod
+                def write(_):
+                    pass
+        else:
+            stdout = sys.stdout
+            stdout = getattr(stdout, 'buffer', stdout)
+        stdin = getattr(sys.stdin, 'buffer', sys.stdin)
+        if manpath or comppath:
+            from os import path
+            from shutil import copyfile
+            try:  # py<3.7
+                import importlib_resources as resources
+            except ImportError:
+                from importlib import resources
+
+            def cp(name, dst):
+                """copy resource `name` to `dst`"""
+                if hasattr(resources, 'files'):
+                    copyfile(str(resources.files('tqdm') / name), dst)
+                else:  # py<3.9
+                    with resources.path('tqdm', name) as src:
+                        copyfile(str(src), dst)
+                log.info("written:%s", dst)
+            if manpath is not None:
+                cp('tqdm.1', path.join(manpath, 'tqdm.1'))
+            if comppath is not None:
+                cp('completion.sh', path.join(comppath, 'tqdm_completion.sh'))
+            sys.exit(0)
+        if tee:
+            stdout_write = stdout.write
+            fp_write = getattr(fp, 'buffer', fp).write
+
+            class stdout(object):  # pylint: disable=function-redefined
+                @staticmethod
+                def write(x):
+                    with tqdm.external_write_mode(file=fp):
+                        fp_write(x)
+                    stdout_write(x)
+        if delim_per_char:
+            tqdm_args.setdefault('unit', 'B')
+            tqdm_args.setdefault('unit_scale', True)
+            tqdm_args.setdefault('unit_divisor', 1024)
+            log.debug(tqdm_args)
+            with tqdm(**tqdm_args) as t:
+                posix_pipe(stdin, stdout, '', buf_size, t.update)
+        elif delim == b'\\n':
+            log.debug(tqdm_args)
+            write = stdout.write
+            if update or update_to:
+                with tqdm(**tqdm_args) as t:
+                    if update:
+                        def callback(i):
+                            t.update(numeric(i.decode()))
+                    else:  # update_to
+                        def callback(i):
+                            t.update(numeric(i.decode()) - t.n)
+                    for i in stdin:
+                        write(i)
+                        callback(i)
+            else:
+                for i in tqdm(stdin, **tqdm_args):
+                    write(i)
+        else:
+            log.debug(tqdm_args)
+            with tqdm(**tqdm_args) as t:
+                callback_len = False
+                if update:
+                    def callback(i):
+                        t.update(numeric(i.decode()))
+                elif update_to:
+                    def callback(i):
+                        t.update(numeric(i.decode()) - t.n)
+                else:
+                    callback = t.update
+                    callback_len = True
+                posix_pipe(stdin, stdout, delim, buf_size, callback, callback_len)
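+
+
+# Example invocations (a sketch; run `tqdm --help` for the full option list):
+#   $ seq 9999999 | python3 -m tqdm --bytes | wc -l
+#   $ tar -zcf - docs/ | python3 -m tqdm --bytes --total `du -sb docs/ | cut -f1` > backup.tgz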
diff --git a/.local/lib/python3.8/site-packages/tqdm/completion.sh b/.local/lib/python3.8/site-packages/tqdm/completion.sh
new file mode 100755
index 0000000..9f61c7f
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/tqdm/completion.sh
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+_tqdm(){
+  local cur prv
+  cur="${COMP_WORDS[COMP_CWORD]}"
+  prv="${COMP_WORDS[COMP_CWORD - 1]}"
+
+  case ${prv} in
+  --bar_format|--buf_size|--colour|--comppath|--delay|--delim|--desc|--initial|--lock_args|--manpath|--maxinterval|--mininterval|--miniters|--ncols|--nrows|--position|--postfix|--smoothing|--total|--unit|--unit_divisor)
+    # await user input
+    ;;
+  "--log")
+    COMPREPLY=($(compgen -W 'CRITICAL FATAL ERROR WARN WARNING INFO DEBUG NOTSET' -- ${cur}))
+    ;;
+  *)
+    COMPREPLY=($(compgen -W '--ascii --bar_format --buf_size --bytes --colour --comppath --delay --delim --desc --disable --dynamic_ncols --help --initial --leave --lock_args --log --manpath --maxinterval --mininterval --miniters --ncols --nrows --null --position --postfix --smoothing --tee --total --unit --unit_divisor --unit_scale --update --update_to --version --write_bytes -h -v' -- ${cur}))
+    ;;
+  esac
+}
+complete -F _tqdm tqdm
diff --git a/.local/lib/python3.8/site-packages/tqdm/contrib/__init__.py b/.local/lib/python3.8/site-packages/tqdm/contrib/__init__.py
new file mode 100644
index 0000000..0b52177
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/tqdm/contrib/__init__.py
@@ -0,0 +1,98 @@
+"""
+Thin wrappers around common functions.
+
+Subpackages contain potentially unstable extensions.
+"""
+import sys
+from functools import wraps
+
+from ..auto import tqdm as tqdm_auto
+from ..std import tqdm
+from ..utils import ObjectWrapper
+
+__author__ = {"github.com/": ["casperdcl"]}
+__all__ = ['tenumerate', 'tzip', 'tmap']
+
+
+class DummyTqdmFile(ObjectWrapper):
+    """Dummy file-like that will write to tqdm"""
+
+    def __init__(self, wrapped):
+        super(DummyTqdmFile, self).__init__(wrapped)
+        self._buf = []
+
+    def write(self, x, nolock=False):
+        nl = b"\n" if isinstance(x, bytes) else "\n"
+        pre, sep, post = x.rpartition(nl)
+        if sep:
+            blank = type(nl)()
+            tqdm.write(blank.join(self._buf + [pre, sep]),
+                       end=blank, file=self._wrapped, nolock=nolock)
+            self._buf = [post]
+        else:
+            self._buf.append(x)
+
+    def __del__(self):
+        if self._buf:
+            blank = type(self._buf[0])()
+            try:
+                tqdm.write(blank.join(self._buf), end=blank, file=self._wrapped)
+            except (OSError, ValueError):
+                pass
+
+
+def builtin_iterable(func):
+    """Wraps `func()` output in a `list()` in py2"""
+    if sys.version_info[:1] < (3,):
+        @wraps(func)
+        def inner(*args, **kwargs):
+            return list(func(*args, **kwargs))
+        return inner
+    return func
+
+
+def tenumerate(iterable, start=0, total=None, tqdm_class=tqdm_auto, **tqdm_kwargs):
+    """
+    Equivalent of `numpy.ndenumerate` or builtin `enumerate`.
+
+    Parameters
+    ----------
+    tqdm_class  : [default: tqdm.auto.tqdm].
+    """
+    try:
+        import numpy as np
+    except ImportError:
+        pass
+    else:
+        if isinstance(iterable, np.ndarray):
+            return tqdm_class(np.ndenumerate(iterable), total=total or iterable.size,
+                              **tqdm_kwargs)
+    return enumerate(tqdm_class(iterable, total=total, **tqdm_kwargs), start)
+
+
+@builtin_iterable
+def tzip(iter1, *iter2plus, **tqdm_kwargs):
+    """
+    Equivalent of builtin `zip`.
+
+    Parameters
+    ----------
+    tqdm_class  : [default: tqdm.auto.tqdm].
+    """
+    kwargs = tqdm_kwargs.copy()
+    tqdm_class = kwargs.pop("tqdm_class", tqdm_auto)
+    for i in zip(tqdm_class(iter1, **kwargs), *iter2plus):
+        yield i
+
+
+@builtin_iterable
+def tmap(function, *sequences, **tqdm_kwargs):
+    """
+    Equivalent of builtin `map`.
+
+    Parameters
+    ----------
+    tqdm_class  : [default: tqdm.auto.tqdm].
+    """
+    for i in tzip(*sequences, **tqdm_kwargs):
+        yield function(*i)
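+
+
+# Minimal sketch of the wrappers above:
+if __name__ == "__main__":
+    for _idx, _ch in tenumerate("abc"):
+        pass
+    print(list(tzip(range(3), "abc")))
+    print(list(tmap(lambda a, b: (a, b), range(3), "abc")))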
diff --git a/.local/lib/python3.8/site-packages/tqdm/contrib/__pycache__/__init__.cpython-38.pyc b/.local/lib/python3.8/site-packages/tqdm/contrib/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000..99bd248
Binary files /dev/null and b/.local/lib/python3.8/site-packages/tqdm/contrib/__pycache__/__init__.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/tqdm/contrib/__pycache__/bells.cpython-38.pyc b/.local/lib/python3.8/site-packages/tqdm/contrib/__pycache__/bells.cpython-38.pyc
new file mode 100644
index 0000000..52c921a
Binary files /dev/null and b/.local/lib/python3.8/site-packages/tqdm/contrib/__pycache__/bells.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/tqdm/contrib/__pycache__/concurrent.cpython-38.pyc b/.local/lib/python3.8/site-packages/tqdm/contrib/__pycache__/concurrent.cpython-38.pyc
new file mode 100644
index 0000000..f6eea1a
Binary files /dev/null and b/.local/lib/python3.8/site-packages/tqdm/contrib/__pycache__/concurrent.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/tqdm/contrib/__pycache__/discord.cpython-38.pyc b/.local/lib/python3.8/site-packages/tqdm/contrib/__pycache__/discord.cpython-38.pyc
new file mode 100644
index 0000000..628f8c8
Binary files /dev/null and b/.local/lib/python3.8/site-packages/tqdm/contrib/__pycache__/discord.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/tqdm/contrib/__pycache__/itertools.cpython-38.pyc b/.local/lib/python3.8/site-packages/tqdm/contrib/__pycache__/itertools.cpython-38.pyc
new file mode 100644
index 0000000..73ef459
Binary files /dev/null and b/.local/lib/python3.8/site-packages/tqdm/contrib/__pycache__/itertools.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/tqdm/contrib/__pycache__/logging.cpython-38.pyc b/.local/lib/python3.8/site-packages/tqdm/contrib/__pycache__/logging.cpython-38.pyc
new file mode 100644
index 0000000..863bfc8
Binary files /dev/null and b/.local/lib/python3.8/site-packages/tqdm/contrib/__pycache__/logging.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/tqdm/contrib/__pycache__/telegram.cpython-38.pyc b/.local/lib/python3.8/site-packages/tqdm/contrib/__pycache__/telegram.cpython-38.pyc
new file mode 100644
index 0000000..dfadf23
Binary files /dev/null and b/.local/lib/python3.8/site-packages/tqdm/contrib/__pycache__/telegram.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/tqdm/contrib/__pycache__/utils_worker.cpython-38.pyc b/.local/lib/python3.8/site-packages/tqdm/contrib/__pycache__/utils_worker.cpython-38.pyc
new file mode 100644
index 0000000..875d603
Binary files /dev/null and b/.local/lib/python3.8/site-packages/tqdm/contrib/__pycache__/utils_worker.cpython-38.pyc differ
diff --git a/.local/lib/python3.8/site-packages/tqdm/contrib/bells.py b/.local/lib/python3.8/site-packages/tqdm/contrib/bells.py
new file mode 100644
index 0000000..be22768
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/tqdm/contrib/bells.py
@@ -0,0 +1,24 @@
+"""
+Even more features than `tqdm.auto` (all the bells & whistles):
+
+- `tqdm.auto`
+- `tqdm.tqdm.pandas`
+- `tqdm.contrib.telegram`
+    + uses `${TQDM_TELEGRAM_TOKEN}` and `${TQDM_TELEGRAM_CHAT_ID}`
+- `tqdm.contrib.discord`
+    + uses `${TQDM_DISCORD_TOKEN}` and `${TQDM_DISCORD_CHANNEL_ID}`
+"""
+__all__ = ['tqdm', 'trange']
+import warnings
+from os import getenv
+
+if getenv("TQDM_TELEGRAM_TOKEN") and getenv("TQDM_TELEGRAM_CHAT_ID"):
+    from .telegram import tqdm, trange
+elif getenv("TQDM_DISCORD_TOKEN") and getenv("TQDM_DISCORD_CHANNEL_ID"):
+    from .discord import tqdm, trange
+else:
+    from ..auto import tqdm, trange
+
+with warnings.catch_warnings():
+    warnings.simplefilter("ignore", category=FutureWarning)
+    tqdm.pandas()
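+
+# With the registration above, pandas objects gain `progress_apply` and
+# friends (e.g. `df.groupby("k").progress_apply(fn)`); bars report to the
+# console/notebook, Telegram or Discord per the environment variables checked above.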
diff --git a/.local/lib/python3.8/site-packages/tqdm/contrib/concurrent.py b/.local/lib/python3.8/site-packages/tqdm/contrib/concurrent.py
new file mode 100644
index 0000000..ccb5e12
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/tqdm/contrib/concurrent.py
@@ -0,0 +1,130 @@
+"""
+Thin wrappers around `concurrent.futures`.
+"""
+from __future__ import absolute_import
+
+from contextlib import contextmanager
+
+from ..auto import tqdm as tqdm_auto
+from ..std import TqdmWarning
+
+try:
+    from operator import length_hint
+except ImportError:
+    def length_hint(it, default=0):
+        """Returns `len(it)`, falling back to `default`"""
+        try:
+            return len(it)
+        except TypeError:
+            return default
+try:
+    from os import cpu_count
+except ImportError:
+    try:
+        from multiprocessing import cpu_count
+    except ImportError:
+        def cpu_count():
+            return 4
+import sys
+
+__author__ = {"github.com/": ["casperdcl"]}
+__all__ = ['thread_map', 'process_map']
+
+
+@contextmanager
+def ensure_lock(tqdm_class, lock_name=""):
+    """get (create if necessary) and then restore `tqdm_class`'s lock"""
+    old_lock = getattr(tqdm_class, '_lock', None)  # don't create a new lock
+    lock = old_lock or tqdm_class.get_lock()  # maybe create a new lock
+    lock = getattr(lock, lock_name, lock)  # maybe subtype
+    tqdm_class.set_lock(lock)
+    yield lock
+    if old_lock is None:
+        del tqdm_class._lock
+    else:
+        tqdm_class.set_lock(old_lock)
+
+
+def _executor_map(PoolExecutor, fn, *iterables, **tqdm_kwargs):
+    """
+    Implementation of `thread_map` and `process_map`.
+
+    Parameters
+    ----------
+    tqdm_class  : [default: tqdm.auto.tqdm].
+    max_workers  : [default: min(32, cpu_count() + 4)].
+    chunksize  : [default: 1].
+    lock_name  : [default: "":str].
+    """
+    kwargs = tqdm_kwargs.copy()
+    if "total" not in kwargs:
+        kwargs["total"] = length_hint(iterables[0])
+    tqdm_class = kwargs.pop("tqdm_class", tqdm_auto)
+    max_workers = kwargs.pop("max_workers", min(32, cpu_count() + 4))
+    chunksize = kwargs.pop("chunksize", 1)
+    lock_name = kwargs.pop("lock_name", "")
+    with ensure_lock(tqdm_class, lock_name=lock_name) as lk:
+        pool_kwargs = {'max_workers': max_workers}
+        sys_version = sys.version_info[:2]
+        if sys_version >= (3, 7):
+            # share lock in case workers are already using `tqdm`
+            pool_kwargs.update(initializer=tqdm_class.set_lock, initargs=(lk,))
+        map_args = {}
+        if not (3, 0) < sys_version < (3, 5):
+            map_args.update(chunksize=chunksize)
+        with PoolExecutor(**pool_kwargs) as ex:
+            return list(tqdm_class(ex.map(fn, *iterables, **map_args), **kwargs))
+
+
+def thread_map(fn, *iterables, **tqdm_kwargs):
+    """
+    Equivalent of `list(map(fn, *iterables))`
+    driven by `concurrent.futures.ThreadPoolExecutor`.
+
+    Parameters
+    ----------
+    tqdm_class  : optional
+        `tqdm` class to use for bars [default: tqdm.auto.tqdm].
+    max_workers  : int, optional
+        Maximum number of workers to spawn; passed to
+        `concurrent.futures.ThreadPoolExecutor.__init__`.
+        [default: min(32, cpu_count() + 4)].
+    """
+    from concurrent.futures import ThreadPoolExecutor
+    return _executor_map(ThreadPoolExecutor, fn, *iterables, **tqdm_kwargs)
+
+
+def process_map(fn, *iterables, **tqdm_kwargs):
+    """
+    Equivalent of `list(map(fn, *iterables))`
+    driven by `concurrent.futures.ProcessPoolExecutor`.
+
+    Parameters
+    ----------
+    tqdm_class  : optional
+        `tqdm` class to use for bars [default: tqdm.auto.tqdm].
+    max_workers  : int, optional
+        Maximum number of workers to spawn; passed to
+        `concurrent.futures.ProcessPoolExecutor.__init__`.
+        [default: min(32, cpu_count() + 4)].
+    chunksize  : int, optional
+        Size of chunks sent to worker processes; passed to
+        `concurrent.futures.ProcessPoolExecutor.map`. [default: 1].
+    lock_name  : str, optional
+        Member of `tqdm_class.get_lock()` to use [default: mp_lock].
+    """
+    from concurrent.futures import ProcessPoolExecutor
+    if iterables and "chunksize" not in tqdm_kwargs:
+        # default `chunksize=1` has poor performance for large iterables
+        # (most time spent dispatching items to workers).
+        longest_iterable_len = max(map(length_hint, iterables))
+        if longest_iterable_len > 1000:
+            from warnings import warn
+            warn("Iterable length %d > 1000 but `chunksize` is not set."
+                 " This may seriously degrade multiprocess performance."
+                 " Set `chunksize=1` or more." % longest_iterable_len,
+                 TqdmWarning, stacklevel=2)
+    if "lock_name" not in tqdm_kwargs:
+        tqdm_kwargs = tqdm_kwargs.copy()
+        tqdm_kwargs["lock_name"] = "mp_lock"
+    return _executor_map(ProcessPoolExecutor, fn, *iterables, **tqdm_kwargs)
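+
+
+# Minimal sketch: `thread_map` behaves like `list(map(fn, it))` with a bar.
+# (`process_map` needs a picklable `fn`, so no lambdas there.)
+if __name__ == "__main__":
+    print(thread_map(lambda x: x * x, range(100), max_workers=4))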
diff --git a/.local/lib/python3.8/site-packages/tqdm/contrib/discord.py b/.local/lib/python3.8/site-packages/tqdm/contrib/discord.py
new file mode 100644
index 0000000..713a2f8
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/tqdm/contrib/discord.py
@@ -0,0 +1,121 @@
+"""
+Sends updates to a Discord bot.
+
+Usage:
+>>> from tqdm.contrib.discord import tqdm, trange
+>>> for i in tqdm(iterable, token='{token}', channel_id='{channel_id}'):
+...     ...
+
+![screenshot](https://img.tqdm.ml/screenshot-discord.png)
+"""
+from __future__ import absolute_import
+
+import logging
+from os import getenv
+
+try:
+    from disco.client import Client, ClientConfig
+except ImportError:
+    raise ImportError("Please `pip install disco-py`")
+
+from ..auto import tqdm as tqdm_auto
+from ..utils import _range
+from .utils_worker import MonoWorker
+
+__author__ = {"github.com/": ["casperdcl"]}
+__all__ = ['DiscordIO', 'tqdm_discord', 'tdrange', 'tqdm', 'trange']
+
+
+class DiscordIO(MonoWorker):
+    """Non-blocking file-like IO using a Discord Bot."""
+    def __init__(self, token, channel_id):
+        """Creates a new message in the given `channel_id`."""
+        super(DiscordIO, self).__init__()
+        config = ClientConfig()
+        config.token = token
+        client = Client(config)
+        self.text = self.__class__.__name__
+        try:
+            self.message = client.api.channels_messages_create(channel_id, self.text)
+        except Exception as e:
+            tqdm_auto.write(str(e))
+
+    def write(self, s):
+        """Replaces internal `message`'s text with `s`."""
+        if not s:
+            s = "..."
+        s = s.replace('\r', '').strip()
+        if s == self.text:
+            return  # skip duplicate message
+        self.text = s
+        try:
+            future = self.submit(self.message.edit, '`' + s + '`')
+        except Exception as e:
+            tqdm_auto.write(str(e))
+        else:
+            return future
+
+
+class tqdm_discord(tqdm_auto):
+    """
+    Standard `tqdm.auto.tqdm` but also sends updates to a Discord Bot.
+    May take a few seconds to create (`__init__`).
+
+    - create a discord bot (not public, no requirement of OAuth2 code
+      grant, only send messages permissions) & invite it to a channel:
+      <https://discordpy.readthedocs.io/en/latest/discord.html>
+    - copy the bot `{token}` & `{channel_id}` and paste below
+
+    >>> from tqdm.contrib.discord import tqdm, trange
+    >>> for i in tqdm(iterable, token='{token}', channel_id='{channel_id}'):
+    ...     ...
+    """
+    def __init__(self, *args, **kwargs):
+        """
+        Parameters
+        ----------
+        token  : str, required. Discord token
+            [default: ${TQDM_DISCORD_TOKEN}].
+        channel_id  : int, required. Discord channel ID
+            [default: ${TQDM_DISCORD_CHANNEL_ID}].
+        mininterval  : float, optional.
+          Minimum of [default: 1.5] to avoid rate limit.
+
+        See `tqdm.auto.tqdm.__init__` for other parameters.
+        """
+        if not kwargs.get('disable'):
+            kwargs = kwargs.copy()
+            logging.getLogger("HTTPClient").setLevel(logging.WARNING)
+            self.dio = DiscordIO(
+                kwargs.pop('token', getenv("TQDM_DISCORD_TOKEN")),
+                kwargs.pop('channel_id', getenv("TQDM_DISCORD_CHANNEL_ID")))
+            kwargs['mininterval'] = max(1.5, kwargs.get('mininterval', 1.5))
+        super(tqdm_discord, self).__init__(*args, **kwargs)
+
+    def display(self, **kwargs):
+        super(tqdm_discord, self).display(**kwargs)
+        fmt = self.format_dict
+        if fmt.get('bar_format', None):
+            fmt['bar_format'] = fmt['bar_format'].replace(
+                '<bar/>', '{bar:10u}').replace('{bar}', '{bar:10u}')
+        else:
+            fmt['bar_format'] = '{l_bar}{bar:10u}{r_bar}'
+        self.dio.write(self.format_meter(**fmt))
+
+    def clear(self, *args, **kwargs):
+        super(tqdm_discord, self).clear(*args, **kwargs)
+        if not self.disable:
+            self.dio.write("")
+
+
+def tdrange(*args, **kwargs):
+    """
+    A shortcut for `tqdm.contrib.discord.tqdm(xrange(*args), **kwargs)`.
+    On Python3+, `range` is used instead of `xrange`.
+    """
+    return tqdm_discord(_range(*args), **kwargs)
+
+
+# Aliases
+tqdm = tqdm_discord
+trange = tdrange
diff --git a/.local/lib/python3.8/site-packages/tqdm/contrib/itertools.py b/.local/lib/python3.8/site-packages/tqdm/contrib/itertools.py
new file mode 100644
index 0000000..5f22505
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/tqdm/contrib/itertools.py
@@ -0,0 +1,37 @@
+"""
+Thin wrappers around `itertools`.
+"""
+from __future__ import absolute_import
+
+import itertools
+
+from ..auto import tqdm as tqdm_auto
+
+__author__ = {"github.com/": ["casperdcl"]}
+__all__ = ['product']
+
+
+def product(*iterables, **tqdm_kwargs):
+    """
+    Equivalent of `itertools.product`.
+
+    Parameters
+    ----------
+    tqdm_class  : [default: tqdm.auto.tqdm].
+    """
+    kwargs = tqdm_kwargs.copy()
+    tqdm_class = kwargs.pop("tqdm_class", tqdm_auto)
+    try:
+        lens = list(map(len, iterables))
+    except TypeError:
+        total = None
+    else:
+        total = 1
+        for i in lens:
+            total *= i
+        kwargs.setdefault("total", total)
+    with tqdm_class(**kwargs) as t:
+        it = itertools.product(*iterables)
+        for i in it:
+            yield i
+            t.update()
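+
+
+# Minimal sketch: `total` is inferred as len(a) * len(b) for sized inputs.
+if __name__ == "__main__":
+    print(list(product([1, 2, 3], "ab")))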
diff --git a/.local/lib/python3.8/site-packages/tqdm/contrib/logging.py b/.local/lib/python3.8/site-packages/tqdm/contrib/logging.py
new file mode 100644
index 0000000..cd9925e
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/tqdm/contrib/logging.py
@@ -0,0 +1,128 @@
+"""
+Helper functionality for interoperability with stdlib `logging`.
+"""
+from __future__ import absolute_import
+
+import logging
+import sys
+from contextlib import contextmanager
+
+try:
+    from typing import Iterator, List, Optional, Type  # pylint: disable=unused-import
+except ImportError:
+    pass
+
+from ..std import tqdm as std_tqdm
+
+
+class _TqdmLoggingHandler(logging.StreamHandler):
+    def __init__(
+        self,
+        tqdm_class=std_tqdm  # type: Type[std_tqdm]
+    ):
+        super(_TqdmLoggingHandler, self).__init__()
+        self.tqdm_class = tqdm_class
+
+    def emit(self, record):
+        try:
+            msg = self.format(record)
+            self.tqdm_class.write(msg, file=self.stream)
+            self.flush()
+        except (KeyboardInterrupt, SystemExit):
+            raise
+        except:  # noqa pylint: disable=bare-except
+            self.handleError(record)
+
+
+def _is_console_logging_handler(handler):
+    return (isinstance(handler, logging.StreamHandler)
+            and handler.stream in {sys.stdout, sys.stderr})
+
+
+def _get_first_found_console_logging_handler(handlers):
+    for handler in handlers:
+        if _is_console_logging_handler(handler):
+            return handler
+
+
+@contextmanager
+def logging_redirect_tqdm(
+    loggers=None,  # type: Optional[List[logging.Logger]],
+    tqdm_class=std_tqdm  # type: Type[std_tqdm]
+):
+    # type: (...) -> Iterator[None]
+    """
+    Context manager redirecting console logging to `tqdm.write()`, leaving
+    other logging handlers (e.g. log files) unaffected.
+
+    Parameters
+    ----------
+    loggers  : list, optional
+      Which loggers to redirect (default: [logging.root]).
+    tqdm_class  : optional
+
+    Example
+    -------
+    ```python
+    import logging
+    from tqdm import trange
+    from tqdm.contrib.logging import logging_redirect_tqdm
+
+    LOG = logging.getLogger(__name__)
+
+    if __name__ == '__main__':
+        logging.basicConfig(level=logging.INFO)
+        with logging_redirect_tqdm():
+            for i in trange(9):
+                if i == 4:
+                    LOG.info("console logging redirected to `tqdm.write()`")
+        # logging restored
+    ```
+    """
+    if loggers is None:
+        loggers = [logging.root]
+    original_handlers_list = [logger.handlers for logger in loggers]
+    try:
+        for logger in loggers:
+            tqdm_handler = _TqdmLoggingHandler(tqdm_class)
+            orig_handler = _get_first_found_console_logging_handler(logger.handlers)
+            if orig_handler is not None:
+                tqdm_handler.setFormatter(orig_handler.formatter)
+                tqdm_handler.stream = orig_handler.stream
+            logger.handlers = [
+                handler for handler in logger.handlers
+                if not _is_console_logging_handler(handler)] + [tqdm_handler]
+        yield
+    finally:
+        for logger, original_handlers in zip(loggers, original_handlers_list):
+            logger.handlers = original_handlers
+
+
+@contextmanager
+def tqdm_logging_redirect(
+    *args,
+    # loggers=None,  # type: Optional[List[logging.Logger]]
+    # tqdm=None,  # type: Optional[Type[tqdm.tqdm]]
+    **kwargs
+):
+    # type: (...) -> Iterator[None]
+    """
+    Convenience shortcut for:
+    ```python
+    with tqdm_class(*args, **tqdm_kwargs) as pbar:
+        with logging_redirect_tqdm(loggers=loggers, tqdm_class=tqdm_class):
+            yield pbar
+    ```
+
+    Parameters
+    ----------
+    tqdm_class  : optional, (default: tqdm.std.tqdm).
+    loggers  : optional, list.
+    **tqdm_kwargs  : passed to `tqdm_class`.
+    """
+    tqdm_kwargs = kwargs.copy()
+    loggers = tqdm_kwargs.pop('loggers', None)
+    tqdm_class = tqdm_kwargs.pop('tqdm_class', std_tqdm)
+    with tqdm_class(*args, **tqdm_kwargs) as pbar:
+        with logging_redirect_tqdm(loggers=loggers, tqdm_class=tqdm_class):
+            yield pbar
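+
+
+# Minimal sketch of the convenience wrapper above:
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+    with tqdm_logging_redirect(total=9) as pbar:
+        for i in range(9):
+            if i == 4:
+                logging.info("console logging redirected to `tqdm.write()`")
+            pbar.update()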
diff --git a/.local/lib/python3.8/site-packages/tqdm/contrib/telegram.py b/.local/lib/python3.8/site-packages/tqdm/contrib/telegram.py
new file mode 100644
index 0000000..99cbe8c
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/tqdm/contrib/telegram.py
@@ -0,0 +1,159 @@
+"""
+Sends updates to a Telegram bot.
+
+Usage:
+>>> from tqdm.contrib.telegram import tqdm, trange
+>>> for i in trange(10, token='{token}', chat_id='{chat_id}'):
+...     ...
+
+![screenshot](https://img.tqdm.ml/screenshot-telegram.gif)
+"""
+from __future__ import absolute_import
+
+from os import getenv
+from warnings import warn
+
+from requests import Session
+
+from ..auto import tqdm as tqdm_auto
+from ..std import TqdmWarning
+from ..utils import _range
+from .utils_worker import MonoWorker
+
+__author__ = {"github.com/": ["casperdcl"]}
+__all__ = ['TelegramIO', 'tqdm_telegram', 'ttgrange', 'tqdm', 'trange']
+
+
+class TelegramIO(MonoWorker):
+    """Non-blocking file-like IO using a Telegram Bot."""
+    API = 'https://api.telegram.org/bot'
+
+    def __init__(self, token, chat_id):
+        """Creates a new message in the given `chat_id`."""
+        super(TelegramIO, self).__init__()
+        self.token = token
+        self.chat_id = chat_id
+        self.session = Session()
+        self.text = self.__class__.__name__
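+        # touching the property below posts the initial Telegram message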
+        self.message_id
+
+    @property
+    def message_id(self):
+        if hasattr(self, '_message_id'):
+            return self._message_id
+        try:
+            res = self.session.post(
+                self.API + '%s/sendMessage' % self.token,
+                data={'text': '`' + self.text + '`', 'chat_id': self.chat_id,
+                      'parse_mode': 'MarkdownV2'}).json()
+        except Exception as e:
+            tqdm_auto.write(str(e))
+        else:
+            if res.get('error_code') == 429:
+                warn("Creation rate limit: try increasing `mininterval`.",
+                     TqdmWarning, stacklevel=2)
+            else:
+                self._message_id = res['result']['message_id']
+                return self._message_id
+
+    def write(self, s):
+        """Replaces internal `message_id`'s text with `s`."""
+        if not s:
+            s = "..."
+        s = s.replace('\r', '').strip()
+        if s == self.text:
+            return  # avoid duplicate message Bot error
+        message_id = self.message_id
+        if message_id is None:
+            return
+        self.text = s
+        try:
+            future = self.submit(
+                self.session.post, self.API + '%s/editMessageText' % self.token,
+                data={'text': '`' + s + '`', 'chat_id': self.chat_id,
+                      'message_id': message_id, 'parse_mode': 'MarkdownV2'})
+        except Exception as e:
+            tqdm_auto.write(str(e))
+        else:
+            return future
+
+    def delete(self):
+        """Deletes internal `message_id`."""
+        try:
+            future = self.submit(
+                self.session.post, self.API + '%s/deleteMessage' % self.token,
+                data={'chat_id': self.chat_id, 'message_id': self.message_id})
+        except Exception as e:
+            tqdm_auto.write(str(e))
+        else:
+            return future
+
+
+class tqdm_telegram(tqdm_auto):
+    """
+    Standard `tqdm.auto.tqdm` but also sends updates to a Telegram Bot.
+    May take a few seconds to create (`__init__`).
+
+    - create a bot <https://core.telegram.org/bots#6-botfather>
+    - copy its `{token}`
+    - add the bot to a chat and send it a message such as `/start`
+    - go to <https://api.telegram.org/bot{token}/getUpdates> to find out
+      the `{chat_id}`
+    - paste the `{token}` & `{chat_id}` below
+
+    >>> from tqdm.contrib.telegram import tqdm, trange
+    >>> for i in tqdm(iterable, token='{token}', chat_id='{chat_id}'):
+    ...     ...
+    """
+    def __init__(self, *args, **kwargs):
+        """
+        Parameters
+        ----------
+        token  : str, required. Telegram token
+            [default: ${TQDM_TELEGRAM_TOKEN}].
+        chat_id  : str, required. Telegram chat ID
+            [default: ${TQDM_TELEGRAM_CHAT_ID}].
+
+        See `tqdm.auto.tqdm.__init__` for other parameters.
+        """
+        if not kwargs.get('disable'):
+            kwargs = kwargs.copy()
+            self.tgio = TelegramIO(
+                kwargs.pop('token', getenv('TQDM_TELEGRAM_TOKEN')),
+                kwargs.pop('chat_id', getenv('TQDM_TELEGRAM_CHAT_ID')))
+        super(tqdm_telegram, self).__init__(*args, **kwargs)
+
+    def display(self, **kwargs):
+        super(tqdm_telegram, self).display(**kwargs)
+        fmt = self.format_dict
+        if fmt.get('bar_format', None):
+            fmt['bar_format'] = fmt['bar_format'].replace(
+                '<bar/>', '{bar:10u}').replace('{bar}', '{bar:10u}')
+        else:
+            fmt['bar_format'] = '{l_bar}{bar:10u}{r_bar}'
+        self.tgio.write(self.format_meter(**fmt))
+
+    def clear(self, *args, **kwargs):
+        super(tqdm_telegram, self).clear(*args, **kwargs)
+        if not self.disable:
+            self.tgio.write("")
+
+    def close(self):
+        if self.disable:
+            return
+        super(tqdm_telegram, self).close()
+        if not (self.leave or (self.leave is None and self.pos == 0)):
+            self.tgio.delete()
+
+
+def ttgrange(*args, **kwargs):
+    """
+    A shortcut for `tqdm.contrib.telegram.tqdm(xrange(*args), **kwargs)`.
+    On Python3+, `range` is used instead of `xrange`.
+    """
+    return tqdm_telegram(_range(*args), **kwargs)
+
+
+# Aliases
+tqdm = tqdm_telegram
+trange = ttgrange
diff --git a/.local/lib/python3.8/site-packages/tqdm/contrib/utils_worker.py b/.local/lib/python3.8/site-packages/tqdm/contrib/utils_worker.py
new file mode 100644
index 0000000..17adda6
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/tqdm/contrib/utils_worker.py
@@ -0,0 +1,40 @@
+"""
+IO/concurrency helpers for `tqdm.contrib`.
+"""
+from __future__ import absolute_import
+
+from collections import deque
+from concurrent.futures import ThreadPoolExecutor
+
+from ..auto import tqdm as tqdm_auto
+
+__author__ = {"github.com/": ["casperdcl"]}
+__all__ = ['MonoWorker']
+
+
+class MonoWorker(object):
+    """
+    Supports one running task and one waiting task.
+    The waiting task is the most recent submitted (others are discarded).
+    """
+    def __init__(self):
+        self.pool = ThreadPoolExecutor(max_workers=1)
+        self.futures = deque([], 2)
+
+    def submit(self, func, *args, **kwargs):
+        """`func(*args, **kwargs)` may replace currently waiting task."""
+        futures = self.futures
+        if len(futures) == futures.maxlen:
+            running = futures.popleft()
+            if not running.done():
+                if len(futures):  # clear waiting
+                    waiting = futures.pop()
+                    waiting.cancel()
+                futures.appendleft(running)  # re-insert running
+        try:
+            waiting = self.pool.submit(func, *args, **kwargs)
+        except Exception as e:
+            tqdm_auto.write(str(e))
+        else:
+            futures.append(waiting)
+            return waiting
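+
+
+# Minimal sketch: under rapid submission at most one task runs, only the most
+# recent submission waits, and intermediate ones are cancelled.
+if __name__ == "__main__":
+    import time
+    worker = MonoWorker()
+    for _ in range(5):
+        worker.submit(time.sleep, 0.1)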
diff --git a/.local/lib/python3.8/site-packages/tqdm/dask.py b/.local/lib/python3.8/site-packages/tqdm/dask.py
new file mode 100644
index 0000000..6fc7504
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/tqdm/dask.py
@@ -0,0 +1,46 @@
+from __future__ import absolute_import
+
+from functools import partial
+
+from dask.callbacks import Callback
+
+from .auto import tqdm as tqdm_auto
+
+__author__ = {"github.com/": ["casperdcl"]}
+__all__ = ['TqdmCallback']
+
+
+class TqdmCallback(Callback):
+    """Dask callback for task progress."""
+    def __init__(self, start=None, pretask=None, tqdm_class=tqdm_auto,
+                 **tqdm_kwargs):
+        """
+        Parameters
+        ----------
+        tqdm_class  : optional
+            `tqdm` class to use for bars [default: `tqdm.auto.tqdm`].
+        tqdm_kwargs  : optional
+            Any other arguments used for all bars.
+        """
+        super(TqdmCallback, self).__init__(start=start, pretask=pretask)
+        if tqdm_kwargs:
+            tqdm_class = partial(tqdm_class, **tqdm_kwargs)
+        self.tqdm_class = tqdm_class
+
+    def _start_state(self, _, state):
+        self.pbar = self.tqdm_class(total=sum(
+            len(state[k]) for k in ['ready', 'waiting', 'running', 'finished']))
+
+    def _posttask(self, *_, **__):
+        self.pbar.update()
+
+    def _finish(self, *_, **__):
+        self.pbar.close()
+
+    def display(self):
+        """Displays in the current cell in Notebooks."""
+        # `pbar` is only created in `_start_state`; guard against early calls
+        container = getattr(getattr(self, 'pbar', None), 'container', None)
+        if container is None:
+            return
+        from .notebook import display
+        display(container)
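+
+
+# Minimal sketch, assuming `dask[array]` is installed:
+if __name__ == "__main__":
+    import dask.array as da
+    with TqdmCallback(desc="compute"):
+        da.ones((1000, 1000), chunks=(100, 100)).mean().compute()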
diff --git a/.local/lib/python3.8/site-packages/tqdm/gui.py b/.local/lib/python3.8/site-packages/tqdm/gui.py
new file mode 100644
index 0000000..4612701
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/tqdm/gui.py
@@ -0,0 +1,191 @@
+"""
+Matplotlib GUI progressbar decorator for iterators.
+
+Usage:
+>>> from tqdm.gui import trange, tqdm
+>>> for i in trange(10):
+...     ...
+"""
+# future division is important to divide integers and get as
+# a result precise floating numbers (instead of truncated int)
+from __future__ import absolute_import, division
+
+import re
+from warnings import warn
+
+# to inherit from the tqdm class
+from .std import TqdmExperimentalWarning
+from .std import tqdm as std_tqdm
+# import compatibility functions and utilities
+from .utils import _range
+
+__author__ = {"github.com/": ["casperdcl", "lrq3000"]}
+__all__ = ['tqdm_gui', 'tgrange', 'tqdm', 'trange']
+
+
+class tqdm_gui(std_tqdm):  # pragma: no cover
+    """Experimental Matplotlib GUI version of tqdm!"""
+    # TODO: @classmethod: write() on GUI?
+    def __init__(self, *args, **kwargs):
+        from collections import deque
+
+        import matplotlib as mpl
+        import matplotlib.pyplot as plt
+        kwargs = kwargs.copy()
+        kwargs['gui'] = True
+        colour = kwargs.pop('colour', 'g')
+        super(tqdm_gui, self).__init__(*args, **kwargs)
+
+        if self.disable:
+            return
+
+        warn("GUI is experimental/alpha", TqdmExperimentalWarning, stacklevel=2)
+        self.mpl = mpl
+        self.plt = plt
+
+        # Remember if external environment uses toolbars
+        self.toolbar = self.mpl.rcParams['toolbar']
+        self.mpl.rcParams['toolbar'] = 'None'
+
+        self.mininterval = max(self.mininterval, 0.5)
+        self.fig, ax = plt.subplots(figsize=(9, 2.2))
+        # self.fig.subplots_adjust(bottom=0.2)
+        total = self.__len__()  # avoids TypeError on None #971
+        if total is not None:
+            self.xdata = []
+            self.ydata = []
+            self.zdata = []
+        else:
+            self.xdata = deque([])
+            self.ydata = deque([])
+            self.zdata = deque([])
+        self.line1, = ax.plot(self.xdata, self.ydata, color='b')
+        self.line2, = ax.plot(self.xdata, self.zdata, color='k')
+        ax.set_ylim(0, 0.001)
+        if total is not None:
+            ax.set_xlim(0, 100)
+            ax.set_xlabel("percent")
+            self.fig.legend((self.line1, self.line2), ("cur", "est"),
+                            loc='center right')
+            # progressbar
+            self.hspan = plt.axhspan(0, 0.001, xmin=0, xmax=0, color=colour)
+        else:
+            # ax.set_xlim(-60, 0)
+            ax.set_xlim(0, 60)
+            ax.invert_xaxis()
+            ax.set_xlabel("seconds")
+            ax.legend(("cur", "est"), loc='lower left')
+        ax.grid()
+        # ax.set_xlabel('seconds')
+        ax.set_ylabel((self.unit if self.unit else "it") + "/s")
+        if self.unit_scale:
+            plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
+            ax.yaxis.get_offset_text().set_x(-0.15)
+
+        # Remember if external environment is interactive
+        self.wasion = plt.isinteractive()
+        plt.ion()
+        self.ax = ax
+
+    def close(self):
+        if self.disable:
+            return
+
+        self.disable = True
+
+        with self.get_lock():
+            self._instances.remove(self)
+
+        # Restore toolbars
+        self.mpl.rcParams['toolbar'] = self.toolbar
+        # Return to non-interactive mode
+        if not self.wasion:
+            self.plt.ioff()
+        if self.leave:
+            self.display()
+        else:
+            self.plt.close(self.fig)
+
+    def clear(self, *_, **__):
+        pass
+
+    def display(self, *_, **__):
+        n = self.n
+        cur_t = self._time()
+        elapsed = cur_t - self.start_t
+        delta_it = n - self.last_print_n
+        delta_t = cur_t - self.last_print_t
+
+        # Inline due to multiple calls
+        total = self.total
+        xdata = self.xdata
+        ydata = self.ydata
+        zdata = self.zdata
+        ax = self.ax
+        line1 = self.line1
+        line2 = self.line2
+        # instantaneous rate
+        y = delta_it / delta_t
+        # overall rate
+        z = n / elapsed
+        # update line data
+        xdata.append(n * 100.0 / total if total else cur_t)
+        ydata.append(y)
+        zdata.append(z)
+
+        # Discard old values
+        # xmin, xmax = ax.get_xlim()
+        # if (not total) and elapsed > xmin * 1.1:
+        if (not total) and elapsed > 66:
+            xdata.popleft()
+            ydata.popleft()
+            zdata.popleft()
+
+        ymin, ymax = ax.get_ylim()
+        if y > ymax or z > ymax:
+            ymax = 1.1 * y
+            ax.set_ylim(ymin, ymax)
+            ax.figure.canvas.draw()
+
+        if total:
+            line1.set_data(xdata, ydata)
+            line2.set_data(xdata, zdata)
+            try:
+                poly_lims = self.hspan.get_xy()
+            except AttributeError:
+                self.hspan = self.plt.axhspan(0, 0.001, xmin=0, xmax=0, color='g')
+                poly_lims = self.hspan.get_xy()
+            poly_lims[0, 1] = ymin
+            poly_lims[1, 1] = ymax
+            poly_lims[2] = [n / total, ymax]
+            poly_lims[3] = [poly_lims[2, 0], ymin]
+            if len(poly_lims) > 4:
+                poly_lims[4, 1] = ymin
+            self.hspan.set_xy(poly_lims)
+        else:
+            t_ago = [cur_t - i for i in xdata]
+            line1.set_data(t_ago, ydata)
+            line2.set_data(t_ago, zdata)
+
+        d = self.format_dict
+        # remove {bar}
+        d['bar_format'] = (d['bar_format'] or "{l_bar}{r_bar}").replace(
+            "{bar}", "")
+        msg = self.format_meter(**d)
+        if '<bar/>' in msg:
+            msg = "".join(re.split(r'\|?<bar/>\|?', msg, 1))
+        ax.set_title(msg, fontname="DejaVu Sans Mono", fontsize=11)
+        self.plt.pause(1e-9)
+
+
+def tgrange(*args, **kwargs):
+    """
+    A shortcut for `tqdm.gui.tqdm(xrange(*args), **kwargs)`.
+    On Python3+, `range` is used instead of `xrange`.
+    """
+    return tqdm_gui(_range(*args), **kwargs)
+
+
+# Aliases
+tqdm = tqdm_gui
+trange = tgrange
diff --git a/.local/lib/python3.8/site-packages/tqdm/keras.py b/.local/lib/python3.8/site-packages/tqdm/keras.py
new file mode 100644
index 0000000..523e62e
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/tqdm/keras.py
@@ -0,0 +1,124 @@
+from __future__ import absolute_import, division
+
+from copy import copy
+from functools import partial
+
+from .auto import tqdm as tqdm_auto
+
+try:
+    import keras
+except (ImportError, AttributeError) as e:
+    try:
+        from tensorflow import keras
+    except ImportError:
+        raise e
+__author__ = {"github.com/": ["casperdcl"]}
+__all__ = ['TqdmCallback']
+
+
+class TqdmCallback(keras.callbacks.Callback):
+    """Keras callback for epoch and batch progress."""
+    @staticmethod
+    def bar2callback(bar, pop=None, delta=(lambda logs: 1)):
+        def callback(_, logs=None):
+            n = delta(logs)
+            if logs:
+                if pop:
+                    logs = copy(logs)
+                    [logs.pop(i, 0) for i in pop]
+                bar.set_postfix(logs, refresh=False)
+            bar.update(n)
+
+        return callback
+
+    def __init__(self, epochs=None, data_size=None, batch_size=None, verbose=1,
+                 tqdm_class=tqdm_auto, **tqdm_kwargs):
+        """
+        Parameters
+        ----------
+        epochs  : int, optional
+        data_size  : int, optional
+            Number of training pairs.
+        batch_size  : int, optional
+            Number of training pairs per batch.
+        verbose  : int
+            0: epoch, 1: batch (transient), 2: batch. [default: 1].
+            Will be set to `0` unless both `data_size` and `batch_size`
+            are given.
+        tqdm_class  : optional
+            `tqdm` class to use for bars [default: `tqdm.auto.tqdm`].
+        tqdm_kwargs  : optional
+            Any other arguments used for all bars.
+        """
+        if tqdm_kwargs:
+            tqdm_class = partial(tqdm_class, **tqdm_kwargs)
+        self.tqdm_class = tqdm_class
+        self.epoch_bar = tqdm_class(total=epochs, unit='epoch')
+        self.on_epoch_end = self.bar2callback(self.epoch_bar)
+        if data_size and batch_size:
+            self.batches = batches = (data_size + batch_size - 1) // batch_size
+        else:
+            self.batches = batches = None
+        self.verbose = verbose
+        if verbose == 1:
+            self.batch_bar = tqdm_class(total=batches, unit='batch', leave=False)
+            self.on_batch_end = self.bar2callback(
+                self.batch_bar, pop=['batch', 'size'],
+                delta=lambda logs: logs.get('size', 1))
+
+    def on_train_begin(self, *_, **__):
+        params = self.params.get
+        auto_total = params('epochs', params('nb_epoch', None))
+        if auto_total is not None and auto_total != self.epoch_bar.total:
+            self.epoch_bar.reset(total=auto_total)
+
+    def on_epoch_begin(self, epoch, *_, **__):
+        if self.epoch_bar.n < epoch:
+            ebar = self.epoch_bar
+            ebar.n = ebar.last_print_n = ebar.initial = epoch
+        if self.verbose:
+            params = self.params.get
+            total = params('samples', params(
+                'nb_sample', params('steps', None))) or self.batches
+            if self.verbose == 2:
+                if hasattr(self, 'batch_bar'):
+                    self.batch_bar.close()
+                self.batch_bar = self.tqdm_class(
+                    total=total, unit='batch', leave=True,
+                    unit_scale=1 / (params('batch_size', 1) or 1))
+                self.on_batch_end = self.bar2callback(
+                    self.batch_bar, pop=['batch', 'size'],
+                    delta=lambda logs: logs.get('size', 1))
+            elif self.verbose == 1:
+                self.batch_bar.unit_scale = 1 / (params('batch_size', 1) or 1)
+                self.batch_bar.reset(total=total)
+            else:
+                raise KeyError('Unknown verbosity')
+
+    def on_train_end(self, *_, **__):
+        if self.verbose:
+            self.batch_bar.close()
+        self.epoch_bar.close()
+
+    def display(self):
+        """Displays in the current cell in Notebooks."""
+        container = getattr(self.epoch_bar, 'container', None)
+        if container is None:
+            return
+        from .notebook import display
+        display(container)
+        batch_bar = getattr(self, 'batch_bar', None)
+        if batch_bar is not None:
+            display(batch_bar.container)
+
+    @staticmethod
+    def _implements_train_batch_hooks():
+        return True
+
+    @staticmethod
+    def _implements_test_batch_hooks():
+        return True
+
+    @staticmethod
+    def _implements_predict_batch_hooks():
+        return True
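
A minimal usage sketch for the `TqdmCallback` vendored above (illustrative only;
it assumes TensorFlow's bundled Keras is installed, and the toy model and random
data are stand-ins, not part of this repository):

    import numpy as np
    from tensorflow import keras
    from tqdm.keras import TqdmCallback

    # tiny regression model; shapes are arbitrary
    model = keras.Sequential([keras.layers.Dense(1, input_shape=(4,))])
    model.compile(optimizer="adam", loss="mse")
    x, y = np.random.rand(256, 4), np.random.rand(256, 1)

    # verbose=0 silences Keras' built-in bar; TqdmCallback then draws an
    # epoch-level bar plus (verbose=2) a persistent per-epoch batch bar
    model.fit(x, y, epochs=5, batch_size=32, verbose=0,
              callbacks=[TqdmCallback(verbose=2)])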
diff --git a/.local/lib/python3.8/site-packages/tqdm/notebook.py b/.local/lib/python3.8/site-packages/tqdm/notebook.py
new file mode 100644
index 0000000..5a264d3
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/tqdm/notebook.py
@@ -0,0 +1,328 @@
+"""
+IPython/Jupyter Notebook progressbar decorator for iterators.
+Includes a default `range` iterator printing to `stderr`.
+
+Usage:
+>>> from tqdm.notebook import trange, tqdm
+>>> for i in trange(10):
+...     ...
+"""
+# future division is important to divide integers and get as
+# a result precise floating numbers (instead of truncated int)
+from __future__ import absolute_import, division
+
+# import compatibility functions and utilities
+import re
+import sys
+from weakref import proxy
+
+# to inherit from the tqdm class
+from .std import tqdm as std_tqdm
+from .utils import _range
+
+if True:  # pragma: no cover
+    # import IPython/Jupyter base widget and display utilities
+    IPY = 0
+    try:  # IPython 4.x
+        import ipywidgets
+        IPY = 4
+    except ImportError:  # IPython 3.x / 2.x
+        IPY = 32
+        import warnings
+        with warnings.catch_warnings():
+            warnings.filterwarnings(
+                'ignore', message=".*The `IPython.html` package has been deprecated.*")
+            try:
+                import IPython.html.widgets as ipywidgets  # NOQA: F401
+            except ImportError:
+                pass
+
+    try:  # IPython 4.x / 3.x
+        if IPY == 32:
+            from IPython.html.widgets import HTML
+            from IPython.html.widgets import FloatProgress as IProgress
+            from IPython.html.widgets import HBox
+            IPY = 3
+        else:
+            from ipywidgets import HTML
+            from ipywidgets import FloatProgress as IProgress
+            from ipywidgets import HBox
+    except ImportError:
+        try:  # IPython 2.x
+            from IPython.html.widgets import HTML
+            from IPython.html.widgets import ContainerWidget as HBox
+            from IPython.html.widgets import FloatProgressWidget as IProgress
+            IPY = 2
+        except ImportError:
+            IPY = 0
+            IProgress = None
+            HBox = object
+
+    try:
+        from IPython.display import display  # , clear_output
+    except ImportError:
+        pass
+
+    # HTML encoding
+    try:  # Py3
+        from html import escape
+    except ImportError:  # Py2
+        from cgi import escape
+
+__author__ = {"github.com/": ["lrq3000", "casperdcl", "alexanderkuk"]}
+__all__ = ['tqdm_notebook', 'tnrange', 'tqdm', 'trange']
+WARN_NOIPYW = ("IProgress not found. Please update jupyter and ipywidgets."
+               " See https://ipywidgets.readthedocs.io/en/stable"
+               "/user_install.html")
+
+
+class TqdmHBox(HBox):
+    """`ipywidgets.HBox` with a pretty representation"""
+    def _repr_json_(self, pretty=None):
+        pbar = getattr(self, 'pbar', None)
+        if pbar is None:
+            return {}
+        d = pbar.format_dict
+        if pretty is not None:
+            d["ascii"] = not pretty
+        return d
+
+    def __repr__(self, pretty=False):
+        pbar = getattr(self, 'pbar', None)
+        if pbar is None:
+            return super(TqdmHBox, self).__repr__()
+        return pbar.format_meter(**self._repr_json_(pretty))
+
+    def _repr_pretty_(self, pp, *_, **__):
+        pp.text(self.__repr__(True))
+
+
+class tqdm_notebook(std_tqdm):
+    """
+    Experimental IPython/Jupyter Notebook widget using tqdm!
+    """
+    @staticmethod
+    def status_printer(_, total=None, desc=None, ncols=None):
+        """
+        Manage the printing of an IPython/Jupyter Notebook progress bar widget.
+        """
+        # Fallback to text bar if there's no total
+        # DEPRECATED: replaced with an 'info' style bar
+        # if not total:
+        #    return super(tqdm_notebook, tqdm_notebook).status_printer(file)
+
+        # fp = file
+
+        # Prepare IPython progress bar
+        if IProgress is None:  # #187 #451 #558 #872
+            raise ImportError(WARN_NOIPYW)
+        if total:
+            pbar = IProgress(min=0, max=total)
+        else:  # No total? Show an 'info'-style bar with no tqdm progress status
+            pbar = IProgress(min=0, max=1)
+            pbar.value = 1
+            pbar.bar_style = 'info'
+            if ncols is None:
+                pbar.layout.width = "20px"
+
+        ltext = HTML()
+        rtext = HTML()
+        if desc:
+            ltext.value = desc
+        container = TqdmHBox(children=[ltext, pbar, rtext])
+        # Prepare layout
+        if ncols is not None:  # use default style of ipywidgets
+            # ncols could be 100, "100px", "100%"
+            ncols = str(ncols)  # ipywidgets only accepts string
+            try:
+                if int(ncols) > 0:  # isnumeric and positive
+                    ncols += 'px'
+            except ValueError:
+                pass
+            pbar.layout.flex = '2'
+            container.layout.width = ncols
+            container.layout.display = 'inline-flex'
+            container.layout.flex_flow = 'row wrap'
+
+        return container
+
+    def display(self, msg=None, pos=None,
+                # additional signals
+                close=False, bar_style=None, check_delay=True):
+        # Note: contrary to native tqdm, msg='' does NOT clear the bar;
+        # the goal is to keep all info if an error happens, so the user
+        # knows at which iteration the loop failed.
+
+        # Clear previous output (really necessary?)
+        # clear_output(wait=1)
+
+        if not msg and not close:
+            d = self.format_dict
+            # remove {bar}
+            d['bar_format'] = (d['bar_format'] or "{l_bar}<bar/>{r_bar}").replace(
+                "{bar}", "<bar/>")
+            msg = self.format_meter(**d)
+
+        ltext, pbar, rtext = self.container.children
+        pbar.value = self.n
+
+        if msg:
+            # html escape special characters (like '&')
+            if '<bar/>' in msg:
+                left, right = map(escape, re.split(r'\|?<bar/>\|?', msg, 1))
+            else:
+                left, right = '', escape(msg)
+
+            # Update description
+            ltext.value = left
+            # never clear the bar (signal: msg='')
+            if right:
+                rtext.value = right
+
+        # Change bar style
+        if bar_style:
+            # Hack-ish way to avoid the 'danger' bar_style being overridden by
+            # 'success' because the bar gets closed after the error...
+            if pbar.bar_style != 'danger' or bar_style != 'success':
+                pbar.bar_style = bar_style
+
+        # Special signal to close the bar
+        if close and pbar.bar_style != 'danger':  # hide only if no error
+            try:
+                self.container.close()
+            except AttributeError:
+                self.container.visible = False
+
+        if check_delay and self.delay > 0 and not self.displayed:
+            display(self.container)
+            self.displayed = True
+
+    @property
+    def colour(self):
+        if hasattr(self, 'container'):
+            return self.container.children[-2].style.bar_color
+
+    @colour.setter
+    def colour(self, bar_color):
+        if hasattr(self, 'container'):
+            self.container.children[-2].style.bar_color = bar_color
+
+    def __init__(self, *args, **kwargs):
+        """
+        Supports the usual `tqdm.tqdm` parameters as well as those listed below.
+
+        Parameters
+        ----------
+        display  : Whether to call `display(self.container)` immediately
+            [default: True].
+        """
+        kwargs = kwargs.copy()
+        # Setup default output
+        file_kwarg = kwargs.get('file', sys.stderr)
+        if file_kwarg is sys.stderr or file_kwarg is None:
+            kwargs['file'] = sys.stdout  # avoid the red block in IPython
+
+        # Initialize parent class + avoid printing by using gui=True
+        kwargs['gui'] = True
+        # convert disable = None to False
+        kwargs['disable'] = bool(kwargs.get('disable', False))
+        colour = kwargs.pop('colour', None)
+        display_here = kwargs.pop('display', True)
+        super(tqdm_notebook, self).__init__(*args, **kwargs)
+        if self.disable or not kwargs['gui']:
+            self.disp = lambda *_, **__: None
+            return
+
+        # Get bar width
+        self.ncols = '100%' if self.dynamic_ncols else kwargs.get("ncols", None)
+
+        # Replace with IPython progress bar display (with correct total)
+        unit_scale = 1 if self.unit_scale is True else self.unit_scale or 1
+        total = self.total * unit_scale if self.total else self.total
+        self.container = self.status_printer(self.fp, total, self.desc, self.ncols)
+        self.container.pbar = proxy(self)
+        self.displayed = False
+        if display_here and self.delay <= 0:
+            display(self.container)
+            self.displayed = True
+        self.disp = self.display
+        self.colour = colour
+
+        # Print initial bar state
+        if not self.disable:
+            self.display(check_delay=False)
+
+    def __iter__(self):
+        try:
+            it = super(tqdm_notebook, self).__iter__()
+            for obj in it:
+                # return super(tqdm...) will not catch exception
+                yield obj
+        # NB: except ... [ as ...] breaks IPython async KeyboardInterrupt
+        except:  # NOQA
+            self.disp(bar_style='danger')
+            raise
+        # NB: don't `finally: close()`
+        # since this could be a shared bar which the user will `reset()`
+
+    def update(self, n=1):
+        try:
+            return super(tqdm_notebook, self).update(n=n)
+        # NB: except ... [ as ...] breaks IPython async KeyboardInterrupt
+        except:  # NOQA
+            # cannot catch KeyboardInterrupt when using manual tqdm
+            # as the interrupt will most likely happen on another statement
+            self.disp(bar_style='danger')
+            raise
+        # NB: don't `finally: close()`
+        # since this could be a shared bar which the user will `reset()`
+
+    def close(self):
+        if self.disable:
+            return
+        super(tqdm_notebook, self).close()
+        # Try to detect if there was an error or KeyboardInterrupt
+        # in manual mode: if n < total, something probably went wrong
+        if self.total and self.n < self.total:
+            self.disp(bar_style='danger', check_delay=False)
+        else:
+            if self.leave:
+                self.disp(bar_style='success', check_delay=False)
+            else:
+                self.disp(close=True, check_delay=False)
+
+    def clear(self, *_, **__):
+        pass
+
+    def reset(self, total=None):
+        """
+        Resets to 0 iterations for repeated use.
+
+        Consider combining with `leave=True`.
+
+        Parameters
+        ----------
+        total  : int or float, optional. Total to use for the new bar.
+        """
+        if self.disable:
+            return super(tqdm_notebook, self).reset(total=total)
+        _, pbar, _ = self.container.children
+        pbar.bar_style = ''
+        if total is not None:
+            pbar.max = total
+            if not self.total and self.ncols is None:  # no longer unknown total
+                pbar.layout.width = None  # reset width
+        return super(tqdm_notebook, self).reset(total=total)
+
+
+def tnrange(*args, **kwargs):
+    """
+    A shortcut for `tqdm.notebook.tqdm(xrange(*args), **kwargs)`.
+    On Python3+, `range` is used instead of `xrange`.
+    """
+    return tqdm_notebook(_range(*args), **kwargs)
+
+
+# Aliases
+tqdm = tqdm_notebook
+trange = tnrange
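
A usage sketch for the widget-based bar above (illustrative only; it needs a
Jupyter kernel with `ipywidgets` installed, otherwise `status_printer` raises
the `WARN_NOIPYW` ImportError):

    from tqdm.notebook import tqdm, trange

    for epoch in trange(3, desc="epoch"):                  # outer widget bar
        for _ in tqdm(range(100), desc="batch", leave=False):
            pass                                           # training step here

    # manual mode, exercising the `colour` property defined above
    pbar = tqdm(total=1000, unit="B", unit_scale=True, colour="#00ff00")
    pbar.update(250)
    pbar.close()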
diff --git a/.local/lib/python3.8/site-packages/tqdm/rich.py b/.local/lib/python3.8/site-packages/tqdm/rich.py
new file mode 100644
index 0000000..cf8e714
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/tqdm/rich.py
@@ -0,0 +1,152 @@
+"""
+`rich.progress` decorator for iterators.
+
+Usage:
+>>> from tqdm.rich import trange, tqdm
+>>> for i in trange(10):
+...     ...
+"""
+from __future__ import absolute_import
+
+from warnings import warn
+
+from rich.progress import (
+    BarColumn, Progress, ProgressColumn, Text, TimeElapsedColumn, TimeRemainingColumn, filesize)
+
+from .std import TqdmExperimentalWarning
+from .std import tqdm as std_tqdm
+from .utils import _range
+
+__author__ = {"github.com/": ["casperdcl"]}
+__all__ = ['tqdm_rich', 'trrange', 'tqdm', 'trange']
+
+
+class FractionColumn(ProgressColumn):
+    """Renders completed/total, e.g. '0.5/2.3 G'."""
+    def __init__(self, unit_scale=False, unit_divisor=1000):
+        self.unit_scale = unit_scale
+        self.unit_divisor = unit_divisor
+        super().__init__()
+
+    def render(self, task):
+        """Calculate common unit for completed and total."""
+        completed = int(task.completed)
+        total = int(task.total)
+        if self.unit_scale:
+            unit, suffix = filesize.pick_unit_and_suffix(
+                total,
+                ["", "K", "M", "G", "T", "P", "E", "Z", "Y"],
+                self.unit_divisor,
+            )
+        else:
+            unit, suffix = filesize.pick_unit_and_suffix(total, [""], 1)
+        precision = 0 if unit == 1 else 1
+        return Text(
+            f"{completed/unit:,.{precision}f}/{total/unit:,.{precision}f} {suffix}",
+            style="progress.download")
+
+
+class RateColumn(ProgressColumn):
+    """Renders human readable transfer speed."""
+    def __init__(self, unit="", unit_scale=False, unit_divisor=1000):
+        self.unit = unit
+        self.unit_scale = unit_scale
+        self.unit_divisor = unit_divisor
+        super().__init__()
+
+    def render(self, task):
+        """Show data transfer speed."""
+        speed = task.speed
+        if speed is None:
+            return Text(f"? {self.unit}/s", style="progress.data.speed")
+        if self.unit_scale:
+            unit, suffix = filesize.pick_unit_and_suffix(
+                speed,
+                ["", "K", "M", "G", "T", "P", "E", "Z", "Y"],
+                self.unit_divisor,
+            )
+        else:
+            unit, suffix = filesize.pick_unit_and_suffix(speed, [""], 1)
+        precision = 0 if unit == 1 else 1
+        return Text(f"{speed/unit:,.{precision}f} {suffix}{self.unit}/s",
+                    style="progress.data.speed")
+
+
+class tqdm_rich(std_tqdm):  # pragma: no cover
+    """Experimental rich.progress GUI version of tqdm!"""
+    # TODO: @classmethod: write()?
+    def __init__(self, *args, **kwargs):
+        """
+        This class accepts the following parameters *in addition* to
+        the parameters accepted by `tqdm`.
+
+        Parameters
+        ----------
+        progress  : tuple, optional
+            arguments for `rich.progress.Progress()`.
+        """
+        kwargs = kwargs.copy()
+        kwargs['gui'] = True
+        # convert disable = None to False
+        kwargs['disable'] = bool(kwargs.get('disable', False))
+        progress = kwargs.pop('progress', None)
+        super(tqdm_rich, self).__init__(*args, **kwargs)
+
+        if self.disable:
+            return
+
+        warn("rich is experimental/alpha", TqdmExperimentalWarning, stacklevel=2)
+        d = self.format_dict
+        if progress is None:
+            progress = (
+                "[progress.description]{task.description}"
+                "[progress.percentage]{task.percentage:>4.0f}%",
+                BarColumn(bar_width=None),
+                FractionColumn(
+                    unit_scale=d['unit_scale'], unit_divisor=d['unit_divisor']),
+                "[", TimeElapsedColumn(), "<", TimeRemainingColumn(),
+                ",", RateColumn(unit=d['unit'], unit_scale=d['unit_scale'],
+                                unit_divisor=d['unit_divisor']), "]"
+            )
+        self._prog = Progress(*progress, transient=not self.leave)
+        self._prog.__enter__()
+        self._task_id = self._prog.add_task(self.desc or "", **d)
+
+    def close(self):
+        if self.disable:
+            return
+        super(tqdm_rich, self).close()
+        self._prog.__exit__(None, None, None)
+
+    def clear(self, *_, **__):
+        pass
+
+    def display(self, *_, **__):
+        if not hasattr(self, '_prog'):
+            return
+        self._prog.update(self._task_id, completed=self.n, description=self.desc)
+
+    def reset(self, total=None):
+        """
+        Resets to 0 iterations for repeated use.
+
+        Parameters
+        ----------
+        total  : int or float, optional. Total to use for the new bar.
+        """
+        if hasattr(self, '_prog'):
+            self._prog.reset(total=total)
+        super(tqdm_rich, self).reset(total=total)
+
+
+def trrange(*args, **kwargs):
+    """
+    A shortcut for `tqdm.rich.tqdm(xrange(*args), **kwargs)`.
+    On Python3+, `range` is used instead of `xrange`.
+    """
+    return tqdm_rich(_range(*args), **kwargs)
+
+
+# Aliases
+tqdm = tqdm_rich
+trange = trrange
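
A usage sketch for the `rich`-backed bar above (illustrative only; it requires
the `rich` package and, as coded above, emits a `TqdmExperimentalWarning`):

    from tqdm.rich import tqdm, trange

    for _ in trange(100, desc="download"):
        pass

    # unit/unit_scale feed the FractionColumn and RateColumn defined above
    for _ in tqdm(range(1_000_000), unit="B", unit_scale=True):
        pass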
diff --git a/.local/lib/python3.8/site-packages/tqdm/std.py b/.local/lib/python3.8/site-packages/tqdm/std.py
new file mode 100644
index 0000000..9a68739
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/tqdm/std.py
@@ -0,0 +1,1541 @@
+"""
+Customisable progressbar decorator for iterators.
+Includes a default `range` iterator printing to `stderr`.
+
+Usage:
+>>> from tqdm import trange, tqdm
+>>> for i in trange(10):
+...     ...
+"""
+from __future__ import absolute_import, division
+
+import sys
+from collections import OrderedDict, defaultdict
+from contextlib import contextmanager
+from datetime import datetime, timedelta
+from numbers import Number
+from time import time
+from warnings import warn
+from weakref import WeakSet
+
+from ._monitor import TMonitor
+from .utils import (
+    CallbackIOWrapper, Comparable, DisableOnWriteError, FormatReplace, SimpleTextIOWrapper,
+    _basestring, _is_ascii, _range, _screen_shape_wrapper, _supports_unicode, _term_move_up,
+    _unich, _unicode, disp_len, disp_trim)
+
+__author__ = "https://github.com/tqdm/tqdm#contributions"
+__all__ = ['tqdm', 'trange',
+           'TqdmTypeError', 'TqdmKeyError', 'TqdmWarning',
+           'TqdmExperimentalWarning', 'TqdmDeprecationWarning',
+           'TqdmMonitorWarning']
+
+
+class TqdmTypeError(TypeError):
+    pass
+
+
+class TqdmKeyError(KeyError):
+    pass
+
+
+class TqdmWarning(Warning):
+    """base class for all tqdm warnings.
+
+    Used for non-external-code-breaking errors, such as garbled printing.
+    """
+    def __init__(self, msg, fp_write=None, *a, **k):
+        if fp_write is not None:
+            fp_write("\n" + self.__class__.__name__ + ": " + str(msg).rstrip() + '\n')
+        else:
+            super(TqdmWarning, self).__init__(msg, *a, **k)
+
+
+class TqdmExperimentalWarning(TqdmWarning, FutureWarning):
+    """beta feature, unstable API and behaviour"""
+    pass
+
+
+class TqdmDeprecationWarning(TqdmWarning, DeprecationWarning):
+    # not suppressed if raised
+    pass
+
+
+class TqdmMonitorWarning(TqdmWarning, RuntimeWarning):
+    """tqdm monitor errors which do not affect external functionality"""
+    pass
+
+
+def TRLock(*args, **kwargs):
+    """threading RLock"""
+    try:
+        from threading import RLock
+        return RLock(*args, **kwargs)
+    except (ImportError, OSError):  # pragma: no cover
+        pass
+
+
+class TqdmDefaultWriteLock(object):
+    """
+    Provide a default write lock for thread and multiprocessing safety.
+    Works only on platforms supporting `fork` (so Windows is excluded).
+    You must initialise a `tqdm` or `TqdmDefaultWriteLock` instance
+    before forking in order for the write lock to work.
+    On Windows, you need to supply the lock from the parent to the children as
+    an argument to joblib or the parallelism lib you use.
+    """
+    # global thread lock so no setup required for multithreading.
+    # NB: Do not create multiprocessing lock as it sets the multiprocessing
+    # context, disallowing `spawn()`/`forkserver()`
+    th_lock = TRLock()
+
+    def __init__(self):
+        # Create global parallelism locks to avoid racing issues with parallel
+        # bars (works only if fork is available: Linux/macOS, but not Windows)
+        cls = type(self)
+        root_lock = cls.th_lock
+        if root_lock is not None:
+            root_lock.acquire()
+        cls.create_mp_lock()
+        self.locks = [lk for lk in [cls.mp_lock, cls.th_lock] if lk is not None]
+        if root_lock is not None:
+            root_lock.release()
+
+    def acquire(self, *a, **k):
+        for lock in self.locks:
+            lock.acquire(*a, **k)
+
+    def release(self):
+        for lock in self.locks[::-1]:  # Release in inverse order of acquisition
+            lock.release()
+
+    def __enter__(self):
+        self.acquire()
+
+    def __exit__(self, *exc):
+        self.release()
+
+    @classmethod
+    def create_mp_lock(cls):
+        if not hasattr(cls, 'mp_lock'):
+            try:
+                from multiprocessing import RLock
+                cls.mp_lock = RLock()
+            except (ImportError, OSError):  # pragma: no cover
+                cls.mp_lock = None
+
+    @classmethod
+    def create_th_lock(cls):
+        assert hasattr(cls, 'th_lock')
+        warn("create_th_lock not needed anymore", TqdmDeprecationWarning, stacklevel=2)
+
+
+class Bar(object):
+    """
+    `str.format`-able bar with format specifiers: `[width][type]`
+
+    - `width`
+      + unspecified (default): use `self.default_len`
+      + `int >= 0`: overrides `self.default_len`
+      + `int < 0`: subtract from `self.default_len`
+    - `type`
+      + `a`: ascii (`charset=self.ASCII` override)
+      + `u`: unicode (`charset=self.UTF` override)
+      + `b`: blank (`charset="  "` override)
+    """
+    ASCII = " 123456789#"
+    UTF = u" " + u''.join(map(_unich, range(0x258F, 0x2587, -1)))
+    BLANK = "  "
+    COLOUR_RESET = '\x1b[0m'
+    COLOUR_RGB = '\x1b[38;2;%d;%d;%dm'
+    COLOURS = {'BLACK': '\x1b[30m', 'RED': '\x1b[31m', 'GREEN': '\x1b[32m',
+               'YELLOW': '\x1b[33m', 'BLUE': '\x1b[34m', 'MAGENTA': '\x1b[35m',
+               'CYAN': '\x1b[36m', 'WHITE': '\x1b[37m'}
+
+    def __init__(self, frac, default_len=10, charset=UTF, colour=None):
+        if not 0 <= frac <= 1:
+            warn("clamping frac to range [0, 1]", TqdmWarning, stacklevel=2)
+            frac = max(0, min(1, frac))
+        assert default_len > 0
+        self.frac = frac
+        self.default_len = default_len
+        self.charset = charset
+        self.colour = colour
+
+    @property
+    def colour(self):
+        return self._colour
+
+    @colour.setter
+    def colour(self, value):
+        if not value:
+            self._colour = None
+            return
+        try:
+            if value.upper() in self.COLOURS:
+                self._colour = self.COLOURS[value.upper()]
+            elif value[0] == '#' and len(value) == 7:
+                self._colour = self.COLOUR_RGB % tuple(
+                    int(i, 16) for i in (value[1:3], value[3:5], value[5:7]))
+            else:
+                raise KeyError
+        except (KeyError, AttributeError):
+            warn("Unknown colour (%s); valid choices: [hex (#00ff00), %s]" % (
+                 value, ", ".join(self.COLOURS)),
+                 TqdmWarning, stacklevel=2)
+            self._colour = None
+
+    def __format__(self, format_spec):
+        if format_spec:
+            _type = format_spec[-1].lower()
+            try:
+                charset = {'a': self.ASCII, 'u': self.UTF, 'b': self.BLANK}[_type]
+            except KeyError:
+                charset = self.charset
+            else:
+                format_spec = format_spec[:-1]
+            if format_spec:
+                N_BARS = int(format_spec)
+                if N_BARS < 0:
+                    N_BARS += self.default_len
+            else:
+                N_BARS = self.default_len
+        else:
+            charset = self.charset
+            N_BARS = self.default_len
+
+        nsyms = len(charset) - 1
+        bar_length, frac_bar_length = divmod(int(self.frac * N_BARS * nsyms), nsyms)
+
+        res = charset[-1] * bar_length
+        if bar_length < N_BARS:  # whitespace padding
+            res = res + charset[frac_bar_length] + charset[0] * (N_BARS - bar_length - 1)
+        return self.colour + res + self.COLOUR_RESET if self.colour else res
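+
+    # Example (illustrative, not in the original file): `Bar` implements
+    # `__format__`, so a fraction renders directly via str.format; with the
+    # ASCII charset, '{:10a}'.format(Bar(0.35)) -> '###5      '
+    # (3 full cells, partial cell '5', space-padded to width 10).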
+
+
+class EMA(object):
+    """
+    Exponential moving average: smoothing to give progressively lower
+    weights to older values.
+
+    Parameters
+    ----------
+    smoothing  : float, optional
+        Smoothing factor in range [0, 1], [default: 0.3].
+        Increase to give more weight to recent values.
+        Ranges from 0 (yields old value) to 1 (yields new value).
+    """
+    def __init__(self, smoothing=0.3):
+        self.alpha = smoothing
+        self.last = 0
+        self.calls = 0
+
+    def __call__(self, x=None):
+        """
+        Parameters
+        ----------
+        x  : float
+            New value to include in EMA.
+        """
+        beta = 1 - self.alpha
+        if x is not None:
+            self.last = self.alpha * x + beta * self.last
+            self.calls += 1
+        return self.last / (1 - beta ** self.calls) if self.calls else self.last
+
+
+class tqdm(Comparable):
+    """
+    Decorate an iterable object, returning an iterator which acts exactly
+    like the original iterable, but prints a dynamically updating
+    progressbar every time a value is requested.
+    """
+
+    monitor_interval = 10  # set to 0 to disable the thread
+    monitor = None
+    _instances = WeakSet()
+
+    @staticmethod
+    def format_sizeof(num, suffix='', divisor=1000):
+        """
+        Formats a number (greater than unity) with SI Order of Magnitude
+        prefixes.
+
+        Parameters
+        ----------
+        num  : float
+            Number ( >= 1) to format.
+        suffix  : str, optional
+            Post-postfix [default: ''].
+        divisor  : float, optional
+            Divisor between prefixes [default: 1000].
+
+        Returns
+        -------
+        out  : str
+            Number with Order of Magnitude SI unit postfix.
+        """
+        for unit in ['', 'k', 'M', 'G', 'T', 'P', 'E', 'Z']:
+            if abs(num) < 999.5:
+                if abs(num) < 99.95:
+                    if abs(num) < 9.995:
+                        return '{0:1.2f}'.format(num) + unit + suffix
+                    return '{0:2.1f}'.format(num) + unit + suffix
+                return '{0:3.0f}'.format(num) + unit + suffix
+            num /= divisor
+        return '{0:3.1f}Y'.format(num) + suffix
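+
+    # e.g. (illustrative): format_sizeof(123456) -> '123k';
+    # format_sizeof(1048576, suffix='B', divisor=1024) -> '1.00MB'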
+
+    @staticmethod
+    def format_interval(t):
+        """
+        Formats a number of seconds as a clock time, [H:]MM:SS
+
+        Parameters
+        ----------
+        t  : int
+            Number of seconds.
+
+        Returns
+        -------
+        out  : str
+            [H:]MM:SS
+        """
+        mins, s = divmod(int(t), 60)
+        h, m = divmod(mins, 60)
+        if h:
+            return '{0:d}:{1:02d}:{2:02d}'.format(h, m, s)
+        else:
+            return '{0:02d}:{1:02d}'.format(m, s)
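+
+    # e.g. (illustrative): format_interval(3661) -> '1:01:01';
+    # format_interval(75) -> '01:15'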
+
+    @staticmethod
+    def format_num(n):
+        """
+        Intelligent scientific notation (.3g).
+
+        Parameters
+        ----------
+        n  : int or float or Numeric
+            A Number.
+
+        Returns
+        -------
+        out  : str
+            Formatted number.
+        """
+        f = '{0:.3g}'.format(n).replace('+0', '+').replace('-0', '-')
+        n = str(n)
+        return f if len(f) < len(n) else n
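+
+    # e.g. (illustrative): format_num(123456789) -> '1.23e+8', while
+    # format_num(42) -> '42' (the shorter of str(n) and '{:.3g}' wins)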
+
+    @staticmethod
+    def status_printer(file):
+        """
+        Manage the printing and in-place updating of a line of characters.
+        Note that if the string is longer than a line, then in-place
+        updating may not work (it will print a new line at each refresh).
+        """
+        fp = file
+        fp_flush = getattr(fp, 'flush', lambda: None)  # pragma: no cover
+        if fp in (sys.stderr, sys.stdout):
+            getattr(sys.stderr, 'flush', lambda: None)()
+            getattr(sys.stdout, 'flush', lambda: None)()
+
+        def fp_write(s):
+            fp.write(_unicode(s))
+            fp_flush()
+
+        last_len = [0]
+
+        def print_status(s):
+            len_s = disp_len(s)
+            fp_write('\r' + s + (' ' * max(last_len[0] - len_s, 0)))
+            last_len[0] = len_s
+
+        return print_status
+
+    @staticmethod
+    def format_meter(n, total, elapsed, ncols=None, prefix='', ascii=False, unit='it',
+                     unit_scale=False, rate=None, bar_format=None, postfix=None,
+                     unit_divisor=1000, initial=0, colour=None, **extra_kwargs):
+        """
+        Return a string-based progress bar given some parameters
+
+        Parameters
+        ----------
+        n  : int or float
+            Number of finished iterations.
+        total  : int or float
+            The expected total number of iterations. If meaningless (None),
+            only basic progress statistics are displayed (no ETA).
+        elapsed  : float
+            Number of seconds passed since start.
+        ncols  : int, optional
+            The width of the entire output message. If specified,
+            dynamically resizes `{bar}` to stay within this bound
+            [default: None]. If `0`, will not print any bar (only stats).
+            The fallback is `{bar:10}`.
+        prefix  : str, optional
+            Prefix message (included in total width) [default: ''].
+            Use as {desc} in bar_format string.
+        ascii  : bool, optional or str, optional
+            If not set, use unicode (smooth blocks) to fill the meter
+            [default: False]. The fallback is to use ASCII characters
+            " 123456789#".
+        unit  : str, optional
+            The iteration unit [default: 'it'].
+        unit_scale  : bool or int or float, optional
+            If 1 or True, the number of iterations will be printed with an
+            appropriate SI metric prefix (k = 10^3, M = 10^6, etc.)
+            [default: False]. If any other non-zero number, will scale
+            `total` and `n`.
+        rate  : float, optional
+            Manual override for iteration rate.
+            If [default: None], uses n/elapsed.
+        bar_format  : str, optional
+            Specify a custom bar string formatting. May impact performance.
+            [default: '{l_bar}{bar}{r_bar}'], where
+            l_bar='{desc}: {percentage:3.0f}%|' and
+            r_bar='| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, '
+              '{rate_fmt}{postfix}]'
+            Possible vars: l_bar, bar, r_bar, n, n_fmt, total, total_fmt,
+              percentage, elapsed, elapsed_s, ncols, nrows, desc, unit,
+              rate, rate_fmt, rate_noinv, rate_noinv_fmt,
+              rate_inv, rate_inv_fmt, postfix, unit_divisor,
+              remaining, remaining_s, eta.
+            Note that a trailing ": " is automatically removed after {desc}
+            if the latter is empty.
+        postfix  : *, optional
+            Similar to `prefix`, but placed at the end
+            (e.g. for additional stats).
+            Note: postfix is usually a string (not a dict) for this method,
+            and will if possible be set to postfix = ', ' + postfix.
+            However other types are supported (#382).
+        unit_divisor  : float, optional
+            [default: 1000], ignored unless `unit_scale` is True.
+        initial  : int or float, optional
+            The initial counter value [default: 0].
+        colour  : str, optional
+            Bar colour (e.g. 'green', '#00ff00').
+
+        Returns
+        -------
+        out  : Formatted meter and stats, ready to display.
+        """
+
+        # sanity check: total
+        if total and n >= (total + 0.5):  # allow float imprecision (#849)
+            total = None
+
+        # apply custom scale if necessary
+        if unit_scale and unit_scale not in (True, 1):
+            if total:
+                total *= unit_scale
+            n *= unit_scale
+            if rate:
+                rate *= unit_scale  # by default rate = self.avg_dn / self.avg_dt
+            unit_scale = False
+
+        elapsed_str = tqdm.format_interval(elapsed)
+
+        # if unspecified, attempt to use rate = average speed
+        # (we allow manual override since predicting time is an arcane art)
+        if rate is None and elapsed:
+            rate = (n - initial) / elapsed
+        inv_rate = 1 / rate if rate else None
+        format_sizeof = tqdm.format_sizeof
+        rate_noinv_fmt = ((format_sizeof(rate) if unit_scale else
+                           '{0:5.2f}'.format(rate)) if rate else '?') + unit + '/s'
+        rate_inv_fmt = (
+            (format_sizeof(inv_rate) if unit_scale else '{0:5.2f}'.format(inv_rate))
+            if inv_rate else '?') + 's/' + unit
+        rate_fmt = rate_inv_fmt if inv_rate and inv_rate > 1 else rate_noinv_fmt
+
+        if unit_scale:
+            n_fmt = format_sizeof(n, divisor=unit_divisor)
+            total_fmt = format_sizeof(total, divisor=unit_divisor) if total is not None else '?'
+        else:
+            n_fmt = str(n)
+            total_fmt = str(total) if total is not None else '?'
+
+        try:
+            postfix = ', ' + postfix if postfix else ''
+        except TypeError:
+            pass
+
+        remaining = (total - n) / rate if rate and total else 0
+        remaining_str = tqdm.format_interval(remaining) if rate else '?'
+        try:
+            eta_dt = (datetime.now() + timedelta(seconds=remaining)
+                      if rate and total else datetime.utcfromtimestamp(0))
+        except OverflowError:
+            eta_dt = datetime.max
+
+        # format the stats displayed to the left and right sides of the bar
+        if prefix:
+            # old prefix setup work around
+            bool_prefix_colon_already = (prefix[-2:] == ": ")
+            l_bar = prefix if bool_prefix_colon_already else prefix + ": "
+        else:
+            l_bar = ''
+
+        r_bar = '| {0}/{1} [{2}<{3}, {4}{5}]'.format(
+            n_fmt, total_fmt, elapsed_str, remaining_str, rate_fmt, postfix)
+
+        # Custom bar formatting
+        # Populate a dict with all available progress indicators
+        format_dict = dict(
+            # slight extension of self.format_dict
+            n=n, n_fmt=n_fmt, total=total, total_fmt=total_fmt,
+            elapsed=elapsed_str, elapsed_s=elapsed,
+            ncols=ncols, desc=prefix or '', unit=unit,
+            rate=inv_rate if inv_rate and inv_rate > 1 else rate,
+            rate_fmt=rate_fmt, rate_noinv=rate,
+            rate_noinv_fmt=rate_noinv_fmt, rate_inv=inv_rate,
+            rate_inv_fmt=rate_inv_fmt,
+            postfix=postfix, unit_divisor=unit_divisor,
+            colour=colour,
+            # plus more useful definitions
+            remaining=remaining_str, remaining_s=remaining,
+            l_bar=l_bar, r_bar=r_bar, eta=eta_dt,
+            **extra_kwargs)
+
+        # total is known: we can predict some stats
+        if total:
+            # fractional and percentage progress
+            frac = n / total
+            percentage = frac * 100
+
+            l_bar += '{0:3.0f}%|'.format(percentage)
+
+            if ncols == 0:
+                return l_bar[:-1] + r_bar[1:]
+
+            format_dict.update(l_bar=l_bar)
+            if bar_format:
+                format_dict.update(percentage=percentage)
+
+                # auto-remove colon for empty `desc`
+                if not prefix:
+                    bar_format = bar_format.replace("{desc}: ", '')
+            else:
+                bar_format = "{l_bar}{bar}{r_bar}"
+
+            full_bar = FormatReplace()
+            try:
+                nobar = bar_format.format(bar=full_bar, **format_dict)
+            except UnicodeEncodeError:
+                bar_format = _unicode(bar_format)
+                nobar = bar_format.format(bar=full_bar, **format_dict)
+            if not full_bar.format_called:
+                # no {bar}, we can just format and return
+                return nobar
+
+            # Formatting progress bar space available for bar's display
+            full_bar = Bar(frac,
+                           max(1, ncols - disp_len(nobar)) if ncols else 10,
+                           charset=Bar.ASCII if ascii is True else ascii or Bar.UTF,
+                           colour=colour)
+            if not _is_ascii(full_bar.charset) and _is_ascii(bar_format):
+                bar_format = _unicode(bar_format)
+            res = bar_format.format(bar=full_bar, **format_dict)
+            return disp_trim(res, ncols) if ncols else res
+
+        elif bar_format:
+            # user-specified bar_format but no total
+            l_bar += '|'
+            format_dict.update(l_bar=l_bar, percentage=0)
+            full_bar = FormatReplace()
+            nobar = bar_format.format(bar=full_bar, **format_dict)
+            if not full_bar.format_called:
+                return nobar
+            full_bar = Bar(0,
+                           max(1, ncols - disp_len(nobar)) if ncols else 10,
+                           charset=Bar.BLANK, colour=colour)
+            res = bar_format.format(bar=full_bar, **format_dict)
+            return disp_trim(res, ncols) if ncols else res
+        else:
+            # no total: no progressbar, ETA, just progress stats
+            return '{0}{1}{2} [{3}, {4}{5}]'.format(
+                (prefix + ": ") if prefix else '', n_fmt, unit, elapsed_str, rate_fmt, postfix)
+
+    def __new__(cls, *_, **__):
+        instance = object.__new__(cls)
+        with cls.get_lock():  # also constructs lock if non-existent
+            cls._instances.add(instance)
+            # create monitoring thread
+            if cls.monitor_interval and (cls.monitor is None
+                                         or not cls.monitor.report()):
+                try:
+                    cls.monitor = TMonitor(cls, cls.monitor_interval)
+                except Exception as e:  # pragma: nocover
+                    warn("tqdm:disabling monitor support"
+                         " (monitor_interval = 0) due to:\n" + str(e),
+                         TqdmMonitorWarning, stacklevel=2)
+                    cls.monitor_interval = 0
+        return instance
+
+    @classmethod
+    def _get_free_pos(cls, instance=None):
+        """Skips specified instance."""
+        positions = {abs(inst.pos) for inst in cls._instances
+                     if inst is not instance and hasattr(inst, "pos")}
+        return min(set(range(len(positions) + 1)).difference(positions))
+
+    @classmethod
+    def _decr_instances(cls, instance):
+        """
+        Remove from list and reposition another unfixed bar
+        to fill the new gap.
+
+        This means that by default (where all nested bars are unfixed),
+        order is not maintained but screen flicker/blank space is minimised.
+        (tqdm<=4.44.1 moved ALL subsequent unfixed bars up.)
+        """
+        with cls._lock:
+            try:
+                cls._instances.remove(instance)
+            except KeyError:
+                # if not instance.gui:  # pragma: no cover
+                #     raise
+                pass  # py2: maybe magically removed already
+            # else:
+            if not instance.gui:
+                last = (instance.nrows or 20) - 1
+                # find unfixed (`pos >= 0`) overflow (`pos >= nrows - 1`)
+                instances = list(filter(
+                    lambda i: hasattr(i, "pos") and last <= i.pos,
+                    cls._instances))
+                # set first found to current `pos`
+                if instances:
+                    inst = min(instances, key=lambda i: i.pos)
+                    inst.clear(nolock=True)
+                    inst.pos = abs(instance.pos)
+
+    @classmethod
+    def write(cls, s, file=None, end="\n", nolock=False):
+        """Print a message via tqdm (without overlap with bars)."""
+        fp = file if file is not None else sys.stdout
+        with cls.external_write_mode(file=file, nolock=nolock):
+            # Write the message
+            fp.write(s)
+            fp.write(end)
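+
+    # e.g. (illustrative): inside a `for x in tqdm(...)` loop, prefer
+    # `tqdm.write("saved checkpoint")` over bare `print()` so the message
+    # lands above the bar(s) instead of breaking them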
+
+    @classmethod
+    @contextmanager
+    def external_write_mode(cls, file=None, nolock=False):
+        """
+        Disable tqdm within context and refresh tqdm when exits.
+        Useful when writing to standard output stream
+        """
+        fp = file if file is not None else sys.stdout
+
+        try:
+            if not nolock:
+                cls.get_lock().acquire()
+            # Clear all bars
+            inst_cleared = []
+            for inst in getattr(cls, '_instances', []):
+                # Clear instance if in the target output file
+                # or if write output + tqdm output are both either
+                # sys.stdout or sys.stderr (because both are mixed in terminal)
+                if hasattr(inst, "start_t") and (inst.fp == fp or all(
+                        f in (sys.stdout, sys.stderr) for f in (fp, inst.fp))):
+                    inst.clear(nolock=True)
+                    inst_cleared.append(inst)
+            yield
+            # Force refresh display of bars we cleared
+            for inst in inst_cleared:
+                inst.refresh(nolock=True)
+        finally:
+            if not nolock:
+                cls._lock.release()
+
+    @classmethod
+    def set_lock(cls, lock):
+        """Set the global lock."""
+        cls._lock = lock
+
+    @classmethod
+    def get_lock(cls):
+        """Get the global lock. Construct it if it does not exist."""
+        if not hasattr(cls, '_lock'):
+            cls._lock = TqdmDefaultWriteLock()
+        return cls._lock
+
+    @classmethod
+    def pandas(cls, **tqdm_kwargs):
+        """
+        Registers the current `tqdm` class with
+            pandas.core.
+            ( frame.DataFrame
+            | series.Series
+            | groupby.(generic.)DataFrameGroupBy
+            | groupby.(generic.)SeriesGroupBy
+            ).progress_apply
+
+        A new instance will be created every time `progress_apply` is called,
+        and each instance will automatically `close()` upon completion.
+
+        Parameters
+        ----------
+        tqdm_kwargs  : arguments for the tqdm instance
+
+        Examples
+        --------
+        >>> import pandas as pd
+        >>> import numpy as np
+        >>> from tqdm import tqdm
+        >>> from tqdm.gui import tqdm as tqdm_gui
+        >>>
+        >>> df = pd.DataFrame(np.random.randint(0, 100, (100000, 6)))
+        >>> tqdm.pandas(ncols=50)  # can use tqdm_gui, optional kwargs, etc
+        >>> # Now you can use `progress_apply` instead of `apply`
+        >>> df.groupby(0).progress_apply(lambda x: x**2)
+
+        References
+        ----------
+        <https://stackoverflow.com/questions/18603270/\
+        progress-indicator-during-pandas-operations-python>
+        """
+        from warnings import catch_warnings, simplefilter
+
+        from pandas.core.frame import DataFrame
+        from pandas.core.series import Series
+        try:
+            with catch_warnings():
+                simplefilter("ignore", category=FutureWarning)
+                from pandas import Panel
+        except ImportError:  # pandas>=1.2.0
+            Panel = None
+        Rolling, Expanding = None, None
+        try:  # pandas>=1.0.0
+            from pandas.core.window.rolling import _Rolling_and_Expanding
+        except ImportError:
+            try:  # pandas>=0.18.0
+                from pandas.core.window import _Rolling_and_Expanding
+            except ImportError:  # pandas>=1.2.0
+                try:  # pandas>=1.2.0
+                    from pandas.core.window.expanding import Expanding
+                    from pandas.core.window.rolling import Rolling
+                    _Rolling_and_Expanding = Rolling, Expanding
+                except ImportError:  # pragma: no cover
+                    _Rolling_and_Expanding = None
+        try:  # pandas>=0.25.0
+            from pandas.core.groupby.generic import SeriesGroupBy  # , NDFrameGroupBy
+            from pandas.core.groupby.generic import DataFrameGroupBy
+        except ImportError:  # pragma: no cover
+            try:  # pandas>=0.23.0
+                from pandas.core.groupby.groupby import DataFrameGroupBy, SeriesGroupBy
+            except ImportError:
+                from pandas.core.groupby import DataFrameGroupBy, SeriesGroupBy
+        try:  # pandas>=0.23.0
+            from pandas.core.groupby.groupby import GroupBy
+        except ImportError:  # pragma: no cover
+            from pandas.core.groupby import GroupBy
+
+        try:  # pandas>=0.23.0
+            from pandas.core.groupby.groupby import PanelGroupBy
+        except ImportError:
+            try:
+                from pandas.core.groupby import PanelGroupBy
+            except ImportError:  # pandas>=0.25.0
+                PanelGroupBy = None
+
+        tqdm_kwargs = tqdm_kwargs.copy()
+        deprecated_t = [tqdm_kwargs.pop('deprecated_t', None)]
+
+        def inner_generator(df_function='apply'):
+            def inner(df, func, *args, **kwargs):
+                """
+                Parameters
+                ----------
+                df  : (DataFrame|Series)[GroupBy]
+                    Data (may be grouped).
+                func  : function
+                    To be applied on the (grouped) data.
+                **kwargs  : optional
+                    Transmitted to `df.apply()`.
+                """
+
+                # Precompute total iterations
+                total = tqdm_kwargs.pop("total", getattr(df, 'ngroups', None))
+                if total is None:  # not grouped
+                    if df_function == 'applymap':
+                        total = df.size
+                    elif isinstance(df, Series):
+                        total = len(df)
+                    elif (_Rolling_and_Expanding is None or
+                          not isinstance(df, _Rolling_and_Expanding)):
+                        # DataFrame or Panel
+                        axis = kwargs.get('axis', 0)
+                        if axis == 'index':
+                            axis = 0
+                        elif axis == 'columns':
+                            axis = 1
+                        # when axis=0, total is shape[axis1]
+                        total = df.size // df.shape[axis]
+
+                # Init bar
+                if deprecated_t[0] is not None:
+                    t = deprecated_t[0]
+                    deprecated_t[0] = None
+                else:
+                    t = cls(total=total, **tqdm_kwargs)
+
+                if len(args) > 0:
+                    # *args intentionally not supported (see #244, #299)
+                    TqdmDeprecationWarning(
+                        "Except func, normal arguments are intentionally" +
+                        " not supported by" +
+                        " `(DataFrame|Series|GroupBy).progress_apply`." +
+                        " Use keyword arguments instead.",
+                        fp_write=getattr(t.fp, 'write', sys.stderr.write))
+
+                try:  # pandas>=1.3.0
+                    from pandas.core.common import is_builtin_func
+                except ImportError:
+                    is_builtin_func = df._is_builtin_func
+                try:
+                    func = is_builtin_func(func)
+                except TypeError:
+                    pass
+
+                # Define bar updating wrapper
+                def wrapper(*args, **kwargs):
+                    # update the bar (`t`) correctly:
+                    # it seems `pandas apply` calls `func` twice
+                    # on the first column/row to decide whether it can
+                    # take a fast or slow code path; so stop when t.total==t.n
+                    t.update(n=1 if not t.total or t.n < t.total else 0)
+                    return func(*args, **kwargs)
+
+                # Apply the provided function (in **kwargs)
+                # on the df using our wrapper (which provides bar updating)
+                try:
+                    return getattr(df, df_function)(wrapper, **kwargs)
+                finally:
+                    t.close()
+
+            return inner
+
+        # Monkeypatch pandas to provide easy methods
+        # Enable custom tqdm progress in pandas!
+        Series.progress_apply = inner_generator()
+        SeriesGroupBy.progress_apply = inner_generator()
+        Series.progress_map = inner_generator('map')
+        SeriesGroupBy.progress_map = inner_generator('map')
+
+        DataFrame.progress_apply = inner_generator()
+        DataFrameGroupBy.progress_apply = inner_generator()
+        DataFrame.progress_applymap = inner_generator('applymap')
+
+        if Panel is not None:
+            Panel.progress_apply = inner_generator()
+        if PanelGroupBy is not None:
+            PanelGroupBy.progress_apply = inner_generator()
+
+        GroupBy.progress_apply = inner_generator()
+        GroupBy.progress_aggregate = inner_generator('aggregate')
+        GroupBy.progress_transform = inner_generator('transform')
+
+        if Rolling is not None and Expanding is not None:
+            Rolling.progress_apply = inner_generator()
+            Expanding.progress_apply = inner_generator()
+        elif _Rolling_and_Expanding is not None:
+            _Rolling_and_Expanding.progress_apply = inner_generator()
+
+    def __init__(self, iterable=None, desc=None, total=None, leave=True, file=None,
+                 ncols=None, mininterval=0.1, maxinterval=10.0, miniters=None,
+                 ascii=None, disable=False, unit='it', unit_scale=False,
+                 dynamic_ncols=False, smoothing=0.3, bar_format=None, initial=0,
+                 position=None, postfix=None, unit_divisor=1000, write_bytes=None,
+                 lock_args=None, nrows=None, colour=None, delay=0, gui=False,
+                 **kwargs):
+        """
+        Parameters
+        ----------
+        iterable  : iterable, optional
+            Iterable to decorate with a progressbar.
+            Leave blank to manually manage the updates.
+        desc  : str, optional
+            Prefix for the progressbar.
+        total  : int or float, optional
+            The number of expected iterations. If unspecified,
+            len(iterable) is used if possible. If float("inf") or as a last
+            resort, only basic progress statistics are displayed
+            (no ETA, no progressbar).
+            If `gui` is True and this parameter needs subsequent updating,
+            specify an initial arbitrary large positive number,
+            e.g. 9e9.
+        leave  : bool, optional
+            If [default: True], keeps all traces of the progressbar
+            upon termination of iteration.
+            If `None`, will leave only if `position` is `0`.
+        file  : `io.TextIOWrapper` or `io.StringIO`, optional
+            Specifies where to output the progress messages
+            (default: sys.stderr). Uses `file.write(str)` and `file.flush()`
+            methods.  For encoding, see `write_bytes`.
+        ncols  : int, optional
+            The width of the entire output message. If specified,
+            dynamically resizes the progressbar to stay within this bound.
+            If unspecified, attempts to use environment width. The
+            fallback is a meter width of 10 and no limit for the counter and
+            statistics. If 0, will not print any meter (only stats).
+        mininterval  : float, optional
+            Minimum progress display update interval [default: 0.1] seconds.
+        maxinterval  : float, optional
+            Maximum progress display update interval [default: 10] seconds.
+            Automatically adjusts `miniters` to correspond to `mininterval`
+            after long display update lag. Only works if `dynamic_miniters`
+            or monitor thread is enabled.
+        miniters  : int or float, optional
+            Minimum progress display update interval, in iterations.
+            If 0 and `dynamic_miniters`, will automatically adjust to equal
+            `mininterval` (more CPU efficient, good for tight loops).
+            If > 0, will skip display of specified number of iterations.
+            Tweak this and `mininterval` to get very efficient loops.
+            If your progress is erratic with both fast and slow iterations
+            (network, skipping items, etc) you should set miniters=1.
+        ascii  : bool or str, optional
+            If unspecified or False, use unicode (smooth blocks) to fill
+            the meter. The fallback is to use ASCII characters " 123456789#".
+        disable  : bool, optional
+            Whether to disable the entire progressbar wrapper
+            [default: False]. If set to None, disable on non-TTY.
+        unit  : str, optional
+            String that will be used to define the unit of each iteration
+            [default: it].
+        unit_scale  : bool or int or float, optional
+            If 1 or True, the number of iterations will be reduced/scaled
+            automatically and a metric prefix following the
+            International System of Units standard will be added
+            (kilo, mega, etc.) [default: False]. If any other non-zero
+            number, will scale `total` and `n`.
+        dynamic_ncols  : bool, optional
+            If set, constantly alters `ncols` and `nrows` to the
+            environment (allowing for window resizes) [default: False].
+        smoothing  : float, optional
+            Exponential moving average smoothing factor for speed estimates
+            (ignored in GUI mode). Ranges from 0 (average speed) to 1
+            (current/instantaneous speed) [default: 0.3].
+        bar_format  : str, optional
+            Specify a custom bar string formatting. May impact performance.
+            [default: '{l_bar}{bar}{r_bar}'], where
+            l_bar='{desc}: {percentage:3.0f}%|' and
+            r_bar='| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, '
+              '{rate_fmt}{postfix}]'
+            Possible vars: l_bar, bar, r_bar, n, n_fmt, total, total_fmt,
+              percentage, elapsed, elapsed_s, ncols, nrows, desc, unit,
+              rate, rate_fmt, rate_noinv, rate_noinv_fmt,
+              rate_inv, rate_inv_fmt, postfix, unit_divisor,
+              remaining, remaining_s, eta.
+            Note that a trailing ": " is automatically removed after {desc}
+            if the latter is empty.
+        initial  : int or float, optional
+            The initial counter value. Useful when restarting a progress
+            bar [default: 0]. If using float, consider specifying `{n:.3f}`
+            or similar in `bar_format`, or specifying `unit_scale`.
+        position  : int, optional
+            Specify the line offset to print this bar (starting from 0).
+            Automatic if unspecified.
+            Useful to manage multiple bars at once (eg, from threads).
+        postfix  : dict or *, optional
+            Specify additional stats to display at the end of the bar.
+            Calls `set_postfix(**postfix)` if possible (dict).
+        unit_divisor  : float, optional
+            [default: 1000], ignored unless `unit_scale` is True.
+        write_bytes  : bool, optional
+            If (default: None) and `file` is unspecified,
+            bytes will be written in Python 2. If `True` will also write
+            bytes. In all other cases will default to unicode.
+        lock_args  : tuple, optional
+            Passed to `refresh` for intermediate output
+            (initialisation, iterating, and updating).
+        nrows  : int, optional
+            The screen height. If specified, hides nested bars outside this
+            bound. If unspecified, attempts to use environment height.
+            The fallback is 20.
+        colour  : str, optional
+            Bar colour (e.g. 'green', '#00ff00').
+        delay  : float, optional
+            Don't display until [default: 0] seconds have elapsed.
+        gui  : bool, optional
+            WARNING: internal parameter - do not use.
+            Use tqdm.gui.tqdm(...) instead. If set, will attempt to use
+            matplotlib animations for a graphical output [default: False].
+
+        Returns
+        -------
+        out  : decorated iterator.
+        """
+        if write_bytes is None:
+            write_bytes = file is None and sys.version_info < (3,)
+
+        if file is None:
+            file = sys.stderr
+
+        if write_bytes:
+            # Despite coercing unicode into bytes, py2 sys.std* streams
+            # should have bytes written to them.
+            file = SimpleTextIOWrapper(
+                file, encoding=getattr(file, 'encoding', None) or 'utf-8')
+
+        file = DisableOnWriteError(file, tqdm_instance=self)
+
+        if disable is None and hasattr(file, "isatty") and not file.isatty():
+            disable = True
+
+        if total is None and iterable is not None:
+            try:
+                total = len(iterable)
+            except (TypeError, AttributeError):
+                total = None
+        if total == float("inf"):
+            # Infinite iterations, behave same as unknown
+            total = None
+
+        if disable:
+            self.iterable = iterable
+            self.disable = disable
+            with self._lock:
+                self.pos = self._get_free_pos(self)
+                self._instances.remove(self)
+            self.n = initial
+            self.total = total
+            self.leave = leave
+            return
+
+        if kwargs:
+            self.disable = True
+            with self._lock:
+                self.pos = self._get_free_pos(self)
+                self._instances.remove(self)
+            raise (
+                TqdmDeprecationWarning(
+                    "`nested` is deprecated and automated.\n"
+                    "Use `position` instead for manual control.\n",
+                    fp_write=getattr(file, 'write', sys.stderr.write))
+                if "nested" in kwargs else
+                TqdmKeyError("Unknown argument(s): " + str(kwargs)))
+
+        # Preprocess the arguments
+        if (
+            (ncols is None or nrows is None) and (file in (sys.stderr, sys.stdout))
+        ) or dynamic_ncols:  # pragma: no cover
+            if dynamic_ncols:
+                dynamic_ncols = _screen_shape_wrapper()
+                if dynamic_ncols:
+                    ncols, nrows = dynamic_ncols(file)
+            else:
+                _dynamic_ncols = _screen_shape_wrapper()
+                if _dynamic_ncols:
+                    _ncols, _nrows = _dynamic_ncols(file)
+                    if ncols is None:
+                        ncols = _ncols
+                    if nrows is None:
+                        nrows = _nrows
+
+        if miniters is None:
+            miniters = 0
+            dynamic_miniters = True
+        else:
+            dynamic_miniters = False
+
+        if mininterval is None:
+            mininterval = 0
+
+        if maxinterval is None:
+            maxinterval = 0
+
+        if ascii is None:
+            ascii = not _supports_unicode(file)
+
+        if bar_format and ascii is not True and not _is_ascii(ascii):
+            # Convert bar format into unicode since terminal uses unicode
+            bar_format = _unicode(bar_format)
+
+        if smoothing is None:
+            smoothing = 0
+
+        # Store the arguments
+        self.iterable = iterable
+        self.desc = desc or ''
+        self.total = total
+        self.leave = leave
+        self.fp = file
+        self.ncols = ncols
+        self.nrows = nrows
+        self.mininterval = mininterval
+        self.maxinterval = maxinterval
+        self.miniters = miniters
+        self.dynamic_miniters = dynamic_miniters
+        self.ascii = ascii
+        self.disable = disable
+        self.unit = unit
+        self.unit_scale = unit_scale
+        self.unit_divisor = unit_divisor
+        self.initial = initial
+        self.lock_args = lock_args
+        self.delay = delay
+        self.gui = gui
+        self.dynamic_ncols = dynamic_ncols
+        self.smoothing = smoothing
+        self._ema_dn = EMA(smoothing)
+        self._ema_dt = EMA(smoothing)
+        self._ema_miniters = EMA(smoothing)
+        self.bar_format = bar_format
+        self.postfix = None
+        self.colour = colour
+        self._time = time
+        if postfix:
+            try:
+                self.set_postfix(refresh=False, **postfix)
+            except TypeError:
+                self.postfix = postfix
+
+        # Init the iterations counters
+        self.last_print_n = initial
+        self.n = initial
+
+        # if nested, at initial sp() call we replace '\r' by '\n' to
+        # not overwrite the outer progress bar
+        with self._lock:
+            # mark fixed positions as negative
+            self.pos = self._get_free_pos(self) if position is None else -position
+
+        if not gui:
+            # Initialize the screen printer
+            self.sp = self.status_printer(self.fp)
+            if delay <= 0:
+                self.refresh(lock_args=self.lock_args)
+
+        # Init the time counter
+        self.last_print_t = self._time()
+        # NB: Avoid race conditions by setting start_t at the very end of init
+        self.start_t = self.last_print_t
+
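+    # A minimal construction sketch combining the parameters documented above;
+    # `items` is a hypothetical iterable (illustrative only, not part of the
+    # original file):
+    # >>> for _ in tqdm(items, desc='download', unit='B', unit_scale=True,
+    # ...               mininterval=0.5, miniters=0):
+    # ...     pass
+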
+    def __bool__(self):
+        if self.total is not None:
+            return self.total > 0
+        if self.iterable is None:
+            raise TypeError('bool() undefined when iterable == total == None')
+        return bool(self.iterable)
+
+    def __nonzero__(self):
+        return self.__bool__()
+
+    def __len__(self):
+        return (
+            self.total if self.iterable is None
+            else self.iterable.shape[0] if hasattr(self.iterable, "shape")
+            else len(self.iterable) if hasattr(self.iterable, "__len__")
+            else self.iterable.__length_hint__() if hasattr(self.iterable, "__length_hint__")
+            else getattr(self, "total", None))
+
+    def __reversed__(self):
+        try:
+            orig = self.iterable
+        except AttributeError:
+            raise TypeError("'tqdm' object is not reversible")
+        else:
+            self.iterable = reversed(self.iterable)
+            return self.__iter__()
+        finally:
+            self.iterable = orig
+
+    def __contains__(self, item):
+        contains = getattr(self.iterable, '__contains__', None)
+        return contains(item) if contains is not None else item in self.__iter__()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        try:
+            self.close()
+        except AttributeError:
+            # maybe eager thread cleanup upon external error
+            if (exc_type, exc_value, traceback) == (None, None, None):
+                raise
+            warn("AttributeError ignored", TqdmWarning, stacklevel=2)
+
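+    # Context-manager sketch: `with` guarantees `close()` runs even if the
+    # loop body raises (hypothetical `items`):
+    # >>> with tqdm(total=len(items)) as t:
+    # ...     for _ in items:
+    # ...         t.update()
+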
+    def __del__(self):
+        self.close()
+
+    def __str__(self):
+        return self.format_meter(**self.format_dict)
+
+    @property
+    def _comparable(self):
+        return abs(getattr(self, "pos", 1 << 31))
+
+    def __hash__(self):
+        return id(self)
+
+    def __iter__(self):
+        """Backward-compatibility to use: for x in tqdm(iterable)"""
+
+        # Inlining instance variables as locals (speed optimisation)
+        iterable = self.iterable
+
+        # If the bar is disabled, then just walk the iterable
+        # (note: keep this check outside the loop for performance)
+        if self.disable:
+            for obj in iterable:
+                yield obj
+            return
+
+        mininterval = self.mininterval
+        last_print_t = self.last_print_t
+        last_print_n = self.last_print_n
+        min_start_t = self.start_t + self.delay
+        n = self.n
+        time = self._time
+
+        try:
+            for obj in iterable:
+                yield obj
+                # Update and possibly print the progressbar.
+                # Note: does not call self.update(1) for speed optimisation.
+                n += 1
+
+                if n - last_print_n >= self.miniters:
+                    cur_t = time()
+                    dt = cur_t - last_print_t
+                    if dt >= mininterval and cur_t >= min_start_t:
+                        self.update(n - last_print_n)
+                        last_print_n = self.last_print_n
+                        last_print_t = self.last_print_t
+        finally:
+            self.n = n
+            self.close()
+
+    def update(self, n=1):
+        """
+        Manually update the progress bar, useful for streams
+        such as reading files.
+        E.g.:
+        >>> t = tqdm(total=filesize) # Initialise
+        >>> for current_buffer in stream:
+        ...    ...
+        ...    t.update(len(current_buffer))
+        >>> t.close()
+        The last line is highly recommended, but possibly not necessary if
+        `t.update()` will be called in such a way that `filesize` will be
+        exactly reached and printed.
+
+        Parameters
+        ----------
+        n  : int or float, optional
+            Increment to add to the internal counter of iterations
+            [default: 1]. If using float, consider specifying `{n:.3f}`
+            or similar in `bar_format`, or specifying `unit_scale`.
+
+        Returns
+        -------
+        out  : bool or None
+            True if a `display()` was triggered.
+        """
+        if self.disable:
+            return
+
+        if n < 0:
+            self.last_print_n += n  # for auto-refresh logic to work
+        self.n += n
+
+        # check counter first to reduce calls to time()
+        if self.n - self.last_print_n >= self.miniters:
+            cur_t = self._time()
+            dt = cur_t - self.last_print_t
+            if dt >= self.mininterval and cur_t >= self.start_t + self.delay:
+                cur_t = self._time()
+                dn = self.n - self.last_print_n  # >= n
+                if self.smoothing and dt and dn:
+                    # EMA (not just overall average)
+                    self._ema_dn(dn)
+                    self._ema_dt(dt)
+                self.refresh(lock_args=self.lock_args)
+                if self.dynamic_miniters:
+                    # If no `miniters` was specified, adjust automatically to the
+                    # maximum iteration rate seen so far between two prints.
+                    # e.g.: After running `tqdm.update(5)`, subsequent
+                    # calls to `tqdm.update()` will only cause an update after
+                    # at least 5 more iterations.
+                    if self.maxinterval and dt >= self.maxinterval:
+                        self.miniters = dn * (self.mininterval or self.maxinterval) / dt
+                    elif self.smoothing:
+                        # EMA miniters update
+                        self.miniters = self._ema_miniters(
+                            dn * (self.mininterval / dt if self.mininterval and dt
+                                  else 1))
+                    else:
+                        # max iters between two prints
+                        self.miniters = max(self.miniters, dn)
+
+                # Store old values for next call
+                self.last_print_n = self.n
+                self.last_print_t = cur_t
+                return True
+
+    def close(self):
+        """Cleanup and (if leave=False) close the progressbar."""
+        if self.disable:
+            return
+
+        # Prevent multiple closures
+        self.disable = True
+
+        # decrement instance pos and remove from internal set
+        pos = abs(self.pos)
+        self._decr_instances(self)
+
+        if self.last_print_t < self.start_t + self.delay:
+            # haven't ever displayed; nothing to clear
+            return
+
+        # GUI mode
+        if getattr(self, 'sp', None) is None:
+            return
+
+        # annoyingly, _supports_unicode isn't good enough
+        def fp_write(s):
+            self.fp.write(_unicode(s))
+
+        try:
+            fp_write('')
+        except ValueError as e:
+            if 'closed' in str(e):
+                return
+            raise  # pragma: no cover
+
+        leave = pos == 0 if self.leave is None else self.leave
+
+        with self._lock:
+            if leave:
+                # stats for overall rate (no weighted average)
+                self._ema_dt = lambda: None
+                self.display(pos=0)
+                fp_write('\n')
+            else:
+                # clear previous display
+                if self.display(msg='', pos=pos) and not pos:
+                    fp_write('\r')
+
+    def clear(self, nolock=False):
+        """Clear current bar display."""
+        if self.disable:
+            return
+
+        if not nolock:
+            self._lock.acquire()
+        pos = abs(self.pos)
+        if pos < (self.nrows or 20):
+            self.moveto(pos)
+            self.sp('')
+            self.fp.write('\r')  # place cursor back at the beginning of line
+            self.moveto(-pos)
+        if not nolock:
+            self._lock.release()
+
+    def refresh(self, nolock=False, lock_args=None):
+        """
+        Force refresh the display of this bar.
+
+        Parameters
+        ----------
+        nolock  : bool, optional
+            If `True`, does not lock.
+            If [default: `False`]: calls `acquire()` on internal lock.
+        lock_args  : tuple, optional
+            Passed to internal lock's `acquire()`.
+            If specified, will only `display()` if `acquire()` returns `True`.
+        """
+        if self.disable:
+            return
+
+        if not nolock:
+            if lock_args:
+                if not self._lock.acquire(*lock_args):
+                    return False
+            else:
+                self._lock.acquire()
+        self.display()
+        if not nolock:
+            self._lock.release()
+        return True
+
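+    # A hedged sketch of a non-blocking redraw: `lock_args` is forwarded to
+    # the internal lock's `acquire()`, so `(False,)` is meant to skip the
+    # redraw (returning False) rather than block when the lock is contended:
+    # >>> t = tqdm(total=100)
+    # >>> t.refresh(lock_args=(False,))
+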
+    def unpause(self):
+        """Restart tqdm timer from last print time."""
+        if self.disable:
+            return
+        cur_t = self._time()
+        self.start_t += cur_t - self.last_print_t
+        self.last_print_t = cur_t
+
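+    # Sketch: keep a blocking prompt out of the rate estimate by shifting
+    # `start_t` forward on resume (`files` is hypothetical):
+    # >>> t = tqdm(files)
+    # >>> for f in t:
+    # ...     input('continue? ')  # blocking wait is counted...
+    # ...     t.unpause()          # ...so shift the timer to exclude it
+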
+    def reset(self, total=None):
+        """
+        Resets to 0 iterations for repeated use.
+
+        Consider combining with `leave=True`.
+
+        Parameters
+        ----------
+        total  : int or float, optional. Total to use for the new bar.
+        """
+        self.n = 0
+        if total is not None:
+            self.total = total
+        if self.disable:
+            return
+        self.last_print_n = 0
+        self.last_print_t = self.start_t = self._time()
+        self._ema_dn = EMA(self.smoothing)
+        self._ema_dt = EMA(self.smoothing)
+        self._ema_miniters = EMA(self.smoothing)
+        self.refresh()
+
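+    # Sketch: reuse one bar across epochs instead of re-creating it;
+    # `loader` and `n_epochs` are hypothetical:
+    # >>> with tqdm(total=len(loader)) as t:
+    # ...     for _ in range(n_epochs):
+    # ...         t.reset()
+    # ...         for batch in loader:
+    # ...             t.update()
+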
+    def set_description(self, desc=None, refresh=True):
+        """
+        Set/modify description of the progress bar.
+
+        Parameters
+        ----------
+        desc  : str, optional
+        refresh  : bool, optional
+            Forces refresh [default: True].
+        """
+        self.desc = desc + ': ' if desc else ''
+        if refresh:
+            self.refresh()
+
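+    # Sketch: per-item descriptions while iterating (hypothetical `files`):
+    # >>> t = tqdm(files)
+    # >>> for fname in t:
+    # ...     t.set_description('Processing %s' % fname)
+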
+    def set_description_str(self, desc=None, refresh=True):
+        """Set/modify description without ': ' appended."""
+        self.desc = desc or ''
+        if refresh:
+            self.refresh()
+
+    def set_postfix(self, ordered_dict=None, refresh=True, **kwargs):
+        """
+        Set/modify postfix (additional stats)
+        with automatic formatting based on datatype.
+
+        Parameters
+        ----------
+        ordered_dict  : dict or OrderedDict, optional
+        refresh  : bool, optional
+            Forces refresh [default: True].
+        kwargs  : dict, optional
+        """
+        # Sort in alphabetical order to be more deterministic
+        postfix = OrderedDict([] if ordered_dict is None else ordered_dict)
+        for key in sorted(kwargs.keys()):
+            postfix[key] = kwargs[key]
+        # Preprocess stats according to datatype
+        for key in postfix.keys():
+            # Number: limit the length of the string
+            if isinstance(postfix[key], Number):
+                postfix[key] = self.format_num(postfix[key])
+            # Else for any other type, try to get the string conversion
+            elif not isinstance(postfix[key], _basestring):
+                postfix[key] = str(postfix[key])
+            # Else if it's a string, don't need to preprocess anything
+        # Stitch together to get the final postfix
+        self.postfix = ', '.join(key + '=' + postfix[key].strip()
+                                 for key in postfix.keys())
+        if refresh:
+            self.refresh()
+
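+    # Sketch: numbers are shortened via `format_num`, other values are
+    # str()-ified, and keyword keys are appended in sorted order:
+    # >>> t = tqdm(total=10)
+    # >>> t.set_postfix(loss=0.12345678, stage='warmup')
+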
+    def set_postfix_str(self, s='', refresh=True):
+        """
+        Postfix without dictionary expansion, similar to prefix handling.
+        """
+        self.postfix = str(s)
+        if refresh:
+            self.refresh()
+
+    def moveto(self, n):
+        # TODO: private method
+        self.fp.write(_unicode('\n' * n + _term_move_up() * -n))
+        getattr(self.fp, 'flush', lambda: None)()
+
+    @property
+    def format_dict(self):
+        """Public API for read-only member access."""
+        if self.disable and not hasattr(self, 'unit'):
+            return defaultdict(lambda: None, {
+                'n': self.n, 'total': self.total, 'elapsed': 0, 'unit': 'it'})
+        if self.dynamic_ncols:
+            self.ncols, self.nrows = self.dynamic_ncols(self.fp)
+        return {
+            'n': self.n, 'total': self.total,
+            'elapsed': self._time() - self.start_t if hasattr(self, 'start_t') else 0,
+            'ncols': self.ncols, 'nrows': self.nrows, 'prefix': self.desc,
+            'ascii': self.ascii, 'unit': self.unit, 'unit_scale': self.unit_scale,
+            'rate': self._ema_dn() / self._ema_dt() if self._ema_dt() else None,
+            'bar_format': self.bar_format, 'postfix': self.postfix,
+            'unit_divisor': self.unit_divisor, 'initial': self.initial,
+            'colour': self.colour}
+
+    def display(self, msg=None, pos=None):
+        """
+        Use `self.sp` to display `msg` in the specified `pos`.
+
+        Consider overloading this function when inheriting to use e.g.:
+        `self.some_frontend(**self.format_dict)` instead of `self.sp`.
+
+        Parameters
+        ----------
+        msg  : str, optional. What to display (default: `repr(self)`).
+        pos  : int, optional. Position to `moveto`
+          (default: `abs(self.pos)`).
+        """
+        if pos is None:
+            pos = abs(self.pos)
+
+        nrows = self.nrows or 20
+        if pos >= nrows - 1:
+            if pos >= nrows:
+                return False
+            if msg or msg is None:  # override at `nrows - 1`
+                msg = " ... (more hidden) ..."
+
+        if not hasattr(self, "sp"):
+            raise TqdmDeprecationWarning(
+                "Please use `tqdm.gui.tqdm(...)`"
+                " instead of `tqdm(..., gui=True)`\n",
+                fp_write=getattr(self.fp, 'write', sys.stderr.write))
+
+        if pos:
+            self.moveto(pos)
+        self.sp(self.__str__() if msg is None else msg)
+        if pos:
+            self.moveto(-pos)
+        return True
+
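+    # Sketch of the overloading suggested above: route `format_dict` to a
+    # hypothetical frontend instead of writing to `self.fp`:
+    # >>> class tqdm_custom(tqdm):
+    # ...     def display(self, msg=None, pos=None):
+    # ...         send_to_frontend(**self.format_dict)  # hypothetical sink
+    # ...         return True
+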
+    @classmethod
+    @contextmanager
+    def wrapattr(cls, stream, method, total=None, bytes=True, **tqdm_kwargs):
+        """
+        stream  : file-like object.
+        method  : str, "read" or "write". The result of `read()` and
+            the first argument of `write()` should have a `len()`.
+
+        >>> with tqdm.wrapattr(file_obj, "read", total=file_obj.size) as fobj:
+        ...     while True:
+        ...         chunk = fobj.read(chunk_size)
+        ...         if not chunk:
+        ...             break
+        """
+        with cls(total=total, **tqdm_kwargs) as t:
+            if bytes:
+                t.unit = "B"
+                t.unit_scale = True
+                t.unit_divisor = 1024
+            yield CallbackIOWrapper(t.update, stream, method)
+
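+# The "write" counterpart of the `read` example above (a sketch; `src` and
+# `dst` are hypothetical paths):
+# >>> import os, shutil
+# >>> with open(src, 'rb') as fsrc, open(dst, 'wb') as fraw:
+# ...     with tqdm.wrapattr(fraw, "write", total=os.path.getsize(src)) as fdst:
+# ...         shutil.copyfileobj(fsrc, fdst)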
+
+def trange(*args, **kwargs):
+    """
+    A shortcut for tqdm(xrange(*args), **kwargs).
+    On Python3+ range is used instead of xrange.
+    """
+    return tqdm(_range(*args), **kwargs)
diff --git a/.local/lib/python3.8/site-packages/tqdm/tk.py b/.local/lib/python3.8/site-packages/tqdm/tk.py
new file mode 100644
index 0000000..92adb51
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/tqdm/tk.py
@@ -0,0 +1,207 @@
+"""
+Tkinter GUI progressbar decorator for iterators.
+
+Usage:
+>>> from tqdm.tk import trange, tqdm
+>>> for i in trange(10):
+...     ...
+"""
+from __future__ import absolute_import, division
+
+import re
+import sys
+from warnings import warn
+
+try:
+    import tkinter
+    import tkinter.ttk as ttk
+except ImportError:
+    import Tkinter as tkinter
+    import ttk as ttk
+
+from .std import TqdmExperimentalWarning, TqdmWarning
+from .std import tqdm as std_tqdm
+from .utils import _range
+
+__author__ = {"github.com/": ["richardsheridan", "casperdcl"]}
+__all__ = ['tqdm_tk', 'ttkrange', 'tqdm', 'trange']
+
+
+class tqdm_tk(std_tqdm):  # pragma: no cover
+    """
+    Experimental Tkinter GUI version of tqdm!
+
+    Note: Window interactivity suffers if `tqdm_tk` is not running within
+    a Tkinter mainloop and values are generated infrequently. In this case,
+    consider calling `tqdm_tk.refresh()` frequently in the Tk thread.
+    """
+
+    # TODO: @classmethod: write()?
+
+    def __init__(self, *args, **kwargs):
+        """
+        This class accepts the following parameters *in addition* to
+        the parameters accepted by `tqdm`.
+
+        Parameters
+        ----------
+        grab  : bool, optional
+            Grab the input across all windows of the process.
+        tk_parent  : `tkinter.Wm`, optional
+            Parent Tk window.
+        cancel_callback  : Callable, optional
+            Create a cancel button and set `cancel_callback` to be called
+            when the cancel or window close button is clicked.
+        """
+        kwargs = kwargs.copy()
+        kwargs['gui'] = True
+        # convert disable = None to False
+        kwargs['disable'] = bool(kwargs.get('disable', False))
+        self._warn_leave = 'leave' in kwargs
+        grab = kwargs.pop('grab', False)
+        tk_parent = kwargs.pop('tk_parent', None)
+        self._cancel_callback = kwargs.pop('cancel_callback', None)
+        super(tqdm_tk, self).__init__(*args, **kwargs)
+
+        if self.disable:
+            return
+
+        if tk_parent is None:  # Discover parent widget
+            try:
+                tk_parent = tkinter._default_root
+            except AttributeError:
+                raise AttributeError(
+                    "`tk_parent` required when using `tkinter.NoDefaultRoot()`")
+            if tk_parent is None:  # use new default root window as display
+                self._tk_window = tkinter.Tk()
+            else:  # some other windows already exist
+                self._tk_window = tkinter.Toplevel()
+        else:
+            self._tk_window = tkinter.Toplevel(tk_parent)
+
+        warn("GUI is experimental/alpha", TqdmExperimentalWarning, stacklevel=2)
+        self._tk_dispatching = self._tk_dispatching_helper()
+
+        self._tk_window.protocol("WM_DELETE_WINDOW", self.cancel)
+        self._tk_window.wm_title(self.desc)
+        self._tk_window.wm_attributes("-topmost", 1)
+        self._tk_window.after(0, lambda: self._tk_window.wm_attributes("-topmost", 0))
+        self._tk_n_var = tkinter.DoubleVar(self._tk_window, value=0)
+        self._tk_text_var = tkinter.StringVar(self._tk_window)
+        pbar_frame = ttk.Frame(self._tk_window, padding=5)
+        pbar_frame.pack()
+        _tk_label = ttk.Label(pbar_frame, textvariable=self._tk_text_var,
+                              wraplength=600, anchor="center", justify="center")
+        _tk_label.pack()
+        self._tk_pbar = ttk.Progressbar(
+            pbar_frame, variable=self._tk_n_var, length=450)
+        if self.total is not None:
+            self._tk_pbar.configure(maximum=self.total)
+        else:
+            self._tk_pbar.configure(mode="indeterminate")
+        self._tk_pbar.pack()
+        if self._cancel_callback is not None:
+            _tk_button = ttk.Button(pbar_frame, text="Cancel", command=self.cancel)
+            _tk_button.pack()
+        if grab:
+            self._tk_window.grab_set()
+
+    def close(self):
+        if self.disable:
+            return
+
+        self.disable = True
+
+        with self.get_lock():
+            self._instances.remove(self)
+
+        def _close():
+            self._tk_window.after('idle', self._tk_window.destroy)
+            if not self._tk_dispatching:
+                self._tk_window.update()
+
+        self._tk_window.protocol("WM_DELETE_WINDOW", _close)
+
+        # if leave is set but we are self-dispatching, the left window is
+        # totally unresponsive unless the user manually dispatches
+        if not self.leave:
+            _close()
+        elif not self._tk_dispatching:
+            if self._warn_leave:
+                warn("leave flag ignored if not in tkinter mainloop",
+                     TqdmWarning, stacklevel=2)
+            _close()
+
+    def clear(self, *_, **__):
+        pass
+
+    def display(self, *_, **__):
+        self._tk_n_var.set(self.n)
+        d = self.format_dict
+        # swap {bar} for a placeholder tag, then strip the tag (and any
+        # flanking pipes) from the rendered text
+        d['bar_format'] = (d['bar_format'] or "{l_bar}<bar/>{r_bar}").replace(
+            "{bar}", "<bar/>")
+        msg = self.format_meter(**d)
+        if '<bar/>' in msg:
+            msg = "".join(re.split(r'\|?<bar/>\|?', msg, 1))
+        self._tk_text_var.set(msg)
+        if not self._tk_dispatching:
+            self._tk_window.update()
+
+    def set_description(self, desc=None, refresh=True):
+        self.set_description_str(desc, refresh)
+
+    def set_description_str(self, desc=None, refresh=True):
+        self.desc = desc
+        if not self.disable:
+            self._tk_window.wm_title(desc)
+            if refresh and not self._tk_dispatching:
+                self._tk_window.update()
+
+    def cancel(self):
+        """
+        `cancel_callback()` followed by `close()`
+        when close/cancel buttons clicked.
+        """
+        if self._cancel_callback is not None:
+            self._cancel_callback()
+        self.close()
+
+    def reset(self, total=None):
+        """
+        Resets to 0 iterations for repeated use.
+
+        Parameters
+        ----------
+        total  : int or float, optional. Total to use for the new bar.
+        """
+        if hasattr(self, '_tk_pbar'):
+            if total is None:
+                self._tk_pbar.configure(maximum=100, mode="indeterminate")
+            else:
+                self._tk_pbar.configure(maximum=total, mode="determinate")
+        super(tqdm_tk, self).reset(total=total)
+
+    @staticmethod
+    def _tk_dispatching_helper():
+        """determine if Tkinter mainloop is dispatching events"""
+        codes = {tkinter.mainloop.__code__, tkinter.Misc.mainloop.__code__}
+        for frame in sys._current_frames().values():
+            while frame:
+                if frame.f_code in codes:
+                    return True
+                frame = frame.f_back
+        return False
+
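+# A hedged usage sketch: a cancellable Tk bar; `items` and `work()` are
+# hypothetical. The window's cancel/close button fires `cancel_callback`
+# and then `close()`:
+# >>> stop = []
+# >>> bar = tqdm_tk(items, cancel_callback=lambda: stop.append(True))
+# >>> for item in bar:
+# ...     if stop:
+# ...         break
+# ...     work(item)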
+
+def ttkrange(*args, **kwargs):
+    """
+    A shortcut for `tqdm.tk.tqdm(xrange(*args), **kwargs)`.
+    On Python3+, `range` is used instead of `xrange`.
+    """
+    return tqdm_tk(_range(*args), **kwargs)
+
+
+# Aliases
+tqdm = tqdm_tk
+trange = ttkrange
diff --git a/.local/lib/python3.8/site-packages/tqdm/tqdm.1 b/.local/lib/python3.8/site-packages/tqdm/tqdm.1
new file mode 100644
index 0000000..0533198
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/tqdm/tqdm.1
@@ -0,0 +1,316 @@
+.\" Automatically generated by Pandoc 1.19.2
+.\"
+.TH "TQDM" "1" "2015\-2021" "tqdm User Manuals" ""
+.hy
+.SH NAME
+.PP
+tqdm \- fast, extensible progress bar for Python and CLI
+.SH SYNOPSIS
+.PP
+tqdm [\f[I]options\f[]]
+.SH DESCRIPTION
+.PP
+See <https://github.com/tqdm/tqdm>.
+Can be used as a pipe:
+.IP
+.nf
+\f[C]
+$\ #\ count\ lines\ of\ code
+$\ cat\ *.py\ |\ tqdm\ |\ wc\ \-l
+327it\ [00:00,\ 981773.38it/s]
+327
+
+$\ #\ find\ all\ files
+$\ find\ .\ \-name\ "*.py"\ |\ tqdm\ |\ wc\ \-l
+432it\ [00:00,\ 833842.30it/s]
+432
+
+#\ ...\ and\ more\ info
+$\ find\ .\ \-name\ \[aq]*.py\[aq]\ \-exec\ wc\ \-l\ \\{}\ \\;\ \\
+\ \ |\ tqdm\ \-\-total\ 432\ \-\-unit\ files\ \-\-desc\ counting\ \\
+\ \ |\ awk\ \[aq]{\ sum\ +=\ $1\ };\ END\ {\ print\ sum\ }\[aq]
+counting:\ 100%|█████████|\ 432/432\ [00:00<00:00,\ 794361.83files/s]
+131998
+\f[]
+.fi
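+.PP
+Byte progress is also possible (a sketch based on the \f[C]\-\-bytes\f[] and
+\f[C]\-\-total\f[] options below):
+.IP
+.nf
+\f[C]
+$\ cat\ big.file\ |\ tqdm\ \-\-bytes\ \-\-total\ $(stat\ \-c%s\ big.file)\ >\ big.file.copy
+\f[]
+.fi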
+.SH OPTIONS
+.TP
+.B \-h, \-\-help
+Print this help and exit.
+.RS
+.RE
+.TP
+.B \-v, \-\-version
+Print version and exit.
+.RS
+.RE
+.TP
+.B \-\-desc=\f[I]desc\f[]
+str, optional.
+Prefix for the progressbar.
+.RS
+.RE
+.TP
+.B \-\-total=\f[I]total\f[]
+int or float, optional.
+The number of expected iterations.
+If unspecified, len(iterable) is used if possible.
+If float("inf") or as a last resort, only basic progress statistics are
+displayed (no ETA, no progressbar).
+If \f[C]gui\f[] is True and this parameter needs subsequent updating,
+specify an initial arbitrary large positive number, e.g.
+9e9.
+.RS
+.RE
+.TP
+.B \-\-leave
+bool, optional.
+If [default: True], keeps all traces of the progressbar upon termination
+of iteration.
+If \f[C]None\f[], will leave only if \f[C]position\f[] is \f[C]0\f[].
+.RS
+.RE
+.TP
+.B \-\-ncols=\f[I]ncols\f[]
+int, optional.
+The width of the entire output message.
+If specified, dynamically resizes the progressbar to stay within this
+bound.
+If unspecified, attempts to use environment width.
+The fallback is a meter width of 10 and no limit for the counter and
+statistics.
+If 0, will not print any meter (only stats).
+.RS
+.RE
+.TP
+.B \-\-mininterval=\f[I]mininterval\f[]
+float, optional.
+Minimum progress display update interval [default: 0.1] seconds.
+.RS
+.RE
+.TP
+.B \-\-maxinterval=\f[I]maxinterval\f[]
+float, optional.
+Maximum progress display update interval [default: 10] seconds.
+Automatically adjusts \f[C]miniters\f[] to correspond to
+\f[C]mininterval\f[] after long display update lag.
+Only works if \f[C]dynamic_miniters\f[] or monitor thread is enabled.
+.RS
+.RE
+.TP
+.B \-\-miniters=\f[I]miniters\f[]
+int or float, optional.
+Minimum progress display update interval, in iterations.
+If 0 and \f[C]dynamic_miniters\f[], will automatically adjust to equal
+\f[C]mininterval\f[] (more CPU efficient, good for tight loops).
+If > 0, will skip display of specified number of iterations.
+Tweak this and \f[C]mininterval\f[] to get very efficient loops.
+If your progress is erratic with both fast and slow iterations (network,
+skipping items, etc) you should set miniters=1.
+.RS
+.RE
+.TP
+.B \-\-ascii=\f[I]ascii\f[]
+bool or str, optional.
+If unspecified or False, use unicode (smooth blocks) to fill the meter.
+The fallback is to use ASCII characters " 123456789#".
+.RS
+.RE
+.TP
+.B \-\-disable
+bool, optional.
+Whether to disable the entire progressbar wrapper [default: False].
+If set to None, disable on non\-TTY.
+.RS
+.RE
+.TP
+.B \-\-unit=\f[I]unit\f[]
+str, optional.
+String that will be used to define the unit of each iteration [default:
+it].
+.RS
+.RE
+.TP
+.B \-\-unit\-scale=\f[I]unit_scale\f[]
+bool or int or float, optional.
+If 1 or True, the number of iterations will be reduced/scaled
+automatically and a metric prefix following the International System of
+Units standard will be added (kilo, mega, etc.) [default: False].
+If any other non\-zero number, will scale \f[C]total\f[] and \f[C]n\f[].
+.RS
+.RE
+.TP
+.B \-\-dynamic\-ncols
+bool, optional.
+If set, constantly alters \f[C]ncols\f[] and \f[C]nrows\f[] to the
+environment (allowing for window resizes) [default: False].
+.RS
+.RE
+.TP
+.B \-\-smoothing=\f[I]smoothing\f[]
+float, optional.
+Exponential moving average smoothing factor for speed estimates (ignored
+in GUI mode).
+Ranges from 0 (average speed) to 1 (current/instantaneous speed)
+[default: 0.3].
+.RS
+.RE
+.TP
+.B \-\-bar\-format=\f[I]bar_format\f[]
+str, optional.
+Specify a custom bar string formatting.
+May impact performance.
+[default: \[aq]{l_bar}{bar}{r_bar}\[aq]], where l_bar=\[aq]{desc}:
+{percentage:3.0f}%|\[aq] and r_bar=\[aq]| {n_fmt}/{total_fmt}
+[{elapsed}<{remaining}, \[aq] \[aq]{rate_fmt}{postfix}]\[aq] Possible
+vars: l_bar, bar, r_bar, n, n_fmt, total, total_fmt, percentage,
+elapsed, elapsed_s, ncols, nrows, desc, unit, rate, rate_fmt,
+rate_noinv, rate_noinv_fmt, rate_inv, rate_inv_fmt, postfix,
+unit_divisor, remaining, remaining_s, eta.
+Note that a trailing ": " is automatically removed after {desc} if the
+latter is empty.
+.RS
+.RE
+.TP
+.B \-\-initial=\f[I]initial\f[]
+int or float, optional.
+The initial counter value.
+Useful when restarting a progress bar [default: 0].
+If using float, consider specifying \f[C]{n:.3f}\f[] or similar in
+\f[C]bar_format\f[], or specifying \f[C]unit_scale\f[].
+.RS
+.RE
+.TP
+.B \-\-position=\f[I]position\f[]
+int, optional.
+Specify the line offset to print this bar (starting from 0).
+Automatic if unspecified.
+Useful to manage multiple bars at once (eg, from threads).
+.RS
+.RE
+.TP
+.B \-\-postfix=\f[I]postfix\f[]
+dict or *, optional.
+Specify additional stats to display at the end of the bar.
+Calls \f[C]set_postfix(**postfix)\f[] if possible (dict).
+.RS
+.RE
+.TP
+.B \-\-unit\-divisor=\f[I]unit_divisor\f[]
+float, optional.
+[default: 1000], ignored unless \f[C]unit_scale\f[] is True.
+.RS
+.RE
+.TP
+.B \-\-write\-bytes
+bool, optional.
+If (default: None) and \f[C]file\f[] is unspecified, bytes will be
+written in Python 2.
+If \f[C]True\f[] will also write bytes.
+In all other cases will default to unicode.
+.RS
+.RE
+.TP
+.B \-\-lock\-args=\f[I]lock_args\f[]
+tuple, optional.
+Passed to \f[C]refresh\f[] for intermediate output (initialisation,
+iterating, and updating).
+.RS
+.RE
+.TP
+.B \-\-nrows=\f[I]nrows\f[]
+int, optional.
+The screen height.
+If specified, hides nested bars outside this bound.
+If unspecified, attempts to use environment height.
+The fallback is 20.
+.RS
+.RE
+.TP
+.B \-\-colour=\f[I]colour\f[]
+str, optional.
+Bar colour (e.g.
+\[aq]green\[aq], \[aq]#00ff00\[aq]).
+.RS
+.RE
+.TP
+.B \-\-delay=\f[I]delay\f[]
+float, optional.
+Don\[aq]t display until [default: 0] seconds have elapsed.
+.RS
+.RE
+.TP
+.B \-\-delim=\f[I]delim\f[]
+chr, optional.
+Delimiting character [default: \[aq]\\n\[aq]].
+Use \[aq]\\0\[aq] for null.
+N.B.: on Windows systems, Python converts \[aq]\\n\[aq] to
+\[aq]\\r\\n\[aq].
+.RS
+.RE
+.TP
+.B \-\-buf\-size=\f[I]buf_size\f[]
+int, optional.
+String buffer size in bytes [default: 256] used when \f[C]delim\f[] is
+specified.
+.RS
+.RE
+.TP
+.B \-\-bytes
+bool, optional.
+If true, will count bytes, ignore \f[C]delim\f[], and default
+\f[C]unit_scale\f[] to True, \f[C]unit_divisor\f[] to 1024, and
+\f[C]unit\f[] to \[aq]B\[aq].
+.RS
+.RE
+.TP
+.B \-\-tee
+bool, optional.
+If true, passes \f[C]stdin\f[] to both \f[C]stderr\f[] and
+\f[C]stdout\f[].
+.RS
+.RE
+.TP
+.B \-\-update
+bool, optional.
+If true, will treat input as newly elapsed iterations, i.e.
+numbers to pass to \f[C]update()\f[].
+Note that this is slow (~2e5 it/s) since every input must be decoded as
+a number.
+.RS
+.RE
+.TP
+.B \-\-update\-to
+bool, optional.
+If true, will treat input as total elapsed iterations, i.e.
+numbers to assign to \f[C]self.n\f[].
+Note that this is slow (~2e5 it/s) since every input must be decoded as
+a number.
+.RS
+.RE
+.TP
+.B \-\-null
+bool, optional.
+If true, will discard input (no stdout).
+.RS
+.RE
+.TP
+.B \-\-manpath=\f[I]manpath\f[]
+str, optional.
+Directory in which to install tqdm man pages.
+.RS
+.RE
+.TP
+.B \-\-comppath=\f[I]comppath\f[]
+str, optional.
+Directory in which to place tqdm completion.
+.RS
+.RE
+.TP
+.B \-\-log=\f[I]log\f[]
+str, optional.
+CRITICAL|FATAL|ERROR|WARN(ING)|[default: \[aq]INFO\[aq]]|DEBUG|NOTSET.
+.RS
+.RE
+.SH AUTHORS
+tqdm developers <https://github.com/tqdm>.
diff --git a/.local/lib/python3.8/site-packages/tqdm/utils.py b/.local/lib/python3.8/site-packages/tqdm/utils.py
new file mode 100644
index 0000000..0632b8d
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/tqdm/utils.py
@@ -0,0 +1,354 @@
+"""
+General helpers required for `tqdm.std`.
+"""
+import os
+import re
+import sys
+from functools import wraps
+from warnings import warn
+from weakref import proxy
+
+# py2/3 compat
+try:
+    _range = xrange
+except NameError:
+    _range = range
+
+try:
+    _unich = unichr
+except NameError:
+    _unich = chr
+
+try:
+    _unicode = unicode
+except NameError:
+    _unicode = str
+
+try:
+    _basestring = basestring
+except NameError:
+    _basestring = str
+
+CUR_OS = sys.platform
+IS_WIN = any(CUR_OS.startswith(i) for i in ['win32', 'cygwin'])
+IS_NIX = any(CUR_OS.startswith(i) for i in ['aix', 'linux', 'darwin'])
+RE_ANSI = re.compile(r"\x1b\[[;\d]*[A-Za-z]")
+
+try:
+    if IS_WIN:
+        import colorama
+    else:
+        raise ImportError
+except ImportError:
+    colorama = None
+else:
+    try:
+        colorama.init(strip=False)
+    except TypeError:
+        colorama.init()
+
+
+class FormatReplace(object):
+    """
+    >>> a = FormatReplace('something')
+    >>> "{:5d}".format(a)
+    'something'
+    """  # NOQA: P102
+    def __init__(self, replace=''):
+        self.replace = replace
+        self.format_called = 0
+
+    def __format__(self, _):
+        self.format_called += 1
+        return self.replace
+
+
+class Comparable(object):
+    """Assumes child has self._comparable attr/@property"""
+    def __lt__(self, other):
+        return self._comparable < other._comparable
+
+    def __le__(self, other):
+        return (self < other) or (self == other)
+
+    def __eq__(self, other):
+        return self._comparable == other._comparable
+
+    def __ne__(self, other):
+        return not self == other
+
+    def __gt__(self, other):
+        return not self <= other
+
+    def __ge__(self, other):
+        return not self < other
+
+
+class ObjectWrapper(object):
+    def __getattr__(self, name):
+        return getattr(self._wrapped, name)
+
+    def __setattr__(self, name, value):
+        return setattr(self._wrapped, name, value)
+
+    def wrapper_getattr(self, name):
+        """Actual `self.getattr` rather than self._wrapped.getattr"""
+        try:
+            return object.__getattr__(self, name)
+        except AttributeError:  # py2
+            return getattr(self, name)
+
+    def wrapper_setattr(self, name, value):
+        """Actual `self.setattr` rather than self._wrapped.setattr"""
+        return object.__setattr__(self, name, value)
+
+    def __init__(self, wrapped):
+        """
+        Thin wrapper around a given object
+        """
+        self.wrapper_setattr('_wrapped', wrapped)
+
+
+class SimpleTextIOWrapper(ObjectWrapper):
+    """
+    Change only `.write()` of the wrapped object by encoding the passed
+    value and passing the result to the wrapped object's `.write()` method.
+    """
+    # pylint: disable=too-few-public-methods
+    def __init__(self, wrapped, encoding):
+        super(SimpleTextIOWrapper, self).__init__(wrapped)
+        self.wrapper_setattr('encoding', encoding)
+
+    def write(self, s):
+        """
+        Encode `s` and pass to the wrapped object's `.write()` method.
+        """
+        return self._wrapped.write(s.encode(self.wrapper_getattr('encoding')))
+
+    def __eq__(self, other):
+        return self._wrapped == getattr(other, '_wrapped', other)
+
+
+class DisableOnWriteError(ObjectWrapper):
+    """
+    Disable the given `tqdm_instance` upon `write()` or `flush()` errors.
+    """
+    @staticmethod
+    def disable_on_exception(tqdm_instance, func):
+        """
+        Quietly set `tqdm_instance.miniters=inf` if `func` raises `errno=5`.
+        """
+        tqdm_instance = proxy(tqdm_instance)
+
+        def inner(*args, **kwargs):
+            try:
+                return func(*args, **kwargs)
+            except OSError as e:
+                if e.errno != 5:
+                    raise
+                try:
+                    tqdm_instance.miniters = float('inf')
+                except ReferenceError:
+                    pass
+            except ValueError as e:
+                if 'closed' not in str(e):
+                    raise
+                try:
+                    tqdm_instance.miniters = float('inf')
+                except ReferenceError:
+                    pass
+        return inner
+
+    def __init__(self, wrapped, tqdm_instance):
+        super(DisableOnWriteError, self).__init__(wrapped)
+        if hasattr(wrapped, 'write'):
+            self.wrapper_setattr(
+                'write', self.disable_on_exception(tqdm_instance, wrapped.write))
+        if hasattr(wrapped, 'flush'):
+            self.wrapper_setattr(
+                'flush', self.disable_on_exception(tqdm_instance, wrapped.flush))
+
+    def __eq__(self, other):
+        return self._wrapped == getattr(other, '_wrapped', other)
+
+
+class CallbackIOWrapper(ObjectWrapper):
+    def __init__(self, callback, stream, method="read"):
+        """
+        Wrap a given `file`-like object's `read()` or `write()` to report
+        lengths to the given `callback`
+        """
+        super(CallbackIOWrapper, self).__init__(stream)
+        func = getattr(stream, method)
+        if method == "write":
+            @wraps(func)
+            def write(data, *args, **kwargs):
+                res = func(data, *args, **kwargs)
+                callback(len(data))
+                return res
+            self.wrapper_setattr('write', write)
+        elif method == "read":
+            @wraps(func)
+            def read(*args, **kwargs):
+                data = func(*args, **kwargs)
+                callback(len(data))
+                return data
+            self.wrapper_setattr('read', read)
+        else:
+            raise KeyError("Can only wrap read/write methods")
+
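+# Standalone sketch (assumes an existing bar `t` and a path `fname`): report
+# read progress without going through `tqdm.wrapattr`:
+# >>> with open(fname, 'rb') as f:
+# ...     reader = CallbackIOWrapper(t.update, f, "read")
+# ...     while reader.read(1 << 16):
+# ...         pass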
+
+def _is_utf(encoding):
+    try:
+        u'\u2588\u2589'.encode(encoding)
+    except UnicodeEncodeError:
+        return False
+    except Exception:
+        try:
+            return encoding.lower().startswith('utf-') or ('U8' == encoding)
+        except Exception:
+            return False
+    else:
+        return True
+
+
+def _supports_unicode(fp):
+    try:
+        return _is_utf(fp.encoding)
+    except AttributeError:
+        return False
+
+
+def _is_ascii(s):
+    if isinstance(s, str):
+        for c in s:
+            if ord(c) > 255:
+                return False
+        return True
+    return _supports_unicode(s)
+
+
+def _screen_shape_wrapper():  # pragma: no cover
+    """
+    Return a function which returns console dimensions (width, height).
+    Supported: linux, osx, windows, cygwin.
+    """
+    _screen_shape = None
+    if IS_WIN:
+        _screen_shape = _screen_shape_windows
+        if _screen_shape is None:
+            _screen_shape = _screen_shape_tput
+    if IS_NIX:
+        _screen_shape = _screen_shape_linux
+    return _screen_shape
+
+
+def _screen_shape_windows(fp):  # pragma: no cover
+    try:
+        import struct
+        from ctypes import create_string_buffer, windll
+        from sys import stdin, stdout
+
+        io_handle = -12  # assume stderr
+        if fp == stdin:
+            io_handle = -10
+        elif fp == stdout:
+            io_handle = -11
+
+        h = windll.kernel32.GetStdHandle(io_handle)
+        csbi = create_string_buffer(22)
+        res = windll.kernel32.GetConsoleScreenBufferInfo(h, csbi)
+        if res:
+            (_bufx, _bufy, _curx, _cury, _wattr, left, top, right, bottom,
+             _maxx, _maxy) = struct.unpack("hhhhHhhhhhh", csbi.raw)
+            return right - left, bottom - top  # +1
+    except Exception:  # nosec
+        pass
+    return None, None
+
+
+def _screen_shape_tput(*_):  # pragma: no cover
+    """cygwin xterm (windows)"""
+    try:
+        import shlex
+        from subprocess import check_output  # nosec
+        # check_call would return tput's exit status (0), not the printed
+        # dimension; check_output captures stdout for parsing
+        return [int(check_output(shlex.split('tput ' + i))) - 1
+                for i in ('cols', 'lines')]
+    except Exception:  # nosec
+        pass
+    return None, None
+
+
+def _screen_shape_linux(fp):  # pragma: no cover
+
+    try:
+        from array import array
+        from fcntl import ioctl
+        from termios import TIOCGWINSZ
+    except ImportError:
+        return None, None
+    else:
+        try:
+            rows, cols = array('h', ioctl(fp, TIOCGWINSZ, '\0' * 8))[:2]
+            return cols, rows
+        except Exception:
+            try:
+                return [int(os.environ[i]) - 1 for i in ("COLUMNS", "LINES")]
+            except (KeyError, ValueError):
+                return None, None
+
+
+def _environ_cols_wrapper():  # pragma: no cover
+    """
+    Return a function which returns console width.
+    Supported: linux, osx, windows, cygwin.
+    """
+    warn("Use `_screen_shape_wrapper()(file)[0]` instead of"
+         " `_environ_cols_wrapper()(file)`", DeprecationWarning, stacklevel=2)
+    shape = _screen_shape_wrapper()
+    if not shape:
+        return None
+
+    @wraps(shape)
+    def inner(fp):
+        return shape(fp)[0]
+
+    return inner
+
+
+def _term_move_up():  # pragma: no cover
+    return '' if (os.name == 'nt') and (colorama is None) else '\x1b[A'
+
+
+try:
+    # TODO consider using wcswidth third-party package for 0-width characters
+    from unicodedata import east_asian_width
+except ImportError:
+    _text_width = len
+else:
+    def _text_width(s):
+        return sum(2 if east_asian_width(ch) in 'FW' else 1 for ch in _unicode(s))
+
+
+def disp_len(data):
+    """
+    Returns the real on-screen length of a string which may contain
+    ANSI control codes and wide chars.
+    """
+    return _text_width(RE_ANSI.sub('', data))
+
+
+def disp_trim(data, length):
+    """
+    Trim a string which may contain ANSI control characters.
+    """
+    if len(data) == disp_len(data):
+        return data[:length]
+
+    ansi_present = bool(RE_ANSI.search(data))
+    while disp_len(data) > length:  # carefully delete one char at a time
+        data = data[:-1]
+    if ansi_present and bool(RE_ANSI.search(data)):
+        # assume ANSI reset is required
+        return data if data.endswith("\033[0m") else data + "\033[0m"
+    return data
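+
+# Doctest-style sketch: ANSI codes don't count toward display width, and
+# trimming re-appends a reset code when ANSI survives the cut:
+# >>> disp_len('\x1b[31mabc\x1b[0m')
+# 3
+# >>> disp_trim('\x1b[31mabcd\x1b[0m', 2)
+# '\x1b[31mab\x1b[0m'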
diff --git a/.local/lib/python3.8/site-packages/tqdm/version.py b/.local/lib/python3.8/site-packages/tqdm/version.py
new file mode 100644
index 0000000..11cbaea
--- /dev/null
+++ b/.local/lib/python3.8/site-packages/tqdm/version.py
@@ -0,0 +1,9 @@
+"""`tqdm` version detector. Precedence: installed dist, git, 'UNKNOWN'."""
+try:
+    from ._dist_ver import __version__
+except ImportError:
+    try:
+        from setuptools_scm import get_version
+        __version__ = get_version(root='..', relative_to=__file__)
+    except (ImportError, LookupError):
+        __version__ = "UNKNOWN"
diff --git a/.motd_shown b/.motd_shown
new file mode 100644
index 0000000..e69de29
diff --git a/.profile b/.profile
new file mode 100644
index 0000000..d89ea5a
--- /dev/null
+++ b/.profile
@@ -0,0 +1,27 @@
+# ~/.profile: executed by the command interpreter for login shells.
+# This file is not read by bash(1), if ~/.bash_profile or ~/.bash_login
+# exists.
+# see /usr/share/doc/bash/examples/startup-files for examples.
+# the files are located in the bash-doc package.
+
+# the default umask is set in /etc/profile; for setting the umask
+# for ssh logins, install and configure the libpam-umask package.
+#umask 022
+
+# if running bash
+if [ -n "$BASH_VERSION" ]; then
+    # include .bashrc if it exists
+    if [ -f "$HOME/.bashrc" ]; then
+	. "$HOME/.bashrc"
+    fi
+fi
+
+# set PATH so it includes user's private bin if it exists
+if [ -d "$HOME/bin" ] ; then
+    PATH="$HOME/bin:$PATH"
+fi
+
+# set PATH so it includes user's private bin if it exists
+if [ -d "$HOME/.local/bin" ] ; then
+    PATH="$HOME/.local/bin:$PATH"
+fi
diff --git a/.ssh/known_hosts b/.ssh/known_hosts
new file mode 100644
index 0000000..305c0af
--- /dev/null
+++ b/.ssh/known_hosts
@@ -0,0 +1,2 @@
+|1|6ZcI5Re/N/gv2nORzLOC6seD+BU=|gqEmo66a/U47Fi1xU7IEjjnFHHs= ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBDpZU7dhIR4DFiNCkk9qImyWZGPOVI3N+OAMCv7TAo3qtADFuHzwqQ2cI2/QU4KMt/CvddeGfHuOnTI8h/VVmCM=
+|1|C/Xydpzjq7RQvIxJFDPNw1DlvqQ=|2+fp8NuW4/HZsjmtnKYJ8Yu1rJY= ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBDpZU7dhIR4DFiNCkk9qImyWZGPOVI3N+OAMCv7TAo3qtADFuHzwqQ2cI2/QU4KMt/CvddeGfHuOnTI8h/VVmCM=
diff --git a/.sudo_as_admin_successful b/.sudo_as_admin_successful
new file mode 100644
index 0000000..e69de29
diff --git a/petite-difference-challenge2 b/petite-difference-challenge2
new file mode 160000
index 0000000..c484d01
--- /dev/null
+++ b/petite-difference-challenge2
@@ -0,0 +1 @@
+Subproject commit c484d015f79b0469646d8bc5e3ffdf2e0c732c76